From 8020f71afd34d7696d7933659df2d763ab05542f Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 16:31:17 +0200 Subject: Adding upstream version 1.37.1. Signed-off-by: Daniel Baumann --- .clang-format | 118 + .codacy.yml | 17 + .codeclimate.yml | 93 + .csslintrc | 2 + .devcontainer/devcontainer.json | 14 + .dockerignore | 1 + .eslintignore | 2 + .eslintrc | 213 + .gitattributes | 2 + .github/CODEOWNERS | 57 + .github/ISSUE_TEMPLATE.md | 15 + .github/ISSUE_TEMPLATE/BUG_REPORT.yml | 102 + .github/ISSUE_TEMPLATE/FEAT_REQUEST.yml | 62 + .github/ISSUE_TEMPLATE/config.yml | 12 + .github/PULL_REQUEST_TEMPLATE.md | 27 + .github/codeql/python-config.yml | 10 + .github/data/distros.yml | 214 + .github/dependabot.yml | 9 + .github/dockerfiles/Dockerfile.build_test | 18 + .github/dockerfiles/Dockerfile.clang | 18 + .github/labeler.yml | 190 + .github/scripts/build-artifacts.sh | 82 + .github/scripts/build-dist.sh | 71 + .github/scripts/build-static.sh | 61 + .github/scripts/bump-packaging-version.sh | 6 + .github/scripts/check-updater.sh | 49 + .github/scripts/ci-support-pkgs.sh | 14 + .github/scripts/docker-test.sh | 60 + .github/scripts/functions.sh | 69 + .github/scripts/gen-docker-tags.py | 14 + .github/scripts/get-static-cache-key.sh | 15 + .github/scripts/netdata-pkgcloud-cleanup.py | 190 + .github/scripts/package-upload.sh | 43 + .github/scripts/package_cloud_wrapper.sh | 48 + .github/scripts/pkg-test.sh | 138 + .github/scripts/prepare-release-base.sh | 176 + .github/scripts/run-updater-check.sh | 14 + .github/scripts/run_install_with_dist_file.sh | 39 + .github/stale.yml | 18 + .github/workflows/add-to-project.yml | 26 + .github/workflows/build.yml | 861 + .github/workflows/checks.yml | 61 + .github/workflows/cloud_regression.yml | 54 + .github/workflows/codeql.yml | 117 + .github/workflows/coverity.yml | 63 + .github/workflows/dashboard-pr.yml | 54 + .github/workflows/docker.yml | 298 + .github/workflows/docs.yml | 29 + .github/workflows/labeler.yml | 18 + .github/workflows/packagecloud.yml | 36 + .github/workflows/packaging.yml | 279 + .github/workflows/release.yml | 214 + .github/workflows/repoconfig-packages.yml | 183 + .github/workflows/review.yml | 172 + .github/workflows/tests.yml | 41 + .github/workflows/trigger-learn-update.yml | 37 + .gitignore | 235 + .gitmodules | 15 + .lgtm.yml | 24 + .mlc_config.json | 20 + .remarkignore | 1 + .remarkrc.js | 111 + .squash.yml | 7 + .travis.yml | 68 + .travis/README.md | 149 + .travis/check_changelog_last_modification.sh | 38 + .travis/create_artifacts.sh | 77 + .travis/create_changelog.sh | 47 + .travis/current_build_status | 1 + .travis/draft_release.sh | 65 + .travis/gcs-credentials.json.enc | Bin 0 -> 2320 bytes .travis/generate_changelog_and_tag_release.sh | 64 + .travis/generate_changelog_for_nightlies.sh | 50 + .travis/nightlies.sh | 51 + .travis/tagger.sh | 61 + .travis/trigger_artifact_build.sh | 20 + .travis/trigger_docker_build.sh | 19 + .travis/trigger_package_build.sh | 20 + .travis/utils.sh | 29 + .yamllint.yml | 40 + BREAKING_CHANGES.md | 11 + BUILD.md | 365 + CHANGELOG.md | 589 + CMakeLists.txt | 1802 + Dockerfile | 1 + Dockerfile.test | 74 + HISTORICAL_CHANGELOG.md | 650 + LICENSE | 684 + Makefile.am | 1317 + README.md | 244 + REDISTRIBUTED.md | 188 + aclk/README.md | 147 + aclk/aclk.c | 1173 + aclk/aclk.h | 57 + aclk/aclk_alarm_api.c | 44 + aclk/aclk_alarm_api.h | 14 + aclk/aclk_capas.c | 47 + aclk/aclk_capas.h | 14 + aclk/aclk_contexts_api.c | 41 + aclk/aclk_contexts_api.h | 14 + aclk/aclk_otp.c | 888 + aclk/aclk_otp.h | 18 + aclk/aclk_proxy.c | 186 + aclk/aclk_proxy.h | 22 + aclk/aclk_query.c | 383 + aclk/aclk_query.h | 36 + aclk/aclk_query_queue.c | 136 + aclk/aclk_query_queue.h | 86 + aclk/aclk_rrdhost_state.h | 11 + aclk/aclk_rx_msgs.c | 551 + aclk/aclk_rx_msgs.h | 17 + aclk/aclk_stats.c | 408 + aclk/aclk_stats.h | 79 + aclk/aclk_tx_msgs.c | 276 + aclk/aclk_tx_msgs.h | 20 + aclk/aclk_util.c | 388 + aclk/aclk_util.h | 112 + aclk/helpers/mqtt_wss_pal.h | 19 + aclk/helpers/ringbuffer_pal.h | 11 + aclk/https_client.c | 688 + aclk/https_client.h | 78 + aclk/schema-wrappers/alarm_config.cc | 147 + aclk/schema-wrappers/alarm_config.h | 69 + aclk/schema-wrappers/alarm_stream.cc | 253 + aclk/schema-wrappers/alarm_stream.h | 136 + aclk/schema-wrappers/capability.cc | 11 + aclk/schema-wrappers/capability.h | 24 + aclk/schema-wrappers/connection.cc | 72 + aclk/schema-wrappers/connection.h | 47 + aclk/schema-wrappers/context.cc | 125 + aclk/schema-wrappers/context.h | 53 + aclk/schema-wrappers/context_stream.cc | 42 + aclk/schema-wrappers/context_stream.h | 36 + aclk/schema-wrappers/node_connection.cc | 46 + aclk/schema-wrappers/node_connection.h | 32 + aclk/schema-wrappers/node_creation.cc | 39 + aclk/schema-wrappers/node_creation.h | 31 + aclk/schema-wrappers/node_info.cc | 136 + aclk/schema-wrappers/node_info.h | 79 + aclk/schema-wrappers/proto_2_json.cc | 85 + aclk/schema-wrappers/proto_2_json.h | 18 + aclk/schema-wrappers/schema_wrapper_utils.cc | 22 + aclk/schema-wrappers/schema_wrapper_utils.h | 24 + aclk/schema-wrappers/schema_wrappers.h | 18 + build-artifacts.sh | 27 + build/m4/ax_c__generic.m4 | 28 + build/m4/ax_c_lto.m4 | 21 + build/m4/ax_c_mallinfo.m4 | 24 + build/m4/ax_c_mallopt.m4 | 20 + build/m4/ax_c_statement_expressions.m4 | 23 + build/m4/ax_check_compile_flag.m4 | 50 + build/m4/ax_check_enable_debug.m4 | 122 + build/m4/ax_gcc_func_attribute.m4 | 223 + build/m4/ax_pthread.m4 | 522 + build/m4/jemalloc.m4 | 75 + build/m4/tcmalloc.m4 | 45 + build/subst.inc | 22 + build_external/README.md | 128 + build_external/bin/clean-install.sh | 54 + build_external/bin/make-install.sh | 8 + build_external/clean-install-arch-debug.Dockerfile | 62 + .../clean-install-arch-extras.Dockerfile | 58 + build_external/clean-install-arch.Dockerfile | 54 + build_external/clean-install.Dockerfile | 39 + build_external/make-install.Dockerfile | 11 + .../scenarios/aclk-testing/agent-compose.yml | 18 + .../aclk-testing/agent-valgrind-compose.yml | 18 + .../scenarios/aclk-testing/agent_netdata.conf | 7115 + .../aclk-testing/configureVerneMQ.Dockerfile | 8 + .../scenarios/aclk-testing/paho-compose.yml | 6 + .../scenarios/aclk-testing/paho-inspection.py | 33 + .../scenarios/aclk-testing/paho.Dockerfile | 12 + .../scenarios/aclk-testing/vernemq-compose.yml | 6 + build_external/scenarios/aclk-testing/vernemq.conf | 68 + build_external/scenarios/gaps_hi/child-compose.yml | 13 + build_external/scenarios/gaps_hi/child_guid | 1 + build_external/scenarios/gaps_hi/child_stream.conf | 11 + .../scenarios/gaps_hi/middle-compose.yml | 13 + build_external/scenarios/gaps_hi/middle_guid | 1 + .../scenarios/gaps_hi/middle_stream.conf | 23 + build_external/scenarios/gaps_hi/min.conf | 6 + .../scenarios/gaps_hi/parent-compose.yml | 13 + build_external/scenarios/gaps_hi/parent_guid | 1 + .../scenarios/gaps_hi/parent_stream.conf | 11 + build_external/scenarios/gaps_lo/child-compose.yml | 14 + build_external/scenarios/gaps_lo/child_guid | 1 + build_external/scenarios/gaps_lo/child_stream.conf | 11 + .../scenarios/gaps_lo/middle-compose.yml | 14 + build_external/scenarios/gaps_lo/middle_guid | 1 + .../scenarios/gaps_lo/middle_stream.conf | 20 + build_external/scenarios/gaps_lo/mostly_off.conf | 964 + .../scenarios/gaps_lo/parent-compose.yml | 13 + build_external/scenarios/gaps_lo/parent_guid | 1 + .../scenarios/gaps_lo/parent_stream.conf | 12 + .../scenarios/only-agent/docker-compose.yml | 8 + .../scenarios/parent-child/child_stream.conf | 10 + .../scenarios/parent-child/docker-compose.yml | 23 + .../scenarios/parent-child/parent_stream.conf | 7 + claim/Makefile.am | 21 + claim/README.md | 609 + claim/claim.c | 208 + claim/claim.h | 16 + claim/netdata-claim.sh.in | 442 + cli/Makefile.am | 8 + cli/README.md | 40 + cli/cli.c | 201 + cli/cli.h | 8 + collectors/COLLECTORS.md | 518 + collectors/Makefile.am | 40 + collectors/README.md | 48 + collectors/REFERENCE.md | 170 + collectors/all.h | 367 + collectors/apps.plugin/Makefile.am | 12 + collectors/apps.plugin/README.md | 399 + collectors/apps.plugin/apps_groups.conf | 422 + collectors/apps.plugin/apps_plugin.c | 5015 + collectors/cgroups.plugin/Makefile.am | 13 + collectors/cgroups.plugin/README.md | 308 + collectors/cgroups.plugin/cgroup-name.sh | 597 + collectors/cgroups.plugin/cgroup-network-helper.sh | 302 + collectors/cgroups.plugin/cgroup-network.c | 723 + collectors/cgroups.plugin/sys_fs_cgroup.c | 4887 + collectors/cgroups.plugin/sys_fs_cgroup.h | 44 + .../cgroups.plugin/tests/test_cgroups_plugin.c | 131 + .../cgroups.plugin/tests/test_cgroups_plugin.h | 16 + collectors/cgroups.plugin/tests/test_doubles.c | 157 + collectors/charts.d.plugin/Makefile.am | 50 + collectors/charts.d.plugin/README.md | 198 + collectors/charts.d.plugin/ap/Makefile.inc | 13 + collectors/charts.d.plugin/ap/README.md | 99 + collectors/charts.d.plugin/ap/ap.chart.sh | 179 + collectors/charts.d.plugin/ap/ap.conf | 23 + collectors/charts.d.plugin/apcupsd/Makefile.inc | 13 + collectors/charts.d.plugin/apcupsd/README.md | 21 + .../charts.d.plugin/apcupsd/apcupsd.chart.sh | 223 + collectors/charts.d.plugin/apcupsd/apcupsd.conf | 25 + collectors/charts.d.plugin/charts.d.conf | 48 + .../charts.d.plugin/charts.d.dryrun-helper.sh | 72 + collectors/charts.d.plugin/charts.d.plugin.in | 732 + collectors/charts.d.plugin/example/Makefile.inc | 13 + collectors/charts.d.plugin/example/README.md | 10 + .../charts.d.plugin/example/example.chart.sh | 123 + collectors/charts.d.plugin/example/example.conf | 21 + collectors/charts.d.plugin/libreswan/Makefile.inc | 13 + collectors/charts.d.plugin/libreswan/README.md | 56 + .../charts.d.plugin/libreswan/libreswan.chart.sh | 187 + .../charts.d.plugin/libreswan/libreswan.conf | 29 + collectors/charts.d.plugin/loopsleepms.sh.inc | 227 + collectors/charts.d.plugin/nut/Makefile.inc | 13 + collectors/charts.d.plugin/nut/README.md | 74 + collectors/charts.d.plugin/nut/nut.chart.sh | 232 + collectors/charts.d.plugin/nut/nut.conf | 33 + collectors/charts.d.plugin/opensips/Makefile.inc | 13 + collectors/charts.d.plugin/opensips/README.md | 19 + .../charts.d.plugin/opensips/opensips.chart.sh | 325 + collectors/charts.d.plugin/opensips/opensips.conf | 21 + collectors/charts.d.plugin/sensors/Makefile.inc | 13 + collectors/charts.d.plugin/sensors/README.md | 79 + .../charts.d.plugin/sensors/sensors.chart.sh | 250 + collectors/charts.d.plugin/sensors/sensors.conf | 32 + collectors/cups.plugin/Makefile.am | 8 + collectors/cups.plugin/README.md | 64 + collectors/cups.plugin/cups_plugin.c | 439 + collectors/diskspace.plugin/Makefile.am | 8 + collectors/diskspace.plugin/README.md | 43 + collectors/diskspace.plugin/plugin_diskspace.c | 690 + collectors/ebpf.plugin/Makefile.am | 42 + collectors/ebpf.plugin/README.md | 970 + collectors/ebpf.plugin/ebpf.c | 2249 + collectors/ebpf.plugin/ebpf.d.conf | 69 + collectors/ebpf.plugin/ebpf.d/cachestat.conf | 36 + collectors/ebpf.plugin/ebpf.d/dcstat.conf | 34 + collectors/ebpf.plugin/ebpf.d/disk.conf | 9 + .../ebpf.plugin/ebpf.d/ebpf_kernel_reject_list.txt | 1 + collectors/ebpf.plugin/ebpf.d/fd.conf | 21 + collectors/ebpf.plugin/ebpf.d/filesystem.conf | 20 + collectors/ebpf.plugin/ebpf.d/hardirq.conf | 8 + collectors/ebpf.plugin/ebpf.d/mdflush.conf | 7 + collectors/ebpf.plugin/ebpf.d/mount.conf | 19 + collectors/ebpf.plugin/ebpf.d/network.conf | 53 + collectors/ebpf.plugin/ebpf.d/oomkill.conf | 7 + collectors/ebpf.plugin/ebpf.d/process.conf | 25 + collectors/ebpf.plugin/ebpf.d/shm.conf | 36 + collectors/ebpf.plugin/ebpf.d/softirq.conf | 8 + collectors/ebpf.plugin/ebpf.d/swap.conf | 28 + collectors/ebpf.plugin/ebpf.d/sync.conf | 36 + collectors/ebpf.plugin/ebpf.d/vfs.conf | 19 + collectors/ebpf.plugin/ebpf.h | 299 + collectors/ebpf.plugin/ebpf_apps.c | 1155 + collectors/ebpf.plugin/ebpf_apps.h | 441 + collectors/ebpf.plugin/ebpf_cachestat.c | 1335 + collectors/ebpf.plugin/ebpf_cachestat.h | 90 + collectors/ebpf.plugin/ebpf_cgroup.c | 317 + collectors/ebpf.plugin/ebpf_cgroup.h | 69 + collectors/ebpf.plugin/ebpf_dcstat.c | 1225 + collectors/ebpf.plugin/ebpf_dcstat.h | 84 + collectors/ebpf.plugin/ebpf_disk.c | 865 + collectors/ebpf.plugin/ebpf_disk.h | 78 + collectors/ebpf.plugin/ebpf_fd.c | 1183 + collectors/ebpf.plugin/ebpf_fd.h | 85 + collectors/ebpf.plugin/ebpf_filesystem.c | 638 + collectors/ebpf.plugin/ebpf_filesystem.h | 49 + collectors/ebpf.plugin/ebpf_hardirq.c | 507 + collectors/ebpf.plugin/ebpf_hardirq.h | 73 + collectors/ebpf.plugin/ebpf_mdflush.c | 334 + collectors/ebpf.plugin/ebpf_mdflush.h | 42 + collectors/ebpf.plugin/ebpf_mount.c | 517 + collectors/ebpf.plugin/ebpf_mount.h | 44 + collectors/ebpf.plugin/ebpf_oomkill.c | 402 + collectors/ebpf.plugin/ebpf_oomkill.h | 32 + collectors/ebpf.plugin/ebpf_process.c | 1327 + collectors/ebpf.plugin/ebpf_process.h | 107 + collectors/ebpf.plugin/ebpf_shm.c | 1137 + collectors/ebpf.plugin/ebpf_shm.h | 63 + collectors/ebpf.plugin/ebpf_socket.c | 3976 + collectors/ebpf.plugin/ebpf_socket.h | 370 + collectors/ebpf.plugin/ebpf_softirq.c | 290 + collectors/ebpf.plugin/ebpf_softirq.h | 34 + collectors/ebpf.plugin/ebpf_swap.c | 915 + collectors/ebpf.plugin/ebpf_swap.h | 53 + collectors/ebpf.plugin/ebpf_sync.c | 608 + collectors/ebpf.plugin/ebpf_sync.h | 59 + collectors/ebpf.plugin/ebpf_vfs.c | 1983 + collectors/ebpf.plugin/ebpf_vfs.h | 177 + collectors/fping.plugin/Makefile.am | 24 + collectors/fping.plugin/README.md | 110 + collectors/fping.plugin/fping.conf | 44 + collectors/fping.plugin/fping.plugin.in | 202 + collectors/freebsd.plugin/Makefile.am | 8 + collectors/freebsd.plugin/README.md | 12 + collectors/freebsd.plugin/freebsd_devstat.c | 759 + collectors/freebsd.plugin/freebsd_getifaddrs.c | 599 + collectors/freebsd.plugin/freebsd_getmntinfo.c | 299 + collectors/freebsd.plugin/freebsd_ipfw.c | 359 + collectors/freebsd.plugin/freebsd_kstat_zfs.c | 304 + collectors/freebsd.plugin/freebsd_sysctl.c | 3062 + collectors/freebsd.plugin/plugin_freebsd.c | 136 + collectors/freebsd.plugin/plugin_freebsd.h | 55 + collectors/freeipmi.plugin/Makefile.am | 8 + collectors/freeipmi.plugin/README.md | 202 + collectors/freeipmi.plugin/freeipmi_plugin.c | 1857 + collectors/idlejitter.plugin/Makefile.am | 8 + collectors/idlejitter.plugin/README.md | 32 + collectors/idlejitter.plugin/plugin_idlejitter.c | 93 + collectors/ioping.plugin/Makefile.am | 24 + collectors/ioping.plugin/README.md | 86 + collectors/ioping.plugin/ioping.conf | 40 + collectors/ioping.plugin/ioping.plugin.in | 210 + collectors/macos.plugin/Makefile.am | 8 + collectors/macos.plugin/README.md | 12 + collectors/macos.plugin/macos_fw.c | 648 + collectors/macos.plugin/macos_mach_smi.c | 227 + collectors/macos.plugin/macos_sysctl.c | 1424 + collectors/macos.plugin/plugin_macos.c | 81 + collectors/macos.plugin/plugin_macos.h | 12 + collectors/nfacct.plugin/Makefile.am | 8 + collectors/nfacct.plugin/README.md | 54 + collectors/nfacct.plugin/plugin_nfacct.c | 886 + collectors/perf.plugin/Makefile.am | 8 + collectors/perf.plugin/README.md | 80 + collectors/perf.plugin/perf_plugin.c | 1353 + collectors/plugins.d/Makefile.am | 11 + collectors/plugins.d/README.md | 599 + collectors/plugins.d/plugins_d.c | 290 + collectors/plugins.d/plugins_d.h | 103 + collectors/plugins.d/pluginsd_parser.c | 1360 + collectors/plugins.d/pluginsd_parser.h | 39 + collectors/proc.plugin/Makefile.am | 8 + collectors/proc.plugin/README.md | 607 + collectors/proc.plugin/ipc.c | 554 + collectors/proc.plugin/plugin_proc.c | 189 + collectors/proc.plugin/plugin_proc.h | 66 + collectors/proc.plugin/proc_diskstats.c | 2045 + collectors/proc.plugin/proc_interrupts.c | 245 + collectors/proc.plugin/proc_loadavg.c | 126 + collectors/proc.plugin/proc_mdstat.c | 640 + collectors/proc.plugin/proc_meminfo.c | 513 + collectors/proc.plugin/proc_net_dev.c | 1522 + collectors/proc.plugin/proc_net_ip_vs_stats.c | 123 + collectors/proc.plugin/proc_net_netstat.c | 3110 + collectors/proc.plugin/proc_net_rpc_nfs.c | 439 + collectors/proc.plugin/proc_net_rpc_nfsd.c | 763 + collectors/proc.plugin/proc_net_sctp_snmp.c | 367 + collectors/proc.plugin/proc_net_sockstat.c | 529 + collectors/proc.plugin/proc_net_sockstat6.c | 278 + collectors/proc.plugin/proc_net_softnet_stat.c | 149 + collectors/proc.plugin/proc_net_stat_conntrack.c | 345 + collectors/proc.plugin/proc_net_stat_synproxy.c | 153 + collectors/proc.plugin/proc_net_wireless.c | 432 + collectors/proc.plugin/proc_pagetypeinfo.c | 338 + collectors/proc.plugin/proc_pressure.c | 209 + collectors/proc.plugin/proc_pressure.h | 43 + collectors/proc.plugin/proc_self_mountinfo.c | 458 + collectors/proc.plugin/proc_self_mountinfo.h | 61 + collectors/proc.plugin/proc_softirqs.c | 243 + collectors/proc.plugin/proc_spl_kstat_zfs.c | 423 + collectors/proc.plugin/proc_stat.c | 1064 + .../proc_sys_kernel_random_entropy_avail.c | 47 + collectors/proc.plugin/proc_uptime.c | 42 + collectors/proc.plugin/proc_vmstat.c | 311 + collectors/proc.plugin/sys_block_zram.c | 279 + collectors/proc.plugin/sys_class_infiniband.c | 703 + collectors/proc.plugin/sys_class_power_supply.c | 414 + .../proc.plugin/sys_devices_system_edac_mc.c | 204 + collectors/proc.plugin/sys_devices_system_node.c | 165 + collectors/proc.plugin/sys_fs_btrfs.c | 743 + collectors/proc.plugin/sys_kernel_mm_ksm.c | 194 + collectors/proc.plugin/zfs_common.c | 960 + collectors/proc.plugin/zfs_common.h | 115 + collectors/python.d.plugin/Makefile.am | 236 + collectors/python.d.plugin/README.md | 270 + .../python.d.plugin/adaptec_raid/Makefile.inc | 13 + collectors/python.d.plugin/adaptec_raid/README.md | 80 + .../adaptec_raid/adaptec_raid.chart.py | 247 + .../python.d.plugin/adaptec_raid/adaptec_raid.conf | 53 + collectors/python.d.plugin/alarms/Makefile.inc | 13 + collectors/python.d.plugin/alarms/README.md | 66 + collectors/python.d.plugin/alarms/alarms.chart.py | 95 + collectors/python.d.plugin/alarms/alarms.conf | 60 + collectors/python.d.plugin/am2320/Makefile.inc | 8 + collectors/python.d.plugin/am2320/README.md | 53 + collectors/python.d.plugin/am2320/am2320.chart.py | 68 + collectors/python.d.plugin/am2320/am2320.conf | 68 + collectors/python.d.plugin/anomalies/Makefile.inc | 13 + collectors/python.d.plugin/anomalies/README.md | 245 + .../python.d.plugin/anomalies/anomalies.chart.py | 426 + .../python.d.plugin/anomalies/anomalies.conf | 184 + collectors/python.d.plugin/beanstalk/Makefile.inc | 13 + collectors/python.d.plugin/beanstalk/README.md | 133 + .../python.d.plugin/beanstalk/beanstalk.chart.py | 252 + .../python.d.plugin/beanstalk/beanstalk.conf | 78 + collectors/python.d.plugin/bind_rndc/Makefile.inc | 13 + collectors/python.d.plugin/bind_rndc/README.md | 79 + .../python.d.plugin/bind_rndc/bind_rndc.chart.py | 252 + .../python.d.plugin/bind_rndc/bind_rndc.conf | 110 + collectors/python.d.plugin/boinc/Makefile.inc | 13 + collectors/python.d.plugin/boinc/README.md | 41 + collectors/python.d.plugin/boinc/boinc.chart.py | 168 + collectors/python.d.plugin/boinc/boinc.conf | 66 + collectors/python.d.plugin/ceph/Makefile.inc | 13 + collectors/python.d.plugin/ceph/README.md | 48 + collectors/python.d.plugin/ceph/ceph.chart.py | 374 + collectors/python.d.plugin/ceph/ceph.conf | 75 + .../python.d.plugin/changefinder/Makefile.inc | 13 + collectors/python.d.plugin/changefinder/README.md | 217 + .../changefinder/changefinder.chart.py | 185 + .../python.d.plugin/changefinder/changefinder.conf | 74 + collectors/python.d.plugin/dockerd/Makefile.inc | 13 + collectors/python.d.plugin/dockerd/README.md | 46 + .../python.d.plugin/dockerd/dockerd.chart.py | 86 + collectors/python.d.plugin/dockerd/dockerd.conf | 77 + collectors/python.d.plugin/dovecot/Makefile.inc | 13 + collectors/python.d.plugin/dovecot/README.md | 105 + .../python.d.plugin/dovecot/dovecot.chart.py | 143 + collectors/python.d.plugin/dovecot/dovecot.conf | 98 + collectors/python.d.plugin/example/Makefile.inc | 13 + collectors/python.d.plugin/example/README.md | 14 + .../python.d.plugin/example/example.chart.py | 51 + collectors/python.d.plugin/example/example.conf | 87 + collectors/python.d.plugin/exim/Makefile.inc | 13 + collectors/python.d.plugin/exim/README.md | 41 + collectors/python.d.plugin/exim/exim.chart.py | 39 + collectors/python.d.plugin/exim/exim.conf | 91 + collectors/python.d.plugin/fail2ban/Makefile.inc | 13 + collectors/python.d.plugin/fail2ban/README.md | 82 + .../python.d.plugin/fail2ban/fail2ban.chart.py | 217 + collectors/python.d.plugin/fail2ban/fail2ban.conf | 68 + collectors/python.d.plugin/gearman/Makefile.inc | 13 + collectors/python.d.plugin/gearman/README.md | 50 + .../python.d.plugin/gearman/gearman.chart.py | 243 + collectors/python.d.plugin/gearman/gearman.conf | 72 + collectors/python.d.plugin/go_expvar/Makefile.inc | 13 + collectors/python.d.plugin/go_expvar/README.md | 319 + .../python.d.plugin/go_expvar/go_expvar.chart.py | 253 + .../python.d.plugin/go_expvar/go_expvar.conf | 108 + collectors/python.d.plugin/haproxy/Makefile.inc | 13 + collectors/python.d.plugin/haproxy/README.md | 67 + .../python.d.plugin/haproxy/haproxy.chart.py | 360 + collectors/python.d.plugin/haproxy/haproxy.conf | 83 + collectors/python.d.plugin/hddtemp/Makefile.inc | 13 + collectors/python.d.plugin/hddtemp/README.md | 38 + .../python.d.plugin/hddtemp/hddtemp.chart.py | 99 + collectors/python.d.plugin/hddtemp/hddtemp.conf | 95 + collectors/python.d.plugin/hpssa/Makefile.inc | 13 + collectors/python.d.plugin/hpssa/README.md | 83 + collectors/python.d.plugin/hpssa/hpssa.chart.py | 396 + collectors/python.d.plugin/hpssa/hpssa.conf | 61 + collectors/python.d.plugin/icecast/Makefile.inc | 13 + collectors/python.d.plugin/icecast/README.md | 44 + .../python.d.plugin/icecast/icecast.chart.py | 94 + collectors/python.d.plugin/icecast/icecast.conf | 81 + collectors/python.d.plugin/ipfs/Makefile.inc | 13 + collectors/python.d.plugin/ipfs/README.md | 51 + collectors/python.d.plugin/ipfs/ipfs.chart.py | 149 + collectors/python.d.plugin/ipfs/ipfs.conf | 82 + collectors/python.d.plugin/litespeed/Makefile.inc | 13 + collectors/python.d.plugin/litespeed/README.md | 72 + .../python.d.plugin/litespeed/litespeed.chart.py | 188 + .../python.d.plugin/litespeed/litespeed.conf | 72 + collectors/python.d.plugin/logind/Makefile.inc | 13 + collectors/python.d.plugin/logind/README.md | 86 + collectors/python.d.plugin/logind/logind.chart.py | 85 + collectors/python.d.plugin/logind/logind.conf | 60 + collectors/python.d.plugin/megacli/Makefile.inc | 13 + collectors/python.d.plugin/megacli/README.md | 86 + .../python.d.plugin/megacli/megacli.chart.py | 278 + collectors/python.d.plugin/megacli/megacli.conf | 60 + collectors/python.d.plugin/memcached/Makefile.inc | 13 + collectors/python.d.plugin/memcached/README.md | 99 + .../python.d.plugin/memcached/memcached.chart.py | 197 + .../python.d.plugin/memcached/memcached.conf | 90 + collectors/python.d.plugin/mongodb/Makefile.inc | 13 + collectors/python.d.plugin/mongodb/README.md | 210 + .../python.d.plugin/mongodb/mongodb.chart.py | 786 + collectors/python.d.plugin/mongodb/mongodb.conf | 102 + collectors/python.d.plugin/monit/Makefile.inc | 13 + collectors/python.d.plugin/monit/README.md | 52 + collectors/python.d.plugin/monit/monit.chart.py | 360 + collectors/python.d.plugin/monit/monit.conf | 86 + collectors/python.d.plugin/nsd/Makefile.inc | 13 + collectors/python.d.plugin/nsd/README.md | 68 + collectors/python.d.plugin/nsd/nsd.chart.py | 105 + collectors/python.d.plugin/nsd/nsd.conf | 91 + collectors/python.d.plugin/ntpd/Makefile.inc | 13 + collectors/python.d.plugin/ntpd/README.md | 90 + collectors/python.d.plugin/ntpd/ntpd.chart.py | 385 + collectors/python.d.plugin/ntpd/ntpd.conf | 89 + collectors/python.d.plugin/nvidia_smi/Makefile.inc | 13 + collectors/python.d.plugin/nvidia_smi/README.md | 66 + .../python.d.plugin/nvidia_smi/nvidia_smi.chart.py | 589 + .../python.d.plugin/nvidia_smi/nvidia_smi.conf | 68 + collectors/python.d.plugin/openldap/Makefile.inc | 13 + collectors/python.d.plugin/openldap/README.md | 79 + .../python.d.plugin/openldap/openldap.chart.py | 216 + collectors/python.d.plugin/openldap/openldap.conf | 75 + collectors/python.d.plugin/oracledb/Makefile.inc | 13 + collectors/python.d.plugin/oracledb/README.md | 97 + .../python.d.plugin/oracledb/oracledb.chart.py | 831 + collectors/python.d.plugin/oracledb/oracledb.conf | 84 + collectors/python.d.plugin/pandas/Makefile.inc | 13 + collectors/python.d.plugin/pandas/README.md | 92 + collectors/python.d.plugin/pandas/pandas.chart.py | 89 + collectors/python.d.plugin/pandas/pandas.conf | 191 + collectors/python.d.plugin/postfix/Makefile.inc | 13 + collectors/python.d.plugin/postfix/README.md | 36 + .../python.d.plugin/postfix/postfix.chart.py | 52 + collectors/python.d.plugin/postfix/postfix.conf | 72 + collectors/python.d.plugin/proxysql/Makefile.inc | 13 + collectors/python.d.plugin/proxysql/README.md | 106 + .../python.d.plugin/proxysql/proxysql.chart.py | 352 + collectors/python.d.plugin/proxysql/proxysql.conf | 116 + collectors/python.d.plugin/puppet/Makefile.inc | 13 + collectors/python.d.plugin/puppet/README.md | 67 + collectors/python.d.plugin/puppet/puppet.chart.py | 121 + collectors/python.d.plugin/puppet/puppet.conf | 94 + collectors/python.d.plugin/python.d.conf | 84 + collectors/python.d.plugin/python.d.plugin.in | 922 + .../python.d.plugin/python_modules/__init__.py | 0 .../bases/FrameworkServices/ExecutableService.py | 91 + .../bases/FrameworkServices/LogService.py | 82 + .../bases/FrameworkServices/MySQLService.py | 163 + .../bases/FrameworkServices/SimpleService.py | 261 + .../bases/FrameworkServices/SocketService.py | 336 + .../bases/FrameworkServices/UrlService.py | 207 + .../bases/FrameworkServices/__init__.py | 0 .../python_modules/bases/__init__.py | 0 .../python.d.plugin/python_modules/bases/charts.py | 431 + .../python_modules/bases/collection.py | 117 + .../python_modules/bases/loaders.py | 46 + .../python_modules/bases/loggers.py | 206 + .../python_modules/pyyaml2/__init__.py | 316 + .../python_modules/pyyaml2/composer.py | 140 + .../python_modules/pyyaml2/constructor.py | 676 + .../python_modules/pyyaml2/cyaml.py | 86 + .../python_modules/pyyaml2/dumper.py | 63 + .../python_modules/pyyaml2/emitter.py | 1141 + .../python_modules/pyyaml2/error.py | 76 + .../python_modules/pyyaml2/events.py | 87 + .../python_modules/pyyaml2/loader.py | 41 + .../python_modules/pyyaml2/nodes.py | 50 + .../python_modules/pyyaml2/parser.py | 590 + .../python_modules/pyyaml2/reader.py | 191 + .../python_modules/pyyaml2/representer.py | 485 + .../python_modules/pyyaml2/resolver.py | 225 + .../python_modules/pyyaml2/scanner.py | 1458 + .../python_modules/pyyaml2/serializer.py | 112 + .../python_modules/pyyaml2/tokens.py | 105 + .../python_modules/pyyaml3/__init__.py | 313 + .../python_modules/pyyaml3/composer.py | 140 + .../python_modules/pyyaml3/constructor.py | 687 + .../python_modules/pyyaml3/cyaml.py | 86 + .../python_modules/pyyaml3/dumper.py | 63 + .../python_modules/pyyaml3/emitter.py | 1138 + .../python_modules/pyyaml3/error.py | 76 + .../python_modules/pyyaml3/events.py | 87 + .../python_modules/pyyaml3/loader.py | 41 + .../python_modules/pyyaml3/nodes.py | 50 + .../python_modules/pyyaml3/parser.py | 590 + .../python_modules/pyyaml3/reader.py | 193 + .../python_modules/pyyaml3/representer.py | 375 + .../python_modules/pyyaml3/resolver.py | 225 + .../python_modules/pyyaml3/scanner.py | 1449 + .../python_modules/pyyaml3/serializer.py | 112 + .../python_modules/pyyaml3/tokens.py | 105 + .../python_modules/third_party/__init__.py | 0 .../python_modules/third_party/boinc_client.py | 515 + .../python_modules/third_party/filelock.py | 451 + .../python_modules/third_party/lm_sensors.py | 327 + .../python_modules/third_party/mcrcon.py | 74 + .../python_modules/third_party/monotonic.py | 201 + .../python_modules/third_party/ordereddict.py | 110 + .../python_modules/urllib3/__init__.py | 98 + .../python_modules/urllib3/_collections.py | 320 + .../python_modules/urllib3/connection.py | 374 + .../python_modules/urllib3/connectionpool.py | 900 + .../python_modules/urllib3/contrib/__init__.py | 0 .../urllib3/contrib/_securetransport/__init__.py | 0 .../urllib3/contrib/_securetransport/bindings.py | 591 + .../urllib3/contrib/_securetransport/low_level.py | 344 + .../python_modules/urllib3/contrib/appengine.py | 297 + .../python_modules/urllib3/contrib/ntlmpool.py | 113 + .../python_modules/urllib3/contrib/pyopenssl.py | 458 + .../urllib3/contrib/securetransport.py | 808 + .../python_modules/urllib3/contrib/socks.py | 189 + .../python_modules/urllib3/exceptions.py | 247 + .../python_modules/urllib3/fields.py | 179 + .../python_modules/urllib3/filepost.py | 95 + .../python_modules/urllib3/packages/__init__.py | 5 + .../urllib3/packages/backports/__init__.py | 0 .../urllib3/packages/backports/makefile.py | 54 + .../urllib3/packages/ordered_dict.py | 260 + .../python_modules/urllib3/packages/six.py | 852 + .../packages/ssl_match_hostname/__init__.py | 20 + .../packages/ssl_match_hostname/_implementation.py | 156 + .../python_modules/urllib3/poolmanager.py | 441 + .../python_modules/urllib3/request.py | 149 + .../python_modules/urllib3/response.py | 623 + .../python_modules/urllib3/util/__init__.py | 55 + .../python_modules/urllib3/util/connection.py | 131 + .../python_modules/urllib3/util/request.py | 119 + .../python_modules/urllib3/util/response.py | 82 + .../python_modules/urllib3/util/retry.py | 402 + .../python_modules/urllib3/util/selectors.py | 588 + .../python_modules/urllib3/util/ssl_.py | 338 + .../python_modules/urllib3/util/timeout.py | 243 + .../python_modules/urllib3/util/url.py | 231 + .../python_modules/urllib3/util/wait.py | 41 + collectors/python.d.plugin/rabbitmq/Makefile.inc | 13 + collectors/python.d.plugin/rabbitmq/README.md | 138 + .../python.d.plugin/rabbitmq/rabbitmq.chart.py | 443 + collectors/python.d.plugin/rabbitmq/rabbitmq.conf | 86 + collectors/python.d.plugin/rethinkdbs/Makefile.inc | 13 + collectors/python.d.plugin/rethinkdbs/README.md | 53 + .../python.d.plugin/rethinkdbs/rethinkdbs.chart.py | 247 + .../python.d.plugin/rethinkdbs/rethinkdbs.conf | 76 + collectors/python.d.plugin/retroshare/Makefile.inc | 13 + collectors/python.d.plugin/retroshare/README.md | 47 + .../python.d.plugin/retroshare/retroshare.chart.py | 78 + .../python.d.plugin/retroshare/retroshare.conf | 72 + collectors/python.d.plugin/riakkv/Makefile.inc | 13 + collectors/python.d.plugin/riakkv/README.md | 126 + collectors/python.d.plugin/riakkv/riakkv.chart.py | 334 + collectors/python.d.plugin/riakkv/riakkv.conf | 68 + collectors/python.d.plugin/samba/Makefile.inc | 13 + collectors/python.d.plugin/samba/README.md | 121 + collectors/python.d.plugin/samba/samba.chart.py | 144 + collectors/python.d.plugin/samba/samba.conf | 60 + collectors/python.d.plugin/sensors/Makefile.inc | 13 + collectors/python.d.plugin/sensors/README.md | 33 + .../python.d.plugin/sensors/sensors.chart.py | 179 + collectors/python.d.plugin/sensors/sensors.conf | 61 + collectors/python.d.plugin/smartd_log/Makefile.inc | 13 + collectors/python.d.plugin/smartd_log/README.md | 125 + .../python.d.plugin/smartd_log/smartd_log.chart.py | 772 + .../python.d.plugin/smartd_log/smartd_log.conf | 75 + collectors/python.d.plugin/spigotmc/Makefile.inc | 13 + collectors/python.d.plugin/spigotmc/README.md | 38 + .../python.d.plugin/spigotmc/spigotmc.chart.py | 184 + collectors/python.d.plugin/spigotmc/spigotmc.conf | 66 + collectors/python.d.plugin/springboot/Makefile.inc | 13 + collectors/python.d.plugin/springboot/README.md | 145 + .../python.d.plugin/springboot/springboot.chart.py | 160 + .../python.d.plugin/springboot/springboot.conf | 118 + collectors/python.d.plugin/squid/Makefile.inc | 13 + collectors/python.d.plugin/squid/README.md | 58 + collectors/python.d.plugin/squid/squid.chart.py | 123 + collectors/python.d.plugin/squid/squid.conf | 167 + collectors/python.d.plugin/tomcat/Makefile.inc | 13 + collectors/python.d.plugin/tomcat/README.md | 53 + collectors/python.d.plugin/tomcat/tomcat.chart.py | 199 + collectors/python.d.plugin/tomcat/tomcat.conf | 89 + collectors/python.d.plugin/tor/Makefile.inc | 13 + collectors/python.d.plugin/tor/README.md | 66 + collectors/python.d.plugin/tor/tor.chart.py | 107 + collectors/python.d.plugin/tor/tor.conf | 79 + collectors/python.d.plugin/traefik/Makefile.inc | 13 + collectors/python.d.plugin/traefik/README.md | 74 + .../python.d.plugin/traefik/traefik.chart.py | 198 + collectors/python.d.plugin/traefik/traefik.conf | 77 + collectors/python.d.plugin/uwsgi/Makefile.inc | 13 + collectors/python.d.plugin/uwsgi/README.md | 52 + collectors/python.d.plugin/uwsgi/uwsgi.chart.py | 177 + collectors/python.d.plugin/uwsgi/uwsgi.conf | 92 + collectors/python.d.plugin/varnish/Makefile.inc | 13 + collectors/python.d.plugin/varnish/README.md | 65 + .../python.d.plugin/varnish/varnish.chart.py | 385 + collectors/python.d.plugin/varnish/varnish.conf | 66 + collectors/python.d.plugin/w1sensor/Makefile.inc | 13 + collectors/python.d.plugin/w1sensor/README.md | 28 + .../python.d.plugin/w1sensor/w1sensor.chart.py | 97 + collectors/python.d.plugin/w1sensor/w1sensor.conf | 72 + collectors/python.d.plugin/zscores/Makefile.inc | 12 + collectors/python.d.plugin/zscores/README.md | 144 + .../python.d.plugin/zscores/zscores.chart.py | 146 + collectors/python.d.plugin/zscores/zscores.conf | 108 + collectors/slabinfo.plugin/Makefile.am | 14 + collectors/slabinfo.plugin/README.md | 29 + collectors/slabinfo.plugin/slabinfo.c | 393 + collectors/statsd.plugin/Makefile.am | 23 + collectors/statsd.plugin/README.md | 699 + collectors/statsd.plugin/asterisk.conf | 208 + collectors/statsd.plugin/asterisk.md | 61 + collectors/statsd.plugin/example.conf | 64 + collectors/statsd.plugin/k6.conf | 110 + collectors/statsd.plugin/k6.md | 76 + collectors/statsd.plugin/statsd.c | 2844 + collectors/tc.plugin/Makefile.am | 20 + collectors/tc.plugin/README.md | 205 + collectors/tc.plugin/plugin_tc.c | 1182 + collectors/tc.plugin/tc-qos-helper.sh.in | 297 + collectors/timex.plugin/Makefile.am | 8 + collectors/timex.plugin/README.md | 31 + collectors/timex.plugin/plugin_timex.c | 176 + collectors/xenstat.plugin/Makefile.am | 8 + collectors/xenstat.plugin/README.md | 53 + collectors/xenstat.plugin/xenstat_plugin.c | 1071 + config.cmake.h.in | 56 + configs.signatures | 764 + configure.ac | 1809 + contrib/README.md | 61 + contrib/debian/changelog | 6 + contrib/debian/compat | 1 + contrib/debian/control | 66 + contrib/debian/control.xenial | 61 + contrib/debian/copyright | 10 + contrib/debian/install_go.sh | 98 + contrib/debian/netdata.default | 3 + contrib/debian/netdata.docs | 1 + contrib/debian/netdata.init | 56 + contrib/debian/netdata.install | 1 + contrib/debian/netdata.lintian-overrides | 16 + contrib/debian/netdata.postinst | 85 + contrib/debian/netdata.postrm | 53 + contrib/debian/netdata.preinst | 18 + contrib/debian/rules | 137 + contrib/debian/source/format | 1 + contrib/rhel/build-netdata-rpm.sh | 56 + coverity-scan.sh | 208 + cppcheck.sh | 35 + daemon/Makefile.am | 23 + daemon/README.md | 520 + daemon/analytics.c | 1034 + daemon/analytics.h | 87 + daemon/anonymous-statistics.sh.in | 177 + daemon/buildinfo.c | 470 + daemon/buildinfo.h | 14 + daemon/commands.c | 736 + daemon/commands.h | 82 + daemon/common.c | 21 + daemon/common.h | 108 + daemon/config/README.md | 230 + daemon/daemon.c | 502 + daemon/daemon.h | 18 + daemon/get-kubernetes-labels.sh.in | 59 + daemon/global_statistics.c | 2894 + daemon/global_statistics.h | 30 + daemon/main.c | 1617 + daemon/main.h | 30 + daemon/service.c | 297 + daemon/signals.c | 285 + daemon/signals.h | 13 + daemon/static_threads.c | 190 + daemon/static_threads.h | 52 + daemon/static_threads_freebsd.c | 31 + daemon/static_threads_linux.c | 92 + daemon/static_threads_macos.c | 33 + daemon/system-info.sh | 511 + daemon/unit_test.c | 2600 + daemon/unit_test.h | 26 + database/KolmogorovSmirnovDist.c | 788 + database/KolmogorovSmirnovDist.h | 91 + database/Makefile.am | 14 + database/README.md | 147 + database/engine/Makefile.am | 11 + database/engine/README.md | 302 + database/engine/datafile.c | 460 + database/engine/datafile.h | 67 + database/engine/journalfile.c | 587 + database/engine/journalfile.h | 49 + database/engine/metadata_log/README.md | 0 database/engine/pagecache.c | 1313 + database/engine/pagecache.h | 254 + database/engine/rrddiskprotocol.h | 120 + database/engine/rrdengine.c | 1508 + database/engine/rrdengine.h | 283 + database/engine/rrdengineapi.c | 1272 + database/engine/rrdengineapi.h | 144 + database/engine/rrdenginelib.c | 311 + database/engine/rrdenginelib.h | 102 + database/engine/rrdenglocking.c | 241 + database/engine/rrdenglocking.h | 17 + database/ram/Makefile.am | 11 + database/ram/README.md | 7 + database/ram/rrddim_mem.c | 277 + database/ram/rrddim_mem.h | 49 + database/rrd.c | 168 + database/rrd.h | 1413 + database/rrdcalc.c | 800 + database/rrdcalc.h | 258 + database/rrdcalctemplate.c | 241 + database/rrdcalctemplate.h | 128 + database/rrdcontext.c | 3937 + database/rrdcontext.h | 250 + database/rrddim.c | 756 + database/rrddimvar.c | 272 + database/rrddimvar.h | 21 + database/rrdfamily.c | 68 + database/rrdfunctions.c | 758 + database/rrdfunctions.h | 35 + database/rrdhost.c | 1640 + database/rrdlabels.c | 1183 + database/rrdset.c | 2173 + database/rrdsetvar.c | 293 + database/rrdsetvar.h | 30 + database/rrdvar.c | 317 + database/rrdvar.h | 71 + database/sqlite/Makefile.am | 4 + database/sqlite/sqlite3.c | 239479 ++++++++++++++++++ database/sqlite/sqlite3.h | 12806 + database/sqlite/sqlite_aclk.c | 804 + database/sqlite/sqlite_aclk.h | 175 + database/sqlite/sqlite_aclk_alert.c | 1079 + database/sqlite/sqlite_aclk_alert.h | 31 + database/sqlite/sqlite_aclk_node.c | 132 + database/sqlite/sqlite_aclk_node.h | 8 + database/sqlite/sqlite_context.c | 569 + database/sqlite/sqlite_context.h | 70 + database/sqlite/sqlite_db_migration.c | 255 + database/sqlite/sqlite_db_migration.h | 12 + database/sqlite/sqlite_functions.c | 1379 + database/sqlite/sqlite_functions.h | 86 + database/sqlite/sqlite_health.c | 1125 + database/sqlite/sqlite_health.h | 17 + database/sqlite/sqlite_metadata.c | 1580 + database/sqlite/sqlite_metadata.h | 28 + database/storage_engine.c | 152 + database/storage_engine.h | 12 + diagrams/Makefile.am | 24 + diagrams/build.sh | 21 + diagrams/config.puml | 46 + diagrams/data_structures/README.md | 18 + diagrams/data_structures/netdata_config.svg | 2 + diagrams/data_structures/registry.svg | 2 + diagrams/data_structures/rrd.svg | 2 + diagrams/data_structures/src/netdata_config.xml | 1 + diagrams/data_structures/src/registry.xml | 1 + diagrams/data_structures/src/rrd.xml | 1 + diagrams/data_structures/src/web.xml | 1 + diagrams/data_structures/web.svg | 2 + diagrams/netdata-for-ephemeral-nodes.xml | 1 + diagrams/netdata-overview.xml | 1 + diagrams/netdata-proxies-example.xml | 1 + diagrams/registry.puml | 40 + .../.page-level/_collector-page-template.mdx | 68 + .../.page-level/_concept-page-template.md | 30 + docs/.templates/.page-level/_task-page-template.md | 41 + .../.page-level/_tutorial-page-template.mdx | 54 + docs/Add-more-charts-to-netdata.md | 13 + docs/Demo-Sites.md | 24 + docs/Donations-netdata-has-received.md | 29 + docs/README.md | 17 + docs/Running-behind-apache.md | 371 + docs/Running-behind-caddy.md | 34 + docs/Running-behind-h2o.md | 183 + docs/Running-behind-haproxy.md | 293 + docs/Running-behind-lighttpd.md | 71 + docs/Running-behind-nginx.md | 270 + docs/a-github-star-is-important.md | 24 + docs/agent-cloud.md | 77 + docs/anonymous-statistics.md | 105 + docs/collect/application-metrics.md | 81 + docs/collect/container-metrics.md | 99 + docs/collect/enable-configure.md | 68 + docs/collect/how-collectors-work.md | 78 + docs/collect/system-metrics.md | 60 + docs/configure/common-changes.md | 214 + docs/configure/nodes.md | 170 + docs/configure/secure-nodes.md | 123 + docs/configure/start-stop-restart.md | 98 + docs/contributing/contributing-documentation.md | 109 + docs/contributing/style-guide.md | 492 + docs/dashboard/customize.mdx | 93 + docs/dashboard/dimensions-contexts-families.mdx | 96 + docs/dashboard/how-dashboard-works.mdx | 112 + docs/dashboard/import-export-print-snapshot.mdx | 83 + docs/dashboard/interact-charts.mdx | 197 + docs/dashboard/reference-web-server.mdx | 278 + .../visualization-date-and-time-controls.mdx | 121 + docs/export/enable-connector.md | 95 + docs/export/external-databases.md | 89 + docs/get-started.mdx | 107 + docs/guides/collect-apache-nginx-web-logs.md | 127 + docs/guides/collect-unbound-metrics.md | 140 + docs/guides/configure/performance.md | 277 + docs/guides/deploy/ansible.md | 176 + .../export/export-netdata-metrics-graphite.md | 183 + docs/guides/longer-metrics-storage.md | 158 + docs/guides/monitor-cockroachdb.md | 136 + docs/guides/monitor-hadoop-cluster.md | 203 + docs/guides/monitor/anomaly-detection-python.md | 189 + docs/guides/monitor/anomaly-detection.md | 75 + docs/guides/monitor/dimension-templates.md | 176 + docs/guides/monitor/kubernetes-k8s-netdata.md | 254 + docs/guides/monitor/lamp-stack.md | 246 + docs/guides/monitor/pi-hole-raspberry-pi.md | 162 + docs/guides/monitor/process.md | 301 + .../monitor/raspberry-pi-anomaly-detection.md | 125 + docs/guides/monitor/statsd.md | 298 + docs/guides/monitor/stop-notifications-alarms.md | 92 + docs/guides/monitor/visualize-monitor-anomalies.md | 142 + docs/guides/python-collector.md | 486 + docs/guides/step-by-step/step-00.md | 120 + docs/guides/step-by-step/step-01.md | 156 + docs/guides/step-by-step/step-02.md | 208 + docs/guides/step-by-step/step-03.md | 91 + docs/guides/step-by-step/step-04.md | 144 + docs/guides/step-by-step/step-05.md | 350 + docs/guides/step-by-step/step-06.md | 122 + docs/guides/step-by-step/step-07.md | 114 + docs/guides/step-by-step/step-08.md | 395 + docs/guides/step-by-step/step-09.md | 162 + docs/guides/step-by-step/step-10.md | 232 + docs/guides/step-by-step/step-99.md | 51 + .../monitor-debug-applications-ebpf.md | 270 + .../troubleshooting-agent-with-cloud-connection.md | 117 + docs/guides/using-host-labels.md | 212 + .../enable-streaming.mdx | 151 + .../how-streaming-works.mdx | 92 + .../reference-streaming.mdx | 486 + docs/monitor/configure-alarms.md | 148 + docs/monitor/enable-notifications.md | 147 + docs/monitor/view-active-alarms.md | 75 + docs/netdata-for-IoT.md | 74 + docs/netdata-security.md | 229 + docs/overview/netdata-monitoring-stack.md | 62 + docs/overview/what-is-netdata.md | 74 + docs/overview/why-netdata.md | 63 + docs/quickstart/infrastructure.md | 185 + docs/quickstart/single-node.md | 92 + docs/store/change-metrics-storage.md | 98 + docs/store/distributed-data-architecture.md | 88 + docs/visualize/create-dashboards.md | 64 + docs/visualize/interact-dashboards-charts.md | 131 + docs/visualize/overview-infrastructure.md | 111 + docs/why-netdata/1s-granularity.md | 59 + docs/why-netdata/README.md | 35 + docs/why-netdata/immediate-results.md | 46 + docs/why-netdata/meaningful-presentation.md | 68 + docs/why-netdata/unlimited-metrics.md | 49 + exporting/Makefile.am | 29 + exporting/README.md | 324 + exporting/TIMESCALE.md | 69 + exporting/WALKTHROUGH.md | 266 + exporting/aws_kinesis/Makefile.am | 8 + exporting/aws_kinesis/README.md | 58 + exporting/aws_kinesis/aws_kinesis.c | 218 + exporting/aws_kinesis/aws_kinesis.h | 16 + exporting/aws_kinesis/aws_kinesis_put_record.cc | 151 + exporting/aws_kinesis/aws_kinesis_put_record.h | 35 + exporting/check_filters.c | 88 + exporting/clean_connectors.c | 85 + exporting/exporting.conf | 95 + exporting/exporting_engine.c | 217 + exporting/exporting_engine.h | 324 + exporting/graphite/Makefile.am | 4 + exporting/graphite/README.md | 35 + exporting/graphite/graphite.c | 220 + exporting/graphite/graphite.h | 18 + exporting/init_connectors.c | 218 + exporting/json/Makefile.am | 4 + exporting/json/README.md | 35 + exporting/json/json.c | 350 + exporting/json/json.h | 21 + exporting/mongodb/Makefile.am | 8 + exporting/mongodb/README.md | 38 + exporting/mongodb/mongodb.c | 390 + exporting/mongodb/mongodb.h | 35 + exporting/nc-exporting.sh | 158 + exporting/opentsdb/Makefile.am | 8 + exporting/opentsdb/README.md | 35 + exporting/opentsdb/opentsdb.c | 401 + exporting/opentsdb/opentsdb.h | 26 + exporting/process_data.c | 445 + exporting/prometheus/Makefile.am | 12 + exporting/prometheus/README.md | 461 + exporting/prometheus/prometheus.c | 902 + exporting/prometheus/prometheus.h | 41 + exporting/prometheus/remote_write/Makefile.am | 14 + exporting/prometheus/remote_write/README.md | 58 + exporting/prometheus/remote_write/remote_write.c | 406 + exporting/prometheus/remote_write/remote_write.h | 32 + .../prometheus/remote_write/remote_write.proto | 29 + .../remote_write/remote_write_request.cc | 258 + .../prometheus/remote_write/remote_write_request.h | 42 + exporting/pubsub/Makefile.am | 8 + exporting/pubsub/README.md | 38 + exporting/pubsub/pubsub.c | 195 + exporting/pubsub/pubsub.h | 14 + exporting/pubsub/pubsub_publish.cc | 258 + exporting/pubsub/pubsub_publish.h | 37 + exporting/read_config.c | 511 + exporting/send_data.c | 445 + exporting/send_internal_metrics.c | 156 + exporting/tests/Makefile.am | 4 + exporting/tests/exporting_doubles.c | 405 + exporting/tests/exporting_fixtures.c | 184 + exporting/tests/netdata_doubles.c | 255 + exporting/tests/system_doubles.c | 61 + exporting/tests/test_exporting_engine.c | 2068 + exporting/tests/test_exporting_engine.h | 212 + health/Makefile.am | 103 + health/QUICKSTART.md | 143 + health/README.md | 38 + health/REFERENCE.md | 1023 + health/health.c | 1581 + health/health.d/adaptec_raid.conf | 30 + health/health.d/anomalies.conf | 23 + health/health.d/apcupsd.conf | 49 + health/health.d/bcache.conf | 30 + health/health.d/beanstalkd.conf | 41 + health/health.d/bind_rndc.conf | 12 + health/health.d/boinc.conf | 74 + health/health.d/btrfs.conf | 68 + health/health.d/ceph.conf | 15 + health/health.d/cgroups.conf | 141 + health/health.d/cockroachdb.conf | 73 + health/health.d/cpu.conf | 67 + health/health.d/dbengine.conf | 64 + health/health.d/disks.conf | 173 + health/health.d/dns_query.conf | 14 + health/health.d/dnsmasq_dhcp.conf | 15 + health/health.d/dockerd.conf | 11 + health/health.d/entropy.conf | 19 + health/health.d/exporting.conf | 29 + health/health.d/fping.conf | 64 + health/health.d/gearman.conf | 14 + health/health.d/geth.conf | 12 + health/health.d/go.d.plugin.conf | 17 + health/health.d/haproxy.conf | 23 + health/health.d/hdfs.conf | 76 + health/health.d/httpcheck.conf | 112 + health/health.d/ioping.conf | 16 + health/health.d/ipc.conf | 34 + health/health.d/ipfs.conf | 14 + health/health.d/ipmi.conf | 26 + health/health.d/isc_dhcpd.conf | 10 + health/health.d/kubelet.conf | 145 + health/health.d/linux_power_supply.conf | 15 + health/health.d/load.conf | 66 + health/health.d/mdstat.conf | 52 + health/health.d/megacli.conf | 71 + health/health.d/memcached.conf | 48 + health/health.d/memory.conf | 47 + health/health.d/ml.conf | 53 + health/health.d/mysql.conf | 176 + health/health.d/net.conf | 256 + health/health.d/netfilter.conf | 19 + health/health.d/nut.conf | 47 + health/health.d/nvme.conf | 15 + health/health.d/pihole.conf | 32 + health/health.d/ping.conf | 50 + health/health.d/portcheck.conf | 44 + health/health.d/postgres.conf | 214 + health/health.d/processes.conf | 16 + health/health.d/python.d.plugin.conf | 17 + health/health.d/qos.conf | 18 + health/health.d/ram.conf | 80 + health/health.d/redis.conf | 57 + health/health.d/retroshare.conf | 16 + health/health.d/riakkv.conf | 93 + health/health.d/scaleio.conf | 31 + health/health.d/softnet.conf | 54 + health/health.d/swap.conf | 35 + health/health.d/synchronization.conf | 12 + health/health.d/systemdunits.conf | 141 + health/health.d/tcp_conn.conf | 22 + health/health.d/tcp_listen.conf | 96 + health/health.d/tcp_mem.conf | 23 + health/health.d/tcp_orphans.conf | 24 + health/health.d/tcp_resets.conf | 69 + health/health.d/timex.conf | 17 + health/health.d/udp_errors.conf | 38 + health/health.d/unbound.conf | 28 + health/health.d/vcsa.conf | 141 + health/health.d/vernemq.conf | 365 + health/health.d/vsphere.conf | 174 + health/health.d/web_log.conf | 210 + health/health.d/whoisquery.conf | 13 + health/health.d/wmi.conf | 139 + health/health.d/x509check.conf | 24 + health/health.d/zfs.conf | 41 + health/health.h | 103 + health/health_config.c | 1182 + health/health_json.c | 439 + health/health_log.c | 583 + health/notifications/Makefile.am | 53 + health/notifications/README.md | 86 + health/notifications/alarm-email.sh | 7 + health/notifications/alarm-notify.sh.in | 3594 + health/notifications/alarm-test.sh | 12 + health/notifications/alerta/Makefile.inc | 12 + health/notifications/alerta/README.md | 81 + health/notifications/awssns/Makefile.inc | 12 + health/notifications/awssns/README.md | 53 + health/notifications/custom/Makefile.inc | 12 + health/notifications/custom/README.md | 92 + health/notifications/discord/Makefile.inc | 12 + health/notifications/discord/README.md | 50 + health/notifications/dynatrace/Makefile.inc | 12 + health/notifications/dynatrace/README.md | 34 + health/notifications/email/Makefile.inc | 12 + health/notifications/email/README.md | 77 + health/notifications/flock/Makefile.inc | 12 + health/notifications/flock/README.md | 37 + health/notifications/gotify/Makefile.inc | 11 + health/notifications/gotify/README.md | 62 + health/notifications/hangouts/Makefile.inc | 12 + health/notifications/hangouts/README.md | 55 + health/notifications/health_alarm_notify.conf | 1286 + health/notifications/health_email_recipients.conf | 2 + health/notifications/irc/Makefile.inc | 12 + health/notifications/irc/README.md | 78 + health/notifications/kavenegar/Makefile.inc | 12 + health/notifications/kavenegar/README.md | 46 + health/notifications/matrix/Makefile.inc | 12 + health/notifications/matrix/README.md | 58 + health/notifications/messagebird/Makefile.inc | 12 + health/notifications/messagebird/README.md | 45 + health/notifications/msteams/Makefile.inc | 12 + health/notifications/msteams/README.md | 43 + health/notifications/opsgenie/Makefile.inc | 12 + health/notifications/opsgenie/README.md | 62 + health/notifications/pagerduty/Makefile.inc | 12 + health/notifications/pagerduty/README.md | 63 + health/notifications/prowl/Makefile.inc | 12 + health/notifications/prowl/README.md | 27 + health/notifications/pushbullet/Makefile.inc | 12 + health/notifications/pushbullet/README.md | 50 + health/notifications/pushover/Makefile.inc | 12 + health/notifications/pushover/README.md | 23 + health/notifications/rocketchat/Makefile.inc | 12 + health/notifications/rocketchat/README.md | 52 + health/notifications/slack/Makefile.inc | 12 + health/notifications/slack/README.md | 50 + health/notifications/smstools3/Makefile.inc | 12 + health/notifications/smstools3/README.md | 44 + health/notifications/stackpulse/Makefile.inc | 12 + health/notifications/stackpulse/README.md | 81 + health/notifications/syslog/Makefile.inc | 12 + health/notifications/syslog/README.md | 34 + health/notifications/telegram/Makefile.inc | 12 + health/notifications/telegram/README.md | 45 + health/notifications/twilio/Makefile.inc | 12 + health/notifications/twilio/README.md | 47 + health/notifications/web/Makefile.inc | 12 + health/notifications/web/README.md | 13 + libnetdata/Makefile.am | 37 + libnetdata/README.md | 10 + libnetdata/adaptive_resortable_list/Makefile.am | 8 + libnetdata/adaptive_resortable_list/README.md | 99 + .../adaptive_resortable_list.c | 280 + .../adaptive_resortable_list.h | 138 + libnetdata/arrayalloc/Makefile.am | 8 + libnetdata/arrayalloc/README.md | 7 + libnetdata/arrayalloc/arrayalloc.c | 489 + libnetdata/arrayalloc/arrayalloc.h | 48 + libnetdata/avl/Makefile.am | 8 + libnetdata/avl/README.md | 17 + libnetdata/avl/avl.c | 420 + libnetdata/avl/avl.h | 92 + libnetdata/buffer/Makefile.am | 8 + libnetdata/buffer/README.md | 16 + libnetdata/buffer/buffer.c | 525 + libnetdata/buffer/buffer.h | 89 + libnetdata/circular_buffer/Makefile.am | 8 + libnetdata/circular_buffer/README.md | 10 + libnetdata/circular_buffer/circular_buffer.c | 96 + libnetdata/circular_buffer/circular_buffer.h | 19 + libnetdata/clocks/Makefile.am | 8 + libnetdata/clocks/README.md | 5 + libnetdata/clocks/clocks.c | 432 + libnetdata/clocks/clocks.h | 160 + libnetdata/completion/Makefile.am | 4 + libnetdata/completion/completion.c | 34 + libnetdata/completion/completion.h | 22 + libnetdata/config/Makefile.am | 8 + libnetdata/config/README.md | 54 + libnetdata/config/appconfig.c | 960 + libnetdata/config/appconfig.h | 219 + libnetdata/dictionary/Makefile.am | 8 + libnetdata/dictionary/README.md | 231 + libnetdata/dictionary/dictionary.c | 3620 + libnetdata/dictionary/dictionary.h | 323 + libnetdata/ebpf/Makefile.am | 8 + libnetdata/ebpf/README.md | 5 + libnetdata/ebpf/ebpf.c | 1427 + libnetdata/ebpf/ebpf.h | 371 + libnetdata/eval/Makefile.am | 8 + libnetdata/eval/README.md | 1 + libnetdata/eval/eval.c | 1201 + libnetdata/eval/eval.h | 87 + libnetdata/health/Makefile.am | 7 + libnetdata/health/health.c | 173 + libnetdata/health/health.h | 55 + libnetdata/inlined.h | 269 + libnetdata/json/Makefile.am | 8 + libnetdata/json/README.md | 10 + libnetdata/json/jsmn.c | 328 + libnetdata/json/jsmn.h | 75 + libnetdata/json/json.c | 557 + libnetdata/json/json.h | 77 + libnetdata/libjudy/src/Judy.h | 622 + libnetdata/libjudy/src/JudyCommon/JudyMalloc.c | 87 + libnetdata/libjudy/src/JudyCommon/JudyPrivate.h | 1613 + libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h | 485 + .../libjudy/src/JudyCommon/JudyPrivateBranch.h | 788 + libnetdata/libjudy/src/JudyHS/JudyHS.c | 771 + libnetdata/libjudy/src/JudyL/JudyL.h | 505 + libnetdata/libjudy/src/JudyL/JudyLByCount.c | 954 + libnetdata/libjudy/src/JudyL/JudyLCascade.c | 1942 + libnetdata/libjudy/src/JudyL/JudyLCount.c | 1195 + libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c | 314 + libnetdata/libjudy/src/JudyL/JudyLDecascade.c | 1206 + libnetdata/libjudy/src/JudyL/JudyLDel.c | 2146 + libnetdata/libjudy/src/JudyL/JudyLFirst.c | 213 + libnetdata/libjudy/src/JudyL/JudyLFreeArray.c | 363 + libnetdata/libjudy/src/JudyL/JudyLGet.c | 1094 + libnetdata/libjudy/src/JudyL/JudyLIns.c | 1873 + libnetdata/libjudy/src/JudyL/JudyLInsArray.c | 1178 + libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c | 135 + libnetdata/libjudy/src/JudyL/JudyLMallocIF.c | 782 + libnetdata/libjudy/src/JudyL/JudyLMemActive.c | 259 + libnetdata/libjudy/src/JudyL/JudyLMemUsed.c | 61 + libnetdata/libjudy/src/JudyL/JudyLNext.c | 1890 + libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c | 1390 + libnetdata/libjudy/src/JudyL/JudyLPrev.c | 1890 + libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c | 1390 + libnetdata/libjudy/src/JudyL/JudyLTablesGen.c | 296 + libnetdata/libjudy/src/JudyL/j__udyLGet.c | 1094 + libnetdata/libnetdata.c | 1936 + libnetdata/libnetdata.h | 507 + libnetdata/locks/Makefile.am | 8 + libnetdata/locks/README.md | 100 + libnetdata/locks/locks.c | 757 + libnetdata/locks/locks.h | 127 + libnetdata/log/Makefile.am | 8 + libnetdata/log/README.md | 5 + libnetdata/log/log.c | 1084 + libnetdata/log/log.h | 141 + libnetdata/onewayalloc/Makefile.am | 8 + libnetdata/onewayalloc/README.md | 71 + libnetdata/onewayalloc/onewayalloc.c | 193 + libnetdata/onewayalloc/onewayalloc.h | 19 + libnetdata/os.c | 230 + libnetdata/os.h | 67 + libnetdata/popen/Makefile.am | 8 + libnetdata/popen/README.md | 5 + libnetdata/popen/popen.c | 460 + libnetdata/popen/popen.h | 40 + libnetdata/procfile/Makefile.am | 8 + libnetdata/procfile/README.md | 67 + libnetdata/procfile/procfile.c | 479 + libnetdata/procfile/procfile.h | 108 + libnetdata/required_dummies.h | 43 + libnetdata/simple_pattern/Makefile.am | 8 + libnetdata/simple_pattern/README.md | 43 + libnetdata/simple_pattern/simple_pattern.c | 363 + libnetdata/simple_pattern/simple_pattern.h | 42 + libnetdata/socket/Makefile.am | 8 + libnetdata/socket/README.md | 5 + libnetdata/socket/security.c | 390 + libnetdata/socket/security.h | 63 + libnetdata/socket/socket.c | 1914 + libnetdata/socket/socket.h | 215 + libnetdata/statistical/Makefile.am | 8 + libnetdata/statistical/README.md | 5 + libnetdata/statistical/statistical.c | 460 + libnetdata/statistical/statistical.h | 34 + libnetdata/storage_number/Makefile.am | 12 + libnetdata/storage_number/README.md | 17 + libnetdata/storage_number/storage_number.c | 231 + libnetdata/storage_number/storage_number.h | 232 + libnetdata/storage_number/tests/Makefile.am | 4 + .../storage_number/tests/test_storage_number.c | 52 + libnetdata/string/Makefile.am | 8 + libnetdata/string/README.md | 20 + libnetdata/string/string.c | 596 + libnetdata/string/string.h | 30 + libnetdata/string/utf8.h | 9 + libnetdata/tests/Makefile.am | 4 + libnetdata/tests/test_str2ld.c | 48 + libnetdata/threads/Makefile.am | 8 + libnetdata/threads/README.md | 5 + libnetdata/threads/threads.c | 267 + libnetdata/threads/threads.h | 49 + libnetdata/url/Makefile.am | 8 + libnetdata/url/README.md | 5 + libnetdata/url/url.c | 391 + libnetdata/url/url.h | 35 + libnetdata/worker_utilization/Makefile.am | 8 + libnetdata/worker_utilization/README.md | 90 + libnetdata/worker_utilization/worker_utilization.c | 362 + libnetdata/worker_utilization/worker_utilization.h | 47 + ml/ADCharts.cc | 233 + ml/ADCharts.h | 23 + ml/Config.cc | 114 + ml/Config.h | 51 + ml/Dimension.cc | 173 + ml/Dimension.h | 94 + ml/Host.cc | 255 + ml/Host.h | 100 + ml/KMeans.cc | 43 + ml/KMeans.h | 41 + ml/Query.h | 57 + ml/README.md | 310 + ml/SamplesBuffer.cc | 150 + ml/SamplesBuffer.h | 146 + ml/SamplesBufferTests.cc | 146 + ml/ml-dummy.c | 50 + ml/ml-private.h | 26 + ml/ml.cc | 165 + ml/ml.h | 50 + ml/notebooks/README.md | 5 + .../netdata_anomaly_detection_deepdive.ipynb | 1712 + netdata-installer.sh | 1812 + netdata.cppcheck | 17 + netdata.spec.in | 612 + packaging/PLATFORM_SUPPORT.md | 179 + packaging/build_package_install_test.sh | 37 + packaging/building-native-packages-locally.md | 106 + packaging/bundle-ebpf-co-re.sh | 9 + packaging/bundle-ebpf.sh | 19 + packaging/bundle-libbpf.sh | 27 + packaging/bundle-protobuf.sh | 16 + packaging/check-kernel-config.sh | 73 + packaging/current_libbpf.checksums | 1 + packaging/current_libbpf.version | 1 + packaging/docker/Dockerfile | 126 + packaging/docker/README.md | 536 + packaging/docker/gen-cflags.sh | 9 + packaging/docker/health.sh | 18 + packaging/docker/run.sh | 62 + packaging/ebpf-co-re.checksums | 1 + packaging/ebpf-co-re.version | 1 + packaging/ebpf.checksums | 3 + packaging/ebpf.version | 1 + packaging/go.d.checksums | 17 + packaging/go.d.version | 1 + packaging/installer/README.md | 218 + packaging/installer/REINSTALL.md | 72 + packaging/installer/UNINSTALL.md | 92 + packaging/installer/UPDATE.md | 174 + packaging/installer/dependencies/alpine.sh | 99 + packaging/installer/dependencies/arch.sh | 99 + packaging/installer/dependencies/centos.sh | 196 + packaging/installer/dependencies/clearlinux.sh | 87 + packaging/installer/dependencies/debian.sh | 104 + packaging/installer/dependencies/fedora.sh | 118 + packaging/installer/dependencies/freebsd.sh | 142 + packaging/installer/dependencies/gentoo.sh | 97 + packaging/installer/dependencies/macos.sh | 0 packaging/installer/dependencies/ol.sh | 156 + packaging/installer/dependencies/opensuse.sh | 102 + packaging/installer/dependencies/rhel.sh | 0 packaging/installer/dependencies/rockylinux.sh | 162 + packaging/installer/dependencies/sabayon.sh | 0 packaging/installer/dependencies/ubuntu.sh | 101 + packaging/installer/functions.sh | 1073 + packaging/installer/install-required-packages.sh | 2073 + packaging/installer/kickstart-ng.sh | 1 + packaging/installer/kickstart-static64.sh | 1 + packaging/installer/kickstart.sh | 2278 + packaging/installer/methods/cloud-providers.md | 126 + packaging/installer/methods/freebsd.md | 107 + packaging/installer/methods/freenas.md | 24 + packaging/installer/methods/kickstart.md | 172 + packaging/installer/methods/kubernetes.md | 197 + packaging/installer/methods/macos.md | 111 + packaging/installer/methods/manual.md | 240 + packaging/installer/methods/offline.md | 66 + packaging/installer/methods/pfsense.md | 83 + packaging/installer/methods/source.md | 236 + packaging/installer/methods/synology.md | 59 + packaging/installer/netdata-uninstaller.sh | 756 + packaging/installer/netdata-updater.sh | 953 + packaging/jsonc.checksums | 1 + packaging/jsonc.version | 1 + packaging/libbpf.checksums | 1 + packaging/libbpf.version | 1 + packaging/libbpf_0_0_9.checksums | 1 + packaging/libbpf_0_0_9.version | 1 + packaging/maintainers/README.md | 86 + packaging/makeself/README.md | 54 + packaging/makeself/build-static.sh | 58 + packaging/makeself/build-x86_64-static.sh | 7 + packaging/makeself/build.sh | 65 + packaging/makeself/functions.sh | 104 + packaging/makeself/install-alpine-packages.sh | 49 + packaging/makeself/install-or-update.sh | 266 + .../jobs/10-prepare-destination.install.sh | 23 + packaging/makeself/jobs/20-openssl.install.sh | 48 + packaging/makeself/jobs/50-bash-5.1.16.install.sh | 49 + packaging/makeself/jobs/50-curl-7.82.0.install.sh | 62 + packaging/makeself/jobs/50-fping-5.1.install.sh | 45 + packaging/makeself/jobs/50-ioping-1.2.install.sh | 32 + packaging/makeself/jobs/70-netdata-git.install.sh | 58 + .../makeself/jobs/90-netdata-runtime-check.sh | 54 + packaging/makeself/jobs/99-makeself.install.sh | 111 + packaging/makeself/makeself-header.sh | 556 + packaging/makeself/makeself-help-header.txt | 49 + packaging/makeself/makeself-license.txt | 44 + packaging/makeself/makeself.lsm | 16 + packaging/makeself/makeself.sh | 621 + packaging/makeself/openssl.version | 1 + packaging/makeself/post-installer.sh | 11 + packaging/makeself/run-all-jobs.sh | 42 + packaging/makeself/uname2platform.sh | 18 + packaging/protobuf.checksums | 1 + packaging/protobuf.version | 1 + packaging/repoconfig/Makefile | 31 + packaging/repoconfig/build-deb.sh | 49 + packaging/repoconfig/build-rpm.sh | 26 + packaging/repoconfig/debian/changelog | 12 + packaging/repoconfig/debian/compat | 1 + packaging/repoconfig/debian/control | 19 + packaging/repoconfig/debian/copyright | 10 + packaging/repoconfig/debian/rules | 21 + packaging/repoconfig/debian/source/format | 1 + packaging/repoconfig/netdata-edge.repo.centos | 21 + packaging/repoconfig/netdata-edge.repo.fedora | 21 + packaging/repoconfig/netdata-edge.repo.ol | 21 + packaging/repoconfig/netdata-edge.repo.suse | 19 + packaging/repoconfig/netdata-repo.spec | 102 + packaging/repoconfig/netdata.list.in | 2 + packaging/repoconfig/netdata.repo.centos | 21 + packaging/repoconfig/netdata.repo.fedora | 21 + packaging/repoconfig/netdata.repo.ol | 21 + packaging/repoconfig/netdata.repo.suse | 19 + packaging/version | 1 + parser/Makefile.am | 9 + parser/README.md | 147 + parser/parser.c | 391 + parser/parser.h | 123 + registry/Makefile.am | 8 + registry/README.md | 214 + registry/registry.c | 449 + registry/registry.h | 83 + registry/registry_db.c | 347 + registry/registry_init.c | 147 + registry/registry_internals.c | 332 + registry/registry_internals.h | 90 + registry/registry_log.c | 136 + registry/registry_machine.c | 109 + registry/registry_machine.h | 43 + registry/registry_person.c | 254 + registry/registry_person.h | 61 + registry/registry_url.c | 88 + registry/registry_url.h | 35 + spawn/Makefile.am | 9 + spawn/README.md | 0 spawn/spawn.c | 289 + spawn/spawn.h | 109 + spawn/spawn_client.c | 248 + spawn/spawn_server.c | 389 + streaming/Makefile.am | 12 + streaming/README.md | 734 + streaming/compression.c | 322 + streaming/receiver.c | 809 + streaming/replication.c | 1446 + streaming/replication.h | 33 + streaming/rrdpush.c | 1044 + streaming/rrdpush.h | 305 + streaming/sender.c | 1358 + streaming/stream.conf | 246 + system/.install-type | 1 + system/Makefile.am | 71 + system/edit-config.in | 83 + system/install-service.sh.in | 786 + system/netdata-freebsd.in | 53 + system/netdata-init-d.in | 95 + system/netdata-lsb.in | 116 + system/netdata-openrc.in | 88 + system/netdata-updater.service.in | 8 + system/netdata-updater.timer | 12 + system/netdata.conf | 19 + system/netdata.crontab.in | 1 + system/netdata.logrotate.in | 12 + system/netdata.plist.in | 15 + system/netdata.service.in | 76 + system/netdata.service.v235.in | 42 + tests/Makefile.am | 35 + tests/acls/acl.sh.in | 126 + tests/acls/netdata.cfg | 20 + tests/acls/netdata.ssl.cfg | 24 + tests/alarm_repetition/alarm.sh.in | 89 + .../alarm_repetition/netdata.conf_with_repetition | 55 + .../netdata.conf_without_repetition | 55 + tests/alarm_repetition/ram_with_repetition.conf | 64 + tests/alarm_repetition/ram_without_repetition.conf | 63 + tests/api/fuzzer.py | 378 + tests/ebpf/README.md | 1 + tests/ebpf/sync_tester.c | 120 + tests/exporting/prometheus-avg-oldunits.txt | 148 + tests/exporting/prometheus-avg.txt | 148 + tests/exporting/prometheus-raw.txt | 156 + tests/exporting/prometheus.bats | 31 + tests/health_mgmtapi/README.md | 15 + .../expected_list/ALARM_CPU_IOWAIT-list.json | 1 + .../expected_list/ALARM_CPU_USAGE-list.json | 1 + .../expected_list/CONTEXT_SYSTEM_CPU-list.json | 1 + .../health_mgmtapi/expected_list/DISABLE-list.json | 1 + .../expected_list/DISABLE_ALL-list.json | 1 + .../expected_list/DISABLE_ALL_ERROR-list.json | 1 + .../expected_list/DISABLE_SYSTEM_LOAD-list.json | 1 + .../expected_list/FAMILIES_LOAD-list.json | 1 + tests/health_mgmtapi/expected_list/HOSTS-list.json | 1 + tests/health_mgmtapi/expected_list/RESET-list.json | 1 + .../health_mgmtapi/expected_list/SILENCE-list.json | 1 + .../expected_list/SILENCE_2-list.json | 1 + .../expected_list/SILENCE_3-list.json | 1 + .../SILENCE_ALARM_CPU_USAGE-list.json | 1 + .../SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER-list.json | 1 + .../expected_list/SILENCE_ALL-list.json | 1 + tests/health_mgmtapi/health-cmdapi-test.sh.in | 226 + tests/installer/checksums.sh | 53 + tests/installer/slack.sh | 65 + tests/k6/data.js | 67 + tests/lifecycle.bats | 60 + tests/profile/Makefile | 51 + tests/profile/benchmark-dictionary.c | 130 + tests/profile/benchmark-line-parsing.c | 702 + tests/profile/benchmark-procfile-parser.c | 329 + tests/profile/benchmark-registry.c | 227 + tests/profile/benchmark-value-pairs.c | 623 + tests/profile/statsd-stress.c | 151 + tests/profile/test-eval.c | 289 + tests/run-unit-tests.sh | 39 + tests/stress.sh | 83 + .../system_cpu.conf.alarm_foreach | 8 + .../system_cpu.conf.alarm_foreach_sp | 8 + .../system_cpu.conf.template_alarm | 26 + .../system_cpu.conf.template_foreach | 8 + .../system_cpu.conf.template_foreach_sp | 8 + .../system_cpu.conf.unique_alarm | 26 + tests/template_dimension/template_dim.sh.in | 88 + tests/updater_checks.bats | 65 + tests/updater_checks.sh | 71 + tests/urls/request.sh.in | 307 + web/Makefile.am | 22 + web/README.md | 234 + web/api/Makefile.am | 21 + web/api/README.md | 12 + web/api/badges/Makefile.am | 8 + web/api/badges/README.md | 365 + web/api/badges/web_buffer_svg.c | 1156 + web/api/badges/web_buffer_svg.h | 18 + web/api/exporters/Makefile.am | 13 + web/api/exporters/README.md | 10 + web/api/exporters/allmetrics.c | 133 + web/api/exporters/allmetrics.h | 12 + web/api/exporters/prometheus/Makefile.am | 8 + web/api/exporters/prometheus/README.md | 10 + web/api/exporters/shell/Makefile.am | 8 + web/api/exporters/shell/README.md | 69 + web/api/exporters/shell/allmetrics_shell.c | 169 + web/api/exporters/shell/allmetrics_shell.h | 21 + web/api/formatters/Makefile.am | 15 + web/api/formatters/README.md | 78 + web/api/formatters/charts2json.c | 191 + web/api/formatters/charts2json.h | 12 + web/api/formatters/csv/Makefile.am | 8 + web/api/formatters/csv/README.md | 144 + web/api/formatters/csv/csv.c | 142 + web/api/formatters/csv/csv.h | 12 + web/api/formatters/json/Makefile.am | 8 + web/api/formatters/json/README.md | 156 + web/api/formatters/json/json.c | 283 + web/api/formatters/json/json.h | 10 + web/api/formatters/json_wrapper.c | 423 + web/api/formatters/json_wrapper.h | 15 + web/api/formatters/rrd2json.c | 339 + web/api/formatters/rrd2json.h | 84 + web/api/formatters/rrdset2json.c | 155 + web/api/formatters/rrdset2json.h | 10 + web/api/formatters/ssv/Makefile.am | 8 + web/api/formatters/ssv/README.md | 59 + web/api/formatters/ssv/ssv.c | 45 + web/api/formatters/ssv/ssv.h | 10 + web/api/formatters/value/Makefile.am | 8 + web/api/formatters/value/README.md | 24 + web/api/formatters/value/value.c | 162 + web/api/formatters/value/value.h | 31 + web/api/health/Makefile.am | 8 + web/api/health/README.md | 225 + web/api/health/health_cmdapi.c | 206 + web/api/health/health_cmdapi.h | 31 + web/api/netdata-swagger.json | 3288 + web/api/netdata-swagger.yaml | 2614 + web/api/queries/Makefile.am | 23 + web/api/queries/README.md | 176 + web/api/queries/average/Makefile.am | 8 + web/api/queries/average/README.md | 46 + web/api/queries/average/average.c | 59 + web/api/queries/average/average.h | 15 + web/api/queries/countif/Makefile.am | 8 + web/api/queries/countif/README.md | 36 + web/api/queries/countif/countif.c | 136 + web/api/queries/countif/countif.h | 15 + web/api/queries/des/Makefile.am | 8 + web/api/queries/des/README.md | 73 + web/api/queries/des/des.c | 137 + web/api/queries/des/des.h | 17 + web/api/queries/incremental_sum/Makefile.am | 8 + web/api/queries/incremental_sum/README.md | 41 + web/api/queries/incremental_sum/incremental_sum.c | 66 + web/api/queries/incremental_sum/incremental_sum.h | 15 + web/api/queries/max/Makefile.am | 8 + web/api/queries/max/README.md | 38 + web/api/queries/max/max.c | 57 + web/api/queries/max/max.h | 15 + web/api/queries/median/Makefile.am | 8 + web/api/queries/median/README.md | 60 + web/api/queries/median/median.c | 140 + web/api/queries/median/median.h | 23 + web/api/queries/min/Makefile.am | 8 + web/api/queries/min/README.md | 38 + web/api/queries/min/min.c | 57 + web/api/queries/min/min.h | 15 + web/api/queries/percentile/Makefile.am | 8 + web/api/queries/percentile/README.md | 58 + web/api/queries/percentile/percentile.c | 169 + web/api/queries/percentile/percentile.h | 23 + web/api/queries/query.c | 2175 + web/api/queries/query.h | 59 + web/api/queries/rrdr.c | 101 + web/api/queries/rrdr.h | 152 + web/api/queries/ses/Makefile.am | 8 + web/api/queries/ses/README.md | 61 + web/api/queries/ses/ses.c | 90 + web/api/queries/ses/ses.h | 17 + web/api/queries/stddev/Makefile.am | 8 + web/api/queries/stddev/README.md | 93 + web/api/queries/stddev/stddev.c | 173 + web/api/queries/stddev/stddev.h | 18 + web/api/queries/sum/Makefile.am | 8 + web/api/queries/sum/README.md | 41 + web/api/queries/sum/sum.c | 55 + web/api/queries/sum/sum.h | 15 + web/api/queries/trimmed_mean/Makefile.am | 8 + web/api/queries/trimmed_mean/README.md | 56 + web/api/queries/trimmed_mean/trimmed_mean.c | 166 + web/api/queries/trimmed_mean/trimmed_mean.h | 22 + web/api/queries/weights.c | 1107 + web/api/queries/weights.h | 33 + web/api/tests/valid_urls.c | 789 + web/api/tests/web_api.c | 473 + web/api/web_api_v1.c | 1707 + web/api/web_api_v1.h | 41 + web/gui/.dashboard-notice.md | 7 + web/gui/.well-known/dnt/cookies | 14 + web/gui/Makefile.am | 99 + web/gui/README.md | 167 + web/gui/browserconfig.xml | 2 + web/gui/bundle_dashboard.py | 125 + web/gui/confluence/README.md | 1019 + web/gui/css/c3-0.4.18.min.css | 2 + web/gui/css/morris-0.5.1.css | 3 + web/gui/custom/README.md | 604 + web/gui/dashboard/Makefile.am | 206 + web/gui/dashboard/README.md | 1 + web/gui/dashboard/asset-manifest.json | 47 + web/gui/dashboard/console.html | 72 + web/gui/dashboard/css/bootstrap-3.3.7.css | 6760 + .../dashboard/css/bootstrap-slate-flat-3.3.7.css | 7101 + .../dashboard/css/bootstrap-slider-10.0.0.min.css | 22 + .../dashboard/css/bootstrap-theme-3.3.7.min.css | 7 + .../dashboard/css/bootstrap-toggle-2.2.2.min.css | 29 + web/gui/dashboard/css/dashboard.css | 785 + web/gui/dashboard/css/dashboard.slate.css | 803 + web/gui/dashboard/dash-example.html | 1020 + web/gui/dashboard/dashboard-react.js | 675 + web/gui/dashboard/dashboard.css | 785 + web/gui/dashboard/dashboard.html | 699 + web/gui/dashboard/dashboard.js | 10340 + web/gui/dashboard/dashboard.slate.css | 803 + web/gui/dashboard/demo.html | 51 + web/gui/dashboard/demo2.html | 143 + web/gui/dashboard/demosites.html | 1469 + web/gui/dashboard/demosites2.html | 1112 + web/gui/dashboard/favicon.ico | Bin 0 -> 1150 bytes .../fonts/glyphicons-halflings-regular.eot | Bin 0 -> 20127 bytes .../fonts/glyphicons-halflings-regular.svg | 289 + .../fonts/glyphicons-halflings-regular.ttf | Bin 0 -> 45404 bytes .../fonts/glyphicons-halflings-regular.woff | Bin 0 -> 23424 bytes .../fonts/glyphicons-halflings-regular.woff2 | Bin 0 -> 18028 bytes web/gui/dashboard/goto-host-from-alarm.html | 250 + web/gui/dashboard/images/alert-128-orange.png | Bin 0 -> 3477 bytes web/gui/dashboard/images/alert-128-red.png | Bin 0 -> 3743 bytes .../dashboard/images/alert-multi-size-orange.ico | Bin 0 -> 112374 bytes web/gui/dashboard/images/alert-multi-size-red.ico | Bin 0 -> 112458 bytes web/gui/dashboard/images/alerts.jpg | Bin 0 -> 388104 bytes web/gui/dashboard/images/alerts.png | Bin 0 -> 59943 bytes web/gui/dashboard/images/android-icon-144x144.png | Bin 0 -> 2721 bytes web/gui/dashboard/images/android-icon-192x192.png | Bin 0 -> 3923 bytes web/gui/dashboard/images/android-icon-36x36.png | Bin 0 -> 539 bytes web/gui/dashboard/images/android-icon-48x48.png | Bin 0 -> 762 bytes web/gui/dashboard/images/android-icon-72x72.png | Bin 0 -> 1153 bytes web/gui/dashboard/images/android-icon-96x96.png | Bin 0 -> 1747 bytes web/gui/dashboard/images/animated.gif | Bin 0 -> 389597 bytes web/gui/dashboard/images/apple-icon-114x114.png | Bin 0 -> 3651 bytes web/gui/dashboard/images/apple-icon-120x120.png | Bin 0 -> 2315 bytes web/gui/dashboard/images/apple-icon-144x144.png | Bin 0 -> 4001 bytes web/gui/dashboard/images/apple-icon-152x152.png | Bin 0 -> 5026 bytes web/gui/dashboard/images/apple-icon-180x180.png | Bin 0 -> 3645 bytes web/gui/dashboard/images/apple-icon-57x57.png | Bin 0 -> 1250 bytes web/gui/dashboard/images/apple-icon-60x60.png | Bin 0 -> 1052 bytes web/gui/dashboard/images/apple-icon-72x72.png | Bin 0 -> 1427 bytes web/gui/dashboard/images/apple-icon-76x76.png | Bin 0 -> 1711 bytes .../dashboard/images/apple-icon-precomposed.png | Bin 0 -> 3926 bytes web/gui/dashboard/images/apple-icon.png | Bin 0 -> 3926 bytes web/gui/dashboard/images/banner-icon-144x144.png | Bin 0 -> 2724 bytes .../dashboard/images/check-mark-2-128-green.png | Bin 0 -> 3771 bytes .../images/check-mark-2-multi-size-green.ico | Bin 0 -> 111893 bytes web/gui/dashboard/images/dashboards.png | Bin 0 -> 175464 bytes web/gui/dashboard/images/favicon-128.png | Bin 0 -> 2436 bytes web/gui/dashboard/images/favicon-16x16.png | Bin 0 -> 285 bytes web/gui/dashboard/images/favicon-196x196.png | Bin 0 -> 10025 bytes web/gui/dashboard/images/favicon-32x32.png | Bin 0 -> 454 bytes web/gui/dashboard/images/favicon-96x96.png | Bin 0 -> 1925 bytes web/gui/dashboard/images/favicon.ico | Bin 0 -> 1150 bytes web/gui/dashboard/images/home.png | Bin 0 -> 98888 bytes web/gui/dashboard/images/ms-icon-144x144.png | Bin 0 -> 4001 bytes web/gui/dashboard/images/ms-icon-150x150.png | Bin 0 -> 2867 bytes web/gui/dashboard/images/ms-icon-310x150.png | Bin 0 -> 3632 bytes web/gui/dashboard/images/ms-icon-310x310.png | Bin 0 -> 7215 bytes web/gui/dashboard/images/ms-icon-36x36.png | Bin 0 -> 536 bytes web/gui/dashboard/images/ms-icon-70x70.png | Bin 0 -> 2436 bytes web/gui/dashboard/images/netdata-logomark.svg | 8 + web/gui/dashboard/images/netdata.svg | 18 + web/gui/dashboard/images/nodeView.png | Bin 0 -> 645133 bytes web/gui/dashboard/images/nodes.jpg | Bin 0 -> 452421 bytes web/gui/dashboard/images/overview.png | Bin 0 -> 101515 bytes web/gui/dashboard/images/packaging-beta-tag.svg | 42 + web/gui/dashboard/images/post.png | Bin 0 -> 9043 bytes web/gui/dashboard/images/pricing.png | Bin 0 -> 148860 bytes web/gui/dashboard/images/seo-performance-128.png | Bin 0 -> 1828 bytes web/gui/dashboard/index-node-view.html | 30 + web/gui/dashboard/index.html | 16 + web/gui/dashboard/infographic.html | 171 + web/gui/dashboard/lib/bootstrap-3.3.7.min.js | 8 + .../dashboard/lib/bootstrap-slider-10.0.0.min.js | 6 + .../dashboard/lib/bootstrap-table-1.11.0.min.js | 9 + .../lib/bootstrap-table-export-1.11.0.min.js | 8 + .../dashboard/lib/bootstrap-toggle-2.2.2.min.js | 10 + .../dashboard/lib/clipboard-polyfill-be05dad.js | 9 + web/gui/dashboard/lib/d3-4.12.2.min.js | 3 + web/gui/dashboard/lib/d3pie-0.2.1-netdata-3.js | 2124 + web/gui/dashboard/lib/dygraph-c91c859.min.js | 7 + .../lib/dygraph-smooth-plotter-c91c859.js | 141 + web/gui/dashboard/lib/fontawesome-all-5.0.1.min.js | 6 + web/gui/dashboard/lib/gauge-1.3.2.min.js | 2 + web/gui/dashboard/lib/jquery-3.6.0.min.js | 2 + .../lib/jquery.easypiechart-97b5824.min.js | 10 + web/gui/dashboard/lib/jquery.peity-3.2.0.min.js | 14 + .../dashboard/lib/jquery.sparkline-2.1.2.min.js | 6 + web/gui/dashboard/lib/lz-string-1.4.4.min.js | 2 + web/gui/dashboard/lib/pako-1.0.6.min.js | 2 + .../dashboard/lib/perfect-scrollbar-0.6.15.min.js | 3 + web/gui/dashboard/lib/tableExport-1.6.0.min.js | 55 + web/gui/dashboard/manifest.json | 41 + ...he-manifest.5fec6109084644adf7bf854243e1a044.js | 190 + web/gui/dashboard/refresh-badges.js | 98 + web/gui/dashboard/robots.txt | 7 + web/gui/dashboard/service-worker.js | 39 + web/gui/dashboard/sitemap.xml | 9 + web/gui/dashboard/static/css/2.c454aab8.chunk.css | 15 + .../dashboard/static/css/2.c454aab8.chunk.css.map | 1 + web/gui/dashboard/static/css/4.a36e3b73.chunk.css | 42 + .../dashboard/static/css/4.a36e3b73.chunk.css.map | 1 + .../dashboard/static/css/main.53ba10f1.chunk.css | 2 + .../static/css/main.53ba10f1.chunk.css.map | 1 + web/gui/dashboard/static/js/10.a5cd7d0e.chunk.js | 2 + .../dashboard/static/js/10.a5cd7d0e.chunk.js.map | 1 + web/gui/dashboard/static/js/2.92ca8446.chunk.js | 3 + .../static/js/2.92ca8446.chunk.js.LICENSE | 278 + .../dashboard/static/js/2.92ca8446.chunk.js.map | 1 + web/gui/dashboard/static/js/3.f137faca.chunk.js | 2 + .../dashboard/static/js/3.f137faca.chunk.js.map | 1 + web/gui/dashboard/static/js/4.2dbcd906.chunk.js | 2 + .../dashboard/static/js/4.2dbcd906.chunk.js.map | 1 + web/gui/dashboard/static/js/5.2f783a54.chunk.js | 3 + .../static/js/5.2f783a54.chunk.js.LICENSE | 3 + .../dashboard/static/js/5.2f783a54.chunk.js.map | 1 + web/gui/dashboard/static/js/6.e1951239.chunk.js | 2 + .../dashboard/static/js/6.e1951239.chunk.js.map | 1 + web/gui/dashboard/static/js/7.c2417fb0.chunk.js | 2 + .../dashboard/static/js/7.c2417fb0.chunk.js.map | 1 + web/gui/dashboard/static/js/8.b4161ea2.chunk.js | 2 + .../dashboard/static/js/8.b4161ea2.chunk.js.map | 1 + web/gui/dashboard/static/js/9.a4363968.chunk.js | 2 + .../dashboard/static/js/9.a4363968.chunk.js.map | 1 + web/gui/dashboard/static/js/main.7d1bdca1.chunk.js | 3 + .../static/js/main.7d1bdca1.chunk.js.LICENSE | 8 + .../dashboard/static/js/main.7d1bdca1.chunk.js.map | 1 + .../dashboard/static/js/runtime-main.08abed8f.js | 2 + .../static/js/runtime-main.08abed8f.js.map | 1 + .../media/ibm-plex-sans-latin-100.245539db.woff2 | Bin 0 -> 18212 bytes .../media/ibm-plex-sans-latin-100.9a582f3a.woff | Bin 0 -> 23296 bytes .../ibm-plex-sans-latin-100italic.1ea7c5d2.woff | Bin 0 -> 25732 bytes .../ibm-plex-sans-latin-100italic.3c34cf08.woff2 | Bin 0 -> 20052 bytes .../media/ibm-plex-sans-latin-200.67524c36.woff | Bin 0 -> 24468 bytes .../media/ibm-plex-sans-latin-200.bf72c841.woff2 | Bin 0 -> 19240 bytes .../ibm-plex-sans-latin-200italic.52df2560.woff | Bin 0 -> 26196 bytes .../ibm-plex-sans-latin-200italic.bbc2d552.woff2 | Bin 0 -> 20536 bytes .../media/ibm-plex-sans-latin-300.10bb6a0a.woff | Bin 0 -> 24220 bytes .../media/ibm-plex-sans-latin-300.9e1c48af.woff2 | Bin 0 -> 19124 bytes .../ibm-plex-sans-latin-300italic.c76f2ab5.woff2 | Bin 0 -> 20468 bytes .../ibm-plex-sans-latin-300italic.d3566d5b.woff | Bin 0 -> 25876 bytes .../media/ibm-plex-sans-latin-400.263d6267.woff2 | Bin 0 -> 17784 bytes .../media/ibm-plex-sans-latin-400.a2c56f94.woff | Bin 0 -> 22620 bytes .../ibm-plex-sans-latin-400italic.272f8611.woff | Bin 0 -> 24664 bytes .../ibm-plex-sans-latin-400italic.89a93a1b.woff2 | Bin 0 -> 19364 bytes .../media/ibm-plex-sans-latin-500.0866c244.woff2 | Bin 0 -> 18640 bytes .../media/ibm-plex-sans-latin-500.f6d5c5d5.woff | Bin 0 -> 23792 bytes .../ibm-plex-sans-latin-500italic.ccd41bd1.woff | Bin 0 -> 25852 bytes .../ibm-plex-sans-latin-500italic.ffd12d59.woff2 | Bin 0 -> 20380 bytes .../media/ibm-plex-sans-latin-600.337b1651.woff | Bin 0 -> 24104 bytes .../media/ibm-plex-sans-latin-600.7852d4dc.woff2 | Bin 0 -> 18912 bytes .../ibm-plex-sans-latin-600italic.17e5379f.woff2 | Bin 0 -> 19940 bytes .../ibm-plex-sans-latin-600italic.6f4ba6aa.woff | Bin 0 -> 25428 bytes .../media/ibm-plex-sans-latin-700.b8809d61.woff | Bin 0 -> 22904 bytes .../media/ibm-plex-sans-latin-700.c9983d3d.woff2 | Bin 0 -> 17976 bytes .../ibm-plex-sans-latin-700italic.02954bee.woff2 | Bin 0 -> 19360 bytes .../ibm-plex-sans-latin-700italic.72e9af40.woff | Bin 0 -> 24584 bytes .../static/media/material-icons.0509ab09.woff2 | Bin 0 -> 60840 bytes web/gui/dashboard/tv-react.html | 287 + web/gui/dashboard/tv.html | 279 + web/gui/dashboard_info.js | 7686 + web/gui/dashboard_info_custom_example.js | 62 + web/gui/main.css | 764 + web/gui/main.js | 5162 + web/gui/old/index.html | 1321 + web/gui/src/dashboard.js/alarms.js | 422 + web/gui/src/dashboard.js/boot.js | 142 + web/gui/src/dashboard.js/chart-registry.js | 94 + web/gui/src/dashboard.js/charting.js | 492 + web/gui/src/dashboard.js/charting/_c3.js | 114 + web/gui/src/dashboard.js/charting/_morris.js | 81 + web/gui/src/dashboard.js/charting/_raphael.js | 48 + web/gui/src/dashboard.js/charting/d3.js | 43 + web/gui/src/dashboard.js/charting/d3pie.js | 341 + web/gui/src/dashboard.js/charting/dygraph.js | 1065 + .../src/dashboard.js/charting/easy-pie-chart.js | 281 + web/gui/src/dashboard.js/charting/gauge.js | 406 + web/gui/src/dashboard.js/charting/google-charts.js | 132 + web/gui/src/dashboard.js/charting/peity.js | 62 + web/gui/src/dashboard.js/charting/sparkline.js | 155 + web/gui/src/dashboard.js/charting/textonly.js | 18 + web/gui/src/dashboard.js/colors.js | 34 + web/gui/src/dashboard.js/common.js | 249 + web/gui/src/dashboard.js/compatibility.js | 31 + web/gui/src/dashboard.js/dependencies.js | 21 + web/gui/src/dashboard.js/epilogue.js.inc | 1 + web/gui/src/dashboard.js/error-handling.js | 52 + web/gui/src/dashboard.js/localstorage.js | 173 + web/gui/src/dashboard.js/main.js | 4279 + web/gui/src/dashboard.js/options.js | 225 + web/gui/src/dashboard.js/prologue.js.inc | 84 + web/gui/src/dashboard.js/registry.js | 311 + web/gui/src/dashboard.js/server-detection.js | 29 + web/gui/src/dashboard.js/themes.js | 92 + web/gui/src/dashboard.js/timeout.js | 100 + web/gui/src/dashboard.js/units-conversion.js | 528 + web/gui/src/dashboard.js/utils.js | 432 + web/gui/src/dashboard.js/xss.js | 84 + web/gui/static/img/netdata-logomark.svg | 3 + web/server/Makefile.am | 12 + web/server/README.md | 243 + web/server/static/Makefile.am | 11 + web/server/static/README.md | 17 + web/server/static/static-threaded.c | 550 + web/server/static/static-threaded.h | 10 + web/server/web_client.c | 1937 + web/server/web_client.h | 211 + web/server/web_client_cache.c | 269 + web/server/web_client_cache.h | 31 + web/server/web_server.c | 159 + web/server/web_server.h | 64 + 1908 files changed, 688525 insertions(+) create mode 100644 .clang-format create mode 100644 .codacy.yml create mode 100644 .codeclimate.yml create mode 100644 .csslintrc create mode 100644 .devcontainer/devcontainer.json create mode 120000 .dockerignore create mode 100644 .eslintignore create mode 100644 .eslintrc create mode 100644 .gitattributes create mode 100644 .github/CODEOWNERS create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 .github/ISSUE_TEMPLATE/BUG_REPORT.yml create mode 100644 .github/ISSUE_TEMPLATE/FEAT_REQUEST.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/codeql/python-config.yml create mode 100644 .github/data/distros.yml create mode 100644 .github/dependabot.yml create mode 100644 .github/dockerfiles/Dockerfile.build_test create mode 100644 .github/dockerfiles/Dockerfile.clang create mode 100644 .github/labeler.yml create mode 100755 .github/scripts/build-artifacts.sh create mode 100755 .github/scripts/build-dist.sh create mode 100755 .github/scripts/build-static.sh create mode 100755 .github/scripts/bump-packaging-version.sh create mode 100755 .github/scripts/check-updater.sh create mode 100755 .github/scripts/ci-support-pkgs.sh create mode 100755 .github/scripts/docker-test.sh create mode 100644 .github/scripts/functions.sh create mode 100755 .github/scripts/gen-docker-tags.py create mode 100755 .github/scripts/get-static-cache-key.sh create mode 100755 .github/scripts/netdata-pkgcloud-cleanup.py create mode 100755 .github/scripts/package-upload.sh create mode 100755 .github/scripts/package_cloud_wrapper.sh create mode 100755 .github/scripts/pkg-test.sh create mode 100755 .github/scripts/prepare-release-base.sh create mode 100755 .github/scripts/run-updater-check.sh create mode 100755 .github/scripts/run_install_with_dist_file.sh create mode 100644 .github/stale.yml create mode 100644 .github/workflows/add-to-project.yml create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/checks.yml create mode 100644 .github/workflows/cloud_regression.yml create mode 100644 .github/workflows/codeql.yml create mode 100644 .github/workflows/coverity.yml create mode 100644 .github/workflows/dashboard-pr.yml create mode 100644 .github/workflows/docker.yml create mode 100644 .github/workflows/docs.yml create mode 100644 .github/workflows/labeler.yml create mode 100644 .github/workflows/packagecloud.yml create mode 100644 .github/workflows/packaging.yml create mode 100644 .github/workflows/release.yml create mode 100644 .github/workflows/repoconfig-packages.yml create mode 100644 .github/workflows/review.yml create mode 100644 .github/workflows/tests.yml create mode 100644 .github/workflows/trigger-learn-update.yml create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 .lgtm.yml create mode 100644 .mlc_config.json create mode 100644 .remarkignore create mode 100644 .remarkrc.js create mode 100644 .squash.yml create mode 100644 .travis.yml create mode 100644 .travis/README.md create mode 100755 .travis/check_changelog_last_modification.sh create mode 100755 .travis/create_artifacts.sh create mode 100755 .travis/create_changelog.sh create mode 100644 .travis/current_build_status create mode 100755 .travis/draft_release.sh create mode 100644 .travis/gcs-credentials.json.enc create mode 100755 .travis/generate_changelog_and_tag_release.sh create mode 100755 .travis/generate_changelog_for_nightlies.sh create mode 100755 .travis/nightlies.sh create mode 100755 .travis/tagger.sh create mode 100755 .travis/trigger_artifact_build.sh create mode 100755 .travis/trigger_docker_build.sh create mode 100755 .travis/trigger_package_build.sh create mode 100644 .travis/utils.sh create mode 100644 .yamllint.yml create mode 100644 BREAKING_CHANGES.md create mode 100644 BUILD.md create mode 100644 CHANGELOG.md create mode 100644 CMakeLists.txt create mode 120000 Dockerfile create mode 100644 Dockerfile.test create mode 100644 HISTORICAL_CHANGELOG.md create mode 100644 LICENSE create mode 100644 Makefile.am create mode 100644 README.md create mode 100644 REDISTRIBUTED.md create mode 100644 aclk/README.md create mode 100644 aclk/aclk.c create mode 100644 aclk/aclk.h create mode 100644 aclk/aclk_alarm_api.c create mode 100644 aclk/aclk_alarm_api.h create mode 100644 aclk/aclk_capas.c create mode 100644 aclk/aclk_capas.h create mode 100644 aclk/aclk_contexts_api.c create mode 100644 aclk/aclk_contexts_api.h create mode 100644 aclk/aclk_otp.c create mode 100644 aclk/aclk_otp.h create mode 100644 aclk/aclk_proxy.c create mode 100644 aclk/aclk_proxy.h create mode 100644 aclk/aclk_query.c create mode 100644 aclk/aclk_query.h create mode 100644 aclk/aclk_query_queue.c create mode 100644 aclk/aclk_query_queue.h create mode 100644 aclk/aclk_rrdhost_state.h create mode 100644 aclk/aclk_rx_msgs.c create mode 100644 aclk/aclk_rx_msgs.h create mode 100644 aclk/aclk_stats.c create mode 100644 aclk/aclk_stats.h create mode 100644 aclk/aclk_tx_msgs.c create mode 100644 aclk/aclk_tx_msgs.h create mode 100644 aclk/aclk_util.c create mode 100644 aclk/aclk_util.h create mode 100644 aclk/helpers/mqtt_wss_pal.h create mode 100644 aclk/helpers/ringbuffer_pal.h create mode 100644 aclk/https_client.c create mode 100644 aclk/https_client.h create mode 100644 aclk/schema-wrappers/alarm_config.cc create mode 100644 aclk/schema-wrappers/alarm_config.h create mode 100644 aclk/schema-wrappers/alarm_stream.cc create mode 100644 aclk/schema-wrappers/alarm_stream.h create mode 100644 aclk/schema-wrappers/capability.cc create mode 100644 aclk/schema-wrappers/capability.h create mode 100644 aclk/schema-wrappers/connection.cc create mode 100644 aclk/schema-wrappers/connection.h create mode 100644 aclk/schema-wrappers/context.cc create mode 100644 aclk/schema-wrappers/context.h create mode 100644 aclk/schema-wrappers/context_stream.cc create mode 100644 aclk/schema-wrappers/context_stream.h create mode 100644 aclk/schema-wrappers/node_connection.cc create mode 100644 aclk/schema-wrappers/node_connection.h create mode 100644 aclk/schema-wrappers/node_creation.cc create mode 100644 aclk/schema-wrappers/node_creation.h create mode 100644 aclk/schema-wrappers/node_info.cc create mode 100644 aclk/schema-wrappers/node_info.h create mode 100644 aclk/schema-wrappers/proto_2_json.cc create mode 100644 aclk/schema-wrappers/proto_2_json.h create mode 100644 aclk/schema-wrappers/schema_wrapper_utils.cc create mode 100644 aclk/schema-wrappers/schema_wrapper_utils.h create mode 100644 aclk/schema-wrappers/schema_wrappers.h create mode 100755 build-artifacts.sh create mode 100644 build/m4/ax_c__generic.m4 create mode 100644 build/m4/ax_c_lto.m4 create mode 100644 build/m4/ax_c_mallinfo.m4 create mode 100644 build/m4/ax_c_mallopt.m4 create mode 100644 build/m4/ax_c_statement_expressions.m4 create mode 100644 build/m4/ax_check_compile_flag.m4 create mode 100644 build/m4/ax_check_enable_debug.m4 create mode 100644 build/m4/ax_gcc_func_attribute.m4 create mode 100644 build/m4/ax_pthread.m4 create mode 100644 build/m4/jemalloc.m4 create mode 100644 build/m4/tcmalloc.m4 create mode 100644 build/subst.inc create mode 100644 build_external/README.md create mode 100755 build_external/bin/clean-install.sh create mode 100755 build_external/bin/make-install.sh create mode 100644 build_external/clean-install-arch-debug.Dockerfile create mode 100644 build_external/clean-install-arch-extras.Dockerfile create mode 100644 build_external/clean-install-arch.Dockerfile create mode 100644 build_external/clean-install.Dockerfile create mode 100644 build_external/make-install.Dockerfile create mode 100644 build_external/scenarios/aclk-testing/agent-compose.yml create mode 100644 build_external/scenarios/aclk-testing/agent-valgrind-compose.yml create mode 100644 build_external/scenarios/aclk-testing/agent_netdata.conf create mode 100644 build_external/scenarios/aclk-testing/configureVerneMQ.Dockerfile create mode 100644 build_external/scenarios/aclk-testing/paho-compose.yml create mode 100644 build_external/scenarios/aclk-testing/paho-inspection.py create mode 100644 build_external/scenarios/aclk-testing/paho.Dockerfile create mode 100644 build_external/scenarios/aclk-testing/vernemq-compose.yml create mode 100644 build_external/scenarios/aclk-testing/vernemq.conf create mode 100644 build_external/scenarios/gaps_hi/child-compose.yml create mode 100644 build_external/scenarios/gaps_hi/child_guid create mode 100644 build_external/scenarios/gaps_hi/child_stream.conf create mode 100644 build_external/scenarios/gaps_hi/middle-compose.yml create mode 100644 build_external/scenarios/gaps_hi/middle_guid create mode 100644 build_external/scenarios/gaps_hi/middle_stream.conf create mode 100644 build_external/scenarios/gaps_hi/min.conf create mode 100644 build_external/scenarios/gaps_hi/parent-compose.yml create mode 100644 build_external/scenarios/gaps_hi/parent_guid create mode 100644 build_external/scenarios/gaps_hi/parent_stream.conf create mode 100644 build_external/scenarios/gaps_lo/child-compose.yml create mode 100644 build_external/scenarios/gaps_lo/child_guid create mode 100644 build_external/scenarios/gaps_lo/child_stream.conf create mode 100644 build_external/scenarios/gaps_lo/middle-compose.yml create mode 100644 build_external/scenarios/gaps_lo/middle_guid create mode 100644 build_external/scenarios/gaps_lo/middle_stream.conf create mode 100644 build_external/scenarios/gaps_lo/mostly_off.conf create mode 100644 build_external/scenarios/gaps_lo/parent-compose.yml create mode 100644 build_external/scenarios/gaps_lo/parent_guid create mode 100644 build_external/scenarios/gaps_lo/parent_stream.conf create mode 100644 build_external/scenarios/only-agent/docker-compose.yml create mode 100644 build_external/scenarios/parent-child/child_stream.conf create mode 100644 build_external/scenarios/parent-child/docker-compose.yml create mode 100644 build_external/scenarios/parent-child/parent_stream.conf create mode 100644 claim/Makefile.am create mode 100644 claim/README.md create mode 100644 claim/claim.c create mode 100644 claim/claim.h create mode 100755 claim/netdata-claim.sh.in create mode 100644 cli/Makefile.am create mode 100644 cli/README.md create mode 100644 cli/cli.c create mode 100644 cli/cli.h create mode 100644 collectors/COLLECTORS.md create mode 100644 collectors/Makefile.am create mode 100644 collectors/README.md create mode 100644 collectors/REFERENCE.md create mode 100644 collectors/all.h create mode 100644 collectors/apps.plugin/Makefile.am create mode 100644 collectors/apps.plugin/README.md create mode 100644 collectors/apps.plugin/apps_groups.conf create mode 100644 collectors/apps.plugin/apps_plugin.c create mode 100644 collectors/cgroups.plugin/Makefile.am create mode 100644 collectors/cgroups.plugin/README.md create mode 100755 collectors/cgroups.plugin/cgroup-name.sh create mode 100755 collectors/cgroups.plugin/cgroup-network-helper.sh create mode 100644 collectors/cgroups.plugin/cgroup-network.c create mode 100644 collectors/cgroups.plugin/sys_fs_cgroup.c create mode 100644 collectors/cgroups.plugin/sys_fs_cgroup.h create mode 100644 collectors/cgroups.plugin/tests/test_cgroups_plugin.c create mode 100644 collectors/cgroups.plugin/tests/test_cgroups_plugin.h create mode 100644 collectors/cgroups.plugin/tests/test_doubles.c create mode 100644 collectors/charts.d.plugin/Makefile.am create mode 100644 collectors/charts.d.plugin/README.md create mode 100644 collectors/charts.d.plugin/ap/Makefile.inc create mode 100644 collectors/charts.d.plugin/ap/README.md create mode 100644 collectors/charts.d.plugin/ap/ap.chart.sh create mode 100644 collectors/charts.d.plugin/ap/ap.conf create mode 100644 collectors/charts.d.plugin/apcupsd/Makefile.inc create mode 100644 collectors/charts.d.plugin/apcupsd/README.md create mode 100644 collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh create mode 100644 collectors/charts.d.plugin/apcupsd/apcupsd.conf create mode 100644 collectors/charts.d.plugin/charts.d.conf create mode 100755 collectors/charts.d.plugin/charts.d.dryrun-helper.sh create mode 100755 collectors/charts.d.plugin/charts.d.plugin.in create mode 100644 collectors/charts.d.plugin/example/Makefile.inc create mode 100644 collectors/charts.d.plugin/example/README.md create mode 100644 collectors/charts.d.plugin/example/example.chart.sh create mode 100644 collectors/charts.d.plugin/example/example.conf create mode 100644 collectors/charts.d.plugin/libreswan/Makefile.inc create mode 100644 collectors/charts.d.plugin/libreswan/README.md create mode 100644 collectors/charts.d.plugin/libreswan/libreswan.chart.sh create mode 100644 collectors/charts.d.plugin/libreswan/libreswan.conf create mode 100644 collectors/charts.d.plugin/loopsleepms.sh.inc create mode 100644 collectors/charts.d.plugin/nut/Makefile.inc create mode 100644 collectors/charts.d.plugin/nut/README.md create mode 100644 collectors/charts.d.plugin/nut/nut.chart.sh create mode 100644 collectors/charts.d.plugin/nut/nut.conf create mode 100644 collectors/charts.d.plugin/opensips/Makefile.inc create mode 100644 collectors/charts.d.plugin/opensips/README.md create mode 100644 collectors/charts.d.plugin/opensips/opensips.chart.sh create mode 100644 collectors/charts.d.plugin/opensips/opensips.conf create mode 100644 collectors/charts.d.plugin/sensors/Makefile.inc create mode 100644 collectors/charts.d.plugin/sensors/README.md create mode 100644 collectors/charts.d.plugin/sensors/sensors.chart.sh create mode 100644 collectors/charts.d.plugin/sensors/sensors.conf create mode 100644 collectors/cups.plugin/Makefile.am create mode 100644 collectors/cups.plugin/README.md create mode 100644 collectors/cups.plugin/cups_plugin.c create mode 100644 collectors/diskspace.plugin/Makefile.am create mode 100644 collectors/diskspace.plugin/README.md create mode 100644 collectors/diskspace.plugin/plugin_diskspace.c create mode 100644 collectors/ebpf.plugin/Makefile.am create mode 100644 collectors/ebpf.plugin/README.md create mode 100644 collectors/ebpf.plugin/ebpf.c create mode 100644 collectors/ebpf.plugin/ebpf.d.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/cachestat.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/dcstat.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/disk.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/ebpf_kernel_reject_list.txt create mode 100644 collectors/ebpf.plugin/ebpf.d/fd.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/filesystem.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/hardirq.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/mdflush.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/mount.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/network.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/oomkill.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/process.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/shm.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/softirq.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/swap.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/sync.conf create mode 100644 collectors/ebpf.plugin/ebpf.d/vfs.conf create mode 100644 collectors/ebpf.plugin/ebpf.h create mode 100644 collectors/ebpf.plugin/ebpf_apps.c create mode 100644 collectors/ebpf.plugin/ebpf_apps.h create mode 100644 collectors/ebpf.plugin/ebpf_cachestat.c create mode 100644 collectors/ebpf.plugin/ebpf_cachestat.h create mode 100644 collectors/ebpf.plugin/ebpf_cgroup.c create mode 100644 collectors/ebpf.plugin/ebpf_cgroup.h create mode 100644 collectors/ebpf.plugin/ebpf_dcstat.c create mode 100644 collectors/ebpf.plugin/ebpf_dcstat.h create mode 100644 collectors/ebpf.plugin/ebpf_disk.c create mode 100644 collectors/ebpf.plugin/ebpf_disk.h create mode 100644 collectors/ebpf.plugin/ebpf_fd.c create mode 100644 collectors/ebpf.plugin/ebpf_fd.h create mode 100644 collectors/ebpf.plugin/ebpf_filesystem.c create mode 100644 collectors/ebpf.plugin/ebpf_filesystem.h create mode 100644 collectors/ebpf.plugin/ebpf_hardirq.c create mode 100644 collectors/ebpf.plugin/ebpf_hardirq.h create mode 100644 collectors/ebpf.plugin/ebpf_mdflush.c create mode 100644 collectors/ebpf.plugin/ebpf_mdflush.h create mode 100644 collectors/ebpf.plugin/ebpf_mount.c create mode 100644 collectors/ebpf.plugin/ebpf_mount.h create mode 100644 collectors/ebpf.plugin/ebpf_oomkill.c create mode 100644 collectors/ebpf.plugin/ebpf_oomkill.h create mode 100644 collectors/ebpf.plugin/ebpf_process.c create mode 100644 collectors/ebpf.plugin/ebpf_process.h create mode 100644 collectors/ebpf.plugin/ebpf_shm.c create mode 100644 collectors/ebpf.plugin/ebpf_shm.h create mode 100644 collectors/ebpf.plugin/ebpf_socket.c create mode 100644 collectors/ebpf.plugin/ebpf_socket.h create mode 100644 collectors/ebpf.plugin/ebpf_softirq.c create mode 100644 collectors/ebpf.plugin/ebpf_softirq.h create mode 100644 collectors/ebpf.plugin/ebpf_swap.c create mode 100644 collectors/ebpf.plugin/ebpf_swap.h create mode 100644 collectors/ebpf.plugin/ebpf_sync.c create mode 100644 collectors/ebpf.plugin/ebpf_sync.h create mode 100644 collectors/ebpf.plugin/ebpf_vfs.c create mode 100644 collectors/ebpf.plugin/ebpf_vfs.h create mode 100644 collectors/fping.plugin/Makefile.am create mode 100644 collectors/fping.plugin/README.md create mode 100644 collectors/fping.plugin/fping.conf create mode 100755 collectors/fping.plugin/fping.plugin.in create mode 100644 collectors/freebsd.plugin/Makefile.am create mode 100644 collectors/freebsd.plugin/README.md create mode 100644 collectors/freebsd.plugin/freebsd_devstat.c create mode 100644 collectors/freebsd.plugin/freebsd_getifaddrs.c create mode 100644 collectors/freebsd.plugin/freebsd_getmntinfo.c create mode 100644 collectors/freebsd.plugin/freebsd_ipfw.c create mode 100644 collectors/freebsd.plugin/freebsd_kstat_zfs.c create mode 100644 collectors/freebsd.plugin/freebsd_sysctl.c create mode 100644 collectors/freebsd.plugin/plugin_freebsd.c create mode 100644 collectors/freebsd.plugin/plugin_freebsd.h create mode 100644 collectors/freeipmi.plugin/Makefile.am create mode 100644 collectors/freeipmi.plugin/README.md create mode 100644 collectors/freeipmi.plugin/freeipmi_plugin.c create mode 100644 collectors/idlejitter.plugin/Makefile.am create mode 100644 collectors/idlejitter.plugin/README.md create mode 100644 collectors/idlejitter.plugin/plugin_idlejitter.c create mode 100644 collectors/ioping.plugin/Makefile.am create mode 100644 collectors/ioping.plugin/README.md create mode 100644 collectors/ioping.plugin/ioping.conf create mode 100755 collectors/ioping.plugin/ioping.plugin.in create mode 100644 collectors/macos.plugin/Makefile.am create mode 100644 collectors/macos.plugin/README.md create mode 100644 collectors/macos.plugin/macos_fw.c create mode 100644 collectors/macos.plugin/macos_mach_smi.c create mode 100644 collectors/macos.plugin/macos_sysctl.c create mode 100644 collectors/macos.plugin/plugin_macos.c create mode 100644 collectors/macos.plugin/plugin_macos.h create mode 100644 collectors/nfacct.plugin/Makefile.am create mode 100644 collectors/nfacct.plugin/README.md create mode 100644 collectors/nfacct.plugin/plugin_nfacct.c create mode 100644 collectors/perf.plugin/Makefile.am create mode 100644 collectors/perf.plugin/README.md create mode 100644 collectors/perf.plugin/perf_plugin.c create mode 100644 collectors/plugins.d/Makefile.am create mode 100644 collectors/plugins.d/README.md create mode 100644 collectors/plugins.d/plugins_d.c create mode 100644 collectors/plugins.d/plugins_d.h create mode 100644 collectors/plugins.d/pluginsd_parser.c create mode 100644 collectors/plugins.d/pluginsd_parser.h create mode 100644 collectors/proc.plugin/Makefile.am create mode 100644 collectors/proc.plugin/README.md create mode 100644 collectors/proc.plugin/ipc.c create mode 100644 collectors/proc.plugin/plugin_proc.c create mode 100644 collectors/proc.plugin/plugin_proc.h create mode 100644 collectors/proc.plugin/proc_diskstats.c create mode 100644 collectors/proc.plugin/proc_interrupts.c create mode 100644 collectors/proc.plugin/proc_loadavg.c create mode 100644 collectors/proc.plugin/proc_mdstat.c create mode 100644 collectors/proc.plugin/proc_meminfo.c create mode 100644 collectors/proc.plugin/proc_net_dev.c create mode 100644 collectors/proc.plugin/proc_net_ip_vs_stats.c create mode 100644 collectors/proc.plugin/proc_net_netstat.c create mode 100644 collectors/proc.plugin/proc_net_rpc_nfs.c create mode 100644 collectors/proc.plugin/proc_net_rpc_nfsd.c create mode 100644 collectors/proc.plugin/proc_net_sctp_snmp.c create mode 100644 collectors/proc.plugin/proc_net_sockstat.c create mode 100644 collectors/proc.plugin/proc_net_sockstat6.c create mode 100644 collectors/proc.plugin/proc_net_softnet_stat.c create mode 100644 collectors/proc.plugin/proc_net_stat_conntrack.c create mode 100644 collectors/proc.plugin/proc_net_stat_synproxy.c create mode 100644 collectors/proc.plugin/proc_net_wireless.c create mode 100644 collectors/proc.plugin/proc_pagetypeinfo.c create mode 100644 collectors/proc.plugin/proc_pressure.c create mode 100644 collectors/proc.plugin/proc_pressure.h create mode 100644 collectors/proc.plugin/proc_self_mountinfo.c create mode 100644 collectors/proc.plugin/proc_self_mountinfo.h create mode 100644 collectors/proc.plugin/proc_softirqs.c create mode 100644 collectors/proc.plugin/proc_spl_kstat_zfs.c create mode 100644 collectors/proc.plugin/proc_stat.c create mode 100644 collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c create mode 100644 collectors/proc.plugin/proc_uptime.c create mode 100644 collectors/proc.plugin/proc_vmstat.c create mode 100644 collectors/proc.plugin/sys_block_zram.c create mode 100644 collectors/proc.plugin/sys_class_infiniband.c create mode 100644 collectors/proc.plugin/sys_class_power_supply.c create mode 100644 collectors/proc.plugin/sys_devices_system_edac_mc.c create mode 100644 collectors/proc.plugin/sys_devices_system_node.c create mode 100644 collectors/proc.plugin/sys_fs_btrfs.c create mode 100644 collectors/proc.plugin/sys_kernel_mm_ksm.c create mode 100644 collectors/proc.plugin/zfs_common.c create mode 100644 collectors/proc.plugin/zfs_common.h create mode 100644 collectors/python.d.plugin/Makefile.am create mode 100644 collectors/python.d.plugin/README.md create mode 100644 collectors/python.d.plugin/adaptec_raid/Makefile.inc create mode 100644 collectors/python.d.plugin/adaptec_raid/README.md create mode 100644 collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py create mode 100644 collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf create mode 100644 collectors/python.d.plugin/alarms/Makefile.inc create mode 100644 collectors/python.d.plugin/alarms/README.md create mode 100644 collectors/python.d.plugin/alarms/alarms.chart.py create mode 100644 collectors/python.d.plugin/alarms/alarms.conf create mode 100644 collectors/python.d.plugin/am2320/Makefile.inc create mode 100644 collectors/python.d.plugin/am2320/README.md create mode 100644 collectors/python.d.plugin/am2320/am2320.chart.py create mode 100644 collectors/python.d.plugin/am2320/am2320.conf create mode 100644 collectors/python.d.plugin/anomalies/Makefile.inc create mode 100644 collectors/python.d.plugin/anomalies/README.md create mode 100644 collectors/python.d.plugin/anomalies/anomalies.chart.py create mode 100644 collectors/python.d.plugin/anomalies/anomalies.conf create mode 100644 collectors/python.d.plugin/beanstalk/Makefile.inc create mode 100644 collectors/python.d.plugin/beanstalk/README.md create mode 100644 collectors/python.d.plugin/beanstalk/beanstalk.chart.py create mode 100644 collectors/python.d.plugin/beanstalk/beanstalk.conf create mode 100644 collectors/python.d.plugin/bind_rndc/Makefile.inc create mode 100644 collectors/python.d.plugin/bind_rndc/README.md create mode 100644 collectors/python.d.plugin/bind_rndc/bind_rndc.chart.py create mode 100644 collectors/python.d.plugin/bind_rndc/bind_rndc.conf create mode 100644 collectors/python.d.plugin/boinc/Makefile.inc create mode 100644 collectors/python.d.plugin/boinc/README.md create mode 100644 collectors/python.d.plugin/boinc/boinc.chart.py create mode 100644 collectors/python.d.plugin/boinc/boinc.conf create mode 100644 collectors/python.d.plugin/ceph/Makefile.inc create mode 100644 collectors/python.d.plugin/ceph/README.md create mode 100644 collectors/python.d.plugin/ceph/ceph.chart.py create mode 100644 collectors/python.d.plugin/ceph/ceph.conf create mode 100644 collectors/python.d.plugin/changefinder/Makefile.inc create mode 100644 collectors/python.d.plugin/changefinder/README.md create mode 100644 collectors/python.d.plugin/changefinder/changefinder.chart.py create mode 100644 collectors/python.d.plugin/changefinder/changefinder.conf create mode 100644 collectors/python.d.plugin/dockerd/Makefile.inc create mode 100644 collectors/python.d.plugin/dockerd/README.md create mode 100644 collectors/python.d.plugin/dockerd/dockerd.chart.py create mode 100644 collectors/python.d.plugin/dockerd/dockerd.conf create mode 100644 collectors/python.d.plugin/dovecot/Makefile.inc create mode 100644 collectors/python.d.plugin/dovecot/README.md create mode 100644 collectors/python.d.plugin/dovecot/dovecot.chart.py create mode 100644 collectors/python.d.plugin/dovecot/dovecot.conf create mode 100644 collectors/python.d.plugin/example/Makefile.inc create mode 100644 collectors/python.d.plugin/example/README.md create mode 100644 collectors/python.d.plugin/example/example.chart.py create mode 100644 collectors/python.d.plugin/example/example.conf create mode 100644 collectors/python.d.plugin/exim/Makefile.inc create mode 100644 collectors/python.d.plugin/exim/README.md create mode 100644 collectors/python.d.plugin/exim/exim.chart.py create mode 100644 collectors/python.d.plugin/exim/exim.conf create mode 100644 collectors/python.d.plugin/fail2ban/Makefile.inc create mode 100644 collectors/python.d.plugin/fail2ban/README.md create mode 100644 collectors/python.d.plugin/fail2ban/fail2ban.chart.py create mode 100644 collectors/python.d.plugin/fail2ban/fail2ban.conf create mode 100644 collectors/python.d.plugin/gearman/Makefile.inc create mode 100644 collectors/python.d.plugin/gearman/README.md create mode 100644 collectors/python.d.plugin/gearman/gearman.chart.py create mode 100644 collectors/python.d.plugin/gearman/gearman.conf create mode 100644 collectors/python.d.plugin/go_expvar/Makefile.inc create mode 100644 collectors/python.d.plugin/go_expvar/README.md create mode 100644 collectors/python.d.plugin/go_expvar/go_expvar.chart.py create mode 100644 collectors/python.d.plugin/go_expvar/go_expvar.conf create mode 100644 collectors/python.d.plugin/haproxy/Makefile.inc create mode 100644 collectors/python.d.plugin/haproxy/README.md create mode 100644 collectors/python.d.plugin/haproxy/haproxy.chart.py create mode 100644 collectors/python.d.plugin/haproxy/haproxy.conf create mode 100644 collectors/python.d.plugin/hddtemp/Makefile.inc create mode 100644 collectors/python.d.plugin/hddtemp/README.md create mode 100644 collectors/python.d.plugin/hddtemp/hddtemp.chart.py create mode 100644 collectors/python.d.plugin/hddtemp/hddtemp.conf create mode 100644 collectors/python.d.plugin/hpssa/Makefile.inc create mode 100644 collectors/python.d.plugin/hpssa/README.md create mode 100644 collectors/python.d.plugin/hpssa/hpssa.chart.py create mode 100644 collectors/python.d.plugin/hpssa/hpssa.conf create mode 100644 collectors/python.d.plugin/icecast/Makefile.inc create mode 100644 collectors/python.d.plugin/icecast/README.md create mode 100644 collectors/python.d.plugin/icecast/icecast.chart.py create mode 100644 collectors/python.d.plugin/icecast/icecast.conf create mode 100644 collectors/python.d.plugin/ipfs/Makefile.inc create mode 100644 collectors/python.d.plugin/ipfs/README.md create mode 100644 collectors/python.d.plugin/ipfs/ipfs.chart.py create mode 100644 collectors/python.d.plugin/ipfs/ipfs.conf create mode 100644 collectors/python.d.plugin/litespeed/Makefile.inc create mode 100644 collectors/python.d.plugin/litespeed/README.md create mode 100644 collectors/python.d.plugin/litespeed/litespeed.chart.py create mode 100644 collectors/python.d.plugin/litespeed/litespeed.conf create mode 100644 collectors/python.d.plugin/logind/Makefile.inc create mode 100644 collectors/python.d.plugin/logind/README.md create mode 100644 collectors/python.d.plugin/logind/logind.chart.py create mode 100644 collectors/python.d.plugin/logind/logind.conf create mode 100644 collectors/python.d.plugin/megacli/Makefile.inc create mode 100644 collectors/python.d.plugin/megacli/README.md create mode 100644 collectors/python.d.plugin/megacli/megacli.chart.py create mode 100644 collectors/python.d.plugin/megacli/megacli.conf create mode 100644 collectors/python.d.plugin/memcached/Makefile.inc create mode 100644 collectors/python.d.plugin/memcached/README.md create mode 100644 collectors/python.d.plugin/memcached/memcached.chart.py create mode 100644 collectors/python.d.plugin/memcached/memcached.conf create mode 100644 collectors/python.d.plugin/mongodb/Makefile.inc create mode 100644 collectors/python.d.plugin/mongodb/README.md create mode 100644 collectors/python.d.plugin/mongodb/mongodb.chart.py create mode 100644 collectors/python.d.plugin/mongodb/mongodb.conf create mode 100644 collectors/python.d.plugin/monit/Makefile.inc create mode 100644 collectors/python.d.plugin/monit/README.md create mode 100644 collectors/python.d.plugin/monit/monit.chart.py create mode 100644 collectors/python.d.plugin/monit/monit.conf create mode 100644 collectors/python.d.plugin/nsd/Makefile.inc create mode 100644 collectors/python.d.plugin/nsd/README.md create mode 100644 collectors/python.d.plugin/nsd/nsd.chart.py create mode 100644 collectors/python.d.plugin/nsd/nsd.conf create mode 100644 collectors/python.d.plugin/ntpd/Makefile.inc create mode 100644 collectors/python.d.plugin/ntpd/README.md create mode 100644 collectors/python.d.plugin/ntpd/ntpd.chart.py create mode 100644 collectors/python.d.plugin/ntpd/ntpd.conf create mode 100644 collectors/python.d.plugin/nvidia_smi/Makefile.inc create mode 100644 collectors/python.d.plugin/nvidia_smi/README.md create mode 100644 collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py create mode 100644 collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf create mode 100644 collectors/python.d.plugin/openldap/Makefile.inc create mode 100644 collectors/python.d.plugin/openldap/README.md create mode 100644 collectors/python.d.plugin/openldap/openldap.chart.py create mode 100644 collectors/python.d.plugin/openldap/openldap.conf create mode 100644 collectors/python.d.plugin/oracledb/Makefile.inc create mode 100644 collectors/python.d.plugin/oracledb/README.md create mode 100644 collectors/python.d.plugin/oracledb/oracledb.chart.py create mode 100644 collectors/python.d.plugin/oracledb/oracledb.conf create mode 100644 collectors/python.d.plugin/pandas/Makefile.inc create mode 100644 collectors/python.d.plugin/pandas/README.md create mode 100644 collectors/python.d.plugin/pandas/pandas.chart.py create mode 100644 collectors/python.d.plugin/pandas/pandas.conf create mode 100644 collectors/python.d.plugin/postfix/Makefile.inc create mode 100644 collectors/python.d.plugin/postfix/README.md create mode 100644 collectors/python.d.plugin/postfix/postfix.chart.py create mode 100644 collectors/python.d.plugin/postfix/postfix.conf create mode 100644 collectors/python.d.plugin/proxysql/Makefile.inc create mode 100644 collectors/python.d.plugin/proxysql/README.md create mode 100644 collectors/python.d.plugin/proxysql/proxysql.chart.py create mode 100644 collectors/python.d.plugin/proxysql/proxysql.conf create mode 100644 collectors/python.d.plugin/puppet/Makefile.inc create mode 100644 collectors/python.d.plugin/puppet/README.md create mode 100644 collectors/python.d.plugin/puppet/puppet.chart.py create mode 100644 collectors/python.d.plugin/puppet/puppet.conf create mode 100644 collectors/python.d.plugin/python.d.conf create mode 100644 collectors/python.d.plugin/python.d.plugin.in create mode 100644 collectors/python.d.plugin/python_modules/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/bases/FrameworkServices/ExecutableService.py create mode 100644 collectors/python.d.plugin/python_modules/bases/FrameworkServices/LogService.py create mode 100644 collectors/python.d.plugin/python_modules/bases/FrameworkServices/MySQLService.py create mode 100644 collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py create mode 100644 collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py create mode 100644 collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py create mode 100644 collectors/python.d.plugin/python_modules/bases/FrameworkServices/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/bases/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/bases/charts.py create mode 100644 collectors/python.d.plugin/python_modules/bases/collection.py create mode 100644 collectors/python.d.plugin/python_modules/bases/loaders.py create mode 100644 collectors/python.d.plugin/python_modules/bases/loggers.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/composer.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/constructor.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/cyaml.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/dumper.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/emitter.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/error.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/events.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/loader.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/nodes.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/parser.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/reader.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/representer.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/resolver.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/scanner.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/serializer.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml2/tokens.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/composer.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/constructor.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/cyaml.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/dumper.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/emitter.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/error.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/events.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/loader.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/nodes.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/parser.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/reader.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/representer.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/resolver.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/scanner.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/serializer.py create mode 100644 collectors/python.d.plugin/python_modules/pyyaml3/tokens.py create mode 100644 collectors/python.d.plugin/python_modules/third_party/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/third_party/boinc_client.py create mode 100644 collectors/python.d.plugin/python_modules/third_party/filelock.py create mode 100644 collectors/python.d.plugin/python_modules/third_party/lm_sensors.py create mode 100644 collectors/python.d.plugin/python_modules/third_party/mcrcon.py create mode 100644 collectors/python.d.plugin/python_modules/third_party/monotonic.py create mode 100644 collectors/python.d.plugin/python_modules/third_party/ordereddict.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/_collections.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/connection.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/connectionpool.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/bindings.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/low_level.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/appengine.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/ntlmpool.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/pyopenssl.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/securetransport.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/contrib/socks.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/exceptions.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/fields.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/filepost.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/packages/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/packages/backports/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/packages/backports/makefile.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/packages/ordered_dict.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/packages/six.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/_implementation.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/poolmanager.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/request.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/response.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/__init__.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/connection.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/request.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/response.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/retry.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/selectors.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/ssl_.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/timeout.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/url.py create mode 100644 collectors/python.d.plugin/python_modules/urllib3/util/wait.py create mode 100644 collectors/python.d.plugin/rabbitmq/Makefile.inc create mode 100644 collectors/python.d.plugin/rabbitmq/README.md create mode 100644 collectors/python.d.plugin/rabbitmq/rabbitmq.chart.py create mode 100644 collectors/python.d.plugin/rabbitmq/rabbitmq.conf create mode 100644 collectors/python.d.plugin/rethinkdbs/Makefile.inc create mode 100644 collectors/python.d.plugin/rethinkdbs/README.md create mode 100644 collectors/python.d.plugin/rethinkdbs/rethinkdbs.chart.py create mode 100644 collectors/python.d.plugin/rethinkdbs/rethinkdbs.conf create mode 100644 collectors/python.d.plugin/retroshare/Makefile.inc create mode 100644 collectors/python.d.plugin/retroshare/README.md create mode 100644 collectors/python.d.plugin/retroshare/retroshare.chart.py create mode 100644 collectors/python.d.plugin/retroshare/retroshare.conf create mode 100644 collectors/python.d.plugin/riakkv/Makefile.inc create mode 100644 collectors/python.d.plugin/riakkv/README.md create mode 100644 collectors/python.d.plugin/riakkv/riakkv.chart.py create mode 100644 collectors/python.d.plugin/riakkv/riakkv.conf create mode 100644 collectors/python.d.plugin/samba/Makefile.inc create mode 100644 collectors/python.d.plugin/samba/README.md create mode 100644 collectors/python.d.plugin/samba/samba.chart.py create mode 100644 collectors/python.d.plugin/samba/samba.conf create mode 100644 collectors/python.d.plugin/sensors/Makefile.inc create mode 100644 collectors/python.d.plugin/sensors/README.md create mode 100644 collectors/python.d.plugin/sensors/sensors.chart.py create mode 100644 collectors/python.d.plugin/sensors/sensors.conf create mode 100644 collectors/python.d.plugin/smartd_log/Makefile.inc create mode 100644 collectors/python.d.plugin/smartd_log/README.md create mode 100644 collectors/python.d.plugin/smartd_log/smartd_log.chart.py create mode 100644 collectors/python.d.plugin/smartd_log/smartd_log.conf create mode 100644 collectors/python.d.plugin/spigotmc/Makefile.inc create mode 100644 collectors/python.d.plugin/spigotmc/README.md create mode 100644 collectors/python.d.plugin/spigotmc/spigotmc.chart.py create mode 100644 collectors/python.d.plugin/spigotmc/spigotmc.conf create mode 100644 collectors/python.d.plugin/springboot/Makefile.inc create mode 100644 collectors/python.d.plugin/springboot/README.md create mode 100644 collectors/python.d.plugin/springboot/springboot.chart.py create mode 100644 collectors/python.d.plugin/springboot/springboot.conf create mode 100644 collectors/python.d.plugin/squid/Makefile.inc create mode 100644 collectors/python.d.plugin/squid/README.md create mode 100644 collectors/python.d.plugin/squid/squid.chart.py create mode 100644 collectors/python.d.plugin/squid/squid.conf create mode 100644 collectors/python.d.plugin/tomcat/Makefile.inc create mode 100644 collectors/python.d.plugin/tomcat/README.md create mode 100644 collectors/python.d.plugin/tomcat/tomcat.chart.py create mode 100644 collectors/python.d.plugin/tomcat/tomcat.conf create mode 100644 collectors/python.d.plugin/tor/Makefile.inc create mode 100644 collectors/python.d.plugin/tor/README.md create mode 100644 collectors/python.d.plugin/tor/tor.chart.py create mode 100644 collectors/python.d.plugin/tor/tor.conf create mode 100644 collectors/python.d.plugin/traefik/Makefile.inc create mode 100644 collectors/python.d.plugin/traefik/README.md create mode 100644 collectors/python.d.plugin/traefik/traefik.chart.py create mode 100644 collectors/python.d.plugin/traefik/traefik.conf create mode 100644 collectors/python.d.plugin/uwsgi/Makefile.inc create mode 100644 collectors/python.d.plugin/uwsgi/README.md create mode 100644 collectors/python.d.plugin/uwsgi/uwsgi.chart.py create mode 100644 collectors/python.d.plugin/uwsgi/uwsgi.conf create mode 100644 collectors/python.d.plugin/varnish/Makefile.inc create mode 100644 collectors/python.d.plugin/varnish/README.md create mode 100644 collectors/python.d.plugin/varnish/varnish.chart.py create mode 100644 collectors/python.d.plugin/varnish/varnish.conf create mode 100644 collectors/python.d.plugin/w1sensor/Makefile.inc create mode 100644 collectors/python.d.plugin/w1sensor/README.md create mode 100644 collectors/python.d.plugin/w1sensor/w1sensor.chart.py create mode 100644 collectors/python.d.plugin/w1sensor/w1sensor.conf create mode 100644 collectors/python.d.plugin/zscores/Makefile.inc create mode 100644 collectors/python.d.plugin/zscores/README.md create mode 100644 collectors/python.d.plugin/zscores/zscores.chart.py create mode 100644 collectors/python.d.plugin/zscores/zscores.conf create mode 100644 collectors/slabinfo.plugin/Makefile.am create mode 100644 collectors/slabinfo.plugin/README.md create mode 100644 collectors/slabinfo.plugin/slabinfo.c create mode 100644 collectors/statsd.plugin/Makefile.am create mode 100644 collectors/statsd.plugin/README.md create mode 100644 collectors/statsd.plugin/asterisk.conf create mode 100644 collectors/statsd.plugin/asterisk.md create mode 100644 collectors/statsd.plugin/example.conf create mode 100644 collectors/statsd.plugin/k6.conf create mode 100644 collectors/statsd.plugin/k6.md create mode 100644 collectors/statsd.plugin/statsd.c create mode 100644 collectors/tc.plugin/Makefile.am create mode 100644 collectors/tc.plugin/README.md create mode 100644 collectors/tc.plugin/plugin_tc.c create mode 100755 collectors/tc.plugin/tc-qos-helper.sh.in create mode 100644 collectors/timex.plugin/Makefile.am create mode 100644 collectors/timex.plugin/README.md create mode 100644 collectors/timex.plugin/plugin_timex.c create mode 100644 collectors/xenstat.plugin/Makefile.am create mode 100644 collectors/xenstat.plugin/README.md create mode 100644 collectors/xenstat.plugin/xenstat_plugin.c create mode 100644 config.cmake.h.in create mode 100644 configs.signatures create mode 100644 configure.ac create mode 100644 contrib/README.md create mode 100644 contrib/debian/changelog create mode 100644 contrib/debian/compat create mode 100644 contrib/debian/control create mode 100644 contrib/debian/control.xenial create mode 100644 contrib/debian/copyright create mode 100755 contrib/debian/install_go.sh create mode 100644 contrib/debian/netdata.default create mode 100644 contrib/debian/netdata.docs create mode 100755 contrib/debian/netdata.init create mode 100644 contrib/debian/netdata.install create mode 100644 contrib/debian/netdata.lintian-overrides create mode 100644 contrib/debian/netdata.postinst create mode 100644 contrib/debian/netdata.postrm create mode 100644 contrib/debian/netdata.preinst create mode 100755 contrib/debian/rules create mode 100644 contrib/debian/source/format create mode 100755 contrib/rhel/build-netdata-rpm.sh create mode 100755 coverity-scan.sh create mode 100755 cppcheck.sh create mode 100644 daemon/Makefile.am create mode 100644 daemon/README.md create mode 100644 daemon/analytics.c create mode 100644 daemon/analytics.h create mode 100755 daemon/anonymous-statistics.sh.in create mode 100644 daemon/buildinfo.c create mode 100644 daemon/buildinfo.h create mode 100644 daemon/commands.c create mode 100644 daemon/commands.h create mode 100644 daemon/common.c create mode 100644 daemon/common.h create mode 100644 daemon/config/README.md create mode 100644 daemon/daemon.c create mode 100644 daemon/daemon.h create mode 100644 daemon/get-kubernetes-labels.sh.in create mode 100644 daemon/global_statistics.c create mode 100644 daemon/global_statistics.h create mode 100644 daemon/main.c create mode 100644 daemon/main.h create mode 100644 daemon/service.c create mode 100644 daemon/signals.c create mode 100644 daemon/signals.h create mode 100644 daemon/static_threads.c create mode 100644 daemon/static_threads.h create mode 100644 daemon/static_threads_freebsd.c create mode 100644 daemon/static_threads_linux.c create mode 100644 daemon/static_threads_macos.c create mode 100755 daemon/system-info.sh create mode 100644 daemon/unit_test.c create mode 100644 daemon/unit_test.h create mode 100644 database/KolmogorovSmirnovDist.c create mode 100644 database/KolmogorovSmirnovDist.h create mode 100644 database/Makefile.am create mode 100644 database/README.md create mode 100644 database/engine/Makefile.am create mode 100644 database/engine/README.md create mode 100644 database/engine/datafile.c create mode 100644 database/engine/datafile.h create mode 100644 database/engine/journalfile.c create mode 100644 database/engine/journalfile.h create mode 100644 database/engine/metadata_log/README.md create mode 100644 database/engine/pagecache.c create mode 100644 database/engine/pagecache.h create mode 100644 database/engine/rrddiskprotocol.h create mode 100644 database/engine/rrdengine.c create mode 100644 database/engine/rrdengine.h create mode 100755 database/engine/rrdengineapi.c create mode 100644 database/engine/rrdengineapi.h create mode 100644 database/engine/rrdenginelib.c create mode 100644 database/engine/rrdenginelib.h create mode 100644 database/engine/rrdenglocking.c create mode 100644 database/engine/rrdenglocking.h create mode 100644 database/ram/Makefile.am create mode 100644 database/ram/README.md create mode 100644 database/ram/rrddim_mem.c create mode 100644 database/ram/rrddim_mem.h create mode 100644 database/rrd.c create mode 100644 database/rrd.h create mode 100644 database/rrdcalc.c create mode 100644 database/rrdcalc.h create mode 100644 database/rrdcalctemplate.c create mode 100644 database/rrdcalctemplate.h create mode 100644 database/rrdcontext.c create mode 100644 database/rrdcontext.h create mode 100644 database/rrddim.c create mode 100644 database/rrddimvar.c create mode 100644 database/rrddimvar.h create mode 100644 database/rrdfamily.c create mode 100644 database/rrdfunctions.c create mode 100644 database/rrdfunctions.h create mode 100644 database/rrdhost.c create mode 100644 database/rrdlabels.c create mode 100644 database/rrdset.c create mode 100644 database/rrdsetvar.c create mode 100644 database/rrdsetvar.h create mode 100644 database/rrdvar.c create mode 100644 database/rrdvar.h create mode 100644 database/sqlite/Makefile.am create mode 100644 database/sqlite/sqlite3.c create mode 100644 database/sqlite/sqlite3.h create mode 100644 database/sqlite/sqlite_aclk.c create mode 100644 database/sqlite/sqlite_aclk.h create mode 100644 database/sqlite/sqlite_aclk_alert.c create mode 100644 database/sqlite/sqlite_aclk_alert.h create mode 100644 database/sqlite/sqlite_aclk_node.c create mode 100644 database/sqlite/sqlite_aclk_node.h create mode 100644 database/sqlite/sqlite_context.c create mode 100644 database/sqlite/sqlite_context.h create mode 100644 database/sqlite/sqlite_db_migration.c create mode 100644 database/sqlite/sqlite_db_migration.h create mode 100644 database/sqlite/sqlite_functions.c create mode 100644 database/sqlite/sqlite_functions.h create mode 100644 database/sqlite/sqlite_health.c create mode 100644 database/sqlite/sqlite_health.h create mode 100644 database/sqlite/sqlite_metadata.c create mode 100644 database/sqlite/sqlite_metadata.h create mode 100644 database/storage_engine.c create mode 100644 database/storage_engine.h create mode 100644 diagrams/Makefile.am create mode 100755 diagrams/build.sh create mode 100644 diagrams/config.puml create mode 100644 diagrams/data_structures/README.md create mode 100644 diagrams/data_structures/netdata_config.svg create mode 100644 diagrams/data_structures/registry.svg create mode 100644 diagrams/data_structures/rrd.svg create mode 100644 diagrams/data_structures/src/netdata_config.xml create mode 100644 diagrams/data_structures/src/registry.xml create mode 100644 diagrams/data_structures/src/rrd.xml create mode 100644 diagrams/data_structures/src/web.xml create mode 100644 diagrams/data_structures/web.svg create mode 100644 diagrams/netdata-for-ephemeral-nodes.xml create mode 100644 diagrams/netdata-overview.xml create mode 100644 diagrams/netdata-proxies-example.xml create mode 100644 diagrams/registry.puml create mode 100644 docs/.templates/.page-level/_collector-page-template.mdx create mode 100644 docs/.templates/.page-level/_concept-page-template.md create mode 100644 docs/.templates/.page-level/_task-page-template.md create mode 100644 docs/.templates/.page-level/_tutorial-page-template.mdx create mode 100644 docs/Add-more-charts-to-netdata.md create mode 100644 docs/Demo-Sites.md create mode 100644 docs/Donations-netdata-has-received.md create mode 100644 docs/README.md create mode 100644 docs/Running-behind-apache.md create mode 100644 docs/Running-behind-caddy.md create mode 100644 docs/Running-behind-h2o.md create mode 100644 docs/Running-behind-haproxy.md create mode 100644 docs/Running-behind-lighttpd.md create mode 100644 docs/Running-behind-nginx.md create mode 100644 docs/a-github-star-is-important.md create mode 100644 docs/agent-cloud.md create mode 100644 docs/anonymous-statistics.md create mode 100644 docs/collect/application-metrics.md create mode 100644 docs/collect/container-metrics.md create mode 100644 docs/collect/enable-configure.md create mode 100644 docs/collect/how-collectors-work.md create mode 100644 docs/collect/system-metrics.md create mode 100644 docs/configure/common-changes.md create mode 100644 docs/configure/nodes.md create mode 100644 docs/configure/secure-nodes.md create mode 100644 docs/configure/start-stop-restart.md create mode 100644 docs/contributing/contributing-documentation.md create mode 100644 docs/contributing/style-guide.md create mode 100644 docs/dashboard/customize.mdx create mode 100644 docs/dashboard/dimensions-contexts-families.mdx create mode 100644 docs/dashboard/how-dashboard-works.mdx create mode 100644 docs/dashboard/import-export-print-snapshot.mdx create mode 100644 docs/dashboard/interact-charts.mdx create mode 100644 docs/dashboard/reference-web-server.mdx create mode 100644 docs/dashboard/visualization-date-and-time-controls.mdx create mode 100644 docs/export/enable-connector.md create mode 100644 docs/export/external-databases.md create mode 100644 docs/get-started.mdx create mode 100644 docs/guides/collect-apache-nginx-web-logs.md create mode 100644 docs/guides/collect-unbound-metrics.md create mode 100644 docs/guides/configure/performance.md create mode 100644 docs/guides/deploy/ansible.md create mode 100644 docs/guides/export/export-netdata-metrics-graphite.md create mode 100644 docs/guides/longer-metrics-storage.md create mode 100644 docs/guides/monitor-cockroachdb.md create mode 100644 docs/guides/monitor-hadoop-cluster.md create mode 100644 docs/guides/monitor/anomaly-detection-python.md create mode 100644 docs/guides/monitor/anomaly-detection.md create mode 100644 docs/guides/monitor/dimension-templates.md create mode 100644 docs/guides/monitor/kubernetes-k8s-netdata.md create mode 100644 docs/guides/monitor/lamp-stack.md create mode 100644 docs/guides/monitor/pi-hole-raspberry-pi.md create mode 100644 docs/guides/monitor/process.md create mode 100644 docs/guides/monitor/raspberry-pi-anomaly-detection.md create mode 100644 docs/guides/monitor/statsd.md create mode 100644 docs/guides/monitor/stop-notifications-alarms.md create mode 100644 docs/guides/monitor/visualize-monitor-anomalies.md create mode 100644 docs/guides/python-collector.md create mode 100644 docs/guides/step-by-step/step-00.md create mode 100644 docs/guides/step-by-step/step-01.md create mode 100644 docs/guides/step-by-step/step-02.md create mode 100644 docs/guides/step-by-step/step-03.md create mode 100644 docs/guides/step-by-step/step-04.md create mode 100644 docs/guides/step-by-step/step-05.md create mode 100644 docs/guides/step-by-step/step-06.md create mode 100644 docs/guides/step-by-step/step-07.md create mode 100644 docs/guides/step-by-step/step-08.md create mode 100644 docs/guides/step-by-step/step-09.md create mode 100644 docs/guides/step-by-step/step-10.md create mode 100644 docs/guides/step-by-step/step-99.md create mode 100644 docs/guides/troubleshoot/monitor-debug-applications-ebpf.md create mode 100644 docs/guides/troubleshoot/troubleshooting-agent-with-cloud-connection.md create mode 100644 docs/guides/using-host-labels.md create mode 100644 docs/metrics-storage-management/enable-streaming.mdx create mode 100644 docs/metrics-storage-management/how-streaming-works.mdx create mode 100644 docs/metrics-storage-management/reference-streaming.mdx create mode 100644 docs/monitor/configure-alarms.md create mode 100644 docs/monitor/enable-notifications.md create mode 100644 docs/monitor/view-active-alarms.md create mode 100644 docs/netdata-for-IoT.md create mode 100644 docs/netdata-security.md create mode 100644 docs/overview/netdata-monitoring-stack.md create mode 100644 docs/overview/what-is-netdata.md create mode 100644 docs/overview/why-netdata.md create mode 100644 docs/quickstart/infrastructure.md create mode 100644 docs/quickstart/single-node.md create mode 100644 docs/store/change-metrics-storage.md create mode 100644 docs/store/distributed-data-architecture.md create mode 100644 docs/visualize/create-dashboards.md create mode 100644 docs/visualize/interact-dashboards-charts.md create mode 100644 docs/visualize/overview-infrastructure.md create mode 100644 docs/why-netdata/1s-granularity.md create mode 100644 docs/why-netdata/README.md create mode 100644 docs/why-netdata/immediate-results.md create mode 100644 docs/why-netdata/meaningful-presentation.md create mode 100644 docs/why-netdata/unlimited-metrics.md create mode 100644 exporting/Makefile.am create mode 100644 exporting/README.md create mode 100644 exporting/TIMESCALE.md create mode 100644 exporting/WALKTHROUGH.md create mode 100644 exporting/aws_kinesis/Makefile.am create mode 100644 exporting/aws_kinesis/README.md create mode 100644 exporting/aws_kinesis/aws_kinesis.c create mode 100644 exporting/aws_kinesis/aws_kinesis.h create mode 100644 exporting/aws_kinesis/aws_kinesis_put_record.cc create mode 100644 exporting/aws_kinesis/aws_kinesis_put_record.h create mode 100644 exporting/check_filters.c create mode 100644 exporting/clean_connectors.c create mode 100644 exporting/exporting.conf create mode 100644 exporting/exporting_engine.c create mode 100644 exporting/exporting_engine.h create mode 100644 exporting/graphite/Makefile.am create mode 100644 exporting/graphite/README.md create mode 100644 exporting/graphite/graphite.c create mode 100644 exporting/graphite/graphite.h create mode 100644 exporting/init_connectors.c create mode 100644 exporting/json/Makefile.am create mode 100644 exporting/json/README.md create mode 100644 exporting/json/json.c create mode 100644 exporting/json/json.h create mode 100644 exporting/mongodb/Makefile.am create mode 100644 exporting/mongodb/README.md create mode 100644 exporting/mongodb/mongodb.c create mode 100644 exporting/mongodb/mongodb.h create mode 100755 exporting/nc-exporting.sh create mode 100644 exporting/opentsdb/Makefile.am create mode 100644 exporting/opentsdb/README.md create mode 100644 exporting/opentsdb/opentsdb.c create mode 100644 exporting/opentsdb/opentsdb.h create mode 100644 exporting/process_data.c create mode 100644 exporting/prometheus/Makefile.am create mode 100644 exporting/prometheus/README.md create mode 100644 exporting/prometheus/prometheus.c create mode 100644 exporting/prometheus/prometheus.h create mode 100644 exporting/prometheus/remote_write/Makefile.am create mode 100644 exporting/prometheus/remote_write/README.md create mode 100644 exporting/prometheus/remote_write/remote_write.c create mode 100644 exporting/prometheus/remote_write/remote_write.h create mode 100644 exporting/prometheus/remote_write/remote_write.proto create mode 100644 exporting/prometheus/remote_write/remote_write_request.cc create mode 100644 exporting/prometheus/remote_write/remote_write_request.h create mode 100644 exporting/pubsub/Makefile.am create mode 100644 exporting/pubsub/README.md create mode 100644 exporting/pubsub/pubsub.c create mode 100644 exporting/pubsub/pubsub.h create mode 100644 exporting/pubsub/pubsub_publish.cc create mode 100644 exporting/pubsub/pubsub_publish.h create mode 100644 exporting/read_config.c create mode 100644 exporting/send_data.c create mode 100644 exporting/send_internal_metrics.c create mode 100644 exporting/tests/Makefile.am create mode 100644 exporting/tests/exporting_doubles.c create mode 100644 exporting/tests/exporting_fixtures.c create mode 100644 exporting/tests/netdata_doubles.c create mode 100644 exporting/tests/system_doubles.c create mode 100644 exporting/tests/test_exporting_engine.c create mode 100644 exporting/tests/test_exporting_engine.h create mode 100644 health/Makefile.am create mode 100644 health/QUICKSTART.md create mode 100644 health/README.md create mode 100644 health/REFERENCE.md create mode 100644 health/health.c create mode 100644 health/health.d/adaptec_raid.conf create mode 100644 health/health.d/anomalies.conf create mode 100644 health/health.d/apcupsd.conf create mode 100644 health/health.d/bcache.conf create mode 100644 health/health.d/beanstalkd.conf create mode 100644 health/health.d/bind_rndc.conf create mode 100644 health/health.d/boinc.conf create mode 100644 health/health.d/btrfs.conf create mode 100644 health/health.d/ceph.conf create mode 100644 health/health.d/cgroups.conf create mode 100644 health/health.d/cockroachdb.conf create mode 100644 health/health.d/cpu.conf create mode 100644 health/health.d/dbengine.conf create mode 100644 health/health.d/disks.conf create mode 100644 health/health.d/dns_query.conf create mode 100644 health/health.d/dnsmasq_dhcp.conf create mode 100644 health/health.d/dockerd.conf create mode 100644 health/health.d/entropy.conf create mode 100644 health/health.d/exporting.conf create mode 100644 health/health.d/fping.conf create mode 100644 health/health.d/gearman.conf create mode 100644 health/health.d/geth.conf create mode 100644 health/health.d/go.d.plugin.conf create mode 100644 health/health.d/haproxy.conf create mode 100644 health/health.d/hdfs.conf create mode 100644 health/health.d/httpcheck.conf create mode 100644 health/health.d/ioping.conf create mode 100644 health/health.d/ipc.conf create mode 100644 health/health.d/ipfs.conf create mode 100644 health/health.d/ipmi.conf create mode 100644 health/health.d/isc_dhcpd.conf create mode 100644 health/health.d/kubelet.conf create mode 100644 health/health.d/linux_power_supply.conf create mode 100644 health/health.d/load.conf create mode 100644 health/health.d/mdstat.conf create mode 100644 health/health.d/megacli.conf create mode 100644 health/health.d/memcached.conf create mode 100644 health/health.d/memory.conf create mode 100644 health/health.d/ml.conf create mode 100644 health/health.d/mysql.conf create mode 100644 health/health.d/net.conf create mode 100644 health/health.d/netfilter.conf create mode 100644 health/health.d/nut.conf create mode 100644 health/health.d/nvme.conf create mode 100644 health/health.d/pihole.conf create mode 100644 health/health.d/ping.conf create mode 100644 health/health.d/portcheck.conf create mode 100644 health/health.d/postgres.conf create mode 100644 health/health.d/processes.conf create mode 100644 health/health.d/python.d.plugin.conf create mode 100644 health/health.d/qos.conf create mode 100644 health/health.d/ram.conf create mode 100644 health/health.d/redis.conf create mode 100644 health/health.d/retroshare.conf create mode 100644 health/health.d/riakkv.conf create mode 100644 health/health.d/scaleio.conf create mode 100644 health/health.d/softnet.conf create mode 100644 health/health.d/swap.conf create mode 100644 health/health.d/synchronization.conf create mode 100644 health/health.d/systemdunits.conf create mode 100644 health/health.d/tcp_conn.conf create mode 100644 health/health.d/tcp_listen.conf create mode 100644 health/health.d/tcp_mem.conf create mode 100644 health/health.d/tcp_orphans.conf create mode 100644 health/health.d/tcp_resets.conf create mode 100644 health/health.d/timex.conf create mode 100644 health/health.d/udp_errors.conf create mode 100644 health/health.d/unbound.conf create mode 100644 health/health.d/vcsa.conf create mode 100644 health/health.d/vernemq.conf create mode 100644 health/health.d/vsphere.conf create mode 100644 health/health.d/web_log.conf create mode 100644 health/health.d/whoisquery.conf create mode 100644 health/health.d/wmi.conf create mode 100644 health/health.d/x509check.conf create mode 100644 health/health.d/zfs.conf create mode 100644 health/health.h create mode 100644 health/health_config.c create mode 100644 health/health_json.c create mode 100644 health/health_log.c create mode 100644 health/notifications/Makefile.am create mode 100644 health/notifications/README.md create mode 100755 health/notifications/alarm-email.sh create mode 100755 health/notifications/alarm-notify.sh.in create mode 100755 health/notifications/alarm-test.sh create mode 100644 health/notifications/alerta/Makefile.inc create mode 100644 health/notifications/alerta/README.md create mode 100644 health/notifications/awssns/Makefile.inc create mode 100644 health/notifications/awssns/README.md create mode 100644 health/notifications/custom/Makefile.inc create mode 100644 health/notifications/custom/README.md create mode 100644 health/notifications/discord/Makefile.inc create mode 100644 health/notifications/discord/README.md create mode 100644 health/notifications/dynatrace/Makefile.inc create mode 100644 health/notifications/dynatrace/README.md create mode 100644 health/notifications/email/Makefile.inc create mode 100644 health/notifications/email/README.md create mode 100644 health/notifications/flock/Makefile.inc create mode 100644 health/notifications/flock/README.md create mode 100644 health/notifications/gotify/Makefile.inc create mode 100644 health/notifications/gotify/README.md create mode 100644 health/notifications/hangouts/Makefile.inc create mode 100644 health/notifications/hangouts/README.md create mode 100755 health/notifications/health_alarm_notify.conf create mode 100644 health/notifications/health_email_recipients.conf create mode 100644 health/notifications/irc/Makefile.inc create mode 100644 health/notifications/irc/README.md create mode 100644 health/notifications/kavenegar/Makefile.inc create mode 100644 health/notifications/kavenegar/README.md create mode 100644 health/notifications/matrix/Makefile.inc create mode 100644 health/notifications/matrix/README.md create mode 100644 health/notifications/messagebird/Makefile.inc create mode 100644 health/notifications/messagebird/README.md create mode 100644 health/notifications/msteams/Makefile.inc create mode 100644 health/notifications/msteams/README.md create mode 100644 health/notifications/opsgenie/Makefile.inc create mode 100644 health/notifications/opsgenie/README.md create mode 100644 health/notifications/pagerduty/Makefile.inc create mode 100644 health/notifications/pagerduty/README.md create mode 100644 health/notifications/prowl/Makefile.inc create mode 100644 health/notifications/prowl/README.md create mode 100644 health/notifications/pushbullet/Makefile.inc create mode 100644 health/notifications/pushbullet/README.md create mode 100644 health/notifications/pushover/Makefile.inc create mode 100644 health/notifications/pushover/README.md create mode 100644 health/notifications/rocketchat/Makefile.inc create mode 100644 health/notifications/rocketchat/README.md create mode 100644 health/notifications/slack/Makefile.inc create mode 100644 health/notifications/slack/README.md create mode 100644 health/notifications/smstools3/Makefile.inc create mode 100644 health/notifications/smstools3/README.md create mode 100644 health/notifications/stackpulse/Makefile.inc create mode 100644 health/notifications/stackpulse/README.md create mode 100644 health/notifications/syslog/Makefile.inc create mode 100644 health/notifications/syslog/README.md create mode 100644 health/notifications/telegram/Makefile.inc create mode 100644 health/notifications/telegram/README.md create mode 100644 health/notifications/twilio/Makefile.inc create mode 100644 health/notifications/twilio/README.md create mode 100644 health/notifications/web/Makefile.inc create mode 100644 health/notifications/web/README.md create mode 100644 libnetdata/Makefile.am create mode 100644 libnetdata/README.md create mode 100644 libnetdata/adaptive_resortable_list/Makefile.am create mode 100644 libnetdata/adaptive_resortable_list/README.md create mode 100644 libnetdata/adaptive_resortable_list/adaptive_resortable_list.c create mode 100644 libnetdata/adaptive_resortable_list/adaptive_resortable_list.h create mode 100644 libnetdata/arrayalloc/Makefile.am create mode 100644 libnetdata/arrayalloc/README.md create mode 100644 libnetdata/arrayalloc/arrayalloc.c create mode 100644 libnetdata/arrayalloc/arrayalloc.h create mode 100644 libnetdata/avl/Makefile.am create mode 100644 libnetdata/avl/README.md create mode 100644 libnetdata/avl/avl.c create mode 100644 libnetdata/avl/avl.h create mode 100644 libnetdata/buffer/Makefile.am create mode 100644 libnetdata/buffer/README.md create mode 100644 libnetdata/buffer/buffer.c create mode 100644 libnetdata/buffer/buffer.h create mode 100644 libnetdata/circular_buffer/Makefile.am create mode 100644 libnetdata/circular_buffer/README.md create mode 100644 libnetdata/circular_buffer/circular_buffer.c create mode 100644 libnetdata/circular_buffer/circular_buffer.h create mode 100644 libnetdata/clocks/Makefile.am create mode 100644 libnetdata/clocks/README.md create mode 100644 libnetdata/clocks/clocks.c create mode 100644 libnetdata/clocks/clocks.h create mode 100644 libnetdata/completion/Makefile.am create mode 100644 libnetdata/completion/completion.c create mode 100644 libnetdata/completion/completion.h create mode 100644 libnetdata/config/Makefile.am create mode 100644 libnetdata/config/README.md create mode 100644 libnetdata/config/appconfig.c create mode 100644 libnetdata/config/appconfig.h create mode 100644 libnetdata/dictionary/Makefile.am create mode 100644 libnetdata/dictionary/README.md create mode 100644 libnetdata/dictionary/dictionary.c create mode 100644 libnetdata/dictionary/dictionary.h create mode 100644 libnetdata/ebpf/Makefile.am create mode 100644 libnetdata/ebpf/README.md create mode 100644 libnetdata/ebpf/ebpf.c create mode 100644 libnetdata/ebpf/ebpf.h create mode 100644 libnetdata/eval/Makefile.am create mode 100644 libnetdata/eval/README.md create mode 100644 libnetdata/eval/eval.c create mode 100644 libnetdata/eval/eval.h create mode 100644 libnetdata/health/Makefile.am create mode 100644 libnetdata/health/health.c create mode 100644 libnetdata/health/health.h create mode 100644 libnetdata/inlined.h create mode 100644 libnetdata/json/Makefile.am create mode 100644 libnetdata/json/README.md create mode 100644 libnetdata/json/jsmn.c create mode 100644 libnetdata/json/jsmn.h create mode 100644 libnetdata/json/json.c create mode 100644 libnetdata/json/json.h create mode 100644 libnetdata/libjudy/src/Judy.h create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyMalloc.c create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyPrivate.h create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h create mode 100644 libnetdata/libjudy/src/JudyHS/JudyHS.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyL.h create mode 100644 libnetdata/libjudy/src/JudyL/JudyLByCount.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLCascade.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLCount.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLDecascade.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLDel.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLFirst.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLFreeArray.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLGet.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLIns.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLInsArray.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLMallocIF.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLMemActive.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLMemUsed.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLNext.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLPrev.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLTablesGen.c create mode 100644 libnetdata/libjudy/src/JudyL/j__udyLGet.c create mode 100644 libnetdata/libnetdata.c create mode 100644 libnetdata/libnetdata.h create mode 100644 libnetdata/locks/Makefile.am create mode 100644 libnetdata/locks/README.md create mode 100644 libnetdata/locks/locks.c create mode 100644 libnetdata/locks/locks.h create mode 100644 libnetdata/log/Makefile.am create mode 100644 libnetdata/log/README.md create mode 100644 libnetdata/log/log.c create mode 100644 libnetdata/log/log.h create mode 100644 libnetdata/onewayalloc/Makefile.am create mode 100644 libnetdata/onewayalloc/README.md create mode 100644 libnetdata/onewayalloc/onewayalloc.c create mode 100644 libnetdata/onewayalloc/onewayalloc.h create mode 100644 libnetdata/os.c create mode 100644 libnetdata/os.h create mode 100644 libnetdata/popen/Makefile.am create mode 100644 libnetdata/popen/README.md create mode 100644 libnetdata/popen/popen.c create mode 100644 libnetdata/popen/popen.h create mode 100644 libnetdata/procfile/Makefile.am create mode 100644 libnetdata/procfile/README.md create mode 100644 libnetdata/procfile/procfile.c create mode 100644 libnetdata/procfile/procfile.h create mode 100644 libnetdata/required_dummies.h create mode 100644 libnetdata/simple_pattern/Makefile.am create mode 100644 libnetdata/simple_pattern/README.md create mode 100644 libnetdata/simple_pattern/simple_pattern.c create mode 100644 libnetdata/simple_pattern/simple_pattern.h create mode 100644 libnetdata/socket/Makefile.am create mode 100644 libnetdata/socket/README.md create mode 100644 libnetdata/socket/security.c create mode 100644 libnetdata/socket/security.h create mode 100644 libnetdata/socket/socket.c create mode 100644 libnetdata/socket/socket.h create mode 100644 libnetdata/statistical/Makefile.am create mode 100644 libnetdata/statistical/README.md create mode 100644 libnetdata/statistical/statistical.c create mode 100644 libnetdata/statistical/statistical.h create mode 100644 libnetdata/storage_number/Makefile.am create mode 100644 libnetdata/storage_number/README.md create mode 100644 libnetdata/storage_number/storage_number.c create mode 100644 libnetdata/storage_number/storage_number.h create mode 100644 libnetdata/storage_number/tests/Makefile.am create mode 100644 libnetdata/storage_number/tests/test_storage_number.c create mode 100644 libnetdata/string/Makefile.am create mode 100644 libnetdata/string/README.md create mode 100644 libnetdata/string/string.c create mode 100644 libnetdata/string/string.h create mode 100644 libnetdata/string/utf8.h create mode 100644 libnetdata/tests/Makefile.am create mode 100644 libnetdata/tests/test_str2ld.c create mode 100644 libnetdata/threads/Makefile.am create mode 100644 libnetdata/threads/README.md create mode 100644 libnetdata/threads/threads.c create mode 100644 libnetdata/threads/threads.h create mode 100644 libnetdata/url/Makefile.am create mode 100644 libnetdata/url/README.md create mode 100644 libnetdata/url/url.c create mode 100644 libnetdata/url/url.h create mode 100644 libnetdata/worker_utilization/Makefile.am create mode 100644 libnetdata/worker_utilization/README.md create mode 100644 libnetdata/worker_utilization/worker_utilization.c create mode 100644 libnetdata/worker_utilization/worker_utilization.h create mode 100644 ml/ADCharts.cc create mode 100644 ml/ADCharts.h create mode 100644 ml/Config.cc create mode 100644 ml/Config.h create mode 100644 ml/Dimension.cc create mode 100644 ml/Dimension.h create mode 100644 ml/Host.cc create mode 100644 ml/Host.h create mode 100644 ml/KMeans.cc create mode 100644 ml/KMeans.h create mode 100644 ml/Query.h create mode 100644 ml/README.md create mode 100644 ml/SamplesBuffer.cc create mode 100644 ml/SamplesBuffer.h create mode 100644 ml/SamplesBufferTests.cc create mode 100644 ml/ml-dummy.c create mode 100644 ml/ml-private.h create mode 100644 ml/ml.cc create mode 100644 ml/ml.h create mode 100644 ml/notebooks/README.md create mode 100644 ml/notebooks/netdata_anomaly_detection_deepdive.ipynb create mode 100755 netdata-installer.sh create mode 100644 netdata.cppcheck create mode 100644 netdata.spec.in create mode 100644 packaging/PLATFORM_SUPPORT.md create mode 100755 packaging/build_package_install_test.sh create mode 100644 packaging/building-native-packages-locally.md create mode 100755 packaging/bundle-ebpf-co-re.sh create mode 100755 packaging/bundle-ebpf.sh create mode 100755 packaging/bundle-libbpf.sh create mode 100755 packaging/bundle-protobuf.sh create mode 100755 packaging/check-kernel-config.sh create mode 100644 packaging/current_libbpf.checksums create mode 100644 packaging/current_libbpf.version create mode 100644 packaging/docker/Dockerfile create mode 100644 packaging/docker/README.md create mode 100755 packaging/docker/gen-cflags.sh create mode 100755 packaging/docker/health.sh create mode 100755 packaging/docker/run.sh create mode 100644 packaging/ebpf-co-re.checksums create mode 100644 packaging/ebpf-co-re.version create mode 100644 packaging/ebpf.checksums create mode 100644 packaging/ebpf.version create mode 100644 packaging/go.d.checksums create mode 100644 packaging/go.d.version create mode 100644 packaging/installer/README.md create mode 100644 packaging/installer/REINSTALL.md create mode 100644 packaging/installer/UNINSTALL.md create mode 100644 packaging/installer/UPDATE.md create mode 100755 packaging/installer/dependencies/alpine.sh create mode 100755 packaging/installer/dependencies/arch.sh create mode 100755 packaging/installer/dependencies/centos.sh create mode 100755 packaging/installer/dependencies/clearlinux.sh create mode 100755 packaging/installer/dependencies/debian.sh create mode 100755 packaging/installer/dependencies/fedora.sh create mode 100755 packaging/installer/dependencies/freebsd.sh create mode 100755 packaging/installer/dependencies/gentoo.sh create mode 100755 packaging/installer/dependencies/macos.sh create mode 100755 packaging/installer/dependencies/ol.sh create mode 100755 packaging/installer/dependencies/opensuse.sh create mode 100755 packaging/installer/dependencies/rhel.sh create mode 100755 packaging/installer/dependencies/rockylinux.sh create mode 100755 packaging/installer/dependencies/sabayon.sh create mode 100755 packaging/installer/dependencies/ubuntu.sh create mode 100644 packaging/installer/functions.sh create mode 100755 packaging/installer/install-required-packages.sh create mode 120000 packaging/installer/kickstart-ng.sh create mode 120000 packaging/installer/kickstart-static64.sh create mode 100755 packaging/installer/kickstart.sh create mode 100644 packaging/installer/methods/cloud-providers.md create mode 100644 packaging/installer/methods/freebsd.md create mode 100644 packaging/installer/methods/freenas.md create mode 100644 packaging/installer/methods/kickstart.md create mode 100644 packaging/installer/methods/kubernetes.md create mode 100644 packaging/installer/methods/macos.md create mode 100644 packaging/installer/methods/manual.md create mode 100644 packaging/installer/methods/offline.md create mode 100644 packaging/installer/methods/pfsense.md create mode 100644 packaging/installer/methods/source.md create mode 100644 packaging/installer/methods/synology.md create mode 100755 packaging/installer/netdata-uninstaller.sh create mode 100755 packaging/installer/netdata-updater.sh create mode 100644 packaging/jsonc.checksums create mode 100644 packaging/jsonc.version create mode 100644 packaging/libbpf.checksums create mode 100644 packaging/libbpf.version create mode 100644 packaging/libbpf_0_0_9.checksums create mode 100644 packaging/libbpf_0_0_9.version create mode 100644 packaging/maintainers/README.md create mode 100644 packaging/makeself/README.md create mode 100755 packaging/makeself/build-static.sh create mode 100755 packaging/makeself/build-x86_64-static.sh create mode 100755 packaging/makeself/build.sh create mode 100755 packaging/makeself/functions.sh create mode 100755 packaging/makeself/install-alpine-packages.sh create mode 100755 packaging/makeself/install-or-update.sh create mode 100755 packaging/makeself/jobs/10-prepare-destination.install.sh create mode 100755 packaging/makeself/jobs/20-openssl.install.sh create mode 100755 packaging/makeself/jobs/50-bash-5.1.16.install.sh create mode 100755 packaging/makeself/jobs/50-curl-7.82.0.install.sh create mode 100755 packaging/makeself/jobs/50-fping-5.1.install.sh create mode 100755 packaging/makeself/jobs/50-ioping-1.2.install.sh create mode 100755 packaging/makeself/jobs/70-netdata-git.install.sh create mode 100755 packaging/makeself/jobs/90-netdata-runtime-check.sh create mode 100755 packaging/makeself/jobs/99-makeself.install.sh create mode 100755 packaging/makeself/makeself-header.sh create mode 100644 packaging/makeself/makeself-help-header.txt create mode 100644 packaging/makeself/makeself-license.txt create mode 100644 packaging/makeself/makeself.lsm create mode 100755 packaging/makeself/makeself.sh create mode 100644 packaging/makeself/openssl.version create mode 100755 packaging/makeself/post-installer.sh create mode 100755 packaging/makeself/run-all-jobs.sh create mode 100755 packaging/makeself/uname2platform.sh create mode 100644 packaging/protobuf.checksums create mode 100644 packaging/protobuf.version create mode 100644 packaging/repoconfig/Makefile create mode 100755 packaging/repoconfig/build-deb.sh create mode 100755 packaging/repoconfig/build-rpm.sh create mode 100644 packaging/repoconfig/debian/changelog create mode 100644 packaging/repoconfig/debian/compat create mode 100644 packaging/repoconfig/debian/control create mode 100644 packaging/repoconfig/debian/copyright create mode 100755 packaging/repoconfig/debian/rules create mode 100644 packaging/repoconfig/debian/source/format create mode 100644 packaging/repoconfig/netdata-edge.repo.centos create mode 100644 packaging/repoconfig/netdata-edge.repo.fedora create mode 100644 packaging/repoconfig/netdata-edge.repo.ol create mode 100644 packaging/repoconfig/netdata-edge.repo.suse create mode 100644 packaging/repoconfig/netdata-repo.spec create mode 100644 packaging/repoconfig/netdata.list.in create mode 100644 packaging/repoconfig/netdata.repo.centos create mode 100644 packaging/repoconfig/netdata.repo.fedora create mode 100644 packaging/repoconfig/netdata.repo.ol create mode 100644 packaging/repoconfig/netdata.repo.suse create mode 100644 packaging/version create mode 100644 parser/Makefile.am create mode 100644 parser/README.md create mode 100644 parser/parser.c create mode 100644 parser/parser.h create mode 100644 registry/Makefile.am create mode 100644 registry/README.md create mode 100644 registry/registry.c create mode 100644 registry/registry.h create mode 100644 registry/registry_db.c create mode 100644 registry/registry_init.c create mode 100644 registry/registry_internals.c create mode 100644 registry/registry_internals.h create mode 100644 registry/registry_log.c create mode 100644 registry/registry_machine.c create mode 100644 registry/registry_machine.h create mode 100644 registry/registry_person.c create mode 100644 registry/registry_person.h create mode 100644 registry/registry_url.c create mode 100644 registry/registry_url.h create mode 100644 spawn/Makefile.am create mode 100644 spawn/README.md create mode 100644 spawn/spawn.c create mode 100644 spawn/spawn.h create mode 100644 spawn/spawn_client.c create mode 100644 spawn/spawn_server.c create mode 100644 streaming/Makefile.am create mode 100644 streaming/README.md create mode 100644 streaming/compression.c create mode 100644 streaming/receiver.c create mode 100644 streaming/replication.c create mode 100644 streaming/replication.h create mode 100644 streaming/rrdpush.c create mode 100644 streaming/rrdpush.h create mode 100644 streaming/sender.c create mode 100644 streaming/stream.conf create mode 100644 system/.install-type create mode 100644 system/Makefile.am create mode 100755 system/edit-config.in create mode 100755 system/install-service.sh.in create mode 100644 system/netdata-freebsd.in create mode 100644 system/netdata-init-d.in create mode 100644 system/netdata-lsb.in create mode 100644 system/netdata-openrc.in create mode 100644 system/netdata-updater.service.in create mode 100644 system/netdata-updater.timer create mode 100644 system/netdata.conf create mode 100644 system/netdata.crontab.in create mode 100644 system/netdata.logrotate.in create mode 100644 system/netdata.plist.in create mode 100644 system/netdata.service.in create mode 100644 system/netdata.service.v235.in create mode 100644 tests/Makefile.am create mode 100644 tests/acls/acl.sh.in create mode 100644 tests/acls/netdata.cfg create mode 100644 tests/acls/netdata.ssl.cfg create mode 100644 tests/alarm_repetition/alarm.sh.in create mode 100644 tests/alarm_repetition/netdata.conf_with_repetition create mode 100644 tests/alarm_repetition/netdata.conf_without_repetition create mode 100644 tests/alarm_repetition/ram_with_repetition.conf create mode 100644 tests/alarm_repetition/ram_without_repetition.conf create mode 100644 tests/api/fuzzer.py create mode 100644 tests/ebpf/README.md create mode 100644 tests/ebpf/sync_tester.c create mode 100644 tests/exporting/prometheus-avg-oldunits.txt create mode 100644 tests/exporting/prometheus-avg.txt create mode 100644 tests/exporting/prometheus-raw.txt create mode 100755 tests/exporting/prometheus.bats create mode 100644 tests/health_mgmtapi/README.md create mode 100644 tests/health_mgmtapi/expected_list/ALARM_CPU_IOWAIT-list.json create mode 100644 tests/health_mgmtapi/expected_list/ALARM_CPU_USAGE-list.json create mode 100644 tests/health_mgmtapi/expected_list/CONTEXT_SYSTEM_CPU-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE_ALL-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE_ALL_ERROR-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE_SYSTEM_LOAD-list.json create mode 100644 tests/health_mgmtapi/expected_list/FAMILIES_LOAD-list.json create mode 100644 tests/health_mgmtapi/expected_list/HOSTS-list.json create mode 100644 tests/health_mgmtapi/expected_list/RESET-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_2-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_3-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_ALL-list.json create mode 100755 tests/health_mgmtapi/health-cmdapi-test.sh.in create mode 100755 tests/installer/checksums.sh create mode 100755 tests/installer/slack.sh create mode 100644 tests/k6/data.js create mode 100755 tests/lifecycle.bats create mode 100644 tests/profile/Makefile create mode 100644 tests/profile/benchmark-dictionary.c create mode 100644 tests/profile/benchmark-line-parsing.c create mode 100644 tests/profile/benchmark-procfile-parser.c create mode 100644 tests/profile/benchmark-registry.c create mode 100644 tests/profile/benchmark-value-pairs.c create mode 100644 tests/profile/statsd-stress.c create mode 100644 tests/profile/test-eval.c create mode 100755 tests/run-unit-tests.sh create mode 100755 tests/stress.sh create mode 100644 tests/template_dimension/system_cpu.conf.alarm_foreach create mode 100644 tests/template_dimension/system_cpu.conf.alarm_foreach_sp create mode 100644 tests/template_dimension/system_cpu.conf.template_alarm create mode 100644 tests/template_dimension/system_cpu.conf.template_foreach create mode 100644 tests/template_dimension/system_cpu.conf.template_foreach_sp create mode 100644 tests/template_dimension/system_cpu.conf.unique_alarm create mode 100644 tests/template_dimension/template_dim.sh.in create mode 100755 tests/updater_checks.bats create mode 100755 tests/updater_checks.sh create mode 100644 tests/urls/request.sh.in create mode 100644 web/Makefile.am create mode 100644 web/README.md create mode 100644 web/api/Makefile.am create mode 100644 web/api/README.md create mode 100644 web/api/badges/Makefile.am create mode 100644 web/api/badges/README.md create mode 100644 web/api/badges/web_buffer_svg.c create mode 100644 web/api/badges/web_buffer_svg.h create mode 100644 web/api/exporters/Makefile.am create mode 100644 web/api/exporters/README.md create mode 100644 web/api/exporters/allmetrics.c create mode 100644 web/api/exporters/allmetrics.h create mode 100644 web/api/exporters/prometheus/Makefile.am create mode 100644 web/api/exporters/prometheus/README.md create mode 100644 web/api/exporters/shell/Makefile.am create mode 100644 web/api/exporters/shell/README.md create mode 100644 web/api/exporters/shell/allmetrics_shell.c create mode 100644 web/api/exporters/shell/allmetrics_shell.h create mode 100644 web/api/formatters/Makefile.am create mode 100644 web/api/formatters/README.md create mode 100644 web/api/formatters/charts2json.c create mode 100644 web/api/formatters/charts2json.h create mode 100644 web/api/formatters/csv/Makefile.am create mode 100644 web/api/formatters/csv/README.md create mode 100644 web/api/formatters/csv/csv.c create mode 100644 web/api/formatters/csv/csv.h create mode 100644 web/api/formatters/json/Makefile.am create mode 100644 web/api/formatters/json/README.md create mode 100644 web/api/formatters/json/json.c create mode 100644 web/api/formatters/json/json.h create mode 100644 web/api/formatters/json_wrapper.c create mode 100644 web/api/formatters/json_wrapper.h create mode 100644 web/api/formatters/rrd2json.c create mode 100644 web/api/formatters/rrd2json.h create mode 100644 web/api/formatters/rrdset2json.c create mode 100644 web/api/formatters/rrdset2json.h create mode 100644 web/api/formatters/ssv/Makefile.am create mode 100644 web/api/formatters/ssv/README.md create mode 100644 web/api/formatters/ssv/ssv.c create mode 100644 web/api/formatters/ssv/ssv.h create mode 100644 web/api/formatters/value/Makefile.am create mode 100644 web/api/formatters/value/README.md create mode 100644 web/api/formatters/value/value.c create mode 100644 web/api/formatters/value/value.h create mode 100644 web/api/health/Makefile.am create mode 100644 web/api/health/README.md create mode 100644 web/api/health/health_cmdapi.c create mode 100644 web/api/health/health_cmdapi.h create mode 100644 web/api/netdata-swagger.json create mode 100644 web/api/netdata-swagger.yaml create mode 100644 web/api/queries/Makefile.am create mode 100644 web/api/queries/README.md create mode 100644 web/api/queries/average/Makefile.am create mode 100644 web/api/queries/average/README.md create mode 100644 web/api/queries/average/average.c create mode 100644 web/api/queries/average/average.h create mode 100644 web/api/queries/countif/Makefile.am create mode 100644 web/api/queries/countif/README.md create mode 100644 web/api/queries/countif/countif.c create mode 100644 web/api/queries/countif/countif.h create mode 100644 web/api/queries/des/Makefile.am create mode 100644 web/api/queries/des/README.md create mode 100644 web/api/queries/des/des.c create mode 100644 web/api/queries/des/des.h create mode 100644 web/api/queries/incremental_sum/Makefile.am create mode 100644 web/api/queries/incremental_sum/README.md create mode 100644 web/api/queries/incremental_sum/incremental_sum.c create mode 100644 web/api/queries/incremental_sum/incremental_sum.h create mode 100644 web/api/queries/max/Makefile.am create mode 100644 web/api/queries/max/README.md create mode 100644 web/api/queries/max/max.c create mode 100644 web/api/queries/max/max.h create mode 100644 web/api/queries/median/Makefile.am create mode 100644 web/api/queries/median/README.md create mode 100644 web/api/queries/median/median.c create mode 100644 web/api/queries/median/median.h create mode 100644 web/api/queries/min/Makefile.am create mode 100644 web/api/queries/min/README.md create mode 100644 web/api/queries/min/min.c create mode 100644 web/api/queries/min/min.h create mode 100644 web/api/queries/percentile/Makefile.am create mode 100644 web/api/queries/percentile/README.md create mode 100644 web/api/queries/percentile/percentile.c create mode 100644 web/api/queries/percentile/percentile.h create mode 100644 web/api/queries/query.c create mode 100644 web/api/queries/query.h create mode 100644 web/api/queries/rrdr.c create mode 100644 web/api/queries/rrdr.h create mode 100644 web/api/queries/ses/Makefile.am create mode 100644 web/api/queries/ses/README.md create mode 100644 web/api/queries/ses/ses.c create mode 100644 web/api/queries/ses/ses.h create mode 100644 web/api/queries/stddev/Makefile.am create mode 100644 web/api/queries/stddev/README.md create mode 100644 web/api/queries/stddev/stddev.c create mode 100644 web/api/queries/stddev/stddev.h create mode 100644 web/api/queries/sum/Makefile.am create mode 100644 web/api/queries/sum/README.md create mode 100644 web/api/queries/sum/sum.c create mode 100644 web/api/queries/sum/sum.h create mode 100644 web/api/queries/trimmed_mean/Makefile.am create mode 100644 web/api/queries/trimmed_mean/README.md create mode 100644 web/api/queries/trimmed_mean/trimmed_mean.c create mode 100644 web/api/queries/trimmed_mean/trimmed_mean.h create mode 100644 web/api/queries/weights.c create mode 100644 web/api/queries/weights.h create mode 100644 web/api/tests/valid_urls.c create mode 100644 web/api/tests/web_api.c create mode 100644 web/api/web_api_v1.c create mode 100644 web/api/web_api_v1.h create mode 100644 web/gui/.dashboard-notice.md create mode 100644 web/gui/.well-known/dnt/cookies create mode 100644 web/gui/Makefile.am create mode 100644 web/gui/README.md create mode 100644 web/gui/browserconfig.xml create mode 100755 web/gui/bundle_dashboard.py create mode 100644 web/gui/confluence/README.md create mode 100644 web/gui/css/c3-0.4.18.min.css create mode 100644 web/gui/css/morris-0.5.1.css create mode 100644 web/gui/custom/README.md create mode 100644 web/gui/dashboard/Makefile.am create mode 120000 web/gui/dashboard/README.md create mode 100644 web/gui/dashboard/asset-manifest.json create mode 100644 web/gui/dashboard/console.html create mode 100644 web/gui/dashboard/css/bootstrap-3.3.7.css create mode 100644 web/gui/dashboard/css/bootstrap-slate-flat-3.3.7.css create mode 100644 web/gui/dashboard/css/bootstrap-slider-10.0.0.min.css create mode 100644 web/gui/dashboard/css/bootstrap-theme-3.3.7.min.css create mode 100644 web/gui/dashboard/css/bootstrap-toggle-2.2.2.min.css create mode 100644 web/gui/dashboard/css/dashboard.css create mode 100644 web/gui/dashboard/css/dashboard.slate.css create mode 100644 web/gui/dashboard/dash-example.html create mode 100644 web/gui/dashboard/dashboard-react.js create mode 100644 web/gui/dashboard/dashboard.css create mode 100644 web/gui/dashboard/dashboard.html create mode 100644 web/gui/dashboard/dashboard.js create mode 100644 web/gui/dashboard/dashboard.slate.css create mode 100644 web/gui/dashboard/demo.html create mode 100644 web/gui/dashboard/demo2.html create mode 100644 web/gui/dashboard/demosites.html create mode 100644 web/gui/dashboard/demosites2.html create mode 100644 web/gui/dashboard/favicon.ico create mode 100644 web/gui/dashboard/fonts/glyphicons-halflings-regular.eot create mode 100644 web/gui/dashboard/fonts/glyphicons-halflings-regular.svg create mode 100644 web/gui/dashboard/fonts/glyphicons-halflings-regular.ttf create mode 100644 web/gui/dashboard/fonts/glyphicons-halflings-regular.woff create mode 100644 web/gui/dashboard/fonts/glyphicons-halflings-regular.woff2 create mode 100644 web/gui/dashboard/goto-host-from-alarm.html create mode 100644 web/gui/dashboard/images/alert-128-orange.png create mode 100644 web/gui/dashboard/images/alert-128-red.png create mode 100644 web/gui/dashboard/images/alert-multi-size-orange.ico create mode 100644 web/gui/dashboard/images/alert-multi-size-red.ico create mode 100644 web/gui/dashboard/images/alerts.jpg create mode 100644 web/gui/dashboard/images/alerts.png create mode 100644 web/gui/dashboard/images/android-icon-144x144.png create mode 100644 web/gui/dashboard/images/android-icon-192x192.png create mode 100644 web/gui/dashboard/images/android-icon-36x36.png create mode 100644 web/gui/dashboard/images/android-icon-48x48.png create mode 100644 web/gui/dashboard/images/android-icon-72x72.png create mode 100644 web/gui/dashboard/images/android-icon-96x96.png create mode 100644 web/gui/dashboard/images/animated.gif create mode 100644 web/gui/dashboard/images/apple-icon-114x114.png create mode 100644 web/gui/dashboard/images/apple-icon-120x120.png create mode 100644 web/gui/dashboard/images/apple-icon-144x144.png create mode 100644 web/gui/dashboard/images/apple-icon-152x152.png create mode 100644 web/gui/dashboard/images/apple-icon-180x180.png create mode 100644 web/gui/dashboard/images/apple-icon-57x57.png create mode 100644 web/gui/dashboard/images/apple-icon-60x60.png create mode 100644 web/gui/dashboard/images/apple-icon-72x72.png create mode 100644 web/gui/dashboard/images/apple-icon-76x76.png create mode 100644 web/gui/dashboard/images/apple-icon-precomposed.png create mode 100644 web/gui/dashboard/images/apple-icon.png create mode 100644 web/gui/dashboard/images/banner-icon-144x144.png create mode 100644 web/gui/dashboard/images/check-mark-2-128-green.png create mode 100644 web/gui/dashboard/images/check-mark-2-multi-size-green.ico create mode 100644 web/gui/dashboard/images/dashboards.png create mode 100644 web/gui/dashboard/images/favicon-128.png create mode 100644 web/gui/dashboard/images/favicon-16x16.png create mode 100644 web/gui/dashboard/images/favicon-196x196.png create mode 100644 web/gui/dashboard/images/favicon-32x32.png create mode 100644 web/gui/dashboard/images/favicon-96x96.png create mode 100644 web/gui/dashboard/images/favicon.ico create mode 100644 web/gui/dashboard/images/home.png create mode 100644 web/gui/dashboard/images/ms-icon-144x144.png create mode 100644 web/gui/dashboard/images/ms-icon-150x150.png create mode 100644 web/gui/dashboard/images/ms-icon-310x150.png create mode 100644 web/gui/dashboard/images/ms-icon-310x310.png create mode 100644 web/gui/dashboard/images/ms-icon-36x36.png create mode 100644 web/gui/dashboard/images/ms-icon-70x70.png create mode 100644 web/gui/dashboard/images/netdata-logomark.svg create mode 100644 web/gui/dashboard/images/netdata.svg create mode 100644 web/gui/dashboard/images/nodeView.png create mode 100644 web/gui/dashboard/images/nodes.jpg create mode 100644 web/gui/dashboard/images/overview.png create mode 100644 web/gui/dashboard/images/packaging-beta-tag.svg create mode 100644 web/gui/dashboard/images/post.png create mode 100644 web/gui/dashboard/images/pricing.png create mode 100644 web/gui/dashboard/images/seo-performance-128.png create mode 100644 web/gui/dashboard/index-node-view.html create mode 100644 web/gui/dashboard/index.html create mode 100644 web/gui/dashboard/infographic.html create mode 100644 web/gui/dashboard/lib/bootstrap-3.3.7.min.js create mode 100644 web/gui/dashboard/lib/bootstrap-slider-10.0.0.min.js create mode 100644 web/gui/dashboard/lib/bootstrap-table-1.11.0.min.js create mode 100644 web/gui/dashboard/lib/bootstrap-table-export-1.11.0.min.js create mode 100644 web/gui/dashboard/lib/bootstrap-toggle-2.2.2.min.js create mode 100644 web/gui/dashboard/lib/clipboard-polyfill-be05dad.js create mode 100644 web/gui/dashboard/lib/d3-4.12.2.min.js create mode 100644 web/gui/dashboard/lib/d3pie-0.2.1-netdata-3.js create mode 100644 web/gui/dashboard/lib/dygraph-c91c859.min.js create mode 100644 web/gui/dashboard/lib/dygraph-smooth-plotter-c91c859.js create mode 100644 web/gui/dashboard/lib/fontawesome-all-5.0.1.min.js create mode 100644 web/gui/dashboard/lib/gauge-1.3.2.min.js create mode 100644 web/gui/dashboard/lib/jquery-3.6.0.min.js create mode 100644 web/gui/dashboard/lib/jquery.easypiechart-97b5824.min.js create mode 100644 web/gui/dashboard/lib/jquery.peity-3.2.0.min.js create mode 100644 web/gui/dashboard/lib/jquery.sparkline-2.1.2.min.js create mode 100644 web/gui/dashboard/lib/lz-string-1.4.4.min.js create mode 100644 web/gui/dashboard/lib/pako-1.0.6.min.js create mode 100644 web/gui/dashboard/lib/perfect-scrollbar-0.6.15.min.js create mode 100644 web/gui/dashboard/lib/tableExport-1.6.0.min.js create mode 100644 web/gui/dashboard/manifest.json create mode 100644 web/gui/dashboard/precache-manifest.5fec6109084644adf7bf854243e1a044.js create mode 100644 web/gui/dashboard/refresh-badges.js create mode 100644 web/gui/dashboard/robots.txt create mode 100644 web/gui/dashboard/service-worker.js create mode 100644 web/gui/dashboard/sitemap.xml create mode 100644 web/gui/dashboard/static/css/2.c454aab8.chunk.css create mode 100644 web/gui/dashboard/static/css/2.c454aab8.chunk.css.map create mode 100644 web/gui/dashboard/static/css/4.a36e3b73.chunk.css create mode 100644 web/gui/dashboard/static/css/4.a36e3b73.chunk.css.map create mode 100644 web/gui/dashboard/static/css/main.53ba10f1.chunk.css create mode 100644 web/gui/dashboard/static/css/main.53ba10f1.chunk.css.map create mode 100644 web/gui/dashboard/static/js/10.a5cd7d0e.chunk.js create mode 100644 web/gui/dashboard/static/js/10.a5cd7d0e.chunk.js.map create mode 100644 web/gui/dashboard/static/js/2.92ca8446.chunk.js create mode 100644 web/gui/dashboard/static/js/2.92ca8446.chunk.js.LICENSE create mode 100644 web/gui/dashboard/static/js/2.92ca8446.chunk.js.map create mode 100644 web/gui/dashboard/static/js/3.f137faca.chunk.js create mode 100644 web/gui/dashboard/static/js/3.f137faca.chunk.js.map create mode 100644 web/gui/dashboard/static/js/4.2dbcd906.chunk.js create mode 100644 web/gui/dashboard/static/js/4.2dbcd906.chunk.js.map create mode 100644 web/gui/dashboard/static/js/5.2f783a54.chunk.js create mode 100644 web/gui/dashboard/static/js/5.2f783a54.chunk.js.LICENSE create mode 100644 web/gui/dashboard/static/js/5.2f783a54.chunk.js.map create mode 100644 web/gui/dashboard/static/js/6.e1951239.chunk.js create mode 100644 web/gui/dashboard/static/js/6.e1951239.chunk.js.map create mode 100644 web/gui/dashboard/static/js/7.c2417fb0.chunk.js create mode 100644 web/gui/dashboard/static/js/7.c2417fb0.chunk.js.map create mode 100644 web/gui/dashboard/static/js/8.b4161ea2.chunk.js create mode 100644 web/gui/dashboard/static/js/8.b4161ea2.chunk.js.map create mode 100644 web/gui/dashboard/static/js/9.a4363968.chunk.js create mode 100644 web/gui/dashboard/static/js/9.a4363968.chunk.js.map create mode 100644 web/gui/dashboard/static/js/main.7d1bdca1.chunk.js create mode 100644 web/gui/dashboard/static/js/main.7d1bdca1.chunk.js.LICENSE create mode 100644 web/gui/dashboard/static/js/main.7d1bdca1.chunk.js.map create mode 100644 web/gui/dashboard/static/js/runtime-main.08abed8f.js create mode 100644 web/gui/dashboard/static/js/runtime-main.08abed8f.js.map create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-100.245539db.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-100.9a582f3a.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-100italic.1ea7c5d2.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-100italic.3c34cf08.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-200.67524c36.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-200.bf72c841.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-200italic.52df2560.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-200italic.bbc2d552.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-300.10bb6a0a.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-300.9e1c48af.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-300italic.c76f2ab5.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-300italic.d3566d5b.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-400.263d6267.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-400.a2c56f94.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-400italic.272f8611.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-400italic.89a93a1b.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-500.0866c244.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-500.f6d5c5d5.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-500italic.ccd41bd1.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-500italic.ffd12d59.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-600.337b1651.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-600.7852d4dc.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-600italic.17e5379f.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-600italic.6f4ba6aa.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-700.b8809d61.woff create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-700.c9983d3d.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-700italic.02954bee.woff2 create mode 100644 web/gui/dashboard/static/media/ibm-plex-sans-latin-700italic.72e9af40.woff create mode 100644 web/gui/dashboard/static/media/material-icons.0509ab09.woff2 create mode 100644 web/gui/dashboard/tv-react.html create mode 100644 web/gui/dashboard/tv.html create mode 100644 web/gui/dashboard_info.js create mode 100644 web/gui/dashboard_info_custom_example.js create mode 100644 web/gui/main.css create mode 100644 web/gui/main.js create mode 100644 web/gui/old/index.html create mode 100644 web/gui/src/dashboard.js/alarms.js create mode 100644 web/gui/src/dashboard.js/boot.js create mode 100644 web/gui/src/dashboard.js/chart-registry.js create mode 100644 web/gui/src/dashboard.js/charting.js create mode 100644 web/gui/src/dashboard.js/charting/_c3.js create mode 100644 web/gui/src/dashboard.js/charting/_morris.js create mode 100644 web/gui/src/dashboard.js/charting/_raphael.js create mode 100644 web/gui/src/dashboard.js/charting/d3.js create mode 100644 web/gui/src/dashboard.js/charting/d3pie.js create mode 100644 web/gui/src/dashboard.js/charting/dygraph.js create mode 100644 web/gui/src/dashboard.js/charting/easy-pie-chart.js create mode 100644 web/gui/src/dashboard.js/charting/gauge.js create mode 100644 web/gui/src/dashboard.js/charting/google-charts.js create mode 100644 web/gui/src/dashboard.js/charting/peity.js create mode 100644 web/gui/src/dashboard.js/charting/sparkline.js create mode 100644 web/gui/src/dashboard.js/charting/textonly.js create mode 100644 web/gui/src/dashboard.js/colors.js create mode 100644 web/gui/src/dashboard.js/common.js create mode 100644 web/gui/src/dashboard.js/compatibility.js create mode 100644 web/gui/src/dashboard.js/dependencies.js create mode 100644 web/gui/src/dashboard.js/epilogue.js.inc create mode 100644 web/gui/src/dashboard.js/error-handling.js create mode 100644 web/gui/src/dashboard.js/localstorage.js create mode 100644 web/gui/src/dashboard.js/main.js create mode 100644 web/gui/src/dashboard.js/options.js create mode 100644 web/gui/src/dashboard.js/prologue.js.inc create mode 100644 web/gui/src/dashboard.js/registry.js create mode 100644 web/gui/src/dashboard.js/server-detection.js create mode 100644 web/gui/src/dashboard.js/themes.js create mode 100644 web/gui/src/dashboard.js/timeout.js create mode 100644 web/gui/src/dashboard.js/units-conversion.js create mode 100644 web/gui/src/dashboard.js/utils.js create mode 100644 web/gui/src/dashboard.js/xss.js create mode 100644 web/gui/static/img/netdata-logomark.svg create mode 100644 web/server/Makefile.am create mode 100644 web/server/README.md create mode 100644 web/server/static/Makefile.am create mode 100644 web/server/static/README.md create mode 100644 web/server/static/static-threaded.c create mode 100644 web/server/static/static-threaded.h create mode 100644 web/server/web_client.c create mode 100644 web/server/web_client.h create mode 100644 web/server/web_client_cache.c create mode 100644 web/server/web_client_cache.h create mode 100644 web/server/web_server.c create mode 100644 web/server/web_server.h diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..ded82a9 --- /dev/null +++ b/.clang-format @@ -0,0 +1,118 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# clang-format configuration file. Intended for clang-format >= 4. +# +# For more information, see: +# +# Documentation/process/clang-format.rst +# https://clang.llvm.org/docs/ClangFormat.html +# https://clang.llvm.org/docs/ClangFormatStyleOptions.html +# +--- +AccessModifierOffset: -4 +AlignAfterOpenBracket: AlwaysBreak +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +#AlignEscapedNewlines: Left # Unknown to clang-format-4.0 +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + #AfterExternBlock: false # Unknown to clang-format-5.0 + BeforeCatch: false + BeforeElse: false + IndentBraces: false + #SplitEmptyFunction: true # Unknown to clang-format-4.0 + #SplitEmptyRecord: true # Unknown to clang-format-4.0 + #SplitEmptyNamespace: true # Unknown to clang-format-4.0 +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0 +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: false +#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0 +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: false +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +#CompactNamespaces: false # Unknown to clang-format-4.0 +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +#FixNamespaceComments: false # Unknown to clang-format-4.0 + +#IncludeBlocks: Preserve # Unknown to clang-format-5.0 +IncludeCategories: + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: true +#IndentPPDirectives: None # Unknown to clang-format-5.0 +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: Inner +#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0 +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true + +# Taken from git's rules +#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0 +PenaltyBreakBeforeFirstCallParameter: 30 +PenaltyBreakComment: 10 +PenaltyBreakFirstLessLess: 0 +PenaltyBreakString: 10 +PenaltyExcessCharacter: 100 +PenaltyReturnTypeOnItsOwnLine: 60 + +PointerAlignment: Right +ReflowComments: false +SortIncludes: false +#SortUsingDeclarations: false # Unknown to clang-format-4.0 +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0 +#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0 +SpaceBeforeParens: ControlStatements +#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0 +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp03 +TabWidth: 4 +UseTab: Never +... diff --git a/.codacy.yml b/.codacy.yml new file mode 100644 index 0000000..f4fd5d6 --- /dev/null +++ b/.codacy.yml @@ -0,0 +1,17 @@ +--- +exclude_paths: + - collectors/python.d.plugin/python_modules/pyyaml2/** + - collectors/python.d.plugin/python_modules/pyyaml3/** + - collectors/python.d.plugin/python_modules/urllib3/** + - collectors/python.d.plugin/python_modules/third_party/** + - contrib/** + - packaging/makeself/** + - web/gui/css/** + - web/gui/lib/** + - web/gui/old/** + - web/gui/src/** + - web/gui/main.js + - tests/** + - aclk/tests/** + - libnetdata/libjudy/** + diff --git a/.codeclimate.yml b/.codeclimate.yml new file mode 100644 index 0000000..2d33eaf --- /dev/null +++ b/.codeclimate.yml @@ -0,0 +1,93 @@ +version: "2" +checks: + argument-count: + enabled: false + config: + threshold: 10 + complex-logic: + enabled: false + config: + threshold: 10 + file-lines: + enabled: false + config: + threshold: 5000 + method-complexity: + enabled: false + config: + threshold: 20 + method-count: + enabled: false + config: + threshold: 50 + method-lines: + enabled: false + config: + threshold: 250 + nested-control-flow: + enabled: false + config: + threshold: 4 + return-statements: + enabled: false + config: + threshold: 4 + similar-code: + enabled: false + identical-code: + enabled: false +plugins: + csslint: + enabled: true + duplication: + enabled: false + config: + languages: + - javascript: + mass_threshold: 100 + - python: + python_version: 3 + mass_threshold: 100 + checks: + Similar code: + enabled: false + Identical code: + enabled: false + eslint: + enabled: true + checks: + max-statements: + enabled: false + complexity: + enabled: false + no-eval: + enabled: false + no-extend-native: + enabled: false + no-void: + enabled: false + no-alert: + enabled: false + no-undef-init: + enabled: false + fixme: + enabled: false + phpmd: + enabled: true + radon: + enabled: true + checks: + Complexity: + enabled: false +exclude_patterns: + - ".gitignore" + - ".githooks/" + - "tests/" + - "m4/" + - "web/css/" + - "web/lib/" + - "web/fonts/" + - "web/old/" + - "collectors/python.d.plugin/python_modules/pyyaml2/" + - "collectors/python.d.plugin/python_modules/pyyaml3/" + - "collectors/python.d.plugin/python_modules/urllib3/" diff --git a/.csslintrc b/.csslintrc new file mode 100644 index 0000000..aacba95 --- /dev/null +++ b/.csslintrc @@ -0,0 +1,2 @@ +--exclude-exts=.min.css +--ignore=adjoining-classes,box-model,ids,order-alphabetical,unqualified-attributes diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..0f1be69 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,14 @@ +{ + "image": "netdata/devenv", + "extensions":[ + "golang.go", + "exiasr.hadolint", + "ms-python.python", + "timonwong.shellcheck", + "redhat.vscode-yaml", + "dbaeumer.vscode-eslint", + "jasonnutter.search-node-modules", + "mgmcdermott.vscode-language-babel", + ], + "forwardPorts": [19999] +} \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 120000 index 0000000..3e4e48b --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +.gitignore \ No newline at end of file diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 0000000..8cefe0e --- /dev/null +++ b/.eslintignore @@ -0,0 +1,2 @@ +**/*{.,-}min.js +web/gui/dashboard/* diff --git a/.eslintrc b/.eslintrc new file mode 100644 index 0000000..9faa375 --- /dev/null +++ b/.eslintrc @@ -0,0 +1,213 @@ +ecmaFeatures: + modules: true + jsx: true + +env: + amd: true + browser: true + es6: true + jquery: true + node: true + +# http://eslint.org/docs/rules/ +rules: + # Possible Errors + comma-dangle: [2, never] + no-cond-assign: 2 + no-console: 0 + no-constant-condition: 2 + no-control-regex: 2 + no-debugger: 2 + no-dupe-args: 2 + no-dupe-keys: 2 + no-duplicate-case: 2 + no-empty: 2 + no-empty-character-class: 2 + no-ex-assign: 2 + no-extra-boolean-cast: 2 + no-extra-parens: 0 + no-extra-semi: 2 + no-func-assign: 2 + no-inner-declarations: [2, functions] + no-invalid-regexp: 2 + no-irregular-whitespace: 2 + no-negated-in-lhs: 2 + no-obj-calls: 2 + no-regex-spaces: 2 + no-sparse-arrays: 2 + no-unexpected-multiline: 2 + no-unreachable: 2 + use-isnan: 2 + valid-jsdoc: 0 + valid-typeof: 2 + + # Best Practices + accessor-pairs: 2 + block-scoped-var: 0 + complexity: [2, 6] + consistent-return: 0 + curly: 0 + default-case: 0 + dot-location: 0 + dot-notation: 0 + eqeqeq: 2 + guard-for-in: 2 + no-alert: 2 + no-caller: 2 + no-case-declarations: 2 + no-div-regex: 2 + no-else-return: 0 + no-empty-label: 2 + no-empty-pattern: 2 + no-eq-null: 2 + no-eval: 2 + no-extend-native: 2 + no-extra-bind: 2 + no-fallthrough: 2 + no-floating-decimal: 0 + no-implicit-coercion: 0 + no-implied-eval: 2 + no-invalid-this: 0 + no-iterator: 2 + no-labels: 0 + no-lone-blocks: 2 + no-loop-func: 2 + no-magic-number: 0 + no-multi-spaces: 0 + no-multi-str: 0 + no-native-reassign: 2 + no-new-func: 2 + no-new-wrappers: 2 + no-new: 2 + no-octal-escape: 2 + no-octal: 2 + no-proto: 2 + no-redeclare: 2 + no-return-assign: 2 + no-script-url: 2 + no-self-compare: 2 + no-sequences: 0 + no-throw-literal: 0 + no-unused-expressions: 2 + no-useless-call: 2 + no-useless-concat: 2 + no-void: 2 + no-warning-comments: 0 + no-with: 2 + radix: 2 + vars-on-top: 0 + wrap-iife: 2 + yoda: 0 + + # Strict + strict: 0 + + # Variables + init-declarations: 0 + no-catch-shadow: 2 + no-delete-var: 2 + no-label-var: 2 + no-shadow-restricted-names: 2 + no-shadow: 0 + no-undef-init: 2 + no-undef: 0 + no-undefined: 0 + no-unused-vars: 0 + no-use-before-define: 0 + + # Node.js and CommonJS + callback-return: 2 + global-require: 2 + handle-callback-err: 2 + no-mixed-requires: 0 + no-new-require: 0 + no-path-concat: 2 + no-process-exit: 2 + no-restricted-modules: 0 + no-sync: 0 + + # Stylistic Issues + array-bracket-spacing: 0 + block-spacing: 0 + brace-style: 0 + camelcase: 0 + comma-spacing: 0 + comma-style: 0 + computed-property-spacing: 0 + consistent-this: 0 + eol-last: 0 + func-names: 0 + func-style: 0 + id-length: 0 + id-match: 0 + indent: 0 + jsx-quotes: 0 + key-spacing: 0 + linebreak-style: 0 + lines-around-comment: 0 + max-depth: 0 + max-len: 0 + max-nested-callbacks: 0 + max-params: 0 + max-statements: [2, 30] + new-cap: 0 + new-parens: 0 + newline-after-var: 0 + no-array-constructor: 0 + no-bitwise: 0 + no-continue: 0 + no-inline-comments: 0 + no-lonely-if: 0 + no-mixed-spaces-and-tabs: 0 + no-multiple-empty-lines: 0 + no-negated-condition: 0 + no-nested-ternary: 0 + no-new-object: 0 + no-plusplus: 0 + no-restricted-syntax: 0 + no-spaced-func: 0 + no-ternary: 0 + no-trailing-spaces: 0 + no-underscore-dangle: 0 + no-unneeded-ternary: 0 + object-curly-spacing: 0 + one-var: 0 + operator-assignment: 0 + operator-linebreak: 0 + padded-blocks: 0 + quote-props: 0 + quotes: 0 + require-jsdoc: 0 + semi-spacing: 0 + semi: 0 + sort-vars: 0 + space-after-keywords: 0 + space-before-blocks: 0 + space-before-function-paren: 0 + space-before-keywords: 0 + space-in-parens: 0 + space-infix-ops: 0 + space-return-throw-case: 0 + space-unary-ops: 0 + spaced-comment: 0 + wrap-regex: 0 + + # ECMAScript 6 + arrow-body-style: 0 + arrow-parens: 0 + arrow-spacing: 0 + constructor-super: 0 + generator-star-spacing: 0 + no-arrow-condition: 0 + no-class-assign: 0 + no-const-assign: 0 + no-dupe-class-members: 0 + no-this-before-super: 0 + no-var: 0 + object-shorthand: 0 + prefer-arrow-callback: 0 + prefer-const: 0 + prefer-reflect: 0 + prefer-spread: 0 + prefer-template: 0 + require-yield: 0 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..45ec515 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.c diff=cpp +*.h diff=cpp diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..c513b71 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,57 @@ +# Files which shouldn't be changed manually are owned by @netdatabot. +# This way we prevent modifications which will be overwriten by automation. + +# Global (default) code owner +* @Ferroin + +# Ownership by directory structure +.travis/ @Ferroin +.github/ @Ferroin +aclk/ @stelfrag @underhood +build/ @Ferroin +contrib/debian @Ferroin +collectors/ @thiagoftsm +collectors/ebpf.plugin/ @thiagoftsm +collectors/charts.d.plugin/ @ilyam8 @Ferroin +collectors/freebsd.plugin/ @thiagoftsm +collectors/macos.plugin/ @thiagoftsm +collectors/python.d.plugin/ @ilyam8 +collectors/cups.plugin/ @simonnagl @thiagoftsm +exporting/ @thiagoftsm +daemon/ @thiagoftsm @vkalintiris +database/ @thiagoftsm @vkalintiris +docs/ @DShreve2 +health/ @thiagoftsm @vkalintiris @MrZammler +health/health.d/ @thiagoftsm @MrZammler +health/notifications/ @Ferroin @thiagoftsm @MrZammler +ml/ @andrewm4894 @vkalintiris +libnetdata/ @thiagoftsm @vkalintiris +packaging/ @Ferroin +registry/ @jacekkolasa +streaming/ @thiagoftsm +system/ @Ferroin +tests/ @Ferroin @vkalintiris +web/ @thiagoftsm @vkalintiris +web/gui/ @jacekkolasa + +# Ownership by filetype (overwrites ownership by directory) +*.am @Ferroin +*.md @DShreve2 +Dockerfile* @Ferroin + +# Ownership of specific files +.gitignore @Ferroin @vkalintiris +.travis.yml @Ferroin +.lgtm.yml @Ferroin +.eslintrc @Ferroin +.eslintignore @Ferroin +.csslintrc @Ferroin +.codeclimate.yml @Ferroin +.codacy.yml @Ferroin +.yamllint.yml @Ferroin +netdata.spec.in @Ferroin +netdata-installer.sh @Ferroin +packaging/version @netdatabot @Ferroin + +LICENSE.md @DShreve2 @Ferroin @vkalintiris +CHANGELOG.md @netdatabot @Ferroin diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..bd939ba --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +--- +about: General issue template +labels: "needs triage", "no changelog" +--- + + + +#### Summary + diff --git a/.github/ISSUE_TEMPLATE/BUG_REPORT.yml b/.github/ISSUE_TEMPLATE/BUG_REPORT.yml new file mode 100644 index 0000000..d82d463 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.yml @@ -0,0 +1,102 @@ +name: "Netdata Agent: Bug report" +description: "Submit a report and help us improve our free and open-source Netdata Agent" +title: "[Bug]: " +labels: ["bug", "needs triage"] +body: + - type: markdown + attributes: + value: "### Thank you for contributing to our project!" + - type: markdown + attributes: + value: | + + - type: markdown + attributes: + value: | + Before submitting, we'd appreciate it if you: + - Verify that your issue is not already reported on GitHub. + - Check if your Netdata Agent is up to date. If not, we recommend that you [update](https://learn.netdata.cloud/docs/agent/packaging/installer/update) first. + - type: textarea + id: bug-description + attributes: + label: Bug description + description: Provide a description of the bug you're experiencing. + validations: + required: true + - type: textarea + id: expected-behavior + attributes: + label: Expected behavior + description: Describe what you expected to happen. + validations: + required: true + - type: textarea + id: reproduce + attributes: + label: Steps to reproduce + description: Describe the steps to reproduce the bug. + value: | + 1. + 2. + 3. + ... + validations: + required: true + - type: dropdown + id: install-method + attributes: + label: Installation method + description: | + Select [installation method](https://learn.netdata.cloud/docs/agent/packaging/installer#alternative-methods) you used. + Describe the method in the "Additional info" section if you chose "other". + options: + - "kickstart.sh" + - "kickstart-static64.sh" + - "native binary packages (.deb/.rpm)" + - "from git" + - "from source" + - "docker" + - "helmchart (kubernetes)" + - "other" + validations: + required: true + - type: textarea + id: system-info + attributes: + label: System info + description: | + Provide information about your system. To get this information, execute one of the following commands based on your OS: + ```shell + # Linux + uname -a; grep -HvE "^#|URL" /etc/*release + # BSD + uname -a; uname -K + # macOS + uname -a; sw_vers + ``` + > NOTE: This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true + - type: textarea + id: netdata-buildfinfo + attributes: + label: Netdata build info + description: | + Provide Netdata Agent version and build info. To get this information, execute: + ```shell + netdata -W buildinfo + # If get "netdata: command not found", try (required running Netdata) + $(ps aux | grep -m1 -E -o "[a-zA-Z/]+netdata ") -W buildinfo + ``` + > NOTE: This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true + - type: textarea + id: additional-info + attributes: + label: Additional info + description: Any additional information related to the issue (ex. logs). + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/FEAT_REQUEST.yml b/.github/ISSUE_TEMPLATE/FEAT_REQUEST.yml new file mode 100644 index 0000000..11d77a0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/FEAT_REQUEST.yml @@ -0,0 +1,62 @@ +name: "Netdata Agent: Feature request" +description: "Submit a feature request and help us improve our free and open-source Netdata Agent" +title: "[Feat]: " +labels: ["feature request", "needs triage"] +body: + - type: markdown + attributes: + value: "### Thank you for contributing to our project!" + - type: markdown + attributes: + value: | + Submit a feature request and help us improve our free and open-source Netdata Agent. + - type: textarea + id: problem + attributes: + label: Problem + description: | + Is your feature request intended to solve a problem? If so, provide a description of the problem. + validations: + required: false + - type: textarea + id: description + attributes: + label: Description + description: | + Provide a clear and concise description of the feature you want or need. + validations: + required: true + - type: dropdown + id: importance + attributes: + label: Importance + description: | + Help us to understand the importance of your feature request. Choose "blocker" if lack of this feature stops you from using Netdata Agent. + options: + - "nice to have" + - "really want" + - "must have" + - "blocker" + validations: + required: true + - type: textarea + id: value-proposition + attributes: + label: Value proposition + description: | + Help us to understand why we need this feaure. Describe benefits that users receive if we implement this feature request. + value: | + 1. + 2. + 3. + ... + validations: + required: true + - type: textarea + id: proposed-implementation + attributes: + label: Proposed implementation + description: | + Share your proposal if you have any ideas on how this feature can be implemented. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..79678d7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,12 @@ +# Ref: https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/configuring-issue-templates-for-your-repository#configuring-the-template-chooser +blank_issues_enabled: false +contact_links: + - name: "Netdata Agent: Question" + url: https://github.com/netdata/netdata/discussions/new?category=q-a + about: Ask a question about Netdata Agent + - name: "Netdata Cloud" + url: https://github.com/netdata/netdata-cloud/issues/new/choose + about: Create a report to help us improve our web application + - name: Community + url: https://netdata.cloud/community + about: If you don't know where to start, visit our community page! diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..829d8e6 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,27 @@ +##### Summary + + +##### Test Plan + + + +##### Additional Information + + +
For users: How does this change affect me? + +
diff --git a/.github/codeql/python-config.yml b/.github/codeql/python-config.yml new file mode 100644 index 0000000..c82727c --- /dev/null +++ b/.github/codeql/python-config.yml @@ -0,0 +1,10 @@ +paths-ignore: + - .github + - build_external/ + - ml/dlib + - ml/json + - tests/api + - web/gui + - collectors/python.d.plugin/python_modules/pyyaml* + - collectors/python.d.plugin/python_modules/third_party + - collectors/python.d.plugin/python_modules/urllib3 diff --git a/.github/data/distros.yml b/.github/data/distros.yml new file mode 100644 index 0000000..cc52752 --- /dev/null +++ b/.github/data/distros.yml @@ -0,0 +1,214 @@ +# This defines the full set of distros we run CI on. +--- +platform_map: # map packaging architectures to docker platforms + aarch64: linux/arm64/v8 + amd64: linux/amd64 + arm64: linux/arm64/v8 + armhf: linux/arm/v7 + armhfp: linux/arm/v7 + i386: linux/i386 + x86_64: linux/amd64 +arch_order: # sort order for per-architecture jobs in CI + - amd64 + - x86_64 + - i386 + - armhf + - armhfp + - arm64 + - aarch64 +include: + - &alpine + distro: alpine + version: edge + env_prep: | + apk add -U bash + jsonc_removal: | + apk del json-c-dev + test: + ebpf-core: true + - <<: *alpine + version: "3.17" + - <<: *alpine + version: "3.16" + - <<: *alpine + version: "3.15" + - <<: *alpine + version: "3.14" + + - distro: archlinux + version: latest + env_prep: | + pacman --noconfirm -Syu && pacman --noconfirm -Sy grep libffi + test: + ebpf-core: true + + - &alma + distro: almalinux + version: "9" + base_image: almalinux + jsonc_removal: | + dnf remove -y json-c-devel + packages: &alma_packages + type: rpm + repo_distro: el/9 + arches: + - x86_64 + - aarch64 + test: + ebpf-core: true + - <<: *alma + version: "8" + packages: + <<: *alma_packages + repo_distro: el/8 + + - distro: centos + version: "7" + packages: + type: rpm + repo_distro: el/7 + arches: + - x86_64 + test: + ebpf-core: false + + - &debian + distro: debian + version: "11" + env_prep: | + apt-get update + jsonc_removal: | + apt-get purge -y libjson-c-dev + packages: &debian_packages + type: deb + repo_distro: debian/bullseye + arches: + - i386 + - amd64 + - armhf + - arm64 + test: + ebpf-core: true + - <<: *debian + version: "10" + packages: + <<: *debian_packages + repo_distro: debian/buster + test: + ebpf-core: false + + - &fedora + distro: fedora + version: "37" + jsonc_removal: | + dnf remove -y json-c-devel + packages: &fedora_packages + type: rpm + repo_distro: fedora/37 + arches: + - x86_64 + - aarch64 + test: + ebpf-core: true + - <<: *fedora + version: "36" + packages: + <<: *fedora_packages + repo_distro: fedora/36 + arches: + - x86_64 + - armhfp + - aarch64 + test: + ebpf-core: true + - <<: *fedora + version: "35" + packages: + <<: *fedora_packages + repo_distro: fedora/35 + arches: + - x86_64 + - armhfp + - aarch64 + test: + ebpf-core: true + + - &opensuse + distro: opensuse + version: "15.4" + base_image: opensuse/leap + jsonc_removal: | + zypper rm -y libjson-c-devel + packages: &opensuse_packages + type: rpm + repo_distro: opensuse/15.4 + arches: + - x86_64 + - aarch64 + test: + ebpf-core: true + - <<: *opensuse + version: "15.3" + packages: + <<: *opensuse_packages + repo_distro: opensuse/15.3 + test: + ebpf-core: false + + - &oracle + distro: oraclelinux + version: "8" + jsonc_removal: | + dnf remove -y json-c-devel + packages: &oracle_packages + type: rpm + repo_distro: ol/8 + arches: + - x86_64 + - aarch64 + test: + ebpf-core: true + - <<: *oracle + version: "9" + packages: + <<: *oracle_packages + repo_distro: ol/9 + + - &ubuntu + distro: ubuntu + version: "22.10" + env_prep: | + rm -f /etc/apt/apt.conf.d/docker && apt-get update + jsonc_removal: | + apt-get remove -y libjson-c-dev + packages: &ubuntu_packages + type: deb + repo_distro: ubuntu/kinetic + arches: + - amd64 + - armhf + - arm64 + test: + ebpf-core: true + - <<: *ubuntu + version: "22.04" + packages: + <<: *ubuntu_packages + repo_distro: ubuntu/jammy + - <<: *ubuntu + version: "20.04" + packages: + <<: *ubuntu_packages + repo_distro: ubuntu/focal + - <<: *ubuntu + version: "18.04" + packages: + <<: *ubuntu_packages + repo_distro: ubuntu/bionic + arches: + - i386 + - amd64 + - armhf + - arm64 + test: + ebpf-core: false diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..b02b155 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + labels: + - "no changelog" + - "area/ci" diff --git a/.github/dockerfiles/Dockerfile.build_test b/.github/dockerfiles/Dockerfile.build_test new file mode 100644 index 0000000..c275d61 --- /dev/null +++ b/.github/dockerfiles/Dockerfile.build_test @@ -0,0 +1,18 @@ +ARG BASE + +FROM ${BASE} + +ARG PRE +ENV PRE=${PRE} +ARG RMJSONC +ENV RMJSONC=${RMJSONC} +ENV DISABLE_TELEMETRY=1 +ENV GITHUB_ACTIONS=true + +RUN echo "${PRE}" > /prep-cmd.sh && \ + echo "${RMJSONC}" > /rmjsonc.sh && chmod +x /rmjsonc.sh && \ + /bin/sh /prep-cmd.sh + +COPY . /netdata + +RUN /netdata/packaging/installer/install-required-packages.sh --dont-wait --non-interactive netdata diff --git a/.github/dockerfiles/Dockerfile.clang b/.github/dockerfiles/Dockerfile.clang new file mode 100644 index 0000000..62bb019 --- /dev/null +++ b/.github/dockerfiles/Dockerfile.clang @@ -0,0 +1,18 @@ +FROM debian:buster AS build + +# Disable apt/dpkg interactive mode +ENV DEBIAN_FRONTEND=noninteractive + +# Install all build dependencies +COPY packaging/installer/install-required-packages.sh /tmp/install-required-packages.sh +RUN /tmp/install-required-packages.sh --dont-wait --non-interactive netdata-all + +# Install Clang and set as default CC +RUN apt-get install -y clang && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100 + +WORKDIR /netdata +COPY . . + +# Build Netdata +RUN ./netdata-installer.sh --dont-wait --dont-start-it --disable-go --require-cloud diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000..c723250 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,190 @@ +# This configures label matching for PR's. +# +# The keys are labels, and the values are lists of minimatch patterns +# to which those labels apply. +# +# NOTE: This can only add labels, not remove them. +# NOTE: Due to YAML syntax limitations, patterns or labels which start +# with a character that is part of the standard YAML syntax must be +# quoted. +# +# Please keep the labels sorted and deduplicated. + +area/ACLK: + - aclk/* + - aclk/**/* + - database/sqlite/sqlite_aclk* + - mqtt_websockets + +area/claim: + - claim/* + +area/exporting: + - exporting/* + - exporting/**/* + +area/build: + - build/* + - build/**/* + - build_external/* + - build_external/**/* + - CMakeLists.txt + - configure.ac + - Makefile.am + - "**/Makefile.am" + +area/ci: + - .travis/* + - .travis/**/* + - .github/* + - .github/**/* + +area/daemon: + - daemon/* + - daemon/**/* + +area/database: + - database/* + - database/**/* + +area/docs: + - "*.md" + - "**/*.md" + - "**/*.mdx" + - diagrams/* + - diagrams/**/* + +# -----------------collectors---------------------- + +area/collectors: + - collectors/* + - collectors/**/* + +collectors/plugins.d: + - collectors/plugins.d/* + - collectors/plugins.d/**/* + +collectors/apps: + - collectors/apps.plugin/* + - collectors/apps.plugin/**/* + +collectors/cgroups: + - collectors/cgroups.plugin/* + - collectors/cgroups.plugin/**/* + +collectors/charts.d: + - collectors/charts.d.plugin/* + - collectors/charts.d.plugin/**/* + +collectors/cups: + - collectors/cups.plugin/* + - collectors/cups.plugin/**/* + +collectors/diskspace: + - collectors/diskspace.plugin/* + - collectors/diskspace.plugin/**/* + +collectors/ebpf: + - collectors/ebpf.plugin/* + - collectors/ebpf.plugin/**/* + +collectors/fping: + - collectors/fping.plugin/* + - collectors/fping.plugin/**/* + +collectors/freebsd: + - collectors/freebsd.plugin/* + - collectors/freebsd.plugin/**/* + +collectors/freeipmi: + - collectors/freeipmi.plugin/* + - collectors/freeipmi.plugin/**/* + +collectors/idlejitter: + - collectors/idlejitter.plugin/* + - collectors/idlejitter.plugin/**/* + +collectors/ioping: + - collectors/ioping.plugin/* + - collectors/ioping.plugin/**/* + +collectors/macos: + - collectors/macos.plugin/* + - collectors/macos.plugin/**/* + +collectors/nfacct: + - collectors/nfacct.plugin/* + - collectors/nfacct.plugin/**/* + +collectors/perf: + - collectors/perf.plugin/* + - collectors/perf.plugin/**/* + +collectors/proc: + - collectors/proc.plugin/* + - collectors/proc.plugin/**/* + +collectors/python.d: + - collectors/python.d.plugin/* + - collectors/python.d.plugin/**/* + +collectors/slabinfo: + - collectors/slabinfo.plugin/* + - collectors/slabinfo.plugin/**/* + +collectors/statsd: + - collectors/statsd.plugin/* + - collectors/statsd.plugin/**/* + +collectors/tc: + - collectors/tc.plugin/* + - collectors/tc.plugin/**/* + +collectors/timex: + - collectors/timex.plugin/* + - collectors/timex.plugin/**/* + +collectors/xenstat: + - collectors/xenstat.plugin/* + - collectors/xenstat.plugin/**/* + +# ----------------/collectors---------------------- + +area/health: + - health/* + - health/**/* + +area/ml: + - ml/* + - ml/**/* + +area/packaging: + - contrib/* + - contrib/**/* + - packaging/* + - packaging/**/* + - system/* + - system/**/* + - Dockerfile* + - netdata-installer.sh + - netdata.spec.in + +area/registry: + - registry/* + - registry/**/* + +area/streaming: + - streaming/* + - streaming/**/* + +area/tests: + - tests/* + - tests/**/* + - daemon/unit_test* + - coverity-scan.sh + - cppcheck.sh + - netdata.cppcheck + +area/web: + - web/* + - web/**/* diff --git a/.github/scripts/build-artifacts.sh b/.github/scripts/build-artifacts.sh new file mode 100755 index 0000000..569c79a --- /dev/null +++ b/.github/scripts/build-artifacts.sh @@ -0,0 +1,82 @@ +#!/bin/sh +# +# Builds the netdata-vX.y.Z-xxxx.tar.gz source tarball (dist) +# and netdata-vX.Y.Z-xxxx.gz.run (static x86_64) artifacts. + +set -e + +# shellcheck source=.github/scripts/functions.sh +. "$(dirname "$0")/functions.sh" + +NAME="${NAME:-netdata}" +VERSION="${VERSION:-"$(git describe)"}" +BASENAME="$NAME-$VERSION" + +prepare_build() { + progress "Preparing build" + ( + test -d artifacts || mkdir -p artifacts + echo "${VERSION}" > packaging/version + ) >&2 +} + +build_dist() { + progress "Building dist" + ( + command -v git > /dev/null && [ -d .git ] && git clean -d -f + autoreconf -ivf + ./configure \ + --prefix=/usr \ + --sysconfdir=/etc \ + --localstatedir=/var \ + --libexecdir=/usr/libexec \ + --with-zlib \ + --with-math \ + --with-user=netdata \ + --disable-dependency-tracking \ + CFLAGS=-O2 + make dist + mv "${BASENAME}.tar.gz" artifacts/ + ) >&2 +} + +build_static_x86_64() { + progress "Building static x86_64" + ( + command -v git > /dev/null && [ -d .git ] && git clean -d -f + USER="" ./packaging/makeself/build-x86_64-static.sh + ) >&2 +} + +prepare_assets() { + progress "Preparing assets" + ( + cp packaging/version artifacts/latest-version.txt + + cd artifacts || exit 1 + ln -f "${BASENAME}.tar.gz" netdata-latest.tar.gz + ln -f "${BASENAME}.gz.run" netdata-latest.gz.run + sha256sum -b ./* > "sha256sums.txt" + ) >&2 +} + +steps="prepare_build build_dist build_static_x86_64" +steps="$steps prepare_assets" + +_main() { + for step in $steps; do + if ! run "$step"; then + if [ -t 1 ]; then + debug + else + fail "Build failed" + fi + fi + done + + echo "🎉 All Done!" +} + +if [ -n "$0" ] && [ x"$0" != x"-bash" ]; then + _main "$@" +fi diff --git a/.github/scripts/build-dist.sh b/.github/scripts/build-dist.sh new file mode 100755 index 0000000..027b621 --- /dev/null +++ b/.github/scripts/build-dist.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# +# Builds the netdata-vX.y.Z-xxxx.tar.gz source tarball (dist) + +set -e + +# shellcheck source=.github/scripts/functions.sh +. "$(dirname "$0")/functions.sh" + +NAME="${NAME:-netdata}" +VERSION="${VERSION:-"$(git describe --always)"}" +BASENAME="$NAME-$VERSION" + +prepare_build() { + progress "Preparing build" + ( + test -d artifacts || mkdir -p artifacts + echo "${VERSION}" > packaging/version + ) >&2 +} + +build_dist() { + progress "Building dist" + ( + command -v git > /dev/null && [ -d .git ] && git clean -d -f + autoreconf -ivf + ./configure \ + --prefix=/usr \ + --sysconfdir=/etc \ + --localstatedir=/var \ + --libexecdir=/usr/libexec \ + --with-zlib \ + --with-math \ + --with-user=netdata \ + --disable-dependency-tracking \ + CFLAGS=-O2 + make dist + mv "${BASENAME}.tar.gz" artifacts/ + ) >&2 +} + +prepare_assets() { + progress "Preparing assets" + ( + cp packaging/version artifacts/latest-version.txt + cd artifacts || exit 1 + ln -f "${BASENAME}.tar.gz" netdata-latest.tar.gz + ln -f "${BASENAME}.gz.run" netdata-latest.gz.run + sha256sum -b ./* > "sha256sums.txt" + ) >&2 +} + +steps="prepare_build build_dist prepare_assets" + +_main() { + for step in $steps; do + if ! run "$step"; then + if [ -t 1 ]; then + debug + else + fail "Build failed" + fi + fi + done + + echo "🎉 All Done!" +} + +if [ -n "$0" ] && [ x"$0" != x"-bash" ]; then + _main "$@" +fi diff --git a/.github/scripts/build-static.sh b/.github/scripts/build-static.sh new file mode 100755 index 0000000..e810514 --- /dev/null +++ b/.github/scripts/build-static.sh @@ -0,0 +1,61 @@ +#!/bin/sh +# +# Builds the netdata-vX.Y.Z-xxxx.gz.run (static x86_64) artifact. + +set -e + +# shellcheck source=.github/scripts/functions.sh +. "$(dirname "$0")/functions.sh" + +BUILDARCH="${1}" +NAME="${NAME:-netdata}" +VERSION="${VERSION:-"$(git describe)"}" +BASENAME="$NAME-$BUILDARCH-$VERSION" + +prepare_build() { + progress "Preparing build" + ( + test -d artifacts || mkdir -p artifacts + ) >&2 +} + +build_static() { + progress "Building static ${BUILDARCH}" + ( + USER="" ./packaging/makeself/build-static.sh "${BUILDARCH}" + ) >&2 +} + +prepare_assets() { + progress "Preparing assets" + ( + cp packaging/version artifacts/latest-version.txt + + cd artifacts || exit 1 + ln -s "${BASENAME}.gz.run" "netdata-${BUILDARCH}-latest.gz.run" + if [ "${BUILDARCH}" = "x86_64" ]; then + ln -s "${BASENAME}.gz.run" netdata-latest.gz.run + fi + ) >&2 +} + +steps="prepare_build build_static" +steps="$steps prepare_assets" + +_main() { + for step in $steps; do + if ! run "$step"; then + if [ -t 1 ]; then + debug + else + fail "Build failed" + fi + fi + done + + echo "🎉 All Done!" +} + +if [ -n "$0" ] && [ x"$0" != x"-bash" ]; then + _main "$@" +fi diff --git a/.github/scripts/bump-packaging-version.sh b/.github/scripts/bump-packaging-version.sh new file mode 100755 index 0000000..bffcb0c --- /dev/null +++ b/.github/scripts/bump-packaging-version.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +VERSION="$(git describe)" +echo "$VERSION" > packaging/version +git add -A +git ci -m "[netdata nightly] $VERSION" diff --git a/.github/scripts/check-updater.sh b/.github/scripts/check-updater.sh new file mode 100755 index 0000000..3df0c9d --- /dev/null +++ b/.github/scripts/check-updater.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# +set -e +# shellcheck source=.github/scripts/functions.sh +. "$(dirname "$0")/functions.sh" + +check_successful_update() { + progress "Check netdata version after update" + ( + netdata_version=$(netdata -v | awk '{print $2}') + updater_version=$(cat packaging/version) + if [ "$netdata_version" = "$updater_version" ]; then + echo "Update successful!" + else + exit 1 + fi + ) >&2 +} + +check_autoupdate_enabled() { + progress "Check autoupdate still enabled after update" + ( + if [ -f /etc/periodic/daily/netdata-updater ] || [ -f /etc/cron.daily/netdata-updater ]; then + echo "Update successful!" + else + exit 1 + fi + ) >&2 +} + +steps="check_successful_update check_autoupdate_enabled" + +_main() { + for step in $steps; do + if ! run "$step"; then + if [ -t 1 ]; then + debug + else + fail "Build failed" + fi + fi + done + + echo "🎉 All Done!" +} + +if [ -n "$0" ] && [ x"$0" != x"-bash" ]; then + _main "$@" +fi diff --git a/.github/scripts/ci-support-pkgs.sh b/.github/scripts/ci-support-pkgs.sh new file mode 100755 index 0000000..bfa9c83 --- /dev/null +++ b/.github/scripts/ci-support-pkgs.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +# This script installs supporting packages needed for CI, which provide following: +# cron, pidof + +set -e + +if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || [ -f /etc/fedora-release ] || [ -f /etc/almalinux-release ]; then + # Alma, Fedora, CentOS, Redhat + dnf install -y procps-ng cronie cronie-anacron || yum install -y procps-ng cronie cronie-anacron +elif [ -f /etc/arch-release ]; then + # Arch + pacman -S --noconfirm cronie +fi diff --git a/.github/scripts/docker-test.sh b/.github/scripts/docker-test.sh new file mode 100755 index 0000000..22821d1 --- /dev/null +++ b/.github/scripts/docker-test.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +export DEBIAN_FRONTEND=noninteractive + +wait_for() { + host="${1}" + port="${2}" + name="${3}" + timeout="30" + + if command -v nc > /dev/null ; then + netcat="nc" + elif command -v netcat > /dev/null ; then + netcat="netcat" + else + printf "Unable to find a usable netcat command.\n" + return 1 + fi + + printf "Waiting for %s on %s:%s ... " "${name}" "${host}" "${port}" + + sleep 30 + + i=0 + while ! ${netcat} -z "${host}" "${port}"; do + sleep 1 + if [ "$i" -gt "$timeout" ]; then + printf "Timed out!\n" + return 1 + fi + i="$((i + 1))" + done + printf "OK\n" +} + +if [ -z "$(command -v nc 2>/dev/null)" ] && [ -z "$(command -v netcat 2>/dev/null)" ]; then + sudo apt-get update && sudo apt-get upgrade -y && sudo apt-get install -y netcat +fi + +docker run -d --name=netdata \ + -p 19999:19999 \ + -v netdataconfig:/etc/netdata \ + -v netdatalib:/var/lib/netdata \ + -v netdatacache:/var/cache/netdata \ + -v /etc/passwd:/host/etc/passwd:ro \ + -v /etc/group:/host/etc/group:ro \ + -v /proc:/host/proc:ro \ + -v /sys:/host/sys:ro \ + -v /etc/os-release:/host/etc/os-release:ro \ + --cap-add SYS_PTRACE \ + --security-opt apparmor=unconfined \ + netdata/netdata:test + +wait_for localhost 19999 netdata || exit 1 + +curl -sS http://127.0.0.1:19999/api/v1/info > ./response || exit 1 + +cat ./response + +jq '.version' ./response || exit 1 diff --git a/.github/scripts/functions.sh b/.github/scripts/functions.sh new file mode 100644 index 0000000..7cd2e08 --- /dev/null +++ b/.github/scripts/functions.sh @@ -0,0 +1,69 @@ +#!/bin/sh + +# This file is included by download.sh & build.sh + +set -e + +color() { + fg="$1" + bg="${2}" + ft="${3:-0}" + + printf "\33[%s;%s;%s" "$ft" "$fg" "$bg" +} + +color_reset() { + printf "\033[0m" +} + +ok() { + if [ -t 1 ]; then + printf "%s[ OK ]%s\n" "$(color 37 42m 1)" "$(color_reset)" + else + printf "%s\n" "[ OK ]" + fi +} + +err() { + if [ -t 1 ]; then + printf "%s[ ERR ]%s\n" "$(color 37 41m 1)" "$(color_reset)" + else + printf "%s\n" "[ ERR ]" + fi +} + +run() { + retval=0 + logfile="$(mktemp -t "run-XXXXXX")" + if "$@" 2> "$logfile"; then + ok + else + retval=$? + err + tail -n 100 "$logfile" || true + fi + rm -rf "$logfile" + return $retval +} + +progress() { + printf "%-40s" "$(printf "%s ... " "$1")" +} + +log() { + printf "%s\n" "$1" +} + +error() { + log "ERROR: ${1}" +} + +fail() { + log "FATAL: ${1}" + exit 1 +} + +debug() { + log "Dropping into a shell for debugging ..." + exec /bin/sh +} diff --git a/.github/scripts/gen-docker-tags.py b/.github/scripts/gen-docker-tags.py new file mode 100755 index 0000000..df4dc02 --- /dev/null +++ b/.github/scripts/gen-docker-tags.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +import sys + +version = sys.argv[1].split('.') +suffix = sys.argv[2] + +REPO = f'netdata/netdata{suffix}' + +MAJOR = ':'.join([REPO, version[0]]) +MINOR = ':'.join([REPO, '.'.join(version[0:2])]) +PATCH = ':'.join([REPO, '.'.join(version[0:3])]) + +print(','.join([MAJOR, MINOR, PATCH])) diff --git a/.github/scripts/get-static-cache-key.sh b/.github/scripts/get-static-cache-key.sh new file mode 100755 index 0000000..d9fa285 --- /dev/null +++ b/.github/scripts/get-static-cache-key.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +arch="${1}" +platform="$(packaging/makeself/uname2platform.sh "${arch}")" + +docker pull --platform "${platform}" netdata/static-builder + +# shellcheck disable=SC2046 +cat $(find packaging/makeself/jobs -type f ! -regex '.*\(netdata\|-makeself\).*') > /tmp/static-cache-key-data + +docker run -it --rm --platform "${platform}" netdata/static-builder sh -c 'apk list -I 2>/dev/null' >> /tmp/static-cache-key-data + +h="$(sha256sum /tmp/static-cache-key-data | cut -f 1 -d ' ')" + +echo "::set-output name=key::static-${arch}-${h}" diff --git a/.github/scripts/netdata-pkgcloud-cleanup.py b/.github/scripts/netdata-pkgcloud-cleanup.py new file mode 100755 index 0000000..f6311e4 --- /dev/null +++ b/.github/scripts/netdata-pkgcloud-cleanup.py @@ -0,0 +1,190 @@ +#!/bin/env python3 + +import requests +from requests.auth import HTTPBasicAuth +from datetime import date, datetime, timedelta +import os +import sys +import argparse +from pprint import pprint +from datetime import datetime +from dateutil import parser + + +class PackageCloud: + NUM_PACKAGE_MINOR_TO_KEEP = 5 + NUM_RETENTION_DAYS = 30 + # number of pages to process. Use '0' to process all + MAX_PAGES = 0 + + def __init__(self, repo_type, dry_run=True, auth_token=None): + self.headers = { + "Accept" : "application/json", + "Content-Type" : "application/json", + } + self.dry_run = dry_run + self.repo_type = repo_type + if repo_type == "stable": + repo = "netdata/netdata" + elif repo_type == "devel": + repo = "netdata/netdata-devel" + elif repo_type == "edge": + repo = "netdata/netdata-edge" + else: + print(f"ERROR: unknown repo type '{repo_type}'!\nAccepted values are: stable,devel,edge") + sys.exit(1) + self.base_url = f"https://packagecloud.io/api/v1/repos/{repo}" + self.auth = HTTPBasicAuth(username=auth_token, password='') if auth_token else None + + def get_all_packages(self): + page = 1 + all_pkg_list = [] + while True: + url = f"{self.base_url}/packages.json?page={page}" + if page > self.MAX_PAGES and self.MAX_PAGES != 0: + break + else: + pkg_list = requests.get(url, auth=self.auth, headers=self.headers).json() + if len(pkg_list) == 0: + break + else: + print(f"Processing page: {page}") + for element in pkg_list: + self.is_pkg_older_than_days(element, 30) + if element['name'] != 'netdata-repo' and element['name'] != 'netdata-repo-edge': + all_pkg_list.append(element) + page += 1 + return all_pkg_list + + def delete_package(self, destroy_url): + if self.dry_run: + print(f" - DRY_RUN mode. Not deleting package '{destroy_url}'.") + else: + print(f" - Deleting package: {destroy_url}") + url = f"https://packagecloud.io{destroy_url}" + response = requests.delete(url, auth=self.auth, headers=self.headers).json() + response = None + if not response: + print(f" Package deleted successfully.") + else: + print(f" Failed deleting package!") + + def get_destroy_url(self, pkg_url): + url = f"https://packagecloud.io{pkg_url}" + response = requests.get(url, auth=self.auth, headers=self.headers) + response.raise_for_status() + return response.json()['destroy_url'] + + def get_packages_for_distro(self, distro, all_pkg_list): + distro_pkg_list = [ pkg for pkg in all_pkg_list if pkg['distro_version'] == distro ] + return distro_pkg_list + + def get_packages_for_arch(self, arch, all_pkg_list): + arch_pkg_list = [ pkg for pkg in all_pkg_list if pkg['package_url'].split('/')[11] == arch ] + return arch_pkg_list + + def get_arches(self, pkg_list): + arches = list(set([pkg['package_url'].split('/')[11] for pkg in pkg_list ])) + return arches + + def get_pkg_list(self, pkg_name, pkg_list): + filtered_list = [ pkg for pkg in pkg_list if pkg['name'] == pkg_name ] + return filtered_list + + def get_minor_versions(self, all_versions): + minor_versions = ['.'.join(version.split('.')[:-1]) for version in all_versions ] + minor_versions = list(set(minor_versions)) + minor_versions.sort() + return minor_versions + + def is_pkg_older_than_days(self, pkg, num_days): + pkg_create_date = datetime.strptime(pkg['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ') + time_difference = datetime.now() - pkg_create_date + return time_difference.days > num_days + + def cleanup_repo(self): + if self.repo_type == 'stable': + self.cleanup_stable_repo() + else: + self.cleanup_edge_repo() + + def cleanup_edge_repo(self): + all_pkg_list = self.get_all_packages() + pkgs_to_delete = [] + pkgs_to_keep = [] + for package in all_pkg_list: + if self.is_pkg_older_than_days(package, self.NUM_RETENTION_DAYS): + pkgs_to_delete.append(package) + else: + pkgs_to_keep.append(package) + print(f"Keeping the following packages (newer than {self.NUM_RETENTION_DAYS} days):") + for pkg in pkgs_to_keep: + print(f" > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}") + print(f"Deleting the following packages (older than {self.NUM_RETENTION_DAYS} days):") + for pkg in pkgs_to_delete: + print(f" > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}") + self.delete_package(pkg['destroy_url']) + + def cleanup_stable_repo(self): + all_pkg_list = self.get_all_packages() + all_distros = list(set([ pkg['distro_version'] for pkg in all_pkg_list ])) + all_distros = sorted(all_distros) + print(f"<> Distributions list: {all_distros}") + + for distro in all_distros: + print(f">> Processing distro: {distro}") + pkg_list_distro = self.get_packages_for_distro(distro, all_pkg_list) + arches = self.get_arches(pkg_list_distro) + print(f" <> Arch list: {arches}") + for arch in arches: + print(f" >> Processing arch: {distro} -> {arch}") + pkg_list_arch = self.get_packages_for_arch(arch, pkg_list_distro) + pkg_names = [pkg['name'] for pkg in pkg_list_arch] + pkg_names = list(set(pkg_names)) + print(f" <> Package names: {pkg_names}") + for pkg_name in pkg_names: + print(f" >> Processing package: {distro} -> {arch} -> {pkg_name}") + pkg_list = self.get_pkg_list(pkg_name, pkg_list_arch) + pkg_versions = [pkg['version'] for pkg in pkg_list] + pkg_minor_versions = self.get_minor_versions(pkg_versions) + pkg_minor_to_keep = pkg_minor_versions[-self.NUM_PACKAGE_MINOR_TO_KEEP:] + print(f" <> Minor Package Versions to Keep: {pkg_minor_to_keep}") + pkg_minor_to_delete = list(set(pkg_minor_versions) - set(pkg_minor_to_keep)) + print(f" <> Minor Package Versions to Delete: {pkg_minor_to_delete}") + urls_to_keep = [pkg['package_url'] for pkg in pkg_list if '.'.join(pkg['version'].split('.')[:-1]) in pkg_minor_to_keep] + urls_to_delete = [pkg['package_url'] for pkg in pkg_list if '.'.join(pkg['version'].split('.')[:-1]) in pkg_minor_to_delete] + for pkg_url in urls_to_delete: + destroy_url = self.get_destroy_url(pkg_url) + self.delete_package(destroy_url) + + +def configure(): + parser = argparse.ArgumentParser() + parser.add_argument('--repo-type', '-r', required=True, + help='Repository type against to perform cleanup') + parser.add_argument('--dry-run', '-d', action='store_true', + help='Dry-run Mode') + args = parser.parse_args() + try: + token = os.environ['PKGCLOUD_TOKEN'] + except Exception as e: + print(f"FATAL: 'PKGCLOUD_TOKEN' environment variable is not set!", file=sys.stderr) + sys.exit(1) + repo_type = args.repo_type + dry_run = args.dry_run + conf = { + 'repo_type': args.repo_type, + 'dry_run': args.dry_run, + 'token': token + } + return conf + + +def main(): + config = configure() + pkg_cloud = PackageCloud(config['repo_type'], config['dry_run'], config['token']) + pkg_cloud.cleanup_repo() + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/package-upload.sh b/.github/scripts/package-upload.sh new file mode 100755 index 0000000..fd8a8cd --- /dev/null +++ b/.github/scripts/package-upload.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +set -e + +host="packages.netdata.cloud" +user="netdatabot" + +distro="${1}" +arch="${2}" +format="${3}" +repo="${4}" + +staging="${TMPDIR:-/tmp}/package-staging" +prefix="/home/netdatabot/incoming/${repo}/" + +packages="$(find artifacts -name "*.${format}")" + +mkdir -p "${staging}" + +case "${format}" in + deb) + src="${staging}/$(echo "${distro}" | cut -f 1 -d '/')/pool/" + mkdir -p "${src}" + + for pkg in ${packages}; do + cp "${pkg}" "${src}" + done + ;; + rpm) + src="${staging}/${distro}/${arch}/" + mkdir -p "${src}" + + for pkg in ${packages}; do + cp "${pkg}" "${src}" + done + ;; + *) + echo "Unrecognized package format ${format}." + exit 1 + ;; +esac + +rsync -vrptO "${staging}/" "${user}@${host}:${prefix}" diff --git a/.github/scripts/package_cloud_wrapper.sh b/.github/scripts/package_cloud_wrapper.sh new file mode 100755 index 0000000..7640ef4 --- /dev/null +++ b/.github/scripts/package_cloud_wrapper.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# +# This is a tool to help removal of packages from packagecloud.io +# It utilizes the package_cloud utility provided from packagecloud.io +# +# Depends on: +# 1) package cloud gem (detects absence and installs it) +# +# Requires: +# 1) PKG_CLOUD_TOKEN variable exported +# 2) To properly install package_cloud when not found, it requires: ruby gcc gcc-c++ ruby-devel +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Pavlos Emm. Katsoulakis (paul@netdata.cloud) +#shellcheck disable=SC2068,SC2145 +set -e +PKG_CLOUD_CONFIG="$HOME/.package_cloud_configuration.cfg" + +# If we are not in netdata git repo, at the top level directory, fail +TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)") +CWD=$(git rev-parse --show-cdup) +if [ -n "$CWD" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then + echo "Run as .github/scripts/$(basename "$0") from top level directory of netdata git repository" + echo "Docker build process aborted" + exit 1 +fi + +# Install dependency if not there +if ! command -v package_cloud > /dev/null 2>&1; then + echo "No package cloud gem found, installing" + sudo gem install -V package_cloud || (echo "Package cloud installation failed. you might want to check if required dependencies are there (ruby gcc gcc-c++ ruby-devel)" && exit 1) +else + echo "Found package_cloud gem, continuing" +fi + +# Check for required token and prepare config +if [ -z "${PKG_CLOUD_TOKEN}" ]; then + echo "Please set PKG_CLOUD_TOKEN to be able to use ${0}" + exit 1 +fi +echo "{\"url\":\"https://packagecloud.io\",\"token\":\"${PKG_CLOUD_TOKEN}\"}" > "${PKG_CLOUD_CONFIG}" + +echo "Executing package_cloud with config ${PKG_CLOUD_CONFIG} and parameters $@" +package_cloud $@ --config="${PKG_CLOUD_CONFIG}" + +rm -rf "${PKG_CLOUD_CONFIG}" +echo "Done!" diff --git a/.github/scripts/pkg-test.sh b/.github/scripts/pkg-test.sh new file mode 100755 index 0000000..e3bc3e7 --- /dev/null +++ b/.github/scripts/pkg-test.sh @@ -0,0 +1,138 @@ +#!/bin/sh + +install_debian_like() { + # This is needed to ensure package installs don't prompt for any user input. + export DEBIAN_FRONTEND=noninteractive + + if apt-cache show netcat 2>&1 | grep -q "No packages found"; then + netcat="netcat-traditional" + else + netcat="netcat" + fi + + apt-get update + + # Install Netdata + apt-get install -y /netdata/artifacts/netdata_"${VERSION}"*_*.deb || exit 1 + + # Install testing tools + apt-get install -y --no-install-recommends curl "${netcat}" jq || exit 1 +} + +install_fedora_like() { + # Using a glob pattern here because I can't reliably determine what the + # resulting package name will be (TODO: There must be a better way!) + + PKGMGR="$( (command -v dnf > /dev/null && echo "dnf") || echo "yum")" + + pkg_version="$(echo "${VERSION}" | tr - .)" + + # Install Netdata + "$PKGMGR" install -y /netdata/artifacts/netdata-"${pkg_version}"-*.rpm + + # Install testing tools + "$PKGMGR" install -y curl nc jq || exit 1 +} + +install_centos() { + # Using a glob pattern here because I can't reliably determine what the + # resulting package name will be (TODO: There must be a better way!) + + PKGMGR="$( (command -v dnf > /dev/null && echo "dnf") || echo "yum")" + + pkg_version="$(echo "${VERSION}" | tr - .)" + + if [ "${PKGMGR}" = "dnf" ]; then + opts="--allowerasing" + fi + + # Install EPEL (needed for `jq` + "$PKGMGR" install -y epel-release || exit 1 + + # Install Netdata + "$PKGMGR" install -y /netdata/artifacts/netdata-"${pkg_version}"-*.rpm + + # Install testing tools + "$PKGMGR" install -y ${opts} curl nc jq || exit 1 +} + +install_suse_like() { + # Using a glob pattern here because I can't reliably determine what the + # resulting package name will be (TODO: There must be a better way!) + + pkg_version="$(echo "${VERSION}" | tr - .)" + + # Install Netdata + zypper install -y --allow-unsigned-rpm /netdata/artifacts/netdata-"${pkg_version}"-*.rpm + + # Install testing tools + zypper install -y --no-recommends curl netcat-openbsd jq || exit 1 +} + +dump_log() { + cat ./netdata.log +} + +wait_for() { + host="${1}" + port="${2}" + name="${3}" + timeout="30" + + if command -v nc > /dev/null ; then + netcat="nc" + elif command -v netcat > /dev/null ; then + netcat="netcat" + else + printf "Unable to find a usable netcat command.\n" + return 1 + fi + + printf "Waiting for %s on %s:%s ... " "${name}" "${host}" "${port}" + + sleep 30 + + i=0 + while ! ${netcat} -z "${host}" "${port}"; do + sleep 1 + if [ "$i" -gt "$timeout" ]; then + printf "Timed out!\n" + return 1 + fi + i="$((i + 1))" + done + printf "OK\n" +} + +case "${DISTRO}" in + debian | ubuntu) + install_debian_like + ;; + fedora | oraclelinux) + install_fedora_like + ;; + centos | rockylinux | almalinux) + install_centos + ;; + opensuse) + install_suse_like + ;; + *) + printf "ERROR: unsupported distro: %s_%s\n" "${DISTRO}" "${DISTRO_VERSION}" + exit 1 + ;; +esac + +trap dump_log EXIT + +/usr/sbin/netdata -D > ./netdata.log 2>&1 & + +wait_for localhost 19999 netdata || exit 1 + +curl -sS http://127.0.0.1:19999/api/v1/info > ./response || exit 1 + +cat ./response + +jq '.version' ./response || exit 1 + +trap - EXIT diff --git a/.github/scripts/prepare-release-base.sh b/.github/scripts/prepare-release-base.sh new file mode 100755 index 0000000..7c24f6b --- /dev/null +++ b/.github/scripts/prepare-release-base.sh @@ -0,0 +1,176 @@ +#!/bin/sh + +set -e + +REPO="${1}" +EVENT_NAME="${2}" +EVENT_TYPE="${3}" +EVENT_VERSION="${4}" +RELEASE_TEST="${5}" + +############################################################## +# Version validation functions + +check_version_format() { + if ! echo "${EVENT_VERSION}" | grep -qE '^v[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+$'; then + echo "::error::The supplied version (${EVENT_VERSION}) is not a valid version string." + return 1 + fi +} + +patch_is_zero() { + if ! echo "${EVENT_VERSION}" | grep -qE '^v[[:digit:]]+\.[[:digit:]]+\.0$'; then + echo "::error::The patch number for a ${EVENT_TYPE} build must be 0." + return 1 + fi +} + +minor_is_zero() { + if ! echo "${EVENT_VERSION}" | grep -qE '^v[[:digit:]]+\.0'; then + echo "::error::The minor version number for a ${EVENT_TYPE} build must be 0." + return 1 + fi +} + +major_matches() { + current_major="$(cut -f 1 -d '-' packaging/version | cut -f 1 -d '.' | cut -f 2 -d 'v')" + target_major="$(echo "${EVENT_VERSION}" | cut -f 1 -d '.' | cut -f 2 -d 'v')" + + if [ "${target_major}" != "${current_major}" ]; then + echo "::error::Major version mismatch, expected ${current_major} but got ${target_major}." + return 1 + fi +} + +minor_matches() { + current_minor="$(cut -f 1 -d '-' packaging/version | cut -f 2 -d '.')" + target_minor="$(echo "${EVENT_VERSION}" | cut -f 2 -d '.')" + + if [ "${target_minor}" != "${current_minor}" ]; then + echo "::error::Minor version mismatch, expected ${current_minor} but got ${target_minor}." + return 1 + fi +} + +check_for_existing_tag() { + if git tag | grep -qE "^${EVENT_VERSION}$"; then + echo "::error::A tag for version ${EVENT_VERSION} already exists." + return 1 + fi +} + +check_newer_major_version() { + current="$(cut -f 1 -d '-' packaging/version | cut -f 1 -d '.' | cut -f 2 -d 'v')" + target="$(echo "${EVENT_VERSION}" | cut -f 1 -d '.' | cut -f 2 -d 'v')" + + if [ "${target}" -le "${current}" ]; then + echo "::error::Version ${EVENT_VERSION} is not newer than the current version." + return 1 + fi +} + +check_newer_minor_version() { + current="$(cut -f 1 -d '-' packaging/version | cut -f 2 -d '.')" + target="$(echo "${EVENT_VERSION}" | cut -f 2 -d '.')" + + if [ "${target}" -le "${current}" ]; then + echo "::error::Version ${EVENT_VERSION} is not newer than the current version." + return 1 + fi +} + +check_newer_patch_version() { + current="$(cut -f 1 -d '-' packaging/version | cut -f 3 -d '.')" + target="$(echo "${EVENT_VERSION}" | cut -f 3 -d '.')" + + if [ "${target}" -le "${current}" ]; then + echo "::error::Version ${EVENT_VERSION} is not newer than the current version." + return 1 + fi +} + +############################################################## +# Core logic + +git config user.name "netdatabot" +git config user.email "bot@netdata.cloud" + +if [ "${REPO}" != "netdata/netdata" ] && [ -z "${RELEASE_TEST}" ]; then + echo "::notice::Not running in the netdata/netdata repository, not queueing a release build." + echo "::set-output name=run::false" +elif [ "${EVENT_NAME}" = 'schedule' ] || [ "${EVENT_TYPE}" = 'nightly' ]; then + echo "::notice::Preparing a nightly release build." + LAST_TAG=$(git describe --abbrev=0 --tags) + COMMITS_SINCE_RELEASE=$(git rev-list "${LAST_TAG}"..HEAD --count) + NEW_VERSION="${LAST_TAG}-$((COMMITS_SINCE_RELEASE + 1))-nightly" + LAST_VERSION_COMMIT="$(git rev-list -1 HEAD packaging/version)" + HEAD_COMMIT="$(git rev-parse HEAD)" + if [ "${EVENT_NAME}" = 'schedule' ] && [ "${LAST_VERSION_COMMIT}" = "${HEAD_COMMIT}" ] && grep -qE '.*-nightly$' packaging/version; then + echo "::notice::No commits since last nightly build, not publishing a new nightly build." + echo "::set-output name=run::false" + else + echo "${NEW_VERSION}" > packaging/version || exit 1 + echo "::set-output name=run::true" + echo "::set-output name=message::Update changelog and version for nightly build: ${NEW_VERSION}." + echo "::set-output name=ref::master" + echo "::set-output name=type::nightly" + echo "::set-output name=branch::master" + echo "::set-output name=version::nightly" + fi +elif [ "${EVENT_TYPE}" = 'patch' ] && [ "${EVENT_VERSION}" != "nightly" ]; then + echo "::notice::Preparing a patch release build." + check_version_format || exit 1 + check_for_existing_tag || exit 1 + branch_name="$(echo "${EVENT_VERSION}" | cut -f 1-2 -d '.')" + if ! git checkout "${branch_name}"; then + echo "::error::Could not find a branch for the ${branch_name}.x release series." + exit 1 + fi + minor_matches || exit 1 + major_matches || exit 1 + check_newer_patch_version || exit 1 + echo "${EVENT_VERSION}" > packaging/version || exit 1 + echo "::set-output name=run::true" + echo "::set-output name=message::Patch release ${EVENT_VERSION}." + echo "::set-output name=ref::${EVENT_VERSION}" + echo "::set-output name=type::release" + echo "::set-output name=branch::${branch_name}" + echo "::set-output name=version::$(tr -d 'v' < packaging/version)" +elif [ "${EVENT_TYPE}" = 'minor' ] && [ "${EVENT_VERSION}" != "nightly" ]; then + echo "::notice::Preparing a minor release build." + check_version_format || exit 1 + patch_is_zero || exit 1 + major_matches || exit 1 + check_newer_minor_version || exit 1 + check_for_existing_tag || exit 1 + branch_name="$(echo "${EVENT_VERSION}" | cut -f 1-2 -d '.')" + if [ -n "$(git branch --list "${branch_name}")" ]; then + echo "::error::A branch named ${branch_name} already exists in the repository." + exit 1 + fi + echo "${EVENT_VERSION}" > packaging/version || exit 1 + echo "::set-output name=run::true" + echo "::set-output name=message::Minor release ${EVENT_VERSION}." + echo "::set-output name=ref::${EVENT_VERSION}" + echo "::set-output name=type::release" + echo "::set-output name=branch::master" + echo "::set-output name=new-branch::${branch_name}" + echo "::set-output name=version::$(tr -d 'v' < packaging/version)" +elif [ "${EVENT_TYPE}" = 'major' ] && [ "${EVENT_VERSION}" != "nightly" ]; then + echo "::notice::Preparing a major release build." + check_version_format || exit 1 + minor_is_zero || exit 1 + patch_is_zero || exit 1 + check_newer_major_version || exit 1 + check_for_existing_tag || exit 1 + echo "${EVENT_VERSION}" > packaging/version || exit 1 + echo "::set-output name=run::true" + echo "::set-output name=message::Major release ${EVENT_VERSION}" + echo "::set-output name=ref::${EVENT_VERSION}" + echo "::set-output name=type::release" + echo "::set-output name=branch::master" + echo "::set-output name=version::$(tr -d 'v' < packaging/version)" +else + echo '::error::Unrecognized release type or invalid version.' + exit 1 +fi diff --git a/.github/scripts/run-updater-check.sh b/.github/scripts/run-updater-check.sh new file mode 100755 index 0000000..31ab71d --- /dev/null +++ b/.github/scripts/run-updater-check.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +echo ">>> Installing CI support packages..." +/netdata/.github/scripts/ci-support-pkgs.sh +echo ">>> Installing Netdata..." +/netdata/packaging/installer/kickstart.sh --dont-wait --build-only --disable-telemetry || exit 1 +echo "::group::Environment File Contents" +cat /etc/netdata/.environment +echo "::endgroup::" +echo ">>> Updating Netdata..." +export NETDATA_NIGHTLIES_BASEURL="http://localhost:8080/artifacts/" # Pull the tarball from the local web server. +/netdata/packaging/installer/netdata-updater.sh --not-running-from-cron --no-updater-self-update || exit 1 +echo ">>> Checking if update was successful..." +/netdata/.github/scripts/check-updater.sh || exit 1 diff --git a/.github/scripts/run_install_with_dist_file.sh b/.github/scripts/run_install_with_dist_file.sh new file mode 100755 index 0000000..d59e8b1 --- /dev/null +++ b/.github/scripts/run_install_with_dist_file.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# +# This script is evaluating netdata installation with the source from make dist +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Pavlos Emm. Katsoulakis &2 "Usage: %s \n" "$(basename "$0")" + exit 1 +fi + +distfile="${1}" +shift + +printf >&2 "Opening dist archive %s ... " "${distfile}" +tar -xovf "${distfile}" +distdir="$(echo "${distfile}" | rev | cut -d. -f3- | rev)" +cp -a packaging/installer/install-required-packages.sh "${distdir}/install-required-packages.sh" +if [ ! -d "${distdir}" ]; then + printf >&2 "ERROR: %s is not a directory" "${distdir}" + exit 2 +fi + +printf >&2 "Entering %s and starting docker run ..." "${distdir}" + +pushd "${distdir}" || exit 1 +docker run \ + -e DISABLE_TELEMETRY=1 \ + -v "${PWD}:/netdata" \ + -w /netdata \ + "ubuntu:latest" \ + /bin/bash -c "./install-required-packages.sh --dont-wait --non-interactive netdata && apt install wget && ./netdata-installer.sh --dont-wait --require-cloud --disable-telemetry --install /tmp --one-time-build && echo \"Validating netdata instance is running\" && wget -O - 'http://127.0.0.1:19999/api/v1/info' | grep version" +popd || exit 1 + +echo "All Done!" diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 0000000..abf927a --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,18 @@ +--- +only: issues +limitPerRun: 30 +daysUntilStale: 30 +daysUntilClose: 7 +exemptLabels: + - bug + - help wanted + - feature request +exemptProjects: true +exemptMilestones: true +staleLabel: stale +markComment: > + This issue has been inactive for 30 days. + It will be closed in one week, unless it is updated. +closeComment: > + This issue has been automatically closed due to extended period of inactivity. + Please reopen if it is still valid. Thank you for your contributions. diff --git a/.github/workflows/add-to-project.yml b/.github/workflows/add-to-project.yml new file mode 100644 index 0000000..a80d8b4 --- /dev/null +++ b/.github/workflows/add-to-project.yml @@ -0,0 +1,26 @@ +name: Add issues to Agent Board + +on: + issues: + types: + - opened + - transferred + +jobs: + add-to-project: + name: Add issue to project + if: github.repository == 'netdata/netdata' + runs-on: ubuntu-latest + steps: + - name: Add issues to Agent project board + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/netdata/projects/32 + github-token: ${{ secrets.NETDATABOT_ORG_GITHUB_TOKEN }} + + - name: Add issues to Product Bug project board + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/netdata/projects/45 + github-token: ${{ secrets.NETDATABOT_ORG_GITHUB_TOKEN }} + labeled: bug diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..53f1590 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,861 @@ +--- +# Ci code for building release artifacts. +name: Build +on: + push: # Master branch checks only validate the build and generate artifacts for testing. + branches: + - master + pull_request: null # PR checks only validate the build and generate artifacts for testing. + workflow_dispatch: # Dispatch runs build and validate, then push to the appropriate storage location. + inputs: + type: + description: Build Type + default: nightly + required: true + version: + description: Version Tag + default: nightly + required: true +concurrency: # This keeps multiple instances of the job from running concurrently for the same ref and event type. + group: build-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true +jobs: + build-dist: # Build the distribution tarball and store it as an artifact. + name: Build Distribution Tarball + runs-on: ubuntu-latest + outputs: + distfile: ${{ steps.build.outputs.distfile }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: recursive + - name: Fix tags + id: fix-tags + if: github.event_name != 'push' + run: | + git fetch --tags --force + - name: Mark Stable + id: channel + if: github.event_name == 'workflow_dispatch' && github.event.inputs.type != 'nightly' + run: | + sed -i 's/^RELEASE_CHANNEL="nightly" *#/RELEASE_CHANNEL="stable" #/' netdata-installer.sh + - name: Build + id: build + run: | + git describe + mkdir -p artifacts + ./packaging/installer/install-required-packages.sh --dont-wait --non-interactive netdata + autoreconf -ivf + ./configure --prefix=/usr \ + --sysconfdir=/etc \ + --localstatedir=/var \ + --libexecdir=/usr/libexec \ + --with-zlib \ + --with-math \ + --with-user=netdata + make dist + echo "::set-output name=distfile::$(find . -name 'netdata-*.tar.gz')" + cp netdata-*.tar.gz artifacts/ + - name: Store + id: store + uses: actions/upload-artifact@v3 + with: + name: dist-tarball + path: artifacts/*.tar.gz + retention-days: 30 + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Distribution tarball creation failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to create source tarball for distribution. + Checkout: ${{ steps.checkout.outcome }} + Fix Tags: ${{ steps.fix-tags.outcome }} + Mark stable: ${{ steps.channel.outcome }} + Build: ${{ steps.build.outcome }} + Store: ${{ steps.store.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + build-static: # Build the static binary archives, and store them as artifacts. + name: Build Static + runs-on: ubuntu-latest + strategy: + matrix: + arch: + - x86_64 + - armv7l + - aarch64 + - ppc64le + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: recursive + - name: Fix tags + id: fix-tags + if: github.event_name != 'push' + run: | + git fetch --tags --force + - name: Mark Stable + id: channel + if: github.event_name == 'workflow_dispatch' && github.event.inputs.type != 'nightly' + run: | + sed -i 's/^RELEASE_CHANNEL="nightly" *#/RELEASE_CHANNEL="stable" #/' netdata-installer.sh packaging/makeself/install-or-update.sh + - name: Get Cache Key + id: cache-key + run: .github/scripts/get-static-cache-key.sh ${{ matrix.arch }} + - name: Cache + id: cache + uses: actions/cache@v3 + with: + path: artifacts/cache + key: ${{ steps.cache-key.outputs.key }} + - name: Build + if: github.event_name != 'workflow_dispatch' # Don’t use retries on PRs. + run: .github/scripts/build-static.sh ${{ matrix.arch }} + - name: Build + if: github.event_name == 'workflow_dispatch' + id: build + uses: nick-fields/retry@v2 + with: + timeout_minutes: 180 + retries: 3 + command: .github/scripts/build-static.sh ${{ matrix.arch }} + - name: Store + id: store + uses: actions/upload-artifact@v3 + with: + name: static-archive + path: artifacts/*.gz.run + retention-days: 30 + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Static build failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to create static installer archive for ${{ matrix.arch }}. + Checkout: ${{ steps.checkout.outcome }} + Fix Tags: ${{ steps.fix-tags.outcome }} + Mark stable: ${{ steps.channel.outcome }} + Build: ${{ steps.build.outcome }} + Store: ${{ steps.store.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + matrix: # Generate the shared build matrix for our build tests. + name: Prepare Build Matrix + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Prepare tools + id: prepare + run: | + sudo apt-get update && sudo apt-get install -y python3-ruamel.yaml + - name: Read build matrix + id: set-matrix + shell: python3 {0} + run: | + from ruamel.yaml import YAML + import json + yaml = YAML(typ='safe') + entries = list() + + with open('.github/data/distros.yml') as f: + data = yaml.load(f) + + for i, v in enumerate(data['include']): + e = { + 'artifact_key': v['distro'] + str(v['version']).replace('.', ''), + 'version': v['version'], + } + + if 'base_image' in v: + e['distro'] = ':'.join([v['base_image'], str(v['version'])]) + else: + e['distro'] = ':'.join([v['distro'], str(v['version'])]) + + if 'env_prep' in v: + e['env_prep'] = v['env_prep'] + + if 'jsonc_removal' in v: + e['jsonc_removal'] = v['jsonc_removal'] + + entries.append(e) + + entries.sort(key=lambda k: k['distro']) + matrix = json.dumps({'include': entries}, sort_keys=True) + print('Generated Matrix: ' + matrix) + print('::set-output name=matrix::' + matrix) + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Build matrix preparation failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to prepare build matrix for build checks. + Checkout: ${{ steps.checkout.outcome }} + Prepare tools: ${{ steps.prepare.outcome }} + Read build matrix: ${{ steps.set-matrix.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + prepare-test-images: # Prepare the test environments for our build checks. This also checks dependency handling code for each tested environment. + name: Prepare Test Environments + runs-on: ubuntu-latest + needs: + - matrix + env: + RETRY_DELAY: 300 + strategy: + # Unlike the actal build tests, this completes _very_ fast (average of about 3 minutes for each job), so we + # just run everything in parallel instead lof limiting job concurrency. + fail-fast: false + matrix: ${{ fromJson(needs.matrix.outputs.matrix) }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Setup Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + - name: Build test environment + id: build1 + uses: docker/build-push-action@v3 + continue-on-error: true # We retry 3 times at 5 minute intervals if there is a failure here. + with: + push: false + load: false + file: .github/dockerfiles/Dockerfile.build_test + build-args: | + BASE=${{ matrix.distro }} + PRE=${{ matrix.env_prep }} + RMJSONC=${{ matrix.jsonc_removal }} + outputs: type=oci,dest=/tmp/image.tar + tags: test:${{ matrix.artifact_key }} + - name: Retry delay + if: ${{ steps.build1.outcome == 'failure' }} + run: sleep "${RETRY_DELAY}" + - name: Build test environment (attempt 2) + if: ${{ steps.build1.outcome == 'failure' }} + id: build2 + uses: docker/build-push-action@v3 + continue-on-error: true # We retry 3 times at 5 minute intervals if there is a failure here. + with: + push: false + load: false + file: .github/dockerfiles/Dockerfile.build_test + build-args: | + BASE=${{ matrix.distro }} + PRE=${{ matrix.env_prep }} + RMJSONC=${{ matrix.jsonc_removal }} + outputs: type=oci,dest=/tmp/image.tar + tags: test:${{ matrix.artifact_key }} + - name: Retry delay + if: ${{ steps.build1.outcome == 'failure' && steps.build2.outcome == 'failure' }} + run: sleep "${RETRY_DELAY}" + - name: Build test environment (attempt 3) + if: ${{ steps.build1.outcome == 'failure' && steps.build2.outcome == 'failure' }} + id: build3 + uses: docker/build-push-action@v3 + with: + push: false + load: false + file: .github/dockerfiles/Dockerfile.build_test + build-args: | + BASE=${{ matrix.distro }} + PRE=${{ matrix.env_prep }} + RMJSONC=${{ matrix.jsonc_removal }} + outputs: type=oci,dest=/tmp/image.tar + tags: test:${{ matrix.artifact_key }} + - name: Upload image artifact + id: upload + uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.artifact_key }}-test-env + path: /tmp/image.tar + retention-days: 30 + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Test environment preparation for ${{ matrix.distro }} failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Test environment preparation for ${{ matrix.distro }} failed. + Checkout: ${{ steps.checkout.outcome }} + Set up Buildx: ${{ steps.buildx.outcome }} + Build test environment: ${{ steps.build1.outcome }} + Build test environment (attempt 2): ${{ steps.build2.outcome }} + Build test environment (attempt 3): ${{ steps.build3.outcome }} + Upload: ${{ steps.upload.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + source-build: # Test various source build arrangements. + name: Test Source Build + runs-on: ubuntu-latest + needs: + - matrix + - prepare-test-images + strategy: + fail-fast: false + max-parallel: 8 + matrix: ${{ fromJson(needs.matrix.outputs.matrix) }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Fetch test environment + id: fetch + uses: actions/download-artifact@v3 + with: + name: ${{ matrix.artifact_key }}-test-env + - name: Load test environment + id: load + run: | + docker load --input image.tar | tee image-info.txt + echo "::set-output name=image::$(cut -d ':' -f 3 image-info.txt)" + - name: Regular build on ${{ matrix.distro }} + id: build-basic + run: | + docker run --security-opt seccomp=unconfined -w /netdata sha256:${{ steps.load.outputs.image }} \ + /bin/sh -c 'autoreconf -ivf && ./configure --disable-dependency-tracking && make -j2' + - name: netdata-installer on ${{ matrix.distro }}, disable cloud + id: build-no-cloud + run: | + docker run --security-opt seccomp=unconfined -w /netdata sha256:${{ steps.load.outputs.image }} \ + /bin/sh -c './netdata-installer.sh --dont-wait --dont-start-it --disable-cloud --one-time-build' + - name: netdata-installer on ${{ matrix.distro }}, require cloud + id: build-cloud + run: | + docker run --security-opt seccomp=unconfined -w /netdata sha256:${{ steps.load.outputs.image }} \ + /bin/sh -c './netdata-installer.sh --dont-wait --dont-start-it --require-cloud --one-time-build' + - name: netdata-installer on ${{ matrix.distro }}, require cloud, no JSON-C + id: build-no-jsonc + if: matrix.jsonc_removal != '' + run: | + docker run --security-opt seccomp=unconfined -w /netdata sha256:${{ steps.load.outputs.image }} \ + /bin/sh -c '/rmjsonc.sh && ./netdata-installer.sh --dont-wait --dont-start-it --require-cloud --one-time-build' + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Build tests for ${{ matrix.distro }} failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Build tests for ${{ matrix.distro }} failed. + Checkout: ${{ steps.checkout.outcome }} + Fetch test environment: ${{ steps.fetch.outcome }} + Load test environment: ${{ steps.load.outcome }} + Regular build: ${{ steps.build-basic.outcome }} + netdata-installer, disable cloud: ${{ steps.build-no-cloud.outcome }} + netdata-installer, require cloud: ${{ steps.build-cloud.outcome }} + netdata-installer, no JSON-C: ${{ steps.build-no-jsonc.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + updater-check: # Test the generated dist archive using the updater code. + name: Test Generated Distfile and Updater Code + runs-on: ubuntu-latest + needs: + - build-dist + - matrix + - prepare-test-images + strategy: + fail-fast: false + max-parallel: 8 + matrix: ${{ fromJson(needs.matrix.outputs.matrix) }} + services: + apache: # This gets used to serve the dist tarball for the updater script. + image: httpd:2.4 + ports: + - 8080:80 + volumes: + - ${{ github.workspace }}:/usr/local/apache2/htdocs/ + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Fetch dist tarball artifacts + id: fetch-tarball + uses: actions/download-artifact@v3 + with: + name: dist-tarball + path: dist-tarball + - name: Prepare artifact directory + id: prepare + run: | + mkdir -p artifacts || exit 1 + echo "9999.0.0-0" > artifacts/latest-version.txt || exit 1 + cp dist-tarball/* artifacts || exit 1 + cd artifacts || exit 1 + ln -s ${{ needs.build-dist.outputs.distfile }} netdata-latest.tar.gz || exit 1 + sha256sum -b ./* > "sha256sums.txt" || exit 1 + cat sha256sums.txt + - name: Fetch test environment + id: fetch-test-environment + uses: actions/download-artifact@v3 + with: + name: ${{ matrix.artifact_key }}-test-env + - name: Load test environment + id: load + run: | + docker load --input image.tar | tee image-info.txt + echo "::set-output name=image::$(cut -d ':' -f 3 image-info.txt)" + - name: Install netdata and run the updater on ${{ matrix.distro }} + id: updater-check + run: | + docker run --security-opt seccomp=unconfined -e DISABLE_TELEMETRY=1 --network host -w /netdata sha256:${{ steps.load.outputs.image }} \ + /netdata/.github/scripts/run-updater-check.sh + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Updater checks for ${{ matrix.distro }} failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Updater checks for ${{ matrix.distro }} failed. + Checkout: ${{ steps.checkout.outcome }} + Fetch dist tarball: ${{ steps.fetch-tarball.outcome }} + Prepare artifact directory: ${{ steps.prepare.outcome }} + Fetch test environment: ${{ steps.fetch-test-environment.outcome }} + Load test environment: ${{ steps.load.outcome }} + Updater check: ${{ steps.updater-check.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + prepare-upload: # Consolidate the artifacts for uploading or releasing. + name: Prepare Artifacts + runs-on: ubuntu-latest + needs: + - build-dist + - build-static + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Prepare Environment + id: prepare + run: mkdir -p artifacts + - name: Retrieve Dist Tarball + id: fetch-dist + uses: actions/download-artifact@v3 + with: + name: dist-tarball + path: dist-tarball + - name: Retrieve Static Build Artifacts + id: fetch-static + uses: actions/download-artifact@v3 + with: + name: static-archive + path: static-archive + - name: Prepare Artifacts + id: consolidate + working-directory: ./artifacts/ + run: | + mv ../dist-tarball/* . || exit 1 + mv ../static-archive/* . || exit 1 + ln -s ${{ needs.build-dist.outputs.distfile }} netdata-latest.tar.gz || exit 1 + cp ../packaging/version ./latest-version.txt || exit 1 + sha256sum -b ./* > sha256sums.txt || exit 1 + cat sha256sums.txt + - name: Store Artifacts + id: store + uses: actions/upload-artifact@v3 + with: + name: final-artifacts + path: artifacts/* + retention-days: 30 + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to prepare release artifacts for upload:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to prepare release artifacts for upload. + CHeckout: ${{ steps.checkout.outcome }} + Prepare environment: ${{ steps.prepare.outcome }} + Fetch dist tarball: ${{ steps.fetch-dist.outcome }} + Fetch static builds: ${{ steps.fetch-static.outcome }} + Consolidate artifacts: ${{ steps.consolidate.outcome }} + Store: ${{ steps.store.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + artifact-verification-dist: # Verify the regular installer works with the consolidated artifacts. + name: Test Consolidated Artifacts (Source) + runs-on: ubuntu-latest + needs: + - prepare-upload + services: + apache: # This gets used to serve the dist tarball for the updater script. + image: httpd:2.4 + ports: + - 8080:80 + volumes: + - ${{ github.workspace }}:/usr/local/apache2/htdocs/ + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Fetch artifacts + id: fetch + uses: actions/download-artifact@v3 + with: + name: final-artifacts + path: artifacts + - name: Verify that artifacts work with installer + id: verify + env: + NETDATA_TARBALL_BASEURL: http://localhost:8080/artifacts + run: packaging/installer/kickstart.sh --build-only --dont-start-it --disable-telemetry --dont-wait + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Artifact verification for source tarball failed.' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Artifact verification for source tarball failed. + Checkout: ${{ steps.checkout.outcome }} + Fetch artifacts: ${{ steps.fetch.outcome }} + Verify artifacts: ${{ steps.verify.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + artifact-verification-static: # Verify the static installer works with the consolidated artifacts. + name: Test Consolidated Artifacts (Static) + runs-on: ubuntu-latest + needs: + - prepare-upload + services: + apache: # This gets used to serve the static archives. + image: httpd:2.4 + ports: + - 8080:80 + volumes: + - ${{ github.workspace }}:/usr/local/apache2/htdocs/ + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Fetch artifacts + id: fetch-artifacts + uses: actions/download-artifact@v3 + with: + name: final-artifacts + path: artifacts + - name: Verify that artifacts work with installer + id: verify + env: + NETDATA_TARBALL_BASEURL: http://localhost:8080/artifacts + run: packaging/installer/kickstart.sh --static-only --dont-start-it --disable-telemetry + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Artifact verification for static build failed.' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Artifact verification for static build failed. + Checkout: ${{ steps.checkout.outcome }} + Fetch artifacts: ${{ steps.fetch-artifacts.outcome }} + Verify artifacts: ${{ steps.verify.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + && github.repository == 'netdata/netdata' + }} + + upload-nightly: # Upload the nightly build artifacts to GCS. + name: Upload Nightly Artifacts + runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' && github.event.inputs.type == 'nightly' && github.repository == 'netdata/netdata' + needs: + - updater-check + - source-build + - artifact-verification-dist + - artifact-verification-static + steps: + - name: Retrieve Artifacts + id: fetch + uses: actions/download-artifact@v3 + with: + name: final-artifacts + path: final-artifacts + - name: Authenticate to GCS + id: gcs-auth + uses: google-github-actions/auth@v1 + with: + project_id: ${{ secrets.GCP_NIGHTLY_STORAGE_PROJECT }} + credentials_json: ${{ secrets.GCS_STORAGE_SERVICE_KEY_JSON }} + - name: Setup GCS + id: gcs-setup + uses: google-github-actions/setup-gcloud@v1.0.1 + - name: Upload Artifacts + id: upload + uses: google-github-actions/upload-cloud-storage@v1.0.0 + with: + destination: ${{ secrets.GCP_NIGHTLY_STORAGE_BUCKET }} + gzip: false + path: ./final-artifacts + parent: false + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to upload nightly release artifacts:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to upload nightly release artifacts. + Fetch artifacts: ${{ steps.fetch.outcome }} + Authenticatie GCS: ${{ steps.gcs-auth.outcome }} + Setup GCS: ${{ steps.gcs-setup.outcome }} + Upload artifacts: ${{ steps.upload.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && startsWith(github.ref, 'refs/heads/master') + && github.event_name != 'pull_request' + }} + + create-nightly: # Create a nightly build release in netdata/netdata-nightlies + name: Create Nightly Release + runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' && github.event.inputs.type == 'nightly' && github.repository == 'netdata/netdata' + needs: + - updater-check + - source-build + - artifact-verification-dist + - artifact-verification-static + steps: + - name: Checkout Main Repo + id: checkout-main + uses: actions/checkout@v3 + with: + path: main + - name: Checkout Nightly Repo + id: checkout-nightly + uses: actions/checkout@v3 + with: + repository: netdata/netdata-nightlies + path: nightlies + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + - name: Retrieve Artifacts + id: fetch + uses: actions/download-artifact@v3 + with: + name: final-artifacts + path: final-artifacts + - name: Prepare version info + id: version + run: | + # shellcheck disable=SC2129 + echo "version=$(cat main/packaging/version)" >> "${GITHUB_OUTPUT}" + echo "commit=$(cd nightlies && git rev-parse HEAD)" >> "${GITHUB_OUTPUT}" + echo "date=$(date +%F)" >> "${GITHUB_OUTPUT}" + - name: Create Release + id: create-release + uses: ncipollo/release-action@v1 + with: + allowUpdates: false + artifactErrorsFailBuild: true + artifacts: 'final-artifacts/sha256sums.txt,final-artifacts/netdata-*.tar.gz,final-artifacts/netdata-*.gz.run' + owner: netdata + repo: netdata-nightlies + body: Netdata nightly build for ${{ steps.version.outputs.date }}. + commit: ${{ steps.version.outputs.commit }} + tag: ${{ steps.version.outputs.version }} + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to draft release:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to create nightly release or attach artifacts. + Checkout netdata/netdata: ${{ steps.checkout-main.outcome }} + Checkout netdata/netdata-nightlies: ${{ steps.checkout-nightly.outcome }} + Fetch artifacts: ${{ steps.fetch.outcome }} + Prepare version info: ${{ steps.version.outcome }} + Create release: ${{ steps.create-release.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name == 'workflow_dispatch' + }} + + normalize-tag: # Fix the release tag if needed + name: Normalize Release Tag + runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' && github.event.inputs.type == 'release' + outputs: + tag: ${{ steps.tag.outputs.tag }} + steps: + - name: Normalize Tag + id: tag + run: | + if echo ${{ github.event.inputs.version }} | grep -qE '^[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+$'; then + echo "::set-output name=tag::v${{ github.event.inputs.version }}" + else + echo "::set-output name=tag::${{ github.event.inputs.version }}" + fi + + upload-release: # Create the draft release and upload the build artifacts. + name: Create Release Draft + runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' && github.event.inputs.type == 'release' && github.repository == 'netdata/netdata' + needs: + - updater-check + - source-build + - artifact-verification-dist + - artifact-verification-static + - normalize-tag + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Retrieve Artifacts + id: fetch + uses: actions/download-artifact@v3 + with: + name: final-artifacts + path: final-artifacts + - name: Create Release + id: create-release + uses: ncipollo/release-action@v1 + with: + allowUpdates: false + artifactErrorsFailBuild: true + artifacts: 'final-artifacts/sha256sums.txt,final-artifacts/netdata-*.tar.gz,final-artifacts/netdata-*.gz.run' + draft: true + tag: ${{ needs.normalize-tag.outputs.tag }} + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to draft release:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to create draft release or attach artifacts. + Checkout: ${{ steps.checkout.outcome }} + Fetch artifacts: ${{ steps.fetch.outcome }} + Create draft release: ${{ steps.create-release.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name == 'workflow_dispatch' + }} + - name: Success Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'good' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Created agent draft release:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: "${{ github.repository }}: ${{ steps.create-release.outputs.html_url }}" + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + success() + && github.event_name == 'workflow_dispatch' + }} diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml new file mode 100644 index 0000000..65ad6ac --- /dev/null +++ b/.github/workflows/checks.yml @@ -0,0 +1,61 @@ +--- +name: Checks +on: + push: + branches: + - master + pull_request: null +env: + DISABLE_TELEMETRY: 1 +concurrency: + group: checks-${{ github.ref }} + cancel-in-progress: true +jobs: + libressl-checks: + name: LibreSSL + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Build + run: > + docker run -v "$PWD":/netdata -w /netdata alpine:latest /bin/sh -c + 'apk add bash; + ./packaging/installer/install-required-packages.sh --dont-wait --non-interactive netdata; + apk del openssl openssl-dev; + apk add libressl libressl-dev; + autoreconf -ivf; + ./configure --disable-dependency-tracking; + make;' + clang-checks: + name: Clang + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Build + run: | + docker build -f .github/dockerfiles/Dockerfile.clang . + gitignore-check: + name: .gitignore + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Prepare environment + run: ./packaging/installer/install-required-packages.sh --dont-wait --non-interactive netdata + - name: Build netdata + run: ./netdata-installer.sh --dont-start-it --disable-telemetry --dont-wait --install /tmp/install --one-time-build + - name: Check that repo is clean + run: | + git status --porcelain=v1 > /tmp/porcelain + if [ -s /tmp/porcelain ]; then + cat /tmp/porcelain + exit 1 + fi diff --git a/.github/workflows/cloud_regression.yml b/.github/workflows/cloud_regression.yml new file mode 100644 index 0000000..b6e321f --- /dev/null +++ b/.github/workflows/cloud_regression.yml @@ -0,0 +1,54 @@ +name: Trigger Cloud Regression E2E Tests +on: + push: + branches: [master] + paths: + - 'CMakeLists.txt' + - '**.c' + - '**.cc' + - '**.cpp' + - '**.h' + - 'mqtt_websockets/**' + - 'aclk/aclk-schemas/**' +jobs: + trigger_cloud_regression_tests: + runs-on: ubuntu-latest + if: github.repository == 'netdata/netdata' + steps: + - name: Evaluate workflow dispatch parameters + env: + PR_REPO_NAME: ${{ github.event.pull_request.head.repo.full_name }} + PR_BRANCH_NAME: ${{ github.event.pull_request.head.ref }} + PR_COMMIT_HASH: ${{ github.event.pull_request.head.sha }} + id: output-workflow-dispatch-params + run: | + if [ ${{ github.event_name }} == 'pull_request_target' ]; then + NETDATA_CUSTOM_REPO="$PR_REPO_NAME" + NETDATA_CUSTOM_BRANCH="$PR_BRANCH_NAME" + NETDATA_CUSTOM_PR_NUMBER="${{ github.event.number }}" + NETDATA_CUSTOM_COMMIT_HASH="$PR_COMMIT_HASH" + elif [ ${{ github.event_name }} == 'push' ]; then + NETDATA_CUSTOM_REPO="netdata/netdata" + NETDATA_CUSTOM_BRANCH="master" + NETDATA_CUSTOM_PR_NUMBER="" + NETDATA_CUSTOM_COMMIT_HASH="${{ github.sha }}" + fi + echo "::set-output name=netdata_repo::${NETDATA_CUSTOM_REPO}" + echo "::set-output name=netdata_branch::${NETDATA_CUSTOM_BRANCH}" + echo "::set-output name=netdata_pr_number::${NETDATA_CUSTOM_PR_NUMBER}" + echo "::set-output name=netdata_commit_hash::${NETDATA_CUSTOM_COMMIT_HASH}" + + - name: Trigger Cloud Regression + uses: aurelien-baudet/workflow-dispatch@v2 + with: + repo: netdata/test-automation + ref: refs/heads/master + workflow: regression.yml + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + inputs: '{ "netdata_branch": "${{ steps.output-workflow-dispatch-params.outputs.netdata_branch }}", + "netdata_repo": "${{ steps.output-workflow-dispatch-params.outputs.netdata_repo }}", + "netdata_pr_number": "${{ steps.output-workflow-dispatch-params.outputs.netdata_pr_number }}", + "netdata_branch_commit_hash": "${{ steps.output-workflow-dispatch-params.outputs.netdata_commit_hash }}", + "custom_netdata_image": "true" + }' + wait-for-completion: false diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..021376a --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,117 @@ +--- +# Run CodeQL to analyze C/C++ and Python code. +name: CodeQL +on: + pull_request: + types: [opened, reopened, labeled, synchronize] + branches: [master] + push: + branches: [master] + schedule: + - cron: "27 2 * * 1" +env: + DISABLE_TELEMETRY: 1 +concurrency: + group: codeql-${{ github.ref }} + cancel-in-progress: true +jobs: + prepare: + name: Prepare Jobs + runs-on: ubuntu-latest + outputs: + cpp: ${{ steps.cpp.outputs.run }} + python: ${{ steps.python.outputs.run }} + steps: + - name: Clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Check if we should always run + id: always + run: | + if [ "${{ github.event_name }}" = "pull_request" ]; then + if [ "${{ contains(github.event.pull_request.labels.*.name, 'run-ci/codeql') }}" = "true" ]; then + echo '::set-output name=run::true' + echo '::notice::Found ci/codeql label, unconditionally running all CodeQL checks.' + else + echo '::set-output name=run::false' + fi + else + echo '::set-output name=run::true' + fi + - name: Check for C/C++ changes + id: cpp + run: | + if [ "${{ steps.always.outputs.run }}" = "false" ]; then + if git diff --name-only origin/${{ github.base_ref }} HEAD | grep -Eq '.*\.[ch](xx|\+\+)?' ; then + echo '::set-output name=run::true' + echo '::notice::C/C++ code has changed, need to run CodeQL.' + else + echo '::set-output name=run::false' + fi + else + echo '::set-output name=run::true' + fi + - name: Check for python changes + id: python + run: | + if [ "${{ steps.always.outputs.run }}" = "false" ]; then + if git diff --name-only origin/${{ github.base_ref }} HEAD | grep -Eq 'collectors/python.d.plugin/.*\.py' ; then + echo '::set-output name=run::true' + echo '::notice::Python code has changed, need to run CodeQL.' + else + echo '::set-output name=run::false' + fi + else + echo '::set-output name=run::true' + fi + + analyze-cpp: + name: Analyze C/C++ + runs-on: ubuntu-latest + needs: prepare + if: needs.prepare.outputs.cpp == 'true' + permissions: + security-events: write + steps: + - name: Git clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: cpp + - name: Prepare environment + run: ./packaging/installer/install-required-packages.sh --dont-wait --non-interactive netdata + - name: Build netdata + run: ./netdata-installer.sh --dont-start-it --disable-telemetry --dont-wait --install /tmp/install --one-time-build + - name: Run CodeQL + uses: github/codeql-action/analyze@v2 + with: + category: "/language:cpp" + + analyze-python: + name: Analyze Python + runs-on: ubuntu-latest + needs: prepare + if: needs.prepare.outputs.python == 'true' + permissions: + security-events: write + steps: + - name: Git clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + config-file: ./.github/codeql/python-config.yml + languages: python + - name: Run CodeQL + uses: github/codeql-action/analyze@v2 + with: + category: "/language:python" diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml new file mode 100644 index 0000000..9d1119a --- /dev/null +++ b/.github/workflows/coverity.yml @@ -0,0 +1,63 @@ +--- +# Runs coverity-scan.sh every 24h on `master` +name: Coverity Scan +on: + schedule: + - cron: '0 1 * * *' + pull_request: + paths: + - .github/workflows/coverity.yml + - coverity-scan.sh +env: + DISABLE_TELEMETRY: 1 +concurrency: + group: coverity-${{ github.ref }} + cancel-in-progress: true +jobs: + coverity: + if: github.repository == 'netdata/netdata' + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + id: checkout + with: + submodules: recursive + - name: Prepare environment + id: prepare + env: + DEBIAN_FRONTEND: 'noninteractive' + run: | + ./packaging/installer/install-required-packages.sh \ + --dont-wait --non-interactive netdata + sudo apt-get install -y libjson-c-dev libipmimonitoring-dev \ + libcups2-dev libsnappy-dev libprotobuf-dev \ + libprotoc-dev libssl-dev protobuf-compiler \ + libnetfilter-acct-dev + - name: Run coverity-scan + id: run + env: + REPOSITORY: 'netdata/netdata' + COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} + COVERITY_SCAN_SUBMIT_MAIL: ${{ secrets.COVERITY_SCAN_SUBMIT_MAIL }} + run: | + bash -x ./coverity-scan.sh --with-install + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Coverity run failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Coverity failed to run correctly. + Checkout: ${{ steps.checkout.outcome }} + Environment preparation: ${{ steps.prepare.outcome }} + Coverity run: ${{ steps.run.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + }} diff --git a/.github/workflows/dashboard-pr.yml b/.github/workflows/dashboard-pr.yml new file mode 100644 index 0000000..c99f989 --- /dev/null +++ b/.github/workflows/dashboard-pr.yml @@ -0,0 +1,54 @@ +--- +# Create a PR to update the react dashboard code. +name: Dashboard Version PR + +on: + workflow_dispatch: + inputs: + dashboard_version: + # This must be specified, and must _exactly_ match the version + # tag for the release to be used for the update. + description: Dashboard Version + required: true + +env: + DISABLE_TELEMETRY: 1 + +jobs: + dashboard-pr: + name: Generate Dashboard Version Bump PR + runs-on: ubuntu-latest + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Update Files + id: update + run: | + web/gui/bundle_dashboard.py ${{ github.event.inputs.dashboard_version }} + - name: Create Pull Request + id: pr + uses: peter-evans/create-pull-request@v4 + with: + title: 'Update dashboard to version ${{ github.event.inputs.dashboard_version }}.' + body: 'See https://github.com/netdata/dashboard/releases/tag/${{ github.event.inputs.dashboard_version }} for changes.' + branch: dashboard-${{ github.event.inputs.dashboard_version }} + branch-suffix: timestamp + delete-branch: true + commit-message: 'Update dashboard to version ${{ github.event.inputs.dashboard_version }}.' + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Dashboard update PR creation failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to create PR to update dashboard code to newest release. + Checkout: ${{ steps.checkout.outcome }} + Update files: ${{ steps.update.outcome }} + Create PR: ${{ steps.pr.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: failure() diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..b7eb53c --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,298 @@ +--- +name: Docker +on: + push: + branches: + - master + pull_request: null + workflow_dispatch: + inputs: + version: + description: Version Tag + default: nightly + required: true +env: + DISABLE_TELEMETRY: 1 +concurrency: + group: docker-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true +jobs: + docker-test: + name: Docker Runtime Test + runs-on: ubuntu-latest + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Setup Buildx + id: prepare + uses: docker/setup-buildx-action@v2 + - name: Test Build + id: build + uses: docker/build-push-action@v3 + with: + load: true + push: false + tags: netdata/netdata:test + - name: Test Image + id: test + run: .github/scripts/docker-test.sh + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Docker runtime testing failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Building or testing Docker image for linux/amd64 failed. + CHeckout: ${{ steps.checkout.outcome }} + Setup buildx: ${{ steps.prepare.outcome }} + Build image: ${{ steps.build.outcome }} + Test image: ${{ steps.test.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} + + docker-ci: + if: github.event_name != 'workflow_dispatch' + name: Docker Alt Arch Builds + needs: docker-test + runs-on: ubuntu-latest + strategy: + matrix: + platforms: + - linux/i386 + - linux/arm/v7 + - linux/arm64 + - linux/ppc64le + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Setup QEMU + id: qemu + if: matrix.platforms != 'linux/i386' + uses: docker/setup-qemu-action@v2 + - name: Setup Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + - name: Build + id: build + uses: docker/build-push-action@v3 + with: + platforms: ${{ matrix.platforms }} + load: false + push: false + tags: netdata/netdata:test + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Docker build testing failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Building Docker image for ${{ matrix.platforms }} failed. + CHeckout: ${{ steps.checkout.outcome }} + Setup QEMU: ${{ steps.qemu.outcome }} + Setup buildx: ${{ steps.buildx.outcome }} + Build image: ${{ steps.build.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} + + normalize-tag: # Fix the release tag if needed + name: Normalize Release Tag + runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' + outputs: + tag: ${{ steps.tag.outputs.tag }} + steps: + - name: Normalize Tag + id: tag + run: | + if echo ${{ github.event.inputs.version }} | grep -qE '^[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+$'; then + echo "::set-output name=tag::v${{ github.event.inputs.version }}" + else + echo "::set-output name=tag::${{ github.event.inputs.version }}" + fi + + docker-publish: + if: github.event_name == 'workflow_dispatch' + name: Docker Build and Publish + needs: + - docker-test + - normalize-tag + runs-on: ubuntu-latest + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Determine which tags to use + id: release-tags + if: github.event.inputs.version != 'nightly' + run: | + echo "tags=netdata/netdata:latest,netdata/netdata:stable,$(.github/scripts/gen-docker-tags.py ${{ needs.normalize-tag.outputs.tag }} '')" \ + >> "${GITHUB_ENV}" + - name: Determine which tags to use + id: nightly-tags + if: github.event.inputs.version == 'nightly' + run: | + echo "tags=netdata/netdata:latest,netdata/netdata:edge" >> "${GITHUB_ENV}" + - name: Mark image as official + id: env + if: github.repository == 'netdata/netdata' + run: echo "OFFICIAL_IMAGE=true" >> "${GITHUB_ENV}" + - name: Setup QEMU + id: qemu + uses: docker/setup-qemu-action@v2 + - name: Setup Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + - name: Docker Hub Login + id: login + if: github.repository == 'netdata/netdata' + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_HUB_USERNAME }} + password: ${{ secrets.DOCKER_HUB_PASSWORD }} + - name: Docker Build + id: build + uses: docker/build-push-action@v3 + with: + platforms: linux/amd64,linux/i386,linux/arm/v7,linux/arm64,linux/ppc64le + push: ${{ github.repository == 'netdata/netdata' }} + tags: ${{ env.tags }} + build-args: OFFICIAL_IMAGE=${{ env.OFFICIAL_IMAGE }} + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Docker Build failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to build or publish Docker images. + CHeckout: ${{ steps.checkout.outcome }} + Generate release tags: ${{ steps.release-tags.outcome }} + Generate nightly tags: ${{ steps.nightly-tags.outcome }} + Setup environment: ${{ steps.env.outcome }} + Setup QEMU: ${{ steps.qemu.outcome }} + Setup buildx: ${{ steps.buildx.outcome }} + Authenticate against DockerHub: ${{ steps.login.outcome }} + Build and publish images: ${{ steps.build.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} + - name: Trigger Helmchart PR + if: github.event_name == 'workflow_dispatch' && github.event.inputs.version != 'nightly' && github.repository == 'netdata/netdata' + uses: benc-uk/workflow-dispatch@v1 + with: + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + repo: netdata/helmchart + workflow: Agent Version PR + ref: refs/heads/master + inputs: '{"agent_version": "${{ needs.normalize-tag.outputs.tag }}"}' + + docker-dbg-publish: + if: github.event_name == 'workflow_dispatch' + name: Docker Build and Publish (Debuging Image) + needs: + - docker-test + - normalize-tag + runs-on: ubuntu-latest + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Determine which tags to use + id: release-tags + if: github.event.inputs.version != 'nightly' + run: | + echo "tags=netdata/netdata-debug:latest,netdata/netdata-debug:stable,$(.github/scripts/gen-docker-tags.py ${{ needs.normalize-tag.outputs.tag }} '-debug')" \ + >> "${GITHUB_ENV}" + - name: Determine which tags to use + id: nightly-tags + if: github.event.inputs.version == 'nightly' + run: | + echo "tags=netdata/netdata-debug:latest,netdata/netdata-debug:edge" >> "${GITHUB_ENV}" + - name: Mark image as official + id: env + if: github.repository == 'netdata/netdata' + run: echo "OFFICIAL_IMAGE=true" >> "${GITHUB_ENV}" + - name: Setup QEMU + id: qemu + uses: docker/setup-qemu-action@v2 + - name: Setup Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + - name: Docker Hub Login + id: login + if: github.repository == 'netdata/netdata' + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_HUB_USERNAME }} + password: ${{ secrets.DOCKER_HUB_PASSWORD }} + - name: Docker Build + id: build + uses: docker/build-push-action@v3 + with: + platforms: linux/amd64,linux/i386,linux/arm/v7,linux/arm64,linux/ppc64le + push: ${{ github.repository == 'netdata/netdata' }} + tags: ${{ env.tags }} + build-args: | + OFFICIAL_IMAGE=${{ env.OFFICIAL_IMAGE }} + DEBUG_BUILD=1 + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Docker Debug Build failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to build or publish Docker debug images. + CHeckout: ${{ steps.checkout.outcome }} + Generate release tags: ${{ steps.release-tags.outcome }} + Generate nightly tags: ${{ steps.nightly-tags.outcome }} + Setup environment: ${{ steps.env.outcome }} + Setup QEMU: ${{ steps.qemu.outcome }} + Setup buildx: ${{ steps.buildx.outcome }} + Authenticate against DockerHub: ${{ steps.login.outcome }} + Build and publish images: ${{ steps.build.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..69fda40 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,29 @@ +--- +name: Docs +on: + push: + branches: + - master + paths: + - '**.md' + pull_request: + paths: + - '**.md' +env: + DISABLE_TELEMETRY: 1 +jobs: + markdown-link-check: + name: Broken Links + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Run link check + uses: gaurav-nelson/github-action-markdown-link-check@v1 + with: + use-quiet-mode: 'no' + use-verbose-mode: 'yes' + check-modified-files-only: 'yes' + config-file: '.mlc_config.json' diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 0000000..0854080 --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,18 @@ +--- +# Handles labelling of PR's. +name: Pull Request Labeler +on: + schedule: + - cron: '*/10 * * * *' +env: + DISABLE_TELEMETRY: 1 +jobs: + labeler: + runs-on: ubuntu-latest + steps: + - uses: docker://docker.io/ilyam8/periodic-pr-labeler:v0.1.0 + if: github.repository == 'netdata/netdata' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + LABEL_MAPPINGS_FILE: .github/labeler.yml diff --git a/.github/workflows/packagecloud.yml b/.github/workflows/packagecloud.yml new file mode 100644 index 0000000..ba70c17 --- /dev/null +++ b/.github/workflows/packagecloud.yml @@ -0,0 +1,36 @@ +--- +# Runs PackageCloud cleanup every day at 9pm +name: PackageCloud Cleanup +on: + schedule: + - cron: '0 21 * * *' + workflow_dispatch: null + +jobs: + cleanup: + name: PackageCloud Cleanup + runs-on: ubuntu-latest + if: github.repository == 'netdata/netdata' + strategy: + fail-fast: false + matrix: + repos: + - stable + - edge + - devel + steps: + - name: Checkout + uses: actions/checkout@v3 + id: checkout + with: + submodules: recursive + - name: Prepare environment + id: prepare + run: | + pip3 install requests python-dateutil + - name: Run PackageCloud Cleanup + id: cleanup + env: + PKGCLOUD_TOKEN: ${{ secrets.PACKAGE_CLOUD_API_KEY }} + run: | + python3 .github/scripts/netdata-pkgcloud-cleanup.py -r ${{ matrix.repos }} diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml new file mode 100644 index 0000000..ddd8356 --- /dev/null +++ b/.github/workflows/packaging.yml @@ -0,0 +1,279 @@ +--- +# Handles building of binary packages for the agent. +name: Packages +on: + pull_request: + types: + - opened + - reopened + - labeled + - synchronize + branches: + - master + push: + branches: + - master + workflow_dispatch: + inputs: + type: + description: Package build type + default: devel + required: true + version: + description: Package version + required: false +env: + DISABLE_TELEMETRY: 1 + REPO_PREFIX: netdata/netdata +concurrency: + group: packages-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true +jobs: + matrix: + name: Prepare Build Matrix + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Prepare tools + id: prepare + run: | + sudo apt-get update && sudo apt-get install -y python3-ruamel.yaml + - name: Read build matrix + id: set-matrix + shell: python3 {0} + run: | + from ruamel.yaml import YAML + import json + import re + import os + ALWAYS_RUN_ARCHES = ["amd64", "x86_64"] + yaml = YAML(typ='safe') + entries = list() + run_limited = False + + with open('.github/data/distros.yml') as f: + data = yaml.load(f) + + if "${{ github.event_name }}" == "pull_request" and "${{ !contains(github.event.pull_request.labels.*.name, 'run-ci/packaging') }}": + run_limited = True + + for i, v in enumerate(data['include']): + if 'packages' in data['include'][i]: + for arch in data['include'][i]['packages']['arches']: + if arch in ALWAYS_RUN_ARCHES or not run_limited: + entries.append({ + 'distro': data['include'][i]['distro'], + 'version': data['include'][i]['version'], + 'repo_distro': data['include'][i]['packages']['repo_distro'], + 'format': data['include'][i]['packages']['type'], + 'base_image': data['include'][i]['base_image'] if 'base_image' in data['include'][i] else data['include'][i]['distro'], + 'platform': data['platform_map'][arch], + 'arch': arch + }) + + entries.sort(key=lambda k: (data['arch_order'].index(k['arch']), k['distro'], k['version'])) + matrix = json.dumps({'include': entries}, sort_keys=True) + print('Generated Matrix: ' + matrix) + print('::set-output name=matrix::' + matrix) + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Package Build matrix generation failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to generate build matrix for package build. + Checkout: ${{ steps.checkout.outcome }} + Prepare Tools: ${{ steps.prepare.outcome }} + Read Build Matrix: ${{ steps.set-matrix.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} + + version-check: + name: Version check + runs-on: ubuntu-latest + outputs: + repo: ${{ steps.check-version.outputs.repo }} + version: ${{ steps.check-version.outputs.version }} + retention: ${{ steps.check-version.outputs.retention }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Check Version + id: check-version + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + case "${{ github.event.inputs.type }}" in + "release") + echo "::set-output name=repo::${REPO_PREFIX}" + echo "::set-output name=version::${{ github.event.inputs.version }}" + echo "::set-output name=retention::365" + ;; + "nightly") + echo "::set-output name=repo::${REPO_PREFIX}-edge" + echo "::set-output name=version::$(tr -d 'v' < packaging/version)" + echo "::set-output name=retention::30" + ;; + *) + echo "::set-output name=repo::${REPO_PREFIX}-devel" + echo "::set-output name=version::0.${GITHUB_SHA}" + echo "::set-output name=retention::30" + ;; + esac + else + echo "::set-output name=version::$(cut -d'-' -f 1 packaging/version | tr -d 'v')" + echo "::set-output name=retention::0" + fi + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Package Build version check failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to generate version information for package build. + Checkout: ${{ steps.checkout.outcome }} + Check Version: ${{ steps.check-version.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} + + build: + name: Build + runs-on: ubuntu-latest + env: + DOCKER_CLI_EXPERIMENTAL: enabled + needs: + - matrix + - version-check + strategy: + matrix: ${{ fromJson(needs.matrix.outputs.matrix) }} + # We intentiaonally disable the fail-fast behavior so that a + # build failure for one version doesn't prevent us from publishing + # successfully built and tested packages for another version. + fail-fast: false + max-parallel: 8 + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 # We need full history for versioning + submodules: recursive + - name: Setup QEMU + id: qemu + if: matrix.platform != 'linux/amd64' && matrix.platform != 'linux/i386' + uses: docker/setup-qemu-action@v2 + - name: Prepare Docker Environment + id: docker-config + shell: bash + run: | + echo '{"cgroup-parent": "/actions_job", "experimental": true}' | sudo tee /etc/docker/daemon.json 2>/dev/null + sudo service docker restart + - name: Fetch images + id: fetch-images + uses: nick-invision/retry@v2 + with: + max_attempts: 3 + retry_wait_seconds: 30 + timeout_seconds: 900 + command: | + docker pull --platform ${{ matrix.platform }} ${{ matrix.base_image }}:${{ matrix.version }} + docker pull --platform ${{ matrix.platform }} netdata/package-builders:${{ matrix.distro }}${{ matrix.version }} + - name: Build Packages + id: build + shell: bash + run: | + docker run --security-opt seccomp=unconfined -e DISABLE_TELEMETRY=1 -e VERSION=${{ needs.version-check.outputs.version }} \ + --platform=${{ matrix.platform }} -v "$PWD":/netdata netdata/package-builders:${{ matrix.distro }}${{ matrix.version }} + - name: Save Packages + id: artifacts + continue-on-error: true + uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.distro }}-${{ matrix.version }}-${{ matrix.arch }}-packages + path: ${{ github.workspace }}/artifacts/* + - name: Test Packages + id: test + shell: bash + run: | + docker run --security-opt seccomp=unconfined -e DISABLE_TELEMETRY=1 -e DISTRO=${{ matrix.distro }} \ + -e VERSION=${{ needs.version-check.outputs.version }} -e DISTRO_VERSION=${{ matrix.version }} \ + --platform=${{ matrix.platform }} -v "$PWD":/netdata ${{ matrix.base_image }}:${{ matrix.version }} \ + /netdata/.github/scripts/pkg-test.sh + - name: SSH setup + id: ssh-setup + if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' + continue-on-error: true + uses: shimataro/ssh-key-action@v2 + with: + key: ${{ secrets.NETDATABOT_PACKAGES_SSH_KEY }} + name: id_ecdsa + known_hosts: ${{ secrets.PACKAGES_KNOWN_HOSTS }} + - name: Upload to packages.netdata.cloud + id: package-upload + continue-on-error: true + if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' + run: | + .github/scripts/package-upload.sh \ + ${{ matrix.repo_distro }} \ + ${{ matrix.arch }} \ + ${{ matrix.format }} \ + ${{ needs.version-check.outputs.repo }} + - name: Upload to PackageCloud + id: upload + if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' + shell: bash + env: + PKG_CLOUD_TOKEN: ${{ secrets.PACKAGE_CLOUD_API_KEY }} + run: | + printf "Packages to upload:\n%s" "$(ls artifacts/*.${{ matrix.format }})" + for pkgfile in artifacts/*.${{ matrix.format }} ; do + .github/scripts/package_cloud_wrapper.sh yank ${{ needs.version-check.outputs.repo }}/${{ matrix.repo_distro }} \ + "$(basename "${pkgfile}")" || true + .github/scripts/package_cloud_wrapper.sh push ${{ needs.version-check.outputs.repo }}/${{ matrix.repo_distro }} "${pkgfile}" + done + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Package Build failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: ${{ matrix.repo_distro }} ${{ matrix.version }} package build for ${{ matrix.arch }} failed. + Checkout: ${{ steps.checkout.outcome }} + Setup QEMU: ${{ steps.qemu.outcome }} + Setup Docker: ${{ steps.docker-config.outcome }} + Fetch images: ${{ steps.fetch-images.outcome }} + Build: ${{ steps.build.outcome }} + Test: ${{ steps.test.outcome }} + Import SSH Key: ${{ steps.ssh-setup.outcome }} + Publish to packages.netdata.cloud: ${{ steps.package-upload.outcome }} + Publish to PackageCloud: ${{ steps.upload.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..e16ecab --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,214 @@ +--- +# Workflow for triggering a release. +name: Release +on: + schedule: + - cron: '0 0 * * *' + workflow_dispatch: # Dispatch runs build and validate, then push to the appropriate storage location. + inputs: + type: + description: Build Type + default: nightly + required: true + version: + description: Version Tag + default: nightly + required: true +concurrency: # This keeps multiple instances of the job from running concurrently for the same ref and event type. + group: release-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true +jobs: + update-changelogs: + name: Update changelog + runs-on: ubuntu-latest + outputs: + ref: ${{ steps.target.outputs.ref }} + version: ${{ steps.target.outputs.version }} + type: ${{ steps.target.outputs.type }} + run: ${{ steps.target.outputs.run }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + submodules: recursive + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + - name: Prepare base ref + id: target + run: >- + .github/scripts/prepare-release-base.sh \ + ${{ github.repository }} \ + ${{ github.event_name }} \ + ${{ github.event.inputs.type }} \ + ${{ github.event.inputs.version }} \ + ${{ secrets.NETDATA_RELEASE_TEST }} + - name: Generate Nightly Changleog + id: nightly-changelog + if: steps.target.outputs.run == 'true' && steps.target.outputs.type == 'nightly' + uses: heinrichreimer/github-changelog-generator-action@v2.3 + with: + bugLabels: IGNOREBUGS + excludeLabels: "stale,duplicate,question,invalid,wontfix,discussion,no changelog" + issues: false + sinceTag: v1.10.0 + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + unreleasedLabel: "**Next release**" + verbose: true + maxIssues: 500 + - name: Generate Release Changelog + id: release-changelog + if: steps.target.outputs.run == 'true' && steps.target.outputs.type != 'nightly' + uses: heinrichreimer/github-changelog-generator-action@v2.3 + with: + bugLabels: IGNOREBUGS + excludeLabels: "stale,duplicate,question,invalid,wontfix,discussion,no changelog" + futureRelease: ${{ github.event.inputs.version }} + issues: false + sinceTag: v1.10.0 + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + unreleasedLabel: "**Next release**" + verbose: true + maxIssues: 500 + - name: Commit Changes + id: commit + if: steps.target.outputs.run == 'true' + env: + GITHUB_TOKEN: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + run: | + git config user.name "netdatabot" + git config user.email "bot@netdata.cloud" + git add packaging/version CHANGELOG.md + git commit -m "[ci skip] ${{ steps.target.outputs.message }}" + if [ "${{ steps.target.outputs.type }}" != "nightly" ]; then + git tag -a "${{ github.event.inputs.version }}" -m "${{ steps.target.outputs.message }}" + fi + if [ -n "${{ steps.target.outputs.new-branch }}" ]; then + git branch "${{ steps.target.outputs.new-branch }}" + fi + git push --tags origin "${{ steps.target.outputs.branch }}" + if [ -n "${{ steps.target.outputs.new-branch }}" ]; then + git push origin "${{ steps.target.outputs.new-branch }}" + fi + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to prepare changelog:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to prepare changelog. + Checkout: ${{ steps.checkout.outcome }} + Prepare base ref: ${{ steps.target.outcome }} + Generate nightly changelog: ${{ steps.nightly-changelog.outcome }} + Generate release changelog: ${{ steps.release-changelog.outcome }} + Commit changes: ${{ steps.commit.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: failure() + + trigger-artifacts: + name: Trigger artifact builds + runs-on: ubuntu-latest + needs: update-changelogs + if: needs.update-changelogs.outputs.run == 'true' + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v2 + with: + ref: ${{ needs.update-changelogs.outputs.ref }} + - name: Trigger build + id: trigger + uses: benc-uk/workflow-dispatch@v1 + with: + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + repo: ${{ github.repository }} + workflow: Build + ref: ${{ needs.update-changelogs.outputs.ref }} + inputs: '{"version": "${{ needs.update-changelogs.outputs.version }}", "type": "${{ needs.update-changelogs.outputs.type }}"}' + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to trigger ${{ needs.update-changelogs.outputs.type }} artifact builds:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to trigger ${{ needs.update-changelogs.outputs.type }} artifact builds. + Checkout: ${{ steps.checkout.outcome }} + Trigger build: ${{ steps.trigger.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: failure() + + trigger-docker: + name: Trigger docker builds + runs-on: ubuntu-latest + needs: update-changelogs + if: needs.update-changelogs.outputs.run == 'true' + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v2 + with: + ref: ${{ needs.update-changelogs.outputs.ref }} + - name: Trigger build + id: trigger + uses: benc-uk/workflow-dispatch@v1 + with: + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + repo: ${{ github.repository }} + workflow: Docker + ref: ${{ needs.update-changelogs.outputs.ref }} + inputs: '{"version": "${{ needs.update-changelogs.outputs.version }}"}' + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to trigger ${{ needs.update-changelogs.outputs.type }} Docker builds:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to trigger ${{ needs.update-changelogs.outputs.type }} Docker builds. + Checkout: ${{ steps.checkout.outcome }} + Trigger build: ${{ steps.trigger.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: failure() + + trigger-packages: + name: Trigger package builds + runs-on: ubuntu-latest + needs: update-changelogs + if: needs.update-changelogs.outputs.run == 'true' + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v2 + with: + ref: ${{ needs.update-changelogs.outputs.ref }} + - name: Trigger build + id: trigger + uses: benc-uk/workflow-dispatch@v1 + with: + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + repo: ${{ github.repository }} + workflow: Packages + ref: ${{ needs.update-changelogs.outputs.ref }} + inputs: '{"version": "${{ needs.update-changelogs.outputs.version }}", "type": "${{ needs.update-changelogs.outputs.type }}"}' + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Failed to trigger ${{ needs.update-changelogs.outputs.type }} package builds:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to trigger ${{ needs.update-changelogs.outputs.type }} package builds. + Checkout: ${{ steps.checkout.outcome }} + Trigger build: ${{ steps.trigger.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: failure() diff --git a/.github/workflows/repoconfig-packages.yml b/.github/workflows/repoconfig-packages.yml new file mode 100644 index 0000000..824ddd3 --- /dev/null +++ b/.github/workflows/repoconfig-packages.yml @@ -0,0 +1,183 @@ +--- +# Handles building of binary packages for the agent. +name: Repository Packages +on: + workflow_dispatch: null + pull_request: + paths: + - packaging/repoconfig/** + - .github/workflows/repoconfig-packages.yml + - .github/data/distros.yml + push: + branches: + - master + paths: + - packaging/repoconfig/** + - .github/workflows/repoconfig-packages.yml + - .github/data/distros.yml +env: + DISABLE_TELEMETRY: 1 + REPO_PREFIX: netdata/netdata +jobs: + matrix: + name: Prepare Build Matrix + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + - name: Prepare tools + id: prepare + run: | + sudo apt-get update && sudo apt-get install -y python3-ruamel.yaml + - name: Read build matrix + id: set-matrix + shell: python3 {0} + run: | + from ruamel.yaml import YAML + import json + yaml = YAML(typ='safe') + entries = list() + + with open('.github/data/distros.yml') as f: + data = yaml.load(f) + + for i, v in enumerate(data['include']): + if 'packages' in data['include'][i]: + entries.append({ + 'distro': data['include'][i]['distro'], + 'version': data['include'][i]['version'], + 'pkgclouddistro': data['include'][i]['packages']['repo_distro'], + 'format': data['include'][i]['packages']['type'], + 'base_image': data['include'][i]['base_image'] if 'base_image' in data['include'][i] else data['include'][i]['distro'], + 'platform': data['platform_map']['amd64'] + }) + + entries.sort(key=lambda k: (k['distro'], k['version'])) + matrix = json.dumps({'include': entries}, sort_keys=True) + print('Generated Matrix: ' + matrix) + print('::set-output name=matrix::' + matrix) + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Repository Package Build matrix generation failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to generate build matrix for repository package build. + Checkout: ${{ steps.checkout.outcome }} + Prepare Tools: ${{ steps.prepare.outcome }} + Read Build Matrix: ${{ steps.set-matrix.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: >- + ${{ + failure() + && github.event_name != 'pull_request' + && startsWith(github.ref, 'refs/heads/master') + && github.repository == 'netdata/netdata' + }} + + build: + name: Build + runs-on: ubuntu-latest + env: + DISABLE_TELEMETRY: 1 + DOCKER_CLI_EXPERIMENTAL: enabled + needs: + - matrix + strategy: + matrix: ${{ fromJson(needs.matrix.outputs.matrix) }} + # We intentiaonally disable the fail-fast behavior so that a + # build failure for one version doesn't prevent us from publishing + # successfully built and tested packages for another version. + fail-fast: false + max-parallel: 8 + steps: + - name: Checkout + id: checkout + uses: actions/checkout@v3 + # Unlike normally, we do not need a deep clone or submodules for this. + - name: Fetch base image + id: fetch-images + uses: nick-invision/retry@v2 + with: + max_attempts: 3 + retry_wait_seconds: 30 + timeout_seconds: 900 + command: docker pull --platform ${{ matrix.platform }} ${{ matrix.base_image }}:${{ matrix.version }} + - name: Build Packages + id: build + shell: bash + run: | + docker run --security-opt seccomp=unconfined -e DISABLE_TELEMETRY=1 --platform ${{ matrix.platform }} \ + -v "$PWD":/netdata ${{ matrix.base_image }}:${{ matrix.version }} \ + /netdata/packaging/repoconfig/build-${{ matrix.format }}.sh + - name: SSH setup + id: ssh-setup + if: github.event_name == 'workflow_dispatch' + continue-on-error: true + uses: shimataro/ssh-key-action@v2 + with: + key: ${{ secrets.NETDATABOT_PACKAGES_SSH_KEY }} + name: id_ecdsa + known_hosts: ${{ secrets.PACKAGES_KNOWN_HOSTS }} + - name: Upload to packages.netdata.cloud + id: package-upload + continue-on-error: true + if: github.event_name == 'workflow_dispatch' + run: | + .github/scripts/package-upload.sh \ + ${{ matrix.repo_distro }} \ + ${{ matrix.arch }} \ + ${{ matrix.format }} \ + netdata/netdata + .github/scripts/package-upload.sh \ + ${{ matrix.repo_distro }} \ + ${{ matrix.arch }} \ + ${{ matrix.format }} \ + netdata/netdata-edge + .github/scripts/package-upload.sh \ + ${{ matrix.repo_distro }} \ + ${{ matrix.arch }} \ + ${{ matrix.format }} \ + netdata/netdata-repoconfig + - name: Upload Packages + id: publish + if: github.event_name != 'pull_request' && github.repository == 'netdata/netdata' + shell: bash + env: + PKG_CLOUD_TOKEN: ${{ secrets.PACKAGE_CLOUD_API_KEY }} + run: | + printf "Packages to upload:\n%s" "$(ls artifacts/*.${{ matrix.format }})" + for pkgfile in artifacts/*.${{ matrix.format }} ; do + .github/scripts/package_cloud_wrapper.sh yank "${REPO_PREFIX}/${{ matrix.pkgclouddistro }}" \ + "$(basename "${pkgfile}")" || true + .github/scripts/package_cloud_wrapper.sh push "${REPO_PREFIX}/${{ matrix.pkgclouddistro }}" "${pkgfile}" + .github/scripts/package_cloud_wrapper.sh yank "${REPO_PREFIX}-edge/${{ matrix.pkgclouddistro }}" \ + "$(basename "${pkgfile}")" || true + .github/scripts/package_cloud_wrapper.sh push "${REPO_PREFIX}-edge/${{ matrix.pkgclouddistro }}" "${pkgfile}" + .github/scripts/package_cloud_wrapper.sh yank "${REPO_PREFIX}-repoconfig/${{ matrix.pkgclouddistro }}" \ + "$(basename "${pkgfile}")" || true + .github/scripts/package_cloud_wrapper.sh push "${REPO_PREFIX}-repoconfig/${{ matrix.pkgclouddistro }}" "${pkgfile}" + done + - name: Failure Notification + if: ${{ failure() && github.repository == 'netdata/netdata' }} + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Repository Package Build failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: ${{ matrix.pkgclouddistro }} ${{ matrix.version }} repository package build failed. + Checkout: ${{ steps.checkout.outcome }} + Fetch images: ${{ steps.fetch-images.outcome }} + Build: ${{ steps.build.outcome }} + Import SSH Key: ${{ steps.ssh-setup.outcome }} + Publish to packages.netdata.cloud: ${{ steps.package-upload.outcome }} + Publish to PackageCloud: ${{ steps.publish.outcome }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml new file mode 100644 index 0000000..5679b24 --- /dev/null +++ b/.github/workflows/review.yml @@ -0,0 +1,172 @@ +--- +# Runs various ReviewDog based checks against PR with suggested changes to improve quality +name: Review +on: + pull_request: + types: [opened, reopened, labeled, synchronize] +env: + DISABLE_TELEMETRY: 1 +concurrency: + group: review-${{ github.ref }} + cancel-in-progress: true +jobs: + prep-review: + name: Prepare Review Jobs + runs-on: ubuntu-latest + outputs: + actionlint: ${{ steps.actionlint.outputs.run }} + eslint: ${{ steps.eslint.outputs.run }} + hadolint: ${{ steps.hadolint.outputs.run }} + shellcheck: ${{ steps.shellcheck.outputs.run }} + yamllint: ${{ steps.yamllint.outputs.run }} + steps: + - name: Clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Check files for actionlint + id: actionlint + run: | + if [ "${{ contains(github.event.pull_request.labels.*.name, 'run-ci/actionlint') }}" = "true" ]; then + echo '::set-output name=run::true' + elif git diff --name-only origin/${{ github.base_ref }} HEAD | grep -Eq '\.github/workflows/.*' ; then + echo '::set-output name=run::true' + echo 'GitHub Actions workflows have changed, need to run actionlint.' + else + echo '::set-output name=run::false' + fi + - name: Check files for eslint + id: eslint + run: | + if [ "${{ contains(github.event.pull_request.labels.*.name, 'run-ci/eslint') }}" = "true" ]; then + echo '::set-output name=run::true' + elif git diff --name-only origin/${{ github.base_ref }} HEAD | grep -v "web/gui/dashboard" | grep -Eq '.*\.js|node\.d\.plugin\.in' ; then + echo '::set-output name=run::true' + echo 'JS files have changed, need to run ESLint.' + else + echo '::set-output name=run::false' + fi + - name: Check files for hadolint + id: hadolint + run: | + if [ "${{ contains(github.event.pull_request.labels.*.name, 'run-ci/hadolint') }}" = "true" ]; then + echo '::set-output name=run::true' + elif git diff --name-only origin/${{ github.base_ref }} HEAD | grep -Eq '.*Dockerfile.*' ; then + echo '::set-output name=run::true' + echo 'Dockerfiles have changed, need to run Hadolint.' + else + echo '::set-output name=run::false' + fi + - name: Check files for shellcheck + id: shellcheck + run: | + if [ "${{ contains(github.event.pull_request.labels.*.name, 'run-ci/shellcheck') }}" = "true" ]; then + echo '::set-output name=run::true' + elif git diff --name-only origin/${{ github.base_ref }} HEAD | grep -Eq '.*\.sh.*' ; then + echo '::set-output name=run::true' + echo 'Shell scripts have changed, need to run shellcheck.' + else + echo '::set-output name=run::false' + fi + - name: Check files for yamllint + id: yamllint + run: | + if [ "${{ contains(github.event.pull_request.labels.*.name, 'run-ci/yamllint') }}" = "true" ]; then + echo '::set-output name=run::true' + elif git diff --name-only origin/${{ github.base_ref }} HEAD | grep -Eq '.*\.ya?ml|python\.d/.*\.conf' ; then + echo '::set-output name=run::true' + echo 'YAML files have changed, need to run yamllint.' + else + echo '::set-output name=run::false' + fi + + actionlint: + name: actionlint + needs: prep-review + if: needs.prep-review.outputs.actionlint == 'true' + runs-on: ubuntu-latest + steps: + - name: Git clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Run actionlint + uses: reviewdog/action-actionlint@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-pr-check + + eslint: + name: eslint + needs: prep-review + if: needs.prep-review.outputs.eslint == 'true' + runs-on: ubuntu-latest + steps: + - name: Git clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Install eslint + run: npm install eslint -D + - name: Run eslint + uses: reviewdog/action-eslint@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-pr-check + eslint_flags: '.' + + hadolint: + name: hadolint + needs: prep-review + if: needs.prep-review.outputs.hadolint == 'true' + runs-on: ubuntu-latest + steps: + - name: Git clone repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Run hadolint + uses: reviewdog/action-hadolint@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-pr-check + + shellcheck: + name: shellcheck + needs: prep-review + if: needs.prep-review.outputs.shellcheck == 'true' + runs-on: ubuntu-latest + steps: + - name: Git clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Run shellcheck + uses: reviewdog/action-shellcheck@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-pr-check + path: "." + pattern: "*.sh*" + exclude: "./.git/*" + + yamllint: + name: yamllint + needs: prep-review + if: needs.prep-review.outputs.yamllint == 'true' + runs-on: ubuntu-latest + steps: + - name: Git clone repository + uses: actions/checkout@v3 + with: + submodules: recursive + fetch-depth: 0 + - name: Run yamllint + uses: reviewdog/action-yamllint@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-pr-check diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..d483868 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,41 @@ +--- +# Runs Tests on Pushes to `master` and Pull Requests +name: Tests +on: + push: + branches: + - master + paths: + - 'CMakeLists.txt' + - '**.c' + - '**.h' + pull_request: + paths: + - 'CMakeLists.txt' + - '**.c' + - '**.h' +env: + DISABLE_TELEMETRY: 1 +concurrency: + group: tests-${{ github.ref }} + cancel-in-progress: true +jobs: + unit-tests-legacy: + name: Unit Tests (legacy) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Prepare environment + run: | + ./packaging/installer/install-required-packages.sh --dont-wait --non-interactive netdata-all + sudo apt-get install -y libjson-c-dev libipmimonitoring-dev libcups2-dev libsnappy-dev \ + libprotobuf-dev libprotoc-dev libssl-dev protobuf-compiler \ + libnetfilter-acct-dev + - name: Run ./tests/run-unit-tests.sh + env: + CFLAGS: "-O1 -DNETDATA_INTERNAL_CHECKS=1 -DNETDATA_VERIFY_LOCKS=1" + run: | + ./tests/run-unit-tests.sh diff --git a/.github/workflows/trigger-learn-update.yml b/.github/workflows/trigger-learn-update.yml new file mode 100644 index 0000000..3d39eba --- /dev/null +++ b/.github/workflows/trigger-learn-update.yml @@ -0,0 +1,37 @@ +--- +name: Trigger Netdata Learn documentation update +on: + push: + branches: + - master + paths: + - "**.mdx?" + - "packaging/installer/kickstart.sh" +concurrency: + group: learn-trigger-${{ github.ref }} + cancel-in-progress: true +jobs: + trigger-ingest: + name: Trigger Netdata Learn ingest workflow. + if: github.repository == 'netdata/netdata' + runs-on: ubuntu-latest + steps: + - name: Trigger Netdata Learn ingest workflow. + uses: benc-uk/workflow-dispatch@v1 + with: + token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }} + repo: netdata/learn + workflow: Ingest + ref: refs/heads/master + - name: Failure Notification + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_COLOR: 'danger' + SLACK_FOOTER: '' + SLACK_ICON_EMOJI: ':github-actions:' + SLACK_TITLE: 'Triggering Netdata Learn documentation update failed:' + SLACK_USERNAME: 'GitHub Actions' + SLACK_MESSAGE: |- + ${{ github.repository }}: Failed to trigger Netdata Learn documentation update workflow. + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + if: failure() diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b90428a --- /dev/null +++ b/.gitignore @@ -0,0 +1,235 @@ +# Secrets +gcs-credentials.json + +.deps +.libs +.dirstamp +.project +.pydevproject + +# local dev outputs +dist/ + +*.o +*.a +config.h.in +Makefile.in +*~ +.*.swp +*.old +*.log +*.pyc + +Makefile +aclocal.m4 +autom4te.cache +compile +config.guess +config.h +config.status +config.sub +configure +depcomp +install-sh +libtool +ltmain.sh +missing +stamp-h1 +netdata.spec +sha256sums.txt + +# netdata binaries +netdata +netdatacli +!netdata/ +upload/ +artifacts/ + +apps.plugin +!apps.plugin/ + +freeipmi.plugin +!freeipmi.plugin/ + +cups.plugin +!cups.plugin/ + +nfacct.plugin +!nfacct.plugin/ + +xenstat.plugin +!xenstat.plugin/ + +perf.plugin +!perf.plugin/ + +slabinfo.plugin +!slabinfo.plugin/ + +cgroup-network +!cgroup-network/ + +ebpf.plugin +collectors/ebpf.plugin/reset_netdata_trace.sh +!ebpf.plugin/ +collectors/ebpf.plugin/includes/ + +# protoc generated files +*.pb.cc +*.pb.h + +# installation artifacts +packaging/installer/.environment.sh +*.tar.* +*.run + +# netdata makeself downloads +packaging/makeself/tmp/ + +# Libbpf is always overwritten depending of kernel version +packaging/libbpf.* + +# coverity +cov-int/ +netdata-coverity-analysis.tgz +.coverity-scan.conf + +.cproject/ +.idea/ +.vscode/ +.project/ +.settings/ +README +TODO.md +TODO.txt + +web/gui/chart-info/ +web/gui/control.html +web/gui/dashboard.js +web/gui/datasource.css +web/gui/gadget.xml +web/gui/index_new.html +web/gui/version.txt + +# related to karma/javascript/node +/node_modules/ +/coverage/ + +system/netdata-lsb +system/netdata-openrc +system/netdata-init-d +system/netdata.logrotate +system/netdata.service +system/netdata.service.* +system/netdata-updater.service +!system/netdata.service.in +!system/netdata.service.*.in +system/netdata.plist +system/netdata-freebsd +system/edit-config +system/netdata.crontab +system/install-service.sh + +daemon/anonymous-statistics.sh +daemon/get-kubernetes-labels.sh + +health/notifications/alarm-notify.sh +claim/netdata-claim.sh +collectors/tc.plugin/tc-qos-helper.sh +collectors/charts.d.plugin/charts.d.plugin +collectors/python.d.plugin/python.d.plugin +collectors/fping.plugin/fping.plugin +collectors/ioping.plugin/ioping.plugin +collectors/go.d.plugin +web/netdata-switch-dashboard.sh + +# installer generated files +/netdata-uninstaller.sh +/netdata-updater.sh + +# cmake files +cmake-build-debug/ +cmake-build-release/ +CMakeCache.txt +CMakeFiles/ +cmake_install.cmake +.cmake +compile_commands.json + +# jetbrains IDE +.jetbrains* + +.DS_Store +webcopylocal* + +# converted diagrams +diagrams/*.png +diagrams/*.svg +diagrams/*.atxt +diagrams/plantuml.jar + +# cppcheck +cppcheck-build/ + +# python virtual environment +venv/ + +# debugging / profiling +makeself/debug/ +tests/profile/benchmark-dictionary +tests/profile/benchmark-registry +tests/profile/test-eval +tests/profile/benchmark-line-parsing +tests/profile/benchmark-procfile-parser +tests/profile/benchmark-value-pairs +tests/profile/statsd-stress +tests/health_mgmtapi/health-cmdapi-test.sh +oprofile_data/ +vgcore.* +callgrind.out.* +gmon.out +gmon.txt +sitespeed-result/ +tests/acls/acl.sh +tests/urls/request.sh +tests/alarm_repetition/alarm.sh +tests/template_dimension/template_dim.sh +aclk/legacy/tests/install-fake-charts.d.sh + +# tests and temp files +test-driver +**/tests/*_testdriver +**/tests/*_testdriver.trs +python.d/python-modules-installer.sh + +# documentation generated files +docs/generator/src +docs/generator/build +docs/generator/doc +docs/generator/localization +docs/generator/mkdocs.yml + +.environment.sh + +#CLion files +netdata.cbp + +# External dependencies +externaldeps/ + +# vim sessions +Session.vim +Session.*.vim + +# clangd cache +.cache/ + +# Special exceptions +!packaging/repoconfig/Makefile + +# Jupyter notebook checkpoints +.ipynb_checkpoints + +# Judy stuff +JudyLTables.c +judyltablesgen diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..d3c7ace --- /dev/null +++ b/.gitmodules @@ -0,0 +1,15 @@ +[submodule "mqtt_websockets"] + path = mqtt_websockets + url = https://github.com/underhood/mqtt_websockets.git +[submodule "aclk/aclk-schemas"] + path = aclk/aclk-schemas + url = https://github.com/netdata/aclk-schemas.git +[submodule "ml/kmeans/dlib"] + path = ml/dlib + url = https://github.com/davisking/dlib.git + shallow = true + ignore = dirty +[submodule "ml/json"] + path = ml/json + url = https://github.com/nlohmann/json.git + shallow = true diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000..6fdab0e --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,24 @@ +--- +# LGTM does a good job at classifying files, but sometimes it needs some help. +# To classify files which shouldn't be checked we need to define where such +# files are located and manually assign them one of possible categories: +# docs, generated, library, template, test +# More information can be found in lgtm documentation: +# https://help.semmle.com/lgtm-enterprise/user/help/file-classification.html#built-in-tags +# https://lgtm.com/help/lgtm/lgtm.yml-configuration-file +path_classifiers: + library: + - collectors/python.d.plugin/python_modules/third_party/ + - collectors/python.d.plugin/python_modules/urllib3/ + - collectors/python.d.plugin/python_modules/pyyaml2/ + - collectors/python.d.plugin/python_modules/pyyaml3/ + - ml/kmeans/dlib/ + - ml/dlib/dlib/ + - ml/json/ + - web/gui/lib/ + - web/gui/src/ + - web/gui/css/ + - web/gui/dashboard/lib/ + - libnetdata/libjudy/ + test: + - tests/ diff --git a/.mlc_config.json b/.mlc_config.json new file mode 100644 index 0000000..8164bc4 --- /dev/null +++ b/.mlc_config.json @@ -0,0 +1,20 @@ +{ + "ignorePatterns": [ + { + "pattern": "^https:\/\/pi-hole\.net\b" + }, + { + "pattern": "^https:\/\/docs\.stackpulse\.io\b" + } + ], + "replacementPatterns": [ + { + "pattern": "^/", + "replacement": "/github/workspace/" + } + ], + "aliveStatusCodes": [ + 200, + 429 + ] +} diff --git a/.remarkignore b/.remarkignore new file mode 100644 index 0000000..83b6947 --- /dev/null +++ b/.remarkignore @@ -0,0 +1 @@ +CHANGELOG.md \ No newline at end of file diff --git a/.remarkrc.js b/.remarkrc.js new file mode 100644 index 0000000..55793e4 --- /dev/null +++ b/.remarkrc.js @@ -0,0 +1,111 @@ +// Source: https://github.com/codacy/codacy-remark-lint/raw/master/.remarkrc.js + +exports.settings = { + gfm: true, + commonmark: true, + looseTable: false, + spacedTable: false, + paddedTable: false, + fences: true, + rule: "-", + ruleRepetition: 3, + emphasis: "*", + strong: "*", + bullet: "-", + listItemIndent: "1", + incrementListMarker: true +}; + +const remarkPresetLintMarkdownStyleGuide = { + plugins: require("remark-preset-lint-markdown-style-guide").plugins.filter( + function(elem) { + return elem != require("remark-lint-no-duplicate-headings"); + } + ) +}; + +exports.plugins = [ + require("remark-preset-lint-consistent"), + require("remark-preset-lint-recommended"), + remarkPresetLintMarkdownStyleGuide, + [require("remark-lint-no-dead-urls"), { skipOffline: true }], + require("remark-lint-heading-whitespace"), + [require("remark-lint-maximum-line-length"), 120], + [require("remark-lint-maximum-heading-length"), 120], + [require("remark-lint-list-item-indent"), "tab-size"], + [require("remark-lint-list-item-spacing"), false], + [require("remark-lint-strong-marker"), "*"], + [require("remark-lint-emphasis-marker"), "_"], + [require("remark-lint-unordered-list-marker-style"), "-"], + [require("remark-lint-ordered-list-marker-style"), "."], + [require("remark-lint-ordered-list-marker-value"), "ordered"], + /*[ + require("remark-lint-write-good"), + [ + "warn", + { + passive: false, + illusion: true, + so: true, + thereIs: true, + weasel: true, + adverb: true, + tooWordy: true, + cliches: true, + eprime: false + } + ] + ],*/ + require("remark-validate-links"), + require("remark-frontmatter"), + /*[ + require("remark-retext"), + require("unified")().use({ + plugins: [ + require("retext-english"), + require("retext-syntax-urls"), + [ + require("retext-spell"), + { + ignoreLiteral: true, + dictionary: require("dictionary-en-us"), + ...personalDictionary + } + ], + [ + require("retext-sentence-spacing"), + { + preferred: 1 + } + ], + require("retext-repeated-words"), + require("retext-usage"), + require("retext-indefinite-article"), + require("retext-redundant-acronyms"), + [ + require("retext-contractions"), + { + straight: true, + allowLiteral: true + } + ], + require("retext-diacritics"), + [ + require("retext-quotes"), + { + preferred: "straight" + } + ], + require("retext-equality"), + require("retext-passive"), + require("retext-profanities"), + [ + require("retext-readability"), + { + age: 20 + } + ] + ] + }) + ]*/ +]; diff --git a/.squash.yml b/.squash.yml new file mode 100644 index 0000000..58e5910 --- /dev/null +++ b/.squash.yml @@ -0,0 +1,7 @@ +deployments: + netdata: + auto_deploy_on_commits: true + filename: ./packaging/docker/Dockerfile + context_path: ./ + port_forwarding: 80:19999 + run_options: -v /proc:/host/proc:ro -v /sys:/host/sys:ro -v /var/run/docker.sock:/var/run/docker.sock:ro --cap-add SYS_PTRACE --security-opt apparmor=unconfined diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..e729815 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,68 @@ +--- +dist: focal +language: c + +addons: + apt: + packages: ['moreutils'] + +env: + global: + - RELEASE_CHANNEL=nightly + +before_install: + - exec > >(ts -s '%H:%M:%.S ') 2>&1 + - source .travis/utils.sh + +# Install dependencies for all, once +# +install: + - sudo apt-get install -y libuv1-dev liblz4-dev libcap2-bin zlib1g-dev uuid-dev fakeroot libipmimonitoring-dev libmnl-dev libnetfilter-acct-dev gnupg python3-pip + - sudo pip3 install git-semver==0.3.2 # 11/Sep/2019: git-semver tip was broken, so we had to force last good run of it + - source tests/installer/slack.sh + - export NOTIF_CHANNEL="automation-beta" + - if [ "${TRAVIS_REPO_SLUG}" = "netdata/netdata" ]; then export NOTIF_CHANNEL="automation"; fi; + - export BUILD_VERSION="$(cat packaging/version | cut -d'-' -f1)" + - export LATEST_RELEASE_VERSION="$(cat packaging/version | cut -d'-' -f1)" + - export LATEST_RELEASE_DATE="$(git log -1 --format=%aD "${LATEST_RELEASE_VERSION}" | cat)" + - if [[ "${TRAVIS_COMMIT_MESSAGE}" = *"[Build latest]"* ]]; then export BUILD_VERSION="$(cat packaging/version | cut -d'-' -f1,2 | sed -e 's/-/./g').latest"; fi; + - export DEPLOY_REPO="netdata" # Default production packaging repository + - export PACKAGING_USER="netdata" # Standard package cloud account + - if [[ "${TRAVIS_COMMIT_MESSAGE}" = *"[Build latest]"* ]]; then export DEPLOY_REPO="netdata-edge"; fi; + - export PACKAGE_CLOUD_RETENTION_DAYS=30 + - if [ ! "${TRAVIS_REPO_SLUG}" = "netdata/netdata" ]; then export DEPLOY_REPO="netdata-devel"; fi; + # These are release-related artifacts and have to be evaluated before we start doing conditional checks inside stages + - source ".travis/tagger.sh" + - export GIT_TAG="$(git tag --points-at)" + - git submodule update --init --recursive + + +# Setup notification system +# +notifications: + webhooks: + urls: + - https://app.fossa.io/hooks/travisci + +# Define the stage sequence and conditionals +# +stages: + # Mandatory runs, we always want these executed + - name: Build process + if: commit_message =~ /^((?!\[Package (amd64|arm64|i386) (DEB|RPM)( .*)?\]).)*$/ + + +# Define stage implementation details +# +jobs: + # This is a hook to help us introduce "soft" errors on our process + allow_failures: + - env: ALLOW_SOFT_FAILURE_HERE=true + include: + # Ensure netdata code builds successfully + - stage: Build process + + name: Standard netdata build + script: fakeroot ./netdata-installer.sh --install $HOME --dont-wait --dont-start-it --enable-plugin-nfacct --enable-plugin-freeipmi --disable-lto + env: CFLAGS='-O1 -Wall -Wextra -Wformat-signedness -fstack-protector-all -fno-common -DNETDATA_INTERNAL_CHECKS=1 -D_FORTIFY_SOURCE=2 -DNETDATA_VERIFY_LOCKS=1' + after_failure: post_message "TRAVIS_MESSAGE" " standard netdata build is failing (Still dont know which one, will improve soon)" diff --git a/.travis/README.md b/.travis/README.md new file mode 100644 index 0000000..8927dd4 --- /dev/null +++ b/.travis/README.md @@ -0,0 +1,149 @@ + + +# Description of CI build configuration + +## Variables needed by travis + +- GITHUB_TOKEN - GitHub token with push access to repository +- DOCKER_USERNAME - Username (netdatabot) with write access to docker hub repository +- DOCKER_PWD - Password to docker hub +- encrypted_8daf19481253_key - key needed by openssl to decrypt GCS credentials file +- encrypted_8daf19481253_iv - IV needed by openssl to decrypt GCS credentials file +- COVERITY_SCAN_TOKEN - Token to allow coverity test analysis uploads +- SLACK_USERNAME - This is required for the slack notifications triggered by travis pipeline +- SLACK_CHANNEL - This is the channel that Travis will be posting messages +- SLACK_NOTIFY_WEBHOOK_URL - This is the incoming URL webhook as provided by slack integration. Visit Apps integration in slack to generate the required hook +- SLACK_BOT_NAME - This is the name your bot will appear with on slack + +## CI workflow details +Our CI pipeline is designed to help us identify and mitigate risks at all stages of implementation. +To accommodate this need, we used [Travis CI](http://www.travis-ci.com) as our CI/CD tool. +Our main areas of concern are: +1) Only push code that is working. That means fail fast so that we can improve before we reach the public + +2) Reduce the time to market to minimum, by streamlining the release process. + That means a lot of testing, a lot of consistency checks, a lot of validations + +3) Generated artifacts consistency. We should not allow broken software to reach the public. + When this happens, it's embarrassing and we struggle to eliminate it. + +4) We are an innovative company, so we love to automate :) + + +Having said that, here's a brief introduction to Netdata's improved CI/CD pipeline with Travis. +Our CI/CD lifecycle contains three different execution entry points: +1) A user opens a pull request to netdata/master: Travis will run a pipeline on the branch under that PR +2) A merge or commit happens on netdata/master. This will trigger travis to run, but we have two distinct cases in this scenario: + a) A user merges a pull request to netdata/master: Travis will run on master, after the merge. + b) A user runs a commit/merge with a special keyword (mentioned later). + This triggers a release for either minor, major or release candidate versions, depending the keyword +3) A scheduled job runs on master once per day: Travis will run on master at the scheduled interval + +To accommodate all three entry points our CI/CD workflow has a set of steps that run on all three entry points. +Once all these steps are successful, then our pipeline executes another subset of steps for entry points 2 and 3. +In travis terms the "steps" are "Stages" and within each stage we execute a set of activities called "jobs" in travis. + +### Always run: Stages that running on all three execution entry points + +## Code quality, linting, syntax, code style +At this early stage we iterate through a set of basic quality control checks: +- Shell checking: Run linters for our various BASH scripts +- Checksum validators: Run validators to ensure our installers and documentation are in sync +- Dashboard validator: We provide a pre-generated dashboard.js script file that we need to make sure its up to date. We validate that. + +## Build process +At this stage, basically, we build :-) +We do a baseline check of our build artifacts to guarantee they are not broken +Briefly our activities include: +- Verify docker builds successfully +- Run the standard Netdata installer, to make sure we build & run properly +- Do the same through 'make dist', as this is our stable channel for our kickstart files + +## Artifacts validation +At this point we know our software is building, we need to go through the a set of checks, to guarantee +that our product meets certain expectations. At the current stage, we are focusing on basic capabilities +like installing in different distributions, running the full lifecycle of install-run-update-install and so on. +We are still working on enriching this with more and more use cases, to get us closer to achieving full stability of our software. +Briefly we currently evaluate the following activities: +- Basic software unit testing (only run when changes happen that require it) +- Non containerized build and install on ubuntu 14.04 +- Non containerized build and install on ubuntu 18.04 +- Running the full Netdata lifecycle (install, update, uninstall) on ubuntu 18.04 +- Build and install on CentOS 7 +(More to come) + +### Nightly operations: Stages that run daily under cronjob +The nightly stages are related to the daily nightly activities, that produce our daily latest releases. +We also maintain a couple of cronjobs that run during the night to provide us with deeper insights, +like for example coverity scanning or extended kickstart checksum checks + +## Nightly operations +At this stage we run scheduled jobs and execute the nightly changelog generator, coverity scans, +labeler for our issues and extended kickstart files checksum validations. + +## Nightly release +During this stage we are building and publishing latest docker images, prepare the nightly artifacts +and deploy them (the artifacts) to our google cloud service provider. + + +### Publishing +Publishing is responsible for executing the major/minor/patch releases and is separated +in two stages: packaging preparation process and publishing. + +## Packaging for release +During packaging we are preparing the release changelog information and run the labeler. + +## Publish for release +The publishing stage is the most complex part in publishing. This is the stage were we generate and publish docker images, +prepare the release artifacts and get ready with the release draft. + +### Package Management workflows +As part of our goal to provide the best support to our customers, we have created a set of CI workflows to automatically produce +DEB and RPM for multiple distributions. These workflows are implemented under the templated stages '_DEB_TEMPLATE' and '_RPM_TEMPLATE'. +We currently plan to actively support the following Operating Systems, with a plan to further expand this list following our users needs. + +### Operating systems supported +The following distributions are supported +- Debian versions + - Buster (TBD - not released yet, check [debian releases](https://www.debian.org/releases/) for details) + - Stretch + - Jessie + - Wheezy + +- Ubuntu versions + - Disco + - Cosmic + - Bionic + - artful + +- Enterprise Linux versions (Covers Red Hat, CentOS, and Amazon Linux with version 6) + - Version 8 (TBD) + - Version 7 + - Version 6 + +- Fedora versions + - Version 31 (TBD) + - Version 30 + - Version 29 + - Version 28 + +- openSUSE versions + - 15.1 + - 15.0 + +- Gentoo distributions + - TBD + +### Architectures supported +We plan to support amd64, x86 and arm64 architectures. As of June 2019 only amd64 and x86 will become available, as we are still working on solving issues with the architecture. + +The Package deployment can be triggered manually by executing an empty commit with the following message pattern: `[Package PACKAGE_TYPE PACKAGE_ARCH] DESCRIBE_THE_REASONING_HERE`. +Travis Yaml configuration allows the user to combine package type and architecture as necessary to regenerate the current stable release (For example tag v1.15.0 as of 4th of May 2019) +Sample patterns to trigger building of packages for all amd64 supported architecture: +- '[Package amd64 RPM]': Build & publish all amd64 available RPM packages +- '[Package amd64 DEB]': Build & publish all amd64 available DEB packages diff --git a/.travis/check_changelog_last_modification.sh b/.travis/check_changelog_last_modification.sh new file mode 100755 index 0000000..ae0da87 --- /dev/null +++ b/.travis/check_changelog_last_modification.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# +# This scriptlet validates nightlies age and notifies is if it gets too old +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Pavlos Emm. Katsoulakis (paul@netdata.cloud) + +set -e + +# If we are not in netdata git repo, at the top level directory, fail +TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)") +CWD=$(git rev-parse --show-cdup || echo "") +if [ -n "${CWD}" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then + echo "Run as .travis/$(basename "$0") from top level directory of netdata git repository" + echo "Changelog age checker exited abnormally" + exit 1 +fi + +source tests/installer/slack.sh || echo "I could not load slack library" + +LAST_MODIFICATION="$(git log -1 --pretty="format:%at" CHANGELOG.md)" +CURRENT_TIME="$(date +"%s")" +TWO_DAYS_IN_SECONDS=172800 + +DIFF=$((CURRENT_TIME - LAST_MODIFICATION)) + +echo "Checking CHANGELOG.md last modification time on GIT.." +echo "CHANGELOG.md timestamp: ${LAST_MODIFICATION}" +echo "Current timestamp: ${CURRENT_TIME}" +echo "Diff: ${DIFF}" + +if [ ${DIFF} -gt ${TWO_DAYS_IN_SECONDS} ]; then + echo "CHANGELOG.md is more than two days old!" + post_message "TRAVIS_MESSAGE" "Hi , CHANGELOG.md was found more than two days old (Diff: ${DIFF} seconds)" "${NOTIF_CHANNEL}" +else + echo "CHANGELOG.md is less than two days old, fine" +fi diff --git a/.travis/create_artifacts.sh b/.travis/create_artifacts.sh new file mode 100755 index 0000000..2742891 --- /dev/null +++ b/.travis/create_artifacts.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# +# Artifacts creation script. +# This script generates two things: +# 1) The static binary that can run on all linux distros (built-in dependencies etc) +# 2) The distribution source tarball +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author: Paul Emm. Katsoulakis +# +# shellcheck disable=SC2230 + +set -e + +# If we are not in netdata git repo, at the top level directory, fail +TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)") +CWD=$(git rev-parse --show-cdup || echo "") +if [ -n "${CWD}" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then + echo "Run as .travis/$(basename "$0") from top level directory of netdata git repository" + exit 1 +fi + +if [ ! "${TRAVIS_REPO_SLUG}" == "netdata/netdata" ]; then + echo "Beta mode on ${TRAVIS_REPO_SLUG}, not running anything here" + exit 0 +fi + +echo "--- Initialize git configuration ---" +git checkout "${1-master}" +git pull + +if [ "${RELEASE_CHANNEL}" == stable ]; then + echo "--- Set default release channel to stable ---" + sed -i 's/^RELEASE_CHANNEL="nightly" *#/RELEASE_CHANNEL="stable" #/' \ + netdata-installer.sh \ + packaging/makeself/install-or-update.sh +fi + +# Everything from this directory will be uploaded to GCS +mkdir -p artifacts +BASENAME="netdata-$(git describe)" + +# Make sure stdout is in blocking mode. If we don't, then conda create will barf during downloads. +# See https://github.com/travis-ci/travis-ci/issues/4704#issuecomment-348435959 for details. +python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' +echo "--- Create tarball ---" +command -v git > /dev/null && [ -d .git ] && git clean -d -f +autoreconf -ivf +./configure --prefix=/usr --sysconfdir=/etc --localstatedir=/var --libexecdir=/usr/libexec --with-zlib --with-math --with-user=netdata CFLAGS=-O2 +make dist +mv "${BASENAME}.tar.gz" artifacts/ + +echo "--- Create self-extractor ---" +sxarches="x86_64 armv7l aarch64" +for arch in ${sxarches}; do + git clean -d -f + rm -rf packating/makeself/tmp + ./packaging/makeself/build-static.sh ${arch} +done + +# Needed for GCS +echo "--- Copy artifacts to separate directory ---" +#shellcheck disable=SC2164 +cp packaging/version artifacts/latest-version.txt +cd artifacts +ln -s "${BASENAME}.tar.gz" netdata-latest.tar.gz + +for arch in ${sxarches}; do + ln -s "netdata-${arch}-$(git describe).gz.run" netdata-${arch}-latest.gz.run +done + +ln -s "${BASENAME}.gz.run" netdata-latest.gz.run + +sha256sum -b ./* > "sha256sums.txt" +echo "checksums:" +cat sha256sums.txt diff --git a/.travis/create_changelog.sh b/.travis/create_changelog.sh new file mode 100755 index 0000000..83584aa --- /dev/null +++ b/.travis/create_changelog.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Pavlos Emm. Katsoulakis (paul@netdata.cloud) +set -e + +# If we are not in netdata git repo, at the top level directory, fail +TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)") +CWD=$(git rev-parse --show-cdup || echo "") +if [ -n "$CWD" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then + echo "Run as .travis/$(basename "$0") from top level directory of netdata git repository" + echo "Changelog creation aborted" + exit 1 +fi + +ORGANIZATION=$(echo "$TRAVIS_REPO_SLUG" | awk -F '/' '{print $1}') +PROJECT=$(echo "$TRAVIS_REPO_SLUG" | awk -F '/' '{print $2}') +GIT_MAIL=${GIT_MAIL:-"bot@netdata.cloud"} +GIT_USER=${GIT_USER:-"netdatabot"} + +if [ -z ${GIT_TAG+x} ]; then + OPTS="" +else + OPTS="--future-release ${GIT_TAG}" +fi + +if [ ! "${TRAVIS_REPO_SLUG}" == "netdata/netdata" ]; then + echo "Beta mode on ${TRAVIS_REPO_SLUG}, nothing else to do here" + exit 0 +fi + +echo "--- Creating changelog ---" +git checkout master +git pull + +docker login -u "${DOCKER_USERNAME}" -p "${DOCKER_PWD}" +docker run -it -v "$(pwd)":/project markmandel/github-changelog-generator:latest \ + --user "${ORGANIZATION}" \ + --project "${PROJECT}" \ + --token "${GITHUB_TOKEN}" \ + --since-tag "v1.10.0" \ + --unreleased-label "**Next release**" \ + --no-issues \ + --exclude-labels "stale,duplicate,question,invalid,wontfix,discussion,no changelog" \ + --max-issues 500 \ + --bug-labels IGNOREBUGS ${OPTS} diff --git a/.travis/current_build_status b/.travis/current_build_status new file mode 100644 index 0000000..11a6d0a --- /dev/null +++ b/.travis/current_build_status @@ -0,0 +1 @@ +changes-#18220 diff --git a/.travis/draft_release.sh b/.travis/draft_release.sh new file mode 100755 index 0000000..ddc0f9a --- /dev/null +++ b/.travis/draft_release.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Draft release generator. +# This utility is responsible for submitting a draft release to github repo +# It is agnostic of other processes, when executed it will draft a release, +# based on the most recent reachable tag. +# +# Requirements: +# - GITHUB_TOKEN variable set with GitHub token. Access level: repo.public_repo +# - artifacts directory in place +# - The directory is created by create_artifacts.sh mechanism +# - The artifacts need to be created with the same tag, obviously +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author: Pavlos Emm. Katsoulakis + +set -e + +if [ ! -f .gitignore ]; then + echo "Run as ./travis/$(basename "$0") from top level directory of git repository" + exit 1 +fi + +echo "--- Initialize git configuration ---" +git checkout master +git pull + + +if [[ $(git describe) =~ -rc* ]]; then + echo "This is a release candidate tag, we do not generate a release draft" + exit 0 +fi + +# Load the tag, if any +GIT_TAG=$(git describe) + +if [ ! "${TRAVIS_REPO_SLUG}" == "netdata/netdata" ]; then + echo "Beta mode on ${TRAVIS_REPO_SLUG}, i was about to run for release (${GIT_TAG}), but i am emulating, so bye" + exit 0 +fi; + +echo "---- CREATING RELEASE DRAFT WITH ASSETS -----" +# Download hub +HUB_VERSION=${HUB_VERSION:-"2.5.1"} +wget "https://github.com/github/hub/releases/download/v${HUB_VERSION}/hub-linux-amd64-${HUB_VERSION}.tgz" -O "/tmp/hub-linux-amd64-${HUB_VERSION}.tgz" +tar -C /tmp -xvf "/tmp/hub-linux-amd64-${HUB_VERSION}.tgz" +export PATH=$PATH:"/tmp/hub-linux-amd64-${HUB_VERSION}/bin" + +# Create a release draft +if [ -z ${GIT_TAG+x} ]; then + echo "Variable GIT_TAG is not set. Something went terribly wrong! Exiting." + exit 1 +fi +if [ "${GIT_TAG}" != "$(git tag --points-at)" ]; then + echo "ERROR! Current commit is not tagged. Stopping release creation." + exit 1 +fi +until hub release create --draft \ + -a "artifacts/netdata-${GIT_TAG}.tar.gz" \ + -a "artifacts/netdata-${GIT_TAG}.gz.run" \ + -a "artifacts/sha256sums.txt" \ + -m "${GIT_TAG}" "${GIT_TAG}"; do + sleep 5 +done diff --git a/.travis/gcs-credentials.json.enc b/.travis/gcs-credentials.json.enc new file mode 100644 index 0000000..5d1e7b2 Binary files /dev/null and b/.travis/gcs-credentials.json.enc differ diff --git a/.travis/generate_changelog_and_tag_release.sh b/.travis/generate_changelog_and_tag_release.sh new file mode 100755 index 0000000..bf5555b --- /dev/null +++ b/.travis/generate_changelog_and_tag_release.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# +# Script to automatically do a couple of things: +# - generate a new tag according to semver (https://semver.org/) +# - generate CHANGELOG.md by using https://github.com/skywinder/github-changelog-generator +# +# Tags are generated by searching for a keyword in last commit message. Keywords are: +# - [patch] or [fix] to bump patch number +# - [minor], [feature] or [feat] to bump minor number +# - [major] or [breaking change] to bump major number +# All keywords MUST be surrounded with square braces. +# +# Script uses git mechanisms for locking, so it can be used in parallel builds +# +# Requirements: +# - GITHUB_TOKEN variable set with GitHub token. Access level: repo.public_repo +# - docker +# +# This is a modified version of: +# https://github.com/paulfantom/travis-helper/blob/master/releasing/releaser.sh +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author: Pavlos Emm. Katsoulakis +# Author: Pawel Krupa (@paulfantom) +set -e + +if [ ! -f .gitignore ]; then + echo "Run as ./travis/$(basename "$0") from top level directory of git repository" + exit 1 +fi + +echo "--- Changelog generator script starting ---" +# If we dont have a produced TAG there is nothing to do, so bail out happy +if [ -z "${GIT_TAG}" ]; then + echo "GIT_TAG is empty, that is not suppose to happen (Value: $GIT_TAG)" + exit 1 +fi + +if [ ! "${TRAVIS_REPO_SLUG}" == "netdata/netdata" ]; then + echo "Beta mode on ${TRAVIS_REPO_SLUG}, nothing to do on the changelog generator and tagging script for (${GIT_TAG}), bye" + exit 0 +fi + +echo "--- Initialize git configuration ---" +export GIT_MAIL="bot@netdata.cloud" +export GIT_USER="netdatabot" +git checkout master +git pull + +echo "---- UPDATE VERSION FILE ----" +echo "$GIT_TAG" >packaging/version +git add packaging/version + +echo "---- Create CHANGELOG -----" +./.travis/create_changelog.sh +git add CHANGELOG.md + +echo "---- COMMIT AND PUSH CHANGES ----" +git commit -m "[ci skip] release $GIT_TAG" --author "${GIT_USER} <${GIT_MAIL}>" +git tag "$GIT_TAG" -a -m "Automatic tag generation for travis build no. $TRAVIS_BUILD_NUMBER" +git push "https://${GITHUB_TOKEN}:@$(git config --get remote.origin.url | sed -e 's/^https:\/\///')" +git push "https://${GITHUB_TOKEN}:@$(git config --get remote.origin.url | sed -e 's/^https:\/\///')" --tags +# After those operations output of command `git describe` should be identical with a value of GIT_TAG diff --git a/.travis/generate_changelog_for_nightlies.sh b/.travis/generate_changelog_for_nightlies.sh new file mode 100755 index 0000000..59173af --- /dev/null +++ b/.travis/generate_changelog_for_nightlies.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# +# Changelog generation scriptlet, for nightlies +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Pawel Krupa (paulfantom) +# Author : Pavlos Emm. Katsoulakis (paul@netdata.cloud) +set -e + +# If we are not in netdata git repo, at the top level directory, fail +TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)") +CWD=$(git rev-parse --show-cdup || echo "") +if [ -n "$CWD" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then + echo "Run as .travis/$(basename "$0") from top level directory of netdata git repository" + echo "Changelog generation process aborted" + exit 1 +fi + +LAST_TAG="$1" +COMMITS_SINCE_RELEASE="$2" +NEW_VERSION="${LAST_TAG}-$((COMMITS_SINCE_RELEASE + 1))-nightly" +GIT_MAIL=${GIT_MAIL:-"bot@netdata.cloud"} +GIT_USER=${GIT_USER:-"netdatabot"} +PUSH_URL=$(git config --get remote.origin.url | sed -e 's/^https:\/\///') +FAIL=0 + +if [ ! "${TRAVIS_REPO_SLUG}" == "netdata/netdata" ]; then + echo "Beta mode on ${TRAVIS_REPO_SLUG}, nothing else to do here" + exit 0 +fi + +echo "Running changelog creation mechanism" +.travis/create_changelog.sh + +echo "Changelog created! Adding packaging/version(${NEW_VERSION}) and CHANGELOG.md to the repository" +echo "${NEW_VERSION}" > packaging/version +git add packaging/version && echo "1) Added packaging/version to repository" || FAIL=1 +git add CHANGELOG.md && echo "2) Added changelog file to repository" || FAIL=1 +git commit -m '[ci skip] create nightly packages and update changelog' --author "${GIT_USER} <${GIT_MAIL}>" && echo "3) Committed changes to repository" || FAIL=1 +git push "https://${GITHUB_TOKEN}:@${PUSH_URL}" && echo "4) Pushed changes to remote ${PUSH_URL}" || FAIL=1 + +# In case of a failure, wrap it up and bail out cleanly +if [ $FAIL -eq 1 ]; then + git clean -xfd + echo "Changelog generation failed during github UPDATE!" + exit 1 +fi + +echo "Changelog generation completed successfully!" diff --git a/.travis/nightlies.sh b/.travis/nightlies.sh new file mode 100755 index 0000000..7240a78 --- /dev/null +++ b/.travis/nightlies.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# +# This is the nightly changelog generation script +# It is responsible for two major activities: +# 1) Update packaging/version with the current nightly version +# 2) Generate the changelog for the mentioned version +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Pawel Krupa (paulfantom) +# Author : Pavlos Emm. Katsoulakis (paul@netdata.cloud) +set -e +FAIL=0 + +source tests/installer/slack.sh || echo "Could not load slack library" + +# If we are not in netdata git repo, at the top level directory, fail +TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)") +CWD=$(git rev-parse --show-cdup || echo "") +if [ -n "${CWD}" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then + echo "Run as .travis/$(basename "$0") from top level directory of netdata git repository" + echo "Changelog generation process aborted" + exit 1 +fi + +LAST_TAG=$(git describe --abbrev=0 --tags) +COMMITS_SINCE_RELEASE=$(git rev-list "${LAST_TAG}"..HEAD --count) +PREVIOUS_NIGHTLY_COUNT="$(rev + +# Breaking changes + +- remove deprecated bash modules (`apache`, `cpu_apps`, `cpufreq`, `exim`, `hddtemp`, `load_average`, `mem_apps`, `mysql`, `nginx`, `phpfpm`, `postfix`, `squid`, `tomcat`) [[#7962](https://github.com/netdata/netdata/pull/7962)] + + diff --git a/BUILD.md b/BUILD.md new file mode 100644 index 0000000..83f7e99 --- /dev/null +++ b/BUILD.md @@ -0,0 +1,365 @@ + + +# The build system + +We are currently migrating from `autotools` to `CMake` as a build-system. This document +currently describes how we intend to perform this migration, and will be updated after +the migration to explain how the new `CMake` configuration works. + +## Stages during the build + +1. The `netdata-installer.sh`, take in arguments and environment settings to control the + build. +2. The configure step: `autoreconf -ivf ; ./configure` passing arguments into the configure + script. This becomes `generation-time` in CMake. This includes package / system detection + and configuration resulting in the `config.h` in the source root. +3. The build step: recurse through the generated Makefiles and build the executable. +4. The first install step: calls `make install` to handle all the install steps put into + the Makefiles by the configure step (puts binaries / libraries / config into target + tree structure). +5. The second install step: the rest of the installer after the make install handles + system-level configuration (privilege setting, user / groups, fetch/build/install `go.d` + plugins, telemetry, installing service for startup, uninstaller, auto-updates. + +The ideal migration result is to replace all of this with the following steps: +``` +mkdir build ; cd build ; cmake .. -D... ; cmake --build . --target install +``` + +The `-D...` indicates where the command-line arguments for configuration are passed into +`CMake`. + +## CMake generation time + +At generation time we need to solve the following issues: + +### Feature flags + +Every command-line switch on the installer and the configure script needs to becomes an +argument to the CMake generation, we can do this with variables in the CMake cache: + +CMakeLists.txt: +``` +option(ENABLE_DBENGINE "Enable the dbengine storage" ON) +... +if(${ENABLE_DBENGINE}) +... +endif() +``` + +Command-line interface +``` +cmake -DENABLE_DBENGINE +``` + +### Dependency detection + +We have a mixture of soft- and hard-dependencies on libraries. For most of these we expect +`pkg-config` information, for some we manually probe for libraries and include files. We +should treat all of the external dependencies consistently: + +1. Default to autodetect using `pkg-config` (e.g. the standard `jemalloc` drops a `.pc` + into the system but we do not check for it. +2. If no `.pc` is found perform a manual search for libraries under known names, and + check for accessible symbols inside them. +3. Check that include paths work. +4. Allow a command-line override (e.g. `-DWITH_JEMALLOC=/...`). +5. If none of the above work then fail the install if the dependency is hard, otherwise + indicate it is not present in the `config.h`. + +Before doing any dependency detection we need to determine which search paths are +really in use for the current compiler, after the `project` declaration we can use: +``` +execute_process(COMMAND ${CMAKE_C_COMPILER} "--print-search-dirs" + COMMAND grep "^libraries:" + COMMAND sed "s/^libraries: =//" + COMMAND tr ":" " " + COMMAND tr -d "\n" + OUTPUT_VARIABLE CC_SEARCH_DIRS + RESULTS_VARIABLE CC_SEARCH_RES) +string(REGEX MATCH "^[0-9]+" CC_SEARCH_RES ${CC_SEARCH_RES}) +#string(STRIP "${CC_SEARCH_RES}" CC_SEARCH_RES) +if(0 LESS ${CC_SEARCH_RES}) + message(STATUS "Warning - cannot determine standard compiler library paths") + # Note: we will probably need a different method for Windows... +endif() + +``` + +The output format for this switch works on both `Clang` and `gcc`, it also includes +the include search path, which can be extracted in a similar way. Standard advice here +is to list the `ldconfig` cache or use the `-V` flag to check, but this does not work +consistently across platforms - in particular `gcc` will reconfigure `ld` when it is +called to gcc's internal view of search paths. During experiments each of these +alternative missed / added unused paths. Dumping the compiler's own estimate of the +search paths seems to work consistently across clang/gcc/linux/freebsd configurations. + +The default behaviour in CMake is to search across predefined paths (e.g. `CMAKE_LIBRARY_PATH`) +that are based on heuristics about the current platform. Most projects using CMake seem +to overwrite this with their own estimates. + +We can use the extracted paths as a base, add our own heuristics based on OS and then +`set(CMAKE_LIBRARY_PATH ${OUR_OWN_LIB_SEARCH})` to get the best results. Roughly we do +the following for each external dependency: +``` +set(WITH_JSONC "Detect" CACHE STRING "Manually set the path to a json-c installation") +... +if(${WITH_JSONC} STREQUAL "Detect") + pkg_check_modules(JSONC json-c) # Don't set the REQUIRED flag + if(JSONC_FOUND) + message(STATUS "libjsonc found through .pc -> ${JSONC_CFLAGS_OTHER} ${JSONC_LIBRARIES}") + # ... setup using JSONC_CFLAGS_OTHER JSONC_LIBRARIES and JSONC_INCLUDE_DIRS + else() + find_library(LIB_JSONC + NAMES json-c libjson-c + PATHS ${CMAKE_LIBRARY_PATH}) # Includes our additions by this point + if(${LIB_JSONC} STREQUAL "LIB_JSONC-NOTFOUND") + message(STATUS "Library json-c not installed, disabling") + else() + check_library_exists(${LIB_JSONC} json_object_get_type "" HAVE_JSONC) + # ... setup using heuristics for CFLAGS and check include files are available + endif() + endif() +else() + # ... use explicit path as base to check for library and includes ... +endif() + +``` + +For checking the include path we have two options, if we overwrite the `CMAKE_`... variables +to change the internal search path we can use: +``` +CHECK_INCLUDE_FILE(json/json.h HAVE_JSONC_H) +``` +Or we can build a custom search path and then use: +``` +find_file(HAVE_JSONC_H json/json.h PATHS ${OUR_INCLUDE_PATHS}) +``` + +Note: we may have cases where there is no `.pc` but we have access to a `.cmake` (e.g. AWS SDK, mongodb,cmocka) - these need to be checked / pulled inside the repo while building a prototype. + +### Compiler compatibility checks + +In CMakeLists.txt: + +``` +CHECK_INCLUDE_FILE(sys/prctl.h HAVE_PRCTL_H) +configure_file(cmake/config.in config.h) +``` + +In cmake/config.in: + +``` +#cmakedefine HAVE_PRCTL_H 1 +``` + +If we want to check explicitly if something compiles (e.g. the accept4 check, or the +`strerror_r` typing issue) then we set the `CMAKE_`... paths and then use: +``` +check_c_source_compiles( + " + #include + int main() { char x = *strerror_r(0, &x, sizeof(x)); return 0; } + " + STRERROR_R_CHAR_P) + +``` +This produces a bool that we can use inside CMake or propagate into the `config.h`. + +We can handle the atomic checks with: +``` +check_c_source_compiles( + " + int main (int argc, char **argv) + { + volatile unsigned long ul1 = 1, ul2 = 0, ul3 = 2; + __atomic_load_n(&ul1, __ATOMIC_SEQ_CST); + __atomic_compare_exchange(&ul1, &ul2, &ul3, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&ul1, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_sub(&ul3, 1, __ATOMIC_SEQ_CST); + __atomic_or_fetch(&ul1, ul2, __ATOMIC_SEQ_CST); + __atomic_and_fetch(&ul1, ul2, __ATOMIC_SEQ_CST); + volatile unsigned long long ull1 = 1, ull2 = 0, ull3 = 2; + __atomic_load_n(&ull1, __ATOMIC_SEQ_CST); + __atomic_compare_exchange(&ull1, &ull2, &ull3, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&ull1, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_sub(&ull3, 1, __ATOMIC_SEQ_CST); + __atomic_or_fetch(&ull1, ull2, __ATOMIC_SEQ_CST); + __atomic_and_fetch(&ull1, ull2, __ATOMIC_SEQ_CST); + return 0; + } + " + HAVE_C__ATOMIC) +``` + +For the specific problem of getting the correct type signature in log.c for the `strerror_r` +calls we can replicate what we have now, or we can delete this code completely and use a +better solution that is documented [here](http://www.club.cc.cmu.edu/~cmccabe/blog_strerror.html). +To replicate what we have now: +``` +check_c_source_compiles( + " + #include + int main() { char x = *strerror_r(0, &x, sizeof(x)); return 0; } + " + STRERROR_R_CHAR_P) + +check_c_source_compiles( + " + #include + int main() { int x = strerror_r(0, &x, sizeof(x)); return 0; } + " + STRERROR_R_INT) + +if("${STRERROR_R_CHAR_P}" OR "${STRERROR_R_INT}") + set(HAVE_DECL_STRERROR_R 1) +endif() +message(STATUS "Result was ${HAVE_DECL_STRERROR_R}") + +``` + +Note: I did not find an explicit way to select compiler when both `clang` and `gcc` are +present. We might have an implicit way (like redirecting `cc`) but we should put one in. + + + +### Debugging problems in test compilations + +Test compilations attempt to feed a test-input into the targeted compiler and result +in a yes/no decision, this is similar to `AC_LANG_SOURCE(.... if test $ac_...` in .`m4`. +We have two techniques to use in CMake: +``` +cmake_minimum_required(VERSION 3.1.0) +include(CheckCCompilerFlag) +project(empty C) + +check_c_source_compiles( + " + #include + int main() { char x = *strerror_r(0, &x, sizeof(x)); return 0; } + " + STRERROR_R_CHAR_P) + +try_compile(HAVE_JEMALLOC ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/quickdemo.c + LINK_LIBRARIES jemalloc) +``` + +The `check_c_source_compiles` is light-weight: + +* Inline source for the test, easy to follow. +* Build errors are reported in `CMakeFiles/CMakeErrors.log` + +But we cannot alter the include-paths / library-paths / compiler-flags specifically for +the test without overwriting the current CMake settings. The alternative approach is +slightly more heavy-weight: + +* Can't inline source for `try_compile` - it requires a `.c` file in the tree. +* Build errors are not shown, the recovery process for them is somewhat difficult. + +``` +rm -rf * && cmake .. --debug-trycompile +grep jemal CMakeFiles/CMakeTmp/CMakeFiles/*dir/* +cd CMakeFiles/CMakeTmp/CMakeFiles/cmTC_d6f0e.dir # for example +cmake --build ../.. +``` + +This implies that we can do this to diagnose problems / develop test-programs, but we +have to make them *bullet-proof* as we cannot expose this to end-users. This means that +the results of the compilation must be *crisp* - exactly yes/no if the feature we are +testing is supported. + +### System configuration checks + +For any system configuration checks that fall outside of the above scope (includes, libraries, +packages, test-compilation checks) we have a fall-back that we can use to glue any holes +that we need, e.g. to pull out the packaging strings, inside the `CMakeLists.h`: +``` +execute_process(COMMAND cat ${CMAKE_CURRENT_SOURCE_DIR}/packaging/version + COMMAND tr -d '\n' + OUTPUT_VARIABLE VERSION_FROM_FILE) +message(STATUS "Packaging version ${VERSION_FROM_FILE}") +``` +and this in the `config.h.in`: +``` +#define VERSION_FROM_FILE "@VERSION_FROM_FILE@" +``` + +## CMake build time + +We have a working definition of the targets that is in use with CLion and works on modern +CMake (3.15). It breaks on older CMake version (e.g. 3.7) with an error message (issue#7091). +No PoC yet to fix this, but it looks like changing the target properties should do it (in the +worst case we can drop the separate object completely and merge the sources directly into +the final target). + +Steps needed for building a prototype: + +1. Pick a reasonable configuration. +2. Use the PoC techniques above to do a full generation of `CMAKE_` variables in the cache + according to the feature options and dependencies. +3. Push these into the project variables. +4. Work on it until the build succeeds in at least one known configuration. +5. Smoke-test that the output is valid (i.e. the executable loads and runs, and we can + access the dashboard). +6. Do a full comparison of the `config.h` generated by autotools against the CMake version + and document / fix any deviations. + +## CMake install target + +I've only looked at this superficially as we do not have a prototype yet, but each of the +first-stage install steps (in `make install`) and the second-stage (in `netdata-installer.sh`) +look feasible. + +## General issues + +* We need to choose a minimum CMake version that is an available package across all of our + supported environments. There is currently a build issue #7091 that documents a problem + in the compilation phase (we cannot link in libnetdata as an object on old CMake versions + and need to find a different way to express this). + +* The default variable-expansion / comparisons in CMake are awkward, we need this to make it + sane: + ``` + cmake_policy(SET CMP0054 "NEW") + ``` +* Default paths for libs / includes are not comprehensive on most environments, we still need + some heuristics for common locations, e.g. `/usr/local` on FreeBSD. + +# Recommendations + +We should follow these steps: + +1. Build a prototype. +2. Build a test-environment to check the prototype against environments / configurations that + the team uses. +3. Perform an "internal" release - merge the new CMake into master, but not announce it or + offer to support it. +4. Check it works for the team internally. +5. Do a soft-release: offer it externally as a replacement option for autotools. +6. Gather feedback and usage reports on a wider range of configurations. +7. Do a hard-release: switch over the preferred build-system in the installation instructions. +8. Gather feedback and usage reports on a wider range of configurations (again). +9. Deprecate / remove the autotools build-system completely (so that we can support a single + build-system). + +Some smaller miscellaneous suggestions: + +1. Remove the `_Generic` / `strerror_r` config to make the system simpler (use the technique + on the blog post to make the standard version re-entrant so that it is thread-safe). +2. Pull in jemalloc by source into the repo if it is our preferred malloc implementation. + +# Background + +* [Stack overflow starting point](https://stackoverflow.com/questions/7132862/how-do-i-convert-an-autotools-project-to-a-cmake-project#7680240) +* [CMake wiki including previous autotools conversions](https://gitlab.kitware.com/cmake/community/wikis/Home) +* [Commands section in old CMake docs](https://cmake.org/cmake/help/v2.8.8/cmake.html#section_Commands) +* [try_compile in newer CMake docs](https://cmake.org/cmake/help/v3.7/command/try_compile.html) +* [configure_file in newer CMake docs](https://cmake.org/cmake/help/v3.7/command/configure_file.html?highlight=configure_file) +* [header checks in CMake](https://stackoverflow.com/questions/647892/how-to-check-header-files-and-library-functions-in-cmake-like-it-is-done-in-auto) +* [how to write platform checks](https://gitlab.kitware.com/cmake/community/wikis/doc/tutorials/How-To-Write-Platform-Checks) + + diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7619153 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,589 @@ +# Changelog + +## [v1.37.1](https://github.com/netdata/netdata/tree/v1.37.1) (2022-12-05) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.37.0...v1.37.1) + +**Merged pull requests:** + +- fix v1.37 dbengine page alignment crashes [\#14086](https://github.com/netdata/netdata/pull/14086) ([ktsaou](https://github.com/ktsaou)) +- Fix \_\_atomic\_compare\_exchange\_n\(\) atomics [\#14085](https://github.com/netdata/netdata/pull/14085) ([ktsaou](https://github.com/ktsaou)) +- Fix 1.37 crashes [\#14081](https://github.com/netdata/netdata/pull/14081) ([stelfrag](https://github.com/stelfrag)) +- add basic dashboard info for NGINX Plus [\#14080](https://github.com/netdata/netdata/pull/14080) ([ilyam8](https://github.com/ilyam8)) +- replication fixes 9 [\#14079](https://github.com/netdata/netdata/pull/14079) ([ktsaou](https://github.com/ktsaou)) +- optimize workers statistics performance [\#14077](https://github.com/netdata/netdata/pull/14077) ([ktsaou](https://github.com/ktsaou)) +- fix SSL related crashes [\#14076](https://github.com/netdata/netdata/pull/14076) ([ktsaou](https://github.com/ktsaou)) +- remove python.d/springboot [\#14075](https://github.com/netdata/netdata/pull/14075) ([ilyam8](https://github.com/ilyam8)) +- fix backfilling statistics [\#14074](https://github.com/netdata/netdata/pull/14074) ([ktsaou](https://github.com/ktsaou)) +- Add workflow dispatch trigger for parent/child with cloud integration smoke tests [\#14070](https://github.com/netdata/netdata/pull/14070) ([dimko](https://github.com/dimko)) + +## [v1.37.0](https://github.com/netdata/netdata/tree/v1.37.0) (2022-11-30) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.36.1...v1.37.0) + +**Merged pull requests:** + +- fix spinlock [\#14068](https://github.com/netdata/netdata/pull/14068) ([ktsaou](https://github.com/ktsaou)) +- Sanitize command arguments. [\#14064](https://github.com/netdata/netdata/pull/14064) ([vkalintiris](https://github.com/vkalintiris)) +- Strict control of streaming API keys and MACHINE GUIDs in stream.conf [\#14063](https://github.com/netdata/netdata/pull/14063) ([ktsaou](https://github.com/ktsaou)) +- replication fixes No 8 [\#14061](https://github.com/netdata/netdata/pull/14061) ([ktsaou](https://github.com/ktsaou)) +- fix\(updater\): don't produce any output if static update completed successfully [\#14058](https://github.com/netdata/netdata/pull/14058) ([ilyam8](https://github.com/ilyam8)) +- Add Alpine 3.17 to supported distros. [\#14056](https://github.com/netdata/netdata/pull/14056) ([Ferroin](https://github.com/Ferroin)) +- replication fixes No 7 [\#14053](https://github.com/netdata/netdata/pull/14053) ([ktsaou](https://github.com/ktsaou)) +- bump go.d.plugin to v0.45.0 [\#14052](https://github.com/netdata/netdata/pull/14052) ([ilyam8](https://github.com/ilyam8)) +- Remove eBPF plugin warning [\#14047](https://github.com/netdata/netdata/pull/14047) ([thiagoftsm](https://github.com/thiagoftsm)) +- replication fixes \#6 [\#14046](https://github.com/netdata/netdata/pull/14046) ([ktsaou](https://github.com/ktsaou)) +- fix build on old openssl versions on centos [\#14045](https://github.com/netdata/netdata/pull/14045) ([underhood](https://github.com/underhood)) +- Don't let slow disk plugin thread delay shutdown [\#14044](https://github.com/netdata/netdata/pull/14044) ([MrZammler](https://github.com/MrZammler)) +- minor - wss to point to master instead of branch [\#14043](https://github.com/netdata/netdata/pull/14043) ([underhood](https://github.com/underhood)) +- fix dictionaries unittest [\#14042](https://github.com/netdata/netdata/pull/14042) ([ktsaou](https://github.com/ktsaou)) +- minor - Adds better information in case of SSL error [\#14041](https://github.com/netdata/netdata/pull/14041) ([underhood](https://github.com/underhood)) +- replication fixes \#5 [\#14038](https://github.com/netdata/netdata/pull/14038) ([ktsaou](https://github.com/ktsaou)) +- do not merge duplicate replication requests [\#14037](https://github.com/netdata/netdata/pull/14037) ([ktsaou](https://github.com/ktsaou)) +- Replication fixes \#3 [\#14035](https://github.com/netdata/netdata/pull/14035) ([ktsaou](https://github.com/ktsaou)) +- improve performance of worker utilization statistics [\#14034](https://github.com/netdata/netdata/pull/14034) ([ktsaou](https://github.com/ktsaou)) +- use 2 levels of judy arrays to speed up replication on very busy parents [\#14031](https://github.com/netdata/netdata/pull/14031) ([ktsaou](https://github.com/ktsaou)) +- bump go.d.plugin v0.44.0 [\#14030](https://github.com/netdata/netdata/pull/14030) ([ilyam8](https://github.com/ilyam8)) +- remove retries from SSL [\#14026](https://github.com/netdata/netdata/pull/14026) ([ktsaou](https://github.com/ktsaou)) +- Fix documentation TLS streaming [\#14024](https://github.com/netdata/netdata/pull/14024) ([thiagoftsm](https://github.com/thiagoftsm)) +- streaming compression, query planner and replication fixes [\#14023](https://github.com/netdata/netdata/pull/14023) ([ktsaou](https://github.com/ktsaou)) +- Change relative links to absolute for learn components [\#14015](https://github.com/netdata/netdata/pull/14015) ([tkatsoulas](https://github.com/tkatsoulas)) +- allow statsd tags to modify chart metadata on the fly [\#14014](https://github.com/netdata/netdata/pull/14014) ([ktsaou](https://github.com/ktsaou)) +- minor - silence misleading error [\#14013](https://github.com/netdata/netdata/pull/14013) ([underhood](https://github.com/underhood)) +- Improve eBPF exit [\#14012](https://github.com/netdata/netdata/pull/14012) ([thiagoftsm](https://github.com/thiagoftsm)) +- Change static image urls to app.netdata.cloud in alarm-notify.sh [\#14007](https://github.com/netdata/netdata/pull/14007) ([MrZammler](https://github.com/MrZammler)) +- Fix connection resets on big parents [\#14004](https://github.com/netdata/netdata/pull/14004) ([underhood](https://github.com/underhood)) +- minor typo in the uninstall command [\#14002](https://github.com/netdata/netdata/pull/14002) ([tkatsoulas](https://github.com/tkatsoulas)) +- Revert "New journal disk based indexing for agent memory reduction" [\#14000](https://github.com/netdata/netdata/pull/14000) ([ktsaou](https://github.com/ktsaou)) +- Revert "enable dbengine tiering by default" [\#13999](https://github.com/netdata/netdata/pull/13999) ([ktsaou](https://github.com/ktsaou)) +- fixes MQTT-NG QoS0 [\#13997](https://github.com/netdata/netdata/pull/13997) ([underhood](https://github.com/underhood)) +- remove python.d/nginx\_plus [\#13995](https://github.com/netdata/netdata/pull/13995) ([ilyam8](https://github.com/ilyam8)) +- bump go.d.plugin to v0.43.1 [\#13993](https://github.com/netdata/netdata/pull/13993) ([ilyam8](https://github.com/ilyam8)) +- Add 'funcs' capability [\#13992](https://github.com/netdata/netdata/pull/13992) ([underhood](https://github.com/underhood)) +- added debug info on left-over query targets [\#13990](https://github.com/netdata/netdata/pull/13990) ([ktsaou](https://github.com/ktsaou)) +- replication improvements [\#13989](https://github.com/netdata/netdata/pull/13989) ([ktsaou](https://github.com/ktsaou)) +- use calculator app instead of spreadsheet [\#13981](https://github.com/netdata/netdata/pull/13981) ([andrewm4894](https://github.com/andrewm4894)) +- apps.plugin function processes: keys should not have spaces [\#13980](https://github.com/netdata/netdata/pull/13980) ([ktsaou](https://github.com/ktsaou)) +- dont crash when netdata cannot execute its external plugins [\#13978](https://github.com/netdata/netdata/pull/13978) ([ktsaou](https://github.com/ktsaou)) +- Add \_total suffix to raw increment metrics for remote write [\#13977](https://github.com/netdata/netdata/pull/13977) ([vlvkobal](https://github.com/vlvkobal)) +- add Cassandra icon to dashboard info [\#13975](https://github.com/netdata/netdata/pull/13975) ([ilyam8](https://github.com/ilyam8)) +- Change the db-engine calculator to a read only gsheet [\#13974](https://github.com/netdata/netdata/pull/13974) ([tkatsoulas](https://github.com/tkatsoulas)) +- enable collecting ECC memory errors by default [\#13970](https://github.com/netdata/netdata/pull/13970) ([ilyam8](https://github.com/ilyam8)) +- break active-active loop from replicating non-existing child to each other [\#13968](https://github.com/netdata/netdata/pull/13968) ([ktsaou](https://github.com/ktsaou)) +- document password param for tor collector [\#13966](https://github.com/netdata/netdata/pull/13966) ([andrewm4894](https://github.com/andrewm4894)) +- Fallback to ar and ranlib if llvm-ar and llvm-ranlib are not there [\#13959](https://github.com/netdata/netdata/pull/13959) ([MrZammler](https://github.com/MrZammler)) +- require -DENABLE\_DLSYM=1 to use dlsym\(\) [\#13958](https://github.com/netdata/netdata/pull/13958) ([ktsaou](https://github.com/ktsaou)) +- health/ping: use 'host' label in alerts info [\#13955](https://github.com/netdata/netdata/pull/13955) ([ilyam8](https://github.com/ilyam8)) +- bump go.d.plugin to v0.43.0 [\#13954](https://github.com/netdata/netdata/pull/13954) ([ilyam8](https://github.com/ilyam8)) +- Fix local dashboard cloud links [\#13953](https://github.com/netdata/netdata/pull/13953) ([underhood](https://github.com/underhood)) +- Remove health\_thread\_stop [\#13948](https://github.com/netdata/netdata/pull/13948) ([MrZammler](https://github.com/MrZammler)) +- Provide improved messaging in the kickstart script for existing installs managed by the system package manager. [\#13947](https://github.com/netdata/netdata/pull/13947) ([Ferroin](https://github.com/Ferroin)) +- do not resend charts upstream when chart variables are being updated [\#13946](https://github.com/netdata/netdata/pull/13946) ([ktsaou](https://github.com/ktsaou)) +- recalculate last\_collected\_total [\#13945](https://github.com/netdata/netdata/pull/13945) ([ktsaou](https://github.com/ktsaou)) +- fix chart definition end time\_t printing and parsing [\#13942](https://github.com/netdata/netdata/pull/13942) ([ktsaou](https://github.com/ktsaou)) +- Setup default certificates path [\#13941](https://github.com/netdata/netdata/pull/13941) ([MrZammler](https://github.com/MrZammler)) +- update link to point at demo spaces [\#13939](https://github.com/netdata/netdata/pull/13939) ([andrewm4894](https://github.com/andrewm4894)) +- Statsd dictionaries should be multi-threaded [\#13938](https://github.com/netdata/netdata/pull/13938) ([ktsaou](https://github.com/ktsaou)) +- Fix oracle linux \(eBPF plugin\) [\#13935](https://github.com/netdata/netdata/pull/13935) ([thiagoftsm](https://github.com/thiagoftsm)) +- Update print message on startup [\#13934](https://github.com/netdata/netdata/pull/13934) ([andrewm4894](https://github.com/andrewm4894)) +- Rrddim acquire on replay set [\#13932](https://github.com/netdata/netdata/pull/13932) ([ktsaou](https://github.com/ktsaou)) +- fix compiling without dbengine [\#13931](https://github.com/netdata/netdata/pull/13931) ([ilyam8](https://github.com/ilyam8)) +- bump go.d.plugin to v0.42.1 [\#13930](https://github.com/netdata/netdata/pull/13930) ([ilyam8](https://github.com/ilyam8)) +- Remove pluginsd action param & dead code. [\#13928](https://github.com/netdata/netdata/pull/13928) ([vkalintiris](https://github.com/vkalintiris)) +- Do not force internal collectors to call rrdset\_next. [\#13926](https://github.com/netdata/netdata/pull/13926) ([vkalintiris](https://github.com/vkalintiris)) +- Return accidentaly removed 32bit RPi keep alive fix [\#13925](https://github.com/netdata/netdata/pull/13925) ([underhood](https://github.com/underhood)) +- error\_limit\(\) function to limit number of error lines per instance [\#13924](https://github.com/netdata/netdata/pull/13924) ([ktsaou](https://github.com/ktsaou)) +- fix crash on query plan switch [\#13920](https://github.com/netdata/netdata/pull/13920) ([ktsaou](https://github.com/ktsaou)) +- Enable aclk conversation log even without NETDATA\_INTERNAL CHECKS [\#13917](https://github.com/netdata/netdata/pull/13917) ([MrZammler](https://github.com/MrZammler)) +- add ping dashboard info and alarms [\#13916](https://github.com/netdata/netdata/pull/13916) ([ilyam8](https://github.com/ilyam8)) +- enable dbengine tiering by default [\#13914](https://github.com/netdata/netdata/pull/13914) ([ktsaou](https://github.com/ktsaou)) +- bump go.d.plugin to v0.42.0 [\#13913](https://github.com/netdata/netdata/pull/13913) ([ilyam8](https://github.com/ilyam8)) +- do not free hosts if a change on db mode is not needed [\#13912](https://github.com/netdata/netdata/pull/13912) ([ktsaou](https://github.com/ktsaou)) +- timeframe matching should take into account the update frequency of the chart [\#13911](https://github.com/netdata/netdata/pull/13911) ([ktsaou](https://github.com/ktsaou)) +- WMI Process \(Dashboard, Documentation\) [\#13910](https://github.com/netdata/netdata/pull/13910) ([thiagoftsm](https://github.com/thiagoftsm)) +- feat\(packaging\): add CAP\_NET\_RAW to go.d.plugin [\#13909](https://github.com/netdata/netdata/pull/13909) ([ilyam8](https://github.com/ilyam8)) +- Reference the bash collector for RPi [\#13907](https://github.com/netdata/netdata/pull/13907) ([cakrit](https://github.com/cakrit)) +- Improve intro paragraph [\#13906](https://github.com/netdata/netdata/pull/13906) ([cakrit](https://github.com/cakrit)) +- bump go.d.plugin v0.41.2 [\#13903](https://github.com/netdata/netdata/pull/13903) ([ilyam8](https://github.com/ilyam8)) +- apps.plugin function add max value on all value columns [\#13899](https://github.com/netdata/netdata/pull/13899) ([ktsaou](https://github.com/ktsaou)) +- Reduce unnecessary alert events to the cloud [\#13897](https://github.com/netdata/netdata/pull/13897) ([MrZammler](https://github.com/MrZammler)) +- add pandas collector to collectors.md [\#13895](https://github.com/netdata/netdata/pull/13895) ([andrewm4894](https://github.com/andrewm4894)) +- Fix reading health "enable" from the configuration [\#13894](https://github.com/netdata/netdata/pull/13894) ([stelfrag](https://github.com/stelfrag)) +- fix\(proc.plugin\): fix read retry logic when reading interface speed [\#13893](https://github.com/netdata/netdata/pull/13893) ([ilyam8](https://github.com/ilyam8)) +- Record installation command in telemetry events. [\#13892](https://github.com/netdata/netdata/pull/13892) ([Ferroin](https://github.com/Ferroin)) +- Prompt users about updates/claiming on unknown install types. [\#13890](https://github.com/netdata/netdata/pull/13890) ([Ferroin](https://github.com/Ferroin)) +- tune rrdcontext timings [\#13889](https://github.com/netdata/netdata/pull/13889) ([ktsaou](https://github.com/ktsaou)) +- filtering out charts in context queries, includes them in full\_xxx variables [\#13886](https://github.com/netdata/netdata/pull/13886) ([ktsaou](https://github.com/ktsaou)) +- New journal disk based indexing for agent memory reduction [\#13885](https://github.com/netdata/netdata/pull/13885) ([stelfrag](https://github.com/stelfrag)) +- Fix systemd chart update \(eBPF\) [\#13884](https://github.com/netdata/netdata/pull/13884) ([thiagoftsm](https://github.com/thiagoftsm)) +- apps.plugin function processes cosmetic changes [\#13880](https://github.com/netdata/netdata/pull/13880) ([ktsaou](https://github.com/ktsaou)) +- allow single chart to be filtered in context queries [\#13879](https://github.com/netdata/netdata/pull/13879) ([ktsaou](https://github.com/ktsaou)) +- Add description for TCP WMI [\#13878](https://github.com/netdata/netdata/pull/13878) ([thiagoftsm](https://github.com/thiagoftsm)) +- Use print macros [\#13876](https://github.com/netdata/netdata/pull/13876) ([MrZammler](https://github.com/MrZammler)) +- Suppress ML and dlib ABI warnings [\#13875](https://github.com/netdata/netdata/pull/13875) ([Dim-P](https://github.com/Dim-P)) +- bump go.d.plugin v0.41.1 [\#13874](https://github.com/netdata/netdata/pull/13874) ([ilyam8](https://github.com/ilyam8)) +- Replication of metrics \(gaps filling\) during streaming [\#13873](https://github.com/netdata/netdata/pull/13873) ([vkalintiris](https://github.com/vkalintiris)) +- Don't create a REMOVED alert event after a REMOVED. [\#13871](https://github.com/netdata/netdata/pull/13871) ([MrZammler](https://github.com/MrZammler)) +- Store hidden status when creating / updating dimension metadata [\#13869](https://github.com/netdata/netdata/pull/13869) ([stelfrag](https://github.com/stelfrag)) +- Find the chart and dimension UUID from the context [\#13868](https://github.com/netdata/netdata/pull/13868) ([stelfrag](https://github.com/stelfrag)) +- fix\(cgroup.plugin\): handle qemu-1- prefix when extracting virsh domain [\#13866](https://github.com/netdata/netdata/pull/13866) ([ilyam8](https://github.com/ilyam8)) +- Update step-09 for dbmode update.md [\#13864](https://github.com/netdata/netdata/pull/13864) ([DShreve2](https://github.com/DShreve2)) +- add ACLK access to ml\_info \(fix anomalies tab in cloud\) [\#13863](https://github.com/netdata/netdata/pull/13863) ([underhood](https://github.com/underhood)) +- bump go.d.plugin to v0.41.0 [\#13861](https://github.com/netdata/netdata/pull/13861) ([ilyam8](https://github.com/ilyam8)) +- app to api netdata cloud [\#13856](https://github.com/netdata/netdata/pull/13856) ([underhood](https://github.com/underhood)) +- Use llvm's ar and ranlib when compiling with clang [\#13854](https://github.com/netdata/netdata/pull/13854) ([MrZammler](https://github.com/MrZammler)) +- fix typo [\#13853](https://github.com/netdata/netdata/pull/13853) ([andrewm4894](https://github.com/andrewm4894)) +- Retry reading carrier, duplex, and speed files periodically [\#13850](https://github.com/netdata/netdata/pull/13850) ([vlvkobal](https://github.com/vlvkobal)) +- Properly guard commands when installing services for offline service managers. [\#13848](https://github.com/netdata/netdata/pull/13848) ([Ferroin](https://github.com/Ferroin)) +- fix tiers update frequency [\#13844](https://github.com/netdata/netdata/pull/13844) ([ktsaou](https://github.com/ktsaou)) +- Fix service installation on FreeBSD. [\#13842](https://github.com/netdata/netdata/pull/13842) ([Ferroin](https://github.com/Ferroin)) +- Cassandra dashboard description [\#13835](https://github.com/netdata/netdata/pull/13835) ([thiagoftsm](https://github.com/thiagoftsm)) +- Use mmap to read an extent from a datafile [\#13834](https://github.com/netdata/netdata/pull/13834) ([stelfrag](https://github.com/stelfrag)) +- chore\(health\): rm pihole\_blocklist\_gravity\_file\_existence\_state [\#13826](https://github.com/netdata/netdata/pull/13826) ([ilyam8](https://github.com/ilyam8)) +- Improve error and warning messages in the kickstart script. [\#13825](https://github.com/netdata/netdata/pull/13825) ([Ferroin](https://github.com/Ferroin)) +- Remove option to use MQTT 3 [\#13824](https://github.com/netdata/netdata/pull/13824) ([underhood](https://github.com/underhood)) +- extended processes function info from apps.plugin [\#13822](https://github.com/netdata/netdata/pull/13822) ([ktsaou](https://github.com/ktsaou)) +- Fix crash on child reconnect and lost metrics [\#13821](https://github.com/netdata/netdata/pull/13821) ([stelfrag](https://github.com/stelfrag)) +- Remove NFS readahead histogram [\#13819](https://github.com/netdata/netdata/pull/13819) ([vlvkobal](https://github.com/vlvkobal)) +- minor - add trace alloc to buildinfo [\#13817](https://github.com/netdata/netdata/pull/13817) ([underhood](https://github.com/underhood)) +- Fix exporting unit tests [\#13816](https://github.com/netdata/netdata/pull/13816) ([vlvkobal](https://github.com/vlvkobal)) +- Inject costallocz to mqtt\_websockets library and its children [\#13813](https://github.com/netdata/netdata/pull/13813) ([underhood](https://github.com/underhood)) +- overload libc memory allocators with custom ones to trace all allocations [\#13810](https://github.com/netdata/netdata/pull/13810) ([ktsaou](https://github.com/ktsaou)) +- bump go.d.plugin v0.40.4 [\#13808](https://github.com/netdata/netdata/pull/13808) ([ilyam8](https://github.com/ilyam8)) +- fix post-processing of contexts [\#13807](https://github.com/netdata/netdata/pull/13807) ([ktsaou](https://github.com/ktsaou)) +- Merge netstat, snmp, and snmp6 modules [\#13806](https://github.com/netdata/netdata/pull/13806) ([vlvkobal](https://github.com/vlvkobal)) +- fix warning when -Wfree-nonheap-object is used [\#13805](https://github.com/netdata/netdata/pull/13805) ([underhood](https://github.com/underhood)) +- ARAL optimal alloc size [\#13804](https://github.com/netdata/netdata/pull/13804) ([ktsaou](https://github.com/ktsaou)) +- internal log error, when passing NULL dictionary [\#13803](https://github.com/netdata/netdata/pull/13803) ([ktsaou](https://github.com/ktsaou)) +- Properly propagate errors from installer/updater to kickstart script. [\#13802](https://github.com/netdata/netdata/pull/13802) ([Ferroin](https://github.com/Ferroin)) +- Add up to date info on improving performance [\#13801](https://github.com/netdata/netdata/pull/13801) ([cakrit](https://github.com/cakrit)) +- Return memory freed properly [\#13799](https://github.com/netdata/netdata/pull/13799) ([stelfrag](https://github.com/stelfrag)) +- Use string\_freez instead of freez in rrdhost\_init\_timezone [\#13798](https://github.com/netdata/netdata/pull/13798) ([MrZammler](https://github.com/MrZammler)) +- Fix runtime directory ownership when installed as non-root user. [\#13797](https://github.com/netdata/netdata/pull/13797) ([Ferroin](https://github.com/Ferroin)) +- Fix minor typo in systemdunits.conf alert [\#13796](https://github.com/netdata/netdata/pull/13796) ([tkatsoulas](https://github.com/tkatsoulas)) +- Also enable rrdvars from health [\#13795](https://github.com/netdata/netdata/pull/13795) ([MrZammler](https://github.com/MrZammler)) +- feat\(python.d\): respect NETDATA\_INTERNALS\_MONITORING [\#13793](https://github.com/netdata/netdata/pull/13793) ([ilyam8](https://github.com/ilyam8)) +- Array Allocator Memory Leak Fix [\#13792](https://github.com/netdata/netdata/pull/13792) ([ktsaou](https://github.com/ktsaou)) +- Add variants of functions allowing callers to specify the time to use. [\#13791](https://github.com/netdata/netdata/pull/13791) ([vkalintiris](https://github.com/vkalintiris)) +- Remove extern from function declared in headers. [\#13790](https://github.com/netdata/netdata/pull/13790) ([vkalintiris](https://github.com/vkalintiris)) +- full memory tracking and profiling of Netdata Agent [\#13789](https://github.com/netdata/netdata/pull/13789) ([ktsaou](https://github.com/ktsaou)) +- allow disabling netdata monitoring section of the dashboard [\#13788](https://github.com/netdata/netdata/pull/13788) ([ktsaou](https://github.com/ktsaou)) +- Stop pulling in netcat as a mandatory dependency. [\#13787](https://github.com/netdata/netdata/pull/13787) ([Ferroin](https://github.com/Ferroin)) +- Initialize st-\>rrdvars from rrdset insert callback [\#13786](https://github.com/netdata/netdata/pull/13786) ([MrZammler](https://github.com/MrZammler)) +- Add Ubuntu 22.10 to supported distros, CI, and package builds. [\#13785](https://github.com/netdata/netdata/pull/13785) ([Ferroin](https://github.com/Ferroin)) +- minor - add host labels for ephemerality and nodes with unstable connections [\#13784](https://github.com/netdata/netdata/pull/13784) ([underhood](https://github.com/underhood)) +- Add a thread to asynchronously process metadata updates [\#13783](https://github.com/netdata/netdata/pull/13783) ([stelfrag](https://github.com/stelfrag)) +- Parser cleanup [\#13782](https://github.com/netdata/netdata/pull/13782) ([stelfrag](https://github.com/stelfrag)) +- allow netdata installer to install and run netdata as any user [\#13780](https://github.com/netdata/netdata/pull/13780) ([ktsaou](https://github.com/ktsaou)) +- Update libbpf 1.0.1 [\#13778](https://github.com/netdata/netdata/pull/13778) ([thiagoftsm](https://github.com/thiagoftsm)) +- Bump websockets submodule [\#13776](https://github.com/netdata/netdata/pull/13776) ([underhood](https://github.com/underhood)) +- Rename variable for old CentOS version [\#13775](https://github.com/netdata/netdata/pull/13775) ([thiagoftsm](https://github.com/thiagoftsm)) +- Further improvements to the new service installation code. [\#13774](https://github.com/netdata/netdata/pull/13774) ([Ferroin](https://github.com/Ferroin)) +- Pandas collector [\#13773](https://github.com/netdata/netdata/pull/13773) ([andrewm4894](https://github.com/andrewm4894)) +- dbengine free from RRDSET and RRDDIM [\#13772](https://github.com/netdata/netdata/pull/13772) ([ktsaou](https://github.com/ktsaou)) +- bump go.d.plugin v0.40.3 [\#13771](https://github.com/netdata/netdata/pull/13771) ([ilyam8](https://github.com/ilyam8)) +- Update fping plugin documentation with better details about the required version. [\#13765](https://github.com/netdata/netdata/pull/13765) ([Ferroin](https://github.com/Ferroin)) +- fix bad merge [\#13764](https://github.com/netdata/netdata/pull/13764) ([ktsaou](https://github.com/ktsaou)) +- Remove anomaly rates chart. [\#13763](https://github.com/netdata/netdata/pull/13763) ([vkalintiris](https://github.com/vkalintiris)) +- add 1m delay for tcp reset alarms [\#13761](https://github.com/netdata/netdata/pull/13761) ([ilyam8](https://github.com/ilyam8)) +- Use /bin/sh instead of ls to detect glibc [\#13758](https://github.com/netdata/netdata/pull/13758) ([MrZammler](https://github.com/MrZammler)) +- Add ZFS rate charts [\#13757](https://github.com/netdata/netdata/pull/13757) ([vlvkobal](https://github.com/vlvkobal)) +- Count currently streaming senders on the localhost [\#13755](https://github.com/netdata/netdata/pull/13755) ([MrZammler](https://github.com/MrZammler)) +- Fix streaming crash when child reconnects and is archived on the parent [\#13754](https://github.com/netdata/netdata/pull/13754) ([stelfrag](https://github.com/stelfrag)) +- Add CloudLinux OS detection to the updater script [\#13752](https://github.com/netdata/netdata/pull/13752) ([Pulseeey](https://github.com/Pulseeey)) +- Add CloudLinux OS detection to kickstart [\#13750](https://github.com/netdata/netdata/pull/13750) ([Pulseeey](https://github.com/Pulseeey)) +- bump go.d v0.40.2 [\#13747](https://github.com/netdata/netdata/pull/13747) ([ilyam8](https://github.com/ilyam8)) +- fix\(python.d\): set correct label source for \_collect\_job label [\#13746](https://github.com/netdata/netdata/pull/13746) ([ilyam8](https://github.com/ilyam8)) +- Provide Details on Label Filtering/Custom Labels [\#13745](https://github.com/netdata/netdata/pull/13745) ([DShreve2](https://github.com/DShreve2)) +- Fix handling of temporary directories in kickstart code. [\#13744](https://github.com/netdata/netdata/pull/13744) ([Ferroin](https://github.com/Ferroin)) +- Dont send NodeInfo during first database cleanup [\#13740](https://github.com/netdata/netdata/pull/13740) ([MrZammler](https://github.com/MrZammler)) +- CMake - add possibility to build without ACLK [\#13736](https://github.com/netdata/netdata/pull/13736) ([underhood](https://github.com/underhood)) +- Change cast to remove coverity warnings [\#13735](https://github.com/netdata/netdata/pull/13735) ([thiagoftsm](https://github.com/thiagoftsm)) +- Do not try to start an archived host in dbengine if dbengine is not compiled [\#13724](https://github.com/netdata/netdata/pull/13724) ([stelfrag](https://github.com/stelfrag)) +- Allow netdata plugins to expose functions for querying more information about specific charts [\#13720](https://github.com/netdata/netdata/pull/13720) ([ktsaou](https://github.com/ktsaou)) +- feat\(health\): add new Redis alarms [\#13715](https://github.com/netdata/netdata/pull/13715) ([ilyam8](https://github.com/ilyam8)) +- Health thread per host [\#13712](https://github.com/netdata/netdata/pull/13712) ([MrZammler](https://github.com/MrZammler)) +- Faster streaming by 25% on the child [\#13708](https://github.com/netdata/netdata/pull/13708) ([ktsaou](https://github.com/ktsaou)) +- Do not create train/predict dimensions meant for tracking anomaly rates. [\#13707](https://github.com/netdata/netdata/pull/13707) ([vkalintiris](https://github.com/vkalintiris)) +- Update exporting unit tests [\#13706](https://github.com/netdata/netdata/pull/13706) ([vlvkobal](https://github.com/vlvkobal)) +- bump go.d.plugin to v0.40.1 [\#13704](https://github.com/netdata/netdata/pull/13704) ([ilyam8](https://github.com/ilyam8)) +- Build judy even without dbengine [\#13703](https://github.com/netdata/netdata/pull/13703) ([underhood](https://github.com/underhood)) +- alarms collector: ability to exclude certain alarms via config [\#13701](https://github.com/netdata/netdata/pull/13701) ([andrewm4894](https://github.com/andrewm4894)) +- Fix inconsistent alert class names [\#13699](https://github.com/netdata/netdata/pull/13699) ([ralphm](https://github.com/ralphm)) +- disable Postgres last vacuum/analyze alarms [\#13698](https://github.com/netdata/netdata/pull/13698) ([ilyam8](https://github.com/ilyam8)) +- QUERY\_TARGET: new query engine for Netdata Agent [\#13697](https://github.com/netdata/netdata/pull/13697) ([ktsaou](https://github.com/ktsaou)) +- Update dashboard to version v2.29.1. [\#13696](https://github.com/netdata/netdata/pull/13696) ([netdatabot](https://github.com/netdatabot)) +- docs: nvidia-smi in a container limitation note [\#13695](https://github.com/netdata/netdata/pull/13695) ([ilyam8](https://github.com/ilyam8)) +- Use CMake generated config.h also in out of tree CMake build [\#13692](https://github.com/netdata/netdata/pull/13692) ([underhood](https://github.com/underhood)) +- Add info for Docker containers about using hostname from host. [\#13685](https://github.com/netdata/netdata/pull/13685) ([Ferroin](https://github.com/Ferroin)) +- add node level AR based example [\#13684](https://github.com/netdata/netdata/pull/13684) ([andrewm4894](https://github.com/andrewm4894)) +- Store nulls instead of empty strings in health tables [\#13683](https://github.com/netdata/netdata/pull/13683) ([MrZammler](https://github.com/MrZammler)) +- Fix warnings during compilation time on ARM \(32 bits\) [\#13681](https://github.com/netdata/netdata/pull/13681) ([thiagoftsm](https://github.com/thiagoftsm)) +- dictionary updated documentation and cosmetics [\#13679](https://github.com/netdata/netdata/pull/13679) ([ktsaou](https://github.com/ktsaou)) +- disable internal log [\#13678](https://github.com/netdata/netdata/pull/13678) ([ktsaou](https://github.com/ktsaou)) +- bump go.d.plugin v0.40.0 [\#13675](https://github.com/netdata/netdata/pull/13675) ([ilyam8](https://github.com/ilyam8)) +- remove \_instance\_family label [\#13674](https://github.com/netdata/netdata/pull/13674) ([ilyam8](https://github.com/ilyam8)) +- fix typo not deleting collected flag; force removing collected flag on child disconnect [\#13672](https://github.com/netdata/netdata/pull/13672) ([ktsaou](https://github.com/ktsaou)) +- feat\(health\): add Postgres alarms [\#13671](https://github.com/netdata/netdata/pull/13671) ([ilyam8](https://github.com/ilyam8)) +- add proxysql dashboard info [\#13669](https://github.com/netdata/netdata/pull/13669) ([ilyam8](https://github.com/ilyam8)) +- Additional sqlite statistics [\#13668](https://github.com/netdata/netdata/pull/13668) ([stelfrag](https://github.com/stelfrag)) +- Advance the buffer properly to scan the journal file [\#13666](https://github.com/netdata/netdata/pull/13666) ([stelfrag](https://github.com/stelfrag)) +- Add sqlite page cache hit and miss statistics [\#13665](https://github.com/netdata/netdata/pull/13665) ([stelfrag](https://github.com/stelfrag)) +- bump go.d.plugin to v0.39.0 [\#13662](https://github.com/netdata/netdata/pull/13662) ([ilyam8](https://github.com/ilyam8)) +- update Postgres dashboard info [\#13661](https://github.com/netdata/netdata/pull/13661) ([ilyam8](https://github.com/ilyam8)) +- Use mmap if possible during startup for journal replay [\#13660](https://github.com/netdata/netdata/pull/13660) ([stelfrag](https://github.com/stelfrag)) +- Remove anomaly detector [\#13657](https://github.com/netdata/netdata/pull/13657) ([vkalintiris](https://github.com/vkalintiris)) +- Update dashboard to version v2.29.0. [\#13654](https://github.com/netdata/netdata/pull/13654) ([netdatabot](https://github.com/netdatabot)) +- Fix container virtualization info [\#13653](https://github.com/netdata/netdata/pull/13653) ([vlvkobal](https://github.com/vlvkobal)) +- Do not free AR dimensions from within ML. [\#13651](https://github.com/netdata/netdata/pull/13651) ([vkalintiris](https://github.com/vkalintiris)) +- Remove Chart/Dim based communication [\#13650](https://github.com/netdata/netdata/pull/13650) ([underhood](https://github.com/underhood)) +- Improve agent shutdown time [\#13649](https://github.com/netdata/netdata/pull/13649) ([stelfrag](https://github.com/stelfrag)) +- add \_collect\_job label to python.d/\* charts [\#13648](https://github.com/netdata/netdata/pull/13648) ([ilyam8](https://github.com/ilyam8)) +- RRD structures managed by dictionaries [\#13646](https://github.com/netdata/netdata/pull/13646) ([ktsaou](https://github.com/ktsaou)) +- fix rrdcontexts left in the post-processing queue from the garbage collector [\#13645](https://github.com/netdata/netdata/pull/13645) ([ktsaou](https://github.com/ktsaou)) +- apps.plugin: Re-add `chrome` to the `webbrowser` group. [\#13642](https://github.com/netdata/netdata/pull/13642) ([Ferroin](https://github.com/Ferroin)) +- Fix a memory leak on archived host creation [\#13641](https://github.com/netdata/netdata/pull/13641) ([stelfrag](https://github.com/stelfrag)) +- fix compile issues [\#13640](https://github.com/netdata/netdata/pull/13640) ([ktsaou](https://github.com/ktsaou)) +- Obsolete RRDSET state [\#13635](https://github.com/netdata/netdata/pull/13635) ([ktsaou](https://github.com/ktsaou)) +- Updated tc.plugin \(linux bandwidth QoS\) [\#13634](https://github.com/netdata/netdata/pull/13634) ([ktsaou](https://github.com/ktsaou)) +- Fix worker utilization cleanup [\#13633](https://github.com/netdata/netdata/pull/13633) ([stelfrag](https://github.com/stelfrag)) +- remove forgotten avl structure from rrdcalc [\#13632](https://github.com/netdata/netdata/pull/13632) ([ktsaou](https://github.com/ktsaou)) +- Deaggregate the `gui` and `email` app groupx and improve GUI coverage. [\#13631](https://github.com/netdata/netdata/pull/13631) ([Ferroin](https://github.com/Ferroin)) +- Faster rrdcontext [\#13629](https://github.com/netdata/netdata/pull/13629) ([ktsaou](https://github.com/ktsaou)) +- Update uninstaller documentation. [\#13627](https://github.com/netdata/netdata/pull/13627) ([Ferroin](https://github.com/Ferroin)) +- eBPF different improvements [\#13624](https://github.com/netdata/netdata/pull/13624) ([thiagoftsm](https://github.com/thiagoftsm)) +- adjust systemdunits alarms [\#13623](https://github.com/netdata/netdata/pull/13623) ([ilyam8](https://github.com/ilyam8)) +- fix apps plugin users charts descriptipon [\#13621](https://github.com/netdata/netdata/pull/13621) ([ilyam8](https://github.com/ilyam8)) +- add Postgres total connection utilization alarm [\#13620](https://github.com/netdata/netdata/pull/13620) ([ilyam8](https://github.com/ilyam8)) +- update Postgres "connections" dashboard info [\#13619](https://github.com/netdata/netdata/pull/13619) ([ilyam8](https://github.com/ilyam8)) +- Assorted updates for apps\_groups.conf. [\#13618](https://github.com/netdata/netdata/pull/13618) ([Ferroin](https://github.com/Ferroin)) +- add spiceproxy to proxmox group [\#13615](https://github.com/netdata/netdata/pull/13615) ([ilyam8](https://github.com/ilyam8)) +- Improve coverage of Linux kernel threads in apps\_groups.conf [\#13612](https://github.com/netdata/netdata/pull/13612) ([Ferroin](https://github.com/Ferroin)) +- Clean chart hash map [\#13611](https://github.com/netdata/netdata/pull/13611) ([stelfrag](https://github.com/stelfrag)) +- Update dashboard to version v2.28.8. [\#13609](https://github.com/netdata/netdata/pull/13609) ([netdatabot](https://github.com/netdatabot)) +- Don't try to load db rows when chart\_id or dim\_id is null [\#13608](https://github.com/netdata/netdata/pull/13608) ([MrZammler](https://github.com/MrZammler)) +- Add info text for wal and update for checkpoints [\#13607](https://github.com/netdata/netdata/pull/13607) ([shyamvalsan](https://github.com/shyamvalsan)) +- bump go.d.plugin to v0.38.0 [\#13603](https://github.com/netdata/netdata/pull/13603) ([ilyam8](https://github.com/ilyam8)) +- Use prepared statements for context related queries [\#13602](https://github.com/netdata/netdata/pull/13602) ([stelfrag](https://github.com/stelfrag)) +- fix\(cgroups.plugin\): fix chart id length check [\#13601](https://github.com/netdata/netdata/pull/13601) ([ilyam8](https://github.com/ilyam8)) +- Temporary fix for command injection vulnerability in GHA workflow. [\#13600](https://github.com/netdata/netdata/pull/13600) ([Ferroin](https://github.com/Ferroin)) +- update logind dashboard info [\#13597](https://github.com/netdata/netdata/pull/13597) ([ilyam8](https://github.com/ilyam8)) +- Add link to the performance optimization guide [\#13595](https://github.com/netdata/netdata/pull/13595) ([cakrit](https://github.com/cakrit)) +- sqlite3 global statistics [\#13594](https://github.com/netdata/netdata/pull/13594) ([ktsaou](https://github.com/ktsaou)) +- feat\(python.d/nvidia\_smi\): collect power state [\#13580](https://github.com/netdata/netdata/pull/13580) ([ilyam8](https://github.com/ilyam8)) +- fix\(python.d/nvidia\_smi\): repsect update\_every for polling [\#13579](https://github.com/netdata/netdata/pull/13579) ([ilyam8](https://github.com/ilyam8)) +- prevent crash on rrdcontext apis when rrdcontexts is not initialized [\#13578](https://github.com/netdata/netdata/pull/13578) ([ktsaou](https://github.com/ktsaou)) +- CMake improvements part 1 [\#13575](https://github.com/netdata/netdata/pull/13575) ([underhood](https://github.com/underhood)) +- bump go.d.plugin to v0.37.2 [\#13574](https://github.com/netdata/netdata/pull/13574) ([ilyam8](https://github.com/ilyam8)) +- Updating info for postgreqsql metrics [\#13573](https://github.com/netdata/netdata/pull/13573) ([shyamvalsan](https://github.com/shyamvalsan)) +- add `apt` to `apps_groups.conf` [\#13571](https://github.com/netdata/netdata/pull/13571) ([andrewm4894](https://github.com/andrewm4894)) +- Deduplicate all netdata strings [\#13570](https://github.com/netdata/netdata/pull/13570) ([ktsaou](https://github.com/ktsaou)) +- eBPF cmake missing include dir [\#13568](https://github.com/netdata/netdata/pull/13568) ([underhood](https://github.com/underhood)) +- chore: removing logging that a chart collection in the same interpolation point [\#13567](https://github.com/netdata/netdata/pull/13567) ([ilyam8](https://github.com/ilyam8)) +- add more monitoring tools to `apps_groups.conf` [\#13566](https://github.com/netdata/netdata/pull/13566) ([andrewm4894](https://github.com/andrewm4894)) +- fix\(health.d/mysql\): adjust `mysql_galera_cluster_size_max_2m` lookup to make time in warn/crit predictable [\#13563](https://github.com/netdata/netdata/pull/13563) ([ilyam8](https://github.com/ilyam8)) +- Update dashboard to version v2.28.6. [\#13562](https://github.com/netdata/netdata/pull/13562) ([netdatabot](https://github.com/netdatabot)) +- Prefer context attributes from non archived charts [\#13559](https://github.com/netdata/netdata/pull/13559) ([MrZammler](https://github.com/MrZammler)) +- bump go.d.plugin to v0.37.1 [\#13555](https://github.com/netdata/netdata/pull/13555) ([ilyam8](https://github.com/ilyam8)) +- Fix coverity 380387 [\#13551](https://github.com/netdata/netdata/pull/13551) ([MrZammler](https://github.com/MrZammler)) +- add docker dashboard info [\#13547](https://github.com/netdata/netdata/pull/13547) ([ilyam8](https://github.com/ilyam8)) +- bump go.d.plugin to v0.37.0 [\#13546](https://github.com/netdata/netdata/pull/13546) ([ilyam8](https://github.com/ilyam8)) +- feat\(python.d/sensors\): discover chips, features at runtime [\#13545](https://github.com/netdata/netdata/pull/13545) ([ilyam8](https://github.com/ilyam8)) +- Remove aclk\_api.\[ch\] [\#13540](https://github.com/netdata/netdata/pull/13540) ([underhood](https://github.com/underhood)) +- Cleanup of APIs [\#13539](https://github.com/netdata/netdata/pull/13539) ([underhood](https://github.com/underhood)) +- bump go.d version to v0.36.0 [\#13538](https://github.com/netdata/netdata/pull/13538) ([ilyam8](https://github.com/ilyam8)) +- chore\(python.d\): rename dockerd job on lock registration [\#13537](https://github.com/netdata/netdata/pull/13537) ([ilyam8](https://github.com/ilyam8)) +- Update MacOS community support details [\#13536](https://github.com/netdata/netdata/pull/13536) ([DShreve2](https://github.com/DShreve2)) +- Fix a crash when xen libraries are misconfigured [\#13535](https://github.com/netdata/netdata/pull/13535) ([vlvkobal](https://github.com/vlvkobal)) +- Add summary dashboard for PostgreSQL [\#13534](https://github.com/netdata/netdata/pull/13534) ([shyamvalsan](https://github.com/shyamvalsan)) +- add `jupyter` to `apps_groups.conf` [\#13533](https://github.com/netdata/netdata/pull/13533) ([andrewm4894](https://github.com/andrewm4894)) +- Schedule next rotation based on absolute time [\#13531](https://github.com/netdata/netdata/pull/13531) ([MrZammler](https://github.com/MrZammler)) +- Improve PID monitoring \(step 2\) [\#13530](https://github.com/netdata/netdata/pull/13530) ([thiagoftsm](https://github.com/thiagoftsm)) +- fix\(health\): set default curl connection timeout if not set [\#13529](https://github.com/netdata/netdata/pull/13529) ([ilyam8](https://github.com/ilyam8)) +- Update FreeIPMI and CUPS plugin documentation. [\#13526](https://github.com/netdata/netdata/pull/13526) ([Ferroin](https://github.com/Ferroin)) +- Use LVM UUIDs in chart ids for logical volumes [\#13525](https://github.com/netdata/netdata/pull/13525) ([vlvkobal](https://github.com/vlvkobal)) +- fix\(cgroups.plugin\): use Docker API for name resolution when Docker is a snap package [\#13523](https://github.com/netdata/netdata/pull/13523) ([ilyam8](https://github.com/ilyam8)) +- remove reference to charts now in netdata monitoring [\#13521](https://github.com/netdata/netdata/pull/13521) ([andrewm4894](https://github.com/andrewm4894)) +- fix\(ci\): fix fetching tags in Build workflow [\#13517](https://github.com/netdata/netdata/pull/13517) ([ilyam8](https://github.com/ilyam8)) +- docs\(postfix\): add a note about `authorized_mailq_users` [\#13515](https://github.com/netdata/netdata/pull/13515) ([ilyam8](https://github.com/ilyam8)) +- Remove extra U from log message [\#13514](https://github.com/netdata/netdata/pull/13514) ([uplime](https://github.com/uplime)) +- Print rrdcontexts versions with PRIu64 [\#13511](https://github.com/netdata/netdata/pull/13511) ([MrZammler](https://github.com/MrZammler)) +- Calculate name hash after rrdvar\_fix\_name [\#13509](https://github.com/netdata/netdata/pull/13509) ([MrZammler](https://github.com/MrZammler)) +- fix\(packaging\): add CAP\_NET\_ADMIN for go.d.plugin [\#13507](https://github.com/netdata/netdata/pull/13507) ([ilyam8](https://github.com/ilyam8)) +- netdata.service: Update PIDFile to avoid systemd legacy path warning [\#13504](https://github.com/netdata/netdata/pull/13504) ([candrews](https://github.com/candrews)) +- chore\(python.d\): remove python.d/\* announced in v1.36.0 deprecation notice [\#13503](https://github.com/netdata/netdata/pull/13503) ([ilyam8](https://github.com/ilyam8)) +- Add Fedora 37 to CI and package builds. [\#13489](https://github.com/netdata/netdata/pull/13489) ([Ferroin](https://github.com/Ferroin)) +- Overhaul handling of installation of Netdata as a system service. [\#13451](https://github.com/netdata/netdata/pull/13451) ([Ferroin](https://github.com/Ferroin)) +- reduce memcpy and memory usage on mqtt5 [\#13450](https://github.com/netdata/netdata/pull/13450) ([underhood](https://github.com/underhood)) + +## [v1.36.1](https://github.com/netdata/netdata/tree/v1.36.1) (2022-08-15) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.36.0...v1.36.1) + +## [v1.36.0](https://github.com/netdata/netdata/tree/v1.36.0) (2022-08-10) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.35.1...v1.36.0) + +**Merged pull requests:** + +- rrdcontexts allow not linked dimensions and charts [\#13501](https://github.com/netdata/netdata/pull/13501) ([ktsaou](https://github.com/ktsaou)) +- docs: add deprecation notice to python.d/postgres readme [\#13497](https://github.com/netdata/netdata/pull/13497) ([ilyam8](https://github.com/ilyam8)) +- docs: change postgres links to go version [\#13496](https://github.com/netdata/netdata/pull/13496) ([ilyam8](https://github.com/ilyam8)) +- bump go.d version to v0.35.0 [\#13494](https://github.com/netdata/netdata/pull/13494) ([ilyam8](https://github.com/ilyam8)) +- add PgBouncer charts description and icon to dashboard info [\#13493](https://github.com/netdata/netdata/pull/13493) ([ilyam8](https://github.com/ilyam8)) +- Add chart\_context to alert snapshots [\#13492](https://github.com/netdata/netdata/pull/13492) ([MrZammler](https://github.com/MrZammler)) +- Remove prompt to add dashboard issues [\#13490](https://github.com/netdata/netdata/pull/13490) ([cakrit](https://github.com/cakrit)) +- docs: fix unresolved file references [\#13488](https://github.com/netdata/netdata/pull/13488) ([ilyam8](https://github.com/ilyam8)) +- docs: add a note about edit-config for docker installs [\#13487](https://github.com/netdata/netdata/pull/13487) ([ilyam8](https://github.com/ilyam8)) +- health: disable go python last collected alarms [\#13485](https://github.com/netdata/netdata/pull/13485) ([ilyam8](https://github.com/ilyam8)) +- bump go.d.plugin version to v0.34.0 [\#13484](https://github.com/netdata/netdata/pull/13484) ([ilyam8](https://github.com/ilyam8)) +- chore: add WireGuard description and icon to dashboard info [\#13483](https://github.com/netdata/netdata/pull/13483) ([ilyam8](https://github.com/ilyam8)) +- feat\(cgroups.plugin\): resolve nomad containers name \(docker driver only\) [\#13481](https://github.com/netdata/netdata/pull/13481) ([ilyam8](https://github.com/ilyam8)) +- Check for protected when excluding mounts [\#13479](https://github.com/netdata/netdata/pull/13479) ([MrZammler](https://github.com/MrZammler)) +- update postgres dashboard info [\#13474](https://github.com/netdata/netdata/pull/13474) ([ilyam8](https://github.com/ilyam8)) +- Remove the single threaded arrayallocator optiomization during agent startup [\#13473](https://github.com/netdata/netdata/pull/13473) ([stelfrag](https://github.com/stelfrag)) +- Handle cases where entries where stored as text \(with strftime\("%s"\)\) [\#13472](https://github.com/netdata/netdata/pull/13472) ([stelfrag](https://github.com/stelfrag)) +- Enable rrdcontexts by default [\#13471](https://github.com/netdata/netdata/pull/13471) ([stelfrag](https://github.com/stelfrag)) +- Fix cgroup name detection for docker containers in containerd cgroup [\#13470](https://github.com/netdata/netdata/pull/13470) ([xkisu](https://github.com/xkisu)) +- Trimmed-median, trimmed-mean and percentile [\#13469](https://github.com/netdata/netdata/pull/13469) ([ktsaou](https://github.com/ktsaou)) +- rrdcontext support for hidden charts [\#13466](https://github.com/netdata/netdata/pull/13466) ([ktsaou](https://github.com/ktsaou)) +- Load host labels for archived hosts [\#13464](https://github.com/netdata/netdata/pull/13464) ([stelfrag](https://github.com/stelfrag)) +- fix\(python.d/smartd\_log\): handle log rotation [\#13460](https://github.com/netdata/netdata/pull/13460) ([ilyam8](https://github.com/ilyam8)) +- docs: add a note about network interface monitoring when running in a Docker container [\#13458](https://github.com/netdata/netdata/pull/13458) ([ilyam8](https://github.com/ilyam8)) +- fix a guide so we can reference it's subsections [\#13455](https://github.com/netdata/netdata/pull/13455) ([tkatsoulas](https://github.com/tkatsoulas)) +- Revert "Query queue only for queries" [\#13452](https://github.com/netdata/netdata/pull/13452) ([stelfrag](https://github.com/stelfrag)) +- /api/v1/weights endpoint [\#13449](https://github.com/netdata/netdata/pull/13449) ([ktsaou](https://github.com/ktsaou)) +- Get last\_entry\_t only when st changes [\#13448](https://github.com/netdata/netdata/pull/13448) ([MrZammler](https://github.com/MrZammler)) +- additional stats [\#13445](https://github.com/netdata/netdata/pull/13445) ([ktsaou](https://github.com/ktsaou)) +- Store host label information in the metadata database [\#13441](https://github.com/netdata/netdata/pull/13441) ([stelfrag](https://github.com/stelfrag)) +- Fix typo in PostgreSQL section header [\#13440](https://github.com/netdata/netdata/pull/13440) ([shyamvalsan](https://github.com/shyamvalsan)) +- Fix tests so that the actual metadata database is not accessed [\#13439](https://github.com/netdata/netdata/pull/13439) ([stelfrag](https://github.com/stelfrag)) +- Delete aclk\_alert table on start streaming from seq 1 batch 1 [\#13438](https://github.com/netdata/netdata/pull/13438) ([MrZammler](https://github.com/MrZammler)) +- Fix agent crash when archived host has not been registered to the cloud [\#13437](https://github.com/netdata/netdata/pull/13437) ([stelfrag](https://github.com/stelfrag)) +- Dont duplicate buffered bytes [\#13435](https://github.com/netdata/netdata/pull/13435) ([vlvkobal](https://github.com/vlvkobal)) +- Show last 15 alerts in notification [\#13434](https://github.com/netdata/netdata/pull/13434) ([MrZammler](https://github.com/MrZammler)) +- Query queue only for queries [\#13431](https://github.com/netdata/netdata/pull/13431) ([underhood](https://github.com/underhood)) + +## [v1.35.1](https://github.com/netdata/netdata/tree/v1.35.1) (2022-06-10) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.35.0...v1.35.1) + +## [v1.35.0](https://github.com/netdata/netdata/tree/v1.35.0) (2022-06-08) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.34.1...v1.35.0) + +## [v1.34.1](https://github.com/netdata/netdata/tree/v1.34.1) (2022-04-15) + +[Full Changelog](https://github.com/netdata/netdata/compare/1.34.0...v1.34.1) + +## [1.34.0](https://github.com/netdata/netdata/tree/1.34.0) (2022-04-14) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.34.0...1.34.0) + +## [v1.34.0](https://github.com/netdata/netdata/tree/v1.34.0) (2022-04-14) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.33.1...v1.34.0) + +## [v1.33.1](https://github.com/netdata/netdata/tree/v1.33.1) (2022-02-14) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.33.0...v1.33.1) + +## [v1.33.0](https://github.com/netdata/netdata/tree/v1.33.0) (2022-01-26) + +[Full Changelog](https://github.com/netdata/netdata/compare/1.32.1...v1.33.0) + +## [1.32.1](https://github.com/netdata/netdata/tree/1.32.1) (2021-12-14) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.32.1...1.32.1) + +## [v1.32.1](https://github.com/netdata/netdata/tree/v1.32.1) (2021-12-14) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.32.0...v1.32.1) + +## [v1.32.0](https://github.com/netdata/netdata/tree/v1.32.0) (2021-11-30) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.31.0...v1.32.0) + +## [v1.31.0](https://github.com/netdata/netdata/tree/v1.31.0) (2021-05-19) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.30.1...v1.31.0) + +## [v1.30.1](https://github.com/netdata/netdata/tree/v1.30.1) (2021-04-12) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.30.0...v1.30.1) + +## [v1.30.0](https://github.com/netdata/netdata/tree/v1.30.0) (2021-03-31) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.29.3...v1.30.0) + +## [v1.29.3](https://github.com/netdata/netdata/tree/v1.29.3) (2021-02-23) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.29.2...v1.29.3) + +## [v1.29.2](https://github.com/netdata/netdata/tree/v1.29.2) (2021-02-18) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.29.1...v1.29.2) + +## [v1.29.1](https://github.com/netdata/netdata/tree/v1.29.1) (2021-02-09) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.29.0...v1.29.1) + +## [v1.29.0](https://github.com/netdata/netdata/tree/v1.29.0) (2021-02-03) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.27.0_0104103941...v1.29.0) + +## [v1.27.0_0104103941](https://github.com/netdata/netdata/tree/v1.27.0_0104103941) (2021-01-04) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.28.0...v1.27.0_0104103941) + +## [v1.28.0](https://github.com/netdata/netdata/tree/v1.28.0) (2020-12-18) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.27.0...v1.28.0) + +## [v1.27.0](https://github.com/netdata/netdata/tree/v1.27.0) (2020-12-17) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.26.0...v1.27.0) + +## [v1.26.0](https://github.com/netdata/netdata/tree/v1.26.0) (2020-10-14) + +[Full Changelog](https://github.com/netdata/netdata/compare/before_rebase...v1.26.0) + +## [before_rebase](https://github.com/netdata/netdata/tree/before_rebase) (2020-09-24) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.25.0...before_rebase) + +## [v1.25.0](https://github.com/netdata/netdata/tree/v1.25.0) (2020-09-15) + +[Full Changelog](https://github.com/netdata/netdata/compare/poc2...v1.25.0) + +## [poc2](https://github.com/netdata/netdata/tree/poc2) (2020-08-25) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.24.0...poc2) + +## [v1.24.0](https://github.com/netdata/netdata/tree/v1.24.0) (2020-08-10) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.23.2...v1.24.0) + +## [v1.23.2](https://github.com/netdata/netdata/tree/v1.23.2) (2020-07-16) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.23.1_infiniband...v1.23.2) + +## [v1.23.1_infiniband](https://github.com/netdata/netdata/tree/v1.23.1_infiniband) (2020-07-03) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.23.1...v1.23.1_infiniband) + +## [v1.23.1](https://github.com/netdata/netdata/tree/v1.23.1) (2020-07-01) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.23.0...v1.23.1) + +## [v1.23.0](https://github.com/netdata/netdata/tree/v1.23.0) (2020-06-25) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.22.1...v1.23.0) + +## [v1.22.1](https://github.com/netdata/netdata/tree/v1.22.1) (2020-05-12) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.22.0...v1.22.1) + +## [v1.22.0](https://github.com/netdata/netdata/tree/v1.22.0) (2020-05-11) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.21.1...v1.22.0) + +## [v1.21.1](https://github.com/netdata/netdata/tree/v1.21.1) (2020-04-13) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.21.0...v1.21.1) + +## [v1.21.0](https://github.com/netdata/netdata/tree/v1.21.0) (2020-04-06) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.20.0...v1.21.0) + +## [v1.20.0](https://github.com/netdata/netdata/tree/v1.20.0) (2020-02-21) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.19.0...v1.20.0) + +## [v1.19.0](https://github.com/netdata/netdata/tree/v1.19.0) (2019-11-27) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.18.1...v1.19.0) + +## [v1.18.1](https://github.com/netdata/netdata/tree/v1.18.1) (2019-10-18) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.18.0...v1.18.1) + +## [v1.18.0](https://github.com/netdata/netdata/tree/v1.18.0) (2019-10-10) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.17.1...v1.18.0) + +## [v1.17.1](https://github.com/netdata/netdata/tree/v1.17.1) (2019-09-12) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.17.0...v1.17.1) + +## [v1.17.0](https://github.com/netdata/netdata/tree/v1.17.0) (2019-09-03) + +[Full Changelog](https://github.com/netdata/netdata/compare/issue_4934...v1.17.0) + +## [issue_4934](https://github.com/netdata/netdata/tree/issue_4934) (2019-08-03) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.16.1...issue_4934) + +## [v1.16.1](https://github.com/netdata/netdata/tree/v1.16.1) (2019-07-31) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.16.0...v1.16.1) + +## [v1.16.0](https://github.com/netdata/netdata/tree/v1.16.0) (2019-07-08) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.15.0...v1.16.0) + +## [v1.15.0](https://github.com/netdata/netdata/tree/v1.15.0) (2019-05-22) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.14.0...v1.15.0) + +## [v1.14.0](https://github.com/netdata/netdata/tree/v1.14.0) (2019-04-18) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.14.0-rc0...v1.14.0) + +## [v1.14.0-rc0](https://github.com/netdata/netdata/tree/v1.14.0-rc0) (2019-03-30) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.13.0...v1.14.0-rc0) + +## [v1.13.0](https://github.com/netdata/netdata/tree/v1.13.0) (2019-03-14) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.12.2...v1.13.0) + +## [v1.12.2](https://github.com/netdata/netdata/tree/v1.12.2) (2019-02-28) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.12.1...v1.12.2) + +## [v1.12.1](https://github.com/netdata/netdata/tree/v1.12.1) (2019-02-21) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.12.0...v1.12.1) + +## [v1.12.0](https://github.com/netdata/netdata/tree/v1.12.0) (2019-02-06) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.12.0-rc3...v1.12.0) + +## [v1.12.0-rc3](https://github.com/netdata/netdata/tree/v1.12.0-rc3) (2019-01-17) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.12.0-rc2...v1.12.0-rc3) + +## [v1.12.0-rc2](https://github.com/netdata/netdata/tree/v1.12.0-rc2) (2019-01-03) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.12.0-rc1...v1.12.0-rc2) + +## [v1.12.0-rc1](https://github.com/netdata/netdata/tree/v1.12.0-rc1) (2018-12-19) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.12.0-rc0...v1.12.0-rc1) + +## [v1.12.0-rc0](https://github.com/netdata/netdata/tree/v1.12.0-rc0) (2018-12-06) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.11.1...v1.12.0-rc0) + +## [v1.11.1](https://github.com/netdata/netdata/tree/v1.11.1) (2018-11-22) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.11.0...v1.11.1) + +## [v1.11.0](https://github.com/netdata/netdata/tree/v1.11.0) (2018-11-02) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.10.0...v1.11.0) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..c12e9c8 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,1802 @@ + +# SPDX-License-Identifier: GPL-3.0-or-later +# This file is only used for development (netdata in Clion) +# It can build netdata, but you are on your own... + +cmake_minimum_required(VERSION 3.1.0) +project(netdata C CXX) + +find_package(Threads REQUIRED) +find_package(PkgConfig REQUIRED) + +include(CheckFunctionExists) +include(CheckLibraryExists) + +# default is "Debug" +#set(CMAKE_BUILD_TYPE "Release") + +# set this to see the compilation commands +#set(CMAKE_VERBOSE_MAKEFILE 1) + +# ----------------------------------------------------------------------------- +# Set compilation options according to build type + +set(CMAKE_C_STANDARD 11) + +IF("${CMAKE_BUILD_TYPE}" MATCHES "Debug") + set(CXX_DEFAULT_CFLAGS "-O0 -g -DNETDATA_INTERNAL_CHECKS=1 -DNETDATA_DEV_MODE=1 -fstack-protector-all -fno-omit-frame-pointer") + message(STATUS "building for: debugging") +ELSE() + set(CXX_DEFAULT_CFLAGS "-O2") + message(STATUS "building for: release") + cmake_policy(SET CMP0069 "NEW") + include(CheckIPOSupported) + check_ipo_supported(RESULT ipo_supported OUTPUT error) + IF(${ipo_supported}) + message(STATUS "link time optimization: supported") + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + ELSE() + message(STATUS "link time optimization: not supported") + ENDIF() +ENDIF() + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O1 -ggdb -Wall -Wextra -Wformat-signedness -Werror=format-security -Wunused-result ${CXX_DEFAULT_CFLAGS}") + +# ----------------------------------------------------------------------------- +# O/S Detection + +# these are defined in common.h too +SET(LINUX False) +SET(FREEBSD False) +SET(MACOS False) + +# Detect the operating system +IF(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + SET(MACOS True) +ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + SET(FREEBSD True) +ELSE() + SET(LINUX True) +ENDIF() + +# show the operating system on the console +message(STATUS "system name: ${CMAKE_SYSTEM_NAME}") + +set(GENERATED_CONFIG_H_DIR ${CMAKE_BINARY_DIR}) +set(GENERATED_CONFIG_H ${GENERATED_CONFIG_H_DIR}/config.h) + +# ----------------------------------------------------------------------------- +# Math + +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} m) +set(NETDATA_REQUIRED_DEFINES "-DSTORAGE_WITH_MATH=1 ${NETDATA_REQUIRED_DEFINES}") + +# ----------------------------------------------------------------------------- +# Detect libuuid + +pkg_check_modules(UUID REQUIRED uuid) +set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${UUID_CFLAGS_OTHER}) +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${UUID_LIBRARIES}) +set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${UUID_INCLUDE_DIRS}) + +# ----------------------------------------------------------------------------- +# Detect libz + +pkg_check_modules(ZLIB REQUIRED zlib) +set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${ZLIB_CFLAGS_OTHER}) +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${ZLIB_LIBRARIES}) +set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS}) +# set(NETDATA_REQUIRED_DEFINES "${NETDATA_REQUIRED_DEFINES} -DNETDATA_WITH_ZLIB=1") + +# ----------------------------------------------------------------------------- +# libuv multi-platform support library with a focus on asynchronous I/O + +pkg_check_modules(LIBUV REQUIRED libuv) +set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${LIBUV_CFLAGS_OTHER}) +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${LIBUV_LIBRARIES}) +set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${LIBUV_INCLUDE_DIRS}) + +# ----------------------------------------------------------------------------- +# lz4 Extremely Fast Compression algorithm + +pkg_check_modules(LIBLZ4 REQUIRED liblz4) +set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${LIBLZ4_CFLAGS_OTHER}) +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${LIBLZ4_LIBRARIES}) +set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${LIBLZ4_INCLUDE_DIRS}) +# set(NETDATA_REQUIRED_DEFINES "${NETDATA_REQUIRED_DEFINES} -DENABLE_COMPRESSION=1") + +# ----------------------------------------------------------------------------- +# Judy General purpose dynamic array + +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} judy) + +# ----------------------------------------------------------------------------- +# OpenSSL Cryptography and SSL/TLS Toolkit + +pkg_check_modules(OPENSSL REQUIRED openssl) +set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${OPENSSL_CFLAGS_OTHER}) +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${OPENSSL_LIBRARIES}) +set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${OPENSSL_INCLUDE_DIRS}) +# set(NETDATA_REQUIRED_DEFINES "${NETDATA_REQUIRED_DEFINES} -DENABLE_HTTPS=1") + +# ----------------------------------------------------------------------------- +# JSON-C used to health + +pkg_check_modules(JSON REQUIRED json-c) +set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${JSONC_CFLAGS_OTHER}) +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${JSON_LIBRARIES}) +set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${JSON_INCLUDE_DIRS}) + + +# ----------------------------------------------------------------------------- +# Detect libcap + +IF(LINUX) + pkg_check_modules(CAP QUIET libcap) + # later we use: + # ${CAP_LIBRARIES} + # ${CAP_CFLAGS_OTHER} + # ${CAP_INCLUDE_DIRS} +ENDIF(LINUX) + + +# ----------------------------------------------------------------------------- +# Detect libipmimonitoring + +IF(LINUX) + pkg_check_modules(IPMI libipmimonitoring) + # later we use: + # ${IPMI_LIBRARIES} + # ${IPMI_CFLAGS_OTHER} + # ${IPMI_INCLUDE_DIRS} +ENDIF(LINUX) + + +# ----------------------------------------------------------------------------- +# Detect libmnl +IF(LINUX) + pkg_check_modules(MNL libmnl) + # later we use: + # ${MNL_LIBRARIES} + # ${MNL_CFLAGS_OTHER} + # ${MNL_INCLUDE_DIRS} +ENDIF(LINUX) + + +# ----------------------------------------------------------------------------- +# Detect libmnl + +pkg_check_modules(NFACCT libnetfilter_acct) +# later we use: +# ${NFACCT_LIBRARIES} +# ${NFACCT_CFLAGS_OTHER} +# ${NFACCT_INCLUDE_DIRS} + + +# ----------------------------------------------------------------------------- +# Detect libxenstat + +pkg_check_modules(XENSTAT libxenstat) +# later we use: +# ${XENSTAT_LIBRARIES} +# ${XENSTAT_CFLAGS_OTHER} +# ${XENSTAT_INCLUDE_DIRS} + + +# ----------------------------------------------------------------------------- +# Detect MacOS IOKit/Foundation framework + +IF(MACOS) + find_library(IOKIT IOKit) + find_library(FOUNDATION Foundation) + # later we use: + # ${FOUNDATION} + # ${IOKIT} +ENDIF(MACOS) + + +# ----------------------------------------------------------------------------- +# Detect libcrypto + +pkg_check_modules(CRYPTO libcrypto) +# later we use: +# ${CRYPTO_LIBRARIES} +# ${CRYPTO_CFLAGS_OTHER} +# ${CRYPTO_INCLUDE_DIRS} + + +# ----------------------------------------------------------------------------- +# Detect libssl + +pkg_check_modules(SSL libssl) +# later we use: +# ${SSL_LIBRARIES} +# ${SSL_CFLAGS_OTHER} +# ${SSL_INCLUDE_DIRS} + + +# ----------------------------------------------------------------------------- +# Detect libcurl + +pkg_check_modules(CURL libcurl) +# later we use: +# ${CURL_LIBRARIES} +# ${CURL_CFLAGS_OTHER} +# ${CURL_INCLUDE_DIRS} + +# ----------------------------------------------------------------------------- +# Detect libcups + +pkg_check_modules(CUPS libcups) +IF(NOT CUPS_LIBRARIES) + pkg_check_modules(CUPS cups) +ENDIF() + +# later we use: +# ${CUPS_LIBRARIES} +# ${CUPS_CFLAGS_OTHER} +# ${CUPS_INCLUDE_DIRS} + +# ----------------------------------------------------------------------------- +# Detect libaws-c-common + +find_library(HAVE_AWS_CHECKSUMS aws-checksums) +# later we use: +# ${HAVE_AWS_CHECKSUMS} + +# ----------------------------------------------------------------------------- +# Detect libaws-c-common + +find_library(HAVE_AWS_COMMON aws-c-common) +# later we use: +# ${HAVE_AWS_COMMON} + +# ----------------------------------------------------------------------------- +# Detect libaws-c-common + +find_library(HAVE_AWS_EVENT_STREAM aws-c-event-stream) +# later we use: +# ${HAVE_AWS_EVENT_STREAM} + +# ----------------------------------------------------------------------------- +# Detect libaws-cpp-sdk-core + +pkg_check_modules(AWS_CORE aws-cpp-sdk-core) +# later we use: +# ${AWS_CORE_LIBRARIES} +# ${AWS_CORE_CFLAGS_OTHER} +# ${AWS_CORE_INCLUDE_DIRS} + +# ----------------------------------------------------------------------------- +# Detect libaws-cpp-sdk-kinesis + +pkg_check_modules(KINESIS aws-cpp-sdk-kinesis) +# later we use: +# ${KINESIS_LIBRARIES} +# ${KINESIS_CFLAGS_OTHER} +# ${KINESIS_INCLUDE_DIRS} + +# ----------------------------------------------------------------------------- +# Detect libgrpc + +pkg_check_modules(GRPC grpc) +# later we use: +# ${GRPC_LIBRARIES} +# ${GRPC_CFLAGS_OTHER} +# ${GRPC_INCLUDE_DIRS} + +# ----------------------------------------------------------------------------- +# Detect libgoogleapis_cpp_pubsub_protos + +pkg_check_modules(PUBSUB google_cloud_cpp_pubsub_protos) +# later we use: +# ${PUBSUB_LIBRARIES} +# ${PUBSUB_CFLAGS_OTHER} +# ${PUBSUB_INCLUDE_DIRS} + +# ----------------------------------------------------------------------------- +# Detect libprotobuf + +pkg_check_modules(PROTOBUF protobuf>=3) +# later we use: +# ${PROTOBUF_LIBRARIES} +# ${PROTOBUF_CFLAGS_OTHER} +# ${PROTOBUF_INCLUDE_DIRS} + + +# ----------------------------------------------------------------------------- +# Detect libsnappy + +pkg_check_modules(SNAPPY snappy) +# later we use: +# ${SNAPPY_LIBRARIES} +# ${SNAPPY_CFLAGS_OTHER} +# ${SNAPPY_INCLUDE_DIRS} + + +# ----------------------------------------------------------------------------- +# Detect libmongoc + +pkg_check_modules(MONGOC libmongoc-1.0) +# later we use: +# ${MONGOC_LIBRARIES} +# ${MONGOC_CFLAGS_OTHER} +# ${MONGOC_INCLUDE_DIRS} + + +# ----------------------------------------------------------------------------- +# Detect libbpf +IF(LINUX AND EXISTS "${CMAKE_SOURCE_DIR}/externaldeps/libbpf/libbpf.a") + message(STATUS "libbpf library found") + pkg_check_modules(ELF REQUIRED libelf) + # later we use: + # ${ELF_LIBRARIES} + # ${ELF_CFLAGS_OTHER} + # ${ELF_INCLUDE_DIRS} + IF(ELF_LIBRARIES) + list(APPEND NETDATA_COMMON_CFLAGS ${ELF_CFLAGS_OTHER}) + list(INSERT NETDATA_COMMON_LIBRARIES 0 + ${CMAKE_SOURCE_DIR}/externaldeps/libbpf/libbpf.a + ${ELF_LIBRARIES}) + list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${ELF_INCLUDE_DIRS}) + include_directories(BEFORE ${CMAKE_SOURCE_DIR}/externaldeps/libbpf/include ${CMAKE_SOURCE_DIR}/externaldeps/libbpf/include/uapi) + set(ENABLE_PLUGIN_EBPF True) + set(HAVE_LIBBPF True) + ELSE(ELF_LIBRARIES) + set(ENABLE_PLUGIN_EBPF False) + message(STATUS "ebpf plugin: disabled (requires libelf)") + ENDIF(ELF_LIBRARIES) +ENDIF() + +# ----------------------------------------------------------------------------- +# Detect ml dependencies +file(STRINGS "${CMAKE_SOURCE_DIR}/config.h" DEFINE_ENABLE_ML REGEX "^#define ENABLE_ML 1$") +IF(DEFINE_ENABLE_ML MATCHES ".+" AND + EXISTS "${CMAKE_SOURCE_DIR}/ml/dlib/dlib/all/source.cpp" AND + EXISTS "${CMAKE_SOURCE_DIR}/ml/json/single_include/nlohmann/json.hpp") + set(ENABLE_ML True) + list(APPEND NETDATA_COMMON_CFLAGS "-DDLIB_NO_GUI_SUPPORT") + list(APPEND NETDATA_COMMON_INCLUDE_DIRS "ml/dlib") +ELSE() + set(ENABLE_ML False) +ENDIF() + +set(LIBJUDY_SOURCES + libnetdata/libjudy/src/Judy.h + libnetdata/libjudy/src/JudyCommon/JudyMalloc.c + libnetdata/libjudy/src/JudyCommon/JudyPrivate.h + libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h + libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h + libnetdata/libjudy/src/JudyL/JudyL.h + libnetdata/libjudy/src/JudyL/JudyLByCount.c + libnetdata/libjudy/src/JudyL/JudyLCascade.c + libnetdata/libjudy/src/JudyL/JudyLCount.c + libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c + libnetdata/libjudy/src/JudyL/JudyLDecascade.c + libnetdata/libjudy/src/JudyL/JudyLDel.c + libnetdata/libjudy/src/JudyL/JudyLFirst.c + libnetdata/libjudy/src/JudyL/JudyLFreeArray.c + libnetdata/libjudy/src/JudyL/j__udyLGet.c + libnetdata/libjudy/src/JudyL/JudyLGet.c + libnetdata/libjudy/src/JudyL/JudyLInsArray.c + libnetdata/libjudy/src/JudyL/JudyLIns.c + libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c + libnetdata/libjudy/src/JudyL/JudyLMallocIF.c + libnetdata/libjudy/src/JudyL/JudyLMemActive.c + libnetdata/libjudy/src/JudyL/JudyLMemUsed.c + libnetdata/libjudy/src/JudyL/JudyLNext.c + libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c + libnetdata/libjudy/src/JudyL/JudyLPrev.c + libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c + JudyLTables.c + libnetdata/libjudy/src/JudyHS/JudyHS.c) + +ADD_LIBRARY(judy STATIC + ${LIBJUDY_SOURCES}) + +ADD_EXECUTABLE(judyltablesgen + libnetdata/libjudy/src/JudyL/JudyLTablesGen.c) + +target_include_directories(judyltablesgen PUBLIC + libnetdata/libjudy/src + libnetdata/libjudy/src/JudyCommon) + +target_compile_options(judyltablesgen PUBLIC + -Wno-format + -Wno-format-security) + +include_directories(BEFORE ${CMAKE_SOURCE_DIR}/libnetdata/libjudy/src) + +target_compile_definitions(judyltablesgen PUBLIC + JU_64BIT + JUDYL) + +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/JudyLTables.c + COMMAND judyltablesgen + DEPENDS judyltablesgen + ) + +target_include_directories(judy PUBLIC + libnetdata/libjudy/src + libnetdata/libjudy/src/JudyCommon) + +target_include_directories(judy BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + +target_compile_definitions(judy PUBLIC + JU_64BIT + JUDYL) + +target_compile_options(judy PUBLIC + -Wno-sign-compare + -Wno-implicit-fallthrough) + +set(LIBJUDY_PREV_FILES + libnetdata/libjudy/src/JudyL/JudyLPrev.c + libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c) + +set(LIBJUDY_NEXT_FILES + libnetdata/libjudy/src/JudyL/JudyLNext.c + libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c) + +set_source_files_properties(${LIBJUDY_PREV_FILES} PROPERTIES COMPILE_OPTIONS "-DJUDYPREV") +set_source_files_properties(${LIBJUDY_NEXT_FILES} PROPERTIES COMPILE_OPTIONS "-DJUDYNEXT") +set_source_files_properties(libnetdata/libjudy/src/JudyL/j__udyLGet.c PROPERTIES COMPILE_OPTIONS "-DJUDYGETINLINE") +set_source_files_properties(libnetdata/libjudy/src/JudyL/JudyLByCount.c PROPERTIES COMPILE_OPTIONS "-DNOSMARTJBB -DNOSMARTJBU -DNOSMARTJLB") +set_source_files_properties(JudyLTables.c PROPERTIES COMPILE_OPTIONS "-I${CMAKE_SOURCE_DIR}/libnetdata/libjudy/src/JudyL") + +# ----------------------------------------------------------------------------- +# netdata files + +set(LIBNETDATA_FILES + libnetdata/adaptive_resortable_list/adaptive_resortable_list.c + libnetdata/adaptive_resortable_list/adaptive_resortable_list.h + libnetdata/config/appconfig.c + libnetdata/config/appconfig.h + libnetdata/arrayalloc/arrayalloc.c + libnetdata/arrayalloc/arrayalloc.h + libnetdata/avl/avl.c + libnetdata/avl/avl.h + libnetdata/buffer/buffer.c + libnetdata/buffer/buffer.h + libnetdata/circular_buffer/circular_buffer.c + libnetdata/circular_buffer/circular_buffer.h + libnetdata/clocks/clocks.c + libnetdata/clocks/clocks.h + libnetdata/completion/completion.c + libnetdata/completion/completion.h + libnetdata/dictionary/dictionary.c + libnetdata/dictionary/dictionary.h + libnetdata/eval/eval.c + libnetdata/eval/eval.h + libnetdata/health/health.c + libnetdata/health/health.h + libnetdata/inlined.h + libnetdata/json/json.c + libnetdata/json/json.h + libnetdata/json/jsmn.c + libnetdata/json/jsmn.h + libnetdata/libnetdata.c + libnetdata/libnetdata.h + libnetdata/locks/locks.c + libnetdata/locks/locks.h + libnetdata/log/log.c + libnetdata/log/log.h + libnetdata/os.c + libnetdata/os.h + libnetdata/onewayalloc/onewayalloc.c + libnetdata/onewayalloc/onewayalloc.h + libnetdata/popen/popen.c + libnetdata/popen/popen.h + libnetdata/procfile/procfile.c + libnetdata/procfile/procfile.h + libnetdata/required_dummies.h + libnetdata/socket/security.c + libnetdata/socket/security.h + libnetdata/simple_pattern/simple_pattern.c + libnetdata/simple_pattern/simple_pattern.h + libnetdata/socket/socket.c + libnetdata/socket/socket.h + libnetdata/statistical/statistical.c + libnetdata/statistical/statistical.h + libnetdata/storage_number/storage_number.c + libnetdata/storage_number/storage_number.h + libnetdata/string/string.c + libnetdata/string/string.h + libnetdata/threads/threads.c + libnetdata/threads/threads.h + libnetdata/url/url.c + libnetdata/url/url.h + libnetdata/string/utf8.h + libnetdata/worker_utilization/worker_utilization.c + libnetdata/worker_utilization/worker_utilization.h + ) + +IF(ENABLE_PLUGIN_EBPF) + list(APPEND LIBNETDATA_FILES + libnetdata/ebpf/ebpf.c + libnetdata/ebpf/ebpf.h) +ENDIF() + +add_library(libnetdata OBJECT ${LIBNETDATA_FILES}) + +target_include_directories(libnetdata BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + +set(APPS_PLUGIN_FILES + collectors/apps.plugin/apps_plugin.c) + +set(FREEBSD_PLUGIN_FILES + collectors/freebsd.plugin/plugin_freebsd.c + collectors/freebsd.plugin/plugin_freebsd.h + collectors/freebsd.plugin/freebsd_sysctl.c + collectors/freebsd.plugin/freebsd_getmntinfo.c + collectors/freebsd.plugin/freebsd_getifaddrs.c + collectors/freebsd.plugin/freebsd_devstat.c + collectors/freebsd.plugin/freebsd_kstat_zfs.c + collectors/freebsd.plugin/freebsd_ipfw.c + collectors/proc.plugin/zfs_common.c + collectors/proc.plugin/zfs_common.h + ) + +set(HEALTH_PLUGIN_FILES + health/health.c + health/health.h + health/health_config.c + health/health_json.c + health/health_log.c) + +set(IDLEJITTER_PLUGIN_FILES + collectors/idlejitter.plugin/plugin_idlejitter.c + ) + +set(CGROUPS_PLUGIN_FILES + collectors/cgroups.plugin/sys_fs_cgroup.c + collectors/cgroups.plugin/sys_fs_cgroup.h + ) + +set(CGROUP_NETWORK_FILES + collectors/cgroups.plugin/cgroup-network.c + ) + +set(DISKSPACE_PLUGIN_FILES + collectors/diskspace.plugin/plugin_diskspace.c + ) + +set(TIMEX_PLUGIN_FILES + collectors/timex.plugin/plugin_timex.c + ) + +set(FREEIPMI_PLUGIN_FILES + collectors/freeipmi.plugin/freeipmi_plugin.c + ) + +set(NFACCT_PLUGIN_FILES + collectors/nfacct.plugin/plugin_nfacct.c + ) + +set(XENSTAT_PLUGIN_FILES + collectors/xenstat.plugin/xenstat_plugin.c + ) + +set(PERF_PLUGIN_FILES + collectors/perf.plugin/perf_plugin.c + ) + +set(SLABINFO_PLUGIN_FILES + collectors/slabinfo.plugin/slabinfo.c + ) + +set(CUPS_PLUGIN_FILES + collectors/cups.plugin/cups_plugin.c + ) + +set(EBPF_PROCESS_PLUGIN_FILES + collectors/ebpf.plugin/ebpf.c + collectors/ebpf.plugin/ebpf.h + collectors/ebpf.plugin/ebpf_cachestat.c + collectors/ebpf.plugin/ebpf_cachestat.h + collectors/ebpf.plugin/ebpf_dcstat.c + collectors/ebpf.plugin/ebpf_dcstat.h + collectors/ebpf.plugin/ebpf_disk.c + collectors/ebpf.plugin/ebpf_disk.h + collectors/ebpf.plugin/ebpf_fd.c + collectors/ebpf.plugin/ebpf_fd.h + collectors/ebpf.plugin/ebpf_hardirq.c + collectors/ebpf.plugin/ebpf_hardirq.h + collectors/ebpf.plugin/ebpf_mdflush.c + collectors/ebpf.plugin/ebpf_mdflush.h + collectors/ebpf.plugin/ebpf_mount.c + collectors/ebpf.plugin/ebpf_mount.h + collectors/ebpf.plugin/ebpf_filesystem.c + collectors/ebpf.plugin/ebpf_filesystem.h + collectors/ebpf.plugin/ebpf_oomkill.c + collectors/ebpf.plugin/ebpf_oomkill.h + collectors/ebpf.plugin/ebpf_process.c + collectors/ebpf.plugin/ebpf_process.h + collectors/ebpf.plugin/ebpf_shm.c + collectors/ebpf.plugin/ebpf_shm.h + collectors/ebpf.plugin/ebpf_socket.c + collectors/ebpf.plugin/ebpf_socket.h + collectors/ebpf.plugin/ebpf_softirq.c + collectors/ebpf.plugin/ebpf_softirq.h + collectors/ebpf.plugin/ebpf_sync.c + collectors/ebpf.plugin/ebpf_sync.h + collectors/ebpf.plugin/ebpf_swap.c + collectors/ebpf.plugin/ebpf_swap.h + collectors/ebpf.plugin/ebpf_vfs.c + collectors/ebpf.plugin/ebpf_vfs.h + collectors/ebpf.plugin/ebpf_apps.c + collectors/ebpf.plugin/ebpf_apps.h + collectors/ebpf.plugin/ebpf_cgroup.c + collectors/ebpf.plugin/ebpf_cgroup.h + ) + +set(PROC_PLUGIN_FILES + collectors/proc.plugin/ipc.c + collectors/proc.plugin/plugin_proc.c + collectors/proc.plugin/plugin_proc.h + collectors/proc.plugin/proc_diskstats.c + collectors/proc.plugin/proc_mdstat.c + collectors/proc.plugin/proc_interrupts.c + collectors/proc.plugin/proc_softirqs.c + collectors/proc.plugin/proc_loadavg.c + collectors/proc.plugin/proc_meminfo.c + collectors/proc.plugin/proc_pagetypeinfo.c + collectors/proc.plugin/proc_net_dev.c + collectors/proc.plugin/proc_net_wireless.c + collectors/proc.plugin/proc_net_ip_vs_stats.c + collectors/proc.plugin/proc_net_netstat.c + collectors/proc.plugin/proc_net_rpc_nfs.c + collectors/proc.plugin/proc_net_rpc_nfsd.c + collectors/proc.plugin/proc_net_sctp_snmp.c + collectors/proc.plugin/proc_net_sockstat.c + collectors/proc.plugin/proc_net_sockstat6.c + collectors/proc.plugin/proc_net_softnet_stat.c + collectors/proc.plugin/proc_net_stat_conntrack.c + collectors/proc.plugin/proc_net_stat_synproxy.c + collectors/proc.plugin/proc_self_mountinfo.c + collectors/proc.plugin/proc_self_mountinfo.h + collectors/proc.plugin/zfs_common.c + collectors/proc.plugin/zfs_common.h + collectors/proc.plugin/proc_spl_kstat_zfs.c + collectors/proc.plugin/proc_stat.c + collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c + collectors/proc.plugin/proc_vmstat.c + collectors/proc.plugin/proc_uptime.c + collectors/proc.plugin/proc_pressure.c + collectors/proc.plugin/proc_pressure.h + collectors/proc.plugin/sys_kernel_mm_ksm.c + collectors/proc.plugin/sys_block_zram.c + collectors/proc.plugin/sys_devices_system_edac_mc.c + collectors/proc.plugin/sys_devices_system_node.c + collectors/proc.plugin/sys_class_infiniband.c + collectors/proc.plugin/sys_fs_btrfs.c + collectors/proc.plugin/sys_class_power_supply.c + ) + +set(TC_PLUGIN_FILES + collectors/tc.plugin/plugin_tc.c + ) + +set(MACOS_PLUGIN_FILES + collectors/macos.plugin/plugin_macos.c + collectors/macos.plugin/plugin_macos.h + collectors/macos.plugin/macos_sysctl.c + collectors/macos.plugin/macos_mach_smi.c + collectors/macos.plugin/macos_fw.c + ) + +set(PLUGINSD_PLUGIN_FILES + collectors/plugins.d/plugins_d.c + collectors/plugins.d/plugins_d.h + collectors/plugins.d/pluginsd_parser.c + collectors/plugins.d/pluginsd_parser.h + ) + +set(PARSER_PLUGIN_FILES + parser/parser.c + parser/parser.h + ) + +set(REGISTRY_PLUGIN_FILES + registry/registry.c + registry/registry.h + registry/registry_db.c + registry/registry_init.c + registry/registry_internals.c + registry/registry_internals.h + registry/registry_log.c + registry/registry_machine.c + registry/registry_machine.h + registry/registry_person.c + registry/registry_person.h + registry/registry_url.c + registry/registry_url.h + ) + +set(STATSD_PLUGIN_FILES + collectors/statsd.plugin/statsd.c + ) + +set(RRD_PLUGIN_FILES + database/rrdcalc.c + database/rrdcalc.h + database/rrdcalctemplate.c + database/rrdcalctemplate.h + database/rrdcontext.c + database/rrdcontext.h + database/rrddim.c + database/rrddimvar.c + database/rrddimvar.h + database/rrdfamily.c + database/rrdfunctions.c + database/rrdfunctions.h + database/rrdhost.c + database/rrdlabels.c + database/rrd.c + database/rrd.h + database/rrdset.c + database/rrdsetvar.c + database/rrdsetvar.h + database/rrdvar.c + database/rrdvar.h + database/storage_engine.c + database/storage_engine.h + database/ram/rrddim_mem.c + database/ram/rrddim_mem.h + database/sqlite/sqlite_metadata.c + database/sqlite/sqlite_metadata.h + database/sqlite/sqlite_functions.c + database/sqlite/sqlite_functions.h + database/sqlite/sqlite_context.c + database/sqlite/sqlite_context.h + database/sqlite/sqlite_db_migration.c + database/sqlite/sqlite_db_migration.h + database/sqlite/sqlite_aclk.c + database/sqlite/sqlite_aclk.h + database/sqlite/sqlite_health.c + database/sqlite/sqlite_health.h + database/sqlite/sqlite_aclk_node.c + database/sqlite/sqlite_aclk_node.h + database/sqlite/sqlite_aclk_alert.c + database/sqlite/sqlite_aclk_alert.h + database/sqlite/sqlite3.c + database/sqlite/sqlite3.h + database/engine/rrdengine.c + database/engine/rrdengine.h + database/engine/rrddiskprotocol.h + database/engine/datafile.c + database/engine/datafile.h + database/engine/journalfile.c + database/engine/journalfile.h + database/engine/rrdenginelib.c + database/engine/rrdenginelib.h + database/engine/rrdengineapi.c + database/engine/rrdengineapi.h + database/engine/pagecache.c + database/engine/pagecache.h + database/engine/rrdenglocking.c + database/engine/rrdenglocking.h + database/KolmogorovSmirnovDist.c + database/KolmogorovSmirnovDist.h + ) + +set(WEB_PLUGIN_FILES + web/server/web_client.c + web/server/web_client.h + web/server/web_server.c + web/server/web_server.h + web/server/static/static-threaded.c + web/server/static/static-threaded.h + web/server/web_client_cache.c + web/server/web_client_cache.h + ) + +set(API_PLUGIN_FILES + web/api/web_api_v1.c + web/api/web_api_v1.h + web/api/badges/web_buffer_svg.c + web/api/badges/web_buffer_svg.h + web/api/exporters/allmetrics.c + web/api/exporters/allmetrics.h + web/api/exporters/shell/allmetrics_shell.c + web/api/exporters/shell/allmetrics_shell.h + web/api/queries/rrdr.c + web/api/queries/rrdr.h + web/api/queries/query.c + web/api/queries/query.h + web/api/queries/average/average.c + web/api/queries/average/average.h + web/api/queries/countif/countif.c + web/api/queries/countif/countif.h + web/api/queries/incremental_sum/incremental_sum.c + web/api/queries/incremental_sum/incremental_sum.h + web/api/queries/max/max.c + web/api/queries/max/max.h + web/api/queries/min/min.c + web/api/queries/min/min.h + web/api/queries/sum/sum.c + web/api/queries/sum/sum.h + web/api/queries/median/median.c + web/api/queries/median/median.h + web/api/queries/percentile/percentile.c + web/api/queries/percentile/percentile.h + web/api/queries/stddev/stddev.c + web/api/queries/stddev/stddev.h + web/api/queries/ses/ses.c + web/api/queries/ses/ses.h + web/api/queries/des/des.c + web/api/queries/des/des.h + web/api/queries/trimmed_mean/trimmed_mean.c + web/api/queries/trimmed_mean/trimmed_mean.h + web/api/queries/weights.c + web/api/queries/weights.h + web/api/formatters/rrd2json.c + web/api/formatters/rrd2json.h + web/api/formatters/csv/csv.c + web/api/formatters/csv/csv.h + web/api/formatters/json/json.c + web/api/formatters/json/json.h + web/api/formatters/ssv/ssv.c + web/api/formatters/ssv/ssv.h + web/api/formatters/value/value.c + web/api/formatters/value/value.h + web/api/formatters/json_wrapper.c + web/api/formatters/json_wrapper.h + web/api/formatters/charts2json.c + web/api/formatters/charts2json.h + web/api/formatters/rrdset2json.c + web/api/formatters/rrdset2json.h + web/api/health/health_cmdapi.c + ) + +set(STREAMING_PLUGIN_FILES + streaming/rrdpush.c + streaming/rrdpush.h + streaming/compression.c + streaming/receiver.c + streaming/sender.c + streaming/replication.c + streaming/replication.h + ) + +set(CLAIM_PLUGIN_FILES + claim/claim.c + claim/claim.h + ) + +set(ACLK_ALWAYS_BUILD + aclk/aclk_rrdhost_state.h + aclk/aclk_proxy.c + aclk/aclk_proxy.h + aclk/aclk.c + aclk/aclk.h + ) + +set(ACLK_FILES + aclk/aclk_util.c + aclk/aclk_util.h + aclk/aclk_stats.c + aclk/aclk_stats.h + aclk/aclk_query.c + aclk/aclk_query.h + aclk/aclk_query_queue.c + aclk/aclk_query_queue.h + aclk/aclk_otp.c + aclk/aclk_otp.h + aclk/aclk_tx_msgs.c + aclk/aclk_tx_msgs.h + aclk/aclk_rx_msgs.c + aclk/aclk_rx_msgs.h + aclk/https_client.c + aclk/https_client.h + aclk/aclk_alarm_api.c + aclk/aclk_alarm_api.h + aclk/aclk_contexts_api.c + aclk/aclk_contexts_api.h + aclk/aclk_capas.c + aclk/aclk_capas.h + aclk/schema-wrappers/connection.cc + aclk/schema-wrappers/connection.h + aclk/schema-wrappers/node_connection.cc + aclk/schema-wrappers/node_connection.h + aclk/schema-wrappers/node_creation.cc + aclk/schema-wrappers/node_creation.h + aclk/schema-wrappers/alarm_stream.cc + aclk/schema-wrappers/alarm_stream.h + aclk/schema-wrappers/alarm_config.cc + aclk/schema-wrappers/alarm_config.h + aclk/schema-wrappers/node_info.cc + aclk/schema-wrappers/node_info.h + aclk/schema-wrappers/capability.cc + aclk/schema-wrappers/capability.h + aclk/schema-wrappers/proto_2_json.cc + aclk/schema-wrappers/proto_2_json.h + aclk/schema-wrappers/context_stream.cc + aclk/schema-wrappers/context_stream.h + aclk/schema-wrappers/context.cc + aclk/schema-wrappers/context.h + aclk/schema-wrappers/schema_wrappers.h + aclk/schema-wrappers/schema_wrapper_utils.cc + aclk/schema-wrappers/schema_wrapper_utils.h + aclk/helpers/mqtt_wss_pal.h + aclk/helpers/ringbuffer_pal.h + ) + +set(MQTT_WEBSOCKETS_FILES + mqtt_websockets/src/mqtt_wss_client.c + mqtt_websockets/src/include/mqtt_wss_client.h + mqtt_websockets/src/mqtt_wss_log.c + mqtt_websockets/src/include/mqtt_wss_log.h + mqtt_websockets/src/ws_client.c + mqtt_websockets/src/include/ws_client.h + mqtt_websockets/src/mqtt_ng.c + mqtt_websockets/src/include/mqtt_ng.h + mqtt_websockets/src/common_public.c + mqtt_websockets/src/include/common_public.h + mqtt_websockets/src/include/common_internal.h + mqtt_websockets/c-rbuf/src/ringbuffer.c + mqtt_websockets/c-rbuf/include/ringbuffer.h + mqtt_websockets/c-rbuf/src/ringbuffer_internal.h + mqtt_websockets/MQTT-C/src/mqtt.c + mqtt_websockets/MQTT-C/include/mqtt.h + ) + +set(SPAWN_PLUGIN_FILES + spawn/spawn.c + spawn/spawn_server.c + spawn/spawn_client.c + spawn/spawn.h + ) + +set(EXPORTING_ENGINE_FILES + exporting/exporting_engine.c + exporting/exporting_engine.h + exporting/graphite/graphite.c + exporting/graphite/graphite.h + exporting/json/json.c + exporting/json/json.h + exporting/opentsdb/opentsdb.c + exporting/opentsdb/opentsdb.h + exporting/prometheus/prometheus.c + exporting/prometheus/prometheus.h + exporting/read_config.c + exporting/clean_connectors.c + exporting/init_connectors.c + exporting/process_data.c + exporting/check_filters.c + exporting/send_data.c + exporting/send_internal_metrics.c + ) + +set(PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES + exporting/prometheus/remote_write/remote_write.c + exporting/prometheus/remote_write/remote_write.h + exporting/prometheus/remote_write/remote_write_request.cc + exporting/prometheus/remote_write/remote_write_request.h + ) + +set(KINESIS_EXPORTING_FILES + exporting/aws_kinesis/aws_kinesis.c + exporting/aws_kinesis/aws_kinesis.h + exporting/aws_kinesis/aws_kinesis_put_record.cc + exporting/aws_kinesis/aws_kinesis_put_record.h + ) + +set(PUBSUB_EXPORTING_FILES + exporting/pubsub/pubsub.c + exporting/pubsub/pubsub.h + exporting/pubsub/pubsub_publish.cc + exporting/pubsub/pubsub_publish.h + ) + +set(MONGODB_EXPORTING_FILES + exporting/mongodb/mongodb.c + exporting/mongodb/mongodb.h + ) + +set(DAEMON_FILES + daemon/buildinfo.c + daemon/buildinfo.h + daemon/common.c + daemon/common.h + daemon/daemon.c + daemon/daemon.h + daemon/global_statistics.c + daemon/global_statistics.h + daemon/analytics.c + daemon/analytics.h + daemon/main.c + daemon/main.h + daemon/signals.c + daemon/signals.h + daemon/service.c + daemon/static_threads.c + daemon/static_threads.h + daemon/commands.c + daemon/commands.h + daemon/unit_test.c + daemon/unit_test.h + ) + +set(ML_FILES + ml/ml.h + ml/ml-dummy.c +) + +# ----------------------------------------------------------------------------- +# ML + +IF(ENABLE_ML) + message(STATUS "ML: enabled") + list(APPEND ML_FILES + ml/Config.h + ml/Config.cc + ml/Dimension.cc + ml/Dimension.h + ml/Host.h + ml/Host.cc + ml/Query.h + ml/KMeans.h + ml/KMeans.cc + ml/SamplesBuffer.h + ml/SamplesBuffer.cc + ml/dlib/dlib/all/source.cpp + ml/json/single_include/nlohmann/json.hpp + ml/ml.cc + ml/ml-private.h + ) +ELSE() + message(STATUS "ML: disabled") +ENDIF() + +set(NETDATA_FILES + collectors/all.h + ${DAEMON_FILES} + ${API_PLUGIN_FILES} + ${EXPORTING_ENGINE_FILES} + ${HEALTH_PLUGIN_FILES} + ${IDLEJITTER_PLUGIN_FILES} + ${ML_FILES} + ${PLUGINSD_PLUGIN_FILES} + ${RRD_PLUGIN_FILES} + ${REGISTRY_PLUGIN_FILES} + ${STATSD_PLUGIN_FILES} + ${STREAMING_PLUGIN_FILES} + ${WEB_PLUGIN_FILES} + ${CLAIM_PLUGIN_FILES} + ${SPAWN_PLUGIN_FILES} + ${PARSER_PLUGIN_FILES} +) + +set(NETDATACLI_FILES + daemon/commands.h + cli/cli.c + cli/cli.h + ) + +include_directories(AFTER .) + +add_definitions( + -DHAVE_CONFIG_H + -DCACHE_DIR="/var/cache/netdata" + -DCONFIG_DIR="/etc/netdata" + -DLIBCONFIG_DIR="/usr/lib/netdata/conf.d" + -DLOG_DIR="/var/log/netdata" + -DPLUGINS_DIR="/usr/libexec/netdata/plugins.d" + -DWEB_DIR="/usr/share/netdata/web" + -DVARLIB_DIR="/var/lib/netdata" +) + +# ----------------------------------------------------------------------------- +# kinesis exporting connector + +IF(KINESIS_LIBRARIES AND AWS_CORE_LIBRARIES AND HAVE_AWS_EVENT_STREAM AND HAVE_AWS_COMMON AND HAVE_AWS_CHECKSUMS AND + CRYPTO_LIBRARIES AND SSL_LIBRARIES AND CURL_LIBRARIES) + SET(ENABLE_EXPORTING_KINESIS True) +ELSE() + SET(ENABLE_EXPORTING_KINESIS False) +ENDIF() + +IF(ENABLE_EXPORTING_KINESIS) + message(STATUS "kinesis exporting: enabled") + list(APPEND NETDATA_FILES ${KINESIS_EXPORTING_FILES}) + list(APPEND NETDATA_COMMON_LIBRARIES ${KINESIS_LIBRARIES} ${AWS_CORE_LIBRARIES} + ${CRYPTO_LIBRARIES} ${SSL_LIBRARIES} ${CURL_LIBRARIES}) + list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${KINESIS_INCLUDE_DIRS} ${AWS_CORE_INCLUDE_DIRS} + ${CRYPTO_INCLUDE_DIRS} ${SSL_INCLUDE_DIRS} ${CURL_INCLUDE_DIRS}) + list(APPEND NETDATA_COMMON_CFLAGS ${CRYPTO_CFLAGS_OTHER} ${SSL_CFLAGS_OTHER} ${CURL_CFLAGS_OTHER}) +ELSE() + message(STATUS "kinesis exporting: disabled (requires AWS SDK for C++)") +ENDIF() + +# ----------------------------------------------------------------------------- +# Pub/Sub exporting connector + +IF(GRPC_LIBRARIES AND PUBSUB_LIBRARIES) + SET(ENABLE_EXPORTING_PUBSUB True) +ELSE() + SET(ENABLE_EXPORTING_PUBSUB False) +ENDIF() + +IF(ENABLE_EXPORTING_PUBSUB) + message(STATUS "pubsub exporting connector: enabled") + list(APPEND NETDATA_FILES ${PUBSUB_EXPORTING_FILES}) + list(APPEND NETDATA_COMMON_LIBRARIES ${GRPC_LIBRARIES} ${PUBSUB_LIBRARIES}) + list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${GRPC_INCLUDE_DIRS} ${PUBSUB_INCLUDE_DIRS}) + list(APPEND NETDATA_COMMON_CFLAGS ${GRPC_CFLAGS_OTHER} ${PUBSUB_CFLAGS_OTHER}) +ELSE() + message(STATUS "pubsub exporting connector: disabled (requires grpc and googleapis)") +ENDIF() + +# ----------------------------------------------------------------------------- +# prometheus remote write exporting connector + +IF(PROTOBUF_LIBRARIES AND SNAPPY_LIBRARIES) + SET(ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE True) +ELSE() + SET(ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE False) +ENDIF() + +IF(ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE) + message(STATUS "prometheus remote write exporting: enabled") + + find_package(Protobuf REQUIRED) + + function(PROTOBUF_REMOTE_WRITE_GENERATE_CPP SRCS HDRS) + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_REMOTE_WRITE_GENERATE_CPP() called without any proto files") + return() + endif() + + set(${SRCS}) + set(${HDRS}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(DIR ${ABS_FIL} DIRECTORY) + get_filename_component(FIL_WE ${FIL} NAME_WE) + set(GENERATED_PB_CC "${DIR}/${FIL_WE}.pb.cc") + set(GENERATED_PB_H "${DIR}/${FIL_WE}.pb.h") + list(APPEND ${SRCS} ${GENERATED_PB_CC}) + list(APPEND ${HDRS} ${GENERATED_PB_H}) + add_custom_command( + OUTPUT ${GENERATED_PB_CC} + ${GENERATED_PB_H} + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS -I=${CMAKE_SOURCE_DIR}/exporting/prometheus/remote_write --cpp_out=${CMAKE_SOURCE_DIR}/exporting/prometheus/remote_write ${ABS_FIL} + DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE} + COMMENT "Running C++ protocol buffer compiler on ${FIL}" + VERBATIM ) + endforeach() + set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) + set(${SRCS} ${${SRCS}} PARENT_SCOPE) + set(${HDRS} ${${HDRS}} PARENT_SCOPE) + endfunction() + + protobuf_remote_write_generate_cpp(PROTO_SRCS PROTO_HDRS exporting/prometheus/remote_write/remote_write.proto) + + list(APPEND NETDATA_FILES ${PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) + list(APPEND NETDATA_COMMON_LIBRARIES ${PROTOBUF_LIBRARIES} ${SNAPPY_LIBRARIES}) + list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${PROTOBUF_INCLUDE_DIRS} ${SNAPPY_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}) + list(APPEND NETDATA_COMMON_CFLAGS ${PROTOBUF_CFLAGS_OTHER} ${SNAPPY_CFLAGS_OTHER}) +ELSE() + message(STATUS "prometheus remote write exporting: disabled (requires protobuf and snappy libraries)") +ENDIF() + +# ----------------------------------------------------------------------------- +# mongodb exporting connector + +IF(MONGOC_LIBRARIES) + message(STATUS "mongodb exporting: enabled") + + list(APPEND NETDATA_FILES ${MONGODB_EXPORTING_FILES}) + list(APPEND NETDATA_COMMON_LIBRARIES ${MONGOC_LIBRARIES}) + list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${MONGOC_INCLUDE_DIRS}) + list(APPEND NETDATA_COMMON_CFLAGS ${MONGOC_CFLAGS_OTHER}) +ELSE() + message(STATUS "mongodb exporting: disabled (requires mongoc library)") +ENDIF() + +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} m ${CMAKE_THREAD_LIBS_INIT}) + +option(ENABLE_ACLK "Enables functionality required to connect to cloud (must be activated by user at run time)" ON) + +if(ENABLE_ACLK) + +find_package(Protobuf REQUIRED) + +function(PROTOBUF_ACLK_GENERATE_CPP SRCS HDRS) + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_ACLK_GENERATE_CPP() called without any proto files") + return() + endif() + + set(${SRCS}) + set(${HDRS}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(DIR ${ABS_FIL} DIRECTORY) + get_filename_component(FIL_WE ${FIL} NAME_WE) + set(GENERATED_PB_CC "${DIR}/${FIL_WE}.pb.cc") + set(GENERATED_PB_H "${DIR}/${FIL_WE}.pb.h") +# cmake > 3.20 required :( +# cmake_path(SET GENERATED_PB_CC "${DIR}") +# cmake_path(SET GENERATED_PB_H "${DIR}") +# cmake_path(APPEND GENERATED_PB_CC "${FIL_WE}.pb.cc") +# cmake_path(APPEND GENERATED_PB_H "${FIL_WE}.pb.h") + + list(APPEND ${SRCS} ${GENERATED_PB_CC}) + list(APPEND ${HDRS} ${GENERATED_PB_H}) + add_custom_command( + OUTPUT ${GENERATED_PB_CC} + ${GENERATED_PB_H} + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS -I=${CMAKE_SOURCE_DIR}/aclk/aclk-schemas --cpp_out=${CMAKE_SOURCE_DIR}/aclk/aclk-schemas ${ABS_FIL} + DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE} + COMMENT "Running C++ protocol buffer compiler on ${FIL}" + VERBATIM ) + endforeach() + set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) + set(${SRCS} ${${SRCS}} PARENT_SCOPE) + set(${HDRS} ${${HDRS}} PARENT_SCOPE) +endfunction() + +set(ACLK_PROTO_DEFS + aclk/aclk-schemas/proto/aclk/v1/lib.proto + aclk/aclk-schemas/proto/agent/v1/disconnect.proto + aclk/aclk-schemas/proto/agent/v1/connection.proto + aclk/aclk-schemas/proto/alarm/v1/config.proto + aclk/aclk-schemas/proto/alarm/v1/stream.proto + aclk/aclk-schemas/proto/nodeinstance/connection/v1/connection.proto + aclk/aclk-schemas/proto/nodeinstance/create/v1/creation.proto + aclk/aclk-schemas/proto/nodeinstance/info/v1/info.proto + aclk/aclk-schemas/proto/context/v1/context.proto + aclk/aclk-schemas/proto/context/v1/stream.proto + ) +PROTOBUF_ACLK_GENERATE_CPP(ACLK_PROTO_BUILT_SRCS ACLK_PROTO_BUILT_HDRS ${ACLK_PROTO_DEFS}) + +list(APPEND NETDATA_COMMON_LIBRARIES ${PROTOBUF_LIBRARIES}) +list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${PROTOBUF_INCLUDE_DIRS}) +list(APPEND NETDATA_COMMON_CFLAGS ${PROTOBUF_CFLAGS_OTHER}) +list(APPEND NETDATA_FILES ${ACLK_FILES} ${ACLK_PROTO_BUILT_SRCS} ${ACLK_PROTO_BUILT_HDRS}) +include_directories(BEFORE ${CMAKE_SOURCE_DIR}/aclk/aclk-schemas) +include_directories(BEFORE ${CMAKE_SOURCE_DIR}/mqtt_websockets/MQTT-C/include) +include_directories(BEFORE ${CMAKE_SOURCE_DIR}/mqtt_websockets/src/include) +include_directories(BEFORE ${CMAKE_SOURCE_DIR}/mqtt_websockets/c-rbuf/include) + +ADD_LIBRARY(mqttwebsockets STATIC + ${MQTT_WEBSOCKETS_FILES}) + +target_compile_options(mqttwebsockets PUBLIC + -DMQTT_WSS_CUSTOM_ALLOC + -DRBUF_CUSTOM_MALLOC) + +target_include_directories(mqttwebsockets PUBLIC + ${CMAKE_SOURCE_DIR}/aclk/helpers) + +set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} mqttwebsockets) + +ENDIF() + +list(APPEND NETDATA_FILES ${ACLK_ALWAYS_BUILD}) +list(APPEND NETDATA_FILES ${TIMEX_PLUGIN_FILES}) + +# ----------------------------------------------------------------------------- +# netdata + +IF(LINUX) + list(APPEND NETDATA_FILES daemon/static_threads_linux.c) + list(APPEND NETDATA_COMMON_LIBRARIES rt) + + add_executable(netdata ${GENERATED_CONFIG_H} ${NETDATA_FILES} + ${CGROUPS_PLUGIN_FILES} + ${DISKSPACE_PLUGIN_FILES} + ${PROC_PLUGIN_FILES} + ${TC_PLUGIN_FILES} + ) + target_link_libraries (netdata libnetdata ${NETDATA_COMMON_LIBRARIES}) + target_include_directories(netdata PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) + target_include_directories(netdata BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(netdata PUBLIC ${NETDATA_COMMON_CFLAGS}) + + SET(ENABLE_PLUGIN_CGROUP_NETWORK True) + SET(ENABLE_PLUGIN_APPS True) + SET(ENABLE_PLUGIN_PERF True) + SET(ENABLE_PLUGIN_SLABINFO True) + +ELSEIF(FREEBSD) + list(APPEND NETDATA_FILES daemon/static_threads_freebsd.c) + + add_executable(netdata ${GENERATED_CONFIG_H} ${NETDATA_FILES} ${FREEBSD_PLUGIN_FILES}) + target_link_libraries (netdata libnetdata ${NETDATA_COMMON_LIBRARIES}) + target_include_directories(netdata PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) + target_include_directories(netdata BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(netdata PUBLIC ${NETDATA_COMMON_CFLAGS}) + SET(ENABLE_PLUGIN_CGROUP_NETWORK False) + SET(ENABLE_PLUGIN_APPS True) + SET(ENABLE_PLUGIN_PERF False) + SET(ENABLE_PLUGIN_SLABINFO False) + SET(ENABLE_PLUGIN_EBPF False) + +ELSEIF(MACOS) + list(APPEND NETDATA_FILES daemon/static_threads_macos.c) + + add_executable(netdata ${GENERATED_CONFIG_H} ${NETDATA_FILES} ${MACOS_PLUGIN_FILES}) + target_link_libraries (netdata libnetdata ${NETDATA_COMMON_LIBRARIES} ${IOKIT} ${FOUNDATION}) + target_include_directories(netdata PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) + target_include_directories(netdata BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(netdata PUBLIC ${NETDATA_COMMON_CFLAGS}) + SET(ENABLE_PLUGIN_CGROUP_NETWORK False) + SET(ENABLE_PLUGIN_APPS False) + SET(ENABLE_PLUGIN_PERF False) + SET(ENABLE_PLUGIN_SLABINFO False) + SET(ENABLE_PLUGIN_EBPF False) + +ENDIF() + +IF(ENABLE_EXPORTING_KINESIS OR ENABLE_EXPORTING_PUBSUB OR ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE) + set_property(TARGET netdata PROPERTY CXX_STANDARD 11) + set_property(TARGET netdata PROPERTY CMAKE_CXX_STANDARD_REQUIRED ON) +ENDIF() + +IF(IPMI_LIBRARIES) + SET(ENABLE_PLUGIN_FREEIPMI True) +ELSE() + SET(ENABLE_PLUGIN_FREEIPMI False) +ENDIF() + +IF(LINUX AND MNL_LIBRARIES AND NFACCT_LIBRARIES) + SET(ENABLE_PLUGIN_NFACCT True) +ELSE() + SET(ENABLE_PLUGIN_NFACCT False) +ENDIF() + +IF(XENSTAT_LIBRARIES) + SET(ENABLE_PLUGIN_XENSTAT True) +ELSE() + SET(ENABLE_PLUGIN_XENSTAT False) +ENDIF() + +IF(CUPS_LIBRARIES) + SET(ENABLE_PLUGIN_CUPS True) +ELSE() + SET(ENABLE_PLUGIN_CUPS False) +ENDIF() + + +# ----------------------------------------------------------------------------- +# netdatacli + +add_executable(netdatacli ${GENERATED_CONFIG_H} ${NETDATACLI_FILES}) +target_link_libraries (netdatacli libnetdata ${NETDATA_COMMON_LIBRARIES}) +target_include_directories(netdatacli PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) +target_include_directories(netdatacli BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) +target_compile_options(netdatacli PUBLIC ${NETDATA_COMMON_CFLAGS}) + + +# ----------------------------------------------------------------------------- +# apps.plugin + +IF(ENABLE_PLUGIN_APPS) + message(STATUS "apps.plugin: enabled") + add_executable(apps.plugin ${GENERATED_CONFIG_H} ${APPS_PLUGIN_FILES}) + target_link_libraries (apps.plugin libnetdata ${NETDATA_COMMON_LIBRARIES} ${CAP_LIBRARIES}) + target_include_directories(apps.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS} ${CAP_INCLUDE_DIRS}) + target_include_directories(apps.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(apps.plugin PUBLIC ${NETDATA_COMMON_CFLAGS} ${CAP_CFLAGS_OTHER}) +ELSE() + message(STATUS "apps.plugin: disabled") +ENDIF() + + +# ----------------------------------------------------------------------------- +# freeipmi.plugin + +IF(ENABLE_PLUGIN_FREEIPMI) + message(STATUS "freeipmi.plugin: enabled") + add_executable(freeipmi.plugin ${GENERATED_CONFIG_H} ${FREEIPMI_PLUGIN_FILES}) + target_link_libraries (freeipmi.plugin libnetdata ${NETDATA_COMMON_LIBRARIES} ${IPMI_LIBRARIES}) + target_include_directories(freeipmi.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS} ${IPMI_INCLUDE_DIRS}) + target_include_directories(freeipmi.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(freeipmi.plugin PUBLIC ${NETDATA_COMMON_CFLAGS} ${IPMI_CFLAGS_OTHER}) +ELSE() + message(STATUS "freeipmi.plugin: disabled (depends on libipmimonitoring)") +ENDIF() + + +# ----------------------------------------------------------------------------- +# nfacct.plugin + +IF(ENABLE_PLUGIN_NFACCT) + message(STATUS "nfacct.plugin: enabled") + add_executable(nfacct.plugin ${GENERATED_CONFIG_H} ${NFACCT_PLUGIN_FILES}) + target_link_libraries (nfacct.plugin libnetdata ${NETDATA_COMMON_LIBRARIES} ${MNL_LIBRARIES} ${NFACCT_LIBRARIES}) + target_include_directories(nfacct.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS} ${MNL_INCLUDE_DIRS} ${NFACCT_INCLUDE_DIRS}) + target_include_directories(nfacct.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(nfacct.plugin PUBLIC ${NETDATA_COMMON_CFLAGS} ${MNL_CFLAGS_OTHER} ${NFACCT_CFLAGS_OTHER}) +ELSE() + message(STATUS "nfacct.plugin: disabled (requires libmnl and libnetfilter_acct)") +ENDIF() + + +# ----------------------------------------------------------------------------- +# xenstat.plugin + +IF(ENABLE_PLUGIN_XENSTAT) + message(STATUS "xenstat.plugin: enabled") + add_executable(xenstat.plugin ${GENERATED_CONFIG_H} ${XENSTAT_PLUGIN_FILES}) + target_link_libraries (xenstat.plugin libnetdata ${NETDATA_COMMON_LIBRARIES} ${XENSTAT_LIBRARIES}) + target_include_directories(xenstat.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS} ${XENSTAT_INCLUDE_DIRS}) + target_include_directories(xenstat.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(xenstat.plugin PUBLIC ${NETDATA_COMMON_CFLAGS} ${XENSTAT_CFLAGS_OTHER}) +ELSE() + message(STATUS "xenstat.plugin: disabled (requires libxenstat)") +ENDIF() + + +# ----------------------------------------------------------------------------- +# perf.plugin + +IF(ENABLE_PLUGIN_PERF) + message(STATUS "perf.plugin: enabled") + add_executable(perf.plugin ${GENERATED_CONFIG_H} ${PERF_PLUGIN_FILES}) + target_link_libraries (perf.plugin libnetdata ${NETDATA_COMMON_LIBRARIES}) + target_include_directories(perf.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) + target_include_directories(perf.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(perf.plugin PUBLIC ${NETDATA_COMMON_CFLAGS}) +ELSE() + message(STATUS "perf.plugin: disabled") +ENDIF() + + +# ----------------------------------------------------------------------------- +# ebpf_process.plugin + +IF(ENABLE_PLUGIN_EBPF) + message(STATUS "ebpf.plugin: enabled") + add_executable(ebpf.plugin ${GENERATED_CONFIG_H} ${EBPF_PROCESS_PLUGIN_FILES}) + target_link_libraries (ebpf.plugin libnetdata ${NETDATA_COMMON_LIBRARIES}) + target_include_directories(ebpf.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) + target_include_directories(ebpf.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(ebpf.plugin PUBLIC ${NETDATA_COMMON_CFLAGS}) +ELSE() + message(STATUS "ebpf.plugin: disabled") +ENDIF() + + +# ----------------------------------------------------------------------------- +# slabinfo.plugin + +IF(ENABLE_PLUGIN_SLABINFO) + message(STATUS "slabinfo.plugin: enabled") + add_executable(slabinfo.plugin ${GENERATED_CONFIG_H} ${SLABINFO_PLUGIN_FILES}) + target_link_libraries (slabinfo.plugin libnetdata ${NETDATA_COMMON_LIBRARIES}) + target_include_directories(slabinfo.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) + target_include_directories(slabinfo.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(slabinfo.plugin PUBLIC ${NETDATA_COMMON_CFLAGS}) +ELSE() + message(STATUS "slabinfo.plugin: disabled") +ENDIF() + + +# ----------------------------------------------------------------------------- +# cups.plugin + +IF(ENABLE_PLUGIN_CUPS) + message(STATUS "cups.plugin: enabled") + add_executable(cups.plugin ${GENERATED_CONFIG_H} ${CUPS_PLUGIN_FILES}) + target_link_libraries (cups.plugin libnetdata ${NETDATA_COMMON_LIBRARIES} ${CUPS_LIBRARIES}) + target_include_directories(cups.plugin PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS} ${CUPS_INCLUDE_DIRS}) + target_include_directories(cups.plugin BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(cups.plugin PUBLIC ${NETDATA_COMMON_CFLAGS} ${CUPS_CFLAGS_OTHER}) +ELSE() + message(STATUS "cups.plugin: disabled") +ENDIF() + + +# ----------------------------------------------------------------------------- +# cgroup-network + +IF(ENABLE_PLUGIN_CGROUP_NETWORK) + message(STATUS "cgroup-network: enabled") + add_executable(cgroup-network ${GENERATED_CONFIG_H} ${CGROUP_NETWORK_FILES}) + target_link_libraries (cgroup-network libnetdata ${NETDATA_COMMON_LIBRARIES}) + target_include_directories(cgroup-network PUBLIC ${NETDATA_COMMON_INCLUDE_DIRS}) + target_include_directories(cgroup-network BEFORE PUBLIC ${GENERATED_CONFIG_H_DIR}) + target_compile_options(cgroup-network PUBLIC ${NETDATA_COMMON_CFLAGS}) +ELSE() + message(STATUS "cgroup-network: disabled (requires Linux)") +ENDIF() + +# ----------------------------------------------------------------------------- +# Unit tests + +if(UNIT_TESTING) + message(STATUS "Looking for CMocka which is required for unit testing") + find_package(CMocka REQUIRED) + include(CTest) + +if(BUILD_TESTING) + add_executable(str2ld_testdriver libnetdata/tests/test_str2ld.c) + target_link_libraries(str2ld_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) + add_test(NAME test_str2ld COMMAND str2ld_testdriver) + + add_executable(storage_number_testdriver libnetdata/storage_number/tests/test_storage_number.c) + target_link_libraries(storage_number_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) + add_test(NAME test_storage_number COMMAND storage_number_testdriver) + + set(EXPORTING_ENGINE_TEST_FILES + exporting/tests/test_exporting_engine.c + exporting/tests/test_exporting_engine.h + exporting/tests/exporting_fixtures.c + exporting/tests/exporting_doubles.c + exporting/tests/netdata_doubles.c + exporting/tests/system_doubles.c + database/rrdlabels.c + database/rrdvar.c + ) + set(TEST_NAME exporting_engine) + set(PROMETHEUS_REMOTE_WRITE_LINK_OPTIONS) + set(KINESIS_LINK_OPTIONS) + set(PUBSUB_LINK_OPTIONS) + set(MONGODB_LINK_OPTIONS) +if(ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE) + list(APPEND EXPORTING_ENGINE_FILES ${PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) + list( + APPEND PROMETHEUS_REMOTE_WRITE_LINK_OPTIONS + -Wl,--wrap=init_write_request + -Wl,--wrap=add_host_info + -Wl,--wrap=add_label + -Wl,--wrap=add_metric + ) +endif() +if(ENABLE_EXPORTING_KINESIS) + list(APPEND EXPORTING_ENGINE_FILES ${KINESIS_EXPORTING_FILES}) + list( + APPEND KINESIS_LINK_OPTIONS + -Wl,--wrap=aws_sdk_init + -Wl,--wrap=kinesis_init + -Wl,--wrap=kinesis_put_record + -Wl,--wrap=kinesis_get_result + ) +endif() +if(ENABLE_EXPORTING_PUBSUB) + list(APPEND EXPORTING_ENGINE_FILES ${PUBSUB_EXPORTING_FILES}) + list( + APPEND PUBSUB_LINK_OPTIONS + -Wl,--wrap=pubsub_init + -Wl,--wrap=pubsub_add_message + -Wl,--wrap=pubsub_publish + -Wl,--wrap=pubsub_get_result + ) +endif() +if(MONGOC_LIBRARIES) + list(APPEND EXPORTING_ENGINE_FILES ${MONGODB_EXPORTING_FILES}) + list( + APPEND MONGODB_LINK_OPTIONS + -Wl,--wrap=mongoc_init + -Wl,--wrap=mongoc_uri_new_with_error + -Wl,--wrap=mongoc_uri_get_option_as_int32 + -Wl,--wrap=mongoc_uri_set_option_as_int32 + -Wl,--wrap=mongoc_client_new_from_uri + -Wl,--wrap=mongoc_client_set_appname + -Wl,--wrap=mongoc_client_get_collection + -Wl,--wrap=mongoc_uri_destroy + -Wl,--wrap=mongoc_collection_insert_many + ) +endif() + add_executable(${TEST_NAME}_testdriver ${EXPORTING_ENGINE_TEST_FILES} ${EXPORTING_ENGINE_FILES}) + target_compile_options( + ${TEST_NAME}_testdriver + PRIVATE + -DUNIT_TESTING + ) + target_include_directories( + ${TEST_NAME}_testdriver + PUBLIC + ${CMAKE_CURRENT_BINARY_DIR} + ${NETDATA_COMMON_INCLUDE_DIRS} + ) + target_link_options( + ${TEST_NAME}_testdriver + PRIVATE + -Wl,--wrap=read_exporting_config + -Wl,--wrap=init_connectors + -Wl,--wrap=mark_scheduled_instances + -Wl,--wrap=rrdhost_is_exportable + -Wl,--wrap=rrdset_is_exportable + -Wl,--wrap=exporting_calculate_value_from_stored_data + -Wl,--wrap=prepare_buffers + -Wl,--wrap=send_internal_metrics + -Wl,--wrap=now_realtime_sec + -Wl,--wrap=uv_thread_set_name_np + -Wl,--wrap=uv_thread_create + -Wl,--wrap=uv_mutex_lock + -Wl,--wrap=uv_mutex_unlock + -Wl,--wrap=uv_cond_signal + -Wl,--wrap=uv_cond_wait + -Wl,--wrap=strdupz + -Wl,--wrap=info_int + -Wl,--wrap=recv + -Wl,--wrap=send + -Wl,--wrap=connect_to_one_of + -Wl,--wrap=create_main_rusage_chart + -Wl,--wrap=send_main_rusage + -Wl,--wrap=simple_connector_end_batch + ${PROMETHEUS_REMOTE_WRITE_LINK_OPTIONS} + ${KINESIS_LINK_OPTIONS} + ${PUBSUB_LINK_OPTIONS} + ${MONGODB_LINK_OPTIONS} + ) + target_link_libraries(${TEST_NAME}_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) + add_test(NAME test_${TEST_NAME} COMMAND ${TEST_NAME}_testdriver) + + set(WEB_API_TEST_FILES + web/api/tests/web_api.c + web/server/web_client.c + ) + add_executable(web_api_testdriver ${WEB_API_TEST_FILES}) + target_link_options( + web_api_testdriver + PRIVATE + -Wl,--wrap=rrdhost_find_by_hostname + -Wl,--wrap=finished_web_request_statistics + -Wl,--wrap=config_get + -Wl,--wrap=web_client_api_request_v1 + -Wl,--wrap=rrdhost_find_by_guid + -Wl,--wrap=rrdset_find_byname + -Wl,--wrap=sql_create_host_by_uuid + -Wl,--wrap=rrdset_find + -Wl,--wrap=rrdpush_receiver_thread_spawn + -Wl,--wrap=debug_int + -Wl,--wrap=error_int + -Wl,--wrap=info_int + -Wl,--wrap=fatal_int + ) + target_link_libraries(web_api_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) +# add_test(NAME test_web_api COMMAND web_api_testdriver) + + set(VALID_URLS_TEST_FILES + web/api/tests/valid_urls.c + web/server/web_client.c + ) + add_executable(valid_urls_testdriver ${VALID_URLS_TEST_FILES}) + target_link_options( + valid_urls_testdriver + PRIVATE + -Wl,--wrap=rrdhost_find_by_hostname + -Wl,--wrap=finished_web_request_statistics + -Wl,--wrap=config_get + -Wl,--wrap=web_client_api_request_v1 + -Wl,--wrap=rrdhost_find_by_guid + -Wl,--wrap=rrdset_find_byname + -Wl,--wrap=sql_create_host_by_uuid + -Wl,--wrap=rrdset_find + -Wl,--wrap=rrdpush_receiver_thread_spawn + -Wl,--wrap=debug_int + -Wl,--wrap=error_int + -Wl,--wrap=info_int + -Wl,--wrap=fatal_int + -Wl,--wrap=mysendfile + -DREMOVE_MYSENDFILE + ) + target_link_libraries(valid_urls_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) +# add_test(NAME test_valid_urls COMMAND valid_urls_testdriver) + + set(CGROUPS_TEST_FILES + collectors/cgroups.plugin/tests/test_cgroups_plugin.c + collectors/cgroups.plugin/tests/test_cgroups_plugin.h + collectors/cgroups.plugin/tests/test_doubles.c + database/rrdlabels.c + database/rrd.h + ) + add_executable(cgroups_testdriver ${CGROUPS_TEST_FILES} ${CGROUPS_PLUGIN_FILES}) + target_link_libraries(cgroups_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) + add_test(NAME test_cgroups COMMAND cgroups_testdriver) + + set_target_properties( + str2ld_testdriver + storage_number_testdriver + exporting_engine_testdriver + web_api_testdriver + valid_urls_testdriver + cgroups_testdriver + PROPERTIES RUNTIME_OUTPUT_DIRECTORY tests + ) + +endif() +endif() + + +# generate config.h so that CMake becomes independent of automake + +## netdata version +set(GIT_EXECUTABLE "git") +execute_process(COMMAND ${GIT_EXECUTABLE} describe OUTPUT_VARIABLE NETDATA_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) + +IF("${NETDATA_VERSION}" STREQUAL "") + file(STRINGS "packaging/version" NETDATA_VERSION LIMIT_COUNT 1) +ENDIF() + +set(NETDATA_USER "netdata" CACHE STRING "use this user to drop privileges") + +if(MAC_OS) + set(ENABLE_APPS_PLUGIN False) +else() + set(ENABLE_APPS_PLUGIN True) +endif() + +check_include_file(time.h HAVE_TIME_H) +check_include_file(unistd.h HAVE_UNISTD_H) +if (HAVE_TIME_H) + include(CheckStructHasMember) + check_struct_has_member("struct timespec" tv_sec "time.h" HAVE_STRUCT_TIMESPEC) +endif () + +check_include_file(sys/statfs.h HAVE_SYS_STATFS_H) +check_include_file(sys/statvfs.h HAVE_SYS_STATVFS_H) +check_include_file(inttypes.h HAVE_INTTYPES_H) +check_include_file(stdint.h HAVE_STDINT_H) + +include(CheckSymbolExists) +check_include_file(sys/mkdev.h HAVE_SYS_MKDEV_H) +if (HAVE_SYS_MKDEV_H) + check_symbol_exists(major "sys/mkdev.h" MAJOR_IN_MKDEV) +endif() +check_include_file(sys/sysmacros.h HAVE_SYS_SYSMACROS_H) +if (HAVE_SYS_SYSMACROS_H) + check_symbol_exists(major "sys/sysmacros.h" MAJOR_IN_SYSMACROS) +endif() + +if (CRYPTO_FOUND) + set(HAVE_CRYPTO True) + FIND_LIBRARY(CRYPTO_LIBRARY_LOCATION NAMES crypto) + check_library_exists(crypto X509_VERIFY_PARAM_set1_host ${CRYPTO_LIBRARY_LOCATION} HAVE_X509_VERIFY_PARAM_set1_host) + if (HAVE_X509_VERIFY_PARAM_set1_host) + set(HAVE_X509_VERIFY_PARAM_set1_host True) + endif() +endif() + +include(CheckCSourceCompiles) +check_c_source_compiles(" + #define _GNU_SOURCE + #include + int main() { char x = *strerror_r(0, &x, sizeof(x)); return 0; } + " STRERROR_R_CHAR_P) + +IF(${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + SET(LIKELY_MACRO "__builtin_expect(!!(x), 1)") + SET(UNLIKELY_MACRO "__builtin_expect(!!(x), 0)") +ELSE() + SET(LIKELY_MACRO "(x)") + SET(UNLIKELY_MACRO "(x)") +ENDIF() + +IF(${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + SET(ALWAYS_UNUSED_MACRO "__attribute__((unused))") + SET(MAYBE_UNUSED_MACRO "__attribute__((unused))") +ELSE() + SET(ALWAYS_UNUSED_MACRO "") + SET(MAYBE_UNUSED_MACRO "") +ENDIF() + +# ----------------------------------------------------------------------------- + +check_library_exists(c clock_gettime "" HAVE_CLOCK_GETTIME) + +IF(NOT HAVE_CLOCK_GETTIME) + CHECK_LIBRARY_EXISTS(rt clock_gettime "" HAVE_CLOCK_GETTIME) +ENDIF() + +IF(HAVE_CLOCK_GETTIME) + message("-- clock_gettime(): found") + set(NETDATA_REQUIRED_DEFINES "-DHAVE_CLOCK_GETTIME=1 ${NETDATA_REQUIRED_DEFINES}") +ELSE() + message("-- clock_gettime(): not found") +ENDIF() + +# ----------------------------------------------------------------------------- +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NETDATA_REQUIRED_DEFINES}") +message("CFLAGS=\"${CMAKE_C_FLAGS}\"") + +configure_file(config.cmake.h.in config.h) diff --git a/Dockerfile b/Dockerfile new file mode 120000 index 0000000..96ebbda --- /dev/null +++ b/Dockerfile @@ -0,0 +1 @@ +./packaging/docker/Dockerfile \ No newline at end of file diff --git a/Dockerfile.test b/Dockerfile.test new file mode 100644 index 0000000..ef087d8 --- /dev/null +++ b/Dockerfile.test @@ -0,0 +1,74 @@ +# NOTE: This Dockerfile (`Dockerfile.test`) should only be used for dev/testing +# and *NOT* in production. Use the top-level `Dockerfile` which points to +# `./packaging/docker/Dockerfile`. +# +# TODO: Create a netdata/package-builder:alpine9 +#FROM netdata/package-builder:alpine AS build +FROM alpine:3.9 AS build + +# Install Dependencies +RUN apk add --no-cache -U alpine-sdk bash curl libuv-dev zlib-dev \ + util-linux-dev libmnl-dev gcc make git autoconf \ + automake pkgconfig python logrotate openssl-dev \ + cmake + +# Pass optional ./netdata-installer.sh args with --build-arg INSTALLER_ARGS=... +ARG INSTALLER_ARGS="" + +# Copy Sources +# Can also bind-mount sources with: +# $ docker run -v $PWD:/netdata + +WORKDIR /netdata +COPY . . + +# Build +RUN ./netdata-installer.sh --dont-wait --dont-start-it --disable-go "${INSTALLER_ARGS}" + +FROM alpine:3.9 AS runtime + +# Install runtime dependencies +RUN apk --no-cache -U add curl bash libuv zlib util-linux libmnl python + +# Create netdata user/group +RUN addgroup -S netdata && \ + adduser -D -S -h /var/empty -s /bin/false -G netdata netdata + +# Copy binary from build layer +COPY --from=build /usr/sbin/netdata /usr/sbin/netdata + +# Copy configuration files from build layer +COPY --from=build /etc/netdata/ /etc/netdata/ +COPY --from=build /usr/lib/netdata/ /usr/lib/netdata/ + +# Copy assets from build layer +COPY --from=build /usr/share/netdata/ /usr/share/netdata/ + +# Create some directories netdata needs +RUN mkdir -p \ + /etc/netdata \ + /var/log/netdata \ + /var/lib/netdata \ + /var/cache/netdata \ + /usr/lib/netdata/conf.d \ + /usr/libexec/netdata/plugins.d + +# Fix permissions/ownerships +RUN chown -R netdata:netdata \ + /etc/netdata/ \ + /usr/lib/netdata/ \ + /usr/share/netdata/ \ + /var/log/netdata \ + /var/lib/netdata \ + /var/cache/netdata \ + /usr/libexec/netdata + +VOLUME /etc/netdata +VOLUME /var/lib/netdata +VOLUME /var/log/netdata + +EXPOSE 19999/tcp + +USER netdata + +CMD ["/usr/sbin/netdata", "-D"] diff --git a/HISTORICAL_CHANGELOG.md b/HISTORICAL_CHANGELOG.md new file mode 100644 index 0000000..16ef786 --- /dev/null +++ b/HISTORICAL_CHANGELOG.md @@ -0,0 +1,650 @@ +netdata (1.10.0) - 2018-03-27 + + Please check full changelog at github. + + +netdata (1.9.0) - 2017-12-17 + + Please check full changelog at github. + + +netdata (1.8.0) - 2017-09-17 + + This is mainly a bugfix release. + Please check full changelog at github. + +netdata (1.7.0) - 2017-07-16 + +- netdata is still spreading fast + + we are at 320.000 users and 132.000 servers + + Almost 100k new users, 52k new installations and 800k docker pulls + since the previous release, 4 and a half months ago. + + netdata user base grows at about 1000 new users and 600 new servers + per day. Thank you. You are awesome. + +- The next release (v1.8) will be focused on providing a global health + monitoring service, for all netdata users, for free. + +- netdata is now a (very fast) fully featured statsd server and the + only one with automatic visualization: push a statsd metric and hit + F5 on the netdata dashboard: your metric visualized. It also supports + synthetic charts, defined by you, so that you can correlate and + visualize your application the way you like it. + +- netdata got new installation options + It is now easier than ever to install netdata - we also distribute a + statically linked netdata x86_64 binary, including key dependencies + (like bash, curl, etc) that can run everywhere a Linux kernel runs + (CoreOS, CirrOS, etc). + +- metrics streaming and replication has been improved significantly. + All known issues have been solved and key enhancements have been added. + Headless collectors and proxies can now send metrics to backends when + data source = as collected. + +- backends have got quite a few enhancements, including host tags and + metrics filtering at the netdata side; + prometheus support has been re-written to utilize more prometheus + features and provide more flexibility and integration options. + +- netdata now monitors ZFS (on Linux and FreeBSD), ElasticSearch, + RabbitMQ, Go applications (via expvar), ipfw (on FreeBSD 11), samba, + squid logs (with web_log plugin). + +- netdata dashboard loading times have been improved significantly + (hit F5 a few times on a netdata dashboard - it is now amazingly fast), + to support dashboards with thousands of charts. + +- netdata alarms now support custom hooks, so you can run whatever you + like in parallel with netdata alarms. + +- As usual, this release brings dozens of more improvements, enhancements + and compatibility fixes. + +netdata (1.6.0) - 2017-03-20 + +- birthday release: 1 year netdata + + netdata was first published on March 30th, 2016. + It has been a crazy year since then: + + 225.000 unique netdata users + currently, at 1.000 new unique users per day + + 80.000 unique netdata installations + currently, at 500 new installation per day + + 610.000 docker pulls on docker hub + + 4.000.000 netdata sessions served + currently, at 15.000 sessions served per day + + 20.000 github stars + + ``` + Thank you! + You are awesome! + ``` + +- central netdata is here + + This is the first release that supports real-time streaming of + metrics between netdata servers. + + netdata can now be: + + - autonomous host monitoring + (like it always has been) + + - headless data collector + (collect and stream metrics in real-time to another netdata) + + - headless proxy + (collect metrics from multiple netdata and stream them to another netdata) + + - store and forward proxy + (like headless proxy, but with a local database) + + - central database + (metrics from multiple hosts are aggregated) + + metrics databases can be configured on all nodes and each node maintaining + a database may have a different retention policy and possibly run + (even different) alarms on them. + +- monitoring ephemeral nodes + + netdata now supports monitoring autoscaled ephemeral nodes, + that are started and stopped on demand (their IP is not known). + + When the ephemeral nodes start streaming metrics to the central + netdata, the central netdata will show register them at "my-netdata" + menu on the dashboard. + + For more information check: + + +- monitoring ephemeral containers and VM guests + + netdata now cleans up container, guest VM, network interfaces and mounted + disk metrics, disabling automatically their alarms too. + + For more information check: + + +- apps.plugin ported for FreeBSD + + @vlvkobal has ported "apps.plugin" to FreeBSD. netdata can now provide + "Applications", "Users" and "User Groups" on FreeBSD. + +- web_log plugin + + @l2isbad has done a wonderful job creating a unified web log parsing plugin + for all kinds of web server logs. With it, netdata provides real-time + performance information and health monitoring alarms for web applications + and web sites! + + For more information check: + + +- backends + + netdata can now archive metrics to `JSON` backends + (both push, by @lfdominguez, and pull modes). + +- IPMI monitoring + + netdata now has an IPMI plugin (based on freeipmi) + for monitoring server hardware. + + The plugin creates (up to) 8 charts: + + 1. number of sensors by state + 2. number of events in SEL + 3. Temperatures CELSIUS + 4. Temperatures FAHRENHEIT + 5. Voltages + 6. Currents + 7. Power + 8. Fans + + It also supports alarms (including the number of sensors in critical state). + + For more information, check: + + +- new plugins + + @l2isbad builds python data collection plugins for netdata at an wonderful + rate! He rocks! + + - **web_log** for monitoring in real-time all kinds of web server log files @l2isbad + - **freeipmi** for monitoring IPMI (server hardware) + - **nsd** (the [name server daemon](https://www.nlnetlabs.nl/projects/nsd/)) @383c57 + - **mongodb** @l2isbad + - **smartd_log** (monitoring disk S.M.A.R.T. values) @l2isbad + +- improved plugins + + - **nfacct** reworked and now collects connection tracker information using netlink. + - **ElasticSearch** re-worked @l2isbad + - **mysql** re-worked to allow faster development of custom mysql based plugins (MySQLService) @l2isbad + - **SNMP** + - **tomcat** @NMcCloud + - **ap** (monitoring hostapd access points) + - **php_fpm** @l2isbad + - **postgres** @l2isbad + - **isc_dhcpd** @l2isbad + - **bind_rndc** @l2isbad + - **numa** + - **apps.plugin** improvements and freebsd support @vlvkobal + - **fail2ban** @l2isbad + - **freeradius** @l2isbad + - **nut** (monitoring UPSes) + - **tc** (Linux QoS) now works on qdiscs instead of classes for the same result (a lot faster) @t-h-e + - **varnish** @l2isbad + +- new and improved alarms + - **web_log**, many alarms to detect common web site/API issues + - **fping**, alarms to detect packet loss, disconnects and unusually high latency + - **cpu**, cpu utilization alarm now ignores `nice` + +- new and improved alarm notification methods + - **HipChat** to allow hosted HipChat @frei-style + - **discordapp** @lowfive + +- dashboard improvements + - dashboard now works on HiDPi screens + - dashboard now shows version of netdata + - dashboard now resets charts properly + - dashboard updated to use latest gauge.js release + +- other improvements + - thanks to @rlefevre netdata now uses a lot of different high resolution system clocks. + + netdata has received a lot more improvements from many more contributors! + + Thank you all! + +netdata (1.5.0) - 2017-01-22 + +- yet another release that makes netdata the fastest + netdata ever! + +- netdata runs on FreeBSD, FreeNAS and MacOS ! + + Vladimir Kobal (@vlvkobal) has done a magnificent work + porting netdata to FreeBSD and MacOS. + + Everything works: cpu, memory, disks performance, disks space, + network interfaces, interrupts, IPv4 metrics, IPv6 metrics + processes, context switches, softnet, IPC queues, + IPC semaphores, IPC shared memory, uptime, etc. Wow! + +- netdata supports data archiving to backend databases: + + - Graphite + - OpenTSDB + - Prometheus + + and of course all the compatible ones + (KairosDB, InfluxDB, Blueflood, etc) + +- new plugins: + + Ilya Mashchenko (@l2isbad) has created most of the python + data collection plugins in this release ! + + - systemd Services (using cgroups!) + - FPing (yes, network latency in netdata!) + - postgres databases @facetoe, @moumoul + - Vanish disk cache (v3 and v4) @l2isbad + - ElasticSearch @l2isbad + - HAproxy @l2isbad + - FreeRadius @l2isbad, @lgz + - mdstat (RAID) @l2isbad + - ISC bind (via rndc) @l2isbad + - ISC dhcpd @l2isbad, @lgz + - Fail2Ban @l2isbad + - OpenVPN status log @l2isbad, @lgz + - NUMA memory @tycho + - CPU Idle @tycho + - gunicorn log @deltaskelta + - ECC memory hardware errors + - IPC semaphores + - uptime plugin (with a nice badge too) + +- improved plugins: + + - netfilter conntrack + - mysql (replication) @l2isbad + - ipfs @pjz + - cpufreq @tycho + - hddtemp @l2isbad + - sensors @l2isbad + - nginx @leolovenet + - nginx_log @paulfantom + - phpfpm @leolovenet + - redis @leolovenet + - dovecot @justohall + - cgroups + - disk space + - apps.plugin + - /proc/interrupts @rlefevre + - /proc/softirqs @rlefevre + - /proc/vmstat (system memory charts) + - /proc/net/snmp6 (IPv6 charts) + - /proc/self/meminfo (system memory charts) + - /proc/net/dev (network interfaces) + - tc (linux QoS) + +- new/improved alarms: + + - MySQL / MariaDB alarms (incl. replication) + - IPFS alarms + - HAproxy alarms + - UDP buffer alarms + - TCP AttemptFails + - ECC memory alarms + - netfilter connections alarms + - SNMP + +- new alarm notifications: + + - messagebird.com @tech-no-logical + - pagerduty.com @jimcooley + - pushbullet.com @tperalta82 + - twilio.com @shadycuz + - HipChat + - kafka + +- shell integration + + - shell scripts can now query netdata easily! + +- dashboard improvements: + - dashboard is now faster on firefox, safari, opera, edge + (edge is still the slowest) + - dashboard now has a little bigger fonts + - SHIFT + mouse wheel to zoom charts, works on all browsers + - perfect-scrollbar on the dashboard + - dashboard 4K resolution fixes + - dashboard compatibility fixes for embedding charts in + third party web sites + - charts on custom dashboards can have common min/max + even if they come from different netdata servers + - alarm log is now saved and loaded back so that + the alarm history is available at the dashboard + +- other improvements: + - python.d.plugin has received way to many improvements + from many contributors! + - charts.d.plugin can now be forked to support + multiple independent instances + - registry has been re-factored to lower its memory + requirements (required for the public registry) + - simple patterns in cgroups, disks and alarms + - netdata-installer.sh can now correctly install + netdata in containers + - supplied logrotate script compatibility fixes + - spec cleanup @breed808 + - clocks and timers reworked @rlefevre + + netdata has received a lot more improvements from many more + contributors! + + Thank you all guys! + +netdata (1.4.0) - 2016-10-04 + + At a glance: + +- the fastest netdata ever (with a better look too)! + +- improved IoT and containers support! + +- alarms improved in almost every way! + +- new plugins: + softnet netdev, + extended TCP metrics, + UDPLite + NFS v2, v3 client (server was there already), + NFS v4 server & client, + APCUPSd, + RetroShare + +- improved plugins: + mysql, + cgroups, + hddtemp, + sensors, + phpfpm, + tc (QoS) + + In detail: + +- improved alarms + + Many new alarms have been added to detect common kernel + configuration errors and old alarms have been re-worked + to avoid notification floods. + + Alarms now support notification hysteresis (both static + and dynamic), notification self-cancellation, dynamic + thresholds based on current alarm status + +- improved alarm notifications + + netdata now supports: + + - email notifications + - slack.com notifications on slack channels + - pushover.net notifications (mobile push notifications) + - telegram.org notifications + + For all the above methods, netdata supports role-based + notifications, with multiple recipients for each role + and severity filtering per recipient! + + Also, netdata support HTML5 notifications, while the + dashboard is open in a browser window (no need to be + the active one). + + All notifications are now clickable to get to the chart + that raised the alarm. + +- improved IoT support! + + netdata builds and runs with musl libc and runs on systems + based on busybox. + +- improved containers support! + + netdata runs on alpine linux (a low profile linux distribution + used in containers). + +- Dozens of other improvements and bugfixes + +netdata (1.3.0) - 2016-08-28 + + At a glance: + +- netdata has health monitoring / alarms! +- netdata has badges that can be embeded anywhere! +- netdata plugins are now written in Python! +- new plugins: redis, memcached, nginx_log, ipfs, apache_cache + + IMPORTANT: + Since netdata now uses Python plugins, new packages are + required to be installed on a system to allow it work. + For more information, please check the installation page: + + + + In detail: + +- netdata has alarms! + + Based on the POLL we made on github + (), + health monitoring was the winner. So here it is! + + netdata now has a powerful health monitoring system embedded. + Please check the wiki page: + + + +- netdata has badges! + + netdata can generate badges with live information from the + collected metrics. + Please check the wiki page: + + + +- netdata plugins are now written in Python! + + Thanks to the great work of Paweł Krupa (@paulfantom), most BASH + plugins have been ported to Python. + + The new python.d.plugin supports both python2 and python3 and + data collection from multiple sources for all modules. + + The following pre-existing modules have been ported to Python: + + - apache + - cpufreq + - example + - exim + - hddtemp + - mysql + - nginx + - phpfpm + - postfix + - sensors + - squid + - tomcat + + The following new modules have been added: + + - apache_cache + - dovecot + - ipfs + - memcached + - nginx_log + - redis + +- other data collectors: + + - Thanks to @simonnagl netdata now reports disk space usage. + +- dashboards now transfer a certain settings from server to server + when changing servers via the my-netdata menu. + + The settings transferred are the dashboard theme, the online + help status and current pan and zoom timeframe of the dashboard. + +- API improvements: + + - reduction functions now support 'min', 'sum' and 'incremental-sum'. + + - netdata now offers a multi-threaded and a single threaded + web server (single threaded is better for IoT). + +- apps.plugin improvements: + + - can now run with command line argument 'without-files' + to prevent it from enumerating all the open files/sockets/pipes + of all running processes. + + - apps.plugin now scales the collected values to match the + the total system usage. + + - apps.plugin can now report guest CPU usage per process. + + - repeating errors are now logged once per process. + +- netdata now runs with IDLE process priority (lower than nice 19) + +- netdata now instructs the kernel to kill it first when it starves + for memory. + +- netdata listens for signals: + + - SIGHUP to netdata instructs it to re-open its log files + (new logrotate files added too). + + - SIGUSR1 to netdata saves the database + + - SIGUSR2 to netdata reloads health / alarms configuration + +- netdata can now bind to multiple IPs and ports. + +- netdata now has new systemd service file (it starts as user + netdata and does not fork). + +- Dozens of other improvements and bugfixes + +netdata (1.2.0) - 2016-05-16 + + At a glance: + +- netdata is now 30% faster +- netdata now has a registry (my-netdata dashboard menu) +- netdata now monitors Linux Containers (docker, lxc, etc) + + IMPORTANT: + This version requires libuuid. The package you need is: + +- uuid-dev (debian/ubuntu), or +- libuuid-devel (centos/fedora/redhat) + + In detail: + +- netdata is now 30% faster ! + + - Patches submitted by @fredericopissarra improved overall + netdata performance by 10%. + + - A new improved search function in the internal indexes + made all searches faster by 50%, resulting in about + 20% better performance for the core of netdata. + + - More efficient threads locking in key components + contributed to the overall efficiency. + +- netdata now has a CENTRAL REGISTRY ! + + The central registry tracks all your netdata servers + and bookmarks them for you at the 'my-netdata' menu + on all dashboards. + + Every netdata can act as a registry, but there is also + a global registry provided for free for all netdata users! + +- netdata now monitors CONTAINERS ! + + docker, lxc, or anything else. For each container it monitors + CPU, RAM, DISK I/O (network interfaces were already monitored) + +- apps.plugin: now uses linux capabilities by default + without setuid to root + +- netdata has now an improved signal handler + thanks to @simonnagl + +- API: new improved CORS support + +- SNMP: counter64 support fixed + +- MYSQL: more charts, about QCache, MyISAM key cache, + InnoDB buffer pools, open files + +- DISK charts now show mount point when available + +- Dashboard: improved support for older web browsers + and mobile web browsers (thanks to @simonnagl) + +- Multi-server dashboards now allow de-coupled refreshes for + each chart, so that if one netdata has a network latency + the other charts are not affected + +- Several other minor improvements and bugfixes + +netdata (1.1.0) - 2016-04-20 + + Dozens of commits that improve netdata in several ways: + +- Data collection: added IPv6 monitoring +- Data collection: added SYNPROXY DDoS protection monitoring +- Data collection: apps.plugin: added charts for users and user groups +- Data collection: apps.plugin: grouping of processes now support patterns +- Data collection: apps.plugin: now it is faster, after the new features added +- Data collection: better auto-detection of partitions for disk monitoring +- Data collection: better fireqos integration for QoS monitoring +- Data collection: squid monitoring now uses squidclient +- Data collection: SNMP monitoring now supports 64bit counters +- API: fixed issues in CSV output generation +- API: netdata can now be restricted to listen on a specific IP +- Core and apps.plugin: error log flood protection +- Dashboard: better error handling when the netdata server is unreachable +- Dashboard: each chart now has a toolbox +- Dashboard: on-line help support +- Dashboard: check for netdata updates button +- Dashboard: added example /tv.html dashboard +- Packaging: now compiles with musl libc (alpine linux) +- Packaging: added debian packaging +- Packaging: support non-root installations +- Packaging: the installer generates uninstall script + +netdata (1.0.0) - 2016-03-22 + +- first public release + +netdata (1.0.0-rc.1) - 2015-11-28 + +- initial packaging diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1351f44 --- /dev/null +++ b/LICENSE @@ -0,0 +1,684 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. + +--------------------------------------------------------------------------- + +Note: +Individual files contain the following tag instead of the full license text. + + SPDX-License-Identifier: GPL-3.0-or-later + +This enables machine processing of license information based on the SPDX +License Identifiers that are here available: http://spdx.org/licenses/ diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..5e16052 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,1317 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = foreign subdir-objects 1.11 +ACLOCAL_AMFLAGS = -I build/m4 + +nodist_netdata_SOURCES=$(NULL) +BUILT_SOURCES=$(NULL) + +MAINTAINERCLEANFILES = \ + config.log config.status \ + $(srcdir)/Makefile.in \ + $(srcdir)/config.h.in $(srcdir)/config.h.in~ $(srcdir)/configure \ + $(srcdir)/install-sh $(srcdir)/ltmain.sh $(srcdir)/missing \ + $(srcdir)/compile $(srcdir)/depcomp $(srcdir)/aclocal.m4 \ + $(srcdir)/config.guess $(srcdir)/config.sub \ + $(srcdir)/m4/ltsugar.m4 $(srcdir)/m4/libtool.m4 \ + $(srcdir)/m4/ltversion.m4 $(srcdir)/m4/lt~obsolete.m4 \ + $(srcdir)/m4/ltoptions.m4 \ + $(srcdir)/pkcs11-helper.spec $(srcdir)/config-w32-vc.h + +CLEANFILES = \ + $(srcdir)/$(distdir).tar.gz + +EXTRA_DIST = \ + .gitignore \ + .csslintrc \ + .eslintignore \ + .eslintrc \ + .github/CODEOWNERS \ + build/m4/jemalloc.m4 \ + build/m4/ax_check_enable_debug.m4 \ + build/m4/ax_c_mallinfo.m4 \ + build/m4/ax_gcc_func_attribute.m4 \ + build/m4/ax_check_compile_flag.m4 \ + build/m4/ax_c_statement_expressions.m4 \ + build/m4/ax_pthread.m4 \ + build/m4/ax_c_lto.m4 \ + build/m4/ax_c_mallopt.m4 \ + build/m4/tcmalloc.m4 \ + build/m4/ax_c__generic.m4 \ + ml/dlib \ + README.md \ + LICENSE \ + REDISTRIBUTED.md \ + $(NULL) + +SUBDIRS = \ + diagrams \ + system \ + tests \ + $(NULL) + +dist_noinst_DATA = \ + CHANGELOG.md \ + cppcheck.sh \ + configs.signatures \ + contrib \ + docs \ + mqtt_websockets \ + netdata.cppcheck \ + netdata.spec \ + packaging/bundle-ebpf.sh \ + packaging/bundle-ebpf-co-re.sh \ + packaging/bundle-libbpf.sh \ + packaging/check-kernel-config.sh \ + packaging/ebpf.checksums \ + packaging/ebpf.version \ + packaging/ebpf-co-re.checksums \ + packaging/ebpf-co-re.version \ + packaging/go.d.checksums \ + packaging/go.d.version \ + packaging/installer/README.md \ + packaging/installer/UNINSTALL.md \ + packaging/installer/UPDATE.md \ + packaging/jsonc.checksums \ + packaging/jsonc.version \ + packaging/libbpf.checksums \ + packaging/libbpf.version \ + packaging/protobuf.checksums \ + packaging/protobuf.version \ + packaging/version \ + $(NULL) + +# until integrated within build +# should be proper init.d/openrc/systemd usable +dist_noinst_SCRIPTS = \ + coverity-scan.sh \ + packaging/installer/netdata-updater.sh \ + packaging/installer/netdata-uninstaller.sh \ + packaging/installer/kickstart.sh \ + packaging/installer/kickstart-static64.sh \ + packaging/installer/functions.sh \ + netdata-installer.sh + $(NULL) + +# ----------------------------------------------------------------------------- +# Compile netdata binaries + +SUBDIRS += \ + collectors \ + daemon \ + database \ + exporting \ + health \ + libnetdata \ + registry \ + streaming \ + web \ + claim \ + parser \ + spawn \ + $(NULL) + +AM_CFLAGS = \ + $(OPTIONAL_MATH_CFLAGS) \ + $(OPTIONAL_NFACCT_CFLAGS) \ + $(OPTIONAL_ZLIB_CFLAGS) \ + $(OPTIONAL_UUID_CFLAGS) \ + $(OPTIONAL_MQTT_CFLAGS) \ + $(OPTIONAL_LIBCAP_LIBS) \ + $(OPTIONAL_IPMIMONITORING_CFLAGS) \ + $(OPTIONAL_CUPS_CFLAGS) \ + $(OPTIONAL_XENSTAT_CFLAGS) \ + $(OPTIONAL_BPF_CFLAGS) \ + $(NULL) + +sbin_PROGRAMS = +plugins_PROGRAMS = + +LIBNETDATA_FILES = \ + libnetdata/adaptive_resortable_list/adaptive_resortable_list.c \ + libnetdata/adaptive_resortable_list/adaptive_resortable_list.h \ + libnetdata/config/appconfig.c \ + libnetdata/config/appconfig.h \ + libnetdata/arrayalloc/arrayalloc.c \ + libnetdata/arrayalloc/arrayalloc.h \ + libnetdata/avl/avl.c \ + libnetdata/avl/avl.h \ + libnetdata/buffer/buffer.c \ + libnetdata/buffer/buffer.h \ + libnetdata/circular_buffer/circular_buffer.c \ + libnetdata/circular_buffer/circular_buffer.h \ + libnetdata/clocks/clocks.c \ + libnetdata/clocks/clocks.h \ + libnetdata/completion/completion.c \ + libnetdata/completion/completion.h \ + libnetdata/dictionary/dictionary.c \ + libnetdata/dictionary/dictionary.h \ + libnetdata/eval/eval.c \ + libnetdata/eval/eval.h \ + libnetdata/inlined.h \ + libnetdata/libnetdata.c \ + libnetdata/libnetdata.h \ + libnetdata/required_dummies.h \ + libnetdata/locks/locks.c \ + libnetdata/locks/locks.h \ + libnetdata/log/log.c \ + libnetdata/log/log.h \ + libnetdata/onewayalloc/onewayalloc.c \ + libnetdata/onewayalloc/onewayalloc.h \ + libnetdata/popen/popen.c \ + libnetdata/popen/popen.h \ + libnetdata/procfile/procfile.c \ + libnetdata/procfile/procfile.h \ + libnetdata/os.c \ + libnetdata/os.h \ + libnetdata/simple_pattern/simple_pattern.c \ + libnetdata/simple_pattern/simple_pattern.h \ + libnetdata/socket/socket.c \ + libnetdata/socket/socket.h \ + libnetdata/socket/security.c \ + libnetdata/socket/security.h \ + libnetdata/statistical/statistical.c \ + libnetdata/statistical/statistical.h \ + libnetdata/string/string.c \ + libnetdata/string/string.h \ + libnetdata/storage_number/storage_number.c \ + libnetdata/storage_number/storage_number.h \ + libnetdata/threads/threads.c \ + libnetdata/threads/threads.h \ + libnetdata/url/url.c \ + libnetdata/url/url.h \ + libnetdata/json/json.c \ + libnetdata/json/json.h \ + libnetdata/json/jsmn.c \ + libnetdata/json/jsmn.h \ + libnetdata/health/health.c \ + libnetdata/health/health.h \ + libnetdata/string/utf8.h \ + libnetdata/worker_utilization/worker_utilization.c \ + libnetdata/worker_utilization/worker_utilization.h \ + $(NULL) + +if ENABLE_PLUGIN_EBPF + LIBNETDATA_FILES += \ + libnetdata/ebpf/ebpf.c \ + libnetdata/ebpf/ebpf.h \ + $(NULL) +endif + +APPS_PLUGIN_FILES = \ + collectors/apps.plugin/apps_plugin.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +FREEBSD_PLUGIN_FILES = \ + collectors/freebsd.plugin/plugin_freebsd.c \ + collectors/freebsd.plugin/plugin_freebsd.h \ + collectors/freebsd.plugin/freebsd_sysctl.c \ + collectors/freebsd.plugin/freebsd_getmntinfo.c \ + collectors/freebsd.plugin/freebsd_getifaddrs.c \ + collectors/freebsd.plugin/freebsd_devstat.c \ + collectors/freebsd.plugin/freebsd_kstat_zfs.c \ + collectors/freebsd.plugin/freebsd_ipfw.c \ + collectors/proc.plugin/zfs_common.c \ + collectors/proc.plugin/zfs_common.h \ + $(NULL) + +HEALTH_PLUGIN_FILES = \ + health/health.c \ + health/health.h \ + health/health_config.c \ + health/health_json.c \ + health/health_log.c \ + $(NULL) + +ML_FILES = \ + ml/ml.h \ + ml/ml-dummy.c \ + $(NULL) + +if ENABLE_ML + +ML_FILES += \ + ml/ADCharts.h \ + ml/ADCharts.cc \ + ml/Config.h \ + ml/Config.cc \ + ml/Dimension.cc \ + ml/Dimension.h \ + ml/Host.h \ + ml/Host.cc \ + ml/Query.h \ + ml/KMeans.h \ + ml/KMeans.cc \ + ml/SamplesBuffer.h \ + ml/SamplesBuffer.cc \ + ml/dlib/dlib/all/source.cpp \ + ml/json/single_include/nlohmann/json.hpp \ + ml/ml.cc \ + ml/ml-private.h \ + $(NULL) + +# Disable warnings from dlib library +ml/dlib/dlib/all/source.$(OBJEXT) : CXXFLAGS += -Wno-sign-compare -Wno-type-limits -Wno-aggressive-loop-optimizations -Wno-stringop-overflow -Wno-psabi + +# Disable ml warnings +ml/Dimension.$(OBJEXT) : CXXFLAGS += -Wno-psabi +ml/Host.$(OBJEXT) : CXXFLAGS += -Wno-psabi +ml/KMeans.$(OBJEXT) : CXXFLAGS += -Wno-psabi +ml/ml.$(OBJEXT) : CXXFLAGS += -Wno-psabi + +endif + + +if ENABLE_ML_TESTS +ML_TESTS_FILES = \ + ml/SamplesBufferTests.cc \ + $(NULL) +endif + +IDLEJITTER_PLUGIN_FILES = \ + collectors/idlejitter.plugin/plugin_idlejitter.c \ + $(NULL) + +CGROUPS_PLUGIN_FILES = \ + collectors/cgroups.plugin/sys_fs_cgroup.c \ + collectors/cgroups.plugin/sys_fs_cgroup.h \ + $(NULL) + +CGROUP_NETWORK_FILES = \ + collectors/cgroups.plugin/cgroup-network.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +DISKSPACE_PLUGIN_FILES = \ + collectors/diskspace.plugin/plugin_diskspace.c \ + $(NULL) + +TIMEX_PLUGIN_FILES = \ + collectors/timex.plugin/plugin_timex.c \ + $(NULL) + +FREEIPMI_PLUGIN_FILES = \ + collectors/freeipmi.plugin/freeipmi_plugin.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +CUPS_PLUGIN_FILES = \ + collectors/cups.plugin/cups_plugin.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +NFACCT_PLUGIN_FILES = \ + collectors/nfacct.plugin/plugin_nfacct.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +SLABINFO_PLUGIN_FILES = \ + collectors/slabinfo.plugin/slabinfo.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +XENSTAT_PLUGIN_FILES = \ + collectors/xenstat.plugin/xenstat_plugin.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +PERF_PLUGIN_FILES = \ + collectors/perf.plugin/perf_plugin.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + +EBPF_PLUGIN_FILES = \ + collectors/ebpf.plugin/ebpf.c \ + collectors/ebpf.plugin/ebpf_cachestat.c \ + collectors/ebpf.plugin/ebpf_cachestat.h \ + collectors/ebpf.plugin/ebpf_dcstat.c \ + collectors/ebpf.plugin/ebpf_dcstat.h \ + collectors/ebpf.plugin/ebpf_disk.c \ + collectors/ebpf.plugin/ebpf_disk.h \ + collectors/ebpf.plugin/ebpf_fd.c \ + collectors/ebpf.plugin/ebpf_fd.h \ + collectors/ebpf.plugin/ebpf_filesystem.c \ + collectors/ebpf.plugin/ebpf_filesystem.h \ + collectors/ebpf.plugin/ebpf_hardirq.c \ + collectors/ebpf.plugin/ebpf_hardirq.h \ + collectors/ebpf.plugin/ebpf_mdflush.c \ + collectors/ebpf.plugin/ebpf_mdflush.h \ + collectors/ebpf.plugin/ebpf_mount.c \ + collectors/ebpf.plugin/ebpf_mount.h \ + collectors/ebpf.plugin/ebpf_oomkill.c \ + collectors/ebpf.plugin/ebpf_oomkill.h \ + collectors/ebpf.plugin/ebpf_process.c \ + collectors/ebpf.plugin/ebpf_process.h \ + collectors/ebpf.plugin/ebpf_shm.c \ + collectors/ebpf.plugin/ebpf_shm.h \ + collectors/ebpf.plugin/ebpf_socket.c \ + collectors/ebpf.plugin/ebpf_socket.h \ + collectors/ebpf.plugin/ebpf_softirq.c \ + collectors/ebpf.plugin/ebpf_softirq.h \ + collectors/ebpf.plugin/ebpf_sync.c \ + collectors/ebpf.plugin/ebpf_sync.h \ + collectors/ebpf.plugin/ebpf_swap.c \ + collectors/ebpf.plugin/ebpf_swap.h \ + collectors/ebpf.plugin/ebpf_vfs.c \ + collectors/ebpf.plugin/ebpf_vfs.h \ + collectors/ebpf.plugin/ebpf.h \ + collectors/ebpf.plugin/ebpf_apps.c \ + collectors/ebpf.plugin/ebpf_apps.h \ + collectors/ebpf.plugin/ebpf_cgroup.c \ + collectors/ebpf.plugin/ebpf_cgroup.h \ + $(LIBNETDATA_FILES) \ + $(NULL) + +PROC_PLUGIN_FILES = \ + collectors/proc.plugin/ipc.c \ + collectors/proc.plugin/plugin_proc.c \ + collectors/proc.plugin/plugin_proc.h \ + collectors/proc.plugin/proc_diskstats.c \ + collectors/proc.plugin/proc_mdstat.c \ + collectors/proc.plugin/proc_interrupts.c \ + collectors/proc.plugin/proc_softirqs.c \ + collectors/proc.plugin/proc_loadavg.c \ + collectors/proc.plugin/proc_meminfo.c \ + collectors/proc.plugin/proc_pagetypeinfo.c \ + collectors/proc.plugin/proc_pressure.c \ + collectors/proc.plugin/proc_pressure.h \ + collectors/proc.plugin/proc_net_dev.c \ + collectors/proc.plugin/proc_net_wireless.c \ + collectors/proc.plugin/proc_net_ip_vs_stats.c \ + collectors/proc.plugin/proc_net_netstat.c \ + collectors/proc.plugin/proc_net_rpc_nfs.c \ + collectors/proc.plugin/proc_net_rpc_nfsd.c \ + collectors/proc.plugin/proc_net_sctp_snmp.c \ + collectors/proc.plugin/proc_net_sockstat.c \ + collectors/proc.plugin/proc_net_sockstat6.c \ + collectors/proc.plugin/proc_net_softnet_stat.c \ + collectors/proc.plugin/proc_net_stat_conntrack.c \ + collectors/proc.plugin/proc_net_stat_synproxy.c \ + collectors/proc.plugin/proc_self_mountinfo.c \ + collectors/proc.plugin/proc_self_mountinfo.h \ + collectors/proc.plugin/zfs_common.c \ + collectors/proc.plugin/zfs_common.h \ + collectors/proc.plugin/proc_spl_kstat_zfs.c \ + collectors/proc.plugin/proc_stat.c \ + collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c \ + collectors/proc.plugin/proc_vmstat.c \ + collectors/proc.plugin/proc_uptime.c \ + collectors/proc.plugin/sys_kernel_mm_ksm.c \ + collectors/proc.plugin/sys_block_zram.c \ + collectors/proc.plugin/sys_devices_system_edac_mc.c \ + collectors/proc.plugin/sys_devices_system_node.c \ + collectors/proc.plugin/sys_fs_btrfs.c \ + collectors/proc.plugin/sys_class_power_supply.c \ + collectors/proc.plugin/sys_class_infiniband.c \ + $(NULL) + +TC_PLUGIN_FILES = \ + collectors/tc.plugin/plugin_tc.c \ + $(NULL) + +MACOS_PLUGIN_FILES = \ + collectors/macos.plugin/plugin_macos.c \ + collectors/macos.plugin/plugin_macos.h \ + collectors/macos.plugin/macos_sysctl.c \ + collectors/macos.plugin/macos_mach_smi.c \ + collectors/macos.plugin/macos_fw.c \ + $(NULL) + +PLUGINSD_PLUGIN_FILES = \ + collectors/plugins.d/plugins_d.c \ + collectors/plugins.d/plugins_d.h \ + collectors/plugins.d/pluginsd_parser.c \ + collectors/plugins.d/pluginsd_parser.h \ + $(NULL) + +RRD_PLUGIN_FILES = \ + database/rrdcalc.c \ + database/rrdcalc.h \ + database/rrdcalctemplate.c \ + database/rrdcalctemplate.h \ + database/rrdcontext.c \ + database/rrdcontext.h \ + database/rrddim.c \ + database/rrddimvar.c \ + database/rrddimvar.h \ + database/rrdfamily.c \ + database/rrdhost.c \ + database/rrdlabels.c \ + database/rrd.c \ + database/rrd.h \ + database/rrdset.c \ + database/rrdfunctions.c \ + database/rrdfunctions.h \ + database/rrdsetvar.c \ + database/rrdsetvar.h \ + database/rrdvar.c \ + database/rrdvar.h \ + database/storage_engine.c \ + database/storage_engine.h \ + database/ram/rrddim_mem.c \ + database/ram/rrddim_mem.h \ + database/sqlite/sqlite_functions.c \ + database/sqlite/sqlite_functions.h \ + database/sqlite/sqlite_context.c \ + database/sqlite/sqlite_context.h \ + database/sqlite/sqlite_db_migration.c \ + database/sqlite/sqlite_db_migration.h \ + database/sqlite/sqlite_aclk.c \ + database/sqlite/sqlite_aclk.h \ + database/sqlite/sqlite_metadata.c \ + database/sqlite/sqlite_metadata.h \ + database/sqlite/sqlite_health.c \ + database/sqlite/sqlite_health.h \ + database/sqlite/sqlite_aclk_node.c \ + database/sqlite/sqlite_aclk_node.h \ + database/sqlite/sqlite_aclk_alert.c \ + database/sqlite/sqlite_aclk_alert.h \ + database/sqlite/sqlite3.c \ + database/sqlite/sqlite3.h \ + database/KolmogorovSmirnovDist.c \ + database/KolmogorovSmirnovDist.h \ + $(NULL) + +database/sqlite/sqlite3.$(OBJEXT) : CFLAGS += -Wno-cast-function-type +database/KolmogorovSmirnovDist.$(OBJEXT) : CFLAGS += -Wno-maybe-uninitialized + +noinst_LIBRARIES = libjudy.a + +libjudy_a_SOURCES = libnetdata/libjudy/src/Judy.h \ + libnetdata/libjudy/src/JudyCommon/JudyMalloc.c \ + libnetdata/libjudy/src/JudyCommon/JudyPrivate.h \ + libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h \ + libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h \ + libnetdata/libjudy/src/JudyL/JudyL.h \ + libnetdata/libjudy/src/JudyL/JudyLByCount.c \ + libnetdata/libjudy/src/JudyL/JudyLCascade.c \ + libnetdata/libjudy/src/JudyL/JudyLCount.c \ + libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c \ + libnetdata/libjudy/src/JudyL/JudyLDecascade.c \ + libnetdata/libjudy/src/JudyL/JudyLDel.c \ + libnetdata/libjudy/src/JudyL/JudyLFirst.c \ + libnetdata/libjudy/src/JudyL/JudyLFreeArray.c \ + libnetdata/libjudy/src/JudyL/j__udyLGet.c \ + libnetdata/libjudy/src/JudyL/JudyLGet.c \ + libnetdata/libjudy/src/JudyL/JudyLInsArray.c \ + libnetdata/libjudy/src/JudyL/JudyLIns.c \ + libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c \ + libnetdata/libjudy/src/JudyL/JudyLMallocIF.c \ + libnetdata/libjudy/src/JudyL/JudyLMemActive.c \ + libnetdata/libjudy/src/JudyL/JudyLMemUsed.c \ + libnetdata/libjudy/src/JudyL/JudyLNext.c \ + libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c \ + libnetdata/libjudy/src/JudyL/JudyLPrev.c \ + libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c \ + libnetdata/libjudy/src/JudyHS/JudyHS.c \ + $(NULL) + +nodist_libjudy_a_SOURCES = JudyLTables.c + +BUILT_SOURCES += JudyLTables.c + +CLEANFILES += JudyLTables.c + +libjudy_a_CFLAGS = $(LIBJUDY_CFLAGS) -DJUDYL -I$(abs_top_srcdir)/libnetdata/libjudy/src -I$(abs_top_srcdir)/libnetdata/libjudy/src/JudyCommon -Wno-sign-compare -Wno-implicit-fallthrough + +libnetdata/libjudy/src/JudyL/libjudy_a-JudyLPrev.$(OBJEXT) : CFLAGS += -DJUDYPREV +libnetdata/libjudy/src/JudyL/libjudy_a-JudyLPrevEmpty.$(OBJEXT) : CFLAGS += -DJUDYPREV +libnetdata/libjudy/src/JudyL/libjudy_a-JudyLNext.$(OBJEXT) : CFLAGS += -DJUDYNEXT +libnetdata/libjudy/src/JudyL/libjudy_a-JudyLNextEmpty.$(OBJEXT) : CFLAGS += -DJUDYNEXT +libnetdata/libjudy/src/JudyL/libjudy_a-JudyLByCount.$(OBJEXT) : CFLAGS += -DNOSMARTJBB -DNOSMARTJBU -DNOSMARTJLB +libnetdata/libjudy/src/JudyL/libjudy_a-j__udyLGet.$(OBJEXT) : CFLAGS += -DJUDYGETINLINE + +noinst_PROGRAMS = judyltablesgen + +judyltablesgen_SOURCES = libnetdata/libjudy/src/JudyL/JudyLTablesGen.c +judyltablesgen_CFLAGS = $(LIBJUDY_CFLAGS) -DJUDYL -I$(abs_top_srcdir)/libnetdata/libjudy/src -I$(abs_top_srcdir)/libnetdata/libjudy/src/JudyCommon -Wno-sign-compare -Wno-implicit-fallthrough + +$(builddir)/judyltablesgen$(EXEEXT) : CFLAGS += -Wno-format -Wno-format-security + +JudyLTables.c: $(abs_top_srcdir)/libnetdata/libjudy/src/JudyL/JudyLTablesGen.c $(builddir)/judyltablesgen$(EXEEXT) + $(builddir)/judyltablesgen$(EXEEXT) + +libjudy_a-JudyLTables.$(OBJEXT) : CFLAGS += -I$(abs_top_srcdir)/libnetdata/libjudy/src/JudyL + +if ENABLE_DBENGINE + RRD_PLUGIN_FILES += \ + database/engine/rrdengine.c \ + database/engine/rrdengine.h \ + database/engine/rrddiskprotocol.h \ + database/engine/datafile.c \ + database/engine/datafile.h \ + database/engine/journalfile.c \ + database/engine/journalfile.h \ + database/engine/rrdenginelib.c \ + database/engine/rrdenginelib.h \ + database/engine/rrdengineapi.c \ + database/engine/rrdengineapi.h \ + database/engine/pagecache.c \ + database/engine/pagecache.h \ + database/engine/rrdenglocking.c \ + database/engine/rrdenglocking.h \ + $(NULL) +endif + +API_PLUGIN_FILES = \ + web/api/badges/web_buffer_svg.c \ + web/api/badges/web_buffer_svg.h \ + web/api/exporters/allmetrics.c \ + web/api/exporters/allmetrics.h \ + web/api/exporters/shell/allmetrics_shell.c \ + web/api/exporters/shell/allmetrics_shell.h \ + web/api/queries/average/average.c \ + web/api/queries/average/average.h \ + web/api/queries/countif/countif.c \ + web/api/queries/countif/countif.h \ + web/api/queries/des/des.c \ + web/api/queries/des/des.h \ + web/api/queries/incremental_sum/incremental_sum.c \ + web/api/queries/incremental_sum/incremental_sum.h \ + web/api/queries/max/max.c \ + web/api/queries/max/max.h \ + web/api/queries/median/median.c \ + web/api/queries/median/median.h \ + web/api/queries/min/min.c \ + web/api/queries/min/min.h \ + web/api/queries/percentile/percentile.c \ + web/api/queries/percentile/percentile.h \ + web/api/queries/trimmed_mean/trimmed_mean.c \ + web/api/queries/trimmed_mean/trimmed_mean.h \ + web/api/queries/query.c \ + web/api/queries/query.h \ + web/api/queries/rrdr.c \ + web/api/queries/rrdr.h \ + web/api/queries/ses/ses.c \ + web/api/queries/ses/ses.h \ + web/api/queries/stddev/stddev.c \ + web/api/queries/stddev/stddev.h \ + web/api/queries/sum/sum.c \ + web/api/queries/sum/sum.h \ + web/api/queries/weights.c \ + web/api/queries/weights.h \ + web/api/formatters/rrd2json.c \ + web/api/formatters/rrd2json.h \ + web/api/formatters/csv/csv.c \ + web/api/formatters/csv/csv.h \ + web/api/formatters/json/json.c \ + web/api/formatters/json/json.h \ + web/api/formatters/ssv/ssv.c \ + web/api/formatters/ssv/ssv.h \ + web/api/formatters/value/value.c \ + web/api/formatters/value/value.h \ + web/api/formatters/json_wrapper.c \ + web/api/formatters/json_wrapper.h \ + web/api/formatters/charts2json.c \ + web/api/formatters/charts2json.h \ + web/api/formatters/rrdset2json.c \ + web/api/formatters/rrdset2json.h \ + web/api/health/health_cmdapi.c \ + web/api/health/health_cmdapi.h \ + web/api/web_api_v1.c \ + web/api/web_api_v1.h \ + $(NULL) + +STREAMING_PLUGIN_FILES = \ + streaming/rrdpush.c \ + streaming/compression.c \ + streaming/sender.c \ + streaming/receiver.c \ + streaming/replication.h \ + streaming/replication.c \ + streaming/rrdpush.h \ + $(NULL) + +REGISTRY_PLUGIN_FILES = \ + registry/registry.c \ + registry/registry.h \ + registry/registry_db.c \ + registry/registry_init.c \ + registry/registry_internals.c \ + registry/registry_internals.h \ + registry/registry_log.c \ + registry/registry_machine.c \ + registry/registry_machine.h \ + registry/registry_person.c \ + registry/registry_person.h \ + registry/registry_url.c \ + registry/registry_url.h \ + $(NULL) + +STATSD_PLUGIN_FILES = \ + collectors/statsd.plugin/statsd.c \ + $(NULL) + +WEB_PLUGIN_FILES = \ + web/server/web_client.c \ + web/server/web_client.h \ + web/server/web_server.c \ + web/server/web_server.h \ + web/server/web_client_cache.c \ + web/server/web_client_cache.h \ + web/server/static/static-threaded.c \ + web/server/static/static-threaded.h \ + $(NULL) + +CLAIM_FILES = \ + claim/claim.c \ + claim/claim.h \ + $(NULL) + +PARSER_FILES = \ + parser/parser.c \ + parser/parser.h \ + $(NULL) + +if ENABLE_ACLK +ACLK_FILES = \ + aclk/aclk_util.c \ + aclk/aclk_util.h \ + aclk/aclk_stats.c \ + aclk/aclk_stats.h \ + aclk/aclk_query.c \ + aclk/aclk_query.h \ + aclk/aclk_query_queue.c \ + aclk/aclk_query_queue.h \ + aclk/aclk_otp.c \ + aclk/aclk_otp.h \ + aclk/aclk_tx_msgs.c \ + aclk/aclk_tx_msgs.h \ + aclk/aclk_rx_msgs.c \ + aclk/aclk_rx_msgs.h \ + aclk/https_client.c \ + aclk/https_client.h \ + aclk/aclk_alarm_api.c \ + aclk/aclk_alarm_api.h \ + aclk/aclk_contexts_api.c \ + aclk/aclk_contexts_api.h \ + aclk/aclk_capas.c \ + aclk/aclk_capas.h \ + aclk/helpers/mqtt_wss_pal.h \ + aclk/helpers/ringbuffer_pal.h \ + aclk/schema-wrappers/connection.cc \ + aclk/schema-wrappers/connection.h \ + aclk/schema-wrappers/node_connection.cc \ + aclk/schema-wrappers/node_connection.h \ + aclk/schema-wrappers/node_creation.cc \ + aclk/schema-wrappers/node_creation.h \ + aclk/schema-wrappers/alarm_stream.cc \ + aclk/schema-wrappers/alarm_stream.h \ + aclk/schema-wrappers/alarm_config.cc \ + aclk/schema-wrappers/alarm_config.h \ + aclk/schema-wrappers/node_info.cc \ + aclk/schema-wrappers/node_info.h \ + aclk/schema-wrappers/capability.cc \ + aclk/schema-wrappers/capability.h \ + aclk/schema-wrappers/proto_2_json.cc \ + aclk/schema-wrappers/proto_2_json.h \ + aclk/schema-wrappers/schema_wrappers.h \ + aclk/schema-wrappers/schema_wrapper_utils.cc \ + aclk/schema-wrappers/schema_wrapper_utils.h \ + aclk/schema-wrappers/context_stream.cc \ + aclk/schema-wrappers/context_stream.h \ + aclk/schema-wrappers/context.cc \ + aclk/schema-wrappers/context.h \ + $(NULL) + +noinst_LIBRARIES += libmqttwebsockets.a + +libmqttwebsockets_a_SOURCES = \ + mqtt_websockets/src/mqtt_wss_client.c \ + mqtt_websockets/src/include/mqtt_wss_client.h \ + mqtt_websockets/src/mqtt_wss_log.c \ + mqtt_websockets/src/include/mqtt_wss_log.h \ + mqtt_websockets/src/ws_client.c \ + mqtt_websockets/src/include/ws_client.h \ + mqtt_websockets/src/mqtt_ng.c \ + mqtt_websockets/src/include/mqtt_ng.h \ + mqtt_websockets/src/common_public.c \ + mqtt_websockets/src/include/common_public.h \ + mqtt_websockets/src/include/common_internal.h \ + mqtt_websockets/c-rbuf/src/ringbuffer.c \ + mqtt_websockets/c-rbuf/include/ringbuffer.h \ + mqtt_websockets/c-rbuf/src/ringbuffer_internal.h \ + mqtt_websockets/MQTT-C/src/mqtt.c \ + mqtt_websockets/MQTT-C/include/mqtt.h + +libmqttwebsockets_a_CFLAGS = $(CFLAGS) -DMQTT_WSS_CUSTOM_ALLOC -DRBUF_CUSTOM_MALLOC -I$(srcdir)/aclk/helpers + +mqtt_websockets/src/mqtt_wss_client.$(OBJEXT) : CFLAGS += -Wno-unused-result + +ACLK_PROTO_DEFINITIONS = \ + aclk/aclk-schemas/proto/aclk/v1/lib.proto \ + aclk/aclk-schemas/proto/agent/v1/disconnect.proto \ + aclk/aclk-schemas/proto/agent/v1/connection.proto \ + aclk/aclk-schemas/proto/alarm/v1/config.proto \ + aclk/aclk-schemas/proto/alarm/v1/stream.proto \ + aclk/aclk-schemas/proto/nodeinstance/connection/v1/connection.proto \ + aclk/aclk-schemas/proto/nodeinstance/create/v1/creation.proto \ + aclk/aclk-schemas/proto/nodeinstance/info/v1/info.proto \ + aclk/aclk-schemas/proto/context/v1/context.proto \ + aclk/aclk-schemas/proto/context/v1/stream.proto \ + $(NULL) + +dist_noinst_DATA += $(ACLK_PROTO_DEFINITIONS) + +ACLK_PROTO_BUILT_FILES = aclk/aclk-schemas/proto/agent/v1/connection.pb.cc \ + aclk/aclk-schemas/proto/agent/v1/connection.pb.h \ + aclk/aclk-schemas/proto/nodeinstance/connection/v1/connection.pb.cc \ + aclk/aclk-schemas/proto/nodeinstance/connection/v1/connection.pb.h \ + aclk/aclk-schemas/proto/nodeinstance/create/v1/creation.pb.cc \ + aclk/aclk-schemas/proto/nodeinstance/create/v1/creation.pb.h \ + aclk/aclk-schemas/proto/aclk/v1/lib.pb.cc \ + aclk/aclk-schemas/proto/aclk/v1/lib.pb.h \ + aclk/aclk-schemas/proto/agent/v1/disconnect.pb.cc \ + aclk/aclk-schemas/proto/agent/v1/disconnect.pb.h \ + aclk/aclk-schemas/proto/alarm/v1/config.pb.cc \ + aclk/aclk-schemas/proto/alarm/v1/config.pb.h \ + aclk/aclk-schemas/proto/alarm/v1/stream.pb.cc \ + aclk/aclk-schemas/proto/alarm/v1/stream.pb.h \ + aclk/aclk-schemas/proto/nodeinstance/info/v1/info.pb.cc \ + aclk/aclk-schemas/proto/nodeinstance/info/v1/info.pb.h \ + aclk/aclk-schemas/proto/context/v1/context.pb.cc \ + aclk/aclk-schemas/proto/context/v1/context.pb.h \ + aclk/aclk-schemas/proto/context/v1/stream.pb.cc \ + aclk/aclk-schemas/proto/context/v1/stream.pb.h \ + $(NULL) + +BUILT_SOURCES += $(ACLK_PROTO_BUILT_FILES) +nodist_netdata_SOURCES += $(ACLK_PROTO_BUILT_FILES) +CLEANFILES += $(ACLK_PROTO_BUILT_FILES) + +aclk/aclk-schemas/proto/agent/v1/connection.pb.cc \ +aclk/aclk-schemas/proto/agent/v1/connection.pb.h: aclk/aclk-schemas/proto/agent/v1/connection.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/nodeinstance/connection/v1/connection.pb.cc \ +aclk/aclk-schemas/proto/nodeinstance/connection/v1/connection.pb.h: aclk/aclk-schemas/proto/nodeinstance/connection/v1/connection.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/nodeinstance/create/v1/creation.pb.cc \ +aclk/aclk-schemas/proto/nodeinstance/create/v1/creation.pb.h: aclk/aclk-schemas/proto/nodeinstance/create/v1/creation.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/aclk/v1/lib.pb.cc \ +aclk/aclk-schemas/proto/aclk/v1/lib.pb.h: aclk/aclk-schemas/proto/aclk/v1/lib.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/agent/v1/disconnect.pb.cc \ +aclk/aclk-schemas/proto/agent/v1/disconnect.pb.h: aclk/aclk-schemas/proto/agent/v1/disconnect.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/alarm/v1/config.pb.cc \ +aclk/aclk-schemas/proto/alarm/v1/config.pb.h: aclk/aclk-schemas/proto/alarm/v1/config.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/alarm/v1/stream.pb.cc \ +aclk/aclk-schemas/proto/alarm/v1/stream.pb.h: aclk/aclk-schemas/proto/alarm/v1/stream.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/nodeinstance/info/v1/info.pb.cc \ +aclk/aclk-schemas/proto/nodeinstance/info/v1/info.pb.h: aclk/aclk-schemas/proto/nodeinstance/info/v1/info.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/context/v1/context.pb.cc \ +aclk/aclk-schemas/proto/context/v1/context.pb.h: aclk/aclk-schemas/proto/context/v1/context.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +aclk/aclk-schemas/proto/context/v1/stream.pb.cc \ +aclk/aclk-schemas/proto/context/v1/stream.pb.h: aclk/aclk-schemas/proto/context/v1/stream.proto + $(PROTOC) -I=aclk/aclk-schemas --cpp_out=$(builddir)/aclk/aclk-schemas $^ + +endif #ENABLE_ACLK + +ACLK_ALWAYS_BUILD_FILES = \ + aclk/aclk_rrdhost_state.h \ + aclk/aclk_proxy.c \ + aclk/aclk_proxy.h \ + aclk/aclk.c \ + aclk/aclk.h \ + $(NULL) + +SPAWN_PLUGIN_FILES = \ + spawn/spawn.c \ + spawn/spawn_server.c \ + spawn/spawn_client.c \ + spawn/spawn.h \ + $(NULL) + +EXPORTING_ENGINE_FILES = \ + exporting/exporting_engine.c \ + exporting/exporting_engine.h \ + exporting/graphite/graphite.c \ + exporting/graphite/graphite.h \ + exporting/json/json.c \ + exporting/json/json.h \ + exporting/opentsdb/opentsdb.c \ + exporting/opentsdb/opentsdb.h \ + exporting/prometheus/prometheus.c \ + exporting/prometheus/prometheus.h \ + exporting/read_config.c \ + exporting/clean_connectors.c \ + exporting/init_connectors.c \ + exporting/process_data.c \ + exporting/check_filters.c \ + exporting/send_data.c \ + exporting/send_internal_metrics.c \ + $(NULL) + +PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES = \ + exporting/prometheus/remote_write/remote_write.c \ + exporting/prometheus/remote_write/remote_write.h \ + exporting/prometheus/remote_write/remote_write_request.cc \ + exporting/prometheus/remote_write/remote_write_request.h \ + exporting/prometheus/remote_write/remote_write.proto \ + $(NULL) + +KINESIS_EXPORTING_FILES = \ + exporting/aws_kinesis/aws_kinesis.c \ + exporting/aws_kinesis/aws_kinesis.h \ + exporting/aws_kinesis/aws_kinesis_put_record.cc \ + exporting/aws_kinesis/aws_kinesis_put_record.h \ + $(NULL) + +PUBSUB_EXPORTING_FILES = \ + exporting/pubsub/pubsub.c \ + exporting/pubsub/pubsub.h \ + exporting/pubsub/pubsub_publish.cc \ + exporting/pubsub/pubsub_publish.h \ + $(NULL) + +MONGODB_EXPORTING_FILES = \ + exporting/mongodb/mongodb.c \ + exporting/mongodb/mongodb.h \ + $(NULL) + +DAEMON_FILES = \ + daemon/buildinfo.c \ + daemon/buildinfo.h \ + daemon/common.c \ + daemon/common.h \ + daemon/daemon.c \ + daemon/daemon.h \ + daemon/global_statistics.c \ + daemon/global_statistics.h \ + daemon/analytics.c \ + daemon/analytics.h \ + daemon/main.c \ + daemon/main.h \ + daemon/signals.c \ + daemon/signals.h \ + daemon/service.c \ + daemon/static_threads.h \ + daemon/static_threads.c \ + daemon/commands.c \ + daemon/commands.h \ + daemon/unit_test.c \ + daemon/unit_test.h \ + $(NULL) + +NETDATA_FILES = \ + collectors/all.h \ + $(DAEMON_FILES) \ + $(LIBNETDATA_FILES) \ + $(API_PLUGIN_FILES) \ + $(EXPORTING_ENGINE_FILES) \ + $(HEALTH_PLUGIN_FILES) \ + $(ML_FILES) \ + $(ML_TESTS_FILES) \ + $(IDLEJITTER_PLUGIN_FILES) \ + $(PLUGINSD_PLUGIN_FILES) \ + $(REGISTRY_PLUGIN_FILES) \ + $(RRD_PLUGIN_FILES) \ + $(STREAMING_PLUGIN_FILES) \ + $(STATSD_PLUGIN_FILES) \ + $(WEB_PLUGIN_FILES) \ + $(CLAIM_FILES) \ + $(PARSER_FILES) \ + $(ACLK_ALWAYS_BUILD_FILES) \ + $(ACLK_FILES) \ + $(SPAWN_PLUGIN_FILES) \ + $(TIMEX_PLUGIN_FILES) \ + $(NULL) + +if FREEBSD + NETDATA_FILES += \ + daemon/static_threads_freebsd.c \ + $(FREEBSD_PLUGIN_FILES) \ + $(NULL) +endif + +if MACOS + NETDATA_FILES += \ + daemon/static_threads_macos.c \ + $(MACOS_PLUGIN_FILES) \ + $(NULL) +endif + +if LINUX + NETDATA_FILES += \ + daemon/static_threads_linux.c \ + $(CGROUPS_PLUGIN_FILES) \ + $(DISKSPACE_PLUGIN_FILES) \ + $(PROC_PLUGIN_FILES) \ + $(TC_PLUGIN_FILES) \ + $(NULL) +endif + +NETDATA_COMMON_LIBS = \ + $(OPTIONAL_MATH_LIBS) \ + $(OPTIONAL_BPF_LIBS) \ + $(OPTIONAL_ZLIB_LIBS) \ + $(OPTIONAL_SSL_LIBS) \ + $(OPTIONAL_UUID_LIBS) \ + $(OPTIONAL_MQTT_LIBS) \ + $(OPTIONAL_UV_LIBS) \ + $(OPTIONAL_LZ4_LIBS) \ + libjudy.a \ + $(OPTIONAL_SSL_LIBS) \ + $(OPTIONAL_JSONC_LIBS) \ + $(OPTIONAL_ATOMIC_LIBS) \ + $(OPTIONAL_DL_LIBS) \ + $(NULL) + +if ENABLE_ACLK + NETDATA_COMMON_LIBS += libmqttwebsockets.a +endif + +if LINK_STATIC_JSONC + NETDATA_COMMON_LIBS += $(abs_top_srcdir)/externaldeps/jsonc/libjson-c.a +endif + +NETDATACLI_FILES = \ + daemon/commands.h \ + $(LIBNETDATA_FILES) \ + cli/cli.c \ + cli/cli.h \ + $(NULL) + +sbin_PROGRAMS += netdata +netdata_SOURCES = $(NETDATA_FILES) + +if LINUX + NETDATA_COMMON_LIBS += -lrt +endif + +netdata_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(NULL) + +if ENABLE_ACLK + netdata_LDADD += $(OPTIONAL_PROTOBUF_LIBS) \ + $(OPTIONAL_ATOMIC_LIBS) \ + $(NULL) +endif + +if ENABLE_ML_TESTS + netdata_LDADD += $(OPTIONAL_ML_TESTS_LIBS) \ + $(NULL) +endif + +netdata_LINK = $(CXXLD) $(CXXFLAGS) $(LDFLAGS) -o $@ + +sbin_PROGRAMS += netdatacli +netdatacli_SOURCES = $(NETDATACLI_FILES) +netdatacli_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(NULL) + +netdatacli_LINK = $(CXXLD) $(CXXFLAGS) $(LDFLAGS) -o $@ + +if ENABLE_PLUGIN_APPS + plugins_PROGRAMS += apps.plugin + apps_plugin_SOURCES = $(APPS_PLUGIN_FILES) + apps_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(OPTIONAL_LIBCAP_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_CGROUP_NETWORK + plugins_PROGRAMS += cgroup-network + cgroup_network_SOURCES = $(CGROUP_NETWORK_FILES) + cgroup_network_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_FREEIPMI + plugins_PROGRAMS += freeipmi.plugin + freeipmi_plugin_SOURCES = $(FREEIPMI_PLUGIN_FILES) + freeipmi_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(OPTIONAL_IPMIMONITORING_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_EBPF + plugins_PROGRAMS += ebpf.plugin + ebpf_plugin_SOURCES = $(EBPF_PLUGIN_FILES) + ebpf_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_CUPS + plugins_PROGRAMS += cups.plugin + cups_plugin_SOURCES = $(CUPS_PLUGIN_FILES) + cups_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(OPTIONAL_CUPS_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_NFACCT + plugins_PROGRAMS += nfacct.plugin + nfacct_plugin_SOURCES = $(NFACCT_PLUGIN_FILES) + nfacct_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(OPTIONAL_NFACCT_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_XENSTAT + plugins_PROGRAMS += xenstat.plugin + xenstat_plugin_SOURCES = $(XENSTAT_PLUGIN_FILES) + xenstat_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(OPTIONAL_XENSTAT_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_PERF + plugins_PROGRAMS += perf.plugin + perf_plugin_SOURCES = $(PERF_PLUGIN_FILES) + perf_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(NULL) +endif + +if ENABLE_PLUGIN_SLABINFO + plugins_PROGRAMS += slabinfo.plugin + slabinfo_plugin_SOURCES = $(SLABINFO_PLUGIN_FILES) + slabinfo_plugin_LDADD = \ + $(NETDATA_COMMON_LIBS) \ + $(NULL) +endif + +if ENABLE_EXPORTING_KINESIS + netdata_SOURCES += $(KINESIS_EXPORTING_FILES) + netdata_LDADD += $(OPTIONAL_KINESIS_LIBS) +endif + +if ENABLE_EXPORTING_PUBSUB + netdata_SOURCES += $(PUBSUB_EXPORTING_FILES) + netdata_LDADD += $(OPTIONAL_PUBSUB_LIBS) +endif + +if ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE + netdata_SOURCES += $(PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES) + netdata_LDADD += $(OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS) \ + $(OPTIONAL_PROTOBUF_LIBS) \ + $(NULL) + EXPORTING_PROMETHEUS_BUILT_SOURCES = \ + exporting/prometheus/remote_write/remote_write.pb.cc \ + exporting/prometheus/remote_write/remote_write.pb.h \ + $(NULL) + BUILT_SOURCES += $(EXPORTING_PROMETHEUS_BUILT_SOURCES) + nodist_netdata_SOURCES += $(EXPORTING_PROMETHEUS_BUILT_SOURCES) + +exporting/prometheus/remote_write/remote_write.pb.cc \ +exporting/prometheus/remote_write/remote_write.pb.h: exporting/prometheus/remote_write/remote_write.proto + $(PROTOC) --proto_path=$(srcdir) --cpp_out=$(builddir) $^ + +endif + +if ENABLE_EXPORTING_MONGODB + netdata_SOURCES += $(MONGODB_EXPORTING_FILES) + netdata_LDADD += $(OPTIONAL_MONGOC_LIBS) +endif + +if ENABLE_UNITTESTS + check_PROGRAMS = \ + libnetdata/tests/str2ld_testdriver \ + libnetdata/storage_number/tests/storage_number_testdriver \ + exporting/tests/exporting_engine_testdriver \ + web/api/tests/web_api_testdriver \ + web/api/tests/valid_urls_testdriver \ + collectors/cgroups_plugin/tests/cgroups_testdriver \ + $(NULL) + + TESTS = $(check_PROGRAMS) + + XFAIL_TESTS = \ + web/api/tests/web_api_testdriver \ + web/api/tests/valid_urls_testdriver \ + $(NULL) + + web_api_tests_valid_urls_testdriver_LDFLAGS = \ + -Wl,--wrap=rrdhost_find_by_hostname \ + -Wl,--wrap=finished_web_request_statistics \ + -Wl,--wrap=config_get \ + -Wl,--wrap=web_client_api_request_v1 \ + -Wl,--wrap=rrdhost_find_by_guid \ + -Wl,--wrap=rrdset_find_byname \ + -Wl,--wrap=rrdset_find \ + -Wl,--wrap=rrdpush_receiver_thread_spawn \ + -Wl,--wrap=debug_int \ + -Wl,--wrap=error_int \ + -Wl,--wrap=info_int \ + -Wl,--wrap=fatal_int \ + -Wl,--wrap=mysendfile \ + -DREMOVE_MYSENDFILE \ + $(TEST_LDFLAGS) \ + $(NULL) + web_api_tests_valid_urls_testdriver_SOURCES = \ + web/api/tests/valid_urls.c \ + web/server/web_client.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + web_api_tests_valid_urls_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) + + web_api_tests_web_api_testdriver_LDFLAGS = \ + -Wl,--wrap=rrdhost_find_by_hostname \ + -Wl,--wrap=finished_web_request_statistics \ + -Wl,--wrap=config_get \ + -Wl,--wrap=web_client_api_request_v1 \ + -Wl,--wrap=rrdhost_find_by_guid \ + -Wl,--wrap=rrdset_find_byname \ + -Wl,--wrap=rrdset_find \ + -Wl,--wrap=rrdpush_receiver_thread_spawn \ + -Wl,--wrap=debug_int \ + -Wl,--wrap=error_int \ + -Wl,--wrap=info_int \ + -Wl,--wrap=fatal_int \ + $(TEST_LDFLAGS) \ + $(NULL) + web_api_tests_web_api_testdriver_SOURCES = \ + web/api/tests/web_api.c \ + web/server/web_client.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + web_api_tests_web_api_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) + + libnetdata_tests_str2ld_testdriver_SOURCES = \ + libnetdata/tests/test_str2ld.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + libnetdata_tests_str2ld_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) + + libnetdata_storage_number_tests_storage_number_testdriver_SOURCES = \ + libnetdata/storage_number/tests/test_storage_number.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + libnetdata_storage_number_tests_storage_number_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) + + EXPORTING_ENGINE_TEST_FILES = \ + exporting/tests/test_exporting_engine.c \ + exporting/tests/test_exporting_engine.h \ + exporting/tests/exporting_fixtures.c \ + exporting/tests/exporting_doubles.c \ + exporting/tests/netdata_doubles.c \ + exporting/tests/system_doubles.c \ + $(NULL) + exporting_tests_exporting_engine_testdriver_SOURCES = \ + $(EXPORTING_ENGINE_TEST_FILES) \ + $(EXPORTING_ENGINE_FILES) \ + $(LIBNETDATA_FILES) \ + database/rrdlabels.c \ + database/rrdvar.c \ + $(NULL) + exporting_tests_exporting_engine_testdriver_CFLAGS = \ + $(AM_CFLAGS) \ + -DUNIT_TESTING \ + $(NULL) + exporting_tests_exporting_engine_testdriver_LDFLAGS = \ + -Wl,--wrap=read_exporting_config \ + -Wl,--wrap=init_connectors \ + -Wl,--wrap=mark_scheduled_instances \ + -Wl,--wrap=rrdhost_is_exportable \ + -Wl,--wrap=rrdset_is_exportable \ + -Wl,--wrap=exporting_calculate_value_from_stored_data \ + -Wl,--wrap=prepare_buffers \ + -Wl,--wrap=send_internal_metrics \ + -Wl,--wrap=now_realtime_sec \ + -Wl,--wrap=uv_thread_set_name_np \ + -Wl,--wrap=uv_thread_create \ + -Wl,--wrap=uv_mutex_lock \ + -Wl,--wrap=uv_mutex_unlock \ + -Wl,--wrap=uv_cond_signal \ + -Wl,--wrap=uv_cond_wait \ + -Wl,--wrap=strdupz \ + -Wl,--wrap=info_int \ + -Wl,--wrap=recv \ + -Wl,--wrap=send \ + -Wl,--wrap=connect_to_one_of \ + -Wl,--wrap=create_main_rusage_chart \ + -Wl,--wrap=send_main_rusage \ + -Wl,--wrap=simple_connector_end_batch \ + $(TEST_LDFLAGS) \ + $(NULL) + exporting_tests_exporting_engine_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) +if ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE + exporting_tests_exporting_engine_testdriver_SOURCES += $(PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES) + exporting_tests_exporting_engine_testdriver_LDADD += \ + $(OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS) \ + $(OPTIONAL_PROTOBUF_LIBS) \ + $(NULL) + exporting_tests_exporting_engine_testdriver_LDFLAGS += \ + -Wl,--wrap=init_write_request \ + -Wl,--wrap=add_host_info \ + -Wl,--wrap=add_label \ + -Wl,--wrap=add_metric \ + $(NULL) + nodist_exporting_tests_exporting_engine_testdriver_SOURCES = $(EXPORTING_PROMETHEUS_BUILT_SOURCES) +endif +if ENABLE_EXPORTING_KINESIS + exporting_tests_exporting_engine_testdriver_SOURCES += $(KINESIS_EXPORTING_FILES) + exporting_tests_exporting_engine_testdriver_LDADD += $(OPTIONAL_KINESIS_LIBS) + exporting_tests_exporting_engine_testdriver_LDFLAGS += \ + -Wl,--wrap=aws_sdk_init \ + -Wl,--wrap=kinesis_init \ + -Wl,--wrap=kinesis_put_record \ + -Wl,--wrap=kinesis_get_result \ + $(NULL) +endif +if ENABLE_EXPORTING_PUBSUB + exporting_tests_exporting_engine_testdriver_SOURCES += $(PUBSUB_EXPORTING_FILES) + exporting_tests_exporting_engine_testdriver_LDADD += $(OPTIONAL_PUBSUB_LIBS) + exporting_tests_exporting_engine_testdriver_LDFLAGS += \ + -Wl,--wrap=pubsub_init \ + -Wl,--wrap=pubsub_add_message \ + -Wl,--wrap=pubsub_publish \ + -Wl,--wrap=pubsub_get_result \ + $(NULL) +endif +if ENABLE_EXPORTING_MONGODB + exporting_tests_exporting_engine_testdriver_SOURCES += $(MONGODB_EXPORTING_FILES) + exporting_tests_exporting_engine_testdriver_LDADD += $(OPTIONAL_MONGOC_LIBS) + exporting_tests_exporting_engine_testdriver_LDFLAGS += \ + -Wl,--wrap=mongoc_init \ + -Wl,--wrap=mongoc_uri_new_with_error \ + -Wl,--wrap=mongoc_uri_get_option_as_int32 \ + -Wl,--wrap=mongoc_uri_set_option_as_int32 \ + -Wl,--wrap=mongoc_client_new_from_uri \ + -Wl,--wrap=mongoc_client_set_appname \ + -Wl,--wrap=mongoc_client_get_collection \ + -Wl,--wrap=mongoc_uri_destroy \ + -Wl,--wrap=mongoc_collection_insert_many \ + $(NULL) +endif + + collectors_cgroups_plugin_tests_cgroups_testdriver_SOURCES = \ + collectors/cgroups.plugin/tests/test_cgroups_plugin.c \ + collectors/cgroups.plugin/tests/test_cgroups_plugin.h \ + collectors/cgroups.plugin/tests/test_doubles.c \ + $(CGROUPS_PLUGIN_FILES) \ + database/rrdlabels.c \ + database/rrd.h \ + $(LIBNETDATA_FILES) \ + $(NULL) + collectors_cgroups_plugin_tests_cgroups_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) + collectors_cgroups_plugin_tests_cgroups_testdriver_LDFLAGS = \ + -Wl,--wrap=rrdlabels_add \ + $(NULL) + +endif diff --git a/README.md b/README.md new file mode 100644 index 0000000..f1f9a4c --- /dev/null +++ b/README.md @@ -0,0 +1,244 @@ +

Netdata

+ +

Netdata is high-fidelity infrastructure monitoring and troubleshooting.
Open-source, free, preconfigured, opinionated, and always real-time.

+
+

+ GitHub Stars +
+ Latest release + Nightly release +
+ Build status + CII Best Practices + License: GPL v3+ +
+ Code Climate + LGTM C + + LGTM PYTHON +

+ +--- + +Netdata's **distributed, real-time monitoring Agent** collects thousands of metrics from systems, hardware, containers, +and applications with zero configuration. It runs permanently on all your physical/virtual servers, containers, cloud +deployments, and edge/IoT devices, and is perfectly safe to install on your systems mid-incident without any +preparation. + +You can install Netdata on most Linux distributions (Ubuntu, Debian, CentOS, and more), container platforms (Kubernetes +clusters, Docker), and many other operating systems (FreeBSD, macOS). No `sudo` required. + +Netdata is designed by system administrators, DevOps engineers, and developers to collect everything, help you visualize +metrics, troubleshoot complex performance problems, and make data interoperable with the rest of your monitoring stack. + +People get addicted to Netdata. Once you use it on your systems, there's no going back! _You've been warned..._ + +![Users who are addicted to +Netdata](https://user-images.githubusercontent.com/1153921/96495792-2e881380-11fd-11eb-85a3-53d3a84dcb29.png) + +## Menu + +- [Features](#features) +- [Get Netdata](#get-netdata) + - [Docker](#docker) + - [Other operating systems](#other-operating-systems) + - [Post-installation](#post-installation) + - [Netdata Cloud](#netdata-cloud) +- [How it works](#how-it-works) +- [Infographic](#infographic) +- [Documentation](#documentation) +- [Community](#community) +- [Contribute](#contribute) +- [License](#license) +- [Is it any good?](#is-it-any-good) + +## Features + +![Netdata in +action](https://user-images.githubusercontent.com/1153921/113440964-449c2180-93a2-11eb-9664-663afa1257a8.gif) + +Here's what you can expect from Netdata: + +- **1s granularity**: The highest possible resolution for all metrics. +- **Unlimited metrics**: Netdata collects all the available metrics—the more, the better. +- **1% CPU utilization of a single core**: It's unbelievably optimized. +- **A few MB of RAM**: The highly-efficient database engine stores per-second metrics in RAM and then "spills" + historical metrics to disk long-term storage. +- **Minimal disk I/O**: While running, Netdata only writes historical metrics and reads `error` and `access` logs. +- **Zero configuration**: Netdata auto-detects everything, and can collect up to 10,000 metrics per server out of the + box. +- **Zero maintenance**: You just run it. Netdata does the rest. +- **Stunningly fast, interactive visualizations**: The dashboard responds to queries in less than 1ms per metric to + synchronize charts as you pan through time, zoom in on anomalies, and more. +- **Visual anomaly detection**: Our UI/UX emphasizes the relationships between charts to help you detect the root + cause of anomalies. +- **Machine learning (ML) features out of the box**: Unsupervised ML-based [anomaly detection](https://learn.netdata.cloud/docs/cloud/insights/anomaly-advisor), every second, every metric, zero-config! [Metric correlations](https://learn.netdata.cloud/docs/cloud/insights/metric-correlations) to help with short-term change detection. And other [additional](https://learn.netdata.cloud/guides/monitor/anomaly-detection) ML-based features to help make your life easier. +- **Scales to infinity**: You can install it on all your servers, containers, VMs, and IoT devices. Metrics are not + centralized by default, so there is no limit. +- **Several operating modes**: Autonomous host monitoring (the default), headless data collector, forwarding proxy, + store and forward proxy, central multi-host monitoring, in all possible configurations. Use different metrics + retention policies per node and run with or without health monitoring. + +Netdata works with tons of applications, notifications platforms, and other time-series databases: + +- **300+ system, container, and application endpoints**: Collectors autodetect metrics from default endpoints and + immediately visualize them into meaningful charts designed for troubleshooting. See [everything we + support](https://learn.netdata.cloud/docs/agent/collectors/collectors). +- **20+ notification platforms**: Netdata's health watchdog sends warning and critical alarms to your [favorite + platform](https://learn.netdata.cloud/docs/monitor/enable-notifications) to inform you of anomalies just seconds + after they affect your node. +- **30+ external time-series databases**: Export resampled metrics as they're collected to other [local- and + Cloud-based databases](https://learn.netdata.cloud/docs/export/external-databases) for best-in-class + interoperability. + +> 💡 **Want to leverage the monitoring power of Netdata across entire infrastructure**? View metrics from +> any number of distributed nodes in a single interface and unlock even more +> [features](https://learn.netdata.cloud/docs/overview/why-netdata) with [Netdata +> Cloud](https://learn.netdata.cloud/docs/overview/what-is-netdata#netdata-cloud). + +## Get Netdata + +

+ User base + Servers monitored + Sessions served + Docker Hub pulls +
+ New users today + New machines today + Sessions today + Docker Hub pulls today +

+ +To install Netdata from source on most Linux systems (physical, virtual, container, IoT, edge), run our [one-line +installation script](https://learn.netdata.cloud/docs/agent/packaging/installer/methods/packages). This script downloads +and builds all dependencies, including those required to connect to [Netdata Cloud](https://netdata.cloud/cloud) if you +choose, and enables [automatic nightly +updates](https://learn.netdata.cloud/docs/agent/packaging/installer#nightly-vs-stable-releases) and [anonymous +statistics](https://learn.netdata.cloud/docs/agent/anonymous-statistics). + +```bash +wget -O /tmp/netdata-kickstart.sh https://my-netdata.io/kickstart.sh && sh /tmp/netdata-kickstart.sh +``` + +To view the Netdata dashboard, navigate to `http://localhost:19999`, or `http://NODE:19999`. + +### Docker + +You can also try out Netdata's capabilities in a [Docker +container](https://learn.netdata.cloud/docs/agent/packaging/docker/): + +```bash +docker run -d --name=netdata \ + -p 19999:19999 \ + -v netdataconfig:/etc/netdata \ + -v netdatalib:/var/lib/netdata \ + -v netdatacache:/var/cache/netdata \ + -v /etc/passwd:/host/etc/passwd:ro \ + -v /etc/group:/host/etc/group:ro \ + -v /proc:/host/proc:ro \ + -v /sys:/host/sys:ro \ + -v /etc/os-release:/host/etc/os-release:ro \ + --restart unless-stopped \ + --cap-add SYS_PTRACE \ + --security-opt apparmor=unconfined \ + netdata/netdata +``` + +To view the Netdata dashboard, navigate to `http://localhost:19999`, or `http://NODE:19999`. + +### Other operating systems + +See our documentation for [additional operating +systems](/packaging/installer/README.md#have-a-different-operating-system-or-want-to-try-another-method), including +[Kubernetes](/packaging/installer/methods/kubernetes.md), [`.deb`/`.rpm` +packages](/packaging/installer/methods/kickstart.md#native-packages), and more. + +### Post-installation + +When you're finished with installation, check out our [single-node](/docs/quickstart/single-node.md) or +[infrastructure](/docs/quickstart/infrastructure.md) monitoring quickstart guides based on your use case. + +Or, skip straight to [configuring the Netdata Agent](/docs/configure/nodes.md). + +Read through Netdata's [documentation](https://learn.netdata.cloud/docs), which is structured based on actions and +solutions, to enable features like health monitoring, alarm notifications, long-term metrics storage, exporting to +external databases, and more. + +### Netdata Cloud + +Netdata Cloud works with Netdata's free, open-source monitoring agent to help you monitor and troubleshoot every +layer of your systems to find weaknesses before they turn into outages. [Using both tools](https://learn.netdata.cloud/docs/agent/claim) +can help you turn data into insights immediately. + +[Get Netdata Cloud now!](https://app.netdata.cloud/) + +## How it works + +Netdata is a highly efficient, highly modular, metrics management engine. Its lockless design makes it ideal for +concurrent operations on the metrics. + +![Diagram of Netdata's core +functionality](https://user-images.githubusercontent.com/1153921/95367248-5f755980-0889-11eb-827f-9b7aa02a556e.png) + +The result is a highly efficient, low-latency system, supporting multiple readers and one writer on each metric. + +## Infographic + +This is a high-level overview of Netdata features and architecture. Click on it to view an interactive version, which +has links to our documentation. + +[![An infographic of how Netdata +works](https://user-images.githubusercontent.com/43294513/60951037-8ba5d180-a2f8-11e9-906e-e27356f168bc.png)](https://my-netdata.io/infographic.html) + +## Documentation + +Netdata's documentation is available at [**Netdata Learn**](https://learn.netdata.cloud). + +This site also hosts a number of [guides](https://learn.netdata.cloud/guides) to help newer users better understand how +to collect metrics, troubleshoot via charts, export to external databases, and more. + +## Community + +Netdata is an inclusive open-source project and community. Please read our [Code of Conduct](https://learn.netdata.cloud/contribute/code-of-conduct). + +Find most of the Netdata team in our [community forums](https://community.netdata.cloud). It's the best place to +ask questions, find resources, and engage with passionate professionals. The team is also available and active in our [Discord](https://discord.com/invite/mPZ6WZKKG2) too. + +You can also find Netdata on: + +- [Twitter](https://twitter.com/linuxnetdata) +- [YouTube](https://www.youtube.com/c/Netdata) +- [Reddit](https://www.reddit.com/r/netdata/) +- [LinkedIn](https://www.linkedin.com/company/netdata-cloud/) +- [StackShare](https://stackshare.io/netdata) +- [Product Hunt](https://www.producthunt.com/posts/netdata-monitoring-agent/) +- [Repology](https://repology.org/metapackage/netdata/versions) +- [Facebook](https://www.facebook.com/linuxnetdata/) + +## Contribute + +Contributions are the lifeblood of open-source projects. While we continue to invest in and improve Netdata, we need help to democratize monitoring! + +- Read our [Contributing Guide](https://learn.netdata.cloud/contribute/handbook), which contains all the information you need to contribute to Netdata, such as improving our documentation, engaging in the community, and developing new features. We've made it as frictionless as possible, but if you need help, just ping us on our community forums! +- We have a whole category dedicated to contributing and extending Netdata on our [community forums](https://community.netdata.cloud/c/agent-development/9) +- Found a bug? Open a [GitHub issue](https://github.com/netdata/netdata/issues/new?assignees=&labels=bug%2Cneeds+triage&template=BUG_REPORT.yml&title=%5BBug%5D%3A+). +- View our [Security Policy](https://github.com/netdata/netdata/security/policy). + +Package maintainers should read the guide on [building Netdata from source](/packaging/installer/methods/source.md) for +instructions on building each Netdata component from source and preparing a package. + +## License + +The Netdata Agent is [GPLv3+](/LICENSE). Netdata re-distributes other open-source tools and libraries. Please check the +[third party licenses](/REDISTRIBUTED.md). + +## Is it any good? + +Yes. + +_When people first hear about a new product, they frequently ask if it is any good. A Hacker News user +[remarked](https://news.ycombinator.com/item?id=3067434):_ + +> Note to self: Starting immediately, all raganwald projects will have a “Is it any good?” section in the readme, and +> the answer shall be “yes.". diff --git a/REDISTRIBUTED.md b/REDISTRIBUTED.md new file mode 100644 index 0000000..6b82cd1 --- /dev/null +++ b/REDISTRIBUTED.md @@ -0,0 +1,188 @@ + + +# Redistributed software + +Netdata copyright info: + Copyright 2016-2018, Costa Tsaousis. + Copyright 2018, Netdata Inc. + Released under [GPL v3 or later](https://raw.githubusercontent.com/netdata/netdata/master/LICENSE). + +Netdata uses SPDX license tags to identify the license for its files. +Individual licenses referenced in the tags are available on the [SPDX project site](http://spdx.org/licenses/). + +Netdata redistributes the following third-party software. +We have decided to redistribute all these, instead of using them +through a CDN, to allow Netdata to work in cases where Internet +connectivity is not available. + +- [Dygraphs](http://dygraphs.com/) + + Copyright 2009, Dan Vanderkam + [MIT License](http://dygraphs.com/legal.html) + +- [Easy Pie Chart](https://rendro.github.io/easy-pie-chart/) + + Copyright 2013, Robert Fleischmann + [MIT License](https://github.com/rendro/easy-pie-chart/blob/master/LICENSE) + +- [Gauge.js](http://bernii.github.io/gauge.js/) + + Copyright, Bernard Kobos + [MIT License](https://github.com/getgauge/gauge-js/blob/master/LICENSE) + +- [d3pie](https://github.com/benkeen/d3pie) + + Copyright (c) 2014-2015 Benjamin Keen + [MIT License](https://github.com/benkeen/d3pie/blob/master/LICENSE) + +- [jQuery Sparklines](http://omnipotent.net/jquery.sparkline/) + + Copyright 2009-2012, Splunk Inc. + [New BSD License](http://opensource.org/licenses/BSD-3-Clause) + +- [Peity](http://benpickles.github.io/peity/) + + Copyright 2009-2015, Ben Pickles + [MIT License](https://github.com/benpickles/peity/blob/master/LICENCE) + +- [morris.js](http://morrisjs.github.io/morris.js/) + + Copyright 2013, Olly Smith + [Simplified BSD License](http://morrisjs.github.io/morris.js/) + +- [Raphaël](http://dmitrybaranovskiy.github.io/raphael/) + + Copyright 2008, Dmitry Baranovskiy + [MIT License](http://dmitrybaranovskiy.github.io/raphael/license.html) + +- [C3](http://c3js.org/) + + Copyright 2013, Masayuki Tanaka + [MIT License](https://github.com/masayuki0812/c3/blob/master/LICENSE) + +- [D3](http://d3js.org/) + + Copyright 2015, Mike Bostock + [BSD License](http://opensource.org/licenses/BSD-3-Clause) + +- [jQuery](https://jquery.org/) + + Copyright 2015, jQuery Foundation + [MIT License](https://jquery.org/license/) + +- [Bootstrap](http://getbootstrap.com/getting-started/) + + Copyright 2015, Twitter + [MIT License](https://github.com/twbs/bootstrap/blob/v4-dev/LICENSE) + +- [Bootstrap Toggle](http://www.bootstraptoggle.com/) + + Copyright (c) 2011-2014 Min Hur, The New York Times Company + [MIT License](https://github.com/minhur/bootstrap-toggle/blob/master/LICENSE) + +- [Bootstrap-slider](http://seiyria.com/bootstrap-slider/) + + Copyright 2017 Kyle Kemp, Rohit Kalkur, and contributors + [MIT License](https://github.com/seiyria/bootstrap-slider/blob/master/LICENSE.md) + +- [bootstrap-table](http://bootstrap-table.wenzhixin.net.cn/) + + Copyright (c) 2012-2016 Zhixin Wen [wenzhixin2010@gmail.com](mailto:wenzhixin2010@gmail.com) + [MIT License](https://github.com/wenzhixin/bootstrap-table/blob/master/LICENSE) + +- [tableExport.jquery.plugin](https://github.com/hhurz/tableExport.jquery.plugin) + + Copyright (c) 2015,2016 hhurz + [MIT License](https://github.com/hhurz/tableExport.jquery.plugin/blob/master/LICENSE) + +- [perfect-scrollbar](https://jamesflorentino.github.io/nanoScrollerJS/) + + Copyright 2016, Hyunje Alex Jun and other contributors + [MIT License](https://github.com/noraesae/perfect-scrollbar/blob/master/LICENSE) + +- [FontAwesome](https://fortawesome.github.io/Font-Awesome/) + + Created by Dave Gandy + Font license: [SIL OFL 1.1](http://scripts.sil.org/OFL) + Icon license [Creative Commons Attribution 4.0 (CC-BY 4.0)](https://creativecommons.org/licenses/by/4.0/) + Code license: [MIT License](http://opensource.org/licenses/mit-license.html) + +- [node-extend](https://github.com/justmoon/node-extend) + + Copyright 2014, Stefan Thomas + [MIT License](https://github.com/justmoon/node-extend/blob/master/LICENSE) + +- [node-net-snmp](https://github.com/stephenwvickers/node-net-snmp) + + Copyright 2013, Stephen Vickers + [MIT License](https://github.com/nospaceships/node-net-snmp#license) + +- [node-asn1-ber](https://github.com/stephenwvickers/node-asn1-ber) + + Copyright 2017, Stephen Vickers + Copyright 2011, Mark Cavage + [MIT License](https://github.com/nospaceships/node-asn1-ber#license) + +- [pixl-xml](https://github.com/jhuckaby/pixl-xml) + + Copyright 2015, Joseph Huckaby + [MIT License](https://github.com/jhuckaby/pixl-xml#license) + +- [sensors](https://github.com/paroj/sensors.py) + + Copyright 2014, Pavel Rojtberg + [LGPL 2.1 License](http://opensource.org/licenses/LGPL-2.1) + +- [PyYAML](https://bitbucket.org/blackjack/pysensors) + + Copyright 2006, Kirill Simonov + [MIT License](https://github.com/yaml/pyyaml/blob/master/LICENSE) + +- [urllib3](https://github.com/shazow/urllib3) + + Copyright 2008-2016 Andrey Petrov and [contributors](https://github.com/shazow/urllib3/blob/master/CONTRIBUTORS.txt) + [MIT License](https://github.com/shazow/urllib3/blob/master/LICENSE.txt) + +- [lz-string](http://pieroxy.net/blog/pages/lz-string/index.html) + + Copyright 2013 Pieroxy + [WTFPL License](http://pieroxy.net/blog/pages/lz-string/index.html#inline_menu_10) + +- [pako](http://nodeca.github.io/pako/) + + Copyright 2014-2017 Vitaly Puzrin and Andrei Tuputcyn + [MIT License](https://github.com/nodeca/pako/blob/master/LICENSE) + +- [clipboard-polyfill](https://github.com/lgarron/clipboard-polyfill) + + Copyright (c) 2014 Lucas Garron + [MIT License](https://github.com/lgarron/clipboard-polyfill/blob/master/LICENSE.md) + +- [Utilities for writing code that runs on Python 2 and 3](https://raw.githubusercontent.com/netdata/netdata/master/collectors/python.d.plugin/python_modules/urllib3/packages/six.py) + + Copyright (c) 2010-2015 Benjamin Peterson + [MIT License](https://github.com/benjaminp/six/blob/master/LICENSE) + +- [mcrcon](https://github.com/barneygale/MCRcon) + + Copyright (C) 2015 Barnaby Gale + [MIT License](https://raw.githubusercontent.com/barneygale/MCRcon/master/COPYING.txt) + +- [monotonic](https://github.com/atdt/monotonic) + + Copyright 2014, 2015, 2016 Ori Livneh [ori@wikimedia.org](mailto:ori@wikimedia.org) + [Apache-2.0](http://www.apache.org/licenses/LICENSE-2.0) + +- [filelock](https://github.com/benediktschmitt/py-filelock) + + Copyright 2015, Benedikt Schmitt [Unlicense License](https://unlicense.org/) + +- [Kolmogorov-Smirnov distribution](http://simul.iro.umontreal.ca/ksdir/) + + Copyright March 2010 by Université de Montréal, Richard Simard and Pierre L'Ecuyer + [GPL 3.0](https://www.gnu.org/licenses/gpl-3.0.en.html) + + diff --git a/aclk/README.md b/aclk/README.md new file mode 100644 index 0000000..af0f5fd --- /dev/null +++ b/aclk/README.md @@ -0,0 +1,147 @@ + + +# Agent-cloud link (ACLK) + +The Agent-Cloud link (ACLK) is the mechanism responsible for securely connecting a Netdata Agent to your web browser +through Netdata Cloud. The ACLK establishes an outgoing secure WebSocket (WSS) connection to Netdata Cloud on port +`443`. The ACLK is encrypted, safe, and _is only established if you connect your node_. + +The Cloud App lives at app.netdata.cloud which currently resolves to the following list of IPs: + +- 54.198.178.11 +- 44.207.131.212 +- 44.196.50.41 + +:::caution + +This list of IPs can change without notice, we strongly advise you to whitelist following domains `api.netdata.cloud`, `mqtt.netdata.cloud`, if +this is not an option in your case always verify the current domain resolution (e.g via the `host` command). + +::: + +For a guide to connecting a node using the ACLK, plus additional troubleshooting and reference information, read our [get +started with Cloud](https://learn.netdata.cloud/docs/cloud/get-started) guide or the full [connect to Cloud +documentation](/claim/README.md). + +## Data privacy +[Data privacy](https://netdata.cloud/privacy/) is very important to us. We firmly believe that your data belongs to +you. This is why **we don't store any metric data in Netdata Cloud**. + +All the data that you see in the web browser when using Netdata Cloud, is actually streamed directly from the Netdata Agent to the Netdata Cloud dashboard. +The data passes through our systems, but it isn't stored. + +However, to be able to offer the stunning visualizations and advanced functionality of Netdata Cloud, it does store a limited number of _metadata_. + +Read more about [Data privacy in the Netdata Cloud](https://learn.netdata.cloud/docs/cloud/data-privacy) in the documentation. + + +## Enable and configure the ACLK + +The ACLK is enabled by default, with its settings automatically configured and stored in the Agent's memory. No file is +created at `/var/lib/netdata/cloud.d/cloud.conf` until you either connect a node or create it yourself. The default +configuration uses two settings: + +```conf +[global] + enabled = yes + cloud base url = https://api.netdata.cloud +``` + +If your Agent needs to use a proxy to access the internet, you must [set up a proxy for +connecting to cloud](/claim/README.md#connect-through-a-proxy). + +You can configure following keys in the `netdata.conf` section `[cloud]`: +``` +[cloud] + statistics = yes + query thread count = 2 +``` + +- `statistics` enables/disables ACLK related statistics and their charts. You can disable this to save some space in the database and slightly reduce memory usage of Netdata Agent. +- `query thread count` specifies the number of threads to process cloud queries. Increasing this setting is useful for nodes with many children (streaming), which can expect to handle more queries (and/or more complicated queries). + +## Disable the ACLK + +You have two options if you prefer to disable the ACLK and not use Netdata Cloud. + +### Disable at installation + +You can pass the `--disable-cloud` parameter to the Agent installation when using a kickstart script +([kickstart.sh](/packaging/installer/methods/kickstart.md), or a [manual installation from +Git](/packaging/installer/methods/manual.md). + +When you pass this parameter, the installer does not download or compile any extra libraries. Once running, the Agent +kills the thread responsible for the ACLK and connecting behavior, and behaves as though the ACLK, and thus Netdata Cloud, +does not exist. + +### Disable at runtime + +You can change a runtime setting in your `cloud.conf` file to disable the ACLK. This setting only stops the Agent from +attempting any connection via the ACLK, but does not prevent the installer from downloading and compiling the ACLK's +dependencies. + +The file typically exists at `/var/lib/netdata/cloud.d/cloud.conf`, but can change if you set a prefix during +installation. To disable the ACLK, open that file and change the `enabled` setting to `no`: + +```conf +[global] + enabled = no +``` + +If the file at `/var/lib/netdata/cloud.d/cloud.conf` doesn't exist, you need to create it. + +Copy and paste the first two lines from below, which will change your prompt to `cat`. + +```bash +cd /var/lib/netdata/cloud.d +cat > cloud.conf << EOF +``` + +Copy and paste in lines 3-6, and after the final `EOF`, hit **Enter**. The final line must contain only `EOF`. Hit **Enter** again to return to your normal prompt with the newly-created file. + +To get your normal prompt back, the final line +must contain only `EOF`. + +```bash +[global] + enabled = no + cloud base url = https://api.netdata.cloud +EOF +``` + +You also need to change the file's permissions. Use `grep "run as user" /etc/netdata/netdata.conf` to figure out which +user your Agent runs as (typically `netdata`), and replace `netdata:netdata` as shown below if necessary: + +```bash +sudo chmod 0770 cloud.conf +sudo chown netdata:netdata cloud.conf +``` + +Restart your Agent to disable the ACLK. + +### Re-enable the ACLK + +If you first disable the ACLK and any Cloud functionality and then decide you would like to use Cloud, you must either +[reinstall Netdata](/packaging/installer/REINSTALL.md) with Cloud enabled or change the runtime setting in your +`cloud.conf` file. + +If you passed `--disable-cloud` to `netdata-installer.sh` during installation, you must +[reinstall](/packaging/installer/REINSTALL.md) your Agent. Use the same method as before, but pass `--require-cloud` to +the installer. When installation finishes you can [connect your node](/claim/README.md#how-to-connect-a-node). + +If you changed the runtime setting in your `var/lib/netdata/cloud.d/cloud.conf` file, edit the file again and change +`enabled` to `yes`: + +```conf +[global] + enabled = yes +``` + +Restart your Agent and [connect your node](/claim/README.md#how-to-connect-a-node). + + diff --git a/aclk/aclk.c b/aclk/aclk.c new file mode 100644 index 0000000..3b035b8 --- /dev/null +++ b/aclk/aclk.c @@ -0,0 +1,1173 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk.h" + +#ifdef ENABLE_ACLK +#include "aclk_stats.h" +#include "mqtt_wss_client.h" +#include "aclk_otp.h" +#include "aclk_tx_msgs.h" +#include "aclk_query.h" +#include "aclk_query_queue.h" +#include "aclk_util.h" +#include "aclk_rx_msgs.h" +#include "https_client.h" +#include "schema-wrappers/schema_wrappers.h" +#include "aclk_capas.h" + +#include "aclk_proxy.h" + +#ifdef ACLK_LOG_CONVERSATION_DIR +#include +#include +#include +#endif + +#define ACLK_STABLE_TIMEOUT 3 // Minimum delay to mark AGENT as stable + +#endif /* ENABLE_ACLK */ + +int aclk_pubacks_per_conn = 0; // How many PubAcks we got since MQTT conn est. +int aclk_rcvd_cloud_msgs = 0; +int aclk_connection_counter = 0; +int disconnect_req = 0; + +int aclk_connected = 0; +int aclk_ctx_based = 0; +int aclk_disable_runtime = 0; +int aclk_stats_enabled; +int aclk_kill_link = 0; + +usec_t aclk_session_us = 0; +time_t aclk_session_sec = 0; + +time_t last_conn_time_mqtt = 0; +time_t last_conn_time_appl = 0; +time_t last_disconnect_time = 0; +time_t next_connection_attempt = 0; +float last_backoff_value = 0; + +time_t aclk_block_until = 0; + +#ifdef ENABLE_ACLK +mqtt_wss_client mqttwss_client; + +netdata_mutex_t aclk_shared_state_mutex = NETDATA_MUTEX_INITIALIZER; +#define ACLK_SHARED_STATE_LOCK netdata_mutex_lock(&aclk_shared_state_mutex) +#define ACLK_SHARED_STATE_UNLOCK netdata_mutex_unlock(&aclk_shared_state_mutex) + +struct aclk_shared_state aclk_shared_state = { + .mqtt_shutdown_msg_id = -1, + .mqtt_shutdown_msg_rcvd = 0 +}; + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 +OSSL_DECODER_CTX *aclk_dctx = NULL; +EVP_PKEY *aclk_private_key = NULL; +#else +static RSA *aclk_private_key = NULL; +#endif +static int load_private_key() +{ + if (aclk_private_key != NULL) { +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 + EVP_PKEY_free(aclk_private_key); + if (aclk_dctx) + OSSL_DECODER_CTX_free(aclk_dctx); + + aclk_dctx = NULL; +#else + RSA_free(aclk_private_key); +#endif + } + aclk_private_key = NULL; + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/cloud.d/private.pem", netdata_configured_varlib_dir); + + long bytes_read; + char *private_key = read_by_filename(filename, &bytes_read); + if (!private_key) { + error("Claimed agent cannot establish ACLK - unable to load private key '%s' failed.", filename); + return 1; + } + debug(D_ACLK, "Claimed agent loaded private key len=%ld bytes", bytes_read); + + BIO *key_bio = BIO_new_mem_buf(private_key, -1); + if (key_bio==NULL) { + error("Claimed agent cannot establish ACLK - failed to create BIO for key"); + goto biofailed; + } + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 + aclk_dctx = OSSL_DECODER_CTX_new_for_pkey(&aclk_private_key, "PEM", NULL, + "RSA", + OSSL_KEYMGMT_SELECT_PRIVATE_KEY, + NULL, NULL); + + if (!aclk_dctx) { + error("Loading private key (from claiming) failed - no OpenSSL Decoders found"); + goto biofailed; + } + + // this is necesseary to avoid RSA key with wrong size + if (!OSSL_DECODER_from_bio(aclk_dctx, key_bio)) { + error("Decoding private key (from claiming) failed - invalid format."); + goto biofailed; + } +#else + aclk_private_key = PEM_read_bio_RSAPrivateKey(key_bio, NULL, NULL, NULL); +#endif + BIO_free(key_bio); + if (aclk_private_key!=NULL) + { + freez(private_key); + return 0; + } + char err[512]; + ERR_error_string_n(ERR_get_error(), err, sizeof(err)); + error("Claimed agent cannot establish ACLK - cannot create private key: %s", err); + +biofailed: + freez(private_key); + return 1; +} + +static int wait_till_cloud_enabled() +{ + info("Waiting for Cloud to be enabled"); + while (!netdata_cloud_setting) { + sleep_usec(USEC_PER_SEC * 1); + if (netdata_exit) + return 1; + } + return 0; +} + +/** + * Will block until agent is claimed. Returns only if agent claimed + * or if agent needs to shutdown. + * + * @return `0` if agent has been claimed, + * `1` if interrupted due to agent shutting down + */ +static int wait_till_agent_claimed(void) +{ + //TODO prevent malloc and freez + char *agent_id = get_agent_claimid(); + while (likely(!agent_id)) { + sleep_usec(USEC_PER_SEC * 1); + if (netdata_exit) + return 1; + agent_id = get_agent_claimid(); + } + freez(agent_id); + return 0; +} + +/** + * Checks everything is ready for connection + * agent claimed, cloud url set and private key available + * + * @param aclk_hostname points to location where string pointer to hostname will be set + * @param aclk_port port to int where port will be saved + * + * @return If non 0 returned irrecoverable error happened (or netdata_exit) and ACLK should be terminated + */ +static int wait_till_agent_claim_ready() +{ + url_t url; + while (!netdata_exit) { + if (wait_till_agent_claimed()) + return 1; + + // The NULL return means the value was never initialised, but this value has been initialized in post_conf_load. + // We trap the impossible NULL here to keep the linter happy without using a fatal() in the code. + char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL); + if (cloud_base_url == NULL) { + error("Do not move the cloud base url out of post_conf_load!!"); + return 1; + } + + // We just check configuration is valid here + // TODO make it without malloc/free + memset(&url, 0, sizeof(url_t)); + if (url_parse(cloud_base_url, &url)) { + error("Agent is claimed but the URL in configuration key \"cloud base url\" is invalid, please fix"); + url_t_destroy(&url); + sleep(5); + continue; + } + url_t_destroy(&url); + + if (!load_private_key()) + return 0; + + sleep(5); + } + + return 1; +} + +void aclk_mqtt_wss_log_cb(mqtt_wss_log_type_t log_type, const char* str) +{ + switch(log_type) { + case MQTT_WSS_LOG_ERROR: + case MQTT_WSS_LOG_FATAL: + case MQTT_WSS_LOG_WARN: + error_report("%s", str); + return; + case MQTT_WSS_LOG_INFO: + info("%s", str); + return; + case MQTT_WSS_LOG_DEBUG: + debug(D_ACLK, "%s", str); + return; + default: + error("Unknown log type from mqtt_wss"); + } +} + +//TODO prevent big buffer on stack +#define RX_MSGLEN_MAX 4096 +static void msg_callback(const char *topic, const void *msg, size_t msglen, int qos) +{ + UNUSED(qos); + aclk_rcvd_cloud_msgs++; + if (msglen > RX_MSGLEN_MAX) + error("Incoming ACLK message was bigger than MAX of %d and got truncated.", RX_MSGLEN_MAX); + + debug(D_ACLK, "Got Message From Broker Topic \"%s\" QOS %d", topic, qos); + + if (aclk_shared_state.mqtt_shutdown_msg_id > 0) { + error("Link is shutting down. Ignoring incoming message."); + return; + } + + const char *msgtype = strrchr(topic, '/'); + if (unlikely(!msgtype)) { + error_report("Cannot get message type from topic. Ignoring message from topic \"%s\"", topic); + return; + } + msgtype++; + if (unlikely(!*msgtype)) { + error_report("Message type empty. Ignoring message from topic \"%s\"", topic); + return; + } + +#ifdef ACLK_LOG_CONVERSATION_DIR +#define FN_MAX_LEN 512 + char filename[FN_MAX_LEN]; + int logfd; + snprintf(filename, FN_MAX_LEN, ACLK_LOG_CONVERSATION_DIR "/%010d-rx-%s.bin", ACLK_GET_CONV_LOG_NEXT(), msgtype); + logfd = open(filename, O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR ); + if(logfd < 0) + error("Error opening ACLK Conversation logfile \"%s\" for RX message.", filename); + write(logfd, msg, msglen); + close(logfd); +#endif + + aclk_handle_new_cloud_msg(msgtype, msg, msglen, topic); +} + +static void puback_callback(uint16_t packet_id) +{ + if (++aclk_pubacks_per_conn == ACLK_PUBACKS_CONN_STABLE) { + last_conn_time_appl = now_realtime_sec(); + aclk_tbeb_reset(); + } + +#ifdef NETDATA_INTERNAL_CHECKS + aclk_stats_msg_puback(packet_id); +#endif + + if (aclk_shared_state.mqtt_shutdown_msg_id == (int)packet_id) { + info("Shutdown message has been acknowledged by the cloud. Exiting gracefully"); + aclk_shared_state.mqtt_shutdown_msg_rcvd = 1; + } +} + +static int read_query_thread_count() +{ + int threads = MIN(processors/2, 6); + threads = MAX(threads, 2); + threads = config_get_number(CONFIG_SECTION_CLOUD, "query thread count", threads); + if(threads < 1) { + error("You need at least one query thread. Overriding configured setting of \"%d\"", threads); + threads = 1; + config_set_number(CONFIG_SECTION_CLOUD, "query thread count", threads); + } + return threads; +} + +void aclk_graceful_disconnect(mqtt_wss_client client); + +/* Keeps connection alive and handles all network communications. + * Returns on error or when netdata is shutting down. + * @param client instance of mqtt_wss_client + * @returns 0 - Netdata Exits + * >0 - Error happened. Reconnect and start over. + */ +static int handle_connection(mqtt_wss_client client) +{ + time_t last_periodic_query_wakeup = now_monotonic_sec(); + while (!netdata_exit) { + // timeout 1000 to check at least once a second + // for netdata_exit + if (mqtt_wss_service(client, 1000) < 0){ + error_report("Connection Error or Dropped"); + return 1; + } + + if (disconnect_req || aclk_kill_link) { + info("Going to restart connection due to disconnect_req=%s (cloud req), aclk_kill_link=%s (reclaim)", + disconnect_req ? "true" : "false", + aclk_kill_link ? "true" : "false"); + disconnect_req = 0; + aclk_kill_link = 0; + aclk_graceful_disconnect(client); + aclk_queue_unlock(); + aclk_shared_state.mqtt_shutdown_msg_id = -1; + aclk_shared_state.mqtt_shutdown_msg_rcvd = 0; + return 1; + } + + // mqtt_wss_service will return faster than in one second + // if there is enough work to do + time_t now = now_monotonic_sec(); + if (last_periodic_query_wakeup < now) { + // wake up at least one Query Thread at least + // once per second + last_periodic_query_wakeup = now; + QUERY_THREAD_WAKEUP; + } + } + return 0; +} + +static inline void mqtt_connected_actions(mqtt_wss_client client) +{ + char *topic = (char*)aclk_get_topic(ACLK_TOPICID_COMMAND); + + if (!topic) + error("Unable to fetch topic for COMMAND (to subscribe)"); + else + mqtt_wss_subscribe(client, topic, 1); + + topic = (char*)aclk_get_topic(ACLK_TOPICID_CMD_NG_V1); + if (!topic) + error("Unable to fetch topic for protobuf COMMAND (to subscribe)"); + else + mqtt_wss_subscribe(client, topic, 1); + + aclk_stats_upd_online(1); + aclk_connected = 1; + aclk_pubacks_per_conn = 0; + aclk_rcvd_cloud_msgs = 0; + aclk_connection_counter++; + + aclk_send_agent_connection_update(client, 1); +} + +void aclk_graceful_disconnect(mqtt_wss_client client) +{ + info("Preparing to gracefully shutdown ACLK connection"); + aclk_queue_lock(); + aclk_queue_flush(); + + aclk_shared_state.mqtt_shutdown_msg_id = aclk_send_agent_connection_update(client, 0); + + time_t t = now_monotonic_sec(); + while (!mqtt_wss_service(client, 100)) { + if (now_monotonic_sec() - t >= 2) { + error("Wasn't able to gracefully shutdown ACLK in time!"); + break; + } + if (aclk_shared_state.mqtt_shutdown_msg_rcvd) { + info("MQTT App Layer `disconnect` message sent successfully"); + break; + } + } + info("ACLK link is down"); + log_access("ACLK DISCONNECTED"); + aclk_stats_upd_online(0); + last_disconnect_time = now_realtime_sec(); + aclk_connected = 0; + + info("Attempting to gracefully shutdown the MQTT/WSS connection"); + mqtt_wss_disconnect(client, 1000); +} + +static unsigned long aclk_reconnect_delay() { + unsigned long recon_delay; + time_t now; + + if (aclk_disable_runtime) { + aclk_tbeb_reset(); + return 60 * MSEC_PER_SEC; + } + + now = now_monotonic_sec(); + if (aclk_block_until) { + if (now < aclk_block_until) { + recon_delay = aclk_block_until - now; + recon_delay *= MSEC_PER_SEC; + aclk_block_until = 0; + aclk_tbeb_reset(); + return recon_delay; + } + aclk_block_until = 0; + } + + if (!aclk_env || !aclk_env->backoff.base) + return aclk_tbeb_delay(0, 2, 0, 1024); + + return aclk_tbeb_delay(0, aclk_env->backoff.base, aclk_env->backoff.min_s, aclk_env->backoff.max_s); +} + +/* Block till aclk_reconnect_delay is satisfied or netdata_exit is signalled + * @return 0 - Go ahead and connect (delay expired) + * 1 - netdata_exit + */ +#define NETDATA_EXIT_POLL_MS (MSEC_PER_SEC/4) +static int aclk_block_till_recon_allowed() { + unsigned long recon_delay = aclk_reconnect_delay(); + + next_connection_attempt = now_realtime_sec() + (recon_delay / MSEC_PER_SEC); + last_backoff_value = (float)recon_delay / MSEC_PER_SEC; + + info("Wait before attempting to reconnect in %.3f seconds\n", recon_delay / (float)MSEC_PER_SEC); + // we want to wake up from time to time to check netdata_exit + while (recon_delay) + { + if (netdata_exit) + return 1; + if (recon_delay > NETDATA_EXIT_POLL_MS) { + sleep_usec(NETDATA_EXIT_POLL_MS * USEC_PER_MS); + recon_delay -= NETDATA_EXIT_POLL_MS; + continue; + } + sleep_usec(recon_delay * USEC_PER_MS); + recon_delay = 0; + } + return netdata_exit; +} + +#ifndef ACLK_DISABLE_CHALLENGE +/* Cloud returns transport list ordered with highest + * priority first. This function selects highest prio + * transport that we can actually use (support) + */ +static int aclk_get_transport_idx(aclk_env_t *env) { + for (size_t i = 0; i < env->transport_count; i++) { + // currently we support only MQTT 5 + // therefore select first transport that matches + if (env->transports[i]->type == ACLK_TRP_MQTT_5) { + return i; + } + } + return -1; +} +#endif + +/* Attempts to make a connection to MQTT broker over WSS + * @param client instance of mqtt_wss_client + * @return 0 - Successful Connection, + * <0 - Irrecoverable Error -> Kill ACLK, + * >0 - netdata_exit + */ +#define CLOUD_BASE_URL_READ_RETRY 30 +#ifdef ACLK_SSL_ALLOW_SELF_SIGNED +#define ACLK_SSL_FLAGS MQTT_WSS_SSL_ALLOW_SELF_SIGNED +#else +#define ACLK_SSL_FLAGS MQTT_WSS_SSL_CERT_CHECK_FULL +#endif +static int aclk_attempt_to_connect(mqtt_wss_client client) +{ + int ret; + + url_t base_url; + +#ifndef ACLK_DISABLE_CHALLENGE + url_t auth_url; + url_t mqtt_url; +#endif + + while (!netdata_exit) { + char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL); + if (cloud_base_url == NULL) { + error_report("Do not move the cloud base url out of post_conf_load!!"); + return -1; + } + + if (aclk_block_till_recon_allowed()) + return 1; + + info("Attempting connection now"); + memset(&base_url, 0, sizeof(url_t)); + if (url_parse(cloud_base_url, &base_url)) { + error_report("ACLK base URL configuration key could not be parsed. Will retry in %d seconds.", CLOUD_BASE_URL_READ_RETRY); + sleep(CLOUD_BASE_URL_READ_RETRY); + url_t_destroy(&base_url); + continue; + } + + struct mqtt_wss_proxy proxy_conf = { .host = NULL, .port = 0, .username = NULL, .password = NULL, .type = MQTT_WSS_DIRECT }; + aclk_set_proxy((char**)&proxy_conf.host, &proxy_conf.port, &proxy_conf.type); + + struct mqtt_connect_params mqtt_conn_params = { + .clientid = "anon", + .username = "anon", + .password = "anon", + .will_topic = "lwt", + .will_msg = NULL, + .will_flags = MQTT_WSS_PUB_QOS2, + .keep_alive = 60, + .drop_on_publish_fail = 1 + }; + +#ifndef ACLK_DISABLE_CHALLENGE + if (aclk_env) { + aclk_env_t_destroy(aclk_env); + freez(aclk_env); + } + aclk_env = callocz(1, sizeof(aclk_env_t)); + + ret = aclk_get_env(aclk_env, base_url.host, base_url.port); + url_t_destroy(&base_url); + if (ret) { + error_report("Failed to Get ACLK environment"); + // delay handled by aclk_block_till_recon_allowed + continue; + } + + if (netdata_exit) + return 1; + + if (aclk_env->encoding != ACLK_ENC_PROTO) { + error_report("This agent can only use the new cloud protocol but cloud requested old one."); + continue; + } + + if (!aclk_env_has_capa("proto")) { + error_report("Can't use encoding=proto without at least \"proto\" capability."); + continue; + } + info("New ACLK protobuf protocol negotiated successfully (/env response)."); + + memset(&auth_url, 0, sizeof(url_t)); + if (url_parse(aclk_env->auth_endpoint, &auth_url)) { + error_report("Parsing URL returned by env endpoint for authentication failed. \"%s\"", aclk_env->auth_endpoint); + url_t_destroy(&auth_url); + continue; + } + + ret = aclk_get_mqtt_otp(aclk_private_key, (char **)&mqtt_conn_params.clientid, (char **)&mqtt_conn_params.username, (char **)&mqtt_conn_params.password, &auth_url); + url_t_destroy(&auth_url); + if (ret) { + error_report("Error passing Challenge/Response to get OTP"); + continue; + } + + // aclk_get_topic moved here as during OTP we + // generate the topic cache + mqtt_conn_params.will_topic = aclk_get_topic(ACLK_TOPICID_AGENT_CONN); + + if (!mqtt_conn_params.will_topic) { + error_report("Couldn't get LWT topic. Will not send LWT."); + continue; + } + + // Do the MQTT connection + ret = aclk_get_transport_idx(aclk_env); + if (ret < 0) { + error_report("Cloud /env endpoint didn't return any transport usable by this Agent."); + continue; + } + + memset(&mqtt_url, 0, sizeof(url_t)); + if (url_parse(aclk_env->transports[ret]->endpoint, &mqtt_url)){ + error_report("Failed to parse target URL for /env trp idx %d \"%s\"", ret, aclk_env->transports[ret]->endpoint); + url_t_destroy(&mqtt_url); + continue; + } +#endif + + aclk_session_newarch = now_realtime_usec(); + aclk_session_sec = aclk_session_newarch / USEC_PER_SEC; + aclk_session_us = aclk_session_newarch % USEC_PER_SEC; + + mqtt_conn_params.will_msg = aclk_generate_lwt(&mqtt_conn_params.will_msg_len); + +#ifdef ACLK_DISABLE_CHALLENGE + ret = mqtt_wss_connect(client, base_url.host, base_url.port, &mqtt_conn_params, ACLK_SSL_FLAGS, &proxy_conf); + url_t_destroy(&base_url); +#else + ret = mqtt_wss_connect(client, mqtt_url.host, mqtt_url.port, &mqtt_conn_params, ACLK_SSL_FLAGS, &proxy_conf); + url_t_destroy(&mqtt_url); + + freez((char*)mqtt_conn_params.clientid); + freez((char*)mqtt_conn_params.password); + freez((char*)mqtt_conn_params.username); +#endif + + freez((char *)mqtt_conn_params.will_msg); + + if (!ret) { + last_conn_time_mqtt = now_realtime_sec(); + info("ACLK connection successfully established"); + log_access("ACLK CONNECTED"); + mqtt_connected_actions(client); + return 0; + } + + error_report("Connect failed"); + } + + return 1; +} + +/** + * Main agent cloud link thread + * + * This thread will simply call the main event loop that handles + * pending requests - both inbound and outbound + * + * @param ptr is a pointer to the netdata_static_thread structure. + * + * @return It always returns NULL + */ +void *aclk_main(void *ptr) +{ + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + + struct aclk_stats_thread *stats_thread = NULL; + + struct aclk_query_threads query_threads; + query_threads.thread_list = NULL; + + ACLK_PROXY_TYPE proxy_type; + aclk_get_proxy(&proxy_type); + if (proxy_type == PROXY_TYPE_SOCKS5) { + error("SOCKS5 proxy is not supported by ACLK-NG yet."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; + return NULL; + } + + unsigned int proto_hdl_cnt = aclk_init_rx_msg_handlers(); + + // This thread is unusual in that it cannot be cancelled by cancel_main_threads() + // as it must notify the far end that it shutdown gracefully and avoid the LWT. + netdata_thread_disable_cancelability(); + +#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK ) + info("Killing ACLK thread -> cloud functionality has been disabled"); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; + return NULL; +#endif + query_threads.count = read_query_thread_count(); + + if (wait_till_cloud_enabled()) + goto exit; + + if (wait_till_agent_claim_ready()) + goto exit; + + if (!(mqttwss_client = mqtt_wss_new("mqtt_wss", aclk_mqtt_wss_log_cb, msg_callback, puback_callback, 1))) { + error("Couldn't initialize MQTT_WSS network library"); + goto exit; + } + + // Enable MQTT buffer growth if necessary + // e.g. old cloud architecture clients with huge nodes + // that send JSON payloads of 10 MB as single messages + mqtt_wss_set_max_buf_size(mqttwss_client, 25*1024*1024); + + aclk_stats_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "statistics", global_statistics_enabled); + if (aclk_stats_enabled) { + stats_thread = callocz(1, sizeof(struct aclk_stats_thread)); + stats_thread->thread = mallocz(sizeof(netdata_thread_t)); + stats_thread->query_thread_count = query_threads.count; + stats_thread->client = mqttwss_client; + aclk_stats_thread_prepare(query_threads.count, proto_hdl_cnt); + netdata_thread_create( + stats_thread->thread, ACLK_STATS_THREAD_NAME, NETDATA_THREAD_OPTION_JOINABLE, aclk_stats_main_thread, + stats_thread); + } + + // Keep reconnecting and talking until our time has come + // and the Grim Reaper (netdata_exit) calls + do { + if (aclk_attempt_to_connect(mqttwss_client)) + goto exit_full; + + if (unlikely(!query_threads.thread_list)) + aclk_query_threads_start(&query_threads, mqttwss_client); + + if (handle_connection(mqttwss_client)) { + aclk_stats_upd_online(0); + last_disconnect_time = now_realtime_sec(); + aclk_connected = 0; + log_access("ACLK DISCONNECTED"); + } + } while (!netdata_exit); + + aclk_graceful_disconnect(mqttwss_client); + +exit_full: +// Tear Down + QUERY_THREAD_WAKEUP_ALL; + + aclk_query_threads_cleanup(&query_threads); + + if (aclk_stats_enabled) { + netdata_thread_join(*stats_thread->thread, NULL); + aclk_stats_thread_cleanup(); + freez(stats_thread->thread); + freez(stats_thread); + } + free_topic_cache(); + mqtt_wss_destroy(mqttwss_client); +exit: + if (aclk_env) { + aclk_env_t_destroy(aclk_env); + freez(aclk_env); + } + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; + return NULL; +} + +void aclk_host_state_update(RRDHOST *host, int cmd) +{ + uuid_t node_id; + int ret; + + if (!aclk_connected) + return; + + ret = get_node_id(&host->host_uuid, &node_id); + if (ret > 0) { + // this means we were not able to check if node_id already present + error("Unable to check for node_id. Ignoring the host state update."); + return; + } + if (ret < 0) { + // node_id not found + aclk_query_t create_query; + create_query = aclk_query_new(REGISTER_NODE); + rrdhost_aclk_state_lock(localhost); + node_instance_creation_t node_instance_creation = { + .claim_id = localhost->aclk_state.claimed_id, + .hops = host->system_info->hops, + .hostname = rrdhost_hostname(host), + .machine_guid = host->machine_guid + }; + create_query->data.bin_payload.payload = generate_node_instance_creation(&create_query->data.bin_payload.size, &node_instance_creation); + rrdhost_aclk_state_unlock(localhost); + create_query->data.bin_payload.topic = ACLK_TOPICID_CREATE_NODE; + create_query->data.bin_payload.msg_name = "CreateNodeInstance"; + info("Registering host=%s, hops=%u",host->machine_guid, host->system_info->hops); + aclk_queue_query(create_query); + return; + } + + aclk_query_t query = aclk_query_new(NODE_STATE_UPDATE); + node_instance_connection_t node_state_update = { + .hops = host->system_info->hops, + .live = cmd, + .queryable = 1, + .session_id = aclk_session_newarch + }; + node_state_update.node_id = mallocz(UUID_STR_LEN); + uuid_unparse_lower(node_id, (char*)node_state_update.node_id); + + node_state_update.capabilities = aclk_get_agent_capas(); + + rrdhost_aclk_state_lock(localhost); + node_state_update.claim_id = localhost->aclk_state.claimed_id; + query->data.bin_payload.payload = generate_node_instance_connection(&query->data.bin_payload.size, &node_state_update); + rrdhost_aclk_state_unlock(localhost); + + info("Queuing status update for node=%s, live=%d, hops=%u",(char*)node_state_update.node_id, cmd, + host->system_info->hops); + freez((void*)node_state_update.node_id); + query->data.bin_payload.msg_name = "UpdateNodeInstanceConnection"; + query->data.bin_payload.topic = ACLK_TOPICID_NODE_CONN; + aclk_queue_query(query); +} + +void aclk_send_node_instances() +{ + struct node_instance_list *list_head = get_node_list(); + struct node_instance_list *list = list_head; + if (unlikely(!list)) { + error_report("Failure to get_node_list from DB!"); + return; + } + while (!uuid_is_null(list->host_id)) { + if (!uuid_is_null(list->node_id)) { + aclk_query_t query = aclk_query_new(NODE_STATE_UPDATE); + node_instance_connection_t node_state_update = { + .live = list->live, + .hops = list->hops, + .queryable = 1, + .session_id = aclk_session_newarch + }; + node_state_update.node_id = mallocz(UUID_STR_LEN); + uuid_unparse_lower(list->node_id, (char*)node_state_update.node_id); + + char host_id[UUID_STR_LEN]; + uuid_unparse_lower(list->host_id, host_id); + + RRDHOST *host = rrdhost_find_by_guid(host_id); + node_state_update.capabilities = aclk_get_node_instance_capas(host); + + rrdhost_aclk_state_lock(localhost); + node_state_update.claim_id = localhost->aclk_state.claimed_id; + query->data.bin_payload.payload = generate_node_instance_connection(&query->data.bin_payload.size, &node_state_update); + rrdhost_aclk_state_unlock(localhost); + info("Queuing status update for node=%s, live=%d, hops=%d",(char*)node_state_update.node_id, + list->live, + list->hops); + + freez((void*)node_state_update.capabilities); + freez((void*)node_state_update.node_id); + query->data.bin_payload.msg_name = "UpdateNodeInstanceConnection"; + query->data.bin_payload.topic = ACLK_TOPICID_NODE_CONN; + aclk_queue_query(query); + } else { + aclk_query_t create_query; + create_query = aclk_query_new(REGISTER_NODE); + node_instance_creation_t node_instance_creation = { + .hops = list->hops, + .hostname = list->hostname, + }; + node_instance_creation.machine_guid = mallocz(UUID_STR_LEN); + uuid_unparse_lower(list->host_id, (char*)node_instance_creation.machine_guid); + create_query->data.bin_payload.topic = ACLK_TOPICID_CREATE_NODE; + create_query->data.bin_payload.msg_name = "CreateNodeInstance"; + rrdhost_aclk_state_lock(localhost); + node_instance_creation.claim_id = localhost->aclk_state.claimed_id, + create_query->data.bin_payload.payload = generate_node_instance_creation(&create_query->data.bin_payload.size, &node_instance_creation); + rrdhost_aclk_state_unlock(localhost); + info("Queuing registration for host=%s, hops=%d",(char*)node_instance_creation.machine_guid, + list->hops); + freez((void *)node_instance_creation.machine_guid); + aclk_queue_query(create_query); + } + freez(list->hostname); + + list++; + } + freez(list_head); +} + +void aclk_send_bin_msg(char *msg, size_t msg_len, enum aclk_topics subtopic, const char *msgname) +{ + aclk_send_bin_message_subtopic_pid(mqttwss_client, msg, msg_len, subtopic, msgname); +} + +static void fill_alert_status_for_host(BUFFER *wb, RRDHOST *host) +{ + struct proto_alert_status status; + memset(&status, 0, sizeof(status)); + if (get_proto_alert_status(host, &status)) { + buffer_strcat(wb, "\nFailed to get alert streaming status for this host"); + return; + } + buffer_sprintf(wb, + "\n\t\tUpdates: %d" + "\n\t\tBatch ID: %"PRIu64 + "\n\t\tLast Acked Seq ID: %"PRIu64 + "\n\t\tPending Min Seq ID: %"PRIu64 + "\n\t\tPending Max Seq ID: %"PRIu64 + "\n\t\tLast Submitted Seq ID: %"PRIu64, + status.alert_updates, + status.alerts_batch_id, + status.last_acked_sequence_id, + status.pending_min_sequence_id, + status.pending_max_sequence_id, + status.last_submitted_sequence_id + ); +} +#endif /* ENABLE_ACLK */ + +char *aclk_state(void) +{ +#ifndef ENABLE_ACLK + return strdupz("ACLK Available: No"); +#else + BUFFER *wb = buffer_create(1024); + struct tm *tmptr, tmbuf; + char *ret; + + buffer_strcat(wb, + "ACLK Available: Yes\n" + "ACLK Version: 2\n" + "Protocols Supported: Protobuf\n" + ); + buffer_sprintf(wb, "Protocol Used: Protobuf\nMQTT Version: %d\nClaimed: ", 5); + + char *agent_id = get_agent_claimid(); + if (agent_id == NULL) + buffer_strcat(wb, "No\n"); + else { + char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL); + buffer_sprintf(wb, "Yes\nClaimed Id: %s\nCloud URL: %s\n", agent_id, cloud_base_url ? cloud_base_url : "null"); + freez(agent_id); + } + + buffer_sprintf(wb, "Online: %s\nReconnect count: %d\nBanned By Cloud: %s\n", aclk_connected ? "Yes" : "No", aclk_connection_counter > 0 ? (aclk_connection_counter - 1) : 0, aclk_disable_runtime ? "Yes" : "No"); + if (last_conn_time_mqtt && (tmptr = localtime_r(&last_conn_time_mqtt, &tmbuf)) ) { + char timebuf[26]; + strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); + buffer_sprintf(wb, "Last Connection Time: %s\n", timebuf); + } + if (last_conn_time_appl && (tmptr = localtime_r(&last_conn_time_appl, &tmbuf)) ) { + char timebuf[26]; + strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); + buffer_sprintf(wb, "Last Connection Time + %d PUBACKs received: %s\n", ACLK_PUBACKS_CONN_STABLE, timebuf); + } + if (last_disconnect_time && (tmptr = localtime_r(&last_disconnect_time, &tmbuf)) ) { + char timebuf[26]; + strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); + buffer_sprintf(wb, "Last Disconnect Time: %s\n", timebuf); + } + if (!aclk_connected && next_connection_attempt && (tmptr = localtime_r(&next_connection_attempt, &tmbuf)) ) { + char timebuf[26]; + strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); + buffer_sprintf(wb, "Next Connection Attempt At: %s\nLast Backoff: %.3f", timebuf, last_backoff_value); + } + + if (aclk_connected) { + buffer_sprintf(wb, "Received Cloud MQTT Messages: %d\nMQTT Messages Confirmed by Remote Broker (PUBACKs): %d", aclk_rcvd_cloud_msgs, aclk_pubacks_per_conn); + + RRDHOST *host; + rrd_rdlock(); + rrdhost_foreach_read(host) { + buffer_sprintf(wb, "\n\n> Node Instance for mGUID: \"%s\" hostname \"%s\"\n", host->machine_guid, rrdhost_hostname(host)); + + buffer_strcat(wb, "\tClaimed ID: "); + rrdhost_aclk_state_lock(host); + if (host->aclk_state.claimed_id) + buffer_strcat(wb, host->aclk_state.claimed_id); + else + buffer_strcat(wb, "null"); + rrdhost_aclk_state_unlock(host); + + + if (host->node_id == NULL || uuid_is_null(*host->node_id)) { + buffer_strcat(wb, "\n\tNode ID: null\n"); + } else { + char node_id[GUID_LEN + 1]; + uuid_unparse_lower(*host->node_id, node_id); + buffer_sprintf(wb, "\n\tNode ID: %s\n", node_id); + } + + buffer_sprintf(wb, "\tStreaming Hops: %d\n\tRelationship: %s", host->system_info->hops, host == localhost ? "self" : "child"); + + if (host != localhost) + buffer_sprintf(wb, "\n\tStreaming Connection Live: %s", host->receiver ? "true" : "false"); + + buffer_strcat(wb, "\n\tAlert Streaming Status:"); + fill_alert_status_for_host(wb, host); + } + rrd_unlock(); + } + + ret = strdupz(buffer_tostring(wb)); + buffer_free(wb); + return ret; +#endif /* ENABLE_ACLK */ +} + +#ifdef ENABLE_ACLK +static void fill_alert_status_for_host_json(json_object *obj, RRDHOST *host) +{ + struct proto_alert_status status; + memset(&status, 0, sizeof(status)); + if (get_proto_alert_status(host, &status)) + return; + + json_object *tmp = json_object_new_int(status.alert_updates); + json_object_object_add(obj, "updates", tmp); + + tmp = json_object_new_int(status.alerts_batch_id); + json_object_object_add(obj, "batch-id", tmp); + + tmp = json_object_new_int(status.last_acked_sequence_id); + json_object_object_add(obj, "last-acked-seq-id", tmp); + + tmp = json_object_new_int(status.pending_min_sequence_id); + json_object_object_add(obj, "pending-min-seq-id", tmp); + + tmp = json_object_new_int(status.pending_max_sequence_id); + json_object_object_add(obj, "pending-max-seq-id", tmp); + + tmp = json_object_new_int(status.last_submitted_sequence_id); + json_object_object_add(obj, "last-submitted-seq-id", tmp); +} + +static json_object *timestamp_to_json(const time_t *t) +{ + struct tm *tmptr, tmbuf; + if (*t && (tmptr = gmtime_r(t, &tmbuf)) ) { + char timebuf[26]; + strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); + return json_object_new_string(timebuf); + } + return NULL; +} +#endif /* ENABLE_ACLK */ + +char *aclk_state_json(void) +{ +#ifndef ENABLE_ACLK + return strdupz("{\"aclk-available\":false}"); +#else + json_object *tmp, *grp, *msg = json_object_new_object(); + + tmp = json_object_new_boolean(1); + json_object_object_add(msg, "aclk-available", tmp); + + tmp = json_object_new_int(2); + json_object_object_add(msg, "aclk-version", tmp); + + grp = json_object_new_array(); + tmp = json_object_new_string("Protobuf"); + json_object_array_add(grp, tmp); + json_object_object_add(msg, "protocols-supported", grp); + + char *agent_id = get_agent_claimid(); + tmp = json_object_new_boolean(agent_id != NULL); + json_object_object_add(msg, "agent-claimed", tmp); + + if (agent_id) { + tmp = json_object_new_string(agent_id); + freez(agent_id); + } else + tmp = NULL; + json_object_object_add(msg, "claimed-id", tmp); + + char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL); + tmp = cloud_base_url ? json_object_new_string(cloud_base_url) : NULL; + json_object_object_add(msg, "cloud-url", tmp); + + tmp = json_object_new_boolean(aclk_connected); + json_object_object_add(msg, "online", tmp); + + tmp = json_object_new_string("Protobuf"); + json_object_object_add(msg, "used-cloud-protocol", tmp); + + tmp = json_object_new_int(5); + json_object_object_add(msg, "mqtt-version", tmp); + + tmp = json_object_new_int(aclk_rcvd_cloud_msgs); + json_object_object_add(msg, "received-app-layer-msgs", tmp); + + tmp = json_object_new_int(aclk_pubacks_per_conn); + json_object_object_add(msg, "received-mqtt-pubacks", tmp); + + tmp = json_object_new_int(aclk_connection_counter > 0 ? (aclk_connection_counter - 1) : 0); + json_object_object_add(msg, "reconnect-count", tmp); + + json_object_object_add(msg, "last-connect-time-utc", timestamp_to_json(&last_conn_time_mqtt)); + json_object_object_add(msg, "last-connect-time-puback-utc", timestamp_to_json(&last_conn_time_appl)); + json_object_object_add(msg, "last-disconnect-time-utc", timestamp_to_json(&last_disconnect_time)); + json_object_object_add(msg, "next-connection-attempt-utc", !aclk_connected ? timestamp_to_json(&next_connection_attempt) : NULL); + tmp = NULL; + if (!aclk_connected && last_backoff_value) + tmp = json_object_new_double(last_backoff_value); + json_object_object_add(msg, "last-backoff-value", tmp); + + tmp = json_object_new_boolean(aclk_disable_runtime); + json_object_object_add(msg, "banned-by-cloud", tmp); + + grp = json_object_new_array(); + + RRDHOST *host; + rrd_rdlock(); + rrdhost_foreach_read(host) { + json_object *nodeinstance = json_object_new_object(); + + tmp = json_object_new_string(rrdhost_hostname(host)); + json_object_object_add(nodeinstance, "hostname", tmp); + + tmp = json_object_new_string(host->machine_guid); + json_object_object_add(nodeinstance, "mguid", tmp); + + rrdhost_aclk_state_lock(host); + if (host->aclk_state.claimed_id) { + tmp = json_object_new_string(host->aclk_state.claimed_id); + json_object_object_add(nodeinstance, "claimed_id", tmp); + } else + json_object_object_add(nodeinstance, "claimed_id", NULL); + rrdhost_aclk_state_unlock(host); + + if (host->node_id == NULL || uuid_is_null(*host->node_id)) { + json_object_object_add(nodeinstance, "node-id", NULL); + } else { + char node_id[GUID_LEN + 1]; + uuid_unparse_lower(*host->node_id, node_id); + tmp = json_object_new_string(node_id); + json_object_object_add(nodeinstance, "node-id", tmp); + } + + tmp = json_object_new_int(host->system_info->hops); + json_object_object_add(nodeinstance, "streaming-hops", tmp); + + tmp = json_object_new_string(host == localhost ? "self" : "child"); + json_object_object_add(nodeinstance, "relationship", tmp); + + tmp = json_object_new_boolean((host->receiver || host == localhost)); + json_object_object_add(nodeinstance, "streaming-online", tmp); + + tmp = json_object_new_object(); + fill_alert_status_for_host_json(tmp, host); + json_object_object_add(nodeinstance, "alert-sync-status", tmp); + + json_object_array_add(grp, nodeinstance); + } + rrd_unlock(); + json_object_object_add(msg, "node-instances", grp); + + char *str = strdupz(json_object_to_json_string_ext(msg, JSON_C_TO_STRING_PLAIN)); + json_object_put(msg); + return str; +#endif /* ENABLE_ACLK */ +} + +void add_aclk_host_labels(void) { + DICTIONARY *labels = localhost->rrdlabels; + +#ifdef ENABLE_ACLK + rrdlabels_add(labels, "_aclk_available", "true", RRDLABEL_SRC_AUTO|RRDLABEL_SRC_ACLK); + ACLK_PROXY_TYPE aclk_proxy; + char *proxy_str; + aclk_get_proxy(&aclk_proxy); + + switch(aclk_proxy) { + case PROXY_TYPE_SOCKS5: + proxy_str = "SOCKS5"; + break; + case PROXY_TYPE_HTTP: + proxy_str = "HTTP"; + break; + default: + proxy_str = "none"; + break; + } + + rrdlabels_add(labels, "_mqtt_version", "5", RRDLABEL_SRC_AUTO); + rrdlabels_add(labels, "_aclk_proxy", proxy_str, RRDLABEL_SRC_AUTO); + rrdlabels_add(labels, "_aclk_ng_new_cloud_protocol", "true", RRDLABEL_SRC_AUTO|RRDLABEL_SRC_ACLK); +#else + rrdlabels_add(labels, "_aclk_available", "false", RRDLABEL_SRC_AUTO|RRDLABEL_SRC_ACLK); +#endif +} + +void aclk_queue_node_info(RRDHOST *host) { + struct aclk_database_worker_config *wc = (struct aclk_database_worker_config *) host->dbsync_worker; + if (likely(wc)) { + wc->node_info_send = 1; + } +} diff --git a/aclk/aclk.h b/aclk/aclk.h new file mode 100644 index 0000000..6aed548 --- /dev/null +++ b/aclk/aclk.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#ifndef ACLK_H +#define ACLK_H + +#include "daemon/common.h" + +#ifdef ENABLE_ACLK +#include "aclk_util.h" +#include "aclk_rrdhost_state.h" + +// How many MQTT PUBACKs we need to get to consider connection +// stable for the purposes of TBEB (truncated binary exponential backoff) +#define ACLK_PUBACKS_CONN_STABLE 3 +#endif /* ENABLE_ACLK */ + +extern int aclk_connected; +extern int aclk_ctx_based; +extern int aclk_disable_runtime; +extern int aclk_stats_enabled; +extern int aclk_kill_link; + +extern usec_t aclk_session_us; +extern time_t aclk_session_sec; + +extern time_t aclk_block_until; + +extern int disconnect_req; + +#ifdef ENABLE_ACLK +void *aclk_main(void *ptr); + +extern netdata_mutex_t aclk_shared_state_mutex; +#define ACLK_SHARED_STATE_LOCK netdata_mutex_lock(&aclk_shared_state_mutex) +#define ACLK_SHARED_STATE_UNLOCK netdata_mutex_unlock(&aclk_shared_state_mutex) + +extern struct aclk_shared_state { + // To wait for `disconnect` message PUBACK + // when shutting down + // at the same time if > 0 we know link is + // shutting down + int mqtt_shutdown_msg_id; + int mqtt_shutdown_msg_rcvd; +} aclk_shared_state; + +void aclk_host_state_update(RRDHOST *host, int cmd); +void aclk_send_node_instances(void); + +void aclk_send_bin_msg(char *msg, size_t msg_len, enum aclk_topics subtopic, const char *msgname); + +#endif /* ENABLE_ACLK */ + +char *aclk_state(void); +char *aclk_state_json(void); +void add_aclk_host_labels(void); +void aclk_queue_node_info(RRDHOST *host); + +#endif /* ACLK_H */ diff --git a/aclk/aclk_alarm_api.c b/aclk/aclk_alarm_api.c new file mode 100644 index 0000000..7df51a7 --- /dev/null +++ b/aclk/aclk_alarm_api.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_alarm_api.h" + +#include "aclk_query_queue.h" + +#include "aclk_util.h" + +#include "aclk.h" + +void aclk_send_alarm_log_health(struct alarm_log_health *log_health) +{ + aclk_query_t query = aclk_query_new(ALARM_LOG_HEALTH); + query->data.bin_payload.payload = generate_alarm_log_health(&query->data.bin_payload.size, log_health); + query->data.bin_payload.topic = ACLK_TOPICID_ALARM_HEALTH; + query->data.bin_payload.msg_name = "AlarmLogHealth"; + QUEUE_IF_PAYLOAD_PRESENT(query); +} + +void aclk_send_alarm_log_entry(struct alarm_log_entry *log_entry) +{ + size_t payload_size; + char *payload = generate_alarm_log_entry(&payload_size, log_entry); + + aclk_send_bin_msg(payload, payload_size, ACLK_TOPICID_ALARM_LOG, "AlarmLogEntry"); +} + +void aclk_send_provide_alarm_cfg(struct provide_alarm_configuration *cfg) +{ + aclk_query_t query = aclk_query_new(ALARM_PROVIDE_CFG); + query->data.bin_payload.payload = generate_provide_alarm_configuration(&query->data.bin_payload.size, cfg); + query->data.bin_payload.topic = ACLK_TOPICID_ALARM_CONFIG; + query->data.bin_payload.msg_name = "ProvideAlarmConfiguration"; + QUEUE_IF_PAYLOAD_PRESENT(query); +} + +void aclk_send_alarm_snapshot(alarm_snapshot_proto_ptr_t snapshot) +{ + aclk_query_t query = aclk_query_new(ALARM_SNAPSHOT); + query->data.bin_payload.payload = generate_alarm_snapshot_bin(&query->data.bin_payload.size, snapshot); + query->data.bin_payload.topic = ACLK_TOPICID_ALARM_SNAPSHOT; + query->data.bin_payload.msg_name = "AlarmSnapshot"; + QUEUE_IF_PAYLOAD_PRESENT(query); +} diff --git a/aclk/aclk_alarm_api.h b/aclk/aclk_alarm_api.h new file mode 100644 index 0000000..e3fa92b --- /dev/null +++ b/aclk/aclk_alarm_api.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_ALARM_API_H +#define ACLK_ALARM_API_H + +#include "../daemon/common.h" +#include "schema-wrappers/schema_wrappers.h" + +void aclk_send_alarm_log_health(struct alarm_log_health *log_health); +void aclk_send_alarm_log_entry(struct alarm_log_entry *log_entry); +void aclk_send_provide_alarm_cfg(struct provide_alarm_configuration *cfg); +void aclk_send_alarm_snapshot(alarm_snapshot_proto_ptr_t snapshot); + +#endif /* ACLK_ALARM_API_H */ diff --git a/aclk/aclk_capas.c b/aclk/aclk_capas.c new file mode 100644 index 0000000..df9d18f --- /dev/null +++ b/aclk/aclk_capas.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_capas.h" + +#include "ml/ml.h" + +const struct capability *aclk_get_agent_capas() +{ + static struct capability agent_capabilities[] = { + { .name = "json", .version = 2, .enabled = 0 }, + { .name = "proto", .version = 1, .enabled = 1 }, + { .name = "ml", .version = 0, .enabled = 0 }, + { .name = "mc", .version = 0, .enabled = 0 }, + { .name = "ctx", .version = 1, .enabled = 1 }, + { .name = "funcs", .version = 1, .enabled = 1 }, + { .name = NULL, .version = 0, .enabled = 0 } + }; + agent_capabilities[2].version = ml_capable() ? 1 : 0; + agent_capabilities[2].enabled = ml_enabled(localhost); + + agent_capabilities[3].version = enable_metric_correlations ? metric_correlations_version : 0; + agent_capabilities[3].enabled = enable_metric_correlations; + + return agent_capabilities; +} + +struct capability *aclk_get_node_instance_capas(RRDHOST *host) +{ + struct capability ni_caps[] = { + { .name = "proto", .version = 1, .enabled = 1 }, + { .name = "ml", .version = ml_capable(), .enabled = ml_enabled(host) }, + { .name = "mc", + .version = enable_metric_correlations ? metric_correlations_version : 0, + .enabled = enable_metric_correlations }, + { .name = "ctx", .version = 1, .enabled = 1 }, + { .name = "funcs", .version = 0, .enabled = 0 }, + { .name = NULL, .version = 0, .enabled = 0 } + }; + if (host->receiver && stream_has_capability(host->receiver, STREAM_CAP_FUNCTIONS)) { + ni_caps[4].version = 1; + ni_caps[4].enabled = 1; + } + + struct capability *ret = mallocz(sizeof(ni_caps)); + memcpy(ret, ni_caps, sizeof(ni_caps)); + return ret; +} diff --git a/aclk/aclk_capas.h b/aclk/aclk_capas.h new file mode 100644 index 0000000..c39a197 --- /dev/null +++ b/aclk/aclk_capas.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_CAPAS_H +#define ACLK_CAPAS_H + +#include "daemon/common.h" +#include "libnetdata/libnetdata.h" + +#include "schema-wrappers/capability.h" + +const struct capability *aclk_get_agent_capas(); +struct capability *aclk_get_node_instance_capas(RRDHOST *host); + +#endif /* ACLK_CAPAS_H */ diff --git a/aclk/aclk_contexts_api.c b/aclk/aclk_contexts_api.c new file mode 100644 index 0000000..f334493 --- /dev/null +++ b/aclk/aclk_contexts_api.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_query_queue.h" + +#include "aclk_contexts_api.h" + +void aclk_send_contexts_snapshot(contexts_snapshot_t data) +{ + aclk_query_t query = aclk_query_new(PROTO_BIN_MESSAGE); + query->data.bin_payload.topic = ACLK_TOPICID_CTXS_SNAPSHOT; + query->data.bin_payload.payload = contexts_snapshot_2bin(data, &query->data.bin_payload.size); + query->data.bin_payload.msg_name = "ContextsSnapshot"; + QUEUE_IF_PAYLOAD_PRESENT(query); +} + +void aclk_send_contexts_updated(contexts_updated_t data) +{ + aclk_query_t query = aclk_query_new(PROTO_BIN_MESSAGE); + query->data.bin_payload.topic = ACLK_TOPICID_CTXS_UPDATED; + query->data.bin_payload.payload = contexts_updated_2bin(data, &query->data.bin_payload.size); + query->data.bin_payload.msg_name = "ContextsUpdated"; + QUEUE_IF_PAYLOAD_PRESENT(query); +} + +void aclk_update_node_collectors(struct update_node_collectors *collectors) +{ + aclk_query_t query = aclk_query_new(UPDATE_NODE_COLLECTORS); + query->data.bin_payload.topic = ACLK_TOPICID_NODE_COLLECTORS; + query->data.bin_payload.payload = generate_update_node_collectors_message(&query->data.bin_payload.size, collectors); + query->data.bin_payload.msg_name = "UpdateNodeCollectors"; + QUEUE_IF_PAYLOAD_PRESENT(query); +} + +void aclk_update_node_info(struct update_node_info *info) +{ + aclk_query_t query = aclk_query_new(UPDATE_NODE_INFO); + query->data.bin_payload.topic = ACLK_TOPICID_NODE_INFO; + query->data.bin_payload.payload = generate_update_node_info_message(&query->data.bin_payload.size, info); + query->data.bin_payload.msg_name = "UpdateNodeInfo"; + QUEUE_IF_PAYLOAD_PRESENT(query); +} diff --git a/aclk/aclk_contexts_api.h b/aclk/aclk_contexts_api.h new file mode 100644 index 0000000..f0b5ec7 --- /dev/null +++ b/aclk/aclk_contexts_api.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#ifndef ACLK_CONTEXTS_API_H +#define ACLK_CONTEXTS_API_H + +#include "schema-wrappers/schema_wrappers.h" + + +void aclk_send_contexts_snapshot(contexts_snapshot_t data); +void aclk_send_contexts_updated(contexts_updated_t data); +void aclk_update_node_collectors(struct update_node_collectors *collectors); +void aclk_update_node_info(struct update_node_info *info); + +#endif /* ACLK_CONTEXTS_API_H */ + diff --git a/aclk/aclk_otp.c b/aclk/aclk_otp.c new file mode 100644 index 0000000..2bdbb70 --- /dev/null +++ b/aclk/aclk_otp.c @@ -0,0 +1,888 @@ + +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_otp.h" +#include "aclk_util.h" +#include "aclk.h" + +#include "daemon/common.h" + +#include "mqtt_websockets/c-rbuf/include/ringbuffer.h" + +static int aclk_https_request(https_req_t *request, https_req_response_t *response) { + int rc; + // wrapper for ACLK only which loads ACLK specific proxy settings + // then only calls https_request + struct mqtt_wss_proxy proxy_conf = { .host = NULL, .port = 0, .username = NULL, .password = NULL, .type = MQTT_WSS_DIRECT }; + aclk_set_proxy((char**)&proxy_conf.host, &proxy_conf.port, &proxy_conf.type); + + if (proxy_conf.type == MQTT_WSS_PROXY_HTTP) { + request->proxy_host = (char*)proxy_conf.host; // TODO make it const as well + request->proxy_port = proxy_conf.port; + } + + rc = https_request(request, response); + freez((char*)proxy_conf.host); + return rc; +} + +struct auth_data { + char *client_id; + char *username; + char *passwd; +}; + +#define PARSE_ENV_JSON_CHK_TYPE(it, type, name) \ + if (json_object_get_type(json_object_iter_peek_value(it)) != type) { \ + error("value of key \"%s\" should be %s", name, #type); \ + goto exit; \ + } + +#define JSON_KEY_CLIENTID "clientID" +#define JSON_KEY_USER "username" +#define JSON_KEY_PASS "password" +#define JSON_KEY_TOPICS "topics" + +static int parse_passwd_response(const char *json_str, struct auth_data *auth) { + int rc = 1; + json_object *json; + struct json_object_iterator it; + struct json_object_iterator itEnd; + + json = json_tokener_parse(json_str); + if (!json) { + error("JSON-C failed to parse the payload of http response of /env endpoint"); + return 1; + } + + it = json_object_iter_begin(json); + itEnd = json_object_iter_end(json); + + while (!json_object_iter_equal(&it, &itEnd)) { + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_CLIENTID)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_CLIENTID) + + auth->client_id = strdupz(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_USER)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_USER) + + auth->username = strdupz(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_PASS)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_PASS) + + auth->passwd = strdupz(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_TOPICS)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_array, JSON_KEY_TOPICS) + + if (aclk_generate_topic_cache(json_object_iter_peek_value(&it))) { + error("Failed to generate topic cache!"); + goto exit; + } + json_object_iter_next(&it); + continue; + } + error("Unknown key \"%s\" in passwd response payload. Ignoring", json_object_iter_peek_name(&it)); + json_object_iter_next(&it); + } + + if (!auth->client_id) { + error(JSON_KEY_CLIENTID " is compulsory key in /password response"); + goto exit; + } + if (!auth->passwd) { + error(JSON_KEY_PASS " is compulsory in /password response"); + goto exit; + } + if (!auth->username) { + error(JSON_KEY_USER " is compulsory in /password response"); + goto exit; + } + + rc = 0; +exit: + json_object_put(json); + return rc; +} + +#define JSON_KEY_ERTRY "errorNonRetryable" +#define JSON_KEY_EDELAY "errorRetryDelaySeconds" +#define JSON_KEY_EEC "errorCode" +#define JSON_KEY_EMSGKEY "errorMsgKey" +#define JSON_KEY_EMSG "errorMessage" +#if JSON_C_MINOR_VERSION >= 13 +static const char *get_json_str_by_path(json_object *json, const char *path) { + json_object *ptr; + if (json_pointer_get(json, path, &ptr)) { + error("Missing compulsory key \"%s\" in error response", path); + return NULL; + } + if (json_object_get_type(ptr) != json_type_string) { + error("Value of Key \"%s\" in error response should be string", path); + return NULL; + } + return json_object_get_string(ptr); +} + +static int aclk_parse_otp_error(const char *json_str) { + int rc = 1; + json_object *json, *ptr; + const char *ec; + const char *ek; + const char *emsg; + int block_retry = -1, backoff = -1; + + + json = json_tokener_parse(json_str); + if (!json) { + error("JSON-C failed to parse the payload of http response of /env endpoint"); + return 1; + } + + if ((ec = get_json_str_by_path(json, "/" JSON_KEY_EEC)) == NULL) + goto exit; + + if ((ek = get_json_str_by_path(json, "/" JSON_KEY_EMSGKEY)) == NULL) + goto exit; + + if ((emsg = get_json_str_by_path(json, "/" JSON_KEY_EMSG)) == NULL) + goto exit; + + // optional field + if (!json_pointer_get(json, "/" JSON_KEY_ERTRY, &ptr)) { + if (json_object_get_type(ptr) != json_type_boolean) { + error("Error response Key " "/" JSON_KEY_ERTRY " should be of boolean type"); + goto exit; + } + block_retry = json_object_get_boolean(ptr); + } + + // optional field + if (!json_pointer_get(json, "/" JSON_KEY_EDELAY, &ptr)) { + if (json_object_get_type(ptr) != json_type_int) { + error("Error response Key " "/" JSON_KEY_EDELAY " should be of integer type"); + goto exit; + } + backoff = json_object_get_int(ptr); + } + + if (block_retry > 0) + aclk_disable_runtime = 1; + + if (backoff > 0) + aclk_block_until = now_monotonic_sec() + backoff; + + error("Cloud returned EC=\"%s\", Msg-Key:\"%s\", Msg:\"%s\", BlockRetry:%s, Backoff:%ds (-1 unset by cloud)", ec, ek, emsg, block_retry > 0 ? "true" : "false", backoff); + rc = 0; +exit: + json_object_put(json); + return rc; +} +#else +static int aclk_parse_otp_error(const char *json_str) { + int rc = 1; + int block_retry = -1, backoff = -1; + + const char *ec = NULL; + const char *ek = NULL; + const char *emsg = NULL; + + json_object *json; + struct json_object_iterator it; + struct json_object_iterator itEnd; + + json = json_tokener_parse(json_str); + if (!json) { + error("JSON-C failed to parse the payload of http response of /env endpoint"); + return 1; + } + + it = json_object_iter_begin(json); + itEnd = json_object_iter_end(json); + + while (!json_object_iter_equal(&it, &itEnd)) { + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_EMSG)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_EMSG) + + emsg = json_object_get_string(json_object_iter_peek_value(&it)); + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_EMSGKEY)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_EMSGKEY) + + ek = json_object_get_string(json_object_iter_peek_value(&it)); + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_EEC)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_EEC) + + ec = strdupz(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_EDELAY)) { + if (json_object_get_type(json_object_iter_peek_value(&it)) != json_type_int) { + error("value of key " JSON_KEY_EDELAY " should be integer"); + goto exit; + } + + backoff = json_object_get_int(json_object_iter_peek_value(&it)); + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_ERTRY)) { + if (json_object_get_type(json_object_iter_peek_value(&it)) != json_type_boolean) { + error("value of key " JSON_KEY_ERTRY " should be integer"); + goto exit; + } + + block_retry = json_object_get_boolean(json_object_iter_peek_value(&it)); + json_object_iter_next(&it); + continue; + } + error("Unknown key \"%s\" in error response payload. Ignoring", json_object_iter_peek_name(&it)); + json_object_iter_next(&it); + } + + if (block_retry > 0) + aclk_disable_runtime = 1; + + if (backoff > 0) + aclk_block_until = now_monotonic_sec() + backoff; + + error("Cloud returned EC=\"%s\", Msg-Key:\"%s\", Msg:\"%s\", BlockRetry:%s, Backoff:%ds (-1 unset by cloud)", ec, ek, emsg, block_retry > 0 ? "true" : "false", backoff); + rc = 0; +exit: + json_object_put(json); + return rc; +} +#endif + +#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 +static EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void) +{ + EVP_ENCODE_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); + + if (ctx != NULL) { + memset(ctx, 0, sizeof(*ctx)); + } + return ctx; +} +static void EVP_ENCODE_CTX_free(EVP_ENCODE_CTX *ctx) +{ + OPENSSL_free(ctx); + return; +} +#endif + +#define CHALLENGE_LEN 256 +#define CHALLENGE_LEN_BASE64 344 +inline static int base64_decode_helper(unsigned char *out, int *outl, const unsigned char *in, int in_len) +{ + unsigned char remaining_data[CHALLENGE_LEN]; + EVP_ENCODE_CTX *ctx = EVP_ENCODE_CTX_new(); + EVP_DecodeInit(ctx); + EVP_DecodeUpdate(ctx, out, outl, in, in_len); + int remainder = 0; + EVP_DecodeFinal(ctx, remaining_data, &remainder); + EVP_ENCODE_CTX_free(ctx); + if (remainder) { + error("Unexpected data at EVP_DecodeFinal"); + return 1; + } + return 0; +} + +inline static int base64_encode_helper(unsigned char *out, int *outl, const unsigned char *in, int in_len) +{ + int len; + unsigned char *str = out; + EVP_ENCODE_CTX *ctx = EVP_ENCODE_CTX_new(); + EVP_EncodeInit(ctx); + EVP_EncodeUpdate(ctx, str, outl, in, in_len); + str += *outl; + EVP_EncodeFinal(ctx, str, &len); + *outl += len; + // if we ever expect longer output than what OpenSSL would pack into single line + // we would have to skip the endlines, until then we can just cut the string short + str = (unsigned char*)strchr((char*)out, '\n'); + if (str) + *str = 0; + EVP_ENCODE_CTX_free(ctx); + return 0; +} + +#define OTP_URL_PREFIX "/api/v1/auth/node/" +int aclk_get_otp_challenge(url_t *target, const char *agent_id, unsigned char **challenge, int *challenge_bytes) +{ + int rc = 1; + https_req_t req = HTTPS_REQ_T_INITIALIZER; + https_req_response_t resp = HTTPS_REQ_RESPONSE_T_INITIALIZER; + + BUFFER *url = buffer_create(strlen(OTP_URL_PREFIX) + UUID_STR_LEN + 20); + + req.host = target->host; + req.port = target->port; + buffer_sprintf(url, "%s/node/%s/challenge", target->path, agent_id); + req.url = (char *)buffer_tostring(url); + + if (aclk_https_request(&req, &resp)) { + error ("ACLK_OTP Challenge failed"); + buffer_free(url); + return 1; + } + if (resp.http_code != 200) { + error ("ACLK_OTP Challenge HTTP code not 200 OK (got %d)", resp.http_code); + buffer_free(url); + if (resp.payload_size) + aclk_parse_otp_error(resp.payload); + goto cleanup_resp; + } + buffer_free(url); + + info ("ACLK_OTP Got Challenge from Cloud"); + + json_object *json = json_tokener_parse(resp.payload); + if (!json) { + error ("Couldn't parse HTTP GET challenge payload"); + goto cleanup_resp; + } + json_object *challenge_json; + if (!json_object_object_get_ex(json, "challenge", &challenge_json)) { + error ("No key named \"challenge\" in the returned JSON"); + goto cleanup_json; + } + if (!json_object_is_type(challenge_json, json_type_string)) { + error ("\"challenge\" is not a string JSON type"); + goto cleanup_json; + } + const char *challenge_base64; + if (!(challenge_base64 = json_object_get_string(challenge_json))) { + error("Failed to extract challenge from JSON object"); + goto cleanup_json; + } + if (strlen(challenge_base64) != CHALLENGE_LEN_BASE64) { + error("Received Challenge has unexpected length of %zu (expected %d)", strlen(challenge_base64), CHALLENGE_LEN_BASE64); + goto cleanup_json; + } + + *challenge = mallocz((CHALLENGE_LEN_BASE64 / 4) * 3); + base64_decode_helper(*challenge, challenge_bytes, (const unsigned char*)challenge_base64, strlen(challenge_base64)); + if (*challenge_bytes != CHALLENGE_LEN) { + error("Unexpected challenge length of %d instead of %d", *challenge_bytes, CHALLENGE_LEN); + freez(*challenge); + *challenge = NULL; + goto cleanup_json; + } + rc = 0; + +cleanup_json: + json_object_put(json); +cleanup_resp: + https_req_response_free(&resp); + return rc; +} + +int aclk_send_otp_response(const char *agent_id, const unsigned char *response, int response_bytes, url_t *target, struct auth_data *mqtt_auth) +{ + int len; + int rc = 1; + https_req_t req = HTTPS_REQ_T_INITIALIZER; + https_req_response_t resp = HTTPS_REQ_RESPONSE_T_INITIALIZER; + + req.host = target->host; + req.port = target->port; + req.request_type = HTTP_REQ_POST; + + unsigned char base64[CHALLENGE_LEN_BASE64 + 1]; + memset(base64, 0, CHALLENGE_LEN_BASE64 + 1); + + base64_encode_helper(base64, &len, response, response_bytes); + + BUFFER *url = buffer_create(strlen(OTP_URL_PREFIX) + UUID_STR_LEN + 20); + BUFFER *resp_json = buffer_create(strlen(OTP_URL_PREFIX) + UUID_STR_LEN + 20); + + buffer_sprintf(url, "%s/node/%s/password", target->path, agent_id); + buffer_sprintf(resp_json, "{\"response\":\"%s\"}", base64); + + req.url = (char *)buffer_tostring(url); + req.payload = (char *)buffer_tostring(resp_json); + req.payload_size = strlen(req.payload); + + if (aclk_https_request(&req, &resp)) { + error ("ACLK_OTP Password error trying to post result to password"); + goto cleanup_buffers; + } + if (resp.http_code != 201) { + error ("ACLK_OTP Password HTTP code not 201 Created (got %d)", resp.http_code); + if (resp.payload_size) + aclk_parse_otp_error(resp.payload); + goto cleanup_response; + } + info ("ACLK_OTP Got Password from Cloud"); + + if (parse_passwd_response(resp.payload, mqtt_auth)){ + error("Error parsing response of password endpoint"); + goto cleanup_response; + } + + rc = 0; + +cleanup_response: + https_req_response_free(&resp); +cleanup_buffers: + buffer_free(resp_json); + buffer_free(url); + return rc; +} + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 +static int private_decrypt(EVP_PKEY *p_key, unsigned char * enc_data, int data_len, unsigned char **decrypted) +#else +static int private_decrypt(RSA *p_key, unsigned char * enc_data, int data_len, unsigned char **decrypted) +#endif +{ + int result; +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 + size_t outlen = EVP_PKEY_size(p_key); + EVP_PKEY_CTX *ctx = EVP_PKEY_CTX_new(p_key, NULL); + if (!ctx) + return 1; + + if (EVP_PKEY_decrypt_init(ctx) <= 0) + return 1; + + if (EVP_PKEY_CTX_set_rsa_padding(ctx, RSA_PKCS1_OAEP_PADDING) <= 0) + return 1; + + *decrypted = mallocz(outlen); + + if (EVP_PKEY_decrypt(ctx, *decrypted, &outlen, enc_data, data_len) == 1) + result = (int) outlen; + else + result = -1; +#else + *decrypted = mallocz(RSA_size(p_key)); + result = RSA_private_decrypt(data_len, enc_data, *decrypted, p_key, RSA_PKCS1_OAEP_PADDING); +#endif + if (result == -1) + { + char err[512]; + ERR_error_string_n(ERR_get_error(), err, sizeof(err)); + error("Decryption of the challenge failed: %s", err); + } + return result; +} + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 +int aclk_get_mqtt_otp(EVP_PKEY *p_key, char **mqtt_id, char **mqtt_usr, char **mqtt_pass, url_t *target) +#else +int aclk_get_mqtt_otp(RSA *p_key, char **mqtt_id, char **mqtt_usr, char **mqtt_pass, url_t *target) +#endif +{ + unsigned char *challenge = NULL; + int challenge_bytes; + + char *agent_id = get_agent_claimid(); + if (agent_id == NULL) { + error("Agent was not claimed - cannot perform challenge/response"); + return 1; + } + + // Get Challenge + if (aclk_get_otp_challenge(target, agent_id, &challenge, &challenge_bytes)) { + error("Error getting challenge"); + freez(agent_id); + return 1; + } + + // Decrypt Challenge / Get response + unsigned char *response_plaintext; + int response_plaintext_bytes = private_decrypt(p_key, challenge, challenge_bytes, &response_plaintext); + if (response_plaintext_bytes < 0) { + error ("Couldn't decrypt the challenge received"); + freez(response_plaintext); + freez(challenge); + freez(agent_id); + return 1; + } + freez(challenge); + + // Encode and Send Challenge + struct auth_data data = { .client_id = NULL, .passwd = NULL, .username = NULL }; + if (aclk_send_otp_response(agent_id, response_plaintext, response_plaintext_bytes, target, &data)) { + error("Error getting response"); + freez(response_plaintext); + freez(agent_id); + return 1; + } + + *mqtt_pass = data.passwd; + *mqtt_usr = data.username; + *mqtt_id = data.client_id; + + freez(response_plaintext); + freez(agent_id); + return 0; +} + +#define JSON_KEY_ENC "encoding" +#define JSON_KEY_AUTH_ENDPOINT "authEndpoint" +#define JSON_KEY_TRP "transports" +#define JSON_KEY_TRP_TYPE "type" +#define JSON_KEY_TRP_ENDPOINT "endpoint" +#define JSON_KEY_BACKOFF "backoff" +#define JSON_KEY_BACKOFF_BASE "base" +#define JSON_KEY_BACKOFF_MAX "maxSeconds" +#define JSON_KEY_BACKOFF_MIN "minSeconds" +#define JSON_KEY_CAPS "capabilities" + +static int parse_json_env_transport(json_object *json, aclk_transport_desc_t *trp) { + struct json_object_iterator it; + struct json_object_iterator itEnd; + + it = json_object_iter_begin(json); + itEnd = json_object_iter_end(json); + + while (!json_object_iter_equal(&it, &itEnd)) { + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_TRP_TYPE)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_TRP_TYPE) + if (trp->type != ACLK_TRP_UNKNOWN) { + error(JSON_KEY_TRP_TYPE " set already"); + goto exit; + } + trp->type = aclk_transport_type_t_from_str(json_object_get_string(json_object_iter_peek_value(&it))); + if (trp->type == ACLK_TRP_UNKNOWN) { + error(JSON_KEY_TRP_TYPE " unknown type \"%s\"", json_object_get_string(json_object_iter_peek_value(&it))); + goto exit; + } + json_object_iter_next(&it); + continue; + } + + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_TRP_ENDPOINT)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_TRP_ENDPOINT) + if (trp->endpoint) { + error(JSON_KEY_TRP_ENDPOINT " set already"); + goto exit; + } + trp->endpoint = strdupz(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + + error ("unknown JSON key in dictionary (\"%s\")", json_object_iter_peek_name(&it)); + json_object_iter_next(&it); + } + + if (!trp->endpoint) { + error (JSON_KEY_TRP_ENDPOINT " is missing from JSON dictionary"); + goto exit; + } + + if (trp->type == ACLK_TRP_UNKNOWN) { + error ("transport type not set"); + goto exit; + } + + return 0; + +exit: + aclk_transport_desc_t_destroy(trp); + return 1; +} + +static int parse_json_env_transports(json_object *json_array, aclk_env_t *env) { + aclk_transport_desc_t *trp; + json_object *obj; + + if (env->transports) { + error("transports have been set already"); + return 1; + } + + env->transport_count = json_object_array_length(json_array); + + env->transports = callocz(env->transport_count , sizeof(aclk_transport_desc_t *)); + + for (size_t i = 0; i < env->transport_count; i++) { + trp = callocz(1, sizeof(aclk_transport_desc_t)); + obj = json_object_array_get_idx(json_array, i); + if (parse_json_env_transport(obj, trp)) { + error("error parsing transport idx %d", (int)i); + freez(trp); + return 1; + } + env->transports[i] = trp; + } + + return 0; +} + +#define MATCHED_CORRECT 1 +#define MATCHED_ERROR -1 +#define NOT_MATCHED 0 +static int parse_json_backoff_int(struct json_object_iterator *it, int *out, const char* name, int min, int max) { + if (!strcmp(json_object_iter_peek_name(it), name)) { + if (json_object_get_type(json_object_iter_peek_value(it)) != json_type_int) { + error("Could not parse \"%s\". Not an integer as expected.", name); + return MATCHED_ERROR; + } + + *out = json_object_get_int(json_object_iter_peek_value(it)); + + if (*out < min || *out > max) { + error("Value of \"%s\"=%d out of range (%d-%d).", name, *out, min, max); + return MATCHED_ERROR; + } + + return MATCHED_CORRECT; + } + return NOT_MATCHED; +} + +static int parse_json_backoff(json_object *json, aclk_backoff_t *backoff) { + struct json_object_iterator it; + struct json_object_iterator itEnd; + int ret; + + it = json_object_iter_begin(json); + itEnd = json_object_iter_end(json); + + while (!json_object_iter_equal(&it, &itEnd)) { + if ( (ret = parse_json_backoff_int(&it, &backoff->base, JSON_KEY_BACKOFF_BASE, 1, 10)) ) { + if (ret == MATCHED_ERROR) { + return 1; + } + json_object_iter_next(&it); + continue; + } + + if ( (ret = parse_json_backoff_int(&it, &backoff->max_s, JSON_KEY_BACKOFF_MAX, 500, INT_MAX)) ) { + if (ret == MATCHED_ERROR) { + return 1; + } + json_object_iter_next(&it); + continue; + } + + if ( (ret = parse_json_backoff_int(&it, &backoff->min_s, JSON_KEY_BACKOFF_MIN, 0, INT_MAX)) ) { + if (ret == MATCHED_ERROR) { + return 1; + } + json_object_iter_next(&it); + continue; + } + + error ("unknown JSON key in dictionary (\"%s\")", json_object_iter_peek_name(&it)); + json_object_iter_next(&it); + } + + return 0; +} + +static int parse_json_env_caps(json_object *json, aclk_env_t *env) { + json_object *obj; + const char *str; + + if (env->capabilities) { + error("transports have been set already"); + return 1; + } + + env->capability_count = json_object_array_length(json); + + // empty capabilities list is allowed + if (!env->capability_count) + return 0; + + env->capabilities = callocz(env->capability_count , sizeof(char *)); + + for (size_t i = 0; i < env->capability_count; i++) { + obj = json_object_array_get_idx(json, i); + if (json_object_get_type(obj) != json_type_string) { + error("Capability at index %d not a string!", (int)i); + return 1; + } + str = json_object_get_string(obj); + if (!str) { + error("Error parsing capabilities"); + return 1; + } + env->capabilities[i] = strdupz(str); + } + + return 0; +} + +static int parse_json_env(const char *json_str, aclk_env_t *env) { + json_object *json; + struct json_object_iterator it; + struct json_object_iterator itEnd; + + json = json_tokener_parse(json_str); + if (!json) { + error("JSON-C failed to parse the payload of http response of /env endpoint"); + return 1; + } + + it = json_object_iter_begin(json); + itEnd = json_object_iter_end(json); + + while (!json_object_iter_equal(&it, &itEnd)) { + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_AUTH_ENDPOINT)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_AUTH_ENDPOINT) + if (env->auth_endpoint) { + error("authEndpoint set already"); + goto exit; + } + env->auth_endpoint = strdupz(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_ENC)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_string, JSON_KEY_ENC) + if (env->encoding != ACLK_ENC_UNKNOWN) { + error(JSON_KEY_ENC " set already"); + goto exit; + } + env->encoding = aclk_encoding_type_t_from_str(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_TRP)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_array, JSON_KEY_TRP) + + json_object *now = json_object_iter_peek_value(&it); + parse_json_env_transports(now, env); + + json_object_iter_next(&it); + continue; + } + + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_BACKOFF)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_object, JSON_KEY_BACKOFF) + + if (parse_json_backoff(json_object_iter_peek_value(&it), &env->backoff)) { + env->backoff.base = 0; + error("Error parsing Backoff parameters in env"); + goto exit; + } + + json_object_iter_next(&it); + continue; + } + + if (!strcmp(json_object_iter_peek_name(&it), JSON_KEY_CAPS)) { + PARSE_ENV_JSON_CHK_TYPE(&it, json_type_array, JSON_KEY_CAPS) + + if (parse_json_env_caps(json_object_iter_peek_value(&it), env)) { + error("Error parsing capabilities list"); + goto exit; + } + + json_object_iter_next(&it); + continue; + } + + error ("unknown JSON key in dictionary (\"%s\")", json_object_iter_peek_name(&it)); + json_object_iter_next(&it); + } + + // Check all compulsory keys have been set + if (env->transport_count < 1) { + error("env has to return at least one transport"); + goto exit; + } + if (!env->auth_endpoint) { + error(JSON_KEY_AUTH_ENDPOINT " is compulsory"); + goto exit; + } + if (env->encoding == ACLK_ENC_UNKNOWN) { + error(JSON_KEY_ENC " is compulsory"); + goto exit; + } + if (!env->backoff.base) { + error(JSON_KEY_BACKOFF " is compulsory"); + goto exit; + } + + json_object_put(json); + return 0; + +exit: + aclk_env_t_destroy(env); + json_object_put(json); + return 1; +} + +int aclk_get_env(aclk_env_t *env, const char* aclk_hostname, int aclk_port) { + BUFFER *buf = buffer_create(1024); + + https_req_t req = HTTPS_REQ_T_INITIALIZER; + https_req_response_t resp = HTTPS_REQ_RESPONSE_T_INITIALIZER; + + req.request_type = HTTP_REQ_GET; + + char *agent_id = get_agent_claimid(); + if (agent_id == NULL) + { + error("Agent was not claimed - cannot perform challenge/response"); + buffer_free(buf); + return 1; + } + + buffer_sprintf(buf, "/api/v1/env?v=%s&cap=proto,ctx&claim_id=%s", &(VERSION[1]) /* skip 'v' at beginning */, agent_id); + + freez(agent_id); + + req.host = (char*)aclk_hostname; + req.port = aclk_port; + req.url = buf->buffer; + if (aclk_https_request(&req, &resp)) { + error("Error trying to contact env endpoint"); + https_req_response_free(&resp); + buffer_free(buf); + return 1; + } + if (resp.http_code != 200) { + error("The HTTP code not 200 OK (Got %d)", resp.http_code); + if (resp.payload_size) + aclk_parse_otp_error(resp.payload); + https_req_response_free(&resp); + buffer_free(buf); + return 1; + } + + if (!resp.payload || !resp.payload_size) { + error("Unexpected empty payload as response to /env call"); + https_req_response_free(&resp); + buffer_free(buf); + return 1; + } + + if (parse_json_env(resp.payload, env)) { + error ("error parsing /env message"); + https_req_response_free(&resp); + buffer_free(buf); + return 1; + } + + info("Getting Cloud /env successful"); + + https_req_response_free(&resp); + buffer_free(buf); + return 0; +} diff --git a/aclk/aclk_otp.h b/aclk/aclk_otp.h new file mode 100644 index 0000000..2d660e5 --- /dev/null +++ b/aclk/aclk_otp.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_OTP_H +#define ACLK_OTP_H + +#include "daemon/common.h" + +#include "https_client.h" +#include "aclk_util.h" + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 +int aclk_get_mqtt_otp(EVP_PKEY *p_key, char **mqtt_id, char **mqtt_usr, char **mqtt_pass, url_t *target); +#else +int aclk_get_mqtt_otp(RSA *p_key, char **mqtt_id, char **mqtt_usr, char **mqtt_pass, url_t *target); +#endif +int aclk_get_env(aclk_env_t *env, const char *aclk_hostname, int aclk_port); + +#endif /* ACLK_OTP_H */ diff --git a/aclk/aclk_proxy.c b/aclk/aclk_proxy.c new file mode 100644 index 0000000..1701eb8 --- /dev/null +++ b/aclk/aclk_proxy.c @@ -0,0 +1,186 @@ +#include "aclk_proxy.h" + +#include "daemon/common.h" + +#define ACLK_PROXY_ENV "env" +#define ACLK_PROXY_CONFIG_VAR "proxy" + +struct { + ACLK_PROXY_TYPE type; + const char *url_str; +} supported_proxy_types[] = { + { .type = PROXY_TYPE_SOCKS5, .url_str = "socks5" ACLK_PROXY_PROTO_ADDR_SEPARATOR }, + { .type = PROXY_TYPE_SOCKS5, .url_str = "socks5h" ACLK_PROXY_PROTO_ADDR_SEPARATOR }, + { .type = PROXY_TYPE_HTTP, .url_str = "http" ACLK_PROXY_PROTO_ADDR_SEPARATOR }, + { .type = PROXY_TYPE_UNKNOWN, .url_str = NULL }, +}; + +const char *aclk_proxy_type_to_s(ACLK_PROXY_TYPE *type) +{ + switch (*type) { + case PROXY_DISABLED: + return "disabled"; + case PROXY_TYPE_HTTP: + return "HTTP"; + case PROXY_TYPE_SOCKS5: + return "SOCKS"; + default: + return "Unknown"; + } +} + +static inline ACLK_PROXY_TYPE aclk_find_proxy(const char *string) +{ + int i = 0; + while (supported_proxy_types[i].url_str) { + if (!strncmp(supported_proxy_types[i].url_str, string, strlen(supported_proxy_types[i].url_str))) + return supported_proxy_types[i].type; + i++; + } + return PROXY_TYPE_UNKNOWN; +} + +ACLK_PROXY_TYPE aclk_verify_proxy(const char *string) +{ + if (!string) + return PROXY_TYPE_UNKNOWN; + + while (*string == 0x20) + string++; + + if (!*string) + return PROXY_TYPE_UNKNOWN; + + return aclk_find_proxy(string); +} + +// helper function to censor user&password +// for logging purposes +void safe_log_proxy_censor(char *proxy) +{ + size_t length = strlen(proxy); + char *auth = proxy + length - 1; + char *cur; + + while ((auth >= proxy) && (*auth != '@')) + auth--; + + //if not found or @ is first char do nothing + if (auth <= proxy) + return; + + cur = strstr(proxy, ACLK_PROXY_PROTO_ADDR_SEPARATOR); + if (!cur) + cur = proxy; + else + cur += strlen(ACLK_PROXY_PROTO_ADDR_SEPARATOR); + + while (cur < auth) { + *cur = 'X'; + cur++; + } +} + +static inline void safe_log_proxy_error(char *str, const char *proxy) +{ + char *log = strdupz(proxy); + safe_log_proxy_censor(log); + error("%s Provided Value:\"%s\"", str, log); + freez(log); +} + +static inline int check_socks_enviroment(const char **proxy) +{ + char *tmp = getenv("socks_proxy"); + + if (!tmp) + return 1; + + if (aclk_verify_proxy(tmp) == PROXY_TYPE_SOCKS5) { + *proxy = tmp; + return 0; + } + + safe_log_proxy_error( + "Environment var \"socks_proxy\" defined but of unknown format. Supported syntax: \"socks5[h]://[user:pass@]host:ip\".", + tmp); + return 1; +} + +static inline int check_http_enviroment(const char **proxy) +{ + char *tmp = getenv("http_proxy"); + + if (!tmp) + return 1; + + if (aclk_verify_proxy(tmp) == PROXY_TYPE_HTTP) { + *proxy = tmp; + return 0; + } + + safe_log_proxy_error( + "Environment var \"http_proxy\" defined but of unknown format. Supported syntax: \"http[s]://[user:pass@]host:ip\".", + tmp); + return 1; +} + +const char *aclk_lws_wss_get_proxy_setting(ACLK_PROXY_TYPE *type) +{ + const char *proxy = config_get(CONFIG_SECTION_CLOUD, ACLK_PROXY_CONFIG_VAR, ACLK_PROXY_ENV); + *type = PROXY_DISABLED; + + if (strcmp(proxy, "none") == 0) + return proxy; + + if (strcmp(proxy, ACLK_PROXY_ENV) == 0) { + if (check_socks_enviroment(&proxy) == 0) { +#ifdef LWS_WITH_SOCKS5 + *type = PROXY_TYPE_SOCKS5; + return proxy; +#else + safe_log_proxy_error("socks_proxy environment variable set to use SOCKS5 proxy " + "but Libwebsockets used doesn't have SOCKS5 support built in. " + "Ignoring and checking for other options.", + proxy); +#endif + } + if (check_http_enviroment(&proxy) == 0) + *type = PROXY_TYPE_HTTP; + return proxy; + } + + *type = aclk_verify_proxy(proxy); +#ifndef LWS_WITH_SOCKS5 + if (*type == PROXY_TYPE_SOCKS5) { + safe_log_proxy_error( + "Config var \"" ACLK_PROXY_CONFIG_VAR + "\" set to use SOCKS5 proxy but Libwebsockets used is built without support for SOCKS proxy. ACLK will be disabled.", + proxy); + } +#endif + if (*type == PROXY_TYPE_UNKNOWN) { + *type = PROXY_DISABLED; + safe_log_proxy_error( + "Config var \"" ACLK_PROXY_CONFIG_VAR + "\" defined but of unknown format. Supported syntax: \"socks5[h]://[user:pass@]host:ip\".", + proxy); + } + + return proxy; +} + +// helper function to read settings only once (static) +// as claiming, challenge/response and ACLK +// read the same thing, no need to parse again +const char *aclk_get_proxy(ACLK_PROXY_TYPE *type) +{ + static const char *proxy = NULL; + static ACLK_PROXY_TYPE proxy_type = PROXY_NOT_SET; + + if (proxy_type == PROXY_NOT_SET) + proxy = aclk_lws_wss_get_proxy_setting(&proxy_type); + + *type = proxy_type; + return proxy; +} diff --git a/aclk/aclk_proxy.h b/aclk/aclk_proxy.h new file mode 100644 index 0000000..b4ceb7d --- /dev/null +++ b/aclk/aclk_proxy.h @@ -0,0 +1,22 @@ +#ifndef ACLK_PROXY_H +#define ACLK_PROXY_H + +#include + +#define ACLK_PROXY_PROTO_ADDR_SEPARATOR "://" + +typedef enum aclk_proxy_type { + PROXY_TYPE_UNKNOWN = 0, + PROXY_TYPE_SOCKS5, + PROXY_TYPE_HTTP, + PROXY_DISABLED, + PROXY_NOT_SET, +} ACLK_PROXY_TYPE; + +const char *aclk_proxy_type_to_s(ACLK_PROXY_TYPE *type); +ACLK_PROXY_TYPE aclk_verify_proxy(const char *string); +const char *aclk_lws_wss_get_proxy_setting(ACLK_PROXY_TYPE *type); +void safe_log_proxy_censor(char *proxy); +const char *aclk_get_proxy(ACLK_PROXY_TYPE *type); + +#endif /* ACLK_PROXY_H */ diff --git a/aclk/aclk_query.c b/aclk/aclk_query.c new file mode 100644 index 0000000..5301c28 --- /dev/null +++ b/aclk/aclk_query.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_query.h" +#include "aclk_stats.h" +#include "aclk_tx_msgs.h" + +#define ACLK_QUERY_THREAD_NAME "ACLK_Query" + +#define WEB_HDR_ACCEPT_ENC "Accept-Encoding:" + +pthread_cond_t query_cond_wait = PTHREAD_COND_INITIALIZER; +pthread_mutex_t query_lock_wait = PTHREAD_MUTEX_INITIALIZER; +#define QUERY_THREAD_LOCK pthread_mutex_lock(&query_lock_wait) +#define QUERY_THREAD_UNLOCK pthread_mutex_unlock(&query_lock_wait) + +static usec_t aclk_web_api_v1_request(RRDHOST *host, struct web_client *w, char *url) +{ + usec_t t; + + t = now_monotonic_high_precision_usec(); + w->response.code = web_client_api_request_v1(host, w, url); + t = now_monotonic_high_precision_usec() - t; + + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + aclk_metrics_per_sample.cloud_q_process_total += t; + aclk_metrics_per_sample.cloud_q_process_count++; + if (aclk_metrics_per_sample.cloud_q_process_max < t) + aclk_metrics_per_sample.cloud_q_process_max = t; + ACLK_STATS_UNLOCK; + } + + return t; +} + +static RRDHOST *node_id_2_rrdhost(const char *node_id) +{ + int res; + uuid_t node_id_bin, host_id_bin; + + RRDHOST *host = find_host_by_node_id((char *)node_id); + if (host) + return host; + + char host_id[UUID_STR_LEN]; + if (uuid_parse(node_id, node_id_bin)) { + error("Couldn't parse UUID %s", node_id); + return NULL; + } + if ((res = get_host_id(&node_id_bin, &host_id_bin))) { + error("node not found rc=%d", res); + return NULL; + } + uuid_unparse_lower(host_id_bin, host_id); + return rrdhost_find_by_guid(host_id); +} + +#define NODE_ID_QUERY "/node/" +// TODO this function should be quarantied and written nicely +// lots of skeletons from initial ACLK Legacy impl. +// quick and dirty from the start +static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query) +{ + int retval = 0; + usec_t t; + BUFFER *local_buffer = NULL; + BUFFER *log_buffer = buffer_create(NETDATA_WEB_REQUEST_URL_SIZE); + RRDHOST *query_host = localhost; + +#ifdef NETDATA_WITH_ZLIB + int z_ret; + BUFFER *z_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE); + char *start, *end; +#endif + + struct web_client *w = (struct web_client *)callocz(1, sizeof(struct web_client)); + w->response.data = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE); + w->response.header = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE); + w->response.header_output = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE); + strcpy(w->origin, "*"); // Simulate web_client_create_on_fd() + w->cookie1[0] = 0; // Simulate web_client_create_on_fd() + w->cookie2[0] = 0; // Simulate web_client_create_on_fd() + w->acl = WEB_CLIENT_ACL_ACLK; + + buffer_strcat(log_buffer, query->data.http_api_v2.query); + size_t size = 0; + size_t sent = 0; + w->tv_in = query->created_tv; + now_realtime_timeval(&w->tv_ready); + + if (query->timeout) { + int in_queue = (int) (dt_usec(&w->tv_in, &w->tv_ready) / 1000); + if (in_queue > query->timeout) { + log_access("QUERY CANCELED: QUEUE TIME EXCEEDED %d ms (LIMIT %d ms)", in_queue, query->timeout); + retval = 1; + w->response.code = HTTP_RESP_BACKEND_FETCH_FAILED; + aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, NULL, 0); + goto cleanup; + } + } + + if (!strncmp(query->data.http_api_v2.query, NODE_ID_QUERY, strlen(NODE_ID_QUERY))) { + char *node_uuid = query->data.http_api_v2.query + strlen(NODE_ID_QUERY); + char nodeid[UUID_STR_LEN]; + if (strlen(node_uuid) < (UUID_STR_LEN - 1)) { + error_report(CLOUD_EMSG_MALFORMED_NODE_ID); + retval = 1; + w->response.code = 404; + aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_MALFORMED_NODE_ID, CLOUD_EMSG_MALFORMED_NODE_ID, NULL, 0); + goto cleanup; + } + strncpyz(nodeid, node_uuid, UUID_STR_LEN - 1); + + query_host = node_id_2_rrdhost(nodeid); + if (!query_host) { + error_report("Host with node_id \"%s\" not found! Returning 404 to Cloud!", nodeid); + retval = 1; + w->response.code = 404; + aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_NODE_NOT_FOUND, CLOUD_EMSG_NODE_NOT_FOUND, NULL, 0); + goto cleanup; + } + } + + char *mysep = strchr(query->data.http_api_v2.query, '?'); + if (mysep) { + url_decode_r(w->decoded_query_string, mysep, NETDATA_WEB_REQUEST_URL_SIZE + 1); + *mysep = '\0'; + } else + url_decode_r(w->decoded_query_string, query->data.http_api_v2.query, NETDATA_WEB_REQUEST_URL_SIZE + 1); + + mysep = strrchr(query->data.http_api_v2.query, '/'); + + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + int stat_idx = aclk_cloud_req_http_type_to_idx(mysep ? mysep + 1 : "other"); + aclk_metrics_per_sample.cloud_req_http_by_type[stat_idx]++; + ACLK_STATS_UNLOCK; + } + + // execute the query + t = aclk_web_api_v1_request(query_host, w, mysep ? mysep + 1 : "noop"); + size = (w->mode == WEB_CLIENT_MODE_FILECOPY) ? w->response.rlen : w->response.data->len; + sent = size; + +#ifdef NETDATA_WITH_ZLIB + // check if gzip encoding can and should be used + if ((start = strstr((char *)query->data.http_api_v2.payload, WEB_HDR_ACCEPT_ENC))) { + start += strlen(WEB_HDR_ACCEPT_ENC); + end = strstr(start, "\x0D\x0A"); + start = strstr(start, "gzip"); + + if (start && start < end) { + w->response.zstream.zalloc = Z_NULL; + w->response.zstream.zfree = Z_NULL; + w->response.zstream.opaque = Z_NULL; + if(deflateInit2(&w->response.zstream, web_gzip_level, Z_DEFLATED, 15 + 16, 8, web_gzip_strategy) == Z_OK) { + w->response.zinitialized = 1; + w->response.zoutput = 1; + } else + error("Failed to initialize zlib. Proceeding without compression."); + } + } + + if (w->response.data->len && w->response.zinitialized) { + w->response.zstream.next_in = (Bytef *)w->response.data->buffer; + w->response.zstream.avail_in = w->response.data->len; + do { + w->response.zstream.avail_out = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE; + w->response.zstream.next_out = w->response.zbuffer; + z_ret = deflate(&w->response.zstream, Z_FINISH); + if(z_ret < 0) { + if(w->response.zstream.msg) + error("Error compressing body. ZLIB error: \"%s\"", w->response.zstream.msg); + else + error("Unknown error during zlib compression."); + retval = 1; + w->response.code = 500; + aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_ZLIB_ERROR, CLOUD_EMSG_ZLIB_ERROR, NULL, 0); + goto cleanup; + } + int bytes_to_cpy = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE - w->response.zstream.avail_out; + buffer_need_bytes(z_buffer, bytes_to_cpy); + memcpy(&z_buffer->buffer[z_buffer->len], w->response.zbuffer, bytes_to_cpy); + z_buffer->len += bytes_to_cpy; + } while(z_ret != Z_STREAM_END); + // so that web_client_build_http_header + // puts correct content length into header + buffer_free(w->response.data); + w->response.data = z_buffer; + z_buffer = NULL; + } +#endif + + w->response.data->date = w->tv_ready.tv_sec; + web_client_build_http_header(w); + local_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE); + local_buffer->contenttype = CT_APPLICATION_JSON; + + buffer_strcat(local_buffer, w->response.header_output->buffer); + + if (w->response.data->len) { +#ifdef NETDATA_WITH_ZLIB + if (w->response.zinitialized) { + buffer_need_bytes(local_buffer, w->response.data->len); + memcpy(&local_buffer->buffer[local_buffer->len], w->response.data->buffer, w->response.data->len); + local_buffer->len += w->response.data->len; + sent = sent - size + w->response.data->len; + } else { +#endif + buffer_strcat(local_buffer, w->response.data->buffer); +#ifdef NETDATA_WITH_ZLIB + } +#endif + } + + // send msg. + aclk_http_msg_v2(query_thr->client, query->callback_topic, query->msg_id, t, query->created, w->response.code, local_buffer->buffer, local_buffer->len); + + struct timeval tv; + +cleanup: + now_realtime_timeval(&tv); + log_access("%llu: %d '[ACLK]:%d' '%s' (sent/all = %zu/%zu bytes %0.0f%%, prep/sent/total = %0.2f/%0.2f/%0.2f ms) %d '%s'", + w->id + , gettid() + , query_thr->idx + , "DATA" + , sent + , size + , size > sent ? -(((size - sent) / (double)size) * 100.0) : ((size > 0) ? (((sent - size ) / (double)size) * 100.0) : 0.0) + , dt_usec(&w->tv_ready, &w->tv_in) / 1000.0 + , dt_usec(&tv, &w->tv_ready) / 1000.0 + , dt_usec(&tv, &w->tv_in) / 1000.0 + , w->response.code + , strip_control_characters((char *)buffer_tostring(log_buffer)) + ); + +#ifdef NETDATA_WITH_ZLIB + if(w->response.zinitialized) + deflateEnd(&w->response.zstream); + buffer_free(z_buffer); +#endif + buffer_free(w->response.data); + buffer_free(w->response.header); + buffer_free(w->response.header_output); + freez(w); + buffer_free(local_buffer); + buffer_free(log_buffer); + return retval; +} + +static int send_bin_msg(struct aclk_query_thread *query_thr, aclk_query_t query) +{ + // this will be simplified when legacy support is removed + aclk_send_bin_message_subtopic_pid(query_thr->client, query->data.bin_payload.payload, query->data.bin_payload.size, query->data.bin_payload.topic, query->data.bin_payload.msg_name); + return 0; +} + +const char *aclk_query_get_name(aclk_query_type_t qt, int unknown_ok) +{ + switch (qt) { + case HTTP_API_V2: return "http_api_request_v2"; + case REGISTER_NODE: return "register_node"; + case NODE_STATE_UPDATE: return "node_state_update"; + case CHART_DIMS_UPDATE: return "chart_and_dim_update"; + case CHART_CONFIG_UPDATED: return "chart_config_updated"; + case CHART_RESET: return "reset_chart_messages"; + case RETENTION_UPDATED: return "update_retention_info"; + case UPDATE_NODE_INFO: return "update_node_info"; + case ALARM_LOG_HEALTH: return "alarm_log_health"; + case ALARM_PROVIDE_CFG: return "provide_alarm_config"; + case ALARM_SNAPSHOT: return "alarm_snapshot"; + case UPDATE_NODE_COLLECTORS: return "update_node_collectors"; + case PROTO_BIN_MESSAGE: return "generic_binary_proto_message"; + default: + if (!unknown_ok) + error_report("Unknown query type used %d", (int) qt); + return "unknown"; + } +} + +static void aclk_query_process_msg(struct aclk_query_thread *query_thr, aclk_query_t query) +{ + if (query->type == UNKNOWN || query->type >= ACLK_QUERY_TYPE_COUNT) { + error_report("Unknown query in query queue. %u", query->type); + aclk_query_free(query); + return; + } + + worker_is_busy(query->type); + if (query->type == HTTP_API_V2) { + debug(D_ACLK, "Processing Queued Message of type: \"http_api_request_v2\""); + http_api_v2(query_thr, query); + } else { + debug(D_ACLK, "Processing Queued Message of type: \"%s\"", query->data.bin_payload.msg_name); + send_bin_msg(query_thr, query); + } + + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + aclk_metrics_per_sample.queries_dispatched++; + aclk_queries_per_thread[query_thr->idx]++; + aclk_metrics_per_sample.queries_per_type[query->type]++; + ACLK_STATS_UNLOCK; + } + + aclk_query_free(query); + + worker_is_idle(); +} + +/* Processes messages from queue. Compete for work with other threads + */ +int aclk_query_process_msgs(struct aclk_query_thread *query_thr) +{ + aclk_query_t query; + while ((query = aclk_queue_pop())) + aclk_query_process_msg(query_thr, query); + + return 0; +} + +static void worker_aclk_register(void) { + worker_register("ACLKQUERY"); + for (int i = 1; i < ACLK_QUERY_TYPE_COUNT; i++) { + worker_register_job_name(i, aclk_query_get_name(i, 0)); + } +} + +/** + * Main query processing thread + */ +void *aclk_query_main_thread(void *ptr) +{ + worker_aclk_register(); + + struct aclk_query_thread *query_thr = ptr; + + while (!netdata_exit) { + aclk_query_process_msgs(query_thr); + + worker_is_idle(); + QUERY_THREAD_LOCK; + if (unlikely(pthread_cond_wait(&query_cond_wait, &query_lock_wait))) + sleep_usec(USEC_PER_SEC * 1); + QUERY_THREAD_UNLOCK; + } + + worker_unregister(); + return NULL; +} + +#define TASK_LEN_MAX 22 +void aclk_query_threads_start(struct aclk_query_threads *query_threads, mqtt_wss_client client) +{ + info("Starting %d query threads.", query_threads->count); + + char thread_name[TASK_LEN_MAX]; + query_threads->thread_list = callocz(query_threads->count, sizeof(struct aclk_query_thread)); + for (int i = 0; i < query_threads->count; i++) { + query_threads->thread_list[i].idx = i; //thread needs to know its index for statistics + + if(unlikely(snprintfz(thread_name, TASK_LEN_MAX, "%s_%d", ACLK_QUERY_THREAD_NAME, i) < 0)) + error("snprintf encoding error"); + netdata_thread_create( + &query_threads->thread_list[i].thread, thread_name, NETDATA_THREAD_OPTION_JOINABLE, aclk_query_main_thread, + &query_threads->thread_list[i]); + + query_threads->thread_list[i].client = client; + } +} + +void aclk_query_threads_cleanup(struct aclk_query_threads *query_threads) +{ + if (query_threads && query_threads->thread_list) { + for (int i = 0; i < query_threads->count; i++) { + netdata_thread_join(query_threads->thread_list[i].thread, NULL); + } + freez(query_threads->thread_list); + } + aclk_queue_lock(); + aclk_queue_flush(); +} diff --git a/aclk/aclk_query.h b/aclk/aclk_query.h new file mode 100644 index 0000000..c006b01 --- /dev/null +++ b/aclk/aclk_query.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_ACLK_QUERY_H +#define NETDATA_ACLK_QUERY_H + +#include "libnetdata/libnetdata.h" + +#include "mqtt_wss_client.h" + +#include "aclk_query_queue.h" + +extern pthread_cond_t query_cond_wait; +extern pthread_mutex_t query_lock_wait; +#define QUERY_THREAD_WAKEUP pthread_cond_signal(&query_cond_wait) +#define QUERY_THREAD_WAKEUP_ALL pthread_cond_broadcast(&query_cond_wait) + +// TODO +//extern volatile int aclk_connected; + +struct aclk_query_thread { + netdata_thread_t thread; + int idx; + mqtt_wss_client client; +}; + +struct aclk_query_threads { + struct aclk_query_thread *thread_list; + int count; +}; + +void aclk_query_threads_start(struct aclk_query_threads *query_threads, mqtt_wss_client client); +void aclk_query_threads_cleanup(struct aclk_query_threads *query_threads); + +const char *aclk_query_get_name(aclk_query_type_t qt, int unknown_ok); + +#endif //NETDATA_AGENT_CLOUD_LINK_H diff --git a/aclk/aclk_query_queue.c b/aclk/aclk_query_queue.c new file mode 100644 index 0000000..9a45057 --- /dev/null +++ b/aclk/aclk_query_queue.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_query_queue.h" +#include "aclk_query.h" +#include "aclk_stats.h" + +static netdata_mutex_t aclk_query_queue_mutex = NETDATA_MUTEX_INITIALIZER; +#define ACLK_QUEUE_LOCK netdata_mutex_lock(&aclk_query_queue_mutex) +#define ACLK_QUEUE_UNLOCK netdata_mutex_unlock(&aclk_query_queue_mutex) + +static struct aclk_query_queue { + aclk_query_t head; + aclk_query_t tail; + int block_push; +} aclk_query_queue = { + .head = NULL, + .tail = NULL, + .block_push = 0 +}; + +static inline int _aclk_queue_query(aclk_query_t query) +{ + now_realtime_timeval(&query->created_tv); + query->created = now_realtime_usec(); + + ACLK_QUEUE_LOCK; + if (aclk_query_queue.block_push) { + ACLK_QUEUE_UNLOCK; + if(!netdata_exit) + error("Query Queue is blocked from accepting new requests. This is normally the case when ACLK prepares to shutdown."); + aclk_query_free(query); + return 1; + } + if (!aclk_query_queue.head) { + aclk_query_queue.head = query; + aclk_query_queue.tail = query; + ACLK_QUEUE_UNLOCK; + return 0; + } + // TODO deduplication + aclk_query_queue.tail->next = query; + aclk_query_queue.tail = query; + ACLK_QUEUE_UNLOCK; + return 0; + +} + +int aclk_queue_query(aclk_query_t query) +{ + int ret = _aclk_queue_query(query); + if (!ret) { + QUERY_THREAD_WAKEUP; + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + aclk_metrics_per_sample.queries_queued++; + ACLK_STATS_UNLOCK; + } + } + return ret; +} + +aclk_query_t aclk_queue_pop(void) +{ + aclk_query_t ret; + + ACLK_QUEUE_LOCK; + if (aclk_query_queue.block_push) { + ACLK_QUEUE_UNLOCK; + if(!netdata_exit) + error("POP Query Queue is blocked from accepting new requests. This is normally the case when ACLK prepares to shutdown."); + return NULL; + } + + ret = aclk_query_queue.head; + if (!ret) { + ACLK_QUEUE_UNLOCK; + return ret; + } + + aclk_query_queue.head = ret->next; + if (unlikely(!aclk_query_queue.head)) + aclk_query_queue.tail = aclk_query_queue.head; + ACLK_QUEUE_UNLOCK; + + ret->next = NULL; + return ret; +} + +void aclk_queue_flush(void) +{ + aclk_query_t query = aclk_queue_pop(); + while (query) { + aclk_query_free(query); + query = aclk_queue_pop(); + }; +} + +aclk_query_t aclk_query_new(aclk_query_type_t type) +{ + aclk_query_t query = callocz(1, sizeof(struct aclk_query)); + query->type = type; + return query; +} + +void aclk_query_free(aclk_query_t query) +{ + switch (query->type) { + case HTTP_API_V2: + freez(query->data.http_api_v2.payload); + if (query->data.http_api_v2.query != query->dedup_id) + freez(query->data.http_api_v2.query); + break; + + default: + break; + } + + freez(query->dedup_id); + freez(query->callback_topic); + freez(query->msg_id); + freez(query); +} + +void aclk_queue_lock(void) +{ + ACLK_QUEUE_LOCK; + aclk_query_queue.block_push = 1; + ACLK_QUEUE_UNLOCK; +} + +void aclk_queue_unlock(void) +{ + ACLK_QUEUE_LOCK; + aclk_query_queue.block_push = 0; + ACLK_QUEUE_UNLOCK; +} diff --git a/aclk/aclk_query_queue.h b/aclk/aclk_query_queue.h new file mode 100644 index 0000000..ab94b63 --- /dev/null +++ b/aclk/aclk_query_queue.h @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_ACLK_QUERY_QUEUE_H +#define NETDATA_ACLK_QUERY_QUEUE_H + +#include "libnetdata/libnetdata.h" +#include "daemon/common.h" +#include "schema-wrappers/schema_wrappers.h" + +#include "aclk_util.h" + +typedef enum { + UNKNOWN = 0, + HTTP_API_V2, + REGISTER_NODE, + NODE_STATE_UPDATE, + CHART_DIMS_UPDATE, + CHART_CONFIG_UPDATED, + CHART_RESET, + RETENTION_UPDATED, + UPDATE_NODE_INFO, + ALARM_LOG_HEALTH, + ALARM_PROVIDE_CFG, + ALARM_SNAPSHOT, + UPDATE_NODE_COLLECTORS, + PROTO_BIN_MESSAGE, + ACLK_QUERY_TYPE_COUNT // always keep this as last +} aclk_query_type_t; + +struct aclk_query_http_api_v2 { + char *payload; + char *query; +}; + +struct aclk_bin_payload { + char *payload; + size_t size; + enum aclk_topics topic; + const char *msg_name; +}; + +typedef struct aclk_query *aclk_query_t; +struct aclk_query { + aclk_query_type_t type; + + // dedup_id is used to deduplicate queries in the list + // if type and dedup_id is the same message is deduplicated + // set dedup_id to NULL to never deduplicate the message + // set dedup_id to constant (e.g. empty string "") to make + // message of this type ever exist only once in the list + char *dedup_id; + char *callback_topic; + char *msg_id; + + struct timeval created_tv; + usec_t created; + int timeout; + aclk_query_t next; + + // TODO maybe remove? + int version; + union { + struct aclk_query_http_api_v2 http_api_v2; + struct aclk_bin_payload bin_payload; + } data; +}; + +aclk_query_t aclk_query_new(aclk_query_type_t type); +void aclk_query_free(aclk_query_t query); + +int aclk_queue_query(aclk_query_t query); +aclk_query_t aclk_queue_pop(void); +void aclk_queue_flush(void); + +void aclk_queue_lock(void); +void aclk_queue_unlock(void); + +#define QUEUE_IF_PAYLOAD_PRESENT(query) \ + if (likely(query->data.bin_payload.payload)) { \ + aclk_queue_query(query); \ + } else { \ + error("Failed to generate payload (%s)", __FUNCTION__); \ + aclk_query_free(query); \ + } + +#endif /* NETDATA_ACLK_QUERY_QUEUE_H */ diff --git a/aclk/aclk_rrdhost_state.h b/aclk/aclk_rrdhost_state.h new file mode 100644 index 0000000..5c8a2dd --- /dev/null +++ b/aclk/aclk_rrdhost_state.h @@ -0,0 +1,11 @@ +#ifndef ACLK_RRDHOST_STATE_H +#define ACLK_RRDHOST_STATE_H + +#include "libnetdata/libnetdata.h" + +typedef struct aclk_rrdhost_state { + char *claimed_id; // Claimed ID if host has one otherwise NULL + char *prev_claimed_id; // Claimed ID if changed (reclaimed) during runtime +} aclk_rrdhost_state; + +#endif /* ACLK_RRDHOST_STATE_H */ diff --git a/aclk/aclk_rx_msgs.c b/aclk/aclk_rx_msgs.c new file mode 100644 index 0000000..83bc550 --- /dev/null +++ b/aclk/aclk_rx_msgs.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_rx_msgs.h" + +#include "aclk_stats.h" +#include "aclk_query_queue.h" +#include "aclk.h" +#include "aclk_capas.h" + +#include "schema-wrappers/proto_2_json.h" + +#define ACLK_V2_PAYLOAD_SEPARATOR "\x0D\x0A\x0D\x0A" +#define ACLK_CLOUD_REQ_V2_PREFIX "GET /" + +#define ACLK_V_COMPRESSION 2 + +struct aclk_request { + char *type_id; + char *msg_id; + char *callback_topic; + char *payload; + int version; + int timeout; + int min_version; + int max_version; +}; + +static int cloud_to_agent_parse(JSON_ENTRY *e) +{ + struct aclk_request *data = e->callback_data; + + switch (e->type) { + case JSON_OBJECT: + case JSON_ARRAY: + break; + case JSON_STRING: + if (!strcmp(e->name, "msg-id")) { + data->msg_id = strdupz(e->data.string); + break; + } + if (!strcmp(e->name, "type")) { + data->type_id = strdupz(e->data.string); + break; + } + if (!strcmp(e->name, "callback-topic")) { + data->callback_topic = strdupz(e->data.string); + break; + } + if (!strcmp(e->name, "payload")) { + if (likely(e->data.string)) { + size_t len = strlen(e->data.string); + data->payload = mallocz(len+1); + if (!url_decode_r(data->payload, e->data.string, len + 1)) + strcpy(data->payload, e->data.string); + } + break; + } + break; + case JSON_NUMBER: + if (!strcmp(e->name, "version")) { + data->version = (int)e->data.number; + break; + } + if (!strcmp(e->name, "timeout")) { + data->timeout = (int)e->data.number; + break; + } + if (!strcmp(e->name, "min-version")) { + data->min_version = (int)e->data.number; + break; + } + if (!strcmp(e->name, "max-version")) { + data->max_version = (int)e->data.number; + break; + } + + break; + + case JSON_BOOLEAN: + break; + + case JSON_NULL: + break; + } + return 0; +} + +static inline int aclk_extract_v2_data(char *payload, char **data) +{ + char* ptr = strstr(payload, ACLK_V2_PAYLOAD_SEPARATOR); + if(!ptr) + return 1; + ptr += strlen(ACLK_V2_PAYLOAD_SEPARATOR); + *data = strdupz(ptr); + return 0; +} + +static inline int aclk_v2_payload_get_query(const char *payload, char **query_url) +{ + const char *start, *end; + + // TODO better check of URL + if(strncmp(payload, ACLK_CLOUD_REQ_V2_PREFIX, strlen(ACLK_CLOUD_REQ_V2_PREFIX))) { + errno = 0; + error("Only accepting requests that start with \"%s\" from CLOUD.", ACLK_CLOUD_REQ_V2_PREFIX); + return 1; + } + start = payload + 4; + + if(!(end = strstr(payload, " HTTP/1.1\x0D\x0A"))) { + errno = 0; + error("Doesn't look like HTTP GET request."); + return 1; + } + + *query_url = mallocz((end - start) + 1); + strncpyz(*query_url, start, end - start); + + return 0; +} + +static int aclk_handle_cloud_http_request_v2(struct aclk_request *cloud_to_agent, char *raw_payload) +{ + aclk_query_t query; + + errno = 0; + if (cloud_to_agent->version < ACLK_V_COMPRESSION) { + error( + "This handler cannot reply to request with version older than %d, received %d.", + ACLK_V_COMPRESSION, + cloud_to_agent->version); + return 1; + } + + query = aclk_query_new(HTTP_API_V2); + + if (unlikely(aclk_extract_v2_data(raw_payload, &query->data.http_api_v2.payload))) { + error("Error extracting payload expected after the JSON dictionary."); + goto error; + } + + if (unlikely(aclk_v2_payload_get_query(query->data.http_api_v2.payload, &query->dedup_id))) { + error("Could not extract payload from query"); + goto error; + } + + if (unlikely(!cloud_to_agent->callback_topic)) { + error("Missing callback_topic"); + goto error; + } + + if (unlikely(!cloud_to_agent->msg_id)) { + error("Missing msg_id"); + goto error; + } + + // aclk_queue_query takes ownership of data pointer + query->callback_topic = cloud_to_agent->callback_topic; + query->timeout = cloud_to_agent->timeout; + // for clarity and code readability as when we process the request + // it would be strange to get URL from `dedup_id` + query->data.http_api_v2.query = query->dedup_id; + query->msg_id = cloud_to_agent->msg_id; + aclk_queue_query(query); + return 0; + +error: + aclk_query_free(query); + return 1; +} + +int aclk_handle_cloud_cmd_message(char *payload) +{ + struct aclk_request cloud_to_agent; + memset(&cloud_to_agent, 0, sizeof(struct aclk_request)); + + if (unlikely(!payload)) { + error_report("ACLK incoming 'cmd' message is empty"); + return 1; + } + + debug(D_ACLK, "ACLK incoming 'cmd' message (%s)", payload); + + int rc = json_parse(payload, &cloud_to_agent, cloud_to_agent_parse); + + if (unlikely(rc != JSON_OK)) { + error_report("Malformed json request (%s)", payload); + goto err_cleanup; + } + + if (!cloud_to_agent.type_id) { + error_report("Cloud message is missing compulsory key \"type\""); + goto err_cleanup; + } + + // Originally we were expecting to have multiple types of 'cmd' message, + // but after the new protocol was designed we will ever only have 'http' + if (strcmp(cloud_to_agent.type_id, "http")) { + error_report("Only 'http' cmd message is supported"); + goto err_cleanup; + } + + if (likely(!aclk_handle_cloud_http_request_v2(&cloud_to_agent, payload))) { + // aclk_handle_cloud_request takes ownership of the pointers + // (to avoid copying) in case of success + freez(cloud_to_agent.type_id); + return 0; + } + +err_cleanup: + if (cloud_to_agent.payload) + freez(cloud_to_agent.payload); + if (cloud_to_agent.type_id) + freez(cloud_to_agent.type_id); + if (cloud_to_agent.msg_id) + freez(cloud_to_agent.msg_id); + if (cloud_to_agent.callback_topic) + freez(cloud_to_agent.callback_topic); + + return 1; +} + +typedef uint32_t simple_hash_t; +typedef int(*rx_msg_handler)(const char *msg, size_t msg_len); + +int handle_old_proto_cmd(const char *msg, size_t msg_len) +{ + // msg is binary payload in all other cases + // however in this message from old legacy cloud + // we have to convert it to C string + char *str = mallocz(msg_len+1); + memcpy(str, msg, msg_len); + str[msg_len] = 0; + if (aclk_handle_cloud_cmd_message(str)) { + freez(str); + return 1; + } + freez(str); + return 0; +} + +int create_node_instance_result(const char *msg, size_t msg_len) +{ + node_instance_creation_result_t res = parse_create_node_instance_result(msg, msg_len); + if (!res.machine_guid || !res.node_id) { + error_report("Error parsing CreateNodeInstanceResult"); + freez(res.machine_guid); + freez(res.node_id); + return 1; + } + + debug(D_ACLK, "CreateNodeInstanceResult: guid:%s nodeid:%s", res.machine_guid, res.node_id); + + uuid_t host_id, node_id; + if (uuid_parse(res.machine_guid, host_id)) { + error("Error parsing machine_guid provided by CreateNodeInstanceResult"); + freez(res.machine_guid); + freez(res.node_id); + return 1; + } + if (uuid_parse(res.node_id, node_id)) { + error("Error parsing node_id provided by CreateNodeInstanceResult"); + freez(res.machine_guid); + freez(res.node_id); + return 1; + } + update_node_id(&host_id, &node_id); + + aclk_query_t query = aclk_query_new(NODE_STATE_UPDATE); + node_instance_connection_t node_state_update = { + .hops = 1, + .live = 0, + .queryable = 1, + .session_id = aclk_session_newarch, + .node_id = res.node_id + }; + + RRDHOST *host = rrdhost_find_by_guid(res.machine_guid); + if (host) { + // not all host must have RRDHOST struct created for them + // if they never connected during runtime of agent + if (host == localhost) { + node_state_update.live = 1; + node_state_update.hops = 0; + } else { + netdata_mutex_lock(&host->receiver_lock); + node_state_update.live = (host->receiver != NULL); + netdata_mutex_unlock(&host->receiver_lock); + node_state_update.hops = host->system_info->hops; + } + } + + node_state_update.capabilities = aclk_get_node_instance_capas(host); + + rrdhost_aclk_state_lock(localhost); + node_state_update.claim_id = localhost->aclk_state.claimed_id; + query->data.bin_payload.payload = generate_node_instance_connection(&query->data.bin_payload.size, &node_state_update); + rrdhost_aclk_state_unlock(localhost); + + freez((void *)node_state_update.capabilities); + + query->data.bin_payload.msg_name = "UpdateNodeInstanceConnection"; + query->data.bin_payload.topic = ACLK_TOPICID_NODE_CONN; + + aclk_queue_query(query); + freez(res.node_id); + freez(res.machine_guid); + return 0; +} + +int send_node_instances(const char *msg, size_t msg_len) +{ + UNUSED(msg); + UNUSED(msg_len); + aclk_send_node_instances(); + return 0; +} + +int stream_charts_and_dimensions(const char *msg, size_t msg_len) +{ + UNUSED(msg); + UNUSED(msg_len); + error_report("Received obsolete StreamChartsAndDimensions msg"); + return 0; +} + +int charts_and_dimensions_ack(const char *msg, size_t msg_len) +{ + UNUSED(msg); + UNUSED(msg_len); + error_report("Received obsolete StreamChartsAndDimensionsAck msg"); + return 0; +} + +int update_chart_configs(const char *msg, size_t msg_len) +{ + UNUSED(msg); + UNUSED(msg_len); + error_report("Received obsolete UpdateChartConfigs msg"); + return 0; +} + +int start_alarm_streaming(const char *msg, size_t msg_len) +{ + struct start_alarm_streaming res = parse_start_alarm_streaming(msg, msg_len); + if (!res.node_id || !res.batch_id) { + error("Error parsing StartAlarmStreaming"); + freez(res.node_id); + return 1; + } + aclk_start_alert_streaming(res.node_id, res.batch_id, res.start_seq_id); + freez(res.node_id); + return 0; +} + +int send_alarm_log_health(const char *msg, size_t msg_len) +{ + char *node_id = parse_send_alarm_log_health(msg, msg_len); + if (!node_id) { + error("Error parsing SendAlarmLogHealth"); + return 1; + } + aclk_send_alarm_health_log(node_id); + freez(node_id); + return 0; +} + +int send_alarm_configuration(const char *msg, size_t msg_len) +{ + char *config_hash = parse_send_alarm_configuration(msg, msg_len); + if (!config_hash || !*config_hash) { + error("Error parsing SendAlarmConfiguration"); + freez(config_hash); + return 1; + } + aclk_send_alarm_configuration(config_hash); + freez(config_hash); + return 0; +} + +int send_alarm_snapshot(const char *msg, size_t msg_len) +{ + struct send_alarm_snapshot *sas = parse_send_alarm_snapshot(msg, msg_len); + if (!sas->node_id || !sas->claim_id) { + error("Error parsing SendAlarmSnapshot"); + destroy_send_alarm_snapshot(sas); + return 1; + } + aclk_process_send_alarm_snapshot(sas->node_id, sas->claim_id, sas->snapshot_id, sas->sequence_id); + destroy_send_alarm_snapshot(sas); + return 0; +} + +int handle_disconnect_req(const char *msg, size_t msg_len) +{ + struct disconnect_cmd *cmd = parse_disconnect_cmd(msg, msg_len); + if (!cmd) + return 1; + if (cmd->permaban) { + error("Cloud Banned This Agent!"); + aclk_disable_runtime = 1; + } + info("Cloud requested disconnect (EC=%u, \"%s\")", (unsigned int)cmd->error_code, cmd->error_description); + if (cmd->reconnect_after_s > 0) { + aclk_block_until = now_monotonic_sec() + cmd->reconnect_after_s; + info( + "Cloud asks not to reconnect for %u seconds. We shall honor that request", + (unsigned int)cmd->reconnect_after_s); + } + disconnect_req = 1; + freez(cmd->error_description); + freez(cmd); + return 0; +} + +int contexts_checkpoint(const char *msg, size_t msg_len) +{ + aclk_ctx_based = 1; + + struct ctxs_checkpoint *cmd = parse_ctxs_checkpoint(msg, msg_len); + if (!cmd) + return 1; + + rrdcontext_hub_checkpoint_command(cmd); + + freez(cmd->claim_id); + freez(cmd->node_id); + freez(cmd); + return 0; +} + +int stop_streaming_contexts(const char *msg, size_t msg_len) +{ + if (!aclk_ctx_based) { + error_report("Received StopStreamingContexts message but context based communication was not enabled (Cloud violated the protocol). Ignoring message"); + return 1; + } + + struct stop_streaming_ctxs *cmd = parse_stop_streaming_ctxs(msg, msg_len); + if (!cmd) + return 1; + + rrdcontext_hub_stop_streaming_command(cmd); + + freez(cmd->claim_id); + freez(cmd->node_id); + freez(cmd); + return 0; +} + +typedef struct { + const char *name; + simple_hash_t name_hash; + rx_msg_handler fnc; +} new_cloud_rx_msg_t; + +new_cloud_rx_msg_t rx_msgs[] = { + { .name = "cmd", .name_hash = 0, .fnc = handle_old_proto_cmd }, + { .name = "CreateNodeInstanceResult", .name_hash = 0, .fnc = create_node_instance_result }, + { .name = "SendNodeInstances", .name_hash = 0, .fnc = send_node_instances }, + { .name = "StreamChartsAndDimensions", .name_hash = 0, .fnc = stream_charts_and_dimensions }, + { .name = "ChartsAndDimensionsAck", .name_hash = 0, .fnc = charts_and_dimensions_ack }, + { .name = "UpdateChartConfigs", .name_hash = 0, .fnc = update_chart_configs }, + { .name = "StartAlarmStreaming", .name_hash = 0, .fnc = start_alarm_streaming }, + { .name = "SendAlarmLogHealth", .name_hash = 0, .fnc = send_alarm_log_health }, + { .name = "SendAlarmConfiguration", .name_hash = 0, .fnc = send_alarm_configuration }, + { .name = "SendAlarmSnapshot", .name_hash = 0, .fnc = send_alarm_snapshot }, + { .name = "DisconnectReq", .name_hash = 0, .fnc = handle_disconnect_req }, + { .name = "ContextsCheckpoint", .name_hash = 0, .fnc = contexts_checkpoint }, + { .name = "StopStreamingContexts", .name_hash = 0, .fnc = stop_streaming_contexts }, + { .name = NULL, .name_hash = 0, .fnc = NULL }, +}; + +new_cloud_rx_msg_t *find_rx_handler_by_hash(simple_hash_t hash) +{ + // we can afford to not compare strings after hash match + // because we check for collisions at initialization in + // aclk_init_rx_msg_handlers() + for (int i = 0; rx_msgs[i].fnc; i++) { + if (rx_msgs[i].name_hash == hash) + return &rx_msgs[i]; + } + return NULL; +} + +const char *rx_handler_get_name(size_t i) +{ + return rx_msgs[i].name; +} + +unsigned int aclk_init_rx_msg_handlers(void) +{ + int i; + for (i = 0; rx_msgs[i].fnc; i++) { + simple_hash_t hash = simple_hash(rx_msgs[i].name); + new_cloud_rx_msg_t *hdl = find_rx_handler_by_hash(hash); + if (unlikely(hdl)) { + // the list of message names changes only by changing + // the source code, therefore fatal is appropriate + fatal("Hash collision. Choose better hash. Added '%s' clashes with existing '%s'", rx_msgs[i].name, hdl->name); + } + rx_msgs[i].name_hash = hash; + } + return i; +} + +void aclk_handle_new_cloud_msg(const char *message_type, const char *msg, size_t msg_len, const char *topic __maybe_unused) +{ + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + aclk_metrics_per_sample.cloud_req_recvd++; + ACLK_STATS_UNLOCK; + } + new_cloud_rx_msg_t *msg_descriptor = find_rx_handler_by_hash(simple_hash(message_type)); + debug(D_ACLK, "Got message named '%s' from cloud", message_type); + if (unlikely(!msg_descriptor)) { + error("Do not know how to handle message of type '%s'. Ignoring", message_type); + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + aclk_metrics_per_sample.cloud_req_err++; + ACLK_STATS_UNLOCK; + } + return; + } + + + if (aclklog_enabled) { + if (!strncmp(message_type, "cmd", strlen("cmd"))) { + log_aclk_message_bin(msg, msg_len, 0, topic, msg_descriptor->name); + } else { + char *json = protomsg_to_json(msg, msg_len, msg_descriptor->name); + log_aclk_message_bin(json, strlen(json), 0, topic, msg_descriptor->name); + freez(json); + } + } + + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + aclk_proto_rx_msgs_sample[msg_descriptor-rx_msgs]++; + ACLK_STATS_UNLOCK; + } + if (msg_descriptor->fnc(msg, msg_len)) { + error("Error processing message of type '%s'", message_type); + if (aclk_stats_enabled) { + ACLK_STATS_LOCK; + aclk_metrics_per_sample.cloud_req_err++; + ACLK_STATS_UNLOCK; + } + return; + } +} diff --git a/aclk/aclk_rx_msgs.h b/aclk/aclk_rx_msgs.h new file mode 100644 index 0000000..61921fa --- /dev/null +++ b/aclk/aclk_rx_msgs.h @@ -0,0 +1,17 @@ + + +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_RX_MSGS_H +#define ACLK_RX_MSGS_H + +#include "daemon/common.h" +#include "libnetdata/libnetdata.h" + +int aclk_handle_cloud_cmd_message(char *payload); + +const char *rx_handler_get_name(size_t i); +unsigned int aclk_init_rx_msg_handlers(void); +void aclk_handle_new_cloud_msg(const char *message_type, const char *msg, size_t msg_len, const char *topic); + +#endif /* ACLK_RX_MSGS_H */ diff --git a/aclk/aclk_stats.c b/aclk/aclk_stats.c new file mode 100644 index 0000000..215313f --- /dev/null +++ b/aclk/aclk_stats.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_stats.h" + +#include "aclk_query.h" + +netdata_mutex_t aclk_stats_mutex = NETDATA_MUTEX_INITIALIZER; + +struct { + int query_thread_count; + unsigned int proto_hdl_cnt; + uint32_t *aclk_proto_rx_msgs_sample; + RRDDIM **rx_msg_dims; +} aclk_stats_cfg; // there is only 1 stats thread at a time + +// data ACLK stats need per query thread +struct aclk_qt_data { + RRDDIM *dim; +} *aclk_qt_data = NULL; + +uint32_t *aclk_queries_per_thread = NULL; +uint32_t *aclk_queries_per_thread_sample = NULL; +uint32_t *aclk_proto_rx_msgs_sample = NULL; + +struct aclk_metrics aclk_metrics = { + .online = 0, +}; + +struct aclk_metrics_per_sample aclk_metrics_per_sample; + +static void aclk_stats_collect(struct aclk_metrics_per_sample *per_sample, struct aclk_metrics *permanent) +{ + static RRDSET *st_aclkstats = NULL; + static RRDDIM *rd_online_status = NULL; + + if (unlikely(!st_aclkstats)) { + st_aclkstats = rrdset_create_localhost( + "netdata", "aclk_status", NULL, "aclk", NULL, "ACLK/Cloud connection status", + "connected", "netdata", "stats", 200000, localhost->rrd_update_every, RRDSET_TYPE_LINE); + + rd_online_status = rrddim_add(st_aclkstats, "online", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_aclkstats, rd_online_status, per_sample->offline_during_sample ? 0 : permanent->online); + + rrdset_done(st_aclkstats); +} + +static void aclk_stats_query_queue(struct aclk_metrics_per_sample *per_sample) +{ + static RRDSET *st_query_thread = NULL; + static RRDDIM *rd_queued = NULL; + static RRDDIM *rd_dispatched = NULL; + + if (unlikely(!st_query_thread)) { + st_query_thread = rrdset_create_localhost( + "netdata", "aclk_query_per_second", NULL, "aclk", NULL, "ACLK Queries per second", "queries/s", + "netdata", "stats", 200001, localhost->rrd_update_every, RRDSET_TYPE_AREA); + + rd_queued = rrddim_add(st_query_thread, "added", NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + rd_dispatched = rrddim_add(st_query_thread, "dispatched", NULL, -1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_query_thread, rd_queued, per_sample->queries_queued); + rrddim_set_by_pointer(st_query_thread, rd_dispatched, per_sample->queries_dispatched); + + rrdset_done(st_query_thread); +} + +#ifdef NETDATA_INTERNAL_CHECKS +static void aclk_stats_latency(struct aclk_metrics_per_sample *per_sample) +{ + static RRDSET *st = NULL; + static RRDDIM *rd_avg = NULL; + static RRDDIM *rd_max = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_latency_mqtt", NULL, "aclk", NULL, "ACLK Message Publish Latency", "ms", + "netdata", "stats", 200002, localhost->rrd_update_every, RRDSET_TYPE_LINE); + + rd_avg = rrddim_add(st, "avg", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_max = rrddim_add(st, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + if(per_sample->latency_count) + rrddim_set_by_pointer(st, rd_avg, roundf((float)per_sample->latency_total / per_sample->latency_count)); + else + rrddim_set_by_pointer(st, rd_avg, 0); + + rrddim_set_by_pointer(st, rd_max, per_sample->latency_max); + + rrdset_done(st); +} +#endif + +static void aclk_stats_cloud_req(struct aclk_metrics_per_sample *per_sample) +{ + static RRDSET *st = NULL; + static RRDDIM *rd_rq_rcvd = NULL; + static RRDDIM *rd_rq_err = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_cloud_req", NULL, "aclk", NULL, "Requests received from cloud", "req/s", + "netdata", "stats", 200005, localhost->rrd_update_every, RRDSET_TYPE_STACKED); + + rd_rq_rcvd = rrddim_add(st, "received", NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + rd_rq_err = rrddim_add(st, "malformed", NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_rq_rcvd, per_sample->cloud_req_recvd - per_sample->cloud_req_err); + rrddim_set_by_pointer(st, rd_rq_err, per_sample->cloud_req_err); + + rrdset_done(st); +} + +static void aclk_stats_cloud_req_type(struct aclk_metrics_per_sample *per_sample) +{ + static RRDSET *st = NULL; + static RRDDIM *dims[ACLK_QUERY_TYPE_COUNT]; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_processed_query_type", NULL, "aclk", NULL, "Query thread commands processed by their type", "cmd/s", + "netdata", "stats", 200006, localhost->rrd_update_every, RRDSET_TYPE_STACKED); + + for (int i = 0; i < ACLK_QUERY_TYPE_COUNT; i++) + dims[i] = rrddim_add(st, aclk_query_get_name(i, 1), NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + + } + + for (int i = 0; i < ACLK_QUERY_TYPE_COUNT; i++) + rrddim_set_by_pointer(st, dims[i], per_sample->queries_per_type[i]); + + rrdset_done(st); +} + +static char *cloud_req_http_type_names[ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT] = { + "other", + "info", + "data", + "alarms", + "alarm_log", + "chart", + "charts" + // if you change then update `ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT`. +}; + +int aclk_cloud_req_http_type_to_idx(const char *name) +{ + for (int i = 1; i < ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT; i++) + if (!strcmp(cloud_req_http_type_names[i], name)) + return i; + return 0; +} + +static void aclk_stats_cloud_req_http_type(struct aclk_metrics_per_sample *per_sample) +{ + static RRDSET *st = NULL; + static RRDDIM *rd_rq_types[ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT]; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_cloud_req_http_type", NULL, "aclk", NULL, "Requests received from cloud via HTTP by their type", "req/s", + "netdata", "stats", 200007, localhost->rrd_update_every, RRDSET_TYPE_STACKED); + + for (int i = 0; i < ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT; i++) + rd_rq_types[i] = rrddim_add(st, cloud_req_http_type_names[i], NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + } + + for (int i = 0; i < ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT; i++) + rrddim_set_by_pointer(st, rd_rq_types[i], per_sample->cloud_req_http_by_type[i]); + + rrdset_done(st); +} + +#define MAX_DIM_NAME 22 +static void aclk_stats_query_threads(uint32_t *queries_per_thread) +{ + static RRDSET *st = NULL; + + char dim_name[MAX_DIM_NAME]; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_query_threads", NULL, "aclk", NULL, "Queries Processed Per Thread", "req/s", + "netdata", "stats", 200009, localhost->rrd_update_every, RRDSET_TYPE_STACKED); + + for (int i = 0; i < aclk_stats_cfg.query_thread_count; i++) { + if (snprintfz(dim_name, MAX_DIM_NAME, "Query %d", i) < 0) + error("snprintf encoding error"); + aclk_qt_data[i].dim = rrddim_add(st, dim_name, NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + } + } + + for (int i = 0; i < aclk_stats_cfg.query_thread_count; i++) { + rrddim_set_by_pointer(st, aclk_qt_data[i].dim, queries_per_thread[i]); + } + + rrdset_done(st); +} + +static void aclk_stats_query_time(struct aclk_metrics_per_sample *per_sample) +{ + static RRDSET *st = NULL; + static RRDDIM *rd_rq_avg = NULL; + static RRDDIM *rd_rq_max = NULL; + static RRDDIM *rd_rq_total = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_query_time", NULL, "aclk", NULL, "Time it took to process cloud requested DB queries", "us", + "netdata", "stats", 200008, localhost->rrd_update_every, RRDSET_TYPE_LINE); + + rd_rq_avg = rrddim_add(st, "avg", NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + rd_rq_max = rrddim_add(st, "max", NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + rd_rq_total = rrddim_add(st, "total", NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + } + + if(per_sample->cloud_q_process_count) + rrddim_set_by_pointer(st, rd_rq_avg, roundf((float)per_sample->cloud_q_process_total / per_sample->cloud_q_process_count)); + else + rrddim_set_by_pointer(st, rd_rq_avg, 0); + rrddim_set_by_pointer(st, rd_rq_max, per_sample->cloud_q_process_max); + rrddim_set_by_pointer(st, rd_rq_total, per_sample->cloud_q_process_total); + + rrdset_done(st); +} + +const char *rx_handler_get_name(size_t i); +static void aclk_stats_newproto_rx(uint32_t *rx_msgs_sample) +{ + static RRDSET *st = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_protobuf_rx_types", NULL, "aclk", NULL, "Received new cloud architecture messages by their type.", "msg/s", + "netdata", "stats", 200010, localhost->rrd_update_every, RRDSET_TYPE_STACKED); + + for (unsigned int i = 0; i < aclk_stats_cfg.proto_hdl_cnt; i++) { + aclk_stats_cfg.rx_msg_dims[i] = rrddim_add(st, rx_handler_get_name(i), NULL, 1, localhost->rrd_update_every, RRD_ALGORITHM_ABSOLUTE); + } + } + + for (unsigned int i = 0; i < aclk_stats_cfg.proto_hdl_cnt; i++) + rrddim_set_by_pointer(st, aclk_stats_cfg.rx_msg_dims[i], rx_msgs_sample[i]); + + rrdset_done(st); +} + +static void aclk_stats_mqtt_wss(struct mqtt_wss_stats *stats) +{ + static RRDSET *st = NULL; + static RRDDIM *rd_sent = NULL; + static RRDDIM *rd_recvd = NULL; + static uint64_t sent = 0; + static uint64_t recvd = 0; + + sent += stats->bytes_tx; + recvd += stats->bytes_rx; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata", "aclk_openssl_bytes", NULL, "aclk", NULL, "Received and Sent bytes.", "B/s", + "netdata", "stats", 200011, localhost->rrd_update_every, RRDSET_TYPE_STACKED); + + rd_sent = rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_recvd = rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_sent, sent); + rrddim_set_by_pointer(st, rd_recvd, recvd); + + rrdset_done(st); +} + +void aclk_stats_thread_prepare(int query_thread_count, unsigned int proto_hdl_cnt) +{ + aclk_qt_data = callocz(query_thread_count, sizeof(struct aclk_qt_data)); + aclk_queries_per_thread = callocz(query_thread_count, sizeof(uint32_t)); + aclk_queries_per_thread_sample = callocz(query_thread_count, sizeof(uint32_t)); + + memset(&aclk_metrics_per_sample, 0, sizeof(struct aclk_metrics_per_sample)); + + aclk_stats_cfg.proto_hdl_cnt = proto_hdl_cnt; + aclk_stats_cfg.aclk_proto_rx_msgs_sample = callocz(proto_hdl_cnt, sizeof(*aclk_proto_rx_msgs_sample)); + aclk_proto_rx_msgs_sample = callocz(proto_hdl_cnt, sizeof(*aclk_proto_rx_msgs_sample)); + aclk_stats_cfg.rx_msg_dims = callocz(proto_hdl_cnt, sizeof(RRDDIM*)); +} + +void aclk_stats_thread_cleanup() +{ + freez(aclk_stats_cfg.rx_msg_dims); + freez(aclk_proto_rx_msgs_sample); + freez(aclk_stats_cfg.aclk_proto_rx_msgs_sample); + freez(aclk_qt_data); + freez(aclk_queries_per_thread); + freez(aclk_queries_per_thread_sample); +} + +void *aclk_stats_main_thread(void *ptr) +{ + struct aclk_stats_thread *args = ptr; + + aclk_stats_cfg.query_thread_count = args->query_thread_count; + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step_ut = localhost->rrd_update_every * USEC_PER_SEC; + + struct aclk_metrics_per_sample per_sample; + struct aclk_metrics permanent; + + while (!netdata_exit) { + netdata_thread_testcancel(); + // ------------------------------------------------------------------------ + // Wait for the next iteration point. + + heartbeat_next(&hb, step_ut); + if (netdata_exit) break; + + ACLK_STATS_LOCK; + // to not hold lock longer than necessary, especially not to hold it + // during database rrd* operations + memcpy(&per_sample, &aclk_metrics_per_sample, sizeof(struct aclk_metrics_per_sample)); + + memcpy(aclk_stats_cfg.aclk_proto_rx_msgs_sample, aclk_proto_rx_msgs_sample, sizeof(*aclk_proto_rx_msgs_sample) * aclk_stats_cfg.proto_hdl_cnt); + memset(aclk_proto_rx_msgs_sample, 0, sizeof(*aclk_proto_rx_msgs_sample) * aclk_stats_cfg.proto_hdl_cnt); + + memcpy(&permanent, &aclk_metrics, sizeof(struct aclk_metrics)); + memset(&aclk_metrics_per_sample, 0, sizeof(struct aclk_metrics_per_sample)); + + memcpy(aclk_queries_per_thread_sample, aclk_queries_per_thread, sizeof(uint32_t) * aclk_stats_cfg.query_thread_count); + memset(aclk_queries_per_thread, 0, sizeof(uint32_t) * aclk_stats_cfg.query_thread_count); + ACLK_STATS_UNLOCK; + + aclk_stats_collect(&per_sample, &permanent); + aclk_stats_query_queue(&per_sample); +#ifdef NETDATA_INTERNAL_CHECKS + aclk_stats_latency(&per_sample); +#endif + + aclk_stats_cloud_req(&per_sample); + aclk_stats_cloud_req_type(&per_sample); + aclk_stats_cloud_req_http_type(&per_sample); + + aclk_stats_query_threads(aclk_queries_per_thread_sample); + + aclk_stats_query_time(&per_sample); + + struct mqtt_wss_stats mqtt_wss_stats = mqtt_wss_get_stats(args->client); + aclk_stats_mqtt_wss(&mqtt_wss_stats); + + aclk_stats_newproto_rx(aclk_stats_cfg.aclk_proto_rx_msgs_sample); + } + + return 0; +} + +void aclk_stats_upd_online(int online) { + if(!aclk_stats_enabled) + return; + + ACLK_STATS_LOCK; + aclk_metrics.online = online; + + if(!online) + aclk_metrics_per_sample.offline_during_sample = 1; + ACLK_STATS_UNLOCK; +} + +#ifdef NETDATA_INTERNAL_CHECKS +static usec_t pub_time[UINT16_MAX + 1] = {0}; +void aclk_stats_msg_published(uint16_t id) +{ + ACLK_STATS_LOCK; + pub_time[id] = now_boottime_usec(); + ACLK_STATS_UNLOCK; +} + +void aclk_stats_msg_puback(uint16_t id) +{ + ACLK_STATS_LOCK; + usec_t t; + + if (!aclk_stats_enabled) { + ACLK_STATS_UNLOCK; + return; + } + + if (unlikely(!pub_time[id])) { + ACLK_STATS_UNLOCK; + error("Received PUBACK for unknown message?!"); + return; + } + + t = now_boottime_usec() - pub_time[id]; + t /= USEC_PER_MS; + pub_time[id] = 0; + if (aclk_metrics_per_sample.latency_max < t) + aclk_metrics_per_sample.latency_max = t; + + aclk_metrics_per_sample.latency_total += t; + aclk_metrics_per_sample.latency_count++; + ACLK_STATS_UNLOCK; +} +#endif /* NETDATA_INTERNAL_CHECKS */ diff --git a/aclk/aclk_stats.h b/aclk/aclk_stats.h new file mode 100644 index 0000000..bec9ac2 --- /dev/null +++ b/aclk/aclk_stats.h @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_ACLK_STATS_H +#define NETDATA_ACLK_STATS_H + +#include "daemon/common.h" +#include "libnetdata/libnetdata.h" +#include "aclk_query_queue.h" +#include "mqtt_wss_client.h" + +#define ACLK_STATS_THREAD_NAME "ACLK_Stats" + +extern netdata_mutex_t aclk_stats_mutex; + +#define ACLK_STATS_LOCK netdata_mutex_lock(&aclk_stats_mutex) +#define ACLK_STATS_UNLOCK netdata_mutex_unlock(&aclk_stats_mutex) + +// if you change update `cloud_req_http_type_names`. +#define ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT 7 + +int aclk_cloud_req_http_type_to_idx(const char *name); + +struct aclk_stats_thread { + netdata_thread_t *thread; + int query_thread_count; + mqtt_wss_client client; +}; + +// preserve between samples +struct aclk_metrics { + volatile uint8_t online; +}; + +// reset to 0 on every sample +extern struct aclk_metrics_per_sample { + /* in the unlikely event of ACLK disconnecting + and reconnecting under 1 sampling rate + we want to make sure we record the disconnection + despite it being then seemingly longer in graph */ + volatile uint8_t offline_during_sample; + + volatile uint32_t queries_queued; + volatile uint32_t queries_dispatched; + +#ifdef NETDATA_INTERNAL_CHECKS + volatile uint32_t latency_max; + volatile uint32_t latency_total; + volatile uint32_t latency_count; +#endif + + volatile uint32_t cloud_req_recvd; + volatile uint32_t cloud_req_err; + + // query types. + volatile uint32_t queries_per_type[ACLK_QUERY_TYPE_COUNT]; + + // HTTP-specific request types. + volatile uint32_t cloud_req_http_by_type[ACLK_STATS_CLOUD_HTTP_REQ_TYPE_CNT]; + + volatile uint32_t cloud_q_process_total; + volatile uint32_t cloud_q_process_count; + volatile uint32_t cloud_q_process_max; +} aclk_metrics_per_sample; + +extern uint32_t *aclk_proto_rx_msgs_sample; + +extern uint32_t *aclk_queries_per_thread; + +void *aclk_stats_main_thread(void *ptr); +void aclk_stats_thread_prepare(int query_thread_count, unsigned int proto_hdl_cnt); +void aclk_stats_thread_cleanup(); +void aclk_stats_upd_online(int online); + +#ifdef NETDATA_INTERNAL_CHECKS +void aclk_stats_msg_published(uint16_t id); +void aclk_stats_msg_puback(uint16_t id); +#endif /* NETDATA_INTERNAL_CHECKS */ + +#endif /* NETDATA_ACLK_STATS_H */ diff --git a/aclk/aclk_tx_msgs.c b/aclk/aclk_tx_msgs.c new file mode 100644 index 0000000..532b964 --- /dev/null +++ b/aclk/aclk_tx_msgs.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_tx_msgs.h" +#include "daemon/common.h" +#include "aclk_util.h" +#include "aclk_stats.h" +#include "aclk.h" +#include "aclk_capas.h" + +#include "schema-wrappers/proto_2_json.h" + +#ifndef __GNUC__ +#pragma region aclk_tx_msgs helper functions +#endif + +// version for aclk legacy (old cloud arch) +#define ACLK_VERSION 2 + +static void freez_aclk_publish5a(void *ptr) { + freez(ptr); +} +static void freez_aclk_publish5b(void *ptr) { + freez(ptr); +} + +uint16_t aclk_send_bin_message_subtopic_pid(mqtt_wss_client client, char *msg, size_t msg_len, enum aclk_topics subtopic, const char *msgname) +{ +#ifndef ACLK_LOG_CONVERSATION_DIR + UNUSED(msgname); +#endif + uint16_t packet_id; + const char *topic = aclk_get_topic(subtopic); + + if (unlikely(!topic)) { + error("Couldn't get topic. Aborting message send."); + return 0; + } + + mqtt_wss_publish5(client, (char*)topic, NULL, msg, &freez_aclk_publish5a, msg_len, MQTT_WSS_PUB_QOS1, &packet_id); + +#ifdef NETDATA_INTERNAL_CHECKS + aclk_stats_msg_published(packet_id); +#endif + + if (aclklog_enabled) { + char *json = protomsg_to_json(msg, msg_len, msgname); + log_aclk_message_bin(json, strlen(json), 1, topic, msgname); + freez(json); + } + + return packet_id; +} + +// json_object_put returns int unfortunately :D +// we need void(*fnc)(void *); +static void json_object_put_wrapper(void *jsonobj) +{ + json_object_put(jsonobj); +} + +#define TOPIC_MAX_LEN 512 +#define V2_BIN_PAYLOAD_SEPARATOR "\x0D\x0A\x0D\x0A" +static int aclk_send_message_with_bin_payload(mqtt_wss_client client, json_object *msg, const char *topic, const void *payload, size_t payload_len) +{ + uint16_t packet_id; + const char *str; + char *full_msg = NULL; + int len; + + if (unlikely(!topic || topic[0] != '/')) { + error ("Full topic required!"); + json_object_put(msg); + return HTTP_RESP_INTERNAL_SERVER_ERROR; + } + + str = json_object_to_json_string_ext(msg, JSON_C_TO_STRING_PLAIN); + len = strlen(str); + + if (payload_len) { + full_msg = mallocz(len + strlen(V2_BIN_PAYLOAD_SEPARATOR) + payload_len); + + memcpy(full_msg, str, len); + json_object_put(msg); + msg = NULL; + memcpy(&full_msg[len], V2_BIN_PAYLOAD_SEPARATOR, strlen(V2_BIN_PAYLOAD_SEPARATOR)); + len += strlen(V2_BIN_PAYLOAD_SEPARATOR); + memcpy(&full_msg[len], payload, payload_len); + len += payload_len; + } + + mqtt_wss_publish5(client, (char*)topic, NULL, (char*)(payload_len ? full_msg : str), (payload_len ? &freez_aclk_publish5b : &json_object_put_wrapper), len, MQTT_WSS_PUB_QOS1, &packet_id); + +#ifdef NETDATA_INTERNAL_CHECKS + aclk_stats_msg_published(packet_id); +#endif + + return 0; +} + +/* + * Creates universal header common for all ACLK messages. User gets ownership of json object created. + * Usually this is freed by send function after message has been sent. + */ +static struct json_object *create_hdr(const char *type, const char *msg_id, time_t ts_secs, usec_t ts_us, int version) +{ + uuid_t uuid; + char uuid_str[36 + 1]; + json_object *tmp; + json_object *obj = json_object_new_object(); + + tmp = json_object_new_string(type); + json_object_object_add(obj, "type", tmp); + + if (unlikely(!msg_id)) { + uuid_generate(uuid); + uuid_unparse(uuid, uuid_str); + msg_id = uuid_str; + } + + if (ts_secs == 0) { + ts_us = now_realtime_usec(); + ts_secs = ts_us / USEC_PER_SEC; + ts_us = ts_us % USEC_PER_SEC; + } + + tmp = json_object_new_string(msg_id); + json_object_object_add(obj, "msg-id", tmp); + + tmp = json_object_new_int64(ts_secs); + json_object_object_add(obj, "timestamp", tmp); + +// TODO handle this somehow on older json-c +// tmp = json_object_new_uint64(ts_us); +// probably jso->_to_json_string -> custom function +// jso->o.c_uint64 -> map this with pointer to signed int +// commit that implements json_object_new_uint64 is 3c3b592 +// between 0.14 and 0.15 + tmp = json_object_new_int64(ts_us); + json_object_object_add(obj, "timestamp-offset-usec", tmp); + + tmp = json_object_new_int64(aclk_session_sec); + json_object_object_add(obj, "connect", tmp); + +// TODO handle this somehow see above +// tmp = json_object_new_uint64(0 /* TODO aclk_session_us */); + tmp = json_object_new_int64(aclk_session_us); + json_object_object_add(obj, "connect-offset-usec", tmp); + + tmp = json_object_new_int(version); + json_object_object_add(obj, "version", tmp); + + return obj; +} + +#ifndef __GNUC__ +#pragma endregion +#endif + +#ifndef __GNUC__ +#pragma region aclk_tx_msgs message generators +#endif + +void aclk_http_msg_v2_err(mqtt_wss_client client, const char *topic, const char *msg_id, int http_code, int ec, const char* emsg, const char *payload, size_t payload_len) +{ + json_object *tmp, *msg; + msg = create_hdr("http", msg_id, 0, 0, 2); + tmp = json_object_new_int(http_code); + json_object_object_add(msg, "http-code", tmp); + + tmp = json_object_new_int(ec); + json_object_object_add(msg, "error-code", tmp); + + tmp = json_object_new_string(emsg); + json_object_object_add(msg, "error-description", tmp); + + if (aclk_send_message_with_bin_payload(client, msg, topic, payload, payload_len)) { + error("Failed to send cancelation message for http reply"); + } +} + +void aclk_http_msg_v2(mqtt_wss_client client, const char *topic, const char *msg_id, usec_t t_exec, usec_t created, int http_code, const char *payload, size_t payload_len) +{ + json_object *tmp, *msg; + + msg = create_hdr("http", msg_id, 0, 0, 2); + + tmp = json_object_new_int64(t_exec); + json_object_object_add(msg, "t-exec", tmp); + + tmp = json_object_new_int64(created); + json_object_object_add(msg, "t-rx", tmp); + + tmp = json_object_new_int(http_code); + json_object_object_add(msg, "http-code", tmp); + + int rc = aclk_send_message_with_bin_payload(client, msg, topic, payload, payload_len); + + switch (rc) { + case HTTP_RESP_FORBIDDEN: + aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_REQ_REPLY_TOO_BIG, CLOUD_EMSG_REQ_REPLY_TOO_BIG, payload, payload_len); + break; + case HTTP_RESP_INTERNAL_SERVER_ERROR: + aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_FAIL_TOPIC, CLOUD_EMSG_FAIL_TOPIC, payload, payload_len); + break; + case HTTP_RESP_BACKEND_FETCH_FAILED: + aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, payload, payload_len); + break; + } +} + +uint16_t aclk_send_agent_connection_update(mqtt_wss_client client, int reachable) { + size_t len; + uint16_t pid; + + update_agent_connection_t conn = { + .reachable = (reachable ? 1 : 0), + .lwt = 0, + .session_id = aclk_session_newarch, + .capabilities = aclk_get_agent_capas() + }; + + rrdhost_aclk_state_lock(localhost); + if (unlikely(!localhost->aclk_state.claimed_id)) { + error("Internal error. Should not come here if not claimed"); + rrdhost_aclk_state_unlock(localhost); + return 0; + } + if (localhost->aclk_state.prev_claimed_id) + conn.claim_id = localhost->aclk_state.prev_claimed_id; + else + conn.claim_id = localhost->aclk_state.claimed_id; + + char *msg = generate_update_agent_connection(&len, &conn); + rrdhost_aclk_state_unlock(localhost); + + if (!msg) { + error("Error generating agent::v1::UpdateAgentConnection payload"); + return 0; + } + + pid = aclk_send_bin_message_subtopic_pid(client, msg, len, ACLK_TOPICID_AGENT_CONN, "UpdateAgentConnection"); + if (localhost->aclk_state.prev_claimed_id) { + freez(localhost->aclk_state.prev_claimed_id); + localhost->aclk_state.prev_claimed_id = NULL; + } + return pid; +} + +char *aclk_generate_lwt(size_t *size) { + update_agent_connection_t conn = { + .reachable = 0, + .lwt = 1, + .session_id = aclk_session_newarch, + .capabilities = NULL + }; + + rrdhost_aclk_state_lock(localhost); + if (unlikely(!localhost->aclk_state.claimed_id)) { + error("Internal error. Should not come here if not claimed"); + rrdhost_aclk_state_unlock(localhost); + return NULL; + } + conn.claim_id = localhost->aclk_state.claimed_id; + + char *msg = generate_update_agent_connection(size, &conn); + rrdhost_aclk_state_unlock(localhost); + + if (!msg) + error("Error generating agent::v1::UpdateAgentConnection payload for LWT"); + + return msg; +} + +#ifndef __GNUC__ +#pragma endregion +#endif diff --git a/aclk/aclk_tx_msgs.h b/aclk/aclk_tx_msgs.h new file mode 100644 index 0000000..31e5924 --- /dev/null +++ b/aclk/aclk_tx_msgs.h @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#ifndef ACLK_TX_MSGS_H +#define ACLK_TX_MSGS_H + +#include +#include "libnetdata/libnetdata.h" +#include "daemon/common.h" +#include "mqtt_wss_client.h" +#include "schema-wrappers/schema_wrappers.h" +#include "aclk_util.h" + +uint16_t aclk_send_bin_message_subtopic_pid(mqtt_wss_client client, char *msg, size_t msg_len, enum aclk_topics subtopic, const char *msgname); + +void aclk_http_msg_v2_err(mqtt_wss_client client, const char *topic, const char *msg_id, int http_code, int ec, const char* emsg, const char *payload, size_t payload_len); +void aclk_http_msg_v2(mqtt_wss_client client, const char *topic, const char *msg_id, usec_t t_exec, usec_t created, int http_code, const char *payload, size_t payload_len); + +uint16_t aclk_send_agent_connection_update(mqtt_wss_client client, int reachable); +char *aclk_generate_lwt(size_t *size); + +#endif diff --git a/aclk/aclk_util.c b/aclk/aclk_util.c new file mode 100644 index 0000000..01eaedc --- /dev/null +++ b/aclk/aclk_util.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "aclk_util.h" +#include "aclk_proxy.h" + +#include "daemon/common.h" + +usec_t aclk_session_newarch = 0; + +aclk_env_t *aclk_env = NULL; + +int chart_batch_id; + +aclk_encoding_type_t aclk_encoding_type_t_from_str(const char *str) { + if (!strcmp(str, "json")) { + return ACLK_ENC_JSON; + } + if (!strcmp(str, "proto")) { + return ACLK_ENC_PROTO; + } + return ACLK_ENC_UNKNOWN; +} + +aclk_transport_type_t aclk_transport_type_t_from_str(const char *str) { + if (!strcmp(str, "MQTTv3")) { + return ACLK_TRP_MQTT_3_1_1; + } + if (!strcmp(str, "MQTTv5")) { + return ACLK_TRP_MQTT_5; + } + return ACLK_TRP_UNKNOWN; +} + +void aclk_transport_desc_t_destroy(aclk_transport_desc_t *trp_desc) { + freez(trp_desc->endpoint); +} + +void aclk_env_t_destroy(aclk_env_t *env) { + freez(env->auth_endpoint); + if (env->transports) { + for (size_t i = 0; i < env->transport_count; i++) { + if(env->transports[i]) { + aclk_transport_desc_t_destroy(env->transports[i]); + freez(env->transports[i]); + env->transports[i] = NULL; + } + } + freez(env->transports); + } + if (env->capabilities) { + for (size_t i = 0; i < env->capability_count; i++) + freez(env->capabilities[i]); + freez(env->capabilities); + } +} + +int aclk_env_has_capa(const char *capa) +{ + for (int i = 0; i < (int) aclk_env->capability_count; i++) { + if (!strcasecmp(capa, aclk_env->capabilities[i])) + return 1; + } + return 0; +} + +#ifdef ACLK_LOG_CONVERSATION_DIR +volatile int aclk_conversation_log_counter = 0; +#endif + +#define ACLK_TOPIC_PREFIX "/agent/" + +struct aclk_topic { + enum aclk_topics topic_id; + // as received from cloud - we keep this for + // eventual topic list update when claim_id changes + char *topic_recvd; + // constructed topic + char *topic; +}; + +// This helps to cache finalized topics (assembled with claim_id) +// to not have to alloc or create buffer and construct topic every +// time message is sent as in old ACLK +static struct aclk_topic **aclk_topic_cache = NULL; +static size_t aclk_topic_cache_items = 0; + +void free_topic_cache(void) +{ + if (aclk_topic_cache) { + for (size_t i = 0; i < aclk_topic_cache_items; i++) { + freez(aclk_topic_cache[i]->topic); + freez(aclk_topic_cache[i]->topic_recvd); + freez(aclk_topic_cache[i]); + } + freez(aclk_topic_cache); + aclk_topic_cache = NULL; + aclk_topic_cache_items = 0; + } +} + +#define JSON_TOPIC_KEY_TOPIC "topic" +#define JSON_TOPIC_KEY_NAME "name" + +struct topic_name { + enum aclk_topics id; + // cloud name - how is it called + // in answer to /password endpoint + const char *name; +} topic_names[] = { + { .id = ACLK_TOPICID_CHART, .name = "chart" }, + { .id = ACLK_TOPICID_ALARMS, .name = "alarms" }, + { .id = ACLK_TOPICID_METADATA, .name = "meta" }, + { .id = ACLK_TOPICID_COMMAND, .name = "inbox-cmd" }, + { .id = ACLK_TOPICID_AGENT_CONN, .name = "agent-connection" }, + { .id = ACLK_TOPICID_CMD_NG_V1, .name = "inbox-cmd-v1" }, + { .id = ACLK_TOPICID_CREATE_NODE, .name = "create-node-instance" }, + { .id = ACLK_TOPICID_NODE_CONN, .name = "node-instance-connection" }, + { .id = ACLK_TOPICID_CHART_DIMS, .name = "chart-and-dims-updated" }, + { .id = ACLK_TOPICID_CHART_CONFIGS_UPDATED, .name = "chart-configs-updated" }, + { .id = ACLK_TOPICID_CHART_RESET, .name = "reset-charts" }, + { .id = ACLK_TOPICID_RETENTION_UPDATED, .name = "chart-retention-updated" }, + { .id = ACLK_TOPICID_NODE_INFO, .name = "node-instance-info" }, + { .id = ACLK_TOPICID_ALARM_LOG, .name = "alarm-log" }, + { .id = ACLK_TOPICID_ALARM_HEALTH, .name = "alarm-health" }, + { .id = ACLK_TOPICID_ALARM_CONFIG, .name = "alarm-config" }, + { .id = ACLK_TOPICID_ALARM_SNAPSHOT, .name = "alarm-snapshot" }, + { .id = ACLK_TOPICID_NODE_COLLECTORS, .name = "node-instance-collectors" }, + { .id = ACLK_TOPICID_CTXS_SNAPSHOT, .name = "contexts-snapshot" }, + { .id = ACLK_TOPICID_CTXS_UPDATED, .name = "contexts-updated" }, + { .id = ACLK_TOPICID_UNKNOWN, .name = NULL } +}; + +enum aclk_topics compulsory_topics[] = { +// TODO remove old topics once not needed anymore + ACLK_TOPICID_CHART, //TODO from legacy + ACLK_TOPICID_ALARMS, //TODO from legacy + ACLK_TOPICID_METADATA, //TODO from legacy + ACLK_TOPICID_COMMAND, + ACLK_TOPICID_AGENT_CONN, + ACLK_TOPICID_CMD_NG_V1, + ACLK_TOPICID_CREATE_NODE, + ACLK_TOPICID_NODE_CONN, + ACLK_TOPICID_CHART_DIMS, + ACLK_TOPICID_CHART_CONFIGS_UPDATED, + ACLK_TOPICID_CHART_RESET, + ACLK_TOPICID_RETENTION_UPDATED, + ACLK_TOPICID_NODE_INFO, + ACLK_TOPICID_ALARM_LOG, + ACLK_TOPICID_ALARM_HEALTH, + ACLK_TOPICID_ALARM_CONFIG, + ACLK_TOPICID_ALARM_SNAPSHOT, + ACLK_TOPICID_NODE_COLLECTORS, + ACLK_TOPICID_CTXS_SNAPSHOT, + ACLK_TOPICID_CTXS_UPDATED, + ACLK_TOPICID_UNKNOWN +}; + +static enum aclk_topics topic_name_to_id(const char *name) { + struct topic_name *topic = topic_names; + while (topic->name) { + if (!strcmp(topic->name, name)) { + return topic->id; + } + topic++; + } + return ACLK_TOPICID_UNKNOWN; +} + +static const char *topic_id_to_name(enum aclk_topics tid) { + struct topic_name *topic = topic_names; + while (topic->name) { + if (topic->id == tid) + return topic->name; + topic++; + } + return "unknown"; +} + +#define CLAIM_ID_REPLACE_TAG "#{claim_id}" +static void topic_generate_final(struct aclk_topic *t) { + char *dest; + char *replace_tag = strstr(t->topic_recvd, CLAIM_ID_REPLACE_TAG); + if (!replace_tag) + return; + + rrdhost_aclk_state_lock(localhost); + if (unlikely(!localhost->aclk_state.claimed_id)) { + error("This should never be called if agent not claimed"); + rrdhost_aclk_state_unlock(localhost); + return; + } + + t->topic = mallocz(strlen(t->topic_recvd) + 1 - strlen(CLAIM_ID_REPLACE_TAG) + strlen(localhost->aclk_state.claimed_id)); + memcpy(t->topic, t->topic_recvd, replace_tag - t->topic_recvd); + dest = t->topic + (replace_tag - t->topic_recvd); + + memcpy(dest, localhost->aclk_state.claimed_id, strlen(localhost->aclk_state.claimed_id)); + dest += strlen(localhost->aclk_state.claimed_id); + rrdhost_aclk_state_unlock(localhost); + replace_tag += strlen(CLAIM_ID_REPLACE_TAG); + strcpy(dest, replace_tag); + dest += strlen(replace_tag); + *dest = 0; +} + +static int topic_cache_add_topic(struct json_object *json, struct aclk_topic *topic) +{ + struct json_object_iterator it; + struct json_object_iterator itEnd; + + it = json_object_iter_begin(json); + itEnd = json_object_iter_end(json); + + while (!json_object_iter_equal(&it, &itEnd)) { + if (!strcmp(json_object_iter_peek_name(&it), JSON_TOPIC_KEY_NAME)) { + if (json_object_get_type(json_object_iter_peek_value(&it)) != json_type_string) { + error("topic dictionary key \"" JSON_TOPIC_KEY_NAME "\" is expected to be json_type_string"); + return 1; + } + topic->topic_id = topic_name_to_id(json_object_get_string(json_object_iter_peek_value(&it))); + if (topic->topic_id == ACLK_TOPICID_UNKNOWN) { + debug(D_ACLK, "topic dictionary has unknown topic name \"%s\"", json_object_get_string(json_object_iter_peek_value(&it))); + } + json_object_iter_next(&it); + continue; + } + if (!strcmp(json_object_iter_peek_name(&it), JSON_TOPIC_KEY_TOPIC)) { + if (json_object_get_type(json_object_iter_peek_value(&it)) != json_type_string) { + error("topic dictionary key \"" JSON_TOPIC_KEY_TOPIC "\" is expected to be json_type_string"); + return 1; + } + topic->topic_recvd = strdupz(json_object_get_string(json_object_iter_peek_value(&it))); + json_object_iter_next(&it); + continue; + } + + error("topic dictionary has Unknown/Unexpected key \"%s\" in topic description. Ignoring!", json_object_iter_peek_name(&it)); + json_object_iter_next(&it); + } + + if (!topic->topic_recvd) { + error("topic dictionary Missig compulsory key %s", JSON_TOPIC_KEY_TOPIC); + return 1; + } + + topic_generate_final(topic); + aclk_topic_cache_items++; + + return 0; +} + +int aclk_generate_topic_cache(struct json_object *json) +{ + json_object *obj; + + size_t array_size = json_object_array_length(json); + if (!array_size) { + error("Empty topic list!"); + return 1; + } + + if (aclk_topic_cache) + free_topic_cache(); + + aclk_topic_cache = callocz(array_size, sizeof(struct aclk_topic *)); + + for (size_t i = 0; i < array_size; i++) { + obj = json_object_array_get_idx(json, i); + if (json_object_get_type(obj) != json_type_object) { + error("expected json_type_object"); + return 1; + } + aclk_topic_cache[i] = callocz(1, sizeof(struct aclk_topic)); + if (topic_cache_add_topic(obj, aclk_topic_cache[i])) { + error("failed to parse topic @idx=%d", (int)i); + return 1; + } + } + + for (int i = 0; compulsory_topics[i] != ACLK_TOPICID_UNKNOWN; i++) { + if (!aclk_get_topic(compulsory_topics[i])) { + error("missing compulsory topic \"%s\" in password response from cloud", topic_id_to_name(compulsory_topics[i])); + return 1; + } + } + + return 0; +} + +/* + * Build a topic based on sub_topic and final_topic + * if the sub topic starts with / assume that is an absolute topic + * + */ +const char *aclk_get_topic(enum aclk_topics topic) +{ + if (!aclk_topic_cache) { + error("Topic cache not initialized"); + return NULL; + } + + for (size_t i = 0; i < aclk_topic_cache_items; i++) { + if (aclk_topic_cache[i]->topic_id == topic) + return aclk_topic_cache[i]->topic; + } + error("Unknown topic"); + return NULL; +} + +/* + * TBEB with randomness + * + * @param reset 1 - to reset the delay, + * 0 - to advance a step and calculate sleep time in ms + * @param min, max in seconds + * @returns delay in ms + * + */ + +unsigned long int aclk_tbeb_delay(int reset, int base, unsigned long int min, unsigned long int max) { + static int attempt = -1; + + if (reset) { + attempt = -1; + return 0; + } + + attempt++; + + if (attempt == 0) { + srandom(time(NULL)); + return 0; + } + + unsigned long int delay = pow(base, attempt - 1); + delay *= MSEC_PER_SEC; + + delay += (random() % (MAX(1000, delay/2))); + + if (delay <= min * MSEC_PER_SEC) + return min; + + if (delay >= max * MSEC_PER_SEC) + return max; + + return delay; +} + + +#define HTTP_PROXY_PREFIX "http://" +void aclk_set_proxy(char **ohost, int *port, enum mqtt_wss_proxy_type *type) +{ + ACLK_PROXY_TYPE pt; + const char *ptr = aclk_get_proxy(&pt); + char *tmp; + char *host; + if (pt != PROXY_TYPE_HTTP) + return; + + *port = 0; + + if (!strncmp(ptr, HTTP_PROXY_PREFIX, strlen(HTTP_PROXY_PREFIX))) + ptr += strlen(HTTP_PROXY_PREFIX); + + if ((tmp = strchr(ptr, '@'))) + ptr = tmp; + + if ((tmp = strchr(ptr, '/'))) { + host = mallocz((tmp - ptr) + 1); + memcpy(host, ptr, (tmp - ptr)); + host[tmp - ptr] = 0; + } else + host = strdupz(ptr); + + if ((tmp = strchr(host, ':'))) { + *tmp = 0; + tmp++; + *port = atoi(tmp); + } + + if (*port <= 0 || *port > 65535) + *port = 8080; + + *ohost = host; + + if (type) + *type = MQTT_WSS_PROXY_HTTP; +} diff --git a/aclk/aclk_util.h b/aclk/aclk_util.h new file mode 100644 index 0000000..ed715e0 --- /dev/null +++ b/aclk/aclk_util.h @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#ifndef ACLK_UTIL_H +#define ACLK_UTIL_H + +#include "libnetdata/libnetdata.h" +#include "mqtt_wss_client.h" + +#define CLOUD_EC_MALFORMED_NODE_ID 1 +#define CLOUD_EMSG_MALFORMED_NODE_ID "URL requests node_id but there is not enough chars following (for it to be valid uuid)." +#define CLOUD_EC_NODE_NOT_FOUND 2 +#define CLOUD_EMSG_NODE_NOT_FOUND "Node with requested node_id not found" +#define CLOUD_EC_ZLIB_ERROR 3 +#define CLOUD_EMSG_ZLIB_ERROR "Error during zlib compression" +#define CLOUD_EC_REQ_REPLY_TOO_BIG 4 +#define CLOUD_EMSG_REQ_REPLY_TOO_BIG "Request reply produces message bigger than allowed maximum" +#define CLOUD_EC_FAIL_TOPIC 5 +#define CLOUD_EMSG_FAIL_TOPIC "Internal Topic Error" +#define CLOUD_EC_SND_TIMEOUT 6 +#define CLOUD_EMSG_SND_TIMEOUT "Timeout sending binpacked message" + +// Helper stuff which should not have any further inside ACLK dependency +// and are supposed not to be needed outside of ACLK +extern usec_t aclk_session_newarch; + +extern int chart_batch_id; + +typedef enum { + ACLK_ENC_UNKNOWN = 0, + ACLK_ENC_JSON, + ACLK_ENC_PROTO +} aclk_encoding_type_t; + +typedef enum { + ACLK_TRP_UNKNOWN = 0, + ACLK_TRP_MQTT_3_1_1, + ACLK_TRP_MQTT_5 +} aclk_transport_type_t; + +typedef struct { + char *endpoint; + aclk_transport_type_t type; +} aclk_transport_desc_t; + +typedef struct { + int base; + int max_s; + int min_s; +} aclk_backoff_t; + +typedef struct { + char *auth_endpoint; + aclk_encoding_type_t encoding; + + aclk_transport_desc_t **transports; + size_t transport_count; + + char **capabilities; + size_t capability_count; + + aclk_backoff_t backoff; +} aclk_env_t; + +extern aclk_env_t *aclk_env; + +aclk_encoding_type_t aclk_encoding_type_t_from_str(const char *str); +aclk_transport_type_t aclk_transport_type_t_from_str(const char *str); + +void aclk_transport_desc_t_destroy(aclk_transport_desc_t *trp_desc); +void aclk_env_t_destroy(aclk_env_t *env); +int aclk_env_has_capa(const char *capa); + +enum aclk_topics { + ACLK_TOPICID_UNKNOWN = 0, + ACLK_TOPICID_CHART = 1, + ACLK_TOPICID_ALARMS = 2, + ACLK_TOPICID_METADATA = 3, + ACLK_TOPICID_COMMAND = 4, + ACLK_TOPICID_AGENT_CONN = 5, + ACLK_TOPICID_CMD_NG_V1 = 6, + ACLK_TOPICID_CREATE_NODE = 7, + ACLK_TOPICID_NODE_CONN = 8, + ACLK_TOPICID_CHART_DIMS = 9, + ACLK_TOPICID_CHART_CONFIGS_UPDATED = 10, + ACLK_TOPICID_CHART_RESET = 11, + ACLK_TOPICID_RETENTION_UPDATED = 12, + ACLK_TOPICID_NODE_INFO = 13, + ACLK_TOPICID_ALARM_LOG = 14, + ACLK_TOPICID_ALARM_HEALTH = 15, + ACLK_TOPICID_ALARM_CONFIG = 16, + ACLK_TOPICID_ALARM_SNAPSHOT = 17, + ACLK_TOPICID_NODE_COLLECTORS = 18, + ACLK_TOPICID_CTXS_SNAPSHOT = 19, + ACLK_TOPICID_CTXS_UPDATED = 20 +}; + +const char *aclk_get_topic(enum aclk_topics topic); +int aclk_generate_topic_cache(struct json_object *json); +void free_topic_cache(void); +// TODO +// aclk_topics_reload //when claim id changes + +#ifdef ACLK_LOG_CONVERSATION_DIR +extern volatile int aclk_conversation_log_counter; +#define ACLK_GET_CONV_LOG_NEXT() __atomic_fetch_add(&aclk_conversation_log_counter, 1, __ATOMIC_SEQ_CST) +#endif + +unsigned long int aclk_tbeb_delay(int reset, int base, unsigned long int min, unsigned long int max); +#define aclk_tbeb_reset(x) aclk_tbeb_delay(1, 0, 0, 0) + +void aclk_set_proxy(char **ohost, int *port, enum mqtt_wss_proxy_type *type); + +#endif /* ACLK_UTIL_H */ diff --git a/aclk/helpers/mqtt_wss_pal.h b/aclk/helpers/mqtt_wss_pal.h new file mode 100644 index 0000000..5c89f8b --- /dev/null +++ b/aclk/helpers/mqtt_wss_pal.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef MQTT_WSS_PAL_H +#define MQTT_WSS_PAL_H + +#include "libnetdata/libnetdata.h" + +#undef OPENSSL_VERSION_095 +#undef OPENSSL_VERSION_097 +#undef OPENSSL_VERSION_110 +#undef OPENSSL_VERSION_111 + +#define mw_malloc(...) mallocz(__VA_ARGS__) +#define mw_calloc(...) callocz(__VA_ARGS__) +#define mw_free(...) freez(__VA_ARGS__) +#define mw_strdup(...) strdupz(__VA_ARGS__) +#define mw_realloc(...) reallocz(__VA_ARGS__) + +#endif /* MQTT_WSS_PAL_H */ diff --git a/aclk/helpers/ringbuffer_pal.h b/aclk/helpers/ringbuffer_pal.h new file mode 100644 index 0000000..2f7e1cb --- /dev/null +++ b/aclk/helpers/ringbuffer_pal.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef RINGBUFFER_PAL_H +#define RINGBUFFER_PAL_H + +#include "libnetdata/libnetdata.h" + +#define crbuf_malloc(...) mallocz(__VA_ARGS__) +#define crbuf_free(...) freez(__VA_ARGS__) + +#endif /* RINGBUFFER_PAL_H */ diff --git a/aclk/https_client.c b/aclk/https_client.c new file mode 100644 index 0000000..1a32f83 --- /dev/null +++ b/aclk/https_client.c @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" + +#include "https_client.h" + +#include "mqtt_websockets/c-rbuf/include/ringbuffer.h" + +enum http_parse_state { + HTTP_PARSE_INITIAL = 0, + HTTP_PARSE_HEADERS, + HTTP_PARSE_CONTENT +}; + +static const char *http_req_type_to_str(http_req_type_t req) { + switch (req) { + case HTTP_REQ_GET: + return "GET"; + case HTTP_REQ_POST: + return "POST"; + case HTTP_REQ_CONNECT: + return "CONNECT"; + default: + return "unknown"; + } +} + +typedef struct { + enum http_parse_state state; + int content_length; + int http_code; +} http_parse_ctx; + +#define HTTP_PARSE_CTX_INITIALIZER { .state = HTTP_PARSE_INITIAL, .content_length = -1, .http_code = 0 } +static inline void http_parse_ctx_clear(http_parse_ctx *ctx) { + ctx->state = HTTP_PARSE_INITIAL; + ctx->content_length = -1; + ctx->http_code = 0; +} + +#define POLL_TO_MS 100 + +#define NEED_MORE_DATA 0 +#define PARSE_SUCCESS 1 +#define PARSE_ERROR -1 +#define HTTP_LINE_TERM "\x0D\x0A" +#define RESP_PROTO "HTTP/1.1 " +#define HTTP_KEYVAL_SEPARATOR ": " +#define HTTP_HDR_BUFFER_SIZE 256 +#define PORT_STR_MAX_BYTES 12 + +static void process_http_hdr(http_parse_ctx *parse_ctx, const char *key, const char *val) +{ + // currently we care only about content-length + // but in future the way this is written + // it can be extended + if (!strcmp("content-length", key)) { + parse_ctx->content_length = atoi(val); + } +} + +static int parse_http_hdr(rbuf_t buf, http_parse_ctx *parse_ctx) +{ + int idx, idx_end; + char buf_key[HTTP_HDR_BUFFER_SIZE]; + char buf_val[HTTP_HDR_BUFFER_SIZE]; + char *ptr = buf_key; + if (!rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx_end)) { + error("CRLF expected"); + return 1; + } + + char *separator = rbuf_find_bytes(buf, HTTP_KEYVAL_SEPARATOR, strlen(HTTP_KEYVAL_SEPARATOR), &idx); + if (!separator) { + error("Missing Key/Value separator"); + return 1; + } + if (idx >= HTTP_HDR_BUFFER_SIZE) { + error("Key name is too long"); + return 1; + } + + rbuf_pop(buf, buf_key, idx); + buf_key[idx] = 0; + + rbuf_bump_tail(buf, strlen(HTTP_KEYVAL_SEPARATOR)); + idx_end -= strlen(HTTP_KEYVAL_SEPARATOR) + idx; + if (idx_end >= HTTP_HDR_BUFFER_SIZE) { + error("Value of key \"%s\" too long", buf_key); + return 1; + } + + rbuf_pop(buf, buf_val, idx_end); + buf_val[idx_end] = 0; + + for (ptr = buf_key; *ptr; ptr++) + *ptr = tolower(*ptr); + + process_http_hdr(parse_ctx, buf_key, buf_val); + + return 0; +} + +static int parse_http_response(rbuf_t buf, http_parse_ctx *parse_ctx) +{ + int idx; + char rc[4]; + + do { + if (parse_ctx->state != HTTP_PARSE_CONTENT && !rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx)) + return NEED_MORE_DATA; + switch (parse_ctx->state) { + case HTTP_PARSE_INITIAL: + if (rbuf_memcmp_n(buf, RESP_PROTO, strlen(RESP_PROTO))) { + error("Expected response to start with \"%s\"", RESP_PROTO); + return PARSE_ERROR; + } + rbuf_bump_tail(buf, strlen(RESP_PROTO)); + if (rbuf_pop(buf, rc, 4) != 4) { + error("Expected HTTP status code"); + return PARSE_ERROR; + } + if (rc[3] != ' ') { + error("Expected space after HTTP return code"); + return PARSE_ERROR; + } + rc[3] = 0; + parse_ctx->http_code = atoi(rc); + if (parse_ctx->http_code < 100 || parse_ctx->http_code >= 600) { + error("HTTP code not in range 100 to 599"); + return PARSE_ERROR; + } + + rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx); + + rbuf_bump_tail(buf, idx + strlen(HTTP_LINE_TERM)); + + parse_ctx->state = HTTP_PARSE_HEADERS; + break; + case HTTP_PARSE_HEADERS: + if (!idx) { + parse_ctx->state = HTTP_PARSE_CONTENT; + rbuf_bump_tail(buf, strlen(HTTP_LINE_TERM)); + break; + } + if (parse_http_hdr(buf, parse_ctx)) + return PARSE_ERROR; + rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx); + rbuf_bump_tail(buf, idx + strlen(HTTP_LINE_TERM)); + break; + case HTTP_PARSE_CONTENT: + // replies like CONNECT etc. do not have content + if (parse_ctx->content_length < 0) + return PARSE_SUCCESS; + + if (rbuf_bytes_available(buf) >= (size_t)parse_ctx->content_length) + return PARSE_SUCCESS; + return NEED_MORE_DATA; + } + } while(1); +} + +typedef struct https_req_ctx { + https_req_t *request; + + int sock; + rbuf_t buf_rx; + + struct pollfd poll_fd; + + SSL_CTX *ssl_ctx; + SSL *ssl; + + size_t written; + + int self_signed_allowed; + + http_parse_ctx parse_ctx; + + time_t req_start_time; +} https_req_ctx_t; + +static int https_req_check_timedout(https_req_ctx_t *ctx) { + if (now_realtime_sec() > ctx->req_start_time + ctx->request->timeout_s) { + error("request timed out"); + return 1; + } + return 0; +} + +static char *_ssl_err_tos(int err) +{ + switch(err){ + case SSL_ERROR_SSL: + return "SSL_ERROR_SSL"; + case SSL_ERROR_WANT_READ: + return "SSL_ERROR_WANT_READ"; + case SSL_ERROR_WANT_WRITE: + return "SSL_ERROR_WANT_WRITE"; + case SSL_ERROR_NONE: + return "SSL_ERROR_NONE"; + case SSL_ERROR_ZERO_RETURN: + return "SSL_ERROR_ZERO_RETURN"; + case SSL_ERROR_WANT_CONNECT: + return "SSL_ERROR_WANT_CONNECT"; + case SSL_ERROR_WANT_ACCEPT: + return "SSL_ERROR_WANT_ACCEPT"; + } + return "Unknown!!!"; +} + +static int socket_write_all(https_req_ctx_t *ctx, char *data, size_t data_len) { + ctx->written = 0; + ctx->poll_fd.events = POLLOUT; + + do { + int ret = poll(&ctx->poll_fd, 1, POLL_TO_MS); + if (ret < 0) { + error("poll error"); + return 1; + } + if (ret == 0) { + if (https_req_check_timedout(ctx)) { + error("Poll timed out"); + return 2; + } + continue; + } + + ret = write(ctx->sock, &data[ctx->written], data_len - ctx->written); + if (ret > 0) { + ctx->written += ret; + } else if (errno != EAGAIN && errno != EWOULDBLOCK) { + error("Error writing to socket"); + return 3; + } + } while (ctx->written < data_len); + + return 0; +} + +static int ssl_write_all(https_req_ctx_t *ctx, char *data, size_t data_len) { + ctx->written = 0; + ctx->poll_fd.events |= POLLOUT; + + do { + int ret = poll(&ctx->poll_fd, 1, POLL_TO_MS); + if (ret < 0) { + error("poll error"); + return 1; + } + if (ret == 0) { + if (https_req_check_timedout(ctx)) { + error("Poll timed out"); + return 2; + } + continue; + } + ctx->poll_fd.events = 0; + + ret = SSL_write(ctx->ssl, &data[ctx->written], data_len - ctx->written); + if (ret > 0) { + ctx->written += ret; + } else { + ret = SSL_get_error(ctx->ssl, ret); + switch (ret) { + case SSL_ERROR_WANT_READ: + ctx->poll_fd.events |= POLLIN; + break; + case SSL_ERROR_WANT_WRITE: + ctx->poll_fd.events |= POLLOUT; + break; + default: + error("SSL_write Err: %s", _ssl_err_tos(ret)); + return 3; + } + } + } while (ctx->written < data_len); + + return 0; +} + +static inline int https_client_write_all(https_req_ctx_t *ctx, char *data, size_t data_len) { + if (ctx->ssl_ctx) + return ssl_write_all(ctx, data, data_len); + return socket_write_all(ctx, data, data_len); +} + +static int read_parse_response(https_req_ctx_t *ctx) { + int ret; + char *ptr; + size_t size; + + ctx->poll_fd.events = POLLIN; + do { + ret = poll(&ctx->poll_fd, 1, POLL_TO_MS); + if (ret < 0) { + error("poll error"); + return 1; + } + if (ret == 0) { + if (https_req_check_timedout(ctx)) { + error("Poll timed out"); + return 2; + } + if (!ctx->ssl_ctx) + continue; + } + ctx->poll_fd.events = 0; + + ptr = rbuf_get_linear_insert_range(ctx->buf_rx, &size); + + if (ctx->ssl_ctx) + ret = SSL_read(ctx->ssl, ptr, size); + else + ret = read(ctx->sock, ptr, size); + + if (ret > 0) { + rbuf_bump_head(ctx->buf_rx, ret); + } else { + if (ctx->ssl_ctx) { + ret = SSL_get_error(ctx->ssl, ret); + switch (ret) { + case SSL_ERROR_WANT_READ: + ctx->poll_fd.events |= POLLIN; + break; + case SSL_ERROR_WANT_WRITE: + ctx->poll_fd.events |= POLLOUT; + break; + default: + error("SSL_read Err: %s", _ssl_err_tos(ret)); + return 3; + } + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + error("write error"); + return 3; + } + ctx->poll_fd.events |= POLLIN; + } + } + } while (!(ret = parse_http_response(ctx->buf_rx, &ctx->parse_ctx))); + + if (ret != PARSE_SUCCESS) { + error("Error parsing HTTP response"); + return 1; + } + + return 0; +} + +#define TX_BUFFER_SIZE 8192 +#define RX_BUFFER_SIZE (TX_BUFFER_SIZE*2) +static int handle_http_request(https_req_ctx_t *ctx) { + BUFFER *hdr = buffer_create(TX_BUFFER_SIZE); + int rc = 0; + + http_parse_ctx_clear(&ctx->parse_ctx); + + // Prepare data to send + switch (ctx->request->request_type) { + case HTTP_REQ_CONNECT: + buffer_strcat(hdr, "CONNECT "); + break; + case HTTP_REQ_GET: + buffer_strcat(hdr, "GET "); + break; + case HTTP_REQ_POST: + buffer_strcat(hdr, "POST "); + break; + default: + error("Unknown HTTPS request type!"); + rc = 1; + goto err_exit; + } + + if (ctx->request->request_type == HTTP_REQ_CONNECT) { + buffer_strcat(hdr, ctx->request->host); + buffer_sprintf(hdr, ":%d", ctx->request->port); + } else { + buffer_strcat(hdr, ctx->request->url); + } + + buffer_strcat(hdr, " HTTP/1.1\x0D\x0A"); + + //TODO Headers! + if (ctx->request->request_type != HTTP_REQ_CONNECT) { + buffer_sprintf(hdr, "Host: %s\x0D\x0A", ctx->request->host); + } + buffer_strcat(hdr, "User-Agent: Netdata/rocks newhttpclient\x0D\x0A"); + + if (ctx->request->request_type == HTTP_REQ_POST && ctx->request->payload && ctx->request->payload_size) { + buffer_sprintf(hdr, "Content-Length: %zu\x0D\x0A", ctx->request->payload_size); + } + + buffer_strcat(hdr, "\x0D\x0A"); + + // Send the request + if (https_client_write_all(ctx, hdr->buffer, hdr->len)) { + error("Couldn't write HTTP request header into SSL connection"); + rc = 2; + goto err_exit; + } + + if (ctx->request->request_type == HTTP_REQ_POST && ctx->request->payload && ctx->request->payload_size) { + if (https_client_write_all(ctx, ctx->request->payload, ctx->request->payload_size)) { + error("Couldn't write payload into SSL connection"); + rc = 3; + goto err_exit; + } + } + + // Read The Response + if (read_parse_response(ctx)) { + error("Error reading or parsing response from server"); + rc = 4; + goto err_exit; + } + +err_exit: + buffer_free(hdr); + return rc; +} + +static int cert_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) +{ + X509 *err_cert; + int err, depth; + char *err_str; + + if (!preverify_ok) { + err = X509_STORE_CTX_get_error(ctx); + depth = X509_STORE_CTX_get_error_depth(ctx); + err_cert = X509_STORE_CTX_get_current_cert(ctx); + err_str = X509_NAME_oneline(X509_get_subject_name(err_cert), NULL, 0); + + error("Cert Chain verify error:num=%d:%s:depth=%d:%s", err, + X509_verify_cert_error_string(err), depth, err_str); + + free(err_str); + } + +#ifdef ACLK_SSL_ALLOW_SELF_SIGNED + if (!preverify_ok && err == X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT) + { + preverify_ok = 1; + error("Self Signed Certificate Accepted as the agent was built with ACLK_SSL_ALLOW_SELF_SIGNED"); + } +#endif + + return preverify_ok; +} + +int https_request(https_req_t *request, https_req_response_t *response) { + int rc = 1, ret; + char connect_port_str[PORT_STR_MAX_BYTES]; + + const char *connect_host = request->proxy_host ? request->proxy_host : request->host; + int connect_port = request->proxy_host ? request->proxy_port : request->port; + struct timeval timeout = { .tv_sec = request->timeout_s, .tv_usec = 0 }; + + https_req_ctx_t *ctx = callocz(1, sizeof(https_req_ctx_t)); + ctx->req_start_time = now_realtime_sec(); + + ctx->buf_rx = rbuf_create(RX_BUFFER_SIZE); + if (!ctx->buf_rx) { + error("Couldn't allocate buffer for RX data"); + goto exit_req_ctx; + } + + snprintfz(connect_port_str, PORT_STR_MAX_BYTES, "%d", connect_port); + + ctx->sock = connect_to_this_ip46(IPPROTO_TCP, SOCK_STREAM, connect_host, 0, connect_port_str, &timeout); + if (ctx->sock < 0) { + error("Error connecting TCP socket to \"%s\"", connect_host); + goto exit_buf_rx; + } + + if (fcntl(ctx->sock, F_SETFL, fcntl(ctx->sock, F_GETFL, 0) | O_NONBLOCK) == -1) { + error("Error setting O_NONBLOCK to TCP socket."); + goto exit_sock; + } + + ctx->poll_fd.fd = ctx->sock; + + // Do the CONNECT if proxy is used + if (request->proxy_host) { + https_req_t req = HTTPS_REQ_T_INITIALIZER; + req.request_type = HTTP_REQ_CONNECT; + req.timeout_s = request->timeout_s; + req.host = request->host; + req.port = request->port; + req.url = request->url; + ctx->request = &req; + if (handle_http_request(ctx)) { + error("Failed to CONNECT with proxy"); + goto exit_sock; + } + if (ctx->parse_ctx.http_code != 200) { + error("Proxy didn't return 200 OK (got %d)", ctx->parse_ctx.http_code); + goto exit_sock; + } + info("Proxy accepted CONNECT upgrade"); + } + ctx->request = request; + + ctx->ssl_ctx = security_initialize_openssl_client(); + if (ctx->ssl_ctx==NULL) { + error("Cannot allocate SSL context"); + goto exit_sock; + } + + if (!SSL_CTX_set_default_verify_paths(ctx->ssl_ctx)) { + error("Error setting default verify paths"); + goto exit_CTX; + } + SSL_CTX_set_verify(ctx->ssl_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, cert_verify_callback); + + ctx->ssl = SSL_new(ctx->ssl_ctx); + if (ctx->ssl==NULL) { + error("Cannot allocate SSL"); + goto exit_CTX; + } + + SSL_set_fd(ctx->ssl, ctx->sock); + ret = SSL_connect(ctx->ssl); + if (ret != -1 && ret != 1) { + error("SSL could not connect"); + goto exit_SSL; + } + if (ret == -1) { + // expected as underlying socket is non blocking! + // consult SSL_connect documentation for details + int ec = SSL_get_error(ctx->ssl, ret); + if (ec != SSL_ERROR_WANT_READ && ec != SSL_ERROR_WANT_WRITE) { + error("Failed to start SSL connection"); + goto exit_SSL; + } + } + + // The actual request here + if (handle_http_request(ctx)) { + error("Couldn't process request"); + goto exit_SSL; + } + response->http_code = ctx->parse_ctx.http_code; + if (ctx->parse_ctx.content_length > 0) { + response->payload_size = ctx->parse_ctx.content_length; + response->payload = mallocz(response->payload_size + 1); + ret = rbuf_pop(ctx->buf_rx, response->payload, response->payload_size); + if (ret != (int)response->payload_size) { + error("Payload size doesn't match remaining data on the buffer!"); + response->payload_size = ret; + } + // normally we take payload as it is and copy it + // but for convenience in cases where payload is sth. like + // json we add terminating zero so that user of the data + // doesn't have to convert to C string (0 terminated) + // other uses still have correct payload_size and can copy + // only exact data without affixed 0x00 + ((char*)response->payload)[response->payload_size] = 0; // mallocz(response->payload_size + 1); + } + info("HTTPS \"%s\" request to \"%s\" finished with HTTP code: %d", http_req_type_to_str(ctx->request->request_type), ctx->request->host, response->http_code); + + rc = 0; + +exit_SSL: + SSL_free(ctx->ssl); +exit_CTX: + SSL_CTX_free(ctx->ssl_ctx); +exit_sock: + close(ctx->sock); +exit_buf_rx: + rbuf_free(ctx->buf_rx); +exit_req_ctx: + freez(ctx); + return rc; +} + +void https_req_response_free(https_req_response_t *res) { + freez(res->payload); +} + +void https_req_response_init(https_req_response_t *res) { + res->http_code = 0; + res->payload = NULL; + res->payload_size = 0; +} + +static inline char *UNUSED_FUNCTION(min_non_null)(char *a, char *b) { + if (!a) + return b; + if (!b) + return a; + return (a < b ? a : b); +} + +#define URI_PROTO_SEPARATOR "://" +#define URL_PARSER_LOG_PREFIX "url_parser " + +static int parse_host_port(url_t *url) { + char *ptr = strrchr(url->host, ':'); + if (ptr) { + size_t port_len = strlen(ptr + 1); + if (!port_len) { + error(URL_PARSER_LOG_PREFIX ": specified but no port number"); + return 1; + } + if (port_len > 5 /* MAX port length is 5digit long in decimal */) { + error(URL_PARSER_LOG_PREFIX "port # is too long"); + return 1; + } + *ptr = 0; + if (!strlen(url->host)) { + error(URL_PARSER_LOG_PREFIX "host empty after removing port"); + return 1; + } + url->port = atoi (ptr + 1); + } + return 0; +} + +static inline void port_by_proto(url_t *url) { + if (url->port) + return; + if (!url->proto) + return; + if (!strcmp(url->proto, "http")) { + url->port = 80; + return; + } + if (!strcmp(url->proto, "https")) { + url->port = 443; + return; + } +} + +#define STRDUPZ_2PTR(dest, start, end) \ + { \ + dest = mallocz(1 + end - start); \ + memcpy(dest, start, end - start); \ + dest[end - start] = 0; \ + } + +int url_parse(const char *url, url_t *parsed) { + const char *start = url; + const char *end = strstr(url, URI_PROTO_SEPARATOR); + + if (end) { + if (end == start) { + error (URL_PARSER_LOG_PREFIX "found " URI_PROTO_SEPARATOR " without protocol specified"); + return 1; + } + + STRDUPZ_2PTR(parsed->proto, start, end) + start = end + strlen(URI_PROTO_SEPARATOR); + } + + end = strchr(start, '/'); + if (!end) + end = start + strlen(start); + + if (start == end) { + error(URL_PARSER_LOG_PREFIX "Host empty"); + return 1; + } + + STRDUPZ_2PTR(parsed->host, start, end); + + if (parse_host_port(parsed)) + return 1; + + if (!*end) { + parsed->path = strdupz("/"); + port_by_proto(parsed); + return 0; + } + + parsed->path = strdupz(end); + port_by_proto(parsed); + return 0; +} + +void url_t_destroy(url_t *url) { + freez(url->host); + freez(url->path); + freez(url->proto); +} diff --git a/aclk/https_client.h b/aclk/https_client.h new file mode 100644 index 0000000..f7bc3d4 --- /dev/null +++ b/aclk/https_client.h @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_HTTPS_CLIENT_H +#define NETDATA_HTTPS_CLIENT_H + +#include "libnetdata/libnetdata.h" + +typedef enum http_req_type { + HTTP_REQ_GET = 0, + HTTP_REQ_POST, + HTTP_REQ_CONNECT +} http_req_type_t; + +typedef struct { + http_req_type_t request_type; + + char *host; + int port; + char *url; + + time_t timeout_s; //timeout in seconds for the network operation (send/recv) + + void *payload; + size_t payload_size; + + char *proxy_host; + int proxy_port; +} https_req_t; + +typedef struct { + int http_code; + + void *payload; + size_t payload_size; +} https_req_response_t; + + +// Non feature complete URL parser +// feel free to extend when needed +// currently implements only what ACLK +// needs +// proto://host[:port]/path +typedef struct { + char *proto; + char *host; + int port; + char* path; +} url_t; + +int url_parse(const char *url, url_t *parsed); +void url_t_destroy(url_t *url); + +void https_req_response_free(https_req_response_t *res); +void https_req_response_init(https_req_response_t *res); + +#define HTTPS_REQ_RESPONSE_T_INITIALIZER \ + { \ + .http_code = 0, \ + .payload = NULL, \ + .payload_size = 0 \ + } + +#define HTTPS_REQ_T_INITIALIZER \ + { \ + .request_type = HTTP_REQ_GET, \ + .host = NULL, \ + .port = 443, \ + .url = NULL, \ + .timeout_s = 30, \ + .payload = NULL, \ + .payload_size = 0, \ + .proxy_host = NULL, \ + .proxy_port = 8080 \ + } + +int https_request(https_req_t *request, https_req_response_t *response); + +#endif /* NETDATA_HTTPS_CLIENT_H */ diff --git a/aclk/schema-wrappers/alarm_config.cc b/aclk/schema-wrappers/alarm_config.cc new file mode 100644 index 0000000..56d7e6f --- /dev/null +++ b/aclk/schema-wrappers/alarm_config.cc @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "alarm_config.h" + +#include "proto/alarm/v1/config.pb.h" + +#include "libnetdata/libnetdata.h" + +#include "schema_wrapper_utils.h" + +using namespace alarms::v1; + +void destroy_aclk_alarm_configuration(struct aclk_alarm_configuration *cfg) +{ + freez(cfg->alarm); + freez(cfg->tmpl); + freez(cfg->on_chart); + + freez(cfg->classification); + freez(cfg->type); + freez(cfg->component); + + freez(cfg->os); + freez(cfg->hosts); + freez(cfg->plugin); + freez(cfg->module); + freez(cfg->charts); + freez(cfg->families); + freez(cfg->lookup); + freez(cfg->every); + freez(cfg->units); + + freez(cfg->green); + freez(cfg->red); + + freez(cfg->calculation_expr); + freez(cfg->warning_expr); + freez(cfg->critical_expr); + + freez(cfg->recipient); + freez(cfg->exec); + freez(cfg->delay); + freez(cfg->repeat); + freez(cfg->info); + freez(cfg->options); + freez(cfg->host_labels); + + freez(cfg->p_db_lookup_dimensions); + freez(cfg->p_db_lookup_method); + freez(cfg->p_db_lookup_options); +} + +char *generate_provide_alarm_configuration(size_t *len, struct provide_alarm_configuration *data) +{ + ProvideAlarmConfiguration msg; + AlarmConfiguration *cfg = msg.mutable_config(); + + msg.set_config_hash(data->cfg_hash); + + if (data->cfg.alarm) + cfg->set_alarm(data->cfg.alarm); + if (data->cfg.tmpl) + cfg->set_template_(data->cfg.tmpl); + if(data->cfg.on_chart) + cfg->set_on_chart(data->cfg.on_chart); + + if (data->cfg.classification) + cfg->set_classification(data->cfg.classification); + if (data->cfg.type) + cfg->set_type(data->cfg.type); + if (data->cfg.component) + cfg->set_component(data->cfg.component); + + if (data->cfg.os) + cfg->set_os(data->cfg.os); + if (data->cfg.hosts) + cfg->set_hosts(data->cfg.hosts); + if (data->cfg.plugin) + cfg->set_plugin(data->cfg.plugin); + if(data->cfg.module) + cfg->set_module(data->cfg.module); + if(data->cfg.charts) + cfg->set_charts(data->cfg.charts); + if(data->cfg.families) + cfg->set_families(data->cfg.families); + if(data->cfg.lookup) + cfg->set_lookup(data->cfg.lookup); + if(data->cfg.every) + cfg->set_every(data->cfg.every); + if(data->cfg.units) + cfg->set_units(data->cfg.units); + + if (data->cfg.green) + cfg->set_green(data->cfg.green); + if (data->cfg.red) + cfg->set_red(data->cfg.red); + + if (data->cfg.calculation_expr) + cfg->set_calculation_expr(data->cfg.calculation_expr); + if (data->cfg.warning_expr) + cfg->set_warning_expr(data->cfg.warning_expr); + if (data->cfg.critical_expr) + cfg->set_critical_expr(data->cfg.critical_expr); + + if (data->cfg.recipient) + cfg->set_recipient(data->cfg.recipient); + if (data->cfg.exec) + cfg->set_exec(data->cfg.exec); + if (data->cfg.delay) + cfg->set_delay(data->cfg.delay); + if (data->cfg.repeat) + cfg->set_repeat(data->cfg.repeat); + if (data->cfg.info) + cfg->set_info(data->cfg.info); + if (data->cfg.options) + cfg->set_options(data->cfg.options); + if (data->cfg.host_labels) + cfg->set_host_labels(data->cfg.host_labels); + + cfg->set_p_db_lookup_after(data->cfg.p_db_lookup_after); + cfg->set_p_db_lookup_before(data->cfg.p_db_lookup_before); + if (data->cfg.p_db_lookup_dimensions) + cfg->set_p_db_lookup_dimensions(data->cfg.p_db_lookup_dimensions); + if (data->cfg.p_db_lookup_method) + cfg->set_p_db_lookup_method(data->cfg.p_db_lookup_method); + if (data->cfg.p_db_lookup_options) + cfg->set_p_db_lookup_options(data->cfg.p_db_lookup_options); + cfg->set_p_update_every(data->cfg.p_update_every); + + *len = PROTO_COMPAT_MSG_SIZE(msg); + char *bin = (char*)mallocz(*len); + if (!msg.SerializeToArray(bin, *len)) + return NULL; + + return bin; +} + +char *parse_send_alarm_configuration(const char *data, size_t len) +{ + SendAlarmConfiguration msg; + if (!msg.ParseFromArray(data, len)) + return NULL; + if (!msg.config_hash().c_str()) + return NULL; + return strdupz(msg.config_hash().c_str()); +} + diff --git a/aclk/schema-wrappers/alarm_config.h b/aclk/schema-wrappers/alarm_config.h new file mode 100644 index 0000000..157fbc6 --- /dev/null +++ b/aclk/schema-wrappers/alarm_config.h @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_ALARM_CONFIG_H +#define ACLK_SCHEMA_WRAPPER_ALARM_CONFIG_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct aclk_alarm_configuration { + char *alarm; + char *tmpl; + char *on_chart; + + char *classification; + char *type; + char *component; + + char *os; + char *hosts; + char *plugin; + char *module; + char *charts; + char *families; + char *lookup; + char *every; + char *units; + + char *green; + char *red; + + char *calculation_expr; + char *warning_expr; + char *critical_expr; + + char *recipient; + char *exec; + char *delay; + char *repeat; + char *info; + char *options; + char *host_labels; + + int32_t p_db_lookup_after; + int32_t p_db_lookup_before; + char *p_db_lookup_dimensions; + char *p_db_lookup_method; + char *p_db_lookup_options; + int32_t p_update_every; +}; + +void destroy_aclk_alarm_configuration(struct aclk_alarm_configuration *cfg); + +struct provide_alarm_configuration { + char *cfg_hash; + struct aclk_alarm_configuration cfg; +}; + +char *generate_provide_alarm_configuration(size_t *len, struct provide_alarm_configuration *data); +char *parse_send_alarm_configuration(const char *data, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_ALARM_CONFIG_H */ diff --git a/aclk/schema-wrappers/alarm_stream.cc b/aclk/schema-wrappers/alarm_stream.cc new file mode 100644 index 0000000..f643933 --- /dev/null +++ b/aclk/schema-wrappers/alarm_stream.cc @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "alarm_stream.h" + +#include "proto/alarm/v1/stream.pb.h" + +#include "libnetdata/libnetdata.h" + +#include "schema_wrapper_utils.h" + +using namespace alarms::v1; + +struct start_alarm_streaming parse_start_alarm_streaming(const char *data, size_t len) +{ + struct start_alarm_streaming ret; + memset(&ret, 0, sizeof(ret)); + + StartAlarmStreaming msg; + + if (!msg.ParseFromArray(data, len)) + return ret; + + ret.node_id = strdupz(msg.node_id().c_str()); + ret.batch_id = msg.batch_id(); + ret.start_seq_id = msg.start_sequnce_id(); + + return ret; +} + +char *parse_send_alarm_log_health(const char *data, size_t len) +{ + SendAlarmLogHealth msg; + if (!msg.ParseFromArray(data, len)) + return NULL; + return strdupz(msg.node_id().c_str()); +} + +char *generate_alarm_log_health(size_t *len, struct alarm_log_health *data) +{ + AlarmLogHealth msg; + LogEntries *entries; + + msg.set_claim_id(data->claim_id); + msg.set_node_id(data->node_id); + msg.set_enabled(data->enabled); + + switch (data->status) { + case alarm_log_status_aclk::ALARM_LOG_STATUS_IDLE: + msg.set_status(alarms::v1::ALARM_LOG_STATUS_IDLE); + break; + case alarm_log_status_aclk::ALARM_LOG_STATUS_RUNNING: + msg.set_status(alarms::v1::ALARM_LOG_STATUS_RUNNING); + break; + case alarm_log_status_aclk::ALARM_LOG_STATUS_UNSPECIFIED: + msg.set_status(alarms::v1::ALARM_LOG_STATUS_UNSPECIFIED); + break; + default: + error("Unknown status of AlarmLogHealth LogEntry"); + return NULL; + } + + entries = msg.mutable_log_entries(); + entries->set_first_sequence_id(data->log_entries.first_seq_id); + entries->set_last_sequence_id(data->log_entries.last_seq_id); + + set_google_timestamp_from_timeval(data->log_entries.first_when, entries->mutable_first_when()); + set_google_timestamp_from_timeval(data->log_entries.last_when, entries->mutable_last_when()); + + *len = PROTO_COMPAT_MSG_SIZE(msg); + char *bin = (char*)mallocz(*len); + if (!msg.SerializeToArray(bin, *len)) + return NULL; + + return bin; +} + +static alarms::v1::AlarmStatus aclk_alarm_status_to_proto(enum aclk_alarm_status status) +{ + switch (status) { + case aclk_alarm_status::ALARM_STATUS_NULL: + return alarms::v1::ALARM_STATUS_NULL; + case aclk_alarm_status::ALARM_STATUS_UNKNOWN: + return alarms::v1::ALARM_STATUS_UNKNOWN; + case aclk_alarm_status::ALARM_STATUS_REMOVED: + return alarms::v1::ALARM_STATUS_REMOVED; + case aclk_alarm_status::ALARM_STATUS_NOT_A_NUMBER: + return alarms::v1::ALARM_STATUS_NOT_A_NUMBER; + case aclk_alarm_status::ALARM_STATUS_CLEAR: + return alarms::v1::ALARM_STATUS_CLEAR; + case aclk_alarm_status::ALARM_STATUS_WARNING: + return alarms::v1::ALARM_STATUS_WARNING; + case aclk_alarm_status::ALARM_STATUS_CRITICAL: + return alarms::v1::ALARM_STATUS_CRITICAL; + default: + error("Unknown alarm status"); + return alarms::v1::ALARM_STATUS_UNKNOWN; + } +} + +void destroy_alarm_log_entry(struct alarm_log_entry *entry) +{ + //freez(entry->node_id); + //freez(entry->claim_id); + + freez(entry->chart); + freez(entry->name); + freez(entry->family); + + freez(entry->config_hash); + + freez(entry->timezone); + + freez(entry->exec_path); + freez(entry->conf_source); + freez(entry->command); + + freez(entry->value_string); + freez(entry->old_value_string); + + freez(entry->rendered_info); + freez(entry->chart_context); +} + +static void fill_alarm_log_entry(struct alarm_log_entry *data, AlarmLogEntry *proto) +{ + proto->set_node_id(data->node_id); + proto->set_claim_id(data->claim_id); + + proto->set_chart(data->chart); + proto->set_name(data->name); + if (data->family) + proto->set_family(data->family); + + proto->set_batch_id(data->batch_id); + proto->set_sequence_id(data->sequence_id); + proto->set_when(data->when); + + proto->set_config_hash(data->config_hash); + + proto->set_utc_offset(data->utc_offset); + proto->set_timezone(data->timezone); + + proto->set_exec_path(data->exec_path); + proto->set_conf_source(data->conf_source); + proto->set_command(data->command); + + proto->set_duration(data->duration); + proto->set_non_clear_duration(data->non_clear_duration); + + + proto->set_status(aclk_alarm_status_to_proto(data->status)); + proto->set_old_status(aclk_alarm_status_to_proto(data->old_status)); + proto->set_delay(data->delay); + proto->set_delay_up_to_timestamp(data->delay_up_to_timestamp); + + proto->set_last_repeat(data->last_repeat); + proto->set_silenced(data->silenced); + + if (data->value_string) + proto->set_value_string(data->value_string); + if (data->old_value_string) + proto->set_old_value_string(data->old_value_string); + + proto->set_value(data->value); + proto->set_old_value(data->old_value); + + proto->set_updated(data->updated); + + proto->set_rendered_info(data->rendered_info); + + proto->set_chart_context(data->chart_context); +} + +char *generate_alarm_log_entry(size_t *len, struct alarm_log_entry *data) +{ + AlarmLogEntry le; + + fill_alarm_log_entry(data, &le); + + *len = PROTO_COMPAT_MSG_SIZE(le); + char *bin = (char*)mallocz(*len); + if (!le.SerializeToArray(bin, *len)) { + freez(bin); + return NULL; + } + + return bin; +} + +struct send_alarm_snapshot *parse_send_alarm_snapshot(const char *data, size_t len) +{ + SendAlarmSnapshot msg; + if (!msg.ParseFromArray(data, len)) + return NULL; + + struct send_alarm_snapshot *ret = (struct send_alarm_snapshot*)callocz(1, sizeof(struct send_alarm_snapshot)); + if (msg.claim_id().c_str()) + ret->claim_id = strdupz(msg.claim_id().c_str()); + if (msg.node_id().c_str()) + ret->node_id = strdupz(msg.node_id().c_str()); + ret->snapshot_id = msg.snapshot_id(); + ret->sequence_id = msg.sequence_id(); + + return ret; +} + +void destroy_send_alarm_snapshot(struct send_alarm_snapshot *ptr) +{ + freez(ptr->claim_id); + freez(ptr->node_id); + freez(ptr); +} + +alarm_snapshot_proto_ptr_t generate_alarm_snapshot_proto(struct alarm_snapshot *data) +{ + AlarmSnapshot *msg = new AlarmSnapshot; + if (unlikely(!msg)) fatal("Cannot allocate memory for AlarmSnapshot"); + + msg->set_node_id(data->node_id); + msg->set_claim_id(data->claim_id); + msg->set_snapshot_id(data->snapshot_id); + msg->set_chunks(data->chunks); + msg->set_chunk(data->chunk); + + // this is handled automatically by add_alarm_log_entry2snapshot function + msg->set_chunk_size(0); + + return msg; +} + +void add_alarm_log_entry2snapshot(alarm_snapshot_proto_ptr_t snapshot, struct alarm_log_entry *data) +{ + AlarmSnapshot *alarm_snapshot = (AlarmSnapshot *)snapshot; + AlarmLogEntry *alarm_log_entry = alarm_snapshot->add_alarms(); + + fill_alarm_log_entry(data, alarm_log_entry); + + alarm_snapshot->set_chunk_size(alarm_snapshot->chunk_size() + 1); +} + +char *generate_alarm_snapshot_bin(size_t *len, alarm_snapshot_proto_ptr_t snapshot) +{ + AlarmSnapshot *alarm_snapshot = (AlarmSnapshot *)snapshot; + *len = PROTO_COMPAT_MSG_SIZE_PTR(alarm_snapshot); + char *bin = (char*)mallocz(*len); + if (!alarm_snapshot->SerializeToArray(bin, *len)) { + delete alarm_snapshot; + return NULL; + } + + delete alarm_snapshot; + return bin; +} diff --git a/aclk/schema-wrappers/alarm_stream.h b/aclk/schema-wrappers/alarm_stream.h new file mode 100644 index 0000000..63911da --- /dev/null +++ b/aclk/schema-wrappers/alarm_stream.h @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_ALARM_STREAM_H +#define ACLK_SCHEMA_WRAPPER_ALARM_STREAM_H + +#include + +#include "database/rrd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum alarm_log_status_aclk { + ALARM_LOG_STATUS_UNSPECIFIED = 0, + ALARM_LOG_STATUS_RUNNING = 1, + ALARM_LOG_STATUS_IDLE = 2 +}; + +struct alarm_log_entries { + int64_t first_seq_id; + struct timeval first_when; + + int64_t last_seq_id; + struct timeval last_when; +}; + +struct alarm_log_health { + char *claim_id; + char *node_id; + int enabled; + enum alarm_log_status_aclk status; + struct alarm_log_entries log_entries; +}; + +struct start_alarm_streaming { + char *node_id; + uint64_t batch_id; + uint64_t start_seq_id; +}; + +struct start_alarm_streaming parse_start_alarm_streaming(const char *data, size_t len); +char *parse_send_alarm_log_health(const char *data, size_t len); + +char *generate_alarm_log_health(size_t *len, struct alarm_log_health *data); + +enum aclk_alarm_status { + ALARM_STATUS_NULL = 0, + ALARM_STATUS_UNKNOWN = 1, + ALARM_STATUS_REMOVED = 2, + ALARM_STATUS_NOT_A_NUMBER = 3, + ALARM_STATUS_CLEAR = 4, + ALARM_STATUS_WARNING = 5, + ALARM_STATUS_CRITICAL = 6 +}; + +struct alarm_log_entry { + char *node_id; + char *claim_id; + + char *chart; + char *name; + char *family; + + uint64_t batch_id; + uint64_t sequence_id; + uint64_t when; + + char *config_hash; + + int32_t utc_offset; + char *timezone; + + char *exec_path; + char *conf_source; + char *command; + + uint32_t duration; + uint32_t non_clear_duration; + + enum aclk_alarm_status status; + enum aclk_alarm_status old_status; + uint64_t delay; + uint64_t delay_up_to_timestamp; + + uint64_t last_repeat; + int silenced; + + char *value_string; + char *old_value_string; + + double value; + double old_value; + + // updated alarm entry, when the status of the alarm has been updated by a later entry + int updated; + + // rendered_info + char *rendered_info; + + char *chart_context; +}; + +struct send_alarm_snapshot { + char *node_id; + char *claim_id; + uint64_t snapshot_id; + uint64_t sequence_id; +}; + +struct alarm_snapshot { + char *node_id; + char *claim_id; + uint64_t snapshot_id; + uint32_t chunks; + uint32_t chunk; +}; + +typedef void* alarm_snapshot_proto_ptr_t; + +void destroy_alarm_log_entry(struct alarm_log_entry *entry); + +char *generate_alarm_log_entry(size_t *len, struct alarm_log_entry *data); + +struct send_alarm_snapshot *parse_send_alarm_snapshot(const char *data, size_t len); +void destroy_send_alarm_snapshot(struct send_alarm_snapshot *ptr); + +alarm_snapshot_proto_ptr_t generate_alarm_snapshot_proto(struct alarm_snapshot *data); +void add_alarm_log_entry2snapshot(alarm_snapshot_proto_ptr_t snapshot, struct alarm_log_entry *data); +char *generate_alarm_snapshot_bin(size_t *len, alarm_snapshot_proto_ptr_t snapshot); + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_ALARM_STREAM_H */ diff --git a/aclk/schema-wrappers/capability.cc b/aclk/schema-wrappers/capability.cc new file mode 100644 index 0000000..af45740 --- /dev/null +++ b/aclk/schema-wrappers/capability.cc @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "proto/aclk/v1/lib.pb.h" + +#include "capability.h" + +void capability_set(aclk_lib::v1::Capability *proto_capa, const struct capability *c_capa) { + proto_capa->set_name(c_capa->name); + proto_capa->set_enabled(c_capa->enabled); + proto_capa->set_version(c_capa->version); +} diff --git a/aclk/schema-wrappers/capability.h b/aclk/schema-wrappers/capability.h new file mode 100644 index 0000000..c6085a4 --- /dev/null +++ b/aclk/schema-wrappers/capability.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_CAPABILITY_H +#define ACLK_SCHEMA_CAPABILITY_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct capability { + const char *name; + uint32_t version; + int enabled; +}; + +#ifdef __cplusplus +} + +#include "proto/aclk/v1/lib.pb.h" + +void capability_set(aclk_lib::v1::Capability *proto_capa, const struct capability *c_capa); +#endif + +#endif /* ACLK_SCHEMA_CAPABILITY_H */ diff --git a/aclk/schema-wrappers/connection.cc b/aclk/schema-wrappers/connection.cc new file mode 100644 index 0000000..20b40ec --- /dev/null +++ b/aclk/schema-wrappers/connection.cc @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "proto/agent/v1/connection.pb.h" +#include "proto/agent/v1/disconnect.pb.h" +#include "connection.h" + +#include "schema_wrapper_utils.h" + +#include +#include + +using namespace agent::v1; + +char *generate_update_agent_connection(size_t *len, const update_agent_connection_t *data) +{ + UpdateAgentConnection connupd; + + connupd.set_claim_id(data->claim_id); + connupd.set_reachable(data->reachable); + connupd.set_session_id(data->session_id); + + connupd.set_update_source((data->lwt) ? CONNECTION_UPDATE_SOURCE_LWT : CONNECTION_UPDATE_SOURCE_AGENT); + + struct timeval tv; + gettimeofday(&tv, NULL); + + google::protobuf::Timestamp *timestamp = connupd.mutable_updated_at(); + timestamp->set_seconds(tv.tv_sec); + timestamp->set_nanos(tv.tv_usec * 1000); + + if (data->capabilities) { + const struct capability *capa = data->capabilities; + while (capa->name) { + aclk_lib::v1::Capability *proto_capa = connupd.add_capabilities(); + capability_set(proto_capa, capa); + capa++; + } + } + + *len = PROTO_COMPAT_MSG_SIZE(connupd); + char *msg = (char*)mallocz(*len); + if (msg) + connupd.SerializeToArray(msg, *len); + + return msg; +} + +struct disconnect_cmd *parse_disconnect_cmd(const char *data, size_t len) { + DisconnectReq req; + struct disconnect_cmd *res; + + if (!req.ParseFromArray(data, len)) + return NULL; + + res = (struct disconnect_cmd *)callocz(1, sizeof(struct disconnect_cmd)); + + if (!res) + return NULL; + + res->reconnect_after_s = req.reconnect_after_seconds(); + res->permaban = req.permaban(); + res->error_code = req.error_code(); + if (req.error_description().c_str()) { + res->error_description = strdupz(req.error_description().c_str()); + if (!res->error_description) { + freez(res); + return NULL; + } + } + + return res; +} diff --git a/aclk/schema-wrappers/connection.h b/aclk/schema-wrappers/connection.h new file mode 100644 index 0000000..0356c7d --- /dev/null +++ b/aclk/schema-wrappers/connection.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_CONNECTION_H +#define ACLK_SCHEMA_WRAPPER_CONNECTION_H + +#include "capability.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + const char *claim_id; + unsigned int reachable:1; + + int64_t session_id; + + unsigned int lwt:1; + + const struct capability *capabilities; + +// TODO in future optional fields +// > 15 optional fields: +// How long the system was running until connection (only applicable when reachable=true) +// google.protobuf.Duration system_uptime = 15; +// How long the netdata agent was running until connection (only applicable when reachable=true) +// google.protobuf.Duration agent_uptime = 16; + + +} update_agent_connection_t; + +char *generate_update_agent_connection(size_t *len, const update_agent_connection_t *data); + +struct disconnect_cmd { + uint64_t reconnect_after_s; + int permaban; + uint32_t error_code; + char *error_description; +}; + +struct disconnect_cmd *parse_disconnect_cmd(const char *data, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_CONNECTION_H */ diff --git a/aclk/schema-wrappers/context.cc b/aclk/schema-wrappers/context.cc new file mode 100644 index 0000000..b04c9d2 --- /dev/null +++ b/aclk/schema-wrappers/context.cc @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "proto/context/v1/context.pb.h" + +#include "libnetdata/libnetdata.h" + +#include "schema_wrapper_utils.h" + +#include "context.h" + +using namespace context::v1; + +// ContextsSnapshot +contexts_snapshot_t contexts_snapshot_new(const char *claim_id, const char *node_id, uint64_t version) +{ + ContextsSnapshot *ctxs_snap = new ContextsSnapshot; + + if (ctxs_snap == NULL) + fatal("Cannot allocate ContextsSnapshot object. OOM"); + + ctxs_snap->set_claim_id(claim_id); + ctxs_snap->set_node_id(node_id); + ctxs_snap->set_version(version); + + return ctxs_snap; +} + +void contexts_snapshot_delete(contexts_snapshot_t snapshot) +{ + delete (ContextsSnapshot *)snapshot; +} + +void contexts_snapshot_set_version(contexts_snapshot_t ctxs_snapshot, uint64_t version) +{ + ((ContextsSnapshot *)ctxs_snapshot)->set_version(version); +} + +static void fill_ctx_updated(ContextUpdated *ctx, struct context_updated *c_ctx) +{ + ctx->set_id(c_ctx->id); + ctx->set_version(c_ctx->version); + ctx->set_first_entry(c_ctx->first_entry); + ctx->set_last_entry(c_ctx->last_entry); + ctx->set_deleted(c_ctx->deleted); + ctx->set_title(c_ctx->title); + ctx->set_priority(c_ctx->priority); + ctx->set_chart_type(c_ctx->chart_type); + ctx->set_units(c_ctx->units); + ctx->set_family(c_ctx->family); +} + +void contexts_snapshot_add_ctx_update(contexts_snapshot_t ctxs_snapshot, struct context_updated *ctx_update) +{ + ContextsSnapshot *ctxs_snap = (ContextsSnapshot *)ctxs_snapshot; + ContextUpdated *ctx = ctxs_snap->add_contexts(); + + fill_ctx_updated(ctx, ctx_update); +} + +char *contexts_snapshot_2bin(contexts_snapshot_t ctxs_snapshot, size_t *len) +{ + ContextsSnapshot *ctxs_snap = (ContextsSnapshot *)ctxs_snapshot; + *len = PROTO_COMPAT_MSG_SIZE_PTR(ctxs_snap); + char *bin = (char*)mallocz(*len); + if (!ctxs_snap->SerializeToArray(bin, *len)) { + freez(bin); + delete ctxs_snap; + return NULL; + } + + delete ctxs_snap; + return bin; +} + +// ContextsUpdated +contexts_updated_t contexts_updated_new(const char *claim_id, const char *node_id, uint64_t version_hash, uint64_t created_at) +{ + ContextsUpdated *ctxs_updated = new ContextsUpdated; + + if (ctxs_updated == NULL) + fatal("Cannot allocate ContextsUpdated object. OOM"); + + ctxs_updated->set_claim_id(claim_id); + ctxs_updated->set_node_id(node_id); + ctxs_updated->set_version_hash(version_hash); + ctxs_updated->set_created_at(created_at); + + return ctxs_updated; +} + +void contexts_updated_delete(contexts_updated_t ctxs_updated) +{ + delete (ContextsUpdated *)ctxs_updated; +} + +void contexts_updated_update_version_hash(contexts_updated_t ctxs_updated, uint64_t version_hash) +{ + ((ContextsUpdated *)ctxs_updated)->set_version_hash(version_hash); +} + +void contexts_updated_add_ctx_update(contexts_updated_t ctxs_updated, struct context_updated *ctx_update) +{ + ContextsUpdated *ctxs_update = (ContextsUpdated *)ctxs_updated; + ContextUpdated *ctx = ctxs_update->add_contextupdates(); + + if (ctx == NULL) + fatal("Cannot allocate ContextUpdated object. OOM"); + + fill_ctx_updated(ctx, ctx_update); +} + +char *contexts_updated_2bin(contexts_updated_t ctxs_updated, size_t *len) +{ + ContextsUpdated *ctxs_update = (ContextsUpdated *)ctxs_updated; + *len = PROTO_COMPAT_MSG_SIZE_PTR(ctxs_update); + char *bin = (char*)mallocz(*len); + if (!ctxs_update->SerializeToArray(bin, *len)) { + freez(bin); + delete ctxs_update; + return NULL; + } + + delete ctxs_update; + return bin; +} diff --git a/aclk/schema-wrappers/context.h b/aclk/schema-wrappers/context.h new file mode 100644 index 0000000..cbb7701 --- /dev/null +++ b/aclk/schema-wrappers/context.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_CONTEXT_H +#define ACLK_SCHEMA_WRAPPER_CONTEXT_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void* contexts_updated_t; +typedef void* contexts_snapshot_t; + +struct context_updated { + // context id + const char *id; + + uint64_t version; + + uint64_t first_entry; + uint64_t last_entry; + + int deleted; + + const char *title; + uint64_t priority; + const char *chart_type; + const char *units; + const char *family; +}; + +// ContextS Snapshot related +contexts_snapshot_t contexts_snapshot_new(const char *claim_id, const char *node_id, uint64_t version); +void contexts_snapshot_delete(contexts_snapshot_t ctxs_snapshot); +void contexts_snapshot_set_version(contexts_snapshot_t ctxs_snapshot, uint64_t version); +void contexts_snapshot_add_ctx_update(contexts_snapshot_t ctxs_snapshot, struct context_updated *ctx_update); +char *contexts_snapshot_2bin(contexts_snapshot_t ctxs_snapshot, size_t *len); + +// ContextS Updated related +contexts_updated_t contexts_updated_new(const char *claim_id, const char *node_id, uint64_t version_hash, uint64_t created_at); +void contexts_updated_delete(contexts_updated_t ctxs_updated); +void contexts_updated_update_version_hash(contexts_updated_t ctxs_updated, uint64_t version_hash); +void contexts_updated_add_ctx_update(contexts_updated_t ctxs_updated, struct context_updated *ctx_update); +char *contexts_updated_2bin(contexts_updated_t ctxs_updated, size_t *len); + + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_CONTEXT_H */ diff --git a/aclk/schema-wrappers/context_stream.cc b/aclk/schema-wrappers/context_stream.cc new file mode 100644 index 0000000..3bb1956 --- /dev/null +++ b/aclk/schema-wrappers/context_stream.cc @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "proto/context/v1/stream.pb.h" + +#include "context_stream.h" + +#include "libnetdata/libnetdata.h" + +struct stop_streaming_ctxs *parse_stop_streaming_ctxs(const char *data, size_t len) +{ + context::v1::StopStreamingContexts msg; + + struct stop_streaming_ctxs *res; + + if (!msg.ParseFromArray(data, len)) + return NULL; + + res = (struct stop_streaming_ctxs *)callocz(1, sizeof(struct stop_streaming_ctxs)); + + res->claim_id = strdupz(msg.claim_id().c_str()); + res->node_id = strdupz(msg.node_id().c_str()); + + return res; +} + +struct ctxs_checkpoint *parse_ctxs_checkpoint(const char *data, size_t len) +{ + context::v1::ContextsCheckpoint msg; + + struct ctxs_checkpoint *res; + + if (!msg.ParseFromArray(data, len)) + return NULL; + + res = (struct ctxs_checkpoint *)callocz(1, sizeof(struct ctxs_checkpoint)); + + res->claim_id = strdupz(msg.claim_id().c_str()); + res->node_id = strdupz(msg.node_id().c_str()); + res->version_hash = msg.version_hash(); + + return res; +} diff --git a/aclk/schema-wrappers/context_stream.h b/aclk/schema-wrappers/context_stream.h new file mode 100644 index 0000000..8c691d2 --- /dev/null +++ b/aclk/schema-wrappers/context_stream.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_CONTEXT_STREAM_H +#define ACLK_SCHEMA_WRAPPER_CONTEXT_STREAM_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct stop_streaming_ctxs { + char *claim_id; + char *node_id; + // we omit reason as there is only one defined at this point + // as soon as there is more than one defined in StopStreaminContextsReason + // we should add it + // 0 - RATE_LIMIT_EXCEEDED +}; + +struct stop_streaming_ctxs *parse_stop_streaming_ctxs(const char *data, size_t len); + +struct ctxs_checkpoint { + char *claim_id; + char *node_id; + + uint64_t version_hash; +}; + +struct ctxs_checkpoint *parse_ctxs_checkpoint(const char *data, size_t len); + + + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_CONTEXT_STREAM_H */ diff --git a/aclk/schema-wrappers/node_connection.cc b/aclk/schema-wrappers/node_connection.cc new file mode 100644 index 0000000..db1fa64 --- /dev/null +++ b/aclk/schema-wrappers/node_connection.cc @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "proto/nodeinstance/connection/v1/connection.pb.h" +#include "node_connection.h" + +#include "schema_wrapper_utils.h" + +#include +#include + +char *generate_node_instance_connection(size_t *len, const node_instance_connection_t *data) { + nodeinstance::v1::UpdateNodeInstanceConnection msg; + + if(data->claim_id) + msg.set_claim_id(data->claim_id); + msg.set_node_id(data->node_id); + + msg.set_liveness(data->live); + msg.set_queryable(data->queryable); + + msg.set_session_id(data->session_id); + msg.set_hops(data->hops); + + struct timeval tv; + gettimeofday(&tv, NULL); + + google::protobuf::Timestamp *timestamp = msg.mutable_updated_at(); + timestamp->set_seconds(tv.tv_sec); + timestamp->set_nanos(tv.tv_usec * 1000); + + if (data->capabilities) { + const struct capability *capa = data->capabilities; + while (capa->name) { + aclk_lib::v1::Capability *proto_capa = msg.add_capabilities(); + capability_set(proto_capa, capa); + capa++; + } + } + + *len = PROTO_COMPAT_MSG_SIZE(msg); + char *bin = (char*)mallocz(*len); + if (bin) + msg.SerializeToArray(bin, *len); + + return bin; +} diff --git a/aclk/schema-wrappers/node_connection.h b/aclk/schema-wrappers/node_connection.h new file mode 100644 index 0000000..dac0d8f --- /dev/null +++ b/aclk/schema-wrappers/node_connection.h @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_NODE_CONNECTION_H +#define ACLK_SCHEMA_WRAPPER_NODE_CONNECTION_H + +#include "capability.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + const char* claim_id; + const char* node_id; + + unsigned int live:1; + unsigned int queryable:1; + + int64_t session_id; + + int32_t hops; + const struct capability *capabilities; +} node_instance_connection_t; + +char *generate_node_instance_connection(size_t *len, const node_instance_connection_t *data); + + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_NODE_CONNECTION_H */ diff --git a/aclk/schema-wrappers/node_creation.cc b/aclk/schema-wrappers/node_creation.cc new file mode 100644 index 0000000..5ad25b7 --- /dev/null +++ b/aclk/schema-wrappers/node_creation.cc @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "proto/nodeinstance/create/v1/creation.pb.h" +#include "node_creation.h" + +#include "schema_wrapper_utils.h" + +#include + +char *generate_node_instance_creation(size_t *len, const node_instance_creation_t *data) +{ + nodeinstance::create::v1::CreateNodeInstance msg; + + if (data->claim_id) + msg.set_claim_id(data->claim_id); + msg.set_machine_guid(data->machine_guid); + msg.set_hostname(data->hostname); + msg.set_hops(data->hops); + + *len = PROTO_COMPAT_MSG_SIZE(msg); + char *bin = (char*)mallocz(*len); + if (bin) + msg.SerializeToArray(bin, *len); + + return bin; +} + +node_instance_creation_result_t parse_create_node_instance_result(const char *data, size_t len) +{ + nodeinstance::create::v1::CreateNodeInstanceResult msg; + node_instance_creation_result_t res = { .node_id = NULL, .machine_guid = NULL }; + + if (!msg.ParseFromArray(data, len)) + return res; + + res.node_id = strdupz(msg.node_id().c_str()); + res.machine_guid = strdupz(msg.machine_guid().c_str()); + return res; +} diff --git a/aclk/schema-wrappers/node_creation.h b/aclk/schema-wrappers/node_creation.h new file mode 100644 index 0000000..7a8c7f7 --- /dev/null +++ b/aclk/schema-wrappers/node_creation.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_NODE_CREATION_H +#define ACLK_SCHEMA_WRAPPER_NODE_CREATION_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + const char *claim_id; + const char *machine_guid; + const char *hostname; + + int32_t hops; +} node_instance_creation_t; + +typedef struct { + char *node_id; + char *machine_guid; +} node_instance_creation_result_t; + +char *generate_node_instance_creation(size_t *len, const node_instance_creation_t *data); +node_instance_creation_result_t parse_create_node_instance_result(const char *data, size_t len); + + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_NODE_CREATION_H */ diff --git a/aclk/schema-wrappers/node_info.cc b/aclk/schema-wrappers/node_info.cc new file mode 100644 index 0000000..5e321f6 --- /dev/null +++ b/aclk/schema-wrappers/node_info.cc @@ -0,0 +1,136 @@ +#include "node_info.h" + +#include "proto/nodeinstance/info/v1/info.pb.h" + +#include "schema_wrapper_utils.h" + +static int generate_node_info(nodeinstance::info::v1::NodeInfo *info, struct aclk_node_info *data) +{ + google::protobuf::Map *map; + + if (data->name) + info->set_name(data->name); + + if (data->os) + info->set_os(data->os); + if (data->os_name) + info->set_os_name(data->os_name); + if (data->os_version) + info->set_os_version(data->os_version); + + if (data->kernel_name) + info->set_kernel_name(data->kernel_name); + if (data->kernel_version) + info->set_kernel_version(data->kernel_version); + + if (data->architecture) + info->set_architecture(data->architecture); + + info->set_cpus(data->cpus); + + if (data->cpu_frequency) + info->set_cpu_frequency(data->cpu_frequency); + + if (data->memory) + info->set_memory(data->memory); + + if (data->disk_space) + info->set_disk_space(data->disk_space); + + if (data->version) + info->set_version(data->version); + + if (data->release_channel) + info->set_release_channel(data->release_channel); + + if (data->timezone) + info->set_timezone(data->timezone); + + if (data->virtualization_type) + info->set_virtualization_type(data->virtualization_type); + + if (data->container_type) + info->set_container_type(data->container_type); + + if (data->custom_info) + info->set_custom_info(data->custom_info); + + if (data->machine_guid) + info->set_machine_guid(data->machine_guid); + + nodeinstance::info::v1::MachineLearningInfo *ml_info = info->mutable_ml_info(); + ml_info->set_ml_capable(data->ml_info.ml_capable); + ml_info->set_ml_enabled(data->ml_info.ml_enabled); + + map = info->mutable_host_labels(); + rrdlabels_walkthrough_read(data->host_labels_ptr, label_add_to_map_callback, map); + return 0; +} + +char *generate_update_node_info_message(size_t *len, struct update_node_info *info) +{ + nodeinstance::info::v1::UpdateNodeInfo msg; + + msg.set_node_id(info->node_id); + msg.set_claim_id(info->claim_id); + + if (generate_node_info(msg.mutable_data(), &info->data)) + return NULL; + + set_google_timestamp_from_timeval(info->updated_at, msg.mutable_updated_at()); + msg.set_machine_guid(info->machine_guid); + msg.set_child(info->child); + + nodeinstance::info::v1::MachineLearningInfo *ml_info = msg.mutable_ml_info(); + ml_info->set_ml_capable(info->ml_info.ml_capable); + ml_info->set_ml_enabled(info->ml_info.ml_enabled); + + struct capability *capa; + if (info->node_capabilities) { + capa = info->node_capabilities; + while (capa->name) { + aclk_lib::v1::Capability *proto_capa = msg.mutable_node_info()->add_capabilities(); + capability_set(proto_capa, capa); + capa++; + } + } + if (info->node_instance_capabilities) { + capa = info->node_instance_capabilities; + while (capa->name) { + aclk_lib::v1::Capability *proto_capa = msg.mutable_node_instance_info()->add_capabilities(); + capability_set(proto_capa, capa); + capa++; + } + } + + *len = PROTO_COMPAT_MSG_SIZE(msg); + char *bin = (char*)mallocz(*len); + if (bin) + msg.SerializeToArray(bin, *len); + + return bin; +} + +char *generate_update_node_collectors_message(size_t *len, struct update_node_collectors *upd_node_collectors) +{ + nodeinstance::info::v1::UpdateNodeCollectors msg; + + msg.set_node_id(upd_node_collectors->node_id); + msg.set_claim_id(upd_node_collectors->claim_id); + + void *colls; + dfe_start_read(upd_node_collectors->node_collectors, colls) { + struct collector_info *c =(struct collector_info *)colls; + nodeinstance::info::v1::CollectorInfo *col = msg.add_collectors(); + col->set_plugin(c->plugin); + col->set_module(c->module); + } + dfe_done(colls); + + *len = PROTO_COMPAT_MSG_SIZE(msg); + char *bin = (char*)mallocz(*len); + if (bin) + msg.SerializeToArray(bin, *len); + + return bin; +} diff --git a/aclk/schema-wrappers/node_info.h b/aclk/schema-wrappers/node_info.h new file mode 100644 index 0000000..de4ade7 --- /dev/null +++ b/aclk/schema-wrappers/node_info.h @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ACLK_SCHEMA_WRAPPER_NODE_INFO_H +#define ACLK_SCHEMA_WRAPPER_NODE_INFO_H + +#include +#include + +#include "capability.h" +#include "database/rrd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct machine_learning_info { + bool ml_capable; + bool ml_enabled; +}; + +struct aclk_node_info { + const char *name; + + const char *os; + const char *os_name; + const char *os_version; + const char *kernel_name; + const char *kernel_version; + const char *architecture; + uint32_t cpus; + const char *cpu_frequency; + const char *memory; + const char *disk_space; + const char *version; + const char *release_channel; + const char *timezone; + const char *virtualization_type; + const char *container_type; + const char *custom_info; + const char *machine_guid; + + DICTIONARY *host_labels_ptr; + struct machine_learning_info ml_info; +}; + +struct update_node_info { + char *node_id; + char *claim_id; + struct aclk_node_info data; + struct timeval updated_at; + char *machine_guid; + int child; + + struct machine_learning_info ml_info; + + struct capability *node_capabilities; + struct capability *node_instance_capabilities; +}; + +struct collector_info { + const char *module; + const char *plugin; +}; + +struct update_node_collectors { + char *claim_id; + char *node_id; + DICTIONARY *node_collectors; +}; + +char *generate_update_node_info_message(size_t *len, struct update_node_info *info); + +char *generate_update_node_collectors_message(size_t *len, struct update_node_collectors *collectors); + +#ifdef __cplusplus +} +#endif + +#endif /* ACLK_SCHEMA_WRAPPER_NODE_INFO_H */ diff --git a/aclk/schema-wrappers/proto_2_json.cc b/aclk/schema-wrappers/proto_2_json.cc new file mode 100644 index 0000000..8853b2e --- /dev/null +++ b/aclk/schema-wrappers/proto_2_json.cc @@ -0,0 +1,85 @@ +#include +#include + +#include "proto/alarm/v1/config.pb.h" +#include "proto/alarm/v1/stream.pb.h" +#include "proto/aclk/v1/lib.pb.h" +#include "proto/agent/v1/connection.pb.h" +#include "proto/agent/v1/disconnect.pb.h" +#include "proto/nodeinstance/connection/v1/connection.pb.h" +#include "proto/nodeinstance/create/v1/creation.pb.h" +#include "proto/nodeinstance/info/v1/info.pb.h" +#include "proto/context/v1/stream.pb.h" +#include "proto/context/v1/context.pb.h" + +#include "libnetdata/libnetdata.h" + +#include "proto_2_json.h" + +using namespace google::protobuf::util; + +static google::protobuf::Message *msg_name_to_protomsg(const char *msgname) +{ +//tx side + if (!strcmp(msgname, "UpdateAgentConnection")) + return new agent::v1::UpdateAgentConnection; + if (!strcmp(msgname, "UpdateNodeInstanceConnection")) + return new nodeinstance::v1::UpdateNodeInstanceConnection; + if (!strcmp(msgname, "CreateNodeInstance")) + return new nodeinstance::create::v1::CreateNodeInstance; + if (!strcmp(msgname, "UpdateNodeInfo")) + return new nodeinstance::info::v1::UpdateNodeInfo; + if (!strcmp(msgname, "AlarmLogHealth")) + return new alarms::v1::AlarmLogHealth; + if (!strcmp(msgname, "ProvideAlarmConfiguration")) + return new alarms::v1::ProvideAlarmConfiguration; + if (!strcmp(msgname, "AlarmSnapshot")) + return new alarms::v1::AlarmSnapshot; + if (!strcmp(msgname, "AlarmLogEntry")) + return new alarms::v1::AlarmLogEntry; + if (!strcmp(msgname, "UpdateNodeCollectors")) + return new nodeinstance::info::v1::UpdateNodeCollectors; + if (!strcmp(msgname, "ContextsUpdated")) + return new context::v1::ContextsUpdated; + if (!strcmp(msgname, "ContextsSnapshot")) + return new context::v1::ContextsSnapshot; + +//rx side + if (!strcmp(msgname, "CreateNodeInstanceResult")) + return new nodeinstance::create::v1::CreateNodeInstanceResult; + if (!strcmp(msgname, "SendNodeInstances")) + return new agent::v1::SendNodeInstances; + if (!strcmp(msgname, "StartAlarmStreaming")) + return new alarms::v1::StartAlarmStreaming; + if (!strcmp(msgname, "SendAlarmLogHealth")) + return new alarms::v1::SendAlarmLogHealth; + if (!strcmp(msgname, "SendAlarmConfiguration")) + return new alarms::v1::SendAlarmConfiguration; + if (!strcmp(msgname, "SendAlarmSnapshot")) + return new alarms::v1::SendAlarmSnapshot; + if (!strcmp(msgname, "DisconnectReq")) + return new agent::v1::DisconnectReq; + if (!strcmp(msgname, "ContextsCheckpoint")) + return new context::v1::ContextsCheckpoint; + if (!strcmp(msgname, "StopStreamingContexts")) + return new context::v1::StopStreamingContexts; + + return NULL; +} + +char *protomsg_to_json(const void *protobin, size_t len, const char *msgname) +{ + google::protobuf::Message *msg = msg_name_to_protomsg(msgname); + if (msg == NULL) + return strdupz("Don't know this message type by name."); + + if (!msg->ParseFromArray(protobin, len)) + return strdupz("Can't parse this message. Malformed or wrong parser used."); + + JsonPrintOptions options; + + std::string output; + google::protobuf::util::MessageToJsonString(*msg, &output, options); + delete msg; + return strdupz(output.c_str()); +} diff --git a/aclk/schema-wrappers/proto_2_json.h b/aclk/schema-wrappers/proto_2_json.h new file mode 100644 index 0000000..3bd9847 --- /dev/null +++ b/aclk/schema-wrappers/proto_2_json.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef PROTO_2_JSON_H +#define PROTO_2_JSON_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +char *protomsg_to_json(const void *protobin, size_t len, const char *msgname); + +#ifdef __cplusplus +} +#endif + +#endif /* PROTO_2_JSON_H */ diff --git a/aclk/schema-wrappers/schema_wrapper_utils.cc b/aclk/schema-wrappers/schema_wrapper_utils.cc new file mode 100644 index 0000000..6573e62 --- /dev/null +++ b/aclk/schema-wrappers/schema_wrapper_utils.cc @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "schema_wrapper_utils.h" + +void set_google_timestamp_from_timeval(struct timeval tv, google::protobuf::Timestamp *ts) +{ + ts->set_nanos(tv.tv_usec*1000); + ts->set_seconds(tv.tv_sec); +} + +void set_timeval_from_google_timestamp(const google::protobuf::Timestamp &ts, struct timeval *tv) +{ + tv->tv_sec = ts.seconds(); + tv->tv_usec = ts.nanos()/1000; +} + +int label_add_to_map_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + (void)ls; + auto map = (google::protobuf::Map *)data; + map->insert({name, value}); + return 1; +} diff --git a/aclk/schema-wrappers/schema_wrapper_utils.h b/aclk/schema-wrappers/schema_wrapper_utils.h new file mode 100644 index 0000000..2815d0f --- /dev/null +++ b/aclk/schema-wrappers/schema_wrapper_utils.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef SCHEMA_WRAPPER_UTILS_H +#define SCHEMA_WRAPPER_UTILS_H + +#include "database/rrd.h" + +#include +#include +#include + +#if GOOGLE_PROTOBUF_VERSION < 3001000 +#define PROTO_COMPAT_MSG_SIZE(msg) (size_t)msg.ByteSize(); +#define PROTO_COMPAT_MSG_SIZE_PTR(msg) (size_t)msg->ByteSize(); +#else +#define PROTO_COMPAT_MSG_SIZE(msg) msg.ByteSizeLong(); +#define PROTO_COMPAT_MSG_SIZE_PTR(msg) msg->ByteSizeLong(); +#endif + +void set_google_timestamp_from_timeval(struct timeval tv, google::protobuf::Timestamp *ts); +void set_timeval_from_google_timestamp(const google::protobuf::Timestamp &ts, struct timeval *tv); +int label_add_to_map_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data); + +#endif /* SCHEMA_WRAPPER_UTILS_H */ diff --git a/aclk/schema-wrappers/schema_wrappers.h b/aclk/schema-wrappers/schema_wrappers.h new file mode 100644 index 0000000..a96f7ea --- /dev/null +++ b/aclk/schema-wrappers/schema_wrappers.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +// utility header to include all the message wrappers at once + +#ifndef SCHEMA_WRAPPERS_H +#define SCHEMA_WRAPPERS_H + +#include "connection.h" +#include "node_connection.h" +#include "node_creation.h" +#include "alarm_config.h" +#include "alarm_stream.h" +#include "node_info.h" +#include "capability.h" +#include "context_stream.h" +#include "context.h" + +#endif /* SCHEMA_WRAPPERS_H */ diff --git a/build-artifacts.sh b/build-artifacts.sh new file mode 100755 index 0000000..a759efd --- /dev/null +++ b/build-artifacts.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +BASENAME="netdata-$(git describe)" + +mkdir -p artifacts + +autoreconf -ivf +./configure \ + --prefix=/usr \ + --sysconfdir=/etc \ + --localstatedir=/var \ + --libexecdir=/usr/libexec \ + --with-zlib \ + --with-math \ + --with-user=netdata \ + CFLAGS=-O2 +make dist +mv "${BASENAME}.tar.gz" artifacts/ + +USER="" ./packaging/makeself/build-x86_64-static.sh + +cp packaging/version artifacts/latest-version.txt + +cd artifacts || exit 1 +ln -s "${BASENAME}.tar.gz" netdata-latest.tar.gz +ln -s "${BASENAME}.gz.run" netdata-latest.gz.run +sha256sum -b ./* > "sha256sums.txt" diff --git a/build/m4/ax_c__generic.m4 b/build/m4/ax_c__generic.m4 new file mode 100644 index 0000000..0c4dd52 --- /dev/null +++ b/build/m4/ax_c__generic.m4 @@ -0,0 +1,28 @@ +# https://lists.gnu.org/archive/html/autoconf-commit/2012-12/msg00004.html +# AC_C__GENERIC +# ------------- +# Define HAVE_C__GENERIC if _Generic works, a la C11. +AN_IDENTIFIER([_Generic], [AC_C__GENERIC]) +AC_DEFUN([AC_C__GENERIC], +[AC_CACHE_CHECK([for _Generic], ac_cv_c__Generic, +[AC_COMPILE_IFELSE( + [AC_LANG_SOURCE( + [[int + main (int argc, char **argv) + { + int a = _Generic (argc, int: argc = 1); + int *b = &_Generic (argc, default: argc); + char ***c = _Generic (argv, int: argc, default: argv ? &argv : 0); + _Generic (1 ? 0 : b, int: a, default: b) = &argc; + _Generic (a = 1, default: a) = 3; + return a + !b + !c; + } + ]])], + [ac_cv_c__Generic=yes], + [ac_cv_c__Generic=no])]) +if test $ac_cv_c__Generic = yes; then + AC_DEFINE([HAVE_C__GENERIC], 1, + [Define to 1 if C11-style _Generic works.]) +fi +])# AC_C__GENERIC + diff --git a/build/m4/ax_c_lto.m4 b/build/m4/ax_c_lto.m4 new file mode 100644 index 0000000..7e6bc01 --- /dev/null +++ b/build/m4/ax_c_lto.m4 @@ -0,0 +1,21 @@ +# AC_C_LTO +# ------------- +# Define HAVE_LTO if -flto works. +AN_IDENTIFIER([lto], [AC_C_LTO]) +AC_DEFUN([AC_C_LTO], +[AC_CACHE_CHECK([if -flto builds executables], ac_cv_c_lto, +[AC_RUN_IFELSE( + [AC_LANG_SOURCE( + [[#include + int main(int argc, char **argv) { + return 0; + } + ]])], + [ac_cv_c_lto=yes], + [ac_cv_c_lto=no], + [ac_cv_c_lto=${ac_cv_c_lto_cross_compile}])]) +if test "${ac_cv_c_lto}" = "yes"; then + AC_DEFINE([HAVE_LTO], 1, + [Define to 1 if -flto works.]) +fi +])# AC_C_LTO diff --git a/build/m4/ax_c_mallinfo.m4 b/build/m4/ax_c_mallinfo.m4 new file mode 100644 index 0000000..af8d048 --- /dev/null +++ b/build/m4/ax_c_mallinfo.m4 @@ -0,0 +1,24 @@ +# AC_C_MALLINFO +# ------------- +# Define HAVE_C_MALLINFO if mallinfo() works. +AN_IDENTIFIER([mallinfo], [AC_C_MALLINFO]) +AC_DEFUN([AC_C_MALLINFO], +[AC_CACHE_CHECK([for mallinfo], ac_cv_c_mallinfo, +[AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[ + struct mallinfo mi = mallinfo(); + /* make sure that fields exists */ + mi.uordblks = 0; + mi.hblkhd = 0; + mi.arena = 0; + ]] + )], + [ac_cv_c_mallinfo=yes], + [ac_cv_c_mallinfo=no])]) +if test $ac_cv_c_mallinfo = yes; then + AC_DEFINE([HAVE_C_MALLINFO], 1, + [Define to 1 if glibc mallinfo exists.]) +fi +])# AC_C_MALLINFO diff --git a/build/m4/ax_c_mallopt.m4 b/build/m4/ax_c_mallopt.m4 new file mode 100644 index 0000000..31c4fdc --- /dev/null +++ b/build/m4/ax_c_mallopt.m4 @@ -0,0 +1,20 @@ +# AC_C_MALLOPT +# ------------- +# Define HAVE_C_MALLOPT if mallopt() works. +AN_IDENTIFIER([mallopt], [AC_C_MALLOPT]) +AC_DEFUN([AC_C_MALLOPT], +[AC_CACHE_CHECK([for mallopt], ac_cv_c_mallopt, +[AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [[#include + int main(int argc, char **argv) { + mallopt(M_ARENA_MAX, 1); + } + ]])], + [ac_cv_c_mallopt=yes], + [ac_cv_c_mallopt=no])]) +if test $ac_cv_c_mallopt = yes; then + AC_DEFINE([HAVE_C_MALLOPT], 1, + [Define to 1 if glibc mallopt exists.]) +fi +])# AC_C_MALLOPT diff --git a/build/m4/ax_c_statement_expressions.m4 b/build/m4/ax_c_statement_expressions.m4 new file mode 100644 index 0000000..fb259e7 --- /dev/null +++ b/build/m4/ax_c_statement_expressions.m4 @@ -0,0 +1,23 @@ +# AC_C_STMT_EXPR +# ------------- +# Define HAVE_STMT_EXPR if compiler has statement expressions. +AN_IDENTIFIER([_Generic], [AC_C_STMT_EXPR]) +AC_DEFUN([AC_C_STMT_EXPR], +[AC_CACHE_CHECK([for statement expressions], ac_cv_c_stmt_expr, +[AC_COMPILE_IFELSE( + [AC_LANG_SOURCE( + [[int + main (int argc, char **argv) + { + int x = ({ int y = 1; y; }); + return x; + } + ]])], + [ac_cv_c_stmt_expr=yes], + [ac_cv_c_stmt_expr=no])]) +if test $ac_cv_c_stmt_expr = yes; then + AC_DEFINE([HAVE_STMT_EXPR], 1, + [Define to 1 if compiler supports statement expressions.]) +fi +])# AC_C_STMT_EXPR + diff --git a/build/m4/ax_check_compile_flag.m4 b/build/m4/ax_check_compile_flag.m4 new file mode 100644 index 0000000..c515602 --- /dev/null +++ b/build/m4/ax_check_compile_flag.m4 @@ -0,0 +1,50 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the current language's compiler +# or gives an error. (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the current language's default +# flags (e.g. CFLAGS) when the check is done. The check is thus made with +# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to +# force the compiler to issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# SPDX-License-Identifier: GPL-3.0 + +#serial 3 + +AC_DEFUN([AX_CHECK_COMPILE_FLAG], +[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX +AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl +AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ + ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" + AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], + [AS_VAR_SET(CACHEVAR,[yes])], + [AS_VAR_SET(CACHEVAR,[no])]) + _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) +AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes], + [m4_default([$2], :)], + [m4_default([$3], :)]) +AS_VAR_POPDEF([CACHEVAR])dnl +])dnl AX_CHECK_COMPILE_FLAGS diff --git a/build/m4/ax_check_enable_debug.m4 b/build/m4/ax_check_enable_debug.m4 new file mode 100644 index 0000000..db5bab2 --- /dev/null +++ b/build/m4/ax_check_enable_debug.m4 @@ -0,0 +1,122 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_check_enable_debug.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_ENABLE_DEBUG([enable by default=yes/info/profile/no], [ENABLE DEBUG VARIABLES ...], [DISABLE DEBUG VARIABLES NDEBUG ...], [IS-RELEASE]) +# +# DESCRIPTION +# +# Check for the presence of an --enable-debug option to configure, with +# the specified default value used when the option is not present. Return +# the value in the variable $ax_enable_debug. +# +# Specifying 'yes' adds '-g -O0' to the compilation flags for all +# languages. Specifying 'info' adds '-g' to the compilation flags. +# Specifying 'profile' adds '-g -pg' to the compilation flags and '-pg' to +# the linking flags. Otherwise, nothing is added. +# +# Define the variables listed in the second argument if debug is enabled, +# defaulting to no variables. Defines the variables listed in the third +# argument if debug is disabled, defaulting to NDEBUG. All lists of +# variables should be space-separated. +# +# If debug is not enabled, ensure AC_PROG_* will not add debugging flags. +# Should be invoked prior to any AC_PROG_* compiler checks. +# +# IS-RELEASE can be used to change the default to 'no' when making a +# release. Set IS-RELEASE to 'yes' or 'no' as appropriate. By default, it +# uses the value of $ax_is_release, so if you are using the AX_IS_RELEASE +# macro, there is no need to pass this parameter. +# +# AX_IS_RELEASE([git-directory]) +# AX_CHECK_ENABLE_DEBUG() +# +# LICENSE +# +# Copyright (c) 2011 Rhys Ulerich +# Copyright (c) 2014, 2015 Philip Withnall +# +# SPDX-License-Identifier: FSFAP + +#serial 5 + +AC_DEFUN([AX_CHECK_ENABLE_DEBUG],[ + AC_BEFORE([$0],[AC_PROG_CC])dnl + AC_BEFORE([$0],[AC_PROG_CXX])dnl + AC_BEFORE([$0],[AC_PROG_F77])dnl + AC_BEFORE([$0],[AC_PROG_FC])dnl + + AC_MSG_CHECKING(whether to enable debugging) + + ax_enable_debug_default=m4_tolower(m4_normalize(ifelse([$1],,[no],[$1]))) + ax_enable_debug_is_release=m4_tolower(m4_normalize(ifelse([$4],, + [$ax_is_release], + [$4]))) + + # If this is a release, override the default. + AS_IF([test "$ax_enable_debug_is_release" = "yes"], + [ax_enable_debug_default="no"]) + + m4_define(ax_enable_debug_vars,[m4_normalize(ifelse([$2],,,[$2]))]) + m4_define(ax_disable_debug_vars,[m4_normalize(ifelse([$3],,[NDEBUG],[$3]))]) + + AC_ARG_ENABLE(debug, + [AS_HELP_STRING([--enable-debug=]@<:@yes/info/profile/no@:>@,[compile with debugging])], + [],enable_debug=$ax_enable_debug_default) + + # empty mean debug yes + AS_IF([test "x$enable_debug" = "x"], + [enable_debug="yes"]) + + # case of debug + AS_CASE([$enable_debug], + [yes],[ + AC_MSG_RESULT(yes) + CFLAGS="${CFLAGS} -g -O0" + CXXFLAGS="${CXXFLAGS} -g -O0" + FFLAGS="${FFLAGS} -g -O0" + FCFLAGS="${FCFLAGS} -g -O0" + OBJCFLAGS="${OBJCFLAGS} -g -O0" + ], + [info],[ + AC_MSG_RESULT(info) + CFLAGS="${CFLAGS} -g" + CXXFLAGS="${CXXFLAGS} -g" + FFLAGS="${FFLAGS} -g" + FCFLAGS="${FCFLAGS} -g" + OBJCFLAGS="${OBJCFLAGS} -g" + ], + [profile],[ + AC_MSG_RESULT(profile) + CFLAGS="${CFLAGS} -g -pg" + CXXFLAGS="${CXXFLAGS} -g -pg" + FFLAGS="${FFLAGS} -g -pg" + FCFLAGS="${FCFLAGS} -g -pg" + OBJCFLAGS="${OBJCFLAGS} -g -pg" + LDFLAGS="${LDFLAGS} -pg" + ], + [ + AC_MSG_RESULT(no) + dnl Ensure AC_PROG_CC/CXX/F77/FC/OBJC will not enable debug flags + dnl by setting any unset environment flag variables + AS_IF([test "x${CFLAGS+set}" != "xset"], + [CFLAGS=""]) + AS_IF([test "x${CXXFLAGS+set}" != "xset"], + [CXXFLAGS=""]) + AS_IF([test "x${FFLAGS+set}" != "xset"], + [FFLAGS=""]) + AS_IF([test "x${FCFLAGS+set}" != "xset"], + [FCFLAGS=""]) + AS_IF([test "x${OBJCFLAGS+set}" != "xset"], + [OBJCFLAGS=""]) + ]) + + dnl Define various variables if debugging is disabled. + dnl assert.h is a NOP if NDEBUG is defined, so define it by default. + AS_IF([test "x$enable_debug" = "xyes"], + [m4_map_args_w(ax_enable_debug_vars, [AC_DEFINE(], [,,[Define if debugging is enabled])])], + [m4_map_args_w(ax_disable_debug_vars, [AC_DEFINE(], [,,[Define if debugging is disabled])])]) + ax_enable_debug=$enable_debug +]) diff --git a/build/m4/ax_gcc_func_attribute.m4 b/build/m4/ax_gcc_func_attribute.m4 new file mode 100644 index 0000000..fe22658 --- /dev/null +++ b/build/m4/ax_gcc_func_attribute.m4 @@ -0,0 +1,223 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE) +# +# DESCRIPTION +# +# This macro checks if the compiler supports one of GCC's function +# attributes; many other compilers also provide function attributes with +# the same syntax. Compiler warnings are used to detect supported +# attributes as unsupported ones are ignored by default so quieting +# warnings when using this macro will yield false positives. +# +# The ATTRIBUTE parameter holds the name of the attribute to be checked. +# +# If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_. +# +# The macro caches its result in the ax_cv_have_func_attribute_ +# variable. +# +# The macro currently supports the following function attributes: +# +# alias +# aligned +# alloc_size +# always_inline +# artificial +# cold +# const +# constructor +# constructor_priority for constructor attribute with priority +# deprecated +# destructor +# dllexport +# dllimport +# error +# externally_visible +# flatten +# format +# format_arg +# gnu_inline +# hot +# ifunc +# leaf +# malloc +# noclone +# noinline +# nonnull +# noreturn +# nothrow +# optimize +# pure +# unused +# used +# visibility +# warning +# warn_unused_result +# weak +# weakref +# +# Unsupported function attributes will be tested with a prototype returning +# an int and not accepting any arguments and the result of the check might +# be wrong or meaningless so use with care. +# +# LICENSE +# +# Copyright (c) 2013 Gabriele Svelto +# +# SPDX-License-Identifier: FSFAP + +#serial 4 + +AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [ + AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1]) + + AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [ + AC_LINK_IFELSE([AC_LANG_PROGRAM([ + m4_case([$1], + [alias], [ + int foo( void ) { return 0; } + int bar( void ) __attribute__(($1("foo"))); + ], + [aligned], [ + int foo( void ) __attribute__(($1(32))); + ], + [alloc_size], [ + void *foo(int a) __attribute__(($1(1))); + ], + [always_inline], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [artificial], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [cold], [ + int foo( void ) __attribute__(($1)); + ], + [const], [ + int foo( void ) __attribute__(($1)); + ], + [constructor_priority], [ + int foo( void ) __attribute__((__constructor__(65535/2))); + ], + [constructor], [ + int foo( void ) __attribute__(($1)); + ], + [deprecated], [ + int foo( void ) __attribute__(($1(""))); + ], + [destructor], [ + int foo( void ) __attribute__(($1)); + ], + [dllexport], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [dllimport], [ + int foo( void ) __attribute__(($1)); + ], + [error], [ + int foo( void ) __attribute__(($1(""))); + ], + [externally_visible], [ + int foo( void ) __attribute__(($1)); + ], + [flatten], [ + int foo( void ) __attribute__(($1)); + ], + [format], [ + int foo(const char *p, ...) __attribute__(($1(printf, 1, 2))); + ], + [format_arg], [ + char *foo(const char *p) __attribute__(($1(1))); + ], + [gnu_inline], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [hot], [ + int foo( void ) __attribute__(($1)); + ], + [ifunc], [ + int my_foo( void ) { return 0; } + static int (*resolve_foo(void))(void) { return my_foo; } + int foo( void ) __attribute__(($1("resolve_foo"))); + ], + [leaf], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [malloc], [ + void *foo( void ) __attribute__(($1)); + ], + [noclone], [ + int foo( void ) __attribute__(($1)); + ], + [noinline], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [nonnull], [ + int foo(char *p) __attribute__(($1(1))); + ], + [noreturn], [ + void foo( void ) __attribute__(($1)); + ], + [nothrow], [ + int foo( void ) __attribute__(($1)); + ], + [optimize], [ + __attribute__(($1(3))) int foo( void ) { return 0; } + ], + [pure], [ + int foo( void ) __attribute__(($1)); + ], + [returns_nonnull], [ + void *foo( void ) __attribute__(($1)); + ], + [unused], [ + int foo( void ) __attribute__(($1)); + ], + [used], [ + int foo( void ) __attribute__(($1)); + ], + [visibility], [ + int foo_def( void ) __attribute__(($1("default"))); + int foo_hid( void ) __attribute__(($1("hidden"))); + int foo_int( void ) __attribute__(($1("internal"))); + int foo_pro( void ) __attribute__(($1("protected"))); + ], + [warning], [ + int foo( void ) __attribute__(($1(""))); + ], + [warn_unused_result], [ + int foo( void ) __attribute__(($1)); + ], + [weak], [ + int foo( void ) __attribute__(($1)); + ], + [weakref], [ + static int foo( void ) { return 0; } + static int bar( void ) __attribute__(($1("foo"))); + ], + [ + m4_warn([syntax], [Unsupported attribute $1, the test may fail]) + int foo( void ) __attribute__(($1)); + ] + )], []) + ], + dnl GCC doesn't exit with an error if an unknown attribute is + dnl provided but only outputs a warning, so accept the attribute + dnl only if no warning were issued. + [AS_IF([test -s conftest.err], + [AS_VAR_SET([ac_var], [no])], + [AS_VAR_SET([ac_var], [yes])])], + [AS_VAR_SET([ac_var], [no])]) + ]) + + AS_IF([test yes = AS_VAR_GET([ac_var])], + [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1, + [Define to 1 if the system has the `$1' function attribute])], []) + + AS_VAR_POPDEF([ac_var]) +]) diff --git a/build/m4/ax_pthread.m4 b/build/m4/ax_pthread.m4 new file mode 100644 index 0000000..9f35d13 --- /dev/null +++ b/build/m4/ax_pthread.m4 @@ -0,0 +1,522 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_pthread.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro figures out how to build C programs using POSIX threads. It +# sets the PTHREAD_LIBS output variable to the threads library and linker +# flags, and the PTHREAD_CFLAGS output variable to any special C compiler +# flags that are needed. (The user can also force certain compiler +# flags/libs to be tested by setting these environment variables.) +# +# Also sets PTHREAD_CC and PTHREAD_CXX to any special C compiler that is +# needed for multi-threaded programs (defaults to the value of CC +# respectively CXX otherwise). (This is necessary on e.g. AIX to use the +# special cc_r/CC_r compiler alias.) +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also to link with them as well. For example, you might link with +# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS +# $PTHREAD_CXX $CXXFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS +# +# If you are only building threaded programs, you may wish to use these +# variables in your default LIBS, CFLAGS, and CC: +# +# LIBS="$PTHREAD_LIBS $LIBS" +# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +# CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS" +# CC="$PTHREAD_CC" +# CXX="$PTHREAD_CXX" +# +# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant +# has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to +# that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +# +# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the +# PTHREAD_PRIO_INHERIT symbol is defined when compiling with +# PTHREAD_CFLAGS. +# +# ACTION-IF-FOUND is a list of shell commands to run if a threads library +# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it +# is not found. If ACTION-IF-FOUND is not specified, the default action +# will define HAVE_PTHREAD. +# +# Please let the authors know if this macro fails on any platform, or if +# you have any other suggestions or comments. This macro was based on work +# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help +# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by +# Alejandro Forero Cuervo to the autoconf macro repository. We are also +# grateful for the helpful feedback of numerous users. +# +# Updated for Autoconf 2.68 by Daniel Richard G. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# Copyright (c) 2011 Daniel Richard G. +# Copyright (c) 2019 Marc Stevens +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 31 + +AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) +AC_DEFUN([AX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +AC_REQUIRE([AC_PROG_CC]) +AC_REQUIRE([AC_PROG_SED]) +AC_LANG_PUSH([C]) +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on Tru64 or Sequent). +# It gets checked for in the link test anyway. + +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then + ax_pthread_save_CC="$CC" + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"]) + AS_IF([test "x$PTHREAD_CXX" != "x"], [CXX="$PTHREAD_CXX"]) + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS]) + AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes]) + AC_MSG_RESULT([$ax_pthread_ok]) + if test "x$ax_pthread_ok" = "xno"; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + CC="$ax_pthread_save_CC" + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items with a "," contain both +# C compiler flags (before ",") and linker flags (after ","). Other items +# starting with a "-" are C compiler flags, and remaining items are +# library names, except for "none" which indicates that we try without +# any flags at all, and "pthread-config" which is a program returning +# the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64 +# (Note: HP C rejects this with "bad form for `-t' option") +# -pthreads: Solaris/gcc (Note: HP C also rejects) +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads and +# -D_REENTRANT too), HP C (must be checked before -lpthread, which +# is present but should not be used directly; and before -mthreads, +# because the compiler interprets this as "-mt" + "-hreads") +# -mthreads: Mingw32/gcc, Lynx/gcc +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case $host_os in + + freebsd*) + + # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) + # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) + + ax_pthread_flags="-kthread lthread $ax_pthread_flags" + ;; + + hpux*) + + # From the cc(1) man page: "[-mt] Sets various -D flags to enable + # multi-threading and also sets -lpthread." + + ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags" + ;; + + openedition*) + + # IBM z/OS requires a feature-test macro to be defined in order to + # enable POSIX threads at all, so give the user a hint if this is + # not set. (We don't define these ourselves, as they can affect + # other portions of the system API in unpredictable ways.) + + AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING], + [ +# if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS) + AX_PTHREAD_ZOS_MISSING +# endif + ], + [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])]) + ;; + + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (N.B.: The stubs are missing + # pthread_cleanup_push, or rather a function called by this macro, + # so we could check for that, but who knows whether they'll stub + # that too in a future libc.) So we'll check first for the + # standard Solaris way of linking pthreads (-mt -lpthread). + + ax_pthread_flags="-mt,-lpthread pthread $ax_pthread_flags" + ;; +esac + +# Are we compiling with Clang? + +AC_CACHE_CHECK([whether $CC is Clang], + [ax_cv_PTHREAD_CLANG], + [ax_cv_PTHREAD_CLANG=no + # Note that Autoconf sets GCC=yes for Clang as well as GCC + if test "x$GCC" = "xyes"; then + AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG], + [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */ +# if defined(__clang__) && defined(__llvm__) + AX_PTHREAD_CC_IS_CLANG +# endif + ], + [ax_cv_PTHREAD_CLANG=yes]) + fi + ]) +ax_pthread_clang="$ax_cv_PTHREAD_CLANG" + + +# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) + +# Note that for GCC and Clang -pthread generally implies -lpthread, +# except when -nostdlib is passed. +# This is problematic using libtool to build C++ shared libraries with pthread: +# [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25460 +# [2] https://bugzilla.redhat.com/show_bug.cgi?id=661333 +# [3] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=468555 +# To solve this, first try -pthread together with -lpthread for GCC + +AS_IF([test "x$GCC" = "xyes"], + [ax_pthread_flags="-pthread,-lpthread -pthread -pthreads $ax_pthread_flags"]) + +# Clang takes -pthread (never supported any other flag), but we'll try with -lpthread first + +AS_IF([test "x$ax_pthread_clang" = "xyes"], + [ax_pthread_flags="-pthread,-lpthread -pthread"]) + + +# The presence of a feature test macro requesting re-entrant function +# definitions is, on some systems, a strong hint that pthreads support is +# correctly enabled + +case $host_os in + darwin* | hpux* | linux* | osf* | solaris*) + ax_pthread_check_macro="_REENTRANT" + ;; + + aix*) + ax_pthread_check_macro="_THREAD_SAFE" + ;; + + *) + ax_pthread_check_macro="--" + ;; +esac +AS_IF([test "x$ax_pthread_check_macro" = "x--"], + [ax_pthread_check_cond=0], + [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"]) + + +if test "x$ax_pthread_ok" = "xno"; then +for ax_pthread_try_flag in $ax_pthread_flags; do + + case $ax_pthread_try_flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + *,*) + PTHREAD_CFLAGS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\1/"` + PTHREAD_LIBS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\2/"` + AC_MSG_CHECKING([whether pthreads work with "$PTHREAD_CFLAGS" and "$PTHREAD_LIBS"]) + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag]) + PTHREAD_CFLAGS="$ax_pthread_try_flag" + ;; + + pthread-config) + AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) + AS_IF([test "x$ax_pthread_config" = "xno"], [continue]) + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag]) + PTHREAD_LIBS="-l$ax_pthread_try_flag" + ;; + esac + + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. + + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include +# if $ax_pthread_check_cond +# error "$ax_pthread_check_macro must be defined" +# endif + static void *some_global = NULL; + static void routine(void *a) + { + /* To avoid any unused-parameter or + unused-but-set-parameter warning. */ + some_global = a; + } + static void *start_routine(void *a) { return a; }], + [pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */])], + [ax_pthread_ok=yes], + []) + + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" + + AC_MSG_RESULT([$ax_pthread_ok]) + AS_IF([test "x$ax_pthread_ok" = "xyes"], [break]) + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + + +# Clang needs special handling, because older versions handle the -pthread +# option in a rather... idiosyncratic way + +if test "x$ax_pthread_clang" = "xyes"; then + + # Clang takes -pthread; it has never supported any other flag + + # (Note 1: This will need to be revisited if a system that Clang + # supports has POSIX threads in a separate library. This tends not + # to be the way of modern systems, but it's conceivable.) + + # (Note 2: On some systems, notably Darwin, -pthread is not needed + # to get POSIX threads support; the API is always present and + # active. We could reasonably leave PTHREAD_CFLAGS empty. But + # -pthread does define _REENTRANT, and while the Darwin headers + # ignore this macro, third-party headers might not.) + + # However, older versions of Clang make a point of warning the user + # that, in an invocation where only linking and no compilation is + # taking place, the -pthread option has no effect ("argument unused + # during compilation"). They expect -pthread to be passed in only + # when source code is being compiled. + # + # Problem is, this is at odds with the way Automake and most other + # C build frameworks function, which is that the same flags used in + # compilation (CFLAGS) are also used in linking. Many systems + # supported by AX_PTHREAD require exactly this for POSIX threads + # support, and in fact it is often not straightforward to specify a + # flag that is used only in the compilation phase and not in + # linking. Such a scenario is extremely rare in practice. + # + # Even though use of the -pthread flag in linking would only print + # a warning, this can be a nuisance for well-run software projects + # that build with -Werror. So if the active version of Clang has + # this misfeature, we search for an option to squash it. + + AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread], + [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG], + [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown + # Create an alternate version of $ac_link that compiles and + # links in two steps (.c -> .o, .o -> exe) instead of one + # (.c -> exe), because the warning occurs only in the second + # step + ax_pthread_save_ac_link="$ac_link" + ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' + ax_pthread_link_step=`AS_ECHO(["$ac_link"]) | sed "$ax_pthread_sed"` + ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" + ax_pthread_save_CFLAGS="$CFLAGS" + for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do + AS_IF([test "x$ax_pthread_try" = "xunknown"], [break]) + CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" + ac_link="$ax_pthread_save_ac_link" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], + [ac_link="$ax_pthread_2step_ac_link" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], + [break]) + ]) + done + ac_link="$ax_pthread_save_ac_link" + CFLAGS="$ax_pthread_save_CFLAGS" + AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no]) + ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" + ]) + + case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in + no | unknown) ;; + *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; + esac + +fi # $ax_pthread_clang = yes + + + +# Various other checks: +if test "x$ax_pthread_ok" = "xyes"; then + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + AC_CACHE_CHECK([for joinable pthread attribute], + [ax_cv_PTHREAD_JOINABLE_ATTR], + [ax_cv_PTHREAD_JOINABLE_ATTR=unknown + for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [int attr = $ax_pthread_attr; return attr /* ; */])], + [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break], + []) + done + ]) + AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ + test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ + test "x$ax_pthread_joinable_attr_defined" != "xyes"], + [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], + [$ax_cv_PTHREAD_JOINABLE_ATTR], + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + ax_pthread_joinable_attr_defined=yes + ]) + + AC_CACHE_CHECK([whether more special flags are required for pthreads], + [ax_cv_PTHREAD_SPECIAL_FLAGS], + [ax_cv_PTHREAD_SPECIAL_FLAGS=no + case $host_os in + solaris*) + ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS" + ;; + esac + ]) + AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ + test "x$ax_pthread_special_flags_added" != "xyes"], + [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" + ax_pthread_special_flags_added=yes]) + + AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], + [ax_cv_PTHREAD_PRIO_INHERIT], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], + [[int i = PTHREAD_PRIO_INHERIT; + return i;]])], + [ax_cv_PTHREAD_PRIO_INHERIT=yes], + [ax_cv_PTHREAD_PRIO_INHERIT=no]) + ]) + AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ + test "x$ax_pthread_prio_inherit_defined" != "xyes"], + [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.]) + ax_pthread_prio_inherit_defined=yes + ]) + + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" + + # More AIX lossage: compile with *_r variant + if test "x$GCC" != "xyes"; then + case $host_os in + aix*) + AS_CASE(["x/$CC"], + [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], + [#handle absolute path differently from PATH based program lookup + AS_CASE(["x$CC"], + [x/*], + [ + AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"]) + AS_IF([test "x${CXX}" != "x"], [AS_IF([AS_EXECUTABLE_P([${CXX}_r])],[PTHREAD_CXX="${CXX}_r"])]) + ], + [ + AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC]) + AS_IF([test "x${CXX}" != "x"], [AC_CHECK_PROGS([PTHREAD_CXX],[${CXX}_r],[$CXX])]) + ] + ) + ]) + ;; + esac + fi +fi + +test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" +test -n "$PTHREAD_CXX" || PTHREAD_CXX="$CXX" + +AC_SUBST([PTHREAD_LIBS]) +AC_SUBST([PTHREAD_CFLAGS]) +AC_SUBST([PTHREAD_CC]) +AC_SUBST([PTHREAD_CXX]) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test "x$ax_pthread_ok" = "xyes"; then + ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) + : +else + ax_pthread_ok=no + $2 +fi +AC_LANG_POP +])dnl AX_PTHREAD diff --git a/build/m4/jemalloc.m4 b/build/m4/jemalloc.m4 new file mode 100644 index 0000000..c2008a8 --- /dev/null +++ b/build/m4/jemalloc.m4 @@ -0,0 +1,75 @@ +dnl -------------------------------------------------------- -*- autoconf -*- +dnl SPDX-License-Identifier: Apache-2.0 + +dnl +dnl jemalloc.m4: Trafficserver's jemalloc autoconf macros +dnl modified to skip other TS_ helpers +dnl + +AC_DEFUN([TS_CHECK_JEMALLOC], [ +AC_ARG_WITH([jemalloc-prefix], + [AS_HELP_STRING([--with-jemalloc-prefix=PREFIX],[Specify the jemalloc prefix [default=""]])], + [ + jemalloc_prefix="$withval" + ],[ + if test "`uname -s`" = "Darwin"; then + jemalloc_prefix="je_" + else + jemalloc_prefix="" + fi + ] +) +AC_DEFINE_UNQUOTED([prefix_jemalloc], [${jemalloc_prefix}], [jemalloc prefix]) + +enable_jemalloc=no +AC_ARG_WITH([jemalloc], [AS_HELP_STRING([--with-jemalloc=DIR], [use a specific jemalloc library])], +[ + if test "$withval" != "no"; then + if test "x${enable_tcmalloc}" = "xyes"; then + AC_MSG_ERROR([Cannot compile with both jemalloc and tcmalloc]) + fi + enable_jemalloc=yes + jemalloc_base_dir="$withval" + case "$withval" in + yes) + jemalloc_base_dir="/usr" + AC_MSG_CHECKING(checking for jemalloc includes standard directories) + ;; + *":"*) + jemalloc_include="`echo $withval |sed -e 's/:.*$//'`" + jemalloc_ldflags="`echo $withval |sed -e 's/^.*://'`" + AC_MSG_CHECKING(checking for jemalloc includes in $jemalloc_include libs in $jemalloc_ldflags) + ;; + *) + jemalloc_include="$withval/include" + jemalloc_ldflags="$withval/lib" + AC_MSG_CHECKING(checking for jemalloc includes in $withval) + ;; + esac + fi +]) + +has_jemalloc=0 +if test "$enable_jemalloc" != "no"; then + jemalloc_have_headers=0 + jemalloc_have_libs=0 + if test "$jemalloc_base_dir" != "/usr"; then + CFLAGS="${CFLAGS} -I${jemalloc_include}" + LDFLAGS="${LDFLAGS} -L${jemalloc_ldflags}" + LIBTOOL_LINK_FLAGS="${LIBTOOL_LINK_FLAGS} -R${jemalloc_ldflags}" + fi + func="${jemalloc_prefix}malloc_stats_print" + AC_CHECK_LIB(jemalloc, ${func}, [jemalloc_have_libs=1]) + if test "$jemalloc_have_libs" != "0"; then + AC_CHECK_HEADERS([jemalloc/jemalloc.h], [jemalloc_have_headers=1]) + fi + if test "$jemalloc_have_headers" != "0"; then + has_jemalloc=1 + LIBS="${LIBS} -ljemalloc" + AC_DEFINE(has_jemalloc, [1], [Link/compile against jemalloc]) + else + AC_MSG_ERROR([Couldn't find a jemalloc installation]) + fi +fi +AC_SUBST(has_jemalloc) +]) diff --git a/build/m4/tcmalloc.m4 b/build/m4/tcmalloc.m4 new file mode 100644 index 0000000..765d2ac --- /dev/null +++ b/build/m4/tcmalloc.m4 @@ -0,0 +1,45 @@ +dnl -------------------------------------------------------- -*- autoconf -*- +dnl SPDX-License-Identifier: Apache-2.0 + +dnl +dnl tcmalloc.m4: Trafficserver's tcmalloc autoconf macros +dnl modified to skip other TS_ helpers +dnl + +dnl This is kinda fugly, but need a way to both specify a directory and which +dnl of the many tcmalloc libraries to use ... +AC_DEFUN([TS_CHECK_TCMALLOC], [ +AC_ARG_WITH([tcmalloc-lib], + [AS_HELP_STRING([--with-tcmalloc-lib],[specify the tcmalloc library to use [default=tcmalloc]])], + [ + with_tcmalloc_lib="$withval" + ],[ + with_tcmalloc_lib="tcmalloc" + ] +) + +has_tcmalloc=0 +AC_ARG_WITH([tcmalloc], [AS_HELP_STRING([--with-tcmalloc=DIR], [use the tcmalloc library])], +[ + if test "$withval" != "no"; then + if test "x${enable_jemalloc}" = "xyes"; then + AC_MSG_ERROR([Cannot compile with both tcmalloc and jemalloc]) + fi + tcmalloc_have_lib=0 + if test "x$withval" != "xyes" && test "x$withval" != "x"; then + tcmalloc_ldflags="$withval/lib" + LDFLAGS="${LDFLAGS} -L${tcmalloc_ldflags}" + LIBTOOL_LINK_FLAGS="${LIBTOOL_LINK_FLAGS} -rpath ${tcmalloc_ldflags}" + fi + AC_CHECK_LIB(${with_tcmalloc_lib}, tc_cfree, [tcmalloc_have_lib=1]) + if test "$tcmalloc_have_lib" != "0"; then + LIBS="${LIBS} -l${with_tcmalloc_lib}" + has_tcmalloc=1 + AC_DEFINE(has_tcmalloc, [1], [Link/compile against tcmalloc]) + else + AC_MSG_ERROR([Couldn't find a tcmalloc installation]) + fi + fi +]) +AC_SUBST(has_tcmalloc) +]) diff --git a/build/subst.inc b/build/subst.inc new file mode 100644 index 0000000..3c97211 --- /dev/null +++ b/build/subst.inc @@ -0,0 +1,22 @@ +.in: + if sed \ + -e 's#[@]localstatedir_POST@#$(localstatedir)#g' \ + -e 's#[@]sbindir_POST@#$(sbindir)#g' \ + -e 's#[@]pluginsdir_POST@#$(pluginsdir)#g' \ + -e 's#[@]configdir_POST@#$(configdir)#g' \ + -e 's#[@]libconfigdir_POST@#$(libconfigdir)#g' \ + -e 's#[@]pkglibexecdir_POST@#$(pkglibexecdir)#g' \ + -e 's#[@]cachedir_POST@#$(cachedir)#g' \ + -e 's#[@]registrydir_POST@#$(registrydir)#g' \ + -e 's#[@]varlibdir_POST@#$(varlibdir)#g' \ + -e 's#[@]webdir_POST@#$(webdir)#g' \ + -e 's#[@]libsysdir_POST@#$(libsysdir)#g' \ + -e 's#[@]enable_aclk_POST@#$(enable_aclk)#g' \ + -e 's#[@]enable_cloud_POST@#$(enable_cloud)#g' \ + -e 's#[@]netdata_user_POST@#$(netdata_user)#g' \ + $< > $@.tmp; then \ + mv "$@.tmp" "$@"; \ + else \ + rm -f "$@.tmp"; \ + false; \ + fi diff --git a/build_external/README.md b/build_external/README.md new file mode 100644 index 0000000..d68e4d0 --- /dev/null +++ b/build_external/README.md @@ -0,0 +1,128 @@ + + +# External build-system + +This wraps the build-system in Docker so that the host system and the target system are +decoupled. This allows: + +- Cross-compilation (e.g. linux development from macOS) +- Cross-distro (e.g. using CentOS user-land while developing on Debian) +- Multi-host scenarios (e.g. parent-child configurations) +- Bleeding-edge scenarios (e.g. using the ACLK (**currently for internal-use only**)) + +The advantage of these scenarios is that they allow **reproducible** builds and testing +for developers. This is the first iteration of the build-system to allow the team to use +it and get used to it. + +For configurations that involve building and running the agent alone, we still use +`docker-compose` for consistency with more complex configurations. The more complex +configurations allow the agent to be run in conjunction with parts of the cloud +infrastructure (these parts of the code are not public), or with external brokers +(such as VerneMQ for MQTT), or with other external tools (such as TSDB to allow the agent to +export metrics). Note: no external TSDB scenarios are available in the first iteration, +they will be added in subsequent iterations. + +This differs from the packaging dockerfiles as it designed to be used for local development. +The main difference is that these files are designed to support incremental compilation in +the following way: + +1. The initial build should be performed using `bin/clean-install.sh` to create a docker + image with the agent built from the source tree and installed into standard system paths + using `netdata-installer.sh`. In addition to the steps performed by the standard packaging + builds a manifest is created to allow subsequent builds to be made incrementally using + `make` inside the container. Any libraries that are required for 'bleeding-edge' development + are added on top of the standard install. +2. When the `bin/make-install.sh` script is used the docker container will be updated with + a sanitized version of the current build-tree. The manifest will be used to line up the + state of the incoming docker cache with `make`'s view of the file-system according to the + manifest. This means the `make install` inside the container will only rebuild changes + since the last time the disk image was created. + +The exact improvement on the compile-cycle depends on the speed of the network connection +to pull the netdata dependencies, but should shrink the time considerably. For example, +on a macbook pro the initial install takes about 1min + network delay [Note: there is +something bad happening with the standard installer at the end of the container build as +it tries to kill the running agent - this is very slow and bad] and the incremental +step only takes 15s. On a debian host with a fast network this reduces 1m30 -> 13s. + +## Examples + +1. Simple cross-compilation / cross-distro builds. + +```bash +build_external/bin/clean-install.sh arch current +docker run -it --rm arch_current_dev +echo >>daemon/main.c # Simulate edit by touching file +build_external/bin/make-install.sh arch current +docker run -it --rm arch_current_dev +``` + +Currently there is no detection of when the installer needs to be rerun (really this is +when the `autoreconf` / `configure` step must be rerun). Netdata was not written with +multi-stage builds in mind and we need to work out how to do this in the future. For now +it is up to you to know when you need to rerun the clean build step. + +```bash +build_external/bin/clean-install.sh arch current +build_external/bin/clean-install.sh ubuntu 19.10 +docker run -it --rm arch_current_dev +echo >>daemon/main.c # Simulate edit by touching file +build_external/bin/make-install.sh arch current +docker run -it --rm arch_current_dev +echo >>daemon/daemon.c # Simulate second edit step +build_external/bin/make-install.sh arch current # Observe a single file is rebuilt +build_external/bin/make-install.sh arch current # Observe both files are rebuilt +``` + +The state of the build in the two containers is independent. + +2. Single agent config in docker-compose + +This functions the same as the previous example but is wrapped in docker-compose to +allow injection into more complex test configurations. + +```bash +Distro=debian Version=10 docker-compose -f projects/only-agent/docker-compose.yml up +``` + +Note: it is possible to run multiple copies of the agent using the `--scale` option for +`docker-compose up`. + +```bash +Distro=debian Version=10 docker-compose -f projects/only-agent/docker-compose.yml up --scale agent=3 +``` + +3. A simple parent-child scenario + +```bash +# Need to call clean-install on the configs used in the parent-child containers +docker-compose -f parent-child/docker-compose.yml up --scale agent_child1=2 +``` + +Note: this is not production ready yet, but it is left in so that we can see how it behaves +and improve it. Currently it produces the following problems: + * Only the base-configuration in the compose without scaling works. + * The containers are hard-coded in the compose. + * There is no way to separate the agent configurations, so running multiple agent child nodes with the same GUID kills + the parent which exits with a fatal condition. + +4. The ACLK + +This is for internal use only as it requires access to a private repo. Clone the vernemq-docker +repo and follow the instructions within to build an image called `vernemq`. + +```bash +build_external/bin/clean-install.sh arch current # Only needed first time +docker-compose -f build_external/projects/aclk-testing/vernemq-compose.yml -f build_external/projects/aclk-testing/agent-compose.yml up --build +``` + +Notes: +* We are currently limited to arch because of restrictions on libwebsockets +* There is not yet a good way to configure the target agent container from the docker-compose command line. +* Several other containers should be in this compose (a paho client, tshark etc). + + diff --git a/build_external/bin/clean-install.sh b/build_external/bin/clean-install.sh new file mode 100755 index 0000000..78d1f32 --- /dev/null +++ b/build_external/bin/clean-install.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +DISTRO="$1" +VERSION="$2" +BuildBase="$(cd "$(dirname "$0")" && cd .. && pwd)" + +# This is temporary - not all of the package-builder images from the helper-images repo +# are available on Docker Hub. When everything falls under the "happy case" below this +# can be deleted in a future iteration. This is written in a weird way for portability, +# can't rely on bash 4.0+ to allow case fall-through with ;& + +if cat < not safe if tree state changed on host since last config +# Kill everything that is not in .gitignore preserving any fresh changes, i.e. untracked changes will be +# deleted but local changes to tracked files will be preserved. +RUN if git status --porcelain | grep '^[MADRC]'; then \ + git stash && git clean -dxf && (git stash apply || true) \ + else \ + git clean -dxf ; \ + fi + +# Not everybody is updating distclean properly - fix. +RUN find . -name '*.Po' -exec rm \{\} \; +RUN rm -rf autom4te.cache +RUN rm -rf .git/ +RUN find . -type f >/opt/netdata/manifest + +RUN CFLAGS="-Og -g -ggdb -Wall -Wextra -Wformat-signedness -fstack-protector-all -DNETDATA_INTERNAL_CHECKS=1\ + -D_FORTIFY_SOURCE=2 -DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --require-cloud --disable-lto + +RUN ln -sf /dev/stdout /var/log/netdata/access.log +RUN ln -sf /dev/stdout /var/log/netdata/debug.log +RUN ln -sf /dev/stderr /var/log/netdata/error.log + +RUN printf >/opt/netdata/source/gdb_batch '\ +set args -D \n\ +handle SIG32 nostop \n\ +run \n\ +bt' + +#CMD ["/usr/sbin/valgrind", "--leak-check=full", "/usr/sbin/netdata", "-D"] +CMD ["/usr/bin/gdb", "-x", "/opt/netdata/source/gdb_batch", "/usr/sbin/netdata"] diff --git a/build_external/clean-install-arch-extras.Dockerfile b/build_external/clean-install-arch-extras.Dockerfile new file mode 100644 index 0000000..1d18f7a --- /dev/null +++ b/build_external/clean-install-arch-extras.Dockerfile @@ -0,0 +1,58 @@ +FROM archlinux/base:latest + +# There is some redundancy between this file and the archlinux Dockerfile in the helper images +# repo and also with the clean-install.Dockerfile. Once the help image is available on Docker +# Hub this file can be deleted. +RUN echo sdlsjdkls +RUN pacman -Syyu --noconfirm +RUN pacman --noconfirm --needed -S autoconf \ + autoconf-archive \ + autogen \ + automake \ + gcc \ + make \ + git \ + libuv \ + lz4 \ + netcat \ + openssl \ + pkgconfig \ + python \ + libvirt \ + cmake \ + valgrind \ + gdb + +ARG EXTRA_CFLAGS +COPY . /opt/netdata/source +WORKDIR /opt/netdata/source + +RUN git config --global user.email "root@container" +RUN git config --global user.name "Fake root" + +# RUN make distclean -> not safe if tree state changed on host since last config +# Kill everything that is not in .gitignore preserving any fresh changes, i.e. untracked changes will be +# deleted but local changes to tracked files will be preserved. +RUN if git status --porcelain | grep '^[MADRC]'; then \ + git stash && git clean -dxf && (git stash apply || true) \ + else \ + git clean -dxf ; \ + fi + +# Not everybody is updating distclean properly - fix. +RUN find . -name '*.Po' -exec rm \{\} \; +RUN rm -rf autom4te.cache +RUN rm -rf .git/ +RUN find . -type f >/opt/netdata/manifest + +RUN CFLAGS="-Og -g -ggdb -Wall -Wextra -Wformat-signedness -fstack-protector-all -DNETDATA_INTERNAL_CHECKS=1\ + -D_FORTIFY_SOURCE=2 -DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --require-cloud --disable-lto + +RUN ln -sf /dev/stdout /var/log/netdata/access.log +RUN ln -sf /dev/stdout /var/log/netdata/debug.log +RUN ln -sf /dev/stderr /var/log/netdata/error.log + +RUN rm /var/lib/netdata/registry/netdata.public.unique.id + +CMD ["/usr/sbin/valgrind", "--leak-check=full", "/usr/sbin/netdata", "-D"] + diff --git a/build_external/clean-install-arch.Dockerfile b/build_external/clean-install-arch.Dockerfile new file mode 100644 index 0000000..92bd2c6 --- /dev/null +++ b/build_external/clean-install-arch.Dockerfile @@ -0,0 +1,54 @@ +FROM archlinux/base:latest + +# There is some redundancy between this file and the archlinux Dockerfile in the helper images +# repo and also with the clean-install.Dockerfile. Once the help image is available on Docker +# Hub this file can be deleted. + +RUN pacman -Sy +RUN pacman --noconfirm --needed -S autoconf \ + autoconf-archive \ + autogen \ + automake \ + gcc \ + make \ + git \ + libuv \ + lz4 \ + netcat \ + openssl \ + pkgconfig \ + python \ + libvirt \ + cmake + +ARG ACLK=no +ARG EXTRA_CFLAGS +COPY . /opt/netdata/source +WORKDIR /opt/netdata/source + +RUN git config --global user.email "root@container" +RUN git config --global user.name "Fake root" + +# RUN make distclean -> not safe if tree state changed on host since last config +# Kill everything that is not in .gitignore preserving any fresh changes, i.e. untracked changes will be +# deleted but local changes to tracked files will be preserved. +RUN if git status --porcelain | grep '^[MADRC]'; then \ + git stash && git clean -dxf && (git stash apply || true) \ + else \ + git clean -dxf ; \ + fi + +# Not everybody is updating distclean properly - fix. +RUN find . -name '*.Po' -exec rm \{\} \; +RUN rm -rf autom4te.cache +RUN rm -rf .git/ +RUN find . -type f >/opt/netdata/manifest + +RUN CFLAGS="-O1 -ggdb -Wall -Wextra -Wformat-signedness -fstack-protector-all -DNETDATA_INTERNAL_CHECKS=1\ + -D_FORTIFY_SOURCE=2 -DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --disable-lto + +RUN ln -sf /dev/stdout /var/log/netdata/access.log +RUN ln -sf /dev/stdout /var/log/netdata/debug.log +RUN ln -sf /dev/stderr /var/log/netdata/error.log + +CMD ["/usr/sbin/netdata", "-D"] diff --git a/build_external/clean-install.Dockerfile b/build_external/clean-install.Dockerfile new file mode 100644 index 0000000..18586e8 --- /dev/null +++ b/build_external/clean-install.Dockerfile @@ -0,0 +1,39 @@ +ARG DISTRO=arch +ARG VERSION=current +FROM netdata/package-builders:${DISTRO}${VERSION} + +ARG ACLK=no +ARG EXTRA_CFLAGS + +COPY . /opt/netdata/source +WORKDIR /opt/netdata/source + +RUN git config --global user.email "root@container" +RUN git config --global user.name "Fake root" + +# RUN make distclean -> not safe if tree state changed on host since last config +# Kill everything that is not in .gitignore preserving any fresh changes, i.e. untracked changes will be +# deleted but local changes to tracked files will be preserved. +RUN if git status --porcelain | grep '^[MADRC]'; then \ + git stash && git clean -dxf && (git stash apply || true) \ + else \ + git clean -dxf ; \ + fi + +# Not everybody is updating distclean properly - fix. +RUN find . -name '*.Po' -exec rm \{\} \; +RUN rm -rf autom4te.cache +RUN rm -rf .git/ +RUN find . -type f >/opt/netdata/manifest + +RUN CFLAGS="-O1 -ggdb -Wall -Wextra -Wformat-signedness -fstack-protector-all -DNETDATA_INTERNAL_CHECKS=1\ + -D_FORTIFY_SOURCE=2 -DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --disable-lto + +RUN ln -sf /dev/stdout /var/log/netdata/access.log +RUN ln -sf /dev/stdout /var/log/netdata/debug.log +RUN ln -sf /dev/stderr /var/log/netdata/error.log + +RUN rm /var/lib/netdata/registry/netdata.public.unique.id + +CMD ["/usr/sbin/netdata","-D"] +ENTRYPOINT [] diff --git a/build_external/make-install.Dockerfile b/build_external/make-install.Dockerfile new file mode 100644 index 0000000..1341b58 --- /dev/null +++ b/build_external/make-install.Dockerfile @@ -0,0 +1,11 @@ +ARG DISTRO=arch +ARG VERSION=current + +FROM ${DISTRO}_${VERSION}_dev:latest + +# Sanitize new source tree by removing config-time state +COPY . /opt/netdata/latest +WORKDIR /opt/netdata/latest +RUN while read -r f; do cp -p "$f" "../source/$f"; done <../manifest +WORKDIR /opt/netdata/source +RUN make install diff --git a/build_external/scenarios/aclk-testing/agent-compose.yml b/build_external/scenarios/aclk-testing/agent-compose.yml new file mode 100644 index 0000000..5f0f19a --- /dev/null +++ b/build_external/scenarios/aclk-testing/agent-compose.yml @@ -0,0 +1,18 @@ +version: '3.3' +services: + agent_parent: + build: + context: ../../.. + dockerfile: build_external/make-install.Dockerfile + args: + - DISTRO=arch + - VERSION=current + image: arch_current_dev:latest + command: > + sh -c "echo -n 00000000-0000-0000-0000-000000000000 >/var/lib/netdata/cloud.d/claimed_id && + echo '[agent_cloud_link]' >>/etc/netdata/netdata.conf && + echo ' agent cloud link hostname = vernemq' >>/etc/netdata/netdata.conf && + echo ' agent cloud link port = 9002' >>/etc/netdata/netdata.conf && + /usr/sbin/netdata -D" + ports: + - 20000:19999 diff --git a/build_external/scenarios/aclk-testing/agent-valgrind-compose.yml b/build_external/scenarios/aclk-testing/agent-valgrind-compose.yml new file mode 100644 index 0000000..3173e81 --- /dev/null +++ b/build_external/scenarios/aclk-testing/agent-valgrind-compose.yml @@ -0,0 +1,18 @@ +version: '3.3' +services: + agent_parent: + build: + context: ../../.. + dockerfile: build_external/make-install.Dockerfile + args: + - DISTRO=arch + - VERSION=extras + image: arch_extras_dev:latest + command: > + sh -c "echo -n 00000000-0000-0000-0000-000000000000 >/var/lib/netdata/cloud.d/claimed_id && + echo '[agent_cloud_link]' >>/etc/netdata/netdata.conf && + echo ' agent cloud link hostname = vernemq' >>/etc/netdata/netdata.conf && + echo ' agent cloud link port = 9002' >>/etc/netdata/netdata.conf && + /usr/sbin/valgrind --leak-check=full /usr/sbin/netdata -D -W debug_flags=0x200000000" + ports: + - 20000:19999 diff --git a/build_external/scenarios/aclk-testing/agent_netdata.conf b/build_external/scenarios/aclk-testing/agent_netdata.conf new file mode 100644 index 0000000..0d3627d --- /dev/null +++ b/build_external/scenarios/aclk-testing/agent_netdata.conf @@ -0,0 +1,7115 @@ +# netdata configuration +# +# You can download the latest version of this file, using: +# +# wget -O /etc/netdata/netdata.conf http://localhost:19999/netdata.conf +# or +# curl -o /etc/netdata/netdata.conf http://localhost:19999/netdata.conf +# +# You can uncomment and change any of the options below. +# The value shown in the commented settings, is the default value. +# + +# global netdata configuration + +[global] + # glibc malloc arena max for plugins = 1 + # glibc malloc arena max for netdata = 1 + # hostname = b073e16793c4 + # history = 3996 + # update every = 1 + # memory mode = dbengine + # page cache size = 32 + # dbengine disk space = 256 + # host access prefix = + # memory deduplication (ksm) = yes + # timezone = Etc/UTC + # run as user = netdata + # OOM score = 1000 + # process scheduling policy = idle + # pthread stack size = 8388608 + # cleanup obsolete charts after seconds = 3600 + # gap when lost iterations above = 1 + # cleanup orphan hosts after seconds = 3600 + # delete obsolete charts files = yes + # delete orphan hosts files = yes + # enable zero metrics = no + +[web] + # ssl key = /etc/netdata/ssl/key.pem + # ssl certificate = /etc/netdata/ssl/cert.pem + # ses max window = 15 + # des max window = 15 + # mode = static-threaded + # listen backlog = 4096 + # default port = 19999 + # bind to = * + # disconnect idle clients after seconds = 60 + # timeout for first request = 60 + # accept a streaming request every seconds = 0 + # respect do not track policy = no + # x-frame-options response header = + # allow connections from = localhost * + # allow connections by dns = heuristic + # allow dashboard from = localhost * + # allow dashboard by dns = heuristic + # allow badges from = * + # allow badges by dns = heuristic + # allow streaming from = * + # allow streaming by dns = heuristic + # allow netdata.conf from = localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.* 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.* 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.* 172.31.* + # allow netdata.conf by dns = no + # allow management from = localhost + # allow management by dns = heuristic + # enable gzip compression = yes + # gzip compression strategy = default + # gzip compression level = 3 + # web server threads = 6 + # web server max sockets = 262144 + +[plugins] + # checks = no + # proc = yes + # diskspace = yes + # cgroups = yes + # tc = yes + # idlejitter = yes + # enable running new plugins = yes + # check for new plugins every = 60 + # slabinfo = no + # go.d = yes + # apps = yes + # charts.d = yes + # fping = yes + # python.d = yes + # perf = yes + # ioping = yes + +[health] + # silencers file = /var/lib/netdata/health.silencers.json + # enabled = yes + # default repeat warning = never + # default repeat critical = never + # in memory max health log entries = 1000 + # script to execute on alarm = /usr/libexec/netdata/plugins.d/alarm-notify.sh + # rotate log every lines = 2000 + # run at least every seconds = 10 + # postpone alarms during hibernation for seconds = 60 + +[registry] + # enabled = no + # registry db directory = /var/lib/netdata/registry + # netdata unique id file = /var/lib/netdata/registry/netdata.public.unique.id + # registry db file = /var/lib/netdata/registry/registry.db + # registry log file = /var/lib/netdata/registry/registry-log.db + # registry save db every new entries = 1000000 + # registry expire idle persons days = 365 + # registry domain = + # registry to announce = https://registry.my-netdata.io + # registry hostname = b073e16793c4 + # verify browser cookies support = yes + # max URL length = 1024 + # max URL name length = 50 + # netdata management api key file = /var/lib/netdata/netdata.api.key + # allow from = * + # allow by dns = heuristic + +[cloud] + # cloud base url = https://netdata.cloud + +[statsd] + # enabled = yes + # update every (flushInterval) = 1 + # udp messages to process at once = 10 + # create private charts for metrics matching = * + # max private charts allowed = 200 + # max private charts hard limit = 1000 + # private charts memory mode = dbengine + # private charts history = 3996 + # decimal detail = 1000 + # disconnect idle tcp clients after seconds = 600 + # private charts hidden = no + # histograms and timers percentile (percentThreshold) = 95.00000 + # add dimension for number of events received = yes + # gaps on gauges (deleteGauges) = no + # gaps on counters (deleteCounters) = no + # gaps on meters (deleteMeters) = no + # gaps on sets (deleteSets) = no + # gaps on histograms (deleteHistograms) = no + # gaps on timers (deleteTimers) = no + # statsd server max TCP sockets = 262144 + # listen backlog = 4096 + # default port = 8125 + # bind to = udp:localhost tcp:localhost + + +# per plugin configuration + +[plugin:cgroups] + # cgroups plugin resource charts = yes + # update every = 1 + # check for new cgroups every = 10 + # use unified cgroups = no + # containers priority = 40000 + # enable cpuacct stat (total CPU) = auto + # enable cpuacct usage (per core CPU) = auto + # enable memory (used mem including cache) = auto + # enable detailed memory = auto + # enable memory limits fail count = auto + # enable swap memory = auto + # enable blkio bandwidth = auto + # enable blkio operations = auto + # enable blkio throttle bandwidth = auto + # enable blkio throttle operations = auto + # enable blkio queued operations = auto + # enable blkio merged operations = auto + # enable cpu pressure = auto + # enable io some pressure = auto + # enable io full pressure = auto + # enable memory some pressure = auto + # enable memory full pressure = auto + # recheck zero blkio every iterations = 10 + # recheck zero memory failcnt every iterations = 10 + # recheck zero detailed memory every iterations = 10 + # enable systemd services = yes + # enable systemd services detailed memory = no + # report used memory without cache = yes + # path to /sys/fs/cgroup/cpuacct = /sys/fs/cgroup/cpu,cpuacct + # path to /sys/fs/cgroup/cpuset = /sys/fs/cgroup/cpuset + # path to /sys/fs/cgroup/blkio = /sys/fs/cgroup/blkio + # path to /sys/fs/cgroup/memory = /sys/fs/cgroup/memory + # path to /sys/fs/cgroup/devices = /sys/fs/cgroup/devices + # max cgroups to allow = 1000 + # max cgroups depth to monitor = 0 + # enable by default cgroups matching = !*/init.scope !/system.slice/run-*.scope *.scope /machine.slice/*.service !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.socket !*.slice !*.swap !*.user !/ !/docker !/libvirt !/lxc !/lxc/*/* !/lxc.monitor !/lxc.pivot !/lxc.payload !/machine !/qemu !/system !/systemd !/user * + # search for cgroups in subpaths matching = !*/init.scope !*-qemu !*.libvirt-qemu !/init.scope !/system !/systemd !/user !/user.slice !/lxc/*/* !/lxc.monitor !/lxc.payload/*/* * + # script to get cgroup names = /usr/libexec/netdata/plugins.d/cgroup-name.sh + # script to get cgroup network interfaces = /usr/libexec/netdata/plugins.d/cgroup-network + # run script to rename cgroups matching = !/ !*.mount !*.socket !*.partition /machine.slice/*.service !*.service !*.slice !*.swap !*.user !init.scope !*.scope/vcpu* !*.scope/emulator *.scope *docker* *lxc* *qemu* *kubepods* *.libvirt-qemu * + # cgroups to match as systemd services = !/system.slice/*/*.service /system.slice/*.service + +[plugin:proc] + # netdata server resources = yes + # /proc/pagetypeinfo = no + # /proc/stat = yes + # /proc/uptime = yes + # /proc/loadavg = yes + # /proc/sys/kernel/random/entropy_avail = yes + # /proc/pressure = yes + # /proc/interrupts = yes + # /proc/softirqs = yes + # /proc/vmstat = yes + # /proc/meminfo = yes + # /sys/kernel/mm/ksm = yes + # /sys/block/zram = yes + # /sys/devices/system/edac/mc = yes + # /sys/devices/system/node = yes + # /proc/net/dev = yes + # /proc/net/sockstat = yes + # /proc/net/sockstat6 = yes + # /proc/net/netstat = yes + # /proc/net/snmp = yes + # /proc/net/snmp6 = yes + # /proc/net/sctp/snmp = yes + # /proc/net/softnet_stat = yes + # /proc/net/ip_vs/stats = yes + # /proc/net/stat/conntrack = yes + # /proc/net/stat/synproxy = yes + # /proc/diskstats = yes + # /proc/mdstat = yes + # /proc/net/rpc/nfsd = yes + # /proc/net/rpc/nfs = yes + # /proc/spl/kstat/zfs/arcstats = yes + # /sys/fs/btrfs = yes + # ipc = yes + # /sys/class/power_supply = yes + +[plugin:proc:diskspace] + # remove charts of unmounted disks = yes + # update every = 1 + # check for new mount points every = 15 + # exclude space metrics on paths = /proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/* + # exclude space metrics on filesystems = *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs + # space usage for all disks = auto + # inodes usage for all disks = auto + +[plugin:tc] + # script to run to get tc values = /usr/libexec/netdata/plugins.d/tc-qos-helper.sh + +[plugin:idlejitter] + # loop time in ms = 20 + +[plugin:go.d] + # update every = 1 + # command options = + +[plugin:apps] + # update every = 1 + # command options = + +[plugin:charts.d] + # update every = 1 + # command options = + +[plugin:fping] + # update every = 1 + # command options = + +[plugin:python.d] + # update every = 1 + # command options = + +[plugin:perf] + # update every = 1 + # command options = + +[plugin:ioping] + # update every = 1 + # command options = + +[plugin:proc:/proc/stat] + # cpu utilization = yes + # per cpu core utilization = yes + # cpu interrupts = yes + # context switches = yes + # processes started = yes + # processes running = yes + # keep per core files open = yes + # keep cpuidle files open = yes + # core_throttle_count = auto + # package_throttle_count = no + # cpu frequency = yes + # cpu idle states = yes + # core_throttle_count filename to monitor = /sys/devices/system/cpu/%s/thermal_throttle/core_throttle_count + # package_throttle_count filename to monitor = /sys/devices/system/cpu/%s/thermal_throttle/package_throttle_count + # scaling_cur_freq filename to monitor = /sys/devices/system/cpu/%s/cpufreq/scaling_cur_freq + # time_in_state filename to monitor = /sys/devices/system/cpu/%s/cpufreq/stats/time_in_state + # schedstat filename to monitor = /proc/schedstat + # cpuidle name filename to monitor = /sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/name + # cpuidle time filename to monitor = /sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/time + # filename to monitor = /proc/stat + +[plugin:proc:diskspace:/] + # space usage = auto + # inodes usage = auto + +[plugin:proc:diskspace:/dev] + # space usage = auto + # inodes usage = auto + +[plugin:proc:diskspace:/sys/fs/cgroup] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/systemd] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/cpu,cpuacct] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/devices] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/blkio] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/perf_event] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/rdma] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/net_cls,net_prio] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/pids] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/cpuset] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/memory] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/fs/cgroup/freezer] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/dev/shm] + # space usage = auto + # inodes usage = auto + +[plugin:proc:diskspace:/etc/resolv.conf] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/etc/hostname] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/etc/hosts] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/proc/asound] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/proc/acpi] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/proc/kcore] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/proc/keys] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/proc/timer_list] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/proc/sched_debug] + # space usage = no + # inodes usage = no + +[plugin:proc:diskspace:/sys/firmware] + # space usage = no + # inodes usage = no + +[plugin:proc:/proc/uptime] + # filename to monitor = /proc/uptime + +[plugin:proc:/proc/loadavg] + # filename to monitor = /proc/loadavg + # enable load average = yes + # enable total processes = yes + +[plugin:proc:/proc/sys/kernel/random/entropy_avail] + # filename to monitor = /proc/sys/kernel/random/entropy_avail + +[plugin:proc:/proc/pressure] + # base path of pressure metrics = /proc/pressure + # enable cpu some pressure = yes + # enable memory some pressure = yes + # enable memory full pressure = yes + # enable io some pressure = yes + # enable io full pressure = yes + +[plugin:proc:/proc/interrupts] + # interrupts per core = auto + # filename to monitor = /proc/interrupts + +[plugin:proc:/proc/softirqs] + # interrupts per core = auto + # filename to monitor = /proc/softirqs + +[plugin:proc:/proc/vmstat] + # swap i/o = auto + # disk i/o = yes + # memory page faults = yes + # system-wide numa metric summary = auto + # filename to monitor = /proc/vmstat + +[plugin:proc:/sys/devices/system/node] + # directory to monitor = /sys/devices/system/node + # enable per-node numa metrics = auto + +[plugin:proc:/proc/meminfo] + # system ram = yes + # system swap = auto + # hardware corrupted ECC = auto + # committed memory = yes + # writeback memory = yes + # kernel memory = yes + # slab memory = yes + # hugepages = auto + # transparent hugepages = auto + # filename to monitor = /proc/meminfo + +[plugin:proc:/sys/kernel/mm/ksm] + # /sys/kernel/mm/ksm/pages_shared = /sys/kernel/mm/ksm/pages_shared + # /sys/kernel/mm/ksm/pages_sharing = /sys/kernel/mm/ksm/pages_sharing + # /sys/kernel/mm/ksm/pages_unshared = /sys/kernel/mm/ksm/pages_unshared + # /sys/kernel/mm/ksm/pages_volatile = /sys/kernel/mm/ksm/pages_volatile + +[plugin:proc:/sys/devices/system/edac/mc] + # directory to monitor = /sys/devices/system/edac/mc + +[plugin:proc:/proc/net/dev] + # filename to monitor = /proc/net/dev + # path to get virtual interfaces = /sys/devices/virtual/net/%s + # path to get net device speed = /sys/class/net/%s/speed + # path to get net device duplex = /sys/class/net/%s/duplex + # path to get net device operstate = /sys/class/net/%s/operstate + # enable new interfaces detected at runtime = auto + # bandwidth for all interfaces = auto + # packets for all interfaces = auto + # errors for all interfaces = auto + # drops for all interfaces = auto + # fifo for all interfaces = auto + # compressed packets for all interfaces = auto + # frames, collisions, carrier counters for all interfaces = auto + # disable by default interfaces matching = lo fireqos* *-ifb + # refresh interface speed every seconds = 10 + # refresh interface duplex every seconds = 10 + # refresh interface operstate every seconds = 10 + +[plugin:proc:/proc/net/dev:lo] + # enabled = no + # virtual = yes + +[plugin:proc:/proc/net/dev:eth0] + # enabled = yes + # virtual = yes + # bandwidth = auto + # packets = auto + # errors = auto + # drops = auto + # fifo = auto + # compressed = auto + # events = auto + +[plugin:proc:/proc/net/sockstat] + # ipv4 sockets = auto + # ipv4 TCP sockets = auto + # ipv4 TCP memory = auto + # ipv4 UDP sockets = auto + # ipv4 UDP memory = auto + # ipv4 UDPLITE sockets = auto + # ipv4 RAW sockets = auto + # ipv4 FRAG sockets = auto + # ipv4 FRAG memory = auto + # update constants every = 60 + # filename to monitor = /proc/net/sockstat + +[plugin:proc:/proc/net/sockstat6] + # ipv6 TCP sockets = auto + # ipv6 UDP sockets = auto + # ipv6 UDPLITE sockets = auto + # ipv6 RAW sockets = auto + # ipv6 FRAG sockets = auto + # filename to monitor = /proc/net/sockstat6 + +[plugin:proc:/proc/net/netstat] + # bandwidth = auto + # input errors = auto + # multicast bandwidth = auto + # broadcast bandwidth = auto + # multicast packets = auto + # broadcast packets = auto + # ECN packets = auto + # TCP reorders = auto + # TCP SYN cookies = auto + # TCP out-of-order queue = auto + # TCP connection aborts = auto + # TCP memory pressures = auto + # TCP SYN queue = auto + # TCP accept queue = auto + # filename to monitor = /proc/net/netstat + +[plugin:proc:/proc/net/snmp] + # ipv4 packets = auto + # ipv4 fragments sent = auto + # ipv4 fragments assembly = auto + # ipv4 errors = auto + # ipv4 TCP connections = auto + # ipv4 TCP packets = auto + # ipv4 TCP errors = auto + # ipv4 TCP opens = auto + # ipv4 TCP handshake issues = auto + # ipv4 UDP packets = auto + # ipv4 UDP errors = auto + # ipv4 ICMP packets = auto + # ipv4 ICMP messages = auto + # ipv4 UDPLite packets = auto + # filename to monitor = /proc/net/snmp + +[plugin:proc:/proc/net/snmp6] + # ipv6 packets = auto + # ipv6 fragments sent = auto + # ipv6 fragments assembly = auto + # ipv6 errors = auto + # ipv6 UDP packets = auto + # ipv6 UDP errors = auto + # ipv6 UDPlite packets = auto + # ipv6 UDPlite errors = auto + # bandwidth = auto + # multicast bandwidth = auto + # broadcast bandwidth = auto + # multicast packets = auto + # icmp = auto + # icmp redirects = auto + # icmp errors = auto + # icmp echos = auto + # icmp group membership = auto + # icmp router = auto + # icmp neighbor = auto + # icmp mldv2 = auto + # icmp types = auto + # ect = auto + # filename to monitor = /proc/net/snmp6 + +[plugin:proc:/proc/net/sctp/snmp] + # established associations = auto + # association transitions = auto + # fragmentation = auto + # packets = auto + # packet errors = auto + # chunk types = auto + # filename to monitor = /proc/net/sctp/snmp + +[plugin:proc:/proc/net/softnet_stat] + # softnet_stat per core = yes + # filename to monitor = /proc/net/softnet_stat + +[plugin:proc:/proc/net/ip_vs_stats] + # IPVS bandwidth = yes + # IPVS connections = yes + # IPVS packets = yes + # filename to monitor = /proc/net/ip_vs_stats + +[plugin:proc:/proc/net/stat/nf_conntrack] + # filename to monitor = /proc/net/stat/nf_conntrack + # netfilter new connections = yes + # netfilter connection changes = yes + # netfilter connection expectations = yes + # netfilter connection searches = yes + # netfilter errors = yes + # netfilter connections = yes + +[plugin:proc:/proc/sys/net/netfilter/nf_conntrack_max] + # filename to monitor = /proc/sys/net/netfilter/nf_conntrack_max + # read every seconds = 10 + +[plugin:proc:/proc/net/stat/synproxy] + # SYNPROXY entries = auto + # SYNPROXY cookies = auto + # SYNPROXY SYN received = auto + # SYNPROXY connections reopened = auto + # filename to monitor = /proc/net/stat/synproxy + +[plugin:proc:/proc/diskstats] + # enable new disks detected at runtime = yes + # performance metrics for physical disks = auto + # performance metrics for virtual disks = auto + # performance metrics for partitions = no + # bandwidth for all disks = auto + # operations for all disks = auto + # merged operations for all disks = auto + # i/o time for all disks = auto + # queued operations for all disks = auto + # utilization percentage for all disks = auto + # backlog for all disks = auto + # bcache for all disks = auto + # bcache priority stats update every = 0 + # remove charts of removed disks = yes + # path to get block device = /sys/block/%s + # path to get block device bcache = /sys/block/%s/bcache + # path to get virtual block device = /sys/devices/virtual/block/%s + # path to get block device infos = /sys/dev/block/%lu:%lu/%s + # path to device mapper = /dev/mapper + # path to /dev/disk/by-label = /dev/disk/by-label + # path to /dev/disk/by-id = /dev/disk/by-id + # path to /dev/vx/dsk = /dev/vx/dsk + # name disks by id = no + # preferred disk ids = * + # exclude disks = loop* ram* + # filename to monitor = /proc/diskstats + # performance metrics for disks with major 8 = yes + +[plugin:proc:/proc/diskstats:sda] + # enable = yes + # enable performance metrics = yes + # bandwidth = auto + # operations = auto + # merged operations = auto + # i/o time = auto + # queued operations = auto + # utilization percentage = auto + # backlog = auto + +[plugin:proc:/proc/diskstats:sda1] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sda2] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sda3] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sda4] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sda5] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sdb] + # enable = yes + # enable performance metrics = yes + # bandwidth = auto + # operations = auto + # merged operations = auto + # i/o time = auto + # queued operations = auto + # utilization percentage = auto + # backlog = auto + +[plugin:proc:/proc/diskstats:sdb1] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sdb2] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sdb3] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sdb4] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/diskstats:sdb5] + # enable = yes + # enable performance metrics = no + # bandwidth = no + # operations = no + # merged operations = no + # i/o time = no + # queued operations = no + # utilization percentage = no + # backlog = no + +[plugin:proc:/proc/mdstat] + # faulty devices = yes + # nonredundant arrays availability = yes + # mismatch count = auto + # disk stats = yes + # operation status = yes + # make charts obsolete = yes + # filename to monitor = /proc/mdstat + # mismatch_cnt filename to monitor = /sys/block/%s/md/mismatch_cnt + +[plugin:proc:/proc/net/rpc/nfsd] + # filename to monitor = /proc/net/rpc/nfsd + +[plugin:proc:/proc/net/rpc/nfs] + # filename to monitor = /proc/net/rpc/nfs + +[plugin:proc:/proc/spl/kstat/zfs/arcstats] + # filename to monitor = /proc/spl/kstat/zfs/arcstats + +[plugin:proc:/sys/fs/btrfs] + # path to monitor = /sys/fs/btrfs + # check for btrfs changes every = 60 + # physical disks allocation = auto + # data allocation = auto + # metadata allocation = auto + # system allocation = auto + +[plugin:proc:ipc] + # message queues = yes + # semaphore totals = yes + # shared memory totals = yes + # msg filename to monitor = /proc/sysvipc/msg + # shm filename to monitor = /proc/sysvipc/shm + # max dimensions in memory allowed = 50 + +[plugin:proc:/sys/class/power_supply] + # battery capacity = yes + # battery charge = no + # battery energy = no + # power supply voltage = no + # keep files open = auto + # directory to monitor = /sys/class/power_supply + + +# per chart configuration + +[system.idlejitter] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.idlejitter + # chart type = area + # type = system + # family = idlejitter + # units = microseconds lost/s + # context = system.idlejitter + # priority = 800 + # name = system.idlejitter + # title = CPU Idle Jitter + # dim min name = min + # dim min algorithm = absolute + # dim min multiplier = 1 + # dim min divisor = 1 + # dim max name = max + # dim max algorithm = absolute + # dim max multiplier = 1 + # dim max divisor = 1 + # dim average name = average + # dim average algorithm = absolute + # dim average multiplier = 1 + # dim average divisor = 1 + +[netdata.statsd_metrics] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.statsd_metrics + # chart type = stacked + # type = netdata + # family = statsd + # units = metrics + # context = netdata.statsd_metrics + # priority = 132010 + # name = netdata.statsd_metrics + # title = Metrics in the netdata statsd database + # dim gauges name = gauges + # dim gauges algorithm = absolute + # dim gauges multiplier = 1 + # dim gauges divisor = 1 + # dim counters name = counters + # dim counters algorithm = absolute + # dim counters multiplier = 1 + # dim counters divisor = 1 + # dim timers name = timers + # dim timers algorithm = absolute + # dim timers multiplier = 1 + # dim timers divisor = 1 + # dim meters name = meters + # dim meters algorithm = absolute + # dim meters multiplier = 1 + # dim meters divisor = 1 + # dim histograms name = histograms + # dim histograms algorithm = absolute + # dim histograms multiplier = 1 + # dim histograms divisor = 1 + # dim sets name = sets + # dim sets algorithm = absolute + # dim sets multiplier = 1 + # dim sets divisor = 1 + +[netdata.statsd_useful_metrics] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.statsd_useful_metrics + # chart type = stacked + # type = netdata + # family = statsd + # units = metrics + # context = netdata.statsd_useful_metrics + # priority = 132010 + # name = netdata.statsd_useful_metrics + # title = Useful metrics in the netdata statsd database + # dim gauges name = gauges + # dim gauges algorithm = absolute + # dim gauges multiplier = 1 + # dim gauges divisor = 1 + # dim counters name = counters + # dim counters algorithm = absolute + # dim counters multiplier = 1 + # dim counters divisor = 1 + # dim timers name = timers + # dim timers algorithm = absolute + # dim timers multiplier = 1 + # dim timers divisor = 1 + # dim meters name = meters + # dim meters algorithm = absolute + # dim meters multiplier = 1 + # dim meters divisor = 1 + # dim histograms name = histograms + # dim histograms algorithm = absolute + # dim histograms multiplier = 1 + # dim histograms divisor = 1 + # dim sets name = sets + # dim sets algorithm = absolute + # dim sets multiplier = 1 + # dim sets divisor = 1 + +[netdata.statsd_events] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.statsd_events + # chart type = stacked + # type = netdata + # family = statsd + # units = events/s + # context = netdata.statsd_events + # priority = 132011 + # name = netdata.statsd_events + # title = Events processed by the netdata statsd server + # dim gauges name = gauges + # dim gauges algorithm = incremental + # dim gauges multiplier = 1 + # dim gauges divisor = 1 + # dim counters name = counters + # dim counters algorithm = incremental + # dim counters multiplier = 1 + # dim counters divisor = 1 + # dim timers name = timers + # dim timers algorithm = incremental + # dim timers multiplier = 1 + # dim timers divisor = 1 + # dim meters name = meters + # dim meters algorithm = incremental + # dim meters multiplier = 1 + # dim meters divisor = 1 + # dim histograms name = histograms + # dim histograms algorithm = incremental + # dim histograms multiplier = 1 + # dim histograms divisor = 1 + # dim sets name = sets + # dim sets algorithm = incremental + # dim sets multiplier = 1 + # dim sets divisor = 1 + # dim unknown name = unknown + # dim unknown algorithm = incremental + # dim unknown multiplier = 1 + # dim unknown divisor = 1 + # dim errors name = errors + # dim errors algorithm = incremental + # dim errors multiplier = 1 + # dim errors divisor = 1 + +[netdata.statsd_reads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.statsd_reads + # chart type = stacked + # type = netdata + # family = statsd + # units = reads/s + # context = netdata.statsd_reads + # priority = 132012 + # name = netdata.statsd_reads + # title = Read operations made by the netdata statsd server + # dim tcp name = tcp + # dim tcp algorithm = incremental + # dim tcp multiplier = 1 + # dim tcp divisor = 1 + # dim udp name = udp + # dim udp algorithm = incremental + # dim udp multiplier = 1 + # dim udp divisor = 1 + +[netdata.statsd_bytes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.statsd_bytes + # chart type = stacked + # type = netdata + # family = statsd + # units = kilobits/s + # context = netdata.statsd_bytes + # priority = 132013 + # name = netdata.statsd_bytes + # title = Bytes read by the netdata statsd server + # dim tcp name = tcp + # dim tcp algorithm = incremental + # dim tcp multiplier = 8 + # dim tcp divisor = 1000 + # dim udp name = udp + # dim udp algorithm = incremental + # dim udp multiplier = 8 + # dim udp divisor = 1000 + +[netdata.statsd_packets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.statsd_packets + # chart type = stacked + # type = netdata + # family = statsd + # units = packets/s + # context = netdata.statsd_packets + # priority = 132014 + # name = netdata.statsd_packets + # title = Network packets processed by the netdata statsd server + # dim tcp name = tcp + # dim tcp algorithm = incremental + # dim tcp multiplier = 1 + # dim tcp divisor = 1 + # dim udp name = udp + # dim udp algorithm = incremental + # dim udp multiplier = 1 + # dim udp divisor = 1 + +[netdata.tcp_connects] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.tcp_connects + # chart type = line + # type = netdata + # family = statsd + # units = events + # context = netdata.tcp_connects + # priority = 132015 + # name = netdata.tcp_connects + # title = statsd server TCP connects and disconnects + # dim connects name = connects + # dim connects algorithm = incremental + # dim connects multiplier = 1 + # dim connects divisor = 1 + # dim disconnects name = disconnects + # dim disconnects algorithm = incremental + # dim disconnects multiplier = -1 + # dim disconnects divisor = 1 + +[netdata.tcp_connected] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.tcp_connected + # chart type = line + # type = netdata + # family = statsd + # units = sockets + # context = netdata.tcp_connected + # priority = 132016 + # name = netdata.tcp_connected + # title = statsd server TCP connected sockets + # dim connected name = connected + # dim connected algorithm = absolute + # dim connected multiplier = 1 + # dim connected divisor = 1 + +[netdata.private_charts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.private_charts + # chart type = area + # type = netdata + # family = statsd + # units = charts + # context = netdata.private_charts + # priority = 132020 + # name = netdata.private_charts + # title = Private metric charts created by the netdata statsd server + # dim charts name = charts + # dim charts algorithm = absolute + # dim charts multiplier = 1 + # dim charts divisor = 1 + +[netdata.plugin_statsd_charting_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.plugin_statsd_charting_cpu + # chart type = stacked + # type = netdata + # family = statsd + # units = milliseconds/s + # context = netdata.statsd_cpu + # priority = 132001 + # name = netdata.plugin_statsd_charting_cpu + # title = Netdata statsd charting thread CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.plugin_statsd_collector1_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.plugin_statsd_collector1_cpu + # chart type = stacked + # type = netdata + # family = statsd + # units = milliseconds/s + # context = netdata.statsd_cpu + # priority = 132002 + # name = netdata.plugin_statsd_collector1_cpu + # title = Netdata statsd collector thread No 1 CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[system.cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.cpu + # chart type = stacked + # type = system + # family = cpu + # units = percentage + # context = system.cpu + # priority = 100 + # name = system.cpu + # title = Total CPU utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[cpu.cpu0] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu0 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1001 + # name = cpu.cpu0 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[cpu.cpu1] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu1 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1002 + # name = cpu.cpu1 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[disk_space._] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_space._ + # chart type = stacked + # type = disk_space + # family = / + # units = GiB + # context = disk.space + # priority = 2023 + # name = disk_space._ + # title = Disk Space Usage for / [overlay] + # dim avail name = avail + # dim avail algorithm = absolute + # dim avail multiplier = 4096 + # dim avail divisor = 1073741824 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 4096 + # dim used divisor = 1073741824 + # dim reserved_for_root name = reserved for root + # dim reserved_for_root algorithm = absolute + # dim reserved_for_root multiplier = 4096 + # dim reserved_for_root divisor = 1073741824 + +[disk_inodes._] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_inodes._ + # chart type = stacked + # type = disk_inodes + # family = / + # units = inodes + # context = disk.inodes + # priority = 2024 + # name = disk_inodes._ + # title = Disk Files (inodes) Usage for / [overlay] + # dim avail name = avail + # dim avail algorithm = absolute + # dim avail multiplier = 1 + # dim avail divisor = 1 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 1 + # dim used divisor = 1 + # dim reserved_for_root name = reserved for root + # dim reserved_for_root algorithm = absolute + # dim reserved_for_root multiplier = 1 + # dim reserved_for_root divisor = 1 + +[disk_space._dev] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_space._dev + # chart type = stacked + # type = disk_space + # family = /dev + # units = GiB + # context = disk.space + # priority = 2023 + # name = disk_space._dev + # title = Disk Space Usage for /dev [tmpfs] + # dim avail name = avail + # dim avail algorithm = absolute + # dim avail multiplier = 4096 + # dim avail divisor = 1073741824 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 4096 + # dim used divisor = 1073741824 + # dim reserved_for_root name = reserved for root + # dim reserved_for_root algorithm = absolute + # dim reserved_for_root multiplier = 4096 + # dim reserved_for_root divisor = 1073741824 + +[disk_inodes._dev] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_inodes._dev + # chart type = stacked + # type = disk_inodes + # family = /dev + # units = inodes + # context = disk.inodes + # priority = 2024 + # name = disk_inodes._dev + # title = Disk Files (inodes) Usage for /dev [tmpfs] + # dim avail name = avail + # dim avail algorithm = absolute + # dim avail multiplier = 1 + # dim avail divisor = 1 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 1 + # dim used divisor = 1 + # dim reserved_for_root name = reserved for root + # dim reserved_for_root algorithm = absolute + # dim reserved_for_root multiplier = 1 + # dim reserved_for_root divisor = 1 + +[disk_space._dev_shm] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_space._dev_shm + # chart type = stacked + # type = disk_space + # family = /dev/shm + # units = GiB + # context = disk.space + # priority = 2023 + # name = disk_space._dev_shm + # title = Disk Space Usage for /dev/shm [shm] + # dim avail name = avail + # dim avail algorithm = absolute + # dim avail multiplier = 4096 + # dim avail divisor = 1073741824 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 4096 + # dim used divisor = 1073741824 + # dim reserved_for_root name = reserved for root + # dim reserved_for_root algorithm = absolute + # dim reserved_for_root multiplier = 4096 + # dim reserved_for_root divisor = 1073741824 + +[disk_inodes._dev_shm] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_inodes._dev_shm + # chart type = stacked + # type = disk_inodes + # family = /dev/shm + # units = inodes + # context = disk.inodes + # priority = 2024 + # name = disk_inodes._dev_shm + # title = Disk Files (inodes) Usage for /dev/shm [shm] + # dim avail name = avail + # dim avail algorithm = absolute + # dim avail multiplier = 1 + # dim avail divisor = 1 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 1 + # dim used divisor = 1 + # dim reserved_for_root name = reserved for root + # dim reserved_for_root algorithm = absolute + # dim reserved_for_root multiplier = 1 + # dim reserved_for_root divisor = 1 + +[cpu.cpu2] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu2 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1003 + # name = cpu.cpu2 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[cpu.cpu3] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu3 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1004 + # name = cpu.cpu3 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[cpu.cpu4] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu4 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1005 + # name = cpu.cpu4 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[cpu.cpu5] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu5 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1006 + # name = cpu.cpu5 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[cpu.cpu6] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu6 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1007 + # name = cpu.cpu6 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[cpu.cpu7] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu7 + # chart type = stacked + # type = cpu + # family = utilization + # units = percentage + # context = cpu.cpu + # priority = 1008 + # name = cpu.cpu7 + # title = Core utilization + # dim guest_nice name = guest_nice + # dim guest_nice algorithm = percentage-of-incremental-row + # dim guest_nice multiplier = 1 + # dim guest_nice divisor = 1 + # dim guest name = guest + # dim guest algorithm = percentage-of-incremental-row + # dim guest multiplier = 1 + # dim guest divisor = 1 + # dim steal name = steal + # dim steal algorithm = percentage-of-incremental-row + # dim steal multiplier = 1 + # dim steal divisor = 1 + # dim softirq name = softirq + # dim softirq algorithm = percentage-of-incremental-row + # dim softirq multiplier = 1 + # dim softirq divisor = 1 + # dim irq name = irq + # dim irq algorithm = percentage-of-incremental-row + # dim irq multiplier = 1 + # dim irq divisor = 1 + # dim user name = user + # dim user algorithm = percentage-of-incremental-row + # dim user multiplier = 1 + # dim user divisor = 1 + # dim system name = system + # dim system algorithm = percentage-of-incremental-row + # dim system multiplier = 1 + # dim system divisor = 1 + # dim nice name = nice + # dim nice algorithm = percentage-of-incremental-row + # dim nice multiplier = 1 + # dim nice divisor = 1 + # dim iowait name = iowait + # dim iowait algorithm = percentage-of-incremental-row + # dim iowait multiplier = 1 + # dim iowait divisor = 1 + # dim idle name = idle + # dim idle algorithm = percentage-of-incremental-row + # dim idle multiplier = 1 + # dim idle divisor = 1 + +[system.intr] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.intr + # chart type = line + # type = system + # family = interrupts + # units = interrupts/s + # context = system.intr + # priority = 900 + # name = system.intr + # title = CPU Interrupts + # dim interrupts name = interrupts + # dim interrupts algorithm = incremental + # dim interrupts multiplier = 1 + # dim interrupts divisor = 1 + +[system.ctxt] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.ctxt + # chart type = line + # type = system + # family = processes + # units = context switches/s + # context = system.ctxt + # priority = 800 + # name = system.ctxt + # title = CPU Context Switches + # dim switches name = switches + # dim switches algorithm = incremental + # dim switches multiplier = 1 + # dim switches divisor = 1 + +[system.forks] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.forks + # chart type = line + # type = system + # family = processes + # units = processes/s + # context = system.forks + # priority = 700 + # name = system.forks + # title = Started Processes + # dim started name = started + # dim started algorithm = incremental + # dim started multiplier = 1 + # dim started divisor = 1 + +[system.processes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.processes + # chart type = line + # type = system + # family = processes + # units = processes + # context = system.processes + # priority = 600 + # name = system.processes + # title = System Processes + # dim running name = running + # dim running algorithm = absolute + # dim running multiplier = 1 + # dim running divisor = 1 + # dim blocked name = blocked + # dim blocked algorithm = absolute + # dim blocked multiplier = -1 + # dim blocked divisor = 1 + +[cpu.core_throttling] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.core_throttling + # chart type = line + # type = cpu + # family = throttling + # units = events/s + # context = cpu.core_throttling + # priority = 5001 + # name = cpu.core_throttling + # title = Core Thermal Throttling Events + # dim cpu0 name = cpu0 + # dim cpu0 algorithm = incremental + # dim cpu0 multiplier = 1 + # dim cpu0 divisor = 1 + # dim cpu1 name = cpu1 + # dim cpu1 algorithm = incremental + # dim cpu1 multiplier = 1 + # dim cpu1 divisor = 1 + # dim cpu2 name = cpu2 + # dim cpu2 algorithm = incremental + # dim cpu2 multiplier = 1 + # dim cpu2 divisor = 1 + # dim cpu3 name = cpu3 + # dim cpu3 algorithm = incremental + # dim cpu3 multiplier = 1 + # dim cpu3 divisor = 1 + # dim cpu4 name = cpu4 + # dim cpu4 algorithm = incremental + # dim cpu4 multiplier = 1 + # dim cpu4 divisor = 1 + # dim cpu5 name = cpu5 + # dim cpu5 algorithm = incremental + # dim cpu5 multiplier = 1 + # dim cpu5 divisor = 1 + # dim cpu6 name = cpu6 + # dim cpu6 algorithm = incremental + # dim cpu6 multiplier = 1 + # dim cpu6 divisor = 1 + # dim cpu7 name = cpu7 + # dim cpu7 algorithm = incremental + # dim cpu7 multiplier = 1 + # dim cpu7 divisor = 1 + +[cpu.cpufreq] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpufreq + # chart type = line + # type = cpu + # family = cpufreq + # units = MHz + # context = cpufreq.cpufreq + # priority = 5003 + # name = cpu.cpufreq + # title = Current CPU Frequency + # dim cpu0 name = cpu0 + # dim cpu0 algorithm = absolute + # dim cpu0 multiplier = 1 + # dim cpu0 divisor = 1000 + # dim cpu1 name = cpu1 + # dim cpu1 algorithm = absolute + # dim cpu1 multiplier = 1 + # dim cpu1 divisor = 1000 + # dim cpu2 name = cpu2 + # dim cpu2 algorithm = absolute + # dim cpu2 multiplier = 1 + # dim cpu2 divisor = 1000 + # dim cpu3 name = cpu3 + # dim cpu3 algorithm = absolute + # dim cpu3 multiplier = 1 + # dim cpu3 divisor = 1000 + # dim cpu4 name = cpu4 + # dim cpu4 algorithm = absolute + # dim cpu4 multiplier = 1 + # dim cpu4 divisor = 1000 + # dim cpu5 name = cpu5 + # dim cpu5 algorithm = absolute + # dim cpu5 multiplier = 1 + # dim cpu5 divisor = 1000 + # dim cpu6 name = cpu6 + # dim cpu6 algorithm = absolute + # dim cpu6 multiplier = 1 + # dim cpu6 divisor = 1000 + # dim cpu7 name = cpu7 + # dim cpu7 algorithm = absolute + # dim cpu7 multiplier = 1 + # dim cpu7 divisor = 1000 + +[netdata.plugin_cgroups_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.plugin_cgroups_cpu + # chart type = stacked + # type = netdata + # family = cgroups + # units = milliseconds/s + # context = netdata.plugin_cgroups_cpu + # priority = 132000 + # name = netdata.plugin_cgroups_cpu + # title = Netdata CGroups Plugin CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.plugin_diskspace] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.plugin_diskspace + # chart type = stacked + # type = netdata + # family = diskspace + # units = milliseconds/s + # context = netdata.plugin_diskspace + # priority = 132020 + # name = netdata.plugin_diskspace + # title = Netdata Disk Space Plugin CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.plugin_diskspace_dt] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.plugin_diskspace_dt + # chart type = area + # type = netdata + # family = diskspace + # units = milliseconds/run + # context = netdata.plugin_diskspace_dt + # priority = 132021 + # name = netdata.plugin_diskspace_dt + # title = Netdata Disk Space Plugin Duration + # dim duration name = duration + # dim duration algorithm = absolute + # dim duration multiplier = 1 + # dim duration divisor = 1000 + +[cpu.cpu0_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu0_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6000 + # name = cpu.cpu0_cpuidle + # title = C-state residency time + # dim cpu0_active_time name = C0 (active) + # dim cpu0_active_time algorithm = percentage-of-incremental-row + # dim cpu0_active_time multiplier = 1 + # dim cpu0_active_time divisor = 1 + # dim cpu0_cpuidle_state0_time name = POLL + # dim cpu0_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu0_cpuidle_state0_time multiplier = 1 + # dim cpu0_cpuidle_state0_time divisor = 1 + # dim cpu0_cpuidle_state1_time name = C1 + # dim cpu0_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu0_cpuidle_state1_time multiplier = 1 + # dim cpu0_cpuidle_state1_time divisor = 1 + # dim cpu0_cpuidle_state2_time name = C1E + # dim cpu0_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu0_cpuidle_state2_time multiplier = 1 + # dim cpu0_cpuidle_state2_time divisor = 1 + # dim cpu0_cpuidle_state3_time name = C3 + # dim cpu0_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu0_cpuidle_state3_time multiplier = 1 + # dim cpu0_cpuidle_state3_time divisor = 1 + # dim cpu0_cpuidle_state4_time name = C6 + # dim cpu0_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu0_cpuidle_state4_time multiplier = 1 + # dim cpu0_cpuidle_state4_time divisor = 1 + # dim cpu0_cpuidle_state5_time name = C7s + # dim cpu0_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu0_cpuidle_state5_time multiplier = 1 + # dim cpu0_cpuidle_state5_time divisor = 1 + +[cpu.cpu1_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu1_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6001 + # name = cpu.cpu1_cpuidle + # title = C-state residency time + # dim cpu1_active_time name = C0 (active) + # dim cpu1_active_time algorithm = percentage-of-incremental-row + # dim cpu1_active_time multiplier = 1 + # dim cpu1_active_time divisor = 1 + # dim cpu1_cpuidle_state0_time name = POLL + # dim cpu1_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu1_cpuidle_state0_time multiplier = 1 + # dim cpu1_cpuidle_state0_time divisor = 1 + # dim cpu1_cpuidle_state1_time name = C1 + # dim cpu1_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu1_cpuidle_state1_time multiplier = 1 + # dim cpu1_cpuidle_state1_time divisor = 1 + # dim cpu1_cpuidle_state2_time name = C1E + # dim cpu1_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu1_cpuidle_state2_time multiplier = 1 + # dim cpu1_cpuidle_state2_time divisor = 1 + # dim cpu1_cpuidle_state3_time name = C3 + # dim cpu1_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu1_cpuidle_state3_time multiplier = 1 + # dim cpu1_cpuidle_state3_time divisor = 1 + # dim cpu1_cpuidle_state4_time name = C6 + # dim cpu1_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu1_cpuidle_state4_time multiplier = 1 + # dim cpu1_cpuidle_state4_time divisor = 1 + # dim cpu1_cpuidle_state5_time name = C7s + # dim cpu1_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu1_cpuidle_state5_time multiplier = 1 + # dim cpu1_cpuidle_state5_time divisor = 1 + +[cpu.cpu2_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu2_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6002 + # name = cpu.cpu2_cpuidle + # title = C-state residency time + # dim cpu2_active_time name = C0 (active) + # dim cpu2_active_time algorithm = percentage-of-incremental-row + # dim cpu2_active_time multiplier = 1 + # dim cpu2_active_time divisor = 1 + # dim cpu2_cpuidle_state0_time name = POLL + # dim cpu2_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu2_cpuidle_state0_time multiplier = 1 + # dim cpu2_cpuidle_state0_time divisor = 1 + # dim cpu2_cpuidle_state1_time name = C1 + # dim cpu2_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu2_cpuidle_state1_time multiplier = 1 + # dim cpu2_cpuidle_state1_time divisor = 1 + # dim cpu2_cpuidle_state2_time name = C1E + # dim cpu2_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu2_cpuidle_state2_time multiplier = 1 + # dim cpu2_cpuidle_state2_time divisor = 1 + # dim cpu2_cpuidle_state3_time name = C3 + # dim cpu2_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu2_cpuidle_state3_time multiplier = 1 + # dim cpu2_cpuidle_state3_time divisor = 1 + # dim cpu2_cpuidle_state4_time name = C6 + # dim cpu2_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu2_cpuidle_state4_time multiplier = 1 + # dim cpu2_cpuidle_state4_time divisor = 1 + # dim cpu2_cpuidle_state5_time name = C7s + # dim cpu2_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu2_cpuidle_state5_time multiplier = 1 + # dim cpu2_cpuidle_state5_time divisor = 1 + +[cpu.cpu3_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu3_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6003 + # name = cpu.cpu3_cpuidle + # title = C-state residency time + # dim cpu3_active_time name = C0 (active) + # dim cpu3_active_time algorithm = percentage-of-incremental-row + # dim cpu3_active_time multiplier = 1 + # dim cpu3_active_time divisor = 1 + # dim cpu3_cpuidle_state0_time name = POLL + # dim cpu3_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu3_cpuidle_state0_time multiplier = 1 + # dim cpu3_cpuidle_state0_time divisor = 1 + # dim cpu3_cpuidle_state1_time name = C1 + # dim cpu3_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu3_cpuidle_state1_time multiplier = 1 + # dim cpu3_cpuidle_state1_time divisor = 1 + # dim cpu3_cpuidle_state2_time name = C1E + # dim cpu3_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu3_cpuidle_state2_time multiplier = 1 + # dim cpu3_cpuidle_state2_time divisor = 1 + # dim cpu3_cpuidle_state3_time name = C3 + # dim cpu3_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu3_cpuidle_state3_time multiplier = 1 + # dim cpu3_cpuidle_state3_time divisor = 1 + # dim cpu3_cpuidle_state4_time name = C6 + # dim cpu3_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu3_cpuidle_state4_time multiplier = 1 + # dim cpu3_cpuidle_state4_time divisor = 1 + # dim cpu3_cpuidle_state5_time name = C7s + # dim cpu3_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu3_cpuidle_state5_time multiplier = 1 + # dim cpu3_cpuidle_state5_time divisor = 1 + +[cpu.cpu4_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu4_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6004 + # name = cpu.cpu4_cpuidle + # title = C-state residency time + # dim cpu4_active_time name = C0 (active) + # dim cpu4_active_time algorithm = percentage-of-incremental-row + # dim cpu4_active_time multiplier = 1 + # dim cpu4_active_time divisor = 1 + # dim cpu4_cpuidle_state0_time name = POLL + # dim cpu4_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu4_cpuidle_state0_time multiplier = 1 + # dim cpu4_cpuidle_state0_time divisor = 1 + # dim cpu4_cpuidle_state1_time name = C1 + # dim cpu4_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu4_cpuidle_state1_time multiplier = 1 + # dim cpu4_cpuidle_state1_time divisor = 1 + # dim cpu4_cpuidle_state2_time name = C1E + # dim cpu4_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu4_cpuidle_state2_time multiplier = 1 + # dim cpu4_cpuidle_state2_time divisor = 1 + # dim cpu4_cpuidle_state3_time name = C3 + # dim cpu4_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu4_cpuidle_state3_time multiplier = 1 + # dim cpu4_cpuidle_state3_time divisor = 1 + # dim cpu4_cpuidle_state4_time name = C6 + # dim cpu4_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu4_cpuidle_state4_time multiplier = 1 + # dim cpu4_cpuidle_state4_time divisor = 1 + # dim cpu4_cpuidle_state5_time name = C7s + # dim cpu4_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu4_cpuidle_state5_time multiplier = 1 + # dim cpu4_cpuidle_state5_time divisor = 1 + +[cpu.cpu5_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu5_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6005 + # name = cpu.cpu5_cpuidle + # title = C-state residency time + # dim cpu5_active_time name = C0 (active) + # dim cpu5_active_time algorithm = percentage-of-incremental-row + # dim cpu5_active_time multiplier = 1 + # dim cpu5_active_time divisor = 1 + # dim cpu5_cpuidle_state0_time name = POLL + # dim cpu5_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu5_cpuidle_state0_time multiplier = 1 + # dim cpu5_cpuidle_state0_time divisor = 1 + # dim cpu5_cpuidle_state1_time name = C1 + # dim cpu5_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu5_cpuidle_state1_time multiplier = 1 + # dim cpu5_cpuidle_state1_time divisor = 1 + # dim cpu5_cpuidle_state2_time name = C1E + # dim cpu5_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu5_cpuidle_state2_time multiplier = 1 + # dim cpu5_cpuidle_state2_time divisor = 1 + # dim cpu5_cpuidle_state3_time name = C3 + # dim cpu5_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu5_cpuidle_state3_time multiplier = 1 + # dim cpu5_cpuidle_state3_time divisor = 1 + # dim cpu5_cpuidle_state4_time name = C6 + # dim cpu5_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu5_cpuidle_state4_time multiplier = 1 + # dim cpu5_cpuidle_state4_time divisor = 1 + # dim cpu5_cpuidle_state5_time name = C7s + # dim cpu5_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu5_cpuidle_state5_time multiplier = 1 + # dim cpu5_cpuidle_state5_time divisor = 1 + +[cpu.cpu6_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu6_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6006 + # name = cpu.cpu6_cpuidle + # title = C-state residency time + # dim cpu6_active_time name = C0 (active) + # dim cpu6_active_time algorithm = percentage-of-incremental-row + # dim cpu6_active_time multiplier = 1 + # dim cpu6_active_time divisor = 1 + # dim cpu6_cpuidle_state0_time name = POLL + # dim cpu6_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu6_cpuidle_state0_time multiplier = 1 + # dim cpu6_cpuidle_state0_time divisor = 1 + # dim cpu6_cpuidle_state1_time name = C1 + # dim cpu6_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu6_cpuidle_state1_time multiplier = 1 + # dim cpu6_cpuidle_state1_time divisor = 1 + # dim cpu6_cpuidle_state2_time name = C1E + # dim cpu6_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu6_cpuidle_state2_time multiplier = 1 + # dim cpu6_cpuidle_state2_time divisor = 1 + # dim cpu6_cpuidle_state3_time name = C3 + # dim cpu6_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu6_cpuidle_state3_time multiplier = 1 + # dim cpu6_cpuidle_state3_time divisor = 1 + # dim cpu6_cpuidle_state4_time name = C6 + # dim cpu6_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu6_cpuidle_state4_time multiplier = 1 + # dim cpu6_cpuidle_state4_time divisor = 1 + # dim cpu6_cpuidle_state5_time name = C7s + # dim cpu6_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu6_cpuidle_state5_time multiplier = 1 + # dim cpu6_cpuidle_state5_time divisor = 1 + +[cpu.cpu7_cpuidle] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu7_cpuidle + # chart type = stacked + # type = cpu + # family = cpuidle + # units = percentage + # context = cpuidle.cpuidle + # priority = 6007 + # name = cpu.cpu7_cpuidle + # title = C-state residency time + # dim cpu7_active_time name = C0 (active) + # dim cpu7_active_time algorithm = percentage-of-incremental-row + # dim cpu7_active_time multiplier = 1 + # dim cpu7_active_time divisor = 1 + # dim cpu7_cpuidle_state0_time name = POLL + # dim cpu7_cpuidle_state0_time algorithm = percentage-of-incremental-row + # dim cpu7_cpuidle_state0_time multiplier = 1 + # dim cpu7_cpuidle_state0_time divisor = 1 + # dim cpu7_cpuidle_state1_time name = C1 + # dim cpu7_cpuidle_state1_time algorithm = percentage-of-incremental-row + # dim cpu7_cpuidle_state1_time multiplier = 1 + # dim cpu7_cpuidle_state1_time divisor = 1 + # dim cpu7_cpuidle_state2_time name = C1E + # dim cpu7_cpuidle_state2_time algorithm = percentage-of-incremental-row + # dim cpu7_cpuidle_state2_time multiplier = 1 + # dim cpu7_cpuidle_state2_time divisor = 1 + # dim cpu7_cpuidle_state3_time name = C3 + # dim cpu7_cpuidle_state3_time algorithm = percentage-of-incremental-row + # dim cpu7_cpuidle_state3_time multiplier = 1 + # dim cpu7_cpuidle_state3_time divisor = 1 + # dim cpu7_cpuidle_state4_time name = C6 + # dim cpu7_cpuidle_state4_time algorithm = percentage-of-incremental-row + # dim cpu7_cpuidle_state4_time multiplier = 1 + # dim cpu7_cpuidle_state4_time divisor = 1 + # dim cpu7_cpuidle_state5_time name = C7s + # dim cpu7_cpuidle_state5_time algorithm = percentage-of-incremental-row + # dim cpu7_cpuidle_state5_time multiplier = 1 + # dim cpu7_cpuidle_state5_time divisor = 1 + +[system.uptime] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.uptime + # chart type = line + # type = system + # family = uptime + # units = seconds + # context = system.uptime + # priority = 1000 + # name = system.uptime + # title = System Uptime + # dim uptime name = uptime + # dim uptime algorithm = absolute + # dim uptime multiplier = 1 + # dim uptime divisor = 1000 + +[system.load] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.load + # chart type = line + # type = system + # family = load + # units = load + # context = system.load + # priority = 100 + # name = system.load + # title = System Load Average + # dim load1 name = load1 + # dim load1 algorithm = absolute + # dim load1 multiplier = 1 + # dim load1 divisor = 1000 + # dim load5 name = load5 + # dim load5 algorithm = absolute + # dim load5 multiplier = 1 + # dim load5 divisor = 1000 + # dim load15 name = load15 + # dim load15 algorithm = absolute + # dim load15 multiplier = 1 + # dim load15 divisor = 1000 + +[system.active_processes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.active_processes + # chart type = line + # type = system + # family = processes + # units = processes + # context = system.active_processes + # priority = 750 + # name = system.active_processes + # title = System Active Processes + # dim active name = active + # dim active algorithm = absolute + # dim active multiplier = 1 + # dim active divisor = 1 + +[system.entropy] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.entropy + # chart type = line + # type = system + # family = entropy + # units = entropy + # context = system.entropy + # priority = 1000 + # name = system.entropy + # title = Available Entropy + # dim entropy name = entropy + # dim entropy algorithm = absolute + # dim entropy multiplier = 1 + # dim entropy divisor = 1 + +[system.interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.interrupts + # chart type = stacked + # type = system + # family = interrupts + # units = interrupts/s + # context = system.interrupts + # priority = 1000 + # name = system.interrupts + # title = System interrupts + # dim 0 name = timer_0 + # dim 0 algorithm = incremental + # dim 0 multiplier = 1 + # dim 0 divisor = 1 + # dim 8 name = rtc0_8 + # dim 8 algorithm = incremental + # dim 8 multiplier = 1 + # dim 8 divisor = 1 + # dim 16 name = ehci_hcd:usb1_16 + # dim 16 algorithm = incremental + # dim 16 multiplier = 1 + # dim 16 divisor = 1 + # dim 18 name = snd_hda_intel:card1_18 + # dim 18 algorithm = incremental + # dim 18 multiplier = 1 + # dim 18 divisor = 1 + # dim 23 name = ehci_hcd:usb3_23 + # dim 23 algorithm = incremental + # dim 23 multiplier = 1 + # dim 23 divisor = 1 + # dim 25 name = ahci[0000:00:1f.2]_25 + # dim 25 algorithm = incremental + # dim 25 multiplier = 1 + # dim 25 divisor = 1 + # dim 26 name = xhci_hcd_26 + # dim 26 algorithm = incremental + # dim 26 multiplier = 1 + # dim 26 divisor = 1 + # dim 27 name = mei_me_27 + # dim 27 algorithm = incremental + # dim 27 multiplier = 1 + # dim 27 divisor = 1 + # dim 28 name = snd_hda_intel:card0_28 + # dim 28 algorithm = incremental + # dim 28 multiplier = 1 + # dim 28 divisor = 1 + # dim 29 name = enp4s0_29 + # dim 29 algorithm = incremental + # dim 29 multiplier = 1 + # dim 29 divisor = 1 + # dim 30 name = enp4s0-TxRx-0_30 + # dim 30 algorithm = incremental + # dim 30 multiplier = 1 + # dim 30 divisor = 1 + # dim 31 name = enp4s0-tx-1_31 + # dim 31 algorithm = incremental + # dim 31 multiplier = 1 + # dim 31 divisor = 1 + # dim 32 name = enp4s0-tx-2_32 + # dim 32 algorithm = incremental + # dim 32 multiplier = 1 + # dim 32 divisor = 1 + # dim 33 name = enp4s0-tx-3_33 + # dim 33 algorithm = incremental + # dim 33 multiplier = 1 + # dim 33 divisor = 1 + # dim 34 name = nvidia_34 + # dim 34 algorithm = incremental + # dim 34 multiplier = 1 + # dim 34 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim IWI name = IWI + # dim IWI algorithm = incremental + # dim IWI multiplier = 1 + # dim IWI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu0_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu0_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1100 + # name = cpu.cpu0_interrupts + # title = CPU0 Interrupts + # dim 0 name = timer_0 + # dim 0 algorithm = incremental + # dim 0 multiplier = 1 + # dim 0 divisor = 1 + # dim 34 name = nvidia_34 + # dim 34 algorithm = incremental + # dim 34 multiplier = 1 + # dim 34 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim IWI name = IWI + # dim IWI algorithm = incremental + # dim IWI multiplier = 1 + # dim IWI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu1_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu1_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1101 + # name = cpu.cpu1_interrupts + # title = CPU1 Interrupts + # dim 27 name = mei_me_27 + # dim 27 algorithm = incremental + # dim 27 multiplier = 1 + # dim 27 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu2_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu2_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1102 + # name = cpu.cpu2_interrupts + # title = CPU2 Interrupts + # dim 8 name = rtc0_8 + # dim 8 algorithm = incremental + # dim 8 multiplier = 1 + # dim 8 divisor = 1 + # dim 28 name = snd_hda_intel:card0_28 + # dim 28 algorithm = incremental + # dim 28 multiplier = 1 + # dim 28 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu3_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu3_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1103 + # name = cpu.cpu3_interrupts + # title = CPU3 Interrupts + # dim 18 name = snd_hda_intel:card1_18 + # dim 18 algorithm = incremental + # dim 18 multiplier = 1 + # dim 18 divisor = 1 + # dim 29 name = enp4s0_29 + # dim 29 algorithm = incremental + # dim 29 multiplier = 1 + # dim 29 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim IWI name = IWI + # dim IWI algorithm = incremental + # dim IWI multiplier = 1 + # dim IWI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu4_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu4_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1104 + # name = cpu.cpu4_interrupts + # title = CPU4 Interrupts + # dim 16 name = ehci_hcd:usb1_16 + # dim 16 algorithm = incremental + # dim 16 multiplier = 1 + # dim 16 divisor = 1 + # dim 30 name = enp4s0-TxRx-0_30 + # dim 30 algorithm = incremental + # dim 30 multiplier = 1 + # dim 30 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu5_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu5_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1105 + # name = cpu.cpu5_interrupts + # title = CPU5 Interrupts + # dim 25 name = ahci[0000:00:1f.2]_25 + # dim 25 algorithm = incremental + # dim 25 multiplier = 1 + # dim 25 divisor = 1 + # dim 31 name = enp4s0-tx-1_31 + # dim 31 algorithm = incremental + # dim 31 multiplier = 1 + # dim 31 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu6_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu6_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1106 + # name = cpu.cpu6_interrupts + # title = CPU6 Interrupts + # dim 26 name = xhci_hcd_26 + # dim 26 algorithm = incremental + # dim 26 multiplier = 1 + # dim 26 divisor = 1 + # dim 32 name = enp4s0-tx-2_32 + # dim 32 algorithm = incremental + # dim 32 multiplier = 1 + # dim 32 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim IWI name = IWI + # dim IWI algorithm = incremental + # dim IWI multiplier = 1 + # dim IWI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[cpu.cpu7_interrupts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu7_interrupts + # chart type = stacked + # type = cpu + # family = interrupts + # units = interrupts/s + # context = cpu.interrupts + # priority = 1107 + # name = cpu.cpu7_interrupts + # title = CPU7 Interrupts + # dim 23 name = ehci_hcd:usb3_23 + # dim 23 algorithm = incremental + # dim 23 multiplier = 1 + # dim 23 divisor = 1 + # dim 33 name = enp4s0-tx-3_33 + # dim 33 algorithm = incremental + # dim 33 multiplier = 1 + # dim 33 divisor = 1 + # dim NMI name = NMI + # dim NMI algorithm = incremental + # dim NMI multiplier = 1 + # dim NMI divisor = 1 + # dim LOC name = LOC + # dim LOC algorithm = incremental + # dim LOC multiplier = 1 + # dim LOC divisor = 1 + # dim PMI name = PMI + # dim PMI algorithm = incremental + # dim PMI multiplier = 1 + # dim PMI divisor = 1 + # dim RES name = RES + # dim RES algorithm = incremental + # dim RES multiplier = 1 + # dim RES divisor = 1 + # dim CAL name = CAL + # dim CAL algorithm = incremental + # dim CAL multiplier = 1 + # dim CAL divisor = 1 + # dim TLB name = TLB + # dim TLB algorithm = incremental + # dim TLB multiplier = 1 + # dim TLB divisor = 1 + # dim MCP name = MCP + # dim MCP algorithm = incremental + # dim MCP multiplier = 1 + # dim MCP divisor = 1 + +[system.softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.softirqs + # chart type = stacked + # type = system + # family = softirqs + # units = softirqs/s + # context = system.softirqs + # priority = 950 + # name = system.softirqs + # title = System softirqs + # dim HI name = HI + # dim HI algorithm = incremental + # dim HI multiplier = 1 + # dim HI divisor = 1 + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim HRTIMER name = HRTIMER + # dim HRTIMER algorithm = incremental + # dim HRTIMER multiplier = 1 + # dim HRTIMER divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu0_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu0_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3000 + # name = cpu.cpu0_softirqs + # title = CPU0 softirqs + # dim HI name = HI + # dim HI algorithm = incremental + # dim HI multiplier = 1 + # dim HI divisor = 1 + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu1_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu1_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3001 + # name = cpu.cpu1_softirqs + # title = CPU1 softirqs + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu2_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu2_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3002 + # name = cpu.cpu2_softirqs + # title = CPU2 softirqs + # dim HI name = HI + # dim HI algorithm = incremental + # dim HI multiplier = 1 + # dim HI divisor = 1 + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu3_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu3_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3003 + # name = cpu.cpu3_softirqs + # title = CPU3 softirqs + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu4_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu4_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3004 + # name = cpu.cpu4_softirqs + # title = CPU4 softirqs + # dim HI name = HI + # dim HI algorithm = incremental + # dim HI multiplier = 1 + # dim HI divisor = 1 + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim HRTIMER name = HRTIMER + # dim HRTIMER algorithm = incremental + # dim HRTIMER multiplier = 1 + # dim HRTIMER divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu5_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu5_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3005 + # name = cpu.cpu5_softirqs + # title = CPU5 softirqs + # dim HI name = HI + # dim HI algorithm = incremental + # dim HI multiplier = 1 + # dim HI divisor = 1 + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu6_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu6_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3006 + # name = cpu.cpu6_softirqs + # title = CPU6 softirqs + # dim HI name = HI + # dim HI algorithm = incremental + # dim HI multiplier = 1 + # dim HI divisor = 1 + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[cpu.cpu7_softirqs] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu7_softirqs + # chart type = stacked + # type = cpu + # family = softirqs + # units = softirqs/s + # context = cpu.softirqs + # priority = 3007 + # name = cpu.cpu7_softirqs + # title = CPU7 softirqs + # dim TIMER name = TIMER + # dim TIMER algorithm = incremental + # dim TIMER multiplier = 1 + # dim TIMER divisor = 1 + # dim NET_TX name = NET_TX + # dim NET_TX algorithm = incremental + # dim NET_TX multiplier = 1 + # dim NET_TX divisor = 1 + # dim NET_RX name = NET_RX + # dim NET_RX algorithm = incremental + # dim NET_RX multiplier = 1 + # dim NET_RX divisor = 1 + # dim TASKLET name = TASKLET + # dim TASKLET algorithm = incremental + # dim TASKLET multiplier = 1 + # dim TASKLET divisor = 1 + # dim SCHED name = SCHED + # dim SCHED algorithm = incremental + # dim SCHED multiplier = 1 + # dim SCHED divisor = 1 + # dim HRTIMER name = HRTIMER + # dim HRTIMER algorithm = incremental + # dim HRTIMER multiplier = 1 + # dim HRTIMER divisor = 1 + # dim RCU name = RCU + # dim RCU algorithm = incremental + # dim RCU multiplier = 1 + # dim RCU divisor = 1 + +[system.pgpgio] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.pgpgio + # chart type = area + # type = system + # family = disk + # units = KiB/s + # context = system.pgpgio + # priority = 151 + # name = system.pgpgio + # title = Memory Paged from/to disk + # dim in name = in + # dim in algorithm = incremental + # dim in multiplier = 1 + # dim in divisor = 1 + # dim out name = out + # dim out algorithm = incremental + # dim out multiplier = -1 + # dim out divisor = 1 + +[mem.pgfaults] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/mem.pgfaults + # chart type = line + # type = mem + # family = system + # units = faults/s + # context = mem.pgfaults + # priority = 1030 + # name = mem.pgfaults + # title = Memory Page Faults + # dim minor name = minor + # dim minor algorithm = incremental + # dim minor multiplier = 1 + # dim minor divisor = 1 + # dim major name = major + # dim major algorithm = incremental + # dim major multiplier = -1 + # dim major divisor = 1 + +[system.ram] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.ram + # chart type = stacked + # type = system + # family = ram + # units = MiB + # context = system.ram + # priority = 200 + # name = system.ram + # title = System RAM + # dim free name = free + # dim free algorithm = absolute + # dim free multiplier = 1 + # dim free divisor = 1024 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 1 + # dim used divisor = 1024 + # dim cached name = cached + # dim cached algorithm = absolute + # dim cached multiplier = 1 + # dim cached divisor = 1024 + # dim buffers name = buffers + # dim buffers algorithm = absolute + # dim buffers multiplier = 1 + # dim buffers divisor = 1024 + +[mem.available] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/mem.available + # chart type = area + # type = mem + # family = system + # units = MiB + # context = mem.available + # priority = 1010 + # name = mem.available + # title = Available RAM for applications + # dim MemAvailable name = avail + # dim MemAvailable algorithm = absolute + # dim MemAvailable multiplier = 1 + # dim MemAvailable divisor = 1024 + +[system.swap] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.swap + # chart type = stacked + # type = system + # family = swap + # units = MiB + # context = system.swap + # priority = 201 + # name = system.swap + # title = System Swap + # dim free name = free + # dim free algorithm = absolute + # dim free multiplier = 1 + # dim free divisor = 1024 + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 1 + # dim used divisor = 1024 + +[mem.committed] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/mem.committed + # chart type = area + # type = mem + # family = system + # units = MiB + # context = mem.committed + # priority = 1020 + # name = mem.committed + # title = Committed (Allocated) Memory + # dim Committed_AS name = Committed_AS + # dim Committed_AS algorithm = absolute + # dim Committed_AS multiplier = 1 + # dim Committed_AS divisor = 1024 + +[mem.writeback] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/mem.writeback + # chart type = line + # type = mem + # family = kernel + # units = MiB + # context = mem.writeback + # priority = 1100 + # name = mem.writeback + # title = Writeback Memory + # dim Dirty name = Dirty + # dim Dirty algorithm = absolute + # dim Dirty multiplier = 1 + # dim Dirty divisor = 1024 + # dim Writeback name = Writeback + # dim Writeback algorithm = absolute + # dim Writeback multiplier = 1 + # dim Writeback divisor = 1024 + # dim FuseWriteback name = FuseWriteback + # dim FuseWriteback algorithm = absolute + # dim FuseWriteback multiplier = 1 + # dim FuseWriteback divisor = 1024 + # dim NfsWriteback name = NfsWriteback + # dim NfsWriteback algorithm = absolute + # dim NfsWriteback multiplier = 1 + # dim NfsWriteback divisor = 1024 + # dim Bounce name = Bounce + # dim Bounce algorithm = absolute + # dim Bounce multiplier = 1 + # dim Bounce divisor = 1024 + +[mem.kernel] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/mem.kernel + # chart type = stacked + # type = mem + # family = kernel + # units = MiB + # context = mem.kernel + # priority = 1101 + # name = mem.kernel + # title = Memory Used by Kernel + # dim Slab name = Slab + # dim Slab algorithm = absolute + # dim Slab multiplier = 1 + # dim Slab divisor = 1024 + # dim KernelStack name = KernelStack + # dim KernelStack algorithm = absolute + # dim KernelStack multiplier = 1 + # dim KernelStack divisor = 1024 + # dim PageTables name = PageTables + # dim PageTables algorithm = absolute + # dim PageTables multiplier = 1 + # dim PageTables divisor = 1024 + # dim VmallocUsed name = VmallocUsed + # dim VmallocUsed algorithm = absolute + # dim VmallocUsed multiplier = 1 + # dim VmallocUsed divisor = 1024 + +[mem.slab] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/mem.slab + # chart type = stacked + # type = mem + # family = slab + # units = MiB + # context = mem.slab + # priority = 1200 + # name = mem.slab + # title = Reclaimable Kernel Memory + # dim reclaimable name = reclaimable + # dim reclaimable algorithm = absolute + # dim reclaimable multiplier = 1 + # dim reclaimable divisor = 1024 + # dim unreclaimable name = unreclaimable + # dim unreclaimable algorithm = absolute + # dim unreclaimable multiplier = 1 + # dim unreclaimable divisor = 1024 + +[mem.transparent_hugepages] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/mem.transparent_hugepages + # chart type = stacked + # type = mem + # family = hugepages + # units = MiB + # context = mem.transparent_hugepages + # priority = 1250 + # name = mem.transparent_hugepages + # title = Transparent HugePages Memory + # dim anonymous name = anonymous + # dim anonymous algorithm = absolute + # dim anonymous multiplier = 1 + # dim anonymous divisor = 1024 + # dim shmem name = shmem + # dim shmem algorithm = absolute + # dim shmem multiplier = 1 + # dim shmem divisor = 1024 + +[net.eth0] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/net.eth0 + # chart type = area + # type = net + # family = eth0 + # units = kilobits/s + # context = net.net + # priority = 7000 + # name = net.eth0 + # title = Bandwidth + # dim received name = received + # dim received algorithm = incremental + # dim received multiplier = 8 + # dim received divisor = 1000 + # dim sent name = sent + # dim sent algorithm = incremental + # dim sent multiplier = -8 + # dim sent divisor = 1000 + +[net_packets.eth0] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/net_packets.eth0 + # chart type = line + # type = net_packets + # family = eth0 + # units = packets/s + # context = net.packets + # priority = 7001 + # name = net_packets.eth0 + # title = Packets + # dim received name = received + # dim received algorithm = incremental + # dim received multiplier = 1 + # dim received divisor = 1 + # dim sent name = sent + # dim sent algorithm = incremental + # dim sent multiplier = -1 + # dim sent divisor = 1 + # dim multicast name = multicast + # dim multicast algorithm = incremental + # dim multicast multiplier = 1 + # dim multicast divisor = 1 + +[ipv4.sockstat_sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.sockstat_sockets + # chart type = line + # type = ipv4 + # family = sockets + # units = sockets + # context = ipv4.sockstat_sockets + # priority = 5100 + # name = ipv4.sockstat_sockets + # title = IPv4 Sockets Used + # dim used name = used + # dim used algorithm = absolute + # dim used multiplier = 1 + # dim used divisor = 1 + +[ipv4.sockstat_tcp_sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.sockstat_tcp_sockets + # chart type = line + # type = ipv4 + # family = tcp + # units = sockets + # context = ipv4.sockstat_tcp_sockets + # priority = 5201 + # name = ipv4.sockstat_tcp_sockets + # title = IPv4 TCP Sockets + # dim alloc name = alloc + # dim alloc algorithm = absolute + # dim alloc multiplier = 1 + # dim alloc divisor = 1 + # dim orphan name = orphan + # dim orphan algorithm = absolute + # dim orphan multiplier = 1 + # dim orphan divisor = 1 + # dim inuse name = inuse + # dim inuse algorithm = absolute + # dim inuse multiplier = 1 + # dim inuse divisor = 1 + # dim timewait name = timewait + # dim timewait algorithm = absolute + # dim timewait multiplier = 1 + # dim timewait divisor = 1 + +[ipv4.sockstat_tcp_mem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.sockstat_tcp_mem + # chart type = area + # type = ipv4 + # family = tcp + # units = KiB + # context = ipv4.sockstat_tcp_mem + # priority = 5290 + # name = ipv4.sockstat_tcp_mem + # title = IPv4 TCP Sockets Memory + # dim mem name = mem + # dim mem algorithm = absolute + # dim mem multiplier = 4096 + # dim mem divisor = 1024 + +[ipv4.sockstat_udp_sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.sockstat_udp_sockets + # chart type = line + # type = ipv4 + # family = udp + # units = sockets + # context = ipv4.sockstat_udp_sockets + # priority = 5300 + # name = ipv4.sockstat_udp_sockets + # title = IPv4 UDP Sockets + # dim inuse name = inuse + # dim inuse algorithm = absolute + # dim inuse multiplier = 1 + # dim inuse divisor = 1 + +[ipv4.sockstat_udp_mem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.sockstat_udp_mem + # chart type = area + # type = ipv4 + # family = udp + # units = KiB + # context = ipv4.sockstat_udp_mem + # priority = 5390 + # name = ipv4.sockstat_udp_mem + # title = IPv4 UDP Sockets Memory + # dim mem name = mem + # dim mem algorithm = absolute + # dim mem multiplier = 4096 + # dim mem divisor = 1024 + +[ipv6.sockstat6_tcp_sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv6.sockstat6_tcp_sockets + # chart type = line + # type = ipv6 + # family = tcp6 + # units = sockets + # context = ipv6.sockstat6_tcp_sockets + # priority = 6500 + # name = ipv6.sockstat6_tcp_sockets + # title = IPv6 TCP Sockets + # dim inuse name = inuse + # dim inuse algorithm = absolute + # dim inuse multiplier = 1 + # dim inuse divisor = 1 + +[ip.tcpconnaborts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ip.tcpconnaborts + # chart type = line + # type = ip + # family = tcp + # units = connections/s + # context = ip.tcpconnaborts + # priority = 4210 + # name = ip.tcpconnaborts + # title = TCP Connection Aborts + # dim TCPAbortOnData name = baddata + # dim TCPAbortOnData algorithm = incremental + # dim TCPAbortOnData multiplier = 1 + # dim TCPAbortOnData divisor = 1 + # dim TCPAbortOnClose name = userclosed + # dim TCPAbortOnClose algorithm = incremental + # dim TCPAbortOnClose multiplier = 1 + # dim TCPAbortOnClose divisor = 1 + # dim TCPAbortOnMemory name = nomemory + # dim TCPAbortOnMemory algorithm = incremental + # dim TCPAbortOnMemory multiplier = 1 + # dim TCPAbortOnMemory divisor = 1 + # dim TCPAbortOnTimeout name = timeout + # dim TCPAbortOnTimeout algorithm = incremental + # dim TCPAbortOnTimeout multiplier = 1 + # dim TCPAbortOnTimeout divisor = 1 + # dim TCPAbortOnLinger name = linger + # dim TCPAbortOnLinger algorithm = incremental + # dim TCPAbortOnLinger multiplier = 1 + # dim TCPAbortOnLinger divisor = 1 + # dim TCPAbortFailed name = failed + # dim TCPAbortFailed algorithm = incremental + # dim TCPAbortFailed multiplier = -1 + # dim TCPAbortFailed divisor = 1 + +[ip.tcpofo] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ip.tcpofo + # chart type = line + # type = ip + # family = tcp + # units = packets/s + # context = ip.tcpofo + # priority = 4250 + # name = ip.tcpofo + # title = TCP Out-Of-Order Queue + # dim TCPOFOQueue name = inqueue + # dim TCPOFOQueue algorithm = incremental + # dim TCPOFOQueue multiplier = 1 + # dim TCPOFOQueue divisor = 1 + # dim TCPOFODrop name = dropped + # dim TCPOFODrop algorithm = incremental + # dim TCPOFODrop multiplier = -1 + # dim TCPOFODrop divisor = 1 + # dim TCPOFOMerge name = merged + # dim TCPOFOMerge algorithm = incremental + # dim TCPOFOMerge multiplier = 1 + # dim TCPOFOMerge divisor = 1 + # dim OfoPruned name = pruned + # dim OfoPruned algorithm = incremental + # dim OfoPruned multiplier = -1 + # dim OfoPruned divisor = 1 + +[system.ip] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.ip + # chart type = area + # type = system + # family = network + # units = kilobits/s + # context = system.ip + # priority = 501 + # name = system.ip + # title = IP Bandwidth + # dim InOctets name = received + # dim InOctets algorithm = incremental + # dim InOctets multiplier = 8 + # dim InOctets divisor = 1000 + # dim OutOctets name = sent + # dim OutOctets algorithm = incremental + # dim OutOctets multiplier = -8 + # dim OutOctets divisor = 1000 + +[ip.bcast] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ip.bcast + # chart type = area + # type = ip + # family = broadcast + # units = kilobits/s + # context = ip.bcast + # priority = 4500 + # name = ip.bcast + # title = IP Broadcast Bandwidth + # dim InBcastOctets name = received + # dim InBcastOctets algorithm = incremental + # dim InBcastOctets multiplier = 8 + # dim InBcastOctets divisor = 1000 + # dim OutBcastOctets name = sent + # dim OutBcastOctets algorithm = incremental + # dim OutBcastOctets multiplier = -8 + # dim OutBcastOctets divisor = 1000 + +[ip.bcastpkts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ip.bcastpkts + # chart type = line + # type = ip + # family = broadcast + # units = packets/s + # context = ip.bcastpkts + # priority = 4510 + # name = ip.bcastpkts + # title = IP Broadcast Packets + # dim InBcastPkts name = received + # dim InBcastPkts algorithm = incremental + # dim InBcastPkts multiplier = 1 + # dim InBcastPkts divisor = 1 + # dim OutBcastPkts name = sent + # dim OutBcastPkts algorithm = incremental + # dim OutBcastPkts multiplier = -1 + # dim OutBcastPkts divisor = 1 + +[ip.ecnpkts] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ip.ecnpkts + # chart type = line + # type = ip + # family = ecn + # units = packets/s + # context = ip.ecnpkts + # priority = 4700 + # name = ip.ecnpkts + # title = IP ECN Statistics + # dim InCEPkts name = CEP + # dim InCEPkts algorithm = incremental + # dim InCEPkts multiplier = 1 + # dim InCEPkts divisor = 1 + # dim InNoECTPkts name = NoECTP + # dim InNoECTPkts algorithm = incremental + # dim InNoECTPkts multiplier = -1 + # dim InNoECTPkts divisor = 1 + # dim InECT0Pkts name = ECTP0 + # dim InECT0Pkts algorithm = incremental + # dim InECT0Pkts multiplier = 1 + # dim InECT0Pkts divisor = 1 + # dim InECT1Pkts name = ECTP1 + # dim InECT1Pkts algorithm = incremental + # dim InECT1Pkts multiplier = 1 + # dim InECT1Pkts divisor = 1 + +[ipv4.packets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.packets + # chart type = line + # type = ipv4 + # family = packets + # units = packets/s + # context = ipv4.packets + # priority = 5130 + # name = ipv4.packets + # title = IPv4 Packets + # dim InReceives name = received + # dim InReceives algorithm = incremental + # dim InReceives multiplier = 1 + # dim InReceives divisor = 1 + # dim OutRequests name = sent + # dim OutRequests algorithm = incremental + # dim OutRequests multiplier = -1 + # dim OutRequests divisor = 1 + # dim ForwDatagrams name = forwarded + # dim ForwDatagrams algorithm = incremental + # dim ForwDatagrams multiplier = 1 + # dim ForwDatagrams divisor = 1 + # dim InDelivers name = delivered + # dim InDelivers algorithm = incremental + # dim InDelivers multiplier = 1 + # dim InDelivers divisor = 1 + +[ipv4.tcppackets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.tcppackets + # chart type = line + # type = ipv4 + # family = tcp + # units = packets/s + # context = ipv4.tcppackets + # priority = 5204 + # name = ipv4.tcppackets + # title = IPv4 TCP Packets + # dim InSegs name = received + # dim InSegs algorithm = incremental + # dim InSegs multiplier = 1 + # dim InSegs divisor = 1 + # dim OutSegs name = sent + # dim OutSegs algorithm = incremental + # dim OutSegs multiplier = -1 + # dim OutSegs divisor = 1 + +[ipv4.tcpopens] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.tcpopens + # chart type = line + # type = ipv4 + # family = tcp + # units = connections/s + # context = ipv4.tcpopens + # priority = 5205 + # name = ipv4.tcpopens + # title = IPv4 TCP Opens + # dim ActiveOpens name = active + # dim ActiveOpens algorithm = incremental + # dim ActiveOpens multiplier = 1 + # dim ActiveOpens divisor = 1 + # dim PassiveOpens name = passive + # dim PassiveOpens algorithm = incremental + # dim PassiveOpens multiplier = 1 + # dim PassiveOpens divisor = 1 + +[ipv4.tcphandshake] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.tcphandshake + # chart type = line + # type = ipv4 + # family = tcp + # units = events/s + # context = ipv4.tcphandshake + # priority = 5230 + # name = ipv4.tcphandshake + # title = IPv4 TCP Handshake Issues + # dim EstabResets name = EstabResets + # dim EstabResets algorithm = incremental + # dim EstabResets multiplier = 1 + # dim EstabResets divisor = 1 + # dim OutRsts name = OutRsts + # dim OutRsts algorithm = incremental + # dim OutRsts multiplier = 1 + # dim OutRsts divisor = 1 + # dim AttemptFails name = AttemptFails + # dim AttemptFails algorithm = incremental + # dim AttemptFails multiplier = 1 + # dim AttemptFails divisor = 1 + # dim TCPSynRetrans name = SynRetrans + # dim TCPSynRetrans algorithm = incremental + # dim TCPSynRetrans multiplier = 1 + # dim TCPSynRetrans divisor = 1 + +[ipv4.udppackets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.udppackets + # chart type = line + # type = ipv4 + # family = udp + # units = packets/s + # context = ipv4.udppackets + # priority = 5300 + # name = ipv4.udppackets + # title = IPv4 UDP Packets + # dim InDatagrams name = received + # dim InDatagrams algorithm = incremental + # dim InDatagrams multiplier = 1 + # dim InDatagrams divisor = 1 + # dim OutDatagrams name = sent + # dim OutDatagrams algorithm = incremental + # dim OutDatagrams multiplier = -1 + # dim OutDatagrams divisor = 1 + +[ipv4.udperrors] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv4.udperrors + # chart type = line + # type = ipv4 + # family = udp + # units = events/s + # context = ipv4.udperrors + # priority = 5310 + # name = ipv4.udperrors + # title = IPv4 UDP Errors + # dim RcvbufErrors name = RcvbufErrors + # dim RcvbufErrors algorithm = incremental + # dim RcvbufErrors multiplier = 1 + # dim RcvbufErrors divisor = 1 + # dim SndbufErrors name = SndbufErrors + # dim SndbufErrors algorithm = incremental + # dim SndbufErrors multiplier = -1 + # dim SndbufErrors divisor = 1 + # dim InErrors name = InErrors + # dim InErrors algorithm = incremental + # dim InErrors multiplier = 1 + # dim InErrors divisor = 1 + # dim NoPorts name = NoPorts + # dim NoPorts algorithm = incremental + # dim NoPorts multiplier = 1 + # dim NoPorts divisor = 1 + # dim InCsumErrors name = InCsumErrors + # dim InCsumErrors algorithm = incremental + # dim InCsumErrors multiplier = 1 + # dim InCsumErrors divisor = 1 + # dim IgnoredMulti name = IgnoredMulti + # dim IgnoredMulti algorithm = incremental + # dim IgnoredMulti multiplier = 1 + # dim IgnoredMulti divisor = 1 + +[system.ipv6] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.ipv6 + # chart type = area + # type = system + # family = network + # units = kilobits/s + # context = system.ipv6 + # priority = 502 + # name = system.ipv6 + # title = IPv6 Bandwidth + # dim InOctets name = received + # dim InOctets algorithm = incremental + # dim InOctets multiplier = 8 + # dim InOctets divisor = 1000 + # dim OutOctets name = sent + # dim OutOctets algorithm = incremental + # dim OutOctets multiplier = -8 + # dim OutOctets divisor = 1000 + +[ipv6.packets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv6.packets + # chart type = line + # type = ipv6 + # family = packets + # units = packets/s + # context = ipv6.packets + # priority = 6200 + # name = ipv6.packets + # title = IPv6 Packets + # dim InReceives name = received + # dim InReceives algorithm = incremental + # dim InReceives multiplier = 1 + # dim InReceives divisor = 1 + # dim OutRequests name = sent + # dim OutRequests algorithm = incremental + # dim OutRequests multiplier = -1 + # dim OutRequests divisor = 1 + # dim OutForwDatagrams name = forwarded + # dim OutForwDatagrams algorithm = incremental + # dim OutForwDatagrams multiplier = -1 + # dim OutForwDatagrams divisor = 1 + # dim InDelivers name = delivers + # dim InDelivers algorithm = incremental + # dim InDelivers multiplier = 1 + # dim InDelivers divisor = 1 + +[ipv6.errors] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/ipv6.errors + # chart type = line + # type = ipv6 + # family = errors + # units = packets/s + # context = ipv6.errors + # priority = 6300 + # name = ipv6.errors + # title = IPv6 Errors + # dim InDiscards name = InDiscards + # dim InDiscards algorithm = incremental + # dim InDiscards multiplier = 1 + # dim InDiscards divisor = 1 + # dim OutDiscards name = OutDiscards + # dim OutDiscards algorithm = incremental + # dim OutDiscards multiplier = -1 + # dim OutDiscards divisor = 1 + # dim InHdrErrors name = InHdrErrors + # dim InHdrErrors algorithm = incremental + # dim InHdrErrors multiplier = 1 + # dim InHdrErrors divisor = 1 + # dim InAddrErrors name = InAddrErrors + # dim InAddrErrors algorithm = incremental + # dim InAddrErrors multiplier = 1 + # dim InAddrErrors divisor = 1 + # dim InUnknownProtos name = InUnknownProtos + # dim InUnknownProtos algorithm = incremental + # dim InUnknownProtos multiplier = 1 + # dim InUnknownProtos divisor = 1 + # dim InTooBigErrors name = InTooBigErrors + # dim InTooBigErrors algorithm = incremental + # dim InTooBigErrors multiplier = 1 + # dim InTooBigErrors divisor = 1 + # dim InTruncatedPkts name = InTruncatedPkts + # dim InTruncatedPkts algorithm = incremental + # dim InTruncatedPkts multiplier = 1 + # dim InTruncatedPkts divisor = 1 + # dim InNoRoutes name = InNoRoutes + # dim InNoRoutes algorithm = incremental + # dim InNoRoutes multiplier = 1 + # dim InNoRoutes divisor = 1 + # dim OutNoRoutes name = OutNoRoutes + # dim OutNoRoutes algorithm = incremental + # dim OutNoRoutes multiplier = -1 + # dim OutNoRoutes divisor = 1 + +[system.softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.softnet_stat + # chart type = line + # type = system + # family = softnet_stat + # units = events/s + # context = system.softnet_stat + # priority = 955 + # name = system.softnet_stat + # title = System softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu0_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu0_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4101 + # name = cpu.cpu0_softnet_stat + # title = CPU0 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu1_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu1_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4102 + # name = cpu.cpu1_softnet_stat + # title = CPU1 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu2_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu2_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4103 + # name = cpu.cpu2_softnet_stat + # title = CPU2 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu3_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu3_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4104 + # name = cpu.cpu3_softnet_stat + # title = CPU3 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu4_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu4_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4105 + # name = cpu.cpu4_softnet_stat + # title = CPU4 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu5_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu5_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4106 + # name = cpu.cpu5_softnet_stat + # title = CPU5 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu6_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu6_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4107 + # name = cpu.cpu6_softnet_stat + # title = CPU6 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[cpu.cpu7_softnet_stat] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/cpu.cpu7_softnet_stat + # chart type = line + # type = cpu + # family = softnet_stat + # units = events/s + # context = cpu.softnet_stat + # priority = 4108 + # name = cpu.cpu7_softnet_stat + # title = CPU7 softnet_stat + # dim processed name = processed + # dim processed algorithm = incremental + # dim processed multiplier = 1 + # dim processed divisor = 1 + # dim dropped name = dropped + # dim dropped algorithm = incremental + # dim dropped multiplier = 1 + # dim dropped divisor = 1 + # dim squeezed name = squeezed + # dim squeezed algorithm = incremental + # dim squeezed multiplier = 1 + # dim squeezed divisor = 1 + # dim received_rps name = received_rps + # dim received_rps algorithm = incremental + # dim received_rps multiplier = 1 + # dim received_rps divisor = 1 + # dim flow_limit_count name = flow_limit_count + # dim flow_limit_count algorithm = incremental + # dim flow_limit_count multiplier = 1 + # dim flow_limit_count divisor = 1 + +[netfilter.conntrack_sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netfilter.conntrack_sockets + # chart type = line + # type = netfilter + # family = conntrack + # units = active connections + # context = netfilter.conntrack_sockets + # priority = 8700 + # name = netfilter.conntrack_sockets + # title = Connection Tracker Connections + # dim connections name = connections + # dim connections algorithm = absolute + # dim connections multiplier = 1 + # dim connections divisor = 1 + +[netfilter.conntrack_new] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netfilter.conntrack_new + # chart type = line + # type = netfilter + # family = conntrack + # units = connections/s + # context = netfilter.conntrack_new + # priority = 8701 + # name = netfilter.conntrack_new + # title = Connection Tracker New Connections + # dim new name = new + # dim new algorithm = incremental + # dim new multiplier = 1 + # dim new divisor = 1 + # dim ignore name = ignore + # dim ignore algorithm = incremental + # dim ignore multiplier = -1 + # dim ignore divisor = 1 + # dim invalid name = invalid + # dim invalid algorithm = incremental + # dim invalid multiplier = -1 + # dim invalid divisor = 1 + +[netfilter.conntrack_changes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netfilter.conntrack_changes + # chart type = line + # type = netfilter + # family = conntrack + # units = changes/s + # context = netfilter.conntrack_changes + # priority = 8702 + # name = netfilter.conntrack_changes + # title = Connection Tracker Changes + # dim inserted name = inserted + # dim inserted algorithm = incremental + # dim inserted multiplier = 1 + # dim inserted divisor = 1 + # dim deleted name = deleted + # dim deleted algorithm = incremental + # dim deleted multiplier = -1 + # dim deleted divisor = 1 + # dim delete_list name = delete_list + # dim delete_list algorithm = incremental + # dim delete_list multiplier = -1 + # dim delete_list divisor = 1 + +[netfilter.conntrack_expect] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netfilter.conntrack_expect + # chart type = line + # type = netfilter + # family = conntrack + # units = expectations/s + # context = netfilter.conntrack_expect + # priority = 8703 + # name = netfilter.conntrack_expect + # title = Connection Tracker Expectations + # dim created name = created + # dim created algorithm = incremental + # dim created multiplier = 1 + # dim created divisor = 1 + # dim deleted name = deleted + # dim deleted algorithm = incremental + # dim deleted multiplier = -1 + # dim deleted divisor = 1 + # dim new name = new + # dim new algorithm = incremental + # dim new multiplier = 1 + # dim new divisor = 1 + +[netfilter.conntrack_search] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netfilter.conntrack_search + # chart type = line + # type = netfilter + # family = conntrack + # units = searches/s + # context = netfilter.conntrack_search + # priority = 8710 + # name = netfilter.conntrack_search + # title = Connection Tracker Searches + # dim searched name = searched + # dim searched algorithm = incremental + # dim searched multiplier = 1 + # dim searched divisor = 1 + # dim restarted name = restarted + # dim restarted algorithm = incremental + # dim restarted multiplier = -1 + # dim restarted divisor = 1 + # dim found name = found + # dim found algorithm = incremental + # dim found multiplier = 1 + # dim found divisor = 1 + +[netfilter.conntrack_errors] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netfilter.conntrack_errors + # chart type = line + # type = netfilter + # family = conntrack + # units = events/s + # context = netfilter.conntrack_errors + # priority = 8705 + # name = netfilter.conntrack_errors + # title = Connection Tracker Errors + # dim icmp_error name = icmp_error + # dim icmp_error algorithm = incremental + # dim icmp_error multiplier = 1 + # dim icmp_error divisor = 1 + # dim insert_failed name = insert_failed + # dim insert_failed algorithm = incremental + # dim insert_failed multiplier = -1 + # dim insert_failed divisor = 1 + # dim drop name = drop + # dim drop algorithm = incremental + # dim drop multiplier = -1 + # dim drop divisor = 1 + # dim early_drop name = early_drop + # dim early_drop algorithm = incremental + # dim early_drop multiplier = -1 + # dim early_drop divisor = 1 + +[disk.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk.sda + # chart type = area + # type = disk + # family = sda + # units = KiB/s + # context = disk.io + # priority = 2000 + # name = disk.sda + # title = Disk I/O Bandwidth + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 512 + # dim reads divisor = 1024 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -512 + # dim writes divisor = 1024 + +[disk_ops.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_ops.sda + # chart type = line + # type = disk_ops + # family = sda + # units = operations/s + # context = disk.ops + # priority = 2001 + # name = disk_ops.sda + # title = Disk Completed I/O Operations + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[disk_backlog.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_backlog.sda + # chart type = area + # type = disk_backlog + # family = sda + # units = milliseconds + # context = disk.backlog + # priority = 2003 + # name = disk_backlog.sda + # title = Disk Backlog + # dim backlog name = backlog + # dim backlog algorithm = incremental + # dim backlog multiplier = 1 + # dim backlog divisor = 10 + +[disk_util.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_util.sda + # chart type = area + # type = disk_util + # family = sda + # units = % of time working + # context = disk.util + # priority = 2004 + # name = disk_util.sda + # title = Disk Utilization Time + # dim utilization name = utilization + # dim utilization algorithm = incremental + # dim utilization multiplier = 1 + # dim utilization divisor = 10 + +[disk_iotime.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_iotime.sda + # chart type = line + # type = disk_iotime + # family = sda + # units = milliseconds/s + # context = disk.iotime + # priority = 2022 + # name = disk_iotime.sda + # title = Disk Total I/O Time + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[disk.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk.sdb + # chart type = area + # type = disk + # family = sdb + # units = KiB/s + # context = disk.io + # priority = 2000 + # name = disk.sdb + # title = Disk I/O Bandwidth + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 512 + # dim reads divisor = 1024 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -512 + # dim writes divisor = 1024 + +[disk_ops.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_ops.sdb + # chart type = line + # type = disk_ops + # family = sdb + # units = operations/s + # context = disk.ops + # priority = 2001 + # name = disk_ops.sdb + # title = Disk Completed I/O Operations + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[disk_backlog.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_backlog.sdb + # chart type = area + # type = disk_backlog + # family = sdb + # units = milliseconds + # context = disk.backlog + # priority = 2003 + # name = disk_backlog.sdb + # title = Disk Backlog + # dim backlog name = backlog + # dim backlog algorithm = incremental + # dim backlog multiplier = 1 + # dim backlog divisor = 10 + +[disk_util.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_util.sdb + # chart type = area + # type = disk_util + # family = sdb + # units = % of time working + # context = disk.util + # priority = 2004 + # name = disk_util.sdb + # title = Disk Utilization Time + # dim utilization name = utilization + # dim utilization algorithm = incremental + # dim utilization multiplier = 1 + # dim utilization divisor = 10 + +[disk_mops.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_mops.sdb + # chart type = line + # type = disk_mops + # family = sdb + # units = merged operations/s + # context = disk.mops + # priority = 2021 + # name = disk_mops.sdb + # title = Disk Merged Operations + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[disk_iotime.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_iotime.sdb + # chart type = line + # type = disk_iotime + # family = sdb + # units = milliseconds/s + # context = disk.iotime + # priority = 2022 + # name = disk_iotime.sdb + # title = Disk Total I/O Time + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[system.io] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.io + # chart type = area + # type = system + # family = disk + # units = KiB/s + # context = system.io + # priority = 150 + # name = system.io + # title = Disk I/O + # dim in name = in + # dim in algorithm = incremental + # dim in multiplier = 1 + # dim in divisor = 1 + # dim out name = out + # dim out algorithm = incremental + # dim out multiplier = -1 + # dim out divisor = 1 + +[system.ipc_semaphores] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.ipc_semaphores + # chart type = area + # type = system + # family = ipc semaphores + # units = semaphores + # context = system.ipc_semaphores + # priority = 1203 + # name = system.ipc_semaphores + # title = IPC Semaphores + # dim semaphores name = semaphores + # dim semaphores algorithm = absolute + # dim semaphores multiplier = 1 + # dim semaphores divisor = 1 + +[system.ipc_semaphore_arrays] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.ipc_semaphore_arrays + # chart type = area + # type = system + # family = ipc semaphores + # units = arrays + # context = system.ipc_semaphore_arrays + # priority = 1204 + # name = system.ipc_semaphore_arrays + # title = IPC Semaphore Arrays + # dim arrays name = arrays + # dim arrays algorithm = absolute + # dim arrays multiplier = 1 + # dim arrays divisor = 1 + +[system.shared_memory_segments] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.shared_memory_segments + # chart type = stacked + # type = system + # family = ipc shared memory + # units = segments + # context = system.shared_memory_segments + # priority = 1205 + # name = system.shared_memory_segments + # title = IPC Shared Memory Number of Segments + # dim segments name = segments + # dim segments algorithm = absolute + # dim segments multiplier = 1 + # dim segments divisor = 1 + +[system.shared_memory_bytes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/system.shared_memory_bytes + # chart type = stacked + # type = system + # family = ipc shared memory + # units = bytes + # context = system.shared_memory_bytes + # priority = 1206 + # name = system.shared_memory_bytes + # title = IPC Shared Memory Used Bytes + # dim bytes name = bytes + # dim bytes algorithm = absolute + # dim bytes multiplier = 1 + # dim bytes divisor = 1 + +[netdata.plugin_proc_modules] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.plugin_proc_modules + # chart type = stacked + # type = netdata + # family = proc + # units = milliseconds/run + # context = netdata.plugin_proc_modules + # priority = 132001 + # name = netdata.plugin_proc_modules + # title = Netdata Proc Plugin Modules Durations + # dim stat name = stat + # dim stat algorithm = absolute + # dim stat multiplier = 1 + # dim stat divisor = 1000 + # dim uptime name = uptime + # dim uptime algorithm = absolute + # dim uptime multiplier = 1 + # dim uptime divisor = 1000 + # dim loadavg name = loadavg + # dim loadavg algorithm = absolute + # dim loadavg multiplier = 1 + # dim loadavg divisor = 1000 + # dim entropy name = entropy + # dim entropy algorithm = absolute + # dim entropy multiplier = 1 + # dim entropy divisor = 1000 + # dim interrupts name = interrupts + # dim interrupts algorithm = absolute + # dim interrupts multiplier = 1 + # dim interrupts divisor = 1000 + # dim softirqs name = softirqs + # dim softirqs algorithm = absolute + # dim softirqs multiplier = 1 + # dim softirqs divisor = 1000 + # dim vmstat name = vmstat + # dim vmstat algorithm = absolute + # dim vmstat multiplier = 1 + # dim vmstat divisor = 1000 + # dim meminfo name = meminfo + # dim meminfo algorithm = absolute + # dim meminfo multiplier = 1 + # dim meminfo divisor = 1000 + # dim ksm name = ksm + # dim ksm algorithm = absolute + # dim ksm multiplier = 1 + # dim ksm divisor = 1000 + # dim numa name = numa + # dim numa algorithm = absolute + # dim numa multiplier = 1 + # dim numa divisor = 1000 + # dim netdev name = netdev + # dim netdev algorithm = absolute + # dim netdev multiplier = 1 + # dim netdev divisor = 1000 + # dim sockstat name = sockstat + # dim sockstat algorithm = absolute + # dim sockstat multiplier = 1 + # dim sockstat divisor = 1000 + # dim sockstat6 name = sockstat6 + # dim sockstat6 algorithm = absolute + # dim sockstat6 multiplier = 1 + # dim sockstat6 divisor = 1000 + # dim netstat name = netstat + # dim netstat algorithm = absolute + # dim netstat multiplier = 1 + # dim netstat divisor = 1000 + # dim snmp name = snmp + # dim snmp algorithm = absolute + # dim snmp multiplier = 1 + # dim snmp divisor = 1000 + # dim snmp6 name = snmp6 + # dim snmp6 algorithm = absolute + # dim snmp6 multiplier = 1 + # dim snmp6 divisor = 1000 + # dim softnet name = softnet + # dim softnet algorithm = absolute + # dim softnet multiplier = 1 + # dim softnet divisor = 1000 + # dim conntrack name = conntrack + # dim conntrack algorithm = absolute + # dim conntrack multiplier = 1 + # dim conntrack divisor = 1000 + # dim diskstats name = diskstats + # dim diskstats algorithm = absolute + # dim diskstats multiplier = 1 + # dim diskstats divisor = 1000 + # dim mdstat name = mdstat + # dim mdstat algorithm = absolute + # dim mdstat multiplier = 1 + # dim mdstat divisor = 1000 + # dim btrfs name = btrfs + # dim btrfs algorithm = absolute + # dim btrfs multiplier = 1 + # dim btrfs divisor = 1000 + # dim ipc name = ipc + # dim ipc algorithm = absolute + # dim ipc multiplier = 1 + # dim ipc divisor = 1000 + # dim power_supply name = power_supply + # dim power_supply algorithm = absolute + # dim power_supply multiplier = 1 + # dim power_supply divisor = 1000 + +[netdata.plugin_proc_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.plugin_proc_cpu + # chart type = stacked + # type = netdata + # family = proc + # units = milliseconds/s + # context = netdata.plugin_proc_cpu + # priority = 132000 + # name = netdata.plugin_proc_cpu + # title = Netdata Proc Plugin CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.server_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.server_cpu + # chart type = stacked + # type = netdata + # family = netdata + # units = milliseconds/s + # context = netdata.server_cpu + # priority = 130000 + # name = netdata.server_cpu + # title = Netdata CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.clients] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.clients + # chart type = line + # type = netdata + # family = netdata + # units = connected clients + # context = netdata.clients + # priority = 130200 + # name = netdata.clients + # title = Netdata Web Clients + # dim clients name = clients + # dim clients algorithm = absolute + # dim clients multiplier = 1 + # dim clients divisor = 1 + +[netdata.requests] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.requests + # chart type = line + # type = netdata + # family = netdata + # units = requests/s + # context = netdata.requests + # priority = 130300 + # name = netdata.requests + # title = Netdata Web Requests + # dim requests name = requests + # dim requests algorithm = incremental + # dim requests multiplier = 1 + # dim requests divisor = 1 + +[netdata.net] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.net + # chart type = area + # type = netdata + # family = netdata + # units = kilobits/s + # context = netdata.net + # priority = 130000 + # name = netdata.net + # title = Netdata Network Traffic + # dim in name = in + # dim in algorithm = incremental + # dim in multiplier = 8 + # dim in divisor = 1000 + # dim out name = out + # dim out algorithm = incremental + # dim out multiplier = -8 + # dim out divisor = 1000 + +[netdata.response_time] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.response_time + # chart type = line + # type = netdata + # family = netdata + # units = milliseconds/request + # context = netdata.response_time + # priority = 130400 + # name = netdata.response_time + # title = Netdata API Response Time + # dim average name = average + # dim average algorithm = absolute + # dim average multiplier = 1 + # dim average divisor = 1000 + # dim max name = max + # dim max algorithm = absolute + # dim max multiplier = 1 + # dim max divisor = 1000 + +[netdata.compression_ratio] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.compression_ratio + # chart type = line + # type = netdata + # family = netdata + # units = percentage + # context = netdata.compression_ratio + # priority = 130500 + # name = netdata.compression_ratio + # title = Netdata API Responses Compression Savings Ratio + # dim savings name = savings + # dim savings algorithm = absolute + # dim savings multiplier = 1 + # dim savings divisor = 1000 + +[netdata.dbengine_compression_ratio] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.dbengine_compression_ratio + # chart type = line + # type = netdata + # family = dbengine + # units = percentage + # context = netdata.dbengine_compression_ratio + # priority = 130502 + # name = netdata.dbengine_compression_ratio + # title = Netdata DB engine data extents' compression savings ratio + # dim savings name = savings + # dim savings algorithm = absolute + # dim savings multiplier = 1 + # dim savings divisor = 1000 + +[netdata.page_cache_hit_ratio] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.page_cache_hit_ratio + # chart type = line + # type = netdata + # family = dbengine + # units = percentage + # context = netdata.page_cache_hit_ratio + # priority = 130503 + # name = netdata.page_cache_hit_ratio + # title = Netdata DB engine page cache hit ratio + # dim ratio name = ratio + # dim ratio algorithm = absolute + # dim ratio multiplier = 1 + # dim ratio divisor = 1000 + +[netdata.page_cache_stats] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.page_cache_stats + # chart type = line + # type = netdata + # family = dbengine + # units = pages + # context = netdata.page_cache_stats + # priority = 130504 + # name = netdata.page_cache_stats + # title = Netdata dbengine page cache statistics + # dim descriptors name = descriptors + # dim descriptors algorithm = absolute + # dim descriptors multiplier = 1 + # dim descriptors divisor = 1 + # dim populated name = populated + # dim populated algorithm = absolute + # dim populated multiplier = 1 + # dim populated divisor = 1 + # dim dirty name = dirty + # dim dirty algorithm = absolute + # dim dirty multiplier = 1 + # dim dirty divisor = 1 + # dim backfills name = backfills + # dim backfills algorithm = incremental + # dim backfills multiplier = 1 + # dim backfills divisor = 1 + # dim evictions name = evictions + # dim evictions algorithm = incremental + # dim evictions multiplier = -1 + # dim evictions divisor = 1 + # dim used_by_collectors name = used_by_collectors + # dim used_by_collectors algorithm = absolute + # dim used_by_collectors multiplier = 1 + # dim used_by_collectors divisor = 1 + +[netdata.dbengine_long_term_page_stats] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.dbengine_long_term_page_stats + # chart type = line + # type = netdata + # family = dbengine + # units = pages + # context = netdata.dbengine_long_term_page_stats + # priority = 130505 + # name = netdata.dbengine_long_term_page_stats + # title = Netdata dbengine long-term page statistics + # dim total name = total + # dim total algorithm = absolute + # dim total multiplier = 1 + # dim total divisor = 1 + # dim insertions name = insertions + # dim insertions algorithm = incremental + # dim insertions multiplier = 1 + # dim insertions divisor = 1 + # dim deletions name = deletions + # dim deletions algorithm = incremental + # dim deletions multiplier = -1 + # dim deletions divisor = 1 + # dim flushing_pressure_deletions name = flushing_pressure_deletions + # dim flushing_pressure_deletions algorithm = incremental + # dim flushing_pressure_deletions multiplier = -1 + # dim flushing_pressure_deletions divisor = 1 + +[netdata.dbengine_io_throughput] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.dbengine_io_throughput + # chart type = line + # type = netdata + # family = dbengine + # units = MiB/s + # context = netdata.dbengine_io_throughput + # priority = 130506 + # name = netdata.dbengine_io_throughput + # title = Netdata DB engine I/O throughput + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 1 + # dim reads divisor = 1048576 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -1 + # dim writes divisor = 1048576 + +[netdata.dbengine_io_operations] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.dbengine_io_operations + # chart type = line + # type = netdata + # family = dbengine + # units = operations/s + # context = netdata.dbengine_io_operations + # priority = 130507 + # name = netdata.dbengine_io_operations + # title = Netdata DB engine I/O operations + # dim reads name = reads + # dim reads algorithm = incremental + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = incremental + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[netdata.dbengine_global_errors] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.dbengine_global_errors + # chart type = line + # type = netdata + # family = dbengine + # units = errors/s + # context = netdata.dbengine_global_errors + # priority = 130508 + # name = netdata.dbengine_global_errors + # title = Netdata DB engine errors + # dim io_errors name = io_errors + # dim io_errors algorithm = incremental + # dim io_errors multiplier = 1 + # dim io_errors divisor = 1 + # dim fs_errors name = fs_errors + # dim fs_errors algorithm = incremental + # dim fs_errors multiplier = 1 + # dim fs_errors divisor = 1 + # dim pg_cache_over_half_dirty_events name = pg_cache_over_half_dirty_events + # dim pg_cache_over_half_dirty_events algorithm = incremental + # dim pg_cache_over_half_dirty_events multiplier = 1 + # dim pg_cache_over_half_dirty_events divisor = 1 + +[netdata.dbengine_global_file_descriptors] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.dbengine_global_file_descriptors + # chart type = line + # type = netdata + # family = dbengine + # units = descriptors + # context = netdata.dbengine_global_file_descriptors + # priority = 130509 + # name = netdata.dbengine_global_file_descriptors + # title = Netdata DB engine File Descriptors + # dim current name = current + # dim current algorithm = absolute + # dim current multiplier = 1 + # dim current divisor = 1 + # dim max name = max + # dim max algorithm = absolute + # dim max multiplier = 1 + # dim max divisor = 1 + +[netdata.dbengine_ram] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.dbengine_ram + # chart type = stacked + # type = netdata + # family = dbengine + # units = MiB + # context = netdata.dbengine_ram + # priority = 130510 + # name = netdata.dbengine_ram + # title = Netdata DB engine RAM usage + # dim cache name = cache + # dim cache algorithm = absolute + # dim cache multiplier = 1 + # dim cache divisor = 256 + # dim collectors name = collectors + # dim collectors algorithm = absolute + # dim collectors multiplier = 1 + # dim collectors divisor = 256 + # dim metadata name = metadata + # dim metadata algorithm = absolute + # dim metadata multiplier = 1 + # dim metadata divisor = 1048576 + +[netdata.web_thread4_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.web_thread4_cpu + # chart type = stacked + # type = netdata + # family = web + # units = milliseconds/s + # context = netdata.web_cpu + # priority = 132003 + # name = netdata.web_thread4_cpu + # title = Netdata web server thread No 4 CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.web_thread1_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.web_thread1_cpu + # chart type = stacked + # type = netdata + # family = web + # units = milliseconds/s + # context = netdata.web_cpu + # priority = 132000 + # name = netdata.web_thread1_cpu + # title = Netdata web server thread No 1 CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.web_thread6_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.web_thread6_cpu + # chart type = stacked + # type = netdata + # family = web + # units = milliseconds/s + # context = netdata.web_cpu + # priority = 132005 + # name = netdata.web_thread6_cpu + # title = Netdata web server thread No 6 CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.web_thread3_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.web_thread3_cpu + # chart type = stacked + # type = netdata + # family = web + # units = milliseconds/s + # context = netdata.web_cpu + # priority = 132002 + # name = netdata.web_thread3_cpu + # title = Netdata web server thread No 3 CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.web_thread2_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.web_thread2_cpu + # chart type = stacked + # type = netdata + # family = web + # units = milliseconds/s + # context = netdata.web_cpu + # priority = 132001 + # name = netdata.web_thread2_cpu + # title = Netdata web server thread No 2 CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[disk_await.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_await.sda + # chart type = line + # type = disk_await + # family = sda + # units = milliseconds/operation + # context = disk.await + # priority = 2005 + # name = disk_await.sda + # title = Average Completed I/O Operation Time + # dim reads name = reads + # dim reads algorithm = absolute + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = absolute + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[disk_avgsz.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_avgsz.sda + # chart type = area + # type = disk_avgsz + # family = sda + # units = KiB/operation + # context = disk.avgsz + # priority = 2006 + # name = disk_avgsz.sda + # title = Average Completed I/O Operation Bandwidth + # dim reads name = reads + # dim reads algorithm = absolute + # dim reads multiplier = 512 + # dim reads divisor = 1024 + # dim writes name = writes + # dim writes algorithm = absolute + # dim writes multiplier = -512 + # dim writes divisor = 1024 + +[disk_svctm.sda] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_svctm.sda + # chart type = line + # type = disk_svctm + # family = sda + # units = milliseconds/operation + # context = disk.svctm + # priority = 2007 + # name = disk_svctm.sda + # title = Average Service Time + # dim svctm name = svctm + # dim svctm algorithm = absolute + # dim svctm multiplier = 1 + # dim svctm divisor = 1 + +[disk_await.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_await.sdb + # chart type = line + # type = disk_await + # family = sdb + # units = milliseconds/operation + # context = disk.await + # priority = 2005 + # name = disk_await.sdb + # title = Average Completed I/O Operation Time + # dim reads name = reads + # dim reads algorithm = absolute + # dim reads multiplier = 1 + # dim reads divisor = 1 + # dim writes name = writes + # dim writes algorithm = absolute + # dim writes multiplier = -1 + # dim writes divisor = 1 + +[disk_avgsz.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_avgsz.sdb + # chart type = area + # type = disk_avgsz + # family = sdb + # units = KiB/operation + # context = disk.avgsz + # priority = 2006 + # name = disk_avgsz.sdb + # title = Average Completed I/O Operation Bandwidth + # dim reads name = reads + # dim reads algorithm = absolute + # dim reads multiplier = 512 + # dim reads divisor = 1024 + # dim writes name = writes + # dim writes algorithm = absolute + # dim writes multiplier = -512 + # dim writes divisor = 1024 + +[disk_svctm.sdb] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/disk_svctm.sdb + # chart type = line + # type = disk_svctm + # family = sdb + # units = milliseconds/operation + # context = disk.svctm + # priority = 2007 + # name = disk_svctm.sdb + # title = Average Service Time + # dim svctm name = svctm + # dim svctm algorithm = absolute + # dim svctm multiplier = 1 + # dim svctm divisor = 1 + +[netdata.web_thread5_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.web_thread5_cpu + # chart type = stacked + # type = netdata + # family = web + # units = milliseconds/s + # context = netdata.web_cpu + # priority = 132004 + # name = netdata.web_thread5_cpu + # title = Netdata web server thread No 5 CPU usage + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.apps_cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.apps_cpu + # chart type = stacked + # type = netdata + # family = apps.plugin + # units = milliseconds/s + # context = netdata.apps_cpu + # priority = 140000 + # name = netdata.apps_cpu + # title = Apps Plugin CPU + # dim user name = user + # dim user algorithm = incremental + # dim user multiplier = 1 + # dim user divisor = 1000 + # dim system name = system + # dim system algorithm = incremental + # dim system multiplier = 1 + # dim system divisor = 1000 + +[netdata.apps_sizes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.apps_sizes + # chart type = line + # type = netdata + # family = apps.plugin + # units = files/s + # context = netdata.apps_sizes + # priority = 140001 + # name = netdata.apps_sizes + # title = Apps Plugin Files + # dim calls name = calls + # dim calls algorithm = incremental + # dim calls multiplier = 1 + # dim calls divisor = 1 + # dim files name = files + # dim files algorithm = incremental + # dim files multiplier = 1 + # dim files divisor = 1 + # dim filenames name = filenames + # dim filenames algorithm = incremental + # dim filenames multiplier = 1 + # dim filenames divisor = 1 + # dim inode_changes name = inode_changes + # dim inode_changes algorithm = incremental + # dim inode_changes multiplier = 1 + # dim inode_changes divisor = 1 + # dim link_changes name = link_changes + # dim link_changes algorithm = incremental + # dim link_changes multiplier = 1 + # dim link_changes divisor = 1 + # dim pids name = pids + # dim pids algorithm = absolute + # dim pids multiplier = 1 + # dim pids divisor = 1 + # dim fds name = fds + # dim fds algorithm = absolute + # dim fds multiplier = 1 + # dim fds divisor = 1 + # dim targets name = targets + # dim targets algorithm = absolute + # dim targets multiplier = 1 + # dim targets divisor = 1 + # dim new_pids name = new pids + # dim new_pids algorithm = incremental + # dim new_pids multiplier = 1 + # dim new_pids divisor = 1 + +[netdata.apps_fix] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.apps_fix + # chart type = line + # type = netdata + # family = apps.plugin + # units = percentage + # context = netdata.apps_fix + # priority = 140002 + # name = netdata.apps_fix + # title = Apps Plugin Normalization Ratios + # dim utime name = utime + # dim utime algorithm = absolute + # dim utime multiplier = 1 + # dim utime divisor = 10000 + # dim stime name = stime + # dim stime algorithm = absolute + # dim stime multiplier = 1 + # dim stime divisor = 10000 + # dim gtime name = gtime + # dim gtime algorithm = absolute + # dim gtime multiplier = 1 + # dim gtime divisor = 10000 + # dim minflt name = minflt + # dim minflt algorithm = absolute + # dim minflt multiplier = 1 + # dim minflt divisor = 10000 + # dim majflt name = majflt + # dim majflt algorithm = absolute + # dim majflt multiplier = 1 + # dim majflt divisor = 10000 + +[netdata.apps_children_fix] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/netdata.apps_children_fix + # chart type = line + # type = netdata + # family = apps.plugin + # units = percentage + # context = netdata.apps_children_fix + # priority = 140003 + # name = netdata.apps_children_fix + # title = Apps Plugin Exited Children Normalization Ratios + # dim cutime name = cutime + # dim cutime algorithm = absolute + # dim cutime multiplier = 1 + # dim cutime divisor = 10000 + # dim cstime name = cstime + # dim cstime algorithm = absolute + # dim cstime multiplier = 1 + # dim cstime divisor = 10000 + # dim cgtime name = cgtime + # dim cgtime algorithm = absolute + # dim cgtime multiplier = 1 + # dim cgtime divisor = 10000 + # dim cminflt name = cminflt + # dim cminflt algorithm = absolute + # dim cminflt multiplier = 1 + # dim cminflt divisor = 10000 + # dim cmajflt name = cmajflt + # dim cmajflt algorithm = absolute + # dim cmajflt multiplier = 1 + # dim cmajflt divisor = 10000 + +[apps.cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.cpu + # chart type = stacked + # type = apps + # family = cpu + # units = percentage + # context = apps.cpu + # priority = 20001 + # name = apps.cpu + # title = Apps CPU Time (800% = 8 cores) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10000 + +[apps.mem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.mem + # chart type = stacked + # type = apps + # family = mem + # units = MiB + # context = apps.mem + # priority = 20003 + # name = apps.mem + # title = Apps Real Memory (w/o shared) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1024 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1024 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1024 + +[apps.vmem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.vmem + # chart type = stacked + # type = apps + # family = mem + # units = MiB + # context = apps.vmem + # priority = 20005 + # name = apps.vmem + # title = Apps Virtual Memory Size + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1024 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1024 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1024 + +[apps.threads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.threads + # chart type = stacked + # type = apps + # family = processes + # units = threads + # context = apps.threads + # priority = 20006 + # name = apps.threads + # title = Apps Threads + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.processes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.processes + # chart type = stacked + # type = apps + # family = processes + # units = processes + # context = apps.processes + # priority = 20007 + # name = apps.processes + # title = Apps Processes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.uptime] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.uptime + # chart type = line + # type = apps + # family = processes + # units = seconds + # context = apps.uptime + # priority = 20008 + # name = apps.uptime + # title = Apps Carried Over Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.uptime_min] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.uptime_min + # chart type = line + # type = apps + # family = processes + # units = seconds + # context = apps.uptime_min + # priority = 20009 + # name = apps.uptime_min + # title = Apps Minimum Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.uptime_avg] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.uptime_avg + # chart type = line + # type = apps + # family = processes + # units = seconds + # context = apps.uptime_avg + # priority = 20010 + # name = apps.uptime_avg + # title = Apps Average Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.uptime_max] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.uptime_max + # chart type = line + # type = apps + # family = processes + # units = seconds + # context = apps.uptime_max + # priority = 20011 + # name = apps.uptime_max + # title = Apps Maximum Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.cpu_user] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.cpu_user + # chart type = stacked + # type = apps + # family = cpu + # units = percentage + # context = apps.cpu_user + # priority = 20020 + # name = apps.cpu_user + # title = Apps CPU User Time (800% = 8 cores) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10000 + +[apps.cpu_system] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.cpu_system + # chart type = stacked + # type = apps + # family = cpu + # units = percentage + # context = apps.cpu_system + # priority = 20021 + # name = apps.cpu_system + # title = Apps CPU System Time (800% = 8 cores) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10000 + +[apps.swap] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.swap + # chart type = stacked + # type = apps + # family = swap + # units = MiB + # context = apps.swap + # priority = 20011 + # name = apps.swap + # title = Apps Swap Memory + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1024 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1024 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1024 + +[apps.major_faults] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.major_faults + # chart type = stacked + # type = apps + # family = swap + # units = page faults/s + # context = apps.major_faults + # priority = 20012 + # name = apps.major_faults + # title = Apps Major Page Faults (swap read) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10000 + +[apps.minor_faults] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.minor_faults + # chart type = stacked + # type = apps + # family = mem + # units = page faults/s + # context = apps.minor_faults + # priority = 20011 + # name = apps.minor_faults + # title = Apps Minor Page Faults + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10000 + +[apps.preads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.preads + # chart type = stacked + # type = apps + # family = disk + # units = KiB/s + # context = apps.preads + # priority = 20002 + # name = apps.preads + # title = Apps Disk Reads + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10240000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10240000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10240000 + +[apps.pwrites] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.pwrites + # chart type = stacked + # type = apps + # family = disk + # units = KiB/s + # context = apps.pwrites + # priority = 20002 + # name = apps.pwrites + # title = Apps Disk Writes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10240000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10240000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10240000 + +[apps.lreads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.lreads + # chart type = stacked + # type = apps + # family = disk + # units = KiB/s + # context = apps.lreads + # priority = 20042 + # name = apps.lreads + # title = Apps Disk Logical Reads + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10240000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10240000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10240000 + +[apps.lwrites] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.lwrites + # chart type = stacked + # type = apps + # family = disk + # units = KiB/s + # context = apps.lwrites + # priority = 20042 + # name = apps.lwrites + # title = Apps I/O Logical Writes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 10240000 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 10240000 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 10240000 + +[apps.files] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.files + # chart type = stacked + # type = apps + # family = disk + # units = open files + # context = apps.files + # priority = 20050 + # name = apps.files + # title = Apps Open Files + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.sockets + # chart type = stacked + # type = apps + # family = net + # units = open sockets + # context = apps.sockets + # priority = 20051 + # name = apps.sockets + # title = Apps Open Sockets + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[apps.pipes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/apps.pipes + # chart type = stacked + # type = apps + # family = processes + # units = open pipes + # context = apps.pipes + # priority = 20053 + # name = apps.pipes + # title = Apps Pipes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim apps.plugin name = apps.plugin + # dim apps.plugin algorithm = absolute + # dim apps.plugin multiplier = 1 + # dim apps.plugin divisor = 1 + # dim go.d.plugin name = go.d.plugin + # dim go.d.plugin algorithm = absolute + # dim go.d.plugin multiplier = 1 + # dim go.d.plugin divisor = 1 + # dim other name = other + # dim other algorithm = absolute + # dim other multiplier = 1 + # dim other divisor = 1 + +[users.cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.cpu + # chart type = stacked + # type = users + # family = cpu + # units = percentage + # context = users.cpu + # priority = 20001 + # name = users.cpu + # title = Users CPU Time (800% = 8 cores) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + +[users.mem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.mem + # chart type = stacked + # type = users + # family = mem + # units = MiB + # context = users.mem + # priority = 20003 + # name = users.mem + # title = Users Real Memory (w/o shared) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1024 + +[users.vmem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.vmem + # chart type = stacked + # type = users + # family = mem + # units = MiB + # context = users.vmem + # priority = 20005 + # name = users.vmem + # title = Users Virtual Memory Size + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1024 + +[users.threads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.threads + # chart type = stacked + # type = users + # family = processes + # units = threads + # context = users.threads + # priority = 20006 + # name = users.threads + # title = Users Threads + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.processes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.processes + # chart type = stacked + # type = users + # family = processes + # units = processes + # context = users.processes + # priority = 20007 + # name = users.processes + # title = Users Processes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.uptime] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.uptime + # chart type = line + # type = users + # family = processes + # units = seconds + # context = users.uptime + # priority = 20008 + # name = users.uptime + # title = Users Carried Over Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.uptime_min] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.uptime_min + # chart type = line + # type = users + # family = processes + # units = seconds + # context = users.uptime_min + # priority = 20009 + # name = users.uptime_min + # title = Users Minimum Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.uptime_avg] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.uptime_avg + # chart type = line + # type = users + # family = processes + # units = seconds + # context = users.uptime_avg + # priority = 20010 + # name = users.uptime_avg + # title = Users Average Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.uptime_max] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.uptime_max + # chart type = line + # type = users + # family = processes + # units = seconds + # context = users.uptime_max + # priority = 20011 + # name = users.uptime_max + # title = Users Maximum Uptime + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.cpu_user] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.cpu_user + # chart type = stacked + # type = users + # family = cpu + # units = percentage + # context = users.cpu_user + # priority = 20020 + # name = users.cpu_user + # title = Users CPU User Time (800% = 8 cores) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + +[users.cpu_system] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.cpu_system + # chart type = stacked + # type = users + # family = cpu + # units = percentage + # context = users.cpu_system + # priority = 20021 + # name = users.cpu_system + # title = Users CPU System Time (800% = 8 cores) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + +[users.swap] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.swap + # chart type = stacked + # type = users + # family = swap + # units = MiB + # context = users.swap + # priority = 20011 + # name = users.swap + # title = Users Swap Memory + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1024 + +[users.major_faults] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.major_faults + # chart type = stacked + # type = users + # family = swap + # units = page faults/s + # context = users.major_faults + # priority = 20012 + # name = users.major_faults + # title = Users Major Page Faults (swap read) + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + +[users.minor_faults] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.minor_faults + # chart type = stacked + # type = users + # family = mem + # units = page faults/s + # context = users.minor_faults + # priority = 20011 + # name = users.minor_faults + # title = Users Minor Page Faults + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + +[users.preads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.preads + # chart type = stacked + # type = users + # family = disk + # units = KiB/s + # context = users.preads + # priority = 20002 + # name = users.preads + # title = Users Disk Reads + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + +[users.pwrites] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.pwrites + # chart type = stacked + # type = users + # family = disk + # units = KiB/s + # context = users.pwrites + # priority = 20002 + # name = users.pwrites + # title = Users Disk Writes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + +[users.lreads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.lreads + # chart type = stacked + # type = users + # family = disk + # units = KiB/s + # context = users.lreads + # priority = 20042 + # name = users.lreads + # title = Users Disk Logical Reads + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + +[users.lwrites] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.lwrites + # chart type = stacked + # type = users + # family = disk + # units = KiB/s + # context = users.lwrites + # priority = 20042 + # name = users.lwrites + # title = Users I/O Logical Writes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + +[users.files] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.files + # chart type = stacked + # type = users + # family = disk + # units = open files + # context = users.files + # priority = 20050 + # name = users.files + # title = Users Open Files + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.sockets + # chart type = stacked + # type = users + # family = net + # units = open sockets + # context = users.sockets + # priority = 20051 + # name = users.sockets + # title = Users Open Sockets + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[users.pipes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/users.pipes + # chart type = stacked + # type = users + # family = processes + # units = open pipes + # context = users.pipes + # priority = 20053 + # name = users.pipes + # title = Users Pipes + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + +[groups.cpu] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.cpu + # chart type = stacked + # type = groups + # family = cpu + # units = percentage + # context = groups.cpu + # priority = 20001 + # name = groups.cpu + # title = User Groups CPU Time (800% = 8 cores) + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + +[groups.mem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.mem + # chart type = stacked + # type = groups + # family = mem + # units = MiB + # context = groups.mem + # priority = 20003 + # name = groups.mem + # title = User Groups Real Memory (w/o shared) + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1024 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + +[groups.vmem] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.vmem + # chart type = stacked + # type = groups + # family = mem + # units = MiB + # context = groups.vmem + # priority = 20005 + # name = groups.vmem + # title = User Groups Virtual Memory Size + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1024 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + +[groups.threads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.threads + # chart type = stacked + # type = groups + # family = processes + # units = threads + # context = groups.threads + # priority = 20006 + # name = groups.threads + # title = User Groups Threads + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.processes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.processes + # chart type = stacked + # type = groups + # family = processes + # units = processes + # context = groups.processes + # priority = 20007 + # name = groups.processes + # title = User Groups Processes + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.uptime] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.uptime + # chart type = line + # type = groups + # family = processes + # units = seconds + # context = groups.uptime + # priority = 20008 + # name = groups.uptime + # title = User Groups Carried Over Uptime + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.uptime_min] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.uptime_min + # chart type = line + # type = groups + # family = processes + # units = seconds + # context = groups.uptime_min + # priority = 20009 + # name = groups.uptime_min + # title = User Groups Minimum Uptime + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.uptime_avg] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.uptime_avg + # chart type = line + # type = groups + # family = processes + # units = seconds + # context = groups.uptime_avg + # priority = 20010 + # name = groups.uptime_avg + # title = User Groups Average Uptime + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.uptime_max] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.uptime_max + # chart type = line + # type = groups + # family = processes + # units = seconds + # context = groups.uptime_max + # priority = 20011 + # name = groups.uptime_max + # title = User Groups Maximum Uptime + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.cpu_user] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.cpu_user + # chart type = stacked + # type = groups + # family = cpu + # units = percentage + # context = groups.cpu_user + # priority = 20020 + # name = groups.cpu_user + # title = User Groups CPU User Time (800% = 8 cores) + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + +[groups.cpu_system] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.cpu_system + # chart type = stacked + # type = groups + # family = cpu + # units = percentage + # context = groups.cpu_system + # priority = 20021 + # name = groups.cpu_system + # title = User Groups CPU System Time (800% = 8 cores) + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + +[groups.swap] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.swap + # chart type = stacked + # type = groups + # family = swap + # units = MiB + # context = groups.swap + # priority = 20011 + # name = groups.swap + # title = User Groups Swap Memory + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1024 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1024 + +[groups.major_faults] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.major_faults + # chart type = stacked + # type = groups + # family = swap + # units = page faults/s + # context = groups.major_faults + # priority = 20012 + # name = groups.major_faults + # title = User Groups Major Page Faults (swap read) + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + +[groups.minor_faults] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.minor_faults + # chart type = stacked + # type = groups + # family = mem + # units = page faults/s + # context = groups.minor_faults + # priority = 20011 + # name = groups.minor_faults + # title = User Groups Minor Page Faults + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10000 + +[groups.preads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.preads + # chart type = stacked + # type = groups + # family = disk + # units = KiB/s + # context = groups.preads + # priority = 20002 + # name = groups.preads + # title = User Groups Disk Reads + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + +[groups.pwrites] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.pwrites + # chart type = stacked + # type = groups + # family = disk + # units = KiB/s + # context = groups.pwrites + # priority = 20002 + # name = groups.pwrites + # title = User Groups Disk Writes + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + +[groups.lreads] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.lreads + # chart type = stacked + # type = groups + # family = disk + # units = KiB/s + # context = groups.lreads + # priority = 20042 + # name = groups.lreads + # title = User Groups Disk Logical Reads + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + +[groups.lwrites] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.lwrites + # chart type = stacked + # type = groups + # family = disk + # units = KiB/s + # context = groups.lwrites + # priority = 20042 + # name = groups.lwrites + # title = User Groups I/O Logical Writes + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 10240000 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 10240000 + +[groups.files] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.files + # chart type = stacked + # type = groups + # family = disk + # units = open files + # context = groups.files + # priority = 20050 + # name = groups.files + # title = User Groups Open Files + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.sockets] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.sockets + # chart type = stacked + # type = groups + # family = net + # units = open sockets + # context = groups.sockets + # priority = 20051 + # name = groups.sockets + # title = User Groups Open Sockets + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 + +[groups.pipes] + history = 5 + # enabled = yes + # cache directory = /var/cache/netdata/groups.pipes + # chart type = stacked + # type = groups + # family = processes + # units = open pipes + # context = groups.pipes + # priority = 20053 + # name = groups.pipes + # title = User Groups Pipes + # dim root name = root + # dim root algorithm = absolute + # dim root multiplier = 1 + # dim root divisor = 1 + # dim netdata name = netdata + # dim netdata algorithm = absolute + # dim netdata multiplier = 1 + # dim netdata divisor = 1 diff --git a/build_external/scenarios/aclk-testing/configureVerneMQ.Dockerfile b/build_external/scenarios/aclk-testing/configureVerneMQ.Dockerfile new file mode 100644 index 0000000..228548c --- /dev/null +++ b/build_external/scenarios/aclk-testing/configureVerneMQ.Dockerfile @@ -0,0 +1,8 @@ +FROM vernemq:latest +EXPOSE 9002 +COPY vernemq.conf /vernemq/etc/vernemq.conf +WORKDIR /vernemq +#RUN openssl req -newkey rsa:2048 -nodes -keyout server.key -x509 -days 365 -out server.crt -subj '/CN=vernemq' +RUN openssl req -newkey rsa:4096 -x509 -sha256 -days 3650 -nodes -out server.crt -keyout server.key -subj "/C=SK/ST=XX/L=XX/O=NetdataIsAwesome/OU=NotSupremeLeader/CN=netdata.cloud" +RUN chown vernemq:vernemq /vernemq/server.key /vernemq/server.crt +RUN cat /vernemq/etc/vernemq.conf diff --git a/build_external/scenarios/aclk-testing/paho-compose.yml b/build_external/scenarios/aclk-testing/paho-compose.yml new file mode 100644 index 0000000..8c39e20 --- /dev/null +++ b/build_external/scenarios/aclk-testing/paho-compose.yml @@ -0,0 +1,6 @@ +version: '3.3' +services: + paho_inspect: + build: + context: . + dockerfile: paho.Dockerfile diff --git a/build_external/scenarios/aclk-testing/paho-inspection.py b/build_external/scenarios/aclk-testing/paho-inspection.py new file mode 100644 index 0000000..e9343cc --- /dev/null +++ b/build_external/scenarios/aclk-testing/paho-inspection.py @@ -0,0 +1,33 @@ +import ssl +import paho.mqtt.client as mqtt + +def on_connect(mqttc, obj, flags, rc): + print("connected rc: "+str(rc), flush=True) + mqttc.subscribe("/agent/#",0) +def on_disconnect(mqttc, obj, flags, rc): + print("disconnected rc: "+str(rc), flush=True) +def on_message(mqttc, obj, msg): + print(msg.topic+" "+str(msg.qos)+" "+str(msg.payload), flush=True) +def on_publish(mqttc, obj, mid): + print("mid: "+str(mid), flush=True) +def on_subscribe(mqttc, obj, mid, granted_qos): + print("Subscribed: "+str(mid)+" "+str(granted_qos), flush=True) +def on_log(mqttc, obj, level, string): + print(string) +print("Starting paho-inspection", flush=True) +mqttc = mqtt.Client(transport='websockets') +#mqttc.tls_set(certfile="server.crt", keyfile="server.key", cert_reqs=ssl.CERT_REQUIRED, tls_version=ssl.PROTOCOL_TLS, ciphers=None) +#mqttc.tls_set(ca_certs="server.crt", cert_reqs=ssl.CERT_REQUIRED, tls_version=ssl.PROTOCOL_TLS, ciphers=None) +mqttc.tls_set(cert_reqs=ssl.CERT_NONE, tls_version=ssl.PROTOCOL_TLS, ciphers=None) +mqttc.tls_insecure_set(True) +mqttc.on_message = on_message +mqttc.on_connect = on_connect +mqttc.on_disconnect = on_disconnect +mqttc.on_publish = on_publish +mqttc.on_subscribe = on_subscribe +mqttc.connect("vernemq", 9002, 60) + +#mqttc.publish("/agent/mine","Test1") +#mqttc.subscribe("$SYS/#", 0) +print("Connected successfully, monitoring /agent/#", flush=True) +mqttc.loop_forever() diff --git a/build_external/scenarios/aclk-testing/paho.Dockerfile b/build_external/scenarios/aclk-testing/paho.Dockerfile new file mode 100644 index 0000000..77a49e7 --- /dev/null +++ b/build_external/scenarios/aclk-testing/paho.Dockerfile @@ -0,0 +1,12 @@ +FROM archlinux/base:latest + +RUN pacman -Syyu --noconfirm +RUN pacman --noconfirm --needed -S python-pip + +RUN pip install paho-mqtt + +RUN mkdir -p /opt/paho +COPY paho-inspection.py /opt/paho/ + +WORKDIR /opt/paho +CMD ["/usr/sbin/python", "paho-inspection.py"] \ No newline at end of file diff --git a/build_external/scenarios/aclk-testing/vernemq-compose.yml b/build_external/scenarios/aclk-testing/vernemq-compose.yml new file mode 100644 index 0000000..3ec805a --- /dev/null +++ b/build_external/scenarios/aclk-testing/vernemq-compose.yml @@ -0,0 +1,6 @@ +version: '3.3' +services: + vernemq: + build: + dockerfile: configureVerneMQ.Dockerfile + context: . diff --git a/build_external/scenarios/aclk-testing/vernemq.conf b/build_external/scenarios/aclk-testing/vernemq.conf new file mode 100644 index 0000000..18e8432 --- /dev/null +++ b/build_external/scenarios/aclk-testing/vernemq.conf @@ -0,0 +1,68 @@ +allow_anonymous = on +allow_register_during_netsplit = off +allow_publish_during_netsplit = off +allow_subscribe_during_netsplit = off +allow_unsubscribe_during_netsplit = off +allow_multiple_sessions = off +coordinate_registrations = on +max_inflight_messages = 20 +max_online_messages = 1000 +max_offline_messages = 1000 +max_message_size = 0 +upgrade_outgoing_qos = off +listener.max_connections = 10000 +listener.nr_of_acceptors = 10 +listener.tcp.default = 127.0.0.1:1883 +listener.wss.keyfile = /vernemq/server.key +listener.wss.certfile = /vernemq/server.crt +listener.wss.default = 0.0.0.0:9002 +listener.vmq.clustering = 0.0.0.0:44053 +listener.http.default = 127.0.0.1:8888 +listener.ssl.require_certificate = off +listener.wss.require_certificate = off +systree_enabled = on +systree_interval = 20000 +graphite_enabled = off +graphite_host = localhost +graphite_port = 2003 +graphite_interval = 20000 +shared_subscription_policy = prefer_local +plugins.vmq_passwd = on +plugins.vmq_acl = on +plugins.vmq_diversity = off +plugins.vmq_webhooks = off +plugins.vmq_bridge = off +metadata_plugin = vmq_plumtree +vmq_acl.acl_file = ./etc/vmq.acl +vmq_acl.acl_reload_interval = 10 +vmq_passwd.password_file = ./etc/vmq.passwd +vmq_passwd.password_reload_interval = 10 +vmq_diversity.script_dir = ./share/lua +vmq_diversity.auth_postgres.enabled = off +vmq_diversity.postgres.ssl = off +vmq_diversity.postgres.password_hash_method = crypt +vmq_diversity.auth_cockroachdb.enabled = off +vmq_diversity.cockroachdb.ssl = on +vmq_diversity.cockroachdb.password_hash_method = bcrypt +vmq_diversity.auth_mysql.enabled = off +vmq_diversity.mysql.password_hash_method = password +vmq_diversity.auth_mongodb.enabled = off +vmq_diversity.mongodb.ssl = off +vmq_diversity.auth_redis.enabled = off +vmq_bcrypt.pool_size = 1 +log.console = file +log.console.level = info +log.console.file = ./log/console.log +log.error.file = ./log/error.log +log.syslog = off +log.crash = on +log.crash.file = ./log/crash.log +log.crash.maximum_message_size = 64KB +log.crash.size = 10MB +log.crash.rotation = $D0 +log.crash.rotation.keep = 5 +nodename = VerneMQ@127.0.0.1 +distributed_cookie = vmq +erlang.async_threads = 64 +erlang.max_ports = 262144 +leveldb.maximum_memory.percent = 70 diff --git a/build_external/scenarios/gaps_hi/child-compose.yml b/build_external/scenarios/gaps_hi/child-compose.yml new file mode 100644 index 0000000..2ca306f --- /dev/null +++ b/build_external/scenarios/gaps_hi/child-compose.yml @@ -0,0 +1,13 @@ +version: '3.3' +services: + agent_child: + image: debian_10_dev + command: /usr/sbin/netdata -D + #ports: + #- 21002+:19999 + volumes: + - ./child_stream.conf:/etc/netdata/stream.conf:ro + #- ./child_guid:/var/lib/netdata/registry/netdata.public.unique.id:ro + - ./min.conf:/etc/netdata/netdata.conf:ro + cap_add: + - SYS_PTRACE diff --git a/build_external/scenarios/gaps_hi/child_guid b/build_external/scenarios/gaps_hi/child_guid new file mode 100644 index 0000000..670f7c2 --- /dev/null +++ b/build_external/scenarios/gaps_hi/child_guid @@ -0,0 +1 @@ +22222222-2222-2222-2222-222222222222 \ No newline at end of file diff --git a/build_external/scenarios/gaps_hi/child_stream.conf b/build_external/scenarios/gaps_hi/child_stream.conf new file mode 100644 index 0000000..2218c68 --- /dev/null +++ b/build_external/scenarios/gaps_hi/child_stream.conf @@ -0,0 +1,11 @@ +[stream] + enabled = yes +# destination = tcp:agent_middle + destination = tcp:192.168.1.2 + api key = 00000000-0000-0000-0000-000000000000 + timeout seconds = 60 + default port = 19999 + send charts matching = * + buffer size bytes = 10485760 + reconnect delay seconds = 5 + initial clock resync iterations = 60 diff --git a/build_external/scenarios/gaps_hi/middle-compose.yml b/build_external/scenarios/gaps_hi/middle-compose.yml new file mode 100644 index 0000000..c316164 --- /dev/null +++ b/build_external/scenarios/gaps_hi/middle-compose.yml @@ -0,0 +1,13 @@ +version: '3.3' +services: + agent_middle: + image: debian_10_dev + command: /usr/sbin/netdata -D + ports: + - 21001:19999 + volumes: + - ./middle_stream.conf:/etc/netdata/stream.conf:ro + - ./middle_guid:/var/lib/netdata/registry/netdata.public.unique.id:ro + - ./min.conf:/etc/netdata/netdata.conf:ro + cap_add: + - SYS_PTRACE diff --git a/build_external/scenarios/gaps_hi/middle_guid b/build_external/scenarios/gaps_hi/middle_guid new file mode 100644 index 0000000..f8a43c2 --- /dev/null +++ b/build_external/scenarios/gaps_hi/middle_guid @@ -0,0 +1 @@ +11111111-1111-1111-1111-111111111111 \ No newline at end of file diff --git a/build_external/scenarios/gaps_hi/middle_stream.conf b/build_external/scenarios/gaps_hi/middle_stream.conf new file mode 100644 index 0000000..132eaa1 --- /dev/null +++ b/build_external/scenarios/gaps_hi/middle_stream.conf @@ -0,0 +1,23 @@ +[stream] + enabled = yes + destination = tcp:agent_parent + api key = 00000000-0000-0000-0000-000000000000 + timeout seconds = 60 + default port = 19999 + + send charts matching = * + buffer size bytes = 1048576 + reconnect delay seconds = 5 + initial clock resync iterations = 60 + +[00000000-0000-0000-0000-000000000000] + enabled = yes + allow from = * + default history = 3600 + # default memory mode = ram + + health enabled by default = auto + + # postpone alarms for a short period after the sender is connected + default postpone alarms on connect seconds = 60 + multiple connections = allow diff --git a/build_external/scenarios/gaps_hi/min.conf b/build_external/scenarios/gaps_hi/min.conf new file mode 100644 index 0000000..83fa23e --- /dev/null +++ b/build_external/scenarios/gaps_hi/min.conf @@ -0,0 +1,6 @@ +[global] + debug flags = 0x0000000040000000 + errors flood protection period = 0 +[web] + ssl key = /etc/netdata/ssl/key.pem + ssl certificate = /etc/netdata/ssl/cert.pem diff --git a/build_external/scenarios/gaps_hi/parent-compose.yml b/build_external/scenarios/gaps_hi/parent-compose.yml new file mode 100644 index 0000000..2944bbc --- /dev/null +++ b/build_external/scenarios/gaps_hi/parent-compose.yml @@ -0,0 +1,13 @@ +version: '3.3' +services: + agent_parent: + image: debian_10_dev + command: /usr/sbin/netdata -D + ports: + - 21000:19999 + volumes: + - ./parent_stream.conf:/etc/netdata/stream.conf:ro + - ./parent_guid:/var/lib/netdata/registry/netdata.public.unique.id:ro + - ./min.conf:/etc/netdata/netdata.conf:ro + cap_add: + - SYS_PTRACE diff --git a/build_external/scenarios/gaps_hi/parent_guid b/build_external/scenarios/gaps_hi/parent_guid new file mode 100644 index 0000000..fee6f32 --- /dev/null +++ b/build_external/scenarios/gaps_hi/parent_guid @@ -0,0 +1 @@ +00000000-0000-0000-0000-000000000000 \ No newline at end of file diff --git a/build_external/scenarios/gaps_hi/parent_stream.conf b/build_external/scenarios/gaps_hi/parent_stream.conf new file mode 100644 index 0000000..600a9fa --- /dev/null +++ b/build_external/scenarios/gaps_hi/parent_stream.conf @@ -0,0 +1,11 @@ +[00000000-0000-0000-0000-000000000000] + enabled = yes + allow from = * + default history = 3600 + default memory mode = dbengine + health enabled by default = no + + # postpone alarms for a short period after the sender is connected + default postpone alarms on connect seconds = 60 + multiple connections = allow + diff --git a/build_external/scenarios/gaps_lo/child-compose.yml b/build_external/scenarios/gaps_lo/child-compose.yml new file mode 100644 index 0000000..dee06f2 --- /dev/null +++ b/build_external/scenarios/gaps_lo/child-compose.yml @@ -0,0 +1,14 @@ +version: '3.3' +services: + agent_child: + image: arch_extras_dev + #command: /usr/sbin/valgrind --leak-check=full /usr/sbin/netdata -D + command: /usr/sbin/netdata -D # gdb does not like valgrind ! + #ports: + #- 21002:19999 + volumes: + - ./child_stream.conf:/etc/netdata/stream.conf:ro + # - ./child_guid:/var/lib/netdata/registry/netdata.public.unique.id:ro + - ./mostly_off.conf:/etc/netdata/netdata.conf:ro + cap_add: + - SYS_PTRACE diff --git a/build_external/scenarios/gaps_lo/child_guid b/build_external/scenarios/gaps_lo/child_guid new file mode 100644 index 0000000..670f7c2 --- /dev/null +++ b/build_external/scenarios/gaps_lo/child_guid @@ -0,0 +1 @@ +22222222-2222-2222-2222-222222222222 \ No newline at end of file diff --git a/build_external/scenarios/gaps_lo/child_stream.conf b/build_external/scenarios/gaps_lo/child_stream.conf new file mode 100644 index 0000000..00bcdfa --- /dev/null +++ b/build_external/scenarios/gaps_lo/child_stream.conf @@ -0,0 +1,11 @@ +[stream] + # Enable this on child nodes, to have them send metrics. + enabled = yes + destination = tcp:192.168.1.2 + api key = 00000000-0000-0000-0000-000000000000 + timeout seconds = 60 + default port = 19999 + send charts matching = * + buffer size bytes = 1048576 + reconnect delay seconds = 5 + initial clock resync iterations = 60 diff --git a/build_external/scenarios/gaps_lo/middle-compose.yml b/build_external/scenarios/gaps_lo/middle-compose.yml new file mode 100644 index 0000000..f94e1fe --- /dev/null +++ b/build_external/scenarios/gaps_lo/middle-compose.yml @@ -0,0 +1,14 @@ +version: '3.3' +services: + agent_middle: + image: arch_extras_dev + #command: /usr/sbin/valgrind --leak-check=full /usr/sbin/netdata -D + command: /usr/sbin/netdata -D + ports: + - 21001:19999 + volumes: + - ./middle_stream.conf:/etc/netdata/stream.conf:ro + - ./middle_guid:/var/lib/netdata/registry/netdata.public.unique.id:ro + - ./mostly_off.conf:/etc/netdata/netdata.conf:ro + cap_add: + - SYS_PTRACE diff --git a/build_external/scenarios/gaps_lo/middle_guid b/build_external/scenarios/gaps_lo/middle_guid new file mode 100644 index 0000000..f8a43c2 --- /dev/null +++ b/build_external/scenarios/gaps_lo/middle_guid @@ -0,0 +1 @@ +11111111-1111-1111-1111-111111111111 \ No newline at end of file diff --git a/build_external/scenarios/gaps_lo/middle_stream.conf b/build_external/scenarios/gaps_lo/middle_stream.conf new file mode 100644 index 0000000..3e52e83 --- /dev/null +++ b/build_external/scenarios/gaps_lo/middle_stream.conf @@ -0,0 +1,20 @@ +[stream] + enabled = yes + destination = tcp:agent_parent + api key = 00000000-0000-0000-0000-000000000000 + timeout seconds = 60 + default port = 19999 + send charts matching = * + buffer size bytes = 1048576 + reconnect delay seconds = 5 + initial clock resync iterations = 60 + +[00000000-0000-0000-0000-000000000000] + enabled = yes + allow from = * + default history = 3600 + # default memory mode = ram + health enabled by default = auto + # postpone alarms for a short period after the sender is connected + default postpone alarms on connect seconds = 60 + multiple connections = allow diff --git a/build_external/scenarios/gaps_lo/mostly_off.conf b/build_external/scenarios/gaps_lo/mostly_off.conf new file mode 100644 index 0000000..079fef0 --- /dev/null +++ b/build_external/scenarios/gaps_lo/mostly_off.conf @@ -0,0 +1,964 @@ +# netdata configuration +# +# You can download the latest version of this file, using: +# +# wget -O /etc/netdata/netdata.conf http://localhost:19999/netdata.conf +# or +# curl -o /etc/netdata/netdata.conf http://localhost:19999/netdata.conf +# +# You can uncomment and change any of the options below. +# The value shown in the commented settings, is the default value. +# + +# global netdata configuration + +[global] +debug flags = 0x0000000040000000 +errors flood protection period = 0 + +[plugins] + diskspace = no + cgroups = no + tc = no + idlejitter = no + ioping = no + apps = no + go.d = no + perf = no + fping = no + python.d = no + charts.d = no + nfacct = no + cups = no + freeipmi = no + +[health] +enabled = no + +[statsd] +enabled = no + +[plugin:proc] + /proc/uptime = yes + /proc/loadavg = no + /proc/sys/kernel/random/entropy_avail = no + /proc/pressure = no + /proc/interrupts = no + /proc/softirqs = no + /proc/vmstat = no + /proc/meminfo = no + /sys/kernel/mm/ksm = no + /sys/block/zram = no + /sys/devices/system/edac/mc = no + /sys/devices/system/node = no + /proc/net/dev = no + /proc/net/sockstat = no + /proc/net/sockstat6 = no + /proc/net/netstat = no + /proc/net/snmp = no + /proc/net/snmp6 = no + /proc/net/sctp/snmp = no + /proc/net/softnet_stat = no + /proc/net/ip_vs/stats = no + /proc/net/stat/conntrack = no + /proc/net/stat/synproxy = no + /proc/diskstats = no + /proc/mdstat = no + /proc/net/rpc/nfsd = no + /proc/net/rpc/nfs = no + /proc/spl/kstat/zfs/arcstats = no + /sys/fs/btrfs = no + ipc = no + /sys/class/power_supply = no + + +[plugin:proc:/proc/net/dev:docker0] +enabled = no + +[plugin:proc:/proc/net/dev:br-b87e56f878f1] +enabled = no + +[plugin:proc:/proc/net/dev:enp4s0] +enabled = no + +[plugin:proc:/proc/net/stat/nf_conntrack] +filename to monitor = /proc/net/stat/nf_conntrack +netfilter new connections = no +netfilter connection changes = no +netfilter connection expectations = no +netfilter connection searches = no +netfilter errors = no +netfilter connections = no + +[system.idlejitter] +enabled = no + +[netdata.statsd_metrics] +enabled = no + +[netdata.statsd_useful_metrics] +enabled = no + +[netdata.statsd_events] +enabled = no + +[netdata.statsd_reads] +enabled = no + +[netdata.statsd_bytes] +enabled = no + +[netdata.statsd_packets] +enabled = no + +[netdata.tcp_connects] +enabled = no + +[netdata.tcp_connected] +enabled = no + +[netdata.private_charts] +enabled = no + +[netdata.plugin_statsd_charting_cpu] +enabled = no + +[netdata.plugin_statsd_collector1_cpu] +enabled = no + +[netdata.plugin_tc_cpu] +enabled = no + +[netdata.plugin_tc_time] +enabled = no + +[netdata.runtime_sensors] +enabled = no + +[sensors.coretemp-isa-0000_temperature] +enabled = no + +[sensors.acpitz-acpi-0_temperature] +enabled = no + +[system.cpu] +enabled = yes + +[disk_space._dev] +enabled = no + +[netdata.plugin_cgroups_cpu] +enabled = no + +[netdata.apps_cpu] +enabled = no + +[netdata.apps_sizes] +enabled = no + +[netdata.apps_fix] +enabled = no + +[netdata.apps_children_fix] +enabled = no + +[apps.cpu] +enabled = no + +[apps.mem] +enabled = no + +[apps.vmem] +enabled = no + +[apps.threads] +enabled = no + +[apps.processes] +enabled = no + +[apps.uptime] +enabled = no + +[apps.uptime_min] +enabled = no + +[apps.uptime_avg] +enabled = no + +[apps.uptime_max] +enabled = no + +[apps.cpu_user] +enabled = no + +[apps.cpu_system] +enabled = no + +[apps.swap] +enabled = no + +[apps.major_faults] +enabled = no + +[apps.minor_faults] +enabled = no + +[apps.preads] +enabled = no + +[apps.pwrites] +enabled = no + +[apps.lreads] +enabled = no + +[apps.lwrites] +enabled = no + +[apps.files] +enabled = no + +[apps.sockets] +enabled = no + +[apps.pipes] +enabled = no + +[users.cpu] +enabled = no + +[users.mem] +enabled = no + +[users.vmem] +enabled = no + +[users.threads] +enabled = no + +[users.processes] +enabled = no + +[users.uptime] +enabled = no + +[users.uptime_min] +enabled = no + +[users.uptime_avg] +enabled = no + +[users.uptime_max] +enabled = no + +[users.cpu_user] +enabled = no + +[users.cpu_system] +enabled = no + +[users.swap] +enabled = no + +[users.major_faults] +enabled = no + +[users.minor_faults] +enabled = no + +[users.preads] +enabled = no + +[users.pwrites] +enabled = no + +[users.lreads] +enabled = no + +[users.lwrites] +enabled = no + +[users.files] +enabled = no + +[users.sockets] +enabled = no + +[users.pipes] +enabled = no + +[groups.cpu] +enabled = no + +[groups.mem] +enabled = no + +[groups.vmem] +enabled = no + +[groups.threads] +enabled = no + +[groups.processes] +enabled = no + +[groups.uptime] +enabled = no + +[groups.uptime_min] +enabled = no + +[groups.uptime_avg] +enabled = no + +[groups.uptime_max] +enabled = no + +[groups.cpu_user] +enabled = no + +[groups.cpu_system] +enabled = no + +[groups.swap] +enabled = no + +[groups.major_faults] +enabled = no + +[groups.minor_faults] +enabled = no + +[groups.preads] +enabled = no + +[groups.pwrites] +enabled = no + +[groups.lreads] +enabled = no + +[groups.lwrites] +enabled = no + +[groups.files] +enabled = no + +[groups.sockets] +enabled = no + +[groups.pipes] +enabled = no + +[netdata.web_thread4_cpu] +enabled = no + +[netdata.web_thread2_cpu] +enabled = no + +[netdata.web_thread5_cpu] +enabled = no + +[netdata.web_thread3_cpu] +enabled = no + +[netdata.web_thread6_cpu] +enabled = no + +[netdata.web_thread1_cpu] +enabled = no + +[disk_inodes._dev] +enabled = no + +[disk_space._run] +enabled = no + +[disk_inodes._run] +enabled = no + +[disk_space._] +enabled = no + +[disk_inodes._] +enabled = no + +[disk_space._dev_shm] +enabled = no + +[disk_inodes._dev_shm] +enabled = no + +[disk_space._run_lock] +enabled = no + +[disk_inodes._run_lock] +enabled = no + +[disk_space._home] +enabled = no + +[disk_inodes._home] +enabled = no + +[disk_space._boot_efi] +enabled = no + +[disk_space._media_amoss_deb10] +enabled = no + +[netdata.plugin_diskspace] +enabled = no + +[netdata.plugin_diskspace_dt] +enabled = no + +[cpu.cpu0] +enabled = no + +[cpu.cpu1] +enabled = no + +[cpu.cpu2] +enabled = no + +[cpu.cpu3] +enabled = no + +[cpu.cpu4] +enabled = no + +[cpu.cpu5] +enabled = no + +[cpu.cpu6] +enabled = no + +[cpu.cpu7] +enabled = no + +[system.intr] +enabled = no + +[system.ctxt] +enabled = no + +[system.forks] +enabled = no + +[system.processes] +enabled = no + +[cpu.core_throttling] +enabled = no + +[cpu.cpufreq] +enabled = no + +[cpu.cpu0_cpuidle] +enabled = no + +[cpu.cpu1_cpuidle] +enabled = no + +[cpu.cpu2_cpuidle] +enabled = no + +[cpu.cpu3_cpuidle] +enabled = no + +[cpu.cpu4_cpuidle] +enabled = no + +[cpu.cpu5_cpuidle] +enabled = no + +[cpu.cpu6_cpuidle] +enabled = no + +[cpu.cpu7_cpuidle] +enabled = no + +[system.uptime] +enabled = yes + +[system.load] +enabled = no + +[system.active_processes] +enabled = no + +[system.entropy] +enabled = no + +[system.interrupts] +enabled = no + +[cpu.cpu0_interrupts] +enabled = no + +[cpu.cpu1_interrupts] +enabled = no + +[cpu.cpu2_interrupts] +enabled = no + +[cpu.cpu3_interrupts] +enabled = no + +[cpu.cpu4_interrupts] +enabled = no + +[cpu.cpu5_interrupts] +enabled = no + +[cpu.cpu6_interrupts] +enabled = no + +[cpu.cpu7_interrupts] +enabled = no + +[system.softirqs] +enabled = no + +[cpu.cpu0_softirqs] +enabled = no + +[cpu.cpu1_softirqs] +enabled = no + +[cpu.cpu2_softirqs] +enabled = no + +[cpu.cpu3_softirqs] +enabled = no + +[cpu.cpu4_softirqs] +enabled = no + +[cpu.cpu5_softirqs] +enabled = no + +[cpu.cpu6_softirqs] +enabled = no + +[cpu.cpu7_softirqs] +enabled = no + +[system.swapio] +enabled = no + +[system.pgpgio] +enabled = no + +[mem.pgfaults] +enabled = no + +[system.ram] +enabled = no + +[mem.available] +enabled = no + +[system.swap] +enabled = no + +[mem.committed] +enabled = no + +[mem.writeback] +enabled = no + +[mem.kernel] +enabled = no + +[mem.slab] +enabled = no + +[mem.transparent_hugepages] +enabled = no + +[net.docker0] +enabled = no + +[net_packets.docker0] +enabled = no + +[net.br-b87e56f878f1] +enabled = no + +[net_packets.br-b87e56f878f1] +enabled = no + +[net.enp4s0] +enabled = no + +[net_packets.enp4s0] +enabled = no + +[system.net] +enabled = no + +[ipv4.sockstat_sockets] +enabled = no + +[ipv4.sockstat_tcp_sockets] +enabled = no + +[ipv4.sockstat_tcp_mem] +enabled = no + +[ipv4.sockstat_udp_sockets] +enabled = no + +[ipv4.sockstat_udp_mem] +enabled = no + +[ipv4.sockstat_raw_sockets] +enabled = no + +[ipv6.sockstat6_tcp_sockets] +enabled = no + +[ipv6.sockstat6_udp_sockets] +enabled = no + +[ip.tcpconnaborts] +enabled = no + +[ip.tcpreorders] +enabled = no + +[ip.tcpofo] +enabled = no + +[system.ip] +enabled = no + +[ip.inerrors] +enabled = no + +[ip.mcast] +enabled = no + +[ip.bcast] +enabled = no + +[ip.mcastpkts] +enabled = no + +[ip.bcastpkts] +enabled = no + +[ip.ecnpkts] +enabled = no + +[ipv4.packets] +enabled = no + +[ipv4.fragsin] +enabled = no + +[ipv4.errors] +enabled = no + +[ipv4.icmp] +enabled = no + +[ipv4.icmp_errors] +enabled = no + +[ipv4.icmpmsg] +enabled = no + +[ipv4.tcpsock] +enabled = no + +[ipv4.tcppackets] +enabled = no + +[ipv4.tcperrors] +enabled = no + +[ipv4.tcpopens] +enabled = no + +[ipv4.tcphandshake] +enabled = no + +[ipv4.udppackets] +enabled = no + +[ipv4.udperrors] +enabled = no + +[system.ipv6] +enabled = no + +[ipv6.packets] +enabled = no + +[ipv6.errors] +enabled = no + +[ipv6.udppackets] +enabled = no + +[ipv6.udperrors] +enabled = no + +[ipv6.mcast] +enabled = no + +[ipv6.mcastpkts] +enabled = no + +[ipv6.icmp] +enabled = no + +[ipv6.icmperrors] +enabled = no + +[ipv6.icmprouter] +enabled = no + +[ipv6.icmpneighbor] +enabled = no + +[ipv6.icmpmldv2] +enabled = no + +[ipv6.icmptypes] +enabled = no + +[ipv6.ect] +enabled = no + +[system.softnet_stat] +enabled = no + +[cpu.cpu0_softnet_stat] +enabled = no + +[cpu.cpu1_softnet_stat] +enabled = no + +[cpu.cpu2_softnet_stat] +enabled = no + +[cpu.cpu3_softnet_stat] +enabled = no + +[cpu.cpu4_softnet_stat] +enabled = no + +[cpu.cpu5_softnet_stat] +enabled = no + +[cpu.cpu6_softnet_stat] +enabled = no + +[cpu.cpu7_softnet_stat] +enabled = no + +[netfilter.conntrack_sockets] +enabled = no + +[netfilter.conntrack_new] +enabled = no + +[netfilter.conntrack_changes] +enabled = no + +[netfilter.conntrack_expect] +enabled = no + +[netfilter.conntrack_search] +enabled = no + +[netfilter.conntrack_errors] +enabled = no + +[disk.sda] +enabled = no + +[disk_ops.sda] +enabled = no + +[disk_backlog.sda] +enabled = no + +[disk_util.sda] +enabled = no + +[disk_iotime.sda] +enabled = no + +[disk.sdb] +enabled = no + +[disk_ops.sdb] +enabled = no + +[disk_backlog.sdb] +enabled = no + +[disk_util.sdb] +enabled = no + +[disk_mops.sdb] +enabled = no + +[disk_iotime.sdb] +enabled = no + +[disk.sdc] +enabled = no + +[disk_ops.sdc] +enabled = no + +[disk_backlog.sdc] +enabled = no + +[disk_util.sdc] +enabled = no + +[disk_mops.sdc] +enabled = no + +[disk_iotime.sdc] +enabled = no + +[system.io] +enabled = no + +[system.ipc_semaphores] +enabled = no + +[system.ipc_semaphore_arrays] +enabled = no + +[system.shared_memory_segments] +enabled = no + +[system.shared_memory_bytes] +enabled = no + +[netdata.plugin_proc_modules] +enabled = no + +[netdata.plugin_proc_cpu] +enabled = no + +[netdata.server_cpu] +enabled = no + +[netdata.clients] +enabled = no + +[netdata.requests] +enabled = no + +[netdata.net] +enabled = no + +[netdata.response_time] +enabled = no + +[netdata.compression_ratio] +enabled = no + +[netdata.dbengine_compression_ratio] +enabled = no + +[netdata.page_cache_hit_ratio] +enabled = no + +[netdata.page_cache_stats] +enabled = no + +[netdata.dbengine_long_term_page_stats] +enabled = no + +[netdata.dbengine_io_throughput] +enabled = no + +[netdata.dbengine_io_operations] +enabled = no + +[netdata.dbengine_global_errors] +enabled = no + +[netdata.dbengine_global_file_descriptors] +enabled = no + +[netdata.dbengine_ram] +enabled = no + +[disk_await.sda] +enabled = no + +[disk_avgsz.sda] +enabled = no + +[disk_svctm.sda] +enabled = no + +[disk_await.sdb] +enabled = no + +[disk_avgsz.sdb] +enabled = no + +[disk_svctm.sdb] +enabled = no + +[disk_await.sdc] +enabled = no + +[disk_avgsz.sdc] +enabled = no + +[disk_svctm.sdc] +enabled = no + +[netdata.queries] +enabled = no + +[netdata.db_points] +enabled = no + +[services.cpu] +enabled = no + +[services.mem_usage] +enabled = no + +[services.throttle_io_read] +enabled = no + +[services.throttle_io_write] +enabled = no + +[services.throttle_io_ops_read] +enabled = no + +[services.throttle_io_ops_write] +enabled = no + +[netfilter.netlink_new] +enabled = no + +[netfilter.netlink_changes] +enabled = no + +[netfilter.netlink_search] +enabled = no + +[netfilter.netlink_errors] +enabled = no + +[netfilter.netlink_expect] +enabled = no diff --git a/build_external/scenarios/gaps_lo/parent-compose.yml b/build_external/scenarios/gaps_lo/parent-compose.yml new file mode 100644 index 0000000..e7baad2 --- /dev/null +++ b/build_external/scenarios/gaps_lo/parent-compose.yml @@ -0,0 +1,13 @@ +version: '3.3' +services: + agent_parent: + image: debian_10_dev + command: /usr/sbin/netdata -D + ports: + - 21000:19999 + volumes: + - ./parent_stream.conf:/etc/netdata/stream.conf:ro + - ./parent_guid:/var/lib/netdata/registry/netdata.public.unique.id:ro + - ./mostly_off.conf:/etc/netdata/netdata.conf:ro + cap_add: + - SYS_PTRACE diff --git a/build_external/scenarios/gaps_lo/parent_guid b/build_external/scenarios/gaps_lo/parent_guid new file mode 100644 index 0000000..fee6f32 --- /dev/null +++ b/build_external/scenarios/gaps_lo/parent_guid @@ -0,0 +1 @@ +00000000-0000-0000-0000-000000000000 \ No newline at end of file diff --git a/build_external/scenarios/gaps_lo/parent_stream.conf b/build_external/scenarios/gaps_lo/parent_stream.conf new file mode 100644 index 0000000..99611cc --- /dev/null +++ b/build_external/scenarios/gaps_lo/parent_stream.conf @@ -0,0 +1,12 @@ +[00000000-0000-0000-0000-000000000000] + enabled = yes + allow from = * + default history = 3600 + # default memory mode = ram + + health enabled by default = auto + + # postpone alarms for a short period after the sender is connected + default postpone alarms on connect seconds = 60 + multiple connections = allow + diff --git a/build_external/scenarios/only-agent/docker-compose.yml b/build_external/scenarios/only-agent/docker-compose.yml new file mode 100644 index 0000000..eb1386f --- /dev/null +++ b/build_external/scenarios/only-agent/docker-compose.yml @@ -0,0 +1,8 @@ +version: '3' +services: + agent: + image: ${Distro}_${Version}_dev + command: /usr/sbin/netdata -D + ports: + - 80 + - 443 diff --git a/build_external/scenarios/parent-child/child_stream.conf b/build_external/scenarios/parent-child/child_stream.conf new file mode 100644 index 0000000..7348f8c --- /dev/null +++ b/build_external/scenarios/parent-child/child_stream.conf @@ -0,0 +1,10 @@ +[stream] + enabled = yes + destination = tcp:agent_parent + api key = 00000000-0000-0000-0000-000000000000 + timeout seconds = 60 + default port = 19999 + send charts matching = * + buffer size bytes = 1048576 + reconnect delay seconds = 5 + initial clock resync iterations = 60 diff --git a/build_external/scenarios/parent-child/docker-compose.yml b/build_external/scenarios/parent-child/docker-compose.yml new file mode 100644 index 0000000..ed6df15 --- /dev/null +++ b/build_external/scenarios/parent-child/docker-compose.yml @@ -0,0 +1,23 @@ +version: '3.3' +services: + agent_parent: + image: debian_10_dev + command: /usr/sbin/netdata -D + ports: + - 20000:19999 + volumes: + - ./parent_stream.conf:/etc/netdata/stream.conf:ro + agent_child1: + image: debian_9_dev + command: /usr/sbin/netdata -D + #ports: Removed to allow scaling + #- 20001:19999 + volumes: + - ./child_stream.conf:/etc/netdata/stream.conf:ro + agent_child2: + image: fedora_30_dev + command: /usr/sbin/netdata -D + #ports: Removed to allow scaling + #- 20002:19999 + volumes: + - ./child_stream.conf:/etc/netdata/stream.conf:ro diff --git a/build_external/scenarios/parent-child/parent_stream.conf b/build_external/scenarios/parent-child/parent_stream.conf new file mode 100644 index 0000000..bf85ae2 --- /dev/null +++ b/build_external/scenarios/parent-child/parent_stream.conf @@ -0,0 +1,7 @@ +[00000000-0000-0000-0000-000000000000] + enabled = yes + allow from = * + default history = 3600 + health enabled by default = auto + default postpone alarms on connect seconds = 60 + multiple connections = allow diff --git a/claim/Makefile.am b/claim/Makefile.am new file mode 100644 index 0000000..c838db9 --- /dev/null +++ b/claim/Makefile.am @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +CLEANFILES = \ + netdata-claim.sh \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +sbin_SCRIPTS = \ + netdata-claim.sh \ + $(NULL) + +dist_noinst_DATA = \ + netdata-claim.sh.in \ + README.md \ + $(NULL) + diff --git a/claim/README.md b/claim/README.md new file mode 100644 index 0000000..3731d20 --- /dev/null +++ b/claim/README.md @@ -0,0 +1,609 @@ + + +# Connect Agent to Cloud + +You can securely connect a Netdata Agent, running on a distributed node, to Netdata Cloud. A Space's +administrator creates a **claiming token**, which is used to add an Agent to their Space via the [Agent-Cloud link +(ACLK)](/aclk/README.md). + +Are you just starting out with Netdata Cloud? See our [get started with +Cloud](https://learn.netdata.cloud/docs/cloud/get-started) guide for a walkthrough of the process and simplified +instructions. + +When connecting an agent (also referred to as a node) to Netdata Cloud, you must complete a verification process that proves you have some level of authorization to manage the node itself. This verification is a security feature that helps prevent unauthorized users from seeing the data on your node. + +Only the administrators of a Space in Netdata Cloud can view the claiming token and accompanying script generated by +Netdata Cloud. + +> The connection process ensures no third party can add your node, and then view your node's metrics, in a Cloud account, +> Space, or War Room that you did not authorize. + +By connecting a node, you opt-in to sending data from your Agent to Netdata Cloud via the [ACLK](/aclk/README.md). This +data is encrypted by TLS while it is in transit. We use the RSA keypair created during the connection process to authenticate the +identity of the Netdata Agent when it connects to the Cloud. While the data does flow through Netdata Cloud servers on its way +from Agents to the browser, we do not store or log it. + +You can connect a node during the Netdata Cloud onboarding process, or after you created a Space by clicking on **Connect +Nodes** in the [Spaces management area](https://learn.netdata.cloud/docs/cloud/spaces#manage-spaces). + +There are two important notes regarding connecting nodes: + +- _You can only connect any given node in a single Space_. You can, however, add that connected node to multiple War Rooms + within that one Space. +- You must repeat the connection process on every node you want to add to Netdata Cloud. + +## How to connect a node + +There will be three main flows from where you might want to connect a node to Netdata Cloud. +* when you are on an [ +War Room](#empty-war-room) and you want to connect your first node +* when you are at the [Manage Space](#manage-space-or-war-room) area and you select **Connect Nodes** to connect a node, coming from Manage Space or Manage War Room +* when you are on the [Nodes view page](https://learn.netdata.cloud/docs/cloud/visualize/nodes) and want to connect a node - this process falls into the [Manage Space](#manage-space-or-war-room) flow + +Please note that only the administrators of a Space in Netdata Cloud can view the claiming token and accompanying script, generated by Netdata Cloud, to trigger the connection process. + +### Empty War Room + +Either at your first sign in or following ones, when you enter Netdata Cloud and are at a War Room that doesn’t have any node added to it, you will be able to: +* connect a new node to Netdata Cloud and add it to the War Room +* add a previously connected node to the War Room + +If your case is to connect a new node and add it to the War Room, you will need to tell us what environment the node is running on (Linux, Docker, macOS, Kubernetes) and then we will provide you with a script to initiate the connection process. You just will need to copy and paste it into your node's terminal. See one of the following sections depending on your case: +* [Linux](#connect-an-agent-running-in-linux) +* [Docker](#connect-an-agent-running-in-docker) +* [macOS](#connect-an-agent-running-in-macos) +* [Kubernetes](#connect-a-kubernetes-clusters-parent-netdata-pod) + +Repeat this process with every node you want to add to Netdata Cloud during onboarding. You can also add more nodes once you've +finished onboarding. + +### Manage Space or War Room + +To connect a node, select which War Rooms you want to add this node to with the dropdown, then copy and paste the script +given by Netdata Cloud into your node's terminal. + +When coming from [Nodes view page](https://learn.netdata.cloud/docs/cloud/visualize/nodes) the room parameter is already defined to current War Room. + +### Connect an agent running in Linux + +If you want to connect a node that is running on a Linux environment, the script that will be provided to you by Netdata Cloud is the [kickstart](/packaging/installer/README.md#automatic-one-line-installation-script) which will install the Netdata Agent on your node, if it isn't already installed, and connect the node to Netdata Cloud. It should be similar to: + +``` +wget -O /tmp/netdata-kickstart.sh https://my-netdata.io/kickstart.sh && sh /tmp/netdata-kickstart.sh --claim-token TOKEN --claim-rooms ROOM1,ROOM2 --claim-url https://api.netdata.cloud +``` +The script should return `Agent was successfully claimed.`. If the connecting to Netdata Cloud process returns errors, or if you don't see +the node in your Space after 60 seconds, see the [troubleshooting information](#troubleshooting). + +Please note that to run it you will either need to have root privileges or run it with the user that is running the agent, more details on the [Connect an agent without root privileges](#connect-an-agent-without-root-privileges) section. + +For more details on what are the extra parameters `claim-token`, `claim-rooms` and `claim-url` please refer to [Connect node to Netdata Cloud during installation](/packaging/installer/methods/kickstart.md#connect-node-to-netdata-cloud-during-installation). + +### Connect an agent without root privileges + +If you don't want to run the installation script to connect your nodes to Netdata Cloud with root privileges, you can discover which user is running the Agent, +switch to that user, and run the script. + +Use `grep` to search your `netdata.conf` file, which is typically located at `/etc/netdata/netdata.conf`, for the `run +as user` setting. For example: +To connect a node, select which War Rooms you want to add this node to with the dropdown, then copy and paste the script +given by Netdata Cloud into your node's terminal. + +```bash +grep "run as user" /etc/netdata/netdata.conf + # run as user = netdata +``` + +The default user is `netdata`. Yours may be different, so pay attention to the output from `grep`. Switch to that user +and run the script. + +```bash +wget -O /tmp/netdata-kickstart.sh https://my-netdata.io/kickstart.sh && sh /tmp/netdata-kickstart.sh --claim-token TOKEN --claim-rooms ROOM1,ROOM2 --claim-url https://api.netdata.cloud +``` +### Connect an agent running in Docker + +To connect an instance of the Netdata Agent running inside of a Docker container, it is recommended that you follow +the instructions and use the commands provided either in the `Nodes` tab of an [empty War Room](#empty-war-room) on Netdata Cloud or +in the shelf that appears when you click **Connect Nodes** and select **Docker**. + +However, users can also claim a new node by claiming environment variables in the container to have it automatically +connected on startup or restart. + +For the connection process to work, the contents of `/var/lib/netdata` _must_ be preserved across container +restarts using a persistent volume. See our [recommended `docker run` and Docker Compose +examples](/packaging/docker/README.md#create-a-new-netdata-agent-container) for details. + +#### Known issues on older hosts with seccomp enabled + +The nodes running on the following hosts **cannot be claimed**: + +- `libseccomp` version less than v2.3.3. +- Docker version less than v18.04.0-ce. +- The kernel is configured with CONFIG_SECCOMP enabled. + +To check if your kernel supports `seccomp`: + +```cmd +# grep CONFIG_SECCOMP= /boot/config-$(uname -r) 2>/dev/null || zgrep CONFIG_SECCOMP /proc/config.gz 2>/dev/null +CONFIG_SECCOMP=y +``` + +To resolve the issue, do one of the following actions: + +- Update to a newer version of Docker and `libseccomp` (recommended). +- Create a custom profile and pass it for the container. +- Run [without the default seccomp profile](https://docs.docker.com/engine/security/seccomp/#run-without-the-default-seccomp-profile) (unsafe, not recommended). + +
+See how to create a custom profile + +1. Download the moby default seccomp profile and change `defaultAction` to `SCMP_ACT_TRACE` on line 2. + + ```cmd + sudo wget https://raw.githubusercontent.com/moby/moby/master/profiles/seccomp/default.json -O /etc/docker/seccomp.json + sudo sed -i '2s/SCMP_ACT_ERRNO/SCMP_ACT_TRACE/' /etc/docker/seccomp.json + ``` + +2. Specify the new policy for the container explicitly. + + - When using `docker run`: + + ```cmd + docker run -d --name=netdata \ + --security-opt=seccomp=/etc/docker/seccomp.json \ + ... + ``` + + - When using `docker-compose`: + + > :warning: The security_opt option is ignored when deploying a stack in swarm mode. + + ```yaml + version: '3' + services: + netdata: + security_opt: + - seccomp:/etc/docker/seccomp.json + ... + ``` + + - When using `docker stack deploy`: + + Change the default profile globally by adding `--seccomp-profile=/etc/docker/seccomp.json` to the options passed to + dockerd on startup. + +
+ +#### Using environment variables + +The Netdata Docker container looks for the following environment variables on startup: + +- `NETDATA_CLAIM_TOKEN` +- `NETDATA_CLAIM_URL` +- `NETDATA_CLAIM_ROOMS` +- `NETDATA_CLAIM_PROXY` + +If the token and URL are specified in their corresponding variables _and_ the container is not already connected, +it will use these values to attempt to connect the container, automatically adding the node to the specified War +Rooms. If a proxy is specified, it will be used for the connection process and for connecting to Netdata Cloud. + +These variables can be specified using any mechanism supported by your container tooling for setting environment +variables inside containers. + +When using the `docker run` command, if you have an agent container already running, it is important to know that there will be a short period of downtime. This is due to the process of recreating the new agent container. + +The command to connect a new node to Netdata Cloud is: + +```bash +docker run -d --name=netdata \ + -p 19999:19999 \ + -v netdataconfig:/etc/netdata \ + -v netdatalib:/var/lib/netdata \ + -v netdatacache:/var/cache/netdata \ + -v /etc/passwd:/host/etc/passwd:ro \ + -v /etc/group:/host/etc/group:ro \ + -v /proc:/host/proc:ro \ + -v /sys:/host/sys:ro \ + -v /etc/os-release:/host/etc/os-release:ro \ + --restart unless-stopped \ + --cap-add SYS_PTRACE \ + --security-opt apparmor=unconfined \ + -e NETDATA_CLAIM_TOKEN=TOKEN \ + -e NETDATA_CLAIM_URL="https://api.netdata.cloud" \ + -e NETDATA_CLAIM_ROOMS=ROOM1,ROOM2 \ + -e NETDATA_CLAIM_PROXY=PROXY \ + netdata/netdata +``` +>Note: This command is suggested for connecting a new container. Using this command for an existing container recreates the container, though data +and configuration of the old container may be preserved. If you are claiming an existing container that can not be recreated, +you can add the container by going to Netdata Cloud, clicking the **Nodes** tab, clicking **Connect Nodes**, selecting **Docker**, and following +the instructions and commands provided or by following the instructions in an [empty War Room](#empty-war-room). + +The output that would be seen from the connection process when using other methods will be present in the container logs. + +Using the environment variables like this to handle the connection process is the preferred method of connecting Docker containers +as it works in the widest variety of situations and simplifies configuration management. + +#### Using Docker compose + +If you use `docker compose`, you can copy the config provided by Netdata Cloud, which should be same as the one below: + +```bash +version: '3' +services: + netdata: + image: netdata/netdata + container_name: netdata + hostname: example.com # set to fqdn of host + ports: + - 19999:19999 + restart: unless-stopped + cap_add: + - SYS_PTRACE + security_opt: + - apparmor:unconfined + volumes: + - netdataconfig:/etc/netdata + - netdatalib:/var/lib/netdata + - netdatacache:/var/cache/netdata + - /etc/passwd:/host/etc/passwd:ro + - /etc/group:/host/etc/group:ro + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /etc/os-release:/host/etc/os-release:ro + environment: + - NETDATA_CLAIM_TOKEN=TOKEN + - NETDATA_CLAIM_URL="https://api.netdata.cloud" + - NETDATA_CLAIM_ROOMS=ROOM1,ROOM2 + +volumes: + netdataconfig: + netdatalib: + netdatacache: +``` + +Then run the following command in the same directory as the `docker-compose.yml` file to start the container. + +```bash +docker-compose up -d +``` +#### Using docker exec + +Connect a _running Netdata Agent container_, where you don't want to recreate the existing container, append the script offered by Netdata Cloud to a `docker exec ...` command, replacing +`netdata` with the name of your running container: + +```bash +docker exec -it netdata netdata-claim.sh -token=TOKEN -rooms=ROOM1,ROOM2 -url=https://api.netdata.cloud +``` +The values for `ROOM1,ROOM2` can be found by by going to Netdata Cloud, clicking the **Nodes** tab, clicking **Connect Nodes**, selecting **Docker**, and copying the `rooms=` value in the command provided. + +The script should return `Agent was successfully claimed.`. If the connection process returns errors, or if +you don't see the node in your Space after 60 seconds, see the [troubleshooting information](#troubleshooting). + +### Connect an agent running in macOS + +To connect a node that is running on a macOS environment the script that will be provided to you by Netdata Cloud is the [kickstart](/packaging/installer/methods/macos.md#install-netdata-with-our-automatic-one-line-installation-script) which will install the Netdata Agent on your node, if it isn't already installed, and connect the node to Netdata Cloud. It should be similar to: + +```bash +curl https://my-netdata.io/kickstart.sh > /tmp/netdata-kickstart.sh && sh /tmp/netdata-kickstart.sh --install /usr/local/ --claim-token TOKEN --claim-rooms ROOM1,ROOM2 --claim-url https://api.netdata.cloud +``` +The script should return `Agent was successfully claimed.`. If the connecting to Netdata Cloud process returns errors, or if you don't see +the node in your Space after 60 seconds, see the [troubleshooting information](#troubleshooting). + +### Connect a Kubernetes cluster's parent Netdata pod + +Read our [Kubernetes installation](/packaging/installer/methods/kubernetes.md#connect-your-kubernetes-cluster-to-netdata-cloud) +for details on connecting a parent Netdata pod. + +### Connect through a proxy + +A Space's administrator can connect a node through HTTP(S) proxy. + +You should first configure the proxy in the `[cloud]` section of `netdata.conf`. The proxy settings you specify here +will also be used to tunnel the ACLK. The default `proxy` setting is `none`. + +```conf +[cloud] + proxy = none +``` + +The `proxy` setting can take one of the following values: + +- `none`: Do not use a proxy, even if the system configured otherwise. +- `env`: Try to read proxy settings from set environment variables `http_proxy`. +- `http://[user:pass@]host:ip`: The ACLK and connection process will use the specified HTTP(S) proxy. + +For example, a HTTP proxy setting may look like the following: + +```conf +[cloud] + proxy = http://203.0.113.0:1080 # With an IP address + proxy = http://proxy.example.com:1080 # With a URL +``` + +You can now move on to connecting. When you connect with the [kickstart](/packaging/installer/README.md#automatic-one-line-installation-script) script, add the `--claim-proxy=` parameter and +append the same proxy setting you added to `netdata.conf`. + +```bash +wget -O /tmp/netdata-kickstart.sh https://my-netdata.io/kickstart.sh && sh /tmp/netdata-kickstart.sh --claim-token TOKEN --claim-rooms ROOM1,ROOM2 --claim-url https://api.netdata.cloud --claim-proxy http://[user:pass@]host:ip +``` + +Hit **Enter**. The script should return `Agent was successfully claimed.`. If the connecting to Netdata Cloud process returns errors, or if +you don't see the node in your Space after 60 seconds, see the [troubleshooting information](#troubleshooting). + +### Troubleshooting + +If you're having trouble connecting a node, this may be because the [ACLK](/aclk/README.md) cannot connect to Cloud. + +With the Netdata Agent running, visit `http://NODE:19999/api/v1/info` in your browser, replacing `NODE` with the IP +address or hostname of your Agent. The returned JSON contains four keys that will be helpful to diagnose any issues you +might be having with the ACLK or connection process. + +```json + "cloud-enabled" + "cloud-available" + "agent-claimed" + "aclk-available" +``` + +On Netdata agent version `1.32` (`netdata -v` to find your version) and newer, the `netdata -W aclk-state` command can be used to get some diagnostic information about ACLK. Sample output: + +``` +ACLK Available: Yes +ACLK Implementation: Next Generation +New Cloud Protocol Support: Yes +Claimed: Yes +Claimed Id: 53aa76c2-8af5-448f-849a-b16872cc4ba1 +Online: Yes +Used Cloud Protocol: New +``` + +Use these keys and the information below to troubleshoot the ACLK. + +#### kickstart: unsupported Netdata installation + +If you run the kickstart script and get the following error `Existing install appears to be handled manually or through the system package manager.` you most probably installed Netdata using an unsupported package. + +If you are using an unsupported package, such as a third-party `.deb`/`.rpm` package provided by your distribution, +please remove that package and reinstall using our [recommended kickstart +script](/docs/get-started.mdx#install-on-linux-with-one-line-installer). + +#### kickstart: Failed to write new machine GUID + +If you run the kickstart script but don't have privileges required for the actions done on the connecting to Netdata Cloud process you will get the following error: + +```bash +Failed to write new machine GUID. Please make sure you have rights to write to /var/lib/netdata/registry/netdata.public.unique.id. +``` +For a successful execution you will need to run the script with root privileges or run it with the user that is running the agent, more details on the [Connect an agent without root privileges](#connect-an-agent-without-root-privileges) section. + +#### bash: netdata-claim.sh: command not found + +If you run the claiming script and see a `command not found` error, you either installed Netdata in a non-standard +location or are using an unsupported package. If you installed Netdata in a non-standard path using the `--install` +option, you need to update your `$PATH` or run `netdata-claim.sh` using the full path. For example, if you installed +Netdata to `/opt/netdata`, use `/opt/netdata/bin/netdata-claim.sh` to run the claiming script. + +If you are using an unsupported package, such as a third-party `.deb`/`.rpm` package provided by your distribution, +please remove that package and reinstall using our [recommended kickstart +script](/docs/get-started.mdx#install-on-linux-with-one-line-installer). + +#### Connecting on older distributions (Ubuntu 14.04, Debian 8, CentOS 6) + +If you're running an older Linux distribution or one that has reached EOL, such as Ubuntu 14.04 LTS, Debian 8, or CentOS +6, your Agent may not be able to securely connect to Netdata Cloud due to an outdated version of OpenSSL. These old +versions of OpenSSL cannot perform [hostname validation](https://wiki.openssl.org/index.php/Hostname_validation), which +helps securely encrypt SSL connections. + +We recommend you reinstall Netdata with a [static build](/packaging/installer/methods/kickstart.md#static-builds), which uses an +up-to-date version of OpenSSL with hostname validation enabled. + +If you choose to continue using the outdated version of OpenSSL, your node will still connect to Netdata Cloud, albeit +with hostname verification disabled. Without verification, your Netdata Cloud connection could be vulnerable to +man-in-the-middle attacks. + +#### cloud-enabled is false + +If `cloud-enabled` is `false`, you probably ran the installer with `--disable-cloud` option. + +Additionally, check that the `enabled` setting in `var/lib/netdata/cloud.d/cloud.conf` is set to `true`: + +```conf +[global] + enabled = true +``` + +To fix this issue, reinstall Netdata using your [preferred method](/packaging/installer/README.md) and do not add the +`--disable-cloud` option. + +#### cloud-available is false / ACLK Available: No + +If `cloud-available` is `false` after you verified Cloud is enabled in the previous step, the most likely issue is that +Cloud features failed to build during installation. + +If Cloud features fail to build, the installer continues and finishes the process without Cloud functionality as opposed +to failing the installation altogether. We do this to ensure the Agent will always finish installing. + +If you can't see an explicit error in the installer's output, you can run the installer with the `--require-cloud` +option. This option causes the installation to fail if Cloud functionality can't be built and enabled, and the +installer's output should give you more error details. + +You may see one of the following error messages during installation: + +- Failed to build libmosquitto. The install process will continue, but you will not be able to connect this node to + Netdata Cloud. +- Unable to fetch sources for libmosquitto. The install process will continue, but you will not be able to connect + this node to Netdata Cloud. +- Failed to build libwebsockets. The install process will continue, but you may not be able to connect this node to + Netdata Cloud. +- Unable to fetch sources for libwebsockets. The install process will continue, but you may not be able to connect + this node to Netdata Cloud. +- Could not find cmake, which is required to build libwebsockets. The install process will continue, but you may not + be able to connect this node to Netdata Cloud. +- Could not find cmake, which is required to build JSON-C. The install process will continue, but Netdata Cloud + support will be disabled. +- Failed to build JSON-C. Netdata Cloud support will be disabled. +- Unable to fetch sources for JSON-C. Netdata Cloud support will be disabled. + +One common cause of the installer failing to build Cloud features is not having one of the following dependencies on +your system: `cmake`, `json-c` and `OpenSSL`, including corresponding `devel` packages. + +You can also look for error messages in `/var/log/netdata/error.log`. Try one of the following two commands to search +for ACLK-related errors. + +```bash +less /var/log/netdata/error.log +grep -i ACLK /var/log/netdata/error.log +``` + +If the installer's output does not help you enable Cloud features, contact us by [creating an issue on +GitHub](https://github.com/netdata/netdata/issues/new?assignees=&labels=bug%2Cneeds+triage&template=BUG_REPORT.yml&title=The+installer+failed+to+prepare+the+required+dependencies+for+Netdata+Cloud+functionality) +with details about your system and relevant output from `error.log`. + +#### agent-claimed is false / Claimed: No + +You must [connect your node](#how-to-connect-a-node). + +#### aclk-available is false / Online: No + +If `aclk-available` is `false` and all other keys are `true`, your Agent is having trouble connecting to the Cloud +through the ACLK. Please check your system's firewall. + +If your Agent needs to use a proxy to access the internet, you must [set up a proxy for +connecting](#connect-through-a-proxy). + +If you are certain firewall and proxy settings are not the issue, you should consult the Agent's `error.log` at +`/var/log/netdata/error.log` and contact us by [creating an issue on +GitHub](https://github.com/netdata/netdata/issues/new?assignees=&labels=bug%2Cneeds+triage&template=BUG_REPORT.yml&title=ACLK-available-is-false) +with details about your system and relevant output from `error.log`. + +### Remove and reconnect a node + +To remove a node from your Space in Netdata Cloud, delete the `cloud.d/` directory in your Netdata library directory. + +```bash +cd /var/lib/netdata # Replace with your Netdata library directory, if not /var/lib/netdata/ +sudo rm -rf cloud.d/ +``` + +This node no longer has access to the credentials it was used when connecting to Netdata Cloud via the ACLK. +You will still be able to see this node in your War Rooms in an **unreachable** state. + +If you want to reconnect this node, you need to: +1. Ensure that the `/var/lib/netdata/cloud.d` directory doesn't exist. In some installations, the path is `/opt/netdata/var/lib/netdata/cloud.d`. +2. Stop the agent. +3. Ensure that the `uuidgen-runtime` package is installed. Run ```echo "$(uuidgen)"``` and validate you get back a UUID. +4. Copy the kickstart.sh command to add a node from your space and add to the end of it `--claim-id "$(uuidgen)"`. Run the command and look for the message `Node was successfully claimed.` +5. Start the agent + + +## Connecting reference +In the sections below, you can find reference material for the kickstart script, claiming script, connecting via the Agent's command line +tool, and details about the files found in `cloud.d`. + +### The `cloud.conf` file + +This section defines how and whether your Agent connects to [Netdata Cloud](https://learn.netdata.cloud/docs/cloud/) +using the [ACLK](/aclk/README.md). + +| setting | default | info | +|:-------------- |:------------------------- |:-------------------------------------------------------------------------------------------------------------------------------------- | +| cloud base url | https://api.netdata.cloud | The URL for the Netdata Cloud web application. You should not change this. If you want to disable Cloud, change the `enabled` setting. | +| enabled | yes | The runtime option to disable the [Agent-Cloud link](/aclk/README.md) and prevent your Agent from connecting to Netdata Cloud. | + +### kickstart script + +The best way to install Netdata and connect your nodes to Netdata Cloud is with our automatic one-line installation script, [kickstart](/packaging/installer/README.md#automatic-one-line-installation-script). This script will install the Netdata Agent, in case it isn't already installed, and connect your node to Netdata Cloud. + +This works with: +* most Linux distributions, see [Netdata's platform support policy](/packaging/PLATFORM_SUPPORT.md) +* macOS + +For details on how to run this script please check [How to connect a node](#how-to-connect-a-node) and choose your environment. + +In case Netdata Agent is already installed and you run this script to connect a node to Netdata Cloud it will not upgrade your agent automatically. If you also want to upgrade the Agent installation you'll need to run the script again without the connection options. + +Our suggestion is to first run kickstart to upgrade your agent by running the command below and the run the [How to connect a node] +(#how-to-connect-a-node). + +**Linux** + +```bash +wget -O /tmp/netdata-kickstart.sh https://my-netdata.io/kickstart.sh && sh /tmp/netdata-kickstart.sh +``` + +**macOS** + +```bash +curl https://my-netdata.io/kickstart.sh > /tmp/netdata-kickstart.sh && sh /tmp/netdata-kickstart.sh --install /usr/local/ +``` +### Claiming script + +A Space's administrator can also connect an Agent by directly calling the `netdata-claim.sh` script either with root privileges +using `sudo`, or as the user running the Agent (typically `netdata`), and passing the following arguments: + +```sh +-token=TOKEN + where TOKEN is the Space's claiming token. +-rooms=ROOM1,ROOM2,... + where ROOMX is the War Room this node should be added to. This list is optional. +-url=URL_BASE + where URL_BASE is the Netdata Cloud endpoint base URL. By default, this is https://api.netdata.cloud. +-id=AGENT_ID + where AGENT_ID is the unique identifier of the Agent. This is the Agent's MACHINE_GUID by default. +-hostname=HOSTNAME + where HOSTNAME is the result of the hostname command by default. +-proxy=PROXY_URL + where PROXY_URL is the endpoint of a HTTP or HTTPS proxy. +``` + +For example, the following command connects an Agent and adds it to rooms `room1` and `room2`: + +```sh +netdata-claim.sh -token=MYTOKEN1234567 -rooms=room1,room2 +``` + +You should then update the `netdata` service about the result with `netdatacli`: + +```sh +netdatacli reload-claiming-state +``` + +This reloads the Agent connection state from disk. + +Our recommendation is to trigger the connection process using the [kickstart](/packaging/installer/README.md#automatic-one-line-installation-script) whenever possible. + +### Netdata Agent command line + +If a Netdata Agent is running, the Space's administrator can connect a node using the `netdata` service binary with +additional command line parameters: + +```sh +-W "claim -token=TOKEN -rooms=ROOM1,ROOM2" +``` + +For example: + +```sh +/usr/sbin/netdata -D -W "claim -token=MYTOKEN1234567 -rooms=room1,room2" +``` + +If need be, the user can override the Agent's defaults by providing additional arguments like those described +[here](#claiming-script). + +### Connection directory + +Netdata stores the Agent's connection-related state in the Netdata library directory under `cloud.d`. For a default +installation, this directory exists at `/var/lib/netdata/cloud.d`. The directory and its files should be owned by the +user that runs the Agent, which is typically the `netdata` user. + +The `cloud.d/token` file should contain the claiming-token and the `cloud.d/rooms` file should contain the list of War +Rooms you added that node to. + +The user can also put the Cloud endpoint's full certificate chain in `cloud.d/cloud_fullchain.pem` so that the Agent +can trust the endpoint if necessary. + + diff --git a/claim/claim.c b/claim/claim.c new file mode 100644 index 0000000..d997fc8 --- /dev/null +++ b/claim/claim.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "claim.h" +#include "registry/registry_internals.h" +#include "aclk/aclk.h" +#include "aclk/aclk_proxy.h" + +char *claiming_pending_arguments = NULL; + +static char *claiming_errors[] = { + "Agent claimed successfully", // 0 + "Unknown argument", // 1 + "Problems with claiming working directory", // 2 + "Missing dependencies", // 3 + "Failure to connect to endpoint", // 4 + "The CLI didn't work", // 5 + "Wrong user", // 6 + "Unknown HTTP error message", // 7 + "invalid node id", // 8 + "invalid node name", // 9 + "invalid room id", // 10 + "invalid public key", // 11 + "token expired/token not found/invalid token", // 12 + "already claimed", // 13 + "processing claiming", // 14 + "Internal Server Error", // 15 + "Gateway Timeout", // 16 + "Service Unavailable", // 17 + "Agent Unique Id Not Readable" // 18 +}; + +/* Retrieve the claim id for the agent. + * Caller owns the string. +*/ +char *get_agent_claimid() +{ + char *result; + rrdhost_aclk_state_lock(localhost); + result = (localhost->aclk_state.claimed_id == NULL) ? NULL : strdupz(localhost->aclk_state.claimed_id); + rrdhost_aclk_state_unlock(localhost); + return result; +} + +#define CLAIMING_COMMAND_LENGTH 16384 +#define CLAIMING_PROXY_LENGTH CLAIMING_COMMAND_LENGTH/4 + +extern struct registry registry; + +/* rrd_init() and post_conf_load() must have been called before this function */ +void claim_agent(char *claiming_arguments) +{ + if (!netdata_cloud_setting) { + error("Refusing to claim agent -> cloud functionality has been disabled"); + return; + } + +#ifndef DISABLE_CLOUD + int exit_code; + pid_t command_pid; + char command_buffer[CLAIMING_COMMAND_LENGTH + 1]; + FILE *fp_child_output, *fp_child_input; + + // This is guaranteed to be set early in main via post_conf_load() + char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL); + if (cloud_base_url == NULL) + fatal("Do not move the cloud base url out of post_conf_load!!"); + const char *proxy_str; + ACLK_PROXY_TYPE proxy_type; + char proxy_flag[CLAIMING_PROXY_LENGTH] = "-noproxy"; + + proxy_str = aclk_get_proxy(&proxy_type); + + if (proxy_type == PROXY_TYPE_SOCKS5 || proxy_type == PROXY_TYPE_HTTP) + snprintf(proxy_flag, CLAIMING_PROXY_LENGTH, "-proxy=\"%s\"", proxy_str); + + snprintfz(command_buffer, + CLAIMING_COMMAND_LENGTH, + "exec netdata-claim.sh %s -hostname=%s -id=%s -url=%s -noreload %s", + + proxy_flag, + netdata_configured_hostname, + localhost->machine_guid, + cloud_base_url, + claiming_arguments); + + info("Executing agent claiming command 'netdata-claim.sh'"); + fp_child_output = netdata_popen(command_buffer, &command_pid, &fp_child_input); + if(!fp_child_output) { + error("Cannot popen(\"%s\").", command_buffer); + return; + } + info("Waiting for claiming command to finish."); + while (fgets(command_buffer, CLAIMING_COMMAND_LENGTH, fp_child_output) != NULL) {;} + exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid); + info("Agent claiming command returned with code %d", exit_code); + if (0 == exit_code) { + load_claiming_state(); + return; + } + if (exit_code < 0) { + error("Agent claiming command failed to complete its run."); + return; + } + errno = 0; + unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1; + + if ((unsigned)exit_code > maximum_known_exit_code) { + error("Agent failed to be claimed with an unknown error."); + return; + } + error("Agent failed to be claimed with the following error message:"); + error("\"%s\"", claiming_errors[exit_code]); +#else + UNUSED(claiming_arguments); + UNUSED(claiming_errors); +#endif +} + +#ifdef ENABLE_ACLK +extern int aclk_connected, aclk_kill_link, aclk_disable_runtime; +#endif + +/* Change the claimed state of the agent. + * + * This only happens when the user has explicitly requested it: + * - via the cli tool by reloading the claiming state + * - after spawning the claim because of a command-line argument + * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK + */ +void load_claiming_state(void) +{ + // -------------------------------------------------------------------- + // Check if the cloud is enabled +#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK ) + netdata_cloud_setting = 0; +#else + uuid_t uuid; + + // Propagate into aclk and registry. Be kind of atomic... + appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL); + + rrdhost_aclk_state_lock(localhost); + if (localhost->aclk_state.claimed_id) { + if (aclk_connected) + localhost->aclk_state.prev_claimed_id = strdupz(localhost->aclk_state.claimed_id); + freez(localhost->aclk_state.claimed_id); + localhost->aclk_state.claimed_id = NULL; + } + if (aclk_connected) + { + info("Agent was already connected to Cloud - forcing reconnection under new credentials"); + aclk_kill_link = 1; + } + aclk_disable_runtime = 0; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/cloud.d/claimed_id", netdata_configured_varlib_dir); + + long bytes_read; + char *claimed_id = read_by_filename(filename, &bytes_read); + if(claimed_id && uuid_parse(claimed_id, uuid)) { + error("claimed_id \"%s\" doesn't look like valid UUID", claimed_id); + freez(claimed_id); + claimed_id = NULL; + } + + if(claimed_id) { + localhost->aclk_state.claimed_id = mallocz(UUID_STR_LEN); + uuid_unparse_lower(uuid, localhost->aclk_state.claimed_id); + } + + invalidate_node_instances(&localhost->host_uuid, claimed_id ? &uuid : NULL); + metaqueue_store_claim_id(&localhost->host_uuid, claimed_id ? &uuid : NULL); + + rrdhost_aclk_state_unlock(localhost); + if (!claimed_id) { + info("Unable to load '%s', setting state to AGENT_UNCLAIMED", filename); + return; + } + + freez(claimed_id); + + info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename); + netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1); +#endif +} + +struct config cloud_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +void load_cloud_conf(int silent) +{ + char *filename; + errno = 0; + + int ret = 0; + + filename = strdupz_path_subpath(netdata_configured_varlib_dir, "cloud.d/cloud.conf"); + + ret = appconfig_load(&cloud_config, filename, 1, NULL); + if(!ret && !silent) { + info("CONFIG: cannot load cloud config '%s'. Running with internal defaults.", filename); + } + freez(filename); +} diff --git a/claim/claim.h b/claim/claim.h new file mode 100644 index 0000000..fc76037 --- /dev/null +++ b/claim/claim.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_CLAIM_H +#define NETDATA_CLAIM_H 1 + +#include "daemon/common.h" + +extern char *claiming_pending_arguments; +extern struct config cloud_config; + +void claim_agent(char *claiming_arguments); +char *get_agent_claimid(void); +void load_claiming_state(void); +void load_cloud_conf(int silent); + +#endif //NETDATA_CLAIM_H diff --git a/claim/netdata-claim.sh.in b/claim/netdata-claim.sh.in new file mode 100755 index 0000000..f87fbc2 --- /dev/null +++ b/claim/netdata-claim.sh.in @@ -0,0 +1,442 @@ +#!/usr/bin/env bash +# netdata +# real-time performance and health monitoring, done right! +# (C) 2017 Costa Tsaousis +# SPDX-License-Identifier: GPL-3.0-or-later + +# Exit code: 0 - Success +# Exit code: 1 - Unknown argument +# Exit code: 2 - Problems with claiming working directory +# Exit code: 3 - Missing dependencies +# Exit code: 4 - Failure to connect to endpoint +# Exit code: 5 - The CLI didn't work +# Exit code: 6 - Wrong user +# Exit code: 7 - Unknown HTTP error message +# +# OK: Agent claimed successfully +# HTTP Status code: 204 +# Exit code: 0 +# +# Unknown HTTP error message +# HTTP Status code: 422 +# Exit code: 7 +ERROR_KEYS[7]="None" +ERROR_MESSAGES[7]="Unknown HTTP error message" + +# Error: The agent id is invalid; it does not fulfill the constraints +# HTTP Status code: 422 +# Exit code: 8 +ERROR_KEYS[8]="ErrInvalidNodeID" +ERROR_MESSAGES[8]="invalid node id" + +# Error: The agent hostname is invalid; it does not fulfill the constraints +# HTTP Status code: 422 +# Exit code: 9 +ERROR_KEYS[9]="ErrInvalidNodeName" +ERROR_MESSAGES[9]="invalid node name" + +# Error: At least one of the given rooms ids is invalid; it does not fulfill the constraints +# HTTP Status code: 422 +# Exit code: 10 +ERROR_KEYS[10]="ErrInvalidRoomID" +ERROR_MESSAGES[10]="invalid room id" + +# Error: Invalid public key; the public key is empty or not present +# HTTP Status code: 422 +# Exit code: 11 +ERROR_KEYS[11]="ErrInvalidPublicKey" +ERROR_MESSAGES[11]="invalid public key" +# +# Error: Expired, missing or invalid token +# HTTP Status code: 403 +# Exit code: 12 +ERROR_KEYS[12]="ErrForbidden" +ERROR_MESSAGES[12]="token expired/token not found/invalid token" + +# Error: Duplicate agent id; an agent with the same id is already registered in the cloud +# HTTP Status code: 409 +# Exit code: 13 +ERROR_KEYS[13]="ErrAlreadyClaimed" +ERROR_MESSAGES[13]="already claimed" + +# Error: The node claiming process is still in progress. +# HTTP Status code: 102 +# Exit code: 14 +ERROR_KEYS[14]="ErrProcessingClaim" +ERROR_MESSAGES[14]="processing claiming" + +# Error: Internal server error. Any other unexpected error (DB problems, etc.) +# HTTP Status code: 500 +# Exit code: 15 +ERROR_KEYS[15]="ErrInternalServerError" +ERROR_MESSAGES[15]="Internal Server Error" + +# Error: There was a timeout processing the claim. +# HTTP Status code: 504 +# Exit code: 16 +ERROR_KEYS[16]="ErrGatewayTimeout" +ERROR_MESSAGES[16]="Gateway Timeout" + +# Error: The service cannot handle the claiming request at this time. +# HTTP Status code: 503 +# Exit code: 17 +ERROR_KEYS[17]="ErrServiceUnavailable" +ERROR_MESSAGES[17]="Service Unavailable" + +# Exit code: 18 - Agent unique id is not generated yet. + +NETDATA_RUNNING=1 + +get_config_value() { + conf_file="${1}" + section="${2}" + key_name="${3}" + if [ "${NETDATA_RUNNING}" -eq 1 ]; then + config_result=$(@sbindir_POST@/netdatacli 2>/dev/null read-config "$conf_file|$section|$key_name"; exit $?) + result="$?" + if [ "${result}" -ne 0 ]; then + echo >&2 "Unable to communicate with Netdata daemon, querying config from disk instead." + NETDATA_RUNNING=0 + fi + fi + if [ "${NETDATA_RUNNING}" -eq 0 ]; then + config_result=$(@sbindir_POST@/netdata 2>/dev/null -W get2 "$conf_file" "$section" "$key_name" unknown_default) + fi + echo "$config_result" +} +if command -v curl >/dev/null 2>&1 ; then + URLTOOL="curl" +elif command -v wget >/dev/null 2>&1 ; then + URLTOOL="wget" +else + echo >&2 "I need curl or wget to proceed, but neither is available on this system." + exit 3 +fi +if ! command -v openssl >/dev/null 2>&1 ; then + echo >&2 "I need openssl to proceed, but it is not available on this system." + exit 3 +fi + +# shellcheck disable=SC2050 +if [ "@enable_cloud_POST@" = "no" ]; then + echo >&2 "This agent was built with --disable-cloud and cannot be claimed" + exit 3 +fi +# shellcheck disable=SC2050 +if [ "@enable_aclk_POST@" != "yes" ]; then + echo >&2 "This agent was built without the dependencies for Cloud and cannot be claimed" + exit 3 +fi + +# ----------------------------------------------------------------------------- +# defaults to allow running this script by hand + +[ -z "${NETDATA_VARLIB_DIR}" ] && NETDATA_VARLIB_DIR="@varlibdir_POST@" +MACHINE_GUID_FILE="@registrydir_POST@/netdata.public.unique.id" +CLAIMING_DIR="${NETDATA_VARLIB_DIR}/cloud.d" +TOKEN="unknown" +URL_BASE=$(get_config_value cloud global "cloud base url") +[ -z "$URL_BASE" ] && URL_BASE="https://api.netdata.cloud" # Cover post-install with --dont-start +ID="unknown" +ROOMS="" +[ -z "$HOSTNAME" ] && HOSTNAME=$(hostname) +CLOUD_CERTIFICATE_FILE="${CLAIMING_DIR}/cloud_fullchain.pem" +VERBOSE=0 +INSECURE=0 +RELOAD=1 +NETDATA_USER=$(get_config_value netdata global "run as user") +[ -z "$EUID" ] && EUID="$(id -u)" + + +gen_id() { + local id + + if command -v uuidgen > /dev/null 2>&1; then + id="$(uuidgen | tr '[:upper:]' '[:lower:]')" + elif [ -r /proc/sys/kernel/random/uuid ]; then + id="$(cat /proc/sys/kernel/random/uuid)" + else + echo >&2 "Unable to generate machine ID." + exit 18 + fi + + if [ "${id}" = "8a795b0c-2311-11e6-8563-000c295076a6" ] || [ "${id}" = "4aed1458-1c3e-11e6-a53f-000c290fc8f5" ]; then + gen_id + else + echo "${id}" + fi +} + +# get the MACHINE_GUID by default +if [ -r "${MACHINE_GUID_FILE}" ]; then + ID="$(cat "${MACHINE_GUID_FILE}")" + MGUID=$ID +elif [ -f "${MACHINE_GUID_FILE}" ]; then + echo >&2 "netdata.public.unique.id is not readable. Please make sure you have rights to read it (Filename: ${MACHINE_GUID_FILE})." + exit 18 +else + if mkdir -p "${MACHINE_GUID_FILE%/*}" && /bin/echo -n "$(gen_id)" > "${MACHINE_GUID_FILE}"; then + ID="$(cat "${MACHINE_GUID_FILE}")" + MGUID=$ID + else + echo >&2 "Failed to write new machine GUID. Please make sure you have rights to write to ${MACHINE_GUID_FILE}." + exit 18 + fi +fi + +# get token from file +if [ -r "${CLAIMING_DIR}/token" ]; then + TOKEN="$(cat "${CLAIMING_DIR}/token")" +fi + +# get rooms from file +if [ -r "${CLAIMING_DIR}/rooms" ]; then + ROOMS="$(cat "${CLAIMING_DIR}/rooms")" +fi + +for arg in "$@" +do + case $arg in + -token=*) TOKEN=${arg:7} ;; + -url=*) [ -n "${arg:5}" ] && URL_BASE=${arg:5} ;; + -id=*) ID=$(echo "${arg:4}" | tr '[:upper:]' '[:lower:]');; + -rooms=*) ROOMS=${arg:7} ;; + -hostname=*) HOSTNAME=${arg:10} ;; + -verbose) VERBOSE=1 ;; + -insecure) INSECURE=1 ;; + -proxy=*) PROXY=${arg:7} ;; + -noproxy) NOPROXY=yes ;; + -noreload) RELOAD=0 ;; + -user=*) NETDATA_USER=${arg:6} ;; + -daemon-not-running) NETDATA_RUNNING=0 ;; + *) echo >&2 "Unknown argument ${arg}" + exit 1 ;; + esac + shift 1 +done + +if [ "$EUID" != "0" ] && [ "$(whoami)" != "$NETDATA_USER" ]; then + echo >&2 "This script must be run by the $NETDATA_USER user account" + exit 6 +fi + +# if curl not installed give warning SOCKS can't be used +if [[ "${URLTOOL}" != "curl" && "${PROXY:0:5}" = socks ]] ; then + echo >&2 "wget doesn't support SOCKS. Please install curl or disable SOCKS proxy." + exit 1 +fi + +echo >&2 "Token: ****************" +echo >&2 "Base URL: $URL_BASE" +echo >&2 "Id: $ID" +echo >&2 "Rooms: $ROOMS" +echo >&2 "Hostname: $HOSTNAME" +echo >&2 "Proxy: $PROXY" +echo >&2 "Netdata user: $NETDATA_USER" + +# create the claiming directory for this user +if [ ! -d "${CLAIMING_DIR}" ] ; then + mkdir -p "${CLAIMING_DIR}" && chmod 0770 "${CLAIMING_DIR}" +# shellcheck disable=SC2181 + if [ $? -ne 0 ] ; then + echo >&2 "Failed to create claiming working directory ${CLAIMING_DIR}" + exit 2 + fi +fi +if [ ! -w "${CLAIMING_DIR}" ] ; then + echo >&2 "No write permission in claiming working directory ${CLAIMING_DIR}" + exit 2 +fi + +if [ ! -f "${CLAIMING_DIR}/private.pem" ] ; then + echo >&2 "Generating private/public key for the first time." + if ! openssl genrsa -out "${CLAIMING_DIR}/private.pem" 2048 ; then + echo >&2 "Failed to generate private/public key pair." + exit 2 + fi +fi +if [ ! -f "${CLAIMING_DIR}/public.pem" ] ; then + echo >&2 "Extracting public key from private key." + if ! openssl rsa -in "${CLAIMING_DIR}/private.pem" -outform PEM -pubout -out "${CLAIMING_DIR}/public.pem" ; then + echo >&2 "Failed to extract public key." + exit 2 + fi +fi + +TARGET_URL="${URL_BASE%/}/api/v1/spaces/nodes/${ID}" +# shellcheck disable=SC2002 +KEY=$(cat "${CLAIMING_DIR}/public.pem" | tr '\n' '!' | sed -e 's/!/\\n/g') +# shellcheck disable=SC2001 +[ -n "$ROOMS" ] && ROOMS=\"$(echo "$ROOMS" | sed s'/,/", "/g')\" + +cat > "${CLAIMING_DIR}/tmpin.txt" <"${CLAIMING_DIR}/tmpout.txt" + else + eval "${URLCOMMAND} \"${TARGET_URL}\"" >"${CLAIMING_DIR}/tmpout.txt" 2>&1 + fi + URLCOMMAND_EXIT_CODE=$? + if [ "${URLTOOL}" = "wget" ] && [ "${URLCOMMAND_EXIT_CODE}" -eq 8 ] ; then + # We consider the server issuing an error response a successful attempt at communicating + URLCOMMAND_EXIT_CODE=0 + fi + + # Check if URLCOMMAND connected and received reply + if [ "${URLCOMMAND_EXIT_CODE}" -ne 0 ] ; then + echo >&2 "Failed to connect to ${URL_BASE}, return code ${URLCOMMAND_EXIT_CODE}" + rm -f "${CLAIMING_DIR}/tmpout.txt" + return 4 + fi + + if [ "${VERBOSE}" == 1 ] ; then + echo "Response from server:" + cat "${CLAIMING_DIR}/tmpout.txt" + fi + + return 0 +} + +for i in {1..3} +do + if attempt_contact ; then + echo "Connection attempt $i successful" + break + fi + echo "Connection attempt $i failed. Retry in ${i}s." + if [ "$i" -eq 5 ] ; then + rm -f "${CLAIMING_DIR}/tmpin.txt" + exit 4 + fi + sleep "$i" +done + +rm -f "${CLAIMING_DIR}/tmpin.txt" + +ERROR_KEY=$(grep "\"errorMsgKey\":" "${CLAIMING_DIR}/tmpout.txt" | awk -F "errorMsgKey\":\"" '{print $2}' | awk -F "\"" '{print $1}') +case ${ERROR_KEY} in + "ErrInvalidNodeID") EXIT_CODE=8 ;; + "ErrInvalidNodeName") EXIT_CODE=9 ;; + "ErrInvalidRoomID") EXIT_CODE=10 ;; + "ErrInvalidPublicKey") EXIT_CODE=11 ;; + "ErrForbidden") EXIT_CODE=12 ;; + "ErrAlreadyClaimed") EXIT_CODE=13 ;; + "ErrProcessingClaim") EXIT_CODE=14 ;; + "ErrInternalServerError") EXIT_CODE=15 ;; + "ErrGatewayTimeout") EXIT_CODE=16 ;; + "ErrServiceUnavailable") EXIT_CODE=17 ;; + *) EXIT_CODE=7 ;; +esac + +HTTP_STATUS_CODE=$(grep "HTTP" "${CLAIMING_DIR}/tmpout.txt" | tail -1 | awk -F " " '{print $2}') +if [ "${HTTP_STATUS_CODE}" = "204" ] ; then + EXIT_CODE=0 +fi + +if [ "${HTTP_STATUS_CODE}" = "204" ] || [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then + rm -f "${CLAIMING_DIR}/tmpout.txt" + if [ "${HTTP_STATUS_CODE}" = "204" ] ; then + echo -n "${ID}" >"${CLAIMING_DIR}/claimed_id" || (echo >&2 "Claiming failed"; set -e; exit 2) + fi + rm -f "${CLAIMING_DIR}/token" || (echo >&2 "Claiming failed"; set -e; exit 2) + + # Rewrite the cloud.conf on the disk + cat > "$CLAIMING_DIR/cloud.conf" <&2 "Claiming failed"; set -e; exit 2) + fi + if [ "${RELOAD}" == "0" ] ; then + exit $EXIT_CODE + fi + + if [ -z "${PROXY}" ]; then + PROXYMSG="" + else + PROXYMSG="You have attempted to claim this node through a proxy - please update your the proxy setting in your netdata.conf to ${PROXY}. " + fi + # Update cloud.conf in the agent memory + @sbindir_POST@/netdatacli write-config 'cloud|global|enabled|yes' && \ + @sbindir_POST@/netdatacli write-config "cloud|global|cloud base url|$URL_BASE" && \ + @sbindir_POST@/netdatacli reload-claiming-state && \ + if [ "${HTTP_STATUS_CODE}" = "204" ] ; then + echo >&2 "${PROXYMSG}Node was successfully claimed." + else + echo >&2 "The agent cloud base url is set to the url provided." + echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored." + echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\"" + fi && exit $EXIT_CODE + + if [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then + echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored." + echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\"" + exit $EXIT_CODE + fi + echo >&2 "${PROXYMSG}The claim was successful but the agent could not be notified ($?)- it requires a restart to connect to the cloud." + [ "$NETDATA_RUNNING" -eq 0 ] && exit 0 || exit 5 +fi + +echo >&2 "Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\"" +if [ "${VERBOSE}" == 1 ]; then + echo >&2 "Error key was:\"${ERROR_KEYS[$EXIT_CODE]}\"" +fi +rm -f "${CLAIMING_DIR}/tmpout.txt" +exit $EXIT_CODE diff --git a/cli/Makefile.am b/cli/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/cli/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 0000000..afd5651 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,40 @@ + + +# Netdata CLI + +You can see the commands `netdatacli` supports by executing it with `netdatacli` and entering `help` in +standard input. All commands are given as standard input to `netdatacli`. + +The commands that a running netdata agent can execute are the following: + +```sh +The commands are (arguments are in brackets): +help + Show this help menu. +reload-health + Reload health configuration. +reload-labels + Reload all labels. +save-database + Save internal DB to disk for database mode save. +reopen-logs + Close and reopen log files. +shutdown-agent + Cleanup and exit the netdata agent. +fatal-agent + Log the state and halt the netdata agent. +reload-claiming-state + Reload agent claiming state from disk. +ping + Return with 'pong' if agent is alive. +aclk-state [json] + Returns current state of ACLK and Cloud connection. (optionally in json) +``` + +Those commands are the same that can be sent to netdata via [signals](/daemon/README.md#command-line-options). + + diff --git a/cli/cli.c b/cli/cli.c new file mode 100644 index 0000000..c14653f --- /dev/null +++ b/cli/cli.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "cli.h" +#include "libnetdata/required_dummies.h" + +static uv_pipe_t client_pipe; +static uv_write_t write_req; +static uv_shutdown_t shutdown_req; + +static char command_string[MAX_COMMAND_LENGTH]; +static unsigned command_string_size; + +static char response_string[MAX_COMMAND_LENGTH]; +static unsigned response_string_size; + +static int exit_status; + +struct command_context { + uv_work_t work; + uv_stream_t *client; + cmd_t idx; + char *args; + char *message; + cmd_status_t status; +}; + +static void parse_command_reply(void) +{ + FILE *stream = NULL; + char *pos; + int syntax_error = 0; + + for (pos = response_string ; + pos < response_string + response_string_size && !syntax_error ; + ++pos) { + /* Skip white-space characters */ + for ( ; isspace(*pos) && ('\0' != *pos); ++pos) {;} + + if ('\0' == *pos) + continue; + + switch (*pos) { + case CMD_PREFIX_EXIT_CODE: + exit_status = atoi(++pos); + break; + case CMD_PREFIX_INFO: + stream = stdout; + break; + case CMD_PREFIX_ERROR: + stream = stderr; + break; + default: + syntax_error = 1; + fprintf(stderr, "Syntax error, failed to parse command response.\n"); + break; + } + if (stream) { + fprintf(stream, "%s\n", ++pos); + pos += strlen(pos); + stream = NULL; + } + } +} + +static void pipe_read_cb(uv_stream_t *client, ssize_t nread, const uv_buf_t *buf) +{ + if (0 == nread) { + fprintf(stderr, "%s: Zero bytes read by command pipe.\n", __func__); + } else if (UV_EOF == nread) { +// fprintf(stderr, "EOF found in command pipe.\n"); + parse_command_reply(); + } else if (nread < 0) { + fprintf(stderr, "%s: %s\n", __func__, uv_strerror(nread)); + } + + if (nread < 0) { /* stop stream due to EOF or error */ + (void)uv_read_stop((uv_stream_t *)client); + } else if (nread) { + size_t to_copy; + + to_copy = MIN((unsigned int) nread, MAX_COMMAND_LENGTH - 1 - response_string_size); + memcpy(response_string + response_string_size, buf->base, to_copy); + response_string_size += to_copy; + response_string[response_string_size] = '\0'; + } + if (buf && buf->len) { + free(buf->base); + } +} + +static void alloc_cb(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf) +{ + (void)handle; + + buf->base = malloc(suggested_size); + buf->len = suggested_size; +} + +static void shutdown_cb(uv_shutdown_t* req, int status) +{ + int ret; + + (void)req; + (void)status; + + /* receive reply */ + response_string_size = 0; + response_string[0] = '\0'; + + ret = uv_read_start((uv_stream_t *)&client_pipe, alloc_cb, pipe_read_cb); + if (ret) { + fprintf(stderr, "uv_read_start(): %s\n", uv_strerror(ret)); + uv_close((uv_handle_t *)&client_pipe, NULL); + return; + } + +} + +static void pipe_write_cb(uv_write_t* req, int status) +{ + int ret; + + (void)req; + (void)status; + + ret = uv_shutdown(&shutdown_req, (uv_stream_t *)&client_pipe, shutdown_cb); + if (ret) { + fprintf(stderr, "uv_shutdown(): %s\n", uv_strerror(ret)); + uv_close((uv_handle_t *)&client_pipe, NULL); + return; + } +} + +static void connect_cb(uv_connect_t* req, int status) +{ + int ret; + uv_buf_t write_buf; + char *s; + + (void)req; + if (status) { + fprintf(stderr, "uv_pipe_connect(): %s\n", uv_strerror(status)); + fprintf(stderr, "Make sure the netdata service is running.\n"); + exit(-1); + } + if (0 == command_string_size) { + s = fgets(command_string, MAX_COMMAND_LENGTH, stdin); + } + (void)s; /* We don't need input to communicate with the server */ + command_string_size = strlen(command_string); + + write_req.data = &client_pipe; + write_buf.base = command_string; + write_buf.len = command_string_size; + ret = uv_write(&write_req, (uv_stream_t *)&client_pipe, &write_buf, 1, pipe_write_cb); + if (ret) { + fprintf(stderr, "uv_write(): %s\n", uv_strerror(ret)); + } +// fprintf(stderr, "COMMAND: Sending command: \"%s\"\n", command_string); +} + +int main(int argc, char **argv) +{ + int ret, i; + static uv_loop_t* loop; + uv_connect_t req; + + exit_status = -1; /* default status for when there is no command response from server */ + + loop = uv_default_loop(); + + ret = uv_pipe_init(loop, &client_pipe, 1); + if (ret) { + fprintf(stderr, "uv_pipe_init(): %s\n", uv_strerror(ret)); + return exit_status; + } + + command_string_size = 0; + command_string[0] = '\0'; + for (i = 1 ; i < argc ; ++i) { + size_t to_copy; + + to_copy = MIN(strlen(argv[i]), MAX_COMMAND_LENGTH - 1 - command_string_size); + strncpyz(command_string + command_string_size, argv[i], to_copy); + command_string_size += to_copy; + + if (command_string_size < MAX_COMMAND_LENGTH - 1) { + command_string[command_string_size++] = ' '; + } else { + break; + } + } + + uv_pipe_connect(&req, &client_pipe, PIPENAME, connect_cb); + + uv_run(loop, UV_RUN_DEFAULT); + + uv_close((uv_handle_t *)&client_pipe, NULL); + + return exit_status; +} diff --git a/cli/cli.h b/cli/cli.h new file mode 100644 index 0000000..056e998 --- /dev/null +++ b/cli/cli.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_CLI_H +#define NETDATA_CLI_H 1 + +#include "daemon/common.h" + +#endif //NETDATA_CLI_H diff --git a/collectors/COLLECTORS.md b/collectors/COLLECTORS.md new file mode 100644 index 0000000..7f66076 --- /dev/null +++ b/collectors/COLLECTORS.md @@ -0,0 +1,518 @@ + + +# Supported collectors list + +Netdata uses collectors to help you gather metrics from your favorite applications and services and view them in +real-time, interactive charts. The following list includes collectors for both external services/applications and +internal system metrics. + +Learn more about [how collectors work](/docs/collect/how-collectors-work.md), and then learn how to [enable or +configure](/docs/collect/enable-configure.md) any of the below collectors using the same process. + +Some collectors have both Go and Python versions as we continue our effort to migrate all collectors to Go. In these +cases, _Netdata always prioritizes the Go version_, and we highly recommend you use the Go versions for the best +experience. + +If you want to use a Python version of a collector, you need to explicitly [disable the Go +version](/docs/collect/enable-configure.md), and enable the Python version. Netdata then skips the Go version and +attempts to load the Python version and its accompanying configuration file. + +If you don't see the app/service you'd like to monitor in this list: + +- Check out our [GitHub issues](https://github.com/netdata/netdata/issues). Use the search bar to look for previous + discussions about that collector—we may be looking for assistance from users such as yourself! +- If you don't see the collector there, you can make + a [feature request](https://github.com/netdata/netdata/issues/new/choose) on GitHub. +- If you have basic software development skills, you can add your own plugin + in [Go](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin#how-to-develop-a-collector) + or [Python](https://learn.netdata.cloud/guides/python-collector) + +Supported Collectors List: + +- [Service and application collectors](#service-and-application-collectors) + - [Generic](#generic) + - [APM (application performance monitoring)](#apm-application-performance-monitoring) + - [Containers and VMs](#containers-and-vms) + - [Data stores](#data-stores) + - [Distributed computing](#distributed-computing) + - [Email](#email) + - [Kubernetes](#kubernetes) + - [Logs](#logs) + - [Messaging](#messaging) + - [Network](#network) + - [Provisioning](#provisioning) + - [Remote devices](#remote-devices) + - [Search](#search) + - [Storage](#storage) + - [Web](#web) +- [System collectors](#system-collectors) + - [Applications](#applications) + - [Disks and filesystems](#disks-and-filesystems) + - [eBPF](#ebpf) + - [Hardware](#hardware) + - [Memory](#memory) + - [Networks](#networks) + - [Operating systems](#operating-systems) + - [Processes](#processes) + - [Resources](#resources) + - [Users](#users) +- [Netdata collectors](#netdata-collectors) +- [Orchestrators](#orchestrators) +- [Third-party collectors](#third-party-collectors) +- [Etc](#etc) + +## Service and application collectors + +The Netdata Agent auto-detects and collects metrics from all of the services and applications below. You can also +configure any of these collectors according to your setup and infrastructure. + +### Generic + +- [Prometheus endpoints](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/prometheus): Gathers + metrics from any number of Prometheus endpoints, with support to autodetect more than 600 services and applications. +- [Pandas](https://learn.netdata.cloud/docs/agent/collectors/python.d.plugin/pandas): A Python collector that gathers + metrics from a [pandas](https://pandas.pydata.org/) dataframe. Pandas is a high level data processing library in + Python that can read various formats of data from local files or web endpoints. Custom processing and transformation + logic can also be expressed as part of the collector configuration. + +### APM (application performance monitoring) + +- [Go applications](/collectors/python.d.plugin/go_expvar/README.md): Monitor any Go application that exposes its + metrics with the `expvar` package from the Go standard library. +- [Java Spring Boot 2 + applications](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/springboot2/) (Go version): + Monitor running Java Spring Boot 2 applications that expose their metrics with the use of the Spring Boot Actuator. +- [Java Spring Boot 2 applications](/collectors/python.d.plugin/springboot/README.md) (Python version): Monitor + running Java Spring Boot applications that expose their metrics with the use of the Spring Boot Actuator. +- [statsd](/collectors/statsd.plugin/README.md): Implement a high performance `statsd` server for Netdata. +- [phpDaemon](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/phpdaemon/): Collect worker + statistics (total, active, idle), and uptime for web and network applications. +- [uWSGI](/collectors/python.d.plugin/uwsgi/README.md): Monitor performance metrics exposed by the uWSGI Stats + Server. + +### Containers and VMs + +- [Docker containers](/collectors/cgroups.plugin/README.md): Monitor the health and performance of individual Docker + containers using the cgroups collector plugin. +- [DockerD](/collectors/python.d.plugin/dockerd/README.md): Collect container health statistics. +- [Docker Engine](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/docker_engine/): Collect + runtime statistics from the `docker` daemon using the `metrics-address` feature. +- [Docker Hub](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/dockerhub/): Collect statistics + about Docker repositories, such as pulls, starts, status, time since last update, and more. +- [Libvirt](/collectors/cgroups.plugin/README.md): Monitor the health and performance of individual Libvirt containers + using the cgroups collector plugin. +- [LXC](/collectors/cgroups.plugin/README.md): Monitor the health and performance of individual LXC containers using + the cgroups collector plugin. +- [LXD](/collectors/cgroups.plugin/README.md): Monitor the health and performance of individual LXD containers using + the cgroups collector plugin. +- [systemd-nspawn](/collectors/cgroups.plugin/README.md): Monitor the health and performance of individual + systemd-nspawn containers using the cgroups collector plugin. +- [vCenter Server Appliance](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/vcsa/): Monitor + appliance system, components, and software update health statuses via the Health API. +- [vSphere](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/vsphere/): Collect host and virtual + machine performance metrics. +- [Xen/XCP-ng](/collectors/xenstat.plugin/README.md): Collect XenServer and XCP-ng metrics using `libxenstat`. + +### Data stores + +- [CockroachDB](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/cockroachdb/): Monitor various + database components using `_status/vars` endpoint. +- [Consul](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/consul/): Capture service and unbound + checks status (passing, warning, critical, maintenance). +- [Couchbase](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/couchbase/): Gather per-bucket + metrics from any number of instances of the distributed JSON document database. +- [CouchDB](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/couchdb): Monitor database health and + performance metrics + (reads/writes, HTTP traffic, replication status, etc). +- [MongoDB](/collectors/python.d.plugin/mongodb/README.md): Collect memory-caching system performance metrics and + reads the server's response to `stats` command (stats interface). +- [MySQL](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/mysql/): Collect database global, + replication and per user statistics. +- [OracleDB](/collectors/python.d.plugin/oracledb/README.md): Monitor database performance and health metrics. +- [Pika](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/pika/): Gather metric, such as clients, + memory usage, queries, and more from the Redis interface-compatible database. +- [Postgres](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/postgres): Collect database health + and performance metrics. +- [ProxySQL](/collectors/python.d.plugin/proxysql/README.md): Monitor database backend and frontend performance + metrics. +- [Redis](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/redis/): Monitor status from any + number of database instances by reading the server's response to the `INFO ALL` command. +- [RethinkDB](/collectors/python.d.plugin/rethinkdbs/README.md): Collect database server and cluster statistics. +- [Riak KV](/collectors/python.d.plugin/riakkv/README.md): Collect database stats from the `/stats` endpoint. +- [Zookeeper](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/zookeeper/): Monitor application + health metrics reading the server's response to the `mntr` command. +- [Memcached](/collectors/python.d.plugin/memcached/README.md): Collect memory-caching system performance metrics. + +### Distributed computing + +- [BOINC](/collectors/python.d.plugin/boinc/README.md): Monitor the total number of tasks, open tasks, and task + states for the distributed computing client. +- [Gearman](/collectors/python.d.plugin/gearman/README.md): Collect application summary (queued, running) and per-job + worker statistics (queued, idle, running). + +### Email + +- [Dovecot](/collectors/python.d.plugin/dovecot/README.md): Collect email server performance metrics by reading the + server's response to the `EXPORT global` command. +- [EXIM](/collectors/python.d.plugin/exim/README.md): Uses the `exim` tool to monitor the queue length of a + mail/message transfer agent (MTA). +- [Postfix](/collectors/python.d.plugin/postfix/README.md): Uses the `postqueue` tool to monitor the queue length of a + mail/message transfer agent (MTA). + +### Kubernetes + +- [Kubelet](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/k8s_kubelet/): Monitor one or more + instances of the Kubelet agent and collects metrics on number of pods/containers running, volume of Docker + operations, and more. +- [kube-proxy](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/k8s_kubeproxy/): Collect + metrics, such as syncing proxy rules and REST client requests, from one or more instances of `kube-proxy`. +- [Service discovery](https://github.com/netdata/agent-service-discovery/): Find what services are running on a + cluster's pods, converts that into configuration files, and exports them so they can be monitored by Netdata. + +### Logs + +- [Fluentd](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/fluentd/): Gather application + plugins metrics from an endpoint provided by `in_monitor plugin`. +- [Logstash](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/logstash/): Monitor JVM threads, + memory usage, garbage collection statistics, and more. +- [OpenVPN status logs](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/openvpn_status_log): Parse + server log files and provide summary (client, traffic) metrics. +- [Squid web server logs](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/squidlog/): Tail Squid + access logs to return the volume of requests, types of requests, bandwidth, and much more. +- [Web server logs (Go version for Apache, + NGINX)](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/weblog/): Tail access logs and provide + very detailed web server performance statistics. This module is able to parse 200k+ rows in less than half a second. +- [Web server logs (Apache, NGINX)](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/weblog): Tail + access log + file and collect web server/caching proxy metrics. + +### Messaging + +- [ActiveMQ](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/activemq/): Collect message broker + queues and topics statistics using the ActiveMQ Console API. +- [Beanstalk](/collectors/python.d.plugin/beanstalk/README.md): Collect server and tube-level statistics, such as CPU + usage, jobs rates, commands, and more. +- [Pulsar](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/pulsar/): Collect summary, + namespaces, and topics performance statistics. +- [RabbitMQ (Go)](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/rabbitmq/): Collect message + broker overview, system and per virtual host metrics. +- [RabbitMQ (Python)](/collectors/python.d.plugin/rabbitmq/README.md): Collect message broker global and per virtual + host metrics. +- [VerneMQ](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/vernemq/): Monitor MQTT broker + health and performance metrics. It collects all available info for both MQTTv3 and v5 communication + +### Network + +- [Bind 9](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/bind/): Collect nameserver summary + performance statistics via a web interface (`statistics-channels` feature). +- [Chrony](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/chrony): Monitor the precision and + statistics of a local `chronyd` server. +- [CoreDNS](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/coredns/): Measure DNS query round + trip time. +- [Dnsmasq](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/dnsmasq_dhcp/): Automatically + detects all configured `Dnsmasq` DHCP ranges and Monitor their utilization. +- [DNSdist](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/dnsdist/): Collect + load-balancer performance and health metrics. +- [Dnsmasq DNS Forwarder](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/dnsmasq/): Gather + queries, entries, operations, and events for the lightweight DNS forwarder. +- [DNS Query Time](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/dnsquery/): Monitor the round + trip time for DNS queries in milliseconds. +- [Freeradius](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/freeradius/): Collect + server authentication and accounting statistics from the `status server`. +- [Libreswan](/collectors/charts.d.plugin/libreswan/README.md): Collect bytes-in, bytes-out, and uptime metrics. +- [Icecast](/collectors/python.d.plugin/icecast/README.md): Monitor the number of listeners for active sources. +- [ISC Bind (RDNC)](/collectors/python.d.plugin/bind_rndc/README.md): Collect nameserver summary performance + statistics using the `rndc` tool. +- [ISC DHCP](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/isc_dhcpd): Reads a + `dhcpd.leases` file and collects metrics on total active leases, pool active leases, and pool utilization. +- [OpenLDAP](/collectors/python.d.plugin/openldap/README.md): Provides statistics information from the OpenLDAP + (`slapd`) server. +- [NSD](/collectors/python.d.plugin/nsd/README.md): Monitor nameserver performance metrics using the `nsd-control` + tool. +- [NTP daemon](/collectors/python.d.plugin/ntpd/README.md): Monitor the system variables of the local `ntpd` daemon + (optionally including variables of the polled peers) using the NTP Control Message Protocol via a UDP socket. +- [OpenSIPS](/collectors/charts.d.plugin/opensips/README.md): Collect server health and performance metrics using the + `opensipsctl` tool. +- [OpenVPN](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/openvpn/): Gather server summary + (client, traffic) and per user metrics (traffic, connection time) stats using `management-interface`. +- [Pi-hole](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/pihole/): Monitor basic (DNS + queries, clients, blocklist) and extended (top clients, top permitted, and blocked domains) statistics using the PHP + API. +- [PowerDNS Authoritative Server](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/powerdns): + Monitor one or more instances of the nameserver software to collect questions, events, and latency metrics. +- [PowerDNS Recursor](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/powerdns_recursor): + Gather incoming/outgoing questions, drops, timeouts, and cache usage from any number of DNS recursor instances. +- [RetroShare](/collectors/python.d.plugin/retroshare/README.md): Monitor application bandwidth, peers, and DHT + metrics. +- [Tor](/collectors/python.d.plugin/tor/README.md): Capture traffic usage statistics using the Tor control port. +- [Unbound](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/unbound/): Collect DNS resolver + summary and extended system and per thread metrics via the `remote-control` interface. + +### Provisioning + +- [Puppet](/collectors/python.d.plugin/puppet/README.md): Monitor the status of Puppet Server and Puppet DB. + +### Remote devices + +- [AM2320](/collectors/python.d.plugin/am2320/README.md): Monitor sensor temperature and humidity. +- [Access point](/collectors/charts.d.plugin/ap/README.md): Monitor client, traffic and signal metrics using the `aw` + tool. +- [APC UPS](/collectors/charts.d.plugin/apcupsd/README.md): Capture status information using the `apcaccess` tool. +- [Energi Core](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/energid): Monitor + blockchain indexes, memory usage, network usage, and transactions of wallet instances. +- [UPS/PDU](/collectors/charts.d.plugin/nut/README.md): Read the status of UPS/PDU devices using the `upsc` tool. +- [SNMP devices](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/snmp): Gather data using the SNMP + protocol. +- [1-Wire sensors](/collectors/python.d.plugin/w1sensor/README.md): Monitor sensor temperature. + +### Search + +- [Elasticsearch](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/elasticsearch): Collect + dozens of metrics on search engine performance from local nodes and local indices. Includes cluster health and + statistics. +- [Solr](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/solr/): Collect application search + requests, search errors, update requests, and update errors statistics. + +### Storage + +- [Ceph](/collectors/python.d.plugin/ceph/README.md): Monitor the Ceph cluster usage and server data consumption. +- [HDFS](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/hdfs/): Monitor health and performance + metrics for filesystem datanodes and namenodes. +- [IPFS](/collectors/python.d.plugin/ipfs/README.md): Collect file system bandwidth, peers, and repo metrics. +- [Scaleio](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/scaleio/): Monitor storage system, + storage pools, and SDCS health and performance metrics via VxFlex OS Gateway API. +- [Samba](/collectors/python.d.plugin/samba/README.md): Collect file sharing metrics using the `smbstatus` tool. + +### Web + +- [Apache](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/apache/): Collect Apache web + server performance metrics via the `server-status?auto` endpoint. +- [HAProxy](/collectors/python.d.plugin/haproxy/README.md): Collect frontend, backend, and health metrics. +- [HTTP endpoints](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/httpcheck/): Monitor + any HTTP endpoint's availability and response time. +- [Lighttpd](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/lighttpd/): Collect web server + performance metrics using the `server-status?auto` endpoint. +- [Lighttpd2](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/lighttpd2/): Collect web server + performance metrics using the `server-status?format=plain` endpoint. +- [Litespeed](/collectors/python.d.plugin/litespeed/README.md): Collect web server data (network, connection, + requests, cache) by reading `.rtreport*` files. +- [Nginx](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/nginx/): Monitor web server + status information by gathering metrics via `ngx_http_stub_status_module`. +- [Nginx VTS](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/nginxvts/): Gathers metrics from + any Nginx deployment with the _virtual host traffic status module_ enabled, including metrics on uptime, memory + usage, and cache, and more. +- [PHP-FPM](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/phpfpm/): Collect application + summary and processes health metrics by scraping the status page (`/status?full`). +- [TCP endpoints](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/portcheck/): Monitor any + TCP endpoint's availability and response time. +- [Spigot Minecraft servers](/collectors/python.d.plugin/spigotmc/README.md): Monitor average ticket rate and number + of users. +- [Squid](/collectors/python.d.plugin/squid/README.md): Monitor client and server bandwidth/requests by gathering + data from the Cache Manager component. +- [Tengine](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/tengine/): Monitor web server + statistics using information provided by `ngx_http_reqstat_module`. +- [Tomcat](/collectors/python.d.plugin/tomcat/README.md): Collect web server performance metrics from the Manager App + (`/manager/status?XML=true`). +- [Traefik](/collectors/python.d.plugin/traefik/README.md): Uses Traefik's Health API to provide statistics. +- [Varnish](/collectors/python.d.plugin/varnish/README.md): Provides HTTP accelerator global, backends (VBE), and + disks (SMF) statistics using the `varnishstat` tool. +- [x509 check](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/x509check/): Monitor certificate + expiration time. +- [Whois domain expiry](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/whoisquery/): Checks the + remaining time until a given domain is expired. + +## System collectors + +The Netdata Agent can collect these system- and hardware-level metrics using a variety of collectors, some of which +(such as `proc.plugin`) collect multiple types of metrics simultaneously. + +### Applications + +- [Fail2ban](/collectors/python.d.plugin/fail2ban/README.md): Parses configuration files to detect all jails, then + uses log files to report ban rates and volume of banned IPs. +- [Monit](/collectors/python.d.plugin/monit/README.md): Monitor statuses of targets (service-checks) using the XML + stats interface. +- [WMI (Windows Management Instrumentation) + exporter](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/wmi/): Collect CPU, memory, + network, disk, OS, system, and log-in metrics scraping `wmi_exporter`. + +### Disks and filesystems + +- [BCACHE](/collectors/proc.plugin/README.md): Monitor BCACHE statistics with the the `proc.plugin` collector. +- [Block devices](/collectors/proc.plugin/README.md): Gather metrics about the health and performance of block + devices using the the `proc.plugin` collector. +- [Btrfs](/collectors/proc.plugin/README.md): Monitors Btrfs filesystems with the the `proc.plugin` collector. +- [Device mapper](/collectors/proc.plugin/README.md): Gather metrics about the Linux device mapper with the proc + collector. +- [Disk space](/collectors/diskspace.plugin/README.md): Collect disk space usage metrics on Linux mount points. +- [Clock synchronization](/collectors/timex.plugin/README.md): Collect the system clock synchronization status on Linux. +- [Files and directories](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/filecheck): Gather + metrics about the existence, modification time, and size of files or directories. +- [ioping.plugin](/collectors/ioping.plugin/README.md): Measure disk read/write latency. +- [NFS file servers and clients](/collectors/proc.plugin/README.md): Gather operations, utilization, and space usage + using the the `proc.plugin` collector. +- [RAID arrays](/collectors/proc.plugin/README.md): Collect health, disk status, operation status, and more with the + the `proc.plugin` collector. +- [Veritas Volume Manager](/collectors/proc.plugin/README.md): Gather metrics about the Veritas Volume Manager (VVM). +- [ZFS](/collectors/proc.plugin/README.md): Monitor bandwidth and utilization of ZFS disks/partitions using the proc + collector. + +### eBPF + +- [Files](/collectors/ebpf.plugin/README.md): Provides information about how often a system calls kernel + functions related to file descriptors using the eBPF collector. +- [Virtual file system (VFS)](/collectors/ebpf.plugin/README.md): Monitor IO, errors, deleted objects, and + more for kernel virtual file systems (VFS) using the eBPF collector. +- [Processes](/collectors/ebpf.plugin/README.md): Monitor threads, task exits, and errors using the eBPF collector. + +### Hardware + +- [Adaptec RAID](/collectors/python.d.plugin/adaptec_raid/README.md): Monitor logical and physical devices health + metrics using the `arcconf` tool. +- [CUPS](/collectors/cups.plugin/README.md): Monitor CUPS. +- [FreeIPMI](/collectors/freeipmi.plugin/README.md): Uses `libipmimonitoring-dev` or `libipmimonitoring-devel` to + monitor the number of sensors, temperatures, voltages, currents, and more. +- [Hard drive temperature](/collectors/python.d.plugin/hddtemp/README.md): Monitor the temperature of storage + devices. +- [HP Smart Storage Arrays](/collectors/python.d.plugin/hpssa/README.md): Monitor controller, cache module, logical + and physical drive state, and temperature using the `ssacli` tool. +- [MegaRAID controllers](/collectors/python.d.plugin/megacli/README.md): Collect adapter, physical drives, and + battery stats using the `megacli` tool. +- [NVIDIA GPU](/collectors/python.d.plugin/nvidia_smi/README.md): Monitor performance metrics (memory usage, fan + speed, pcie bandwidth utilization, temperature, and more) using the `nvidia-smi` tool. +- [Sensors](/collectors/python.d.plugin/sensors/README.md): Reads system sensors information (temperature, voltage, + electric current, power, and more) from `/sys/devices/`. +- [S.M.A.R.T](/collectors/python.d.plugin/smartd_log/README.md): Reads SMART Disk Monitoring daemon logs. + +### Memory + +- [Available memory](/collectors/proc.plugin/README.md): Tracks changes in available RAM using the the `proc.plugin` + collector. +- [Committed memory](/collectors/proc.plugin/README.md): Monitor committed memory using the `proc.plugin` collector. +- [Huge pages](/collectors/proc.plugin/README.md): Gather metrics about huge pages in Linux and FreeBSD with the + `proc.plugin` collector. +- [KSM](/collectors/proc.plugin/README.md): Measure the amount of merging, savings, and effectiveness using the + `proc.plugin` collector. +- [Numa](/collectors/proc.plugin/README.md): Gather metrics on the number of non-uniform memory access (NUMA) events + every second using the `proc.plugin` collector. +- [Page faults](/collectors/proc.plugin/README.md): Collect the number of memory page faults per second using the + `proc.plugin` collector. +- [RAM](/collectors/proc.plugin/README.md): Collect metrics on system RAM, available RAM, and more using the + `proc.plugin` collector. +- [SLAB](/collectors/slabinfo.plugin/README.md): Collect kernel SLAB details on Linux systems. +- [swap](/collectors/proc.plugin/README.md): Monitor the amount of free and used swap at every second using the + `proc.plugin` collector. +- [Writeback memory](/collectors/proc.plugin/README.md): Collect how much memory is actively being written to disk at + every second using the `proc.plugin` collector. + +### Networks + +- [Access points](/collectors/charts.d.plugin/ap/README.md): Visualizes data related to access points. +- [fping.plugin](fping.plugin/README.md): Measure network latency, jitter and packet loss between the monitored node + and any number of remote network end points. +- [Netfilter](/collectors/nfacct.plugin/README.md): Collect netfilter firewall, connection tracker, and accounting + metrics using `libmnl` and `libnetfilter_acct`. +- [Network stack](/collectors/proc.plugin/README.md): Monitor the networking stack for errors, TCP connection aborts, + bandwidth, and more. +- [Network QoS](/collectors/tc.plugin/README.md): Collect traffic QoS metrics (`tc`) of Linux network interfaces. +- [SYNPROXY](/collectors/proc.plugin/README.md): Monitor entries uses, SYN packets received, TCP cookies, and more. + +### Operating systems + +- [freebsd.plugin](freebsd.plugin/README.md): Collect resource usage and performance data on FreeBSD systems. +- [macOS](/collectors/macos.plugin/README.md): Collect resource usage and performance data on macOS systems. + +### Processes + +- [Applications](/collectors/apps.plugin/README.md): Gather CPU, disk, memory, network, eBPF, and other metrics per + application using the `apps.plugin` collector. +- [systemd](/collectors/cgroups.plugin/README.md): Monitor the CPU and memory usage of systemd services using the + `cgroups.plugin` collector. +- [systemd unit states](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/systemdunits): See the + state (active, inactive, activating, deactivating, failed) of various systemd unit types. +- [System processes](/collectors/proc.plugin/README.md): Collect metrics on system load and total processes running + using `/proc/loadavg` and the `proc.plugin` collector. +- [Uptime](/collectors/proc.plugin/README.md): Monitor the uptime of a system using the `proc.plugin` collector. + +### Resources + +- [CPU frequency](/collectors/proc.plugin/README.md): Monitor CPU frequency, as set by the `cpufreq` kernel module, + using the `proc.plugin` collector. +- [CPU idle](/collectors/proc.plugin/README.md): Measure CPU idle every second using the `proc.plugin` collector. +- [CPU performance](/collectors/perf.plugin/README.md): Collect CPU performance metrics using performance monitoring + units (PMU). +- [CPU throttling](/collectors/proc.plugin/README.md): Gather metrics about thermal throttling using the `/proc/stat` + module and the `proc.plugin` collector. +- [CPU utilization](/collectors/proc.plugin/README.md): Capture CPU utilization, both system-wide and per-core, using + the `/proc/stat` module and the `proc.plugin` collector. +- [Entropy](/collectors/proc.plugin/README.md): Monitor the available entropy on a system using the `proc.plugin` + collector. +- [Interprocess Communication (IPC)](/collectors/proc.plugin/README.md): Monitor IPC semaphores and shared memory + using the `proc.plugin` collector. +- [Interrupts](/collectors/proc.plugin/README.md): Monitor interrupts per second using the `proc.plugin` collector. +- [IdleJitter](/collectors/idlejitter.plugin/README.md): Measure CPU latency and jitter on all operating systems. +- [SoftIRQs](/collectors/proc.plugin/README.md): Collect metrics on SoftIRQs, both system-wide and per-core, using the + `proc.plugin` collector. +- [SoftNet](/collectors/proc.plugin/README.md): Capture SoftNet events per second, both system-wide and per-core, + using the `proc.plugin` collector. + +### Users + +- [systemd-logind](/collectors/python.d.plugin/logind/README.md): Monitor active sessions, users, and seats tracked + by `systemd-logind` or `elogind`. +- [User/group usage](/collectors/apps.plugin/README.md): Gather CPU, disk, memory, network, and other metrics per user + and user group using the `apps.plugin` collector. + +## Netdata collectors + +These collectors are recursive in nature, in that they monitor some function of the Netdata Agent itself. Some +collectors are described only in code and associated charts in Netdata dashboards. + +- [ACLK (code only)](https://github.com/netdata/netdata/blob/master/aclk/legacy/aclk_stats.c): View whether a Netdata + Agent is connected to Netdata Cloud via the [ACLK](/aclk/README.md), the volume of queries, process times, and more. +- [Alarms](https://learn.netdata.cloud/docs/agent/collectors/python.d.plugin/alarms): This collector creates an + **Alarms** menu with one line plot showing the alarm states of a Netdata Agent over time. +- [Anomalies](https://learn.netdata.cloud/docs/agent/collectors/python.d.plugin/anomalies): This collector uses the + Python PyOD library to perform unsupervised anomaly detection on your Netdata charts and/or dimensions. +- [Exporting (code only)](https://github.com/netdata/netdata/blob/master/exporting/send_internal_metrics.c): Gather + metrics on CPU utilization for the [exporting engine](/exporting/README.md), and specific metrics for each enabled + exporting connector. +- [Global statistics (code only)](https://github.com/netdata/netdata/blob/master/daemon/global_statistics.c): See + metrics on the CPU utilization, network traffic, volume of web clients, API responses, database engine usage, and + more. + +## Orchestrators + +Plugin orchestrators organize and run many of the above collectors. + +If you're interested in developing a new collector that you'd like to contribute to Netdata, we highly recommend using +the `go.d.plugin`. + +- [go.d.plugin](https://github.com/netdata/go.d.plugin): An orchestrator for data collection modules written in `go`. +- [python.d.plugin](python.d.plugin/README.md): An orchestrator for data collection modules written in `python` v2/v3. +- [charts.d.plugin](charts.d.plugin/README.md): An orchestrator for data collection modules written in `bash` v4+. + +## Third-party collectors + +These collectors are developed and maintained by third parties and, unlike the other collectors, are not installed by +default. To use a third-party collector, visit their GitHub/documentation page and follow their installation procedures. + +- [CyberPower UPS](https://github.com/HawtDogFlvrWtr/netdata_cyberpwrups_plugin): Polls CyberPower UPS data using + PowerPanel® Personal Linux. +- [Logged-in users](https://github.com/veksh/netdata-numsessions): Collect the number of currently logged-on users. +- [nextcloud](https://github.com/arnowelzel/netdata-nextcloud): Monitor Nextcloud servers. +- [nim-netdata-plugin](https://github.com/FedericoCeratto/nim-netdata-plugin): A helper to create native Netdata + plugins using Nim. +- [Nvidia GPUs](https://github.com/coraxx/netdata_nv_plugin): Monitor Nvidia GPUs. +- [Teamspeak 3](https://github.com/coraxx/netdata_ts3_plugin): Pulls active users and bandwidth from TeamSpeak 3 + servers. +- [SSH](https://github.com/Yaser-Amiri/netdata-ssh-module): Monitor failed authentication requests of an SSH server. + +## Etc + +- [charts.d example](charts.d.plugin/example/README.md): An example `charts.d` collector. +- [python.d example](python.d.plugin/example/README.md): An example `python.d` collector. diff --git a/collectors/Makefile.am b/collectors/Makefile.am new file mode 100644 index 0000000..9f8bf52 --- /dev/null +++ b/collectors/Makefile.am @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + plugins.d \ + apps.plugin \ + cgroups.plugin \ + charts.d.plugin \ + cups.plugin \ + diskspace.plugin \ + timex.plugin \ + fping.plugin \ + ioping.plugin \ + freebsd.plugin \ + freeipmi.plugin \ + idlejitter.plugin \ + macos.plugin \ + nfacct.plugin \ + xenstat.plugin \ + perf.plugin \ + proc.plugin \ + python.d.plugin \ + slabinfo.plugin \ + statsd.plugin \ + ebpf.plugin \ + tc.plugin \ + $(NULL) + +usercustompluginsconfigdir=$(configdir)/custom-plugins.d +usergoconfigdir=$(configdir)/go.d + +# Explicitly install directories to avoid permission issues due to umask +install-exec-local: + $(INSTALL) -d $(DESTDIR)$(usercustompluginsconfigdir) + $(INSTALL) -d $(DESTDIR)$(usergoconfigdir) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/README.md b/collectors/README.md new file mode 100644 index 0000000..de46a72 --- /dev/null +++ b/collectors/README.md @@ -0,0 +1,48 @@ + + +# Collecting metrics + +Netdata can collect metrics from hundreds of different sources, be they internal data created by the system itself, or +external data created by services or applications. To see _all_ of the sources Netdata collects from, view our [list of +supported collectors](/collectors/COLLECTORS.md). + +There are two essential points to understand about how collecting metrics works in Netdata: + +- All collectors are **installed by default** with every installation of Netdata. You do not need to install + collectors manually to collect metrics from new sources. +- Upon startup, Netdata will **auto-detect** any application or service that has a + [collector](/collectors/COLLECTORS.md), as long as both the collector and the app/service are configured correctly. + +Most users will want to enable a new Netdata collector for their app/service. For those details, see +our [collectors' configuration reference](/collectors/REFERENCE.md). + +## Take your next steps with collectors + +[Supported collectors list](/collectors/COLLECTORS.md) + +[Collectors configuration reference](/collectors/REFERENCE.md) + +## Guides + +[Monitor Nginx or Apache web server log files with Netdata](/docs/guides/collect-apache-nginx-web-logs.md) + +[Monitor CockroachDB metrics with Netdata](/docs/guides/monitor-cockroachdb.md) + +[Monitor Unbound DNS servers with Netdata](/docs/guides/collect-unbound-metrics.md) + +[Monitor a Hadoop cluster with Netdata](/docs/guides/monitor-hadoop-cluster.md) + +## Related features + +**[Dashboards](/web/README.md)**: Visualize your newly-collect metrics in real-time using Netdata's [built-in +dashboard](/web/gui/README.md). + +**[Exporting](/exporting/README.md)**: Extend our built-in [database engine](/database/engine/README.md), which supports +long-term metrics storage, by archiving metrics to external databases like Graphite, Prometheus, MongoDB, TimescaleDB, and more. +It can export metrics to multiple databases simultaneously. + + diff --git a/collectors/REFERENCE.md b/collectors/REFERENCE.md new file mode 100644 index 0000000..939b189 --- /dev/null +++ b/collectors/REFERENCE.md @@ -0,0 +1,170 @@ + + +# Collectors configuration reference + +Welcome to the collector configuration reference guide. + +This guide contains detailed information about enabling/disabling plugins or modules, in addition a quick reference to +the internal plugins API. + +## Netdata's collector architecture + +Netdata has an intricate system for organizing and managing its collectors. **Collectors** are the processes/programs +that actually gather metrics from various sources. Collectors are organized by **plugins**, which help manage all the +independent processes in a variety of programming languages based on their purpose and performance requirements. +**Modules** are a type of collector, used primarily to connect to external applications, such as an Nginx web server or +MySQL database, among many others. + +For most users, enabling individual collectors for the application/service you're interested in is far more important +than knowing which plugin it uses. See our [collectors list](/collectors/COLLECTORS.md) to see whether your favorite app/service has +a collector, and then read the documentation for that specific collector to figure out how to enable it. + +There are three types of plugins: + +- **Internal** plugins organize collectors that gather metrics from `/proc`, `/sys` and other Linux kernel sources. + They are written in `C`, and run as threads within the Netdata daemon. +- **External** plugins organize collectors that gather metrics from external processes, such as a MySQL database or + Nginx web server. They can be written in any language, and the `netdata` daemon spawns them as long-running + independent processes. They communicate with the daemon via pipes. +- **Plugin orchestrators**, which are external plugins that instead support a number of **modules**. Modules are a + type of collector. We have a few plugin orchestrators available for those who want to develop their own collectors, + but focus most of our efforts on the [Go plugin](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/). + +## Enable, configure, and disable modules + +Most collector modules come with **auto-detection**, configured to work out-of-the-box on popular operating systems with +the default settings. + +However, there are cases that auto-detection fails. Usually, the reason is that the applications to be monitored do not +allow Netdata to connect. In most of the cases, allowing the user `netdata` from `localhost` to connect and collect +metrics, will automatically enable data collection for the application in question (it will require a Netdata restart). + + +## Troubleshoot a collector + +First, navigate to your plugins directory, which is usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case +on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the plugins directory, +switch to the `netdata` user. + +```bash +cd /usr/libexec/netdata/plugins.d/ +sudo su -s /bin/bash netdata +``` + +The next step is based on the collector's orchestrator. You can figure out which orchestrator the collector uses by + +uses either +by viewing the [collectors list](COLLECTORS.md) and referencing the _configuration file_ field. For example, if that +field contains `go.d`, that collector uses the Go orchestrator. + +```bash +# Go orchestrator (go.d.plugin) +./go.d.plugin -d -m + +# Python orchestrator (python.d.plugin) +./python.d.plugin debug trace + +# Bash orchestrator (bash.d.plugin) +./charts.d.plugin debug 1 +``` + +The output from the relevant command will provide valuable troubleshooting information. If you can't figure out how to +enable the collector using the details from this output, feel free to [create an issue on our +GitHub](https://github.com/netdata/netdata/issues/new?assignees=&labels=bug%2Cneeds+triage&template=BUG_REPORT.yml) to get some +help from our collectors experts. + +## Enable and disable plugins + +You can enable or disable individual plugins by opening `netdata.conf` and scrolling down to the `[plugins]` section. +This section features a list of Netdata's plugins, with a boolean setting to enable or disable them. The exception is +`statsd.plugin`, which has its own `[statsd]` section. Your `[plugins]` section should look similar to this: + +```conf +[plugins] + # proc = yes + # diskspace = yes + # timex = yes + # cgroups = yes + # tc = yes + # idlejitter = yes + # enable running new plugins = yes + # check for new plugins every = 60 + # slabinfo = no + # fping = yes + # ioping = yes + # python.d = yes + # go.d = yes + # apps = yes + # perf = yes + # charts.d = yes +``` + +By default, most plugins are enabled, so you don't need to enable them explicitly to use their collectors. To enable or +disable any specific plugin, remove the comment (`#`) and change the boolean setting to `yes` or `no`. + +All **external plugins** are managed by [plugins.d](plugins.d/README.md), which provides additional management options. + +## Internal plugins + +Each of the internal plugins runs as a thread inside the `netdata` daemon. Once this thread has started, the plugin may +spawn additional threads according to its design. + +### Internal plugins API + +The internal data collection API consists of the following calls: + +```c +collect_data() { + // collect data here (one iteration) + + collected_number collected_value = collect_a_value(); + + // give the metrics to Netdata + + static RRDSET *st = NULL; // the chart + static RRDDIM *rd = NULL; // a dimension attached to this chart + + if(unlikely(!st)) { + // we haven't created this chart before + // create it now + st = rrdset_create_localhost( + "type" + , "id" + , "name" + , "family" + , "context" + , "Chart Title" + , "units" + , "plugin-name" + , "module-name" + , priority + , update_every + , chart_type + ); + + // attach a metric to it + rd = rrddim_add(st, "id", "name", multiplier, divider, algorithm); + } + + // give the collected value(s) to the chart + rrddim_set_by_pointer(st, rd, collected_value); + + // signal Netdata we are done with this iteration + rrdset_done(st); +} +``` + +Of course, Netdata has a lot of libraries to help you also in collecting the metrics. The best way to find your way +through this, is to examine what other similar plugins do. + +## External Plugins + +**External plugins** use the API and are managed by [plugins.d](plugins.d/README.md). + +## Write a custom collector + +You can add custom collectors by following the [external plugins documentation](/collectors/plugins.d/README.md). + diff --git a/collectors/all.h b/collectors/all.h new file mode 100644 index 0000000..85a7ac8 --- /dev/null +++ b/collectors/all.h @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_ALL_H +#define NETDATA_ALL_H 1 + +#include "daemon/common.h" + +// netdata internal data collection plugins + +#include "plugins.d/plugins_d.h" + +// ---------------------------------------------------------------------------- +// netdata chart priorities + +// This is a work in progress - to scope is to collect here all chart priorities. +// These should be based on the CONTEXT of the charts + the chart id when needed +// - for each SECTION +1000 (or +X000 for big sections) +// - for each FAMILY +100 +// - for each CHART +10 + +#define NETDATA_CHART_PRIO_SYSTEM_CPU 100 +#define NETDATA_CHART_PRIO_SYSTEM_LOAD 100 +#define NETDATA_CHART_PRIO_SYSTEM_IO 150 +#define NETDATA_CHART_PRIO_SYSTEM_PGPGIO 151 +#define NETDATA_CHART_PRIO_SYSTEM_RAM 200 +#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201 +#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250 +#define NETDATA_CHART_PRIO_SYSTEM_NET 500 +#define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_IP 501 +#define NETDATA_CHART_PRIO_SYSTEM_IPV6 502 +#define NETDATA_CHART_PRIO_SYSTEM_PROCESSES 600 +#define NETDATA_CHART_PRIO_SYSTEM_PROCESS_STATES 601 +#define NETDATA_CHART_PRIO_SYSTEM_FORKS 700 +#define NETDATA_CHART_PRIO_SYSTEM_ACTIVE_PROCESSES 750 +#define NETDATA_CHART_PRIO_SYSTEM_CTXT 800 +#define NETDATA_CHART_PRIO_SYSTEM_IDLEJITTER 800 +#define NETDATA_CHART_PRIO_SYSTEM_INTR 900 +#define NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS 950 +#define NETDATA_CHART_PRIO_SYSTEM_SOFTNET_STAT 955 +#define NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS 1000 +#define NETDATA_CHART_PRIO_SYSTEM_DEV_INTR 1000 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_SOFT_INTR 1100 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_ENTROPY 1000 +#define NETDATA_CHART_PRIO_SYSTEM_UPTIME 1000 +#define NETDATA_CHART_PRIO_CLOCK_SYNC_STATE 1100 +#define NETDATA_CHART_PRIO_CLOCK_STATUS 1105 +#define NETDATA_CHART_PRIO_CLOCK_SYNC_OFFSET 1110 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_QUEUES 1200 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_MESSAGES 1201 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_SIZE 1202 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES 1203 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SEM_ARRAYS 1204 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SEGS 1205 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SIZE 1206 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_CALLS 1207 +#define NETDATA_CHART_PRIO_SYSTEM_PACKETS 7001 // freebsd only + + +// CPU per core + +#define NETDATA_CHART_PRIO_CPU_PER_CORE 1000 // +1 per core +#define NETDATA_CHART_PRIO_CPU_TEMPERATURE 1050 // freebsd only +#define NETDATA_CHART_PRIO_CPUFREQ_SCALING_CUR_FREQ 5003 // freebsd only +#define NETDATA_CHART_PRIO_CPUIDLE 6000 + +#define NETDATA_CHART_PRIO_CORE_THROTTLING 5001 +#define NETDATA_CHART_PRIO_PACKAGE_THROTTLING 5002 + +// Interrupts per core + +#define NETDATA_CHART_PRIO_INTERRUPTS_PER_CORE 1100 // +1 per core + +// Memory Section - 1xxx + +#define NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE 1010 +#define NETDATA_CHART_PRIO_MEM_SYSTEM_OOM_KILL 1020 +#define NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED 1030 +#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1040 +#define NETDATA_CHART_PRIO_MEM_KERNEL 1100 +#define NETDATA_CHART_PRIO_MEM_SLAB 1200 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES 1250 +#define NETDATA_CHART_PRIO_MEM_KSM 1300 +#define NETDATA_CHART_PRIO_MEM_KSM_SAVINGS 1301 +#define NETDATA_CHART_PRIO_MEM_KSM_RATIOS 1302 +#define NETDATA_CHART_PRIO_MEM_NUMA 1400 +#define NETDATA_CHART_PRIO_MEM_NUMA_NODES 1410 +#define NETDATA_CHART_PRIO_MEM_PAGEFRAG 1450 +#define NETDATA_CHART_PRIO_MEM_HW 1500 +#define NETDATA_CHART_PRIO_MEM_HW_ECC_CE 1550 +#define NETDATA_CHART_PRIO_MEM_HW_ECC_UE 1560 +#define NETDATA_CHART_PRIO_MEM_ZRAM 1600 +#define NETDATA_CHART_PRIO_MEM_ZRAM_SAVINGS 1601 +#define NETDATA_CHART_PRIO_MEM_ZRAM_RATIO 1602 +#define NETDATA_CHART_PRIO_MEM_ZRAM_EFFICIENCY 1603 + +// Disks + +#define NETDATA_CHART_PRIO_DISK_IO 2000 +#define NETDATA_CHART_PRIO_DISK_OPS 2010 +#define NETDATA_CHART_PRIO_DISK_QOPS 2015 +#define NETDATA_CHART_PRIO_DISK_BACKLOG 2020 +#define NETDATA_CHART_PRIO_DISK_BUSY 2030 +#define NETDATA_CHART_PRIO_DISK_UTIL 2040 +#define NETDATA_CHART_PRIO_DISK_AWAIT 2050 +#define NETDATA_CHART_PRIO_DISK_AVGSZ 2060 +#define NETDATA_CHART_PRIO_DISK_SVCTM 2070 +#define NETDATA_CHART_PRIO_DISK_MOPS 2080 +#define NETDATA_CHART_PRIO_DISK_IOTIME 2090 +#define NETDATA_CHART_PRIO_DISK_LATENCY 2095 +#define NETDATA_CHART_PRIO_BCACHE_CACHE_ALLOC 2120 +#define NETDATA_CHART_PRIO_BCACHE_HIT_RATIO 2120 +#define NETDATA_CHART_PRIO_BCACHE_RATES 2121 +#define NETDATA_CHART_PRIO_BCACHE_SIZE 2122 +#define NETDATA_CHART_PRIO_BCACHE_USAGE 2123 +#define NETDATA_CHART_PRIO_BCACHE_OPS 2124 +#define NETDATA_CHART_PRIO_BCACHE_BYPASS 2125 +#define NETDATA_CHART_PRIO_BCACHE_CACHE_READ_RACES 2126 + +#define NETDATA_CHART_PRIO_DISKSPACE_SPACE 2023 +#define NETDATA_CHART_PRIO_DISKSPACE_INODES 2024 + +// MDSTAT + +#define NETDATA_CHART_PRIO_MDSTAT_HEALTH 2100 +#define NETDATA_CHART_PRIO_MDSTAT_FLUSH 2101 +#define NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT 2105 +#define NETDATA_CHART_PRIO_MDSTAT_DISKS 2106 // 5 charts per raid +#define NETDATA_CHART_PRIO_MDSTAT_MISMATCH 2107 +#define NETDATA_CHART_PRIO_MDSTAT_OPERATION 2108 +#define NETDATA_CHART_PRIO_MDSTAT_FINISH 2109 +#define NETDATA_CHART_PRIO_MDSTAT_SPEED 2110 + +// Filesystem +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_CLEAN 2150 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_COUNT 2151 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_BYTES 2152 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EBYTES 2153 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_FSYNC 2154 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EFSYNC 2155 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_OPEN 2156 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EOPEN 2157 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_CREATE 2158 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_ECREATE 2159 + +#define NETDATA_CHART_PRIO_EBPF_FILESYSTEM_CHARTS 2160 + +// Mount Points +#define NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS 2190 + +// File descriptor +#define NETDATA_CHART_PRIO_EBPF_FD_CHARTS 2195 + + +// NFS (server) + +#define NETDATA_CHART_PRIO_NFSD_READCACHE 2200 +#define NETDATA_CHART_PRIO_NFSD_FILEHANDLES 2201 +#define NETDATA_CHART_PRIO_NFSD_IO 2202 +#define NETDATA_CHART_PRIO_NFSD_THREADS 2203 +#define NETDATA_CHART_PRIO_NFSD_THREADS_FULLCNT 2204 +#define NETDATA_CHART_PRIO_NFSD_THREADS_HISTOGRAM 2205 +#define NETDATA_CHART_PRIO_NFSD_READAHEAD 2205 +#define NETDATA_CHART_PRIO_NFSD_NET 2207 +#define NETDATA_CHART_PRIO_NFSD_RPC 2208 +#define NETDATA_CHART_PRIO_NFSD_PROC2 2209 +#define NETDATA_CHART_PRIO_NFSD_PROC3 2210 +#define NETDATA_CHART_PRIO_NFSD_PROC4 2211 +#define NETDATA_CHART_PRIO_NFSD_PROC4OPS 2212 + +// NFS (client) + +#define NETDATA_CHART_PRIO_NFS_NET 2307 +#define NETDATA_CHART_PRIO_NFS_RPC 2308 +#define NETDATA_CHART_PRIO_NFS_PROC2 2309 +#define NETDATA_CHART_PRIO_NFS_PROC3 2310 +#define NETDATA_CHART_PRIO_NFS_PROC4 2311 + +// BTRFS + +#define NETDATA_CHART_PRIO_BTRFS_DISK 2400 +#define NETDATA_CHART_PRIO_BTRFS_DATA 2401 +#define NETDATA_CHART_PRIO_BTRFS_METADATA 2402 +#define NETDATA_CHART_PRIO_BTRFS_SYSTEM 2403 + +// ZFS + +#define NETDATA_CHART_PRIO_ZFS_ARC_SIZE 2500 +#define NETDATA_CHART_PRIO_ZFS_L2_SIZE 2500 +#define NETDATA_CHART_PRIO_ZFS_READS 2510 +#define NETDATA_CHART_PRIO_ZFS_ACTUAL_HITS 2519 +#define NETDATA_CHART_PRIO_ZFS_ARC_SIZE_BREAKDOWN 2520 +#define NETDATA_CHART_PRIO_ZFS_IMPORTANT_OPS 2522 +#define NETDATA_CHART_PRIO_ZFS_MEMORY_OPS 2523 +#define NETDATA_CHART_PRIO_ZFS_IO 2700 +#define NETDATA_CHART_PRIO_ZFS_HITS 2520 +#define NETDATA_CHART_PRIO_ZFS_DHITS 2530 +#define NETDATA_CHART_PRIO_ZFS_DEMAND_DATA_HITS 2540 +#define NETDATA_CHART_PRIO_ZFS_PREFETCH_DATA_HITS 2550 +#define NETDATA_CHART_PRIO_ZFS_PHITS 2560 +#define NETDATA_CHART_PRIO_ZFS_MHITS 2570 +#define NETDATA_CHART_PRIO_ZFS_L2HITS 2580 +#define NETDATA_CHART_PRIO_ZFS_LIST_HITS 2600 +#define NETDATA_CHART_PRIO_ZFS_HASH_ELEMENTS 2800 +#define NETDATA_CHART_PRIO_ZFS_HASH_CHAINS 2810 + +#define NETDATA_CHART_PRIO_ZFS_POOL_STATE 2820 + +// HARDIRQS + +#define NETDATA_CHART_PRIO_HARDIRQ_LATENCY 2900 + +// SOFTIRQs + +#define NETDATA_CHART_PRIO_SOFTIRQS_PER_CORE 3000 // +1 per core + +// IPFW (freebsd) + +#define NETDATA_CHART_PRIO_IPFW_PACKETS 3001 +#define NETDATA_CHART_PRIO_IPFW_BYTES 3002 +#define NETDATA_CHART_PRIO_IPFW_ACTIVE 3003 +#define NETDATA_CHART_PRIO_IPFW_EXPIRED 3004 +#define NETDATA_CHART_PRIO_IPFW_MEM 3005 + + +// IPVS + +#define NETDATA_CHART_PRIO_IPVS_NET 3100 +#define NETDATA_CHART_PRIO_IPVS_SOCKETS 3101 +#define NETDATA_CHART_PRIO_IPVS_PACKETS 3102 + +// Softnet + +#define NETDATA_CHART_PRIO_SOFTNET_PER_CORE 4101 // +1 per core + +// IP STACK + +#define NETDATA_CHART_PRIO_IP_ERRORS 4100 +#define NETDATA_CHART_PRIO_IP_TCP_CONNABORTS 4210 +#define NETDATA_CHART_PRIO_IP_TCP_SYN_QUEUE 4215 +#define NETDATA_CHART_PRIO_IP_TCP_ACCEPT_QUEUE 4216 +#define NETDATA_CHART_PRIO_IP_TCP_REORDERS 4220 +#define NETDATA_CHART_PRIO_IP_TCP_OFO 4250 +#define NETDATA_CHART_PRIO_IP_TCP_SYNCOOKIES 4260 +#define NETDATA_CHART_PRIO_IP_TCP_MEM 4290 +#define NETDATA_CHART_PRIO_IP_BCAST 4500 +#define NETDATA_CHART_PRIO_IP_BCAST_PACKETS 4510 +#define NETDATA_CHART_PRIO_IP_MCAST 4600 +#define NETDATA_CHART_PRIO_IP_MCAST_PACKETS 4610 +#define NETDATA_CHART_PRIO_IP_ECN 4700 + +// IPv4 + +#define NETDATA_CHART_PRIO_IPV4_SOCKETS 5100 +#define NETDATA_CHART_PRIO_IPV4_PACKETS 5130 +#define NETDATA_CHART_PRIO_IPV4_ERRORS 5150 +#define NETDATA_CHART_PRIO_IPV4_ICMP 5170 +#define NETDATA_CHART_PRIO_IPV4_TCP 5200 +#define NETDATA_CHART_PRIO_IPV4_TCP_SOCKETS 5201 +#define NETDATA_CHART_PRIO_IPV4_TCP_MEM 5290 +#define NETDATA_CHART_PRIO_IPV4_UDP 5300 +#define NETDATA_CHART_PRIO_IPV4_UDP_MEM 5390 +#define NETDATA_CHART_PRIO_IPV4_UDPLITE 5400 +#define NETDATA_CHART_PRIO_IPV4_RAW 5450 +#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS 5460 +#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS_MEM 5470 + +// IPv6 + +#define NETDATA_CHART_PRIO_IPV6_PACKETS 6200 +#define NETDATA_CHART_PRIO_IPV6_ECT 6210 +#define NETDATA_CHART_PRIO_IPV6_ERRORS 6300 +#define NETDATA_CHART_PRIO_IPV6_FRAGMENTS 6400 +#define NETDATA_CHART_PRIO_IPV6_FRAGSOUT 6401 +#define NETDATA_CHART_PRIO_IPV6_FRAGSIN 6402 +#define NETDATA_CHART_PRIO_IPV6_TCP 6500 +#define NETDATA_CHART_PRIO_IPV6_UDP 6600 +#define NETDATA_CHART_PRIO_IPV6_UDP_PACKETS 6601 +#define NETDATA_CHART_PRIO_IPV6_UDP_ERRORS 6610 +#define NETDATA_CHART_PRIO_IPV6_UDPLITE 6700 +#define NETDATA_CHART_PRIO_IPV6_UDPLITE_PACKETS 6701 +#define NETDATA_CHART_PRIO_IPV6_UDPLITE_ERRORS 6710 +#define NETDATA_CHART_PRIO_IPV6_RAW 6800 +#define NETDATA_CHART_PRIO_IPV6_BCAST 6840 +#define NETDATA_CHART_PRIO_IPV6_MCAST 6850 +#define NETDATA_CHART_PRIO_IPV6_MCAST_PACKETS 6851 +#define NETDATA_CHART_PRIO_IPV6_ICMP 6900 +#define NETDATA_CHART_PRIO_IPV6_ICMP_REDIR 6910 +#define NETDATA_CHART_PRIO_IPV6_ICMP_ERRORS 6920 +#define NETDATA_CHART_PRIO_IPV6_ICMP_ECHOS 6930 +#define NETDATA_CHART_PRIO_IPV6_ICMP_GROUPMEMB 6940 +#define NETDATA_CHART_PRIO_IPV6_ICMP_ROUTER 6950 +#define NETDATA_CHART_PRIO_IPV6_ICMP_NEIGHBOR 6960 +#define NETDATA_CHART_PRIO_IPV6_ICMP_LDV2 6970 +#define NETDATA_CHART_PRIO_IPV6_ICMP_TYPES 6980 + + +// Network interfaces + +#define NETDATA_CHART_PRIO_FIRST_NET_IFACE 7000 // 6 charts per interface +#define NETDATA_CHART_PRIO_FIRST_NET_PACKETS 7001 +#define NETDATA_CHART_PRIO_FIRST_NET_ERRORS 7002 +#define NETDATA_CHART_PRIO_FIRST_NET_DROPS 7003 +#define NETDATA_CHART_PRIO_FIRST_NET_EVENTS 7006 +#define NETDATA_CHART_PRIO_CGROUP_NET_IFACE 43000 + +// SCTP + +#define NETDATA_CHART_PRIO_SCTP 7000 + +// QoS + +#define NETDATA_CHART_PRIO_TC_QOS 7000 +#define NETDATA_CHART_PRIO_TC_QOS_PACKETS 7010 +#define NETDATA_CHART_PRIO_TC_QOS_DROPPED 7020 +#define NETDATA_CHART_PRIO_TC_QOS_TOKENS 7030 +#define NETDATA_CHART_PRIO_TC_QOS_CTOKENS 7040 + +// Infiniband +#define NETDATA_CHART_PRIO_INFINIBAND 7100 + +// Netfilter + +#define NETDATA_CHART_PRIO_NETFILTER_SOCKETS 8700 +#define NETDATA_CHART_PRIO_NETFILTER_NEW 8701 +#define NETDATA_CHART_PRIO_NETFILTER_CHANGES 8702 +#define NETDATA_CHART_PRIO_NETFILTER_EXPECT 8703 +#define NETDATA_CHART_PRIO_NETFILTER_ERRORS 8705 +#define NETDATA_CHART_PRIO_NETFILTER_SEARCH 8710 + +// SYNPROXY + +#define NETDATA_CHART_PRIO_SYNPROXY_SYN_RECEIVED 8751 +#define NETDATA_CHART_PRIO_SYNPROXY_COOKIES 8752 +#define NETDATA_CHART_PRIO_SYNPROXY_CONN_OPEN 8753 +#define NETDATA_CHART_PRIO_SYNPROXY_ENTRIES 8754 + +// Linux Power Supply + +#define NETDATA_CHART_PRIO_POWER_SUPPLY_CAPACITY 9500 // 4 charts per power supply +#define NETDATA_CHART_PRIO_POWER_SUPPLY_CHARGE 9501 +#define NETDATA_CHART_PRIO_POWER_SUPPLY_ENERGY 9502 +#define NETDATA_CHART_PRIO_POWER_SUPPLY_VOLTAGE 9503 + + +// Wireless + +#define NETDATA_CHART_PRIO_WIRELESS_IFACE 7110 + +// CGROUPS + +#define NETDATA_CHART_PRIO_CGROUPS_SYSTEMD 19000 // many charts +#define NETDATA_CHART_PRIO_CGROUPS_CONTAINERS 40000 // many charts + +// STATSD + +#define NETDATA_CHART_PRIO_STATSD_PRIVATE 90000 // many charts + +// INTERNAL NETDATA INFO + +#define NETDATA_CHART_PRIO_CHECKS 99999 + +#define NETDATA_CHART_PRIO_NETDATA_TIMEX 132030 +#define NETDATA_CHART_PRIO_NETDATA_TC_TIME 1000100 + + +#endif //NETDATA_ALL_H diff --git a/collectors/apps.plugin/Makefile.am b/collectors/apps.plugin/Makefile.am new file mode 100644 index 0000000..533b14d --- /dev/null +++ b/collectors/apps.plugin/Makefile.am @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) + +dist_libconfig_DATA = \ + apps_groups.conf \ + $(NULL) diff --git a/collectors/apps.plugin/README.md b/collectors/apps.plugin/README.md new file mode 100644 index 0000000..150889d --- /dev/null +++ b/collectors/apps.plugin/README.md @@ -0,0 +1,399 @@ + + +# apps.plugin + +`apps.plugin` breaks down system resource usage to **processes**, **users** and **user groups**. + +To achieve this task, it iterates through the whole process tree, collecting resource usage information +for every process found running. + +Since Netdata needs to present this information in charts and track them through time, +instead of presenting a `top` like list, `apps.plugin` uses a pre-defined list of **process groups** +to which it assigns all running processes. This list is customizable via `apps_groups.conf`, and Netdata +ships with a good default for most cases (to edit it on your system run `/etc/netdata/edit-config apps_groups.conf`). + +So, `apps.plugin` builds a process tree (much like `ps fax` does in Linux), and groups +processes together (evaluating both child and parent processes) so that the result is always a list with +a predefined set of members (of course, only process groups found running are reported). + +> If you find that `apps.plugin` categorizes standard applications as `other`, we would be +> glad to accept pull requests improving the defaults shipped with Netdata in `apps_groups.conf`. + +Unlike traditional process monitoring tools (like `top`), `apps.plugin` is able to account the resource +utilization of exit processes. Their utilization is accounted at their currently running parents. +So, `apps.plugin` is perfectly able to measure the resources used by shell scripts and other processes +that fork/spawn other short lived processes hundreds of times per second. + +## Charts + +`apps.plugin` provides charts for 3 sections: + +1. Per application charts as **Applications** at Netdata dashboards +2. Per user charts as **Users** at Netdata dashboards +3. Per user group charts as **User Groups** at Netdata dashboards + +Each of these sections provides the same number of charts: + +- CPU utilization (`apps.cpu`) + - Total CPU usage + - User/system CPU usage (`apps.cpu_user`/`apps.cpu_system`) +- Disk I/O + - Physical reads/writes (`apps.preads`/`apps.pwrites`) + - Logical reads/writes (`apps.lreads`/`apps.lwrites`) + - Open unique files (if a file is found open multiple times, it is counted just once, `apps.files`) +- Memory + - Real Memory Used (non-shared, `apps.mem`) + - Virtual Memory Allocated (`apps.vmem`) + - Minor page faults (i.e. memory activity, `apps.minor_faults`) +- Processes + - Threads running (`apps.threads`) + - Processes running (`apps.processes`) + - Carried over uptime (since the last Netdata Agent restart, `apps.uptime`) + - Minimum uptime (`apps.uptime_min`) + - Average uptime (`apps.uptime_average`) + - Maximum uptime (`apps.uptime_max`) + - Pipes open (`apps.pipes`) +- Swap memory + - Swap memory used (`apps.swap`) + - Major page faults (i.e. swap activity, `apps.major_faults`) +- Network + - Sockets open (`apps.sockets`) + +In addition, if the [eBPF collector](/collectors/ebpf.plugin/README.md) is running, your dashboard will also show an +additional [list of charts](/collectors/ebpf.plugin/README.md#integration-with-appsplugin) using low-level Linux +metrics. + +The above are reported: + +- For **Applications** per target configured. +- For **Users** per username or UID (when the username is not available). +- For **User Groups** per groupname or GID (when groupname is not available). + +## Performance + +`apps.plugin` is a complex piece of software and has a lot of work to do +We are proud that `apps.plugin` is a lot faster compared to any other similar tool, +while collecting a lot more information for the processes, however the fact is that +this plugin requires more CPU resources than the `netdata` daemon itself. + +Under Linux, for each process running, `apps.plugin` reads several `/proc` files +per process. Doing this work per-second, especially on hosts with several thousands +of processes, may increase the CPU resources consumed by the plugin. + +In such cases, you many need to lower its data collection frequency. + +To do this, edit `/etc/netdata/netdata.conf` and find this section: + +``` +[plugin:apps] + # update every = 1 + # command options = +``` + +Uncomment the line `update every` and set it to a higher number. If you just set it to `2`, +its CPU resources will be cut in half, and data collection will be once every 2 seconds. + +## Configuration + +The configuration file is `/etc/netdata/apps_groups.conf`. To edit it on your system, run `/etc/netdata/edit-config apps_groups.conf`. + +The configuration file works accepts multiple lines, each having this format: + +```txt +group: process1 process2 ... +``` + +Each group can be given multiple times, to add more processes to it. + +For the **Applications** section, only groups configured in this file are reported. +All other processes will be reported as `other`. + +For each process given, its whole process tree will be grouped, not just the process matched. +The plugin will include both parents and children. If including the parents into the group is +undesirable, the line `other: *` should be appended to the `apps_groups.conf`. + +The process names are the ones returned by: + +- `ps -e` or `cat /proc/PID/stat` +- in case of substring mode (see below): `/proc/PID/cmdline` + +To add process names with spaces, enclose them in quotes (single or double) +example: `'Plex Media Serv'` or `"my other process"`. + +You can add an asterisk `*` at the beginning and/or the end of a process: + +- `*name` _suffix_ mode: will search for processes ending with `name` (at `/proc/PID/stat`) +- `name*` _prefix_ mode: will search for processes beginning with `name` (at `/proc/PID/stat`) +- `*name*` _substring_ mode: will search for `name` in the whole command line (at `/proc/PID/cmdline`) + +If you enter even just one _name_ (substring), `apps.plugin` will process +`/proc/PID/cmdline` for all processes (of course only once per process: when they are first seen). + +To add processes with single quotes, enclose them in double quotes: `"process with this ' single quote"` + +To add processes with double quotes, enclose them in single quotes: `'process with this " double quote'` + +If a group or process name starts with a `-`, the dimension will be hidden from the chart (cpu chart only). + +If a process starts with a `+`, debugging will be enabled for it (debugging produces a lot of output - do not enable it in production systems). + +You can add any number of groups. Only the ones found running will affect the charts generated. +However, producing charts with hundreds of dimensions may slow down your web browser. + +The order of the entries in this list is important: the first that matches a process is used, so put important +ones at the top. Processes not matched by any row, will inherit it from their parents or children. + +The order also controls the order of the dimensions on the generated charts (although applications started +after apps.plugin is started, will be appended to the existing list of dimensions the `netdata` daemon maintains). + +There are a few command line options you can pass to `apps.plugin`. The list of available options can be acquired with the `--help` flag. The options can be set in the `netdata.conf` file. For example, to disable user and user group charts you should set + +``` +[plugin:apps] + command options = without-users without-groups +``` + +### Integration with eBPF + +If you don't see charts under the **eBPF syscall** or **eBPF net** sections, you should edit your +[`ebpf.d.conf`](/collectors/ebpf.plugin/README.md#configure-the-ebpf-collector) file to ensure the eBPF program is enabled. + +Also see our [guide on troubleshooting apps with eBPF +metrics](/docs/guides/troubleshoot/monitor-debug-applications-ebpf.md) for ideas on how to interpret these charts in a +few scenarios. + +## Permissions + +`apps.plugin` requires additional privileges to collect all the information it needs. +The problem is described in issue #157. + +When Netdata is installed, `apps.plugin` is given the capabilities `cap_dac_read_search,cap_sys_ptrace+ep`. +If this fails (i.e. `setcap` fails), `apps.plugin` is setuid to `root`. + +### linux capabilities in containers + +There are a few cases, like `docker` and `virtuozzo` containers, where `setcap` succeeds, but the capabilities +are silently ignored (in `lxc` containers `setcap` fails). + +In these cases ()`setcap` succeeds but capabilities do not work), you will have to setuid +to root `apps.plugin` by running these commands: + +```sh +chown root:netdata /usr/libexec/netdata/plugins.d/apps.plugin +chmod 4750 /usr/libexec/netdata/plugins.d/apps.plugin +``` + +You will have to run these, every time you update Netdata. + +## Security + +`apps.plugin` performs a hard-coded function of building the process tree in memory, +iterating forever, collecting metrics for each running process and sending them to Netdata. +This is a one-way communication, from `apps.plugin` to Netdata. + +So, since `apps.plugin` cannot be instructed by Netdata for the actions it performs, +we think it is pretty safe to allow it have these increased privileges. + +Keep in mind that `apps.plugin` will still run without escalated permissions, +but it will not be able to collect all the information. + +## Application Badges + +You can create badges that you can embed anywhere you like, with URLs like this: + +``` +https://your.netdata.ip:19999/api/v1/badge.svg?chart=apps.processes&dimensions=myapp&value_color=green%3E0%7Cred +``` + +The color expression unescaped is this: `value_color=green>0|red`. + +Here is an example for the process group `sql` at `https://registry.my-netdata.io`: + +![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.processes&dimensions=sql&value_color=green%3E0%7Cred) + +Netdata is able give you a lot more badges for your app. +Examples below for process group `sql`: + +- CPU usage: ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.cpu&dimensions=sql&value_color=green=0%7Corange%3C50%7Cred) +- Disk Physical Reads ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.preads&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Disk Physical Writes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.pwrites&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Disk Logical Reads ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.lreads&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Disk Logical Writes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.lwrites&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Open Files ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.files&dimensions=sql&value_color=green%3E30%7Cred) +- Real Memory ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.mem&dimensions=sql&value_color=green%3C100%7Corange%3C200%7Cred) +- Virtual Memory ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.vmem&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Swap Memory ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.swap&dimensions=sql&value_color=green=0%7Cred) +- Minor Page Faults ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.minor_faults&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Processes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.processes&dimensions=sql&value_color=green%3E0%7Cred) +- Threads ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.threads&dimensions=sql&value_color=green%3E=28%7Cred) +- Major Faults (swap activity) ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.major_faults&dimensions=sql&value_color=green=0%7Cred) +- Open Pipes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.pipes&dimensions=sql&value_color=green=0%7Cred) +- Open Sockets ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.sockets&dimensions=sql&value_color=green%3E=3%7Cred) + +For more information about badges check [Generating Badges](/web/api/badges/README.md) + +## Comparison with console tools + +SSH to a server running Netdata and execute this: + +```sh +while true; do ls -l /var/run >/dev/null; done +``` + +In most systems `/var/run` is a `tmpfs` device, so there is nothing that can stop this command +from consuming entirely one of the CPU cores of the machine. + +As we will see below, **none** of the console performance monitoring tools can report that this +command is using 100% CPU. They do report of course that the CPU is busy, but **they fail to +identify the process that consumes so much CPU**. + +Here is what common Linux console monitoring tools report: + +### top + +`top` reports that `bash` is using just 14%. + +If you check the total system CPU utilization, it says there is no idle CPU at all, but `top` +fails to provide a breakdown of the CPU consumption in the system. The sum of the CPU utilization +of all processes reported by `top`, is 15.6%. + +``` +top - 18:46:28 up 3 days, 20:14, 2 users, load average: 0.22, 0.05, 0.02 +Tasks: 76 total, 2 running, 74 sleeping, 0 stopped, 0 zombie +%Cpu(s): 32.8 us, 65.6 sy, 0.0 ni, 0.0 id, 0.0 wa, 1.3 hi, 0.3 si, 0.0 st +KiB Mem : 1016576 total, 244112 free, 52012 used, 720452 buff/cache +KiB Swap: 0 total, 0 free, 0 used. 753712 avail Mem + + PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND +12789 root 20 0 14980 4180 3020 S 14.0 0.4 0:02.82 bash + 9 root 20 0 0 0 0 S 1.0 0.0 0:22.36 rcuos/0 + 642 netdata 20 0 132024 20112 2660 S 0.3 2.0 14:26.29 netdata +12522 netdata 20 0 9508 2476 1828 S 0.3 0.2 0:02.26 apps.plugin + 1 root 20 0 67196 10216 7500 S 0.0 1.0 0:04.83 systemd + 2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd +``` + +### htop + +Exactly like `top`, `htop` is providing an incomplete breakdown of the system CPU utilization. + +``` + CPU[||||||||||||||||||||||||100.0%] Tasks: 27, 11 thr; 2 running + Mem[||||||||||||||||||||85.4M/993M] Load average: 1.16 0.88 0.90 + Swp[ 0K/0K] Uptime: 3 days, 21:37:03 + + PID USER PRI NI VIRT RES SHR S CPU% MEM% TIME+ Command +12789 root 20 0 15104 4484 3208 S 14.0 0.4 10:57.15 -bash + 7024 netdata 20 0 9544 2480 1744 S 0.7 0.2 0:00.88 /usr/libexec/netd + 7009 netdata 20 0 138M 21016 2712 S 0.7 2.1 0:00.89 /usr/sbin/netdata + 7012 netdata 20 0 138M 21016 2712 S 0.0 2.1 0:00.31 /usr/sbin/netdata + 563 root 20 0 308M 202M 202M S 0.0 20.4 1:00.81 /usr/lib/systemd/ + 7019 netdata 20 0 138M 21016 2712 S 0.0 2.1 0:00.14 /usr/sbin/netdata +``` + +### atop + +`atop` also fails to break down CPU usage. + +``` +ATOP - localhost 2016/12/10 20:11:27 ----------- 10s elapsed +PRC | sys 1.13s | user 0.43s | #proc 75 | #zombie 0 | #exit 5383 | +CPU | sys 67% | user 31% | irq 2% | idle 0% | wait 0% | +CPL | avg1 1.34 | avg5 1.05 | avg15 0.96 | csw 51346 | intr 10508 | +MEM | tot 992.8M | free 211.5M | cache 470.0M | buff 87.2M | slab 164.7M | +SWP | tot 0.0M | free 0.0M | | vmcom 207.6M | vmlim 496.4M | +DSK | vda | busy 0% | read 0 | write 4 | avio 1.50 ms | +NET | transport | tcpi 16 | tcpo 15 | udpi 0 | udpo 0 | +NET | network | ipi 16 | ipo 15 | ipfrw 0 | deliv 16 | +NET | eth0 ---- | pcki 16 | pcko 15 | si 1 Kbps | so 4 Kbps | + + PID SYSCPU USRCPU VGROW RGROW RDDSK WRDSK ST EXC S CPU CMD 1/600 +12789 0.98s 0.40s 0K 0K 0K 336K -- - S 14% bash + 9 0.08s 0.00s 0K 0K 0K 0K -- - S 1% rcuos/0 + 7024 0.03s 0.00s 0K 0K 0K 0K -- - S 0% apps.plugin + 7009 0.01s 0.01s 0K 0K 0K 4K -- - S 0% netdata +``` + +### glances + +And the same is true for `glances`. The system runs at 100%, but `glances` reports only 17% +per process utilization. + +Note also, that being a `python` program, `glances` uses 1.6% CPU while it runs. + +``` +localhost Uptime: 3 days, 21:42:00 + +CPU [100.0%] CPU 100.0% MEM 23.7% SWAP 0.0% LOAD 1-core +MEM [ 23.7%] user: 30.9% total: 993M total: 0 1 min: 1.18 +SWAP [ 0.0%] system: 67.8% used: 236M used: 0 5 min: 1.08 + idle: 0.0% free: 757M free: 0 15 min: 1.00 + +NETWORK Rx/s Tx/s TASKS 75 (90 thr), 1 run, 74 slp, 0 oth +eth0 168b 2Kb +eth1 0b 0b CPU% MEM% PID USER NI S Command +lo 0b 0b 13.5 0.4 12789 root 0 S -bash + 1.6 2.2 7025 root 0 R /usr/bin/python /u +DISK I/O R/s W/s 1.0 0.0 9 root 0 S rcuos/0 +vda1 0 4K 0.3 0.2 7024 netdata 0 S /usr/libexec/netda + 0.3 0.0 7 root 0 S rcu_sched +FILE SYS Used Total 0.3 2.1 7009 netdata 0 S /usr/sbin/netdata +/ (vda1) 1.56G 29.5G 0.0 0.0 17 root 0 S oom_reaper +``` + +### why does this happen? + +All the console tools report usage based on the processes found running *at the moment they +examine the process tree*. So, they see just one `ls` command, which is actually very quick +with minor CPU utilization. But the shell, is spawning hundreds of them, one after another +(much like shell scripts do). + +### What does Netdata report? + +The total CPU utilization of the system: + +![image](https://cloud.githubusercontent.com/assets/2662304/21076212/9198e5a6-bf2e-11e6-9bc0-6bdea25befb2.png) +
***Figure 1**: The system overview section at Netdata, just a few seconds after the command was run* + +And at the applications `apps.plugin` breaks down CPU usage per application: + +![image](https://cloud.githubusercontent.com/assets/2662304/21076220/c9687848-bf2e-11e6-8d81-348592c5aca2.png) +
***Figure 2**: The Applications section at Netdata, just a few seconds after the command was run* + +So, the `ssh` session is using 95% CPU time. + +Why `ssh`? + +`apps.plugin` groups all processes based on its configuration file. +The default configuration has nothing for `bash`, but it has for `sshd`, so Netdata accumulates +all ssh sessions to a dimension on the charts, called `ssh`. This includes all the processes in +the process tree of `sshd`, **including the exited children**. + +> Distributions based on `systemd`, provide another way to get cpu utilization per user session +> or service running: control groups, or cgroups, commonly used as part of containers +> `apps.plugin` does not use these mechanisms. The process grouping made by `apps.plugin` works +> on any Linux, `systemd` based or not. + +#### a more technical description of how Netdata works + +Netdata reads `/proc//stat` for all processes, once per second and extracts `utime` and +`stime` (user and system cpu utilization), much like all the console tools do. + +But it also extracts `cutime` and `cstime` that account the user and system time of the exit children of each process. +By keeping a map in memory of the whole process tree, it is capable of assigning the right time to every process, taking +into account all its exited children. + +It is tricky, since a process may be running for 1 hour and once it exits, its parent should not +receive the whole 1 hour of cpu time in just 1 second - you have to subtract the cpu time that has +been reported for it prior to this iteration. + +It is even trickier, because walking through the entire process tree takes some time itself. So, +if you sum the CPU utilization of all processes, you might have more CPU time than the reported +total cpu time of the system. Netdata solves this, by adapting the per process cpu utilization to +the total of the system. [Netdata adds charts that document this normalization](https://london.my-netdata.io/default.html#menu_netdata_submenu_apps_plugin). + + diff --git a/collectors/apps.plugin/apps_groups.conf b/collectors/apps.plugin/apps_groups.conf new file mode 100644 index 0000000..dd45c5a --- /dev/null +++ b/collectors/apps.plugin/apps_groups.conf @@ -0,0 +1,422 @@ +# +# apps.plugin process grouping +# +# The apps.plugin displays charts with information about the processes running. +# This config allows grouping processes together, so that several processes +# will be reported as one. +# +# Only groups in this file are reported. All other processes will be reported +# as 'other'. +# +# For each process given, its whole process tree will be grouped, not just +# the process matched. The plugin will include both parents and childs. +# +# The format is: +# +# group: process1 process2 process3 ... +# +# Each group can be given multiple times, to add more processes to it. +# +# The process names are the ones returned by: +# +# - ps -e or /proc/PID/stat +# - in case of substring mode (see below): /proc/PID/cmdline +# +# To add process names with spaces, enclose them in quotes (single or double) +# example: 'Plex Media Serv' "my other process". +# +# Note that spaces are not supported for process groups. Use a dash "-" instead. +# example-process-group: process1 process2 +# +# Wildcard support: +# You can add an asterisk (*) at the beginning and/or the end of a process: +# +# *name suffix mode: will search for processes ending with 'name' +# (/proc/PID/stat) +# +# name* prefix mode: will search for processes beginning with 'name' +# (/proc/PID/stat) +# +# *name* substring mode: will search for 'name' in the whole command line +# (/proc/PID/cmdline) +# +# If you enter even just one *name* (substring), apps.plugin will process +# /proc/PID/cmdline for all processes, just once (when they are first seen). +# +# To add processes with single quotes, enclose them in double quotes +# example: "process with this ' single quote" +# +# To add processes with double quotes, enclose them in single quotes: +# example: 'process with this " double quote' +# +# If a group or process name starts with a -, the dimension will be hidden +# (cpu chart only). +# +# If a process starts with a +, debugging will be enabled for it +# (debugging produces a lot of output - do not enable it in production systems) +# +# You can add any number of groups you like. Only the ones found running will +# affect the charts generated. However, producing charts with hundreds of +# dimensions may slow down your web browser. +# +# The order of the entries in this list is important: the first that matches +# a process is used, so put important ones at the top. Processes not matched +# by any row, will inherit it from their parents or children. +# +# The order also controls the order of the dimensions on the generated charts +# (although applications started after apps.plugin is started, will be appended +# to the existing list of dimensions the netdata daemon maintains). + +# ----------------------------------------------------------------------------- +# NETDATA processes accounting + +# netdata main process +netdata: netdata + +# netdata known plugins +# plugins not defined here will be accumulated in netdata, above +apps.plugin: apps.plugin +freeipmi.plugin: freeipmi.plugin +nfacct.plugin: nfacct.plugin +cups.plugin: cups.plugin +xenstat.plugin: xenstat.plugin +perf.plugin: perf.plugin +charts.d.plugin: *charts.d.plugin* +python.d.plugin: *python.d.plugin* +tc-qos-helper: *tc-qos-helper.sh* +fping: fping +ioping: ioping +go.d.plugin: *go.d.plugin* +slabinfo.plugin: slabinfo.plugin +ebpf.plugin: *ebpf.plugin* + +# agent-service-discovery +agent_sd: agent_sd + +# ----------------------------------------------------------------------------- +# authentication/authorization related servers + +auth: radius* openldap* ldap* slapd authelia sssd saslauthd polkitd gssproxy +fail2ban: fail2ban* + +# ----------------------------------------------------------------------------- +# web/ftp servers + +httpd: apache* httpd nginx* lighttpd hiawatha caddy h2o +proxy: squid* c-icap squidGuard varnish* +php: php* lsphp* +ftpd: proftpd in.tftpd vsftpd +uwsgi: uwsgi +unicorn: *unicorn* +puma: *puma* + +# ----------------------------------------------------------------------------- +# database servers + +sql: mysqld* mariad* postgres* postmaster* oracle_* ora_* sqlservr +nosql: mongod redis* memcached *couchdb* +timedb: prometheus *carbon-cache.py* *carbon-aggregator.py* *graphite/manage.py* *net.opentsdb.tools.TSDMain* influxd* +columndb: clickhouse-server* + +# ----------------------------------------------------------------------------- +# email servers + +mta: amavis* zmstat-* zmdiaglog zmmailboxdmgr opendkim postfwd2 smtp* lmtp* sendmail postfix master pickup qmgr showq tlsmgr postscreen oqmgr msmtp* nullmailer* +mda: dovecot *imapd *pop3d *popd + +# ----------------------------------------------------------------------------- +# network, routing, VPN + +ppp: ppp* +vpn: openvpn pptp* cjdroute gvpe tincd wireguard tailscaled +wifi: hostapd wpa_supplicant +routing: ospfd* ospf6d* bgpd bfdd fabricd isisd eigrpd sharpd staticd ripd ripngd pimd pbrd nhrpd ldpd zebra vrrpd vtysh bird* +modem: ModemManager +netmanager: NetworkManager nm* systemd-networkd networkctl netplan connmand wicked* avahi-autoipd networkd-dispatcher +firewall: firewalld ufw nft +tor: tor +bluetooth: bluetooth bluez bluedevil obexd + +# ----------------------------------------------------------------------------- +# high availability and balancers + +camo: *camo* +balancer: ipvs_* haproxy +ha: corosync hs_logd ha_logd stonithd pacemakerd lrmd crmd keepalived ucarp* + +# ----------------------------------------------------------------------------- +# telephony + +pbx: asterisk safe_asterisk *vicidial* +sip: opensips* stund + +# ----------------------------------------------------------------------------- +# chat + +chat: irssi *vines* *prosody* murmurd + +# ----------------------------------------------------------------------------- +# monitoring + +logs: ulogd* syslog* rsyslog* logrotate systemd-journald rotatelogs sysklogd metalog +nms: snmpd vnstatd smokeping zabbix* munin* mon openhpid tailon nrpe +monit: monit +splunk: splunkd +azure: mdsd *waagent* *omiserver* *omiagent* hv_kvp_daemon hv_vss_daemon *auoms* *omsagent* +datadog: *datadog* +edgedelta: edgedelta +newrelic: newrelic* +google-agent: *google_guest_agent* *google_osconfig_agent* +nvidia-smi: nvidia-smi +htop: htop +watchdog: watchdog + +# ----------------------------------------------------------------------------- +# storage, file systems and file servers + +ceph: ceph-* ceph_* radosgw* rbd-* cephfs-* osdmaptool crushtool +samba: smbd nmbd winbindd ctdbd ctdb-* ctdb_* +nfs: rpcbind rpc.* nfs* +zfs: spl_* z_* txg_* zil_* arc_* l2arc* +btrfs: btrfs* +iscsi: iscsid iscsi_eh +afp: netatalk afpd cnid_dbd cnid_metad +ntfs-3g: ntfs-3g + +# ----------------------------------------------------------------------------- +# kubernetes + +kubelet: kubelet +kube-dns: kube-dns +kube-proxy: kube-proxy +metrics-server: metrics-server +heapster: heapster + +# ----------------------------------------------------------------------------- +# AWS + +aws-s3: '*aws s3*' s3cmd s5cmd +aws: aws + +# ----------------------------------------------------------------------------- +# virtualization platform + +proxmox-ve: pve* spiceproxy + +# ----------------------------------------------------------------------------- +# containers & virtual machines + +containers: lxc* docker* balena* +VMs: vbox* VBox* qemu* kvm* +libvirt: virtlogd virtqemud virtstoraged virtnetworkd virtlockd virtinterfaced +libvirt: virtnodedevd virtproxyd virtsecretd libvirtd +guest-agent: qemu-ga spice-vdagent cloud-init* + +# ----------------------------------------------------------------------------- +# ssh servers and clients + +ssh: ssh* scp sftp* dropbear + +# ----------------------------------------------------------------------------- +# print servers and clients + +print: cups* lpd lpq + +# ----------------------------------------------------------------------------- +# time servers and clients + +time: ntp* systemd-timesyn* chronyd ptp* + +# ----------------------------------------------------------------------------- +# dhcp servers and clients + +dhcp: *dhcp* dhclient + +# ----------------------------------------------------------------------------- +# name servers and clients + +dns: named unbound nsd pdns_server knotd gdnsd yadifad dnsmasq systemd-resolve* pihole* avahi-daemon avahi-dnsconfd +dnsdist: dnsdist + +# ----------------------------------------------------------------------------- +# installation / compilation / debugging + +build: cc1 cc1plus as gcc* cppcheck ld make cmake automake autoconf autoreconf +build: cargo rustc bazel buck git gdb valgrind* rpmbuild dpkg-buildpackage + +# ----------------------------------------------------------------------------- +# package management + +packagemanager: apt* dpkg* dselect dnf yum rpm zypp* yast* pacman xbps* swupd* emerge* +packagemanager: packagekitd pkgin pkg apk snapd slackpkg slapt-get + +# ----------------------------------------------------------------------------- +# antivirus + +antivirus: clam* *clam imunify360* + +# ----------------------------------------------------------------------------- +# torrent clients + +torrents: *deluge* transmission* *SickBeard* *CouchPotato* *rtorrent* + +# ----------------------------------------------------------------------------- +# backup servers and clients + +backup: rsync lsyncd bacula* borg rclone + +# ----------------------------------------------------------------------------- +# cron + +cron: cron* atd anacron systemd-cron* incrond + +# ----------------------------------------------------------------------------- +# UPS + +ups: upsmon upsd */nut/* apcupsd + +# ----------------------------------------------------------------------------- +# media players, servers, clients + +media: mplayer vlc xine mediatomb omxplayer* kodi* xbmc* mediacenter eventlircd +media: mpd minidlnad mt-daapd Plex* jellyfin squeeze* jackett Ombi +media: strawberry* clementine* + +audio: pulse* pipewire wireplumber jack* + +# ----------------------------------------------------------------------------- +# java applications + +hdfsdatanode: *org.apache.hadoop.hdfs.server.datanode.DataNode* +hdfsnamenode: *org.apache.hadoop.hdfs.server.namenode.NameNode* +hdfsjournalnode: *org.apache.hadoop.hdfs.qjournal.server.JournalNode* +hdfszkfc: *org.apache.hadoop.hdfs.tools.DFSZKFailoverController* + +yarnnode: *org.apache.hadoop.yarn.server.nodemanager.NodeManager* +yarnmgr: *org.apache.hadoop.yarn.server.resourcemanager.ResourceManager* +yarnproxy: *org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer* + +sparkworker: *org.apache.spark.deploy.worker.Worker* +sparkmaster: *org.apache.spark.deploy.master.Master* + +hbaseregion: *org.apache.hadoop.hbase.regionserver.HRegionServer* +hbaserest: *org.apache.hadoop.hbase.rest.RESTServer* +hbasethrift: *org.apache.hadoop.hbase.thrift.ThriftServer* +hbasemaster: *org.apache.hadoop.hbase.master.HMaster* + +zookeeper: *org.apache.zookeeper.server.quorum.QuorumPeerMain* + +hive2: *org.apache.hive.service.server.HiveServer2* +hivemetastore: *org.apache.hadoop.hive.metastore.HiveMetaStore* + +solr: *solr.install.dir* + +airflow: *airflow* + +# ----------------------------------------------------------------------------- +# GUI + +X: X Xorg xinit xdm Xwayland xsettingsd +wayland: swaylock swayidle waypipe wayvnc +kde: *kdeinit* kdm sddm plasmashell startplasma-* kwin* kwallet* krunner kactivitymanager* +gnome: gnome-* gdm gconf* mutter +mate: mate-* msd-* marco* +cinnamon: cinnamon* muffin +xfce: xfwm4 xfdesktop xfce* Thunar xfsettingsd xfconf* +lxde: lxde* startlxde lxdm lxappearance* lxlauncher* lxpanel* lxsession* lxsettings* +lxqt: lxqt* startlxqt +enlightenment: entrance enlightenment* +i3: i3* +awesome: awesome awesome-client +dwm: dwm.* +sway: sway +weston: weston +cage: cage +wayfire: wayfire +gui: lightdm colord seatd greetd gkrellm slim qingy dconf* *gvfs gvfs* +gui: '*systemd --user*' xdg-* at-spi-* + +webbrowser: *chrome-sandbox* *google-chrome* *chromium* *firefox* vivaldi* opera* epiphany chrome* +webbrowser: lynx elinks w3m w3mmee links +mua: evolution-* thunderbird* mutt neomutt pine mailx alpine + +# ----------------------------------------------------------------------------- +# Kernel / System + +ksmd: ksmd +khugepaged: khugepaged +kdamond: kdamond +kswapd: kswapd +zswap: zswap +kcompactd: kcompactd + +system: systemd-* udisks* udevd* *udevd ipv6_addrconf dbus-* rtkit* +system: mdadm acpid uuidd upowerd elogind* eudev mdev lvmpolld dmeventd +system: accounts-daemon rngd haveged rasdaemon irqbalance start-stop-daemon +system: supervise-daemon openrc* init runit runsvdir runsv auditd lsmd +system: abrt* nscd rtkit-daemon gpg-agent usbguard* + +kernel: kworker kthreadd kauditd lockd khelper kdevtmpfs khungtaskd rpciod +kernel: fsnotify_mark kthrotld deferwq scsi_* kdmflush oom_reaper kdevtempfs +kernel: ksoftirqd + +# ----------------------------------------------------------------------------- +# inetd + +inetd: inetd xinetd + +# ----------------------------------------------------------------------------- +# other application servers + +consul: consul + +kafka: *kafka.Kafka* + +rabbitmq: *rabbitmq* + +sidekiq: *sidekiq* +java: java +ipfs: ipfs + +node: node* +factorio: factorio + +p4: p4* + +git-services: gitea gitlab-runner + +freeswitch: freeswitch* + +# -------- web3 / blockchains ---------- + +go-ethereum: geth* +nethermind-ethereum: nethermind* +besu-ethereum: besu* +openEthereum: openethereum* +urbit: urbit* +bitcoin-node: *bitcoind* lnd* +filecoin: lotus* lotus-miner* lotus-worker* +solana: solana* +web3: *hardhat* *ganache* *truffle* *brownie* *waffle* +terra: terra* mantle* + +# ----------------------------------------------------------------------------- +# chaos engineering tools + +stress: stress stress-ng* +gremlin: gremlin* + +# ----------------------------------------------------------------------------- +# load testing tools + +locust: locust + +# ----------------------------------------------------------------------------- +# data science and machine learning tools + +jupyter: jupyter* + +# ----------------------------------------------------------------------------- +# File synchronization tools + +filesync: dropbox syncthing diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c new file mode 100644 index 0000000..89b8333 --- /dev/null +++ b/collectors/apps.plugin/apps_plugin.c @@ -0,0 +1,5015 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +/* + * netdata apps.plugin + * (C) Copyright 2016-2017 Costa Tsaousis + * Released under GPL v3+ + */ + +#include "collectors/all.h" +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#define APPS_PLUGIN_FUNCTIONS() do { \ + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " \"processes\" 10 \"Detailed information on the currently running processes on this node\"\n"); \ + } while(0) + + +// ---------------------------------------------------------------------------- +// debugging + +static int debug_enabled = 0; +static inline void debug_log_int(const char *fmt, ... ) { + va_list args; + + fprintf( stderr, "apps.plugin: "); + va_start( args, fmt ); + vfprintf( stderr, fmt, args ); + va_end( args ); + + fputc('\n', stderr); +} + +#ifdef NETDATA_INTERNAL_CHECKS + +#define debug_log(fmt, args...) do { if(unlikely(debug_enabled)) debug_log_int(fmt, ##args); } while(0) + +#else + +static inline void debug_log_dummy(void) {} +#define debug_log(fmt, args...) debug_log_dummy() + +#endif + + +// ---------------------------------------------------------------------------- + +#ifdef __FreeBSD__ +#include +#endif + +// ---------------------------------------------------------------------------- +// per O/S configuration + +// the minimum PID of the system +// this is also the pid of the init process +#define INIT_PID 1 + +// if the way apps.plugin will work, will read the entire process list, +// including the resource utilization of each process, instantly +// set this to 1 +// when set to 0, apps.plugin builds a sort list of processes, in order +// to process children processes, before parent processes +#ifdef __FreeBSD__ +#define ALL_PIDS_ARE_READ_INSTANTLY 1 +#else +#define ALL_PIDS_ARE_READ_INSTANTLY 0 +#endif + +// ---------------------------------------------------------------------------- +// string lengths + +#define MAX_COMPARE_NAME 100 +#define MAX_NAME 100 +#define MAX_CMDLINE 16384 + +// ---------------------------------------------------------------------------- +// the rates we are going to send to netdata will have this detail a value of: +// - 1 will send just integer parts to netdata +// - 100 will send 2 decimal points +// - 1000 will send 3 decimal points +// etc. +#define RATES_DETAIL 10000ULL + +// ---------------------------------------------------------------------------- +// factor for calculating correct CPU time values depending on units of raw data +static unsigned int time_factor = 0; + +// ---------------------------------------------------------------------------- +// to avoid reallocating too frequently, we can increase the number of spare +// file descriptors used by processes. +// IMPORTANT: +// having a lot of spares, increases the CPU utilization of the plugin. +#define MAX_SPARE_FDS 1 + +// ---------------------------------------------------------------------------- +// command line options + +static int + update_every = 1, + enable_guest_charts = 0, +#ifdef __FreeBSD__ + enable_file_charts = 0, +#else + enable_file_charts = 1, + max_fds_cache_seconds = 60, +#endif + enable_detailed_uptime_charts = 0, + enable_users_charts = 1, + enable_groups_charts = 1, + include_exited_childs = 1; + +// will be changed to getenv(NETDATA_USER_CONFIG_DIR) if it exists +static char *user_config_dir = CONFIG_DIR; +static char *stock_config_dir = LIBCONFIG_DIR; + +// some variables for keeping track of processes count by states +typedef enum { + PROC_STATUS_RUNNING = 0, + PROC_STATUS_SLEEPING_D, // uninterruptible sleep + PROC_STATUS_SLEEPING, // interruptible sleep + PROC_STATUS_ZOMBIE, + PROC_STATUS_STOPPED, + PROC_STATUS_END, //place holder for ending enum fields +} proc_state; + +#ifndef __FreeBSD__ +static proc_state proc_state_count[PROC_STATUS_END]; +static const char *proc_states[] = { + [PROC_STATUS_RUNNING] = "running", + [PROC_STATUS_SLEEPING] = "sleeping_interruptible", + [PROC_STATUS_SLEEPING_D] = "sleeping_uninterruptible", + [PROC_STATUS_ZOMBIE] = "zombie", + [PROC_STATUS_STOPPED] = "stopped", + }; +#endif + +// ---------------------------------------------------------------------------- +// internal flags +// handled in code (automatically set) + +static int + show_guest_time = 0, // 1 when guest values are collected + show_guest_time_old = 0, + proc_pid_cmdline_is_needed = 0; // 1 when we need to read /proc/cmdline + + +// ---------------------------------------------------------------------------- +// internal counters + +static size_t + global_iterations_counter = 1, + calls_counter = 0, + file_counter = 0, + filenames_allocated_counter = 0, + inodes_changed_counter = 0, + links_changed_counter = 0, + targets_assignment_counter = 0; + + +// ---------------------------------------------------------------------------- +// Normalization +// +// With normalization we lower the collected metrics by a factor to make them +// match the total utilization of the system. +// The discrepancy exists because apps.plugin needs some time to collect all +// the metrics. This results in utilization that exceeds the total utilization +// of the system. +// +// With normalization we align the per-process utilization, to the total of +// the system. We first consume the exited children utilization and it the +// collected values is above the total, we proportionally scale each reported +// metric. + +// the total system time, as reported by /proc/stat +static kernel_uint_t + global_utime = 0, + global_stime = 0, + global_gtime = 0; + +// the normalization ratios, as calculated by normalize_utilization() +NETDATA_DOUBLE + utime_fix_ratio = 1.0, + stime_fix_ratio = 1.0, + gtime_fix_ratio = 1.0, + minflt_fix_ratio = 1.0, + majflt_fix_ratio = 1.0, + cutime_fix_ratio = 1.0, + cstime_fix_ratio = 1.0, + cgtime_fix_ratio = 1.0, + cminflt_fix_ratio = 1.0, + cmajflt_fix_ratio = 1.0; + + +struct pid_on_target { + int32_t pid; + struct pid_on_target *next; +}; + +struct openfds { + kernel_uint_t files; + kernel_uint_t pipes; + kernel_uint_t sockets; + kernel_uint_t inotifies; + kernel_uint_t eventfds; + kernel_uint_t timerfds; + kernel_uint_t signalfds; + kernel_uint_t eventpolls; + kernel_uint_t other; +}; + +// ---------------------------------------------------------------------------- +// target +// +// target is the structure that processes are aggregated to be reported +// to netdata. +// +// - Each entry in /etc/apps_groups.conf creates a target. +// - Each user and group used by a process in the system, creates a target. + +struct target { + char compare[MAX_COMPARE_NAME + 1]; + uint32_t comparehash; + size_t comparelen; + + char id[MAX_NAME + 1]; + uint32_t idhash; + + char name[MAX_NAME + 1]; + + uid_t uid; + gid_t gid; + + kernel_uint_t minflt; + kernel_uint_t cminflt; + kernel_uint_t majflt; + kernel_uint_t cmajflt; + kernel_uint_t utime; + kernel_uint_t stime; + kernel_uint_t gtime; + kernel_uint_t cutime; + kernel_uint_t cstime; + kernel_uint_t cgtime; + kernel_uint_t num_threads; + // kernel_uint_t rss; + + kernel_uint_t status_vmsize; + kernel_uint_t status_vmrss; + kernel_uint_t status_vmshared; + kernel_uint_t status_rssfile; + kernel_uint_t status_rssshmem; + kernel_uint_t status_vmswap; + + kernel_uint_t io_logical_bytes_read; + kernel_uint_t io_logical_bytes_written; + kernel_uint_t io_read_calls; + kernel_uint_t io_write_calls; + kernel_uint_t io_storage_bytes_read; + kernel_uint_t io_storage_bytes_written; + kernel_uint_t io_cancelled_write_bytes; + + int *target_fds; + int target_fds_size; + + struct openfds openfds; + + kernel_uint_t starttime; + kernel_uint_t collected_starttime; + kernel_uint_t uptime_min; + kernel_uint_t uptime_sum; + kernel_uint_t uptime_max; + + unsigned int processes; // how many processes have been merged to this + int exposed; // if set, we have sent this to netdata + int hidden; // if set, we set the hidden flag on the dimension + int debug_enabled; + int ends_with; + int starts_with; // if set, the compare string matches only the + // beginning of the command + + struct pid_on_target *root_pid; // list of aggregated pids for target debugging + + struct target *target; // the one that will be reported to netdata + struct target *next; +}; + +struct target + *apps_groups_default_target = NULL, // the default target + *apps_groups_root_target = NULL, // apps_groups.conf defined + *users_root_target = NULL, // users + *groups_root_target = NULL; // user groups + +size_t + apps_groups_targets_count = 0; // # of apps_groups.conf targets + + +// ---------------------------------------------------------------------------- +// pid_stat +// +// structure to store data for each process running +// see: man proc for the description of the fields + +struct pid_fd { + int fd; + +#ifndef __FreeBSD__ + ino_t inode; + char *filename; + uint32_t link_hash; + size_t cache_iterations_counter; + size_t cache_iterations_reset; +#endif +}; + +struct pid_stat { + int32_t pid; + char comm[MAX_COMPARE_NAME + 1]; + char *cmdline; + + uint32_t log_thrown; + + char state; + int32_t ppid; + // int32_t pgrp; + // int32_t session; + // int32_t tty_nr; + // int32_t tpgid; + // uint64_t flags; + + // these are raw values collected + kernel_uint_t minflt_raw; + kernel_uint_t cminflt_raw; + kernel_uint_t majflt_raw; + kernel_uint_t cmajflt_raw; + kernel_uint_t utime_raw; + kernel_uint_t stime_raw; + kernel_uint_t gtime_raw; // guest_time + kernel_uint_t cutime_raw; + kernel_uint_t cstime_raw; + kernel_uint_t cgtime_raw; // cguest_time + + // these are rates + kernel_uint_t minflt; + kernel_uint_t cminflt; + kernel_uint_t majflt; + kernel_uint_t cmajflt; + kernel_uint_t utime; + kernel_uint_t stime; + kernel_uint_t gtime; + kernel_uint_t cutime; + kernel_uint_t cstime; + kernel_uint_t cgtime; + + // int64_t priority; + // int64_t nice; + int32_t num_threads; + // int64_t itrealvalue; + kernel_uint_t collected_starttime; + // kernel_uint_t vsize; + // kernel_uint_t rss; + // kernel_uint_t rsslim; + // kernel_uint_t starcode; + // kernel_uint_t endcode; + // kernel_uint_t startstack; + // kernel_uint_t kstkesp; + // kernel_uint_t kstkeip; + // uint64_t signal; + // uint64_t blocked; + // uint64_t sigignore; + // uint64_t sigcatch; + // uint64_t wchan; + // uint64_t nswap; + // uint64_t cnswap; + // int32_t exit_signal; + // int32_t processor; + // uint32_t rt_priority; + // uint32_t policy; + // kernel_uint_t delayacct_blkio_ticks; + + uid_t uid; + gid_t gid; + + kernel_uint_t status_vmsize; + kernel_uint_t status_vmrss; + kernel_uint_t status_vmshared; + kernel_uint_t status_rssfile; + kernel_uint_t status_rssshmem; + kernel_uint_t status_vmswap; +#ifndef __FreeBSD__ + ARL_BASE *status_arl; +#endif + + kernel_uint_t io_logical_bytes_read_raw; + kernel_uint_t io_logical_bytes_written_raw; + kernel_uint_t io_read_calls_raw; + kernel_uint_t io_write_calls_raw; + kernel_uint_t io_storage_bytes_read_raw; + kernel_uint_t io_storage_bytes_written_raw; + kernel_uint_t io_cancelled_write_bytes_raw; + + kernel_uint_t io_logical_bytes_read; + kernel_uint_t io_logical_bytes_written; + kernel_uint_t io_read_calls; + kernel_uint_t io_write_calls; + kernel_uint_t io_storage_bytes_read; + kernel_uint_t io_storage_bytes_written; + kernel_uint_t io_cancelled_write_bytes; + + struct pid_fd *fds; // array of fds it uses + size_t fds_size; // the size of the fds array + + struct openfds openfds; + + int children_count; // number of processes directly referencing this + unsigned char keep:1; // 1 when we need to keep this process in memory even after it exited + int keeploops; // increases by 1 every time keep is 1 and updated 0 + unsigned char updated:1; // 1 when the process is currently running + unsigned char merged:1; // 1 when it has been merged to its parent + unsigned char read:1; // 1 when we have already read this process for this iteration + + int sortlist; // higher numbers = top on the process tree + // each process gets a unique number + + struct target *target; // app_groups.conf targets + struct target *user_target; // uid based targets + struct target *group_target; // gid based targets + + usec_t stat_collected_usec; + usec_t last_stat_collected_usec; + + usec_t io_collected_usec; + usec_t last_io_collected_usec; + + kernel_uint_t uptime; + + char *fds_dirname; // the full directory name in /proc/PID/fd + + char *stat_filename; + char *status_filename; + char *io_filename; + char *cmdline_filename; + + struct pid_stat *parent; + struct pid_stat *prev; + struct pid_stat *next; +}; + +size_t pagesize; + +kernel_uint_t global_uptime; + +// log each problem once per process +// log flood protection flags (log_thrown) +#define PID_LOG_IO 0x00000001 +#define PID_LOG_STATUS 0x00000002 +#define PID_LOG_CMDLINE 0x00000004 +#define PID_LOG_FDS 0x00000008 +#define PID_LOG_STAT 0x00000010 + +static struct pid_stat + *root_of_pids = NULL, // global list of all processes running + **all_pids = NULL; // to avoid allocations, we pre-allocate + // a pointer for each pid in the entire pid space. + +static size_t + all_pids_count = 0; // the number of processes running + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) +// Another pre-allocated list of all possible pids. +// We need it to pids and assign them a unique sortlist id, so that we +// read parents before children. This is needed to prevent a situation where +// a child is found running, but until we read its parent, it has exited and +// its parent has accumulated its resources. +static pid_t + *all_pids_sortlist = NULL; +#endif + +// ---------------------------------------------------------------------------- +// file descriptor +// +// this is used to keep a global list of all open files of the system. +// it is needed in order to calculate the unique files processes have open. + +#define FILE_DESCRIPTORS_INCREASE_STEP 100 + +// types for struct file_descriptor->type +typedef enum fd_filetype { + FILETYPE_OTHER, + FILETYPE_FILE, + FILETYPE_PIPE, + FILETYPE_SOCKET, + FILETYPE_INOTIFY, + FILETYPE_EVENTFD, + FILETYPE_EVENTPOLL, + FILETYPE_TIMERFD, + FILETYPE_SIGNALFD +} FD_FILETYPE; + +struct file_descriptor { + avl_t avl; + +#ifdef NETDATA_INTERNAL_CHECKS + uint32_t magic; +#endif /* NETDATA_INTERNAL_CHECKS */ + + const char *name; + uint32_t hash; + + FD_FILETYPE type; + int count; + int pos; +} *all_files = NULL; + +static int + all_files_len = 0, + all_files_size = 0; + +long currentmaxfds = 0; + +// ---------------------------------------------------------------------------- +// read users and groups from files + +struct user_or_group_id { + avl_t avl; + + union { + uid_t uid; + gid_t gid; + } id; + + char *name; + + int updated; + + struct user_or_group_id * next; +}; + +enum user_or_group_id_type { + USER_ID, + GROUP_ID +}; + +struct user_or_group_ids{ + enum user_or_group_id_type type; + + avl_tree_type index; + struct user_or_group_id *root; + + char filename[FILENAME_MAX + 1]; +}; + +int user_id_compare(void* a, void* b) { + if(((struct user_or_group_id *)a)->id.uid < ((struct user_or_group_id *)b)->id.uid) + return -1; + + else if(((struct user_or_group_id *)a)->id.uid > ((struct user_or_group_id *)b)->id.uid) + return 1; + + else + return 0; +} + +struct user_or_group_ids all_user_ids = { + .type = USER_ID, + + .index = { + NULL, + user_id_compare + }, + + .root = NULL, + + .filename = "", +}; + +int group_id_compare(void* a, void* b) { + if(((struct user_or_group_id *)a)->id.gid < ((struct user_or_group_id *)b)->id.gid) + return -1; + + else if(((struct user_or_group_id *)a)->id.gid > ((struct user_or_group_id *)b)->id.gid) + return 1; + + else + return 0; +} + +struct user_or_group_ids all_group_ids = { + .type = GROUP_ID, + + .index = { + NULL, + group_id_compare + }, + + .root = NULL, + + .filename = "", +}; + +int file_changed(const struct stat *statbuf, struct timespec *last_modification_time) { + if(likely(statbuf->st_mtim.tv_sec == last_modification_time->tv_sec && + statbuf->st_mtim.tv_nsec == last_modification_time->tv_nsec)) return 0; + + last_modification_time->tv_sec = statbuf->st_mtim.tv_sec; + last_modification_time->tv_nsec = statbuf->st_mtim.tv_nsec; + + return 1; +} + +int read_user_or_group_ids(struct user_or_group_ids *ids, struct timespec *last_modification_time) { + struct stat statbuf; + if(unlikely(stat(ids->filename, &statbuf))) + return 1; + else + if(likely(!file_changed(&statbuf, last_modification_time))) return 0; + + procfile *ff = procfile_open(ids->filename, " :\t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 1; + + size_t line, lines = procfile_lines(ff); + + for(line = 0; line < lines ;line++) { + size_t words = procfile_linewords(ff, line); + if(unlikely(words < 3)) continue; + + char *name = procfile_lineword(ff, line, 0); + if(unlikely(!name || !*name)) continue; + + char *id_string = procfile_lineword(ff, line, 2); + if(unlikely(!id_string || !*id_string)) continue; + + + struct user_or_group_id *user_or_group_id = callocz(1, sizeof(struct user_or_group_id)); + + if(ids->type == USER_ID) + user_or_group_id->id.uid = (uid_t)str2ull(id_string); + else + user_or_group_id->id.gid = (uid_t)str2ull(id_string); + + user_or_group_id->name = strdupz(name); + user_or_group_id->updated = 1; + + struct user_or_group_id *existing_user_id = NULL; + + if(likely(ids->root)) + existing_user_id = (struct user_or_group_id *)avl_search(&ids->index, (avl_t *) user_or_group_id); + + if(unlikely(existing_user_id)) { + freez(existing_user_id->name); + existing_user_id->name = user_or_group_id->name; + existing_user_id->updated = 1; + freez(user_or_group_id); + } + else { + if(unlikely(avl_insert(&ids->index, (avl_t *) user_or_group_id) != (void *) user_or_group_id)) { + error("INTERNAL ERROR: duplicate indexing of id during realloc"); + }; + + user_or_group_id->next = ids->root; + ids->root = user_or_group_id; + } + } + + procfile_close(ff); + + // remove unused ids + struct user_or_group_id *user_or_group_id = ids->root, *prev_user_id = NULL; + + while(user_or_group_id) { + if(unlikely(!user_or_group_id->updated)) { + if(unlikely((struct user_or_group_id *)avl_remove(&ids->index, (avl_t *) user_or_group_id) != user_or_group_id)) + error("INTERNAL ERROR: removal of unused id from index, removed a different id"); + + if(prev_user_id) + prev_user_id->next = user_or_group_id->next; + else + ids->root = user_or_group_id->next; + + freez(user_or_group_id->name); + freez(user_or_group_id); + + if(prev_user_id) + user_or_group_id = prev_user_id->next; + else + user_or_group_id = ids->root; + } + else { + user_or_group_id->updated = 0; + + prev_user_id = user_or_group_id; + user_or_group_id = user_or_group_id->next; + } + } + + return 0; +} + +// ---------------------------------------------------------------------------- +// apps_groups.conf +// aggregate all processes in groups, to have a limited number of dimensions + +static struct target *get_users_target(uid_t uid) { + struct target *w; + for(w = users_root_target ; w ; w = w->next) + if(w->uid == uid) return w; + + w = callocz(sizeof(struct target), 1); + snprintfz(w->compare, MAX_COMPARE_NAME, "%u", uid); + w->comparehash = simple_hash(w->compare); + w->comparelen = strlen(w->compare); + + snprintfz(w->id, MAX_NAME, "%u", uid); + w->idhash = simple_hash(w->id); + + struct user_or_group_id user_id_to_find, *user_or_group_id = NULL; + user_id_to_find.id.uid = uid; + + if(*netdata_configured_host_prefix) { + static struct timespec last_passwd_modification_time; + int ret = read_user_or_group_ids(&all_user_ids, &last_passwd_modification_time); + + if(likely(!ret && all_user_ids.index.root)) + user_or_group_id = (struct user_or_group_id *)avl_search(&all_user_ids.index, (avl_t *) &user_id_to_find); + } + + if(user_or_group_id && user_or_group_id->name && *user_or_group_id->name) { + snprintfz(w->name, MAX_NAME, "%s", user_or_group_id->name); + } + else { + struct passwd *pw = getpwuid(uid); + if(!pw || !pw->pw_name || !*pw->pw_name) + snprintfz(w->name, MAX_NAME, "%u", uid); + else + snprintfz(w->name, MAX_NAME, "%s", pw->pw_name); + } + + netdata_fix_chart_name(w->name); + + w->uid = uid; + + w->next = users_root_target; + users_root_target = w; + + debug_log("added uid %u ('%s') target", w->uid, w->name); + + return w; +} + +struct target *get_groups_target(gid_t gid) +{ + struct target *w; + for(w = groups_root_target ; w ; w = w->next) + if(w->gid == gid) return w; + + w = callocz(sizeof(struct target), 1); + snprintfz(w->compare, MAX_COMPARE_NAME, "%u", gid); + w->comparehash = simple_hash(w->compare); + w->comparelen = strlen(w->compare); + + snprintfz(w->id, MAX_NAME, "%u", gid); + w->idhash = simple_hash(w->id); + + struct user_or_group_id group_id_to_find, *group_id = NULL; + group_id_to_find.id.gid = gid; + + if(*netdata_configured_host_prefix) { + static struct timespec last_group_modification_time; + int ret = read_user_or_group_ids(&all_group_ids, &last_group_modification_time); + + if(likely(!ret && all_group_ids.index.root)) + group_id = (struct user_or_group_id *)avl_search(&all_group_ids.index, (avl_t *) &group_id_to_find); + } + + if(group_id && group_id->name && *group_id->name) { + snprintfz(w->name, MAX_NAME, "%s", group_id->name); + } + else { + struct group *gr = getgrgid(gid); + if(!gr || !gr->gr_name || !*gr->gr_name) + snprintfz(w->name, MAX_NAME, "%u", gid); + else + snprintfz(w->name, MAX_NAME, "%s", gr->gr_name); + } + + netdata_fix_chart_name(w->name); + + w->gid = gid; + + w->next = groups_root_target; + groups_root_target = w; + + debug_log("added gid %u ('%s') target", w->gid, w->name); + + return w; +} + +// find or create a new target +// there are targets that are just aggregated to other target (the second argument) +static struct target *get_apps_groups_target(const char *id, struct target *target, const char *name) { + int tdebug = 0, thidden = target?target->hidden:0, ends_with = 0; + const char *nid = id; + + // extract the options + while(nid[0] == '-' || nid[0] == '+' || nid[0] == '*') { + if(nid[0] == '-') thidden = 1; + if(nid[0] == '+') tdebug = 1; + if(nid[0] == '*') ends_with = 1; + nid++; + } + uint32_t hash = simple_hash(id); + + // find if it already exists + struct target *w, *last = apps_groups_root_target; + for(w = apps_groups_root_target ; w ; w = w->next) { + if(w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0) + return w; + + last = w; + } + + // find an existing target + if(unlikely(!target)) { + while(*name == '-') { + if(*name == '-') thidden = 1; + name++; + } + + for(target = apps_groups_root_target ; target != NULL ; target = target->next) { + if(!target->target && strcmp(name, target->name) == 0) + break; + } + + if(unlikely(debug_enabled)) { + if(unlikely(target)) + debug_log("REUSING TARGET NAME '%s' on ID '%s'", target->name, target->id); + else + debug_log("NEW TARGET NAME '%s' on ID '%s'", name, id); + } + } + + if(target && target->target) + fatal("Internal Error: request to link process '%s' to target '%s' which is linked to target '%s'", id, target->id, target->target->id); + + w = callocz(sizeof(struct target), 1); + strncpyz(w->id, nid, MAX_NAME); + w->idhash = simple_hash(w->id); + + if(unlikely(!target)) + // copy the name + strncpyz(w->name, name, MAX_NAME); + else + // copy the id + strncpyz(w->name, nid, MAX_NAME); + + strncpyz(w->compare, nid, MAX_COMPARE_NAME); + size_t len = strlen(w->compare); + if(w->compare[len - 1] == '*') { + w->compare[len - 1] = '\0'; + w->starts_with = 1; + } + w->ends_with = ends_with; + + if(w->starts_with && w->ends_with) + proc_pid_cmdline_is_needed = 1; + + w->comparehash = simple_hash(w->compare); + w->comparelen = strlen(w->compare); + + w->hidden = thidden; +#ifdef NETDATA_INTERNAL_CHECKS + w->debug_enabled = tdebug; +#else + if(tdebug) + fprintf(stderr, "apps.plugin has been compiled without debugging\n"); +#endif + w->target = target; + + // append it, to maintain the order in apps_groups.conf + if(last) last->next = w; + else apps_groups_root_target = w; + + debug_log("ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s" + , w->id + , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) + , w->target?w->target->name:w->name + , (w->hidden)?"hidden":"-" + , (w->debug_enabled)?"debug":"-" + ); + + return w; +} + +// read the apps_groups.conf file +static int read_apps_groups_conf(const char *path, const char *file) +{ + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", path, file); + + debug_log("process groups file: '%s'", filename); + + // ---------------------------------------- + + procfile *ff = procfile_open(filename, " :\t", PROCFILE_FLAG_DEFAULT); + if(!ff) return 1; + + procfile_set_quotes(ff, "'\""); + + ff = procfile_readall(ff); + if(!ff) + return 1; + + size_t line, lines = procfile_lines(ff); + + for(line = 0; line < lines ;line++) { + size_t word, words = procfile_linewords(ff, line); + if(!words) continue; + + char *name = procfile_lineword(ff, line, 0); + if(!name || !*name) continue; + + // find a possibly existing target + struct target *w = NULL; + + // loop through all words, skipping the first one (the name) + for(word = 0; word < words ;word++) { + char *s = procfile_lineword(ff, line, word); + if(!s || !*s) continue; + if(*s == '#') break; + + // is this the first word? skip it + if(s == name) continue; + + // add this target + struct target *n = get_apps_groups_target(s, w, name); + if(!n) { + error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); + continue; + } + + // just some optimization + // to avoid searching for a target for each process + if(!w) w = n->target?n->target:n; + } + } + + procfile_close(ff); + + apps_groups_default_target = get_apps_groups_target("p+!o@w#e$i^r&7*5(-i)l-o_", NULL, "other"); // match nothing + if(!apps_groups_default_target) + fatal("Cannot create default target"); + + // allow the user to override group 'other' + if(apps_groups_default_target->target) + apps_groups_default_target = apps_groups_default_target->target; + + return 0; +} + + +// ---------------------------------------------------------------------------- +// struct pid_stat management +static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size); + +static inline struct pid_stat *get_pid_entry(pid_t pid) { + if(unlikely(all_pids[pid])) + return all_pids[pid]; + + struct pid_stat *p = callocz(sizeof(struct pid_stat), 1); + p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS); + p->fds_size = MAX_SPARE_FDS; + init_pid_fds(p, 0, p->fds_size); + p->pid = pid; + + DOUBLE_LINKED_LIST_APPEND_UNSAFE(root_of_pids, p, prev, next); + + all_pids[pid] = p; + all_pids_count++; + + return p; +} + +static inline void del_pid_entry(pid_t pid) { + struct pid_stat *p = all_pids[pid]; + + if(unlikely(!p)) { + error("attempted to free pid %d that is not allocated.", pid); + return; + } + + debug_log("process %d %s exited, deleting it.", pid, p->comm); + + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(root_of_pids, p, prev, next); + + // free the filename +#ifndef __FreeBSD__ + { + size_t i; + for(i = 0; i < p->fds_size; i++) + if(p->fds[i].filename) + freez(p->fds[i].filename); + } +#endif + freez(p->fds); + + freez(p->fds_dirname); + freez(p->stat_filename); + freez(p->status_filename); +#ifndef __FreeBSD__ + arl_free(p->status_arl); +#endif + freez(p->io_filename); + freez(p->cmdline_filename); + freez(p->cmdline); + freez(p); + + all_pids[pid] = NULL; + all_pids_count--; +} + +// ---------------------------------------------------------------------------- + +static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { + if(unlikely(!status)) { + // error("command failed log %u, errno %d", log, errno); + + if(unlikely(debug_enabled || errno != ENOENT)) { + if(unlikely(debug_enabled || !(p->log_thrown & log))) { + p->log_thrown |= log; + switch(log) { + case PID_LOG_IO: + #ifdef __FreeBSD__ + error("Cannot fetch process %d I/O info (command '%s')", p->pid, p->comm); + #else + error("Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif + break; + + case PID_LOG_STATUS: + #ifdef __FreeBSD__ + error("Cannot fetch process %d status info (command '%s')", p->pid, p->comm); + #else + error("Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif + break; + + case PID_LOG_CMDLINE: + #ifdef __FreeBSD__ + error("Cannot fetch process %d command line (command '%s')", p->pid, p->comm); + #else + error("Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif + break; + + case PID_LOG_FDS: + #ifdef __FreeBSD__ + error("Cannot fetch process %d files (command '%s')", p->pid, p->comm); + #else + error("Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif + break; + + case PID_LOG_STAT: + break; + + default: + error("unhandled error for pid %d, command '%s'", p->pid, p->comm); + break; + } + } + } + errno = 0; + } + else if(unlikely(p->log_thrown & log)) { + // error("unsetting log %u on pid %d", log, p->pid); + p->log_thrown &= ~log; + } + + return status; +} + +static inline void assign_target_to_pid(struct pid_stat *p) { + targets_assignment_counter++; + + uint32_t hash = simple_hash(p->comm); + size_t pclen = strlen(p->comm); + + struct target *w; + for(w = apps_groups_root_target; w ; w = w->next) { + // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm); + + // find it - 4 cases: + // 1. the target is not a pattern + // 2. the target has the prefix + // 3. the target has the suffix + // 4. the target is something inside cmdline + + if(unlikely(( (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) + || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) + || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) + || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && p->cmdline && strstr(p->cmdline, w->compare)) + ))) { + + if(w->target) p->target = w->target; + else p->target = w; + + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("%s linked to target %s", p->comm, p->target->name); + + break; + } + } +} + + +// ---------------------------------------------------------------------------- +// update pids from proc + +static inline int read_proc_pid_cmdline(struct pid_stat *p) { + static char cmdline[MAX_CMDLINE + 1]; + +#ifdef __FreeBSD__ + size_t i, bytes = MAX_CMDLINE; + int mib[4]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_ARGS; + mib[3] = p->pid; + if (unlikely(sysctl(mib, 4, cmdline, &bytes, NULL, 0))) + goto cleanup; +#else + if(unlikely(!p->cmdline_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); + p->cmdline_filename = strdupz(filename); + } + + int fd = open(p->cmdline_filename, procfile_open_flags, 0666); + if(unlikely(fd == -1)) goto cleanup; + + ssize_t i, bytes = read(fd, cmdline, MAX_CMDLINE); + close(fd); + + if(unlikely(bytes < 0)) goto cleanup; +#endif + + cmdline[bytes] = '\0'; + for(i = 0; i < bytes ; i++) { + if(unlikely(!cmdline[i])) cmdline[i] = ' '; + } + + if(p->cmdline) freez(p->cmdline); + p->cmdline = strdupz(cmdline); + + debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline); + + return 1; + +cleanup: + // copy the command to the command line + if(p->cmdline) freez(p->cmdline); + p->cmdline = strdupz(p->comm); + return 0; +} + +// ---------------------------------------------------------------------------- +// macro to calculate the incremental rate of a value +// each parameter is accessed only ONCE - so it is safe to pass function calls +// or other macros as parameters + +#define incremental_rate(rate_variable, last_kernel_variable, new_kernel_value, collected_usec, last_collected_usec) { \ + kernel_uint_t _new_tmp = new_kernel_value; \ + (rate_variable) = (_new_tmp - (last_kernel_variable)) * (USEC_PER_SEC * RATES_DETAIL) / ((collected_usec) - (last_collected_usec)); \ + (last_kernel_variable) = _new_tmp; \ + } + +// the same macro for struct pid members +#define pid_incremental_rate(type, var, value) \ + incremental_rate(var, var##_raw, value, p->type##_collected_usec, p->last_##type##_collected_usec) + + +// ---------------------------------------------------------------------------- + +#ifndef __FreeBSD__ +struct arl_callback_ptr { + struct pid_stat *p; + procfile *ff; + size_t line; +}; + +void arl_callback_status_uid(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; + + //const char *real_uid = procfile_lineword(aptr->ff, aptr->line, 1); + const char *effective_uid = procfile_lineword(aptr->ff, aptr->line, 2); + //const char *saved_uid = procfile_lineword(aptr->ff, aptr->line, 3); + //const char *filesystem_uid = procfile_lineword(aptr->ff, aptr->line, 4); + + if(likely(effective_uid && *effective_uid)) + aptr->p->uid = (uid_t)str2l(effective_uid); +} + +void arl_callback_status_gid(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; + + //const char *real_gid = procfile_lineword(aptr->ff, aptr->line, 1); + const char *effective_gid = procfile_lineword(aptr->ff, aptr->line, 2); + //const char *saved_gid = procfile_lineword(aptr->ff, aptr->line, 3); + //const char *filesystem_gid = procfile_lineword(aptr->ff, aptr->line, 4); + + if(likely(effective_gid && *effective_gid)) + aptr->p->gid = (uid_t)str2l(effective_gid); +} + +void arl_callback_status_vmsize(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_vmsize = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_vmswap(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_vmswap = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_vmrss(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_vmrss = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_rssfile(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_rssfile = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_rssshmem(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_rssshmem = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +static void update_proc_state_count(char proc_state) { + switch (proc_state) { + case 'S': + proc_state_count[PROC_STATUS_SLEEPING] += 1; + break; + case 'R': + proc_state_count[PROC_STATUS_RUNNING] += 1; + break; + case 'D': + proc_state_count[PROC_STATUS_SLEEPING_D] += 1; + break; + case 'Z': + proc_state_count[PROC_STATUS_ZOMBIE] += 1; + break; + case 'T': + proc_state_count[PROC_STATUS_STOPPED] += 1; + break; + default: + break; + } +} +#endif // !__FreeBSD__ + +static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { + p->status_vmsize = 0; + p->status_vmrss = 0; + p->status_vmshared = 0; + p->status_rssfile = 0; + p->status_rssshmem = 0; + p->status_vmswap = 0; + +#ifdef __FreeBSD__ + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; + + p->uid = proc_info->ki_uid; + p->gid = proc_info->ki_groups[0]; + p->status_vmsize = proc_info->ki_size / 1024; // in KiB + p->status_vmrss = proc_info->ki_rssize * pagesize / 1024; // in KiB + // TODO: what about shared and swap memory on FreeBSD? + return 1; +#else + (void)ptr; + + static struct arl_callback_ptr arl_ptr; + static procfile *ff = NULL; + + if(unlikely(!p->status_arl)) { + p->status_arl = arl_create("/proc/pid/status", NULL, 60); + arl_expect_custom(p->status_arl, "Uid", arl_callback_status_uid, &arl_ptr); + arl_expect_custom(p->status_arl, "Gid", arl_callback_status_gid, &arl_ptr); + arl_expect_custom(p->status_arl, "VmSize", arl_callback_status_vmsize, &arl_ptr); + arl_expect_custom(p->status_arl, "VmRSS", arl_callback_status_vmrss, &arl_ptr); + arl_expect_custom(p->status_arl, "RssFile", arl_callback_status_rssfile, &arl_ptr); + arl_expect_custom(p->status_arl, "RssShmem", arl_callback_status_rssshmem, &arl_ptr); + arl_expect_custom(p->status_arl, "VmSwap", arl_callback_status_vmswap, &arl_ptr); + } + + + if(unlikely(!p->status_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/status", netdata_configured_host_prefix, p->pid); + p->status_filename = strdupz(filename); + } + + ff = procfile_reopen(ff, p->status_filename, (!ff)?" \t:,-()/":NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(unlikely(!ff)) return 0; + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; + + calls_counter++; + + // let ARL use this pid + arl_ptr.p = p; + arl_ptr.ff = ff; + + size_t lines = procfile_lines(ff), l; + arl_begin(p->status_arl); + + for(l = 0; l < lines ;l++) { + // debug_log("CHECK: line %zu of %zu, key '%s' = '%s'", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); + arl_ptr.line = l; + if(unlikely(arl_check(p->status_arl, + procfile_lineword(ff, l, 0), + procfile_lineword(ff, l, 1)))) break; + } + + p->status_vmshared = p->status_rssfile + p->status_rssshmem; + + // debug_log("%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared); + + return 1; +#endif +} + + +// ---------------------------------------------------------------------------- + +static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { + (void)ptr; + +#ifdef __FreeBSD__ + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; + if (unlikely(proc_info->ki_tdflags & TDF_IDLETD)) + goto cleanup; +#else + static procfile *ff = NULL; + + if(unlikely(!p->stat_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", netdata_configured_host_prefix, p->pid); + p->stat_filename = strdupz(filename); + } + + int set_quotes = (!ff)?1:0; + + ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(unlikely(!ff)) goto cleanup; + + // if(set_quotes) procfile_set_quotes(ff, "()"); + if(unlikely(set_quotes)) + procfile_set_open_close(ff, "(", ")"); + + ff = procfile_readall(ff); + if(unlikely(!ff)) goto cleanup; +#endif + + p->last_stat_collected_usec = p->stat_collected_usec; + p->stat_collected_usec = now_monotonic_usec(); + calls_counter++; + +#ifdef __FreeBSD__ + char *comm = proc_info->ki_comm; + p->ppid = proc_info->ki_ppid; +#else + // p->pid = str2pid_t(procfile_lineword(ff, 0, 0)); + char *comm = procfile_lineword(ff, 0, 1); + p->state = *(procfile_lineword(ff, 0, 2)); + p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + // p->pgrp = (int32_t)str2pid_t(procfile_lineword(ff, 0, 4)); + // p->session = (int32_t)str2pid_t(procfile_lineword(ff, 0, 5)); + // p->tty_nr = (int32_t)str2pid_t(procfile_lineword(ff, 0, 6)); + // p->tpgid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 7)); + // p->flags = str2uint64_t(procfile_lineword(ff, 0, 8)); +#endif + if(strcmp(p->comm, comm) != 0) { + if(unlikely(debug_enabled)) { + if(p->comm[0]) + debug_log("\tpid %d (%s) changed name to '%s'", p->pid, p->comm, comm); + else + debug_log("\tJust added %d (%s)", p->pid, comm); + } + + strncpyz(p->comm, comm, MAX_COMPARE_NAME); + + // /proc//cmdline + if(likely(proc_pid_cmdline_is_needed)) + managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); + + assign_target_to_pid(p); + } + +#ifdef __FreeBSD__ + pid_incremental_rate(stat, p->minflt, (kernel_uint_t)proc_info->ki_rusage.ru_minflt); + pid_incremental_rate(stat, p->cminflt, (kernel_uint_t)proc_info->ki_rusage_ch.ru_minflt); + pid_incremental_rate(stat, p->majflt, (kernel_uint_t)proc_info->ki_rusage.ru_majflt); + pid_incremental_rate(stat, p->cmajflt, (kernel_uint_t)proc_info->ki_rusage_ch.ru_majflt); + pid_incremental_rate(stat, p->utime, (kernel_uint_t)proc_info->ki_rusage.ru_utime.tv_sec * 100 + proc_info->ki_rusage.ru_utime.tv_usec / 10000); + pid_incremental_rate(stat, p->stime, (kernel_uint_t)proc_info->ki_rusage.ru_stime.tv_sec * 100 + proc_info->ki_rusage.ru_stime.tv_usec / 10000); + pid_incremental_rate(stat, p->cutime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_utime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_utime.tv_usec / 10000); + pid_incremental_rate(stat, p->cstime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_stime.tv_usec / 10000); + + p->num_threads = proc_info->ki_numthreads; + + if(enable_guest_charts) { + enable_guest_charts = 0; + info("Guest charts aren't supported by FreeBSD"); + } +#else + pid_incremental_rate(stat, p->minflt, str2kernel_uint_t(procfile_lineword(ff, 0, 9))); + pid_incremental_rate(stat, p->cminflt, str2kernel_uint_t(procfile_lineword(ff, 0, 10))); + pid_incremental_rate(stat, p->majflt, str2kernel_uint_t(procfile_lineword(ff, 0, 11))); + pid_incremental_rate(stat, p->cmajflt, str2kernel_uint_t(procfile_lineword(ff, 0, 12))); + pid_incremental_rate(stat, p->utime, str2kernel_uint_t(procfile_lineword(ff, 0, 13))); + pid_incremental_rate(stat, p->stime, str2kernel_uint_t(procfile_lineword(ff, 0, 14))); + pid_incremental_rate(stat, p->cutime, str2kernel_uint_t(procfile_lineword(ff, 0, 15))); + pid_incremental_rate(stat, p->cstime, str2kernel_uint_t(procfile_lineword(ff, 0, 16))); + // p->priority = str2kernel_uint_t(procfile_lineword(ff, 0, 17)); + // p->nice = str2kernel_uint_t(procfile_lineword(ff, 0, 18)); + p->num_threads = (int32_t)str2uint32_t(procfile_lineword(ff, 0, 19)); + // p->itrealvalue = str2kernel_uint_t(procfile_lineword(ff, 0, 20)); + p->collected_starttime = str2kernel_uint_t(procfile_lineword(ff, 0, 21)) / system_hz; + p->uptime = (global_uptime > p->collected_starttime)?(global_uptime - p->collected_starttime):0; + // p->vsize = str2kernel_uint_t(procfile_lineword(ff, 0, 22)); + // p->rss = str2kernel_uint_t(procfile_lineword(ff, 0, 23)); + // p->rsslim = str2kernel_uint_t(procfile_lineword(ff, 0, 24)); + // p->starcode = str2kernel_uint_t(procfile_lineword(ff, 0, 25)); + // p->endcode = str2kernel_uint_t(procfile_lineword(ff, 0, 26)); + // p->startstack = str2kernel_uint_t(procfile_lineword(ff, 0, 27)); + // p->kstkesp = str2kernel_uint_t(procfile_lineword(ff, 0, 28)); + // p->kstkeip = str2kernel_uint_t(procfile_lineword(ff, 0, 29)); + // p->signal = str2kernel_uint_t(procfile_lineword(ff, 0, 30)); + // p->blocked = str2kernel_uint_t(procfile_lineword(ff, 0, 31)); + // p->sigignore = str2kernel_uint_t(procfile_lineword(ff, 0, 32)); + // p->sigcatch = str2kernel_uint_t(procfile_lineword(ff, 0, 33)); + // p->wchan = str2kernel_uint_t(procfile_lineword(ff, 0, 34)); + // p->nswap = str2kernel_uint_t(procfile_lineword(ff, 0, 35)); + // p->cnswap = str2kernel_uint_t(procfile_lineword(ff, 0, 36)); + // p->exit_signal = str2kernel_uint_t(procfile_lineword(ff, 0, 37)); + // p->processor = str2kernel_uint_t(procfile_lineword(ff, 0, 38)); + // p->rt_priority = str2kernel_uint_t(procfile_lineword(ff, 0, 39)); + // p->policy = str2kernel_uint_t(procfile_lineword(ff, 0, 40)); + // p->delayacct_blkio_ticks = str2kernel_uint_t(procfile_lineword(ff, 0, 41)); + + if(enable_guest_charts) { + + pid_incremental_rate(stat, p->gtime, str2kernel_uint_t(procfile_lineword(ff, 0, 42))); + pid_incremental_rate(stat, p->cgtime, str2kernel_uint_t(procfile_lineword(ff, 0, 43))); + + if (show_guest_time || p->gtime || p->cgtime) { + p->utime -= (p->utime >= p->gtime) ? p->gtime : p->utime; + p->cutime -= (p->cutime >= p->cgtime) ? p->cgtime : p->cutime; + show_guest_time = 1; + } + } +#endif + + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); + + if(unlikely(global_iterations_counter == 1)) { + p->minflt = 0; + p->cminflt = 0; + p->majflt = 0; + p->cmajflt = 0; + p->utime = 0; + p->stime = 0; + p->gtime = 0; + p->cutime = 0; + p->cstime = 0; + p->cgtime = 0; + } +#ifndef __FreeBSD__ + update_proc_state_count(p->state); +#endif + return 1; + +cleanup: + p->minflt = 0; + p->cminflt = 0; + p->majflt = 0; + p->cmajflt = 0; + p->utime = 0; + p->stime = 0; + p->gtime = 0; + p->cutime = 0; + p->cstime = 0; + p->cgtime = 0; + p->num_threads = 0; + // p->rss = 0; + return 0; +} + +// ---------------------------------------------------------------------------- + +static inline int read_proc_pid_io(struct pid_stat *p, void *ptr) { + (void)ptr; +#ifdef __FreeBSD__ + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; +#else + static procfile *ff = NULL; + + if(unlikely(!p->io_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", netdata_configured_host_prefix, p->pid); + p->io_filename = strdupz(filename); + } + + // open the file + ff = procfile_reopen(ff, p->io_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(unlikely(!ff)) goto cleanup; + + ff = procfile_readall(ff); + if(unlikely(!ff)) goto cleanup; +#endif + + calls_counter++; + + p->last_io_collected_usec = p->io_collected_usec; + p->io_collected_usec = now_monotonic_usec(); + +#ifdef __FreeBSD__ + pid_incremental_rate(io, p->io_storage_bytes_read, proc_info->ki_rusage.ru_inblock); + pid_incremental_rate(io, p->io_storage_bytes_written, proc_info->ki_rusage.ru_oublock); +#else + pid_incremental_rate(io, p->io_logical_bytes_read, str2kernel_uint_t(procfile_lineword(ff, 0, 1))); + pid_incremental_rate(io, p->io_logical_bytes_written, str2kernel_uint_t(procfile_lineword(ff, 1, 1))); + pid_incremental_rate(io, p->io_read_calls, str2kernel_uint_t(procfile_lineword(ff, 2, 1))); + pid_incremental_rate(io, p->io_write_calls, str2kernel_uint_t(procfile_lineword(ff, 3, 1))); + pid_incremental_rate(io, p->io_storage_bytes_read, str2kernel_uint_t(procfile_lineword(ff, 4, 1))); + pid_incremental_rate(io, p->io_storage_bytes_written, str2kernel_uint_t(procfile_lineword(ff, 5, 1))); + pid_incremental_rate(io, p->io_cancelled_write_bytes, str2kernel_uint_t(procfile_lineword(ff, 6, 1))); +#endif + + if(unlikely(global_iterations_counter == 1)) { + p->io_logical_bytes_read = 0; + p->io_logical_bytes_written = 0; + p->io_read_calls = 0; + p->io_write_calls = 0; + p->io_storage_bytes_read = 0; + p->io_storage_bytes_written = 0; + p->io_cancelled_write_bytes = 0; + } + + return 1; + +#ifndef __FreeBSD__ +cleanup: + p->io_logical_bytes_read = 0; + p->io_logical_bytes_written = 0; + p->io_read_calls = 0; + p->io_write_calls = 0; + p->io_storage_bytes_read = 0; + p->io_storage_bytes_written = 0; + p->io_cancelled_write_bytes = 0; + return 0; +#endif +} + +#ifndef __FreeBSD__ +static inline int read_global_time() { + static char filename[FILENAME_MAX + 1] = ""; + static procfile *ff = NULL; + static kernel_uint_t utime_raw = 0, stime_raw = 0, gtime_raw = 0, gntime_raw = 0, ntime_raw = 0; + static usec_t collected_usec = 0, last_collected_usec = 0; + + if(unlikely(!ff)) { + snprintfz(filename, FILENAME_MAX, "%s/proc/stat", netdata_configured_host_prefix); + ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) goto cleanup; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) goto cleanup; + + last_collected_usec = collected_usec; + collected_usec = now_monotonic_usec(); + + calls_counter++; + + // temporary - it is added global_ntime; + kernel_uint_t global_ntime = 0; + + incremental_rate(global_utime, utime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 1)), collected_usec, last_collected_usec); + incremental_rate(global_ntime, ntime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 2)), collected_usec, last_collected_usec); + incremental_rate(global_stime, stime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 3)), collected_usec, last_collected_usec); + incremental_rate(global_gtime, gtime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 10)), collected_usec, last_collected_usec); + + global_utime += global_ntime; + + if(enable_guest_charts) { + // temporary - it is added global_ntime; + kernel_uint_t global_gntime = 0; + + // guest nice time, on guest time + incremental_rate(global_gntime, gntime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 11)), collected_usec, last_collected_usec); + + global_gtime += global_gntime; + + // remove guest time from user time + global_utime -= (global_utime > global_gtime) ? global_gtime : global_utime; + } + + if(unlikely(global_iterations_counter == 1)) { + global_utime = 0; + global_stime = 0; + global_gtime = 0; + } + + return 1; + +cleanup: + global_utime = 0; + global_stime = 0; + global_gtime = 0; + return 0; +} +#else +static inline int read_global_time() { + static kernel_uint_t utime_raw = 0, stime_raw = 0, ntime_raw = 0; + static usec_t collected_usec = 0, last_collected_usec = 0; + long cp_time[CPUSTATES]; + + if (unlikely(CPUSTATES != 5)) { + goto cleanup; + } else { + static int mib[2] = {0, 0}; + + if (unlikely(GETSYSCTL_SIMPLE("kern.cp_time", mib, cp_time))) { + goto cleanup; + } + } + + last_collected_usec = collected_usec; + collected_usec = now_monotonic_usec(); + + calls_counter++; + + // temporary - it is added global_ntime; + kernel_uint_t global_ntime = 0; + + incremental_rate(global_utime, utime_raw, cp_time[0] * 100LLU / system_hz, collected_usec, last_collected_usec); + incremental_rate(global_ntime, ntime_raw, cp_time[1] * 100LLU / system_hz, collected_usec, last_collected_usec); + incremental_rate(global_stime, stime_raw, cp_time[2] * 100LLU / system_hz, collected_usec, last_collected_usec); + + global_utime += global_ntime; + + if(unlikely(global_iterations_counter == 1)) { + global_utime = 0; + global_stime = 0; + global_gtime = 0; + } + + return 1; + +cleanup: + global_utime = 0; + global_stime = 0; + global_gtime = 0; + return 0; +} +#endif /* !__FreeBSD__ */ + +// ---------------------------------------------------------------------------- + +int file_descriptor_compare(void* a, void* b) { +#ifdef NETDATA_INTERNAL_CHECKS + if(((struct file_descriptor *)a)->magic != 0x0BADCAFE || ((struct file_descriptor *)b)->magic != 0x0BADCAFE) + error("Corrupted index data detected. Please report this."); +#endif /* NETDATA_INTERNAL_CHECKS */ + + if(((struct file_descriptor *)a)->hash < ((struct file_descriptor *)b)->hash) + return -1; + + else if(((struct file_descriptor *)a)->hash > ((struct file_descriptor *)b)->hash) + return 1; + + else + return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name); +} + +// int file_descriptor_iterator(avl_t *a) { if(a) {}; return 0; } + +avl_tree_type all_files_index = { + NULL, + file_descriptor_compare +}; + +static struct file_descriptor *file_descriptor_find(const char *name, uint32_t hash) { + struct file_descriptor tmp; + tmp.hash = (hash)?hash:simple_hash(name); + tmp.name = name; + tmp.count = 0; + tmp.pos = 0; +#ifdef NETDATA_INTERNAL_CHECKS + tmp.magic = 0x0BADCAFE; +#endif /* NETDATA_INTERNAL_CHECKS */ + + return (struct file_descriptor *)avl_search(&all_files_index, (avl_t *) &tmp); +} + +#define file_descriptor_add(fd) avl_insert(&all_files_index, (avl_t *)(fd)) +#define file_descriptor_remove(fd) avl_remove(&all_files_index, (avl_t *)(fd)) + +// ---------------------------------------------------------------------------- + +static inline void file_descriptor_not_used(int id) +{ + if(id > 0 && id < all_files_size) { + +#ifdef NETDATA_INTERNAL_CHECKS + if(all_files[id].magic != 0x0BADCAFE) { + error("Ignoring request to remove empty file id %d.", id); + return; + } +#endif /* NETDATA_INTERNAL_CHECKS */ + + debug_log("decreasing slot %d (count = %d).", id, all_files[id].count); + + if(all_files[id].count > 0) { + all_files[id].count--; + + if(!all_files[id].count) { + debug_log(" >> slot %d is empty.", id); + + if(unlikely(file_descriptor_remove(&all_files[id]) != (void *)&all_files[id])) + error("INTERNAL ERROR: removal of unused fd from index, removed a different fd"); + +#ifdef NETDATA_INTERNAL_CHECKS + all_files[id].magic = 0x00000000; +#endif /* NETDATA_INTERNAL_CHECKS */ + all_files_len--; + } + } + else + error("Request to decrease counter of fd %d (%s), while the use counter is 0", id, all_files[id].name); + } + else error("Request to decrease counter of fd %d, which is outside the array size (1 to %d)", id, all_files_size); +} + +static inline void all_files_grow() { + void *old = all_files; + int i; + + // there is no empty slot + debug_log("extending fd array to %d entries", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); + + all_files = reallocz(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor)); + + // if the address changed, we have to rebuild the index + // since all pointers are now invalid + + if(unlikely(old && old != (void *)all_files)) { + debug_log(" >> re-indexing."); + + all_files_index.root = NULL; + for(i = 0; i < all_files_size; i++) { + if(!all_files[i].count) continue; + if(unlikely(file_descriptor_add(&all_files[i]) != (void *)&all_files[i])) + error("INTERNAL ERROR: duplicate indexing of fd during realloc."); + } + + debug_log(" >> re-indexing done."); + } + + // initialize the newly added entries + + for(i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) { + all_files[i].count = 0; + all_files[i].name = NULL; +#ifdef NETDATA_INTERNAL_CHECKS + all_files[i].magic = 0x00000000; +#endif /* NETDATA_INTERNAL_CHECKS */ + all_files[i].pos = i; + } + + if(unlikely(!all_files_size)) all_files_len = 1; + all_files_size += FILE_DESCRIPTORS_INCREASE_STEP; +} + +static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t hash, FD_FILETYPE type) { + // check we have enough memory to add it + if(!all_files || all_files_len == all_files_size) + all_files_grow(); + + debug_log(" >> searching for empty slot."); + + // search for an empty slot + + static int last_pos = 0; + int i, c; + for(i = 0, c = last_pos ; i < all_files_size ; i++, c++) { + if(c >= all_files_size) c = 0; + if(c == 0) continue; + + if(!all_files[c].count) { + debug_log(" >> Examining slot %d.", c); + +#ifdef NETDATA_INTERNAL_CHECKS + if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) + error("fd on position %d is not cleared properly. It still has %s in it.", c, all_files[c].name); +#endif /* NETDATA_INTERNAL_CHECKS */ + + debug_log(" >> %s fd position %d for %s (last name: %s)", all_files[c].name?"re-using":"using", c, name, all_files[c].name); + + freez((void *)all_files[c].name); + all_files[c].name = NULL; + last_pos = c; + break; + } + } + + all_files_len++; + + if(i == all_files_size) { + fatal("We should find an empty slot, but there isn't any"); + exit(1); + } + // else we have an empty slot in 'c' + + debug_log(" >> updating slot %d.", c); + + all_files[c].name = strdupz(name); + all_files[c].hash = hash; + all_files[c].type = type; + all_files[c].pos = c; + all_files[c].count = 1; +#ifdef NETDATA_INTERNAL_CHECKS + all_files[c].magic = 0x0BADCAFE; +#endif /* NETDATA_INTERNAL_CHECKS */ + if(unlikely(file_descriptor_add(&all_files[c]) != (void *)&all_files[c])) + error("INTERNAL ERROR: duplicate indexing of fd."); + + debug_log("using fd position %d (name: %s)", c, all_files[c].name); + + return c; +} + +static inline int file_descriptor_find_or_add(const char *name, uint32_t hash) { + if(unlikely(!hash)) + hash = simple_hash(name); + + debug_log("adding or finding name '%s' with hash %u", name, hash); + + struct file_descriptor *fd = file_descriptor_find(name, hash); + if(fd) { + // found + debug_log(" >> found on slot %d", fd->pos); + + fd->count++; + return fd->pos; + } + // not found + + FD_FILETYPE type; + if(likely(name[0] == '/')) type = FILETYPE_FILE; + else if(likely(strncmp(name, "pipe:", 5) == 0)) type = FILETYPE_PIPE; + else if(likely(strncmp(name, "socket:", 7) == 0)) type = FILETYPE_SOCKET; + else if(likely(strncmp(name, "anon_inode:", 11) == 0)) { + const char *t = &name[11]; + + if(strcmp(t, "inotify") == 0) type = FILETYPE_INOTIFY; + else if(strcmp(t, "[eventfd]") == 0) type = FILETYPE_EVENTFD; + else if(strcmp(t, "[eventpoll]") == 0) type = FILETYPE_EVENTPOLL; + else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD; + else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD; + else { + debug_log("UNKNOWN anonymous inode: %s", name); + type = FILETYPE_OTHER; + } + } + else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY; + else { + debug_log("UNKNOWN linkname: %s", name); + type = FILETYPE_OTHER; + } + + return file_descriptor_set_on_empty_slot(name, hash, type); +} + +static inline void clear_pid_fd(struct pid_fd *pfd) { + pfd->fd = 0; + + #ifndef __FreeBSD__ + pfd->link_hash = 0; + pfd->inode = 0; + pfd->cache_iterations_counter = 0; + pfd->cache_iterations_reset = 0; +#endif +} + +static inline void make_all_pid_fds_negative(struct pid_stat *p) { + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + while(pfd < pfdend) { + pfd->fd = -(pfd->fd); + pfd++; + } +} + +static inline void cleanup_negative_pid_fds(struct pid_stat *p) { + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + + while(pfd < pfdend) { + int fd = pfd->fd; + + if(unlikely(fd < 0)) { + file_descriptor_not_used(-(fd)); + clear_pid_fd(pfd); + } + + pfd++; + } +} + +static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size) { + struct pid_fd *pfd = &p->fds[first], *pfdend = &p->fds[first + size]; + + while(pfd < pfdend) { +#ifndef __FreeBSD__ + pfd->filename = NULL; +#endif + clear_pid_fd(pfd); + pfd++; + } +} + +static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { + (void)ptr; +#ifdef __FreeBSD__ + int mib[4]; + size_t size; + struct kinfo_file *fds; + static char *fdsbuf; + char *bfdsbuf, *efdsbuf; + char fdsname[FILENAME_MAX + 1]; +#define SHM_FORMAT_LEN 31 // format: 21 + size: 10 + char shm_name[FILENAME_MAX - SHM_FORMAT_LEN + 1]; + + // we make all pid fds negative, so that + // we can detect unused file descriptors + // at the end, to free them + make_all_pid_fds_negative(p); + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_FILEDESC; + mib[3] = p->pid; + + if (unlikely(sysctl(mib, 4, NULL, &size, NULL, 0))) { + error("sysctl error: Can't get file descriptors data size for pid %d", p->pid); + return 0; + } + if (likely(size > 0)) + fdsbuf = reallocz(fdsbuf, size); + if (unlikely(sysctl(mib, 4, fdsbuf, &size, NULL, 0))) { + error("sysctl error: Can't get file descriptors data for pid %d", p->pid); + return 0; + } + + bfdsbuf = fdsbuf; + efdsbuf = fdsbuf + size; + while (bfdsbuf < efdsbuf) { + fds = (struct kinfo_file *)(uintptr_t)bfdsbuf; + if (unlikely(fds->kf_structsize == 0)) + break; + + // do not process file descriptors for current working directory, root directory, + // jail directory, ktrace vnode, text vnode and controlling terminal + if (unlikely(fds->kf_fd < 0)) { + bfdsbuf += fds->kf_structsize; + continue; + } + + // get file descriptors array index + size_t fdid = fds->kf_fd; + + // check if the fds array is small + if (unlikely(fdid >= p->fds_size)) { + // it is small, extend it + + debug_log("extending fd memory slots for %s from %d to %d", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); + + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); + + // and initialize it + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = fdid + MAX_SPARE_FDS; + } + + if (unlikely(p->fds[fdid].fd == 0)) { + // we don't know this fd, get it + + switch (fds->kf_type) { + case KF_TYPE_FIFO: + case KF_TYPE_VNODE: + if (unlikely(!fds->kf_path[0])) { + sprintf(fdsname, "other: inode: %lu", fds->kf_un.kf_file.kf_file_fileid); + break; + } + sprintf(fdsname, "%s", fds->kf_path); + break; + case KF_TYPE_SOCKET: + switch (fds->kf_sock_domain) { + case AF_INET: + case AF_INET6: + if (fds->kf_sock_protocol == IPPROTO_TCP) + sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_inpcb); + else + sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_pcb); + break; + case AF_UNIX: + /* print address of pcb and connected pcb */ + sprintf(fdsname, "socket: %lx %lx", fds->kf_un.kf_sock.kf_sock_pcb, fds->kf_un.kf_sock.kf_sock_unpconn); + break; + default: + /* print protocol number and socket address */ +#if __FreeBSD_version < 1200031 + sprintf(fdsname, "socket: other: %d %s %s", fds->kf_sock_protocol, fds->kf_sa_local.__ss_pad1, fds->kf_sa_local.__ss_pad2); +#else + sprintf(fdsname, "socket: other: %d %s %s", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sa_local.__ss_pad1, fds->kf_un.kf_sock.kf_sa_local.__ss_pad2); +#endif + } + break; + case KF_TYPE_PIPE: + sprintf(fdsname, "pipe: %lu %lu", fds->kf_un.kf_pipe.kf_pipe_addr, fds->kf_un.kf_pipe.kf_pipe_peer); + break; + case KF_TYPE_PTS: +#if __FreeBSD_version < 1200031 + sprintf(fdsname, "other: pts: %u", fds->kf_un.kf_pts.kf_pts_dev); +#else + sprintf(fdsname, "other: pts: %lu", fds->kf_un.kf_pts.kf_pts_dev); +#endif + break; + case KF_TYPE_SHM: + strncpyz(shm_name, fds->kf_path, FILENAME_MAX - SHM_FORMAT_LEN); + sprintf(fdsname, "other: shm: %s size: %lu", shm_name, fds->kf_un.kf_file.kf_file_size); + break; + case KF_TYPE_SEM: + sprintf(fdsname, "other: sem: %u", fds->kf_un.kf_sem.kf_sem_value); + break; + default: + sprintf(fdsname, "other: pid: %d fd: %d", fds->kf_un.kf_proc.kf_pid, fds->kf_fd); + } + + // if another process already has this, we will get + // the same id + p->fds[fdid].fd = file_descriptor_find_or_add(fdsname, 0); + } + + // else make it positive again, we need it + // of course, the actual file may have changed + + else + p->fds[fdid].fd = -p->fds[fdid].fd; + + bfdsbuf += fds->kf_structsize; + } +#else + if(unlikely(!p->fds_dirname)) { + char dirname[FILENAME_MAX+1]; + snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", netdata_configured_host_prefix, p->pid); + p->fds_dirname = strdupz(dirname); + } + + DIR *fds = opendir(p->fds_dirname); + if(unlikely(!fds)) return 0; + + struct dirent *de; + char linkname[FILENAME_MAX + 1]; + + // we make all pid fds negative, so that + // we can detect unused file descriptors + // at the end, to free them + make_all_pid_fds_negative(p); + + while((de = readdir(fds))) { + // we need only files with numeric names + + if(unlikely(de->d_name[0] < '0' || de->d_name[0] > '9')) + continue; + + // get its number + int fdid = (int) str2l(de->d_name); + if(unlikely(fdid < 0)) continue; + + // check if the fds array is small + if(unlikely((size_t)fdid >= p->fds_size)) { + // it is small, extend it + + debug_log("extending fd memory slots for %s from %d to %d" + , p->comm + , p->fds_size + , fdid + MAX_SPARE_FDS + ); + + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); + + // and initialize it + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = (size_t)fdid + MAX_SPARE_FDS; + } + + if(unlikely(p->fds[fdid].fd < 0 && de->d_ino != p->fds[fdid].inode)) { + // inodes do not match, clear the previous entry + inodes_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + if(p->fds[fdid].fd < 0 && p->fds[fdid].cache_iterations_counter > 0) { + p->fds[fdid].fd = -p->fds[fdid].fd; + p->fds[fdid].cache_iterations_counter--; + continue; + } + + if(unlikely(!p->fds[fdid].filename)) { + filenames_allocated_counter++; + char fdname[FILENAME_MAX + 1]; + snprintfz(fdname, FILENAME_MAX, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); + p->fds[fdid].filename = strdupz(fdname); + } + + file_counter++; + ssize_t l = readlink(p->fds[fdid].filename, linkname, FILENAME_MAX); + if(unlikely(l == -1)) { + // cannot read the link + + if(debug_enabled || (p->target && p->target->debug_enabled)) + error("Cannot read link %s", p->fds[fdid].filename); + + if(unlikely(p->fds[fdid].fd < 0)) { + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + continue; + } + else + linkname[l] = '\0'; + + uint32_t link_hash = simple_hash(linkname); + + if(unlikely(p->fds[fdid].fd < 0 && p->fds[fdid].link_hash != link_hash)) { + // the link changed + links_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + if(unlikely(p->fds[fdid].fd == 0)) { + // we don't know this fd, get it + + // if another process already has this, we will get + // the same id + p->fds[fdid].fd = file_descriptor_find_or_add(linkname, link_hash); + p->fds[fdid].inode = de->d_ino; + p->fds[fdid].link_hash = link_hash; + } + else { + // else make it positive again, we need it + p->fds[fdid].fd = -p->fds[fdid].fd; + } + + // caching control + // without this we read all the files on every iteration + if(max_fds_cache_seconds > 0) { + size_t spread = ((size_t)max_fds_cache_seconds > 10) ? 10 : (size_t)max_fds_cache_seconds; + + // cache it for a few iterations + size_t max = ((size_t) max_fds_cache_seconds + (fdid % spread)) / (size_t) update_every; + p->fds[fdid].cache_iterations_reset++; + + if(unlikely(p->fds[fdid].cache_iterations_reset % spread == (size_t) fdid % spread)) + p->fds[fdid].cache_iterations_reset++; + + if(unlikely((fdid <= 2 && p->fds[fdid].cache_iterations_reset > 5) || + p->fds[fdid].cache_iterations_reset > max)) { + // for stdin, stdout, stderr (fdid <= 2) we have checked a few times, or if it goes above the max, goto max + p->fds[fdid].cache_iterations_reset = max; + } + + p->fds[fdid].cache_iterations_counter = p->fds[fdid].cache_iterations_reset; + } + } + + closedir(fds); +#endif + cleanup_negative_pid_fds(p); + + return 1; +} + +// ---------------------------------------------------------------------------- + +static inline int debug_print_process_and_parents(struct pid_stat *p, usec_t time) { + char *prefix = "\\_ "; + int indent = 0; + + if(p->parent) + indent = debug_print_process_and_parents(p->parent, p->stat_collected_usec); + else + prefix = " > "; + + char buffer[indent + 1]; + int i; + + for(i = 0; i < indent ;i++) buffer[i] = ' '; + buffer[i] = '\0'; + + fprintf(stderr, " %s %s%s (%d %s %llu" + , buffer + , prefix + , p->comm + , p->pid + , p->updated?"running":"exited" + , p->stat_collected_usec - time + ); + + if(p->utime) fprintf(stderr, " utime=" KERNEL_UINT_FORMAT, p->utime); + if(p->stime) fprintf(stderr, " stime=" KERNEL_UINT_FORMAT, p->stime); + if(p->gtime) fprintf(stderr, " gtime=" KERNEL_UINT_FORMAT, p->gtime); + if(p->cutime) fprintf(stderr, " cutime=" KERNEL_UINT_FORMAT, p->cutime); + if(p->cstime) fprintf(stderr, " cstime=" KERNEL_UINT_FORMAT, p->cstime); + if(p->cgtime) fprintf(stderr, " cgtime=" KERNEL_UINT_FORMAT, p->cgtime); + if(p->minflt) fprintf(stderr, " minflt=" KERNEL_UINT_FORMAT, p->minflt); + if(p->cminflt) fprintf(stderr, " cminflt=" KERNEL_UINT_FORMAT, p->cminflt); + if(p->majflt) fprintf(stderr, " majflt=" KERNEL_UINT_FORMAT, p->majflt); + if(p->cmajflt) fprintf(stderr, " cmajflt=" KERNEL_UINT_FORMAT, p->cmajflt); + fprintf(stderr, ")\n"); + + return indent + 1; +} + +static inline void debug_print_process_tree(struct pid_stat *p, char *msg __maybe_unused) { + debug_log("%s: process %s (%d, %s) with parents:", msg, p->comm, p->pid, p->updated?"running":"exited"); + debug_print_process_and_parents(p, p->stat_collected_usec); +} + +static inline void debug_find_lost_child(struct pid_stat *pe, kernel_uint_t lost, int type) { + int found = 0; + struct pid_stat *p = NULL; + + for(p = root_of_pids; p ; p = p->next) { + if(p == pe) continue; + + switch(type) { + case 1: + if(p->cminflt > lost) { + fprintf(stderr, " > process %d (%s) could use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + found++; + } + break; + + case 2: + if(p->cmajflt > lost) { + fprintf(stderr, " > process %d (%s) could use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + found++; + } + break; + + case 3: + if(p->cutime > lost) { + fprintf(stderr, " > process %d (%s) could use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + found++; + } + break; + + case 4: + if(p->cstime > lost) { + fprintf(stderr, " > process %d (%s) could use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + found++; + } + break; + + case 5: + if(p->cgtime > lost) { + fprintf(stderr, " > process %d (%s) could use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + found++; + } + break; + } + } + + if(!found) { + switch(type) { + case 1: + fprintf(stderr, " > cannot find any process to use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); + break; + + case 2: + fprintf(stderr, " > cannot find any process to use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); + break; + + case 3: + fprintf(stderr, " > cannot find any process to use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); + break; + + case 4: + fprintf(stderr, " > cannot find any process to use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); + break; + + case 5: + fprintf(stderr, " > cannot find any process to use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); + break; + } + } +} + +static inline kernel_uint_t remove_exited_child_from_parent(kernel_uint_t *field, kernel_uint_t *pfield) { + kernel_uint_t absorbed = 0; + + if(*field > *pfield) { + absorbed += *pfield; + *field -= *pfield; + *pfield = 0; + } + else { + absorbed += *field; + *pfield -= *field; + *field = 0; + } + + return absorbed; +} + +static inline void process_exited_processes() { + struct pid_stat *p; + + for(p = root_of_pids; p ; p = p->next) { + if(p->updated || !p->stat_collected_usec) + continue; + + kernel_uint_t utime = (p->utime_raw + p->cutime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t stime = (p->stime_raw + p->cstime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t gtime = (p->gtime_raw + p->cgtime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t minflt = (p->minflt_raw + p->cminflt_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t majflt = (p->majflt_raw + p->cmajflt_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + + if(utime + stime + gtime + minflt + majflt == 0) + continue; + + if(unlikely(debug_enabled)) { + debug_log("Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" + , p->comm + , p->pid + , p->updated?"running":"exited" + , utime + , stime + , gtime + , minflt + , majflt + ); + debug_print_process_tree(p, "Searching parents"); + } + + struct pid_stat *pp; + for(pp = p->parent; pp ; pp = pp->parent) { + if(!pp->updated) continue; + + kernel_uint_t absorbed; + absorbed = remove_exited_child_from_parent(&utime, &pp->cutime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); + + absorbed = remove_exited_child_from_parent(&stime, &pp->cstime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); + + absorbed = remove_exited_child_from_parent(>ime, &pp->cgtime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); + + absorbed = remove_exited_child_from_parent(&minflt, &pp->cminflt); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); + + absorbed = remove_exited_child_from_parent(&majflt, &pp->cmajflt); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); + } + + if(unlikely(utime + stime + gtime + minflt + majflt > 0)) { + if(unlikely(debug_enabled)) { + if(utime) debug_find_lost_child(p, utime, 3); + if(stime) debug_find_lost_child(p, stime, 4); + if(gtime) debug_find_lost_child(p, gtime, 5); + if(minflt) debug_find_lost_child(p, minflt, 1); + if(majflt) debug_find_lost_child(p, majflt, 2); + } + + p->keep = 1; + + debug_log(" > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" + , p->comm + , p->pid + , p->updated?"running":"exited" + , utime + , stime + , gtime + , minflt + , majflt + ); + + for(pp = p->parent; pp ; pp = pp->parent) { + if(pp->updated) break; + pp->keep = 1; + + debug_log(" > - KEEP - parent for another loop: %s (%d %s)" + , pp->comm + , pp->pid + , pp->updated?"running":"exited" + ); + } + + p->utime_raw = utime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); + p->stime_raw = stime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); + p->gtime_raw = gtime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); + p->minflt_raw = minflt * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); + p->majflt_raw = majflt * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); + p->cutime_raw = p->cstime_raw = p->cgtime_raw = p->cminflt_raw = p->cmajflt_raw = 0; + + debug_log(" "); + } + else + debug_log(" > totally absorbed - DONE - %s (%d %s)" + , p->comm + , p->pid + , p->updated?"running":"exited" + ); + } +} + +static inline void link_all_processes_to_their_parents(void) { + struct pid_stat *p, *pp; + + // link all children to their parents + // and update children count on parents + for(p = root_of_pids; p ; p = p->next) { + // for each process found + + p->sortlist = 0; + p->parent = NULL; + + if(unlikely(!p->ppid)) { + //unnecessary code from apps_plugin.c + //p->parent = NULL; + continue; + } + + pp = all_pids[p->ppid]; + if(likely(pp)) { + p->parent = pp; + pp->children_count++; + + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("child %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); + } + else { + p->parent = NULL; + error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); + } + } +} + +// ---------------------------------------------------------------------------- + +// 1. read all files in /proc +// 2. for each numeric directory: +// i. read /proc/pid/stat +// ii. read /proc/pid/status +// iii. read /proc/pid/io (requires root access) +// iii. read the entries in directory /proc/pid/fd (requires root access) +// for each entry: +// a. find or create a struct file_descriptor +// b. cleanup any old/unused file_descriptors + +// after all these, some pids may be linked to targets, while others may not + +// in case of errors, only 1 every 1000 errors is printed +// to avoid filling up all disk space +// if debug is enabled, all errors are printed + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) +static int compar_pid(const void *pid1, const void *pid2) { + + struct pid_stat *p1 = all_pids[*((pid_t *)pid1)]; + struct pid_stat *p2 = all_pids[*((pid_t *)pid2)]; + + if(p1->sortlist > p2->sortlist) + return -1; + else + return 1; +} +#endif + +static inline int collect_data_for_pid(pid_t pid, void *ptr) { + if(unlikely(pid < 0 || pid > pid_max)) { + error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); + return 0; + } + + struct pid_stat *p = get_pid_entry(pid); + if(unlikely(!p || p->read)) return 0; + p->read = 1; + + // debug_log("Reading process %d (%s), sortlist %d", p->pid, p->comm, p->sortlist); + + // -------------------------------------------------------------------- + // /proc//stat + + if(unlikely(!managed_log(p, PID_LOG_STAT, read_proc_pid_stat(p, ptr)))) + // there is no reason to proceed if we cannot get its status + return 0; + + // check its parent pid + if(unlikely(p->ppid < 0 || p->ppid > pid_max)) { + error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); + p->ppid = 0; + } + + // -------------------------------------------------------------------- + // /proc//io + + managed_log(p, PID_LOG_IO, read_proc_pid_io(p, ptr)); + + // -------------------------------------------------------------------- + // /proc//status + + if(unlikely(!managed_log(p, PID_LOG_STATUS, read_proc_pid_status(p, ptr)))) + // there is no reason to proceed if we cannot get its status + return 0; + + // -------------------------------------------------------------------- + // /proc//fd + + if(enable_file_charts) + managed_log(p, PID_LOG_FDS, read_pid_file_descriptors(p, ptr)); + + // -------------------------------------------------------------------- + // done! + + if(unlikely(debug_enabled && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read)) + debug_log("Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist); + + // mark it as updated + p->updated = 1; + p->keep = 0; + p->keeploops = 0; + + return 1; +} + +static int collect_data_for_all_processes(void) { + struct pid_stat *p = NULL; + +#ifndef __FreeBSD__ + // clear process state counter + memset(proc_state_count, 0, sizeof proc_state_count); +#else + int i, procnum; + + static size_t procbase_size = 0; + static struct kinfo_proc *procbase = NULL; + + size_t new_procbase_size; + + int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_PROC }; + if (unlikely(sysctl(mib, 3, NULL, &new_procbase_size, NULL, 0))) { + error("sysctl error: Can't get processes data size"); + return 0; + } + + // give it some air for processes that may be started + // during this little time. + new_procbase_size += 100 * sizeof(struct kinfo_proc); + + // increase the buffer if needed + if(new_procbase_size > procbase_size) { + procbase_size = new_procbase_size; + procbase = reallocz(procbase, procbase_size); + } + + // sysctl() gets from new_procbase_size the buffer size + // and also returns to it the amount of data filled in + new_procbase_size = procbase_size; + + // get the processes from the system + if (unlikely(sysctl(mib, 3, procbase, &new_procbase_size, NULL, 0))) { + error("sysctl error: Can't get processes data"); + return 0; + } + + // based on the amount of data filled in + // calculate the number of processes we got + procnum = new_procbase_size / sizeof(struct kinfo_proc); + +#endif + + if(all_pids_count) { +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + size_t slc = 0; +#endif + for(p = root_of_pids; p ; p = p->next) { + p->read = 0; // mark it as not read, so that collect_data_for_pid() will read it + p->updated = 0; + p->merged = 0; + p->children_count = 0; + p->parent = NULL; + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + all_pids_sortlist[slc++] = p->pid; +#endif + } + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + if(unlikely(slc != all_pids_count)) { + error("Internal error: I was thinking I had %zu processes in my arrays, but it seems there are %zu.", all_pids_count, slc); + all_pids_count = slc; + } + + if(include_exited_childs) { + // Read parents before childs + // This is needed to prevent a situation where + // a child is found running, but until we read + // its parent, it has exited and its parent + // has accumulated its resources. + + qsort((void *)all_pids_sortlist, (size_t)all_pids_count, sizeof(pid_t), compar_pid); + + // we forward read all running processes + // collect_data_for_pid() is smart enough, + // not to read the same pid twice per iteration + for(slc = 0; slc < all_pids_count; slc++) { + collect_data_for_pid(all_pids_sortlist[slc], NULL); + } + } +#endif + } + +#ifdef __FreeBSD__ + for (i = 0 ; i < procnum ; ++i) { + pid_t pid = procbase[i].ki_pid; + collect_data_for_pid(pid, &procbase[i]); + } +#else + static char uptime_filename[FILENAME_MAX + 1] = ""; + if(*uptime_filename == '\0') + snprintfz(uptime_filename, FILENAME_MAX, "%s/proc/uptime", netdata_configured_host_prefix); + + global_uptime = (kernel_uint_t)(uptime_msec(uptime_filename) / MSEC_PER_SEC); + + char dirname[FILENAME_MAX + 1]; + + snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); + DIR *dir = opendir(dirname); + if(!dir) return 0; + + struct dirent *de = NULL; + + while((de = readdir(dir))) { + char *endptr = de->d_name; + + if(unlikely(de->d_type != DT_DIR || de->d_name[0] < '0' || de->d_name[0] > '9')) + continue; + + pid_t pid = (pid_t) strtoul(de->d_name, &endptr, 10); + + // make sure we read a valid number + if(unlikely(endptr == de->d_name || *endptr != '\0')) + continue; + + collect_data_for_pid(pid, NULL); + } + closedir(dir); +#endif + + if(!all_pids_count) + return 0; + + // we need /proc/stat to normalize the cpu consumption of the exited childs + read_global_time(); + + // build the process tree + link_all_processes_to_their_parents(); + + // normally this is done + // however we may have processes exited while we collected values + // so let's find the exited ones + // we do this by collecting the ownership of process + // if we manage to get the ownership, the process still runs + process_exited_processes(); + return 1; +} + +// ---------------------------------------------------------------------------- +// update statistics on the targets + +// 1. link all childs to their parents +// 2. go from bottom to top, marking as merged all childs to their parents +// this step links all parents without a target to the child target, if any +// 3. link all top level processes (the ones not merged) to the default target +// 4. go from top to bottom, linking all childs without a target, to their parent target +// after this step, all processes have a target +// [5. for each killed pid (updated = 0), remove its usage from its target] +// 6. zero all apps_groups_targets +// 7. concentrate all values on the apps_groups_targets +// 8. remove all killed processes +// 9. find the unique file count for each target +// check: update_apps_groups_statistics() + +static void cleanup_exited_pids(void) { + size_t c; + struct pid_stat *p = NULL; + + for(p = root_of_pids; p ;) { + if(!p->updated && (!p->keep || p->keeploops > 0)) { + if(unlikely(debug_enabled && (p->keep || p->keeploops))) + debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm); + + for(c = 0; c < p->fds_size; c++) + if(p->fds[c].fd > 0) { + file_descriptor_not_used(p->fds[c].fd); + clear_pid_fd(&p->fds[c]); + } + + pid_t r = p->pid; + p = p->next; + del_pid_entry(r); + } + else { + if(unlikely(p->keep)) p->keeploops++; + p->keep = 0; + p = p->next; + } + } +} + +static void apply_apps_groups_targets_inheritance(void) { + struct pid_stat *p = NULL; + + // children that do not have a target + // inherit their target from their parent + int found = 1, loops = 0; + while(found) { + if(unlikely(debug_enabled)) loops++; + found = 0; + for(p = root_of_pids; p ; p = p->next) { + // if this process does not have a target + // and it has a parent + // and its parent has a target + // then, set the parent's target to this process + if(unlikely(!p->target && p->parent && p->parent->target)) { + p->target = p->parent->target; + found++; + + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + } + } + } + + // find all the procs with 0 childs and merge them to their parents + // repeat, until nothing more can be done. + int sortlist = 1; + found = 1; + while(found) { + if(unlikely(debug_enabled)) loops++; + found = 0; + + for(p = root_of_pids; p ; p = p->next) { + if(unlikely(!p->sortlist && !p->children_count)) + p->sortlist = sortlist++; + + if(unlikely( + !p->children_count // if this process does not have any children + && !p->merged // and is not already merged + && p->parent // and has a parent + && p->parent->children_count // and its parent has children + // and the target of this process and its parent is the same, + // or the parent does not have a target + && (p->target == p->parent->target || !p->parent->target) + && p->ppid != INIT_PID // and its parent is not init + )) { + // mark it as merged + p->parent->children_count--; + p->merged = 1; + + // the parent inherits the child's target, if it does not have a target itself + if(unlikely(p->target && !p->parent->target)) { + p->parent->target = p->target; + + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); + } + + found++; + } + } + + debug_log("TARGET INHERITANCE: merged %d processes", found); + } + + // init goes always to default target + if(all_pids[INIT_PID]) + all_pids[INIT_PID]->target = apps_groups_default_target; + + // pid 0 goes always to default target + if(all_pids[0]) + all_pids[0]->target = apps_groups_default_target; + + // give a default target on all top level processes + if(unlikely(debug_enabled)) loops++; + for(p = root_of_pids; p ; p = p->next) { + // if the process is not merged itself + // then is is a top level process + if(unlikely(!p->merged && !p->target)) + p->target = apps_groups_default_target; + + // make sure all processes have a sortlist + if(unlikely(!p->sortlist)) + p->sortlist = sortlist++; + } + + if(all_pids[1]) + all_pids[1]->sortlist = sortlist++; + + // give a target to all merged child processes + found = 1; + while(found) { + if(unlikely(debug_enabled)) loops++; + found = 0; + for(p = root_of_pids; p ; p = p->next) { + if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) { + p->target = p->parent->target; + found++; + + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + } + } + } + + debug_log("apply_apps_groups_targets_inheritance() made %d loops on the process tree", loops); +} + +static size_t zero_all_targets(struct target *root) { + struct target *w; + size_t count = 0; + + for (w = root; w ; w = w->next) { + count++; + + w->minflt = 0; + w->majflt = 0; + w->utime = 0; + w->stime = 0; + w->gtime = 0; + w->cminflt = 0; + w->cmajflt = 0; + w->cutime = 0; + w->cstime = 0; + w->cgtime = 0; + w->num_threads = 0; + // w->rss = 0; + w->processes = 0; + + w->status_vmsize = 0; + w->status_vmrss = 0; + w->status_vmshared = 0; + w->status_rssfile = 0; + w->status_rssshmem = 0; + w->status_vmswap = 0; + + w->io_logical_bytes_read = 0; + w->io_logical_bytes_written = 0; + w->io_read_calls = 0; + w->io_write_calls = 0; + w->io_storage_bytes_read = 0; + w->io_storage_bytes_written = 0; + w->io_cancelled_write_bytes = 0; + + // zero file counters + if(w->target_fds) { + memset(w->target_fds, 0, sizeof(int) * w->target_fds_size); + w->openfds.files = 0; + w->openfds.pipes = 0; + w->openfds.sockets = 0; + w->openfds.inotifies = 0; + w->openfds.eventfds = 0; + w->openfds.timerfds = 0; + w->openfds.signalfds = 0; + w->openfds.eventpolls = 0; + w->openfds.other = 0; + } + + w->collected_starttime = 0; + w->uptime_min = 0; + w->uptime_sum = 0; + w->uptime_max = 0; + + if(unlikely(w->root_pid)) { + struct pid_on_target *pid_on_target_to_free, *pid_on_target = w->root_pid; + + while(pid_on_target) { + pid_on_target_to_free = pid_on_target; + pid_on_target = pid_on_target->next; + freez(pid_on_target_to_free); + } + + w->root_pid = NULL; + } + } + + return count; +} + +static inline void reallocate_target_fds(struct target *w) { + if(unlikely(!w)) + return; + + if(unlikely(!w->target_fds || w->target_fds_size < all_files_size)) { + w->target_fds = reallocz(w->target_fds, sizeof(int) * all_files_size); + memset(&w->target_fds[w->target_fds_size], 0, sizeof(int) * (all_files_size - w->target_fds_size)); + w->target_fds_size = all_files_size; + } +} + +static void aggregage_fd_type_on_openfds(FD_FILETYPE type, struct openfds *openfds) { + switch(type) { + case FILETYPE_FILE: + openfds->files++; + break; + + case FILETYPE_PIPE: + openfds->pipes++; + break; + + case FILETYPE_SOCKET: + openfds->sockets++; + break; + + case FILETYPE_INOTIFY: + openfds->inotifies++; + break; + + case FILETYPE_EVENTFD: + openfds->eventfds++; + break; + + case FILETYPE_TIMERFD: + openfds->timerfds++; + break; + + case FILETYPE_SIGNALFD: + openfds->signalfds++; + break; + + case FILETYPE_EVENTPOLL: + openfds->eventpolls++; + break; + + case FILETYPE_OTHER: + openfds->other++; + break; + } +} + +static inline void aggregate_fd_on_target(int fd, struct target *w) { + if(unlikely(!w)) + return; + + if(unlikely(w->target_fds[fd])) { + // it is already aggregated + // just increase its usage counter + w->target_fds[fd]++; + return; + } + + // increase its usage counter + // so that we will not add it again + w->target_fds[fd]++; + + aggregage_fd_type_on_openfds(all_files[fd].type, &w->openfds); +} + +static inline void aggregate_pid_fds_on_targets(struct pid_stat *p) { + + if(unlikely(!p->updated)) { + // the process is not running + return; + } + + struct target *w = p->target, *u = p->user_target, *g = p->group_target; + + reallocate_target_fds(w); + reallocate_target_fds(u); + reallocate_target_fds(g); + + p->openfds.files = 0; + p->openfds.pipes = 0; + p->openfds.sockets = 0; + p->openfds.inotifies = 0; + p->openfds.eventfds = 0; + p->openfds.timerfds = 0; + p->openfds.signalfds = 0; + p->openfds.eventpolls = 0; + p->openfds.other = 0; + + long currentfds = 0; + size_t c, size = p->fds_size; + struct pid_fd *fds = p->fds; + for(c = 0; c < size ;c++) { + int fd = fds[c].fd; + + if(likely(fd <= 0 || fd >= all_files_size)) + continue; + + currentfds++; + aggregage_fd_type_on_openfds(all_files[fd].type, &p->openfds); + + aggregate_fd_on_target(fd, w); + aggregate_fd_on_target(fd, u); + aggregate_fd_on_target(fd, g); + } + + if (currentfds >= currentmaxfds) + currentmaxfds = currentfds; +} + +static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o) { + (void)o; + + if(unlikely(!p->updated)) { + // the process is not running + return; + } + + if(unlikely(!w)) { + error("pid %d %s was left without a target!", p->pid, p->comm); + return; + } + + w->cutime += p->cutime; + w->cstime += p->cstime; + w->cgtime += p->cgtime; + w->cminflt += p->cminflt; + w->cmajflt += p->cmajflt; + + w->utime += p->utime; + w->stime += p->stime; + w->gtime += p->gtime; + w->minflt += p->minflt; + w->majflt += p->majflt; + + // w->rss += p->rss; + + w->status_vmsize += p->status_vmsize; + w->status_vmrss += p->status_vmrss; + w->status_vmshared += p->status_vmshared; + w->status_rssfile += p->status_rssfile; + w->status_rssshmem += p->status_rssshmem; + w->status_vmswap += p->status_vmswap; + + w->io_logical_bytes_read += p->io_logical_bytes_read; + w->io_logical_bytes_written += p->io_logical_bytes_written; + w->io_read_calls += p->io_read_calls; + w->io_write_calls += p->io_write_calls; + w->io_storage_bytes_read += p->io_storage_bytes_read; + w->io_storage_bytes_written += p->io_storage_bytes_written; + w->io_cancelled_write_bytes += p->io_cancelled_write_bytes; + + w->processes++; + w->num_threads += p->num_threads; + + if(!w->collected_starttime || p->collected_starttime < w->collected_starttime) w->collected_starttime = p->collected_starttime; + if(!w->uptime_min || p->uptime < w->uptime_min) w->uptime_min = p->uptime; + w->uptime_sum += p->uptime; + if(!w->uptime_max || w->uptime_max < p->uptime) w->uptime_max = p->uptime; + + if(unlikely(debug_enabled || w->debug_enabled)) { + debug_log_int("aggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); + + struct pid_on_target *pid_on_target = mallocz(sizeof(struct pid_on_target)); + pid_on_target->pid = p->pid; + pid_on_target->next = w->root_pid; + w->root_pid = pid_on_target; + } +} + +static inline void post_aggregate_targets(struct target *root) { + struct target *w; + for (w = root; w ; w = w->next) { + if(w->collected_starttime) { + if (!w->starttime || w->collected_starttime < w->starttime) { + w->starttime = w->collected_starttime; + } + } else { + w->starttime = 0; + } + } +} + +static void calculate_netdata_statistics(void) { + + apply_apps_groups_targets_inheritance(); + + zero_all_targets(users_root_target); + zero_all_targets(groups_root_target); + apps_groups_targets_count = zero_all_targets(apps_groups_root_target); + + // this has to be done, before the cleanup + struct pid_stat *p = NULL; + struct target *w = NULL, *o = NULL; + + // concentrate everything on the targets + for(p = root_of_pids; p ; p = p->next) { + + // -------------------------------------------------------------------- + // apps_groups target + + aggregate_pid_on_target(p->target, p, NULL); + + + // -------------------------------------------------------------------- + // user target + + o = p->user_target; + if(likely(p->user_target && p->user_target->uid == p->uid)) + w = p->user_target; + else { + if(unlikely(debug_enabled && p->user_target)) + debug_log("pid %d (%s) switched user from %u (%s) to %u.", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid); + + w = p->user_target = get_users_target(p->uid); + } + + aggregate_pid_on_target(w, p, o); + + + // -------------------------------------------------------------------- + // user group target + + o = p->group_target; + if(likely(p->group_target && p->group_target->gid == p->gid)) + w = p->group_target; + else { + if(unlikely(debug_enabled && p->group_target)) + debug_log("pid %d (%s) switched group from %u (%s) to %u.", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid); + + w = p->group_target = get_groups_target(p->gid); + } + + aggregate_pid_on_target(w, p, o); + + + // -------------------------------------------------------------------- + // aggregate all file descriptors + + if(enable_file_charts) + aggregate_pid_fds_on_targets(p); + } + + post_aggregate_targets(apps_groups_root_target); + post_aggregate_targets(users_root_target); + post_aggregate_targets(groups_root_target); + + cleanup_exited_pids(); +} + +// ---------------------------------------------------------------------------- +// update chart dimensions + +static inline void send_BEGIN(const char *type, const char *id, usec_t usec) { + fprintf(stdout, "BEGIN %s.%s %llu\n", type, id, usec); +} + +static inline void send_SET(const char *name, kernel_uint_t value) { + fprintf(stdout, "SET %s = " KERNEL_UINT_FORMAT "\n", name, value); +} + +static inline void send_END(void) { + fprintf(stdout, "END\n"); +} + +void send_resource_usage_to_netdata(usec_t dt) { + static struct timeval last = { 0, 0 }; + static struct rusage me_last; + + struct timeval now; + struct rusage me; + + usec_t cpuuser; + usec_t cpusyst; + + if(!last.tv_sec) { + now_monotonic_timeval(&last); + getrusage(RUSAGE_SELF, &me_last); + + cpuuser = 0; + cpusyst = 0; + } + else { + now_monotonic_timeval(&now); + getrusage(RUSAGE_SELF, &me); + + cpuuser = me.ru_utime.tv_sec * USEC_PER_SEC + me.ru_utime.tv_usec; + cpusyst = me.ru_stime.tv_sec * USEC_PER_SEC + me.ru_stime.tv_usec; + + memmove(&last, &now, sizeof(struct timeval)); + memmove(&me_last, &me, sizeof(struct rusage)); + } + + static char created_charts = 0; + if(unlikely(!created_charts)) { + created_charts = 1; + + fprintf(stdout, + "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n" + "DIMENSION user '' incremental 1 1000\n" + "DIMENSION system '' incremental 1 1000\n" + "CHART netdata.apps_sizes '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_sizes line 140001 %1$d\n" + "DIMENSION calls '' incremental 1 1\n" + "DIMENSION files '' incremental 1 1\n" + "DIMENSION filenames '' incremental 1 1\n" + "DIMENSION inode_changes '' incremental 1 1\n" + "DIMENSION link_changes '' incremental 1 1\n" + "DIMENSION pids '' absolute 1 1\n" + "DIMENSION fds '' absolute 1 1\n" + "DIMENSION targets '' absolute 1 1\n" + "DIMENSION new_pids 'new pids' incremental 1 1\n" + , update_every + ); + + fprintf(stdout, + "CHART netdata.apps_fix '' 'Apps Plugin Normalization Ratios' 'percentage' apps.plugin netdata.apps_fix line 140002 %1$d\n" + "DIMENSION utime '' absolute 1 %2$llu\n" + "DIMENSION stime '' absolute 1 %2$llu\n" + "DIMENSION gtime '' absolute 1 %2$llu\n" + "DIMENSION minflt '' absolute 1 %2$llu\n" + "DIMENSION majflt '' absolute 1 %2$llu\n" + , update_every + , RATES_DETAIL + ); + + if(include_exited_childs) + fprintf(stdout, + "CHART netdata.apps_children_fix '' 'Apps Plugin Exited Children Normalization Ratios' 'percentage' apps.plugin netdata.apps_children_fix line 140003 %1$d\n" + "DIMENSION cutime '' absolute 1 %2$llu\n" + "DIMENSION cstime '' absolute 1 %2$llu\n" + "DIMENSION cgtime '' absolute 1 %2$llu\n" + "DIMENSION cminflt '' absolute 1 %2$llu\n" + "DIMENSION cmajflt '' absolute 1 %2$llu\n" + , update_every + , RATES_DETAIL + ); + + } + + fprintf(stdout, + "BEGIN netdata.apps_cpu %llu\n" + "SET user = %llu\n" + "SET system = %llu\n" + "END\n" + "BEGIN netdata.apps_sizes %llu\n" + "SET calls = %zu\n" + "SET files = %zu\n" + "SET filenames = %zu\n" + "SET inode_changes = %zu\n" + "SET link_changes = %zu\n" + "SET pids = %zu\n" + "SET fds = %d\n" + "SET targets = %zu\n" + "SET new_pids = %zu\n" + "END\n" + , dt + , cpuuser + , cpusyst + , dt + , calls_counter + , file_counter + , filenames_allocated_counter + , inodes_changed_counter + , links_changed_counter + , all_pids_count + , all_files_len + , apps_groups_targets_count + , targets_assignment_counter + ); + + fprintf(stdout, + "BEGIN netdata.apps_fix %llu\n" + "SET utime = %u\n" + "SET stime = %u\n" + "SET gtime = %u\n" + "SET minflt = %u\n" + "SET majflt = %u\n" + "END\n" + , dt + , (unsigned int)(utime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(stime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(gtime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(minflt_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(majflt_fix_ratio * 100 * RATES_DETAIL) + ); + + if(include_exited_childs) + fprintf(stdout, + "BEGIN netdata.apps_children_fix %llu\n" + "SET cutime = %u\n" + "SET cstime = %u\n" + "SET cgtime = %u\n" + "SET cminflt = %u\n" + "SET cmajflt = %u\n" + "END\n" + , dt + , (unsigned int)(cutime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cstime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cgtime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cminflt_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cmajflt_fix_ratio * 100 * RATES_DETAIL) + ); +} + +static void normalize_utilization(struct target *root) { + struct target *w; + + // childs processing introduces spikes + // here we try to eliminate them by disabling childs processing either for specific dimensions + // or entirely. Of course, either way, we disable it just a single iteration. + + kernel_uint_t max_time = processors * time_factor * RATES_DETAIL; + kernel_uint_t utime = 0, cutime = 0, stime = 0, cstime = 0, gtime = 0, cgtime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0; + + if(global_utime > max_time) global_utime = max_time; + if(global_stime > max_time) global_stime = max_time; + if(global_gtime > max_time) global_gtime = max_time; + + for(w = root; w ; w = w->next) { + if(w->target || (!w->processes && !w->exposed)) continue; + + utime += w->utime; + stime += w->stime; + gtime += w->gtime; + cutime += w->cutime; + cstime += w->cstime; + cgtime += w->cgtime; + + minflt += w->minflt; + majflt += w->majflt; + cminflt += w->cminflt; + cmajflt += w->cmajflt; + } + + if(global_utime || global_stime || global_gtime) { + if(global_utime + global_stime + global_gtime > utime + cutime + stime + cstime + gtime + cgtime) { + // everything we collected fits + utime_fix_ratio = + stime_fix_ratio = + gtime_fix_ratio = + cutime_fix_ratio = + cstime_fix_ratio = + cgtime_fix_ratio = 1.0; //(NETDATA_DOUBLE)(global_utime + global_stime) / (NETDATA_DOUBLE)(utime + cutime + stime + cstime); + } + else if((global_utime + global_stime > utime + stime) && (cutime || cstime)) { + // children resources are too high + // lower only the children resources + utime_fix_ratio = + stime_fix_ratio = + gtime_fix_ratio = 1.0; + cutime_fix_ratio = + cstime_fix_ratio = + cgtime_fix_ratio = (NETDATA_DOUBLE)((global_utime + global_stime) - (utime + stime)) / (NETDATA_DOUBLE)(cutime + cstime); + } + else if(utime || stime) { + // even running processes are unrealistic + // zero the children resources + // lower the running processes resources + utime_fix_ratio = + stime_fix_ratio = + gtime_fix_ratio = (NETDATA_DOUBLE)(global_utime + global_stime) / (NETDATA_DOUBLE)(utime + stime); + cutime_fix_ratio = + cstime_fix_ratio = + cgtime_fix_ratio = 0.0; + } + else { + utime_fix_ratio = + stime_fix_ratio = + gtime_fix_ratio = + cutime_fix_ratio = + cstime_fix_ratio = + cgtime_fix_ratio = 0.0; + } + } + else { + utime_fix_ratio = + stime_fix_ratio = + gtime_fix_ratio = + cutime_fix_ratio = + cstime_fix_ratio = + cgtime_fix_ratio = 0.0; + } + + if(utime_fix_ratio > 1.0) utime_fix_ratio = 1.0; + if(cutime_fix_ratio > 1.0) cutime_fix_ratio = 1.0; + if(stime_fix_ratio > 1.0) stime_fix_ratio = 1.0; + if(cstime_fix_ratio > 1.0) cstime_fix_ratio = 1.0; + if(gtime_fix_ratio > 1.0) gtime_fix_ratio = 1.0; + if(cgtime_fix_ratio > 1.0) cgtime_fix_ratio = 1.0; + + // if(utime_fix_ratio < 0.0) utime_fix_ratio = 0.0; + // if(cutime_fix_ratio < 0.0) cutime_fix_ratio = 0.0; + // if(stime_fix_ratio < 0.0) stime_fix_ratio = 0.0; + // if(cstime_fix_ratio < 0.0) cstime_fix_ratio = 0.0; + // if(gtime_fix_ratio < 0.0) gtime_fix_ratio = 0.0; + // if(cgtime_fix_ratio < 0.0) cgtime_fix_ratio = 0.0; + + // TODO + // we use cpu time to normalize page faults + // the problem is that to find the proper max values + // for page faults we have to parse /proc/vmstat + // which is quite big to do it again (netdata does it already) + // + // a better solution could be to somehow have netdata + // do this normalization for us + + if(utime || stime || gtime) + majflt_fix_ratio = + minflt_fix_ratio = (NETDATA_DOUBLE)(utime * utime_fix_ratio + stime * stime_fix_ratio + gtime * gtime_fix_ratio) / (NETDATA_DOUBLE)(utime + stime + gtime); + else + minflt_fix_ratio = + majflt_fix_ratio = 1.0; + + if(cutime || cstime || cgtime) + cmajflt_fix_ratio = + cminflt_fix_ratio = (NETDATA_DOUBLE)(cutime * cutime_fix_ratio + cstime * cstime_fix_ratio + cgtime * cgtime_fix_ratio) / (NETDATA_DOUBLE)(cutime + cstime + cgtime); + else + cminflt_fix_ratio = + cmajflt_fix_ratio = 1.0; + + // the report + + debug_log( + "SYSTEM: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " + "COLLECTED: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " + "DELTA: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " + "FIX: u=%0.2f s=%0.2f g=%0.2f cu=%0.2f cs=%0.2f cg=%0.2f " + "FINALLY: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " + , global_utime + , global_stime + , global_gtime + , utime + , stime + , gtime + , cutime + , cstime + , cgtime + , utime + cutime - global_utime + , stime + cstime - global_stime + , gtime + cgtime - global_gtime + , utime_fix_ratio + , stime_fix_ratio + , gtime_fix_ratio + , cutime_fix_ratio + , cstime_fix_ratio + , cgtime_fix_ratio + , (kernel_uint_t)(utime * utime_fix_ratio) + , (kernel_uint_t)(stime * stime_fix_ratio) + , (kernel_uint_t)(gtime * gtime_fix_ratio) + , (kernel_uint_t)(cutime * cutime_fix_ratio) + , (kernel_uint_t)(cstime * cstime_fix_ratio) + , (kernel_uint_t)(cgtime * cgtime_fix_ratio) + ); +} + +static void send_collected_data_to_netdata(struct target *root, const char *type, usec_t dt) { + struct target *w; + + send_BEGIN(type, "cpu", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (kernel_uint_t)(w->utime * utime_fix_ratio) + (kernel_uint_t)(w->stime * stime_fix_ratio) + (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cutime * cutime_fix_ratio) + (kernel_uint_t)(w->cstime * cstime_fix_ratio) + (kernel_uint_t)(w->cgtime * cgtime_fix_ratio)):0ULL)); + } + send_END(); + + send_BEGIN(type, "cpu_user", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (kernel_uint_t)(w->utime * utime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cutime * cutime_fix_ratio)):0ULL)); + } + send_END(); + + send_BEGIN(type, "cpu_system", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (kernel_uint_t)(w->stime * stime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cstime * cstime_fix_ratio)):0ULL)); + } + send_END(); + + if(show_guest_time) { + send_BEGIN(type, "cpu_guest", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cgtime * cgtime_fix_ratio)):0ULL)); + } + send_END(); + } + + send_BEGIN(type, "threads", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + send_SET(w->name, w->num_threads); + } + send_END(); + + send_BEGIN(type, "processes", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + send_SET(w->name, w->processes); + } + send_END(); + +#ifndef __FreeBSD__ + send_BEGIN(type, "uptime", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (global_uptime > w->starttime)?(global_uptime - w->starttime):0); + } + send_END(); + + if (enable_detailed_uptime_charts) { + send_BEGIN(type, "uptime_min", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->uptime_min); + } + send_END(); + + send_BEGIN(type, "uptime_avg", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->uptime_sum / w->processes); + } + send_END(); + + send_BEGIN(type, "uptime_max", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->uptime_max); + } + send_END(); + } +#endif + + send_BEGIN(type, "mem", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (w->status_vmrss > w->status_vmshared)?(w->status_vmrss - w->status_vmshared):0ULL); + } + send_END(); + + send_BEGIN(type, "vmem", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->status_vmsize); + } + send_END(); + +#ifndef __FreeBSD__ + send_BEGIN(type, "swap", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->status_vmswap); + } + send_END(); +#endif + + send_BEGIN(type, "minor_faults", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (kernel_uint_t)(w->minflt * minflt_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cminflt * cminflt_fix_ratio)):0ULL)); + } + send_END(); + + send_BEGIN(type, "major_faults", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, (kernel_uint_t)(w->majflt * majflt_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cmajflt * cmajflt_fix_ratio)):0ULL)); + } + send_END(); + +#ifndef __FreeBSD__ + send_BEGIN(type, "lreads", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->io_logical_bytes_read); + } + send_END(); + + send_BEGIN(type, "lwrites", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->io_logical_bytes_written); + } + send_END(); +#endif + + send_BEGIN(type, "preads", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->io_storage_bytes_read); + } + send_END(); + + send_BEGIN(type, "pwrites", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->io_storage_bytes_written); + } + send_END(); + + if(enable_file_charts) { + send_BEGIN(type, "files", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.files); + } + if (!strcmp("apps", type)){ + kernel_uint_t usedfdpercentage = (kernel_uint_t) ((currentmaxfds * 100) / sysconf(_SC_OPEN_MAX)); + fprintf(stdout, "VARIABLE fdperc = " KERNEL_UINT_FORMAT "\n", usedfdpercentage); + } + send_END(); + + send_BEGIN(type, "sockets", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.sockets); + } + send_END(); + + send_BEGIN(type, "pipes", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.pipes); + } + send_END(); + } +} + + +// ---------------------------------------------------------------------------- +// generate the charts + +static void send_charts_updates_to_netdata(struct target *root, const char *type, const char *title) +{ + struct target *w; + int newly_added = 0; + + for(w = root ; w ; w = w->next) { + if (w->target) continue; + + if(unlikely(w->processes && (debug_enabled || w->debug_enabled))) { + struct pid_on_target *pid_on_target; + + fprintf(stderr, "apps.plugin: target '%s' has aggregated %u process%s:", w->name, w->processes, (w->processes == 1)?"":"es"); + + for(pid_on_target = w->root_pid; pid_on_target; pid_on_target = pid_on_target->next) { + fprintf(stderr, " %d", pid_on_target->pid); + } + + fputc('\n', stderr); + } + + if (!w->exposed && w->processes) { + newly_added++; + w->exposed = 1; + if (debug_enabled || w->debug_enabled) + debug_log_int("%s just added - regenerating charts.", w->name); + } + } + + // nothing more to show + if(!newly_added && show_guest_time == show_guest_time_old) return; + + // we have something new to show + // update the charts + fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (100%% = 1 core)' 'percentage' cpu %s.cpu stacked 20001 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, time_factor * RATES_DETAIL / 100, w->hidden ? "hidden" : ""); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.mem '' '%s Real Memory (w/o shared)' 'MiB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L); + } + APPS_PLUGIN_FUNCTIONS(); + + + fprintf(stdout, "CHART %s.vmem '' '%s Virtual Memory Size' 'MiB' mem %s.vmem stacked 20005 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.threads '' '%s Threads' 'threads' processes %s.threads stacked 20006 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.processes '' '%s Processes' 'processes' processes %s.processes stacked 20007 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + +#ifndef __FreeBSD__ + fprintf(stdout, "CHART %s.uptime '' '%s Carried Over Uptime' 'seconds' processes %s.uptime line 20008 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + if (enable_detailed_uptime_charts) { + fprintf(stdout, "CHART %s.uptime_min '' '%s Minimum Uptime' 'seconds' processes %s.uptime_min line 20009 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.uptime_avg '' '%s Average Uptime' 'seconds' processes %s.uptime_avg line 20010 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.uptime_max '' '%s Maximum Uptime' 'seconds' processes %s.uptime_max line 20011 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + } +#endif + + fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (100%% = 1 core)' 'percentage' cpu %s.cpu_user stacked 20020 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, time_factor * RATES_DETAIL / 100LLU); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (100%% = 1 core)' 'percentage' cpu %s.cpu_system stacked 20021 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, time_factor * RATES_DETAIL / 100LLU); + } + APPS_PLUGIN_FUNCTIONS(); + + if(show_guest_time) { + fprintf(stdout, "CHART %s.cpu_guest '' '%s CPU Guest Time (100%% = 1 core)' 'percentage' cpu %s.cpu_guest stacked 20022 %d\n", type, title, type, update_every); + for (w = root; w; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, time_factor * RATES_DETAIL / 100LLU); + } + APPS_PLUGIN_FUNCTIONS(); + } + +#ifndef __FreeBSD__ + fprintf(stdout, "CHART %s.swap '' '%s Swap Memory' 'MiB' swap %s.swap stacked 20011 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L); + } + APPS_PLUGIN_FUNCTIONS(); +#endif + + fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20012 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.minor_faults '' '%s Minor Page Faults' 'page faults/s' mem %s.minor_faults stacked 20011 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); + +#ifdef __FreeBSD__ + fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'blocks/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'blocks/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); +#else + fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'KiB/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'KiB/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.lreads '' '%s Disk Logical Reads' 'KiB/s' disk %s.lreads stacked 20042 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.lwrites '' '%s I/O Logical Writes' 'KiB/s' disk %s.lwrites stacked 20042 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL); + } + APPS_PLUGIN_FUNCTIONS(); +#endif + + if(enable_file_charts) { + fprintf(stdout, "CHART %s.files '' '%s Open Files' 'open files' disk %s.files stacked 20050 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.sockets '' '%s Open Sockets' 'open sockets' net %s.sockets stacked 20051 %d\n", + type, title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.pipes '' '%s Pipes' 'open pipes' processes %s.pipes stacked 20053 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + } +} + + +#ifndef __FreeBSD__ +static void send_proc_states_count(usec_t dt) +{ + static bool chart_added = false; + // create chart for count of processes in different states + if (!chart_added) { + fprintf( + stdout, + "CHART system.processes_state '' 'System Processes State' 'processes' processes system.processes_state line %d %d\n", + NETDATA_CHART_PRIO_SYSTEM_PROCESS_STATES, + update_every); + for (proc_state i = PROC_STATUS_RUNNING; i < PROC_STATUS_END; i++) { + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", proc_states[i]); + } + chart_added = true; + } + + // send process state count + send_BEGIN("system", "processes_state", dt); + for (proc_state i = PROC_STATUS_RUNNING; i < PROC_STATUS_END; i++) { + send_SET(proc_states[i], proc_state_count[i]); + } + send_END(); +} +#endif + +// ---------------------------------------------------------------------------- +// parse command line arguments + +int check_proc_1_io() { + int ret = 0; + + procfile *ff = procfile_open("/proc/1/io", NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(!ff) goto cleanup; + + ff = procfile_readall(ff); + if(!ff) goto cleanup; + + ret = 1; + +cleanup: + procfile_close(ff); + return ret; +} + +static void parse_args(int argc, char **argv) +{ + int i, freq = 0; + + for(i = 1; i < argc; i++) { + if(!freq) { + int n = (int)str2l(argv[i]); + if(n > 0) { + freq = n; + continue; + } + } + + if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) { + printf("apps.plugin %s\n", VERSION); + exit(0); + } + + if(strcmp("test-permissions", argv[i]) == 0 || strcmp("-t", argv[i]) == 0) { + if(!check_proc_1_io()) { + perror("Tried to read /proc/1/io and it failed"); + exit(1); + } + printf("OK\n"); + exit(0); + } + + if(strcmp("debug", argv[i]) == 0) { + debug_enabled = 1; +#ifndef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "apps.plugin has been compiled without debugging\n"); +#endif + continue; + } + +#ifndef __FreeBSD__ + if(strcmp("fds-cache-secs", argv[i]) == 0) { + if(argc <= i + 1) { + fprintf(stderr, "Parameter 'fds-cache-secs' requires a number as argument.\n"); + exit(1); + } + i++; + max_fds_cache_seconds = str2i(argv[i]); + if(max_fds_cache_seconds < 0) max_fds_cache_seconds = 0; + continue; + } +#endif + + if(strcmp("no-childs", argv[i]) == 0 || strcmp("without-childs", argv[i]) == 0) { + include_exited_childs = 0; + continue; + } + + if(strcmp("with-childs", argv[i]) == 0) { + include_exited_childs = 1; + continue; + } + + if(strcmp("with-guest", argv[i]) == 0) { + enable_guest_charts = 1; + continue; + } + + if(strcmp("no-guest", argv[i]) == 0 || strcmp("without-guest", argv[i]) == 0) { + enable_guest_charts = 0; + continue; + } + + if(strcmp("with-files", argv[i]) == 0) { + enable_file_charts = 1; + continue; + } + + if(strcmp("no-files", argv[i]) == 0 || strcmp("without-files", argv[i]) == 0) { + enable_file_charts = 0; + continue; + } + + if(strcmp("no-users", argv[i]) == 0 || strcmp("without-users", argv[i]) == 0) { + enable_users_charts = 0; + continue; + } + + if(strcmp("no-groups", argv[i]) == 0 || strcmp("without-groups", argv[i]) == 0) { + enable_groups_charts = 0; + continue; + } + + if(strcmp("with-detailed-uptime", argv[i]) == 0) { + enable_detailed_uptime_charts = 1; + continue; + } + + if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { + fprintf(stderr, + "\n" + " netdata apps.plugin %s\n" + " Copyright (C) 2016-2017 Costa Tsaousis \n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " This program is a data collector plugin for netdata.\n" + "\n" + " Available command line options:\n" + "\n" + " SECONDS set the data collection frequency\n" + "\n" + " debug enable debugging (lot of output)\n" + "\n" + " with-childs\n" + " without-childs enable / disable aggregating exited\n" + " children resources into parents\n" + " (default is enabled)\n" + "\n" + " with-guest\n" + " without-guest enable / disable reporting guest charts\n" + " (default is disabled)\n" + "\n" + " with-files\n" + " without-files enable / disable reporting files, sockets, pipes\n" + " (default is enabled)\n" + "\n" + " without-users disable reporting per user charts\n" + "\n" + " without-groups disable reporting per user group charts\n" + "\n" + " with-detailed-uptime enable reporting min/avg/max uptime charts\n" + "\n" +#ifndef __FreeBSD__ + " fds-cache-secs N cache the files of processed for N seconds\n" + " caching is adaptive per file (when a file\n" + " is found, it starts at 0 and while the file\n" + " remains open, it is incremented up to the\n" + " max given)\n" + " (default is %d seconds)\n" + "\n" +#endif + " version or -v or -V print program version and exit\n" + "\n" + , VERSION +#ifndef __FreeBSD__ + , max_fds_cache_seconds +#endif + ); + exit(1); + } + + error("Cannot understand option %s", argv[i]); + exit(1); + } + + if(freq > 0) update_every = freq; + + if(read_apps_groups_conf(user_config_dir, "groups")) { + info("Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", user_config_dir, stock_config_dir); + + if(read_apps_groups_conf(stock_config_dir, "groups")) { + error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", stock_config_dir); + exit(1); + } + else + info("Loaded config file '%s/apps_groups.conf'", stock_config_dir); + } + else + info("Loaded config file '%s/apps_groups.conf'", user_config_dir); +} + +static int am_i_running_as_root() { + uid_t uid = getuid(), euid = geteuid(); + + if(uid == 0 || euid == 0) { + if(debug_enabled) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); + return 1; + } + + if(debug_enabled) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); + return 0; +} + +#ifdef HAVE_CAPABILITY +static int check_capabilities() { + cap_t caps = cap_get_proc(); + if(!caps) { + error("Cannot get current capabilities."); + return 0; + } + else if(debug_enabled) + info("Received my capabilities from the system."); + + int ret = 1; + + cap_flag_value_t cfv = CAP_CLEAR; + if(cap_get_flag(caps, CAP_DAC_READ_SEARCH, CAP_EFFECTIVE, &cfv) == -1) { + error("Cannot find if CAP_DAC_READ_SEARCH is effective."); + ret = 0; + } + else { + if(cfv != CAP_SET) { + error("apps.plugin should run with CAP_DAC_READ_SEARCH."); + ret = 0; + } + else if(debug_enabled) + info("apps.plugin runs with CAP_DAC_READ_SEARCH."); + } + + cfv = CAP_CLEAR; + if(cap_get_flag(caps, CAP_SYS_PTRACE, CAP_EFFECTIVE, &cfv) == -1) { + error("Cannot find if CAP_SYS_PTRACE is effective."); + ret = 0; + } + else { + if(cfv != CAP_SET) { + error("apps.plugin should run with CAP_SYS_PTRACE."); + ret = 0; + } + else if(debug_enabled) + info("apps.plugin runs with CAP_SYS_PTRACE."); + } + + cap_free(caps); + + return ret; +} +#else +static int check_capabilities() { + return 0; +} +#endif + +netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER; + +#define PROCESS_FILTER_CATEGORY "category:" +#define PROCESS_FILTER_USER "user:" +#define PROCESS_FILTER_GROUP "group:" +#define PROCESS_FILTER_PROCESS "process:" +#define PROCESS_FILTER_PID "pid:" +#define PROCESS_FILTER_UID "uid:" +#define PROCESS_FILTER_GID "gid:" + +static struct target *find_target_by_name(struct target *base, const char *name) { + struct target *t; + for(t = base; t ; t = t->next) { + if (strcmp(t->name, name) == 0) + return t; + } + + return NULL; +} + +static kernel_uint_t MemTotal = 0; + +static void get_MemTotal(void) { +#ifdef __FreeBSD__ + // TODO - fix this for FreeBSD + return; +#else + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/meminfo", netdata_configured_host_prefix); + + procfile *ff = procfile_open(filename, ": \t", PROCFILE_FLAG_DEFAULT); + if(!ff) + return; + + ff = procfile_readall(ff); + if(!ff) + return; + + size_t line, lines = procfile_lines(ff); + + for(line = 0; line < lines ;line++) { + size_t words = procfile_linewords(ff, line); + if(words == 3 && strcmp(procfile_lineword(ff, line, 0), "MemTotal") == 0 && strcmp(procfile_lineword(ff, line, 2), "kB") == 0) { + kernel_uint_t n = str2ull(procfile_lineword(ff, line, 1)); + if(n) MemTotal = n; + break; + } + } + + procfile_close(ff); +#endif +} + +static void apps_plugin_function_error(const char *transaction, int code, const char *msg) { + char buffer[PLUGINSD_LINE_MAX + 1]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec()); + fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + pluginsd_function_result_end_to_stdout(); +} + +static void apps_plugin_function_processes_help(const char *transaction) { + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600); + fprintf(stdout, "%s", + "apps.plugin / processes\n" + "\n" + "Function `processes` presents all the currently running processes of the system.\n" + "\n" + "The following filters are supported:\n" + "\n" + " category:NAME\n" + " Shows only processes that are assigned the category `NAME` in apps_groups.conf\n" + "\n" + " user:NAME\n" + " Shows only processes that are running as user name `NAME`.\n" + "\n" + " group:NAME\n" + " Shows only processes that are running as group name `NAME`.\n" + "\n" + " process:NAME\n" + " Shows only processes that their Command is `NAME` or their parent's Command is `NAME`.\n" + "\n" + " pid:NUMBER\n" + " Shows only processes that their PID is `NUMBER` or their parent's PID is `NUMBER`\n" + "\n" + " uid:NUMBER\n" + " Shows only processes that their UID is `NUMBER`\n" + "\n" + " gid:NUMBER\n" + " Shows only processes that their GID is `NUMBER`\n" + "\n" + "Filters can be combined. Each filter can be given only one time.\n" + ); + pluginsd_function_result_end_to_stdout(); +} + +#define add_table_field(wb, key, name, visible, type, units, max, sort, sortable, sticky, unique_key, pointer_to, summary) do { \ + if(fields_added) buffer_strcat(wb, ","); \ + buffer_sprintf(wb, "\n \"%s\": {", key); \ + buffer_sprintf(wb, "\n \"index\":%d,", fields_added); \ + buffer_sprintf(wb, "\n \"unique_key\":%s,", (unique_key)?"true":"false"); \ + buffer_sprintf(wb, "\n \"name\":\"%s\",", name); \ + buffer_sprintf(wb, "\n \"visible\":%s,", (visible)?"true":"false"); \ + buffer_sprintf(wb, "\n \"type\":\"%s\",", type); \ + if(units) \ + buffer_sprintf(wb, "\n \"units\":\"%s\",", (char*)(units)); \ + if(!isnan((NETDATA_DOUBLE)(max))) \ + buffer_sprintf(wb, "\n \"max\":%f,", (NETDATA_DOUBLE)(max)); \ + if(pointer_to) \ + buffer_sprintf(wb, "\n \"pointer_to\":\"%s\",", (char *)(pointer_to)); \ + buffer_sprintf(wb, "\n \"sort\":\"%s\",", sort); \ + buffer_sprintf(wb, "\n \"sortable\":%s,", (sortable)?"true":"false"); \ + buffer_sprintf(wb, "\n \"sticky\":%s,", (sticky)?"true":"false"); \ + buffer_sprintf(wb, "\n \"summary\":\"%s\"", summary); \ + buffer_sprintf(wb, "\n }"); \ + fields_added++; \ +} while(0) + +#define add_value_field_llu_with_max(wb, key, value) do { \ + unsigned long long _tmp = (value); \ + key ## _max = (rows == 0) ? (_tmp) : MAX(key ## _max, _tmp); \ + buffer_fast_strcat(wb, ",", 1); \ + buffer_print_llu(wb, _tmp); \ +} while(0) + +#define add_value_field_ndd_with_max(wb, key, value) do { \ + NETDATA_DOUBLE _tmp = (value); \ + key ## _max = (rows == 0) ? (_tmp) : MAX(key ## _max, _tmp); \ + buffer_fast_strcat(wb, ",", 1); \ + buffer_rrd_value(wb, _tmp); \ +} while(0) + +static void apps_plugin_function_processes(const char *transaction, char *function __maybe_unused, char *line_buffer __maybe_unused, int line_max __maybe_unused, int timeout __maybe_unused) { + struct pid_stat *p; + + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + size_t num_words = pluginsd_split_words(function, words, PLUGINSD_MAX_WORDS, NULL, NULL, 0); + + struct target *category = NULL, *user = NULL, *group = NULL; + const char *process_name = NULL; + pid_t pid = 0; + uid_t uid = 0; + gid_t gid = 0; + + bool filter_pid = false, filter_uid = false, filter_gid = false; + + for(int i = 1; i < PLUGINSD_MAX_WORDS ;i++) { + const char *keyword = get_word(words, num_words, i); + if(!keyword) break; + + if(!category && strncmp(keyword, PROCESS_FILTER_CATEGORY, strlen(PROCESS_FILTER_CATEGORY)) == 0) { + category = find_target_by_name(apps_groups_root_target, &keyword[strlen(PROCESS_FILTER_CATEGORY)]); + if(!category) { + apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, "No category with that name found."); + return; + } + } + else if(!user && strncmp(keyword, PROCESS_FILTER_USER, strlen(PROCESS_FILTER_USER)) == 0) { + user = find_target_by_name(users_root_target, &keyword[strlen(PROCESS_FILTER_USER)]); + if(!user) { + apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, "No user with that name found."); + return; + } + } + else if(strncmp(keyword, PROCESS_FILTER_GROUP, strlen(PROCESS_FILTER_GROUP)) == 0) { + group = find_target_by_name(groups_root_target, &keyword[strlen(PROCESS_FILTER_GROUP)]); + if(!group) { + apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, "No group with that name found."); + return; + } + } + else if(!process_name && strncmp(keyword, PROCESS_FILTER_PROCESS, strlen(PROCESS_FILTER_PROCESS)) == 0) { + process_name = &keyword[strlen(PROCESS_FILTER_PROCESS)]; + } + else if(!pid && strncmp(keyword, PROCESS_FILTER_PID, strlen(PROCESS_FILTER_PID)) == 0) { + pid = str2i(&keyword[strlen(PROCESS_FILTER_PID)]); + filter_pid = true; + } + else if(!uid && strncmp(keyword, PROCESS_FILTER_UID, strlen(PROCESS_FILTER_UID)) == 0) { + uid = str2i(&keyword[strlen(PROCESS_FILTER_UID)]); + filter_uid = true; + } + else if(!gid && strncmp(keyword, PROCESS_FILTER_GID, strlen(PROCESS_FILTER_GID)) == 0) { + gid = str2i(&keyword[strlen(PROCESS_FILTER_GID)]); + filter_gid = true; + } + else if(strcmp(keyword, "help") == 0) { + apps_plugin_function_processes_help(transaction); + return; + } + else { + char msg[PLUGINSD_LINE_MAX]; + snprintfz(msg, PLUGINSD_LINE_MAX, "Invalid parameter '%s'", keyword); + apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, msg); + return; + } + } + + time_t expires = now_realtime_sec() + update_every; + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires); + + unsigned int cpu_divisor = time_factor * RATES_DETAIL / 100; + unsigned int memory_divisor = 1024; + unsigned int io_divisor = 1024 * RATES_DETAIL; + + BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX); + buffer_sprintf(wb, + "{" + "\n \"status\":%d" + ",\n \"type\":\"table\"" + ",\n \"update_every\":%d" + ",\n \"data\":[" + "\n" + , HTTP_RESP_OK + , update_every + ); + + NETDATA_DOUBLE + UserCPU_max = 0.0 + , SysCPU_max = 0.0 + , GuestCPU_max = 0.0 + , CUserCPU_max = 0.0 + , CSysCPU_max = 0.0 + , CGuestCPU_max = 0.0 + , CPU_max = 0.0 + , VMSize_max = 0.0 + , RSS_max = 0.0 + , Shared_max = 0.0 + , Swap_max = 0.0 + , MemPcnt_max = 0.0 + ; + + unsigned long long + Processes_max = 0 + , Threads_max = 0 + , Uptime_max = 0 + , MinFlt_max = 0 + , CMinFlt_max = 0 + , TMinFlt_max = 0 + , MajFlt_max = 0 + , CMajFlt_max = 0 + , TMajFlt_max = 0 + , PReads_max = 0 + , PWrites_max = 0 + , RCalls_max = 0 + , WCalls_max = 0 + , Files_max = 0 + , Pipes_max = 0 + , Sockets_max = 0 + , iNotiFDs_max = 0 + , EventFDs_max = 0 + , TimerFDs_max = 0 + , SigFDs_max = 0 + , EvPollFDs_max = 0 + , OtherFDs_max = 0 + , FDs_max = 0 + ; + +#ifndef __FreeBSD__ + unsigned long long + LReads_max = 0 + , LWrites_max = 0 + ; +#endif + + int rows= 0; + for(p = root_of_pids; p ; p = p->next) { + if(!p->updated) + continue; + + if(category && p->target != category) + continue; + + if(user && p->user_target != user) + continue; + + if(group && p->group_target != group) + continue; + + if(process_name && ((strcmp(p->comm, process_name) != 0 && !p->parent) || (p->parent && strcmp(p->comm, process_name) != 0 && strcmp(p->parent->comm, process_name) != 0))) + continue; + + if(filter_pid && p->pid != pid && p->ppid != pid) + continue; + + if(filter_uid && p->uid != uid) + continue; + + if(filter_gid && p->gid != gid) + continue; + + if(rows) buffer_fast_strcat(wb, ",\n", 2); + rows++; + + buffer_strcat(wb, " ["); + + // IMPORTANT! + // THE ORDER SHOULD BE THE SAME WITH THE FIELDS! + + // pid + buffer_print_llu(wb, p->pid); + + // cmd + buffer_fast_strcat(wb, ",\"", 2); + buffer_strcat_jsonescape(wb, p->comm); + buffer_fast_strcat(wb, "\"", 1); + +#ifdef NETDATA_DEV_MODE + // cmdline + buffer_fast_strcat(wb, ",\"", 2); + buffer_strcat_jsonescape(wb, (p->cmdline && *p->cmdline) ? p->cmdline : p->comm); + buffer_fast_strcat(wb, "\"", 1); +#endif + + // ppid + buffer_fast_strcat(wb, ",", 1); buffer_print_llu(wb, p->ppid); + + // category + buffer_fast_strcat(wb, ",\"", 2); + buffer_strcat_jsonescape(wb, p->target ? p->target->name : "-"); + buffer_fast_strcat(wb, "\"", 1); + + // user + buffer_fast_strcat(wb, ",\"", 2); + buffer_strcat_jsonescape(wb, p->user_target ? p->user_target->name : "-"); + buffer_fast_strcat(wb, "\"", 1); + + // uid + buffer_fast_strcat(wb, ",", 1); buffer_print_llu(wb, p->uid); + + // group + buffer_fast_strcat(wb, ",\"", 2); + buffer_strcat_jsonescape(wb, p->group_target ? p->group_target->name : "-"); + buffer_fast_strcat(wb, "\"", 1); + + // gid + buffer_fast_strcat(wb, ",", 1); buffer_print_llu(wb, p->gid); + + // procs + add_value_field_llu_with_max(wb, Processes, p->children_count); + + // threads + add_value_field_llu_with_max(wb, Threads, p->num_threads); + + // uptime + add_value_field_llu_with_max(wb, Uptime, p->uptime); + + // minor page faults + add_value_field_llu_with_max(wb, MinFlt, p->minflt / RATES_DETAIL); + add_value_field_llu_with_max(wb, CMinFlt, p->cminflt / RATES_DETAIL); + add_value_field_llu_with_max(wb, TMinFlt, (p->minflt + p->cminflt) / RATES_DETAIL); + + // major page faults + add_value_field_llu_with_max(wb, MajFlt, p->majflt / RATES_DETAIL); + add_value_field_llu_with_max(wb, CMajFlt, p->cmajflt / RATES_DETAIL); + add_value_field_llu_with_max(wb, TMajFlt, (p->majflt + p->cmajflt) / RATES_DETAIL); + + // CPU utilization % + add_value_field_ndd_with_max(wb, UserCPU, (NETDATA_DOUBLE)(p->utime) / cpu_divisor); + add_value_field_ndd_with_max(wb, SysCPU, (NETDATA_DOUBLE)(p->stime) / cpu_divisor); + add_value_field_ndd_with_max(wb, GuestCPU, (NETDATA_DOUBLE)(p->gtime) / cpu_divisor); + add_value_field_ndd_with_max(wb, CUserCPU, (NETDATA_DOUBLE)(p->cutime) / cpu_divisor); + add_value_field_ndd_with_max(wb, CSysCPU, (NETDATA_DOUBLE)(p->cstime) / cpu_divisor); + add_value_field_ndd_with_max(wb, CGuestCPU, (NETDATA_DOUBLE)(p->cgtime) / cpu_divisor); + add_value_field_ndd_with_max(wb, CPU, (NETDATA_DOUBLE)(p->utime + p->stime + p->gtime + p->cutime + p->cstime + p->cgtime) / cpu_divisor); + + // memory MiB + add_value_field_ndd_with_max(wb, VMSize, (NETDATA_DOUBLE)p->status_vmsize / memory_divisor); + add_value_field_ndd_with_max(wb, RSS, (NETDATA_DOUBLE)p->status_vmrss / memory_divisor); + add_value_field_ndd_with_max(wb, Shared, (NETDATA_DOUBLE)p->status_vmshared / memory_divisor); + add_value_field_ndd_with_max(wb, Swap, (NETDATA_DOUBLE)p->status_vmswap / memory_divisor); + + if(MemTotal) + add_value_field_ndd_with_max(wb, MemPcnt, (NETDATA_DOUBLE)p->status_vmrss * 100.0 / (NETDATA_DOUBLE)MemTotal); + + // Logical I/O +#ifndef __FreeBSD__ + add_value_field_llu_with_max(wb, LReads, p->io_logical_bytes_read / io_divisor); + add_value_field_llu_with_max(wb, LWrites, p->io_logical_bytes_written / io_divisor); +#endif + + // Physical I/O + add_value_field_llu_with_max(wb, PReads, p->io_storage_bytes_read / io_divisor); + add_value_field_llu_with_max(wb, PWrites, p->io_storage_bytes_written / io_divisor); + + // I/O calls + add_value_field_llu_with_max(wb, RCalls, p->io_read_calls / RATES_DETAIL); + add_value_field_llu_with_max(wb, WCalls, p->io_write_calls / RATES_DETAIL); + + // open file descriptors + add_value_field_llu_with_max(wb, Files, p->openfds.files); + add_value_field_llu_with_max(wb, Pipes, p->openfds.pipes); + add_value_field_llu_with_max(wb, Sockets, p->openfds.sockets); + add_value_field_llu_with_max(wb, iNotiFDs, p->openfds.inotifies); + add_value_field_llu_with_max(wb, EventFDs, p->openfds.eventfds); + add_value_field_llu_with_max(wb, TimerFDs, p->openfds.timerfds); + add_value_field_llu_with_max(wb, SigFDs, p->openfds.signalfds); + add_value_field_llu_with_max(wb, EvPollFDs, p->openfds.eventpolls); + add_value_field_llu_with_max(wb, OtherFDs, p->openfds.other); + add_value_field_llu_with_max(wb, FDs, p->openfds.files + p->openfds.pipes + p->openfds.sockets + p->openfds.inotifies + p->openfds.eventfds + p->openfds.timerfds + p->openfds.signalfds + p->openfds.eventpolls + p->openfds.other); + + buffer_fast_strcat(wb, "]", 1); + + fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout); + buffer_flush(wb); + } + + { + int fields_added = 0; + + buffer_flush(wb); + buffer_sprintf(wb, "\n ],\n \"columns\": {"); + + // IMPORTANT! + // THE ORDER SHOULD BE THE SAME WITH THE VALUES! + add_table_field(wb, "Pid", "Process ID", true, "integer", NULL, NAN, "ascending", true, true, true, NULL, "count_unique"); + add_table_field(wb, "Cmd", "Process Name", true, "string", NULL, NAN, "ascending", true, true, false, NULL, "count_unique"); + +#ifdef NETDATA_DEV_MODE + add_table_field(wb, "CmdLine", "Command Line", false, "detail-string:Cmd", NULL, NAN, "ascending", true, false, false, NULL, "count_unique"); +#endif + add_table_field(wb, "PPid", "Parent Process ID", false, "integer", NULL, NAN, "ascending", true, false, false, "Pid", "count_unique"); + add_table_field(wb, "Category", "Category (apps_groups.conf)", true, "string", NULL, NAN, "ascending", true, true, false, NULL, "count_unique"); + add_table_field(wb, "User", "User Owner", true, "string", NULL, NAN, "ascending", true, false, false, NULL, "count_unique"); + add_table_field(wb, "Uid", "User ID", false, "integer", NULL, NAN, "ascending", true, false, false, NULL, "count_unique"); + add_table_field(wb, "Group", "Group Owner", false, "string", NULL, NAN, "ascending", true, false, false, NULL, "count_unique"); + add_table_field(wb, "Gid", "Group ID", false, "integer", NULL, NAN, "ascending", true, false, false, NULL, "count_unique"); + add_table_field(wb, "Processes", "Processes", true, "bar-with-integer", "processes", Processes_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "Threads", "Threads", true, "bar-with-integer", "threads", Threads_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "Uptime", "Uptime in seconds", true, "duration", "seconds", Uptime_max, "descending", true, false, false, NULL, "max"); + + // minor page faults + add_table_field(wb, "MinFlt", "Minor Page Faults/s", false, "bar", "pgflts/s", MinFlt_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "CMinFlt", "Children Minor Page Faults/s", false, "bar", "pgflts/s", CMinFlt_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "TMinFlt", "Total Minor Page Faults/s", false, "bar", "pgflts/s", TMinFlt_max, "descending", true, false, false, NULL, "sum"); + + // major page faults + add_table_field(wb, "MajFlt", "Major Page Faults/s", false, "bar", "pgflts/s", MajFlt_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "CMajFlt", "Children Major Page Faults/s", false, "bar", "pgflts/s", CMajFlt_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "TMajFlt", "Total Major Page Faults/s", true, "bar", "pgflts/s", TMajFlt_max, "descending", true, false, false, NULL, "sum"); + + // CPU utilization + add_table_field(wb, "UserCPU", "User CPU time", false, "bar", "%", UserCPU_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "SysCPU", "System CPU Time", false, "bar", "%", SysCPU_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "GuestCPU", "Guest CPU Time", false, "bar", "%", GuestCPU_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "CUserCPU", "Children User CPU Time", false, "bar", "%", CUserCPU_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "CSysCPU", "Children System CPU Time", false, "bar", "%", CSysCPU_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "CGuestCPU", "Children Guest CPU Time", false, "bar", "%", CGuestCPU_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "CPU", "Total CPU Time", true, "bar", "%", CPU_max, "descending", true, false, false, NULL, "sum"); + + // memory + add_table_field(wb, "VMSize", "Virtual Memory Size", false, "bar", "MiB", VMSize_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "RSS", "Resident Set Size", MemTotal ? false : true, "bar", "MiB", RSS_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "Shared", "Shared Pages", false, "bar", "MiB", Shared_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "Swap", "Swap Memory", false, "bar", "MiB", Swap_max, "descending", true, false, false, NULL, "sum"); + + if(MemTotal) + add_table_field(wb, "MemPcnt", "Memory Percentage", true, "bar", "%", 100.0, "descending", true, false, false, NULL, "sum"); + + // Logical I/O +#ifndef __FreeBSD__ + add_table_field(wb, "LReads", "Logical I/O Reads", false, "bar", "KiB/s", LReads_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "LWrites", "Logical I/O Writes", false, "bar", "KiB/s", LWrites_max, "descending", true, false, false, NULL, "sum"); +#endif + + // Physical I/O + add_table_field(wb, "PReads", "Physical I/O Reads", true, "bar", "KiB/s", PReads_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "PWrites", "Physical I/O Writes", true, "bar", "KiB/s", PWrites_max, "descending", true, false, false, NULL, "sum"); + + // I/O calls + add_table_field(wb, "RCalls", "I/O Read Calls", false, "bar", "calls/s", RCalls_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "WCalls", "I/O Write Calls", false, "bar", "calls/s", WCalls_max, "descending", true, false, false, NULL, "sum"); + + // open file descriptors + add_table_field(wb, "Files", "Open Files", false, "bar", "fds", Files_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "Pipes", "Open Pipes", false, "bar", "fds", Pipes_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "Sockets", "Open Sockets", false, "bar", "fds", Sockets_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "iNotiFDs", "Open iNotify Descriptors", false, "bar", "fds", iNotiFDs_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "EventFDs", "Open Event Descriptors", false, "bar", "fds", EventFDs_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "TimerFDs", "Open Timer Descriptors", false, "bar", "fds", TimerFDs_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "SigFDs", "Open Signal Descriptors", false, "bar", "fds", SigFDs_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "EvPollFDs", "Open Event Poll Descriptors", false, "bar", "fds", EvPollFDs_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "OtherFDs", "Other Open Descriptors", false, "bar", "fds", OtherFDs_max, "descending", true, false, false, NULL, "sum"); + add_table_field(wb, "FDs", "All Open File Descriptors", true, "bar", "fds", FDs_max, "descending", true, false, false, NULL, "sum"); + + buffer_strcat( + wb, + "" + "\n }," + "\n \"default_sort_column\": \"CPU\"," + "\n \"charts\": {" + "\n \"CPU\": {" + "\n \"name\":\"CPU Utilization\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"UserCPU\", \"SysCPU\", \"GuestCPU\", \"CUserCPU\", \"CSysCPU\", \"CGuestCPU\" ]" + "\n }," + "\n \"Memory\": {" + "\n \"name\":\"Memory\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"VMSize\", \"RSS\", \"Shared\", \"Swap\" ]" + "\n }," + ); + + if(MemTotal) + buffer_strcat( + wb, + "" + "\n \"MemoryPercent\": {" + "\n \"name\":\"Memory Percentage\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"MemPcnt\" ]" + "\n }," + ); + + buffer_strcat( + wb, "" + #ifndef __FreeBSD__ + "\n \"Reads\": {" + "\n \"name\":\"I/O Reads\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"LReads\", \"PReads\" ]" + "\n }," + "\n \"Writes\": {" + "\n \"name\":\"I/O Writes\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"LWrites\", \"PWrites\" ]" + "\n }," + "\n \"LogicalIO\": {" + "\n \"name\":\"Logical I/O\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"LReads\", \"LWrites\" ]" + "\n }," + #endif + "\n \"PhysicalIO\": {" + "\n \"name\":\"Physical I/O\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"PReads\", \"PWrites\" ]" + "\n }," + "\n \"IOCalls\": {" + "\n \"name\":\"I/O Calls\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"RCalls\", \"WCalls\" ]" + "\n }," + "\n \"MinFlt\": {" + "\n \"name\":\"Minor Page Faults\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"MinFlt\", \"CMinFlt\" ]" + "\n }," + "\n \"MajFlt\": {" + "\n \"name\":\"Major Page Faults\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"MajFlt\", \"CMajFlt\" ]" + "\n }," + "\n \"Threads\": {" + "\n \"name\":\"Threads\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"Threads\" ]" + "\n }," + "\n \"Processes\": {" + "\n \"name\":\"Processes\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"Processes\" ]" + "\n }," + "\n \"FDs\": {" + "\n \"name\":\"File Descriptors\"," + "\n \"type\":\"stacked-bar\"," + "\n \"columns\": [ \"Files\", \"Pipes\", \"Sockets\", \"iNotiFDs\", \"EventFDs\", \"TimerFDs\", \"SigFDs\", \"EvPollFDs\", \"OtherFDs\" ]" + "\n }" + "\n }," + "\n \"group_by\": {" + "\n \"pid\": {" + "\n \"name\":\"Process Tree by PID\"," + "\n \"columns\":[ \"PPid\" ]" + "\n }," + "\n \"category\": {" + "\n \"name\":\"Process Tree by Category\"," + "\n \"columns\":[ \"Category\", \"PPid\" ]" + "\n }," + "\n \"user\": {" + "\n \"name\":\"Process Tree by User\"," + "\n \"columns\":[ \"User\", \"PPid\" ]" + "\n }," + "\n \"group\": {" + "\n \"name\":\"Process Tree by Group\"," + "\n \"columns\":[ \"Group\", \"PPid\" ]" + "\n }" + "\n }" + ); + + fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout); + } + + buffer_free(wb); + + fprintf(stdout, ",\n \"expires\":%lld", (long long)expires); + fprintf(stdout, "\n}"); + + pluginsd_function_result_end_to_stdout(); +} + +bool apps_plugin_exit = false; + +void *reader_main(void *arg __maybe_unused) { + char buffer[PLUGINSD_LINE_MAX + 1]; + + char *s = NULL; + while(!apps_plugin_exit && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) { + + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + size_t num_words = pluginsd_split_words(buffer, words, PLUGINSD_MAX_WORDS, NULL, NULL, 0); + + const char *keyword = get_word(words, num_words, 0); + + if(keyword && strcmp(keyword, PLUGINSD_KEYWORD_FUNCTION) == 0) { + char *transaction = get_word(words, num_words, 1); + char *timeout_s = get_word(words, num_words, 2); + char *function = get_word(words, num_words, 3); + + if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { + error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.", + keyword, + transaction?transaction:"(unset)", + timeout_s?timeout_s:"(unset)", + function?function:"(unset)"); + } + else { + int timeout = str2i(timeout_s); + if(timeout <= 0) timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT; + +// internal_error(true, "Received function '%s', transaction '%s', timeout %d", function, transaction, timeout); + + netdata_mutex_lock(&mutex); + + if(strncmp(function, "processes", strlen("processes")) == 0) + apps_plugin_function_processes(transaction, function, buffer, PLUGINSD_LINE_MAX + 1, timeout); + else + apps_plugin_function_error(transaction, HTTP_RESP_NOT_FOUND, "No function with this name found in apps.plugin."); + + fflush(stdout); + netdata_mutex_unlock(&mutex); + +// internal_error(true, "Done with function '%s', transaction '%s', timeout %d", function, transaction, timeout); + } + } + else + error("Received unknown command: %s", keyword?keyword:"(unset)"); + } + + if(!s || feof(stdin) || ferror(stdin)) { + apps_plugin_exit = true; + error("Received error on stdin."); + } + + exit(1); + return NULL; +} + +int main(int argc, char **argv) { + // debug_flags = D_PROCFILE; + + clocks_init(); + + pagesize = (size_t)sysconf(_SC_PAGESIZE); + + // set the name for logging + program_name = "apps.plugin"; + + // disable syslog for apps.plugin + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + bool send_resource_usage = true; + { + const char *s = getenv("NETDATA_INTERNALS_MONITORING"); + if(s && *s && strcmp(s, "NO") == 0) + send_resource_usage = false; + } + + // since apps.plugin runs as root, prevent it from opening symbolic links + procfile_open_flags = O_RDONLY|O_NOFOLLOW; + + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(verify_netdata_host_prefix() == -1) exit(1); + + user_config_dir = getenv("NETDATA_USER_CONFIG_DIR"); + if(user_config_dir == NULL) { + // info("NETDATA_CONFIG_DIR is not passed from netdata"); + user_config_dir = CONFIG_DIR; + } + // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); + + stock_config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); + if(stock_config_dir == NULL) { + // info("NETDATA_CONFIG_DIR is not passed from netdata"); + stock_config_dir = LIBCONFIG_DIR; + } + // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); + +#ifdef NETDATA_INTERNAL_CHECKS + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + info("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } +#endif /* NETDATA_INTERNAL_CHECKS */ + + procfile_adaptive_initial_allocation = 1; + + get_system_HZ(); +#ifdef __FreeBSD__ + time_factor = 1000000ULL / RATES_DETAIL; // FreeBSD uses usecs +#else + time_factor = system_hz; // Linux uses clock ticks +#endif + + get_system_pid_max(); + get_system_cpus(); + + parse_args(argc, argv); + + if(!check_capabilities() && !am_i_running_as_root() && !check_proc_1_io()) { + uid_t uid = getuid(), euid = geteuid(); +#ifdef HAVE_CAPABILITY + error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " + , uid, euid, argv[0], argv[0], argv[0] + ); +#else + error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "Your system does not support capabilities. " + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " + , uid, euid, argv[0], argv[0] + ); +#endif + } + + info("started on pid %d", getpid()); + + snprintfz(all_user_ids.filename, FILENAME_MAX, "%s/etc/passwd", netdata_configured_host_prefix); + debug_log("passwd file: '%s'", all_user_ids.filename); + + snprintfz(all_group_ids.filename, FILENAME_MAX, "%s/etc/group", netdata_configured_host_prefix); + debug_log("group file: '%s'", all_group_ids.filename); + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1); +#endif + + all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1); + + netdata_thread_t reader_thread; + netdata_thread_create(&reader_thread, "APPS_READER", NETDATA_THREAD_OPTION_DONT_LOG, reader_main, NULL); + netdata_mutex_lock(&mutex); + + usec_t step = update_every * USEC_PER_SEC; + global_iterations_counter = 1; + heartbeat_t hb; + heartbeat_init(&hb); + for(; !apps_plugin_exit ; global_iterations_counter++) { + netdata_mutex_unlock(&mutex); + +#ifdef NETDATA_PROFILING +#warning "compiling for profiling" + static int profiling_count=0; + profiling_count++; + if(unlikely(profiling_count > 2000)) exit(0); + usec_t dt = update_every * USEC_PER_SEC; +#else + usec_t dt = heartbeat_next(&hb, step); +#endif + netdata_mutex_lock(&mutex); + + struct pollfd pollfd = { .fd = fileno(stdout), .events = POLLERR }; + if (unlikely(poll(&pollfd, 1, 0) < 0)) { + netdata_mutex_unlock(&mutex); + netdata_thread_cancel(reader_thread); + fatal("Cannot check if a pipe is available"); + } + if (unlikely(pollfd.revents & POLLERR)) { + netdata_mutex_unlock(&mutex); + netdata_thread_cancel(reader_thread); + fatal("Received error on read pipe."); + } + + if(global_iterations_counter % 10 == 0) + get_MemTotal(); + + if(!collect_data_for_all_processes()) { + error("Cannot collect /proc data for running processes. Disabling apps.plugin..."); + printf("DISABLE\n"); + netdata_mutex_unlock(&mutex); + netdata_thread_cancel(reader_thread); + exit(1); + } + + currentmaxfds = 0; + calculate_netdata_statistics(); + normalize_utilization(apps_groups_root_target); + + if(send_resource_usage) + send_resource_usage_to_netdata(dt); + +#ifndef __FreeBSD__ + send_proc_states_count(dt); +#endif + + // this is smart enough to show only newly added apps, when needed + send_charts_updates_to_netdata(apps_groups_root_target, "apps", "Apps"); + if(likely(enable_users_charts)) + send_charts_updates_to_netdata(users_root_target, "users", "Users"); + + if(likely(enable_groups_charts)) + send_charts_updates_to_netdata(groups_root_target, "groups", "User Groups"); + + send_collected_data_to_netdata(apps_groups_root_target, "apps", dt); + + if(likely(enable_users_charts)) + send_collected_data_to_netdata(users_root_target, "users", dt); + + if(likely(enable_groups_charts)) + send_collected_data_to_netdata(groups_root_target, "groups", dt); + + fflush(stdout); + + show_guest_time_old = show_guest_time; + + debug_log("done Loop No %zu", global_iterations_counter); + } +} diff --git a/collectors/cgroups.plugin/Makefile.am b/collectors/cgroups.plugin/Makefile.am new file mode 100644 index 0000000..354b9fb --- /dev/null +++ b/collectors/cgroups.plugin/Makefile.am @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_plugins_SCRIPTS = \ + cgroup-name.sh \ + cgroup-network-helper.sh \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/cgroups.plugin/README.md b/collectors/cgroups.plugin/README.md new file mode 100644 index 0000000..d0f822e --- /dev/null +++ b/collectors/cgroups.plugin/README.md @@ -0,0 +1,308 @@ + + +# cgroups.plugin + +You can monitor containers and virtual machines using **cgroups**. + +cgroups (or control groups), are a Linux kernel feature that provides accounting and resource usage limiting for +processes. When cgroups are bundled with namespaces (i.e. isolation), they form what we usually call **containers**. + +cgroups are hierarchical, meaning that cgroups can contain child cgroups, which can contain more cgroups, etc. All +accounting is reported (and resource usage limits are applied) also in a hierarchical way. + +To visualize cgroup metrics Netdata provides configuration for cherry picking the cgroups of interest. By default ( +without any configuration) Netdata should pick **systemd services**, all kinds of **containers** (lxc, docker, etc) +and **virtual machines** spawn by managers that register them with cgroups (qemu, libvirt, etc). + +## Configuring Netdata for cgroups + +In general, no additional settings are required. Netdata discovers all available cgroups on the host system and +collects their metrics. + +### how Netdata finds the available cgroups + +Linux exposes resource usage reporting and provides dynamic configuration for cgroups, using virtual files (usually) +under `/sys/fs/cgroup`. Netdata reads `/proc/self/mountinfo` to detect the exact mount point of cgroups. Netdata also +allows manual configuration of this mount point, using these settings: + +```text +[plugin:cgroups] + check for new cgroups every = 10 + path to /sys/fs/cgroup/cpuacct = /sys/fs/cgroup/cpuacct + path to /sys/fs/cgroup/blkio = /sys/fs/cgroup/blkio + path to /sys/fs/cgroup/memory = /sys/fs/cgroup/memory + path to /sys/fs/cgroup/devices = /sys/fs/cgroup/devices +``` + +Netdata rescans these directories for added or removed cgroups every `check for new cgroups every` seconds. + +### hierarchical search for cgroups + +Since cgroups are hierarchical, for each of the directories shown above, Netdata walks through the subdirectories +recursively searching for cgroups (each subdirectory is another cgroup). + +To provide a sane default for this setting, Netdata uses the following pattern list (patterns starting with `!` give a +negative match and their order is important: the first matching a path will be used): + +```text +[plugin:cgroups] + search for cgroups in subpaths matching = !*/init.scope !*-qemu !/init.scope !/system !/systemd !/user !/user.slice * +``` + +So, we disable checking for **child cgroups** in systemd internal +cgroups ([systemd services are monitored by Netdata](#monitoring-systemd-services)), user cgroups (normally used for +desktop and remote user sessions), qemu virtual machines (child cgroups of virtual machines) and `init.scope`. All +others are enabled. + +### unified cgroups (cgroups v2) support + +Netdata automatically detects cgroups version. If detection fails Netdata assumes v1. +To switch to v2 manually add: + +```text +[plugin:cgroups] + use unified cgroups = yes + path to unified cgroups = /sys/fs/cgroup +``` + +Unified cgroups use same name pattern matching as v1 cgroups. `cgroup_enable_systemd_services_detailed_memory` is +currently unsupported when using unified cgroups. + +### enabled cgroups + +To provide a sane default, Netdata uses the +following [pattern list](https://learn.netdata.cloud/docs/agent/libnetdata/simple_pattern): + +- checks the pattern against the path of the cgroup + + ```text + [plugin:cgroups] + enable by default cgroups matching = !*/init.scope *.scope !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.slice !*.swap !*.user !/ !/docker !/libvirt !/lxc !/lxc/*/ns !/lxc/*/ns/* !/machine !/qemu !/system !/systemd !/user * + ``` + +- checks the pattern against the name of the cgroup (as you see it on the dashboard) + + ```text + [plugin:cgroups] + enable by default cgroups names matching = * + ``` + +Renaming is configured with the following options: + +```text +[plugin:cgroups] + run script to rename cgroups matching = *.scope *docker* *lxc* *qemu* !/ !*.mount !*.partition !*.service !*.slice !*.swap !*.user * + script to get cgroup names = /usr/libexec/netdata/plugins.d/cgroup-name.sh +``` + +The whole point for the additional pattern list, is to limit the number of times the script will be called. Without this +pattern list, the script might be called thousands of times, depending on the number of cgroups available in the system. + +The above pattern list is matched against the path of the cgroup. For matched cgroups, Netdata calls the +script [cgroup-name.sh](https://raw.githubusercontent.com/netdata/netdata/master/collectors/cgroups.plugin/cgroup-name.sh) +to get its name. This script queries `docker`, `kubectl`, `podman`, or applies heuristics to find give a name for the +cgroup. + +#### Note on Podman container names + +Podman's security model is a lot more restrictive than Docker's, so Netdata will not be able to detect container names +out of the box unless they were started by the same user as Netdata itself. + +If Podman is used in "rootful" mode, it's also possible to use `podman system service` to grant Netdata access to +container names. To do this, ensure `podman system service` is running and Netdata has access +to `/run/podman/podman.sock` (the default permissions as specified by upstream are `0600`, with owner `root`, so you +will have to adjust the configuration). + +[docker-socket-proxy](https://github.com/Tecnativa/docker-socket-proxy) can also be used to give Netdata restricted +access to the socket. Note that `PODMAN_HOST` in Netdata's environment should be set to the proxy's URL in this case. + +### charts with zero metrics + +By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are +ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be +automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a +chart instead of `auto` to enable it permanently. For example: + +```text +[plugin:cgroups] + enable memory (used mem including cache) = yes +``` + +You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero +metrics for all internal Netdata plugins. + +### alarms + +CPU and memory limits are watched and used to rise alarms. Memory usage for every cgroup is checked against `ram` +and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alarms is available in `health.d/cgroups.conf` +file. + +## Monitoring systemd services + +Netdata monitors **systemd services**. Example: + +![image](https://cloud.githubusercontent.com/assets/2662304/21964372/20cd7b84-db53-11e6-98a2-b9c986b082c0.png) + +Support per distribution: + +| system | charts shown | `/sys/fs/cgroup` tree | comments | +|:----------------:|:------------:|:------------------------------------:|:--------------------------| +| Arch Linux | YES | | | +| Gentoo | NO | | can be enabled, see below | +| Ubuntu 16.04 LTS | YES | | | +| Ubuntu 16.10 | YES | [here](http://pastebin.com/PiWbQEXy) | | +| Fedora 25 | YES | [here](http://pastebin.com/ax0373wF) | | +| Debian 8 | NO | | can be enabled, see below | +| AMI | NO | [here](http://pastebin.com/FrxmptjL) | not a systemd system | +| CentOS 7.3.1611 | NO | [here](http://pastebin.com/SpzgezAg) | can be enabled, see below | + +### Monitored systemd service metrics + +- CPU utilization +- Used memory +- RSS memory +- Mapped memory +- Cache memory +- Writeback memory +- Memory minor page faults +- Memory major page faults +- Memory charging activity +- Memory uncharging activity +- Memory limit failures +- Swap memory used +- Disk read bandwidth +- Disk write bandwidth +- Disk read operations +- Disk write operations +- Throttle disk read bandwidth +- Throttle disk write bandwidth +- Throttle disk read operations +- Throttle disk write operations +- Queued disk read operations +- Queued disk write operations +- Merged disk read operations +- Merged disk write operations + +### how to enable cgroup accounting on systemd systems that is by default disabled + +You can verify there is no accounting enabled, by running `systemd-cgtop`. The program will show only resources for +cgroup `/`, but all services will show nothing. + +To enable cgroup accounting, execute this: + +```sh +sed -e 's|^#Default\(.*\)Accounting=.*$|Default\1Accounting=yes|g' /etc/systemd/system.conf >/tmp/system.conf +``` + +To see the changes it made, run this: + +```sh +# diff /etc/systemd/system.conf /tmp/system.conf +40,44c40,44 +< #DefaultCPUAccounting=no +< #DefaultIOAccounting=no +< #DefaultBlockIOAccounting=no +< #DefaultMemoryAccounting=no +< #DefaultTasksAccounting=yes +--- +> DefaultCPUAccounting=yes +> DefaultIOAccounting=yes +> DefaultBlockIOAccounting=yes +> DefaultMemoryAccounting=yes +> DefaultTasksAccounting=yes +``` + +If you are happy with the changes, run: + +```sh +# copy the file to the right location +sudo cp /tmp/system.conf /etc/systemd/system.conf + +# restart systemd to take it into account +sudo systemctl daemon-reexec +``` + +(`systemctl daemon-reload` does not reload the configuration of the server - so you have to +execute `systemctl daemon-reexec`). + +Now, when you run `systemd-cgtop`, services will start reporting usage (if it does not, restart any service to wake it up). Refresh your Netdata dashboard, and you will have the charts too. + +In case memory accounting is missing, you will need to enable it at your kernel, by appending the following kernel boot +options and rebooting: + +```sh +cgroup_enable=memory swapaccount=1 +``` + +You can add the above, directly at the `linux` line in your `/boot/grub/grub.cfg` or appending them to +the `GRUB_CMDLINE_LINUX` in `/etc/default/grub` (in which case you will have to run `update-grub` before rebooting). On +DigitalOcean debian images you may have to set it at `/etc/default/grub.d/50-cloudimg-settings.cfg`. + +Which systemd services are monitored by Netdata is determined by the following pattern list: + +```text +[plugin:cgroups] + cgroups to match as systemd services = !/system.slice/*/*.service /system.slice/*.service +``` + +- - - + +## Monitoring ephemeral containers + +Netdata monitors containers automatically when it is installed at the host, or when it is installed in a container that +has access to the `/proc` and `/sys` filesystems of the host. + +Netdata prior to v1.6 had 2 issues when such containers were monitored: + +1. network interface alarms where triggering when containers were stopped + +2. charts were never cleaned up, so after some time dozens of containers were showing up on the dashboard, and they were + occupying memory. + +### the current Netdata + +network interfaces and cgroups (containers) are now self-cleaned. + +So, when a network interface or container stops, Netdata might log a few errors in error.log complaining about files it +cannot find, but immediately: + +1. it will detect this is a removed container or network interface +2. it will freeze/pause all alarms for them +3. it will mark their charts as obsolete +4. obsolete charts are not be offered on new dashboard sessions (so hit F5 and the charts are gone) +5. existing dashboard sessions will continue to see them, but of course they will not refresh +6. obsolete charts will be removed from memory, 1 hour after the last user viewed them (configurable + with `[global].cleanup obsolete charts after seconds = 3600` (at `netdata.conf`). +7. when obsolete charts are removed from memory they are also deleted from disk (configurable + with `[global].delete obsolete charts files = yes`) + +### Monitored container metrics + +- CPU usage +- CPU usage within the limits +- CPU usage per core +- Memory usage +- Writeback memory +- Memory activity +- Memory page faults +- Used memory +- Used RAM within the limits +- Memory utilization +- Memory limit failures +- I/O bandwidth (all disks) +- Serviced I/O operations (all disks) +- Throttle I/O bandwidth (all disks) +- Throttle serviced I/O operations (all disks) +- Queued I/O operations (all disks) +- Merged I/O operations (all disks) +- CPU pressure +- Memory pressure +- Memory full pressure +- I/O pressure +- I/O full pressure + +Network interfaces are monitored by means of +the [proc plugin](/collectors/proc.plugin/README.md#monitored-network-interface-metrics). diff --git a/collectors/cgroups.plugin/cgroup-name.sh b/collectors/cgroups.plugin/cgroup-name.sh new file mode 100755 index 0000000..55b02ac --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-name.sh @@ -0,0 +1,597 @@ +#!/usr/bin/env bash +#shellcheck disable=SC2001 + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Script to find a better name for cgroups +# + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +function parse_docker_like_inspect_output() { + local output="${1}" + eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME)=" <<<"$output")" + if [ -n "$NOMAD_NAMESPACE" ] && [ -n "$NOMAD_JOB_NAME" ] && [ -n "$NOMAD_TASK_NAME" ] && [ -n "$NOMAD_SHORT_ALLOC_ID" ]; then + echo "${NOMAD_NAMESPACE}-${NOMAD_JOB_NAME}-${NOMAD_TASK_NAME}-${NOMAD_SHORT_ALLOC_ID}" + else + echo "${CONT_NAME}" | sed 's|^/||' + fi +} + +function docker_like_get_name_command() { + local command="${1}" + local id="${2}" + info "Running command: ${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' \"${id}\"" + if OUTPUT="$(${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' "${id}")" && + [ -n "$OUTPUT" ]; then + NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + fi + return 0 +} + +function docker_like_get_name_api() { + local host_var="${1}" + local host="${!host_var}" + local path="/containers/${2}/json" + if [ -z "${host}" ]; then + warning "No ${host_var} is set" + return 1 + fi + if ! command -v jq >/dev/null 2>&1; then + warning "Can't find jq command line tool. jq is required for netdata to retrieve container name using ${host} API, falling back to docker ps" + return 1 + fi + if [ -S "${host}" ]; then + info "Running API command: curl --unix-socket \"${host}\" http://localhost${path}" + JSON=$(curl -sS --unix-socket "${host}" "http://localhost${path}") + else + info "Running API command: curl \"${host}${path}\"" + JSON=$(curl -sS "${host}${path}") + fi + if OUTPUT=$(echo "${JSON}" | jq -r '.Config.Env[],"CONT_NAME=\(.Name)"') && [ -n "$OUTPUT" ]; then + NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + fi + return 0 +} + +# get_lbl_val returns the value for the label with the given name. +# Returns "null" string if the label doesn't exist. +# Expected labels format: 'name="value",...'. +function get_lbl_val() { + local labels want_name + labels="${1}" + want_name="${2}" + + IFS=, read -ra labels <<< "$labels" + + local lname lval + for l in "${labels[@]}"; do + IFS="=" read -r lname lval <<< "$l" + if [ "$want_name" = "$lname" ] && [ -n "$lval" ]; then + echo "${lval:1:-1}" # trim " + return 0 + fi + done + + echo "null" + return 1 +} + +function add_lbl_prefix() { + local orig_labels prefix + orig_labels="${1}" + prefix="${2}" + + IFS=, read -ra labels <<< "$orig_labels" + + local new_labels + for l in "${labels[@]}"; do + new_labels+="${prefix}${l}," + done + + echo "${new_labels:0:-1}" # trim last ',' +} + +function k8s_is_pause_container() { + local cgroup_path="${1}" + + local file + if [ -d "${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct" ]; then + file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct/$cgroup_path/cgroup.procs" + else + file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/$cgroup_path/cgroup.procs" + fi + + [ ! -f "$file" ] && return 1 + + local procs + IFS= read -rd' ' procs 2>/dev/null <"$file" + #shellcheck disable=SC2206 + procs=($procs) + + [ "${#procs[@]}" -ne 1 ] && return 1 + + IFS= read -r comm 2>/dev/null <"/proc/${procs[0]}/comm" + + [ "$comm" == "pause" ] + return +} + +function k8s_gcp_get_cluster_name() { + local header url id loc name + header="Metadata-Flavor: Google" + url="http://metadata/computeMetadata/v1" + if id=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/project/project-id") && + loc=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-location") && + name=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-name") && + [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ]; then + echo "gke_${id}_${loc}_${name}" + return 0 + fi + return 1 +} + +# k8s_get_kubepod_name resolves */kubepods/* cgroup name. +# pod level cgroup name format: 'pod__' +# container level cgroup name format: 'cntr___' +function k8s_get_kubepod_name() { + # GKE /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): + # |-- kubepods + # | |-- burstable + # | | |-- pod98cee708-023b-11eb-933d-42010a800193 + # | | | |-- 922161c98e6ea450bf665226cdc64ca2aa3e889934c2cff0aec4325f8f78ac03 + # | `-- pode314bbac-d577-11ea-a171-42010a80013b + # | |-- 7d505356b04507de7b710016d540b2759483ed5f9136bb01a80872b08f771930 + # + # GKE /sys/fs/cgroup/*/ (cri=containerd, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | | |-- kubepods-besteffort-pode1465238_4518_4c21_832f_fd9f87033dad.slice + # | | | |-- cri-containerd-66be9b2efdf4d85288c319b8c1a2f50d2439b5617e36f45d9d0d0be1381113be.scope + # | `-- kubepods-pod91f5b561_369f_4103_8015_66391059996a.slice + # | |-- cri-containerd-24c53b774a586f06abc058619b47f71d9d869ac50c92898adbd199106fd0aaeb.scope + # + # GKE /sys/fs/cgroup/*/ (cri=crio, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | | |-- kubepods-besteffort-podad412dfe_3589_4056_965a_592356172968.slice + # | | | |-- crio-77b019312fd9825828b70214b2c94da69c30621af2a7ee06f8beace4bc9439e5.scope + # + # Minikube (v1.8.2) /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | | |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice + # | | | |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope + # + # kind v0.14.0 + # |-- kubelet.slice + # | |-- kubelet-kubepods.slice + # | | |-- kubelet-kubepods-besteffort.slice + # | | | |-- kubelet-kubepods-besteffort-pod7881ed9e_c63e_4425_b5e0_ac55a08ae939.slice + # | | | | |-- cri-containerd-00c7939458bffc416bb03451526e9fde13301d6654cfeadf5b4964a7fb5be1a9.scope + # + # NOTE: cgroups plugin + # - uses '_' to join dir names (so it is ___...) + # - replaces '.' with '-' + + local fn="${FUNCNAME[0]}" + local cgroup_path="${1}" + local id="${2}" + + if [[ ! $id =~ ^.*kubepods.* ]]; then + warning "${fn}: '${id}' is not kubepod cgroup." + return 1 + fi + + local clean_id="$id" + clean_id=${clean_id//.slice/} + clean_id=${clean_id//.scope/} + + local name pod_uid cntr_id + if [[ $clean_id == "kubepods" ]]; then + name="$clean_id" + elif [[ $clean_id =~ .+(besteffort|burstable|guaranteed)$ ]]; then + # kubepods_ + # kubepods_kubepods- + name=${clean_id//-/_} + name=${name/#kubepods_kubepods/kubepods} + elif [[ $clean_id =~ .+pod[a-f0-9_-]+_(docker|crio|cri-containerd)-([a-f0-9]+)$ ]]; then + # ...pod_(docker|crio|cri-containerd)- (POD_UID w/ "_") + cntr_id=${BASH_REMATCH[2]} + elif [[ $clean_id =~ .+pod[a-f0-9-]+_([a-f0-9]+)$ ]]; then + # ...pod_ + cntr_id=${BASH_REMATCH[1]} + elif [[ $clean_id =~ .+pod([a-f0-9_-]+)$ ]]; then + # ...pod (POD_UID w/ and w/o "_") + pod_uid=${BASH_REMATCH[1]} + pod_uid=${pod_uid//_/-} + fi + + if [ -n "$name" ]; then + echo "$name" + return 0 + fi + + if [ -z "$pod_uid" ] && [ -z "$cntr_id" ]; then + warning "${fn}: can't extract pod_uid or container_id from the cgroup '$id'." + return 3 + fi + + [ -n "$pod_uid" ] && info "${fn}: cgroup '$id' is a pod(uid:$pod_uid)" + [ -n "$cntr_id" ] && info "${fn}: cgroup '$id' is a container(id:$cntr_id)" + + if [ -n "$cntr_id" ] && k8s_is_pause_container "$cgroup_path"; then + return 3 + fi + + if ! command -v jq > /dev/null 2>&1; then + warning "${fn}: 'jq' command not available." + return 1 + fi + + local tmp_kube_cluster_name="${TMPDIR:-"/tmp"}/netdata-cgroups-k8s-cluster-name" + local tmp_kube_system_ns_uid_file="${TMPDIR:-"/tmp"}/netdata-cgroups-kubesystem-uid" + local tmp_kube_containers_file="${TMPDIR:-"/tmp"}/netdata-cgroups-containers" + + local kube_cluster_name + local kube_system_uid + local labels + + if [ -n "$cntr_id" ] && + [ -f "$tmp_kube_cluster_name" ] && + [ -f "$tmp_kube_system_ns_uid_file" ] && + [ -f "$tmp_kube_containers_file" ] && + labels=$(grep "$cntr_id" "$tmp_kube_containers_file" 2>/dev/null); then + IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" + IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" + else + IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" + IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" + [ -z "$kube_cluster_name" ] && ! kube_cluster_name=$(k8s_gcp_get_cluster_name) && kube_cluster_name="unknown" + + local kube_system_ns + local pods + + if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_PORT_443_TCP_PORT}" ]; then + local token header host url + token="$(&1); then + warning "${fn}: error on curl '${url}': ${kube_system_ns}." + fi + fi + + url="https://$host/api/v1/pods" + [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME" + # FIX: check HTTP response code + if ! pods=$(curl --fail -sSk -H "$header" "$url" 2>&1); then + warning "${fn}: error on curl '${url}': ${pods}." + return 1 + fi + elif ps -C kubelet >/dev/null 2>&1 && command -v kubectl >/dev/null 2>&1; then + if [ -z "$kube_system_uid" ]; then + if ! kube_system_ns=$(kubectl --kubeconfig="$KUBE_CONFIG" get namespaces kube-system -o json 2>&1); then + warning "${fn}: error on 'kubectl': ${kube_system_ns}." + fi + fi + + [[ -z ${KUBE_CONFIG+x} ]] && KUBE_CONFIG="/etc/kubernetes/admin.conf" + if ! pods=$(kubectl --kubeconfig="$KUBE_CONFIG" get pods --all-namespaces -o json 2>&1); then + warning "${fn}: error on 'kubectl': ${pods}." + return 1 + fi + else + warning "${fn}: not inside the k8s cluster and 'kubectl' command not available." + return 1 + fi + + if [ -n "$kube_system_ns" ] && ! kube_system_uid=$(jq -r '.metadata.uid' <<<"$kube_system_ns" 2>&1); then + warning "${fn}: error on 'jq' parse kube_system_ns: ${kube_system_uid}." + fi + + local jq_filter + jq_filter+='.items[] | "' + jq_filter+='namespace=\"\(.metadata.namespace)\",' + jq_filter+='pod_name=\"\(.metadata.name)\",' + jq_filter+='pod_uid=\"\(.metadata.uid)\",' + #jq_filter+='\(.metadata.labels | to_entries | map("pod_label_"+.key+"=\""+.value+"\"") | join(",") | if length > 0 then .+"," else . end)' + jq_filter+='\((.metadata.ownerReferences[]? | select(.controller==true) | "controller_kind=\""+.kind+"\",controller_name=\""+.name+"\",") // "")' + jq_filter+='node_name=\"\(.spec.nodeName)\",' + jq_filter+='" + ' + jq_filter+='(.status.containerStatuses[]? | "' + jq_filter+='container_name=\"\(.name)\",' + jq_filter+='container_id=\"\(.containerID)\"' + jq_filter+='") | ' + jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722 + + local containers + if ! containers=$(jq -r "${jq_filter}" <<<"$pods" 2>&1); then + warning "${fn}: error on 'jq' parse pods: ${containers}." + return 1 + fi + + [ -n "$kube_cluster_name" ] && echo "$kube_cluster_name" >"$tmp_kube_cluster_name" 2>/dev/null + [ -n "$kube_system_ns" ] && [ -n "$kube_system_uid" ] && echo "$kube_system_uid" >"$tmp_kube_system_ns_uid_file" 2>/dev/null + echo "$containers" >"$tmp_kube_containers_file" 2>/dev/null + fi + + local qos_class + if [[ $clean_id =~ .+(besteffort|burstable) ]]; then + qos_class="${BASH_REMATCH[1]}" + else + qos_class="guaranteed" + fi + + # available labels: + # namespace, pod_name, pod_uid, container_name, container_id, node_name + if [ -n "$cntr_id" ]; then + if [ -n "$labels" ] || labels=$(grep "$cntr_id" <<< "$containers" 2> /dev/null); then + labels+=',kind="container"' + labels+=",qos_class=\"$qos_class\"" + [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" + [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" + name="cntr" + name+="_$(get_lbl_val "$labels" namespace)" + name+="_$(get_lbl_val "$labels" pod_name)" + name+="_$(get_lbl_val "$labels" container_name)" + labels=$(add_lbl_prefix "$labels" "k8s_") + name+=" $labels" + else + return 2 + fi + elif [ -n "$pod_uid" ]; then + if labels=$(grep "$pod_uid" -m 1 <<< "$containers" 2> /dev/null); then + labels="${labels%%,container_*}" + labels+=',kind="pod"' + labels+=",qos_class=\"$qos_class\"" + [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" + [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" + name="pod" + name+="_$(get_lbl_val "$labels" namespace)" + name+="_$(get_lbl_val "$labels" pod_name)" + labels=$(add_lbl_prefix "$labels" "k8s_") + name+=" $labels" + else + return 2 + fi + fi + + # jq filter nonexistent field and nonexistent label value is 'null' + if [[ $name =~ _null(_|$) ]]; then + warning "${fn}: invalid name: $name (cgroup '$id')" + return 1 + fi + + echo "$name" + [ -n "$name" ] + return +} + +function k8s_get_name() { + local fn="${FUNCNAME[0]}" + local cgroup_path="${1}" + local id="${2}" + + NAME=$(k8s_get_kubepod_name "$cgroup_path" "$id") + + case "$?" in + 0) + NAME="k8s_${NAME}" + + local name labels + name=${NAME%% *} + labels=${NAME#* } + if [ "$name" != "$labels" ]; then + info "${fn}: cgroup '${id}' has chart name '${name}', labels '${labels}" + else + info "${fn}: cgroup '${id}' has chart name '${NAME}'" + fi + EXIT_CODE=$EXIT_SUCCESS + ;; + 1) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and enabling it." + EXIT_CODE=$EXIT_SUCCESS + ;; + 2) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and asking for retry." + EXIT_CODE=$EXIT_RETRY + ;; + *) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and disabling it." + EXIT_CODE=$EXIT_DISABLE + ;; + esac +} + +function docker_get_name() { + local id="${1}" + # See https://github.com/netdata/netdata/pull/13523 for details + if command -v snap >/dev/null 2>&1 && snap list docker >/dev/null 2>&1; then + docker_like_get_name_api DOCKER_HOST "${id}" + elif hash docker 2> /dev/null; then + docker_like_get_name_command docker "${id}" + else + docker_like_get_name_api DOCKER_HOST "${id}" || docker_like_get_name_command podman "${id}" + fi + if [ -z "${NAME}" ]; then + warning "cannot find the name of docker container '${id}'" + EXIT_CODE=$EXIT_RETRY + NAME="${id:0:12}" + else + info "docker container '${id}' is named '${NAME}'" + fi +} + +function docker_validate_id() { + local id="${1}" + if [ -n "${id}" ] && { [ ${#id} -eq 64 ] || [ ${#id} -eq 12 ]; }; then + docker_get_name "${id}" + else + error "a docker id cannot be extracted from docker cgroup '${CGROUP}'." + fi +} + +function podman_get_name() { + local id="${1}" + + # for Podman, prefer using the API if we can, as netdata will not normally have access + # to other users' containers, so they will not be visible when running `podman ps` + docker_like_get_name_api PODMAN_HOST "${id}" || docker_like_get_name_command podman "${id}" + + if [ -z "${NAME}" ]; then + warning "cannot find the name of podman container '${id}'" + EXIT_CODE=$EXIT_RETRY + NAME="${id:0:12}" + else + info "podman container '${id}' is named '${NAME}'" + fi +} + +function podman_validate_id() { + local id="${1}" + if [ -n "${id}" ] && [ ${#id} -eq 64 ]; then + podman_get_name "${id}" + else + error "a podman id cannot be extracted from docker cgroup '${CGROUP}'." + fi +} + +# ----------------------------------------------------------------------------- + +DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}" +PODMAN_HOST="${PODMAN_HOST:=/run/podman/podman.sock}" +CGROUP_PATH="${1}" # the path as it is (e.g. '/docker/efcf4c409') +CGROUP="${2}" # the modified path (e.g. 'docker_efcf4c409') +EXIT_SUCCESS=0 +EXIT_RETRY=2 +EXIT_DISABLE=3 +EXIT_CODE=$EXIT_SUCCESS +NAME= + +# ----------------------------------------------------------------------------- + +if [ -z "${CGROUP}" ]; then + fatal "called without a cgroup name. Nothing to do." +fi + +if [ -z "${NAME}" ]; then + if [[ ${CGROUP} =~ ^.*kubepods.* ]]; then + k8s_get_name "${CGROUP_PATH}" "${CGROUP}" + fi +fi + +if [ -z "${NAME}" ]; then + if [[ ${CGROUP} =~ ^.*docker[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then + # docker containers + #shellcheck disable=SC1117 + DOCKERID="$(echo "${CGROUP}" | sed "s|^.*docker[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" + docker_validate_id "${DOCKERID}" + elif [[ ${CGROUP} =~ ^.*ecs[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then + # ECS + #shellcheck disable=SC1117 + DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ecs[-_/].*[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" + docker_validate_id "${DOCKERID}" + elif [[ ${CGROUP} =~ system.slice_containerd.service_cpuset_[a-fA-F0-9]+[-_\.]?.*$ ]]; then + # docker containers under containerd + #shellcheck disable=SC1117 + DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ystem.slice_containerd.service_cpuset_\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" + docker_validate_id "${DOCKERID}" + elif [[ ${CGROUP} =~ ^.*libpod-[a-fA-F0-9]+.*$ ]]; then + # Podman + PODMANID="$(echo "${CGROUP}" | sed "s|^.*libpod-\([a-fA-F0-9]\+\).*$|\1|")" + podman_validate_id "${PODMANID}" + + elif [[ ${CGROUP} =~ machine.slice[_/].*\.service ]]; then + # systemd-nspawn + NAME="$(echo "${CGROUP}" | sed 's/.*machine.slice[_\/]\(.*\)\.service/\1/g')" + + elif [[ ${CGROUP} =~ machine.slice_machine.*-lxc ]]; then + # libvirtd / lxc containers + # machine.slice machine-lxc/x2d969/x2dhubud0xians01.scope => lxc/hubud0xians01 + # machine.slice_machine-lxc/x2d969/x2dhubud0xians01.scope/libvirt_init.scope => lxc/hubud0xians01/libvirt_init + NAME="lxc/$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-lxc//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')" + elif [[ ${CGROUP} =~ machine.slice_machine.*-qemu ]]; then + # libvirtd / qemu virtual machines + # machine.slice_machine-qemu_x2d1_x2dopnsense.scope => qemu_opnsense + NAME="qemu_$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-qemu//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')" + + elif [[ ${CGROUP} =~ machine_.*\.libvirt-qemu ]]; then + # libvirtd / qemu virtual machines + NAME="qemu_$(echo "${CGROUP}" | sed 's/^machine_//; s/\.libvirt-qemu$//; s/-/_/;')" + + elif [[ ${CGROUP} =~ qemu.slice_([0-9]+).scope && -d /etc/pve ]]; then + # Proxmox VMs + + FILENAME="/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" + if [[ -f $FILENAME && -r $FILENAME ]]; then + NAME="qemu_$(grep -e '^name: ' "/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')" + else + error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." + fi + elif [[ ${CGROUP} =~ lxc_([0-9]+) && -d /etc/pve ]]; then + # Proxmox Containers (LXC) + + FILENAME="/etc/pve/lxc/${BASH_REMATCH[1]}.conf" + if [[ -f ${FILENAME} && -r ${FILENAME} ]]; then + NAME=$(grep -e '^hostname: ' "/etc/pve/lxc/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p') + else + error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." + fi + elif [[ ${CGROUP} =~ lxc.payload.* ]]; then + # LXC 4.0 + NAME="$(echo "${CGROUP}" | sed 's/lxc\.payload\.\(.*\)/\1/g')" + fi + + [ -z "${NAME}" ] && NAME="${CGROUP}" + [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}" +fi + +info "cgroup '${CGROUP}' is called '${NAME}'" +echo "${NAME}" + +exit ${EXIT_CODE} diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh b/collectors/cgroups.plugin/cgroup-network-helper.sh new file mode 100755 index 0000000..783332f --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-network-helper.sh @@ -0,0 +1,302 @@ +#!/usr/bin/env bash +# shellcheck disable=SC1117 + +# cgroup-network-helper.sh +# detect container and virtual machine interfaces +# +# (C) 2017 Costa Tsaousis +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This script is called as root (by cgroup-network), with either a pid, or a cgroup path. +# It tries to find all the network interfaces that belong to the same cgroup. +# +# It supports several method for this detection: +# +# 1. cgroup-network (the binary father of this script) detects veth network interfaces, +# by examining iflink and ifindex IDs and switching namespaces +# (it also detects the interface name as it is used by the container). +# +# 2. this script, uses /proc/PID/fdinfo to find tun/tap network interfaces. +# +# 3. this script, calls virsh to find libvirt network interfaces. +# + +# ----------------------------------------------------------------------------- + +# the system path is cleared by cgroup-network +# shellcheck source=/dev/null +[ -f /etc/profile ] && source /etc/profile + +export LC_ALL=C + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +debug=${NETDATA_CGROUP_NETWORK_HELPER_DEBUG=0} +debug() { + [ "${debug}" = "1" ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- +# check for BASH v4+ (required for associative arrays) + +[ $(( BASH_VERSINFO[0] )) -lt 4 ] && \ + fatal "BASH version 4 or later is required (this is ${BASH_VERSION})." + +# ----------------------------------------------------------------------------- +# parse the arguments + +pid= +cgroup= +while [ -n "${1}" ] +do + case "${1}" in + --cgroup) cgroup="${2}"; shift 1;; + --pid|-p) pid="${2}"; shift 1;; + --debug|debug) debug=1;; + *) fatal "Cannot understand argument '${1}'";; + esac + + shift +done + +if [ -z "${pid}" ] && [ -z "${cgroup}" ] +then + fatal "Either --pid or --cgroup is required" +fi + +# ----------------------------------------------------------------------------- + +set_source() { + [ ${debug} -eq 1 ] && echo "SRC ${*}" +} + + +# ----------------------------------------------------------------------------- +# veth interfaces via cgroup + +# cgroup-network can detect veth interfaces by itself (written in C). +# If you seek for a shell version of what it does, check this: +# https://github.com/netdata/netdata/issues/474#issuecomment-317866709 + + +# ----------------------------------------------------------------------------- +# tun/tap interfaces via /proc/PID/fdinfo + +# find any tun/tap devices linked to a pid +proc_pid_fdinfo_iff() { + local p="${1}" # the pid + + debug "Searching for tun/tap interfaces for pid ${p}..." + set_source "fdinfo" + grep "^iff:.*" "${NETDATA_HOST_PREFIX}/proc/${p}/fdinfo"/* 2>/dev/null | cut -f 2 +} + +find_tun_tap_interfaces_for_cgroup() { + local c="${1}" # the cgroup path + [ -d "${c}/emulator" ] && c="${c}/emulator" # check for 'emulator' subdirectory + c="${c}/cgroup.procs" # make full path + + # for each pid of the cgroup + # find any tun/tap devices linked to the pid + if [ -f "${c}" ] + then + local p + for p in $(< "${c}" ) + do + proc_pid_fdinfo_iff "${p}" + done + else + debug "Cannot find file '${c}', not searching for tun/tap interfaces." + fi +} + + +# ----------------------------------------------------------------------------- +# virsh domain network interfaces + +virsh_cgroup_to_domain_name() { + local c="${1}" # the cgroup path + + debug "extracting a possible virsh domain from cgroup ${c}..." + + # extract for the cgroup path + sed -n -e "s|.*/machine-qemu\\\\x2d[0-9]\+\\\\x2d\(.*\)\.scope$|\1|p" \ + -e "s|.*/machine/qemu-[0-9]\+-\(.*\)\.libvirt-qemu$|\1|p" \ + -e "s|.*/machine/\(.*\)\.libvirt-qemu$|\1|p" \ + < vm01-web (https://github.com/netdata/netdata/issues/11088#issuecomment-832618149) + d="$(printf '%b' "${d}")" + + if [ -n "${d}" ] + then + debug "running: virsh domiflist ${d}; to find the network interfaces" + + # 'virsh -r domiflist ' example output + # Interface Type Source Model MAC + #-------------------------------------------------------------- + # vnet3 bridge br0 virtio 52:54:00:xx:xx:xx + # vnet4 network default virtio 52:54:00:yy:yy:yy + + # match only 'network' interfaces from virsh output + set_source "virsh" + "${virsh}" -r domiflist "${d}" |\ + sed -n \ + -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+network[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" \ + -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+bridge[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" + else + debug "no virsh domain extracted from cgroup ${c}" + fi + else + debug "virsh command is not available" + fi +} + +# ----------------------------------------------------------------------------- +# netnsid detected interfaces + +netnsid_find_all_interfaces_for_pid() { + local pid="${1}" + [ -z "${pid}" ] && return 1 + + local nsid + nsid=$(lsns -t net -p "${pid}" -o NETNSID -nr 2>/dev/null) + if [ -z "${nsid}" ] || [ "${nsid}" = "unassigned" ]; then + return 1 + fi + + set_source "netnsid" + ip link show |\ + grep -B 1 -E " link-netnsid ${nsid}($| )" |\ + sed -n -e "s|^[[:space:]]*[0-9]\+:[[:space:]]\+\([A-Za-z0-9_]\+\)\(@[A-Za-z0-9_]\+\)*:[[:space:]].*$|\1|p" +} + +netnsid_find_all_interfaces_for_cgroup() { + local c="${1}" # the cgroup path + + if [ -f "${c}/cgroup.procs" ]; then + netnsid_find_all_interfaces_for_pid "$(head -n 1 "${c}/cgroup.procs" 2>/dev/null)" + else + debug "Cannot find file '${c}/cgroup.procs', not searching for netnsid interfaces." + fi +} + +# ----------------------------------------------------------------------------- + +find_all_interfaces_of_pid_or_cgroup() { + local p="${1}" c="${2}" # the pid and the cgroup path + + if [ -n "${pid}" ] + then + # we have been called with a pid + + proc_pid_fdinfo_iff "${p}" + netnsid_find_all_interfaces_for_pid "${p}" + + elif [ -n "${c}" ] + then + # we have been called with a cgroup + + info "searching for network interfaces of cgroup '${c}'" + + find_tun_tap_interfaces_for_cgroup "${c}" + virsh_find_all_interfaces_for_cgroup "${c}" + netnsid_find_all_interfaces_for_cgroup "${c}" + + else + + error "Either a pid or a cgroup path is needed" + return 1 + + fi + + return 0 +} + +# ----------------------------------------------------------------------------- + +# an associative array to store the interfaces +# the index is the interface name as seen by the host +# the value is the interface name as seen by the guest / container +declare -A devs=() + +# store all interfaces found in the associative array +# this will also give the unique devices, as seen by the host +last_src= +# shellcheck disable=SC2162 +while read host_device guest_device +do + [ -z "${host_device}" ] && continue + + [ "${host_device}" = "SRC" ] && last_src="${guest_device}" && continue + + # the default guest_device is the host_device + [ -z "${guest_device}" ] && guest_device="${host_device}" + + # when we run in debug, show the source + debug "Found host device '${host_device}', guest device '${guest_device}', detected via '${last_src}'" + + if [ -z "${devs[${host_device}]}" ] || [ "${devs[${host_device}]}" = "${host_device}" ]; then + devs[${host_device}]="${guest_device}" + fi + +done < <( find_all_interfaces_of_pid_or_cgroup "${pid}" "${cgroup}" ) + +# print the interfaces found, in the format netdata expects them +found=0 +for x in "${!devs[@]}" +do + found=$((found + 1)) + echo "${x} ${devs[${x}]}" +done + +debug "found ${found} network interfaces for pid '${pid}', cgroup '${cgroup}', run as ${USER}, ${UID}" + +# let netdata know if we found any +[ ${found} -eq 0 ] && exit 1 +exit 0 diff --git a/collectors/cgroups.plugin/cgroup-network.c b/collectors/cgroups.plugin/cgroup-network.c new file mode 100644 index 0000000..0b66ea4 --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-network.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#ifdef HAVE_SETNS +#ifndef _GNU_SOURCE +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#endif +#include +#endif + +char environment_variable2[FILENAME_MAX + 50] = ""; +char *environment[] = { + "PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin", + environment_variable2, + NULL +}; + +struct iface { + const char *device; + uint32_t hash; + + unsigned int ifindex; + unsigned int iflink; + + struct iface *next; +}; + +unsigned int calc_num_ifaces(struct iface *root) { + unsigned int num = 0; + for (struct iface *h = root; h; h = h->next) { + num++; + } + return num; +} + +unsigned int read_iface_iflink(const char *prefix, const char *iface) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/iflink", prefix, iface); + + unsigned long long iflink = 0; + int ret = read_single_number_file(filename, &iflink); + if(ret) error("Cannot read '%s'.", filename); + + return (unsigned int)iflink; +} + +unsigned int read_iface_ifindex(const char *prefix, const char *iface) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/ifindex", prefix, iface); + + unsigned long long ifindex = 0; + int ret = read_single_number_file(filename, &ifindex); + if(ret) error("Cannot read '%s'.", filename); + + return (unsigned int)ifindex; +} + +struct iface *read_proc_net_dev(const char *scope __maybe_unused, const char *prefix) { + if(!prefix) prefix = ""; + + procfile *ff = NULL; + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s%s", prefix, (*prefix)?"/proc/1/net/dev":"/proc/net/dev"); + +#ifdef NETDATA_INTERNAL_CHECKS + info("parsing '%s'", filename); +#endif + + ff = procfile_open(filename, " \t,:|", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + error("Cannot open file '%s'", filename); + return NULL; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + error("Cannot read file '%s'", filename); + return NULL; + } + + size_t lines = procfile_lines(ff), l; + struct iface *root = NULL; + for(l = 2; l < lines ;l++) { + if (unlikely(procfile_linewords(ff, l) < 1)) continue; + + struct iface *t = callocz(1, sizeof(struct iface)); + t->device = strdupz(procfile_lineword(ff, l, 0)); + t->hash = simple_hash(t->device); + t->ifindex = read_iface_ifindex(prefix, t->device); + t->iflink = read_iface_iflink(prefix, t->device); + t->next = root; + root = t; + +#ifdef NETDATA_INTERNAL_CHECKS + info("added %s interface '%s', ifindex %u, iflink %u", scope, t->device, t->ifindex, t->iflink); +#endif + } + + procfile_close(ff); + + return root; +} + +void free_iface(struct iface *iface) { + freez((void *)iface->device); + freez(iface); +} + +void free_host_ifaces(struct iface *iface) { + while(iface) { + struct iface *t = iface->next; + free_iface(iface); + iface = t; + } +} + +int iface_is_eligible(struct iface *iface) { + if(iface->iflink != iface->ifindex) + return 1; + + return 0; +} + +int eligible_ifaces(struct iface *root) { + int eligible = 0; + + struct iface *t; + for(t = root; t ; t = t->next) + if(iface_is_eligible(t)) + eligible++; + + return eligible; +} + +static void continue_as_child(void) { + pid_t child = fork(); + int status; + pid_t ret; + + if (child < 0) + error("fork() failed"); + + /* Only the child returns */ + if (child == 0) + return; + + for (;;) { + ret = waitpid(child, &status, WUNTRACED); + if ((ret == child) && (WIFSTOPPED(status))) { + /* The child suspended so suspend us as well */ + kill(getpid(), SIGSTOP); + kill(child, SIGCONT); + } else { + break; + } + } + + /* Return the child's exit code if possible */ + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + kill(getpid(), WTERMSIG(status)); + } + + exit(EXIT_FAILURE); +} + +int proc_pid_fd(const char *prefix, const char *ns, pid_t pid) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/%s", prefix, (int)pid, ns); + int fd = open(filename, O_RDONLY); + + if(fd == -1) + error("Cannot open proc_pid_fd() file '%s'", filename); + + return fd; +} + +static struct ns { + int nstype; + int fd; + int status; + const char *name; + const char *path; +} all_ns[] = { + // { .nstype = CLONE_NEWUSER, .fd = -1, .status = -1, .name = "user", .path = "ns/user" }, + // { .nstype = CLONE_NEWCGROUP, .fd = -1, .status = -1, .name = "cgroup", .path = "ns/cgroup" }, + // { .nstype = CLONE_NEWIPC, .fd = -1, .status = -1, .name = "ipc", .path = "ns/ipc" }, + // { .nstype = CLONE_NEWUTS, .fd = -1, .status = -1, .name = "uts", .path = "ns/uts" }, + { .nstype = CLONE_NEWNET, .fd = -1, .status = -1, .name = "network", .path = "ns/net" }, + { .nstype = CLONE_NEWPID, .fd = -1, .status = -1, .name = "pid", .path = "ns/pid" }, + { .nstype = CLONE_NEWNS, .fd = -1, .status = -1, .name = "mount", .path = "ns/mnt" }, + + // terminator + { .nstype = 0, .fd = -1, .status = -1, .name = NULL, .path = NULL } +}; + +int switch_namespace(const char *prefix, pid_t pid) { + +#ifdef HAVE_SETNS + + int i; + for(i = 0; all_ns[i].name ; i++) + all_ns[i].fd = proc_pid_fd(prefix, all_ns[i].path, pid); + + int root_fd = proc_pid_fd(prefix, "root", pid); + int cwd_fd = proc_pid_fd(prefix, "cwd", pid); + + setgroups(0, NULL); + + // 2 passes - found it at nsenter source code + // this is related CLONE_NEWUSER functionality + + // This code cannot switch user namespace (it can all the other namespaces) + // Fortunately, we don't need to switch user namespaces. + + int pass; + for(pass = 0; pass < 2 ;pass++) { + for(i = 0; all_ns[i].name ; i++) { + if (all_ns[i].fd != -1 && all_ns[i].status == -1) { + if(setns(all_ns[i].fd, all_ns[i].nstype) == -1) { + if(pass == 1) { + all_ns[i].status = 0; + error("Cannot switch to %s namespace of pid %d", all_ns[i].name, (int) pid); + } + } + else + all_ns[i].status = 1; + } + } + } + + setgroups(0, NULL); + + if(root_fd != -1) { + if(fchdir(root_fd) < 0) + error("Cannot fchdir() to pid %d root directory", (int)pid); + + if(chroot(".") < 0) + error("Cannot chroot() to pid %d root directory", (int)pid); + + close(root_fd); + } + + if(cwd_fd != -1) { + if(fchdir(cwd_fd) < 0) + error("Cannot fchdir() to pid %d current working directory", (int)pid); + + close(cwd_fd); + } + + int do_fork = 0; + for(i = 0; all_ns[i].name ; i++) + if(all_ns[i].fd != -1) { + + // CLONE_NEWPID requires a fork() to become effective + if(all_ns[i].nstype == CLONE_NEWPID && all_ns[i].status) + do_fork = 1; + + close(all_ns[i].fd); + } + + if(do_fork) + continue_as_child(); + + return 0; + +#else + + errno = ENOSYS; + error("setns() is missing on this system."); + return 1; + +#endif +} + +pid_t read_pid_from_cgroup_file(const char *filename) { + int fd = open(filename, procfile_open_flags); + if(fd == -1) { + error("Cannot open pid_from_cgroup() file '%s'.", filename); + return 0; + } + + FILE *fp = fdopen(fd, "r"); + if(!fp) { + error("Cannot upgrade fd to fp for file '%s'.", filename); + return 0; + } + + char buffer[100 + 1]; + pid_t pid = 0; + char *s; + while((s = fgets(buffer, 100, fp))) { + buffer[100] = '\0'; + pid = atoi(s); + if(pid > 0) break; + } + + fclose(fp); + +#ifdef NETDATA_INTERNAL_CHECKS + if(pid > 0) info("found pid %d on file '%s'", pid, filename); +#endif + + return pid; +} + +pid_t read_pid_from_cgroup_files(const char *path) { + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s/cgroup.procs", path); + pid_t pid = read_pid_from_cgroup_file(filename); + if(pid > 0) return pid; + + snprintfz(filename, FILENAME_MAX, "%s/tasks", path); + return read_pid_from_cgroup_file(filename); +} + +pid_t read_pid_from_cgroup(const char *path) { + pid_t pid = read_pid_from_cgroup_files(path); + if (pid > 0) return pid; + + DIR *dir = opendir(path); + if (!dir) { + error("cannot read directory '%s'", path); + return 0; + } + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + )) + continue; + + if (de->d_type == DT_DIR) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s", path, de->d_name); + pid = read_pid_from_cgroup(filename); + if(pid > 0) break; + } + } + closedir(dir); + return pid; +} + +// ---------------------------------------------------------------------------- +// send the result to netdata + +struct found_device { + const char *host_device; + const char *guest_device; + + uint32_t host_device_hash; + + struct found_device *next; +} *detected_devices = NULL; + +void add_device(const char *host, const char *guest) { +#ifdef NETDATA_INTERNAL_CHECKS + info("adding device with host '%s', guest '%s'", host, guest); +#endif + + uint32_t hash = simple_hash(host); + + if(guest && (!*guest || strcmp(host, guest) == 0)) + guest = NULL; + + struct found_device *f; + for(f = detected_devices; f ; f = f->next) { + if(f->host_device_hash == hash && !strcmp(host, f->host_device)) { + + if(guest && (!f->guest_device || !strcmp(f->host_device, f->guest_device))) { + if(f->guest_device) freez((void *)f->guest_device); + f->guest_device = strdupz(guest); + } + + return; + } + } + + f = mallocz(sizeof(struct found_device)); + f->host_device = strdupz(host); + f->host_device_hash = hash; + f->guest_device = (guest)?strdupz(guest):NULL; + f->next = detected_devices; + detected_devices = f; +} + +int send_devices(void) { + int found = 0; + + struct found_device *f; + for(f = detected_devices; f ; f = f->next) { + found++; + printf("%s %s\n", f->host_device, (f->guest_device)?f->guest_device:f->host_device); + } + + return found; +} + +// ---------------------------------------------------------------------------- +// this function should be called only **ONCE** +// also it has to be the **LAST** to be called +// since it switches namespaces, so after this call, everything is different! + +void detect_veth_interfaces(pid_t pid) { + struct iface *cgroup = NULL; + struct iface *host, *h, *c; + + host = read_proc_net_dev("host", netdata_configured_host_prefix); + if(!host) { + errno = 0; + error("cannot read host interface list."); + goto cleanup; + } + + if(!eligible_ifaces(host)) { + errno = 0; + info("there are no double-linked host interfaces available."); + goto cleanup; + } + + if(switch_namespace(netdata_configured_host_prefix, pid)) { + errno = 0; + error("cannot switch to the namespace of pid %u", (unsigned int) pid); + goto cleanup; + } + +#ifdef NETDATA_INTERNAL_CHECKS + info("switched to namespaces of pid %d", pid); +#endif + + cgroup = read_proc_net_dev("cgroup", NULL); + if(!cgroup) { + errno = 0; + error("cannot read cgroup interface list."); + goto cleanup; + } + + if(!eligible_ifaces(cgroup)) { + errno = 0; + error("there are not double-linked cgroup interfaces available."); + goto cleanup; + } + + unsigned int host_dev_num = calc_num_ifaces(host); + unsigned int cgroup_dev_num = calc_num_ifaces(cgroup); + // host ifaces == guest ifaces => we are still in the host namespace + // and we can't really identify which ifaces belong to the cgroup (e.g. Proxmox VM). + if (host_dev_num == cgroup_dev_num) { + unsigned int m = 0; + for (h = host; h; h = h->next) { + for (c = cgroup; c; c = c->next) { + if (h->ifindex == c->ifindex && h->iflink == c->iflink) { + m++; + break; + } + } + } + if (host_dev_num == m) { + goto cleanup; + } + } + + for(h = host; h ; h = h->next) { + if(iface_is_eligible(h)) { + for (c = cgroup; c; c = c->next) { + if(iface_is_eligible(c) && h->ifindex == c->iflink && h->iflink == c->ifindex) { + add_device(h->device, c->device); + } + } + } + } + +cleanup: + free_host_ifaces(cgroup); + free_host_ifaces(host); +} + +// ---------------------------------------------------------------------------- +// call the external helper + +#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 +void call_the_helper(pid_t pid, const char *cgroup) { + if(setresuid(0, 0, 0) == -1) + error("setresuid(0, 0, 0) failed."); + + char command[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + if(cgroup) + snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --cgroup '%s'", cgroup); + else + snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --pid %d", pid); + + info("running: %s", command); + + pid_t cgroup_pid; + FILE *fp_child_input, *fp_child_output; + + if(cgroup) { + (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup); + } + else { + char buffer[100]; + snprintfz(buffer, sizeof(buffer) - 1, "%d", pid); + (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer); + } + + if(fp_child_output) { + char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + char *s; + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + trim(s); + + if(*s && *s != '\n') { + char *t = s; + while(*t && *t != ' ') t++; + if(*t == ' ') { + *t = '\0'; + t++; + } + + if(!*s || !*t) continue; + add_device(s, t); + } + } + + netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + } + else + error("cannot execute cgroup-network helper script: %s", command); +} + +int is_valid_path_symbol(char c) { + switch(c) { + case '/': // path separators + case '\\': // needed for virsh domains \x2d1\x2dname + case ' ': // space + case '-': // hyphen + case '_': // underscore + case '.': // dot + case ',': // comma + return 1; + + default: + return 0; + } +} + +// we will pass this path a shell script running as root +// so, we need to make sure the path will be valid +// and will not include anything that could allow +// the caller use shell expansion for gaining escalated +// privileges. +int verify_path(const char *path) { + struct stat sb; + + char c; + const char *s = path; + while((c = *s++)) { + if(!( isalnum(c) || is_valid_path_symbol(c) )) { + error("invalid character in path '%s'", path); + return -1; + } + } + + if(strstr(path, "\\") && !strstr(path, "\\x")) { + error("invalid escape sequence in path '%s'", path); + return 1; + } + + if(strstr(path, "/../")) { + error("invalid parent path sequence detected in '%s'", path); + return 1; + } + + if(path[0] != '/') { + error("only absolute path names are supported - invalid path '%s'", path); + return -1; + } + + if (stat(path, &sb) == -1) { + error("cannot stat() path '%s'", path); + return -1; + } + + if((sb.st_mode & S_IFMT) != S_IFDIR) { + error("path '%s' is not a directory", path); + return -1; + } + + return 0; +} + +/* +char *fix_path_variable(void) { + const char *path = getenv("PATH"); + if(!path || !*path) return 0; + + char *p = strdupz(path); + char *safe_path = callocz(1, strlen(p) + strlen("PATH=") + 1); + strcpy(safe_path, "PATH="); + + int added = 0; + char *ptr = p; + while(ptr && *ptr) { + char *s = strsep(&ptr, ":"); + if(s && *s) { + if(verify_path(s) == -1) { + error("the PATH variable includes an invalid path '%s' - removed it.", s); + } + else { + info("the PATH variable includes a valid path '%s'.", s); + if(added) strcat(safe_path, ":"); + strcat(safe_path, s); + added++; + } + } + } + + info("unsafe PATH: '%s'.", path); + info(" safe PATH: '%s'.", safe_path); + + freez(p); + return safe_path; +} +*/ + +// ---------------------------------------------------------------------------- +// main + +void usage(void) { + fprintf(stderr, "%s [ -p PID | --pid PID | --cgroup /path/to/cgroup ]\n", program_name); + exit(1); +} + +int main(int argc, char **argv) { + pid_t pid = 0; + + program_name = argv[0]; + program_version = VERSION; + error_log_syslog = 0; + + // since cgroup-network runs as root, prevent it from opening symbolic links + procfile_open_flags = O_RDONLY|O_NOFOLLOW; + + // ------------------------------------------------------------------------ + // make sure NETDATA_HOST_PREFIX is safe + + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(verify_netdata_host_prefix() == -1) exit(1); + + if(netdata_configured_host_prefix[0] != '\0' && verify_path(netdata_configured_host_prefix) == -1) + fatal("invalid NETDATA_HOST_PREFIX '%s'", netdata_configured_host_prefix); + + // ------------------------------------------------------------------------ + // build a safe environment for our script + + // the first environment variable is a fixed PATH= + snprintfz(environment_variable2, sizeof(environment_variable2) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix); + + // ------------------------------------------------------------------------ + + if(argc == 2 && (!strcmp(argv[1], "version") || !strcmp(argv[1], "-version") || !strcmp(argv[1], "--version") || !strcmp(argv[1], "-v") || !strcmp(argv[1], "-V"))) { + fprintf(stderr, "cgroup-network %s\n", VERSION); + exit(0); + } + + if(argc != 3) + usage(); + + int arg = 1; + int helper = 1; + if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) + helper = 0; + + if(!strcmp(argv[arg], "-p") || !strcmp(argv[arg], "--pid")) { + pid = atoi(argv[arg+1]); + + if(pid <= 0) { + errno = 0; + error("Invalid pid %d given", (int) pid); + return 2; + } + + if(helper) call_the_helper(pid, NULL); + } + else if(!strcmp(argv[arg], "--cgroup")) { + char *cgroup = argv[arg+1]; + if(verify_path(cgroup) == -1) { + error("cgroup '%s' does not exist or is not valid.", cgroup); + return 1; + } + + pid = read_pid_from_cgroup(cgroup); + if(helper) call_the_helper(pid, cgroup); + + if(pid <= 0 && !detected_devices) { + errno = 0; + error("Cannot find a cgroup PID from cgroup '%s'", cgroup); + } + } + else + usage(); + + if(pid > 0) + detect_veth_interfaces(pid); + + int found = send_devices(); + if(found <= 0) return 1; + return 0; +} diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c new file mode 100644 index 0000000..8f75482 --- /dev/null +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -0,0 +1,4887 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sys_fs_cgroup.h" + +#define PLUGIN_CGROUPS_NAME "cgroups.plugin" +#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" +#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" + +#ifdef NETDATA_INTERNAL_CHECKS +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT +#else +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO +#endif + +// main cgroups thread worker jobs +#define WORKER_CGROUPS_LOCK 0 +#define WORKER_CGROUPS_READ 1 +#define WORKER_CGROUPS_CHART 2 + +// discovery cgroup thread worker jobs +#define WORKER_DISCOVERY_INIT 0 +#define WORKER_DISCOVERY_FIND 1 +#define WORKER_DISCOVERY_PROCESS 2 +#define WORKER_DISCOVERY_PROCESS_RENAME 3 +#define WORKER_DISCOVERY_PROCESS_NETWORK 4 +#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5 +#define WORKER_DISCOVERY_UPDATE 6 +#define WORKER_DISCOVERY_CLEANUP 7 +#define WORKER_DISCOVERY_COPY 8 +#define WORKER_DISCOVERY_SHARE 9 +#define WORKER_DISCOVERY_LOCK 10 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11 +#endif + +// ---------------------------------------------------------------------------- +// cgroup globals + +static char cgroup_chart_id_prefix[] = "cgroup_"; + +static int is_inside_k8s = 0; + +static long system_page_size = 4096; // system will be queried via sysconf() in configuration() + +static int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES; +static int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO; +static int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; + +static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; +static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; +static int cgroup_used_memory = CONFIG_BOOLEAN_YES; + +static int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO; +static int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO; + +static int cgroup_search_in_devices = 1; + +static int cgroup_check_for_new_every = 10; +static int cgroup_update_every = 1; +static int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; + +static int cgroup_recheck_zero_blkio_every_iterations = 10; +static int cgroup_recheck_zero_mem_failcnt_every_iterations = 10; +static int cgroup_recheck_zero_mem_detailed_every_iterations = 10; + +static char *cgroup_cpuacct_base = NULL; +static char *cgroup_cpuset_base = NULL; +static char *cgroup_blkio_base = NULL; +static char *cgroup_memory_base = NULL; +static char *cgroup_devices_base = NULL; +static char *cgroup_unified_base = NULL; + +static int cgroup_root_count = 0; +static int cgroup_root_max = 1000; +static int cgroup_max_depth = 0; + +static SIMPLE_PATTERN *enabled_cgroup_paths = NULL; +static SIMPLE_PATTERN *enabled_cgroup_names = NULL; +static SIMPLE_PATTERN *search_cgroup_paths = NULL; +static SIMPLE_PATTERN *enabled_cgroup_renames = NULL; +static SIMPLE_PATTERN *systemd_services_cgroups = NULL; + +static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL; + +static char *cgroups_rename_script = NULL; +static char *cgroups_network_interface_script = NULL; + +static int cgroups_check = 0; + +static uint32_t Read_hash = 0; +static uint32_t Write_hash = 0; +static uint32_t user_hash = 0; +static uint32_t system_hash = 0; +static uint32_t user_usec_hash = 0; +static uint32_t system_usec_hash = 0; +static uint32_t nr_periods_hash = 0; +static uint32_t nr_throttled_hash = 0; +static uint32_t throttled_time_hash = 0; +static uint32_t throttled_usec_hash = 0; + +enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 }; + +enum cgroups_systemd_setting { + SYSTEMD_CGROUP_ERR, + SYSTEMD_CGROUP_LEGACY, + SYSTEMD_CGROUP_HYBRID, + SYSTEMD_CGROUP_UNIFIED +}; + +struct cgroups_systemd_config_setting { + char *name; + enum cgroups_systemd_setting setting; +}; + +static struct cgroups_systemd_config_setting cgroups_systemd_options[] = { + { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY }, + { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID }, + { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED }, + { .name = NULL, .setting = SYSTEMD_CGROUP_ERR }, +}; + +// Shared memory with information from detected cgroups +netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL}; +static int shm_fd_cgroup_ebpf = -1; +sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED; + +/* on Fed systemd is not in PATH for some reason */ +#define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version" +#define SYSTEMD_HIERARCHY_STRING "default-hierarchy=" + +#define MAXSIZE_PROC_CMDLINE 4096 +static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) +{ + pid_t command_pid; + enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR; + char buf[MAXSIZE_PROC_CMDLINE]; + char *begin, *end; + + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(exec, &command_pid, &fp_child_input); + + if (!fp_child_output) + return retval; + + fd_set rfds; + struct timeval timeout; + int fd = fileno(fp_child_output); + int ret = -1; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + timeout.tv_sec = 3; + timeout.tv_usec = 0; + + if (fd != -1) { + ret = select(fd + 1, &rfds, NULL, NULL, &timeout); + } + + if (ret == -1) { + error("Failed to get the output of \"%s\"", exec); + } else if (ret == 0) { + info("Cannot get the output of \"%s\" within %"PRId64" seconds", exec, (int64_t)timeout.tv_sec); + } else { + while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) { + end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING); + if (!*begin) + break; + while (isalpha(*end)) + end++; + *end = 0; + for (int i = 0; cgroups_systemd_options[i].name; i++) { + if (!strcmp(begin, cgroups_systemd_options[i].name)) { + retval = cgroups_systemd_options[i].setting; + break; + } + } + break; + } + } + } + + if (netdata_pclose(fp_child_input, fp_child_output, command_pid)) + return SYSTEMD_CGROUP_ERR; + + return retval; +} + +static enum cgroups_type cgroups_try_detect_version() +{ + pid_t command_pid; + char buf[MAXSIZE_PROC_CMDLINE]; + enum cgroups_systemd_setting systemd_setting; + int cgroups2_available = 0; + + // 1. check if cgroups2 available on system at all + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen("grep cgroup /proc/filesystems", &command_pid, &fp_child_input); + if (!fp_child_output) { + error("popen failed"); + return CGROUPS_AUTODETECT_FAIL; + } + while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + if (strstr(buf, "cgroup2")) { + cgroups2_available = 1; + break; + } + } + if(netdata_pclose(fp_child_input, fp_child_output, command_pid)) + return CGROUPS_AUTODETECT_FAIL; + + if(!cgroups2_available) + return CGROUPS_V1; + +#if defined CGROUP2_SUPER_MAGIC + // 2. check filesystem type for the default mountpoint + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/fs/cgroup"); + struct statfs fsinfo; + if (!statfs(filename, &fsinfo)) { + if (fsinfo.f_type == CGROUP2_SUPER_MAGIC) + return CGROUPS_V2; + } +#endif + + // 3. check systemd compiletime setting + if ((systemd_setting = cgroups_detect_systemd("systemd --version")) == SYSTEMD_CGROUP_ERR) + systemd_setting = cgroups_detect_systemd(SYSTEMD_CMD_RHEL); + + if(systemd_setting == SYSTEMD_CGROUP_ERR) + return CGROUPS_AUTODETECT_FAIL; + + if(systemd_setting == SYSTEMD_CGROUP_LEGACY || systemd_setting == SYSTEMD_CGROUP_HYBRID) { + // currently we prefer V1 if HYBRID is set as it seems to be more feature complete + // in the future we might want to continue here if SYSTEMD_CGROUP_HYBRID + // and go ahead with V2 + return CGROUPS_V1; + } + + // 4. if we are unified as on Fedora (default cgroups2 only mode) + // check kernel command line flag that can override that setting + FILE *fp = fopen("/proc/cmdline", "r"); + if (!fp) { + error("Error reading kernel boot commandline parameters"); + return CGROUPS_AUTODETECT_FAIL; + } + + if (!fgets(buf, MAXSIZE_PROC_CMDLINE, fp)) { + error("couldn't read all cmdline params into buffer"); + fclose(fp); + return CGROUPS_AUTODETECT_FAIL; + } + + fclose(fp); + + if (strstr(buf, "systemd.unified_cgroup_hierarchy=0")) { + info("cgroups v2 (unified cgroups) is available but are disabled on this system."); + return CGROUPS_V1; + } + return CGROUPS_V2; +} + +void set_cgroup_base_path(char *filename, char *path) { + if (strncmp(netdata_configured_host_prefix, path, strlen(netdata_configured_host_prefix)) == 0) { + snprintfz(filename, FILENAME_MAX, "%s", path); + } else { + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, path); + } +} + +void read_cgroup_plugin_configuration() { + system_page_size = sysconf(_SC_PAGESIZE); + + Read_hash = simple_hash("Read"); + Write_hash = simple_hash("Write"); + user_hash = simple_hash("user"); + system_hash = simple_hash("system"); + user_usec_hash = simple_hash("user_usec"); + system_usec_hash = simple_hash("system_usec"); + nr_periods_hash = simple_hash("nr_periods"); + nr_throttled_hash = simple_hash("nr_throttled"); + throttled_time_hash = simple_hash("throttled_time"); + throttled_usec_hash = simple_hash("throttled_usec"); + + cgroup_update_every = (int)config_get_number("plugin:cgroups", "update every", localhost->rrd_update_every); + if(cgroup_update_every < localhost->rrd_update_every) + cgroup_update_every = localhost->rrd_update_every; + + cgroup_check_for_new_every = (int)config_get_number("plugin:cgroups", "check for new cgroups every", (long long)cgroup_check_for_new_every * (long long)cgroup_update_every); + if(cgroup_check_for_new_every < cgroup_update_every) + cgroup_check_for_new_every = cgroup_update_every; + + cgroup_use_unified_cgroups = config_get_boolean_ondemand("plugin:cgroups", "use unified cgroups", CONFIG_BOOLEAN_AUTO); + if(cgroup_use_unified_cgroups == CONFIG_BOOLEAN_AUTO) + cgroup_use_unified_cgroups = (cgroups_try_detect_version() == CGROUPS_V2); + + info("use unified cgroups %s", cgroup_use_unified_cgroups ? "true" : "false"); + + cgroup_containers_chart_priority = (int)config_get_number("plugin:cgroups", "containers priority", cgroup_containers_chart_priority); + if(cgroup_containers_chart_priority < 1) + cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; + + cgroup_enable_cpuacct_stat = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct stat (total CPU)", cgroup_enable_cpuacct_stat); + cgroup_enable_cpuacct_usage = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct usage (per core CPU)", cgroup_enable_cpuacct_usage); + cgroup_enable_cpuacct_cpu_throttling = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu throttling", cgroup_enable_cpuacct_cpu_throttling); + cgroup_enable_cpuacct_cpu_shares = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu shares", cgroup_enable_cpuacct_cpu_shares); + + cgroup_enable_memory = config_get_boolean_ondemand("plugin:cgroups", "enable memory", cgroup_enable_memory); + cgroup_enable_detailed_memory = config_get_boolean_ondemand("plugin:cgroups", "enable detailed memory", cgroup_enable_detailed_memory); + cgroup_enable_memory_failcnt = config_get_boolean_ondemand("plugin:cgroups", "enable memory limits fail count", cgroup_enable_memory_failcnt); + cgroup_enable_swap = config_get_boolean_ondemand("plugin:cgroups", "enable swap memory", cgroup_enable_swap); + + cgroup_enable_blkio_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio bandwidth", cgroup_enable_blkio_io); + cgroup_enable_blkio_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio operations", cgroup_enable_blkio_ops); + cgroup_enable_blkio_throttle_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle bandwidth", cgroup_enable_blkio_throttle_io); + cgroup_enable_blkio_throttle_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle operations", cgroup_enable_blkio_throttle_ops); + cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops); + cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops); + + cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu); + cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some); + cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full); + cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some); + cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full); + + cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations); + cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations); + cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations); + + cgroup_enable_systemd_services = config_get_boolean("plugin:cgroups", "enable systemd services", cgroup_enable_systemd_services); + cgroup_enable_systemd_services_detailed_memory = config_get_boolean("plugin:cgroups", "enable systemd services detailed memory", cgroup_enable_systemd_services_detailed_memory); + cgroup_used_memory = config_get_boolean("plugin:cgroups", "report used memory", cgroup_used_memory); + + char filename[FILENAME_MAX + 1], *s; + struct mountinfo *mi, *root = mountinfo_read(0); + if(!cgroup_use_unified_cgroups) { + // cgroup v1 does not have pressure metrics + cgroup_enable_pressure_cpu = + cgroup_enable_pressure_io_some = + cgroup_enable_pressure_io_full = + cgroup_enable_pressure_memory_some = + cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); + if(!mi) { + error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct"); + s = "/sys/fs/cgroup/cpuacct"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset"); + if(!mi) { + error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset"); + s = "/sys/fs/cgroup/cpuset"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio"); + if(!mi) { + error("CGROUP: cannot find blkio mountinfo. Assuming default: /sys/fs/cgroup/blkio"); + s = "/sys/fs/cgroup/blkio"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "memory"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory"); + if(!mi) { + error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory"); + s = "/sys/fs/cgroup/memory"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "devices"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices"); + if(!mi) { + error("CGROUP: cannot find devices mountinfo. Assuming default: /sys/fs/cgroup/devices"); + s = "/sys/fs/cgroup/devices"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", filename); + } + else { + //cgroup_enable_cpuacct_stat = + cgroup_enable_cpuacct_usage = + //cgroup_enable_memory = + //cgroup_enable_detailed_memory = + cgroup_enable_memory_failcnt = + //cgroup_enable_swap = + //cgroup_enable_blkio_io = + //cgroup_enable_blkio_ops = + cgroup_enable_blkio_throttle_io = + cgroup_enable_blkio_throttle_ops = + cgroup_enable_blkio_merged_ops = + cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO; + cgroup_search_in_devices = 0; + cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; + cgroup_used_memory = CONFIG_BOOLEAN_NO; //unified cgroups use different values + + //TODO: can there be more than 1 cgroup2 mount point? + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); //there is no cgroup2 specific super option - for now use 'rw' option + if(mi) debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point); + if(!mi) { + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup"); + if(mi) debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point); + } + if(!mi) { + error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup"); + s = "/sys/fs/cgroup"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename); + debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base); + } + + cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max); + cgroup_max_depth = (int)config_get_number("plugin:cgroups", "max cgroups depth to monitor", cgroup_max_depth); + + enabled_cgroup_paths = simple_pattern_create( + config_get("plugin:cgroups", "enable by default cgroups matching", + // ---------------------------------------------------------------- + + " !*/init.scope " // ignore init.scope + " !/system.slice/run-*.scope " // ignore system.slice/run-XXXX.scope + " *.scope " // we need all other *.scope for sure + + // ---------------------------------------------------------------- + + " /machine.slice/*.service " // #3367 systemd-nspawn + + // ---------------------------------------------------------------- + + " */kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " */*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups + + // ---------------------------------------------------------------- + + " !*/vcpu* " // libvirtd adds these sub-cgroups + " !*/emulator " // libvirtd adds these sub-cgroups + " !*.mount " + " !*.partition " + " !*.service " + " !*.socket " + " !*.slice " + " !*.swap " + " !*.user " + " !/ " + " !/docker " + " !*/libvirt " + " !/lxc " + " !/lxc/*/* " // #1397 #2649 + " !/lxc.monitor* " + " !/lxc.pivot " + " !/lxc.payload " + " !/machine " + " !/qemu " + " !/system " + " !/systemd " + " !/user " + " * " // enable anything else + ), NULL, SIMPLE_PATTERN_EXACT); + + enabled_cgroup_names = simple_pattern_create( + config_get("plugin:cgroups", "enable by default cgroups names matching", + " * " + ), NULL, SIMPLE_PATTERN_EXACT); + + search_cgroup_paths = simple_pattern_create( + config_get("plugin:cgroups", "search for cgroups in subpaths matching", + " !*/init.scope " // ignore init.scope + " !*-qemu " // #345 + " !*.libvirt-qemu " // #3010 + " !/init.scope " + " !/system " + " !/systemd " + " !/user " + " !/user.slice " + " !/lxc/*/* " // #2161 #2649 + " !/lxc.monitor " + " !/lxc.payload/*/* " + " !/lxc.payload.* " + " * " + ), NULL, SIMPLE_PATTERN_EXACT); + + snprintfz(filename, FILENAME_MAX, "%s/cgroup-name.sh", netdata_configured_primary_plugins_dir); + cgroups_rename_script = config_get("plugin:cgroups", "script to get cgroup names", filename); + + snprintfz(filename, FILENAME_MAX, "%s/cgroup-network", netdata_configured_primary_plugins_dir); + cgroups_network_interface_script = config_get("plugin:cgroups", "script to get cgroup network interfaces", filename); + + enabled_cgroup_renames = simple_pattern_create( + config_get("plugin:cgroups", "run script to rename cgroups matching", + " !/ " + " !*.mount " + " !*.socket " + " !*.partition " + " /machine.slice/*.service " // #3367 systemd-nspawn + " !*.service " + " !*.slice " + " !*.swap " + " !*.user " + " !init.scope " + " !*.scope/vcpu* " // libvirtd adds these sub-cgroups + " !*.scope/emulator " // libvirtd adds these sub-cgroups + " *.scope " + " *docker* " + " *lxc* " + " *qemu* " + " */kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " */*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups + " *.libvirt-qemu " // #3010 + " * " + ), NULL, SIMPLE_PATTERN_EXACT); + + if(cgroup_enable_systemd_services) { + systemd_services_cgroups = simple_pattern_create( + config_get("plugin:cgroups", "cgroups to match as systemd services", + " !/system.slice/*/*.service " + " /system.slice/*.service " + ), NULL, SIMPLE_PATTERN_EXACT); + } + + mountinfo_free_all(root); +} + +void netdata_cgroup_ebpf_set_values(size_t length) +{ + sem_wait(shm_mutex_cgroup_ebpf); + + shm_cgroup_ebpf.header->cgroup_max = cgroup_root_max; + shm_cgroup_ebpf.header->systemd_enabled = cgroup_enable_systemd_services | + cgroup_enable_systemd_services_detailed_memory | + cgroup_used_memory; + shm_cgroup_ebpf.header->body_length = length; + + sem_post(shm_mutex_cgroup_ebpf); +} + +void netdata_cgroup_ebpf_initialize_shm() +{ + shm_fd_cgroup_ebpf = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_CREAT | O_RDWR, 0660); + if (shm_fd_cgroup_ebpf < 0) { + error("Cannot initialize shared memory used by cgroup and eBPF, integration won't happen."); + return; + } + + size_t length = sizeof(netdata_ebpf_cgroup_shm_header_t) + cgroup_root_max * sizeof(netdata_ebpf_cgroup_shm_body_t); + if (ftruncate(shm_fd_cgroup_ebpf, length)) { + error("Cannot set size for shared memory."); + goto end_init_shm; + } + + shm_cgroup_ebpf.header = (netdata_ebpf_cgroup_shm_header_t *) mmap(NULL, length, + PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd_cgroup_ebpf, 0); + + if (!shm_cgroup_ebpf.header) { + error("Cannot map shared memory used between cgroup and eBPF, integration won't happen"); + goto end_init_shm; + } + shm_cgroup_ebpf.body = (netdata_ebpf_cgroup_shm_body_t *) ((char *)shm_cgroup_ebpf.header + + sizeof(netdata_ebpf_cgroup_shm_header_t)); + + shm_mutex_cgroup_ebpf = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, 1); + + if (shm_mutex_cgroup_ebpf != SEM_FAILED) { + netdata_cgroup_ebpf_set_values(length); + return; + } + + error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); + munmap(shm_cgroup_ebpf.header, length); + +end_init_shm: + close(shm_fd_cgroup_ebpf); + shm_fd_cgroup_ebpf = -1; + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); +} + +// ---------------------------------------------------------------------------- +// cgroup objects + +struct blkio { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int delay_counter; + + char *filename; + + unsigned long long Read; + unsigned long long Write; +/* + unsigned long long Sync; + unsigned long long Async; + unsigned long long Total; +*/ +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt +struct memory { + ARL_BASE *arl_base; + ARL_ENTRY *arl_dirty; + ARL_ENTRY *arl_swap; + + int updated_detailed; + int updated_usage_in_bytes; + int updated_msw_usage_in_bytes; + int updated_failcnt; + + int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + int delay_counter_detailed; + int delay_counter_failcnt; + + char *filename_detailed; + char *filename_usage_in_bytes; + char *filename_msw_usage_in_bytes; + char *filename_failcnt; + + int detailed_has_dirty; + int detailed_has_swap; + + // detailed metrics +/* + unsigned long long cache; + unsigned long long rss; + unsigned long long rss_huge; + unsigned long long mapped_file; + unsigned long long writeback; + unsigned long long dirty; + unsigned long long swap; + unsigned long long pgpgin; + unsigned long long pgpgout; + unsigned long long pgfault; + unsigned long long pgmajfault; + unsigned long long inactive_anon; + unsigned long long active_anon; + unsigned long long inactive_file; + unsigned long long active_file; + unsigned long long unevictable; + unsigned long long hierarchical_memory_limit; +*/ + //unified cgroups metrics + unsigned long long anon; + unsigned long long kernel_stack; + unsigned long long slab; + unsigned long long sock; + unsigned long long shmem; + unsigned long long anon_thp; + //unsigned long long file_writeback; + //unsigned long long file_dirty; + //unsigned long long file; + + unsigned long long total_cache; + unsigned long long total_rss; + unsigned long long total_rss_huge; + unsigned long long total_mapped_file; + unsigned long long total_writeback; + unsigned long long total_dirty; + unsigned long long total_swap; + unsigned long long total_pgpgin; + unsigned long long total_pgpgout; + unsigned long long total_pgfault; + unsigned long long total_pgmajfault; +/* + unsigned long long total_inactive_anon; + unsigned long long total_active_anon; +*/ + + unsigned long long total_inactive_file; + +/* + unsigned long long total_active_file; + unsigned long long total_unevictable; +*/ + + // single file metrics + unsigned long long usage_in_bytes; + unsigned long long msw_usage_in_bytes; + unsigned long long failcnt; +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +struct cpuacct_stat { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long user; // v1, v2(user_usec) + unsigned long long system; // v1, v2(system_usec) +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +struct cpuacct_usage { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned int cpus; + unsigned long long *cpu_percpu; +}; + +// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec' +struct cpuacct_cpu_throttling { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long nr_periods; + unsigned long long nr_throttled; + unsigned long long throttled_time; + + unsigned long long nr_throttled_perc; +}; + +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications +struct cpuacct_cpu_shares { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long shares; +}; + +struct cgroup_network_interface { + const char *host_device; + const char *container_device; + struct cgroup_network_interface *next; +}; + +enum cgroups_container_orchestrator { + CGROUPS_ORCHESTRATOR_UNSET, + CGROUPS_ORCHESTRATOR_UNKNOWN, + CGROUPS_ORCHESTRATOR_K8S +}; + +// *** WARNING *** The fields are not thread safe. Take care of safe usage. +struct cgroup { + uint32_t options; + + int first_time_seen; // first time seen by the discoverer + int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option) + + char available; // found in the filesystem + char enabled; // enabled in the config + + char pending_renames; + char *intermediate_id; // TODO: remove it when the renaming script is fixed + + char *id; + uint32_t hash; + + char *chart_id; + uint32_t hash_chart; + + char *chart_title; + + DICTIONARY *chart_labels; + + int container_orchestrator; + + struct cpuacct_stat cpuacct_stat; + struct cpuacct_usage cpuacct_usage; + struct cpuacct_cpu_throttling cpuacct_cpu_throttling; + struct cpuacct_cpu_shares cpuacct_cpu_shares; + + struct memory memory; + + struct blkio io_service_bytes; // bytes + struct blkio io_serviced; // operations + + struct blkio throttle_io_service_bytes; // bytes + struct blkio throttle_io_serviced; // operations + + struct blkio io_merged; // operations + struct blkio io_queued; // operations + + struct cgroup_network_interface *interfaces; + + struct pressure cpu_pressure; + struct pressure io_pressure; + struct pressure memory_pressure; + + // per cgroup charts + RRDSET *st_cpu; + RRDSET *st_cpu_limit; + RRDSET *st_cpu_per_core; + RRDSET *st_cpu_nr_throttled; + RRDSET *st_cpu_throttled_time; + RRDSET *st_cpu_shares; + + RRDSET *st_mem; + RRDSET *st_mem_utilization; + RRDSET *st_writeback; + RRDSET *st_mem_activity; + RRDSET *st_pgfaults; + RRDSET *st_mem_usage; + RRDSET *st_mem_usage_limit; + RRDSET *st_mem_failcnt; + + RRDSET *st_io; + RRDSET *st_serviced_ops; + RRDSET *st_throttle_io; + RRDSET *st_throttle_serviced_ops; + RRDSET *st_queued_ops; + RRDSET *st_merged_ops; + + // per cgroup chart variables + char *filename_cpuset_cpus; + unsigned long long cpuset_cpus; + + char *filename_cpu_cfs_period; + unsigned long long cpu_cfs_period; + + char *filename_cpu_cfs_quota; + unsigned long long cpu_cfs_quota; + + const RRDSETVAR_ACQUIRED *chart_var_cpu_limit; + NETDATA_DOUBLE prev_cpu_usage; + + char *filename_memory_limit; + unsigned long long memory_limit; + const RRDSETVAR_ACQUIRED *chart_var_memory_limit; + + char *filename_memoryswap_limit; + unsigned long long memoryswap_limit; + const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit; + + // services + RRDDIM *rd_cpu; + RRDDIM *rd_mem_usage; + RRDDIM *rd_mem_failcnt; + RRDDIM *rd_swap_usage; + + RRDDIM *rd_mem_detailed_cache; + RRDDIM *rd_mem_detailed_rss; + RRDDIM *rd_mem_detailed_mapped; + RRDDIM *rd_mem_detailed_writeback; + RRDDIM *rd_mem_detailed_pgpgin; + RRDDIM *rd_mem_detailed_pgpgout; + RRDDIM *rd_mem_detailed_pgfault; + RRDDIM *rd_mem_detailed_pgmajfault; + + RRDDIM *rd_io_service_bytes_read; + RRDDIM *rd_io_serviced_read; + RRDDIM *rd_throttle_io_read; + RRDDIM *rd_throttle_io_serviced_read; + RRDDIM *rd_io_queued_read; + RRDDIM *rd_io_merged_read; + + RRDDIM *rd_io_service_bytes_write; + RRDDIM *rd_io_serviced_write; + RRDDIM *rd_throttle_io_write; + RRDDIM *rd_throttle_io_serviced_write; + RRDDIM *rd_io_queued_write; + RRDDIM *rd_io_merged_write; + + struct cgroup *next; + struct cgroup *discovered_next; + +} *cgroup_root = NULL; + +uv_mutex_t cgroup_root_mutex; + +struct cgroup *discovered_cgroup_root = NULL; + +struct discovery_thread { + uv_thread_t thread; + uv_mutex_t mutex; + uv_cond_t cond_var; + int start_discovery; + int exited; +} discovery_thread; + +// --------------------------------------------------------------------------------------------- + +static inline int matches_enabled_cgroup_paths(char *id) { + return simple_pattern_matches(enabled_cgroup_paths, id); +} + +static inline int matches_enabled_cgroup_names(char *name) { + return simple_pattern_matches(enabled_cgroup_names, name); +} + +static inline int matches_enabled_cgroup_renames(char *id) { + return simple_pattern_matches(enabled_cgroup_renames, id); +} + +static inline int matches_systemd_services_cgroups(char *id) { + return simple_pattern_matches(systemd_services_cgroups, id); +} + +static inline int matches_search_cgroup_paths(const char *dir) { + return simple_pattern_matches(search_cgroup_paths, dir); +} + +static inline int matches_entrypoint_parent_process_comm(const char *comm) { + return simple_pattern_matches(entrypoint_parent_process_comm, comm); +} + +static inline int is_cgroup_systemd_service(struct cgroup *cg) { + return (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE); +} + +// --------------------------------------------------------------------------------------------- +static int k8s_is_kubepod(struct cgroup *cg) { + return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S; +} + +static int k8s_is_container(const char *id) { + // examples: + // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147 + const char *p = id; + const char *pp = NULL; + int i = 0; + size_t l = 3; // pod + while ((p = strstr(p, "pod"))) { + i++; + p += l; + pp = p; + } + return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp); +} + +#define TASK_COMM_LEN 16 + +static int k8s_get_container_first_proc_comm(const char *id, char *comm) { + if (!k8s_is_container(id)) { + return 1; + } + + static procfile *ff = NULL; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id); + + ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + unsigned long lines = procfile_lines(ff); + if (likely(lines < 2)) { + return 1; + } + + char *pid = procfile_lineword(ff, 0, 0); + if (!pid || !*pid) { + return 1; + } + + snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid); + + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + lines = procfile_lines(ff); + if (unlikely(lines != 2)) { + return 1; + } + + char *proc_comm = procfile_lineword(ff, 0, 0); + if (!proc_comm || !*proc_comm) { + return 1; + } + + strncpyz(comm, proc_comm, TASK_COMM_LEN); + return 0; +} + +// --------------------------------------------------------------------------------------------- + +static unsigned long long calc_delta(unsigned long long curr, unsigned long long prev) { + if (prev > curr) { + return 0; + } + return curr - prev; +} + +static unsigned long long calc_percentage(unsigned long long value, unsigned long long total) { + if (total == 0) { + return 0; + } + return (NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100; +} + +static int calc_cgroup_depth(const char *id) { + int depth = 0; + const char *s; + for (s = id; *s; s++) { + depth += unlikely(*s == '/'); + } + return depth; +} + +// ---------------------------------------------------------------------------- +// read values from /sys + +static inline void cgroup_read_cpuacct_stat(struct cpuacct_stat *cp) { + static procfile *ff = NULL; + + if(likely(cp->filename)) { + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long i, lines = procfile_lines(ff); + + if(unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", cp->filename); + cp->updated = 0; + return; + } + + for(i = 0; i < lines ; i++) { + char *s = procfile_lineword(ff, i, 0); + uint32_t hash = simple_hash(s); + + if(unlikely(hash == user_hash && !strcmp(s, "user"))) + cp->user = str2ull(procfile_lineword(ff, i, 1)); + + else if(unlikely(hash == system_hash && !strcmp(s, "system"))) + cp->system = str2ull(procfile_lineword(ff, i, 1)); + } + + cp->updated = 1; + + if(unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO && + (cp->user || cp->system || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + cp->enabled = CONFIG_BOOLEAN_YES; + } +} + +static inline void cgroup_read_cpuacct_cpu_stat(struct cpuacct_cpu_throttling *cp) { + if (unlikely(!cp->filename)) { + return; + } + + static procfile *ff = NULL; + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long lines = procfile_lines(ff); + if (unlikely(lines < 3)) { + error("CGROUP: file '%s' should have 3 lines.", cp->filename); + cp->updated = 0; + return; + } + + unsigned long long nr_periods_last = cp->nr_periods; + unsigned long long nr_throttled_last = cp->nr_throttled; + + for (unsigned long i = 0; i < lines; i++) { + char *s = procfile_lineword(ff, i, 0); + uint32_t hash = simple_hash(s); + + if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { + cp->nr_periods = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { + cp->nr_throttled = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == throttled_time_hash && !strcmp(s, "throttled_time"))) { + cp->throttled_time = str2ull(procfile_lineword(ff, i, 1)); + } + } + cp->nr_throttled_perc = + calc_percentage(calc_delta(cp->nr_throttled, nr_throttled_last), calc_delta(cp->nr_periods, nr_periods_last)); + + cp->updated = 1; + + if (unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO)) { + if (likely( + cp->nr_periods || cp->nr_throttled || cp->throttled_time || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cp->enabled = CONFIG_BOOLEAN_YES; + } + } +} + +static inline void cgroup2_read_cpuacct_cpu_stat(struct cpuacct_stat *cp, struct cpuacct_cpu_throttling *cpt) { + static procfile *ff = NULL; + if (unlikely(!cp->filename)) { + return; + } + + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long lines = procfile_lines(ff); + + if (unlikely(lines < 3)) { + error("CGROUP: file '%s' should have at least 3 lines.", cp->filename); + cp->updated = 0; + return; + } + + unsigned long long nr_periods_last = cpt->nr_periods; + unsigned long long nr_throttled_last = cpt->nr_throttled; + + for (unsigned long i = 0; i < lines; i++) { + char *s = procfile_lineword(ff, i, 0); + uint32_t hash = simple_hash(s); + + if (unlikely(hash == user_usec_hash && !strcmp(s, "user_usec"))) { + cp->user = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == system_usec_hash && !strcmp(s, "system_usec"))) { + cp->system = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { + cpt->nr_periods = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { + cpt->nr_throttled = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == throttled_usec_hash && !strcmp(s, "throttled_usec"))) { + cpt->throttled_time = str2ull(procfile_lineword(ff, i, 1)) * 1000; // usec -> ns + } + } + cpt->nr_throttled_perc = + calc_percentage(calc_delta(cpt->nr_throttled, nr_throttled_last), calc_delta(cpt->nr_periods, nr_periods_last)); + + cp->updated = 1; + cpt->updated = 1; + + if (unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO)) { + if (likely(cp->user || cp->system || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cp->enabled = CONFIG_BOOLEAN_YES; + } + } + if (unlikely(cpt->enabled == CONFIG_BOOLEAN_AUTO)) { + if (likely( + cpt->nr_periods || cpt->nr_throttled || cpt->throttled_time || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cpt->enabled = CONFIG_BOOLEAN_YES; + } + } +} + +static inline void cgroup_read_cpuacct_cpu_shares(struct cpuacct_cpu_shares *cp) { + if (unlikely(!cp->filename)) { + return; + } + + if (unlikely(read_single_number_file(cp->filename, &cp->shares))) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + cp->updated = 1; + if (unlikely((cp->enabled == CONFIG_BOOLEAN_AUTO)) && + (cp->shares || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cp->enabled = CONFIG_BOOLEAN_YES; + } +} + +static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { + static procfile *ff = NULL; + + if(likely(ca->filename)) { + ff = procfile_reopen(ff, ca->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + ca->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + ca->updated = 0; + cgroups_check = 1; + return; + } + + if(unlikely(procfile_lines(ff) < 1)) { + error("CGROUP: file '%s' should have 1+ lines but has %zu.", ca->filename, procfile_lines(ff)); + ca->updated = 0; + return; + } + + unsigned long i = procfile_linewords(ff, 0); + if(unlikely(i == 0)) { + ca->updated = 0; + return; + } + + // we may have 1 more CPU reported + while(i > 0) { + char *s = procfile_lineword(ff, 0, i - 1); + if(!*s) i--; + else break; + } + + if(unlikely(i != ca->cpus)) { + freez(ca->cpu_percpu); + ca->cpu_percpu = mallocz(sizeof(unsigned long long) * i); + ca->cpus = (unsigned int)i; + } + + unsigned long long total = 0; + for(i = 0; i < ca->cpus ;i++) { + unsigned long long n = str2ull(procfile_lineword(ff, 0, i)); + ca->cpu_percpu[i] = n; + total += n; + } + + ca->updated = 1; + + if(unlikely(ca->enabled == CONFIG_BOOLEAN_AUTO && + (total || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + ca->enabled = CONFIG_BOOLEAN_YES; + } +} + +static inline void cgroup_read_blkio(struct blkio *io) { + if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) { + io->delay_counter--; + return; + } + + if(likely(io->filename)) { + static procfile *ff = NULL; + + ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long i, lines = procfile_lines(ff); + + if(unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", io->filename); + io->updated = 0; + return; + } + + io->Read = 0; + io->Write = 0; +/* + io->Sync = 0; + io->Async = 0; + io->Total = 0; +*/ + + for(i = 0; i < lines ; i++) { + char *s = procfile_lineword(ff, i, 1); + uint32_t hash = simple_hash(s); + + if(unlikely(hash == Read_hash && !strcmp(s, "Read"))) + io->Read += str2ull(procfile_lineword(ff, i, 2)); + + else if(unlikely(hash == Write_hash && !strcmp(s, "Write"))) + io->Write += str2ull(procfile_lineword(ff, i, 2)); + +/* + else if(unlikely(hash == Sync_hash && !strcmp(s, "Sync"))) + io->Sync += str2ull(procfile_lineword(ff, i, 2)); + + else if(unlikely(hash == Async_hash && !strcmp(s, "Async"))) + io->Async += str2ull(procfile_lineword(ff, i, 2)); + + else if(unlikely(hash == Total_hash && !strcmp(s, "Total"))) + io->Total += str2ull(procfile_lineword(ff, i, 2)); +*/ + } + + io->updated = 1; + + if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO)) { + if(unlikely(io->Read || io->Write || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) + io->enabled = CONFIG_BOOLEAN_YES; + else + io->delay_counter = cgroup_recheck_zero_blkio_every_iterations; + } + } +} + +static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset) { + if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) { + io->delay_counter--; + return; + } + + if(likely(io->filename)) { + static procfile *ff = NULL; + + ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long i, lines = procfile_lines(ff); + + if (unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", io->filename); + io->updated = 0; + return; + } + + io->Read = 0; + io->Write = 0; + + for (i = 0; i < lines; i++) { + io->Read += str2ull(procfile_lineword(ff, i, 2 + word_offset)); + io->Write += str2ull(procfile_lineword(ff, i, 4 + word_offset)); + } + + io->updated = 1; + + if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO)) { + if(unlikely(io->Read || io->Write || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) + io->enabled = CONFIG_BOOLEAN_YES; + else + io->delay_counter = cgroup_recheck_zero_blkio_every_iterations; + } + } +} + +static inline void cgroup2_read_pressure(struct pressure *res) { + static procfile *ff = NULL; + + if (likely(res->filename)) { + ff = procfile_reopen(ff, res->filename, " =", CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + size_t lines = procfile_lines(ff); + if (lines < 1) { + error("CGROUP: file '%s' should have 1+ lines.", res->filename); + res->updated = 0; + return; + } + + res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8)) / 1000; // us->ms + + if (lines > 2) { + res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + res->full.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8)) / 1000; // us->ms + } + + res->updated = 1; + + if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) { + res->some.enabled = CONFIG_BOOLEAN_YES; + if (lines > 2) { + res->full.enabled = CONFIG_BOOLEAN_YES; + } else { + res->full.enabled = CONFIG_BOOLEAN_NO; + } + } + } +} + +static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) { + static procfile *ff = NULL; + + // read detailed ram usage + if(likely(mem->filename_detailed)) { + if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO && mem->delay_counter_detailed > 0)) { + mem->delay_counter_detailed--; + goto memory_next; + } + + ff = procfile_reopen(ff, mem->filename_detailed, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + mem->updated_detailed = 0; + cgroups_check = 1; + goto memory_next; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + mem->updated_detailed = 0; + cgroups_check = 1; + goto memory_next; + } + + unsigned long i, lines = procfile_lines(ff); + + if(unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", mem->filename_detailed); + mem->updated_detailed = 0; + goto memory_next; + } + + + if(unlikely(!mem->arl_base)) { + if(parent_cg_is_unified == 0){ + mem->arl_base = arl_create("cgroup/memory", NULL, 60); + + arl_expect(mem->arl_base, "total_cache", &mem->total_cache); + arl_expect(mem->arl_base, "total_rss", &mem->total_rss); + arl_expect(mem->arl_base, "total_rss_huge", &mem->total_rss_huge); + arl_expect(mem->arl_base, "total_mapped_file", &mem->total_mapped_file); + arl_expect(mem->arl_base, "total_writeback", &mem->total_writeback); + mem->arl_dirty = arl_expect(mem->arl_base, "total_dirty", &mem->total_dirty); + mem->arl_swap = arl_expect(mem->arl_base, "total_swap", &mem->total_swap); + arl_expect(mem->arl_base, "total_pgpgin", &mem->total_pgpgin); + arl_expect(mem->arl_base, "total_pgpgout", &mem->total_pgpgout); + arl_expect(mem->arl_base, "total_pgfault", &mem->total_pgfault); + arl_expect(mem->arl_base, "total_pgmajfault", &mem->total_pgmajfault); + arl_expect(mem->arl_base, "total_inactive_file", &mem->total_inactive_file); + } else { + mem->arl_base = arl_create("cgroup/memory", NULL, 60); + + arl_expect(mem->arl_base, "anon", &mem->anon); + arl_expect(mem->arl_base, "kernel_stack", &mem->kernel_stack); + arl_expect(mem->arl_base, "slab", &mem->slab); + arl_expect(mem->arl_base, "sock", &mem->sock); + arl_expect(mem->arl_base, "anon_thp", &mem->anon_thp); + arl_expect(mem->arl_base, "file", &mem->total_mapped_file); + arl_expect(mem->arl_base, "file_writeback", &mem->total_writeback); + mem->arl_dirty = arl_expect(mem->arl_base, "file_dirty", &mem->total_dirty); + arl_expect(mem->arl_base, "pgfault", &mem->total_pgfault); + arl_expect(mem->arl_base, "pgmajfault", &mem->total_pgmajfault); + arl_expect(mem->arl_base, "inactive_file", &mem->total_inactive_file); + } + } + + arl_begin(mem->arl_base); + + for(i = 0; i < lines ; i++) { + if(arl_check(mem->arl_base, + procfile_lineword(ff, i, 0), + procfile_lineword(ff, i, 1))) break; + } + + if(unlikely(mem->arl_dirty->flags & ARL_ENTRY_FLAG_FOUND)) + mem->detailed_has_dirty = 1; + + if(unlikely(parent_cg_is_unified == 0 && mem->arl_swap->flags & ARL_ENTRY_FLAG_FOUND)) + mem->detailed_has_swap = 1; + + // fprintf(stderr, "READ: '%s', cache: %llu, rss: %llu, rss_huge: %llu, mapped_file: %llu, writeback: %llu, dirty: %llu, swap: %llu, pgpgin: %llu, pgpgout: %llu, pgfault: %llu, pgmajfault: %llu, inactive_anon: %llu, active_anon: %llu, inactive_file: %llu, active_file: %llu, unevictable: %llu, hierarchical_memory_limit: %llu, total_cache: %llu, total_rss: %llu, total_rss_huge: %llu, total_mapped_file: %llu, total_writeback: %llu, total_dirty: %llu, total_swap: %llu, total_pgpgin: %llu, total_pgpgout: %llu, total_pgfault: %llu, total_pgmajfault: %llu, total_inactive_anon: %llu, total_active_anon: %llu, total_inactive_file: %llu, total_active_file: %llu, total_unevictable: %llu\n", mem->filename, mem->cache, mem->rss, mem->rss_huge, mem->mapped_file, mem->writeback, mem->dirty, mem->swap, mem->pgpgin, mem->pgpgout, mem->pgfault, mem->pgmajfault, mem->inactive_anon, mem->active_anon, mem->inactive_file, mem->active_file, mem->unevictable, mem->hierarchical_memory_limit, mem->total_cache, mem->total_rss, mem->total_rss_huge, mem->total_mapped_file, mem->total_writeback, mem->total_dirty, mem->total_swap, mem->total_pgpgin, mem->total_pgpgout, mem->total_pgfault, mem->total_pgmajfault, mem->total_inactive_anon, mem->total_active_anon, mem->total_inactive_file, mem->total_active_file, mem->total_unevictable); + + mem->updated_detailed = 1; + + if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO)) { + if(( (!parent_cg_is_unified) && ( mem->total_cache || mem->total_dirty || mem->total_rss || mem->total_rss_huge || mem->total_mapped_file || mem->total_writeback + || mem->total_swap || mem->total_pgpgin || mem->total_pgpgout || mem->total_pgfault || mem->total_pgmajfault || mem->total_inactive_file)) + || (parent_cg_is_unified && ( mem->anon || mem->total_dirty || mem->kernel_stack || mem->slab || mem->sock || mem->total_writeback + || mem->anon_thp || mem->total_pgfault || mem->total_pgmajfault || mem->total_inactive_file)) + || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES) + mem->enabled_detailed = CONFIG_BOOLEAN_YES; + else + mem->delay_counter_detailed = cgroup_recheck_zero_mem_detailed_every_iterations; + } + } + +memory_next: + + // read usage_in_bytes + if(likely(mem->filename_usage_in_bytes)) { + mem->updated_usage_in_bytes = !read_single_number_file(mem->filename_usage_in_bytes, &mem->usage_in_bytes); + if(unlikely(mem->updated_usage_in_bytes && mem->enabled_usage_in_bytes == CONFIG_BOOLEAN_AUTO && + (mem->usage_in_bytes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + mem->enabled_usage_in_bytes = CONFIG_BOOLEAN_YES; + } + + if (likely(mem->updated_usage_in_bytes && mem->updated_detailed)) { + mem->usage_in_bytes = + (mem->usage_in_bytes > mem->total_inactive_file) ? (mem->usage_in_bytes - mem->total_inactive_file) : 0; + } + + // read msw_usage_in_bytes + if(likely(mem->filename_msw_usage_in_bytes)) { + mem->updated_msw_usage_in_bytes = !read_single_number_file(mem->filename_msw_usage_in_bytes, &mem->msw_usage_in_bytes); + if(unlikely(mem->updated_msw_usage_in_bytes && mem->enabled_msw_usage_in_bytes == CONFIG_BOOLEAN_AUTO && + (mem->msw_usage_in_bytes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + mem->enabled_msw_usage_in_bytes = CONFIG_BOOLEAN_YES; + } + + // read failcnt + if(likely(mem->filename_failcnt)) { + if(unlikely(mem->enabled_failcnt == CONFIG_BOOLEAN_AUTO && mem->delay_counter_failcnt > 0)) { + mem->updated_failcnt = 0; + mem->delay_counter_failcnt--; + } + else { + mem->updated_failcnt = !read_single_number_file(mem->filename_failcnt, &mem->failcnt); + if(unlikely(mem->updated_failcnt && mem->enabled_failcnt == CONFIG_BOOLEAN_AUTO)) { + if(unlikely(mem->failcnt || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) + mem->enabled_failcnt = CONFIG_BOOLEAN_YES; + else + mem->delay_counter_failcnt = cgroup_recheck_zero_mem_failcnt_every_iterations; + } + } + } +} + +static inline void read_cgroup(struct cgroup *cg) { + debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + cgroup_read_cpuacct_stat(&cg->cpuacct_stat); + cgroup_read_cpuacct_usage(&cg->cpuacct_usage); + cgroup_read_cpuacct_cpu_stat(&cg->cpuacct_cpu_throttling); + cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); + cgroup_read_memory(&cg->memory, 0); + cgroup_read_blkio(&cg->io_service_bytes); + cgroup_read_blkio(&cg->io_serviced); + cgroup_read_blkio(&cg->throttle_io_service_bytes); + cgroup_read_blkio(&cg->throttle_io_serviced); + cgroup_read_blkio(&cg->io_merged); + cgroup_read_blkio(&cg->io_queued); + } + else { + //TODO: io_service_bytes and io_serviced use same file merge into 1 function + cgroup2_read_blkio(&cg->io_service_bytes, 0); + cgroup2_read_blkio(&cg->io_serviced, 4); + cgroup2_read_cpuacct_cpu_stat(&cg->cpuacct_stat, &cg->cpuacct_cpu_throttling); + cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); + cgroup2_read_pressure(&cg->cpu_pressure); + cgroup2_read_pressure(&cg->io_pressure); + cgroup2_read_pressure(&cg->memory_pressure); + cgroup_read_memory(&cg->memory, 1); + } +} + +static inline void read_all_discovered_cgroups(struct cgroup *root) { + debug(D_CGROUP, "reading metrics for all cgroups"); + + struct cgroup *cg; + for (cg = root; cg; cg = cg->next) { + if (cg->enabled && !cg->pending_renames) { + read_cgroup(cg); + } + } +} + +// ---------------------------------------------------------------------------- +// cgroup network interfaces + +#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 +static inline void read_cgroup_network_interfaces(struct cgroup *cg) { + debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); + + pid_t cgroup_pid; + char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id); + } + else { + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id); + } + + debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); + FILE *fp_child_input, *fp_child_output; + (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); + if(!fp_child_output) { + error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier); + return; + } + + char *s; + char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + trim(s); + + if(*s && *s != '\n') { + char *t = s; + while(*t && *t != ' ') t++; + if(*t == ' ') { + *t = '\0'; + t++; + } + + if(!*s) { + error("CGROUP: empty host interface returned by script"); + continue; + } + + if(!*t) { + error("CGROUP: empty guest interface returned by script"); + continue; + } + + struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface)); + i->host_device = strdupz(s); + i->container_device = strdupz(t); + i->next = cg->interfaces; + cg->interfaces = i; + + info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device); + + // register a device rename to proc_net_dev.c + netdev_rename_device_add( + i->host_device, i->container_device, cg->chart_id, cg->chart_labels, k8s_is_kubepod(cg) ? "k8s." : ""); + } + } + + netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + // debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); +} + +static inline void free_cgroup_network_interfaces(struct cgroup *cg) { + while(cg->interfaces) { + struct cgroup_network_interface *i = cg->interfaces; + cg->interfaces = i->next; + + // delete the registration of proc_net_dev rename + netdev_rename_device_del(i->host_device); + + freez((void *)i->host_device); + freez((void *)i->container_device); + freez((void *)i); + } +} + +// ---------------------------------------------------------------------------- +// add/remove/find cgroup objects + +#define CGROUP_CHARTID_LINE_MAX 1024 + +static inline char *cgroup_title_strdupz(const char *s) { + if(!s || !*s) s = "/"; + + if(*s == '/' && s[1] != '\0') s++; + + char *r = strdupz(s); + netdata_fix_chart_name(r); + + return r; +} + +static inline char *cgroup_chart_id_strdupz(const char *s) { + if(!s || !*s) s = "/"; + + if(*s == '/' && s[1] != '\0') s++; + + char *r = strdupz(s); + netdata_fix_chart_id(r); + + return r; +} + +// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed +static inline void substitute_dots_in_id(char *s) { + // dots are used to distinguish chart type and id in streaming, so we should replace them + for (char *d = s; *d; d++) { + if (*d == '.') + *d = '-'; + } +} + +// ---------------------------------------------------------------------------- +// parse k8s labels + +char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data) { + // the first word, up to the first space is the name + char *name = mystrsep(&data, " "); + + // the rest are key=value pairs separated by comma + while(data) { + char *pair = mystrsep(&data, ","); + rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO| RRDLABEL_SRC_K8S); + } + + return name; +} + +// ---------------------------------------------------------------------------- + +static inline void free_pressure(struct pressure *res) { + if (res->some.share_time.st) rrdset_is_obsolete(res->some.share_time.st); + if (res->some.total_time.st) rrdset_is_obsolete(res->some.total_time.st); + if (res->full.share_time.st) rrdset_is_obsolete(res->full.share_time.st); + if (res->full.total_time.st) rrdset_is_obsolete(res->full.total_time.st); + freez(res->filename); +} + +static inline void cgroup_free(struct cgroup *cg) { + debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); + + if(cg->st_cpu) rrdset_is_obsolete(cg->st_cpu); + if(cg->st_cpu_limit) rrdset_is_obsolete(cg->st_cpu_limit); + if(cg->st_cpu_per_core) rrdset_is_obsolete(cg->st_cpu_per_core); + if(cg->st_cpu_nr_throttled) rrdset_is_obsolete(cg->st_cpu_nr_throttled); + if(cg->st_cpu_throttled_time) rrdset_is_obsolete(cg->st_cpu_throttled_time); + if(cg->st_cpu_shares) rrdset_is_obsolete(cg->st_cpu_shares); + if(cg->st_mem) rrdset_is_obsolete(cg->st_mem); + if(cg->st_writeback) rrdset_is_obsolete(cg->st_writeback); + if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity); + if(cg->st_pgfaults) rrdset_is_obsolete(cg->st_pgfaults); + if(cg->st_mem_usage) rrdset_is_obsolete(cg->st_mem_usage); + if(cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit); + if(cg->st_mem_utilization) rrdset_is_obsolete(cg->st_mem_utilization); + if(cg->st_mem_failcnt) rrdset_is_obsolete(cg->st_mem_failcnt); + if(cg->st_io) rrdset_is_obsolete(cg->st_io); + if(cg->st_serviced_ops) rrdset_is_obsolete(cg->st_serviced_ops); + if(cg->st_throttle_io) rrdset_is_obsolete(cg->st_throttle_io); + if(cg->st_throttle_serviced_ops) rrdset_is_obsolete(cg->st_throttle_serviced_ops); + if(cg->st_queued_ops) rrdset_is_obsolete(cg->st_queued_ops); + if(cg->st_merged_ops) rrdset_is_obsolete(cg->st_merged_ops); + + freez(cg->filename_cpuset_cpus); + freez(cg->filename_cpu_cfs_period); + freez(cg->filename_cpu_cfs_quota); + freez(cg->filename_memory_limit); + freez(cg->filename_memoryswap_limit); + + free_cgroup_network_interfaces(cg); + + freez(cg->cpuacct_usage.cpu_percpu); + + freez(cg->cpuacct_stat.filename); + freez(cg->cpuacct_usage.filename); + freez(cg->cpuacct_cpu_throttling.filename); + freez(cg->cpuacct_cpu_shares.filename); + + arl_free(cg->memory.arl_base); + freez(cg->memory.filename_detailed); + freez(cg->memory.filename_failcnt); + freez(cg->memory.filename_usage_in_bytes); + freez(cg->memory.filename_msw_usage_in_bytes); + + freez(cg->io_service_bytes.filename); + freez(cg->io_serviced.filename); + + freez(cg->throttle_io_service_bytes.filename); + freez(cg->throttle_io_serviced.filename); + + freez(cg->io_merged.filename); + freez(cg->io_queued.filename); + + free_pressure(&cg->cpu_pressure); + free_pressure(&cg->io_pressure); + free_pressure(&cg->memory_pressure); + + freez(cg->id); + freez(cg->intermediate_id); + freez(cg->chart_id); + freez(cg->chart_title); + + rrdlabels_destroy(cg->chart_labels); + + freez(cg); + + cgroup_root_count--; +} + +// ---------------------------------------------------------------------------- + +static inline void discovery_rename_cgroup(struct cgroup *cg) { + if (!cg->pending_renames) { + return; + } + cg->pending_renames--; + + debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); + debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); + pid_t cgroup_pid; + + FILE *fp_child_input, *fp_child_output; + (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id); + if (!fp_child_output) { + error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); + cg->pending_renames = 0; + cg->processed = 1; + return; + } + + char buffer[CGROUP_CHARTID_LINE_MAX + 1]; + char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output); + int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + + switch (exit_code) { + case 0: + cg->pending_renames = 0; + break; + + case 3: + cg->pending_renames = 0; + cg->processed = 1; + break; + } + + if(cg->pending_renames || cg->processed) return; + if(!new_name || !*new_name || *new_name == '\n') return; + if(!(new_name = trim(new_name))) return; + + char *name = new_name; + if (!strncmp(new_name, "k8s_", 4)) { + if(!cg->chart_labels) cg->chart_labels = rrdlabels_create(); + + // read the new labels and remove the obsolete ones + rrdlabels_unmark_all(cg->chart_labels); + name = k8s_parse_resolved_name_and_labels(cg->chart_labels, new_name); + rrdlabels_remove_all_unmarked(cg->chart_labels); + } + + freez(cg->chart_title); + cg->chart_title = cgroup_title_strdupz(name); + + freez(cg->chart_id); + cg->chart_id = cgroup_chart_id_strdupz(name); + + substitute_dots_in_id(cg->chart_id); + cg->hash_chart = simple_hash(cg->chart_id); +} + +static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) { + struct stat buf; + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + out->path[0] = '\0'; + out->enabled = 0; +} + +static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { + char buffer[CGROUP_CHARTID_LINE_MAX]; + cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; + strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); + char *s = buffer; + + // skip to the last slash + size_t len = strlen(s); + while (len--) { + if (unlikely(s[len] == '/')) { + break; + } + } + if (len) { + s = &s[len + 1]; + } + + // remove extension + len = strlen(s); + while (len--) { + if (unlikely(s[len] == '.')) { + break; + } + } + if (len) { + s[len] = '\0'; + } + + freez(cg->chart_title); + cg->chart_title = cgroup_title_strdupz(s); +} + +static inline struct cgroup *discovery_cgroup_add(const char *id) { + debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); + + struct cgroup *cg = callocz(1, sizeof(struct cgroup)); + cg->id = strdupz(id); + cg->hash = simple_hash(cg->id); + cg->chart_title = cgroup_title_strdupz(id); + cg->intermediate_id = cgroup_chart_id_strdupz(id); + cg->chart_id = cgroup_chart_id_strdupz(id); + substitute_dots_in_id(cg->chart_id); + cg->hash_chart = simple_hash(cg->chart_id); + if (cgroup_use_unified_cgroups) { + cg->options |= CGROUP_OPTIONS_IS_UNIFIED; + } + + if (!discovered_cgroup_root) + discovered_cgroup_root = cg; + else { + struct cgroup *t; + for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) { + } + t->discovered_next = cg; + } + + return cg; +} + +static inline struct cgroup *discovery_cgroup_find(const char *id) { + debug(D_CGROUP, "searching for cgroup '%s'", id); + + uint32_t hash = simple_hash(id); + + struct cgroup *cg; + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + if(hash == cg->hash && strcmp(id, cg->id) == 0) + break; + } + + debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); + return cg; +} + +static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { + if (!dir || !*dir) { + dir = "/"; + } + debug(D_CGROUP, "examining cgroup dir '%s'", dir); + + struct cgroup *cg = discovery_cgroup_find(dir); + if (cg) { + cg->available = 1; + return; + } + + if (cgroup_root_count >= cgroup_root_max) { + info("CGROUP: maximum number of cgroups reached (%d). Not adding cgroup '%s'", cgroup_root_count, dir); + return; + } + + if (cgroup_max_depth > 0) { + int depth = calc_cgroup_depth(dir); + if (depth > cgroup_max_depth) { + info("CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth); + return; + } + } + + cg = discovery_cgroup_add(dir); + cg->available = 1; + cg->first_time_seen = 1; + cgroup_root_count++; +} + +static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { + if(!this) this = base; + debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); + + size_t dirlen = strlen(this), baselen = strlen(base); + + int ret = -1; + int enabled = -1; + + const char *relative_path = &this[baselen]; + if(!*relative_path) relative_path = "/"; + + DIR *dir = opendir(this); + if(!dir) { + error("CGROUP: cannot read directory '%s'", base); + return ret; + } + ret = 1; + + callback(relative_path); + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + )) + continue; + + if(de->d_type == DT_DIR) { + if(enabled == -1) { + const char *r = relative_path; + if(*r == '\0') r = "/"; + + // do not decent in directories we are not interested + enabled = matches_search_cgroup_paths(r); + } + + if(enabled) { + char *s = mallocz(dirlen + strlen(de->d_name) + 2); + strcpy(s, this); + strcat(s, "/"); + strcat(s, de->d_name); + int ret2 = discovery_find_dir_in_subdirs(base, s, callback); + if(ret2 > 0) ret += ret2; + freez(s); + } + } + } + + closedir(dir); + return ret; +} + +static inline void discovery_mark_all_cgroups_as_unavailable() { + debug(D_CGROUP, "marking all cgroups as not available"); + struct cgroup *cg; + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + cg->available = 0; + } +} + +static inline void discovery_update_filenames() { + struct cgroup *cg; + struct stat buf; + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + if(unlikely(!cg->available || !cg->enabled || cg->pending_renames)) + continue; + + debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id); + + // check for newly added cgroups + // and update the filenames they read + char filename[FILENAME_MAX + 1]; + if(!cgroup_use_unified_cgroups) { + if(unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_stat.filename = strdupz(filename); + cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; + snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id); + cg->filename_cpuset_cpus = strdupz(filename); + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id); + cg->filename_cpu_cfs_period = strdupz(filename); + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id); + cg->filename_cpu_cfs_quota = strdupz(filename); + debug(D_CGROUP, "cpuacct.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); + } + else + debug(D_CGROUP, "cpuacct.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_usage.filename = strdupz(filename); + cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage; + debug(D_CGROUP, "cpuacct.usage_percpu filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_usage.filename); + } + else + debug(D_CGROUP, "cpuacct.usage_percpu file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if(unlikely(cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_throttling.filename = strdupz(filename); + cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; + debug(D_CGROUP, "cpu.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_throttling.filename); + } + else + debug(D_CGROUP, "cpu.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely( + cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && + !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + debug( + D_CGROUP, "cpu.shares filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); + } else + debug(D_CGROUP, "cpu.shares file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_detailed = strdupz(filename); + cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; + debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); + } + else + debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; + debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id); + cg->filename_memory_limit = strdupz(filename); + } + else + debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id); + cg->filename_memoryswap_limit = strdupz(filename); + debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + } + else + debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_failcnt = strdupz(filename); + cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt; + debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); + } + else + debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + debug(D_CGROUP, "blkio.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + } else { + debug(D_CGROUP, "blkio.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + debug(D_CGROUP, "blkio.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + } else { + debug(D_CGROUP, "blkio.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + debug(D_CGROUP, "blkio.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + debug(D_CGROUP, "blkio.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->throttle_io_service_bytes.filename = strdupz(filename); + cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; + debug(D_CGROUP,"blkio.throttle.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); + } else { + debug(D_CGROUP, "blkio.throttle.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz( + filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->throttle_io_service_bytes.filename = strdupz(filename); + cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; + debug(D_CGROUP, "blkio.throttle.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); + } else { + debug(D_CGROUP, "blkio.throttle.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->throttle_io_serviced.filename = strdupz(filename); + cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; + debug(D_CGROUP, "blkio.throttle.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.throttle.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->throttle_io_serviced.filename = strdupz(filename); + cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; + debug(D_CGROUP, "blkio.throttle.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.throttle.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_merged.filename = strdupz(filename); + cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; + debug(D_CGROUP, "blkio.io_merged_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); + } else { + debug(D_CGROUP, "blkio.io_merged_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_merged.filename = strdupz(filename); + cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; + debug(D_CGROUP, "blkio.io_merged filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); + } else { + debug(D_CGROUP, "blkio.io_merged file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_queued.filename = strdupz(filename); + cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; + debug(D_CGROUP, "blkio.io_queued_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); + } else { + debug(D_CGROUP, "blkio.io_queued_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_queued.filename = strdupz(filename); + cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; + debug(D_CGROUP, "blkio.io_queued filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); + } else { + debug(D_CGROUP, "blkio.io_queued file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + } + else if(likely(cgroup_unified_exist)) { + if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + } else + debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + } else + debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely( + (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && + !cg->cpuacct_stat.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_stat.filename = strdupz(filename); + cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; + cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; + cg->filename_cpuset_cpus = NULL; + cg->filename_cpu_cfs_period = NULL; + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id); + cg->filename_cpu_cfs_quota = strdupz(filename); + debug(D_CGROUP, "cpu.stat filename for unified cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); + } + else + debug(D_CGROUP, "cpu.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + debug(D_CGROUP, "cpu.weight filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); + } else + debug(D_CGROUP, "cpu.weight file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_detailed = strdupz(filename); + cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; + debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); + } + else + debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; + debug(D_CGROUP, "memory.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id); + cg->filename_memory_limit = strdupz(filename); + } + else + debug(D_CGROUP, "memory.current file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id); + cg->filename_memoryswap_limit = strdupz(filename); + debug(D_CGROUP, "memory.swap.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + } + else + debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpu_pressure.filename = strdupz(filename); + cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; + cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; + debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); + } else { + debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_pressure.filename = strdupz(filename); + cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; + cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; + debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); + } else { + debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory_pressure.filename = strdupz(filename); + cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; + cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; + debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); + } else { + debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + } + } +} + +static inline void discovery_cleanup_all_cgroups() { + struct cgroup *cg = discovered_cgroup_root, *last = NULL; + + for(; cg ;) { + if(!cg->available) { + // enable the first duplicate cgroup + { + struct cgroup *t; + for(t = discovered_cgroup_root; t ; t = t->discovered_next) { + if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { + debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + t->enabled = 1; + t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; + break; + } + } + } + + if(!last) + discovered_cgroup_root = cg->discovered_next; + else + last->discovered_next = cg->discovered_next; + + cgroup_free(cg); + + if(!last) + cg = discovered_cgroup_root; + else + cg = last->discovered_next; + } + else { + last = cg; + cg = cg->discovered_next; + } + } +} + +static inline void discovery_copy_discovered_cgroups_to_reader() { + debug(D_CGROUP, "copy discovered cgroups to the main group list"); + + struct cgroup *cg; + + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + cg->next = cg->discovered_next; + } + + cgroup_root = discovered_cgroup_root; +} + +static inline void discovery_share_cgroups_with_ebpf() { + struct cgroup *cg; + int count; + struct stat buf; + + if (shm_mutex_cgroup_ebpf == SEM_FAILED) { + return; + } + sem_wait(shm_mutex_cgroup_ebpf); + + for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) { + netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count]; + char *prefix = (is_cgroup_systemd_service(cg)) ? "" : "cgroup_"; + snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_title); + ptr->hash = simple_hash(ptr->name); + ptr->options = cg->options; + ptr->enabled = cg->enabled; + if (cgroup_use_unified_cgroups) { + snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id); + if (likely(stat(ptr->path, &buf) == -1)) { + ptr->path[0] = '\0'; + ptr->enabled = 0; + } + } else { + is_cgroup_procs_exist(ptr, cg->id); + } + + debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); + } + + shm_cgroup_ebpf.header->cgroup_root_count = count; + sem_post(shm_mutex_cgroup_ebpf); +} + +static inline void discovery_find_all_cgroups_v1() { + if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { + if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled cpu statistics."); + } + } + + if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || + cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { + if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io = + cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops = + CONFIG_BOOLEAN_NO; + error("CGROUP: disabled blkio statistics."); + } + } + + if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { + if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt = + CONFIG_BOOLEAN_NO; + error("CGROUP: disabled memory statistics."); + } + } + + if (cgroup_search_in_devices) { + if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_search_in_devices = 0; + error("CGROUP: disabled devices statistics."); + } + } +} + +static inline void discovery_find_all_cgroups_v2() { + if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_unified_exist = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled unified cgroups statistics."); + } +} + +static int is_digits_only(const char *s) { + do { + if (!isdigit(*s++)) { + return 0; + } + } while (*s); + + return 1; +} + +static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { + if (!cg->first_time_seen) { + return; + } + cg->first_time_seen = 0; + + char comm[TASK_COMM_LEN]; + + if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) { + if (strstr(cg->id, "kubepods")) { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S; + } else { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN; + } + } + + if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { + // container initialization may take some time when CPU % is high + // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number) + if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) { + cg->first_time_seen = 1; + return; + } + if (!strcmp(comm, "pause")) { + // a container that holds the network namespace for the pod + // we don't need to collect its metrics + cg->processed = 1; + return; + } + } + + if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) { + debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_title); + convert_cgroup_to_systemd_service(cg); + return; + } + + if (matches_enabled_cgroup_renames(cg->id)) { + debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_title); + if (is_inside_k8s && k8s_is_container(cg->id)) { + // it may take up to a minute for the K8s API to return data for the container + // tested on AWS K8s cluster with 100% CPU utilization + cg->pending_renames = 9; // 1.5 minute + } else { + cg->pending_renames = 2; + } + } +} + +static int discovery_is_cgroup_duplicate(struct cgroup *cg) { + // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 + struct cgroup *c; + for (c = discovered_cgroup_root; c; c = c->discovered_next) { + if (c != cg && c->enabled && c->hash_chart == cg->hash_chart && !strcmp(c->chart_id, cg->chart_id)) { + error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", cg->chart_id, c->id, cg->id); + return 1; + } + } + return 0; +} + +static inline void discovery_process_cgroup(struct cgroup *cg) { + if (!cg) { + debug(D_CGROUP, "discovery_process_cgroup() received NULL"); + return; + } + if (!cg->available || cg->processed) { + return; + } + + if (cg->first_time_seen) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME); + discovery_process_first_time_seen_cgroup(cg); + if (unlikely(cg->first_time_seen || cg->processed)) { + return; + } + } + + if (cg->pending_renames) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME); + discovery_rename_cgroup(cg); + if (unlikely(cg->pending_renames || cg->processed)) { + return; + } + } + + cg->processed = 1; + + if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) { + info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id); + return; + } + + if (is_cgroup_systemd_service(cg)) { + cg->enabled = 1; + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_names(cg->chart_title))) { + debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->chart_title); + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { + debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->chart_title); + return; + } + + if (discovery_is_cgroup_duplicate(cg)) { + cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + return; + } + + worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK); + read_cgroup_network_interfaces(cg); +} + +static inline void discovery_find_all_cgroups() { + debug(D_CGROUP, "searching for cgroups"); + + worker_is_busy(WORKER_DISCOVERY_INIT); + discovery_mark_all_cgroups_as_unavailable(); + + worker_is_busy(WORKER_DISCOVERY_FIND); + if (!cgroup_use_unified_cgroups) { + discovery_find_all_cgroups_v1(); + } else { + discovery_find_all_cgroups_v2(); + } + + struct cgroup *cg; + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + worker_is_busy(WORKER_DISCOVERY_PROCESS); + discovery_process_cgroup(cg); + } + + worker_is_busy(WORKER_DISCOVERY_UPDATE); + discovery_update_filenames(); + + worker_is_busy(WORKER_DISCOVERY_LOCK); + uv_mutex_lock(&cgroup_root_mutex); + + worker_is_busy(WORKER_DISCOVERY_CLEANUP); + discovery_cleanup_all_cgroups(); + + worker_is_busy(WORKER_DISCOVERY_COPY); + discovery_copy_discovered_cgroups_to_reader(); + + uv_mutex_unlock(&cgroup_root_mutex); + + worker_is_busy(WORKER_DISCOVERY_SHARE); + discovery_share_cgroups_with_ebpf(); + + debug(D_CGROUP, "done searching for cgroups"); +} + +void cgroup_discovery_worker(void *ptr) +{ + UNUSED(ptr); + + worker_register("CGROUPSDISC"); + worker_register_job_name(WORKER_DISCOVERY_INIT, "init"); + worker_register_job_name(WORKER_DISCOVERY_FIND, "find"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new"); + worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update"); + worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup"); + worker_register_job_name(WORKER_DISCOVERY_COPY, "copy"); + worker_register_job_name(WORKER_DISCOVERY_SHARE, "share"); + worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock"); + + entrypoint_parent_process_comm = simple_pattern_create( + " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) + " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 + NULL, + SIMPLE_PATTERN_EXACT); + + while (!netdata_exit) { + worker_is_idle(); + + uv_mutex_lock(&discovery_thread.mutex); + while (!discovery_thread.start_discovery) + uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); + discovery_thread.start_discovery = 0; + uv_mutex_unlock(&discovery_thread.mutex); + + if (unlikely(netdata_exit)) + break; + + discovery_find_all_cgroups(); + } + + discovery_thread.exited = 1; + worker_unregister(); +} + +// ---------------------------------------------------------------------------- +// generate charts + +#define CHART_TITLE_MAX 300 + +void update_systemd_services_charts( + int update_every + , int do_cpu + , int do_mem_usage + , int do_mem_detailed + , int do_mem_failcnt + , int do_swap_usage + , int do_io + , int do_io_ops + , int do_throttle_io + , int do_throttle_ops + , int do_queued_ops + , int do_merged_ops +) { + static RRDSET + *st_cpu = NULL, + *st_mem_usage = NULL, + *st_mem_failcnt = NULL, + *st_swap_usage = NULL, + + *st_mem_detailed_cache = NULL, + *st_mem_detailed_rss = NULL, + *st_mem_detailed_mapped = NULL, + *st_mem_detailed_writeback = NULL, + *st_mem_detailed_pgfault = NULL, + *st_mem_detailed_pgmajfault = NULL, + *st_mem_detailed_pgpgin = NULL, + *st_mem_detailed_pgpgout = NULL, + + *st_io_read = NULL, + *st_io_serviced_read = NULL, + *st_throttle_io_read = NULL, + *st_throttle_ops_read = NULL, + *st_queued_ops_read = NULL, + *st_merged_ops_read = NULL, + + *st_io_write = NULL, + *st_io_serviced_write = NULL, + *st_throttle_io_write = NULL, + *st_throttle_ops_write = NULL, + *st_queued_ops_write = NULL, + *st_merged_ops_write = NULL; + + // create the charts + + if (unlikely(do_cpu && !st_cpu)) { + char title[CHART_TITLE_MAX + 1]; + snprintfz(title, CHART_TITLE_MAX, "Systemd Services CPU utilization (100%% = 1 core)"); + + st_cpu = rrdset_create_localhost( + "services" + , "cpu" + , NULL + , "cpu" + , "services.cpu" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if (unlikely(do_mem_usage && !st_mem_usage)) { + st_mem_usage = rrdset_create_localhost( + "services" + , "mem_usage" + , NULL + , "mem" + , "services.mem_usage" + , "Systemd Services Used Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 10 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(likely(do_mem_detailed)) { + if(unlikely(!st_mem_detailed_rss)) { + st_mem_detailed_rss = rrdset_create_localhost( + "services" + , "mem_rss" + , NULL + , "mem" + , "services.mem_rss" + , "Systemd Services RSS Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 20 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_mapped)) { + st_mem_detailed_mapped = rrdset_create_localhost( + "services" + , "mem_mapped" + , NULL + , "mem" + , "services.mem_mapped" + , "Systemd Services Mapped Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 30 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_cache)) { + st_mem_detailed_cache = rrdset_create_localhost( + "services" + , "mem_cache" + , NULL + , "mem" + , "services.mem_cache" + , "Systemd Services Cache Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 40 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_writeback)) { + st_mem_detailed_writeback = rrdset_create_localhost( + "services" + , "mem_writeback" + , NULL + , "mem" + , "services.mem_writeback" + , "Systemd Services Writeback Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 50 + , update_every + , RRDSET_TYPE_STACKED + ); + + } + + if(unlikely(!st_mem_detailed_pgfault)) { + st_mem_detailed_pgfault = rrdset_create_localhost( + "services" + , "mem_pgfault" + , NULL + , "mem" + , "services.mem_pgfault" + , "Systemd Services Memory Minor Page Faults" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 60 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_pgmajfault)) { + st_mem_detailed_pgmajfault = rrdset_create_localhost( + "services" + , "mem_pgmajfault" + , NULL + , "mem" + , "services.mem_pgmajfault" + , "Systemd Services Memory Major Page Faults" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_pgpgin)) { + st_mem_detailed_pgpgin = rrdset_create_localhost( + "services" + , "mem_pgpgin" + , NULL + , "mem" + , "services.mem_pgpgin" + , "Systemd Services Memory Charging Activity" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 80 + , update_every + , RRDSET_TYPE_STACKED + ); + + } + + if(unlikely(!st_mem_detailed_pgpgout)) { + st_mem_detailed_pgpgout = rrdset_create_localhost( + "services" + , "mem_pgpgout" + , NULL + , "mem" + , "services.mem_pgpgout" + , "Systemd Services Memory Uncharging Activity" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 90 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(unlikely(do_mem_failcnt && !st_mem_failcnt)) { + st_mem_failcnt = rrdset_create_localhost( + "services" + , "mem_failcnt" + , NULL + , "mem" + , "services.mem_failcnt" + , "Systemd Services Memory Limit Failures" + , "failures" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 110 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if (do_swap_usage && !st_swap_usage) { + st_swap_usage = rrdset_create_localhost( + "services" + , "swap_usage" + , NULL + , "swap" + , "services.swap_usage" + , "Systemd Services Swap Memory Used" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 100 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(likely(do_io)) { + if(unlikely(!st_io_read)) { + st_io_read = rrdset_create_localhost( + "services" + , "io_read" + , NULL + , "disk" + , "services.io_read" + , "Systemd Services Disk Read Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 120 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_io_write)) { + st_io_write = rrdset_create_localhost( + "services" + , "io_write" + , NULL + , "disk" + , "services.io_write" + , "Systemd Services Disk Write Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 130 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_io_ops)) { + if(unlikely(!st_io_serviced_read)) { + st_io_serviced_read = rrdset_create_localhost( + "services" + , "io_ops_read" + , NULL + , "disk" + , "services.io_ops_read" + , "Systemd Services Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 140 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_io_serviced_write)) { + st_io_serviced_write = rrdset_create_localhost( + "services" + , "io_ops_write" + , NULL + , "disk" + , "services.io_ops_write" + , "Systemd Services Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 150 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_throttle_io)) { + if(unlikely(!st_throttle_io_read)) { + + st_throttle_io_read = rrdset_create_localhost( + "services" + , "throttle_io_read" + , NULL + , "disk" + , "services.throttle_io_read" + , "Systemd Services Throttle Disk Read Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 160 + , update_every + , RRDSET_TYPE_STACKED + ); + + } + + if(unlikely(!st_throttle_io_write)) { + st_throttle_io_write = rrdset_create_localhost( + "services" + , "throttle_io_write" + , NULL + , "disk" + , "services.throttle_io_write" + , "Systemd Services Throttle Disk Write Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 170 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_throttle_ops)) { + if(unlikely(!st_throttle_ops_read)) { + st_throttle_ops_read = rrdset_create_localhost( + "services" + , "throttle_io_ops_read" + , NULL + , "disk" + , "services.throttle_io_ops_read" + , "Systemd Services Throttle Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 180 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_throttle_ops_write)) { + st_throttle_ops_write = rrdset_create_localhost( + "services" + , "throttle_io_ops_write" + , NULL + , "disk" + , "services.throttle_io_ops_write" + , "Systemd Services Throttle Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 190 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_queued_ops)) { + if(unlikely(!st_queued_ops_read)) { + st_queued_ops_read = rrdset_create_localhost( + "services" + , "queued_io_ops_read" + , NULL + , "disk" + , "services.queued_io_ops_read" + , "Systemd Services Queued Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 200 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_queued_ops_write)) { + + st_queued_ops_write = rrdset_create_localhost( + "services" + , "queued_io_ops_write" + , NULL + , "disk" + , "services.queued_io_ops_write" + , "Systemd Services Queued Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 210 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_merged_ops)) { + if(unlikely(!st_merged_ops_read)) { + st_merged_ops_read = rrdset_create_localhost( + "services" + , "merged_io_ops_read" + , NULL + , "disk" + , "services.merged_io_ops_read" + , "Systemd Services Merged Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 220 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_merged_ops_write)) { + st_merged_ops_write = rrdset_create_localhost( + "services" + , "merged_io_ops_write" + , NULL + , "disk" + , "services.merged_io_ops_write" + , "Systemd Services Merged Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 230 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + // update the values + struct cgroup *cg; + for(cg = cgroup_root; cg ; cg = cg->next) { + if(unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg))) + continue; + + if(likely(do_cpu && cg->cpuacct_stat.updated)) { + if(unlikely(!cg->rd_cpu)){ + + + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + cg->rd_cpu = rrddim_add(st_cpu, cg->chart_id, cg->chart_title, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + } else { + cg->rd_cpu = rrddim_add(st_cpu, cg->chart_id, cg->chart_title, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + rrddim_set_by_pointer(st_cpu, cg->rd_cpu, cg->cpuacct_stat.user + cg->cpuacct_stat.system); + } + + if(likely(do_mem_usage && cg->memory.updated_usage_in_bytes)) { + if(unlikely(!cg->rd_mem_usage)) + cg->rd_mem_usage = rrddim_add(st_mem_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_usage, cg->rd_mem_usage, cg->memory.usage_in_bytes); + } + + if(likely(do_mem_detailed && cg->memory.updated_detailed)) { + if(unlikely(!cg->rd_mem_detailed_rss)) + cg->rd_mem_detailed_rss = rrddim_add(st_mem_detailed_rss, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_rss, cg->rd_mem_detailed_rss, cg->memory.total_rss); + + if(unlikely(!cg->rd_mem_detailed_mapped)) + cg->rd_mem_detailed_mapped = rrddim_add(st_mem_detailed_mapped, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_mapped, cg->rd_mem_detailed_mapped, cg->memory.total_mapped_file); + + if(unlikely(!cg->rd_mem_detailed_cache)) + cg->rd_mem_detailed_cache = rrddim_add(st_mem_detailed_cache, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_cache, cg->rd_mem_detailed_cache, cg->memory.total_cache); + + if(unlikely(!cg->rd_mem_detailed_writeback)) + cg->rd_mem_detailed_writeback = rrddim_add(st_mem_detailed_writeback, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_writeback, cg->rd_mem_detailed_writeback, cg->memory.total_writeback); + + if(unlikely(!cg->rd_mem_detailed_pgfault)) + cg->rd_mem_detailed_pgfault = rrddim_add(st_mem_detailed_pgfault, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgfault, cg->rd_mem_detailed_pgfault, cg->memory.total_pgfault); + + if(unlikely(!cg->rd_mem_detailed_pgmajfault)) + cg->rd_mem_detailed_pgmajfault = rrddim_add(st_mem_detailed_pgmajfault, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgmajfault, cg->rd_mem_detailed_pgmajfault, cg->memory.total_pgmajfault); + + if(unlikely(!cg->rd_mem_detailed_pgpgin)) + cg->rd_mem_detailed_pgpgin = rrddim_add(st_mem_detailed_pgpgin, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgpgin, cg->rd_mem_detailed_pgpgin, cg->memory.total_pgpgin); + + if(unlikely(!cg->rd_mem_detailed_pgpgout)) + cg->rd_mem_detailed_pgpgout = rrddim_add(st_mem_detailed_pgpgout, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgpgout, cg->rd_mem_detailed_pgpgout, cg->memory.total_pgpgout); + } + + if(likely(do_mem_failcnt && cg->memory.updated_failcnt)) { + if(unlikely(!cg->rd_mem_failcnt)) + cg->rd_mem_failcnt = rrddim_add(st_mem_failcnt, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_failcnt, cg->rd_mem_failcnt, cg->memory.failcnt); + } + + if(likely(do_swap_usage && cg->memory.updated_msw_usage_in_bytes)) { + if(unlikely(!cg->rd_swap_usage)) + cg->rd_swap_usage = rrddim_add(st_swap_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set_by_pointer( + st_swap_usage, + cg->rd_swap_usage, + cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? + cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0); + } else { + rrddim_set_by_pointer(st_swap_usage, cg->rd_swap_usage, cg->memory.msw_usage_in_bytes); + } + } + + if(likely(do_io && cg->io_service_bytes.updated)) { + if(unlikely(!cg->rd_io_service_bytes_read)) + cg->rd_io_service_bytes_read = rrddim_add(st_io_read, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_read, cg->rd_io_service_bytes_read, cg->io_service_bytes.Read); + + if(unlikely(!cg->rd_io_service_bytes_write)) + cg->rd_io_service_bytes_write = rrddim_add(st_io_write, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_write, cg->rd_io_service_bytes_write, cg->io_service_bytes.Write); + } + + if(likely(do_io_ops && cg->io_serviced.updated)) { + if(unlikely(!cg->rd_io_serviced_read)) + cg->rd_io_serviced_read = rrddim_add(st_io_serviced_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_serviced_read, cg->rd_io_serviced_read, cg->io_serviced.Read); + + if(unlikely(!cg->rd_io_serviced_write)) + cg->rd_io_serviced_write = rrddim_add(st_io_serviced_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_serviced_write, cg->rd_io_serviced_write, cg->io_serviced.Write); + } + + if(likely(do_throttle_io && cg->throttle_io_service_bytes.updated)) { + if(unlikely(!cg->rd_throttle_io_read)) + cg->rd_throttle_io_read = rrddim_add(st_throttle_io_read, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_io_read, cg->rd_throttle_io_read, cg->throttle_io_service_bytes.Read); + + if(unlikely(!cg->rd_throttle_io_write)) + cg->rd_throttle_io_write = rrddim_add(st_throttle_io_write, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_io_write, cg->rd_throttle_io_write, cg->throttle_io_service_bytes.Write); + } + + if(likely(do_throttle_ops && cg->throttle_io_serviced.updated)) { + if(unlikely(!cg->rd_throttle_io_serviced_read)) + cg->rd_throttle_io_serviced_read = rrddim_add(st_throttle_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_ops_read, cg->rd_throttle_io_serviced_read, cg->throttle_io_serviced.Read); + + if(unlikely(!cg->rd_throttle_io_serviced_write)) + cg->rd_throttle_io_serviced_write = rrddim_add(st_throttle_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_ops_write, cg->rd_throttle_io_serviced_write, cg->throttle_io_serviced.Write); + } + + if(likely(do_queued_ops && cg->io_queued.updated)) { + if(unlikely(!cg->rd_io_queued_read)) + cg->rd_io_queued_read = rrddim_add(st_queued_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_queued_ops_read, cg->rd_io_queued_read, cg->io_queued.Read); + + if(unlikely(!cg->rd_io_queued_write)) + cg->rd_io_queued_write = rrddim_add(st_queued_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_queued_ops_write, cg->rd_io_queued_write, cg->io_queued.Write); + } + + if(likely(do_merged_ops && cg->io_merged.updated)) { + if(unlikely(!cg->rd_io_merged_read)) + cg->rd_io_merged_read = rrddim_add(st_merged_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_merged_ops_read, cg->rd_io_merged_read, cg->io_merged.Read); + + if(unlikely(!cg->rd_io_merged_write)) + cg->rd_io_merged_write = rrddim_add(st_merged_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_merged_ops_write, cg->rd_io_merged_write, cg->io_merged.Write); + } + } + + // complete the iteration + if(likely(do_cpu)) + rrdset_done(st_cpu); + + if(likely(do_mem_usage)) + rrdset_done(st_mem_usage); + + if(unlikely(do_mem_detailed)) { + rrdset_done(st_mem_detailed_cache); + rrdset_done(st_mem_detailed_rss); + rrdset_done(st_mem_detailed_mapped); + rrdset_done(st_mem_detailed_writeback); + rrdset_done(st_mem_detailed_pgfault); + rrdset_done(st_mem_detailed_pgmajfault); + rrdset_done(st_mem_detailed_pgpgin); + rrdset_done(st_mem_detailed_pgpgout); + } + + if(likely(do_mem_failcnt)) + rrdset_done(st_mem_failcnt); + + if(likely(do_swap_usage)) + rrdset_done(st_swap_usage); + + if(likely(do_io)) { + rrdset_done(st_io_read); + rrdset_done(st_io_write); + } + + if(likely(do_io_ops)) { + rrdset_done(st_io_serviced_read); + rrdset_done(st_io_serviced_write); + } + + if(likely(do_throttle_io)) { + rrdset_done(st_throttle_io_read); + rrdset_done(st_throttle_io_write); + } + + if(likely(do_throttle_ops)) { + rrdset_done(st_throttle_ops_read); + rrdset_done(st_throttle_ops_write); + } + + if(likely(do_queued_ops)) { + rrdset_done(st_queued_ops_read); + rrdset_done(st_queued_ops_write); + } + + if(likely(do_merged_ops)) { + rrdset_done(st_merged_ops_read); + rrdset_done(st_merged_ops_write); + } +} + +static inline char *cgroup_chart_type(char *buffer, const char *id, size_t len) { + if(buffer[0]) return buffer; + + if(id[0] == '\0' || (id[0] == '/' && id[1] == '\0')) + strncpy(buffer, "cgroup_root", len); + else + snprintfz(buffer, len, "%s%s", cgroup_chart_id_prefix, id); + + netdata_fix_chart_id(buffer); + return buffer; +} + +static inline unsigned long long cpuset_str2ull(char **s) { + unsigned long long n = 0; + char c; + for(c = **s; c >= '0' && c <= '9' ; c = *(++*s)) { + n *= 10; + n += c - '0'; + } + return n; +} + +static inline void update_cpu_limits(char **filename, unsigned long long *value, struct cgroup *cg) { + if(*filename) { + int ret = -1; + + if(value == &cg->cpuset_cpus) { + static char *buf = NULL; + static size_t buf_size = 0; + + if(!buf) { + buf_size = 100U + 6 * get_system_cpus(); // taken from kernel/cgroup/cpuset.c + buf = mallocz(buf_size + 1); + } + + ret = read_file(*filename, buf, buf_size); + + if(!ret) { + char *s = buf; + unsigned long long ncpus = 0; + + // parse the cpuset string and calculate the number of cpus the cgroup is allowed to use + while(*s) { + unsigned long long n = cpuset_str2ull(&s); + ncpus++; + if(*s == ',') { + s++; + continue; + } + if(*s == '-') { + s++; + unsigned long long m = cpuset_str2ull(&s); + ncpus += m - n; // calculate the number of cpus in the region + } + s++; + } + + if(likely(ncpus)) *value = ncpus; + } + } + else if(value == &cg->cpu_cfs_period) { + ret = read_single_number_file(*filename, value); + } + else if(value == &cg->cpu_cfs_quota) { + ret = read_single_number_file(*filename, value); + } + else ret = -1; + + if(ret) { + error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename); + freez(*filename); + *filename = NULL; + } + } +} + +static inline void update_cpu_limits2(struct cgroup *cg) { + if(cg->filename_cpu_cfs_quota){ + static procfile *ff = NULL; + + ff = procfile_reopen(ff, cg->filename_cpu_cfs_quota, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + goto cpu_limits2_err; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + goto cpu_limits2_err; + } + + unsigned long lines = procfile_lines(ff); + + if (unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1 lines.", cg->filename_cpu_cfs_quota); + return; + } + + cg->cpu_cfs_period = str2ull(procfile_lineword(ff, 0, 1)); + cg->cpuset_cpus = get_system_cpus(); + + char *s = "max\n\0"; + if(strcmp(s, procfile_lineword(ff, 0, 0)) == 0){ + cg->cpu_cfs_quota = cg->cpu_cfs_period * cg->cpuset_cpus; + } else { + cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0)); + } + debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota); + return; + +cpu_limits2_err: + error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, cg->filename_cpu_cfs_quota); + freez(cg->filename_cpu_cfs_quota); + cg->filename_cpu_cfs_quota = NULL; + + } +} + +static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED **chart_var, unsigned long long *value, const char *chart_var_name, struct cgroup *cg) { + if(*filename) { + if(unlikely(!*chart_var)) { + *chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, chart_var_name); + if(!*chart_var) { + error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, chart_var_name); + freez(*filename); + *filename = NULL; + } + } + + if(*filename && *chart_var) { + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + if(read_single_number_file(*filename, value)) { + error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename); + freez(*filename); + *filename = NULL; + } + else { + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + return 1; + } + } else { + char buffer[30 + 1]; + int ret = read_file(*filename, buffer, 30); + if(ret) { + error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename); + freez(*filename); + *filename = NULL; + return 0; + } + char *s = "max\n\0"; + if(strcmp(s, buffer) == 0){ + *value = UINT64_MAX; + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + return 1; + } + *value = str2ull(buffer); + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + return 1; + } + } + } + return 0; +} + +void update_cgroup_charts(int update_every) { + debug(D_CGROUP, "updating cgroups charts"); + + char type[RRD_ID_LENGTH_MAX + 1]; + char title[CHART_TITLE_MAX + 1]; + + int services_do_cpu = 0, + services_do_mem_usage = 0, + services_do_mem_detailed = 0, + services_do_mem_failcnt = 0, + services_do_swap_usage = 0, + services_do_io = 0, + services_do_io_ops = 0, + services_do_throttle_io = 0, + services_do_throttle_ops = 0, + services_do_queued_ops = 0, + services_do_merged_ops = 0; + + struct cgroup *cg; + for(cg = cgroup_root; cg ; cg = cg->next) { + if(unlikely(!cg->enabled || cg->pending_renames)) + continue; + + if(likely(cgroup_enable_systemd_services && is_cgroup_systemd_service(cg))) { + if(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES) services_do_cpu++; + + if(cgroup_enable_systemd_services_detailed_memory && cg->memory.updated_detailed && cg->memory.enabled_detailed) services_do_mem_detailed++; + if(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_mem_usage++; + if(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES) services_do_mem_failcnt++; + if(cg->memory.updated_msw_usage_in_bytes && cg->memory.enabled_msw_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_swap_usage++; + + if(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_io++; + if(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_io_ops++; + if(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_io++; + if(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_ops++; + if(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES) services_do_queued_ops++; + if(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES) services_do_merged_ops++; + continue; + } + + type[0] = '\0'; + + if(likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_cpu)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core)"); + + cg->st_cpu = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu" : "cgroup.cpu" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_cpu, cg->chart_labels); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_cpu, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + } + else { + rrddim_add(cg->st_cpu, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_cpu, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + rrddim_set(cg->st_cpu, "user", cg->cpuacct_stat.user); + rrddim_set(cg->st_cpu, "system", cg->cpuacct_stat.system); + rrdset_done(cg->st_cpu); + + if(likely(cg->filename_cpuset_cpus || cg->filename_cpu_cfs_period || cg->filename_cpu_cfs_quota)) { + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + update_cpu_limits(&cg->filename_cpuset_cpus, &cg->cpuset_cpus, cg); + update_cpu_limits(&cg->filename_cpu_cfs_period, &cg->cpu_cfs_period, cg); + update_cpu_limits(&cg->filename_cpu_cfs_quota, &cg->cpu_cfs_quota, cg); + } else { + update_cpu_limits2(cg); + } + + if(unlikely(!cg->chart_var_cpu_limit)) { + cg->chart_var_cpu_limit = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_cpu, "cpu_limit"); + if(!cg->chart_var_cpu_limit) { + error("Cannot create cgroup %s chart variable 'cpu_limit'. Will not update its limit anymore.", cg->id); + if(cg->filename_cpuset_cpus) freez(cg->filename_cpuset_cpus); + cg->filename_cpuset_cpus = NULL; + if(cg->filename_cpu_cfs_period) freez(cg->filename_cpu_cfs_period); + cg->filename_cpu_cfs_period = NULL; + if(cg->filename_cpu_cfs_quota) freez(cg->filename_cpu_cfs_quota); + cg->filename_cpu_cfs_quota = NULL; + } + } + else { + NETDATA_DOUBLE value = 0, quota = 0; + + if(likely( ((!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) && (cg->filename_cpuset_cpus || (cg->filename_cpu_cfs_period && cg->filename_cpu_cfs_quota))) + || ((cg->options & CGROUP_OPTIONS_IS_UNIFIED) && cg->filename_cpu_cfs_quota))) { + if(unlikely(cg->cpu_cfs_quota > 0)) + quota = (NETDATA_DOUBLE)cg->cpu_cfs_quota / (NETDATA_DOUBLE)cg->cpu_cfs_period; + + if(unlikely(quota > 0 && quota < cg->cpuset_cpus)) + value = quota * 100; + else + value = (NETDATA_DOUBLE)cg->cpuset_cpus * 100; + } + if(likely(value)) { + if(unlikely(!cg->st_cpu_limit)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits"); + + cg->st_cpu_limit = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_limit" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_limit" : "cgroup.cpu_limit" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority - 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_limit, cg->chart_labels); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) + rrddim_add(cg->st_cpu_limit, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE); + else + rrddim_add(cg->st_cpu_limit, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); + cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + } + + NETDATA_DOUBLE cpu_usage = 0; + cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + NETDATA_DOUBLE cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (value * update_every); + + rrdset_isnot_obsolete(cg->st_cpu_limit); + + rrddim_set(cg->st_cpu_limit, "used", (cpu_used > 0)?cpu_used:0); + + cg->prev_cpu_usage = cpu_usage; + + rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, value); + rrdset_done(cg->st_cpu_limit); + } + else { + if(unlikely(cg->st_cpu_limit)) { + rrdset_is_obsolete(cg->st_cpu_limit); + cg->st_cpu_limit = NULL; + } + rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, NAN); + } + } + } + } + + if (likely(cg->cpuacct_cpu_throttling.updated && cg->cpuacct_cpu_throttling.enabled == CONFIG_BOOLEAN_YES)) { + if (unlikely(!cg->st_cpu_nr_throttled)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Runnable Periods"); + + cg->st_cpu_nr_throttled = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttled" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled" : "cgroup.throttled" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 10 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_nr_throttled, cg->chart_labels); + rrddim_add(cg->st_cpu_nr_throttled, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else { + rrddim_set(cg->st_cpu_nr_throttled, "throttled", cg->cpuacct_cpu_throttling.nr_throttled_perc); + rrdset_done(cg->st_cpu_nr_throttled); + } + + if (unlikely(!cg->st_cpu_throttled_time)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Time Duration"); + + cg->st_cpu_throttled_time = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttled_duration" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 15 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_throttled_time, cg->chart_labels); + rrddim_add(cg->st_cpu_throttled_time, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); + } else { + rrddim_set(cg->st_cpu_throttled_time, "duration", cg->cpuacct_cpu_throttling.throttled_time); + rrdset_done(cg->st_cpu_throttled_time); + } + } + + if (likely(cg->cpuacct_cpu_shares.updated && cg->cpuacct_cpu_shares.enabled == CONFIG_BOOLEAN_YES)) { + if (unlikely(!cg->st_cpu_shares)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Time Relative Share"); + + cg->st_cpu_shares = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_shares" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_shares" : "cgroup.cpu_shares" + , title + , "shares" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 20 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_shares, cg->chart_labels); + rrddim_add(cg->st_cpu_shares, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else { + rrddim_set(cg->st_cpu_shares, "shares", cg->cpuacct_cpu_shares.shares); + rrdset_done(cg->st_cpu_shares); + } + } + + if(likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) { + char id[RRD_ID_LENGTH_MAX + 1]; + unsigned int i; + + if(unlikely(!cg->st_cpu_per_core)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core) Per Core"); + + cg->st_cpu_per_core = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_per_core" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 100 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels); + + for(i = 0; i < cg->cpuacct_usage.cpus; i++) { + snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); + rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + for(i = 0; i < cg->cpuacct_usage.cpus ;i++) { + snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); + rrddim_set(cg->st_cpu_per_core, id, cg->cpuacct_usage.cpu_percpu[i]); + } + rrdset_done(cg->st_cpu_per_core); + } + + if(likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_mem)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Usage"); + + cg->st_mem = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem" : "cgroup.mem" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 220 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_mem, cg->chart_labels); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + if(cg->memory.detailed_has_swap) + rrddim_add(cg->st_mem, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_add(cg->st_mem, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } else { + rrddim_add(cg->st_mem, "anon", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "kernel_stack", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "slab", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "sock", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "anon_thp", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + } + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set(cg->st_mem, "cache", cg->memory.total_cache); + rrddim_set(cg->st_mem, "rss", (cg->memory.total_rss > cg->memory.total_rss_huge)?(cg->memory.total_rss - cg->memory.total_rss_huge):0); + + if(cg->memory.detailed_has_swap) + rrddim_set(cg->st_mem, "swap", cg->memory.total_swap); + + rrddim_set(cg->st_mem, "rss_huge", cg->memory.total_rss_huge); + rrddim_set(cg->st_mem, "mapped_file", cg->memory.total_mapped_file); + } else { + rrddim_set(cg->st_mem, "anon", cg->memory.anon); + rrddim_set(cg->st_mem, "kernel_stack", cg->memory.kernel_stack); + rrddim_set(cg->st_mem, "slab", cg->memory.slab); + rrddim_set(cg->st_mem, "sock", cg->memory.sock); + rrddim_set(cg->st_mem, "anon_thp", cg->memory.anon_thp); + rrddim_set(cg->st_mem, "file", cg->memory.total_mapped_file); + } + rrdset_done(cg->st_mem); + + if(unlikely(!cg->st_writeback)) { + snprintfz(title, CHART_TITLE_MAX, "Writeback Memory"); + + cg->st_writeback = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "writeback" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 300 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_writeback, cg->chart_labels); + + if(cg->memory.detailed_has_dirty) + rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_add(cg->st_writeback, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + if(cg->memory.detailed_has_dirty) + rrddim_set(cg->st_writeback, "dirty", cg->memory.total_dirty); + + rrddim_set(cg->st_writeback, "writeback", cg->memory.total_writeback); + rrdset_done(cg->st_writeback); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + if(unlikely(!cg->st_mem_activity)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Activity"); + + cg->st_mem_activity = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_activity" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_activity" : "cgroup.mem_activity" + , title + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 400 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_mem_activity, cg->chart_labels); + + rrddim_add(cg->st_mem_activity, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_mem_activity, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_mem_activity, "pgpgin", cg->memory.total_pgpgin); + rrddim_set(cg->st_mem_activity, "pgpgout", cg->memory.total_pgpgout); + rrdset_done(cg->st_mem_activity); + } + + if(unlikely(!cg->st_pgfaults)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults"); + + cg->st_pgfaults = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "pgfaults" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults" + , title + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 500 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_pgfaults, cg->chart_labels); + + rrddim_add(cg->st_pgfaults, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_pgfaults, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_pgfaults, "pgfault", cg->memory.total_pgfault); + rrddim_set(cg->st_pgfaults, "pgmajfault", cg->memory.total_pgmajfault); + rrdset_done(cg->st_pgfaults); + } + + if(likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_mem_usage)) { + snprintfz(title, CHART_TITLE_MAX, "Used Memory"); + + cg->st_mem_usage = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_usage" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 210 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_mem_usage, cg->chart_labels); + + rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(cg->st_mem_usage, "ram", cg->memory.usage_in_bytes); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set( + cg->st_mem_usage, + "swap", + cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? + cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0); + } else { + rrddim_set(cg->st_mem_usage, "swap", cg->memory.msw_usage_in_bytes); + } + rrdset_done(cg->st_mem_usage); + + if (likely(update_memory_limits(&cg->filename_memory_limit, &cg->chart_var_memory_limit, &cg->memory_limit, "memory_limit", cg))) { + static unsigned long long ram_total = 0; + + if(unlikely(!ram_total)) { + procfile *ff = NULL; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo"); + ff = procfile_open(config_get("plugin:cgroups", "meminfo filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + + if(likely(ff)) + ff = procfile_readall(ff); + if(likely(ff && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8))) + ram_total = str2ull(procfile_word(ff, 1)) * 1024; + else { + error("Cannot read file %s. Will not update cgroup %s RAM limit anymore.", filename, cg->id); + freez(cg->filename_memory_limit); + cg->filename_memory_limit = NULL; + } + + procfile_close(ff); + } + + if(likely(ram_total)) { + unsigned long long memory_limit = ram_total; + + if(unlikely(cg->memory_limit < ram_total)) + memory_limit = cg->memory_limit; + + if(unlikely(!cg->st_mem_usage_limit)) { + snprintfz(title, CHART_TITLE_MAX, "Used RAM within the limits"); + + cg->st_mem_usage_limit = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_usage_limit" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit": "cgroup.mem_usage_limit" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 200 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_mem_usage_limit, cg->chart_labels); + + rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrdset_isnot_obsolete(cg->st_mem_usage_limit); + + rrddim_set(cg->st_mem_usage_limit, "available", memory_limit - cg->memory.usage_in_bytes); + rrddim_set(cg->st_mem_usage_limit, "used", cg->memory.usage_in_bytes); + rrdset_done(cg->st_mem_usage_limit); + + if (unlikely(!cg->st_mem_utilization)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Utilization"); + + cg->st_mem_utilization = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_utilization" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_utilization" : "cgroup.mem_utilization" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 199 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_mem_utilization, cg->chart_labels); + + rrddim_add(cg->st_mem_utilization, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + if (memory_limit) { + rrdset_isnot_obsolete(cg->st_mem_utilization); + + rrddim_set( + cg->st_mem_utilization, "utilization", cg->memory.usage_in_bytes * 100 / memory_limit); + rrdset_done(cg->st_mem_utilization); + } + } + } + else { + if(unlikely(cg->st_mem_usage_limit)) { + rrdset_is_obsolete(cg->st_mem_usage_limit); + cg->st_mem_usage_limit = NULL; + } + + if(unlikely(cg->st_mem_utilization)) { + rrdset_is_obsolete(cg->st_mem_utilization); + cg->st_mem_utilization = NULL; + } + } + + update_memory_limits(&cg->filename_memoryswap_limit, &cg->chart_var_memoryswap_limit, &cg->memoryswap_limit, "memory_and_swap_limit", cg); + } + + if(likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_mem_failcnt)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures"); + + cg->st_mem_failcnt = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_failcnt" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt" + , title + , "count" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 250 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_mem_failcnt, cg->chart_labels); + + rrddim_add(cg->st_mem_failcnt, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_mem_failcnt, "failures", cg->memory.failcnt); + rrdset_done(cg->st_mem_failcnt); + } + + if(likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_io)) { + snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks)"); + + cg->st_io = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io" + , title + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_io, cg->chart_labels); + + rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_io, "read", cg->io_service_bytes.Read); + rrddim_set(cg->st_io, "write", cg->io_service_bytes.Write); + rrdset_done(cg->st_io); + } + + if(likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_serviced_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks)"); + + cg->st_serviced_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "serviced_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.serviced_ops" : "cgroup.serviced_ops" + , title + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_serviced_ops, cg->chart_labels); + + rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_serviced_ops, "read", cg->io_serviced.Read); + rrddim_set(cg->st_serviced_ops, "write", cg->io_serviced.Write); + rrdset_done(cg->st_serviced_ops); + } + + if(likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_throttle_io)) { + snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks)"); + + cg->st_throttle_io = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttle_io" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_io" : "cgroup.throttle_io" + , title + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_throttle_io, cg->chart_labels); + + rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_throttle_io, "read", cg->throttle_io_service_bytes.Read); + rrddim_set(cg->st_throttle_io, "write", cg->throttle_io_service_bytes.Write); + rrdset_done(cg->st_throttle_io); + } + + if(likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_throttle_serviced_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks)"); + + cg->st_throttle_serviced_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttle_serviced_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops" + , title + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_throttle_serviced_ops, cg->chart_labels); + + rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_throttle_serviced_ops, "read", cg->throttle_io_serviced.Read); + rrddim_set(cg->st_throttle_serviced_ops, "write", cg->throttle_io_serviced.Write); + rrdset_done(cg->st_throttle_serviced_ops); + } + + if(likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_queued_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks)"); + + cg->st_queued_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "queued_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.queued_ops" : "cgroup.queued_ops" + , title + , "operations" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2000 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_queued_ops, cg->chart_labels); + + rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(cg->st_queued_ops, "read", cg->io_queued.Read); + rrddim_set(cg->st_queued_ops, "write", cg->io_queued.Write); + rrdset_done(cg->st_queued_ops); + } + + if(likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_merged_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks)"); + + cg->st_merged_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "merged_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops" + , title + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2100 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_merged_ops, cg->chart_labels); + + rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_merged_ops, "read", cg->io_merged.Read); + rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write); + rrdset_done(cg->st_merged_ops); + } + + if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { + struct pressure *res = &cg->cpu_pressure; + + if (likely(res->updated && res->some.enabled)) { + struct pressure_charts *pcs; + pcs = &res->some; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_some_pressure" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2200 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_some_pressure_stall_time" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" : "cgroup.cpu_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2220 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU full pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_full_pressure" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2240 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_full_pressure_stall_time" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" : "cgroup.cpu_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2260 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + res = &cg->memory_pressure; + + if (likely(res->updated && res->some.enabled)) { + struct pressure_charts *pcs; + pcs = &res->some; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_some_pressure" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2300 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "memory_some_pressure_stall_time" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : "cgroup.memory_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2320 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure"); + + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_full_pressure" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2340 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "memory_full_pressure_stall_time" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : "cgroup.memory_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2360 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + res = &cg->io_pressure; + + if (likely(res->updated && res->some.enabled)) { + struct pressure_charts *pcs; + pcs = &res->some; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_some_pressure" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2400 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_some_pressure_stall_time" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" : "cgroup.io_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2420 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2440 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure_stall_time" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" : "cgroup.io_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2460 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + } + } + + if(likely(cgroup_enable_systemd_services)) + update_systemd_services_charts(update_every, services_do_cpu, services_do_mem_usage, services_do_mem_detailed + , services_do_mem_failcnt, services_do_swap_usage, services_do_io + , services_do_io_ops, services_do_throttle_io, services_do_throttle_ops + , services_do_queued_ops, services_do_merged_ops + ); + + debug(D_CGROUP, "done updating cgroups charts"); +} + +// ---------------------------------------------------------------------------- +// cgroups main + +static void cgroup_main_cleanup(void *ptr) { + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + usec_t max = 2 * USEC_PER_SEC, step = 50000; + + if (!discovery_thread.exited) { + info("stopping discovery thread worker"); + uv_mutex_lock(&discovery_thread.mutex); + discovery_thread.start_discovery = 1; + uv_cond_signal(&discovery_thread.cond_var); + uv_mutex_unlock(&discovery_thread.mutex); + } + + info("waiting for discovery thread to finish..."); + + while (!discovery_thread.exited && max > 0) { + max -= step; + sleep_usec(step); + } + + if (shm_mutex_cgroup_ebpf != SEM_FAILED) { + sem_close(shm_mutex_cgroup_ebpf); + } + + if (shm_cgroup_ebpf.header) { + munmap(shm_cgroup_ebpf.header, shm_cgroup_ebpf.header->body_length); + } + + if (shm_fd_cgroup_ebpf > 0) { + close(shm_fd_cgroup_ebpf); + } + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *cgroups_main(void *ptr) { + worker_register("CGROUPS"); + worker_register_job_name(WORKER_CGROUPS_LOCK, "lock"); + worker_register_job_name(WORKER_CGROUPS_READ, "read"); + worker_register_job_name(WORKER_CGROUPS_CHART, "chart"); + + netdata_thread_cleanup_push(cgroup_main_cleanup, ptr); + + if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) { + is_inside_k8s = 1; + cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_YES; + } + + read_cgroup_plugin_configuration(); + netdata_cgroup_ebpf_initialize_shm(); + + if (uv_mutex_init(&cgroup_root_mutex)) { + error("CGROUP: cannot initialize mutex for the main cgroup list"); + goto exit; + } + + // dispatch a discovery worker thread + discovery_thread.start_discovery = 0; + discovery_thread.exited = 0; + + if (uv_mutex_init(&discovery_thread.mutex)) { + error("CGROUP: cannot initialize mutex for discovery thread"); + goto exit; + } + if (uv_cond_init(&discovery_thread.cond_var)) { + error("CGROUP: cannot initialize conditional variable for discovery thread"); + goto exit; + } + + int error = uv_thread_create(&discovery_thread.thread, cgroup_discovery_worker, NULL); + if (error) { + error("CGROUP: cannot create thread worker. uv_thread_create(): %s", uv_strerror(error)); + goto exit; + } + uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]"); + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = cgroup_update_every * USEC_PER_SEC; + usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0; + + while(!netdata_exit) { + worker_is_idle(); + + usec_t hb_dt = heartbeat_next(&hb, step); + if(unlikely(netdata_exit)) break; + + find_dt += hb_dt; + if (unlikely(find_dt >= find_every || (!is_inside_k8s && cgroups_check))) { + uv_cond_signal(&discovery_thread.cond_var); + discovery_thread.start_discovery = 1; + find_dt = 0; + cgroups_check = 0; + } + + worker_is_busy(WORKER_CGROUPS_LOCK); + uv_mutex_lock(&cgroup_root_mutex); + + worker_is_busy(WORKER_CGROUPS_READ); + read_all_discovered_cgroups(cgroup_root); + + worker_is_busy(WORKER_CGROUPS_CHART); + update_cgroup_charts(cgroup_update_every); + + worker_is_idle(); + uv_mutex_unlock(&cgroup_root_mutex); + } + +exit: + worker_unregister(); + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h new file mode 100644 index 0000000..d1adf8a --- /dev/null +++ b/collectors/cgroups.plugin/sys_fs_cgroup.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SYS_FS_CGROUP_H +#define NETDATA_SYS_FS_CGROUP_H 1 + +#include "daemon/common.h" + +#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 +#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 +#define CGROUP_OPTIONS_IS_UNIFIED 0x00000004 + +typedef struct netdata_ebpf_cgroup_shm_header { + int cgroup_root_count; + int cgroup_max; + int systemd_enabled; + int __pad; + size_t body_length; +} netdata_ebpf_cgroup_shm_header_t; + +#define CGROUP_EBPF_NAME_SHARED_LENGTH 256 + +typedef struct netdata_ebpf_cgroup_shm_body { + // Considering what is exposed in this link https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits + // this length is enough to store what we want. + char name[CGROUP_EBPF_NAME_SHARED_LENGTH]; + uint32_t hash; + uint32_t options; + int enabled; + char path[FILENAME_MAX + 1]; +} netdata_ebpf_cgroup_shm_body_t; + +typedef struct netdata_ebpf_cgroup_shm { + netdata_ebpf_cgroup_shm_header_t *header; + netdata_ebpf_cgroup_shm_body_t *body; +} netdata_ebpf_cgroup_shm_t; + +#define NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME "netdata_shm_cgroup_ebpf" +#define NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME "/netdata_sem_cgroup_ebpf" + +#include "../proc.plugin/plugin_proc.h" + +char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data); + +#endif //NETDATA_SYS_FS_CGROUP_H diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c new file mode 100644 index 0000000..25939a9 --- /dev/null +++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "test_cgroups_plugin.h" +#include "libnetdata/required_dummies.h" + +RRDHOST *localhost; +int netdata_zero_metrics_enabled = 1; +struct config netdata_config; +char *netdata_configured_primary_plugins_dir = NULL; + +struct k8s_test_data { + char *data; + char *name; + char *key[3]; + char *value[3]; + + const char *result_key[3]; + const char *result_value[3]; + int result_ls[3]; + int i; +}; + +static int read_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) +{ + struct k8s_test_data *test_data = (struct k8s_test_data *)data; + + test_data->result_key[test_data->i] = name; + test_data->result_value[test_data->i] = value; + test_data->result_ls[test_data->i] = ls; + + test_data->i++; + + return 1; +} + +static void test_k8s_parse_resolved_name(void **state) +{ + UNUSED(state); + + DICTIONARY *labels = rrdlabels_create(); + + struct k8s_test_data test_data[] = { + // One label + { .data = "name label1=\"value1\"", + .name = "name", + .key[0] = "label1", .value[0] = "value1" }, + + // Three labels + { .data = "name label1=\"value1\",label2=\"value2\",label3=\"value3\"", + .name = "name", + .key[0] = "label1", .value[0] = "value1", + .key[1] = "label2", .value[1] = "value2", + .key[2] = "label3", .value[2] = "value3" }, + + // Comma at the end of the data string + { .data = "name label1=\"value1\",", + .name = "name", + .key[0] = "label1", .value[0] = "value1" }, + + // Equals sign in the value + // { .data = "name label1=\"value=1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value=1" }, + + // Double quotation mark in the value + // { .data = "name label1=\"value\"1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value" }, + + // Escaped double quotation mark in the value + // { .data = "name label1=\"value\\\"1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value\\\"1" }, + + // Equals sign in the key + // { .data = "name label=1=\"value1\"", + // .name = "name", + // .key[0] = "label", .value[0] = "1=\"value1\"" }, + + // Skipped value + // { .data = "name label1=,label2=\"value2\"", + // .name = "name", + // .key[0] = "label2", .value[0] = "value2" }, + + // A pair of equals signs + { .data = "name= =", + .name = "name=" }, + + // A pair of commas + { .data = "name, ,", + .name = "name," }, + + { .data = NULL } + }; + + for (int i = 0; test_data[i].data != NULL; i++) { + char *data = strdup(test_data[i].data); + + char *name = k8s_parse_resolved_name_and_labels(labels, data); + + assert_string_equal(name, test_data[i].name); + + rrdlabels_walkthrough_read(labels, read_label_callback, &test_data[i]); + + for (int l = 0; l < 3 && test_data[i].key[l] != NULL; l++) { + char *key = test_data[i].key[l]; + char *value = test_data[i].value[l]; + + const char *result_key = test_data[i].result_key[l]; + const char *result_value = test_data[i].result_value[l]; + int ls = test_data[i].result_ls[l]; + + assert_string_equal(key, result_key); + assert_string_equal(value, result_value); + assert_int_equal(RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S, ls); + } + + free(data); + } +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_k8s_parse_resolved_name), + }; + + int test_res = cmocka_run_group_tests_name("test_k8s_parse_resolved_name", tests, NULL, NULL); + + return test_res; +} diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.h b/collectors/cgroups.plugin/tests/test_cgroups_plugin.h new file mode 100644 index 0000000..3d68e92 --- /dev/null +++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef TEST_CGROUPS_PLUGIN_H +#define TEST_CGROUPS_PLUGIN_H 1 + +#include "libnetdata/libnetdata.h" + +#include "../sys_fs_cgroup.h" + +#include +#include +#include +#include +#include + +#endif /* TEST_CGROUPS_PLUGIN_H */ diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c new file mode 100644 index 0000000..498f649 --- /dev/null +++ b/collectors/cgroups.plugin/tests/test_doubles.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "test_cgroups_plugin.h" + +void rrdset_is_obsolete(RRDSET *st) +{ + UNUSED(st); +} + +void rrdset_isnot_obsolete(RRDSET *st) +{ + UNUSED(st); +} + +struct mountinfo *mountinfo_read(int do_statvfs) +{ + UNUSED(do_statvfs); + + return NULL; +} + +struct mountinfo * +mountinfo_find_by_filesystem_mount_source(struct mountinfo *root, const char *filesystem, const char *mount_source) +{ + UNUSED(root); + UNUSED(filesystem); + UNUSED(mount_source); + + return NULL; +} + +struct mountinfo * +mountinfo_find_by_filesystem_super_option(struct mountinfo *root, const char *filesystem, const char *super_options) +{ + UNUSED(root); + UNUSED(filesystem); + UNUSED(super_options); + + return NULL; +} + +void mountinfo_free_all(struct mountinfo *mi) +{ + UNUSED(mi); +} + +RRDSET *rrdset_create_custom( + RRDHOST *host, const char *type, const char *id, const char *name, const char *family, const char *context, + const char *title, const char *units, const char *plugin, const char *module, long priority, int update_every, + RRDSET_TYPE chart_type, RRD_MEMORY_MODE memory_mode, long history_entries) +{ + UNUSED(host); + UNUSED(type); + UNUSED(id); + UNUSED(name); + UNUSED(family); + UNUSED(context); + UNUSED(title); + UNUSED(units); + UNUSED(plugin); + UNUSED(module); + UNUSED(priority); + UNUSED(update_every); + UNUSED(chart_type); + UNUSED(memory_mode); + UNUSED(history_entries); + + return NULL; +} + +RRDDIM *rrddim_add_custom( + RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, + RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) +{ + UNUSED(st); + UNUSED(id); + UNUSED(name); + UNUSED(multiplier); + UNUSED(divisor); + UNUSED(algorithm); + UNUSED(memory_mode); + + return NULL; +} + +collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) +{ + UNUSED(st); + UNUSED(id); + UNUSED(value); + + return 0; +} + +collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value) +{ + UNUSED(st); + UNUSED(rd); + UNUSED(value); + + return 0; +} + +const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name) +{ + UNUSED(st); + UNUSED(name); + + return NULL; +} + +void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value) +{ + UNUSED(st); + UNUSED(rsa); + UNUSED(value); +} + +void rrdset_next_usec(RRDSET *st, usec_t microseconds) +{ + UNUSED(st); + UNUSED(microseconds); +} + +void rrdset_done(RRDSET *st) +{ + UNUSED(st); +} + +void update_pressure_charts(struct pressure_charts *charts) +{ + UNUSED(charts); +} + +void netdev_rename_device_add( + const char *host_device, const char *container_device, const char *container_name, DICTIONARY *labels, const char *ctx_prefix) +{ + UNUSED(host_device); + UNUSED(container_device); + UNUSED(container_name); + UNUSED(labels); + UNUSED(ctx_prefix); +} + +void netdev_rename_device_del(const char *host_device) +{ + UNUSED(host_device); +} + +void rrdcalc_update_rrdlabels(RRDSET *st) { + (void)st; +} + +void db_execute(const char *cmd) +{ + UNUSED(cmd); +} diff --git a/collectors/charts.d.plugin/Makefile.am b/collectors/charts.d.plugin/Makefile.am new file mode 100644 index 0000000..03c7f0a --- /dev/null +++ b/collectors/charts.d.plugin/Makefile.am @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +CLEANFILES = \ + charts.d.plugin \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_libconfig_DATA = \ + charts.d.conf \ + $(NULL) + +dist_plugins_SCRIPTS = \ + charts.d.dryrun-helper.sh \ + charts.d.plugin \ + loopsleepms.sh.inc \ + $(NULL) + +dist_noinst_DATA = \ + charts.d.plugin.in \ + README.md \ + $(NULL) + +dist_charts_SCRIPTS = \ + $(NULL) + +dist_charts_DATA = \ + $(NULL) + +userchartsconfigdir=$(configdir)/charts.d +dist_userchartsconfig_DATA = \ + $(NULL) + +# Explicitly install directories to avoid permission issues due to umask +install-exec-local: + $(INSTALL) -d $(DESTDIR)$(userchartsconfigdir) + +chartsconfigdir=$(libconfigdir)/charts.d +dist_chartsconfig_DATA = \ + $(NULL) + +include ap/Makefile.inc +include apcupsd/Makefile.inc +include example/Makefile.inc +include libreswan/Makefile.inc +include nut/Makefile.inc +include opensips/Makefile.inc +include sensors/Makefile.inc diff --git a/collectors/charts.d.plugin/README.md b/collectors/charts.d.plugin/README.md new file mode 100644 index 0000000..06f4af1 --- /dev/null +++ b/collectors/charts.d.plugin/README.md @@ -0,0 +1,198 @@ + + +# charts.d.plugin + +`charts.d.plugin` is a Netdata external plugin. It is an **orchestrator** for data collection modules written in `BASH` v4+. + +1. It runs as an independent process `ps fax` shows it +2. It is started and stopped automatically by Netdata +3. It communicates with Netdata via a unidirectional pipe (sending data to the `netdata` daemon) +4. Supports any number of data collection **modules** + +`charts.d.plugin` has been designed so that the actual script that will do data collection will be permanently in +memory, collecting data with as little overheads as possible +(i.e. initialize once, repeatedly collect values with minimal overhead). + +`charts.d.plugin` looks for scripts in `/usr/lib/netdata/charts.d`. +The scripts should have the filename suffix: `.chart.sh`. + +## Configuration + +`charts.d.plugin` itself can be configured using the configuration file `/etc/netdata/charts.d.conf` +(to edit it on your system run `/etc/netdata/edit-config charts.d.conf`). This file is also a BASH script. + +In this file, you can place statements like this: + +``` +enable_all_charts="yes" +X="yes" +Y="no" +``` + +where `X` and `Y` are the names of individual charts.d collector scripts. +When set to `yes`, charts.d will evaluate the collector script (see below). +When set to `no`, charts.d will ignore the collector script. + +The variable `enable_all_charts` sets the default enable/disable state for all charts. + +## A charts.d module + +A `charts.d.plugin` module is a BASH script defining a few functions. + +For a module called `X`, the following criteria must be met: + +1. The module script must be called `X.chart.sh` and placed in `/usr/libexec/netdata/charts.d`. + +2. If the module needs a configuration, it should be called `X.conf` and placed in `/etc/netdata/charts.d`. + The configuration file `X.conf` is also a BASH script itself. + To edit the default files supplied by Netdata, run `/etc/netdata/edit-config charts.d/X.conf`, + where `X` is the name of the module. + +3. All functions and global variables defined in the script and its configuration, must begin with `X_`. + +4. The following functions must be defined: + + - `X_check()` - returns 0 or 1 depending on whether the module is able to run or not + (following the standard Linux command line return codes: 0 = OK, the collector can operate and 1 = FAILED, + the collector cannot be used). + + - `X_create()` - creates the Netdata charts, following the standard Netdata plugin guides as described in + **[External Plugins](/collectors/plugins.d/README.md)** (commands `CHART` and `DIMENSION`). + The return value does matter: 0 = OK, 1 = FAILED. + + - `X_update()` - collects the values for the defined charts, following the standard Netdata plugin guides + as described in **[External Plugins](/collectors/plugins.d/README.md)** (commands `BEGIN`, `SET`, `END`). + The return value also matters: 0 = OK, 1 = FAILED. + +5. The following global variables are available to be set: + - `X_update_every` - is the data collection frequency for the module script, in seconds. + +The module script may use more functions or variables. But all of them must begin with `X_`. + +The standard Netdata plugin variables are also available (check **[External Plugins](/collectors/plugins.d/README.md)**). + +### X_check() + +The purpose of the BASH function `X_check()` is to check if the module can collect data (or check its config). + +For example, if the module is about monitoring a local mysql database, the `X_check()` function may attempt to +connect to a local mysql database to find out if it can read the values it needs. + +`X_check()` is run only once for the lifetime of the module. + +### X_create() + +The purpose of the BASH function `X_create()` is to create the charts and dimensions using the standard Netdata +plugin guides (**[External Plugins](/collectors/plugins.d/README.md)**). + +`X_create()` will be called just once and only after `X_check()` was successful. +You can however call it yourself when there is need for it (for example to add a new dimension to an existing chart). + +A non-zero return value will disable the collector. + +### X_update() + +`X_update()` will be called repeatedly every `X_update_every` seconds, to collect new values and send them to Netdata, +following the Netdata plugin guides (**[External Plugins](/collectors/plugins.d/README.md)**). + +The function will be called with one parameter: microseconds since the last time it was run. This value should be +appended to the `BEGIN` statement of every chart updated by the collector script. + +A non-zero return value will disable the collector. + +### Useful functions charts.d provides + +Module scripts can use the following charts.d functions: + +#### require_cmd command + +`require_cmd()` will check if a command is available in the running system. + +For example, your `X_check()` function may use it like this: + +```sh +mysql_check() { + require_cmd mysql || return 1 + return 0 +} +``` + +Using the above, if the command `mysql` is not available in the system, the `mysql` module will be disabled. + +#### fixid "string" + +`fixid()` will get a string and return a properly formatted id for a chart or dimension. + +This is an expensive function that should not be used in `X_update()`. +You can keep the generated id in a BASH associative array to have the values availables in `X_update()`, like this: + +```sh +declare -A X_ids=() +X_create() { + local name="a very bad name for id" + + X_ids[$name]="$(fixid "$name")" +} + +X_update() { + local microseconds="$1" + + ... + local name="a very bad name for id" + ... + + echo "BEGIN ${X_ids[$name]} $microseconds" + ... +} +``` + +### Debugging your collectors + +You can run `charts.d.plugin` by hand with something like this: + +```sh +# become user netdata +sudo su -s /bin/sh netdata + +# run the plugin in debug mode +/usr/libexec/netdata/plugins.d/charts.d.plugin debug 1 X Y Z +``` + +Charts.d will run in `debug` mode, with an update frequency of `1`, evaluating only the collector scripts +`X`, `Y` and `Z`. You can define zero or more module scripts. If none is defined, charts.d will evaluate all +module scripts available. + +Keep in mind that if your configs are not in `/etc/netdata`, you should do the following before running +`charts.d.plugin`: + +```sh +export NETDATA_USER_CONFIG_DIR="/path/to/etc/netdata" +``` + +Also, remember that Netdata runs `chart.d.plugin` as user `netdata` (or any other user the `netdata` process is configured to run as). + +## Running multiple instances of charts.d.plugin + +`charts.d.plugin` will call the `X_update()` function one after another. This means that a delay in collector `X` +will also delay the collection of `Y` and `Z`. + +You can have multiple `charts.d.plugin` running to overcome this problem. + +This is what you need to do: + +1. Decide a new name for the new charts.d instance: example `charts2.d`. + +2. Create/edit the files `/etc/netdata/charts.d.conf` and `/etc/netdata/charts2.d.conf` and enable / disable the + module you want each to run. Remember to set `enable_all_charts="no"` to both of them, and enable the individual + modules for each. + +3. link `/usr/libexec/netdata/plugins.d/charts.d.plugin` to `/usr/libexec/netdata/plugins.d/charts2.d.plugin`. + Netdata will spawn a new charts.d process. + +Execute the above in this order, since Netdata will (by default) attempt to start new plugins soon after they are +created in `/usr/libexec/netdata/plugins.d/`. + + diff --git a/collectors/charts.d.plugin/ap/Makefile.inc b/collectors/charts.d.plugin/ap/Makefile.inc new file mode 100644 index 0000000..a2dd375 --- /dev/null +++ b/collectors/charts.d.plugin/ap/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_charts_DATA += ap/ap.chart.sh +dist_chartsconfig_DATA += ap/ap.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += ap/README.md ap/Makefile.inc + diff --git a/collectors/charts.d.plugin/ap/README.md b/collectors/charts.d.plugin/ap/README.md new file mode 100644 index 0000000..a7953a5 --- /dev/null +++ b/collectors/charts.d.plugin/ap/README.md @@ -0,0 +1,99 @@ + + +# Access point monitoring with Netdata + +The `ap` collector visualizes data related to access points. + +## Example Netdata charts + +![image](https://cloud.githubusercontent.com/assets/2662304/12377654/9f566e88-bd2d-11e5-855a-e0ba96b8fd98.png) + +## How it works + +It does the following: + +1. Runs `iw dev` searching for interfaces that have `type AP`. + + From the same output it collects the SSIDs each AP supports by looking for lines `ssid NAME`. + + Example: + +```sh +# iw dev +phy#0 + Interface wlan0 + ifindex 3 + wdev 0x1 + addr 7c:dd:90:77:34:2a + ssid TSAOUSIS + type AP + channel 7 (2442 MHz), width: 20 MHz, center1: 2442 MHz +``` + +2. For each interface found, it runs `iw INTERFACE station dump`. + + From the output is collects: + + - rx/tx bytes + - rx/tx packets + - tx retries + - tx failed + - signal strength + - rx/tx bitrate + - expected throughput + + Example: + +```sh +# iw wlan0 station dump +Station 40:b8:37:5a:ed:5e (on wlan0) + inactive time: 910 ms + rx bytes: 15588897 + rx packets: 127772 + tx bytes: 52257763 + tx packets: 95802 + tx retries: 2162 + tx failed: 28 + signal: -43 dBm + signal avg: -43 dBm + tx bitrate: 65.0 MBit/s MCS 7 + rx bitrate: 1.0 MBit/s + expected throughput: 32.125Mbps + authorized: yes + authenticated: yes + preamble: long + WMM/WME: yes + MFP: no + TDLS peer: no +``` + +3. For each interface found, it creates 6 charts: + + - Number of Connected clients + - Bandwidth for all clients + - Packets for all clients + - Transmit Issues for all clients + - Average Signal among all clients + - Average Bitrate (including average expected throughput) among all clients + +## Configuration + +Edit the `charts.d/ap.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d/ap.conf +``` + +You can only set `ap_update_every=NUMBER` to change the data collection frequency. + +## Auto-detection + +The plugin is able to auto-detect if you are running access points on your linux box. + + diff --git a/collectors/charts.d.plugin/ap/ap.chart.sh b/collectors/charts.d.plugin/ap/ap.chart.sh new file mode 100644 index 0000000..80c9dc6 --- /dev/null +++ b/collectors/charts.d.plugin/ap/ap.chart.sh @@ -0,0 +1,179 @@ +# shellcheck shell=bash +# no need for shebang - this file is loaded from charts.d.plugin +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis +# + +# _update_every is a special variable - it holds the number of seconds +# between the calls of the _update() function +ap_update_every= +ap_priority=6900 + +declare -A ap_devs=() + +# _check is called once, to find out if this chart should be enabled or not +ap_check() { + require_cmd iw || return 1 + local ev + ev=$(run iw dev | awk ' + BEGIN { + i = ""; + ssid = ""; + ap = 0; + } + /^[ \t]+Interface / { + if( ap == 1 ) { + print "ap_devs[" i "]=\"" ssid "\"" + } + + i = $2; + ssid = ""; + ap = 0; + } + /^[ \t]+ssid / { ssid = $2; } + /^[ \t]+type AP$/ { ap = 1; } + END { + if( ap == 1 ) { + print "ap_devs[" i "]=\"" ssid "\"" + } + } + ') + eval "${ev}" + + # this should return: + # - 0 to enable the chart + # - 1 to disable the chart + + [ ${#ap_devs[@]} -gt 0 ] && return 0 + error "no devices found in AP mode, with 'iw dev'" + return 1 +} + +# _create is called once, to create the charts +ap_create() { + local ssid dev + + for dev in "${!ap_devs[@]}"; do + ssid="${ap_devs[${dev}]}" + + # create the chart with 3 dimensions + cat << EOF +CHART ap_clients.${dev} '' "Connected clients to ${ssid} on ${dev}" "clients" ${dev} ap.clients line $((ap_priority + 1)) $ap_update_every '' '' 'ap' +DIMENSION clients '' absolute 1 1 + +CHART ap_bandwidth.${dev} '' "Bandwidth for ${ssid} on ${dev}" "kilobits/s" ${dev} ap.net area $((ap_priority + 2)) $ap_update_every '' '' 'ap' +DIMENSION received '' incremental 8 1024 +DIMENSION sent '' incremental -8 1024 + +CHART ap_packets.${dev} '' "Packets for ${ssid} on ${dev}" "packets/s" ${dev} ap.packets line $((ap_priority + 3)) $ap_update_every '' '' 'ap' +DIMENSION received '' incremental 1 1 +DIMENSION sent '' incremental -1 1 + +CHART ap_issues.${dev} '' "Transmit Issues for ${ssid} on ${dev}" "issues/s" ${dev} ap.issues line $((ap_priority + 4)) $ap_update_every '' '' 'ap' +DIMENSION retries 'tx retries' incremental 1 1 +DIMENSION failures 'tx failures' incremental -1 1 + +CHART ap_signal.${dev} '' "Average Signal for ${ssid} on ${dev}" "dBm" ${dev} ap.signal line $((ap_priority + 5)) $ap_update_every '' '' 'ap' +DIMENSION signal 'average signal' absolute 1 1000 + +CHART ap_bitrate.${dev} '' "Bitrate for ${ssid} on ${dev}" "Mbps" ${dev} ap.bitrate line $((ap_priority + 6)) $ap_update_every '' '' 'ap' +DIMENSION receive '' absolute 1 1000 +DIMENSION transmit '' absolute -1 1000 +DIMENSION expected 'expected throughput' absolute 1 1000 +EOF + done + + return 0 +} + +# _update is called continuously, to collect the values +ap_update() { + # the first argument to this function is the microseconds since last update + # pass this parameter to the BEGIN statement (see below). + + # do all the work to collect / calculate the values + # for each dimension + # remember: KEEP IT SIMPLE AND SHORT + + for dev in "${!ap_devs[@]}"; do + echo + echo "DEVICE ${dev}" + iw "${dev}" station dump + done | awk ' + function zero_data() { + dev = ""; + c = 0; + rb = 0; + tb = 0; + rp = 0; + tp = 0; + tr = 0; + tf = 0; + tt = 0; + rt = 0; + s = 0; + g = 0; + e = 0; + } + function print_device() { + if(dev != "" && length(dev) > 0) { + print "BEGIN ap_clients." dev; + print "SET clients = " c; + print "END"; + print "BEGIN ap_bandwidth." dev; + print "SET received = " rb; + print "SET sent = " tb; + print "END"; + print "BEGIN ap_packets." dev; + print "SET received = " rp; + print "SET sent = " tp; + print "END"; + print "BEGIN ap_issues." dev; + print "SET retries = " tr; + print "SET failures = " tf; + print "END"; + + if( c == 0 ) c = 1; + print "BEGIN ap_signal." dev; + print "SET signal = " int(s / c); + print "END"; + print "BEGIN ap_bitrate." dev; + print "SET receive = " int(rt / c); + print "SET transmit = " int(tt / c); + print "SET expected = " int(e / c); + print "END"; + } + zero_data(); + } + BEGIN { + zero_data(); + } + /^DEVICE / { + print_device(); + dev = $2; + } + /^Station/ { c++; } + /^[ \t]+rx bytes:/ { rb += $3; } + /^[ \t]+tx bytes:/ { tb += $3; } + /^[ \t]+rx packets:/ { rp += $3; } + /^[ \t]+tx packets:/ { tp += $3; } + /^[ \t]+tx retries:/ { tr += $3; } + /^[ \t]+tx failed:/ { tf += $3; } + /^[ \t]+signal:/ { x = $2; s += x * 1000; } + /^[ \t]+rx bitrate:/ { x = $3; rt += x * 1000; } + /^[ \t]+tx bitrate:/ { x = $3; tt += x * 1000; } + /^[ \t]+expected throughput:(.*)Mbps/ { + x=$3; + sub(/Mbps/, "", x); + e += x * 1000; + } + END { + print_device(); + } + ' + + return 0 +} diff --git a/collectors/charts.d.plugin/ap/ap.conf b/collectors/charts.d.plugin/ap/ap.conf new file mode 100644 index 0000000..38fc157 --- /dev/null +++ b/collectors/charts.d.plugin/ap/ap.conf @@ -0,0 +1,23 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# GPL v3+ + +# nothing fancy to configure. +# this module will run +# iw dev - to find wireless devices in AP mode +# iw ${dev} station dump - to get connected clients +# based on the above, it generates several charts + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#ap_update_every= + +# the charts priority on the dashboard +#ap_priority=6900 + +# the number of retries to do in case of failure +# before disabling the module +#ap_retries=10 diff --git a/collectors/charts.d.plugin/apcupsd/Makefile.inc b/collectors/charts.d.plugin/apcupsd/Makefile.inc new file mode 100644 index 0000000..19cb9ca --- /dev/null +++ b/collectors/charts.d.plugin/apcupsd/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_charts_DATA += apcupsd/apcupsd.chart.sh +dist_chartsconfig_DATA += apcupsd/apcupsd.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += apcupsd/README.md apcupsd/Makefile.inc + diff --git a/collectors/charts.d.plugin/apcupsd/README.md b/collectors/charts.d.plugin/apcupsd/README.md new file mode 100644 index 0000000..f1aebf9 --- /dev/null +++ b/collectors/charts.d.plugin/apcupsd/README.md @@ -0,0 +1,21 @@ + + +# APC UPS monitoring with Netdata + +Monitors different APC UPS models and retrieves status information using `apcaccess` tool. + +## Configuration + +Edit the `charts.d/apcupsd.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d/apcupsd.conf +``` + + diff --git a/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh new file mode 100644 index 0000000..ef9a905 --- /dev/null +++ b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh @@ -0,0 +1,223 @@ +# shellcheck shell=bash +# no need for shebang - this file is loaded from charts.d.plugin +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis +# + +apcupsd_ip= +apcupsd_port= + +declare -A apcupsd_sources=( + ["local"]="127.0.0.1:3551" +) + +# how frequently to collect UPS data +apcupsd_update_every=10 + +apcupsd_timeout=3 + +# the priority of apcupsd related to other charts +apcupsd_priority=90000 + +apcupsd_get() { + run -t $apcupsd_timeout apcaccess status "$1" +} + +is_ups_alive() { + local status + status="$(apcupsd_get "$1" | sed -e 's/STATUS.*: //' -e 't' -e 'd')" + case "$status" in + "" | "COMMLOST" | "SHUTTING DOWN") return 1 ;; + *) return 0 ;; + esac +} + +apcupsd_check() { + + # this should return: + # - 0 to enable the chart + # - 1 to disable the chart + + require_cmd apcaccess || return 1 + + # backwards compatibility + if [ "${apcupsd_ip}:${apcupsd_port}" != ":" ]; then + apcupsd_sources["local"]="${apcupsd_ip}:${apcupsd_port}" + fi + + local host working=0 failed=0 + for host in "${!apcupsd_sources[@]}"; do + apcupsd_get "${apcupsd_sources[${host}]}" > /dev/null + # shellcheck disable=2181 + if [ $? -ne 0 ]; then + error "cannot get information for apcupsd server ${host} on ${apcupsd_sources[${host}]}." + failed=$((failed + 1)) + else + if ! is_ups_alive ${apcupsd_sources[${host}]}; then + error "APC UPS ${host} on ${apcupsd_sources[${host}]} is not online." + failed=$((failed + 1)) + else + working=$((working + 1)) + fi + fi + done + + if [ ${working} -eq 0 ]; then + error "No APC UPSes found available." + return 1 + fi + + return 0 +} + +apcupsd_create() { + local host + for host in "${!apcupsd_sources[@]}"; do + # create the charts + cat << EOF +CHART apcupsd_${host}.charge '' "UPS Charge" "percentage" ups apcupsd.charge area $((apcupsd_priority + 2)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION battery_charge charge absolute 1 100 + +CHART apcupsd_${host}.battery_voltage '' "UPS Battery Voltage" "Volts" ups apcupsd.battery.voltage line $((apcupsd_priority + 4)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION battery_voltage voltage absolute 1 100 +DIMENSION battery_voltage_nominal nominal absolute 1 100 + +CHART apcupsd_${host}.input_voltage '' "UPS Input Voltage" "Volts" input apcupsd.input.voltage line $((apcupsd_priority + 5)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION input_voltage voltage absolute 1 100 +DIMENSION input_voltage_min min absolute 1 100 +DIMENSION input_voltage_max max absolute 1 100 + +CHART apcupsd_${host}.input_frequency '' "UPS Input Frequency" "Hz" input apcupsd.input.frequency line $((apcupsd_priority + 6)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION input_frequency frequency absolute 1 100 + +CHART apcupsd_${host}.output_voltage '' "UPS Output Voltage" "Volts" output apcupsd.output.voltage line $((apcupsd_priority + 7)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION output_voltage voltage absolute 1 100 +DIMENSION output_voltage_nominal nominal absolute 1 100 + +CHART apcupsd_${host}.load '' "UPS Load" "percentage" ups apcupsd.load area $((apcupsd_priority)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION load load absolute 1 100 + +CHART apcupsd_${host}.load_usage '' "UPS Load Usage" "Watts" ups apcupsd.load_usage area $((apcupsd_priority + 1)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION load_usage load absolute 1 100 + +CHART apcupsd_${host}.temp '' "UPS Temperature" "Celsius" ups apcupsd.temperature line $((apcupsd_priority + 8)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION temp temp absolute 1 100 + +CHART apcupsd_${host}.time '' "UPS Time Remaining" "Minutes" ups apcupsd.time area $((apcupsd_priority + 3)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION time time absolute 1 100 + +CHART apcupsd_${host}.online '' "UPS ONLINE flag" "boolean" ups apcupsd.online line $((apcupsd_priority + 9)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION online online absolute 0 1 + +EOF + done + return 0 +} + +apcupsd_update() { + # the first argument to this function is the microseconds since last update + # pass this parameter to the BEGIN statement (see below). + + # do all the work to collect / calculate the values + # for each dimension + # remember: KEEP IT SIMPLE AND SHORT + + local host working=0 failed=0 + for host in "${!apcupsd_sources[@]}"; do + apcupsd_get "${apcupsd_sources[${host}]}" | awk " + +BEGIN { + battery_charge = 0; + battery_voltage = 0; + battery_voltage_nominal = 0; + input_voltage = 0; + input_voltage_min = 0; + input_voltage_max = 0; + input_frequency = 0; + output_voltage = 0; + output_voltage_nominal = 0; + load = 0; + temp = 0; + time = 0; + nompower = 0; + load_usage = 0; +} +/^BCHARGE.*/ { battery_charge = \$3 * 100 }; +/^BATTV.*/ { battery_voltage = \$3 * 100 }; +/^NOMBATTV.*/ { battery_voltage_nominal = \$3 * 100 }; +/^LINEV.*/ { input_voltage = \$3 * 100 }; +/^MINLINEV.*/ { input_voltage_min = \$3 * 100 }; +/^MAXLINEV.*/ { input_voltage_max = \$3 * 100 }; +/^LINEFREQ.*/ { input_frequency = \$3 * 100 }; +/^OUTPUTV.*/ { output_voltage = \$3 * 100 }; +/^NOMOUTV.*/ { output_voltage_nominal = \$3 * 100 }; +/^LOADPCT.*/ { load = \$3 * 100 }; +/^ITEMP.*/ { temp = \$3 * 100 }; +/^NOMPOWER.*/ { nompower = \$3 }; +/^TIMELEFT.*/ { time = \$3 * 100 }; +/^STATUS.*/ { online=(\$3 != \"COMMLOST\" && !(\$3 == \"SHUTTING\" && \$4 == \"DOWN\"))?1:0 }; +END { + { load_usage = nompower * load / 100 }; + + print \"BEGIN apcupsd_${host}.online $1\"; + print \"SET online = \" online; + print \"END\" + + if (online == 1) { + print \"BEGIN apcupsd_${host}.charge $1\"; + print \"SET battery_charge = \" battery_charge; + print \"END\" + + print \"BEGIN apcupsd_${host}.battery_voltage $1\"; + print \"SET battery_voltage = \" battery_voltage; + print \"SET battery_voltage_nominal = \" battery_voltage_nominal; + print \"END\" + + print \"BEGIN apcupsd_${host}.input_voltage $1\"; + print \"SET input_voltage = \" input_voltage; + print \"SET input_voltage_min = \" input_voltage_min; + print \"SET input_voltage_max = \" input_voltage_max; + print \"END\" + + print \"BEGIN apcupsd_${host}.input_frequency $1\"; + print \"SET input_frequency = \" input_frequency; + print \"END\" + + print \"BEGIN apcupsd_${host}.output_voltage $1\"; + print \"SET output_voltage = \" output_voltage; + print \"SET output_voltage_nominal = \" output_voltage_nominal; + print \"END\" + + print \"BEGIN apcupsd_${host}.load $1\"; + print \"SET load = \" load; + print \"END\" + + print \"BEGIN apcupsd_${host}.load_usage $1\"; + print \"SET load_usage = \" load_usage; + print \"END\" + + print \"BEGIN apcupsd_${host}.temp $1\"; + print \"SET temp = \" temp; + print \"END\" + + print \"BEGIN apcupsd_${host}.time $1\"; + print \"SET time = \" time; + print \"END\" + } +}" + # shellcheck disable=SC2181 + if [ $? -ne 0 ]; then + failed=$((failed + 1)) + error "failed to get values for APC UPS ${host} on ${apcupsd_sources[${host}]}" && return 1 + else + working=$((working + 1)) + fi + done + + [ $working -eq 0 ] && error "failed to get values from all APC UPSes" && return 1 + + return 0 +} diff --git a/collectors/charts.d.plugin/apcupsd/apcupsd.conf b/collectors/charts.d.plugin/apcupsd/apcupsd.conf new file mode 100644 index 0000000..679c0d6 --- /dev/null +++ b/collectors/charts.d.plugin/apcupsd/apcupsd.conf @@ -0,0 +1,25 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# GPL v3+ + +# add all your APC UPSes in this array - uncomment it too +#declare -A apcupsd_sources=( +# ["local"]="127.0.0.1:3551" +#) + +# how long to wait for apcupsd to respond +#apcupsd_timeout=3 + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#apcupsd_update_every=10 + +# the charts priority on the dashboard +#apcupsd_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#apcupsd_retries=10 diff --git a/collectors/charts.d.plugin/charts.d.conf b/collectors/charts.d.plugin/charts.d.conf new file mode 100644 index 0000000..2d32f73 --- /dev/null +++ b/collectors/charts.d.plugin/charts.d.conf @@ -0,0 +1,48 @@ +# This is the configuration for charts.d.plugin + +# Each of its collectors can read configuration either from this file +# or a NAME.conf file (where NAME is the collector name). +# The collector specific file has higher precedence. + +# This file is a shell script too. + +# ----------------------------------------------------------------------------- + +# number of seconds to run without restart +# after this time, charts.d.plugin will exit +# netdata will restart it, but a small gap +# will appear in the charts.d.plugin charts. +#restart_timeout=$[3600 * 4] + +# when making iterations, charts.d can loop more frequently +# to prevent plugins missing iterations. +# this is a percentage relative to update_every to align its +# iterations. +# The minimum is 10%, the maximum 100%. +# So, if update_every is 1 second and time_divisor is 50, +# charts.d will iterate every 500ms. +# Charts will be called to collect data only if the time +# passed since the last time the collected data is equal or +# above their update_every. +#time_divisor=50 + +# ----------------------------------------------------------------------------- + +# the default enable/disable for all charts.d collectors +# the default is "yes" +# enable_all_charts="yes" + +# BY DEFAULT ENABLED MODULES +# ap=yes +# apcupsd=yes +# libreswan=yes +# nut=yes +# opensips=yes + +# ----------------------------------------------------------------------------- +# THESE NEED TO BE SET TO "force" TO BE ENABLED + +# Nothing useful. +# Just an example charts.d plugin you can use as a template. +# example=force +# sensors=force diff --git a/collectors/charts.d.plugin/charts.d.dryrun-helper.sh b/collectors/charts.d.plugin/charts.d.dryrun-helper.sh new file mode 100755 index 0000000..91af2c5 --- /dev/null +++ b/collectors/charts.d.plugin/charts.d.dryrun-helper.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-3.0-or-later + +# shellcheck disable=SC2181 + +# will stop the script for any error +set -e + +me="$0" +name="$1" +chart="$2" +conf="$3" + +can_diff=1 + +tmp1="$(mktemp)" +tmp2="$(mktemp)" + +myset() { + set | grep -v "^_=" | grep -v "^PIPESTATUS=" | grep -v "^BASH_LINENO=" +} + +# save 2 'set' +myset >"$tmp1" +myset >"$tmp2" + +# make sure they don't differ +diff "$tmp1" "$tmp2" >/dev/null 2>&1 +if [ $? -ne 0 ]; then + # they differ, we cannot do the check + echo >&2 "$me: cannot check with diff." + can_diff=0 +fi + +# do it again, now including the script +myset >"$tmp1" + +# include the plugin and its config +if [ -f "$conf" ]; then + # shellcheck source=/dev/null + . "$conf" + if [ $? -ne 0 ]; then + echo >&2 "$me: cannot load config file $conf" + rm "$tmp1" "$tmp2" + exit 1 + fi +fi + +# shellcheck source=/dev/null +. "$chart" +if [ $? -ne 0 ]; then + echo >&2 "$me: cannot load chart file $chart" + rm "$tmp1" "$tmp2" + exit 1 +fi + +# remove all variables starting with the plugin name +myset | grep -v "^$name" >"$tmp2" + +if [ $can_diff -eq 1 ]; then + # check if they are different + # make sure they don't differ + diff "$tmp1" "$tmp2" >&2 + if [ $? -ne 0 ]; then + # they differ + rm "$tmp1" "$tmp2" + exit 1 + fi +fi + +rm "$tmp1" "$tmp2" +exit 0 diff --git a/collectors/charts.d.plugin/charts.d.plugin.in b/collectors/charts.d.plugin/charts.d.plugin.in new file mode 100755 index 0000000..9187fc2 --- /dev/null +++ b/collectors/charts.d.plugin/charts.d.plugin.in @@ -0,0 +1,732 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2017 Costa Tsaousis +# GPL v3+ +# +# charts.d.plugin allows easy development of BASH plugins +# +# if you need to run parallel charts.d processes, link this file to a different name +# in the same directory, with a .plugin suffix and netdata will start both of them, +# each will have a different config file and modules configuration directory. +# + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin" + +PROGRAM_FILE="$0" +PROGRAM_NAME="$(basename $0)" +PROGRAM_NAME="${PROGRAM_NAME/.plugin/}" +MODULE_NAME="main" + +# ----------------------------------------------------------------------------- +# create temp dir + +debug=0 +TMP_DIR= +chartsd_cleanup() { + trap '' EXIT QUIT HUP INT TERM + + if [ ! -z "$TMP_DIR" -a -d "$TMP_DIR" ]; then + [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: cleaning up temporary directory $TMP_DIR ..." + rm -rf "$TMP_DIR" + fi + exit 0 +} +trap chartsd_cleanup EXIT QUIT HUP INT TERM + +if [ $UID = "0" ]; then + TMP_DIR="$(mktemp -d /var/run/netdata-${PROGRAM_NAME}-XXXXXXXXXX)" +else + TMP_DIR="$(mktemp -d /tmp/.netdata-${PROGRAM_NAME}-XXXXXXXXXX)" +fi + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + echo "DISABLE" + exit 1 +} + +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- +# check a few commands + +require_cmd() { + local x=$(which "${1}" 2>/dev/null || command -v "${1}" 2>/dev/null) + if [ -z "${x}" -o ! -x "${x}" ]; then + warning "command '${1}' is not found in ${PATH}." + eval "${1^^}_CMD=\"\"" + return 1 + fi + + eval "${1^^}_CMD=\"${x}\"" + return 0 +} + +require_cmd date || exit 1 +require_cmd sed || exit 1 +require_cmd basename || exit 1 +require_cmd dirname || exit 1 +require_cmd cat || exit 1 +require_cmd grep || exit 1 +require_cmd egrep || exit 1 +require_cmd mktemp || exit 1 +require_cmd awk || exit 1 +require_cmd timeout || exit 1 +require_cmd curl || exit 1 + +# ----------------------------------------------------------------------------- + +[ $((BASH_VERSINFO[0])) -lt 4 ] && fatal "BASH version 4 or later is required, but found version: ${BASH_VERSION}. Please upgrade." + +info "started from '$PROGRAM_FILE' with options: $*" + +# ----------------------------------------------------------------------------- +# internal defaults +# netdata exposes a few environment variables for us + +[ -z "${NETDATA_PLUGINS_DIR}" ] && NETDATA_PLUGINS_DIR="$(dirname "${0}")" +[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@" +[ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" + +pluginsd="${NETDATA_PLUGINS_DIR}" +stockconfd="${NETDATA_STOCK_CONFIG_DIR}/${PROGRAM_NAME}" +userconfd="${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}" +olduserconfd="${NETDATA_USER_CONFIG_DIR}" +chartsd="$pluginsd/../charts.d" + +minimum_update_frequency="${NETDATA_UPDATE_EVERY-1}" +update_every=${minimum_update_frequency} # this will be overwritten by the command line + +# work around for non BASH shells +charts_create="_create" +charts_update="_update" +charts_check="_check" +charts_underscore="_" + +# when making iterations, charts.d can loop more frequently +# to prevent plugins missing iterations. +# this is a percentage relative to update_every to align its +# iterations. +# The minimum is 10%, the maximum 100%. +# So, if update_every is 1 second and time_divisor is 50, +# charts.d will iterate every 500ms. +# Charts will be called to collect data only if the time +# passed since the last time the collected data is equal or +# above their update_every. +time_divisor=50 + +# number of seconds to run without restart +# after this time, charts.d.plugin will exit +# netdata will restart it +restart_timeout=$((3600 * 4)) + +# check if the charts.d plugins are using global variables +# they should not. +# It does not currently support BASH v4 arrays, so it is +# disabled +dryrunner=0 + +# check for timeout command +check_for_timeout=1 + +# the default enable/disable value for all charts +enable_all_charts="yes" + +# ----------------------------------------------------------------------------- +# parse parameters + +check=0 +chart_only= +while [ ! -z "$1" ]; do + if [ "$1" = "check" ]; then + check=1 + shift + continue + fi + + if [ "$1" = "debug" -o "$1" = "all" ]; then + debug=1 + shift + continue + fi + + if [ -f "$chartsd/$1.chart.sh" ]; then + debug=1 + chart_only="$(echo $1.chart.sh | sed "s/\.chart\.sh$//g")" + shift + continue + fi + + if [ -f "$chartsd/$1" ]; then + debug=1 + chart_only="$(echo $1 | sed "s/\.chart\.sh$//g")" + shift + continue + fi + + # number check + n="$1" + x=$((n)) + if [ "$x" = "$n" ]; then + shift + update_every=$x + [ $update_every -lt $minimum_update_frequency ] && update_every=$minimum_update_frequency + continue + fi + + fatal "Cannot understand parameter $1. Aborting." +done + +# ----------------------------------------------------------------------------- +# loop control + +# default sleep function +LOOPSLEEPMS_HIGHRES=0 +now_ms= +current_time_ms_default() { + now_ms="$(date +'%s')000" +} +current_time_ms="current_time_ms_default" +current_time_ms_accuracy=1 +mysleep="sleep" + +# if found and included, this file overwrites loopsleepms() +# and current_time_ms() with a high resolution timer function +# for precise looping. +source "$pluginsd/loopsleepms.sh.inc" +[ $? -ne 0 ] && error "Failed to load '$pluginsd/loopsleepms.sh.inc'." + +# ----------------------------------------------------------------------------- +# load my configuration + +for myconfig in "${NETDATA_STOCK_CONFIG_DIR}/${PROGRAM_NAME}.conf" "${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}.conf"; do + if [ -f "$myconfig" ]; then + source "$myconfig" + if [ $? -ne 0 ]; then + error "Config file '$myconfig' loaded with errors." + else + info "Configuration file '$myconfig' loaded." + fi + else + warning "Configuration file '$myconfig' not found." + fi +done + +# make sure time_divisor is right +time_divisor=$((time_divisor)) +[ $time_divisor -lt 10 ] && time_divisor=10 +[ $time_divisor -gt 100 ] && time_divisor=100 + +# we check for the timeout command, after we load our +# configuration, so that the user may overwrite the +# timeout command we use, providing a function that +# can emulate the timeout command we need: +# > timeout SECONDS command ... +if [ $check_for_timeout -eq 1 ]; then + require_cmd timeout || exit 1 +fi + +# ----------------------------------------------------------------------------- +# internal checks + +# netdata passes the requested update frequency as the first argument +update_every=$((update_every + 1 - 1)) # makes sure it is a number +test $update_every -eq 0 && update_every=1 # if it is zero, make it 1 + +# check the charts.d directory +[ ! -d "$chartsd" ] && fatal "cannot find charts directory '$chartsd'" + +# ----------------------------------------------------------------------------- +# library functions + +fixid() { + echo "$*" | + tr -c "[A-Z][a-z][0-9]" "_" | + sed -e "s|^_\+||g" -e "s|_\+$||g" -e "s|_\+|_|g" | + tr "[A-Z]" "[a-z]" +} + +isvarset() { + [ -n "$1" ] && [ "$1" != "unknown" ] && [ "$1" != "none" ] + return $? +} + +getosid() { + if isvarset "${NETDATA_CONTAINER_OS_ID}"; then + echo "${NETDATA_CONTAINER_OS_ID}" + else + echo "${NETDATA_SYSTEM_OS_ID}" + fi +} + +run() { + local ret pid="${BASHPID}" t + + if [ "z${1}" = "z-t" -a "${2}" != "0" ]; then + t="${2}" + shift 2 + timeout "${t}" "${@}" 2>"${TMP_DIR}/run.${pid}" + ret=$? + else + "${@}" 2>"${TMP_DIR}/run.${pid}" + ret=$? + fi + + if [ ${ret} -ne 0 ]; then + { + printf "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: command '" + printf "%q " "${@}" + printf "' failed with code ${ret}:\n --- BEGIN TRACE ---\n" + cat "${TMP_DIR}/run.${pid}" + printf " --- END TRACE ---\n" + } >&2 + fi + rm -f "${TMP_DIR}/run.${pid}" + + return ${ret} +} + +# convert any floating point number +# to integer, give a multiplier +# the result is stored in ${FLOAT2INT_RESULT} +# so that no fork is necessary +# the multiplier must be a power of 10 +float2int() { + local f m="$2" a b l v=($1) + f=${v[0]} + + # the length of the multiplier - 1 + l=$((${#m} - 1)) + + # check if the number is in scientific notation + if [[ ${f} =~ ^[[:space:]]*(-)?[0-9.]+(e|E)(\+|-)[0-9]+ ]]; then + # convert it to decimal + # unfortunately, this fork cannot be avoided + # if you know of a way to avoid it, please let me know + f=$(printf "%0.${l}f" ${f}) + fi + + # split the floating point number + # in integer (a) and decimal (b) + a=${f/.*/} + b=${f/*./} + + # if the integer part is missing + # set it to zero + [ -z "${a}" ] && a="0" + + # strip leading zeros from the integer part + # base 10 conversion + a=$((10#$a)) + + # check the length of the decimal part + # against the length of the multiplier + if [ ${#b} -gt ${l} ]; then + # too many digits - take the most significant + b=${b:0:l} + + elif [ ${#b} -lt ${l} ]; then + # too few digits - pad with zero on the right + local z="00000000000000000000000" r=$((l - ${#b})) + b="${b}${z:0:r}" + fi + + # strip leading zeros from the decimal part + # base 10 conversion + b=$((10#$b)) + + # store the result + FLOAT2INT_RESULT=$(((a * m) + b)) +} + +# ----------------------------------------------------------------------------- +# charts check functions + +all_charts() { + cd "$chartsd" + [ $? -ne 0 ] && error "cannot cd to $chartsd" && return 1 + + ls *.chart.sh | sed "s/\.chart\.sh$//g" +} + +declare -A charts_enable_keyword=( + ['apache']="force" + ['cpu_apps']="force" + ['cpufreq']="force" + ['example']="force" + ['exim']="force" + ['hddtemp']="force" + ['load_average']="force" + ['mem_apps']="force" + ['mysql']="force" + ['nginx']="force" + ['phpfpm']="force" + ['postfix']="force" + ['sensors']="force" + ['squid']="force" + ['tomcat']="force" +) + +declare -A obsolete_charts=( + ['apache']="python.d.plugin module" + ['cpu_apps']="apps.plugin" + ['cpufreq']="proc plugin" + ['exim']="python.d.plugin module" + ['hddtemp']="python.d.plugin module" + ['load_average']="proc plugin" + ['mem_apps']="proc plugin" + ['mysql']="python.d.plugin module" + ['nginx']="python.d.plugin module" + ['phpfpm']="python.d.plugin module" + ['postfix']="python.d.plugin module" + ['squid']="python.d.plugin module" + ['tomcat']="python.d.plugin module" +) + +all_enabled_charts() { + local charts enabled required + + # find all enabled charts + for chart in $(all_charts); do + MODULE_NAME="${chart}" + + if [ -n "${obsolete_charts["$MODULE_NAME"]}" ]; then + debug "is replaced by ${obsolete_charts["$MODULE_NAME"]}, skipping it." + continue + fi + + eval "enabled=\$$chart" + if [ -z "${enabled}" ]; then + enabled="${enable_all_charts}" + fi + + required="${charts_enable_keyword[${chart}]}" + [ -z "${required}" ] && required="yes" + + if [ ! "${enabled}" = "${required}" ]; then + info "is disabled. Add a line with $chart=$required in '${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}.conf' to enable it (or remove the line that disables it)." + else + debug "is enabled for auto-detection." + local charts="$charts $chart" + fi + done + MODULE_NAME="main" + + local charts2= + for chart in $charts; do + MODULE_NAME="${chart}" + + # check the enabled charts + local check="$(cat "$chartsd/$chart.chart.sh" | sed "s/^ \+//g" | grep "^$chart$charts_check()")" + if [ -z "$check" ]; then + error "module '$chart' does not seem to have a $chart$charts_check() function. Disabling it." + continue + fi + + local create="$(cat "$chartsd/$chart.chart.sh" | sed "s/^ \+//g" | grep "^$chart$charts_create()")" + if [ -z "$create" ]; then + error "module '$chart' does not seem to have a $chart$charts_create() function. Disabling it." + continue + fi + + local update="$(cat "$chartsd/$chart.chart.sh" | sed "s/^ \+//g" | grep "^$chart$charts_update()")" + if [ -z "$update" ]; then + error "module '$chart' does not seem to have a $chart$charts_update() function. Disabling it." + continue + fi + + # check its config + #if [ -f "$userconfd/$chart.conf" ] + #then + # if [ ! -z "$( cat "$userconfd/$chart.conf" | sed "s/^ \+//g" | grep -v "^$" | grep -v "^#" | grep -v "^$chart$charts_underscore" )" ] + # then + # error "module's $chart config $userconfd/$chart.conf should only have lines starting with $chart$charts_underscore . Disabling it." + # continue + # fi + #fi + + #if [ $dryrunner -eq 1 ] + # then + # "$pluginsd/charts.d.dryrun-helper.sh" "$chart" "$chartsd/$chart.chart.sh" "$userconfd/$chart.conf" >/dev/null + # if [ $? -ne 0 ] + # then + # error "module's $chart did not pass the dry run check. This means it uses global variables not starting with $chart. Disabling it." + # continue + # fi + #fi + + local charts2="$charts2 $chart" + done + MODULE_NAME="main" + + echo $charts2 + debug "enabled charts: $charts2" +} + +# ----------------------------------------------------------------------------- +# load the charts + +suffix_retries="_retries" +suffix_update_every="_update_every" +active_charts= +for chart in $(all_enabled_charts); do + MODULE_NAME="${chart}" + + debug "loading module: '$chartsd/$chart.chart.sh'" + + source "$chartsd/$chart.chart.sh" + [ $? -ne 0 ] && warning "Module '$chartsd/$chart.chart.sh' loaded with errors." + + # first load the stock config + if [ -f "$stockconfd/$chart.conf" ]; then + debug "loading module configuration: '$stockconfd/$chart.conf'" + source "$stockconfd/$chart.conf" + [ $? -ne 0 ] && warning "Config file '$stockconfd/$chart.conf' loaded with errors." + else + debug "not found module configuration: '$stockconfd/$chart.conf'" + fi + + # then load the user config (it overwrites the stock) + if [ -f "$userconfd/$chart.conf" ]; then + debug "loading module configuration: '$userconfd/$chart.conf'" + source "$userconfd/$chart.conf" + [ $? -ne 0 ] && warning "Config file '$userconfd/$chart.conf' loaded with errors." + else + debug "not found module configuration: '$userconfd/$chart.conf'" + + if [ -f "$olduserconfd/$chart.conf" ]; then + # support for very old netdata that had the charts.d module configs in /etc/netdata + info "loading module configuration from obsolete location: '$olduserconfd/$chart.conf'" + source "$olduserconfd/$chart.conf" + [ $? -ne 0 ] && warning "Config file '$olduserconfd/$chart.conf' loaded with errors." + fi + fi + + eval "dt=\$$chart$suffix_update_every" + dt=$((dt + 1 - 1)) # make sure it is a number + if [ $dt -lt $update_every ]; then + eval "$chart$suffix_update_every=$update_every" + fi + + $chart$charts_check + if [ $? -eq 0 ]; then + debug "module '$chart' activated" + active_charts="$active_charts $chart" + else + error "module's '$chart' check() function reports failure." + fi +done +MODULE_NAME="main" +debug "activated modules: $active_charts" + +# ----------------------------------------------------------------------------- +# check overwrites + +# enable work time reporting +debug_time= +test $debug -eq 1 && debug_time=tellwork + +# if we only need a specific chart, remove all the others +if [ ! -z "${chart_only}" ]; then + debug "requested to run only for: '${chart_only}'" + check_charts= + for chart in $active_charts; do + if [ "$chart" = "$chart_only" ]; then + check_charts="$chart" + break + fi + done + active_charts="$check_charts" +fi +debug "activated charts: $active_charts" + +# stop if we just need a pre-check +if [ $check -eq 1 ]; then + info "CHECK RESULT" + info "Will run the charts: $active_charts" + exit 0 +fi + +# ----------------------------------------------------------------------------- + +cd "${TMP_DIR}" || exit 1 + +# ----------------------------------------------------------------------------- +# create charts + +run_charts= +for chart in $active_charts; do + MODULE_NAME="${chart}" + + debug "calling '$chart$charts_create()'..." + $chart$charts_create + if [ $? -eq 0 ]; then + run_charts="$run_charts $chart" + debug "'$chart' initialized." + else + error "module's '$chart' function '$chart$charts_create()' reports failure." + fi +done +MODULE_NAME="main" +debug "run_charts='$run_charts'" + +# ----------------------------------------------------------------------------- +# update dimensions + +[ -z "$run_charts" ] && fatal "No charts to collect data from." + +keepalive() { + if [ ! -t 1 ] && ! printf "\n"; then + chartsd_cleanup + fi +} + +declare -A charts_last_update=() charts_update_every=() charts_retries=() charts_next_update=() charts_run_counter=() charts_serial_failures=() +global_update() { + local exit_at \ + c=0 dt ret last_ms exec_start_ms exec_end_ms \ + chart now_charts=() next_charts=($run_charts) \ + next_ms x seconds millis + + # return the current time in ms in $now_ms + ${current_time_ms} + + exit_at=$((now_ms + (restart_timeout * 1000))) + + for chart in $run_charts; do + eval "charts_update_every[$chart]=\$$chart$suffix_update_every" + test -z "${charts_update_every[$chart]}" && charts_update_every[$chart]=$update_every + + eval "charts_retries[$chart]=\$$chart$suffix_retries" + test -z "${charts_retries[$chart]}" && charts_retries[$chart]=10 + + charts_last_update[$chart]=$((now_ms - (now_ms % (charts_update_every[$chart] * 1000)))) + charts_next_update[$chart]=$((charts_last_update[$chart] + (charts_update_every[$chart] * 1000))) + charts_run_counter[$chart]=0 + charts_serial_failures[$chart]=0 + + echo "CHART netdata.plugin_chartsd_$chart '' 'Execution time for $chart plugin' 'milliseconds / run' charts.d netdata.plugin_charts area 145000 ${charts_update_every[$chart]} '' '' '$chart'" + echo "DIMENSION run_time 'run time' absolute 1 1" + done + + # the main loop + while [ "${#next_charts[@]}" -gt 0 ]; do + keepalive + + c=$((c + 1)) + now_charts=("${next_charts[@]}") + next_charts=() + + # return the current time in ms in $now_ms + ${current_time_ms} + + for chart in "${now_charts[@]}"; do + MODULE_NAME="${chart}" + + if [ ${now_ms} -ge ${charts_next_update[$chart]} ]; then + last_ms=${charts_last_update[$chart]} + dt=$((now_ms - last_ms)) + + charts_last_update[$chart]=${now_ms} + + while [ ${charts_next_update[$chart]} -lt ${now_ms} ]; do + charts_next_update[$chart]=$((charts_next_update[$chart] + (charts_update_every[$chart] * 1000))) + done + + # the first call should not give a duration + # so that netdata calibrates to current time + dt=$((dt * 1000)) + charts_run_counter[$chart]=$((charts_run_counter[$chart] + 1)) + if [ ${charts_run_counter[$chart]} -eq 1 ]; then + dt= + fi + + exec_start_ms=$now_ms + $chart$charts_update $dt + ret=$? + + # return the current time in ms in $now_ms + ${current_time_ms} + exec_end_ms=$now_ms + + echo "BEGIN netdata.plugin_chartsd_$chart $dt" + echo "SET run_time = $((exec_end_ms - exec_start_ms))" + echo "END" + + if [ $ret -eq 0 ]; then + charts_serial_failures[$chart]=0 + next_charts+=($chart) + else + charts_serial_failures[$chart]=$((charts_serial_failures[$chart] + 1)) + + if [ ${charts_serial_failures[$chart]} -gt ${charts_retries[$chart]} ]; then + error "module's '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it." + else + error "module's '$chart' update() function reports failure. Will keep trying for a while." + next_charts+=($chart) + fi + fi + else + next_charts+=($chart) + fi + done + MODULE_NAME="${chart}" + + # wait the time you are required to + next_ms=$((now_ms + (update_every * 1000 * 100))) + for x in "${charts_next_update[@]}"; do [ ${x} -lt ${next_ms} ] && next_ms=${x}; done + next_ms=$((next_ms - now_ms)) + + if [ ${LOOPSLEEPMS_HIGHRES} -eq 1 -a ${next_ms} -gt 0 ]; then + next_ms=$((next_ms + current_time_ms_accuracy)) + seconds=$((next_ms / 1000)) + millis=$((next_ms % 1000)) + if [ ${millis} -lt 10 ]; then + millis="00${millis}" + elif [ ${millis} -lt 100 ]; then + millis="0${millis}" + fi + + debug "sleeping for ${seconds}.${millis} seconds." + ${mysleep} ${seconds}.${millis} + else + debug "sleeping for ${update_every} seconds." + ${mysleep} $update_every + fi + + test ${now_ms} -ge ${exit_at} && exit 0 + done + + fatal "nothing left to do, exiting..." +} + +global_update diff --git a/collectors/charts.d.plugin/example/Makefile.inc b/collectors/charts.d.plugin/example/Makefile.inc new file mode 100644 index 0000000..e6838fb --- /dev/null +++ b/collectors/charts.d.plugin/example/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_charts_DATA += example/example.chart.sh +dist_chartsconfig_DATA += example/example.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += example/README.md example/Makefile.inc + diff --git a/collectors/charts.d.plugin/example/README.md b/collectors/charts.d.plugin/example/README.md new file mode 100644 index 0000000..77446b2 --- /dev/null +++ b/collectors/charts.d.plugin/example/README.md @@ -0,0 +1,10 @@ + + +# Example + +If you want to understand how charts.d data collector functions, check out the [charts.d example](https://raw.githubusercontent.com/netdata/netdata/master/collectors/charts.d.plugin/example/example.chart.sh). + + diff --git a/collectors/charts.d.plugin/example/example.chart.sh b/collectors/charts.d.plugin/example/example.chart.sh new file mode 100644 index 0000000..6bbbcf1 --- /dev/null +++ b/collectors/charts.d.plugin/example/example.chart.sh @@ -0,0 +1,123 @@ +# shellcheck shell=bash +# no need for shebang - this file is loaded from charts.d.plugin +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis +# + +# if this chart is called X.chart.sh, then all functions and global variables +# must start with X_ + +# _update_every is a special variable - it holds the number of seconds +# between the calls of the _update() function +example_update_every= + +# the priority is used to sort the charts on the dashboard +# 1 = the first chart +example_priority=150000 + +# to enable this chart, you have to set this to 12345 +# (just a demonstration for something that needs to be checked) +example_magic_number= + +# global variables to store our collected data +# remember: they need to start with the module name example_ +example_value1= +example_value2= +example_value3= +example_value4= +example_last=0 +example_count=0 + +example_get() { + # do all the work to collect / calculate the values + # for each dimension + # + # Remember: + # 1. KEEP IT SIMPLE AND SHORT + # 2. AVOID FORKS (avoid piping commands) + # 3. AVOID CALLING TOO MANY EXTERNAL PROGRAMS + # 4. USE LOCAL VARIABLES (global variables may overlap with other modules) + + example_value1=$RANDOM + example_value2=$RANDOM + example_value3=$RANDOM + example_value4=$((8192 + (RANDOM * 16383 / 32767))) + + if [ $example_count -gt 0 ]; then + example_count=$((example_count - 1)) + + [ $example_last -gt 16383 ] && example_value4=$((example_last + (RANDOM * ((32767 - example_last) / 2) / 32767))) + [ $example_last -le 16383 ] && example_value4=$((example_last - (RANDOM * (example_last / 2) / 32767))) + else + example_count=$((1 + (RANDOM * 5 / 32767))) + + if [ $example_last -gt 16383 ] && [ $example_value4 -gt 16383 ]; then + example_value4=$((example_value4 - 16383)) + fi + if [ $example_last -le 16383 ] && [ $example_value4 -lt 16383 ]; then + example_value4=$((example_value4 + 16383)) + fi + fi + example_last=$example_value4 + + # this should return: + # - 0 to send the data to netdata + # - 1 to report a failure to collect the data + + return 0 +} + +# _check is called once, to find out if this chart should be enabled or not +example_check() { + # this should return: + # - 0 to enable the chart + # - 1 to disable the chart + + # check something + [ "${example_magic_number}" != "12345" ] && error "manual configuration required: you have to set example_magic_number=$example_magic_number in example.conf to start example chart." && return 1 + + # check that we can collect data + example_get || return 1 + + return 0 +} + +# _create is called once, to create the charts +example_create() { + # create the chart with 3 dimensions + cat << EOF +CHART example.random '' "Random Numbers Stacked Chart" "% of random numbers" random random stacked $((example_priority)) $example_update_every '' '' 'example' +DIMENSION random1 '' percentage-of-absolute-row 1 1 +DIMENSION random2 '' percentage-of-absolute-row 1 1 +DIMENSION random3 '' percentage-of-absolute-row 1 1 +CHART example.random2 '' "A random number" "random number" random random area $((example_priority + 1)) $example_update_every '' '' 'example' +DIMENSION random '' absolute 1 1 +EOF + + return 0 +} + +# _update is called continuously, to collect the values +example_update() { + # the first argument to this function is the microseconds since last update + # pass this parameter to the BEGIN statement (see below). + + example_get || return 1 + + # write the result of the work. + cat << VALUESEOF +BEGIN example.random $1 +SET random1 = $example_value1 +SET random2 = $example_value2 +SET random3 = $example_value3 +END +BEGIN example.random2 $1 +SET random = $example_value4 +END +VALUESEOF + + return 0 +} diff --git a/collectors/charts.d.plugin/example/example.conf b/collectors/charts.d.plugin/example/example.conf new file mode 100644 index 0000000..6232ca5 --- /dev/null +++ b/collectors/charts.d.plugin/example/example.conf @@ -0,0 +1,21 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# GPL v3+ + +# to enable this chart, you have to set this to 12345 +# (just a demonstration for something that needs to be checked) +#example_magic_number=12345 + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#example_update_every= + +# the charts priority on the dashboard +#example_priority=150000 + +# the number of retries to do in case of failure +# before disabling the module +#example_retries=10 diff --git a/collectors/charts.d.plugin/libreswan/Makefile.inc b/collectors/charts.d.plugin/libreswan/Makefile.inc new file mode 100644 index 0000000..af767d0 --- /dev/null +++ b/collectors/charts.d.plugin/libreswan/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_charts_DATA += libreswan/libreswan.chart.sh +dist_chartsconfig_DATA += libreswan/libreswan.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += libreswan/README.md libreswan/Makefile.inc + diff --git a/collectors/charts.d.plugin/libreswan/README.md b/collectors/charts.d.plugin/libreswan/README.md new file mode 100644 index 0000000..41c4e24 --- /dev/null +++ b/collectors/charts.d.plugin/libreswan/README.md @@ -0,0 +1,56 @@ + + +# Libreswan IPSec tunnel monitoring with Netdata + +Collects bytes-in, bytes-out and uptime for all established libreswan IPSEC tunnels. + +The following charts are created, **per tunnel**: + +1. **Uptime** + +- the uptime of the tunnel + +2. **Traffic** + +- bytes in +- bytes out + +## Configuration + +Edit the `charts.d/libreswan.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d/libreswan.conf +``` + +The plugin executes 2 commands to collect all the information it needs: + +```sh +ipsec whack --status +ipsec whack --trafficstatus +``` + +The first command is used to extract the currently established tunnels, their IDs and their names. +The second command is used to extract the current uptime and traffic. + +Most probably user `netdata` will not be able to query libreswan, so the `ipsec` commands will be denied. +The plugin attempts to run `ipsec` as `sudo ipsec ...`, to get access to libreswan statistics. + +To allow user `netdata` execute `sudo ipsec ...`, create the file `/etc/sudoers.d/netdata` with this content: + +``` +netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --status +netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --trafficstatus +``` + +Make sure the path `/sbin/ipsec` matches your setup (execute `which ipsec` to find the right path). + +--- + + diff --git a/collectors/charts.d.plugin/libreswan/libreswan.chart.sh b/collectors/charts.d.plugin/libreswan/libreswan.chart.sh new file mode 100644 index 0000000..d526f7a --- /dev/null +++ b/collectors/charts.d.plugin/libreswan/libreswan.chart.sh @@ -0,0 +1,187 @@ +# shellcheck shell=bash disable=SC1117 +# no need for shebang - this file is loaded from charts.d.plugin +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# + +# _update_every is a special variable - it holds the number of seconds +# between the calls of the _update() function +libreswan_update_every=1 + +# the priority is used to sort the charts on the dashboard +# 1 = the first chart +libreswan_priority=90000 + +# set to 1, to run ipsec with sudo +libreswan_sudo=1 + +# global variables to store our collected data + +# [TUNNELID] = TUNNELNAME +# here we track the *latest* established tunnels +# as detected by: ipsec whack --status +declare -A libreswan_connected_tunnels=() + +# [TUNNELID] = VALUE +# here we track values of all established tunnels (not only the latest) +# as detected by: ipsec whack --trafficstatus +declare -A libreswan_traffic_in=() +declare -A libreswan_traffic_out=() +declare -A libreswan_established_add_time=() + +# [TUNNELNAME] = CHARTID +# here we remember CHARTIDs of all tunnels +# we need this to avoid converting tunnel names to chart IDs on every iteration +declare -A libreswan_tunnel_charts=() + +is_able_sudo_ipsec() { + if ! sudo -n -l "${IPSEC_CMD}" whack --status > /dev/null 2>&1; then + return 1 + fi + if ! sudo -n -l "${IPSEC_CMD}" whack --trafficstatus > /dev/null 2>&1; then + return 1 + fi + return 0 +} + +# run the ipsec command +libreswan_ipsec() { + if [ ${libreswan_sudo} -ne 0 ]; then + sudo -n "${IPSEC_CMD}" "${@}" + return $? + else + "${IPSEC_CMD}" "${@}" + return $? + fi +} + +# fetch latest values - fill the arrays +libreswan_get() { + # do all the work to collect / calculate the values + # for each dimension + + # empty the variables + libreswan_traffic_in=() + libreswan_traffic_out=() + libreswan_established_add_time=() + libreswan_connected_tunnels=() + + # convert the ipsec command output to a shell script + # and source it to get the values + # shellcheck disable=SC1090 + source <( + { + libreswan_ipsec whack --status + libreswan_ipsec whack --trafficstatus + } | sed -n \ + -e "s|[0-9]\+ #\([0-9]\+\): \"\(.*\)\".*IPsec SA established.*newest IPSEC.*|libreswan_connected_tunnels[\"\1\"]=\"\2\"|p" \ + -e "s|[0-9]\+ #\([0-9]\+\): \"\(.*\)\",\{0,1\}.* add_time=\([0-9]\+\),.* inBytes=\([0-9]\+\),.* outBytes=\([0-9]\+\).*|libreswan_traffic_in[\"\1\"]=\"\4\"; libreswan_traffic_out[\"\1\"]=\"\5\"; libreswan_established_add_time[\"\1\"]=\"\3\";|p" + ) || return 1 + + # check we got some data + [ ${#libreswan_connected_tunnels[@]} -eq 0 ] && return 1 + + return 0 +} + +# _check is called once, to find out if this chart should be enabled or not +libreswan_check() { + # this should return: + # - 0 to enable the chart + # - 1 to disable the chart + + require_cmd ipsec || return 1 + + # make sure it is libreswan + # shellcheck disable=SC2143 + if [ -z "$(ipsec --version | grep -i libreswan)" ]; then + error "ipsec command is not Libreswan. Disabling Libreswan plugin." + return 1 + fi + + if [ ${libreswan_sudo} -ne 0 ] && ! is_able_sudo_ipsec; then + error "not enough permissions to execute ipsec with sudo. Disabling Libreswan plugin." + return 1 + fi + + # check that we can collect data + libreswan_get || return 1 + + return 0 +} + +# create the charts for an ipsec tunnel +libreswan_create_one() { + local n="${1}" name + + name="${libreswan_connected_tunnels[${n}]}" + + [ -n "${libreswan_tunnel_charts[${name}]}" ] && return 0 + + libreswan_tunnel_charts[${name}]="$(fixid "${name}")" + + cat << EOF +CHART libreswan.${libreswan_tunnel_charts[${name}]}_net '${name}_net' "LibreSWAN Tunnel ${name} Traffic" "kilobits/s" "${name}" libreswan.net area $((libreswan_priority)) $libreswan_update_every '' '' 'libreswan' +DIMENSION in '' incremental 8 1000 +DIMENSION out '' incremental -8 1000 +CHART libreswan.${libreswan_tunnel_charts[${name}]}_uptime '${name}_uptime' "LibreSWAN Tunnel ${name} Uptime" "seconds" "${name}" libreswan.uptime line $((libreswan_priority + 1)) $libreswan_update_every '' '' 'libreswan' +DIMENSION uptime '' absolute 1 1 +EOF + + return 0 + +} + +# _create is called once, to create the charts +libreswan_create() { + local n + for n in "${!libreswan_connected_tunnels[@]}"; do + libreswan_create_one "${n}" + done + return 0 +} + +libreswan_now=$(date +%s) + +# send the values to netdata for an ipsec tunnel +libreswan_update_one() { + local n="${1}" microseconds="${2}" name id uptime + + name="${libreswan_connected_tunnels[${n}]}" + id="${libreswan_tunnel_charts[${name}]}" + + [ -z "${id}" ] && libreswan_create_one "${name}" + + uptime=$((libreswan_now - libreswan_established_add_time[${n}])) + [ ${uptime} -lt 0 ] && uptime=0 + + # write the result of the work. + cat << VALUESEOF +BEGIN libreswan.${id}_net ${microseconds} +SET in = ${libreswan_traffic_in[${n}]} +SET out = ${libreswan_traffic_out[${n}]} +END +BEGIN libreswan.${id}_uptime ${microseconds} +SET uptime = ${uptime} +END +VALUESEOF +} + +# _update is called continuously, to collect the values +libreswan_update() { + # the first argument to this function is the microseconds since last update + # pass this parameter to the BEGIN statement (see below). + + libreswan_get || return 1 + libreswan_now=$(date +%s) + + local n + for n in "${!libreswan_connected_tunnels[@]}"; do + libreswan_update_one "${n}" "${@}" + done + + return 0 +} diff --git a/collectors/charts.d.plugin/libreswan/libreswan.conf b/collectors/charts.d.plugin/libreswan/libreswan.conf new file mode 100644 index 0000000..9b3ee77 --- /dev/null +++ b/collectors/charts.d.plugin/libreswan/libreswan.conf @@ -0,0 +1,29 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# GPL v3+ +# + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#libreswan_update_every=1 + +# the charts priority on the dashboard +#libreswan_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#libreswan_retries=10 + +# set to 1, to run ipsec with sudo (the default) +# set to 0, to run ipsec without sudo +#libreswan_sudo=1 + +# TO ALLOW NETDATA RUN ipsec AS ROOT +# CREATE THE FILE: /etc/sudoers.d/netdata +# WITH THESE 2 LINES (uncommented of course): +# +# netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --status +# netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --trafficstatus diff --git a/collectors/charts.d.plugin/loopsleepms.sh.inc b/collectors/charts.d.plugin/loopsleepms.sh.inc new file mode 100644 index 0000000..5608b8d --- /dev/null +++ b/collectors/charts.d.plugin/loopsleepms.sh.inc @@ -0,0 +1,227 @@ +# no need for shebang - this file is included from other scripts +# SPDX-License-Identifier: GPL-3.0-or-later + +LOOPSLEEP_DATE="$(which date 2>/dev/null || command -v date 2>/dev/null)" +if [ -z "$LOOPSLEEP_DATE" ]; then + echo >&2 "$0: ERROR: Cannot find the command 'date' in the system path." + exit 1 +fi + +# ----------------------------------------------------------------------------- +# use the date command as a high resolution timer + +# macOS 'date' doesn't support '%N' precision +# echo $(/bin/date +"%N") is "N" +if [ "$($LOOPSLEEP_DATE +"%N")" = "N" ]; then + LOOPSLEEP_DATE_FORMAT="%s * 1000" +else + LOOPSLEEP_DATE_FORMAT="%s * 1000 + 10#%-N / 1000000" +fi + +now_ms= +LOOPSLEEPMS_HIGHRES=1 +test "$($LOOPSLEEP_DATE +%N)" = "%N" && LOOPSLEEPMS_HIGHRES=0 +test -z "$($LOOPSLEEP_DATE +%N)" && LOOPSLEEPMS_HIGHRES=0 +current_time_ms_from_date() { + if [ $LOOPSLEEPMS_HIGHRES -eq 0 ]; then + now_ms="$($LOOPSLEEP_DATE +'%s')000" + else + now_ms="$(($($LOOPSLEEP_DATE +"$LOOPSLEEP_DATE_FORMAT")))" + fi +} + +# ----------------------------------------------------------------------------- +# use /proc/uptime as a high resolution timer + +current_time_ms_from_date +current_time_ms_from_uptime_started="${now_ms}" +current_time_ms_from_uptime_last="${now_ms}" +current_time_ms_from_uptime_first=0 +current_time_ms_from_uptime() { + local up rest arr=() n + + read up rest &2 "$0: Cannot read /proc/uptime - falling back to current_time_ms_from_date()." + current_time_ms="current_time_ms_from_date" + current_time_ms_from_date + current_time_ms_accuracy=1 + return + fi + + arr=(${up//./ }) + + if [ ${#arr[1]} -lt 1 ]; then + n="${arr[0]}000" + elif [ ${#arr[1]} -lt 2 ]; then + n="${arr[0]}${arr[1]}00" + elif [ ${#arr[1]} -lt 3 ]; then + n="${arr[0]}${arr[1]}0" + else + n="${arr[0]}${arr[1]}" + fi + + now_ms=$((current_time_ms_from_uptime_started - current_time_ms_from_uptime_first + n)) + + if [ "${now_ms}" -lt "${current_time_ms_from_uptime_last}" ]; then + echo >&2 "$0: Cannot use current_time_ms_from_uptime() - new time ${now_ms} is older than the last ${current_time_ms_from_uptime_last} - falling back to current_time_ms_from_date()." + current_time_ms="current_time_ms_from_date" + current_time_ms_from_date + current_time_ms_accuracy=1 + fi + + current_time_ms_from_uptime_last="${now_ms}" +} +current_time_ms_from_uptime +current_time_ms_from_uptime_first="$((now_ms - current_time_ms_from_uptime_started))" +current_time_ms_from_uptime_last="${current_time_ms_from_uptime_first}" +current_time_ms="current_time_ms_from_uptime" +current_time_ms_accuracy=10 +if [ "${current_time_ms_from_uptime_first}" -eq 0 ]; then + echo >&2 "$0: Invalid setup for current_time_ms_from_uptime() - falling back to current_time_ms_from_date()." + current_time_ms="current_time_ms_from_date" + current_time_ms_accuracy=1 +fi + +# ----------------------------------------------------------------------------- +# use read with timeout for sleep + +mysleep="" + +mysleep_fifo="${NETDATA_CACHE_DIR-/tmp}/.netdata_bash_sleep_timer_fifo" +[ -f "${mysleep_fifo}" ] && rm "${mysleep_fifo}" +[ ! -p "${mysleep_fifo}" ] && mkfifo "${mysleep_fifo}" +[ -p "${mysleep_fifo}" ] && mysleep="mysleep_read" + +mysleep_read() { + read -t "${1}" <>"${mysleep_fifo}" + ret=$? + if [ $ret -le 128 ]; then + echo >&2 "$0: Cannot use read for sleeping (return code ${ret})." + mysleep="sleep" + ${mysleep} "${1}" + fi +} + +# ----------------------------------------------------------------------------- +# use bash loadable module for sleep + +mysleep_builtin() { + builtin sleep "${1}" + ret=$? + if [ $ret -ne 0 ]; then + echo >&2 "$0: Cannot use builtin sleep for sleeping (return code ${ret})." + mysleep="sleep" + ${mysleep} "${1}" + fi +} + +if [ -z "${mysleep}" -a "$((BASH_VERSINFO[0] + 0))" -ge 3 -a "${NETDATA_BASH_LOADABLES}" != "DISABLE" ]; then + # enable modules only for bash version 3+ + + for bash_modules_path in ${BASH_LOADABLES_PATH//:/ } "$(pkg-config bash --variable=loadablesdir 2>/dev/null)" "/usr/lib/bash" "/lib/bash" "/lib64/bash" "/usr/local/lib/bash" "/usr/local/lib64/bash"; do + [ -z "${bash_modules_path}" -o ! -d "${bash_modules_path}" ] && continue + + # check for sleep + for bash_module_sleep in "sleep" "sleep.so"; do + if [ -f "${bash_modules_path}/${bash_module_sleep}" ]; then + if enable -f "${bash_modules_path}/${bash_module_sleep}" sleep 2>/dev/null; then + mysleep="mysleep_builtin" + # echo >&2 "$0: Using bash loadable ${bash_modules_path}/${bash_module_sleep} for sleep" + break + fi + fi + + done + + [ ! -z "${mysleep}" ] && break + done +fi + +# ----------------------------------------------------------------------------- +# fallback to external sleep + +[ -z "${mysleep}" ] && mysleep="sleep" + +# ----------------------------------------------------------------------------- +# this function is used to sleep a fraction of a second +# it calculates the difference between every time is called +# and tries to align the sleep time to give you exactly the +# loop you need. + +LOOPSLEEPMS_LASTRUN=0 +LOOPSLEEPMS_NEXTRUN=0 +LOOPSLEEPMS_LASTSLEEP=0 +LOOPSLEEPMS_LASTWORK=0 + +loopsleepms() { + local tellwork=0 t="${1}" div s m now mstosleep + + if [ "${t}" = "tellwork" ]; then + tellwork=1 + shift + t="${1}" + fi + + # $t = the time in seconds to wait + + # if high resolution is not supported + # just sleep the time requested, in seconds + if [ ${LOOPSLEEPMS_HIGHRES} -eq 0 ]; then + sleep ${t} + return + fi + + # get the current time, in ms in ${now_ms} + ${current_time_ms} + + # calculate ms since last run + [ ${LOOPSLEEPMS_LASTRUN} -gt 0 ] && + LOOPSLEEPMS_LASTWORK=$((now_ms - LOOPSLEEPMS_LASTRUN - LOOPSLEEPMS_LASTSLEEP + current_time_ms_accuracy)) + # echo "# last loop's work took $LOOPSLEEPMS_LASTWORK ms" + + # remember this run + LOOPSLEEPMS_LASTRUN=${now_ms} + + # calculate the next run + LOOPSLEEPMS_NEXTRUN=$(((now_ms - (now_ms % (t * 1000))) + (t * 1000))) + + # calculate ms to sleep + mstosleep=$((LOOPSLEEPMS_NEXTRUN - now_ms + current_time_ms_accuracy)) + # echo "# mstosleep is $mstosleep ms" + + # if we are too slow, sleep some time + test ${mstosleep} -lt 200 && mstosleep=200 + + s=$((mstosleep / 1000)) + m=$((mstosleep - (s * 1000))) + [ "${m}" -lt 100 ] && m="0${m}" + [ "${m}" -lt 10 ] && m="0${m}" + + test $tellwork -eq 1 && echo >&2 " >>> PERFORMANCE >>> WORK TOOK ${LOOPSLEEPMS_LASTWORK} ms ( $((LOOPSLEEPMS_LASTWORK * 100 / 1000)).$((LOOPSLEEPMS_LASTWORK % 10))% cpu ) >>> SLEEPING ${mstosleep} ms" + + # echo "# sleeping ${s}.${m}" + # echo + ${mysleep} ${s}.${m} + + # keep the values we need + # for our next run + LOOPSLEEPMS_LASTSLEEP=$mstosleep +} + +# test it +#while [ 1 ] +#do +# r=$(( (RANDOM * 2000 / 32767) )) +# s=$((r / 1000)) +# m=$((r - (s * 1000))) +# [ "${m}" -lt 100 ] && m="0${m}" +# [ "${m}" -lt 10 ] && m="0${m}" +# echo "${r} = ${s}.${m}" +# +# # the work +# ${mysleep} ${s}.${m} +# +# # the alignment loop +# loopsleepms tellwork 1 +#done diff --git a/collectors/charts.d.plugin/nut/Makefile.inc b/collectors/charts.d.plugin/nut/Makefile.inc new file mode 100644 index 0000000..4fb4714 --- /dev/null +++ b/collectors/charts.d.plugin/nut/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_charts_DATA += nut/nut.chart.sh +dist_chartsconfig_DATA += nut/nut.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += nut/README.md nut/Makefile.inc + diff --git a/collectors/charts.d.plugin/nut/README.md b/collectors/charts.d.plugin/nut/README.md new file mode 100644 index 0000000..69d7622 --- /dev/null +++ b/collectors/charts.d.plugin/nut/README.md @@ -0,0 +1,74 @@ + + +# UPS/PDU monitoring with Netdata + +Collects UPS data for all power devices configured in the system. + +The following charts will be created: + +1. **UPS Charge** + +- percentage changed + +2. **UPS Battery Voltage** + +- current voltage +- high voltage +- low voltage +- nominal voltage + +3. **UPS Input Voltage** + +- current voltage +- fault voltage +- nominal voltage + +4. **UPS Input Current** + +- nominal current + +5. **UPS Input Frequency** + +- current frequency +- nominal frequency + +6. **UPS Output Voltage** + +- current voltage + +7. **UPS Load** + +- current load + +8. **UPS Temperature** + +- current temperature + +## Configuration + +Edit the `charts.d/nut.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d/nut.conf +``` + +This is the internal default for `charts.d/nut.conf` + +```sh +# a space separated list of UPS names +# if empty, the list returned by 'upsc -l' will be used +nut_ups= + +# how frequently to collect UPS data +nut_update_every=2 +``` + +--- + + diff --git a/collectors/charts.d.plugin/nut/nut.chart.sh b/collectors/charts.d.plugin/nut/nut.chart.sh new file mode 100644 index 0000000..2f7e3f3 --- /dev/null +++ b/collectors/charts.d.plugin/nut/nut.chart.sh @@ -0,0 +1,232 @@ +# shellcheck shell=bash +# no need for shebang - this file is loaded from charts.d.plugin +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016-2017 Costa Tsaousis +# + +# a space separated list of UPS names +# if empty, the list returned by 'upsc -l' will be used +nut_ups= + +# how frequently to collect UPS data +nut_update_every=2 + +# how much time in seconds, to wait for nut to respond +nut_timeout=2 + +# set this to 1, to enable another chart showing the number +# of UPS clients connected to upsd +nut_clients_chart=0 + +# the priority of nut related to other charts +nut_priority=90000 + +declare -A nut_ids=() +declare -A nut_names=() + +nut_get_all() { + run -t $nut_timeout upsc -l +} + +nut_get() { + run -t $nut_timeout upsc "$1" + + if [ "${nut_clients_chart}" -eq "1" ]; then + printf "ups.connected_clients: " + run -t $nut_timeout upsc -c "$1" | wc -l + fi +} + +nut_check() { + + # this should return: + # - 0 to enable the chart + # - 1 to disable the chart + + local x + + require_cmd upsc || return 1 + + [ -z "$nut_ups" ] && nut_ups="$(nut_get_all)" + + for x in $nut_ups; do + nut_get "$x" > /dev/null + # shellcheck disable=SC2181 + if [ $? -eq 0 ]; then + if [ -n "${nut_names[${x}]}" ]; then + nut_ids[$x]="$(fixid "${nut_names[${x}]}")" + else + nut_ids[$x]="$(fixid "$x")" + fi + continue + fi + error "cannot get information for NUT UPS '$x'." + done + + if [ ${#nut_ids[@]} -eq 0 ]; then + # shellcheck disable=SC2154 + error "Cannot find UPSes - please set nut_ups='ups_name' in $confd/nut.conf" + return 1 + fi + + return 0 +} + +nut_create() { + # create the charts + local x + + for x in "${nut_ids[@]}"; do + cat << EOF +CHART nut_$x.charge '' "UPS Charge" "percentage" ups nut.charge area $((nut_priority + 1)) $nut_update_every +DIMENSION battery_charge charge absolute 1 100 + +CHART nut_$x.runtime '' "UPS Runtime" "seconds" ups nut.runtime area $((nut_priority + 2)) $nut_update_every +DIMENSION battery_runtime runtime absolute 1 100 + +CHART nut_$x.battery_voltage '' "UPS Battery Voltage" "Volts" ups nut.battery.voltage line $((nut_priority + 3)) $nut_update_every +DIMENSION battery_voltage voltage absolute 1 100 +DIMENSION battery_voltage_high high absolute 1 100 +DIMENSION battery_voltage_low low absolute 1 100 +DIMENSION battery_voltage_nominal nominal absolute 1 100 + +CHART nut_$x.input_voltage '' "UPS Input Voltage" "Volts" input nut.input.voltage line $((nut_priority + 4)) $nut_update_every +DIMENSION input_voltage voltage absolute 1 100 +DIMENSION input_voltage_fault fault absolute 1 100 +DIMENSION input_voltage_nominal nominal absolute 1 100 + +CHART nut_$x.input_current '' "UPS Input Current" "Ampere" input nut.input.current line $((nut_priority + 5)) $nut_update_every +DIMENSION input_current_nominal nominal absolute 1 100 + +CHART nut_$x.input_frequency '' "UPS Input Frequency" "Hz" input nut.input.frequency line $((nut_priority + 6)) $nut_update_every +DIMENSION input_frequency frequency absolute 1 100 +DIMENSION input_frequency_nominal nominal absolute 1 100 + +CHART nut_$x.output_voltage '' "UPS Output Voltage" "Volts" output nut.output.voltage line $((nut_priority + 7)) $nut_update_every +DIMENSION output_voltage voltage absolute 1 100 + +CHART nut_$x.load '' "UPS Load" "percentage" ups nut.load area $((nut_priority)) $nut_update_every +DIMENSION load load absolute 1 100 + +CHART nut_$x.temp '' "UPS Temperature" "temperature" ups nut.temperature line $((nut_priority + 8)) $nut_update_every +DIMENSION temp temp absolute 1 100 +EOF + + if [ "${nut_clients_chart}" = "1" ]; then + cat << EOF2 +CHART nut_$x.clients '' "UPS Connected Clients" "clients" ups nut.clients area $((nut_priority + 9)) $nut_update_every +DIMENSION clients '' absolute 1 1 +EOF2 + fi + + done + + return 0 +} + +nut_update() { + # the first argument to this function is the microseconds since last update + # pass this parameter to the BEGIN statement (see below). + + # do all the work to collect / calculate the values + # for each dimension + # remember: KEEP IT SIMPLE AND SHORT + + local i x + for i in "${!nut_ids[@]}"; do + x="${nut_ids[$i]}" + nut_get "$i" | awk " +BEGIN { + battery_charge = 0; + battery_runtime = 0; + battery_voltage = 0; + battery_voltage_high = 0; + battery_voltage_low = 0; + battery_voltage_nominal = 0; + input_voltage = 0; + input_voltage_fault = 0; + input_voltage_nominal = 0; + input_current_nominal = 0; + input_frequency = 0; + input_frequency_nominal = 0; + output_voltage = 0; + load = 0; + temp = 0; + client = 0; + do_clients = ${nut_clients_chart}; +} +/^battery.charge: .*/ { battery_charge = \$2 * 100 }; +/^battery.runtime: .*/ { battery_runtime = \$2 * 100 }; +/^battery.voltage: .*/ { battery_voltage = \$2 * 100 }; +/^battery.voltage.high: .*/ { battery_voltage_high = \$2 * 100 }; +/^battery.voltage.low: .*/ { battery_voltage_low = \$2 * 100 }; +/^battery.voltage.nominal: .*/ { battery_voltage_nominal = \$2 * 100 }; +/^input.voltage: .*/ { input_voltage = \$2 * 100 }; +/^input.voltage.fault: .*/ { input_voltage_fault = \$2 * 100 }; +/^input.voltage.nominal: .*/ { input_voltage_nominal = \$2 * 100 }; +/^input.current.nominal: .*/ { input_current_nominal = \$2 * 100 }; +/^input.frequency: .*/ { input_frequency = \$2 * 100 }; +/^input.frequency.nominal: .*/ { input_frequency_nominal = \$2 * 100 }; +/^output.voltage: .*/ { output_voltage = \$2 * 100 }; +/^ups.load: .*/ { load = \$2 * 100 }; +/^ups.temperature: .*/ { temp = \$2 * 100 }; +/^ups.connected_clients: .*/ { clients = \$2 }; +END { + print \"BEGIN nut_$x.charge $1\"; + print \"SET battery_charge = \" battery_charge; + print \"END\" + + print \"BEGIN nut_$x.runtime $1\"; + print \"SET battery_runtime = \" battery_runtime; + print \"END\" + + print \"BEGIN nut_$x.battery_voltage $1\"; + print \"SET battery_voltage = \" battery_voltage; + print \"SET battery_voltage_high = \" battery_voltage_high; + print \"SET battery_voltage_low = \" battery_voltage_low; + print \"SET battery_voltage_nominal = \" battery_voltage_nominal; + print \"END\" + + print \"BEGIN nut_$x.input_voltage $1\"; + print \"SET input_voltage = \" input_voltage; + print \"SET input_voltage_fault = \" input_voltage_fault; + print \"SET input_voltage_nominal = \" input_voltage_nominal; + print \"END\" + + print \"BEGIN nut_$x.input_current $1\"; + print \"SET input_current_nominal = \" input_current_nominal; + print \"END\" + + print \"BEGIN nut_$x.input_frequency $1\"; + print \"SET input_frequency = \" input_frequency; + print \"SET input_frequency_nominal = \" input_frequency_nominal; + print \"END\" + + print \"BEGIN nut_$x.output_voltage $1\"; + print \"SET output_voltage = \" output_voltage; + print \"END\" + + print \"BEGIN nut_$x.load $1\"; + print \"SET load = \" load; + print \"END\" + + print \"BEGIN nut_$x.temp $1\"; + print \"SET temp = \" temp; + print \"END\" + + if(do_clients) { + print \"BEGIN nut_$x.clients $1\"; + print \"SET clients = \" clients; + print \"END\" + } +}" + # shellcheck disable=2181 + [ $? -ne 0 ] && unset "nut_ids[$i]" && error "failed to get values for '$i', disabling it." + done + + [ ${#nut_ids[@]} -eq 0 ] && error "no UPSes left active." && return 1 + return 0 +} diff --git a/collectors/charts.d.plugin/nut/nut.conf b/collectors/charts.d.plugin/nut/nut.conf new file mode 100644 index 0000000..b95ad90 --- /dev/null +++ b/collectors/charts.d.plugin/nut/nut.conf @@ -0,0 +1,33 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# GPL v3+ + +# a space separated list of UPS names +# if empty, the list returned by 'upsc -l' will be used +#nut_ups= + +# each line represents an alias for one UPS +# if empty, the FQDN will be used +#nut_names["FQDN1"]="alias" +#nut_names["FQDN2"]="alias" + +# how much time in seconds, to wait for nut to respond +#nut_timeout=2 + +# set this to 1, to enable another chart showing the number +# of UPS clients connected to upsd +#nut_clients_chart=1 + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#nut_update_every=2 + +# the charts priority on the dashboard +#nut_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#nut_retries=10 diff --git a/collectors/charts.d.plugin/opensips/Makefile.inc b/collectors/charts.d.plugin/opensips/Makefile.inc new file mode 100644 index 0000000..a7b5d3a --- /dev/null +++ b/collectors/charts.d.plugin/opensips/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_charts_DATA += opensips/opensips.chart.sh +dist_chartsconfig_DATA += opensips/opensips.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += opensips/README.md opensips/Makefile.inc + diff --git a/collectors/charts.d.plugin/opensips/README.md b/collectors/charts.d.plugin/opensips/README.md new file mode 100644 index 0000000..b08d192 --- /dev/null +++ b/collectors/charts.d.plugin/opensips/README.md @@ -0,0 +1,19 @@ + + +# OpenSIPS monitoring with Netdata + +## Configuration + +Edit the `charts.d/opensips.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d/opensips.conf +``` + + diff --git a/collectors/charts.d.plugin/opensips/opensips.chart.sh b/collectors/charts.d.plugin/opensips/opensips.chart.sh new file mode 100644 index 0000000..02401fd --- /dev/null +++ b/collectors/charts.d.plugin/opensips/opensips.chart.sh @@ -0,0 +1,325 @@ +# shellcheck shell=bash disable=SC1117,SC2154,SC2086 +# no need for shebang - this file is loaded from charts.d.plugin +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis +# + +opensips_opts="fifo get_statistics all" +opensips_cmd= +opensips_update_every=5 +opensips_timeout=2 +opensips_priority=80000 + +opensips_get_stats() { + run -t $opensips_timeout "$opensips_cmd" $opensips_opts | + grep "^\(core\|dialog\|net\|registrar\|shmem\|siptrace\|sl\|tm\|uri\|usrloc\):[a-zA-Z0-9_-]\+[[:space:]]*[=:]\+[[:space:]]*[0-9]\+[[:space:]]*$" | + sed \ + -e "s|[[:space:]]*[=:]\+[[:space:]]*\([0-9]\+\)[[:space:]]*$|=\1|g" \ + -e "s|[[:space:]:-]\+|_|g" \ + -e "s|^|opensips_|g" + + local ret=$? + [ $ret -ne 0 ] && echo "opensips_command_failed=1" + return $ret +} + +opensips_check() { + # if the user did not provide an opensips_cmd + # try to find it in the system + if [ -z "$opensips_cmd" ]; then + require_cmd opensipsctl || return 1 + opensips_cmd="$OPENSIPSCTL_CMD" + fi + + # check once if the command works + local x + x="$(opensips_get_stats | grep "^opensips_core_")" + # shellcheck disable=SC2181 + if [ ! $? -eq 0 ] || [ -z "$x" ]; then + error "cannot get global status. Please set opensips_opts='options' whatever needed to get connected to opensips server, in $confd/opensips.conf" + return 1 + fi + + return 0 +} + +opensips_create() { + # create the charts + cat << EOF +CHART opensips.dialogs_active '' "OpenSIPS Active Dialogs" "dialogs" dialogs '' area $((opensips_priority + 1)) $opensips_update_every '' '' 'opensips' +DIMENSION dialog_active_dialogs active absolute 1 1 +DIMENSION dialog_early_dialogs early absolute -1 1 + +CHART opensips.users '' "OpenSIPS Users" "users" users '' line $((opensips_priority + 2)) $opensips_update_every '' '' 'opensips' +DIMENSION usrloc_registered_users registered absolute 1 1 +DIMENSION usrloc_location_users location absolute 1 1 +DIMENSION usrloc_location_contacts contacts absolute 1 1 +DIMENSION usrloc_location_expires expires incremental -1 1 + +CHART opensips.registrar '' "OpenSIPS Registrar" "registrations/s" registrar '' line $((opensips_priority + 3)) $opensips_update_every '' '' 'opensips' +DIMENSION registrar_accepted_regs accepted incremental 1 1 +DIMENSION registrar_rejected_regs rejected incremental -1 1 + +CHART opensips.transactions '' "OpenSIPS Transactions" "transactions/s" transactions '' line $((opensips_priority + 4)) $opensips_update_every '' '' 'opensips' +DIMENSION tm_UAS_transactions UAS incremental 1 1 +DIMENSION tm_UAC_transactions UAC incremental -1 1 + +CHART opensips.core_rcv '' "OpenSIPS Core Receives" "queries/s" core '' line $((opensips_priority + 5)) $opensips_update_every '' '' 'opensips' +DIMENSION core_rcv_requests requests incremental 1 1 +DIMENSION core_rcv_replies replies incremental -1 1 + +CHART opensips.core_fwd '' "OpenSIPS Core Forwards" "queries/s" core '' line $((opensips_priority + 6)) $opensips_update_every '' '' 'opensips' +DIMENSION core_fwd_requests requests incremental 1 1 +DIMENSION core_fwd_replies replies incremental -1 1 + +CHART opensips.core_drop '' "OpenSIPS Core Drops" "queries/s" core '' line $((opensips_priority + 7)) $opensips_update_every '' '' 'opensips' +DIMENSION core_drop_requests requests incremental 1 1 +DIMENSION core_drop_replies replies incremental -1 1 + +CHART opensips.core_err '' "OpenSIPS Core Errors" "queries/s" core '' line $((opensips_priority + 8)) $opensips_update_every '' '' 'opensips' +DIMENSION core_err_requests requests incremental 1 1 +DIMENSION core_err_replies replies incremental -1 1 + +CHART opensips.core_bad '' "OpenSIPS Core Bad" "queries/s" core '' line $((opensips_priority + 9)) $opensips_update_every '' '' 'opensips' +DIMENSION core_bad_URIs_rcvd bad_URIs_rcvd incremental 1 1 +DIMENSION core_unsupported_methods unsupported_methods incremental 1 1 +DIMENSION core_bad_msg_hdr bad_msg_hdr incremental 1 1 + +CHART opensips.tm_replies '' "OpenSIPS TM Replies" "replies/s" transactions '' line $((opensips_priority + 10)) $opensips_update_every '' '' 'opensips' +DIMENSION tm_received_replies received incremental 1 1 +DIMENSION tm_relayed_replies relayed incremental 1 1 +DIMENSION tm_local_replies local incremental 1 1 + +CHART opensips.transactions_status '' "OpenSIPS Transactions Status" "transactions/s" transactions '' line $((opensips_priority + 11)) $opensips_update_every '' '' 'opensips' +DIMENSION tm_2xx_transactions 2xx incremental 1 1 +DIMENSION tm_3xx_transactions 3xx incremental 1 1 +DIMENSION tm_4xx_transactions 4xx incremental 1 1 +DIMENSION tm_5xx_transactions 5xx incremental 1 1 +DIMENSION tm_6xx_transactions 6xx incremental 1 1 + +CHART opensips.transactions_inuse '' "OpenSIPS InUse Transactions" "transactions" transactions '' line $((opensips_priority + 12)) $opensips_update_every '' '' 'opensips' +DIMENSION tm_inuse_transactions inuse absolute 1 1 + +CHART opensips.sl_replies '' "OpenSIPS SL Replies" "replies/s" core '' line $((opensips_priority + 13)) $opensips_update_every '' '' 'opensips' +DIMENSION sl_1xx_replies 1xx incremental 1 1 +DIMENSION sl_2xx_replies 2xx incremental 1 1 +DIMENSION sl_3xx_replies 3xx incremental 1 1 +DIMENSION sl_4xx_replies 4xx incremental 1 1 +DIMENSION sl_5xx_replies 5xx incremental 1 1 +DIMENSION sl_6xx_replies 6xx incremental 1 1 +DIMENSION sl_sent_replies sent incremental 1 1 +DIMENSION sl_sent_err_replies error incremental 1 1 +DIMENSION sl_received_ACKs ACKed incremental 1 1 + +CHART opensips.dialogs '' "OpenSIPS Dialogs" "dialogs/s" dialogs '' line $((opensips_priority + 14)) $opensips_update_every '' '' 'opensips' +DIMENSION dialog_processed_dialogs processed incremental 1 1 +DIMENSION dialog_expired_dialogs expired incremental 1 1 +DIMENSION dialog_failed_dialogs failed incremental -1 1 + +CHART opensips.net_waiting '' "OpenSIPS Network Waiting" "kilobytes" net '' line $((opensips_priority + 15)) $opensips_update_every '' '' 'opensips' +DIMENSION net_waiting_udp UDP absolute 1 1024 +DIMENSION net_waiting_tcp TCP absolute 1 1024 + +CHART opensips.uri_checks '' "OpenSIPS URI Checks" "checks / sec" uri '' line $((opensips_priority + 16)) $opensips_update_every '' '' 'opensips' +DIMENSION uri_positive_checks positive incremental 1 1 +DIMENSION uri_negative_checks negative incremental -1 1 + +CHART opensips.traces '' "OpenSIPS Traces" "traces / sec" traces '' line $((opensips_priority + 17)) $opensips_update_every '' '' 'opensips' +DIMENSION siptrace_traced_requests requests incremental 1 1 +DIMENSION siptrace_traced_replies replies incremental -1 1 + +CHART opensips.shmem '' "OpenSIPS Shared Memory" "kilobytes" mem '' line $((opensips_priority + 18)) $opensips_update_every '' '' 'opensips' +DIMENSION shmem_total_size total absolute 1 1024 +DIMENSION shmem_used_size used absolute 1 1024 +DIMENSION shmem_real_used_size real_used absolute 1 1024 +DIMENSION shmem_max_used_size max_used absolute 1 1024 +DIMENSION shmem_free_size free absolute 1 1024 + +CHART opensips.shmem_fragments '' "OpenSIPS Shared Memory Fragmentation" "fragments" mem '' line $((opensips_priority + 19)) $opensips_update_every '' '' 'opensips' +DIMENSION shmem_fragments fragments absolute 1 1 +EOF + + return 0 +} + +opensips_update() { + # the first argument to this function is the microseconds since last update + # pass this parameter to the BEGIN statement (see below). + + # do all the work to collect / calculate the values + # for each dimension + + # 1. get the counters page from opensips + # 2. sed to remove spaces; replace . with _; remove spaces around =; prepend each line with: local opensips_ + # 3. egrep lines starting with: + # local opensips_client_http_ then one or more of these a-z 0-9 _ then = and one of more of 0-9 + # local opensips_server_all_ then one or more of these a-z 0-9 _ then = and one of more of 0-9 + # 4. then execute this as a script with the eval + # be very careful with eval: + # prepare the script and always grep at the end the lines that are useful, so that + # even if something goes wrong, no other code can be executed + + unset \ + opensips_dialog_active_dialogs \ + opensips_dialog_early_dialogs \ + opensips_usrloc_registered_users \ + opensips_usrloc_location_users \ + opensips_usrloc_location_contacts \ + opensips_usrloc_location_expires \ + opensips_registrar_accepted_regs \ + opensips_registrar_rejected_regs \ + opensips_tm_UAS_transactions \ + opensips_tm_UAC_transactions \ + opensips_core_rcv_requests \ + opensips_core_rcv_replies \ + opensips_core_fwd_requests \ + opensips_core_fwd_replies \ + opensips_core_drop_requests \ + opensips_core_drop_replies \ + opensips_core_err_requests \ + opensips_core_err_replies \ + opensips_core_bad_URIs_rcvd \ + opensips_core_unsupported_methods \ + opensips_core_bad_msg_hdr \ + opensips_tm_received_replies \ + opensips_tm_relayed_replies \ + opensips_tm_local_replies \ + opensips_tm_2xx_transactions \ + opensips_tm_3xx_transactions \ + opensips_tm_4xx_transactions \ + opensips_tm_5xx_transactions \ + opensips_tm_6xx_transactions \ + opensips_tm_inuse_transactions \ + opensips_sl_1xx_replies \ + opensips_sl_2xx_replies \ + opensips_sl_3xx_replies \ + opensips_sl_4xx_replies \ + opensips_sl_5xx_replies \ + opensips_sl_6xx_replies \ + opensips_sl_sent_replies \ + opensips_sl_sent_err_replies \ + opensips_sl_received_ACKs \ + opensips_dialog_processed_dialogs \ + opensips_dialog_expired_dialogs \ + opensips_dialog_failed_dialogs \ + opensips_net_waiting_udp \ + opensips_net_waiting_tcp \ + opensips_uri_positive_checks \ + opensips_uri_negative_checks \ + opensips_siptrace_traced_requests \ + opensips_siptrace_traced_replies \ + opensips_shmem_total_size \ + opensips_shmem_used_size \ + opensips_shmem_real_used_size \ + opensips_shmem_max_used_size \ + opensips_shmem_free_size \ + opensips_shmem_fragments + + opensips_command_failed=0 + eval "local $(opensips_get_stats)" + # shellcheck disable=SC2181 + [ $? -ne 0 ] && return 1 + + [ $opensips_command_failed -eq 1 ] && error "failed to get values, disabling." && return 1 + + # write the result of the work. + cat << VALUESEOF +BEGIN opensips.dialogs_active $1 +SET dialog_active_dialogs = $opensips_dialog_active_dialogs +SET dialog_early_dialogs = $opensips_dialog_early_dialogs +END +BEGIN opensips.users $1 +SET usrloc_registered_users = $opensips_usrloc_registered_users +SET usrloc_location_users = $opensips_usrloc_location_users +SET usrloc_location_contacts = $opensips_usrloc_location_contacts +SET usrloc_location_expires = $opensips_usrloc_location_expires +END +BEGIN opensips.registrar $1 +SET registrar_accepted_regs = $opensips_registrar_accepted_regs +SET registrar_rejected_regs = $opensips_registrar_rejected_regs +END +BEGIN opensips.transactions $1 +SET tm_UAS_transactions = $opensips_tm_UAS_transactions +SET tm_UAC_transactions = $opensips_tm_UAC_transactions +END +BEGIN opensips.core_rcv $1 +SET core_rcv_requests = $opensips_core_rcv_requests +SET core_rcv_replies = $opensips_core_rcv_replies +END +BEGIN opensips.core_fwd $1 +SET core_fwd_requests = $opensips_core_fwd_requests +SET core_fwd_replies = $opensips_core_fwd_replies +END +BEGIN opensips.core_drop $1 +SET core_drop_requests = $opensips_core_drop_requests +SET core_drop_replies = $opensips_core_drop_replies +END +BEGIN opensips.core_err $1 +SET core_err_requests = $opensips_core_err_requests +SET core_err_replies = $opensips_core_err_replies +END +BEGIN opensips.core_bad $1 +SET core_bad_URIs_rcvd = $opensips_core_bad_URIs_rcvd +SET core_unsupported_methods = $opensips_core_unsupported_methods +SET core_bad_msg_hdr = $opensips_core_bad_msg_hdr +END +BEGIN opensips.tm_replies $1 +SET tm_received_replies = $opensips_tm_received_replies +SET tm_relayed_replies = $opensips_tm_relayed_replies +SET tm_local_replies = $opensips_tm_local_replies +END +BEGIN opensips.transactions_status $1 +SET tm_2xx_transactions = $opensips_tm_2xx_transactions +SET tm_3xx_transactions = $opensips_tm_3xx_transactions +SET tm_4xx_transactions = $opensips_tm_4xx_transactions +SET tm_5xx_transactions = $opensips_tm_5xx_transactions +SET tm_6xx_transactions = $opensips_tm_6xx_transactions +END +BEGIN opensips.transactions_inuse $1 +SET tm_inuse_transactions = $opensips_tm_inuse_transactions +END +BEGIN opensips.sl_replies $1 +SET sl_1xx_replies = $opensips_sl_1xx_replies +SET sl_2xx_replies = $opensips_sl_2xx_replies +SET sl_3xx_replies = $opensips_sl_3xx_replies +SET sl_4xx_replies = $opensips_sl_4xx_replies +SET sl_5xx_replies = $opensips_sl_5xx_replies +SET sl_6xx_replies = $opensips_sl_6xx_replies +SET sl_sent_replies = $opensips_sl_sent_replies +SET sl_sent_err_replies = $opensips_sl_sent_err_replies +SET sl_received_ACKs = $opensips_sl_received_ACKs +END +BEGIN opensips.dialogs $1 +SET dialog_processed_dialogs = $opensips_dialog_processed_dialogs +SET dialog_expired_dialogs = $opensips_dialog_expired_dialogs +SET dialog_failed_dialogs = $opensips_dialog_failed_dialogs +END +BEGIN opensips.net_waiting $1 +SET net_waiting_udp = $opensips_net_waiting_udp +SET net_waiting_tcp = $opensips_net_waiting_tcp +END +BEGIN opensips.uri_checks $1 +SET uri_positive_checks = $opensips_uri_positive_checks +SET uri_negative_checks = $opensips_uri_negative_checks +END +BEGIN opensips.traces $1 +SET siptrace_traced_requests = $opensips_siptrace_traced_requests +SET siptrace_traced_replies = $opensips_siptrace_traced_replies +END +BEGIN opensips.shmem $1 +SET shmem_total_size = $opensips_shmem_total_size +SET shmem_used_size = $opensips_shmem_used_size +SET shmem_real_used_size = $opensips_shmem_real_used_size +SET shmem_max_used_size = $opensips_shmem_max_used_size +SET shmem_free_size = $opensips_shmem_free_size +END +BEGIN opensips.shmem_fragments $1 +SET shmem_fragments = $opensips_shmem_fragments +END +VALUESEOF + + return 0 +} diff --git a/collectors/charts.d.plugin/opensips/opensips.conf b/collectors/charts.d.plugin/opensips/opensips.conf new file mode 100644 index 0000000..e25111d --- /dev/null +++ b/collectors/charts.d.plugin/opensips/opensips.conf @@ -0,0 +1,21 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# GPL v3+ + +#opensips_opts="fifo get_statistics all" +#opensips_cmd= +#opensips_timeout=2 + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#opensips_update_every=5 + +# the charts priority on the dashboard +#opensips_priority=80000 + +# the number of retries to do in case of failure +# before disabling the module +#opensips_retries=10 diff --git a/collectors/charts.d.plugin/sensors/Makefile.inc b/collectors/charts.d.plugin/sensors/Makefile.inc new file mode 100644 index 0000000..f466a1b --- /dev/null +++ b/collectors/charts.d.plugin/sensors/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_charts_DATA += sensors/sensors.chart.sh +dist_chartsconfig_DATA += sensors/sensors.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += sensors/README.md sensors/Makefile.inc + diff --git a/collectors/charts.d.plugin/sensors/README.md b/collectors/charts.d.plugin/sensors/README.md new file mode 100644 index 0000000..1b98b1a --- /dev/null +++ b/collectors/charts.d.plugin/sensors/README.md @@ -0,0 +1,79 @@ + + +# Linux machine sensors monitoring with Netdata + +Use this collector when `lm-sensors` doesn't work on your device (e.g. for RPi temperatures). +For all other cases use the [Python collector](/collectors/python.d.plugin/sensors), which supports multiple +jobs, is more efficient and performs calculations on top of the kernel provided values. + +This plugin will provide charts for all configured system sensors, by reading sensors directly from the kernel. +The values graphed are the raw hardware values of the sensors. + +The plugin will create Netdata charts for: + +1. **Temperature** +2. **Voltage** +3. **Current** +4. **Power** +5. **Fans Speed** +6. **Energy** +7. **Humidity** + +One chart for every sensor chip found and each of the above will be created. + +## Enable the collector + +The `sensors` collector is disabled by default. To enable it, edit the `charts.d.conf` file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d.conf +``` + +It also needs to be set to "force" to be enabled: + +```shell +# example=force +sensors=force +``` + +## Configuration + +Edit the `charts.d/sensors.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d/sensors.conf +``` + +This is the internal default for `charts.d/sensors.conf` + +```sh +# the directory the kernel keeps sensor data +sensors_sys_dir="${NETDATA_HOST_PREFIX}/sys/devices" + +# how deep in the tree to check for sensor data +sensors_sys_depth=10 + +# if set to 1, the script will overwrite internal +# script functions with code generated ones +# leave to 1, is faster +sensors_source_update=1 + +# how frequently to collect sensor data +# the default is to collect it at every iteration of charts.d +sensors_update_every= + +# array of sensors which are excluded +# the default is to include all +sensors_excluded=() +``` + +--- + + diff --git a/collectors/charts.d.plugin/sensors/sensors.chart.sh b/collectors/charts.d.plugin/sensors/sensors.chart.sh new file mode 100644 index 0000000..0527e1e --- /dev/null +++ b/collectors/charts.d.plugin/sensors/sensors.chart.sh @@ -0,0 +1,250 @@ +# shellcheck shell=bash +# no need for shebang - this file is loaded from charts.d.plugin +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis +# + +# sensors docs +# https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface + +# if this chart is called X.chart.sh, then all functions and global variables +# must start with X_ + +# the directory the kernel keeps sensor data +sensors_sys_dir="${NETDATA_HOST_PREFIX}/sys/devices" + +# how deep in the tree to check for sensor data +sensors_sys_depth=10 + +# if set to 1, the script will overwrite internal +# script functions with code generated ones +# leave to 1, is faster +sensors_source_update=1 + +# how frequently to collect sensor data +# the default is to collect it at every iteration of charts.d +sensors_update_every= + +sensors_priority=90000 + +declare -A sensors_excluded=() + +sensors_find_all_files() { + find "$1" -maxdepth $sensors_sys_depth -name \*_input -o -name temp 2>/dev/null +} + +sensors_find_all_dirs() { + # shellcheck disable=SC2162 + sensors_find_all_files "$1" | while read; do + dirname "$REPLY" + done | sort -u +} + +# _check is called once, to find out if this chart should be enabled or not +sensors_check() { + + # this should return: + # - 0 to enable the chart + # - 1 to disable the chart + + [ -z "$(sensors_find_all_files "$sensors_sys_dir")" ] && error "no sensors found in '$sensors_sys_dir'." && return 1 + return 0 +} + +sensors_check_files() { + # we only need sensors that report a non-zero value + # also remove not needed sensors + + local f v excluded + for f in "$@"; do + [ ! -f "$f" ] && continue + for ex in "${sensors_excluded[@]}"; do + [[ $f =~ .*$ex$ ]] && excluded='1' && break + done + + [ "$excluded" != "1" ] && v="$(cat "$f")" || v=0 + v=$((v + 1 - 1)) + [ $v -ne 0 ] && echo "$f" && continue + excluded= + + error "$f gives zero values" + done +} + +sensors_check_temp_type() { + # valid temp types are 1 to 6 + # disabled sensors have the value 0 + + local f t v + for f in "$@"; do + # shellcheck disable=SC2001 + t=$(echo "$f" | sed "s|_input$|_type|g") + [ "$f" = "$t" ] && echo "$f" && continue + [ ! -f "$t" ] && echo "$f" && continue + + v="$(cat "$t")" + v=$((v + 1 - 1)) + [ $v -ne 0 ] && echo "$f" && continue + + error "$f is disabled" + done +} + +# _create is called once, to create the charts +sensors_create() { + local path dir name x file lfile labelname device subsystem id type mode files multiplier divisor + + # we create a script with the source of the + # sensors_update() function + # - the highest speed we can achieve - + [ $sensors_source_update -eq 1 ] && echo >"$TMP_DIR/sensors.sh" "sensors_update() {" + + for path in $(sensors_find_all_dirs "$sensors_sys_dir" | sort -u); do + dir=$(basename "$path") + device= + subsystem= + id= + type= + name= + + [ -h "$path/device" ] && device=$(readlink -f "$path/device") + [ ! -z "$device" ] && device=$(basename "$device") + [ -z "$device" ] && device="$dir" + + [ -h "$path/subsystem" ] && subsystem=$(readlink -f "$path/subsystem") + [ ! -z "$subsystem" ] && subsystem=$(basename "$subsystem") + [ -z "$subsystem" ] && subsystem="$dir" + + [ -f "$path/name" ] && name=$(cat "$path/name") + [ -z "$name" ] && name="$dir" + + [ -f "$path/type" ] && type=$(cat "$path/type") + [ -z "$type" ] && type="$dir" + + id="$(fixid "$device.$subsystem.$dir")" + + debug "path='$path', dir='$dir', device='$device', subsystem='$subsystem', id='$id', name='$name'" + + for mode in temperature voltage fans power current energy humidity; do + files= + multiplier=1 + divisor=1 + algorithm="absolute" + + case $mode in + temperature) + files="$( + ls "$path"/temp*_input 2>/dev/null + ls "$path/temp" 2>/dev/null + )" + files="$(sensors_check_files "$files")" + files="$(sensors_check_temp_type "$files")" + [ -z "$files" ] && continue + echo "CHART 'sensors.temp_${id}_${name}' '' 'Temperature' 'Celsius' 'temperature' 'sensors.temp' line $((sensors_priority + 1)) $sensors_update_every '' '' 'sensors'" + echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.temp_${id}_${name}' \$1\"" + divisor=1000 + ;; + + voltage) + files="$(ls "$path"/in*_input 2>/dev/null)" + files="$(sensors_check_files "$files")" + [ -z "$files" ] && continue + echo "CHART 'sensors.volt_${id}_${name}' '' 'Voltage' 'Volts' 'voltage' 'sensors.volt' line $((sensors_priority + 2)) $sensors_update_every '' '' 'sensors'" + echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.volt_${id}_${name}' \$1\"" + divisor=1000 + ;; + + current) + files="$(ls "$path"/curr*_input 2>/dev/null)" + files="$(sensors_check_files "$files")" + [ -z "$files" ] && continue + echo "CHART 'sensors.curr_${id}_${name}' '' 'Current' 'Ampere' 'current' 'sensors.curr' line $((sensors_priority + 3)) $sensors_update_every '' '' 'sensors'" + echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.curr_${id}_${name}' \$1\"" + divisor=1000 + ;; + + power) + files="$(ls "$path"/power*_input 2>/dev/null)" + files="$(sensors_check_files "$files")" + [ -z "$files" ] && continue + echo "CHART 'sensors.power_${id}_${name}' '' 'Power' 'Watt' 'power' 'sensors.power' line $((sensors_priority + 4)) $sensors_update_every '' '' 'sensors'" + echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.power_${id}_${name}' \$1\"" + divisor=1000000 + ;; + + fans) + files="$(ls "$path"/fan*_input 2>/dev/null)" + files="$(sensors_check_files "$files")" + [ -z "$files" ] && continue + echo "CHART 'sensors.fan_${id}_${name}' '' 'Fans Speed' 'Rotations / Minute' 'fans' 'sensors.fans' line $((sensors_priority + 5)) $sensors_update_every '' '' 'sensors'" + echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.fan_${id}_${name}' \$1\"" + ;; + + energy) + files="$(ls "$path"/energy*_input 2>/dev/null)" + files="$(sensors_check_files "$files")" + [ -z "$files" ] && continue + echo "CHART 'sensors.energy_${id}_${name}' '' 'Energy' 'Joule' 'energy' 'sensors.energy' areastack $((sensors_priority + 6)) $sensors_update_every '' '' 'sensors'" + echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.energy_${id}_${name}' \$1\"" + algorithm="incremental" + divisor=1000000 + ;; + + humidity) + files="$(ls "$path"/humidity*_input 2>/dev/null)" + files="$(sensors_check_files "$files")" + [ -z "$files" ] && continue + echo "CHART 'sensors.humidity_${id}_${name}' '' 'Humidity' 'Percent' 'humidity' 'sensors.humidity' line $((sensors_priority + 7)) $sensors_update_every '' '' 'sensors'" + echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.humidity_${id}_${name}' \$1\"" + divisor=1000 + ;; + + *) + continue + ;; + esac + + for x in $files; do + file="$x" + fid="$(fixid "$file")" + lfile="$(basename "$file" | sed "s|_input$|_label|g")" + labelname="$(basename "$file" | sed "s|_input$||g")" + + if [ ! "$path/$lfile" = "$file" ] && [ -f "$path/$lfile" ]; then + labelname="$(cat "$path/$lfile")" + fi + + echo "DIMENSION $fid '$labelname' $algorithm $multiplier $divisor" + echo >>"$TMP_DIR/sensors.sh" "echo \"SET $fid = \"\$(< $file )" + done + + echo >>"$TMP_DIR/sensors.sh" "echo END" + done + done + + [ $sensors_source_update -eq 1 ] && echo >>"$TMP_DIR/sensors.sh" "}" + + # ok, load the function sensors_update() we created + # shellcheck source=/dev/null + [ $sensors_source_update -eq 1 ] && . "$TMP_DIR/sensors.sh" + + return 0 +} + +# _update is called continuously, to collect the values +sensors_update() { + # the first argument to this function is the microseconds since last update + # pass this parameter to the BEGIN statement (see below). + + # do all the work to collect / calculate the values + # for each dimension + # remember: KEEP IT SIMPLE AND SHORT + + # shellcheck source=/dev/null + [ $sensors_source_update -eq 0 ] && . "$TMP_DIR/sensors.sh" "$1" + + return 0 +} diff --git a/collectors/charts.d.plugin/sensors/sensors.conf b/collectors/charts.d.plugin/sensors/sensors.conf new file mode 100644 index 0000000..bcb2880 --- /dev/null +++ b/collectors/charts.d.plugin/sensors/sensors.conf @@ -0,0 +1,32 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis +# GPL v3+ + +# THIS PLUGIN IS DEPRECATED +# USE THE PYTHON.D ONE + +# the directory the kernel keeps sensor data +#sensors_sys_dir="/sys/devices" + +# how deep in the tree to check for sensor data +#sensors_sys_depth=10 + +# if set to 1, the script will overwrite internal +# script functions with code generated ones +# leave to 1, is faster +#sensors_source_update=1 + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#sensors_update_every= + +# the charts priority on the dashboard +#sensors_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#sensors_retries=10 + diff --git a/collectors/cups.plugin/Makefile.am b/collectors/cups.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/cups.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/cups.plugin/README.md b/collectors/cups.plugin/README.md new file mode 100644 index 0000000..f3b2a28 --- /dev/null +++ b/collectors/cups.plugin/README.md @@ -0,0 +1,64 @@ + + +# cups.plugin + +`cups.plugin` collects Common Unix Printing System (CUPS) metrics. + +## Prerequisites + +This plugin needs a running local CUPS daemon (`cupsd`). This plugin does not need any configuration. Supports cups since version 1.7. + +If you installed Netdata using our native packages, you will have to additionaly install `netdata-plugin-cups` to use this plugin for data collection. It is not installed by default due to the large number of dependencies it requires. + +## Charts + +`cups.plugin` provides one common section `destinations` and one section per destination. + +> Destinations in CUPS represent individual printers or classes (collections or pools) of printers () + +The section `server` provides these charts: + +1. **destinations by state** + + - idle + - printing + - stopped + +2. **destinations by options** + + - total + - accepting jobs + - shared + +3. **total job number by status** + + - pending + - processing + - held + +4. **total job size by status** + + - pending + - processing + - held + +For each destination the plugin provides these charts: + +1. **job number by status** + + - pending + - held + - processing + +2. **job size by status** + + - pending + - held + - processing + +At the moment only job status pending, processing, and held are reported because we do not have a method to collect stopped, canceled, aborted and completed jobs which scales. + + diff --git a/collectors/cups.plugin/cups_plugin.c b/collectors/cups.plugin/cups_plugin.c new file mode 100644 index 0000000..9a200c3 --- /dev/null +++ b/collectors/cups.plugin/cups_plugin.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +/* + * netdata cups.plugin + * (C) Copyright 2017-2018 Simon Nagl + * Released under GPL v3+ + */ + +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#include +#include + +// Variables + +static int debug = 0; + +static int netdata_update_every = 1; +static int netdata_priority = 100004; + +http_t *http; // connection to the cups daemon + +/* + * Used to aggregate job metrics for a destination (and all destinations). + */ +struct job_metrics { + int is_collected; // flag if this was collected in the current cycle + + int num_pending; + int num_processing; + int num_held; + + int size_pending; // in kilobyte + int size_processing; // in kilobyte + int size_held; // in kilobyte +}; +DICTIONARY *dict_dest_job_metrics = NULL; +struct job_metrics global_job_metrics; + +int num_dest_total; +int num_dest_accepting_jobs; +int num_dest_shared; + +int num_dest_idle; +int num_dest_printing; +int num_dest_stopped; + +void print_help() { + fprintf(stderr, + "\n" + "netdata cups.plugin %s\n" + "\n" + "Copyright (C) 2017-2018 Simon Nagl \n" + "Released under GNU General Public License v3+.\n" + "All rights reserved.\n" + "\n" + "This program is a data collector plugin for netdata.\n" + "\n" + "SYNOPSIS: cups.plugin [-d][-h][-v] COLLECTION_FREQUENCY\n" + "\n" + "Options:" + "\n" + " COLLECTION_FREQUENCY data collection frequency in seconds\n" + "\n" + " -d enable verbose output\n" + " default: disabled\n" + "\n" + " -v print version and exit\n" + "\n" + " -h print this message and exit\n" + "\n", + VERSION); +} + +void parse_command_line(int argc, char **argv) { + int i; + int freq = 0; + int update_every_found = 0; + for (i = 1; i < argc; i++) { + if (isdigit(*argv[i]) && !update_every_found) { + int n = str2i(argv[i]); + if (n > 0 && n < 86400) { + freq = n; + continue; + } + } else if (strcmp("-v", argv[i]) == 0) { + printf("cups.plugin %s\n", VERSION); + exit(0); + } else if (strcmp("-d", argv[i]) == 0) { + debug = 1; + continue; + } else if (strcmp("-h", argv[i]) == 0) { + print_help(); + exit(0); + } + + print_help(); + exit(1); + } + + if (freq >= netdata_update_every) { + netdata_update_every = freq; + } else if (freq) { + error("update frequency %d seconds is too small for CUPS. Using %d.", freq, netdata_update_every); + } +} + +/* + * 'cupsGetIntegerOption()' - Get an integer option value. + * + * INT_MIN is returned when the option does not exist, is not an integer, or + * exceeds the range of values for the "int" type. + * + * @since CUPS 2.2.4/macOS 10.13@ + */ + +int /* O - Option value or @code INT_MIN@ */ +getIntegerOption( + const char *name, /* I - Name of option */ + int num_options, /* I - Number of options */ + cups_option_t *options) /* I - Options */ +{ + const char *value = cupsGetOption(name, num_options, options); + /* String value of option */ + char *ptr; /* Pointer into string value */ + long intvalue; /* Integer value */ + + + if (!value || !*value) + return (INT_MIN); + + intvalue = strtol(value, &ptr, 10); + if (intvalue < INT_MIN || intvalue > INT_MAX || *ptr) + return (INT_MIN); + + return ((int)intvalue); +} + +static int reset_job_metrics(const DICTIONARY_ITEM *item __maybe_unused, void *entry, void *data __maybe_unused) { + struct job_metrics *jm = (struct job_metrics *)entry; + + jm->is_collected = 0; + jm->num_held = 0; + jm->num_pending = 0; + jm->num_processing = 0; + jm->size_held = 0; + jm->size_pending = 0; + jm->size_processing = 0; + + return 0; +} + +struct job_metrics *get_job_metrics(char *dest) { + struct job_metrics *jm = dictionary_get(dict_dest_job_metrics, dest); + + if (unlikely(!jm)) { + struct job_metrics new_job_metrics; + reset_job_metrics(NULL, &new_job_metrics, NULL); + jm = dictionary_set(dict_dest_job_metrics, dest, &new_job_metrics, sizeof(struct job_metrics)); + + printf("CHART cups.job_num_%s '' 'Active jobs of %s' jobs '%s' cups.job_num stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); + + printf("CHART cups.job_size_%s '' 'Active jobs size of %s' KB '%s' cups.job_size stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); + }; + return jm; +} + +int collect_job_metrics(const DICTIONARY_ITEM *item, void *entry, void *data __maybe_unused) { + const char *name = dictionary_acquired_item_name(item); + + struct job_metrics *jm = (struct job_metrics *)entry; + + if (jm->is_collected) { + printf( + "BEGIN cups.job_num_%s\n" + "SET pending = %d\n" + "SET held = %d\n" + "SET processing = %d\n" + "END\n", + name, jm->num_pending, jm->num_held, jm->num_processing); + printf( + "BEGIN cups.job_size_%s\n" + "SET pending = %d\n" + "SET held = %d\n" + "SET processing = %d\n" + "END\n", + name, jm->size_pending, jm->size_held, jm->size_processing); + } else { + printf("CHART cups.job_num_%s '' 'Active jobs of %s' jobs '%s' cups.job_num stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); + + printf("CHART cups.job_size_%s '' 'Active jobs size of %s' KB '%s' cups.job_size stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); + dictionary_del(dict_dest_job_metrics, name); + } + + return 0; +} + +void reset_metrics() { + num_dest_total = 0; + num_dest_accepting_jobs = 0; + num_dest_shared = 0; + + num_dest_idle = 0; + num_dest_printing = 0; + num_dest_stopped = 0; + + reset_job_metrics(NULL, &global_job_metrics, NULL); + dictionary_walkthrough_write(dict_dest_job_metrics, reset_job_metrics, NULL); +} + +int main(int argc, char **argv) { + clocks_init(); + + // ------------------------------------------------------------------------ + // initialization of netdata plugin + + program_name = "cups.plugin"; + + // disable syslog + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + parse_command_line(argc, argv); + + errno = 0; + + dict_dest_job_metrics = dictionary_create(DICT_OPTION_SINGLE_THREADED); + + // ------------------------------------------------------------------------ + // the main loop + + if (debug) + fprintf(stderr, "starting data collection\n"); + + time_t started_t = now_monotonic_sec(); + size_t iteration = 0; + usec_t step = netdata_update_every * USEC_PER_SEC; + + heartbeat_t hb; + heartbeat_init(&hb); + for (iteration = 0; 1; iteration++) + { + heartbeat_next(&hb, step); + + if (unlikely(netdata_exit)) + { + break; + } + + reset_metrics(); + + cups_dest_t *dests; + num_dest_total = cupsGetDests2(http, &dests); + + if(unlikely(num_dest_total == 0)) { + // reconnect to cups to check if the server is down. + httpClose(http); + http = httpConnect2(cupsServer(), ippPort(), NULL, AF_UNSPEC, cupsEncryption(), 0, netdata_update_every * 1000, NULL); + if(http == NULL) { + error("cups daemon is not running. Exiting!"); + exit(1); + } + } + + cups_dest_t *curr_dest = dests; + int counter = 0; + while (counter < num_dest_total) { + if (counter != 0) { + curr_dest++; + } + counter++; + + const char *printer_uri_supported = cupsGetOption("printer-uri-supported", curr_dest->num_options, curr_dest->options); + if (!printer_uri_supported) { + if(debug) + fprintf(stderr, "destination %s discovered, but not yet setup as a local printer", curr_dest->name); + continue; + } + + const char *printer_is_accepting_jobs = cupsGetOption("printer-is-accepting-jobs", curr_dest->num_options, curr_dest->options); + if (printer_is_accepting_jobs && !strcmp(printer_is_accepting_jobs, "true")) { + num_dest_accepting_jobs++; + } + + const char *printer_is_shared = cupsGetOption("printer-is-shared", curr_dest->num_options, curr_dest->options); + if (printer_is_shared && !strcmp(printer_is_shared, "true")) { + num_dest_shared++; + } + + int printer_state = getIntegerOption("printer-state", curr_dest->num_options, curr_dest->options); + switch (printer_state) { + case 3: + num_dest_idle++; + break; + case 4: + num_dest_printing++; + break; + case 5: + num_dest_stopped++; + break; + case INT_MIN: + if(debug) + fprintf(stderr, "printer state is missing for destination %s", curr_dest->name); + break; + default: + error("Unknown printer state (%d) found.", printer_state); + break; + } + + /* + * flag job metrics to print values. + * This is needed to report also destinations with zero active jobs. + */ + struct job_metrics *jm = get_job_metrics(curr_dest->name); + jm->is_collected = 1; + } + cupsFreeDests(num_dest_total, dests); + + if (unlikely(netdata_exit)) + break; + + cups_job_t *jobs, *curr_job; + int num_jobs = cupsGetJobs2(http, &jobs, NULL, 0, CUPS_WHICHJOBS_ACTIVE); + int i; + for (i = num_jobs, curr_job = jobs; i > 0; i--, curr_job++) { + struct job_metrics *jm = get_job_metrics(curr_job->dest); + jm->is_collected = 1; + + switch (curr_job->state) { + case IPP_JOB_PENDING: + jm->num_pending++; + jm->size_pending += curr_job->size; + global_job_metrics.num_pending++; + global_job_metrics.size_pending += curr_job->size; + break; + case IPP_JOB_HELD: + jm->num_held++; + jm->size_held += curr_job->size; + global_job_metrics.num_held++; + global_job_metrics.size_held += curr_job->size; + break; + case IPP_JOB_PROCESSING: + jm->num_processing++; + jm->size_processing += curr_job->size; + global_job_metrics.num_processing++; + global_job_metrics.size_processing += curr_job->size; + break; + default: + error("Unsupported job state (%u) found.", curr_job->state); + break; + } + } + cupsFreeJobs(num_jobs, jobs); + + dictionary_walkthrough_write(dict_dest_job_metrics, collect_job_metrics, NULL); + + static int cups_printer_by_option_created = 0; + if (unlikely(!cups_printer_by_option_created)) + { + cups_printer_by_option_created = 1; + printf("CHART cups.dest_state '' 'Destinations by state' dests overview cups.dests_state stacked 100000 %i\n", netdata_update_every); + printf("DIMENSION idle '' absolute 1 1\n"); + printf("DIMENSION printing '' absolute 1 1\n"); + printf("DIMENSION stopped '' absolute 1 1\n"); + + printf("CHART cups.dest_option '' 'Destinations by option' dests overview cups.dests_option line 100001 %i\n", netdata_update_every); + printf("DIMENSION total '' absolute 1 1\n"); + printf("DIMENSION acceptingjobs '' absolute 1 1\n"); + printf("DIMENSION shared '' absolute 1 1\n"); + + printf("CHART cups.job_num '' 'Active jobs' jobs overview cups.job_num stacked 100002 %i\n", netdata_update_every); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); + + printf("CHART cups.job_size '' 'Active jobs size' KB overview cups.job_size stacked 100003 %i\n", netdata_update_every); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); + } + + printf( + "BEGIN cups.dest_state\n" + "SET idle = %d\n" + "SET printing = %d\n" + "SET stopped = %d\n" + "END\n", + num_dest_idle, num_dest_printing, num_dest_stopped); + printf( + "BEGIN cups.dest_option\n" + "SET total = %d\n" + "SET acceptingjobs = %d\n" + "SET shared = %d\n" + "END\n", + num_dest_total, num_dest_accepting_jobs, num_dest_shared); + printf( + "BEGIN cups.job_num\n" + "SET pending = %d\n" + "SET held = %d\n" + "SET processing = %d\n" + "END\n", + global_job_metrics.num_pending, global_job_metrics.num_held, global_job_metrics.num_processing); + printf( + "BEGIN cups.job_size\n" + "SET pending = %d\n" + "SET held = %d\n" + "SET processing = %d\n" + "END\n", + global_job_metrics.size_pending, global_job_metrics.size_held, global_job_metrics.size_processing); + + fflush(stdout); + + if (unlikely(netdata_exit)) + break; + + // restart check (14400 seconds) + if (!now_monotonic_sec() - started_t > 14400) + break; + } + + httpClose(http); + info("CUPS process exiting"); +} diff --git a/collectors/diskspace.plugin/Makefile.am b/collectors/diskspace.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/diskspace.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/diskspace.plugin/README.md b/collectors/diskspace.plugin/README.md new file mode 100644 index 0000000..c037a0b --- /dev/null +++ b/collectors/diskspace.plugin/README.md @@ -0,0 +1,43 @@ + + +# diskspace.plugin + +This plugin monitors the disk space usage of mounted disks, under Linux. The plugin requires Netdata to have execute/search permissions on the mount point itself, as well as each component of the absolute path to the mount point. + +Two charts are available for every mount: + +- Disk Space Usage +- Disk Files (inodes) Usage + +## configuration + +Simple patterns can be used to exclude mounts from showed statistics based on path or filesystem. By default read-only mounts are not displayed. To display them `yes` should be set for a chart instead of `auto`. + +By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a chart instead of `auto` to enable it permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. + +``` +[plugin:proc:diskspace] + # remove charts of unmounted disks = yes + # update every = 1 + # check for new mount points every = 15 + # exclude space metrics on paths = /proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/* + # exclude space metrics on filesystems = *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs + # space usage for all disks = auto + # inodes usage for all disks = auto +``` + +Charts can be enabled/disabled for every mount separately: + +``` +[plugin:proc:diskspace:/] + # space usage = auto + # inodes usage = auto +``` + +> for disks performance monitoring, see the `proc` plugin, [here](/collectors/proc.plugin/README.md#monitoring-disks) + + diff --git a/collectors/diskspace.plugin/plugin_diskspace.c b/collectors/diskspace.plugin/plugin_diskspace.c new file mode 100644 index 0000000..e806a33 --- /dev/null +++ b/collectors/diskspace.plugin/plugin_diskspace.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../proc.plugin/plugin_proc.h" + +#define PLUGIN_DISKSPACE_NAME "diskspace.plugin" +#define THREAD_DISKSPACE_SLOW_NAME "PLUGIN[diskspace slow]" + +#define DEFAULT_EXCLUDED_PATHS "/proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/*" +#define DEFAULT_EXCLUDED_FILESYSTEMS "*gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs" +#define CONFIG_SECTION_DISKSPACE "plugin:proc:diskspace" + +#define MAX_STAT_USEC 10000LU +#define SLOW_UPDATE_EVERY 5 + +static netdata_thread_t *diskspace_slow_thread = NULL; + +static struct mountinfo *disk_mountinfo_root = NULL; +static int check_for_new_mountpoints_every = 15; +static int cleanup_mount_points = 1; + +static inline void mountinfo_reload(int force) { + static time_t last_loaded = 0; + time_t now = now_realtime_sec(); + + if(force || now - last_loaded >= check_for_new_mountpoints_every) { + // mountinfo_free_all() can be called with NULL disk_mountinfo_root + mountinfo_free_all(disk_mountinfo_root); + + // re-read mountinfo in case something changed + disk_mountinfo_root = mountinfo_read(0); + + last_loaded = now; + } +} + +// Data to be stored in DICTIONARY dict_mountpoints used by do_disk_space_stats(). +// This DICTIONARY is used to lookup the settings of the mount point on each iteration. +struct mount_point_metadata { + int do_space; + int do_inodes; + int shown_error; + int updated; + int slow; + + DICTIONARY *chart_labels; + + size_t collected; // the number of times this has been collected + + RRDSET *st_space; + RRDDIM *rd_space_used; + RRDDIM *rd_space_avail; + RRDDIM *rd_space_reserved; + + RRDSET *st_inodes; + RRDDIM *rd_inodes_used; + RRDDIM *rd_inodes_avail; + RRDDIM *rd_inodes_reserved; +}; + +static DICTIONARY *dict_mountpoints = NULL; + +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) + +int mount_point_cleanup(const char *name, void *entry, int slow) { + (void)name; + + struct mount_point_metadata *mp = (struct mount_point_metadata *)entry; + if(!mp) return 0; + + if (slow != mp->slow) + return 0; + + if(likely(mp->updated)) { + mp->updated = 0; + return 0; + } + + if(likely(cleanup_mount_points && mp->collected)) { + mp->collected = 0; + mp->updated = 0; + mp->shown_error = 0; + + mp->rd_space_avail = NULL; + mp->rd_space_used = NULL; + mp->rd_space_reserved = NULL; + + mp->rd_inodes_avail = NULL; + mp->rd_inodes_used = NULL; + mp->rd_inodes_reserved = NULL; + + rrdset_obsolete_and_pointer_null(mp->st_space); + rrdset_obsolete_and_pointer_null(mp->st_inodes); + } + + return 0; +} + +int mount_point_cleanup_cb(const DICTIONARY_ITEM *item, void *entry, void *data __maybe_unused) { + const char *name = dictionary_acquired_item_name(item); + + return mount_point_cleanup(name, (struct mount_point_metadata *)entry, 0); +} + +// a copy of basic mountinfo fields +struct basic_mountinfo { + char *persistent_id; + char *root; + char *mount_point; + char *filesystem; + + struct basic_mountinfo *next; +}; + +static struct basic_mountinfo *slow_mountinfo_tmp_root = NULL; +static netdata_mutex_t slow_mountinfo_mutex; + +static struct basic_mountinfo *basic_mountinfo_create_and_copy(struct mountinfo* mi) +{ + struct basic_mountinfo *bmi = callocz(1, sizeof(struct basic_mountinfo)); + + if (mi) { + bmi->persistent_id = strdupz(mi->persistent_id); + bmi->root = strdupz(mi->root); + bmi->mount_point = strdupz(mi->mount_point); + bmi->filesystem = strdupz(mi->filesystem); + } + + return bmi; +} + +static void add_basic_mountinfo(struct basic_mountinfo **root, struct mountinfo *mi) +{ + if (!root) + return; + + struct basic_mountinfo *bmi = basic_mountinfo_create_and_copy(mi); + + bmi->next = *root; + *root = bmi; +}; + +static void free_basic_mountinfo(struct basic_mountinfo *bmi) +{ + if (bmi) { + freez(bmi->persistent_id); + freez(bmi->root); + freez(bmi->mount_point); + freez(bmi->filesystem); + + freez(bmi); + } +}; + +static void free_basic_mountinfo_list(struct basic_mountinfo *root) +{ + struct basic_mountinfo *bmi = root, *next; + + while (bmi) { + next = bmi->next; + free_basic_mountinfo(bmi); + bmi = next; + } +} + +static void calculate_values_and_show_charts( + struct basic_mountinfo *mi, + struct mount_point_metadata *m, + struct statvfs *buff_statvfs, + int update_every) +{ + const char *family = mi->mount_point; + const char *disk = mi->persistent_id; + + // logic found at get_fs_usage() in coreutils + unsigned long bsize = (buff_statvfs->f_frsize) ? buff_statvfs->f_frsize : buff_statvfs->f_bsize; + + fsblkcnt_t bavail = buff_statvfs->f_bavail; + fsblkcnt_t btotal = buff_statvfs->f_blocks; + fsblkcnt_t bavail_root = buff_statvfs->f_bfree; + fsblkcnt_t breserved_root = bavail_root - bavail; + fsblkcnt_t bused = likely(btotal >= bavail_root) ? btotal - bavail_root : bavail_root - btotal; + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(btotal != bavail + breserved_root + bused)) + error("DISKSPACE: disk block statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)btotal, (unsigned long long)bavail, (unsigned long long)breserved_root, (unsigned long long)bused); +#endif + + // -------------------------------------------------------------------------- + + fsfilcnt_t favail = buff_statvfs->f_favail; + fsfilcnt_t ftotal = buff_statvfs->f_files; + fsfilcnt_t favail_root = buff_statvfs->f_ffree; + fsfilcnt_t freserved_root = favail_root - favail; + fsfilcnt_t fused = ftotal - favail_root; + + if(m->do_inodes == CONFIG_BOOLEAN_AUTO && favail == (fsfilcnt_t)-1) { + // this file system does not support inodes reporting + // eg. cephfs + m->do_inodes = CONFIG_BOOLEAN_NO; + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(btotal != bavail + breserved_root + bused)) + error("DISKSPACE: disk inode statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)ftotal, (unsigned long long)favail, (unsigned long long)freserved_root, (unsigned long long)fused); +#endif + + int rendered = 0; + + if(m->do_space == CONFIG_BOOLEAN_YES || (m->do_space == CONFIG_BOOLEAN_AUTO && + (bavail || breserved_root || bused || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if(unlikely(!m->st_space) || m->st_space->update_every != update_every) { + m->do_space = CONFIG_BOOLEAN_YES; + m->st_space = rrdset_find_active_bytype_localhost("disk_space", disk); + if(unlikely(!m->st_space || m->st_space->update_every != update_every)) { + char title[4096 + 1]; + snprintfz(title, 4096, "Disk Space Usage"); + m->st_space = rrdset_create_localhost( + "disk_space" + , disk + , NULL + , family + , "disk.space" + , title + , "GiB" + , PLUGIN_DISKSPACE_NAME + , NULL + , NETDATA_CHART_PRIO_DISKSPACE_SPACE + , update_every + , RRDSET_TYPE_STACKED + ); + } + + rrdset_update_rrdlabels(m->st_space, m->chart_labels); + + m->rd_space_avail = rrddim_add(m->st_space, "avail", NULL, (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + m->rd_space_used = rrddim_add(m->st_space, "used", NULL, (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + m->rd_space_reserved = rrddim_add(m->st_space, "reserved_for_root", "reserved for root", (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(m->st_space, m->rd_space_avail, (collected_number)bavail); + rrddim_set_by_pointer(m->st_space, m->rd_space_used, (collected_number)bused); + rrddim_set_by_pointer(m->st_space, m->rd_space_reserved, (collected_number)breserved_root); + rrdset_done(m->st_space); + + rendered++; + } + + if(m->do_inodes == CONFIG_BOOLEAN_YES || (m->do_inodes == CONFIG_BOOLEAN_AUTO && + (favail || freserved_root || fused || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if(unlikely(!m->st_inodes) || m->st_inodes->update_every != update_every) { + m->do_inodes = CONFIG_BOOLEAN_YES; + m->st_inodes = rrdset_find_active_bytype_localhost("disk_inodes", disk); + if(unlikely(!m->st_inodes) || m->st_inodes->update_every != update_every) { + char title[4096 + 1]; + snprintfz(title, 4096, "Disk Files (inodes) Usage"); + m->st_inodes = rrdset_create_localhost( + "disk_inodes" + , disk + , NULL + , family + , "disk.inodes" + , title + , "inodes" + , PLUGIN_DISKSPACE_NAME + , NULL + , NETDATA_CHART_PRIO_DISKSPACE_INODES + , update_every + , RRDSET_TYPE_STACKED + ); + } + + rrdset_update_rrdlabels(m->st_inodes, m->chart_labels); + + m->rd_inodes_avail = rrddim_add(m->st_inodes, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + m->rd_inodes_used = rrddim_add(m->st_inodes, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + m->rd_inodes_reserved = rrddim_add(m->st_inodes, "reserved_for_root", "reserved for root", 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_avail, (collected_number)favail); + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_used, (collected_number)fused); + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_reserved, (collected_number)freserved_root); + rrdset_done(m->st_inodes); + + rendered++; + } + + if(likely(rendered)) + m->collected++; +} + +static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { + const char *disk = mi->persistent_id; + + static SIMPLE_PATTERN *excluded_mountpoints = NULL; + static SIMPLE_PATTERN *excluded_filesystems = NULL; + + usec_t slow_timeout = MAX_STAT_USEC * update_every; + + int do_space, do_inodes; + + if(unlikely(!dict_mountpoints)) { + SIMPLE_PREFIX_MODE mode = SIMPLE_PATTERN_EXACT; + + if(config_move("plugin:proc:/proc/diskstats", "exclude space metrics on paths", CONFIG_SECTION_DISKSPACE, "exclude space metrics on paths") != -1) { + // old configuration, enable backwards compatibility + mode = SIMPLE_PATTERN_PREFIX; + } + + excluded_mountpoints = simple_pattern_create( + config_get(CONFIG_SECTION_DISKSPACE, "exclude space metrics on paths", DEFAULT_EXCLUDED_PATHS) + , NULL + , mode + ); + + excluded_filesystems = simple_pattern_create( + config_get(CONFIG_SECTION_DISKSPACE, "exclude space metrics on filesystems", DEFAULT_EXCLUDED_FILESYSTEMS) + , NULL + , SIMPLE_PATTERN_EXACT + ); + + dict_mountpoints = dictionary_create(DICT_OPTION_NONE); + } + + struct mount_point_metadata *m = dictionary_get(dict_mountpoints, mi->mount_point); + if(unlikely(!m)) { + int slow = 0; + char var_name[4096 + 1]; + snprintfz(var_name, 4096, "plugin:proc:diskspace:%s", mi->mount_point); + + int def_space = config_get_boolean_ondemand(CONFIG_SECTION_DISKSPACE, "space usage for all disks", CONFIG_BOOLEAN_AUTO); + int def_inodes = config_get_boolean_ondemand(CONFIG_SECTION_DISKSPACE, "inodes usage for all disks", CONFIG_BOOLEAN_AUTO); + + if(unlikely(simple_pattern_matches(excluded_mountpoints, mi->mount_point))) { + def_space = CONFIG_BOOLEAN_NO; + def_inodes = CONFIG_BOOLEAN_NO; + } + + if(unlikely(simple_pattern_matches(excluded_filesystems, mi->filesystem))) { + def_space = CONFIG_BOOLEAN_NO; + def_inodes = CONFIG_BOOLEAN_NO; + } + + // check if the mount point is a directory #2407 + // but only when it is enabled by default #4491 + if(def_space != CONFIG_BOOLEAN_NO || def_inodes != CONFIG_BOOLEAN_NO) { + usec_t start_time = now_monotonic_high_precision_usec(); + struct stat bs; + + if(stat(mi->mount_point, &bs) == -1) { + error("DISKSPACE: Cannot stat() mount point '%s' (disk '%s', filesystem '%s', root '%s')." + , mi->mount_point + , disk + , mi->filesystem?mi->filesystem:"" + , mi->root?mi->root:"" + ); + def_space = CONFIG_BOOLEAN_NO; + def_inodes = CONFIG_BOOLEAN_NO; + } + else { + if((bs.st_mode & S_IFMT) != S_IFDIR) { + error("DISKSPACE: Mount point '%s' (disk '%s', filesystem '%s', root '%s') is not a directory." + , mi->mount_point + , disk + , mi->filesystem?mi->filesystem:"" + , mi->root?mi->root:"" + ); + def_space = CONFIG_BOOLEAN_NO; + def_inodes = CONFIG_BOOLEAN_NO; + } + } + + if ((now_monotonic_high_precision_usec() - start_time) > slow_timeout) + slow = 1; + } + + do_space = config_get_boolean_ondemand(var_name, "space usage", def_space); + do_inodes = config_get_boolean_ondemand(var_name, "inodes usage", def_inodes); + + struct mount_point_metadata mp = { + .do_space = do_space, + .do_inodes = do_inodes, + .shown_error = 0, + .updated = 0, + .slow = 0, + + .collected = 0, + + .st_space = NULL, + .rd_space_avail = NULL, + .rd_space_used = NULL, + .rd_space_reserved = NULL, + + .st_inodes = NULL, + .rd_inodes_avail = NULL, + .rd_inodes_used = NULL, + .rd_inodes_reserved = NULL + }; + + mp.chart_labels = rrdlabels_create(); + rrdlabels_add(mp.chart_labels, "mount_point", mi->mount_point, RRDLABEL_SRC_AUTO); + rrdlabels_add(mp.chart_labels, "filesystem", mi->filesystem, RRDLABEL_SRC_AUTO); + rrdlabels_add(mp.chart_labels, "mount_root", mi->root, RRDLABEL_SRC_AUTO); + + m = dictionary_set(dict_mountpoints, mi->mount_point, &mp, sizeof(struct mount_point_metadata)); + + m->slow = slow; + } + + if (m->slow) { + add_basic_mountinfo(&slow_mountinfo_tmp_root, mi); + return; + } + + m->updated = 1; + + if(unlikely(m->do_space == CONFIG_BOOLEAN_NO && m->do_inodes == CONFIG_BOOLEAN_NO)) + return; + + if (unlikely( + mi->flags & MOUNTINFO_READONLY && + !(mi->flags & MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST) && + !m->collected && + m->do_space != CONFIG_BOOLEAN_YES && + m->do_inodes != CONFIG_BOOLEAN_YES)) + return; + + usec_t start_time = now_monotonic_high_precision_usec(); + struct statvfs buff_statvfs; + + if (statvfs(mi->mount_point, &buff_statvfs) < 0) { + if(!m->shown_error) { + error("DISKSPACE: failed to statvfs() mount point '%s' (disk '%s', filesystem '%s', root '%s')" + , mi->mount_point + , disk + , mi->filesystem?mi->filesystem:"" + , mi->root?mi->root:"" + ); + m->shown_error = 1; + } + return; + } + + if ((now_monotonic_high_precision_usec() - start_time) > slow_timeout) + m->slow = 1; + + m->shown_error = 0; + + struct basic_mountinfo bmi; + bmi.mount_point = mi->mount_point; + bmi.persistent_id = mi->persistent_id; + bmi.filesystem = mi->filesystem; + bmi.root = mi->root; + + calculate_values_and_show_charts(&bmi, m, &buff_statvfs, update_every); +} + +static inline void do_slow_disk_space_stats(struct basic_mountinfo *mi, int update_every) { + struct mount_point_metadata *m = dictionary_get(dict_mountpoints, mi->mount_point); + + m->updated = 1; + + struct statvfs buff_statvfs; + if (statvfs(mi->mount_point, &buff_statvfs) < 0) { + if(!m->shown_error) { + error("DISKSPACE: failed to statvfs() mount point '%s' (disk '%s', filesystem '%s', root '%s')" + , mi->mount_point + , mi->persistent_id + , mi->filesystem?mi->filesystem:"" + , mi->root?mi->root:"" + ); + m->shown_error = 1; + } + return; + } + m->shown_error = 0; + + calculate_values_and_show_charts(mi, m, &buff_statvfs, update_every); +} + +static void diskspace_slow_worker_cleanup(void *ptr) +{ + UNUSED(ptr); + + info("cleaning up..."); + + worker_unregister(); +} + +#define WORKER_JOB_SLOW_MOUNTPOINT 0 +#define WORKER_JOB_SLOW_CLEANUP 1 + +struct slow_worker_data { + netdata_thread_t *slow_thread; + int update_every; +}; + +void *diskspace_slow_worker(void *ptr) +{ + struct slow_worker_data *data = (struct slow_worker_data *)ptr; + + worker_register("DISKSPACE_SLOW"); + worker_register_job_name(WORKER_JOB_SLOW_MOUNTPOINT, "mountpoint"); + worker_register_job_name(WORKER_JOB_SLOW_CLEANUP, "cleanup"); + + struct basic_mountinfo *slow_mountinfo_root = NULL; + + int slow_update_every = data->update_every > SLOW_UPDATE_EVERY ? data->update_every : SLOW_UPDATE_EVERY; + + netdata_thread_cleanup_push(diskspace_slow_worker_cleanup, data->slow_thread); + + usec_t step = slow_update_every * USEC_PER_SEC; + usec_t real_step = USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while(!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, USEC_PER_SEC); + + if (real_step < step) { + real_step += USEC_PER_SEC; + continue; + } + real_step = USEC_PER_SEC; + + usec_t start_time = now_monotonic_high_precision_usec(); + + if (!dict_mountpoints) + continue; + + if(unlikely(netdata_exit)) break; + + // -------------------------------------------------------------------------- + // disk space metrics + + worker_is_busy(WORKER_JOB_SLOW_MOUNTPOINT); + + netdata_mutex_lock(&slow_mountinfo_mutex); + free_basic_mountinfo_list(slow_mountinfo_root); + slow_mountinfo_root = slow_mountinfo_tmp_root; + slow_mountinfo_tmp_root = NULL; + netdata_mutex_unlock(&slow_mountinfo_mutex); + + struct basic_mountinfo *bmi; + for(bmi = slow_mountinfo_root; bmi; bmi = bmi->next) { + do_slow_disk_space_stats(bmi, slow_update_every); + + if(unlikely(netdata_exit)) break; + } + + if(unlikely(netdata_exit)) break; + + worker_is_busy(WORKER_JOB_SLOW_CLEANUP); + + for(bmi = slow_mountinfo_root; bmi; bmi = bmi->next) { + struct mount_point_metadata *m = dictionary_get(dict_mountpoints, bmi->mount_point); + + if (m) + mount_point_cleanup(bmi->mount_point, m, 1); + } + + usec_t dt = now_monotonic_high_precision_usec() - start_time; + if (dt > step) { + slow_update_every = (dt / USEC_PER_SEC) * 3 / 2; + if (slow_update_every % SLOW_UPDATE_EVERY) + slow_update_every += SLOW_UPDATE_EVERY - slow_update_every % SLOW_UPDATE_EVERY; + step = slow_update_every * USEC_PER_SEC; + } + } + + netdata_thread_cleanup_pop(1); + + free_basic_mountinfo_list(slow_mountinfo_root); + + return NULL; +} + +static void diskspace_main_cleanup(void *ptr) { + rrd_collector_finished(); + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + if (diskspace_slow_thread) { + netdata_thread_join(*diskspace_slow_thread, NULL); + freez(diskspace_slow_thread); + } + + free_basic_mountinfo_list(slow_mountinfo_tmp_root); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +#define WORKER_JOB_MOUNTINFO 0 +#define WORKER_JOB_MOUNTPOINT 1 +#define WORKER_JOB_CLEANUP 2 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 3 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 3 +#endif + +void *diskspace_main(void *ptr) { + worker_register("DISKSPACE"); + worker_register_job_name(WORKER_JOB_MOUNTINFO, "mountinfo"); + worker_register_job_name(WORKER_JOB_MOUNTPOINT, "mountpoint"); + worker_register_job_name(WORKER_JOB_CLEANUP, "cleanup"); + + rrd_collector_started(); + + netdata_thread_cleanup_push(diskspace_main_cleanup, ptr); + + cleanup_mount_points = config_get_boolean(CONFIG_SECTION_DISKSPACE, "remove charts of unmounted disks" , cleanup_mount_points); + + int update_every = (int)config_get_number(CONFIG_SECTION_DISKSPACE, "update every", localhost->rrd_update_every); + if(update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + check_for_new_mountpoints_every = (int)config_get_number(CONFIG_SECTION_DISKSPACE, "check for new mount points every", check_for_new_mountpoints_every); + if(check_for_new_mountpoints_every < update_every) + check_for_new_mountpoints_every = update_every; + + netdata_mutex_init(&slow_mountinfo_mutex); + + diskspace_slow_thread = mallocz(sizeof(netdata_thread_t)); + + struct slow_worker_data slow_worker_data = {.slow_thread = diskspace_slow_thread, .update_every = update_every}; + + netdata_thread_create( + diskspace_slow_thread, + THREAD_DISKSPACE_SLOW_NAME, + NETDATA_THREAD_OPTION_JOINABLE, + diskspace_slow_worker, + &slow_worker_data); + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + while(!netdata_exit) { + worker_is_idle(); + /* usec_t hb_dt = */ heartbeat_next(&hb, step); + + if(unlikely(netdata_exit)) break; + + // -------------------------------------------------------------------------- + // this is smart enough not to reload it every time + + worker_is_busy(WORKER_JOB_MOUNTINFO); + mountinfo_reload(0); + + // -------------------------------------------------------------------------- + // disk space metrics + + netdata_mutex_lock(&slow_mountinfo_mutex); + free_basic_mountinfo_list(slow_mountinfo_tmp_root); + slow_mountinfo_tmp_root = NULL; + + struct mountinfo *mi; + for(mi = disk_mountinfo_root; mi; mi = mi->next) { + if(unlikely(mi->flags & (MOUNTINFO_IS_DUMMY | MOUNTINFO_IS_BIND))) + continue; + + // exclude mounts made by ProtectHome and ProtectSystem systemd hardening options + // https://github.com/netdata/netdata/issues/11498#issuecomment-950982878 + if(mi->flags & MOUNTINFO_READONLY && mi->flags & MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST && !strcmp(mi->root, mi->mount_point)) + continue; + + worker_is_busy(WORKER_JOB_MOUNTPOINT); + do_disk_space_stats(mi, update_every); + if(unlikely(netdata_exit)) break; + } + netdata_mutex_unlock(&slow_mountinfo_mutex); + + if(unlikely(netdata_exit)) break; + + if(dict_mountpoints) { + worker_is_busy(WORKER_JOB_CLEANUP); + dictionary_walkthrough_read(dict_mountpoints, mount_point_cleanup_cb, NULL); + } + + } + worker_unregister(); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/Makefile.am b/collectors/ebpf.plugin/Makefile.am new file mode 100644 index 0000000..2d5f92a --- /dev/null +++ b/collectors/ebpf.plugin/Makefile.am @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +userebpfconfigdir=$(configdir)/ebpf.d + +# Explicitly install directories to avoid permission issues due to umask +install-exec-local: + $(INSTALL) -d $(DESTDIR)$(userebpfconfigdir) + +dist_noinst_DATA = \ + README.md \ + $(NULL) + +ebpfconfigdir=$(libconfigdir)/ebpf.d +dist_libconfig_DATA = \ + ebpf.d.conf \ + $(NULL) + +dist_ebpfconfig_DATA = \ + ebpf.d/ebpf_kernel_reject_list.txt \ + ebpf.d/cachestat.conf \ + ebpf.d/dcstat.conf \ + ebpf.d/disk.conf \ + ebpf.d/fd.conf \ + ebpf.d/filesystem.conf \ + ebpf.d/hardirq.conf \ + ebpf.d/mdflush.conf \ + ebpf.d/mount.conf \ + ebpf.d/network.conf \ + ebpf.d/oomkill.conf \ + ebpf.d/process.conf \ + ebpf.d/shm.conf \ + ebpf.d/softirq.conf \ + ebpf.d/sync.conf \ + ebpf.d/swap.conf \ + ebpf.d/vfs.conf \ + $(NULL) diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md new file mode 100644 index 0000000..7762ed3 --- /dev/null +++ b/collectors/ebpf.plugin/README.md @@ -0,0 +1,970 @@ + + +# eBPF monitoring with Netdata + +The Netdata Agent provides many [eBPF](https://ebpf.io/what-is-ebpf/) programs to help you troubleshoot and debug how applications interact with the Linux kernel. The `ebpf.plugin` uses [tracepoints, trampoline, and2 kprobes](#how-netdata-collects-data-using-probes-and-tracepoints) to collect a wide array of high value data about the host that would otherwise be impossible to capture. + +> ❗ eBPF monitoring only works on Linux systems and with specific Linux kernels, including all kernels newer than `4.11.0`, and all kernels on CentOS 7.6 or later. For kernels older than `4.11.0`, improved support is in active development. + +This document provides comprehensive details about the `ebpf.plugin`. +For hands-on configuration and troubleshooting tips see our [tutorial on troubleshooting apps with eBPF metrics](/docs/guides/troubleshoot/monitor-debug-applications-ebpf.md). + +
+ An example of VFS charts, made possible by the eBPF collector plugin +
An example of virtual file system (VFS) charts made possible by the eBPF collector plugin.
+
+ +## How Netdata collects data using probes and tracepoints + +Netdata uses the following features from the Linux kernel to run eBPF programs: + +- Tracepoints are hooks to call specific functions. Tracepoints are more stable than `kprobes` and are preferred when + both options are available. +- Trampolines are bridges between kernel functions, and BPF programs. Netdata uses them by default whenever available. +- Kprobes and return probes (`kretprobe`): Probes can insert virtually into any kernel instruction. When eBPF runs in `entry` mode, it attaches only `kprobes` for internal functions monitoring calls and some arguments every time a function is called. The user can also change configuration to use [`return`](#global-configuration-options) mode, and this will allow users to monitor return from these functions and detect possible failures. + +In each case, wherever a normal kprobe, kretprobe, or tracepoint would have run its hook function, an eBPF program is run instead, performing various collection logic before letting the kernel continue its normal control flow. + +There are more methods to trigger eBPF programs, such as uprobes, but currently are not supported. + +## Configuring ebpf.plugin + +The eBPF collector is installed and enabled by default on most new installations of the Agent. +If your Agent is v1.22 or older, you may to enable the collector yourself. + +### Enable the eBPF collector + +To enable or disable the entire eBPF collector: + +1. Navigate to the [Netdata config directory](/docs/configure/nodes.md#the-netdata-config-directory). + ```bash + cd /etc/netdata + ``` + +2. Use the [`edit-config`](/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) script to edit `netdata.conf`. + + ```bash + ./edit-config netdata.conf + ``` + +3. Enable the collector by scrolling down to the `[plugins]` section. Uncomment the line `ebpf` (not + `ebpf_process`) and set it to `yes`. + + ```conf + [plugins] + ebpf = yes + ``` + +### Configure the eBPF collector + +You can configure the eBPF collector's behavior to fine-tune which metrics you receive and [optimize performance]\(#performance opimization). + +To edit the `ebpf.d.conf`: + +1. Navigate to the [Netdata config directory](/docs/configure/nodes.md#the-netdata-config-directory). + ```bash + cd /etc/netdata + ``` +2. Use the [`edit-config`](/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) script to edit [`ebpf.d.conf`](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/ebpf.d.conf). + + ```bash + ./edit-config ebpf.d.conf + ``` + + You can now edit the behavior of the eBPF collector. The following sections describe each configuration option in detail. + +### `[global]` configuration options + +The `[global]` section defines settings for the whole eBPF collector. + +#### eBPF load mode + +The collector uses two different eBPF programs. These programs rely on the same functions inside the kernel, but they +monitor, process, and display different kinds of information. + +By default, this plugin uses the `entry` mode. Changing this mode can create significant overhead on your operating +system, but also offer valuable information if you are developing or debugging software. The `ebpf load mode` option +accepts the following values: + +- `entry`: This is the default mode. In this mode, the eBPF collector only monitors calls for the functions described in + the sections above, and does not show charts related to errors. +- `return`: In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates new + charts for the return of these functions, such as errors. Monitoring function returns can help in debugging software, + such as failing to close file descriptors or creating zombie processes. +- `update every`: Number of seconds used for eBPF to send data for Netdata. +- `pid table size`: Defines the maximum number of PIDs stored inside the application hash table. + +#### Integration with `apps.plugin` + +The eBPF collector also creates charts for each running application through an integration with the +[`apps.plugin`](/collectors/apps.plugin/README.md). This integration helps you understand how specific applications +interact with the Linux kernel. + +If you want to _disable_ the integration with `apps.plugin` along with the above charts, change the setting `apps` to +`no`. + +```conf +[global] + apps = yes +``` + +When the integration is enabled, eBPF collector allocates memory for each process running. The total allocated memory +has direct relationship with the kernel version. When the eBPF plugin is running on kernels newer than `4.15`, it uses +per-cpu maps to speed up the update of hash tables. This also implies storing data for the same PID for each processor +it runs. + +#### Integration with `cgroups.plugin` + +The eBPF collector also creates charts for each cgroup through an integration with the +[`cgroups.plugin`](/collectors/cgroups.plugin/README.md). This integration helps you understand how a specific cgroup +interacts with the Linux kernel. + +The integration with `cgroups.plugin` is disabled by default to avoid creating overhead on your system. If you want to +_enable_ the integration with `cgroups.plugin`, change the `cgroups` setting to `yes`. + +```conf +[global] + cgroups = yes +``` + +If you do not need to monitor specific metrics for your `cgroups`, you can enable `cgroups` inside +`ebpf.d.conf`, and then disable the plugin for a specific `thread` by following the steps in the +[Configuration](#configuring-ebpfplugin) section. + +#### Collect PID + +When one of the previous integrations is enabled, `ebpf.plugin` will use Process Identifier (`PID`) to identify the +process group for which it needs to plot data. + +There are different ways to collect PID, and you can select the way `ebpf.plugin` collects data with the following +values: + +- `real parent`: This is the default mode. Collection will aggregate data for the real parent, the thread that creates + child threads. +- `parent`: Parent and real parent are the same when a process starts, but this value can be changed during run time. +- `all`: This option will store all PIDs that run on the host. Note, this method can be expensive for the host, + because more memory needs to be allocated and parsed. + +The threads that have integration with other collectors have an internal clean up wherein they attach either a +`trampoline` or a `kprobe` to `release_task` internal function. To avoid `overload` on this function, `ebpf.plugin` +will only enable these threads integrated with other collectors when the kernel is compiled with +`CONFIG_DEBUG_INFO_BTF`, unless you enable them manually. + +#### Integration Dashboard Elements + +When an integration is enabled, your dashboard will also show the following cgroups and apps charts using low-level +Linux metrics: + +> Note: The parenthetical accompanying each bulleted item provides the chart name. + +- mem + - Number of processes killed due out of memory. (`oomkills`) +- process + - Number of processes created with `do_fork`. (`process_create`) + - Number of threads created with `do_fork` or `clone (2)`, depending on your system's kernel + version. (`thread_create`) + - Number of times that a process called `do_exit`. (`task_exit`) + - Number of times that a process called `release_task`. (`task_close`) + - Number of times that an error happened to create thread or process. (`task_error`) +- swap + - Number of calls to `swap_readpage`. (`swap_read_call`) + - Number of calls to `swap_writepage`. (`swap_write_call`) +- network + - Number of outbound connections using TCP/IPv4. (`outbound_conn_ipv4`) + - Number of outbound connections using TCP/IPv6. (`outbound_conn_ipv6`) + - Number of bytes sent. (`total_bandwidth_sent`) + - Number of bytes received. (`total_bandwidth_recv`) + - Number of calls to `tcp_sendmsg`. (`bandwidth_tcp_send`) + - Number of calls to `tcp_cleanup_rbuf`. (`bandwidth_tcp_recv`) + - Number of calls to `tcp_retransmit_skb`. (`bandwidth_tcp_retransmit`) + - Number of calls to `udp_sendmsg`. (`bandwidth_udp_send`) + - Number of calls to `udp_recvmsg`. (`bandwidth_udp_recv`) +- file access + - Number of calls to open files. (`file_open`) + - Number of calls to open files that returned errors. (`open_error`) + - Number of files closed. (`file_closed`) + - Number of calls to close files that returned errors. (`file_error_closed`) +- vfs + - Number of calls to `vfs_unlink`. (`file_deleted`) + - Number of calls to `vfs_write`. (`vfs_write_call`) + - Number of calls to write a file that returned errors. (`vfs_write_error`) + - Number of calls to `vfs_read`. (`vfs_read_call`) + - - Number of calls to read a file that returned errors. (`vfs_read_error`) + - Number of bytes written with `vfs_write`. (`vfs_write_bytes`) + - Number of bytes read with `vfs_read`. (`vfs_read_bytes`) + - Number of calls to `vfs_fsync`. (`vfs_fsync`) + - Number of calls to sync file that returned errors. (`vfs_fsync_error`) + - Number of calls to `vfs_open`. (`vfs_open`) + - Number of calls to open file that returned errors. (`vfs_open_error`) + - Number of calls to `vfs_create`. (`vfs_create`) + - Number of calls to open file that returned errors. (`vfs_create_error`) +- page cache + - Ratio of pages accessed. (`cachestat_ratio`) + - Number of modified pages ("dirty"). (`cachestat_dirties`) + - Number of accessed pages. (`cachestat_hits`) + - Number of pages brought from disk. (`cachestat_misses`) +- directory cache + - Ratio of files available in directory cache. (`dc_hit_ratio`) + - Number of files accessed. (`dc_reference`) + - Number of files accessed that were not in cache. (`dc_not_cache`) + - Number of files not found. (`dc_not_found`) +- ipc shm + - Number of calls to `shm_get`. (`shmget_call`) + - Number of calls to `shm_at`. (`shmat_call`) + - Number of calls to `shm_dt`. (`shmdt_call`) + - Number of calls to `shm_ctl`. (`shmctl_call`) + +### `[ebpf programs]` configuration options + +The eBPF collector enables and runs the following eBPF programs by default: + +- `fd` : This eBPF program creates charts that show information about calls to open files. +- `mount`: This eBPF program creates charts that show calls to syscalls mount(2) and umount(2). +- `shm`: This eBPF program creates charts that show calls to syscalls shmget(2), shmat(2), shmdt(2) and shmctl(2). +- `sync`: Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2). +- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the + bandwidth consumed by each. +- `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions. +- `process`: This eBPF program creates charts that show information about process life. When in `return` mode, it also + creates charts showing errors when these operations are executed. +- `hardirq`: This eBPF program creates charts that show information about time spent servicing individual hardware + interrupt requests (hard IRQs). +- `softirq`: This eBPF program creates charts that show information about time spent servicing individual software + interrupt requests (soft IRQs). +- `oomkill`: This eBPF program creates a chart that shows OOM kills for all applications recognized via + the `apps.plugin` integration. Note that this program will show application charts regardless of whether apps + integration is turned on or off. + +You can also enable the following eBPF programs: + +- `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with + [`apps.plugin`](/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ + for each application. +- `dcstat` : This eBPF program creates charts that show information about file access using directory cache. It appends + `kprobes` for `lookup_fast()` and `d_lookup()` to identify if files are inside directory cache, outside and files are + not found. +- `disk` : This eBPF program creates charts that show information about disk latency independent of filesystem. +- `filesystem` : This eBPF program creates charts that show information about some filesystem latency. +- `swap` : This eBPF program creates charts that show information about swap access. +- `mdflush`: This eBPF program creates charts that show information about + multi-device software flushes. + +### Configuring eBPF threads + +You can configure each thread of the eBPF data collector. This allows you to overwrite global options defined in `/etc/netdata/ebpf.d.conf` and configure specific options for each thread. + +To configure an eBPF thread: + +1. Navigate to the [Netdata config directory](/docs/configure/nodes.md#the-netdata-config-directory). + ```bash + cd /etc/netdata + ``` +2. Use the [`edit-config`](/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) script to edit a thread configuration file. The following configuration files are available: + + - `network.conf`: Configuration for the [`network` thread](#network-configuration). This config file overwrites the global options and also + lets you specify which network the eBPF collector monitors. + - `process.conf`: Configuration for the [`process` thread](#sync-configuration). + - `cachestat.conf`: Configuration for the `cachestat` thread(#filesystem-configuration). + - `dcstat.conf`: Configuration for the `dcstat` thread. + - `disk.conf`: Configuration for the `disk` thread. + - `fd.conf`: Configuration for the `file descriptor` thread. + - `filesystem.conf`: Configuration for the `filesystem` thread. + - `hardirq.conf`: Configuration for the `hardirq` thread. + - `softirq.conf`: Configuration for the `softirq` thread. + - `sync.conf`: Configuration for the `sync` thread. + - `vfs.conf`: Configuration for the `vfs` thread. + + ```bash + ./edit-config FILE.conf + ``` + +### Network configuration + +The network configuration has specific options to configure which network(s) the eBPF collector monitors. These options +are divided in the following sections: + +#### `[network connections]` + +You can configure the information shown on `outbound` and `inbound` charts with the settings in this section. + +```conf +[network connections] + maximum dimensions = 500 + resolve hostname ips = no + ports = 1-1024 !145 !domain + hostnames = !example.com + ips = !127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7 +``` + +When you define a `ports` setting, Netdata will collect network metrics for that specific port. For example, if you +write `ports = 19999`, Netdata will collect only connections for itself. The `hostnames` setting accepts +[simple patterns](/libnetdata/simple_pattern/README.md). The `ports`, and `ips` settings accept negation (`!`) to deny +specific values or asterisk alone to define all values. + +In the above example, Netdata will collect metrics for all ports between 1 and 443, with the exception of 53 (domain) +and 145. + +The following options are available: + +- `ports`: Define the destination ports for Netdata to monitor. +- `hostnames`: The list of hostnames that can be resolved to an IP address. +- `ips`: The IP or range of IPs that you want to monitor. You can use IPv4 or IPv6 addresses, use dashes to define a + range of IPs, or use CIDR values. By default, only data for private IP addresses is collected, but this can + be changed with the `ips` setting. + +By default, Netdata displays up to 500 dimensions on network connection charts. If there are more possible dimensions, +they will be bundled into the `other` dimension. You can increase the number of shown dimensions by changing +the `maximum dimensions` setting. + +The dimensions for the traffic charts are created using the destination IPs of the sockets by default. This can be +changed setting `resolve hostname ips = yes` and restarting Netdata, after this Netdata will create dimensions using +the `hostnames` every time that is possible to resolve IPs to their hostnames. + +#### `[service name]` + +Netdata uses the list of services in `/etc/services` to plot network connection charts. If this file does not contain +the name for a particular service you use in your infrastructure, you will need to add it to the `[service name]` +section. + +For example, Netdata's default port (`19999`) is not listed in `/etc/services`. To associate that port with the Netdata +service in network connection charts, and thus see the name of the service instead of its port, define it: + +```conf +[service name] + 19999 = Netdata +``` + +### Sync configuration + +The sync configuration has specific options to disable monitoring for syscalls. All syscalls are monitored by default. + +```conf +[syscalls] + sync = yes + msync = yes + fsync = yes + fdatasync = yes + syncfs = yes + sync_file_range = yes +``` + +### Filesystem configuration + +The filesystem configuration has specific options to disable monitoring for filesystems; by default, all filesystems are +monitored. + +```conf +[filesystem] + btrfsdist = yes + ext4dist = yes + nfsdist = yes + xfsdist = yes + zfsdist = yes +``` + +The ebpf program `nfsdist` monitors only `nfs` mount points. + +## Troubleshooting + +If the eBPF collector does not work, you can troubleshoot it by running the `ebpf.plugin` command and investigating its +output. + +```bash +cd /usr/libexec/netdata/plugins.d/ +sudo su -s /bin/bash ./ebpf.plugin +``` + +You can also use `grep` to search the Agent's `error.log` for messages related to eBPF monitoring. + +```bash +grep -i ebpf /var/log/netdata/error.log +``` + +### Confirm kernel compatibility + +The eBPF collector only works on Linux systems and with specific Linux kernels. We support all kernels more recent than +`4.11.0`, and all kernels on CentOS 7.6 or later. + +You can run our helper script to determine whether your system can support eBPF monitoring. If it returns no output, your system is ready to compile and run the eBPF collector. + +```bash +curl -sSL https://raw.githubusercontent.com/netdata/kernel-collector/master/tools/check-kernel-config.sh | sudo bash +``` + + +If you see a warning about a missing kernel +configuration (`KPROBES KPROBES_ON_FTRACE HAVE_KPROBES BPF BPF_SYSCALL BPF_JIT`), you will need to recompile your kernel +to support this configuration. The process of recompiling Linux kernels varies based on your distribution and version. +Read the documentation for your system's distribution to learn more about the specific workflow for recompiling the +kernel, ensuring that you set all the necessary + +- [Ubuntu](https://wiki.ubuntu.com/Kernel/BuildYourOwnKernel) +- [Debian](https://kernel-team.pages.debian.net/kernel-handbook/ch-common-tasks.html#s-common-official) +- [Fedora](https://fedoraproject.org/wiki/Building_a_custom_kernel) +- [CentOS](https://wiki.centos.org/HowTos/Custom_Kernel) +- [Arch Linux](https://wiki.archlinux.org/index.php/Kernel/Traditional_compilation) +- [Slackware](https://docs.slackware.com/howtos:slackware_admin:kernelbuilding) + +### Mount `debugfs` and `tracefs` + +The eBPF collector also requires both the `tracefs` and `debugfs` filesystems. Try mounting the `tracefs` and `debugfs` +filesystems using the commands below: + +```bash +sudo mount -t debugfs nodev /sys/kernel/debug +sudo mount -t tracefs nodev /sys/kernel/tracing +``` + +If they are already mounted, you will see an error. You can also configure your system's `/etc/fstab` configuration to +mount these filesystems on startup. More information can be found in +the [ftrace documentation](https://www.kernel.org/doc/Documentation/trace/ftrace.txt). + +## Charts + +The eBPF collector creates charts on different menus, like System Overview, Memory, MD arrays, Disks, Filesystem, +Mount Points, Networking Stack, systemd Services, and Applications. + +The collector stores the actual value inside of its process, but charts only show the difference between the values +collected in the previous and current seconds. + +### System overview + +Not all charts within the System Overview menu are enabled by default. Charts that rely on `kprobes` are disabled by default because they add around 100ns overhead for each function call. This is a small number from a human's perspective, but the functions are called many times and create an impact +on host. See the [configuration](#configuring-ebpfplugin) section for details about how to enable them. + +#### Processes + +Internally, the Linux kernel treats both processes and threads as `tasks`. To create a thread, the kernel offers a few +system calls: `fork(2)`, `vfork(2)`, and `clone(2)`. To generate this chart, the eBPF +collector uses the following `tracepoints` and `kprobe`: + +- `sched/sched_process_fork`: Tracepoint called after a call for `fork (2)`, `vfork (2)` and `clone (2)`. +- `sched/sched_process_exec`: Tracepoint called after a exec-family syscall. +- `kprobe/kernel_clone`: This is the main [`fork()`](https://elixir.bootlin.com/linux/v5.10/source/kernel/fork.c#L2415) + routine since kernel `5.10.0` was released. +- `kprobe/_do_fork`: Like `kernel_clone`, but this was the main function between kernels `4.2.0` and `5.9.16` +- `kprobe/do_fork`: This was the main function before kernel `4.2.0`. + +#### Process Exit + +Ending a task requires two steps. The first is a call to the internal function `do_exit`, which notifies the operating +system that the task is finishing its work. The second step is to release the kernel information with the internal +function `release_task`. The difference between the two dimensions can help you discover +[zombie processes](https://en.wikipedia.org/wiki/Zombie_process). To get the metrics, the collector uses: + +- `sched/sched_process_exit`: Tracepoint called after a task exits. +- `kprobe/release_task`: This function is called when a process exits, as the kernel still needs to remove the process + descriptor. + +#### Task error + +The functions responsible for ending tasks do not return values, so this chart contains information about failures on +process and thread creation only. + +#### Swap + +Inside the swap submenu the eBPF plugin creates the chart `swapcalls`; this chart is displaying when processes are +calling functions [`swap_readpage` and `swap_writepage`](https://hzliu123.github.io/linux-kernel/Page%20Cache%20in%20Linux%202.6.pdf), +which are functions responsible for doing IO in swap memory. To collect the exact moment that an access to swap happens, +the collector attaches `kprobes` for cited functions. + +#### Soft IRQ + +The following `tracepoints` are used to measure time usage for soft IRQs: + +- [`irq/softirq_entry`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_softirq_entry): Called + before softirq handler +- [`irq/softirq_exit`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_softirq_exit): Called when + softirq handler returns. + +#### Hard IRQ + +The following tracepoints are used to measure the latency of servicing a +hardware interrupt request (hard IRQ). + +- [`irq/irq_handler_entry`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_irq_handler_entry): + Called immediately before the IRQ action handler. +- [`irq/irq_handler_exit`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_irq_handler_exit): + Called immediately after the IRQ action handler returns. +- `irq_vectors`: These are traces from `irq_handler_entry` and + `irq_handler_exit` when an IRQ is handled. The following elements from vector + are triggered: + - `irq_vectors/local_timer_entry` + - `irq_vectors/local_timer_exit` + - `irq_vectors/reschedule_entry` + - `irq_vectors/reschedule_exit` + - `irq_vectors/call_function_entry` + - `irq_vectors/call_function_exit` + - `irq_vectors/call_function_single_entry` + - `irq_vectors/call_function_single_xit` + - `irq_vectors/irq_work_entry` + - `irq_vectors/irq_work_exit` + - `irq_vectors/error_apic_entry` + - `irq_vectors/error_apic_exit` + - `irq_vectors/thermal_apic_entry` + - `irq_vectors/thermal_apic_exit` + - `irq_vectors/threshold_apic_entry` + - `irq_vectors/threshold_apic_exit` + - `irq_vectors/deferred_error_entry` + - `irq_vectors/deferred_error_exit` + - `irq_vectors/spurious_apic_entry` + - `irq_vectors/spurious_apic_exit` + - `irq_vectors/x86_platform_ipi_entry` + - `irq_vectors/x86_platform_ipi_exit` + +#### IPC shared memory + +To monitor shared memory system call counts, Netdata attaches tracing in the following functions: + +- `shmget`: Runs when [`shmget`](https://man7.org/linux/man-pages/man2/shmget.2.html) is called. +- `shmat`: Runs when [`shmat`](https://man7.org/linux/man-pages/man2/shmat.2.html) is called. +- `shmdt`: Runs when [`shmdt`](https://man7.org/linux/man-pages/man2/shmat.2.html) is called. +- `shmctl`: Runs when [`shmctl`](https://man7.org/linux/man-pages/man2/shmctl.2.html) is called. + +### Memory + +In the memory submenu the eBPF plugin creates two submenus **page cache** and **synchronization** with the following +organization: + +- Page Cache + - Page cache ratio + - Dirty pages + - Page cache hits + - Page cache misses +- Synchronization + - File sync + - Memory map sync + - File system sync + - File range sync + +#### Page cache hits + +When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. + If the entry is there, a page cache hit has occurred and the read is from the cache. + +A page cache hit is when the page cache is successfully accessed with a read operation. We do not count pages that were +added relatively recently. + +#### Dirty pages + +A "dirty page" is a page in the page cache that was modified after being created. Since non-dirty pages in the page cache + have identical copies in secondary storage (e.g. hard disk drive or solid-state drive), discarding and reusing their space + is much quicker than paging out application memory, and is often preferred over flushing the dirty pages into secondary storage + and reusing their space. + +On `cachestat_dirties` Netdata demonstrates the number of pages that were modified. This chart shows the number of calls +to the function `mark_buffer_dirty`. + +#### Page cache ratio + +When the processor needs to read or write in a specific memory address, it checks for a corresponding entry in the page cache. +If the processor hits a page cache (`page cache hit`), it reads the entry from the cache. If there is no entry (`page cache miss`), + the kernel allocates a new entry and copies data from the disk. Netdata calculates the percentage of accessed files that are cached on + memory. The ratio is calculated counting the accessed cached pages + (without counting [dirty pages](#dirty-pages) and pages added because of read misses) divided by total access without dirty pages. + +> \_\_**\_\_\_\_**Number of accessed cached pages\***\*\_\_\*\***
+> Number of total accessed pages - dirty pages - missed pages + +The chart `cachestat_ratio` shows how processes are accessing page cache. In a normal scenario, we expect values around +100%, which means that the majority of the work on the machine is processed in memory. To calculate the ratio, Netdata +attaches `kprobes` for kernel functions: + +- `add_to_page_cache_lru`: Page addition. +- `mark_page_accessed`: Access to cache. +- `account_page_dirtied`: Dirty (modified) pages. +- `mark_buffer_dirty`: Writes to page cache. + +#### Page cache misses + +A page cache miss means that a page was not inside memory when the process tried to access it. This chart shows the +result of the difference for calls between functions `add_to_page_cache_lru` and `account_page_dirtied`. + +#### File sync + +This chart shows calls to synchronization methods, [`fsync(2)`](https://man7.org/linux/man-pages/man2/fdatasync.2.html) +and [`fdatasync(2)`](https://man7.org/linux/man-pages/man2/fdatasync.2.html), to transfer all modified page caches +for the files on disk devices. These calls block until the disk reports that the transfer has been completed. They flush +data for specific file descriptors. + +#### Memory map sync + +The chart shows calls to [`msync(2)`](https://man7.org/linux/man-pages/man2/msync.2.html) syscalls. This syscall flushes +changes to a file that was mapped into memory using [`mmap(2)`](https://man7.org/linux/man-pages/man2/mmap.2.html). + +#### File system sync + +This chart monitors calls demonstrating commits from filesystem caches to disk. Netdata attaches `tracing` for +[`sync(2)`](https://man7.org/linux/man-pages/man2/sync.2.html), and [`syncfs(2)`](https://man7.org/linux/man-pages/man2/sync.2.html). + +#### File range sync + +This chart shows calls to [`sync_file_range(2)`](https://man7.org/linux/man-pages/man2/sync_file_range.2.html) which +synchronizes file segments with disk. + +> Note: This is the most dangerous syscall to synchronize data, according to its manual. + +### Multiple Device (MD) arrays + +The eBPF plugin shows multi-device flushes happening in real time. This can be used to explain some spikes happening +in [disk latency](#disk) charts. + +By default, MD flush is disabled. To enable it, configure your +`/etc/netdata/ebpf.d.conf` file as: + +```conf +[global] + mdflush = yes +``` + +#### MD flush + +To collect data related to Linux multi-device (MD) flushing, the following kprobe is used: + +- `kprobe/md_flush_request`: called whenever a request for flushing multi-device data is made. + +### Disk + +The eBPF plugin also shows a chart in the Disk section when the `disk` thread is enabled. + +#### Disk Latency + +This will create the chart `disk_latency_io` for each disk on the host. The following tracepoints are used: + +- [`block/block_rq_issue`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_block_rq_issue): + IO request operation to a device drive. +- [`block/block_rq_complete`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_block_rq_complete): + IO operation completed by device. + +Disk Latency is the single most important metric to focus on when it comes to storage performance, under most circumstances. +For hard drives, an average latency somewhere between 10 to 20 ms can be considered acceptable. For SSD (Solid State Drives), +in most cases, workloads experience less than 1 ms latency numbers, but workloads should never reach higher than 3 ms. +The dimensions refer to time intervals. + +### Filesystem + +This group has charts demonstrating how applications interact with the Linux kernel to open and close file descriptors. +It also brings latency charts for several different filesystems. + +#### Latency Algorithm + +We calculate the difference between the calling and return times, spanning disk I/O, file system operations (lock, I/O), +run queue latency and all events related to the monitored action. + +#### ext4 + +To measure the latency of executing some actions in an +[ext4](https://elixir.bootlin.com/linux/latest/source/fs/ext4) filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `ext4_file_read_iter`: Function used to measure read latency. +- `ext4_file_write_iter`: Function used to measure write latency. +- `ext4_file_open`: Function used to measure open latency. +- `ext4_sync_file`: Function used to measure sync latency. + +#### ZFS + +To measure the latency of executing some actions in a zfs filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `zpl_iter_read`: Function used to measure read latency. +- `zpl_iter_write`: Function used to measure write latency. +- `zpl_open`: Function used to measure open latency. +- `zpl_fsync`: Function used to measure sync latency. + +#### XFS + +To measure the latency of executing some actions in an +[xfs](https://elixir.bootlin.com/linux/latest/source/fs/xfs) filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `xfs_file_read_iter`: Function used to measure read latency. +- `xfs_file_write_iter`: Function used to measure write latency. +- `xfs_file_open`: Function used to measure open latency. +- `xfs_file_fsync`: Function used to measure sync latency. + +#### NFS + +To measure the latency of executing some actions in an +[nfs](https://elixir.bootlin.com/linux/latest/source/fs/nfs) filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `nfs_file_read`: Function used to measure read latency. +- `nfs_file_write`: Function used to measure write latency. +- `nfs_file_open`: Functions used to measure open latency. +- `nfs4_file_open`: Functions used to measure open latency for NFS v4. +- `nfs_getattr`: Function used to measure sync latency. + +#### btrfs + +To measure the latency of executing some actions in a [btrfs](https://elixir.bootlin.com/linux/latest/source/fs/btrfs/file.c) +filesystem, the collector needs to attach `kprobes` and `kretprobes` for each of the following functions: + +> Note: We are listing two functions used to measure `read` latency, but we use either `btrfs_file_read_iter` or +> `generic_file_read_iter`, depending on kernel version. + +- `btrfs_file_read_iter`: Function used to measure read latency since kernel `5.10.0`. +- `generic_file_read_iter`: Like `btrfs_file_read_iter`, but this function was used before kernel `5.10.0`. +- `btrfs_file_write_iter`: Function used to write data. +- `btrfs_file_open`: Function used to open files. +- `btrfs_sync_file`: Function used to synchronize data to filesystem. + +#### File descriptor + +To give metrics related to `open` and `close` events, instead of attaching kprobes for each syscall used to do these +events, the collector attaches `kprobes` for the common function used for syscalls: + +- [`do_sys_open`](https://0xax.gitbooks.io/linux-insides/content/SysCall/linux-syscall-5.html): Internal function used to + open files. +- [`do_sys_openat2`](https://elixir.bootlin.com/linux/v5.6/source/fs/open.c#L1162): + Function called from `do_sys_open` since version `5.6.0`. +- [`close_fd`](https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg2271761.html): Function used to close file + descriptor since kernel `5.11.0`. +- `__close_fd`: Function used to close files before version `5.11.0`. + +#### File error + +This chart shows the number of times some software tried and failed to open or close a file descriptor. + +#### VFS + +The Linux Virtual File System (VFS) is an abstraction layer on top of a +concrete filesystem like the ones listed in the parent section, e.g. `ext4`. + +In this section we list the mechanism by which we gather VFS data, and what +charts are consequently created. + +##### VFS eBPF Hooks + +To measure the latency and total quantity of executing some VFS-level +functions, ebpf.plugin needs to attach kprobes and kretprobes for each of the +following functions: + +- `vfs_write`: Function used monitoring the number of successful & failed + filesystem write calls, as well as the total number of written bytes. +- `vfs_writev`: Same function as `vfs_write` but for vector writes (i.e. a + single write operation using a group of buffers rather than 1). +- `vfs_read`: Function used for monitoring the number of successful & failed + filesystem read calls, as well as the total number of read bytes. +- `vfs_readv` Same function as `vfs_read` but for vector reads (i.e. a single + read operation using a group of buffers rather than 1). +- `vfs_unlink`: Function used for monitoring the number of successful & failed + filesystem unlink calls. +- `vfs_fsync`: Function used for monitoring the number of successful & failed + filesystem fsync calls. +- `vfs_open`: Function used for monitoring the number of successful & failed + filesystem open calls. +- `vfs_create`: Function used for monitoring the number of successful & failed + filesystem create calls. + +##### VFS Deleted objects + +This chart monitors calls to `vfs_unlink`. This function is responsible for removing objects from the file system. + +##### VFS IO + +This chart shows the number of calls to the functions `vfs_read` and `vfs_write`. + +##### VFS IO bytes + +This chart also monitors `vfs_read` and `vfs_write` but, instead of the number of calls, it shows the total amount of +bytes read and written with these functions. + +The Agent displays the number of bytes written as negative because they are moving down to disk. + +##### VFS IO errors + +The Agent counts and shows the number of instances where a running program experiences a read or write error. + +##### VFS Create + +This chart shows the number of calls to `vfs_create`. This function is responsible for creating files. + +##### VFS Synchronization + +This chart shows the number of calls to `vfs_fsync`. This function is responsible for calling `fsync(2)` or +`fdatasync(2)` on a file. You can see more details in the Synchronization section. + +##### VFS Open + +This chart shows the number of calls to `vfs_open`. This function is responsible for opening files. + +#### Directory Cache + +Metrics for directory cache are collected using kprobe for `lookup_fast`, because we are interested in the number of +times this function is accessed. On the other hand, for `d_lookup` we are not only interested in the number of times it +is accessed, but also in possible errors, so we need to attach a `kretprobe`. For this reason, the following is used: + +- [`lookup_fast`](https://lwn.net/Articles/649115/): Called to look at data inside the directory cache. +- [`d_lookup`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/dcache.c?id=052b398a43a7de8c68c13e7fa05d6b3d16ce6801#n2223): + Called when the desired file is not inside the directory cache. + +##### Directory Cache Interpretation + +When directory cache is showing 100% that means that every accessed file was present in the directory cache. +If files are not present in the directory cache, they are either not present in the file system or the files were not +accessed before. + +### Mount Points + +The following `tracing` are used to collect `mount` & `unmount` call counts: + +- [`mount`](https://man7.org/linux/man-pages/man2/mount.2.html): mount filesystem on host. +- [`umount`](https://man7.org/linux/man-pages/man2/umount.2.html): umount filesystem on host. + +### Networking Stack + +Netdata monitors socket bandwidth attaching `tracing` for internal functions. + +#### TCP outbound connections + +This chart demonstrates calls to `tcp_v4_connection` and `tcp_v6_connection` that start connections for IPV4 and IPV6, respectively. + +#### TCP inbound connections + +This chart demonstrates TCP and UDP connections that the host receives. +To collect this information, netdata attaches a tracing to `inet_csk_accept`. + +#### TCP bandwidth functions + +This chart demonstrates calls to functions `tcp_sendmsg`, `tcp_cleanup_rbuf`, and `tcp_close`; these functions are used +to send & receive data and to close connections when `TCP` protocol is used. + +#### TCP bandwidth + +This chart demonstrates calls to functions: + +- `tcp_sendmsg`: Function responsible to send data for a specified destination. +- `tcp_cleanup_rbuf`: We use this function instead of `tcp_recvmsg`, because the last one misses `tcp_read_sock` traffic + and we would also need to add more `tracing` to get the socket and package size. +- `tcp_close`: Function responsible to close connection. + +#### TCP retransmit + +This chart demonstrates calls to function `tcp_retransmit` that is responsible for executing TCP retransmission when the +receiver did not return the packet during the expected time. + +#### UDP functions + +This chart demonstrates calls to functions `udp_sendmsg` and `udp_recvmsg`, which are responsible for sending & +receiving data for connections when the `UDP` protocol is used. + +#### UDP bandwidth + +Like the previous chart, this one also monitors `udp_sendmsg` and `udp_recvmsg`, but instead of showing the number of +calls, it monitors the number of bytes sent and received. + +### Apps + +#### OOM Killing + +These are tracepoints related to [OOM](https://en.wikipedia.org/wiki/Out_of_memory) killing processes. + +- `oom/mark_victim`: Monitors when an oomkill event happens. + +## Known issues + +### Performance opimization + +eBPF monitoring is complex and produces a large volume of metrics. We've discovered scenarios where the eBPF plugin +significantly increases kernel memory usage by several hundred MB. + +If your node is experiencing high memory usage and there is no obvious culprit to be found in the `apps.mem` chart, +consider testing for high kernel memory usage by [disabling eBPF monitoring](#configuring-ebpfplugin). Next, +[restart Netdata](/docs/configure/start-stop-restart.md) with `sudo systemctl restart netdata` to see if system memory +usage (see the `system.ram` chart) has dropped significantly. + +Beginning with `v1.31`, kernel memory usage is configurable via the [`pid table size` setting](#ebpf-load-mode) +in `ebpf.conf`. + +### SELinux + +When [SELinux](https://www.redhat.com/en/topics/linux/what-is-selinux) is enabled, it may prevent `ebpf.plugin` from +starting correctly. Check the Agent's `error.log` file for errors like the ones below: + +```bash +2020-06-14 15:32:08: ebpf.plugin ERROR : EBPF PROCESS : Cannot load program: /usr/libexec/netdata/plugins.d/pnetdata_ebpf_process.3.10.0.o (errno 13, Permission denied) +2020-06-14 15:32:19: netdata ERROR : PLUGINSD[ebpf] : read failed: end of file (errno 9, Bad file descriptor) +``` + +You can also check for errors related to `ebpf.plugin` inside `/var/log/audit/audit.log`: + +```bash +type=AVC msg=audit(1586260134.952:97): avc: denied { map_create } for pid=1387 comm="ebpf.pl" scontext=system_u:system_r:unconfined_service_t:s0 tcontext=system_u:system_r:unconfined_service_t:s0 tclass=bpf permissive=0 +type=SYSCALL msg=audit(1586260134.952:97): arch=c000003e syscall=321 success=no exit=-13 a0=0 a1=7ffe6b36f000 a2=70 a3=0 items=0 ppid=1135 pid=1387 auid=4294967295 uid=994 gid=990 euid=0 suid=0 fsuid=0 egid=990 sgid=990 fsgid=990 tty=(none) ses=4294967295 comm="ebpf_proc +ess.pl" exe="/usr/libexec/netdata/plugins.d/ebpf.plugin" subj=system_u:system_r:unconfined_service_t:s0 key=(null) +``` + +If you see similar errors, you will have to adjust SELinux's policies to enable the eBPF collector. + +#### Creation of bpf policies + +To enable `ebpf.plugin` to run on a distribution with SELinux enabled, it will be necessary to take the following +actions. + +First, stop the Netdata Agent. + +```bash +# systemctl stop netdata +``` + +Next, create a policy with the `audit.log` file you examined earlier. + +```bash +# grep ebpf.plugin /var/log/audit/audit.log | audit2allow -M netdata_ebpf +``` + +This will create two new files: `netdata_ebpf.te` and `netdata_ebpf.mod`. + +Edit the `netdata_ebpf.te` file to change the options `class` and `allow`. You should have the following at the end of +the `netdata_ebpf.te` file. + +```conf +module netdata_ebpf 1.0; +require { + type unconfined_service_t; + class bpf { map_create map_read map_write prog_load prog_run }; +} +#============= unconfined_service_t ============== +allow unconfined_service_t self:bpf { map_create map_read map_write prog_load prog_run }; +``` + +Then compile your `netdata_ebpf.te` file with the following commands to create a binary that loads the new policies: + +```bash +# checkmodule -M -m -o netdata_ebpf.mod netdata_ebpf.te +# semodule_package -o netdata_ebpf.pp -m netdata_ebpf.mod +``` + +Finally, you can load the new policy and start the Netdata agent again: + +```bash +# semodule -i netdata_ebpf.pp +# systemctl start netdata +``` + +### Linux kernel lockdown + +Beginning with [version 5.4](https://www.zdnet.com/article/linux-to-get-kernel-lockdown-feature/), the Linux kernel has +a feature called "lockdown," which may affect `ebpf.plugin` depending how the kernel was compiled. The following table +shows how the lockdown module impacts `ebpf.plugin` based on the selected options: + +| Enforcing kernel lockdown | Enable lockdown LSM early in init | Default lockdown mode | Can `ebpf.plugin` run with this? | +| :------------------------ | :-------------------------------- | :-------------------- | :------------------------------- | +| YES | NO | NO | YES | +| YES | Yes | None | YES | +| YES | Yes | Integrity | YES | +| YES | Yes | Confidentiality | NO | + +If you or your distribution compiled the kernel with the last combination, your system cannot load shared libraries +required to run `ebpf.plugin`. diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c new file mode 100644 index 0000000..00b53a5 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.c @@ -0,0 +1,2249 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include + +#include "ebpf.h" +#include "ebpf_socket.h" +#include "libnetdata/required_dummies.h" + +/***************************************************************** + * + * GLOBAL VARIABLES + * + *****************************************************************/ + +char *ebpf_plugin_dir = PLUGINS_DIR; +static char *ebpf_configured_log_dir = LOG_DIR; + +char *ebpf_algorithms[] = {"absolute", "incremental"}; +struct config collector_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +int running_on_kernel = 0; +int ebpf_nprocs; +int isrh = 0; +int main_thread_id = 0; + +pthread_mutex_t lock; +pthread_mutex_t ebpf_exit_cleanup; +pthread_mutex_t collect_data_mutex; +pthread_cond_t collect_data_cond_var; + +ebpf_module_t ebpf_modules[] = { + { .thread_name = "process", .config_name = "process", .enabled = 0, .start_routine = ebpf_process_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_process_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &process_config, + .config_file = NETDATA_PROCESS_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10 | + NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &socket_config, + .config_file = NETDATA_NETWORK_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = socket_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_cachestat_create_apps_charts, .maps = cachestat_maps, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &cachestat_config, + .config_file = NETDATA_CACHESTAT_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18| + NETDATA_V5_4 | NETDATA_V5_14 | NETDATA_V5_15 | NETDATA_V5_16, + .load = EBPF_LOAD_LEGACY, .targets = cachestat_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &sync_config, + .config_file = NETDATA_SYNC_CONFIG_FILE, + // All syscalls have the same kernels + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = sync_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "dc", .config_name = "dc", .enabled = 0, .start_routine = ebpf_dcstat_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_dcstat_create_apps_charts, .maps = dcstat_maps, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &dcstat_config, + .config_file = NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = dc_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "swap", .config_name = "swap", .enabled = 0, .start_routine = ebpf_swap_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_swap_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &swap_config, + .config_file = NETDATA_DIRECTORY_SWAP_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = swap_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "vfs", .config_name = "vfs", .enabled = 0, .start_routine = ebpf_vfs_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_vfs_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &vfs_config, + .config_file = NETDATA_DIRECTORY_VFS_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = vfs_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "filesystem", .config_name = "filesystem", .enabled = 0, .start_routine = ebpf_filesystem_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config, + .config_file = NETDATA_FILESYSTEM_CONFIG_FILE, + //We are setting kernels as zero, because we load eBPF programs according the kernel running. + .kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL }, + { .thread_name = "disk", .config_name = "disk", .enabled = 0, .start_routine = ebpf_disk_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config, + .config_file = NETDATA_DISK_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "mount", .config_name = "mount", .enabled = 0, .start_routine = ebpf_mount_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config, + .config_file = NETDATA_MOUNT_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = mount_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "fd", .config_name = "fd", .enabled = 0, .start_routine = ebpf_fd_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_fd_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fd_config, + .config_file = NETDATA_FD_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_11 | + NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = fd_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "hardirq", .config_name = "hardirq", .enabled = 0, .start_routine = ebpf_hardirq_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config, + .config_file = NETDATA_HARDIRQ_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "softirq", .config_name = "softirq", .enabled = 0, .start_routine = ebpf_softirq_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config, + .config_file = NETDATA_SOFTIRQ_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "oomkill", .config_name = "oomkill", .enabled = 0, .start_routine = ebpf_oomkill_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_oomkill_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &oomkill_config, + .config_file = NETDATA_OOMKILL_CONFIG_FILE, + .kernels = NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "shm", .config_name = "shm", .enabled = 0, .start_routine = ebpf_shm_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_shm_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &shm_config, + .config_file = NETDATA_DIRECTORY_SHM_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = shm_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = "mdflush", .config_name = "mdflush", .enabled = 0, .start_routine = ebpf_mdflush_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config, + .config_file = NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_every = EBPF_DEFAULT_UPDATE_EVERY, + .global_charts = 0, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = 0, .names = NULL, .cfg = NULL, .config_name = NULL, .kernels = 0, .load = EBPF_LOAD_LEGACY, + .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, +}; + +struct netdata_static_thread ebpf_threads[] = { + { + .name = "EBPF PROCESS", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF SOCKET", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF CACHESTAT", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF SYNC", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF DCSTAT", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF SWAP", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF VFS", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF FILESYSTEM", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF DISK", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF MOUNT", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF FD", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF HARDIRQ", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF SOFTIRQ", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF OOMKILL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF SHM", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = "EBPF MDFLUSH", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, +}; + +ebpf_filesystem_partitions_t localfs[] = + {{.filesystem = "ext4", + .optional_filesystem = NULL, + .family = "ext4", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "xfs", + .optional_filesystem = NULL, + .family = "xfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "nfs", + .optional_filesystem = "nfs4", + .family = "nfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_ATTR_CHARTS, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "zfs", + .optional_filesystem = NULL, + .family = "zfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "btrfs", + .optional_filesystem = NULL, + .family = "btrfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = "btrfs_file_operations", .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10}, + {.filesystem = NULL, + .optional_filesystem = NULL, + .family = NULL, + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = 0}}; + +ebpf_sync_syscalls_t local_syscalls[] = { + {.syscall = NETDATA_SYSCALLS_SYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_SYNCFS, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_MSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_FSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_FDATASYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_SYNC_FILE_RANGE, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NULL, .enabled = CONFIG_BOOLEAN_NO, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + } +}; + + +// Link with apps.plugin +ebpf_process_stat_t *global_process_stat = NULL; + +// Link with cgroup.plugin +netdata_ebpf_cgroup_shm_t shm_ebpf_cgroup = {NULL, NULL}; +int shm_fd_ebpf_cgroup = -1; +sem_t *shm_sem_ebpf_cgroup = SEM_FAILED; +pthread_mutex_t mutex_cgroup_shm; + +//Network viewer +ebpf_network_viewer_options_t network_viewer_opt; + +// Statistic +ebpf_plugin_stats_t plugin_statistics = {.core = 0, .legacy = 0, .running = 0, .threads = 0, .tracepoints = 0, + .probes = 0, .retprobes = 0, .trampolines = 0}; + +#ifdef LIBBPF_MAJOR_VERSION +struct btf *default_btf = NULL; +#else +void *default_btf = NULL; +#endif +char *btf_path = NULL; + +/***************************************************************** + * + * FUNCTIONS USED TO CLEAN MEMORY AND OPERATE SYSTEM FILES + * + *****************************************************************/ + +/** + * Close the collector gracefully + */ +static void ebpf_exit() +{ +#ifdef LIBBPF_MAJOR_VERSION + pthread_mutex_lock(&ebpf_exit_cleanup); + if (default_btf) { + btf__free(default_btf); + default_btf = NULL; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); +#endif + + char filename[FILENAME_MAX + 1]; + ebpf_pid_file(filename, FILENAME_MAX); + if (unlink(filename)) + error("Cannot remove PID file %s", filename); + + exit(0); +} + +/** + * Unload loegacy code + * + * @param objects objects loaded from eBPF programs + * @param probe_links links from loader + */ +static void ebpf_unload_legacy_code(struct bpf_object *objects, struct bpf_link **probe_links) +{ + if (!probe_links || !objects) + return; + + struct bpf_program *prog; + size_t j = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[j]); + j++; + } + freez(probe_links); + if (objects) + bpf_object__close(objects); +} + +int ebpf_exit_plugin = 0; +/** + * Close the collector gracefully + * + * @param sig is the signal number used to close the collector + */ +static void ebpf_stop_threads(int sig) +{ + UNUSED(sig); + static int only_one = 0; + + int i; + // Child thread should be closed by itself. + pthread_mutex_lock(&ebpf_exit_cleanup); + if (main_thread_id != gettid() || only_one) { + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + only_one = 1; + for (i = 0; ebpf_threads[i].name != NULL; i++) { + if (ebpf_threads[i].enabled != NETDATA_THREAD_EBPF_STOPPED) + netdata_thread_cancel(*ebpf_threads[i].thread); + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_exit_plugin = 1; + usec_t max = 3 * USEC_PER_SEC, step = 100000; + while (i && max) { + max -= step; + sleep_usec(step); + i = 0; + int j; + pthread_mutex_lock(&ebpf_exit_cleanup); + for (j = 0; ebpf_threads[j].name != NULL; j++) { + if (ebpf_threads[j].enabled != NETDATA_THREAD_EBPF_STOPPED) + i++; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + } + + //Unload threads(except sync and filesystem) + pthread_mutex_lock(&ebpf_exit_cleanup); + for (i = 0; ebpf_threads[i].name != NULL; i++) { + if (ebpf_threads[i].enabled == NETDATA_THREAD_EBPF_STOPPED && i != EBPF_MODULE_FILESYSTEM_IDX && + i != EBPF_MODULE_SYNC_IDX) + ebpf_unload_legacy_code(ebpf_modules[i].objects, ebpf_modules[i].probe_links); + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + //Unload filesystem + pthread_mutex_lock(&ebpf_exit_cleanup); + if (ebpf_threads[EBPF_MODULE_FILESYSTEM_IDX].enabled == NETDATA_THREAD_EBPF_STOPPED) { + for (i = 0; localfs[i].filesystem != NULL; i++) { + ebpf_unload_legacy_code(localfs[i].objects, localfs[i].probe_links); + } + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + //Unload Sync + pthread_mutex_lock(&ebpf_exit_cleanup); + if (ebpf_threads[EBPF_MODULE_SYNC_IDX].enabled == NETDATA_THREAD_EBPF_STOPPED) { + for (i = 0; local_syscalls[i].syscall != NULL; i++) { + ebpf_unload_legacy_code(local_syscalls[i].objects, local_syscalls[i].probe_links); + } + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_exit(); +} + +/***************************************************************** + * + * FUNCTIONS TO CREATE CHARTS + * + *****************************************************************/ + +/** + * Get a value from a structure. + * + * @param basis it is the first address of the structure + * @param offset it is the offset of the data you want to access. + * @return + */ +collected_number get_value_from_structure(char *basis, size_t offset) +{ + collected_number *value = (collected_number *)(basis + offset); + + collected_number ret = (collected_number)llabs(*value); + // this reset is necessary to avoid keep a constant value while processing is not executing a task + *value = 0; + + return ret; +} + +/** + * Write begin command on standard output + * + * @param family the chart family name + * @param name the chart name + */ +void write_begin_chart(char *family, char *name) +{ + printf("BEGIN %s.%s\n", family, name); +} + +/** + * Write END command on stdout. + */ +inline void write_end_chart() +{ + printf("END\n"); +} + +/** + * Write set command on standard output + * + * @param dim the dimension name + * @param value the value for the dimension + */ +void write_chart_dimension(char *dim, long long value) +{ + printf("SET %s = %lld\n", dim, value); +} + +/** + * Call the necessary functions to create a chart. + * + * @param name the chart name + * @param family the chart family + * @param move the pointer with the values that will be published + * @param end the number of values that will be written on standard output + * + * @return It returns a variable that maps the charts that did not have zero values. + */ +void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move, uint32_t end) +{ + write_begin_chart(family, name); + + uint32_t i = 0; + while (move && i < end) { + write_chart_dimension(move->name, move->ncall); + + move = move->next; + i++; + } + + write_end_chart(); +} + +/** + * Call the necessary functions to create a chart. + * + * @param name the chart name + * @param family the chart family + * @param move the pointer with the values that will be published + * @param end the number of values that will be written on standard output + */ +void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move, int end) +{ + write_begin_chart(family, name); + + int i = 0; + while (move && i < end) { + write_chart_dimension(move->name, move->nerr); + + move = move->next; + i++; + } + + write_end_chart(); +} + +/** + * Write charts + * + * Write the current information to publish the charts. + * + * @param family chart family + * @param chart chart id + * @param dim dimension name + * @param v1 value. + */ +void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1) +{ + write_begin_chart(family, chart); + + write_chart_dimension(dim, v1); + + write_end_chart(); +} + +/** + * Call the necessary functions to create a chart. + * + * @param chart the chart name + * @param family the chart family + * @param dwrite the dimension name + * @param vwrite the value for previous dimension + * @param dread the dimension name + * @param vread the value for previous dimension + * + * @return It returns a variable that maps the charts that did not have zero values. + */ +void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, char *dread, long long vread) +{ + write_begin_chart(family, chart); + + write_chart_dimension(dwrite, vwrite); + write_chart_dimension(dread, vread); + + write_end_chart(); +} + +/** + * Write chart cmd on standard output + * + * @param type chart type + * @param id chart id + * @param title chart title + * @param units units label + * @param family group name used to attach the chart on dashboard + * @param charttype chart type + * @param context chart context + * @param order chart order + * @param update_every update interval used by plugin + * @param module chart module name, this is the eBPF thread. + */ +void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char *family, + char *charttype, char *context, int order, int update_every, char *module) +{ + printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d '' 'ebpf.plugin' '%s'\n", + type, + id, + title, + units, + (family)?family:"", + (context)?context:"", + (charttype)?charttype:"", + order, + update_every, + module); +} + +/** + * Write chart cmd on standard output + * + * @param type chart type + * @param id chart id + * @param title chart title + * @param units units label + * @param family group name used to attach the chart on dashboard + * @param charttype chart type + * @param context chart context + * @param order chart order + * @param update_every value to overwrite the update frequency set by the server. + */ +void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family, + char *charttype, char *context, int order, int update_every) +{ + printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d 'obsolete'\n", + type, + id, + title, + units, + (family)?family:"", + (context)?context:"", + (charttype)?charttype:"", + order, + update_every); +} + +/** + * Write the dimension command on standard output + * + * @param name the dimension name + * @param id the dimension id + * @param algo the dimension algorithm + */ +void ebpf_write_global_dimension(char *name, char *id, char *algorithm) +{ + printf("DIMENSION %s %s %s 1 1\n", name, id, algorithm); +} + +/** + * Call ebpf_write_global_dimension to create the dimensions for a specific chart + * + * @param ptr a pointer to a structure of the type netdata_publish_syscall_t + * @param end the number of dimensions for the structure ptr + */ +void ebpf_create_global_dimension(void *ptr, int end) +{ + netdata_publish_syscall_t *move = ptr; + + int i = 0; + while (move && i < end) { + ebpf_write_global_dimension(move->name, move->dimension, move->algorithm); + + move = move->next; + i++; + } +} + +/** + * Call write_chart_cmd to create the charts + * + * @param type chart type + * @param id chart id + * @param title chart title + * @param units axis label + * @param family group name used to attach the chart on dashboard + * @param context chart context + * @param charttype chart type + * @param order order number of the specified chart + * @param ncd a pointer to a function called to create dimensions + * @param move a pointer for a structure that has the dimensions + * @param end number of dimensions for the chart created + * @param update_every update interval used with chart. + * @param module chart module name, this is the eBPF thread. + */ +void ebpf_create_chart(char *type, + char *id, + char *title, + char *units, + char *family, + char *context, + char *charttype, + int order, + void (*ncd)(void *, int), + void *move, + int end, + int update_every, + char *module) +{ + ebpf_write_chart_cmd(type, id, title, units, family, charttype, context, order, update_every, module); + + if (ncd) { + ncd(move, end); + } +} + +/** + * Create charts on apps submenu + * + * @param id the chart id + * @param title the value displayed on vertical axis. + * @param units the value displayed on vertical axis. + * @param family Submenu that the chart will be attached on dashboard. + * @param charttype chart type + * @param order the chart order + * @param algorithm the algorithm used by dimension + * @param root structure used to create the dimensions. + * @param update_every update interval used by plugin + * @param module chart module name, this is the eBPF thread. + */ +void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family, char *charttype, int order, + char *algorithm, struct target *root, int update_every, char *module) +{ + struct target *w; + ebpf_write_chart_cmd(NETDATA_APPS_FAMILY, id, title, units, family, charttype, NULL, order, + update_every, module); + + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' %s 1 1\n", w->name, algorithm); + } +} + +/** + * Call the necessary functions to create a name. + * + * @param family family name + * @param name chart name + * @param hist0 histogram values + * @param dimensions dimension values. + * @param end number of bins that will be sent to Netdata. + * + * @return It returns a variable that maps the charts that did not have zero values. + */ +void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end) +{ + write_begin_chart(family, name); + + uint32_t i; + for (i = 0; i < end; i++) { + write_chart_dimension(dimensions[i], (long long) hist[i]); + } + + write_end_chart(); + + fflush(stdout); +} + +/***************************************************************** + * + * FUNCTIONS TO DEFINE OPTIONS + * + *****************************************************************/ + +/** + * Define labels used to generate charts + * + * @param is structure with information about number of calls made for a function. + * @param pio structure used to generate charts. + * @param dim a pointer for the dimensions name + * @param name a pointer for the tensor with the name of the functions. + * @param algorithm a vector with the algorithms used to make the charts + * @param end the number of elements in the previous 4 arguments. + */ +void ebpf_global_labels(netdata_syscall_stat_t *is, netdata_publish_syscall_t *pio, char **dim, + char **name, int *algorithm, int end) +{ + int i; + + netdata_syscall_stat_t *prev = NULL; + netdata_publish_syscall_t *publish_prev = NULL; + for (i = 0; i < end; i++) { + if (prev) { + prev->next = &is[i]; + } + prev = &is[i]; + + pio[i].dimension = dim[i]; + pio[i].name = name[i]; + pio[i].algorithm = strdupz(ebpf_algorithms[algorithm[i]]); + if (publish_prev) { + publish_prev->next = &pio[i]; + } + publish_prev = &pio[i]; + } +} + +/** + * Define thread mode for all ebpf program. + * + * @param lmode the mode that will be used for them. + */ +static inline void ebpf_set_thread_mode(netdata_run_mode_t lmode) +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].mode = lmode; + } +} + +/** + * Enable specific charts selected by user. + * + * @param em the structure that will be changed + * @param disable_apps the status about the apps charts. + * @param disable_cgroup the status about the cgroups charts. + */ +static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disable_apps, int disable_cgroup) +{ + em->enabled = CONFIG_BOOLEAN_YES; + + // oomkill stores data inside apps submenu, so it always need to have apps_enabled for plugin to create + // its chart, without this comparison eBPF.plugin will try to store invalid data when apps is disabled. + if (!disable_apps || !strcmp(em->thread_name, "oomkill")) { + em->apps_charts = NETDATA_EBPF_APPS_FLAG_YES; + } + + if (!disable_cgroup) { + em->cgroup_charts = CONFIG_BOOLEAN_YES; + } + + em->global_charts = CONFIG_BOOLEAN_YES; +} + +/** + * Enable all charts + * + * @param apps what is the current status of apps + * @param cgroups what is the current status of cgroups + */ +static inline void ebpf_enable_all_charts(int apps, int cgroups) +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_enable_specific_chart(&ebpf_modules[i], apps, cgroups); + } +} + +/** + * Disable all Global charts + * + * Disable charts + */ +static inline void disable_all_global_charts() +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].enabled = 0; + ebpf_modules[i].global_charts = 0; + } +} + + +/** + * Enable the specified chart group + * + * @param idx the index of ebpf_modules that I am enabling + * @param disable_apps should I keep apps charts? + */ +static inline void ebpf_enable_chart(int idx, int disable_apps, int disable_cgroup) +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + if (i == idx) { + ebpf_enable_specific_chart(&ebpf_modules[i], disable_apps, disable_cgroup); + break; + } + } +} + +/** + * Disable APPs + * + * Disable charts for apps loading only global charts. + */ +static inline void ebpf_disable_apps() +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].apps_charts = NETDATA_EBPF_APPS_FLAG_NO; + } +} + +/** + * Disable Cgroups + * + * Disable charts for apps loading only global charts. + */ +static inline void ebpf_disable_cgroups() +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].cgroup_charts = 0; + } +} + +/** + * Update Disabled Plugins + * + * This function calls ebpf_update_stats to update statistics for collector. + * + * @param em a pointer to `struct ebpf_module` + */ +void ebpf_update_disabled_plugin_stats(ebpf_module_t *em) +{ + pthread_mutex_lock(&lock); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); +} + +/** + * Print help on standard error for user knows how to use the collector. + */ +void ebpf_print_help() +{ + const time_t t = time(NULL); + struct tm ct; + struct tm *test = localtime_r(&t, &ct); + int year; + if (test) + year = ct.tm_year; + else + year = 0; + + fprintf(stderr, + "\n" + " Netdata ebpf.plugin %s\n" + " Copyright (C) 2016-%d Costa Tsaousis \n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " This eBPF.plugin is a data collector plugin for netdata.\n" + "\n" + " This plugin only accepts long options with one or two dashes. The available command line options are:\n" + "\n" + " SECONDS Set the data collection frequency.\n" + "\n" + " [-]-help Show this help.\n" + "\n" + " [-]-version Show software version.\n" + "\n" + " [-]-global Disable charts per application and cgroup.\n" + "\n" + " [-]-all Enable all chart groups (global, apps, and cgroup), unless -g is also given.\n" + "\n" + " [-]-cachestat Enable charts related to process run time.\n" + "\n" + " [-]-dcstat Enable charts related to directory cache.\n" + "\n" + " [-]-disk Enable charts related to disk monitoring.\n" + "\n" + " [-]-filesystem Enable chart related to filesystem run time.\n" + "\n" + " [-]-hardirq Enable chart related to hard IRQ latency.\n" + "\n" + " [-]-mdflush Enable charts related to multi-device flush.\n" + "\n" + " [-]-mount Enable charts related to mount monitoring.\n" + "\n" + " [-]-net Enable network viewer charts.\n" + "\n" + " [-]-oomkill Enable chart related to OOM kill tracking.\n" + "\n" + " [-]-process Enable charts related to process run time.\n" + "\n" + " [-]-return Run the collector in return mode.\n" + "\n" + " [-]-shm Enable chart related to shared memory tracking.\n" + "\n" + " [-]-softirq Enable chart related to soft IRQ latency.\n" + "\n" + " [-]-sync Enable chart related to sync run time.\n" + "\n" + " [-]-swap Enable chart related to swap run time.\n" + "\n" + " [-]-vfs Enable chart related to vfs run time.\n" + "\n" + " [-]-legacy Load legacy eBPF programs.\n" + "\n" + " [-]-core Use CO-RE when available(Working in progress).\n" + "\n", + VERSION, + (year >= 116) ? year + 1900 : 2020); +} + +/***************************************************************** + * + * TRACEPOINT MANAGEMENT FUNCTIONS + * + *****************************************************************/ + +/** + * Enable a tracepoint. + * + * @return 0 on success, -1 on error. + */ +int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp) +{ + int test = ebpf_is_tracepoint_enabled(tp->class, tp->event); + + // err? + if (test == -1) { + return -1; + } + // disabled? + else if (test == 0) { + // enable it then. + if (ebpf_enable_tracing_values(tp->class, tp->event)) { + return -1; + } + } + + // enabled now or already was. + tp->enabled = true; + + return 0; +} + +/** + * Disable a tracepoint if it's enabled. + * + * @return 0 on success, -1 on error. + */ +int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp) +{ + int test = ebpf_is_tracepoint_enabled(tp->class, tp->event); + + // err? + if (test == -1) { + return -1; + } + // enabled? + else if (test == 1) { + // disable it then. + if (ebpf_disable_tracing_values(tp->class, tp->event)) { + return -1; + } + } + + // disable now or already was. + tp->enabled = false; + + return 0; +} + +/** + * Enable multiple tracepoints on a list of tracepoints which end when the + * class is NULL. + * + * @return the number of successful enables. + */ +uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps) +{ + uint32_t cnt = 0; + for (int i = 0; tps[i].class != NULL; i++) { + if (ebpf_enable_tracepoint(&tps[i]) == -1) { + infoerr("failed to enable tracepoint %s:%s", + tps[i].class, tps[i].event); + } + else { + cnt += 1; + } + } + return cnt; +} + +/***************************************************************** + * + * AUXILIARY FUNCTIONS USED DURING INITIALIZATION + * + *****************************************************************/ + +/** + * Read Local Ports + * + * Parse /proc/net/{tcp,udp} and get the ports Linux is listening. + * + * @param filename the proc file to parse. + * @param proto is the magic number associated to the protocol file we are reading. + */ +static void read_local_ports(char *filename, uint8_t proto) +{ + procfile *ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); + if (!ff) + return; + + ff = procfile_readall(ff); + if (!ff) + return; + + size_t lines = procfile_lines(ff), l; + netdata_passive_connection_t values = {.counter = 0, .tgid = 0, .pid = 0}; + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + // This is header or end of file + if (unlikely(words < 14)) + continue; + + // https://elixir.bootlin.com/linux/v5.7.8/source/include/net/tcp_states.h + // 0A = TCP_LISTEN + if (strcmp("0A", procfile_lineword(ff, l, 5))) + continue; + + // Read local port + uint16_t port = (uint16_t)strtol(procfile_lineword(ff, l, 2), NULL, 16); + update_listen_table(htons(port), proto, &values); + } + + procfile_close(ff); +} + +/** + * Read Local addresseses + * + * Read the local address from the interfaces. + */ +static void read_local_addresses() +{ + struct ifaddrs *ifaddr, *ifa; + if (getifaddrs(&ifaddr) == -1) { + error("Cannot get the local IP addresses, it is no possible to do separation between inbound and outbound connections"); + return; + } + + char *notext = { "No text representation" }; + for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL) + continue; + + if ((ifa->ifa_addr->sa_family != AF_INET) && (ifa->ifa_addr->sa_family != AF_INET6)) + continue; + + ebpf_network_viewer_ip_list_t *w = callocz(1, sizeof(ebpf_network_viewer_ip_list_t)); + + int family = ifa->ifa_addr->sa_family; + w->ver = (uint8_t) family; + char text[INET6_ADDRSTRLEN]; + if (family == AF_INET) { + struct sockaddr_in *in = (struct sockaddr_in*) ifa->ifa_addr; + + w->first.addr32[0] = in->sin_addr.s_addr; + w->last.addr32[0] = in->sin_addr.s_addr; + + if (inet_ntop(AF_INET, w->first.addr8, text, INET_ADDRSTRLEN)) { + w->value = strdupz(text); + w->hash = simple_hash(text); + } else { + w->value = strdupz(notext); + w->hash = simple_hash(notext); + } + } else { + struct sockaddr_in6 *in6 = (struct sockaddr_in6*) ifa->ifa_addr; + + memcpy(w->first.addr8, (void *)&in6->sin6_addr, sizeof(struct in6_addr)); + memcpy(w->last.addr8, (void *)&in6->sin6_addr, sizeof(struct in6_addr)); + + if (inet_ntop(AF_INET6, w->first.addr8, text, INET_ADDRSTRLEN)) { + w->value = strdupz(text); + w->hash = simple_hash(text); + } else { + w->value = strdupz(notext); + w->hash = simple_hash(notext); + } + } + + fill_ip_list((family == AF_INET)?&network_viewer_opt.ipv4_local_ip:&network_viewer_opt.ipv6_local_ip, + w, + "selector"); + } + + freeifaddrs(ifaddr); +} + +/** + * Start Pthread Variable + * + * This function starts all pthread variables. + * + * @return It returns 0 on success and -1. + */ +int ebpf_start_pthread_variables() +{ + pthread_mutex_init(&lock, NULL); + pthread_mutex_init(&ebpf_exit_cleanup, NULL); + pthread_mutex_init(&collect_data_mutex, NULL); + + if (pthread_cond_init(&collect_data_cond_var, NULL)) { + error("Cannot start conditional variable to control Apps charts."); + return -1; + } + + return 0; +} + +/** + * Am I collecting PIDs? + * + * Test if eBPF plugin needs to collect PID information. + * + * @return It returns 1 if at least one thread needs to collect the data, or zero otherwise. + */ +static inline uint32_t ebpf_am_i_collect_pids() +{ + uint32_t ret = 0; + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ret |= ebpf_modules[i].cgroup_charts | (ebpf_modules[i].apps_charts & NETDATA_EBPF_APPS_FLAG_YES); + } + + return ret; +} + +/** + * Allocate the vectors used for all threads. + */ +static void ebpf_allocate_common_vectors() +{ + if (unlikely(!ebpf_am_i_collect_pids())) { + return; + } + + all_pids = callocz((size_t)pid_max, sizeof(struct pid_stat *)); + global_process_stat = callocz((size_t)ebpf_nprocs, sizeof(ebpf_process_stat_t)); +} + +/** + * Define how to load the ebpf programs + * + * @param ptr the option given by users + */ +static inline void how_to_load(char *ptr) +{ + if (!strcasecmp(ptr, EBPF_CFG_LOAD_MODE_RETURN)) + ebpf_set_thread_mode(MODE_RETURN); + else if (!strcasecmp(ptr, EBPF_CFG_LOAD_MODE_DEFAULT)) + ebpf_set_thread_mode(MODE_ENTRY); + else + error("the option %s for \"ebpf load mode\" is not a valid option.", ptr); +} + +/** + * Update interval + * + * Update default interval with value from user + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_update_interval(int update_every) +{ + int i; + int value = (int) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_UPDATE_EVERY, + update_every); + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].update_every = value; + } +} + +/** + * Update PID table size + * + * Update default size with value from user + */ +static void ebpf_update_table_size() +{ + int i; + uint32_t value = (uint32_t) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, + EBPF_CFG_PID_SIZE, ND_EBPF_DEFAULT_PID_SIZE); + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].pid_map_size = value; + } +} + +/** + * Set Load mode + * + * @param origin specify the configuration file loaded + */ +static inline void ebpf_set_load_mode(netdata_ebpf_load_mode_t load, netdata_ebpf_load_mode_t origin) +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].load &= ~NETDATA_EBPF_LOAD_METHODS; + ebpf_modules[i].load |= load | origin ; + } +} + +/** + * Update mode + * + * @param str value read from configuration file. + * @param origin specify the configuration file loaded + */ +static inline void epbf_update_load_mode(char *str, netdata_ebpf_load_mode_t origin) +{ + netdata_ebpf_load_mode_t load = epbf_convert_string_to_load_mode(str); + + ebpf_set_load_mode(load, origin); +} + +/** + * Read collector values + * + * @param disable_apps variable to store information related to apps. + * @param disable_cgroups variable to store information related to cgroups. + * @param update_every value to overwrite the update frequency set by the server. + * @param origin specify the configuration file loaded + */ +static void read_collector_values(int *disable_apps, int *disable_cgroups, + int update_every, netdata_ebpf_load_mode_t origin) +{ + // Read global section + char *value; + if (appconfig_exists(&collector_config, EBPF_GLOBAL_SECTION, "load")) // Backward compatibility + value = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, "load", + EBPF_CFG_LOAD_MODE_DEFAULT); + else + value = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_LOAD_MODE, + EBPF_CFG_LOAD_MODE_DEFAULT); + + how_to_load(value); + + btf_path = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_PROGRAM_PATH, + EBPF_DEFAULT_BTF_PATH); + +#ifdef LIBBPF_MAJOR_VERSION + default_btf = ebpf_load_btf_file(btf_path, EBPF_DEFAULT_BTF_FILE); +#endif + + value = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_TYPE_FORMAT, EBPF_CFG_DEFAULT_PROGRAM); + + epbf_update_load_mode(value, origin); + + ebpf_update_interval(update_every); + + ebpf_update_table_size(); + + // This is kept to keep compatibility + uint32_t enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, "disable apps", + CONFIG_BOOLEAN_NO); + if (!enabled) { + // Apps is a positive sentence, so we need to invert the values to disable apps. + enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_APPLICATION, + CONFIG_BOOLEAN_YES); + enabled = (enabled == CONFIG_BOOLEAN_NO)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_NO; + } + *disable_apps = (int)enabled; + + // Cgroup is a positive sentence, so we need to invert the values to disable apps. + // We are using the same pattern for cgroup and apps + enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_CGROUP, CONFIG_BOOLEAN_NO); + *disable_cgroups = (enabled == CONFIG_BOOLEAN_NO)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_NO; + + // Read ebpf programs section + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, + ebpf_modules[EBPF_MODULE_PROCESS_IDX].config_name, CONFIG_BOOLEAN_YES); + int started = 0; + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_apps, *disable_cgroups); + started++; + } + + // This is kept to keep compatibility + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "network viewer", + CONFIG_BOOLEAN_NO); + if (!enabled) + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, + ebpf_modules[EBPF_MODULE_SOCKET_IDX].config_name, + CONFIG_BOOLEAN_NO); + + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_apps, *disable_cgroups); + // Read network viewer section if network viewer is enabled + // This is kept here to keep backward compatibility + parse_network_viewer_section(&collector_config); + parse_service_name_section(&collector_config); + started++; + } + + // This is kept to keep compatibility + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "network connection monitoring", + CONFIG_BOOLEAN_NO); + if (!enabled) + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "network connections", + CONFIG_BOOLEAN_NO); + ebpf_modules[EBPF_MODULE_SOCKET_IDX].optional = (int)enabled; + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "cachestat", + CONFIG_BOOLEAN_NO); + + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "sync", + CONFIG_BOOLEAN_YES); + + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "dcstat", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "swap", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SWAP_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "vfs", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_VFS_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "filesystem", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_FILESYSTEM_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "disk", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_DISK_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mount", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_MOUNT_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "fd", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_FD_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "hardirq", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "softirq", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SOFTIRQ_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "oomkill", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_OOMKILL_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "shm", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SHM_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mdflush", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_MDFLUSH_IDX, *disable_apps, *disable_cgroups); + started++; + } + + if (!started){ + ebpf_enable_all_charts(*disable_apps, *disable_cgroups); + // Read network viewer section + // This is kept here to keep backward compatibility + parse_network_viewer_section(&collector_config); + parse_service_name_section(&collector_config); + } +} + +/** + * Load collector config + * + * @param path the path where the file ebpf.conf is stored. + * @param disable_apps variable to store the information about apps plugin status. + * @param disable_cgroups variable to store the information about cgroups plugin status. + * @param update_every value to overwrite the update frequency set by the server. + * + * @return 0 on success and -1 otherwise. + */ +static int load_collector_config(char *path, int *disable_apps, int *disable_cgroups, int update_every) +{ + char lpath[4096]; + netdata_ebpf_load_mode_t origin; + + snprintf(lpath, 4095, "%s/%s", path, NETDATA_EBPF_CONFIG_FILE); + if (!appconfig_load(&collector_config, lpath, 0, NULL)) { + snprintf(lpath, 4095, "%s/%s", path, NETDATA_EBPF_OLD_CONFIG_FILE); + if (!appconfig_load(&collector_config, lpath, 0, NULL)) { + return -1; + } + origin = EBPF_LOADED_FROM_STOCK; + } else + origin = EBPF_LOADED_FROM_USER; + + read_collector_values(disable_apps, disable_cgroups, update_every, origin); + + return 0; +} + +/** + * Set global variables reading environment variables + */ +void set_global_variables() +{ + // Get environment variables + ebpf_plugin_dir = getenv("NETDATA_PLUGINS_DIR"); + if (!ebpf_plugin_dir) + ebpf_plugin_dir = PLUGINS_DIR; + + ebpf_user_config_dir = getenv("NETDATA_USER_CONFIG_DIR"); + if (!ebpf_user_config_dir) + ebpf_user_config_dir = CONFIG_DIR; + + ebpf_stock_config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); + if (!ebpf_stock_config_dir) + ebpf_stock_config_dir = LIBCONFIG_DIR; + + ebpf_configured_log_dir = getenv("NETDATA_LOG_DIR"); + if (!ebpf_configured_log_dir) + ebpf_configured_log_dir = LOG_DIR; + + ebpf_nprocs = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (ebpf_nprocs > NETDATA_MAX_PROCESSOR) { + ebpf_nprocs = NETDATA_MAX_PROCESSOR; + } + + isrh = get_redhat_release(); + pid_max = get_system_pid_max(); + running_on_kernel = ebpf_get_kernel_version(); +} + +/** + * Load collector config + */ +static inline void ebpf_load_thread_config() +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_update_module(&ebpf_modules[i], default_btf, running_on_kernel, isrh); + } +} + +/** + * Parse arguments given from user. + * + * @param argc the number of arguments + * @param argv the pointer to the arguments + */ +static void ebpf_parse_args(int argc, char **argv) +{ + int disable_apps = 0; + int disable_cgroups = 1; + int freq = 0; + int option_index = 0; + uint64_t select_threads = 0; + static struct option long_options[] = { + {"process", no_argument, 0, 0 }, + {"net", no_argument, 0, 0 }, + {"cachestat", no_argument, 0, 0 }, + {"sync", no_argument, 0, 0 }, + {"dcstat", no_argument, 0, 0 }, + {"swap", no_argument, 0, 0 }, + {"vfs", no_argument, 0, 0 }, + {"filesystem", no_argument, 0, 0 }, + {"disk", no_argument, 0, 0 }, + {"mount", no_argument, 0, 0 }, + {"filedescriptor", no_argument, 0, 0 }, + {"hardirq", no_argument, 0, 0 }, + {"softirq", no_argument, 0, 0 }, + {"oomkill", no_argument, 0, 0 }, + {"shm", no_argument, 0, 0 }, + {"mdflush", no_argument, 0, 0 }, + /* INSERT NEW THREADS BEFORE THIS COMMENT TO KEEP COMPATIBILITY WITH enum ebpf_module_indexes */ + {"all", no_argument, 0, 0 }, + {"version", no_argument, 0, 0 }, + {"help", no_argument, 0, 0 }, + {"global", no_argument, 0, 0 }, + {"return", no_argument, 0, 0 }, + {"legacy", no_argument, 0, 0 }, + {"core", no_argument, 0, 0 }, + {0, 0, 0, 0} + }; + + memset(&network_viewer_opt, 0, sizeof(network_viewer_opt)); + network_viewer_opt.max_dim = NETDATA_NV_CAP_VALUE; + + if (argc > 1) { + int n = (int)str2l(argv[1]); + if (n > 0) { + freq = n; + } + } + + if (!freq) + freq = EBPF_DEFAULT_UPDATE_EVERY; + + if (load_collector_config(ebpf_user_config_dir, &disable_apps, &disable_cgroups, freq)) { + info( + "Does not have a configuration file inside `%s/ebpf.d.conf. It will try to load stock file.", + ebpf_user_config_dir); + if (load_collector_config(ebpf_stock_config_dir, &disable_apps, &disable_cgroups, freq)) { + info("Does not have a stock file. It is starting with default options."); + } + } + + ebpf_load_thread_config(); + + while (1) { + int c = getopt_long_only(argc, argv, "", long_options, &option_index); + if (c == -1) + break; + + switch (option_index) { + case EBPF_MODULE_PROCESS_IDX: { + select_threads |= 1< 63) + length = 63; + + buffer[length] = '\0'; + old_pid = (pid_t)str2uint32_t(buffer); + } + fclose(fp); + + return old_pid; +} + +/** + * Kill previous process + * + * Kill previous process whether it was not closed. + * + * @param filename is the full name of the file. + * @param pid that identifies the process + */ +static void ebpf_kill_previous_process(char *filename, pid_t pid) +{ + pid_t old_pid = ebpf_read_previous_pid(filename); + if (!old_pid) + return; + + // Process is not running + char *prev_name = ebpf_get_process_name(old_pid); + if (!prev_name) + return; + + char *current_name = ebpf_get_process_name(pid); + + if (!strcmp(prev_name, current_name)) + kill(old_pid, SIGKILL); + + freez(prev_name); + freez(current_name); + + // wait few microseconds before start new plugin + sleep_usec(USEC_PER_MS * 300); +} + +/** + * PID file + * + * Write the filename for PID inside the given vector. + * + * @param filename vector where we will store the name. + * @param length number of bytes available in filename vector + */ +void ebpf_pid_file(char *filename, size_t length) +{ + snprintfz(filename, length, "%s%s/ebpf.d/ebpf.pid", netdata_configured_host_prefix, ebpf_plugin_dir); +} + +/** + * Manage PID + * + * This function kills another instance of eBPF whether it is necessary and update the file content. + * + * @param pid that identifies the process + */ +static void ebpf_manage_pid(pid_t pid) +{ + char filename[FILENAME_MAX + 1]; + ebpf_pid_file(filename, FILENAME_MAX); + + ebpf_kill_previous_process(filename, pid); + ebpf_update_pid_file(filename, pid); +} + +/** + * Set start routine + * + * Set static routine before threads to be created. + */ + static void ebpf_set_static_routine() + { + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_threads[i].start_routine = ebpf_modules[i].start_routine; + } + } + +/** + * Entry point + * + * @param argc the number of arguments + * @param argv the pointer to the arguments + * + * @return it returns 0 on success and another integer otherwise + */ +int main(int argc, char **argv) +{ + clocks_init(); + main_thread_id = gettid(); + + set_global_variables(); + ebpf_parse_args(argc, argv); + ebpf_manage_pid(getpid()); + + if (!has_condition_to_run(running_on_kernel)) { + error("The current collector cannot run on this kernel."); + return 2; + } + + if (!am_i_running_as_root()) { + error( + "ebpf.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities..", + (unsigned int)getuid(), (unsigned int)geteuid()); + return 3; + } + + // set name + program_name = "ebpf.plugin"; + + // disable syslog + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY }; + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + error("Setrlimit(RLIMIT_MEMLOCK)"); + return 4; + } + + signal(SIGINT, ebpf_stop_threads); + signal(SIGQUIT, ebpf_stop_threads); + signal(SIGTERM, ebpf_stop_threads); + signal(SIGPIPE, ebpf_stop_threads); + + if (ebpf_start_pthread_variables()) { + error("Cannot start mutex to control overall charts."); + ebpf_exit(); + } + + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(verify_netdata_host_prefix() == -1) ebpf_exit(6); + + ebpf_allocate_common_vectors(); + +#ifdef LIBBPF_MAJOR_VERSION + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); +#endif + + read_local_addresses(); + read_local_ports("/proc/net/tcp", IPPROTO_TCP); + read_local_ports("/proc/net/tcp6", IPPROTO_TCP); + read_local_ports("/proc/net/udp", IPPROTO_UDP); + read_local_ports("/proc/net/udp6", IPPROTO_UDP); + + ebpf_set_static_routine(); + + int i; + for (i = 0; ebpf_threads[i].name != NULL; i++) { + struct netdata_static_thread *st = &ebpf_threads[i]; + + ebpf_module_t *em = &ebpf_modules[i]; + em->thread = st; + // We always initialize process, because it is responsible to take care of apps integration + if (em->enabled || !i) { + st->thread = mallocz(sizeof(netdata_thread_t)); + em->thread_id = i; + st->enabled = NETDATA_THREAD_EBPF_RUNNING; + netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, em); + } else { + st->enabled = NETDATA_THREAD_EBPF_STOPPED; + } + } + + usec_t step = EBPF_DEFAULT_UPDATE_EVERY * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + //Plugin will be killed when it receives a signal + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + } + + return 0; +} diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf new file mode 100644 index 0000000..cf5c740 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -0,0 +1,69 @@ +# +# Global options +# +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change the setting +# `apps` and `cgroups` to 'no'. +# +# The `update every` option defines the number of seconds used to read data from kernel and send to netdata +# +# The `pid table size` defines the maximum number of PIDs stored in the application hash tables. +# +[global] + ebpf load mode = entry + apps = yes + cgroups = no + update every = 5 + pid table size = 32768 + btf path = /sys/kernel/btf/ + +# +# eBPF Programs +# +# The eBPF collector has the following eBPF programs: +# +# `cachestat` : Make charts for kernel functions related to page cache. +# `dcstat` : Make charts for kernel functions related to directory cache. +# `disk` : Monitor I/O latencies for disks +# `fd` : This eBPF program creates charts that show information about file manipulation. +# `filesystem`: Monitor calls for functions used to manipulate specific filesystems +# `hardirq` : Monitor latency of serving hardware interrupt requests (hard IRQs). +# `mdflush` : Monitors flush counts for multi-devices. +# `mount` : Monitor calls for syscalls mount and umount +# `oomkill` : This eBPF program creates a chart that shows which process got OOM killed and when. +# `process` : This eBPF program creates charts that show information about process life. +# `shm` : Monitor calls for syscalls shmget, shmat, shmdt and shmctl. +# `socket` : This eBPF program creates charts with information about `TCP` and `UDP` functions, including the +# bandwidth consumed by each. +# `softirq` : Monitor latency of serving software interrupt requests (soft IRQs). +# `sync` : Monitor calls for syscall sync(2). +# `swap` : Monitor calls for internal swap functions. +# `vfs` : This eBPF program creates charts that show information about process VFS IO, VFS file manipulation and +# files removed. +# +# When plugin detects that system has support to BTF, it enables integration with apps.plugin. +# +[ebpf programs] + cachestat = no + dcstat = no + disk = no + fd = yes + filesystem = no + hardirq = yes + mdflush = no + mount = yes + oomkill = yes + process = yes + shm = no + socket = yes + softirq = yes + sync = yes + swap = no + vfs = yes + network connections = no diff --git a/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/collectors/ebpf.plugin/ebpf.d/cachestat.conf new file mode 100644 index 0000000..52466be --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -0,0 +1,36 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `pid table size` defines the maximum number of PIDs stored inside the application hash table. +# +# The `ebpf type format` option accepts the following values : +# `auto` : The eBPF collector will investigate hardware and select between the two next options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overload. +# `co-re` : The eBPF collector will use latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. +# `probe` : This is the same as legacy code. +# +# The `collect pid` option defines the PID stored inside hash tables and accepts the following options: +# `real parent`: Only stores real parent inside PID +# `parent` : Only stores parent PID. +# `all` : Stores all PIDs used by software. This is the most expensive option. +# +# Uncomment lines to define specific options for thread. +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 + ebpf type format = auto + ebpf co-re tracing = trampoline + collect pid = real parent diff --git a/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/collectors/ebpf.plugin/ebpf.d/dcstat.conf new file mode 100644 index 0000000..8aed8f7 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -0,0 +1,34 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `ebpf type format` option accepts the following values : +# `auto` : The eBPF collector will investigate hardware and select between the two next options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overload. +# `co-re` : The eBPF collector will use latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. +# `probe` : This is the same as legacy code. +# +# The `collect pid` option defines the PID stored inside hash tables and accepts the following options: +# `real parent`: Only stores real parent inside PID +# `parent` : Only stores parent PID. +# `all` : Stores all PIDs used by software. This is the most expensive option. +# +# Uncomment lines to define specific options for thread. +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 + ebpf type format = auto + ebpf co-re tracing = trampoline + collect pid = real parent diff --git a/collectors/ebpf.plugin/ebpf.d/disk.conf b/collectors/ebpf.plugin/ebpf.d/disk.conf new file mode 100644 index 0000000..4adf88e --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/disk.conf @@ -0,0 +1,9 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +#[global] +# ebpf load mode = entry +# update every = 10 + diff --git a/collectors/ebpf.plugin/ebpf.d/ebpf_kernel_reject_list.txt b/collectors/ebpf.plugin/ebpf.d/ebpf_kernel_reject_list.txt new file mode 100644 index 0000000..539bf35 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/ebpf_kernel_reject_list.txt @@ -0,0 +1 @@ +Ubuntu 4.18.0 diff --git a/collectors/ebpf.plugin/ebpf.d/fd.conf b/collectors/ebpf.plugin/ebpf.d/fd.conf new file mode 100644 index 0000000..8333520 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/fd.conf @@ -0,0 +1,21 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `pid table size` defines the maximum number of PIDs stored inside the hash table. +# +# Uncomment lines to define specific options for thread. +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 + ebpf type format = auto + ebpf co-re tracing = trampoline diff --git a/collectors/ebpf.plugin/ebpf.d/filesystem.conf b/collectors/ebpf.plugin/ebpf.d/filesystem.conf new file mode 100644 index 0000000..c5eb01e --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/filesystem.conf @@ -0,0 +1,20 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. +# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to +# 'no'. +# +#[global] +# ebpf load mode = entry +# update every = 10 + +# All filesystems are named as 'NAMEdist' where NAME is the filesystem name while 'dist' is a reference for distribution. +[filesystem] + btrfsdist = yes + ext4dist = yes + nfsdist = yes + xfsdist = yes + zfsdist = yes diff --git a/collectors/ebpf.plugin/ebpf.d/hardirq.conf b/collectors/ebpf.plugin/ebpf.d/hardirq.conf new file mode 100644 index 0000000..f2bae1d --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/hardirq.conf @@ -0,0 +1,8 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +#[global] +# ebpf load mode = entry +# update every = 10 diff --git a/collectors/ebpf.plugin/ebpf.d/mdflush.conf b/collectors/ebpf.plugin/ebpf.d/mdflush.conf new file mode 100644 index 0000000..e65e867 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/mdflush.conf @@ -0,0 +1,7 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +#[global] +# ebpf load mode = entry +# update every = 1 diff --git a/collectors/ebpf.plugin/ebpf.d/mount.conf b/collectors/ebpf.plugin/ebpf.d/mount.conf new file mode 100644 index 0000000..fdd82f2 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/mount.conf @@ -0,0 +1,19 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The `ebpf type format` option accepts the following values : +# `auto` : The eBPF collector will investigate hardware and select between the two next options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overload. +# `co-re` : The eBPF collector will use latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. +# `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. +# `probe` : This is the same as legacy code. +[global] +# ebpf load mode = entry +# update every = 1 + ebpf type format = auto + ebpf co-re tracing = trampoline diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf new file mode 100644 index 0000000..d939d8e --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/network.conf @@ -0,0 +1,53 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The following options change the hash table size: +# `bandwidth table size`: Maximum number of connections monitored +# `ipv4 connection table size`: Maximum number of IPV4 connections monitored +# `ipv6 connection table size`: Maximum number of IPV6 connections monitored +# `udp connection table size`: Maximum number of UDP connections monitored +# +# The `ebpf type format` option accepts the following values : +# `auto` : The eBPF collector will investigate hardware and select between the two next options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overload. +# `co-re` : The eBPF collector will use latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. +# `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. +# `probe` : This is the same as legacy code. +# +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 + bandwidth table size = 16384 + ipv4 connection table size = 16384 + ipv6 connection table size = 16384 + udp connection table size = 4096 + ebpf type format = auto + ebpf co-re tracing = trampoline + +# +# Network Connection +# +# This is a feature with status WIP(Work in Progress) +# +[network connections] + maximum dimensions = 50 + resolve hostnames = no + resolve service names = no + ports = * + ips = !127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7 !::1/128 + hostnames = * + +[service name] + 19999 = Netdata diff --git a/collectors/ebpf.plugin/ebpf.d/oomkill.conf b/collectors/ebpf.plugin/ebpf.d/oomkill.conf new file mode 100644 index 0000000..e65e867 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/oomkill.conf @@ -0,0 +1,7 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +#[global] +# ebpf load mode = entry +# update every = 1 diff --git a/collectors/ebpf.plugin/ebpf.d/process.conf b/collectors/ebpf.plugin/ebpf.d/process.conf new file mode 100644 index 0000000..1da5f84 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/process.conf @@ -0,0 +1,25 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `pid table size` defines the maximum number of PIDs stored inside the hash table. +# +# The `collect pid` option defines the PID stored inside hash tables and accepts the following options: +# `real parent`: Only stores real parent inside PID +# `parent` : Only stores parent PID. +# `all` : Stores all PIDs used by software. This is the most expensive option. +# +# Uncomment lines to define specific options for thread. +#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 +# collect pid = real parent diff --git a/collectors/ebpf.plugin/ebpf.d/shm.conf b/collectors/ebpf.plugin/ebpf.d/shm.conf new file mode 100644 index 0000000..23ab96d --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/shm.conf @@ -0,0 +1,36 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `ebpf type format` option accepts the following values : +# `auto` : The eBPF collector will investigate hardware and select between the two next options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overload. +# `co-re` : The eBPF collector will use latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. +# `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. +# `probe` : This is the same as legacy code. +# +# Uncomment lines to define specific options for thread. +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 + ebpf type format = auto + ebpf co-re tracing = trampoline + +# List of monitored syscalls +[syscalls] + shmget = yes + shmat = yes + shmdt = yes + shmctl = yes diff --git a/collectors/ebpf.plugin/ebpf.d/softirq.conf b/collectors/ebpf.plugin/ebpf.d/softirq.conf new file mode 100644 index 0000000..f2bae1d --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/softirq.conf @@ -0,0 +1,8 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +#[global] +# ebpf load mode = entry +# update every = 10 diff --git a/collectors/ebpf.plugin/ebpf.d/swap.conf b/collectors/ebpf.plugin/ebpf.d/swap.conf new file mode 100644 index 0000000..3986ae4 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/swap.conf @@ -0,0 +1,28 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `ebpf type format` option accepts the following values : +# `auto` : The eBPF collector will investigate hardware and select between the two next options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overload. +# `co-re` : The eBPF collector will use latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. +# `probe` : This is the same as legacy code. +# +# Uncomment lines to define specific options for thread. +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 + ebpf type format = auto + ebpf co-re tracing = trampoline diff --git a/collectors/ebpf.plugin/ebpf.d/sync.conf b/collectors/ebpf.plugin/ebpf.d/sync.conf new file mode 100644 index 0000000..ebec5d3 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/sync.conf @@ -0,0 +1,36 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `ebpf type format` option accepts the following values : +# `auto` : The eBPF collector will investigate hardware and select between the two next options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overload. +# `co-re` : The eBPF collector will use latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. +# `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. +# `probe` : This is the same as legacy code. +# +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 + ebpf type format = auto + ebpf co-re tracing = trampoline + +# List of monitored syscalls +[syscalls] + sync = yes + msync = yes + fsync = yes + fdatasync = yes + syncfs = yes + sync_file_range = yes diff --git a/collectors/ebpf.plugin/ebpf.d/vfs.conf b/collectors/ebpf.plugin/ebpf.d/vfs.conf new file mode 100644 index 0000000..fa5d5b4 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/vfs.conf @@ -0,0 +1,19 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# Uncomment lines to define specific options for thread. +[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 + ebpf type format = auto + ebpf co-re tracing = trampoline diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h new file mode 100644 index 0000000..28b04ce --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.h @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COLLECTOR_EBPF_H +#define NETDATA_COLLECTOR_EBPF_H 1 + +#ifndef __FreeBSD__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +// From libnetdata.h +#include "libnetdata/threads/threads.h" +#include "libnetdata/locks/locks.h" +#include "libnetdata/avl/avl.h" +#include "libnetdata/clocks/clocks.h" +#include "libnetdata/config/appconfig.h" +#include "libnetdata/ebpf/ebpf.h" +#include "libnetdata/procfile/procfile.h" +#include "collectors/cgroups.plugin/sys_fs_cgroup.h" +#include "daemon/main.h" + +#include "ebpf_apps.h" +#include "ebpf_cgroup.h" + +#define NETDATA_EBPF_OLD_CONFIG_FILE "ebpf.conf" +#define NETDATA_EBPF_CONFIG_FILE "ebpf.d.conf" + +typedef struct netdata_syscall_stat { + unsigned long bytes; // total number of bytes + uint64_t call; // total number of calls + uint64_t ecall; // number of calls that returned error + struct netdata_syscall_stat *next; // Link list +} netdata_syscall_stat_t; + +typedef uint64_t netdata_idx_t; + +typedef struct netdata_publish_syscall { + char *dimension; + char *name; + char *algorithm; + unsigned long nbyte; + unsigned long pbyte; + uint64_t ncall; + uint64_t pcall; + uint64_t nerr; + uint64_t perr; + struct netdata_publish_syscall *next; +} netdata_publish_syscall_t; + +typedef struct netdata_publish_vfs_common { + long write; + long read; + + long running; + long zombie; +} netdata_publish_vfs_common_t; + +typedef struct netdata_error_report { + char comm[16]; + __u32 pid; + + int type; + int err; +} netdata_error_report_t; + +extern ebpf_module_t ebpf_modules[]; +enum ebpf_main_index { + EBPF_MODULE_PROCESS_IDX, + EBPF_MODULE_SOCKET_IDX, + EBPF_MODULE_CACHESTAT_IDX, + EBPF_MODULE_SYNC_IDX, + EBPF_MODULE_DCSTAT_IDX, + EBPF_MODULE_SWAP_IDX, + EBPF_MODULE_VFS_IDX, + EBPF_MODULE_FILESYSTEM_IDX, + EBPF_MODULE_DISK_IDX, + EBPF_MODULE_MOUNT_IDX, + EBPF_MODULE_FD_IDX, + EBPF_MODULE_HARDIRQ_IDX, + EBPF_MODULE_SOFTIRQ_IDX, + EBPF_MODULE_OOMKILL_IDX, + EBPF_MODULE_SHM_IDX, + EBPF_MODULE_MDFLUSH_IDX, + /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ + EBPF_OPTION_ALL_CHARTS, + EBPF_OPTION_VERSION, + EBPF_OPTION_HELP, + EBPF_OPTION_GLOBAL_CHART, + EBPF_OPTION_RETURN_MODE, + EBPF_OPTION_LEGACY, + EBPF_OPTION_CORE +}; + +typedef struct ebpf_tracepoint { + bool enabled; + char *class; + char *event; +} ebpf_tracepoint_t; + +enum ebpf_threads_status { + NETDATA_THREAD_EBPF_RUNNING, + NETDATA_THREAD_EBPF_STOPPING, + NETDATA_THREAD_EBPF_STOPPED +}; + +// Copied from musl header +#ifndef offsetof +#if __GNUC__ > 3 +#define offsetof(type, member) __builtin_offsetof(type, member) +#else +#define offsetof(type, member) ((size_t)((char *)&(((type *)0)->member) - (char *)0)) +#endif +#endif + +// Chart definitions +#define NETDATA_EBPF_FAMILY "ebpf" +#define NETDATA_EBPF_IP_FAMILY "ip" +#define NETDATA_FILESYSTEM_FAMILY "filesystem" +#define NETDATA_EBPF_MOUNT_GLOBAL_FAMILY "mount_points" +#define NETDATA_EBPF_CHART_TYPE_LINE "line" +#define NETDATA_EBPF_CHART_TYPE_STACKED "stacked" +#define NETDATA_EBPF_MEMORY_GROUP "mem" +#define NETDATA_EBPF_SYSTEM_GROUP "system" +#define NETDATA_SYSTEM_SWAP_SUBMENU "swap" +#define NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU "swap (eBPF)" +#define NETDATA_SYSTEM_IPC_SHM_SUBMENU "ipc shared memory" +#define NETDATA_MONITORING_FAMILY "netdata" + +// Statistics charts +#define NETDATA_EBPF_THREADS "ebpf_threads" +#define NETDATA_EBPF_LOAD_METHOD "ebpf_load_methods" + +// Log file +#define NETDATA_DEVELOPER_LOG_FILE "developer.log" + +// Maximum number of processors monitored on perf events +#define NETDATA_MAX_PROCESSOR 512 + +// Kernel versions calculated with the formula: +// R = MAJOR*65536 + MINOR*256 + PATCH +#define NETDATA_KERNEL_V5_3 328448 +#define NETDATA_KERNEL_V4_15 265984 + +#define EBPF_SYS_CLONE_IDX 11 +#define EBPF_MAX_MAPS 32 + +#define EBPF_DEFAULT_UPDATE_EVERY 10 + +enum ebpf_algorithms_list { + NETDATA_EBPF_ABSOLUTE_IDX, + NETDATA_EBPF_INCREMENTAL_IDX +}; + +// Threads +void *ebpf_process_thread(void *ptr); +void *ebpf_socket_thread(void *ptr); + +// Common variables +extern pthread_mutex_t lock; +extern pthread_mutex_t ebpf_exit_cleanup; +extern int ebpf_nprocs; +extern int running_on_kernel; +extern int isrh; +extern char *ebpf_plugin_dir; + +extern pthread_mutex_t collect_data_mutex; +extern pthread_cond_t collect_data_cond_var; + +// Common functions +void ebpf_global_labels(netdata_syscall_stat_t *is, + netdata_publish_syscall_t *pio, + char **dim, + char **name, + int *algorithm, + int end); + +void ebpf_write_chart_cmd(char *type, + char *id, + char *title, + char *units, + char *family, + char *charttype, + char *context, + int order, + int update_every, + char *module); + +void ebpf_write_global_dimension(char *name, char *id, char *algorithm); + +void ebpf_create_global_dimension(void *ptr, int end); + +void ebpf_create_chart(char *type, + char *id, + char *title, + char *units, + char *family, + char *context, + char *charttype, + int order, + void (*ncd)(void *, int), + void *move, + int end, + int update_every, + char *module); + +void write_begin_chart(char *family, char *name); + +void write_chart_dimension(char *dim, long long value); + +void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move, uint32_t end); + +void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move, int end); + +void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, + char *dread, long long vread); + +void ebpf_create_charts_on_apps(char *name, + char *title, + char *units, + char *family, + char *charttype, + int order, + char *algorithm, + struct target *root, + int update_every, + char *module); + +void write_end_chart(); + +void ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps); + +int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp); +int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp); +uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps); + +void ebpf_pid_file(char *filename, size_t length); + +#define EBPF_PROGRAMS_SECTION "ebpf programs" + +#define EBPF_COMMON_DIMENSION_PERCENTAGE "%" +#define EBPF_COMMON_DIMENSION_CALL "calls/s" +#define EBPF_COMMON_DIMENSION_CONNECTIONS "connections/s" +#define EBPF_COMMON_DIMENSION_BITS "kilobits/s" +#define EBPF_COMMON_DIMENSION_BYTES "bytes/s" +#define EBPF_COMMON_DIMENSION_DIFFERENCE "difference" +#define EBPF_COMMON_DIMENSION_PACKETS "packets" +#define EBPF_COMMON_DIMENSION_FILES "files" +#define EBPF_COMMON_DIMENSION_MILLISECONDS "milliseconds" +#define EBPF_COMMON_DIMENSION_KILLS "kills" + +// Common variables +extern int debug_enabled; +extern struct pid_stat *root_of_pids; +extern ebpf_cgroup_target_t *ebpf_cgroup_pids; +extern char *ebpf_algorithms[]; +extern struct config collector_config; +extern ebpf_process_stat_t *global_process_stat; +extern netdata_ebpf_cgroup_shm_t shm_ebpf_cgroup; +extern int shm_fd_ebpf_cgroup; +extern sem_t *shm_sem_ebpf_cgroup; +extern pthread_mutex_t mutex_cgroup_shm; +extern size_t all_pids_count; +extern ebpf_plugin_stats_t plugin_statistics; +#ifdef LIBBPF_MAJOR_VERSION +extern struct btf *default_btf; +#else +extern void *default_btf; +#endif + +// Socket functions and variables +// Common functions +void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr); +void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr); +void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *root); +void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1); +collected_number get_value_from_structure(char *basis, size_t offset); +void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em); +void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family, + char *charttype, char *context, int order, int update_every); +void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end); +void ebpf_update_disabled_plugin_stats(ebpf_module_t *em); +extern ebpf_filesystem_partitions_t localfs[]; +extern ebpf_sync_syscalls_t local_syscalls[]; +extern int ebpf_exit_plugin; + +#define EBPF_MAX_SYNCHRONIZATION_TIME 300 + +#endif /* NETDATA_COLLECTOR_EBPF_H */ diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c new file mode 100644 index 0000000..7519e06 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -0,0 +1,1155 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_socket.h" +#include "ebpf_apps.h" + +// ---------------------------------------------------------------------------- +// internal flags +// handled in code (automatically set) + +static int proc_pid_cmdline_is_needed = 0; // 1 when we need to read /proc/cmdline + +/***************************************************************** + * + * FUNCTIONS USED TO READ HASH TABLES + * + *****************************************************************/ + +/** + * Read statistic hash table. + * + * @param ep the output structure. + * @param fd the file descriptor mapped from kernel ring. + * @param pid the index used to select the data. + * @param bpf_map_lookup_elem a pointer for the function used to read data. + * + * @return It returns 0 when the data was copied and -1 otherwise + */ +int ebpf_read_hash_table(void *ep, int fd, uint32_t pid) +{ + if (!ep) + return -1; + + if (!bpf_map_lookup_elem(fd, &pid, ep)) + return 0; + + return -1; +} + +/** + * Read socket statistic + * + * Read information from kernel ring to user ring. + * + * @param ep the table with all process stats values. + * @param fd the file descriptor mapped from kernel + * @param ef a pointer for the functions mapped from dynamic library + * @param pids the list of pids associated to a target. + * + * @return + */ +size_t read_bandwidth_statistic_using_pid_on_target(ebpf_bandwidth_t **ep, int fd, struct pid_on_target *pids) +{ + size_t count = 0; + while (pids) { + uint32_t current_pid = pids->pid; + if (!ebpf_read_hash_table(ep[current_pid], fd, current_pid)) + count++; + + pids = pids->next; + } + + return count; +} + +/** + * Read bandwidth statistic using hash table + * + * @param out the output tensor that will receive the information. + * @param fd the file descriptor that has the data + * @param bpf_map_lookup_elem a pointer for the function to read the data + * @param bpf_map_get_next_key a pointer fo the function to read the index. + */ +size_t read_bandwidth_statistic_using_hash_table(ebpf_bandwidth_t **out, int fd) +{ + size_t count = 0; + uint32_t key = 0; + uint32_t next_key = 0; + + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + ebpf_bandwidth_t *eps = out[next_key]; + if (!eps) { + eps = callocz(1, sizeof(ebpf_process_stat_t)); + out[next_key] = eps; + } + ebpf_read_hash_table(eps, fd, next_key); + } + + return count; +} + +/***************************************************************** + * + * FUNCTIONS CALLED FROM COLLECTORS + * + *****************************************************************/ + +/** + * Am I running as Root + * + * Verify the user that is running the collector. + * + * @return It returns 1 for root and 0 otherwise. + */ +int am_i_running_as_root() +{ + uid_t uid = getuid(), euid = geteuid(); + + if (uid == 0 || euid == 0) { + return 1; + } + + return 0; +} + +/** + * Reset the target values + * + * @param root the pointer to the chain that will be reset. + * + * @return it returns the number of structures that was reset. + */ +size_t zero_all_targets(struct target *root) +{ + struct target *w; + size_t count = 0; + + for (w = root; w; w = w->next) { + count++; + + if (unlikely(w->root_pid)) { + struct pid_on_target *pid_on_target = w->root_pid; + + while (pid_on_target) { + struct pid_on_target *pid_on_target_to_free = pid_on_target; + pid_on_target = pid_on_target->next; + freez(pid_on_target_to_free); + } + + w->root_pid = NULL; + } + } + + return count; +} + +/** + * Clean the allocated structures + * + * @param agrt the pointer to be cleaned. + */ +void clean_apps_groups_target(struct target *agrt) +{ + struct target *current_target; + while (agrt) { + current_target = agrt; + agrt = current_target->target; + + freez(current_target); + } +} + +/** + * Find or create a new target + * there are targets that are just aggregated to other target (the second argument) + * + * @param id + * @param target + * @param name + * + * @return It returns the target on success and NULL otherwise + */ +struct target *get_apps_groups_target(struct target **agrt, const char *id, struct target *target, const char *name) +{ + int tdebug = 0, thidden = target ? target->hidden : 0, ends_with = 0; + const char *nid = id; + + // extract the options + while (nid[0] == '-' || nid[0] == '+' || nid[0] == '*') { + if (nid[0] == '-') + thidden = 1; + if (nid[0] == '+') + tdebug = 1; + if (nid[0] == '*') + ends_with = 1; + nid++; + } + uint32_t hash = simple_hash(id); + + // find if it already exists + struct target *w, *last = *agrt; + for (w = *agrt; w; w = w->next) { + if (w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0) + return w; + + last = w; + } + + // find an existing target + if (unlikely(!target)) { + while (*name == '-') { + if (*name == '-') + thidden = 1; + name++; + } + + for (target = *agrt; target != NULL; target = target->next) { + if (!target->target && strcmp(name, target->name) == 0) + break; + } + } + + if (target && target->target) + fatal( + "Internal Error: request to link process '%s' to target '%s' which is linked to target '%s'", id, + target->id, target->target->id); + + w = callocz(1, sizeof(struct target)); + strncpyz(w->id, nid, MAX_NAME); + w->idhash = simple_hash(w->id); + + if (unlikely(!target)) + // copy the name + strncpyz(w->name, name, MAX_NAME); + else + // copy the id + strncpyz(w->name, nid, MAX_NAME); + + strncpyz(w->compare, nid, MAX_COMPARE_NAME); + size_t len = strlen(w->compare); + if (w->compare[len - 1] == '*') { + w->compare[len - 1] = '\0'; + w->starts_with = 1; + } + w->ends_with = ends_with; + + if (w->starts_with && w->ends_with) + proc_pid_cmdline_is_needed = 1; + + w->comparehash = simple_hash(w->compare); + w->comparelen = strlen(w->compare); + + w->hidden = thidden; +#ifdef NETDATA_INTERNAL_CHECKS + w->debug_enabled = tdebug; +#else + if (tdebug) + fprintf(stderr, "apps.plugin has been compiled without debugging\n"); +#endif + w->target = target; + + // append it, to maintain the order in apps_groups.conf + if (last) + last->next = w; + else + *agrt = w; + + return w; +} + +/** + * Read the apps_groups.conf file + * + * @param agrt a pointer to apps_group_root_target + * @param path the directory to search apps_%s.conf + * @param file the word to complement the file name. + * + * @return It returns 0 on success and -1 otherwise + */ +int ebpf_read_apps_groups_conf(struct target **agdt, struct target **agrt, const char *path, const char *file) +{ + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", path, file); + + // ---------------------------------------- + + procfile *ff = procfile_open_no_log(filename, " :\t", PROCFILE_FLAG_DEFAULT); + if (!ff) + return -1; + + procfile_set_quotes(ff, "'\""); + + ff = procfile_readall(ff); + if (!ff) + return -1; + + size_t line, lines = procfile_lines(ff); + + for (line = 0; line < lines; line++) { + size_t word, words = procfile_linewords(ff, line); + if (!words) + continue; + + char *name = procfile_lineword(ff, line, 0); + if (!name || !*name) + continue; + + // find a possibly existing target + struct target *w = NULL; + + // loop through all words, skipping the first one (the name) + for (word = 0; word < words; word++) { + char *s = procfile_lineword(ff, line, word); + if (!s || !*s) + continue; + if (*s == '#') + break; + + // is this the first word? skip it + if (s == name) + continue; + + // add this target + struct target *n = get_apps_groups_target(agrt, s, w, name); + if (!n) { + error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); + continue; + } + + // just some optimization + // to avoid searching for a target for each process + if (!w) + w = n->target ? n->target : n; + } + } + + procfile_close(ff); + + *agdt = get_apps_groups_target(agrt, "p+!o@w#e$i^r&7*5(-i)l-o_", NULL, "other"); // match nothing + if (!*agdt) + fatal("Cannot create default target"); + + struct target *ptr = *agdt; + if (ptr->target) + *agdt = ptr->target; + + return 0; +} + +// the minimum PID of the system +// this is also the pid of the init process +#define INIT_PID 1 + +// ---------------------------------------------------------------------------- +// string lengths + +#define MAX_COMPARE_NAME 100 +#define MAX_NAME 100 +#define MAX_CMDLINE 16384 + +struct pid_stat **all_pids = NULL; // to avoid allocations, we pre-allocate the + // the entire pid space. +struct pid_stat *root_of_pids = NULL; // global list of all processes running + +size_t all_pids_count = 0; // the number of processes running + +struct target + *apps_groups_default_target = NULL, // the default target + *apps_groups_root_target = NULL, // apps_groups.conf defined + *users_root_target = NULL, // users + *groups_root_target = NULL; // user groups + +size_t apps_groups_targets_count = 0; // # of apps_groups.conf targets + +// ---------------------------------------------------------------------------- +// internal counters + +static size_t + // global_iterations_counter = 1, + calls_counter = 0, + // file_counter = 0, + // filenames_allocated_counter = 0, + // inodes_changed_counter = 0, + // links_changed_counter = 0, + targets_assignment_counter = 0; + +// ---------------------------------------------------------------------------- +// debugging + +// log each problem once per process +// log flood protection flags (log_thrown) +#define PID_LOG_IO 0x00000001 +#define PID_LOG_STATUS 0x00000002 +#define PID_LOG_CMDLINE 0x00000004 +#define PID_LOG_FDS 0x00000008 +#define PID_LOG_STAT 0x00000010 + +int debug_enabled = 0; + +#ifdef NETDATA_INTERNAL_CHECKS + +#define debug_log(fmt, args...) \ + do { \ + if (unlikely(debug_enabled)) \ + debug_log_int(fmt, ##args); \ + } while (0) + +#else + +static inline void debug_log_dummy(void) +{ +} +#define debug_log(fmt, args...) debug_log_dummy() + +#endif + +/** + * Managed log + * + * Store log information if it is necessary. + * + * @param p the pid stat structure + * @param log the log id + * @param status the return from a function. + * + * @return It returns the status value. + */ +static inline int managed_log(struct pid_stat *p, uint32_t log, int status) +{ + if (unlikely(!status)) { + // error("command failed log %u, errno %d", log, errno); + + if (unlikely(debug_enabled || errno != ENOENT)) { + if (unlikely(debug_enabled || !(p->log_thrown & log))) { + p->log_thrown |= log; + switch (log) { + case PID_LOG_IO: + error( + "Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, + p->comm); + break; + + case PID_LOG_STATUS: + error( + "Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, + p->comm); + break; + + case PID_LOG_CMDLINE: + error( + "Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, + p->comm); + break; + + case PID_LOG_FDS: + error( + "Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, + p->pid, p->comm); + break; + + case PID_LOG_STAT: + break; + + default: + error("unhandled error for pid %d, command '%s'", p->pid, p->comm); + break; + } + } + } + errno = 0; + } else if (unlikely(p->log_thrown & log)) { + // error("unsetting log %u on pid %d", log, p->pid); + p->log_thrown &= ~log; + } + + return status; +} + +/** + * Get PID entry + * + * Get or allocate the PID entry for the specified pid. + * + * @param pid the pid to search the data. + * + * @return It returns the pid entry structure + */ +static inline struct pid_stat *get_pid_entry(pid_t pid) +{ + if (unlikely(all_pids[pid])) + return all_pids[pid]; + + struct pid_stat *p = callocz(1, sizeof(struct pid_stat)); + + if (likely(root_of_pids)) + root_of_pids->prev = p; + + p->next = root_of_pids; + root_of_pids = p; + + p->pid = pid; + + all_pids[pid] = p; + all_pids_count++; + + return p; +} + +/** + * Assign the PID to a target. + * + * @param p the pid_stat structure to assign for a target. + */ +static inline void assign_target_to_pid(struct pid_stat *p) +{ + targets_assignment_counter++; + + uint32_t hash = simple_hash(p->comm); + size_t pclen = strlen(p->comm); + + struct target *w; + for (w = apps_groups_root_target; w; w = w->next) { + // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm); + + // find it - 4 cases: + // 1. the target is not a pattern + // 2. the target has the prefix + // 3. the target has the suffix + // 4. the target is something inside cmdline + + if (unlikely( + ((!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) || + (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) || + (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) || + (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && p->cmdline && strstr(p->cmdline, w->compare))))) { + if (w->target) + p->target = w->target; + else + p->target = w; + + if (debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("%s linked to target %s", p->comm, p->target->name); + + break; + } + } +} + +// ---------------------------------------------------------------------------- +// update pids from proc + +/** + * Read cmd line from /proc/PID/cmdline + * + * @param p the pid_stat_structure. + * + * @return It returns 1 on success and 0 otherwise. + */ +static inline int read_proc_pid_cmdline(struct pid_stat *p) +{ + static char cmdline[MAX_CMDLINE + 1]; + + if (unlikely(!p->cmdline_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); + p->cmdline_filename = strdupz(filename); + } + + int fd = open(p->cmdline_filename, procfile_open_flags, 0666); + if (unlikely(fd == -1)) + goto cleanup; + + ssize_t i, bytes = read(fd, cmdline, MAX_CMDLINE); + close(fd); + + if (unlikely(bytes < 0)) + goto cleanup; + + cmdline[bytes] = '\0'; + for (i = 0; i < bytes; i++) { + if (unlikely(!cmdline[i])) + cmdline[i] = ' '; + } + + if (p->cmdline) + freez(p->cmdline); + p->cmdline = strdupz(cmdline); + + debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline); + + return 1; + +cleanup: + // copy the command to the command line + if (p->cmdline) + freez(p->cmdline); + p->cmdline = strdupz(p->comm); + return 0; +} + +/** + * Read information from /proc/PID/stat and /proc/PID/cmdline + * Assign target to pid + * + * @param p the pid stat structure to store the data. + * @param ptr an useless argument. + */ +static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) +{ + UNUSED(ptr); + + static procfile *ff = NULL; + + if (unlikely(!p->stat_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", netdata_configured_host_prefix, p->pid); + p->stat_filename = strdupz(filename); + } + + int set_quotes = (!ff) ? 1 : 0; + + struct stat statbuf; + if (stat(p->stat_filename, &statbuf)) + return 0; + + ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if (unlikely(!ff)) + return 0; + + if (unlikely(set_quotes)) + procfile_set_open_close(ff, "(", ")"); + + ff = procfile_readall(ff); + if (unlikely(!ff)) + return 0; + + p->last_stat_collected_usec = p->stat_collected_usec; + p->stat_collected_usec = now_monotonic_usec(); + calls_counter++; + + char *comm = procfile_lineword(ff, 0, 1); + p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + + if (strcmp(p->comm, comm) != 0) { + if (unlikely(debug_enabled)) { + if (p->comm[0]) + debug_log("\tpid %d (%s) changed name to '%s'", p->pid, p->comm, comm); + else + debug_log("\tJust added %d (%s)", p->pid, comm); + } + + strncpyz(p->comm, comm, MAX_COMPARE_NAME); + + // /proc//cmdline + if (likely(proc_pid_cmdline_is_needed)) + managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); + + assign_target_to_pid(p); + } + + if (unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int( + "READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu)", + netdata_configured_host_prefix, p->pid, p->comm, (p->target) ? p->target->name : "UNSET", + p->stat_collected_usec - p->last_stat_collected_usec); + + return 1; +} + +/** + * Collect data for PID + * + * @param pid the current pid that we are working + * @param ptr a NULL value + * + * @return It returns 1 on success and 0 otherwise + */ +static inline int collect_data_for_pid(pid_t pid, void *ptr) +{ + if (unlikely(pid < 0 || pid > pid_max)) { + error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); + return 0; + } + + struct pid_stat *p = get_pid_entry(pid); + if (unlikely(!p || p->read)) + return 0; + p->read = 1; + + if (unlikely(!managed_log(p, PID_LOG_STAT, read_proc_pid_stat(p, ptr)))) + // there is no reason to proceed if we cannot get its status + return 0; + + // check its parent pid + if (unlikely(p->ppid < 0 || p->ppid > pid_max)) { + error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); + p->ppid = 0; + } + + // mark it as updated + p->updated = 1; + p->keep = 0; + p->keeploops = 0; + + return 1; +} + +/** + * Fill link list of parents with children PIDs + */ +static inline void link_all_processes_to_their_parents(void) +{ + struct pid_stat *p, *pp; + + // link all children to their parents + // and update children count on parents + for (p = root_of_pids; p; p = p->next) { + // for each process found + + p->sortlist = 0; + p->parent = NULL; + + if (unlikely(!p->ppid)) { + p->parent = NULL; + continue; + } + + pp = all_pids[p->ppid]; + if (likely(pp)) { + p->parent = pp; + pp->children_count++; + + if (unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int( + "child %d (%s, %s) on target '%s' has parent %d (%s, %s).", p->pid, p->comm, + p->updated ? "running" : "exited", (p->target) ? p->target->name : "UNSET", pp->pid, pp->comm, + pp->updated ? "running" : "exited"); + } else { + p->parent = NULL; + debug_log("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); + } + } +} + +/** + * Aggregate PIDs to targets. + */ +static void apply_apps_groups_targets_inheritance(void) +{ + struct pid_stat *p = NULL; + + // children that do not have a target + // inherit their target from their parent + int found = 1, loops = 0; + while (found) { + if (unlikely(debug_enabled)) + loops++; + found = 0; + for (p = root_of_pids; p; p = p->next) { + // if this process does not have a target + // and it has a parent + // and its parent has a target + // then, set the parent's target to this process + if (unlikely(!p->target && p->parent && p->parent->target)) { + p->target = p->parent->target; + found++; + + if (debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int( + "TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, + p->pid, p->comm, p->parent->pid, p->parent->comm); + } + } + } + + // find all the procs with 0 childs and merge them to their parents + // repeat, until nothing more can be done. + int sortlist = 1; + found = 1; + while (found) { + if (unlikely(debug_enabled)) + loops++; + found = 0; + + for (p = root_of_pids; p; p = p->next) { + if (unlikely(!p->sortlist && !p->children_count)) + p->sortlist = sortlist++; + + if (unlikely( + !p->children_count // if this process does not have any children + && !p->merged // and is not already merged + && p->parent // and has a parent + && p->parent->children_count // and its parent has children + // and the target of this process and its parent is the same, + // or the parent does not have a target + && (p->target == p->parent->target || !p->parent->target) && + p->ppid != INIT_PID // and its parent is not init + )) { + // mark it as merged + p->parent->children_count--; + p->merged = 1; + + // the parent inherits the child's target, if it does not have a target itself + if (unlikely(p->target && !p->parent->target)) { + p->parent->target = p->target; + + if (debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int( + "TARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).", p->target->name, + p->parent->pid, p->parent->comm, p->pid, p->comm); + } + + found++; + } + } + + debug_log("TARGET INHERITANCE: merged %d processes", found); + } + + // init goes always to default target + if (all_pids[INIT_PID]) + all_pids[INIT_PID]->target = apps_groups_default_target; + + // pid 0 goes always to default target + if (all_pids[0]) + all_pids[0]->target = apps_groups_default_target; + + // give a default target on all top level processes + if (unlikely(debug_enabled)) + loops++; + for (p = root_of_pids; p; p = p->next) { + // if the process is not merged itself + // then is is a top level process + if (unlikely(!p->merged && !p->target)) + p->target = apps_groups_default_target; + + // make sure all processes have a sortlist + if (unlikely(!p->sortlist)) + p->sortlist = sortlist++; + } + + if (all_pids[1]) + all_pids[1]->sortlist = sortlist++; + + // give a target to all merged child processes + found = 1; + while (found) { + if (unlikely(debug_enabled)) + loops++; + found = 0; + for (p = root_of_pids; p; p = p->next) { + if (unlikely(!p->target && p->merged && p->parent && p->parent->target)) { + p->target = p->parent->target; + found++; + + if (debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int( + "TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.", + p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + } + } + } + + debug_log("apply_apps_groups_targets_inheritance() made %d loops on the process tree", loops); +} + +/** + * Update target timestamp. + * + * @param root the targets that will be updated. + */ +static inline void post_aggregate_targets(struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (w->collected_starttime) { + if (!w->starttime || w->collected_starttime < w->starttime) { + w->starttime = w->collected_starttime; + } + } else { + w->starttime = 0; + } + } +} + +/** + * Remove PID from the link list. + * + * @param pid the PID that will be removed. + */ +static inline void del_pid_entry(pid_t pid) +{ + struct pid_stat *p = all_pids[pid]; + + if (unlikely(!p)) { + error("attempted to free pid %d that is not allocated.", pid); + return; + } + + debug_log("process %d %s exited, deleting it.", pid, p->comm); + + if (root_of_pids == p) + root_of_pids = p->next; + + if (p->next) + p->next->prev = p->prev; + if (p->prev) + p->prev->next = p->next; + + freez(p->stat_filename); + freez(p->status_filename); + freez(p->io_filename); + freez(p->cmdline_filename); + freez(p->cmdline); + freez(p); + + all_pids[pid] = NULL; + all_pids_count--; +} + +/** + * Get command string associated with a PID. + * This can only safely be used when holding the `collect_data_mutex` lock. + * + * @param pid the pid to search the data. + * @param n the maximum amount of bytes to copy into dest. + * if this is greater than the size of the command, it is clipped. + * @param dest the target memory buffer to write the command into. + * @return -1 if the PID hasn't been scraped yet, 0 otherwise. + */ +int get_pid_comm(pid_t pid, size_t n, char *dest) +{ + struct pid_stat *stat; + + stat = all_pids[pid]; + if (unlikely(stat == NULL)) { + return -1; + } + + if (unlikely(n > sizeof(stat->comm))) { + n = sizeof(stat->comm); + } + + strncpyz(dest, stat->comm, n); + return 0; +} + +/** + * Cleanup variable from other threads + * + * @param pid current pid. + */ +void cleanup_variables_from_other_threads(uint32_t pid) +{ + // Clean socket structures + if (socket_bandwidth_curr) { + freez(socket_bandwidth_curr[pid]); + socket_bandwidth_curr[pid] = NULL; + } + + // Clean cachestat structure + if (cachestat_pid) { + freez(cachestat_pid[pid]); + cachestat_pid[pid] = NULL; + } + + // Clean directory cache structure + if (dcstat_pid) { + freez(dcstat_pid[pid]); + dcstat_pid[pid] = NULL; + } + + // Clean swap structure + if (swap_pid) { + freez(swap_pid[pid]); + swap_pid[pid] = NULL; + } + + // Clean vfs structure + if (vfs_pid) { + freez(vfs_pid[pid]); + vfs_pid[pid] = NULL; + } + + // Clean fd structure + if (fd_pid) { + freez(fd_pid[pid]); + fd_pid[pid] = NULL; + } + + // Clean shm structure + if (shm_pid) { + freez(shm_pid[pid]); + shm_pid[pid] = NULL; + } +} + +/** + * Remove PIDs when they are not running more. + */ +void cleanup_exited_pids() +{ + struct pid_stat *p = NULL; + + for (p = root_of_pids; p;) { + if (!p->updated && (!p->keep || p->keeploops > 0)) { + if (unlikely(debug_enabled && (p->keep || p->keeploops))) + debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm); + + pid_t r = p->pid; + p = p->next; + + // Clean process structure + freez(global_process_stats[r]); + global_process_stats[r] = NULL; + + freez(current_apps_data[r]); + current_apps_data[r] = NULL; + + cleanup_variables_from_other_threads(r); + + del_pid_entry(r); + } else { + if (unlikely(p->keep)) + p->keeploops++; + p->keep = 0; + p = p->next; + } + } +} + +/** + * Read proc filesystem for the first time. + * + * @return It returns 0 on success and -1 otherwise. + */ +static inline void read_proc_filesystem() +{ + char dirname[FILENAME_MAX + 1]; + + snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); + DIR *dir = opendir(dirname); + if (!dir) + return; + + struct dirent *de = NULL; + + while ((de = readdir(dir))) { + char *endptr = de->d_name; + + if (unlikely(de->d_type != DT_DIR || de->d_name[0] < '0' || de->d_name[0] > '9')) + continue; + + pid_t pid = (pid_t)strtoul(de->d_name, &endptr, 10); + + // make sure we read a valid number + if (unlikely(endptr == de->d_name || *endptr != '\0')) + continue; + + collect_data_for_pid(pid, NULL); + } + closedir(dir); +} + +/** + * Aggregated PID on target + * + * @param w the target output + * @param p the pid with information to update + * @param o never used + */ +static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o) +{ + UNUSED(o); + + if (unlikely(!p->updated)) { + // the process is not running + return; + } + + if (unlikely(!w)) { + error("pid %d %s was left without a target!", p->pid, p->comm); + return; + } + + w->processes++; + struct pid_on_target *pid_on_target = mallocz(sizeof(struct pid_on_target)); + pid_on_target->pid = p->pid; + pid_on_target->next = w->root_pid; + w->root_pid = pid_on_target; +} + +/** + * Collect data for all process + * + * Read data from hash table and store it in appropriate vectors. + * It also creates the link between targets and PIDs. + * + * @param tbl_pid_stats_fd The mapped file descriptor for the hash table. + */ +void collect_data_for_all_processes(int tbl_pid_stats_fd) +{ + if (unlikely(!all_pids)) + return; + + struct pid_stat *pids = root_of_pids; // global list of all processes running + while (pids) { + if (pids->updated_twice) { + pids->read = 0; // mark it as not read, so that collect_data_for_pid() will read it + pids->updated = 0; + pids->merged = 0; + pids->children_count = 0; + pids->parent = NULL; + } else { + if (pids->updated) + pids->updated_twice = 1; + } + + pids = pids->next; + } + + read_proc_filesystem(); + + uint32_t key; + pids = root_of_pids; // global list of all processes running + // while (bpf_map_get_next_key(tbl_pid_stats_fd, &key, &next_key) == 0) { + while (pids) { + key = pids->pid; + ebpf_process_stat_t *w = global_process_stats[key]; + if (!w) { + w = callocz(1, sizeof(ebpf_process_stat_t)); + global_process_stats[key] = w; + } + + if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, w)) { + // Clean Process structures + freez(w); + global_process_stats[key] = NULL; + + freez(current_apps_data[key]); + current_apps_data[key] = NULL; + + cleanup_variables_from_other_threads(key); + + pids = pids->next; + continue; + } + + pids = pids->next; + } + + link_all_processes_to_their_parents(); + + apply_apps_groups_targets_inheritance(); + + apps_groups_targets_count = zero_all_targets(apps_groups_root_target); + + // this has to be done, before the cleanup + // // concentrate everything on the targets + for (pids = root_of_pids; pids; pids = pids->next) + aggregate_pid_on_target(pids->target, pids, NULL); + + post_aggregate_targets(apps_groups_root_target); +} diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h new file mode 100644 index 0000000..0bea912 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_APPS_H +#define NETDATA_EBPF_APPS_H 1 + +#include "libnetdata/threads/threads.h" +#include "libnetdata/locks/locks.h" +#include "libnetdata/avl/avl.h" +#include "libnetdata/clocks/clocks.h" +#include "libnetdata/config/appconfig.h" +#include "libnetdata/ebpf/ebpf.h" + +#define NETDATA_APPS_FAMILY "apps" +#define NETDATA_APPS_FILE_GROUP "file_access" +#define NETDATA_APPS_FILE_CGROUP_GROUP "file_access (eBPF)" +#define NETDATA_APPS_PROCESS_GROUP "process (eBPF)" +#define NETDATA_APPS_NET_GROUP "net" +#define NETDATA_APPS_IPC_SHM_GROUP "ipc shm (eBPF)" + +#include "ebpf_process.h" +#include "ebpf_dcstat.h" +#include "ebpf_disk.h" +#include "ebpf_fd.h" +#include "ebpf_filesystem.h" +#include "ebpf_hardirq.h" +#include "ebpf_cachestat.h" +#include "ebpf_mdflush.h" +#include "ebpf_mount.h" +#include "ebpf_oomkill.h" +#include "ebpf_shm.h" +#include "ebpf_socket.h" +#include "ebpf_softirq.h" +#include "ebpf_sync.h" +#include "ebpf_swap.h" +#include "ebpf_vfs.h" + +#define MAX_COMPARE_NAME 100 +#define MAX_NAME 100 + +// ---------------------------------------------------------------------------- +// process_pid_stat +// +// Fields read from the kernel ring for a specific PID +// +typedef struct process_pid_stat { + uint64_t pid_tgid; // Unique identifier + uint32_t pid; // process id + + // Count number of calls done for specific function + uint32_t open_call; + uint32_t write_call; + uint32_t writev_call; + uint32_t read_call; + uint32_t readv_call; + uint32_t unlink_call; + uint32_t exit_call; + uint32_t release_call; + uint32_t fork_call; + uint32_t clone_call; + uint32_t close_call; + + // Count number of bytes written or read + uint64_t write_bytes; + uint64_t writev_bytes; + uint64_t readv_bytes; + uint64_t read_bytes; + + // Count number of errors for the specified function + uint32_t open_err; + uint32_t write_err; + uint32_t writev_err; + uint32_t read_err; + uint32_t readv_err; + uint32_t unlink_err; + uint32_t fork_err; + uint32_t clone_err; + uint32_t close_err; +} process_pid_stat_t; + +// ---------------------------------------------------------------------------- +// socket_bandwidth +// +// Fields read from the kernel ring for a specific PID +// +typedef struct socket_bandwidth { + uint64_t first; + uint64_t ct; + uint64_t sent; + uint64_t received; + unsigned char removed; +} socket_bandwidth_t; + +// ---------------------------------------------------------------------------- +// pid_stat +// +// structure to store data for each process running +// see: man proc for the description of the fields + +struct pid_fd { + int fd; + +#ifndef __FreeBSD__ + ino_t inode; + char *filename; + uint32_t link_hash; + size_t cache_iterations_counter; + size_t cache_iterations_reset; +#endif +}; + +struct target { + char compare[MAX_COMPARE_NAME + 1]; + uint32_t comparehash; + size_t comparelen; + + char id[MAX_NAME + 1]; + uint32_t idhash; + + char name[MAX_NAME + 1]; + + uid_t uid; + gid_t gid; + + // Changes made to simplify integration between apps and eBPF. + netdata_publish_cachestat_t cachestat; + netdata_publish_dcstat_t dcstat; + netdata_publish_swap_t swap; + netdata_publish_vfs_t vfs; + netdata_fd_stat_t fd; + netdata_publish_shm_t shm; + + /* These variables are not necessary for eBPF collector + kernel_uint_t minflt; + kernel_uint_t cminflt; + kernel_uint_t majflt; + kernel_uint_t cmajflt; + kernel_uint_t utime; + kernel_uint_t stime; + kernel_uint_t gtime; + kernel_uint_t cutime; + kernel_uint_t cstime; + kernel_uint_t cgtime; + kernel_uint_t num_threads; + // kernel_uint_t rss; + + kernel_uint_t status_vmsize; + kernel_uint_t status_vmrss; + kernel_uint_t status_vmshared; + kernel_uint_t status_rssfile; + kernel_uint_t status_rssshmem; + kernel_uint_t status_vmswap; + + kernel_uint_t io_logical_bytes_read; + kernel_uint_t io_logical_bytes_written; + // kernel_uint_t io_read_calls; + // kernel_uint_t io_write_calls; + kernel_uint_t io_storage_bytes_read; + kernel_uint_t io_storage_bytes_written; + // kernel_uint_t io_cancelled_write_bytes; + + int *target_fds; + int target_fds_size; + + kernel_uint_t openfiles; + kernel_uint_t openpipes; + kernel_uint_t opensockets; + kernel_uint_t openinotifies; + kernel_uint_t openeventfds; + kernel_uint_t opentimerfds; + kernel_uint_t opensignalfds; + kernel_uint_t openeventpolls; + kernel_uint_t openother; + */ + + kernel_uint_t starttime; + kernel_uint_t collected_starttime; + + /* + kernel_uint_t uptime_min; + kernel_uint_t uptime_sum; + kernel_uint_t uptime_max; + */ + + unsigned int processes; // how many processes have been merged to this + int exposed; // if set, we have sent this to netdata + int hidden; // if set, we set the hidden flag on the dimension + int debug_enabled; + int ends_with; + int starts_with; // if set, the compare string matches only the + // beginning of the command + + struct pid_on_target *root_pid; // list of aggregated pids for target debugging + + struct target *target; // the one that will be reported to netdata + struct target *next; +}; + +extern struct target *apps_groups_default_target; +extern struct target *apps_groups_root_target; +extern struct target *users_root_target; +extern struct target *groups_root_target; + +struct pid_stat { + int32_t pid; + char comm[MAX_COMPARE_NAME + 1]; + char *cmdline; + + uint32_t log_thrown; + + // char state; + int32_t ppid; + + // int32_t pgrp; + // int32_t session; + // int32_t tty_nr; + // int32_t tpgid; + // uint64_t flags; + + /* + // these are raw values collected + kernel_uint_t minflt_raw; + kernel_uint_t cminflt_raw; + kernel_uint_t majflt_raw; + kernel_uint_t cmajflt_raw; + kernel_uint_t utime_raw; + kernel_uint_t stime_raw; + kernel_uint_t gtime_raw; // guest_time + kernel_uint_t cutime_raw; + kernel_uint_t cstime_raw; + kernel_uint_t cgtime_raw; // cguest_time + + // these are rates + kernel_uint_t minflt; + kernel_uint_t cminflt; + kernel_uint_t majflt; + kernel_uint_t cmajflt; + kernel_uint_t utime; + kernel_uint_t stime; + kernel_uint_t gtime; + kernel_uint_t cutime; + kernel_uint_t cstime; + kernel_uint_t cgtime; + + // int64_t priority; + // int64_t nice; + int32_t num_threads; + // int64_t itrealvalue; + kernel_uint_t collected_starttime; + // kernel_uint_t vsize; + // kernel_uint_t rss; + // kernel_uint_t rsslim; + // kernel_uint_t starcode; + // kernel_uint_t endcode; + // kernel_uint_t startstack; + // kernel_uint_t kstkesp; + // kernel_uint_t kstkeip; + // uint64_t signal; + // uint64_t blocked; + // uint64_t sigignore; + // uint64_t sigcatch; + // uint64_t wchan; + // uint64_t nswap; + // uint64_t cnswap; + // int32_t exit_signal; + // int32_t processor; + // uint32_t rt_priority; + // uint32_t policy; + // kernel_uint_t delayacct_blkio_ticks; + + uid_t uid; + gid_t gid; + + kernel_uint_t status_vmsize; + kernel_uint_t status_vmrss; + kernel_uint_t status_vmshared; + kernel_uint_t status_rssfile; + kernel_uint_t status_rssshmem; + kernel_uint_t status_vmswap; +#ifndef __FreeBSD__ + ARL_BASE *status_arl; +#endif + + kernel_uint_t io_logical_bytes_read_raw; + kernel_uint_t io_logical_bytes_written_raw; + // kernel_uint_t io_read_calls_raw; + // kernel_uint_t io_write_calls_raw; + kernel_uint_t io_storage_bytes_read_raw; + kernel_uint_t io_storage_bytes_written_raw; + // kernel_uint_t io_cancelled_write_bytes_raw; + + kernel_uint_t io_logical_bytes_read; + kernel_uint_t io_logical_bytes_written; + // kernel_uint_t io_read_calls; + // kernel_uint_t io_write_calls; + kernel_uint_t io_storage_bytes_read; + kernel_uint_t io_storage_bytes_written; + // kernel_uint_t io_cancelled_write_bytes; + */ + + struct pid_fd *fds; // array of fds it uses + size_t fds_size; // the size of the fds array + + int children_count; // number of processes directly referencing this + unsigned char keep : 1; // 1 when we need to keep this process in memory even after it exited + int keeploops; // increases by 1 every time keep is 1 and updated 0 + unsigned char updated : 1; // 1 when the process is currently running + unsigned char updated_twice : 1; // 1 when the process was running in the previous iteration + unsigned char merged : 1; // 1 when it has been merged to its parent + unsigned char read : 1; // 1 when we have already read this process for this iteration + + int sortlist; // higher numbers = top on the process tree + + // each process gets a unique number + + struct target *target; // app_groups.conf targets + struct target *user_target; // uid based targets + struct target *group_target; // gid based targets + + usec_t stat_collected_usec; + usec_t last_stat_collected_usec; + + usec_t io_collected_usec; + usec_t last_io_collected_usec; + + kernel_uint_t uptime; + + char *fds_dirname; // the full directory name in /proc/PID/fd + + char *stat_filename; + char *status_filename; + char *io_filename; + char *cmdline_filename; + + struct pid_stat *parent; + struct pid_stat *prev; + struct pid_stat *next; +}; + +// ---------------------------------------------------------------------------- +// target +// +// target is the structure that processes are aggregated to be reported +// to netdata. +// +// - Each entry in /etc/apps_groups.conf creates a target. +// - Each user and group used by a process in the system, creates a target. +struct pid_on_target { + int32_t pid; + struct pid_on_target *next; +}; + +// ---------------------------------------------------------------------------- +// Structures used to read information from kernel ring +typedef struct ebpf_process_stat { + uint64_t pid_tgid; + uint32_t pid; + + //Counter + uint32_t exit_call; + uint32_t release_call; + uint32_t create_process; + uint32_t create_thread; + + //Counter + uint32_t task_err; + + uint8_t removeme; +} ebpf_process_stat_t; + +typedef struct ebpf_bandwidth { + uint32_t pid; + + uint64_t first; // First timestamp + uint64_t ct; // Last timestamp + uint64_t bytes_sent; // Bytes sent + uint64_t bytes_received; // Bytes received + uint64_t call_tcp_sent; // Number of times tcp_sendmsg was called + uint64_t call_tcp_received; // Number of times tcp_cleanup_rbuf was called + uint64_t retransmit; // Number of times tcp_retransmit was called + uint64_t call_udp_sent; // Number of times udp_sendmsg was called + uint64_t call_udp_received; // Number of times udp_recvmsg was called + uint64_t close; // Number of times tcp_close was called + uint64_t drop; // THIS IS NOT USED FOR WHILE, we are in groom section + uint32_t tcp_v4_connection; // Number of times tcp_v4_connection was called. + uint32_t tcp_v6_connection; // Number of times tcp_v6_connection was called. +} ebpf_bandwidth_t; + +/** + * Internal function used to write debug messages. + * + * @param fmt the format to create the message. + * @param ... the arguments to fill the format. + */ +static inline void debug_log_int(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "apps.plugin: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + fputc('\n', stderr); +} + +// ---------------------------------------------------------------------------- +// Exported variabled and functions +// +extern struct pid_stat **all_pids; + +int ebpf_read_apps_groups_conf(struct target **apps_groups_default_target, + struct target **apps_groups_root_target, + const char *path, + const char *file); + +void clean_apps_groups_target(struct target *apps_groups_root_target); + +size_t zero_all_targets(struct target *root); + +int am_i_running_as_root(); + +void cleanup_exited_pids(); + +int ebpf_read_hash_table(void *ep, int fd, uint32_t pid); + +int get_pid_comm(pid_t pid, size_t n, char *dest); + +size_t read_processes_statistic_using_pid_on_target(ebpf_process_stat_t **ep, + int fd, + struct pid_on_target *pids); + +size_t read_bandwidth_statistic_using_pid_on_target(ebpf_bandwidth_t **ep, int fd, struct pid_on_target *pids); + +void collect_data_for_all_processes(int tbl_pid_stats_fd); + +extern ebpf_process_stat_t **global_process_stats; +extern ebpf_process_publish_apps_t **current_apps_data; +extern netdata_publish_cachestat_t **cachestat_pid; +extern netdata_publish_dcstat_t **dcstat_pid; + +#endif /* NETDATA_EBPF_APPS_H */ diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c new file mode 100644 index 0000000..4c41064 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -0,0 +1,1335 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_cachestat.h" + +netdata_publish_cachestat_t **cachestat_pid; + +static char *cachestat_counter_dimension_name[NETDATA_CACHESTAT_END] = { "ratio", "dirty", "hit", + "miss" }; +static netdata_syscall_stat_t cachestat_counter_aggregated_data[NETDATA_CACHESTAT_END]; +static netdata_publish_syscall_t cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_END]; + +netdata_cachestat_pid_t *cachestat_vector = NULL; + +static netdata_idx_t cachestat_hash_values[NETDATA_CACHESTAT_END]; +static netdata_idx_t *cachestat_values = NULL; + +struct netdata_static_thread cachestat_threads = {.name = "CACHESTAT KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL}; + +ebpf_local_maps_t cachestat_maps[] = {{.name = "cstat_global", .internal_input = NETDATA_CACHESTAT_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "cstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "cstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +struct config cachestat_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +netdata_ebpf_targets_t cachestat_targets[] = { {.name = "add_to_page_cache_lru", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "mark_page_accessed", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "mark_buffer_dirty", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/cachestat.skel.h" // BTF code + +static struct cachestat_bpf *bpf_obj = NULL; + +/** + * Disable probe + * + * Disable all probes to use exclusively another method. + * + * @param obj is the main structure for bpf objects + */ +static void ebpf_cachestat_disable_probe(struct cachestat_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_add_to_page_cache_lru_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_mark_page_accessed_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_mark_buffer_dirty_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_kprobe, false); +} + +/* + * Disable specific probe + * + * Disable probes according the kernel version + * + * @param obj is the main structure for bpf objects + */ +static void ebpf_cachestat_disable_specific_probe(struct cachestat_bpf *obj) +{ + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) { + bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_kprobe, false); + } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) { + bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_kprobe, false); + } else { + bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_kprobe, false); + } +} + +/* + * Disable trampoline + * + * Disable all trampoline to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_cachestat_disable_trampoline(struct cachestat_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_add_to_page_cache_lru_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_mark_page_accessed_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_mark_buffer_dirty_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_fentry, false); +} + +/* + * Disable specific trampoline + * + * Disable trampoline according to kernel version. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_cachestat_disable_specific_trampoline(struct cachestat_bpf *obj) +{ + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) { + bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_fentry, false); + } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) { + bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_fentry, false); + } else { + bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_fentry, false); + } +} + +/** + * Set trampoline target + * + * Set the targets we will monitor. + * + * @param obj is the main structure for bpf objects. + */ +static inline void netdata_set_trampoline_target(struct cachestat_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_add_to_page_cache_lru_fentry, 0, + cachestat_targets[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU].name); + + bpf_program__set_attach_target(obj->progs.netdata_mark_page_accessed_fentry, 0, + cachestat_targets[NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED].name); + + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) { + bpf_program__set_attach_target(obj->progs.netdata_folio_mark_dirty_fentry, 0, + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name); + } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) { + bpf_program__set_attach_target(obj->progs.netdata_set_page_dirty_fentry, 0, + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name); + } else { + bpf_program__set_attach_target(obj->progs.netdata_account_page_dirtied_fentry, 0, + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name); + } + + bpf_program__set_attach_target(obj->progs.netdata_mark_buffer_dirty_fentry, 0, + cachestat_targets[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY].name); + + bpf_program__set_attach_target(obj->progs.netdata_release_task_fentry, 0, + EBPF_COMMON_FNCT_CLEAN_UP); +} + +/** + * Mount Attach Probe + * + * Attach probes to target + * + * @param obj is the main structure for bpf objects. + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_cachestat_attach_probe(struct cachestat_bpf *obj) +{ + obj->links.netdata_add_to_page_cache_lru_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_add_to_page_cache_lru_kprobe, + false, + cachestat_targets[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU].name); + int ret = libbpf_get_error(obj->links.netdata_add_to_page_cache_lru_kprobe); + if (ret) + return -1; + + obj->links.netdata_mark_page_accessed_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_mark_page_accessed_kprobe, + false, + cachestat_targets[NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED].name); + ret = libbpf_get_error(obj->links.netdata_mark_page_accessed_kprobe); + if (ret) + return -1; + + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) { + obj->links.netdata_folio_mark_dirty_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_folio_mark_dirty_kprobe, + false, + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name); + ret = libbpf_get_error(obj->links.netdata_folio_mark_dirty_kprobe); + } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) { + obj->links.netdata_set_page_dirty_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_set_page_dirty_kprobe, + false, + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name); + ret = libbpf_get_error(obj->links.netdata_set_page_dirty_kprobe); + } else { + obj->links.netdata_account_page_dirtied_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_account_page_dirtied_kprobe, + false, + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name); + ret = libbpf_get_error(obj->links.netdata_account_page_dirtied_kprobe); + } + + if (ret) + return -1; + + obj->links.netdata_mark_buffer_dirty_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_mark_buffer_dirty_kprobe, + false, + cachestat_targets[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY].name); + ret = libbpf_get_error(obj->links.netdata_mark_buffer_dirty_kprobe); + if (ret) + return -1; + + obj->links.netdata_release_task_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_release_task_kprobe, + false, + EBPF_COMMON_FNCT_CLEAN_UP); + ret = libbpf_get_error(obj->links.netdata_release_task_kprobe); + if (ret) + return -1; + + return 0; +} + +/** + * Adjust Map Size + * + * Resize maps according input from users. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + */ +static void ebpf_cachestat_adjust_map_size(struct cachestat_bpf *obj, ebpf_module_t *em) +{ + ebpf_update_map_size(obj->maps.cstat_pid, &cachestat_maps[NETDATA_CACHESTAT_PID_STATS], + em, bpf_map__name(obj->maps.cstat_pid)); +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_cachestat_set_hash_tables(struct cachestat_bpf *obj) +{ + cachestat_maps[NETDATA_CACHESTAT_GLOBAL_STATS].map_fd = bpf_map__fd(obj->maps.cstat_global); + cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd = bpf_map__fd(obj->maps.cstat_pid); + cachestat_maps[NETDATA_CACHESTAT_CTRL].map_fd = bpf_map__fd(obj->maps.cstat_ctrl); +} + +/** + * Disable Release Task + * + * Disable release task when apps is not enabled. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_cachestat_disable_release_task(struct cachestat_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_release_task_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_fentry, false); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_cachestat_load_and_attach(struct cachestat_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_targets_t *mt = em->targets; + netdata_ebpf_program_loaded_t test = mt[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU].mode; + + if (test == EBPF_LOAD_TRAMPOLINE) { + ebpf_cachestat_disable_probe(obj); + ebpf_cachestat_disable_specific_trampoline(obj); + + netdata_set_trampoline_target(obj); + } else { + ebpf_cachestat_disable_trampoline(obj); + ebpf_cachestat_disable_specific_probe(obj); + } + + ebpf_cachestat_adjust_map_size(obj, em); + + if (!em->apps_charts && !em->cgroup_charts) + ebpf_cachestat_disable_release_task(obj); + + int ret = cachestat_bpf__load(obj); + if (ret) { + return ret; + } + + ret = (test == EBPF_LOAD_TRAMPOLINE) ? cachestat_bpf__attach(obj) : ebpf_cachestat_attach_probe(obj); + if (!ret) { + ebpf_cachestat_set_hash_tables(obj); + + ebpf_update_controller(cachestat_maps[NETDATA_CACHESTAT_CTRL].map_fd, em); + } + + return ret; +} +#endif +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Cachestat Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_cachestat_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_cleanup_publish_syscall(cachestat_counter_publish_aggregated); + + freez(cachestat_vector); + freez(cachestat_values); + freez(cachestat_threads.thread); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + cachestat_bpf__destroy(bpf_obj); +#endif + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Cachestat exit. + * + * Cancel child and exit. + * + * @param ptr thread data. + */ +static void ebpf_cachestat_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*cachestat_threads.thread); + ebpf_cachestat_free(em); +} + +/** + * Cachestat cleanup + * + * Clean up allocated addresses. + * + * @param ptr thread data. + */ +static void ebpf_cachestat_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_cachestat_free(em); +} + +/***************************************************************** + * + * COMMON FUNCTIONS + * + *****************************************************************/ + +/** + * Update publish + * + * Update publish values before to write dimension. + * + * @param out structure that will receive data. + * @param mpa calls for mark_page_accessed during the last second. + * @param mbd calls for mark_buffer_dirty during the last second. + * @param apcl calls for add_to_page_cache_lru during the last second. + * @param apd calls for account_page_dirtied during the last second. + */ +void cachestat_update_publish(netdata_publish_cachestat_t *out, uint64_t mpa, uint64_t mbd, + uint64_t apcl, uint64_t apd) +{ + // Adapted algorithm from https://github.com/iovisor/bcc/blob/master/tools/cachestat.py#L126-L138 + NETDATA_DOUBLE total = (NETDATA_DOUBLE) (((long long)mpa) - ((long long)mbd)); + if (total < 0) + total = 0; + + NETDATA_DOUBLE misses = (NETDATA_DOUBLE) ( ((long long) apcl) - ((long long) apd) ); + if (misses < 0) + misses = 0; + + // If hits are < 0, then its possible misses are overestimate due to possibly page cache read ahead adding + // more pages than needed. In this case just assume misses as total and reset hits. + NETDATA_DOUBLE hits = total - misses; + if (hits < 0 ) { + misses = total; + hits = 0; + } + + NETDATA_DOUBLE ratio = (total > 0) ? hits/total : 1; + + out->ratio = (long long )(ratio*100); + out->hit = (long long)hits; + out->miss = (long long)misses; +} + +/** + * Save previous values + * + * Save values used this time. + * + * @param publish + */ +static void save_previous_values(netdata_publish_cachestat_t *publish) { + publish->prev.mark_page_accessed = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED]; + publish->prev.account_page_dirtied = cachestat_hash_values[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED]; + publish->prev.add_to_page_cache_lru = cachestat_hash_values[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU]; + publish->prev.mark_buffer_dirty = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY]; +} + +/** + * Calculate statistics + * + * @param publish the structure where we will store the data. + */ +static void calculate_stats(netdata_publish_cachestat_t *publish) { + if (!publish->prev.mark_page_accessed) { + save_previous_values(publish); + return; + } + + uint64_t mpa = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED] - publish->prev.mark_page_accessed; + uint64_t mbd = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY] - publish->prev.mark_buffer_dirty; + uint64_t apcl = cachestat_hash_values[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU] - publish->prev.add_to_page_cache_lru; + uint64_t apd = cachestat_hash_values[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED] - publish->prev.account_page_dirtied; + + save_previous_values(publish); + + // We are changing the original algorithm to have a smooth ratio. + cachestat_update_publish(publish, mpa, mbd, apcl, apd); +} + + +/***************************************************************** + * + * APPS + * + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void cachestat_apps_accumulator(netdata_cachestat_pid_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_cachestat_pid_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_cachestat_pid_t *w = &out[i]; + total->account_page_dirtied += w->account_page_dirtied; + total->add_to_page_cache_lru += w->add_to_page_cache_lru; + total->mark_buffer_dirty += w->mark_buffer_dirty; + total->mark_page_accessed += w->mark_page_accessed; + } +} + +/** + * Save Pid values + * + * Save the current values inside the structure + * + * @param out vector used to plot charts + * @param publish vector with values read from hash tables. + */ +static inline void cachestat_save_pid_values(netdata_publish_cachestat_t *out, netdata_cachestat_pid_t *publish) +{ + if (!out->current.mark_page_accessed) { + memcpy(&out->current, &publish[0], sizeof(netdata_cachestat_pid_t)); + return; + } + + memcpy(&out->prev, &out->current, sizeof(netdata_cachestat_pid_t)); + memcpy(&out->current, &publish[0], sizeof(netdata_cachestat_pid_t)); +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void cachestat_fill_pid(uint32_t current_pid, netdata_cachestat_pid_t *publish) +{ + netdata_publish_cachestat_t *curr = cachestat_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_cachestat_t)); + cachestat_pid[current_pid] = curr; + + cachestat_save_pid_values(curr, publish); + return; + } + + cachestat_save_pid_values(curr, publish); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. + */ +static void read_apps_table() +{ + netdata_cachestat_pid_t *cv = cachestat_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; + size_t length = sizeof(netdata_cachestat_pid_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + cachestat_apps_accumulator(cv); + + cachestat_fill_pid(key, cv); + + // We are cleaning to avoid passing data read from one process to other. + memset(cv, 0, length); + + pids = pids->next; + } +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_cachestat_cgroup() +{ + netdata_cachestat_pid_t *cv = cachestat_vector; + int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; + size_t length = sizeof(netdata_cachestat_pid_t) * ebpf_nprocs; + + ebpf_cgroup_target_t *ect; + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_cachestat_pid_t *out = &pids->cachestat; + if (likely(cachestat_pid) && cachestat_pid[pid]) { + netdata_publish_cachestat_t *in = cachestat_pid[pid]; + + memcpy(out, &in->current, sizeof(netdata_cachestat_pid_t)); + } else { + memset(cv, 0, length); + if (bpf_map_lookup_elem(fd, &pid, cv)) { + continue; + } + + cachestat_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_cachestat_pid_t)); + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 20090, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20091, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20092, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20093, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Read global counter + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + uint32_t idx; + netdata_idx_t *val = cachestat_hash_values; + netdata_idx_t *stored = cachestat_values; + int fd = cachestat_maps[NETDATA_CACHESTAT_GLOBAL_STATS].map_fd; + + for (idx = NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU; idx < NETDATA_CACHESTAT_END; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, stored)) { + int i; + int end = ebpf_nprocs; + netdata_idx_t total = 0; + for (i = 0; i < end; i++) + total += stored[i]; + + val[idx] = total; + } + } +} + +/** + * Socket read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_cachestat_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_cachestat_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_CACHESTAT_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Send global + * + * Send global charts to Netdata + */ +static void cachestat_send_global(netdata_publish_cachestat_t *publish) +{ + calculate_stats(publish); + + netdata_publish_syscall_t *ptr = cachestat_counter_publish_aggregated; + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, ptr[NETDATA_CACHESTAT_IDX_RATIO].dimension, + publish->ratio); + + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_DIRTY_CHART, ptr[NETDATA_CACHESTAT_IDX_DIRTY].dimension, + cachestat_hash_values[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY]); + + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_CHART, ptr[NETDATA_CACHESTAT_IDX_HIT].dimension, publish->hit); + + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_MISSES_CHART, ptr[NETDATA_CACHESTAT_IDX_MISS].dimension, + publish->miss); +} + +/** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. + * @param root structure with listed IPs + */ +void ebpf_cachestat_sum_pids(netdata_publish_cachestat_t *publish, struct pid_on_target *root) +{ + memcpy(&publish->prev, &publish->current,sizeof(publish->current)); + memset(&publish->current, 0, sizeof(publish->current)); + + netdata_cachestat_pid_t *dst = &publish->current; + while (root) { + int32_t pid = root->pid; + netdata_publish_cachestat_t *w = cachestat_pid[pid]; + if (w) { + netdata_cachestat_pid_t *src = &w->current; + dst->account_page_dirtied += src->account_page_dirtied; + dst->add_to_page_cache_lru += src->add_to_page_cache_lru; + dst->mark_buffer_dirty += src->mark_buffer_dirty; + dst->mark_page_accessed += src->mark_page_accessed; + } + + root = root->next; + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param root the target list. +*/ +void ebpf_cache_send_apps_data(struct target *root) +{ + struct target *w; + collected_number value; + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_cachestat_sum_pids(&w->cachestat, w->root_pid); + netdata_cachestat_pid_t *current = &w->cachestat.current; + netdata_cachestat_pid_t *prev = &w->cachestat.prev; + + uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; + uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; + w->cachestat.dirty = mbd; + uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru; + uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied; + + cachestat_update_publish(&w->cachestat, mpa, mbd, apcl, apd); + value = (collected_number) w->cachestat.ratio; + // Here we are using different approach to have a chart more smooth + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = (collected_number) w->cachestat.dirty; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = (collected_number) w->cachestat.hit; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_MISSES_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = (collected_number) w->cachestat.miss; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); +} + +/** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. + * @param root structure with listed IPs + */ +void ebpf_cachestat_sum_cgroup_pids(netdata_publish_cachestat_t *publish, struct pid_on_target2 *root) +{ + memcpy(&publish->prev, &publish->current,sizeof(publish->current)); + memset(&publish->current, 0, sizeof(publish->current)); + + netdata_cachestat_pid_t *dst = &publish->current; + while (root) { + netdata_cachestat_pid_t *src = &root->cachestat; + + dst->account_page_dirtied += src->account_page_dirtied; + dst->add_to_page_cache_lru += src->add_to_page_cache_lru; + dst->mark_buffer_dirty += src->mark_buffer_dirty; + dst->mark_page_accessed += src->mark_page_accessed; + + root = root->next; + } +} + +/** + * Calc chart values + * + * Do necessary math to plot charts. + */ +void ebpf_cachestat_calc_chart_values() +{ + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_cachestat_sum_cgroup_pids(&ect->publish_cachestat, ect->pids); + + netdata_cachestat_pid_t *current = &ect->publish_cachestat.current; + netdata_cachestat_pid_t *prev = &ect->publish_cachestat.prev; + + uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; + uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; + ect->publish_cachestat.dirty = mbd; + uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru; + uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied; + + cachestat_update_publish(&ect->publish_cachestat, mpa, mbd, apcl, apd); + } +} + +/** + * Create Systemd cachestat Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_cachestat_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21100, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21101, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_HIT_CHART, "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21102, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_MISSES_CHART, "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21103, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); +} + +/** + * Send Cache Stat charts + * + * Send collected data to Netdata. + */ +static void ebpf_send_systemd_cachestat_charts() +{ + ebpf_cgroup_target_t *ect; + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.ratio); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.dirty); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.hit); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_MISSES_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.miss); + } + } + write_end_chart(); +} + +/** + * Send Directory Cache charts + * + * Send collected data to Netdata. + */ +static void ebpf_send_specific_cachestat_data(char *type, netdata_publish_cachestat_t *npc) +{ + write_begin_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_RATIO].name, (long long)npc->ratio); + write_end_chart(); + + write_begin_chart(type, NETDATA_CACHESTAT_DIRTY_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY].name, (long long)npc->dirty); + write_end_chart(); + + write_begin_chart(type, NETDATA_CACHESTAT_HIT_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT].name, (long long)npc->hit); + write_end_chart(); + + write_begin_chart(type, NETDATA_CACHESTAT_MISSES_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS].name, (long long)npc->miss); + write_end_chart(); +} + +/** + * Create specific cache Stat charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_specific_cachestat_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5200, + ebpf_create_global_dimension, + cachestat_counter_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(type, NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5201, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY], 1, + update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(type, NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5202, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT], 1, + update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(type, NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5203, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS], 1, + update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); +} + +/** + * Obsolete specific cache stat charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5200, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5201, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5202, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5203, update_every); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. +*/ +void ebpf_cachestat_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + ebpf_cachestat_calc_chart_values(); + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_cachestat_charts(update_every); + } + + ebpf_send_systemd_cachestat_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART) && ect->updated) { + ebpf_create_specific_cachestat_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART) { + if (ect->updated) { + ebpf_send_specific_cachestat_data(ect->name, &ect->publish_cachestat); + } else { + ebpf_obsolete_specific_cachestat_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. +*/ +static void cachestat_collector(ebpf_module_t *em) +{ + cachestat_threads.thread = callocz(1, sizeof(netdata_thread_t)); + cachestat_threads.start_routine = ebpf_cachestat_read_hash; + + netdata_thread_create(cachestat_threads.thread, cachestat_threads.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_cachestat_read_hash, em); + + netdata_publish_cachestat_t publish; + memset(&publish, 0, sizeof(publish)); + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + read_apps_table(); + + if (cgroups) + ebpf_update_cachestat_cgroup(); + + pthread_mutex_lock(&lock); + + cachestat_send_global(&publish); + + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_cache_send_apps_data(apps_groups_root_target); + + if (cgroups) + ebpf_cachestat_send_cgroup_data(update_every); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_create_memory_charts(ebpf_module_t *em) +{ + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21100, + ebpf_create_global_dimension, + cachestat_counter_publish_aggregated, 1, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21101, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY], 1, + em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21102, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT], 1, + em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21103, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS], 1, + em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + fflush(stdout); +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_cachestat_allocate_global_vectors(int apps) +{ + if (apps) + cachestat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_cachestat_t *)); + + cachestat_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_cachestat_pid_t)); + + cachestat_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); + + memset(cachestat_hash_values, 0, NETDATA_CACHESTAT_END * sizeof(netdata_idx_t)); + memset(cachestat_counter_aggregated_data, 0, NETDATA_CACHESTAT_END * sizeof(netdata_syscall_stat_t)); + memset(cachestat_counter_publish_aggregated, 0, NETDATA_CACHESTAT_END * sizeof(netdata_publish_syscall_t)); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Update Internal value + * + * Update values used during runtime. + */ +static void ebpf_cachestat_set_internal_value() +{ + static char *account_page[] = { "account_page_dirtied", "__set_page_dirty", "__folio_mark_dirty" }; + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name = account_page[NETDATA_CACHESTAT_FOLIO_DIRTY]; + else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name = account_page[NETDATA_CACHESTAT_SET_PAGE_DIRTY]; + else + cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name = account_page[NETDATA_CACHESTAT_ACCOUNT_PAGE_DIRTY]; +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_cachestat_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU].mode); + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = cachestat_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_cachestat_load_and_attach(bpf_obj, em); + } +#endif + + if (ret) + error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + +/** + * Cachestat thread + * + * Thread used to make cachestat thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_cachestat_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_cachestat_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = cachestat_maps; + + ebpf_update_pid_table(&cachestat_maps[NETDATA_CACHESTAT_PID_STATS], em); + + ebpf_cachestat_set_internal_value(); + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_cachestat_load_bpf(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endcachestat; + } + + ebpf_cachestat_allocate_global_vectors(em->apps_charts); + + int algorithms[NETDATA_CACHESTAT_END] = { + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX + }; + + ebpf_global_labels(cachestat_counter_aggregated_data, cachestat_counter_publish_aggregated, + cachestat_counter_dimension_name, cachestat_counter_dimension_name, + algorithms, NETDATA_CACHESTAT_END); + + pthread_mutex_lock(&lock); + ebpf_update_stats(&plugin_statistics, em); + ebpf_create_memory_charts(em); + pthread_mutex_unlock(&lock); + + cachestat_collector(em); + +endcachestat: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_cachestat.h b/collectors/ebpf.plugin/ebpf_cachestat.h new file mode 100644 index 0000000..07f0745 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_cachestat.h @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_CACHESTAT_H +#define NETDATA_EBPF_CACHESTAT_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_CACHESTAT "cachestat" + +// charts +#define NETDATA_CACHESTAT_HIT_RATIO_CHART "cachestat_ratio" +#define NETDATA_CACHESTAT_DIRTY_CHART "cachestat_dirties" +#define NETDATA_CACHESTAT_HIT_CHART "cachestat_hits" +#define NETDATA_CACHESTAT_MISSES_CHART "cachestat_misses" + +#define NETDATA_CACHESTAT_SUBMENU "page_cache" +#define NETDATA_CACHESTAT_CGROUP_SUBMENU "page cache (eBPF)" + +#define EBPF_CACHESTAT_DIMENSION_PAGE "pages/s" +#define EBPF_CACHESTAT_DIMENSION_HITS "hits/s" +#define EBPF_CACHESTAT_DIMENSION_MISSES "misses/s" + +#define NETDATA_LATENCY_CACHESTAT_SLEEP_MS 600000ULL + +// configuration file +#define NETDATA_CACHESTAT_CONFIG_FILE "cachestat.conf" + +// Contexts +#define NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT "cgroup.cachestat_ratio" +#define NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT "cgroup.cachestat_dirties" +#define NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT "cgroup.cachestat_hits" +#define NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT "cgroup.cachestat_misses" + +#define NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT "services.cachestat_ratio" +#define NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT "services.cachestat_dirties" +#define NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT "services.cachestat_hits" +#define NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT "services.cachestat_misses" + +// variables +enum cachestat_counters { + NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU, + NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED, + NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED, + NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY, + + NETDATA_CACHESTAT_END +}; + +enum cachestat_account_dirty_pages { + NETDATA_CACHESTAT_ACCOUNT_PAGE_DIRTY, + NETDATA_CACHESTAT_SET_PAGE_DIRTY, + NETDATA_CACHESTAT_FOLIO_DIRTY +}; + +enum cachestat_indexes { + NETDATA_CACHESTAT_IDX_RATIO, + NETDATA_CACHESTAT_IDX_DIRTY, + NETDATA_CACHESTAT_IDX_HIT, + NETDATA_CACHESTAT_IDX_MISS +}; + +enum cachestat_tables { + NETDATA_CACHESTAT_GLOBAL_STATS, + NETDATA_CACHESTAT_PID_STATS, + NETDATA_CACHESTAT_CTRL +}; + +typedef struct netdata_publish_cachestat_pid { + uint64_t add_to_page_cache_lru; + uint64_t mark_page_accessed; + uint64_t account_page_dirtied; + uint64_t mark_buffer_dirty; +} netdata_cachestat_pid_t; + +typedef struct netdata_publish_cachestat { + long long ratio; + long long dirty; + long long hit; + long long miss; + + netdata_cachestat_pid_t current; + netdata_cachestat_pid_t prev; +} netdata_publish_cachestat_t; + +void *ebpf_cachestat_thread(void *ptr); + +extern struct config cachestat_config; +extern netdata_ebpf_targets_t cachestat_targets[]; +extern ebpf_local_maps_t cachestat_maps[]; + +#endif // NETDATA_EBPF_CACHESTAT_H diff --git a/collectors/ebpf.plugin/ebpf_cgroup.c b/collectors/ebpf.plugin/ebpf_cgroup.c new file mode 100644 index 0000000..42c0453 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_cgroup.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "ebpf.h" +#include "ebpf_cgroup.h" + +ebpf_cgroup_target_t *ebpf_cgroup_pids = NULL; +int send_cgroup_chart = 0; + +// -------------------------------------------------------------------------------------------------------------------- +// Map shared memory + +/** + * Map Shared Memory locally + * + * Map the shared memory for current process + * + * @param fd file descriptor returned after shm_open was called. + * @param length length of the shared memory + * + * @return It returns a pointer to the region mapped. + */ +static inline void *ebpf_cgroup_map_shm_locally(int fd, size_t length) +{ + void *value; + + value = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (!value) { + error("Cannot map shared memory used between eBPF and cgroup, integration between processes won't happen"); + close(shm_fd_ebpf_cgroup); + shm_fd_ebpf_cgroup = -1; + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); + } + + return value; +} + +/** + * Map cgroup shared memory + * + * Map cgroup shared memory from cgroup to plugin + */ +void ebpf_map_cgroup_shared_memory() +{ + static int limit_try = 0; + static time_t next_try = 0; + + if (shm_ebpf_cgroup.header || limit_try > NETDATA_EBPF_CGROUP_MAX_TRIES) + return; + + time_t curr_time = time(NULL); + if (curr_time < next_try) + return; + + limit_try++; + next_try = curr_time + NETDATA_EBPF_CGROUP_NEXT_TRY_SEC; + + shm_fd_ebpf_cgroup = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_RDWR, 0660); + if (shm_fd_ebpf_cgroup < 0) { + if (limit_try == NETDATA_EBPF_CGROUP_MAX_TRIES) + error("Shared memory was not initialized, integration between processes won't happen."); + + return; + } + + // Map only header + shm_ebpf_cgroup.header = (netdata_ebpf_cgroup_shm_header_t *) ebpf_cgroup_map_shm_locally(shm_fd_ebpf_cgroup, + sizeof(netdata_ebpf_cgroup_shm_header_t)); + if (!shm_ebpf_cgroup.header) { + limit_try = NETDATA_EBPF_CGROUP_MAX_TRIES + 1; + return; + } + + size_t length = shm_ebpf_cgroup.header->body_length; + + munmap(shm_ebpf_cgroup.header, sizeof(netdata_ebpf_cgroup_shm_header_t)); + + shm_ebpf_cgroup.header = (netdata_ebpf_cgroup_shm_header_t *)ebpf_cgroup_map_shm_locally(shm_fd_ebpf_cgroup, length); + if (!shm_ebpf_cgroup.header) { + limit_try = NETDATA_EBPF_CGROUP_MAX_TRIES + 1; + return; + } + shm_ebpf_cgroup.body = (netdata_ebpf_cgroup_shm_body_t *) ((char *)shm_ebpf_cgroup.header + + sizeof(netdata_ebpf_cgroup_shm_header_t)); + + shm_sem_ebpf_cgroup = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, 0660, 1); + + if (shm_sem_ebpf_cgroup == SEM_FAILED) { + error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); + munmap(shm_ebpf_cgroup.header, length); + shm_ebpf_cgroup.header = NULL; + close(shm_fd_ebpf_cgroup); + shm_fd_ebpf_cgroup = -1; + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Close and Cleanup + +/** + * Clean Specific cgroup pid + * + * Clean all PIDs associated with cgroup. + * + * @param pt structure pid on target that will have your PRs removed + */ +static inline void ebpf_clean_specific_cgroup_pids(struct pid_on_target2 *pt) +{ + while (pt) { + struct pid_on_target2 *next_pid = pt->next; + + freez(pt); + pt = next_pid; + } +} + +/** + * Remove Cgroup Update Target Update List + * + * Remove from cgroup target and update the link list + */ +static void ebpf_remove_cgroup_target_update_list() +{ + ebpf_cgroup_target_t *next, *ect = ebpf_cgroup_pids; + ebpf_cgroup_target_t *prev = ebpf_cgroup_pids; + while (ect) { + next = ect->next; + if (!ect->updated) { + if (ect == ebpf_cgroup_pids) { + ebpf_cgroup_pids = next; + prev = next; + } else { + prev->next = next; + } + + ebpf_clean_specific_cgroup_pids(ect->pids); + freez(ect); + } else { + prev = ect; + } + + ect = next; + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Fill variables + +/** + * Set Target Data + * + * Set local variable values according shared memory information. + * + * @param out local output variable. + * @param ptr input from shared memory. + */ +static inline void ebpf_cgroup_set_target_data(ebpf_cgroup_target_t *out, netdata_ebpf_cgroup_shm_body_t *ptr) +{ + out->hash = ptr->hash; + snprintfz(out->name, 255, "%s", ptr->name); + out->systemd = ptr->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; + out->updated = 1; +} + +/** + * Find or create + * + * Find the structure inside the link list or allocate and link when it is not present. + * + * @param ptr Input from shared memory. + * + * @return It returns a pointer for the structure associated with the input. + */ +static ebpf_cgroup_target_t * ebpf_cgroup_find_or_create(netdata_ebpf_cgroup_shm_body_t *ptr) +{ + ebpf_cgroup_target_t *ect, *prev; + for (ect = ebpf_cgroup_pids, prev = ebpf_cgroup_pids; ect; prev = ect, ect = ect->next) { + if (ect->hash == ptr->hash && !strcmp(ect->name, ptr->name)) { + ect->updated = 1; + return ect; + } + } + + ebpf_cgroup_target_t *new_ect = callocz(1, sizeof(ebpf_cgroup_target_t)); + + ebpf_cgroup_set_target_data(new_ect, ptr); + if (!ebpf_cgroup_pids) { + ebpf_cgroup_pids = new_ect; + } else { + prev->next = new_ect; + } + + return new_ect; +} + +/** + * Update pid link list + * + * Update PIDs list associated with specific cgroup. + * + * @param ect cgroup structure where pids will be stored + * @param path file with PIDs associated to cgroup. + */ +static void ebpf_update_pid_link_list(ebpf_cgroup_target_t *ect, char *path) +{ + procfile *ff = procfile_open_no_log(path, " \t:", PROCFILE_FLAG_DEFAULT); + if (!ff) + return; + + ff = procfile_readall(ff); + if (!ff) + return; + + size_t lines = procfile_lines(ff), l; + for (l = 0; l < lines ;l++) { + int pid = (int)str2l(procfile_lineword(ff, l, 0)); + if (pid) { + struct pid_on_target2 *pt, *prev; + for (pt = ect->pids, prev = ect->pids; pt; prev = pt, pt = pt->next) { + if (pt->pid == pid) + break; + } + + if (!pt) { + struct pid_on_target2 *w = callocz(1, sizeof(struct pid_on_target2)); + w->pid = pid; + if (!ect->pids) + ect->pids = w; + else + prev->next = w; + } + } + } + + procfile_close(ff); +} + +/** + * Set remove var + * + * Set variable remove. If this variable is not reset, the structure will be removed from link list. + */ +void ebpf_reset_updated_var() + { + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + ect->updated = 0; + } + } + +/** + * Parse cgroup shared memory + * + * This function is responsible to copy necessary data from shared memory to local memory. + */ +void ebpf_parse_cgroup_shm_data() +{ + static int previous = 0; + if (shm_ebpf_cgroup.header) { + sem_wait(shm_sem_ebpf_cgroup); + int i, end = shm_ebpf_cgroup.header->cgroup_root_count; + + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_remove_cgroup_target_update_list(); + + ebpf_reset_updated_var(); + + for (i = 0; i < end; i++) { + netdata_ebpf_cgroup_shm_body_t *ptr = &shm_ebpf_cgroup.body[i]; + if (ptr->enabled) { + ebpf_cgroup_target_t *ect = ebpf_cgroup_find_or_create(ptr); + ebpf_update_pid_link_list(ect, ptr->path); + } + } + send_cgroup_chart = previous != shm_ebpf_cgroup.header->cgroup_root_count; + previous = shm_ebpf_cgroup.header->cgroup_root_count; +#ifdef NETDATA_DEV_MODE + error("Updating cgroup %d (Previous: %d, Current: %d)", send_cgroup_chart, previous, shm_ebpf_cgroup.header->cgroup_root_count); +#endif + pthread_mutex_unlock(&mutex_cgroup_shm); + + sem_post(shm_sem_ebpf_cgroup); + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Create charts + +/** + * Create charts on systemd submenu + * + * @param id the chart id + * @param title the value displayed on vertical axis. + * @param units the value displayed on vertical axis. + * @param family Submenu that the chart will be attached on dashboard. + * @param charttype chart type + * @param order the chart order + * @param algorithm the algorithm used by dimension + * @param context add context for chart + * @param module chart module name, this is the eBPF thread. + * @param update_every value to overwrite the update frequency set by the server. + */ +void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *family, char *charttype, int order, + char *algorithm, char *context, char *module, int update_every) +{ + ebpf_cgroup_target_t *w; + ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, title, units, family, charttype, context, + order, update_every, module); + + for (w = ebpf_cgroup_pids; w; w = w->next) { + if (unlikely(w->systemd) && unlikely(w->updated)) + fprintf(stdout, "DIMENSION %s '' %s 1 1\n", w->name, algorithm); + } +} diff --git a/collectors/ebpf.plugin/ebpf_cgroup.h b/collectors/ebpf.plugin/ebpf_cgroup.h new file mode 100644 index 0000000..19da7fc --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_cgroup.h @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_CGROUP_H +#define NETDATA_EBPF_CGROUP_H 1 + +#define NETDATA_EBPF_CGROUP_MAX_TRIES 3 +#define NETDATA_EBPF_CGROUP_NEXT_TRY_SEC 30 + +#include "ebpf.h" +#include "ebpf_apps.h" + +#define NETDATA_SERVICE_FAMILY "services" + +struct pid_on_target2 { + int32_t pid; + int updated; + + netdata_publish_swap_t swap; + netdata_fd_stat_t fd; + netdata_publish_vfs_t vfs; + ebpf_process_stat_t ps; + netdata_dcstat_pid_t dc; + netdata_publish_shm_t shm; + ebpf_bandwidth_t socket; + netdata_cachestat_pid_t cachestat; + + struct pid_on_target2 *next; +}; + +enum ebpf_cgroup_flags { + NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART = 1, + NETDATA_EBPF_CGROUP_HAS_SWAP_CHART = 1<<2, + NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART = 1<<3, + NETDATA_EBPF_CGROUP_HAS_FD_CHART = 1<<4, + NETDATA_EBPF_CGROUP_HAS_VFS_CHART = 1<<5, + NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART = 1<<6, + NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART = 1<<7, + NETDATA_EBPF_CGROUP_HAS_DC_CHART = 1<<8, + NETDATA_EBPF_CGROUP_HAS_SHM_CHART = 1<<9 +}; + +typedef struct ebpf_cgroup_target { + char name[256]; // title + uint32_t hash; + uint32_t flags; + uint32_t systemd; + uint32_t updated; + + netdata_publish_swap_t publish_systemd_swap; + netdata_fd_stat_t publish_systemd_fd; + netdata_publish_vfs_t publish_systemd_vfs; + ebpf_process_stat_t publish_systemd_ps; + netdata_publish_dcstat_t publish_dc; + int oomkill; + netdata_publish_shm_t publish_shm; + ebpf_socket_publish_apps_t publish_socket; + netdata_publish_cachestat_t publish_cachestat; + + struct pid_on_target2 *pids; + struct ebpf_cgroup_target *next; +} ebpf_cgroup_target_t; + +void ebpf_map_cgroup_shared_memory(); +void ebpf_parse_cgroup_shm_data(); +void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *family, char *charttype, int order, + char *algorithm, char *context, char *module, int update_every); +extern int send_cgroup_chart; + +#endif /* NETDATA_EBPF_CGROUP_H */ diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c new file mode 100644 index 0000000..71169e1 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -0,0 +1,1225 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_dcstat.h" + +static char *dcstat_counter_dimension_name[NETDATA_DCSTAT_IDX_END] = { "ratio", "reference", "slow", "miss" }; +static netdata_syscall_stat_t dcstat_counter_aggregated_data[NETDATA_DCSTAT_IDX_END]; +static netdata_publish_syscall_t dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_END]; + +netdata_dcstat_pid_t *dcstat_vector = NULL; +netdata_publish_dcstat_t **dcstat_pid = NULL; + +static netdata_idx_t dcstat_hash_values[NETDATA_DCSTAT_IDX_END]; +static netdata_idx_t *dcstat_values = NULL; + +struct config dcstat_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +struct netdata_static_thread dcstat_threads = {"DCSTAT KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL}; + +ebpf_local_maps_t dcstat_maps[] = {{.name = "dcstat_global", .internal_input = NETDATA_DIRECTORY_CACHE_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "dcstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "dcstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +static ebpf_specify_name_t dc_optional_name[] = { {.program_name = "netdata_lookup_fast", + .function_to_attach = "lookup_fast", + .optional = NULL, + .retprobe = CONFIG_BOOLEAN_NO}, + {.program_name = NULL}}; + +netdata_ebpf_targets_t dc_targets[] = { {.name = "lookup_fast", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "d_lookup", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/dc.skel.h" // BTF code + +static struct dc_bpf *bpf_obj = NULL; + +/** + * Disable probe + * + * Disable all probes to use exclusively another method. + * + * @param obj is the main structure for bpf objects + */ +static inline void ebpf_dc_disable_probes(struct dc_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_lookup_fast_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_d_lookup_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_dcstat_release_task_kprobe, false); +} + +/* + * Disable trampoline + * + * Disable all trampoline to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_dc_disable_trampoline(struct dc_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_lookup_fast_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_d_lookup_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_dcstat_release_task_fentry, false); +} + +/** + * Set trampoline target + * + * Set the targets we will monitor. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_dc_set_trampoline_target(struct dc_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_lookup_fast_fentry, 0, + dc_targets[NETDATA_DC_TARGET_LOOKUP_FAST].name); + + bpf_program__set_attach_target(obj->progs.netdata_d_lookup_fexit, 0, + dc_targets[NETDATA_DC_TARGET_D_LOOKUP].name); + + bpf_program__set_attach_target(obj->progs.netdata_dcstat_release_task_fentry, 0, + EBPF_COMMON_FNCT_CLEAN_UP); +} + +/** + * Mount Attach Probe + * + * Attach probes to target + * + * @param obj is the main structure for bpf objects. + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_dc_attach_probes(struct dc_bpf *obj) +{ + obj->links.netdata_d_lookup_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_d_lookup_kretprobe, + true, + dc_targets[NETDATA_DC_TARGET_D_LOOKUP].name); + int ret = libbpf_get_error(obj->links.netdata_d_lookup_kretprobe); + if (ret) + return -1; + + char *lookup_name = (dc_optional_name[NETDATA_DC_TARGET_LOOKUP_FAST].optional) ? + dc_optional_name[NETDATA_DC_TARGET_LOOKUP_FAST].optional : + dc_targets[NETDATA_DC_TARGET_LOOKUP_FAST].name ; + + obj->links.netdata_lookup_fast_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_lookup_fast_kprobe, + false, + lookup_name); + ret = libbpf_get_error(obj->links.netdata_lookup_fast_kprobe); + if (ret) + return -1; + + obj->links.netdata_dcstat_release_task_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_dcstat_release_task_kprobe, + false, + EBPF_COMMON_FNCT_CLEAN_UP); + ret = libbpf_get_error(obj->links.netdata_dcstat_release_task_kprobe); + if (ret) + return -1; + + return 0; +} + +/** + * Adjust Map Size + * + * Resize maps according input from users. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + */ +static void ebpf_dc_adjust_map_size(struct dc_bpf *obj, ebpf_module_t *em) +{ + ebpf_update_map_size(obj->maps.dcstat_pid, &dcstat_maps[NETDATA_DCSTAT_PID_STATS], + em, bpf_map__name(obj->maps.dcstat_pid)); +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_dc_set_hash_tables(struct dc_bpf *obj) +{ + dcstat_maps[NETDATA_DCSTAT_GLOBAL_STATS].map_fd = bpf_map__fd(obj->maps.dcstat_global); + dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd = bpf_map__fd(obj->maps.dcstat_pid); + dcstat_maps[NETDATA_DCSTAT_CTRL].map_fd = bpf_map__fd(obj->maps.dcstat_ctrl); +} + +/** + * Update Load + * + * For directory cache, some distributions change the function name, and we do not have condition to use + * TRAMPOLINE like other functions. + * + * @param em structure with configuration + * + * @return When then symbols were not modified, it returns TRAMPOLINE, else it returns RETPROBE. + */ +netdata_ebpf_program_loaded_t ebpf_dc_update_load(ebpf_module_t *em) +{ + if (!strcmp(dc_optional_name[NETDATA_DC_TARGET_LOOKUP_FAST].optional, + dc_optional_name[NETDATA_DC_TARGET_LOOKUP_FAST].function_to_attach)) + return EBPF_LOAD_TRAMPOLINE; + + if (em->targets[NETDATA_DC_TARGET_LOOKUP_FAST].mode != EBPF_LOAD_RETPROBE) + info("When your kernel was compiled the symbol %s was modified, instead to use `trampoline`, the plugin will use `probes`.", + dc_optional_name[NETDATA_DC_TARGET_LOOKUP_FAST].function_to_attach); + + return EBPF_LOAD_RETPROBE; +} + +/** + * Disable Release Task + * + * Disable release task when apps is not enabled. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_dc_disable_release_task(struct dc_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_dcstat_release_task_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_dcstat_release_task_fentry, false); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_dc_load_and_attach(struct dc_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_program_loaded_t test = ebpf_dc_update_load(em); + if (test == EBPF_LOAD_TRAMPOLINE) { + ebpf_dc_disable_probes(obj); + + ebpf_dc_set_trampoline_target(obj); + } else { + ebpf_dc_disable_trampoline(obj); + } + + ebpf_dc_adjust_map_size(obj, em); + + if (!em->apps_charts && !em->cgroup_charts) + ebpf_dc_disable_release_task(obj); + + int ret = dc_bpf__load(obj); + if (ret) { + return ret; + } + + ret = (test == EBPF_LOAD_TRAMPOLINE) ? dc_bpf__attach(obj) : ebpf_dc_attach_probes(obj); + if (!ret) { + ebpf_dc_set_hash_tables(obj); + + ebpf_update_controller(dcstat_maps[NETDATA_DCSTAT_CTRL].map_fd, em); + } + + return ret; +} +#endif + +/***************************************************************** + * + * COMMON FUNCTIONS + * + *****************************************************************/ + +/** + * Update publish + * + * Update publish values before to write dimension. + * + * @param out structure that will receive data. + * @param cache_access number of access to directory cache. + * @param not_found number of files not found on the file system + */ +void dcstat_update_publish(netdata_publish_dcstat_t *out, uint64_t cache_access, uint64_t not_found) +{ + NETDATA_DOUBLE successful_access = (NETDATA_DOUBLE) (((long long)cache_access) - ((long long)not_found)); + NETDATA_DOUBLE ratio = (cache_access) ? successful_access/(NETDATA_DOUBLE)cache_access : 0; + + out->ratio = (long long )(ratio*100); +} + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean names + * + * Clean the optional names allocated during startup. + */ +void ebpf_dcstat_clean_names() +{ + size_t i = 0; + while (dc_optional_name[i].program_name) { + freez(dc_optional_name[i].optional); + i++; + } +} + +/** + * DCstat Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_dcstat_free(ebpf_module_t *em ) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + freez(dcstat_vector); + freez(dcstat_values); + freez(dcstat_threads.thread); + + ebpf_cleanup_publish_syscall(dcstat_counter_publish_aggregated); + + ebpf_dcstat_clean_names(); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + dc_bpf__destroy(bpf_obj); +#endif + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * DCstat exit + * + * Cancel child and exit. + * + * @param ptr thread data. + */ +static void ebpf_dcstat_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*dcstat_threads.thread); + ebpf_dcstat_free(em); +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_dcstat_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_dcstat_free(em); +} + +/***************************************************************** + * + * APPS + * + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 20100, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_charts_on_apps(NETDATA_DC_REFERENCE_CHART, + "Count file access", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20101, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20102, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20103, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void dcstat_apps_accumulator(netdata_dcstat_pid_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_dcstat_pid_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_dcstat_pid_t *w = &out[i]; + total->cache_access += w->cache_access; + total->file_system += w->file_system; + total->not_found += w->not_found; + } +} + +/** + * Save PID values + * + * Save the current values inside the structure + * + * @param out vector used to plot charts + * @param publish vector with values read from hash tables. + */ +static inline void dcstat_save_pid_values(netdata_publish_dcstat_t *out, netdata_dcstat_pid_t *publish) +{ + memcpy(&out->curr, &publish[0], sizeof(netdata_dcstat_pid_t)); +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void dcstat_fill_pid(uint32_t current_pid, netdata_dcstat_pid_t *publish) +{ + netdata_publish_dcstat_t *curr = dcstat_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_dcstat_t)); + dcstat_pid[current_pid] = curr; + } + + dcstat_save_pid_values(curr, publish); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. + */ +static void read_apps_table() +{ + netdata_dcstat_pid_t *cv = dcstat_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; + size_t length = sizeof(netdata_dcstat_pid_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + dcstat_apps_accumulator(cv); + + dcstat_fill_pid(key, cv); + + // We are cleaning to avoid passing data read from one process to other. + memset(cv, 0, length); + + pids = pids->next; + } +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_dc_cgroup() +{ + netdata_dcstat_pid_t *cv = dcstat_vector; + int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; + size_t length = sizeof(netdata_dcstat_pid_t)*ebpf_nprocs; + + ebpf_cgroup_target_t *ect; + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_dcstat_pid_t *out = &pids->dc; + if (likely(dcstat_pid) && dcstat_pid[pid]) { + netdata_publish_dcstat_t *in = dcstat_pid[pid]; + + memcpy(out, &in->curr, sizeof(netdata_dcstat_pid_t)); + } else { + memset(cv, 0, length); + if (bpf_map_lookup_elem(fd, &pid, cv)) { + continue; + } + + dcstat_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_dcstat_pid_t)); + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Read global table + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + uint32_t idx; + netdata_idx_t *val = dcstat_hash_values; + netdata_idx_t *stored = dcstat_values; + int fd = dcstat_maps[NETDATA_DCSTAT_GLOBAL_STATS].map_fd; + + for (idx = NETDATA_KEY_DC_REFERENCE; idx < NETDATA_DIRECTORY_CACHE_END; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, stored)) { + int i; + int end = ebpf_nprocs; + netdata_idx_t total = 0; + for (i = 0; i < end; i++) + total += stored[i]; + + val[idx] = total; + } + } +} + +/** + * DCstat read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_dcstat_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_dcstat_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_DCSTAT_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. + * @param root structure with listed IPs + */ +void ebpf_dcstat_sum_pids(netdata_publish_dcstat_t *publish, struct pid_on_target *root) +{ + memset(&publish->curr, 0, sizeof(netdata_dcstat_pid_t)); + netdata_dcstat_pid_t *dst = &publish->curr; + while (root) { + int32_t pid = root->pid; + netdata_publish_dcstat_t *w = dcstat_pid[pid]; + if (w) { + netdata_dcstat_pid_t *src = &w->curr; + dst->cache_access += src->cache_access; + dst->file_system += src->file_system; + dst->not_found += src->not_found; + } + + root = root->next; + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param root the target list. +*/ +void ebpf_dcache_send_apps_data(struct target *root) +{ + struct target *w; + collected_number value; + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_HIT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_dcstat_sum_pids(&w->dcstat, w->root_pid); + + uint64_t cache = w->dcstat.curr.cache_access; + uint64_t not_found = w->dcstat.curr.not_found; + + dcstat_update_publish(&w->dcstat, cache, not_found); + value = (collected_number) w->dcstat.ratio; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REFERENCE_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + if (w->dcstat.curr.cache_access < w->dcstat.prev.cache_access) { + w->dcstat.prev.cache_access = 0; + } + + w->dcstat.cache_access = (long long)w->dcstat.curr.cache_access - (long long)w->dcstat.prev.cache_access; + value = (collected_number) w->dcstat.cache_access; + write_chart_dimension(w->name, value); + w->dcstat.prev.cache_access = w->dcstat.curr.cache_access; + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + if (w->dcstat.curr.file_system < w->dcstat.prev.file_system) { + w->dcstat.prev.file_system = 0; + } + + value = (collected_number) (!w->dcstat.cache_access) ? 0 : + (long long )w->dcstat.curr.file_system - (long long)w->dcstat.prev.file_system; + write_chart_dimension(w->name, value); + w->dcstat.prev.file_system = w->dcstat.curr.file_system; + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + if (w->dcstat.curr.not_found < w->dcstat.prev.not_found) { + w->dcstat.prev.not_found = 0; + } + value = (collected_number) (!w->dcstat.cache_access) ? 0 : + (long long)w->dcstat.curr.not_found - (long long)w->dcstat.prev.not_found; + write_chart_dimension(w->name, value); + w->dcstat.prev.not_found = w->dcstat.curr.not_found; + } + } + write_end_chart(); +} + +/** + * Send global + * + * Send global charts to Netdata + */ +static void dcstat_send_global(netdata_publish_dcstat_t *publish) +{ + dcstat_update_publish(publish, dcstat_hash_values[NETDATA_KEY_DC_REFERENCE], + dcstat_hash_values[NETDATA_KEY_DC_MISS]); + + netdata_publish_syscall_t *ptr = dcstat_counter_publish_aggregated; + netdata_idx_t value = dcstat_hash_values[NETDATA_KEY_DC_REFERENCE]; + if (value != ptr[NETDATA_DCSTAT_IDX_REFERENCE].pcall) { + ptr[NETDATA_DCSTAT_IDX_REFERENCE].ncall = value - ptr[NETDATA_DCSTAT_IDX_REFERENCE].pcall; + ptr[NETDATA_DCSTAT_IDX_REFERENCE].pcall = value; + + value = dcstat_hash_values[NETDATA_KEY_DC_SLOW]; + ptr[NETDATA_DCSTAT_IDX_SLOW].ncall = value - ptr[NETDATA_DCSTAT_IDX_SLOW].pcall; + ptr[NETDATA_DCSTAT_IDX_SLOW].pcall = value; + + value = dcstat_hash_values[NETDATA_KEY_DC_MISS]; + ptr[NETDATA_DCSTAT_IDX_MISS].ncall = value - ptr[NETDATA_DCSTAT_IDX_MISS].pcall; + ptr[NETDATA_DCSTAT_IDX_MISS].pcall = value; + } else { + ptr[NETDATA_DCSTAT_IDX_REFERENCE].ncall = 0; + ptr[NETDATA_DCSTAT_IDX_SLOW].ncall = 0; + ptr[NETDATA_DCSTAT_IDX_MISS].ncall = 0; + } + + ebpf_one_dimension_write_charts(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, + ptr[NETDATA_DCSTAT_IDX_RATIO].dimension, publish->ratio); + + write_count_chart( + NETDATA_DC_REFERENCE_CHART, NETDATA_FILESYSTEM_FAMILY, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 3); +} + +/** + * Create specific directory cache charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_specific_dc_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_DC_HIT_CHART, "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700, + ebpf_create_global_dimension, + dcstat_counter_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_chart(type, NETDATA_DC_REFERENCE_CHART, "Count file access", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_REFERENCE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 1, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW], 1, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5703, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS], 1, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); +} + +/** + * Obsolete specific directory cache charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_dc_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_DC_REFERENCE_CHART, + "Count file access", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_REFERENCE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5703, update_every); +} + +/** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. + * @param root structure with listed IPs + */ +void ebpf_dc_sum_cgroup_pids(netdata_publish_dcstat_t *publish, struct pid_on_target2 *root) +{ + memset(&publish->curr, 0, sizeof(netdata_dcstat_pid_t)); + netdata_dcstat_pid_t *dst = &publish->curr; + while (root) { + netdata_dcstat_pid_t *src = &root->dc; + + dst->cache_access += src->cache_access; + dst->file_system += src->file_system; + dst->not_found += src->not_found; + + root = root->next; + } +} + +/** + * Calc chart values + * + * Do necessary math to plot charts. + */ +void ebpf_dc_calc_chart_values() +{ + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_dc_sum_cgroup_pids(&ect->publish_dc, ect->pids); + uint64_t cache = ect->publish_dc.curr.cache_access; + uint64_t not_found = ect->publish_dc.curr.not_found; + + dcstat_update_publish(&ect->publish_dc, cache, not_found); + + ect->publish_dc.cache_access = (long long)ect->publish_dc.curr.cache_access - + (long long)ect->publish_dc.prev.cache_access; + ect->publish_dc.prev.cache_access = ect->publish_dc.curr.cache_access; + + if (ect->publish_dc.curr.not_found < ect->publish_dc.prev.not_found) { + ect->publish_dc.prev.not_found = 0; + } + } +} + +/** + * Create Systemd directory cache Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_dc_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21200, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_DC_REFERENCE_CHART, + "Count file access", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21201, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21202, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21202, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); +} + +/** + * Send Directory Cache charts + * + * Send collected data to Netdata. + */ +static void ebpf_send_systemd_dc_charts() +{ + collected_number value; + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_dc.ratio); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_dc.cache_access); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + value = (collected_number) (!ect->publish_dc.cache_access) ? 0 : + (long long )ect->publish_dc.curr.file_system - (long long)ect->publish_dc.prev.file_system; + ect->publish_dc.prev.file_system = ect->publish_dc.curr.file_system; + + write_chart_dimension(ect->name, (long long) value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + value = (collected_number) (!ect->publish_dc.cache_access) ? 0 : + (long long)ect->publish_dc.curr.not_found - (long long)ect->publish_dc.prev.not_found; + + ect->publish_dc.prev.not_found = ect->publish_dc.curr.not_found; + + write_chart_dimension(ect->name, (long long) value); + } + } + write_end_chart(); +} + +/** + * Send Directory Cache charts + * + * Send collected data to Netdata. + * + */ +static void ebpf_send_specific_dc_data(char *type, netdata_publish_dcstat_t *pdc) +{ + collected_number value; + write_begin_chart(type, NETDATA_DC_HIT_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_RATIO].name, + (long long) pdc->ratio); + write_end_chart(); + + write_begin_chart(type, NETDATA_DC_REFERENCE_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE].name, + (long long) pdc->cache_access); + write_end_chart(); + + value = (collected_number) (!pdc->cache_access) ? 0 : + (long long )pdc->curr.file_system - (long long)pdc->prev.file_system; + pdc->prev.file_system = pdc->curr.file_system; + + write_begin_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW].name, (long long) value); + write_end_chart(); + + value = (collected_number) (!pdc->cache_access) ? 0 : + (long long)pdc->curr.not_found - (long long)pdc->prev.not_found; + pdc->prev.not_found = pdc->curr.not_found; + + write_begin_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS].name, (long long) value); + write_end_chart(); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. +*/ +void ebpf_dc_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + ebpf_dc_calc_chart_values(); + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_dc_charts(update_every); + } + + ebpf_send_systemd_dc_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_DC_CHART) && ect->updated) { + ebpf_create_specific_dc_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_DC_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_DC_CHART) { + if (ect->updated) { + ebpf_send_specific_dc_data(ect->name, &ect->publish_dc); + } else { + ebpf_obsolete_specific_dc_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_DC_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. +*/ +static void dcstat_collector(ebpf_module_t *em) +{ + dcstat_threads.thread = mallocz(sizeof(netdata_thread_t)); + dcstat_threads.start_routine = ebpf_dcstat_read_hash; + + netdata_thread_create(dcstat_threads.thread, dcstat_threads.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_dcstat_read_hash, em); + + netdata_publish_dcstat_t publish; + memset(&publish, 0, sizeof(publish)); + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + read_apps_table(); + + if (cgroups) + ebpf_update_dc_cgroup(); + + pthread_mutex_lock(&lock); + + dcstat_send_global(&publish); + + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_dcache_send_apps_data(apps_groups_root_target); + + if (cgroups) + ebpf_dc_send_cgroup_data(update_every); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Create filesystem charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_filesystem_charts(int update_every) +{ + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21200, + ebpf_create_global_dimension, + dcstat_counter_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_REFERENCE_CHART, + "Variables used to calculate hit ratio.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21201, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 3, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + fflush(stdout); +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_dcstat_allocate_global_vectors(int apps) +{ + if (apps) + dcstat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_dcstat_t *)); + + dcstat_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_dcstat_pid_t)); + dcstat_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); + + memset(dcstat_counter_aggregated_data, 0, NETDATA_DCSTAT_IDX_END * sizeof(netdata_syscall_stat_t)); + memset(dcstat_counter_publish_aggregated, 0, NETDATA_DCSTAT_IDX_END * sizeof(netdata_publish_syscall_t)); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_dcstat_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_DC_TARGET_LOOKUP_FAST].mode); + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = dc_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_dc_load_and_attach(bpf_obj, em); + } +#endif + + if (ret) + error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + +/** + * Directory Cache thread + * + * Thread used to make dcstat thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always returns NULL + */ +void *ebpf_dcstat_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_dcstat_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = dcstat_maps; + + ebpf_update_pid_table(&dcstat_maps[NETDATA_DCSTAT_PID_STATS], em); + + ebpf_update_names(dc_optional_name, em); + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_dcstat_load_bpf(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto enddcstat; + } + + ebpf_dcstat_allocate_global_vectors(em->apps_charts); + + int algorithms[NETDATA_DCSTAT_IDX_END] = { + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, + NETDATA_EBPF_ABSOLUTE_IDX + }; + + ebpf_global_labels(dcstat_counter_aggregated_data, dcstat_counter_publish_aggregated, + dcstat_counter_dimension_name, dcstat_counter_dimension_name, + algorithms, NETDATA_DCSTAT_IDX_END); + + pthread_mutex_lock(&lock); + ebpf_create_filesystem_charts(em->update_every); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + dcstat_collector(em); + +enddcstat: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h b/collectors/ebpf.plugin/ebpf_dcstat.h new file mode 100644 index 0000000..d8687f9 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_dcstat.h @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_DCSTAT_H +#define NETDATA_EBPF_DCSTAT_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_DCSTAT "dcstat" + +// charts +#define NETDATA_DC_HIT_CHART "dc_hit_ratio" +#define NETDATA_DC_REFERENCE_CHART "dc_reference" +#define NETDATA_DC_REQUEST_NOT_CACHE_CHART "dc_not_cache" +#define NETDATA_DC_REQUEST_NOT_FOUND_CHART "dc_not_found" + +#define NETDATA_DIRECTORY_CACHE_SUBMENU "directory cache (eBPF)" + +// configuration file +#define NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE "dcstat.conf" + +// Contexts +#define NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT "cgroup.dc_ratio" +#define NETDATA_CGROUP_DC_REFERENCE_CONTEXT "cgroup.dc_reference" +#define NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT "cgroup.dc_not_cache" +#define NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT "cgroup.dc_not_found" + +#define NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT "services.dc_ratio" +#define NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT "services.dc_reference" +#define NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT "services.dc_not_cache" +#define NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT "services.dc_not_found" + +#define NETDATA_LATENCY_DCSTAT_SLEEP_MS 700000ULL + +enum directory_cache_indexes { + NETDATA_DCSTAT_IDX_RATIO, + NETDATA_DCSTAT_IDX_REFERENCE, + NETDATA_DCSTAT_IDX_SLOW, + NETDATA_DCSTAT_IDX_MISS, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_DCSTAT_IDX_END +}; + +enum directory_cache_tables { + NETDATA_DCSTAT_GLOBAL_STATS, + NETDATA_DCSTAT_PID_STATS, + NETDATA_DCSTAT_CTRL +}; + +// variables +enum directory_cache_counters { + NETDATA_KEY_DC_REFERENCE, + NETDATA_KEY_DC_SLOW, + NETDATA_KEY_DC_MISS, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_DIRECTORY_CACHE_END +}; + +enum directory_cache_targets { + NETDATA_DC_TARGET_LOOKUP_FAST, + NETDATA_DC_TARGET_D_LOOKUP +}; + +typedef struct netdata_publish_dcstat_pid { + uint64_t cache_access; + uint64_t file_system; + uint64_t not_found; +} netdata_dcstat_pid_t; + +typedef struct netdata_publish_dcstat { + long long ratio; + long long cache_access; + + netdata_dcstat_pid_t curr; + netdata_dcstat_pid_t prev; +} netdata_publish_dcstat_t; + +void *ebpf_dcstat_thread(void *ptr); +void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr); +extern struct config dcstat_config; +extern netdata_ebpf_targets_t dc_targets[]; +extern ebpf_local_maps_t dcstat_maps[]; + +#endif // NETDATA_EBPF_DCSTAT_H diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c new file mode 100644 index 0000000..a27bd81 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -0,0 +1,865 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include + +#include "ebpf.h" +#include "ebpf_disk.h" + +struct config disk_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t disk_maps[] = {{.name = "tbl_disk_iocall", .internal_input = NETDATA_DISK_HISTOGRAM_LENGTH, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; +static avl_tree_lock disk_tree; +netdata_ebpf_disks_t *disk_list = NULL; + +char *tracepoint_block_type = { "block"} ; +char *tracepoint_block_issue = { "block_rq_issue" }; +char *tracepoint_block_rq_complete = { "block_rq_complete" }; + +static int was_block_issue_enabled = 0; +static int was_block_rq_complete_enabled = 0; + +static char **dimensions = NULL; +static netdata_syscall_stat_t disk_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; +static netdata_publish_syscall_t disk_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; + +static netdata_idx_t *disk_hash_values = NULL; +static struct netdata_static_thread disk_threads = { + .name = "DISK KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +ebpf_publish_disk_t *plot_disks = NULL; +pthread_mutex_t plot_mutex; + +/***************************************************************** + * + * FUNCTIONS TO MANIPULATE HARD DISKS + * + *****************************************************************/ + +/** + * Parse start + * + * Parse start address of disk + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_disk_parse_start(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, 4095); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + w->start = strtoul(content, NULL, 10); + } + close(fd); + + return 0; +} + +/** + * Parse uevent + * + * Parse uevent file + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_parse_uevent(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, FILENAME_MAX); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + + char *s = strstr(content, "PARTNAME=EFI"); + if (s) { + w->main->boot_partition = w; + w->flags |= NETDATA_DISK_HAS_EFI; + w->boot_chart = strdupz("disk_bootsector"); + } + } + close(fd); + + return 0; +} + +/** + * Parse Size + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_parse_size(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, FILENAME_MAX); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + w->end = w->start + strtoul(content, NULL, 10) -1; + } + close(fd); + + return 0; +} + +/** + * Read Disk information + * + * Read disk information from /sys/block + * + * @param w structure where data is stored + * @param name disk name + */ +static void ebpf_read_disk_info(netdata_ebpf_disks_t *w, char *name) +{ + static netdata_ebpf_disks_t *main_disk = NULL; + static uint32_t key = 0; + char *path = { "/sys/block" }; + char disk[NETDATA_DISK_NAME_LEN + 1]; + char filename[FILENAME_MAX + 1]; + snprintfz(disk, NETDATA_DISK_NAME_LEN, "%s", name); + size_t length = strlen(disk); + if (!length) { + return; + } + + length--; + size_t curr = length; + while (isdigit((int)disk[length])) { + disk[length--] = '\0'; + } + + // We are looking for partition information, if it is a device we will ignore it. + if (curr == length) { + main_disk = w; + key = MKDEV(w->major, w->minor); + w->bootsector_key = key; + return; + } + w->bootsector_key = key; + w->main = main_disk; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/uevent", path, disk, name); + if (ebpf_parse_uevent(w, filename)) + return; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/start", path, disk, name); + if (ebpf_disk_parse_start(w, filename)) + return; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/size", path, disk, name); + ebpf_parse_size(w, filename); +} + +/** + * New encode dev + * + * New encode algorithm extracted from https://elixir.bootlin.com/linux/v5.10.8/source/include/linux/kdev_t.h#L39 + * + * @param major driver major number + * @param minor driver minor number + * + * @return + */ +static inline uint32_t netdata_new_encode_dev(uint32_t major, uint32_t minor) { + return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); +} + +/** + * Compare disks + * + * Compare major and minor values to add disks to tree. + * + * @param a pointer to netdata_ebpf_disks + * @param b pointer to netdata_ebpf_disks + * + * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b. +*/ +static int ebpf_compare_disks(void *a, void *b) +{ + netdata_ebpf_disks_t *ptr1 = a; + netdata_ebpf_disks_t *ptr2 = b; + + if (ptr1->dev > ptr2->dev) + return 1; + if (ptr1->dev < ptr2->dev) + return -1; + + return 0; +} + +/** + * Update listen table + * + * Update link list when it is necessary. + * + * @param name disk name + * @param major major disk identifier + * @param minor minor disk identifier + * @param current_time current timestamp + */ +static void update_disk_table(char *name, int major, int minor, time_t current_time) +{ + netdata_ebpf_disks_t find; + netdata_ebpf_disks_t *w; + size_t length; + + uint32_t dev = netdata_new_encode_dev(major, minor); + find.dev = dev; + netdata_ebpf_disks_t *ret = (netdata_ebpf_disks_t *) avl_search_lock(&disk_tree, (avl_t *)&find); + if (ret) { // Disk is already present + ret->flags |= NETDATA_DISK_IS_HERE; + ret->last_update = current_time; + return; + } + + netdata_ebpf_disks_t *update_next = disk_list; + if (likely(disk_list)) { + netdata_ebpf_disks_t *move = disk_list; + while (move) { + if (dev == move->dev) + return; + + update_next = move; + move = move->next; + } + + w = callocz(1, sizeof(netdata_ebpf_disks_t)); + length = strlen(name); + if (length >= NETDATA_DISK_NAME_LEN) + length = NETDATA_DISK_NAME_LEN; + + memcpy(w->family, name, length); + w->family[length] = '\0'; + w->major = major; + w->minor = minor; + w->dev = netdata_new_encode_dev(major, minor); + update_next->next = w; + } else { + disk_list = callocz(1, sizeof(netdata_ebpf_disks_t)); + length = strlen(name); + if (length >= NETDATA_DISK_NAME_LEN) + length = NETDATA_DISK_NAME_LEN; + + memcpy(disk_list->family, name, length); + disk_list->family[length] = '\0'; + disk_list->major = major; + disk_list->minor = minor; + disk_list->dev = netdata_new_encode_dev(major, minor); + + w = disk_list; + } + + ebpf_read_disk_info(w, name); + + netdata_ebpf_disks_t *check; + check = (netdata_ebpf_disks_t *) avl_insert_lock(&disk_tree, (avl_t *)w); + if (check != w) + error("Internal error, cannot insert the AVL tree."); + +#ifdef NETDATA_INTERNAL_CHECKS + info("The Latency is monitoring the hard disk %s (Major = %d, Minor = %d, Device = %u)", name, major, minor,w->dev); +#endif + + w->flags |= NETDATA_DISK_IS_HERE; +} + +/** + * Read Local Disks + * + * Parse /proc/partitions to get block disks used to measure latency. + * + * @return It returns 0 on success and -1 otherwise + */ +static int read_local_disks() +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, NETDATA_EBPF_PROC_PARTITIONS); + procfile *ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); + if (!ff) + return -1; + + ff = procfile_readall(ff); + if (!ff) + return -1; + + size_t lines = procfile_lines(ff), l; + time_t current_time = now_realtime_sec(); + for(l = 2; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + // This is header or end of file + if (unlikely(words < 4)) + continue; + + int major = (int)strtol(procfile_lineword(ff, l, 0), NULL, 10); + // The main goal of this thread is to measure block devices, so any block device with major number + // smaller than 7 according /proc/devices is not "important". + if (major > 7) { + int minor = (int)strtol(procfile_lineword(ff, l, 1), NULL, 10); + update_disk_table(procfile_lineword(ff, l, 3), major, minor, current_time); + } + } + + procfile_close(ff); + + return 0; +} + +/** + * Update disks + * + * @param em main thread structure + */ +void ebpf_update_disks(ebpf_module_t *em) +{ + static time_t update_every = 0; + time_t curr = now_realtime_sec(); + if (curr < update_every) + return; + + update_every = curr + 5 * em->update_every; + + (void)read_local_disks(); +} + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Disk disable tracepoints + * + * Disable tracepoints when the plugin was responsible to enable it. + */ +static void ebpf_disk_disable_tracepoints() +{ + char *default_message = { "Cannot disable the tracepoint" }; + if (!was_block_issue_enabled) { + if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_issue)) + error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_issue); + } + + if (!was_block_rq_complete_enabled) { + if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_rq_complete)) + error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_rq_complete); + } +} + +/** + * Cleanup plot disks + * + * Clean disk list + */ +static void ebpf_cleanup_plot_disks() +{ + ebpf_publish_disk_t *move = plot_disks, *next; + while (move) { + next = move->next; + + freez(move); + + move = next; + } +} + +/** + * Cleanup Disk List + */ +static void ebpf_cleanup_disk_list() +{ + netdata_ebpf_disks_t *move = disk_list; + while (move) { + netdata_ebpf_disks_t *next = move->next; + + freez(move->histogram.name); + freez(move->boot_chart); + freez(move); + + move = next; + } +} + +/** + * DISK Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_disk_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_disk_disable_tracepoints(); + + if (dimensions) + ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + freez(disk_hash_values); + freez(disk_threads.thread); + pthread_mutex_destroy(&plot_mutex); + + ebpf_cleanup_plot_disks(); + ebpf_cleanup_disk_list(); + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Disk exit. + * + * Cancel child and exit. + * + * @param ptr thread data. + */ +static void ebpf_disk_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*disk_threads.thread); + ebpf_disk_free(em); +} + +/** + * Disk Cleanup + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void ebpf_disk_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_disk_free(em); +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Fill Plot list + * + * @param ptr a pointer for current disk + */ +static void ebpf_fill_plot_disks(netdata_ebpf_disks_t *ptr) +{ + pthread_mutex_lock(&plot_mutex); + ebpf_publish_disk_t *w; + if (likely(plot_disks)) { + ebpf_publish_disk_t *move = plot_disks, *store = plot_disks; + while (move) { + if (move->plot == ptr) { + pthread_mutex_unlock(&plot_mutex); + return; + } + + store = move; + move = move->next; + } + + w = callocz(1, sizeof(ebpf_publish_disk_t)); + w->plot = ptr; + store->next = w; + } else { + plot_disks = callocz(1, sizeof(ebpf_publish_disk_t)); + plot_disks->plot = ptr; + } + pthread_mutex_unlock(&plot_mutex); + + ptr->flags |= NETDATA_DISK_ADDED_TO_PLOT_LIST; +} + +/** + * Read hard disk table + * + * @param table file descriptor for table + * + * Read the table with number of calls for all functions + */ +static void read_hard_disk_tables(int table) +{ + netdata_idx_t *values = disk_hash_values; + block_key_t key = {}; + block_key_t next_key = {}; + + netdata_ebpf_disks_t *ret = NULL; + + while (bpf_map_get_next_key(table, &key, &next_key) == 0) { + int test = bpf_map_lookup_elem(table, &key, values); + if (test < 0) { + key = next_key; + continue; + } + + netdata_ebpf_disks_t find; + find.dev = key.dev; + + if (likely(ret)) { + if (find.dev != ret->dev) + ret = (netdata_ebpf_disks_t *)avl_search_lock(&disk_tree, (avl_t *)&find); + } else + ret = (netdata_ebpf_disks_t *)avl_search_lock(&disk_tree, (avl_t *)&find); + + // Disk was inserted after we parse /proc/partitions + if (!ret) { + if (read_local_disks()) { + key = next_key; + continue; + } + + ret = (netdata_ebpf_disks_t *)avl_search_lock(&disk_tree, (avl_t *)&find); + if (!ret) { + // We should never reach this point, but we are adding it to keep a safe code + key = next_key; + continue; + } + } + + uint64_t total = 0; + int i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total += values[i]; + } + + ret->histogram.histogram[key.bin] = total; + + if (!(ret->flags & NETDATA_DISK_ADDED_TO_PLOT_LIST)) + ebpf_fill_plot_disks(ret); + + key = next_key; + } +} + +/** + * Disk read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_disk_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_disk_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_DISK_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_hard_disk_tables(disk_maps[NETDATA_DISK_READ].map_fd); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Obsolete Hard Disk charts + * + * Make Hard disk charts and fill chart name + * + * @param w the structure with necessary information to create the chart + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_hd_charts(netdata_ebpf_disks_t *w, int update_every) +{ + ebpf_write_chart_obsolete(w->histogram.name, w->family, w->histogram.title, EBPF_COMMON_DIMENSION_CALL, + w->family, NETDATA_EBPF_CHART_TYPE_STACKED, "disk.latency_io", + w->histogram.order, update_every); + + w->flags = 0; +} + +/** + * Create Hard Disk charts + * + * Make Hard disk charts and fill chart name + * + * @param w the structure with necessary information to create the chart + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_hd_charts(netdata_ebpf_disks_t *w, int update_every) +{ + int order = NETDATA_CHART_PRIO_DISK_LATENCY; + char *family = w->family; + + w->histogram.name = strdupz("disk_latency_io"); + w->histogram.title = NULL; + w->histogram.order = order; + + ebpf_create_chart(w->histogram.name, family, "Disk latency", EBPF_COMMON_DIMENSION_CALL, + family, "disk.latency_io", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, disk_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_DISK); + order++; + + w->flags |= NETDATA_DISK_CHART_CREATED; +} + +/** + * Remove pointer from plot + * + * Remove pointer from plot list when the disk is not present. + */ +static void ebpf_remove_pointer_from_plot_disk(ebpf_module_t *em) +{ + time_t current_time = now_realtime_sec(); + time_t limit = 10 * em->update_every; + pthread_mutex_lock(&plot_mutex); + ebpf_publish_disk_t *move = plot_disks, *prev = plot_disks; + int update_every = em->update_every; + while (move) { + netdata_ebpf_disks_t *ned = move->plot; + uint32_t flags = ned->flags; + + if (!(flags & NETDATA_DISK_IS_HERE) && ((current_time - ned->last_update) > limit)) { + ebpf_obsolete_hd_charts(ned, update_every); + avl_t *ret = (avl_t *)avl_remove_lock(&disk_tree, (avl_t *)ned); + UNUSED(ret); + if (move == plot_disks) { + freez(move); + plot_disks = NULL; + break; + } else { + prev->next = move->next; + ebpf_publish_disk_t *clean = move; + move = move->next; + freez(clean); + continue; + } + } + + prev = move; + move = move->next; + } + pthread_mutex_unlock(&plot_mutex); +} + +/** + * Send Hard disk data + * + * Send hard disk information to Netdata. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_latency_send_hd_data(int update_every) +{ + pthread_mutex_lock(&plot_mutex); + if (!plot_disks) { + pthread_mutex_unlock(&plot_mutex); + return; + } + + ebpf_publish_disk_t *move = plot_disks; + while (move) { + netdata_ebpf_disks_t *ned = move->plot; + uint32_t flags = ned->flags; + if (!(flags & NETDATA_DISK_CHART_CREATED)) { + ebpf_create_hd_charts(ned, update_every); + } + + if ((flags & NETDATA_DISK_CHART_CREATED)) { + write_histogram_chart(ned->histogram.name, ned->family, + ned->histogram.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + } + + ned->flags &= ~NETDATA_DISK_IS_HERE; + + move = move->next; + } + pthread_mutex_unlock(&plot_mutex); +} + +/** +* Main loop for this collector. +*/ +static void disk_collector(ebpf_module_t *em) +{ + disk_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + disk_threads.thread = mallocz(sizeof(netdata_thread_t)); + disk_threads.start_routine = ebpf_disk_read_hash; + + netdata_thread_create(disk_threads.thread, disk_threads.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_disk_read_hash, em); + + int update_every = em->update_every; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + ebpf_remove_pointer_from_plot_disk(em); + ebpf_latency_send_hd_data(update_every); + + pthread_mutex_unlock(&lock); + + ebpf_update_disks(em); + } +} + +/***************************************************************** + * + * EBPF DISK THREAD + * + *****************************************************************/ + +/** + * Enable tracepoints + * + * Enable necessary tracepoints for thread. + * + * @return It returns 0 on success and -1 otherwise + */ +static int ebpf_disk_enable_tracepoints() +{ + int test = ebpf_is_tracepoint_enabled(tracepoint_block_type, tracepoint_block_issue); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_block_type, tracepoint_block_issue)) + return -1; + } + was_block_issue_enabled = test; + + test = ebpf_is_tracepoint_enabled(tracepoint_block_type, tracepoint_block_rq_complete); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_block_type, tracepoint_block_rq_complete)) + return -1; + } + was_block_rq_complete_enabled = test; + + return 0; +} + +/** + * Disk thread + * + * Thread used to generate disk charts. + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_disk_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_disk_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = disk_maps; + + if (ebpf_disk_enable_tracepoints()) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto enddisk; + } + + avl_init_lock(&disk_tree, ebpf_compare_disks); + if (read_local_disks()) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto enddisk; + } + + if (pthread_mutex_init(&plot_mutex, NULL)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + error("Cannot initialize local mutex"); + goto enddisk; + } + + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto enddisk; + } + + int algorithms[NETDATA_EBPF_HIST_MAX_BINS]; + ebpf_fill_algorithms(algorithms, NETDATA_EBPF_HIST_MAX_BINS, NETDATA_EBPF_INCREMENTAL_IDX); + dimensions = ebpf_fill_histogram_dimension(NETDATA_EBPF_HIST_MAX_BINS); + + ebpf_global_labels(disk_aggregated_data, disk_publish_aggregated, dimensions, dimensions, algorithms, + NETDATA_EBPF_HIST_MAX_BINS); + + pthread_mutex_lock(&lock); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + disk_collector(em); + +enddisk: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_disk.h b/collectors/ebpf.plugin/ebpf_disk.h new file mode 100644 index 0000000..c14b887 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_disk.h @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_DISK_H +#define NETDATA_EBPF_DISK_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_DISK "disk" + +#include "libnetdata/avl/avl.h" +#include "libnetdata/ebpf/ebpf.h" + +#define NETDATA_EBPF_PROC_PARTITIONS "/proc/partitions" + +#define NETDATA_LATENCY_DISK_SLEEP_MS 650000ULL + +// Process configuration name +#define NETDATA_DISK_CONFIG_FILE "disk.conf" + +// Decode function extracted from: https://elixir.bootlin.com/linux/v5.10.8/source/include/linux/kdev_t.h#L7 +#define MINORBITS 20 +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) + +enum netdata_latency_disks_flags { + NETDATA_DISK_ADDED_TO_PLOT_LIST = 1, + NETDATA_DISK_CHART_CREATED = 2, + NETDATA_DISK_IS_HERE = 4, + NETDATA_DISK_HAS_EFI = 8 +}; + +/* + * The definition (DISK_NAME_LEN) has been a stable value since Kernel 3.0, + * I decided to bring it as internal definition, to avoid include linux/genhd.h. + */ +#define NETDATA_DISK_NAME_LEN 32 +typedef struct netdata_ebpf_disks { + // Search + avl_t avl; + uint32_t dev; + uint32_t major; + uint32_t minor; + uint32_t bootsector_key; + uint64_t start; // start sector + uint64_t end; // end sector + + // Print information + char family[NETDATA_DISK_NAME_LEN + 1]; + char *boot_chart; + + netdata_ebpf_histogram_t histogram; + + uint32_t flags; + time_t last_update; + + struct netdata_ebpf_disks *main; + struct netdata_ebpf_disks *boot_partition; + struct netdata_ebpf_disks *next; +} netdata_ebpf_disks_t; + +enum ebpf_disk_tables { + NETDATA_DISK_READ +}; + +typedef struct block_key { + uint32_t bin; + uint32_t dev; +} block_key_t; + +typedef struct netdata_ebpf_publish_disk { + netdata_ebpf_disks_t *plot; + struct netdata_ebpf_publish_disk *next; +} ebpf_publish_disk_t; + +extern struct config disk_config; + +void *ebpf_disk_thread(void *ptr); + +#endif /* NETDATA_EBPF_DISK_H */ + diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c new file mode 100644 index 0000000..30b7f22 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -0,0 +1,1183 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_fd.h" + +static char *fd_dimension_names[NETDATA_FD_SYSCALL_END] = { "open", "close" }; +static char *fd_id_names[NETDATA_FD_SYSCALL_END] = { "do_sys_open", "__close_fd" }; + +static netdata_syscall_stat_t fd_aggregated_data[NETDATA_FD_SYSCALL_END]; +static netdata_publish_syscall_t fd_publish_aggregated[NETDATA_FD_SYSCALL_END]; + +static ebpf_local_maps_t fd_maps[] = {{.name = "tbl_fd_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_fd_global", .internal_input = NETDATA_KEY_END_VECTOR, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "fd_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + + +struct config fd_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, + .index = {.avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +struct netdata_static_thread fd_thread = {"FD KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL}; + +static netdata_idx_t fd_hash_values[NETDATA_FD_COUNTER]; +static netdata_idx_t *fd_values = NULL; + +netdata_fd_stat_t *fd_vector = NULL; +netdata_fd_stat_t **fd_pid = NULL; + +netdata_ebpf_targets_t fd_targets[] = { {.name = "open", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "close", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/fd.skel.h" // BTF code + +static struct fd_bpf *bpf_obj = NULL; + +/** + * Disable probe + * + * Disable all probes to use exclusively another method. + * + * @param obj is the main structure for bpf objects +*/ +static inline void ebpf_fd_disable_probes(struct fd_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_sys_open_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_sys_open_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_fd_kprobe, false); + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) { + bpf_program__set_autoload(obj->progs.netdata___close_fd_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata___close_fd_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_close_fd_kprobe, false); + } else { + bpf_program__set_autoload(obj->progs.netdata___close_fd_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_close_fd_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_close_fd_kprobe, false); + } +} + +/* + * Disable specific probe + * + * Disable probes according the kernel version + * + * @param obj is the main structure for bpf objects + */ +static inline void ebpf_disable_specific_probes(struct fd_bpf *obj) +{ + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) { + bpf_program__set_autoload(obj->progs.netdata___close_fd_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata___close_fd_kprobe, false); + } else { + bpf_program__set_autoload(obj->progs.netdata_close_fd_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_close_fd_kprobe, false); + } +} + +/* + * Disable trampoline + * + * Disable all trampoline to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_disable_trampoline(struct fd_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_sys_open_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_sys_open_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_close_fd_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_close_fd_fexit, false); + bpf_program__set_autoload(obj->progs.netdata___close_fd_fentry, false); + bpf_program__set_autoload(obj->progs.netdata___close_fd_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_fd_fentry, false); +} + +/* + * Disable specific trampoline + * + * Disable trampoline according to kernel version. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_disable_specific_trampoline(struct fd_bpf *obj) +{ + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) { + bpf_program__set_autoload(obj->progs.netdata___close_fd_fentry, false); + bpf_program__set_autoload(obj->progs.netdata___close_fd_fexit, false); + } else { + bpf_program__set_autoload(obj->progs.netdata_close_fd_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_close_fd_fexit, false); + } +} + +/** + * Set trampoline target + * + * Set the targets we will monitor. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_set_trampoline_target(struct fd_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_sys_open_fentry, 0, fd_targets[NETDATA_FD_SYSCALL_OPEN].name); + bpf_program__set_attach_target(obj->progs.netdata_sys_open_fexit, 0, fd_targets[NETDATA_FD_SYSCALL_OPEN].name); + bpf_program__set_attach_target(obj->progs.netdata_release_task_fd_fentry, 0, EBPF_COMMON_FNCT_CLEAN_UP); + + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) { + bpf_program__set_attach_target( + obj->progs.netdata_close_fd_fentry, 0, fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + bpf_program__set_attach_target(obj->progs.netdata_close_fd_fexit, 0, fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + } else { + bpf_program__set_attach_target( + obj->progs.netdata___close_fd_fentry, 0, fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + bpf_program__set_attach_target( + obj->progs.netdata___close_fd_fexit, 0, fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + } +} + +/** + * Mount Attach Probe + * + * Attach probes to target + * + * @param obj is the main structure for bpf objects. + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_fd_attach_probe(struct fd_bpf *obj) +{ + obj->links.netdata_sys_open_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_sys_open_kprobe, false, + fd_targets[NETDATA_FD_SYSCALL_OPEN].name); + int ret = libbpf_get_error(obj->links.netdata_sys_open_kprobe); + if (ret) + return -1; + + obj->links.netdata_sys_open_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_sys_open_kretprobe, true, + fd_targets[NETDATA_FD_SYSCALL_OPEN].name); + ret = libbpf_get_error(obj->links.netdata_sys_open_kretprobe); + if (ret) + return -1; + + obj->links.netdata_release_task_fd_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_release_task_fd_kprobe, + false, + EBPF_COMMON_FNCT_CLEAN_UP); + ret = libbpf_get_error(obj->links.netdata_release_task_fd_kprobe); + if (ret) + return -1; + + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) { + obj->links.netdata_close_fd_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_close_fd_kretprobe, true, + fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + ret = libbpf_get_error(obj->links.netdata_close_fd_kretprobe); + if (ret) + return -1; + + obj->links.netdata_close_fd_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_close_fd_kprobe, false, + fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + ret = libbpf_get_error(obj->links.netdata_close_fd_kprobe); + if (ret) + return -1; + } else { + obj->links.netdata___close_fd_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata___close_fd_kretprobe, + true, + fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + ret = libbpf_get_error(obj->links.netdata___close_fd_kretprobe); + if (ret) + return -1; + + obj->links.netdata___close_fd_kprobe = bpf_program__attach_kprobe(obj->progs.netdata___close_fd_kprobe, + false, + fd_targets[NETDATA_FD_SYSCALL_CLOSE].name); + ret = libbpf_get_error(obj->links.netdata___close_fd_kprobe); + if (ret) + return -1; + } + + return 0; +} + +/** + * Set target values + * + * Set pointers used to laod data. + */ +static void ebpf_fd_set_target_values() +{ + static char *close_targets[] = {"close_fd", "__close_fd"}; + static char *open_targets[] = {"do_sys_openat2", "do_sys_open"}; + if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) { + fd_targets[NETDATA_FD_SYSCALL_OPEN].name = open_targets[0]; + fd_targets[NETDATA_FD_SYSCALL_CLOSE].name = close_targets[0]; + } else { + fd_targets[NETDATA_FD_SYSCALL_OPEN].name = open_targets[1]; + fd_targets[NETDATA_FD_SYSCALL_CLOSE].name = close_targets[1]; + } +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_fd_set_hash_tables(struct fd_bpf *obj) +{ + fd_maps[NETDATA_FD_GLOBAL_STATS].map_fd = bpf_map__fd(obj->maps.tbl_fd_global); + fd_maps[NETDATA_FD_PID_STATS].map_fd = bpf_map__fd(obj->maps.tbl_fd_pid); + fd_maps[NETDATA_FD_CONTROLLER].map_fd = bpf_map__fd(obj->maps.fd_ctrl); +} + +/** + * Adjust Map Size + * + * Resize maps according input from users. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + */ +static void ebpf_fd_adjust_map_size(struct fd_bpf *obj, ebpf_module_t *em) +{ + ebpf_update_map_size(obj->maps.tbl_fd_pid, &fd_maps[NETDATA_FD_PID_STATS], + em, bpf_map__name(obj->maps.tbl_fd_pid)); +} + +/** + * Disable Release Task + * + * Disable release task when apps is not enabled. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_fd_disable_release_task(struct fd_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_release_task_fd_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_fd_fentry, false); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_targets_t *mt = em->targets; + netdata_ebpf_program_loaded_t test = mt[NETDATA_FD_SYSCALL_OPEN].mode; + + ebpf_fd_set_target_values(); + if (test == EBPF_LOAD_TRAMPOLINE) { + ebpf_fd_disable_probes(obj); + ebpf_disable_specific_trampoline(obj); + + ebpf_set_trampoline_target(obj); + // TODO: Remove this in next PR, because this specific trampoline has an error. + bpf_program__set_autoload(obj->progs.netdata_release_task_fd_fentry, false); + } else { + ebpf_disable_trampoline(obj); + ebpf_disable_specific_probes(obj); + } + + ebpf_fd_adjust_map_size(obj, em); + + if (!em->apps_charts && !em->cgroup_charts) + ebpf_fd_disable_release_task(obj); + + int ret = fd_bpf__load(obj); + if (ret) { + return ret; + } + + ret = (test == EBPF_LOAD_TRAMPOLINE) ? fd_bpf__attach(obj) : ebpf_fd_attach_probe(obj); + if (!ret) { + ebpf_fd_set_hash_tables(obj); + + ebpf_update_controller(fd_maps[NETDATA_CACHESTAT_CTRL].map_fd, em); + } + + return ret; +} +#endif + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * FD Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_fd_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_cleanup_publish_syscall(fd_publish_aggregated); + freez(fd_thread.thread); + freez(fd_values); + freez(fd_vector); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + fd_bpf__destroy(bpf_obj); +#endif + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * FD Exit + * + * Cancel child thread and exit. + * + * @param ptr thread data. + */ +static void ebpf_fd_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*fd_thread.thread); + ebpf_fd_free(em); +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_fd_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_fd_free(em); +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + */ +static void ebpf_fd_send_data(ebpf_module_t *em) +{ + fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].ncall = fd_hash_values[NETDATA_KEY_CALLS_DO_SYS_OPEN]; + fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].nerr = fd_hash_values[NETDATA_KEY_ERROR_DO_SYS_OPEN]; + + fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].ncall = fd_hash_values[NETDATA_KEY_CALLS_CLOSE_FD]; + fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].nerr = fd_hash_values[NETDATA_KEY_ERROR_CLOSE_FD]; + + write_count_chart(NETDATA_FILE_OPEN_CLOSE_COUNT, NETDATA_FILESYSTEM_FAMILY, fd_publish_aggregated, + NETDATA_FD_SYSCALL_END); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_FILE_OPEN_ERR_COUNT, NETDATA_FILESYSTEM_FAMILY, + fd_publish_aggregated, NETDATA_FD_SYSCALL_END); + } +} + +/** + * Read global counter + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + uint32_t idx; + netdata_idx_t *val = fd_hash_values; + netdata_idx_t *stored = fd_values; + int fd = fd_maps[NETDATA_FD_GLOBAL_STATS].map_fd; + + for (idx = NETDATA_KEY_CALLS_DO_SYS_OPEN; idx < NETDATA_FD_COUNTER; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, stored)) { + int i; + int end = ebpf_nprocs; + netdata_idx_t total = 0; + for (i = 0; i < end; i++) + total += stored[i]; + + val[idx] = total; + } + } +} + +/** + * File descriptor read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_fd_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_fd_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + usec_t step = NETDATA_FD_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void fd_apps_accumulator(netdata_fd_stat_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_fd_stat_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_fd_stat_t *w = &out[i]; + total->open_call += w->open_call; + total->close_call += w->close_call; + total->open_err += w->open_err; + total->close_err += w->close_err; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void fd_fill_pid(uint32_t current_pid, netdata_fd_stat_t *publish) +{ + netdata_fd_stat_t *curr = fd_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_fd_stat_t)); + fd_pid[current_pid] = curr; + } + + memcpy(curr, &publish[0], sizeof(netdata_fd_stat_t)); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. + */ +static void read_apps_table() +{ + netdata_fd_stat_t *fv = fd_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd; + size_t length = sizeof(netdata_fd_stat_t) * ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, fv)) { + pids = pids->next; + continue; + } + + fd_apps_accumulator(fv); + + fd_fill_pid(key, fv); + + // We are cleaning to avoid passing data read from one process to other. + memset(fv, 0, length); + + pids = pids->next; + } +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_fd_cgroup() +{ + ebpf_cgroup_target_t *ect ; + netdata_fd_stat_t *fv = fd_vector; + int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd; + size_t length = sizeof(netdata_fd_stat_t) * ebpf_nprocs; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_fd_stat_t *out = &pids->fd; + if (likely(fd_pid) && fd_pid[pid]) { + netdata_fd_stat_t *in = fd_pid[pid]; + + memcpy(out, in, sizeof(netdata_fd_stat_t)); + } else { + memset(fv, 0, length); + if (!bpf_map_lookup_elem(fd, &pid, fv)) { + fd_apps_accumulator(fv); + + memcpy(out, fv, sizeof(netdata_fd_stat_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param fd the output + * @param root list of pids + */ +static void ebpf_fd_sum_pids(netdata_fd_stat_t *fd, struct pid_on_target *root) +{ + uint32_t open_call = 0; + uint32_t close_call = 0; + uint32_t open_err = 0; + uint32_t close_err = 0; + + while (root) { + int32_t pid = root->pid; + netdata_fd_stat_t *w = fd_pid[pid]; + if (w) { + open_call += w->open_call; + close_call += w->close_call; + open_err += w->open_err; + close_err += w->close_err; + } + + root = root->next; + } + + // These conditions were added, because we are using incremental algorithm + fd->open_call = (open_call >= fd->open_call) ? open_call : fd->open_call; + fd->close_call = (close_call >= fd->close_call) ? close_call : fd->close_call; + fd->open_err = (open_err >= fd->open_err) ? open_err : fd->open_err; + fd->close_err = (close_err >= fd->close_err) ? close_err : fd->close_err; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + * @param root the target list. +*/ +void ebpf_fd_send_apps_data(ebpf_module_t *em, struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_fd_sum_pids(&w->fd, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.open_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.open_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.close_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.close_err); + } + } + write_end_chart(); + } +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param fd structure used to store data + * @param pids input data + */ +static void ebpf_fd_sum_cgroup_pids(netdata_fd_stat_t *fd, struct pid_on_target2 *pids) +{ + netdata_fd_stat_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (pids) { + netdata_fd_stat_t *w = &pids->fd; + + accumulator.open_err += w->open_err; + accumulator.open_call += w->open_call; + accumulator.close_call += w->close_call; + accumulator.close_err += w->close_err; + + pids = pids->next; + } + + fd->open_call = (accumulator.open_call >= fd->open_call) ? accumulator.open_call : fd->open_call; + fd->open_err = (accumulator.open_err >= fd->open_err) ? accumulator.open_err : fd->open_err; + fd->close_call = (accumulator.close_call >= fd->close_call) ? accumulator.close_call : fd->close_call; + fd->close_err = (accumulator.close_err >= fd->close_err) ? accumulator.close_err : fd->close_err; +} + +/** + * Create specific file descriptor charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. + */ +static void ebpf_create_specific_fd_charts(char *type, ebpf_module_t *em) +{ + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_OPEN_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN], + 1, em->update_every, + NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_CLOSE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE], + 1, em->update_every, + NETDATA_EBPF_MODULE_NAME_SWAP); + } +} + +/** + * Obsolete specific file descriptor charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. + */ +static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403, em->update_every); + } +} + +/* + * Send specific file descriptor data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_fd_data(char *type, netdata_fd_stat_t *values, ebpf_module_t *em) +{ + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_err); + write_end_chart(); + } +} + +/** + * Create systemd file descriptor charts + * + * Create charts when systemd is enabled + * + * @param em the main collector structure + **/ +static void ebpf_create_systemd_fd_charts(ebpf_module_t *em) +{ + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20061, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_OPEN_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20062, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + } + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20063, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_CLOSE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20064, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + } +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + * + * @param em the main collector structure + */ +static void ebpf_send_systemd_fd_charts(ebpf_module_t *em) +{ + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_fd.open_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_fd.open_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_fd.close_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_fd.close_err); + } + } + write_end_chart(); + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the main collector structure +*/ +static void ebpf_fd_send_cgroup_data(ebpf_module_t *em) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_fd_sum_cgroup_pids(&ect->publish_systemd_fd, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_fd_charts(em); + } + + ebpf_send_systemd_fd_charts(em); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_FD_CHART) && ect->updated) { + ebpf_create_specific_fd_charts(ect->name, em); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_FD_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_FD_CHART ) { + if (ect->updated) { + ebpf_send_specific_fd_data(ect->name, &ect->publish_systemd_fd, em); + } else { + ebpf_obsolete_specific_fd_charts(ect->name, em); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_FD_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. +*/ +static void fd_collector(ebpf_module_t *em) +{ + fd_thread.thread = mallocz(sizeof(netdata_thread_t)); + fd_thread.start_routine = ebpf_fd_read_hash; + + netdata_thread_create(fd_thread.thread, fd_thread.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_fd_read_hash, em); + + int cgroups = em->cgroup_charts; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + read_apps_table(); + + if (cgroups) + ebpf_update_fd_cgroup(); + + pthread_mutex_lock(&lock); + + ebpf_fd_send_data(em); + + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_fd_send_apps_data(em, apps_groups_root_target); + + if (cgroups) + ebpf_fd_send_cgroup_data(em); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * CREATE CHARTS + * + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN, + "Number of open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20061, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20062, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSED, + "Files closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20063, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20064, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param em a pointer to the structure with the default values. + */ +static void ebpf_create_fd_global_charts(ebpf_module_t *em) +{ + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_CLOSE_COUNT, + "Open and close calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS, + ebpf_create_global_dimension, + fd_publish_aggregated, + NETDATA_FD_SYSCALL_END, + em->update_every, NETDATA_EBPF_MODULE_NAME_FD); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_ERR_COUNT, + "Open fails", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS + 1, + ebpf_create_global_dimension, + fd_publish_aggregated, + NETDATA_FD_SYSCALL_END, + em->update_every, NETDATA_EBPF_MODULE_NAME_FD); + } +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_fd_allocate_global_vectors(int apps) +{ + if (apps) + fd_pid = callocz((size_t)pid_max, sizeof(netdata_fd_stat_t *)); + + fd_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_fd_stat_t)); + + fd_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_fd_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_FD_SYSCALL_OPEN].mode); + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->enabled = CONFIG_BOOLEAN_NO; + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = fd_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_fd_load_and_attach(bpf_obj, em); + } +#endif + + if (ret) + error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + +/** + * Directory Cache thread + * + * Thread used to make dcstat thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always returns NULL + */ +void *ebpf_fd_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_fd_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = fd_maps; + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_fd_load_bpf(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endfd; + } + + ebpf_fd_allocate_global_vectors(em->apps_charts); + + int algorithms[NETDATA_FD_SYSCALL_END] = { + NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX + }; + + ebpf_global_labels(fd_aggregated_data, fd_publish_aggregated, fd_dimension_names, fd_id_names, + algorithms, NETDATA_FD_SYSCALL_END); + + pthread_mutex_lock(&lock); + ebpf_create_fd_global_charts(em); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + fd_collector(em); + +endfd: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_fd.h b/collectors/ebpf.plugin/ebpf_fd.h new file mode 100644 index 0000000..914a34b --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_fd.h @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_FD_H +#define NETDATA_EBPF_FD_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_FD "filedescriptor" + +#define NETDATA_FD_SLEEP_MS 850000ULL + +// Menu group +#define NETDATA_FILE_GROUP "file_access" + +// Global chart name +#define NETDATA_FILE_OPEN_CLOSE_COUNT "file_descriptor" +#define NETDATA_FILE_OPEN_ERR_COUNT "file_error" + +// Charts created on Apps submenu +#define NETDATA_SYSCALL_APPS_FILE_OPEN "file_open" +#define NETDATA_SYSCALL_APPS_FILE_CLOSED "file_closed" +#define NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR "file_open_error" +#define NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR "file_close_error" + +// Process configuration name +#define NETDATA_FD_CONFIG_FILE "fd.conf" + +// Contexts +#define NETDATA_CGROUP_FD_OPEN_CONTEXT "cgroup.fd_open" +#define NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT "cgroup.fd_open_error" +#define NETDATA_CGROUP_FD_CLOSE_CONTEXT "cgroup.fd_close" +#define NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT "cgroup.fd_close_error" + +#define NETDATA_SYSTEMD_FD_OPEN_CONTEXT "services.fd_open" +#define NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT "services.fd_open_error" +#define NETDATA_SYSTEMD_FD_CLOSE_CONTEXT "services.fd_close" +#define NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT "services.fd_close_error" + +typedef struct netdata_fd_stat { + uint64_t pid_tgid; // Unique identifier + uint32_t pid; // Process ID + + uint32_t open_call; // Open syscalls (open and openat) + uint32_t close_call; // Close syscall (close) + + // Errors + uint32_t open_err; + uint32_t close_err; +} netdata_fd_stat_t; + +enum fd_tables { + NETDATA_FD_PID_STATS, + NETDATA_FD_GLOBAL_STATS, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_FD_CONTROLLER +}; + +enum fd_counters { + NETDATA_KEY_CALLS_DO_SYS_OPEN, + NETDATA_KEY_ERROR_DO_SYS_OPEN, + + NETDATA_KEY_CALLS_CLOSE_FD, + NETDATA_KEY_ERROR_CLOSE_FD, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_FD_COUNTER +}; + +enum fd_syscalls { + NETDATA_FD_SYSCALL_OPEN, + NETDATA_FD_SYSCALL_CLOSE, + + // Do not insert nothing after this value + NETDATA_FD_SYSCALL_END +}; + + +void *ebpf_fd_thread(void *ptr); +void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr); +extern struct config fd_config; +extern netdata_fd_stat_t **fd_pid; +extern netdata_ebpf_targets_t fd_targets[]; + +#endif /* NETDATA_EBPF_FD_H */ + diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c new file mode 100644 index 0000000..7dbec74 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_filesystem.c @@ -0,0 +1,638 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf_filesystem.h" + +struct config fs_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t fs_maps[] = {{.name = "tbl_ext4", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_xfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_nfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_zfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_btrfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_ext_addr", .internal_input = 1, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +struct netdata_static_thread filesystem_threads = { + .name = "EBPF FS READ", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +static netdata_syscall_stat_t filesystem_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; +static netdata_publish_syscall_t filesystem_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; + +char **dimensions = NULL; +static netdata_idx_t *filesystem_hash_values = NULL; + +/***************************************************************** + * + * COMMON FUNCTIONS + * + *****************************************************************/ + +/** + * Create Filesystem chart + * + * Create latency charts + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_fs_charts(int update_every) +{ + int i; + uint32_t test = NETDATA_FILESYSTEM_FLAG_CHART_CREATED | NETDATA_FILESYSTEM_REMOVE_CHARTS; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + uint32_t flags = efp->flags; + if ((flags & test) == test) { + flags &= ~NETDATA_FILESYSTEM_FLAG_CHART_CREATED; + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + efp->hread.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hread.order, update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + efp->hwrite.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hwrite.order, update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, efp->hopen.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hopen.order, update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, efp->hadditional.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hadditional.order, + update_every); + } + efp->flags = flags; + } +} + +/** + * Create Filesystem chart + * + * Create latency charts + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_fs_charts(int update_every) +{ + static int order = NETDATA_CHART_PRIO_EBPF_FILESYSTEM_CHARTS; + char chart_name[64], title[256], family[64]; + int i; + uint32_t test = NETDATA_FILESYSTEM_FLAG_CHART_CREATED|NETDATA_FILESYSTEM_REMOVE_CHARTS; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + uint32_t flags = efp->flags; + if (flags & NETDATA_FILESYSTEM_FLAG_HAS_PARTITION && !(flags & test)) { + snprintfz(title, 255, "%s latency for each read request.", efp->filesystem); + snprintfz(family, 63, "%s_latency", efp->family); + snprintfz(chart_name, 63, "%s_read_latency", efp->filesystem); + efp->hread.name = strdupz(chart_name); + efp->hread.title = strdupz(title); + efp->hread.order = order; + efp->family_name = strdupz(family); + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + + snprintfz(title, 255, "%s latency for each write request.", efp->filesystem); + snprintfz(chart_name, 63, "%s_write_latency", efp->filesystem); + efp->hwrite.name = strdupz(chart_name); + efp->hwrite.title = strdupz(title); + efp->hwrite.order = order; + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + + snprintfz(title, 255, "%s latency for each open request.", efp->filesystem); + snprintfz(chart_name, 63, "%s_open_latency", efp->filesystem); + efp->hopen.name = strdupz(chart_name); + efp->hopen.title = strdupz(title); + efp->hopen.order = order; + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, + title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + + char *type = (efp->flags & NETDATA_FILESYSTEM_ATTR_CHARTS) ? "attribute" : "sync"; + snprintfz(title, 255, "%s latency for each %s request.", efp->filesystem, type); + snprintfz(chart_name, 63, "%s_%s_latency", efp->filesystem, type); + efp->hadditional.name = strdupz(chart_name); + efp->hadditional.title = strdupz(title); + efp->hadditional.order = order; + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + efp->flags |= NETDATA_FILESYSTEM_FLAG_CHART_CREATED; + } + } +} + +/** + * Initialize eBPF data + * + * @param em main thread structure. + * + * @return it returns 0 on success and -1 otherwise. + */ +int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) +{ + int i; + const char *saved_name = em->thread_name; + uint64_t kernels = em->kernels; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (!efp->probe_links && efp->flags & NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM) { + em->thread_name = efp->filesystem; + em->kernels = efp->kernels; + efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &efp->objects); + if (!efp->probe_links) { + em->thread_name = saved_name; + em->kernels = kernels; + return -1; + } + efp->flags |= NETDATA_FILESYSTEM_FLAG_HAS_PARTITION; + + // Nedeed for filesystems like btrfs + if ((efp->flags & NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE) && (efp->addresses.function)) { + ebpf_load_addresses(&efp->addresses, fs_maps[i + 1].map_fd); + } + } + efp->flags &= ~NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + } + em->thread_name = saved_name; + em->kernels = kernels; + + if (!dimensions) { + dimensions = ebpf_fill_histogram_dimension(NETDATA_EBPF_HIST_MAX_BINS); + + memset(filesystem_aggregated_data, 0 , NETDATA_EBPF_HIST_MAX_BINS * sizeof(netdata_syscall_stat_t)); + memset(filesystem_publish_aggregated, 0 , NETDATA_EBPF_HIST_MAX_BINS * sizeof(netdata_publish_syscall_t)); + + filesystem_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + } + + return 0; +} + +/** + * Read Local partitions + * + * @return the total of partitions that will be monitored + */ +static int ebpf_read_local_partitions() +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/self/mountinfo", netdata_configured_host_prefix); + procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + snprintfz(filename, FILENAME_MAX, "%s/proc/1/mountinfo", netdata_configured_host_prefix); + ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 0; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; + + int count = 0; + unsigned long l, i, lines = procfile_lines(ff); + for (i = 0; localfs[i].filesystem; i++) { + localfs[i].flags |= NETDATA_FILESYSTEM_REMOVE_CHARTS; + } + + for(l = 0; l < lines ; l++) { + // In "normal" situation the expected value is at column 7 + // When `shared` options is added to mount information, the filesystem is at column 8 + // Finally when we have systemd starting netdata, it will be at column 9 + unsigned long index = procfile_linewords(ff, l) - 3; + + char *fs = procfile_lineword(ff, l, index); + + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *w = &localfs[i]; + if (w->enabled && (!strcmp(fs, w->filesystem) || + (w->optional_filesystem && !strcmp(fs, w->optional_filesystem)))) { + localfs[i].flags |= NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + localfs[i].flags &= ~NETDATA_FILESYSTEM_REMOVE_CHARTS; + count++; + break; + } + } + } + procfile_close(ff); + + return count; +} + +/** + * Update partition + * + * Update the partition structures before to plot + * + * @param em main thread structure + * + * @return 0 on success and -1 otherwise. + */ +static int ebpf_update_partitions(ebpf_module_t *em) +{ + static time_t update_every = 0; + time_t curr = now_realtime_sec(); + if (curr < update_every) + return 0; + + update_every = curr + 5 * em->update_every; + if (!ebpf_read_local_partitions()) { + em->optional = -1; + return -1; + } + + if (ebpf_filesystem_initialize_ebpf_data(em)) { + return -1; + } + + return 0; +} + +/***************************************************************** + * + * CLEANUP FUNCTIONS + * + *****************************************************************/ + +/* + * Cleanup eBPF data + */ +void ebpf_filesystem_cleanup_ebpf_data() +{ + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (efp->probe_links) { + freez(efp->family_name); + + freez(efp->hread.name); + freez(efp->hread.title); + + freez(efp->hwrite.name); + freez(efp->hwrite.title); + + freez(efp->hopen.name); + freez(efp->hopen.title); + + freez(efp->hadditional.name); + freez(efp->hadditional.title); + } + } +} + +/** + * Filesystem Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_filesystem_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + freez(filesystem_threads.thread); + ebpf_cleanup_publish_syscall(filesystem_publish_aggregated); + + ebpf_filesystem_cleanup_ebpf_data(); + if (dimensions) + ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); + freez(filesystem_hash_values); + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Filesystem exit + * + * Cancel child thread. + * + * @param ptr thread data. + */ +static void ebpf_filesystem_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*filesystem_threads.thread); + ebpf_filesystem_free(em); +} + +/** + * File system cleanup + * + * Clean up allocated thread. + * + * @param ptr thread data. + */ +static void ebpf_filesystem_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_filesystem_free(em); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Select hist + * + * Select a histogram to store data. + * + * @param efp pointer for the structure with pointers. + * @param id histogram selector + * + * @return It returns a pointer for the histogram + */ +static inline netdata_ebpf_histogram_t *select_hist(ebpf_filesystem_partitions_t *efp, uint32_t *idx, uint32_t id) +{ + if (id < NETDATA_KEY_CALLS_READ) { + *idx = id; + return &efp->hread; + } else if (id < NETDATA_KEY_CALLS_WRITE) { + *idx = id - NETDATA_KEY_CALLS_READ; + return &efp->hwrite; + } else if (id < NETDATA_KEY_CALLS_OPEN) { + *idx = id - NETDATA_KEY_CALLS_WRITE; + return &efp->hopen; + } else if (id < NETDATA_KEY_CALLS_SYNC ){ + *idx = id - NETDATA_KEY_CALLS_OPEN; + return &efp->hadditional; + } + + return NULL; +} + +/** + * Read hard disk table + * + * @param table index for the hash table + * + * Read the table with number of calls for all functions + */ +static void read_filesystem_table(ebpf_filesystem_partitions_t *efp, int fd) +{ + netdata_idx_t *values = filesystem_hash_values; + uint32_t key; + uint32_t idx; + for (key = 0; key < NETDATA_KEY_CALLS_SYNC; key++) { + netdata_ebpf_histogram_t *w = select_hist(efp, &idx, key); + if (!w) { + continue; + } + + int test = bpf_map_lookup_elem(fd, &key, values); + if (test < 0) { + continue; + } + + uint64_t total = 0; + int i; + int end = ebpf_nprocs; + for (i = 0; i < end; i++) { + total += values[i]; + } + + if (idx >= NETDATA_EBPF_HIST_MAX_BINS) + idx = NETDATA_EBPF_HIST_MAX_BINS - 1; + w->histogram[idx] = total; + } +} + +/** + * Read hard disk table + * + * @param table index for the hash table + * + * Read the table with number of calls for all functions + */ +static void read_filesystem_tables() +{ + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (efp->flags & NETDATA_FILESYSTEM_FLAG_HAS_PARTITION) { + read_filesystem_table(efp, fs_maps[i].map_fd); + } + } +} + +/** + * Socket read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_filesystem_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_filesystem_cleanup, ptr); + ebpf_module_t *em = (ebpf_module_t *)ptr; + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = NETDATA_FILESYSTEM_READ_SLEEP_MS * em->update_every; + int update_every = em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + (void) ebpf_update_partitions(em); + ebpf_obsolete_fs_charts(update_every); + + // No more partitions, it is not necessary to read tables + if (em->optional) + continue; + + read_filesystem_tables(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Send Hard disk data + * + * Send hard disk information to Netdata. + */ +static void ebpf_histogram_send_data() +{ + uint32_t i; + uint32_t test = NETDATA_FILESYSTEM_FLAG_HAS_PARTITION | NETDATA_FILESYSTEM_REMOVE_CHARTS; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if ((efp->flags & test) == NETDATA_FILESYSTEM_FLAG_HAS_PARTITION) { + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + efp->hread.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + efp->hwrite.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, + efp->hopen.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, + efp->hadditional.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + } + } +} + +/** + * Main loop for this collector. + * + * @param em main structure for this thread + */ +static void filesystem_collector(ebpf_module_t *em) +{ + filesystem_threads.thread = mallocz(sizeof(netdata_thread_t)); + filesystem_threads.start_routine = ebpf_filesystem_read_hash; + + netdata_thread_create(filesystem_threads.thread, filesystem_threads.name, + NETDATA_THREAD_OPTION_DEFAULT, ebpf_filesystem_read_hash, em); + + int update_every = em->update_every; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + + ebpf_create_fs_charts(update_every); + ebpf_histogram_send_data(); + + pthread_mutex_unlock(&lock); + } +} + +/***************************************************************** + * + * ENTRY THREAD + * + *****************************************************************/ + +/** + * Update Filesystem + * + * Update file system structure using values read from configuration file. + */ +static void ebpf_update_filesystem() +{ + char dist[NETDATA_FS_MAX_DIST_NAME + 1]; + int i; + for (i = 0; localfs[i].filesystem; i++) { + snprintfz(dist, NETDATA_FS_MAX_DIST_NAME, "%sdist", localfs[i].filesystem); + + localfs[i].enabled = appconfig_get_boolean(&fs_config, NETDATA_FILESYSTEM_CONFIG_NAME, dist, + CONFIG_BOOLEAN_YES); + } +} + +/** + * Filesystem thread + * + * Thread used to generate socket charts. + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_filesystem_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_filesystem_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = fs_maps; + ebpf_update_filesystem(); + + // Initialize optional as zero, to identify when there are not partitions to monitor + em->optional = 0; + + if (ebpf_update_partitions(em)) { + if (em->optional) + info("Netdata cannot monitor the filesystems used on this host."); + + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endfilesystem; + } + + int algorithms[NETDATA_EBPF_HIST_MAX_BINS]; + ebpf_fill_algorithms(algorithms, NETDATA_EBPF_HIST_MAX_BINS, NETDATA_EBPF_INCREMENTAL_IDX); + ebpf_global_labels(filesystem_aggregated_data, filesystem_publish_aggregated, dimensions, dimensions, + algorithms, NETDATA_EBPF_HIST_MAX_BINS); + + pthread_mutex_lock(&lock); + ebpf_create_fs_charts(em->update_every); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + filesystem_collector(em); + +endfilesystem: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_filesystem.h b/collectors/ebpf.plugin/ebpf_filesystem.h new file mode 100644 index 0000000..0d558df --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_filesystem.h @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_FILESYSTEM_H +#define NETDATA_EBPF_FILESYSTEM_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_FILESYSTEM "filesystem" + +#include "ebpf.h" + +#define NETDATA_FS_MAX_DIST_NAME 64UL + +#define NETDATA_FILESYSTEM_CONFIG_NAME "filesystem" +#define NETDATA_FILESYSTEM_READ_SLEEP_MS 600000ULL + +// Process configuration name +#define NETDATA_FILESYSTEM_CONFIG_FILE "filesystem.conf" + +typedef struct netdata_fs_hist { + uint32_t hist_id; + uint32_t bin; +} netdata_fs_hist_t; + +enum filesystem_limit { + NETDATA_KEY_CALLS_READ = 24, + NETDATA_KEY_CALLS_WRITE = 48, + NETDATA_KEY_CALLS_OPEN = 72, + NETDATA_KEY_CALLS_SYNC = 96 +}; + +enum netdata_filesystem_flags { + NETDATA_FILESYSTEM_FLAG_NO_PARTITION = 0, + NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM = 1, + NETDATA_FILESYSTEM_FLAG_HAS_PARTITION = 2, + NETDATA_FILESYSTEM_FLAG_CHART_CREATED = 4, + NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE = 8, + NETDATA_FILESYSTEM_REMOVE_CHARTS = 16, + NETDATA_FILESYSTEM_ATTR_CHARTS = 32 +}; + +enum netdata_filesystem_table { + NETDATA_MAIN_FS_TABLE, + NETDATA_ADDR_FS_TABLE +}; + +void *ebpf_filesystem_thread(void *ptr); +extern struct config fs_config; + +#endif /* NETDATA_EBPF_FILESYSTEM_H */ diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c new file mode 100644 index 0000000..b07dd24 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -0,0 +1,507 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_hardirq.h" + +struct config hardirq_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define HARDIRQ_MAP_LATENCY 0 +#define HARDIRQ_MAP_LATENCY_STATIC 1 +static ebpf_local_maps_t hardirq_maps[] = { + { + .name = "tbl_hardirq", + .internal_input = NETDATA_HARDIRQ_MAX_IRQS, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + { + .name = "tbl_hardirq_static", + .internal_input = HARDIRQ_EBPF_STATIC_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +#define HARDIRQ_TP_CLASS_IRQ "irq" +#define HARDIRQ_TP_CLASS_IRQ_VECTORS "irq_vectors" +static ebpf_tracepoint_t hardirq_tracepoints[] = { + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ, .event = "irq_handler_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ, .event = "irq_handler_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "thermal_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "thermal_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "threshold_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "threshold_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "error_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "error_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "deferred_error_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "deferred_error_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "spurious_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "spurious_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_single_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_single_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "reschedule_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "reschedule_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "local_timer_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "local_timer_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "irq_work_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "irq_work_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "x86_platform_ipi_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "x86_platform_ipi_exit"}, + /* end */ + {.enabled = false, .class = NULL, .event = NULL} +}; + +static hardirq_static_val_t hardirq_static_vals[] = { + { + .idx = HARDIRQ_EBPF_STATIC_APIC_THERMAL, + .name = "apic_thermal", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_THRESHOLD, + .name = "apic_threshold", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_ERROR, + .name = "apic_error", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_DEFERRED_ERROR, + .name = "apic_deferred_error", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_SPURIOUS, + .name = "apic_spurious", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_FUNC_CALL, + .name = "func_call", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_FUNC_CALL_SINGLE, + .name = "func_call_single", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_RESCHEDULE, + .name = "reschedule", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_LOCAL_TIMER, + .name = "local_timer", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_IRQ_WORK, + .name = "irq_work", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_X86_PLATFORM_IPI, + .name = "x86_platform_ipi", + .latency = 0 + }, +}; + +// store for "published" data from the reader thread, which the collector +// thread will write to netdata agent. +static avl_tree_lock hardirq_pub; + +// tmp store for dynamic hard IRQ values we get from a per-CPU eBPF map. +static hardirq_ebpf_val_t *hardirq_ebpf_vals = NULL; + +// tmp store for static hard IRQ values we get from a per-CPU eBPF map. +static hardirq_ebpf_static_val_t *hardirq_ebpf_static_vals = NULL; + +static struct netdata_static_thread hardirq_threads = { + .name = "HARDIRQ KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +/** + * Hardirq Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_hardirq_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + freez(hardirq_threads.thread); + for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) { + ebpf_disable_tracepoint(&hardirq_tracepoints[i]); + } + freez(hardirq_ebpf_vals); + freez(hardirq_ebpf_static_vals); + + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; +} + +/** + * Hardirq Exit + * + * Cancel child and exit. + * + * @param ptr thread data. + */ +static void hardirq_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*hardirq_threads.thread); + ebpf_hardirq_free(em); +} + +/** + * Hardirq clean up + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void hardirq_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_hardirq_free(em); +} + +/***************************************************************** + * MAIN LOOP + *****************************************************************/ + +/** + * Compare hard IRQ values. + * + * @param a `hardirq_val_t *`. + * @param b `hardirq_val_t *`. + * + * @return 0 if a==b, 1 if a>b, -1 if airq > ptr2->irq) { + return 1; + } + else if (ptr1->irq < ptr2->irq) { + return -1; + } + else { + return 0; + } +} + +static void hardirq_read_latency_map(int mapfd) +{ + hardirq_ebpf_key_t key = {}; + hardirq_ebpf_key_t next_key = {}; + hardirq_val_t search_v = {}; + hardirq_val_t *v = NULL; + + while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) { + // get val for this key. + int test = bpf_map_lookup_elem(mapfd, &key, hardirq_ebpf_vals); + if (unlikely(test < 0)) { + key = next_key; + continue; + } + + // is this IRQ saved yet? + // + // if not, make a new one, mark it as unsaved for now, and continue; we + // will insert it at the end after all of its values are correctly set, + // so that we can safely publish it to the collector within a single, + // short locked operation. + // + // otherwise simply continue; we will only update the latency, which + // can be republished safely without a lock. + // + // NOTE: lock isn't strictly necessary for this initial search, as only + // this thread does writing, but the AVL is using a read-write lock so + // there is no congestion. + bool v_is_new = false; + search_v.irq = key.irq; + v = (hardirq_val_t *)avl_search_lock(&hardirq_pub, (avl_t *)&search_v); + if (unlikely(v == NULL)) { + // latency/name can only be added reliably at a later time. + // when they're added, only then will we AVL insert. + v = callocz(1, sizeof(hardirq_val_t)); + v->irq = key.irq; + v->dim_exists = false; + + v_is_new = true; + } + + // note two things: + // 1. we must add up latency value for this IRQ across all CPUs. + // 2. the name is unfortunately *not* available on all CPU maps - only + // a single map contains the name, so we must find it. we only need + // to copy it though if the IRQ is new for us. + bool name_saved = false; + uint64_t total_latency = 0; + int i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total_latency += hardirq_ebpf_vals[i].latency/1000; + + // copy name for new IRQs. + if (v_is_new && !name_saved && hardirq_ebpf_vals[i].name[0] != '\0') { + strncpyz( + v->name, + hardirq_ebpf_vals[i].name, + NETDATA_HARDIRQ_NAME_LEN + ); + name_saved = true; + } + } + + // can now safely publish latency for existing IRQs. + v->latency = total_latency; + + // can now safely publish new IRQ. + if (v_is_new) { + avl_t *check = avl_insert_lock(&hardirq_pub, (avl_t *)v); + if (check != (avl_t *)v) { + error("Internal error, cannot insert the AVL tree."); + } + } + + key = next_key; + } +} + +static void hardirq_read_latency_static_map(int mapfd) +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + uint32_t map_i = hardirq_static_vals[i].idx; + int test = bpf_map_lookup_elem(mapfd, &map_i, hardirq_ebpf_static_vals); + if (unlikely(test < 0)) { + continue; + } + + uint64_t total_latency = 0; + int cpu_i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (cpu_i = 0; cpu_i < end; cpu_i++) { + total_latency += hardirq_ebpf_static_vals[cpu_i].latency/1000; + } + + hardirq_static_vals[i].latency = total_latency; + } +} + +/** + * Read eBPF maps for hard IRQ. + */ +static void *hardirq_reader(void *ptr) +{ + netdata_thread_cleanup_push(hardirq_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_HARDIRQ_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + hardirq_read_latency_map(hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd); + hardirq_read_latency_static_map(hardirq_maps[HARDIRQ_MAP_LATENCY_STATIC].map_fd); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +static void hardirq_create_charts(int update_every) +{ + ebpf_create_chart( + NETDATA_EBPF_SYSTEM_GROUP, + "hardirq_latency", + "Hardware IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "interrupts", + NULL, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_HARDIRQ_LATENCY, + NULL, NULL, 0, update_every, + NETDATA_EBPF_MODULE_NAME_HARDIRQ + ); + + fflush(stdout); +} + +static void hardirq_create_static_dims() +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + ebpf_write_global_dimension( + hardirq_static_vals[i].name, hardirq_static_vals[i].name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + } +} + +// callback for avl tree traversal on `hardirq_pub`. +static int hardirq_write_dims(void *entry, void *data) +{ + UNUSED(data); + + hardirq_val_t *v = entry; + + // IRQs get dynamically added in, so add the dimension if we haven't yet. + if (!v->dim_exists) { + ebpf_write_global_dimension( + v->name, v->name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + v->dim_exists = true; + } + + write_chart_dimension(v->name, v->latency); + + return 1; +} + +static inline void hardirq_write_static_dims() +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + write_chart_dimension( + hardirq_static_vals[i].name, + hardirq_static_vals[i].latency + ); + } +} + +/** +* Main loop for this collector. +*/ +static void hardirq_collector(ebpf_module_t *em) +{ + hardirq_ebpf_vals = callocz( + (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs, + sizeof(hardirq_ebpf_val_t) + ); + hardirq_ebpf_static_vals = callocz( + (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs, + sizeof(hardirq_ebpf_static_val_t) + ); + + avl_init_lock(&hardirq_pub, hardirq_val_cmp); + + // create reader thread. + hardirq_threads.thread = mallocz(sizeof(netdata_thread_t)); + hardirq_threads.start_routine = hardirq_reader; + netdata_thread_create( + hardirq_threads.thread, + hardirq_threads.name, + NETDATA_THREAD_OPTION_DEFAULT, + hardirq_reader, + em + ); + + // create chart and static dims. + pthread_mutex_lock(&lock); + hardirq_create_charts(em->update_every); + hardirq_create_static_dims(); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered IRQs. + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency"); + avl_traverse_lock(&hardirq_pub, hardirq_write_dims, NULL); + hardirq_write_static_dims(); + write_end_chart(); + + pthread_mutex_unlock(&lock); + } +} + +/***************************************************************** + * EBPF HARDIRQ THREAD + *****************************************************************/ + +/** + * Hard IRQ latency thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. + */ +void *ebpf_hardirq_thread(void *ptr) +{ + netdata_thread_cleanup_push(hardirq_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = hardirq_maps; + + if (ebpf_enable_tracepoints(hardirq_tracepoints) == 0) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endhardirq; + } + + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endhardirq; + } + + hardirq_collector(em); + +endhardirq: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_hardirq.h b/collectors/ebpf.plugin/ebpf_hardirq.h new file mode 100644 index 0000000..381da57 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_hardirq.h @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_HARDIRQ_H +#define NETDATA_EBPF_HARDIRQ_H 1 + +/***************************************************************** + * copied from kernel-collectors repo, with modifications needed + * for inclusion here. + *****************************************************************/ + +#define NETDATA_HARDIRQ_NAME_LEN 32 +#define NETDATA_HARDIRQ_MAX_IRQS 1024L + +typedef struct hardirq_ebpf_key { + int irq; +} hardirq_ebpf_key_t; + +typedef struct hardirq_ebpf_val { + uint64_t latency; + uint64_t ts; + char name[NETDATA_HARDIRQ_NAME_LEN]; +} hardirq_ebpf_val_t; + +enum hardirq_ebpf_static { + HARDIRQ_EBPF_STATIC_APIC_THERMAL, + HARDIRQ_EBPF_STATIC_APIC_THRESHOLD, + HARDIRQ_EBPF_STATIC_APIC_ERROR, + HARDIRQ_EBPF_STATIC_APIC_DEFERRED_ERROR, + HARDIRQ_EBPF_STATIC_APIC_SPURIOUS, + HARDIRQ_EBPF_STATIC_FUNC_CALL, + HARDIRQ_EBPF_STATIC_FUNC_CALL_SINGLE, + HARDIRQ_EBPF_STATIC_RESCHEDULE, + HARDIRQ_EBPF_STATIC_LOCAL_TIMER, + HARDIRQ_EBPF_STATIC_IRQ_WORK, + HARDIRQ_EBPF_STATIC_X86_PLATFORM_IPI, + + HARDIRQ_EBPF_STATIC_END +}; + +typedef struct hardirq_ebpf_static_val { + uint64_t latency; + uint64_t ts; +} hardirq_ebpf_static_val_t; + +/***************************************************************** + * below this is eBPF plugin-specific code. + *****************************************************************/ + +#define NETDATA_EBPF_MODULE_NAME_HARDIRQ "hardirq" +#define NETDATA_HARDIRQ_SLEEP_MS 650000ULL +#define NETDATA_HARDIRQ_CONFIG_FILE "hardirq.conf" + +typedef struct hardirq_val { + // must be at top for simplified AVL tree usage. + // if it's not at the top, we need to use `containerof` for almost all ops. + avl_t avl; + + int irq; + bool dim_exists; // keep this after `int irq` for alignment byte savings. + uint64_t latency; + char name[NETDATA_HARDIRQ_NAME_LEN]; +} hardirq_val_t; + +typedef struct hardirq_static_val { + enum hardirq_ebpf_static idx; + char *name; + uint64_t latency; +} hardirq_static_val_t; + +extern struct config hardirq_config; +void *ebpf_hardirq_thread(void *ptr); + +#endif /* NETDATA_EBPF_HARDIRQ_H */ diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c new file mode 100644 index 0000000..dc805da --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_mdflush.h" + +struct config mdflush_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define MDFLUSH_MAP_COUNT 0 +static ebpf_local_maps_t mdflush_maps[] = { + { + .name = "tbl_mdflush", + .internal_input = 1024, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +// store for "published" data from the reader thread, which the collector +// thread will write to netdata agent. +static avl_tree_lock mdflush_pub; + +// tmp store for mdflush values we get from a per-CPU eBPF map. +static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL; + +static struct netdata_static_thread mdflush_threads = { + .name = "MDFLUSH KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +/** + * MDflush Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_mdflush_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + freez(mdflush_ebpf_vals); + freez(mdflush_threads.thread); + + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; +} + +/** + * MDflush exit + * + * Cancel thread and exit. + * + * @param ptr thread data. + */ +static void mdflush_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_mdflush_free(em); +} + +/** + * CLeanup + * + * Clean allocated memory. + * + * @param ptr thread data. + */ +static void mdflush_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*mdflush_threads.thread); + ebpf_mdflush_free(em); +} + +/** + * Compare mdflush values. + * + * @param a `netdata_mdflush_t *`. + * @param b `netdata_mdflush_t *`. + * + * @return 0 if a==b, 1 if a>b, -1 if aunit > ptr2->unit) { + return 1; + } + else if (ptr1->unit < ptr2->unit) { + return -1; + } + else { + return 0; + } +} + +static void mdflush_read_count_map() +{ + int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd; + mdflush_ebpf_key_t curr_key = (uint32_t)-1; + mdflush_ebpf_key_t key = (uint32_t)-1; + netdata_mdflush_t search_v; + netdata_mdflush_t *v = NULL; + + while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) { + curr_key = key; + + // get val for this key. + int test = bpf_map_lookup_elem(mapfd, &key, mdflush_ebpf_vals); + if (unlikely(test < 0)) { + continue; + } + + // is this record saved yet? + // + // if not, make a new one, mark it as unsaved for now, and continue; we + // will insert it at the end after all of its values are correctly set, + // so that we can safely publish it to the collector within a single, + // short locked operation. + // + // otherwise simply continue; we will only update the flush count, + // which can be republished safely without a lock. + // + // NOTE: lock isn't strictly necessary for this initial search, as only + // this thread does writing, but the AVL is using a read-write lock so + // there is no congestion. + bool v_is_new = false; + search_v.unit = key; + v = (netdata_mdflush_t *)avl_search_lock( + &mdflush_pub, + (avl_t *)&search_v + ); + if (unlikely(v == NULL)) { + // flush count can only be added reliably at a later time. + // when they're added, only then will we AVL insert. + v = callocz(1, sizeof(netdata_mdflush_t)); + v->unit = key; + sprintf(v->disk_name, "md%u", key); + v->dim_exists = false; + + v_is_new = true; + } + + // we must add up count value for this record across all CPUs. + uint64_t total_cnt = 0; + int i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total_cnt += mdflush_ebpf_vals[i]; + } + + // can now safely publish count for existing records. + v->cnt = total_cnt; + + // can now safely publish new record. + if (v_is_new) { + avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v); + if (check != (avl_t *)v) { + error("Internal error, cannot insert the AVL tree."); + } + } + } +} + +/** + * Read eBPF maps for mdflush. + */ +static void *mdflush_reader(void *ptr) +{ + netdata_thread_cleanup_push(mdflush_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_MDFLUSH_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + mdflush_read_count_map(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +static void mdflush_create_charts(int update_every) +{ + ebpf_create_chart( + "mdstat", + "mdstat_flush", + "MD flushes", + "flushes", + "flush (eBPF)", + "md.flush", + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_MDSTAT_FLUSH, + NULL, NULL, 0, update_every, + NETDATA_EBPF_MODULE_NAME_MDFLUSH + ); + + fflush(stdout); +} + +// callback for avl tree traversal on `mdflush_pub`. +static int mdflush_write_dims(void *entry, void *data) +{ + UNUSED(data); + + netdata_mdflush_t *v = entry; + + // records get dynamically added in, so add the dim if we haven't yet. + if (!v->dim_exists) { + ebpf_write_global_dimension( + v->disk_name, v->disk_name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + v->dim_exists = true; + } + + write_chart_dimension(v->disk_name, v->cnt); + + return 1; +} + +/** +* Main loop for this collector. +*/ +static void mdflush_collector(ebpf_module_t *em) +{ + mdflush_ebpf_vals = callocz(ebpf_nprocs, sizeof(mdflush_ebpf_val_t)); + + avl_init_lock(&mdflush_pub, mdflush_val_cmp); + + // create reader thread. + mdflush_threads.thread = mallocz(sizeof(netdata_thread_t)); + mdflush_threads.start_routine = mdflush_reader; + netdata_thread_create( + mdflush_threads.thread, + mdflush_threads.name, + NETDATA_THREAD_OPTION_DEFAULT, + mdflush_reader, + em + ); + + // create chart and static dims. + pthread_mutex_lock(&lock); + mdflush_create_charts(em->update_every); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + // write dims now for all hitherto discovered devices. + write_begin_chart("mdstat", "mdstat_flush"); + avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL); + write_end_chart(); + + pthread_mutex_unlock(&lock); + } +} + +/** + * mdflush thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. + */ +void *ebpf_mdflush_thread(void *ptr) +{ + netdata_thread_cleanup_push(mdflush_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = mdflush_maps; + + char *md_flush_request = ebpf_find_symbol("md_flush_request"); + if (!md_flush_request) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + error("Cannot monitor MD devices, because md is not loaded."); + } + freez(md_flush_request); + + if (em->thread->enabled == NETDATA_THREAD_EBPF_STOPPED) { + goto endmdflush; + } + + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endmdflush; + } + + mdflush_collector(em); + +endmdflush: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_mdflush.h b/collectors/ebpf.plugin/ebpf_mdflush.h new file mode 100644 index 0000000..b04eefd --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_mdflush.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_MDFLUSH_H +#define NETDATA_EBPF_MDFLUSH_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_MDFLUSH "mdflush" + +#define NETDATA_MDFLUSH_SLEEP_MS 850000ULL + +// charts +#define NETDATA_MDFLUSH_GLOBAL_CHART "mdflush" + +// configuration file +#define NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE "mdflush.conf" + +// copy of mdflush types from kernel-collectors repo. +typedef uint32_t mdflush_ebpf_key_t; +typedef uint64_t mdflush_ebpf_val_t; + +typedef struct netdata_mdflush { + // must be at top for simplified AVL tree usage. + // if it's not at the top, we need to use `containerof` for almost all ops. + avl_t avl; + + // key & name of device. + // the name is generated by the key, usually as `md`. + uint32_t unit; + char disk_name[32]; + + // have we defined the dimension for this device yet? + bool dim_exists; + + // incremental flush count value. + uint64_t cnt; +} netdata_mdflush_t; + +void *ebpf_mdflush_thread(void *ptr); + +extern struct config mdflush_config; + +#endif diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c new file mode 100644 index 0000000..ec1f07a --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_mount.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_mount.h" + +static ebpf_local_maps_t mount_maps[] = {{.name = "tbl_mount", .internal_input = NETDATA_MOUNT_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +static char *mount_dimension_name[NETDATA_EBPF_MOUNT_SYSCALL] = { "mount", "umount" }; +static netdata_syscall_stat_t mount_aggregated_data[NETDATA_EBPF_MOUNT_SYSCALL]; +static netdata_publish_syscall_t mount_publish_aggregated[NETDATA_EBPF_MOUNT_SYSCALL]; + +struct config mount_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, + .index = {.avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static netdata_idx_t *mount_values = NULL; + +static netdata_idx_t mount_hash_values[NETDATA_MOUNT_END]; + +struct netdata_static_thread mount_thread = { + .name = "MOUNT KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +netdata_ebpf_targets_t mount_targets[] = { {.name = "mount", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "umount", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/mount.skel.h" // BTF code + +static struct mount_bpf *bpf_obj = NULL; + +/***************************************************************** + * + * BTF FUNCTIONS + * + *****************************************************************/ + +/* + * Disable probe + * + * Disable all probes to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_mount_disable_probe(struct mount_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_mount_probe, false); + bpf_program__set_autoload(obj->progs.netdata_umount_probe, false); + + bpf_program__set_autoload(obj->progs.netdata_mount_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_umount_retprobe, false); +} + +/* + * Disable tracepoint + * + * Disable all tracepoints to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_mount_disable_tracepoint(struct mount_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_mount_exit, false); + bpf_program__set_autoload(obj->progs.netdata_umount_exit, false); +} + +/* + * Disable trampoline + * + * Disable all trampoline to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_mount_disable_trampoline(struct mount_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_mount_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_umount_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_mount_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_umount_fexit, false); +} + +/** + * Set trampoline target + * + * Set the targets we will monitor. + * + * @param obj is the main structure for bpf objects. + */ +static inline void netdata_set_trampoline_target(struct mount_bpf *obj) +{ + char syscall[NETDATA_EBPF_MAX_SYSCALL_LENGTH + 1]; + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + mount_targets[NETDATA_MOUNT_SYSCALL].name, running_on_kernel); + + bpf_program__set_attach_target(obj->progs.netdata_mount_fentry, 0, + syscall); + + bpf_program__set_attach_target(obj->progs.netdata_mount_fexit, 0, + syscall); + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + mount_targets[NETDATA_UMOUNT_SYSCALL].name, running_on_kernel); + + bpf_program__set_attach_target(obj->progs.netdata_umount_fentry, 0, + syscall); + + bpf_program__set_attach_target(obj->progs.netdata_umount_fexit, 0, + syscall); +} + +/** + * Mount Attach Probe + * + * Attach probes to target + * + * @param obj is the main structure for bpf objects. + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_mount_attach_probe(struct mount_bpf *obj) +{ + char syscall[NETDATA_EBPF_MAX_SYSCALL_LENGTH + 1]; + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + mount_targets[NETDATA_MOUNT_SYSCALL].name, running_on_kernel); + + obj->links.netdata_mount_probe = bpf_program__attach_kprobe(obj->progs.netdata_mount_probe, + false, syscall); + int ret = (int)libbpf_get_error(obj->links.netdata_mount_probe); + if (ret) + return -1; + + obj->links.netdata_mount_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_mount_retprobe, + true, syscall); + ret = (int)libbpf_get_error(obj->links.netdata_mount_retprobe); + if (ret) + return -1; + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + mount_targets[NETDATA_UMOUNT_SYSCALL].name, running_on_kernel); + + obj->links.netdata_umount_probe = bpf_program__attach_kprobe(obj->progs.netdata_umount_probe, + false, syscall); + ret = (int)libbpf_get_error(obj->links.netdata_umount_probe); + if (ret) + return -1; + + obj->links.netdata_umount_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_umount_retprobe, + true, syscall); + ret = (int)libbpf_get_error(obj->links.netdata_umount_retprobe); + if (ret) + return -1; + + return 0; +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_mount_set_hash_tables(struct mount_bpf *obj) +{ + mount_maps[NETDATA_KEY_MOUNT_TABLE].map_fd = bpf_map__fd(obj->maps.tbl_mount); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_mount_load_and_attach(struct mount_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_targets_t *mt = em->targets; + netdata_ebpf_program_loaded_t test = mt[NETDATA_MOUNT_SYSCALL].mode; + + // We are testing only one, because all will have the same behavior + if (test == EBPF_LOAD_TRAMPOLINE ) { + ebpf_mount_disable_probe(obj); + ebpf_mount_disable_tracepoint(obj); + + netdata_set_trampoline_target(obj); + } else if (test == EBPF_LOAD_PROBE || + test == EBPF_LOAD_RETPROBE ) { + ebpf_mount_disable_tracepoint(obj); + ebpf_mount_disable_trampoline(obj); + } else { + ebpf_mount_disable_probe(obj); + ebpf_mount_disable_trampoline(obj); + } + + int ret = mount_bpf__load(obj); + if (!ret) { + if (test != EBPF_LOAD_PROBE && test != EBPF_LOAD_RETPROBE ) + ret = mount_bpf__attach(obj); + else + ret = ebpf_mount_attach_probe(obj); + + if (!ret) + ebpf_mount_set_hash_tables(obj); + } + + return ret; +} +#endif +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Mount Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_mount_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + freez(mount_thread.thread); + freez(mount_values); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + mount_bpf__destroy(bpf_obj); +#endif + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Mount Exit + * + * Cancel child thread. + * + * @param ptr thread data. + */ +static void ebpf_mount_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*mount_thread.thread); + ebpf_mount_free(em); +} + +/** + * Mount cleanup + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void ebpf_mount_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_mount_free(em); +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Read global table + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + uint32_t idx; + netdata_idx_t *val = mount_hash_values; + netdata_idx_t *stored = mount_values; + int fd = mount_maps[NETDATA_KEY_MOUNT_TABLE].map_fd; + + for (idx = NETDATA_KEY_MOUNT_CALL; idx < NETDATA_MOUNT_END; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, stored)) { + int i; + int end = ebpf_nprocs; + netdata_idx_t total = 0; + for (i = 0; i < end; i++) + total += stored[i]; + + val[idx] = total; + } + } +} + +/** + * Mount read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_mount_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_mount_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_MOUNT_SLEEP_MS * em->update_every; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Send data to Netdata calling auxiliary functions. +*/ +static void ebpf_mount_send_data() +{ + int i, j; + int end = NETDATA_EBPF_MOUNT_SYSCALL; + for (i = NETDATA_KEY_MOUNT_CALL, j = NETDATA_KEY_MOUNT_ERROR; i < end; i++, j++) { + mount_publish_aggregated[i].ncall = mount_hash_values[i]; + mount_publish_aggregated[i].nerr = mount_hash_values[j]; + } + + write_count_chart(NETDATA_EBPF_MOUNT_CALLS, NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, + mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL); + + write_err_chart(NETDATA_EBPF_MOUNT_ERRORS, NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, + mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL); +} + +/** +* Main loop for this collector. +*/ +static void mount_collector(ebpf_module_t *em) +{ + mount_thread.thread = mallocz(sizeof(netdata_thread_t)); + mount_thread.start_routine = ebpf_mount_read_hash; + memset(mount_hash_values, 0, sizeof(mount_hash_values)); + + mount_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); + + netdata_thread_create(mount_thread.thread, mount_thread.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_mount_read_hash, em); + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + + ebpf_mount_send_data(); + + pthread_mutex_unlock(&lock); + } +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Create mount charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_mount_charts(int update_every) +{ + ebpf_create_chart(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, NETDATA_EBPF_MOUNT_CALLS, + "Calls to mount and umount syscalls", + EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_MOUNT_FAMILY, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS, + ebpf_create_global_dimension, + mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL, + update_every, NETDATA_EBPF_MODULE_NAME_MOUNT); + + ebpf_create_chart(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, NETDATA_EBPF_MOUNT_ERRORS, + "Errors to mount and umount file systems", + EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_MOUNT_FAMILY, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS + 1, + ebpf_create_global_dimension, + mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL, + update_every, NETDATA_EBPF_MODULE_NAME_MOUNT); + + fflush(stdout); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_mount_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->enabled = CONFIG_BOOLEAN_NO; + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = mount_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_mount_load_and_attach(bpf_obj, em); + } +#endif + + if (ret) + error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + +/** + * Mount thread + * + * Thread used to make mount thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always returns NULL + */ +void *ebpf_mount_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_mount_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = mount_maps; + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_mount_load_bpf(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endmount; + } + + int algorithms[NETDATA_EBPF_MOUNT_SYSCALL] = { NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX }; + + ebpf_global_labels(mount_aggregated_data, mount_publish_aggregated, mount_dimension_name, mount_dimension_name, + algorithms, NETDATA_EBPF_MOUNT_SYSCALL); + + pthread_mutex_lock(&lock); + ebpf_create_mount_charts(em->update_every); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + mount_collector(em); + +endmount: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_mount.h b/collectors/ebpf.plugin/ebpf_mount.h new file mode 100644 index 0000000..5a8d11a --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_mount.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_MOUNT_H +#define NETDATA_EBPF_MOUNT_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_MOUNT "mount" + +#define NETDATA_EBPF_MOUNT_SYSCALL 2 + +#define NETDATA_LATENCY_MOUNT_SLEEP_MS 700000ULL + +#define NETDATA_EBPF_MOUNT_CALLS "call" +#define NETDATA_EBPF_MOUNT_ERRORS "error" +#define NETDATA_EBPF_MOUNT_FAMILY "mount (eBPF)" + +// Process configuration name +#define NETDATA_MOUNT_CONFIG_FILE "mount.conf" + +enum mount_counters { + NETDATA_KEY_MOUNT_CALL, + NETDATA_KEY_UMOUNT_CALL, + NETDATA_KEY_MOUNT_ERROR, + NETDATA_KEY_UMOUNT_ERROR, + + NETDATA_MOUNT_END +}; + +enum mount_tables { + NETDATA_KEY_MOUNT_TABLE +}; + +enum netdata_mount_syscalls { + NETDATA_MOUNT_SYSCALL, + NETDATA_UMOUNT_SYSCALL, + + NETDATA_MOUNT_SYSCALLS_END +}; + +extern struct config mount_config; +void *ebpf_mount_thread(void *ptr); +extern netdata_ebpf_targets_t mount_targets[]; + +#endif /* NETDATA_EBPF_MOUNT_H */ diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c new file mode 100644 index 0000000..d93e415 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_oomkill.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_oomkill.h" + +struct config oomkill_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define OOMKILL_MAP_KILLCNT 0 +static ebpf_local_maps_t oomkill_maps[] = { + { + .name = "tbl_oomkill", + .internal_input = NETDATA_OOMKILL_MAX_ENTRIES, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +static ebpf_tracepoint_t oomkill_tracepoints[] = { + {.enabled = false, .class = "oom", .event = "mark_victim"}, + /* end */ + {.enabled = false, .class = NULL, .event = NULL} +}; + +static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill", .dimension = "oomkill", + .algorithm = "absolute", + .next = NULL}; + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void oomkill_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; +} + +static void oomkill_write_data(int32_t *keys, uint32_t total) +{ + // for each app, see if it was OOM killed. record as 1 if so otherwise 0. + struct target *w; + for (w = apps_groups_root_target; w != NULL; w = w->next) { + if (likely(w->exposed && w->processes)) { + bool was_oomkilled = false; + struct pid_on_target *pids = w->root_pid; + while (pids) { + uint32_t j; + for (j = 0; j < total; j++) { + if (pids->pid == keys[j]) { + was_oomkilled = true; + // set to 0 so we consider it "done". + keys[j] = 0; + goto write_dim; + } + } + pids = pids->next; + } + + write_dim:; + write_chart_dimension(w->name, was_oomkilled); + } + } + + // for any remaining keys for which we couldn't find a group, this could be + // for various reasons, but the primary one is that the PID has not yet + // been picked up by the process thread when parsing the proc filesystem. + // since it's been OOM killed, it will never be parsed in the future, so + // we have no choice but to dump it into `other`. + uint32_t j; + uint32_t rem_count = 0; + for (j = 0; j < total; j++) { + int32_t key = keys[j]; + if (key != 0) { + rem_count += 1; + } + } + if (rem_count > 0) { + write_chart_dimension("other", rem_count); + } +} + +/** + * Create specific OOMkill charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_specific_oomkill_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.", + EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CGROUP_OOMKILLS_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600, + ebpf_create_global_dimension, + &oomkill_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL); +} + +/** + * Create Systemd OOMkill Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_oomkill_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.", + EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL, + NETDATA_EBPF_MODULE_NAME_OOMKILL, update_every); +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + */ +static void ebpf_send_systemd_oomkill_charts() +{ + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->oomkill); + ect->oomkill = 0; + } + } + write_end_chart(); +} + +/* + * Send Specific OOMkill data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param value value for oomkill + */ +static void ebpf_send_specific_oomkill_data(char *type, int value) +{ + write_begin_chart(type, NETDATA_OOMKILL_CHART); + write_chart_dimension(oomkill_publish_aggregated.name, (long long)value); + write_end_chart(); +} + +/** + * Create specific OOMkill charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_oomkill_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.", + EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_OOMKILLS_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600, update_every); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. +*/ +void ebpf_oomkill_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_oomkill_charts(update_every); + } + ebpf_send_systemd_oomkill_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART) && ect->updated) { + ebpf_create_specific_oomkill_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART && ect->updated) { + ebpf_send_specific_oomkill_data(ect->name, ect->oomkill); + } else { + ebpf_obsolete_specific_oomkill_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART; + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Read data + * + * Read OOMKILL events from table. + * + * @param keys vector where data will be stored + * + * @return It returns the number of read elements + */ +static uint32_t oomkill_read_data(int32_t *keys) +{ + // the first `i` entries of `keys` will contain the currently active PIDs + // in the eBPF map. + uint32_t i = 0; + + uint32_t curr_key = 0; + uint32_t key = 0; + int mapfd = oomkill_maps[OOMKILL_MAP_KILLCNT].map_fd; + while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) { + curr_key = key; + + keys[i] = (int32_t)key; + i += 1; + + // delete this key now that we've recorded its existence. there's no + // race here, as the same PID will only get OOM killed once. + int test = bpf_map_delete_elem(mapfd, &key); + if (unlikely(test < 0)) { + // since there's only 1 thread doing these deletions, it should be + // impossible to get this condition. + error("key unexpectedly not available for deletion."); + } + } + + return i; +} + +/** + * Update cgroup + * + * Update cgroup data based in + * + * @param keys vector with pids that had oomkill event + * @param total number of elements in keys vector. + */ +static void ebpf_update_oomkill_cgroup(int32_t *keys, uint32_t total) +{ + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + ect->oomkill = 0; + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + uint32_t j; + int32_t pid = pids->pid; + for (j = 0; j < total; j++) { + if (pid == keys[j]) { + ect->oomkill = 1; + break; + } + } + } + } +} + +/** +* Main loop for this collector. +*/ +static void oomkill_collector(ebpf_module_t *em) +{ + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + int32_t keys[NETDATA_OOMKILL_MAX_ENTRIES]; + memset(keys, 0, sizeof(keys)); + + // loop and read until ebpf plugin is closed. + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&collect_data_mutex); + pthread_mutex_lock(&lock); + + uint32_t count = oomkill_read_data(keys); + if (cgroups && count) + ebpf_update_oomkill_cgroup(keys, count); + + // write everything from the ebpf map. + if (cgroups) + ebpf_oomkill_send_cgroup_data(update_every); + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_OOMKILL_CHART); + oomkill_write_data(keys, count); + write_end_chart(); + } + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_OOMKILL_CHART, + "OOM kills", + EBPF_COMMON_DIMENSION_KILLS, + "mem", + NETDATA_EBPF_CHART_TYPE_STACKED, + 20020, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/** + * OOM kill tracking thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. + */ +void *ebpf_oomkill_thread(void *ptr) +{ + netdata_thread_cleanup_push(oomkill_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = oomkill_maps; + +#define NETDATA_DEFAULT_OOM_DISABLED_MSG "Disabling OOMKILL thread, because" + if (unlikely(!all_pids || !em->apps_charts)) { + // When we are not running integration with apps, we won't fill necessary variables for this thread to run, so + // we need to disable it. + if (em->thread->enabled) + info("%s apps integration is completely disabled.", NETDATA_DEFAULT_OOM_DISABLED_MSG); + + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + } else if (running_on_kernel < NETDATA_EBPF_KERNEL_4_14) { + if (em->thread->enabled) + info("%s kernel does not have necessary tracepoints.", NETDATA_DEFAULT_OOM_DISABLED_MSG); + + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + } + + if (em->thread->enabled == NETDATA_THREAD_EBPF_STOPPED) { + goto endoomkill; + } + + if (ebpf_enable_tracepoints(oomkill_tracepoints) == 0) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endoomkill; + } + + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endoomkill; + } + + pthread_mutex_lock(&lock); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + oomkill_collector(em); + +endoomkill: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_oomkill.h b/collectors/ebpf.plugin/ebpf_oomkill.h new file mode 100644 index 0000000..7860863 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_oomkill.h @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_OOMKILL_H +#define NETDATA_EBPF_OOMKILL_H 1 + +/***************************************************************** + * copied from kernel-collectors repo, with modifications needed + * for inclusion here. + *****************************************************************/ + +#define NETDATA_OOMKILL_MAX_ENTRIES 64 + +typedef uint8_t oomkill_ebpf_val_t; + +/***************************************************************** + * below this is eBPF plugin-specific code. + *****************************************************************/ + +#define NETDATA_EBPF_MODULE_NAME_OOMKILL "oomkill" +#define NETDATA_OOMKILL_SLEEP_MS 650000ULL +#define NETDATA_OOMKILL_CONFIG_FILE "oomkill.conf" + +#define NETDATA_OOMKILL_CHART "oomkills" + +// Contexts +#define NETDATA_CGROUP_OOMKILLS_CONTEXT "cgroup.oomkills" + +extern struct config oomkill_config; +void *ebpf_oomkill_thread(void *ptr); +void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr); + +#endif /* NETDATA_EBPF_OOMKILL_H */ diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c new file mode 100644 index 0000000..682577d --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -0,0 +1,1327 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "ebpf.h" +#include "ebpf_process.h" + +/***************************************************************** + * + * GLOBAL VARIABLES + * + *****************************************************************/ + +static char *process_dimension_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "process", "task", "process", "thread" }; +static char *process_id_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "do_exit", "release_task", "_do_fork", "sys_clone" }; +static char *status[] = { "process", "zombie" }; + +static ebpf_local_maps_t process_maps[] = {{.name = "tbl_pid_stats", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_total_stats", .internal_input = NETDATA_KEY_END_VECTOR, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "process_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +char *tracepoint_sched_type = { "sched" } ; +char *tracepoint_sched_process_exit = { "sched_process_exit" }; +char *tracepoint_sched_process_exec = { "sched_process_exec" }; +char *tracepoint_sched_process_fork = { "sched_process_fork" }; +static int was_sched_process_exit_enabled = 0; +static int was_sched_process_exec_enabled = 0; +static int was_sched_process_fork_enabled = 0; + +static netdata_idx_t *process_hash_values = NULL; +static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END]; +static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END]; + +ebpf_process_stat_t **global_process_stats = NULL; +ebpf_process_publish_apps_t **current_apps_data = NULL; + +int process_enabled = 0; +bool publish_internal_metrics = true; + +struct config process_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static char *threads_stat[NETDATA_EBPF_THREAD_STAT_END] = {"total", "running"}; +static char *load_event_stat[NETDATA_EBPF_LOAD_STAT_END] = {"legacy", "co-re"}; + +static struct netdata_static_thread cgroup_thread = { + .name = "EBPF CGROUP", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +/***************************************************************** + * + * PROCESS DATA AND SEND TO NETDATA + * + *****************************************************************/ + +/** + * Update publish structure before to send data to Netdata. + * + * @param publish the first output structure with independent dimensions + * @param pvc the second output structure with correlated dimensions + * @param input the structure with the input data. + */ +static void ebpf_update_global_publish(netdata_publish_syscall_t *publish, netdata_publish_vfs_common_t *pvc, + netdata_syscall_stat_t *input) +{ + netdata_publish_syscall_t *move = publish; + int selector = NETDATA_KEY_PUBLISH_PROCESS_EXIT; + while (move) { + move->ncall = (input->call > move->pcall) ? input->call - move->pcall : move->pcall - input->call; + move->nbyte = (input->bytes > move->pbyte) ? input->bytes - move->pbyte : move->pbyte - input->bytes; + move->nerr = (input->ecall > move->nerr) ? input->ecall - move->perr : move->perr - input->ecall; + + move->pcall = input->call; + move->pbyte = input->bytes; + move->perr = input->ecall; + + input = input->next; + move = move->next; + selector++; + } + + pvc->running = (long)publish[NETDATA_KEY_PUBLISH_PROCESS_FORK].ncall - + (long)publish[NETDATA_KEY_PUBLISH_PROCESS_CLONE].ncall; + publish[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].ncall = -publish[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].ncall; + pvc->zombie = (long)publish[NETDATA_KEY_PUBLISH_PROCESS_EXIT].ncall + + (long)publish[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].ncall; +} + +/** + * Call the necessary functions to create a chart. + * + * @param family the chart family + * @param move the pointer with the values that will be published + */ +static void write_status_chart(char *family, netdata_publish_vfs_common_t *pvc) +{ + write_begin_chart(family, NETDATA_PROCESS_STATUS_NAME); + + write_chart_dimension(status[0], (long long)pvc->running); + write_chart_dimension(status[1], (long long)pvc->zombie); + + write_end_chart(); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + */ +static void ebpf_process_send_data(ebpf_module_t *em) +{ + netdata_publish_vfs_common_t pvc; + ebpf_update_global_publish(process_publish_aggregated, &pvc, process_aggregated_data); + + write_count_chart(NETDATA_EXIT_SYSCALL, NETDATA_EBPF_SYSTEM_GROUP, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT], 2); + write_count_chart(NETDATA_PROCESS_SYSCALL, NETDATA_EBPF_SYSTEM_GROUP, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2); + + write_status_chart(NETDATA_EBPF_SYSTEM_GROUP, &pvc); + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_PROCESS_ERROR_NAME, NETDATA_EBPF_SYSTEM_GROUP, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2); + } +} + +/** + * Sum values for pid + * + * @param root the structure with all available PIDs + * + * @param offset the address that we are reading + * + * @return it returns the sum of all PIDs + */ +long long ebpf_process_sum_values_for_pids(struct pid_on_target *root, size_t offset) +{ + long long ret = 0; + while (root) { + int32_t pid = root->pid; + ebpf_process_publish_apps_t *w = current_apps_data[pid]; + if (w) { + ret += get_value_from_structure((char *)w, offset); + } + + root = root->next; + } + + return ret; +} + +/** + * Remove process pid + * + * Remove from PID task table when task_release was called. + */ +void ebpf_process_remove_pids() +{ + struct pid_stat *pids = root_of_pids; + int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; + while (pids) { + uint32_t pid = pids->pid; + ebpf_process_stat_t *w = global_process_stats[pid]; + if (w) { + freez(w); + global_process_stats[pid] = NULL; + bpf_map_delete_elem(pid_fd, &pid); + } + + pids = pids->next; + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param root the target list. + */ +void ebpf_process_send_apps_data(struct target *root, ebpf_module_t *em) +{ + struct target *w; + collected_number value; + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, create_process)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, create_thread)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, + call_do_exit)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, + call_release_task)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, + task_err)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + } + + ebpf_process_remove_pids(); +} + +/***************************************************************** + * + * READ INFORMATION FROM KERNEL RING + * + *****************************************************************/ + +/** + * Read the hash table and store data to allocated vectors. + */ +static void read_hash_global_tables() +{ + uint64_t idx; + netdata_idx_t res[NETDATA_KEY_END_VECTOR]; + + netdata_idx_t *val = process_hash_values; + int fd = process_maps[NETDATA_PROCESS_GLOBAL_TABLE].map_fd; + for (idx = 0; idx < NETDATA_KEY_END_VECTOR; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, val)) { + uint64_t total = 0; + int i; + int end = ebpf_nprocs; + for (i = 0; i < end; i++) + total += val[i]; + + res[idx] = total; + } else { + res[idx] = 0; + } + } + + process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_EXIT].call = res[NETDATA_KEY_CALLS_DO_EXIT]; + process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].call = res[NETDATA_KEY_CALLS_RELEASE_TASK]; + process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_FORK].call = res[NETDATA_KEY_CALLS_DO_FORK]; + process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_CLONE].call = res[NETDATA_KEY_CALLS_SYS_CLONE]; + + process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_FORK].ecall = res[NETDATA_KEY_ERROR_DO_FORK]; + process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_CLONE].ecall = res[NETDATA_KEY_ERROR_SYS_CLONE]; +} + +/** + * Read the hash table and store data to allocated vectors. + */ +static void ebpf_process_update_apps_data() +{ + struct pid_stat *pids = root_of_pids; + while (pids) { + uint32_t current_pid = pids->pid; + ebpf_process_stat_t *ps = global_process_stats[current_pid]; + if (!ps) { + pids = pids->next; + continue; + } + + ebpf_process_publish_apps_t *cad = current_apps_data[current_pid]; + if (!cad) { + cad = callocz(1, sizeof(ebpf_process_publish_apps_t)); + current_apps_data[current_pid] = cad; + } + + //Read data + cad->call_do_exit = ps->exit_call; + cad->call_release_task = ps->release_call; + cad->create_process = ps->create_process; + cad->create_thread = ps->create_thread; + + cad->task_err = ps->task_err; + + pids = pids->next; + } +} + +/** + * Cgroup Exit + * + * Function used with netdata_thread_clean_push + * + * @param ptr unused argument + */ +static void ebpf_cgroup_exit(void *ptr) +{ + UNUSED(ptr); +} + +/** + * Cgroup update shm + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data from shared memory. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_cgroup_update_shm(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_cgroup_exit, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + usec_t step = 3 * USEC_PER_SEC; + int counter = NETDATA_EBPF_CGROUP_UPDATE - 1; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + // We are using a small heartbeat time to wake up thread, + // but we should not update so frequently the shared memory data + if (++counter >= NETDATA_EBPF_CGROUP_UPDATE) { + counter = 0; + if (!shm_ebpf_cgroup.header) + ebpf_map_cgroup_shared_memory(); + + ebpf_parse_cgroup_shm_data(); + } + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_process_cgroup() +{ + ebpf_cgroup_target_t *ect ; + int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + ebpf_process_stat_t *out = &pids->ps; + if (global_process_stats[pid]) { + ebpf_process_stat_t *in = global_process_stats[pid]; + + memcpy(out, in, sizeof(ebpf_process_stat_t)); + } else { + if (bpf_map_lookup_elem(pid_fd, &pid, out)) { + memset(out, 0, sizeof(ebpf_process_stat_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/***************************************************************** + * + * FUNCTIONS TO CREATE CHARTS + * + *****************************************************************/ + +/** + * Create process status chart + * + * @param family the chart family + * @param name the chart name + * @param axis the axis label + * @param web the group name used to attach the chart on dashboard + * @param order the order number of the specified chart + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_process_status_chart(char *family, char *name, char *axis, + char *web, char *algorithm, int order, int update_every) +{ + printf("CHART %s.%s '' 'Process not closed' '%s' '%s' '' line %d %d '' 'ebpf.plugin' 'process'\n", + family, + name, + axis, + web, + order, + update_every); + + printf("DIMENSION %s '' %s 1 1\n", status[0], algorithm); + printf("DIMENSION %s '' %s 1 1\n", status[1], algorithm); +} + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param em a pointer to the structure with the default values. + */ +static void ebpf_create_global_charts(ebpf_module_t *em) +{ + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_SYSCALL, + "Start process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21002, + ebpf_create_global_dimension, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_EXIT_SYSCALL, + "Exit process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21003, + ebpf_create_global_dimension, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_process_status_chart(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_STATUS_NAME, + EBPF_COMMON_DIMENSION_DIFFERENCE, + NETDATA_PROCESS_GROUP, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + 21004, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_ERROR_NAME, + "Fails to create process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21005, + ebpf_create_global_dimension, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } +} + +/** + * Create chart for Statistic Thread + * + * Write to standard output current values for threads. + * + * @param em a pointer to the structure with the default values. + */ +static inline void ebpf_create_statistic_thread_chart(ebpf_module_t *em) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_THREADS, + "Threads info.", + "threads", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 140000, + em->update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], + threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], + threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Create chart for Load Thread + * + * Write to standard output current values for load mode. + * + * @param em a pointer to the structure with the default values. + */ +static inline void ebpf_create_statistic_load_chart(ebpf_module_t *em) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_LOAD_METHOD, + "Load info.", + "methods", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 140001, + em->update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], + load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], + load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Update Internal Metric variable + * + * By default eBPF.plugin sends internal metrics for netdata, but user can + * disable this. + * + * The function updates the variable used to send charts. + */ +static void update_internal_metric_variable() +{ + const char *s = getenv("NETDATA_INTERNALS_MONITORING"); + if (s && *s && strcmp(s, "NO") == 0) + publish_internal_metrics = false; +} + +/** + * Create Statistics Charts + * + * Create charts that will show statistics related to eBPF plugin. + * + * @param em a pointer to the structure with the default values. + */ +static void ebpf_create_statistic_charts(ebpf_module_t *em) +{ + update_internal_metric_variable(); + if (!publish_internal_metrics) + return; + + ebpf_create_statistic_thread_chart(em); + + ebpf_create_statistic_load_chart(em); +} + +/** + * Create process apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + * @param ptr a pointer for the targets. + */ +void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_PROCESS, + "Process started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20065, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_THREAD, + "Threads started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20066, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_EXIT, + "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20067, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_CLOSE, + "Tasks closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20068, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_ERROR, + "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20069, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, + em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param root a pointer for the targets. + */ +static void ebpf_create_apps_charts(struct target *root) +{ + if (unlikely(!all_pids)) + return; + + struct target *w; + int newly_added = 0; + + for (w = root; w; w = w->next) { + if (w->target) + continue; + + if (unlikely(w->processes && (debug_enabled || w->debug_enabled))) { + struct pid_on_target *pid_on_target; + + fprintf( + stderr, "ebpf.plugin: target '%s' has aggregated %u process%s:", w->name, w->processes, + (w->processes == 1) ? "" : "es"); + + for (pid_on_target = w->root_pid; pid_on_target; pid_on_target = pid_on_target->next) { + fprintf(stderr, " %d", pid_on_target->pid); + } + + fputc('\n', stderr); + } + + if (!w->exposed && w->processes) { + newly_added++; + w->exposed = 1; + if (debug_enabled || w->debug_enabled) + debug_log_int("%s just added - regenerating charts.", w->name); + } + } + + if (!newly_added) + return; + + int counter; + for (counter = 0; ebpf_modules[counter].thread_name; counter++) { + ebpf_module_t *current = &ebpf_modules[counter]; + if (current->enabled && current->apps_charts && current->apps_routine) + current->apps_routine(current, root); + } +} + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Process disable tracepoints + * + * Disable tracepoints when the plugin was responsible to enable it. + */ +static void ebpf_process_disable_tracepoints() +{ + char *default_message = { "Cannot disable the tracepoint" }; + if (!was_sched_process_exit_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); + } + + if (!was_sched_process_exec_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exec); + } + + if (!was_sched_process_fork_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); + } +} + +/** + * Process Exit + * + * Cancel child thread. + * + * @param ptr thread data. + */ +static void ebpf_process_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + + ebpf_cleanup_publish_syscall(process_publish_aggregated); + freez(process_hash_values); + + ebpf_process_disable_tracepoints(); + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); + pthread_cancel(*cgroup_thread.thread); +} + +/***************************************************************** + * + * FUNCTIONS WITH THE MAIN LOOP + * + *****************************************************************/ + + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param ps structure used to store data + * @param pids input data + */ +static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_target2 *pids) +{ + ebpf_process_stat_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (pids) { + ebpf_process_stat_t *ps = &pids->ps; + + accumulator.exit_call += ps->exit_call; + accumulator.release_call += ps->release_call; + accumulator.create_process += ps->create_process; + accumulator.create_thread += ps->create_thread; + + accumulator.task_err += ps->task_err; + + pids = pids->next; + } + + ps->exit_call = (accumulator.exit_call >= ps->exit_call) ? accumulator.exit_call : ps->exit_call; + ps->release_call = (accumulator.release_call >= ps->release_call) ? accumulator.release_call : ps->release_call; + ps->create_process = (accumulator.create_process >= ps->create_process) ? accumulator.create_process : ps->create_process; + ps->create_thread = (accumulator.create_thread >= ps->create_thread) ? accumulator.create_thread : ps->create_thread; + + ps->task_err = (accumulator.task_err >= ps->task_err) ? accumulator.task_err : ps->task_err; +} + +/* + * Send Specific Process data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param values structure with values that will be sent to netdata + * @param em the structure with thread information + */ +static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values, ebpf_module_t *em) +{ + write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS); + write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK].name, + (long long) values->create_process); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD); + write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE].name, + (long long) values->create_thread); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT); + write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name, + (long long) values->release_call); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE); + write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].name, + (long long) values->release_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR); + write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name, + (long long) values->task_err); + write_end_chart(); + } +} + +/** + * Create specific process charts + * + * Create charts for cgroup/application + * + * @param type the chart type. + * @param em the structure with thread information + */ +static void ebpf_create_specific_process_charts(char *type, ebpf_module_t *em) +{ + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP, + NETDATA_CGROUP_PROCESS_CREATE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5000, + ebpf_create_global_dimension, &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP, + NETDATA_CGROUP_THREAD_CREATE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5001, + ebpf_create_global_dimension, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT, "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP, + NETDATA_CGROUP_PROCESS_EXIT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002, + ebpf_create_global_dimension, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP, + NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003, + ebpf_create_global_dimension, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR, "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP, + NETDATA_CGROUP_PROCESS_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004, + ebpf_create_global_dimension, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } +} + +/** + * Obsolete specific process charts + * + * Obsolete charts for cgroup/application + * + * @param type the chart type. + * @param em the structure with thread information + */ +static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_PROCESS_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5000, + em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_THREAD_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5001, + em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_EXIT,"Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_PROCESS_EXIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002, + em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_CLOSE,"Tasks closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_ERROR,"Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_PROCESS_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004, + em->update_every); + } +} + +/** + * Create Systemd process Charts + * + * Create charts when systemd is enabled + * + * @param em the structure with thread information + **/ +static void ebpf_create_systemd_process_charts(ebpf_module_t *em) +{ + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20065, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20066, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20067, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_EXIT, "Tasks closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20068, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_ERROR, "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20069, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + } +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + * + * @param em the structure with thread information + */ +static void ebpf_send_systemd_process_charts(ebpf_module_t *em) +{ + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_ps.create_process); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_ps.create_thread); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_ps.exit_call); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_ps.release_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_ps.task_err); + } + } + write_end_chart(); + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information +*/ +static void ebpf_process_send_cgroup_data(ebpf_module_t *em) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_process_sum_cgroup_pids(&ect->publish_systemd_ps, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_process_charts(em); + } + + ebpf_send_systemd_process_charts(em); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART) && ect->updated) { + ebpf_create_specific_process_charts(ect->name, em); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART) { + if (ect->updated) { + ebpf_send_specific_process_data(ect->name, &ect->publish_systemd_ps, em); + } else { + ebpf_obsolete_specific_process_charts(ect->name, em); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Update Cgroup algorithm + * + * Change algorithm from absolute to incremental + */ +void ebpf_process_update_cgroup_algorithm() +{ + int i; + for (i = 0; i < NETDATA_KEY_PUBLISH_PROCESS_END; i++) { + netdata_publish_syscall_t *ptr = &process_publish_aggregated[i]; + freez(ptr->algorithm); + ptr->algorithm = strdupz(ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } +} + +/** + * Send Statistic Data + * + * Send statistic information to netdata. + */ +void ebpf_send_statistic_data() +{ + if (!publish_internal_metrics) + return; + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS); + write_chart_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], (long long)plugin_statistics.threads); + write_chart_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], (long long)plugin_statistics.running); + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD); + write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], (long long)plugin_statistics.legacy); + write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], (long long)plugin_statistics.core); + write_end_chart(); +} + +/** + * Main loop for this collector. + * + * @param em the structure with thread information + */ +static void process_collector(ebpf_module_t *em) +{ + // Start cgroup integration before other threads + cgroup_thread.thread = mallocz(sizeof(netdata_thread_t)); + cgroup_thread.start_routine = ebpf_cgroup_update_shm; + + netdata_thread_create(cgroup_thread.thread, cgroup_thread.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_cgroup_update_shm, NULL); + + heartbeat_t hb; + heartbeat_init(&hb); + int publish_global = em->global_charts; + int cgroups = em->cgroup_charts; + int thread_enabled = em->enabled; + if (cgroups) + ebpf_process_update_cgroup_algorithm(); + + int update_apps_every = (int) EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT; + int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; + int update_every = em->update_every; + int counter = update_every - 1; + int update_apps_list = update_apps_every - 1; + while (!ebpf_exit_plugin) { + usec_t dt = heartbeat_next(&hb, USEC_PER_SEC); + (void)dt; + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&collect_data_mutex); + if (++update_apps_list == update_apps_every) { + update_apps_list = 0; + cleanup_exited_pids(); + collect_data_for_all_processes(pid_fd); + } + pthread_mutex_unlock(&collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + + read_hash_global_tables(); + + netdata_apps_integration_flags_t apps_enabled = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + + ebpf_create_apps_charts(apps_groups_root_target); + if (all_pids_count > 0) { + if (apps_enabled) { + ebpf_process_update_apps_data(); + } + + if (cgroups) { + ebpf_update_process_cgroup(); + } + } + + pthread_mutex_lock(&lock); + ebpf_send_statistic_data(); + + if (thread_enabled) { + if (publish_global) { + ebpf_process_send_data(em); + } + + if (apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_process_send_apps_data(apps_groups_root_target, em); + } + + if (cgroups) { + ebpf_process_send_cgroup_data(em); + } + } + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } + + fflush(stdout); + } +} + +/***************************************************************** + * + * FUNCTIONS TO START THREAD + * + *****************************************************************/ + +/** + * Allocate vectors used with this thread. + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param length is the length for the vectors used inside the collector. + */ +static void ebpf_process_allocate_global_vectors(size_t length) +{ + memset(process_aggregated_data, 0, length * sizeof(netdata_syscall_stat_t)); + memset(process_publish_aggregated, 0, length * sizeof(netdata_publish_syscall_t)); + process_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + + global_process_stats = callocz((size_t)pid_max, sizeof(ebpf_process_stat_t *)); + current_apps_data = callocz((size_t)pid_max, sizeof(ebpf_process_publish_apps_t *)); +} + +static void change_syscalls() +{ + static char *lfork = { "do_fork" }; + process_id_names[NETDATA_KEY_PUBLISH_PROCESS_FORK] = lfork; +} + +/** + * Set local variables + * + */ +static void set_local_pointers() +{ + if (isrh >= NETDATA_MINIMUM_RH_VERSION && isrh < NETDATA_RH_8) + change_syscalls(); +} + +/***************************************************************** + * + * EBPF PROCESS THREAD + * + *****************************************************************/ + +/** + * Enable tracepoints + * + * Enable necessary tracepoints for thread. + * + * @return It returns 0 on success and -1 otherwise + */ +static int ebpf_process_enable_tracepoints() +{ + int test = ebpf_is_tracepoint_enabled(tracepoint_sched_type, tracepoint_sched_process_exit); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) + return -1; + } + was_sched_process_exit_enabled = test; + + test = ebpf_is_tracepoint_enabled(tracepoint_sched_type, tracepoint_sched_process_exec); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) + return -1; + } + was_sched_process_exec_enabled = test; + + test = ebpf_is_tracepoint_enabled(tracepoint_sched_type, tracepoint_sched_process_fork); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) + return -1; + } + was_sched_process_fork_enabled = test; + + return 0; +} + +/** + * Process thread + * + * Thread used to generate process charts. + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_process_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_process_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = process_maps; + + if (ebpf_process_enable_tracepoints()) { + em->enabled = em->global_charts = em->apps_charts = em->cgroup_charts = CONFIG_BOOLEAN_NO; + } + process_enabled = em->enabled; + + pthread_mutex_lock(&lock); + ebpf_process_allocate_global_vectors(NETDATA_KEY_PUBLISH_PROCESS_END); + + ebpf_update_pid_table(&process_maps[0], em); + + set_local_pointers(); + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->enabled = CONFIG_BOOLEAN_NO; + pthread_mutex_unlock(&lock); + goto endprocess; + } + + int algorithms[NETDATA_KEY_PUBLISH_PROCESS_END] = { + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX + }; + + ebpf_global_labels( + process_aggregated_data, process_publish_aggregated, process_dimension_names, process_id_names, + algorithms, NETDATA_KEY_PUBLISH_PROCESS_END); + + if (process_enabled) { + ebpf_create_global_charts(em); + } + + ebpf_update_stats(&plugin_statistics, em); + ebpf_create_statistic_charts(em); + + pthread_mutex_unlock(&lock); + + process_collector(em); + +endprocess: + if (!em->enabled) + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h new file mode 100644 index 0000000..43df34d --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_process.h @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_PROCESS_H +#define NETDATA_EBPF_PROCESS_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_PROCESS "process" + +// Groups used on Dashboard +#define NETDATA_PROCESS_GROUP "processes" +#define NETDATA_PROCESS_CGROUP_GROUP "processes (eBPF)" + +// Global chart name +#define NETDATA_EXIT_SYSCALL "exit" +#define NETDATA_PROCESS_SYSCALL "process_thread" +#define NETDATA_PROCESS_ERROR_NAME "task_error" +#define NETDATA_PROCESS_STATUS_NAME "process_status" + +// Charts created on Apps submenu +#define NETDATA_SYSCALL_APPS_TASK_PROCESS "process_create" +#define NETDATA_SYSCALL_APPS_TASK_THREAD "thread_create" +#define NETDATA_SYSCALL_APPS_TASK_EXIT "task_exit" +#define NETDATA_SYSCALL_APPS_TASK_CLOSE "task_close" +#define NETDATA_SYSCALL_APPS_TASK_ERROR "task_error" + +// Process configuration name +#define NETDATA_PROCESS_CONFIG_FILE "process.conf" + +// Contexts +#define NETDATA_CGROUP_PROCESS_CREATE_CONTEXT "cgroup.process_create" +#define NETDATA_CGROUP_THREAD_CREATE_CONTEXT "cgroup.thread_create" +#define NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT "cgroup.task_close" +#define NETDATA_CGROUP_PROCESS_EXIT_CONTEXT "cgroup.task_exit" +#define NETDATA_CGROUP_PROCESS_ERROR_CONTEXT "cgroup.task_error" + +#define NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT "services.process_create" +#define NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT "services.thread_create" +#define NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT "services.task_close" +#define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "services.task_exit" +#define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "services.task_error" + +#define NETDATA_EBPF_CGROUP_UPDATE 10 + +// Statistical information +enum netdata_ebpf_thread_stats{ + NETDATA_EBPF_THREAD_STAT_TOTAL, + NETDATA_EBPF_THREAD_STAT_RUNNING, + + NETDATA_EBPF_THREAD_STAT_END +}; + +enum netdata_ebpf_load_mode_stats{ + NETDATA_EBPF_LOAD_STAT_LEGACY, + NETDATA_EBPF_LOAD_STAT_CORE, + + NETDATA_EBPF_LOAD_STAT_END +}; + +// Index from kernel +typedef enum ebpf_process_index { + NETDATA_KEY_CALLS_DO_EXIT, + + NETDATA_KEY_CALLS_RELEASE_TASK, + + NETDATA_KEY_CALLS_DO_FORK, + NETDATA_KEY_ERROR_DO_FORK, + + NETDATA_KEY_CALLS_SYS_CLONE, + NETDATA_KEY_ERROR_SYS_CLONE, + + NETDATA_KEY_END_VECTOR +} ebpf_process_index_t; + +// This enum acts as an index for publish vector. +// Do not change the enum order because we use +// different algorithms to make charts with incremental +// values (the three initial positions) and absolute values +// (the remaining charts). +typedef enum netdata_publish_process { + NETDATA_KEY_PUBLISH_PROCESS_EXIT, + NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK, + NETDATA_KEY_PUBLISH_PROCESS_FORK, + NETDATA_KEY_PUBLISH_PROCESS_CLONE, + + NETDATA_KEY_PUBLISH_PROCESS_END +} netdata_publish_process_t; + +typedef struct ebpf_process_publish_apps { + // Number of calls during the last read + uint64_t call_do_exit; + uint64_t call_release_task; + uint64_t create_process; + uint64_t create_thread; + + // Number of errors during the last read + uint64_t task_err; +} ebpf_process_publish_apps_t; + +enum ebpf_process_tables { + NETDATA_PROCESS_PID_TABLE, + NETDATA_PROCESS_GLOBAL_TABLE, + NETDATA_PROCESS_CTRL_TABLE +}; + +extern struct config process_config; + +#endif /* NETDATA_EBPF_PROCESS_H */ diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c new file mode 100644 index 0000000..f81287d --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -0,0 +1,1137 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_shm.h" + +static char *shm_dimension_name[NETDATA_SHM_END] = { "get", "at", "dt", "ctl" }; +static netdata_syscall_stat_t shm_aggregated_data[NETDATA_SHM_END]; +static netdata_publish_syscall_t shm_publish_aggregated[NETDATA_SHM_END]; + +netdata_publish_shm_t *shm_vector = NULL; + +static netdata_idx_t shm_hash_values[NETDATA_SHM_END]; +static netdata_idx_t *shm_values = NULL; + +netdata_publish_shm_t **shm_pid = NULL; + +struct config shm_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t shm_maps[] = {{.name = "tbl_pid_shm", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "shm_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_shm", .internal_input = NETDATA_SHM_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +struct netdata_static_thread shm_threads = { + .name = "SHM KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +netdata_ebpf_targets_t shm_targets[] = { {.name = "shmget", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "shmat", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "shmdt", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "shmctl", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/shm.skel.h" + +static struct shm_bpf *bpf_obj = NULL; + +/***************************************************************** + * + * BTF FUNCTIONS + * + *****************************************************************/ + +/* + * Disable tracepoint + * + * Disable all tracepoints to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_shm_disable_tracepoint(struct shm_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_syscall_shmget, false); + bpf_program__set_autoload(obj->progs.netdata_syscall_shmat, false); + bpf_program__set_autoload(obj->progs.netdata_syscall_shmdt, false); + bpf_program__set_autoload(obj->progs.netdata_syscall_shmctl, false); +} + +/* + * Disable probe + * + * Disable all probes to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_disable_probe(struct shm_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_shmget_probe, false); + bpf_program__set_autoload(obj->progs.netdata_shmat_probe, false); + bpf_program__set_autoload(obj->progs.netdata_shmdt_probe, false); + bpf_program__set_autoload(obj->progs.netdata_shmctl_probe, false); + bpf_program__set_autoload(obj->progs.netdata_shm_release_task_probe, false); +} + +/* + * Disable trampoline + * + * Disable all trampoline to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_disable_trampoline(struct shm_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_shmget_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_shmat_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_shmdt_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_shmctl_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_shm_release_task_fentry, false); +} + +/** + * Set trampoline target + * + * Set the targets we will monitor. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_set_trampoline_target(struct shm_bpf *obj) +{ + char syscall[NETDATA_EBPF_MAX_SYSCALL_LENGTH + 1]; + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMGET_CALL].name, running_on_kernel); + + bpf_program__set_attach_target(obj->progs.netdata_shmget_fentry, 0, + syscall); + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMAT_CALL].name, running_on_kernel); + bpf_program__set_attach_target(obj->progs.netdata_shmat_fentry, 0, + syscall); + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMDT_CALL].name, running_on_kernel); + bpf_program__set_attach_target(obj->progs.netdata_shmdt_fentry, 0, + syscall); + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMCTL_CALL].name, running_on_kernel); + bpf_program__set_attach_target(obj->progs.netdata_shmctl_fentry, 0, + syscall); + + bpf_program__set_attach_target(obj->progs.netdata_shm_release_task_fentry, 0, + EBPF_COMMON_FNCT_CLEAN_UP); +} + +/** + * SHM Attach Probe + * + * Attach probes to target + * + * @param obj is the main structure for bpf objects. + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_shm_attach_probe(struct shm_bpf *obj) +{ + char syscall[NETDATA_EBPF_MAX_SYSCALL_LENGTH + 1]; + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMGET_CALL].name, running_on_kernel); + + obj->links.netdata_shmget_probe = bpf_program__attach_kprobe(obj->progs.netdata_shmget_probe, + false, syscall); + int ret = (int)libbpf_get_error(obj->links.netdata_shmget_probe); + if (ret) + return -1; + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMAT_CALL].name, running_on_kernel); + obj->links.netdata_shmat_probe = bpf_program__attach_kprobe(obj->progs.netdata_shmat_probe, + false, syscall); + ret = (int)libbpf_get_error(obj->links.netdata_shmat_probe); + if (ret) + return -1; + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMDT_CALL].name, running_on_kernel); + obj->links.netdata_shmdt_probe = bpf_program__attach_kprobe(obj->progs.netdata_shmdt_probe, + false, syscall); + ret = (int)libbpf_get_error(obj->links.netdata_shmdt_probe); + if (ret) + return -1; + + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, + shm_targets[NETDATA_KEY_SHMCTL_CALL].name, running_on_kernel); + obj->links.netdata_shmctl_probe = bpf_program__attach_kprobe(obj->progs.netdata_shmctl_probe, + false, syscall); + ret = (int)libbpf_get_error(obj->links.netdata_shmctl_probe); + if (ret) + return -1; + + obj->links.netdata_shm_release_task_probe = bpf_program__attach_kprobe(obj->progs.netdata_shm_release_task_probe, + false, EBPF_COMMON_FNCT_CLEAN_UP); + ret = (int)libbpf_get_error(obj->links.netdata_shm_release_task_probe); + if (ret) + return -1; + + + return 0; +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + */ +static void ebpf_shm_set_hash_tables(struct shm_bpf *obj) +{ + shm_maps[NETDATA_PID_SHM_TABLE].map_fd = bpf_map__fd(obj->maps.tbl_pid_shm); + shm_maps[NETDATA_SHM_CONTROLLER].map_fd = bpf_map__fd(obj->maps.shm_ctrl); + shm_maps[NETDATA_SHM_GLOBAL_TABLE].map_fd = bpf_map__fd(obj->maps.tbl_shm); +} + +/** + * Disable Release Task + * + * Disable release task when apps is not enabled. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_shm_disable_release_task(struct shm_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_shm_release_task_probe, false); + bpf_program__set_autoload(obj->progs.netdata_shm_release_task_fentry, false); +} + +/** + * Adjust Map Size + * + * Resize maps according input from users. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + */ +static void ebpf_shm_adjust_map_size(struct shm_bpf *obj, ebpf_module_t *em) +{ + ebpf_update_map_size(obj->maps.tbl_pid_shm, &shm_maps[NETDATA_PID_SHM_TABLE], + em, bpf_map__name(obj->maps.tbl_pid_shm)); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_shm_load_and_attach(struct shm_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_targets_t *shmt = em->targets; + netdata_ebpf_program_loaded_t test = shmt[NETDATA_KEY_SHMGET_CALL].mode; + + // We are testing only one, because all will have the same behavior + if (test == EBPF_LOAD_TRAMPOLINE ) { + ebpf_shm_disable_tracepoint(obj); + ebpf_disable_probe(obj); + + ebpf_set_trampoline_target(obj); + } else if (test == EBPF_LOAD_PROBE || test == EBPF_LOAD_RETPROBE ) { + ebpf_shm_disable_tracepoint(obj); + ebpf_disable_trampoline(obj); + } else { + ebpf_disable_probe(obj); + ebpf_disable_trampoline(obj); + } + + ebpf_shm_adjust_map_size(obj, em); + if (!em->apps_charts && !em->cgroup_charts) + ebpf_shm_disable_release_task(obj); + + int ret = shm_bpf__load(obj); + if (!ret) { + if (test != EBPF_LOAD_PROBE && test != EBPF_LOAD_RETPROBE) + shm_bpf__attach(obj); + else + ret = ebpf_shm_attach_probe(obj); + + if (!ret) + ebpf_shm_set_hash_tables(obj); + } + + return ret; +} +#endif +/***************************************************************** + * FUNCTIONS TO CLOSE THE THREAD + *****************************************************************/ + +/** + * SHM Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_shm_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_cleanup_publish_syscall(shm_publish_aggregated); + + freez(shm_vector); + freez(shm_values); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + shm_bpf__destroy(bpf_obj); +#endif + + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; +} + +/** + * SHM Exit + * + * Cancel child thread. + * + * @param ptr thread data. + */ +static void ebpf_shm_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*shm_threads.thread); + ebpf_shm_free(em); +} + +/** + * SHM Cleanup + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void ebpf_shm_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_shm_free(em); +} + +/***************************************************************** + * COLLECTOR THREAD + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void shm_apps_accumulator(netdata_publish_shm_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_publish_shm_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_publish_shm_t *w = &out[i]; + total->get += w->get; + total->at += w->at; + total->dt += w->dt; + total->ctl += w->ctl; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void shm_fill_pid(uint32_t current_pid, netdata_publish_shm_t *publish) +{ + netdata_publish_shm_t *curr = shm_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_shm_t)); + shm_pid[current_pid] = curr; + } + + memcpy(curr, publish, sizeof(netdata_publish_shm_t)); +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_shm_cgroup() +{ + netdata_publish_shm_t *cv = shm_vector; + int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; + size_t length = sizeof(netdata_publish_shm_t) * ebpf_nprocs; + ebpf_cgroup_target_t *ect; + + memset(cv, 0, length); + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_publish_shm_t *out = &pids->shm; + if (likely(shm_pid) && shm_pid[pid]) { + netdata_publish_shm_t *in = shm_pid[pid]; + + memcpy(out, in, sizeof(netdata_publish_shm_t)); + } else { + if (!bpf_map_lookup_elem(fd, &pid, cv)) { + shm_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_publish_shm_t)); + + // now that we've consumed the value, zero it out in the map. + memset(cv, 0, length); + bpf_map_update_elem(fd, &pid, cv, BPF_EXIST); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. + */ +static void read_apps_table() +{ + netdata_publish_shm_t *cv = shm_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; + size_t length = sizeof(netdata_publish_shm_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + shm_apps_accumulator(cv); + + shm_fill_pid(key, cv); + + // now that we've consumed the value, zero it out in the map. + memset(cv, 0, length); + bpf_map_update_elem(fd, &key, cv, BPF_EXIST); + + pids = pids->next; + } +} + +/** +* Send global charts to netdata agent. +*/ +static void shm_send_global() +{ + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_SHM_GLOBAL_CHART); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMGET_CALL] + ); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMAT_CALL] + ); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMDT_CALL] + ); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMCTL_CALL] + ); + write_end_chart(); +} + +/** + * Read global counter + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + netdata_idx_t *stored = shm_values; + netdata_idx_t *val = shm_hash_values; + int fd = shm_maps[NETDATA_SHM_GLOBAL_TABLE].map_fd; + + uint32_t i, end = NETDATA_SHM_END; + for (i = NETDATA_KEY_SHMGET_CALL; i < end; i++) { + if (!bpf_map_lookup_elem(fd, &i, stored)) { + int j; + int last = ebpf_nprocs; + netdata_idx_t total = 0; + for (j = 0; j < last; j++) + total += stored[j]; + + val[i] = total; + } + } +} + +/** + * Shared memory reader thread. + * + * @param ptr It is a NULL value for this thread. + * @return It always returns NULL. + */ +void *ebpf_shm_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_shm_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + usec_t step = NETDATA_SHM_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Sum values for all targets. + */ +static void ebpf_shm_sum_pids(netdata_publish_shm_t *shm, struct pid_on_target *root) +{ + while (root) { + int32_t pid = root->pid; + netdata_publish_shm_t *w = shm_pid[pid]; + if (w) { + shm->get += w->get; + shm->at += w->at; + shm->dt += w->dt; + shm->ctl += w->ctl; + + // reset for next collection. + w->get = 0; + w->at = 0; + w->dt = 0; + w->ctl = 0; + } + root = root->next; + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param root the target list. +*/ +void ebpf_shm_send_apps_data(struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_shm_sum_pids(&w->shm, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMGET_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->shm.get); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMAT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->shm.at); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMDT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->shm.dt); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMCTL_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->shm.ctl); + } + } + write_end_chart(); +} + +/** + * Sum values for all targets. + */ +static void ebpf_shm_sum_cgroup_pids(netdata_publish_shm_t *shm, struct pid_on_target2 *root) +{ + netdata_publish_shm_t shmv; + memset(&shmv, 0, sizeof(shmv)); + while (root) { + netdata_publish_shm_t *w = &root->shm; + shmv.get += w->get; + shmv.at += w->at; + shmv.dt += w->dt; + shmv.ctl += w->ctl; + + root = root->next; + } + + memcpy(shm, &shmv, sizeof(shmv)); +} + +/** + * Create specific shared memory charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_specific_shm_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_SHMGET_CHART, + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_GET_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5800, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_chart(type, NETDATA_SHMAT_CHART, + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_AT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5801, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_chart(type, NETDATA_SHMDT_CHART, + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_DT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5802, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_chart(type, NETDATA_SHMCTL_CHART, + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_CTL_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5803, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); +} + +/** + * Obsolete specific shared memory charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_shm_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_SHMGET_CHART, + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_GET_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5800, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SHMAT_CHART, + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_AT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5801, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SHMDT_CHART, + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_DT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5802, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SHMCTL_CHART, + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_CTL_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5803, update_every); +} + +/** + * Create Systemd Swap Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_shm_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_SHMGET_CHART, + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_GET_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); + + ebpf_create_charts_on_systemd(NETDATA_SHMAT_CHART, + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_AT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); + + ebpf_create_charts_on_systemd(NETDATA_SHMDT_CHART, + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20193, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_DT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); + + ebpf_create_charts_on_systemd(NETDATA_SHMCTL_CHART, + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20193, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_CTL_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + */ +static void ebpf_send_systemd_shm_charts() +{ + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMGET_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.get); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMAT_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.at); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMDT_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.dt); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMCTL_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.ctl); + } + } + write_end_chart(); +} + +/* + * Send Specific Shared memory data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_shm_data(char *type, netdata_publish_shm_t *values) +{ + write_begin_chart(type, NETDATA_SHMGET_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL].name, (long long)values->get); + write_end_chart(); + + write_begin_chart(type, NETDATA_SHMAT_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL].name, (long long)values->at); + write_end_chart(); + + write_begin_chart(type, NETDATA_SHMDT_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL].name, (long long)values->dt); + write_end_chart(); + + write_begin_chart(type, NETDATA_SHMCTL_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL].name, (long long)values->ctl); + write_end_chart(); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. +*/ +void ebpf_shm_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_shm_sum_cgroup_pids(&ect->publish_shm, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_shm_charts(update_every); + } + + ebpf_send_systemd_shm_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_SHM_CHART) && ect->updated) { + ebpf_create_specific_shm_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_SHM_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_SHM_CHART) { + if (ect->updated) { + ebpf_send_specific_shm_data(ect->name, &ect->publish_shm); + } else { + ebpf_obsolete_specific_shm_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_SWAP_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. +*/ +static void shm_collector(ebpf_module_t *em) +{ + shm_threads.thread = mallocz(sizeof(netdata_thread_t)); + shm_threads.start_routine = ebpf_shm_read_hash; + + netdata_thread_create( + shm_threads.thread, + shm_threads.name, + NETDATA_THREAD_OPTION_DEFAULT, + ebpf_shm_read_hash, + em + ); + + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) { + read_apps_table(); + } + + if (cgroups) { + ebpf_update_shm_cgroup(); + } + + pthread_mutex_lock(&lock); + + shm_send_global(); + + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_shm_send_apps_data(apps_groups_root_target); + } + + if (cgroups) { + ebpf_shm_send_cgroup_data(update_every); + } + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * INITIALIZE THREAD + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_SHMGET_CHART, + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_charts_on_apps(NETDATA_SHMAT_CHART, + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_charts_on_apps(NETDATA_SHMDT_CHART, + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20193, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_charts_on_apps(NETDATA_SHMCTL_CHART, + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20194, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_shm_allocate_global_vectors(int apps) +{ + if (apps) + shm_pid = callocz((size_t)pid_max, sizeof(netdata_publish_shm_t *)); + + shm_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_publish_shm_t)); + + shm_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); + + memset(shm_hash_values, 0, sizeof(shm_hash_values)); +} + +/***************************************************************** + * MAIN THREAD + *****************************************************************/ + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_shm_charts(int update_every) +{ + ebpf_create_chart( + NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_SHM_GLOBAL_CHART, + "Calls to shared memory system calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_IPC_SHM_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_CALLS, + ebpf_create_global_dimension, + shm_publish_aggregated, + NETDATA_SHM_END, + update_every, NETDATA_EBPF_MODULE_NAME_SHM + ); + + fflush(stdout); +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_shm_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + + ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_KEY_SHMGET_CALL].mode); + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->enabled = CONFIG_BOOLEAN_NO; + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = shm_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_shm_load_and_attach(bpf_obj, em); + } +#endif + + + if (ret) + error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + +/** + * Shared memory thread. + * + * @param ptr a pointer to `struct ebpf_module` + * @return It always return NULL + */ +void *ebpf_shm_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_shm_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = shm_maps; + + ebpf_update_pid_table(&shm_maps[NETDATA_PID_SHM_TABLE], em); + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_shm_load_bpf(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endshm; + } + + ebpf_shm_allocate_global_vectors(em->apps_charts); + + int algorithms[NETDATA_SHM_END] = { + NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX + }; + ebpf_global_labels( + shm_aggregated_data, + shm_publish_aggregated, + shm_dimension_name, + shm_dimension_name, + algorithms, + NETDATA_SHM_END + ); + + pthread_mutex_lock(&lock); + ebpf_create_shm_charts(em->update_every); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + shm_collector(em); + +endshm: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_shm.h b/collectors/ebpf.plugin/ebpf_shm.h new file mode 100644 index 0000000..4e06881 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_shm.h @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_SHM_H +#define NETDATA_EBPF_SHM_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_SHM "shm" + +#define NETDATA_SHM_SLEEP_MS 850000ULL + +// charts +#define NETDATA_SHM_GLOBAL_CHART "shared_memory_calls" +#define NETDATA_SHMGET_CHART "shmget_call" +#define NETDATA_SHMAT_CHART "shmat_call" +#define NETDATA_SHMDT_CHART "shmdt_call" +#define NETDATA_SHMCTL_CHART "shmctl_call" + +// configuration file +#define NETDATA_DIRECTORY_SHM_CONFIG_FILE "shm.conf" + +// Contexts +#define NETDATA_CGROUP_SHM_GET_CONTEXT "cgroup.shmget" +#define NETDATA_CGROUP_SHM_AT_CONTEXT "cgroup.shmat" +#define NETDATA_CGROUP_SHM_DT_CONTEXT "cgroup.shmdt" +#define NETDATA_CGROUP_SHM_CTL_CONTEXT "cgroup.shmctl" + +#define NETDATA_SYSTEMD_SHM_GET_CONTEXT "services.shmget" +#define NETDATA_SYSTEMD_SHM_AT_CONTEXT "services.shmat" +#define NETDATA_SYSTEMD_SHM_DT_CONTEXT "services.shmdt" +#define NETDATA_SYSTEMD_SHM_CTL_CONTEXT "services.shmctl" + +typedef struct netdata_publish_shm { + uint64_t get; + uint64_t at; + uint64_t dt; + uint64_t ctl; +} netdata_publish_shm_t; + +enum shm_tables { + NETDATA_PID_SHM_TABLE, + NETDATA_SHM_CONTROLLER, + NETDATA_SHM_GLOBAL_TABLE +}; + +enum shm_counters { + NETDATA_KEY_SHMGET_CALL, + NETDATA_KEY_SHMAT_CALL, + NETDATA_KEY_SHMDT_CALL, + NETDATA_KEY_SHMCTL_CALL, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_SHM_END +}; + +extern netdata_publish_shm_t **shm_pid; + +void *ebpf_shm_thread(void *ptr); +void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr); +extern netdata_ebpf_targets_t shm_targets[]; + +extern struct config shm_config; + +#endif diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c new file mode 100644 index 0000000..3a023e4 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -0,0 +1,3976 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "ebpf.h" +#include "ebpf_socket.h" + +/***************************************************************** + * + * GLOBAL VARIABLES + * + *****************************************************************/ + +static char *socket_dimension_names[NETDATA_MAX_SOCKET_VECTOR] = { "received", "sent", "close", + "received", "sent", "retransmitted", + "connected_V4", "connected_V6", "connected_tcp", + "connected_udp"}; +static char *socket_id_names[NETDATA_MAX_SOCKET_VECTOR] = { "tcp_cleanup_rbuf", "tcp_sendmsg", "tcp_close", + "udp_recvmsg", "udp_sendmsg", "tcp_retransmit_skb", + "tcp_connect_v4", "tcp_connect_v6", "inet_csk_accept_tcp", + "inet_csk_accept_udp" }; + +static ebpf_local_maps_t socket_maps[] = {{.name = "tbl_bandwidth", + .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_global_sock", + .internal_input = NETDATA_SOCKET_COUNTER, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_lports", + .internal_input = NETDATA_SOCKET_COUNTER, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_conn_ipv4", + .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_conn_ipv6", + .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_nv_udp", + .internal_input = NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "socket_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +static netdata_idx_t *socket_hash_values = NULL; +static netdata_syscall_stat_t socket_aggregated_data[NETDATA_MAX_SOCKET_VECTOR]; +static netdata_publish_syscall_t socket_publish_aggregated[NETDATA_MAX_SOCKET_VECTOR]; + +ebpf_socket_publish_apps_t **socket_bandwidth_curr = NULL; +static ebpf_bandwidth_t *bandwidth_vector = NULL; + +pthread_mutex_t nv_mutex; +int wait_to_plot = 0; + +netdata_vector_plot_t inbound_vectors = { .plot = NULL, .next = 0, .last = 0 }; +netdata_vector_plot_t outbound_vectors = { .plot = NULL, .next = 0, .last = 0 }; +netdata_socket_t *socket_values; + +ebpf_network_viewer_port_list_t *listen_ports = NULL; + +struct config socket_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +netdata_ebpf_targets_t socket_targets[] = { {.name = "inet_csk_accept", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "tcp_retransmit_skb", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "tcp_cleanup_rbuf", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "tcp_close", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "udp_recvmsg", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "tcp_sendmsg", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "udp_sendmsg", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "tcp_v4_connect", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "tcp_v6_connect", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +struct netdata_static_thread socket_threads = { + .name = "EBPF SOCKET READ", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/socket.skel.h" // BTF code + +static struct socket_bpf *bpf_obj = NULL; + +/** + * Disable Probe + * + * Disable probes to use trampoline. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_socket_disable_probes(struct socket_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_inet_csk_accept_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_retransmit_skb_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_cleanup_rbuf_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_close_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_udp_recvmsg_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_udp_recvmsg_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_socket_release_task_kprobe, false); +} + +/** + * Disable Trampoline + * + * Disable trampoline to use probes. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_socket_disable_trampoline(struct socket_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_inet_csk_accept_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_retransmit_skb_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_cleanup_rbuf_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_close_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_udp_recvmsg_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_udp_recvmsg_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_socket_release_task_fentry, false); +} + +/** + * Set trampoline target. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_set_trampoline_target(struct socket_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_inet_csk_accept_fentry, 0, + socket_targets[NETDATA_FCNT_INET_CSK_ACCEPT].name); + + bpf_program__set_attach_target(obj->progs.netdata_tcp_v4_connect_fexit, 0, + socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name); + + bpf_program__set_attach_target(obj->progs.netdata_tcp_v6_connect_fexit, 0, + socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); + + bpf_program__set_attach_target(obj->progs.netdata_tcp_retransmit_skb_fentry, 0, + socket_targets[NETDATA_FCNT_TCP_RETRANSMIT].name); + + bpf_program__set_attach_target(obj->progs.netdata_tcp_cleanup_rbuf_fentry, 0, + socket_targets[NETDATA_FCNT_CLEANUP_RBUF].name); + + bpf_program__set_attach_target(obj->progs.netdata_tcp_close_fentry, 0, socket_targets[NETDATA_FCNT_TCP_CLOSE].name); + + bpf_program__set_attach_target(obj->progs.netdata_udp_recvmsg_fentry, 0, + socket_targets[NETDATA_FCNT_UDP_RECEVMSG].name); + + bpf_program__set_attach_target(obj->progs.netdata_udp_recvmsg_fexit, 0, + socket_targets[NETDATA_FCNT_UDP_RECEVMSG].name); + + bpf_program__set_attach_target(obj->progs.netdata_tcp_sendmsg_fentry, 0, + socket_targets[NETDATA_FCNT_TCP_SENDMSG].name); + + bpf_program__set_attach_target(obj->progs.netdata_tcp_sendmsg_fexit, 0, + socket_targets[NETDATA_FCNT_TCP_SENDMSG].name); + + bpf_program__set_attach_target(obj->progs.netdata_udp_sendmsg_fentry, 0, + socket_targets[NETDATA_FCNT_UDP_SENDMSG].name); + + bpf_program__set_attach_target(obj->progs.netdata_udp_sendmsg_fexit, 0, + socket_targets[NETDATA_FCNT_UDP_SENDMSG].name); + + bpf_program__set_attach_target(obj->progs.netdata_socket_release_task_fentry, 0, EBPF_COMMON_FNCT_CLEAN_UP); +} + + +/** + * Disable specific trampoline + * + * Disable specific trampoline to match user selection. + * + * @param obj is the main structure for bpf objects. + * @param sel option selected by user. + */ +static inline void ebpf_socket_disable_specific_trampoline(struct socket_bpf *obj, netdata_run_mode_t sel) +{ + if (sel == MODE_RETURN) { + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fentry, false); + } else { + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fexit, false); + } +} + +/** + * Disable specific probe + * + * Disable specific probe to match user selection. + * + * @param obj is the main structure for bpf objects. + * @param sel option selected by user. + */ +static inline void ebpf_socket_disable_specific_probe(struct socket_bpf *obj, netdata_run_mode_t sel) +{ + if (sel == MODE_RETURN) { + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kprobe, false); + } else { + bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kretprobe, false); + } +} + +/** + * Attach probes + * + * Attach probes to targets. + * + * @param obj is the main structure for bpf objects. + * @param sel option selected by user. + */ +static int ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t sel) +{ + obj->links.netdata_inet_csk_accept_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_inet_csk_accept_kretprobe, + true, + socket_targets[NETDATA_FCNT_INET_CSK_ACCEPT].name); + int ret = libbpf_get_error(obj->links.netdata_inet_csk_accept_kretprobe); + if (ret) + return -1; + + obj->links.netdata_tcp_v4_connect_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v4_connect_kretprobe, + true, + socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name); + ret = libbpf_get_error(obj->links.netdata_tcp_v4_connect_kretprobe); + if (ret) + return -1; + + obj->links.netdata_tcp_v6_connect_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v6_connect_kretprobe, + true, + socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); + ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kretprobe); + if (ret) + return -1; + + obj->links.netdata_tcp_retransmit_skb_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_retransmit_skb_kprobe, + false, + socket_targets[NETDATA_FCNT_TCP_RETRANSMIT].name); + ret = libbpf_get_error(obj->links.netdata_tcp_retransmit_skb_kprobe); + if (ret) + return -1; + + obj->links.netdata_tcp_cleanup_rbuf_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_cleanup_rbuf_kprobe, + false, + socket_targets[NETDATA_FCNT_CLEANUP_RBUF].name); + ret = libbpf_get_error(obj->links.netdata_tcp_cleanup_rbuf_kprobe); + if (ret) + return -1; + + obj->links.netdata_tcp_close_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_close_kprobe, + false, + socket_targets[NETDATA_FCNT_TCP_CLOSE].name); + ret = libbpf_get_error(obj->links.netdata_tcp_close_kprobe); + if (ret) + return -1; + + obj->links.netdata_udp_recvmsg_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_udp_recvmsg_kprobe, + false, + socket_targets[NETDATA_FCNT_UDP_RECEVMSG].name); + ret = libbpf_get_error(obj->links.netdata_udp_recvmsg_kprobe); + if (ret) + return -1; + + obj->links.netdata_udp_recvmsg_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_udp_recvmsg_kretprobe, + true, + socket_targets[NETDATA_FCNT_UDP_RECEVMSG].name); + ret = libbpf_get_error(obj->links.netdata_udp_recvmsg_kretprobe); + if (ret) + return -1; + + if (sel == MODE_RETURN) { + obj->links.netdata_tcp_sendmsg_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_sendmsg_kretprobe, + true, + socket_targets[NETDATA_FCNT_TCP_SENDMSG].name); + ret = libbpf_get_error(obj->links.netdata_tcp_sendmsg_kretprobe); + if (ret) + return -1; + + obj->links.netdata_udp_sendmsg_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_udp_sendmsg_kretprobe, + true, + socket_targets[NETDATA_FCNT_UDP_SENDMSG].name); + ret = libbpf_get_error(obj->links.netdata_udp_sendmsg_kretprobe); + if (ret) + return -1; + } else { + obj->links.netdata_tcp_sendmsg_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_sendmsg_kprobe, + false, + socket_targets[NETDATA_FCNT_TCP_SENDMSG].name); + ret = libbpf_get_error(obj->links.netdata_tcp_sendmsg_kprobe); + if (ret) + return -1; + + obj->links.netdata_udp_sendmsg_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_udp_sendmsg_kprobe, + false, + socket_targets[NETDATA_FCNT_UDP_SENDMSG].name); + ret = libbpf_get_error(obj->links.netdata_udp_sendmsg_kprobe); + if (ret) + return -1; + } + + obj->links.netdata_socket_release_task_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_socket_release_task_kprobe, + false, EBPF_COMMON_FNCT_CLEAN_UP); + ret = libbpf_get_error(obj->links.netdata_socket_release_task_kprobe); + if (ret) + return -1; + + return 0; +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_socket_set_hash_tables(struct socket_bpf *obj) +{ + socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd = bpf_map__fd(obj->maps.tbl_bandwidth); + socket_maps[NETDATA_SOCKET_GLOBAL].map_fd = bpf_map__fd(obj->maps.tbl_global_sock); + socket_maps[NETDATA_SOCKET_LPORTS].map_fd = bpf_map__fd(obj->maps.tbl_lports); + socket_maps[NETDATA_SOCKET_TABLE_IPV4].map_fd = bpf_map__fd(obj->maps.tbl_conn_ipv4); + socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd = bpf_map__fd(obj->maps.tbl_conn_ipv6); + socket_maps[NETDATA_SOCKET_TABLE_UDP].map_fd = bpf_map__fd(obj->maps.tbl_nv_udp); + socket_maps[NETDATA_SOCKET_TABLE_CTRL].map_fd = bpf_map__fd(obj->maps.socket_ctrl); +} + +/** + * Adjust Map Size + * + * Resize maps according input from users. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + */ +static void ebpf_socket_adjust_map_size(struct socket_bpf *obj, ebpf_module_t *em) +{ + ebpf_update_map_size(obj->maps.tbl_bandwidth, &socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH], + em, bpf_map__name(obj->maps.tbl_bandwidth)); + + ebpf_update_map_size(obj->maps.tbl_conn_ipv4, &socket_maps[NETDATA_SOCKET_TABLE_IPV4], + em, bpf_map__name(obj->maps.tbl_conn_ipv4)); + + ebpf_update_map_size(obj->maps.tbl_conn_ipv6, &socket_maps[NETDATA_SOCKET_TABLE_IPV6], + em, bpf_map__name(obj->maps.tbl_conn_ipv6)); + + ebpf_update_map_size(obj->maps.tbl_nv_udp, &socket_maps[NETDATA_SOCKET_TABLE_UDP], + em, bpf_map__name(obj->maps.tbl_nv_udp)); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_socket_load_and_attach(struct socket_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_targets_t *mt = em->targets; + netdata_ebpf_program_loaded_t test = mt[NETDATA_FCNT_INET_CSK_ACCEPT].mode; + + if (test == EBPF_LOAD_TRAMPOLINE) { + ebpf_socket_disable_probes(obj); + + ebpf_set_trampoline_target(obj); + ebpf_socket_disable_specific_trampoline(obj, em->mode); + } else { // We are not using tracepoints for this thread. + ebpf_socket_disable_trampoline(obj); + + ebpf_socket_disable_specific_probe(obj, em->mode); + } + + int ret = socket_bpf__load(obj); + if (ret) { + fprintf(stderr, "failed to load BPF object: %d\n", ret); + return ret; + } + + ebpf_socket_adjust_map_size(obj, em); + + if (test == EBPF_LOAD_TRAMPOLINE) { + ret = socket_bpf__attach(obj); + } else { + ret = ebpf_socket_attach_probes(obj, em->mode); + } + + if (!ret) { + ebpf_socket_set_hash_tables(obj); + + ebpf_update_controller(socket_maps[NETDATA_SOCKET_TABLE_CTRL].map_fd, em); + } + + return ret; +} +#endif + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean internal socket plot + * + * Clean all structures allocated with strdupz. + * + * @param ptr the pointer with addresses to clean. + */ +static inline void clean_internal_socket_plot(netdata_socket_plot_t *ptr) +{ + freez(ptr->dimension_recv); + freez(ptr->dimension_sent); + freez(ptr->resolved_name); + freez(ptr->dimension_retransmit); +} + +/** + * Clean socket plot + * + * Clean the allocated data for inbound and outbound vectors. + */ +static void clean_allocated_socket_plot() +{ + uint32_t i; + uint32_t end = inbound_vectors.last; + netdata_socket_plot_t *plot = inbound_vectors.plot; + for (i = 0; i < end; i++) { + clean_internal_socket_plot(&plot[i]); + } + + clean_internal_socket_plot(&plot[inbound_vectors.last]); + + end = outbound_vectors.last; + plot = outbound_vectors.plot; + for (i = 0; i < end; i++) { + clean_internal_socket_plot(&plot[i]); + } + clean_internal_socket_plot(&plot[outbound_vectors.last]); +} + +/** + * Clean network ports allocated during initialization. + * + * @param ptr a pointer to the link list. + */ +static void clean_network_ports(ebpf_network_viewer_port_list_t *ptr) +{ + if (unlikely(!ptr)) + return; + + while (ptr) { + ebpf_network_viewer_port_list_t *next = ptr->next; + freez(ptr->value); + freez(ptr); + ptr = next; + } +} + +/** + * Clean service names + * + * Clean the allocated link list that stores names. + * + * @param names the link list. + */ +static void clean_service_names(ebpf_network_viewer_dim_name_t *names) +{ + if (unlikely(!names)) + return; + + while (names) { + ebpf_network_viewer_dim_name_t *next = names->next; + freez(names->name); + freez(names); + names = next; + } +} + +/** + * Clean hostnames + * + * @param hostnames the hostnames to clean + */ +static void clean_hostnames(ebpf_network_viewer_hostname_list_t *hostnames) +{ + if (unlikely(!hostnames)) + return; + + while (hostnames) { + ebpf_network_viewer_hostname_list_t *next = hostnames->next; + freez(hostnames->value); + simple_pattern_free(hostnames->value_pattern); + freez(hostnames); + hostnames = next; + } +} + +/** + * Cleanup publish syscall + * + * @param nps list of structures to clean + */ +void ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps) +{ + while (nps) { + freez(nps->algorithm); + nps = nps->next; + } +} + +/** + * Clean port Structure + * + * Clean the allocated list. + * + * @param clean the list that will be cleaned + */ +void clean_port_structure(ebpf_network_viewer_port_list_t **clean) +{ + ebpf_network_viewer_port_list_t *move = *clean; + while (move) { + ebpf_network_viewer_port_list_t *next = move->next; + freez(move->value); + freez(move); + + move = next; + } + *clean = NULL; +} + +/** + * Clean IP structure + * + * Clean the allocated list. + * + * @param clean the list that will be cleaned + */ +static void clean_ip_structure(ebpf_network_viewer_ip_list_t **clean) +{ + ebpf_network_viewer_ip_list_t *move = *clean; + while (move) { + ebpf_network_viewer_ip_list_t *next = move->next; + freez(move->value); + freez(move); + + move = next; + } + *clean = NULL; +} + +/** + * Socket Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_socket_free(ebpf_module_t *em ) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_cleanup_publish_syscall(socket_publish_aggregated); + freez(socket_hash_values); + + freez(bandwidth_vector); + + freez(socket_values); + clean_allocated_socket_plot(); + freez(inbound_vectors.plot); + freez(outbound_vectors.plot); + + clean_port_structure(&listen_ports); + + ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled = 0; + + clean_network_ports(network_viewer_opt.included_port); + clean_network_ports(network_viewer_opt.excluded_port); + clean_service_names(network_viewer_opt.names); + clean_hostnames(network_viewer_opt.included_hostnames); + clean_hostnames(network_viewer_opt.excluded_hostnames); + + pthread_mutex_destroy(&nv_mutex); + + freez(socket_threads.thread); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + socket_bpf__destroy(bpf_obj); +#endif + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Socket exit + * + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_socket_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*socket_threads.thread); + ebpf_socket_free(em); +} + +/** + * Socket cleanup + * + * Clean up allocated addresses. + * + * @param ptr thread data. + */ +void ebpf_socket_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_socket_free(em); +} + +/***************************************************************** + * + * PROCESS DATA AND SEND TO NETDATA + * + *****************************************************************/ + +/** + * Update publish structure before to send data to Netdata. + * + * @param publish the first output structure with independent dimensions + * @param tcp structure to store IO from tcp sockets + * @param udp structure to store IO from udp sockets + * @param input the structure with the input data. + */ +static void ebpf_update_global_publish( + netdata_publish_syscall_t *publish, netdata_publish_vfs_common_t *tcp, netdata_publish_vfs_common_t *udp, + netdata_syscall_stat_t *input) +{ + netdata_publish_syscall_t *move = publish; + while (move) { + if (input->call != move->pcall) { + // This condition happens to avoid initial values with dimensions higher than normal values. + if (move->pcall) { + move->ncall = (input->call > move->pcall) ? input->call - move->pcall : move->pcall - input->call; + move->nbyte = (input->bytes > move->pbyte) ? input->bytes - move->pbyte : move->pbyte - input->bytes; + move->nerr = (input->ecall > move->nerr) ? input->ecall - move->perr : move->perr - input->ecall; + } else { + move->ncall = 0; + move->nbyte = 0; + move->nerr = 0; + } + + move->pcall = input->call; + move->pbyte = input->bytes; + move->perr = input->ecall; + } else { + move->ncall = 0; + move->nbyte = 0; + move->nerr = 0; + } + + input = input->next; + move = move->next; + } + + tcp->write = -(long)publish[0].nbyte; + tcp->read = (long)publish[1].nbyte; + + udp->write = -(long)publish[3].nbyte; + udp->read = (long)publish[4].nbyte; +} + +/** + * Update Network Viewer plot data + * + * @param plot the structure where the data will be stored + * @param sock the last update from the socket + */ +static inline void update_nv_plot_data(netdata_plot_values_t *plot, netdata_socket_t *sock) +{ + if (sock->ct > plot->last_time) { + plot->last_time = sock->ct; + plot->plot_recv_packets = sock->recv_packets; + plot->plot_sent_packets = sock->sent_packets; + plot->plot_recv_bytes = sock->recv_bytes; + plot->plot_sent_bytes = sock->sent_bytes; + plot->plot_retransmit = sock->retransmit; + } + + sock->recv_packets = 0; + sock->sent_packets = 0; + sock->recv_bytes = 0; + sock->sent_bytes = 0; + sock->retransmit = 0; +} + +/** + * Calculate Network Viewer Plot + * + * Do math with collected values before to plot data. + */ +static inline void calculate_nv_plot() +{ + uint32_t i; + uint32_t end = inbound_vectors.next; + for (i = 0; i < end; i++) { + update_nv_plot_data(&inbound_vectors.plot[i].plot, &inbound_vectors.plot[i].sock); + } + inbound_vectors.max_plot = end; + + // The 'Other' dimension is always calculated for the chart to have at least one dimension + update_nv_plot_data(&inbound_vectors.plot[inbound_vectors.last].plot, + &inbound_vectors.plot[inbound_vectors.last].sock); + + end = outbound_vectors.next; + for (i = 0; i < end; i++) { + update_nv_plot_data(&outbound_vectors.plot[i].plot, &outbound_vectors.plot[i].sock); + } + outbound_vectors.max_plot = end; + + // The 'Other' dimension is always calculated for the chart to have at least one dimension + update_nv_plot_data(&outbound_vectors.plot[outbound_vectors.last].plot, + &outbound_vectors.plot[outbound_vectors.last].sock); +} + +/** + * Network viewer send bytes + * + * @param ptr the structure with values to plot + * @param chart the chart name. + */ +static inline void ebpf_socket_nv_send_bytes(netdata_vector_plot_t *ptr, char *chart) +{ + uint32_t i; + uint32_t end = ptr->last_plot; + netdata_socket_plot_t *w = ptr->plot; + collected_number value; + + write_begin_chart(NETDATA_EBPF_FAMILY, chart); + for (i = 0; i < end; i++) { + value = ((collected_number) w[i].plot.plot_sent_bytes); + write_chart_dimension(w[i].dimension_sent, value); + value = (collected_number) w[i].plot.plot_recv_bytes; + write_chart_dimension(w[i].dimension_recv, value); + } + + i = ptr->last; + value = ((collected_number) w[i].plot.plot_sent_bytes); + write_chart_dimension(w[i].dimension_sent, value); + value = (collected_number) w[i].plot.plot_recv_bytes; + write_chart_dimension(w[i].dimension_recv, value); + write_end_chart(); +} + +/** + * Network Viewer Send packets + * + * @param ptr the structure with values to plot + * @param chart the chart name. + */ +static inline void ebpf_socket_nv_send_packets(netdata_vector_plot_t *ptr, char *chart) +{ + uint32_t i; + uint32_t end = ptr->last_plot; + netdata_socket_plot_t *w = ptr->plot; + collected_number value; + + write_begin_chart(NETDATA_EBPF_FAMILY, chart); + for (i = 0; i < end; i++) { + value = ((collected_number)w[i].plot.plot_sent_packets); + write_chart_dimension(w[i].dimension_sent, value); + value = (collected_number) w[i].plot.plot_recv_packets; + write_chart_dimension(w[i].dimension_recv, value); + } + + i = ptr->last; + value = ((collected_number)w[i].plot.plot_sent_packets); + write_chart_dimension(w[i].dimension_sent, value); + value = (collected_number)w[i].plot.plot_recv_packets; + write_chart_dimension(w[i].dimension_recv, value); + write_end_chart(); +} + +/** + * Network Viewer Send Retransmit + * + * @param ptr the structure with values to plot + * @param chart the chart name. + */ +static inline void ebpf_socket_nv_send_retransmit(netdata_vector_plot_t *ptr, char *chart) +{ + uint32_t i; + uint32_t end = ptr->last_plot; + netdata_socket_plot_t *w = ptr->plot; + collected_number value; + + write_begin_chart(NETDATA_EBPF_FAMILY, chart); + for (i = 0; i < end; i++) { + value = (collected_number) w[i].plot.plot_retransmit; + write_chart_dimension(w[i].dimension_retransmit, value); + } + + i = ptr->last; + value = (collected_number)w[i].plot.plot_retransmit; + write_chart_dimension(w[i].dimension_retransmit, value); + write_end_chart(); +} + +/** + * Send network viewer data + * + * @param ptr the pointer to plot data + */ +static void ebpf_socket_send_nv_data(netdata_vector_plot_t *ptr) +{ + if (!ptr->flags) + return; + + if (ptr == (netdata_vector_plot_t *)&outbound_vectors) { + ebpf_socket_nv_send_bytes(ptr, NETDATA_NV_OUTBOUND_BYTES); + fflush(stdout); + + ebpf_socket_nv_send_packets(ptr, NETDATA_NV_OUTBOUND_PACKETS); + fflush(stdout); + + ebpf_socket_nv_send_retransmit(ptr, NETDATA_NV_OUTBOUND_RETRANSMIT); + fflush(stdout); + } else { + ebpf_socket_nv_send_bytes(ptr, NETDATA_NV_INBOUND_BYTES); + fflush(stdout); + + ebpf_socket_nv_send_packets(ptr, NETDATA_NV_INBOUND_PACKETS); + fflush(stdout); + } +} + +/** + * Send Global Inbound connection + * + * Send number of connections read per protocol. + */ +static void ebpf_socket_send_global_inbound_conn() +{ + uint64_t udp_conn = 0; + uint64_t tcp_conn = 0; + ebpf_network_viewer_port_list_t *move = listen_ports; + while (move) { + if (move->protocol == IPPROTO_TCP) + tcp_conn += move->connections; + else + udp_conn += move->connections; + + move = move->next; + } + + write_begin_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_INBOUND_CONNECTIONS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_TCP].name, (long long) tcp_conn); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_UDP].name, (long long) udp_conn); + write_end_chart(); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + */ +static void ebpf_socket_send_data(ebpf_module_t *em) +{ + netdata_publish_vfs_common_t common_tcp; + netdata_publish_vfs_common_t common_udp; + ebpf_update_global_publish(socket_publish_aggregated, &common_tcp, &common_udp, socket_aggregated_data); + + ebpf_socket_send_global_inbound_conn(); + write_count_chart(NETDATA_TCP_OUTBOUND_CONNECTIONS, NETDATA_EBPF_IP_FAMILY, + &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 2); + + // We read bytes from function arguments, but bandwidth is given in bits, + // so we need to multiply by 8 to convert for the final value. + write_count_chart(NETDATA_TCP_FUNCTION_COUNT, NETDATA_EBPF_IP_FAMILY, socket_publish_aggregated, 3); + write_io_chart(NETDATA_TCP_FUNCTION_BITS, NETDATA_EBPF_IP_FAMILY, socket_id_names[0], + common_tcp.read * 8/BITS_IN_A_KILOBIT, socket_id_names[1], + common_tcp.write * 8/BITS_IN_A_KILOBIT); + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_TCP_FUNCTION_ERROR, NETDATA_EBPF_IP_FAMILY, socket_publish_aggregated, 2); + } + write_count_chart(NETDATA_TCP_RETRANSMIT, NETDATA_EBPF_IP_FAMILY, + &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT],1); + + write_count_chart(NETDATA_UDP_FUNCTION_COUNT, NETDATA_EBPF_IP_FAMILY, + &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF],2); + write_io_chart(NETDATA_UDP_FUNCTION_BITS, NETDATA_EBPF_IP_FAMILY, + socket_id_names[3], (long long)common_udp.read * 8/BITS_IN_A_KILOBIT, + socket_id_names[4], (long long)common_udp.write * 8/BITS_IN_A_KILOBIT); + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_UDP_FUNCTION_ERROR, NETDATA_EBPF_IP_FAMILY, + &socket_publish_aggregated[NETDATA_UDP_START], 2); + } +} + +/** + * Sum values for pid + * + * @param root the structure with all available PIDs + * + * @param offset the address that we are reading + * + * @return it returns the sum of all PIDs + */ +long long ebpf_socket_sum_values_for_pids(struct pid_on_target *root, size_t offset) +{ + long long ret = 0; + while (root) { + int32_t pid = root->pid; + ebpf_socket_publish_apps_t *w = socket_bandwidth_curr[pid]; + if (w) { + ret += get_value_from_structure((char *)w, offset); + } + + root = root->next; + } + + return ret; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + * @param root the target list. + */ +void ebpf_socket_send_apps_data(ebpf_module_t *em, struct target *root) +{ + UNUSED(em); + + struct target *w; + collected_number value; + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + call_tcp_v4_connection)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V6); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + call_tcp_v6_connection)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + bytes_sent)); + // We multiply by 0.008, because we read bytes, but we display bits + write_chart_dimension(w->name, ((value)*8)/1000); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + bytes_received)); + // We multiply by 0.008, because we read bytes, but we display bits + write_chart_dimension(w->name, ((value)*8)/1000); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + call_tcp_sent)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + call_tcp_received)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + retransmit)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + call_udp_sent)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, + call_udp_received)); + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + +} + +/***************************************************************** + * + * FUNCTIONS TO CREATE CHARTS + * + *****************************************************************/ + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param em a pointer to the structure with the default values. + */ +static void ebpf_create_global_charts(ebpf_module_t *em) +{ + int order = 21070; + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, + NETDATA_INBOUND_CONNECTIONS, + "Inbound connections.", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_TCP], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, + NETDATA_TCP_OUTBOUND_CONNECTIONS, + "TCP outbound connections.", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, + NETDATA_TCP_FUNCTION_COUNT, + "Calls to internal functions", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + socket_publish_aggregated, + 3, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_FUNCTION_BITS, + "TCP bandwidth", EBPF_COMMON_DIMENSION_BITS, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + socket_publish_aggregated, + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, + NETDATA_TCP_FUNCTION_ERROR, + "TCP errors", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + socket_publish_aggregated, + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + } + + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, + NETDATA_TCP_RETRANSMIT, + "Packages retransmitted", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, + NETDATA_UDP_FUNCTION_COUNT, + "UDP calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_UDP_FUNCTION_BITS, + "UDP bandwidth", EBPF_COMMON_DIMENSION_BITS, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, + NETDATA_UDP_FUNCTION_ERROR, + "UDP errors", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SOCKET_KERNEL_FUNCTIONS, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + order++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + } +} + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + * @param ptr a pointer for targets + */ +void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + int order = 20080; + ebpf_create_charts_on_apps(NETDATA_NET_APPS_CONNECTION_TCP_V4, + "Calls to tcp_v4_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_CONNECTION_TCP_V6, + "Calls to tcp_v6_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_SENT, + "Bytes sent", EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_RECV, + "bytes received", EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + "Calls for tcp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + "Calls for tcp_cleanup_rbuf", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + "Calls for tcp_retransmit", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + "Calls for udp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + "Calls for udp_recvmsg", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/** + * Create network viewer chart + * + * Create common charts. + * + * @param id chart id + * @param title chart title + * @param units units label + * @param family group name used to attach the chart on dashboard + * @param order chart order + * @param update_every value to overwrite the update frequency set by the server. + * @param ptr plot structure with values. + */ +static void ebpf_socket_create_nv_chart(char *id, char *title, char *units, + char *family, int order, int update_every, netdata_vector_plot_t *ptr) +{ + ebpf_write_chart_cmd(NETDATA_EBPF_FAMILY, + id, + title, + units, + family, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + order, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + + uint32_t i; + uint32_t end = ptr->last_plot; + netdata_socket_plot_t *w = ptr->plot; + for (i = 0; i < end; i++) { + fprintf(stdout, "DIMENSION %s '' incremental -1 1\n", w[i].dimension_sent); + fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[i].dimension_recv); + } + + end = ptr->last; + fprintf(stdout, "DIMENSION %s '' incremental -1 1\n", w[end].dimension_sent); + fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[end].dimension_recv); +} + +/** + * Create network viewer retransmit + * + * Create a specific chart. + * + * @param id the chart id + * @param title the chart title + * @param units the units label + * @param family the group name used to attach the chart on dashboard + * @param order the chart order + * @param update_every value to overwrite the update frequency set by the server. + * @param ptr the plot structure with values. + */ +static void ebpf_socket_create_nv_retransmit(char *id, char *title, char *units, + char *family, int order, int update_every, netdata_vector_plot_t *ptr) +{ + ebpf_write_chart_cmd(NETDATA_EBPF_FAMILY, + id, + title, + units, + family, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + order, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + + uint32_t i; + uint32_t end = ptr->last_plot; + netdata_socket_plot_t *w = ptr->plot; + for (i = 0; i < end; i++) { + fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[i].dimension_retransmit); + } + + end = ptr->last; + fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[end].dimension_retransmit); +} + +/** + * Create Network Viewer charts + * + * Recreate the charts when new sockets are created. + * + * @param ptr a pointer for inbound or outbound vectors. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_socket_create_nv_charts(netdata_vector_plot_t *ptr, int update_every) +{ + // We do not have new sockets, so we do not need move forward + if (ptr->max_plot == ptr->last_plot) + return; + + ptr->last_plot = ptr->max_plot; + + if (ptr == (netdata_vector_plot_t *)&outbound_vectors) { + ebpf_socket_create_nv_chart(NETDATA_NV_OUTBOUND_BYTES, + "Outbound connections (bytes).", EBPF_COMMON_DIMENSION_BYTES, + NETDATA_NETWORK_CONNECTIONS_GROUP, + 21080, + update_every, ptr); + + ebpf_socket_create_nv_chart(NETDATA_NV_OUTBOUND_PACKETS, + "Outbound connections (packets)", + EBPF_COMMON_DIMENSION_PACKETS, + NETDATA_NETWORK_CONNECTIONS_GROUP, + 21082, + update_every, ptr); + + ebpf_socket_create_nv_retransmit(NETDATA_NV_OUTBOUND_RETRANSMIT, + "Retransmitted packets", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_NETWORK_CONNECTIONS_GROUP, + 21083, + update_every, ptr); + } else { + ebpf_socket_create_nv_chart(NETDATA_NV_INBOUND_BYTES, + "Inbound connections (bytes)", EBPF_COMMON_DIMENSION_BYTES, + NETDATA_NETWORK_CONNECTIONS_GROUP, + 21084, + update_every, ptr); + + ebpf_socket_create_nv_chart(NETDATA_NV_INBOUND_PACKETS, + "Inbound connections (packets)", + EBPF_COMMON_DIMENSION_PACKETS, + NETDATA_NETWORK_CONNECTIONS_GROUP, + 21085, + update_every, ptr); + } + + ptr->flags |= NETWORK_VIEWER_CHARTS_CREATED; +} + +/***************************************************************** + * + * READ INFORMATION FROM KERNEL RING + * + *****************************************************************/ + +/** + * Is specific ip inside the range + * + * Check if the ip is inside a IP range previously defined + * + * @param cmp the IP to compare + * @param family the IP family + * + * @return It returns 1 if the IP is inside the range and 0 otherwise + */ +static int is_specific_ip_inside_range(union netdata_ip_t *cmp, int family) +{ + if (!network_viewer_opt.excluded_ips && !network_viewer_opt.included_ips) + return 1; + + uint32_t ipv4_test = ntohl(cmp->addr32[0]); + ebpf_network_viewer_ip_list_t *move = network_viewer_opt.excluded_ips; + while (move) { + if (family == AF_INET) { + if (ntohl(move->first.addr32[0]) <= ipv4_test && + ipv4_test <= ntohl(move->last.addr32[0]) ) + return 0; + } else { + if (memcmp(move->first.addr8, cmp->addr8, sizeof(union netdata_ip_t)) <= 0 && + memcmp(move->last.addr8, cmp->addr8, sizeof(union netdata_ip_t)) >= 0) { + return 0; + } + } + move = move->next; + } + + move = network_viewer_opt.included_ips; + while (move) { + if (family == AF_INET) { + if (ntohl(move->first.addr32[0]) <= ipv4_test && + ntohl(move->last.addr32[0]) >= ipv4_test) + return 1; + } else { + if (memcmp(move->first.addr8, cmp->addr8, sizeof(union netdata_ip_t)) <= 0 && + memcmp(move->last.addr8, cmp->addr8, sizeof(union netdata_ip_t)) >= 0) { + return 1; + } + } + move = move->next; + } + + return 0; +} + +/** + * Is port inside range + * + * Verify if the cmp port is inside the range [first, last]. + * This function expects only the last parameter as big endian. + * + * @param cmp the value to compare + * + * @return It returns 1 when cmp is inside and 0 otherwise. + */ +static int is_port_inside_range(uint16_t cmp) +{ + // We do not have restrictions for ports. + if (!network_viewer_opt.excluded_port && !network_viewer_opt.included_port) + return 1; + + // Test if port is excluded + ebpf_network_viewer_port_list_t *move = network_viewer_opt.excluded_port; + cmp = htons(cmp); + while (move) { + if (move->cmp_first <= cmp && cmp <= move->cmp_last) + return 0; + + move = move->next; + } + + // Test if the port is inside allowed range + move = network_viewer_opt.included_port; + while (move) { + if (move->cmp_first <= cmp && cmp <= move->cmp_last) + return 1; + + move = move->next; + } + + return 0; +} + +/** + * Hostname matches pattern + * + * @param cmp the value to compare + * + * @return It returns 1 when the value matches and zero otherwise. + */ +int hostname_matches_pattern(char *cmp) +{ + if (!network_viewer_opt.included_hostnames && !network_viewer_opt.excluded_hostnames) + return 1; + + ebpf_network_viewer_hostname_list_t *move = network_viewer_opt.excluded_hostnames; + while (move) { + if (simple_pattern_matches(move->value_pattern, cmp)) + return 0; + + move = move->next; + } + + move = network_viewer_opt.included_hostnames; + while (move) { + if (simple_pattern_matches(move->value_pattern, cmp)) + return 1; + + move = move->next; + } + + + return 0; +} + +/** + * Is socket allowed? + * + * Compare destination addresses and destination ports to define next steps + * + * @param key the socket read from kernel ring + * @param family the family used to compare IPs (AF_INET and AF_INET6) + * + * @return It returns 1 if this socket is inside the ranges and 0 otherwise. + */ +int is_socket_allowed(netdata_socket_idx_t *key, int family) +{ + if (!is_port_inside_range(key->dport)) + return 0; + + return is_specific_ip_inside_range(&key->daddr, family); +} + +/** + * Compare sockets + * + * Compare destination address and destination port. + * We do not compare source port, because it is random. + * We also do not compare source address, because inbound and outbound connections are stored in separated AVL trees. + * + * @param a pointer to netdata_socket_plot + * @param b pointer to netdata_socket_plot + * + * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b. + */ +static int compare_sockets(void *a, void *b) +{ + struct netdata_socket_plot *val1 = a; + struct netdata_socket_plot *val2 = b; + int cmp; + + // We do not need to compare val2 family, because data inside hash table is always from the same family + if (val1->family == AF_INET) { //IPV4 + if (val1->flags & NETDATA_INBOUND_DIRECTION) { + if (val1->index.sport == val2->index.sport) + cmp = 0; + else { + cmp = (val1->index.sport > val2->index.sport)?1:-1; + } + } else { + cmp = memcmp(&val1->index.dport, &val2->index.dport, sizeof(uint16_t)); + if (!cmp) { + cmp = memcmp(&val1->index.daddr.addr32[0], &val2->index.daddr.addr32[0], sizeof(uint32_t)); + } + } + } else { + if (val1->flags & NETDATA_INBOUND_DIRECTION) { + if (val1->index.sport == val2->index.sport) + cmp = 0; + else { + cmp = (val1->index.sport > val2->index.sport)?1:-1; + } + } else { + cmp = memcmp(&val1->index.dport, &val2->index.dport, sizeof(uint16_t)); + if (!cmp) { + cmp = memcmp(&val1->index.daddr.addr32, &val2->index.daddr.addr32, 4*sizeof(uint32_t)); + } + } + } + + return cmp; +} + +/** + * Build dimension name + * + * Fill dimension name vector with values given + * + * @param dimname the output vector + * @param hostname the hostname for the socket. + * @param service_name the service used to connect. + * @param proto the protocol used in this connection + * @param family is this IPV4(AF_INET) or IPV6(AF_INET6) + * + * @return it returns the size of the data copied on success and -1 otherwise. + */ +static inline int build_outbound_dimension_name(char *dimname, char *hostname, char *service_name, + char *proto, int family) +{ + return snprintf(dimname, CONFIG_MAX_NAME - 7, (family == AF_INET)?"%s:%s:%s_":"%s:%s:[%s]_", + service_name, proto, + hostname); +} + +/** + * Fill inbound dimension name + * + * Mount the dimension name with the input given + * + * @param dimname the output vector + * @param service_name the service used to connect. + * @param proto the protocol used in this connection + * + * @return it returns the size of the data copied on success and -1 otherwise. + */ +static inline int build_inbound_dimension_name(char *dimname, char *service_name, char *proto) +{ + return snprintf(dimname, CONFIG_MAX_NAME - 7, "%s:%s_", service_name, + proto); +} + +/** + * Fill Resolved Name + * + * Fill the resolved name structure with the value given. + * The hostname is the largest value possible, if it is necessary to cut some value, it must be cut. + * + * @param ptr the output vector + * @param hostname the hostname resolved or IP. + * @param length the length for the hostname. + * @param service_name the service name associated to the connection + * @param is_outbound the is this an outbound connection + */ +static inline void fill_resolved_name(netdata_socket_plot_t *ptr, char *hostname, size_t length, + char *service_name, int is_outbound) +{ + if (length < NETDATA_MAX_NETWORK_COMBINED_LENGTH) + ptr->resolved_name = strdupz(hostname); + else { + length = NETDATA_MAX_NETWORK_COMBINED_LENGTH; + ptr->resolved_name = mallocz( NETDATA_MAX_NETWORK_COMBINED_LENGTH + 1); + memcpy(ptr->resolved_name, hostname, length); + ptr->resolved_name[length] = '\0'; + } + + char dimname[CONFIG_MAX_NAME]; + int size; + char *protocol; + if (ptr->sock.protocol == IPPROTO_UDP) { + protocol = "UDP"; + } else if (ptr->sock.protocol == IPPROTO_TCP) { + protocol = "TCP"; + } else { + protocol = "ALL"; + } + + if (is_outbound) + size = build_outbound_dimension_name(dimname, hostname, service_name, protocol, ptr->family); + else + size = build_inbound_dimension_name(dimname,service_name, protocol); + + if (size > 0) { + strcpy(&dimname[size], "sent"); + dimname[size + 4] = '\0'; + ptr->dimension_sent = strdupz(dimname); + + strcpy(&dimname[size], "recv"); + ptr->dimension_recv = strdupz(dimname); + + dimname[size - 1] = '\0'; + ptr->dimension_retransmit = strdupz(dimname); + } +} + +/** + * Mount dimension names + * + * Fill the vector names after to resolve the addresses + * + * @param ptr a pointer to the structure where the values are stored. + * @param is_outbound is a outbound ptr value? + * + * @return It returns 1 if the name is valid and 0 otherwise. + */ +int fill_names(netdata_socket_plot_t *ptr, int is_outbound) +{ + char hostname[NI_MAXHOST], service_name[NI_MAXSERV]; + if (ptr->resolved) + return 1; + + int ret; + static int resolve_name = -1; + static int resolve_service = -1; + if (resolve_name == -1) + resolve_name = network_viewer_opt.hostname_resolution_enabled; + + if (resolve_service == -1) + resolve_service = network_viewer_opt.service_resolution_enabled; + + netdata_socket_idx_t *idx = &ptr->index; + + char *errname = { "Not resolved" }; + // Resolve Name + if (ptr->family == AF_INET) { //IPV4 + struct sockaddr_in myaddr; + memset(&myaddr, 0 , sizeof(myaddr)); + + myaddr.sin_family = ptr->family; + if (is_outbound) { + myaddr.sin_port = idx->dport; + myaddr.sin_addr.s_addr = idx->daddr.addr32[0]; + } else { + myaddr.sin_port = idx->sport; + myaddr.sin_addr.s_addr = idx->saddr.addr32[0]; + } + + ret = (!resolve_name)?-1:getnameinfo((struct sockaddr *)&myaddr, sizeof(myaddr), hostname, + sizeof(hostname), service_name, sizeof(service_name), NI_NAMEREQD); + + if (!ret && !resolve_service) { + snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr.sin_port)); + } + + if (ret) { + // I cannot resolve the name, I will use the IP + if (!inet_ntop(AF_INET, &myaddr.sin_addr.s_addr, hostname, NI_MAXHOST)) { + strncpy(hostname, errname, 13); + } + + snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr.sin_port)); + ret = 1; + } + } else { // IPV6 + struct sockaddr_in6 myaddr6; + memset(&myaddr6, 0 , sizeof(myaddr6)); + + myaddr6.sin6_family = AF_INET6; + if (is_outbound) { + myaddr6.sin6_port = idx->dport; + memcpy(myaddr6.sin6_addr.s6_addr, idx->daddr.addr8, sizeof(union netdata_ip_t)); + } else { + myaddr6.sin6_port = idx->sport; + memcpy(myaddr6.sin6_addr.s6_addr, idx->saddr.addr8, sizeof(union netdata_ip_t)); + } + + ret = (!resolve_name)?-1:getnameinfo((struct sockaddr *)&myaddr6, sizeof(myaddr6), hostname, + sizeof(hostname), service_name, sizeof(service_name), NI_NAMEREQD); + + if (!ret && !resolve_service) { + snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr6.sin6_port)); + } + + if (ret) { + // I cannot resolve the name, I will use the IP + if (!inet_ntop(AF_INET6, myaddr6.sin6_addr.s6_addr, hostname, NI_MAXHOST)) { + strncpy(hostname, errname, 13); + } + + snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr6.sin6_port)); + + ret = 1; + } + } + + fill_resolved_name(ptr, hostname, + strlen(hostname) + strlen(service_name)+ NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH, + service_name, is_outbound); + + if (resolve_name && !ret) + ret = hostname_matches_pattern(hostname); + + ptr->resolved++; + + return ret; +} + +/** + * Fill last Network Viewer Dimension + * + * Fill the unique dimension that is always plotted. + * + * @param ptr the pointer for the last dimension + * @param is_outbound is this an inbound structure? + */ +static void fill_last_nv_dimension(netdata_socket_plot_t *ptr, int is_outbound) +{ + char hostname[NI_MAXHOST], service_name[NI_MAXSERV]; + char *other = { "other" }; + // We are also copying the NULL bytes to avoid warnings in new compilers + strncpy(hostname, other, 6); + strncpy(service_name, other, 6); + + ptr->family = AF_INET; + ptr->sock.protocol = 255; + ptr->flags = (!is_outbound)?NETDATA_INBOUND_DIRECTION:NETDATA_OUTBOUND_DIRECTION; + + fill_resolved_name(ptr, hostname, 10 + NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH, service_name, is_outbound); + +#ifdef NETDATA_INTERNAL_CHECKS + info("Last %s dimension added: ID = %u, IP = OTHER, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", + (is_outbound)?"outbound":"inbound", network_viewer_opt.max_dim - 1, ptr->resolved_name, + ptr->dimension_recv, ptr->dimension_sent, ptr->dimension_retransmit); +#endif +} + +/** + * Update Socket Data + * + * Update the socket information with last collected data + * + * @param sock + * @param lvalues + */ +static inline void update_socket_data(netdata_socket_t *sock, netdata_socket_t *lvalues) +{ + sock->recv_packets += lvalues->recv_packets; + sock->sent_packets += lvalues->sent_packets; + sock->recv_bytes += lvalues->recv_bytes; + sock->sent_bytes += lvalues->sent_bytes; + sock->retransmit += lvalues->retransmit; + + if (lvalues->ct > sock->ct) + sock->ct = lvalues->ct; +} + +/** + * Store socket inside avl + * + * Store the socket values inside the avl tree. + * + * @param out the structure with information used to plot charts. + * @param lvalues Values read from socket ring. + * @param lindex the index information, the real socket. + * @param family the family associated to the socket + * @param flags the connection flags + */ +static void store_socket_inside_avl(netdata_vector_plot_t *out, netdata_socket_t *lvalues, + netdata_socket_idx_t *lindex, int family, uint32_t flags) +{ + netdata_socket_plot_t test, *ret ; + + memcpy(&test.index, lindex, sizeof(netdata_socket_idx_t)); + test.flags = flags; + + ret = (netdata_socket_plot_t *) avl_search_lock(&out->tree, (avl_t *)&test); + if (ret) { + if (lvalues->ct > ret->plot.last_time) { + update_socket_data(&ret->sock, lvalues); + } + } else { + uint32_t curr = out->next; + uint32_t last = out->last; + + netdata_socket_plot_t *w = &out->plot[curr]; + + int resolved; + if (curr == last) { + if (lvalues->ct > w->plot.last_time) { + update_socket_data(&w->sock, lvalues); + } + return; + } else { + memcpy(&w->sock, lvalues, sizeof(netdata_socket_t)); + memcpy(&w->index, lindex, sizeof(netdata_socket_idx_t)); + w->family = family; + + resolved = fill_names(w, out != (netdata_vector_plot_t *)&inbound_vectors); + } + + if (!resolved) { + freez(w->resolved_name); + freez(w->dimension_sent); + freez(w->dimension_recv); + freez(w->dimension_retransmit); + + memset(w, 0, sizeof(netdata_socket_plot_t)); + + return; + } + + w->flags = flags; + netdata_socket_plot_t *check ; + check = (netdata_socket_plot_t *) avl_insert_lock(&out->tree, (avl_t *)w); + if (check != w) + error("Internal error, cannot insert the AVL tree."); + +#ifdef NETDATA_INTERNAL_CHECKS + char iptext[INET6_ADDRSTRLEN]; + if (inet_ntop(family, &w->index.daddr.addr8, iptext, sizeof(iptext))) + info("New %s dimension added: ID = %u, IP = %s, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", + (out == &inbound_vectors)?"inbound":"outbound", curr, iptext, w->resolved_name, + w->dimension_recv, w->dimension_sent, w->dimension_retransmit); +#endif + curr++; + if (curr > last) + curr = last; + out->next = curr; + } +} + +/** + * Compare Vector to store + * + * Compare input values with local address to select table to store. + * + * @param direction store inbound and outbound direction. + * @param cmp index read from hash table. + * @param proto the protocol read. + * + * @return It returns the structure with address to compare. + */ +netdata_vector_plot_t * select_vector_to_store(uint32_t *direction, netdata_socket_idx_t *cmp, uint8_t proto) +{ + if (!listen_ports) { + *direction = NETDATA_OUTBOUND_DIRECTION; + return &outbound_vectors; + } + + ebpf_network_viewer_port_list_t *move_ports = listen_ports; + while (move_ports) { + if (move_ports->protocol == proto && move_ports->first == cmp->sport) { + *direction = NETDATA_INBOUND_DIRECTION; + return &inbound_vectors; + } + + move_ports = move_ports->next; + } + + *direction = NETDATA_OUTBOUND_DIRECTION; + return &outbound_vectors; +} + +/** + * Hash accumulator + * + * @param values the values used to calculate the data. + * @param key the key to store data. + * @param family the connection family + * @param end the values size. + */ +static void hash_accumulator(netdata_socket_t *values, netdata_socket_idx_t *key, int family, int end) +{ + uint64_t bsent = 0, brecv = 0, psent = 0, precv = 0; + uint16_t retransmit = 0; + int i; + uint8_t protocol = values[0].protocol; + uint64_t ct = values[0].ct; + for (i = 1; i < end; i++) { + netdata_socket_t *w = &values[i]; + + precv += w->recv_packets; + psent += w->sent_packets; + brecv += w->recv_bytes; + bsent += w->sent_bytes; + retransmit += w->retransmit; + + if (!protocol) + protocol = w->protocol; + + if (w->ct > ct) + ct = w->ct; + } + + values[0].recv_packets += precv; + values[0].sent_packets += psent; + values[0].recv_bytes += brecv; + values[0].sent_bytes += bsent; + values[0].retransmit += retransmit; + values[0].protocol = (!protocol)?IPPROTO_TCP:protocol; + values[0].ct = ct; + + if (is_socket_allowed(key, family)) { + uint32_t dir; + netdata_vector_plot_t *table = select_vector_to_store(&dir, key, protocol); + store_socket_inside_avl(table, &values[0], key, family, dir); + } +} + +/** + * Read socket hash table + * + * Read data from hash tables created on kernel ring. + * + * @param fd the hash table with data. + * @param family the family associated to the hash table + * + * @return it returns 0 on success and -1 otherwise. + */ +static void read_socket_hash_table(int fd, int family, int network_connection) +{ + if (wait_to_plot) + return; + + netdata_socket_idx_t key = {}; + netdata_socket_idx_t next_key = {}; + + netdata_socket_t *values = socket_values; + size_t length = ebpf_nprocs*sizeof(netdata_socket_t); + int test, end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + // We need to reset the values when we are working on kernel 4.15 or newer, because kernel does not create + // values for specific processor unless it is used to store data. As result of this behavior one the next socket + // can have values from the previous one. + memset(values, 0, length); + test = bpf_map_lookup_elem(fd, &key, values); + if (test < 0) { + key = next_key; + continue; + } + + if (network_connection) { + hash_accumulator(values, &key, family, end); + } + + key = next_key; + } +} + +/** + * Fill Network Viewer Port list + * + * Fill the strcture with values read from /proc or hash table. + * + * @param out the structure where we will store data. + * @param value the ports we are listen to. + * @param proto the protocol used for this connection. + * @param in the strcuture with values read form different sources. + */ +static inline void fill_nv_port_list(ebpf_network_viewer_port_list_t *out, uint16_t value, uint16_t proto, + netdata_passive_connection_t *in) +{ + out->first = value; + out->protocol = proto; + out->pid = in->pid; + out->tgid = in->tgid; + out->connections = in->counter; +} + +/** + * Update listen table + * + * Update link list when it is necessary. + * + * @param value the ports we are listen to. + * @param proto the protocol used with port connection. + * @param in the strcuture with values read form different sources. + */ +void update_listen_table(uint16_t value, uint16_t proto, netdata_passive_connection_t *in) +{ + ebpf_network_viewer_port_list_t *w; + if (likely(listen_ports)) { + ebpf_network_viewer_port_list_t *move = listen_ports, *store = listen_ports; + while (move) { + if (move->protocol == proto && move->first == value) { + move->pid = in->pid; + move->tgid = in->tgid; + move->connections = in->counter; + return; + } + + store = move; + move = move->next; + } + + w = callocz(1, sizeof(ebpf_network_viewer_port_list_t)); + store->next = w; + } else { + w = callocz(1, sizeof(ebpf_network_viewer_port_list_t)); + + listen_ports = w; + } + fill_nv_port_list(w, value, proto, in); + +#ifdef NETDATA_INTERNAL_CHECKS + info("The network viewer is monitoring inbound connections for port %u", ntohs(value)); +#endif +} + +/** + * Read listen table + * + * Read the table with all ports that we are listen on host. + */ +static void read_listen_table() +{ + netdata_passive_connection_idx_t key = {}; + netdata_passive_connection_idx_t next_key = {}; + + int fd = socket_maps[NETDATA_SOCKET_LPORTS].map_fd; + netdata_passive_connection_t value = {}; + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + int test = bpf_map_lookup_elem(fd, &key, &value); + if (test < 0) { + key = next_key; + continue; + } + + // The correct protocol must come from kernel + update_listen_table(key.port, key.protocol, &value); + + key = next_key; + memset(&value, 0, sizeof(value)); + } + + if (next_key.port && value.pid) { + // The correct protocol must come from kernel + update_listen_table(next_key.port, next_key.protocol, &value); + } +} + +/** + * Socket read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_socket_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_socket_cleanup, ptr); + ebpf_module_t *em = (ebpf_module_t *)ptr; + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = NETDATA_SOCKET_READ_SLEEP_MS * em->update_every; + int fd_ipv4 = socket_maps[NETDATA_SOCKET_TABLE_IPV4].map_fd; + int fd_ipv6 = socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd; + int network_connection = em->optional; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + pthread_mutex_lock(&nv_mutex); + read_listen_table(); + read_socket_hash_table(fd_ipv4, AF_INET, network_connection); + read_socket_hash_table(fd_ipv6, AF_INET6, network_connection); + wait_to_plot = 1; + pthread_mutex_unlock(&nv_mutex); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Read the hash table and store data to allocated vectors. + */ +static void read_hash_global_tables() +{ + uint64_t idx; + netdata_idx_t res[NETDATA_SOCKET_COUNTER]; + + netdata_idx_t *val = socket_hash_values; + int fd = socket_maps[NETDATA_SOCKET_GLOBAL].map_fd; + for (idx = 0; idx < NETDATA_SOCKET_COUNTER; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, val)) { + uint64_t total = 0; + int i; + int end = ebpf_nprocs; + for (i = 0; i < end; i++) + total += val[i]; + + res[idx] = total; + } else { + res[idx] = 0; + } + } + + socket_aggregated_data[NETDATA_IDX_TCP_SENDMSG].call = res[NETDATA_KEY_CALLS_TCP_SENDMSG]; + socket_aggregated_data[NETDATA_IDX_TCP_CLEANUP_RBUF].call = res[NETDATA_KEY_CALLS_TCP_CLEANUP_RBUF]; + socket_aggregated_data[NETDATA_IDX_TCP_CLOSE].call = res[NETDATA_KEY_CALLS_TCP_CLOSE]; + socket_aggregated_data[NETDATA_IDX_UDP_RECVBUF].call = res[NETDATA_KEY_CALLS_UDP_RECVMSG]; + socket_aggregated_data[NETDATA_IDX_UDP_SENDMSG].call = res[NETDATA_KEY_CALLS_UDP_SENDMSG]; + socket_aggregated_data[NETDATA_IDX_TCP_RETRANSMIT].call = res[NETDATA_KEY_TCP_RETRANSMIT]; + socket_aggregated_data[NETDATA_IDX_TCP_CONNECTION_V4].call = res[NETDATA_KEY_CALLS_TCP_CONNECT_IPV4]; + socket_aggregated_data[NETDATA_IDX_TCP_CONNECTION_V6].call = res[NETDATA_KEY_CALLS_TCP_CONNECT_IPV6]; + + socket_aggregated_data[NETDATA_IDX_TCP_SENDMSG].ecall = res[NETDATA_KEY_ERROR_TCP_SENDMSG]; + socket_aggregated_data[NETDATA_IDX_TCP_CLEANUP_RBUF].ecall = res[NETDATA_KEY_ERROR_TCP_CLEANUP_RBUF]; + socket_aggregated_data[NETDATA_IDX_UDP_RECVBUF].ecall = res[NETDATA_KEY_ERROR_UDP_RECVMSG]; + socket_aggregated_data[NETDATA_IDX_UDP_SENDMSG].ecall = res[NETDATA_KEY_ERROR_UDP_SENDMSG]; + socket_aggregated_data[NETDATA_IDX_TCP_CONNECTION_V4].ecall = res[NETDATA_KEY_ERROR_TCP_CONNECT_IPV4]; + socket_aggregated_data[NETDATA_IDX_TCP_CONNECTION_V6].ecall = res[NETDATA_KEY_ERROR_TCP_CONNECT_IPV6]; + + socket_aggregated_data[NETDATA_IDX_TCP_SENDMSG].bytes = res[NETDATA_KEY_BYTES_TCP_SENDMSG]; + socket_aggregated_data[NETDATA_IDX_TCP_CLEANUP_RBUF].bytes = res[NETDATA_KEY_BYTES_TCP_CLEANUP_RBUF]; + socket_aggregated_data[NETDATA_IDX_UDP_RECVBUF].bytes = res[NETDATA_KEY_BYTES_UDP_RECVMSG]; + socket_aggregated_data[NETDATA_IDX_UDP_SENDMSG].bytes = res[NETDATA_KEY_BYTES_UDP_SENDMSG]; +} + +/** + * Fill publish apps when necessary. + * + * @param current_pid the PID that I am updating + * @param eb the structure with data read from memory. + */ +void ebpf_socket_fill_publish_apps(uint32_t current_pid, ebpf_bandwidth_t *eb) +{ + ebpf_socket_publish_apps_t *curr = socket_bandwidth_curr[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(ebpf_socket_publish_apps_t)); + socket_bandwidth_curr[current_pid] = curr; + } + + curr->bytes_sent = eb->bytes_sent; + curr->bytes_received = eb->bytes_received; + curr->call_tcp_sent = eb->call_tcp_sent; + curr->call_tcp_received = eb->call_tcp_received; + curr->retransmit = eb->retransmit; + curr->call_udp_sent = eb->call_udp_sent; + curr->call_udp_received = eb->call_udp_received; + curr->call_close = eb->close; + curr->call_tcp_v4_connection = eb->tcp_v4_connection; + curr->call_tcp_v6_connection = eb->tcp_v6_connection; +} + +/** + * Bandwidth accumulator. + * + * @param out the vector with the values to sum + */ +void ebpf_socket_bandwidth_accumulator(ebpf_bandwidth_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + ebpf_bandwidth_t *total = &out[0]; + for (i = 1; i < end; i++) { + ebpf_bandwidth_t *move = &out[i]; + total->bytes_sent += move->bytes_sent; + total->bytes_received += move->bytes_received; + total->call_tcp_sent += move->call_tcp_sent; + total->call_tcp_received += move->call_tcp_received; + total->retransmit += move->retransmit; + total->call_udp_sent += move->call_udp_sent; + total->call_udp_received += move->call_udp_received; + total->close += move->close; + total->tcp_v4_connection += move->tcp_v4_connection; + total->tcp_v6_connection += move->tcp_v6_connection; + } +} + +/** + * Update the apps data reading information from the hash table + */ +static void ebpf_socket_update_apps_data() +{ + int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd; + ebpf_bandwidth_t *eb = bandwidth_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, eb)) { + pids = pids->next; + continue; + } + + ebpf_socket_bandwidth_accumulator(eb); + + ebpf_socket_fill_publish_apps(key, eb); + + pids = pids->next; + } +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_socket_cgroup() +{ + ebpf_cgroup_target_t *ect ; + + ebpf_bandwidth_t *eb = bandwidth_vector; + int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + ebpf_bandwidth_t *out = &pids->socket; + ebpf_socket_publish_apps_t *publish = &ect->publish_socket; + if (likely(socket_bandwidth_curr) && socket_bandwidth_curr[pid]) { + ebpf_socket_publish_apps_t *in = socket_bandwidth_curr[pid]; + + publish->bytes_sent = in->bytes_sent; + publish->bytes_received = in->bytes_received; + publish->call_tcp_sent = in->call_tcp_sent; + publish->call_tcp_received = in->call_tcp_received; + publish->retransmit = in->retransmit; + publish->call_udp_sent = in->call_udp_sent; + publish->call_udp_received = in->call_udp_received; + publish->call_close = in->call_close; + publish->call_tcp_v4_connection = in->call_tcp_v4_connection; + publish->call_tcp_v6_connection = in->call_tcp_v6_connection; + } else { + if (!bpf_map_lookup_elem(fd, &pid, eb)) { + ebpf_socket_bandwidth_accumulator(eb); + + memcpy(out, eb, sizeof(ebpf_bandwidth_t)); + + publish->bytes_sent = out->bytes_sent; + publish->bytes_received = out->bytes_received; + publish->call_tcp_sent = out->call_tcp_sent; + publish->call_tcp_received = out->call_tcp_received; + publish->retransmit = out->retransmit; + publish->call_udp_sent = out->call_udp_sent; + publish->call_udp_received = out->call_udp_received; + publish->call_close = out->close; + publish->call_tcp_v4_connection = out->tcp_v4_connection; + publish->call_tcp_v6_connection = out->tcp_v6_connection; + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param fd structure used to store data + * @param pids input data + */ +static void ebpf_socket_sum_cgroup_pids(ebpf_socket_publish_apps_t *socket, struct pid_on_target2 *pids) +{ + ebpf_socket_publish_apps_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (pids) { + ebpf_bandwidth_t *w = &pids->socket; + + accumulator.bytes_received += w->bytes_received; + accumulator.bytes_sent += w->bytes_sent; + accumulator.call_tcp_received += w->call_tcp_received; + accumulator.call_tcp_sent += w->call_tcp_sent; + accumulator.retransmit += w->retransmit; + accumulator.call_udp_received += w->call_udp_received; + accumulator.call_udp_sent += w->call_udp_sent; + accumulator.call_close += w->close; + accumulator.call_tcp_v4_connection += w->tcp_v4_connection; + accumulator.call_tcp_v6_connection += w->tcp_v6_connection; + + pids = pids->next; + } + + socket->bytes_sent = (accumulator.bytes_sent >= socket->bytes_sent) ? accumulator.bytes_sent : socket->bytes_sent; + socket->bytes_received = (accumulator.bytes_received >= socket->bytes_received) ? accumulator.bytes_received : socket->bytes_received; + socket->call_tcp_sent = (accumulator.call_tcp_sent >= socket->call_tcp_sent) ? accumulator.call_tcp_sent : socket->call_tcp_sent; + socket->call_tcp_received = (accumulator.call_tcp_received >= socket->call_tcp_received) ? accumulator.call_tcp_received : socket->call_tcp_received; + socket->retransmit = (accumulator.retransmit >= socket->retransmit) ? accumulator.retransmit : socket->retransmit; + socket->call_udp_sent = (accumulator.call_udp_sent >= socket->call_udp_sent) ? accumulator.call_udp_sent : socket->call_udp_sent; + socket->call_udp_received = (accumulator.call_udp_received >= socket->call_udp_received) ? accumulator.call_udp_received : socket->call_udp_received; + socket->call_close = (accumulator.call_close >= socket->call_close) ? accumulator.call_close : socket->call_close; + socket->call_tcp_v4_connection = (accumulator.call_tcp_v4_connection >= socket->call_tcp_v4_connection) ? + accumulator.call_tcp_v4_connection : socket->call_tcp_v4_connection; + socket->call_tcp_v6_connection = (accumulator.call_tcp_v6_connection >= socket->call_tcp_v6_connection) ? + accumulator.call_tcp_v6_connection : socket->call_tcp_v6_connection; +} + +/** + * Create specific socket charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_specific_socket_charts(char *type, int update_every) +{ + int order_basis = 5300; + ebpf_create_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, + "Calls to tcp_v4_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_TCP_V4_CONN_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6, + "Calls to tcp_v6_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, + "Bytes received", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, + "Bytes sent", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + socket_publish_aggregated, 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + socket_publish_aggregated, 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + "Calls to tcp_retransmit.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + "Calls to udp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + "Calls to udp_recvmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); +} + +/** + * Obsolete specific socket charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) +{ + int order_basis = 5300; + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "Calls to tcp_v4_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V6,"Calls to tcp_v6_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "Bytes received", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT,"Bytes sent", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "Calls to tcp_retransmit.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "Calls to udp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "Calls to udp_recvmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); +} + +/* + * Send Specific Swap data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_socket_data(char *type, ebpf_socket_publish_apps_t *values) +{ + write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4].name, + (long long) values->call_tcp_v4_connection); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6].name, + (long long) values->call_tcp_v6_connection); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, + (long long) values->bytes_sent); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, + (long long) values->bytes_received); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, + (long long) values->call_tcp_sent); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, + (long long) values->call_tcp_received); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT].name, + (long long) values->retransmit); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG].name, + (long long) values->call_udp_sent); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF].name, + (long long) values->call_udp_received); + write_end_chart(); +} + +/** + * Create Systemd Socket Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_socket_charts(int update_every) +{ + int order = 20080; + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_CONNECTION_TCP_V4, + "Calls to tcp_v4_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_CONNECTION_TCP_V6, + "Calls to tcp_v6_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_RECV, + "Bytes received", EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_SENT, + "Bytes sent", EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + "Calls to tcp_retransmit", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + "Calls to udp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + "Calls to udp_recvmsg", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + */ +static void ebpf_send_systemd_socket_charts() +{ + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v4_connection); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V6); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v6_connection); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_sent); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_received); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_sent); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_received); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.retransmit); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_sent); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_received); + } + } + write_end_chart(); +} + +/** + * Update Cgroup algorithm + * + * Change algorithm from absolute to incremental + */ +void ebpf_socket_update_cgroup_algorithm() +{ + int i; + for (i = 0; i < NETDATA_MAX_SOCKET_VECTOR; i++) { + netdata_publish_syscall_t *ptr = &socket_publish_aggregated[i]; + freez(ptr->algorithm); + ptr->algorithm = strdupz(ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. +*/ +static void ebpf_socket_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_socket_sum_cgroup_pids(&ect->publish_socket, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_socket_charts(update_every); + } + ebpf_send_systemd_socket_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART)) { + ebpf_create_specific_socket_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART && ect->updated) { + ebpf_send_specific_socket_data(ect->name, &ect->publish_socket); + } else { + ebpf_obsolete_specific_socket_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART; + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/***************************************************************** + * + * FUNCTIONS WITH THE MAIN LOOP + * + *****************************************************************/ + +/** + * Main loop for this collector. + * + * @param step the number of microseconds used with heart beat + * @param em the structure with thread information + */ +static void socket_collector(usec_t step, ebpf_module_t *em) +{ + heartbeat_t hb; + heartbeat_init(&hb); + + socket_threads.thread = mallocz(sizeof(netdata_thread_t)); + socket_threads.start_routine = ebpf_socket_read_hash; + + netdata_thread_create(socket_threads.thread, socket_threads.name, + NETDATA_THREAD_OPTION_DEFAULT, ebpf_socket_read_hash, em); + + int cgroups = em->cgroup_charts; + if (cgroups) + ebpf_socket_update_cgroup_algorithm(); + + int socket_global_enabled = em->global_charts; + int network_connection = em->optional; + int update_every = em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t socket_apps_enabled = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (socket_global_enabled) + read_hash_global_tables(); + + if (socket_apps_enabled) + ebpf_socket_update_apps_data(); + + if (cgroups) + ebpf_update_socket_cgroup(); + + calculate_nv_plot(); + + pthread_mutex_lock(&lock); + if (socket_global_enabled) + ebpf_socket_send_data(em); + + if (socket_apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_socket_send_apps_data(em, apps_groups_root_target); + + if (cgroups) + ebpf_socket_send_cgroup_data(update_every); + + fflush(stdout); + + if (network_connection) { + // We are calling fflush many times, because when we have a lot of dimensions + // we began to have not expected outputs and Netdata closed the plugin. + pthread_mutex_lock(&nv_mutex); + ebpf_socket_create_nv_charts(&inbound_vectors, update_every); + fflush(stdout); + ebpf_socket_send_nv_data(&inbound_vectors); + + ebpf_socket_create_nv_charts(&outbound_vectors, update_every); + fflush(stdout); + ebpf_socket_send_nv_data(&outbound_vectors); + wait_to_plot = 0; + pthread_mutex_unlock(&nv_mutex); + + } + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * FUNCTIONS TO START THREAD + * + *****************************************************************/ + +/** + * Allocate vectors used with this thread. + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_socket_allocate_global_vectors(int apps) +{ + memset(socket_aggregated_data, 0 ,NETDATA_MAX_SOCKET_VECTOR * sizeof(netdata_syscall_stat_t)); + memset(socket_publish_aggregated, 0 ,NETDATA_MAX_SOCKET_VECTOR * sizeof(netdata_publish_syscall_t)); + socket_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + + if (apps) + socket_bandwidth_curr = callocz((size_t)pid_max, sizeof(ebpf_socket_publish_apps_t *)); + + bandwidth_vector = callocz((size_t)ebpf_nprocs, sizeof(ebpf_bandwidth_t)); + + socket_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_socket_t)); + inbound_vectors.plot = callocz(network_viewer_opt.max_dim, sizeof(netdata_socket_plot_t)); + outbound_vectors.plot = callocz(network_viewer_opt.max_dim, sizeof(netdata_socket_plot_t)); +} + +/** + * Initialize Inbound and Outbound + * + * Initialize the common outbound and inbound sockets. + */ +static void initialize_inbound_outbound() +{ + inbound_vectors.last = network_viewer_opt.max_dim - 1; + outbound_vectors.last = inbound_vectors.last; + fill_last_nv_dimension(&inbound_vectors.plot[inbound_vectors.last], 0); + fill_last_nv_dimension(&outbound_vectors.plot[outbound_vectors.last], 1); +} + +/***************************************************************** + * + * EBPF SOCKET THREAD + * + *****************************************************************/ + +/** + * Fill Port list + * + * @param out a pointer to the link list. + * @param in the structure that will be linked. + */ +static inline void fill_port_list(ebpf_network_viewer_port_list_t **out, ebpf_network_viewer_port_list_t *in) +{ + if (likely(*out)) { + ebpf_network_viewer_port_list_t *move = *out, *store = *out; + uint16_t first = ntohs(in->first); + uint16_t last = ntohs(in->last); + while (move) { + uint16_t cmp_first = ntohs(move->first); + uint16_t cmp_last = ntohs(move->last); + if (cmp_first <= first && first <= cmp_last && + cmp_first <= last && last <= cmp_last ) { + info("The range/value (%u, %u) is inside the range/value (%u, %u) already inserted, it will be ignored.", + first, last, cmp_first, cmp_last); + freez(in->value); + freez(in); + return; + } else if (first <= cmp_first && cmp_first <= last && + first <= cmp_last && cmp_last <= last) { + info("The range (%u, %u) is bigger than previous range (%u, %u) already inserted, the previous will be ignored.", + first, last, cmp_first, cmp_last); + freez(move->value); + move->value = in->value; + move->first = in->first; + move->last = in->last; + freez(in); + return; + } + + store = move; + move = move->next; + } + + store->next = in; + } else { + *out = in; + } + +#ifdef NETDATA_INTERNAL_CHECKS + info("Adding values %s( %u, %u) to %s port list used on network viewer", + in->value, ntohs(in->first), ntohs(in->last), + (*out == network_viewer_opt.included_port)?"included":"excluded"); +#endif +} + +/** + * Parse Service List + * + * @param out a pointer to store the link list + * @param service the service used to create the structure that will be linked. + */ +static void parse_service_list(void **out, char *service) +{ + ebpf_network_viewer_port_list_t **list = (ebpf_network_viewer_port_list_t **)out; + struct servent *serv = getservbyname((const char *)service, "tcp"); + if (!serv) + serv = getservbyname((const char *)service, "udp"); + + if (!serv) { + info("Cannot resolv the service '%s' with protocols TCP and UDP, it will be ignored", service); + return; + } + + ebpf_network_viewer_port_list_t *w = callocz(1, sizeof(ebpf_network_viewer_port_list_t)); + w->value = strdupz(service); + w->hash = simple_hash(service); + + w->first = w->last = (uint16_t)serv->s_port; + + fill_port_list(list, w); +} + +/** + * Netmask + * + * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h) + * + * @param prefix create the netmask based in the CIDR value. + * + * @return + */ +static inline in_addr_t netmask(int prefix) { + + if (prefix == 0) + return (~((in_addr_t) - 1)); + else + return (in_addr_t)(~((1 << (32 - prefix)) - 1)); + +} + +/** + * Broadcast + * + * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h) + * + * @param addr is the ip address + * @param prefix is the CIDR value. + * + * @return It returns the last address of the range + */ +static inline in_addr_t broadcast(in_addr_t addr, int prefix) +{ + return (addr | ~netmask(prefix)); +} + +/** + * Network + * + * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h) + * + * @param addr is the ip address + * @param prefix is the CIDR value. + * + * @return It returns the first address of the range. + */ +static inline in_addr_t ipv4_network(in_addr_t addr, int prefix) +{ + return (addr & netmask(prefix)); +} + +/** + * IP to network long + * + * @param dst the vector to store the result + * @param ip the source ip given by our users. + * @param domain the ip domain (IPV4 or IPV6) + * @param source the original string + * + * @return it returns 0 on success and -1 otherwise. + */ +static inline int ip2nl(uint8_t *dst, char *ip, int domain, char *source) +{ + if (inet_pton(domain, ip, dst) <= 0) { + error("The address specified (%s) is invalid ", source); + return -1; + } + + return 0; +} + +/** + * Get IPV6 Last Address + * + * @param out the address to store the last address. + * @param in the address used to do the math. + * @param prefix number of bits used to calculate the address + */ +static void get_ipv6_last_addr(union netdata_ip_t *out, union netdata_ip_t *in, uint64_t prefix) +{ + uint64_t mask,tmp; + uint64_t ret[2]; + memcpy(ret, in->addr32, sizeof(union netdata_ip_t)); + + if (prefix == 128) { + memcpy(out->addr32, in->addr32, sizeof(union netdata_ip_t)); + return; + } else if (!prefix) { + ret[0] = ret[1] = 0xFFFFFFFFFFFFFFFF; + memcpy(out->addr32, ret, sizeof(union netdata_ip_t)); + return; + } else if (prefix <= 64) { + ret[1] = 0xFFFFFFFFFFFFFFFFULL; + + tmp = be64toh(ret[0]); + if (prefix > 0) { + mask = 0xFFFFFFFFFFFFFFFFULL << (64 - prefix); + tmp |= ~mask; + } + ret[0] = htobe64(tmp); + } else { + mask = 0xFFFFFFFFFFFFFFFFULL << (128 - prefix); + tmp = be64toh(ret[1]); + tmp |= ~mask; + ret[1] = htobe64(tmp); + } + + memcpy(out->addr32, ret, sizeof(union netdata_ip_t)); +} + +/** + * Calculate ipv6 first address + * + * @param out the address to store the first address. + * @param in the address used to do the math. + * @param prefix number of bits used to calculate the address + */ +static void get_ipv6_first_addr(union netdata_ip_t *out, union netdata_ip_t *in, uint64_t prefix) +{ + uint64_t mask,tmp; + uint64_t ret[2]; + + memcpy(ret, in->addr32, sizeof(union netdata_ip_t)); + + if (prefix == 128) { + memcpy(out->addr32, in->addr32, sizeof(union netdata_ip_t)); + return; + } else if (!prefix) { + ret[0] = ret[1] = 0; + memcpy(out->addr32, ret, sizeof(union netdata_ip_t)); + return; + } else if (prefix <= 64) { + ret[1] = 0ULL; + + tmp = be64toh(ret[0]); + if (prefix > 0) { + mask = 0xFFFFFFFFFFFFFFFFULL << (64 - prefix); + tmp &= mask; + } + ret[0] = htobe64(tmp); + } else { + mask = 0xFFFFFFFFFFFFFFFFULL << (128 - prefix); + tmp = be64toh(ret[1]); + tmp &= mask; + ret[1] = htobe64(tmp); + } + + memcpy(out->addr32, ret, sizeof(union netdata_ip_t)); +} + +/** + * Is ip inside the range + * + * Check if the ip is inside a IP range + * + * @param rfirst the first ip address of the range + * @param rlast the last ip address of the range + * @param cmpfirst the first ip to compare + * @param cmplast the last ip to compare + * @param family the IP family + * + * @return It returns 1 if the IP is inside the range and 0 otherwise + */ +static int is_ip_inside_range(union netdata_ip_t *rfirst, union netdata_ip_t *rlast, + union netdata_ip_t *cmpfirst, union netdata_ip_t *cmplast, int family) +{ + if (family == AF_INET) { + if (ntohl(rfirst->addr32[0]) <= ntohl(cmpfirst->addr32[0]) && + ntohl(rlast->addr32[0]) >= ntohl(cmplast->addr32[0])) + return 1; + } else { + if (memcmp(rfirst->addr8, cmpfirst->addr8, sizeof(union netdata_ip_t)) <= 0 && + memcmp(rlast->addr8, cmplast->addr8, sizeof(union netdata_ip_t)) >= 0) { + return 1; + } + + } + return 0; +} + +/** + * Fill IP list + * + * @param out a pointer to the link list. + * @param in the structure that will be linked. + */ +void fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in, char *table) +{ +#ifndef NETDATA_INTERNAL_CHECKS + UNUSED(table); +#endif + if (likely(*out)) { + ebpf_network_viewer_ip_list_t *move = *out, *store = *out; + while (move) { + if (in->ver == move->ver && is_ip_inside_range(&move->first, &move->last, &in->first, &in->last, in->ver)) { + info("The range/value (%s) is inside the range/value (%s) already inserted, it will be ignored.", + in->value, move->value); + freez(in->value); + freez(in); + return; + } + store = move; + move = move->next; + } + + store->next = in; + } else { + *out = in; + } + +#ifdef NETDATA_INTERNAL_CHECKS + char first[512], last[512]; + if (in->ver == AF_INET) { + if (inet_ntop(AF_INET, in->first.addr8, first, INET_ADDRSTRLEN) && + inet_ntop(AF_INET, in->last.addr8, last, INET_ADDRSTRLEN)) + info("Adding values %s - %s to %s IP list \"%s\" used on network viewer", + first, last, + (*out == network_viewer_opt.included_ips)?"included":"excluded", + table); + } else { + if (inet_ntop(AF_INET6, in->first.addr8, first, INET6_ADDRSTRLEN) && + inet_ntop(AF_INET6, in->last.addr8, last, INET6_ADDRSTRLEN)) + info("Adding values %s - %s to %s IP list \"%s\" used on network viewer", + first, last, + (*out == network_viewer_opt.included_ips)?"included":"excluded", + table); + } +#endif +} + +/** + * Parse IP List + * + * Parse IP list and link it. + * + * @param out a pointer to store the link list + * @param ip the value given as parameter + */ +static void parse_ip_list(void **out, char *ip) +{ + ebpf_network_viewer_ip_list_t **list = (ebpf_network_viewer_ip_list_t **)out; + + char *ipdup = strdupz(ip); + union netdata_ip_t first = { }; + union netdata_ip_t last = { }; + char *is_ipv6; + if (*ip == '*' && *(ip+1) == '\0') { + memset(first.addr8, 0, sizeof(first.addr8)); + memset(last.addr8, 0xFF, sizeof(last.addr8)); + + is_ipv6 = ip; + + clean_ip_structure(list); + goto storethisip; + } + + char *end = ip; + // Move while I cannot find a separator + while (*end && *end != '/' && *end != '-') end++; + + // We will use only the classic IPV6 for while, but we could consider the base 85 in a near future + // https://tools.ietf.org/html/rfc1924 + is_ipv6 = strchr(ip, ':'); + + int select; + if (*end && !is_ipv6) { // IPV4 range + select = (*end == '/') ? 0 : 1; + *end++ = '\0'; + if (*end == '!') { + info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + goto cleanipdup; + } + + if (!select) { // CIDR + select = ip2nl(first.addr8, ip, AF_INET, ipdup); + if (select) + goto cleanipdup; + + select = (int) str2i(end); + if (select < NETDATA_MINIMUM_IPV4_CIDR || select > NETDATA_MAXIMUM_IPV4_CIDR) { + info("The specified CIDR %s is not valid, the IP %s will be ignored.", end, ip); + goto cleanipdup; + } + + last.addr32[0] = htonl(broadcast(ntohl(first.addr32[0]), select)); + // This was added to remove + // https://app.codacy.com/manual/netdata/netdata/pullRequest?prid=5810941&bid=19021977 + UNUSED(last.addr32[0]); + + uint32_t ipv4_test = htonl(ipv4_network(ntohl(first.addr32[0]), select)); + if (first.addr32[0] != ipv4_test) { + first.addr32[0] = ipv4_test; + struct in_addr ipv4_convert; + ipv4_convert.s_addr = ipv4_test; + char ipv4_msg[INET_ADDRSTRLEN]; + if(inet_ntop(AF_INET, &ipv4_convert, ipv4_msg, INET_ADDRSTRLEN)) + info("The network value of CIDR %s was updated for %s .", ipdup, ipv4_msg); + } + } else { // Range + select = ip2nl(first.addr8, ip, AF_INET, ipdup); + if (select) + goto cleanipdup; + + select = ip2nl(last.addr8, end, AF_INET, ipdup); + if (select) + goto cleanipdup; + } + + if (htonl(first.addr32[0]) > htonl(last.addr32[0])) { + info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", + ipdup); + goto cleanipdup; + } + } else if (is_ipv6) { // IPV6 + if (!*end) { // Unique + select = ip2nl(first.addr8, ip, AF_INET6, ipdup); + if (select) + goto cleanipdup; + + memcpy(last.addr8, first.addr8, sizeof(first.addr8)); + } else if (*end == '-') { + *end++ = 0x00; + if (*end == '!') { + info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + goto cleanipdup; + } + + select = ip2nl(first.addr8, ip, AF_INET6, ipdup); + if (select) + goto cleanipdup; + + select = ip2nl(last.addr8, end, AF_INET6, ipdup); + if (select) + goto cleanipdup; + } else { // CIDR + *end++ = 0x00; + if (*end == '!') { + info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + goto cleanipdup; + } + + select = str2i(end); + if (select < 0 || select > 128) { + info("The CIDR %s is not valid, the address %s will be ignored.", end, ip); + goto cleanipdup; + } + + uint64_t prefix = (uint64_t)select; + select = ip2nl(first.addr8, ip, AF_INET6, ipdup); + if (select) + goto cleanipdup; + + get_ipv6_last_addr(&last, &first, prefix); + + union netdata_ip_t ipv6_test; + get_ipv6_first_addr(&ipv6_test, &first, prefix); + + if (memcmp(first.addr8, ipv6_test.addr8, sizeof(union netdata_ip_t)) != 0) { + memcpy(first.addr8, ipv6_test.addr8, sizeof(union netdata_ip_t)); + + struct in6_addr ipv6_convert; + memcpy(ipv6_convert.s6_addr, ipv6_test.addr8, sizeof(union netdata_ip_t)); + + char ipv6_msg[INET6_ADDRSTRLEN]; + if(inet_ntop(AF_INET6, &ipv6_convert, ipv6_msg, INET6_ADDRSTRLEN)) + info("The network value of CIDR %s was updated for %s .", ipdup, ipv6_msg); + } + } + + if ((be64toh(*(uint64_t *)&first.addr32[2]) > be64toh(*(uint64_t *)&last.addr32[2]) && + !memcmp(first.addr32, last.addr32, 2*sizeof(uint32_t))) || + (be64toh(*(uint64_t *)&first.addr32) > be64toh(*(uint64_t *)&last.addr32)) ) { + info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", + ipdup); + goto cleanipdup; + } + } else { // Unique ip + select = ip2nl(first.addr8, ip, AF_INET, ipdup); + if (select) + goto cleanipdup; + + memcpy(last.addr8, first.addr8, sizeof(first.addr8)); + } + + ebpf_network_viewer_ip_list_t *store; + + storethisip: + store = callocz(1, sizeof(ebpf_network_viewer_ip_list_t)); + store->value = ipdup; + store->hash = simple_hash(ipdup); + store->ver = (uint8_t)(!is_ipv6)?AF_INET:AF_INET6; + memcpy(store->first.addr8, first.addr8, sizeof(first.addr8)); + memcpy(store->last.addr8, last.addr8, sizeof(last.addr8)); + + fill_ip_list(list, store, "socket"); + return; + +cleanipdup: + freez(ipdup); +} + +/** + * Parse IP Range + * + * Parse the IP ranges given and create Network Viewer IP Structure + * + * @param ptr is a pointer with the text to parse. + */ +static void parse_ips(char *ptr) +{ + // No value + if (unlikely(!ptr)) + return; + + while (likely(ptr)) { + // Move forward until next valid character + while (isspace(*ptr)) ptr++; + + // No valid value found + if (unlikely(!*ptr)) + return; + + // Find space that ends the list + char *end = strchr(ptr, ' '); + if (end) { + *end++ = '\0'; + } + + int neg = 0; + if (*ptr == '!') { + neg++; + ptr++; + } + + if (isascii(*ptr)) { // Parse port + parse_ip_list((!neg)?(void **)&network_viewer_opt.included_ips:(void **)&network_viewer_opt.excluded_ips, + ptr); + } + + ptr = end; + } +} + + + +/** + * Parse port list + * + * Parse an allocated port list with the range given + * + * @param out a pointer to store the link list + * @param range the informed range for the user. + */ +static void parse_port_list(void **out, char *range) +{ + int first, last; + ebpf_network_viewer_port_list_t **list = (ebpf_network_viewer_port_list_t **)out; + + char *copied = strdupz(range); + if (*range == '*' && *(range+1) == '\0') { + first = 1; + last = 65535; + + clean_port_structure(list); + goto fillenvpl; + } + + char *end = range; + //Move while I cannot find a separator + while (*end && *end != ':' && *end != '-') end++; + + //It has a range + if (likely(*end)) { + *end++ = '\0'; + if (*end == '!') { + info("The exclusion cannot be in the second part of the range, the range %s will be ignored.", copied); + freez(copied); + return; + } + last = str2i((const char *)end); + } else { + last = 0; + } + + first = str2i((const char *)range); + if (first < NETDATA_MINIMUM_PORT_VALUE || first > NETDATA_MAXIMUM_PORT_VALUE) { + info("The first port %d of the range \"%s\" is invalid and it will be ignored!", first, copied); + freez(copied); + return; + } + + if (!last) + last = first; + + if (last < NETDATA_MINIMUM_PORT_VALUE || last > NETDATA_MAXIMUM_PORT_VALUE) { + info("The second port %d of the range \"%s\" is invalid and the whole range will be ignored!", last, copied); + freez(copied); + return; + } + + if (first > last) { + info("The specified order %s is wrong, the smallest value is always the first, it will be ignored!", copied); + freez(copied); + return; + } + + ebpf_network_viewer_port_list_t *w; +fillenvpl: + w = callocz(1, sizeof(ebpf_network_viewer_port_list_t)); + w->value = copied; + w->hash = simple_hash(copied); + w->first = (uint16_t)htons((uint16_t)first); + w->last = (uint16_t)htons((uint16_t)last); + w->cmp_first = (uint16_t)first; + w->cmp_last = (uint16_t)last; + + fill_port_list(list, w); +} + +/** + * Read max dimension. + * + * Netdata plot two dimensions per connection, so it is necessary to adjust the values. + * + * @param cfg the configuration structure + */ +static void read_max_dimension(struct config *cfg) +{ + int maxdim ; + maxdim = (int) appconfig_get_number(cfg, + EBPF_NETWORK_VIEWER_SECTION, + EBPF_MAXIMUM_DIMENSIONS, + NETDATA_NV_CAP_VALUE); + if (maxdim < 0) { + error("'maximum dimensions = %d' must be a positive number, Netdata will change for default value %ld.", + maxdim, NETDATA_NV_CAP_VALUE); + maxdim = NETDATA_NV_CAP_VALUE; + } + + maxdim /= 2; + if (!maxdim) { + info("The number of dimensions is too small (%u), we are setting it to minimum 2", network_viewer_opt.max_dim); + network_viewer_opt.max_dim = 1; + return; + } + + network_viewer_opt.max_dim = (uint32_t)maxdim; +} + +/** + * Parse Port Range + * + * Parse the port ranges given and create Network Viewer Port Structure + * + * @param ptr is a pointer with the text to parse. + */ +static void parse_ports(char *ptr) +{ + // No value + if (unlikely(!ptr)) + return; + + while (likely(ptr)) { + // Move forward until next valid character + while (isspace(*ptr)) ptr++; + + // No valid value found + if (unlikely(!*ptr)) + return; + + // Find space that ends the list + char *end = strchr(ptr, ' '); + if (end) { + *end++ = '\0'; + } + + int neg = 0; + if (*ptr == '!') { + neg++; + ptr++; + } + + if (isdigit(*ptr)) { // Parse port + parse_port_list((!neg)?(void **)&network_viewer_opt.included_port:(void **)&network_viewer_opt.excluded_port, + ptr); + } else if (isalpha(*ptr)) { // Parse service + parse_service_list((!neg)?(void **)&network_viewer_opt.included_port:(void **)&network_viewer_opt.excluded_port, + ptr); + } else if (*ptr == '*') { // All + parse_port_list((!neg)?(void **)&network_viewer_opt.included_port:(void **)&network_viewer_opt.excluded_port, + ptr); + } + + ptr = end; + } +} + +/** + * Link hostname + * + * @param out is the output link list + * @param in the hostname to add to list. + */ +static void link_hostname(ebpf_network_viewer_hostname_list_t **out, ebpf_network_viewer_hostname_list_t *in) +{ + if (likely(*out)) { + ebpf_network_viewer_hostname_list_t *move = *out; + for (; move->next ; move = move->next ) { + if (move->hash == in->hash && !strcmp(move->value, in->value)) { + info("The hostname %s was already inserted, it will be ignored.", in->value); + freez(in->value); + simple_pattern_free(in->value_pattern); + freez(in); + return; + } + } + + move->next = in; + } else { + *out = in; + } +#ifdef NETDATA_INTERNAL_CHECKS + info("Adding value %s to %s hostname list used on network viewer", + in->value, + (*out == network_viewer_opt.included_hostnames)?"included":"excluded"); +#endif +} + +/** + * Link Hostnames + * + * Parse the list of hostnames to create the link list. + * This is not associated with the IP, because simple patterns like *example* cannot be resolved to IP. + * + * @param out is the output link list + * @param parse is a pointer with the text to parser. + */ +static void link_hostnames(char *parse) +{ + // No value + if (unlikely(!parse)) + return; + + while (likely(parse)) { + // Find the first valid value + while (isspace(*parse)) parse++; + + // No valid value found + if (unlikely(!*parse)) + return; + + // Find space that ends the list + char *end = strchr(parse, ' '); + if (end) { + *end++ = '\0'; + } + + int neg = 0; + if (*parse == '!') { + neg++; + parse++; + } + + ebpf_network_viewer_hostname_list_t *hostname = callocz(1 , sizeof(ebpf_network_viewer_hostname_list_t)); + hostname->value = strdupz(parse); + hostname->hash = simple_hash(parse); + hostname->value_pattern = simple_pattern_create(parse, NULL, SIMPLE_PATTERN_EXACT); + + link_hostname((!neg)?&network_viewer_opt.included_hostnames:&network_viewer_opt.excluded_hostnames, + hostname); + + parse = end; + } +} + +/** + * Parse network viewer section + * + * @param cfg the configuration structure + */ +void parse_network_viewer_section(struct config *cfg) +{ + read_max_dimension(cfg); + + network_viewer_opt.hostname_resolution_enabled = appconfig_get_boolean(cfg, + EBPF_NETWORK_VIEWER_SECTION, + EBPF_CONFIG_RESOLVE_HOSTNAME, + CONFIG_BOOLEAN_NO); + + network_viewer_opt.service_resolution_enabled = appconfig_get_boolean(cfg, + EBPF_NETWORK_VIEWER_SECTION, + EBPF_CONFIG_RESOLVE_SERVICE, + CONFIG_BOOLEAN_NO); + + char *value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_PORTS, NULL); + parse_ports(value); + + if (network_viewer_opt.hostname_resolution_enabled) { + value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_HOSTNAMES, NULL); + link_hostnames(value); + } else { + info("Name resolution is disabled, collector will not parser \"hostnames\" list."); + } + + value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, + "ips", "!127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7 !::1/128"); + parse_ips(value); +} + +/** + * Link dimension name + * + * Link user specified names inside a link list. + * + * @param port the port number associated to the dimension name. + * @param hash the calculated hash for the dimension name. + * @param name the dimension name. + */ +static void link_dimension_name(char *port, uint32_t hash, char *value) +{ + int test = str2i(port); + if (test < NETDATA_MINIMUM_PORT_VALUE || test > NETDATA_MAXIMUM_PORT_VALUE){ + error("The dimension given (%s = %s) has an invalid value and it will be ignored.", port, value); + return; + } + + ebpf_network_viewer_dim_name_t *w; + w = callocz(1, sizeof(ebpf_network_viewer_dim_name_t)); + + w->name = strdupz(value); + w->hash = hash; + + w->port = (uint16_t) htons(test); + + ebpf_network_viewer_dim_name_t *names = network_viewer_opt.names; + if (unlikely(!names)) { + network_viewer_opt.names = w; + } else { + for (; names->next; names = names->next) { + if (names->port == w->port) { + info("Duplicated definition for a service, the name %s will be ignored. ", names->name); + freez(names->name); + names->name = w->name; + names->hash = w->hash; + freez(w); + return; + } + } + names->next = w; + } + +#ifdef NETDATA_INTERNAL_CHECKS + info("Adding values %s( %u) to dimension name list used on network viewer", w->name, htons(w->port)); +#endif +} + +/** + * Parse service Name section. + * + * This function gets the values that will be used to overwrite dimensions. + * + * @param cfg the configuration structure + */ +void parse_service_name_section(struct config *cfg) +{ + struct section *co = appconfig_get_section(cfg, EBPF_SERVICE_NAME_SECTION); + if (co) { + struct config_option *cv; + for (cv = co->values; cv ; cv = cv->next) { + link_dimension_name(cv->name, cv->hash, cv->value); + } + } + + // Always associated the default port to Netdata + ebpf_network_viewer_dim_name_t *names = network_viewer_opt.names; + if (names) { + uint16_t default_port = htons(19999); + while (names) { + if (names->port == default_port) + return; + + names = names->next; + } + } + + char *port_string = getenv("NETDATA_LISTEN_PORT"); + if (port_string) { + // if variable has an invalid value, we assume netdata is using 19999 + int default_port = str2i(port_string); + if (default_port > 0 && default_port < 65536) + link_dimension_name(port_string, simple_hash(port_string), "Netdata"); + } +} + +void parse_table_size_options(struct config *cfg) +{ + socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_BANDWIDTH_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED); + + socket_maps[NETDATA_SOCKET_TABLE_IPV4].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_IPV4_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED); + + socket_maps[NETDATA_SOCKET_TABLE_IPV6].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_IPV6_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED); + + socket_maps[NETDATA_SOCKET_TABLE_UDP].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_UDP_SIZE, NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED); +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_socket_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = socket_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_socket_load_and_attach(bpf_obj, em); + } +#endif + + if (ret) { + error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + } + + return ret; +} + +/** + * Socket thread + * + * Thread used to generate socket charts. + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_socket_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_socket_exit, ptr); + + memset(&inbound_vectors.tree, 0, sizeof(avl_tree_lock)); + memset(&outbound_vectors.tree, 0, sizeof(avl_tree_lock)); + avl_init_lock(&inbound_vectors.tree, compare_sockets); + avl_init_lock(&outbound_vectors.tree, compare_sockets); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = socket_maps; + + parse_network_viewer_section(&socket_config); + parse_service_name_section(&socket_config); + parse_table_size_options(&socket_config); + + if (pthread_mutex_init(&nv_mutex, NULL)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + error("Cannot initialize local mutex"); + goto endsocket; + } + + ebpf_socket_allocate_global_vectors(em->apps_charts); + initialize_inbound_outbound(); + + if (running_on_kernel < NETDATA_EBPF_KERNEL_5_0) + em->mode = MODE_ENTRY; + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_socket_load_bpf(em)) { + em->enabled = CONFIG_BOOLEAN_NO; + pthread_mutex_unlock(&lock); + goto endsocket; + } + + int algorithms[NETDATA_MAX_SOCKET_VECTOR] = { + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX + }; + ebpf_global_labels( + socket_aggregated_data, socket_publish_aggregated, socket_dimension_names, socket_id_names, + algorithms, NETDATA_MAX_SOCKET_VECTOR); + + pthread_mutex_lock(&lock); + ebpf_create_global_charts(em); + + ebpf_update_stats(&plugin_statistics, em); + + pthread_mutex_unlock(&lock); + + socket_collector((usec_t)(em->update_every * USEC_PER_SEC), em); + +endsocket: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h new file mode 100644 index 0000000..ca6b193 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_socket.h @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#ifndef NETDATA_EBPF_SOCKET_H +#define NETDATA_EBPF_SOCKET_H 1 +#include +#include "libnetdata/avl/avl.h" + +// Module name +#define NETDATA_EBPF_MODULE_NAME_SOCKET "socket" + +// Vector indexes +#define NETDATA_UDP_START 3 + +#define NETDATA_SOCKET_READ_SLEEP_MS 800000ULL + +// config file +#define NETDATA_NETWORK_CONFIG_FILE "network.conf" +#define EBPF_NETWORK_VIEWER_SECTION "network connections" +#define EBPF_SERVICE_NAME_SECTION "service name" +#define EBPF_CONFIG_RESOLVE_HOSTNAME "resolve hostnames" +#define EBPF_CONFIG_RESOLVE_SERVICE "resolve service names" +#define EBPF_CONFIG_PORTS "ports" +#define EBPF_CONFIG_HOSTNAMES "hostnames" +#define EBPF_CONFIG_BANDWIDTH_SIZE "bandwidth table size" +#define EBPF_CONFIG_IPV4_SIZE "ipv4 connection table size" +#define EBPF_CONFIG_IPV6_SIZE "ipv6 connection table size" +#define EBPF_CONFIG_UDP_SIZE "udp connection table size" +#define EBPF_MAXIMUM_DIMENSIONS "maximum dimensions" + +enum ebpf_socket_table_list { + NETDATA_SOCKET_TABLE_BANDWIDTH, + NETDATA_SOCKET_GLOBAL, + NETDATA_SOCKET_LPORTS, + NETDATA_SOCKET_TABLE_IPV4, + NETDATA_SOCKET_TABLE_IPV6, + NETDATA_SOCKET_TABLE_UDP, + NETDATA_SOCKET_TABLE_CTRL +}; + +enum ebpf_socket_publish_index { + NETDATA_IDX_TCP_SENDMSG, + NETDATA_IDX_TCP_CLEANUP_RBUF, + NETDATA_IDX_TCP_CLOSE, + NETDATA_IDX_UDP_RECVBUF, + NETDATA_IDX_UDP_SENDMSG, + NETDATA_IDX_TCP_RETRANSMIT, + NETDATA_IDX_TCP_CONNECTION_V4, + NETDATA_IDX_TCP_CONNECTION_V6, + NETDATA_IDX_INCOMING_CONNECTION_TCP, + NETDATA_IDX_INCOMING_CONNECTION_UDP, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_MAX_SOCKET_VECTOR +}; + +enum socket_functions { + NETDATA_FCNT_INET_CSK_ACCEPT, + NETDATA_FCNT_TCP_RETRANSMIT, + NETDATA_FCNT_CLEANUP_RBUF, + NETDATA_FCNT_TCP_CLOSE, + NETDATA_FCNT_UDP_RECEVMSG, + NETDATA_FCNT_TCP_SENDMSG, + NETDATA_FCNT_UDP_SENDMSG, + NETDATA_FCNT_TCP_V4_CONNECT, + NETDATA_FCNT_TCP_V6_CONNECT +}; + +typedef enum ebpf_socket_idx { + NETDATA_KEY_CALLS_TCP_SENDMSG, + NETDATA_KEY_ERROR_TCP_SENDMSG, + NETDATA_KEY_BYTES_TCP_SENDMSG, + + NETDATA_KEY_CALLS_TCP_CLEANUP_RBUF, + NETDATA_KEY_ERROR_TCP_CLEANUP_RBUF, + NETDATA_KEY_BYTES_TCP_CLEANUP_RBUF, + + NETDATA_KEY_CALLS_TCP_CLOSE, + + NETDATA_KEY_CALLS_UDP_RECVMSG, + NETDATA_KEY_ERROR_UDP_RECVMSG, + NETDATA_KEY_BYTES_UDP_RECVMSG, + + NETDATA_KEY_CALLS_UDP_SENDMSG, + NETDATA_KEY_ERROR_UDP_SENDMSG, + NETDATA_KEY_BYTES_UDP_SENDMSG, + + NETDATA_KEY_TCP_RETRANSMIT, + + NETDATA_KEY_CALLS_TCP_CONNECT_IPV4, + NETDATA_KEY_ERROR_TCP_CONNECT_IPV4, + + NETDATA_KEY_CALLS_TCP_CONNECT_IPV6, + NETDATA_KEY_ERROR_TCP_CONNECT_IPV6, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_SOCKET_COUNTER +} ebpf_socket_index_t; + +#define NETDATA_SOCKET_KERNEL_FUNCTIONS "kernel" +#define NETDATA_NETWORK_CONNECTIONS_GROUP "network connections" +#define NETDATA_CGROUP_NET_GROUP "network (eBPF)" + +// Global chart name +#define NETDATA_TCP_OUTBOUND_CONNECTIONS "tcp_outbound_conn" +#define NETDATA_INBOUND_CONNECTIONS "inbound_conn" +#define NETDATA_TCP_FUNCTION_COUNT "tcp_functions" +#define NETDATA_TCP_FUNCTION_BITS "total_tcp_bandwidth" +#define NETDATA_TCP_FUNCTION_ERROR "tcp_error" +#define NETDATA_TCP_RETRANSMIT "tcp_retransmit" +#define NETDATA_UDP_FUNCTION_COUNT "udp_functions" +#define NETDATA_UDP_FUNCTION_BITS "total_udp_bandwidth" +#define NETDATA_UDP_FUNCTION_ERROR "udp_error" + +// Charts created on Apps submenu +#define NETDATA_NET_APPS_CONNECTION_TCP_V4 "outbound_conn_v4" +#define NETDATA_NET_APPS_CONNECTION_TCP_V6 "outbound_conn_v6" +#define NETDATA_NET_APPS_BANDWIDTH_SENT "total_bandwidth_sent" +#define NETDATA_NET_APPS_BANDWIDTH_RECV "total_bandwidth_recv" +#define NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS "bandwidth_tcp_send" +#define NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS "bandwidth_tcp_recv" +#define NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT "bandwidth_tcp_retransmit" +#define NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS "bandwidth_udp_send" +#define NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS "bandwidth_udp_recv" + +// Network viewer charts +#define NETDATA_NV_OUTBOUND_BYTES "outbound_bytes" +#define NETDATA_NV_OUTBOUND_PACKETS "outbound_packets" +#define NETDATA_NV_OUTBOUND_RETRANSMIT "outbound_retransmit" +#define NETDATA_NV_INBOUND_BYTES "inbound_bytes" +#define NETDATA_NV_INBOUND_PACKETS "inbound_packets" + +// Port range +#define NETDATA_MINIMUM_PORT_VALUE 1 +#define NETDATA_MAXIMUM_PORT_VALUE 65535 +#define NETDATA_COMPILED_CONNECTIONS_ALLOWED 65535U +#define NETDATA_MAXIMUM_CONNECTIONS_ALLOWED 16384U +#define NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED 8192U +#define NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED 4096U + +#define NETDATA_MINIMUM_IPV4_CIDR 0 +#define NETDATA_MAXIMUM_IPV4_CIDR 32 + +// Contexts +#define NETDATA_CGROUP_TCP_V4_CONN_CONTEXT "cgroup.net_conn_ipv4" +#define NETDATA_CGROUP_TCP_V6_CONN_CONTEXT "cgroup.net_conn_ipv6" +#define NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT "cgroup.net_bytes_recv" +#define NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT "cgroup.net_bytes_send" +#define NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT "cgroup.net_tcp_recv" +#define NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT "cgroup.net_tcp_send" +#define NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT "cgroup.net_retransmit" +#define NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT "cgroup.net_udp_recv" +#define NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT "cgroup.net_udp_send" + +#define NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT "services.net_conn_ipv4" +#define NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT "services.net_conn_ipv6" +#define NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT "services.net_bytes_recv" +#define NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT "services.net_bytes_send" +#define NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT "services.net_tcp_recv" +#define NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT "services.net_tcp_send" +#define NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT "services.net_retransmit" +#define NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT "services.net_udp_recv" +#define NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT "services.net_udp_send" + +typedef struct ebpf_socket_publish_apps { + // Data read + uint64_t bytes_sent; // Bytes sent + uint64_t bytes_received; // Bytes received + uint64_t call_tcp_sent; // Number of times tcp_sendmsg was called + uint64_t call_tcp_received; // Number of times tcp_cleanup_rbuf was called + uint64_t retransmit; // Number of times tcp_retransmit was called + uint64_t call_udp_sent; // Number of times udp_sendmsg was called + uint64_t call_udp_received; // Number of times udp_recvmsg was called + uint64_t call_close; // Number of times tcp_close was called + uint64_t call_tcp_v4_connection;// Number of times tcp_v4_connect was called + uint64_t call_tcp_v6_connection;// Number of times tcp_v6_connect was called +} ebpf_socket_publish_apps_t; + +typedef struct ebpf_network_viewer_dimension_names { + char *name; + uint32_t hash; + + uint16_t port; + + struct ebpf_network_viewer_dimension_names *next; +} ebpf_network_viewer_dim_name_t ; + +typedef struct ebpf_network_viewer_port_list { + char *value; + uint32_t hash; + + uint16_t first; + uint16_t last; + + uint16_t cmp_first; + uint16_t cmp_last; + + uint16_t protocol; + uint32_t pid; + uint32_t tgid; + uint64_t connections; + struct ebpf_network_viewer_port_list *next; +} ebpf_network_viewer_port_list_t; + +typedef struct netdata_passive_connection { + uint32_t tgid; + uint32_t pid; + uint64_t counter; +} netdata_passive_connection_t; + +typedef struct netdata_passive_connection_idx { + uint16_t protocol; + uint16_t port; +} netdata_passive_connection_idx_t; + +/** + * Union used to store ip addresses + */ +union netdata_ip_t { + uint8_t addr8[16]; + uint16_t addr16[8]; + uint32_t addr32[4]; + uint64_t addr64[2]; +}; + +typedef struct ebpf_network_viewer_ip_list { + char *value; // IP value + uint32_t hash; // IP hash + + uint8_t ver; // IP version + + union netdata_ip_t first; // The IP address informed + union netdata_ip_t last; // The IP address informed + + struct ebpf_network_viewer_ip_list *next; +} ebpf_network_viewer_ip_list_t; + +typedef struct ebpf_network_viewer_hostname_list { + char *value; // IP value + uint32_t hash; // IP hash + + SIMPLE_PATTERN *value_pattern; + + struct ebpf_network_viewer_hostname_list *next; +} ebpf_network_viewer_hostname_list_t; + +#define NETDATA_NV_CAP_VALUE 50L +typedef struct ebpf_network_viewer_options { + uint32_t max_dim; // Store value read from 'maximum dimensions' + + uint32_t hostname_resolution_enabled; + uint32_t service_resolution_enabled; + + ebpf_network_viewer_port_list_t *excluded_port; + ebpf_network_viewer_port_list_t *included_port; + + ebpf_network_viewer_dim_name_t *names; + + ebpf_network_viewer_ip_list_t *excluded_ips; + ebpf_network_viewer_ip_list_t *included_ips; + + ebpf_network_viewer_hostname_list_t *excluded_hostnames; + ebpf_network_viewer_hostname_list_t *included_hostnames; + + ebpf_network_viewer_ip_list_t *ipv4_local_ip; + ebpf_network_viewer_ip_list_t *ipv6_local_ip; +} ebpf_network_viewer_options_t; + +extern ebpf_network_viewer_options_t network_viewer_opt; + +/** + * Structure to store socket information + */ +typedef struct netdata_socket { + uint64_t recv_packets; + uint64_t sent_packets; + uint64_t recv_bytes; + uint64_t sent_bytes; + uint64_t first; // First timestamp + uint64_t ct; // Current timestamp + uint32_t retransmit; // It is never used with UDP + uint16_t protocol; + uint16_t reserved; +} netdata_socket_t; + +typedef struct netdata_plot_values { + // Values used in the previous iteration + uint64_t recv_packets; + uint64_t sent_packets; + uint64_t recv_bytes; + uint64_t sent_bytes; + uint32_t retransmit; + + uint64_t last_time; + + // Values used to plot + uint64_t plot_recv_packets; + uint64_t plot_sent_packets; + uint64_t plot_recv_bytes; + uint64_t plot_sent_bytes; + uint16_t plot_retransmit; +} netdata_plot_values_t; + +/** + * Index used together previous structure + */ +typedef struct netdata_socket_idx { + union netdata_ip_t saddr; + uint16_t sport; + union netdata_ip_t daddr; + uint16_t dport; +} netdata_socket_idx_t; + +// Next values were defined according getnameinfo(3) +#define NETDATA_MAX_NETWORK_COMBINED_LENGTH 1018 +#define NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH 5 // :TCP: +#define NETDATA_DIM_LENGTH_WITHOUT_SERVICE_PROTOCOL 979 + +#define NETDATA_INBOUND_DIRECTION (uint32_t)1 +#define NETDATA_OUTBOUND_DIRECTION (uint32_t)2 +/** + * Allocate the maximum number of structures in the beginning, this can force the collector to use more memory + * in the long term, on the other had it is faster. + */ +typedef struct netdata_socket_plot { + // Search + avl_t avl; + netdata_socket_idx_t index; + + // Current data + netdata_socket_t sock; + + // Previous values and values used to write on chart. + netdata_plot_values_t plot; + + int family; // AF_INET or AF_INET6 + char *resolved_name; // Resolve only in the first call + unsigned char resolved; + + char *dimension_sent; + char *dimension_recv; + char *dimension_retransmit; + + uint32_t flags; +} netdata_socket_plot_t; + +#define NETWORK_VIEWER_CHARTS_CREATED (uint32_t)1 +typedef struct netdata_vector_plot { + netdata_socket_plot_t *plot; // Vector used to plot charts + + avl_tree_lock tree; // AVL tree to speed up search + uint32_t last; // The 'other' dimension, the last chart accepted. + uint32_t next; // The next position to store in the vector. + uint32_t max_plot; // Max number of elements to plot. + uint32_t last_plot; // Last element plot + + uint32_t flags; // Flags + +} netdata_vector_plot_t; + +void clean_port_structure(ebpf_network_viewer_port_list_t **clean); +extern ebpf_network_viewer_port_list_t *listen_ports; +void update_listen_table(uint16_t value, uint16_t proto, netdata_passive_connection_t *values); +void parse_network_viewer_section(struct config *cfg); +void fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in, char *table); +void parse_service_name_section(struct config *cfg); + +extern ebpf_socket_publish_apps_t **socket_bandwidth_curr; +extern struct config socket_config; +extern netdata_ebpf_targets_t socket_targets[]; + +#endif diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c new file mode 100644 index 0000000..3b5d159 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_softirq.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_softirq.h" + +struct config softirq_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define SOFTIRQ_MAP_LATENCY 0 +static ebpf_local_maps_t softirq_maps[] = { + { + .name = "tbl_softirq", + .internal_input = NETDATA_SOFTIRQ_MAX_IRQS, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +#define SOFTIRQ_TP_CLASS_IRQ "irq" +static ebpf_tracepoint_t softirq_tracepoints[] = { + {.enabled = false, .class = SOFTIRQ_TP_CLASS_IRQ, .event = "softirq_entry"}, + {.enabled = false, .class = SOFTIRQ_TP_CLASS_IRQ, .event = "softirq_exit"}, + /* end */ + {.enabled = false, .class = NULL, .event = NULL} +}; + +// these must be in the order defined by the kernel: +// https://elixir.bootlin.com/linux/v5.12.19/source/include/trace/events/irq.h#L13 +static softirq_val_t softirq_vals[] = { + {.name = "HI", .latency = 0}, + {.name = "TIMER", .latency = 0}, + {.name = "NET_TX", .latency = 0}, + {.name = "NET_RX", .latency = 0}, + {.name = "BLOCK", .latency = 0}, + {.name = "IRQ_POLL", .latency = 0}, + {.name = "TASKLET", .latency = 0}, + {.name = "SCHED", .latency = 0}, + {.name = "HRTIMER", .latency = 0}, + {.name = "RCU", .latency = 0}, +}; + +// tmp store for soft IRQ values we get from a per-CPU eBPF map. +static softirq_ebpf_val_t *softirq_ebpf_vals = NULL; + +static struct netdata_static_thread softirq_threads = { + .name = "SOFTIRQ KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +/** + * Cachestat Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_softirq_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + freez(softirq_threads.thread); + + for (int i = 0; softirq_tracepoints[i].class != NULL; i++) { + ebpf_disable_tracepoint(&softirq_tracepoints[i]); + } + freez(softirq_ebpf_vals); + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_MAIN_THREAD_EXITED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Exit + * + * Cancel thread. + * + * @param ptr thread data. + */ +static void softirq_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*softirq_threads.thread); + ebpf_softirq_free(em); +} + +/** + * Cleanup + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void softirq_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_softirq_free(em); +} + +/***************************************************************** + * MAIN LOOP + *****************************************************************/ + +static void softirq_read_latency_map() +{ + int fd = softirq_maps[SOFTIRQ_MAP_LATENCY].map_fd; + int i; + for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) { + int test = bpf_map_lookup_elem(fd, &i, softirq_ebpf_vals); + if (unlikely(test < 0)) { + continue; + } + + uint64_t total_latency = 0; + int cpu_i; + int end = ebpf_nprocs; + for (cpu_i = 0; cpu_i < end; cpu_i++) { + total_latency += softirq_ebpf_vals[cpu_i].latency/1000; + } + + softirq_vals[i].latency = total_latency; + } +} + +/** + * Read eBPF maps for soft IRQ. + */ +static void *softirq_reader(void *ptr) +{ + netdata_thread_cleanup_push(softirq_exit, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_SOFTIRQ_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + softirq_read_latency_map(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +static void softirq_create_charts(int update_every) +{ + ebpf_create_chart( + NETDATA_EBPF_SYSTEM_GROUP, + "softirq_latency", + "Software IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "softirqs", + NULL, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS+1, + NULL, NULL, 0, update_every, + NETDATA_EBPF_MODULE_NAME_SOFTIRQ + ); + + fflush(stdout); +} + +static void softirq_create_dims() +{ + uint32_t i; + for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) { + ebpf_write_global_dimension( + softirq_vals[i].name, softirq_vals[i].name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + } +} + +static inline void softirq_write_dims() +{ + uint32_t i; + for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) { + write_chart_dimension(softirq_vals[i].name, softirq_vals[i].latency); + } +} + +/** +* Main loop for this collector. +*/ +static void softirq_collector(ebpf_module_t *em) +{ + softirq_ebpf_vals = callocz(ebpf_nprocs, sizeof(softirq_ebpf_val_t)); + + // create reader thread. + softirq_threads.thread = mallocz(sizeof(netdata_thread_t)); + softirq_threads.start_routine = softirq_reader; + netdata_thread_create( + softirq_threads.thread, + softirq_threads.name, + NETDATA_THREAD_OPTION_DEFAULT, + softirq_reader, + em + ); + + // create chart and static dims. + pthread_mutex_lock(&lock); + softirq_create_charts(em->update_every); + softirq_create_dims(); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered IRQs. + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency"); + softirq_write_dims(); + write_end_chart(); + + pthread_mutex_unlock(&lock); + } +} + +/***************************************************************** + * EBPF SOFTIRQ THREAD + *****************************************************************/ + +/** + * Soft IRQ latency thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. + */ +void *ebpf_softirq_thread(void *ptr) +{ + netdata_thread_cleanup_push(softirq_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = softirq_maps; + + if (ebpf_enable_tracepoints(softirq_tracepoints) == 0) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endsoftirq; + } + + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endsoftirq; + } + + softirq_collector(em); + +endsoftirq: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_softirq.h b/collectors/ebpf.plugin/ebpf_softirq.h new file mode 100644 index 0000000..7dcddbb --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_softirq.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_SOFTIRQ_H +#define NETDATA_EBPF_SOFTIRQ_H 1 + +/***************************************************************** + * copied from kernel-collectors repo, with modifications needed + * for inclusion here. + *****************************************************************/ + +#define NETDATA_SOFTIRQ_MAX_IRQS 10 + +typedef struct softirq_ebpf_val { + uint64_t latency; + uint64_t ts; +} softirq_ebpf_val_t; + +/***************************************************************** + * below this is eBPF plugin-specific code. + *****************************************************************/ + +#define NETDATA_EBPF_MODULE_NAME_SOFTIRQ "softirq" +#define NETDATA_SOFTIRQ_SLEEP_MS 650000ULL +#define NETDATA_SOFTIRQ_CONFIG_FILE "softirq.conf" + +typedef struct sofirq_val { + uint64_t latency; + char *name; +} softirq_val_t; + +extern struct config softirq_config; +void *ebpf_softirq_thread(void *ptr); + +#endif /* NETDATA_EBPF_SOFTIRQ_H */ diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c new file mode 100644 index 0000000..8199573 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -0,0 +1,915 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_swap.h" + +static char *swap_dimension_name[NETDATA_SWAP_END] = { "read", "write" }; +static netdata_syscall_stat_t swap_aggregated_data[NETDATA_SWAP_END]; +static netdata_publish_syscall_t swap_publish_aggregated[NETDATA_SWAP_END]; + +netdata_publish_swap_t *swap_vector = NULL; + +static netdata_idx_t swap_hash_values[NETDATA_SWAP_END]; +static netdata_idx_t *swap_values = NULL; + +netdata_publish_swap_t **swap_pid = NULL; + +struct config swap_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t swap_maps[] = {{.name = "tbl_pid_swap", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "swap_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_swap", .internal_input = NETDATA_SWAP_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +struct netdata_static_thread swap_threads = { + .name = "SWAP KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +netdata_ebpf_targets_t swap_targets[] = { {.name = "swap_readpage", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "swap_writepage", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/swap.skel.h" // BTF code + +static struct swap_bpf *bpf_obj = NULL; + +/** + * Disable probe + * + * Disable all probes to use exclusively another method. + * + * @param obj is the main structure for bpf objects + */ +static void ebpf_swap_disable_probe(struct swap_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_swap_readpage_probe, false); + bpf_program__set_autoload(obj->progs.netdata_swap_writepage_probe, false); +} + +/* + * Disable trampoline + * + * Disable all trampoline to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_swap_disable_trampoline(struct swap_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_swap_readpage_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_swap_writepage_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_fentry, false); +} + +/** + * Set trampoline target + * + * Set the targets we will monitor. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_swap_set_trampoline_target(struct swap_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_swap_readpage_fentry, 0, + swap_targets[NETDATA_KEY_SWAP_READPAGE_CALL].name); + + bpf_program__set_attach_target(obj->progs.netdata_swap_writepage_fentry, 0, + swap_targets[NETDATA_KEY_SWAP_WRITEPAGE_CALL].name); + + bpf_program__set_attach_target(obj->progs.netdata_release_task_fentry, 0, + EBPF_COMMON_FNCT_CLEAN_UP); +} + +/** + * Mount Attach Probe + * + * Attach probes to target + * + * @param obj is the main structure for bpf objects. + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_swap_attach_kprobe(struct swap_bpf *obj) +{ + obj->links.netdata_swap_readpage_probe = bpf_program__attach_kprobe(obj->progs.netdata_swap_readpage_probe, + false, + swap_targets[NETDATA_KEY_SWAP_READPAGE_CALL].name); + int ret = libbpf_get_error(obj->links.netdata_swap_readpage_probe); + if (ret) + return -1; + + obj->links.netdata_swap_writepage_probe = bpf_program__attach_kprobe(obj->progs.netdata_swap_writepage_probe, + false, + swap_targets[NETDATA_KEY_SWAP_WRITEPAGE_CALL].name); + ret = libbpf_get_error(obj->links.netdata_swap_writepage_probe); + if (ret) + return -1; + + obj->links.netdata_release_task_probe = bpf_program__attach_kprobe(obj->progs.netdata_release_task_probe, + false, + EBPF_COMMON_FNCT_CLEAN_UP); + ret = libbpf_get_error(obj->links.netdata_swap_writepage_probe); + if (ret) + return -1; + + return 0; +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_swap_set_hash_tables(struct swap_bpf *obj) +{ + swap_maps[NETDATA_PID_SWAP_TABLE].map_fd = bpf_map__fd(obj->maps.tbl_pid_swap); + swap_maps[NETDATA_SWAP_CONTROLLER].map_fd = bpf_map__fd(obj->maps.swap_ctrl); + swap_maps[NETDATA_SWAP_GLOBAL_TABLE].map_fd = bpf_map__fd(obj->maps.tbl_swap); +} + +/** + * Adjust Map Size + * + * Resize maps according input from users. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + */ +static void ebpf_swap_adjust_map_size(struct swap_bpf *obj, ebpf_module_t *em) +{ + ebpf_update_map_size(obj->maps.tbl_pid_swap, &swap_maps[NETDATA_PID_SWAP_TABLE], + em, bpf_map__name(obj->maps.tbl_pid_swap)); +} + +/** + * Disable Release Task + * + * Disable release task when apps is not enabled. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_swap_disable_release_task(struct swap_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_release_task_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_release_task_probe, false); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_swap_load_and_attach(struct swap_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_targets_t *mt = em->targets; + netdata_ebpf_program_loaded_t test = mt[NETDATA_KEY_SWAP_READPAGE_CALL].mode; + + if (test == EBPF_LOAD_TRAMPOLINE) { + ebpf_swap_disable_probe(obj); + + ebpf_swap_set_trampoline_target(obj); + } else { + ebpf_swap_disable_trampoline(obj); + } + + ebpf_swap_adjust_map_size(obj, em); + + if (!em->apps_charts && !em->cgroup_charts) + ebpf_swap_disable_release_task(obj); + + int ret = swap_bpf__load(obj); + if (ret) { + return ret; + } + + ret = (test == EBPF_LOAD_TRAMPOLINE) ? swap_bpf__attach(obj) : ebpf_swap_attach_kprobe(obj); + if (!ret) { + ebpf_swap_set_hash_tables(obj); + + ebpf_update_controller(swap_maps[NETDATA_SWAP_CONTROLLER].map_fd, em); + } + + return ret; +} +#endif + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Cachestat Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_swap_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + ebpf_cleanup_publish_syscall(swap_publish_aggregated); + + freez(swap_vector); + freez(swap_values); + freez(swap_threads.thread); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + swap_bpf__destroy(bpf_obj); +#endif + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Swap exit + * + * Cancel thread and exit. + * + * @param ptr thread data. + */ +static void ebpf_swap_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*swap_threads.thread); + ebpf_swap_free(em); +} + +/** + * Swap cleanup + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void ebpf_swap_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_swap_free(em); +} + +/***************************************************************** + * + * COLLECTOR THREAD + * + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void swap_apps_accumulator(netdata_publish_swap_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_publish_swap_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_publish_swap_t *w = &out[i]; + total->write += w->write; + total->read += w->read; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void swap_fill_pid(uint32_t current_pid, netdata_publish_swap_t *publish) +{ + netdata_publish_swap_t *curr = swap_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_swap_t)); + swap_pid[current_pid] = curr; + } + + memcpy(curr, publish, sizeof(netdata_publish_swap_t)); +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_swap_cgroup() +{ + ebpf_cgroup_target_t *ect ; + netdata_publish_swap_t *cv = swap_vector; + int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; + size_t length = sizeof(netdata_publish_swap_t)*ebpf_nprocs; + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_publish_swap_t *out = &pids->swap; + if (likely(swap_pid) && swap_pid[pid]) { + netdata_publish_swap_t *in = swap_pid[pid]; + + memcpy(out, in, sizeof(netdata_publish_swap_t)); + } else { + memset(cv, 0, length); + if (!bpf_map_lookup_elem(fd, &pid, cv)) { + swap_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_publish_swap_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. + */ +static void read_apps_table() +{ + netdata_publish_swap_t *cv = swap_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; + size_t length = sizeof(netdata_publish_swap_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + swap_apps_accumulator(cv); + + swap_fill_pid(key, cv); + + // We are cleaning to avoid passing data read from one process to other. + memset(cv, 0, length); + + pids = pids->next; + } +} + +/** +* Send global +* +* Send global charts to Netdata +*/ +static void swap_send_global() +{ + write_io_chart(NETDATA_MEM_SWAP_CHART, NETDATA_EBPF_SYSTEM_GROUP, + swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL].dimension, + (long long) swap_hash_values[NETDATA_KEY_SWAP_WRITEPAGE_CALL], + swap_publish_aggregated[NETDATA_KEY_SWAP_READPAGE_CALL].dimension, + (long long) swap_hash_values[NETDATA_KEY_SWAP_READPAGE_CALL]); +} + +/** + * Read global counter + * + * Read the table with number of calls to all functions + */ +static void read_global_table() +{ + netdata_idx_t *stored = swap_values; + netdata_idx_t *val = swap_hash_values; + int fd = swap_maps[NETDATA_SWAP_GLOBAL_TABLE].map_fd; + + uint32_t i, end = NETDATA_SWAP_END; + for (i = NETDATA_KEY_SWAP_READPAGE_CALL; i < end; i++) { + if (!bpf_map_lookup_elem(fd, &i, stored)) { + int j; + int last = ebpf_nprocs; + netdata_idx_t total = 0; + for (j = 0; j < last; j++) + total += stored[j]; + + val[i] = total; + } + } +} + +/** + * Swap read hash + * + * This is the thread callback. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_swap_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_swap_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + usec_t step = NETDATA_SWAP_SLEEP_MS * em->update_every; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param swap + * @param root + */ +static void ebpf_swap_sum_pids(netdata_publish_swap_t *swap, struct pid_on_target *root) +{ + uint64_t local_read = 0; + uint64_t local_write = 0; + + while (root) { + int32_t pid = root->pid; + netdata_publish_swap_t *w = swap_pid[pid]; + if (w) { + local_write += w->write; + local_read += w->read; + } + root = root->next; + } + + // These conditions were added, because we are using incremental algorithm + swap->write = (local_write >= swap->write) ? local_write : swap->write; + swap->read = (local_read >= swap->read) ? local_read : swap->read; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param root the target list. +*/ +void ebpf_swap_send_apps_data(struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_swap_sum_pids(&w->swap, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_MEM_SWAP_READ_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->swap.read); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->swap.write); + } + } + write_end_chart(); +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param swap + * @param root + */ +static void ebpf_swap_sum_cgroup_pids(netdata_publish_swap_t *swap, struct pid_on_target2 *pids) +{ + uint64_t local_read = 0; + uint64_t local_write = 0; + + while (pids) { + netdata_publish_swap_t *w = &pids->swap; + local_write += w->write; + local_read += w->read; + + pids = pids->next; + } + + // These conditions were added, because we are using incremental algorithm + swap->write = (local_write >= swap->write) ? local_write : swap->write; + swap->read = (local_read >= swap->read) ? local_read : swap->read; +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + */ +static void ebpf_send_systemd_swap_charts() +{ + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_READ_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.read); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.write); + } + } + write_end_chart(); +} + +/** + * Create specific swap charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_specific_swap_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_MEM_SWAP_READ_CHART, + "Calls to function swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_CGROUP_SWAP_READ_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, + ebpf_create_global_dimension, + swap_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_CGROUP_SWAP_WRITE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, + ebpf_create_global_dimension, + &swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); +} + +/** + * Create specific swap charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_swap_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_READ_CHART,"Calls to function swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_READ_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_WRITE_CHART, "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_WRITE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, update_every); +} + +/* + * Send Specific Swap data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_swap_data(char *type, netdata_publish_swap_t *values) +{ + write_begin_chart(type, NETDATA_MEM_SWAP_READ_CHART); + write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_READPAGE_CALL].name, (long long) values->read); + write_end_chart(); + + write_begin_chart(type, NETDATA_MEM_SWAP_WRITE_CHART); + write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL].name, (long long) values->write); + write_end_chart(); +} + +/** + * Create Systemd Swap Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_swap_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_READ_CHART, + "Calls to swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_READ_CONTEXT, + NETDATA_EBPF_MODULE_NAME_SWAP, update_every); + + ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_SWAP, update_every); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. +*/ +void ebpf_swap_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_swap_sum_cgroup_pids(&ect->publish_systemd_swap, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_swap_charts(update_every); + fflush(stdout); + } + ebpf_send_systemd_swap_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_SWAP_CHART) && ect->updated) { + ebpf_create_specific_swap_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_SWAP_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_SWAP_CHART) { + if (ect->updated) { + ebpf_send_specific_swap_data(ect->name, &ect->publish_systemd_swap); + } else { + ebpf_obsolete_specific_swap_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_SWAP_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. +*/ +static void swap_collector(ebpf_module_t *em) +{ + swap_threads.thread = mallocz(sizeof(netdata_thread_t)); + swap_threads.start_routine = ebpf_swap_read_hash; + + netdata_thread_create(swap_threads.thread, swap_threads.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_swap_read_hash, em); + + int cgroup = em->cgroup_charts; + int update_every = em->update_every; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + read_apps_table(); + + if (cgroup) + ebpf_update_swap_cgroup(); + + pthread_mutex_lock(&lock); + + swap_send_global(); + + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_swap_send_apps_data(apps_groups_root_target); + + if (cgroup) + ebpf_swap_send_cgroup_data(update_every); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_READ_CHART, + "Calls to function swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_swap_allocate_global_vectors(int apps) +{ + if (apps) + swap_pid = callocz((size_t)pid_max, sizeof(netdata_publish_swap_t *)); + + swap_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_publish_swap_t)); + + swap_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); + + memset(swap_hash_values, 0, sizeof(swap_hash_values)); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_swap_charts(int update_every) +{ + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_MEM_SWAP_CHART, + "Calls to access swap memory", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 202, + ebpf_create_global_dimension, + swap_publish_aggregated, NETDATA_SWAP_END, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_swap_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_KEY_SWAP_READPAGE_CALL].mode); + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = swap_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_swap_load_and_attach(bpf_obj, em); + } +#endif + + if (ret) + error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + +/** + * SWAP thread + * + * Thread used to make swap thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_swap_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_swap_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = swap_maps; + + ebpf_update_pid_table(&swap_maps[NETDATA_PID_SWAP_TABLE], em); + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_swap_load_bpf(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endswap; + } + + ebpf_swap_allocate_global_vectors(em->apps_charts); + + int algorithms[NETDATA_SWAP_END] = { NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX }; + ebpf_global_labels(swap_aggregated_data, swap_publish_aggregated, swap_dimension_name, swap_dimension_name, + algorithms, NETDATA_SWAP_END); + + pthread_mutex_lock(&lock); + ebpf_create_swap_charts(em->update_every); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + swap_collector(em); + +endswap: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_swap.h b/collectors/ebpf.plugin/ebpf_swap.h new file mode 100644 index 0000000..79182e5 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_swap.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_SWAP_H +#define NETDATA_EBPF_SWAP_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_SWAP "swap" + +#define NETDATA_SWAP_SLEEP_MS 850000ULL + +// charts +#define NETDATA_MEM_SWAP_CHART "swapcalls" +#define NETDATA_MEM_SWAP_READ_CHART "swap_read_call" +#define NETDATA_MEM_SWAP_WRITE_CHART "swap_write_call" +#define NETDATA_SWAP_SUBMENU "swap" + +// configuration file +#define NETDATA_DIRECTORY_SWAP_CONFIG_FILE "swap.conf" + +// Contexts +#define NETDATA_CGROUP_SWAP_READ_CONTEXT "cgroup.swap_read" +#define NETDATA_CGROUP_SWAP_WRITE_CONTEXT "cgroup.swap_write" +#define NETDATA_SYSTEMD_SWAP_READ_CONTEXT "services.swap_read" +#define NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT "services.swap_write" + +typedef struct netdata_publish_swap { + uint64_t read; + uint64_t write; +} netdata_publish_swap_t; + +enum swap_tables { + NETDATA_PID_SWAP_TABLE, + NETDATA_SWAP_CONTROLLER, + NETDATA_SWAP_GLOBAL_TABLE +}; + +enum swap_counters { + NETDATA_KEY_SWAP_READPAGE_CALL, + NETDATA_KEY_SWAP_WRITEPAGE_CALL, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_SWAP_END +}; + +extern netdata_publish_swap_t **swap_pid; + +void *ebpf_swap_thread(void *ptr); +void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr); + +extern struct config swap_config; +extern netdata_ebpf_targets_t swap_targets[]; + +#endif diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c new file mode 100644 index 0000000..8404975 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_sync.c @@ -0,0 +1,608 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_sync.h" + +static char *sync_counter_dimension_name[NETDATA_SYNC_IDX_END] = { "sync", "syncfs", "msync", "fsync", "fdatasync", + "sync_file_range" }; +static netdata_syscall_stat_t sync_counter_aggregated_data[NETDATA_SYNC_IDX_END]; +static netdata_publish_syscall_t sync_counter_publish_aggregated[NETDATA_SYNC_IDX_END]; + +static netdata_idx_t sync_hash_values[NETDATA_SYNC_IDX_END]; + +struct netdata_static_thread sync_threads = { + .name = "SYNC KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +static ebpf_local_maps_t sync_maps[] = {{.name = "tbl_sync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_syncfs", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_msync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_fsync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_fdatasync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_syncfr", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +struct config sync_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +netdata_ebpf_targets_t sync_targets[] = { {.name = NETDATA_SYSCALLS_SYNC, .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NETDATA_SYSCALLS_SYNCFS, .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NETDATA_SYSCALLS_MSYNC, .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NETDATA_SYSCALLS_FSYNC, .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NETDATA_SYSCALLS_FDATASYNC, .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NETDATA_SYSCALLS_SYNC_FILE_RANGE, .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +/***************************************************************** + * + * BTF FUNCTIONS + * + *****************************************************************/ + +/** + * Disable probe + * + * Disable kprobe to use another method. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_sync_disable_probe(struct sync_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_sync_kprobe, false); +} + +/** + * Disable tramppoline + * + * Disable trampoline to use another method. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_sync_disable_trampoline(struct sync_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_sync_fentry, false); +} + +/** + * Disable tracepoint + * + * Disable tracepoints according information given. + * + * @param obj object loaded + * @param idx Which syscall will not be disabled + */ +void ebpf_sync_disable_tracepoints(struct sync_bpf *obj, sync_syscalls_index_t idx) +{ + if (idx != NETDATA_SYNC_SYNC_IDX) + bpf_program__set_autoload(obj->progs.netdata_sync_entry, false); + + if (idx != NETDATA_SYNC_SYNCFS_IDX) + bpf_program__set_autoload(obj->progs.netdata_syncfs_entry, false); + + if (idx != NETDATA_SYNC_MSYNC_IDX) + bpf_program__set_autoload(obj->progs.netdata_msync_entry, false); + + if (idx != NETDATA_SYNC_FSYNC_IDX) + bpf_program__set_autoload(obj->progs.netdata_fsync_entry, false); + + if (idx != NETDATA_SYNC_FDATASYNC_IDX) + bpf_program__set_autoload(obj->progs.netdata_fdatasync_entry, false); + + if (idx != NETDATA_SYNC_SYNC_FILE_RANGE_IDX) + bpf_program__set_autoload(obj->progs.netdata_sync_file_range_entry, false); +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + * @param idx the index for the main structure + */ +static void ebpf_sync_set_hash_tables(struct sync_bpf *obj, sync_syscalls_index_t idx) +{ + sync_maps[idx].map_fd = bpf_map__fd(obj->maps.tbl_sync); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em the structure with configuration + * @param target the syscall that we are attaching a tracer. + * @param idx the index for the main structure + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_sync_load_and_attach(struct sync_bpf *obj, ebpf_module_t *em, char *target, + sync_syscalls_index_t idx) +{ + netdata_ebpf_targets_t *synct = em->targets; + netdata_ebpf_program_loaded_t test = synct[NETDATA_SYNC_SYNC_IDX].mode; + + if (test == EBPF_LOAD_TRAMPOLINE) { + ebpf_sync_disable_probe(obj); + ebpf_sync_disable_tracepoints(obj, NETDATA_SYNC_IDX_END); + + bpf_program__set_attach_target(obj->progs.netdata_sync_fentry, 0, + target); + } else if (test == EBPF_LOAD_PROBE || + test == EBPF_LOAD_RETPROBE) { + ebpf_sync_disable_tracepoints(obj, NETDATA_SYNC_IDX_END); + ebpf_sync_disable_trampoline(obj); + } else { + ebpf_sync_disable_probe(obj); + ebpf_sync_disable_trampoline(obj); + + ebpf_sync_disable_tracepoints(obj, idx); + } + + int ret = sync_bpf__load(obj); + if (!ret) { + if (test != EBPF_LOAD_PROBE && test != EBPF_LOAD_RETPROBE) { + ret = sync_bpf__attach(obj); + } else { + obj->links.netdata_sync_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_sync_kprobe, + false, target); + ret = (int)libbpf_get_error(obj->links.netdata_sync_kprobe); + } + + if (!ret) + ebpf_sync_set_hash_tables(obj, idx); + } + + return ret; +} +#endif + +/***************************************************************** + * + * CLEANUP THREAD + * + *****************************************************************/ + +#ifdef LIBBPF_MAJOR_VERSION +/** + * Cleanup Objects + * + * Cleanup loaded objects when thread was initialized. + */ +void ebpf_sync_cleanup_objects() +{ + int i; + for (i = 0; local_syscalls[i].syscall; i++) { + ebpf_sync_syscalls_t *w = &local_syscalls[i]; + if (w->sync_obj) + sync_bpf__destroy(w->sync_obj); + } +} +#endif + +/** + * Sync Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_sync_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_sync_cleanup_objects(); +#endif + freez(sync_threads.thread); + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Exit + * + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_sync_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*sync_threads.thread); + ebpf_sync_free(em); +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_sync_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_sync_free(em); +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Load Legacy + * + * Load legacy code. + * + * @param w is the sync output structure with pointers to objects loaded. + * @param em is structure with configuration + * + * @return 0 on success and -1 otherwise. + */ +static int ebpf_sync_load_legacy(ebpf_sync_syscalls_t *w, ebpf_module_t *em) +{ + em->thread_name = w->syscall; + if (!w->probe_links) { + w->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &w->objects); + if (!w->probe_links) { + return -1; + } + } + + return 0; +} + +/* + * Initialize Syscalls + * + * Load the eBPF programs to monitor syscalls + * + * @return 0 on success and -1 otherwise. + */ +static int ebpf_sync_initialize_syscall(ebpf_module_t *em) +{ + int i; + const char *saved_name = em->thread_name; + int errors = 0; + for (i = 0; local_syscalls[i].syscall; i++) { + ebpf_sync_syscalls_t *w = &local_syscalls[i]; + if (w->enabled) { + if (em->load & EBPF_LOAD_LEGACY) { + if (ebpf_sync_load_legacy(w, em)) + errors++; + + em->thread_name = saved_name; + } +#ifdef LIBBPF_MAJOR_VERSION + else { + char syscall[NETDATA_EBPF_MAX_SYSCALL_LENGTH]; + ebpf_select_host_prefix(syscall, NETDATA_EBPF_MAX_SYSCALL_LENGTH, w->syscall, running_on_kernel); + w->sync_obj = sync_bpf__open(); + if (!w->sync_obj) { + errors++; + } else { + if (ebpf_is_function_inside_btf(default_btf, syscall)) { + if (ebpf_sync_load_and_attach(w->sync_obj, em, syscall, i)) { + errors++; + } + } else { + if (ebpf_sync_load_legacy(w, em)) + errors++; + } + em->thread_name = saved_name; + } + } +#endif + } + } + em->thread_name = saved_name; + + memset(sync_counter_aggregated_data, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_syscall_stat_t)); + memset(sync_counter_publish_aggregated, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_publish_syscall_t)); + memset(sync_hash_values, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_idx_t)); + + return (errors) ? -1 : 0; +} + +/***************************************************************** + * + * DATA THREAD + * + *****************************************************************/ + +/** + * Read global table + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + netdata_idx_t stored; + uint32_t idx = NETDATA_SYNC_CALL; + int i; + for (i = 0; local_syscalls[i].syscall; i++) { + if (local_syscalls[i].enabled) { + int fd = sync_maps[i].map_fd; + if (!bpf_map_lookup_elem(fd, &idx, &stored)) { + sync_hash_values[i] = stored; + } + } + } +} + +/** + * Sync read hash + * + * This is the thread callback. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_sync_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_sync_cleanup, ptr); + ebpf_module_t *em = (ebpf_module_t *)ptr; + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = NETDATA_EBPF_SYNC_SLEEP_MS * em->update_every; + + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Create Sync charts + * + * Create charts and dimensions according user input. + * + * @param id chart id + * @param idx the first index with data. + * @param end the last index with data. + */ +static void ebpf_send_sync_chart(char *id, + int idx, + int end) +{ + write_begin_chart(NETDATA_EBPF_MEMORY_GROUP, id); + + netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx]; + + while (move && idx <= end) { + if (local_syscalls[idx].enabled) + write_chart_dimension(move->name, sync_hash_values[idx]); + + move = move->next; + idx++; + } + + write_end_chart(); +} + +/** + * Send data + * + * Send global charts to Netdata + */ +static void sync_send_data() +{ + if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) { + ebpf_send_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART, NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX); + } + + if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled) + ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_MSYNC_CHART, + sync_counter_publish_aggregated[NETDATA_SYNC_MSYNC_IDX].dimension, + sync_hash_values[NETDATA_SYNC_MSYNC_IDX]); + + if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) { + ebpf_send_sync_chart(NETDATA_EBPF_SYNC_CHART, NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX); + } + + if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled) + ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_FILE_SEGMENT_CHART, + sync_counter_publish_aggregated[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].dimension, + sync_hash_values[NETDATA_SYNC_SYNC_FILE_RANGE_IDX]); +} + +/** +* Main loop for this collector. +*/ +static void sync_collector(ebpf_module_t *em) +{ + sync_threads.thread = mallocz(sizeof(netdata_thread_t)); + sync_threads.start_routine = ebpf_sync_read_hash; + + netdata_thread_create(sync_threads.thread, sync_threads.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_sync_read_hash, em); + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + + sync_send_data(); + + pthread_mutex_unlock(&lock); + } +} + + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Create Sync charts + * + * Create charts and dimensions according user input. + * + * @param id chart id + * @param title chart title + * @param order order number of the specified chart + * @param idx the first index with data. + * @param end the last index with data. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_sync_chart(char *id, + char *title, + int order, + int idx, + int end, + int update_every) +{ + ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order, + update_every, + NETDATA_EBPF_MODULE_NAME_SYNC); + + netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx]; + + while (move && idx <= end) { + if (local_syscalls[idx].enabled) + ebpf_write_global_dimension(move->name, move->dimension, move->algorithm); + + move = move->next; + idx++; + } +} + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_sync_charts(int update_every) +{ + if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) + ebpf_create_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART, + "Monitor calls for fsync(2) and fdatasync(2).", 21300, + NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX, update_every); + + if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled) + ebpf_create_sync_chart(NETDATA_EBPF_MSYNC_CHART, + "Monitor calls for msync(2).", 21301, + NETDATA_SYNC_MSYNC_IDX, NETDATA_SYNC_MSYNC_IDX, update_every); + + if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) + ebpf_create_sync_chart(NETDATA_EBPF_SYNC_CHART, + "Monitor calls for sync(2) and syncfs(2).", 21302, + NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX, update_every); + + if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled) + ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART, + "Monitor calls for sync_file_range(2).", 21303, + NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, update_every); +} + +/** + * Parse Syscalls + * + * Parse syscall options available inside ebpf.d/sync.conf + */ +static void ebpf_sync_parse_syscalls() +{ + int i; + for (i = 0; local_syscalls[i].syscall; i++) { + local_syscalls[i].enabled = appconfig_get_boolean(&sync_config, NETDATA_SYNC_CONFIG_NAME, + local_syscalls[i].syscall, CONFIG_BOOLEAN_YES); + } +} + +/** + * Sync thread + * + * Thread used to make sync thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_sync_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_sync_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = sync_maps; + + ebpf_sync_parse_syscalls(); + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_sync_initialize_syscall(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endsync; + } + + int algorithms[NETDATA_SYNC_IDX_END] = { NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX }; + ebpf_global_labels(sync_counter_aggregated_data, sync_counter_publish_aggregated, + sync_counter_dimension_name, sync_counter_dimension_name, + algorithms, NETDATA_SYNC_IDX_END); + + pthread_mutex_lock(&lock); + ebpf_create_sync_charts(em->update_every); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + sync_collector(em); + +endsync: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_sync.h b/collectors/ebpf.plugin/ebpf_sync.h new file mode 100644 index 0000000..cace2a1 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_sync.h @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_SYNC_H +#define NETDATA_EBPF_SYNC_H 1 + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/sync.skel.h" +#endif + +// Module name +#define NETDATA_EBPF_MODULE_NAME_SYNC "sync" + +// charts +#define NETDATA_EBPF_SYNC_CHART "sync" +#define NETDATA_EBPF_MSYNC_CHART "memory_map" +#define NETDATA_EBPF_FILE_SYNC_CHART "file_sync" +#define NETDATA_EBPF_FILE_SEGMENT_CHART "file_segment" +#define NETDATA_EBPF_SYNC_SUBMENU "synchronization (eBPF)" + +#define NETDATA_SYSCALLS_SYNC "sync" +#define NETDATA_SYSCALLS_SYNCFS "syncfs" +#define NETDATA_SYSCALLS_MSYNC "msync" +#define NETDATA_SYSCALLS_FSYNC "fsync" +#define NETDATA_SYSCALLS_FDATASYNC "fdatasync" +#define NETDATA_SYSCALLS_SYNC_FILE_RANGE "sync_file_range" + +#define NETDATA_EBPF_SYNC_SLEEP_MS 800000ULL + +// configuration file +#define NETDATA_SYNC_CONFIG_FILE "sync.conf" +#define NETDATA_SYNC_CONFIG_NAME "syscalls" + +typedef enum sync_syscalls_index { + NETDATA_SYNC_SYNC_IDX, + NETDATA_SYNC_SYNCFS_IDX, + NETDATA_SYNC_MSYNC_IDX, + NETDATA_SYNC_FSYNC_IDX, + NETDATA_SYNC_FDATASYNC_IDX, + NETDATA_SYNC_SYNC_FILE_RANGE_IDX, + + NETDATA_SYNC_IDX_END +} sync_syscalls_index_t; + +enum netdata_sync_charts { + NETDATA_SYNC_CALL, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_SYNC_END +}; + +enum netdata_sync_table { + NETDATA_SYNC_GLOBAL_TABLE +}; + +void *ebpf_sync_thread(void *ptr); +extern struct config sync_config; +extern netdata_ebpf_targets_t sync_targets[]; + +#endif /* NETDATA_EBPF_SYNC_H */ diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c new file mode 100644 index 0000000..ad6de4a --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -0,0 +1,1983 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "ebpf.h" +#include "ebpf_vfs.h" + +static char *vfs_dimension_names[NETDATA_KEY_PUBLISH_VFS_END] = { "delete", "read", "write", + "fsync", "open", "create" }; +static char *vfs_id_names[NETDATA_KEY_PUBLISH_VFS_END] = { "vfs_unlink", "vfs_read", "vfs_write", + "vfs_fsync", "vfs_open", "vfs_create"}; + +static netdata_idx_t *vfs_hash_values = NULL; +static netdata_syscall_stat_t vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_END]; +static netdata_publish_syscall_t vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_END]; +netdata_publish_vfs_t **vfs_pid = NULL; +netdata_publish_vfs_t *vfs_vector = NULL; + +static ebpf_local_maps_t vfs_maps[] = {{.name = "tbl_vfs_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_vfs_stats", .internal_input = NETDATA_VFS_COUNTER, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "vfs_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +struct config vfs_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +struct netdata_static_thread vfs_threads = { + .name = "VFS KERNEL", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL +}; + +netdata_ebpf_targets_t vfs_targets[] = { {.name = "vfs_write", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "vfs_writev", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "vfs_read", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "vfs_readv", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "vfs_unlink", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "vfs_fsync", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "vfs_open", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "vfs_create", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = "release_task", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/vfs.skel.h" // BTF code + +static struct vfs_bpf *bpf_obj = NULL; + +/** + * Disable probe + * + * Disable all probes to use exclusively another method. + * + * @param obj is the main structure for bpf objects + */ +static void ebpf_vfs_disable_probes(struct vfs_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_vfs_write_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_write_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_writev_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_writev_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_read_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_read_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_readv_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_readv_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_unlink_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_unlink_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_fsync_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_fsync_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_open_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_open_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_create_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_create_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_release_task_kprobe, false); +} + +/* + * Disable trampoline + * + * Disable all trampoline to use exclusively another method. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_vfs_disable_trampoline(struct vfs_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_vfs_write_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_write_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_writev_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_writev_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_read_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_read_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_readv_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_readv_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_unlink_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_fsync_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_fsync_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_open_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_open_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_create_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_release_task_fentry, false); +} + +/** + * Set trampoline target + * + * Set the targets we will monitor. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_vfs_set_trampoline_target(struct vfs_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_vfs_write_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_WRITE].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_write_fexit, 0, vfs_targets[NETDATA_EBPF_VFS_WRITE].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_writev_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_WRITEV].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_writev_fexit, 0, vfs_targets[NETDATA_EBPF_VFS_WRITEV].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_read_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_READ].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_read_fexit, 0, vfs_targets[NETDATA_EBPF_VFS_READ].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_readv_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_READV].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_readv_fexit, 0, vfs_targets[NETDATA_EBPF_VFS_READV].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_unlink_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_UNLINK].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_fsync_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_FSYNC].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_fsync_fexit, 0, vfs_targets[NETDATA_EBPF_VFS_FSYNC].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_open_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_OPEN].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_open_fexit, 0, vfs_targets[NETDATA_EBPF_VFS_OPEN].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_create_fentry, 0, vfs_targets[NETDATA_EBPF_VFS_CREATE].name); + + bpf_program__set_attach_target(obj->progs.netdata_vfs_release_task_fentry, 0, EBPF_COMMON_FNCT_CLEAN_UP); +} + +/** + * Attach Probe + * + * Attach probes to target + * + * @param obj is the main structure for bpf objects. + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_vfs_attach_probe(struct vfs_bpf *obj) +{ + obj->links.netdata_vfs_write_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_write_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_WRITE].name); + int ret = libbpf_get_error(obj->links.netdata_vfs_write_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_write_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_write_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_WRITE].name); + ret = libbpf_get_error(obj->links.netdata_vfs_write_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_writev_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_writev_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_WRITEV].name); + ret = libbpf_get_error(obj->links.netdata_vfs_writev_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_writev_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_writev_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_WRITEV].name); + ret = libbpf_get_error(obj->links.netdata_vfs_writev_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_read_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_read_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_READ].name); + ret = libbpf_get_error(obj->links.netdata_vfs_read_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_read_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_read_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_READ].name); + ret = libbpf_get_error(obj->links.netdata_vfs_read_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_readv_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_readv_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_READV].name); + ret = libbpf_get_error(obj->links.netdata_vfs_readv_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_readv_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_readv_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_READV].name); + ret = libbpf_get_error(obj->links.netdata_vfs_readv_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_unlink_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_unlink_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_UNLINK].name); + ret = libbpf_get_error(obj->links.netdata_vfs_unlink_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_unlink_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_unlink_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_UNLINK].name); + ret = libbpf_get_error(obj->links.netdata_vfs_unlink_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_fsync_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_fsync_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_FSYNC].name); + ret = libbpf_get_error(obj->links.netdata_vfs_fsync_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_fsync_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_fsync_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_FSYNC].name); + ret = libbpf_get_error(obj->links.netdata_vfs_fsync_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_open_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_open_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_OPEN].name); + ret = libbpf_get_error(obj->links.netdata_vfs_open_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_open_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_open_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_OPEN].name); + ret = libbpf_get_error(obj->links.netdata_vfs_open_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_create_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_create_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_CREATE].name); + ret = libbpf_get_error(obj->links.netdata_vfs_create_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_create_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_create_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_CREATE].name); + ret = libbpf_get_error(obj->links.netdata_vfs_create_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_fsync_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_fsync_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_FSYNC].name); + ret = libbpf_get_error(obj->links.netdata_vfs_fsync_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_fsync_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_fsync_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_FSYNC].name); + ret = libbpf_get_error(obj->links.netdata_vfs_fsync_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_open_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_open_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_OPEN].name); + ret = libbpf_get_error(obj->links.netdata_vfs_open_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_open_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_open_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_OPEN].name); + ret = libbpf_get_error(obj->links.netdata_vfs_open_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_create_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_create_kprobe, false, + vfs_targets[NETDATA_EBPF_VFS_CREATE].name); + ret = libbpf_get_error(obj->links.netdata_vfs_create_kprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_create_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_create_kretprobe, true, + vfs_targets[NETDATA_EBPF_VFS_CREATE].name); + ret = libbpf_get_error(obj->links.netdata_vfs_create_kretprobe); + if (ret) + return -1; + + obj->links.netdata_vfs_release_task_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_vfs_release_task_fentry, + true, + EBPF_COMMON_FNCT_CLEAN_UP); + ret = libbpf_get_error(obj->links.netdata_vfs_release_task_kprobe); + if (ret) + return -1; + + return 0; +} + +/** + * Adjust Map Size + * + * Resize maps according input from users. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + */ +static void ebpf_vfs_adjust_map_size(struct vfs_bpf *obj, ebpf_module_t *em) +{ + ebpf_update_map_size(obj->maps.tbl_vfs_pid, &vfs_maps[NETDATA_VFS_PID], + em, bpf_map__name(obj->maps.tbl_vfs_pid)); +} + +/** + * Set hash tables + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_vfs_set_hash_tables(struct vfs_bpf *obj) +{ + vfs_maps[NETDATA_VFS_ALL].map_fd = bpf_map__fd(obj->maps.tbl_vfs_stats); + vfs_maps[NETDATA_VFS_PID].map_fd = bpf_map__fd(obj->maps.tbl_vfs_pid); + vfs_maps[NETDATA_VFS_CTRL].map_fd = bpf_map__fd(obj->maps.vfs_ctrl); +} + +/** + * Disable Release Task + * + * Disable release task when apps is not enabled. + * + * @param obj is the main structure for bpf objects. + */ +static void ebpf_vfs_disable_release_task(struct vfs_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_vfs_release_task_fentry, false); + bpf_program__set_autoload(obj->progs.netdata_vfs_release_task_kprobe, false); +} + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. + * @param em structure with configuration + * + * @return it returns 0 on succes and -1 otherwise + */ +static inline int ebpf_vfs_load_and_attach(struct vfs_bpf *obj, ebpf_module_t *em) +{ + netdata_ebpf_targets_t *mt = em->targets; + netdata_ebpf_program_loaded_t test = mt[NETDATA_EBPF_VFS_WRITE].mode; + + if (test == EBPF_LOAD_TRAMPOLINE) { + ebpf_vfs_disable_probes(obj); + + ebpf_vfs_set_trampoline_target(obj); + } else { + ebpf_vfs_disable_trampoline(obj); + } + + ebpf_vfs_adjust_map_size(obj, em); + + if (!em->apps_charts && !em->cgroup_charts) + ebpf_vfs_disable_release_task(obj); + + int ret = vfs_bpf__load(obj); + if (ret) { + return ret; + } + + ret = (test == EBPF_LOAD_TRAMPOLINE) ? vfs_bpf__attach(obj) : ebpf_vfs_attach_probe(obj); + if (!ret) { + ebpf_vfs_set_hash_tables(obj); + + ebpf_update_controller(vfs_maps[NETDATA_VFS_CTRL].map_fd, em); + } + + return ret; +} +#endif + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Cachestat Free + * + * Cleanup variables after child threads to stop + * + * @param ptr thread data. + */ +static void ebpf_vfs_free(ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING; + pthread_mutex_unlock(&ebpf_exit_cleanup); + return; + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + + freez(vfs_hash_values); + freez(vfs_vector); + freez(vfs_threads.thread); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + vfs_bpf__destroy(bpf_obj); +#endif + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Exit + * + * Cancel thread and exit. + * + * @param ptr thread data. +**/ +static void ebpf_vfs_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + netdata_thread_cancel(*vfs_threads.thread); + ebpf_vfs_free(em); +} + +/** +* Clean up the main thread. +* +* @param ptr thread data. +**/ +static void ebpf_vfs_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + ebpf_vfs_free(em); +} + +/***************************************************************** + * + * FUNCTIONS WITH THE MAIN LOOP + * + *****************************************************************/ + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information +*/ +static void ebpf_vfs_send_data(ebpf_module_t *em) +{ + netdata_publish_vfs_common_t pvc; + + pvc.write = (long)vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_WRITE].bytes; + pvc.read = (long)vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_READ].bytes; + + write_count_chart(NETDATA_VFS_FILE_CLEAN_COUNT, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK], 1); + + write_count_chart(NETDATA_VFS_FILE_IO_COUNT, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], 2); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_VFS_FILE_ERR_COUNT, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], 2); + } + + write_io_chart(NETDATA_VFS_IO_FILE_BYTES, NETDATA_FILESYSTEM_FAMILY, vfs_id_names[NETDATA_KEY_PUBLISH_VFS_WRITE], + (long long)pvc.write, vfs_id_names[NETDATA_KEY_PUBLISH_VFS_READ], (long long)pvc.read); + + write_count_chart(NETDATA_VFS_FSYNC, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], 1); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_VFS_FSYNC_ERR, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], 1); + } + + write_count_chart(NETDATA_VFS_OPEN, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], 1); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_VFS_OPEN_ERR, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], 1); + } + + write_count_chart(NETDATA_VFS_CREATE, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], 1); + + if (em->mode < MODE_ENTRY) { + write_err_chart( + NETDATA_VFS_CREATE_ERR, + NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1); + } +} + +/** + * Read the hash table and store data to allocated vectors. + */ +static void read_global_table() +{ + uint64_t idx; + netdata_idx_t res[NETDATA_VFS_COUNTER]; + + netdata_idx_t *val = vfs_hash_values; + int fd = vfs_maps[NETDATA_VFS_ALL].map_fd; + for (idx = 0; idx < NETDATA_VFS_COUNTER; idx++) { + uint64_t total = 0; + if (!bpf_map_lookup_elem(fd, &idx, val)) { + int i; + int end = ebpf_nprocs; + for (i = 0; i < end; i++) + total += val[i]; + } + res[idx] = total; + } + + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].ncall = res[NETDATA_KEY_CALLS_VFS_UNLINK]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].ncall = res[NETDATA_KEY_CALLS_VFS_READ] + + res[NETDATA_KEY_CALLS_VFS_READV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].ncall = res[NETDATA_KEY_CALLS_VFS_WRITE] + + res[NETDATA_KEY_CALLS_VFS_WRITEV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].ncall = res[NETDATA_KEY_CALLS_VFS_FSYNC]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].ncall = res[NETDATA_KEY_CALLS_VFS_OPEN]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].ncall = res[NETDATA_KEY_CALLS_VFS_CREATE]; + + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].nerr = res[NETDATA_KEY_ERROR_VFS_UNLINK]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].nerr = res[NETDATA_KEY_ERROR_VFS_READ] + + res[NETDATA_KEY_ERROR_VFS_READV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].nerr = res[NETDATA_KEY_ERROR_VFS_WRITE] + + res[NETDATA_KEY_ERROR_VFS_WRITEV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].nerr = res[NETDATA_KEY_ERROR_VFS_FSYNC]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].nerr = res[NETDATA_KEY_ERROR_VFS_OPEN]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].nerr = res[NETDATA_KEY_ERROR_VFS_CREATE]; + + vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_WRITE].bytes = (uint64_t)res[NETDATA_KEY_BYTES_VFS_WRITE] + + (uint64_t)res[NETDATA_KEY_BYTES_VFS_WRITEV]; + vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_READ].bytes = (uint64_t)res[NETDATA_KEY_BYTES_VFS_READ] + + (uint64_t)res[NETDATA_KEY_BYTES_VFS_READV]; +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param swap output structure + * @param root link list with structure to be used + */ +static void ebpf_vfs_sum_pids(netdata_publish_vfs_t *vfs, struct pid_on_target *root) +{ + netdata_publish_vfs_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (root) { + int32_t pid = root->pid; + netdata_publish_vfs_t *w = vfs_pid[pid]; + if (w) { + accumulator.write_call += w->write_call; + accumulator.writev_call += w->writev_call; + accumulator.read_call += w->read_call; + accumulator.readv_call += w->readv_call; + accumulator.unlink_call += w->unlink_call; + accumulator.fsync_call += w->fsync_call; + accumulator.open_call += w->open_call; + accumulator.create_call += w->create_call; + + accumulator.write_bytes += w->write_bytes; + accumulator.writev_bytes += w->writev_bytes; + accumulator.read_bytes += w->read_bytes; + accumulator.readv_bytes += w->readv_bytes; + + accumulator.write_err += w->write_err; + accumulator.writev_err += w->writev_err; + accumulator.read_err += w->read_err; + accumulator.readv_err += w->readv_err; + accumulator.unlink_err += w->unlink_err; + accumulator.fsync_err += w->fsync_err; + accumulator.open_err += w->open_err; + accumulator.create_err += w->create_err; + } + root = root->next; + } + + // These conditions were added, because we are using incremental algorithm + vfs->write_call = (accumulator.write_call >= vfs->write_call) ? accumulator.write_call : vfs->write_call; + vfs->writev_call = (accumulator.writev_call >= vfs->writev_call) ? accumulator.writev_call : vfs->writev_call; + vfs->read_call = (accumulator.read_call >= vfs->read_call) ? accumulator.read_call : vfs->read_call; + vfs->readv_call = (accumulator.readv_call >= vfs->readv_call) ? accumulator.readv_call : vfs->readv_call; + vfs->unlink_call = (accumulator.unlink_call >= vfs->unlink_call) ? accumulator.unlink_call : vfs->unlink_call; + vfs->fsync_call = (accumulator.fsync_call >= vfs->fsync_call) ? accumulator.fsync_call : vfs->fsync_call; + vfs->open_call = (accumulator.open_call >= vfs->open_call) ? accumulator.open_call : vfs->open_call; + vfs->create_call = (accumulator.create_call >= vfs->create_call) ? accumulator.create_call : vfs->create_call; + + vfs->write_bytes = (accumulator.write_bytes >= vfs->write_bytes) ? accumulator.write_bytes : vfs->write_bytes; + vfs->writev_bytes = (accumulator.writev_bytes >= vfs->writev_bytes) ? accumulator.writev_bytes : vfs->writev_bytes; + vfs->read_bytes = (accumulator.read_bytes >= vfs->read_bytes) ? accumulator.read_bytes : vfs->read_bytes; + vfs->readv_bytes = (accumulator.readv_bytes >= vfs->readv_bytes) ? accumulator.readv_bytes : vfs->readv_bytes; + + vfs->write_err = (accumulator.write_err >= vfs->write_err) ? accumulator.write_err : vfs->write_err; + vfs->writev_err = (accumulator.writev_err >= vfs->writev_err) ? accumulator.writev_err : vfs->writev_err; + vfs->read_err = (accumulator.read_err >= vfs->read_err) ? accumulator.read_err : vfs->read_err; + vfs->readv_err = (accumulator.readv_err >= vfs->readv_err) ? accumulator.readv_err : vfs->readv_err; + vfs->unlink_err = (accumulator.unlink_err >= vfs->unlink_err) ? accumulator.unlink_err : vfs->unlink_err; + vfs->fsync_err = (accumulator.fsync_err >= vfs->fsync_err) ? accumulator.fsync_err : vfs->fsync_err; + vfs->open_err = (accumulator.open_err >= vfs->open_err) ? accumulator.open_err : vfs->open_err; + vfs->create_err = (accumulator.create_err >= vfs->create_err) ? accumulator.create_err : vfs->create_err; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + * @param root the target list. + */ +void ebpf_vfs_send_apps_data(ebpf_module_t *em, struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_vfs_sum_pids(&w->vfs, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.unlink_call); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.write_call + w->vfs.writev_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.write_err + w->vfs.writev_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.read_call + w->vfs.readv_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.read_err + w->vfs.readv_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.write_bytes + w->vfs.writev_bytes); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.read_bytes + w->vfs.readv_bytes); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.fsync_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.fsync_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.open_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.open_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.create_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.create_err); + } + } + write_end_chart(); + } +} + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void vfs_apps_accumulator(netdata_publish_vfs_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_publish_vfs_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_publish_vfs_t *w = &out[i]; + + total->write_call += w->write_call; + total->writev_call += w->writev_call; + total->read_call += w->read_call; + total->readv_call += w->readv_call; + total->unlink_call += w->unlink_call; + + total->write_bytes += w->write_bytes; + total->writev_bytes += w->writev_bytes; + total->read_bytes += w->read_bytes; + total->readv_bytes += w->readv_bytes; + + total->write_err += w->write_err; + total->writev_err += w->writev_err; + total->read_err += w->read_err; + total->readv_err += w->readv_err; + total->unlink_err += w->unlink_err; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void vfs_fill_pid(uint32_t current_pid, netdata_publish_vfs_t *publish) +{ + netdata_publish_vfs_t *curr = vfs_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_vfs_t)); + vfs_pid[current_pid] = curr; + } + + memcpy(curr, &publish[0], sizeof(netdata_publish_vfs_t)); +} + +/** + * Read the hash table and store data to allocated vectors. + */ +static void ebpf_vfs_read_apps() +{ + struct pid_stat *pids = root_of_pids; + netdata_publish_vfs_t *vv = vfs_vector; + int fd = vfs_maps[NETDATA_VFS_PID].map_fd; + size_t length = sizeof(netdata_publish_vfs_t) * ebpf_nprocs; + while (pids) { + uint32_t key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, vv)) { + pids = pids->next; + continue; + } + + vfs_apps_accumulator(vv); + + vfs_fill_pid(key, vv); + + // We are cleaning to avoid passing data read from one process to other. + memset(vv, 0, length); + + pids = pids->next; + } +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void read_update_vfs_cgroup() +{ + ebpf_cgroup_target_t *ect ; + netdata_publish_vfs_t *vv = vfs_vector; + int fd = vfs_maps[NETDATA_VFS_PID].map_fd; + size_t length = sizeof(netdata_publish_vfs_t) * ebpf_nprocs; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_publish_vfs_t *out = &pids->vfs; + if (likely(vfs_pid) && vfs_pid[pid]) { + netdata_publish_vfs_t *in = vfs_pid[pid]; + + memcpy(out, in, sizeof(netdata_publish_vfs_t)); + } else { + memset(vv, 0, length); + if (!bpf_map_lookup_elem(fd, &pid, vv)) { + vfs_apps_accumulator(vv); + + memcpy(out, vv, sizeof(netdata_publish_vfs_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * VFS read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_vfs_read_hash(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_vfs_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_VFS_SLEEP_MS * em->update_every; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + + read_global_table(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param vfs structure used to store data + * @param pids input data + */ +static void ebpf_vfs_sum_cgroup_pids(netdata_publish_vfs_t *vfs, struct pid_on_target2 *pids) + { + netdata_publish_vfs_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (pids) { + netdata_publish_vfs_t *w = &pids->vfs; + + accumulator.write_call += w->write_call; + accumulator.writev_call += w->writev_call; + accumulator.read_call += w->read_call; + accumulator.readv_call += w->readv_call; + accumulator.unlink_call += w->unlink_call; + accumulator.fsync_call += w->fsync_call; + accumulator.open_call += w->open_call; + accumulator.create_call += w->create_call; + + accumulator.write_bytes += w->write_bytes; + accumulator.writev_bytes += w->writev_bytes; + accumulator.read_bytes += w->read_bytes; + accumulator.readv_bytes += w->readv_bytes; + + accumulator.write_err += w->write_err; + accumulator.writev_err += w->writev_err; + accumulator.read_err += w->read_err; + accumulator.readv_err += w->readv_err; + accumulator.unlink_err += w->unlink_err; + accumulator.fsync_err += w->fsync_err; + accumulator.open_err += w->open_err; + accumulator.create_err += w->create_err; + + pids = pids->next; + } + + // These conditions were added, because we are using incremental algorithm + vfs->write_call = (accumulator.write_call >= vfs->write_call) ? accumulator.write_call : vfs->write_call; + vfs->writev_call = (accumulator.writev_call >= vfs->writev_call) ? accumulator.writev_call : vfs->writev_call; + vfs->read_call = (accumulator.read_call >= vfs->read_call) ? accumulator.read_call : vfs->read_call; + vfs->readv_call = (accumulator.readv_call >= vfs->readv_call) ? accumulator.readv_call : vfs->readv_call; + vfs->unlink_call = (accumulator.unlink_call >= vfs->unlink_call) ? accumulator.unlink_call : vfs->unlink_call; + vfs->fsync_call = (accumulator.fsync_call >= vfs->fsync_call) ? accumulator.fsync_call : vfs->fsync_call; + vfs->open_call = (accumulator.open_call >= vfs->open_call) ? accumulator.open_call : vfs->open_call; + vfs->create_call = (accumulator.create_call >= vfs->create_call) ? accumulator.create_call : vfs->create_call; + + vfs->write_bytes = (accumulator.write_bytes >= vfs->write_bytes) ? accumulator.write_bytes : vfs->write_bytes; + vfs->writev_bytes = (accumulator.writev_bytes >= vfs->writev_bytes) ? accumulator.writev_bytes : vfs->writev_bytes; + vfs->read_bytes = (accumulator.read_bytes >= vfs->read_bytes) ? accumulator.read_bytes : vfs->read_bytes; + vfs->readv_bytes = (accumulator.readv_bytes >= vfs->readv_bytes) ? accumulator.readv_bytes : vfs->readv_bytes; + + vfs->write_err = (accumulator.write_err >= vfs->write_err) ? accumulator.write_err : vfs->write_err; + vfs->writev_err = (accumulator.writev_err >= vfs->writev_err) ? accumulator.writev_err : vfs->writev_err; + vfs->read_err = (accumulator.read_err >= vfs->read_err) ? accumulator.read_err : vfs->read_err; + vfs->readv_err = (accumulator.readv_err >= vfs->readv_err) ? accumulator.readv_err : vfs->readv_err; + vfs->unlink_err = (accumulator.unlink_err >= vfs->unlink_err) ? accumulator.unlink_err : vfs->unlink_err; + vfs->fsync_err = (accumulator.fsync_err >= vfs->fsync_err) ? accumulator.fsync_err : vfs->fsync_err; + vfs->open_err = (accumulator.open_err >= vfs->open_err) ? accumulator.open_err : vfs->open_err; + vfs->create_err = (accumulator.create_err >= vfs->create_err) ? accumulator.create_err : vfs->create_err; +} + +/** + * Create specific VFS charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. + */ +static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em) +{ + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED,"Files deleted", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_UNLINK_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5500, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_WRITE_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5501, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5502, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_READ_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5503, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5504, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5505, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5506, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for vfs_fsync", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_FSYNC_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_FSYNC_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5508, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for vfs_open", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_OPEN_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_OPEN_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5510, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for vfs_create", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_CREATE_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_CREATE_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5512, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } +} + +/** + * Obsolete specific VFS charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. + */ +static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_DELETED, "Files deleted", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_UNLINK_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5500, em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5501, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5502, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5503, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5504, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5505, em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5506, em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for vfs_fsync", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_FSYNC_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_FSYNC_ERROR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5508, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for vfs_open", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_OPEN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_OPEN_ERROR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5510, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for vfs_create", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_CREATE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_CREATE_ERROR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5512, em->update_every); + } +} + +/* + * Send specific VFS data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_vfs_data(char *type, netdata_publish_vfs_t *values, ebpf_module_t *em) +{ + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].name, (long long)values->unlink_call); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, + (long long)values->write_call + (long long)values->writev_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, + (long long)values->write_err + (long long)values->writev_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, + (long long)values->read_call + (long long)values->readv_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, + (long long)values->read_err + (long long)values->readv_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, + (long long)values->write_bytes + (long long)values->writev_bytes); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, + (long long)values->read_bytes + (long long)values->readv_bytes); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name, + (long long)values->fsync_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name, + (long long)values->fsync_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name, + (long long)values->open_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name, + (long long)values->open_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name, + (long long)values->create_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name, + (long long)values->create_err); + write_end_chart(); + } +} + +/** + * Create Systemd Socket Charts + * + * Create charts when systemd is enabled + * + * @param em the main collector structure + **/ +static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em) +{ + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_DELETED, "Files deleted", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20065, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20066, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_WRITE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20067, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + } + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20068, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_READ_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20069, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + } + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20070, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20071, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to vfs_fsync", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20072, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20073, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + } + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to vfs_open", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20074, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_OPEN_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20075, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_OPEN_ERROR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + } + + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to vfs_create", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20076, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_CREATE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20077, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_CREATE_ERROR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); + } +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + * + * @param em the main collector structure + */ +static void ebpf_send_systemd_vfs_charts(ebpf_module_t *em) +{ + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.unlink_call); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_call + + ect->publish_systemd_vfs.writev_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_err + + ect->publish_systemd_vfs.writev_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_call + + ect->publish_systemd_vfs.readv_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_err + + ect->publish_systemd_vfs.readv_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_bytes + + ect->publish_systemd_vfs.writev_bytes); + } + } + write_end_chart(); + + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_bytes + + ect->publish_systemd_vfs.readv_bytes); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_err); + } + } + write_end_chart(); + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the main collector structure +*/ +static void ebpf_vfs_send_cgroup_data(ebpf_module_t *em) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_vfs_sum_cgroup_pids(&ect->publish_systemd_vfs, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + if (send_cgroup_chart) { + ebpf_create_systemd_vfs_charts(em); + } + ebpf_send_systemd_vfs_charts(em); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_VFS_CHART) && ect->updated) { + ebpf_create_specific_vfs_charts(ect->name, em); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_VFS_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_VFS_CHART) { + if (ect->updated) { + ebpf_send_specific_vfs_data(ect->name, &ect->publish_systemd_vfs, em); + } else { + ebpf_obsolete_specific_vfs_charts(ect->name, em); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_VFS_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Main loop for this collector. + * + * @param step the number of microseconds used with heart beat + * @param em the structure with thread information + */ +static void vfs_collector(ebpf_module_t *em) +{ + vfs_threads.thread = mallocz(sizeof(netdata_thread_t)); + vfs_threads.start_routine = ebpf_vfs_read_hash; + + netdata_thread_create(vfs_threads.thread, vfs_threads.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_vfs_read_hash, em); + + int cgroups = em->cgroup_charts; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + ebpf_vfs_read_apps(); + + if (cgroups) + read_update_vfs_cgroup(); + + pthread_mutex_lock(&lock); + + ebpf_vfs_send_data(em); + fflush(stdout); + + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_vfs_send_apps_data(em, apps_groups_root_target); + + if (cgroups) + ebpf_vfs_send_cgroup_data(em); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * FUNCTIONS TO CREATE CHARTS + * + *****************************************************************/ + +/** + * Create IO chart + * + * @param family the chart family + * @param name the chart name + * @param axis the axis label + * @param web the group name used to attach the chart on dashboard + * @param order the order number of the specified chart + * @param algorithm the algorithm used to make the charts. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_io_chart(char *family, char *name, char *axis, char *web, + int order, int algorithm, int update_every) +{ + printf("CHART %s.%s '' 'Bytes written and read' '%s' '%s' '' line %d %d '' 'ebpf.plugin' 'filesystem'\n", + family, + name, + axis, + web, + order, + update_every); + + printf("DIMENSION %s %s %s 1 1\n", + vfs_id_names[NETDATA_KEY_PUBLISH_VFS_READ], + vfs_dimension_names[NETDATA_KEY_PUBLISH_VFS_READ], + ebpf_algorithms[algorithm]); + printf("DIMENSION %s %s %s -1 1\n", + vfs_id_names[NETDATA_KEY_PUBLISH_VFS_WRITE], + vfs_dimension_names[NETDATA_KEY_PUBLISH_VFS_WRITE], + ebpf_algorithms[algorithm]); +} + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param em a pointer to the structure with the default values. + */ +static void ebpf_create_global_charts(ebpf_module_t *em) +{ + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_CLEAN_COUNT, + "Remove files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_CLEAN, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_IO_COUNT, + "Calls to IO", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_COUNT, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + ebpf_create_io_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_IO_FILE_BYTES, EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_BYTES, + NETDATA_EBPF_INCREMENTAL_IDX, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_ERR_COUNT, + "Fails to write or read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EBYTES, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FSYNC, + "Calls for vfs_fsync", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_FSYNC, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FSYNC_ERR, + "Fails to synchronize", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EFSYNC, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_OPEN, + "Calls for vfs_open", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_OPEN, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_OPEN_ERR, + "Fails to open a file", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EOPEN, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_CREATE, + "Calls for vfs_create", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_CREATE, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_CREATE_ERR, + "Fails to create a file.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_ECREATE, + ebpf_create_global_dimension, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } +} + +/** + * Create process apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + * @param ptr a pointer for the targets. + **/ +void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_DELETED, + "Files deleted", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20065, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "Write to disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20066, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "Fails to write", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20067, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "Read from disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20068, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "Fails to read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20069, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + "Bytes written on disk", EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20070, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "Bytes read from disk", EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20071, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC, + "Calls for vfs_fsync", EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20072, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "Sync error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20073, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN, + "Calls for vfs_open", EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20074, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "Open error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20075, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE, + "Calls for vfs_create", EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20076, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "Create error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20077, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + } + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; +} + +/***************************************************************** + * + * FUNCTIONS TO START THREAD + * + *****************************************************************/ + +/** + * Allocate vectors used with this thread. + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_vfs_allocate_global_vectors(int apps) +{ + memset(vfs_aggregated_data, 0, sizeof(vfs_aggregated_data)); + memset(vfs_publish_aggregated, 0, sizeof(vfs_publish_aggregated)); + + vfs_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + vfs_vector = callocz(ebpf_nprocs, sizeof(netdata_publish_vfs_t)); + + if (apps) + vfs_pid = callocz((size_t)pid_max, sizeof(netdata_publish_vfs_t *)); +} + +/***************************************************************** + * + * EBPF VFS THREAD + * + *****************************************************************/ + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + */ +static int ebpf_vfs_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_EBPF_VFS_WRITE].mode); + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + bpf_obj = vfs_bpf__open(); + if (!bpf_obj) + ret = -1; + else + ret = ebpf_vfs_load_and_attach(bpf_obj, em); + } +#endif + + return ret; +} + +/** + * Process thread + * + * Thread used to generate process charts. + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_vfs_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_vfs_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = vfs_maps; + + ebpf_update_pid_table(&vfs_maps[NETDATA_VFS_PID], em); + + ebpf_vfs_allocate_global_vectors(em->apps_charts); + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_vfs_load_bpf(em)) { + em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED; + goto endvfs; + } + + int algorithms[NETDATA_KEY_PUBLISH_VFS_END] = { + NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,NETDATA_EBPF_INCREMENTAL_IDX + }; + + ebpf_global_labels(vfs_aggregated_data, vfs_publish_aggregated, vfs_dimension_names, + vfs_id_names, algorithms, NETDATA_KEY_PUBLISH_VFS_END); + + pthread_mutex_lock(&lock); + ebpf_create_global_charts(em); + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&lock); + + vfs_collector(em); + +endvfs: + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_vfs.h b/collectors/ebpf.plugin/ebpf_vfs.h new file mode 100644 index 0000000..2e3c7cc --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_vfs.h @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_VFS_H +#define NETDATA_EBPF_VFS_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_VFS "vfs" + +#define NETDATA_DIRECTORY_VFS_CONFIG_FILE "vfs.conf" + +#define NETDATA_LATENCY_VFS_SLEEP_MS 750000ULL + +// Global chart name +#define NETDATA_VFS_FILE_CLEAN_COUNT "vfs_deleted_objects" +#define NETDATA_VFS_FILE_IO_COUNT "vfs_io" +#define NETDATA_VFS_FILE_ERR_COUNT "vfs_io_error" +#define NETDATA_VFS_IO_FILE_BYTES "vfs_io_bytes" +#define NETDATA_VFS_FSYNC "vfs_fsync" +#define NETDATA_VFS_FSYNC_ERR "vfs_fsync_error" +#define NETDATA_VFS_OPEN "vfs_open" +#define NETDATA_VFS_OPEN_ERR "vfs_open_error" +#define NETDATA_VFS_CREATE "vfs_create" +#define NETDATA_VFS_CREATE_ERR "vfs_create_error" + +// Charts created on Apps submenu +#define NETDATA_SYSCALL_APPS_FILE_DELETED "file_deleted" +#define NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS "vfs_write_call" +#define NETDATA_SYSCALL_APPS_VFS_READ_CALLS "vfs_read_call" +#define NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES "vfs_write_bytes" +#define NETDATA_SYSCALL_APPS_VFS_READ_BYTES "vfs_read_bytes" +#define NETDATA_SYSCALL_APPS_VFS_FSYNC "vfs_fsync" +#define NETDATA_SYSCALL_APPS_VFS_OPEN "vfs_open" +#define NETDATA_SYSCALL_APPS_VFS_CREATE "vfs_create" + +#define NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR "vfs_write_error" +#define NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR "vfs_read_error" +#define NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR "vfs_fsync_error" +#define NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR "vfs_open_error" +#define NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR "vfs_create_error" + +// Group used on Dashboard +#define NETDATA_VFS_GROUP "vfs" +#define NETDATA_VFS_CGROUP_GROUP "vfs (eBPF)" + +// Contexts +#define NETDATA_CGROUP_VFS_UNLINK_CONTEXT "cgroup.vfs_unlink" +#define NETDATA_CGROUP_VFS_WRITE_CONTEXT "cgroup.vfs_write" +#define NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT "cgroup.vfs_write_error" +#define NETDATA_CGROUP_VFS_READ_CONTEXT "cgroup.vfs_read" +#define NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT "cgroup.vfs_read_error" +#define NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT "cgroup.vfs_write_bytes" +#define NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT "cgroup.vfs_read_bytes" +#define NETDATA_CGROUP_VFS_CREATE_CONTEXT "cgroup.vfs_create" +#define NETDATA_CGROUP_VFS_CREATE_ERROR_CONTEXT "cgroup.vfs_create_error" +#define NETDATA_CGROUP_VFS_OPEN_CONTEXT "cgroup.vfs_open" +#define NETDATA_CGROUP_VFS_OPEN_ERROR_CONTEXT "cgroup.vfs_open_error" +#define NETDATA_CGROUP_VFS_FSYNC_CONTEXT "cgroup.vfs_fsync" +#define NETDATA_CGROUP_VFS_FSYNC_ERROR_CONTEXT "cgroup.vfs_fsync_error" + +#define NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT "services.vfs_unlink" +#define NETDATA_SYSTEMD_VFS_WRITE_CONTEXT "services.vfs_write" +#define NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT "services.vfs_write_error" +#define NETDATA_SYSTEMD_VFS_READ_CONTEXT "services.vfs_read" +#define NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT "services.vfs_read_error" +#define NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT "services.vfs_write_bytes" +#define NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT "services.vfs_read_bytes" +#define NETDATA_SYSTEMD_VFS_CREATE_CONTEXT "services.vfs_create" +#define NETDATA_SYSTEMD_VFS_CREATE_ERROR_CONTEXT "services.vfs_create_error" +#define NETDATA_SYSTEMD_VFS_OPEN_CONTEXT "services.vfs_open" +#define NETDATA_SYSTEMD_VFS_OPEN_ERROR_CONTEXT "services.vfs_open_error" +#define NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT "services.vfs_fsync" +#define NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT "services.vfs_fsync_error" + +typedef struct netdata_publish_vfs { + uint64_t pid_tgid; + uint32_t pid; + uint32_t pad; + + //Counter + uint32_t write_call; + uint32_t writev_call; + uint32_t read_call; + uint32_t readv_call; + uint32_t unlink_call; + uint32_t fsync_call; + uint32_t open_call; + uint32_t create_call; + + //Accumulator + uint64_t write_bytes; + uint64_t writev_bytes; + uint64_t readv_bytes; + uint64_t read_bytes; + + //Counter + uint32_t write_err; + uint32_t writev_err; + uint32_t read_err; + uint32_t readv_err; + uint32_t unlink_err; + uint32_t fsync_err; + uint32_t open_err; + uint32_t create_err; +} netdata_publish_vfs_t; + +enum netdata_publish_vfs_list { + NETDATA_KEY_PUBLISH_VFS_UNLINK, + NETDATA_KEY_PUBLISH_VFS_READ, + NETDATA_KEY_PUBLISH_VFS_WRITE, + NETDATA_KEY_PUBLISH_VFS_FSYNC, + NETDATA_KEY_PUBLISH_VFS_OPEN, + NETDATA_KEY_PUBLISH_VFS_CREATE, + + NETDATA_KEY_PUBLISH_VFS_END +}; + +enum vfs_counters { + NETDATA_KEY_CALLS_VFS_WRITE, + NETDATA_KEY_ERROR_VFS_WRITE, + NETDATA_KEY_BYTES_VFS_WRITE, + + NETDATA_KEY_CALLS_VFS_WRITEV, + NETDATA_KEY_ERROR_VFS_WRITEV, + NETDATA_KEY_BYTES_VFS_WRITEV, + + NETDATA_KEY_CALLS_VFS_READ, + NETDATA_KEY_ERROR_VFS_READ, + NETDATA_KEY_BYTES_VFS_READ, + + NETDATA_KEY_CALLS_VFS_READV, + NETDATA_KEY_ERROR_VFS_READV, + NETDATA_KEY_BYTES_VFS_READV, + + NETDATA_KEY_CALLS_VFS_UNLINK, + NETDATA_KEY_ERROR_VFS_UNLINK, + + NETDATA_KEY_CALLS_VFS_FSYNC, + NETDATA_KEY_ERROR_VFS_FSYNC, + + NETDATA_KEY_CALLS_VFS_OPEN, + NETDATA_KEY_ERROR_VFS_OPEN, + + NETDATA_KEY_CALLS_VFS_CREATE, + NETDATA_KEY_ERROR_VFS_CREATE, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_VFS_COUNTER +}; + +enum netdata_vfs_tables { + NETDATA_VFS_PID, + NETDATA_VFS_ALL, + NETDATA_VFS_CTRL +}; + +enum netdata_vfs_calls_name { + NETDATA_EBPF_VFS_WRITE, + NETDATA_EBPF_VFS_WRITEV, + NETDATA_EBPF_VFS_READ, + NETDATA_EBPF_VFS_READV, + NETDATA_EBPF_VFS_UNLINK, + NETDATA_EBPF_VFS_FSYNC, + NETDATA_EBPF_VFS_OPEN, + NETDATA_EBPF_VFS_CREATE, + + NETDATA_VFS_END_LIST +}; + +extern netdata_publish_vfs_t **vfs_pid; + +void *ebpf_vfs_thread(void *ptr); +void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr); +extern netdata_ebpf_targets_t vfs_targets[]; + +extern struct config vfs_config; + +#endif /* NETDATA_EBPF_VFS_H */ diff --git a/collectors/fping.plugin/Makefile.am b/collectors/fping.plugin/Makefile.am new file mode 100644 index 0000000..9065483 --- /dev/null +++ b/collectors/fping.plugin/Makefile.am @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +CLEANFILES = \ + fping.plugin \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_plugins_SCRIPTS = \ + fping.plugin \ + $(NULL) + +dist_noinst_DATA = \ + fping.plugin.in \ + README.md \ + $(NULL) + +dist_libconfig_DATA = \ + fping.conf \ + $(NULL) diff --git a/collectors/fping.plugin/README.md b/collectors/fping.plugin/README.md new file mode 100644 index 0000000..e32d391 --- /dev/null +++ b/collectors/fping.plugin/README.md @@ -0,0 +1,110 @@ + + +# fping.plugin + +The fping plugin supports monitoring latency, packet loss and uptime of any number of network end points, +by pinging them with `fping`. + +This plugin requires version 5.1 or newer of `fping` (earlier versions may or may not work). Our static builds and +Docker images come bundled with a known working version of `fping`. Native packages and local builds will need to +have a working version installed before the plugin is usable. + +## Installing fping locally + +If your distribution’s repositories do not include a working version of `fping`, the supplied plugin can install +it, by running: + +```sh +/usr/libexec/netdata/plugins.d/fping.plugin install +``` + +The above will download, build and install the right version as `/usr/local/bin/fping`. This requires a working C +compiler, GNU autotools (at least autoconf and automake), and GNU make. On Debian or Ubuntu, you can pull in most +of the required tools by installing the `build-essential` package (this should include everything except automake +and autoconf). + +## Configuration + +Then you need to edit `/etc/netdata/fping.conf` (to edit it on your system run +`/etc/netdata/edit-config fping.conf`) like this: + +```sh +# set here all the hosts you need to ping +# I suggest to use hostnames and put their IPs in /etc/hosts +hosts="host1 host2 host3" + +# override the chart update frequency - the default is inherited from Netdata +update_every=1 + +# time in milliseconds (1 sec = 1000 ms) to ping the hosts +# 200 = 5 pings per second +ping_every=200 + +# other fping options - these are the defaults +fping_opts="-R -b 56 -i 1 -r 0 -t 5000" +``` + +## alarms + +Netdata will automatically attach a few alarms for each host. +Check the [latest versions of the fping alarms](https://raw.githubusercontent.com/netdata/netdata/master/health/health.d/fping.conf) + +## Additional Tips + +### Customizing Amount of Pings Per Second + +For example, to update the chart every 10 seconds and use 2 pings every 10 seconds, use this: + +```sh +# Chart Update Frequency (Time in Seconds) +update_every=10 + +# Time in Milliseconds (1 sec = 1000 ms) to Ping the Hosts +# The Following Example Sends 1 Ping Every 5000 ms +# Calculation Formula: ping_every = (update_every * 1000 ) / 2 +ping_every=5000 +``` + +### Multiple fping Plugins With Different Settings + +You may need to run multiple fping plugins with different settings for different end points. +For example, you may need to ping a few hosts 10 times per second, and others once per second. + +Netdata allows you to add as many `fping` plugins as you like. + +Follow this procedure: + +**1. Create New fping Configuration File** + +```sh +# Step Into Configuration Directory +cd /etc/netdata + +# Copy Original fping Configuration File To New Configuration File +cp fping.conf fping2.conf +``` + +Edit `fping2.conf` and set the settings and the hosts you need for the seconds instance. + +**2. Soft Link Original fping Plugin to New Plugin File** + +```sh +# Become root (If The Step Step Is Performed As Non-Root User) +sudo su + +# Step Into The Plugins Directory +cd /usr/libexec/netdata/plugins.d + +# Link fping.plugin to fping2.plugin +ln -s fping.plugin fping2.plugin +``` + +That's it. Netdata will detect the new plugin and start it. + +You can name the new plugin any name you like. +Just make sure the plugin and the configuration file have the same name. + + diff --git a/collectors/fping.plugin/fping.conf b/collectors/fping.plugin/fping.conf new file mode 100644 index 0000000..63a7f7a --- /dev/null +++ b/collectors/fping.plugin/fping.conf @@ -0,0 +1,44 @@ +# no need for shebang - this file is sourced from fping.plugin + +# fping.plugin requires a recent version of fping. +# +# You can get it on your system, by running: +# +# /usr/libexec/netdata/plugins.d/fping.plugin install + +# ----------------------------------------------------------------------------- +# configuration options + +# The fping binary to use. We need one that can output netdata friendly info +# (supporting: -N). If you have multiple versions, put here the full filename +# of the right one + +#fping="/usr/local/bin/fping" + + +# a space separated list of hosts to fping +# we suggest to put names here and the IPs of these names in /etc/hosts + +hosts="" + + +# The update frequency of the chart - the default is inherited from netdata + +#update_every=2 + + +# The time in milliseconds (1 sec = 1000 ms) to ping the hosts +# by default 5 pings per host per iteration +# fping will not allow this to be below 20ms + +#ping_every="200" + + +# other fping options - defaults: +# -R = send packets with random data +# -b 56 = the number of bytes per packet +# -i 1 = 1 ms when sending packets to others hosts (switching hosts) +# -r 0 = never retry packets +# -t 5000 = per packet timeout at 5000 ms + +#fping_opts="-R -b 56 -i 1 -r 0 -t 5000" diff --git a/collectors/fping.plugin/fping.plugin.in b/collectors/fping.plugin/fping.plugin.in new file mode 100755 index 0000000..4b3d1d1 --- /dev/null +++ b/collectors/fping.plugin/fping.plugin.in @@ -0,0 +1,202 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2017 Costa Tsaousis +# GPL v3+ +# +# This plugin requires a latest version of fping. +# You can compile it from source, by running me with option: install + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +if [ "${1}" = "install" ] + then + [ "${UID}" != 0 ] && echo >&2 "Please run me as root. This will install a single binary file: /usr/local/bin/fping." && exit 1 + + [ -z "${2}" ] && fping_version="5.1" || fping_version="${2}" + + run() { + printf >&2 " > " + printf >&2 "%q " "${@}" + printf >&2 "\n" + "${@}" || exit 1 + } + + download() { + local curl="$(which curl 2>/dev/null || command -v curl 2>/dev/null)" + [ ! -z "${curl}" ] && run curl -s -L "${1}" && return 0 + + local wget="$(which wget 2>/dev/null || command -v wget 2>/dev/null)" + [ ! -z "${wget}" ] && run wget -q -O - "${1}" && return 0 + + echo >&2 "Cannot find 'curl' or 'wget' in this system." && exit 1 + } + + [ ! -d /usr/src ] && run mkdir -p /usr/src + [ ! -d /usr/local/bin ] && run mkdir -p /usr/local/bin + + run cd /usr/src + + if [ -d "fping-${fping_version}" ] + then + run rm -rf "fping-${fping_version}" || exit 1 + fi + + download "https://github.com/schweikert/fping/releases/download/v${fping_version}/fping-${fping_version}.tar.gz" | run tar -zxvpf - + [ $? -ne 0 ] && exit 1 + run cd "fping-${fping_version}" || exit 1 + + run ./configure --prefix=/usr/local + run make clean + run make + if [ -f /usr/local/bin/fping ] + then + run mv -f /usr/local/bin/fping /usr/local/bin/fping.old + fi + run mv src/fping /usr/local/bin/fping + run chown root:root /usr/local/bin/fping + run chmod 4755 /usr/local/bin/fping + echo >&2 + echo >&2 "All done, you have a compatible fping now at /usr/local/bin/fping." + echo >&2 + + fping="$(which fping 2>/dev/null || command -v fping 2>/dev/null)" + if [ "${fping}" != "/usr/local/bin/fping" ] + then + echo >&2 "You have another fping installed at: ${fping}." + echo >&2 "Please set:" + echo >&2 + echo >&2 " fping=\"/usr/local/bin/fping\"" + echo >&2 + echo >&2 "at /etc/netdata/fping.conf" + echo >&2 + fi + exit 0 +fi + +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + echo "DISABLE" + exit 1 +} + +debug=0 +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- + +# store in ${plugin} the name we run under +# this allows us to copy/link fping.plugin under a different name +# to have multiple fping plugins running with different settings +plugin="${PROGRAM_NAME/.plugin/}" + + +# ----------------------------------------------------------------------------- + +# the frequency to send info to netdata +# passed by netdata as the first parameter +update_every="${1-1}" + +# the netdata configuration directory +# passed by netdata as an environment variable +[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@" +[ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" + +# ----------------------------------------------------------------------------- +# configuration options +# can be overwritten at /etc/netdata/fping.conf + +# the fping binary to use +# we need one that can output netdata friendly info (supporting: -N) +# if you have multiple versions, put here the full filename of the right one +fping="$( which fping 2>/dev/null || command -v fping 2>/dev/null )" + +# a space separated list of hosts to fping +# we suggest to put names here and the IPs of these names in /etc/hosts +hosts="" + +# the time in milliseconds (1 sec = 1000 ms) +# to ping the hosts - by default 5 pings per host per iteration +ping_every="$((update_every * 1000 / 5))" + +# fping options +fping_opts="-R -b 56 -i 1 -r 0 -t 5000" + +# ----------------------------------------------------------------------------- +# load the configuration files + +for CONFIG in "${NETDATA_STOCK_CONFIG_DIR}/${plugin}.conf" "${NETDATA_USER_CONFIG_DIR}/${plugin}.conf" +do + if [ -f "${CONFIG}" ] + then + info "Loading config file '${CONFIG}'..." + source "${CONFIG}" + [ $? -ne 0 ] && error "Failed to load config file '${CONFIG}'." + else + warning "Cannot find file '${CONFIG}'." + fi +done + +if [ -z "${hosts}" ] +then + fatal "no hosts configured - nothing to do." +fi + +if [ -z "${fping}" ] +then + fatal "fping command is not found. Please set its full path in '${NETDATA_USER_CONFIG_DIR}/${plugin}.conf'" +fi + +if [ ! -x "${fping}" ] +then + fatal "fping command '${fping}' is not executable - cannot proceed." +fi + +if [ ${ping_every} -lt 20 ] + then + warning "ping every was set to ${ping_every} but 20 is the minimum for non-root users. Setting it to 20 ms." + ping_every=20 +fi + +# the fping options we will use +options=( -N -l -Q ${update_every} -p ${ping_every} ${fping_opts} ${hosts} ) + +# execute fping +info "starting fping: ${fping} ${options[*]}" +exec "${fping}" "${options[@]}" + +# if we cannot execute fping, stop +fatal "command '${fping} ${options[*]}' failed to be executed (returned code $?)." diff --git a/collectors/freebsd.plugin/Makefile.am b/collectors/freebsd.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/freebsd.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/freebsd.plugin/README.md b/collectors/freebsd.plugin/README.md new file mode 100644 index 0000000..9a97a7e --- /dev/null +++ b/collectors/freebsd.plugin/README.md @@ -0,0 +1,12 @@ + + +# freebsd.plugin + +Collects resource usage and performance data on FreeBSD systems + +By default, Netdata will enable monitoring metrics for disks, memory, and network only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. + + diff --git a/collectors/freebsd.plugin/freebsd_devstat.c b/collectors/freebsd.plugin/freebsd_devstat.c new file mode 100644 index 0000000..0f03774 --- /dev/null +++ b/collectors/freebsd.plugin/freebsd_devstat.c @@ -0,0 +1,759 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_freebsd.h" + +#include + +struct disk { + char *name; + uint32_t hash; + size_t len; + + // flags + int configured; + int enabled; + int updated; + + int do_io; + int do_ops; + int do_qops; + int do_util; + int do_iotime; + int do_await; + int do_avagsz; + int do_svctm; + + + // data for differential charts + + struct prev_dstat { + collected_number bytes_read; + collected_number bytes_write; + collected_number bytes_free; + collected_number operations_read; + collected_number operations_write; + collected_number operations_other; + collected_number operations_free; + collected_number duration_read_ms; + collected_number duration_write_ms; + collected_number duration_other_ms; + collected_number duration_free_ms; + collected_number busy_time_ms; + } prev_dstat; + + // charts and dimensions + + RRDSET *st_io; + RRDDIM *rd_io_in; + RRDDIM *rd_io_out; + RRDDIM *rd_io_free; + + RRDSET *st_ops; + RRDDIM *rd_ops_in; + RRDDIM *rd_ops_out; + RRDDIM *rd_ops_other; + RRDDIM *rd_ops_free; + + RRDSET *st_qops; + RRDDIM *rd_qops; + + RRDSET *st_util; + RRDDIM *rd_util; + + RRDSET *st_iotime; + RRDDIM *rd_iotime_in; + RRDDIM *rd_iotime_out; + RRDDIM *rd_iotime_other; + RRDDIM *rd_iotime_free; + + RRDSET *st_await; + RRDDIM *rd_await_in; + RRDDIM *rd_await_out; + RRDDIM *rd_await_other; + RRDDIM *rd_await_free; + + RRDSET *st_avagsz; + RRDDIM *rd_avagsz_in; + RRDDIM *rd_avagsz_out; + RRDDIM *rd_avagsz_free; + + RRDSET *st_svctm; + RRDDIM *rd_svctm; + + struct disk *next; +}; + +static struct disk *disks_root = NULL, *disks_last_used = NULL; + +static size_t disks_added = 0, disks_found = 0; + +static void disk_free(struct disk *dm) { + if (likely(dm->st_io)) + rrdset_is_obsolete(dm->st_io); + if (likely(dm->st_ops)) + rrdset_is_obsolete(dm->st_ops); + if (likely(dm->st_qops)) + rrdset_is_obsolete(dm->st_qops); + if (likely(dm->st_util)) + rrdset_is_obsolete(dm->st_util); + if (likely(dm->st_iotime)) + rrdset_is_obsolete(dm->st_iotime); + if (likely(dm->st_await)) + rrdset_is_obsolete(dm->st_await); + if (likely(dm->st_avagsz)) + rrdset_is_obsolete(dm->st_avagsz); + if (likely(dm->st_svctm)) + rrdset_is_obsolete(dm->st_svctm); + + disks_added--; + freez(dm->name); + freez(dm); +} + +static void disks_cleanup() { + if (likely(disks_found == disks_added)) return; + + struct disk *dm = disks_root, *last = NULL; + while(dm) { + if (unlikely(!dm->updated)) { + // info("Removing disk '%s', linked after '%s'", dm->name, last?last->name:"ROOT"); + + if (disks_last_used == dm) + disks_last_used = last; + + struct disk *t = dm; + + if (dm == disks_root || !last) + disks_root = dm = dm->next; + + else + last->next = dm = dm->next; + + t->next = NULL; + disk_free(t); + } + else { + last = dm; + dm->updated = 0; + dm = dm->next; + } + } +} + +static struct disk *get_disk(const char *name) { + struct disk *dm; + + uint32_t hash = simple_hash(name); + + // search it, from the last position to the end + for(dm = disks_last_used ; dm ; dm = dm->next) { + if (unlikely(hash == dm->hash && !strcmp(name, dm->name))) { + disks_last_used = dm->next; + return dm; + } + } + + // search it from the beginning to the last position we used + for(dm = disks_root ; dm != disks_last_used ; dm = dm->next) { + if (unlikely(hash == dm->hash && !strcmp(name, dm->name))) { + disks_last_used = dm->next; + return dm; + } + } + + // create a new one + dm = callocz(1, sizeof(struct disk)); + dm->name = strdupz(name); + dm->hash = simple_hash(dm->name); + dm->len = strlen(dm->name); + disks_added++; + + // link it to the end + if (disks_root) { + struct disk *e; + for(e = disks_root; e->next ; e = e->next) ; + e->next = dm; + } + else + disks_root = dm; + + return dm; +} + +// -------------------------------------------------------------------------------------------------------------------- +// kern.devstat + +int do_kern_devstat(int update_every, usec_t dt) { + +#define DEFAULT_EXCLUDED_DISKS "" +#define CONFIG_SECTION_KERN_DEVSTAT "plugin:freebsd:kern.devstat" +#define BINTIME_SCALE 5.42101086242752217003726400434970855712890625e-17 // this is 1000/2^64 + + static int enable_new_disks = -1; + static int enable_pass_devices = -1, do_system_io = -1, do_io = -1, do_ops = -1, do_qops = -1, do_util = -1, + do_iotime = -1, do_await = -1, do_avagsz = -1, do_svctm = -1; + static SIMPLE_PATTERN *excluded_disks = NULL; + + if (unlikely(enable_new_disks == -1)) { + enable_new_disks = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, + "enable new disks detected at runtime", CONFIG_BOOLEAN_AUTO); + + enable_pass_devices = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, + "performance metrics for pass devices", CONFIG_BOOLEAN_AUTO); + + do_system_io = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "total bandwidth for all disks", + CONFIG_BOOLEAN_YES); + + do_io = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "bandwidth for all disks", + CONFIG_BOOLEAN_AUTO); + do_ops = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "operations for all disks", + CONFIG_BOOLEAN_AUTO); + do_qops = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "queued operations for all disks", + CONFIG_BOOLEAN_AUTO); + do_util = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "utilization percentage for all disks", + CONFIG_BOOLEAN_AUTO); + do_iotime = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "i/o time for all disks", + CONFIG_BOOLEAN_AUTO); + do_await = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "average completed i/o time for all disks", + CONFIG_BOOLEAN_AUTO); + do_avagsz = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "average completed i/o bandwidth for all disks", + CONFIG_BOOLEAN_AUTO); + do_svctm = config_get_boolean_ondemand(CONFIG_SECTION_KERN_DEVSTAT, "average service time for all disks", + CONFIG_BOOLEAN_AUTO); + + excluded_disks = simple_pattern_create( + config_get(CONFIG_SECTION_KERN_DEVSTAT, "disable by default disks matching", DEFAULT_EXCLUDED_DISKS) + , NULL + , SIMPLE_PATTERN_EXACT + ); + } + + if (likely(do_system_io || do_io || do_ops || do_qops || do_util || do_iotime || do_await || do_avagsz || do_svctm)) { + static int mib_numdevs[3] = {0, 0, 0}; + int numdevs; + int common_error = 0; + + if (unlikely(GETSYSCTL_SIMPLE("kern.devstat.numdevs", mib_numdevs, numdevs))) { + common_error = 1; + } else { + static int mib_devstat[3] = {0, 0, 0}; + static void *devstat_data = NULL; + static int old_numdevs = 0; + + if (unlikely(numdevs != old_numdevs)) { + devstat_data = reallocz(devstat_data, sizeof(long) + sizeof(struct devstat) * + numdevs); // there is generation number before devstat structures + old_numdevs = numdevs; + } + if (unlikely(GETSYSCTL_WSIZE("kern.devstat.all", mib_devstat, devstat_data, + sizeof(long) + sizeof(struct devstat) * numdevs))) { + common_error = 1; + } else { + struct devstat *dstat; + int i; + collected_number total_disk_kbytes_read = 0; + collected_number total_disk_kbytes_write = 0; + + disks_found = 0; + + dstat = (struct devstat*)((char*)devstat_data + sizeof(long)); // skip generation number + + for (i = 0; i < numdevs; i++) { + if (likely(do_system_io)) { + if (((dstat[i].device_type & DEVSTAT_TYPE_MASK) == DEVSTAT_TYPE_DIRECT) || + ((dstat[i].device_type & DEVSTAT_TYPE_MASK) == DEVSTAT_TYPE_STORARRAY)) { + total_disk_kbytes_read += dstat[i].bytes[DEVSTAT_READ] / KILO_FACTOR; + total_disk_kbytes_write += dstat[i].bytes[DEVSTAT_WRITE] / KILO_FACTOR; + } + } + + if (unlikely(!enable_pass_devices)) + if ((dstat[i].device_type & DEVSTAT_TYPE_PASS) == DEVSTAT_TYPE_PASS) + continue; + + if (((dstat[i].device_type & DEVSTAT_TYPE_MASK) == DEVSTAT_TYPE_DIRECT) || + ((dstat[i].device_type & DEVSTAT_TYPE_MASK) == DEVSTAT_TYPE_STORARRAY)) { + char disk[DEVSTAT_NAME_LEN + MAX_INT_DIGITS + 1]; + struct cur_dstat { + collected_number duration_read_ms; + collected_number duration_write_ms; + collected_number duration_other_ms; + collected_number duration_free_ms; + collected_number busy_time_ms; + } cur_dstat; + + sprintf(disk, "%s%d", dstat[i].device_name, dstat[i].unit_number); + + struct disk *dm = get_disk(disk); + dm->updated = 1; + disks_found++; + + if(unlikely(!dm->configured)) { + char var_name[4096 + 1]; + + // this is the first time we see this disk + + // remember we configured it + dm->configured = 1; + + dm->enabled = enable_new_disks; + + if (likely(dm->enabled)) + dm->enabled = !simple_pattern_matches(excluded_disks, disk); + + snprintfz(var_name, 4096, "%s:%s", CONFIG_SECTION_KERN_DEVSTAT, disk); + dm->enabled = config_get_boolean_ondemand(var_name, "enabled", dm->enabled); + + dm->do_io = config_get_boolean_ondemand(var_name, "bandwidth", do_io); + dm->do_ops = config_get_boolean_ondemand(var_name, "operations", do_ops); + dm->do_qops = config_get_boolean_ondemand(var_name, "queued operations", do_qops); + dm->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", do_util); + dm->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", do_iotime); + dm->do_await = config_get_boolean_ondemand(var_name, "average completed i/o time", + do_await); + dm->do_avagsz = config_get_boolean_ondemand(var_name, "average completed i/o bandwidth", + do_avagsz); + dm->do_svctm = config_get_boolean_ondemand(var_name, "average service time", do_svctm); + + // initialise data for differential charts + + dm->prev_dstat.bytes_read = dstat[i].bytes[DEVSTAT_READ]; + dm->prev_dstat.bytes_write = dstat[i].bytes[DEVSTAT_WRITE]; + dm->prev_dstat.bytes_free = dstat[i].bytes[DEVSTAT_FREE]; + dm->prev_dstat.operations_read = dstat[i].operations[DEVSTAT_READ]; + dm->prev_dstat.operations_write = dstat[i].operations[DEVSTAT_WRITE]; + dm->prev_dstat.operations_other = dstat[i].operations[DEVSTAT_NO_DATA]; + dm->prev_dstat.operations_free = dstat[i].operations[DEVSTAT_FREE]; + dm->prev_dstat.duration_read_ms = dstat[i].duration[DEVSTAT_READ].sec * 1000 + + dstat[i].duration[DEVSTAT_READ].frac * BINTIME_SCALE; + dm->prev_dstat.duration_write_ms = dstat[i].duration[DEVSTAT_WRITE].sec * 1000 + + dstat[i].duration[DEVSTAT_WRITE].frac * BINTIME_SCALE; + dm->prev_dstat.duration_other_ms = dstat[i].duration[DEVSTAT_NO_DATA].sec * 1000 + + dstat[i].duration[DEVSTAT_NO_DATA].frac * BINTIME_SCALE; + dm->prev_dstat.duration_free_ms = dstat[i].duration[DEVSTAT_FREE].sec * 1000 + + dstat[i].duration[DEVSTAT_FREE].frac * BINTIME_SCALE; + dm->prev_dstat.busy_time_ms = dstat[i].busy_time.sec * 1000 + + dstat[i].busy_time.frac * BINTIME_SCALE; + } + + cur_dstat.duration_read_ms = dstat[i].duration[DEVSTAT_READ].sec * 1000 + + dstat[i].duration[DEVSTAT_READ].frac * BINTIME_SCALE; + cur_dstat.duration_write_ms = dstat[i].duration[DEVSTAT_WRITE].sec * 1000 + + dstat[i].duration[DEVSTAT_WRITE].frac * BINTIME_SCALE; + cur_dstat.duration_other_ms = dstat[i].duration[DEVSTAT_NO_DATA].sec * 1000 + + dstat[i].duration[DEVSTAT_NO_DATA].frac * BINTIME_SCALE; + cur_dstat.duration_free_ms = dstat[i].duration[DEVSTAT_FREE].sec * 1000 + + dstat[i].duration[DEVSTAT_FREE].frac * BINTIME_SCALE; + + cur_dstat.busy_time_ms = dstat[i].busy_time.sec * 1000 + dstat[i].busy_time.frac * BINTIME_SCALE; + + if(dm->do_io == CONFIG_BOOLEAN_YES || (dm->do_io == CONFIG_BOOLEAN_AUTO && + (dstat[i].bytes[DEVSTAT_READ] || + dstat[i].bytes[DEVSTAT_WRITE] || + dstat[i].bytes[DEVSTAT_FREE] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_io)) { + dm->st_io = rrdset_create_localhost("disk", + disk, + NULL, + disk, + "disk.io", + "Disk I/O Bandwidth", + "KiB/s", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_IO, + update_every, + RRDSET_TYPE_AREA + ); + + dm->rd_io_in = rrddim_add(dm->st_io, "reads", NULL, 1, KILO_FACTOR, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_io_out = rrddim_add(dm->st_io, "writes", NULL, -1, KILO_FACTOR, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_io_free = rrddim_add(dm->st_io, "frees", NULL, -1, KILO_FACTOR, + RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(dm->st_io, dm->rd_io_in, dstat[i].bytes[DEVSTAT_READ]); + rrddim_set_by_pointer(dm->st_io, dm->rd_io_out, dstat[i].bytes[DEVSTAT_WRITE]); + rrddim_set_by_pointer(dm->st_io, dm->rd_io_free, dstat[i].bytes[DEVSTAT_FREE]); + rrdset_done(dm->st_io); + } + + if(dm->do_ops == CONFIG_BOOLEAN_YES || (dm->do_ops == CONFIG_BOOLEAN_AUTO && + (dstat[i].operations[DEVSTAT_READ] || + dstat[i].operations[DEVSTAT_WRITE] || + dstat[i].operations[DEVSTAT_NO_DATA] || + dstat[i].operations[DEVSTAT_FREE] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_ops)) { + dm->st_ops = rrdset_create_localhost("disk_ops", + disk, + NULL, + disk, + "disk.ops", + "Disk Completed I/O Operations", + "operations/s", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_OPS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(dm->st_ops, RRDSET_FLAG_DETAIL); + + dm->rd_ops_in = rrddim_add(dm->st_ops, "reads", NULL, 1, 1, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_ops_out = rrddim_add(dm->st_ops, "writes", NULL, -1, 1, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_ops_other = rrddim_add(dm->st_ops, "other", NULL, 1, 1, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_ops_free = rrddim_add(dm->st_ops, "frees", NULL, -1, 1, + RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(dm->st_ops, dm->rd_ops_in, dstat[i].operations[DEVSTAT_READ]); + rrddim_set_by_pointer(dm->st_ops, dm->rd_ops_out, dstat[i].operations[DEVSTAT_WRITE]); + rrddim_set_by_pointer(dm->st_ops, dm->rd_ops_other, dstat[i].operations[DEVSTAT_NO_DATA]); + rrddim_set_by_pointer(dm->st_ops, dm->rd_ops_free, dstat[i].operations[DEVSTAT_FREE]); + rrdset_done(dm->st_ops); + } + + if(dm->do_qops == CONFIG_BOOLEAN_YES || (dm->do_qops == CONFIG_BOOLEAN_AUTO && + (dstat[i].start_count || + dstat[i].end_count || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_qops)) { + dm->st_qops = rrdset_create_localhost("disk_qops", + disk, + NULL, + disk, + "disk.qops", + "Disk Current I/O Operations", + "operations", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_QOPS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(dm->st_qops, RRDSET_FLAG_DETAIL); + + dm->rd_qops = rrddim_add(dm->st_qops, "operations", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(dm->st_qops, dm->rd_qops, dstat[i].start_count - dstat[i].end_count); + rrdset_done(dm->st_qops); + } + + if(dm->do_util == CONFIG_BOOLEAN_YES || (dm->do_util == CONFIG_BOOLEAN_AUTO && + (cur_dstat.busy_time_ms || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_util)) { + dm->st_util = rrdset_create_localhost("disk_util", + disk, + NULL, + disk, + "disk.util", + "Disk Utilization Time", + "% of time working", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_UTIL, + update_every, + RRDSET_TYPE_AREA + ); + + rrdset_flag_set(dm->st_util, RRDSET_FLAG_DETAIL); + + dm->rd_util = rrddim_add(dm->st_util, "utilization", NULL, 1, 10, + RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(dm->st_util, dm->rd_util, cur_dstat.busy_time_ms); + rrdset_done(dm->st_util); + } + + if(dm->do_iotime == CONFIG_BOOLEAN_YES || (dm->do_iotime == CONFIG_BOOLEAN_AUTO && + (cur_dstat.duration_read_ms || + cur_dstat.duration_write_ms || + cur_dstat.duration_other_ms || + cur_dstat.duration_free_ms || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_iotime)) { + dm->st_iotime = rrdset_create_localhost("disk_iotime", + disk, + NULL, + disk, + "disk.iotime", + "Disk Total I/O Time", + "milliseconds/s", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_IOTIME, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(dm->st_iotime, RRDSET_FLAG_DETAIL); + + dm->rd_iotime_in = rrddim_add(dm->st_iotime, "reads", NULL, 1, 1, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_iotime_out = rrddim_add(dm->st_iotime, "writes", NULL, -1, 1, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_iotime_other = rrddim_add(dm->st_iotime, "other", NULL, 1, 1, + RRD_ALGORITHM_INCREMENTAL); + dm->rd_iotime_free = rrddim_add(dm->st_iotime, "frees", NULL, -1, 1, + RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(dm->st_iotime, dm->rd_iotime_in, cur_dstat.duration_read_ms); + rrddim_set_by_pointer(dm->st_iotime, dm->rd_iotime_out, cur_dstat.duration_write_ms); + rrddim_set_by_pointer(dm->st_iotime, dm->rd_iotime_other, cur_dstat.duration_other_ms); + rrddim_set_by_pointer(dm->st_iotime, dm->rd_iotime_free, cur_dstat.duration_free_ms); + rrdset_done(dm->st_iotime); + } + + // calculate differential charts + // only if this is not the first time we run + + if (likely(dt)) { + if(dm->do_await == CONFIG_BOOLEAN_YES || (dm->do_await == CONFIG_BOOLEAN_AUTO && + (dstat[i].operations[DEVSTAT_READ] || + dstat[i].operations[DEVSTAT_WRITE] || + dstat[i].operations[DEVSTAT_NO_DATA] || + dstat[i].operations[DEVSTAT_FREE] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_await)) { + dm->st_await = rrdset_create_localhost("disk_await", + disk, + NULL, + disk, + "disk.await", + "Average Completed I/O Operation Time", + "milliseconds/operation", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_AWAIT, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(dm->st_await, RRDSET_FLAG_DETAIL); + + dm->rd_await_in = rrddim_add(dm->st_await, "reads", NULL, 1, 1, + RRD_ALGORITHM_ABSOLUTE); + dm->rd_await_out = rrddim_add(dm->st_await, "writes", NULL, -1, 1, + RRD_ALGORITHM_ABSOLUTE); + dm->rd_await_other = rrddim_add(dm->st_await, "other", NULL, 1, 1, + RRD_ALGORITHM_ABSOLUTE); + dm->rd_await_free = rrddim_add(dm->st_await, "frees", NULL, -1, 1, + RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(dm->st_await, dm->rd_await_in, + (dstat[i].operations[DEVSTAT_READ] - + dm->prev_dstat.operations_read) ? + (cur_dstat.duration_read_ms - dm->prev_dstat.duration_read_ms) / + (dstat[i].operations[DEVSTAT_READ] - + dm->prev_dstat.operations_read) : + 0); + rrddim_set_by_pointer(dm->st_await, dm->rd_await_out, + (dstat[i].operations[DEVSTAT_WRITE] - + dm->prev_dstat.operations_write) ? + (cur_dstat.duration_write_ms - dm->prev_dstat.duration_write_ms) / + (dstat[i].operations[DEVSTAT_WRITE] - + dm->prev_dstat.operations_write) : + 0); + rrddim_set_by_pointer(dm->st_await, dm->rd_await_other, + (dstat[i].operations[DEVSTAT_NO_DATA] - + dm->prev_dstat.operations_other) ? + (cur_dstat.duration_other_ms - dm->prev_dstat.duration_other_ms) / + (dstat[i].operations[DEVSTAT_NO_DATA] - + dm->prev_dstat.operations_other) : + 0); + rrddim_set_by_pointer(dm->st_await, dm->rd_await_free, + (dstat[i].operations[DEVSTAT_FREE] - + dm->prev_dstat.operations_free) ? + (cur_dstat.duration_free_ms - dm->prev_dstat.duration_free_ms) / + (dstat[i].operations[DEVSTAT_FREE] - + dm->prev_dstat.operations_free) : + 0); + rrdset_done(dm->st_await); + } + + if(dm->do_avagsz == CONFIG_BOOLEAN_YES || (dm->do_avagsz == CONFIG_BOOLEAN_AUTO && + (dstat[i].operations[DEVSTAT_READ] || + dstat[i].operations[DEVSTAT_WRITE] || + dstat[i].operations[DEVSTAT_FREE] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_avagsz)) { + dm->st_avagsz = rrdset_create_localhost("disk_avgsz", + disk, + NULL, + disk, + "disk.avgsz", + "Average Completed I/O Operation Bandwidth", + "KiB/operation", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_AVGSZ, + update_every, + RRDSET_TYPE_AREA + ); + + rrdset_flag_set(dm->st_avagsz, RRDSET_FLAG_DETAIL); + + dm->rd_avagsz_in = rrddim_add(dm->st_avagsz, "reads", NULL, 1, KILO_FACTOR, + RRD_ALGORITHM_ABSOLUTE); + dm->rd_avagsz_out = rrddim_add(dm->st_avagsz, "writes", NULL, -1, KILO_FACTOR, + RRD_ALGORITHM_ABSOLUTE); + dm->rd_avagsz_free = rrddim_add(dm->st_avagsz, "frees", NULL, -1, KILO_FACTOR, + RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(dm->st_avagsz, dm->rd_avagsz_in, + (dstat[i].operations[DEVSTAT_READ] - + dm->prev_dstat.operations_read) ? + (dstat[i].bytes[DEVSTAT_READ] - dm->prev_dstat.bytes_read) / + (dstat[i].operations[DEVSTAT_READ] - + dm->prev_dstat.operations_read) : + 0); + rrddim_set_by_pointer(dm->st_avagsz, dm->rd_avagsz_out, + (dstat[i].operations[DEVSTAT_WRITE] - + dm->prev_dstat.operations_write) ? + (dstat[i].bytes[DEVSTAT_WRITE] - dm->prev_dstat.bytes_write) / + (dstat[i].operations[DEVSTAT_WRITE] - + dm->prev_dstat.operations_write) : + 0); + rrddim_set_by_pointer(dm->st_avagsz, dm->rd_avagsz_free, + (dstat[i].operations[DEVSTAT_FREE] - + dm->prev_dstat.operations_free) ? + (dstat[i].bytes[DEVSTAT_FREE] - dm->prev_dstat.bytes_free) / + (dstat[i].operations[DEVSTAT_FREE] - + dm->prev_dstat.operations_free) : + 0); + rrdset_done(dm->st_avagsz); + } + + if(dm->do_svctm == CONFIG_BOOLEAN_YES || (dm->do_svctm == CONFIG_BOOLEAN_AUTO && + (dstat[i].operations[DEVSTAT_READ] || + dstat[i].operations[DEVSTAT_WRITE] || + dstat[i].operations[DEVSTAT_NO_DATA] || + dstat[i].operations[DEVSTAT_FREE] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!dm->st_svctm)) { + dm->st_svctm = rrdset_create_localhost("disk_svctm", + disk, + NULL, + disk, + "disk.svctm", + "Average Service Time", + "milliseconds/operation", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_DISK_SVCTM, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(dm->st_svctm, RRDSET_FLAG_DETAIL); + + dm->rd_svctm = rrddim_add(dm->st_svctm, "svctm", NULL, 1, 1, + RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(dm->st_svctm, dm->rd_svctm, + ((dstat[i].operations[DEVSTAT_READ] - dm->prev_dstat.operations_read) + + (dstat[i].operations[DEVSTAT_WRITE] - dm->prev_dstat.operations_write) + + (dstat[i].operations[DEVSTAT_NO_DATA] - dm->prev_dstat.operations_other) + + (dstat[i].operations[DEVSTAT_FREE] - dm->prev_dstat.operations_free)) ? + (cur_dstat.busy_time_ms - dm->prev_dstat.busy_time_ms) / + ((dstat[i].operations[DEVSTAT_READ] - dm->prev_dstat.operations_read) + + (dstat[i].operations[DEVSTAT_WRITE] - dm->prev_dstat.operations_write) + + (dstat[i].operations[DEVSTAT_NO_DATA] - dm->prev_dstat.operations_other) + + (dstat[i].operations[DEVSTAT_FREE] - dm->prev_dstat.operations_free)) : + 0); + rrdset_done(dm->st_svctm); + } + + dm->prev_dstat.bytes_read = dstat[i].bytes[DEVSTAT_READ]; + dm->prev_dstat.bytes_write = dstat[i].bytes[DEVSTAT_WRITE]; + dm->prev_dstat.bytes_free = dstat[i].bytes[DEVSTAT_FREE]; + dm->prev_dstat.operations_read = dstat[i].operations[DEVSTAT_READ]; + dm->prev_dstat.operations_write = dstat[i].operations[DEVSTAT_WRITE]; + dm->prev_dstat.operations_other = dstat[i].operations[DEVSTAT_NO_DATA]; + dm->prev_dstat.operations_free = dstat[i].operations[DEVSTAT_FREE]; + dm->prev_dstat.duration_read_ms = cur_dstat.duration_read_ms; + dm->prev_dstat.duration_write_ms = cur_dstat.duration_write_ms; + dm->prev_dstat.duration_other_ms = cur_dstat.duration_other_ms; + dm->prev_dstat.duration_free_ms = cur_dstat.duration_free_ms; + dm->prev_dstat.busy_time_ms = cur_dstat.busy_time_ms; + } + } + } + + if (likely(do_system_io)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost("system", + "io", + NULL, + "disk", + NULL, + "Disk I/O", + "KiB/s", + "freebsd.plugin", + "devstat", + NETDATA_CHART_PRIO_SYSTEM_IO, + update_every, + RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st, "in", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "out", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, total_disk_kbytes_read); + rrddim_set_by_pointer(st, rd_out, total_disk_kbytes_write); + rrdset_done(st); + } + } + } + + if (unlikely(common_error)) { + do_system_io = 0; + error("DISABLED: system.io chart"); + do_io = 0; + error("DISABLED: disk.* charts"); + do_ops = 0; + error("DISABLED: disk_ops.* charts"); + do_qops = 0; + error("DISABLED: disk_qops.* charts"); + do_util = 0; + error("DISABLED: disk_util.* charts"); + do_iotime = 0; + error("DISABLED: disk_iotime.* charts"); + do_await = 0; + error("DISABLED: disk_await.* charts"); + do_avagsz = 0; + error("DISABLED: disk_avgsz.* charts"); + do_svctm = 0; + error("DISABLED: disk_svctm.* charts"); + error("DISABLED: kern.devstat module"); + return 1; + } + } else { + error("DISABLED: kern.devstat module"); + return 1; + } + + disks_cleanup(); + + return 0; +} diff --git a/collectors/freebsd.plugin/freebsd_getifaddrs.c b/collectors/freebsd.plugin/freebsd_getifaddrs.c new file mode 100644 index 0000000..1e870c0 --- /dev/null +++ b/collectors/freebsd.plugin/freebsd_getifaddrs.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_freebsd.h" + +#include + +struct cgroup_network_interface { + char *name; + uint32_t hash; + size_t len; + + // flags + int configured; + int enabled; + int updated; + + int do_bandwidth; + int do_packets; + int do_errors; + int do_drops; + int do_events; + + // charts and dimensions + + RRDSET *st_bandwidth; + RRDDIM *rd_bandwidth_in; + RRDDIM *rd_bandwidth_out; + + RRDSET *st_packets; + RRDDIM *rd_packets_in; + RRDDIM *rd_packets_out; + RRDDIM *rd_packets_m_in; + RRDDIM *rd_packets_m_out; + + RRDSET *st_errors; + RRDDIM *rd_errors_in; + RRDDIM *rd_errors_out; + + RRDSET *st_drops; + RRDDIM *rd_drops_in; + RRDDIM *rd_drops_out; + + RRDSET *st_events; + RRDDIM *rd_events_coll; + + struct cgroup_network_interface *next; +}; + +static struct cgroup_network_interface *network_interfaces_root = NULL, *network_interfaces_last_used = NULL; + +static size_t network_interfaces_added = 0, network_interfaces_found = 0; + +static void network_interface_free(struct cgroup_network_interface *ifm) { + if (likely(ifm->st_bandwidth)) + rrdset_is_obsolete(ifm->st_bandwidth); + if (likely(ifm->st_packets)) + rrdset_is_obsolete(ifm->st_packets); + if (likely(ifm->st_errors)) + rrdset_is_obsolete(ifm->st_errors); + if (likely(ifm->st_drops)) + rrdset_is_obsolete(ifm->st_drops); + if (likely(ifm->st_events)) + rrdset_is_obsolete(ifm->st_events); + + network_interfaces_added--; + freez(ifm->name); + freez(ifm); +} + +static void network_interfaces_cleanup() { + if (likely(network_interfaces_found == network_interfaces_added)) return; + + struct cgroup_network_interface *ifm = network_interfaces_root, *last = NULL; + while(ifm) { + if (unlikely(!ifm->updated)) { + // info("Removing network interface '%s', linked after '%s'", ifm->name, last?last->name:"ROOT"); + + if (network_interfaces_last_used == ifm) + network_interfaces_last_used = last; + + struct cgroup_network_interface *t = ifm; + + if (ifm == network_interfaces_root || !last) + network_interfaces_root = ifm = ifm->next; + + else + last->next = ifm = ifm->next; + + t->next = NULL; + network_interface_free(t); + } + else { + last = ifm; + ifm->updated = 0; + ifm = ifm->next; + } + } +} + +static struct cgroup_network_interface *get_network_interface(const char *name) { + struct cgroup_network_interface *ifm; + + uint32_t hash = simple_hash(name); + + // search it, from the last position to the end + for(ifm = network_interfaces_last_used ; ifm ; ifm = ifm->next) { + if (unlikely(hash == ifm->hash && !strcmp(name, ifm->name))) { + network_interfaces_last_used = ifm->next; + return ifm; + } + } + + // search it from the beginning to the last position we used + for(ifm = network_interfaces_root ; ifm != network_interfaces_last_used ; ifm = ifm->next) { + if (unlikely(hash == ifm->hash && !strcmp(name, ifm->name))) { + network_interfaces_last_used = ifm->next; + return ifm; + } + } + + // create a new one + ifm = callocz(1, sizeof(struct cgroup_network_interface)); + ifm->name = strdupz(name); + ifm->hash = simple_hash(ifm->name); + ifm->len = strlen(ifm->name); + network_interfaces_added++; + + // link it to the end + if (network_interfaces_root) { + struct cgroup_network_interface *e; + for(e = network_interfaces_root; e->next ; e = e->next) ; + e->next = ifm; + } + else + network_interfaces_root = ifm; + + return ifm; +} + +// -------------------------------------------------------------------------------------------------------------------- +// getifaddrs + +int do_getifaddrs(int update_every, usec_t dt) { + (void)dt; + +#define DEFAULT_EXCLUDED_INTERFACES "lo*" +#define DEFAULT_PHYSICAL_INTERFACES "igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx* re* igc* dwc*" +#define CONFIG_SECTION_GETIFADDRS "plugin:freebsd:getifaddrs" + + static int enable_new_interfaces = -1; + static int do_bandwidth_ipv4 = -1, do_bandwidth_ipv6 = -1, do_bandwidth = -1, do_packets = -1, do_bandwidth_net = -1, do_packets_net = -1, + do_errors = -1, do_drops = -1, do_events = -1; + static SIMPLE_PATTERN *excluded_interfaces = NULL, *physical_interfaces = NULL; + + if (unlikely(enable_new_interfaces == -1)) { + enable_new_interfaces = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, + "enable new interfaces detected at runtime", + CONFIG_BOOLEAN_AUTO); + + do_bandwidth_net = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "total bandwidth for physical interfaces", + CONFIG_BOOLEAN_AUTO); + do_packets_net = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "total packets for physical interfaces", + CONFIG_BOOLEAN_AUTO); + do_bandwidth_ipv4 = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "total bandwidth for ipv4 interfaces", + CONFIG_BOOLEAN_AUTO); + do_bandwidth_ipv6 = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "total bandwidth for ipv6 interfaces", + CONFIG_BOOLEAN_AUTO); + do_bandwidth = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "bandwidth for all interfaces", + CONFIG_BOOLEAN_AUTO); + do_packets = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "packets for all interfaces", + CONFIG_BOOLEAN_AUTO); + do_errors = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "errors for all interfaces", + CONFIG_BOOLEAN_AUTO); + do_drops = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "drops for all interfaces", + CONFIG_BOOLEAN_AUTO); + do_events = config_get_boolean_ondemand(CONFIG_SECTION_GETIFADDRS, "collisions for all interfaces", + CONFIG_BOOLEAN_AUTO); + + excluded_interfaces = simple_pattern_create( + config_get(CONFIG_SECTION_GETIFADDRS, "disable by default interfaces matching", DEFAULT_EXCLUDED_INTERFACES) + , NULL + , SIMPLE_PATTERN_EXACT + ); + physical_interfaces = simple_pattern_create( + config_get(CONFIG_SECTION_GETIFADDRS, "set physical interfaces for system.net", DEFAULT_PHYSICAL_INTERFACES) + , NULL + , SIMPLE_PATTERN_EXACT + ); + } + + if (likely(do_bandwidth_ipv4 || do_bandwidth_ipv6 || do_bandwidth || do_packets || do_errors || do_bandwidth_net || do_packets_net || + do_drops || do_events)) { + struct ifaddrs *ifap; + + if (unlikely(getifaddrs(&ifap))) { + error("FREEBSD: getifaddrs() failed"); + do_bandwidth_net = 0; + error("DISABLED: system.net chart"); + do_packets_net = 0; + error("DISABLED: system.packets chart"); + do_bandwidth_ipv4 = 0; + error("DISABLED: system.ipv4 chart"); + do_bandwidth_ipv6 = 0; + error("DISABLED: system.ipv6 chart"); + do_bandwidth = 0; + error("DISABLED: net.* charts"); + do_packets = 0; + error("DISABLED: net_packets.* charts"); + do_errors = 0; + error("DISABLED: net_errors.* charts"); + do_drops = 0; + error("DISABLED: net_drops.* charts"); + do_events = 0; + error("DISABLED: net_events.* charts"); + error("DISABLED: getifaddrs module"); + return 1; + } else { +#define IFA_DATA(s) (((struct if_data *)ifa->ifa_data)->ifi_ ## s) + struct ifaddrs *ifa; + struct iftot { + u_long ift_ibytes; + u_long ift_obytes; + u_long ift_ipackets; + u_long ift_opackets; + u_long ift_imcasts; + u_long ift_omcasts; + } iftot = {0, 0, 0, 0, 0, 0}; + + if (likely(do_bandwidth_net)) { + + iftot.ift_ibytes = iftot.ift_obytes = 0; + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + if (!simple_pattern_matches(physical_interfaces, ifa->ifa_name)) + continue; + iftot.ift_ibytes += IFA_DATA(ibytes); + iftot.ift_obytes += IFA_DATA(obytes); + } + + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost("system", + "net", + NULL, + "network", + NULL, + "Network Traffic", + "kilobits/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_SYSTEM_NET, + update_every, + RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st, "InOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "OutOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, iftot.ift_ibytes); + rrddim_set_by_pointer(st, rd_out, iftot.ift_obytes); + rrdset_done(st); + } + + if (likely(do_packets_net)) { + iftot.ift_ipackets = iftot.ift_opackets = iftot.ift_imcasts = iftot.ift_omcasts = 0; + + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + if (!simple_pattern_matches(physical_interfaces, ifa->ifa_name)) + continue; + iftot.ift_ipackets += IFA_DATA(ipackets); + iftot.ift_opackets += IFA_DATA(opackets); + iftot.ift_imcasts += IFA_DATA(imcasts); + iftot.ift_omcasts += IFA_DATA(omcasts); + } + + static RRDSET *st = NULL; + static RRDDIM *rd_packets_in = NULL, *rd_packets_out = NULL, *rd_packets_m_in = NULL, *rd_packets_m_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost("system", + "packets", + NULL, + "network", + NULL, + "Network Packets", + "packets/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_SYSTEM_PACKETS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_packets_in = rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_packets_out = rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_packets_m_in = rrddim_add(st, "multicast_received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_packets_m_out = rrddim_add(st, "multicast_sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_packets_in, iftot.ift_ipackets); + rrddim_set_by_pointer(st, rd_packets_out, iftot.ift_opackets); + rrddim_set_by_pointer(st, rd_packets_m_in, iftot.ift_imcasts); + rrddim_set_by_pointer(st, rd_packets_m_out, iftot.ift_omcasts); + rrdset_done(st); + } + + if (likely(do_bandwidth_ipv4)) { + iftot.ift_ibytes = iftot.ift_obytes = 0; + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + iftot.ift_ibytes += IFA_DATA(ibytes); + iftot.ift_obytes += IFA_DATA(obytes); + } + + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost("system", + "ipv4", + NULL, + "network", + NULL, + "IPv4 Bandwidth", + "kilobits/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_SYSTEM_IPV4, + update_every, + RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st, "InOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "OutOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, iftot.ift_ibytes); + rrddim_set_by_pointer(st, rd_out, iftot.ift_obytes); + rrdset_done(st); + } + + if (likely(do_bandwidth_ipv6)) { + iftot.ift_ibytes = iftot.ift_obytes = 0; + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + iftot.ift_ibytes += IFA_DATA(ibytes); + iftot.ift_obytes += IFA_DATA(obytes); + } + + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost("system", + "ipv6", + NULL, + "network", + NULL, + "IPv6 Bandwidth", + "kilobits/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_SYSTEM_IPV6, + update_every, + RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, iftot.ift_ibytes); + rrddim_set_by_pointer(st, rd_out, iftot.ift_obytes); + rrdset_done(st); + } + + network_interfaces_found = 0; + + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + + struct cgroup_network_interface *ifm = get_network_interface(ifa->ifa_name); + ifm->updated = 1; + network_interfaces_found++; + + if (unlikely(!ifm->configured)) { + char var_name[4096 + 1]; + + // this is the first time we see this network interface + + // remember we configured it + ifm->configured = 1; + + ifm->enabled = enable_new_interfaces; + + if (likely(ifm->enabled)) + ifm->enabled = !simple_pattern_matches(excluded_interfaces, ifa->ifa_name); + + snprintfz(var_name, 4096, "%s:%s", CONFIG_SECTION_GETIFADDRS, ifa->ifa_name); + ifm->enabled = config_get_boolean_ondemand(var_name, "enabled", ifm->enabled); + + if (unlikely(ifm->enabled == CONFIG_BOOLEAN_NO)) + continue; + + ifm->do_bandwidth = config_get_boolean_ondemand(var_name, "bandwidth", do_bandwidth); + ifm->do_packets = config_get_boolean_ondemand(var_name, "packets", do_packets); + ifm->do_errors = config_get_boolean_ondemand(var_name, "errors", do_errors); + ifm->do_drops = config_get_boolean_ondemand(var_name, "drops", do_drops); + ifm->do_events = config_get_boolean_ondemand(var_name, "events", do_events); + } + + if (unlikely(!ifm->enabled)) + continue; + + if (ifm->do_bandwidth == CONFIG_BOOLEAN_YES || (ifm->do_bandwidth == CONFIG_BOOLEAN_AUTO && + (IFA_DATA(ibytes) || + IFA_DATA(obytes) || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!ifm->st_bandwidth)) { + ifm->st_bandwidth = rrdset_create_localhost("net", + ifa->ifa_name, + NULL, + ifa->ifa_name, + "net.net", + "Bandwidth", + "kilobits/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_FIRST_NET_IFACE, + update_every, + RRDSET_TYPE_AREA + ); + + ifm->rd_bandwidth_in = rrddim_add(ifm->st_bandwidth, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + ifm->rd_bandwidth_out = rrddim_add(ifm->st_bandwidth, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(ifm->st_bandwidth, ifm->rd_bandwidth_in, IFA_DATA(ibytes)); + rrddim_set_by_pointer(ifm->st_bandwidth, ifm->rd_bandwidth_out, IFA_DATA(obytes)); + rrdset_done(ifm->st_bandwidth); + } + + if (ifm->do_packets == CONFIG_BOOLEAN_YES || (ifm->do_packets == CONFIG_BOOLEAN_AUTO && + (IFA_DATA(ipackets) || + IFA_DATA(opackets) || + IFA_DATA(imcasts) || + IFA_DATA(omcasts) || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!ifm->st_packets)) { + ifm->st_packets = rrdset_create_localhost("net_packets", + ifa->ifa_name, + NULL, + ifa->ifa_name, + "net.packets", + "Packets", + "packets/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_FIRST_NET_PACKETS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(ifm->st_packets, RRDSET_FLAG_DETAIL); + + ifm->rd_packets_in = rrddim_add(ifm->st_packets, "received", NULL, 1, 1, + RRD_ALGORITHM_INCREMENTAL); + ifm->rd_packets_out = rrddim_add(ifm->st_packets, "sent", NULL, -1, 1, + RRD_ALGORITHM_INCREMENTAL); + ifm->rd_packets_m_in = rrddim_add(ifm->st_packets, "multicast_received", NULL, 1, 1, + RRD_ALGORITHM_INCREMENTAL); + ifm->rd_packets_m_out = rrddim_add(ifm->st_packets, "multicast_sent", NULL, -1, 1, + RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(ifm->st_packets, ifm->rd_packets_in, IFA_DATA(ipackets)); + rrddim_set_by_pointer(ifm->st_packets, ifm->rd_packets_out, IFA_DATA(opackets)); + rrddim_set_by_pointer(ifm->st_packets, ifm->rd_packets_m_in, IFA_DATA(imcasts)); + rrddim_set_by_pointer(ifm->st_packets, ifm->rd_packets_m_out, IFA_DATA(omcasts)); + rrdset_done(ifm->st_packets); + } + + if (ifm->do_errors == CONFIG_BOOLEAN_YES || (ifm->do_errors == CONFIG_BOOLEAN_AUTO && + (IFA_DATA(ierrors) || + IFA_DATA(oerrors) || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!ifm->st_errors)) { + ifm->st_errors = rrdset_create_localhost("net_errors", + ifa->ifa_name, + NULL, + ifa->ifa_name, + "net.errors", + "Interface Errors", + "errors/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_FIRST_NET_ERRORS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(ifm->st_errors, RRDSET_FLAG_DETAIL); + + ifm->rd_errors_in = rrddim_add(ifm->st_errors, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ifm->rd_errors_out = rrddim_add(ifm->st_errors, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(ifm->st_errors, ifm->rd_errors_in, IFA_DATA(ierrors)); + rrddim_set_by_pointer(ifm->st_errors, ifm->rd_errors_out, IFA_DATA(oerrors)); + rrdset_done(ifm->st_errors); + } + + if (ifm->do_drops == CONFIG_BOOLEAN_YES || (ifm->do_drops == CONFIG_BOOLEAN_AUTO && + (IFA_DATA(iqdrops) || + #if __FreeBSD__ >= 11 + IFA_DATA(oqdrops) || + #endif + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!ifm->st_drops)) { + ifm->st_drops = rrdset_create_localhost("net_drops", + ifa->ifa_name, + NULL, + ifa->ifa_name, + "net.drops", + "Interface Drops", + "drops/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_FIRST_NET_DROPS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(ifm->st_drops, RRDSET_FLAG_DETAIL); + + ifm->rd_drops_in = rrddim_add(ifm->st_drops, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#if __FreeBSD__ >= 11 + ifm->rd_drops_out = rrddim_add(ifm->st_drops, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); +#endif + } + + rrddim_set_by_pointer(ifm->st_drops, ifm->rd_drops_in, IFA_DATA(iqdrops)); +#if __FreeBSD__ >= 11 + rrddim_set_by_pointer(ifm->st_drops, ifm->rd_drops_out, IFA_DATA(oqdrops)); +#endif + rrdset_done(ifm->st_drops); + } + + if (ifm->do_events == CONFIG_BOOLEAN_YES || (ifm->do_events == CONFIG_BOOLEAN_AUTO && + (IFA_DATA(collisions) || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!ifm->st_events)) { + ifm->st_events = rrdset_create_localhost("net_events", + ifa->ifa_name, + NULL, + ifa->ifa_name, + "net.events", + "Network Interface Events", + "events/s", + "freebsd.plugin", + "getifaddrs", + NETDATA_CHART_PRIO_FIRST_NET_EVENTS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(ifm->st_events, RRDSET_FLAG_DETAIL); + + ifm->rd_events_coll = rrddim_add(ifm->st_events, "collisions", NULL, -1, 1, + RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(ifm->st_events, ifm->rd_events_coll, IFA_DATA(collisions)); + rrdset_done(ifm->st_events); + } + } + + freeifaddrs(ifap); + } + } else { + error("DISABLED: getifaddrs module"); + return 1; + } + + network_interfaces_cleanup(); + + return 0; +} diff --git a/collectors/freebsd.plugin/freebsd_getmntinfo.c b/collectors/freebsd.plugin/freebsd_getmntinfo.c new file mode 100644 index 0000000..e8feefc --- /dev/null +++ b/collectors/freebsd.plugin/freebsd_getmntinfo.c @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_freebsd.h" + +#include + +struct mount_point { + char *name; + uint32_t hash; + size_t len; + + // flags + int configured; + int enabled; + int updated; + + int do_space; + int do_inodes; + + size_t collected; // the number of times this has been collected + + // charts and dimensions + + RRDSET *st_space; + RRDDIM *rd_space_used; + RRDDIM *rd_space_avail; + RRDDIM *rd_space_reserved; + + RRDSET *st_inodes; + RRDDIM *rd_inodes_used; + RRDDIM *rd_inodes_avail; + + struct mount_point *next; +}; + +static struct mount_point *mount_points_root = NULL, *mount_points_last_used = NULL; + +static size_t mount_points_added = 0, mount_points_found = 0; + +static void mount_point_free(struct mount_point *m) { + if (likely(m->st_space)) + rrdset_is_obsolete(m->st_space); + if (likely(m->st_inodes)) + rrdset_is_obsolete(m->st_inodes); + + mount_points_added--; + freez(m->name); + freez(m); +} + +static void mount_points_cleanup() { + if (likely(mount_points_found == mount_points_added)) return; + + struct mount_point *m = mount_points_root, *last = NULL; + while(m) { + if (unlikely(!m->updated)) { + // info("Removing mount point '%s', linked after '%s'", m->name, last?last->name:"ROOT"); + + if (mount_points_last_used == m) + mount_points_last_used = last; + + struct mount_point *t = m; + + if (m == mount_points_root || !last) + mount_points_root = m = m->next; + + else + last->next = m = m->next; + + t->next = NULL; + mount_point_free(t); + } + else { + last = m; + m->updated = 0; + m = m->next; + } + } +} + +static struct mount_point *get_mount_point(const char *name) { + struct mount_point *m; + + uint32_t hash = simple_hash(name); + + // search it, from the last position to the end + for(m = mount_points_last_used ; m ; m = m->next) { + if (unlikely(hash == m->hash && !strcmp(name, m->name))) { + mount_points_last_used = m->next; + return m; + } + } + + // search it from the beginning to the last position we used + for(m = mount_points_root ; m != mount_points_last_used ; m = m->next) { + if (unlikely(hash == m->hash && !strcmp(name, m->name))) { + mount_points_last_used = m->next; + return m; + } + } + + // create a new one + m = callocz(1, sizeof(struct mount_point)); + m->name = strdupz(name); + m->hash = simple_hash(m->name); + m->len = strlen(m->name); + mount_points_added++; + + // link it to the end + if (mount_points_root) { + struct mount_point *e; + for(e = mount_points_root; e->next ; e = e->next) ; + e->next = m; + } + else + mount_points_root = m; + + return m; +} + +// -------------------------------------------------------------------------------------------------------------------- +// getmntinfo + +int do_getmntinfo(int update_every, usec_t dt) { + (void)dt; + +#define DEFAULT_EXCLUDED_PATHS "/proc/*" +// taken from gnulib/mountlist.c and shortened to FreeBSD related fstypes +#define DEFAULT_EXCLUDED_FILESYSTEMS "autofs procfs subfs devfs none" +#define CONFIG_SECTION_GETMNTINFO "plugin:freebsd:getmntinfo" + + static int enable_new_mount_points = -1; + static int do_space = -1, do_inodes = -1; + static SIMPLE_PATTERN *excluded_mountpoints = NULL; + static SIMPLE_PATTERN *excluded_filesystems = NULL; + + if (unlikely(enable_new_mount_points == -1)) { + enable_new_mount_points = config_get_boolean_ondemand(CONFIG_SECTION_GETMNTINFO, + "enable new mount points detected at runtime", + CONFIG_BOOLEAN_AUTO); + + do_space = config_get_boolean_ondemand(CONFIG_SECTION_GETMNTINFO, "space usage for all disks", CONFIG_BOOLEAN_AUTO); + do_inodes = config_get_boolean_ondemand(CONFIG_SECTION_GETMNTINFO, "inodes usage for all disks", CONFIG_BOOLEAN_AUTO); + + excluded_mountpoints = simple_pattern_create( + config_get(CONFIG_SECTION_GETMNTINFO, "exclude space metrics on paths", + DEFAULT_EXCLUDED_PATHS) + , NULL + , SIMPLE_PATTERN_EXACT + ); + + excluded_filesystems = simple_pattern_create( + config_get(CONFIG_SECTION_GETMNTINFO, "exclude space metrics on filesystems", + DEFAULT_EXCLUDED_FILESYSTEMS) + , NULL + , SIMPLE_PATTERN_EXACT + ); + } + + if (likely(do_space || do_inodes)) { + struct statfs *mntbuf; + int mntsize; + + // there is no mount info in sysctl MIBs + if (unlikely(!(mntsize = getmntinfo(&mntbuf, MNT_NOWAIT)))) { + error("FREEBSD: getmntinfo() failed"); + do_space = 0; + error("DISABLED: disk_space.* charts"); + do_inodes = 0; + error("DISABLED: disk_inodes.* charts"); + error("DISABLED: getmntinfo module"); + return 1; + } else { + int i; + + mount_points_found = 0; + + for (i = 0; i < mntsize; i++) { + char title[4096 + 1]; + + struct mount_point *m = get_mount_point(mntbuf[i].f_mntonname); + m->updated = 1; + mount_points_found++; + + if (unlikely(!m->configured)) { + char var_name[4096 + 1]; + + // this is the first time we see this filesystem + + // remember we configured it + m->configured = 1; + + m->enabled = enable_new_mount_points; + + if (likely(m->enabled)) + m->enabled = !(simple_pattern_matches(excluded_mountpoints, mntbuf[i].f_mntonname) + || simple_pattern_matches(excluded_filesystems, mntbuf[i].f_fstypename)); + + snprintfz(var_name, 4096, "%s:%s", CONFIG_SECTION_GETMNTINFO, mntbuf[i].f_mntonname); + m->enabled = config_get_boolean_ondemand(var_name, "enabled", m->enabled); + + if (unlikely(m->enabled == CONFIG_BOOLEAN_NO)) + continue; + + m->do_space = config_get_boolean_ondemand(var_name, "space usage", do_space); + m->do_inodes = config_get_boolean_ondemand(var_name, "inodes usage", do_inodes); + } + + if (unlikely(!m->enabled)) + continue; + + if (unlikely(mntbuf[i].f_flags & MNT_RDONLY && !m->collected)) + continue; + + int rendered = 0; + + if (m->do_space == CONFIG_BOOLEAN_YES || (m->do_space == CONFIG_BOOLEAN_AUTO && + (mntbuf[i].f_blocks > 2 || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!m->st_space)) { + snprintfz(title, 4096, "Disk Space Usage for %s [%s]", + mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); + m->st_space = rrdset_create_localhost("disk_space", + mntbuf[i].f_mntonname, + NULL, + mntbuf[i].f_mntonname, + "disk.space", + title, + "GiB", + "freebsd.plugin", + "getmntinfo", + NETDATA_CHART_PRIO_DISKSPACE_SPACE, + update_every, + RRDSET_TYPE_STACKED + ); + + m->rd_space_avail = rrddim_add(m->st_space, "avail", NULL, + mntbuf[i].f_bsize, GIGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + m->rd_space_used = rrddim_add(m->st_space, "used", NULL, + mntbuf[i].f_bsize, GIGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + m->rd_space_reserved = rrddim_add(m->st_space, "reserved_for_root", "reserved for root", + mntbuf[i].f_bsize, GIGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(m->st_space, m->rd_space_avail, (collected_number) mntbuf[i].f_bavail); + rrddim_set_by_pointer(m->st_space, m->rd_space_used, (collected_number) (mntbuf[i].f_blocks - + mntbuf[i].f_bfree)); + rrddim_set_by_pointer(m->st_space, m->rd_space_reserved, (collected_number) (mntbuf[i].f_bfree - + mntbuf[i].f_bavail)); + rrdset_done(m->st_space); + + rendered++; + } + + if (m->do_inodes == CONFIG_BOOLEAN_YES || (m->do_inodes == CONFIG_BOOLEAN_AUTO && + (mntbuf[i].f_files > 1 || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if (unlikely(!m->st_inodes)) { + snprintfz(title, 4096, "Disk Files (inodes) Usage for %s [%s]", + mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); + m->st_inodes = rrdset_create_localhost("disk_inodes", + mntbuf[i].f_mntonname, + NULL, + mntbuf[i].f_mntonname, + "disk.inodes", + title, + "inodes", + "freebsd.plugin", + "getmntinfo", + NETDATA_CHART_PRIO_DISKSPACE_INODES, + update_every, + RRDSET_TYPE_STACKED + ); + + m->rd_inodes_avail = rrddim_add(m->st_inodes, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + m->rd_inodes_used = rrddim_add(m->st_inodes, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_avail, (collected_number) mntbuf[i].f_ffree); + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_used, (collected_number) (mntbuf[i].f_files - + mntbuf[i].f_ffree)); + rrdset_done(m->st_inodes); + + rendered++; + } + + if (likely(rendered)) + m->collected++; + } + } + } else { + error("DISABLED: getmntinfo module"); + return 1; + } + + mount_points_cleanup(); + + return 0; +} diff --git a/collectors/freebsd.plugin/freebsd_ipfw.c b/collectors/freebsd.plugin/freebsd_ipfw.c new file mode 100644 index 0000000..178eaa3 --- /dev/null +++ b/collectors/freebsd.plugin/freebsd_ipfw.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_freebsd.h" + +#include + +#define FREE_MEM_THRESHOLD 10000 // number of unused chunks that trigger memory freeing + +#define COMMON_IPFW_ERROR() error("DISABLED: ipfw.packets chart"); \ + error("DISABLED: ipfw.bytes chart"); \ + error("DISABLED: ipfw.dyn_active chart"); \ + error("DISABLED: ipfw.dyn_expired chart"); \ + error("DISABLED: ipfw.mem chart"); + +// -------------------------------------------------------------------------------------------------------------------- +// ipfw + +int do_ipfw(int update_every, usec_t dt) { + (void)dt; +#if __FreeBSD__ >= 11 + + static int do_static = -1, do_dynamic = -1, do_mem = -1; + + if (unlikely(do_static == -1)) { + do_static = config_get_boolean("plugin:freebsd:ipfw", "counters for static rules", 1); + do_dynamic = config_get_boolean("plugin:freebsd:ipfw", "number of dynamic rules", 1); + do_mem = config_get_boolean("plugin:freebsd:ipfw", "allocated memory", 1); + } + + // variables for getting ipfw configuration + + int error; + static int ipfw_socket = -1; + static ipfw_cfg_lheader *cfg = NULL; + ip_fw3_opheader *op3 = NULL; + static socklen_t *optlen = NULL, cfg_size = 0; + + // variables for static rules handling + + ipfw_obj_ctlv *ctlv = NULL; + ipfw_obj_tlv *rbase = NULL; + int rcnt = 0; + + int n, seen; + struct ip_fw_rule *rule; + struct ip_fw_bcounter *cntr; + int c = 0; + + char rule_num_str[12]; + + // variables for dynamic rules handling + + caddr_t dynbase = NULL; + size_t dynsz = 0; + size_t readsz = sizeof(*cfg);; + int ttype = 0; + ipfw_obj_tlv *tlv; + ipfw_dyn_rule *dyn_rule; + uint16_t rulenum, prev_rulenum = IPFW_DEFAULT_RULE; + unsigned srn, static_rules_num = 0; + static size_t dyn_rules_num_size = 0; + + static struct dyn_rule_num { + uint16_t rule_num; + uint32_t active_rules; + uint32_t expired_rules; + } *dyn_rules_num = NULL; + + uint32_t *dyn_rules_counter; + + if (likely(do_static | do_dynamic | do_mem)) { + + // initialize the smallest ipfw_cfg_lheader possible + + if (unlikely((optlen == NULL) || (cfg == NULL))) { + optlen = reallocz(optlen, sizeof(socklen_t)); + *optlen = cfg_size = 32; + cfg = reallocz(cfg, *optlen); + } + + // get socket descriptor and initialize ipfw_cfg_lheader structure + + if (unlikely(ipfw_socket == -1)) + ipfw_socket = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + if (unlikely(ipfw_socket == -1)) { + error("FREEBSD: can't get socket for ipfw configuration"); + error("FREEBSD: run netdata as root to get access to ipfw data"); + COMMON_IPFW_ERROR(); + return 1; + } + + bzero(cfg, 32); + cfg->flags = IPFW_CFG_GET_STATIC | IPFW_CFG_GET_COUNTERS | IPFW_CFG_GET_STATES; + op3 = &cfg->opheader; + op3->opcode = IP_FW_XGET; + + // get ifpw configuration size than get configuration + + *optlen = cfg_size; + error = getsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, optlen); + if (error) + if (errno != ENOMEM) { + error("FREEBSD: ipfw socket reading error"); + COMMON_IPFW_ERROR(); + return 1; + } + if ((cfg->size > cfg_size) || ((cfg_size - cfg->size) > sizeof(struct dyn_rule_num) * FREE_MEM_THRESHOLD)) { + *optlen = cfg_size = cfg->size; + cfg = reallocz(cfg, *optlen); + bzero(cfg, 32); + cfg->flags = IPFW_CFG_GET_STATIC | IPFW_CFG_GET_COUNTERS | IPFW_CFG_GET_STATES; + op3 = &cfg->opheader; + op3->opcode = IP_FW_XGET; + error = getsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, optlen); + if (error) { + error("FREEBSD: ipfw socket reading error"); + COMMON_IPFW_ERROR(); + return 1; + } + } + + // go through static rules configuration structures + + ctlv = (ipfw_obj_ctlv *) (cfg + 1); + + if (cfg->flags & IPFW_CFG_GET_STATIC) { + /* We've requested static rules */ + if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { + readsz += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *) ((caddr_t) ctlv + + ctlv->head.length); + } + + if (ctlv->head.type == IPFW_TLV_RULE_LIST) { + rbase = (ipfw_obj_tlv *) (ctlv + 1); + rcnt = ctlv->count; + readsz += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *) ((caddr_t) ctlv + ctlv->head.length); + } + } + + if ((cfg->flags & IPFW_CFG_GET_STATES) && (readsz != *optlen)) { + /* We may have some dynamic states */ + dynsz = *optlen - readsz; + /* Skip empty header */ + if (dynsz != sizeof(ipfw_obj_ctlv)) + dynbase = (caddr_t) ctlv; + else + dynsz = 0; + } + + if (likely(do_mem)) { + static RRDSET *st_mem = NULL; + static RRDDIM *rd_dyn_mem = NULL; + static RRDDIM *rd_stat_mem = NULL; + + if (unlikely(!st_mem)) { + st_mem = rrdset_create_localhost("ipfw", + "mem", + NULL, + "memory allocated", + NULL, + "Memory allocated by rules", + "bytes", + "freebsd.plugin", + "ipfw", + NETDATA_CHART_PRIO_IPFW_MEM, + update_every, + RRDSET_TYPE_STACKED + ); + rrdset_flag_set(st_mem, RRDSET_FLAG_DETAIL); + + rd_dyn_mem = rrddim_add(st_mem, "dynamic", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_stat_mem = rrddim_add(st_mem, "static", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem, rd_dyn_mem, dynsz); + rrddim_set_by_pointer(st_mem, rd_stat_mem, *optlen - dynsz); + rrdset_done(st_mem); + } + + static RRDSET *st_packets = NULL, *st_bytes = NULL; + RRDDIM *rd_packets = NULL, *rd_bytes = NULL; + + if (likely(do_static || do_dynamic)) { + if (likely(do_static)) { + if (unlikely(!st_packets)) { + st_packets = rrdset_create_localhost("ipfw", + "packets", + NULL, + "static rules", + NULL, + "Packets", + "packets/s", + "freebsd.plugin", + "ipfw", + NETDATA_CHART_PRIO_IPFW_PACKETS, + update_every, + RRDSET_TYPE_STACKED + ); + } + + if (unlikely(!st_bytes)) { + st_bytes = rrdset_create_localhost("ipfw", + "bytes", + NULL, + "static rules", + NULL, + "Bytes", + "bytes/s", + "freebsd.plugin", + "ipfw", + NETDATA_CHART_PRIO_IPFW_BYTES, + update_every, + RRDSET_TYPE_STACKED + ); + } + } + + for (n = seen = 0; n < rcnt; n++, rbase = (ipfw_obj_tlv *) ((caddr_t) rbase + rbase->length)) { + cntr = (struct ip_fw_bcounter *) (rbase + 1); + rule = (struct ip_fw_rule *) ((caddr_t) cntr + cntr->size); + if (rule->rulenum != prev_rulenum) + static_rules_num++; + if (rule->rulenum > IPFW_DEFAULT_RULE) + break; + + if (likely(do_static)) { + sprintf(rule_num_str, "%"PRIu32"_%"PRIu32"", (uint32_t)rule->rulenum, (uint32_t)rule->id); + + rd_packets = rrddim_find_active(st_packets, rule_num_str); + if (unlikely(!rd_packets)) + rd_packets = rrddim_add(st_packets, rule_num_str, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_set_by_pointer(st_packets, rd_packets, cntr->pcnt); + + rd_bytes = rrddim_find_active(st_bytes, rule_num_str); + if (unlikely(!rd_bytes)) + rd_bytes = rrddim_add(st_bytes, rule_num_str, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_set_by_pointer(st_bytes, rd_bytes, cntr->bcnt); + } + + c += rbase->length; + seen++; + } + + if (likely(do_static)) { + rrdset_done(st_packets); + rrdset_done(st_bytes); + } + } + + // go through dynamic rules configuration structures + + if (likely(do_dynamic && (dynsz > 0))) { + if ((dyn_rules_num_size < sizeof(struct dyn_rule_num) * static_rules_num) || + ((dyn_rules_num_size - sizeof(struct dyn_rule_num) * static_rules_num) > + sizeof(struct dyn_rule_num) * FREE_MEM_THRESHOLD)) { + dyn_rules_num_size = sizeof(struct dyn_rule_num) * static_rules_num; + dyn_rules_num = reallocz(dyn_rules_num, dyn_rules_num_size); + } + bzero(dyn_rules_num, sizeof(struct dyn_rule_num) * static_rules_num); + dyn_rules_num->rule_num = IPFW_DEFAULT_RULE; + + if (dynsz > 0 && ctlv->head.type == IPFW_TLV_DYNSTATE_LIST) { + dynbase += sizeof(*ctlv); + dynsz -= sizeof(*ctlv); + ttype = IPFW_TLV_DYN_ENT; + } + + while (dynsz > 0) { + tlv = (ipfw_obj_tlv *) dynbase; + if (tlv->type != ttype) + break; + + dyn_rule = (ipfw_dyn_rule *) (tlv + 1); + bcopy(&dyn_rule->rule, &rulenum, sizeof(rulenum)); + + for (srn = 0; srn < (static_rules_num - 1); srn++) { + if (dyn_rule->expire > 0) + dyn_rules_counter = &dyn_rules_num[srn].active_rules; + else + dyn_rules_counter = &dyn_rules_num[srn].expired_rules; + if (dyn_rules_num[srn].rule_num == rulenum) { + (*dyn_rules_counter)++; + break; + } + if (dyn_rules_num[srn].rule_num == IPFW_DEFAULT_RULE) { + dyn_rules_num[srn].rule_num = rulenum; + dyn_rules_num[srn + 1].rule_num = IPFW_DEFAULT_RULE; + (*dyn_rules_counter)++; + break; + } + } + + dynsz -= tlv->length; + dynbase += tlv->length; + } + + static RRDSET *st_active = NULL, *st_expired = NULL; + RRDDIM *rd_active = NULL, *rd_expired = NULL; + + if (unlikely(!st_active)) { + st_active = rrdset_create_localhost("ipfw", + "active", + NULL, + "dynamic_rules", + NULL, + "Active rules", + "rules", + "freebsd.plugin", + "ipfw", + NETDATA_CHART_PRIO_IPFW_ACTIVE, + update_every, + RRDSET_TYPE_STACKED + ); + } + + if (unlikely(!st_expired)) { + st_expired = rrdset_create_localhost("ipfw", + "expired", + NULL, + "dynamic_rules", + NULL, + "Expired rules", + "rules", + "freebsd.plugin", + "ipfw", + NETDATA_CHART_PRIO_IPFW_EXPIRED, + update_every, + RRDSET_TYPE_STACKED + ); + } + + for (srn = 0; (srn < (static_rules_num - 1)) && (dyn_rules_num[srn].rule_num != IPFW_DEFAULT_RULE); srn++) { + sprintf(rule_num_str, "%d", dyn_rules_num[srn].rule_num); + + rd_active = rrddim_find_active(st_active, rule_num_str); + if (unlikely(!rd_active)) + rd_active = rrddim_add(st_active, rule_num_str, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_set_by_pointer(st_active, rd_active, dyn_rules_num[srn].active_rules); + + rd_expired = rrddim_find_active(st_expired, rule_num_str); + if (unlikely(!rd_expired)) + rd_expired = rrddim_add(st_expired, rule_num_str, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_set_by_pointer(st_expired, rd_expired, dyn_rules_num[srn].expired_rules); + } + + rrdset_done(st_active); + rrdset_done(st_expired); + } + } + + return 0; +#else + error("FREEBSD: ipfw charts supported for FreeBSD 11.0 and newer releases only"); + COMMON_IPFW_ERROR(); + return 1; +#endif +} diff --git a/collectors/freebsd.plugin/freebsd_kstat_zfs.c b/collectors/freebsd.plugin/freebsd_kstat_zfs.c new file mode 100644 index 0000000..046a1e6 --- /dev/null +++ b/collectors/freebsd.plugin/freebsd_kstat_zfs.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_freebsd.h" +#include "collectors/proc.plugin/zfs_common.h" + +extern struct arcstats arcstats; + +unsigned long long zfs_arcstats_shrinkable_cache_size_bytes = 0; + +// kstat.zfs.misc.arcstats + +int do_kstat_zfs_misc_arcstats(int update_every, usec_t dt) { + (void)dt; + + static int show_zero_charts = -1; + if(unlikely(show_zero_charts == -1)) + show_zero_charts = config_get_boolean_ondemand("plugin:freebsd:zfs_arcstats", "show zero charts", CONFIG_BOOLEAN_NO); + + unsigned long long l2_size; + size_t uint64_t_size = sizeof(uint64_t); + static struct mibs { + int hits[5]; + int misses[5]; + int demand_data_hits[5]; + int demand_data_misses[5]; + int demand_metadata_hits[5]; + int demand_metadata_misses[5]; + int prefetch_data_hits[5]; + int prefetch_data_misses[5]; + int prefetch_metadata_hits[5]; + int prefetch_metadata_misses[5]; + int mru_hits[5]; + int mru_ghost_hits[5]; + int mfu_hits[5]; + int mfu_ghost_hits[5]; + int deleted[5]; + int mutex_miss[5]; + int evict_skip[5]; + // int evict_not_enough[5]; + // int evict_l2_cached[5]; + // int evict_l2_eligible[5]; + // int evict_l2_ineligible[5]; + // int evict_l2_skip[5]; + int hash_elements[5]; + int hash_elements_max[5]; + int hash_collisions[5]; + int hash_chains[5]; + int hash_chain_max[5]; + int p[5]; + int c[5]; + int c_min[5]; + int c_max[5]; + int size[5]; + // int hdr_size[5]; + // int data_size[5]; + // int metadata_size[5]; + // int other_size[5]; + // int anon_size[5]; + // int anon_evictable_data[5]; + // int anon_evictable_metadata[5]; + int mru_size[5]; + // int mru_evictable_data[5]; + // int mru_evictable_metadata[5]; + // int mru_ghost_size[5]; + // int mru_ghost_evictable_data[5]; + // int mru_ghost_evictable_metadata[5]; + int mfu_size[5]; + // int mfu_evictable_data[5]; + // int mfu_evictable_metadata[5]; + // int mfu_ghost_size[5]; + // int mfu_ghost_evictable_data[5]; + // int mfu_ghost_evictable_metadata[5]; + int l2_hits[5]; + int l2_misses[5]; + // int l2_feeds[5]; + // int l2_rw_clash[5]; + int l2_read_bytes[5]; + int l2_write_bytes[5]; + // int l2_writes_sent[5]; + // int l2_writes_done[5]; + // int l2_writes_error[5]; + // int l2_writes_lock_retry[5]; + // int l2_evict_lock_retry[5]; + // int l2_evict_reading[5]; + // int l2_evict_l1cached[5]; + // int l2_free_on_write[5]; + // int l2_cdata_free_on_write[5]; + // int l2_abort_lowmem[5]; + // int l2_cksum_bad[5]; + // int l2_io_error[5]; + int l2_size[5]; + int l2_asize[5]; + // int l2_hdr_size[5]; + // int l2_compress_successes[5]; + // int l2_compress_zeros[5]; + // int l2_compress_failures[5]; + int memory_throttle_count[5]; + // int duplicate_buffers[5]; + // int duplicate_buffers_size[5]; + // int duplicate_reads[5]; + // int memory_direct_count[5]; + // int memory_indirect_count[5]; + // int arc_no_grow[5]; + // int arc_tempreserve[5]; + // int arc_loaned_bytes[5]; + // int arc_prune[5]; + // int arc_meta_used[5]; + // int arc_meta_limit[5]; + // int arc_meta_max[5]; + // int arc_meta_min[5]; + // int arc_need_free[5]; + // int arc_sys_free[5]; + } mibs; + + arcstats.l2exist = -1; + + if(unlikely(sysctlbyname("kstat.zfs.misc.arcstats.l2_size", &l2_size, &uint64_t_size, NULL, 0))) + return 0; + + if(likely(l2_size)) + arcstats.l2exist = 1; + else + arcstats.l2exist = 0; + + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.hits", mibs.hits, arcstats.hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.misses", mibs.misses, arcstats.misses); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.demand_data_hits", mibs.demand_data_hits, arcstats.demand_data_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.demand_data_misses", mibs.demand_data_misses, arcstats.demand_data_misses); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.demand_metadata_hits", mibs.demand_metadata_hits, arcstats.demand_metadata_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.demand_metadata_misses", mibs.demand_metadata_misses, arcstats.demand_metadata_misses); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.prefetch_data_hits", mibs.prefetch_data_hits, arcstats.prefetch_data_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.prefetch_data_misses", mibs.prefetch_data_misses, arcstats.prefetch_data_misses); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.prefetch_metadata_hits", mibs.prefetch_metadata_hits, arcstats.prefetch_metadata_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.prefetch_metadata_misses", mibs.prefetch_metadata_misses, arcstats.prefetch_metadata_misses); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_hits", mibs.mru_hits, arcstats.mru_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_ghost_hits", mibs.mru_ghost_hits, arcstats.mru_ghost_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_hits", mibs.mfu_hits, arcstats.mfu_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_ghost_hits", mibs.mfu_ghost_hits, arcstats.mfu_ghost_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.deleted", mibs.deleted, arcstats.deleted); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mutex_miss", mibs.mutex_miss, arcstats.mutex_miss); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.evict_skip", mibs.evict_skip, arcstats.evict_skip); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.evict_not_enough", mibs.evict_not_enough, arcstats.evict_not_enough); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.evict_l2_cached", mibs.evict_l2_cached, arcstats.evict_l2_cached); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.evict_l2_eligible", mibs.evict_l2_eligible, arcstats.evict_l2_eligible); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.evict_l2_ineligible", mibs.evict_l2_ineligible, arcstats.evict_l2_ineligible); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.evict_l2_skip", mibs.evict_l2_skip, arcstats.evict_l2_skip); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.hash_elements", mibs.hash_elements, arcstats.hash_elements); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.hash_elements_max", mibs.hash_elements_max, arcstats.hash_elements_max); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.hash_collisions", mibs.hash_collisions, arcstats.hash_collisions); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.hash_chains", mibs.hash_chains, arcstats.hash_chains); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.hash_chain_max", mibs.hash_chain_max, arcstats.hash_chain_max); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.p", mibs.p, arcstats.p); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.c", mibs.c, arcstats.c); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.c_min", mibs.c_min, arcstats.c_min); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.c_max", mibs.c_max, arcstats.c_max); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.size", mibs.size, arcstats.size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.hdr_size", mibs.hdr_size, arcstats.hdr_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.data_size", mibs.data_size, arcstats.data_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.metadata_size", mibs.metadata_size, arcstats.metadata_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.other_size", mibs.other_size, arcstats.other_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.anon_size", mibs.anon_size, arcstats.anon_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.anon_evictable_data", mibs.anon_evictable_data, arcstats.anon_evictable_data); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.anon_evictable_metadata", mibs.anon_evictable_metadata, arcstats.anon_evictable_metadata); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_size", mibs.mru_size, arcstats.mru_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_evictable_data", mibs.mru_evictable_data, arcstats.mru_evictable_data); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_evictable_metadata", mibs.mru_evictable_metadata, arcstats.mru_evictable_metadata); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_ghost_size", mibs.mru_ghost_size, arcstats.mru_ghost_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_ghost_evictable_data", mibs.mru_ghost_evictable_data, arcstats.mru_ghost_evictable_data); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mru_ghost_evictable_metadata", mibs.mru_ghost_evictable_metadata, arcstats.mru_ghost_evictable_metadata); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_size", mibs.mfu_size, arcstats.mfu_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_evictable_data", mibs.mfu_evictable_data, arcstats.mfu_evictable_data); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_evictable_metadata", mibs.mfu_evictable_metadata, arcstats.mfu_evictable_metadata); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_ghost_size", mibs.mfu_ghost_size, arcstats.mfu_ghost_size); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_ghost_evictable_data", mibs.mfu_ghost_evictable_data, arcstats.mfu_ghost_evictable_data); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.mfu_ghost_evictable_metadata", mibs.mfu_ghost_evictable_metadata, arcstats.mfu_ghost_evictable_metadata); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_hits", mibs.l2_hits, arcstats.l2_hits); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_misses", mibs.l2_misses, arcstats.l2_misses); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_feeds", mibs.l2_feeds, arcstats.l2_feeds); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_rw_clash", mibs.l2_rw_clash, arcstats.l2_rw_clash); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_read_bytes", mibs.l2_read_bytes, arcstats.l2_read_bytes); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_write_bytes", mibs.l2_write_bytes, arcstats.l2_write_bytes); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_writes_sent", mibs.l2_writes_sent, arcstats.l2_writes_sent); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_writes_done", mibs.l2_writes_done, arcstats.l2_writes_done); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_writes_error", mibs.l2_writes_error, arcstats.l2_writes_error); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_writes_lock_retry", mibs.l2_writes_lock_retry, arcstats.l2_writes_lock_retry); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_evict_lock_retry", mibs.l2_evict_lock_retry, arcstats.l2_evict_lock_retry); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_evict_reading", mibs.l2_evict_reading, arcstats.l2_evict_reading); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_evict_l1cached", mibs.l2_evict_l1cached, arcstats.l2_evict_l1cached); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_free_on_write", mibs.l2_free_on_write, arcstats.l2_free_on_write); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_cdata_free_on_write", mibs.l2_cdata_free_on_write, arcstats.l2_cdata_free_on_write); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_abort_lowmem", mibs.l2_abort_lowmem, arcstats.l2_abort_lowmem); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_cksum_bad", mibs.l2_cksum_bad, arcstats.l2_cksum_bad); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_io_error", mibs.l2_io_error, arcstats.l2_io_error); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_size", mibs.l2_size, arcstats.l2_size); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_asize", mibs.l2_asize, arcstats.l2_asize); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_hdr_size", mibs.l2_hdr_size, arcstats.l2_hdr_size); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_compress_successes", mibs.l2_compress_successes, arcstats.l2_compress_successes); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_compress_zeros", mibs.l2_compress_zeros, arcstats.l2_compress_zeros); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.l2_compress_failures", mibs.l2_compress_failures, arcstats.l2_compress_failures); + GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.memory_throttle_count", mibs.memory_throttle_count, arcstats.memory_throttle_count); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.duplicate_buffers", mibs.duplicate_buffers, arcstats.duplicate_buffers); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.duplicate_buffers_size", mibs.duplicate_buffers_size, arcstats.duplicate_buffers_size); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.duplicate_reads", mibs.duplicate_reads, arcstats.duplicate_reads); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.memory_direct_count", mibs.memory_direct_count, arcstats.memory_direct_count); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.memory_indirect_count", mibs.memory_indirect_count, arcstats.memory_indirect_count); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_no_grow", mibs.arc_no_grow, arcstats.arc_no_grow); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_tempreserve", mibs.arc_tempreserve, arcstats.arc_tempreserve); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_loaned_bytes", mibs.arc_loaned_bytes, arcstats.arc_loaned_bytes); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_prune", mibs.arc_prune, arcstats.arc_prune); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_meta_used", mibs.arc_meta_used, arcstats.arc_meta_used); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_meta_limit", mibs.arc_meta_limit, arcstats.arc_meta_limit); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_meta_max", mibs.arc_meta_max, arcstats.arc_meta_max); + // not used: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_meta_min", mibs.arc_meta_min, arcstats.arc_meta_min); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_need_free", mibs.arc_need_free, arcstats.arc_need_free); + // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_sys_free", mibs.arc_sys_free, arcstats.arc_sys_free); + + if (arcstats.size > arcstats.c_min) { + zfs_arcstats_shrinkable_cache_size_bytes = arcstats.size - arcstats.c_min; + } else { + zfs_arcstats_shrinkable_cache_size_bytes = 0; + } + + generate_charts_arcstats("freebsd.plugin", "zfs", show_zero_charts, update_every); + generate_charts_arc_summary("freebsd.plugin", "zfs", show_zero_charts, update_every); + + return 0; +} + +// kstat.zfs.misc.zio_trim + +int do_kstat_zfs_misc_zio_trim(int update_every, usec_t dt) { + (void)dt; + static int mib_bytes[5] = {0, 0, 0, 0, 0}, mib_success[5] = {0, 0, 0, 0, 0}, + mib_failed[5] = {0, 0, 0, 0, 0}, mib_unsupported[5] = {0, 0, 0, 0, 0}; + uint64_t bytes, success, failed, unsupported; + + if (unlikely(GETSYSCTL_SIMPLE("kstat.zfs.misc.zio_trim.bytes", mib_bytes, bytes) || + GETSYSCTL_SIMPLE("kstat.zfs.misc.zio_trim.success", mib_success, success) || + GETSYSCTL_SIMPLE("kstat.zfs.misc.zio_trim.failed", mib_failed, failed) || + GETSYSCTL_SIMPLE("kstat.zfs.misc.zio_trim.unsupported", mib_unsupported, unsupported))) { + error("DISABLED: zfs.trim_bytes chart"); + error("DISABLED: zfs.trim_success chart"); + error("DISABLED: kstat.zfs.misc.zio_trim module"); + return 1; + } else { + + static RRDSET *st_bytes = NULL; + static RRDDIM *rd_bytes = NULL; + + if (unlikely(!st_bytes)) { + st_bytes = rrdset_create_localhost( + "zfs", + "trim_bytes", + NULL, + "trim", + NULL, + "Successfully TRIMmed bytes", + "bytes", + "freebsd.plugin", + "zfs", + 2320, + update_every, + RRDSET_TYPE_LINE + ); + + rd_bytes = rrddim_add(st_bytes, "TRIMmed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_bytes, rd_bytes, bytes); + rrdset_done(st_bytes); + + static RRDSET *st_requests = NULL; + static RRDDIM *rd_successful = NULL, *rd_failed = NULL, *rd_unsupported = NULL; + + if (unlikely(!st_requests)) { + st_requests = rrdset_create_localhost( + "zfs", + "trim_requests", + NULL, + "trim", + NULL, + "TRIM requests", + "requests", + "freebsd.plugin", + "zfs", + 2321, + update_every, + RRDSET_TYPE_STACKED + ); + + rd_successful = rrddim_add(st_requests, "successful", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st_requests, "failed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_unsupported = rrddim_add(st_requests, "unsupported", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_requests, rd_successful, success); + rrddim_set_by_pointer(st_requests, rd_failed, failed); + rrddim_set_by_pointer(st_requests, rd_unsupported, unsupported); + rrdset_done(st_requests); + + } + + return 0; +} diff --git a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c new file mode 100644 index 0000000..dd94a16 --- /dev/null +++ b/collectors/freebsd.plugin/freebsd_sysctl.c @@ -0,0 +1,3062 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_freebsd.h" + +#include +#include + +#define _KERNEL +#include +#include +#include +#undef _KERNEL + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// -------------------------------------------------------------------------------------------------------------------- +// common definitions and variables + +int system_pagesize = PAGE_SIZE; +int number_of_cpus = 1; +#if __FreeBSD_version >= 1200029 +struct __vmmeter { + uint64_t v_swtch; + uint64_t v_trap; + uint64_t v_syscall; + uint64_t v_intr; + uint64_t v_soft; + uint64_t v_vm_faults; + uint64_t v_io_faults; + uint64_t v_cow_faults; + uint64_t v_cow_optim; + uint64_t v_zfod; + uint64_t v_ozfod; + uint64_t v_swapin; + uint64_t v_swapout; + uint64_t v_swappgsin; + uint64_t v_swappgsout; + uint64_t v_vnodein; + uint64_t v_vnodeout; + uint64_t v_vnodepgsin; + uint64_t v_vnodepgsout; + uint64_t v_intrans; + uint64_t v_reactivated; + uint64_t v_pdwakeups; + uint64_t v_pdpages; + uint64_t v_pdshortfalls; + uint64_t v_dfree; + uint64_t v_pfree; + uint64_t v_tfree; + uint64_t v_forks; + uint64_t v_vforks; + uint64_t v_rforks; + uint64_t v_kthreads; + uint64_t v_forkpages; + uint64_t v_vforkpages; + uint64_t v_rforkpages; + uint64_t v_kthreadpages; + u_int v_page_size; + u_int v_page_count; + u_int v_free_reserved; + u_int v_free_target; + u_int v_free_min; + u_int v_free_count; + u_int v_wire_count; + u_int v_active_count; + u_int v_inactive_target; + u_int v_inactive_count; + u_int v_laundry_count; + u_int v_pageout_free_min; + u_int v_interrupt_free_min; + u_int v_free_severe; +}; +typedef struct __vmmeter vmmeter_t; +#else +typedef struct vmmeter vmmeter_t; +#endif + +#if (__FreeBSD_version >= 1101516 && __FreeBSD_version < 1200000) || __FreeBSD_version >= 1200015 +#define NETDATA_COLLECT_LAUNDRY 1 +#endif + +// FreeBSD plugin initialization + +int freebsd_plugin_init() +{ + system_pagesize = getpagesize(); + if (system_pagesize <= 0) { + error("FREEBSD: can't get system page size"); + return 1; + } + + if (unlikely(GETSYSCTL_BY_NAME("kern.smp.cpus", number_of_cpus))) { + error("FREEBSD: can't get number of cpus"); + return 1; + } + + if (unlikely(!number_of_cpus)) { + error("FREEBSD: wrong number of cpus"); + return 1; + } + + return 0; +} + +// vm.loadavg + +// FreeBSD calculates load averages once every 5 seconds +#define MIN_LOADAVG_UPDATE_EVERY 5 + +int do_vm_loadavg(int update_every, usec_t dt){ + static usec_t next_loadavg_dt = 0; + + if (next_loadavg_dt <= dt) { + static int mib[2] = {0, 0}; + struct loadavg sysload; + + if (unlikely(GETSYSCTL_SIMPLE("vm.loadavg", mib, sysload))) { + error("DISABLED: system.load chart"); + error("DISABLED: vm.loadavg module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd_load1 = NULL, *rd_load2 = NULL, *rd_load3 = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "load", + NULL, + "load", + NULL, + "System Load Average", + "load", + "freebsd.plugin", + "vm.loadavg", + NETDATA_CHART_PRIO_SYSTEM_LOAD, + (update_every < MIN_LOADAVG_UPDATE_EVERY) ? + MIN_LOADAVG_UPDATE_EVERY : update_every, RRDSET_TYPE_LINE + ); + rd_load1 = rrddim_add(st, "load1", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rd_load2 = rrddim_add(st, "load5", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rd_load3 = rrddim_add(st, "load15", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_load1, (collected_number) ((double) sysload.ldavg[0] / sysload.fscale * 1000)); + rrddim_set_by_pointer(st, rd_load2, (collected_number) ((double) sysload.ldavg[1] / sysload.fscale * 1000)); + rrddim_set_by_pointer(st, rd_load3, (collected_number) ((double) sysload.ldavg[2] / sysload.fscale * 1000)); + rrdset_done(st); + + next_loadavg_dt = st->update_every * USEC_PER_SEC; + } + } + else + next_loadavg_dt -= dt; + + return 0; +} + +// vm.vmtotal + +int do_vm_vmtotal(int update_every, usec_t dt) { + (void)dt; + static int do_all_processes = -1, do_processes = -1, do_mem_real = -1; + + if (unlikely(do_all_processes == -1)) { + do_all_processes = config_get_boolean("plugin:freebsd:vm.vmtotal", "enable total processes", 1); + do_processes = config_get_boolean("plugin:freebsd:vm.vmtotal", "processes running", 1); + do_mem_real = config_get_boolean("plugin:freebsd:vm.vmtotal", "real memory", 1); + } + + if (likely(do_all_processes | do_processes | do_mem_real)) { + static int mib[2] = {0, 0}; + struct vmtotal vmtotal_data; + + if (unlikely(GETSYSCTL_SIMPLE("vm.vmtotal", mib, vmtotal_data))) { + do_all_processes = 0; + error("DISABLED: system.active_processes chart"); + do_processes = 0; + error("DISABLED: system.processes chart"); + do_mem_real = 0; + error("DISABLED: mem.real chart"); + error("DISABLED: vm.vmtotal module"); + return 1; + } else { + if (likely(do_all_processes)) { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "active_processes", + NULL, + "processes", + NULL, + "System Active Processes", + "processes", + "freebsd.plugin", + "vm.vmtotal", + NETDATA_CHART_PRIO_SYSTEM_ACTIVE_PROCESSES, + update_every, + RRDSET_TYPE_LINE + ); + rd = rrddim_add(st, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd, (vmtotal_data.t_rq + vmtotal_data.t_dw + vmtotal_data.t_pw + vmtotal_data.t_sl + vmtotal_data.t_sw)); + rrdset_done(st); + } + + if (likely(do_processes)) { + static RRDSET *st = NULL; + static RRDDIM *rd_running = NULL, *rd_blocked = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "processes", + NULL, + "processes", + NULL, + "System Processes", + "processes", + "freebsd.plugin", + "vm.vmtotal", + NETDATA_CHART_PRIO_SYSTEM_PROCESSES, + update_every, + RRDSET_TYPE_LINE + ); + + rd_running = rrddim_add(st, "running", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_blocked = rrddim_add(st, "blocked", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_running, vmtotal_data.t_rq); + rrddim_set_by_pointer(st, rd_blocked, (vmtotal_data.t_dw + vmtotal_data.t_pw)); + rrdset_done(st); + } + + if (likely(do_mem_real)) { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "mem", + "real", + NULL, + "system", + NULL, + "Total Real Memory In Use", + "MiB", + "freebsd.plugin", + "vm.vmtotal", + NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED, + update_every, + RRDSET_TYPE_AREA + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd = rrddim_add(st, "used", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd, vmtotal_data.t_rm); + rrdset_done(st); + } + } + } else { + error("DISABLED: vm.vmtotal module"); + return 1; + } + + return 0; +} + +// kern.cp_time + +int do_kern_cp_time(int update_every, usec_t dt) { + (void)dt; + + if (unlikely(CPUSTATES != 5)) { + error("FREEBSD: There are %d CPU states (5 was expected)", CPUSTATES); + error("DISABLED: system.cpu chart"); + error("DISABLED: kern.cp_time module"); + return 1; + } else { + static int mib[2] = {0, 0}; + long cp_time[CPUSTATES]; + + if (unlikely(GETSYSCTL_SIMPLE("kern.cp_time", mib, cp_time))) { + error("DISABLED: system.cpu chart"); + error("DISABLED: kern.cp_time module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd_nice = NULL, *rd_system = NULL, *rd_user = NULL, *rd_interrupt = NULL, *rd_idle = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "cpu", + NULL, + "cpu", + "system.cpu", + "Total CPU utilization", + "percentage", + "freebsd.plugin", + "kern.cp_time", + NETDATA_CHART_PRIO_SYSTEM_CPU, + update_every, + RRDSET_TYPE_STACKED + ); + + rd_nice = rrddim_add(st, "nice", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_system = rrddim_add(st, "system", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_user = rrddim_add(st, "user", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_interrupt = rrddim_add(st, "interrupt", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_idle = rrddim_add(st, "idle", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_hide(st, "idle"); + } + + rrddim_set_by_pointer(st, rd_nice, cp_time[1]); + rrddim_set_by_pointer(st, rd_system, cp_time[2]); + rrddim_set_by_pointer(st, rd_user, cp_time[0]); + rrddim_set_by_pointer(st, rd_interrupt, cp_time[3]); + rrddim_set_by_pointer(st, rd_idle, cp_time[4]); + rrdset_done(st); + } + } + + return 0; +} + +// kern.cp_times + +int do_kern_cp_times(int update_every, usec_t dt) { + (void)dt; + + if (unlikely(CPUSTATES != 5)) { + error("FREEBSD: There are %d CPU states (5 was expected)", CPUSTATES); + error("DISABLED: cpu.cpuXX charts"); + error("DISABLED: kern.cp_times module"); + return 1; + } else { + static int mib[2] = {0, 0}; + long cp_time[CPUSTATES]; + static long *pcpu_cp_time = NULL; + static int old_number_of_cpus = 0; + + if(unlikely(number_of_cpus != old_number_of_cpus)) + pcpu_cp_time = reallocz(pcpu_cp_time, sizeof(cp_time) * number_of_cpus); + if (unlikely(GETSYSCTL_WSIZE("kern.cp_times", mib, pcpu_cp_time, sizeof(cp_time) * number_of_cpus))) { + error("DISABLED: cpu.cpuXX charts"); + error("DISABLED: kern.cp_times module"); + return 1; + } else { + int i; + static struct cpu_chart { + char cpuid[MAX_INT_DIGITS + 4]; + RRDSET *st; + RRDDIM *rd_user; + RRDDIM *rd_nice; + RRDDIM *rd_system; + RRDDIM *rd_interrupt; + RRDDIM *rd_idle; + } *all_cpu_charts = NULL; + + if(unlikely(number_of_cpus > old_number_of_cpus)) { + all_cpu_charts = reallocz(all_cpu_charts, sizeof(struct cpu_chart) * number_of_cpus); + memset(&all_cpu_charts[old_number_of_cpus], 0, sizeof(struct cpu_chart) * (number_of_cpus - old_number_of_cpus)); + } + + for (i = 0; i < number_of_cpus; i++) { + if (unlikely(!all_cpu_charts[i].st)) { + snprintfz(all_cpu_charts[i].cpuid, MAX_INT_DIGITS, "cpu%d", i); + all_cpu_charts[i].st = rrdset_create_localhost( + "cpu", + all_cpu_charts[i].cpuid, + NULL, + "utilization", + "cpu.cpu", + "Core utilization", + "percentage", + "freebsd.plugin", + "kern.cp_times", + NETDATA_CHART_PRIO_CPU_PER_CORE, + update_every, + RRDSET_TYPE_STACKED + ); + + all_cpu_charts[i].rd_nice = rrddim_add(all_cpu_charts[i].st, "nice", NULL, 1, 1, + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + all_cpu_charts[i].rd_system = rrddim_add(all_cpu_charts[i].st, "system", NULL, 1, 1, + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + all_cpu_charts[i].rd_user = rrddim_add(all_cpu_charts[i].st, "user", NULL, 1, 1, + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + all_cpu_charts[i].rd_interrupt = rrddim_add(all_cpu_charts[i].st, "interrupt", NULL, 1, 1, + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + all_cpu_charts[i].rd_idle = rrddim_add(all_cpu_charts[i].st, "idle", NULL, 1, 1, + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_hide(all_cpu_charts[i].st, "idle"); + } + + rrddim_set_by_pointer(all_cpu_charts[i].st, all_cpu_charts[i].rd_nice, pcpu_cp_time[i * 5 + 1]); + rrddim_set_by_pointer(all_cpu_charts[i].st, all_cpu_charts[i].rd_system, pcpu_cp_time[i * 5 + 2]); + rrddim_set_by_pointer(all_cpu_charts[i].st, all_cpu_charts[i].rd_user, pcpu_cp_time[i * 5 + 0]); + rrddim_set_by_pointer(all_cpu_charts[i].st, all_cpu_charts[i].rd_interrupt, pcpu_cp_time[i * 5 + 3]); + rrddim_set_by_pointer(all_cpu_charts[i].st, all_cpu_charts[i].rd_idle, pcpu_cp_time[i * 5 + 4]); + rrdset_done(all_cpu_charts[i].st); + } + } + + old_number_of_cpus = number_of_cpus; + } + + return 0; +} + +// dev.cpu.temperature + +int do_dev_cpu_temperature(int update_every, usec_t dt) { + (void)dt; + + int i; + static int *mib = NULL; + static int *pcpu_temperature = NULL; + static int old_number_of_cpus = 0; + char char_mib[MAX_INT_DIGITS + 21]; + char char_rd[MAX_INT_DIGITS + 9]; + + if (unlikely(number_of_cpus != old_number_of_cpus)) { + pcpu_temperature = reallocz(pcpu_temperature, sizeof(int) * number_of_cpus); + mib = reallocz(mib, sizeof(int) * number_of_cpus * 4); + if (unlikely(number_of_cpus > old_number_of_cpus)) + memset(&mib[old_number_of_cpus * 4], 0, sizeof(int) * (number_of_cpus - old_number_of_cpus) * 4); + } + for (i = 0; i < number_of_cpus; i++) { + if (unlikely(!(mib[i * 4]))) + sprintf(char_mib, "dev.cpu.%d.temperature", i); + if (unlikely(getsysctl_simple(char_mib, &mib[i * 4], 4, &pcpu_temperature[i], sizeof(int)))) { + error("DISABLED: cpu.temperature chart"); + error("DISABLED: dev.cpu.temperature module"); + return 1; + } + } + + static RRDSET *st; + static RRDDIM **rd_pcpu_temperature; + + if (unlikely(number_of_cpus != old_number_of_cpus)) { + rd_pcpu_temperature = reallocz(rd_pcpu_temperature, sizeof(RRDDIM) * number_of_cpus); + if (unlikely(number_of_cpus > old_number_of_cpus)) + memset(&rd_pcpu_temperature[old_number_of_cpus], 0, sizeof(RRDDIM) * (number_of_cpus - old_number_of_cpus)); + } + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "cpu", + "temperature", + NULL, + "temperature", + "cpu.temperature", + "Core temperature", + "Celsius", + "freebsd.plugin", + "dev.cpu.temperature", + NETDATA_CHART_PRIO_CPU_TEMPERATURE, + update_every, + RRDSET_TYPE_LINE + ); + } + + for (i = 0; i < number_of_cpus; i++) { + if (unlikely(!rd_pcpu_temperature[i])) { + sprintf(char_rd, "cpu%d.temp", i); + rd_pcpu_temperature[i] = rrddim_add(st, char_rd, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_pcpu_temperature[i], (collected_number) ((double)pcpu_temperature[i] / 10 - 273.15)); + } + + rrdset_done(st); + + old_number_of_cpus = number_of_cpus; + + return 0; +} + +// dev.cpu.0.freq + +int do_dev_cpu_0_freq(int update_every, usec_t dt) { + (void)dt; + static int mib[4] = {0, 0, 0, 0}; + int cpufreq; + + if (unlikely(GETSYSCTL_SIMPLE("dev.cpu.0.freq", mib, cpufreq))) { + error("DISABLED: cpu.scaling_cur_freq chart"); + error("DISABLED: dev.cpu.0.freq module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "cpu", + "scaling_cur_freq", + NULL, + "cpufreq", + NULL, + "Current CPU Scaling Frequency", + "MHz", + "freebsd.plugin", + "dev.cpu.0.freq", + NETDATA_CHART_PRIO_CPUFREQ_SCALING_CUR_FREQ, + update_every, + RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "frequency", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd, cpufreq); + rrdset_done(st); + } + + return 0; +} + +// -------------------------------------------------------------------------------------------------------------------- +// hw.intrcnt + +int do_hw_intcnt(int update_every, usec_t dt) { + (void)dt; + static int mib_hw_intrcnt[2] = {0, 0}; + size_t intrcnt_size = 0; + + if (unlikely(GETSYSCTL_SIZE("hw.intrcnt", mib_hw_intrcnt, intrcnt_size))) { + error("DISABLED: system.intr chart"); + error("DISABLED: system.interrupts chart"); + error("DISABLED: hw.intrcnt module"); + return 1; + } else { + unsigned long nintr = 0; + static unsigned long old_nintr = 0; + static unsigned long *intrcnt = NULL; + + nintr = intrcnt_size / sizeof(u_long); + if (unlikely(nintr != old_nintr)) + intrcnt = reallocz(intrcnt, nintr * sizeof(u_long)); + if (unlikely(GETSYSCTL_WSIZE("hw.intrcnt", mib_hw_intrcnt, intrcnt, nintr * sizeof(u_long)))) { + error("DISABLED: system.intr chart"); + error("DISABLED: system.interrupts chart"); + error("DISABLED: hw.intrcnt module"); + return 1; + } else { + unsigned long long totalintr = 0; + unsigned long i; + + for (i = 0; i < nintr; i++) + totalintr += intrcnt[i]; + + static RRDSET *st_intr = NULL; + static RRDDIM *rd_intr = NULL; + + if (unlikely(!st_intr)) { + st_intr = rrdset_create_localhost( + "system", + "intr", + NULL, + "interrupts", + NULL, + "Total Hardware Interrupts", + "interrupts/s", + "freebsd.plugin", + "hw.intrcnt", + NETDATA_CHART_PRIO_SYSTEM_INTR, + update_every, + RRDSET_TYPE_LINE + ); + rrdset_flag_set(st_intr, RRDSET_FLAG_DETAIL); + + rd_intr = rrddim_add(st_intr, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_intr, rd_intr, totalintr); + rrdset_done(st_intr); + + size_t size; + static int mib_hw_intrnames[2] = {0, 0}; + static char *intrnames = NULL; + + if (unlikely(GETSYSCTL_SIZE("hw.intrnames", mib_hw_intrnames, size))) { + error("DISABLED: system.intr chart"); + error("DISABLED: system.interrupts chart"); + error("DISABLED: hw.intrcnt module"); + return 1; + } else { + if (unlikely(nintr != old_nintr)) + intrnames = reallocz(intrnames, size); + if (unlikely(GETSYSCTL_WSIZE("hw.intrnames", mib_hw_intrnames, intrnames, size))) { + error("DISABLED: system.intr chart"); + error("DISABLED: system.interrupts chart"); + error("DISABLED: hw.intrcnt module"); + return 1; + } else { + static RRDSET *st_interrupts = NULL; + + if (unlikely(!st_interrupts)) { + st_interrupts = rrdset_create_localhost( + "system", + "interrupts", + NULL, + "interrupts", + NULL, + "System interrupts", + "interrupts/s", + "freebsd.plugin", + "hw.intrcnt", + NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS, + update_every, + RRDSET_TYPE_STACKED + ); + } + + for (i = 0; i < nintr; i++) { + void *p; + + p = intrnames + i * (strlen(intrnames) + 1); + if (unlikely((intrcnt[i] != 0) && (*(char *) p != 0))) { + RRDDIM *rd_interrupts = rrddim_find_active(st_interrupts, p); + + if (unlikely(!rd_interrupts)) + rd_interrupts = rrddim_add(st_interrupts, p, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_interrupts, rd_interrupts, intrcnt[i]); + } + } + rrdset_done(st_interrupts); + } + } + } + + old_nintr = nintr; + } + + return 0; +} + +// vm.stats.sys.v_intr + +int do_vm_stats_sys_v_intr(int update_every, usec_t dt) { + (void)dt; + static int mib[4] = {0, 0, 0, 0}; + u_int int_number; + + if (unlikely(GETSYSCTL_SIMPLE("vm.stats.sys.v_intr", mib, int_number))) { + error("DISABLED: system.dev_intr chart"); + error("DISABLED: vm.stats.sys.v_intr module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "dev_intr", + NULL, + "interrupts", + NULL, + "Device Interrupts", + "interrupts/s", + "freebsd.plugin", + "vm.stats.sys.v_intr", + NETDATA_CHART_PRIO_SYSTEM_DEV_INTR, + update_every, + RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd, int_number); + rrdset_done(st); + } + + return 0; +} + +// vm.stats.sys.v_soft + +int do_vm_stats_sys_v_soft(int update_every, usec_t dt) { + (void)dt; + static int mib[4] = {0, 0, 0, 0}; + u_int soft_intr_number; + + if (unlikely(GETSYSCTL_SIMPLE("vm.stats.sys.v_soft", mib, soft_intr_number))) { + error("DISABLED: system.dev_intr chart"); + error("DISABLED: vm.stats.sys.v_soft module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "soft_intr", + NULL, + "interrupts", + NULL, + "Software Interrupts", + "interrupts/s", + "freebsd.plugin", + "vm.stats.sys.v_soft", + NETDATA_CHART_PRIO_SYSTEM_SOFT_INTR, + update_every, + RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd, soft_intr_number); + rrdset_done(st); + } + + return 0; +} + +// vm.stats.sys.v_swtch + +int do_vm_stats_sys_v_swtch(int update_every, usec_t dt) { + (void)dt; + static int mib[4] = {0, 0, 0, 0}; + u_int ctxt_number; + + if (unlikely(GETSYSCTL_SIMPLE("vm.stats.sys.v_swtch", mib, ctxt_number))) { + error("DISABLED: system.ctxt chart"); + error("DISABLED: vm.stats.sys.v_swtch module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "ctxt", + NULL, + "processes", + NULL, + "CPU Context Switches", + "context switches/s", + "freebsd.plugin", + "vm.stats.sys.v_swtch", + NETDATA_CHART_PRIO_SYSTEM_CTXT, + update_every, + RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "switches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd, ctxt_number); + rrdset_done(st); + } + + return 0; +} + +// vm.stats.vm.v_forks + +int do_vm_stats_sys_v_forks(int update_every, usec_t dt) { + (void)dt; + static int mib[4] = {0, 0, 0, 0}; + u_int forks_number; + + if (unlikely(GETSYSCTL_SIMPLE("vm.stats.vm.v_forks", mib, forks_number))) { + error("DISABLED: system.forks chart"); + error("DISABLED: vm.stats.sys.v_swtch module"); + return 1; + } else { + + // -------------------------------------------------------------------- + + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "forks", + NULL, + "processes", + NULL, + "Started Processes", + "processes/s", + "freebsd.plugin", + "vm.stats.sys.v_swtch", + NETDATA_CHART_PRIO_SYSTEM_FORKS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd = rrddim_add(st, "started", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd, forks_number); + rrdset_done(st); + } + + return 0; +} + +// vm.swap_info + +int do_vm_swap_info(int update_every, usec_t dt) { + (void)dt; + static int mib[3] = {0, 0, 0}; + + if (unlikely(getsysctl_mib("vm.swap_info", mib, 2))) { + error("DISABLED: system.swap chart"); + error("DISABLED: vm.swap_info module"); + return 1; + } else { + int i; + struct xswdev xsw; + struct total_xsw { + collected_number bytes_used; + collected_number bytes_total; + } total_xsw = {0, 0}; + + for (i = 0; ; i++) { + size_t size; + + mib[2] = i; + size = sizeof(xsw); + if (unlikely(sysctl(mib, 3, &xsw, &size, NULL, 0) == -1 )) { + if (unlikely(errno != ENOENT)) { + error("FREEBSD: sysctl(%s...) failed: %s", "vm.swap_info", strerror(errno)); + error("DISABLED: system.swap chart"); + error("DISABLED: vm.swap_info module"); + return 1; + } else { + if (unlikely(size != sizeof(xsw))) { + error("FREEBSD: sysctl(%s...) expected %lu, got %lu", "vm.swap_info", (unsigned long)sizeof(xsw), (unsigned long)size); + error("DISABLED: system.swap chart"); + error("DISABLED: vm.swap_info module"); + return 1; + } else break; + } + } + total_xsw.bytes_used += xsw.xsw_used; + total_xsw.bytes_total += xsw.xsw_nblks; + } + + static RRDSET *st = NULL; + static RRDDIM *rd_free = NULL, *rd_used = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "swap", + NULL, + "swap", + NULL, + "System Swap", + "MiB", + "freebsd.plugin", + "vm.swap_info", + NETDATA_CHART_PRIO_SYSTEM_SWAP, + update_every, + RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_free = rrddim_add(st, "free", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rd_used = rrddim_add(st, "used", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_free, total_xsw.bytes_total - total_xsw.bytes_used); + rrddim_set_by_pointer(st, rd_used, total_xsw.bytes_used); + rrdset_done(st); + } + + return 0; +} + +// system.ram + +int do_system_ram(int update_every, usec_t dt) { + (void)dt; + static int mib_active_count[4] = {0, 0, 0, 0}, + mib_inactive_count[4] = {0, 0, 0, 0}, + mib_wire_count[4] = {0, 0, 0, 0}, +#if __FreeBSD_version < 1200016 + mib_cache_count[4] = {0, 0, 0, 0}, +#endif + mib_vfs_bufspace[2] = {0, 0}, + mib_free_count[4] = {0, 0, 0, 0}; + vmmeter_t vmmeter_data; + size_t vfs_bufspace_count; + +#if defined(NETDATA_COLLECT_LAUNDRY) + static int mib_laundry_count[4] = {0, 0, 0, 0}; +#endif + + if (unlikely(GETSYSCTL_SIMPLE("vm.stats.vm.v_active_count", mib_active_count, vmmeter_data.v_active_count) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_inactive_count", mib_inactive_count, vmmeter_data.v_inactive_count) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_wire_count", mib_wire_count, vmmeter_data.v_wire_count) || +#if __FreeBSD_version < 1200016 + GETSYSCTL_SIMPLE("vm.stats.vm.v_cache_count", mib_cache_count, vmmeter_data.v_cache_count) || +#endif +#if defined(NETDATA_COLLECT_LAUNDRY) + GETSYSCTL_SIMPLE("vm.stats.vm.v_laundry_count", mib_laundry_count, vmmeter_data.v_laundry_count) || +#endif + GETSYSCTL_SIMPLE("vfs.bufspace", mib_vfs_bufspace, vfs_bufspace_count) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_free_count", mib_free_count, vmmeter_data.v_free_count))) { + error("DISABLED: system.ram chart"); + error("DISABLED: system.ram module"); + return 1; + } else { + static RRDSET *st = NULL, *st_mem_available = NULL; + static RRDDIM *rd_free = NULL, *rd_active = NULL, *rd_inactive = NULL, *rd_wired = NULL, + *rd_cache = NULL, *rd_buffers = NULL, *rd_avail = NULL; + +#if defined(NETDATA_COLLECT_LAUNDRY) + static RRDDIM *rd_laundry = NULL; +#endif + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "ram", + NULL, + "ram", + NULL, + "System RAM", + "MiB", + "freebsd.plugin", + "system.ram", + NETDATA_CHART_PRIO_SYSTEM_RAM, + update_every, + RRDSET_TYPE_STACKED + ); + + rd_free = rrddim_add(st, "free", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rd_active = rrddim_add(st, "active", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rd_inactive = rrddim_add(st, "inactive", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rd_wired = rrddim_add(st, "wired", NULL, 1, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rd_cache = rrddim_add(st, "cache", NULL, 1, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); +#if defined(NETDATA_COLLECT_LAUNDRY) + rd_laundry = rrddim_add(st, "laundry", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); +#endif + rd_buffers = rrddim_add(st, "buffers", NULL, 1, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_free, vmmeter_data.v_free_count); + rrddim_set_by_pointer(st, rd_active, vmmeter_data.v_active_count); + rrddim_set_by_pointer(st, rd_inactive, vmmeter_data.v_inactive_count); + rrddim_set_by_pointer(st, rd_wired, (unsigned long long)vmmeter_data.v_wire_count * (unsigned long long)system_pagesize - zfs_arcstats_shrinkable_cache_size_bytes); +#if __FreeBSD_version < 1200016 + rrddim_set_by_pointer(st, rd_cache, (unsigned long long)vmmeter_data.v_cache_count * (unsigned long long)system_pagesize + zfs_arcstats_shrinkable_cache_size_bytes); +#else + rrddim_set_by_pointer(st, rd_cache, zfs_arcstats_shrinkable_cache_size_bytes); +#endif +#if defined(NETDATA_COLLECT_LAUNDRY) + rrddim_set_by_pointer(st, rd_laundry, vmmeter_data.v_laundry_count); +#endif + rrddim_set_by_pointer(st, rd_buffers, vfs_bufspace_count); + rrdset_done(st); + + if (unlikely(!st_mem_available)) { + st_mem_available = rrdset_create_localhost( + "mem", + "available", + NULL, + "system", + NULL, + "Available RAM for applications", + "MiB", + "freebsd.plugin", + "system.ram", + NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE, + update_every, + RRDSET_TYPE_AREA + ); + + rd_avail = rrddim_add(st_mem_available, "MemAvailable", "avail", system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + +#if __FreeBSD_version < 1200016 + rrddim_set_by_pointer(st_mem_available, rd_avail, vmmeter_data.v_inactive_count + vmmeter_data.v_free_count + vmmeter_data.v_cache_count + zfs_arcstats_shrinkable_cache_size_bytes / system_pagesize); +#else + rrddim_set_by_pointer(st_mem_available, rd_avail, vmmeter_data.v_inactive_count + vmmeter_data.v_free_count + zfs_arcstats_shrinkable_cache_size_bytes / system_pagesize); +#endif + + rrdset_done(st_mem_available); + } + + return 0; +} + +// vm.stats.vm.v_swappgs + +int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) { + (void)dt; + static int mib_swappgsin[4] = {0, 0, 0, 0}, mib_swappgsout[4] = {0, 0, 0, 0}; + vmmeter_t vmmeter_data; + + if (unlikely(GETSYSCTL_SIMPLE("vm.stats.vm.v_swappgsin", mib_swappgsin, vmmeter_data.v_swappgsin) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_swappgsout", mib_swappgsout, vmmeter_data.v_swappgsout))) { + error("DISABLED: system.swapio chart"); + error("DISABLED: vm.stats.vm.v_swappgs module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "swapio", + NULL, + "swap", + NULL, + "Swap I/O", + "KiB/s", + "freebsd.plugin", + "vm.stats.vm.v_swappgs", + NETDATA_CHART_PRIO_SYSTEM_SWAPIO, + update_every, + RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st, "in", NULL, system_pagesize, KILO_FACTOR, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "out", NULL, -system_pagesize, KILO_FACTOR, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, vmmeter_data.v_swappgsin); + rrddim_set_by_pointer(st, rd_out, vmmeter_data.v_swappgsout); + rrdset_done(st); + } + + return 0; +} + +// vm.stats.vm.v_pgfaults + +int do_vm_stats_sys_v_pgfaults(int update_every, usec_t dt) { + (void)dt; + static int mib_vm_faults[4] = {0, 0, 0, 0}, mib_io_faults[4] = {0, 0, 0, 0}, mib_cow_faults[4] = {0, 0, 0, 0}, + mib_cow_optim[4] = {0, 0, 0, 0}, mib_intrans[4] = {0, 0, 0, 0}; + vmmeter_t vmmeter_data; + + if (unlikely(GETSYSCTL_SIMPLE("vm.stats.vm.v_vm_faults", mib_vm_faults, vmmeter_data.v_vm_faults) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_io_faults", mib_io_faults, vmmeter_data.v_io_faults) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_cow_faults", mib_cow_faults, vmmeter_data.v_cow_faults) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_cow_optim", mib_cow_optim, vmmeter_data.v_cow_optim) || + GETSYSCTL_SIMPLE("vm.stats.vm.v_intrans", mib_intrans, vmmeter_data.v_intrans))) { + error("DISABLED: mem.pgfaults chart"); + error("DISABLED: vm.stats.vm.v_pgfaults module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd_memory = NULL, *rd_io_requiring = NULL, *rd_cow = NULL, + *rd_cow_optimized = NULL, *rd_in_transit = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "mem", + "pgfaults", + NULL, + "system", + NULL, + "Memory Page Faults", + "page faults/s", + "freebsd.plugin", + "vm.stats.vm.v_pgfaults", + NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_memory = rrddim_add(st, "memory", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_io_requiring = rrddim_add(st, "io_requiring", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cow = rrddim_add(st, "cow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cow_optimized = rrddim_add(st, "cow_optimized", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_transit = rrddim_add(st, "in_transit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_memory, vmmeter_data.v_vm_faults); + rrddim_set_by_pointer(st, rd_io_requiring, vmmeter_data.v_io_faults); + rrddim_set_by_pointer(st, rd_cow, vmmeter_data.v_cow_faults); + rrddim_set_by_pointer(st, rd_cow_optimized, vmmeter_data.v_cow_optim); + rrddim_set_by_pointer(st, rd_in_transit, vmmeter_data.v_intrans); + rrdset_done(st); + } + + return 0; +} + +// -------------------------------------------------------------------------------------------------------------------- +// kern.ipc.sem + +int do_kern_ipc_sem(int update_every, usec_t dt) { + (void)dt; + static int mib_semmni[3] = {0, 0, 0}; + struct ipc_sem { + int semmni; + collected_number sets; + collected_number semaphores; + } ipc_sem = {0, 0, 0}; + + if (unlikely(GETSYSCTL_SIMPLE("kern.ipc.semmni", mib_semmni, ipc_sem.semmni))) { + error("DISABLED: system.ipc_semaphores chart"); + error("DISABLED: system.ipc_semaphore_arrays chart"); + error("DISABLED: kern.ipc.sem module"); + return 1; + } else { + static struct semid_kernel *ipc_sem_data = NULL; + static int old_semmni = 0; + static int mib_sema[3] = {0, 0, 0}; + + if (unlikely(ipc_sem.semmni != old_semmni)) { + ipc_sem_data = reallocz(ipc_sem_data, sizeof(struct semid_kernel) * ipc_sem.semmni); + old_semmni = ipc_sem.semmni; + } + if (unlikely(GETSYSCTL_WSIZE("kern.ipc.sema", mib_sema, ipc_sem_data, sizeof(struct semid_kernel) * ipc_sem.semmni))) { + error("DISABLED: system.ipc_semaphores chart"); + error("DISABLED: system.ipc_semaphore_arrays chart"); + error("DISABLED: kern.ipc.sem module"); + return 1; + } else { + int i; + + for (i = 0; i < ipc_sem.semmni; i++) { + if (unlikely(ipc_sem_data[i].u.sem_perm.mode & SEM_ALLOC)) { + ipc_sem.sets += 1; + ipc_sem.semaphores += ipc_sem_data[i].u.sem_nsems; + } + } + + static RRDSET *st_semaphores = NULL, *st_semaphore_arrays = NULL; + static RRDDIM *rd_semaphores = NULL, *rd_semaphore_arrays = NULL; + + if (unlikely(!st_semaphores)) { + st_semaphores = rrdset_create_localhost( + "system", + "ipc_semaphores", + NULL, + "ipc semaphores", + NULL, + "IPC Semaphores", + "semaphores", + "freebsd.plugin", + "kern.ipc.sem", + NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES, + update_every, + RRDSET_TYPE_AREA + ); + + rd_semaphores = rrddim_add(st_semaphores, "semaphores", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_semaphores, rd_semaphores, ipc_sem.semaphores); + rrdset_done(st_semaphores); + + if (unlikely(!st_semaphore_arrays)) { + st_semaphore_arrays = rrdset_create_localhost( + "system", + "ipc_semaphore_arrays", + NULL, + "ipc semaphores", + NULL, + "IPC Semaphore Arrays", + "arrays", + "freebsd.plugin", + "kern.ipc.sem", + NETDATA_CHART_PRIO_SYSTEM_IPC_SEM_ARRAYS, + update_every, + RRDSET_TYPE_AREA + ); + + rd_semaphore_arrays = rrddim_add(st_semaphore_arrays, "arrays", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_semaphore_arrays, rd_semaphore_arrays, ipc_sem.sets); + rrdset_done(st_semaphore_arrays); + } + } + + return 0; +} + +// kern.ipc.shm + +int do_kern_ipc_shm(int update_every, usec_t dt) { + (void)dt; + static int mib_shmmni[3] = {0, 0, 0}; + struct ipc_shm { + u_long shmmni; + collected_number segs; + collected_number segsize; + } ipc_shm = {0, 0, 0}; + + if (unlikely(GETSYSCTL_SIMPLE("kern.ipc.shmmni", mib_shmmni, ipc_shm.shmmni))) { + error("DISABLED: system.ipc_shared_mem_segs chart"); + error("DISABLED: system.ipc_shared_mem_size chart"); + error("DISABLED: kern.ipc.shmmodule"); + return 1; + } else { + static struct shmid_kernel *ipc_shm_data = NULL; + static u_long old_shmmni = 0; + static int mib_shmsegs[3] = {0, 0, 0}; + + if (unlikely(ipc_shm.shmmni != old_shmmni)) { + ipc_shm_data = reallocz(ipc_shm_data, sizeof(struct shmid_kernel) * ipc_shm.shmmni); + old_shmmni = ipc_shm.shmmni; + } + if (unlikely( + GETSYSCTL_WSIZE("kern.ipc.shmsegs", mib_shmsegs, ipc_shm_data, sizeof(struct shmid_kernel) * ipc_shm.shmmni))) { + error("DISABLED: system.ipc_shared_mem_segs chart"); + error("DISABLED: system.ipc_shared_mem_size chart"); + error("DISABLED: kern.ipc.shmmodule"); + return 1; + } else { + unsigned long i; + + for (i = 0; i < ipc_shm.shmmni; i++) { + if (unlikely(ipc_shm_data[i].u.shm_perm.mode & 0x0800)) { + ipc_shm.segs += 1; + ipc_shm.segsize += ipc_shm_data[i].u.shm_segsz; + } + } + + static RRDSET *st_segs = NULL, *st_size = NULL; + static RRDDIM *rd_segments = NULL, *rd_allocated = NULL; + + if (unlikely(!st_segs)) { + st_segs = rrdset_create_localhost( + "system", + "ipc_shared_mem_segs", + NULL, + "ipc shared memory", + NULL, + "IPC Shared Memory Segments", + "segments", + "freebsd.plugin", + "kern.ipc.shm", + NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SEGS, + update_every, + RRDSET_TYPE_AREA + ); + + rd_segments = rrddim_add(st_segs, "segments", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_segs, rd_segments, ipc_shm.segs); + rrdset_done(st_segs); + + if (unlikely(!st_size)) { + st_size = rrdset_create_localhost( + "system", + "ipc_shared_mem_size", + NULL, + "ipc shared memory", + NULL, + "IPC Shared Memory Segments Size", + "KiB", + "freebsd.plugin", + "kern.ipc.shm", + NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SIZE, + update_every, + RRDSET_TYPE_AREA + ); + + rd_allocated = rrddim_add(st_size, "allocated", NULL, 1, KILO_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_size, rd_allocated, ipc_shm.segsize); + rrdset_done(st_size); + } + } + + return 0; +} + +// kern.ipc.msq + +int do_kern_ipc_msq(int update_every, usec_t dt) { + (void)dt; + static int mib_msgmni[3] = {0, 0, 0}; + struct ipc_msq { + int msgmni; + collected_number queues; + collected_number messages; + collected_number usedsize; + collected_number allocsize; + } ipc_msq = {0, 0, 0, 0, 0}; + + if (unlikely(GETSYSCTL_SIMPLE("kern.ipc.msgmni", mib_msgmni, ipc_msq.msgmni))) { + error("DISABLED: system.ipc_msq_queues chart"); + error("DISABLED: system.ipc_msq_messages chart"); + error("DISABLED: system.ipc_msq_size chart"); + error("DISABLED: kern.ipc.msg module"); + return 1; + } else { + static struct msqid_kernel *ipc_msq_data = NULL; + static int old_msgmni = 0; + static int mib_msqids[3] = {0, 0, 0}; + + if (unlikely(ipc_msq.msgmni != old_msgmni)) { + ipc_msq_data = reallocz(ipc_msq_data, sizeof(struct msqid_kernel) * ipc_msq.msgmni); + old_msgmni = ipc_msq.msgmni; + } + if (unlikely( + GETSYSCTL_WSIZE("kern.ipc.msqids", mib_msqids, ipc_msq_data, sizeof(struct msqid_kernel) * ipc_msq.msgmni))) { + error("DISABLED: system.ipc_msq_queues chart"); + error("DISABLED: system.ipc_msq_messages chart"); + error("DISABLED: system.ipc_msq_size chart"); + error("DISABLED: kern.ipc.msg module"); + return 1; + } else { + int i; + + for (i = 0; i < ipc_msq.msgmni; i++) { + if (unlikely(ipc_msq_data[i].u.msg_qbytes != 0)) { + ipc_msq.queues += 1; + ipc_msq.messages += ipc_msq_data[i].u.msg_qnum; + ipc_msq.usedsize += ipc_msq_data[i].u.msg_cbytes; + ipc_msq.allocsize += ipc_msq_data[i].u.msg_qbytes; + } + } + + static RRDSET *st_queues = NULL, *st_messages = NULL, *st_size = NULL; + static RRDDIM *rd_queues = NULL, *rd_messages = NULL, *rd_allocated = NULL, *rd_used = NULL; + + if (unlikely(!st_queues)) { + st_queues = rrdset_create_localhost( + "system", + "ipc_msq_queues", + NULL, + "ipc message queues", + NULL, + "Number of IPC Message Queues", + "queues", + "freebsd.plugin", + "kern.ipc.msq", + NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_QUEUES, + update_every, + RRDSET_TYPE_AREA + ); + + rd_queues = rrddim_add(st_queues, "queues", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_queues, rd_queues, ipc_msq.queues); + rrdset_done(st_queues); + + if (unlikely(!st_messages)) { + st_messages = rrdset_create_localhost( + "system", + "ipc_msq_messages", + NULL, + "ipc message queues", + NULL, + "Number of Messages in IPC Message Queues", + "messages", + "freebsd.plugin", + "kern.ipc.msq", + NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_MESSAGES, + update_every, + RRDSET_TYPE_AREA + ); + + rd_messages = rrddim_add(st_messages, "messages", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_messages, rd_messages, ipc_msq.messages); + rrdset_done(st_messages); + + if (unlikely(!st_size)) { + st_size = rrdset_create_localhost( + "system", + "ipc_msq_size", + NULL, + "ipc message queues", + NULL, + "Size of IPC Message Queues", + "bytes", + "freebsd.plugin", + "kern.ipc.msq", + NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_SIZE, + update_every, + RRDSET_TYPE_LINE + ); + + rd_allocated = rrddim_add(st_size, "allocated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_used = rrddim_add(st_size, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_size, rd_allocated, ipc_msq.allocsize); + rrddim_set_by_pointer(st_size, rd_used, ipc_msq.usedsize); + rrdset_done(st_size); + } + } + + return 0; +} + +// uptime + +int do_uptime(int update_every, usec_t dt) { + (void)dt; + struct timespec up_time; + + clock_gettime(CLOCK_UPTIME, &up_time); + + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "uptime", + NULL, + "uptime", + NULL, + "System Uptime", + "seconds", + "freebsd.plugin", + "uptime", + NETDATA_CHART_PRIO_SYSTEM_UPTIME, + update_every, + RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd, up_time.tv_sec); + rrdset_done(st); + return 0; +} + +// net.isr + +int do_net_isr(int update_every, usec_t dt) { + (void)dt; + static int do_netisr = -1, do_netisr_per_core = -1; + + if (unlikely(do_netisr == -1)) { + do_netisr = config_get_boolean("plugin:freebsd:net.isr", "netisr", 1); + do_netisr_per_core = config_get_boolean("plugin:freebsd:net.isr", "netisr per core", 1); + } + + static struct netisr_stats { + collected_number dispatched; + collected_number hybrid_dispatched; + collected_number qdrops; + collected_number queued; + } *netisr_stats = NULL; + + if (likely(do_netisr || do_netisr_per_core)) { + static int mib_workstream[3] = {0, 0, 0}, mib_work[3] = {0, 0, 0}; + size_t netisr_workstream_size = 0, netisr_work_size = 0; + static struct sysctl_netisr_workstream *netisr_workstream = NULL; + static struct sysctl_netisr_work *netisr_work = NULL; + unsigned long num_netisr_workstreams = 0, num_netisr_works = 0; + int common_error = 0; + + if (unlikely(GETSYSCTL_SIZE("net.isr.workstream", mib_workstream, netisr_workstream_size))) { + common_error = 1; + } else if (unlikely(GETSYSCTL_SIZE("net.isr.work", mib_work, netisr_work_size))) { + common_error = 1; + } else { + static size_t old_netisr_workstream_size = 0; + + num_netisr_workstreams = netisr_workstream_size / sizeof(struct sysctl_netisr_workstream); + if (unlikely(netisr_workstream_size != old_netisr_workstream_size)) { + netisr_workstream = reallocz(netisr_workstream, + num_netisr_workstreams * sizeof(struct sysctl_netisr_workstream)); + old_netisr_workstream_size = netisr_workstream_size; + } + if (unlikely(GETSYSCTL_WSIZE("net.isr.workstream", mib_workstream, netisr_workstream, + num_netisr_workstreams * sizeof(struct sysctl_netisr_workstream)))){ + common_error = 1; + } else { + static size_t old_netisr_work_size = 0; + + num_netisr_works = netisr_work_size / sizeof(struct sysctl_netisr_work); + if (unlikely(netisr_work_size != old_netisr_work_size)) { + netisr_work = reallocz(netisr_work, num_netisr_works * sizeof(struct sysctl_netisr_work)); + old_netisr_work_size = netisr_work_size; + } + if (unlikely(GETSYSCTL_WSIZE("net.isr.work", mib_work, netisr_work, + num_netisr_works * sizeof(struct sysctl_netisr_work)))){ + common_error = 1; + } + } + } + if (unlikely(common_error)) { + do_netisr = 0; + error("DISABLED: system.softnet_stat chart"); + do_netisr_per_core = 0; + error("DISABLED: system.cpuX_softnet_stat chart"); + common_error = 0; + error("DISABLED: net.isr module"); + return 1; + } else { + unsigned long i, n; + int j; + static int old_number_of_cpus = 0; + + if (unlikely(number_of_cpus != old_number_of_cpus)) { + netisr_stats = reallocz(netisr_stats, (number_of_cpus + 1) * sizeof(struct netisr_stats)); + old_number_of_cpus = number_of_cpus; + } + memset(netisr_stats, 0, (number_of_cpus + 1) * sizeof(struct netisr_stats)); + for (i = 0; i < num_netisr_workstreams; i++) { + for (n = 0; n < num_netisr_works; n++) { + if (netisr_workstream[i].snws_wsid == netisr_work[n].snw_wsid) { + netisr_stats[netisr_workstream[i].snws_cpu].dispatched += netisr_work[n].snw_dispatched; + netisr_stats[netisr_workstream[i].snws_cpu].hybrid_dispatched += netisr_work[n].snw_hybrid_dispatched; + netisr_stats[netisr_workstream[i].snws_cpu].qdrops += netisr_work[n].snw_qdrops; + netisr_stats[netisr_workstream[i].snws_cpu].queued += netisr_work[n].snw_queued; + } + } + } + for (j = 0; j < number_of_cpus; j++) { + netisr_stats[number_of_cpus].dispatched += netisr_stats[j].dispatched; + netisr_stats[number_of_cpus].hybrid_dispatched += netisr_stats[j].hybrid_dispatched; + netisr_stats[number_of_cpus].qdrops += netisr_stats[j].qdrops; + netisr_stats[number_of_cpus].queued += netisr_stats[j].queued; + } + } + } else { + error("DISABLED: net.isr module"); + return 1; + } + + if (likely(do_netisr)) { + static RRDSET *st = NULL; + static RRDDIM *rd_dispatched = NULL, *rd_hybrid_dispatched = NULL, *rd_qdrops = NULL, *rd_queued = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "softnet_stat", + NULL, + "softnet_stat", + NULL, + "System softnet_stat", + "events/s", + "freebsd.plugin", + "net.isr", + NETDATA_CHART_PRIO_SYSTEM_SOFTNET_STAT, + update_every, + RRDSET_TYPE_LINE + ); + + rd_dispatched = rrddim_add(st, "dispatched", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_hybrid_dispatched = rrddim_add(st, "hybrid_dispatched", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_qdrops = rrddim_add(st, "qdrops", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_queued = rrddim_add(st, "queued", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_dispatched, netisr_stats[number_of_cpus].dispatched); + rrddim_set_by_pointer(st, rd_hybrid_dispatched, netisr_stats[number_of_cpus].hybrid_dispatched); + rrddim_set_by_pointer(st, rd_qdrops, netisr_stats[number_of_cpus].qdrops); + rrddim_set_by_pointer(st, rd_queued, netisr_stats[number_of_cpus].queued); + rrdset_done(st); + } + + if (likely(do_netisr_per_core)) { + static struct softnet_chart { + char netisr_cpuid[MAX_INT_DIGITS + 17]; + RRDSET *st; + RRDDIM *rd_dispatched; + RRDDIM *rd_hybrid_dispatched; + RRDDIM *rd_qdrops; + RRDDIM *rd_queued; + } *all_softnet_charts = NULL; + static int old_number_of_cpus = 0; + int i; + + if(unlikely(number_of_cpus > old_number_of_cpus)) { + all_softnet_charts = reallocz(all_softnet_charts, sizeof(struct softnet_chart) * number_of_cpus); + memset(&all_softnet_charts[old_number_of_cpus], 0, sizeof(struct softnet_chart) * (number_of_cpus - old_number_of_cpus)); + old_number_of_cpus = number_of_cpus; + } + + for (i = 0; i < number_of_cpus ;i++) { + snprintfz(all_softnet_charts[i].netisr_cpuid, MAX_INT_DIGITS + 17, "cpu%d_softnet_stat", i); + + if (unlikely(!all_softnet_charts[i].st)) { + all_softnet_charts[i].st = rrdset_create_localhost( + "cpu", + all_softnet_charts[i].netisr_cpuid, + NULL, + "softnet_stat", + NULL, + "Per CPU netisr statistics", + "events/s", + "freebsd.plugin", + "net.isr", + NETDATA_CHART_PRIO_SOFTNET_PER_CORE + i, + update_every, + RRDSET_TYPE_LINE + ); + + all_softnet_charts[i].rd_dispatched = rrddim_add(all_softnet_charts[i].st, "dispatched", + NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + all_softnet_charts[i].rd_hybrid_dispatched = rrddim_add(all_softnet_charts[i].st, "hybrid_dispatched", + NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + all_softnet_charts[i].rd_qdrops = rrddim_add(all_softnet_charts[i].st, "qdrops", + NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + all_softnet_charts[i].rd_queued = rrddim_add(all_softnet_charts[i].st, "queued", + NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(all_softnet_charts[i].st, all_softnet_charts[i].rd_dispatched, + netisr_stats[i].dispatched); + rrddim_set_by_pointer(all_softnet_charts[i].st, all_softnet_charts[i].rd_hybrid_dispatched, + netisr_stats[i].hybrid_dispatched); + rrddim_set_by_pointer(all_softnet_charts[i].st, all_softnet_charts[i].rd_qdrops, + netisr_stats[i].qdrops); + rrddim_set_by_pointer(all_softnet_charts[i].st, all_softnet_charts[i].rd_queued, + netisr_stats[i].queued); + rrdset_done(all_softnet_charts[i].st); + } + } + + return 0; +} + +// net.inet.tcp.states + +int do_net_inet_tcp_states(int update_every, usec_t dt) { + (void)dt; + static int mib[4] = {0, 0, 0, 0}; + uint64_t tcps_states[TCP_NSTATES]; + + // see http://net-snmp.sourceforge.net/docs/mibs/tcp.html + if (unlikely(GETSYSCTL_SIMPLE("net.inet.tcp.states", mib, tcps_states))) { + error("DISABLED: ipv4.tcpsock chart"); + error("DISABLED: net.inet.tcp.states module"); + return 1; + } else { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "tcpsock", + NULL, + "tcp", + NULL, + "IPv4 TCP Connections", + "active connections", + "freebsd.plugin", + "net.inet.tcp.states", + 2500, + update_every, + RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "CurrEstab", "connections", 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd, tcps_states[TCPS_ESTABLISHED]); + rrdset_done(st); + } + + return 0; +} + +// net.inet.tcp.stats + +int do_net_inet_tcp_stats(int update_every, usec_t dt) { + (void)dt; + static int do_tcp_packets = -1, do_tcp_errors = -1, do_tcp_handshake = -1, do_tcpext_connaborts = -1, do_tcpext_ofo = -1, + do_tcpext_syncookies = -1, do_tcpext_listen = -1, do_ecn = -1; + + if (unlikely(do_tcp_packets == -1)) { + do_tcp_packets = config_get_boolean("plugin:freebsd:net.inet.tcp.stats", "ipv4 TCP packets", 1); + do_tcp_errors = config_get_boolean("plugin:freebsd:net.inet.tcp.stats", "ipv4 TCP errors", 1); + do_tcp_handshake = config_get_boolean("plugin:freebsd:net.inet.tcp.stats", "ipv4 TCP handshake issues", 1); + do_tcpext_connaborts = config_get_boolean_ondemand("plugin:freebsd:net.inet.tcp.stats", "TCP connection aborts", + CONFIG_BOOLEAN_AUTO); + do_tcpext_ofo = config_get_boolean_ondemand("plugin:freebsd:net.inet.tcp.stats", "TCP out-of-order queue", + CONFIG_BOOLEAN_AUTO); + do_tcpext_syncookies = config_get_boolean_ondemand("plugin:freebsd:net.inet.tcp.stats", "TCP SYN cookies", + CONFIG_BOOLEAN_AUTO); + do_tcpext_listen = config_get_boolean_ondemand("plugin:freebsd:net.inet.tcp.stats", "TCP listen issues", + CONFIG_BOOLEAN_AUTO); + do_ecn = config_get_boolean_ondemand("plugin:freebsd:net.inet.tcp.stats", "ECN packets", + CONFIG_BOOLEAN_AUTO); + } + + // see http://net-snmp.sourceforge.net/docs/mibs/tcp.html + if (likely(do_tcp_packets || do_tcp_errors || do_tcp_handshake || do_tcpext_connaborts || do_tcpext_ofo || + do_tcpext_syncookies || do_tcpext_listen || do_ecn)) { + static int mib[4] = {0, 0, 0, 0}; + struct tcpstat tcpstat; + + if (unlikely(GETSYSCTL_SIMPLE("net.inet.tcp.stats", mib, tcpstat))) { + do_tcp_packets = 0; + error("DISABLED: ipv4.tcppackets chart"); + do_tcp_errors = 0; + error("DISABLED: ipv4.tcperrors chart"); + do_tcp_handshake = 0; + error("DISABLED: ipv4.tcphandshake chart"); + do_tcpext_connaborts = 0; + error("DISABLED: ipv4.tcpconnaborts chart"); + do_tcpext_ofo = 0; + error("DISABLED: ipv4.tcpofo chart"); + do_tcpext_syncookies = 0; + error("DISABLED: ipv4.tcpsyncookies chart"); + do_tcpext_listen = 0; + error("DISABLED: ipv4.tcplistenissues chart"); + do_ecn = 0; + error("DISABLED: ipv4.ecnpkts chart"); + error("DISABLED: net.inet.tcp.stats module"); + return 1; + } else { + if (likely(do_tcp_packets)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in_segs = NULL, *rd_out_segs = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "tcppackets", + NULL, + "tcp", + NULL, + "IPv4 TCP Packets", + "packets/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 2600, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in_segs = rrddim_add(st, "InSegs", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_segs = rrddim_add(st, "OutSegs", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_segs, tcpstat.tcps_rcvtotal); + rrddim_set_by_pointer(st, rd_out_segs, tcpstat.tcps_sndtotal); + rrdset_done(st); + } + + if (likely(do_tcp_errors)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in_errs = NULL, *rd_in_csum_errs = NULL, *rd_retrans_segs = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "tcperrors", + NULL, + "tcp", + NULL, + "IPv4 TCP Errors", + "packets/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 2700, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_in_errs = rrddim_add(st, "InErrs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_csum_errs = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_retrans_segs = rrddim_add(st, "RetransSegs", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + +#if __FreeBSD__ >= 11 + rrddim_set_by_pointer(st, rd_in_errs, tcpstat.tcps_rcvbadoff + tcpstat.tcps_rcvreassfull + + tcpstat.tcps_rcvshort); +#else + rrddim_set_by_pointer(st, rd_in_errs, tcpstat.tcps_rcvbadoff + tcpstat.tcps_rcvshort); +#endif + rrddim_set_by_pointer(st, rd_in_csum_errs, tcpstat.tcps_rcvbadsum); + rrddim_set_by_pointer(st, rd_retrans_segs, tcpstat.tcps_sndrexmitpack); + rrdset_done(st); + } + + if (likely(do_tcp_handshake)) { + static RRDSET *st = NULL; + static RRDDIM *rd_estab_resets = NULL, *rd_active_opens = NULL, *rd_passive_opens = NULL, + *rd_attempt_fails = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "tcphandshake", + NULL, + "tcp", + NULL, + "IPv4 TCP Handshake Issues", + "events/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 2900, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_estab_resets = rrddim_add(st, "EstabResets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_active_opens = rrddim_add(st, "ActiveOpens", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_passive_opens = rrddim_add(st, "PassiveOpens", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_attempt_fails = rrddim_add(st, "AttemptFails", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_estab_resets, tcpstat.tcps_drops); + rrddim_set_by_pointer(st, rd_active_opens, tcpstat.tcps_connattempt); + rrddim_set_by_pointer(st, rd_passive_opens, tcpstat.tcps_accepts); + rrddim_set_by_pointer(st, rd_attempt_fails, tcpstat.tcps_conndrops); + rrdset_done(st); + } + + if (do_tcpext_connaborts == CONFIG_BOOLEAN_YES || (do_tcpext_connaborts == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_rcvpackafterwin || + tcpstat.tcps_rcvafterclose || + tcpstat.tcps_rcvmemdrop || + tcpstat.tcps_persistdrop || + tcpstat.tcps_finwait2_drops || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_connaborts = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_on_data = NULL, *rd_on_close = NULL, *rd_on_memory = NULL, + *rd_on_timeout = NULL, *rd_on_linger = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "tcpconnaborts", + NULL, + "tcp", + NULL, + "TCP Connection Aborts", + "connections/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 3010, + update_every, + RRDSET_TYPE_LINE + ); + + rd_on_data = rrddim_add(st, "TCPAbortOnData", "baddata", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_on_close = rrddim_add(st, "TCPAbortOnClose", "userclosed", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_on_memory = rrddim_add(st, "TCPAbortOnMemory", "nomemory", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_on_timeout = rrddim_add(st, "TCPAbortOnTimeout", "timeout", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_on_linger = rrddim_add(st, "TCPAbortOnLinger", "linger", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_on_data, tcpstat.tcps_rcvpackafterwin); + rrddim_set_by_pointer(st, rd_on_close, tcpstat.tcps_rcvafterclose); + rrddim_set_by_pointer(st, rd_on_memory, tcpstat.tcps_rcvmemdrop); + rrddim_set_by_pointer(st, rd_on_timeout, tcpstat.tcps_persistdrop); + rrddim_set_by_pointer(st, rd_on_linger, tcpstat.tcps_finwait2_drops); + rrdset_done(st); + } + + if (do_tcpext_ofo == CONFIG_BOOLEAN_YES || (do_tcpext_ofo == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_rcvoopack || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_ofo = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_ofo_queue = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "tcpofo", + NULL, + "tcp", + NULL, + "TCP Out-Of-Order Queue", + "packets/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 3050, + update_every, + RRDSET_TYPE_LINE + ); + + rd_ofo_queue = rrddim_add(st, "TCPOFOQueue", "inqueue", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ofo_queue, tcpstat.tcps_rcvoopack); + rrdset_done(st); + } + + if (do_tcpext_syncookies == CONFIG_BOOLEAN_YES || (do_tcpext_syncookies == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_sc_sendcookie || + tcpstat.tcps_sc_recvcookie || + tcpstat.tcps_sc_zonefail || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_syncookies = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_recv = NULL, *rd_send = NULL, *rd_failed = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "tcpsyncookies", + NULL, + "tcp", + NULL, + "TCP SYN Cookies", + "packets/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 3100, + update_every, + RRDSET_TYPE_LINE + ); + + rd_recv = rrddim_add(st, "SyncookiesRecv", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_send = rrddim_add(st, "SyncookiesSent", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st, "SyncookiesFailed", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_recv, tcpstat.tcps_sc_recvcookie); + rrddim_set_by_pointer(st, rd_send, tcpstat.tcps_sc_sendcookie); + rrddim_set_by_pointer(st, rd_failed, tcpstat.tcps_sc_zonefail); + rrdset_done(st); + } + + if(do_tcpext_listen == CONFIG_BOOLEAN_YES || (do_tcpext_listen == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_listendrop || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_listen = CONFIG_BOOLEAN_YES; + + static RRDSET *st_listen = NULL; + static RRDDIM *rd_overflows = NULL; + + if(unlikely(!st_listen)) { + + st_listen = rrdset_create_localhost( + "ipv4", + "tcplistenissues", + NULL, + "tcp", + NULL, + "TCP Listen Socket Issues", + "packets/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 3015, + update_every, + RRDSET_TYPE_LINE + ); + + rd_overflows = rrddim_add(st_listen, "ListenOverflows", "overflows", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_listen, rd_overflows, tcpstat.tcps_listendrop); + rrdset_done(st_listen); + } + + if (do_ecn == CONFIG_BOOLEAN_YES || (do_ecn == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_ecn_ce || + tcpstat.tcps_ecn_ect0 || + tcpstat.tcps_ecn_ect1 || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ecn = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_ce = NULL, *rd_no_ect = NULL, *rd_ect0 = NULL, *rd_ect1 = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "ecnpkts", + NULL, + "ecn", + NULL, + "IPv4 ECN Statistics", + "packets/s", + "freebsd.plugin", + "net.inet.tcp.stats", + 8700, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ce = rrddim_add(st, "InCEPkts", "CEP", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_no_ect = rrddim_add(st, "InNoECTPkts", "NoECTP", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ect0 = rrddim_add(st, "InECT0Pkts", "ECTP0", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ect1 = rrddim_add(st, "InECT1Pkts", "ECTP1", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ce, tcpstat.tcps_ecn_ce); + rrddim_set_by_pointer(st, rd_no_ect, tcpstat.tcps_ecn_ce - (tcpstat.tcps_ecn_ect0 + + tcpstat.tcps_ecn_ect1)); + rrddim_set_by_pointer(st, rd_ect0, tcpstat.tcps_ecn_ect0); + rrddim_set_by_pointer(st, rd_ect1, tcpstat.tcps_ecn_ect1); + rrdset_done(st); + } + + } + } else { + error("DISABLED: net.inet.tcp.stats module"); + return 1; + } + + return 0; +} + +// net.inet.udp.stats + +int do_net_inet_udp_stats(int update_every, usec_t dt) { + (void)dt; + static int do_udp_packets = -1, do_udp_errors = -1; + + if (unlikely(do_udp_packets == -1)) { + do_udp_packets = config_get_boolean("plugin:freebsd:net.inet.udp.stats", "ipv4 UDP packets", 1); + do_udp_errors = config_get_boolean("plugin:freebsd:net.inet.udp.stats", "ipv4 UDP errors", 1); + } + + // see http://net-snmp.sourceforge.net/docs/mibs/udp.html + if (likely(do_udp_packets || do_udp_errors)) { + static int mib[4] = {0, 0, 0, 0}; + struct udpstat udpstat; + + if (unlikely(GETSYSCTL_SIMPLE("net.inet.udp.stats", mib, udpstat))) { + do_udp_packets = 0; + error("DISABLED: ipv4.udppackets chart"); + do_udp_errors = 0; + error("DISABLED: ipv4.udperrors chart"); + error("DISABLED: net.inet.udp.stats module"); + return 1; + } else { + if (likely(do_udp_packets)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "udppackets", + NULL, + "udp", + NULL, + "IPv4 UDP Packets", + "packets/s", + "freebsd.plugin", + "net.inet.udp.stats", + 2601, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in = rrddim_add(st, "InDatagrams", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, udpstat.udps_ipackets); + rrddim_set_by_pointer(st, rd_out, udpstat.udps_opackets); + rrdset_done(st); + } + + if (likely(do_udp_errors)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in_errors = NULL, *rd_no_ports = NULL, *rd_recv_buf_errors = NULL, + *rd_in_csum_errors = NULL, *rd_ignored_multi = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "udperrors", + NULL, + "udp", + NULL, + "IPv4 UDP Errors", + "events/s", + "freebsd.plugin", + "net.inet.udp.stats", + 2701, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_in_errors = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_no_ports = rrddim_add(st, "NoPorts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_recv_buf_errors = rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_csum_errors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ignored_multi = rrddim_add(st, "IgnoredMulti", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_errors, udpstat.udps_hdrops + udpstat.udps_badlen); + rrddim_set_by_pointer(st, rd_no_ports, udpstat.udps_noport); + rrddim_set_by_pointer(st, rd_recv_buf_errors, udpstat.udps_fullsock); + rrddim_set_by_pointer(st, rd_in_csum_errors, udpstat.udps_badsum + udpstat.udps_nosum); + rrddim_set_by_pointer(st, rd_ignored_multi, udpstat.udps_filtermcast); + rrdset_done(st); + } + } + } else { + error("DISABLED: net.inet.udp.stats module"); + return 1; + } + + return 0; +} + +// net.inet.icmp.stats + +int do_net_inet_icmp_stats(int update_every, usec_t dt) { + (void)dt; + static int do_icmp_packets = -1, do_icmp_errors = -1, do_icmpmsg = -1; + + if (unlikely(do_icmp_packets == -1)) { + do_icmp_packets = config_get_boolean("plugin:freebsd:net.inet.icmp.stats", "ipv4 ICMP packets", 1); + do_icmp_errors = config_get_boolean("plugin:freebsd:net.inet.icmp.stats", "ipv4 ICMP errors", 1); + do_icmpmsg = config_get_boolean("plugin:freebsd:net.inet.icmp.stats", "ipv4 ICMP messages", 1); + } + + if (likely(do_icmp_packets || do_icmp_errors || do_icmpmsg)) { + static int mib[4] = {0, 0, 0, 0}; + struct icmpstat icmpstat; + struct icmp_total { + u_long msgs_in; + u_long msgs_out; + } icmp_total = {0, 0}; + + if (unlikely(GETSYSCTL_SIMPLE("net.inet.icmp.stats", mib, icmpstat))) { + do_icmp_packets = 0; + error("DISABLED: ipv4.icmp chart"); + do_icmp_errors = 0; + error("DISABLED: ipv4.icmp_errors chart"); + do_icmpmsg = 0; + error("DISABLED: ipv4.icmpmsg chart"); + error("DISABLED: net.inet.icmp.stats module"); + return 1; + } else { + int i; + + for (i = 0; i <= ICMP_MAXTYPE; i++) { + icmp_total.msgs_in += icmpstat.icps_inhist[i]; + icmp_total.msgs_out += icmpstat.icps_outhist[i]; + } + icmp_total.msgs_in += icmpstat.icps_badcode + icmpstat.icps_badlen + icmpstat.icps_checksum + icmpstat.icps_tooshort; + + if (likely(do_icmp_packets)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "icmp" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Packets" + , "packets/s" + , "freebsd.plugin" + , "net.inet.icmp.stats" + , 2602 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_in = rrddim_add(st, "InMsgs", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "OutMsgs", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, icmp_total.msgs_in); + rrddim_set_by_pointer(st, rd_out, icmp_total.msgs_out); + rrdset_done(st); + } + + if (likely(do_icmp_errors)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL, *rd_in_csum = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "icmp_errors" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Errors" + , "packets/s" + , "freebsd.plugin" + , "net.inet.icmp.stats" + , 2603 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_in = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "OutErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_csum = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, icmpstat.icps_badcode + icmpstat.icps_badlen + + icmpstat.icps_checksum + icmpstat.icps_tooshort); + rrddim_set_by_pointer(st, rd_out, icmpstat.icps_error); + rrddim_set_by_pointer(st, rd_in_csum, icmpstat.icps_checksum); + + rrdset_done(st); + } + + if (likely(do_icmpmsg)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in_reps = NULL, *rd_out_reps = NULL, *rd_in = NULL, *rd_out = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "icmpmsg" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Messages" + , "packets/s" + , "freebsd.plugin" + , "net.inet.icmp.stats" + , 2604 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_in_reps = rrddim_add(st, "InEchoReps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_reps = rrddim_add(st, "OutEchoReps", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in = rrddim_add(st, "InEchos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "OutEchos", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_reps, icmpstat.icps_inhist[ICMP_ECHOREPLY]); + rrddim_set_by_pointer(st, rd_out_reps, icmpstat.icps_outhist[ICMP_ECHOREPLY]); + rrddim_set_by_pointer(st, rd_in, icmpstat.icps_inhist[ICMP_ECHO]); + rrddim_set_by_pointer(st, rd_out, icmpstat.icps_outhist[ICMP_ECHO]); + rrdset_done(st); + } + } + } else { + error("DISABLED: net.inet.icmp.stats module"); + return 1; + } + + return 0; +} + +// net.inet.ip.stats + +int do_net_inet_ip_stats(int update_every, usec_t dt) { + (void)dt; + static int do_ip_packets = -1, do_ip_fragsout = -1, do_ip_fragsin = -1, do_ip_errors = -1; + + if (unlikely(do_ip_packets == -1)) { + do_ip_packets = config_get_boolean("plugin:freebsd:net.inet.ip.stats", "ipv4 packets", 1); + do_ip_fragsout = config_get_boolean("plugin:freebsd:net.inet.ip.stats", "ipv4 fragments sent", 1); + do_ip_fragsin = config_get_boolean("plugin:freebsd:net.inet.ip.stats", "ipv4 fragments assembly", 1); + do_ip_errors = config_get_boolean("plugin:freebsd:net.inet.ip.stats", "ipv4 errors", 1); + } + + // see also http://net-snmp.sourceforge.net/docs/mibs/ip.html + if (likely(do_ip_packets || do_ip_fragsout || do_ip_fragsin || do_ip_errors)) { + static int mib[4] = {0, 0, 0, 0}; + struct ipstat ipstat; + + if (unlikely(GETSYSCTL_SIMPLE("net.inet.ip.stats", mib, ipstat))) { + do_ip_packets = 0; + error("DISABLED: ipv4.packets chart"); + do_ip_fragsout = 0; + error("DISABLED: ipv4.fragsout chart"); + do_ip_fragsin = 0; + error("DISABLED: ipv4.fragsin chart"); + do_ip_errors = 0; + error("DISABLED: ipv4.errors chart"); + error("DISABLED: net.inet.ip.stats module"); + return 1; + } else { + if (likely(do_ip_packets)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in_receives = NULL, *rd_out_requests = NULL, *rd_forward_datagrams = NULL, + *rd_in_delivers = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "packets", + NULL, + "packets", + NULL, + "IPv4 Packets", + "packets/s", + "freebsd.plugin", + "net.inet.ip.stats", + 3000, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in_receives = rrddim_add(st, "InReceives", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_requests = rrddim_add(st, "OutRequests", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_forward_datagrams = rrddim_add(st, "ForwDatagrams", "forwarded", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_delivers = rrddim_add(st, "InDelivers", "delivered", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_receives, ipstat.ips_total); + rrddim_set_by_pointer(st, rd_out_requests, ipstat.ips_localout); + rrddim_set_by_pointer(st, rd_forward_datagrams, ipstat.ips_forward); + rrddim_set_by_pointer(st, rd_in_delivers, ipstat.ips_delivered); + rrdset_done(st); + } + + if (likely(do_ip_fragsout)) { + static RRDSET *st = NULL; + static RRDDIM *rd_ok = NULL, *rd_fails = NULL, *rd_created = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "fragsout", + NULL, + "fragments", + NULL, + "IPv4 Fragments Sent", + "packets/s", + "freebsd.plugin", + "net.inet.ip.stats", + 3010, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ok = rrddim_add(st, "FragOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_fails = rrddim_add(st, "FragFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_created = rrddim_add(st, "FragCreates", "created", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ok, ipstat.ips_fragmented); + rrddim_set_by_pointer(st, rd_fails, ipstat.ips_cantfrag); + rrddim_set_by_pointer(st, rd_created, ipstat.ips_ofragments); + rrdset_done(st); + } + + if (likely(do_ip_fragsin)) { + static RRDSET *st = NULL; + static RRDDIM *rd_ok = NULL, *rd_failed = NULL, *rd_all = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "fragsin", + NULL, + "fragments", + NULL, + "IPv4 Fragments Reassembly", + "packets/s", + "freebsd.plugin", + "net.inet.ip.stats", + 3011, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ok = rrddim_add(st, "ReasmOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st, "ReasmFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_all = rrddim_add(st, "ReasmReqds", "all", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ok, ipstat.ips_fragments); + rrddim_set_by_pointer(st, rd_failed, ipstat.ips_fragdropped); + rrddim_set_by_pointer(st, rd_all, ipstat.ips_reassembled); + rrdset_done(st); + } + + if (likely(do_ip_errors)) { + static RRDSET *st = NULL; + static RRDDIM *rd_in_discards = NULL, *rd_out_discards = NULL, + *rd_in_hdr_errors = NULL, *rd_out_no_routes = NULL, + *rd_in_addr_errors = NULL, *rd_in_unknown_protos = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4", + "errors", + NULL, + "errors", + NULL, + "IPv4 Errors", + "packets/s", + "freebsd.plugin", + "net.inet.ip.stats", + 3002, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_in_discards = rrddim_add(st, "InDiscards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_discards = rrddim_add(st, "OutDiscards", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_hdr_errors = rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_no_routes = rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_addr_errors = rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_unknown_protos = rrddim_add(st, "InUnknownProtos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_discards, ipstat.ips_badsum + ipstat.ips_tooshort + + ipstat.ips_toosmall + ipstat.ips_toolong); + rrddim_set_by_pointer(st, rd_out_discards, ipstat.ips_odropped); + rrddim_set_by_pointer(st, rd_in_hdr_errors, ipstat.ips_badhlen + ipstat.ips_badlen + + ipstat.ips_badoptions + ipstat.ips_badvers); + rrddim_set_by_pointer(st, rd_out_no_routes, ipstat.ips_noroute); + rrddim_set_by_pointer(st, rd_in_addr_errors, ipstat.ips_badaddr); + rrddim_set_by_pointer(st, rd_in_unknown_protos, ipstat.ips_noproto); + rrdset_done(st); + } + } + } else { + error("DISABLED: net.inet.ip.stats module"); + return 1; + } + + return 0; +} + +// net.inet6.ip6.stats + +int do_net_inet6_ip6_stats(int update_every, usec_t dt) { + (void)dt; + static int do_ip6_packets = -1, do_ip6_fragsout = -1, do_ip6_fragsin = -1, do_ip6_errors = -1; + + if (unlikely(do_ip6_packets == -1)) { + do_ip6_packets = config_get_boolean_ondemand("plugin:freebsd:net.inet6.ip6.stats", "ipv6 packets", + CONFIG_BOOLEAN_AUTO); + do_ip6_fragsout = config_get_boolean_ondemand("plugin:freebsd:net.inet6.ip6.stats", "ipv6 fragments sent", + CONFIG_BOOLEAN_AUTO); + do_ip6_fragsin = config_get_boolean_ondemand("plugin:freebsd:net.inet6.ip6.stats", "ipv6 fragments assembly", + CONFIG_BOOLEAN_AUTO); + do_ip6_errors = config_get_boolean_ondemand("plugin:freebsd:net.inet6.ip6.stats", "ipv6 errors", + CONFIG_BOOLEAN_AUTO); + } + + if (likely(do_ip6_packets || do_ip6_fragsout || do_ip6_fragsin || do_ip6_errors)) { + static int mib[4] = {0, 0, 0, 0}; + struct ip6stat ip6stat; + + if (unlikely(GETSYSCTL_SIMPLE("net.inet6.ip6.stats", mib, ip6stat))) { + do_ip6_packets = 0; + error("DISABLED: ipv6.packets chart"); + do_ip6_fragsout = 0; + error("DISABLED: ipv6.fragsout chart"); + do_ip6_fragsin = 0; + error("DISABLED: ipv6.fragsin chart"); + do_ip6_errors = 0; + error("DISABLED: ipv6.errors chart"); + error("DISABLED: net.inet6.ip6.stats module"); + return 1; + } else { + if (do_ip6_packets == CONFIG_BOOLEAN_YES || (do_ip6_packets == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_localout || + ip6stat.ip6s_total || + ip6stat.ip6s_forward || + ip6stat.ip6s_delivered || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_packets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, *rd_sent = NULL, *rd_forwarded = NULL, *rd_delivers = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "packets", + NULL, + "packets", + NULL, + "IPv6 Packets", + "packets/s", + "freebsd.plugin", + "net.inet6.ip6.stats", + 3000, + update_every, + RRDSET_TYPE_LINE + ); + + rd_received = rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_forwarded = rrddim_add(st, "forwarded", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_delivers = rrddim_add(st, "delivers", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_sent, ip6stat.ip6s_localout); + rrddim_set_by_pointer(st, rd_received, ip6stat.ip6s_total); + rrddim_set_by_pointer(st, rd_forwarded, ip6stat.ip6s_forward); + rrddim_set_by_pointer(st, rd_delivers, ip6stat.ip6s_delivered); + rrdset_done(st); + } + + if (do_ip6_fragsout == CONFIG_BOOLEAN_YES || (do_ip6_fragsout == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_fragmented || + ip6stat.ip6s_cantfrag || + ip6stat.ip6s_ofragments || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_fragsout = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_ok = NULL, *rd_failed = NULL, *rd_all = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "fragsout", + NULL, + "fragments", + NULL, + "IPv6 Fragments Sent", + "packets/s", + "freebsd.plugin", + "net.inet6.ip6.stats", + 3010, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ok = rrddim_add(st, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_all = rrddim_add(st, "all", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ok, ip6stat.ip6s_fragmented); + rrddim_set_by_pointer(st, rd_failed, ip6stat.ip6s_cantfrag); + rrddim_set_by_pointer(st, rd_all, ip6stat.ip6s_ofragments); + rrdset_done(st); + } + + if (do_ip6_fragsin == CONFIG_BOOLEAN_YES || (do_ip6_fragsin == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_reassembled || + ip6stat.ip6s_fragdropped || + ip6stat.ip6s_fragtimeout || + ip6stat.ip6s_fragments || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_fragsin = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_ok = NULL, *rd_failed = NULL, *rd_timeout = NULL, *rd_all = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "fragsin", + NULL, + "fragments", + NULL, + "IPv6 Fragments Reassembly", + "packets/s", + "freebsd.plugin", + "net.inet6.ip6.stats", + 3011, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ok = rrddim_add(st, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_timeout = rrddim_add(st, "timeout", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_all = rrddim_add(st, "all", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ok, ip6stat.ip6s_reassembled); + rrddim_set_by_pointer(st, rd_failed, ip6stat.ip6s_fragdropped); + rrddim_set_by_pointer(st, rd_timeout, ip6stat.ip6s_fragtimeout); + rrddim_set_by_pointer(st, rd_all, ip6stat.ip6s_fragments); + rrdset_done(st); + } + + if (do_ip6_errors == CONFIG_BOOLEAN_YES || (do_ip6_errors == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_toosmall || + ip6stat.ip6s_odropped || + ip6stat.ip6s_badoptions || + ip6stat.ip6s_badvers || + ip6stat.ip6s_exthdrtoolong || + ip6stat.ip6s_sources_none || + ip6stat.ip6s_tooshort || + ip6stat.ip6s_cantforward || + ip6stat.ip6s_noroute || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_errors = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_in_discards = NULL, *rd_out_discards = NULL, + *rd_in_hdr_errors = NULL, *rd_in_addr_errors = NULL, *rd_in_truncated_pkts = NULL, + *rd_in_no_routes = NULL, *rd_out_no_routes = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "errors", + NULL, + "errors", + NULL, + "IPv6 Errors", + "packets/s", + "freebsd.plugin", + "net.inet6.ip6.stats", + 3002, + update_every, + RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_in_discards = rrddim_add(st, "InDiscards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_discards = rrddim_add(st, "OutDiscards", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_hdr_errors = rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_addr_errors = rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_truncated_pkts = rrddim_add(st, "InTruncatedPkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_no_routes = rrddim_add(st, "InNoRoutes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_no_routes = rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_discards, ip6stat.ip6s_toosmall); + rrddim_set_by_pointer(st, rd_out_discards, ip6stat.ip6s_odropped); + rrddim_set_by_pointer(st, rd_in_hdr_errors, ip6stat.ip6s_badoptions + ip6stat.ip6s_badvers + + ip6stat.ip6s_exthdrtoolong); + rrddim_set_by_pointer(st, rd_in_addr_errors, ip6stat.ip6s_sources_none); + rrddim_set_by_pointer(st, rd_in_truncated_pkts, ip6stat.ip6s_tooshort); + rrddim_set_by_pointer(st, rd_in_no_routes, ip6stat.ip6s_cantforward); + rrddim_set_by_pointer(st, rd_out_no_routes, ip6stat.ip6s_noroute); + rrdset_done(st); + } + } + } else { + error("DISABLED: net.inet6.ip6.stats module"); + return 1; + } + + return 0; +} + +// net.inet6.icmp6.stats + +int do_net_inet6_icmp6_stats(int update_every, usec_t dt) { + (void)dt; + static int do_icmp6 = -1, do_icmp6_redir = -1, do_icmp6_errors = -1, do_icmp6_echos = -1, do_icmp6_router = -1, + do_icmp6_neighbor = -1, do_icmp6_types = -1; + + if (unlikely(do_icmp6 == -1)) { + do_icmp6 = config_get_boolean_ondemand("plugin:freebsd:net.inet6.icmp6.stats", "icmp", + CONFIG_BOOLEAN_AUTO); + do_icmp6_redir = config_get_boolean_ondemand("plugin:freebsd:net.inet6.icmp6.stats", "icmp redirects", + CONFIG_BOOLEAN_AUTO); + do_icmp6_errors = config_get_boolean_ondemand("plugin:freebsd:net.inet6.icmp6.stats", "icmp errors", + CONFIG_BOOLEAN_AUTO); + do_icmp6_echos = config_get_boolean_ondemand("plugin:freebsd:net.inet6.icmp6.stats", "icmp echos", + CONFIG_BOOLEAN_AUTO); + do_icmp6_router = config_get_boolean_ondemand("plugin:freebsd:net.inet6.icmp6.stats", "icmp router", + CONFIG_BOOLEAN_AUTO); + do_icmp6_neighbor = config_get_boolean_ondemand("plugin:freebsd:net.inet6.icmp6.stats", "icmp neighbor", + CONFIG_BOOLEAN_AUTO); + do_icmp6_types = config_get_boolean_ondemand("plugin:freebsd:net.inet6.icmp6.stats", "icmp types", + CONFIG_BOOLEAN_AUTO); + } + + if (likely(do_icmp6 || do_icmp6_redir || do_icmp6_errors || do_icmp6_echos || do_icmp6_router || do_icmp6_neighbor || do_icmp6_types)) { + static int mib[4] = {0, 0, 0, 0}; + struct icmp6stat icmp6stat; + + if (unlikely(GETSYSCTL_SIMPLE("net.inet6.icmp6.stats", mib, icmp6stat))) { + do_icmp6 = 0; + error("DISABLED: ipv6.icmp chart"); + do_icmp6_redir = 0; + error("DISABLED: ipv6.icmpredir chart"); + do_icmp6_errors = 0; + error("DISABLED: ipv6.icmperrors chart"); + do_icmp6_echos = 0; + error("DISABLED: ipv6.icmpechos chart"); + do_icmp6_router = 0; + error("DISABLED: ipv6.icmprouter chart"); + do_icmp6_neighbor = 0; + error("DISABLED: ipv6.icmpneighbor chart"); + do_icmp6_types = 0; + error("DISABLED: ipv6.icmptypes chart"); + error("DISABLED: net.inet6.icmp6.stats module"); + return 1; + } else { + int i; + struct icmp6_total { + u_long msgs_in; + u_long msgs_out; + } icmp6_total = {0, 0}; + + for (i = 0; i <= ICMP6_MAXTYPE; i++) { + icmp6_total.msgs_in += icmp6stat.icp6s_inhist[i]; + icmp6_total.msgs_out += icmp6stat.icp6s_outhist[i]; + } + icmp6_total.msgs_in += icmp6stat.icp6s_badcode + icmp6stat.icp6s_badlen + icmp6stat.icp6s_checksum + icmp6stat.icp6s_tooshort; + + // -------------------------------------------------------------------- + + if (do_icmp6 == CONFIG_BOOLEAN_YES || (do_icmp6 == CONFIG_BOOLEAN_AUTO && + (icmp6_total.msgs_in || + icmp6_total.msgs_out || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6 = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, *rd_sent = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "icmp", + NULL, + "icmp", + NULL, + "IPv6 ICMP Messages", + "messages/s", + "freebsd.plugin", + "net.inet6.icmp6.stats", + 10000, + update_every, + RRDSET_TYPE_LINE + ); + + rd_received = rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_received, icmp6_total.msgs_out); + rrddim_set_by_pointer(st, rd_sent, icmp6_total.msgs_in); + rrdset_done(st); + } + + if (do_icmp6_redir == CONFIG_BOOLEAN_YES || (do_icmp6_redir == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ND_REDIRECT] || + icmp6stat.icp6s_outhist[ND_REDIRECT] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_redir = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, *rd_sent = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "icmpredir", + NULL, + "icmp", + NULL, + "IPv6 ICMP Redirects", + "redirects/s", + "freebsd.plugin", + "net.inet6.icmp6.stats", + 10050, + update_every, + RRDSET_TYPE_LINE + ); + + rd_received = rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_received, icmp6stat.icp6s_outhist[ND_REDIRECT]); + rrddim_set_by_pointer(st, rd_sent, icmp6stat.icp6s_inhist[ND_REDIRECT]); + rrdset_done(st); + } + + if (do_icmp6_errors == CONFIG_BOOLEAN_YES || (do_icmp6_errors == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_badcode || + icmp6stat.icp6s_badlen || + icmp6stat.icp6s_checksum || + icmp6stat.icp6s_tooshort || + icmp6stat.icp6s_error || + icmp6stat.icp6s_inhist[ICMP6_DST_UNREACH] || + icmp6stat.icp6s_inhist[ICMP6_TIME_EXCEEDED] || + icmp6stat.icp6s_inhist[ICMP6_PARAM_PROB] || + icmp6stat.icp6s_outhist[ICMP6_DST_UNREACH] || + icmp6stat.icp6s_outhist[ICMP6_TIME_EXCEEDED] || + icmp6stat.icp6s_outhist[ICMP6_PARAM_PROB] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_errors = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_in_errors = NULL, *rd_out_errors = NULL, *rd_in_csum_errors = NULL, + *rd_in_dest_unreachs = NULL, *rd_in_pkt_too_bigs = NULL, *rd_in_time_excds = NULL, + *rd_in_parm_problems = NULL, *rd_out_dest_unreachs = NULL, *rd_out_time_excds = NULL, + *rd_out_parm_problems = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "icmperrors", + NULL, "icmp", + NULL, + "IPv6 ICMP Errors", + "errors/s", + "freebsd.plugin", + "net.inet6.icmp6.stats", + 10100, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in_errors = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_errors = rrddim_add(st, "OutErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_csum_errors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_dest_unreachs = rrddim_add(st, "InDestUnreachs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_pkt_too_bigs = rrddim_add(st, "InPktTooBigs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_time_excds = rrddim_add(st, "InTimeExcds", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_parm_problems = rrddim_add(st, "InParmProblems", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_dest_unreachs = rrddim_add(st, "OutDestUnreachs", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_time_excds = rrddim_add(st, "OutTimeExcds", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_parm_problems = rrddim_add(st, "OutParmProblems", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_errors, icmp6stat.icp6s_badcode + icmp6stat.icp6s_badlen + + icmp6stat.icp6s_checksum + icmp6stat.icp6s_tooshort); + rrddim_set_by_pointer(st, rd_out_errors, icmp6stat.icp6s_error); + rrddim_set_by_pointer(st, rd_in_csum_errors, icmp6stat.icp6s_checksum); + rrddim_set_by_pointer(st, rd_in_dest_unreachs, icmp6stat.icp6s_inhist[ICMP6_DST_UNREACH]); + rrddim_set_by_pointer(st, rd_in_pkt_too_bigs, icmp6stat.icp6s_badlen); + rrddim_set_by_pointer(st, rd_in_time_excds, icmp6stat.icp6s_inhist[ICMP6_TIME_EXCEEDED]); + rrddim_set_by_pointer(st, rd_in_parm_problems, icmp6stat.icp6s_inhist[ICMP6_PARAM_PROB]); + rrddim_set_by_pointer(st, rd_out_dest_unreachs, icmp6stat.icp6s_outhist[ICMP6_DST_UNREACH]); + rrddim_set_by_pointer(st, rd_out_time_excds, icmp6stat.icp6s_outhist[ICMP6_TIME_EXCEEDED]); + rrddim_set_by_pointer(st, rd_out_parm_problems, icmp6stat.icp6s_outhist[ICMP6_PARAM_PROB]); + rrdset_done(st); + } + + if (do_icmp6_echos == CONFIG_BOOLEAN_YES || (do_icmp6_echos == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ICMP6_ECHO_REQUEST] || + icmp6stat.icp6s_outhist[ICMP6_ECHO_REQUEST] || + icmp6stat.icp6s_inhist[ICMP6_ECHO_REPLY] || + icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_echos = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL, *rd_in_replies = NULL, *rd_out_replies = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "icmpechos", + NULL, + "icmp", + NULL, + "IPv6 ICMP Echo", + "messages/s", + "freebsd.plugin", + "net.inet6.icmp6.stats", + 10200, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in = rrddim_add(st, "InEchos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st, "OutEchos", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_replies = rrddim_add(st, "InEchoReplies", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_replies = rrddim_add(st, "OutEchoReplies", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in, icmp6stat.icp6s_inhist[ICMP6_ECHO_REQUEST]); + rrddim_set_by_pointer(st, rd_out, icmp6stat.icp6s_outhist[ICMP6_ECHO_REQUEST]); + rrddim_set_by_pointer(st, rd_in_replies, icmp6stat.icp6s_inhist[ICMP6_ECHO_REPLY]); + rrddim_set_by_pointer(st, rd_out_replies, icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]); + rrdset_done(st); + } + + if (do_icmp6_router == CONFIG_BOOLEAN_YES || (do_icmp6_router == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ND_ROUTER_SOLICIT] || + icmp6stat.icp6s_outhist[ND_ROUTER_SOLICIT] || + icmp6stat.icp6s_inhist[ND_ROUTER_ADVERT] || + icmp6stat.icp6s_outhist[ND_ROUTER_ADVERT] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_router = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_in_solicits = NULL, *rd_out_solicits = NULL, + *rd_in_advertisements = NULL, *rd_out_advertisements = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "icmprouter", + NULL, + "icmp", + NULL, + "IPv6 Router Messages", + "messages/s", + "freebsd.plugin", + "net.inet6.icmp6.stats", + 10400, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in_solicits = rrddim_add(st, "InSolicits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_solicits = rrddim_add(st, "OutSolicits", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_advertisements = rrddim_add(st, "InAdvertisements", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_advertisements = rrddim_add(st, "OutAdvertisements", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_solicits, icmp6stat.icp6s_inhist[ND_ROUTER_SOLICIT]); + rrddim_set_by_pointer(st, rd_out_solicits, icmp6stat.icp6s_outhist[ND_ROUTER_SOLICIT]); + rrddim_set_by_pointer(st, rd_in_advertisements, icmp6stat.icp6s_inhist[ND_ROUTER_ADVERT]); + rrddim_set_by_pointer(st, rd_out_advertisements, icmp6stat.icp6s_outhist[ND_ROUTER_ADVERT]); + rrdset_done(st); + } + + if (do_icmp6_neighbor == CONFIG_BOOLEAN_YES || (do_icmp6_neighbor == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ND_NEIGHBOR_SOLICIT] || + icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT] || + icmp6stat.icp6s_inhist[ND_NEIGHBOR_ADVERT] || + icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_neighbor = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_in_solicits = NULL, *rd_out_solicits = NULL, + *rd_in_advertisements = NULL, *rd_out_advertisements = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "icmpneighbor", + NULL, + "icmp", + NULL, + "IPv6 Neighbor Messages", + "messages/s", + "freebsd.plugin", + "net.inet6.icmp6.stats", + 10500, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in_solicits = rrddim_add(st, "InSolicits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_solicits = rrddim_add(st, "OutSolicits", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_advertisements = rrddim_add(st, "InAdvertisements", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_advertisements = rrddim_add(st, "OutAdvertisements", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_solicits, icmp6stat.icp6s_inhist[ND_NEIGHBOR_SOLICIT]); + rrddim_set_by_pointer(st, rd_out_solicits, icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT]); + rrddim_set_by_pointer(st, rd_in_advertisements, icmp6stat.icp6s_inhist[ND_NEIGHBOR_ADVERT]); + rrddim_set_by_pointer(st, rd_out_advertisements, icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT]); + rrdset_done(st); + } + + if (do_icmp6_types == CONFIG_BOOLEAN_YES || (do_icmp6_types == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[1] || + icmp6stat.icp6s_inhist[128] || + icmp6stat.icp6s_inhist[129] || + icmp6stat.icp6s_inhist[136] || + icmp6stat.icp6s_outhist[1] || + icmp6stat.icp6s_outhist[128] || + icmp6stat.icp6s_outhist[129] || + icmp6stat.icp6s_outhist[133] || + icmp6stat.icp6s_outhist[135] || + icmp6stat.icp6s_outhist[136] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_types = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_in_1 = NULL, *rd_in_128 = NULL, *rd_in_129 = NULL, *rd_in_136 = NULL, + *rd_out_1 = NULL, *rd_out_128 = NULL, *rd_out_129 = NULL, *rd_out_133 = NULL, + *rd_out_135 = NULL, *rd_out_143 = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6", + "icmptypes", + NULL, + "icmp", + NULL, + "IPv6 ICMP Types", + "messages/s", + "freebsd.plugin", + "net.inet6.icmp6.stats", + 10700, + update_every, + RRDSET_TYPE_LINE + ); + + rd_in_1 = rrddim_add(st, "InType1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_128 = rrddim_add(st, "InType128", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_129 = rrddim_add(st, "InType129", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_in_136 = rrddim_add(st, "InType136", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_1 = rrddim_add(st, "OutType1", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_128 = rrddim_add(st, "OutType128", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_129 = rrddim_add(st, "OutType129", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_133 = rrddim_add(st, "OutType133", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_135 = rrddim_add(st, "OutType135", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out_143 = rrddim_add(st, "OutType143", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_in_1, icmp6stat.icp6s_inhist[1]); + rrddim_set_by_pointer(st, rd_in_128, icmp6stat.icp6s_inhist[128]); + rrddim_set_by_pointer(st, rd_in_129, icmp6stat.icp6s_inhist[129]); + rrddim_set_by_pointer(st, rd_in_136, icmp6stat.icp6s_inhist[136]); + rrddim_set_by_pointer(st, rd_out_1, icmp6stat.icp6s_outhist[1]); + rrddim_set_by_pointer(st, rd_out_128, icmp6stat.icp6s_outhist[128]); + rrddim_set_by_pointer(st, rd_out_129, icmp6stat.icp6s_outhist[129]); + rrddim_set_by_pointer(st, rd_out_133, icmp6stat.icp6s_outhist[133]); + rrddim_set_by_pointer(st, rd_out_135, icmp6stat.icp6s_outhist[135]); + rrddim_set_by_pointer(st, rd_out_143, icmp6stat.icp6s_outhist[143]); + rrdset_done(st); + } + } + } else { + error("DISABLED: net.inet6.icmp6.stats module"); + return 1; + } + + return 0; +} diff --git a/collectors/freebsd.plugin/plugin_freebsd.c b/collectors/freebsd.plugin/plugin_freebsd.c new file mode 100644 index 0000000..a52ece3 --- /dev/null +++ b/collectors/freebsd.plugin/plugin_freebsd.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_freebsd.h" + +static struct freebsd_module { + const char *name; + const char *dim; + + int enabled; + + int (*func)(int update_every, usec_t dt); + + RRDDIM *rd; + +} freebsd_modules[] = { + + // system metrics + {.name = "kern.cp_time", .dim = "cp_time", .enabled = 1, .func = do_kern_cp_time}, + {.name = "vm.loadavg", .dim = "loadavg", .enabled = 1, .func = do_vm_loadavg}, + {.name = "system.ram", .dim = "system_ram", .enabled = 1, .func = do_system_ram}, + {.name = "vm.swap_info", .dim = "swap", .enabled = 1, .func = do_vm_swap_info}, + {.name = "vm.stats.vm.v_swappgs", .dim = "swap_io", .enabled = 1, .func = do_vm_stats_sys_v_swappgs}, + {.name = "vm.vmtotal", .dim = "vmtotal", .enabled = 1, .func = do_vm_vmtotal}, + {.name = "vm.stats.vm.v_forks", .dim = "forks", .enabled = 1, .func = do_vm_stats_sys_v_forks}, + {.name = "vm.stats.sys.v_swtch", .dim = "context_swtch", .enabled = 1, .func = do_vm_stats_sys_v_swtch}, + {.name = "hw.intrcnt", .dim = "hw_intr", .enabled = 1, .func = do_hw_intcnt}, + {.name = "vm.stats.sys.v_intr", .dim = "dev_intr", .enabled = 1, .func = do_vm_stats_sys_v_intr}, + {.name = "vm.stats.sys.v_soft", .dim = "soft_intr", .enabled = 1, .func = do_vm_stats_sys_v_soft}, + {.name = "net.isr", .dim = "net_isr", .enabled = 1, .func = do_net_isr}, + {.name = "kern.ipc.sem", .dim = "semaphores", .enabled = 1, .func = do_kern_ipc_sem}, + {.name = "kern.ipc.shm", .dim = "shared_memory", .enabled = 1, .func = do_kern_ipc_shm}, + {.name = "kern.ipc.msq", .dim = "message_queues", .enabled = 1, .func = do_kern_ipc_msq}, + {.name = "uptime", .dim = "uptime", .enabled = 1, .func = do_uptime}, + + // memory metrics + {.name = "vm.stats.vm.v_pgfaults", .dim = "pgfaults", .enabled = 1, .func = do_vm_stats_sys_v_pgfaults}, + + // CPU metrics + {.name = "kern.cp_times", .dim = "cp_times", .enabled = 1, .func = do_kern_cp_times}, + {.name = "dev.cpu.temperature", .dim = "cpu_temperature", .enabled = 1, .func = do_dev_cpu_temperature}, + {.name = "dev.cpu.0.freq", .dim = "cpu_frequency", .enabled = 1, .func = do_dev_cpu_0_freq}, + + // disk metrics + {.name = "kern.devstat", .dim = "kern_devstat", .enabled = 1, .func = do_kern_devstat}, + {.name = "getmntinfo", .dim = "getmntinfo", .enabled = 1, .func = do_getmntinfo}, + + // network metrics + {.name = "net.inet.tcp.states", .dim = "tcp_states", .enabled = 1, .func = do_net_inet_tcp_states}, + {.name = "net.inet.tcp.stats", .dim = "tcp_stats", .enabled = 1, .func = do_net_inet_tcp_stats}, + {.name = "net.inet.udp.stats", .dim = "udp_stats", .enabled = 1, .func = do_net_inet_udp_stats}, + {.name = "net.inet.icmp.stats", .dim = "icmp_stats", .enabled = 1, .func = do_net_inet_icmp_stats}, + {.name = "net.inet.ip.stats", .dim = "ip_stats", .enabled = 1, .func = do_net_inet_ip_stats}, + {.name = "net.inet6.ip6.stats", .dim = "ip6_stats", .enabled = 1, .func = do_net_inet6_ip6_stats}, + {.name = "net.inet6.icmp6.stats", .dim = "icmp6_stats", .enabled = 1, .func = do_net_inet6_icmp6_stats}, + + // network interfaces metrics + {.name = "getifaddrs", .dim = "getifaddrs", .enabled = 1, .func = do_getifaddrs}, + + // ZFS metrics + {.name = "kstat.zfs.misc.arcstats", .dim = "arcstats", .enabled = 1, .func = do_kstat_zfs_misc_arcstats}, + {.name = "kstat.zfs.misc.zio_trim", .dim = "trim", .enabled = 1, .func = do_kstat_zfs_misc_zio_trim}, + + // ipfw metrics + {.name = "ipfw", .dim = "ipfw", .enabled = 1, .func = do_ipfw}, + + // the terminator of this array + {.name = NULL, .dim = NULL, .enabled = 0, .func = NULL} +}; + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 33 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 33 +#endif + +static void freebsd_main_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *freebsd_main(void *ptr) +{ + worker_register("FREEBSD"); + + netdata_thread_cleanup_push(freebsd_main_cleanup, ptr); + + // initialize FreeBSD plugin + if (freebsd_plugin_init()) + netdata_cleanup_and_exit(1); + + // check the enabled status for each module + int i; + for (i = 0; freebsd_modules[i].name; i++) { + struct freebsd_module *pm = &freebsd_modules[i]; + + pm->enabled = config_get_boolean("plugin:freebsd", pm->name, pm->enabled); + pm->rd = NULL; + + worker_register_job_name(i, freebsd_modules[i].dim); + } + + usec_t step = localhost->rrd_update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (!netdata_exit) { + worker_is_idle(); + + usec_t hb_dt = heartbeat_next(&hb, step); + + if (unlikely(netdata_exit)) + break; + + for (i = 0; freebsd_modules[i].name; i++) { + struct freebsd_module *pm = &freebsd_modules[i]; + if (unlikely(!pm->enabled)) + continue; + + debug(D_PROCNETDEV_LOOP, "FREEBSD calling %s.", pm->name); + + worker_is_busy(i); + pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); + + if (unlikely(netdata_exit)) + break; + } + } + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/freebsd.plugin/plugin_freebsd.h b/collectors/freebsd.plugin/plugin_freebsd.h new file mode 100644 index 0000000..af7d082 --- /dev/null +++ b/collectors/freebsd.plugin/plugin_freebsd.h @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PLUGIN_FREEBSD_H +#define NETDATA_PLUGIN_FREEBSD_H 1 + +#include "daemon/common.h" + +#include + +#define KILO_FACTOR 1024 +#define MEGA_FACTOR 1048576 // 1024 * 1024 +#define GIGA_FACTOR 1073741824 // 1024 * 1024 * 1024 + +#define MAX_INT_DIGITS 10 // maximum number of digits for int + +int freebsd_plugin_init(); + +int do_vm_loadavg(int update_every, usec_t dt); +int do_vm_vmtotal(int update_every, usec_t dt); +int do_kern_cp_time(int update_every, usec_t dt); +int do_kern_cp_times(int update_every, usec_t dt); +int do_dev_cpu_temperature(int update_every, usec_t dt); +int do_dev_cpu_0_freq(int update_every, usec_t dt); +int do_hw_intcnt(int update_every, usec_t dt); +int do_vm_stats_sys_v_intr(int update_every, usec_t dt); +int do_vm_stats_sys_v_soft(int update_every, usec_t dt); +int do_vm_stats_sys_v_swtch(int update_every, usec_t dt); +int do_vm_stats_sys_v_forks(int update_every, usec_t dt); +int do_vm_swap_info(int update_every, usec_t dt); +int do_system_ram(int update_every, usec_t dt); +int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt); +int do_vm_stats_sys_v_pgfaults(int update_every, usec_t dt); +int do_kern_ipc_sem(int update_every, usec_t dt); +int do_kern_ipc_shm(int update_every, usec_t dt); +int do_kern_ipc_msq(int update_every, usec_t dt); +int do_uptime(int update_every, usec_t dt); +int do_net_isr(int update_every, usec_t dt); +int do_net_inet_tcp_states(int update_every, usec_t dt); +int do_net_inet_tcp_stats(int update_every, usec_t dt); +int do_net_inet_udp_stats(int update_every, usec_t dt); +int do_net_inet_icmp_stats(int update_every, usec_t dt); +int do_net_inet_ip_stats(int update_every, usec_t dt); +int do_net_inet6_ip6_stats(int update_every, usec_t dt); +int do_net_inet6_icmp6_stats(int update_every, usec_t dt); +int do_getifaddrs(int update_every, usec_t dt); +int do_getmntinfo(int update_every, usec_t dt); +int do_kern_devstat(int update_every, usec_t dt); +int do_kstat_zfs_misc_arcstats(int update_every, usec_t dt); +int do_kstat_zfs_misc_zio_trim(int update_every, usec_t dt); +int do_ipfw(int update_every, usec_t dt); + +// metrics that need to be shared among data collectors +extern unsigned long long zfs_arcstats_shrinkable_cache_size_bytes; + +#endif /* NETDATA_PLUGIN_FREEBSD_H */ diff --git a/collectors/freeipmi.plugin/Makefile.am b/collectors/freeipmi.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/freeipmi.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/freeipmi.plugin/README.md b/collectors/freeipmi.plugin/README.md new file mode 100644 index 0000000..ff13717 --- /dev/null +++ b/collectors/freeipmi.plugin/README.md @@ -0,0 +1,202 @@ + + +# freeipmi.plugin + +Netdata has a [freeipmi](https://www.gnu.org/software/freeipmi/) plugin. + +> FreeIPMI provides in-band and out-of-band IPMI software based on the IPMI v1.5/2.0 specification. The IPMI specification defines a set of interfaces for platform management and is implemented by a number vendors for system management. The features of IPMI that most users will be interested in are sensor monitoring, system event monitoring, power control, and serial-over-LAN (SOL). + +## Installing the FreeIPMI plugin + +When using our official DEB/RPM packages, the FreeIPMI plugin is included in a separate package named +`netdata-plugin-freeipmi` which needs to be manually installed using your system package manager. It is not +installed automatically due to the large number of dependencies it requires. + +When using a static build of Netdata, the FreeIPMI plugin will be included and installed automatically, though +you will still need to have FreeIPMI installed on your system to be able to use the plugin. + +When using a local build of Netdata, you need to ensure that the FreeIPMI development packages (typically called `libipmimonitoring-dev`, `libipmimonitoring-devel`, or `freeipmi-devel`) are installed when building Netdata. + +### Special Considerations + +Accessing IPMI requires root access, so the FreeIPMI plugin is automatically installed setuid root. + +FreeIPMI does not work correctly on IBM POWER systems, thus Netdata’s FreeIPMI plugin is not usable on such systems. + +If you have not previously used IPMI on your system, you will probably need to run the `ipmimonitoring` command as root to initiailze IPMI settings so that the Netdata plugin works correctly. It should return information about available seensors on the system. + +In some distributions `libipmimonitoring.pc` is located in a non-standard directory, which +can cause building the plugin to fail when building Netdata from source. In that case you +should find the file and link it to the standard pkg-config directory. Usually, running `sudo ln -s +/usr/lib/$(uname -m)-linux-gnu/pkgconfig/libipmimonitoring.pc/libipmimonitoring.pc /usr/lib/pkgconfig/libipmimonitoring.pc` +resolves this issue. + +## Netdata use + +The plugin creates (up to) 8 charts, based on the information collected from IPMI: + +1. number of sensors by state +2. number of events in SEL +3. Temperatures CELSIUS +4. Temperatures FAHRENHEIT +5. Voltages +6. Currents +7. Power +8. Fans + +It also adds 2 alarms: + +1. Sensors in non-nominal state (i.e. warning and critical) +2. SEL is non empty + +![image](https://cloud.githubusercontent.com/assets/2662304/23674138/88926a20-037d-11e7-89c0-20e74ee10cd1.png) + +The plugin does a speed test when it starts, to find out the duration needed by the IPMI processor to respond. Depending on the speed of your IPMI processor, charts may need several seconds to show up on the dashboard. + +## `freeipmi.plugin` configuration + +The plugin supports a few options. To see them, run: + +```text +# /usr/libexec/netdata/plugins.d/freeipmi.plugin -h + + netdata freeipmi.plugin 1.8.0-546-g72ce5d6b_rolling + Copyright (C) 2016-2017 Costa Tsaousis + Released under GNU General Public License v3 or later. + All rights reserved. + + This program is a data collector plugin for netdata. + + Available command line options: + + SECONDS data collection frequency + minimum: 5 + + debug enable verbose output + default: disabled + + sel + no-sel enable/disable SEL collection + default: enabled + + hostname HOST + username USER + password PASS connect to remote IPMI host + default: local IPMI processor + + noauthcodecheck don't check the authentication codes returned + + driver-type IPMIDRIVER + Specify the driver type to use instead of doing an auto selection. + The currently available outofband drivers are LAN and LAN_2_0, + which perform IPMI 1.5 and IPMI 2.0 respectively. + The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC. + + sdr-cache-dir PATH directory for SDR cache files + default: /tmp + + sensor-config-file FILE filename to read sensor configuration + default: system default + + ignore N1,N2,N3,... sensor IDs to ignore + default: none + + -v + -V + version print version and exit + + Linux kernel module for IPMI is CPU hungry. + On Linux run this to lower kipmiN CPU utilization: + # echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us + + or create: /etc/modprobe.d/ipmi.conf with these contents: + options ipmi_si kipmid_max_busy_us=10 + + For more information: + https://github.com/netdata/netdata/tree/master/collectors/freeipmi.plugin +``` + +You can set these options in `/etc/netdata/netdata.conf` at this section: + +``` +[plugin:freeipmi] + update every = 5 + command options = +``` + +Append to `command options =` the settings you need. The minimum `update every` is 5 (enforced internally by the plugin). IPMI is slow and CPU hungry. So, once every 5 seconds is pretty acceptable. + +## Ignoring specific sensors + +Specific sensor IDs can be excluded from freeipmi tools by editing `/etc/freeipmi/freeipmi.conf` and setting the IDs to be ignored at `ipmi-sensors-exclude-record-ids`. **However this file is not used by `libipmimonitoring`** (the library used by Netdata's `freeipmi.plugin`). + +So, `freeipmi.plugin` supports the option `ignore` that accepts a comma separated list of sensor IDs to ignore. To configure it, edit `/etc/netdata/netdata.conf` and set: + +``` +[plugin:freeipmi] + command options = ignore 1,2,3,4,... +``` + +To find the IDs to ignore, run the command `ipmimonitoring`. The first column is the wanted ID: + +``` +ID | Name | Type | State | Reading | Units | Event +1 | Ambient Temp | Temperature | Nominal | 26.00 | C | 'OK' +2 | Altitude | Other Units Based Sensor | Nominal | 480.00 | ft | 'OK' +3 | Avg Power | Current | Nominal | 100.00 | W | 'OK' +4 | Planar 3.3V | Voltage | Nominal | 3.29 | V | 'OK' +5 | Planar 5V | Voltage | Nominal | 4.90 | V | 'OK' +6 | Planar 12V | Voltage | Nominal | 11.99 | V | 'OK' +7 | Planar VBAT | Voltage | Nominal | 2.95 | V | 'OK' +8 | Fan 1A Tach | Fan | Nominal | 3132.00 | RPM | 'OK' +9 | Fan 1B Tach | Fan | Nominal | 2150.00 | RPM | 'OK' +10 | Fan 2A Tach | Fan | Nominal | 2494.00 | RPM | 'OK' +11 | Fan 2B Tach | Fan | Nominal | 1825.00 | RPM | 'OK' +12 | Fan 3A Tach | Fan | Nominal | 3538.00 | RPM | 'OK' +13 | Fan 3B Tach | Fan | Nominal | 2625.00 | RPM | 'OK' +14 | Fan 1 | Entity Presence | Nominal | N/A | N/A | 'Entity Present' +15 | Fan 2 | Entity Presence | Nominal | N/A | N/A | 'Entity Present' +... +``` + +## Debugging + +You can run the plugin by hand: + +```sh +# become user netdata +sudo su -s /bin/sh netdata + +# run the plugin in debug mode +/usr/libexec/netdata/plugins.d/freeipmi.plugin 5 debug +``` + +You will get verbose output on what the plugin does. + +## kipmi0 CPU usage + +There have been reports that kipmi is showing increased CPU when the IPMI is queried. To lower the CPU consumption of +the system you can issue this command: + +```sh +echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us +``` + +You can also permanently set the above setting by creating the file `/etc/modprobe.d/ipmi.conf` with this content: + +```sh +# prevent kipmi from consuming 100% CPU +options ipmi_si kipmid_max_busy_us=10 +``` + +This instructs the kernel IPMI module to pause for a tick between checking IPMI. Querying IPMI will be a lot slower now (e.g. several seconds for IPMI to respond), but `kipmi` will not use any noticeable CPU. You can also use a higher number (this is the number of microseconds to poll IPMI for a response, before waiting for a tick). + +If you need to disable IPMI for Netdata, edit `/etc/netdata/netdata.conf` and set: + +``` +[plugins] + freeipmi = no +``` diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c new file mode 100644 index 0000000..351b6e3 --- /dev/null +++ b/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -0,0 +1,1857 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +/* + * netdata freeipmi.plugin + * Copyright (C) 2017 Costa Tsaousis + * GPL v3+ + * + * Based on: + * ipmimonitoring-sensors.c,v 1.51 2016/11/02 23:46:24 chu11 Exp + * ipmimonitoring-sel.c,v 1.51 2016/11/02 23:46:24 chu11 Exp + * + * Copyright (C) 2007-2015 Lawrence Livermore National Security, LLC. + * Copyright (C) 2006-2007 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Albert Chu + * UCRL-CODE-222073 + */ + +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IPMI_PARSE_DEVICE_LAN_STR "lan" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR "lan_2_0" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR2 "lan20" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR3 "lan_20" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR4 "lan2_0" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR5 "lanplus" +#define IPMI_PARSE_DEVICE_KCS_STR "kcs" +#define IPMI_PARSE_DEVICE_SSIF_STR "ssif" +#define IPMI_PARSE_DEVICE_OPENIPMI_STR "openipmi" +#define IPMI_PARSE_DEVICE_OPENIPMI_STR2 "open" +#define IPMI_PARSE_DEVICE_SUNBMC_STR "sunbmc" +#define IPMI_PARSE_DEVICE_SUNBMC_STR2 "bmc" +#define IPMI_PARSE_DEVICE_INTELDCMI_STR "inteldcmi" + +#include +#include + +/* Communication Configuration - Initialize accordingly */ + +/* Hostname, NULL for In-band communication, non-null for a hostname */ +char *hostname = NULL; + +/* In-band Communication Configuration */ +int driver_type = -1; // IPMI_MONITORING_DRIVER_TYPE_KCS; /* or -1 for default */ +int disable_auto_probe = 0; /* probe for in-band device */ +unsigned int driver_address = 0; /* not used if probing */ +unsigned int register_spacing = 0; /* not used if probing */ +char *driver_device = NULL; /* not used if probing */ + +/* Out-of-band Communication Configuration */ +int protocol_version = -1; //IPMI_MONITORING_PROTOCOL_VERSION_1_5; /* or -1 for default */ +char *username = "foousername"; +char *password = "foopassword"; +unsigned char *ipmi_k_g = NULL; +unsigned int ipmi_k_g_len = 0; +int privilege_level = -1; // IPMI_MONITORING_PRIVILEGE_LEVEL_USER; /* or -1 for default */ +int authentication_type = -1; // IPMI_MONITORING_AUTHENTICATION_TYPE_MD5; /* or -1 for default */ +int cipher_suite_id = 0; /* or -1 for default */ +int session_timeout = 0; /* 0 for default */ +int retransmission_timeout = 0; /* 0 for default */ + +/* Workarounds - specify workaround flags if necessary */ +unsigned int workaround_flags = 0; + +/* Initialize w/ record id numbers to only monitor specific record ids */ +unsigned int record_ids[] = {0}; +unsigned int record_ids_length = 0; + +/* Initialize w/ sensor types to only monitor specific sensor types + * see ipmi_monitoring.h sensor types list. + */ +unsigned int sensor_types[] = {0}; +unsigned int sensor_types_length = 0; + +/* Set to an appropriate alternate if desired */ +char *sdr_cache_directory = "/tmp"; +char *sensor_config_file = NULL; + +/* Set to 1 or 0 to enable these sensor reading flags + * - See ipmi_monitoring.h for descriptions of these flags. + */ +int reread_sdr_cache = 0; +int ignore_non_interpretable_sensors = 0; +int bridge_sensors = 0; +int interpret_oem_data = 0; +int shared_sensors = 0; +int discrete_reading = 1; +int ignore_scanning_disabled = 0; +int assume_bmc_owner = 0; +int entity_sensor_names = 0; + +/* Initialization flags + * + * Most commonly bitwise OR IPMI_MONITORING_FLAGS_DEBUG and/or + * IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS for extra debugging + * information. + */ +unsigned int ipmimonitoring_init_flags = 0; + +int errnum; + +// ---------------------------------------------------------------------------- +// SEL only variables + +/* Initialize w/ date range to only monitoring specific date range */ +char *date_begin = NULL; /* use MM/DD/YYYY format */ +char *date_end = NULL; /* use MM/DD/YYYY format */ + +int assume_system_event_record = 0; + +char *sel_config_file = NULL; + + +// ---------------------------------------------------------------------------- +// functions common to sensors and SEL + +static void +_init_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) +{ + fatal_assert(ipmi_config); + + ipmi_config->driver_type = driver_type; + ipmi_config->disable_auto_probe = disable_auto_probe; + ipmi_config->driver_address = driver_address; + ipmi_config->register_spacing = register_spacing; + ipmi_config->driver_device = driver_device; + + ipmi_config->protocol_version = protocol_version; + ipmi_config->username = username; + ipmi_config->password = password; + ipmi_config->k_g = ipmi_k_g; + ipmi_config->k_g_len = ipmi_k_g_len; + ipmi_config->privilege_level = privilege_level; + ipmi_config->authentication_type = authentication_type; + ipmi_config->cipher_suite_id = cipher_suite_id; + ipmi_config->session_timeout_len = session_timeout; + ipmi_config->retransmission_timeout_len = retransmission_timeout; + + ipmi_config->workaround_flags = workaround_flags; +} + +#ifdef NETDATA_COMMENTED +static const char * +_get_sensor_type_string (int sensor_type) +{ + switch (sensor_type) + { + case IPMI_MONITORING_SENSOR_TYPE_RESERVED: + return ("Reserved"); + case IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE: + return ("Temperature"); + case IPMI_MONITORING_SENSOR_TYPE_VOLTAGE: + return ("Voltage"); + case IPMI_MONITORING_SENSOR_TYPE_CURRENT: + return ("Current"); + case IPMI_MONITORING_SENSOR_TYPE_FAN: + return ("Fan"); + case IPMI_MONITORING_SENSOR_TYPE_PHYSICAL_SECURITY: + return ("Physical Security"); + case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_SECURITY_VIOLATION_ATTEMPT: + return ("Platform Security Violation Attempt"); + case IPMI_MONITORING_SENSOR_TYPE_PROCESSOR: + return ("Processor"); + case IPMI_MONITORING_SENSOR_TYPE_POWER_SUPPLY: + return ("Power Supply"); + case IPMI_MONITORING_SENSOR_TYPE_POWER_UNIT: + return ("Power Unit"); + case IPMI_MONITORING_SENSOR_TYPE_COOLING_DEVICE: + return ("Cooling Device"); + case IPMI_MONITORING_SENSOR_TYPE_OTHER_UNITS_BASED_SENSOR: + return ("Other Units Based Sensor"); + case IPMI_MONITORING_SENSOR_TYPE_MEMORY: + return ("Memory"); + case IPMI_MONITORING_SENSOR_TYPE_DRIVE_SLOT: + return ("Drive Slot"); + case IPMI_MONITORING_SENSOR_TYPE_POST_MEMORY_RESIZE: + return ("POST Memory Resize"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_FIRMWARE_PROGRESS: + return ("System Firmware Progress"); + case IPMI_MONITORING_SENSOR_TYPE_EVENT_LOGGING_DISABLED: + return ("Event Logging Disabled"); + case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG1: + return ("Watchdog 1"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_EVENT: + return ("System Event"); + case IPMI_MONITORING_SENSOR_TYPE_CRITICAL_INTERRUPT: + return ("Critical Interrupt"); + case IPMI_MONITORING_SENSOR_TYPE_BUTTON_SWITCH: + return ("Button/Switch"); + case IPMI_MONITORING_SENSOR_TYPE_MODULE_BOARD: + return ("Module/Board"); + case IPMI_MONITORING_SENSOR_TYPE_MICROCONTROLLER_COPROCESSOR: + return ("Microcontroller/Coprocessor"); + case IPMI_MONITORING_SENSOR_TYPE_ADD_IN_CARD: + return ("Add In Card"); + case IPMI_MONITORING_SENSOR_TYPE_CHASSIS: + return ("Chassis"); + case IPMI_MONITORING_SENSOR_TYPE_CHIP_SET: + return ("Chip Set"); + case IPMI_MONITORING_SENSOR_TYPE_OTHER_FRU: + return ("Other Fru"); + case IPMI_MONITORING_SENSOR_TYPE_CABLE_INTERCONNECT: + return ("Cable/Interconnect"); + case IPMI_MONITORING_SENSOR_TYPE_TERMINATOR: + return ("Terminator"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_BOOT_INITIATED: + return ("System Boot Initiated"); + case IPMI_MONITORING_SENSOR_TYPE_BOOT_ERROR: + return ("Boot Error"); + case IPMI_MONITORING_SENSOR_TYPE_OS_BOOT: + return ("OS Boot"); + case IPMI_MONITORING_SENSOR_TYPE_OS_CRITICAL_STOP: + return ("OS Critical Stop"); + case IPMI_MONITORING_SENSOR_TYPE_SLOT_CONNECTOR: + return ("Slot/Connector"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_ACPI_POWER_STATE: + return ("System ACPI Power State"); + case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG2: + return ("Watchdog 2"); + case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_ALERT: + return ("Platform Alert"); + case IPMI_MONITORING_SENSOR_TYPE_ENTITY_PRESENCE: + return ("Entity Presence"); + case IPMI_MONITORING_SENSOR_TYPE_MONITOR_ASIC_IC: + return ("Monitor ASIC/IC"); + case IPMI_MONITORING_SENSOR_TYPE_LAN: + return ("LAN"); + case IPMI_MONITORING_SENSOR_TYPE_MANAGEMENT_SUBSYSTEM_HEALTH: + return ("Management Subsystem Health"); + case IPMI_MONITORING_SENSOR_TYPE_BATTERY: + return ("Battery"); + case IPMI_MONITORING_SENSOR_TYPE_SESSION_AUDIT: + return ("Session Audit"); + case IPMI_MONITORING_SENSOR_TYPE_VERSION_CHANGE: + return ("Version Change"); + case IPMI_MONITORING_SENSOR_TYPE_FRU_STATE: + return ("FRU State"); + } + + return ("Unrecognized"); +} +#endif // NETDATA_COMMENTED + + +// ---------------------------------------------------------------------------- +// BEGIN NETDATA CODE + +static int debug = 0; + +static int netdata_update_every = 5; // this is the minimum update frequency +static int netdata_priority = 90000; +static int netdata_do_sel = 1; + +static size_t netdata_sensors_updated = 0; +static size_t netdata_sensors_collected = 0; +static size_t netdata_sel_events = 0; +static size_t netdata_sensors_states_nominal = 0; +static size_t netdata_sensors_states_warning = 0; +static size_t netdata_sensors_states_critical = 0; + +struct sensor { + int record_id; + int sensor_number; + int sensor_type; + int sensor_state; + int sensor_units; + char *sensor_name; + + int sensor_reading_type; + union { + uint8_t bool_value; + uint32_t uint32_value; + double double_value; + } sensor_reading; + + int sent; + int ignore; + int exposed; + int updated; + struct sensor *next; +} *sensors_root = NULL; + +static void netdata_mark_as_not_updated() { + struct sensor *sn; + for(sn = sensors_root; sn ;sn = sn->next) + sn->updated = sn->sent = 0; + + netdata_sensors_updated = 0; + netdata_sensors_collected = 0; + netdata_sel_events = 0; + + netdata_sensors_states_nominal = 0; + netdata_sensors_states_warning = 0; + netdata_sensors_states_critical = 0; +} + +static void send_chart_to_netdata_for_units(int units) { + struct sensor *sn, *sn_stored; + int dupfound, multiplier; + + switch(units) { + case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: + printf("CHART ipmi.temperatures_c '' 'System Celsius Temperatures read by IPMI' 'Celsius' 'temperatures' 'ipmi.temperatures_c' 'line' %d %d\n" + , netdata_priority + 10 + , netdata_update_every + ); + break; + + case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: + printf("CHART ipmi.temperatures_f '' 'System Fahrenheit Temperatures read by IPMI' 'Fahrenheit' 'temperatures' 'ipmi.temperatures_f' 'line' %d %d\n" + , netdata_priority + 11 + , netdata_update_every + ); + break; + + case IPMI_MONITORING_SENSOR_UNITS_VOLTS: + printf("CHART ipmi.volts '' 'System Voltages read by IPMI' 'Volts' 'voltages' 'ipmi.voltages' 'line' %d %d\n" + , netdata_priority + 12 + , netdata_update_every + ); + break; + + case IPMI_MONITORING_SENSOR_UNITS_AMPS: + printf("CHART ipmi.amps '' 'System Current read by IPMI' 'Amps' 'current' 'ipmi.amps' 'line' %d %d\n" + , netdata_priority + 13 + , netdata_update_every + ); + break; + + case IPMI_MONITORING_SENSOR_UNITS_RPM: + printf("CHART ipmi.rpm '' 'System Fans read by IPMI' 'RPM' 'fans' 'ipmi.rpm' 'line' %d %d\n" + , netdata_priority + 14 + , netdata_update_every + ); + break; + + case IPMI_MONITORING_SENSOR_UNITS_WATTS: + printf("CHART ipmi.watts '' 'System Power read by IPMI' 'Watts' 'power' 'ipmi.watts' 'line' %d %d\n" + , netdata_priority + 5 + , netdata_update_every + ); + break; + + case IPMI_MONITORING_SENSOR_UNITS_PERCENT: + printf("CHART ipmi.percent '' 'System Metrics read by IPMI' '%%' 'other' 'ipmi.percent' 'line' %d %d\n" + , netdata_priority + 15 + , netdata_update_every + ); + break; + + default: + for(sn = sensors_root; sn; sn = sn->next) + if(sn->sensor_units == units) + sn->ignore = 1; + return; + } + + for(sn = sensors_root; sn; sn = sn->next) { + dupfound = 0; + if(sn->sensor_units == units && sn->updated && !sn->ignore) { + sn->exposed = 1; + multiplier = 1; + + switch(sn->sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + multiplier = 1000; + // fallthrough + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + for (sn_stored = sensors_root; sn_stored; sn_stored = sn_stored->next) { + if (sn_stored == sn) continue; + // If the name is a duplicate, append the sensor number + if ( !strcmp(sn_stored->sensor_name, sn->sensor_name) ) { + dupfound = 1; + printf("DIMENSION i%d_n%d_r%d '%s i%d' absolute 1 %d\n" + , sn->sensor_number + , sn->record_id + , sn->sensor_reading_type + , sn->sensor_name + , sn->sensor_number + , multiplier + ); + break; + } + } + // No duplicate name was found, display it just with Name + if (!dupfound) { + // display without ID + printf("DIMENSION i%d_n%d_r%d '%s' absolute 1 %d\n" + , sn->sensor_number + , sn->record_id + , sn->sensor_reading_type + , sn->sensor_name + , multiplier + ); + } + break; + + default: + sn->ignore = 1; + break; + } + } + } +} + +static void send_metrics_to_netdata_for_units(int units) { + struct sensor *sn; + + switch(units) { + case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: + printf("BEGIN ipmi.temperatures_c\n"); + break; + + case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: + printf("BEGIN ipmi.temperatures_f\n"); + break; + + case IPMI_MONITORING_SENSOR_UNITS_VOLTS: + printf("BEGIN ipmi.volts\n"); + break; + + case IPMI_MONITORING_SENSOR_UNITS_AMPS: + printf("BEGIN ipmi.amps\n"); + break; + + case IPMI_MONITORING_SENSOR_UNITS_RPM: + printf("BEGIN ipmi.rpm\n"); + break; + + case IPMI_MONITORING_SENSOR_UNITS_WATTS: + printf("BEGIN ipmi.watts\n"); + break; + + case IPMI_MONITORING_SENSOR_UNITS_PERCENT: + printf("BEGIN ipmi.percent\n"); + break; + + default: + for(sn = sensors_root; sn; sn = sn->next) + if(sn->sensor_units == units) + sn->ignore = 1; + return; + } + + for(sn = sensors_root; sn; sn = sn->next) { + if(sn->sensor_units == units && sn->updated && !sn->sent && !sn->ignore) { + netdata_sensors_updated++; + + sn->sent = 1; + + switch(sn->sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + printf("SET i%d_n%d_r%d = %u\n" + , sn->sensor_number + , sn->record_id + , sn->sensor_reading_type + , sn->sensor_reading.bool_value + ); + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + printf("SET i%d_n%d_r%d = %u\n" + , sn->sensor_number + , sn->record_id + , sn->sensor_reading_type + , sn->sensor_reading.uint32_value + ); + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + printf("SET i%d_n%d_r%d = %lld\n" + , sn->sensor_number + , sn->record_id + , sn->sensor_reading_type + , (long long int)(sn->sensor_reading.double_value * 1000) + ); + break; + + default: + sn->ignore = 1; + break; + } + } + } + + printf("END\n"); +} + +static void send_metrics_to_netdata() { + static int sel_chart_generated = 0, sensors_states_chart_generated = 0; + struct sensor *sn; + + if(netdata_do_sel && !sel_chart_generated) { + sel_chart_generated = 1; + printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d\n" + , netdata_priority + 2 + , netdata_update_every + ); + printf("DIMENSION events '' absolute 1 1\n"); + } + + if(!sensors_states_chart_generated) { + sensors_states_chart_generated = 1; + printf("CHART ipmi.sensors_states '' 'IPMI Sensors State' 'sensors' 'states' ipmi.sensors_states line %d %d\n" + , netdata_priority + 1 + , netdata_update_every + ); + printf("DIMENSION nominal '' absolute 1 1\n"); + printf("DIMENSION critical '' absolute 1 1\n"); + printf("DIMENSION warning '' absolute 1 1\n"); + } + + // generate the CHART/DIMENSION lines, if we have to + for(sn = sensors_root; sn; sn = sn->next) + if(sn->updated && !sn->exposed && !sn->ignore) + send_chart_to_netdata_for_units(sn->sensor_units); + + if(netdata_do_sel) { + printf( + "BEGIN ipmi.events\n" + "SET events = %zu\n" + "END\n" + , netdata_sel_events + ); + } + + printf( + "BEGIN ipmi.sensors_states\n" + "SET nominal = %zu\n" + "SET warning = %zu\n" + "SET critical = %zu\n" + "END\n" + , netdata_sensors_states_nominal + , netdata_sensors_states_warning + , netdata_sensors_states_critical + ); + + // send metrics to netdata + for(sn = sensors_root; sn; sn = sn->next) + if(sn->updated && sn->exposed && !sn->sent && !sn->ignore) + send_metrics_to_netdata_for_units(sn->sensor_units); + +} + +static int *excluded_record_ids = NULL; +size_t excluded_record_ids_length = 0; + +static void excluded_record_ids_parse(const char *s) { + if(!s) return; + + while(*s) { + while(*s && !isdigit(*s)) s++; + + if(isdigit(*s)) { + char *e; + unsigned long n = strtoul(s, &e, 10); + s = e; + + if(n != 0) { + excluded_record_ids = realloc(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int)); + if(!excluded_record_ids) { + fprintf(stderr, "freeipmi.plugin: failed to allocate memory. Exiting."); + exit(1); + } + excluded_record_ids[excluded_record_ids_length++] = (int)n; + } + } + } + + if(debug) { + fprintf(stderr, "freeipmi.plugin: excluded record ids:"); + size_t i; + for(i = 0; i < excluded_record_ids_length; i++) { + fprintf(stderr, " %d", excluded_record_ids[i]); + } + fprintf(stderr, "\n"); + } +} + +static int *excluded_status_record_ids = NULL; +size_t excluded_status_record_ids_length = 0; + +static void excluded_status_record_ids_parse(const char *s) { + if(!s) return; + + while(*s) { + while(*s && !isdigit(*s)) s++; + + if(isdigit(*s)) { + char *e; + unsigned long n = strtoul(s, &e, 10); + s = e; + + if(n != 0) { + excluded_status_record_ids = realloc(excluded_status_record_ids, (excluded_status_record_ids_length + 1) * sizeof(int)); + if(!excluded_status_record_ids) { + fprintf(stderr, "freeipmi.plugin: failed to allocate memory. Exiting."); + exit(1); + } + excluded_status_record_ids[excluded_status_record_ids_length++] = (int)n; + } + } + } + + if(debug) { + fprintf(stderr, "freeipmi.plugin: excluded status record ids:"); + size_t i; + for(i = 0; i < excluded_status_record_ids_length; i++) { + fprintf(stderr, " %d", excluded_status_record_ids[i]); + } + fprintf(stderr, "\n"); + } +} + + +static int excluded_record_ids_check(int record_id) { + size_t i; + + for(i = 0; i < excluded_record_ids_length; i++) { + if(excluded_record_ids[i] == record_id) + return 1; + } + + return 0; +} + +static int excluded_status_record_ids_check(int record_id) { + size_t i; + + for(i = 0; i < excluded_status_record_ids_length; i++) { + if(excluded_status_record_ids[i] == record_id) + return 1; + } + + return 0; +} + +static void netdata_get_sensor( + int record_id + , int sensor_number + , int sensor_type + , int sensor_state + , int sensor_units + , int sensor_reading_type + , char *sensor_name + , void *sensor_reading +) { + // find the sensor record + struct sensor *sn; + for(sn = sensors_root; sn ;sn = sn->next) + if( sn->record_id == record_id && + sn->sensor_number == sensor_number && + sn->sensor_reading_type == sensor_reading_type && + sn->sensor_units == sensor_units && + !strcmp(sn->sensor_name, sensor_name) + ) + break; + + if(!sn) { + // not found, create it + // check if it is excluded + if(excluded_record_ids_check(record_id)) { + if(debug) fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name); + return; + } + + if(debug) fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); + + sn = calloc(1, sizeof(struct sensor)); + if(!sn) { + fatal("cannot allocate %zu bytes of memory.", sizeof(struct sensor)); + } + + sn->record_id = record_id; + sn->sensor_number = sensor_number; + sn->sensor_type = sensor_type; + sn->sensor_state = sensor_state; + sn->sensor_units = sensor_units; + sn->sensor_reading_type = sensor_reading_type; + sn->sensor_name = strdup(sensor_name); + if(!sn->sensor_name) { + fatal("cannot allocate %zu bytes of memory.", strlen(sensor_name)); + } + + sn->next = sensors_root; + sensors_root = sn; + } + else { + if(debug) fprintf(stderr, "Reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); + } + + switch(sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + sn->sensor_reading.bool_value = *((uint8_t *)sensor_reading); + sn->updated = 1; + netdata_sensors_collected++; + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + sn->sensor_reading.uint32_value = *((uint32_t *)sensor_reading); + sn->updated = 1; + netdata_sensors_collected++; + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + sn->sensor_reading.double_value = *((double *)sensor_reading); + sn->updated = 1; + netdata_sensors_collected++; + break; + + default: + if(debug) fprintf(stderr, "Unknown reading type - Ignoring sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); + sn->ignore = 1; + break; + } + + // check if it is excluded + if(excluded_status_record_ids_check(record_id)) { + if(debug) fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name); + return; + } + + switch(sensor_state) { + case IPMI_MONITORING_STATE_NOMINAL: + netdata_sensors_states_nominal++; + break; + + case IPMI_MONITORING_STATE_WARNING: + netdata_sensors_states_warning++; + break; + + case IPMI_MONITORING_STATE_CRITICAL: + netdata_sensors_states_critical++; + break; + + default: + break; + } +} + +static void netdata_get_sel( + int record_id + , int record_type_class + , int sel_state +) { + (void)record_id; + (void)record_type_class; + (void)sel_state; + + netdata_sel_events++; +} + + +// END NETDATA CODE +// ---------------------------------------------------------------------------- + + +static int +_ipmimonitoring_sensors (struct ipmi_monitoring_ipmi_config *ipmi_config) +{ + ipmi_monitoring_ctx_t ctx = NULL; + unsigned int sensor_reading_flags = 0; + int i; + int sensor_count; + int rv = -1; + + if (!(ctx = ipmi_monitoring_ctx_create ())) { + error("ipmi_monitoring_ctx_create()"); + goto cleanup; + } + + if (sdr_cache_directory) + { + if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, + sdr_cache_directory) < 0) + { + error("ipmi_monitoring_ctx_sdr_cache_directory(): %s\n", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + + /* Must call otherwise only default interpretations ever used */ + if (sensor_config_file) + { + if (ipmi_monitoring_ctx_sensor_config_file (ctx, + sensor_config_file) < 0) + { + error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + else + { + if (ipmi_monitoring_ctx_sensor_config_file (ctx, NULL) < 0) + { + error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + + if (reread_sdr_cache) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; + + if (ignore_non_interpretable_sensors) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS; + + if (bridge_sensors) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS; + + if (interpret_oem_data) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA; + + if (shared_sensors) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS; + + if (discrete_reading) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING; + + if (ignore_scanning_disabled) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED; + + if (assume_bmc_owner) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER; + +#ifdef IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES + if (entity_sensor_names) + sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; +#endif // IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES + + if (!record_ids_length && !sensor_types_length) + { + if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, + hostname, + ipmi_config, + sensor_reading_flags, + NULL, + 0, + NULL, + NULL)) < 0) + { + error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + else if (record_ids_length) + { + if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, + hostname, + ipmi_config, + sensor_reading_flags, + record_ids, + record_ids_length, + NULL, + NULL)) < 0) + { + error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + else + { + if ((sensor_count = ipmi_monitoring_sensor_readings_by_sensor_type (ctx, + hostname, + ipmi_config, + sensor_reading_flags, + sensor_types, + sensor_types_length, + NULL, + NULL)) < 0) + { + error( "ipmi_monitoring_sensor_readings_by_sensor_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + +#ifdef NETDATA_COMMENTED + printf ("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n", + "Record ID", + "Sensor Name", + "Sensor Number", + "Sensor Type", + "Sensor State", + "Sensor Reading", + "Sensor Units", + "Sensor Event/Reading Type Code", + "Sensor Event Bitmask", + "Sensor Event String"); +#endif // NETDATA_COMMENTED + + for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next (ctx)) + { + int record_id, sensor_number, sensor_type, sensor_state, sensor_units, + sensor_reading_type; + +#ifdef NETDATA_COMMENTED + int sensor_bitmask_type, sensor_bitmask, event_reading_type_code; + char **sensor_bitmask_strings = NULL; + const char *sensor_type_str; + const char *sensor_state_str; +#endif // NETDATA_COMMENTED + + char *sensor_name = NULL; + void *sensor_reading; + + if ((record_id = ipmi_monitoring_sensor_read_record_id (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((sensor_number = ipmi_monitoring_sensor_read_sensor_number (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_sensor_number(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((sensor_type = ipmi_monitoring_sensor_read_sensor_type (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_sensor_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if (!(sensor_name = ipmi_monitoring_sensor_read_sensor_name (ctx))) + { + error( "ipmi_monitoring_sensor_read_sensor_name(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((sensor_state = ipmi_monitoring_sensor_read_sensor_state (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_sensor_state(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((sensor_units = ipmi_monitoring_sensor_read_sensor_units (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_sensor_units(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + +#ifdef NETDATA_COMMENTED + if ((sensor_bitmask_type = ipmi_monitoring_sensor_read_sensor_bitmask_type (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_sensor_bitmask_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + if ((sensor_bitmask = ipmi_monitoring_sensor_read_sensor_bitmask (ctx)) < 0) + { + error( + "ipmi_monitoring_sensor_read_sensor_bitmask(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + /* it's ok for this to be NULL, i.e. sensor_bitmask == + * IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN + */ + sensor_bitmask_strings = ipmi_monitoring_sensor_read_sensor_bitmask_strings (ctx); + + + +#endif // NETDATA_COMMENTED + + if ((sensor_reading_type = ipmi_monitoring_sensor_read_sensor_reading_type (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_sensor_reading_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + sensor_reading = ipmi_monitoring_sensor_read_sensor_reading (ctx); + +#ifdef NETDATA_COMMENTED + if ((event_reading_type_code = ipmi_monitoring_sensor_read_event_reading_type_code (ctx)) < 0) + { + error( "ipmi_monitoring_sensor_read_event_reading_type_code(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } +#endif // NETDATA_COMMENTED + + netdata_get_sensor( + record_id + , sensor_number + , sensor_type + , sensor_state + , sensor_units + , sensor_reading_type + , sensor_name + , sensor_reading + ); + +#ifdef NETDATA_COMMENTED + if (!strlen (sensor_name)) + sensor_name = "N/A"; + + sensor_type_str = _get_sensor_type_string (sensor_type); + + printf ("%d, %s, %d, %s", + record_id, + sensor_name, + sensor_number, + sensor_type_str); + + if (sensor_state == IPMI_MONITORING_STATE_NOMINAL) + sensor_state_str = "Nominal"; + else if (sensor_state == IPMI_MONITORING_STATE_WARNING) + sensor_state_str = "Warning"; + else if (sensor_state == IPMI_MONITORING_STATE_CRITICAL) + sensor_state_str = "Critical"; + else + sensor_state_str = "N/A"; + + printf (", %s", sensor_state_str); + + if (sensor_reading) + { + const char *sensor_units_str; + + if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL) + printf (", %s", + (*((uint8_t *)sensor_reading) ? "true" : "false")); + else if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32) + printf (", %u", + *((uint32_t *)sensor_reading)); + else if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE) + printf (", %.2f", + *((double *)sensor_reading)); + else + printf (", N/A"); + + if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_CELSIUS) + sensor_units_str = "C"; + else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT) + sensor_units_str = "F"; + else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_VOLTS) + sensor_units_str = "V"; + else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_AMPS) + sensor_units_str = "A"; + else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_RPM) + sensor_units_str = "RPM"; + else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_WATTS) + sensor_units_str = "W"; + else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_PERCENT) + sensor_units_str = "%"; + else + sensor_units_str = "N/A"; + + printf (", %s", sensor_units_str); + } + else + printf (", N/A, N/A"); + + printf (", %Xh", event_reading_type_code); + + /* It is possible you may want to monitor specific event + * conditions that may occur. If that is the case, you may want + * to check out what specific bitmask type and bitmask events + * occurred. See ipmi_monitoring_bitmasks.h for a list of + * bitmasks and types. + */ + + if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN) + printf (", %Xh", sensor_bitmask); + else + printf (", N/A"); + + if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN + && sensor_bitmask_strings) + { + unsigned int i = 0; + + printf (","); + + while (sensor_bitmask_strings[i]) + { + printf (" "); + + printf ("'%s'", + sensor_bitmask_strings[i]); + + i++; + } + } + else + printf (", N/A"); + + printf ("\n"); +#endif // NETDATA_COMMENTED + } + + rv = 0; + cleanup: + if (ctx) + ipmi_monitoring_ctx_destroy (ctx); + return (rv); +} + + +static int +_ipmimonitoring_sel (struct ipmi_monitoring_ipmi_config *ipmi_config) +{ + ipmi_monitoring_ctx_t ctx = NULL; + unsigned int sel_flags = 0; + int i; + int sel_count; + int rv = -1; + + if (!(ctx = ipmi_monitoring_ctx_create ())) + { + error("ipmi_monitoring_ctx_create()"); + goto cleanup; + } + + if (sdr_cache_directory) + { + if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, + sdr_cache_directory) < 0) + { + error( "ipmi_monitoring_ctx_sdr_cache_directory(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + + /* Must call otherwise only default interpretations ever used */ + if (sel_config_file) + { + if (ipmi_monitoring_ctx_sel_config_file (ctx, + sel_config_file) < 0) + { + error( "ipmi_monitoring_ctx_sel_config_file(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + else + { + if (ipmi_monitoring_ctx_sel_config_file (ctx, NULL) < 0) + { + error( "ipmi_monitoring_ctx_sel_config_file(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + + if (reread_sdr_cache) + sel_flags |= IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE; + + if (interpret_oem_data) + sel_flags |= IPMI_MONITORING_SEL_FLAGS_INTERPRET_OEM_DATA; + + if (assume_system_event_record) + sel_flags |= IPMI_MONITORING_SEL_FLAGS_ASSUME_SYSTEM_EVENT_RECORD; + +#ifdef IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES + if (entity_sensor_names) + sel_flags |= IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES; +#endif // IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES + + if (record_ids_length) + { + if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, + hostname, + ipmi_config, + sel_flags, + record_ids, + record_ids_length, + NULL, + NULL)) < 0) + { + error( "ipmi_monitoring_sel_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + else if (sensor_types_length) + { + if ((sel_count = ipmi_monitoring_sel_by_sensor_type (ctx, + hostname, + ipmi_config, + sel_flags, + sensor_types, + sensor_types_length, + NULL, + NULL)) < 0) + { + error( "ipmi_monitoring_sel_by_sensor_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + else if (date_begin + || date_end) + { + if ((sel_count = ipmi_monitoring_sel_by_date_range (ctx, + hostname, + ipmi_config, + sel_flags, + date_begin, + date_end, + NULL, + NULL)) < 0) + { + error( "ipmi_monitoring_sel_by_sensor_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + else + { + if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, + hostname, + ipmi_config, + sel_flags, + NULL, + 0, + NULL, + NULL)) < 0) + { + error( "ipmi_monitoring_sel_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + +#ifdef NETDATA_COMMENTED + printf ("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n", + "Record ID", + "Record Type", + "SEL State", + "Timestamp", + "Sensor Name", + "Sensor Type", + "Event Direction", + "Event Type Code", + "Event Data", + "Event Offset", + "Event Offset String"); +#endif // NETDATA_COMMENTED + + for (i = 0; i < sel_count; i++, ipmi_monitoring_sel_iterator_next (ctx)) + { + int record_id, record_type, sel_state, record_type_class; +#ifdef NETDATA_COMMENTED + int sensor_type, sensor_number, event_direction, + event_offset_type, event_offset, event_type_code, manufacturer_id; + unsigned int timestamp, event_data1, event_data2, event_data3; + char *event_offset_string = NULL; + const char *sensor_type_str; + const char *event_direction_str; + const char *sel_state_str; + char *sensor_name = NULL; + unsigned char oem_data[64]; + int oem_data_len; + unsigned int j; +#endif // NETDATA_COMMENTED + + if ((record_id = ipmi_monitoring_sel_read_record_id (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((record_type = ipmi_monitoring_sel_read_record_type (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_record_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((record_type_class = ipmi_monitoring_sel_read_record_type_class (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_record_type_class(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((sel_state = ipmi_monitoring_sel_read_sel_state (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_sel_state(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + netdata_get_sel( + record_id + , record_type_class + , sel_state + ); + +#ifdef NETDATA_COMMENTED + if (sel_state == IPMI_MONITORING_STATE_NOMINAL) + sel_state_str = "Nominal"; + else if (sel_state == IPMI_MONITORING_STATE_WARNING) + sel_state_str = "Warning"; + else if (sel_state == IPMI_MONITORING_STATE_CRITICAL) + sel_state_str = "Critical"; + else + sel_state_str = "N/A"; + + printf ("%d, %d, %s", + record_id, + record_type, + sel_state_str); + + if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD + || record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD) + { + + if (ipmi_monitoring_sel_read_timestamp (ctx, ×tamp) < 0) + { + error( "ipmi_monitoring_sel_read_timestamp(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + /* XXX: This should be converted to a nice date output using + * your favorite timestamp -> string conversion functions. + */ + printf (", %u", timestamp); + } + else + printf (", N/A"); + + if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD) + { + /* If you are integrating ipmimonitoring SEL into a monitoring application, + * you may wish to count the number of times a specific error occurred + * and report that to the monitoring application. + * + * In this particular case, you'll probably want to check out + * what sensor type each SEL event is reporting, the + * event offset type, and the specific event offset that occurred. + * + * See ipmi_monitoring_offsets.h for a list of event offsets + * and types. + */ + + if (!(sensor_name = ipmi_monitoring_sel_read_sensor_name (ctx))) + { + error( "ipmi_monitoring_sel_read_sensor_name(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((sensor_type = ipmi_monitoring_sel_read_sensor_type (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_sensor_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((sensor_number = ipmi_monitoring_sel_read_sensor_number (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_sensor_number(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((event_direction = ipmi_monitoring_sel_read_event_direction (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_event_direction(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((event_type_code = ipmi_monitoring_sel_read_event_type_code (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_event_type_code(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if (ipmi_monitoring_sel_read_event_data (ctx, + &event_data1, + &event_data2, + &event_data3) < 0) + { + error( "ipmi_monitoring_sel_read_event_data(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((event_offset_type = ipmi_monitoring_sel_read_event_offset_type (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_event_offset_type(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if ((event_offset = ipmi_monitoring_sel_read_event_offset (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_event_offset(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if (!(event_offset_string = ipmi_monitoring_sel_read_event_offset_string (ctx))) + { + error( "ipmi_monitoring_sel_read_event_offset_string(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + if (!strlen (sensor_name)) + sensor_name = "N/A"; + + sensor_type_str = _get_sensor_type_string (sensor_type); + + if (event_direction == IPMI_MONITORING_SEL_EVENT_DIRECTION_ASSERTION) + event_direction_str = "Assertion"; + else + event_direction_str = "Deassertion"; + + printf (", %s, %s, %d, %s, %Xh, %Xh-%Xh-%Xh", + sensor_name, + sensor_type_str, + sensor_number, + event_direction_str, + event_type_code, + event_data1, + event_data2, + event_data3); + + if (event_offset_type != IPMI_MONITORING_EVENT_OFFSET_TYPE_UNKNOWN) + printf (", %Xh", event_offset); + else + printf (", N/A"); + + if (event_offset_type != IPMI_MONITORING_EVENT_OFFSET_TYPE_UNKNOWN) + printf (", %s", event_offset_string); + else + printf (", N/A"); + } + else if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD + || record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_NON_TIMESTAMPED_OEM_RECORD) + { + if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD) + { + if ((manufacturer_id = ipmi_monitoring_sel_read_manufacturer_id (ctx)) < 0) + { + error( "ipmi_monitoring_sel_read_manufacturer_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + printf (", Manufacturer ID = %Xh", manufacturer_id); + } + + if ((oem_data_len = ipmi_monitoring_sel_read_oem_data (ctx, oem_data, 1024)) < 0) + { + error( "ipmi_monitoring_sel_read_oem_data(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + + printf (", OEM Data = "); + + for (j = 0; j < oem_data_len; j++) + printf ("%02Xh ", oem_data[j]); + } + else + printf (", N/A, N/A, N/A, N/A, N/A, N/A, N/A"); + + printf ("\n"); +#endif // NETDATA_COMMENTED + } + + rv = 0; + cleanup: + if (ctx) + ipmi_monitoring_ctx_destroy (ctx); + return (rv); +} + +// ---------------------------------------------------------------------------- +// MAIN PROGRAM FOR NETDATA PLUGIN + +int ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config) { + errno = 0; + + if (_ipmimonitoring_sensors(ipmi_config) < 0) return -1; + + if(netdata_do_sel) { + if(_ipmimonitoring_sel(ipmi_config) < 0) return -2; + } + + return 0; +} + +int ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config) { + int i, checks = 10; + unsigned long long total = 0; + + for(i = 0 ; i < checks ; i++) { + if(debug) fprintf(stderr, "freeipmi.plugin: checking data collection speed iteration %d of %d\n", i+1, checks); + + // measure the time a data collection needs + unsigned long long start = now_realtime_usec(); + if(ipmi_collect_data(ipmi_config) < 0) + fatal("freeipmi.plugin: data collection failed."); + + unsigned long long end = now_realtime_usec(); + + if(debug) fprintf(stderr, "freeipmi.plugin: data collection speed was %llu usec\n", end - start); + + // add it to our total + total += end - start; + + // wait the same time + // to avoid flooding the IPMI processor with requests + sleep_usec(end - start); + } + + // so, we assume it needed 2x the time + // we find the average in microseconds + // and we round-up to the closest second + + return (int)(( total * 2 / checks / 1000000 ) + 1); +} + +int parse_inband_driver_type (const char *str) +{ + fatal_assert(str); + + if (strcasecmp (str, IPMI_PARSE_DEVICE_KCS_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_KCS); + else if (strcasecmp (str, IPMI_PARSE_DEVICE_SSIF_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_SSIF); + /* support "open" for those that might be used to + * ipmitool. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR2) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_OPENIPMI); + /* support "bmc" for those that might be used to + * ipmitool. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR2) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_SUNBMC); + + return (-1); +} + +int parse_outofband_driver_type (const char *str) +{ + fatal_assert(str); + + if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_STR) == 0) + return (IPMI_MONITORING_PROTOCOL_VERSION_1_5); + /* support "lanplus" for those that might be used to ipmitool. + * support typo variants to ease. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR2) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR3) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR4) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR5) == 0) + return (IPMI_MONITORING_PROTOCOL_VERSION_2_0); + + return (-1); +} + +int host_is_local(const char *host) +{ + if (host && (!strcmp(host, "localhost") || !strcmp(host, "127.0.0.1") || !strcmp(host, "::1"))) + return (1); + + return (0); +} + +int main (int argc, char **argv) { + clocks_init(); + + // ------------------------------------------------------------------------ + // initialization of netdata plugin + + program_name = "freeipmi.plugin"; + + // disable syslog + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + + // ------------------------------------------------------------------------ + // parse command line parameters + + int i, freq = 0; + for(i = 1; i < argc ; i++) { + if(isdigit(*argv[i]) && !freq) { + int n = str2i(argv[i]); + if(n > 0 && n < 86400) { + freq = n; + continue; + } + } + else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) { + printf("freeipmi.plugin %s\n", VERSION); + exit(0); + } + else if(strcmp("debug", argv[i]) == 0) { + debug = 1; + continue; + } + else if(strcmp("sel", argv[i]) == 0) { + netdata_do_sel = 1; + continue; + } + else if(strcmp("no-sel", argv[i]) == 0) { + netdata_do_sel = 0; + continue; + } + else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { + fprintf(stderr, + "\n" + " netdata freeipmi.plugin %s\n" + " Copyright (C) 2016-2017 Costa Tsaousis \n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " This program is a data collector plugin for netdata.\n" + "\n" + " Available command line options:\n" + "\n" + " SECONDS data collection frequency\n" + " minimum: %d\n" + "\n" + " debug enable verbose output\n" + " default: disabled\n" + "\n" + " sel\n" + " no-sel enable/disable SEL collection\n" + " default: %s\n" + "\n" + " hostname HOST\n" + " username USER\n" + " password PASS connect to remote IPMI host\n" + " default: local IPMI processor\n" + "\n" + " noauthcodecheck don't check the authentication codes returned\n" + "\n" + " driver-type IPMIDRIVER\n" + " Specify the driver type to use instead of doing an auto selection. \n" + " The currently available outofband drivers are LAN and LAN_2_0,\n" + " which perform IPMI 1.5 and IPMI 2.0 respectively. \n" + " The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC.\n" + "\n" + " sdr-cache-dir PATH directory for SDR cache files\n" + " default: %s\n" + "\n" + " sensor-config-file FILE filename to read sensor configuration\n" + " default: %s\n" + "\n" + " ignore N1,N2,N3,... sensor IDs to ignore\n" + " default: none\n" + "\n" + " ignore-status N1,N2,N3,... sensor IDs to ignore status (nominal/warning/critical)\n" + " default: none\n" + "\n" + " -v\n" + " -V\n" + " version print version and exit\n" + "\n" + " Linux kernel module for IPMI is CPU hungry.\n" + " On Linux run this to lower kipmiN CPU utilization:\n" + " # echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us\n" + "\n" + " or create: /etc/modprobe.d/ipmi.conf with these contents:\n" + " options ipmi_si kipmid_max_busy_us=10\n" + "\n" + " For more information:\n" + " https://github.com/netdata/netdata/tree/master/collectors/freeipmi.plugin\n" + "\n" + , VERSION + , netdata_update_every + , netdata_do_sel?"enabled":"disabled" + , sdr_cache_directory?sdr_cache_directory:"system default" + , sensor_config_file?sensor_config_file:"system default" + ); + exit(1); + } + else if(i < argc && strcmp("hostname", argv[i]) == 0) { + hostname = strdupz(argv[++i]); + char *s = argv[i]; + // mask it be hidden from the process tree + while(*s) *s++ = 'x'; + if(debug) fprintf(stderr, "freeipmi.plugin: hostname set to '%s'\n", hostname); + continue; + } + else if(i < argc && strcmp("username", argv[i]) == 0) { + username = strdupz(argv[++i]); + char *s = argv[i]; + // mask it be hidden from the process tree + while(*s) *s++ = 'x'; + if(debug) fprintf(stderr, "freeipmi.plugin: username set to '%s'\n", username); + continue; + } + else if(i < argc && strcmp("password", argv[i]) == 0) { + password = strdupz(argv[++i]); + char *s = argv[i]; + // mask it be hidden from the process tree + while(*s) *s++ = 'x'; + if(debug) fprintf(stderr, "freeipmi.plugin: password set to '%s'\n", password); + continue; + } + else if(strcmp("driver-type", argv[i]) == 0) { + if (hostname) { + protocol_version=parse_outofband_driver_type(argv[++i]); + if(debug) fprintf(stderr, "freeipmi.plugin: outband protocol version set to '%d'\n", protocol_version); + } + else { + driver_type=parse_inband_driver_type(argv[++i]); + if(debug) fprintf(stderr, "freeipmi.plugin: inband driver type set to '%d'\n", driver_type); + } + continue; + } else if (i < argc && strcmp("noauthcodecheck", argv[i]) == 0) { + if (!hostname || host_is_local(hostname)) { + if (debug) + fprintf( + stderr, + "freeipmi.plugin: noauthcodecheck workaround flag is ignored for inband configuration\n"); + } else if (protocol_version < 0 || protocol_version == IPMI_MONITORING_PROTOCOL_VERSION_1_5) { + workaround_flags |= IPMI_MONITORING_WORKAROUND_FLAGS_PROTOCOL_VERSION_1_5_NO_AUTH_CODE_CHECK; + if (debug) + fprintf(stderr, "freeipmi.plugin: noauthcodecheck workaround flag enabled\n"); + } else { + if (debug) + fprintf( + stderr, + "freeipmi.plugin: noauthcodecheck workaround flag is ignored for protocol version 2.0\n"); + } + continue; + } + else if(i < argc && strcmp("sdr-cache-dir", argv[i]) == 0) { + sdr_cache_directory = argv[++i]; + if(debug) fprintf(stderr, "freeipmi.plugin: SDR cache directory set to '%s'\n", sdr_cache_directory); + continue; + } + else if(i < argc && strcmp("sensor-config-file", argv[i]) == 0) { + sensor_config_file = argv[++i]; + if(debug) fprintf(stderr, "freeipmi.plugin: sensor config file set to '%s'\n", sensor_config_file); + continue; + } + else if(i < argc && strcmp("ignore", argv[i]) == 0) { + excluded_record_ids_parse(argv[++i]); + continue; + } + else if(i < argc && strcmp("ignore-status", argv[i]) == 0) { + excluded_status_record_ids_parse(argv[++i]); + continue; + } + + error("freeipmi.plugin: ignoring parameter '%s'", argv[i]); + } + + errno = 0; + + if(freq >= netdata_update_every) + netdata_update_every = freq; + + else if(freq) + error("update frequency %d seconds is too small for IPMI. Using %d.", freq, netdata_update_every); + + + // ------------------------------------------------------------------------ + // initialize IPMI + + struct ipmi_monitoring_ipmi_config ipmi_config; + + if(debug) fprintf(stderr, "freeipmi.plugin: calling _init_ipmi_config()\n"); + + _init_ipmi_config(&ipmi_config); + + if(debug) { + fprintf(stderr, "freeipmi.plugin: calling ipmi_monitoring_init()\n"); + ipmimonitoring_init_flags|=IPMI_MONITORING_FLAGS_DEBUG|IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS; + } + + if(ipmi_monitoring_init(ipmimonitoring_init_flags, &errnum) < 0) + fatal("ipmi_monitoring_init: %s", ipmi_monitoring_ctx_strerror(errnum)); + + if(debug) fprintf(stderr, "freeipmi.plugin: detecting IPMI minimum update frequency...\n"); + freq = ipmi_detect_speed_secs(&ipmi_config); + if(debug) fprintf(stderr, "freeipmi.plugin: IPMI minimum update frequency was calculated to %d seconds.\n", freq); + + if(freq > netdata_update_every) { + info("enforcing minimum data collection frequency, calculated to %d seconds.", freq); + netdata_update_every = freq; + } + + + // ------------------------------------------------------------------------ + // the main loop + + if(debug) fprintf(stderr, "freeipmi.plugin: starting data collection\n"); + + time_t started_t = now_monotonic_sec(); + + size_t iteration = 0; + usec_t step = netdata_update_every * USEC_PER_SEC; + + heartbeat_t hb; + heartbeat_init(&hb); + for(iteration = 0; 1 ; iteration++) { + usec_t dt = heartbeat_next(&hb, step); + + if(debug && iteration) + fprintf(stderr, "freeipmi.plugin: iteration %zu, dt %llu usec, sensors collected %zu, sensors sent to netdata %zu \n" + , iteration + , dt + , netdata_sensors_collected + , netdata_sensors_updated + ); + + netdata_mark_as_not_updated(); + + if(debug) fprintf(stderr, "freeipmi.plugin: calling ipmi_collect_data()\n"); + if(ipmi_collect_data(&ipmi_config) < 0) + fatal("data collection failed."); + + if(debug) fprintf(stderr, "freeipmi.plugin: calling send_metrics_to_netdata()\n"); + send_metrics_to_netdata(); + fflush(stdout); + + // restart check (14400 seconds) + if(now_monotonic_sec() - started_t > 14400) exit(0); + } +} diff --git a/collectors/idlejitter.plugin/Makefile.am b/collectors/idlejitter.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/idlejitter.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/idlejitter.plugin/README.md b/collectors/idlejitter.plugin/README.md new file mode 100644 index 0000000..5a92d53 --- /dev/null +++ b/collectors/idlejitter.plugin/README.md @@ -0,0 +1,32 @@ + + +# idlejitter.plugin + +Idle jitter is a measure of delays in timing for user processes caused by scheduling limitations. + +## How Netdata measures idle jitter + +A thread is spawned that requests to sleep for 20000 microseconds (20ms). +When the system wakes it up, it measures how many microseconds have passed. +The difference between the requested and the actual duration of the sleep, is the idle jitter. +This is done at most 50 times per second, to ensure we have a good average. + +This number is useful: + +- In multimedia-streaming environments such as VoIP gateways, where the CPU jitter can affect the quality of the service. +- On time servers and other systems that require very precise timing, where CPU jitter can actively interfere with timing precision. +- On gaming systems, where CPU jitter can cause frame drops and stuttering. +- In cloud infrastructure that can pause the VM or container for a small duration to perform operations at the host. + +## Charts + +idlejitter.plugin generates the idlejitter chart which measures CPU idle jitter in milliseconds lost per second. + +## Configuration + +This chart is available without any configuration. + + diff --git a/collectors/idlejitter.plugin/plugin_idlejitter.c b/collectors/idlejitter.plugin/plugin_idlejitter.c new file mode 100644 index 0000000..b6339cc --- /dev/null +++ b/collectors/idlejitter.plugin/plugin_idlejitter.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "daemon/common.h" + +#define CPU_IDLEJITTER_SLEEP_TIME_MS 20 + +static void cpuidlejitter_main_cleanup(void *ptr) { + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *cpuidlejitter_main(void *ptr) { + worker_register("IDLEJITTER"); + worker_register_job_name(0, "measurements"); + + netdata_thread_cleanup_push(cpuidlejitter_main_cleanup, ptr); + + usec_t sleep_ut = config_get_number("plugin:idlejitter", "loop time in ms", CPU_IDLEJITTER_SLEEP_TIME_MS) * USEC_PER_MS; + if(sleep_ut <= 0) { + config_set_number("plugin:idlejitter", "loop time in ms", CPU_IDLEJITTER_SLEEP_TIME_MS); + sleep_ut = CPU_IDLEJITTER_SLEEP_TIME_MS * USEC_PER_MS; + } + + RRDSET *st = rrdset_create_localhost( + "system" + , "idlejitter" + , NULL + , "idlejitter" + , NULL + , "CPU Idle Jitter" + , "microseconds lost/s" + , "idlejitter.plugin" + , NULL + , NETDATA_CHART_PRIO_SYSTEM_IDLEJITTER + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + RRDDIM *rd_min = rrddim_add(st, "min", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + RRDDIM *rd_max = rrddim_add(st, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + RRDDIM *rd_avg = rrddim_add(st, "average", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + usec_t update_every_ut = localhost->rrd_update_every * USEC_PER_SEC; + struct timeval before, after; + + while (!netdata_exit) { + int iterations = 0; + usec_t error_total = 0, + error_min = 0, + error_max = 0, + elapsed = 0; + + while (elapsed < update_every_ut) { + now_monotonic_high_precision_timeval(&before); + worker_is_idle(); + sleep_usec(sleep_ut); + worker_is_busy(0); + now_monotonic_high_precision_timeval(&after); + + usec_t dt = dt_usec(&after, &before); + elapsed += dt; + + usec_t error = dt - sleep_ut; + error_total += error; + + if(unlikely(!iterations)) + error_min = error; + else if(error < error_min) + error_min = error; + + if(error > error_max) + error_max = error; + + iterations++; + } + + if(iterations) { + rrddim_set_by_pointer(st, rd_min, error_min); + rrddim_set_by_pointer(st, rd_max, error_max); + rrddim_set_by_pointer(st, rd_avg, error_total / iterations); + rrdset_done(st); + } + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + diff --git a/collectors/ioping.plugin/Makefile.am b/collectors/ioping.plugin/Makefile.am new file mode 100644 index 0000000..a9cd7c4 --- /dev/null +++ b/collectors/ioping.plugin/Makefile.am @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +CLEANFILES = \ + ioping.plugin \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_plugins_SCRIPTS = \ + ioping.plugin \ + $(NULL) + +dist_noinst_DATA = \ + ioping.plugin.in \ + README.md \ + $(NULL) + +dist_libconfig_DATA = \ + ioping.conf \ + $(NULL) diff --git a/collectors/ioping.plugin/README.md b/collectors/ioping.plugin/README.md new file mode 100644 index 0000000..c4c3695 --- /dev/null +++ b/collectors/ioping.plugin/README.md @@ -0,0 +1,86 @@ + + +# ioping.plugin + +The ioping plugin supports monitoring latency for any number of directories/files/devices, +by pinging them with `ioping`. + +A recent version of `ioping` is required (one that supports option `-N`). +The supplied plugin can install it, by running: + +```sh +/usr/libexec/netdata/plugins.d/ioping.plugin install +``` + +The `-e` option can be supplied to indicate where the Netdata environment file is installed. The default path is `/etc/netdata/.environment`. + +The above will download, build and install the right version as `/usr/libexec/netdata/plugins.d/ioping`. + +Then you need to edit `/etc/netdata/ioping.conf` (to edit it on your system run +`/etc/netdata/edit-config ioping.conf`) like this: + +```sh +# uncomment the following line - it should already be there +ioping="/usr/libexec/netdata/plugins.d/ioping" + +# set here the directory/file/device, you need to ping +destination="destination" + +# override the chart update frequency - the default is inherited from Netdata +update_every="1s" + +# the request size in bytes to ping the destination +request_size="4k" + +# other iping options - these are the defaults +ioping_opts="-T 1000000 -R" +``` + +## alarms + +Netdata will automatically attach a few alarms for each host. +Check the [latest versions of the ioping alarms](https://raw.githubusercontent.com/netdata/netdata/master/health/health.d/ioping.conf) + +## Multiple ioping Plugins With Different Settings + +You may need to run multiple ioping plugins with different settings or different end points. +For example, you may need to ping one destination once per 10 seconds, and another once per second. + +Netdata allows you to add as many `ioping` plugins as you like. + +Follow this procedure: + +**1. Create New ioping Configuration File** + +```sh +# Step Into Configuration Directory +cd /etc/netdata + +# Copy Original ioping Configuration File To New Configuration File +cp ioping.conf ioping2.conf +``` + +Edit `ioping2.conf` and set the settings and the destination you need for the seconds instance. + +**2. Soft Link Original ioping Plugin to New Plugin File** + +```sh +# Become root (If The Step Step Is Performed As Non-Root User) +sudo su + +# Step Into The Plugins Directory +cd /usr/libexec/netdata/plugins.d + +# Link ioping.plugin to ioping2.plugin +ln -s ioping.plugin ioping2.plugin +``` + +That's it. Netdata will detect the new plugin and start it. + +You can name the new plugin any name you like. +Just make sure the plugin and the configuration file have the same name. + + diff --git a/collectors/ioping.plugin/ioping.conf b/collectors/ioping.plugin/ioping.conf new file mode 100644 index 0000000..86f0de7 --- /dev/null +++ b/collectors/ioping.plugin/ioping.conf @@ -0,0 +1,40 @@ +# no need for shebang - this file is sourced from ioping.plugin + +# ioping.plugin requires a recent version of ioping. +# +# You can get it on your system, by running: +# +# /usr/libexec/netdata/plugins.d/ioping.plugin install + +# ----------------------------------------------------------------------------- +# configuration options + +# The ioping binary to use. We need one that can output netdata friendly info +# (supporting: -N). If you have multiple versions, put here the full filename +# of the right one + +#ioping="/usr/libexec/netdata/plugins.d/ioping" + + +# The directory/file/device to ioping + +destination="" + + +# The update frequency of the chart in seconds (symbolic modifiers are supported) +# the default is inherited from netdata + +#update_every="1s" + + +# The request size in bytes to ioping the destination (symbolic modifiers are supported) +# by default 4k chunks are used + +#request_size="4k" + + +# Other ioping options +# the defaults: +# -T 1000000 = maximum valid request time (us) + +#ioping_opts="-T 1000000" diff --git a/collectors/ioping.plugin/ioping.plugin.in b/collectors/ioping.plugin/ioping.plugin.in new file mode 100755 index 0000000..1d79eb7 --- /dev/null +++ b/collectors/ioping.plugin/ioping.plugin.in @@ -0,0 +1,210 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-3.0-or-later + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2017 Costa Tsaousis +# GPL v3+ +# +# This plugin requires a latest version of ioping. +# You can compile it from source, by running me with option: install + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +usage="$(basename "$0") [install] [-h] [-e] + +where: + install install ioping binary + -e, --env path to environment file (defaults to '/etc/netdata/.environment' + -h show this help text" + +INSTALL=0 +ENVIRONMENT_FILE="/etc/netdata/.environment" + +while :; do + case "$1" in + -h | --help) + echo "$usage" >&2 + exit 1 + ;; + install) + INSTALL=1 + shift + ;; + -e | --env) + ENVIRONMENT_FILE="$2" + shift 2 + ;; + -*) + echo "$usage" >&2 + exit 1 + ;; + *) break ;; + esac +done + +if [ "$INSTALL" == "1" ] + then + [ "${UID}" != 0 ] && echo >&2 "Please run me as root. This will install a single binary file: /usr/libexec/netdata/plugins.d/ioping." && exit 1 + + source "${ENVIRONMENT_FILE}" || exit 1 + + run() { + printf >&2 " > " + printf >&2 "%q " "${@}" + printf >&2 "\n" + "${@}" || exit 1 + } + + download() { + local git="$(which git 2>/dev/null || command -v git 2>/dev/null)" + [ ! -z "${git}" ] && run git clone "${1}" "${2}" && return 0 + + echo >&2 "Cannot find 'git' in this system." && exit 1 + } + + tmp=$(mktemp -d /tmp/netdata-ioping-XXXXXX) + [ ! -d "${NETDATA_PREFIX}/usr/libexec/netdata" ] && run mkdir -p "${NETDATA_PREFIX}/usr/libexec/netdata" + + run cd "${tmp}" + + if [ -d ioping-netdata ] + then + run rm -rf ioping-netdata || exit 1 + fi + + download 'https://github.com/netdata/ioping.git' 'ioping-netdata' + [ $? -ne 0 ] && exit 1 + run cd ioping-netdata || exit 1 + + INSTALL_PATH="${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ioping" + + run make clean + run make + run mv ioping "${INSTALL_PATH}" + run chown root:"${NETDATA_GROUP}" "${INSTALL_PATH}" + run chmod 4750 "${INSTALL_PATH}" + echo >&2 + echo >&2 "All done, you have a compatible ioping now at ${INSTALL_PATH}." + echo >&2 + + exit 0 +fi + +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + echo "DISABLE" + exit 1 +} + +debug=0 +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- + +# store in ${plugin} the name we run under +# this allows us to copy/link ioping.plugin under a different name +# to have multiple ioping plugins running with different settings +plugin="${PROGRAM_NAME/.plugin/}" + + +# ----------------------------------------------------------------------------- + +# the frequency to send info to netdata +# passed by netdata as the first parameter +update_every="${1-1}" + +# the netdata configuration directory +# passed by netdata as an environment variable +[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@" +[ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" + +# the netdata directory for internal binaries +[ -z "${NETDATA_PLUGINS_DIR}" ] && NETDATA_PLUGINS_DIR="@pluginsdir_POST@" + +# ----------------------------------------------------------------------------- +# configuration options +# can be overwritten at /etc/netdata/ioping.conf + +# the ioping binary to use +# we need one that can output netdata friendly info (supporting: -N) +# if you have multiple versions, put here the full filename of the right one +ioping="${NETDATA_PLUGINS_DIR}/ioping" + +# the destination to ioping +destination="" + +# the request size in bytes to ping the disk +request_size="4k" + +# ioping options +ioping_opts="-T 1000000" + +# ----------------------------------------------------------------------------- +# load the configuration files + +for CONFIG in "${NETDATA_STOCK_CONFIG_DIR}/${plugin}.conf" "${NETDATA_USER_CONFIG_DIR}/${plugin}.conf"; do + if [ -f "${CONFIG}" ]; then + info "Loading config file '${CONFIG}'..." + source "${CONFIG}" + [ $? -ne 0 ] && error "Failed to load config file '${CONFIG}'." + elif [[ $CONFIG =~ ^$NETDATA_USER_CONFIG_DIR ]]; then + warning "Cannot find file '${CONFIG}'." + fi +done + +if [ -z "${destination}" ] +then + fatal "destination is not configured - nothing to do." +fi + +if [ ! -f "${ioping}" ] +then + fatal "ioping command is not found. Please set its full path in '${NETDATA_USER_CONFIG_DIR}/${plugin}.conf'" +fi + +if [ ! -x "${ioping}" ] +then + fatal "ioping command '${ioping}' is not executable - cannot proceed." +fi + +# the ioping options we will use +options=( -N -i ${update_every} -s ${request_size} ${ioping_opts} ${destination} ) + +# execute ioping +info "starting ioping: ${ioping} ${options[*]}" +exec "${ioping}" "${options[@]}" + +# if we cannot execute ioping, stop +fatal "command '${ioping} ${options[*]}' failed to be executed (returned code $?)." diff --git a/collectors/macos.plugin/Makefile.am b/collectors/macos.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/macos.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/macos.plugin/README.md b/collectors/macos.plugin/README.md new file mode 100644 index 0000000..92bbf1e --- /dev/null +++ b/collectors/macos.plugin/README.md @@ -0,0 +1,12 @@ + + +# macos.plugin + +Collects resource usage and performance data on macOS systems + +By default, Netdata will enable monitoring metrics for disks, memory, and network only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. + + diff --git a/collectors/macos.plugin/macos_fw.c b/collectors/macos.plugin/macos_fw.c new file mode 100644 index 0000000..07f7d77 --- /dev/null +++ b/collectors/macos.plugin/macos_fw.c @@ -0,0 +1,648 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_macos.h" + +#include +#include +#include +#include +// NEEDED BY do_space, do_inodes +#include +// NEEDED BY: struct ifaddrs, getifaddrs() +#include +#include + +// NEEDED BY: do_bandwidth +#define IFA_DATA(s) (((struct if_data *)ifa->ifa_data)->ifi_ ## s) + +#define MAXDRIVENAME 31 + +#define KILO_FACTOR 1024 +#define MEGA_FACTOR 1048576 // 1024 * 1024 +#define GIGA_FACTOR 1073741824 // 1024 * 1024 * 1024 + +int do_macos_iokit(int update_every, usec_t dt) { + (void)dt; + + static int do_io = -1, do_space = -1, do_inodes = -1, do_bandwidth = -1; + + if (unlikely(do_io == -1)) { + do_io = config_get_boolean("plugin:macos:iokit", "disk i/o", 1); + do_space = config_get_boolean("plugin:macos:sysctl", "space usage for all disks", 1); + do_inodes = config_get_boolean("plugin:macos:sysctl", "inodes usage for all disks", 1); + do_bandwidth = config_get_boolean("plugin:macos:sysctl", "bandwidth", 1); + } + + RRDSET *st; + + mach_port_t main_port; + io_registry_entry_t drive, drive_media; + io_iterator_t drive_list; + CFDictionaryRef properties, statistics; + CFStringRef name; + CFNumberRef number; + kern_return_t status; + collected_number total_disk_reads = 0; + collected_number total_disk_writes = 0; + struct diskstat { + char name[MAXDRIVENAME]; + collected_number bytes_read; + collected_number bytes_write; + collected_number reads; + collected_number writes; + collected_number time_read; + collected_number time_write; + collected_number latency_read; + collected_number latency_write; + } diskstat; + struct cur_diskstat { + collected_number duration_read_ns; + collected_number duration_write_ns; + collected_number busy_time_ns; + } cur_diskstat; + struct prev_diskstat { + collected_number bytes_read; + collected_number bytes_write; + collected_number operations_read; + collected_number operations_write; + collected_number duration_read_ns; + collected_number duration_write_ns; + collected_number busy_time_ns; + } prev_diskstat; + + // NEEDED BY: do_space, do_inodes + struct statfs *mntbuf; + int mntsize, i; + char title[4096 + 1]; + + // NEEDED BY: do_bandwidth + struct ifaddrs *ifa, *ifap; + +#if !defined(MAC_OS_VERSION_12_0) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_12_0) +#define IOMainPort IOMasterPort +#endif + + /* Get ports and services for drive statistics. */ + if (unlikely(IOMainPort(bootstrap_port, &main_port))) { + error("MACOS: IOMasterPort() failed"); + do_io = 0; + error("DISABLED: system.io"); + /* Get the list of all drive objects. */ + } else if (unlikely(IOServiceGetMatchingServices(main_port, IOServiceMatching("IOBlockStorageDriver"), &drive_list))) { + error("MACOS: IOServiceGetMatchingServices() failed"); + do_io = 0; + error("DISABLED: system.io"); + } else { + while ((drive = IOIteratorNext(drive_list)) != 0) { + properties = 0; + statistics = 0; + number = 0; + bzero(&diskstat, sizeof(diskstat)); + + /* Get drive media object. */ + status = IORegistryEntryGetChildEntry(drive, kIOServicePlane, &drive_media); + if (unlikely(status != KERN_SUCCESS)) { + IOObjectRelease(drive); + continue; + } + + /* Get drive media properties. */ + if (likely(!IORegistryEntryCreateCFProperties(drive_media, (CFMutableDictionaryRef *)&properties, kCFAllocatorDefault, 0))) { + /* Get disk name. */ + if (likely(name = (CFStringRef)CFDictionaryGetValue(properties, CFSTR(kIOBSDNameKey)))) { + CFStringGetCString(name, diskstat.name, MAXDRIVENAME, kCFStringEncodingUTF8); + } + } + + /* Release. */ + CFRelease(properties); + IOObjectRelease(drive_media); + + if(unlikely(!*diskstat.name)) { + IOObjectRelease(drive); + continue; + } + + /* Obtain the properties for this drive object. */ + if (unlikely(IORegistryEntryCreateCFProperties(drive, (CFMutableDictionaryRef *)&properties, kCFAllocatorDefault, 0))) { + IOObjectRelease(drive); + error("MACOS: IORegistryEntryCreateCFProperties() failed"); + do_io = 0; + error("DISABLED: system.io"); + break; + } else if (likely(properties)) { + /* Obtain the statistics from the drive properties. */ + if (likely(statistics = (CFDictionaryRef)CFDictionaryGetValue(properties, CFSTR(kIOBlockStorageDriverStatisticsKey)))) { + + // -------------------------------------------------------------------- + + /* Get bytes read. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsBytesReadKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.bytes_read); + total_disk_reads += diskstat.bytes_read; + } + + /* Get bytes written. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsBytesWrittenKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.bytes_write); + total_disk_writes += diskstat.bytes_write; + } + + st = rrdset_find_active_bytype_localhost("disk", diskstat.name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "disk" + , diskstat.name + , NULL + , diskstat.name + , "disk.io" + , "Disk I/O Bandwidth" + , "KiB/s" + , "macos.plugin" + , "iokit" + , 2000 + , update_every + , RRDSET_TYPE_AREA + ); + + rrddim_add(st, "reads", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "writes", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + prev_diskstat.bytes_read = rrddim_set(st, "reads", diskstat.bytes_read); + prev_diskstat.bytes_write = rrddim_set(st, "writes", diskstat.bytes_write); + rrdset_done(st); + + /* Get number of reads. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsReadsKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.reads); + } + + /* Get number of writes. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsWritesKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.writes); + } + + st = rrdset_find_active_bytype_localhost("disk_ops", diskstat.name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "disk_ops" + , diskstat.name + , NULL + , diskstat.name + , "disk.ops" + , "Disk Completed I/O Operations" + , "operations/s" + , "macos.plugin" + , "iokit" + , 2001 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + prev_diskstat.operations_read = rrddim_set(st, "reads", diskstat.reads); + prev_diskstat.operations_write = rrddim_set(st, "writes", diskstat.writes); + rrdset_done(st); + + /* Get reads time. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsTotalReadTimeKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.time_read); + } + + /* Get writes time. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsTotalWriteTimeKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.time_write); + } + + st = rrdset_find_active_bytype_localhost("disk_util", diskstat.name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "disk_util" + , diskstat.name + , NULL + , diskstat.name + , "disk.util" + , "Disk Utilization Time" + , "% of time working" + , "macos.plugin" + , "iokit" + , 2004 + , update_every + , RRDSET_TYPE_AREA + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "utilization", NULL, 1, 10000000, RRD_ALGORITHM_INCREMENTAL); + } + + cur_diskstat.busy_time_ns = (diskstat.time_read + diskstat.time_write); + prev_diskstat.busy_time_ns = rrddim_set(st, "utilization", cur_diskstat.busy_time_ns); + rrdset_done(st); + + /* Get reads latency. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsLatentReadTimeKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.latency_read); + } + + /* Get writes latency. */ + if (likely(number = (CFNumberRef)CFDictionaryGetValue(statistics, CFSTR(kIOBlockStorageDriverStatisticsLatentWriteTimeKey)))) { + CFNumberGetValue(number, kCFNumberSInt64Type, &diskstat.latency_write); + } + + st = rrdset_find_active_bytype_localhost("disk_iotime", diskstat.name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "disk_iotime" + , diskstat.name + , NULL + , diskstat.name + , "disk.iotime" + , "Disk Total I/O Time" + , "milliseconds/s" + , "macos.plugin" + , "iokit" + , 2022 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "reads", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "writes", NULL, -1, 1000000, RRD_ALGORITHM_INCREMENTAL); + } + + cur_diskstat.duration_read_ns = diskstat.time_read + diskstat.latency_read; + cur_diskstat.duration_write_ns = diskstat.time_write + diskstat.latency_write; + prev_diskstat.duration_read_ns = rrddim_set(st, "reads", cur_diskstat.duration_read_ns); + prev_diskstat.duration_write_ns = rrddim_set(st, "writes", cur_diskstat.duration_write_ns); + rrdset_done(st); + + // calculate differential charts + // only if this is not the first time we run + + if (likely(dt)) { + st = rrdset_find_active_bytype_localhost("disk_await", diskstat.name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "disk_await" + , diskstat.name + , NULL + , diskstat.name + , "disk.await" + , "Average Completed I/O Operation Time" + , "milliseconds/operation" + , "macos.plugin" + , "iokit" + , 2005 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "reads", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "writes", NULL, -1, 1000000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "reads", (diskstat.reads - prev_diskstat.operations_read) ? + (cur_diskstat.duration_read_ns - prev_diskstat.duration_read_ns) / (diskstat.reads - prev_diskstat.operations_read) : 0); + rrddim_set(st, "writes", (diskstat.writes - prev_diskstat.operations_write) ? + (cur_diskstat.duration_write_ns - prev_diskstat.duration_write_ns) / (diskstat.writes - prev_diskstat.operations_write) : 0); + rrdset_done(st); + + st = rrdset_find_active_bytype_localhost("disk_avgsz", diskstat.name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "disk_avgsz" + , diskstat.name + , NULL + , diskstat.name + , "disk.avgsz" + , "Average Completed I/O Operation Bandwidth" + , "KiB/operation" + , "macos.plugin" + , "iokit" + , 2006 + , update_every + , RRDSET_TYPE_AREA + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "reads", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "writes", NULL, -1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "reads", (diskstat.reads - prev_diskstat.operations_read) ? + (diskstat.bytes_read - prev_diskstat.bytes_read) / (diskstat.reads - prev_diskstat.operations_read) : 0); + rrddim_set(st, "writes", (diskstat.writes - prev_diskstat.operations_write) ? + (diskstat.bytes_write - prev_diskstat.bytes_write) / (diskstat.writes - prev_diskstat.operations_write) : 0); + rrdset_done(st); + + st = rrdset_find_active_bytype_localhost("disk_svctm", diskstat.name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "disk_svctm" + , diskstat.name + , NULL + , diskstat.name + , "disk.svctm" + , "Average Service Time" + , "milliseconds/operation" + , "macos.plugin" + , "iokit" + , 2007 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "svctm", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "svctm", ((diskstat.reads - prev_diskstat.operations_read) + (diskstat.writes - prev_diskstat.operations_write)) ? + (cur_diskstat.busy_time_ns - prev_diskstat.busy_time_ns) / ((diskstat.reads - prev_diskstat.operations_read) + (diskstat.writes - prev_diskstat.operations_write)) : 0); + rrdset_done(st); + } + } + + /* Release. */ + CFRelease(properties); + } + + /* Release. */ + IOObjectRelease(drive); + } + IOIteratorReset(drive_list); + + /* Release. */ + IOObjectRelease(drive_list); + } + + if (likely(do_io)) { + st = rrdset_find_active_bytype_localhost("system", "io"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "io" + , NULL + , "disk" + , NULL + , "Disk I/O" + , "KiB/s" + , "macos.plugin" + , "iokit" + , 150 + , update_every + , RRDSET_TYPE_AREA + ); + rrddim_add(st, "in", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "out", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "in", total_disk_reads); + rrddim_set(st, "out", total_disk_writes); + rrdset_done(st); + } + + // Can be merged with FreeBSD plugin + + if (likely(do_space || do_inodes)) { + // there is no mount info in sysctl MIBs + if (unlikely(!(mntsize = getmntinfo(&mntbuf, MNT_NOWAIT)))) { + error("MACOS: getmntinfo() failed"); + do_space = 0; + error("DISABLED: disk_space.X"); + do_inodes = 0; + error("DISABLED: disk_inodes.X"); + } else { + for (i = 0; i < mntsize; i++) { + if (mntbuf[i].f_flags == MNT_RDONLY || + mntbuf[i].f_blocks == 0 || + // taken from gnulib/mountlist.c and shortened to FreeBSD related fstypes + strcmp(mntbuf[i].f_fstypename, "autofs") == 0 || + strcmp(mntbuf[i].f_fstypename, "procfs") == 0 || + strcmp(mntbuf[i].f_fstypename, "subfs") == 0 || + strcmp(mntbuf[i].f_fstypename, "devfs") == 0 || + strcmp(mntbuf[i].f_fstypename, "none") == 0) + continue; + + // -------------------------------------------------------------------------- + + if (likely(do_space)) { + st = rrdset_find_active_bytype_localhost("disk_space", mntbuf[i].f_mntonname); + if (unlikely(!st)) { + snprintfz(title, 4096, "Disk Space Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); + st = rrdset_create_localhost( + "disk_space" + , mntbuf[i].f_mntonname + , NULL + , mntbuf[i].f_mntonname + , "disk.space" + , title + , "GiB" + , "macos.plugin" + , "iokit" + , 2023 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrddim_add(st, "avail", NULL, mntbuf[i].f_bsize, GIGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "used", NULL, mntbuf[i].f_bsize, GIGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "reserved_for_root", "reserved for root", mntbuf[i].f_bsize, GIGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "avail", (collected_number) mntbuf[i].f_bavail); + rrddim_set(st, "used", (collected_number) (mntbuf[i].f_blocks - mntbuf[i].f_bfree)); + rrddim_set(st, "reserved_for_root", (collected_number) (mntbuf[i].f_bfree - mntbuf[i].f_bavail)); + rrdset_done(st); + } + + // -------------------------------------------------------------------------- + + if (likely(do_inodes)) { + st = rrdset_find_active_bytype_localhost("disk_inodes", mntbuf[i].f_mntonname); + if (unlikely(!st)) { + snprintfz(title, 4096, "Disk Files (inodes) Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); + st = rrdset_create_localhost( + "disk_inodes" + , mntbuf[i].f_mntonname + , NULL + , mntbuf[i].f_mntonname + , "disk.inodes" + , title + , "inodes" + , "macos.plugin" + , "iokit" + , 2024 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrddim_add(st, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "reserved_for_root", "reserved for root", 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "avail", (collected_number) mntbuf[i].f_ffree); + rrddim_set(st, "used", (collected_number) (mntbuf[i].f_files - mntbuf[i].f_ffree)); + rrdset_done(st); + } + } + } + } + + // Can be merged with FreeBSD plugin + + if (likely(do_bandwidth)) { + if (unlikely(getifaddrs(&ifap))) { + error("MACOS: getifaddrs()"); + do_bandwidth = 0; + error("DISABLED: system.ipv4"); + } else { + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + + st = rrdset_find_active_bytype_localhost("net", ifa->ifa_name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "net" + , ifa->ifa_name + , NULL + , ifa->ifa_name + , "net.net" + , "Bandwidth" + , "kilobits/s" + , "macos.plugin" + , "iokit" + , 7000 + , update_every + , RRDSET_TYPE_AREA + ); + + rrddim_add(st, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "received", IFA_DATA(ibytes)); + rrddim_set(st, "sent", IFA_DATA(obytes)); + rrdset_done(st); + + st = rrdset_find_active_bytype_localhost("net_packets", ifa->ifa_name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "net_packets" + , ifa->ifa_name + , NULL + , ifa->ifa_name + , "net.packets" + , "Packets" + , "packets/s" + , "macos.plugin" + , "iokit" + , 7001 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "multicast_received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "multicast_sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "received", IFA_DATA(ipackets)); + rrddim_set(st, "sent", IFA_DATA(opackets)); + rrddim_set(st, "multicast_received", IFA_DATA(imcasts)); + rrddim_set(st, "multicast_sent", IFA_DATA(omcasts)); + rrdset_done(st); + + st = rrdset_find_active_bytype_localhost("net_errors", ifa->ifa_name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "net_errors" + , ifa->ifa_name + , NULL + , ifa->ifa_name + , "net.errors" + , "Interface Errors" + , "errors/s" + , "macos.plugin" + , "iokit" + , 7002 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "inbound", IFA_DATA(ierrors)); + rrddim_set(st, "outbound", IFA_DATA(oerrors)); + rrdset_done(st); + + st = rrdset_find_active_bytype_localhost("net_drops", ifa->ifa_name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "net_drops" + , ifa->ifa_name + , NULL + , ifa->ifa_name + , "net.drops" + , "Interface Drops" + , "drops/s" + , "macos.plugin" + , "iokit" + , 7003 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "inbound", IFA_DATA(iqdrops)); + rrdset_done(st); + + st = rrdset_find_active_bytype_localhost("net_events", ifa->ifa_name); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "net_events" + , ifa->ifa_name + , NULL + , ifa->ifa_name + , "net.events" + , "Network Interface Events" + , "events/s" + , "macos.plugin" + , "iokit" + , 7006 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "frames", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "collisions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "carrier", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "collisions", IFA_DATA(collisions)); + rrdset_done(st); + } + + freeifaddrs(ifap); + } + } + + return 0; +} diff --git a/collectors/macos.plugin/macos_mach_smi.c b/collectors/macos.plugin/macos_mach_smi.c new file mode 100644 index 0000000..53b2607 --- /dev/null +++ b/collectors/macos.plugin/macos_mach_smi.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_macos.h" + +#include + +int do_macos_mach_smi(int update_every, usec_t dt) { + (void)dt; + + static int do_cpu = -1, do_ram = - 1, do_swapio = -1, do_pgfaults = -1; + + if (unlikely(do_cpu == -1)) { + do_cpu = config_get_boolean("plugin:macos:mach_smi", "cpu utilization", 1); + do_ram = config_get_boolean("plugin:macos:mach_smi", "system ram", 1); + do_swapio = config_get_boolean("plugin:macos:mach_smi", "swap i/o", 1); + do_pgfaults = config_get_boolean("plugin:macos:mach_smi", "memory page faults", 1); + } + + RRDSET *st; + + kern_return_t kr; + mach_msg_type_number_t count; + host_t host; + vm_size_t system_pagesize; + + + // NEEDED BY: do_cpu + natural_t cp_time[CPU_STATE_MAX]; + + // NEEDED BY: do_ram, do_swapio, do_pgfaults +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1060) + vm_statistics64_data_t vm_statistics; +#else + vm_statistics_data_t vm_statistics; +#endif + + host = mach_host_self(); + kr = host_page_size(host, &system_pagesize); + if (unlikely(kr != KERN_SUCCESS)) + return -1; + + if (likely(do_cpu)) { + if (unlikely(HOST_CPU_LOAD_INFO_COUNT != 4)) { + error("MACOS: There are %d CPU states (4 was expected)", HOST_CPU_LOAD_INFO_COUNT); + do_cpu = 0; + error("DISABLED: system.cpu"); + } else { + count = HOST_CPU_LOAD_INFO_COUNT; + kr = host_statistics(host, HOST_CPU_LOAD_INFO, (host_info_t)cp_time, &count); + if (unlikely(kr != KERN_SUCCESS)) { + error("MACOS: host_statistics() failed: %s", mach_error_string(kr)); + do_cpu = 0; + error("DISABLED: system.cpu"); + } else { + + st = rrdset_find_active_bytype_localhost("system", "cpu"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "cpu" + , NULL + , "cpu" + , "system.cpu" + , "Total CPU utilization" + , "percentage" + , "macos.plugin" + , "mach_smi" + , 100 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrddim_add(st, "user", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_add(st, "nice", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_add(st, "system", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_add(st, "idle", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_hide(st, "idle"); + } + + rrddim_set(st, "user", cp_time[CPU_STATE_USER]); + rrddim_set(st, "nice", cp_time[CPU_STATE_NICE]); + rrddim_set(st, "system", cp_time[CPU_STATE_SYSTEM]); + rrddim_set(st, "idle", cp_time[CPU_STATE_IDLE]); + rrdset_done(st); + } + } + } + + if (likely(do_ram || do_swapio || do_pgfaults)) { +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1060) + count = sizeof(vm_statistics64_data_t); + kr = host_statistics64(host, HOST_VM_INFO64, (host_info64_t)&vm_statistics, &count); +#else + count = sizeof(vm_statistics_data_t); + kr = host_statistics(host, HOST_VM_INFO, (host_info_t)&vm_statistics, &count); +#endif + if (unlikely(kr != KERN_SUCCESS)) { + error("MACOS: host_statistics64() failed: %s", mach_error_string(kr)); + do_ram = 0; + error("DISABLED: system.ram"); + do_swapio = 0; + error("DISABLED: system.swapio"); + do_pgfaults = 0; + error("DISABLED: mem.pgfaults"); + } else { + if (likely(do_ram)) { + st = rrdset_find_active_localhost("system.ram"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "ram" + , NULL + , "ram" + , NULL + , "System RAM" + , "MiB" + , "macos.plugin" + , "mach_smi" + , 200 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrddim_add(st, "active", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "wired", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) + rrddim_add(st, "throttled", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "compressor", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); +#endif + rrddim_add(st, "inactive", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "purgeable", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "speculative", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "free", NULL, system_pagesize, 1048576, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "active", vm_statistics.active_count); + rrddim_set(st, "wired", vm_statistics.wire_count); +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) + rrddim_set(st, "throttled", vm_statistics.throttled_count); + rrddim_set(st, "compressor", vm_statistics.compressor_page_count); +#endif + rrddim_set(st, "inactive", vm_statistics.inactive_count); + rrddim_set(st, "purgeable", vm_statistics.purgeable_count); + rrddim_set(st, "speculative", vm_statistics.speculative_count); + rrddim_set(st, "free", (vm_statistics.free_count - vm_statistics.speculative_count)); + rrdset_done(st); + } + +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) + if (likely(do_swapio)) { + st = rrdset_find_active_localhost("system.swapio"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "swapio" + , NULL + , "swap" + , NULL + , "Swap I/O" + , "KiB/s" + , "macos.plugin" + , "mach_smi" + , 250 + , update_every + , RRDSET_TYPE_AREA + ); + + rrddim_add(st, "in", NULL, system_pagesize, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "out", NULL, -system_pagesize, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "in", vm_statistics.swapins); + rrddim_set(st, "out", vm_statistics.swapouts); + rrdset_done(st); + } +#endif + + if (likely(do_pgfaults)) { + st = rrdset_find_active_localhost("mem.pgfaults"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "mem" + , "pgfaults" + , NULL + , "system" + , NULL + , "Memory Page Faults" + , "faults/s" + , "macos.plugin" + , "mach_smi" + , NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "memory", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "cow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "pagein", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "pageout", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) + rrddim_add(st, "compress", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "decompress", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#endif + rrddim_add(st, "zero_fill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "reactivate", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "purge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "memory", vm_statistics.faults); + rrddim_set(st, "cow", vm_statistics.cow_faults); + rrddim_set(st, "pagein", vm_statistics.pageins); + rrddim_set(st, "pageout", vm_statistics.pageouts); +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) + rrddim_set(st, "compress", vm_statistics.compressions); + rrddim_set(st, "decompress", vm_statistics.decompressions); +#endif + rrddim_set(st, "zero_fill", vm_statistics.zero_fill_count); + rrddim_set(st, "reactivate", vm_statistics.reactivations); + rrddim_set(st, "purge", vm_statistics.purges); + rrdset_done(st); + } + } + } + + return 0; +} diff --git a/collectors/macos.plugin/macos_sysctl.c b/collectors/macos.plugin/macos_sysctl.c new file mode 100644 index 0000000..1f04f6e --- /dev/null +++ b/collectors/macos.plugin/macos_sysctl.c @@ -0,0 +1,1424 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_macos.h" + +#include +// NEEDED BY: do_bandwidth +#include +// NEEDED BY do_tcp... +#include +#include +#include +// NEEDED BY do_udp..., do_ip... +#include +// NEEDED BY do_udp... +#include +#include +// NEEDED BY do_icmp... +#include +#include +#include +// NEEDED BY do_icmp6... +#include +// NEEDED BY do_uptime +#include + +// MacOS calculates load averages once every 5 seconds +#define MIN_LOADAVG_UPDATE_EVERY 5 + +int do_macos_sysctl(int update_every, usec_t dt) { + static int do_loadavg = -1, do_swap = -1, do_bandwidth = -1, + do_tcp_packets = -1, do_tcp_errors = -1, do_tcp_handshake = -1, do_ecn = -1, + do_tcpext_syscookies = -1, do_tcpext_ofo = -1, do_tcpext_connaborts = -1, + do_udp_packets = -1, do_udp_errors = -1, do_icmp_packets = -1, do_icmpmsg = -1, + do_ip_packets = -1, do_ip_fragsout = -1, do_ip_fragsin = -1, do_ip_errors = -1, + do_ip6_packets = -1, do_ip6_fragsout = -1, do_ip6_fragsin = -1, do_ip6_errors = -1, + do_icmp6 = -1, do_icmp6_redir = -1, do_icmp6_errors = -1, do_icmp6_echos = -1, + do_icmp6_router = -1, do_icmp6_neighbor = -1, do_icmp6_types = -1, do_uptime = -1; + + + if (unlikely(do_loadavg == -1)) { + do_loadavg = config_get_boolean("plugin:macos:sysctl", "enable load average", 1); + do_swap = config_get_boolean("plugin:macos:sysctl", "system swap", 1); + do_bandwidth = config_get_boolean("plugin:macos:sysctl", "bandwidth", 1); + do_tcp_packets = config_get_boolean("plugin:macos:sysctl", "ipv4 TCP packets", 1); + do_tcp_errors = config_get_boolean("plugin:macos:sysctl", "ipv4 TCP errors", 1); + do_tcp_handshake = config_get_boolean("plugin:macos:sysctl", "ipv4 TCP handshake issues", 1); + do_ecn = config_get_boolean_ondemand("plugin:macos:sysctl", "ECN packets", CONFIG_BOOLEAN_AUTO); + do_tcpext_syscookies = config_get_boolean_ondemand("plugin:macos:sysctl", "TCP SYN cookies", CONFIG_BOOLEAN_AUTO); + do_tcpext_ofo = config_get_boolean_ondemand("plugin:macos:sysctl", "TCP out-of-order queue", CONFIG_BOOLEAN_AUTO); + do_tcpext_connaborts = config_get_boolean_ondemand("plugin:macos:sysctl", "TCP connection aborts", CONFIG_BOOLEAN_AUTO); + do_udp_packets = config_get_boolean("plugin:macos:sysctl", "ipv4 UDP packets", 1); + do_udp_errors = config_get_boolean("plugin:macos:sysctl", "ipv4 UDP errors", 1); + do_icmp_packets = config_get_boolean("plugin:macos:sysctl", "ipv4 ICMP packets", 1); + do_icmpmsg = config_get_boolean("plugin:macos:sysctl", "ipv4 ICMP messages", 1); + do_ip_packets = config_get_boolean("plugin:macos:sysctl", "ipv4 packets", 1); + do_ip_fragsout = config_get_boolean("plugin:macos:sysctl", "ipv4 fragments sent", 1); + do_ip_fragsin = config_get_boolean("plugin:macos:sysctl", "ipv4 fragments assembly", 1); + do_ip_errors = config_get_boolean("plugin:macos:sysctl", "ipv4 errors", 1); + do_ip6_packets = config_get_boolean_ondemand("plugin:macos:sysctl", "ipv6 packets", CONFIG_BOOLEAN_AUTO); + do_ip6_fragsout = config_get_boolean_ondemand("plugin:macos:sysctl", "ipv6 fragments sent", CONFIG_BOOLEAN_AUTO); + do_ip6_fragsin = config_get_boolean_ondemand("plugin:macos:sysctl", "ipv6 fragments assembly", CONFIG_BOOLEAN_AUTO); + do_ip6_errors = config_get_boolean_ondemand("plugin:macos:sysctl", "ipv6 errors", CONFIG_BOOLEAN_AUTO); + do_icmp6 = config_get_boolean_ondemand("plugin:macos:sysctl", "icmp", CONFIG_BOOLEAN_AUTO); + do_icmp6_redir = config_get_boolean_ondemand("plugin:macos:sysctl", "icmp redirects", CONFIG_BOOLEAN_AUTO); + do_icmp6_errors = config_get_boolean_ondemand("plugin:macos:sysctl", "icmp errors", CONFIG_BOOLEAN_AUTO); + do_icmp6_echos = config_get_boolean_ondemand("plugin:macos:sysctl", "icmp echos", CONFIG_BOOLEAN_AUTO); + do_icmp6_router = config_get_boolean_ondemand("plugin:macos:sysctl", "icmp router", CONFIG_BOOLEAN_AUTO); + do_icmp6_neighbor = config_get_boolean_ondemand("plugin:macos:sysctl", "icmp neighbor", CONFIG_BOOLEAN_AUTO); + do_icmp6_types = config_get_boolean_ondemand("plugin:macos:sysctl", "icmp types", CONFIG_BOOLEAN_AUTO); + do_uptime = config_get_boolean("plugin:macos:sysctl", "system uptime", 1); + } + + RRDSET *st = NULL; + + int i; + size_t size; + + // NEEDED BY: do_loadavg + static usec_t next_loadavg_dt = 0; + struct loadavg sysload; + + // NEEDED BY: do_swap + struct xsw_usage swap_usage; + + // NEEDED BY: do_bandwidth + int mib[6]; + static char *ifstatdata = NULL; + char *lim, *next; + struct if_msghdr *ifm; + struct iftot { + u_long ift_ibytes; + u_long ift_obytes; + } iftot = {0, 0}; + + // NEEDED BY: do_tcp... + struct tcpstat tcpstat; + + // NEEDED BY: do_udp... + struct udpstat udpstat; + + // NEEDED BY: do_icmp... + struct icmpstat icmpstat; + struct icmp_total { + u_long msgs_in; + u_long msgs_out; + } icmp_total = {0, 0}; + + // NEEDED BY: do_ip... + struct ipstat ipstat; + + // NEEDED BY: do_ip6... + /* + * Dirty workaround for /usr/include/netinet6/ip6_var.h absence. + * Struct ip6stat was copied from bsd/netinet6/ip6_var.h from xnu sources. + * Do the same for previously missing scope6_var.h on OS X < 10.11. + */ +#define IP6S_SRCRULE_COUNT 16 + +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED < 101100) +#ifndef _NETINET6_SCOPE6_VAR_H_ +#define _NETINET6_SCOPE6_VAR_H_ +#include + +#define SCOPE6_ID_MAX 16 +#endif +#else +#include +#endif + + struct ip6stat { + u_quad_t ip6s_total; /* total packets received */ + u_quad_t ip6s_tooshort; /* packet too short */ + u_quad_t ip6s_toosmall; /* not enough data */ + u_quad_t ip6s_fragments; /* fragments received */ + u_quad_t ip6s_fragdropped; /* frags dropped(dups, out of space) */ + u_quad_t ip6s_fragtimeout; /* fragments timed out */ + u_quad_t ip6s_fragoverflow; /* fragments that exceeded limit */ + u_quad_t ip6s_forward; /* packets forwarded */ + u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */ + u_quad_t ip6s_redirectsent; /* packets forwarded on same net */ + u_quad_t ip6s_delivered; /* datagrams delivered to upper level */ + u_quad_t ip6s_localout; /* total ip packets generated here */ + u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. */ + u_quad_t ip6s_reassembled; /* total packets reassembled ok */ + u_quad_t ip6s_atmfrag_rcvd; /* atomic fragments received */ + u_quad_t ip6s_fragmented; /* datagrams successfully fragmented */ + u_quad_t ip6s_ofragments; /* output fragments created */ + u_quad_t ip6s_cantfrag; /* don't fragment flag was set, etc. */ + u_quad_t ip6s_badoptions; /* error in option processing */ + u_quad_t ip6s_noroute; /* packets discarded due to no route */ + u_quad_t ip6s_badvers; /* ip6 version != 6 */ + u_quad_t ip6s_rawout; /* total raw ip packets generated */ + u_quad_t ip6s_badscope; /* scope error */ + u_quad_t ip6s_notmember; /* don't join this multicast group */ + u_quad_t ip6s_nxthist[256]; /* next header history */ + u_quad_t ip6s_m1; /* one mbuf */ + u_quad_t ip6s_m2m[32]; /* two or more mbuf */ + u_quad_t ip6s_mext1; /* one ext mbuf */ + u_quad_t ip6s_mext2m; /* two or more ext mbuf */ + u_quad_t ip6s_exthdrtoolong; /* ext hdr are not continuous */ + u_quad_t ip6s_nogif; /* no match gif found */ + u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */ + + /* + * statistics for improvement of the source address selection + * algorithm: + */ + /* number of times that address selection fails */ + u_quad_t ip6s_sources_none; + /* number of times that an address on the outgoing I/F is chosen */ + u_quad_t ip6s_sources_sameif[SCOPE6_ID_MAX]; + /* number of times that an address on a non-outgoing I/F is chosen */ + u_quad_t ip6s_sources_otherif[SCOPE6_ID_MAX]; + /* + * number of times that an address that has the same scope + * from the destination is chosen. + */ + u_quad_t ip6s_sources_samescope[SCOPE6_ID_MAX]; + /* + * number of times that an address that has a different scope + * from the destination is chosen. + */ + u_quad_t ip6s_sources_otherscope[SCOPE6_ID_MAX]; + /* number of times that a deprecated address is chosen */ + u_quad_t ip6s_sources_deprecated[SCOPE6_ID_MAX]; + + u_quad_t ip6s_forward_cachehit; + u_quad_t ip6s_forward_cachemiss; + + /* number of times that each rule of source selection is applied. */ + u_quad_t ip6s_sources_rule[IP6S_SRCRULE_COUNT]; + + /* number of times we ignored address on expensive secondary interfaces */ + u_quad_t ip6s_sources_skip_expensive_secondary_if; + + /* pkt dropped, no mbufs for control data */ + u_quad_t ip6s_pktdropcntrl; + + /* total packets trimmed/adjusted */ + u_quad_t ip6s_adj; + /* hwcksum info discarded during adjustment */ + u_quad_t ip6s_adj_hwcsum_clr; + + /* duplicate address detection collisions */ + u_quad_t ip6s_dad_collide; + + /* DAD NS looped back */ + u_quad_t ip6s_dad_loopcount; + } ip6stat; + + // NEEDED BY: do_icmp6... + struct icmp6stat icmp6stat; + struct icmp6_total { + u_long msgs_in; + u_long msgs_out; + } icmp6_total = {0, 0}; + + // NEEDED BY: do_uptime + struct timespec boot_time, cur_time; + + if (next_loadavg_dt <= dt) { + if (likely(do_loadavg)) { + if (unlikely(GETSYSCTL_BY_NAME("vm.loadavg", sysload))) { + do_loadavg = 0; + error("DISABLED: system.load"); + } else { + + st = rrdset_find_active_bytype_localhost("system", "load"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "load" + , NULL + , "load" + , NULL + , "System Load Average" + , "load" + , "macos.plugin" + , "sysctl" + , 100 + , (update_every < MIN_LOADAVG_UPDATE_EVERY) ? MIN_LOADAVG_UPDATE_EVERY : update_every + , RRDSET_TYPE_LINE + ); + rrddim_add(st, "load1", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "load5", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "load15", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "load1", (collected_number) ((double)sysload.ldavg[0] / sysload.fscale * 1000)); + rrddim_set(st, "load5", (collected_number) ((double)sysload.ldavg[1] / sysload.fscale * 1000)); + rrddim_set(st, "load15", (collected_number) ((double)sysload.ldavg[2] / sysload.fscale * 1000)); + rrdset_done(st); + } + } + + next_loadavg_dt = st->update_every * USEC_PER_SEC; + } + else next_loadavg_dt -= dt; + + if (likely(do_swap)) { + if (unlikely(GETSYSCTL_BY_NAME("vm.swapusage", swap_usage))) { + do_swap = 0; + error("DISABLED: system.swap"); + } else { + st = rrdset_find_active_localhost("system.swap"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "swap" + , NULL + , "swap" + , NULL + , "System Swap" + , "MiB" + , "macos.plugin" + , "sysctl" + , 201 + , update_every + , RRDSET_TYPE_STACKED + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "free", NULL, 1, 1048576, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(st, "used", NULL, 1, 1048576, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "free", swap_usage.xsu_avail); + rrddim_set(st, "used", swap_usage.xsu_used); + rrdset_done(st); + } + } + + if (likely(do_bandwidth)) { + mib[0] = CTL_NET; + mib[1] = PF_ROUTE; + mib[2] = 0; + mib[3] = AF_INET; + mib[4] = NET_RT_IFLIST2; + mib[5] = 0; + if (unlikely(sysctl(mib, 6, NULL, &size, NULL, 0))) { + error("MACOS: sysctl(%s...) failed: %s", "net interfaces", strerror(errno)); + do_bandwidth = 0; + error("DISABLED: system.ipv4"); + } else { + ifstatdata = reallocz(ifstatdata, size); + if (unlikely(sysctl(mib, 6, ifstatdata, &size, NULL, 0) < 0)) { + error("MACOS: sysctl(%s...) failed: %s", "net interfaces", strerror(errno)); + do_bandwidth = 0; + error("DISABLED: system.ipv4"); + } else { + lim = ifstatdata + size; + iftot.ift_ibytes = iftot.ift_obytes = 0; + for (next = ifstatdata; next < lim; ) { + ifm = (struct if_msghdr *)next; + next += ifm->ifm_msglen; + + if (ifm->ifm_type == RTM_IFINFO2) { + struct if_msghdr2 *if2m = (struct if_msghdr2 *)ifm; + + iftot.ift_ibytes += if2m->ifm_data.ifi_ibytes; + iftot.ift_obytes += if2m->ifm_data.ifi_obytes; + } + } + st = rrdset_find_active_localhost("system.ipv4"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "ipv4" + , NULL + , "network" + , NULL + , "IPv4 Bandwidth" + , "kilobits/s" + , "macos.plugin" + , "sysctl" + , 500 + , update_every + , RRDSET_TYPE_AREA + ); + + rrddim_add(st, "InOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InOctets", iftot.ift_ibytes); + rrddim_set(st, "OutOctets", iftot.ift_obytes); + rrdset_done(st); + } + } + } + + // see http://net-snmp.sourceforge.net/docs/mibs/tcp.html + if (likely(do_tcp_packets || do_tcp_errors || do_tcp_handshake || do_tcpext_connaborts || do_tcpext_ofo || do_tcpext_syscookies || do_ecn)) { + if (unlikely(GETSYSCTL_BY_NAME("net.inet.tcp.stats", tcpstat))){ + do_tcp_packets = 0; + error("DISABLED: ipv4.tcppackets"); + do_tcp_errors = 0; + error("DISABLED: ipv4.tcperrors"); + do_tcp_handshake = 0; + error("DISABLED: ipv4.tcphandshake"); + do_tcpext_connaborts = 0; + error("DISABLED: ipv4.tcpconnaborts"); + do_tcpext_ofo = 0; + error("DISABLED: ipv4.tcpofo"); + do_tcpext_syscookies = 0; + error("DISABLED: ipv4.tcpsyncookies"); + do_ecn = 0; + error("DISABLED: ipv4.ecnpkts"); + } else { + if (likely(do_tcp_packets)) { + st = rrdset_find_active_localhost("ipv4.tcppackets"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "tcppackets" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Packets" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 2600 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InSegs", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutSegs", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InSegs", tcpstat.tcps_rcvtotal); + rrddim_set(st, "OutSegs", tcpstat.tcps_sndtotal); + rrdset_done(st); + } + + if (likely(do_tcp_errors)) { + st = rrdset_find_active_localhost("ipv4.tcperrors"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "tcperrors" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Errors" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 2700 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "InErrs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "RetransSegs", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InErrs", tcpstat.tcps_rcvbadoff + tcpstat.tcps_rcvshort); + rrddim_set(st, "InCsumErrors", tcpstat.tcps_rcvbadsum); + rrddim_set(st, "RetransSegs", tcpstat.tcps_sndrexmitpack); + rrdset_done(st); + } + + if (likely(do_tcp_handshake)) { + st = rrdset_find_active_localhost("ipv4.tcphandshake"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "tcphandshake" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Handshake Issues" + , "events/s" + , "macos.plugin" + , "sysctl" + , 2900 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "EstabResets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "ActiveOpens", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "PassiveOpens", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "AttemptFails", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "EstabResets", tcpstat.tcps_drops); + rrddim_set(st, "ActiveOpens", tcpstat.tcps_connattempt); + rrddim_set(st, "PassiveOpens", tcpstat.tcps_accepts); + rrddim_set(st, "AttemptFails", tcpstat.tcps_conndrops); + rrdset_done(st); + } + + if (do_tcpext_connaborts == CONFIG_BOOLEAN_YES || (do_tcpext_connaborts == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_rcvpackafterwin || + tcpstat.tcps_rcvafterclose || + tcpstat.tcps_rcvmemdrop || + tcpstat.tcps_persistdrop || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_connaborts = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv4.tcpconnaborts"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "tcpconnaborts" + , NULL + , "tcp" + , NULL + , "TCP Connection Aborts" + , "connections/s" + , "macos.plugin" + , "sysctl" + , 3010 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "TCPAbortOnData", "baddata", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "TCPAbortOnClose", "userclosed", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "TCPAbortOnMemory", "nomemory", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "TCPAbortOnTimeout", "timeout", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "TCPAbortOnData", tcpstat.tcps_rcvpackafterwin); + rrddim_set(st, "TCPAbortOnClose", tcpstat.tcps_rcvafterclose); + rrddim_set(st, "TCPAbortOnMemory", tcpstat.tcps_rcvmemdrop); + rrddim_set(st, "TCPAbortOnTimeout", tcpstat.tcps_persistdrop); + rrdset_done(st); + } + + if (do_tcpext_ofo == CONFIG_BOOLEAN_YES || (do_tcpext_ofo == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_rcvoopack || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_ofo = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv4.tcpofo"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "tcpofo" + , NULL + , "tcp" + , NULL + , "TCP Out-Of-Order Queue" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3050 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "TCPOFOQueue", "inqueue", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "TCPOFOQueue", tcpstat.tcps_rcvoopack); + rrdset_done(st); + } + + if (do_tcpext_syscookies == CONFIG_BOOLEAN_YES || (do_tcpext_syscookies == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_sc_sendcookie || + tcpstat.tcps_sc_recvcookie || + tcpstat.tcps_sc_zonefail || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_syscookies = CONFIG_BOOLEAN_YES; + + st = rrdset_find_active_localhost("ipv4.tcpsyncookies"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "tcpsyncookies" + , NULL + , "tcp" + , NULL + , "TCP SYN Cookies" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3100 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "SyncookiesRecv", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "SyncookiesSent", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "SyncookiesFailed", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "SyncookiesRecv", tcpstat.tcps_sc_recvcookie); + rrddim_set(st, "SyncookiesSent", tcpstat.tcps_sc_sendcookie); + rrddim_set(st, "SyncookiesFailed", tcpstat.tcps_sc_zonefail); + rrdset_done(st); + } + +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101100) + if (do_ecn == CONFIG_BOOLEAN_YES || (do_ecn == CONFIG_BOOLEAN_AUTO && + (tcpstat.tcps_ecn_recv_ce || + tcpstat.tcps_ecn_not_supported || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ecn = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv4.ecnpkts"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "ecnpkts" + , NULL + , "ecn" + , NULL + , "IPv4 ECN Statistics" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 8700 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "InCEPkts", "CEP", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InNoECTPkts", "NoECTP", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InCEPkts", tcpstat.tcps_ecn_recv_ce); + rrddim_set(st, "InNoECTPkts", tcpstat.tcps_ecn_not_supported); + rrdset_done(st); + } +#endif + + } + } + + // see http://net-snmp.sourceforge.net/docs/mibs/udp.html + if (likely(do_udp_packets || do_udp_errors)) { + if (unlikely(GETSYSCTL_BY_NAME("net.inet.udp.stats", udpstat))) { + do_udp_packets = 0; + error("DISABLED: ipv4.udppackets"); + do_udp_errors = 0; + error("DISABLED: ipv4.udperrors"); + } else { + if (likely(do_udp_packets)) { + st = rrdset_find_active_localhost("ipv4.udppackets"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "udppackets" + , NULL + , "udp" + , NULL + , "IPv4 UDP Packets" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 2601 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InDatagrams", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InDatagrams", udpstat.udps_ipackets); + rrddim_set(st, "OutDatagrams", udpstat.udps_opackets); + rrdset_done(st); + } + + if (likely(do_udp_errors)) { + st = rrdset_find_active_localhost("ipv4.udperrors"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "udperrors" + , NULL + , "udp" + , NULL + , "IPv4 UDP Errors" + , "events/s" + , "macos.plugin" + , "sysctl" + , 2701 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "NoPorts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) + rrddim_add(st, "IgnoredMulti", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#endif + } + + rrddim_set(st, "InErrors", udpstat.udps_hdrops + udpstat.udps_badlen); + rrddim_set(st, "NoPorts", udpstat.udps_noport); + rrddim_set(st, "RcvbufErrors", udpstat.udps_fullsock); +#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) + rrddim_set(st, "InCsumErrors", udpstat.udps_badsum + udpstat.udps_nosum); + rrddim_set(st, "IgnoredMulti", udpstat.udps_filtermcast); +#else + rrddim_set(st, "InCsumErrors", udpstat.udps_badsum); +#endif + rrdset_done(st); + } + } + } + + if (likely(do_icmp_packets || do_icmpmsg)) { + if (unlikely(GETSYSCTL_BY_NAME("net.inet.icmp.stats", icmpstat))) { + do_icmp_packets = 0; + error("DISABLED: ipv4.icmp"); + error("DISABLED: ipv4.icmp_errors"); + do_icmpmsg = 0; + error("DISABLED: ipv4.icmpmsg"); + } else { + for (i = 0; i <= ICMP_MAXTYPE; i++) { + icmp_total.msgs_in += icmpstat.icps_inhist[i]; + icmp_total.msgs_out += icmpstat.icps_outhist[i]; + } + icmp_total.msgs_in += icmpstat.icps_badcode + icmpstat.icps_badlen + icmpstat.icps_checksum + icmpstat.icps_tooshort; + + // -------------------------------------------------------------------- + + if (likely(do_icmp_packets)) { + st = rrdset_find_active_localhost("ipv4.icmp"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "icmp" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Packets" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 2602 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InMsgs", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutMsgs", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InMsgs", icmp_total.msgs_in); + rrddim_set(st, "OutMsgs", icmp_total.msgs_out); + rrdset_done(st); + + st = rrdset_find_active_localhost("ipv4.icmp_errors"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "icmp_errors" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Errors" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 2603 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InErrors", icmpstat.icps_badcode + icmpstat.icps_badlen + icmpstat.icps_checksum + icmpstat.icps_tooshort); + rrddim_set(st, "OutErrors", icmpstat.icps_error); + rrddim_set(st, "InCsumErrors", icmpstat.icps_checksum); + rrdset_done(st); + } + + if (likely(do_icmpmsg)) { + st = rrdset_find_active_localhost("ipv4.icmpmsg"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "icmpmsg" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Messages" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 2604 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InEchoReps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutEchoReps", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InEchos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutEchos", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InEchoReps", icmpstat.icps_inhist[ICMP_ECHOREPLY]); + rrddim_set(st, "OutEchoReps", icmpstat.icps_outhist[ICMP_ECHOREPLY]); + rrddim_set(st, "InEchos", icmpstat.icps_inhist[ICMP_ECHO]); + rrddim_set(st, "OutEchos", icmpstat.icps_outhist[ICMP_ECHO]); + rrdset_done(st); + } + } + } + + // see also http://net-snmp.sourceforge.net/docs/mibs/ip.html + if (likely(do_ip_packets || do_ip_fragsout || do_ip_fragsin || do_ip_errors)) { + if (unlikely(GETSYSCTL_BY_NAME("net.inet.ip.stats", ipstat))) { + do_ip_packets = 0; + error("DISABLED: ipv4.packets"); + do_ip_fragsout = 0; + error("DISABLED: ipv4.fragsout"); + do_ip_fragsin = 0; + error("DISABLED: ipv4.fragsin"); + do_ip_errors = 0; + error("DISABLED: ipv4.errors"); + } else { + if (likely(do_ip_packets)) { + st = rrdset_find_active_localhost("ipv4.packets"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "packets" + , NULL + , "packets" + , NULL + , "IPv4 Packets" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3000 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InReceives", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutRequests", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "ForwDatagrams", "forwarded", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InDelivers", "delivered", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "OutRequests", ipstat.ips_localout); + rrddim_set(st, "InReceives", ipstat.ips_total); + rrddim_set(st, "ForwDatagrams", ipstat.ips_forward); + rrddim_set(st, "InDelivers", ipstat.ips_delivered); + rrdset_done(st); + } + + if (likely(do_ip_fragsout)) { + st = rrdset_find_active_localhost("ipv4.fragsout"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "fragsout" + , NULL + , "fragments" + , NULL + , "IPv4 Fragments Sent" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3010 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "FragOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "FragFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "FragCreates", "created", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "FragOKs", ipstat.ips_fragmented); + rrddim_set(st, "FragFails", ipstat.ips_cantfrag); + rrddim_set(st, "FragCreates", ipstat.ips_ofragments); + rrdset_done(st); + } + + if (likely(do_ip_fragsin)) { + st = rrdset_find_active_localhost("ipv4.fragsin"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "fragsin" + , NULL + , "fragments" + , NULL + , "IPv4 Fragments Reassembly" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3011 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "ReasmOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "ReasmFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "ReasmReqds", "all", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "ReasmOKs", ipstat.ips_fragments); + rrddim_set(st, "ReasmFails", ipstat.ips_fragdropped); + rrddim_set(st, "ReasmReqds", ipstat.ips_reassembled); + rrdset_done(st); + } + + if (likely(do_ip_errors)) { + st = rrdset_find_active_localhost("ipv4.errors"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "errors" + , NULL + , "errors" + , NULL + , "IPv4 Errors" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3002 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "InDiscards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutDiscards", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InUnknownProtos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InDiscards", ipstat.ips_badsum + ipstat.ips_tooshort + ipstat.ips_toosmall + ipstat.ips_toolong); + rrddim_set(st, "OutDiscards", ipstat.ips_odropped); + rrddim_set(st, "InHdrErrors", ipstat.ips_badhlen + ipstat.ips_badlen + ipstat.ips_badoptions + ipstat.ips_badvers); + rrddim_set(st, "InAddrErrors", ipstat.ips_badaddr); + rrddim_set(st, "InUnknownProtos", ipstat.ips_noproto); + rrddim_set(st, "OutNoRoutes", ipstat.ips_noroute); + rrdset_done(st); + } + } + } + + if (likely(do_ip6_packets || do_ip6_fragsout || do_ip6_fragsin || do_ip6_errors)) { + if (unlikely(GETSYSCTL_BY_NAME("net.inet6.ip6.stats", ip6stat))) { + do_ip6_packets = 0; + error("DISABLED: ipv6.packets"); + do_ip6_fragsout = 0; + error("DISABLED: ipv6.fragsout"); + do_ip6_fragsin = 0; + error("DISABLED: ipv6.fragsin"); + do_ip6_errors = 0; + error("DISABLED: ipv6.errors"); + } else { + if (do_ip6_packets == CONFIG_BOOLEAN_YES || (do_ip6_packets == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_localout || + ip6stat.ip6s_total || + ip6stat.ip6s_forward || + ip6stat.ip6s_delivered || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_packets = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.packets"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "packets" + , NULL + , "packets" + , NULL + , "IPv6 Packets" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3000 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "forwarded", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "delivers", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "sent", ip6stat.ip6s_localout); + rrddim_set(st, "received", ip6stat.ip6s_total); + rrddim_set(st, "forwarded", ip6stat.ip6s_forward); + rrddim_set(st, "delivers", ip6stat.ip6s_delivered); + rrdset_done(st); + } + + if (do_ip6_fragsout == CONFIG_BOOLEAN_YES || (do_ip6_fragsout == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_fragmented || + ip6stat.ip6s_cantfrag || + ip6stat.ip6s_ofragments || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_fragsout = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.fragsout"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "fragsout" + , NULL + , "fragments" + , NULL + , "IPv6 Fragments Sent" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3010 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "all", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "ok", ip6stat.ip6s_fragmented); + rrddim_set(st, "failed", ip6stat.ip6s_cantfrag); + rrddim_set(st, "all", ip6stat.ip6s_ofragments); + rrdset_done(st); + } + + if (do_ip6_fragsin == CONFIG_BOOLEAN_YES || (do_ip6_fragsin == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_reassembled || + ip6stat.ip6s_fragdropped || + ip6stat.ip6s_fragtimeout || + ip6stat.ip6s_fragments || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_fragsin = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.fragsin"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "fragsin" + , NULL + , "fragments" + , NULL + , "IPv6 Fragments Reassembly" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3011 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "timeout", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "all", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "ok", ip6stat.ip6s_reassembled); + rrddim_set(st, "failed", ip6stat.ip6s_fragdropped); + rrddim_set(st, "timeout", ip6stat.ip6s_fragtimeout); + rrddim_set(st, "all", ip6stat.ip6s_fragments); + rrdset_done(st); + } + + if (do_ip6_errors == CONFIG_BOOLEAN_YES || (do_ip6_errors == CONFIG_BOOLEAN_AUTO && + (ip6stat.ip6s_toosmall || + ip6stat.ip6s_odropped || + ip6stat.ip6s_badoptions || + ip6stat.ip6s_badvers || + ip6stat.ip6s_exthdrtoolong || + ip6stat.ip6s_sources_none || + ip6stat.ip6s_tooshort || + ip6stat.ip6s_cantforward || + ip6stat.ip6s_noroute || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_errors = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.errors"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "errors" + , NULL + , "errors" + , NULL + , "IPv6 Errors" + , "packets/s" + , "macos.plugin" + , "sysctl" + , 3002 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rrddim_add(st, "InDiscards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutDiscards", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InTruncatedPkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InNoRoutes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InDiscards", ip6stat.ip6s_toosmall); + rrddim_set(st, "OutDiscards", ip6stat.ip6s_odropped); + + rrddim_set(st, "InHdrErrors", + ip6stat.ip6s_badoptions + ip6stat.ip6s_badvers + ip6stat.ip6s_exthdrtoolong); + rrddim_set(st, "InAddrErrors", ip6stat.ip6s_sources_none); + rrddim_set(st, "InTruncatedPkts", ip6stat.ip6s_tooshort); + rrddim_set(st, "InNoRoutes", ip6stat.ip6s_cantforward); + + rrddim_set(st, "OutNoRoutes", ip6stat.ip6s_noroute); + rrdset_done(st); + } + } + } + + if (likely(do_icmp6 || do_icmp6_redir || do_icmp6_errors || do_icmp6_echos || do_icmp6_router || do_icmp6_neighbor || do_icmp6_types)) { + if (unlikely(GETSYSCTL_BY_NAME("net.inet6.icmp6.stats", icmp6stat))) { + do_icmp6 = 0; + error("DISABLED: ipv6.icmp"); + } else { + for (i = 0; i <= ICMP6_MAXTYPE; i++) { + icmp6_total.msgs_in += icmp6stat.icp6s_inhist[i]; + icmp6_total.msgs_out += icmp6stat.icp6s_outhist[i]; + } + icmp6_total.msgs_in += icmp6stat.icp6s_badcode + icmp6stat.icp6s_badlen + icmp6stat.icp6s_checksum + icmp6stat.icp6s_tooshort; + if (do_icmp6 == CONFIG_BOOLEAN_YES || (do_icmp6 == CONFIG_BOOLEAN_AUTO && + (icmp6_total.msgs_in || + icmp6_total.msgs_out || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6 = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.icmp"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "icmp" + , NULL + , "icmp" + , NULL + , "IPv6 ICMP Messages" + , "messages/s" + , "macos.plugin" + , "sysctl" + , 10000 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "sent", icmp6_total.msgs_in); + rrddim_set(st, "received", icmp6_total.msgs_out); + rrdset_done(st); + } + + if (do_icmp6_redir == CONFIG_BOOLEAN_YES || (do_icmp6_redir == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ND_REDIRECT] || + icmp6stat.icp6s_outhist[ND_REDIRECT] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_redir = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.icmpredir"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "icmpredir" + , NULL + , "icmp" + , NULL + , "IPv6 ICMP Redirects" + , "redirects/s" + , "macos.plugin" + , "sysctl" + , 10050 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "sent", icmp6stat.icp6s_inhist[ND_REDIRECT]); + rrddim_set(st, "received", icmp6stat.icp6s_outhist[ND_REDIRECT]); + rrdset_done(st); + } + + if (do_icmp6_errors == CONFIG_BOOLEAN_YES || (do_icmp6_errors == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_badcode || + icmp6stat.icp6s_badlen || + icmp6stat.icp6s_checksum || + icmp6stat.icp6s_tooshort || + icmp6stat.icp6s_error || + icmp6stat.icp6s_inhist[ICMP6_DST_UNREACH] || + icmp6stat.icp6s_inhist[ICMP6_TIME_EXCEEDED] || + icmp6stat.icp6s_inhist[ICMP6_PARAM_PROB] || + icmp6stat.icp6s_outhist[ICMP6_DST_UNREACH] || + icmp6stat.icp6s_outhist[ICMP6_TIME_EXCEEDED] || + icmp6stat.icp6s_outhist[ICMP6_PARAM_PROB] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_errors = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.icmperrors"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "icmperrors" + , NULL + , "icmp" + , NULL + , "IPv6 ICMP Errors" + , "errors/s" + , "macos.plugin" + , "sysctl" + , 10100 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InDestUnreachs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InPktTooBigs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InTimeExcds", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InParmProblems", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutDestUnreachs", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutTimeExcds", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutParmProblems", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InErrors", icmp6stat.icp6s_badcode + icmp6stat.icp6s_badlen + icmp6stat.icp6s_checksum + icmp6stat.icp6s_tooshort); + rrddim_set(st, "OutErrors", icmp6stat.icp6s_error); + rrddim_set(st, "InCsumErrors", icmp6stat.icp6s_checksum); + rrddim_set(st, "InDestUnreachs", icmp6stat.icp6s_inhist[ICMP6_DST_UNREACH]); + rrddim_set(st, "InPktTooBigs", icmp6stat.icp6s_badlen); + rrddim_set(st, "InTimeExcds", icmp6stat.icp6s_inhist[ICMP6_TIME_EXCEEDED]); + rrddim_set(st, "InParmProblems", icmp6stat.icp6s_inhist[ICMP6_PARAM_PROB]); + rrddim_set(st, "OutDestUnreachs", icmp6stat.icp6s_outhist[ICMP6_DST_UNREACH]); + rrddim_set(st, "OutTimeExcds", icmp6stat.icp6s_outhist[ICMP6_TIME_EXCEEDED]); + rrddim_set(st, "OutParmProblems", icmp6stat.icp6s_outhist[ICMP6_PARAM_PROB]); + rrdset_done(st); + } + + if (do_icmp6_echos == CONFIG_BOOLEAN_YES || (do_icmp6_echos == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ICMP6_ECHO_REQUEST] || + icmp6stat.icp6s_outhist[ICMP6_ECHO_REQUEST] || + icmp6stat.icp6s_inhist[ICMP6_ECHO_REPLY] || + icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_echos = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.icmpechos"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "icmpechos" + , NULL + , "icmp" + , NULL + , "IPv6 ICMP Echo" + , "messages/s" + , "macos.plugin" + , "sysctl" + , 10200 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InEchos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutEchos", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InEchoReplies", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutEchoReplies", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InEchos", icmp6stat.icp6s_inhist[ICMP6_ECHO_REQUEST]); + rrddim_set(st, "OutEchos", icmp6stat.icp6s_outhist[ICMP6_ECHO_REQUEST]); + rrddim_set(st, "InEchoReplies", icmp6stat.icp6s_inhist[ICMP6_ECHO_REPLY]); + rrddim_set(st, "OutEchoReplies", icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]); + rrdset_done(st); + } + + if (do_icmp6_router == CONFIG_BOOLEAN_YES || (do_icmp6_router == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ND_ROUTER_SOLICIT] || + icmp6stat.icp6s_outhist[ND_ROUTER_SOLICIT] || + icmp6stat.icp6s_inhist[ND_ROUTER_ADVERT] || + icmp6stat.icp6s_outhist[ND_ROUTER_ADVERT] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_router = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.icmprouter"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "icmprouter" + , NULL + , "icmp" + , NULL + , "IPv6 Router Messages" + , "messages/s" + , "macos.plugin" + , "sysctl" + , 10400 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InSolicits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutSolicits", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InAdvertisements", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutAdvertisements", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InSolicits", icmp6stat.icp6s_inhist[ND_ROUTER_SOLICIT]); + rrddim_set(st, "OutSolicits", icmp6stat.icp6s_outhist[ND_ROUTER_SOLICIT]); + rrddim_set(st, "InAdvertisements", icmp6stat.icp6s_inhist[ND_ROUTER_ADVERT]); + rrddim_set(st, "OutAdvertisements", icmp6stat.icp6s_outhist[ND_ROUTER_ADVERT]); + rrdset_done(st); + } + + if (do_icmp6_neighbor == CONFIG_BOOLEAN_YES || (do_icmp6_neighbor == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[ND_NEIGHBOR_SOLICIT] || + icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT] || + icmp6stat.icp6s_inhist[ND_NEIGHBOR_ADVERT] || + icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_neighbor = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.icmpneighbor"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "icmpneighbor" + , NULL + , "icmp" + , NULL + , "IPv6 Neighbor Messages" + , "messages/s" + , "macos.plugin" + , "sysctl" + , 10500 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InSolicits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutSolicits", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InAdvertisements", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutAdvertisements", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InSolicits", icmp6stat.icp6s_inhist[ND_NEIGHBOR_SOLICIT]); + rrddim_set(st, "OutSolicits", icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT]); + rrddim_set(st, "InAdvertisements", icmp6stat.icp6s_inhist[ND_NEIGHBOR_ADVERT]); + rrddim_set(st, "OutAdvertisements", icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT]); + } + + if (do_icmp6_types == CONFIG_BOOLEAN_YES || (do_icmp6_types == CONFIG_BOOLEAN_AUTO && + (icmp6stat.icp6s_inhist[1] || + icmp6stat.icp6s_inhist[128] || + icmp6stat.icp6s_inhist[129] || + icmp6stat.icp6s_inhist[136] || + icmp6stat.icp6s_outhist[1] || + icmp6stat.icp6s_outhist[128] || + icmp6stat.icp6s_outhist[129] || + icmp6stat.icp6s_outhist[133] || + icmp6stat.icp6s_outhist[135] || + icmp6stat.icp6s_outhist[136] || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp6_types = CONFIG_BOOLEAN_YES; + st = rrdset_find_active_localhost("ipv6.icmptypes"); + if (unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "icmptypes" + , NULL + , "icmp" + , NULL + , "IPv6 ICMP Types" + , "messages/s" + , "macos.plugin" + , "sysctl" + , 10700 + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "InType1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InType128", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InType129", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "InType136", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutType1", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutType128", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutType129", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutType133", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutType135", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "OutType143", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "InType1", icmp6stat.icp6s_inhist[1]); + rrddim_set(st, "InType128", icmp6stat.icp6s_inhist[128]); + rrddim_set(st, "InType129", icmp6stat.icp6s_inhist[129]); + rrddim_set(st, "InType136", icmp6stat.icp6s_inhist[136]); + rrddim_set(st, "OutType1", icmp6stat.icp6s_outhist[1]); + rrddim_set(st, "OutType128", icmp6stat.icp6s_outhist[128]); + rrddim_set(st, "OutType129", icmp6stat.icp6s_outhist[129]); + rrddim_set(st, "OutType133", icmp6stat.icp6s_outhist[133]); + rrddim_set(st, "OutType135", icmp6stat.icp6s_outhist[135]); + rrddim_set(st, "OutType143", icmp6stat.icp6s_outhist[143]); + rrdset_done(st); + } + } + } + + if (likely(do_uptime)) { + if (unlikely(GETSYSCTL_BY_NAME("kern.boottime", boot_time))) { + do_uptime = 0; + error("DISABLED: system.uptime"); + } else { + clock_gettime(CLOCK_REALTIME, &cur_time); + st = rrdset_find_active_localhost("system.uptime"); + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "uptime" + , NULL + , "uptime" + , NULL + , "System Uptime" + , "seconds" + , "macos.plugin" + , "sysctl" + , 1000 + , update_every + , RRDSET_TYPE_LINE + ); + rrddim_add(st, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(st, "uptime", cur_time.tv_sec - boot_time.tv_sec); + rrdset_done(st); + } + } + + return 0; +} diff --git a/collectors/macos.plugin/plugin_macos.c b/collectors/macos.plugin/plugin_macos.c new file mode 100644 index 0000000..10472bd --- /dev/null +++ b/collectors/macos.plugin/plugin_macos.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_macos.h" + +static struct macos_module { + const char *name; + const char *dim; + + int enabled; + + int (*func)(int update_every, usec_t dt); + + RRDDIM *rd; + +} macos_modules[] = { + {.name = "sysctl", .dim = "sysctl", .enabled = 1, .func = do_macos_sysctl}, + {.name = "mach system management interface", .dim = "mach_smi", .enabled = 1, .func = do_macos_mach_smi}, + {.name = "iokit", .dim = "iokit", .enabled = 1, .func = do_macos_iokit}, + + // the terminator of this array + {.name = NULL, .dim = NULL, .enabled = 0, .func = NULL} +}; + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 3 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 3 +#endif + +static void macos_main_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *macos_main(void *ptr) +{ + worker_register("MACOS"); + + netdata_thread_cleanup_push(macos_main_cleanup, ptr); + + // check the enabled status for each module + for (int i = 0; macos_modules[i].name; i++) { + struct macos_module *pm = &macos_modules[i]; + + pm->enabled = config_get_boolean("plugin:macos", pm->name, pm->enabled); + pm->rd = NULL; + + worker_register_job_name(i, macos_modules[i].dim); + } + + usec_t step = localhost->rrd_update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (!netdata_exit) { + worker_is_idle(); + usec_t hb_dt = heartbeat_next(&hb, step); + + for (int i = 0; macos_modules[i].name; i++) { + struct macos_module *pm = &macos_modules[i]; + if (unlikely(!pm->enabled)) + continue; + + debug(D_PROCNETDEV_LOOP, "macos calling %s.", pm->name); + + worker_is_busy(i); + pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); + + if (unlikely(netdata_exit)) + break; + } + } + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/macos.plugin/plugin_macos.h b/collectors/macos.plugin/plugin_macos.h new file mode 100644 index 0000000..2c673a2 --- /dev/null +++ b/collectors/macos.plugin/plugin_macos.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PLUGIN_MACOS_H +#define NETDATA_PLUGIN_MACOS_H 1 + +#include "daemon/common.h" + +int do_macos_sysctl(int update_every, usec_t dt); +int do_macos_mach_smi(int update_every, usec_t dt); +int do_macos_iokit(int update_every, usec_t dt); + +#endif /* NETDATA_PLUGIN_MACOS_H */ diff --git a/collectors/nfacct.plugin/Makefile.am b/collectors/nfacct.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/nfacct.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/nfacct.plugin/README.md b/collectors/nfacct.plugin/README.md new file mode 100644 index 0000000..bacc8b7 --- /dev/null +++ b/collectors/nfacct.plugin/README.md @@ -0,0 +1,54 @@ + + +# nfacct.plugin + +`nfacct.plugin` collects Netfilter statistics. + +## Prerequisites + +1. install `libmnl-dev` and `libnetfilter-acct-dev` using the package manager of your system. + +2. re-install Netdata from source. The installer will detect that the required libraries are now available and will also build `netdata.plugin`. + +Keep in mind that NFACCT requires root access, so the plugin is setuid to root. + +## Charts + +The plugin provides Netfilter connection tracker statistics and nfacct packet and bandwidth accounting: + +Connection tracker: + +1. Connections. +2. Changes. +3. Expectations. +4. Errors. +5. Searches. + +Netfilter accounting: + +1. Packets. +2. Bandwidth. + +## Configuration + +If you need to disable NFACCT for Netdata, edit /etc/netdata/netdata.conf and set: + +``` +[plugins] + nfacct = no +``` + +## Debugging + +You can run the plugin by hand: + +``` +sudo /usr/libexec/netdata/plugins.d/nfacct.plugin 1 debug +``` + +You will get verbose output on what the plugin does. + + diff --git a/collectors/nfacct.plugin/plugin_nfacct.c b/collectors/nfacct.plugin/plugin_nfacct.c new file mode 100644 index 0000000..eeadb3c --- /dev/null +++ b/collectors/nfacct.plugin/plugin_nfacct.c @@ -0,0 +1,886 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#include +#include +#include + +#define PLUGIN_NFACCT_NAME "nfacct.plugin" + +#define NETDATA_CHART_PRIO_NETFILTER_NEW 8701 +#define NETDATA_CHART_PRIO_NETFILTER_CHANGES 8702 +#define NETDATA_CHART_PRIO_NETFILTER_EXPECT 8703 +#define NETDATA_CHART_PRIO_NETFILTER_ERRORS 8705 +#define NETDATA_CHART_PRIO_NETFILTER_SEARCH 8710 + +#define NETDATA_CHART_PRIO_NETFILTER_PACKETS 8906 +#define NETDATA_CHART_PRIO_NETFILTER_BYTES 8907 + +static inline size_t mnl_buffer_size() { + long s = MNL_SOCKET_BUFFER_SIZE; + if(s <= 0) return 8192; + return (size_t)s; +} + +// variables +static int debug = 0; +static int netdata_update_every = 1; + +#define RRD_TYPE_NET_STAT_NETFILTER "netfilter" +#define RRD_TYPE_NET_STAT_CONNTRACK "netlink" + +static struct { + int update_every; + char *buf; + size_t buf_size; + struct mnl_socket *mnl; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + unsigned int seq; + uint32_t portid; + + struct nlattr *tb[CTA_STATS_MAX+1]; + const char *attr2name[CTA_STATS_MAX+1]; + kernel_uint_t metrics[CTA_STATS_MAX+1]; + + struct nlattr *tb_exp[CTA_STATS_EXP_MAX+1]; + const char *attr2name_exp[CTA_STATS_EXP_MAX+1]; + kernel_uint_t metrics_exp[CTA_STATS_EXP_MAX+1]; +} nfstat_root = { + .update_every = 1, + .buf = NULL, + .buf_size = 0, + .mnl = NULL, + .nlh = NULL, + .nfh = NULL, + .seq = 0, + .portid = 0, + .tb = {}, + .attr2name = { + [CTA_STATS_SEARCHED] = "searched", + [CTA_STATS_FOUND] = "found", + [CTA_STATS_NEW] = "new", + [CTA_STATS_INVALID] = "invalid", + [CTA_STATS_IGNORE] = "ignore", + [CTA_STATS_DELETE] = "delete", + [CTA_STATS_DELETE_LIST] = "delete_list", + [CTA_STATS_INSERT] = "insert", + [CTA_STATS_INSERT_FAILED] = "insert_failed", + [CTA_STATS_DROP] = "drop", + [CTA_STATS_EARLY_DROP] = "early_drop", + [CTA_STATS_ERROR] = "icmp_error", + [CTA_STATS_SEARCH_RESTART] = "search_restart", + }, + .metrics = {}, + .tb_exp = {}, + .attr2name_exp = { + [CTA_STATS_EXP_NEW] = "new", + [CTA_STATS_EXP_CREATE] = "created", + [CTA_STATS_EXP_DELETE] = "deleted", + }, + .metrics_exp = {} +}; + + +static int nfstat_init(int update_every) { + nfstat_root.update_every = update_every; + + nfstat_root.buf_size = mnl_buffer_size(); + nfstat_root.buf = mallocz(nfstat_root.buf_size); + + nfstat_root.mnl = mnl_socket_open(NETLINK_NETFILTER); + if(!nfstat_root.mnl) { + error("NFSTAT: mnl_socket_open() failed"); + return 1; + } + + nfstat_root.seq = (unsigned int)now_realtime_sec() - 1; + + if(mnl_socket_bind(nfstat_root.mnl, 0, MNL_SOCKET_AUTOPID) < 0) { + error("NFSTAT: mnl_socket_bind() failed"); + return 1; + } + nfstat_root.portid = mnl_socket_get_portid(nfstat_root.mnl); + + return 0; +} + +static struct nlmsghdr * nfct_mnl_nlmsghdr_put(char *buf, uint16_t subsys, uint16_t type, uint8_t family, uint32_t seq) { + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (subsys << 8) | type; + nlh->nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP; + nlh->nlmsg_seq = seq; + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = family; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + return nlh; +} + +static int nfct_stats_attr_cb(const struct nlattr *attr, void *data) { + const struct nlattr **tb = data; + int type = mnl_attr_get_type(attr); + + if (mnl_attr_type_valid(attr, CTA_STATS_MAX) < 0) + return MNL_CB_OK; + + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) { + error("NFSTAT: mnl_attr_validate() failed"); + return MNL_CB_ERROR; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int nfstat_callback(const struct nlmsghdr *nlh, void *data) { + (void)data; + + struct nfgenmsg *nfg = mnl_nlmsg_get_payload(nlh); + + mnl_attr_parse(nlh, sizeof(*nfg), nfct_stats_attr_cb, nfstat_root.tb); + + // printf("cpu=%-4u\t", ntohs(nfg->res_id)); + + int i; + // add the metrics of this CPU into the metrics + for (i = 0; i < CTA_STATS_MAX+1; i++) { + if (nfstat_root.tb[i]) { + // printf("%s=%u ", nfstat_root.attr2name[i], ntohl(mnl_attr_get_u32(nfstat_root.tb[i]))); + nfstat_root.metrics[i] += ntohl(mnl_attr_get_u32(nfstat_root.tb[i])); + } + } + // printf("\n"); + + return MNL_CB_OK; +} + +static int nfstat_collect_conntrack() { + // zero all metrics - we will sum the metrics of all CPUs later + int i; + for (i = 0; i < CTA_STATS_MAX+1; i++) + nfstat_root.metrics[i] = 0; + + // prepare the request + nfstat_root.nlh = nfct_mnl_nlmsghdr_put(nfstat_root.buf, NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS_CPU, AF_UNSPEC, nfstat_root.seq); + + // send the request + if(mnl_socket_sendto(nfstat_root.mnl, nfstat_root.nlh, nfstat_root.nlh->nlmsg_len) < 0) { + error("NFSTAT: mnl_socket_sendto() failed"); + return 1; + } + + // get the reply + ssize_t ret; + while ((ret = mnl_socket_recvfrom(nfstat_root.mnl, nfstat_root.buf, nfstat_root.buf_size)) > 0) { + if(mnl_cb_run( + nfstat_root.buf + , (size_t)ret + , nfstat_root.nlh->nlmsg_seq + , nfstat_root.portid + , nfstat_callback + , NULL + ) <= MNL_CB_STOP) + break; + } + + // verify we run without issues + if (ret == -1) { + error("NFSTAT: error communicating with kernel. This plugin can only work when netdata runs as root."); + return 1; + } + + return 0; +} + +static int nfexp_stats_attr_cb(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type = mnl_attr_get_type(attr); + + if (mnl_attr_type_valid(attr, CTA_STATS_EXP_MAX) < 0) + return MNL_CB_OK; + + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) { + error("NFSTAT EXP: mnl_attr_validate() failed"); + return MNL_CB_ERROR; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int nfstat_callback_exp(const struct nlmsghdr *nlh, void *data) { + (void)data; + + struct nfgenmsg *nfg = mnl_nlmsg_get_payload(nlh); + + mnl_attr_parse(nlh, sizeof(*nfg), nfexp_stats_attr_cb, nfstat_root.tb_exp); + + int i; + for (i = 0; i < CTA_STATS_EXP_MAX+1; i++) { + if (nfstat_root.tb_exp[i]) { + nfstat_root.metrics_exp[i] += ntohl(mnl_attr_get_u32(nfstat_root.tb_exp[i])); + } + } + + return MNL_CB_OK; +} + +static int nfstat_collect_conntrack_expectations() { + // zero all metrics - we will sum the metrics of all CPUs later + int i; + for (i = 0; i < CTA_STATS_EXP_MAX+1; i++) + nfstat_root.metrics_exp[i] = 0; + + // prepare the request + nfstat_root.nlh = nfct_mnl_nlmsghdr_put(nfstat_root.buf, NFNL_SUBSYS_CTNETLINK_EXP, IPCTNL_MSG_EXP_GET_STATS_CPU, AF_UNSPEC, nfstat_root.seq); + + // send the request + if(mnl_socket_sendto(nfstat_root.mnl, nfstat_root.nlh, nfstat_root.nlh->nlmsg_len) < 0) { + error("NFSTAT: mnl_socket_sendto() failed"); + return 1; + } + + // get the reply + ssize_t ret; + while ((ret = mnl_socket_recvfrom(nfstat_root.mnl, nfstat_root.buf, nfstat_root.buf_size)) > 0) { + if(mnl_cb_run( + nfstat_root.buf + , (size_t)ret + , nfstat_root.nlh->nlmsg_seq + , nfstat_root.portid + , nfstat_callback_exp + , NULL + ) <= MNL_CB_STOP) + break; + } + + // verify we run without issues + if (ret == -1) { + error("NFSTAT: error communicating with kernel. This plugin can only work when netdata runs as root."); + return 1; + } + + return 0; +} + +static int nfstat_collect() { + nfstat_root.seq++; + + if(nfstat_collect_conntrack()) + return 1; + + if(nfstat_collect_conntrack_expectations()) + return 1; + + return 0; +} + +static void nfstat_send_metrics() { + static int new_chart_generated = 0, changes_chart_generated = 0, search_chart_generated = 0, errors_chart_generated = 0, expect_chart_generated = 0; + + if(!new_chart_generated) { + new_chart_generated = 1; + + printf("CHART %s.%s '' 'Connection Tracker New Connections' 'connections/s' %s '' line %d %d %s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_new" + , RRD_TYPE_NET_STAT_CONNTRACK + , NETDATA_CHART_PRIO_NETFILTER_NEW + , nfstat_root.update_every + , PLUGIN_NFACCT_NAME + ); + printf("DIMENSION %s '' incremental 1 1\n", nfstat_root.attr2name[CTA_STATS_NEW]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_IGNORE]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_INVALID]); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_new" + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_NEW] + , (collected_number) nfstat_root.metrics[CTA_STATS_NEW] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_IGNORE] + , (collected_number) nfstat_root.metrics[CTA_STATS_IGNORE] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_INVALID] + , (collected_number) nfstat_root.metrics[CTA_STATS_INVALID] + ); + printf("END\n"); + + // ---------------------------------------------------------------- + + if(!changes_chart_generated) { + changes_chart_generated = 1; + + printf("CHART %s.%s '' 'Connection Tracker Changes' 'changes/s' %s '' line %d %d detail %s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_changes" + , RRD_TYPE_NET_STAT_CONNTRACK + , NETDATA_CHART_PRIO_NETFILTER_CHANGES + , nfstat_root.update_every + , PLUGIN_NFACCT_NAME + ); + printf("DIMENSION %s '' incremental 1 1\n", nfstat_root.attr2name[CTA_STATS_INSERT]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_DELETE]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_DELETE_LIST]); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_changes" + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_INSERT] + , (collected_number) nfstat_root.metrics[CTA_STATS_INSERT] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_DELETE] + , (collected_number) nfstat_root.metrics[CTA_STATS_DELETE] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_DELETE_LIST] + , (collected_number) nfstat_root.metrics[CTA_STATS_DELETE_LIST] + ); + printf("END\n"); + + // ---------------------------------------------------------------- + + if(!search_chart_generated) { + search_chart_generated = 1; + + printf("CHART %s.%s '' 'Connection Tracker Searches' 'searches/s' %s '' line %d %d detail %s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_search" + , RRD_TYPE_NET_STAT_CONNTRACK + , NETDATA_CHART_PRIO_NETFILTER_SEARCH + , nfstat_root.update_every + , PLUGIN_NFACCT_NAME + ); + printf("DIMENSION %s '' incremental 1 1\n", nfstat_root.attr2name[CTA_STATS_SEARCHED]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_SEARCH_RESTART]); + printf("DIMENSION %s '' incremental 1 1\n", nfstat_root.attr2name[CTA_STATS_FOUND]); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_search" + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_SEARCHED] + , (collected_number) nfstat_root.metrics[CTA_STATS_SEARCHED] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_SEARCH_RESTART] + , (collected_number) nfstat_root.metrics[CTA_STATS_SEARCH_RESTART] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_FOUND] + , (collected_number) nfstat_root.metrics[CTA_STATS_FOUND] + ); + printf("END\n"); + + // ---------------------------------------------------------------- + + if(!errors_chart_generated) { + errors_chart_generated = 1; + + printf("CHART %s.%s '' 'Connection Tracker Errors' 'events/s' %s '' line %d %d detail %s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_errors" + , RRD_TYPE_NET_STAT_CONNTRACK + , NETDATA_CHART_PRIO_NETFILTER_ERRORS + , nfstat_root.update_every + , PLUGIN_NFACCT_NAME + ); + printf("DIMENSION %s '' incremental 1 1\n", nfstat_root.attr2name[CTA_STATS_ERROR]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_INSERT_FAILED]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_DROP]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_EARLY_DROP]); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_errors" + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_ERROR] + , (collected_number) nfstat_root.metrics[CTA_STATS_ERROR] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_INSERT_FAILED] + , (collected_number) nfstat_root.metrics[CTA_STATS_INSERT_FAILED] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_DROP] + , (collected_number) nfstat_root.metrics[CTA_STATS_DROP] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_EARLY_DROP] + , (collected_number) nfstat_root.metrics[CTA_STATS_EARLY_DROP] + ); + printf("END\n"); + + // ---------------------------------------------------------------- + + if(!expect_chart_generated) { + expect_chart_generated = 1; + + printf("CHART %s.%s '' 'Connection Tracker Expectations' 'expectations/s' %s '' line %d %d detail %s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_expect" + , RRD_TYPE_NET_STAT_CONNTRACK + , NETDATA_CHART_PRIO_NETFILTER_EXPECT + , nfstat_root.update_every + , PLUGIN_NFACCT_NAME + ); + printf("DIMENSION %s '' incremental 1 1\n", nfstat_root.attr2name[CTA_STATS_EXP_CREATE]); + printf("DIMENSION %s '' incremental -1 1\n", nfstat_root.attr2name[CTA_STATS_EXP_DELETE]); + printf("DIMENSION %s '' incremental 1 1\n", nfstat_root.attr2name[CTA_STATS_EXP_NEW]); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_expect" + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_EXP_CREATE] + , (collected_number) nfstat_root.metrics[CTA_STATS_EXP_CREATE] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_EXP_DELETE] + , (collected_number) nfstat_root.metrics[CTA_STATS_EXP_DELETE] + ); + printf( + "SET %s = %lld\n" + , nfstat_root.attr2name[CTA_STATS_EXP_NEW] + , (collected_number) nfstat_root.metrics[CTA_STATS_EXP_NEW] + ); + printf("END\n"); +} + + +struct nfacct_data { + char *name; + uint32_t hash; + + uint64_t pkts; + uint64_t bytes; + + int packets_dimension_added; + int bytes_dimension_added; + + int updated; + + struct nfacct_data *next; +}; + +static struct { + int update_every; + char *buf; + size_t buf_size; + struct mnl_socket *mnl; + struct nlmsghdr *nlh; + unsigned int seq; + uint32_t portid; + struct nfacct *nfacct_buffer; + struct nfacct_data *nfacct_metrics; +} nfacct_root = { + .update_every = 1, + .buf = NULL, + .buf_size = 0, + .mnl = NULL, + .nlh = NULL, + .seq = 0, + .portid = 0, + .nfacct_buffer = NULL, + .nfacct_metrics = NULL +}; + +static inline struct nfacct_data *nfacct_data_get(const char *name, uint32_t hash) { + struct nfacct_data *d = NULL, *last = NULL; + for(d = nfacct_root.nfacct_metrics; d ; last = d, d = d->next) { + if(unlikely(d->hash == hash && !strcmp(d->name, name))) + return d; + } + + d = callocz(1, sizeof(struct nfacct_data)); + d->name = strdupz(name); + d->hash = hash; + + if(!last) { + d->next = nfacct_root.nfacct_metrics; + nfacct_root.nfacct_metrics = d; + } + else { + d->next = last->next; + last->next = d; + } + + return d; +} + +static int nfacct_init(int update_every) { + nfacct_root.update_every = update_every; + + nfacct_root.buf_size = mnl_buffer_size(); + nfacct_root.buf = mallocz(nfacct_root.buf_size); + + nfacct_root.nfacct_buffer = nfacct_alloc(); + if(!nfacct_root.nfacct_buffer) { + error("nfacct.plugin: nfacct_alloc() failed."); + return 0; + } + + nfacct_root.seq = (unsigned int)now_realtime_sec() - 1; + + nfacct_root.mnl = mnl_socket_open(NETLINK_NETFILTER); + if(!nfacct_root.mnl) { + error("nfacct.plugin: mnl_socket_open() failed"); + return 1; + } + + if(mnl_socket_bind(nfacct_root.mnl, 0, MNL_SOCKET_AUTOPID) < 0) { + error("nfacct.plugin: mnl_socket_bind() failed"); + return 1; + } + nfacct_root.portid = mnl_socket_get_portid(nfacct_root.mnl); + + return 0; +} + +static int nfacct_callback(const struct nlmsghdr *nlh, void *data) { + (void)data; + + if(nfacct_nlmsg_parse_payload(nlh, nfacct_root.nfacct_buffer) < 0) { + error("NFACCT: nfacct_nlmsg_parse_payload() failed."); + return MNL_CB_OK; + } + + const char *name = nfacct_attr_get_str(nfacct_root.nfacct_buffer, NFACCT_ATTR_NAME); + uint32_t hash = simple_hash(name); + + struct nfacct_data *d = nfacct_data_get(name, hash); + + d->pkts = nfacct_attr_get_u64(nfacct_root.nfacct_buffer, NFACCT_ATTR_PKTS); + d->bytes = nfacct_attr_get_u64(nfacct_root.nfacct_buffer, NFACCT_ATTR_BYTES); + d->updated = 1; + + return MNL_CB_OK; +} + +static int nfacct_collect() { + // mark all old metrics as not-updated + struct nfacct_data *d; + for(d = nfacct_root.nfacct_metrics; d ; d = d->next) + d->updated = 0; + + // prepare the request + nfacct_root.seq++; + nfacct_root.nlh = nfacct_nlmsg_build_hdr(nfacct_root.buf, NFNL_MSG_ACCT_GET, NLM_F_DUMP, (uint32_t)nfacct_root.seq); + if(!nfacct_root.nlh) { + error("NFACCT: nfacct_nlmsg_build_hdr() failed"); + return 1; + } + + // send the request + if(mnl_socket_sendto(nfacct_root.mnl, nfacct_root.nlh, nfacct_root.nlh->nlmsg_len) < 0) { + error("NFACCT: mnl_socket_sendto() failed"); + return 1; + } + + // get the reply + ssize_t ret; + while((ret = mnl_socket_recvfrom(nfacct_root.mnl, nfacct_root.buf, nfacct_root.buf_size)) > 0) { + if(mnl_cb_run( + nfacct_root.buf + , (size_t)ret + , nfacct_root.seq + , nfacct_root.portid + , nfacct_callback + , NULL + ) <= 0) + break; + } + + // verify we run without issues + if (ret == -1) { + error("NFACCT: error communicating with kernel. This plugin can only work when netdata runs as root."); + return 1; + } + + return 0; +} + +static void nfacct_send_metrics() { + static int bytes_chart_generated = 0, packets_chart_generated = 0; + + if(!nfacct_root.nfacct_metrics) return; + struct nfacct_data *d; + + if(!packets_chart_generated) { + packets_chart_generated = 1; + printf("CHART netfilter.nfacct_packets '' 'Netfilter Accounting Packets' 'packets/s' 'nfacct' '' stacked %d %d %s\n" + , NETDATA_CHART_PRIO_NETFILTER_PACKETS + , nfacct_root.update_every + , PLUGIN_NFACCT_NAME + ); + } + + for(d = nfacct_root.nfacct_metrics; d ; d = d->next) { + if(likely(d->updated)) { + if(unlikely(!d->packets_dimension_added)) { + d->packets_dimension_added = 1; + printf( + "CHART netfilter.nfacct_packets '' 'Netfilter Accounting Packets' 'packets/s' 'nfacct' '' stacked %d %d %s\n", + NETDATA_CHART_PRIO_NETFILTER_PACKETS, + nfacct_root.update_every, + PLUGIN_NFACCT_NAME); + printf("DIMENSION %s '' incremental 1 %d\n", d->name, nfacct_root.update_every); + } + } + } + printf("BEGIN netfilter.nfacct_packets\n"); + for(d = nfacct_root.nfacct_metrics; d ; d = d->next) { + if(likely(d->updated)) { + printf("SET %s = %lld\n" + , d->name + , (collected_number)d->pkts + ); + } + } + printf("END\n"); + + // ---------------------------------------------------------------- + + if(!bytes_chart_generated) { + bytes_chart_generated = 1; + printf("CHART netfilter.nfacct_bytes '' 'Netfilter Accounting Bandwidth' 'kilobytes/s' 'nfacct' '' stacked %d %d %s\n" + , NETDATA_CHART_PRIO_NETFILTER_BYTES + , nfacct_root.update_every + , PLUGIN_NFACCT_NAME + ); + } + + for(d = nfacct_root.nfacct_metrics; d ; d = d->next) { + if(likely(d->updated)) { + if(unlikely(!d->bytes_dimension_added)) { + d->bytes_dimension_added = 1; + printf( + "CHART netfilter.nfacct_bytes '' 'Netfilter Accounting Bandwidth' 'kilobytes/s' 'nfacct' '' stacked %d %d %s\n", + NETDATA_CHART_PRIO_NETFILTER_BYTES, + nfacct_root.update_every, + PLUGIN_NFACCT_NAME); + printf("DIMENSION %s '' incremental 1 %d\n", d->name, 1000 * nfacct_root.update_every); + } + } + } + printf("BEGIN netfilter.nfacct_bytes\n"); + for(d = nfacct_root.nfacct_metrics; d ; d = d->next) { + if(likely(d->updated)) { + printf("SET %s = %lld\n" + , d->name + , (collected_number)d->bytes + ); + } + } + printf("END\n"); +} + +static void nfacct_signal_handler(int signo) +{ + exit((signo == SIGPIPE)?1:0); +} + +// When Netdata crashes this plugin was becoming zombie, +// this function was added to remove it when sigpipe and other signals are received. +void nfacct_signals() +{ + int signals[] = { SIGPIPE, SIGINT, SIGTERM, 0}; + int i; + struct sigaction sa; + sa.sa_flags = 0; + sa.sa_handler = nfacct_signal_handler; + + // ignore all signals while we run in a signal handler + sigfillset(&sa.sa_mask); + + for (i = 0; signals[i]; i++) { + if(sigaction(signals[i], &sa, NULL) == -1) + error("Cannot add the handler to signal %d", signals[i]); + } +} + +int main(int argc, char **argv) { + clocks_init(); + + // ------------------------------------------------------------------------ + // initialization of netdata plugin + + program_name = "nfacct.plugin"; + + // disable syslog + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + // ------------------------------------------------------------------------ + // parse command line parameters + + int i, freq = 0; + for(i = 1; i < argc ; i++) { + if(isdigit(*argv[i]) && !freq) { + int n = str2i(argv[i]); + if(n > 0 && n < 86400) { + freq = n; + continue; + } + } + else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) { + printf("nfacct.plugin %s\n", VERSION); + exit(0); + } + else if(strcmp("debug", argv[i]) == 0) { + debug = 1; + continue; + } + else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { + fprintf(stderr, + "\n" + " netdata nfacct.plugin %s\n" + " Copyright (C) 2015-2017 Costa Tsaousis \n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " This program is a data collector plugin for netdata.\n" + "\n" + " Available command line options:\n" + "\n" + " COLLECTION_FREQUENCY data collection frequency in seconds\n" + " minimum: %d\n" + "\n" + " debug enable verbose output\n" + " default: disabled\n" + "\n" + " -v\n" + " -V\n" + " --version print version and exit\n" + "\n" + " -h\n" + " --help print this message and exit\n" + "\n" + " For more information:\n" + " https://github.com/netdata/netdata/tree/master/collectors/nfacct.plugin\n" + "\n" + , VERSION + , netdata_update_every + ); + exit(1); + } + + error("nfacct.plugin: ignoring parameter '%s'", argv[i]); + } + + nfacct_signals(); + + errno = 0; + + if(freq >= netdata_update_every) + netdata_update_every = freq; + else if(freq) + error("update frequency %d seconds is too small for NFACCT. Using %d.", freq, netdata_update_every); + + if (debug) + fprintf(stderr, "nfacct.plugin: calling nfacct_init()\n"); + int nfacct = !nfacct_init(netdata_update_every); + + if (debug) + fprintf(stderr, "nfacct.plugin: calling nfstat_init()\n"); + int nfstat = !nfstat_init(netdata_update_every); + + // ------------------------------------------------------------------------ + // the main loop + + if(debug) fprintf(stderr, "nfacct.plugin: starting data collection\n"); + + time_t started_t = now_monotonic_sec(); + + size_t iteration; + usec_t step = netdata_update_every * USEC_PER_SEC; + + heartbeat_t hb; + heartbeat_init(&hb); + for(iteration = 0; 1; iteration++) { + usec_t dt = heartbeat_next(&hb, step); + + if(unlikely(netdata_exit)) break; + + if(debug && iteration) + fprintf(stderr, "nfacct.plugin: iteration %zu, dt %llu usec\n" + , iteration + , dt + ); + + if(likely(nfacct)) { + if(debug) fprintf(stderr, "nfacct.plugin: calling nfacct_collect()\n"); + nfacct = !nfacct_collect(); + + if(likely(nfacct)) { + if(debug) fprintf(stderr, "nfacct.plugin: calling nfacct_send_metrics()\n"); + nfacct_send_metrics(); + } + } + + if(likely(nfstat)) { + if(debug) fprintf(stderr, "nfacct.plugin: calling nfstat_collect()\n"); + nfstat = !nfstat_collect(); + + if(likely(nfstat)) { + if(debug) fprintf(stderr, "nfacct.plugin: calling nfstat_send_metrics()\n"); + nfstat_send_metrics(); + } + } + + fflush(stdout); + + // restart check (14400 seconds) + if(now_monotonic_sec() - started_t > 14400) break; + } + + info("NFACCT process exiting"); +} diff --git a/collectors/perf.plugin/Makefile.am b/collectors/perf.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/perf.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/perf.plugin/README.md b/collectors/perf.plugin/README.md new file mode 100644 index 0000000..a7a87ac --- /dev/null +++ b/collectors/perf.plugin/README.md @@ -0,0 +1,80 @@ + + +# perf.plugin + +`perf.plugin` collects system-wide CPU performance statistics from Performance Monitoring Units (PMU) using +the `perf_event_open()` system call. + +## Important Notes + +Accessing hardware PMUs requires root permissions, so the plugin is setuid to root. + +Keep in mind that the number of PMUs in a system is usually quite limited and every hardware monitoring +event for every CPU core needs a separate file descriptor to be opened. + +## Charts + +The plugin provides statistics for general hardware and software performance monitoring events: + +Hardware events: + +1. CPU cycles +2. Instructions +3. Branch instructions +4. Cache operations +5. BUS cycles +6. Stalled frontend and backend cycles + +Software events: + +1. CPU migrations +2. Alignment faults +3. Emulation faults + +Hardware cache events: + +1. L1D cache operations +2. L1D prefetch cache operations +3. L1I cache operations +4. LL cache operations +5. DTLB cache operations +6. ITLB cache operations +7. PBU cache operations + +## Configuration + +The plugin is disabled by default because the number of PMUs is usually quite limited and it is not desired to +allow Netdata to struggle silently for PMUs, interfering with other performance monitoring software. If you need to +enable the perf plugin, edit /etc/netdata/netdata.conf and set: + +```raw +[plugins] + perf = yes +``` + +```raw +[plugin:perf] + update every = 1 + command options = all +``` + +You can use the `command options` parameter to pick what data should be collected and which charts should be +displayed. If `all` is used, all general performance monitoring counters are probed and corresponding charts +are enabled for the available counters. You can also define a particular set of enabled charts using the +following keywords: `cycles`, `instructions`, `branch`, `cache`, `bus`, `stalled`, `migrations`, `alignment`, +`emulation`, `L1D`, `L1D-prefetch`, `L1I`, `LL`, `DTLB`, `ITLB`, `PBU`. + +## Debugging + +You can run the plugin by hand: + +```raw +sudo /usr/libexec/netdata/plugins.d/perf.plugin 1 all debug +``` + +You will get verbose output on what the plugin does. + + diff --git a/collectors/perf.plugin/perf_plugin.c b/collectors/perf.plugin/perf_plugin.c new file mode 100644 index 0000000..b2f7d2e --- /dev/null +++ b/collectors/perf.plugin/perf_plugin.c @@ -0,0 +1,1353 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#include + +#define PLUGIN_PERF_NAME "perf.plugin" + +// Hardware counters +#define NETDATA_CHART_PRIO_PERF_CPU_CYCLES 8800 +#define NETDATA_CHART_PRIO_PERF_INSTRUCTIONS 8801 +#define NETDATA_CHART_PRIO_PERF_IPC 8802 +#define NETDATA_CHART_PRIO_PERF_BRANCH_INSTRUCTIONS 8803 +#define NETDATA_CHART_PRIO_PERF_CACHE 8804 +#define NETDATA_CHART_PRIO_PERF_BUS_CYCLES 8805 +#define NETDATA_CHART_PRIO_PERF_FRONT_BACK_CYCLES 8806 + +// Software counters +#define NETDATA_CHART_PRIO_PERF_MIGRATIONS 8810 +#define NETDATA_CHART_PRIO_PERF_ALIGNMENT 8811 +#define NETDATA_CHART_PRIO_PERF_EMULATION 8812 + +// Hardware cache counters +#define NETDATA_CHART_PRIO_PERF_L1D 8820 +#define NETDATA_CHART_PRIO_PERF_L1D_PREFETCH 8821 +#define NETDATA_CHART_PRIO_PERF_L1I 8822 +#define NETDATA_CHART_PRIO_PERF_LL 8823 +#define NETDATA_CHART_PRIO_PERF_DTLB 8824 +#define NETDATA_CHART_PRIO_PERF_ITLB 8825 +#define NETDATA_CHART_PRIO_PERF_PBU 8826 + +#define RRD_TYPE_PERF "perf" +#define RRD_FAMILY_HW "hardware" +#define RRD_FAMILY_SW "software" +#define RRD_FAMILY_CACHE "cache" + +#define NO_FD -1 +#define ALL_PIDS -1 +#define RUNNING_THRESHOLD 100 + +static int debug = 0; + +static int update_every = 1; +static int freq = 0; + +typedef enum perf_event_id { + // Hardware counters + EV_ID_CPU_CYCLES, + EV_ID_INSTRUCTIONS, + EV_ID_CACHE_REFERENCES, + EV_ID_CACHE_MISSES, + EV_ID_BRANCH_INSTRUCTIONS, + EV_ID_BRANCH_MISSES, + EV_ID_BUS_CYCLES, + EV_ID_STALLED_CYCLES_FRONTEND, + EV_ID_STALLED_CYCLES_BACKEND, + EV_ID_REF_CPU_CYCLES, + + // Software counters + // EV_ID_CPU_CLOCK, + // EV_ID_TASK_CLOCK, + // EV_ID_PAGE_FAULTS, + // EV_ID_CONTEXT_SWITCHES, + EV_ID_CPU_MIGRATIONS, + // EV_ID_PAGE_FAULTS_MIN, + // EV_ID_PAGE_FAULTS_MAJ, + EV_ID_ALIGNMENT_FAULTS, + EV_ID_EMULATION_FAULTS, + + // Hardware cache counters + EV_ID_L1D_READ_ACCESS, + EV_ID_L1D_READ_MISS, + EV_ID_L1D_WRITE_ACCESS, + EV_ID_L1D_WRITE_MISS, + EV_ID_L1D_PREFETCH_ACCESS, + + EV_ID_L1I_READ_ACCESS, + EV_ID_L1I_READ_MISS, + + EV_ID_LL_READ_ACCESS, + EV_ID_LL_READ_MISS, + EV_ID_LL_WRITE_ACCESS, + EV_ID_LL_WRITE_MISS, + + EV_ID_DTLB_READ_ACCESS, + EV_ID_DTLB_READ_MISS, + EV_ID_DTLB_WRITE_ACCESS, + EV_ID_DTLB_WRITE_MISS, + + EV_ID_ITLB_READ_ACCESS, + EV_ID_ITLB_READ_MISS, + + EV_ID_PBU_READ_ACCESS, + + EV_ID_END +} perf_event_id_t; + +enum perf_event_group { + EV_GROUP_CYCLES, + EV_GROUP_INSTRUCTIONS_AND_CACHE, + EV_GROUP_SOFTWARE, + EV_GROUP_CACHE_L1D, + EV_GROUP_CACHE_L1I_LL_DTLB, + EV_GROUP_CACHE_ITLB_BPU, + + EV_GROUP_NUM +}; + +static int number_of_cpus; + +static int *group_leader_fds[EV_GROUP_NUM]; + +static struct perf_event { + perf_event_id_t id; + + int type; + int config; + + int **group_leader_fd; + int *fd; + + int disabled; + int updated; + + uint64_t value; + + uint64_t *prev_value; + uint64_t *prev_time_enabled; + uint64_t *prev_time_running; +} perf_events[] = { + // Hardware counters + {EV_ID_CPU_CYCLES, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, &group_leader_fds[EV_GROUP_CYCLES], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_INSTRUCTIONS, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, &group_leader_fds[EV_GROUP_INSTRUCTIONS_AND_CACHE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_CACHE_REFERENCES, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, &group_leader_fds[EV_GROUP_INSTRUCTIONS_AND_CACHE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_CACHE_MISSES, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, &group_leader_fds[EV_GROUP_INSTRUCTIONS_AND_CACHE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_BRANCH_INSTRUCTIONS, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, &group_leader_fds[EV_GROUP_INSTRUCTIONS_AND_CACHE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_BRANCH_MISSES, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, &group_leader_fds[EV_GROUP_INSTRUCTIONS_AND_CACHE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_BUS_CYCLES, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, &group_leader_fds[EV_GROUP_CYCLES], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_STALLED_CYCLES_FRONTEND, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, &group_leader_fds[EV_GROUP_CYCLES], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_STALLED_CYCLES_BACKEND, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, &group_leader_fds[EV_GROUP_CYCLES], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, &group_leader_fds[EV_GROUP_CYCLES], NULL, 1, 0, 0, NULL, NULL, NULL}, + + // Software counters + // {EV_ID_CPU_CLOCK, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + // {EV_ID_TASK_CLOCK, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + // {EV_ID_PAGE_FAULTS, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + // {EV_ID_CONTEXT_SWITCHES, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_CPU_MIGRATIONS, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + // {EV_ID_PAGE_FAULTS_MIN, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + // {EV_ID_PAGE_FAULTS_MAJ, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_ALIGNMENT_FAULTS, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + {EV_ID_EMULATION_FAULTS, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, &group_leader_fds[EV_GROUP_SOFTWARE], NULL, 1, 0, 0, NULL, NULL, NULL}, + + // Hardware cache counters + { + EV_ID_L1D_READ_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1D], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_L1D_READ_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1D], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_L1D_WRITE_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1D], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_L1D_WRITE_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1D], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_L1D_PREFETCH_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1D], NULL, 1, 0, 0, NULL, NULL, NULL + }, + + { + EV_ID_L1I_READ_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_L1I_READ_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, + + { + EV_ID_LL_READ_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_LL_READ_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_LL_WRITE_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_LL_WRITE_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, + + { + EV_ID_DTLB_READ_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_DTLB_READ_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_DTLB_WRITE_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_L1I_LL_DTLB], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_DTLB_WRITE_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_ITLB_BPU], NULL, 1, 0, 0, NULL, NULL, NULL + }, + + { + EV_ID_ITLB_READ_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_ITLB_BPU], NULL, 1, 0, 0, NULL, NULL, NULL + }, { + EV_ID_ITLB_READ_MISS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + &group_leader_fds[EV_GROUP_CACHE_ITLB_BPU], NULL, 1, 0, 0, NULL, NULL, NULL + }, + + { + EV_ID_PBU_READ_ACCESS, PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + &group_leader_fds[EV_GROUP_CACHE_ITLB_BPU], NULL, 1, 0, 0, NULL, NULL, NULL + }, + + {EV_ID_END, 0, 0, NULL, NULL, 0, 0, 0, NULL, NULL, NULL} +}; + +static int perf_init() { + int cpu, group; + struct perf_event_attr perf_event_attr; + struct perf_event *current_event = NULL; + unsigned long flags = 0; + + number_of_cpus = (int)get_system_cpus(); + + // initialize all perf event file descriptors + for(current_event = &perf_events[0]; current_event->id != EV_ID_END; current_event++) { + current_event->fd = mallocz(number_of_cpus * sizeof(int)); + memset(current_event->fd, NO_FD, number_of_cpus * sizeof(int)); + + current_event->prev_value = mallocz(number_of_cpus * sizeof(uint64_t)); + memset(current_event->prev_value, 0, number_of_cpus * sizeof(uint64_t)); + + current_event->prev_time_enabled = mallocz(number_of_cpus * sizeof(uint64_t)); + memset(current_event->prev_time_enabled, 0, number_of_cpus * sizeof(uint64_t)); + + current_event->prev_time_running = mallocz(number_of_cpus * sizeof(uint64_t)); + memset(current_event->prev_time_running, 0, number_of_cpus * sizeof(uint64_t)); + } + + for(group = 0; group < EV_GROUP_NUM; group++) { + group_leader_fds[group] = mallocz(number_of_cpus * sizeof(int)); + memset(group_leader_fds[group], NO_FD, number_of_cpus * sizeof(int)); + } + + memset(&perf_event_attr, 0, sizeof(perf_event_attr)); + + for(cpu = 0; cpu < number_of_cpus; cpu++) { + for(current_event = &perf_events[0]; current_event->id != EV_ID_END; current_event++) { + if(unlikely(current_event->disabled)) continue; + + perf_event_attr.type = current_event->type; + perf_event_attr.config = current_event->config; + perf_event_attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; + + int fd, group_leader_fd = *(*current_event->group_leader_fd + cpu); + + fd = syscall( + __NR_perf_event_open, + &perf_event_attr, + ALL_PIDS, + cpu, + group_leader_fd, + flags + ); + + if(unlikely(group_leader_fd == NO_FD)) group_leader_fd = fd; + + if(unlikely(fd < 0)) { + switch errno { + case EACCES: + error("Cannot access to the PMU: Permission denied"); + break; + case EBUSY: + error("Another event already has exclusive access to the PMU"); + break; + default: + error("Cannot open perf event"); + } + error("Disabling event %u", current_event->id); + current_event->disabled = 1; + } + + *(current_event->fd + cpu) = fd; + *(*current_event->group_leader_fd + cpu) = group_leader_fd; + + if(unlikely(debug)) fprintf(stderr, "perf.plugin: event id = %u, cpu = %d, fd = %d, leader_fd = %d\n", current_event->id, cpu, fd, group_leader_fd); + } + } + + return 0; +} + +static void perf_free(void) { + int cpu, group; + struct perf_event *current_event = NULL; + + for(current_event = &perf_events[0]; current_event->id != EV_ID_END; current_event++) { + for(cpu = 0; cpu < number_of_cpus; cpu++) + if(*(current_event->fd + cpu) != NO_FD) close(*(current_event->fd + cpu)); + + free(current_event->fd); + free(current_event->prev_value); + free(current_event->prev_time_enabled); + free(current_event->prev_time_running); + } + + for(group = 0; group < EV_GROUP_NUM; group++) + free(group_leader_fds[group]); +} + +static void reenable_events() { + int group, cpu; + + for(group = 0; group < EV_GROUP_NUM; group++) { + for(cpu = 0; cpu < number_of_cpus; cpu++) { + int current_fd = *(group_leader_fds[group] + cpu); + + if(unlikely(current_fd == NO_FD)) continue; + + if(ioctl(current_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1 + || ioctl(current_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) + { + error("Cannot reenable event group"); + } + } + } +} + +static int perf_collect() { + int cpu; + struct perf_event *current_event = NULL; + static uint64_t prev_cpu_cycles_value = 0; + struct { + uint64_t value; + uint64_t time_enabled; + uint64_t time_running; + } read_result; + + for(current_event = &perf_events[0]; current_event->id != EV_ID_END; current_event++) { + current_event->updated = 0; + current_event->value = 0; + + if(unlikely(current_event->disabled)) continue; + + for(cpu = 0; cpu < number_of_cpus; cpu++) { + + ssize_t read_size = read(current_event->fd[cpu], &read_result, sizeof(read_result)); + + if(likely(read_size == sizeof(read_result))) { + if (likely(read_result.time_running + && read_result.time_running != *(current_event->prev_time_running + cpu) + && (read_result.time_enabled / read_result.time_running < RUNNING_THRESHOLD))) { + current_event->value += (read_result.value - *(current_event->prev_value + cpu)) \ + * (read_result.time_enabled - *(current_event->prev_time_enabled + cpu)) \ + / (read_result.time_running - *(current_event->prev_time_running + cpu)); + } + + *(current_event->prev_value + cpu) = read_result.value; + *(current_event->prev_time_enabled + cpu) = read_result.time_enabled; + *(current_event->prev_time_running + cpu) = read_result.time_running; + + current_event->updated = 1; + } + else { + error("Cannot update value for event %u", current_event->id); + return 1; + } + } + + if(unlikely(debug)) fprintf(stderr, "perf.plugin: successfully read event id = %u, value = %"PRIu64"\n", current_event->id, current_event->value); + } + + if(unlikely(perf_events[EV_ID_CPU_CYCLES].value == prev_cpu_cycles_value)) + reenable_events(); + prev_cpu_cycles_value = perf_events[EV_ID_CPU_CYCLES].value; + + return 0; +} + +static void perf_send_metrics() { + static int // Hardware counters + cpu_cycles_chart_generated = 0, + instructions_chart_generated = 0, + ipc_chart_generated = 0, + branch_chart_generated = 0, + cache_chart_generated = 0, + bus_cycles_chart_generated = 0, + stalled_cycles_chart_generated = 0, + + // Software counters + migrations_chart_generated = 0, + alignment_chart_generated = 0, + emulation_chart_generated = 0, + + // Hardware cache counters + L1D_chart_generated = 0, + L1D_prefetch_chart_generated = 0, + L1I_chart_generated = 0, + LL_chart_generated = 0, + DTLB_chart_generated = 0, + ITLB_chart_generated = 0, + PBU_chart_generated = 0; + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_CPU_CYCLES].updated || perf_events[EV_ID_REF_CPU_CYCLES].updated)) { + if(unlikely(!cpu_cycles_chart_generated)) { + cpu_cycles_chart_generated = 1; + + printf("CHART %s.%s '' 'CPU cycles' 'cycles/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "cpu_cycles" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_CPU_CYCLES + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "cpu"); + printf("DIMENSION %s '' absolute 1 1\n", "ref_cpu"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "cpu_cycles" + ); + if(likely(perf_events[EV_ID_CPU_CYCLES].updated)) { + printf( + "SET %s = %lld\n" + , "cpu" + , (collected_number) perf_events[EV_ID_CPU_CYCLES].value + ); + } + if(likely(perf_events[EV_ID_REF_CPU_CYCLES].updated)) { + printf( + "SET %s = %lld\n" + , "ref_cpu" + , (collected_number) perf_events[EV_ID_REF_CPU_CYCLES].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_INSTRUCTIONS].updated)) { + if(unlikely(!instructions_chart_generated)) { + instructions_chart_generated = 1; + + printf("CHART %s.%s '' 'Instructions' 'instructions/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "instructions" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_INSTRUCTIONS + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "instructions"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "instructions" + ); + printf( + "SET %s = %lld\n" + , "instructions" + , (collected_number) perf_events[EV_ID_INSTRUCTIONS].value + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_INSTRUCTIONS].updated) && likely(perf_events[EV_ID_CPU_CYCLES].updated)) { + if(unlikely(!ipc_chart_generated)) { + ipc_chart_generated = 1; + + printf("CHART %s.%s '' '%s' 'instructions/cycle' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "instructions_per_cycle" + , "Instructions per Cycle(IPC)" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_IPC + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 100\n", "ipc"); + } + + printf("BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "instructions_per_cycle" + ); + + NETDATA_DOUBLE result = ((NETDATA_DOUBLE)perf_events[EV_ID_INSTRUCTIONS].value / + (NETDATA_DOUBLE)perf_events[EV_ID_CPU_CYCLES].value) * 100.0; + printf("SET %s = %lld\n" + , "ipc" + , (collected_number) result + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_BRANCH_INSTRUCTIONS].updated || perf_events[EV_ID_BRANCH_MISSES].updated)) { + if(unlikely(!branch_chart_generated)) { + branch_chart_generated = 1; + + printf("CHART %s.%s '' 'Branch instructions' 'instructions/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "branch_instructions" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_BRANCH_INSTRUCTIONS + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "instructions"); + printf("DIMENSION %s '' absolute 1 1\n", "misses"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "branch_instructions" + ); + if(likely(perf_events[EV_ID_BRANCH_INSTRUCTIONS].updated)) { + printf( + "SET %s = %lld\n" + , "instructions" + , (collected_number) perf_events[EV_ID_BRANCH_INSTRUCTIONS].value + ); + } + if(likely(perf_events[EV_ID_BRANCH_MISSES].updated)) { + printf( + "SET %s = %lld\n" + , "misses" + , (collected_number) perf_events[EV_ID_BRANCH_MISSES].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_CACHE_REFERENCES].updated || perf_events[EV_ID_CACHE_MISSES].updated)) { + if(unlikely(!cache_chart_generated)) { + cache_chart_generated = 1; + + printf("CHART %s.%s '' 'Cache operations' 'operations/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "cache" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_CACHE + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "references"); + printf("DIMENSION %s '' absolute 1 1\n", "misses"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "cache" + ); + if(likely(perf_events[EV_ID_CACHE_REFERENCES].updated)) { + printf( + "SET %s = %lld\n" + , "references" + , (collected_number) perf_events[EV_ID_CACHE_REFERENCES].value + ); + } + if(likely(perf_events[EV_ID_CACHE_MISSES].updated)) { + printf( + "SET %s = %lld\n" + , "misses" + , (collected_number) perf_events[EV_ID_CACHE_MISSES].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_BUS_CYCLES].updated)) { + if(unlikely(!bus_cycles_chart_generated)) { + bus_cycles_chart_generated = 1; + + printf("CHART %s.%s '' 'Bus cycles' 'cycles/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "bus_cycles" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_BUS_CYCLES + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "bus"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "bus_cycles" + ); + printf( + "SET %s = %lld\n" + , "bus" + , (collected_number) perf_events[EV_ID_BUS_CYCLES].value + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_STALLED_CYCLES_FRONTEND].updated || perf_events[EV_ID_STALLED_CYCLES_BACKEND].updated)) { + if(unlikely(!stalled_cycles_chart_generated)) { + stalled_cycles_chart_generated = 1; + + printf("CHART %s.%s '' 'Stalled frontend and backend cycles' 'cycles/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "stalled_cycles" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_FRONT_BACK_CYCLES + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "frontend"); + printf("DIMENSION %s '' absolute 1 1\n", "backend"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "stalled_cycles" + ); + if(likely(perf_events[EV_ID_STALLED_CYCLES_FRONTEND].updated)) { + printf( + "SET %s = %lld\n" + , "frontend" + , (collected_number) perf_events[EV_ID_STALLED_CYCLES_FRONTEND].value + ); + } + if(likely(perf_events[EV_ID_STALLED_CYCLES_BACKEND].updated)) { + printf( + "SET %s = %lld\n" + , "backend" + , (collected_number) perf_events[EV_ID_STALLED_CYCLES_BACKEND].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_CPU_MIGRATIONS].updated)) { + if(unlikely(!migrations_chart_generated)) { + migrations_chart_generated = 1; + + printf("CHART %s.%s '' 'CPU migrations' 'migrations' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "migrations" + , RRD_FAMILY_SW + , NETDATA_CHART_PRIO_PERF_MIGRATIONS + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "migrations"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "migrations" + ); + printf( + "SET %s = %lld\n" + , "migrations" + , (collected_number) perf_events[EV_ID_CPU_MIGRATIONS].value + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_ALIGNMENT_FAULTS].updated)) { + if(unlikely(!alignment_chart_generated)) { + alignment_chart_generated = 1; + + printf("CHART %s.%s '' 'Alignment faults' 'faults' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "alignment_faults" + , RRD_FAMILY_SW + , NETDATA_CHART_PRIO_PERF_ALIGNMENT + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "faults"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "alignment_faults" + ); + printf( + "SET %s = %lld\n" + , "faults" + , (collected_number) perf_events[EV_ID_ALIGNMENT_FAULTS].value + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_EMULATION_FAULTS].updated)) { + if(unlikely(!emulation_chart_generated)) { + emulation_chart_generated = 1; + + printf("CHART %s.%s '' 'Emulation faults' 'faults' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "emulation_faults" + , RRD_FAMILY_SW + , NETDATA_CHART_PRIO_PERF_EMULATION + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "faults"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "emulation_faults" + ); + printf( + "SET %s = %lld\n" + , "faults" + , (collected_number) perf_events[EV_ID_EMULATION_FAULTS].value + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_L1D_READ_ACCESS].updated || perf_events[EV_ID_L1D_READ_MISS].updated + || perf_events[EV_ID_L1D_WRITE_ACCESS].updated || perf_events[EV_ID_L1D_WRITE_MISS].updated)) { + if(unlikely(!L1D_chart_generated)) { + L1D_chart_generated = 1; + + printf("CHART %s.%s '' 'L1D cache operations' 'events/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "l1d_cache" + , RRD_FAMILY_CACHE + , NETDATA_CHART_PRIO_PERF_L1D + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "read_access"); + printf("DIMENSION %s '' absolute 1 1\n", "read_misses"); + printf("DIMENSION %s '' absolute -1 1\n", "write_access"); + printf("DIMENSION %s '' absolute -1 1\n", "write_misses"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "l1d_cache" + ); + if(likely(perf_events[EV_ID_L1D_READ_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "read_access" + , (collected_number) perf_events[EV_ID_L1D_READ_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_L1D_READ_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "read_misses" + , (collected_number) perf_events[EV_ID_L1D_READ_MISS].value + ); + } + if(likely(perf_events[EV_ID_L1D_WRITE_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "write_access" + , (collected_number) perf_events[EV_ID_L1D_WRITE_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_L1D_WRITE_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "write_misses" + , (collected_number) perf_events[EV_ID_L1D_WRITE_MISS].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_L1D_PREFETCH_ACCESS].updated)) { + if(unlikely(!L1D_prefetch_chart_generated)) { + L1D_prefetch_chart_generated = 1; + + printf("CHART %s.%s '' 'L1D prefetch cache operations' 'prefetches/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "l1d_cache_prefetch" + , RRD_FAMILY_CACHE + , NETDATA_CHART_PRIO_PERF_L1D_PREFETCH + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "prefetches"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "l1d_cache_prefetch" + ); + printf( + "SET %s = %lld\n" + , "prefetches" + , (collected_number) perf_events[EV_ID_L1D_PREFETCH_ACCESS].value + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_L1I_READ_ACCESS].updated || perf_events[EV_ID_L1I_READ_MISS].updated)) { + if(unlikely(!L1I_chart_generated)) { + L1I_chart_generated = 1; + + printf("CHART %s.%s '' 'L1I cache operations' 'events/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "l1i_cache" + , RRD_FAMILY_CACHE + , NETDATA_CHART_PRIO_PERF_L1I + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "read_access"); + printf("DIMENSION %s '' absolute 1 1\n", "read_misses"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "l1i_cache" + ); + if(likely(perf_events[EV_ID_L1I_READ_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "read_access" + , (collected_number) perf_events[EV_ID_L1I_READ_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_L1I_READ_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "read_misses" + , (collected_number) perf_events[EV_ID_L1I_READ_MISS].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_LL_READ_ACCESS].updated || perf_events[EV_ID_LL_READ_MISS].updated + || perf_events[EV_ID_LL_WRITE_ACCESS].updated || perf_events[EV_ID_LL_WRITE_MISS].updated)) { + if(unlikely(!LL_chart_generated)) { + LL_chart_generated = 1; + + printf("CHART %s.%s '' 'LL cache operations' 'events/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "ll_cache" + , RRD_FAMILY_CACHE + , NETDATA_CHART_PRIO_PERF_LL + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "read_access"); + printf("DIMENSION %s '' absolute 1 1\n", "read_misses"); + printf("DIMENSION %s '' absolute -1 1\n", "write_access"); + printf("DIMENSION %s '' absolute -1 1\n", "write_misses"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "ll_cache" + ); + if(likely(perf_events[EV_ID_LL_READ_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "read_access" + , (collected_number) perf_events[EV_ID_LL_READ_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_LL_READ_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "read_misses" + , (collected_number) perf_events[EV_ID_LL_READ_MISS].value + ); + } + if(likely(perf_events[EV_ID_LL_WRITE_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "write_access" + , (collected_number) perf_events[EV_ID_LL_WRITE_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_LL_WRITE_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "write_misses" + , (collected_number) perf_events[EV_ID_LL_WRITE_MISS].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_DTLB_READ_ACCESS].updated || perf_events[EV_ID_DTLB_READ_MISS].updated + || perf_events[EV_ID_DTLB_WRITE_ACCESS].updated || perf_events[EV_ID_DTLB_WRITE_MISS].updated)) { + if(unlikely(!DTLB_chart_generated)) { + DTLB_chart_generated = 1; + + printf("CHART %s.%s '' 'DTLB cache operations' 'events/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "dtlb_cache" + , RRD_FAMILY_CACHE + , NETDATA_CHART_PRIO_PERF_DTLB + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "read_access"); + printf("DIMENSION %s '' absolute 1 1\n", "read_misses"); + printf("DIMENSION %s '' absolute -1 1\n", "write_access"); + printf("DIMENSION %s '' absolute -1 1\n", "write_misses"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "dtlb_cache" + ); + if(likely(perf_events[EV_ID_DTLB_READ_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "read_access" + , (collected_number) perf_events[EV_ID_DTLB_READ_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_DTLB_READ_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "read_misses" + , (collected_number) perf_events[EV_ID_DTLB_READ_MISS].value + ); + } + if(likely(perf_events[EV_ID_DTLB_WRITE_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "write_access" + , (collected_number) perf_events[EV_ID_DTLB_WRITE_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_DTLB_WRITE_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "write_misses" + , (collected_number) perf_events[EV_ID_DTLB_WRITE_MISS].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_ITLB_READ_ACCESS].updated || perf_events[EV_ID_ITLB_READ_MISS].updated)) { + if(unlikely(!ITLB_chart_generated)) { + ITLB_chart_generated = 1; + + printf("CHART %s.%s '' 'ITLB cache operations' 'events/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "itlb_cache" + , RRD_FAMILY_CACHE + , NETDATA_CHART_PRIO_PERF_ITLB + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "read_access"); + printf("DIMENSION %s '' absolute 1 1\n", "read_misses"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "itlb_cache" + ); + if(likely(perf_events[EV_ID_ITLB_READ_ACCESS].updated)) { + printf( + "SET %s = %lld\n" + , "read_access" + , (collected_number) perf_events[EV_ID_ITLB_READ_ACCESS].value + ); + } + if(likely(perf_events[EV_ID_ITLB_READ_MISS].updated)) { + printf( + "SET %s = %lld\n" + , "read_misses" + , (collected_number) perf_events[EV_ID_ITLB_READ_MISS].value + ); + } + printf("END\n"); + } + + // ------------------------------------------------------------------------ + + if(likely(perf_events[EV_ID_PBU_READ_ACCESS].updated)) { + if(unlikely(!PBU_chart_generated)) { + PBU_chart_generated = 1; + + printf("CHART %s.%s '' 'PBU cache operations' 'events/s' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , "pbu_cache" + , RRD_FAMILY_CACHE + , NETDATA_CHART_PRIO_PERF_PBU + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 1\n", "read_access"); + } + + printf( + "BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "pbu_cache" + ); + printf( + "SET %s = %lld\n" + , "read_access" + , (collected_number) perf_events[EV_ID_PBU_READ_ACCESS].value + ); + printf("END\n"); + } +} + +void parse_command_line(int argc, char **argv) { + int i, plugin_enabled = 0; + + for(i = 1; i < argc ; i++) { + if(isdigit(*argv[i]) && !freq) { + int n = str2i(argv[i]); + if(n > 0 && n < 86400) { + freq = n; + continue; + } + } + else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) { + printf("perf.plugin %s\n", VERSION); + exit(0); + } + else if(strcmp("all", argv[i]) == 0) { + struct perf_event *current_event = NULL; + + for(current_event = &perf_events[0]; current_event->id != EV_ID_END; current_event++) + current_event->disabled = 0; + + plugin_enabled = 1; + continue; + } + else if(strcmp("cycles", argv[i]) == 0) { + perf_events[EV_ID_CPU_CYCLES].disabled = 0; + perf_events[EV_ID_REF_CPU_CYCLES].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("instructions", argv[i]) == 0) { + perf_events[EV_ID_INSTRUCTIONS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("branch", argv[i]) == 0) { + perf_events[EV_ID_BRANCH_INSTRUCTIONS].disabled = 0; + perf_events[EV_ID_BRANCH_MISSES].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("cache", argv[i]) == 0) { + perf_events[EV_ID_CACHE_REFERENCES].disabled = 0; + perf_events[EV_ID_CACHE_MISSES].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("bus", argv[i]) == 0) { + perf_events[EV_ID_BUS_CYCLES].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("stalled", argv[i]) == 0) { + perf_events[EV_ID_STALLED_CYCLES_FRONTEND].disabled = 0; + perf_events[EV_ID_STALLED_CYCLES_BACKEND].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("migrations", argv[i]) == 0) { + perf_events[EV_ID_CPU_MIGRATIONS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("alignment", argv[i]) == 0) { + perf_events[EV_ID_ALIGNMENT_FAULTS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("emulation", argv[i]) == 0) { + perf_events[EV_ID_EMULATION_FAULTS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("L1D", argv[i]) == 0) { + perf_events[EV_ID_L1D_READ_ACCESS].disabled = 0; + perf_events[EV_ID_L1D_READ_MISS].disabled = 0; + perf_events[EV_ID_L1D_WRITE_ACCESS].disabled = 0; + perf_events[EV_ID_L1D_WRITE_MISS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("L1D-prefetch", argv[i]) == 0) { + perf_events[EV_ID_L1D_PREFETCH_ACCESS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("L1I", argv[i]) == 0) { + perf_events[EV_ID_L1I_READ_ACCESS].disabled = 0; + perf_events[EV_ID_L1I_READ_MISS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("LL", argv[i]) == 0) { + perf_events[EV_ID_LL_READ_ACCESS].disabled = 0; + perf_events[EV_ID_LL_READ_MISS].disabled = 0; + perf_events[EV_ID_LL_WRITE_ACCESS].disabled = 0; + perf_events[EV_ID_LL_WRITE_MISS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("DTLB", argv[i]) == 0) { + perf_events[EV_ID_DTLB_READ_ACCESS].disabled = 0; + perf_events[EV_ID_DTLB_READ_MISS].disabled = 0; + perf_events[EV_ID_DTLB_WRITE_ACCESS].disabled = 0; + perf_events[EV_ID_DTLB_WRITE_MISS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("ITLB", argv[i]) == 0) { + perf_events[EV_ID_ITLB_READ_ACCESS].disabled = 0; + perf_events[EV_ID_ITLB_READ_MISS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("PBU", argv[i]) == 0) { + perf_events[EV_ID_PBU_READ_ACCESS].disabled = 0; + plugin_enabled = 1; + continue; + } + else if(strcmp("debug", argv[i]) == 0) { + debug = 1; + continue; + } + else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { + fprintf(stderr, + "\n" + " netdata perf.plugin %s\n" + " Copyright (C) 2019 Netdata Inc.\n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " This program is a data collector plugin for netdata.\n" + "\n" + " Available command line options:\n" + "\n" + " COLLECTION_FREQUENCY data collection frequency in seconds\n" + " minimum: %d\n" + "\n" + " all enable all charts\n" + "\n" + " cycles enable CPU cycles chart\n" + "\n" + " instructions enable Instructions chart\n" + "\n" + " branch enable Branch instructions chart\n" + "\n" + " cache enable Cache operations chart\n" + "\n" + " bus enable Bus cycles chart\n" + "\n" + " stalled enable Stalled frontend and backend cycles chart\n" + "\n" + " migrations enable CPU migrations chart\n" + "\n" + " alignment enable Alignment faults chart\n" + "\n" + " emulation enable Emulation faults chart\n" + "\n" + " L1D enable L1D cache operations chart\n" + "\n" + " L1D-prefetch enable L1D prefetch cache operations chart\n" + "\n" + " L1I enable L1I cache operations chart\n" + "\n" + " LL enable LL cache operations chart\n" + "\n" + " DTLB enable DTLB cache operations chart\n" + "\n" + " ITLB enable ITLB cache operations chart\n" + "\n" + " PBU enable PBU cache operations chart\n" + "\n" + " debug enable verbose output\n" + " default: disabled\n" + "\n" + " -v\n" + " -V\n" + " --version print version and exit\n" + "\n" + " -h\n" + " --help print this message and exit\n" + "\n" + " For more information:\n" + " https://github.com/netdata/netdata/tree/master/collectors/perf.plugin\n" + "\n" + , VERSION + , update_every + ); + exit(1); + } + + error("ignoring parameter '%s'", argv[i]); + } + + if(!plugin_enabled){ + info("no charts enabled - nothing to do."); + printf("DISABLE\n"); + exit(1); + } +} + +int main(int argc, char **argv) { + clocks_init(); + + // ------------------------------------------------------------------------ + // initialization of netdata plugin + + program_name = "perf.plugin"; + + // disable syslog + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + parse_command_line(argc, argv); + + errno = 0; + + if(freq >= update_every) + update_every = freq; + else if(freq) + error("update frequency %d seconds is too small for PERF. Using %d.", freq, update_every); + + if(unlikely(debug)) fprintf(stderr, "perf.plugin: calling perf_init()\n"); + int perf = !perf_init(); + + // ------------------------------------------------------------------------ + // the main loop + + if(unlikely(debug)) fprintf(stderr, "perf.plugin: starting data collection\n"); + + time_t started_t = now_monotonic_sec(); + + size_t iteration; + usec_t step = update_every * USEC_PER_SEC; + + heartbeat_t hb; + heartbeat_init(&hb); + for(iteration = 0; 1; iteration++) { + usec_t dt = heartbeat_next(&hb, step); + + if(unlikely(netdata_exit)) break; + + if(unlikely(debug && iteration)) + fprintf(stderr, "perf.plugin: iteration %zu, dt %llu usec\n" + , iteration + , dt + ); + + if(likely(perf)) { + if(unlikely(debug)) fprintf(stderr, "perf.plugin: calling perf_collect()\n"); + perf = !perf_collect(); + + if(likely(perf)) { + if(unlikely(debug)) fprintf(stderr, "perf.plugin: calling perf_send_metrics()\n"); + perf_send_metrics(); + } + } + + fflush(stdout); + + // restart check (14400 seconds) + if(now_monotonic_sec() - started_t > 14400) break; + } + + info("process exiting"); + perf_free(); +} diff --git a/collectors/plugins.d/Makefile.am b/collectors/plugins.d/Makefile.am new file mode 100644 index 0000000..59250a9 --- /dev/null +++ b/collectors/plugins.d/Makefile.am @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/plugins.d/README.md b/collectors/plugins.d/README.md new file mode 100644 index 0000000..2ecf233 --- /dev/null +++ b/collectors/plugins.d/README.md @@ -0,0 +1,599 @@ + + +# External plugins overview + +`plugins.d` is the Netdata internal plugin that collects metrics +from external processes, thus allowing Netdata to use **external plugins**. + +## Provided External Plugins + +|plugin|language|O/S|description| +|:----:|:------:|:-:|:----------| +|[apps.plugin](/collectors/apps.plugin/README.md)|`C`|linux, freebsd|monitors the whole process tree on Linux and FreeBSD and breaks down system resource usage by **process**, **user** and **user group**.| +|[charts.d.plugin](/collectors/charts.d.plugin/README.md)|`BASH`|all|a **plugin orchestrator** for data collection modules written in `BASH` v4+.| +|[cups.plugin](/collectors/cups.plugin/README.md)|`C`|all|monitors **CUPS**| +|[fping.plugin](/collectors/fping.plugin/README.md)|`C`|all|measures network latency, jitter and packet loss between the monitored node and any number of remote network end points.| +|[ioping.plugin](/collectors/ioping.plugin/README.md)|`C`|all|measures disk latency.| +|[freeipmi.plugin](/collectors/freeipmi.plugin/README.md)|`C`|linux|collects metrics from enterprise hardware sensors, on Linux servers.| +|[nfacct.plugin](/collectors/nfacct.plugin/README.md)|`C`|linux|collects netfilter firewall, connection tracker and accounting metrics using `libmnl` and `libnetfilter_acct`.| +|[xenstat.plugin](/collectors/xenstat.plugin/README.md)|`C`|linux|collects XenServer and XCP-ng metrics using `lxenstat`.| +|[perf.plugin](/collectors/perf.plugin/README.md)|`C`|linux|collects CPU performance metrics using performance monitoring units (PMU).| +|[python.d.plugin](/collectors/python.d.plugin/README.md)|`python`|all|a **plugin orchestrator** for data collection modules written in `python` v2 or v3 (both are supported).| +|[slabinfo.plugin](/collectors/slabinfo.plugin/README.md)|`C`|linux|collects kernel internal cache objects (SLAB) metrics.| + +Plugin orchestrators may also be described as **modular plugins**. They are modular since they accept custom made modules to be included. Writing modules for these plugins is easier than accessing the native Netdata API directly. You will find modules already available for each orchestrator under the directory of the particular modular plugin (e.g. under python.d.plugin for the python orchestrator). +Each of these modular plugins has each own methods for defining modules. Please check the examples and their documentation. + +## Motivation + +This plugin allows Netdata to use **external plugins** for data collection: + +1. external data collection plugins may be written in any computer language. + +2. external data collection plugins may use O/S capabilities or `setuid` to + run with escalated privileges (compared to the `netdata` daemon). + The communication between the external plugin and Netdata is unidirectional + (from the plugin to Netdata), so that Netdata cannot manipulate an external + plugin running with escalated privileges. + +## Operation + +Each of the external plugins is expected to run forever. +Netdata will start it when it starts and stop it when it exits. + +If the external plugin exits or crashes, Netdata will log an error. +If the external plugin exits or crashes without pushing metrics to Netdata, Netdata will not start it again. + +- Plugins that exit with any value other than zero, will be disabled. Plugins that exit with zero, will be restarted after some time. +- Plugins may also be disabled by Netdata if they output things that Netdata does not understand. + +The `stdout` of external plugins is connected to Netdata to receive metrics, +with the API defined below. + +The `stderr` of external plugins is connected to Netdata's `error.log`. + +Plugins can create any number of charts with any number of dimensions each. Each chart can have its own characteristics independently of the others generated by the same plugin. For example, one chart may have an update frequency of 1 second, another may have 5 seconds and a third may have 10 seconds. + +## Configuration + +Netdata will supply the environment variables `NETDATA_USER_CONFIG_DIR` (for user supplied) and `NETDATA_STOCK_CONFIG_DIR` (for Netdata supplied) configuration files to identify the directory where configuration files are stored. It is up to the plugin to read the configuration it needs. + +The `netdata.conf` section `[plugins]` section contains a list of all the plugins found at the system where Netdata runs, with a boolean setting to enable them or not. + +Example: + +``` +[plugins] + # enable running new plugins = yes + # check for new plugins every = 60 + + # charts.d = yes + # fping = yes + # ioping = yes + # python.d = yes +``` + +The setting `enable running new plugins` sets the default behavior for all external plugins. It can be +overridden for distinct plugins by modifying the appropriate plugin value configuration to either `yes` or `no`. + +The setting `check for new plugins every` sets the interval between scans of the directory +`/usr/libexec/netdata/plugins.d`. New plugins can be added any time, and Netdata will detect them in a timely manner. + +For each of the external plugins enabled, another `netdata.conf` section +is created, in the form of `[plugin:NAME]`, where `NAME` is the name of the external plugin. +This section allows controlling the update frequency of the plugin and provide +additional command line arguments to it. + +For example, for `apps.plugin` the following section is available: + +``` +[plugin:apps] + # update every = 1 + # command options = +``` + +- `update every` controls the granularity of the external plugin. +- `command options` allows giving additional command line options to the plugin. + +Netdata will provide to the external plugins the environment variable `NETDATA_UPDATE_EVERY`, in seconds (the default is 1). This is the **minimum update frequency** for all charts. A plugin that is updating values more frequently than this, is just wasting resources. + +Netdata will call the plugin with just one command line parameter: the number of seconds the user requested this plugin to update its data (by default is also 1). + +Other than the above, the plugin configuration is up to the plugin. + +Keep in mind, that the user may use Netdata configuration to overwrite chart and dimension parameters. This is transparent to the plugin. + +### Autoconfiguration + +Plugins should attempt to autoconfigure themselves when possible. + +For example, if your plugin wants to monitor `squid`, you can search for it on port `3128` or `8080`. If any succeeds, you can proceed. If it fails you can output an error (on stderr) saying that you cannot find `squid` running and giving instructions about the plugin configuration. Then you can stop (exit with non-zero value), so that Netdata will not attempt to start the plugin again. + +## External Plugins API + +Any program that can print a few values to its standard output can become a Netdata external plugin. + +Netdata parses lines starting with: + +- `CHART` - create or update a chart +- `DIMENSION` - add or update a dimension to the chart just created +- `VARIABLE` - define a variable (to be used in health calculations) +- `CLABEL` - add a label to a chart +- `CLABEL_COMMIT` - commit added labels to the chart +- `FUNCTION` - define a function that can be called later to execute it +- `BEGIN` - initialize data collection for a chart +- `SET` - set the value of a dimension for the initialized chart +- `END` - complete data collection for the initialized chart +- `FLUSH` - ignore the last collected values +- `DISABLE` - disable this plugin + +a single program can produce any number of charts with any number of dimensions each. + +Charts can be added any time (not just the beginning). + +### command line parameters + +The plugin **MUST** accept just **one** parameter: **the number of seconds it is +expected to update the values for its charts**. The value passed by Netdata +to the plugin is controlled via its configuration file (so there is no need +for the plugin to handle this configuration option). + +The external plugin can overwrite the update frequency. For example, the server may +request per second updates, but the plugin may ignore it and update its charts +every 5 seconds. + +### environment variables + +There are a few environment variables that are set by `netdata` and are +available for the plugin to use. + +|variable|description| +|:------:|:----------| +|`NETDATA_USER_CONFIG_DIR`|The directory where all Netdata-related user configuration should be stored. If the plugin requires custom user configuration, this is the place the user has saved it (normally under `/etc/netdata`).| +|`NETDATA_STOCK_CONFIG_DIR`|The directory where all Netdata -related stock configuration should be stored. If the plugin is shipped with configuration files, this is the place they can be found (normally under `/usr/lib/netdata/conf.d`).| +|`NETDATA_PLUGINS_DIR`|The directory where all Netdata plugins are stored.| +|`NETDATA_USER_PLUGINS_DIRS`|The list of directories where custom plugins are stored.| +|`NETDATA_WEB_DIR`|The directory where the web files of Netdata are saved.| +|`NETDATA_CACHE_DIR`|The directory where the cache files of Netdata are stored. Use this directory if the plugin requires a place to store data. A new directory should be created for the plugin for this purpose, inside this directory.| +|`NETDATA_LOG_DIR`|The directory where the log files are stored. By default the `stderr` output of the plugin will be saved in the `error.log` file of Netdata.| +|`NETDATA_HOST_PREFIX`|This is used in environments where system directories like `/sys` and `/proc` have to be accessed at a different path.| +|`NETDATA_DEBUG_FLAGS`|This is a number (probably in hex starting with `0x`), that enables certain Netdata debugging features. Check **\[[Tracing Options]]** for more information.| +|`NETDATA_UPDATE_EVERY`|The minimum number of seconds between chart refreshes. This is like the **internal clock** of Netdata (it is user configurable, defaulting to `1`). There is no meaning for a plugin to update its values more frequently than this number of seconds.| + +### The output of the plugin + +The plugin should output instructions for Netdata to its output (`stdout`). Since this uses pipes, please make sure you flush stdout after every iteration. + +#### DISABLE + +`DISABLE` will disable this plugin. This will prevent Netdata from restarting the plugin. You can also exit with the value `1` to have the same effect. + +#### CHART + +`CHART` defines a new chart. + +the template is: + +> CHART type.id name title units \[family \[context \[charttype \[priority \[update_every \[options \[plugin [module]]]]]]]] + + where: + +- `type.id` + + uniquely identifies the chart, + this is what will be needed to add values to the chart + + the `type` part controls the menu the charts will appear in + +- `name` + + is the name that will be presented to the user instead of `id` in `type.id`. This means that only the `id` part of + `type.id` is changed. When a name has been given, the chart is indexed (and can be referred) as both `type.id` and + `type.name`. You can set name to `''`, or `null`, or `(null)` to disable it. If a chart with the same name already + exists, a serial number is automatically attached to the name to avoid naming collisions. + +- `title` + + the text above the chart + +- `units` + + the label of the vertical axis of the chart, + all dimensions added to a chart should have the same units + of measurement + +- `family` + + is used to group charts together + (for example all eth0 charts should say: eth0), + if empty or missing, the `id` part of `type.id` will be used + + this controls the sub-menu on the dashboard + +- `context` + + the context is giving the template of the chart. For example, if multiple charts present the same information for a different family, they should have the same `context` + + this is used for looking up rendering information for the chart (colors, sizes, informational texts) and also apply alarms to it + +- `charttype` + + one of `line`, `area` or `stacked`, + if empty or missing, the `line` will be used + +- `priority` + + is the relative priority of the charts as rendered on the web page, + lower numbers make the charts appear before the ones with higher numbers, + if empty or missing, `1000` will be used + +- `update_every` + + overwrite the update frequency set by the server, + if empty or missing, the user configured value will be used + +- `options` + + a space separated list of options, enclosed in quotes. 4 options are currently supported: `obsolete` to mark a chart as obsolete (Netdata will hide it and delete it after some time), `detail` to mark a chart as insignificant (this may be used by dashboards to make the charts smaller, or somehow visualize properly a less important chart), `store_first` to make Netdata store the first collected value, assuming there was an invisible previous value set to zero (this is used by statsd charts - if the first data collected value of incremental dimensions is not zero based, unrealistic spikes will appear with this option set) and `hidden` to perform all operations on a chart, but do not offer it on dashboards (the chart will be send to external databases). `CHART` options have been added in Netdata v1.7 and the `hidden` option was added in 1.10. + +- `plugin` and `module` + + both are just names that are used to let the user identify the plugin and the module that generated the chart. If `plugin` is unset or empty, Netdata will automatically set the filename of the plugin that generated the chart. `module` has not default. + +#### DIMENSION + +`DIMENSION` defines a new dimension for the chart + +the template is: + +> DIMENSION id \[name \[algorithm \[multiplier \[divisor [options]]]]] + + where: + +- `id` + + the `id` of this dimension (it is a text value, not numeric), + this will be needed later to add values to the dimension + + We suggest to avoid using `.` in dimension ids. External databases expect metrics to be `.` separated and people will get confused if a dimension id contains a dot. + +- `name` + + the name of the dimension as it will appear at the legend of the chart, + if empty or missing the `id` will be used + +- `algorithm` + + one of: + + - `absolute` + + the value is to drawn as-is (interpolated to second boundary), + if `algorithm` is empty, invalid or missing, `absolute` is used + + - `incremental` + + the value increases over time, + the difference from the last value is presented in the chart, + the server interpolates the value and calculates a per second figure + + - `percentage-of-absolute-row` + + the % of this value compared to the total of all dimensions + + - `percentage-of-incremental-row` + + the % of this value compared to the incremental total of + all dimensions + +- `multiplier` + + an integer value to multiply the collected value, + if empty or missing, `1` is used + +- `divisor` + + an integer value to divide the collected value, + if empty or missing, `1` is used + +- `options` + + a space separated list of options, enclosed in quotes. Options supported: `obsolete` to mark a dimension as obsolete (Netdata will delete it after some time) and `hidden` to make this dimension hidden, it will take part in the calculations but will not be presented in the chart. + +#### VARIABLE + +> VARIABLE [SCOPE] name = value + +`VARIABLE` defines a variable that can be used in alarms. This is to used for setting constants (like the max connections a server may accept). + +Variables support 2 scopes: + +- `GLOBAL` or `HOST` to define the variable at the host level. +- `LOCAL` or `CHART` to define the variable at the chart level. Use chart-local variables when the same variable may exist for different charts (i.e. Netdata monitors 2 mysql servers, and you need to set the `max_connections` each server accepts). Using chart-local variables is the ideal to build alarm templates. + +The position of the `VARIABLE` line, sets its default scope (in case you do not specify a scope). So, defining a `VARIABLE` before any `CHART`, or between `END` and `BEGIN` (outside any chart), sets `GLOBAL` scope, while defining a `VARIABLE` just after a `CHART` or a `DIMENSION`, or within the `BEGIN` - `END` block of a chart, sets `LOCAL` scope. + +These variables can be set and updated at any point. + +Variable names should use alphanumeric characters, the `.` and the `_`. + +The `value` is floating point (Netdata used `long double`). + +Variables are transferred to upstream Netdata servers (streaming and database replication). + +#### CLABEL + +> CLABEL name value source + +`CLABEL` defines a label used to organize and identify a chart. + +Name and value accept characters according to the following table: + +| Character | Symbol | Label Name | Label Value | +|---------------------|:------:|:----------:|:-----------:| +| UTF-8 character | UTF-8 | _ | keep | +| Lower case letter | [a-z] | keep | keep | +| Upper case letter | [A-Z] | keep | [a-z] | +| Digit | [0-9] | keep | keep | +| Underscore | _ | keep | keep | +| Minus | - | keep | keep | +| Plus | + | _ | keep | +| Colon | : | _ | keep | +| Semicolon | ; | _ | : | +| Equal | = | _ | : | +| Period | . | keep | keep | +| Comma | , | . | . | +| Slash | / | keep | keep | +| Backslash | \ | / | / | +| At | @ | _ | keep | +| Space | ' ' | _ | keep | +| Opening parenthesis | ( | _ | keep | +| Closing parenthesis | ) | _ | keep | +| Anything else | | _ | _ | + +The `source` is an integer field that can have the following values: +- `1`: The value was set automatically. +- `2`: The value was set manually. +- `4`: This is a K8 label. +- `8`: This is a label defined using `netdata` agent cloud link. + +#### CLABEL_COMMIT + +`CLABEL_COMMIT` indicates that all labels were defined and the chart can be updated. + +#### FUNCTION + +> FUNCTION [GLOBAL] "name and parameters of the function" timeout "help string for users" + +A function can be used by users to ask for more information from the collector. Netdata maintains a registry of functions in 2 levels: + +- per node +- per chart + +Both node and chart functions are exactly the same, but chart functions allow Netdata to relate functions with charts and therefore present a context sensitive menu of functions related to the chart the user is using. + +A function is identified by a string. The allowed characters in the function definition are: + +| Character | Symbol | In Functions | +|-------------------|:------:|:------------:| +| UTF-8 character | UTF-8 | keep | +| Lower case letter | [a-z] | keep | +| Upper case letter | [A-Z] | keep | +| Digit | [0-9] | keep | +| Underscore | _ | keep | +| Comma | , | keep | +| Minus | - | keep | +| Period | . | keep | +| Colon | : | keep | +| Slash | / | keep | +| Space | ' ' | keep | +| Semicolon | ; | : | +| Equal | = | : | +| Backslash | \ | / | +| Anything else | | _ | + +Uses can get a list of all the registered functions using the `/api/v1/functions` end point of Netdata. + +Users can call functions using the `/api/v1/function` end point of Netdata. +Once a function is called, the plugin will receive at its standard input a command that looks like this: + +> FUNCTION transaction_id timeout "name and parameters of the function" + +The plugin is expected to parse and validate `name and parameters of the function`. Netdata allows users to edit this string, append more parameters or even change the ones the plugin originally exposed. To minimize the security risk, Netdata guarantees that only the characters shown above are accepted in function definitions, but still the plugin should carefully inspect the `name and parameters of the function` to ensure that it is valid and not harmful. + +If the plugin rejects the request, it should respond with this: + +``` +FUNCTION_RESULT_BEGIN transaction_id 400 application/json +{ + "status": 400, + "error_message": "description of the rejection reasons" +} +FUNCTION_RESULT_END +``` + +If the plugin prepares a response, it should send (via its standard output, together with the collected data, but not interleaved with them): + +> FUNCTION_RESULT_BEGIN transaction_id http_error_code content_type expiration + +Where: + + - `transaction_id` is the transaction id that Netdata sent for this function execution + - `http_error` is the http error code Netdata should respond with, 200 is the "ok" response + - `content_type` is the content type of the response + - `expiration` is the absolute timestamp (number, unix epoch) this response expires + +Immediately after this, all text is assumed to be the response content. +The content is text and line oriented. The maximum line length accepted is 15kb. Longer lines will be truncated. +The type of the context itself depends on the plugin and the UI. + +To terminate the message, Netdata seeks a line with just this: + +> FUNCTION_RESULT_END + +This defines the end of the message. `FUNCTION_RESULT_END` should appear in a line alone, without any other text, so it is wise to add `\n` before and after it. + +After this line, Netdata resumes processing collected metrics from the plugin. + +## Data collection + +data collection is defined as a series of `BEGIN` -> `SET` -> `END` lines + +> BEGIN type.id [microseconds] + +- `type.id` + + is the unique identification of the chart (as given in `CHART`) + +- `microseconds` + + is the number of microseconds since the last update of the chart. It is optional. + + Under heavy system load, the system may have some latency transferring + data from the plugins to Netdata via the pipe. This number improves + accuracy significantly, since the plugin is able to calculate the + duration between its iterations better than Netdata. + + The first time the plugin is started, no microseconds should be given + to Netdata. + +> SET id = value + +- `id` + + is the unique identification of the dimension (of the chart just began) + +- `value` + + is the collected value, only integer values are collected. If you want to push fractional values, multiply this value by 100 or 1000 and set the `DIMENSION` divider to 1000. + +> END + + END does not take any parameters, it commits the collected values for all dimensions to the chart. If a dimensions was not `SET`, its value will be empty for this commit. + +More `SET` lines may appear to update all the dimensions of the chart. +All of them in one `BEGIN` -> `END` block. + +All `SET` lines within a single `BEGIN` -> `END` block have to refer to the +same chart. + +If more charts need to be updated, each chart should have its own +`BEGIN` -> `SET` -> `END` block. + +If, for any reason, a plugin has issued a `BEGIN` but wants to cancel it, +it can issue a `FLUSH`. The `FLUSH` command will instruct Netdata to ignore +all the values collected since the last `BEGIN` command. + +If a plugin does not behave properly (outputs invalid lines, or does not +follow these guidelines), will be disabled by Netdata. + +### collected values + +Netdata will collect any **signed** value in the 64bit range: +`-9.223.372.036.854.775.808` to `+9.223.372.036.854.775.807` + +If a value is not collected, leave it empty, like this: + +`SET id =` + +or do not output the line at all. + +## Modular Plugins + +1. **python**, use `python.d.plugin`, there are many examples in the [python.d + directory](/collectors/python.d.plugin/README.md) + + python is ideal for Netdata plugins. It is a simple, yet powerful way to collect data, it has a very small memory footprint, although it is not the most CPU efficient way to do it. + +2. **BASH**, use `charts.d.plugin`, there are many examples in the [charts.d + directory](/collectors/charts.d.plugin/README.md) + + BASH is the simplest scripting language for collecting values. It is the less efficient though in terms of CPU resources. You can use it to collect data quickly, but extensive use of it might use a lot of system resources. + +3. **C** + + Of course, C is the most efficient way of collecting data. This is why Netdata itself is written in C. + +## Writing Plugins Properly + +There are a few rules for writing plugins properly: + +1. Respect system resources + + Pay special attention to efficiency: + + - Initialize everything once, at the beginning. Initialization is not an expensive operation. Your plugin will most probably be started once and run forever. So, do whatever heavy operation is needed at the beginning, just once. + - Do the absolutely minimum while iterating to collect values repeatedly. + - If you need to connect to another server to collect values, avoid re-connects if possible. Connect just once, with keep-alive (for HTTP) enabled and collect values using the same connection. + - Avoid any CPU or memory heavy operation while collecting data. If you control memory allocation, avoid any memory allocation while iterating to collect values. + - Avoid running external commands when possible. If you are writing shell scripts avoid especially pipes (each pipe is another fork, a very expensive operation). + +2. The best way to iterate at a constant pace is this pseudo code: + +```js + var update_every = argv[1] * 1000; /* seconds * 1000 = milliseconds */ + + readConfiguration(); + + if(!verifyWeCanCollectValues()) { + print("DISABLE"); + exit(1); + } + + createCharts(); /* print CHART and DIMENSION statements */ + + var loops = 0; + var last_run = 0; + var next_run = 0; + var dt_since_last_run = 0; + var now = 0; + + while(true) { + /* find the current time in milliseconds */ + now = currentTimeStampInMilliseconds(); + + /* + * find the time of the next loop + * this makes sure we are always aligned + * with the Netdata daemon + */ + next_run = now - (now % update_every) + update_every; + + /* + * wait until it is time + * it is important to do it in a loop + * since many wait functions can be interrupted + */ + while( now < next_run ) { + sleepMilliseconds(next_run - now); + now = currentTimeStampInMilliseconds(); + } + + /* calculate the time passed since the last run */ + if ( loops > 0 ) + dt_since_last_run = (now - last_run) * 1000; /* in microseconds */ + + /* prepare for the next loop */ + last_run = now; + loops++; + + /* do your magic here to collect values */ + collectValues(); + + /* send the collected data to Netdata */ + printValues(dt_since_last_run); /* print BEGIN, SET, END statements */ + } +``` + + Using the above procedure, your plugin will be synchronized to start data collection on steps of `update_every`. There will be no need to keep track of latencies in data collection. + + Netdata interpolates values to second boundaries, so even if your plugin is not perfectly aligned it does not matter. Netdata will find out. When your plugin works in increments of `update_every`, there will be no gaps in the charts due to the possible cumulative micro-delays in data collection. Gaps will only appear if the data collection is really delayed. + +3. If you are not sure of memory leaks, exit every one hour. Netdata will re-start your process. + +4. If possible, try to autodetect if your plugin should be enabled, without any configuration. + + diff --git a/collectors/plugins.d/plugins_d.c b/collectors/plugins.d/plugins_d.c new file mode 100644 index 0000000..79abc70 --- /dev/null +++ b/collectors/plugins.d/plugins_d.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugins_d.h" +#include "pluginsd_parser.h" + +char *plugin_directories[PLUGINSD_MAX_DIRECTORIES] = { NULL }; +struct plugind *pluginsd_root = NULL; + +inline size_t pluginsd_initialize_plugin_directories() +{ + char plugins_dirs[(FILENAME_MAX * 2) + 1]; + static char *plugins_dir_list = NULL; + + // Get the configuration entry + if (likely(!plugins_dir_list)) { + snprintfz(plugins_dirs, FILENAME_MAX * 2, "\"%s\" \"%s/custom-plugins.d\"", PLUGINS_DIR, CONFIG_DIR); + plugins_dir_list = strdupz(config_get(CONFIG_SECTION_DIRECTORIES, "plugins", plugins_dirs)); + } + + // Parse it and store it to plugin directories + return quoted_strings_splitter(plugins_dir_list, plugin_directories, PLUGINSD_MAX_DIRECTORIES, config_isspace, NULL, NULL, 0); +} + +static void pluginsd_worker_thread_cleanup(void *arg) +{ + struct plugind *cd = (struct plugind *)arg; + + if (cd->enabled && !cd->obsolete) { + cd->obsolete = 1; + + info("data collection thread exiting"); + + if (cd->pid) { + siginfo_t info; + info("killing child process pid %d", cd->pid); + if (killpid(cd->pid) != -1) { + info("waiting for child process pid %d to exit...", cd->pid); + waitid(P_PID, (id_t)cd->pid, &info, WEXITED); + } + cd->pid = 0; + } + } +} + +#define SERIAL_FAILURES_THRESHOLD 10 +static void pluginsd_worker_thread_handle_success(struct plugind *cd) +{ + if (likely(cd->successful_collections)) { + sleep((unsigned int)cd->update_every); + return; + } + + if (likely(cd->serial_failures <= SERIAL_FAILURES_THRESHOLD)) { + info( + "'%s' (pid %d) does not generate useful output but it reports success (exits with 0). %s.", + cd->fullfilename, cd->pid, + cd->enabled ? "Waiting a bit before starting it again." : "Will not start it again - it is now disabled."); + sleep((unsigned int)(cd->update_every * 10)); + return; + } + + if (cd->serial_failures > SERIAL_FAILURES_THRESHOLD) { + error( + "'%s' (pid %d) does not generate useful output, although it reports success (exits with 0)." + "We have tried to collect something %zu times - unsuccessfully. Disabling it.", + cd->fullfilename, cd->pid, cd->serial_failures); + cd->enabled = 0; + return; + } + + return; +} + +static void pluginsd_worker_thread_handle_error(struct plugind *cd, int worker_ret_code) +{ + if (worker_ret_code == -1) { + info("'%s' (pid %d) was killed with SIGTERM. Disabling it.", cd->fullfilename, cd->pid); + cd->enabled = 0; + return; + } + + if (!cd->successful_collections) { + error( + "'%s' (pid %d) exited with error code %d and haven't collected any data. Disabling it.", cd->fullfilename, + cd->pid, worker_ret_code); + cd->enabled = 0; + return; + } + + if (cd->serial_failures <= SERIAL_FAILURES_THRESHOLD) { + error( + "'%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times). %s", + cd->fullfilename, cd->pid, worker_ret_code, cd->successful_collections, + cd->enabled ? "Waiting a bit before starting it again." : "Will not start it again - it is disabled."); + sleep((unsigned int)(cd->update_every * 10)); + return; + } + + if (cd->serial_failures > SERIAL_FAILURES_THRESHOLD) { + error( + "'%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times)." + "We tried to restart it %zu times, but it failed to generate data. Disabling it.", + cd->fullfilename, cd->pid, worker_ret_code, cd->successful_collections, cd->serial_failures); + cd->enabled = 0; + return; + } + + return; +} +#undef SERIAL_FAILURES_THRESHOLD + +void *pluginsd_worker_thread(void *arg) +{ + worker_register("PLUGINSD"); + + netdata_thread_cleanup_push(pluginsd_worker_thread_cleanup, arg); + + struct plugind *cd = (struct plugind *)arg; + + cd->obsolete = 0; + size_t count = 0; + + while (!netdata_exit) { + FILE *fp_child_input = NULL; + FILE *fp_child_output = netdata_popen(cd->cmd, &cd->pid, &fp_child_input); + if (unlikely(!fp_child_input || !fp_child_output)) { + error("Cannot popen(\"%s\", \"r\").", cd->cmd); + break; + } + + info("connected to '%s' running on pid %d", cd->fullfilename, cd->pid); + count = pluginsd_process(localhost, cd, fp_child_input, fp_child_output, 0); + error("'%s' (pid %d) disconnected after %zu successful data collections (ENDs).", cd->fullfilename, cd->pid, count); + killpid(cd->pid); + + int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->pid); + + if (likely(worker_ret_code == 0)) + pluginsd_worker_thread_handle_success(cd); + else + pluginsd_worker_thread_handle_error(cd, worker_ret_code); + + cd->pid = 0; + if (unlikely(!cd->enabled)) + break; + } + worker_unregister(); + + netdata_thread_cleanup_pop(1); + return NULL; +} + +static void pluginsd_main_cleanup(void *data) +{ + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + info("cleaning up..."); + + struct plugind *cd; + for (cd = pluginsd_root; cd; cd = cd->next) { + if (cd->enabled && !cd->obsolete) { + info("stopping plugin thread: %s", cd->id); + netdata_thread_cancel(cd->thread); + } + } + + info("cleanup completed."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; + + worker_unregister(); +} + +void *pluginsd_main(void *ptr) +{ + netdata_thread_cleanup_push(pluginsd_main_cleanup, ptr); + + int automatic_run = config_get_boolean(CONFIG_SECTION_PLUGINS, "enable running new plugins", 1); + int scan_frequency = (int)config_get_number(CONFIG_SECTION_PLUGINS, "check for new plugins every", 60); + if (scan_frequency < 1) + scan_frequency = 1; + + // disable some plugins by default + config_get_boolean(CONFIG_SECTION_PLUGINS, "slabinfo", CONFIG_BOOLEAN_NO); + + // store the errno for each plugins directory + // so that we don't log broken directories on each loop + int directory_errors[PLUGINSD_MAX_DIRECTORIES] = { 0 }; + + while (!netdata_exit) { + int idx; + const char *directory_name; + + for (idx = 0; idx < PLUGINSD_MAX_DIRECTORIES && (directory_name = plugin_directories[idx]); idx++) { + if (unlikely(netdata_exit)) + break; + + errno = 0; + DIR *dir = opendir(directory_name); + if (unlikely(!dir)) { + if (directory_errors[idx] != errno) { + directory_errors[idx] = errno; + error("cannot open plugins directory '%s'", directory_name); + } + continue; + } + + struct dirent *file = NULL; + while (likely((file = readdir(dir)))) { + if (unlikely(netdata_exit)) + break; + + debug(D_PLUGINSD, "examining file '%s'", file->d_name); + + if (unlikely(strcmp(file->d_name, ".") == 0 || strcmp(file->d_name, "..") == 0)) + continue; + + int len = (int)strlen(file->d_name); + if (unlikely(len <= (int)PLUGINSD_FILE_SUFFIX_LEN)) + continue; + if (unlikely(strcmp(PLUGINSD_FILE_SUFFIX, &file->d_name[len - (int)PLUGINSD_FILE_SUFFIX_LEN]) != 0)) { + debug(D_PLUGINSD, "file '%s' does not end in '%s'", file->d_name, PLUGINSD_FILE_SUFFIX); + continue; + } + + char pluginname[CONFIG_MAX_NAME + 1]; + snprintfz(pluginname, CONFIG_MAX_NAME, "%.*s", (int)(len - PLUGINSD_FILE_SUFFIX_LEN), file->d_name); + int enabled = config_get_boolean(CONFIG_SECTION_PLUGINS, pluginname, automatic_run); + + if (unlikely(!enabled)) { + debug(D_PLUGINSD, "plugin '%s' is not enabled", file->d_name); + continue; + } + + // check if it runs already + struct plugind *cd; + for (cd = pluginsd_root; cd; cd = cd->next) + if (unlikely(strcmp(cd->filename, file->d_name) == 0)) + break; + + if (likely(cd && !cd->obsolete)) { + debug(D_PLUGINSD, "plugin '%s' is already running", cd->filename); + continue; + } + + // it is not running + // allocate a new one, or use the obsolete one + if (unlikely(!cd)) { + cd = callocz(sizeof(struct plugind), 1); + + snprintfz(cd->id, CONFIG_MAX_NAME, "plugin:%s", pluginname); + + strncpyz(cd->filename, file->d_name, FILENAME_MAX); + snprintfz(cd->fullfilename, FILENAME_MAX, "%s/%s", directory_name, cd->filename); + + cd->enabled = enabled; + cd->update_every = (int)config_get_number(cd->id, "update every", localhost->rrd_update_every); + cd->started_t = now_realtime_sec(); + + char *def = ""; + snprintfz( + cd->cmd, PLUGINSD_CMD_MAX, "exec %s %d %s", cd->fullfilename, cd->update_every, + config_get(cd->id, "command options", def)); + + // link it + if (likely(pluginsd_root)) + cd->next = pluginsd_root; + pluginsd_root = cd; + + // it is not currently running + cd->obsolete = 1; + + if (cd->enabled) { + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "PLUGINSD[%s]", pluginname); + // spawn a new thread for it + netdata_thread_create( + &cd->thread, tag, NETDATA_THREAD_OPTION_DEFAULT, pluginsd_worker_thread, cd); + } + } + } + + closedir(dir); + } + + sleep((unsigned int)scan_frequency); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/plugins.d/plugins_d.h b/collectors/plugins.d/plugins_d.h new file mode 100644 index 0000000..a8acf03 --- /dev/null +++ b/collectors/plugins.d/plugins_d.h @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PLUGINS_D_H +#define NETDATA_PLUGINS_D_H 1 + +#include "daemon/common.h" + +#define PLUGINSD_FILE_SUFFIX ".plugin" +#define PLUGINSD_FILE_SUFFIX_LEN strlen(PLUGINSD_FILE_SUFFIX) +#define PLUGINSD_CMD_MAX (FILENAME_MAX*2) +#define PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH 0 + +#define PLUGINSD_KEYWORD_CHART "CHART" +#define PLUGINSD_KEYWORD_CHART_DEFINITION_END "CHART_DEFINITION_END" +#define PLUGINSD_KEYWORD_DIMENSION "DIMENSION" +#define PLUGINSD_KEYWORD_BEGIN "BEGIN" +#define PLUGINSD_KEYWORD_SET "SET" +#define PLUGINSD_KEYWORD_END "END" +#define PLUGINSD_KEYWORD_FLUSH "FLUSH" +#define PLUGINSD_KEYWORD_DISABLE "DISABLE" +#define PLUGINSD_KEYWORD_VARIABLE "VARIABLE" +#define PLUGINSD_KEYWORD_LABEL "LABEL" +#define PLUGINSD_KEYWORD_OVERWRITE "OVERWRITE" +#define PLUGINSD_KEYWORD_CLABEL "CLABEL" +#define PLUGINSD_KEYWORD_CLABEL_COMMIT "CLABEL_COMMIT" +#define PLUGINSD_KEYWORD_FUNCTION "FUNCTION" +#define PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN "FUNCTION_RESULT_BEGIN" +#define PLUGINSD_KEYWORD_FUNCTION_RESULT_END "FUNCTION_RESULT_END" + +#define PLUGINSD_KEYWORD_REPLAY_CHART "REPLAY_CHART" +#define PLUGINSD_KEYWORD_REPLAY_BEGIN "RBEGIN" +#define PLUGINSD_KEYWORD_REPLAY_SET "RSET" +#define PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE "RDSTATE" +#define PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE "RSSTATE" +#define PLUGINSD_KEYWORD_REPLAY_END "REND" + +#define PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT 10 // seconds + +#define PLUGINSD_LINE_MAX_SSL_READ 512 + +#define PLUGINSD_MAX_WORDS 20 + +#define PLUGINSD_MAX_DIRECTORIES 20 +extern char *plugin_directories[PLUGINSD_MAX_DIRECTORIES]; + +struct plugind { + char id[CONFIG_MAX_NAME+1]; // config node id + + char filename[FILENAME_MAX+1]; // just the filename + char fullfilename[FILENAME_MAX+1]; // with path + char cmd[PLUGINSD_CMD_MAX+1]; // the command that it executes + + volatile pid_t pid; + netdata_thread_t thread; + + size_t successful_collections; // the number of times we have seen + // values collected from this plugin + + size_t serial_failures; // the number of times the plugin started + // without collecting values + + int update_every; // the plugin default data collection frequency + volatile sig_atomic_t obsolete; // do not touch this structure after setting this to 1 + volatile sig_atomic_t enabled; // if this is enabled or not + + time_t started_t; + uint32_t capabilities; // follows the same principles as streaming capabilities + struct plugind *next; +}; + +extern struct plugind *pluginsd_root; + +size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugin_input, FILE *fp_plugin_output, int trust_durations); + +size_t pluginsd_initialize_plugin_directories(); + + + +#define pluginsd_function_result_begin_to_buffer(wb, transaction, code, content_type, expires) \ + buffer_sprintf(wb \ + , PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " \"%s\" %d \"%s\" %ld\n" \ + , (transaction) ? (transaction) : "" \ + , (int)(code) \ + , (content_type) ? (content_type) : "" \ + , (long int)(expires) \ + ) + +#define pluginsd_function_result_end_to_buffer(wb) \ + buffer_strcat(wb, "\n" PLUGINSD_KEYWORD_FUNCTION_RESULT_END "\n") + +#define pluginsd_function_result_begin_to_stdout(transaction, code, content_type, expires) \ + fprintf(stdout \ + , PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " \"%s\" %d \"%s\" %ld\n" \ + , (transaction) ? (transaction) : "" \ + , (int)(code) \ + , (content_type) ? (content_type) : "" \ + , (long int)(expires) \ + ) + +#define pluginsd_function_result_end_to_stdout() \ + fprintf(stdout, "\n" PLUGINSD_KEYWORD_FUNCTION_RESULT_END "\n") + +#endif /* NETDATA_PLUGINS_D_H */ diff --git a/collectors/plugins.d/pluginsd_parser.c b/collectors/plugins.d/pluginsd_parser.c new file mode 100644 index 0000000..5501c12 --- /dev/null +++ b/collectors/plugins.d/pluginsd_parser.c @@ -0,0 +1,1360 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pluginsd_parser.h" + +#define LOG_FUNCTIONS false + +static int send_to_plugin(const char *txt, void *data) { + PARSER *parser = data; + + if(!txt || !*txt) + return 0; + +#ifdef ENABLE_HTTPS + struct netdata_ssl *ssl = parser->ssl_output; + if(ssl) { + if(ssl->conn && ssl->flags == NETDATA_SSL_HANDSHAKE_COMPLETE) + return (int)netdata_ssl_write(ssl->conn, (void *)txt, strlen(txt)); + + error("PLUGINSD: cannot send command (SSL)"); + return -1; + } +#endif + + if(parser->fp_output) { + int bytes = fprintf(parser->fp_output, "%s", txt); + if(bytes <= 0) { + error("PLUGINSD: cannot send command (FILE)"); + return -2; + } + fflush(parser->fp_output); + return bytes; + } + + if(parser->fd != -1) { + size_t bytes = 0; + size_t total = strlen(txt); + ssize_t sent; + + do { + sent = write(parser->fd, &txt[bytes], total - bytes); + if(sent <= 0) { + error("PLUGINSD: cannot send command (fd)"); + return -3; + } + bytes += sent; + } + while(bytes < total); + + return (int)bytes; + } + + error("PLUGINSD: cannot send command (no output socket/pipe/file given to plugins.d parser)"); + return -4; +} + +static inline RRDHOST *pluginsd_require_host_from_parent(void *user, const char *cmd) { + RRDHOST *host = ((PARSER_USER_OBJECT *) user)->host; + + if(unlikely(!host)) + error("PLUGINSD: command %s requires a host, but is not set.", cmd); + + return host; +} + +static inline RRDSET *pluginsd_require_chart_from_parent(void *user, const char *cmd, const char *parent_cmd) { + RRDSET *st = ((PARSER_USER_OBJECT *) user)->st; + + if(unlikely(!st)) + error("PLUGINSD: command %s requires a chart defined via command %s, but is not set.", cmd, parent_cmd); + + return st; +} + +static inline RRDDIM_ACQUIRED *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, const char *dimension, const char *cmd) { + if (unlikely(!dimension || !*dimension)) { + error("PLUGINSD: 'host:%s/chart:%s' got a %s, without a dimension.", + rrdhost_hostname(host), rrdset_id(st), cmd); + return NULL; + } + + RRDDIM_ACQUIRED *rda = rrddim_find_and_acquire(st, dimension); + + if (unlikely(!rda)) + error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s but dimension does not exist.", + rrdhost_hostname(host), rrdset_id(st), dimension, cmd); + + return rda; +} + +static inline RRDSET *pluginsd_find_chart(RRDHOST *host, const char *chart, const char *cmd) { + if (unlikely(!chart || !*chart)) { + error("PLUGINSD: 'host:%s' got a %s without a chart id.", + rrdhost_hostname(host), cmd); + return NULL; + } + + RRDSET *st = rrdset_find(host, chart); + if (unlikely(!st)) + error("PLUGINSD: 'host:%s/chart:%s' got a %s but chart does not exist.", + rrdhost_hostname(host), chart, cmd); + + return st; +} + +static inline PARSER_RC PLUGINSD_DISABLE_PLUGIN(void *user) { + ((PARSER_USER_OBJECT *) user)->enabled = 0; + return PARSER_RC_ERROR; +} + +PARSER_RC pluginsd_set(char **words, size_t num_words, void *user) +{ + char *dimension = get_word(words, num_words, 1); + char *value = get_word(words, num_words, 2); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_SET); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_SET, PLUGINSD_KEYWORD_CHART); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDDIM_ACQUIRED *rda = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_SET); + if(!rda) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDDIM *rd = rrddim_acquired_to_rrddim(rda); + + if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) + debug(D_PLUGINSD, "PLUGINSD: 'host:%s/chart:%s/dim:%s' SET is setting value to '%s'", + rrdhost_hostname(host), rrdset_id(st), dimension, value && *value ? value : "UNSET"); + + if (value && *value) + rrddim_set_by_pointer(st, rd, strtoll(value, NULL, 0)); + + rrddim_acquired_release(rda); + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_begin(char **words, size_t num_words, void *user) +{ + char *id = get_word(words, num_words, 1); + char *microseconds_txt = get_word(words, num_words, 2); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_BEGIN); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + ((PARSER_USER_OBJECT *)user)->st = st; + + usec_t microseconds = 0; + if (microseconds_txt && *microseconds_txt) + microseconds = str2ull(microseconds_txt); + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + if(st->replay.log_next_data_collection) { + st->replay.log_next_data_collection = false; + + internal_error(true, + "REPLAY: 'host:%s/chart:%s' first BEGIN after replication, last collected %llu, last updated %llu, microseconds %llu", + rrdhost_hostname(host), rrdset_id(st), + st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec, + st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec, + microseconds + ); + } +#endif + + if (likely(st->counter_done)) { + if (likely(microseconds)) { + if (((PARSER_USER_OBJECT *)user)->trust_durations) + rrdset_next_usec_unfiltered(st, microseconds); + else + rrdset_next_usec(st, microseconds); + } + else + rrdset_next(st); + } + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_end(char **words, size_t num_words, void *user) +{ + UNUSED(words); + UNUSED(num_words); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_END); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_END, PLUGINSD_KEYWORD_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) + debug(D_PLUGINSD, "requested an END on chart '%s'", rrdset_id(st)); + + ((PARSER_USER_OBJECT *) user)->st = NULL; + ((PARSER_USER_OBJECT *) user)->count++; + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st, now, /* pending_rrdset_next = */ false); + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_chart(char **words, size_t num_words, void *user) +{ + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_CHART); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + char *type = get_word(words, num_words, 1); + char *name = get_word(words, num_words, 2); + char *title = get_word(words, num_words, 3); + char *units = get_word(words, num_words, 4); + char *family = get_word(words, num_words, 5); + char *context = get_word(words, num_words, 6); + char *chart = get_word(words, num_words, 7); + char *priority_s = get_word(words, num_words, 8); + char *update_every_s = get_word(words, num_words, 9); + char *options = get_word(words, num_words, 10); + char *plugin = get_word(words, num_words, 11); + char *module = get_word(words, num_words, 12); + + // parse the id from type + char *id = NULL; + if (likely(type && (id = strchr(type, '.')))) { + *id = '\0'; + id++; + } + + // make sure we have the required variables + if (unlikely((!type || !*type || !id || !*id))) { + error("PLUGINSD: 'host:%s' requested a CHART, without a type.id. Disabling it.", + rrdhost_hostname(host)); + + ((PARSER_USER_OBJECT *) user)->enabled = 0; + return PARSER_RC_ERROR; + } + + // parse the name, and make sure it does not include 'type.' + if (unlikely(name && *name)) { + // when data are streamed from child nodes + // name will be type.name + // so we have to remove 'type.' from name too + size_t len = strlen(type); + if (strncmp(type, name, len) == 0 && name[len] == '.') + name = &name[len + 1]; + + // if the name is the same with the id, + // or is just 'NULL', clear it. + if (unlikely(strcmp(name, id) == 0 || strcasecmp(name, "NULL") == 0 || strcasecmp(name, "(NULL)") == 0)) + name = NULL; + } + + int priority = 1000; + if (likely(priority_s && *priority_s)) + priority = str2i(priority_s); + + int update_every = ((PARSER_USER_OBJECT *) user)->cd->update_every; + if (likely(update_every_s && *update_every_s)) + update_every = str2i(update_every_s); + if (unlikely(!update_every)) + update_every = ((PARSER_USER_OBJECT *) user)->cd->update_every; + + RRDSET_TYPE chart_type = RRDSET_TYPE_LINE; + if (unlikely(chart)) + chart_type = rrdset_type_id(chart); + + if (unlikely(name && !*name)) + name = NULL; + if (unlikely(family && !*family)) + family = NULL; + if (unlikely(context && !*context)) + context = NULL; + if (unlikely(!title)) + title = ""; + if (unlikely(!units)) + units = "unknown"; + + debug( + D_PLUGINSD, + "creating chart type='%s', id='%s', name='%s', family='%s', context='%s', chart='%s', priority=%d, update_every=%d", + type, id, name ? name : "", family ? family : "", context ? context : "", rrdset_type_name(chart_type), + priority, update_every); + + RRDSET *st = NULL; + + st = rrdset_create( + host, type, id, name, family, context, title, units, + (plugin && *plugin) ? plugin : ((PARSER_USER_OBJECT *)user)->cd->filename, + module, priority, update_every, + chart_type); + + if (likely(st)) { + if (options && *options) { + if (strstr(options, "obsolete")) + rrdset_is_obsolete(st); + else + rrdset_isnot_obsolete(st); + + if (strstr(options, "detail")) + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + else + rrdset_flag_clear(st, RRDSET_FLAG_DETAIL); + + if (strstr(options, "hidden")) + rrdset_flag_set(st, RRDSET_FLAG_HIDDEN); + else + rrdset_flag_clear(st, RRDSET_FLAG_HIDDEN); + + if (strstr(options, "store_first")) + rrdset_flag_set(st, RRDSET_FLAG_STORE_FIRST); + else + rrdset_flag_clear(st, RRDSET_FLAG_STORE_FIRST); + } else { + rrdset_isnot_obsolete(st); + rrdset_flag_clear(st, RRDSET_FLAG_DETAIL); + rrdset_flag_clear(st, RRDSET_FLAG_STORE_FIRST); + } + } + ((PARSER_USER_OBJECT *)user)->st = st; + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_chart_definition_end(char **words, size_t num_words, void *user) +{ + const char *first_entry_txt = get_word(words, num_words, 1); + const char *last_entry_txt = get_word(words, num_words, 2); + const char *world_time_txt = get_word(words, num_words, 3); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_CHART_DEFINITION_END); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_CHART_DEFINITION_END, PLUGINSD_KEYWORD_CHART); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + time_t first_entry_child = (first_entry_txt && *first_entry_txt) ? (time_t)str2ul(first_entry_txt) : 0; + time_t last_entry_child = (last_entry_txt && *last_entry_txt) ? (time_t)str2ul(last_entry_txt) : 0; + time_t child_world_time = (world_time_txt && *world_time_txt) ? (time_t)str2ul(world_time_txt) : now_realtime_sec(); + + if((first_entry_child != 0 || last_entry_child != 0) && (first_entry_child == 0 || last_entry_child == 0)) + error("PLUGINSD REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_CHART_DEFINITION_END " with malformed timings (first time %ld, last time %ld, world time %ld).", + rrdhost_hostname(host), rrdset_id(st), + first_entry_child, last_entry_child, child_world_time); + + bool ok = true; + if(!rrdset_flag_check(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS)) { + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + st->replay.start_streaming = false; + st->replay.after = 0; + st->replay.before = 0; +#endif + + rrdset_flag_set(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS); + rrdset_flag_clear(st, RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED); + rrdhost_receiver_replicating_charts_plus_one(st->rrdhost); + + PARSER *parser = ((PARSER_USER_OBJECT *)user)->parser; + ok = replicate_chart_request(send_to_plugin, parser, host, st, + first_entry_child, last_entry_child, child_world_time, + 0, 0); + } +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + else { + internal_error(true, "REPLAY: 'host:%s/chart:%s' not sending duplicate replication request", + rrdhost_hostname(st->rrdhost), rrdset_id(st)); + } +#endif + + return ok ? PARSER_RC_OK : PARSER_RC_ERROR; +} + +PARSER_RC pluginsd_dimension(char **words, size_t num_words, void *user) +{ + char *id = get_word(words, num_words, 1); + char *name = get_word(words, num_words, 2); + char *algorithm = get_word(words, num_words, 3); + char *multiplier_s = get_word(words, num_words, 4); + char *divisor_s = get_word(words, num_words, 5); + char *options = get_word(words, num_words, 6); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_DIMENSION); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_DIMENSION, PLUGINSD_KEYWORD_CHART); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + if (unlikely(!id)) { + error("PLUGINSD: 'host:%s/chart:%s' got a DIMENSION, without an id. Disabling it.", + rrdhost_hostname(host), st ? rrdset_id(st) : "UNSET"); + return PLUGINSD_DISABLE_PLUGIN(user); + } + + if (unlikely(!st && !((PARSER_USER_OBJECT *) user)->st_exists)) { + error("PLUGINSD: 'host:%s' got a DIMENSION, without a CHART. Disabling it.", + rrdhost_hostname(host)); + return PLUGINSD_DISABLE_PLUGIN(user); + } + + long multiplier = 1; + if (multiplier_s && *multiplier_s) { + multiplier = strtol(multiplier_s, NULL, 0); + if (unlikely(!multiplier)) + multiplier = 1; + } + + long divisor = 1; + if (likely(divisor_s && *divisor_s)) { + divisor = strtol(divisor_s, NULL, 0); + if (unlikely(!divisor)) + divisor = 1; + } + + if (unlikely(!algorithm || !*algorithm)) + algorithm = "absolute"; + + if (unlikely(st && rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) + debug( + D_PLUGINSD, + "creating dimension in chart %s, id='%s', name='%s', algorithm='%s', multiplier=%ld, divisor=%ld, hidden='%s'", + rrdset_id(st), id, name ? name : "", rrd_algorithm_name(rrd_algorithm_id(algorithm)), multiplier, divisor, + options ? options : ""); + + RRDDIM *rd = rrddim_add(st, id, name, multiplier, divisor, rrd_algorithm_id(algorithm)); + int unhide_dimension = 1; + + rrddim_option_clear(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS); + if (options && *options) { + if (strstr(options, "obsolete") != NULL) + rrddim_is_obsolete(st, rd); + else + rrddim_isnot_obsolete(st, rd); + + unhide_dimension = !strstr(options, "hidden"); + + if (strstr(options, "noreset") != NULL) + rrddim_option_set(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS); + if (strstr(options, "nooverflow") != NULL) + rrddim_option_set(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS); + } else + rrddim_isnot_obsolete(st, rd); + + if (likely(unhide_dimension)) { + rrddim_option_clear(rd, RRDDIM_OPTION_HIDDEN); + if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN); + metaqueue_dimension_update_flags(rd); + } + } + else { + rrddim_option_set(rd, RRDDIM_OPTION_HIDDEN); + if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN); + metaqueue_dimension_update_flags(rd); + } + } + + return PARSER_RC_OK; +} + +// ---------------------------------------------------------------------------- +// execution of functions + +struct inflight_function { + int code; + int timeout; + BUFFER *destination_wb; + STRING *function; + void (*callback)(BUFFER *wb, int code, void *callback_data); + void *callback_data; + usec_t timeout_ut; + usec_t started_ut; + usec_t sent_ut; +}; + +static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void *func, void *parser_ptr) { + struct inflight_function *pf = func; + + PARSER *parser = parser_ptr; + + // leave this code as default, so that when the dictionary is destroyed this will be sent back to the caller + pf->code = HTTP_RESP_GATEWAY_TIMEOUT; + + char buffer[2048 + 1]; + snprintfz(buffer, 2048, "FUNCTION %s %d \"%s\"\n", + dictionary_acquired_item_name(item), + pf->timeout, + string2str(pf->function)); + + // send the command to the plugin + int ret = send_to_plugin(buffer, parser); + + pf->sent_ut = now_realtime_usec(); + + if(ret < 0) { + error("FUNCTION: failed to send function to plugin, error %d", ret); + rrd_call_function_error(pf->destination_wb, "Failed to communicate with collector", HTTP_RESP_BACKEND_FETCH_FAILED); + } + else { + internal_error(LOG_FUNCTIONS, + "FUNCTION '%s' with transaction '%s' sent to collector (%d bytes, in %llu usec)", + string2str(pf->function), dictionary_acquired_item_name(item), ret, + pf->sent_ut - pf->started_ut); + } +} + +static bool inflight_functions_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, void *new_func, void *parser_ptr __maybe_unused) { + struct inflight_function *pf = new_func; + + error("PLUGINSD_PARSER: duplicate UUID on pending function '%s' detected. Ignoring the second one.", string2str(pf->function)); + pf->code = rrd_call_function_error(pf->destination_wb, "This request is already in progress", HTTP_RESP_BAD_REQUEST); + pf->callback(pf->destination_wb, pf->code, pf->callback_data); + string_freez(pf->function); + + return false; +} + +static void inflight_functions_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func, void *parser_ptr __maybe_unused) { + struct inflight_function *pf = func; + + internal_error(LOG_FUNCTIONS, + "FUNCTION '%s' result of transaction '%s' received from collector (%zu bytes, request %llu usec, response %llu usec)", + string2str(pf->function), dictionary_acquired_item_name(item), + buffer_strlen(pf->destination_wb), pf->sent_ut - pf->started_ut, now_realtime_usec() - pf->sent_ut); + + pf->callback(pf->destination_wb, pf->code, pf->callback_data); + string_freez(pf->function); +} + +void inflight_functions_init(PARSER *parser) { + parser->inflight.functions = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(parser->inflight.functions, inflight_functions_insert_callback, parser); + dictionary_register_delete_callback(parser->inflight.functions, inflight_functions_delete_callback, parser); + dictionary_register_conflict_callback(parser->inflight.functions, inflight_functions_conflict_callback, parser); +} + +static void inflight_functions_garbage_collect(PARSER *parser, usec_t now) { + parser->inflight.smaller_timeout = 0; + struct inflight_function *pf; + dfe_start_write(parser->inflight.functions, pf) { + if (pf->timeout_ut < now) { + internal_error(true, + "FUNCTION '%s' removing expired transaction '%s', after %llu usec.", + string2str(pf->function), pf_dfe.name, now - pf->started_ut); + + if(!buffer_strlen(pf->destination_wb) || pf->code == HTTP_RESP_OK) + pf->code = rrd_call_function_error(pf->destination_wb, + "Timeout waiting for collector response.", + HTTP_RESP_GATEWAY_TIMEOUT); + + dictionary_del(parser->inflight.functions, pf_dfe.name); + } + + else if(!parser->inflight.smaller_timeout || pf->timeout_ut < parser->inflight.smaller_timeout) + parser->inflight.smaller_timeout = pf->timeout_ut; + } + dfe_done(pf); +} + +// this is the function that is called from +// rrd_call_function_and_wait() and rrd_call_function_async() +static int pluginsd_execute_function_callback(BUFFER *destination_wb, int timeout, const char *function, void *collector_data, void (*callback)(BUFFER *wb, int code, void *callback_data), void *callback_data) { + PARSER *parser = collector_data; + + usec_t now = now_realtime_usec(); + + struct inflight_function tmp = { + .started_ut = now, + .timeout_ut = now + timeout * USEC_PER_SEC, + .destination_wb = destination_wb, + .timeout = timeout, + .function = string_strdupz(function), + .callback = callback, + .callback_data = callback_data, + }; + + uuid_t uuid; + uuid_generate_time(uuid); + + char key[UUID_STR_LEN]; + uuid_unparse_lower(uuid, key); + + dictionary_write_lock(parser->inflight.functions); + + // if there is any error, our dictionary callbacks will call the caller callback to notify + // the caller about the error - no need for error handling here. + dictionary_set(parser->inflight.functions, key, &tmp, sizeof(struct inflight_function)); + + if(!parser->inflight.smaller_timeout || tmp.timeout_ut < parser->inflight.smaller_timeout) + parser->inflight.smaller_timeout = tmp.timeout_ut; + + // garbage collect stale inflight functions + if(parser->inflight.smaller_timeout < now) + inflight_functions_garbage_collect(parser, now); + + dictionary_write_unlock(parser->inflight.functions); + + return HTTP_RESP_OK; +} + +PARSER_RC pluginsd_function(char **words, size_t num_words, void *user) +{ + bool global = false; + size_t i = 1; + if(num_words >= 2 && strcmp(get_word(words, num_words, 1), "GLOBAL") == 0) { + i++; + global = true; + } + + char *name = get_word(words, num_words, i++); + char *timeout_s = get_word(words, num_words, i++); + char *help = get_word(words, num_words, i++); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_FUNCTION); + if(!host) return PARSER_RC_ERROR; + + RRDSET *st = (global)?NULL:pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_FUNCTION, PLUGINSD_KEYWORD_CHART); + if(!st) global = true; + + if (unlikely(!timeout_s || !name || !help || (!global && !st))) { + error("PLUGINSD: 'host:%s/chart:%s' got a FUNCTION, without providing the required data (global = '%s', name = '%s', timeout = '%s', help = '%s'). Ignoring it.", + rrdhost_hostname(host), + st?rrdset_id(st):"(unset)", + global?"yes":"no", + name?name:"(unset)", + timeout_s?timeout_s:"(unset)", + help?help:"(unset)" + ); + return PARSER_RC_ERROR; + } + + int timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT; + if (timeout_s && *timeout_s) { + timeout = str2i(timeout_s); + if (unlikely(timeout <= 0)) + timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT; + } + + PARSER *parser = ((PARSER_USER_OBJECT *) user)->parser; + rrd_collector_add_function(host, st, name, timeout, help, false, pluginsd_execute_function_callback, parser); + + return PARSER_RC_OK; +} + +static void pluginsd_function_result_end(struct parser *parser, void *action_data) { + STRING *key = action_data; + if(key) + dictionary_del(parser->inflight.functions, string2str(key)); + string_freez(key); +} + +PARSER_RC pluginsd_function_result_begin(char **words, size_t num_words, void *user) +{ + char *key = get_word(words, num_words, 1); + char *status = get_word(words, num_words, 2); + char *format = get_word(words, num_words, 3); + char *expires = get_word(words, num_words, 4); + + if (unlikely(!key || !*key || !status || !*status || !format || !*format || !expires || !*expires)) { + error("got a " PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " without providing the required data (key = '%s', status = '%s', format = '%s', expires = '%s')." + , key ? key : "(unset)" + , status ? status : "(unset)" + , format ? format : "(unset)" + , expires ? expires : "(unset)" + ); + } + + int code = (status && *status) ? str2i(status) : 0; + if (code <= 0) + code = HTTP_RESP_BACKEND_RESPONSE_INVALID; + + time_t expiration = (expires && *expires) ? str2l(expires) : 0; + + PARSER *parser = ((PARSER_USER_OBJECT *) user)->parser; + + struct inflight_function *pf = NULL; + + if(key && *key) + pf = (struct inflight_function *)dictionary_get(parser->inflight.functions, key); + + if(!pf) { + error("got a " PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " for transaction '%s', but the transaction is not found.", key?key:"(unset)"); + } + else { + if(format && *format) + pf->destination_wb->contenttype = functions_format_to_content_type(format); + + pf->code = code; + + pf->destination_wb->expires = expiration; + if(expiration <= now_realtime_sec()) + buffer_no_cacheable(pf->destination_wb); + else + buffer_cacheable(pf->destination_wb); + } + + parser->defer.response = (pf) ? pf->destination_wb : NULL; + parser->defer.end_keyword = PLUGINSD_KEYWORD_FUNCTION_RESULT_END; + parser->defer.action = pluginsd_function_result_end; + parser->defer.action_data = string_strdupz(key); // it is ok is key is NULL + parser->flags |= PARSER_DEFER_UNTIL_KEYWORD; + + return PARSER_RC_OK; +} + +// ---------------------------------------------------------------------------- + +PARSER_RC pluginsd_variable(char **words, size_t num_words, void *user) +{ + char *name = get_word(words, num_words, 1); + char *value = get_word(words, num_words, 2); + NETDATA_DOUBLE v; + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_VARIABLE); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = ((PARSER_USER_OBJECT *) user)->st; + + int global = (st) ? 0 : 1; + + if (name && *name) { + if ((strcmp(name, "GLOBAL") == 0 || strcmp(name, "HOST") == 0)) { + global = 1; + name = get_word(words, num_words, 2); + value = get_word(words, num_words, 3); + } else if ((strcmp(name, "LOCAL") == 0 || strcmp(name, "CHART") == 0)) { + global = 0; + name = get_word(words, num_words, 2); + value = get_word(words, num_words, 3); + } + } + + if (unlikely(!name || !*name)) { + error("PLUGINSD: 'host:%s/chart:%s' got a VARIABLE without a variable name. Disabling it.", + rrdhost_hostname(host), st ? rrdset_id(st):"UNSET"); + + ((PARSER_USER_OBJECT *)user)->enabled = 0; + return PLUGINSD_DISABLE_PLUGIN(user); + } + + if (unlikely(!value || !*value)) + value = NULL; + + if (unlikely(!value)) { + error("PLUGINSD: 'host:%s/chart:%s' cannot set %s VARIABLE '%s' to an empty value", + rrdhost_hostname(host), + st ? rrdset_id(st):"UNSET", + (global) ? "HOST" : "CHART", + name); + return PARSER_RC_OK; + } + + if (!global && !st) { + error("PLUGINSD: 'host:%s/chart:%s' cannot update CHART VARIABLE '%s' without a chart", + rrdhost_hostname(host), + st ? rrdset_id(st):"UNSET", + name + ); + return PLUGINSD_DISABLE_PLUGIN(user); + } + + char *endptr = NULL; + v = (NETDATA_DOUBLE)str2ndd(value, &endptr); + if (unlikely(endptr && *endptr)) { + if (endptr == value) + error("PLUGINSD: 'host:%s/chart:%s' the value '%s' of VARIABLE '%s' cannot be parsed as a number", + rrdhost_hostname(host), + st ? rrdset_id(st):"UNSET", + value, + name); + else + error("PLUGINSD: 'host:%s/chart:%s' the value '%s' of VARIABLE '%s' has leftovers: '%s'", + rrdhost_hostname(host), + st ? rrdset_id(st):"UNSET", + value, + name, + endptr); + } + + if (global) { + const RRDVAR_ACQUIRED *rva = rrdvar_custom_host_variable_add_and_acquire(host, name); + if (rva) { + rrdvar_custom_host_variable_set(host, rva, v); + rrdvar_custom_host_variable_release(host, rva); + } + else + error("PLUGINSD: 'host:%s' cannot find/create HOST VARIABLE '%s'", + rrdhost_hostname(host), + name); + } else { + const RRDSETVAR_ACQUIRED *rsa = rrdsetvar_custom_chart_variable_add_and_acquire(st, name); + if (rsa) { + rrdsetvar_custom_chart_variable_set(st, rsa, v); + rrdsetvar_custom_chart_variable_release(st, rsa); + } + else + error("PLUGINSD: 'host:%s/chart:%s' cannot find/create CHART VARIABLE '%s'", + rrdhost_hostname(host), rrdset_id(st), name); + } + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_flush(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) +{ + debug(D_PLUGINSD, "requested a FLUSH"); + ((PARSER_USER_OBJECT *) user)->st = NULL; + ((PARSER_USER_OBJECT *) user)->replay.start_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.end_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = 0; + ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = 0; + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_disable(char **words __maybe_unused, size_t num_words __maybe_unused, void *user __maybe_unused) +{ + info("PLUGINSD: plugin called DISABLE. Disabling it."); + ((PARSER_USER_OBJECT *) user)->enabled = 0; + return PARSER_RC_ERROR; +} + +PARSER_RC pluginsd_label(char **words, size_t num_words, void *user) +{ + const char *name = get_word(words, num_words, 1); + const char *label_source = get_word(words, num_words, 2); + const char *value = get_word(words, num_words, 3); + + if (!name || !label_source || !value) { + error("PLUGINSD: ignoring malformed or empty LABEL command."); + return PLUGINSD_DISABLE_PLUGIN(user); + } + + char *store = (char *)value; + bool allocated_store = false; + + if(unlikely(num_words > 4)) { + allocated_store = true; + store = mallocz(PLUGINSD_LINE_MAX + 1); + size_t remaining = PLUGINSD_LINE_MAX; + char *move = store; + char *word; + for(size_t i = 3; i < num_words && remaining > 2 && (word = get_word(words, num_words, i)) ;i++) { + if(i > 3) { + *move++ = ' '; + *move = '\0'; + remaining--; + } + + size_t length = strlen(word); + if (length > remaining) + length = remaining; + + remaining -= length; + memcpy(move, word, length); + move += length; + *move = '\0'; + } + } + + if(unlikely(!((PARSER_USER_OBJECT *) user)->new_host_labels)) + ((PARSER_USER_OBJECT *) user)->new_host_labels = rrdlabels_create(); + + rrdlabels_add(((PARSER_USER_OBJECT *)user)->new_host_labels, + name, + store, + str2l(label_source)); + + if (allocated_store) + freez(store); + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_overwrite(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) +{ + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_OVERWRITE); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + debug(D_PLUGINSD, "requested to OVERWRITE host labels"); + + if(unlikely(!host->rrdlabels)) + host->rrdlabels = rrdlabels_create(); + + rrdlabels_migrate_to_these(host->rrdlabels, (DICTIONARY *) (((PARSER_USER_OBJECT *)user)->new_host_labels)); + metaqueue_store_host_labels(host->machine_guid); + + rrdlabels_destroy(((PARSER_USER_OBJECT *)user)->new_host_labels); + ((PARSER_USER_OBJECT *)user)->new_host_labels = NULL; + return PARSER_RC_OK; +} + + +PARSER_RC pluginsd_clabel(char **words, size_t num_words, void *user) +{ + const char *name = get_word(words, num_words, 1); + const char *value = get_word(words, num_words, 2); + const char *label_source = get_word(words, num_words, 3); + + if (!name || !value || !*label_source) { + error("Ignoring malformed or empty CHART LABEL command."); + return PLUGINSD_DISABLE_PLUGIN(user); + } + + if(unlikely(!((PARSER_USER_OBJECT *) user)->chart_rrdlabels_linked_temporarily)) { + ((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily = ((PARSER_USER_OBJECT *)user)->st->rrdlabels; + rrdlabels_unmark_all(((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily); + } + + rrdlabels_add(((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily, + name, value, str2l(label_source)); + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_clabel_commit(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) +{ + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_CLABEL_COMMIT); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_CLABEL_COMMIT, PLUGINSD_KEYWORD_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + debug(D_PLUGINSD, "requested to commit chart labels"); + + if(!((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily) { + error("PLUGINSD: 'host:%s' got CLABEL_COMMIT, without a CHART or BEGIN. Ignoring it.", + rrdhost_hostname(host)); + return PLUGINSD_DISABLE_PLUGIN(user); + } + + rrdlabels_remove_all_unmarked(((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily); + + rrdset_flag_set(st, RRDSET_FLAG_METADATA_UPDATE); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + + ((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily = NULL; + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_replay_rrdset_begin(char **words, size_t num_words, void *user) +{ + char *id = get_word(words, num_words, 1); + char *start_time_str = get_word(words, num_words, 2); + char *end_time_str = get_word(words, num_words, 3); + char *child_now_str = get_word(words, num_words, 4); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st; + if (likely(!id || !*id)) + st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_BEGIN, PLUGINSD_KEYWORD_REPLAY_BEGIN); + else + st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_REPLAY_BEGIN); + + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + ((PARSER_USER_OBJECT *) user)->st = st; + + if(start_time_str && end_time_str) { + time_t start_time = (time_t)str2ul(start_time_str); + time_t end_time = (time_t)str2ul(end_time_str); + + time_t wall_clock_time = 0, tolerance; + bool wall_clock_comes_from_child; (void)wall_clock_comes_from_child; + if(child_now_str) { + wall_clock_time = (time_t)str2ul(child_now_str); + tolerance = st->update_every + 1; + wall_clock_comes_from_child = true; + } + + if(wall_clock_time <= 0) { + wall_clock_time = now_realtime_sec(); + tolerance = st->update_every + 5; + wall_clock_comes_from_child = false; + } + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + internal_error( + (!st->replay.start_streaming && (end_time < st->replay.after || start_time > st->replay.before)), + "REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN " from %ld to %ld, which does not match our request (%ld to %ld).", + rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time, st->replay.after, st->replay.before); + + internal_error( + true, + "REPLAY: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN " from %ld to %ld, child wall clock is %ld (%s), had requested %ld to %ld", + rrdhost_hostname(st->rrdhost), rrdset_id(st), + start_time, end_time, wall_clock_time, wall_clock_comes_from_child ? "from child" : "parent time", + st->replay.after, st->replay.before); +#endif + + if(start_time && end_time && start_time < wall_clock_time + tolerance && end_time < wall_clock_time + tolerance && start_time < end_time) { + if (unlikely(end_time - start_time != st->update_every)) + rrdset_set_update_every(st, end_time - start_time); + + st->last_collected_time.tv_sec = end_time; + st->last_collected_time.tv_usec = 0; + + st->last_updated.tv_sec = end_time; + st->last_updated.tv_usec = 0; + + st->counter++; + st->counter_done++; + + // these are only needed for db mode RAM, SAVE, MAP, ALLOC + st->current_entry++; + if(st->current_entry >= st->entries) + st->current_entry -= st->entries; + + ((PARSER_USER_OBJECT *) user)->replay.start_time = start_time; + ((PARSER_USER_OBJECT *) user)->replay.end_time = end_time; + ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = (usec_t) start_time * USEC_PER_SEC; + ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = (usec_t) end_time * USEC_PER_SEC; + ((PARSER_USER_OBJECT *) user)->replay.wall_clock_time = wall_clock_time; + ((PARSER_USER_OBJECT *) user)->replay.rset_enabled = true; + + return PARSER_RC_OK; + } + + error("PLUGINSD REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN " from %ld to %ld, but timestamps are invalid (now is %ld [%s], tolerance %ld). Ignoring " PLUGINSD_KEYWORD_REPLAY_SET, + rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time, + wall_clock_time, wall_clock_comes_from_child ? "child wall clock" : "parent wall clock", tolerance); + } + + // the child sends an RBEGIN without any parameters initially + // setting rset_enabled to false, means the RSET should not store any metrics + // to store metrics, the RBEGIN needs to have timestamps + ((PARSER_USER_OBJECT *) user)->replay.start_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.end_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = 0; + ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = 0; + ((PARSER_USER_OBJECT *) user)->replay.wall_clock_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.rset_enabled = false; + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_replay_set(char **words, size_t num_words, void *user) +{ + char *dimension = get_word(words, num_words, 1); + char *value_str = get_word(words, num_words, 2); + char *flags_str = get_word(words, num_words, 3); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_SET); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + if(!((PARSER_USER_OBJECT *) user)->replay.rset_enabled) { + error_limit_static_thread_var(erl, 1, 0); + error_limit(&erl, "PLUGINSD: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_SET " but it is disabled by " PLUGINSD_KEYWORD_REPLAY_BEGIN " errors", + rrdhost_hostname(host), rrdset_id(st)); + + // we have to return OK here + return PARSER_RC_OK; + } + + RRDDIM_ACQUIRED *rda = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_SET); + if(!rda) return PLUGINSD_DISABLE_PLUGIN(user); + + if (unlikely(!((PARSER_USER_OBJECT *) user)->replay.start_time || !((PARSER_USER_OBJECT *) user)->replay.end_time)) { + error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a " PLUGINSD_KEYWORD_REPLAY_SET " with invalid timestamps %ld to %ld from a " PLUGINSD_KEYWORD_REPLAY_BEGIN ". Disabling it.", + rrdhost_hostname(host), + rrdset_id(st), + dimension, + ((PARSER_USER_OBJECT *) user)->replay.start_time, + ((PARSER_USER_OBJECT *) user)->replay.end_time); + return PLUGINSD_DISABLE_PLUGIN(user); + } + + if (unlikely(!value_str || !*value_str)) + value_str = "NAN"; + + if(unlikely(!flags_str)) + flags_str = ""; + + if (likely(value_str)) { + RRDDIM *rd = rrddim_acquired_to_rrddim(rda); + + RRDDIM_FLAGS rd_flags = rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE | RRDDIM_FLAG_ARCHIVED); + + if(!(rd_flags & RRDDIM_FLAG_ARCHIVED)) { + NETDATA_DOUBLE value = strtondd(value_str, NULL); + SN_FLAGS flags = SN_FLAG_NONE; + + char c; + while ((c = *flags_str++)) { + switch (c) { + case 'R': + flags |= SN_FLAG_RESET; + break; + + case 'E': + flags |= SN_EMPTY_SLOT; + value = NAN; + break; + + default: + error("unknown flag '%c'", c); + break; + } + } + + if (!netdata_double_isnumber(value)) { + value = NAN; + flags = SN_EMPTY_SLOT; + } + + rrddim_store_metric(rd, ((PARSER_USER_OBJECT *) user)->replay.end_time_ut, value, flags); + rd->last_collected_time.tv_sec = ((PARSER_USER_OBJECT *) user)->replay.end_time; + rd->last_collected_time.tv_usec = 0; + rd->collections_counter++; + } + else { + error_limit_static_global_var(erl, 1, 0); + error_limit(&erl, "PLUGINSD: 'host:%s/chart:%s/dim:%s' has the ARCHIVED flag set, but it is replicated. Ignoring data.", + rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_name(rd)); + } + } + + rrddim_acquired_release(rda); + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, size_t num_words, void *user) +{ + char *dimension = get_word(words, num_words, 1); + char *last_collected_ut_str = get_word(words, num_words, 2); + char *last_collected_value_str = get_word(words, num_words, 3); + char *last_calculated_value_str = get_word(words, num_words, 4); + char *last_stored_value_str = get_word(words, num_words, 5); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDDIM_ACQUIRED *rda = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); + if(!rda) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDDIM *rd = rrddim_acquired_to_rrddim(rda); + usec_t dim_last_collected_ut = (usec_t)rd->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->last_collected_time.tv_usec; + usec_t last_collected_ut = last_collected_ut_str ? str2ull(last_collected_ut_str) : 0; + if(last_collected_ut > dim_last_collected_ut) { + rd->last_collected_time.tv_sec = last_collected_ut / USEC_PER_SEC; + rd->last_collected_time.tv_usec = last_collected_ut % USEC_PER_SEC; + } + + rd->last_collected_value = last_collected_value_str ? str2ll(last_collected_value_str, NULL) : 0; + rd->last_calculated_value = last_calculated_value_str ? str2ndd(last_calculated_value_str, NULL) : 0; + rd->last_stored_value = last_stored_value_str ? str2ndd(last_stored_value_str, NULL) : 0.0; + rrddim_acquired_release(rda); + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_replay_rrdset_collection_state(char **words, size_t num_words, void *user) +{ + char *last_collected_ut_str = get_word(words, num_words, 1); + char *last_updated_ut_str = get_word(words, num_words, 2); + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + + usec_t chart_last_collected_ut = (usec_t)st->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)st->last_collected_time.tv_usec; + usec_t last_collected_ut = last_collected_ut_str ? str2ull(last_collected_ut_str) : 0; + if(last_collected_ut > chart_last_collected_ut) { + st->last_collected_time.tv_sec = last_collected_ut / USEC_PER_SEC; + st->last_collected_time.tv_usec = last_collected_ut % USEC_PER_SEC; + } + + usec_t chart_last_updated_ut = (usec_t)st->last_updated.tv_sec * USEC_PER_SEC + (usec_t)st->last_updated.tv_usec; + usec_t last_updated_ut = last_updated_ut_str ? str2ull(last_updated_ut_str) : 0; + if(last_updated_ut > chart_last_updated_ut) { + st->last_updated.tv_sec = last_updated_ut / USEC_PER_SEC; + st->last_updated.tv_usec = last_updated_ut % USEC_PER_SEC; + } + + st->counter++; + st->counter_done++; + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_replay_end(char **words, size_t num_words, void *user) +{ + if (num_words < 7) { // accepts 7, but the 7th is optional + error("REPLAY: malformed " PLUGINSD_KEYWORD_REPLAY_END " command"); + return PARSER_RC_ERROR; + } + + const char *update_every_child_txt = get_word(words, num_words, 1); + const char *first_entry_child_txt = get_word(words, num_words, 2); + const char *last_entry_child_txt = get_word(words, num_words, 3); + const char *start_streaming_txt = get_word(words, num_words, 4); + const char *first_entry_requested_txt = get_word(words, num_words, 5); + const char *last_entry_requested_txt = get_word(words, num_words, 6); + const char *child_world_time_txt = get_word(words, num_words, 7); // optional + + time_t update_every_child = (time_t)str2ul(update_every_child_txt); + time_t first_entry_child = (time_t)str2ul(first_entry_child_txt); + time_t last_entry_child = (time_t)str2ul(last_entry_child_txt); + + bool start_streaming = (strcmp(start_streaming_txt, "true") == 0); + time_t first_entry_requested = (time_t)str2ul(first_entry_requested_txt); + time_t last_entry_requested = (time_t)str2ul(last_entry_requested_txt); + + // the optional child world time + time_t child_world_time = (child_world_time_txt && *child_world_time_txt) ? (time_t)str2ul(child_world_time_txt) : now_realtime_sec(); + + PARSER_USER_OBJECT *user_object = user; + + RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_END); + if(!host) return PLUGINSD_DISABLE_PLUGIN(user); + + RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_END, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(user); + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + internal_error(true, + "PLUGINSD REPLAY: 'host:%s/chart:%s': got a " PLUGINSD_KEYWORD_REPLAY_END " child db from %llu to %llu, start_streaming %s, had requested from %llu to %llu, wall clock %llu", + rrdhost_hostname(host), rrdset_id(st), + (unsigned long long)first_entry_child, (unsigned long long)last_entry_child, + start_streaming?"true":"false", + (unsigned long long)first_entry_requested, (unsigned long long)last_entry_requested, + (unsigned long long)child_world_time + ); +#endif + + ((PARSER_USER_OBJECT *) user)->st = NULL; + ((PARSER_USER_OBJECT *) user)->count++; + + if(((PARSER_USER_OBJECT *) user)->replay.rset_enabled && st->rrdhost->receiver) { + time_t now = now_realtime_sec(); + time_t started = st->rrdhost->receiver->replication_first_time_t; + time_t current = ((PARSER_USER_OBJECT *) user)->replay.end_time; + + worker_set_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION, + (NETDATA_DOUBLE)(current - started) * 100.0 / (NETDATA_DOUBLE)(now - started)); + } + + ((PARSER_USER_OBJECT *) user)->replay.start_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.end_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = 0; + ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = 0; + ((PARSER_USER_OBJECT *) user)->replay.wall_clock_time = 0; + ((PARSER_USER_OBJECT *) user)->replay.rset_enabled = false; + + st->counter++; + st->counter_done++; + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + st->replay.start_streaming = false; + st->replay.after = 0; + st->replay.before = 0; + if(start_streaming) + st->replay.log_next_data_collection = true; +#endif + + if (start_streaming) { + if (st->update_every != update_every_child) + rrdset_set_update_every(st, update_every_child); + + if(rrdset_flag_check(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS)) { + rrdset_flag_set(st, RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED); + rrdset_flag_clear(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS); + rrdset_flag_clear(st, RRDSET_FLAG_SYNC_CLOCK); + rrdhost_receiver_replicating_charts_minus_one(st->rrdhost); + } +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + else + internal_error(true, "REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_END " with enable_streaming = true, but there is no replication in progress for this chart.", + rrdhost_hostname(host), rrdset_id(st)); +#endif + worker_set_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION, 100.0); + + return PARSER_RC_OK; + } + + rrdcontext_updated_retention_rrdset(st); + + bool ok = replicate_chart_request(send_to_plugin, user_object->parser, host, st, + first_entry_child, last_entry_child, child_world_time, + first_entry_requested, last_entry_requested); + return ok ? PARSER_RC_OK : PARSER_RC_ERROR; +} + +static void pluginsd_process_thread_cleanup(void *ptr) { + PARSER *parser = (PARSER *)ptr; + rrd_collector_finished(); + parser_destroy(parser); +} + +// New plugins.d parser + +inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugin_input, FILE *fp_plugin_output, int trust_durations) +{ + int enabled = cd->enabled; + + if (!fp_plugin_input || !fp_plugin_output || !enabled) { + cd->enabled = 0; + return 0; + } + + if (unlikely(fileno(fp_plugin_input) == -1)) { + error("input file descriptor given is not a valid stream"); + cd->serial_failures++; + return 0; + } + + if (unlikely(fileno(fp_plugin_output) == -1)) { + error("output file descriptor given is not a valid stream"); + cd->serial_failures++; + return 0; + } + + clearerr(fp_plugin_input); + clearerr(fp_plugin_output); + + PARSER_USER_OBJECT user = { + .enabled = cd->enabled, + .host = host, + .cd = cd, + .trust_durations = trust_durations + }; + + // fp_plugin_output = our input; fp_plugin_input = our output + PARSER *parser = parser_init(host, &user, fp_plugin_output, fp_plugin_input, -1, PARSER_INPUT_SPLIT, NULL); + + rrd_collector_started(); + + // this keeps the parser with its current value + // so, parser needs to be allocated before pushing it + netdata_thread_cleanup_push(pluginsd_process_thread_cleanup, parser); + + user.parser = parser; + + while (likely(!parser_next(parser))) { + if (unlikely(netdata_exit || parser_action(parser, NULL))) + break; + } + + // free parser with the pop function + netdata_thread_cleanup_pop(1); + + cd->enabled = user.enabled; + size_t count = user.count; + + if (likely(count)) { + cd->successful_collections += count; + cd->serial_failures = 0; + } + else + cd->serial_failures++; + + return count; +} diff --git a/collectors/plugins.d/pluginsd_parser.h b/collectors/plugins.d/pluginsd_parser.h new file mode 100644 index 0000000..e18b43e --- /dev/null +++ b/collectors/plugins.d/pluginsd_parser.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PLUGINSD_PARSER_H +#define NETDATA_PLUGINSD_PARSER_H + +#include "parser/parser.h" + +typedef struct parser_user_object { + PARSER *parser; + RRDSET *st; + RRDHOST *host; + void *opaque; + struct plugind *cd; + int trust_durations; + DICTIONARY *new_host_labels; + DICTIONARY *chart_rrdlabels_linked_temporarily; + size_t count; + int enabled; + uint8_t st_exists; + uint8_t host_exists; + void *private; // the user can set this for private use + + struct { + time_t start_time; + time_t end_time; + + usec_t start_time_ut; + usec_t end_time_ut; + + time_t wall_clock_time; + + bool rset_enabled; + } replay; +} PARSER_USER_OBJECT; + +PARSER_RC pluginsd_function(char **words, size_t num_words, void *user); +PARSER_RC pluginsd_function_result_begin(char **words, size_t num_words, void *user); +void inflight_functions_init(PARSER *parser); +#endif //NETDATA_PLUGINSD_PARSER_H diff --git a/collectors/proc.plugin/Makefile.am b/collectors/proc.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/proc.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md new file mode 100644 index 0000000..32e2112 --- /dev/null +++ b/collectors/proc.plugin/README.md @@ -0,0 +1,607 @@ + + +# proc.plugin + +- `/proc/net/dev` (all network interfaces for all their values) +- `/proc/diskstats` (all disks for all their values) +- `/proc/mdstat` (status of RAID arrays) +- `/proc/net/snmp` (total IPv4, TCP and UDP usage) +- `/proc/net/snmp6` (total IPv6 usage) +- `/proc/net/netstat` (more IPv4 usage) +- `/proc/net/wireless` (wireless extension) +- `/proc/net/stat/nf_conntrack` (connection tracking performance) +- `/proc/net/stat/synproxy` (synproxy performance) +- `/proc/net/ip_vs/stats` (IPVS connection statistics) +- `/proc/stat` (CPU utilization and attributes) +- `/proc/meminfo` (memory information) +- `/proc/vmstat` (system performance) +- `/proc/net/rpc/nfsd` (NFS server statistics for both v3 and v4 NFS servers) +- `/sys/fs/cgroup` (Control Groups - Linux Containers) +- `/proc/self/mountinfo` (mount points) +- `/proc/interrupts` (total and per core hardware interrupts) +- `/proc/softirqs` (total and per core software interrupts) +- `/proc/loadavg` (system load and total processes running) +- `/proc/pressure/{cpu,memory,io}` (pressure stall information) +- `/proc/sys/kernel/random/entropy_avail` (random numbers pool availability - used in cryptography) +- `/proc/spl/kstat/zfs/arcstats` (status of ZFS adaptive replacement cache) +- `/proc/spl/kstat/zfs/pool/state` (state of ZFS pools) +- `/sys/class/power_supply` (power supply properties) +- `/sys/class/infiniband` (infiniband interconnect) +- `ipc` (IPC semaphores and message queues) +- `ksm` Kernel Same-Page Merging performance (several files under `/sys/kernel/mm/ksm`). +- `netdata` (internal Netdata resources utilization) + +- - - + +## Monitoring Disks + +> Live demo of disk monitoring at: **[http://london.netdata.rocks](https://registry.my-netdata.io/#menu_disk)** + +Performance monitoring for Linux disks is quite complicated. The main reason is the plethora of disk technologies available. There are many different hardware disk technologies, but there are even more **virtual disk** technologies that can provide additional storage features. + +Hopefully, the Linux kernel provides many metrics that can provide deep insights of what our disks our doing. The kernel measures all these metrics on all layers of storage: **virtual disks**, **physical disks** and **partitions of disks**. + +### Monitored disk metrics + +- **I/O bandwidth/s (kb/s)** + The amount of data transferred from and to the disk. +- **Amount of discarded data (kb/s)** +- **I/O operations/s** + The number of I/O operations completed. +- **Extended I/O operations/s** + The number of extended I/O operations completed. +- **Queued I/O operations** + The number of currently queued I/O operations. For traditional disks that execute commands one after another, one of them is being run by the disk and the rest are just waiting in a queue. +- **Backlog size (time in ms)** + The expected duration of the currently queued I/O operations. +- **Utilization (time percentage)** + The percentage of time the disk was busy with something. This is a very interesting metric, since for most disks, that execute commands sequentially, **this is the key indication of congestion**. A sequential disk that is 100% of the available time busy, has no time to do anything more, so even if the bandwidth or the number of operations executed by the disk is low, its capacity has been reached. + Of course, for newer disk technologies (like fusion cards) that are capable to execute multiple commands in parallel, this metric is just meaningless. +- **Average I/O operation time (ms)** + The average time for I/O requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them. +- **Average I/O operation time for extended operations (ms)** + The average time for extended I/O requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them. +- **Average I/O operation size (kb)** + The average amount of data of the completed I/O operations. +- **Average amount of discarded data (kb)** + The average amount of data of the completed discard operations. +- **Average Service Time (ms)** + The average service time for completed I/O operations. This metric is calculated using the total busy time of the disk and the number of completed operations. If the disk is able to execute multiple parallel operations the reporting average service time will be misleading. +- **Average Service Time for extended I/O operations (ms)** + The average service time for completed extended I/O operations. +- **Merged I/O operations/s** + The Linux kernel is capable of merging I/O operations. So, if two requests to read data from the disk are adjacent, the Linux kernel may merge them to one before giving them to disk. This metric measures the number of operations that have been merged by the Linux kernel. +- **Merged discard operations/s** +- **Total I/O time** + The sum of the duration of all completed I/O operations. This number can exceed the interval if the disk is able to execute multiple I/O operations in parallel. +- **Space usage** + For mounted disks, Netdata will provide a chart for their space, with 3 dimensions: + 1. free + 2. used + 3. reserved for root +- **inode usage** + For mounted disks, Netdata will provide a chart for their inodes (number of file and directories), with 3 dimensions: + 1. free + 2. used + 3. reserved for root + +### disk names + +Netdata will automatically set the name of disks on the dashboard, from the mount point they are mounted, of course only when they are mounted. Changes in mount points are not currently detected (you will have to restart Netdata to change the name of the disk). To use disk IDs provided by `/dev/disk/by-id`, the `name disks by id` option should be enabled. The `preferred disk ids` simple pattern allows choosing disk IDs to be used in the first place. + +### performance metrics + +By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a chart instead of `auto` to enable it permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. + +Netdata categorizes all block devices in 3 categories: + +1. physical disks (i.e. block devices that do not have child devices and are not partitions) +2. virtual disks (i.e. block devices that have child devices - like RAID devices) +3. disk partitions (i.e. block devices that are part of a physical disk) + +Performance metrics are enabled by default for all disk devices, except partitions and not-mounted virtual disks. Of course, you can enable/disable monitoring any block device by editing the Netdata configuration file. + +### Netdata configuration + +You can get the running Netdata configuration using this: + +```sh +cd /etc/netdata +curl "http://localhost:19999/netdata.conf" >netdata.conf.new +mv netdata.conf.new netdata.conf +``` + +Then edit `netdata.conf` and find the following section. This is the basic plugin configuration. + +``` +[plugin:proc:/proc/diskstats] + # enable new disks detected at runtime = yes + # performance metrics for physical disks = auto + # performance metrics for virtual disks = auto + # performance metrics for partitions = no + # bandwidth for all disks = auto + # operations for all disks = auto + # merged operations for all disks = auto + # i/o time for all disks = auto + # queued operations for all disks = auto + # utilization percentage for all disks = auto + # extended operations for all disks = auto + # backlog for all disks = auto + # bcache for all disks = auto + # bcache priority stats update every = 0 + # remove charts of removed disks = yes + # path to get block device = /sys/block/%s + # path to get block device bcache = /sys/block/%s/bcache + # path to get virtual block device = /sys/devices/virtual/block/%s + # path to get block device infos = /sys/dev/block/%lu:%lu/%s + # path to device mapper = /dev/mapper + # path to /dev/disk/by-label = /dev/disk/by-label + # path to /dev/disk/by-id = /dev/disk/by-id + # path to /dev/vx/dsk = /dev/vx/dsk + # name disks by id = no + # preferred disk ids = * + # exclude disks = loop* ram* + # filename to monitor = /proc/diskstats + # performance metrics for disks with major 8 = yes +``` + +For each virtual disk, physical disk and partition you will have a section like this: + +``` +[plugin:proc:/proc/diskstats:sda] + # enable = yes + # enable performance metrics = auto + # bandwidth = auto + # operations = auto + # merged operations = auto + # i/o time = auto + # queued operations = auto + # utilization percentage = auto + # extended operations = auto + # backlog = auto +``` + +For all configuration options: + +- `auto` = enable monitoring if the collected values are not zero +- `yes` = enable monitoring +- `no` = disable monitoring + +Of course, to set options, you will have to uncomment them. The comments show the internal defaults. + +After saving `/etc/netdata/netdata.conf`, restart your Netdata to apply them. + +#### Disabling performance metrics for individual device and to multiple devices by device type + +You can pretty easy disable performance metrics for individual device, for ex.: + +``` +[plugin:proc:/proc/diskstats:sda] + enable performance metrics = no +``` + +But sometimes you need disable performance metrics for all devices with the same type, to do it you need to figure out device type from `/proc/diskstats` for ex.: + +``` + 7 0 loop0 1651 0 3452 168 0 0 0 0 0 8 168 + 7 1 loop1 4955 0 11924 880 0 0 0 0 0 64 880 + 7 2 loop2 36 0 216 4 0 0 0 0 0 4 4 + 7 6 loop6 0 0 0 0 0 0 0 0 0 0 0 + 7 7 loop7 0 0 0 0 0 0 0 0 0 0 0 + 251 2 zram2 27487 0 219896 188 79953 0 639624 1640 0 1828 1828 + 251 3 zram3 27348 0 218784 152 79952 0 639616 1960 0 2060 2104 +``` + +All zram devices starts with `251` number and all loop devices starts with `7`. +So, to disable performance metrics for all loop devices you could add `performance metrics for disks with major 7 = no` to `[plugin:proc:/proc/diskstats]` section. + +``` +[plugin:proc:/proc/diskstats] + performance metrics for disks with major 7 = no +``` + +## Monitoring RAID arrays + +### Monitored RAID array metrics + +1. **Health** Number of failed disks in every array (aggregate chart). + +2. **Disks stats** + +- total (number of devices array ideally would have) +- inuse (number of devices currently are in use) + +3. **Mismatch count** + +- unsynchronized blocks + +4. **Current status** + +- resync in percent +- recovery in percent +- reshape in percent +- check in percent + +5. **Operation status** (if resync/recovery/reshape/check is active) + +- finish in minutes +- speed in megabytes/s + +6. **Nonredundant array availability** + +#### configuration + +``` +[plugin:proc:/proc/mdstat] + # faulty devices = yes + # nonredundant arrays availability = yes + # mismatch count = auto + # disk stats = yes + # operation status = yes + # make charts obsolete = yes + # filename to monitor = /proc/mdstat + # mismatch_cnt filename to monitor = /sys/block/%s/md/mismatch_cnt +``` + +## Monitoring CPUs + +The `/proc/stat` module monitors CPU utilization, interrupts, context switches, processes started/running, thermal +throttling, frequency, and idle states. It gathers this information from multiple files. + +If your system has more than 50 processors (`physical processors * cores per processor * threads per core`), the Agent +automatically disables CPU thermal throttling, frequency, and idle state charts. To override this default, see the next +section on configuration. + +### Configuration + +The settings for monitoring CPUs is in the `[plugin:proc:/proc/stat]` of your `netdata.conf` file. + +The `keep per core files open` option lets you reduce the number of file operations on multiple files. + +If your system has more than 50 processors and you would like to see the CPU thermal throttling, frequency, and idle +state charts that are automatically disabled, you can set the following boolean options in the +`[plugin:proc:/proc/stat]` section. + +```conf + keep per core files open = yes + keep cpuidle files open = yes + core_throttle_count = yes + package_throttle_count = yes + cpu frequency = yes + cpu idle states = yes +``` + +### CPU frequency + +The module shows the current CPU frequency as set by the `cpufreq` kernel +module. + +**Requirement:** +You need to have `CONFIG_CPU_FREQ` and (optionally) `CONFIG_CPU_FREQ_STAT` +enabled in your kernel. + +`cpufreq` interface provides two different ways of getting the information through `/sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq` and `/sys/devices/system/cpu/cpu*/cpufreq/stats/time_in_state` files. The latter is more accurate so it is preferred in the module. `scaling_cur_freq` represents only the current CPU frequency, and doesn't account for any state changes which happen between updates. The module switches back and forth between these two methods if governor is changed. + +It produces one chart with multiple lines (one line per core). + +#### configuration + +`scaling_cur_freq filename to monitor` and `time_in_state filename to monitor` in the `[plugin:proc:/proc/stat]` configuration section + +### CPU idle states + +The module monitors the usage of CPU idle states. + +**Requirement:** +Your kernel needs to have `CONFIG_CPU_IDLE` enabled. + +It produces one stacked chart per CPU, showing the percentage of time spent in +each state. + +#### configuration + +`schedstat filename to monitor`, `cpuidle name filename to monitor`, and `cpuidle time filename to monitor` in the `[plugin:proc:/proc/stat]` configuration section + +## Monitoring memory + +### Monitored memory metrics + +- Amount of memory swapped in/out +- Amount of memory paged from/to disk +- Number of memory page faults +- Number of out of memory kills +- Number of NUMA events + +### Configuration + +```conf +[plugin:proc:/proc/vmstat] + filename to monitor = /proc/vmstat + swap i/o = auto + disk i/o = yes + memory page faults = yes + out of memory kills = yes + system-wide numa metric summary = auto +``` + +## Monitoring Network Interfaces + +### Monitored network interface metrics + +- **Physical Network Interfaces Aggregated Bandwidth (kilobits/s)** + The amount of data received and sent through all physical interfaces in the system. This is the source of data for the Net Inbound and Net Outbound dials in the System Overview section. + +- **Bandwidth (kilobits/s)** + The amount of data received and sent through the interface. + +- **Packets (packets/s)** + The number of packets received, packets sent, and multicast packets transmitted through the interface. + +- **Interface Errors (errors/s)** + The number of errors for the inbound and outbound traffic on the interface. + +- **Interface Drops (drops/s)** + The number of packets dropped for the inbound and outbound traffic on the interface. + +- **Interface FIFO Buffer Errors (errors/s)** + The number of FIFO buffer errors encountered while receiving and transmitting data through the interface. + +- **Compressed Packets (packets/s)** + The number of compressed packets transmitted or received by the device driver. + +- **Network Interface Events (events/s)** + The number of packet framing errors, collisions detected on the interface, and carrier losses detected by the device driver. + +By default Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). + +### Monitoring wireless network interfaces + +The settings for monitoring wireless is in the `[plugin:proc:/proc/net/wireless]` section of your `netdata.conf` file. + +```conf + status for all interfaces = yes + quality for all interfaces = yes + discarded packets for all interfaces = yes + missed beacon for all interface = yes +``` + +You can set the following values for each configuration option: + +- `auto` = enable monitoring if the collected values are not zero +- `yes` = enable monitoring +- `no` = disable monitoring + +#### Monitored wireless interface metrics + +- **Status** + The current state of the interface. This is a device-dependent option. + +- **Link** + Overall quality of the link. + +- **Level** + Received signal strength (RSSI), which indicates how strong the received signal is. + +- **Noise** + Background noise level. + +- **Discarded packets** + Discarded packets for: Number of packets received with a different NWID or ESSID (`nwid`), unable to decrypt (`crypt`), hardware was not able to properly re-assemble the link layer fragments (`frag`), packets failed to deliver (`retry`), and packets lost in relation with specific wireless operations (`misc`). + +- **Missed beacon** + Number of periodic beacons from the cell or the access point the interface has missed. + +#### Wireless configuration + +#### alarms + +There are several alarms defined in `health.d/net.conf`. + +The tricky ones are `inbound packets dropped` and `inbound packets dropped ratio`. They have quite a strict policy so that they warn users about possible issues. These alarms can be annoying for some network configurations. It is especially true for some bonding configurations if an interface is a child or a bonding interface itself. If it is expected to have a certain number of drops on an interface for a certain network configuration, a separate alarm with different triggering thresholds can be created or the existing one can be disabled for this specific interface. It can be done with the help of the [families](/health/REFERENCE.md#alarm-line-families) line in the alarm configuration. For example, if you want to disable the `inbound packets dropped` alarm for `eth0`, set `families: !eth0 *` in the alarm definition for `template: inbound_packets_dropped`. + +#### configuration + +Module configuration: + +``` +[plugin:proc:/proc/net/dev] + # filename to monitor = /proc/net/dev + # path to get virtual interfaces = /sys/devices/virtual/net/%s + # path to get net device speed = /sys/class/net/%s/speed + # enable new interfaces detected at runtime = auto + # bandwidth for all interfaces = auto + # packets for all interfaces = auto + # errors for all interfaces = auto + # drops for all interfaces = auto + # fifo for all interfaces = auto + # compressed packets for all interfaces = auto + # frames, collisions, carrier counters for all interfaces = auto + # disable by default interfaces matching = lo fireqos* *-ifb + # refresh interface speed every seconds = 10 +``` + +Per interface configuration: + +``` +[plugin:proc:/proc/net/dev:enp0s3] + # enabled = yes + # virtual = no + # bandwidth = auto + # packets = auto + # errors = auto + # drops = auto + # fifo = auto + # compressed = auto + # events = auto +``` + +## Linux Anti-DDoS + +![image6](https://cloud.githubusercontent.com/assets/2662304/14253733/53550b16-fa95-11e5-8d9d-4ed171df4735.gif) + +--- + +SYNPROXY is a TCP SYN packets proxy. It can be used to protect any TCP server (like a web server) from SYN floods and similar DDos attacks. + +SYNPROXY is a netfilter module, in the Linux kernel (since version 3.12). It is optimized to handle millions of packets per second utilizing all CPUs available without any concurrency locking between the connections. + +The net effect of this, is that the real servers will not notice any change during the attack. The valid TCP connections will pass through and served, while the attack will be stopped at the firewall. + +Netdata does not enable SYNPROXY. It just uses the SYNPROXY metrics exposed by your kernel, so you will first need to configure it. The hard way is to run iptables SYNPROXY commands directly on the console. An easier way is to use [FireHOL](https://firehol.org/), which, is a firewall manager for iptables. FireHOL can configure SYNPROXY using the following setup guides: + +- **[Working with SYNPROXY](https://github.com/firehol/firehol/wiki/Working-with-SYNPROXY)** +- **[Working with SYNPROXY and traps](https://github.com/firehol/firehol/wiki/Working-with-SYNPROXY-and-traps)** + +### Real-time monitoring of Linux Anti-DDoS + +Netdata is able to monitor in real-time (per second updates) the operation of the Linux Anti-DDoS protection. + +It visualizes 4 charts: + +1. TCP SYN Packets received on ports operated by SYNPROXY +2. TCP Cookies (valid, invalid, retransmits) +3. Connections Reopened +4. Entries used + +Example image: + +![ddos](https://cloud.githubusercontent.com/assets/2662304/14398891/6016e3fc-fdf0-11e5-942b-55de6a52cb66.gif) + +See Linux Anti-DDoS in action at: **[Netdata demo site (with SYNPROXY enabled)](https://registry.my-netdata.io/#menu_netfilter_submenu_synproxy)** + +## Linux power supply + +This module monitors various metrics reported by power supply drivers +on Linux. This allows tracking and alerting on things like remaining +battery capacity. + +Depending on the underlying driver, it may provide the following charts +and metrics: + +1. Capacity: The power supply capacity expressed as a percentage. + + - capacity_now + +2. Charge: The charge for the power supply, expressed as amphours. + + - charge_full_design + - charge_full + - charge_now + - charge_empty + - charge_empty_design + +3. Energy: The energy for the power supply, expressed as watthours. + + - energy_full_design + - energy_full + - energy_now + - energy_empty + - energy_empty_design + +4. Voltage: The voltage for the power supply, expressed as volts. + + - voltage_max_design + - voltage_max + - voltage_now + - voltage_min + - voltage_min_design + +#### configuration + +``` +[plugin:proc:/sys/class/power_supply] + # battery capacity = yes + # battery charge = no + # battery energy = no + # power supply voltage = no + # keep files open = auto + # directory to monitor = /sys/class/power_supply +``` + +#### notes + +- Most drivers provide at least the first chart. Battery powered ACPI + compliant systems (like most laptops) provide all but the third, but do + not provide all of the metrics for each chart. + +- Current, energy, and voltages are reported with a *very* high precision + by the power_supply framework. Usually, this is far higher than the + actual hardware supports reporting, so expect to see changes in these + charts jump instead of scaling smoothly. + +- If `max` or `full` attribute is defined by the driver, but not a + corresponding `min` or `empty` attribute, then Netdata will still provide + the corresponding `min` or `empty`, which will then always read as zero. + This way, alerts which match on these will still work. + +## Infiniband interconnect + +This module monitors every active Infiniband port. It provides generic counters statistics, and per-vendor hw-counters (if vendor is supported). + +### Monitored interface metrics + +Each port will have its counters metrics monitored, grouped in the following charts: + +- **Bandwidth usage** + Sent/Received data, in KB/s + +- **Packets Statistics** + Sent/Received packets, in 3 categories: total, unicast and multicast. + +- **Errors Statistics** + Many errors counters are provided, presenting statistics for: + - Packets: malformed, sent/received discarded by card/switch, missing resource + - Link: downed, recovered, integrity error, minor error + - Other events: Tick Wait to send, buffer overrun + +If your vendor is supported, you'll also get HW-Counters statistics. These being vendor specific, please refer to their documentation. + +- Mellanox: [see statistics documentation](https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters) + +### configuration + +Default configuration will monitor only enabled infiniband ports, and refresh newly activated or created ports every 30 seconds + +``` +[plugin:proc:/sys/class/infiniband] + # dirname to monitor = /sys/class/infiniband + # bandwidth counters = yes + # packets counters = yes + # errors counters = yes + # hardware packets counters = auto + # hardware errors counters = auto + # monitor only ports being active = auto + # disable by default interfaces matching = + # refresh ports state every seconds = 30 +``` + + +## IPC + +### Monitored IPC metrics + +- **number of messages in message queues** +- **amount of memory used by message queues** +- **number of semaphores** +- **number of semaphore arrays** +- **number of shared memory segments** +- **amount of memory used by shared memory segments** + +As far as the message queue charts are dynamic, sane limits are applied for the number of dimensions per chart (the limit is configurable). + +### configuration + +``` +[plugin:proc:ipc] + # message queues = yes + # semaphore totals = yes + # shared memory totals = yes + # msg filename to monitor = /proc/sysvipc/msg + # shm filename to monitor = /proc/sysvipc/shm + # max dimensions in memory allowed = 50 +``` + + diff --git a/collectors/proc.plugin/ipc.c b/collectors/proc.plugin/ipc.c new file mode 100644 index 0000000..7d3d2ec --- /dev/null +++ b/collectors/proc.plugin/ipc.c @@ -0,0 +1,554 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#include +#include +#include + + +#ifndef SEMVMX +#define SEMVMX 32767 /* <= 32767 semaphore maximum value */ +#endif + +/* Some versions of libc only define IPC_INFO when __USE_GNU is defined. */ +#ifndef IPC_INFO +#define IPC_INFO 3 +#endif + +struct ipc_limits { + uint64_t shmmni; /* max number of segments */ + uint64_t shmmax; /* max segment size */ + uint64_t shmall; /* max total shared memory */ + uint64_t shmmin; /* min segment size */ + + int semmni; /* max number of arrays */ + int semmsl; /* max semaphores per array */ + int semmns; /* max semaphores system wide */ + int semopm; /* max ops per semop call */ + unsigned int semvmx; /* semaphore max value (constant) */ + + int msgmni; /* max queues system wide */ + size_t msgmax; /* max size of message */ + int msgmnb; /* default max size of queue */ +}; + +struct ipc_status { + int semusz; /* current number of arrays */ + int semaem; /* current semaphores system wide */ +}; + +/* + * The last arg of semctl is a union semun, but where is it defined? X/OPEN + * tells us to define it ourselves, but until recently Linux include files + * would also define it. + */ +#ifndef HAVE_UNION_SEMUN +/* according to X/OPEN we have to define it ourselves */ +union semun { + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; +}; +#endif + +struct message_queue { + unsigned long long id; + int found; + + RRDDIM *rd_messages; + RRDDIM *rd_bytes; + unsigned long long messages; + unsigned long long bytes; + + struct message_queue * next; +}; + +struct shm_stats { + unsigned long long segments; + unsigned long long bytes; +}; + +static inline int ipc_sem_get_limits(struct ipc_limits *lim) { + static procfile *ff = NULL; + static int error_shown = 0; + static char filename[FILENAME_MAX + 1] = ""; + + if(unlikely(!filename[0])) + snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/sem", netdata_configured_host_prefix); + + if(unlikely(!ff)) { + ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + if(unlikely(!error_shown)) { + error("IPC: Cannot open file '%s'.", filename); + error_shown = 1; + } + goto ipc; + } + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + if(unlikely(!error_shown)) { + error("IPC: Cannot read file '%s'.", filename); + error_shown = 1; + } + goto ipc; + } + + if(procfile_lines(ff) >= 1 && procfile_linewords(ff, 0) >= 4) { + lim->semvmx = SEMVMX; + lim->semmsl = str2i(procfile_lineword(ff, 0, 0)); + lim->semmns = str2i(procfile_lineword(ff, 0, 1)); + lim->semopm = str2i(procfile_lineword(ff, 0, 2)); + lim->semmni = str2i(procfile_lineword(ff, 0, 3)); + return 0; + } + else { + if(unlikely(!error_shown)) { + error("IPC: Invalid content in file '%s'.", filename); + error_shown = 1; + } + goto ipc; + } + +ipc: + // cannot do it from the file + // query IPC + { + struct seminfo seminfo = {.semmni = 0}; + union semun arg = {.array = (ushort *) &seminfo}; + + if(unlikely(semctl(0, 0, IPC_INFO, arg) < 0)) { + error("IPC: Failed to read '%s' and request IPC_INFO with semctl().", filename); + goto error; + } + + lim->semvmx = SEMVMX; + lim->semmni = seminfo.semmni; + lim->semmsl = seminfo.semmsl; + lim->semmns = seminfo.semmns; + lim->semopm = seminfo.semopm; + return 0; + } + +error: + lim->semvmx = 0; + lim->semmni = 0; + lim->semmsl = 0; + lim->semmns = 0; + lim->semopm = 0; + return -1; +} + +/* +printf ("------ Semaphore Limits --------\n"); +printf ("max number of arrays = %d\n", limits.semmni); +printf ("max semaphores per array = %d\n", limits.semmsl); +printf ("max semaphores system wide = %d\n", limits.semmns); +printf ("max ops per semop call = %d\n", limits.semopm); +printf ("semaphore max value = %u\n", limits.semvmx); + +printf ("------ Semaphore Status --------\n"); +printf ("used arrays = %d\n", status.semusz); +printf ("allocated semaphores = %d\n", status.semaem); +*/ + +static inline int ipc_sem_get_status(struct ipc_status *st) { + struct seminfo seminfo; + union semun arg; + + arg.array = (ushort *) (void *) &seminfo; + + if(unlikely(semctl (0, 0, SEM_INFO, arg) < 0)) { + /* kernel not configured for semaphores */ + static int error_shown = 0; + if(unlikely(!error_shown)) { + error("IPC: kernel is not configured for semaphores"); + error_shown = 1; + } + st->semusz = 0; + st->semaem = 0; + return -1; + } + + st->semusz = seminfo.semusz; + st->semaem = seminfo.semaem; + return 0; +} + +int ipc_msq_get_info(char *msg_filename, struct message_queue **message_queue_root) { + static procfile *ff; + struct message_queue *msq; + + if(unlikely(!ff)) { + ff = procfile_open(msg_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 1; + + size_t lines = procfile_lines(ff); + size_t words = 0; + + if(unlikely(lines < 2)) { + error("Cannot read %s. Expected 2 or more lines, read %zu.", procfile_filename(ff), lines); + return 1; + } + + // loop through all lines except the first and the last ones + size_t l; + for(l = 1; l < lines - 1; l++) { + words = procfile_linewords(ff, l); + if(unlikely(words < 2)) continue; + if(unlikely(words < 14)) { + error("Cannot read %s line. Expected 14 params, read %zu.", procfile_filename(ff), words); + continue; + } + + // find the id in the linked list or create a new structure + int found = 0; + + unsigned long long id = str2ull(procfile_lineword(ff, l, 1)); + for(msq = *message_queue_root; msq ; msq = msq->next) { + if(unlikely(id == msq->id)) { + found = 1; + break; + } + } + + if(unlikely(!found)) { + msq = callocz(1, sizeof(struct message_queue)); + msq->next = *message_queue_root; + *message_queue_root = msq; + msq->id = id; + } + + msq->messages = str2ull(procfile_lineword(ff, l, 4)); + msq->bytes = str2ull(procfile_lineword(ff, l, 3)); + msq->found = 1; + } + + return 0; +} + +int ipc_shm_get_info(char *shm_filename, struct shm_stats *shm) { + static procfile *ff; + + if(unlikely(!ff)) { + ff = procfile_open(shm_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 1; + + size_t lines = procfile_lines(ff); + size_t words = 0; + + if(unlikely(lines < 2)) { + error("Cannot read %s. Expected 2 or more lines, read %zu.", procfile_filename(ff), lines); + return 1; + } + + shm->segments = 0; + shm->bytes = 0; + + // loop through all lines except the first and the last ones + size_t l; + for(l = 1; l < lines - 1; l++) { + words = procfile_linewords(ff, l); + if(unlikely(words < 2)) continue; + if(unlikely(words < 16)) { + error("Cannot read %s line. Expected 16 params, read %zu.", procfile_filename(ff), words); + continue; + } + + shm->segments++; + shm->bytes += str2ull(procfile_lineword(ff, l, 3)); + } + + return 0; +} + +int do_ipc(int update_every, usec_t dt) { + (void)dt; + + static int do_sem = -1, do_msg = -1, do_shm = -1; + static int read_limits_next = -1; + static struct ipc_limits limits; + static struct ipc_status status; + static const RRDVAR_ACQUIRED *arrays_max = NULL, *semaphores_max = NULL; + static RRDSET *st_semaphores = NULL, *st_arrays = NULL; + static RRDDIM *rd_semaphores = NULL, *rd_arrays = NULL; + static char *msg_filename = NULL; + static struct message_queue *message_queue_root = NULL; + static long long dimensions_limit; + static char *shm_filename = NULL; + + if(unlikely(do_sem == -1)) { + do_msg = config_get_boolean("plugin:proc:ipc", "message queues", CONFIG_BOOLEAN_YES); + do_sem = config_get_boolean("plugin:proc:ipc", "semaphore totals", CONFIG_BOOLEAN_YES); + do_shm = config_get_boolean("plugin:proc:ipc", "shared memory totals", CONFIG_BOOLEAN_YES); + + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/sysvipc/msg"); + msg_filename = config_get("plugin:proc:ipc", "msg filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/sysvipc/shm"); + shm_filename = config_get("plugin:proc:ipc", "shm filename to monitor", filename); + + dimensions_limit = config_get_number("plugin:proc:ipc", "max dimensions in memory allowed", 50); + + // make sure it works + if(ipc_sem_get_limits(&limits) == -1) { + error("unable to fetch semaphore limits"); + do_sem = CONFIG_BOOLEAN_NO; + } + else if(ipc_sem_get_status(&status) == -1) { + error("unable to fetch semaphore statistics"); + do_sem = CONFIG_BOOLEAN_NO; + } + else { + // create the charts + if(unlikely(!st_semaphores)) { + st_semaphores = rrdset_create_localhost( + "system" + , "ipc_semaphores" + , NULL + , "ipc semaphores" + , NULL + , "IPC Semaphores" + , "semaphores" + , PLUGIN_PROC_NAME + , "ipc" + , NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + rd_semaphores = rrddim_add(st_semaphores, "semaphores", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + if(unlikely(!st_arrays)) { + st_arrays = rrdset_create_localhost( + "system" + , "ipc_semaphore_arrays" + , NULL + , "ipc semaphores" + , NULL + , "IPC Semaphore Arrays" + , "arrays" + , PLUGIN_PROC_NAME + , "ipc" + , NETDATA_CHART_PRIO_SYSTEM_IPC_SEM_ARRAYS + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + rd_arrays = rrddim_add(st_arrays, "arrays", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + // variables + semaphores_max = rrdvar_custom_host_variable_add_and_acquire(localhost, "ipc_semaphores_max"); + arrays_max = rrdvar_custom_host_variable_add_and_acquire(localhost, "ipc_semaphores_arrays_max"); + } + + struct stat stbuf; + if (stat(msg_filename, &stbuf)) { + do_msg = CONFIG_BOOLEAN_NO; + } + + if(unlikely(do_sem == CONFIG_BOOLEAN_NO && do_msg == CONFIG_BOOLEAN_NO)) { + error("ipc module disabled"); + return 1; + } + } + + if(likely(do_sem != CONFIG_BOOLEAN_NO)) { + if(unlikely(read_limits_next < 0)) { + if(unlikely(ipc_sem_get_limits(&limits) == -1)) { + error("Unable to fetch semaphore limits."); + } + else { + if(semaphores_max) rrdvar_custom_host_variable_set(localhost, semaphores_max, limits.semmns); + if(arrays_max) rrdvar_custom_host_variable_set(localhost, arrays_max, limits.semmni); + + st_arrays->red = limits.semmni; + st_semaphores->red = limits.semmns; + + read_limits_next = 60 / update_every; + } + } + else + read_limits_next--; + + if(unlikely(ipc_sem_get_status(&status) == -1)) { + error("Unable to get semaphore statistics"); + return 0; + } + + rrddim_set_by_pointer(st_semaphores, rd_semaphores, status.semaem); + rrdset_done(st_semaphores); + + rrddim_set_by_pointer(st_arrays, rd_arrays, status.semusz); + rrdset_done(st_arrays); + } + + if(likely(do_msg != CONFIG_BOOLEAN_NO)) { + static RRDSET *st_msq_messages = NULL, *st_msq_bytes = NULL; + + int ret = ipc_msq_get_info(msg_filename, &message_queue_root); + + if(!ret && message_queue_root) { + if(unlikely(!st_msq_messages)) + st_msq_messages = rrdset_create_localhost( + "system" + , "message_queue_messages" + , NULL + , "ipc message queues" + , NULL + , "IPC Message Queue Number of Messages" + , "messages" + , PLUGIN_PROC_NAME + , "ipc" + , NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_MESSAGES + , update_every + , RRDSET_TYPE_STACKED + ); + + if(unlikely(!st_msq_bytes)) + st_msq_bytes = rrdset_create_localhost( + "system" + , "message_queue_bytes" + , NULL + , "ipc message queues" + , NULL + , "IPC Message Queue Used Bytes" + , "bytes" + , PLUGIN_PROC_NAME + , "ipc" + , NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_SIZE + , update_every + , RRDSET_TYPE_STACKED + ); + + struct message_queue *msq = message_queue_root, *msq_prev = NULL; + while(likely(msq)){ + if(likely(msq->found)) { + if(unlikely(!msq->rd_messages || !msq->rd_bytes)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%llu", msq->id); + if(likely(!msq->rd_messages)) msq->rd_messages = rrddim_add(st_msq_messages, id, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + if(likely(!msq->rd_bytes)) msq->rd_bytes = rrddim_add(st_msq_bytes, id, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_msq_messages, msq->rd_messages, msq->messages); + rrddim_set_by_pointer(st_msq_bytes, msq->rd_bytes, msq->bytes); + + msq->found = 0; + } + else { + rrddim_is_obsolete(st_msq_messages, msq->rd_messages); + rrddim_is_obsolete(st_msq_bytes, msq->rd_bytes); + + // remove message queue from the linked list + if(!msq_prev) + message_queue_root = msq->next; + else + msq_prev->next = msq->next; + freez(msq); + msq = NULL; + } + if(likely(msq)) { + msq_prev = msq; + msq = msq->next; + } + else if(!msq_prev) + msq = message_queue_root; + else + msq = msq_prev->next; + } + + rrdset_done(st_msq_messages); + rrdset_done(st_msq_bytes); + + long long dimensions_num = rrdset_number_of_dimensions(st_msq_messages); + + if(unlikely(dimensions_num > dimensions_limit)) { + info("Message queue statistics has been disabled"); + info("There are %lld dimensions in memory but limit was set to %lld", dimensions_num, dimensions_limit); + rrdset_is_obsolete(st_msq_messages); + rrdset_is_obsolete(st_msq_bytes); + st_msq_messages = NULL; + st_msq_bytes = NULL; + do_msg = CONFIG_BOOLEAN_NO; + } + else if(unlikely(!message_queue_root)) { + info("Making chart %s (%s) obsolete since it does not have any dimensions", rrdset_name(st_msq_messages), rrdset_id(st_msq_messages)); + rrdset_is_obsolete(st_msq_messages); + st_msq_messages = NULL; + + info("Making chart %s (%s) obsolete since it does not have any dimensions", rrdset_name(st_msq_bytes), rrdset_id(st_msq_bytes)); + rrdset_is_obsolete(st_msq_bytes); + st_msq_bytes = NULL; + } + } + } + + if(likely(do_shm != CONFIG_BOOLEAN_NO)) { + static RRDSET *st_shm_segments = NULL, *st_shm_bytes = NULL; + static RRDDIM *rd_shm_segments = NULL, *rd_shm_bytes = NULL; + struct shm_stats shm; + + if(!ipc_shm_get_info(shm_filename, &shm)) { + if(unlikely(!st_shm_segments)) { + st_shm_segments = rrdset_create_localhost( + "system" + , "shared_memory_segments" + , NULL + , "ipc shared memory" + , NULL + , "IPC Shared Memory Number of Segments" + , "segments" + , PLUGIN_PROC_NAME + , "ipc" + , NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SEGS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_shm_segments = rrddim_add(st_shm_segments, "segments", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_shm_segments, rd_shm_segments, shm.segments); + rrdset_done(st_shm_segments); + + if(unlikely(!st_shm_bytes)) { + st_shm_bytes = rrdset_create_localhost( + "system" + , "shared_memory_bytes" + , NULL + , "ipc shared memory" + , NULL + , "IPC Shared Memory Used Bytes" + , "bytes" + , PLUGIN_PROC_NAME + , "ipc" + , NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SIZE + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_shm_bytes = rrddim_add(st_shm_bytes, "bytes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_shm_bytes, rd_shm_bytes, shm.bytes); + rrdset_done(st_shm_bytes); + } + } + + return 0; +} diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c new file mode 100644 index 0000000..1b24df4 --- /dev/null +++ b/collectors/proc.plugin/plugin_proc.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +static struct proc_module { + const char *name; + const char *dim; + + int enabled; + + int (*func)(int update_every, usec_t dt); + + RRDDIM *rd; + +} proc_modules[] = { + + // system metrics + {.name = "/proc/stat", .dim = "stat", .func = do_proc_stat}, + {.name = "/proc/uptime", .dim = "uptime", .func = do_proc_uptime}, + {.name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg}, + {.name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail}, + + // pressure metrics + {.name = "/proc/pressure", .dim = "pressure", .func = do_proc_pressure}, + + // CPU metrics + {.name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts}, + {.name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs}, + + // memory metrics + {.name = "/proc/vmstat", .dim = "vmstat", .func = do_proc_vmstat}, + {.name = "/proc/meminfo", .dim = "meminfo", .func = do_proc_meminfo}, + {.name = "/sys/kernel/mm/ksm", .dim = "ksm", .func = do_sys_kernel_mm_ksm}, + {.name = "/sys/block/zram", .dim = "zram", .func = do_sys_block_zram}, + {.name = "/sys/devices/system/edac/mc", .dim = "ecc", .func = do_proc_sys_devices_system_edac_mc}, + {.name = "/sys/devices/system/node", .dim = "numa", .func = do_proc_sys_devices_system_node}, + {.name = "/proc/pagetypeinfo", .dim = "pagetypeinfo", .func = do_proc_pagetypeinfo}, + + // network metrics + {.name = "/proc/net/wireless", .dim = "netwireless", .func = do_proc_net_wireless}, + {.name = "/proc/net/sockstat", .dim = "sockstat", .func = do_proc_net_sockstat}, + {.name = "/proc/net/sockstat6", .dim = "sockstat6", .func = do_proc_net_sockstat6}, + {.name = "/proc/net/netstat", .dim = "netstat", .func = do_proc_net_netstat}, + {.name = "/proc/net/sctp/snmp", .dim = "sctp", .func = do_proc_net_sctp_snmp}, + {.name = "/proc/net/softnet_stat", .dim = "softnet", .func = do_proc_net_softnet_stat}, + {.name = "/proc/net/ip_vs/stats", .dim = "ipvs", .func = do_proc_net_ip_vs_stats}, + {.name = "/sys/class/infiniband", .dim = "infiniband", .func = do_sys_class_infiniband}, + + // firewall metrics + {.name = "/proc/net/stat/conntrack", .dim = "conntrack", .func = do_proc_net_stat_conntrack}, + {.name = "/proc/net/stat/synproxy", .dim = "synproxy", .func = do_proc_net_stat_synproxy}, + + // disk metrics + {.name = "/proc/diskstats", .dim = "diskstats", .func = do_proc_diskstats}, + {.name = "/proc/mdstat", .dim = "mdstat", .func = do_proc_mdstat}, + + // NFS metrics + {.name = "/proc/net/rpc/nfsd", .dim = "nfsd", .func = do_proc_net_rpc_nfsd}, + {.name = "/proc/net/rpc/nfs", .dim = "nfs", .func = do_proc_net_rpc_nfs}, + + // ZFS metrics + {.name = "/proc/spl/kstat/zfs/arcstats", .dim = "zfs_arcstats", .func = do_proc_spl_kstat_zfs_arcstats}, + {.name = "/proc/spl/kstat/zfs/pool/state",.dim = "zfs_pool_state",.func = do_proc_spl_kstat_zfs_pool_state}, + + // BTRFS metrics + {.name = "/sys/fs/btrfs", .dim = "btrfs", .func = do_sys_fs_btrfs}, + + // IPC metrics + {.name = "ipc", .dim = "ipc", .func = do_ipc}, + + {.name = "/sys/class/power_supply", .dim = "power_supply", .func = do_sys_class_power_supply}, + // linux power supply metrics + + // the terminator of this array + {.name = NULL, .dim = NULL, .func = NULL} +}; + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 36 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 36 +#endif + +static netdata_thread_t *netdev_thread = NULL; + +static void proc_main_cleanup(void *ptr) +{ + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + if (netdev_thread) { + netdata_thread_join(*netdev_thread, NULL); + freez(netdev_thread); + } + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; + + worker_unregister(); +} + +void *proc_main(void *ptr) +{ + worker_register("PROC"); + + if (config_get_boolean("plugin:proc", "/proc/net/dev", CONFIG_BOOLEAN_YES)) { + netdev_thread = mallocz(sizeof(netdata_thread_t)); + debug(D_SYSTEM, "Starting thread %s.", THREAD_NETDEV_NAME); + netdata_thread_create( + netdev_thread, THREAD_NETDEV_NAME, NETDATA_THREAD_OPTION_JOINABLE, netdev_main, netdev_thread); + } + + netdata_thread_cleanup_push(proc_main_cleanup, ptr); + + config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO); + + // check the enabled status for each module + int i; + for (i = 0; proc_modules[i].name; i++) { + struct proc_module *pm = &proc_modules[i]; + + pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES); + pm->rd = NULL; + + worker_register_job_name(i, proc_modules[i].dim); + } + + usec_t step = localhost->rrd_update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (!netdata_exit) { + worker_is_idle(); + usec_t hb_dt = heartbeat_next(&hb, step); + + if (unlikely(netdata_exit)) + break; + + for (i = 0; proc_modules[i].name; i++) { + if (unlikely(netdata_exit)) + break; + + struct proc_module *pm = &proc_modules[i]; + if (unlikely(!pm->enabled)) + continue; + + debug(D_PROCNETDEV_LOOP, "PROC calling %s.", pm->name); + + worker_is_busy(i); + pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); + } + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + +int get_numa_node_count(void) +{ + static int numa_node_count = -1; + + if (numa_node_count != -1) + return numa_node_count; + + numa_node_count = 0; + + char name[FILENAME_MAX + 1]; + snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/node"); + char *dirname = config_get("plugin:proc:/sys/devices/system/node", "directory to monitor", name); + + DIR *dir = opendir(dirname); + if (dir) { + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type != DT_DIR) + continue; + + if (strncmp(de->d_name, "node", 4) != 0) + continue; + + if (!isdigit(de->d_name[4])) + continue; + + numa_node_count++; + } + closedir(dir); + } + + return numa_node_count; +} diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h new file mode 100644 index 0000000..d67ccd6 --- /dev/null +++ b/collectors/proc.plugin/plugin_proc.h @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PLUGIN_PROC_H +#define NETDATA_PLUGIN_PROC_H 1 + +#include "daemon/common.h" + +#define PLUGIN_PROC_CONFIG_NAME "proc" +#define PLUGIN_PROC_NAME PLUGIN_PROC_CONFIG_NAME ".plugin" + +#define THREAD_NETDEV_NAME "PLUGIN[proc netdev]" +void *netdev_main(void *ptr); + +int do_proc_net_wireless(int update_every, usec_t dt); +int do_proc_diskstats(int update_every, usec_t dt); +int do_proc_mdstat(int update_every, usec_t dt); +int do_proc_net_netstat(int update_every, usec_t dt); +int do_proc_net_stat_conntrack(int update_every, usec_t dt); +int do_proc_net_ip_vs_stats(int update_every, usec_t dt); +int do_proc_stat(int update_every, usec_t dt); +int do_proc_meminfo(int update_every, usec_t dt); +int do_proc_vmstat(int update_every, usec_t dt); +int do_proc_net_rpc_nfs(int update_every, usec_t dt); +int do_proc_net_rpc_nfsd(int update_every, usec_t dt); +int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt); +int do_proc_interrupts(int update_every, usec_t dt); +int do_proc_softirqs(int update_every, usec_t dt); +int do_proc_pressure(int update_every, usec_t dt); +int do_sys_kernel_mm_ksm(int update_every, usec_t dt); +int do_sys_block_zram(int update_every, usec_t dt); +int do_proc_loadavg(int update_every, usec_t dt); +int do_proc_net_stat_synproxy(int update_every, usec_t dt); +int do_proc_net_softnet_stat(int update_every, usec_t dt); +int do_proc_uptime(int update_every, usec_t dt); +int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt); +int do_proc_sys_devices_system_node(int update_every, usec_t dt); +int do_proc_spl_kstat_zfs_arcstats(int update_every, usec_t dt); +int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt); +int do_sys_fs_btrfs(int update_every, usec_t dt); +int do_proc_net_sockstat(int update_every, usec_t dt); +int do_proc_net_sockstat6(int update_every, usec_t dt); +int do_proc_net_sctp_snmp(int update_every, usec_t dt); +int do_ipc(int update_every, usec_t dt); +int do_sys_class_power_supply(int update_every, usec_t dt); +int do_proc_pagetypeinfo(int update_every, usec_t dt); +int do_sys_class_infiniband(int update_every, usec_t dt); +int get_numa_node_count(void); + +// metrics that need to be shared among data collectors +extern unsigned long long zfs_arcstats_shrinkable_cache_size_bytes; + +// netdev renames +void netdev_rename_device_add( + const char *host_device, + const char *container_device, + const char *container_name, + DICTIONARY *labels, + const char *ctx_prefix); + +void netdev_rename_device_del(const char *host_device); + +#include "proc_self_mountinfo.h" +#include "proc_pressure.h" +#include "zfs_common.h" + +#endif /* NETDATA_PLUGIN_PROC_H */ diff --git a/collectors/proc.plugin/proc_diskstats.c b/collectors/proc.plugin/proc_diskstats.c new file mode 100644 index 0000000..28d0e75 --- /dev/null +++ b/collectors/proc.plugin/proc_diskstats.c @@ -0,0 +1,2045 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define RRD_TYPE_DISK "disk" +#define PLUGIN_PROC_MODULE_DISKSTATS_NAME "/proc/diskstats" +#define CONFIG_SECTION_PLUGIN_PROC_DISKSTATS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_DISKSTATS_NAME + +#define DISK_TYPE_UNKNOWN 0 +#define DISK_TYPE_PHYSICAL 1 +#define DISK_TYPE_PARTITION 2 +#define DISK_TYPE_VIRTUAL 3 + +#define DEFAULT_PREFERRED_IDS "*" +#define DEFAULT_EXCLUDED_DISKS "loop* ram*" + +static struct disk { + char *disk; // the name of the disk (sda, sdb, etc, after being looked up) + char *device; // the device of the disk (before being looked up) + uint32_t hash; + unsigned long major; + unsigned long minor; + int sector_size; + int type; + + char *mount_point; + + char *chart_id; + + // disk options caching + int do_io; + int do_ops; + int do_mops; + int do_iotime; + int do_qops; + int do_util; + int do_ext; + int do_backlog; + int do_bcache; + + int updated; + + int device_is_bcache; + + char *bcache_filename_dirty_data; + char *bcache_filename_writeback_rate; + char *bcache_filename_cache_congested; + char *bcache_filename_cache_available_percent; + char *bcache_filename_stats_five_minute_cache_hit_ratio; + char *bcache_filename_stats_hour_cache_hit_ratio; + char *bcache_filename_stats_day_cache_hit_ratio; + char *bcache_filename_stats_total_cache_hit_ratio; + char *bcache_filename_stats_total_cache_hits; + char *bcache_filename_stats_total_cache_misses; + char *bcache_filename_stats_total_cache_miss_collisions; + char *bcache_filename_stats_total_cache_bypass_hits; + char *bcache_filename_stats_total_cache_bypass_misses; + char *bcache_filename_stats_total_cache_readaheads; + char *bcache_filename_cache_read_races; + char *bcache_filename_cache_io_errors; + char *bcache_filename_priority_stats; + + usec_t bcache_priority_stats_update_every_usec; + usec_t bcache_priority_stats_elapsed_usec; + + RRDSET *st_io; + RRDDIM *rd_io_reads; + RRDDIM *rd_io_writes; + + RRDSET *st_ext_io; + RRDDIM *rd_io_discards; + + RRDSET *st_ops; + RRDDIM *rd_ops_reads; + RRDDIM *rd_ops_writes; + + RRDSET *st_ext_ops; + RRDDIM *rd_ops_discards; + RRDDIM *rd_ops_flushes; + + RRDSET *st_qops; + RRDDIM *rd_qops_operations; + + RRDSET *st_backlog; + RRDDIM *rd_backlog_backlog; + + RRDSET *st_busy; + RRDDIM *rd_busy_busy; + + RRDSET *st_util; + RRDDIM *rd_util_utilization; + + RRDSET *st_mops; + RRDDIM *rd_mops_reads; + RRDDIM *rd_mops_writes; + + RRDSET *st_ext_mops; + RRDDIM *rd_mops_discards; + + RRDSET *st_iotime; + RRDDIM *rd_iotime_reads; + RRDDIM *rd_iotime_writes; + + RRDSET *st_ext_iotime; + RRDDIM *rd_iotime_discards; + RRDDIM *rd_iotime_flushes; + + RRDSET *st_await; + RRDDIM *rd_await_reads; + RRDDIM *rd_await_writes; + + RRDSET *st_ext_await; + RRDDIM *rd_await_discards; + RRDDIM *rd_await_flushes; + + RRDSET *st_avgsz; + RRDDIM *rd_avgsz_reads; + RRDDIM *rd_avgsz_writes; + + RRDSET *st_ext_avgsz; + RRDDIM *rd_avgsz_discards; + + RRDSET *st_svctm; + RRDDIM *rd_svctm_svctm; + + RRDSET *st_bcache_size; + RRDDIM *rd_bcache_dirty_size; + + RRDSET *st_bcache_usage; + RRDDIM *rd_bcache_available_percent; + + RRDSET *st_bcache_hit_ratio; + RRDDIM *rd_bcache_hit_ratio_5min; + RRDDIM *rd_bcache_hit_ratio_1hour; + RRDDIM *rd_bcache_hit_ratio_1day; + RRDDIM *rd_bcache_hit_ratio_total; + + RRDSET *st_bcache; + RRDDIM *rd_bcache_hits; + RRDDIM *rd_bcache_misses; + RRDDIM *rd_bcache_miss_collisions; + + RRDSET *st_bcache_bypass; + RRDDIM *rd_bcache_bypass_hits; + RRDDIM *rd_bcache_bypass_misses; + + RRDSET *st_bcache_rates; + RRDDIM *rd_bcache_rate_congested; + RRDDIM *rd_bcache_readaheads; + RRDDIM *rd_bcache_rate_writeback; + + RRDSET *st_bcache_cache_allocations; + RRDDIM *rd_bcache_cache_allocations_unused; + RRDDIM *rd_bcache_cache_allocations_clean; + RRDDIM *rd_bcache_cache_allocations_dirty; + RRDDIM *rd_bcache_cache_allocations_metadata; + RRDDIM *rd_bcache_cache_allocations_unknown; + + RRDSET *st_bcache_cache_read_races; + RRDDIM *rd_bcache_cache_read_races; + RRDDIM *rd_bcache_cache_io_errors; + + struct disk *next; +} *disk_root = NULL; + +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) + +// static char *path_to_get_hw_sector_size = NULL; +// static char *path_to_get_hw_sector_size_partitions = NULL; +static char *path_to_sys_dev_block_major_minor_string = NULL; +static char *path_to_sys_block_device = NULL; +static char *path_to_sys_block_device_bcache = NULL; +static char *path_to_sys_devices_virtual_block_device = NULL; +static char *path_to_device_mapper = NULL; +static char *path_to_device_label = NULL; +static char *path_to_device_id = NULL; +static char *path_to_veritas_volume_groups = NULL; +static int name_disks_by_id = CONFIG_BOOLEAN_NO; +static int global_bcache_priority_stats_update_every = 0; // disabled by default + +static int global_enable_new_disks_detected_at_runtime = CONFIG_BOOLEAN_YES, + global_enable_performance_for_physical_disks = CONFIG_BOOLEAN_AUTO, + global_enable_performance_for_virtual_disks = CONFIG_BOOLEAN_AUTO, + global_enable_performance_for_partitions = CONFIG_BOOLEAN_NO, + global_do_io = CONFIG_BOOLEAN_AUTO, + global_do_ops = CONFIG_BOOLEAN_AUTO, + global_do_mops = CONFIG_BOOLEAN_AUTO, + global_do_iotime = CONFIG_BOOLEAN_AUTO, + global_do_qops = CONFIG_BOOLEAN_AUTO, + global_do_util = CONFIG_BOOLEAN_AUTO, + global_do_ext = CONFIG_BOOLEAN_AUTO, + global_do_backlog = CONFIG_BOOLEAN_AUTO, + global_do_bcache = CONFIG_BOOLEAN_AUTO, + globals_initialized = 0, + global_cleanup_removed_disks = 1; + +static SIMPLE_PATTERN *preferred_ids = NULL; +static SIMPLE_PATTERN *excluded_disks = NULL; + +static unsigned long long int bcache_read_number_with_units(const char *filename) { + char buffer[50 + 1]; + if(read_file(filename, buffer, 50) == 0) { + static int unknown_units_error = 10; + + char *end = NULL; + NETDATA_DOUBLE value = str2ndd(buffer, &end); + if(end && *end) { + if(*end == 'k') + return (unsigned long long int)(value * 1024.0); + else if(*end == 'M') + return (unsigned long long int)(value * 1024.0 * 1024.0); + else if(*end == 'G') + return (unsigned long long int)(value * 1024.0 * 1024.0 * 1024.0); + else if(*end == 'T') + return (unsigned long long int)(value * 1024.0 * 1024.0 * 1024.0 * 1024.0); + else if(unknown_units_error > 0) { + error("bcache file '%s' provides value '%s' with unknown units '%s'", filename, buffer, end); + unknown_units_error--; + } + } + + return (unsigned long long int)value; + } + + return 0; +} + +void bcache_read_priority_stats(struct disk *d, const char *family, int update_every, usec_t dt) { + static procfile *ff = NULL; + static char *separators = " \t:%[]"; + + static ARL_BASE *arl_base = NULL; + + static unsigned long long unused; + static unsigned long long clean; + static unsigned long long dirty; + static unsigned long long metadata; + static unsigned long long unknown; + + // check if it is time to update this metric + d->bcache_priority_stats_elapsed_usec += dt; + if(likely(d->bcache_priority_stats_elapsed_usec < d->bcache_priority_stats_update_every_usec)) return; + d->bcache_priority_stats_elapsed_usec = 0; + + // initialize ARL + if(unlikely(!arl_base)) { + arl_base = arl_create("bcache/priority_stats", NULL, 60); + arl_expect(arl_base, "Unused", &unused); + arl_expect(arl_base, "Clean", &clean); + arl_expect(arl_base, "Dirty", &dirty); + arl_expect(arl_base, "Metadata", &metadata); + } + + ff = procfile_reopen(ff, d->bcache_filename_priority_stats, separators, PROCFILE_FLAG_DEFAULT); + if(likely(ff)) ff = procfile_readall(ff); + if(unlikely(!ff)) { + separators = " \t:%[]"; + return; + } + + // do not reset the separators on every iteration + separators = NULL; + + arl_begin(arl_base); + unused = clean = dirty = metadata = unknown = 0; + + size_t lines = procfile_lines(ff), l; + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 2)) { + if(unlikely(words)) error("Cannot read '%s' line %zu. Expected 2 params, read %zu.", d->bcache_filename_priority_stats, l, words); + continue; + } + + if(unlikely(arl_check(arl_base, + procfile_lineword(ff, l, 0), + procfile_lineword(ff, l, 1)))) break; + } + + unknown = 100 - unused - clean - dirty - metadata; + + // create / update the cache allocations chart + { + if(unlikely(!d->st_bcache_cache_allocations)) { + d->st_bcache_cache_allocations = rrdset_create_localhost( + "disk_bcache_cache_alloc" + , d->chart_id + , d->disk + , family + , "disk.bcache_cache_alloc" + , "BCache Cache Allocations" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_CACHE_ALLOC + , update_every + , RRDSET_TYPE_STACKED + ); + + d->rd_bcache_cache_allocations_unused = rrddim_add(d->st_bcache_cache_allocations, "unused", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_cache_allocations_dirty = rrddim_add(d->st_bcache_cache_allocations, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_cache_allocations_clean = rrddim_add(d->st_bcache_cache_allocations, "clean", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_cache_allocations_metadata = rrddim_add(d->st_bcache_cache_allocations, "metadata", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_cache_allocations_unknown = rrddim_add(d->st_bcache_cache_allocations, "undefined", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + d->bcache_priority_stats_update_every_usec = update_every * USEC_PER_SEC; + } + + rrddim_set_by_pointer(d->st_bcache_cache_allocations, d->rd_bcache_cache_allocations_unused, unused); + rrddim_set_by_pointer(d->st_bcache_cache_allocations, d->rd_bcache_cache_allocations_dirty, dirty); + rrddim_set_by_pointer(d->st_bcache_cache_allocations, d->rd_bcache_cache_allocations_clean, clean); + rrddim_set_by_pointer(d->st_bcache_cache_allocations, d->rd_bcache_cache_allocations_metadata, metadata); + rrddim_set_by_pointer(d->st_bcache_cache_allocations, d->rd_bcache_cache_allocations_unknown, unknown); + rrdset_done(d->st_bcache_cache_allocations); + } +} + +static inline int is_major_enabled(int major) { + static int8_t *major_configs = NULL; + static size_t major_size = 0; + + if(major < 0) return 1; + + size_t wanted_size = (size_t)major + 1; + + if(major_size < wanted_size) { + major_configs = reallocz(major_configs, wanted_size * sizeof(int8_t)); + + size_t i; + for(i = major_size; i < wanted_size ; i++) + major_configs[i] = -1; + + major_size = wanted_size; + } + + if(major_configs[major] == -1) { + char buffer[CONFIG_MAX_NAME + 1]; + snprintfz(buffer, CONFIG_MAX_NAME, "performance metrics for disks with major %d", major); + major_configs[major] = (char)config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, buffer, 1); + } + + return (int)major_configs[major]; +} + +static inline int get_disk_name_from_path(const char *path, char *result, size_t result_size, unsigned long major, unsigned long minor, char *disk, char *prefix, int depth) { + //info("DEVICE-MAPPER ('%s', %lu:%lu): examining directory '%s' (allowed depth %d).", disk, major, minor, path, depth); + + int found = 0, preferred = 0; + + char *first_result = mallocz(result_size); + + DIR *dir = opendir(path); + if (!dir) { + error("DEVICE-MAPPER ('%s', %lu:%lu): Cannot open directory '%s'.", disk, major, minor, path); + goto failed; + } + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if(de->d_type == DT_DIR) { + if((de->d_name[0] == '.' && de->d_name[1] == '\0') || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0')) + continue; + + if(depth <= 0) { + error("DEVICE-MAPPER ('%s', %lu:%lu): Depth limit reached for path '%s/%s'. Ignoring path.", disk, major, minor, path, de->d_name); + break; + } + else { + char *path_nested = NULL; + char *prefix_nested = NULL; + + { + char buffer[FILENAME_MAX + 1]; + snprintfz(buffer, FILENAME_MAX, "%s/%s", path, de->d_name); + path_nested = strdupz(buffer); + + snprintfz(buffer, FILENAME_MAX, "%s%s%s", (prefix)?prefix:"", (prefix)?"_":"", de->d_name); + prefix_nested = strdupz(buffer); + } + + found = get_disk_name_from_path(path_nested, result, result_size, major, minor, disk, prefix_nested, depth - 1); + freez(path_nested); + freez(prefix_nested); + + if(found) break; + } + } + else if(de->d_type == DT_LNK || de->d_type == DT_BLK) { + char filename[FILENAME_MAX + 1]; + + if(de->d_type == DT_LNK) { + snprintfz(filename, FILENAME_MAX, "%s/%s", path, de->d_name); + ssize_t len = readlink(filename, result, result_size - 1); + if(len <= 0) { + error("DEVICE-MAPPER ('%s', %lu:%lu): Cannot read link '%s'.", disk, major, minor, filename); + continue; + } + + result[len] = '\0'; + if(result[0] != '/') + snprintfz(filename, FILENAME_MAX, "%s/%s", path, result); + else + strncpyz(filename, result, FILENAME_MAX); + } + else { + snprintfz(filename, FILENAME_MAX, "%s/%s", path, de->d_name); + } + + struct stat sb; + if(stat(filename, &sb) == -1) { + error("DEVICE-MAPPER ('%s', %lu:%lu): Cannot stat() file '%s'.", disk, major, minor, filename); + continue; + } + + if((sb.st_mode & S_IFMT) != S_IFBLK) { + //info("DEVICE-MAPPER ('%s', %lu:%lu): file '%s' is not a block device.", disk, major, minor, filename); + continue; + } + + if(major(sb.st_rdev) != major || minor(sb.st_rdev) != minor || strcmp(basename(filename), disk)) { + //info("DEVICE-MAPPER ('%s', %lu:%lu): filename '%s' does not match %lu:%lu.", disk, major, minor, filename, (unsigned long)major(sb.st_rdev), (unsigned long)minor(sb.st_rdev)); + continue; + } + + //info("DEVICE-MAPPER ('%s', %lu:%lu): filename '%s' matches.", disk, major, minor, filename); + + snprintfz(result, result_size - 1, "%s%s%s", (prefix)?prefix:"", (prefix)?"_":"", de->d_name); + + if(!found) { + strncpyz(first_result, result, result_size); + found = 1; + } + + if(simple_pattern_matches(preferred_ids, result)) { + preferred = 1; + break; + } + } + } + closedir(dir); + + +failed: + + if(!found) + result[0] = '\0'; + else if(!preferred) + strncpyz(result, first_result, result_size); + + freez(first_result); + + return found; +} + +static inline char *get_disk_name(unsigned long major, unsigned long minor, char *disk) { + char result[FILENAME_MAX + 1] = ""; + + if(!path_to_device_mapper || !*path_to_device_mapper || !get_disk_name_from_path(path_to_device_mapper, result, FILENAME_MAX + 1, major, minor, disk, NULL, 0)) + if(!path_to_device_label || !*path_to_device_label || !get_disk_name_from_path(path_to_device_label, result, FILENAME_MAX + 1, major, minor, disk, NULL, 0)) + if(!path_to_veritas_volume_groups || !*path_to_veritas_volume_groups || !get_disk_name_from_path(path_to_veritas_volume_groups, result, FILENAME_MAX + 1, major, minor, disk, "vx", 2)) + if(name_disks_by_id != CONFIG_BOOLEAN_YES || !path_to_device_id || !*path_to_device_id || !get_disk_name_from_path(path_to_device_id, result, FILENAME_MAX + 1, major, minor, disk, NULL, 0)) + strncpy(result, disk, FILENAME_MAX); + + if(!result[0]) + strncpy(result, disk, FILENAME_MAX); + + netdata_fix_chart_name(result); + return strdup(result); +} + +static void get_disk_config(struct disk *d) { + int def_enable = global_enable_new_disks_detected_at_runtime; + + if(def_enable != CONFIG_BOOLEAN_NO && (simple_pattern_matches(excluded_disks, d->device) || simple_pattern_matches(excluded_disks, d->disk))) + def_enable = CONFIG_BOOLEAN_NO; + + char var_name[4096 + 1]; + snprintfz(var_name, 4096, CONFIG_SECTION_PLUGIN_PROC_DISKSTATS ":%s", d->disk); + + def_enable = config_get_boolean_ondemand(var_name, "enable", def_enable); + if(unlikely(def_enable == CONFIG_BOOLEAN_NO)) { + // the user does not want any metrics for this disk + d->do_io = CONFIG_BOOLEAN_NO; + d->do_ops = CONFIG_BOOLEAN_NO; + d->do_mops = CONFIG_BOOLEAN_NO; + d->do_iotime = CONFIG_BOOLEAN_NO; + d->do_qops = CONFIG_BOOLEAN_NO; + d->do_util = CONFIG_BOOLEAN_NO; + d->do_ext = CONFIG_BOOLEAN_NO; + d->do_backlog = CONFIG_BOOLEAN_NO; + d->do_bcache = CONFIG_BOOLEAN_NO; + } + else { + // this disk is enabled + // check its direct settings + + int def_performance = CONFIG_BOOLEAN_AUTO; + + // since this is 'on demand' we can figure the performance settings + // based on the type of disk + + if(!d->device_is_bcache) { + switch(d->type) { + default: + case DISK_TYPE_UNKNOWN: + break; + + case DISK_TYPE_PHYSICAL: + def_performance = global_enable_performance_for_physical_disks; + break; + + case DISK_TYPE_PARTITION: + def_performance = global_enable_performance_for_partitions; + break; + + case DISK_TYPE_VIRTUAL: + def_performance = global_enable_performance_for_virtual_disks; + break; + } + } + + // check if we have to disable performance for this disk + if(def_performance) + def_performance = is_major_enabled((int)d->major); + + // ------------------------------------------------------------ + // now we have def_performance and def_space + // to work further + + // def_performance + // check the user configuration (this will also show our 'on demand' decision) + def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance); + + int ddo_io = CONFIG_BOOLEAN_NO, + ddo_ops = CONFIG_BOOLEAN_NO, + ddo_mops = CONFIG_BOOLEAN_NO, + ddo_iotime = CONFIG_BOOLEAN_NO, + ddo_qops = CONFIG_BOOLEAN_NO, + ddo_util = CONFIG_BOOLEAN_NO, + ddo_ext = CONFIG_BOOLEAN_NO, + ddo_backlog = CONFIG_BOOLEAN_NO, + ddo_bcache = CONFIG_BOOLEAN_NO; + + // we enable individual performance charts only when def_performance is not disabled + if(unlikely(def_performance != CONFIG_BOOLEAN_NO)) { + ddo_io = global_do_io, + ddo_ops = global_do_ops, + ddo_mops = global_do_mops, + ddo_iotime = global_do_iotime, + ddo_qops = global_do_qops, + ddo_util = global_do_util, + ddo_ext = global_do_ext, + ddo_backlog = global_do_backlog, + ddo_bcache = global_do_bcache; + } + + d->do_io = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io); + d->do_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops); + d->do_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops); + d->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime); + d->do_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops); + d->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util); + d->do_ext = config_get_boolean_ondemand(var_name, "extended operations", ddo_ext); + d->do_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog); + + if(d->device_is_bcache) + d->do_bcache = config_get_boolean_ondemand(var_name, "bcache", ddo_bcache); + else + d->do_bcache = 0; + } +} + +static struct disk *get_disk(unsigned long major, unsigned long minor, char *disk) { + static struct mountinfo *disk_mountinfo_root = NULL; + + struct disk *d; + + uint32_t hash = simple_hash(disk); + + // search for it in our RAM list. + // this is sequential, but since we just walk through + // and the number of disks / partitions in a system + // should not be that many, it should be acceptable + for(d = disk_root; d ; d = d->next){ + if (unlikely( + d->major == major && d->minor == minor && d->hash == hash && !strcmp(d->device, disk))) + return d; + } + + // not found + // create a new disk structure + d = (struct disk *)callocz(1, sizeof(struct disk)); + + d->disk = get_disk_name(major, minor, disk); + d->device = strdupz(disk); + d->hash = simple_hash(d->device); + d->major = major; + d->minor = minor; + d->type = DISK_TYPE_UNKNOWN; // Default type. Changed later if not correct. + d->sector_size = 512; // the default, will be changed below + d->next = NULL; + + // append it to the list + if(unlikely(!disk_root)) + disk_root = d; + else { + struct disk *last; + for(last = disk_root; last->next ;last = last->next); + last->next = d; + } + + d->chart_id = strdupz(d->device); + + // read device uuid if it is an LVM volume + if (!strncmp(d->device, "dm-", 3)) { + char uuid_filename[FILENAME_MAX + 1]; + snprintfz(uuid_filename, FILENAME_MAX, path_to_sys_devices_virtual_block_device, disk); + strncat(uuid_filename, "/dm/uuid", FILENAME_MAX); + + char device_uuid[RRD_ID_LENGTH_MAX + 1]; + if (!read_file(uuid_filename, device_uuid, RRD_ID_LENGTH_MAX) && !strncmp(device_uuid, "LVM-", 4)) { + trim(device_uuid); + + char chart_id[RRD_ID_LENGTH_MAX + 1]; + snprintf(chart_id, RRD_ID_LENGTH_MAX, "%s-%s", d->device, device_uuid + 4); + + freez(d->chart_id); + d->chart_id = strdupz(chart_id); + } + } + + char buffer[FILENAME_MAX + 1]; + + // find if it is a physical disk + // by checking if /sys/block/DISK is readable. + snprintfz(buffer, FILENAME_MAX, path_to_sys_block_device, disk); + if(likely(access(buffer, R_OK) == 0)) { + // assign it here, but it will be overwritten if it is not a physical disk + d->type = DISK_TYPE_PHYSICAL; + } + + // find if it is a partition + // by checking if /sys/dev/block/MAJOR:MINOR/partition is readable. + snprintfz(buffer, FILENAME_MAX, path_to_sys_dev_block_major_minor_string, major, minor, "partition"); + if(likely(access(buffer, R_OK) == 0)) { + d->type = DISK_TYPE_PARTITION; + } + else { + // find if it is a virtual disk + // by checking if /sys/devices/virtual/block/DISK is readable. + snprintfz(buffer, FILENAME_MAX, path_to_sys_devices_virtual_block_device, disk); + if(likely(access(buffer, R_OK) == 0)) { + d->type = DISK_TYPE_VIRTUAL; + } + else { + // find if it is a virtual device + // by checking if /sys/dev/block/MAJOR:MINOR/slaves has entries + snprintfz(buffer, FILENAME_MAX, path_to_sys_dev_block_major_minor_string, major, minor, "slaves/"); + DIR *dirp = opendir(buffer); + if (likely(dirp != NULL)) { + struct dirent *dp; + while ((dp = readdir(dirp))) { + // . and .. are also files in empty folders. + if (unlikely(strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)) { + continue; + } + + d->type = DISK_TYPE_VIRTUAL; + + // Stop the loop after we found one file. + break; + } + if (unlikely(closedir(dirp) == -1)) + error("Unable to close dir %s", buffer); + } + } + } + + // ------------------------------------------------------------------------ + // check if we can find its mount point + + // mountinfo_find() can be called with NULL disk_mountinfo_root + struct mountinfo *mi = mountinfo_find(disk_mountinfo_root, d->major, d->minor, d->device); + if(unlikely(!mi)) { + // mountinfo_free_all can be called with NULL + mountinfo_free_all(disk_mountinfo_root); + disk_mountinfo_root = mountinfo_read(0); + mi = mountinfo_find(disk_mountinfo_root, d->major, d->minor, d->device); + } + + if(unlikely(mi)) + d->mount_point = strdupz(mi->mount_point); + else + d->mount_point = NULL; + + // ------------------------------------------------------------------------ + // find the disk sector size + + /* + * sector size is always 512 bytes inside the kernel #3481 + * + { + char tf[FILENAME_MAX + 1], *t; + strncpyz(tf, d->device, FILENAME_MAX); + + // replace all / with ! + for(t = tf; *t ;t++) + if(unlikely(*t == '/')) *t = '!'; + + if(likely(d->type == DISK_TYPE_PARTITION)) + snprintfz(buffer, FILENAME_MAX, path_to_get_hw_sector_size_partitions, d->major, d->minor, tf); + else + snprintfz(buffer, FILENAME_MAX, path_to_get_hw_sector_size, tf); + + FILE *fpss = fopen(buffer, "r"); + if(likely(fpss)) { + char buffer2[1024 + 1]; + char *tmp = fgets(buffer2, 1024, fpss); + + if(likely(tmp)) { + d->sector_size = str2i(tmp); + if(unlikely(d->sector_size <= 0)) { + error("Invalid sector size %d for device %s in %s. Assuming 512.", d->sector_size, d->device, buffer); + d->sector_size = 512; + } + } + else error("Cannot read data for sector size for device %s from %s. Assuming 512.", d->device, buffer); + + fclose(fpss); + } + else error("Cannot read sector size for device %s from %s. Assuming 512.", d->device, buffer); + } + */ + + // ------------------------------------------------------------------------ + // check if the device is a bcache + + struct stat bcache; + snprintfz(buffer, FILENAME_MAX, path_to_sys_block_device_bcache, disk); + if(unlikely(stat(buffer, &bcache) == 0 && (bcache.st_mode & S_IFMT) == S_IFDIR)) { + // we have the 'bcache' directory + d->device_is_bcache = 1; + + char buffer2[FILENAME_MAX + 1]; + + snprintfz(buffer2, FILENAME_MAX, "%s/cache/congested", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_cache_congested = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/readahead", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_readaheads = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/cache/cache0/priority_stats", buffer); // only one cache is supported by bcache + if(access(buffer2, R_OK) == 0) + d->bcache_filename_priority_stats = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/cache/internal/cache_read_races", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_cache_read_races = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/cache/cache0/io_errors", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_cache_io_errors = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/dirty_data", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_dirty_data = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/writeback_rate", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_writeback_rate = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/cache/cache_available_percent", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_cache_available_percent = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_hits", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_hits = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_five_minute/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_five_minute_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_hour/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_hour_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_day/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_day_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_misses", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_misses = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_bypass_hits", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_bypass_hits = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_bypass_misses", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_bypass_misses = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_miss_collisions", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_miss_collisions = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + } + + get_disk_config(d); + return d; +} + +static void add_labels_to_disk(struct disk *d, RRDSET *st) { + rrdlabels_add(st->rrdlabels, "device", d->disk, RRDLABEL_SRC_AUTO); + rrdlabels_add(st->rrdlabels, "mount_point", d->mount_point, RRDLABEL_SRC_AUTO); + + switch (d->type) { + default: + case DISK_TYPE_UNKNOWN: + rrdlabels_add(st->rrdlabels, "device_type", "unknown", RRDLABEL_SRC_AUTO); + break; + + case DISK_TYPE_PHYSICAL: + rrdlabels_add(st->rrdlabels, "device_type", "physical", RRDLABEL_SRC_AUTO); + break; + + case DISK_TYPE_PARTITION: + rrdlabels_add(st->rrdlabels, "device_type", "partition", RRDLABEL_SRC_AUTO); + break; + + case DISK_TYPE_VIRTUAL: + rrdlabels_add(st->rrdlabels, "device_type", "virtual", RRDLABEL_SRC_AUTO); + break; + } +} + +int do_proc_diskstats(int update_every, usec_t dt) { + static procfile *ff = NULL; + + if(unlikely(!globals_initialized)) { + globals_initialized = 1; + + global_enable_new_disks_detected_at_runtime = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "enable new disks detected at runtime", global_enable_new_disks_detected_at_runtime); + global_enable_performance_for_physical_disks = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "performance metrics for physical disks", global_enable_performance_for_physical_disks); + global_enable_performance_for_virtual_disks = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "performance metrics for virtual disks", global_enable_performance_for_virtual_disks); + global_enable_performance_for_partitions = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "performance metrics for partitions", global_enable_performance_for_partitions); + + global_do_io = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "bandwidth for all disks", global_do_io); + global_do_ops = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "operations for all disks", global_do_ops); + global_do_mops = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "merged operations for all disks", global_do_mops); + global_do_iotime = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "i/o time for all disks", global_do_iotime); + global_do_qops = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "queued operations for all disks", global_do_qops); + global_do_util = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "utilization percentage for all disks", global_do_util); + global_do_ext = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "extended operations for all disks", global_do_ext); + global_do_backlog = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "backlog for all disks", global_do_backlog); + global_do_bcache = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "bcache for all disks", global_do_bcache); + global_bcache_priority_stats_update_every = (int)config_get_number(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "bcache priority stats update every", global_bcache_priority_stats_update_every); + + global_cleanup_removed_disks = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "remove charts of removed disks" , global_cleanup_removed_disks); + + char buffer[FILENAME_MAX + 1]; + + snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s"); + path_to_sys_block_device = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to get block device", buffer); + + snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/bcache"); + path_to_sys_block_device_bcache = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to get block device bcache", buffer); + + snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/virtual/block/%s"); + path_to_sys_devices_virtual_block_device = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to get virtual block device", buffer); + + snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/dev/block/%lu:%lu/%s"); + path_to_sys_dev_block_major_minor_string = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to get block device infos", buffer); + + //snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/queue/hw_sector_size"); + //path_to_get_hw_sector_size = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to get h/w sector size", buffer); + + //snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/dev/block/%lu:%lu/subsystem/%s/../queue/hw_sector_size"); + //path_to_get_hw_sector_size_partitions = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to get h/w sector size for partitions", buffer); + + snprintfz(buffer, FILENAME_MAX, "%s/dev/mapper", netdata_configured_host_prefix); + path_to_device_mapper = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to device mapper", buffer); + + snprintfz(buffer, FILENAME_MAX, "%s/dev/disk/by-label", netdata_configured_host_prefix); + path_to_device_label = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to /dev/disk/by-label", buffer); + + snprintfz(buffer, FILENAME_MAX, "%s/dev/disk/by-id", netdata_configured_host_prefix); + path_to_device_id = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to /dev/disk/by-id", buffer); + + snprintfz(buffer, FILENAME_MAX, "%s/dev/vx/dsk", netdata_configured_host_prefix); + path_to_veritas_volume_groups = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to /dev/vx/dsk", buffer); + + name_disks_by_id = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "name disks by id", name_disks_by_id); + + preferred_ids = simple_pattern_create( + config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "preferred disk ids", DEFAULT_PREFERRED_IDS) + , NULL + , SIMPLE_PATTERN_EXACT + ); + + excluded_disks = simple_pattern_create( + config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "exclude disks", DEFAULT_EXCLUDED_DISKS) + , NULL + , SIMPLE_PATTERN_EXACT + ); + } + + // -------------------------------------------------------------------------- + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/diskstats"); + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + } + if(unlikely(!ff)) return 0; + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + collected_number system_read_kb = 0, system_write_kb = 0; + + int do_dc_stats = 0, do_fl_stats = 0; + + for(l = 0; l < lines ;l++) { + // -------------------------------------------------------------------------- + // Read parameters + + char *disk; + unsigned long major = 0, minor = 0; + + collected_number reads = 0, mreads = 0, readsectors = 0, readms = 0, + writes = 0, mwrites = 0, writesectors = 0, writems = 0, + queued_ios = 0, busy_ms = 0, backlog_ms = 0, + discards = 0, mdiscards = 0, discardsectors = 0, discardms = 0, + flushes = 0, flushms = 0; + + + collected_number last_reads = 0, last_readsectors = 0, last_readms = 0, + last_writes = 0, last_writesectors = 0, last_writems = 0, + last_busy_ms = 0, + last_discards = 0, last_discardsectors = 0, last_discardms = 0, + last_flushes = 0, last_flushms = 0; + + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 14)) continue; + + major = str2ul(procfile_lineword(ff, l, 0)); + minor = str2ul(procfile_lineword(ff, l, 1)); + disk = procfile_lineword(ff, l, 2); + + // # of reads completed # of writes completed + // This is the total number of reads or writes completed successfully. + reads = str2ull(procfile_lineword(ff, l, 3)); // rd_ios + writes = str2ull(procfile_lineword(ff, l, 7)); // wr_ios + + // # of reads merged # of writes merged + // Reads and writes which are adjacent to each other may be merged for + // efficiency. Thus two 4K reads may become one 8K read before it is + // ultimately handed to the disk, and so it will be counted (and queued) + mreads = str2ull(procfile_lineword(ff, l, 4)); // rd_merges_or_rd_sec + mwrites = str2ull(procfile_lineword(ff, l, 8)); // wr_merges + + // # of sectors read # of sectors written + // This is the total number of sectors read or written successfully. + readsectors = str2ull(procfile_lineword(ff, l, 5)); // rd_sec_or_wr_ios + writesectors = str2ull(procfile_lineword(ff, l, 9)); // wr_sec + + // # of milliseconds spent reading # of milliseconds spent writing + // This is the total number of milliseconds spent by all reads or writes (as + // measured from __make_request() to end_that_request_last()). + readms = str2ull(procfile_lineword(ff, l, 6)); // rd_ticks_or_wr_sec + writems = str2ull(procfile_lineword(ff, l, 10)); // wr_ticks + + // # of I/Os currently in progress + // The only field that should go to zero. Incremented as requests are + // given to appropriate struct request_queue and decremented as they finish. + queued_ios = str2ull(procfile_lineword(ff, l, 11)); // ios_pgr + + // # of milliseconds spent doing I/Os + // This field increases so long as field queued_ios is nonzero. + busy_ms = str2ull(procfile_lineword(ff, l, 12)); // tot_ticks + + // weighted # of milliseconds spent doing I/Os + // This field is incremented at each I/O start, I/O completion, I/O + // merge, or read of these stats by the number of I/Os in progress + // (field queued_ios) times the number of milliseconds spent doing I/O since the + // last update of this field. This can provide an easy measure of both + // I/O completion time and the backlog that may be accumulating. + backlog_ms = str2ull(procfile_lineword(ff, l, 13)); // rq_ticks + + if (unlikely(words > 13)) { + do_dc_stats = 1; + + // # of discards completed + // This is the total number of discards completed successfully. + discards = str2ull(procfile_lineword(ff, l, 14)); // dc_ios + + // # of discards merged + // See the description of mreads/mwrites + mdiscards = str2ull(procfile_lineword(ff, l, 15)); // dc_merges + + // # of sectors discarded + // This is the total number of sectors discarded successfully. + discardsectors = str2ull(procfile_lineword(ff, l, 16)); // dc_sec + + // # of milliseconds spent discarding + // This is the total number of milliseconds spent by all discards (as + // measured from __make_request() to end_that_request_last()). + discardms = str2ull(procfile_lineword(ff, l, 17)); // dc_ticks + } + + if (unlikely(words > 17)) { + do_fl_stats = 1; + + // number of flush I/Os processed + // These values increment when an flush I/O request completes. + // Block layer combines flush requests and executes at most one at a time. + // This counts flush requests executed by disk. Not tracked for partitions. + flushes = str2ull(procfile_lineword(ff, l, 18)); // fl_ios + + // total wait time for flush requests + flushms = str2ull(procfile_lineword(ff, l, 19)); // fl_ticks + } + + // -------------------------------------------------------------------------- + // get a disk structure for the disk + + struct disk *d = get_disk(major, minor, disk); + d->updated = 1; + + // -------------------------------------------------------------------------- + // count the global system disk I/O of physical disks + + if(unlikely(d->type == DISK_TYPE_PHYSICAL)) { + system_read_kb += readsectors * d->sector_size / 1024; + system_write_kb += writesectors * d->sector_size / 1024; + } + + // -------------------------------------------------------------------------- + // Set its family based on mount point + + char *family = d->mount_point; + if(!family) family = d->disk; + + + // -------------------------------------------------------------------------- + // Do performance metrics + + if(d->do_io == CONFIG_BOOLEAN_YES || (d->do_io == CONFIG_BOOLEAN_AUTO && + (readsectors || writesectors || discardsectors || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->do_io = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_io)) { + d->st_io = rrdset_create_localhost( + RRD_TYPE_DISK + , d->chart_id + , d->disk + , family + , "disk.io" + , "Disk I/O Bandwidth" + , "KiB/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_IO + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_io_reads = rrddim_add(d->st_io, "reads", NULL, d->sector_size, 1024, RRD_ALGORITHM_INCREMENTAL); + d->rd_io_writes = rrddim_add(d->st_io, "writes", NULL, d->sector_size * -1, 1024, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_io); + } + + last_readsectors = rrddim_set_by_pointer(d->st_io, d->rd_io_reads, readsectors); + last_writesectors = rrddim_set_by_pointer(d->st_io, d->rd_io_writes, writesectors); + rrdset_done(d->st_io); + } + + if (do_dc_stats && d->do_io == CONFIG_BOOLEAN_YES && d->do_ext != CONFIG_BOOLEAN_NO) { + if (unlikely(!d->st_ext_io)) { + d->st_ext_io = rrdset_create_localhost( + "disk_ext" + , d->chart_id + , d->disk + , family + , "disk_ext.io" + , "Amount of Discarded Data" + , "KiB/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_IO + 1 + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_io_discards = rrddim_add(d->st_ext_io, "discards", NULL, d->sector_size, 1024, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ext_io); + } + + last_discardsectors = rrddim_set_by_pointer(d->st_ext_io, d->rd_io_discards, discardsectors); + rrdset_done(d->st_ext_io); + } + + if(d->do_ops == CONFIG_BOOLEAN_YES || (d->do_ops == CONFIG_BOOLEAN_AUTO && + (reads || writes || discards || flushes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->do_ops = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_ops)) { + d->st_ops = rrdset_create_localhost( + "disk_ops" + , d->chart_id + , d->disk + , family + , "disk.ops" + , "Disk Completed I/O Operations" + , "operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_OPS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_ops, RRDSET_FLAG_DETAIL); + + d->rd_ops_reads = rrddim_add(d->st_ops, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_ops_writes = rrddim_add(d->st_ops, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ops); + } + + last_reads = rrddim_set_by_pointer(d->st_ops, d->rd_ops_reads, reads); + last_writes = rrddim_set_by_pointer(d->st_ops, d->rd_ops_writes, writes); + rrdset_done(d->st_ops); + } + + if (do_dc_stats && d->do_ops == CONFIG_BOOLEAN_YES && d->do_ext != CONFIG_BOOLEAN_NO) { + if (unlikely(!d->st_ext_ops)) { + d->st_ext_ops = rrdset_create_localhost( + "disk_ext_ops" + , d->chart_id + , d->disk + , family + , "disk_ext.ops" + , "Disk Completed Extended I/O Operations" + , "operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_OPS + 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_ext_ops, RRDSET_FLAG_DETAIL); + + d->rd_ops_discards = rrddim_add(d->st_ext_ops, "discards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + if (do_fl_stats) + d->rd_ops_flushes = rrddim_add(d->st_ext_ops, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ext_ops); + } + + last_discards = rrddim_set_by_pointer(d->st_ext_ops, d->rd_ops_discards, discards); + if (do_fl_stats) + last_flushes = rrddim_set_by_pointer(d->st_ext_ops, d->rd_ops_flushes, flushes); + rrdset_done(d->st_ext_ops); + } + + if(d->do_qops == CONFIG_BOOLEAN_YES || (d->do_qops == CONFIG_BOOLEAN_AUTO && + (queued_ios || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->do_qops = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_qops)) { + d->st_qops = rrdset_create_localhost( + "disk_qops" + , d->chart_id + , d->disk + , family + , "disk.qops" + , "Disk Current I/O Operations" + , "operations" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_QOPS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_qops, RRDSET_FLAG_DETAIL); + + d->rd_qops_operations = rrddim_add(d->st_qops, "operations", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_qops); + } + + rrddim_set_by_pointer(d->st_qops, d->rd_qops_operations, queued_ios); + rrdset_done(d->st_qops); + } + + if(d->do_backlog == CONFIG_BOOLEAN_YES || (d->do_backlog == CONFIG_BOOLEAN_AUTO && + (backlog_ms || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->do_backlog = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_backlog)) { + d->st_backlog = rrdset_create_localhost( + "disk_backlog" + , d->chart_id + , d->disk + , family + , "disk.backlog" + , "Disk Backlog" + , "milliseconds" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_BACKLOG + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(d->st_backlog, RRDSET_FLAG_DETAIL); + + d->rd_backlog_backlog = rrddim_add(d->st_backlog, "backlog", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_backlog); + } + + rrddim_set_by_pointer(d->st_backlog, d->rd_backlog_backlog, backlog_ms); + rrdset_done(d->st_backlog); + } + + if(d->do_util == CONFIG_BOOLEAN_YES || (d->do_util == CONFIG_BOOLEAN_AUTO && + (busy_ms || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->do_util = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_busy)) { + d->st_busy = rrdset_create_localhost( + "disk_busy" + , d->chart_id + , d->disk + , family + , "disk.busy" + , "Disk Busy Time" + , "milliseconds" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_BUSY + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(d->st_busy, RRDSET_FLAG_DETAIL); + + d->rd_busy_busy = rrddim_add(d->st_busy, "busy", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_busy); + } + + last_busy_ms = rrddim_set_by_pointer(d->st_busy, d->rd_busy_busy, busy_ms); + rrdset_done(d->st_busy); + + if(unlikely(!d->st_util)) { + d->st_util = rrdset_create_localhost( + "disk_util" + , d->chart_id + , d->disk + , family + , "disk.util" + , "Disk Utilization Time" + , "% of time working" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_UTIL + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(d->st_util, RRDSET_FLAG_DETAIL); + + d->rd_util_utilization = rrddim_add(d->st_util, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_util); + } + + collected_number disk_utilization = (busy_ms - last_busy_ms) / (10 * update_every); + if (disk_utilization > 100) + disk_utilization = 100; + + rrddim_set_by_pointer(d->st_util, d->rd_util_utilization, disk_utilization); + rrdset_done(d->st_util); + } + + if(d->do_mops == CONFIG_BOOLEAN_YES || (d->do_mops == CONFIG_BOOLEAN_AUTO && + (mreads || mwrites || mdiscards || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->do_mops = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_mops)) { + d->st_mops = rrdset_create_localhost( + "disk_mops" + , d->chart_id + , d->disk + , family + , "disk.mops" + , "Disk Merged Operations" + , "merged operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_MOPS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_mops, RRDSET_FLAG_DETAIL); + + d->rd_mops_reads = rrddim_add(d->st_mops, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_mops_writes = rrddim_add(d->st_mops, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_mops); + } + + rrddim_set_by_pointer(d->st_mops, d->rd_mops_reads, mreads); + rrddim_set_by_pointer(d->st_mops, d->rd_mops_writes, mwrites); + rrdset_done(d->st_mops); + } + + if(do_dc_stats && d->do_mops == CONFIG_BOOLEAN_YES && d->do_ext != CONFIG_BOOLEAN_NO) { + d->do_mops = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_ext_mops)) { + d->st_ext_mops = rrdset_create_localhost( + "disk_ext_mops" + , d->chart_id + , d->disk + , family + , "disk_ext.mops" + , "Disk Merged Discard Operations" + , "merged operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_MOPS + 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_ext_mops, RRDSET_FLAG_DETAIL); + + d->rd_mops_discards = rrddim_add(d->st_ext_mops, "discards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ext_mops); + } + + rrddim_set_by_pointer(d->st_ext_mops, d->rd_mops_discards, mdiscards); + rrdset_done(d->st_ext_mops); + } + + if(d->do_iotime == CONFIG_BOOLEAN_YES || (d->do_iotime == CONFIG_BOOLEAN_AUTO && + (readms || writems || discardms || flushms || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->do_iotime = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_iotime)) { + d->st_iotime = rrdset_create_localhost( + "disk_iotime" + , d->chart_id + , d->disk + , family + , "disk.iotime" + , "Disk Total I/O Time" + , "milliseconds/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_IOTIME + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_iotime, RRDSET_FLAG_DETAIL); + + d->rd_iotime_reads = rrddim_add(d->st_iotime, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_iotime_writes = rrddim_add(d->st_iotime, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_iotime); + } + + last_readms = rrddim_set_by_pointer(d->st_iotime, d->rd_iotime_reads, readms); + last_writems = rrddim_set_by_pointer(d->st_iotime, d->rd_iotime_writes, writems); + rrdset_done(d->st_iotime); + } + + if(do_dc_stats && d->do_iotime == CONFIG_BOOLEAN_YES && d->do_ext != CONFIG_BOOLEAN_NO) { + if(unlikely(!d->st_ext_iotime)) { + d->st_ext_iotime = rrdset_create_localhost( + "disk_ext_iotime" + , d->chart_id + , d->disk + , family + , "disk_ext.iotime" + , "Disk Total I/O Time for Extended Operations" + , "milliseconds/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_IOTIME + 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_ext_iotime, RRDSET_FLAG_DETAIL); + + d->rd_iotime_discards = rrddim_add(d->st_ext_iotime, "discards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + if (do_fl_stats) + d->rd_iotime_flushes = rrddim_add(d->st_ext_iotime, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ext_iotime); + } + + last_discardms = rrddim_set_by_pointer(d->st_ext_iotime, d->rd_iotime_discards, discardms); + if (do_fl_stats) + last_flushms = rrddim_set_by_pointer(d->st_ext_iotime, d->rd_iotime_flushes, flushms); + rrdset_done(d->st_ext_iotime); + } + + // calculate differential charts + // only if this is not the first time we run + + if(likely(dt)) { + if( (d->do_iotime == CONFIG_BOOLEAN_YES || (d->do_iotime == CONFIG_BOOLEAN_AUTO && + (readms || writems || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) && + (d->do_ops == CONFIG_BOOLEAN_YES || (d->do_ops == CONFIG_BOOLEAN_AUTO && + (reads || writes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) { + + if(unlikely(!d->st_await)) { + d->st_await = rrdset_create_localhost( + "disk_await" + , d->chart_id + , d->disk + , family + , "disk.await" + , "Average Completed I/O Operation Time" + , "milliseconds/operation" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_AWAIT + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_await, RRDSET_FLAG_DETAIL); + + d->rd_await_reads = rrddim_add(d->st_await, "reads", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_await_writes = rrddim_add(d->st_await, "writes", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_await); + } + + rrddim_set_by_pointer(d->st_await, d->rd_await_reads, (reads - last_reads) ? (readms - last_readms) / (reads - last_reads) : 0); + rrddim_set_by_pointer(d->st_await, d->rd_await_writes, (writes - last_writes) ? (writems - last_writems) / (writes - last_writes) : 0); + rrdset_done(d->st_await); + } + + if (do_dc_stats && d->do_iotime == CONFIG_BOOLEAN_YES && d->do_ops == CONFIG_BOOLEAN_YES && d->do_ext != CONFIG_BOOLEAN_NO) { + if(unlikely(!d->st_ext_await)) { + d->st_ext_await = rrdset_create_localhost( + "disk_ext_await" + , d->chart_id + , d->disk + , family + , "disk_ext.await" + , "Average Completed Extended I/O Operation Time" + , "milliseconds/operation" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_AWAIT + 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_ext_await, RRDSET_FLAG_DETAIL); + + d->rd_await_discards = rrddim_add(d->st_ext_await, "discards", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + if (do_fl_stats) + d->rd_await_flushes = rrddim_add(d->st_ext_await, "flushes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_ext_await); + } + + rrddim_set_by_pointer( + d->st_ext_await, d->rd_await_discards, + (discards - last_discards) ? (discardms - last_discardms) / (discards - last_discards) : 0); + + if (do_fl_stats) + rrddim_set_by_pointer( + d->st_ext_await, d->rd_await_flushes, + (flushes - last_flushes) ? (flushms - last_flushms) / (flushes - last_flushes) : 0); + + rrdset_done(d->st_ext_await); + } + + if( (d->do_io == CONFIG_BOOLEAN_YES || (d->do_io == CONFIG_BOOLEAN_AUTO && + (readsectors || writesectors || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) && + (d->do_ops == CONFIG_BOOLEAN_YES || (d->do_ops == CONFIG_BOOLEAN_AUTO && + (reads || writes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) { + + if(unlikely(!d->st_avgsz)) { + d->st_avgsz = rrdset_create_localhost( + "disk_avgsz" + , d->chart_id + , d->disk + , family + , "disk.avgsz" + , "Average Completed I/O Operation Bandwidth" + , "KiB/operation" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_AVGSZ + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(d->st_avgsz, RRDSET_FLAG_DETAIL); + + d->rd_avgsz_reads = rrddim_add(d->st_avgsz, "reads", NULL, d->sector_size, 1024, RRD_ALGORITHM_ABSOLUTE); + d->rd_avgsz_writes = rrddim_add(d->st_avgsz, "writes", NULL, d->sector_size * -1, 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_avgsz); + } + + rrddim_set_by_pointer(d->st_avgsz, d->rd_avgsz_reads, (reads - last_reads) ? (readsectors - last_readsectors) / (reads - last_reads) : 0); + rrddim_set_by_pointer(d->st_avgsz, d->rd_avgsz_writes, (writes - last_writes) ? (writesectors - last_writesectors) / (writes - last_writes) : 0); + rrdset_done(d->st_avgsz); + } + + if(do_dc_stats && d->do_io == CONFIG_BOOLEAN_YES && d->do_ops == CONFIG_BOOLEAN_YES && d->do_ext != CONFIG_BOOLEAN_NO) { + if(unlikely(!d->st_ext_avgsz)) { + d->st_ext_avgsz = rrdset_create_localhost( + "disk_ext_avgsz" + , d->chart_id + , d->disk + , family + , "disk_ext.avgsz" + , "Average Amount of Discarded Data" + , "KiB/operation" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_AVGSZ + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(d->st_ext_avgsz, RRDSET_FLAG_DETAIL); + + d->rd_avgsz_discards = rrddim_add(d->st_ext_avgsz, "discards", NULL, d->sector_size, 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_ext_avgsz); + } + + rrddim_set_by_pointer( + d->st_ext_avgsz, d->rd_avgsz_discards, + (discards - last_discards) ? (discardsectors - last_discardsectors) / (discards - last_discards) : + 0); + rrdset_done(d->st_ext_avgsz); + } + + if( (d->do_util == CONFIG_BOOLEAN_YES || (d->do_util == CONFIG_BOOLEAN_AUTO && + (busy_ms || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) && + (d->do_ops == CONFIG_BOOLEAN_YES || (d->do_ops == CONFIG_BOOLEAN_AUTO && + (reads || writes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) { + + if(unlikely(!d->st_svctm)) { + d->st_svctm = rrdset_create_localhost( + "disk_svctm" + , d->chart_id + , d->disk + , family + , "disk.svctm" + , "Average Service Time" + , "milliseconds/operation" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_DISK_SVCTM + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_svctm, RRDSET_FLAG_DETAIL); + + d->rd_svctm_svctm = rrddim_add(d->st_svctm, "svctm", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_svctm); + } + + rrddim_set_by_pointer(d->st_svctm, d->rd_svctm_svctm, ((reads - last_reads) + (writes - last_writes)) ? (busy_ms - last_busy_ms) / ((reads - last_reads) + (writes - last_writes)) : 0); + rrdset_done(d->st_svctm); + } + } + + // read bcache metrics and generate the bcache charts + + if(d->device_is_bcache && d->do_bcache != CONFIG_BOOLEAN_NO) { + unsigned long long int + stats_total_cache_bypass_hits = 0, + stats_total_cache_bypass_misses = 0, + stats_total_cache_hits = 0, + stats_total_cache_miss_collisions = 0, + stats_total_cache_misses = 0, + stats_five_minute_cache_hit_ratio = 0, + stats_hour_cache_hit_ratio = 0, + stats_day_cache_hit_ratio = 0, + stats_total_cache_hit_ratio = 0, + cache_available_percent = 0, + cache_readaheads = 0, + cache_read_races = 0, + cache_io_errors = 0, + cache_congested = 0, + dirty_data = 0, + writeback_rate = 0; + + // read the bcache values + + if(d->bcache_filename_dirty_data) + dirty_data = bcache_read_number_with_units(d->bcache_filename_dirty_data); + + if(d->bcache_filename_writeback_rate) + writeback_rate = bcache_read_number_with_units(d->bcache_filename_writeback_rate); + + if(d->bcache_filename_cache_congested) + cache_congested = bcache_read_number_with_units(d->bcache_filename_cache_congested); + + if(d->bcache_filename_cache_available_percent) + read_single_number_file(d->bcache_filename_cache_available_percent, &cache_available_percent); + + if(d->bcache_filename_stats_five_minute_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_five_minute_cache_hit_ratio, &stats_five_minute_cache_hit_ratio); + + if(d->bcache_filename_stats_hour_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_hour_cache_hit_ratio, &stats_hour_cache_hit_ratio); + + if(d->bcache_filename_stats_day_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_day_cache_hit_ratio, &stats_day_cache_hit_ratio); + + if(d->bcache_filename_stats_total_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_total_cache_hit_ratio, &stats_total_cache_hit_ratio); + + if(d->bcache_filename_stats_total_cache_hits) + read_single_number_file(d->bcache_filename_stats_total_cache_hits, &stats_total_cache_hits); + + if(d->bcache_filename_stats_total_cache_misses) + read_single_number_file(d->bcache_filename_stats_total_cache_misses, &stats_total_cache_misses); + + if(d->bcache_filename_stats_total_cache_miss_collisions) + read_single_number_file(d->bcache_filename_stats_total_cache_miss_collisions, &stats_total_cache_miss_collisions); + + if(d->bcache_filename_stats_total_cache_bypass_hits) + read_single_number_file(d->bcache_filename_stats_total_cache_bypass_hits, &stats_total_cache_bypass_hits); + + if(d->bcache_filename_stats_total_cache_bypass_misses) + read_single_number_file(d->bcache_filename_stats_total_cache_bypass_misses, &stats_total_cache_bypass_misses); + + if(d->bcache_filename_stats_total_cache_readaheads) + cache_readaheads = bcache_read_number_with_units(d->bcache_filename_stats_total_cache_readaheads); + + if(d->bcache_filename_cache_read_races) + read_single_number_file(d->bcache_filename_cache_read_races, &cache_read_races); + + if(d->bcache_filename_cache_io_errors) + read_single_number_file(d->bcache_filename_cache_io_errors, &cache_io_errors); + + if(d->bcache_filename_priority_stats && global_bcache_priority_stats_update_every >= 1) + bcache_read_priority_stats(d, family, global_bcache_priority_stats_update_every, dt); + + // update the charts + + { + if(unlikely(!d->st_bcache_hit_ratio)) { + d->st_bcache_hit_ratio = rrdset_create_localhost( + "disk_bcache_hit_ratio" + , d->chart_id + , d->disk + , family + , "disk.bcache_hit_ratio" + , "BCache Cache Hit Ratio" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_HIT_RATIO + , update_every + , RRDSET_TYPE_LINE + ); + + d->rd_bcache_hit_ratio_5min = rrddim_add(d->st_bcache_hit_ratio, "5min", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_hit_ratio_1hour = rrddim_add(d->st_bcache_hit_ratio, "1hour", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_hit_ratio_1day = rrddim_add(d->st_bcache_hit_ratio, "1day", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_hit_ratio_total = rrddim_add(d->st_bcache_hit_ratio, "ever", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_hit_ratio); + } + + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_5min, stats_five_minute_cache_hit_ratio); + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_1hour, stats_hour_cache_hit_ratio); + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_1day, stats_day_cache_hit_ratio); + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_total, stats_total_cache_hit_ratio); + rrdset_done(d->st_bcache_hit_ratio); + } + + { + + if(unlikely(!d->st_bcache_rates)) { + d->st_bcache_rates = rrdset_create_localhost( + "disk_bcache_rates" + , d->chart_id + , d->disk + , family + , "disk.bcache_rates" + , "BCache Rates" + , "KiB/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_RATES + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_bcache_rate_congested = rrddim_add(d->st_bcache_rates, "congested", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_rate_writeback = rrddim_add(d->st_bcache_rates, "writeback", NULL, -1, 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_rates); + } + + rrddim_set_by_pointer(d->st_bcache_rates, d->rd_bcache_rate_writeback, writeback_rate); + rrddim_set_by_pointer(d->st_bcache_rates, d->rd_bcache_rate_congested, cache_congested); + rrdset_done(d->st_bcache_rates); + } + + { + if(unlikely(!d->st_bcache_size)) { + d->st_bcache_size = rrdset_create_localhost( + "disk_bcache_size" + , d->chart_id + , d->disk + , family + , "disk.bcache_size" + , "BCache Cache Sizes" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_SIZE + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_bcache_dirty_size = rrddim_add(d->st_bcache_size, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_size); + } + + rrddim_set_by_pointer(d->st_bcache_size, d->rd_bcache_dirty_size, dirty_data); + rrdset_done(d->st_bcache_size); + } + + { + if(unlikely(!d->st_bcache_usage)) { + d->st_bcache_usage = rrdset_create_localhost( + "disk_bcache_usage" + , d->chart_id + , d->disk + , family + , "disk.bcache_usage" + , "BCache Cache Usage" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_USAGE + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_bcache_available_percent = rrddim_add(d->st_bcache_usage, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_usage); + } + + rrddim_set_by_pointer(d->st_bcache_usage, d->rd_bcache_available_percent, cache_available_percent); + rrdset_done(d->st_bcache_usage); + } + + { + + if(unlikely(!d->st_bcache_cache_read_races)) { + d->st_bcache_cache_read_races = rrdset_create_localhost( + "disk_bcache_cache_read_races" + , d->chart_id + , d->disk + , family + , "disk.bcache_cache_read_races" + , "BCache Cache Read Races" + , "operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_CACHE_READ_RACES + , update_every + , RRDSET_TYPE_LINE + ); + + d->rd_bcache_cache_read_races = rrddim_add(d->st_bcache_cache_read_races, "races", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_cache_io_errors = rrddim_add(d->st_bcache_cache_read_races, "errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_bcache_cache_read_races); + } + + rrddim_set_by_pointer(d->st_bcache_cache_read_races, d->rd_bcache_cache_read_races, cache_read_races); + rrddim_set_by_pointer(d->st_bcache_cache_read_races, d->rd_bcache_cache_io_errors, cache_io_errors); + rrdset_done(d->st_bcache_cache_read_races); + } + + if(d->do_bcache == CONFIG_BOOLEAN_YES || (d->do_bcache == CONFIG_BOOLEAN_AUTO && + (stats_total_cache_hits || + stats_total_cache_misses || + stats_total_cache_miss_collisions || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + + if(unlikely(!d->st_bcache)) { + d->st_bcache = rrdset_create_localhost( + "disk_bcache" + , d->chart_id + , d->disk + , family + , "disk.bcache" + , "BCache Cache I/O Operations" + , "operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_OPS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_bcache, RRDSET_FLAG_DETAIL); + + d->rd_bcache_hits = rrddim_add(d->st_bcache, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_misses = rrddim_add(d->st_bcache, "misses", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_miss_collisions = rrddim_add(d->st_bcache, "collisions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_readaheads = rrddim_add(d->st_bcache, "readaheads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_bcache); + } + + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_hits, stats_total_cache_hits); + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_misses, stats_total_cache_misses); + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_miss_collisions, stats_total_cache_miss_collisions); + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_readaheads, cache_readaheads); + rrdset_done(d->st_bcache); + } + + if(d->do_bcache == CONFIG_BOOLEAN_YES || (d->do_bcache == CONFIG_BOOLEAN_AUTO && + (stats_total_cache_bypass_hits || + stats_total_cache_bypass_misses || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + + if(unlikely(!d->st_bcache_bypass)) { + d->st_bcache_bypass = rrdset_create_localhost( + "disk_bcache_bypass" + , d->chart_id + , d->disk + , family + , "disk.bcache_bypass" + , "BCache Cache Bypass I/O Operations" + , "operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_BCACHE_BYPASS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_bcache_bypass, RRDSET_FLAG_DETAIL); + + d->rd_bcache_bypass_hits = rrddim_add(d->st_bcache_bypass, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_bypass_misses = rrddim_add(d->st_bcache_bypass, "misses", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_bcache_bypass); + } + + rrddim_set_by_pointer(d->st_bcache_bypass, d->rd_bcache_bypass_hits, stats_total_cache_bypass_hits); + rrddim_set_by_pointer(d->st_bcache_bypass, d->rd_bcache_bypass_misses, stats_total_cache_bypass_misses); + rrdset_done(d->st_bcache_bypass); + } + } + } + + // update the system total I/O + + if(global_do_io == CONFIG_BOOLEAN_YES || (global_do_io == CONFIG_BOOLEAN_AUTO && + (system_read_kb || system_write_kb || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + static RRDSET *st_io = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_io)) { + st_io = rrdset_create_localhost( + "system" + , "io" + , NULL + , "disk" + , NULL + , "Disk I/O" + , "KiB/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DISKSTATS_NAME + , NETDATA_CHART_PRIO_SYSTEM_IO + , update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_io, "in", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_io, "out", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_io, rd_in, system_read_kb); + rrddim_set_by_pointer(st_io, rd_out, system_write_kb); + rrdset_done(st_io); + } + + // cleanup removed disks + + struct disk *d = disk_root, *last = NULL; + while(d) { + if(unlikely(global_cleanup_removed_disks && !d->updated)) { + struct disk *t = d; + + rrdset_obsolete_and_pointer_null(d->st_avgsz); + rrdset_obsolete_and_pointer_null(d->st_ext_avgsz); + rrdset_obsolete_and_pointer_null(d->st_await); + rrdset_obsolete_and_pointer_null(d->st_ext_await); + rrdset_obsolete_and_pointer_null(d->st_backlog); + rrdset_obsolete_and_pointer_null(d->st_busy); + rrdset_obsolete_and_pointer_null(d->st_io); + rrdset_obsolete_and_pointer_null(d->st_ext_io); + rrdset_obsolete_and_pointer_null(d->st_iotime); + rrdset_obsolete_and_pointer_null(d->st_ext_iotime); + rrdset_obsolete_and_pointer_null(d->st_mops); + rrdset_obsolete_and_pointer_null(d->st_ext_mops); + rrdset_obsolete_and_pointer_null(d->st_ops); + rrdset_obsolete_and_pointer_null(d->st_ext_ops); + rrdset_obsolete_and_pointer_null(d->st_qops); + rrdset_obsolete_and_pointer_null(d->st_svctm); + rrdset_obsolete_and_pointer_null(d->st_util); + rrdset_obsolete_and_pointer_null(d->st_bcache); + rrdset_obsolete_and_pointer_null(d->st_bcache_bypass); + rrdset_obsolete_and_pointer_null(d->st_bcache_rates); + rrdset_obsolete_and_pointer_null(d->st_bcache_size); + rrdset_obsolete_and_pointer_null(d->st_bcache_usage); + rrdset_obsolete_and_pointer_null(d->st_bcache_hit_ratio); + rrdset_obsolete_and_pointer_null(d->st_bcache_cache_allocations); + rrdset_obsolete_and_pointer_null(d->st_bcache_cache_read_races); + + if(d == disk_root) { + disk_root = d = d->next; + last = NULL; + } + else if(last) { + last->next = d = d->next; + } + + freez(t->bcache_filename_dirty_data); + freez(t->bcache_filename_writeback_rate); + freez(t->bcache_filename_cache_congested); + freez(t->bcache_filename_cache_available_percent); + freez(t->bcache_filename_stats_five_minute_cache_hit_ratio); + freez(t->bcache_filename_stats_hour_cache_hit_ratio); + freez(t->bcache_filename_stats_day_cache_hit_ratio); + freez(t->bcache_filename_stats_total_cache_hit_ratio); + freez(t->bcache_filename_stats_total_cache_hits); + freez(t->bcache_filename_stats_total_cache_misses); + freez(t->bcache_filename_stats_total_cache_miss_collisions); + freez(t->bcache_filename_stats_total_cache_bypass_hits); + freez(t->bcache_filename_stats_total_cache_bypass_misses); + freez(t->bcache_filename_stats_total_cache_readaheads); + freez(t->bcache_filename_cache_read_races); + freez(t->bcache_filename_cache_io_errors); + freez(t->bcache_filename_priority_stats); + + freez(t->disk); + freez(t->device); + freez(t->mount_point); + freez(t->chart_id); + freez(t); + } + else { + d->updated = 0; + last = d; + d = d->next; + } + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_interrupts.c b/collectors/proc.plugin/proc_interrupts.c new file mode 100644 index 0000000..f876847 --- /dev/null +++ b/collectors/proc.plugin/proc_interrupts.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_INTERRUPTS_NAME "/proc/interrupts" +#define CONFIG_SECTION_PLUGIN_PROC_INTERRUPTS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_INTERRUPTS_NAME + +#define MAX_INTERRUPT_NAME 50 + +struct cpu_interrupt { + unsigned long long value; + RRDDIM *rd; +}; + +struct interrupt { + int used; + char *id; + char name[MAX_INTERRUPT_NAME + 1]; + RRDDIM *rd; + unsigned long long total; + struct cpu_interrupt cpu[]; +}; + +// since each interrupt is variable in size +// we use this to calculate its record size +#define recordsize(cpus) (sizeof(struct interrupt) + ((cpus) * sizeof(struct cpu_interrupt))) + +// given a base, get a pointer to each record +#define irrindex(base, line, cpus) ((struct interrupt *)&((char *)(base))[(line) * recordsize(cpus)]) + +static inline struct interrupt *get_interrupts_array(size_t lines, int cpus) { + static struct interrupt *irrs = NULL; + static size_t allocated = 0; + + if(unlikely(lines != allocated)) { + size_t l; + int c; + + irrs = (struct interrupt *)reallocz(irrs, lines * recordsize(cpus)); + + // reset all interrupt RRDDIM pointers as any line could have shifted + for(l = 0; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + irr->rd = NULL; + irr->name[0] = '\0'; + for(c = 0; c < cpus ;c++) + irr->cpu[c].rd = NULL; + } + + allocated = lines; + } + + return irrs; +} + +int do_proc_interrupts(int update_every, usec_t dt) { + (void)dt; + static procfile *ff = NULL; + static int cpus = -1, do_per_core = CONFIG_BOOLEAN_INVALID; + struct interrupt *irrs = NULL; + + if(unlikely(do_per_core == CONFIG_BOOLEAN_INVALID)) + do_per_core = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_INTERRUPTS, "interrupts per core", CONFIG_BOOLEAN_AUTO); + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/interrupts"); + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_INTERRUPTS, "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + } + if(unlikely(!ff)) + return 1; + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + size_t words = procfile_linewords(ff, 0); + + if(unlikely(!lines)) { + error("Cannot read /proc/interrupts, zero lines reported."); + return 1; + } + + // find how many CPUs are there + if(unlikely(cpus == -1)) { + uint32_t w; + cpus = 0; + for(w = 0; w < words ; w++) { + if(likely(strncmp(procfile_lineword(ff, 0, w), "CPU", 3) == 0)) + cpus++; + } + } + + if(unlikely(!cpus)) { + error("PLUGIN: PROC_INTERRUPTS: Cannot find the number of CPUs in /proc/interrupts"); + return 1; + } + + // allocate the size we need; + irrs = get_interrupts_array(lines, cpus); + irrs[0].used = 0; + + // loop through all lines + for(l = 1; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + irr->used = 0; + irr->total = 0; + + words = procfile_linewords(ff, l); + if(unlikely(!words)) continue; + + irr->id = procfile_lineword(ff, l, 0); + if(unlikely(!irr->id || !irr->id[0])) continue; + + size_t idlen = strlen(irr->id); + if(irr->id[idlen - 1] == ':') + irr->id[--idlen] = '\0'; + + int c; + for(c = 0; c < cpus ;c++) { + if(likely((c + 1) < (int)words)) + irr->cpu[c].value = str2ull(procfile_lineword(ff, l, (uint32_t)(c + 1))); + else + irr->cpu[c].value = 0; + + irr->total += irr->cpu[c].value; + } + + if(unlikely(isdigit(irr->id[0]) && (uint32_t)(cpus + 2) < words)) { + strncpyz(irr->name, procfile_lineword(ff, l, words - 1), MAX_INTERRUPT_NAME); + size_t nlen = strlen(irr->name); + if(likely(nlen + 1 + idlen <= MAX_INTERRUPT_NAME)) { + irr->name[nlen] = '_'; + strncpyz(&irr->name[nlen + 1], irr->id, MAX_INTERRUPT_NAME - nlen - 1); + } + else { + irr->name[MAX_INTERRUPT_NAME - idlen - 1] = '_'; + strncpyz(&irr->name[MAX_INTERRUPT_NAME - idlen], irr->id, idlen); + } + } + else { + strncpyz(irr->name, irr->id, MAX_INTERRUPT_NAME); + } + + irr->used = 1; + } + + static RRDSET *st_system_interrupts = NULL; + if(unlikely(!st_system_interrupts)) + st_system_interrupts = rrdset_create_localhost( + "system" + , "interrupts" + , NULL + , "interrupts" + , NULL + , "System interrupts" + , "interrupts/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_INTERRUPTS_NAME + , NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS + , update_every + , RRDSET_TYPE_STACKED + ); + + for(l = 0; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + if(irr->used && irr->total) { + // some interrupt may have changed without changing the total number of lines + // if the same number of interrupts have been added and removed between two + // calls of this function. + if(unlikely(!irr->rd || strncmp(rrddim_name(irr->rd), irr->name, MAX_INTERRUPT_NAME) != 0)) { + irr->rd = rrddim_add(st_system_interrupts, irr->id, irr->name, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_reset_name(st_system_interrupts, irr->rd, irr->name); + + // also reset per cpu RRDDIMs to avoid repeating strncmp() in the per core loop + if(likely(do_per_core != CONFIG_BOOLEAN_NO)) { + int c; + for(c = 0; c < cpus; c++) irr->cpu[c].rd = NULL; + } + } + + rrddim_set_by_pointer(st_system_interrupts, irr->rd, irr->total); + } + } + + rrdset_done(st_system_interrupts); + + if(likely(do_per_core != CONFIG_BOOLEAN_NO)) { + static RRDSET **core_st = NULL; + static int old_cpus = 0; + + if(old_cpus < cpus) { + core_st = reallocz(core_st, sizeof(RRDSET *) * cpus); + memset(&core_st[old_cpus], 0, sizeof(RRDSET *) * (cpus - old_cpus)); + old_cpus = cpus; + } + + int c; + + for(c = 0; c < cpus ;c++) { + if(unlikely(!core_st[c])) { + char id[50+1]; + snprintfz(id, 50, "cpu%d_interrupts", c); + + char title[100+1]; + snprintfz(title, 100, "CPU Interrupts"); + core_st[c] = rrdset_create_localhost( + "cpu" + , id + , NULL + , "interrupts" + , "cpu.interrupts" + , title + , "interrupts/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_INTERRUPTS_NAME + , NETDATA_CHART_PRIO_INTERRUPTS_PER_CORE + c + , update_every + , RRDSET_TYPE_STACKED + ); + + char core[50+1]; + snprintfz(core, 50, "cpu%d", c); + rrdlabels_add(core_st[c]->rrdlabels, "cpu", core, RRDLABEL_SRC_AUTO); + } + + for(l = 0; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + if(irr->used && (do_per_core == CONFIG_BOOLEAN_YES || irr->cpu[c].value)) { + if(unlikely(!irr->cpu[c].rd)) { + irr->cpu[c].rd = rrddim_add(core_st[c], irr->id, irr->name, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_reset_name(core_st[c], irr->cpu[c].rd, irr->name); + } + + rrddim_set_by_pointer(core_st[c], irr->cpu[c].rd, irr->cpu[c].value); + } + } + + rrdset_done(core_st[c]); + } + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_loadavg.c b/collectors/proc.plugin/proc_loadavg.c new file mode 100644 index 0000000..d928c86 --- /dev/null +++ b/collectors/proc.plugin/proc_loadavg.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_LOADAVG_NAME "/proc/loadavg" +#define CONFIG_SECTION_PLUGIN_PROC_LOADAVG "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_LOADAVG_NAME + +// linux calculates this once every 5 seconds +#define MIN_LOADAVG_UPDATE_EVERY 5 + +int do_proc_loadavg(int update_every, usec_t dt) { + static procfile *ff = NULL; + static int do_loadavg = -1, do_all_processes = -1; + static usec_t next_loadavg_dt = 0; + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/loadavg"); + + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_LOADAVG, "filename to monitor", filename), " \t,:|/", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) + return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + if(unlikely(do_loadavg == -1)) { + do_loadavg = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_LOADAVG, "enable load average", 1); + do_all_processes = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_LOADAVG, "enable total processes", 1); + } + + if(unlikely(procfile_lines(ff) < 1)) { + error("/proc/loadavg has no lines."); + return 1; + } + if(unlikely(procfile_linewords(ff, 0) < 6)) { + error("/proc/loadavg has less than 6 words in it."); + return 1; + } + + double load1 = strtod(procfile_lineword(ff, 0, 0), NULL); + double load5 = strtod(procfile_lineword(ff, 0, 1), NULL); + double load15 = strtod(procfile_lineword(ff, 0, 2), NULL); + + //unsigned long long running_processes = str2ull(procfile_lineword(ff, 0, 3)); + unsigned long long active_processes = str2ull(procfile_lineword(ff, 0, 4)); + + //get system pid_max + unsigned long long max_processes = get_system_pid_max(); + // + //unsigned long long next_pid = str2ull(procfile_lineword(ff, 0, 5)); + + if(next_loadavg_dt <= dt) { + if(likely(do_loadavg)) { + static RRDSET *load_chart = NULL; + static RRDDIM *rd_load1 = NULL, *rd_load5 = NULL, *rd_load15 = NULL; + + if(unlikely(!load_chart)) { + load_chart = rrdset_create_localhost( + "system" + , "load" + , NULL + , "load" + , NULL + , "System Load Average" + , "load" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_LOADAVG_NAME + , NETDATA_CHART_PRIO_SYSTEM_LOAD + , (update_every < MIN_LOADAVG_UPDATE_EVERY) ? MIN_LOADAVG_UPDATE_EVERY : update_every + , RRDSET_TYPE_LINE + ); + + rd_load1 = rrddim_add(load_chart, "load1", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rd_load5 = rrddim_add(load_chart, "load5", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rd_load15 = rrddim_add(load_chart, "load15", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(load_chart, rd_load1, (collected_number) (load1 * 1000)); + rrddim_set_by_pointer(load_chart, rd_load5, (collected_number) (load5 * 1000)); + rrddim_set_by_pointer(load_chart, rd_load15, (collected_number) (load15 * 1000)); + rrdset_done(load_chart); + + next_loadavg_dt = load_chart->update_every * USEC_PER_SEC; + } + else + next_loadavg_dt = MIN_LOADAVG_UPDATE_EVERY * USEC_PER_SEC; + } + else + next_loadavg_dt -= dt; + + + if(likely(do_all_processes)) { + static RRDSET *processes_chart = NULL; + static RRDDIM *rd_active = NULL; + static const RRDSETVAR_ACQUIRED *rd_pidmax; + + if(unlikely(!processes_chart)) { + processes_chart = rrdset_create_localhost( + "system" + , "active_processes" + , NULL + , "processes" + , NULL + , "System Active Processes" + , "processes" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_LOADAVG_NAME + , NETDATA_CHART_PRIO_SYSTEM_ACTIVE_PROCESSES + , update_every + , RRDSET_TYPE_LINE + ); + + rd_active = rrddim_add(processes_chart, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pidmax = rrdsetvar_custom_chart_variable_add_and_acquire(processes_chart, "pidmax"); + } + + rrddim_set_by_pointer(processes_chart, rd_active, active_processes); + rrdsetvar_custom_chart_variable_set(processes_chart, rd_pidmax, max_processes); + rrdset_done(processes_chart); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_mdstat.c b/collectors/proc.plugin/proc_mdstat.c new file mode 100644 index 0000000..63e0c68 --- /dev/null +++ b/collectors/proc.plugin/proc_mdstat.c @@ -0,0 +1,640 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_MDSTAT_NAME "/proc/mdstat" + +struct raid { + int redundant; + char *name; + uint32_t hash; + char *level; + + RRDDIM *rd_health; + unsigned long long failed_disks; + + RRDSET *st_disks; + RRDDIM *rd_down; + RRDDIM *rd_inuse; + unsigned long long total_disks; + unsigned long long inuse_disks; + + RRDSET *st_operation; + RRDDIM *rd_check; + RRDDIM *rd_resync; + RRDDIM *rd_recovery; + RRDDIM *rd_reshape; + unsigned long long check; + unsigned long long resync; + unsigned long long recovery; + unsigned long long reshape; + + RRDSET *st_finish; + RRDDIM *rd_finish_in; + unsigned long long finish_in; + + RRDSET *st_speed; + RRDDIM *rd_speed; + unsigned long long speed; + + char *mismatch_cnt_filename; + RRDSET *st_mismatch_cnt; + RRDDIM *rd_mismatch_cnt; + unsigned long long mismatch_cnt; + + RRDSET *st_nonredundant; + RRDDIM *rd_nonredundant; +}; + +struct old_raid { + int redundant; + char *name; + uint32_t hash; + int found; +}; + +static inline char *remove_trailing_chars(char *s, char c) +{ + while (*s) { + if (unlikely(*s == c)) { + *s = '\0'; + } + s++; + } + return s; +} + +static inline void make_chart_obsolete(char *name, const char *id_modifier) +{ + char id[50 + 1]; + RRDSET *st = NULL; + + if (likely(name && id_modifier)) { + snprintfz(id, 50, "mdstat.%s_%s", name, id_modifier); + st = rrdset_find_active_byname_localhost(id); + if (likely(st)) + rrdset_is_obsolete(st); + } +} + +static void add_labels_to_mdstat(struct raid *raid, RRDSET *st) { + rrdlabels_add(st->rrdlabels, "device", raid->name, RRDLABEL_SRC_AUTO); + rrdlabels_add(st->rrdlabels, "raid_level", raid->level, RRDLABEL_SRC_AUTO); +} + +int do_proc_mdstat(int update_every, usec_t dt) +{ + (void)dt; + static procfile *ff = NULL; + static int do_health = -1, do_nonredundant = -1, do_disks = -1, do_operations = -1, do_mismatch = -1, + do_mismatch_config = -1; + static int make_charts_obsolete = -1; + static char *mdstat_filename = NULL, *mismatch_cnt_filename = NULL; + static struct raid *raids = NULL; + static size_t raids_allocated = 0; + size_t raids_num = 0, raid_idx = 0, redundant_num = 0; + static struct old_raid *old_raids = NULL; + static size_t old_raids_allocated = 0; + size_t old_raid_idx = 0; + + if (unlikely(do_health == -1)) { + do_health = + config_get_boolean("plugin:proc:/proc/mdstat", "faulty devices", CONFIG_BOOLEAN_YES); + do_nonredundant = + config_get_boolean("plugin:proc:/proc/mdstat", "nonredundant arrays availability", CONFIG_BOOLEAN_YES); + do_mismatch_config = + config_get_boolean_ondemand("plugin:proc:/proc/mdstat", "mismatch count", CONFIG_BOOLEAN_AUTO); + do_disks = + config_get_boolean("plugin:proc:/proc/mdstat", "disk stats", CONFIG_BOOLEAN_YES); + do_operations = + config_get_boolean("plugin:proc:/proc/mdstat", "operation status", CONFIG_BOOLEAN_YES); + + make_charts_obsolete = + config_get_boolean("plugin:proc:/proc/mdstat", "make charts obsolete", CONFIG_BOOLEAN_YES); + + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/mdstat"); + mdstat_filename = config_get("plugin:proc:/proc/mdstat", "filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/md/mismatch_cnt"); + mismatch_cnt_filename = config_get("plugin:proc:/proc/mdstat", "mismatch_cnt filename to monitor", filename); + } + + if (unlikely(!ff)) { + ff = procfile_open(mdstat_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) + return 0; // we return 0, so that we will retry opening it next time + + size_t lines = procfile_lines(ff); + size_t words = 0; + + if (unlikely(lines < 2)) { + error("Cannot read /proc/mdstat. Expected 2 or more lines, read %zu.", lines); + return 1; + } + + // find how many raids are there + size_t l; + raids_num = 0; + for (l = 1; l < lines - 2; l++) { + if (unlikely(procfile_lineword(ff, l, 1)[0] == 'a')) // check if the raid is active + raids_num++; + } + + if (unlikely(!raids_num && !old_raids_allocated)) + return 0; // we return 0, so that we will retry searching for raids next time + + // allocate the memory we need; + if (unlikely(raids_num != raids_allocated)) { + for (raid_idx = 0; raid_idx < raids_allocated; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + freez(raid->name); + freez(raid->level); + freez(raid->mismatch_cnt_filename); + } + if (raids_num) { + raids = (struct raid *)reallocz(raids, raids_num * sizeof(struct raid)); + memset(raids, 0, raids_num * sizeof(struct raid)); + } else { + freez(raids); + raids = NULL; + } + raids_allocated = raids_num; + } + + // loop through all lines except the first and the last ones + for (l = 1, raid_idx = 0; l < (lines - 2) && raid_idx < raids_num; l++) { + struct raid *raid = &raids[raid_idx]; + raid->redundant = 0; + + words = procfile_linewords(ff, l); + + if (unlikely(words < 3)) + continue; + + if (unlikely(procfile_lineword(ff, l, 1)[0] != 'a')) + continue; + + if (unlikely(!raid->name)) { + raid->name = strdupz(procfile_lineword(ff, l, 0)); + raid->hash = simple_hash(raid->name); + raid->level = strdupz(procfile_lineword(ff, l, 2)); + } else if (unlikely(strcmp(raid->name, procfile_lineword(ff, l, 0)))) { + freez(raid->name); + freez(raid->mismatch_cnt_filename); + freez(raid->level); + memset(raid, 0, sizeof(struct raid)); + raid->name = strdupz(procfile_lineword(ff, l, 0)); + raid->hash = simple_hash(raid->name); + raid->level = strdupz(procfile_lineword(ff, l, 2)); + } + + if (unlikely(!raid->name || !raid->name[0])) + continue; + + raid_idx++; + + // check if raid has disk status + l++; + words = procfile_linewords(ff, l); + if (words < 2 || procfile_lineword(ff, l, words - 1)[0] != '[') + continue; + + // split inuse and total number of disks + if (likely(do_health || do_disks)) { + char *s = NULL, *str_total = NULL, *str_inuse = NULL; + + s = procfile_lineword(ff, l, words - 2); + if (unlikely(s[0] != '[')) { + error("Cannot read /proc/mdstat raid health status. Unexpected format: missing opening bracket."); + continue; + } + str_total = ++s; + while (*s) { + if (unlikely(*s == '/')) { + *s = '\0'; + str_inuse = s + 1; + } else if (unlikely(*s == ']')) { + *s = '\0'; + break; + } + s++; + } + if (unlikely(str_total[0] == '\0' || !str_inuse || str_inuse[0] == '\0')) { + error("Cannot read /proc/mdstat raid health status. Unexpected format."); + continue; + } + + raid->inuse_disks = str2ull(str_inuse); + raid->total_disks = str2ull(str_total); + raid->failed_disks = raid->total_disks - raid->inuse_disks; + } + + raid->redundant = 1; + redundant_num++; + l++; + + // check if any operation is performed on the raid + if (likely(do_operations)) { + char *s = NULL; + + raid->check = 0; + raid->resync = 0; + raid->recovery = 0; + raid->reshape = 0; + raid->finish_in = 0; + raid->speed = 0; + + words = procfile_linewords(ff, l); + + if (likely(words < 2)) + continue; + + if (unlikely(procfile_lineword(ff, l, 0)[0] != '[')) + continue; + + if (unlikely(words < 7)) { + error("Cannot read /proc/mdstat line. Expected 7 params, read %zu.", words); + continue; + } + + char *word; + word = procfile_lineword(ff, l, 3); + remove_trailing_chars(word, '%'); + + unsigned long long percentage = (unsigned long long)(str2ndd(word, NULL) * 100); + // possible operations: check, resync, recovery, reshape + // 4-th character is unique for each operation so it is checked + switch (procfile_lineword(ff, l, 1)[3]) { + case 'c': // check + raid->check = percentage; + break; + case 'y': // resync + raid->resync = percentage; + break; + case 'o': // recovery + raid->recovery = percentage; + break; + case 'h': // reshape + raid->reshape = percentage; + break; + } + + word = procfile_lineword(ff, l, 5); + s = remove_trailing_chars(word, 'm'); // remove trailing "min" + + word += 7; // skip leading "finish=" + + if (likely(s > word)) + raid->finish_in = (unsigned long long)(str2ndd(word, NULL) * 60); + + word = procfile_lineword(ff, l, 6); + s = remove_trailing_chars(word, 'K'); // remove trailing "K/sec" + + word += 6; // skip leading "speed=" + + if (likely(s > word)) + raid->speed = str2ull(word); + } + } + + // read mismatch_cnt files + if (do_mismatch == -1) { + if (do_mismatch_config == CONFIG_BOOLEAN_AUTO) { + if (raids_num > 50) + do_mismatch = CONFIG_BOOLEAN_NO; + else + do_mismatch = CONFIG_BOOLEAN_YES; + } else + do_mismatch = do_mismatch_config; + } + + if (likely(do_mismatch)) { + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + char filename[FILENAME_MAX + 1]; + struct raid *raid = &raids[raid_idx]; + + if (likely(raid->redundant)) { + if (unlikely(!raid->mismatch_cnt_filename)) { + snprintfz(filename, FILENAME_MAX, mismatch_cnt_filename, raid->name); + raid->mismatch_cnt_filename = strdupz(filename); + } + if (unlikely(read_single_number_file(raid->mismatch_cnt_filename, &raid->mismatch_cnt))) { + error("Cannot read file '%s'", raid->mismatch_cnt_filename); + do_mismatch = CONFIG_BOOLEAN_NO; + error("Monitoring for mismatch count has been disabled"); + break; + } + } + } + } + + // check for disappeared raids + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + struct old_raid *old_raid = &old_raids[old_raid_idx]; + int found = 0; + + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + + if (unlikely( + raid->hash == old_raid->hash && !strcmp(raid->name, old_raid->name) && + raid->redundant == old_raid->redundant)) + found = 1; + } + + old_raid->found = found; + } + + int raid_disappeared = 0; + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + struct old_raid *old_raid = &old_raids[old_raid_idx]; + + if (unlikely(!old_raid->found)) { + if (likely(make_charts_obsolete)) { + make_chart_obsolete(old_raid->name, "disks"); + make_chart_obsolete(old_raid->name, "mismatch"); + make_chart_obsolete(old_raid->name, "operation"); + make_chart_obsolete(old_raid->name, "finish"); + make_chart_obsolete(old_raid->name, "speed"); + make_chart_obsolete(old_raid->name, "availability"); + } + raid_disappeared = 1; + } + } + + // allocate memory for nonredundant arrays + if (unlikely(raid_disappeared || old_raids_allocated != raids_num)) { + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + freez(old_raids[old_raid_idx].name); + } + if (likely(raids_num)) { + old_raids = reallocz(old_raids, sizeof(struct old_raid) * raids_num); + memset(old_raids, 0, sizeof(struct old_raid) * raids_num); + } else { + freez(old_raids); + old_raids = NULL; + } + old_raids_allocated = raids_num; + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + struct old_raid *old_raid = &old_raids[old_raid_idx]; + struct raid *raid = &raids[old_raid_idx]; + + old_raid->name = strdupz(raid->name); + old_raid->hash = raid->hash; + old_raid->redundant = raid->redundant; + } + } + + if (likely(do_health && redundant_num)) { + static RRDSET *st_mdstat_health = NULL; + if (unlikely(!st_mdstat_health)) { + st_mdstat_health = rrdset_create_localhost( + "mdstat", + "mdstat_health", + NULL, + "health", + "md.health", + "Faulty Devices In MD", + "failed disks", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_HEALTH, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(st_mdstat_health); + } + + if (!redundant_num) { + if (likely(make_charts_obsolete)) + make_chart_obsolete("mdstat", "health"); + } else { + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + + if (likely(raid->redundant)) { + if (unlikely(!raid->rd_health && !(raid->rd_health = rrddim_find_active(st_mdstat_health, raid->name)))) + raid->rd_health = rrddim_add(st_mdstat_health, raid->name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mdstat_health, raid->rd_health, raid->failed_disks); + } + } + + rrdset_done(st_mdstat_health); + } + } + + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + char id[50 + 1]; + char family[50 + 1]; + + if (likely(raid->redundant)) { + if (likely(do_disks)) { + snprintfz(id, 50, "%s_disks", raid->name); + + if (unlikely(!raid->st_disks && !(raid->st_disks = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + + raid->st_disks = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.disks", + "Disks Stats", + "disks", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_DISKS + raid_idx * 10, + update_every, + RRDSET_TYPE_STACKED); + + rrdset_isnot_obsolete(raid->st_disks); + + add_labels_to_mdstat(raid, raid->st_disks); + } + + if (unlikely(!raid->rd_inuse && !(raid->rd_inuse = rrddim_find_active(raid->st_disks, "inuse")))) + raid->rd_inuse = rrddim_add(raid->st_disks, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + if (unlikely(!raid->rd_down && !(raid->rd_down = rrddim_find_active(raid->st_disks, "down")))) + raid->rd_down = rrddim_add(raid->st_disks, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_disks, raid->rd_inuse, raid->inuse_disks); + rrddim_set_by_pointer(raid->st_disks, raid->rd_down, raid->failed_disks); + rrdset_done(raid->st_disks); + } + + if (likely(do_mismatch)) { + snprintfz(id, 50, "%s_mismatch", raid->name); + + if (unlikely(!raid->st_mismatch_cnt && !(raid->st_mismatch_cnt = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + + raid->st_mismatch_cnt = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.mismatch_cnt", + "Mismatch Count", + "unsynchronized blocks", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_MISMATCH + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_mismatch_cnt); + + add_labels_to_mdstat(raid, raid->st_mismatch_cnt); + } + + if (unlikely(!raid->rd_mismatch_cnt && !(raid->rd_mismatch_cnt = rrddim_find_active(raid->st_mismatch_cnt, "count")))) + raid->rd_mismatch_cnt = rrddim_add(raid->st_mismatch_cnt, "count", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_mismatch_cnt, raid->rd_mismatch_cnt, raid->mismatch_cnt); + rrdset_done(raid->st_mismatch_cnt); + } + + if (likely(do_operations)) { + snprintfz(id, 50, "%s_operation", raid->name); + + if (unlikely(!raid->st_operation && !(raid->st_operation = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + + raid->st_operation = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.status", + "Current Status", + "percent", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_OPERATION + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_operation); + + add_labels_to_mdstat(raid, raid->st_operation); + } + + if(unlikely(!raid->rd_check && !(raid->rd_check = rrddim_find_active(raid->st_operation, "check")))) + raid->rd_check = rrddim_add(raid->st_operation, "check", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if(unlikely(!raid->rd_resync && !(raid->rd_resync = rrddim_find_active(raid->st_operation, "resync")))) + raid->rd_resync = rrddim_add(raid->st_operation, "resync", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if(unlikely(!raid->rd_recovery && !(raid->rd_recovery = rrddim_find_active(raid->st_operation, "recovery")))) + raid->rd_recovery = rrddim_add(raid->st_operation, "recovery", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if(unlikely(!raid->rd_reshape && !(raid->rd_reshape = rrddim_find_active(raid->st_operation, "reshape")))) + raid->rd_reshape = rrddim_add(raid->st_operation, "reshape", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_operation, raid->rd_check, raid->check); + rrddim_set_by_pointer(raid->st_operation, raid->rd_resync, raid->resync); + rrddim_set_by_pointer(raid->st_operation, raid->rd_recovery, raid->recovery); + rrddim_set_by_pointer(raid->st_operation, raid->rd_reshape, raid->reshape); + rrdset_done(raid->st_operation); + + snprintfz(id, 50, "%s_finish", raid->name); + if (unlikely(!raid->st_finish && !(raid->st_finish = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + + raid->st_finish = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.expected_time_until_operation_finish", + "Approximate Time Until Finish", + "seconds", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_FINISH + raid_idx * 10, + update_every, RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_finish); + + add_labels_to_mdstat(raid, raid->st_finish); + } + + if(unlikely(!raid->rd_finish_in && !(raid->rd_finish_in = rrddim_find_active(raid->st_finish, "finish_in")))) + raid->rd_finish_in = rrddim_add(raid->st_finish, "finish_in", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_finish, raid->rd_finish_in, raid->finish_in); + rrdset_done(raid->st_finish); + + snprintfz(id, 50, "%s_speed", raid->name); + if (unlikely(!raid->st_speed && !(raid->st_speed = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + + raid->st_speed = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.operation_speed", + "Operation Speed", + "KiB/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_SPEED + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_speed); + + add_labels_to_mdstat(raid, raid->st_speed); + } + + if (unlikely(!raid->rd_speed && !(raid->rd_speed = rrddim_find_active(raid->st_speed, "speed")))) + raid->rd_speed = rrddim_add(raid->st_speed, "speed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_speed, raid->rd_speed, raid->speed); + rrdset_done(raid->st_speed); + } + } else { + if (likely(do_nonredundant)) { + snprintfz(id, 50, "%s_availability", raid->name); + + if (unlikely(!raid->st_nonredundant && !(raid->st_nonredundant = rrdset_find_active_localhost(id)))) { + snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + + raid->st_nonredundant = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.nonredundant", + "Nonredundant Array Availability", + "boolean", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_nonredundant); + + add_labels_to_mdstat(raid, raid->st_nonredundant); + } + + if (unlikely(!raid->rd_nonredundant && !(raid->rd_nonredundant = rrddim_find_active(raid->st_nonredundant, "available")))) + raid->rd_nonredundant = rrddim_add(raid->st_nonredundant, "available", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_nonredundant, raid->rd_nonredundant, 1); + rrdset_done(raid->st_nonredundant); + } + } + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_meminfo.c b/collectors/proc.plugin/proc_meminfo.c new file mode 100644 index 0000000..2f390c6 --- /dev/null +++ b/collectors/proc.plugin/proc_meminfo.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_MEMINFO_NAME "/proc/meminfo" +#define CONFIG_SECTION_PLUGIN_PROC_MEMINFO "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_MEMINFO_NAME + +int do_proc_meminfo(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1, do_hugepages = -1, do_transparent_hugepages = -1; + static int do_percpu = 0; + + static ARL_BASE *arl_base = NULL; + static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL; + + static unsigned long long + MemTotal = 0, + MemFree = 0, + MemAvailable = 0, + Buffers = 0, + Cached = 0, + //SwapCached = 0, + //Active = 0, + //Inactive = 0, + //ActiveAnon = 0, + //InactiveAnon = 0, + //ActiveFile = 0, + //InactiveFile = 0, + //Unevictable = 0, + //Mlocked = 0, + SwapTotal = 0, + SwapFree = 0, + Dirty = 0, + Writeback = 0, + //AnonPages = 0, + //Mapped = 0, + Shmem = 0, + Slab = 0, + SReclaimable = 0, + SUnreclaim = 0, + KernelStack = 0, + PageTables = 0, + NFS_Unstable = 0, + Bounce = 0, + WritebackTmp = 0, + //CommitLimit = 0, + Committed_AS = 0, + //VmallocTotal = 0, + VmallocUsed = 0, + //VmallocChunk = 0, + Percpu = 0, + AnonHugePages = 0, + ShmemHugePages = 0, + HugePages_Total = 0, + HugePages_Free = 0, + HugePages_Rsvd = 0, + HugePages_Surp = 0, + Hugepagesize = 0, + //DirectMap4k = 0, + //DirectMap2M = 0, + HardwareCorrupted = 0; + + if(unlikely(!arl_base)) { + do_ram = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "system ram", 1); + do_swap = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "system swap", CONFIG_BOOLEAN_AUTO); + do_hwcorrupt = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "hardware corrupted ECC", CONFIG_BOOLEAN_AUTO); + do_committed = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "committed memory", 1); + do_writeback = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "writeback memory", 1); + do_kernel = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "kernel memory", 1); + do_slab = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "slab memory", 1); + do_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "hugepages", CONFIG_BOOLEAN_AUTO); + do_transparent_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "transparent hugepages", CONFIG_BOOLEAN_AUTO); + + arl_base = arl_create("meminfo", NULL, 60); + arl_expect(arl_base, "MemTotal", &MemTotal); + arl_expect(arl_base, "MemFree", &MemFree); + arl_memavailable = arl_expect(arl_base, "MemAvailable", &MemAvailable); + arl_expect(arl_base, "Buffers", &Buffers); + arl_expect(arl_base, "Cached", &Cached); + //arl_expect(arl_base, "SwapCached", &SwapCached); + //arl_expect(arl_base, "Active", &Active); + //arl_expect(arl_base, "Inactive", &Inactive); + //arl_expect(arl_base, "ActiveAnon", &ActiveAnon); + //arl_expect(arl_base, "InactiveAnon", &InactiveAnon); + //arl_expect(arl_base, "ActiveFile", &ActiveFile); + //arl_expect(arl_base, "InactiveFile", &InactiveFile); + //arl_expect(arl_base, "Unevictable", &Unevictable); + //arl_expect(arl_base, "Mlocked", &Mlocked); + arl_expect(arl_base, "SwapTotal", &SwapTotal); + arl_expect(arl_base, "SwapFree", &SwapFree); + arl_expect(arl_base, "Dirty", &Dirty); + arl_expect(arl_base, "Writeback", &Writeback); + //arl_expect(arl_base, "AnonPages", &AnonPages); + //arl_expect(arl_base, "Mapped", &Mapped); + arl_expect(arl_base, "Shmem", &Shmem); + arl_expect(arl_base, "Slab", &Slab); + arl_expect(arl_base, "SReclaimable", &SReclaimable); + arl_expect(arl_base, "SUnreclaim", &SUnreclaim); + arl_expect(arl_base, "KernelStack", &KernelStack); + arl_expect(arl_base, "PageTables", &PageTables); + arl_expect(arl_base, "NFS_Unstable", &NFS_Unstable); + arl_expect(arl_base, "Bounce", &Bounce); + arl_expect(arl_base, "WritebackTmp", &WritebackTmp); + //arl_expect(arl_base, "CommitLimit", &CommitLimit); + arl_expect(arl_base, "Committed_AS", &Committed_AS); + //arl_expect(arl_base, "VmallocTotal", &VmallocTotal); + arl_expect(arl_base, "VmallocUsed", &VmallocUsed); + //arl_expect(arl_base, "VmallocChunk", &VmallocChunk); + arl_expect(arl_base, "Percpu", &Percpu); + arl_hwcorrupted = arl_expect(arl_base, "HardwareCorrupted", &HardwareCorrupted); + arl_expect(arl_base, "AnonHugePages", &AnonHugePages); + arl_expect(arl_base, "ShmemHugePages", &ShmemHugePages); + arl_expect(arl_base, "HugePages_Total", &HugePages_Total); + arl_expect(arl_base, "HugePages_Free", &HugePages_Free); + arl_expect(arl_base, "HugePages_Rsvd", &HugePages_Rsvd); + arl_expect(arl_base, "HugePages_Surp", &HugePages_Surp); + arl_expect(arl_base, "Hugepagesize", &Hugepagesize); + //arl_expect(arl_base, "DirectMap4k", &DirectMap4k); + //arl_expect(arl_base, "DirectMap2M", &DirectMap2M); + } + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo"); + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) + return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + arl_begin(arl_base); + + static int first_ff_read = 1; + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 2)) continue; + + if (first_ff_read && !strcmp(procfile_lineword(ff, l, 0), "Percpu")) + do_percpu = 1; + + if(unlikely(arl_check(arl_base, + procfile_lineword(ff, l, 0), + procfile_lineword(ff, l, 1)))) break; + } + + if (first_ff_read) + first_ff_read = 0; + + // http://calimeroteknik.free.fr/blag/?article20/really-used-memory-on-gnu-linux + unsigned long long MemCached = Cached + SReclaimable - Shmem; + unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers; + // The Linux kernel doesn't report ZFS ARC usage as cache memory (the ARC is included in the total used system memory) + MemCached += (zfs_arcstats_shrinkable_cache_size_bytes / 1024); + MemUsed -= (zfs_arcstats_shrinkable_cache_size_bytes / 1024); + MemAvailable += (zfs_arcstats_shrinkable_cache_size_bytes / 1024); + + if(do_ram) { + { + static RRDSET *st_system_ram = NULL; + static RRDDIM *rd_free = NULL, *rd_used = NULL, *rd_cached = NULL, *rd_buffers = NULL; + + if(unlikely(!st_system_ram)) { + st_system_ram = rrdset_create_localhost( + "system" + , "ram" + , NULL + , "ram" + , NULL + , "System RAM" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_SYSTEM_RAM + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_free = rrddim_add(st_system_ram, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_used = rrddim_add(st_system_ram, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_cached = rrddim_add(st_system_ram, "cached", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_buffers = rrddim_add(st_system_ram, "buffers", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_system_ram, rd_free, MemFree); + rrddim_set_by_pointer(st_system_ram, rd_used, MemUsed); + rrddim_set_by_pointer(st_system_ram, rd_cached, MemCached); + rrddim_set_by_pointer(st_system_ram, rd_buffers, Buffers); + rrdset_done(st_system_ram); + } + + if(arl_memavailable->flags & ARL_ENTRY_FLAG_FOUND) { + static RRDSET *st_mem_available = NULL; + static RRDDIM *rd_avail = NULL; + + if(unlikely(!st_mem_available)) { + st_mem_available = rrdset_create_localhost( + "mem" + , "available" + , NULL + , "system" + , NULL + , "Available RAM for applications" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE + , update_every + , RRDSET_TYPE_AREA + ); + + rd_avail = rrddim_add(st_mem_available, "MemAvailable", "avail", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_available, rd_avail, MemAvailable); + rrdset_done(st_mem_available); + } + } + + unsigned long long SwapUsed = SwapTotal - SwapFree; + + if(do_swap == CONFIG_BOOLEAN_YES || (do_swap == CONFIG_BOOLEAN_AUTO && + (SwapTotal || SwapUsed || SwapFree || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_swap = CONFIG_BOOLEAN_YES; + + static RRDSET *st_system_swap = NULL; + static RRDDIM *rd_free = NULL, *rd_used = NULL; + + if(unlikely(!st_system_swap)) { + st_system_swap = rrdset_create_localhost( + "system" + , "swap" + , NULL + , "swap" + , NULL + , "System Swap" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_SYSTEM_SWAP + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_system_swap, RRDSET_FLAG_DETAIL); + + rd_free = rrddim_add(st_system_swap, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_used = rrddim_add(st_system_swap, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_system_swap, rd_used, SwapUsed); + rrddim_set_by_pointer(st_system_swap, rd_free, SwapFree); + rrdset_done(st_system_swap); + } + + if(arl_hwcorrupted->flags & ARL_ENTRY_FLAG_FOUND && + (do_hwcorrupt == CONFIG_BOOLEAN_YES || (do_hwcorrupt == CONFIG_BOOLEAN_AUTO && + (HardwareCorrupted > 0 || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) { + do_hwcorrupt = CONFIG_BOOLEAN_YES; + + static RRDSET *st_mem_hwcorrupt = NULL; + static RRDDIM *rd_corrupted = NULL; + + if(unlikely(!st_mem_hwcorrupt)) { + st_mem_hwcorrupt = rrdset_create_localhost( + "mem" + , "hwcorrupt" + , NULL + , "ecc" + , NULL + , "Corrupted Memory, detected by ECC" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_HW + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_mem_hwcorrupt, RRDSET_FLAG_DETAIL); + + rd_corrupted = rrddim_add(st_mem_hwcorrupt, "HardwareCorrupted", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_hwcorrupt, rd_corrupted, HardwareCorrupted); + rrdset_done(st_mem_hwcorrupt); + } + + if(do_committed) { + static RRDSET *st_mem_committed = NULL; + static RRDDIM *rd_committed = NULL; + + if(unlikely(!st_mem_committed)) { + st_mem_committed = rrdset_create_localhost( + "mem" + , "committed" + , NULL + , "system" + , NULL + , "Committed (Allocated) Memory" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(st_mem_committed, RRDSET_FLAG_DETAIL); + + rd_committed = rrddim_add(st_mem_committed, "Committed_AS", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_committed, rd_committed, Committed_AS); + rrdset_done(st_mem_committed); + } + + if(do_writeback) { + static RRDSET *st_mem_writeback = NULL; + static RRDDIM *rd_dirty = NULL, *rd_writeback = NULL, *rd_fusewriteback = NULL, *rd_nfs_writeback = NULL, *rd_bounce = NULL; + + if(unlikely(!st_mem_writeback)) { + st_mem_writeback = rrdset_create_localhost( + "mem" + , "writeback" + , NULL + , "kernel" + , NULL + , "Writeback Memory" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_KERNEL + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st_mem_writeback, RRDSET_FLAG_DETAIL); + + rd_dirty = rrddim_add(st_mem_writeback, "Dirty", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_writeback = rrddim_add(st_mem_writeback, "Writeback", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_fusewriteback = rrddim_add(st_mem_writeback, "FuseWriteback", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_nfs_writeback = rrddim_add(st_mem_writeback, "NfsWriteback", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_bounce = rrddim_add(st_mem_writeback, "Bounce", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_writeback, rd_dirty, Dirty); + rrddim_set_by_pointer(st_mem_writeback, rd_writeback, Writeback); + rrddim_set_by_pointer(st_mem_writeback, rd_fusewriteback, WritebackTmp); + rrddim_set_by_pointer(st_mem_writeback, rd_nfs_writeback, NFS_Unstable); + rrddim_set_by_pointer(st_mem_writeback, rd_bounce, Bounce); + rrdset_done(st_mem_writeback); + } + + // -------------------------------------------------------------------- + + if(do_kernel) { + static RRDSET *st_mem_kernel = NULL; + static RRDDIM *rd_slab = NULL, *rd_kernelstack = NULL, *rd_pagetables = NULL, *rd_vmallocused = NULL, + *rd_percpu = NULL; + + if(unlikely(!st_mem_kernel)) { + st_mem_kernel = rrdset_create_localhost( + "mem" + , "kernel" + , NULL + , "kernel" + , NULL + , "Memory Used by Kernel" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_KERNEL + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_mem_kernel, RRDSET_FLAG_DETAIL); + + rd_slab = rrddim_add(st_mem_kernel, "Slab", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_kernelstack = rrddim_add(st_mem_kernel, "KernelStack", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_pagetables = rrddim_add(st_mem_kernel, "PageTables", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_vmallocused = rrddim_add(st_mem_kernel, "VmallocUsed", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + if (do_percpu) + rd_percpu = rrddim_add(st_mem_kernel, "Percpu", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_kernel, rd_slab, Slab); + rrddim_set_by_pointer(st_mem_kernel, rd_kernelstack, KernelStack); + rrddim_set_by_pointer(st_mem_kernel, rd_pagetables, PageTables); + rrddim_set_by_pointer(st_mem_kernel, rd_vmallocused, VmallocUsed); + if (do_percpu) + rrddim_set_by_pointer(st_mem_kernel, rd_percpu, Percpu); + + rrdset_done(st_mem_kernel); + } + + if(do_slab) { + static RRDSET *st_mem_slab = NULL; + static RRDDIM *rd_reclaimable = NULL, *rd_unreclaimable = NULL; + + if(unlikely(!st_mem_slab)) { + st_mem_slab = rrdset_create_localhost( + "mem" + , "slab" + , NULL + , "slab" + , NULL + , "Reclaimable Kernel Memory" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_SLAB + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_mem_slab, RRDSET_FLAG_DETAIL); + + rd_reclaimable = rrddim_add(st_mem_slab, "reclaimable", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_unreclaimable = rrddim_add(st_mem_slab, "unreclaimable", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_slab, rd_reclaimable, SReclaimable); + rrddim_set_by_pointer(st_mem_slab, rd_unreclaimable, SUnreclaim); + rrdset_done(st_mem_slab); + } + + if(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO && + ((Hugepagesize && HugePages_Total) || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_hugepages = CONFIG_BOOLEAN_YES; + + static RRDSET *st_mem_hugepages = NULL; + static RRDDIM *rd_used = NULL, *rd_free = NULL, *rd_rsvd = NULL, *rd_surp = NULL; + + if(unlikely(!st_mem_hugepages)) { + st_mem_hugepages = rrdset_create_localhost( + "mem" + , "hugepages" + , NULL + , "hugepages" + , NULL + , "Dedicated HugePages Memory" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_mem_hugepages, RRDSET_FLAG_DETAIL); + + rd_free = rrddim_add(st_mem_hugepages, "free", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_used = rrddim_add(st_mem_hugepages, "used", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_surp = rrddim_add(st_mem_hugepages, "surplus", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_rsvd = rrddim_add(st_mem_hugepages, "reserved", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_hugepages, rd_used, HugePages_Total - HugePages_Free - HugePages_Rsvd); + rrddim_set_by_pointer(st_mem_hugepages, rd_free, HugePages_Free); + rrddim_set_by_pointer(st_mem_hugepages, rd_rsvd, HugePages_Rsvd); + rrddim_set_by_pointer(st_mem_hugepages, rd_surp, HugePages_Surp); + rrdset_done(st_mem_hugepages); + } + + if(do_transparent_hugepages == CONFIG_BOOLEAN_YES || (do_transparent_hugepages == CONFIG_BOOLEAN_AUTO && + (AnonHugePages || + ShmemHugePages || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_transparent_hugepages = CONFIG_BOOLEAN_YES; + + static RRDSET *st_mem_transparent_hugepages = NULL; + static RRDDIM *rd_anonymous = NULL, *rd_shared = NULL; + + if(unlikely(!st_mem_transparent_hugepages)) { + st_mem_transparent_hugepages = rrdset_create_localhost( + "mem" + , "transparent_hugepages" + , NULL + , "hugepages" + , NULL + , "Transparent HugePages Memory" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_HUGEPAGES + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_mem_transparent_hugepages, RRDSET_FLAG_DETAIL); + + rd_anonymous = rrddim_add(st_mem_transparent_hugepages, "anonymous", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_shared = rrddim_add(st_mem_transparent_hugepages, "shmem", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_anonymous, AnonHugePages); + rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_shared, ShmemHugePages); + rrdset_done(st_mem_transparent_hugepages); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_dev.c b/collectors/proc.plugin/proc_net_dev.c new file mode 100644 index 0000000..e124f63 --- /dev/null +++ b/collectors/proc.plugin/proc_net_dev.c @@ -0,0 +1,1522 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NETDEV_NAME "/proc/net/dev" +#define CONFIG_SECTION_PLUGIN_PROC_NETDEV "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETDEV_NAME + +#define STATE_LENGTH_MAX 32 + +#define READ_RETRY_PERIOD 60 // seconds + +enum { + NETDEV_DUPLEX_UNKNOWN, + NETDEV_DUPLEX_HALF, + NETDEV_DUPLEX_FULL +}; + +enum { + NETDEV_OPERSTATE_UNKNOWN, + NETDEV_OPERSTATE_NOTPRESENT, + NETDEV_OPERSTATE_DOWN, + NETDEV_OPERSTATE_LOWERLAYERDOWN, + NETDEV_OPERSTATE_TESTING, + NETDEV_OPERSTATE_DORMANT, + NETDEV_OPERSTATE_UP +}; + +static inline int get_operstate(char *operstate) +{ + // As defined in https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net + if (!strcmp(operstate, "up")) + return NETDEV_OPERSTATE_UP; + if (!strcmp(operstate, "down")) + return NETDEV_OPERSTATE_DOWN; + if (!strcmp(operstate, "notpresent")) + return NETDEV_OPERSTATE_NOTPRESENT; + if (!strcmp(operstate, "lowerlayerdown")) + return NETDEV_OPERSTATE_LOWERLAYERDOWN; + if (!strcmp(operstate, "testing")) + return NETDEV_OPERSTATE_TESTING; + if (!strcmp(operstate, "dormant")) + return NETDEV_OPERSTATE_DORMANT; + + return NETDEV_OPERSTATE_UNKNOWN; +} + +// ---------------------------------------------------------------------------- +// netdev list + +static struct netdev { + char *name; + uint32_t hash; + size_t len; + + // flags + int virtual; + int configured; + int enabled; + int updated; + + int carrier_file_exists; + time_t carrier_file_lost_time; + + int duplex_file_exists; + time_t duplex_file_lost_time; + + int speed_file_exists; + time_t speed_file_lost_time; + + int do_bandwidth; + int do_packets; + int do_errors; + int do_drops; + int do_fifo; + int do_compressed; + int do_events; + int do_speed; + int do_duplex; + int do_operstate; + int do_carrier; + int do_mtu; + + const char *chart_type_net_bytes; + const char *chart_type_net_packets; + const char *chart_type_net_errors; + const char *chart_type_net_fifo; + const char *chart_type_net_events; + const char *chart_type_net_drops; + const char *chart_type_net_compressed; + const char *chart_type_net_speed; + const char *chart_type_net_duplex; + const char *chart_type_net_operstate; + const char *chart_type_net_carrier; + const char *chart_type_net_mtu; + + const char *chart_id_net_bytes; + const char *chart_id_net_packets; + const char *chart_id_net_errors; + const char *chart_id_net_fifo; + const char *chart_id_net_events; + const char *chart_id_net_drops; + const char *chart_id_net_compressed; + const char *chart_id_net_speed; + const char *chart_id_net_duplex; + const char *chart_id_net_operstate; + const char *chart_id_net_carrier; + const char *chart_id_net_mtu; + + const char *chart_ctx_net_bytes; + const char *chart_ctx_net_packets; + const char *chart_ctx_net_errors; + const char *chart_ctx_net_fifo; + const char *chart_ctx_net_events; + const char *chart_ctx_net_drops; + const char *chart_ctx_net_compressed; + const char *chart_ctx_net_speed; + const char *chart_ctx_net_duplex; + const char *chart_ctx_net_operstate; + const char *chart_ctx_net_carrier; + const char *chart_ctx_net_mtu; + + const char *chart_family; + + DICTIONARY *chart_labels; + + int flipped; + unsigned long priority; + + // data collected + kernel_uint_t rbytes; + kernel_uint_t rpackets; + kernel_uint_t rerrors; + kernel_uint_t rdrops; + kernel_uint_t rfifo; + kernel_uint_t rframe; + kernel_uint_t rcompressed; + kernel_uint_t rmulticast; + + kernel_uint_t tbytes; + kernel_uint_t tpackets; + kernel_uint_t terrors; + kernel_uint_t tdrops; + kernel_uint_t tfifo; + kernel_uint_t tcollisions; + kernel_uint_t tcarrier; + kernel_uint_t tcompressed; + kernel_uint_t speed; + kernel_uint_t duplex; + kernel_uint_t operstate; + unsigned long long carrier; + unsigned long long mtu; + + // charts + RRDSET *st_bandwidth; + RRDSET *st_packets; + RRDSET *st_errors; + RRDSET *st_drops; + RRDSET *st_fifo; + RRDSET *st_compressed; + RRDSET *st_events; + RRDSET *st_speed; + RRDSET *st_duplex; + RRDSET *st_operstate; + RRDSET *st_carrier; + RRDSET *st_mtu; + + // dimensions + RRDDIM *rd_rbytes; + RRDDIM *rd_rpackets; + RRDDIM *rd_rerrors; + RRDDIM *rd_rdrops; + RRDDIM *rd_rfifo; + RRDDIM *rd_rframe; + RRDDIM *rd_rcompressed; + RRDDIM *rd_rmulticast; + + RRDDIM *rd_tbytes; + RRDDIM *rd_tpackets; + RRDDIM *rd_terrors; + RRDDIM *rd_tdrops; + RRDDIM *rd_tfifo; + RRDDIM *rd_tcollisions; + RRDDIM *rd_tcarrier; + RRDDIM *rd_tcompressed; + + RRDDIM *rd_speed; + RRDDIM *rd_duplex_full; + RRDDIM *rd_duplex_half; + RRDDIM *rd_duplex_unknown; + RRDDIM *rd_operstate_unknown; + RRDDIM *rd_operstate_notpresent; + RRDDIM *rd_operstate_down; + RRDDIM *rd_operstate_lowerlayerdown; + RRDDIM *rd_operstate_testing; + RRDDIM *rd_operstate_dormant; + RRDDIM *rd_operstate_up; + RRDDIM *rd_carrier_up; + RRDDIM *rd_carrier_down; + RRDDIM *rd_mtu; + + char *filename_speed; + const RRDSETVAR_ACQUIRED *chart_var_speed; + + char *filename_duplex; + char *filename_operstate; + char *filename_carrier; + char *filename_mtu; + + struct netdev *next; +} *netdev_root = NULL, *netdev_last_used = NULL; + +static size_t netdev_added = 0, netdev_found = 0; + +// ---------------------------------------------------------------------------- + +static void netdev_charts_release(struct netdev *d) { + if(d->st_bandwidth) rrdset_is_obsolete(d->st_bandwidth); + if(d->st_packets) rrdset_is_obsolete(d->st_packets); + if(d->st_errors) rrdset_is_obsolete(d->st_errors); + if(d->st_drops) rrdset_is_obsolete(d->st_drops); + if(d->st_fifo) rrdset_is_obsolete(d->st_fifo); + if(d->st_compressed) rrdset_is_obsolete(d->st_compressed); + if(d->st_events) rrdset_is_obsolete(d->st_events); + if(d->st_speed) rrdset_is_obsolete(d->st_speed); + if(d->st_duplex) rrdset_is_obsolete(d->st_duplex); + if(d->st_operstate) rrdset_is_obsolete(d->st_operstate); + if(d->st_carrier) rrdset_is_obsolete(d->st_carrier); + if(d->st_mtu) rrdset_is_obsolete(d->st_mtu); + + d->st_bandwidth = NULL; + d->st_compressed = NULL; + d->st_drops = NULL; + d->st_errors = NULL; + d->st_events = NULL; + d->st_fifo = NULL; + d->st_packets = NULL; + d->st_speed = NULL; + d->st_duplex = NULL; + d->st_operstate = NULL; + d->st_carrier = NULL; + d->st_mtu = NULL; + + d->rd_rbytes = NULL; + d->rd_rpackets = NULL; + d->rd_rerrors = NULL; + d->rd_rdrops = NULL; + d->rd_rfifo = NULL; + d->rd_rframe = NULL; + d->rd_rcompressed = NULL; + d->rd_rmulticast = NULL; + + d->rd_tbytes = NULL; + d->rd_tpackets = NULL; + d->rd_terrors = NULL; + d->rd_tdrops = NULL; + d->rd_tfifo = NULL; + d->rd_tcollisions = NULL; + d->rd_tcarrier = NULL; + d->rd_tcompressed = NULL; + + d->rd_speed = NULL; + d->rd_duplex_full = NULL; + d->rd_duplex_half = NULL; + d->rd_duplex_unknown = NULL; + d->rd_carrier_up = NULL; + d->rd_carrier_down = NULL; + d->rd_mtu = NULL; + + d->rd_operstate_unknown = NULL; + d->rd_operstate_notpresent = NULL; + d->rd_operstate_down = NULL; + d->rd_operstate_lowerlayerdown = NULL; + d->rd_operstate_testing = NULL; + d->rd_operstate_dormant = NULL; + d->rd_operstate_up = NULL; + + d->chart_var_speed = NULL; +} + +static void netdev_free_chart_strings(struct netdev *d) { + freez((void *)d->chart_type_net_bytes); + freez((void *)d->chart_type_net_compressed); + freez((void *)d->chart_type_net_drops); + freez((void *)d->chart_type_net_errors); + freez((void *)d->chart_type_net_events); + freez((void *)d->chart_type_net_fifo); + freez((void *)d->chart_type_net_packets); + freez((void *)d->chart_type_net_speed); + freez((void *)d->chart_type_net_duplex); + freez((void *)d->chart_type_net_operstate); + freez((void *)d->chart_type_net_carrier); + freez((void *)d->chart_type_net_mtu); + + freez((void *)d->chart_id_net_bytes); + freez((void *)d->chart_id_net_compressed); + freez((void *)d->chart_id_net_drops); + freez((void *)d->chart_id_net_errors); + freez((void *)d->chart_id_net_events); + freez((void *)d->chart_id_net_fifo); + freez((void *)d->chart_id_net_packets); + freez((void *)d->chart_id_net_speed); + freez((void *)d->chart_id_net_duplex); + freez((void *)d->chart_id_net_operstate); + freez((void *)d->chart_id_net_carrier); + freez((void *)d->chart_id_net_mtu); + + freez((void *)d->chart_ctx_net_bytes); + freez((void *)d->chart_ctx_net_compressed); + freez((void *)d->chart_ctx_net_drops); + freez((void *)d->chart_ctx_net_errors); + freez((void *)d->chart_ctx_net_events); + freez((void *)d->chart_ctx_net_fifo); + freez((void *)d->chart_ctx_net_packets); + freez((void *)d->chart_ctx_net_speed); + freez((void *)d->chart_ctx_net_duplex); + freez((void *)d->chart_ctx_net_operstate); + freez((void *)d->chart_ctx_net_carrier); + freez((void *)d->chart_ctx_net_mtu); + + freez((void *)d->chart_family); +} + +static void netdev_free(struct netdev *d) { + netdev_charts_release(d); + netdev_free_chart_strings(d); + rrdlabels_destroy(d->chart_labels); + + freez((void *)d->name); + freez((void *)d->filename_speed); + freez((void *)d->filename_duplex); + freez((void *)d->filename_operstate); + freez((void *)d->filename_carrier); + freez((void *)d->filename_mtu); + freez((void *)d); + netdev_added--; +} + +// ---------------------------------------------------------------------------- +// netdev renames + +static struct netdev_rename { + const char *host_device; + uint32_t hash; + + const char *container_device; + const char *container_name; + const char *ctx_prefix; + + DICTIONARY *chart_labels; + + int processed; + + struct netdev_rename *next; +} *netdev_rename_root = NULL; + +static int netdev_pending_renames = 0; +static netdata_mutex_t netdev_rename_mutex = NETDATA_MUTEX_INITIALIZER; + +static struct netdev_rename *netdev_rename_find(const char *host_device, uint32_t hash) { + struct netdev_rename *r; + + for(r = netdev_rename_root; r ; r = r->next) + if(r->hash == hash && !strcmp(host_device, r->host_device)) + return r; + + return NULL; +} + +// other threads can call this function to register a rename to a netdev +void netdev_rename_device_add( + const char *host_device, + const char *container_device, + const char *container_name, + DICTIONARY *labels, + const char *ctx_prefix) +{ + netdata_mutex_lock(&netdev_rename_mutex); + + uint32_t hash = simple_hash(host_device); + struct netdev_rename *r = netdev_rename_find(host_device, hash); + if(!r) { + r = callocz(1, sizeof(struct netdev_rename)); + r->host_device = strdupz(host_device); + r->container_device = strdupz(container_device); + r->container_name = strdupz(container_name); + r->ctx_prefix = strdupz(ctx_prefix); + r->chart_labels = rrdlabels_create(); + rrdlabels_migrate_to_these(r->chart_labels, labels); + r->hash = hash; + r->next = netdev_rename_root; + r->processed = 0; + netdev_rename_root = r; + netdev_pending_renames++; + info("CGROUP: registered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name); + } + else { + if(strcmp(r->container_device, container_device) != 0 || strcmp(r->container_name, container_name) != 0) { + freez((void *) r->container_device); + freez((void *) r->container_name); + + r->container_device = strdupz(container_device); + r->container_name = strdupz(container_name); + + rrdlabels_migrate_to_these(r->chart_labels, labels); + + r->processed = 0; + netdev_pending_renames++; + info("CGROUP: altered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name); + } + } + + netdata_mutex_unlock(&netdev_rename_mutex); +} + +// other threads can call this function to delete a rename to a netdev +void netdev_rename_device_del(const char *host_device) { + netdata_mutex_lock(&netdev_rename_mutex); + + struct netdev_rename *r, *last = NULL; + + uint32_t hash = simple_hash(host_device); + for(r = netdev_rename_root; r ; last = r, r = r->next) { + if (r->hash == hash && !strcmp(host_device, r->host_device)) { + if (netdev_rename_root == r) + netdev_rename_root = r->next; + else if (last) + last->next = r->next; + + if(!r->processed) + netdev_pending_renames--; + + info("CGROUP: unregistered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name); + + freez((void *) r->host_device); + freez((void *) r->container_name); + freez((void *) r->container_device); + freez((void *) r->ctx_prefix); + rrdlabels_destroy(r->chart_labels); + freez((void *) r); + break; + } + } + + netdata_mutex_unlock(&netdev_rename_mutex); +} + +static inline void netdev_rename_cgroup(struct netdev *d, struct netdev_rename *r) { + info("CGROUP: renaming network interface '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name); + + netdev_charts_release(d); + netdev_free_chart_strings(d); + + char buffer[RRD_ID_LENGTH_MAX + 1]; + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "cgroup_%s", r->container_name); + d->chart_type_net_bytes = strdupz(buffer); + d->chart_type_net_compressed = strdupz(buffer); + d->chart_type_net_drops = strdupz(buffer); + d->chart_type_net_errors = strdupz(buffer); + d->chart_type_net_events = strdupz(buffer); + d->chart_type_net_fifo = strdupz(buffer); + d->chart_type_net_packets = strdupz(buffer); + d->chart_type_net_speed = strdupz(buffer); + d->chart_type_net_duplex = strdupz(buffer); + d->chart_type_net_operstate = strdupz(buffer); + d->chart_type_net_carrier = strdupz(buffer); + d->chart_type_net_mtu = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_%s", r->container_device); + d->chart_id_net_bytes = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_compressed_%s", r->container_device); + d->chart_id_net_compressed = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_drops_%s", r->container_device); + d->chart_id_net_drops = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_errors_%s", r->container_device); + d->chart_id_net_errors = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_events_%s", r->container_device); + d->chart_id_net_events = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_fifo_%s", r->container_device); + d->chart_id_net_fifo = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_packets_%s", r->container_device); + d->chart_id_net_packets = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_speed_%s", r->container_device); + d->chart_id_net_speed = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_duplex_%s", r->container_device); + d->chart_id_net_duplex = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_operstate_%s", r->container_device); + d->chart_id_net_operstate = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_carrier_%s", r->container_device); + d->chart_id_net_carrier = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_mtu_%s", r->container_device); + d->chart_id_net_mtu = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_net", r->ctx_prefix); + d->chart_ctx_net_bytes = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_compressed", r->ctx_prefix); + d->chart_ctx_net_compressed = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_drops", r->ctx_prefix); + d->chart_ctx_net_drops = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_errors", r->ctx_prefix); + d->chart_ctx_net_errors = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_events", r->ctx_prefix); + d->chart_ctx_net_events = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_fifo", r->ctx_prefix); + d->chart_ctx_net_fifo = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_packets", r->ctx_prefix); + d->chart_ctx_net_packets = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_speed", r->ctx_prefix); + d->chart_ctx_net_speed = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_duplex", r->ctx_prefix); + d->chart_ctx_net_duplex = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_operstate", r->ctx_prefix); + d->chart_ctx_net_operstate = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_carrier", r->ctx_prefix); + d->chart_ctx_net_carrier = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_mtu", r->ctx_prefix); + d->chart_ctx_net_mtu = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "net %s", r->container_device); + d->chart_family = strdupz(buffer); + + rrdlabels_copy(d->chart_labels, r->chart_labels); + + d->priority = NETDATA_CHART_PRIO_CGROUP_NET_IFACE; + d->flipped = 1; +} + +static inline void netdev_rename(struct netdev *d) { + struct netdev_rename *r = netdev_rename_find(d->name, d->hash); + if(unlikely(r && !r->processed)) { + netdev_rename_cgroup(d, r); + r->processed = 1; + netdev_pending_renames--; + } +} + +static inline void netdev_rename_lock(struct netdev *d) { + netdata_mutex_lock(&netdev_rename_mutex); + netdev_rename(d); + netdata_mutex_unlock(&netdev_rename_mutex); +} + +static inline void netdev_rename_all_lock(void) { + netdata_mutex_lock(&netdev_rename_mutex); + + struct netdev *d; + for(d = netdev_root; d ; d = d->next) + netdev_rename(d); + + netdev_pending_renames = 0; + netdata_mutex_unlock(&netdev_rename_mutex); +} + +// ---------------------------------------------------------------------------- +// netdev data collection + +static void netdev_cleanup() { + if(likely(netdev_found == netdev_added)) return; + + netdev_added = 0; + struct netdev *d = netdev_root, *last = NULL; + while(d) { + if(unlikely(!d->updated)) { + // info("Removing network device '%s', linked after '%s'", d->name, last?last->name:"ROOT"); + + if(netdev_last_used == d) + netdev_last_used = last; + + struct netdev *t = d; + + if(d == netdev_root || !last) + netdev_root = d = d->next; + + else + last->next = d = d->next; + + t->next = NULL; + netdev_free(t); + } + else { + netdev_added++; + last = d; + d->updated = 0; + d = d->next; + } + } +} + +static struct netdev *get_netdev(const char *name) { + struct netdev *d; + + uint32_t hash = simple_hash(name); + + // search it, from the last position to the end + for(d = netdev_last_used ; d ; d = d->next) { + if(unlikely(hash == d->hash && !strcmp(name, d->name))) { + netdev_last_used = d->next; + return d; + } + } + + // search it from the beginning to the last position we used + for(d = netdev_root ; d != netdev_last_used ; d = d->next) { + if(unlikely(hash == d->hash && !strcmp(name, d->name))) { + netdev_last_used = d->next; + return d; + } + } + + // create a new one + d = callocz(1, sizeof(struct netdev)); + d->name = strdupz(name); + d->hash = simple_hash(d->name); + d->len = strlen(d->name); + d->chart_labels = rrdlabels_create(); + + d->chart_type_net_bytes = strdupz("net"); + d->chart_type_net_compressed = strdupz("net_compressed"); + d->chart_type_net_drops = strdupz("net_drops"); + d->chart_type_net_errors = strdupz("net_errors"); + d->chart_type_net_events = strdupz("net_events"); + d->chart_type_net_fifo = strdupz("net_fifo"); + d->chart_type_net_packets = strdupz("net_packets"); + d->chart_type_net_speed = strdupz("net_speed"); + d->chart_type_net_duplex = strdupz("net_duplex"); + d->chart_type_net_operstate = strdupz("net_operstate"); + d->chart_type_net_carrier = strdupz("net_carrier"); + d->chart_type_net_mtu = strdupz("net_mtu"); + + d->chart_id_net_bytes = strdupz(d->name); + d->chart_id_net_compressed = strdupz(d->name); + d->chart_id_net_drops = strdupz(d->name); + d->chart_id_net_errors = strdupz(d->name); + d->chart_id_net_events = strdupz(d->name); + d->chart_id_net_fifo = strdupz(d->name); + d->chart_id_net_packets = strdupz(d->name); + d->chart_id_net_speed = strdupz(d->name); + d->chart_id_net_duplex = strdupz(d->name); + d->chart_id_net_operstate = strdupz(d->name); + d->chart_id_net_carrier = strdupz(d->name); + d->chart_id_net_mtu = strdupz(d->name); + + d->chart_ctx_net_bytes = strdupz("net.net"); + d->chart_ctx_net_compressed = strdupz("net.compressed"); + d->chart_ctx_net_drops = strdupz("net.drops"); + d->chart_ctx_net_errors = strdupz("net.errors"); + d->chart_ctx_net_events = strdupz("net.events"); + d->chart_ctx_net_fifo = strdupz("net.fifo"); + d->chart_ctx_net_packets = strdupz("net.packets"); + d->chart_ctx_net_speed = strdupz("net.speed"); + d->chart_ctx_net_duplex = strdupz("net.duplex"); + d->chart_ctx_net_operstate = strdupz("net.operstate"); + d->chart_ctx_net_carrier = strdupz("net.carrier"); + d->chart_ctx_net_mtu = strdupz("net.mtu"); + + d->chart_family = strdupz(d->name); + d->priority = NETDATA_CHART_PRIO_FIRST_NET_IFACE; + + netdev_rename_lock(d); + + netdev_added++; + + // link it to the end + if(netdev_root) { + struct netdev *e; + for(e = netdev_root; e->next ; e = e->next) ; + e->next = d; + } + else + netdev_root = d; + + return d; +} + +int do_proc_net_dev(int update_every, usec_t dt) { + (void)dt; + static SIMPLE_PATTERN *disabled_list = NULL; + static procfile *ff = NULL; + static int enable_new_interfaces = -1; + static int do_bandwidth = -1, do_packets = -1, do_errors = -1, do_drops = -1, do_fifo = -1, do_compressed = -1, + do_events = -1, do_speed = -1, do_duplex = -1, do_operstate = -1, do_carrier = -1, do_mtu = -1; + static char *path_to_sys_devices_virtual_net = NULL, *path_to_sys_class_net_speed = NULL, + *proc_net_dev_filename = NULL; + static char *path_to_sys_class_net_duplex = NULL; + static char *path_to_sys_class_net_operstate = NULL; + static char *path_to_sys_class_net_carrier = NULL; + static char *path_to_sys_class_net_mtu = NULL; + + if(unlikely(enable_new_interfaces == -1)) { + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, (*netdata_configured_host_prefix)?"/proc/1/net/dev":"/proc/net/dev"); + proc_net_dev_filename = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/virtual/net/%s"); + path_to_sys_devices_virtual_net = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get virtual interfaces", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/speed"); + path_to_sys_class_net_speed = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device speed", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/duplex"); + path_to_sys_class_net_duplex = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device duplex", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/operstate"); + path_to_sys_class_net_operstate = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device operstate", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/carrier"); + path_to_sys_class_net_carrier = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device carrier", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/mtu"); + path_to_sys_class_net_mtu = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device mtu", filename); + + + enable_new_interfaces = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "enable new interfaces detected at runtime", CONFIG_BOOLEAN_AUTO); + + do_bandwidth = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "bandwidth for all interfaces", CONFIG_BOOLEAN_AUTO); + do_packets = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "packets for all interfaces", CONFIG_BOOLEAN_AUTO); + do_errors = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "errors for all interfaces", CONFIG_BOOLEAN_AUTO); + do_drops = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "drops for all interfaces", CONFIG_BOOLEAN_AUTO); + do_fifo = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "fifo for all interfaces", CONFIG_BOOLEAN_AUTO); + do_compressed = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "compressed packets for all interfaces", CONFIG_BOOLEAN_AUTO); + do_events = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "frames, collisions, carrier counters for all interfaces", CONFIG_BOOLEAN_AUTO); + do_speed = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "speed for all interfaces", CONFIG_BOOLEAN_AUTO); + do_duplex = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "duplex for all interfaces", CONFIG_BOOLEAN_AUTO); + do_operstate = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "operstate for all interfaces", CONFIG_BOOLEAN_AUTO); + do_carrier = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "carrier for all interfaces", CONFIG_BOOLEAN_AUTO); + do_mtu = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "mtu for all interfaces", CONFIG_BOOLEAN_AUTO); + + disabled_list = simple_pattern_create(config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "disable by default interfaces matching", "lo fireqos* *-ifb fwpr* fwbr* fwln*"), NULL, SIMPLE_PATTERN_EXACT); + } + + if(unlikely(!ff)) { + ff = procfile_open(proc_net_dev_filename, " \t,|", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + // rename all the devices, if we have pending renames + if(unlikely(netdev_pending_renames)) + netdev_rename_all_lock(); + + netdev_found = 0; + + kernel_uint_t system_rbytes = 0; + kernel_uint_t system_tbytes = 0; + + size_t lines = procfile_lines(ff), l; + for(l = 2; l < lines ;l++) { + // require 17 words on each line + if(unlikely(procfile_linewords(ff, l) < 17)) continue; + + char *name = procfile_lineword(ff, l, 0); + size_t len = strlen(name); + if(name[len - 1] == ':') name[len - 1] = '\0'; + + struct netdev *d = get_netdev(name); + d->updated = 1; + netdev_found++; + + if(unlikely(!d->configured)) { + // this is the first time we see this interface + + // remember we configured it + d->configured = 1; + + d->enabled = enable_new_interfaces; + + if(d->enabled) + d->enabled = !simple_pattern_matches(disabled_list, d->name); + + char buffer[FILENAME_MAX + 1]; + + snprintfz(buffer, FILENAME_MAX, path_to_sys_devices_virtual_net, d->name); + if (likely(access(buffer, R_OK) == 0)) { + d->virtual = 1; + rrdlabels_add(d->chart_labels, "interface_type", "virtual", RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT); + } + else { + d->virtual = 0; + rrdlabels_add(d->chart_labels, "interface_type", "real", RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT); + } + rrdlabels_add(d->chart_labels, "device", name, RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT); + + if(likely(!d->virtual)) { + // set the filename to get the interface speed + snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_speed, d->name); + d->filename_speed = strdupz(buffer); + + snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_duplex, d->name); + d->filename_duplex = strdupz(buffer); + } + + snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_operstate, d->name); + d->filename_operstate = strdupz(buffer); + + snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_carrier, d->name); + d->filename_carrier = strdupz(buffer); + + snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_mtu, d->name); + d->filename_mtu = strdupz(buffer); + + snprintfz(buffer, FILENAME_MAX, "plugin:proc:/proc/net/dev:%s", d->name); + d->enabled = config_get_boolean_ondemand(buffer, "enabled", d->enabled); + d->virtual = config_get_boolean(buffer, "virtual", d->virtual); + + if(d->enabled == CONFIG_BOOLEAN_NO) + continue; + + d->do_bandwidth = config_get_boolean_ondemand(buffer, "bandwidth", do_bandwidth); + d->do_packets = config_get_boolean_ondemand(buffer, "packets", do_packets); + d->do_errors = config_get_boolean_ondemand(buffer, "errors", do_errors); + d->do_drops = config_get_boolean_ondemand(buffer, "drops", do_drops); + d->do_fifo = config_get_boolean_ondemand(buffer, "fifo", do_fifo); + d->do_compressed = config_get_boolean_ondemand(buffer, "compressed", do_compressed); + d->do_events = config_get_boolean_ondemand(buffer, "events", do_events); + d->do_speed = config_get_boolean_ondemand(buffer, "speed", do_speed); + d->do_duplex = config_get_boolean_ondemand(buffer, "duplex", do_duplex); + d->do_operstate = config_get_boolean_ondemand(buffer, "operstate", do_operstate); + d->do_carrier = config_get_boolean_ondemand(buffer, "carrier", do_carrier); + d->do_mtu = config_get_boolean_ondemand(buffer, "mtu", do_mtu); + } + + if(unlikely(!d->enabled)) + continue; + + if(likely(d->do_bandwidth != CONFIG_BOOLEAN_NO || !d->virtual)) { + d->rbytes = str2kernel_uint_t(procfile_lineword(ff, l, 1)); + d->tbytes = str2kernel_uint_t(procfile_lineword(ff, l, 9)); + + if(likely(!d->virtual)) { + system_rbytes += d->rbytes; + system_tbytes += d->tbytes; + } + } + + if(likely(d->do_packets != CONFIG_BOOLEAN_NO)) { + d->rpackets = str2kernel_uint_t(procfile_lineword(ff, l, 2)); + d->rmulticast = str2kernel_uint_t(procfile_lineword(ff, l, 8)); + d->tpackets = str2kernel_uint_t(procfile_lineword(ff, l, 10)); + } + + if(likely(d->do_errors != CONFIG_BOOLEAN_NO)) { + d->rerrors = str2kernel_uint_t(procfile_lineword(ff, l, 3)); + d->terrors = str2kernel_uint_t(procfile_lineword(ff, l, 11)); + } + + if(likely(d->do_drops != CONFIG_BOOLEAN_NO)) { + d->rdrops = str2kernel_uint_t(procfile_lineword(ff, l, 4)); + d->tdrops = str2kernel_uint_t(procfile_lineword(ff, l, 12)); + } + + if(likely(d->do_fifo != CONFIG_BOOLEAN_NO)) { + d->rfifo = str2kernel_uint_t(procfile_lineword(ff, l, 5)); + d->tfifo = str2kernel_uint_t(procfile_lineword(ff, l, 13)); + } + + if(likely(d->do_compressed != CONFIG_BOOLEAN_NO)) { + d->rcompressed = str2kernel_uint_t(procfile_lineword(ff, l, 7)); + d->tcompressed = str2kernel_uint_t(procfile_lineword(ff, l, 16)); + } + + if(likely(d->do_events != CONFIG_BOOLEAN_NO)) { + d->rframe = str2kernel_uint_t(procfile_lineword(ff, l, 6)); + d->tcollisions = str2kernel_uint_t(procfile_lineword(ff, l, 14)); + d->tcarrier = str2kernel_uint_t(procfile_lineword(ff, l, 15)); + } + + if ((d->do_carrier != CONFIG_BOOLEAN_NO || + d->do_duplex != CONFIG_BOOLEAN_NO || + d->do_speed != CONFIG_BOOLEAN_NO) && + d->filename_carrier && + (d->carrier_file_exists || + now_monotonic_sec() - d->carrier_file_lost_time > READ_RETRY_PERIOD)) { + if (read_single_number_file(d->filename_carrier, &d->carrier)) { + if (d->carrier_file_exists) + error( + "Cannot refresh interface %s carrier state by reading '%s'. Next update is in %d seconds.", + d->name, + d->filename_carrier, + READ_RETRY_PERIOD); + d->carrier_file_exists = 0; + d->carrier_file_lost_time = now_monotonic_sec(); + } else { + d->carrier_file_exists = 1; + d->carrier_file_lost_time = 0; + } + } + + if (d->do_duplex != CONFIG_BOOLEAN_NO && + d->filename_duplex && + (d->carrier || d->carrier_file_exists) && + (d->duplex_file_exists || + now_monotonic_sec() - d->duplex_file_lost_time > READ_RETRY_PERIOD)) { + char buffer[STATE_LENGTH_MAX + 1]; + + if (read_file(d->filename_duplex, buffer, STATE_LENGTH_MAX)) { + if (d->duplex_file_exists) + error("Cannot refresh interface %s duplex state by reading '%s'.", d->name, d->filename_duplex); + d->duplex_file_exists = 0; + d->duplex_file_lost_time = now_monotonic_sec(); + d->duplex = NETDEV_DUPLEX_UNKNOWN; + } else { + // values can be unknown, half or full -- just check the first letter for speed + if (buffer[0] == 'f') + d->duplex = NETDEV_DUPLEX_FULL; + else if (buffer[0] == 'h') + d->duplex = NETDEV_DUPLEX_HALF; + else + d->duplex = NETDEV_DUPLEX_UNKNOWN; + d->duplex_file_exists = 1; + d->duplex_file_lost_time = 0; + } + } else { + d->duplex = NETDEV_DUPLEX_UNKNOWN; + } + + if(d->do_operstate != CONFIG_BOOLEAN_NO && d->filename_operstate) { + char buffer[STATE_LENGTH_MAX + 1], *trimmed_buffer; + + if (read_file(d->filename_operstate, buffer, STATE_LENGTH_MAX)) { + error( + "Cannot refresh %s operstate by reading '%s'. Will not update its status anymore.", + d->name, d->filename_operstate); + freez(d->filename_operstate); + d->filename_operstate = NULL; + } else { + trimmed_buffer = trim(buffer); + d->operstate = get_operstate(trimmed_buffer); + } + } + + if (d->do_mtu != CONFIG_BOOLEAN_NO && d->filename_mtu) { + if (read_single_number_file(d->filename_mtu, &d->mtu)) { + error( + "Cannot refresh mtu for interface %s by reading '%s'. Stop updating it.", d->name, d->filename_mtu); + freez(d->filename_mtu); + d->filename_mtu = NULL; + } + } + + //info("PROC_NET_DEV: %s speed %zu, bytes %zu/%zu, packets %zu/%zu/%zu, errors %zu/%zu, drops %zu/%zu, fifo %zu/%zu, compressed %zu/%zu, rframe %zu, tcollisions %zu, tcarrier %zu" + // , d->name, d->speed + // , d->rbytes, d->tbytes + // , d->rpackets, d->tpackets, d->rmulticast + // , d->rerrors, d->terrors + // , d->rdrops, d->tdrops + // , d->rfifo, d->tfifo + // , d->rcompressed, d->tcompressed + // , d->rframe, d->tcollisions, d->tcarrier + // ); + + if(unlikely(d->do_bandwidth == CONFIG_BOOLEAN_AUTO && + (d->rbytes || d->tbytes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + d->do_bandwidth = CONFIG_BOOLEAN_YES; + + if(d->do_bandwidth == CONFIG_BOOLEAN_YES) { + if(unlikely(!d->st_bandwidth)) { + + d->st_bandwidth = rrdset_create_localhost( + d->chart_type_net_bytes + , d->chart_id_net_bytes + , NULL + , d->chart_family + , d->chart_ctx_net_bytes + , "Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(d->st_bandwidth, d->chart_labels); + + d->rd_rbytes = rrddim_add(d->st_bandwidth, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + d->rd_tbytes = rrddim_add(d->st_bandwidth, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + + if(d->flipped) { + // flip receive/transmit + + RRDDIM *td = d->rd_rbytes; + d->rd_rbytes = d->rd_tbytes; + d->rd_tbytes = td; + } + } + + rrddim_set_by_pointer(d->st_bandwidth, d->rd_rbytes, (collected_number)d->rbytes); + rrddim_set_by_pointer(d->st_bandwidth, d->rd_tbytes, (collected_number)d->tbytes); + rrdset_done(d->st_bandwidth); + + // update the interface speed + if(d->filename_speed) { + if(unlikely(!d->chart_var_speed)) { + d->chart_var_speed = + rrdsetvar_custom_chart_variable_add_and_acquire(d->st_bandwidth, "nic_speed_max"); + if(!d->chart_var_speed) { + error( + "Cannot create interface %s chart variable 'nic_speed_max'. Will not update its speed anymore.", + d->name); + freez(d->filename_speed); + d->filename_speed = NULL; + } + } + + if (d->filename_speed && d->chart_var_speed) { + int ret = 0; + + if ((d->carrier || d->carrier_file_exists) && + (d->speed_file_exists || now_monotonic_sec() - d->speed_file_lost_time > READ_RETRY_PERIOD)) { + ret = read_single_number_file(d->filename_speed, (unsigned long long *) &d->speed); + } else { + d->speed = 0; // TODO: this is wrong, shouldn't use 0 value, but NULL. + } + + if(ret) { + if (d->speed_file_exists) + error("Cannot refresh interface %s speed by reading '%s'.", d->name, d->filename_speed); + d->speed_file_exists = 0; + d->speed_file_lost_time = now_monotonic_sec(); + } + else { + if(d->do_speed != CONFIG_BOOLEAN_NO) { + if(unlikely(!d->st_speed)) { + d->st_speed = rrdset_create_localhost( + d->chart_type_net_speed + , d->chart_id_net_speed + , NULL + , d->chart_family + , d->chart_ctx_net_speed + , "Interface Speed" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 7 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_speed, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_speed, d->chart_labels); + + d->rd_speed = rrddim_add(d->st_speed, "speed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(d->st_speed, d->rd_speed, (collected_number)d->speed * KILOBITS_IN_A_MEGABIT); + rrdset_done(d->st_speed); + } + + rrdsetvar_custom_chart_variable_set( + d->st_bandwidth, d->chart_var_speed, (NETDATA_DOUBLE)d->speed * KILOBITS_IN_A_MEGABIT); + + if (d->speed) { + d->speed_file_exists = 1; + d->speed_file_lost_time = 0; + } + } + } + } + } + + if(d->do_duplex != CONFIG_BOOLEAN_NO && d->filename_duplex) { + if(unlikely(!d->st_duplex)) { + d->st_duplex = rrdset_create_localhost( + d->chart_type_net_duplex + , d->chart_id_net_duplex + , NULL + , d->chart_family + , d->chart_ctx_net_duplex + , "Interface Duplex State" + , "state" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 8 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_duplex, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_duplex, d->chart_labels); + + d->rd_duplex_full = rrddim_add(d->st_duplex, "full", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_duplex_half = rrddim_add(d->st_duplex, "half", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_duplex_unknown = rrddim_add(d->st_duplex, "unknown", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(d->st_duplex, d->rd_duplex_full, (collected_number)(d->duplex == NETDEV_DUPLEX_FULL)); + rrddim_set_by_pointer(d->st_duplex, d->rd_duplex_half, (collected_number)(d->duplex == NETDEV_DUPLEX_HALF)); + rrddim_set_by_pointer(d->st_duplex, d->rd_duplex_unknown, (collected_number)(d->duplex == NETDEV_DUPLEX_UNKNOWN)); + rrdset_done(d->st_duplex); + } + + if(d->do_operstate != CONFIG_BOOLEAN_NO && d->filename_operstate) { + if(unlikely(!d->st_operstate)) { + d->st_operstate = rrdset_create_localhost( + d->chart_type_net_operstate + , d->chart_id_net_operstate + , NULL + , d->chart_family + , d->chart_ctx_net_operstate + , "Interface Operational State" + , "state" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 9 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_operstate, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_operstate, d->chart_labels); + + d->rd_operstate_up = rrddim_add(d->st_operstate, "up", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_down = rrddim_add(d->st_operstate, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_notpresent = rrddim_add(d->st_operstate, "notpresent", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_lowerlayerdown = rrddim_add(d->st_operstate, "lowerlayerdown", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_testing = rrddim_add(d->st_operstate, "testing", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_dormant = rrddim_add(d->st_operstate, "dormant", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_unknown = rrddim_add(d->st_operstate, "unknown", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_up, (collected_number)(d->operstate == NETDEV_OPERSTATE_UP)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_down, (collected_number)(d->operstate == NETDEV_OPERSTATE_DOWN)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_notpresent, (collected_number)(d->operstate == NETDEV_OPERSTATE_NOTPRESENT)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_lowerlayerdown, (collected_number)(d->operstate == NETDEV_OPERSTATE_LOWERLAYERDOWN)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_testing, (collected_number)(d->operstate == NETDEV_OPERSTATE_TESTING)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_dormant, (collected_number)(d->operstate == NETDEV_OPERSTATE_DORMANT)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_unknown, (collected_number)(d->operstate == NETDEV_OPERSTATE_UNKNOWN)); + rrdset_done(d->st_operstate); + } + + if(d->do_carrier != CONFIG_BOOLEAN_NO && d->carrier_file_exists) { + if(unlikely(!d->st_carrier)) { + d->st_carrier = rrdset_create_localhost( + d->chart_type_net_carrier + , d->chart_id_net_carrier + , NULL + , d->chart_family + , d->chart_ctx_net_carrier + , "Interface Physical Link State" + , "state" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 10 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_carrier, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_carrier, d->chart_labels); + + d->rd_carrier_up = rrddim_add(d->st_carrier, "up", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_carrier_down = rrddim_add(d->st_carrier, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(d->st_carrier, d->rd_carrier_up, (collected_number)(d->carrier == 1)); + rrddim_set_by_pointer(d->st_carrier, d->rd_carrier_down, (collected_number)(d->carrier != 1)); + rrdset_done(d->st_carrier); + } + + if(d->do_mtu != CONFIG_BOOLEAN_NO && d->filename_mtu) { + if(unlikely(!d->st_mtu)) { + d->st_mtu = rrdset_create_localhost( + d->chart_type_net_mtu + , d->chart_id_net_mtu + , NULL + , d->chart_family + , d->chart_ctx_net_mtu + , "Interface MTU" + , "octets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 11 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_mtu, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_mtu, d->chart_labels); + + d->rd_mtu = rrddim_add(d->st_mtu, "mtu", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(d->st_mtu, d->rd_mtu, (collected_number)d->mtu); + rrdset_done(d->st_mtu); + } + + if(unlikely(d->do_packets == CONFIG_BOOLEAN_AUTO && + (d->rpackets || d->tpackets || d->rmulticast || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + d->do_packets = CONFIG_BOOLEAN_YES; + + if(d->do_packets == CONFIG_BOOLEAN_YES) { + if(unlikely(!d->st_packets)) { + + d->st_packets = rrdset_create_localhost( + d->chart_type_net_packets + , d->chart_id_net_packets + , NULL + , d->chart_family + , d->chart_ctx_net_packets + , "Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_packets, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_packets, d->chart_labels); + + d->rd_rpackets = rrddim_add(d->st_packets, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_tpackets = rrddim_add(d->st_packets, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_rmulticast = rrddim_add(d->st_packets, "multicast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(d->flipped) { + // flip receive/transmit + + RRDDIM *td = d->rd_rpackets; + d->rd_rpackets = d->rd_tpackets; + d->rd_tpackets = td; + } + } + + rrddim_set_by_pointer(d->st_packets, d->rd_rpackets, (collected_number)d->rpackets); + rrddim_set_by_pointer(d->st_packets, d->rd_tpackets, (collected_number)d->tpackets); + rrddim_set_by_pointer(d->st_packets, d->rd_rmulticast, (collected_number)d->rmulticast); + rrdset_done(d->st_packets); + } + + if(unlikely(d->do_errors == CONFIG_BOOLEAN_AUTO && + (d->rerrors || d->terrors || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + d->do_errors = CONFIG_BOOLEAN_YES; + + if(d->do_errors == CONFIG_BOOLEAN_YES) { + if(unlikely(!d->st_errors)) { + + d->st_errors = rrdset_create_localhost( + d->chart_type_net_errors + , d->chart_id_net_errors + , NULL + , d->chart_family + , d->chart_ctx_net_errors + , "Interface Errors" + , "errors/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 2 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_errors, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_errors, d->chart_labels); + + d->rd_rerrors = rrddim_add(d->st_errors, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_terrors = rrddim_add(d->st_errors, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(d->flipped) { + // flip receive/transmit + + RRDDIM *td = d->rd_rerrors; + d->rd_rerrors = d->rd_terrors; + d->rd_terrors = td; + } + } + + rrddim_set_by_pointer(d->st_errors, d->rd_rerrors, (collected_number)d->rerrors); + rrddim_set_by_pointer(d->st_errors, d->rd_terrors, (collected_number)d->terrors); + rrdset_done(d->st_errors); + } + + if(unlikely(d->do_drops == CONFIG_BOOLEAN_AUTO && + (d->rdrops || d->tdrops || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + d->do_drops = CONFIG_BOOLEAN_YES; + + if(d->do_drops == CONFIG_BOOLEAN_YES) { + if(unlikely(!d->st_drops)) { + + d->st_drops = rrdset_create_localhost( + d->chart_type_net_drops + , d->chart_id_net_drops + , NULL + , d->chart_family + , d->chart_ctx_net_drops + , "Interface Drops" + , "drops/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 3 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_drops, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_drops, d->chart_labels); + + d->rd_rdrops = rrddim_add(d->st_drops, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_tdrops = rrddim_add(d->st_drops, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(d->flipped) { + // flip receive/transmit + + RRDDIM *td = d->rd_rdrops; + d->rd_rdrops = d->rd_tdrops; + d->rd_tdrops = td; + } + } + + rrddim_set_by_pointer(d->st_drops, d->rd_rdrops, (collected_number)d->rdrops); + rrddim_set_by_pointer(d->st_drops, d->rd_tdrops, (collected_number)d->tdrops); + rrdset_done(d->st_drops); + } + + if(unlikely(d->do_fifo == CONFIG_BOOLEAN_AUTO && + (d->rfifo || d->tfifo || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + d->do_fifo = CONFIG_BOOLEAN_YES; + + if(d->do_fifo == CONFIG_BOOLEAN_YES) { + if(unlikely(!d->st_fifo)) { + + d->st_fifo = rrdset_create_localhost( + d->chart_type_net_fifo + , d->chart_id_net_fifo + , NULL + , d->chart_family + , d->chart_ctx_net_fifo + , "Interface FIFO Buffer Errors" + , "errors" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 4 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_fifo, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_fifo, d->chart_labels); + + d->rd_rfifo = rrddim_add(d->st_fifo, "receive", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_tfifo = rrddim_add(d->st_fifo, "transmit", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(d->flipped) { + // flip receive/transmit + + RRDDIM *td = d->rd_rfifo; + d->rd_rfifo = d->rd_tfifo; + d->rd_tfifo = td; + } + } + + rrddim_set_by_pointer(d->st_fifo, d->rd_rfifo, (collected_number)d->rfifo); + rrddim_set_by_pointer(d->st_fifo, d->rd_tfifo, (collected_number)d->tfifo); + rrdset_done(d->st_fifo); + } + + if(unlikely(d->do_compressed == CONFIG_BOOLEAN_AUTO && + (d->rcompressed || d->tcompressed || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + d->do_compressed = CONFIG_BOOLEAN_YES; + + if(d->do_compressed == CONFIG_BOOLEAN_YES) { + if(unlikely(!d->st_compressed)) { + + d->st_compressed = rrdset_create_localhost( + d->chart_type_net_compressed + , d->chart_id_net_compressed + , NULL + , d->chart_family + , d->chart_ctx_net_compressed + , "Compressed Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 5 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_compressed, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_compressed, d->chart_labels); + + d->rd_rcompressed = rrddim_add(d->st_compressed, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_tcompressed = rrddim_add(d->st_compressed, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(d->flipped) { + // flip receive/transmit + + RRDDIM *td = d->rd_rcompressed; + d->rd_rcompressed = d->rd_tcompressed; + d->rd_tcompressed = td; + } + } + + rrddim_set_by_pointer(d->st_compressed, d->rd_rcompressed, (collected_number)d->rcompressed); + rrddim_set_by_pointer(d->st_compressed, d->rd_tcompressed, (collected_number)d->tcompressed); + rrdset_done(d->st_compressed); + } + + if(unlikely(d->do_events == CONFIG_BOOLEAN_AUTO && + (d->rframe || d->tcollisions || d->tcarrier || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + d->do_events = CONFIG_BOOLEAN_YES; + + if(d->do_events == CONFIG_BOOLEAN_YES) { + if(unlikely(!d->st_events)) { + + d->st_events = rrdset_create_localhost( + d->chart_type_net_events + , d->chart_id_net_events + , NULL + , d->chart_family + , d->chart_ctx_net_events + , "Network Interface Events" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , d->priority + 6 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_events, RRDSET_FLAG_DETAIL); + + rrdset_update_rrdlabels(d->st_events, d->chart_labels); + + d->rd_rframe = rrddim_add(d->st_events, "frames", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_tcollisions = rrddim_add(d->st_events, "collisions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_tcarrier = rrddim_add(d->st_events, "carrier", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(d->st_events, d->rd_rframe, (collected_number)d->rframe); + rrddim_set_by_pointer(d->st_events, d->rd_tcollisions, (collected_number)d->tcollisions); + rrddim_set_by_pointer(d->st_events, d->rd_tcarrier, (collected_number)d->tcarrier); + rrdset_done(d->st_events); + } + } + + if(do_bandwidth == CONFIG_BOOLEAN_YES || (do_bandwidth == CONFIG_BOOLEAN_AUTO && + (system_rbytes || system_tbytes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_bandwidth = CONFIG_BOOLEAN_YES; + static RRDSET *st_system_net = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_system_net)) { + st_system_net = rrdset_create_localhost( + "system" + , "net" + , NULL + , "network" + , NULL + , "Physical Network Interfaces Aggregated Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETDEV_NAME + , NETDATA_CHART_PRIO_SYSTEM_NET + , update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_system_net, "InOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_system_net, "OutOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_system_net, rd_in, (collected_number)system_rbytes); + rrddim_set_by_pointer(st_system_net, rd_out, (collected_number)system_tbytes); + + rrdset_done(st_system_net); + } + + netdev_cleanup(); + + return 0; +} + +static void netdev_main_cleanup(void *ptr) +{ + UNUSED(ptr); + + info("cleaning up..."); + + worker_unregister(); +} + +void *netdev_main(void *ptr) +{ + worker_register("NETDEV"); + worker_register_job_name(0, "netdev"); + + netdata_thread_cleanup_push(netdev_main_cleanup, ptr); + + usec_t step = localhost->rrd_update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (!netdata_exit) { + worker_is_idle(); + usec_t hb_dt = heartbeat_next(&hb, step); + + if (unlikely(netdata_exit)) + break; + + worker_is_busy(0); + if(do_proc_net_dev(localhost->rrd_update_every, hb_dt)) + break; + } + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/proc.plugin/proc_net_ip_vs_stats.c b/collectors/proc.plugin/proc_net_ip_vs_stats.c new file mode 100644 index 0000000..2b9c933 --- /dev/null +++ b/collectors/proc.plugin/proc_net_ip_vs_stats.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define RRD_TYPE_NET_IPVS "ipvs" +#define PLUGIN_PROC_MODULE_NET_IPVS_NAME "/proc/net/ip_vs_stats" +#define CONFIG_SECTION_PLUGIN_PROC_NET_IPVS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NET_IPVS_NAME + +int do_proc_net_ip_vs_stats(int update_every, usec_t dt) { + (void)dt; + static int do_bandwidth = -1, do_sockets = -1, do_packets = -1; + static procfile *ff = NULL; + + if(do_bandwidth == -1) do_bandwidth = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NET_IPVS, "IPVS bandwidth", 1); + if(do_sockets == -1) do_sockets = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NET_IPVS, "IPVS connections", 1); + if(do_packets == -1) do_packets = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NET_IPVS, "IPVS packets", 1); + + if(!ff) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/ip_vs_stats"); + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_NET_IPVS, "filename to monitor", filename), " \t,:|", PROCFILE_FLAG_DEFAULT); + } + if(!ff) return 1; + + ff = procfile_readall(ff); + if(!ff) return 0; // we return 0, so that we will retry to open it next time + + // make sure we have 3 lines + if(procfile_lines(ff) < 3) return 1; + + // make sure we have 5 words on the 3rd line + if(procfile_linewords(ff, 2) < 5) return 1; + + unsigned long long entries, InPackets, OutPackets, InBytes, OutBytes; + + entries = strtoull(procfile_lineword(ff, 2, 0), NULL, 16); + InPackets = strtoull(procfile_lineword(ff, 2, 1), NULL, 16); + OutPackets = strtoull(procfile_lineword(ff, 2, 2), NULL, 16); + InBytes = strtoull(procfile_lineword(ff, 2, 3), NULL, 16); + OutBytes = strtoull(procfile_lineword(ff, 2, 4), NULL, 16); + + if(do_sockets) { + static RRDSET *st = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_IPVS + , "sockets" + , NULL + , RRD_TYPE_NET_IPVS + , NULL + , "IPVS New Connections" + , "connections/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_IPVS_NAME + , NETDATA_CHART_PRIO_IPVS_SOCKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "connections", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "connections", entries); + rrdset_done(st); + } + + if(do_packets) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_IPVS + , "packets" + , NULL + , RRD_TYPE_NET_IPVS + , NULL + , "IPVS Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_IPVS_NAME + , NETDATA_CHART_PRIO_IPVS_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "received", InPackets); + rrddim_set(st, "sent", OutPackets); + rrdset_done(st); + } + + if(do_bandwidth) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_IPVS + , "net" + , NULL + , RRD_TYPE_NET_IPVS + , NULL + , "IPVS Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_IPVS_NAME + , NETDATA_CHART_PRIO_IPVS_NET + , update_every + , RRDSET_TYPE_AREA + ); + + rrddim_add(st, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "received", InBytes); + rrddim_set(st, "sent", OutBytes); + rrdset_done(st); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_netstat.c b/collectors/proc.plugin/proc_net_netstat.c new file mode 100644 index 0000000..f7635e3 --- /dev/null +++ b/collectors/proc.plugin/proc_net_netstat.c @@ -0,0 +1,3110 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define RRD_TYPE_NET_NETSTAT "ip" +#define RRD_TYPE_NET_SNMP "ipv4" +#define RRD_TYPE_NET_SNMP6 "ipv6" +#define PLUGIN_PROC_MODULE_NETSTAT_NAME "/proc/net/netstat" +#define CONFIG_SECTION_PLUGIN_PROC_NETSTAT "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETSTAT_NAME + +static struct proc_net_snmp { + // kernel_uint_t ip_Forwarding; + kernel_uint_t ip_DefaultTTL; + kernel_uint_t ip_InReceives; + kernel_uint_t ip_InHdrErrors; + kernel_uint_t ip_InAddrErrors; + kernel_uint_t ip_ForwDatagrams; + kernel_uint_t ip_InUnknownProtos; + kernel_uint_t ip_InDiscards; + kernel_uint_t ip_InDelivers; + kernel_uint_t ip_OutRequests; + kernel_uint_t ip_OutDiscards; + kernel_uint_t ip_OutNoRoutes; + kernel_uint_t ip_ReasmTimeout; + kernel_uint_t ip_ReasmReqds; + kernel_uint_t ip_ReasmOKs; + kernel_uint_t ip_ReasmFails; + kernel_uint_t ip_FragOKs; + kernel_uint_t ip_FragFails; + kernel_uint_t ip_FragCreates; + + kernel_uint_t icmp_InMsgs; + kernel_uint_t icmp_OutMsgs; + kernel_uint_t icmp_InErrors; + kernel_uint_t icmp_OutErrors; + kernel_uint_t icmp_InCsumErrors; + + kernel_uint_t icmpmsg_InEchoReps; + kernel_uint_t icmpmsg_OutEchoReps; + kernel_uint_t icmpmsg_InDestUnreachs; + kernel_uint_t icmpmsg_OutDestUnreachs; + kernel_uint_t icmpmsg_InRedirects; + kernel_uint_t icmpmsg_OutRedirects; + kernel_uint_t icmpmsg_InEchos; + kernel_uint_t icmpmsg_OutEchos; + kernel_uint_t icmpmsg_InRouterAdvert; + kernel_uint_t icmpmsg_OutRouterAdvert; + kernel_uint_t icmpmsg_InRouterSelect; + kernel_uint_t icmpmsg_OutRouterSelect; + kernel_uint_t icmpmsg_InTimeExcds; + kernel_uint_t icmpmsg_OutTimeExcds; + kernel_uint_t icmpmsg_InParmProbs; + kernel_uint_t icmpmsg_OutParmProbs; + kernel_uint_t icmpmsg_InTimestamps; + kernel_uint_t icmpmsg_OutTimestamps; + kernel_uint_t icmpmsg_InTimestampReps; + kernel_uint_t icmpmsg_OutTimestampReps; + + //kernel_uint_t tcp_RtoAlgorithm; + //kernel_uint_t tcp_RtoMin; + //kernel_uint_t tcp_RtoMax; + ssize_t tcp_MaxConn; + kernel_uint_t tcp_ActiveOpens; + kernel_uint_t tcp_PassiveOpens; + kernel_uint_t tcp_AttemptFails; + kernel_uint_t tcp_EstabResets; + kernel_uint_t tcp_CurrEstab; + kernel_uint_t tcp_InSegs; + kernel_uint_t tcp_OutSegs; + kernel_uint_t tcp_RetransSegs; + kernel_uint_t tcp_InErrs; + kernel_uint_t tcp_OutRsts; + kernel_uint_t tcp_InCsumErrors; + + kernel_uint_t udp_InDatagrams; + kernel_uint_t udp_NoPorts; + kernel_uint_t udp_InErrors; + kernel_uint_t udp_OutDatagrams; + kernel_uint_t udp_RcvbufErrors; + kernel_uint_t udp_SndbufErrors; + kernel_uint_t udp_InCsumErrors; + kernel_uint_t udp_IgnoredMulti; + + kernel_uint_t udplite_InDatagrams; + kernel_uint_t udplite_NoPorts; + kernel_uint_t udplite_InErrors; + kernel_uint_t udplite_OutDatagrams; + kernel_uint_t udplite_RcvbufErrors; + kernel_uint_t udplite_SndbufErrors; + kernel_uint_t udplite_InCsumErrors; + kernel_uint_t udplite_IgnoredMulti; +} snmp_root = { 0 }; + +static void parse_line_pair(procfile *ff_netstat, ARL_BASE *base, size_t header_line, size_t values_line) { + size_t hwords = procfile_linewords(ff_netstat, header_line); + size_t vwords = procfile_linewords(ff_netstat, values_line); + size_t w; + + if(unlikely(vwords > hwords)) { + error("File /proc/net/netstat on header line %zu has %zu words, but on value line %zu has %zu words.", header_line, hwords, values_line, vwords); + vwords = hwords; + } + + for(w = 1; w < vwords ;w++) { + if(unlikely(arl_check(base, procfile_lineword(ff_netstat, header_line, w), procfile_lineword(ff_netstat, values_line, w)))) + break; + } +} + +int do_proc_net_netstat(int update_every, usec_t dt) { + (void)dt; + + static int do_bandwidth = -1, do_inerrors = -1, do_mcast = -1, do_bcast = -1, do_mcast_p = -1, do_bcast_p = -1, do_ecn = -1, \ + do_tcpext_reorder = -1, do_tcpext_syscookies = -1, do_tcpext_ofo = -1, do_tcpext_connaborts = -1, do_tcpext_memory = -1, + do_tcpext_syn_queue = -1, do_tcpext_accept_queue = -1; + + static int do_ip_packets = -1, do_ip_fragsout = -1, do_ip_fragsin = -1, do_ip_errors = -1, + do_tcp_sockets = -1, do_tcp_packets = -1, do_tcp_errors = -1, do_tcp_handshake = -1, do_tcp_opens = -1, + do_udp_packets = -1, do_udp_errors = -1, do_icmp_packets = -1, do_icmpmsg = -1, do_udplite_packets = -1; + + static int do_ip6_packets = -1, do_ip6_fragsout = -1, do_ip6_fragsin = -1, do_ip6_errors = -1, + do_ip6_udplite_packets = -1, do_ip6_udplite_errors = -1, do_ip6_udp_packets = -1, do_ip6_udp_errors = -1, + do_ip6_bandwidth = -1, do_ip6_mcast = -1, do_ip6_bcast = -1, do_ip6_mcast_p = -1, do_ip6_icmp = -1, + do_ip6_icmp_redir = -1, do_ip6_icmp_errors = -1, do_ip6_icmp_echos = -1, do_ip6_icmp_groupmemb = -1, + do_ip6_icmp_router = -1, do_ip6_icmp_neighbor = -1, do_ip6_icmp_mldv2 = -1, do_ip6_icmp_types = -1, + do_ip6_ect = -1; + + static uint32_t hash_ipext = 0, hash_tcpext = 0; + static uint32_t hash_ip = 0, hash_icmp = 0, hash_tcp = 0, hash_udp = 0, hash_icmpmsg = 0, hash_udplite = 0; + + static procfile *ff_netstat = NULL; + static procfile *ff_snmp = NULL; + static procfile *ff_snmp6 = NULL; + + static ARL_BASE *arl_tcpext = NULL; + static ARL_BASE *arl_ipext = NULL; + + static ARL_BASE *arl_ip = NULL; + static ARL_BASE *arl_icmp = NULL; + static ARL_BASE *arl_icmpmsg = NULL; + static ARL_BASE *arl_tcp = NULL; + static ARL_BASE *arl_udp = NULL; + static ARL_BASE *arl_udplite = NULL; + + static ARL_BASE *arl_ipv6 = NULL; + + static const RRDVAR_ACQUIRED *tcp_max_connections_var = NULL; + + // -------------------------------------------------------------------- + // IP + + // IP bandwidth + static unsigned long long ipext_InOctets = 0; + static unsigned long long ipext_OutOctets = 0; + + // IP input errors + static unsigned long long ipext_InNoRoutes = 0; + static unsigned long long ipext_InTruncatedPkts = 0; + static unsigned long long ipext_InCsumErrors = 0; + + // IP multicast bandwidth + static unsigned long long ipext_InMcastOctets = 0; + static unsigned long long ipext_OutMcastOctets = 0; + + // IP multicast packets + static unsigned long long ipext_InMcastPkts = 0; + static unsigned long long ipext_OutMcastPkts = 0; + + // IP broadcast bandwidth + static unsigned long long ipext_InBcastOctets = 0; + static unsigned long long ipext_OutBcastOctets = 0; + + // IP broadcast packets + static unsigned long long ipext_InBcastPkts = 0; + static unsigned long long ipext_OutBcastPkts = 0; + + // IP ECN + static unsigned long long ipext_InNoECTPkts = 0; + static unsigned long long ipext_InECT1Pkts = 0; + static unsigned long long ipext_InECT0Pkts = 0; + static unsigned long long ipext_InCEPkts = 0; + + // -------------------------------------------------------------------- + // IP TCP + + // IP TCP Reordering + static unsigned long long tcpext_TCPRenoReorder = 0; + static unsigned long long tcpext_TCPFACKReorder = 0; + static unsigned long long tcpext_TCPSACKReorder = 0; + static unsigned long long tcpext_TCPTSReorder = 0; + + // IP TCP SYN Cookies + static unsigned long long tcpext_SyncookiesSent = 0; + static unsigned long long tcpext_SyncookiesRecv = 0; + static unsigned long long tcpext_SyncookiesFailed = 0; + + // IP TCP Out Of Order Queue + // http://www.spinics.net/lists/netdev/msg204696.html + static unsigned long long tcpext_TCPOFOQueue = 0; // Number of packets queued in OFO queue + static unsigned long long tcpext_TCPOFODrop = 0; // Number of packets meant to be queued in OFO but dropped because socket rcvbuf limit hit. + static unsigned long long tcpext_TCPOFOMerge = 0; // Number of packets in OFO that were merged with other packets. + static unsigned long long tcpext_OfoPruned = 0; // packets dropped from out-of-order queue because of socket buffer overrun + + // IP TCP connection resets + // https://github.com/ecki/net-tools/blob/bd8bceaed2311651710331a7f8990c3e31be9840/statistics.c + static unsigned long long tcpext_TCPAbortOnData = 0; // connections reset due to unexpected data + static unsigned long long tcpext_TCPAbortOnClose = 0; // connections reset due to early user close + static unsigned long long tcpext_TCPAbortOnMemory = 0; // connections aborted due to memory pressure + static unsigned long long tcpext_TCPAbortOnTimeout = 0; // connections aborted due to timeout + static unsigned long long tcpext_TCPAbortOnLinger = 0; // connections aborted after user close in linger timeout + static unsigned long long tcpext_TCPAbortFailed = 0; // times unable to send RST due to no memory + + // https://perfchron.com/2015/12/26/investigating-linux-network-issues-with-netstat-and-nstat/ + static unsigned long long tcpext_ListenOverflows = 0; // times the listen queue of a socket overflowed + static unsigned long long tcpext_ListenDrops = 0; // SYNs to LISTEN sockets ignored + + // IP TCP memory pressures + static unsigned long long tcpext_TCPMemoryPressures = 0; + + static unsigned long long tcpext_TCPReqQFullDrop = 0; + static unsigned long long tcpext_TCPReqQFullDoCookies = 0; + + static unsigned long long tcpext_TCPSynRetrans = 0; + + // IPv6 + static unsigned long long Ip6InReceives = 0ULL; + static unsigned long long Ip6InHdrErrors = 0ULL; + static unsigned long long Ip6InTooBigErrors = 0ULL; + static unsigned long long Ip6InNoRoutes = 0ULL; + static unsigned long long Ip6InAddrErrors = 0ULL; + static unsigned long long Ip6InUnknownProtos = 0ULL; + static unsigned long long Ip6InTruncatedPkts = 0ULL; + static unsigned long long Ip6InDiscards = 0ULL; + static unsigned long long Ip6InDelivers = 0ULL; + static unsigned long long Ip6OutForwDatagrams = 0ULL; + static unsigned long long Ip6OutRequests = 0ULL; + static unsigned long long Ip6OutDiscards = 0ULL; + static unsigned long long Ip6OutNoRoutes = 0ULL; + static unsigned long long Ip6ReasmTimeout = 0ULL; + static unsigned long long Ip6ReasmReqds = 0ULL; + static unsigned long long Ip6ReasmOKs = 0ULL; + static unsigned long long Ip6ReasmFails = 0ULL; + static unsigned long long Ip6FragOKs = 0ULL; + static unsigned long long Ip6FragFails = 0ULL; + static unsigned long long Ip6FragCreates = 0ULL; + static unsigned long long Ip6InMcastPkts = 0ULL; + static unsigned long long Ip6OutMcastPkts = 0ULL; + static unsigned long long Ip6InOctets = 0ULL; + static unsigned long long Ip6OutOctets = 0ULL; + static unsigned long long Ip6InMcastOctets = 0ULL; + static unsigned long long Ip6OutMcastOctets = 0ULL; + static unsigned long long Ip6InBcastOctets = 0ULL; + static unsigned long long Ip6OutBcastOctets = 0ULL; + static unsigned long long Ip6InNoECTPkts = 0ULL; + static unsigned long long Ip6InECT1Pkts = 0ULL; + static unsigned long long Ip6InECT0Pkts = 0ULL; + static unsigned long long Ip6InCEPkts = 0ULL; + static unsigned long long Icmp6InMsgs = 0ULL; + static unsigned long long Icmp6InErrors = 0ULL; + static unsigned long long Icmp6OutMsgs = 0ULL; + static unsigned long long Icmp6OutErrors = 0ULL; + static unsigned long long Icmp6InCsumErrors = 0ULL; + static unsigned long long Icmp6InDestUnreachs = 0ULL; + static unsigned long long Icmp6InPktTooBigs = 0ULL; + static unsigned long long Icmp6InTimeExcds = 0ULL; + static unsigned long long Icmp6InParmProblems = 0ULL; + static unsigned long long Icmp6InEchos = 0ULL; + static unsigned long long Icmp6InEchoReplies = 0ULL; + static unsigned long long Icmp6InGroupMembQueries = 0ULL; + static unsigned long long Icmp6InGroupMembResponses = 0ULL; + static unsigned long long Icmp6InGroupMembReductions = 0ULL; + static unsigned long long Icmp6InRouterSolicits = 0ULL; + static unsigned long long Icmp6InRouterAdvertisements = 0ULL; + static unsigned long long Icmp6InNeighborSolicits = 0ULL; + static unsigned long long Icmp6InNeighborAdvertisements = 0ULL; + static unsigned long long Icmp6InRedirects = 0ULL; + static unsigned long long Icmp6InMLDv2Reports = 0ULL; + static unsigned long long Icmp6OutDestUnreachs = 0ULL; + static unsigned long long Icmp6OutPktTooBigs = 0ULL; + static unsigned long long Icmp6OutTimeExcds = 0ULL; + static unsigned long long Icmp6OutParmProblems = 0ULL; + static unsigned long long Icmp6OutEchos = 0ULL; + static unsigned long long Icmp6OutEchoReplies = 0ULL; + static unsigned long long Icmp6OutGroupMembQueries = 0ULL; + static unsigned long long Icmp6OutGroupMembResponses = 0ULL; + static unsigned long long Icmp6OutGroupMembReductions = 0ULL; + static unsigned long long Icmp6OutRouterSolicits = 0ULL; + static unsigned long long Icmp6OutRouterAdvertisements = 0ULL; + static unsigned long long Icmp6OutNeighborSolicits = 0ULL; + static unsigned long long Icmp6OutNeighborAdvertisements = 0ULL; + static unsigned long long Icmp6OutRedirects = 0ULL; + static unsigned long long Icmp6OutMLDv2Reports = 0ULL; + static unsigned long long Icmp6InType1 = 0ULL; + static unsigned long long Icmp6InType128 = 0ULL; + static unsigned long long Icmp6InType129 = 0ULL; + static unsigned long long Icmp6InType136 = 0ULL; + static unsigned long long Icmp6OutType1 = 0ULL; + static unsigned long long Icmp6OutType128 = 0ULL; + static unsigned long long Icmp6OutType129 = 0ULL; + static unsigned long long Icmp6OutType133 = 0ULL; + static unsigned long long Icmp6OutType135 = 0ULL; + static unsigned long long Icmp6OutType143 = 0ULL; + static unsigned long long Udp6InDatagrams = 0ULL; + static unsigned long long Udp6NoPorts = 0ULL; + static unsigned long long Udp6InErrors = 0ULL; + static unsigned long long Udp6OutDatagrams = 0ULL; + static unsigned long long Udp6RcvbufErrors = 0ULL; + static unsigned long long Udp6SndbufErrors = 0ULL; + static unsigned long long Udp6InCsumErrors = 0ULL; + static unsigned long long Udp6IgnoredMulti = 0ULL; + static unsigned long long UdpLite6InDatagrams = 0ULL; + static unsigned long long UdpLite6NoPorts = 0ULL; + static unsigned long long UdpLite6InErrors = 0ULL; + static unsigned long long UdpLite6OutDatagrams = 0ULL; + static unsigned long long UdpLite6RcvbufErrors = 0ULL; + static unsigned long long UdpLite6SndbufErrors = 0ULL; + static unsigned long long UdpLite6InCsumErrors = 0ULL; + + // prepare for /proc/net/netstat parsing + + if(unlikely(!arl_ipext)) { + hash_ipext = simple_hash("IpExt"); + hash_tcpext = simple_hash("TcpExt"); + + do_bandwidth = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "bandwidth", CONFIG_BOOLEAN_AUTO); + do_inerrors = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "input errors", CONFIG_BOOLEAN_AUTO); + do_mcast = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "multicast bandwidth", CONFIG_BOOLEAN_AUTO); + do_bcast = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "broadcast bandwidth", CONFIG_BOOLEAN_AUTO); + do_mcast_p = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "multicast packets", CONFIG_BOOLEAN_AUTO); + do_bcast_p = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "broadcast packets", CONFIG_BOOLEAN_AUTO); + do_ecn = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "ECN packets", CONFIG_BOOLEAN_AUTO); + + do_tcpext_reorder = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "TCP reorders", CONFIG_BOOLEAN_AUTO); + do_tcpext_syscookies = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "TCP SYN cookies", CONFIG_BOOLEAN_AUTO); + do_tcpext_ofo = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "TCP out-of-order queue", CONFIG_BOOLEAN_AUTO); + do_tcpext_connaborts = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "TCP connection aborts", CONFIG_BOOLEAN_AUTO); + do_tcpext_memory = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "TCP memory pressures", CONFIG_BOOLEAN_AUTO); + + do_tcpext_syn_queue = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "TCP SYN queue", CONFIG_BOOLEAN_AUTO); + do_tcpext_accept_queue = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "TCP accept queue", CONFIG_BOOLEAN_AUTO); + + arl_ipext = arl_create("netstat/ipext", NULL, 60); + arl_tcpext = arl_create("netstat/tcpext", NULL, 60); + + // -------------------------------------------------------------------- + // IP + + if(do_bandwidth != CONFIG_BOOLEAN_NO) { + arl_expect(arl_ipext, "InOctets", &ipext_InOctets); + arl_expect(arl_ipext, "OutOctets", &ipext_OutOctets); + } + + if(do_inerrors != CONFIG_BOOLEAN_NO) { + arl_expect(arl_ipext, "InNoRoutes", &ipext_InNoRoutes); + arl_expect(arl_ipext, "InTruncatedPkts", &ipext_InTruncatedPkts); + arl_expect(arl_ipext, "InCsumErrors", &ipext_InCsumErrors); + } + + if(do_mcast != CONFIG_BOOLEAN_NO) { + arl_expect(arl_ipext, "InMcastOctets", &ipext_InMcastOctets); + arl_expect(arl_ipext, "OutMcastOctets", &ipext_OutMcastOctets); + } + + if(do_mcast_p != CONFIG_BOOLEAN_NO) { + arl_expect(arl_ipext, "InMcastPkts", &ipext_InMcastPkts); + arl_expect(arl_ipext, "OutMcastPkts", &ipext_OutMcastPkts); + } + + if(do_bcast != CONFIG_BOOLEAN_NO) { + arl_expect(arl_ipext, "InBcastPkts", &ipext_InBcastPkts); + arl_expect(arl_ipext, "OutBcastPkts", &ipext_OutBcastPkts); + } + + if(do_bcast_p != CONFIG_BOOLEAN_NO) { + arl_expect(arl_ipext, "InBcastOctets", &ipext_InBcastOctets); + arl_expect(arl_ipext, "OutBcastOctets", &ipext_OutBcastOctets); + } + + if(do_ecn != CONFIG_BOOLEAN_NO) { + arl_expect(arl_ipext, "InNoECTPkts", &ipext_InNoECTPkts); + arl_expect(arl_ipext, "InECT1Pkts", &ipext_InECT1Pkts); + arl_expect(arl_ipext, "InECT0Pkts", &ipext_InECT0Pkts); + arl_expect(arl_ipext, "InCEPkts", &ipext_InCEPkts); + } + + // -------------------------------------------------------------------- + // IP TCP + + if(do_tcpext_reorder != CONFIG_BOOLEAN_NO) { + arl_expect(arl_tcpext, "TCPFACKReorder", &tcpext_TCPFACKReorder); + arl_expect(arl_tcpext, "TCPSACKReorder", &tcpext_TCPSACKReorder); + arl_expect(arl_tcpext, "TCPRenoReorder", &tcpext_TCPRenoReorder); + arl_expect(arl_tcpext, "TCPTSReorder", &tcpext_TCPTSReorder); + } + + if(do_tcpext_syscookies != CONFIG_BOOLEAN_NO) { + arl_expect(arl_tcpext, "SyncookiesSent", &tcpext_SyncookiesSent); + arl_expect(arl_tcpext, "SyncookiesRecv", &tcpext_SyncookiesRecv); + arl_expect(arl_tcpext, "SyncookiesFailed", &tcpext_SyncookiesFailed); + } + + if(do_tcpext_ofo != CONFIG_BOOLEAN_NO) { + arl_expect(arl_tcpext, "TCPOFOQueue", &tcpext_TCPOFOQueue); + arl_expect(arl_tcpext, "TCPOFODrop", &tcpext_TCPOFODrop); + arl_expect(arl_tcpext, "TCPOFOMerge", &tcpext_TCPOFOMerge); + arl_expect(arl_tcpext, "OfoPruned", &tcpext_OfoPruned); + } + + if(do_tcpext_connaborts != CONFIG_BOOLEAN_NO) { + arl_expect(arl_tcpext, "TCPAbortOnData", &tcpext_TCPAbortOnData); + arl_expect(arl_tcpext, "TCPAbortOnClose", &tcpext_TCPAbortOnClose); + arl_expect(arl_tcpext, "TCPAbortOnMemory", &tcpext_TCPAbortOnMemory); + arl_expect(arl_tcpext, "TCPAbortOnTimeout", &tcpext_TCPAbortOnTimeout); + arl_expect(arl_tcpext, "TCPAbortOnLinger", &tcpext_TCPAbortOnLinger); + arl_expect(arl_tcpext, "TCPAbortFailed", &tcpext_TCPAbortFailed); + } + + if(do_tcpext_memory != CONFIG_BOOLEAN_NO) { + arl_expect(arl_tcpext, "TCPMemoryPressures", &tcpext_TCPMemoryPressures); + } + + if(do_tcpext_accept_queue != CONFIG_BOOLEAN_NO) { + arl_expect(arl_tcpext, "ListenOverflows", &tcpext_ListenOverflows); + arl_expect(arl_tcpext, "ListenDrops", &tcpext_ListenDrops); + } + + if(do_tcpext_syn_queue != CONFIG_BOOLEAN_NO) { + arl_expect(arl_tcpext, "TCPReqQFullDrop", &tcpext_TCPReqQFullDrop); + arl_expect(arl_tcpext, "TCPReqQFullDoCookies", &tcpext_TCPReqQFullDoCookies); + } + + arl_expect(arl_tcpext, "TCPSynRetrans", &tcpext_TCPSynRetrans); + } + + // prepare for /proc/net/snmp parsing + + if(unlikely(!arl_ip)) { + do_ip_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 packets", CONFIG_BOOLEAN_AUTO); + do_ip_fragsout = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 fragments sent", CONFIG_BOOLEAN_AUTO); + do_ip_fragsin = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 fragments assembly", CONFIG_BOOLEAN_AUTO); + do_ip_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 errors", CONFIG_BOOLEAN_AUTO); + do_tcp_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 TCP connections", CONFIG_BOOLEAN_AUTO); + do_tcp_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 TCP packets", CONFIG_BOOLEAN_AUTO); + do_tcp_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 TCP errors", CONFIG_BOOLEAN_AUTO); + do_tcp_opens = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 TCP opens", CONFIG_BOOLEAN_AUTO); + do_tcp_handshake = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 TCP handshake issues", CONFIG_BOOLEAN_AUTO); + do_udp_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 UDP packets", CONFIG_BOOLEAN_AUTO); + do_udp_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 UDP errors", CONFIG_BOOLEAN_AUTO); + do_icmp_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 ICMP packets", CONFIG_BOOLEAN_AUTO); + do_icmpmsg = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 ICMP messages", CONFIG_BOOLEAN_AUTO); + do_udplite_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp", "ipv4 UDPLite packets", CONFIG_BOOLEAN_AUTO); + + hash_ip = simple_hash("Ip"); + hash_tcp = simple_hash("Tcp"); + hash_udp = simple_hash("Udp"); + hash_icmp = simple_hash("Icmp"); + hash_icmpmsg = simple_hash("IcmpMsg"); + hash_udplite = simple_hash("UdpLite"); + + arl_ip = arl_create("snmp/Ip", arl_callback_str2kernel_uint_t, 60); + // arl_expect(arl_ip, "Forwarding", &snmp_root.ip_Forwarding); + arl_expect(arl_ip, "DefaultTTL", &snmp_root.ip_DefaultTTL); + arl_expect(arl_ip, "InReceives", &snmp_root.ip_InReceives); + arl_expect(arl_ip, "InHdrErrors", &snmp_root.ip_InHdrErrors); + arl_expect(arl_ip, "InAddrErrors", &snmp_root.ip_InAddrErrors); + arl_expect(arl_ip, "ForwDatagrams", &snmp_root.ip_ForwDatagrams); + arl_expect(arl_ip, "InUnknownProtos", &snmp_root.ip_InUnknownProtos); + arl_expect(arl_ip, "InDiscards", &snmp_root.ip_InDiscards); + arl_expect(arl_ip, "InDelivers", &snmp_root.ip_InDelivers); + arl_expect(arl_ip, "OutRequests", &snmp_root.ip_OutRequests); + arl_expect(arl_ip, "OutDiscards", &snmp_root.ip_OutDiscards); + arl_expect(arl_ip, "OutNoRoutes", &snmp_root.ip_OutNoRoutes); + arl_expect(arl_ip, "ReasmTimeout", &snmp_root.ip_ReasmTimeout); + arl_expect(arl_ip, "ReasmReqds", &snmp_root.ip_ReasmReqds); + arl_expect(arl_ip, "ReasmOKs", &snmp_root.ip_ReasmOKs); + arl_expect(arl_ip, "ReasmFails", &snmp_root.ip_ReasmFails); + arl_expect(arl_ip, "FragOKs", &snmp_root.ip_FragOKs); + arl_expect(arl_ip, "FragFails", &snmp_root.ip_FragFails); + arl_expect(arl_ip, "FragCreates", &snmp_root.ip_FragCreates); + + arl_icmp = arl_create("snmp/Icmp", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_icmp, "InMsgs", &snmp_root.icmp_InMsgs); + arl_expect(arl_icmp, "OutMsgs", &snmp_root.icmp_OutMsgs); + arl_expect(arl_icmp, "InErrors", &snmp_root.icmp_InErrors); + arl_expect(arl_icmp, "OutErrors", &snmp_root.icmp_OutErrors); + arl_expect(arl_icmp, "InCsumErrors", &snmp_root.icmp_InCsumErrors); + + arl_icmpmsg = arl_create("snmp/Icmpmsg", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_icmpmsg, "InType0", &snmp_root.icmpmsg_InEchoReps); + arl_expect(arl_icmpmsg, "OutType0", &snmp_root.icmpmsg_OutEchoReps); + arl_expect(arl_icmpmsg, "InType3", &snmp_root.icmpmsg_InDestUnreachs); + arl_expect(arl_icmpmsg, "OutType3", &snmp_root.icmpmsg_OutDestUnreachs); + arl_expect(arl_icmpmsg, "InType5", &snmp_root.icmpmsg_InRedirects); + arl_expect(arl_icmpmsg, "OutType5", &snmp_root.icmpmsg_OutRedirects); + arl_expect(arl_icmpmsg, "InType8", &snmp_root.icmpmsg_InEchos); + arl_expect(arl_icmpmsg, "OutType8", &snmp_root.icmpmsg_OutEchos); + arl_expect(arl_icmpmsg, "InType9", &snmp_root.icmpmsg_InRouterAdvert); + arl_expect(arl_icmpmsg, "OutType9", &snmp_root.icmpmsg_OutRouterAdvert); + arl_expect(arl_icmpmsg, "InType10", &snmp_root.icmpmsg_InRouterSelect); + arl_expect(arl_icmpmsg, "OutType10", &snmp_root.icmpmsg_OutRouterSelect); + arl_expect(arl_icmpmsg, "InType11", &snmp_root.icmpmsg_InTimeExcds); + arl_expect(arl_icmpmsg, "OutType11", &snmp_root.icmpmsg_OutTimeExcds); + arl_expect(arl_icmpmsg, "InType12", &snmp_root.icmpmsg_InParmProbs); + arl_expect(arl_icmpmsg, "OutType12", &snmp_root.icmpmsg_OutParmProbs); + arl_expect(arl_icmpmsg, "InType13", &snmp_root.icmpmsg_InTimestamps); + arl_expect(arl_icmpmsg, "OutType13", &snmp_root.icmpmsg_OutTimestamps); + arl_expect(arl_icmpmsg, "InType14", &snmp_root.icmpmsg_InTimestampReps); + arl_expect(arl_icmpmsg, "OutType14", &snmp_root.icmpmsg_OutTimestampReps); + + arl_tcp = arl_create("snmp/Tcp", arl_callback_str2kernel_uint_t, 60); + // arl_expect(arl_tcp, "RtoAlgorithm", &snmp_root.tcp_RtoAlgorithm); + // arl_expect(arl_tcp, "RtoMin", &snmp_root.tcp_RtoMin); + // arl_expect(arl_tcp, "RtoMax", &snmp_root.tcp_RtoMax); + arl_expect_custom(arl_tcp, "MaxConn", arl_callback_ssize_t, &snmp_root.tcp_MaxConn); + arl_expect(arl_tcp, "ActiveOpens", &snmp_root.tcp_ActiveOpens); + arl_expect(arl_tcp, "PassiveOpens", &snmp_root.tcp_PassiveOpens); + arl_expect(arl_tcp, "AttemptFails", &snmp_root.tcp_AttemptFails); + arl_expect(arl_tcp, "EstabResets", &snmp_root.tcp_EstabResets); + arl_expect(arl_tcp, "CurrEstab", &snmp_root.tcp_CurrEstab); + arl_expect(arl_tcp, "InSegs", &snmp_root.tcp_InSegs); + arl_expect(arl_tcp, "OutSegs", &snmp_root.tcp_OutSegs); + arl_expect(arl_tcp, "RetransSegs", &snmp_root.tcp_RetransSegs); + arl_expect(arl_tcp, "InErrs", &snmp_root.tcp_InErrs); + arl_expect(arl_tcp, "OutRsts", &snmp_root.tcp_OutRsts); + arl_expect(arl_tcp, "InCsumErrors", &snmp_root.tcp_InCsumErrors); + + arl_udp = arl_create("snmp/Udp", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_udp, "InDatagrams", &snmp_root.udp_InDatagrams); + arl_expect(arl_udp, "NoPorts", &snmp_root.udp_NoPorts); + arl_expect(arl_udp, "InErrors", &snmp_root.udp_InErrors); + arl_expect(arl_udp, "OutDatagrams", &snmp_root.udp_OutDatagrams); + arl_expect(arl_udp, "RcvbufErrors", &snmp_root.udp_RcvbufErrors); + arl_expect(arl_udp, "SndbufErrors", &snmp_root.udp_SndbufErrors); + arl_expect(arl_udp, "InCsumErrors", &snmp_root.udp_InCsumErrors); + arl_expect(arl_udp, "IgnoredMulti", &snmp_root.udp_IgnoredMulti); + + arl_udplite = arl_create("snmp/Udplite", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_udplite, "InDatagrams", &snmp_root.udplite_InDatagrams); + arl_expect(arl_udplite, "NoPorts", &snmp_root.udplite_NoPorts); + arl_expect(arl_udplite, "InErrors", &snmp_root.udplite_InErrors); + arl_expect(arl_udplite, "OutDatagrams", &snmp_root.udplite_OutDatagrams); + arl_expect(arl_udplite, "RcvbufErrors", &snmp_root.udplite_RcvbufErrors); + arl_expect(arl_udplite, "SndbufErrors", &snmp_root.udplite_SndbufErrors); + arl_expect(arl_udplite, "InCsumErrors", &snmp_root.udplite_InCsumErrors); + arl_expect(arl_udplite, "IgnoredMulti", &snmp_root.udplite_IgnoredMulti); + + tcp_max_connections_var = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_max_connections"); + } + + // prepare for /proc/net/snmp6 parsing + + if(unlikely(!arl_ipv6)) { + do_ip6_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 packets", CONFIG_BOOLEAN_AUTO); + do_ip6_fragsout = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 fragments sent", CONFIG_BOOLEAN_AUTO); + do_ip6_fragsin = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 fragments assembly", CONFIG_BOOLEAN_AUTO); + do_ip6_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 errors", CONFIG_BOOLEAN_AUTO); + do_ip6_udp_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 UDP packets", CONFIG_BOOLEAN_AUTO); + do_ip6_udp_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 UDP errors", CONFIG_BOOLEAN_AUTO); + do_ip6_udplite_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 UDPlite packets", CONFIG_BOOLEAN_AUTO); + do_ip6_udplite_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ipv6 UDPlite errors", CONFIG_BOOLEAN_AUTO); + do_ip6_bandwidth = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "bandwidth", CONFIG_BOOLEAN_AUTO); + do_ip6_mcast = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "multicast bandwidth", CONFIG_BOOLEAN_AUTO); + do_ip6_bcast = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "broadcast bandwidth", CONFIG_BOOLEAN_AUTO); + do_ip6_mcast_p = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "multicast packets", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_redir = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp redirects", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp errors", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_echos = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp echos", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_groupmemb = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp group membership", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_router = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp router", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_neighbor = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp neighbor", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_mldv2 = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp mldv2", CONFIG_BOOLEAN_AUTO); + do_ip6_icmp_types = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "icmp types", CONFIG_BOOLEAN_AUTO); + do_ip6_ect = config_get_boolean_ondemand("plugin:proc:/proc/net/snmp6", "ect", CONFIG_BOOLEAN_AUTO); + + arl_ipv6 = arl_create("snmp6", NULL, 60); + arl_expect(arl_ipv6, "Ip6InReceives", &Ip6InReceives); + arl_expect(arl_ipv6, "Ip6InHdrErrors", &Ip6InHdrErrors); + arl_expect(arl_ipv6, "Ip6InTooBigErrors", &Ip6InTooBigErrors); + arl_expect(arl_ipv6, "Ip6InNoRoutes", &Ip6InNoRoutes); + arl_expect(arl_ipv6, "Ip6InAddrErrors", &Ip6InAddrErrors); + arl_expect(arl_ipv6, "Ip6InUnknownProtos", &Ip6InUnknownProtos); + arl_expect(arl_ipv6, "Ip6InTruncatedPkts", &Ip6InTruncatedPkts); + arl_expect(arl_ipv6, "Ip6InDiscards", &Ip6InDiscards); + arl_expect(arl_ipv6, "Ip6InDelivers", &Ip6InDelivers); + arl_expect(arl_ipv6, "Ip6OutForwDatagrams", &Ip6OutForwDatagrams); + arl_expect(arl_ipv6, "Ip6OutRequests", &Ip6OutRequests); + arl_expect(arl_ipv6, "Ip6OutDiscards", &Ip6OutDiscards); + arl_expect(arl_ipv6, "Ip6OutNoRoutes", &Ip6OutNoRoutes); + arl_expect(arl_ipv6, "Ip6ReasmTimeout", &Ip6ReasmTimeout); + arl_expect(arl_ipv6, "Ip6ReasmReqds", &Ip6ReasmReqds); + arl_expect(arl_ipv6, "Ip6ReasmOKs", &Ip6ReasmOKs); + arl_expect(arl_ipv6, "Ip6ReasmFails", &Ip6ReasmFails); + arl_expect(arl_ipv6, "Ip6FragOKs", &Ip6FragOKs); + arl_expect(arl_ipv6, "Ip6FragFails", &Ip6FragFails); + arl_expect(arl_ipv6, "Ip6FragCreates", &Ip6FragCreates); + arl_expect(arl_ipv6, "Ip6InMcastPkts", &Ip6InMcastPkts); + arl_expect(arl_ipv6, "Ip6OutMcastPkts", &Ip6OutMcastPkts); + arl_expect(arl_ipv6, "Ip6InOctets", &Ip6InOctets); + arl_expect(arl_ipv6, "Ip6OutOctets", &Ip6OutOctets); + arl_expect(arl_ipv6, "Ip6InMcastOctets", &Ip6InMcastOctets); + arl_expect(arl_ipv6, "Ip6OutMcastOctets", &Ip6OutMcastOctets); + arl_expect(arl_ipv6, "Ip6InBcastOctets", &Ip6InBcastOctets); + arl_expect(arl_ipv6, "Ip6OutBcastOctets", &Ip6OutBcastOctets); + arl_expect(arl_ipv6, "Ip6InNoECTPkts", &Ip6InNoECTPkts); + arl_expect(arl_ipv6, "Ip6InECT1Pkts", &Ip6InECT1Pkts); + arl_expect(arl_ipv6, "Ip6InECT0Pkts", &Ip6InECT0Pkts); + arl_expect(arl_ipv6, "Ip6InCEPkts", &Ip6InCEPkts); + arl_expect(arl_ipv6, "Icmp6InMsgs", &Icmp6InMsgs); + arl_expect(arl_ipv6, "Icmp6InErrors", &Icmp6InErrors); + arl_expect(arl_ipv6, "Icmp6OutMsgs", &Icmp6OutMsgs); + arl_expect(arl_ipv6, "Icmp6OutErrors", &Icmp6OutErrors); + arl_expect(arl_ipv6, "Icmp6InCsumErrors", &Icmp6InCsumErrors); + arl_expect(arl_ipv6, "Icmp6InDestUnreachs", &Icmp6InDestUnreachs); + arl_expect(arl_ipv6, "Icmp6InPktTooBigs", &Icmp6InPktTooBigs); + arl_expect(arl_ipv6, "Icmp6InTimeExcds", &Icmp6InTimeExcds); + arl_expect(arl_ipv6, "Icmp6InParmProblems", &Icmp6InParmProblems); + arl_expect(arl_ipv6, "Icmp6InEchos", &Icmp6InEchos); + arl_expect(arl_ipv6, "Icmp6InEchoReplies", &Icmp6InEchoReplies); + arl_expect(arl_ipv6, "Icmp6InGroupMembQueries", &Icmp6InGroupMembQueries); + arl_expect(arl_ipv6, "Icmp6InGroupMembResponses", &Icmp6InGroupMembResponses); + arl_expect(arl_ipv6, "Icmp6InGroupMembReductions", &Icmp6InGroupMembReductions); + arl_expect(arl_ipv6, "Icmp6InRouterSolicits", &Icmp6InRouterSolicits); + arl_expect(arl_ipv6, "Icmp6InRouterAdvertisements", &Icmp6InRouterAdvertisements); + arl_expect(arl_ipv6, "Icmp6InNeighborSolicits", &Icmp6InNeighborSolicits); + arl_expect(arl_ipv6, "Icmp6InNeighborAdvertisements", &Icmp6InNeighborAdvertisements); + arl_expect(arl_ipv6, "Icmp6InRedirects", &Icmp6InRedirects); + arl_expect(arl_ipv6, "Icmp6InMLDv2Reports", &Icmp6InMLDv2Reports); + arl_expect(arl_ipv6, "Icmp6OutDestUnreachs", &Icmp6OutDestUnreachs); + arl_expect(arl_ipv6, "Icmp6OutPktTooBigs", &Icmp6OutPktTooBigs); + arl_expect(arl_ipv6, "Icmp6OutTimeExcds", &Icmp6OutTimeExcds); + arl_expect(arl_ipv6, "Icmp6OutParmProblems", &Icmp6OutParmProblems); + arl_expect(arl_ipv6, "Icmp6OutEchos", &Icmp6OutEchos); + arl_expect(arl_ipv6, "Icmp6OutEchoReplies", &Icmp6OutEchoReplies); + arl_expect(arl_ipv6, "Icmp6OutGroupMembQueries", &Icmp6OutGroupMembQueries); + arl_expect(arl_ipv6, "Icmp6OutGroupMembResponses", &Icmp6OutGroupMembResponses); + arl_expect(arl_ipv6, "Icmp6OutGroupMembReductions", &Icmp6OutGroupMembReductions); + arl_expect(arl_ipv6, "Icmp6OutRouterSolicits", &Icmp6OutRouterSolicits); + arl_expect(arl_ipv6, "Icmp6OutRouterAdvertisements", &Icmp6OutRouterAdvertisements); + arl_expect(arl_ipv6, "Icmp6OutNeighborSolicits", &Icmp6OutNeighborSolicits); + arl_expect(arl_ipv6, "Icmp6OutNeighborAdvertisements", &Icmp6OutNeighborAdvertisements); + arl_expect(arl_ipv6, "Icmp6OutRedirects", &Icmp6OutRedirects); + arl_expect(arl_ipv6, "Icmp6OutMLDv2Reports", &Icmp6OutMLDv2Reports); + arl_expect(arl_ipv6, "Icmp6InType1", &Icmp6InType1); + arl_expect(arl_ipv6, "Icmp6InType128", &Icmp6InType128); + arl_expect(arl_ipv6, "Icmp6InType129", &Icmp6InType129); + arl_expect(arl_ipv6, "Icmp6InType136", &Icmp6InType136); + arl_expect(arl_ipv6, "Icmp6OutType1", &Icmp6OutType1); + arl_expect(arl_ipv6, "Icmp6OutType128", &Icmp6OutType128); + arl_expect(arl_ipv6, "Icmp6OutType129", &Icmp6OutType129); + arl_expect(arl_ipv6, "Icmp6OutType133", &Icmp6OutType133); + arl_expect(arl_ipv6, "Icmp6OutType135", &Icmp6OutType135); + arl_expect(arl_ipv6, "Icmp6OutType143", &Icmp6OutType143); + arl_expect(arl_ipv6, "Udp6InDatagrams", &Udp6InDatagrams); + arl_expect(arl_ipv6, "Udp6NoPorts", &Udp6NoPorts); + arl_expect(arl_ipv6, "Udp6InErrors", &Udp6InErrors); + arl_expect(arl_ipv6, "Udp6OutDatagrams", &Udp6OutDatagrams); + arl_expect(arl_ipv6, "Udp6RcvbufErrors", &Udp6RcvbufErrors); + arl_expect(arl_ipv6, "Udp6SndbufErrors", &Udp6SndbufErrors); + arl_expect(arl_ipv6, "Udp6InCsumErrors", &Udp6InCsumErrors); + arl_expect(arl_ipv6, "Udp6IgnoredMulti", &Udp6IgnoredMulti); + arl_expect(arl_ipv6, "UdpLite6InDatagrams", &UdpLite6InDatagrams); + arl_expect(arl_ipv6, "UdpLite6NoPorts", &UdpLite6NoPorts); + arl_expect(arl_ipv6, "UdpLite6InErrors", &UdpLite6InErrors); + arl_expect(arl_ipv6, "UdpLite6OutDatagrams", &UdpLite6OutDatagrams); + arl_expect(arl_ipv6, "UdpLite6RcvbufErrors", &UdpLite6RcvbufErrors); + arl_expect(arl_ipv6, "UdpLite6SndbufErrors", &UdpLite6SndbufErrors); + arl_expect(arl_ipv6, "UdpLite6InCsumErrors", &UdpLite6InCsumErrors); + } + + size_t lines, l, words; + + // parse /proc/net/netstat + + if(unlikely(!ff_netstat)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/netstat"); + ff_netstat = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_NETSTAT, "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff_netstat)) return 1; + } + + ff_netstat = procfile_readall(ff_netstat); + if(unlikely(!ff_netstat)) return 0; // we return 0, so that we will retry to open it next time + + lines = procfile_lines(ff_netstat); + + arl_begin(arl_ipext); + arl_begin(arl_tcpext); + + for(l = 0; l < lines ;l++) { + char *key = procfile_lineword(ff_netstat, l, 0); + uint32_t hash = simple_hash(key); + + if(unlikely(hash == hash_ipext && strcmp(key, "IpExt") == 0)) { + size_t h = l++; + + words = procfile_linewords(ff_netstat, l); + if(unlikely(words < 2)) { + error("Cannot read /proc/net/netstat IpExt line. Expected 2+ params, read %zu.", words); + continue; + } + + parse_line_pair(ff_netstat, arl_ipext, h, l); + + } + else if(unlikely(hash == hash_tcpext && strcmp(key, "TcpExt") == 0)) { + size_t h = l++; + + words = procfile_linewords(ff_netstat, l); + if(unlikely(words < 2)) { + error("Cannot read /proc/net/netstat TcpExt line. Expected 2+ params, read %zu.", words); + continue; + } + + parse_line_pair(ff_netstat, arl_tcpext, h, l); + } + } + + // parse /proc/net/snmp + + if(unlikely(!ff_snmp)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/snmp"); + ff_snmp = procfile_open(config_get("plugin:proc:/proc/net/snmp", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff_snmp)) return 1; + } + + ff_snmp = procfile_readall(ff_snmp); + if(unlikely(!ff_snmp)) return 0; // we return 0, so that we will retry to open it next time + + lines = procfile_lines(ff_snmp); + size_t w; + + for(l = 0; l < lines ;l++) { + char *key = procfile_lineword(ff_snmp, l, 0); + uint32_t hash = simple_hash(key); + + if(unlikely(hash == hash_ip && strcmp(key, "Ip") == 0)) { + size_t h = l++; + + if(strcmp(procfile_lineword(ff_snmp, l, 0), "Ip") != 0) { + error("Cannot read Ip line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff_snmp, l); + if(words < 3) { + error("Cannot read /proc/net/snmp Ip line. Expected 3+ params, read %zu.", words); + continue; + } + + arl_begin(arl_ip); + for(w = 1; w < words ; w++) { + if (unlikely(arl_check(arl_ip, procfile_lineword(ff_snmp, h, w), procfile_lineword(ff_snmp, l, w)) != 0)) + break; + } + } + else if(unlikely(hash == hash_icmp && strcmp(key, "Icmp") == 0)) { + size_t h = l++; + + if(strcmp(procfile_lineword(ff_snmp, l, 0), "Icmp") != 0) { + error("Cannot read Icmp line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff_snmp, l); + if(words < 3) { + error("Cannot read /proc/net/snmp Icmp line. Expected 3+ params, read %zu.", words); + continue; + } + + arl_begin(arl_icmp); + for(w = 1; w < words ; w++) { + if (unlikely(arl_check(arl_icmp, procfile_lineword(ff_snmp, h, w), procfile_lineword(ff_snmp, l, w)) != 0)) + break; + } + } + else if(unlikely(hash == hash_icmpmsg && strcmp(key, "IcmpMsg") == 0)) { + size_t h = l++; + + if(strcmp(procfile_lineword(ff_snmp, l, 0), "IcmpMsg") != 0) { + error("Cannot read IcmpMsg line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff_snmp, l); + if(words < 2) { + error("Cannot read /proc/net/snmp IcmpMsg line. Expected 2+ params, read %zu.", words); + continue; + } + + arl_begin(arl_icmpmsg); + for(w = 1; w < words ; w++) { + if (unlikely(arl_check(arl_icmpmsg, procfile_lineword(ff_snmp, h, w), procfile_lineword(ff_snmp, l, w)) != 0)) + break; + } + } + else if(unlikely(hash == hash_tcp && strcmp(key, "Tcp") == 0)) { + size_t h = l++; + + if(strcmp(procfile_lineword(ff_snmp, l, 0), "Tcp") != 0) { + error("Cannot read Tcp line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff_snmp, l); + if(words < 3) { + error("Cannot read /proc/net/snmp Tcp line. Expected 3+ params, read %zu.", words); + continue; + } + + arl_begin(arl_tcp); + for(w = 1; w < words ; w++) { + if (unlikely(arl_check(arl_tcp, procfile_lineword(ff_snmp, h, w), procfile_lineword(ff_snmp, l, w)) != 0)) + break; + } + } + else if(unlikely(hash == hash_udp && strcmp(key, "Udp") == 0)) { + size_t h = l++; + + if(strcmp(procfile_lineword(ff_snmp, l, 0), "Udp") != 0) { + error("Cannot read Udp line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff_snmp, l); + if(words < 3) { + error("Cannot read /proc/net/snmp Udp line. Expected 3+ params, read %zu.", words); + continue; + } + + arl_begin(arl_udp); + for(w = 1; w < words ; w++) { + if (unlikely(arl_check(arl_udp, procfile_lineword(ff_snmp, h, w), procfile_lineword(ff_snmp, l, w)) != 0)) + break; + } + } + else if(unlikely(hash == hash_udplite && strcmp(key, "UdpLite") == 0)) { + size_t h = l++; + + if(strcmp(procfile_lineword(ff_snmp, l, 0), "UdpLite") != 0) { + error("Cannot read UdpLite line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff_snmp, l); + if(words < 3) { + error("Cannot read /proc/net/snmp UdpLite line. Expected 3+ params, read %zu.", words); + continue; + } + + arl_begin(arl_udplite); + for(w = 1; w < words ; w++) { + if (unlikely(arl_check(arl_udplite, procfile_lineword(ff_snmp, h, w), procfile_lineword(ff_snmp, l, w)) != 0)) + break; + } + } + } + + // parse /proc/net/snmp + + if(unlikely(!ff_snmp6)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/snmp6"); + ff_snmp6 = procfile_open(config_get("plugin:proc:/proc/net/snmp6", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff_snmp6)) + return 1; + } + + ff_snmp6 = procfile_readall(ff_snmp6); + if(unlikely(!ff_snmp6)) + return 0; // we return 0, so that we will retry to open it next time + + lines = procfile_lines(ff_snmp6); + + arl_begin(arl_ipv6); + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff_snmp6, l); + if(unlikely(words < 2)) { + if(unlikely(words)) error("Cannot read /proc/net/snmp6 line %zu. Expected 2 params, read %zu.", l, words); + continue; + } + + if(unlikely(arl_check(arl_ipv6, + procfile_lineword(ff_snmp6, l, 0), + procfile_lineword(ff_snmp6, l, 1)))) break; + } + + // netstat IpExt charts + + if(do_bandwidth == CONFIG_BOOLEAN_YES || (do_bandwidth == CONFIG_BOOLEAN_AUTO && + (ipext_InOctets || + ipext_OutOctets || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_bandwidth = CONFIG_BOOLEAN_YES; + static RRDSET *st_system_ip = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_system_ip)) { + st_system_ip = rrdset_create_localhost( + "system" + , RRD_TYPE_NET_NETSTAT + , NULL + , "network" + , NULL + , "IP Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_IP + , update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_system_ip, "InOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_system_ip, "OutOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_system_ip, rd_in, ipext_InOctets); + rrddim_set_by_pointer(st_system_ip, rd_out, ipext_OutOctets); + rrdset_done(st_system_ip); + } + + if(do_inerrors == CONFIG_BOOLEAN_YES || (do_inerrors == CONFIG_BOOLEAN_AUTO && + (ipext_InNoRoutes || + ipext_InTruncatedPkts || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_inerrors = CONFIG_BOOLEAN_YES; + static RRDSET *st_ip_inerrors = NULL; + static RRDDIM *rd_noroutes = NULL, *rd_truncated = NULL, *rd_checksum = NULL; + + if(unlikely(!st_ip_inerrors)) { + st_ip_inerrors = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "inerrors" + , NULL + , "errors" + , NULL + , "IP Input Errors" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_ERRORS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_ip_inerrors, RRDSET_FLAG_DETAIL); + + rd_noroutes = rrddim_add(st_ip_inerrors, "InNoRoutes", "noroutes", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_truncated = rrddim_add(st_ip_inerrors, "InTruncatedPkts", "truncated", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_checksum = rrddim_add(st_ip_inerrors, "InCsumErrors", "checksum", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ip_inerrors, rd_noroutes, ipext_InNoRoutes); + rrddim_set_by_pointer(st_ip_inerrors, rd_truncated, ipext_InTruncatedPkts); + rrddim_set_by_pointer(st_ip_inerrors, rd_checksum, ipext_InCsumErrors); + rrdset_done(st_ip_inerrors); + } + + if(do_mcast == CONFIG_BOOLEAN_YES || (do_mcast == CONFIG_BOOLEAN_AUTO && + (ipext_InMcastOctets || + ipext_OutMcastOctets || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_mcast = CONFIG_BOOLEAN_YES; + static RRDSET *st_ip_mcast = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_ip_mcast)) { + st_ip_mcast = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "mcast" + , NULL + , "multicast" + , NULL + , "IP Multicast Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_MCAST + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(st_ip_mcast, RRDSET_FLAG_DETAIL); + + rd_in = rrddim_add(st_ip_mcast, "InMcastOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_ip_mcast, "OutMcastOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ip_mcast, rd_in, ipext_InMcastOctets); + rrddim_set_by_pointer(st_ip_mcast, rd_out, ipext_OutMcastOctets); + + rrdset_done(st_ip_mcast); + } + + // -------------------------------------------------------------------- + + if(do_bcast == CONFIG_BOOLEAN_YES || (do_bcast == CONFIG_BOOLEAN_AUTO && + (ipext_InBcastOctets || + ipext_OutBcastOctets || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_bcast = CONFIG_BOOLEAN_YES; + + static RRDSET *st_ip_bcast = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_ip_bcast)) { + st_ip_bcast = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "bcast" + , NULL + , "broadcast" + , NULL + , "IP Broadcast Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_BCAST + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_flag_set(st_ip_bcast, RRDSET_FLAG_DETAIL); + + rd_in = rrddim_add(st_ip_bcast, "InBcastOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_ip_bcast, "OutBcastOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ip_bcast, rd_in, ipext_InBcastOctets); + rrddim_set_by_pointer(st_ip_bcast, rd_out, ipext_OutBcastOctets); + + rrdset_done(st_ip_bcast); + } + + // -------------------------------------------------------------------- + + if(do_mcast_p == CONFIG_BOOLEAN_YES || (do_mcast_p == CONFIG_BOOLEAN_AUTO && + (ipext_InMcastPkts || + ipext_OutMcastPkts || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_mcast_p = CONFIG_BOOLEAN_YES; + + static RRDSET *st_ip_mcastpkts = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_ip_mcastpkts)) { + st_ip_mcastpkts = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "mcastpkts" + , NULL + , "multicast" + , NULL + , "IP Multicast Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_MCAST_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_ip_mcastpkts, RRDSET_FLAG_DETAIL); + + rd_in = rrddim_add(st_ip_mcastpkts, "InMcastPkts", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_ip_mcastpkts, "OutMcastPkts", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ip_mcastpkts, rd_in, ipext_InMcastPkts); + rrddim_set_by_pointer(st_ip_mcastpkts, rd_out, ipext_OutMcastPkts); + rrdset_done(st_ip_mcastpkts); + } + + if(do_bcast_p == CONFIG_BOOLEAN_YES || (do_bcast_p == CONFIG_BOOLEAN_AUTO && + (ipext_InBcastPkts || + ipext_OutBcastPkts || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_bcast_p = CONFIG_BOOLEAN_YES; + + static RRDSET *st_ip_bcastpkts = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_ip_bcastpkts)) { + st_ip_bcastpkts = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "bcastpkts" + , NULL + , "broadcast" + , NULL + , "IP Broadcast Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_BCAST_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_ip_bcastpkts, RRDSET_FLAG_DETAIL); + + rd_in = rrddim_add(st_ip_bcastpkts, "InBcastPkts", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_ip_bcastpkts, "OutBcastPkts", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ip_bcastpkts, rd_in, ipext_InBcastPkts); + rrddim_set_by_pointer(st_ip_bcastpkts, rd_out, ipext_OutBcastPkts); + rrdset_done(st_ip_bcastpkts); + } + + if(do_ecn == CONFIG_BOOLEAN_YES || (do_ecn == CONFIG_BOOLEAN_AUTO && + (ipext_InCEPkts || + ipext_InECT0Pkts || + ipext_InECT1Pkts || + ipext_InNoECTPkts || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ecn = CONFIG_BOOLEAN_YES; + + static RRDSET *st_ecnpkts = NULL; + static RRDDIM *rd_cep = NULL, *rd_noectp = NULL, *rd_ectp0 = NULL, *rd_ectp1 = NULL; + + if(unlikely(!st_ecnpkts)) { + st_ecnpkts = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "ecnpkts" + , NULL + , "ecn" + , NULL + , "IP ECN Statistics" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_ECN + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_ecnpkts, RRDSET_FLAG_DETAIL); + + rd_cep = rrddim_add(st_ecnpkts, "InCEPkts", "CEP", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_noectp = rrddim_add(st_ecnpkts, "InNoECTPkts", "NoECTP", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ectp0 = rrddim_add(st_ecnpkts, "InECT0Pkts", "ECTP0", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ectp1 = rrddim_add(st_ecnpkts, "InECT1Pkts", "ECTP1", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ecnpkts, rd_cep, ipext_InCEPkts); + rrddim_set_by_pointer(st_ecnpkts, rd_noectp, ipext_InNoECTPkts); + rrddim_set_by_pointer(st_ecnpkts, rd_ectp0, ipext_InECT0Pkts); + rrddim_set_by_pointer(st_ecnpkts, rd_ectp1, ipext_InECT1Pkts); + rrdset_done(st_ecnpkts); + } + + // netstat TcpExt charts + + if(do_tcpext_memory == CONFIG_BOOLEAN_YES || (do_tcpext_memory == CONFIG_BOOLEAN_AUTO && + (tcpext_TCPMemoryPressures || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_memory = CONFIG_BOOLEAN_YES; + + static RRDSET *st_tcpmemorypressures = NULL; + static RRDDIM *rd_pressures = NULL; + + if(unlikely(!st_tcpmemorypressures)) { + st_tcpmemorypressures = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "tcpmemorypressures" + , NULL + , "tcp" + , NULL + , "TCP Memory Pressures" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_TCP_MEM + , update_every + , RRDSET_TYPE_LINE + ); + + rd_pressures = rrddim_add(st_tcpmemorypressures, "TCPMemoryPressures", "pressures", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_tcpmemorypressures, rd_pressures, tcpext_TCPMemoryPressures); + rrdset_done(st_tcpmemorypressures); + } + + if(do_tcpext_connaborts == CONFIG_BOOLEAN_YES || (do_tcpext_connaborts == CONFIG_BOOLEAN_AUTO && + (tcpext_TCPAbortOnData || + tcpext_TCPAbortOnClose || + tcpext_TCPAbortOnMemory || + tcpext_TCPAbortOnTimeout || + tcpext_TCPAbortOnLinger || + tcpext_TCPAbortFailed || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_connaborts = CONFIG_BOOLEAN_YES; + + static RRDSET *st_tcpconnaborts = NULL; + static RRDDIM *rd_baddata = NULL, *rd_userclosed = NULL, *rd_nomemory = NULL, *rd_timeout = NULL, *rd_linger = NULL, *rd_failed = NULL; + + if(unlikely(!st_tcpconnaborts)) { + st_tcpconnaborts = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "tcpconnaborts" + , NULL + , "tcp" + , NULL + , "TCP Connection Aborts" + , "connections/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_TCP_CONNABORTS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_baddata = rrddim_add(st_tcpconnaborts, "TCPAbortOnData", "baddata", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_userclosed = rrddim_add(st_tcpconnaborts, "TCPAbortOnClose", "userclosed", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_nomemory = rrddim_add(st_tcpconnaborts, "TCPAbortOnMemory", "nomemory", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_timeout = rrddim_add(st_tcpconnaborts, "TCPAbortOnTimeout", "timeout", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_linger = rrddim_add(st_tcpconnaborts, "TCPAbortOnLinger", "linger", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st_tcpconnaborts, "TCPAbortFailed", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_tcpconnaborts, rd_baddata, tcpext_TCPAbortOnData); + rrddim_set_by_pointer(st_tcpconnaborts, rd_userclosed, tcpext_TCPAbortOnClose); + rrddim_set_by_pointer(st_tcpconnaborts, rd_nomemory, tcpext_TCPAbortOnMemory); + rrddim_set_by_pointer(st_tcpconnaborts, rd_timeout, tcpext_TCPAbortOnTimeout); + rrddim_set_by_pointer(st_tcpconnaborts, rd_linger, tcpext_TCPAbortOnLinger); + rrddim_set_by_pointer(st_tcpconnaborts, rd_failed, tcpext_TCPAbortFailed); + rrdset_done(st_tcpconnaborts); + } + + if(do_tcpext_reorder == CONFIG_BOOLEAN_YES || (do_tcpext_reorder == CONFIG_BOOLEAN_AUTO && + (tcpext_TCPRenoReorder || + tcpext_TCPFACKReorder || + tcpext_TCPSACKReorder || + tcpext_TCPTSReorder || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_reorder = CONFIG_BOOLEAN_YES; + + static RRDSET *st_tcpreorders = NULL; + static RRDDIM *rd_timestamp = NULL, *rd_sack = NULL, *rd_fack = NULL, *rd_reno = NULL; + + if(unlikely(!st_tcpreorders)) { + st_tcpreorders = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "tcpreorders" + , NULL + , "tcp" + , NULL + , "TCP Reordered Packets by Detection Method" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_TCP_REORDERS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_timestamp = rrddim_add(st_tcpreorders, "TCPTSReorder", "timestamp", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sack = rrddim_add(st_tcpreorders, "TCPSACKReorder", "sack", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_fack = rrddim_add(st_tcpreorders, "TCPFACKReorder", "fack", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_reno = rrddim_add(st_tcpreorders, "TCPRenoReorder", "reno", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_tcpreorders, rd_timestamp, tcpext_TCPTSReorder); + rrddim_set_by_pointer(st_tcpreorders, rd_sack, tcpext_TCPSACKReorder); + rrddim_set_by_pointer(st_tcpreorders, rd_fack, tcpext_TCPFACKReorder); + rrddim_set_by_pointer(st_tcpreorders, rd_reno, tcpext_TCPRenoReorder); + rrdset_done(st_tcpreorders); + } + + // -------------------------------------------------------------------- + + if(do_tcpext_ofo == CONFIG_BOOLEAN_YES || (do_tcpext_ofo == CONFIG_BOOLEAN_AUTO && + (tcpext_TCPOFOQueue || + tcpext_TCPOFODrop || + tcpext_TCPOFOMerge || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_ofo = CONFIG_BOOLEAN_YES; + + static RRDSET *st_ip_tcpofo = NULL; + static RRDDIM *rd_inqueue = NULL, *rd_dropped = NULL, *rd_merged = NULL, *rd_pruned = NULL; + + if(unlikely(!st_ip_tcpofo)) { + + st_ip_tcpofo = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "tcpofo" + , NULL + , "tcp" + , NULL + , "TCP Out-Of-Order Queue" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_TCP_OFO + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inqueue = rrddim_add(st_ip_tcpofo, "TCPOFOQueue", "inqueue", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_dropped = rrddim_add(st_ip_tcpofo, "TCPOFODrop", "dropped", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_merged = rrddim_add(st_ip_tcpofo, "TCPOFOMerge", "merged", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pruned = rrddim_add(st_ip_tcpofo, "OfoPruned", "pruned", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ip_tcpofo, rd_inqueue, tcpext_TCPOFOQueue); + rrddim_set_by_pointer(st_ip_tcpofo, rd_dropped, tcpext_TCPOFODrop); + rrddim_set_by_pointer(st_ip_tcpofo, rd_merged, tcpext_TCPOFOMerge); + rrddim_set_by_pointer(st_ip_tcpofo, rd_pruned, tcpext_OfoPruned); + rrdset_done(st_ip_tcpofo); + } + + if(do_tcpext_syscookies == CONFIG_BOOLEAN_YES || (do_tcpext_syscookies == CONFIG_BOOLEAN_AUTO && + (tcpext_SyncookiesSent || + tcpext_SyncookiesRecv || + tcpext_SyncookiesFailed || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_syscookies = CONFIG_BOOLEAN_YES; + + static RRDSET *st_syncookies = NULL; + static RRDDIM *rd_received = NULL, *rd_sent = NULL, *rd_failed = NULL; + + if(unlikely(!st_syncookies)) { + + st_syncookies = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "tcpsyncookies" + , NULL + , "tcp" + , NULL + , "TCP SYN Cookies" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_TCP_SYNCOOKIES + , update_every + , RRDSET_TYPE_LINE + ); + + rd_received = rrddim_add(st_syncookies, "SyncookiesRecv", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st_syncookies, "SyncookiesSent", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st_syncookies, "SyncookiesFailed", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_syncookies, rd_received, tcpext_SyncookiesRecv); + rrddim_set_by_pointer(st_syncookies, rd_sent, tcpext_SyncookiesSent); + rrddim_set_by_pointer(st_syncookies, rd_failed, tcpext_SyncookiesFailed); + rrdset_done(st_syncookies); + } + + if(do_tcpext_syn_queue == CONFIG_BOOLEAN_YES || (do_tcpext_syn_queue == CONFIG_BOOLEAN_AUTO && + (tcpext_TCPReqQFullDrop || + tcpext_TCPReqQFullDoCookies || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_syn_queue = CONFIG_BOOLEAN_YES; + + static RRDSET *st_syn_queue = NULL; + static RRDDIM + *rd_TCPReqQFullDrop = NULL, + *rd_TCPReqQFullDoCookies = NULL; + + if(unlikely(!st_syn_queue)) { + + st_syn_queue = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "tcp_syn_queue" + , NULL + , "tcp" + , NULL + , "TCP SYN Queue Issues" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_TCP_SYN_QUEUE + , update_every + , RRDSET_TYPE_LINE + ); + + rd_TCPReqQFullDrop = rrddim_add(st_syn_queue, "TCPReqQFullDrop", "drops", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_TCPReqQFullDoCookies = rrddim_add(st_syn_queue, "TCPReqQFullDoCookies", "cookies", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_syn_queue, rd_TCPReqQFullDrop, tcpext_TCPReqQFullDrop); + rrddim_set_by_pointer(st_syn_queue, rd_TCPReqQFullDoCookies, tcpext_TCPReqQFullDoCookies); + rrdset_done(st_syn_queue); + } + + if(do_tcpext_accept_queue == CONFIG_BOOLEAN_YES || (do_tcpext_accept_queue == CONFIG_BOOLEAN_AUTO && + (tcpext_ListenOverflows || + tcpext_ListenDrops || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcpext_accept_queue = CONFIG_BOOLEAN_YES; + + static RRDSET *st_accept_queue = NULL; + static RRDDIM *rd_overflows = NULL, + *rd_drops = NULL; + + if(unlikely(!st_accept_queue)) { + + st_accept_queue = rrdset_create_localhost( + RRD_TYPE_NET_NETSTAT + , "tcp_accept_queue" + , NULL + , "tcp" + , NULL + , "TCP Accept Queue Issues" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IP_TCP_ACCEPT_QUEUE + , update_every + , RRDSET_TYPE_LINE + ); + + rd_overflows = rrddim_add(st_accept_queue, "ListenOverflows", "overflows", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_drops = rrddim_add(st_accept_queue, "ListenDrops", "drops", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_accept_queue, rd_overflows, tcpext_ListenOverflows); + rrddim_set_by_pointer(st_accept_queue, rd_drops, tcpext_ListenDrops); + rrdset_done(st_accept_queue); + } + + // snmp Ip charts + + if(do_ip_packets == CONFIG_BOOLEAN_YES || (do_ip_packets == CONFIG_BOOLEAN_AUTO && + (snmp_root.ip_OutRequests || + snmp_root.ip_InReceives || + snmp_root.ip_ForwDatagrams || + snmp_root.ip_InDelivers || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip_packets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_InReceives = NULL, + *rd_OutRequests = NULL, + *rd_ForwDatagrams = NULL, + *rd_InDelivers = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "packets" + , NULL + , "packets" + , NULL + , "IPv4 Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InReceives = rrddim_add(st, "InReceives", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutRequests = rrddim_add(st, "OutRequests", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ForwDatagrams = rrddim_add(st, "ForwDatagrams", "forwarded", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InDelivers = rrddim_add(st, "InDelivers", "delivered", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_OutRequests, (collected_number)snmp_root.ip_OutRequests); + rrddim_set_by_pointer(st, rd_InReceives, (collected_number)snmp_root.ip_InReceives); + rrddim_set_by_pointer(st, rd_ForwDatagrams, (collected_number)snmp_root.ip_ForwDatagrams); + rrddim_set_by_pointer(st, rd_InDelivers, (collected_number)snmp_root.ip_InDelivers); + rrdset_done(st); + } + + if(do_ip_fragsout == CONFIG_BOOLEAN_YES || (do_ip_fragsout == CONFIG_BOOLEAN_AUTO && + (snmp_root.ip_FragOKs || + snmp_root.ip_FragFails || + snmp_root.ip_FragCreates || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip_fragsout = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_FragOKs = NULL, + *rd_FragFails = NULL, + *rd_FragCreates = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "fragsout" + , NULL + , "fragments" + , NULL + , "IPv4 Fragments Sent" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_FRAGMENTS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_FragOKs = rrddim_add(st, "FragOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_FragFails = rrddim_add(st, "FragFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_FragCreates = rrddim_add(st, "FragCreates", "created", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_FragOKs, (collected_number)snmp_root.ip_FragOKs); + rrddim_set_by_pointer(st, rd_FragFails, (collected_number)snmp_root.ip_FragFails); + rrddim_set_by_pointer(st, rd_FragCreates, (collected_number)snmp_root.ip_FragCreates); + rrdset_done(st); + } + + if(do_ip_fragsin == CONFIG_BOOLEAN_YES || (do_ip_fragsin == CONFIG_BOOLEAN_AUTO && + (snmp_root.ip_ReasmOKs || + snmp_root.ip_ReasmFails || + snmp_root.ip_ReasmReqds || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip_fragsin = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_ReasmOKs = NULL, + *rd_ReasmFails = NULL, + *rd_ReasmReqds = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "fragsin" + , NULL + , "fragments" + , NULL + , "IPv4 Fragments Reassembly" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_FRAGMENTS + 1 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ReasmOKs = rrddim_add(st, "ReasmOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ReasmFails = rrddim_add(st, "ReasmFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ReasmReqds = rrddim_add(st, "ReasmReqds", "all", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ReasmOKs, (collected_number)snmp_root.ip_ReasmOKs); + rrddim_set_by_pointer(st, rd_ReasmFails, (collected_number)snmp_root.ip_ReasmFails); + rrddim_set_by_pointer(st, rd_ReasmReqds, (collected_number)snmp_root.ip_ReasmReqds); + rrdset_done(st); + } + + if(do_ip_errors == CONFIG_BOOLEAN_YES || (do_ip_errors == CONFIG_BOOLEAN_AUTO && + (snmp_root.ip_InDiscards || + snmp_root.ip_OutDiscards || + snmp_root.ip_InHdrErrors || + snmp_root.ip_InAddrErrors || + snmp_root.ip_InUnknownProtos || + snmp_root.ip_OutNoRoutes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip_errors = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_InDiscards = NULL, + *rd_OutDiscards = NULL, + *rd_InHdrErrors = NULL, + *rd_OutNoRoutes = NULL, + *rd_InAddrErrors = NULL, + *rd_InUnknownProtos = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "errors" + , NULL + , "errors" + , NULL + , "IPv4 Errors" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_ERRORS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_InDiscards = rrddim_add(st, "InDiscards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutDiscards = rrddim_add(st, "OutDiscards", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + rd_InHdrErrors = rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutNoRoutes = rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + rd_InAddrErrors = rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InUnknownProtos = rrddim_add(st, "InUnknownProtos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InDiscards, (collected_number)snmp_root.ip_InDiscards); + rrddim_set_by_pointer(st, rd_OutDiscards, (collected_number)snmp_root.ip_OutDiscards); + rrddim_set_by_pointer(st, rd_InHdrErrors, (collected_number)snmp_root.ip_InHdrErrors); + rrddim_set_by_pointer(st, rd_InAddrErrors, (collected_number)snmp_root.ip_InAddrErrors); + rrddim_set_by_pointer(st, rd_InUnknownProtos, (collected_number)snmp_root.ip_InUnknownProtos); + rrddim_set_by_pointer(st, rd_OutNoRoutes, (collected_number)snmp_root.ip_OutNoRoutes); + rrdset_done(st); + } + + // snmp Icmp charts + + if(do_icmp_packets == CONFIG_BOOLEAN_YES || (do_icmp_packets == CONFIG_BOOLEAN_AUTO && + (snmp_root.icmp_InMsgs || + snmp_root.icmp_OutMsgs || + snmp_root.icmp_InErrors || + snmp_root.icmp_OutErrors || + snmp_root.icmp_InCsumErrors || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmp_packets = CONFIG_BOOLEAN_YES; + + { + static RRDSET *st_packets = NULL; + static RRDDIM *rd_InMsgs = NULL, + *rd_OutMsgs = NULL; + + if(unlikely(!st_packets)) { + st_packets = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "icmp" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_ICMP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InMsgs = rrddim_add(st_packets, "InMsgs", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutMsgs = rrddim_add(st_packets, "OutMsgs", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_packets, rd_InMsgs, (collected_number)snmp_root.icmp_InMsgs); + rrddim_set_by_pointer(st_packets, rd_OutMsgs, (collected_number)snmp_root.icmp_OutMsgs); + rrdset_done(st_packets); + } + + { + static RRDSET *st_errors = NULL; + static RRDDIM *rd_InErrors = NULL, + *rd_OutErrors = NULL, + *rd_InCsumErrors = NULL; + + if(unlikely(!st_errors)) { + st_errors = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "icmp_errors" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Errors" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_ICMP + 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InErrors = rrddim_add(st_errors, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutErrors = rrddim_add(st_errors, "OutErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCsumErrors = rrddim_add(st_errors, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_errors, rd_InErrors, (collected_number)snmp_root.icmp_InErrors); + rrddim_set_by_pointer(st_errors, rd_OutErrors, (collected_number)snmp_root.icmp_OutErrors); + rrddim_set_by_pointer(st_errors, rd_InCsumErrors, (collected_number)snmp_root.icmp_InCsumErrors); + rrdset_done(st_errors); + } + } + + // snmp IcmpMsg charts + + if(do_icmpmsg == CONFIG_BOOLEAN_YES || (do_icmpmsg == CONFIG_BOOLEAN_AUTO && + (snmp_root.icmpmsg_InEchoReps || + snmp_root.icmpmsg_OutEchoReps || + snmp_root.icmpmsg_InDestUnreachs || + snmp_root.icmpmsg_OutDestUnreachs || + snmp_root.icmpmsg_InRedirects || + snmp_root.icmpmsg_OutRedirects || + snmp_root.icmpmsg_InEchos || + snmp_root.icmpmsg_OutEchos || + snmp_root.icmpmsg_InRouterAdvert || + snmp_root.icmpmsg_OutRouterAdvert || + snmp_root.icmpmsg_InRouterSelect || + snmp_root.icmpmsg_OutRouterSelect || + snmp_root.icmpmsg_InTimeExcds || + snmp_root.icmpmsg_OutTimeExcds || + snmp_root.icmpmsg_InParmProbs || + snmp_root.icmpmsg_OutParmProbs || + snmp_root.icmpmsg_InTimestamps || + snmp_root.icmpmsg_OutTimestamps || + snmp_root.icmpmsg_InTimestampReps || + snmp_root.icmpmsg_OutTimestampReps || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_icmpmsg = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_InEchoReps = NULL, + *rd_OutEchoReps = NULL, + *rd_InDestUnreachs = NULL, + *rd_OutDestUnreachs = NULL, + *rd_InRedirects = NULL, + *rd_OutRedirects = NULL, + *rd_InEchos = NULL, + *rd_OutEchos = NULL, + *rd_InRouterAdvert = NULL, + *rd_OutRouterAdvert = NULL, + *rd_InRouterSelect = NULL, + *rd_OutRouterSelect = NULL, + *rd_InTimeExcds = NULL, + *rd_OutTimeExcds = NULL, + *rd_InParmProbs = NULL, + *rd_OutParmProbs = NULL, + *rd_InTimestamps = NULL, + *rd_OutTimestamps = NULL, + *rd_InTimestampReps = NULL, + *rd_OutTimestampReps = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "icmpmsg" + , NULL + , "icmp" + , NULL + , "IPv4 ICMP Messages" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_ICMP + 2 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InEchoReps = rrddim_add(st, "InType0", "InEchoReps", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutEchoReps = rrddim_add(st, "OutType0", "OutEchoReps", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InDestUnreachs = rrddim_add(st, "InType3", "InDestUnreachs", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutDestUnreachs = rrddim_add(st, "OutType3", "OutDestUnreachs", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InRedirects = rrddim_add(st, "InType5", "InRedirects", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutRedirects = rrddim_add(st, "OutType5", "OutRedirects", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InEchos = rrddim_add(st, "InType8", "InEchos", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutEchos = rrddim_add(st, "OutType8", "OutEchos", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InRouterAdvert = rrddim_add(st, "InType9", "InRouterAdvert", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutRouterAdvert = rrddim_add(st, "OutType9", "OutRouterAdvert", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InRouterSelect = rrddim_add(st, "InType10", "InRouterSelect", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutRouterSelect = rrddim_add(st, "OutType10", "OutRouterSelect", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InTimeExcds = rrddim_add(st, "InType11", "InTimeExcds", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutTimeExcds = rrddim_add(st, "OutType11", "OutTimeExcds", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InParmProbs = rrddim_add(st, "InType12", "InParmProbs", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutParmProbs = rrddim_add(st, "OutType12", "OutParmProbs", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InTimestamps = rrddim_add(st, "InType13", "InTimestamps", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutTimestamps = rrddim_add(st, "OutType13", "OutTimestamps", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InTimestampReps = rrddim_add(st, "InType14", "InTimestampReps", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutTimestampReps = rrddim_add(st, "OutType14", "OutTimestampReps", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InEchoReps, (collected_number)snmp_root.icmpmsg_InEchoReps); + rrddim_set_by_pointer(st, rd_OutEchoReps, (collected_number)snmp_root.icmpmsg_OutEchoReps); + rrddim_set_by_pointer(st, rd_InDestUnreachs, (collected_number)snmp_root.icmpmsg_InDestUnreachs); + rrddim_set_by_pointer(st, rd_OutDestUnreachs, (collected_number)snmp_root.icmpmsg_OutDestUnreachs); + rrddim_set_by_pointer(st, rd_InRedirects, (collected_number)snmp_root.icmpmsg_InRedirects); + rrddim_set_by_pointer(st, rd_OutRedirects, (collected_number)snmp_root.icmpmsg_OutRedirects); + rrddim_set_by_pointer(st, rd_InEchos, (collected_number)snmp_root.icmpmsg_InEchos); + rrddim_set_by_pointer(st, rd_OutEchos, (collected_number)snmp_root.icmpmsg_OutEchos); + rrddim_set_by_pointer(st, rd_InRouterAdvert, (collected_number)snmp_root.icmpmsg_InRouterAdvert); + rrddim_set_by_pointer(st, rd_OutRouterAdvert, (collected_number)snmp_root.icmpmsg_OutRouterAdvert); + rrddim_set_by_pointer(st, rd_InRouterSelect, (collected_number)snmp_root.icmpmsg_InRouterSelect); + rrddim_set_by_pointer(st, rd_OutRouterSelect, (collected_number)snmp_root.icmpmsg_OutRouterSelect); + rrddim_set_by_pointer(st, rd_InTimeExcds, (collected_number)snmp_root.icmpmsg_InTimeExcds); + rrddim_set_by_pointer(st, rd_OutTimeExcds, (collected_number)snmp_root.icmpmsg_OutTimeExcds); + rrddim_set_by_pointer(st, rd_InParmProbs, (collected_number)snmp_root.icmpmsg_InParmProbs); + rrddim_set_by_pointer(st, rd_OutParmProbs, (collected_number)snmp_root.icmpmsg_OutParmProbs); + rrddim_set_by_pointer(st, rd_InTimestamps, (collected_number)snmp_root.icmpmsg_InTimestamps); + rrddim_set_by_pointer(st, rd_OutTimestamps, (collected_number)snmp_root.icmpmsg_OutTimestamps); + rrddim_set_by_pointer(st, rd_InTimestampReps, (collected_number)snmp_root.icmpmsg_InTimestampReps); + rrddim_set_by_pointer(st, rd_OutTimestampReps, (collected_number)snmp_root.icmpmsg_OutTimestampReps); + + rrdset_done(st); + } + + // snmp Tcp charts + + // this is smart enough to update it, only when it is changed + rrdvar_custom_host_variable_set(localhost, tcp_max_connections_var, snmp_root.tcp_MaxConn); + + // see http://net-snmp.sourceforge.net/docs/mibs/tcp.html + if(do_tcp_sockets == CONFIG_BOOLEAN_YES || (do_tcp_sockets == CONFIG_BOOLEAN_AUTO && + (snmp_root.tcp_CurrEstab || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_CurrEstab = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "tcpsock" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Connections" + , "active connections" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_TCP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_CurrEstab = rrddim_add(st, "CurrEstab", "connections", 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_CurrEstab, (collected_number)snmp_root.tcp_CurrEstab); + rrdset_done(st); + } + + if(do_tcp_packets == CONFIG_BOOLEAN_YES || (do_tcp_packets == CONFIG_BOOLEAN_AUTO && + (snmp_root.tcp_InSegs || + snmp_root.tcp_OutSegs || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_packets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_InSegs = NULL, + *rd_OutSegs = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "tcppackets" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_TCP + 4 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InSegs = rrddim_add(st, "InSegs", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutSegs = rrddim_add(st, "OutSegs", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InSegs, (collected_number)snmp_root.tcp_InSegs); + rrddim_set_by_pointer(st, rd_OutSegs, (collected_number)snmp_root.tcp_OutSegs); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_tcp_errors == CONFIG_BOOLEAN_YES || (do_tcp_errors == CONFIG_BOOLEAN_AUTO && + (snmp_root.tcp_InErrs || + snmp_root.tcp_InCsumErrors || + snmp_root.tcp_RetransSegs || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_errors = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_InErrs = NULL, + *rd_InCsumErrors = NULL, + *rd_RetransSegs = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "tcperrors" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Errors" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_TCP + 20 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_InErrs = rrddim_add(st, "InErrs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCsumErrors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_RetransSegs = rrddim_add(st, "RetransSegs", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InErrs, (collected_number)snmp_root.tcp_InErrs); + rrddim_set_by_pointer(st, rd_InCsumErrors, (collected_number)snmp_root.tcp_InCsumErrors); + rrddim_set_by_pointer(st, rd_RetransSegs, (collected_number)snmp_root.tcp_RetransSegs); + rrdset_done(st); + } + + if(do_tcp_opens == CONFIG_BOOLEAN_YES || (do_tcp_opens == CONFIG_BOOLEAN_AUTO && + (snmp_root.tcp_ActiveOpens || + snmp_root.tcp_PassiveOpens || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_opens = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_ActiveOpens = NULL, + *rd_PassiveOpens = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "tcpopens" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Opens" + , "connections/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_TCP + 5 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ActiveOpens = rrddim_add(st, "ActiveOpens", "active", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_PassiveOpens = rrddim_add(st, "PassiveOpens", "passive", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ActiveOpens, (collected_number)snmp_root.tcp_ActiveOpens); + rrddim_set_by_pointer(st, rd_PassiveOpens, (collected_number)snmp_root.tcp_PassiveOpens); + rrdset_done(st); + } + + if(do_tcp_handshake == CONFIG_BOOLEAN_YES || (do_tcp_handshake == CONFIG_BOOLEAN_AUTO && + (snmp_root.tcp_EstabResets || + snmp_root.tcp_OutRsts || + snmp_root.tcp_AttemptFails || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_handshake = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_EstabResets = NULL, + *rd_OutRsts = NULL, + *rd_AttemptFails = NULL, + *rd_TCPSynRetrans = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "tcphandshake" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Handshake Issues" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_TCP + 30 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_EstabResets = rrddim_add(st, "EstabResets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutRsts = rrddim_add(st, "OutRsts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_AttemptFails = rrddim_add(st, "AttemptFails", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_TCPSynRetrans = rrddim_add(st, "TCPSynRetrans", "SynRetrans", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_EstabResets, (collected_number)snmp_root.tcp_EstabResets); + rrddim_set_by_pointer(st, rd_OutRsts, (collected_number)snmp_root.tcp_OutRsts); + rrddim_set_by_pointer(st, rd_AttemptFails, (collected_number)snmp_root.tcp_AttemptFails); + rrddim_set_by_pointer(st, rd_TCPSynRetrans, tcpext_TCPSynRetrans); + rrdset_done(st); + } + + // snmp Udp charts + + // see http://net-snmp.sourceforge.net/docs/mibs/udp.html + if(do_udp_packets == CONFIG_BOOLEAN_YES || (do_udp_packets == CONFIG_BOOLEAN_AUTO && + (snmp_root.udp_InDatagrams || + snmp_root.udp_OutDatagrams || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udp_packets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_InDatagrams = NULL, + *rd_OutDatagrams = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "udppackets" + , NULL + , "udp" + , NULL + , "IPv4 UDP Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_UDP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InDatagrams = rrddim_add(st, "InDatagrams", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutDatagrams = rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InDatagrams, (collected_number)snmp_root.udp_InDatagrams); + rrddim_set_by_pointer(st, rd_OutDatagrams, (collected_number)snmp_root.udp_OutDatagrams); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_udp_errors == CONFIG_BOOLEAN_YES || (do_udp_errors == CONFIG_BOOLEAN_AUTO && + (snmp_root.udp_InErrors || + snmp_root.udp_NoPorts || + snmp_root.udp_RcvbufErrors || + snmp_root.udp_SndbufErrors || + snmp_root.udp_InCsumErrors || + snmp_root.udp_IgnoredMulti || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udp_errors = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_RcvbufErrors = NULL, + *rd_SndbufErrors = NULL, + *rd_InErrors = NULL, + *rd_NoPorts = NULL, + *rd_InCsumErrors = NULL, + *rd_IgnoredMulti = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "udperrors" + , NULL + , "udp" + , NULL + , "IPv4 UDP Errors" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_UDP + 10 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_RcvbufErrors = rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_SndbufErrors = rrddim_add(st, "SndbufErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InErrors = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_NoPorts = rrddim_add(st, "NoPorts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCsumErrors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_IgnoredMulti = rrddim_add(st, "IgnoredMulti", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InErrors, (collected_number)snmp_root.udp_InErrors); + rrddim_set_by_pointer(st, rd_NoPorts, (collected_number)snmp_root.udp_NoPorts); + rrddim_set_by_pointer(st, rd_RcvbufErrors, (collected_number)snmp_root.udp_RcvbufErrors); + rrddim_set_by_pointer(st, rd_SndbufErrors, (collected_number)snmp_root.udp_SndbufErrors); + rrddim_set_by_pointer(st, rd_InCsumErrors, (collected_number)snmp_root.udp_InCsumErrors); + rrddim_set_by_pointer(st, rd_IgnoredMulti, (collected_number)snmp_root.udp_IgnoredMulti); + rrdset_done(st); + } + + // snmp UdpLite charts + + if(do_udplite_packets == CONFIG_BOOLEAN_YES || (do_udplite_packets == CONFIG_BOOLEAN_AUTO && + (snmp_root.udplite_InDatagrams || + snmp_root.udplite_OutDatagrams || + snmp_root.udplite_NoPorts || + snmp_root.udplite_InErrors || + snmp_root.udplite_InCsumErrors || + snmp_root.udplite_RcvbufErrors || + snmp_root.udplite_SndbufErrors || + snmp_root.udplite_IgnoredMulti || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udplite_packets = CONFIG_BOOLEAN_YES; + + { + static RRDSET *st = NULL; + static RRDDIM *rd_InDatagrams = NULL, + *rd_OutDatagrams = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "udplite" + , NULL + , "udplite" + , NULL + , "IPv4 UDPLite Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_UDPLITE + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InDatagrams = rrddim_add(st, "InDatagrams", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutDatagrams = rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InDatagrams, (collected_number)snmp_root.udplite_InDatagrams); + rrddim_set_by_pointer(st, rd_OutDatagrams, (collected_number)snmp_root.udplite_OutDatagrams); + rrdset_done(st); + } + + { + static RRDSET *st = NULL; + static RRDDIM *rd_RcvbufErrors = NULL, + *rd_SndbufErrors = NULL, + *rd_InErrors = NULL, + *rd_NoPorts = NULL, + *rd_InCsumErrors = NULL, + *rd_IgnoredMulti = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "udplite_errors" + , NULL + , "udplite" + , NULL + , "IPv4 UDPLite Errors" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_UDPLITE + 10 + , update_every + , RRDSET_TYPE_LINE); + + rd_RcvbufErrors = rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_SndbufErrors = rrddim_add(st, "SndbufErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InErrors = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_NoPorts = rrddim_add(st, "NoPorts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCsumErrors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_IgnoredMulti = rrddim_add(st, "IgnoredMulti", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_NoPorts, (collected_number)snmp_root.udplite_NoPorts); + rrddim_set_by_pointer(st, rd_InErrors, (collected_number)snmp_root.udplite_InErrors); + rrddim_set_by_pointer(st, rd_InCsumErrors, (collected_number)snmp_root.udplite_InCsumErrors); + rrddim_set_by_pointer(st, rd_RcvbufErrors, (collected_number)snmp_root.udplite_RcvbufErrors); + rrddim_set_by_pointer(st, rd_SndbufErrors, (collected_number)snmp_root.udplite_SndbufErrors); + rrddim_set_by_pointer(st, rd_IgnoredMulti, (collected_number)snmp_root.udplite_IgnoredMulti); + rrdset_done(st); + } + } + + // snmp6 charts + + if(do_ip6_bandwidth == CONFIG_BOOLEAN_YES || (do_ip6_bandwidth == CONFIG_BOOLEAN_AUTO && + (Ip6InOctets || + Ip6OutOctets || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_bandwidth = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, + *rd_sent = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "ipv6" + , NULL + , "network" + , NULL + , "IPv6 Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_IPV6 + , update_every + , RRDSET_TYPE_AREA + ); + + rd_received = rrddim_add(st, "InOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "OutOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_received, Ip6InOctets); + rrddim_set_by_pointer(st, rd_sent, Ip6OutOctets); + rrdset_done(st); + } + + if(do_ip6_packets == CONFIG_BOOLEAN_YES || (do_ip6_packets == CONFIG_BOOLEAN_AUTO && + (Ip6InReceives || + Ip6OutRequests || + Ip6InDelivers || + Ip6OutForwDatagrams || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_packets = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, + *rd_sent = NULL, + *rd_forwarded = NULL, + *rd_delivers = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "packets" + , NULL + , "packets" + , NULL + , "IPv6 Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_received = rrddim_add(st, "InReceives", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "OutRequests", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_forwarded = rrddim_add(st, "OutForwDatagrams", "forwarded", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_delivers = rrddim_add(st, "InDelivers", "delivers", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_received, Ip6InReceives); + rrddim_set_by_pointer(st, rd_sent, Ip6OutRequests); + rrddim_set_by_pointer(st, rd_forwarded, Ip6OutForwDatagrams); + rrddim_set_by_pointer(st, rd_delivers, Ip6InDelivers); + rrdset_done(st); + } + + if(do_ip6_fragsout == CONFIG_BOOLEAN_YES || (do_ip6_fragsout == CONFIG_BOOLEAN_AUTO && + (Ip6FragOKs || + Ip6FragFails || + Ip6FragCreates || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_fragsout = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_ok = NULL, + *rd_failed = NULL, + *rd_all = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "fragsout" + , NULL + , "fragments6" + , NULL + , "IPv6 Fragments Sent" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_FRAGSOUT + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ok = rrddim_add(st, "FragOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st, "FragFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_all = rrddim_add(st, "FragCreates", "all", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ok, Ip6FragOKs); + rrddim_set_by_pointer(st, rd_failed, Ip6FragFails); + rrddim_set_by_pointer(st, rd_all, Ip6FragCreates); + rrdset_done(st); + } + + if(do_ip6_fragsin == CONFIG_BOOLEAN_YES || (do_ip6_fragsin == CONFIG_BOOLEAN_AUTO && + (Ip6ReasmOKs || + Ip6ReasmFails || + Ip6ReasmTimeout || + Ip6ReasmReqds || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_fragsin = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_ok = NULL, + *rd_failed = NULL, + *rd_timeout = NULL, + *rd_all = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "fragsin" + , NULL + , "fragments6" + , NULL + , "IPv6 Fragments Reassembly" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_FRAGSIN + , update_every + , RRDSET_TYPE_LINE); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ok = rrddim_add(st, "ReasmOKs", "ok", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st, "ReasmFails", "failed", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_timeout = rrddim_add(st, "ReasmTimeout", "timeout", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_all = rrddim_add(st, "ReasmReqds", "all", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_ok, Ip6ReasmOKs); + rrddim_set_by_pointer(st, rd_failed, Ip6ReasmFails); + rrddim_set_by_pointer(st, rd_timeout, Ip6ReasmTimeout); + rrddim_set_by_pointer(st, rd_all, Ip6ReasmReqds); + rrdset_done(st); + } + + if(do_ip6_errors == CONFIG_BOOLEAN_YES || (do_ip6_errors == CONFIG_BOOLEAN_AUTO && + (Ip6InDiscards || + Ip6OutDiscards || + Ip6InHdrErrors || + Ip6InAddrErrors || + Ip6InUnknownProtos || + Ip6InTooBigErrors || + Ip6InTruncatedPkts || + Ip6InNoRoutes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_errors = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InDiscards = NULL, + *rd_OutDiscards = NULL, + *rd_InHdrErrors = NULL, + *rd_InAddrErrors = NULL, + *rd_InUnknownProtos = NULL, + *rd_InTooBigErrors = NULL, + *rd_InTruncatedPkts = NULL, + *rd_InNoRoutes = NULL, + *rd_OutNoRoutes = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "errors" + , NULL + , "errors" + , NULL + , "IPv6 Errors" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ERRORS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_InDiscards = rrddim_add(st, "InDiscards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutDiscards = rrddim_add(st, "OutDiscards", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InHdrErrors = rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InAddrErrors = rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InUnknownProtos = rrddim_add(st, "InUnknownProtos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InTooBigErrors = rrddim_add(st, "InTooBigErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InTruncatedPkts = rrddim_add(st, "InTruncatedPkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InNoRoutes = rrddim_add(st, "InNoRoutes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutNoRoutes = rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InDiscards, Ip6InDiscards); + rrddim_set_by_pointer(st, rd_OutDiscards, Ip6OutDiscards); + rrddim_set_by_pointer(st, rd_InHdrErrors, Ip6InHdrErrors); + rrddim_set_by_pointer(st, rd_InAddrErrors, Ip6InAddrErrors); + rrddim_set_by_pointer(st, rd_InUnknownProtos, Ip6InUnknownProtos); + rrddim_set_by_pointer(st, rd_InTooBigErrors, Ip6InTooBigErrors); + rrddim_set_by_pointer(st, rd_InTruncatedPkts, Ip6InTruncatedPkts); + rrddim_set_by_pointer(st, rd_InNoRoutes, Ip6InNoRoutes); + rrddim_set_by_pointer(st, rd_OutNoRoutes, Ip6OutNoRoutes); + rrdset_done(st); + } + + if(do_ip6_udp_packets == CONFIG_BOOLEAN_YES || (do_ip6_udp_packets == CONFIG_BOOLEAN_AUTO && + (Udp6InDatagrams || + Udp6OutDatagrams || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udp_packets = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, + *rd_sent = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "udppackets" + , NULL + , "udp6" + , NULL + , "IPv6 UDP Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_UDP_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_received = rrddim_add(st, "InDatagrams", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_received, Udp6InDatagrams); + rrddim_set_by_pointer(st, rd_sent, Udp6OutDatagrams); + rrdset_done(st); + } + + if(do_ip6_udp_errors == CONFIG_BOOLEAN_YES || (do_ip6_udp_errors == CONFIG_BOOLEAN_AUTO && + (Udp6InErrors || + Udp6NoPorts || + Udp6RcvbufErrors || + Udp6SndbufErrors || + Udp6InCsumErrors || + Udp6IgnoredMulti || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_udp_errors = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_RcvbufErrors = NULL, + *rd_SndbufErrors = NULL, + *rd_InErrors = NULL, + *rd_NoPorts = NULL, + *rd_InCsumErrors = NULL, + *rd_IgnoredMulti = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "udperrors" + , NULL + , "udp6" + , NULL + , "IPv6 UDP Errors" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_UDP_ERRORS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_RcvbufErrors = rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_SndbufErrors = rrddim_add(st, "SndbufErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InErrors = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_NoPorts = rrddim_add(st, "NoPorts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCsumErrors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_IgnoredMulti = rrddim_add(st, "IgnoredMulti", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_RcvbufErrors, Udp6RcvbufErrors); + rrddim_set_by_pointer(st, rd_SndbufErrors, Udp6SndbufErrors); + rrddim_set_by_pointer(st, rd_InErrors, Udp6InErrors); + rrddim_set_by_pointer(st, rd_NoPorts, Udp6NoPorts); + rrddim_set_by_pointer(st, rd_InCsumErrors, Udp6InCsumErrors); + rrddim_set_by_pointer(st, rd_IgnoredMulti, Udp6IgnoredMulti); + rrdset_done(st); + } + + if(do_ip6_udplite_packets == CONFIG_BOOLEAN_YES || (do_ip6_udplite_packets == CONFIG_BOOLEAN_AUTO && + (UdpLite6InDatagrams || + UdpLite6OutDatagrams || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udplite_packets = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, + *rd_sent = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "udplitepackets" + , NULL + , "udplite6" + , NULL + , "IPv6 UDPlite Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_UDPLITE_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_received = rrddim_add(st, "InDatagrams", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_received, UdpLite6InDatagrams); + rrddim_set_by_pointer(st, rd_sent, UdpLite6OutDatagrams); + rrdset_done(st); + } + + if(do_ip6_udplite_errors == CONFIG_BOOLEAN_YES || (do_ip6_udplite_errors == CONFIG_BOOLEAN_AUTO && + (UdpLite6InErrors || + UdpLite6NoPorts || + UdpLite6RcvbufErrors || + UdpLite6SndbufErrors || + Udp6InCsumErrors || + UdpLite6InCsumErrors || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_udplite_errors = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_RcvbufErrors = NULL, + *rd_SndbufErrors = NULL, + *rd_InErrors = NULL, + *rd_NoPorts = NULL, + *rd_InCsumErrors = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "udpliteerrors" + , NULL + , "udplite6" + , NULL + , "IPv6 UDP Lite Errors" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_UDPLITE_ERRORS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_RcvbufErrors = rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_SndbufErrors = rrddim_add(st, "SndbufErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InErrors = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_NoPorts = rrddim_add(st, "NoPorts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCsumErrors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InErrors, UdpLite6InErrors); + rrddim_set_by_pointer(st, rd_NoPorts, UdpLite6NoPorts); + rrddim_set_by_pointer(st, rd_RcvbufErrors, UdpLite6RcvbufErrors); + rrddim_set_by_pointer(st, rd_SndbufErrors, UdpLite6SndbufErrors); + rrddim_set_by_pointer(st, rd_InCsumErrors, UdpLite6InCsumErrors); + rrdset_done(st); + } + + if(do_ip6_mcast == CONFIG_BOOLEAN_YES || (do_ip6_mcast == CONFIG_BOOLEAN_AUTO && + (Ip6OutMcastOctets || + Ip6InMcastOctets || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_mcast = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_Ip6InMcastOctets = NULL, + *rd_Ip6OutMcastOctets = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "mcast" + , NULL + , "multicast6" + , NULL + , "IPv6 Multicast Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_MCAST + , update_every + , RRDSET_TYPE_AREA + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_Ip6InMcastOctets = rrddim_add(st, "InMcastOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_Ip6OutMcastOctets = rrddim_add(st, "OutMcastOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_Ip6InMcastOctets, Ip6InMcastOctets); + rrddim_set_by_pointer(st, rd_Ip6OutMcastOctets, Ip6OutMcastOctets); + rrdset_done(st); + } + + if(do_ip6_bcast == CONFIG_BOOLEAN_YES || (do_ip6_bcast == CONFIG_BOOLEAN_AUTO && + (Ip6OutBcastOctets || + Ip6InBcastOctets || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_bcast = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_Ip6InBcastOctets = NULL, + *rd_Ip6OutBcastOctets = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "bcast" + , NULL + , "broadcast6" + , NULL + , "IPv6 Broadcast Bandwidth" + , "kilobits/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_BCAST + , update_every + , RRDSET_TYPE_AREA + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_Ip6InBcastOctets = rrddim_add(st, "InBcastOctets", "received", 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_Ip6OutBcastOctets = rrddim_add(st, "OutBcastOctets", "sent", -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_Ip6InBcastOctets, Ip6InBcastOctets); + rrddim_set_by_pointer(st, rd_Ip6OutBcastOctets, Ip6OutBcastOctets); + rrdset_done(st); + } + + if(do_ip6_mcast_p == CONFIG_BOOLEAN_YES || (do_ip6_mcast_p == CONFIG_BOOLEAN_AUTO && + (Ip6OutMcastPkts || + Ip6InMcastPkts || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_mcast_p = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_Ip6InMcastPkts = NULL, + *rd_Ip6OutMcastPkts = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "mcastpkts" + , NULL + , "multicast6" + , NULL + , "IPv6 Multicast Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_MCAST_PACKETS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_Ip6InMcastPkts = rrddim_add(st, "InMcastPkts", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_Ip6OutMcastPkts = rrddim_add(st, "OutMcastPkts", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_Ip6InMcastPkts, Ip6InMcastPkts); + rrddim_set_by_pointer(st, rd_Ip6OutMcastPkts, Ip6OutMcastPkts); + rrdset_done(st); + } + + if(do_ip6_icmp == CONFIG_BOOLEAN_YES || (do_ip6_icmp == CONFIG_BOOLEAN_AUTO && + (Icmp6InMsgs || + Icmp6OutMsgs || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_Icmp6InMsgs = NULL, + *rd_Icmp6OutMsgs = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmp" + , NULL + , "icmp6" + , NULL + , "IPv6 ICMP Messages" + , "messages/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_Icmp6InMsgs = rrddim_add(st, "InMsgs", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_Icmp6OutMsgs = rrddim_add(st, "OutMsgs", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_Icmp6InMsgs, Icmp6InMsgs); + rrddim_set_by_pointer(st, rd_Icmp6OutMsgs, Icmp6OutMsgs); + rrdset_done(st); + } + + if(do_ip6_icmp_redir == CONFIG_BOOLEAN_YES || (do_ip6_icmp_redir == CONFIG_BOOLEAN_AUTO && + (Icmp6InRedirects || + Icmp6OutRedirects || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_redir = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_Icmp6InRedirects = NULL, + *rd_Icmp6OutRedirects = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmpredir" + , NULL + , "icmp6" + , NULL + , "IPv6 ICMP Redirects" + , "redirects/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_REDIR + , update_every + , RRDSET_TYPE_LINE + ); + + rd_Icmp6InRedirects = rrddim_add(st, "InRedirects", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_Icmp6OutRedirects = rrddim_add(st, "OutRedirects", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_Icmp6InRedirects, Icmp6InRedirects); + rrddim_set_by_pointer(st, rd_Icmp6OutRedirects, Icmp6OutRedirects); + rrdset_done(st); + } + + if(do_ip6_icmp_errors == CONFIG_BOOLEAN_YES || (do_ip6_icmp_errors == CONFIG_BOOLEAN_AUTO && + (Icmp6InErrors || + Icmp6OutErrors || + Icmp6InCsumErrors || + Icmp6InDestUnreachs || + Icmp6InPktTooBigs || + Icmp6InTimeExcds || + Icmp6InParmProblems || + Icmp6OutDestUnreachs || + Icmp6OutPktTooBigs || + Icmp6OutTimeExcds || + Icmp6OutParmProblems || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_errors = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InErrors = NULL, + *rd_OutErrors = NULL, + *rd_InCsumErrors = NULL, + *rd_InDestUnreachs = NULL, + *rd_InPktTooBigs = NULL, + *rd_InTimeExcds = NULL, + *rd_InParmProblems = NULL, + *rd_OutDestUnreachs = NULL, + *rd_OutPktTooBigs = NULL, + *rd_OutTimeExcds = NULL, + *rd_OutParmProblems = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmperrors" + , NULL + , "icmp6" + , NULL + , "IPv6 ICMP Errors" + , "errors/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_ERRORS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InErrors = rrddim_add(st, "InErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutErrors = rrddim_add(st, "OutErrors", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCsumErrors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InDestUnreachs = rrddim_add(st, "InDestUnreachs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InPktTooBigs = rrddim_add(st, "InPktTooBigs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InTimeExcds = rrddim_add(st, "InTimeExcds", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InParmProblems = rrddim_add(st, "InParmProblems", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutDestUnreachs = rrddim_add(st, "OutDestUnreachs", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutPktTooBigs = rrddim_add(st, "OutPktTooBigs", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutTimeExcds = rrddim_add(st, "OutTimeExcds", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutParmProblems = rrddim_add(st, "OutParmProblems", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InErrors, Icmp6InErrors); + rrddim_set_by_pointer(st, rd_OutErrors, Icmp6OutErrors); + rrddim_set_by_pointer(st, rd_InCsumErrors, Icmp6InCsumErrors); + rrddim_set_by_pointer(st, rd_InDestUnreachs, Icmp6InDestUnreachs); + rrddim_set_by_pointer(st, rd_InPktTooBigs, Icmp6InPktTooBigs); + rrddim_set_by_pointer(st, rd_InTimeExcds, Icmp6InTimeExcds); + rrddim_set_by_pointer(st, rd_InParmProblems, Icmp6InParmProblems); + rrddim_set_by_pointer(st, rd_OutDestUnreachs, Icmp6OutDestUnreachs); + rrddim_set_by_pointer(st, rd_OutPktTooBigs, Icmp6OutPktTooBigs); + rrddim_set_by_pointer(st, rd_OutTimeExcds, Icmp6OutTimeExcds); + rrddim_set_by_pointer(st, rd_OutParmProblems, Icmp6OutParmProblems); + rrdset_done(st); + } + + if(do_ip6_icmp_echos == CONFIG_BOOLEAN_YES || (do_ip6_icmp_echos == CONFIG_BOOLEAN_AUTO && + (Icmp6InEchos || + Icmp6OutEchos || + Icmp6InEchoReplies || + Icmp6OutEchoReplies || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_echos = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InEchos = NULL, + *rd_OutEchos = NULL, + *rd_InEchoReplies = NULL, + *rd_OutEchoReplies = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmpechos" + , NULL + , "icmp6" + , NULL + , "IPv6 ICMP Echo" + , "messages/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_ECHOS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InEchos = rrddim_add(st, "InEchos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutEchos = rrddim_add(st, "OutEchos", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InEchoReplies = rrddim_add(st, "InEchoReplies", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutEchoReplies = rrddim_add(st, "OutEchoReplies", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InEchos, Icmp6InEchos); + rrddim_set_by_pointer(st, rd_OutEchos, Icmp6OutEchos); + rrddim_set_by_pointer(st, rd_InEchoReplies, Icmp6InEchoReplies); + rrddim_set_by_pointer(st, rd_OutEchoReplies, Icmp6OutEchoReplies); + rrdset_done(st); + } + + if(do_ip6_icmp_groupmemb == CONFIG_BOOLEAN_YES || (do_ip6_icmp_groupmemb == CONFIG_BOOLEAN_AUTO && + (Icmp6InGroupMembQueries || + Icmp6OutGroupMembQueries || + Icmp6InGroupMembResponses || + Icmp6OutGroupMembResponses || + Icmp6InGroupMembReductions || + Icmp6OutGroupMembReductions || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_groupmemb = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InQueries = NULL, + *rd_OutQueries = NULL, + *rd_InResponses = NULL, + *rd_OutResponses = NULL, + *rd_InReductions = NULL, + *rd_OutReductions = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "groupmemb" + , NULL + , "icmp6" + , NULL + , "IPv6 ICMP Group Membership" + , "messages/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_GROUPMEMB + , update_every + , RRDSET_TYPE_LINE); + + rd_InQueries = rrddim_add(st, "InQueries", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutQueries = rrddim_add(st, "OutQueries", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InResponses = rrddim_add(st, "InResponses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutResponses = rrddim_add(st, "OutResponses", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InReductions = rrddim_add(st, "InReductions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutReductions = rrddim_add(st, "OutReductions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InQueries, Icmp6InGroupMembQueries); + rrddim_set_by_pointer(st, rd_OutQueries, Icmp6OutGroupMembQueries); + rrddim_set_by_pointer(st, rd_InResponses, Icmp6InGroupMembResponses); + rrddim_set_by_pointer(st, rd_OutResponses, Icmp6OutGroupMembResponses); + rrddim_set_by_pointer(st, rd_InReductions, Icmp6InGroupMembReductions); + rrddim_set_by_pointer(st, rd_OutReductions, Icmp6OutGroupMembReductions); + rrdset_done(st); + } + + if(do_ip6_icmp_router == CONFIG_BOOLEAN_YES || (do_ip6_icmp_router == CONFIG_BOOLEAN_AUTO && + (Icmp6InRouterSolicits || + Icmp6OutRouterSolicits || + Icmp6InRouterAdvertisements || + Icmp6OutRouterAdvertisements || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_router = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InSolicits = NULL, + *rd_OutSolicits = NULL, + *rd_InAdvertisements = NULL, + *rd_OutAdvertisements = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmprouter" + , NULL + , "icmp6" + , NULL + , "IPv6 Router Messages" + , "messages/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_ROUTER + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InSolicits = rrddim_add(st, "InSolicits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutSolicits = rrddim_add(st, "OutSolicits", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InAdvertisements = rrddim_add(st, "InAdvertisements", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutAdvertisements = rrddim_add(st, "OutAdvertisements", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InSolicits, Icmp6InRouterSolicits); + rrddim_set_by_pointer(st, rd_OutSolicits, Icmp6OutRouterSolicits); + rrddim_set_by_pointer(st, rd_InAdvertisements, Icmp6InRouterAdvertisements); + rrddim_set_by_pointer(st, rd_OutAdvertisements, Icmp6OutRouterAdvertisements); + rrdset_done(st); + } + + if(do_ip6_icmp_neighbor == CONFIG_BOOLEAN_YES || (do_ip6_icmp_neighbor == CONFIG_BOOLEAN_AUTO && + (Icmp6InNeighborSolicits || + Icmp6OutNeighborSolicits || + Icmp6InNeighborAdvertisements || + Icmp6OutNeighborAdvertisements || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_neighbor = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InSolicits = NULL, + *rd_OutSolicits = NULL, + *rd_InAdvertisements = NULL, + *rd_OutAdvertisements = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmpneighbor" + , NULL + , "icmp6" + , NULL + , "IPv6 Neighbor Messages" + , "messages/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_NEIGHBOR + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InSolicits = rrddim_add(st, "InSolicits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutSolicits = rrddim_add(st, "OutSolicits", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InAdvertisements = rrddim_add(st, "InAdvertisements", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutAdvertisements = rrddim_add(st, "OutAdvertisements", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InSolicits, Icmp6InNeighborSolicits); + rrddim_set_by_pointer(st, rd_OutSolicits, Icmp6OutNeighborSolicits); + rrddim_set_by_pointer(st, rd_InAdvertisements, Icmp6InNeighborAdvertisements); + rrddim_set_by_pointer(st, rd_OutAdvertisements, Icmp6OutNeighborAdvertisements); + rrdset_done(st); + } + + if(do_ip6_icmp_mldv2 == CONFIG_BOOLEAN_YES || (do_ip6_icmp_mldv2 == CONFIG_BOOLEAN_AUTO && + (Icmp6InMLDv2Reports || + Icmp6OutMLDv2Reports || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_mldv2 = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InMLDv2Reports = NULL, + *rd_OutMLDv2Reports = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmpmldv2" + , NULL + , "icmp6" + , NULL + , "IPv6 ICMP MLDv2 Reports" + , "reports/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_LDV2 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InMLDv2Reports = rrddim_add(st, "InMLDv2Reports", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutMLDv2Reports = rrddim_add(st, "OutMLDv2Reports", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InMLDv2Reports, Icmp6InMLDv2Reports); + rrddim_set_by_pointer(st, rd_OutMLDv2Reports, Icmp6OutMLDv2Reports); + rrdset_done(st); + } + + if(do_ip6_icmp_types == CONFIG_BOOLEAN_YES || (do_ip6_icmp_types == CONFIG_BOOLEAN_AUTO && + (Icmp6InType1 || + Icmp6InType128 || + Icmp6InType129 || + Icmp6InType136 || + Icmp6OutType1 || + Icmp6OutType128 || + Icmp6OutType129 || + Icmp6OutType133 || + Icmp6OutType135 || + Icmp6OutType143 || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_icmp_types = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InType1 = NULL, + *rd_InType128 = NULL, + *rd_InType129 = NULL, + *rd_InType136 = NULL, + *rd_OutType1 = NULL, + *rd_OutType128 = NULL, + *rd_OutType129 = NULL, + *rd_OutType133 = NULL, + *rd_OutType135 = NULL, + *rd_OutType143 = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "icmptypes" + , NULL + , "icmp6" + , NULL + , "IPv6 ICMP Types" + , "messages/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ICMP_TYPES + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InType1 = rrddim_add(st, "InType1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InType128 = rrddim_add(st, "InType128", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InType129 = rrddim_add(st, "InType129", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InType136 = rrddim_add(st, "InType136", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutType1 = rrddim_add(st, "OutType1", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutType128 = rrddim_add(st, "OutType128", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutType129 = rrddim_add(st, "OutType129", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutType133 = rrddim_add(st, "OutType133", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutType135 = rrddim_add(st, "OutType135", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutType143 = rrddim_add(st, "OutType143", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InType1, Icmp6InType1); + rrddim_set_by_pointer(st, rd_InType128, Icmp6InType128); + rrddim_set_by_pointer(st, rd_InType129, Icmp6InType129); + rrddim_set_by_pointer(st, rd_InType136, Icmp6InType136); + rrddim_set_by_pointer(st, rd_OutType1, Icmp6OutType1); + rrddim_set_by_pointer(st, rd_OutType128, Icmp6OutType128); + rrddim_set_by_pointer(st, rd_OutType129, Icmp6OutType129); + rrddim_set_by_pointer(st, rd_OutType133, Icmp6OutType133); + rrddim_set_by_pointer(st, rd_OutType135, Icmp6OutType135); + rrddim_set_by_pointer(st, rd_OutType143, Icmp6OutType143); + rrdset_done(st); + } + + if(do_ip6_ect == CONFIG_BOOLEAN_YES || (do_ip6_ect == CONFIG_BOOLEAN_AUTO && + (Ip6InNoECTPkts || + Ip6InECT1Pkts || + Ip6InECT0Pkts || + Ip6InCEPkts || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ip6_ect = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_InNoECTPkts = NULL, + *rd_InECT1Pkts = NULL, + *rd_InECT0Pkts = NULL, + *rd_InCEPkts = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP6 + , "ect" + , NULL + , "packets" + , NULL + , "IPv6 ECT Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NETSTAT_NAME + , NETDATA_CHART_PRIO_IPV6_ECT + , update_every + , RRDSET_TYPE_LINE + ); + + rd_InNoECTPkts = rrddim_add(st, "InNoECTPkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InECT1Pkts = rrddim_add(st, "InECT1Pkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InECT0Pkts = rrddim_add(st, "InECT0Pkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InCEPkts = rrddim_add(st, "InCEPkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InNoECTPkts, Ip6InNoECTPkts); + rrddim_set_by_pointer(st, rd_InECT1Pkts, Ip6InECT1Pkts); + rrddim_set_by_pointer(st, rd_InECT0Pkts, Ip6InECT0Pkts); + rrddim_set_by_pointer(st, rd_InCEPkts, Ip6InCEPkts); + rrdset_done(st); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_rpc_nfs.c b/collectors/proc.plugin/proc_net_rpc_nfs.c new file mode 100644 index 0000000..b1ff4e0 --- /dev/null +++ b/collectors/proc.plugin/proc_net_rpc_nfs.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NFS_NAME "/proc/net/rpc/nfs" +#define CONFIG_SECTION_PLUGIN_PROC_NFS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NFS_NAME + +struct nfs_procs { + char name[30]; + unsigned long long value; + int present; + RRDDIM *rd; +}; + +struct nfs_procs nfs_proc2_values[] = { + { "null" , 0ULL, 0, NULL} + , {"getattr" , 0ULL, 0, NULL} + , {"setattr" , 0ULL, 0, NULL} + , {"root" , 0ULL, 0, NULL} + , {"lookup" , 0ULL, 0, NULL} + , {"readlink", 0ULL, 0, NULL} + , {"read" , 0ULL, 0, NULL} + , {"wrcache" , 0ULL, 0, NULL} + , {"write" , 0ULL, 0, NULL} + , {"create" , 0ULL, 0, NULL} + , {"remove" , 0ULL, 0, NULL} + , {"rename" , 0ULL, 0, NULL} + , {"link" , 0ULL, 0, NULL} + , {"symlink" , 0ULL, 0, NULL} + , {"mkdir" , 0ULL, 0, NULL} + , {"rmdir" , 0ULL, 0, NULL} + , {"readdir" , 0ULL, 0, NULL} + , {"fsstat" , 0ULL, 0, NULL} + , + + /* termination */ + { "" , 0ULL, 0, NULL} +}; + +struct nfs_procs nfs_proc3_values[] = { + { "null" , 0ULL, 0, NULL} + , {"getattr" , 0ULL, 0, NULL} + , {"setattr" , 0ULL, 0, NULL} + , {"lookup" , 0ULL, 0, NULL} + , {"access" , 0ULL, 0, NULL} + , {"readlink" , 0ULL, 0, NULL} + , {"read" , 0ULL, 0, NULL} + , {"write" , 0ULL, 0, NULL} + , {"create" , 0ULL, 0, NULL} + , {"mkdir" , 0ULL, 0, NULL} + , {"symlink" , 0ULL, 0, NULL} + , {"mknod" , 0ULL, 0, NULL} + , {"remove" , 0ULL, 0, NULL} + , {"rmdir" , 0ULL, 0, NULL} + , {"rename" , 0ULL, 0, NULL} + , {"link" , 0ULL, 0, NULL} + , {"readdir" , 0ULL, 0, NULL} + , {"readdirplus", 0ULL, 0, NULL} + , {"fsstat" , 0ULL, 0, NULL} + , {"fsinfo" , 0ULL, 0, NULL} + , {"pathconf" , 0ULL, 0, NULL} + , {"commit" , 0ULL, 0, NULL} + , + + /* termination */ + { "" , 0ULL, 0, NULL} +}; + +struct nfs_procs nfs_proc4_values[] = { + { "null" , 0ULL, 0, NULL} + , {"read" , 0ULL, 0, NULL} + , {"write" , 0ULL, 0, NULL} + , {"commit" , 0ULL, 0, NULL} + , {"open" , 0ULL, 0, NULL} + , {"open_conf" , 0ULL, 0, NULL} + , {"open_noat" , 0ULL, 0, NULL} + , {"open_dgrd" , 0ULL, 0, NULL} + , {"close" , 0ULL, 0, NULL} + , {"setattr" , 0ULL, 0, NULL} + , {"fsinfo" , 0ULL, 0, NULL} + , {"renew" , 0ULL, 0, NULL} + , {"setclntid" , 0ULL, 0, NULL} + , {"confirm" , 0ULL, 0, NULL} + , {"lock" , 0ULL, 0, NULL} + , {"lockt" , 0ULL, 0, NULL} + , {"locku" , 0ULL, 0, NULL} + , {"access" , 0ULL, 0, NULL} + , {"getattr" , 0ULL, 0, NULL} + , {"lookup" , 0ULL, 0, NULL} + , {"lookup_root" , 0ULL, 0, NULL} + , {"remove" , 0ULL, 0, NULL} + , {"rename" , 0ULL, 0, NULL} + , {"link" , 0ULL, 0, NULL} + , {"symlink" , 0ULL, 0, NULL} + , {"create" , 0ULL, 0, NULL} + , {"pathconf" , 0ULL, 0, NULL} + , {"statfs" , 0ULL, 0, NULL} + , {"readlink" , 0ULL, 0, NULL} + , {"readdir" , 0ULL, 0, NULL} + , {"server_caps" , 0ULL, 0, NULL} + , {"delegreturn" , 0ULL, 0, NULL} + , {"getacl" , 0ULL, 0, NULL} + , {"setacl" , 0ULL, 0, NULL} + , {"fs_locations" , 0ULL, 0, NULL} + , {"rel_lkowner" , 0ULL, 0, NULL} + , {"secinfo" , 0ULL, 0, NULL} + , {"fsid_present" , 0ULL, 0, NULL} + , + + /* nfsv4.1 client ops */ + { "exchange_id" , 0ULL, 0, NULL} + , {"create_session" , 0ULL, 0, NULL} + , {"destroy_session" , 0ULL, 0, NULL} + , {"sequence" , 0ULL, 0, NULL} + , {"get_lease_time" , 0ULL, 0, NULL} + , {"reclaim_comp" , 0ULL, 0, NULL} + , {"layoutget" , 0ULL, 0, NULL} + , {"getdevinfo" , 0ULL, 0, NULL} + , {"layoutcommit" , 0ULL, 0, NULL} + , {"layoutreturn" , 0ULL, 0, NULL} + , {"secinfo_no" , 0ULL, 0, NULL} + , {"test_stateid" , 0ULL, 0, NULL} + , {"free_stateid" , 0ULL, 0, NULL} + , {"getdevicelist" , 0ULL, 0, NULL} + , {"bind_conn_to_ses", 0ULL, 0, NULL} + , {"destroy_clientid", 0ULL, 0, NULL} + , + + /* nfsv4.2 client ops */ + { "seek" , 0ULL, 0, NULL} + , {"allocate" , 0ULL, 0, NULL} + , {"deallocate" , 0ULL, 0, NULL} + , {"layoutstats" , 0ULL, 0, NULL} + , {"clone" , 0ULL, 0, NULL} + , + + /* termination */ + { "" , 0ULL, 0, NULL} +}; + +int do_proc_net_rpc_nfs(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + static int do_net = -1, do_rpc = -1, do_proc2 = -1, do_proc3 = -1, do_proc4 = -1; + static int proc2_warning = 0, proc3_warning = 0, proc4_warning = 0; + + if(!ff) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/rpc/nfs"); + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_NFS, "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + } + if(!ff) return 1; + + ff = procfile_readall(ff); + if(!ff) return 0; // we return 0, so that we will retry to open it next time + + if(do_net == -1) do_net = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NFS, "network", 1); + if(do_rpc == -1) do_rpc = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NFS, "rpc", 1); + if(do_proc2 == -1) do_proc2 = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NFS, "NFS v2 procedures", 1); + if(do_proc3 == -1) do_proc3 = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NFS, "NFS v3 procedures", 1); + if(do_proc4 == -1) do_proc4 = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_NFS, "NFS v4 procedures", 1); + + // if they are enabled, reset them to 1 + // later we do them =2 to avoid doing strcmp() for all lines + if(do_net) do_net = 1; + if(do_rpc) do_rpc = 1; + if(do_proc2) do_proc2 = 1; + if(do_proc3) do_proc3 = 1; + if(do_proc4) do_proc4 = 1; + + size_t lines = procfile_lines(ff), l; + + char *type; + unsigned long long net_count = 0, net_udp_count = 0, net_tcp_count = 0, net_tcp_connections = 0; + unsigned long long rpc_calls = 0, rpc_retransmits = 0, rpc_auth_refresh = 0; + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(!words) continue; + + type = procfile_lineword(ff, l, 0); + + if(do_net == 1 && strcmp(type, "net") == 0) { + if(words < 5) { + error("%s line of /proc/net/rpc/nfs has %zu words, expected %d", type, words, 5); + continue; + } + + net_count = str2ull(procfile_lineword(ff, l, 1)); + net_udp_count = str2ull(procfile_lineword(ff, l, 2)); + net_tcp_count = str2ull(procfile_lineword(ff, l, 3)); + net_tcp_connections = str2ull(procfile_lineword(ff, l, 4)); + + unsigned long long sum = net_count + net_udp_count + net_tcp_count + net_tcp_connections; + if(sum == 0ULL) do_net = -1; + else do_net = 2; + } + else if(do_rpc == 1 && strcmp(type, "rpc") == 0) { + if(words < 4) { + error("%s line of /proc/net/rpc/nfs has %zu words, expected %d", type, words, 6); + continue; + } + + rpc_calls = str2ull(procfile_lineword(ff, l, 1)); + rpc_retransmits = str2ull(procfile_lineword(ff, l, 2)); + rpc_auth_refresh = str2ull(procfile_lineword(ff, l, 3)); + + unsigned long long sum = rpc_calls + rpc_retransmits + rpc_auth_refresh; + if(sum == 0ULL) do_rpc = -1; + else do_rpc = 2; + } + else if(do_proc2 == 1 && strcmp(type, "proc2") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfs_proc2_values[i].name[0] ; i++, j++) { + nfs_proc2_values[i].value = str2ull(procfile_lineword(ff, l, j)); + nfs_proc2_values[i].present = 1; + sum += nfs_proc2_values[i].value; + } + + if(sum == 0ULL) { + if(!proc2_warning) { + error("Disabling /proc/net/rpc/nfs v2 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc2_warning = 1; + } + do_proc2 = 0; + } + else do_proc2 = 2; + } + else if(do_proc3 == 1 && strcmp(type, "proc3") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfs_proc3_values[i].name[0] ; i++, j++) { + nfs_proc3_values[i].value = str2ull(procfile_lineword(ff, l, j)); + nfs_proc3_values[i].present = 1; + sum += nfs_proc3_values[i].value; + } + + if(sum == 0ULL) { + if(!proc3_warning) { + info("Disabling /proc/net/rpc/nfs v3 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc3_warning = 1; + } + do_proc3 = 0; + } + else do_proc3 = 2; + } + else if(do_proc4 == 1 && strcmp(type, "proc4") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfs_proc4_values[i].name[0] ; i++, j++) { + nfs_proc4_values[i].value = str2ull(procfile_lineword(ff, l, j)); + nfs_proc4_values[i].present = 1; + sum += nfs_proc4_values[i].value; + } + + if(sum == 0ULL) { + if(!proc4_warning) { + info("Disabling /proc/net/rpc/nfs v4 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc4_warning = 1; + } + do_proc4 = 0; + } + else do_proc4 = 2; + } + } + + if(do_net == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_udp = NULL, + *rd_tcp = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfs" + , "net" + , NULL + , "network" + , NULL + , "NFS Client Network" + , "operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFS_NAME + , NETDATA_CHART_PRIO_NFS_NET + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_udp = rrddim_add(st, "udp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_tcp = rrddim_add(st, "tcp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + // ignore net_count, net_tcp_connections + (void)net_count; + (void)net_tcp_connections; + + rrddim_set_by_pointer(st, rd_udp, net_udp_count); + rrddim_set_by_pointer(st, rd_tcp, net_tcp_count); + rrdset_done(st); + } + + if(do_rpc == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_calls = NULL, + *rd_retransmits = NULL, + *rd_auth_refresh = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfs" + , "rpc" + , NULL + , "rpc" + , NULL + , "NFS Client Remote Procedure Calls Statistics" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFS_NAME + , NETDATA_CHART_PRIO_NFS_RPC + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_calls = rrddim_add(st, "calls", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_retransmits = rrddim_add(st, "retransmits", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_auth_refresh = rrddim_add(st, "auth_refresh", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_calls, rpc_calls); + rrddim_set_by_pointer(st, rd_retransmits, rpc_retransmits); + rrddim_set_by_pointer(st, rd_auth_refresh, rpc_auth_refresh); + rrdset_done(st); + } + + if(do_proc2 == 2) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfs" + , "proc2" + , NULL + , "nfsv2rpc" + , NULL + , "NFS v2 Client Remote Procedure Calls" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFS_NAME + , NETDATA_CHART_PRIO_NFS_PROC2 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + size_t i; + for(i = 0; nfs_proc2_values[i].present ; i++) { + if(unlikely(!nfs_proc2_values[i].rd)) + nfs_proc2_values[i].rd = rrddim_add(st, nfs_proc2_values[i].name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st, nfs_proc2_values[i].rd, nfs_proc2_values[i].value); + } + + rrdset_done(st); + } + + if(do_proc3 == 2) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfs" + , "proc3" + , NULL + , "nfsv3rpc" + , NULL + , "NFS v3 Client Remote Procedure Calls" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFS_NAME + , NETDATA_CHART_PRIO_NFS_PROC3 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + size_t i; + for(i = 0; nfs_proc3_values[i].present ; i++) { + if(unlikely(!nfs_proc3_values[i].rd)) + nfs_proc3_values[i].rd = rrddim_add(st, nfs_proc3_values[i].name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st, nfs_proc3_values[i].rd, nfs_proc3_values[i].value); + } + + rrdset_done(st); + } + + if(do_proc4 == 2) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfs" + , "proc4" + , NULL + , "nfsv4rpc" + , NULL + , "NFS v4 Client Remote Procedure Calls" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFS_NAME + , NETDATA_CHART_PRIO_NFS_PROC4 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + size_t i; + for(i = 0; nfs_proc4_values[i].present ; i++) { + if(unlikely(!nfs_proc4_values[i].rd)) + nfs_proc4_values[i].rd = rrddim_add(st, nfs_proc4_values[i].name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st, nfs_proc4_values[i].rd, nfs_proc4_values[i].value); + } + + rrdset_done(st); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_rpc_nfsd.c b/collectors/proc.plugin/proc_net_rpc_nfsd.c new file mode 100644 index 0000000..bd1da88 --- /dev/null +++ b/collectors/proc.plugin/proc_net_rpc_nfsd.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NFSD_NAME "/proc/net/rpc/nfsd" + +struct nfsd_procs { + char name[30]; + unsigned long long value; + int present; + RRDDIM *rd; +}; + +struct nfsd_procs nfsd_proc2_values[] = { + { "null" , 0ULL, 0, NULL} + , {"getattr" , 0ULL, 0, NULL} + , {"setattr" , 0ULL, 0, NULL} + , {"root" , 0ULL, 0, NULL} + , {"lookup" , 0ULL, 0, NULL} + , {"readlink", 0ULL, 0, NULL} + , {"read" , 0ULL, 0, NULL} + , {"wrcache" , 0ULL, 0, NULL} + , {"write" , 0ULL, 0, NULL} + , {"create" , 0ULL, 0, NULL} + , {"remove" , 0ULL, 0, NULL} + , {"rename" , 0ULL, 0, NULL} + , {"link" , 0ULL, 0, NULL} + , {"symlink" , 0ULL, 0, NULL} + , {"mkdir" , 0ULL, 0, NULL} + , {"rmdir" , 0ULL, 0, NULL} + , {"readdir" , 0ULL, 0, NULL} + , {"fsstat" , 0ULL, 0, NULL} + , + + /* termination */ + { "" , 0ULL, 0, NULL} +}; + +struct nfsd_procs nfsd_proc3_values[] = { + { "null" , 0ULL, 0, NULL} + , {"getattr" , 0ULL, 0, NULL} + , {"setattr" , 0ULL, 0, NULL} + , {"lookup" , 0ULL, 0, NULL} + , {"access" , 0ULL, 0, NULL} + , {"readlink" , 0ULL, 0, NULL} + , {"read" , 0ULL, 0, NULL} + , {"write" , 0ULL, 0, NULL} + , {"create" , 0ULL, 0, NULL} + , {"mkdir" , 0ULL, 0, NULL} + , {"symlink" , 0ULL, 0, NULL} + , {"mknod" , 0ULL, 0, NULL} + , {"remove" , 0ULL, 0, NULL} + , {"rmdir" , 0ULL, 0, NULL} + , {"rename" , 0ULL, 0, NULL} + , {"link" , 0ULL, 0, NULL} + , {"readdir" , 0ULL, 0, NULL} + , {"readdirplus", 0ULL, 0, NULL} + , {"fsstat" , 0ULL, 0, NULL} + , {"fsinfo" , 0ULL, 0, NULL} + , {"pathconf" , 0ULL, 0, NULL} + , {"commit" , 0ULL, 0, NULL} + , + + /* termination */ + { "" , 0ULL, 0, NULL} +}; + +struct nfsd_procs nfsd_proc4_values[] = { + { "null" , 0ULL, 0, NULL} + , {"read" , 0ULL, 0, NULL} + , {"write" , 0ULL, 0, NULL} + , {"commit" , 0ULL, 0, NULL} + , {"open" , 0ULL, 0, NULL} + , {"open_conf" , 0ULL, 0, NULL} + , {"open_noat" , 0ULL, 0, NULL} + , {"open_dgrd" , 0ULL, 0, NULL} + , {"close" , 0ULL, 0, NULL} + , {"setattr" , 0ULL, 0, NULL} + , {"fsinfo" , 0ULL, 0, NULL} + , {"renew" , 0ULL, 0, NULL} + , {"setclntid" , 0ULL, 0, NULL} + , {"confirm" , 0ULL, 0, NULL} + , {"lock" , 0ULL, 0, NULL} + , {"lockt" , 0ULL, 0, NULL} + , {"locku" , 0ULL, 0, NULL} + , {"access" , 0ULL, 0, NULL} + , {"getattr" , 0ULL, 0, NULL} + , {"lookup" , 0ULL, 0, NULL} + , {"lookup_root" , 0ULL, 0, NULL} + , {"remove" , 0ULL, 0, NULL} + , {"rename" , 0ULL, 0, NULL} + , {"link" , 0ULL, 0, NULL} + , {"symlink" , 0ULL, 0, NULL} + , {"create" , 0ULL, 0, NULL} + , {"pathconf" , 0ULL, 0, NULL} + , {"statfs" , 0ULL, 0, NULL} + , {"readlink" , 0ULL, 0, NULL} + , {"readdir" , 0ULL, 0, NULL} + , {"server_caps" , 0ULL, 0, NULL} + , {"delegreturn" , 0ULL, 0, NULL} + , {"getacl" , 0ULL, 0, NULL} + , {"setacl" , 0ULL, 0, NULL} + , {"fs_locations" , 0ULL, 0, NULL} + , {"rel_lkowner" , 0ULL, 0, NULL} + , {"secinfo" , 0ULL, 0, NULL} + , {"fsid_present" , 0ULL, 0, NULL} + , + + /* nfsv4.1 client ops */ + { "exchange_id" , 0ULL, 0, NULL} + , {"create_session" , 0ULL, 0, NULL} + , {"destroy_session" , 0ULL, 0, NULL} + , {"sequence" , 0ULL, 0, NULL} + , {"get_lease_time" , 0ULL, 0, NULL} + , {"reclaim_comp" , 0ULL, 0, NULL} + , {"layoutget" , 0ULL, 0, NULL} + , {"getdevinfo" , 0ULL, 0, NULL} + , {"layoutcommit" , 0ULL, 0, NULL} + , {"layoutreturn" , 0ULL, 0, NULL} + , {"secinfo_no" , 0ULL, 0, NULL} + , {"test_stateid" , 0ULL, 0, NULL} + , {"free_stateid" , 0ULL, 0, NULL} + , {"getdevicelist" , 0ULL, 0, NULL} + , {"bind_conn_to_ses", 0ULL, 0, NULL} + , {"destroy_clientid", 0ULL, 0, NULL} + , + + /* nfsv4.2 client ops */ + { "seek" , 0ULL, 0, NULL} + , {"allocate" , 0ULL, 0, NULL} + , {"deallocate" , 0ULL, 0, NULL} + , {"layoutstats" , 0ULL, 0, NULL} + , {"clone" , 0ULL, 0, NULL} + , + + /* termination */ + { "" , 0ULL, 0, NULL} +}; + +struct nfsd_procs nfsd4_ops_values[] = { + { "unused_op0" , 0ULL, 0, NULL} + , {"unused_op1" , 0ULL, 0, NULL} + , {"future_op2" , 0ULL, 0, NULL} + , {"access" , 0ULL, 0, NULL} + , {"close" , 0ULL, 0, NULL} + , {"commit" , 0ULL, 0, NULL} + , {"create" , 0ULL, 0, NULL} + , {"delegpurge" , 0ULL, 0, NULL} + , {"delegreturn" , 0ULL, 0, NULL} + , {"getattr" , 0ULL, 0, NULL} + , {"getfh" , 0ULL, 0, NULL} + , {"link" , 0ULL, 0, NULL} + , {"lock" , 0ULL, 0, NULL} + , {"lockt" , 0ULL, 0, NULL} + , {"locku" , 0ULL, 0, NULL} + , {"lookup" , 0ULL, 0, NULL} + , {"lookup_root" , 0ULL, 0, NULL} + , {"nverify" , 0ULL, 0, NULL} + , {"open" , 0ULL, 0, NULL} + , {"openattr" , 0ULL, 0, NULL} + , {"open_confirm" , 0ULL, 0, NULL} + , {"open_downgrade" , 0ULL, 0, NULL} + , {"putfh" , 0ULL, 0, NULL} + , {"putpubfh" , 0ULL, 0, NULL} + , {"putrootfh" , 0ULL, 0, NULL} + , {"read" , 0ULL, 0, NULL} + , {"readdir" , 0ULL, 0, NULL} + , {"readlink" , 0ULL, 0, NULL} + , {"remove" , 0ULL, 0, NULL} + , {"rename" , 0ULL, 0, NULL} + , {"renew" , 0ULL, 0, NULL} + , {"restorefh" , 0ULL, 0, NULL} + , {"savefh" , 0ULL, 0, NULL} + , {"secinfo" , 0ULL, 0, NULL} + , {"setattr" , 0ULL, 0, NULL} + , {"setclientid" , 0ULL, 0, NULL} + , {"setclientid_confirm" , 0ULL, 0, NULL} + , {"verify" , 0ULL, 0, NULL} + , {"write" , 0ULL, 0, NULL} + , {"release_lockowner" , 0ULL, 0, NULL} + , + + /* nfs41 */ + { "backchannel_ctl" , 0ULL, 0, NULL} + , {"bind_conn_to_session", 0ULL, 0, NULL} + , {"exchange_id" , 0ULL, 0, NULL} + , {"create_session" , 0ULL, 0, NULL} + , {"destroy_session" , 0ULL, 0, NULL} + , {"free_stateid" , 0ULL, 0, NULL} + , {"get_dir_delegation" , 0ULL, 0, NULL} + , {"getdeviceinfo" , 0ULL, 0, NULL} + , {"getdevicelist" , 0ULL, 0, NULL} + , {"layoutcommit" , 0ULL, 0, NULL} + , {"layoutget" , 0ULL, 0, NULL} + , {"layoutreturn" , 0ULL, 0, NULL} + , {"secinfo_no_name" , 0ULL, 0, NULL} + , {"sequence" , 0ULL, 0, NULL} + , {"set_ssv" , 0ULL, 0, NULL} + , {"test_stateid" , 0ULL, 0, NULL} + , {"want_delegation" , 0ULL, 0, NULL} + , {"destroy_clientid" , 0ULL, 0, NULL} + , {"reclaim_complete" , 0ULL, 0, NULL} + , + + /* nfs42 */ + { "allocate" , 0ULL, 0, NULL} + , {"copy" , 0ULL, 0, NULL} + , {"copy_notify" , 0ULL, 0, NULL} + , {"deallocate" , 0ULL, 0, NULL} + , {"ioadvise" , 0ULL, 0, NULL} + , {"layouterror" , 0ULL, 0, NULL} + , {"layoutstats" , 0ULL, 0, NULL} + , {"offload_cancel" , 0ULL, 0, NULL} + , {"offload_status" , 0ULL, 0, NULL} + , {"read_plus" , 0ULL, 0, NULL} + , {"seek" , 0ULL, 0, NULL} + , {"write_same" , 0ULL, 0, NULL} + , + + /* termination */ + { "" , 0ULL, 0, NULL} +}; + + +int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { + (void)dt; + static procfile *ff = NULL; + static int do_rc = -1, do_fh = -1, do_io = -1, do_th = -1, do_net = -1, do_rpc = -1, do_proc2 = -1, do_proc3 = -1, do_proc4 = -1, do_proc4ops = -1; + static int proc2_warning = 0, proc3_warning = 0, proc4_warning = 0, proc4ops_warning = 0; + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/rpc/nfsd"); + ff = procfile_open(config_get("plugin:proc:/proc/net/rpc/nfsd", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + if(unlikely(do_rc == -1)) { + do_rc = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "read cache", 1); + do_fh = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "file handles", 1); + do_io = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "I/O", 1); + do_th = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "threads", 1); + do_net = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "network", 1); + do_rpc = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "rpc", 1); + do_proc2 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v2 procedures", 1); + do_proc3 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v3 procedures", 1); + do_proc4 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v4 procedures", 1); + do_proc4ops = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v4 operations", 1); + } + + // if they are enabled, reset them to 1 + // later we do them = 2 to avoid doing strcmp() for all lines + if(do_rc) do_rc = 1; + if(do_fh) do_fh = 1; + if(do_io) do_io = 1; + if(do_th) do_th = 1; + if(do_net) do_net = 1; + if(do_rpc) do_rpc = 1; + if(do_proc2) do_proc2 = 1; + if(do_proc3) do_proc3 = 1; + if(do_proc4) do_proc4 = 1; + if(do_proc4ops) do_proc4ops = 1; + + size_t lines = procfile_lines(ff), l; + + char *type; + unsigned long long rc_hits = 0, rc_misses = 0, rc_nocache = 0; + unsigned long long fh_stale = 0; + unsigned long long io_read = 0, io_write = 0; + unsigned long long th_threads = 0; + unsigned long long net_count = 0, net_udp_count = 0, net_tcp_count = 0, net_tcp_connections = 0; + unsigned long long rpc_calls = 0, rpc_bad_format = 0, rpc_bad_auth = 0, rpc_bad_client = 0; + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(!words)) continue; + + type = procfile_lineword(ff, l, 0); + + if(do_rc == 1 && strcmp(type, "rc") == 0) { + if(unlikely(words < 4)) { + error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 4); + continue; + } + + rc_hits = str2ull(procfile_lineword(ff, l, 1)); + rc_misses = str2ull(procfile_lineword(ff, l, 2)); + rc_nocache = str2ull(procfile_lineword(ff, l, 3)); + + unsigned long long sum = rc_hits + rc_misses + rc_nocache; + if(sum == 0ULL) do_rc = -1; + else do_rc = 2; + } + else if(do_fh == 1 && strcmp(type, "fh") == 0) { + if(unlikely(words < 6)) { + error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 6); + continue; + } + + fh_stale = str2ull(procfile_lineword(ff, l, 1)); + + // other file handler metrics were never used and are always zero + + if(fh_stale == 0ULL) do_fh = -1; + else do_fh = 2; + } + else if(do_io == 1 && strcmp(type, "io") == 0) { + if(unlikely(words < 3)) { + error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 3); + continue; + } + + io_read = str2ull(procfile_lineword(ff, l, 1)); + io_write = str2ull(procfile_lineword(ff, l, 2)); + + unsigned long long sum = io_read + io_write; + if(sum == 0ULL) do_io = -1; + else do_io = 2; + } + else if(do_th == 1 && strcmp(type, "th") == 0) { + if(unlikely(words < 13)) { + error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 13); + continue; + } + + th_threads = str2ull(procfile_lineword(ff, l, 1)); + + // thread histogram has been disabled since 2009 (kernel 2.6.30) + // https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=8bbfa9f3889b643fc7de82c0c761ef17097f8faf + + do_th = 2; + } + else if(do_net == 1 && strcmp(type, "net") == 0) { + if(unlikely(words < 5)) { + error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 5); + continue; + } + + net_count = str2ull(procfile_lineword(ff, l, 1)); + net_udp_count = str2ull(procfile_lineword(ff, l, 2)); + net_tcp_count = str2ull(procfile_lineword(ff, l, 3)); + net_tcp_connections = str2ull(procfile_lineword(ff, l, 4)); + + unsigned long long sum = net_count + net_udp_count + net_tcp_count + net_tcp_connections; + if(sum == 0ULL) do_net = -1; + else do_net = 2; + } + else if(do_rpc == 1 && strcmp(type, "rpc") == 0) { + if(unlikely(words < 6)) { + error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 6); + continue; + } + + rpc_calls = str2ull(procfile_lineword(ff, l, 1)); + rpc_bad_format = str2ull(procfile_lineword(ff, l, 3)); + rpc_bad_auth = str2ull(procfile_lineword(ff, l, 4)); + rpc_bad_client = str2ull(procfile_lineword(ff, l, 5)); + + unsigned long long sum = rpc_calls + rpc_bad_format + rpc_bad_auth + rpc_bad_client; + if(sum == 0ULL) do_rpc = -1; + else do_rpc = 2; + } + else if(do_proc2 == 1 && strcmp(type, "proc2") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfsd_proc2_values[i].name[0] ; i++, j++) { + nfsd_proc2_values[i].value = str2ull(procfile_lineword(ff, l, j)); + nfsd_proc2_values[i].present = 1; + sum += nfsd_proc2_values[i].value; + } + + if(sum == 0ULL) { + if(!proc2_warning) { + error("Disabling /proc/net/rpc/nfsd v2 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc2_warning = 1; + } + do_proc2 = 0; + } + else do_proc2 = 2; + } + else if(do_proc3 == 1 && strcmp(type, "proc3") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfsd_proc3_values[i].name[0] ; i++, j++) { + nfsd_proc3_values[i].value = str2ull(procfile_lineword(ff, l, j)); + nfsd_proc3_values[i].present = 1; + sum += nfsd_proc3_values[i].value; + } + + if(sum == 0ULL) { + if(!proc3_warning) { + info("Disabling /proc/net/rpc/nfsd v3 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc3_warning = 1; + } + do_proc3 = 0; + } + else do_proc3 = 2; + } + else if(do_proc4 == 1 && strcmp(type, "proc4") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfsd_proc4_values[i].name[0] ; i++, j++) { + nfsd_proc4_values[i].value = str2ull(procfile_lineword(ff, l, j)); + nfsd_proc4_values[i].present = 1; + sum += nfsd_proc4_values[i].value; + } + + if(sum == 0ULL) { + if(!proc4_warning) { + info("Disabling /proc/net/rpc/nfsd v4 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc4_warning = 1; + } + do_proc4 = 0; + } + else do_proc4 = 2; + } + else if(do_proc4ops == 1 && strcmp(type, "proc4ops") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfsd4_ops_values[i].name[0] ; i++, j++) { + nfsd4_ops_values[i].value = str2ull(procfile_lineword(ff, l, j)); + nfsd4_ops_values[i].present = 1; + sum += nfsd4_ops_values[i].value; + } + + if(sum == 0ULL) { + if(!proc4ops_warning) { + info("Disabling /proc/net/rpc/nfsd v4 operations chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc4ops_warning = 1; + } + do_proc4ops = 0; + } + else do_proc4ops = 2; + } + } + + if(do_rc == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_hits = NULL, + *rd_misses = NULL, + *rd_nocache = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "readcache" + , NULL + , "cache" + , NULL + , "NFS Server Read Cache" + , "reads/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_READCACHE + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_hits = rrddim_add(st, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_misses = rrddim_add(st, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_nocache = rrddim_add(st, "nocache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_hits, rc_hits); + rrddim_set_by_pointer(st, rd_misses, rc_misses); + rrddim_set_by_pointer(st, rd_nocache, rc_nocache); + rrdset_done(st); + } + + if(do_fh == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_stale = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "filehandles" + , NULL + , "filehandles" + , NULL + , "NFS Server File Handles" + , "handles/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_FILEHANDLES + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_stale = rrddim_add(st, "stale", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_stale, fh_stale); + rrdset_done(st); + } + + if(do_io == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_read = NULL, + *rd_write = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "io" + , NULL + , "io" + , NULL + , "NFS Server I/O" + , "kilobytes/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_IO + , update_every + , RRDSET_TYPE_AREA + ); + + rd_read = rrddim_add(st, "read", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + rd_write = rrddim_add(st, "write", NULL, -1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_read, io_read); + rrddim_set_by_pointer(st, rd_write, io_write); + rrdset_done(st); + } + + if(do_th == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_threads = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "threads" + , NULL + , "threads" + , NULL + , "NFS Server Threads" + , "threads" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_THREADS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_threads = rrddim_add(st, "threads", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_threads, th_threads); + rrdset_done(st); + } + + if(do_net == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_udp = NULL, + *rd_tcp = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "net" + , NULL + , "network" + , NULL + , "NFS Server Network Statistics" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_NET + , update_every + , RRDSET_TYPE_STACKED + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_udp = rrddim_add(st, "udp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_tcp = rrddim_add(st, "tcp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + // ignore net_count, net_tcp_connections + (void)net_count; + (void)net_tcp_connections; + + rrddim_set_by_pointer(st, rd_udp, net_udp_count); + rrddim_set_by_pointer(st, rd_tcp, net_tcp_count); + rrdset_done(st); + } + + if(do_rpc == 2) { + static RRDSET *st = NULL; + static RRDDIM *rd_calls = NULL, + *rd_bad_format = NULL, + *rd_bad_auth = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "rpc" + , NULL + , "rpc" + , NULL + , "NFS Server Remote Procedure Calls Statistics" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_RPC + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_calls = rrddim_add(st, "calls", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_bad_format = rrddim_add(st, "bad_format", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_bad_auth = rrddim_add(st, "bad_auth", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + // ignore rpc_bad_client + (void)rpc_bad_client; + + rrddim_set_by_pointer(st, rd_calls, rpc_calls); + rrddim_set_by_pointer(st, rd_bad_format, rpc_bad_format); + rrddim_set_by_pointer(st, rd_bad_auth, rpc_bad_auth); + rrdset_done(st); + } + + if(do_proc2 == 2) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "proc2" + , NULL + , "nfsv2rpc" + , NULL + , "NFS v2 Server Remote Procedure Calls" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_PROC2 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + size_t i; + for(i = 0; nfsd_proc2_values[i].present ; i++) { + if(unlikely(!nfsd_proc2_values[i].rd)) + nfsd_proc2_values[i].rd = rrddim_add(st, nfsd_proc2_values[i].name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st, nfsd_proc2_values[i].rd, nfsd_proc2_values[i].value); + } + + rrdset_done(st); + } + + if(do_proc3 == 2) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "proc3" + , NULL + , "nfsv3rpc" + , NULL + , "NFS v3 Server Remote Procedure Calls" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_PROC3 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + size_t i; + for(i = 0; nfsd_proc3_values[i].present ; i++) { + if(unlikely(!nfsd_proc3_values[i].rd)) + nfsd_proc3_values[i].rd = rrddim_add(st, nfsd_proc3_values[i].name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st, nfsd_proc3_values[i].rd, nfsd_proc3_values[i].value); + } + + rrdset_done(st); + } + + if(do_proc4 == 2) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "proc4" + , NULL + , "nfsv4rpc" + , NULL + , "NFS v4 Server Remote Procedure Calls" + , "calls/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_PROC4 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + size_t i; + for(i = 0; nfsd_proc4_values[i].present ; i++) { + if(unlikely(!nfsd_proc4_values[i].rd)) + nfsd_proc4_values[i].rd = rrddim_add(st, nfsd_proc4_values[i].name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st, nfsd_proc4_values[i].rd, nfsd_proc4_values[i].value); + } + + rrdset_done(st); + } + + if(do_proc4ops == 2) { + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "proc4ops" + , NULL + , "nfsv4ops" + , NULL + , "NFS v4 Server Operations" + , "operations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_PROC4OPS + , update_every + , RRDSET_TYPE_STACKED + ); + } + + size_t i; + for(i = 0; nfsd4_ops_values[i].present ; i++) { + if(unlikely(!nfsd4_ops_values[i].rd)) + nfsd4_ops_values[i].rd = rrddim_add(st, nfsd4_ops_values[i].name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st, nfsd4_ops_values[i].rd, nfsd4_ops_values[i].value); + } + + rrdset_done(st); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_sctp_snmp.c b/collectors/proc.plugin/proc_net_sctp_snmp.c new file mode 100644 index 0000000..292449a --- /dev/null +++ b/collectors/proc.plugin/proc_net_sctp_snmp.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" +#define PLUGIN_PROC_MODULE_NET_SCTP_SNMP_NAME "/proc/net/sctp/snmp" + +int do_proc_net_sctp_snmp(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + + static int + do_associations = -1, + do_transitions = -1, + do_packet_errors = -1, + do_packets = -1, + do_fragmentation = -1, + do_chunk_types = -1; + + static ARL_BASE *arl_base = NULL; + + static unsigned long long SctpCurrEstab = 0ULL; + static unsigned long long SctpActiveEstabs = 0ULL; + static unsigned long long SctpPassiveEstabs = 0ULL; + static unsigned long long SctpAborteds = 0ULL; + static unsigned long long SctpShutdowns = 0ULL; + static unsigned long long SctpOutOfBlues = 0ULL; + static unsigned long long SctpChecksumErrors = 0ULL; + static unsigned long long SctpOutCtrlChunks = 0ULL; + static unsigned long long SctpOutOrderChunks = 0ULL; + static unsigned long long SctpOutUnorderChunks = 0ULL; + static unsigned long long SctpInCtrlChunks = 0ULL; + static unsigned long long SctpInOrderChunks = 0ULL; + static unsigned long long SctpInUnorderChunks = 0ULL; + static unsigned long long SctpFragUsrMsgs = 0ULL; + static unsigned long long SctpReasmUsrMsgs = 0ULL; + static unsigned long long SctpOutSCTPPacks = 0ULL; + static unsigned long long SctpInSCTPPacks = 0ULL; + static unsigned long long SctpT1InitExpireds = 0ULL; + static unsigned long long SctpT1CookieExpireds = 0ULL; + static unsigned long long SctpT2ShutdownExpireds = 0ULL; + static unsigned long long SctpT3RtxExpireds = 0ULL; + static unsigned long long SctpT4RtoExpireds = 0ULL; + static unsigned long long SctpT5ShutdownGuardExpireds = 0ULL; + static unsigned long long SctpDelaySackExpireds = 0ULL; + static unsigned long long SctpAutocloseExpireds = 0ULL; + static unsigned long long SctpT3Retransmits = 0ULL; + static unsigned long long SctpPmtudRetransmits = 0ULL; + static unsigned long long SctpFastRetransmits = 0ULL; + static unsigned long long SctpInPktSoftirq = 0ULL; + static unsigned long long SctpInPktBacklog = 0ULL; + static unsigned long long SctpInPktDiscards = 0ULL; + static unsigned long long SctpInDataChunkDiscards = 0ULL; + + if(unlikely(!arl_base)) { + do_associations = config_get_boolean_ondemand("plugin:proc:/proc/net/sctp/snmp", "established associations", CONFIG_BOOLEAN_AUTO); + do_transitions = config_get_boolean_ondemand("plugin:proc:/proc/net/sctp/snmp", "association transitions", CONFIG_BOOLEAN_AUTO); + do_fragmentation = config_get_boolean_ondemand("plugin:proc:/proc/net/sctp/snmp", "fragmentation", CONFIG_BOOLEAN_AUTO); + do_packets = config_get_boolean_ondemand("plugin:proc:/proc/net/sctp/snmp", "packets", CONFIG_BOOLEAN_AUTO); + do_packet_errors = config_get_boolean_ondemand("plugin:proc:/proc/net/sctp/snmp", "packet errors", CONFIG_BOOLEAN_AUTO); + do_chunk_types = config_get_boolean_ondemand("plugin:proc:/proc/net/sctp/snmp", "chunk types", CONFIG_BOOLEAN_AUTO); + + arl_base = arl_create("sctp", NULL, 60); + arl_expect(arl_base, "SctpCurrEstab", &SctpCurrEstab); + arl_expect(arl_base, "SctpActiveEstabs", &SctpActiveEstabs); + arl_expect(arl_base, "SctpPassiveEstabs", &SctpPassiveEstabs); + arl_expect(arl_base, "SctpAborteds", &SctpAborteds); + arl_expect(arl_base, "SctpShutdowns", &SctpShutdowns); + arl_expect(arl_base, "SctpOutOfBlues", &SctpOutOfBlues); + arl_expect(arl_base, "SctpChecksumErrors", &SctpChecksumErrors); + arl_expect(arl_base, "SctpOutCtrlChunks", &SctpOutCtrlChunks); + arl_expect(arl_base, "SctpOutOrderChunks", &SctpOutOrderChunks); + arl_expect(arl_base, "SctpOutUnorderChunks", &SctpOutUnorderChunks); + arl_expect(arl_base, "SctpInCtrlChunks", &SctpInCtrlChunks); + arl_expect(arl_base, "SctpInOrderChunks", &SctpInOrderChunks); + arl_expect(arl_base, "SctpInUnorderChunks", &SctpInUnorderChunks); + arl_expect(arl_base, "SctpFragUsrMsgs", &SctpFragUsrMsgs); + arl_expect(arl_base, "SctpReasmUsrMsgs", &SctpReasmUsrMsgs); + arl_expect(arl_base, "SctpOutSCTPPacks", &SctpOutSCTPPacks); + arl_expect(arl_base, "SctpInSCTPPacks", &SctpInSCTPPacks); + arl_expect(arl_base, "SctpT1InitExpireds", &SctpT1InitExpireds); + arl_expect(arl_base, "SctpT1CookieExpireds", &SctpT1CookieExpireds); + arl_expect(arl_base, "SctpT2ShutdownExpireds", &SctpT2ShutdownExpireds); + arl_expect(arl_base, "SctpT3RtxExpireds", &SctpT3RtxExpireds); + arl_expect(arl_base, "SctpT4RtoExpireds", &SctpT4RtoExpireds); + arl_expect(arl_base, "SctpT5ShutdownGuardExpireds", &SctpT5ShutdownGuardExpireds); + arl_expect(arl_base, "SctpDelaySackExpireds", &SctpDelaySackExpireds); + arl_expect(arl_base, "SctpAutocloseExpireds", &SctpAutocloseExpireds); + arl_expect(arl_base, "SctpT3Retransmits", &SctpT3Retransmits); + arl_expect(arl_base, "SctpPmtudRetransmits", &SctpPmtudRetransmits); + arl_expect(arl_base, "SctpFastRetransmits", &SctpFastRetransmits); + arl_expect(arl_base, "SctpInPktSoftirq", &SctpInPktSoftirq); + arl_expect(arl_base, "SctpInPktBacklog", &SctpInPktBacklog); + arl_expect(arl_base, "SctpInPktDiscards", &SctpInPktDiscards); + arl_expect(arl_base, "SctpInDataChunkDiscards", &SctpInDataChunkDiscards); + } + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/sctp/snmp"); + ff = procfile_open(config_get("plugin:proc:/proc/net/sctp/snmp", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) + return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + arl_begin(arl_base); + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 2)) { + if(unlikely(words)) error("Cannot read /proc/net/sctp/snmp line %zu. Expected 2 params, read %zu.", l, words); + continue; + } + + if(unlikely(arl_check(arl_base, + procfile_lineword(ff, l, 0), + procfile_lineword(ff, l, 1)))) break; + } + + // -------------------------------------------------------------------- + + if(do_associations == CONFIG_BOOLEAN_YES || (do_associations == CONFIG_BOOLEAN_AUTO && + (SctpCurrEstab || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_associations = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_established = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "sctp" + , "established" + , NULL + , "associations" + , NULL + , "SCTP current total number of established associations" + , "associations" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SCTP_SNMP_NAME + , NETDATA_CHART_PRIO_SCTP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_established = rrddim_add(st, "SctpCurrEstab", "established", 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_established, SctpCurrEstab); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_transitions == CONFIG_BOOLEAN_YES || (do_transitions == CONFIG_BOOLEAN_AUTO && + (SctpActiveEstabs || + SctpPassiveEstabs || + SctpAborteds || + SctpShutdowns || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_transitions = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_active = NULL, + *rd_passive = NULL, + *rd_aborted = NULL, + *rd_shutdown = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "sctp" + , "transitions" + , NULL + , "transitions" + , NULL + , "SCTP Association Transitions" + , "transitions/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SCTP_SNMP_NAME + , NETDATA_CHART_PRIO_SCTP + 10 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_active = rrddim_add(st, "SctpActiveEstabs", "active", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_passive = rrddim_add(st, "SctpPassiveEstabs", "passive", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_aborted = rrddim_add(st, "SctpAborteds", "aborted", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_shutdown = rrddim_add(st, "SctpShutdowns", "shutdown", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_active, SctpActiveEstabs); + rrddim_set_by_pointer(st, rd_passive, SctpPassiveEstabs); + rrddim_set_by_pointer(st, rd_aborted, SctpAborteds); + rrddim_set_by_pointer(st, rd_shutdown, SctpShutdowns); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_packets == CONFIG_BOOLEAN_YES || (do_packets == CONFIG_BOOLEAN_AUTO && + (SctpInSCTPPacks || + SctpOutSCTPPacks || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_packets = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, + *rd_sent = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "sctp" + , "packets" + , NULL + , "packets" + , NULL + , "SCTP Packets" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SCTP_SNMP_NAME + , NETDATA_CHART_PRIO_SCTP + 20 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_received = rrddim_add(st, "SctpInSCTPPacks", "received", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "SctpOutSCTPPacks", "sent", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_received, SctpInSCTPPacks); + rrddim_set_by_pointer(st, rd_sent, SctpOutSCTPPacks); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_packet_errors == CONFIG_BOOLEAN_YES || (do_packet_errors == CONFIG_BOOLEAN_AUTO && + (SctpOutOfBlues || + SctpChecksumErrors || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_packet_errors = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM *rd_invalid = NULL, + *rd_csum = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "sctp" + , "packet_errors" + , NULL + , "packets" + , NULL + , "SCTP Packet Errors" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SCTP_SNMP_NAME + , NETDATA_CHART_PRIO_SCTP + 30 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_invalid = rrddim_add(st, "SctpOutOfBlues", "invalid", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_csum = rrddim_add(st, "SctpChecksumErrors", "checksum", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_invalid, SctpOutOfBlues); + rrddim_set_by_pointer(st, rd_csum, SctpChecksumErrors); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_fragmentation == CONFIG_BOOLEAN_YES || (do_fragmentation == CONFIG_BOOLEAN_AUTO && + (SctpFragUsrMsgs || + SctpReasmUsrMsgs || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_fragmentation = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_fragmented = NULL, + *rd_reassembled = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "sctp" + , "fragmentation" + , NULL + , "fragmentation" + , NULL + , "SCTP Fragmentation" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SCTP_SNMP_NAME + , NETDATA_CHART_PRIO_SCTP + 40 + , update_every + , RRDSET_TYPE_LINE); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_reassembled = rrddim_add(st, "SctpReasmUsrMsgs", "reassembled", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_fragmented = rrddim_add(st, "SctpFragUsrMsgs", "fragmented", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_reassembled, SctpReasmUsrMsgs); + rrddim_set_by_pointer(st, rd_fragmented, SctpFragUsrMsgs); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_chunk_types == CONFIG_BOOLEAN_YES || (do_chunk_types == CONFIG_BOOLEAN_AUTO && + (SctpInCtrlChunks || + SctpInOrderChunks || + SctpInUnorderChunks || + SctpOutCtrlChunks || + SctpOutOrderChunks || + SctpOutUnorderChunks || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_chunk_types = CONFIG_BOOLEAN_YES; + static RRDSET *st = NULL; + static RRDDIM + *rd_InCtrl = NULL, + *rd_InOrder = NULL, + *rd_InUnorder = NULL, + *rd_OutCtrl = NULL, + *rd_OutOrder = NULL, + *rd_OutUnorder = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "sctp" + , "chunks" + , NULL + , "chunks" + , NULL + , "SCTP Chunk Types" + , "chunks/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SCTP_SNMP_NAME + , NETDATA_CHART_PRIO_SCTP + 50 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_InCtrl = rrddim_add(st, "SctpInCtrlChunks", "InCtrl", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InOrder = rrddim_add(st, "SctpInOrderChunks", "InOrder", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_InUnorder = rrddim_add(st, "SctpInUnorderChunks", "InUnorder", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutCtrl = rrddim_add(st, "SctpOutCtrlChunks", "OutCtrl", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutOrder = rrddim_add(st, "SctpOutOrderChunks", "OutOrder", -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutUnorder = rrddim_add(st, "SctpOutUnorderChunks", "OutUnorder", -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_InCtrl, SctpInCtrlChunks); + rrddim_set_by_pointer(st, rd_InOrder, SctpInOrderChunks); + rrddim_set_by_pointer(st, rd_InUnorder, SctpInUnorderChunks); + rrddim_set_by_pointer(st, rd_OutCtrl, SctpOutCtrlChunks); + rrddim_set_by_pointer(st, rd_OutOrder, SctpOutOrderChunks); + rrddim_set_by_pointer(st, rd_OutUnorder, SctpOutUnorderChunks); + rrdset_done(st); + } + + return 0; +} + diff --git a/collectors/proc.plugin/proc_net_sockstat.c b/collectors/proc.plugin/proc_net_sockstat.c new file mode 100644 index 0000000..e94b891 --- /dev/null +++ b/collectors/proc.plugin/proc_net_sockstat.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME "/proc/net/sockstat" + +static struct proc_net_sockstat { + kernel_uint_t sockets_used; + + kernel_uint_t tcp_inuse; + kernel_uint_t tcp_orphan; + kernel_uint_t tcp_tw; + kernel_uint_t tcp_alloc; + kernel_uint_t tcp_mem; + + kernel_uint_t udp_inuse; + kernel_uint_t udp_mem; + + kernel_uint_t udplite_inuse; + + kernel_uint_t raw_inuse; + + kernel_uint_t frag_inuse; + kernel_uint_t frag_memory; +} sockstat_root = { 0 }; + + +static int read_tcp_mem(void) { + static char *filename = NULL; + static const RRDVAR_ACQUIRED *tcp_mem_low_threshold = NULL, + *tcp_mem_pressure_threshold = NULL, + *tcp_mem_high_threshold = NULL; + + if(unlikely(!tcp_mem_low_threshold)) { + tcp_mem_low_threshold = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_mem_low"); + tcp_mem_pressure_threshold = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_mem_pressure"); + tcp_mem_high_threshold = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_mem_high"); + } + + if(unlikely(!filename)) { + char buffer[FILENAME_MAX + 1]; + snprintfz(buffer, FILENAME_MAX, "%s/proc/sys/net/ipv4/tcp_mem", netdata_configured_host_prefix); + filename = strdupz(buffer); + } + + char buffer[200 + 1], *start, *end; + if(read_file(filename, buffer, 200) != 0) return 1; + buffer[200] = '\0'; + + unsigned long long low = 0, pressure = 0, high = 0; + + start = buffer; + low = strtoull(start, &end, 10); + + start = end; + pressure = strtoull(start, &end, 10); + + start = end; + high = strtoull(start, &end, 10); + + // fprintf(stderr, "TCP MEM low = %llu, pressure = %llu, high = %llu\n", low, pressure, high); + + rrdvar_custom_host_variable_set(localhost, tcp_mem_low_threshold, low * sysconf(_SC_PAGESIZE) / 1024.0); + rrdvar_custom_host_variable_set(localhost, tcp_mem_pressure_threshold, pressure * sysconf(_SC_PAGESIZE) / 1024.0); + rrdvar_custom_host_variable_set(localhost, tcp_mem_high_threshold, high * sysconf(_SC_PAGESIZE) / 1024.0); + + return 0; +} + +static kernel_uint_t read_tcp_max_orphans(void) { + static char *filename = NULL; + static const RRDVAR_ACQUIRED *tcp_max_orphans_var = NULL; + + if(unlikely(!filename)) { + char buffer[FILENAME_MAX + 1]; + snprintfz(buffer, FILENAME_MAX, "%s/proc/sys/net/ipv4/tcp_max_orphans", netdata_configured_host_prefix); + filename = strdupz(buffer); + } + + unsigned long long tcp_max_orphans = 0; + if(read_single_number_file(filename, &tcp_max_orphans) == 0) { + + if(unlikely(!tcp_max_orphans_var)) + tcp_max_orphans_var = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_max_orphans"); + + rrdvar_custom_host_variable_set(localhost, tcp_max_orphans_var, tcp_max_orphans); + return tcp_max_orphans; + } + + return 0; +} + +int do_proc_net_sockstat(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + + static uint32_t hash_sockets = 0, + hash_raw = 0, + hash_frag = 0, + hash_tcp = 0, + hash_udp = 0, + hash_udplite = 0; + + static long long update_constants_every = 60, update_constants_count = 0; + + static ARL_BASE *arl_sockets = NULL; + static ARL_BASE *arl_tcp = NULL; + static ARL_BASE *arl_udp = NULL; + static ARL_BASE *arl_udplite = NULL; + static ARL_BASE *arl_raw = NULL; + static ARL_BASE *arl_frag = NULL; + + static int do_sockets = -1, do_tcp_sockets = -1, do_tcp_mem = -1, do_udp_sockets = -1, do_udp_mem = -1, do_udplite_sockets = -1, do_raw_sockets = -1, do_frag_sockets = -1, do_frag_mem = -1; + + static char *keys[7] = { NULL }; + static uint32_t hashes[7] = { 0 }; + static ARL_BASE *bases[7] = { NULL }; + + if(unlikely(!arl_sockets)) { + do_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 sockets", CONFIG_BOOLEAN_AUTO); + do_tcp_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 TCP sockets", CONFIG_BOOLEAN_AUTO); + do_tcp_mem = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 TCP memory", CONFIG_BOOLEAN_AUTO); + do_udp_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 UDP sockets", CONFIG_BOOLEAN_AUTO); + do_udp_mem = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 UDP memory", CONFIG_BOOLEAN_AUTO); + do_udplite_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 UDPLITE sockets", CONFIG_BOOLEAN_AUTO); + do_raw_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 RAW sockets", CONFIG_BOOLEAN_AUTO); + do_frag_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 FRAG sockets", CONFIG_BOOLEAN_AUTO); + do_frag_mem = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat", "ipv4 FRAG memory", CONFIG_BOOLEAN_AUTO); + + update_constants_every = config_get_number("plugin:proc:/proc/net/sockstat", "update constants every", update_constants_every); + update_constants_count = update_constants_every; + + arl_sockets = arl_create("sockstat/sockets", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_sockets, "used", &sockstat_root.sockets_used); + + arl_tcp = arl_create("sockstat/TCP", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_tcp, "inuse", &sockstat_root.tcp_inuse); + arl_expect(arl_tcp, "orphan", &sockstat_root.tcp_orphan); + arl_expect(arl_tcp, "tw", &sockstat_root.tcp_tw); + arl_expect(arl_tcp, "alloc", &sockstat_root.tcp_alloc); + arl_expect(arl_tcp, "mem", &sockstat_root.tcp_mem); + + arl_udp = arl_create("sockstat/UDP", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_udp, "inuse", &sockstat_root.udp_inuse); + arl_expect(arl_udp, "mem", &sockstat_root.udp_mem); + + arl_udplite = arl_create("sockstat/UDPLITE", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_udplite, "inuse", &sockstat_root.udplite_inuse); + + arl_raw = arl_create("sockstat/RAW", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_raw, "inuse", &sockstat_root.raw_inuse); + + arl_frag = arl_create("sockstat/FRAG", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_frag, "inuse", &sockstat_root.frag_inuse); + arl_expect(arl_frag, "memory", &sockstat_root.frag_memory); + + hash_sockets = simple_hash("sockets"); + hash_tcp = simple_hash("TCP"); + hash_udp = simple_hash("UDP"); + hash_udplite = simple_hash("UDPLITE"); + hash_raw = simple_hash("RAW"); + hash_frag = simple_hash("FRAG"); + + keys[0] = "sockets"; hashes[0] = hash_sockets; bases[0] = arl_sockets; + keys[1] = "TCP"; hashes[1] = hash_tcp; bases[1] = arl_tcp; + keys[2] = "UDP"; hashes[2] = hash_udp; bases[2] = arl_udp; + keys[3] = "UDPLITE"; hashes[3] = hash_udplite; bases[3] = arl_udplite; + keys[4] = "RAW"; hashes[4] = hash_raw; bases[4] = arl_raw; + keys[5] = "FRAG"; hashes[5] = hash_frag; bases[5] = arl_frag; + keys[6] = NULL; // terminator + } + + update_constants_count += update_every; + if(unlikely(update_constants_count > update_constants_every)) { + read_tcp_max_orphans(); + read_tcp_mem(); + update_constants_count = 0; + } + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/sockstat"); + ff = procfile_open(config_get("plugin:proc:/proc/net/sockstat", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + char *key = procfile_lineword(ff, l, 0); + uint32_t hash = simple_hash(key); + + int k; + for(k = 0; keys[k] ; k++) { + if(unlikely(hash == hashes[k] && strcmp(key, keys[k]) == 0)) { + // fprintf(stderr, "KEY: '%s', l=%zu, w=1, words=%zu\n", key, l, words); + ARL_BASE *arl = bases[k]; + arl_begin(arl); + size_t w = 1; + + while(w + 1 < words) { + char *name = procfile_lineword(ff, l, w); w++; + char *value = procfile_lineword(ff, l, w); w++; + // fprintf(stderr, " > NAME '%s', VALUE '%s', l=%zu, w=%zu, words=%zu\n", name, value, l, w, words); + if(unlikely(arl_check(arl, name, value) != 0)) + break; + } + + break; + } + } + } + + // ------------------------------------------------------------------------ + + if(do_sockets == CONFIG_BOOLEAN_YES || (do_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat_root.sockets_used || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_used = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_sockets" + , NULL + , "sockets" + , NULL + , "IPv4 Sockets Used" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_SOCKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_used = rrddim_add(st, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_used, (collected_number)sockstat_root.sockets_used); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_tcp_sockets == CONFIG_BOOLEAN_YES || (do_tcp_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat_root.tcp_inuse || + sockstat_root.tcp_orphan || + sockstat_root.tcp_tw || + sockstat_root.tcp_alloc || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL, + *rd_orphan = NULL, + *rd_timewait = NULL, + *rd_alloc = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_tcp_sockets" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_TCP_SOCKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_alloc = rrddim_add(st, "alloc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_orphan = rrddim_add(st, "orphan", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_timewait = rrddim_add(st, "timewait", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat_root.tcp_inuse); + rrddim_set_by_pointer(st, rd_orphan, (collected_number)sockstat_root.tcp_orphan); + rrddim_set_by_pointer(st, rd_timewait, (collected_number)sockstat_root.tcp_tw); + rrddim_set_by_pointer(st, rd_alloc, (collected_number)sockstat_root.tcp_alloc); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_tcp_mem == CONFIG_BOOLEAN_YES || (do_tcp_mem == CONFIG_BOOLEAN_AUTO && + (sockstat_root.tcp_mem || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_mem = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_mem = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_tcp_mem" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Sockets Memory" + , "KiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_TCP_MEM + , update_every + , RRDSET_TYPE_AREA + ); + + rd_mem = rrddim_add(st, "mem", NULL, sysconf(_SC_PAGESIZE), 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_mem, (collected_number)sockstat_root.tcp_mem); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_udp_sockets == CONFIG_BOOLEAN_YES || (do_udp_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat_root.udp_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udp_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_udp_sockets" + , NULL + , "udp" + , NULL + , "IPv4 UDP Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_UDP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat_root.udp_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_udp_mem == CONFIG_BOOLEAN_YES || (do_udp_mem == CONFIG_BOOLEAN_AUTO && + (sockstat_root.udp_mem || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udp_mem = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_mem = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_udp_mem" + , NULL + , "udp" + , NULL + , "IPv4 UDP Sockets Memory" + , "KiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_UDP_MEM + , update_every + , RRDSET_TYPE_AREA + ); + + rd_mem = rrddim_add(st, "mem", NULL, sysconf(_SC_PAGESIZE), 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_mem, (collected_number)sockstat_root.udp_mem); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_udplite_sockets == CONFIG_BOOLEAN_YES || (do_udplite_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat_root.udplite_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udplite_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_udplite_sockets" + , NULL + , "udplite" + , NULL + , "IPv4 UDPLITE Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_UDPLITE + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat_root.udplite_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_raw_sockets == CONFIG_BOOLEAN_YES || (do_raw_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat_root.raw_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_raw_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_raw_sockets" + , NULL + , "raw" + , NULL + , "IPv4 RAW Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_RAW + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat_root.raw_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_frag_sockets == CONFIG_BOOLEAN_YES || (do_frag_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat_root.frag_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_frag_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_frag_sockets" + , NULL + , "fragments" + , NULL + , "IPv4 FRAG Sockets" + , "fragments" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_FRAGMENTS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat_root.frag_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_frag_mem == CONFIG_BOOLEAN_YES || (do_frag_mem == CONFIG_BOOLEAN_AUTO && + (sockstat_root.frag_memory || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_frag_mem = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_mem = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv4" + , "sockstat_frag_mem" + , NULL + , "fragments" + , NULL + , "IPv4 FRAG Sockets Memory" + , "KiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME + , NETDATA_CHART_PRIO_IPV4_FRAGMENTS_MEM + , update_every + , RRDSET_TYPE_AREA + ); + + rd_mem = rrddim_add(st, "mem", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_mem, (collected_number)sockstat_root.frag_memory); + rrdset_done(st); + } + + return 0; +} + diff --git a/collectors/proc.plugin/proc_net_sockstat6.c b/collectors/proc.plugin/proc_net_sockstat6.c new file mode 100644 index 0000000..065cf60 --- /dev/null +++ b/collectors/proc.plugin/proc_net_sockstat6.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME "/proc/net/sockstat6" + +static struct proc_net_sockstat6 { + kernel_uint_t tcp6_inuse; + kernel_uint_t udp6_inuse; + kernel_uint_t udplite6_inuse; + kernel_uint_t raw6_inuse; + kernel_uint_t frag6_inuse; +} sockstat6_root = { 0 }; + +int do_proc_net_sockstat6(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + + static uint32_t hash_raw = 0, + hash_frag = 0, + hash_tcp = 0, + hash_udp = 0, + hash_udplite = 0; + + static ARL_BASE *arl_tcp = NULL; + static ARL_BASE *arl_udp = NULL; + static ARL_BASE *arl_udplite = NULL; + static ARL_BASE *arl_raw = NULL; + static ARL_BASE *arl_frag = NULL; + + static int do_tcp_sockets = -1, do_udp_sockets = -1, do_udplite_sockets = -1, do_raw_sockets = -1, do_frag_sockets = -1; + + static char *keys[6] = { NULL }; + static uint32_t hashes[6] = { 0 }; + static ARL_BASE *bases[6] = { NULL }; + + if(unlikely(!arl_tcp)) { + do_tcp_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat6", "ipv6 TCP sockets", CONFIG_BOOLEAN_AUTO); + do_udp_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat6", "ipv6 UDP sockets", CONFIG_BOOLEAN_AUTO); + do_udplite_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat6", "ipv6 UDPLITE sockets", CONFIG_BOOLEAN_AUTO); + do_raw_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat6", "ipv6 RAW sockets", CONFIG_BOOLEAN_AUTO); + do_frag_sockets = config_get_boolean_ondemand("plugin:proc:/proc/net/sockstat6", "ipv6 FRAG sockets", CONFIG_BOOLEAN_AUTO); + + arl_tcp = arl_create("sockstat6/TCP6", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_tcp, "inuse", &sockstat6_root.tcp6_inuse); + + arl_udp = arl_create("sockstat6/UDP6", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_udp, "inuse", &sockstat6_root.udp6_inuse); + + arl_udplite = arl_create("sockstat6/UDPLITE6", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_udplite, "inuse", &sockstat6_root.udplite6_inuse); + + arl_raw = arl_create("sockstat6/RAW6", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_raw, "inuse", &sockstat6_root.raw6_inuse); + + arl_frag = arl_create("sockstat6/FRAG6", arl_callback_str2kernel_uint_t, 60); + arl_expect(arl_frag, "inuse", &sockstat6_root.frag6_inuse); + + hash_tcp = simple_hash("TCP6"); + hash_udp = simple_hash("UDP6"); + hash_udplite = simple_hash("UDPLITE6"); + hash_raw = simple_hash("RAW6"); + hash_frag = simple_hash("FRAG6"); + + keys[0] = "TCP6"; hashes[0] = hash_tcp; bases[0] = arl_tcp; + keys[1] = "UDP6"; hashes[1] = hash_udp; bases[1] = arl_udp; + keys[2] = "UDPLITE6"; hashes[2] = hash_udplite; bases[2] = arl_udplite; + keys[3] = "RAW6"; hashes[3] = hash_raw; bases[3] = arl_raw; + keys[4] = "FRAG6"; hashes[4] = hash_frag; bases[4] = arl_frag; + keys[5] = NULL; // terminator + } + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/sockstat6"); + ff = procfile_open(config_get("plugin:proc:/proc/net/sockstat6", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + char *key = procfile_lineword(ff, l, 0); + uint32_t hash = simple_hash(key); + + int k; + for(k = 0; keys[k] ; k++) { + if(unlikely(hash == hashes[k] && strcmp(key, keys[k]) == 0)) { + // fprintf(stderr, "KEY: '%s', l=%zu, w=1, words=%zu\n", key, l, words); + ARL_BASE *arl = bases[k]; + arl_begin(arl); + size_t w = 1; + + while(w + 1 < words) { + char *name = procfile_lineword(ff, l, w); w++; + char *value = procfile_lineword(ff, l, w); w++; + // fprintf(stderr, " > NAME '%s', VALUE '%s', l=%zu, w=%zu, words=%zu\n", name, value, l, w, words); + if(unlikely(arl_check(arl, name, value) != 0)) + break; + } + + break; + } + } + } + + // ------------------------------------------------------------------------ + + if(do_tcp_sockets == CONFIG_BOOLEAN_YES || (do_tcp_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat6_root.tcp6_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_tcp_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "sockstat6_tcp_sockets" + , NULL + , "tcp6" + , NULL + , "IPv6 TCP Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME + , NETDATA_CHART_PRIO_IPV6_TCP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat6_root.tcp6_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_udp_sockets == CONFIG_BOOLEAN_YES || (do_udp_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat6_root.udp6_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udp_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "sockstat6_udp_sockets" + , NULL + , "udp6" + , NULL + , "IPv6 UDP Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME + , NETDATA_CHART_PRIO_IPV6_UDP + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat6_root.udp6_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_udplite_sockets == CONFIG_BOOLEAN_YES || (do_udplite_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat6_root.udplite6_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_udplite_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "sockstat6_udplite_sockets" + , NULL + , "udplite6" + , NULL + , "IPv6 UDPLITE Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME + , NETDATA_CHART_PRIO_IPV6_UDPLITE + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat6_root.udplite6_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_raw_sockets == CONFIG_BOOLEAN_YES || (do_raw_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat6_root.raw6_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_raw_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "sockstat6_raw_sockets" + , NULL + , "raw6" + , NULL + , "IPv6 RAW Sockets" + , "sockets" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME + , NETDATA_CHART_PRIO_IPV6_RAW + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat6_root.raw6_inuse); + rrdset_done(st); + } + + // ------------------------------------------------------------------------ + + if(do_frag_sockets == CONFIG_BOOLEAN_YES || (do_frag_sockets == CONFIG_BOOLEAN_AUTO && + (sockstat6_root.frag6_inuse || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_frag_sockets = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + static RRDDIM *rd_inuse = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "ipv6" + , "sockstat6_frag_sockets" + , NULL + , "fragments6" + , NULL + , "IPv6 FRAG Sockets" + , "fragments" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME + , NETDATA_CHART_PRIO_IPV6_FRAGMENTS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_inuse = rrddim_add(st, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_inuse, (collected_number)sockstat6_root.frag6_inuse); + rrdset_done(st); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_softnet_stat.c b/collectors/proc.plugin/proc_net_softnet_stat.c new file mode 100644 index 0000000..6523924 --- /dev/null +++ b/collectors/proc.plugin/proc_net_softnet_stat.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NET_SOFTNET_NAME "/proc/net/softnet_stat" + +static inline char *softnet_column_name(size_t column) { + switch(column) { + // https://github.com/torvalds/linux/blob/a7fd20d1c476af4563e66865213474a2f9f473a4/net/core/net-procfs.c#L161-L166 + case 0: return "processed"; + case 1: return "dropped"; + case 2: return "squeezed"; + case 9: return "received_rps"; + case 10: return "flow_limit_count"; + default: return NULL; + } +} + +int do_proc_net_softnet_stat(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + static int do_per_core = -1; + static size_t allocated_lines = 0, allocated_columns = 0; + static uint32_t *data = NULL; + + if(unlikely(do_per_core == -1)) do_per_core = config_get_boolean("plugin:proc:/proc/net/softnet_stat", "softnet_stat per core", 1); + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/softnet_stat"); + ff = procfile_open(config_get("plugin:proc:/proc/net/softnet_stat", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + size_t words = procfile_linewords(ff, 0), w; + + if(unlikely(!lines || !words)) { + error("Cannot read /proc/net/softnet_stat, %zu lines and %zu columns reported.", lines, words); + return 1; + } + + if(unlikely(lines > 200)) lines = 200; + if(unlikely(words > 50)) words = 50; + + if(unlikely(!data || lines > allocated_lines || words > allocated_columns)) { + freez(data); + allocated_lines = lines; + allocated_columns = words; + data = mallocz((allocated_lines + 1) * allocated_columns * sizeof(uint32_t)); + } + + // initialize to zero + memset(data, 0, (allocated_lines + 1) * allocated_columns * sizeof(uint32_t)); + + // parse the values + for(l = 0; l < lines ;l++) { + words = procfile_linewords(ff, l); + if(unlikely(!words)) continue; + + if(unlikely(words > allocated_columns)) + words = allocated_columns; + + for(w = 0; w < words ; w++) { + if(unlikely(softnet_column_name(w))) { + uint32_t t = (uint32_t)strtoul(procfile_lineword(ff, l, w), NULL, 16); + data[w] += t; + data[((l + 1) * allocated_columns) + w] = t; + } + } + } + + if(unlikely(data[(lines * allocated_columns)] == 0)) + lines--; + + RRDSET *st; + + // -------------------------------------------------------------------- + + st = rrdset_find_active_bytype_localhost("system", "softnet_stat"); + if(unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "softnet_stat" + , NULL + , "softnet_stat" + , "system.softnet_stat" + , "System softnet_stat" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOFTNET_NAME + , NETDATA_CHART_PRIO_SYSTEM_SOFTNET_STAT + , update_every + , RRDSET_TYPE_LINE + ); + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_set(st, softnet_column_name(w), data[w]); + + rrdset_done(st); + + if(do_per_core) { + for(l = 0; l < lines ;l++) { + char id[50+1]; + snprintfz(id, 50, "cpu%zu_softnet_stat", l); + + st = rrdset_find_active_bytype_localhost("cpu", id); + if(unlikely(!st)) { + char title[100+1]; + snprintfz(title, 100, "CPU softnet_stat"); + + st = rrdset_create_localhost( + "cpu" + , id + , NULL + , "softnet_stat" + , "cpu.softnet_stat" + , title + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NET_SOFTNET_NAME + , NETDATA_CHART_PRIO_SOFTNET_PER_CORE + l + , update_every + , RRDSET_TYPE_LINE + ); + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_set(st, softnet_column_name(w), data[((l + 1) * allocated_columns) + w]); + + rrdset_done(st); + } + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_stat_conntrack.c b/collectors/proc.plugin/proc_net_stat_conntrack.c new file mode 100644 index 0000000..f9dbdf4 --- /dev/null +++ b/collectors/proc.plugin/proc_net_stat_conntrack.c @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define RRD_TYPE_NET_STAT_NETFILTER "netfilter" +#define RRD_TYPE_NET_STAT_CONNTRACK "conntrack" +#define PLUGIN_PROC_MODULE_CONNTRACK_NAME "/proc/net/stat/nf_conntrack" + +int do_proc_net_stat_conntrack(int update_every, usec_t dt) { + static procfile *ff = NULL; + static int do_sockets = -1, do_new = -1, do_changes = -1, do_expect = -1, do_search = -1, do_errors = -1; + static usec_t get_max_every = 10 * USEC_PER_SEC, usec_since_last_max = 0; + static int read_full = 1; + static char *nf_conntrack_filename, *nf_conntrack_count_filename, *nf_conntrack_max_filename; + static const RRDVAR_ACQUIRED *rrdvar_max = NULL; + + unsigned long long aentries = 0, asearched = 0, afound = 0, anew = 0, ainvalid = 0, aignore = 0, adelete = 0, adelete_list = 0, + ainsert = 0, ainsert_failed = 0, adrop = 0, aearly_drop = 0, aicmp_error = 0, aexpect_new = 0, aexpect_create = 0, aexpect_delete = 0, asearch_restart = 0; + + if(unlikely(do_sockets == -1)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/stat/nf_conntrack"); + nf_conntrack_filename = config_get("plugin:proc:/proc/net/stat/nf_conntrack", "filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/sys/net/netfilter/nf_conntrack_max"); + nf_conntrack_max_filename = config_get("plugin:proc:/proc/sys/net/netfilter/nf_conntrack_max", "filename to monitor", filename); + usec_since_last_max = get_max_every = config_get_number("plugin:proc:/proc/sys/net/netfilter/nf_conntrack_max", "read every seconds", 10) * USEC_PER_SEC; + + read_full = 1; + ff = procfile_open(nf_conntrack_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(!ff) read_full = 0; + + do_new = config_get_boolean("plugin:proc:/proc/net/stat/nf_conntrack", "netfilter new connections", read_full); + do_changes = config_get_boolean("plugin:proc:/proc/net/stat/nf_conntrack", "netfilter connection changes", read_full); + do_expect = config_get_boolean("plugin:proc:/proc/net/stat/nf_conntrack", "netfilter connection expectations", read_full); + do_search = config_get_boolean("plugin:proc:/proc/net/stat/nf_conntrack", "netfilter connection searches", read_full); + do_errors = config_get_boolean("plugin:proc:/proc/net/stat/nf_conntrack", "netfilter errors", read_full); + + do_sockets = 1; + if(!read_full) { + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/sys/net/netfilter/nf_conntrack_count"); + nf_conntrack_count_filename = config_get("plugin:proc:/proc/sys/net/netfilter/nf_conntrack_count", "filename to monitor", filename); + + if(read_single_number_file(nf_conntrack_count_filename, &aentries)) + do_sockets = 0; + } + + do_sockets = config_get_boolean("plugin:proc:/proc/net/stat/nf_conntrack", "netfilter connections", do_sockets); + + if(!do_sockets && !read_full) + return 1; + + rrdvar_max = rrdvar_custom_host_variable_add_and_acquire(localhost, "netfilter_conntrack_max"); + } + + if(likely(read_full)) { + if(unlikely(!ff)) { + ff = procfile_open(nf_conntrack_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + for(l = 1; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 17)) { + if(unlikely(words)) error("Cannot read /proc/net/stat/nf_conntrack line. Expected 17 params, read %zu.", words); + continue; + } + + unsigned long long tentries = 0, tsearched = 0, tfound = 0, tnew = 0, tinvalid = 0, tignore = 0, tdelete = 0, tdelete_list = 0, tinsert = 0, tinsert_failed = 0, tdrop = 0, tearly_drop = 0, ticmp_error = 0, texpect_new = 0, texpect_create = 0, texpect_delete = 0, tsearch_restart = 0; + + tentries = strtoull(procfile_lineword(ff, l, 0), NULL, 16); + tsearched = strtoull(procfile_lineword(ff, l, 1), NULL, 16); + tfound = strtoull(procfile_lineword(ff, l, 2), NULL, 16); + tnew = strtoull(procfile_lineword(ff, l, 3), NULL, 16); + tinvalid = strtoull(procfile_lineword(ff, l, 4), NULL, 16); + tignore = strtoull(procfile_lineword(ff, l, 5), NULL, 16); + tdelete = strtoull(procfile_lineword(ff, l, 6), NULL, 16); + tdelete_list = strtoull(procfile_lineword(ff, l, 7), NULL, 16); + tinsert = strtoull(procfile_lineword(ff, l, 8), NULL, 16); + tinsert_failed = strtoull(procfile_lineword(ff, l, 9), NULL, 16); + tdrop = strtoull(procfile_lineword(ff, l, 10), NULL, 16); + tearly_drop = strtoull(procfile_lineword(ff, l, 11), NULL, 16); + ticmp_error = strtoull(procfile_lineword(ff, l, 12), NULL, 16); + texpect_new = strtoull(procfile_lineword(ff, l, 13), NULL, 16); + texpect_create = strtoull(procfile_lineword(ff, l, 14), NULL, 16); + texpect_delete = strtoull(procfile_lineword(ff, l, 15), NULL, 16); + tsearch_restart = strtoull(procfile_lineword(ff, l, 16), NULL, 16); + + if(unlikely(!aentries)) aentries = tentries; + + // sum all the cpus together + asearched += tsearched; // conntrack.search + afound += tfound; // conntrack.search + anew += tnew; // conntrack.new + ainvalid += tinvalid; // conntrack.new + aignore += tignore; // conntrack.new + adelete += tdelete; // conntrack.changes + adelete_list += tdelete_list; // conntrack.changes + ainsert += tinsert; // conntrack.changes + ainsert_failed += tinsert_failed; // conntrack.errors + adrop += tdrop; // conntrack.errors + aearly_drop += tearly_drop; // conntrack.errors + aicmp_error += ticmp_error; // conntrack.errors + aexpect_new += texpect_new; // conntrack.expect + aexpect_create += texpect_create; // conntrack.expect + aexpect_delete += texpect_delete; // conntrack.expect + asearch_restart += tsearch_restart; // conntrack.search + } + } + else { + if(unlikely(read_single_number_file(nf_conntrack_count_filename, &aentries))) + return 0; // we return 0, so that we will retry to open it next time + } + + usec_since_last_max += dt; + if(unlikely(rrdvar_max && usec_since_last_max >= get_max_every)) { + usec_since_last_max = 0; + + unsigned long long max; + if(likely(!read_single_number_file(nf_conntrack_max_filename, &max))) + rrdvar_custom_host_variable_set(localhost, rrdvar_max, max); + } + + // -------------------------------------------------------------------- + + if(do_sockets) { + static RRDSET *st = NULL; + static RRDDIM *rd_connections = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_sockets" + , NULL + , RRD_TYPE_NET_STAT_CONNTRACK + , NULL + , "Connection Tracker Connections" + , "active connections" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_CONNTRACK_NAME + , NETDATA_CHART_PRIO_NETFILTER_SOCKETS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_connections = rrddim_add(st, "connections", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_connections, aentries); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_new) { + static RRDSET *st = NULL; + static RRDDIM + *rd_new = NULL, + *rd_ignore = NULL, + *rd_invalid = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_new" + , NULL + , RRD_TYPE_NET_STAT_CONNTRACK + , NULL + , "Connection Tracker New Connections" + , "connections/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_CONNTRACK_NAME + , NETDATA_CHART_PRIO_NETFILTER_NEW + , update_every + , RRDSET_TYPE_LINE + ); + + rd_new = rrddim_add(st, "new", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ignore = rrddim_add(st, "ignore", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_invalid = rrddim_add(st, "invalid", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_new, anew); + rrddim_set_by_pointer(st, rd_ignore, aignore); + rrddim_set_by_pointer(st, rd_invalid, ainvalid); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_changes) { + static RRDSET *st = NULL; + static RRDDIM + *rd_inserted = NULL, + *rd_deleted = NULL, + *rd_delete_list = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_changes" + , NULL + , RRD_TYPE_NET_STAT_CONNTRACK + , NULL + , "Connection Tracker Changes" + , "changes/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_CONNTRACK_NAME + , NETDATA_CHART_PRIO_NETFILTER_CHANGES + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_inserted = rrddim_add(st, "inserted", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_deleted = rrddim_add(st, "deleted", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_delete_list = rrddim_add(st, "delete_list", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_inserted, ainsert); + rrddim_set_by_pointer(st, rd_deleted, adelete); + rrddim_set_by_pointer(st, rd_delete_list, adelete_list); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_expect) { + static RRDSET *st = NULL; + static RRDDIM *rd_created = NULL, + *rd_deleted = NULL, + *rd_new = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_expect" + , NULL + , RRD_TYPE_NET_STAT_CONNTRACK + , NULL + , "Connection Tracker Expectations" + , "expectations/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_CONNTRACK_NAME + , NETDATA_CHART_PRIO_NETFILTER_EXPECT + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_created = rrddim_add(st, "created", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_deleted = rrddim_add(st, "deleted", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_new = rrddim_add(st, "new", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_created, aexpect_create); + rrddim_set_by_pointer(st, rd_deleted, aexpect_delete); + rrddim_set_by_pointer(st, rd_new, aexpect_new); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_search) { + static RRDSET *st = NULL; + static RRDDIM *rd_searched = NULL, + *rd_restarted = NULL, + *rd_found = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_search" + , NULL + , RRD_TYPE_NET_STAT_CONNTRACK + , NULL + , "Connection Tracker Searches" + , "searches/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_CONNTRACK_NAME + , NETDATA_CHART_PRIO_NETFILTER_SEARCH + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_searched = rrddim_add(st, "searched", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_restarted = rrddim_add(st, "restarted", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_found = rrddim_add(st, "found", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_searched, asearched); + rrddim_set_by_pointer(st, rd_restarted, asearch_restart); + rrddim_set_by_pointer(st, rd_found, afound); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_errors) { + static RRDSET *st = NULL; + static RRDDIM *rd_icmp_error = NULL, + *rd_insert_failed = NULL, + *rd_drop = NULL, + *rd_early_drop = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_CONNTRACK "_errors" + , NULL + , RRD_TYPE_NET_STAT_CONNTRACK + , NULL + , "Connection Tracker Errors" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_CONNTRACK_NAME + , NETDATA_CHART_PRIO_NETFILTER_ERRORS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_icmp_error = rrddim_add(st, "icmp_error", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_insert_failed = rrddim_add(st, "insert_failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_drop = rrddim_add(st, "drop", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_early_drop = rrddim_add(st, "early_drop", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd_icmp_error, aicmp_error); + rrddim_set_by_pointer(st, rd_insert_failed, ainsert_failed); + rrddim_set_by_pointer(st, rd_drop, adrop); + rrddim_set_by_pointer(st, rd_early_drop, aearly_drop); + rrdset_done(st); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_stat_synproxy.c b/collectors/proc.plugin/proc_net_stat_synproxy.c new file mode 100644 index 0000000..0a74b35 --- /dev/null +++ b/collectors/proc.plugin/proc_net_stat_synproxy.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_SYNPROXY_NAME "/proc/net/stat/synproxy" + +#define RRD_TYPE_NET_STAT_NETFILTER "netfilter" +#define RRD_TYPE_NET_STAT_SYNPROXY "synproxy" + +int do_proc_net_stat_synproxy(int update_every, usec_t dt) { + (void)dt; + + static int do_cookies = -1, do_syns = -1, do_reopened = -1; + static procfile *ff = NULL; + + if(unlikely(do_cookies == -1)) { + do_cookies = config_get_boolean_ondemand("plugin:proc:/proc/net/stat/synproxy", "SYNPROXY cookies", CONFIG_BOOLEAN_AUTO); + do_syns = config_get_boolean_ondemand("plugin:proc:/proc/net/stat/synproxy", "SYNPROXY SYN received", CONFIG_BOOLEAN_AUTO); + do_reopened = config_get_boolean_ondemand("plugin:proc:/proc/net/stat/synproxy", "SYNPROXY connections reopened", CONFIG_BOOLEAN_AUTO); + } + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/stat/synproxy"); + ff = procfile_open(config_get("plugin:proc:/proc/net/stat/synproxy", "filename to monitor", filename), " \t,:|", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) + return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + // make sure we have 3 lines + size_t lines = procfile_lines(ff), l; + if(unlikely(lines < 2)) { + error("/proc/net/stat/synproxy has %zu lines, expected no less than 2. Disabling it.", lines); + return 1; + } + + unsigned long long syn_received = 0, cookie_invalid = 0, cookie_valid = 0, cookie_retrans = 0, conn_reopened = 0; + + // synproxy gives its values per CPU + for(l = 1; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 6)) + continue; + + syn_received += strtoull(procfile_lineword(ff, l, 1), NULL, 16); + cookie_invalid += strtoull(procfile_lineword(ff, l, 2), NULL, 16); + cookie_valid += strtoull(procfile_lineword(ff, l, 3), NULL, 16); + cookie_retrans += strtoull(procfile_lineword(ff, l, 4), NULL, 16); + conn_reopened += strtoull(procfile_lineword(ff, l, 5), NULL, 16); + } + + unsigned long long events = syn_received + cookie_invalid + cookie_valid + cookie_retrans + conn_reopened; + + // -------------------------------------------------------------------- + + if(do_syns == CONFIG_BOOLEAN_YES || (do_syns == CONFIG_BOOLEAN_AUTO && + (events || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_syns = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_SYNPROXY "_syn_received" + , NULL + , RRD_TYPE_NET_STAT_SYNPROXY + , NULL + , "SYNPROXY SYN Packets received" + , "packets/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_SYNPROXY_NAME + , NETDATA_CHART_PRIO_SYNPROXY_SYN_RECEIVED + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "received", syn_received); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_reopened == CONFIG_BOOLEAN_YES || (do_reopened == CONFIG_BOOLEAN_AUTO && + (events || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_reopened = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_SYNPROXY "_conn_reopened" + , NULL + , RRD_TYPE_NET_STAT_SYNPROXY + , NULL + , "SYNPROXY Connections Reopened" + , "connections/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_SYNPROXY_NAME + , NETDATA_CHART_PRIO_SYNPROXY_CONN_OPEN + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "reopened", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "reopened", conn_reopened); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_cookies == CONFIG_BOOLEAN_YES || (do_cookies == CONFIG_BOOLEAN_AUTO && + (events || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_cookies = CONFIG_BOOLEAN_YES; + + static RRDSET *st = NULL; + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_STAT_NETFILTER + , RRD_TYPE_NET_STAT_SYNPROXY "_cookies" + , NULL + , RRD_TYPE_NET_STAT_SYNPROXY + , NULL + , "SYNPROXY TCP Cookies" + , "cookies/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_SYNPROXY_NAME + , NETDATA_CHART_PRIO_SYNPROXY_COOKIES + , update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(st, "valid", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "invalid", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st, "retransmits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(st, "valid", cookie_valid); + rrddim_set(st, "invalid", cookie_invalid); + rrddim_set(st, "retransmits", cookie_retrans); + rrdset_done(st); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_net_wireless.c b/collectors/proc.plugin/proc_net_wireless.c new file mode 100644 index 0000000..08ab2ea --- /dev/null +++ b/collectors/proc.plugin/proc_net_wireless.c @@ -0,0 +1,432 @@ +#include +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NETWIRELESS_NAME "/proc/net/wireless" + +#define CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETWIRELESS_NAME + + +static struct netwireless { + char *name; + uint32_t hash; + + //flags + bool configured; + struct timeval updated; + + int do_status; + int do_quality; + int do_discarded_packets; + int do_missed_beacon; + + // Data collected + // status + kernel_uint_t status; + + // Quality + NETDATA_DOUBLE link; + NETDATA_DOUBLE level; + NETDATA_DOUBLE noise; + + // Discarded packets + kernel_uint_t nwid; + kernel_uint_t crypt; + kernel_uint_t frag; + kernel_uint_t retry; + kernel_uint_t misc; + + // missed beacon + kernel_uint_t missed_beacon; + + const char *chart_id_net_status; + const char *chart_id_net_link; + const char *chart_id_net_level; + const char *chart_id_net_noise; + const char *chart_id_net_discarded_packets; + const char *chart_id_net_missed_beacon; + + const char *chart_family; + + // charts + // status + RRDSET *st_status; + + // Quality + RRDSET *st_link; + RRDSET *st_level; + RRDSET *st_noise; + + // Discarded Packets + RRDSET *st_discarded_packets; + // Missed beacon + RRDSET *st_missed_beacon; + + // Dimensions + // status + RRDDIM *rd_status; + + // Quality + RRDDIM *rd_link; + RRDDIM *rd_level; + RRDDIM *rd_noise; + + // Discarded packets + RRDDIM *rd_nwid; + RRDDIM *rd_crypt; + RRDDIM *rd_frag; + RRDDIM *rd_retry; + RRDDIM *rd_misc; + + // missed beacon + RRDDIM *rd_missed_beacon; + + struct netwireless *next; +} *netwireless_root = NULL; + +static void netwireless_free_st(struct netwireless *wireless_dev) +{ + if (wireless_dev->st_status) rrdset_is_obsolete(wireless_dev->st_status); + if (wireless_dev->st_link) rrdset_is_obsolete(wireless_dev->st_link); + if (wireless_dev->st_level) rrdset_is_obsolete(wireless_dev->st_level); + if (wireless_dev->st_noise) rrdset_is_obsolete(wireless_dev->st_noise); + if (wireless_dev->st_discarded_packets) rrdset_is_obsolete(wireless_dev->st_discarded_packets); + if (wireless_dev->st_missed_beacon) rrdset_is_obsolete(wireless_dev->st_missed_beacon); + + wireless_dev->st_status = NULL; + wireless_dev->st_link = NULL; + wireless_dev->st_level = NULL; + wireless_dev->st_noise = NULL; + wireless_dev->st_discarded_packets = NULL; + wireless_dev->st_missed_beacon = NULL; +} + +static void netwireless_free(struct netwireless *wireless_dev) +{ + wireless_dev->next = NULL; + freez((void *)wireless_dev->name); + netwireless_free_st(wireless_dev); + freez((void *)wireless_dev->chart_id_net_status); + freez((void *)wireless_dev->chart_id_net_link); + freez((void *)wireless_dev->chart_id_net_level); + freez((void *)wireless_dev->chart_id_net_noise); + freez((void *)wireless_dev->chart_id_net_discarded_packets); + freez((void *)wireless_dev->chart_id_net_missed_beacon); + + freez((void *)wireless_dev); +} + +static void netwireless_cleanup(struct timeval *timestamp) +{ + struct netwireless *previous = NULL; + struct netwireless *current; + // search it, from beginning to the end + for (current = netwireless_root; current;) { + + if (timercmp(¤t->updated, timestamp, <)) { + struct netwireless *to_free = current; + current = current->next; + netwireless_free(to_free); + + if (previous) { + previous->next = current; + } else { + netwireless_root = current; + } + } else { + previous = current; + current = current->next; + } + } +} + +// finds an existing interface or creates a new entry +static struct netwireless *find_or_create_wireless(const char *name) +{ + struct netwireless *wireless; + uint32_t hash = simple_hash(name); + + // search it, from beginning to the end + for (wireless = netwireless_root ; wireless ; wireless = wireless->next) { + if (unlikely(hash == wireless->hash && !strcmp(name, wireless->name))) { + return wireless; + } + } + + // create a new one + wireless = callocz(1, sizeof(struct netwireless)); + wireless->name = strdupz(name); + wireless->hash = hash; + + // link it to the end + if (netwireless_root) { + struct netwireless *last_node; + for (last_node = netwireless_root; last_node->next ; last_node = last_node->next); + + last_node->next = wireless; + } else + netwireless_root = wireless; + + return wireless; +} + +static void configure_device(int do_status, int do_quality, int do_discarded_packets, int do_missed, + struct netwireless *wireless_dev) { + wireless_dev->do_status = do_status; + wireless_dev->do_quality = do_quality; + wireless_dev->do_discarded_packets = do_discarded_packets; + wireless_dev->do_missed_beacon = do_missed; + wireless_dev->configured = true; + + char buffer[RRD_ID_LENGTH_MAX + 1]; + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_status", wireless_dev->name); + wireless_dev->chart_id_net_status = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_link_quality", wireless_dev->name); + wireless_dev->chart_id_net_link = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_signal_level", wireless_dev->name); + wireless_dev->chart_id_net_level = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_noise_level", wireless_dev->name); + wireless_dev->chart_id_net_noise = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_discarded_packets", wireless_dev->name); + wireless_dev->chart_id_net_discarded_packets = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_missed_beacon", wireless_dev->name); + wireless_dev->chart_id_net_missed_beacon = strdupz(buffer); +} + +static void add_labels_to_wireless(struct netwireless *w, RRDSET *st) { + rrdlabels_add(st->rrdlabels, "device", w->name, RRDLABEL_SRC_AUTO); +} + +int do_proc_net_wireless(int update_every, usec_t dt) +{ + UNUSED(dt); + static procfile *ff = NULL; + static int do_status, do_quality = -1, do_discarded_packets, do_beacon; + static char *proc_net_wireless_filename = NULL; + + if (unlikely(do_quality == -1)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/wireless"); + + proc_net_wireless_filename = config_get(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS,"filename to monitor", filename); + do_status = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "status for all interfaces", CONFIG_BOOLEAN_AUTO); + do_quality = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "quality for all interfaces", CONFIG_BOOLEAN_AUTO); + do_discarded_packets = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "discarded packets for all interfaces", CONFIG_BOOLEAN_AUTO); + do_beacon = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "missed beacon for all interface", CONFIG_BOOLEAN_AUTO); + } + + if (unlikely(!ff)) { + ff = procfile_open(proc_net_wireless_filename, " \t,|", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) return 1; + + size_t lines = procfile_lines(ff); + struct timeval timestamp; + size_t l; + gettimeofday(×tamp, NULL); + for (l = 2; l < lines; l++) { + if (unlikely(procfile_linewords(ff, l) < 11)) continue; + + char *name = procfile_lineword(ff, l, 0); + size_t len = strlen(name); + if (name[len - 1] == ':') name[len - 1] = '\0'; + + struct netwireless *wireless_dev = find_or_create_wireless(name); + + if (unlikely(!wireless_dev->configured)) { + configure_device(do_status, do_quality, do_discarded_packets, do_beacon, wireless_dev); + } + + if (likely(do_status != CONFIG_BOOLEAN_NO)) { + wireless_dev->status = str2kernel_uint_t(procfile_lineword(ff, l, 1)); + + if (unlikely(!wireless_dev->st_status)) { + wireless_dev->st_status = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_status, + NULL, + wireless_dev->name, + "wireless.status", + "Internal status reported by interface.", + "status", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(wireless_dev->st_status, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_status = rrddim_add(wireless_dev->st_status, "status", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_status); + } + + rrddim_set_by_pointer(wireless_dev->st_status, wireless_dev->rd_status, + (collected_number)wireless_dev->status); + rrdset_done(wireless_dev->st_status); + } + + if (likely(do_quality != CONFIG_BOOLEAN_NO)) { + wireless_dev->link = str2ndd(procfile_lineword(ff, l, 2), NULL); + wireless_dev->level = str2ndd(procfile_lineword(ff, l, 3), NULL); + wireless_dev->noise = str2ndd(procfile_lineword(ff, l, 4), NULL); + + if (unlikely(!wireless_dev->st_link)) { + wireless_dev->st_link = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_link, + NULL, + wireless_dev->name, + "wireless.link_quality", + "Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.", + "value", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 1, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_link, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_link = rrddim_add(wireless_dev->st_link, "link_quality", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_link); + } + + if (unlikely(!wireless_dev->st_level)) { + wireless_dev->st_level = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_level, + NULL, + wireless_dev->name, + "wireless.signal_level", + "The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal.", + "dBm", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 2, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_level, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_level = rrddim_add(wireless_dev->st_level, "signal_level", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_level); + } + + if (unlikely(!wireless_dev->st_noise)) { + wireless_dev->st_noise = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_noise, + NULL, + wireless_dev->name, + "wireless.noise_level", + "The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.", + "dBm", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 3, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_noise, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_noise = rrddim_add(wireless_dev->st_noise, "noise_level", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_noise); + } + + rrddim_set_by_pointer(wireless_dev->st_link, wireless_dev->rd_link, (collected_number)wireless_dev->link); + rrdset_done(wireless_dev->st_link); + + rrddim_set_by_pointer(wireless_dev->st_level, wireless_dev->rd_level, (collected_number)wireless_dev->level); + rrdset_done(wireless_dev->st_level); + + rrddim_set_by_pointer(wireless_dev->st_noise, wireless_dev->rd_noise, (collected_number)wireless_dev->noise); + rrdset_done(wireless_dev->st_noise); + } + + if (likely(do_discarded_packets)) { + wireless_dev->nwid = str2kernel_uint_t(procfile_lineword(ff, l, 5)); + wireless_dev->crypt = str2kernel_uint_t(procfile_lineword(ff, l, 6)); + wireless_dev->frag = str2kernel_uint_t(procfile_lineword(ff, l, 7)); + wireless_dev->retry = str2kernel_uint_t(procfile_lineword(ff, l, 8)); + wireless_dev->misc = str2kernel_uint_t(procfile_lineword(ff, l, 9)); + + if (unlikely(!wireless_dev->st_discarded_packets)) { + wireless_dev->st_discarded_packets = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_discarded_packets, + NULL, + wireless_dev->name, + "wireless.discarded_packets", + "Packet discarded in the wireless adapter due to \"wireless\" specific problems.", + "packets/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 4, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(wireless_dev->st_discarded_packets, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_nwid = rrddim_add(wireless_dev->st_discarded_packets, "nwid", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_crypt = rrddim_add(wireless_dev->st_discarded_packets, "crypt", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_frag = rrddim_add(wireless_dev->st_discarded_packets, "frag", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_retry = rrddim_add(wireless_dev->st_discarded_packets, "retry", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_misc = rrddim_add(wireless_dev->st_discarded_packets, "misc", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_discarded_packets); + } + + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_nwid, (collected_number)wireless_dev->nwid); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_crypt, (collected_number)wireless_dev->crypt); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_frag, (collected_number)wireless_dev->frag); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_retry, (collected_number)wireless_dev->retry); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_misc, (collected_number)wireless_dev->misc); + + rrdset_done(wireless_dev->st_discarded_packets); + } + + if (likely(do_beacon)) { + wireless_dev->missed_beacon = str2kernel_uint_t(procfile_lineword(ff, l, 10)); + + if (unlikely(!wireless_dev->st_missed_beacon)) { + wireless_dev->st_missed_beacon = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_missed_beacon, + NULL, + wireless_dev->name, + "wireless.missed_beacons", + "Number of missed beacons.", + "frames/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 5, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(wireless_dev->st_missed_beacon, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_missed_beacon = rrddim_add(wireless_dev->st_missed_beacon, "missed_beacons", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_missed_beacon); + } + + rrddim_set_by_pointer(wireless_dev->st_missed_beacon, wireless_dev->rd_missed_beacon, (collected_number)wireless_dev->missed_beacon); + rrdset_done(wireless_dev->st_missed_beacon); + } + + wireless_dev->updated = timestamp; + } + + netwireless_cleanup(×tamp); + return 0; +} diff --git a/collectors/proc.plugin/proc_pagetypeinfo.c b/collectors/proc.plugin/proc_pagetypeinfo.c new file mode 100644 index 0000000..dc006aa --- /dev/null +++ b/collectors/proc.plugin/proc_pagetypeinfo.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +// For ULONG_MAX +#include + +#define PLUGIN_PROC_MODULE_PAGETYPEINFO_NAME "/proc/pagetypeinfo" +#define CONFIG_SECTION_PLUGIN_PROC_PAGETYPEINFO "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PAGETYPEINFO_NAME + +// Zone struct is pglist_data, in include/linux/mmzone.h +// MAX_NR_ZONES is from __MAX_NR_ZONE, which is the last value of the enum. +#define MAX_PAGETYPE_ORDER 11 + +// Names are in mm/page_alloc.c :: migratetype_names. Max size = 10. +#define MAX_ZONETYPE_NAME 16 +#define MAX_PAGETYPE_NAME 16 + +// Defined in include/linux/mmzone.h as __MAX_NR_ZONE (last enum of zone_type) +#define MAX_ZONETYPE 6 +// Defined in include/linux/mmzone.h as MIGRATE_TYPES (last enum of migratetype) +#define MAX_PAGETYPE 7 + + +// +// /proc/pagetypeinfo is declared in mm/vmstat.c :: init_mm_internals +// + +// One line of /proc/pagetypeinfo +struct pageline { + int node; + char *zone; + char *type; + int line; + uint64_t free_pages_size[MAX_PAGETYPE_ORDER]; + RRDDIM *rd[MAX_PAGETYPE_ORDER]; +}; + +// Sum of all orders +struct systemorder { + uint64_t size; + RRDDIM *rd; +}; + + +static inline uint64_t pageline_total_count(struct pageline *p) { + uint64_t sum = 0, o; + for (o=0; ofree_pages_size[o]; + return sum; +} + +// Check if a line of /proc/pagetypeinfo is valid to use +// Free block lines starts by "Node" && 4th col is "type" +#define pagetypeinfo_line_valid(ff, l) (strncmp(procfile_lineword(ff, l, 0), "Node", 4) == 0 && strncmp(procfile_lineword(ff, l, 4), "type", 4) == 0) + +// Dimension name from the order +#define dim_name(s, o, pagesize) (snprintfz(s, 16,"%ldKB (%lu)", (1 << o) * pagesize / 1024, o)) + +int do_proc_pagetypeinfo(int update_every, usec_t dt) { + (void)dt; + + // Config + static int do_global, do_detail; + static SIMPLE_PATTERN *filter_types = NULL; + + // Counters from parsing the file, that doesn't change after boot + static struct systemorder systemorders[MAX_PAGETYPE_ORDER] = {}; + static struct pageline* pagelines = NULL; + static long pagesize = 0; + static size_t pageorders_cnt = 0, pagelines_cnt = 0, ff_lines = 0; + + // Handle + static procfile *ff = NULL; + static char ff_path[FILENAME_MAX + 1]; + + // RRD Sets + static RRDSET *st_order = NULL; + static RRDSET **st_nodezonetype = NULL; + + // Local temp variables + long unsigned int l, o, p; + struct pageline *pgl = NULL; + + // -------------------------------------------------------------------- + // Startup: Init arch and open /proc/pagetypeinfo + if (unlikely(!pagesize)) { + pagesize = sysconf(_SC_PAGESIZE); + } + + if(unlikely(!ff)) { + snprintfz(ff_path, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, PLUGIN_PROC_MODULE_PAGETYPEINFO_NAME); + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_PAGETYPEINFO, "filename to monitor", ff_path), " \t:", PROCFILE_FLAG_DEFAULT); + + if(unlikely(!ff)) { + strncpyz(ff_path, PLUGIN_PROC_MODULE_PAGETYPEINFO_NAME, FILENAME_MAX); + ff = procfile_open(PLUGIN_PROC_MODULE_PAGETYPEINFO_NAME, " \t,", PROCFILE_FLAG_DEFAULT); + } + } + if(unlikely(!ff)) + return 1; + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + // -------------------------------------------------------------------- + // Init: find how many Nodes, Zones and Types + if(unlikely(pagelines_cnt == 0)) { + size_t nodenumlast = -1; + char *zonenamelast = NULL; + + ff_lines = procfile_lines(ff); + if(unlikely(!ff_lines)) { + error("PLUGIN: PROC_PAGETYPEINFO: Cannot read %s, zero lines reported.", ff_path); + return 1; + } + + // Configuration + do_global = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PAGETYPEINFO, "enable system summary", CONFIG_BOOLEAN_YES); + do_detail = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_PAGETYPEINFO, "enable detail per-type", CONFIG_BOOLEAN_AUTO); + filter_types = simple_pattern_create( + config_get(CONFIG_SECTION_PLUGIN_PROC_PAGETYPEINFO, "hide charts id matching", "") + , NULL + , SIMPLE_PATTERN_SUFFIX + ); + + pagelines_cnt = 0; + + // Pass 1: how many lines would be valid + for (l = 4; l < ff_lines; l++) { + if (!pagetypeinfo_line_valid(ff, l)) + continue; + + pagelines_cnt++; + } + if (pagelines_cnt == 0) { + error("PLUGIN: PROC_PAGETYPEINFO: Unable to parse any valid line in %s", ff_path); + return 1; + } + + // 4th line is the "Free pages count per migrate type at order". Just subtract these 8 words. + pageorders_cnt = procfile_linewords(ff, 3); + if (pageorders_cnt < 9) { + error("PLUGIN: PROC_PAGETYPEINFO: Unable to parse Line 4 of %s", ff_path); + return 1; + } + + pageorders_cnt -= 9; + + if (pageorders_cnt > MAX_PAGETYPE_ORDER) { + error("PLUGIN: PROC_PAGETYPEINFO: pageorder found (%lu) is higher than max %d", + (long unsigned int) pageorders_cnt, MAX_PAGETYPE_ORDER); + return 1; + } + + // Init pagelines from scanned lines + if (!pagelines) { + pagelines = callocz(pagelines_cnt, sizeof(struct pageline)); + if (!pagelines) { + error("PLUGIN: PROC_PAGETYPEINFO: Cannot allocate %lu pagelines of %lu B", + (long unsigned int) pagelines_cnt, (long unsigned int) sizeof(struct pageline)); + return 1; + } + } + + // Pass 2: Scan the file again, with details + p = 0; + for (l=4; l < ff_lines; l++) { + + if (!pagetypeinfo_line_valid(ff, l)) + continue; + + size_t nodenum = strtoul(procfile_lineword(ff, l, 1), NULL, 10); + char *zonename = procfile_lineword(ff, l, 3); + char *typename = procfile_lineword(ff, l, 5); + + // We changed node or zone + if (nodenum != nodenumlast || !zonenamelast || strncmp(zonename, zonenamelast, 6) != 0) { + zonenamelast = zonename; + } + + // Populate the line + pgl = &pagelines[p]; + + pgl->line = l; + pgl->node = nodenum; + pgl->type = typename; + pgl->zone = zonename; + for (o = 0; o < pageorders_cnt; o++) + pgl->free_pages_size[o] = str2uint64_t(procfile_lineword(ff, l, o+6)) * 1 << o; + + p++; + } + + // Init the RRD graphs + + // Per-Order: sum of all node, zone, type Grouped by order + if (do_global != CONFIG_BOOLEAN_NO) { + st_order = rrdset_create_localhost( + "mem" + , "pagetype_global" + , NULL + , "pagetype" + , NULL + , "System orders available" + , "B" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_PAGETYPEINFO_NAME + , NETDATA_CHART_PRIO_MEM_PAGEFRAG + , update_every + , RRDSET_TYPE_STACKED + ); + for (o = 0; o < pageorders_cnt; o++) { + char id[3+1]; + snprintfz(id, 3, "%lu", o); + + char name[20+1]; + dim_name(name, o, pagesize); + + systemorders[o].rd = rrddim_add(st_order, id, name, pagesize, 1, RRD_ALGORITHM_ABSOLUTE); + } + } + + + // Per-Numa Node & Zone & Type (full detail). Only if sum(line) > 0 + st_nodezonetype = callocz(pagelines_cnt, sizeof(RRDSET *)); + for (p = 0; p < pagelines_cnt; p++) { + pgl = &pagelines[p]; + + // Skip invalid, refused or empty pagelines if not explicitly requested + if (!pgl + || do_detail == CONFIG_BOOLEAN_NO + || (do_detail == CONFIG_BOOLEAN_AUTO && pageline_total_count(pgl) == 0 && netdata_zero_metrics_enabled != CONFIG_BOOLEAN_YES)) + continue; + + // "pagetype Node" + NUMA-NodeId + ZoneName + TypeName + char setid[13+1+2+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME+1]; + snprintfz(setid, 13+1+2+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME, "pagetype_Node%d_%s_%s", pgl->node, pgl->zone, pgl->type); + + // Skip explicitly refused charts + if (simple_pattern_matches(filter_types, setid)) + continue; + + // "Node" + NUMA-NodeID + ZoneName + TypeName + char setname[4+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME +1]; + snprintfz(setname, MAX_ZONETYPE_NAME + MAX_PAGETYPE_NAME, "Node %d %s %s", pgl->node, pgl->zone, pgl->type); + + st_nodezonetype[p] = rrdset_create_localhost( + "mem" + , setid + , NULL + , "pagetype" + , "mem.pagetype" + , setname + , "B" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_PAGETYPEINFO_NAME + , NETDATA_CHART_PRIO_MEM_PAGEFRAG + 1 + p + , update_every + , RRDSET_TYPE_STACKED + ); + + char node[50+1]; + snprintfz(node, 50, "node%d", pgl->node); + rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_id", node, RRDLABEL_SRC_AUTO); + rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_zone", pgl->zone, RRDLABEL_SRC_AUTO); + rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_type", pgl->type, RRDLABEL_SRC_AUTO); + + for (o = 0; o < pageorders_cnt; o++) { + char dimid[3+1]; + snprintfz(dimid, 3, "%lu", o); + char dimname[20+1]; + dim_name(dimname, o, pagesize); + + pgl->rd[o] = rrddim_add(st_nodezonetype[p], dimid, dimname, pagesize, 1, RRD_ALGORITHM_ABSOLUTE); + } + } + } + + // -------------------------------------------------------------------- + // Update pagelines + + // Process each line + p = 0; + for (l=4; lshare_time.st) { + rrddim_set_by_pointer( + pcs->share_time.st, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer( + pcs->share_time.st, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer( + pcs->share_time.st, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(pcs->share_time.st); + } + if (pcs->total_time.st) { + rrddim_set_by_pointer( + pcs->total_time.st, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(pcs->total_time.st); + } +} + +static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) { + struct pressure_charts *pcs; + struct resource_info ri; + pcs = some ? &resources[res_idx].some : &resources[res_idx].full; + ri = resource_info[res_idx]; + + if (unlikely(!pcs->share_time.st)) { + pcs->share_time.st = rrdset_create_localhost( + "system", + pcs->share_time.id, + NULL, + ri.family, + NULL, + pcs->share_time.title, + "percentage", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_PRESSURE_NAME, + ri.section_priority + (some ? 40 : 50), + pressure_update_every, + RRDSET_TYPE_LINE); + pcs->share_time.rd10 = + rrddim_add(pcs->share_time.st, some ? "some 10" : "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = + rrddim_add(pcs->share_time.st, some ? "some 60" : "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = + rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + pcs->share_time.value10 = strtod(procfile_lineword(ff, some ? 0 : 1, 2), NULL); + pcs->share_time.value60 = strtod(procfile_lineword(ff, some ? 0 : 1, 4), NULL); + pcs->share_time.value300 = strtod(procfile_lineword(ff, some ? 0 : 1, 6), NULL); + + if (unlikely(!pcs->total_time.st)) { + pcs->total_time.st = rrdset_create_localhost( + "system", + pcs->total_time.id, + NULL, + ri.family, + NULL, + pcs->total_time.title, + "ms", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_PRESSURE_NAME, + ri.section_priority + (some ? 45 : 55), + pressure_update_every, + RRDSET_TYPE_LINE); + pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + pcs->total_time.value_total = str2ull(procfile_lineword(ff, some ? 0 : 1, 8)) / 1000; +} + +static void proc_pressure_do_resource_some(procfile *ff, int res_idx) { + proc_pressure_do_resource(ff, res_idx, 1); +} + +static void proc_pressure_do_resource_full(procfile *ff, int res_idx) { + proc_pressure_do_resource(ff, res_idx, 0); +} + +int do_proc_pressure(int update_every, usec_t dt) { + int fail_count = 0; + int i; + + static usec_t next_pressure_dt = 0; + static char *base_path = NULL; + + update_every = (update_every < MIN_PRESSURE_UPDATE_EVERY) ? MIN_PRESSURE_UPDATE_EVERY : update_every; + pressure_update_every = update_every; + + if (next_pressure_dt <= dt) { + next_pressure_dt = update_every * USEC_PER_SEC; + } else { + next_pressure_dt -= dt; + return 0; + } + + if (unlikely(!base_path)) { + base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure"); + } + + for (i = 0; i < PRESSURE_NUM_RESOURCES; i++) { + procfile *ff = resource_info[i].pf; + int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled; + + if (unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + char config_key[CONFIG_MAX_NAME + 1]; + + snprintfz(filename + , FILENAME_MAX + , "%s%s/%s" + , netdata_configured_host_prefix + , base_path + , resource_info[i].name); + + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name); + do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); + resources[i].some.enabled = do_some; + + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name); + do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); + resources[i].full.enabled = do_full; + + ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + error("Cannot read pressure information from %s.", filename); + fail_count++; + continue; + } + } + + ff = procfile_readall(ff); + resource_info[i].pf = ff; + if (unlikely(!ff)) { + fail_count++; + continue; + } + + size_t lines = procfile_lines(ff); + if (unlikely(lines < 1)) { + error("%s has no lines.", procfile_filename(ff)); + fail_count++; + continue; + } + + if (do_some) { + proc_pressure_do_resource_some(ff, i); + update_pressure_charts(&resources[i].some); + } + if (do_full && lines > 2) { + proc_pressure_do_resource_full(ff, i); + update_pressure_charts(&resources[i].full); + } + } + + if (PRESSURE_NUM_RESOURCES == fail_count) { + return 1; + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_pressure.h b/collectors/proc.plugin/proc_pressure.h new file mode 100644 index 0000000..0cb2331 --- /dev/null +++ b/collectors/proc.plugin/proc_pressure.h @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PROC_PRESSURE_H +#define NETDATA_PROC_PRESSURE_H + +#define PRESSURE_NUM_RESOURCES 3 + +struct pressure { + int updated; + char *filename; + + struct pressure_charts { + int enabled; + + struct pressure_share_time_chart { + const char *id; + const char *title; + + double value10; + double value60; + double value300; + + RRDSET *st; + RRDDIM *rd10; + RRDDIM *rd60; + RRDDIM *rd300; + } share_time; + + struct pressure_total_time_chart { + const char *id; + const char *title; + + unsigned long long value_total; + + RRDSET *st; + RRDDIM *rdtotal; + } total_time; + } some, full; +}; + +void update_pressure_charts(struct pressure_charts *charts); + +#endif //NETDATA_PROC_PRESSURE_H diff --git a/collectors/proc.plugin/proc_self_mountinfo.c b/collectors/proc.plugin/proc_self_mountinfo.c new file mode 100644 index 0000000..9310f2f --- /dev/null +++ b/collectors/proc.plugin/proc_self_mountinfo.c @@ -0,0 +1,458 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +// ---------------------------------------------------------------------------- +// taken from gnulib/mountlist.c + +#ifndef ME_REMOTE +/* A file system is "remote" if its Fs_name contains a ':' + or if (it is of type (smbfs or cifs) and its Fs_name starts with '//') + or Fs_name is equal to "-hosts" (used by autofs to mount remote fs). */ +# define ME_REMOTE(Fs_name, Fs_type) \ + (strchr (Fs_name, ':') != NULL \ + || ((Fs_name)[0] == '/' \ + && (Fs_name)[1] == '/' \ + && (strcmp (Fs_type, "smbfs") == 0 \ + || strcmp (Fs_type, "cifs") == 0)) \ + || (strcmp("-hosts", Fs_name) == 0)) +#endif + +#define ME_DUMMY_0(Fs_name, Fs_type) \ + (strcmp (Fs_type, "autofs") == 0 \ + || strcmp (Fs_type, "proc") == 0 \ + || strcmp (Fs_type, "subfs") == 0 \ + /* for Linux 2.6/3.x */ \ + || strcmp (Fs_type, "debugfs") == 0 \ + || strcmp (Fs_type, "devpts") == 0 \ + || strcmp (Fs_type, "fusectl") == 0 \ + || strcmp (Fs_type, "mqueue") == 0 \ + || strcmp (Fs_type, "rpc_pipefs") == 0 \ + || strcmp (Fs_type, "sysfs") == 0 \ + /* FreeBSD, Linux 2.4 */ \ + || strcmp (Fs_type, "devfs") == 0 \ + /* for NetBSD 3.0 */ \ + || strcmp (Fs_type, "kernfs") == 0 \ + /* for Irix 6.5 */ \ + || strcmp (Fs_type, "ignore") == 0) + +/* Historically, we have marked as "dummy" any file system of type "none", + but now that programs like du need to know about bind-mounted directories, + we grant an exception to any with "bind" in its list of mount options. + I.e., those are *not* dummy entries. */ +# define ME_DUMMY(Fs_name, Fs_type) \ + (ME_DUMMY_0 (Fs_name, Fs_type) || strcmp (Fs_type, "none") == 0) + +// ---------------------------------------------------------------------------- + +// find the mount info with the given major:minor +// in the supplied linked list of mountinfo structures +struct mountinfo *mountinfo_find(struct mountinfo *root, unsigned long major, unsigned long minor, char *device) { + struct mountinfo *mi; + + uint32_t hash = simple_hash(device); + + for(mi = root; mi ; mi = mi->next) + if (unlikely( + mi->major == major && + mi->minor == minor && + mi->mount_source_name_hash == hash && + !strcmp(mi->mount_source_name, device))) + return mi; + + return NULL; +} + +// find the mount info with the given filesystem and mount_source +// in the supplied linked list of mountinfo structures +struct mountinfo *mountinfo_find_by_filesystem_mount_source(struct mountinfo *root, const char *filesystem, const char *mount_source) { + struct mountinfo *mi; + uint32_t filesystem_hash = simple_hash(filesystem), mount_source_hash = simple_hash(mount_source); + + for(mi = root; mi ; mi = mi->next) + if(unlikely(mi->filesystem + && mi->mount_source + && mi->filesystem_hash == filesystem_hash + && mi->mount_source_hash == mount_source_hash + && !strcmp(mi->filesystem, filesystem) + && !strcmp(mi->mount_source, mount_source))) + return mi; + + return NULL; +} + +struct mountinfo *mountinfo_find_by_filesystem_super_option(struct mountinfo *root, const char *filesystem, const char *super_options) { + struct mountinfo *mi; + uint32_t filesystem_hash = simple_hash(filesystem); + + size_t solen = strlen(super_options); + + for(mi = root; mi ; mi = mi->next) + if(unlikely(mi->filesystem + && mi->super_options + && mi->filesystem_hash == filesystem_hash + && !strcmp(mi->filesystem, filesystem))) { + + // super_options is a comma separated list + char *s = mi->super_options, *e; + while(*s) { + e = s + 1; + while(*e && *e != ',') e++; + + size_t len = e - s; + if(unlikely(len == solen && !strncmp(s, super_options, len))) + return mi; + + if(*e == ',') s = ++e; + else s = e; + } + } + + return NULL; +} + +static void mountinfo_free(struct mountinfo *mi) { + freez(mi->root); + freez(mi->mount_point); + freez(mi->mount_options); + freez(mi->persistent_id); +/* + if(mi->optional_fields_count) { + int i; + for(i = 0; i < mi->optional_fields_count ; i++) + free(*mi->optional_fields[i]); + } + free(mi->optional_fields); +*/ + freez(mi->filesystem); + freez(mi->mount_source); + freez(mi->mount_source_name); + freez(mi->super_options); + freez(mi); +} + +// free a linked list of mountinfo structures +void mountinfo_free_all(struct mountinfo *mi) { + while(mi) { + struct mountinfo *t = mi; + mi = mi->next; + + mountinfo_free(t); + } +} + +static char *strdupz_decoding_octal(const char *string) { + char *buffer = strdupz(string); + + char *d = buffer; + const char *s = string; + + while(*s) { + if(unlikely(*s == '\\')) { + s++; + if(likely(isdigit(*s) && isdigit(s[1]) && isdigit(s[2]))) { + char c = *s++ - '0'; + c <<= 3; + c |= *s++ - '0'; + c <<= 3; + c |= *s++ - '0'; + *d++ = c; + } + else *d++ = '_'; + } + else *d++ = *s++; + } + *d = '\0'; + + return buffer; +} + +static inline int is_read_only(const char *s) { + if(!s) return 0; + + size_t len = strlen(s); + if(len < 2) return 0; + if(len == 2) { + if(!strcmp(s, "ro")) return 1; + return 0; + } + if(!strncmp(s, "ro,", 3)) return 1; + if(!strncmp(&s[len - 3], ",ro", 3)) return 1; + if(strstr(s, ",ro,")) return 1; + return 0; +} + +// for the full list of protected mount points look at +// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L142-L149 +// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L180-L194 +static const char *systemd_protected_mount_points[] = { + "/home", + "/root", + "/usr", + "/boot", + "/efi", + "/etc", + "/run/user", + "/lib", + "/lib64", + "/bin", + "/sbin", + NULL +}; + +static inline int mount_point_is_protected(char *mount_point) +{ + for (size_t i = 0; systemd_protected_mount_points[i] != NULL; i++) + if (!strcmp(mount_point, systemd_protected_mount_points[i])) + return 1; + + return 0; +} + +// read the whole mountinfo into a linked list +struct mountinfo *mountinfo_read(int do_statvfs) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/self/mountinfo", netdata_configured_host_prefix); + procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + snprintfz(filename, FILENAME_MAX, "%s/proc/1/mountinfo", netdata_configured_host_prefix); + ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return NULL; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return NULL; + + struct mountinfo *root = NULL, *last = NULL, *mi = NULL; + + // create a dictionary to track uniqueness + DICTIONARY *dict = dictionary_create( + DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_NAME_LINK_DONT_CLONE); + + unsigned long l, lines = procfile_lines(ff); + for(l = 0; l < lines ;l++) { + if(unlikely(procfile_linewords(ff, l) < 5)) + continue; + + // make sure we don't add the same item twice + char *v = (char *)dictionary_set(dict, procfile_lineword(ff, l, 4), "N", 2); + if(v) { + if(*v == 'O') continue; + *v = 'O'; + } + + mi = mallocz(sizeof(struct mountinfo)); + + unsigned long w = 0; + mi->id = str2ul(procfile_lineword(ff, l, w)); w++; + mi->parentid = str2ul(procfile_lineword(ff, l, w)); w++; + + char *major = procfile_lineword(ff, l, w), *minor; w++; + for(minor = major; *minor && *minor != ':' ;minor++) ; + + if(unlikely(!*minor)) { + error("Cannot parse major:minor on '%s' at line %lu of '%s'", major, l + 1, filename); + freez(mi); + continue; + } + + *minor = '\0'; + minor++; + + mi->flags = 0; + + mi->major = str2ul(major); + mi->minor = str2ul(minor); + + mi->root = strdupz(procfile_lineword(ff, l, w)); w++; + mi->root_hash = simple_hash(mi->root); + + mi->mount_point = strdupz_decoding_octal(procfile_lineword(ff, l, w)); w++; + mi->mount_point_hash = simple_hash(mi->mount_point); + + mi->persistent_id = strdupz(mi->mount_point); + netdata_fix_chart_id(mi->persistent_id); + mi->persistent_id_hash = simple_hash(mi->persistent_id); + + mi->mount_options = strdupz(procfile_lineword(ff, l, w)); w++; + + if(unlikely(is_read_only(mi->mount_options))) + mi->flags |= MOUNTINFO_READONLY; + + if(unlikely(mount_point_is_protected(mi->mount_point))) + mi->flags |= MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST; + + // count the optional fields +/* + unsigned long wo = w; +*/ + mi->optional_fields_count = 0; + char *s = procfile_lineword(ff, l, w); + while(*s && *s != '-') { + w++; + s = procfile_lineword(ff, l, w); + mi->optional_fields_count++; + } + +/* + if(unlikely(mi->optional_fields_count)) { + // we have some optional fields + // read them into a new array of pointers; + + mi->optional_fields = mallocz(mi->optional_fields_count * sizeof(char *)); + + int i; + for(i = 0; i < mi->optional_fields_count ; i++) { + *mi->optional_fields[wo] = strdupz(procfile_lineword(ff, l, w)); + wo++; + } + } + else + mi->optional_fields = NULL; +*/ + + if(likely(*s == '-')) { + w++; + + mi->filesystem = strdupz(procfile_lineword(ff, l, w)); w++; + mi->filesystem_hash = simple_hash(mi->filesystem); + + mi->mount_source = strdupz_decoding_octal(procfile_lineword(ff, l, w)); w++; + mi->mount_source_hash = simple_hash(mi->mount_source); + + mi->mount_source_name = strdupz(basename(mi->mount_source)); + mi->mount_source_name_hash = simple_hash(mi->mount_source_name); + + mi->super_options = strdupz(procfile_lineword(ff, l, w)); w++; + + if(unlikely(is_read_only(mi->super_options))) + mi->flags |= MOUNTINFO_READONLY; + + if(unlikely(ME_DUMMY(mi->mount_source, mi->filesystem))) + mi->flags |= MOUNTINFO_IS_DUMMY; + + if(unlikely(ME_REMOTE(mi->mount_source, mi->filesystem))) + mi->flags |= MOUNTINFO_IS_REMOTE; + + // mark as BIND the duplicates (i.e. same filesystem + same source) + if(do_statvfs) { + struct stat buf; + if(unlikely(stat(mi->mount_point, &buf) == -1)) { + mi->st_dev = 0; + mi->flags |= MOUNTINFO_NO_STAT; + } + else { + mi->st_dev = buf.st_dev; + + struct mountinfo *mt; + for(mt = root; mt; mt = mt->next) { + if(unlikely(mt->st_dev == mi->st_dev && !(mt->flags & MOUNTINFO_IS_SAME_DEV))) { + if(strlen(mi->mount_point) < strlen(mt->mount_point)) + mt->flags |= MOUNTINFO_IS_SAME_DEV; + else + mi->flags |= MOUNTINFO_IS_SAME_DEV; + } + } + } + } + else { + mi->st_dev = 0; + } + } + else { + mi->filesystem = NULL; + mi->filesystem_hash = 0; + + mi->mount_source = NULL; + mi->mount_source_hash = 0; + + mi->mount_source_name = NULL; + mi->mount_source_name_hash = 0; + + mi->super_options = NULL; + + mi->st_dev = 0; + } + + // check if it has size + if(do_statvfs && !(mi->flags & MOUNTINFO_IS_DUMMY)) { + struct statvfs buff_statvfs; + if(unlikely(statvfs(mi->mount_point, &buff_statvfs) < 0)) { + mi->flags |= MOUNTINFO_NO_STAT; + } + else if(unlikely(!buff_statvfs.f_blocks /* || !buff_statvfs.f_files */)) { + mi->flags |= MOUNTINFO_NO_SIZE; + } + } + + // link it + if(unlikely(!root)) + root = mi; + else + last->next = mi; + + last = mi; + mi->next = NULL; + +/* +#ifdef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "MOUNTINFO: %ld %ld %lu:%lu root '%s', persistent id '%s', mount point '%s', mount options '%s', filesystem '%s', mount source '%s', super options '%s'%s%s%s%s%s%s\n", + mi->id, + mi->parentid, + mi->major, + mi->minor, + mi->root, + mi->persistent_id, + (mi->mount_point)?mi->mount_point:"", + (mi->mount_options)?mi->mount_options:"", + (mi->filesystem)?mi->filesystem:"", + (mi->mount_source)?mi->mount_source:"", + (mi->super_options)?mi->super_options:"", + (mi->flags & MOUNTINFO_IS_DUMMY)?" DUMMY":"", + (mi->flags & MOUNTINFO_IS_BIND)?" BIND":"", + (mi->flags & MOUNTINFO_IS_REMOTE)?" REMOTE":"", + (mi->flags & MOUNTINFO_NO_STAT)?" NOSTAT":"", + (mi->flags & MOUNTINFO_NO_SIZE)?" NOSIZE":"", + (mi->flags & MOUNTINFO_IS_SAME_DEV)?" SAMEDEV":"" + ); +#endif +*/ + } + +/* find if the mount options have "bind" in them + { + FILE *fp = setmntent(MOUNTED, "r"); + if (fp != NULL) { + struct mntent mntbuf; + struct mntent *mnt; + char buf[4096 + 1]; + + while ((mnt = getmntent_r(fp, &mntbuf, buf, 4096))) { + char *bind = hasmntopt(mnt, "bind"); + if(unlikely(bind)) { + struct mountinfo *mi; + for(mi = root; mi ; mi = mi->next) { + if(unlikely(strcmp(mnt->mnt_dir, mi->mount_point) == 0)) { + fprintf(stderr, "Mount point '%s' is BIND\n", mi->mount_point); + mi->flags |= MOUNTINFO_IS_BIND; + break; + } + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(!mi)) { + error("Mount point '%s' not found in /proc/self/mountinfo", mnt->mnt_dir); + } +#endif + } + } + endmntent(fp); + } + } +*/ + + dictionary_destroy(dict); + procfile_close(ff); + return root; +} diff --git a/collectors/proc.plugin/proc_self_mountinfo.h b/collectors/proc.plugin/proc_self_mountinfo.h new file mode 100644 index 0000000..4bd24d2 --- /dev/null +++ b/collectors/proc.plugin/proc_self_mountinfo.h @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PROC_SELF_MOUNTINFO_H +#define NETDATA_PROC_SELF_MOUNTINFO_H 1 + +#define MOUNTINFO_IS_DUMMY 0x00000001 +#define MOUNTINFO_IS_REMOTE 0x00000002 +#define MOUNTINFO_IS_BIND 0x00000004 +#define MOUNTINFO_IS_SAME_DEV 0x00000008 +#define MOUNTINFO_NO_STAT 0x00000010 +#define MOUNTINFO_NO_SIZE 0x00000020 +#define MOUNTINFO_READONLY 0x00000040 +#define MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST 0x00000080 + +struct mountinfo { + long id; // mount ID: unique identifier of the mount (may be reused after umount(2)). + long parentid; // parent ID: ID of parent mount (or of self for the top of the mount tree). + unsigned long major; // major:minor: value of st_dev for files on filesystem (see stat(2)). + unsigned long minor; + + char *persistent_id; // a calculated persistent id for the mount point + uint32_t persistent_id_hash; + + char *root; // root: root of the mount within the filesystem. + uint32_t root_hash; + + char *mount_point; // mount point: mount point relative to the process's root. + uint32_t mount_point_hash; + + char *mount_options; // mount options: per-mount options. + + int optional_fields_count; +/* + char ***optional_fields; // optional fields: zero or more fields of the form "tag[:value]". +*/ + char *filesystem; // filesystem type: name of filesystem in the form "type[.subtype]". + uint32_t filesystem_hash; + + char *mount_source; // mount source: filesystem-specific information or "none". + uint32_t mount_source_hash; + + char *mount_source_name; + uint32_t mount_source_name_hash; + + char *super_options; // super options: per-superblock options. + + uint32_t flags; + + dev_t st_dev; // id of device as given by stat() + + struct mountinfo *next; +}; + +struct mountinfo *mountinfo_find(struct mountinfo *root, unsigned long major, unsigned long minor, char *device); +struct mountinfo *mountinfo_find_by_filesystem_mount_source(struct mountinfo *root, const char *filesystem, const char *mount_source); +struct mountinfo *mountinfo_find_by_filesystem_super_option(struct mountinfo *root, const char *filesystem, const char *super_options); + +void mountinfo_free_all(struct mountinfo *mi); +struct mountinfo *mountinfo_read(int do_statvfs); + +#endif /* NETDATA_PROC_SELF_MOUNTINFO_H */ diff --git a/collectors/proc.plugin/proc_softirqs.c b/collectors/proc.plugin/proc_softirqs.c new file mode 100644 index 0000000..4c4df76 --- /dev/null +++ b/collectors/proc.plugin/proc_softirqs.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_SOFTIRQS_NAME "/proc/softirqs" + +#define MAX_INTERRUPT_NAME 50 + +struct cpu_interrupt { + unsigned long long value; + RRDDIM *rd; +}; + +struct interrupt { + int used; + char *id; + char name[MAX_INTERRUPT_NAME + 1]; + RRDDIM *rd; + unsigned long long total; + struct cpu_interrupt cpu[]; +}; + +// since each interrupt is variable in size +// we use this to calculate its record size +#define recordsize(cpus) (sizeof(struct interrupt) + ((cpus) * sizeof(struct cpu_interrupt))) + +// given a base, get a pointer to each record +#define irrindex(base, line, cpus) ((struct interrupt *)&((char *)(base))[(line) * recordsize(cpus)]) + +static inline struct interrupt *get_interrupts_array(size_t lines, int cpus) { + static struct interrupt *irrs = NULL; + static size_t allocated = 0; + + if(unlikely(lines != allocated)) { + uint32_t l; + int c; + + irrs = (struct interrupt *)reallocz(irrs, lines * recordsize(cpus)); + + // reset all interrupt RRDDIM pointers as any line could have shifted + for(l = 0; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + irr->rd = NULL; + irr->name[0] = '\0'; + for(c = 0; c < cpus ;c++) + irr->cpu[c].rd = NULL; + } + + allocated = lines; + } + + return irrs; +} + +int do_proc_softirqs(int update_every, usec_t dt) { + (void)dt; + static procfile *ff = NULL; + static int cpus = -1, do_per_core = CONFIG_BOOLEAN_INVALID; + struct interrupt *irrs = NULL; + + if(unlikely(do_per_core == CONFIG_BOOLEAN_INVALID)) + do_per_core = config_get_boolean_ondemand("plugin:proc:/proc/softirqs", "interrupts per core", CONFIG_BOOLEAN_AUTO); + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/softirqs"); + ff = procfile_open(config_get("plugin:proc:/proc/softirqs", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + size_t words = procfile_linewords(ff, 0); + + if(unlikely(!lines)) { + error("Cannot read /proc/softirqs, zero lines reported."); + return 1; + } + + // find how many CPUs are there + if(unlikely(cpus == -1)) { + uint32_t w; + cpus = 0; + for(w = 0; w < words ; w++) { + if(likely(strncmp(procfile_lineword(ff, 0, w), "CPU", 3) == 0)) + cpus++; + } + } + + if(unlikely(!cpus)) { + error("PLUGIN: PROC_SOFTIRQS: Cannot find the number of CPUs in /proc/softirqs"); + return 1; + } + + // allocate the size we need; + irrs = get_interrupts_array(lines, cpus); + irrs[0].used = 0; + + // loop through all lines + for(l = 1; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + irr->used = 0; + irr->total = 0; + + words = procfile_linewords(ff, l); + if(unlikely(!words)) continue; + + irr->id = procfile_lineword(ff, l, 0); + if(unlikely(!irr->id || !irr->id[0])) continue; + + int c; + for(c = 0; c < cpus ;c++) { + if(likely((c + 1) < (int)words)) + irr->cpu[c].value = str2ull(procfile_lineword(ff, l, (uint32_t)(c + 1))); + else + irr->cpu[c].value = 0; + + irr->total += irr->cpu[c].value; + } + + strncpyz(irr->name, irr->id, MAX_INTERRUPT_NAME); + + irr->used = 1; + } + + // -------------------------------------------------------------------- + + static RRDSET *st_system_softirqs = NULL; + if(unlikely(!st_system_softirqs)) { + st_system_softirqs = rrdset_create_localhost( + "system" + , "softirqs" + , NULL + , "softirqs" + , NULL + , "System softirqs" + , "softirqs/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_SOFTIRQS_NAME + , NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS + , update_every + , RRDSET_TYPE_STACKED + ); + } + + for(l = 0; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + + if(irr->used && irr->total) { + // some interrupt may have changed without changing the total number of lines + // if the same number of interrupts have been added and removed between two + // calls of this function. + if(unlikely(!irr->rd || strncmp(irr->name, rrddim_name(irr->rd), MAX_INTERRUPT_NAME) != 0)) { + irr->rd = rrddim_add(st_system_softirqs, irr->id, irr->name, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_reset_name(st_system_softirqs, irr->rd, irr->name); + + // also reset per cpu RRDDIMs to avoid repeating strncmp() in the per core loop + if(likely(do_per_core != CONFIG_BOOLEAN_NO)) { + int c; + for(c = 0; c < cpus; c++) irr->cpu[c].rd = NULL; + } + } + + rrddim_set_by_pointer(st_system_softirqs, irr->rd, irr->total); + } + } + + rrdset_done(st_system_softirqs); + + // -------------------------------------------------------------------- + + if(do_per_core != CONFIG_BOOLEAN_NO) { + static RRDSET **core_st = NULL; + static int old_cpus = 0; + + if(old_cpus < cpus) { + core_st = reallocz(core_st, sizeof(RRDSET *) * cpus); + memset(&core_st[old_cpus], 0, sizeof(RRDSET *) * (cpus - old_cpus)); + old_cpus = cpus; + } + + int c; + + for(c = 0; c < cpus ; c++) { + if(unlikely(!core_st[c])) { + // find if everything is just zero + unsigned long long core_sum = 0; + + for (l = 0; l < lines; l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + if (unlikely(!irr->used)) continue; + core_sum += irr->cpu[c].value; + } + + if (unlikely(core_sum == 0)) continue; // try next core + + char id[50 + 1]; + snprintfz(id, 50, "cpu%d_softirqs", c); + + char title[100 + 1]; + snprintfz(title, 100, "CPU softirqs"); + + core_st[c] = rrdset_create_localhost( + "cpu" + , id + , NULL + , "softirqs" + , "cpu.softirqs" + , title + , "softirqs/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_SOFTIRQS_NAME + , NETDATA_CHART_PRIO_SOFTIRQS_PER_CORE + c + , update_every + , RRDSET_TYPE_STACKED + ); + + char core[50+1]; + snprintfz(core, 50, "cpu%d", c); + rrdlabels_add(core_st[c]->rrdlabels, "cpu", core, RRDLABEL_SRC_AUTO); + } + + for(l = 0; l < lines ;l++) { + struct interrupt *irr = irrindex(irrs, l, cpus); + + if(irr->used && (do_per_core == CONFIG_BOOLEAN_YES || irr->cpu[c].value)) { + if(unlikely(!irr->cpu[c].rd)) { + irr->cpu[c].rd = rrddim_add(core_st[c], irr->id, irr->name, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_reset_name(core_st[c], irr->cpu[c].rd, irr->name); + } + + rrddim_set_by_pointer(core_st[c], irr->cpu[c].rd, irr->cpu[c].value); + } + } + + rrdset_done(core_st[c]); + } + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_spl_kstat_zfs.c b/collectors/proc.plugin/proc_spl_kstat_zfs.c new file mode 100644 index 0000000..8938d64 --- /dev/null +++ b/collectors/proc.plugin/proc_spl_kstat_zfs.c @@ -0,0 +1,423 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" +#include "zfs_common.h" + +#define ZFS_PROC_ARCSTATS "/proc/spl/kstat/zfs/arcstats" +#define ZFS_PROC_POOLS "/proc/spl/kstat/zfs" + +#define STATE_SIZE 9 +#define MAX_CHART_ID 256 + +extern struct arcstats arcstats; + +unsigned long long zfs_arcstats_shrinkable_cache_size_bytes = 0; + +int do_proc_spl_kstat_zfs_arcstats(int update_every, usec_t dt) { + (void)dt; + + static int show_zero_charts = 0, do_zfs_stats = 0; + static procfile *ff = NULL; + static char *dirname = NULL; + static ARL_BASE *arl_base = NULL; + + arcstats.l2exist = -1; + + if(unlikely(!arl_base)) { + arl_base = arl_create("arcstats", NULL, 60); + + arl_expect(arl_base, "hits", &arcstats.hits); + arl_expect(arl_base, "misses", &arcstats.misses); + arl_expect(arl_base, "demand_data_hits", &arcstats.demand_data_hits); + arl_expect(arl_base, "demand_data_misses", &arcstats.demand_data_misses); + arl_expect(arl_base, "demand_metadata_hits", &arcstats.demand_metadata_hits); + arl_expect(arl_base, "demand_metadata_misses", &arcstats.demand_metadata_misses); + arl_expect(arl_base, "prefetch_data_hits", &arcstats.prefetch_data_hits); + arl_expect(arl_base, "prefetch_data_misses", &arcstats.prefetch_data_misses); + arl_expect(arl_base, "prefetch_metadata_hits", &arcstats.prefetch_metadata_hits); + arl_expect(arl_base, "prefetch_metadata_misses", &arcstats.prefetch_metadata_misses); + arl_expect(arl_base, "mru_hits", &arcstats.mru_hits); + arl_expect(arl_base, "mru_ghost_hits", &arcstats.mru_ghost_hits); + arl_expect(arl_base, "mfu_hits", &arcstats.mfu_hits); + arl_expect(arl_base, "mfu_ghost_hits", &arcstats.mfu_ghost_hits); + arl_expect(arl_base, "deleted", &arcstats.deleted); + arl_expect(arl_base, "mutex_miss", &arcstats.mutex_miss); + arl_expect(arl_base, "evict_skip", &arcstats.evict_skip); + arl_expect(arl_base, "evict_not_enough", &arcstats.evict_not_enough); + arl_expect(arl_base, "evict_l2_cached", &arcstats.evict_l2_cached); + arl_expect(arl_base, "evict_l2_eligible", &arcstats.evict_l2_eligible); + arl_expect(arl_base, "evict_l2_ineligible", &arcstats.evict_l2_ineligible); + arl_expect(arl_base, "evict_l2_skip", &arcstats.evict_l2_skip); + arl_expect(arl_base, "hash_elements", &arcstats.hash_elements); + arl_expect(arl_base, "hash_elements_max", &arcstats.hash_elements_max); + arl_expect(arl_base, "hash_collisions", &arcstats.hash_collisions); + arl_expect(arl_base, "hash_chains", &arcstats.hash_chains); + arl_expect(arl_base, "hash_chain_max", &arcstats.hash_chain_max); + arl_expect(arl_base, "p", &arcstats.p); + arl_expect(arl_base, "c", &arcstats.c); + arl_expect(arl_base, "c_min", &arcstats.c_min); + arl_expect(arl_base, "c_max", &arcstats.c_max); + arl_expect(arl_base, "size", &arcstats.size); + arl_expect(arl_base, "hdr_size", &arcstats.hdr_size); + arl_expect(arl_base, "data_size", &arcstats.data_size); + arl_expect(arl_base, "metadata_size", &arcstats.metadata_size); + arl_expect(arl_base, "other_size", &arcstats.other_size); + arl_expect(arl_base, "anon_size", &arcstats.anon_size); + arl_expect(arl_base, "anon_evictable_data", &arcstats.anon_evictable_data); + arl_expect(arl_base, "anon_evictable_metadata", &arcstats.anon_evictable_metadata); + arl_expect(arl_base, "mru_size", &arcstats.mru_size); + arl_expect(arl_base, "mru_evictable_data", &arcstats.mru_evictable_data); + arl_expect(arl_base, "mru_evictable_metadata", &arcstats.mru_evictable_metadata); + arl_expect(arl_base, "mru_ghost_size", &arcstats.mru_ghost_size); + arl_expect(arl_base, "mru_ghost_evictable_data", &arcstats.mru_ghost_evictable_data); + arl_expect(arl_base, "mru_ghost_evictable_metadata", &arcstats.mru_ghost_evictable_metadata); + arl_expect(arl_base, "mfu_size", &arcstats.mfu_size); + arl_expect(arl_base, "mfu_evictable_data", &arcstats.mfu_evictable_data); + arl_expect(arl_base, "mfu_evictable_metadata", &arcstats.mfu_evictable_metadata); + arl_expect(arl_base, "mfu_ghost_size", &arcstats.mfu_ghost_size); + arl_expect(arl_base, "mfu_ghost_evictable_data", &arcstats.mfu_ghost_evictable_data); + arl_expect(arl_base, "mfu_ghost_evictable_metadata", &arcstats.mfu_ghost_evictable_metadata); + arl_expect(arl_base, "l2_hits", &arcstats.l2_hits); + arl_expect(arl_base, "l2_misses", &arcstats.l2_misses); + arl_expect(arl_base, "l2_feeds", &arcstats.l2_feeds); + arl_expect(arl_base, "l2_rw_clash", &arcstats.l2_rw_clash); + arl_expect(arl_base, "l2_read_bytes", &arcstats.l2_read_bytes); + arl_expect(arl_base, "l2_write_bytes", &arcstats.l2_write_bytes); + arl_expect(arl_base, "l2_writes_sent", &arcstats.l2_writes_sent); + arl_expect(arl_base, "l2_writes_done", &arcstats.l2_writes_done); + arl_expect(arl_base, "l2_writes_error", &arcstats.l2_writes_error); + arl_expect(arl_base, "l2_writes_lock_retry", &arcstats.l2_writes_lock_retry); + arl_expect(arl_base, "l2_evict_lock_retry", &arcstats.l2_evict_lock_retry); + arl_expect(arl_base, "l2_evict_reading", &arcstats.l2_evict_reading); + arl_expect(arl_base, "l2_evict_l1cached", &arcstats.l2_evict_l1cached); + arl_expect(arl_base, "l2_free_on_write", &arcstats.l2_free_on_write); + arl_expect(arl_base, "l2_cdata_free_on_write", &arcstats.l2_cdata_free_on_write); + arl_expect(arl_base, "l2_abort_lowmem", &arcstats.l2_abort_lowmem); + arl_expect(arl_base, "l2_cksum_bad", &arcstats.l2_cksum_bad); + arl_expect(arl_base, "l2_io_error", &arcstats.l2_io_error); + arl_expect(arl_base, "l2_size", &arcstats.l2_size); + arl_expect(arl_base, "l2_asize", &arcstats.l2_asize); + arl_expect(arl_base, "l2_hdr_size", &arcstats.l2_hdr_size); + arl_expect(arl_base, "l2_compress_successes", &arcstats.l2_compress_successes); + arl_expect(arl_base, "l2_compress_zeros", &arcstats.l2_compress_zeros); + arl_expect(arl_base, "l2_compress_failures", &arcstats.l2_compress_failures); + arl_expect(arl_base, "memory_throttle_count", &arcstats.memory_throttle_count); + arl_expect(arl_base, "duplicate_buffers", &arcstats.duplicate_buffers); + arl_expect(arl_base, "duplicate_buffers_size", &arcstats.duplicate_buffers_size); + arl_expect(arl_base, "duplicate_reads", &arcstats.duplicate_reads); + arl_expect(arl_base, "memory_direct_count", &arcstats.memory_direct_count); + arl_expect(arl_base, "memory_indirect_count", &arcstats.memory_indirect_count); + arl_expect(arl_base, "arc_no_grow", &arcstats.arc_no_grow); + arl_expect(arl_base, "arc_tempreserve", &arcstats.arc_tempreserve); + arl_expect(arl_base, "arc_loaned_bytes", &arcstats.arc_loaned_bytes); + arl_expect(arl_base, "arc_prune", &arcstats.arc_prune); + arl_expect(arl_base, "arc_meta_used", &arcstats.arc_meta_used); + arl_expect(arl_base, "arc_meta_limit", &arcstats.arc_meta_limit); + arl_expect(arl_base, "arc_meta_max", &arcstats.arc_meta_max); + arl_expect(arl_base, "arc_meta_min", &arcstats.arc_meta_min); + arl_expect(arl_base, "arc_need_free", &arcstats.arc_need_free); + arl_expect(arl_base, "arc_sys_free", &arcstats.arc_sys_free); + } + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, ZFS_PROC_ARCSTATS); + ff = procfile_open(config_get("plugin:proc:" ZFS_PROC_ARCSTATS, "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) + return 1; + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/spl/kstat/zfs"); + dirname = config_get("plugin:proc:" ZFS_PROC_ARCSTATS, "directory to monitor", filename); + + show_zero_charts = config_get_boolean_ondemand("plugin:proc:" ZFS_PROC_ARCSTATS, "show zero charts", CONFIG_BOOLEAN_NO); + if(show_zero_charts == CONFIG_BOOLEAN_AUTO && netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES) + show_zero_charts = CONFIG_BOOLEAN_YES; + if(unlikely(show_zero_charts == CONFIG_BOOLEAN_YES)) + do_zfs_stats = 1; + } + + // check if any pools exist + if(likely(!do_zfs_stats)) { + DIR *dir = opendir(dirname); + if(unlikely(!dir)) { + error("Cannot read directory '%s'", dirname); + return 1; + } + + struct dirent *de = NULL; + while(likely(de = readdir(dir))) { + if(likely(de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + ))) + continue; + + if(unlikely(de->d_type == DT_LNK || de->d_type == DT_DIR)) { + do_zfs_stats = 1; + break; + } + } + + closedir(dir); + } + + // do not show ZFS filesystem metrics if there haven't been any pools in the system yet + if(unlikely(!do_zfs_stats)) + return 0; + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + arl_begin(arl_base); + + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 3)) { + if(unlikely(words)) error("Cannot read " ZFS_PROC_ARCSTATS " line %zu. Expected 3 params, read %zu.", l, words); + continue; + } + + const char *key = procfile_lineword(ff, l, 0); + const char *value = procfile_lineword(ff, l, 2); + + if(unlikely(arcstats.l2exist == -1)) { + if(key[0] == 'l' && key[1] == '2' && key[2] == '_') + arcstats.l2exist = 1; + } + + if(unlikely(arl_check(arl_base, key, value))) break; + } + + if (arcstats.size > arcstats.c_min) { + zfs_arcstats_shrinkable_cache_size_bytes = arcstats.size - arcstats.c_min; + } else { + zfs_arcstats_shrinkable_cache_size_bytes = 0; + } + + if(unlikely(arcstats.l2exist == -1)) + arcstats.l2exist = 0; + + generate_charts_arcstats(PLUGIN_PROC_NAME, ZFS_PROC_ARCSTATS, show_zero_charts, update_every); + generate_charts_arc_summary(PLUGIN_PROC_NAME, ZFS_PROC_ARCSTATS, show_zero_charts, update_every); + + return 0; +} + +struct zfs_pool { + RRDSET *st; + + RRDDIM *rd_online; + RRDDIM *rd_degraded; + RRDDIM *rd_faulted; + RRDDIM *rd_offline; + RRDDIM *rd_removed; + RRDDIM *rd_unavail; + + int updated; + int disabled; + + int online; + int degraded; + int faulted; + int offline; + int removed; + int unavail; +}; + +struct deleted_zfs_pool { + char *name; + struct deleted_zfs_pool *next; +} *deleted_zfs_pools = NULL; + +DICTIONARY *zfs_pools = NULL; + +void disable_zfs_pool_state(struct zfs_pool *pool) +{ + if (pool->st) + rrdset_is_obsolete(pool->st); + + pool->st = NULL; + + pool->rd_online = NULL; + pool->rd_degraded = NULL; + pool->rd_faulted = NULL; + pool->rd_offline = NULL; + pool->rd_removed = NULL; + pool->rd_unavail = NULL; + + pool->disabled = 1; +} + +int update_zfs_pool_state_chart(const DICTIONARY_ITEM *item, void *pool_p, void *update_every_p) { + const char *name = dictionary_acquired_item_name(item); + struct zfs_pool *pool = (struct zfs_pool *)pool_p; + int update_every = *(int *)update_every_p; + + if (pool->updated) { + pool->updated = 0; + + if (!pool->disabled) { + if (unlikely(!pool->st)) { + char chart_id[MAX_CHART_ID + 1]; + snprintf(chart_id, MAX_CHART_ID, "state_%s", name); + + pool->st = rrdset_create_localhost( + "zfspool", + chart_id, + NULL, + name, + "zfspool.state", + "ZFS pool state", + "boolean", + PLUGIN_PROC_NAME, + ZFS_PROC_POOLS, + NETDATA_CHART_PRIO_ZFS_POOL_STATE, + update_every, + RRDSET_TYPE_LINE); + + pool->rd_online = rrddim_add(pool->st, "online", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + pool->rd_degraded = rrddim_add(pool->st, "degraded", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + pool->rd_faulted = rrddim_add(pool->st, "faulted", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + pool->rd_offline = rrddim_add(pool->st, "offline", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + pool->rd_removed = rrddim_add(pool->st, "removed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + pool->rd_unavail = rrddim_add(pool->st, "unavail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(pool->st, pool->rd_online, pool->online); + rrddim_set_by_pointer(pool->st, pool->rd_degraded, pool->degraded); + rrddim_set_by_pointer(pool->st, pool->rd_faulted, pool->faulted); + rrddim_set_by_pointer(pool->st, pool->rd_offline, pool->offline); + rrddim_set_by_pointer(pool->st, pool->rd_removed, pool->removed); + rrddim_set_by_pointer(pool->st, pool->rd_unavail, pool->unavail); + rrdset_done(pool->st); + } + } else { + disable_zfs_pool_state(pool); + struct deleted_zfs_pool *new = callocz(1, sizeof(struct deleted_zfs_pool)); + new->name = strdupz(name); + new->next = deleted_zfs_pools; + deleted_zfs_pools = new; + } + + return 0; +} + +int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt) +{ + (void)dt; + + static int do_zfs_pool_state = -1; + static char *dirname = NULL; + + int pool_found = 0, state_file_found = 0; + + if (unlikely(do_zfs_pool_state == -1)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/spl/kstat/zfs"); + dirname = config_get("plugin:proc:" ZFS_PROC_POOLS, "directory to monitor", filename); + + zfs_pools = dictionary_create(DICT_OPTION_SINGLE_THREADED); + + do_zfs_pool_state = 1; + } + + if (likely(do_zfs_pool_state)) { + DIR *dir = opendir(dirname); + if (unlikely(!dir)) { + error("Cannot read directory '%s'", dirname); + return 1; + } + + struct dirent *de = NULL; + while (likely(de = readdir(dir))) { + if (likely( + de->d_type == DT_DIR && ((de->d_name[0] == '.' && de->d_name[1] == '\0') || + (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0')))) + continue; + + if (unlikely(de->d_type == DT_LNK || de->d_type == DT_DIR)) { + pool_found = 1; + + struct zfs_pool *pool = dictionary_get(zfs_pools, de->d_name); + + if (unlikely(!pool)) { + struct zfs_pool new_zfs_pool = {}; + pool = dictionary_set(zfs_pools, de->d_name, &new_zfs_pool, sizeof(struct zfs_pool)); + }; + + pool->updated = 1; + + if (pool->disabled) { + state_file_found = 1; + continue; + } + + pool->online = 0; + pool->degraded = 0; + pool->faulted = 0; + pool->offline = 0; + pool->removed = 0; + pool->unavail = 0; + + char filename[FILENAME_MAX + 1]; + snprintfz( + filename, FILENAME_MAX, "%s%s/%s/state", netdata_configured_host_prefix, dirname, de->d_name); + + char state[STATE_SIZE + 1]; + int ret = read_file(filename, state, STATE_SIZE); + + if (!ret) { + state_file_found = 1; + + // ZFS pool states are described at https://openzfs.github.io/openzfs-docs/man/8/zpoolconcepts.8.html?#Device_Failure_and_Recovery + if (!strcmp(state, "ONLINE\n")) { + pool->online = 1; + } else if (!strcmp(state, "DEGRADED\n")) { + pool->degraded = 1; + } else if (!strcmp(state, "FAULTED\n")) { + pool->faulted = 1; + } else if (!strcmp(state, "OFFLINE\n")) { + pool->offline = 1; + } else if (!strcmp(state, "REMOVED\n")) { + pool->removed = 1; + } else if (!strcmp(state, "UNAVAIL\n")) { + pool->unavail = 1; + } else { + disable_zfs_pool_state(pool); + + char *c = strchr(state, '\n'); + if (c) + *c = '\0'; + error("ZFS POOLS: Undefined state %s for zpool %s, disabling the chart", state, de->d_name); + } + } + } + } + + closedir(dir); + } + + if (do_zfs_pool_state && pool_found && !state_file_found) { + info("ZFS POOLS: State files not found. Disabling the module."); + do_zfs_pool_state = 0; + } + + if (do_zfs_pool_state) + dictionary_walkthrough_read(zfs_pools, update_zfs_pool_state_chart, &update_every); + + while (deleted_zfs_pools) { + struct deleted_zfs_pool *current_pool = deleted_zfs_pools; + dictionary_del(zfs_pools, current_pool->name); + + deleted_zfs_pools = deleted_zfs_pools->next; + + freez(current_pool->name); + freez(current_pool); + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c new file mode 100644 index 0000000..33fe932 --- /dev/null +++ b/collectors/proc.plugin/proc_stat.c @@ -0,0 +1,1064 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_STAT_NAME "/proc/stat" + +struct per_core_single_number_file { + unsigned char found:1; + const char *filename; + int fd; + collected_number value; + RRDDIM *rd; +}; + +struct last_ticks { + collected_number frequency; + collected_number ticks; +}; + +// This is an extension of struct per_core_single_number_file at CPU_FREQ_INDEX. +// Either scaling_cur_freq or time_in_state file is used at one time. +struct per_core_time_in_state_file { + const char *filename; + procfile *ff; + size_t last_ticks_len; + struct last_ticks *last_ticks; +}; + +#define CORE_THROTTLE_COUNT_INDEX 0 +#define PACKAGE_THROTTLE_COUNT_INDEX 1 +#define CPU_FREQ_INDEX 2 +#define PER_CORE_FILES 3 + +struct cpu_chart { + const char *id; + + RRDSET *st; + RRDDIM *rd_user; + RRDDIM *rd_nice; + RRDDIM *rd_system; + RRDDIM *rd_idle; + RRDDIM *rd_iowait; + RRDDIM *rd_irq; + RRDDIM *rd_softirq; + RRDDIM *rd_steal; + RRDDIM *rd_guest; + RRDDIM *rd_guest_nice; + + struct per_core_single_number_file files[PER_CORE_FILES]; + + struct per_core_time_in_state_file time_in_state_files; +}; + +static int keep_per_core_fds_open = CONFIG_BOOLEAN_YES; +static int keep_cpuidle_fds_open = CONFIG_BOOLEAN_YES; + +static int read_per_core_files(struct cpu_chart *all_cpu_charts, size_t len, size_t index) { + char buf[50 + 1]; + size_t x, files_read = 0, files_nonzero = 0; + + for(x = 0; x < len ; x++) { + struct per_core_single_number_file *f = &all_cpu_charts[x].files[index]; + + f->found = 0; + + if(unlikely(!f->filename)) + continue; + + if(unlikely(f->fd == -1)) { + f->fd = open(f->filename, O_RDONLY); + if (unlikely(f->fd == -1)) { + error("Cannot open file '%s'", f->filename); + continue; + } + } + + ssize_t ret = read(f->fd, buf, 50); + if(unlikely(ret < 0)) { + // cannot read that file + + error("Cannot read file '%s'", f->filename); + close(f->fd); + f->fd = -1; + continue; + } + else { + // successful read + + // terminate the buffer + buf[ret] = '\0'; + + if(unlikely(keep_per_core_fds_open != CONFIG_BOOLEAN_YES)) { + close(f->fd); + f->fd = -1; + } + else if(lseek(f->fd, 0, SEEK_SET) == -1) { + error("Cannot seek in file '%s'", f->filename); + close(f->fd); + f->fd = -1; + } + } + + files_read++; + f->found = 1; + + f->value = str2ll(buf, NULL); + if(likely(f->value != 0)) + files_nonzero++; + } + + if(files_read == 0) + return -1; + + if(files_nonzero == 0) + return 0; + + return (int)files_nonzero; +} + +static int read_per_core_time_in_state_files(struct cpu_chart *all_cpu_charts, size_t len, size_t index) { + size_t x, files_read = 0, files_nonzero = 0; + + for(x = 0; x < len ; x++) { + struct per_core_single_number_file *f = &all_cpu_charts[x].files[index]; + struct per_core_time_in_state_file *tsf = &all_cpu_charts[x].time_in_state_files; + + f->found = 0; + + if(unlikely(!tsf->filename)) + continue; + + if(unlikely(!tsf->ff)) { + tsf->ff = procfile_open(tsf->filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!tsf->ff)) + { + error("Cannot open file '%s'", tsf->filename); + continue; + } + } + + tsf->ff = procfile_readall(tsf->ff); + if(unlikely(!tsf->ff)) { + error("Cannot read file '%s'", tsf->filename); + procfile_close(tsf->ff); + tsf->ff = NULL; + continue; + } + else { + // successful read + + size_t lines = procfile_lines(tsf->ff), l; + size_t words; + unsigned long long total_ticks_since_last = 0, avg_freq = 0; + + // Check if there is at least one frequency in time_in_state + if (procfile_word(tsf->ff, 0)[0] == '\0') { + if(unlikely(keep_per_core_fds_open != CONFIG_BOOLEAN_YES)) { + procfile_close(tsf->ff); + tsf->ff = NULL; + } + // TODO: Is there a better way to avoid spikes than calculating the average over + // the whole period under schedutil governor? + // freez(tsf->last_ticks); + // tsf->last_ticks = NULL; + // tsf->last_ticks_len = 0; + continue; + } + + if (unlikely(tsf->last_ticks_len < lines || tsf->last_ticks == NULL)) { + tsf->last_ticks = reallocz(tsf->last_ticks, sizeof(struct last_ticks) * lines); + memset(tsf->last_ticks, 0, sizeof(struct last_ticks) * lines); + tsf->last_ticks_len = lines; + } + + f->value = 0; + + for(l = 0; l < lines - 1 ;l++) { + unsigned long long frequency = 0, ticks = 0, ticks_since_last = 0; + + words = procfile_linewords(tsf->ff, l); + if(unlikely(words < 2)) { + error("Cannot read time_in_state line. Expected 2 params, read %zu.", words); + continue; + } + frequency = str2ull(procfile_lineword(tsf->ff, l, 0)); + ticks = str2ull(procfile_lineword(tsf->ff, l, 1)); + + // It is assumed that frequencies are static and sorted + ticks_since_last = ticks - tsf->last_ticks[l].ticks; + tsf->last_ticks[l].frequency = frequency; + tsf->last_ticks[l].ticks = ticks; + + total_ticks_since_last += ticks_since_last; + avg_freq += frequency * ticks_since_last; + + } + + if (likely(total_ticks_since_last)) { + avg_freq /= total_ticks_since_last; + f->value = avg_freq; + } + + if(unlikely(keep_per_core_fds_open != CONFIG_BOOLEAN_YES)) { + procfile_close(tsf->ff); + tsf->ff = NULL; + } + } + + files_read++; + + f->found = 1; + + if(likely(f->value != 0)) + files_nonzero++; + } + + if(unlikely(files_read == 0)) + return -1; + + if(unlikely(files_nonzero == 0)) + return 0; + + return (int)files_nonzero; +} + +static void chart_per_core_files(struct cpu_chart *all_cpu_charts, size_t len, size_t index, RRDSET *st, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm) { + size_t x; + for(x = 0; x < len ; x++) { + struct per_core_single_number_file *f = &all_cpu_charts[x].files[index]; + + if(unlikely(!f->found)) + continue; + + if(unlikely(!f->rd)) + f->rd = rrddim_add(st, all_cpu_charts[x].id, NULL, multiplier, divisor, algorithm); + + rrddim_set_by_pointer(st, f->rd, f->value); + } +} + +struct cpuidle_state { + char *name; + + char *time_filename; + int time_fd; + + collected_number value; + + RRDDIM *rd; +}; + +struct per_core_cpuidle_chart { + RRDSET *st; + + RRDDIM *active_time_rd; + collected_number active_time; + collected_number last_active_time; + + struct cpuidle_state *cpuidle_state; + size_t cpuidle_state_len; + int rescan_cpu_states; +}; + +static void* wake_cpu_thread(void* core) { + pthread_t thread; + cpu_set_t cpu_set; + static size_t cpu_wakeups = 0; + static int errors = 0; + + CPU_ZERO(&cpu_set); + CPU_SET(*(int*)core, &cpu_set); + + thread = pthread_self(); + if(unlikely(pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpu_set))) { + if(unlikely(errors < 8)) { + error("Cannot set CPU affinity for core %d", *(int*)core); + errors++; + } + else if(unlikely(errors < 9)) { + error("CPU affinity errors are disabled"); + errors++; + } + } + + // Make the CPU core do something to force it to update its idle counters + cpu_wakeups++; + + return 0; +} + +static int read_schedstat(char *schedstat_filename, struct per_core_cpuidle_chart **cpuidle_charts_address, size_t *schedstat_cores_found) { + static size_t cpuidle_charts_len = 0; + static procfile *ff = NULL; + struct per_core_cpuidle_chart *cpuidle_charts = *cpuidle_charts_address; + size_t cores_found = 0; + + if(unlikely(!ff)) { + ff = procfile_open(schedstat_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 1; + + size_t lines = procfile_lines(ff), l; + size_t words; + + for(l = 0; l < lines ;l++) { + char *row_key = procfile_lineword(ff, l, 0); + + // faster strncmp(row_key, "cpu", 3) == 0 + if(likely(row_key[0] == 'c' && row_key[1] == 'p' && row_key[2] == 'u')) { + words = procfile_linewords(ff, l); + if(unlikely(words < 10)) { + error("Cannot read /proc/schedstat cpu line. Expected 9 params, read %zu.", words); + return 1; + } + cores_found++; + + size_t core = str2ul(&row_key[3]); + if(unlikely(core >= cores_found)) { + error("Core %zu found but no more than %zu cores were expected.", core, cores_found); + return 1; + } + + if(unlikely(cpuidle_charts_len < cores_found)) { + cpuidle_charts = reallocz(cpuidle_charts, sizeof(struct per_core_cpuidle_chart) * cores_found); + *cpuidle_charts_address = cpuidle_charts; + memset(cpuidle_charts + cpuidle_charts_len, 0, sizeof(struct per_core_cpuidle_chart) * (cores_found - cpuidle_charts_len)); + cpuidle_charts_len = cores_found; + } + + cpuidle_charts[core].active_time = str2ull(procfile_lineword(ff, l, 7)) / 1000; + } + } + + *schedstat_cores_found = cores_found; + return 0; +} + +static int read_one_state(char *buf, const char *filename, int *fd) { + ssize_t ret = read(*fd, buf, 50); + + if(unlikely(ret <= 0)) { + // cannot read that file + error("Cannot read file '%s'", filename); + close(*fd); + *fd = -1; + return 0; + } + else { + // successful read + + // terminate the buffer + buf[ret - 1] = '\0'; + + if(unlikely(keep_cpuidle_fds_open != CONFIG_BOOLEAN_YES)) { + close(*fd); + *fd = -1; + } + else if(lseek(*fd, 0, SEEK_SET) == -1) { + error("Cannot seek in file '%s'", filename); + close(*fd); + *fd = -1; + } + } + + return 1; +} + +static int read_cpuidle_states(char *cpuidle_name_filename , char *cpuidle_time_filename, struct per_core_cpuidle_chart *cpuidle_charts, size_t core) { + char filename[FILENAME_MAX + 1]; + static char next_state_filename[FILENAME_MAX + 1]; + struct stat stbuf; + struct per_core_cpuidle_chart *cc = &cpuidle_charts[core]; + size_t state; + + if(unlikely(!cc->cpuidle_state_len || cc->rescan_cpu_states)) { + int state_file_found = 1; // check at least one state + + if(cc->cpuidle_state_len) { + for(state = 0; state < cc->cpuidle_state_len; state++) { + freez(cc->cpuidle_state[state].name); + + freez(cc->cpuidle_state[state].time_filename); + close(cc->cpuidle_state[state].time_fd); + cc->cpuidle_state[state].time_fd = -1; + } + + freez(cc->cpuidle_state); + cc->cpuidle_state = NULL; + cc->cpuidle_state_len = 0; + + cc->active_time_rd = NULL; + cc->st = NULL; + } + + while(likely(state_file_found)) { + snprintfz(filename, FILENAME_MAX, cpuidle_name_filename, core, cc->cpuidle_state_len); + if (stat(filename, &stbuf) == 0) + cc->cpuidle_state_len++; + else + state_file_found = 0; + } + snprintfz(next_state_filename, FILENAME_MAX, cpuidle_name_filename, core, cc->cpuidle_state_len); + + if(likely(cc->cpuidle_state_len)) + cc->cpuidle_state = callocz(cc->cpuidle_state_len, sizeof(struct cpuidle_state)); + + for(state = 0; state < cc->cpuidle_state_len; state++) { + char name_buf[50 + 1]; + snprintfz(filename, FILENAME_MAX, cpuidle_name_filename, core, state); + + int fd = open(filename, O_RDONLY, 0666); + if(unlikely(fd == -1)) { + error("Cannot open file '%s'", filename); + cc->rescan_cpu_states = 1; + return 1; + } + + ssize_t r = read(fd, name_buf, 50); + if(unlikely(r < 1)) { + error("Cannot read file '%s'", filename); + close(fd); + cc->rescan_cpu_states = 1; + return 1; + } + + name_buf[r - 1] = '\0'; // erase extra character + cc->cpuidle_state[state].name = strdupz(trim(name_buf)); + close(fd); + + snprintfz(filename, FILENAME_MAX, cpuidle_time_filename, core, state); + cc->cpuidle_state[state].time_filename = strdupz(filename); + cc->cpuidle_state[state].time_fd = -1; + } + + cc->rescan_cpu_states = 0; + } + + for(state = 0; state < cc->cpuidle_state_len; state++) { + + struct cpuidle_state *cs = &cc->cpuidle_state[state]; + + if(unlikely(cs->time_fd == -1)) { + cs->time_fd = open(cs->time_filename, O_RDONLY); + if (unlikely(cs->time_fd == -1)) { + error("Cannot open file '%s'", cs->time_filename); + cc->rescan_cpu_states = 1; + return 1; + } + } + + char time_buf[50 + 1]; + if(likely(read_one_state(time_buf, cs->time_filename, &cs->time_fd))) { + cs->value = str2ll(time_buf, NULL); + } + else { + cc->rescan_cpu_states = 1; + return 1; + } + } + + // check if the number of states was increased + if(unlikely(stat(next_state_filename, &stbuf) == 0)) { + cc->rescan_cpu_states = 1; + return 1; + } + + return 0; +} + +int do_proc_stat(int update_every, usec_t dt) { + (void)dt; + + static struct cpu_chart *all_cpu_charts = NULL; + static size_t all_cpu_charts_size = 0; + static procfile *ff = NULL; + static int do_cpu = -1, do_cpu_cores = -1, do_interrupts = -1, do_context = -1, do_forks = -1, do_processes = -1, + do_core_throttle_count = -1, do_package_throttle_count = -1, do_cpu_freq = -1, do_cpuidle = -1; + static uint32_t hash_intr, hash_ctxt, hash_processes, hash_procs_running, hash_procs_blocked; + static char *core_throttle_count_filename = NULL, *package_throttle_count_filename = NULL, *scaling_cur_freq_filename = NULL, + *time_in_state_filename = NULL, *schedstat_filename = NULL, *cpuidle_name_filename = NULL, *cpuidle_time_filename = NULL; + static const RRDVAR_ACQUIRED *cpus_var = NULL; + static int accurate_freq_avail = 0, accurate_freq_is_used = 0; + size_t cores_found = (size_t)processors; + + if(unlikely(do_cpu == -1)) { + do_cpu = config_get_boolean("plugin:proc:/proc/stat", "cpu utilization", CONFIG_BOOLEAN_YES); + do_cpu_cores = config_get_boolean("plugin:proc:/proc/stat", "per cpu core utilization", CONFIG_BOOLEAN_YES); + do_interrupts = config_get_boolean("plugin:proc:/proc/stat", "cpu interrupts", CONFIG_BOOLEAN_YES); + do_context = config_get_boolean("plugin:proc:/proc/stat", "context switches", CONFIG_BOOLEAN_YES); + do_forks = config_get_boolean("plugin:proc:/proc/stat", "processes started", CONFIG_BOOLEAN_YES); + do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", CONFIG_BOOLEAN_YES); + + // give sane defaults based on the number of processors + if(unlikely(processors > 50)) { + // the system has too many processors + keep_per_core_fds_open = CONFIG_BOOLEAN_NO; + do_core_throttle_count = CONFIG_BOOLEAN_NO; + do_package_throttle_count = CONFIG_BOOLEAN_NO; + do_cpu_freq = CONFIG_BOOLEAN_NO; + do_cpuidle = CONFIG_BOOLEAN_NO; + } + else { + // the system has a reasonable number of processors + keep_per_core_fds_open = CONFIG_BOOLEAN_YES; + do_core_throttle_count = CONFIG_BOOLEAN_AUTO; + do_package_throttle_count = CONFIG_BOOLEAN_NO; + do_cpu_freq = CONFIG_BOOLEAN_YES; + do_cpuidle = CONFIG_BOOLEAN_YES; + } + if(unlikely(processors > 24)) { + // the system has too many processors + keep_cpuidle_fds_open = CONFIG_BOOLEAN_NO; + } + else { + // the system has a reasonable number of processors + keep_cpuidle_fds_open = CONFIG_BOOLEAN_YES; + } + + keep_per_core_fds_open = config_get_boolean("plugin:proc:/proc/stat", "keep per core files open", keep_per_core_fds_open); + keep_cpuidle_fds_open = config_get_boolean("plugin:proc:/proc/stat", "keep cpuidle files open", keep_cpuidle_fds_open); + do_core_throttle_count = config_get_boolean_ondemand("plugin:proc:/proc/stat", "core_throttle_count", do_core_throttle_count); + do_package_throttle_count = config_get_boolean_ondemand("plugin:proc:/proc/stat", "package_throttle_count", do_package_throttle_count); + do_cpu_freq = config_get_boolean_ondemand("plugin:proc:/proc/stat", "cpu frequency", do_cpu_freq); + do_cpuidle = config_get_boolean_ondemand("plugin:proc:/proc/stat", "cpu idle states", do_cpuidle); + + hash_intr = simple_hash("intr"); + hash_ctxt = simple_hash("ctxt"); + hash_processes = simple_hash("processes"); + hash_procs_running = simple_hash("procs_running"); + hash_procs_blocked = simple_hash("procs_blocked"); + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/thermal_throttle/core_throttle_count"); + core_throttle_count_filename = config_get("plugin:proc:/proc/stat", "core_throttle_count filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/thermal_throttle/package_throttle_count"); + package_throttle_count_filename = config_get("plugin:proc:/proc/stat", "package_throttle_count filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/cpufreq/scaling_cur_freq"); + scaling_cur_freq_filename = config_get("plugin:proc:/proc/stat", "scaling_cur_freq filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/cpufreq/stats/time_in_state"); + time_in_state_filename = config_get("plugin:proc:/proc/stat", "time_in_state filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/schedstat"); + schedstat_filename = config_get("plugin:proc:/proc/stat", "schedstat filename to monitor", filename); + + if(do_cpuidle != CONFIG_BOOLEAN_NO) { + struct stat stbuf; + + if (stat(schedstat_filename, &stbuf)) + do_cpuidle = CONFIG_BOOLEAN_NO; + } + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/name"); + cpuidle_name_filename = config_get("plugin:proc:/proc/stat", "cpuidle name filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/time"); + cpuidle_time_filename = config_get("plugin:proc:/proc/stat", "cpuidle time filename to monitor", filename); + } + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/stat"); + ff = procfile_open(config_get("plugin:proc:/proc/stat", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + size_t words; + + unsigned long long processes = 0, running = 0 , blocked = 0; + + for(l = 0; l < lines ;l++) { + char *row_key = procfile_lineword(ff, l, 0); + uint32_t hash = simple_hash(row_key); + + // faster strncmp(row_key, "cpu", 3) == 0 + if(likely(row_key[0] == 'c' && row_key[1] == 'p' && row_key[2] == 'u')) { + words = procfile_linewords(ff, l); + if(unlikely(words < 9)) { + error("Cannot read /proc/stat cpu line. Expected 9 params, read %zu.", words); + continue; + } + + size_t core = (row_key[3] == '\0') ? 0 : str2ul(&row_key[3]) + 1; + if(likely(core > 0)) cores_found = core; + + if(likely((core == 0 && do_cpu) || (core > 0 && do_cpu_cores))) { + char *id; + unsigned long long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0; + + id = row_key; + user = str2ull(procfile_lineword(ff, l, 1)); + nice = str2ull(procfile_lineword(ff, l, 2)); + system = str2ull(procfile_lineword(ff, l, 3)); + idle = str2ull(procfile_lineword(ff, l, 4)); + iowait = str2ull(procfile_lineword(ff, l, 5)); + irq = str2ull(procfile_lineword(ff, l, 6)); + softirq = str2ull(procfile_lineword(ff, l, 7)); + steal = str2ull(procfile_lineword(ff, l, 8)); + + guest = str2ull(procfile_lineword(ff, l, 9)); + user -= guest; + + guest_nice = str2ull(procfile_lineword(ff, l, 10)); + nice -= guest_nice; + + char *title, *type, *context, *family; + long priority; + + if(unlikely(core >= all_cpu_charts_size)) { + size_t old_cpu_charts_size = all_cpu_charts_size; + all_cpu_charts_size = core + 1; + all_cpu_charts = reallocz(all_cpu_charts, sizeof(struct cpu_chart) * all_cpu_charts_size); + memset(&all_cpu_charts[old_cpu_charts_size], 0, sizeof(struct cpu_chart) * (all_cpu_charts_size - old_cpu_charts_size)); + } + struct cpu_chart *cpu_chart = &all_cpu_charts[core]; + + if(unlikely(!cpu_chart->st)) { + cpu_chart->id = strdupz(id); + + if(unlikely(core == 0)) { + title = "Total CPU utilization"; + type = "system"; + context = "system.cpu"; + family = id; + priority = NETDATA_CHART_PRIO_SYSTEM_CPU; + } + else { + title = "Core utilization"; + type = "cpu"; + context = "cpu.cpu"; + family = "utilization"; + priority = NETDATA_CHART_PRIO_CPU_PER_CORE; + + char filename[FILENAME_MAX + 1]; + struct stat stbuf; + + if(do_core_throttle_count != CONFIG_BOOLEAN_NO) { + snprintfz(filename, FILENAME_MAX, core_throttle_count_filename, id); + if (stat(filename, &stbuf) == 0) { + cpu_chart->files[CORE_THROTTLE_COUNT_INDEX].filename = strdupz(filename); + cpu_chart->files[CORE_THROTTLE_COUNT_INDEX].fd = -1; + do_core_throttle_count = CONFIG_BOOLEAN_YES; + } + } + + if(do_package_throttle_count != CONFIG_BOOLEAN_NO) { + snprintfz(filename, FILENAME_MAX, package_throttle_count_filename, id); + if (stat(filename, &stbuf) == 0) { + cpu_chart->files[PACKAGE_THROTTLE_COUNT_INDEX].filename = strdupz(filename); + cpu_chart->files[PACKAGE_THROTTLE_COUNT_INDEX].fd = -1; + do_package_throttle_count = CONFIG_BOOLEAN_YES; + } + } + + if(do_cpu_freq != CONFIG_BOOLEAN_NO) { + + snprintfz(filename, FILENAME_MAX, scaling_cur_freq_filename, id); + + if (stat(filename, &stbuf) == 0) { + cpu_chart->files[CPU_FREQ_INDEX].filename = strdupz(filename); + cpu_chart->files[CPU_FREQ_INDEX].fd = -1; + do_cpu_freq = CONFIG_BOOLEAN_YES; + } + + snprintfz(filename, FILENAME_MAX, time_in_state_filename, id); + + if (stat(filename, &stbuf) == 0) { + cpu_chart->time_in_state_files.filename = strdupz(filename); + cpu_chart->time_in_state_files.ff = NULL; + do_cpu_freq = CONFIG_BOOLEAN_YES; + accurate_freq_avail = 1; + } + } + } + + cpu_chart->st = rrdset_create_localhost( + type + , id + , NULL + , family + , context + , title + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , priority + core + , update_every + , RRDSET_TYPE_STACKED + ); + + long multiplier = 1; + long divisor = 1; // sysconf(_SC_CLK_TCK); + + cpu_chart->rd_guest_nice = rrddim_add(cpu_chart->st, "guest_nice", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_guest = rrddim_add(cpu_chart->st, "guest", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_steal = rrddim_add(cpu_chart->st, "steal", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_softirq = rrddim_add(cpu_chart->st, "softirq", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_irq = rrddim_add(cpu_chart->st, "irq", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_user = rrddim_add(cpu_chart->st, "user", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_system = rrddim_add(cpu_chart->st, "system", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_nice = rrddim_add(cpu_chart->st, "nice", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_iowait = rrddim_add(cpu_chart->st, "iowait", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + cpu_chart->rd_idle = rrddim_add(cpu_chart->st, "idle", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_hide(cpu_chart->st, "idle"); + + if(unlikely(core == 0 && cpus_var == NULL)) + cpus_var = rrdvar_custom_host_variable_add_and_acquire(localhost, "active_processors"); + } + + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_user, user); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_nice, nice); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_system, system); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_idle, idle); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_iowait, iowait); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_irq, irq); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_softirq, softirq); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_steal, steal); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_guest, guest); + rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_guest_nice, guest_nice); + rrdset_done(cpu_chart->st); + } + } + else if(unlikely(hash == hash_intr && strcmp(row_key, "intr") == 0)) { + if(likely(do_interrupts)) { + static RRDSET *st_intr = NULL; + static RRDDIM *rd_interrupts = NULL; + unsigned long long value = str2ull(procfile_lineword(ff, l, 1)); + + if(unlikely(!st_intr)) { + st_intr = rrdset_create_localhost( + "system" + , "intr" + , NULL + , "interrupts" + , NULL + , "CPU Interrupts" + , "interrupts/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_INTR + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_intr, RRDSET_FLAG_DETAIL); + + rd_interrupts = rrddim_add(st_intr, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_intr, rd_interrupts, value); + rrdset_done(st_intr); + } + } + else if(unlikely(hash == hash_ctxt && strcmp(row_key, "ctxt") == 0)) { + if(likely(do_context)) { + static RRDSET *st_ctxt = NULL; + static RRDDIM *rd_switches = NULL; + unsigned long long value = str2ull(procfile_lineword(ff, l, 1)); + + if(unlikely(!st_ctxt)) { + st_ctxt = rrdset_create_localhost( + "system" + , "ctxt" + , NULL + , "processes" + , NULL + , "CPU Context Switches" + , "context switches/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_CTXT + , update_every + , RRDSET_TYPE_LINE + ); + + rd_switches = rrddim_add(st_ctxt, "switches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ctxt, rd_switches, value); + rrdset_done(st_ctxt); + } + } + else if(unlikely(hash == hash_processes && !processes && strcmp(row_key, "processes") == 0)) { + processes = str2ull(procfile_lineword(ff, l, 1)); + } + else if(unlikely(hash == hash_procs_running && !running && strcmp(row_key, "procs_running") == 0)) { + running = str2ull(procfile_lineword(ff, l, 1)); + } + else if(unlikely(hash == hash_procs_blocked && !blocked && strcmp(row_key, "procs_blocked") == 0)) { + blocked = str2ull(procfile_lineword(ff, l, 1)); + } + } + + // -------------------------------------------------------------------- + + if(likely(do_forks)) { + static RRDSET *st_forks = NULL; + static RRDDIM *rd_started = NULL; + + if(unlikely(!st_forks)) { + st_forks = rrdset_create_localhost( + "system" + , "forks" + , NULL + , "processes" + , NULL + , "Started Processes" + , "processes/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_FORKS + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st_forks, RRDSET_FLAG_DETAIL); + + rd_started = rrddim_add(st_forks, "started", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_forks, rd_started, processes); + rrdset_done(st_forks); + } + + // -------------------------------------------------------------------- + + if(likely(do_processes)) { + static RRDSET *st_processes = NULL; + static RRDDIM *rd_running = NULL; + static RRDDIM *rd_blocked = NULL; + + if(unlikely(!st_processes)) { + st_processes = rrdset_create_localhost( + "system" + , "processes" + , NULL + , "processes" + , NULL + , "System Processes" + , "processes" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_PROCESSES + , update_every + , RRDSET_TYPE_LINE + ); + + rd_running = rrddim_add(st_processes, "running", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_blocked = rrddim_add(st_processes, "blocked", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_processes, rd_running, running); + rrddim_set_by_pointer(st_processes, rd_blocked, blocked); + rrdset_done(st_processes); + } + + if(likely(all_cpu_charts_size > 1)) { + if(likely(do_core_throttle_count != CONFIG_BOOLEAN_NO)) { + int r = read_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CORE_THROTTLE_COUNT_INDEX); + if(likely(r != -1 && (do_core_throttle_count == CONFIG_BOOLEAN_YES || r > 0))) { + do_core_throttle_count = CONFIG_BOOLEAN_YES; + + static RRDSET *st_core_throttle_count = NULL; + + if (unlikely(!st_core_throttle_count)) { + st_core_throttle_count = rrdset_create_localhost( + "cpu" + , "core_throttling" + , NULL + , "throttling" + , "cpu.core_throttling" + , "Core Thermal Throttling Events" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_CORE_THROTTLING + , update_every + , RRDSET_TYPE_LINE + ); + } + + chart_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CORE_THROTTLE_COUNT_INDEX, st_core_throttle_count, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrdset_done(st_core_throttle_count); + } + } + + if(likely(do_package_throttle_count != CONFIG_BOOLEAN_NO)) { + int r = read_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, PACKAGE_THROTTLE_COUNT_INDEX); + if(likely(r != -1 && (do_package_throttle_count == CONFIG_BOOLEAN_YES || r > 0))) { + do_package_throttle_count = CONFIG_BOOLEAN_YES; + + static RRDSET *st_package_throttle_count = NULL; + + if(unlikely(!st_package_throttle_count)) { + st_package_throttle_count = rrdset_create_localhost( + "cpu" + , "package_throttling" + , NULL + , "throttling" + , "cpu.package_throttling" + , "Package Thermal Throttling Events" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_PACKAGE_THROTTLING + , update_every + , RRDSET_TYPE_LINE + ); + } + + chart_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, PACKAGE_THROTTLE_COUNT_INDEX, st_package_throttle_count, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrdset_done(st_package_throttle_count); + } + } + + if(likely(do_cpu_freq != CONFIG_BOOLEAN_NO)) { + char filename[FILENAME_MAX + 1]; + int r = 0; + + if (accurate_freq_avail) { + r = read_per_core_time_in_state_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CPU_FREQ_INDEX); + if(r > 0 && !accurate_freq_is_used) { + accurate_freq_is_used = 1; + snprintfz(filename, FILENAME_MAX, time_in_state_filename, "cpu*"); + info("cpufreq is using %s", filename); + } + } + if (r < 1) { + r = read_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CPU_FREQ_INDEX); + if(accurate_freq_is_used) { + accurate_freq_is_used = 0; + snprintfz(filename, FILENAME_MAX, scaling_cur_freq_filename, "cpu*"); + info("cpufreq fell back to %s", filename); + } + } + + if(likely(r != -1 && (do_cpu_freq == CONFIG_BOOLEAN_YES || r > 0))) { + do_cpu_freq = CONFIG_BOOLEAN_YES; + + static RRDSET *st_scaling_cur_freq = NULL; + + if(unlikely(!st_scaling_cur_freq)) { + st_scaling_cur_freq = rrdset_create_localhost( + "cpu" + , "cpufreq" + , NULL + , "cpufreq" + , "cpufreq.cpufreq" + , "Current CPU Frequency" + , "MHz" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_CPUFREQ_SCALING_CUR_FREQ + , update_every + , RRDSET_TYPE_LINE + ); + } + + chart_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CPU_FREQ_INDEX, st_scaling_cur_freq, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rrdset_done(st_scaling_cur_freq); + } + } + } + + // -------------------------------------------------------------------- + + static struct per_core_cpuidle_chart *cpuidle_charts = NULL; + size_t schedstat_cores_found = 0; + + if(likely(do_cpuidle != CONFIG_BOOLEAN_NO && !read_schedstat(schedstat_filename, &cpuidle_charts, &schedstat_cores_found))) { + int cpu_states_updated = 0; + size_t core, state; + + + // proc.plugin runs on Linux systems only. Multi-platform compatibility is not needed here, + // so bare pthread functions are used to avoid unneeded overheads. + for(core = 0; core < schedstat_cores_found; core++) { + if(unlikely(!(cpuidle_charts[core].active_time - cpuidle_charts[core].last_active_time))) { + pthread_t thread; + cpu_set_t global_cpu_set; + + if (likely(!pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &global_cpu_set))) { + if (unlikely(!CPU_ISSET(core, &global_cpu_set))) { + continue; + } + } + else + error("Cannot read current process affinity"); + + // These threads are very ephemeral and don't need to have a specific name + if(unlikely(pthread_create(&thread, NULL, wake_cpu_thread, (void *)&core))) + error("Cannot create wake_cpu_thread"); + else if(unlikely(pthread_join(thread, NULL))) + error("Cannot join wake_cpu_thread"); + cpu_states_updated = 1; + } + } + + if(unlikely(!cpu_states_updated || !read_schedstat(schedstat_filename, &cpuidle_charts, &schedstat_cores_found))) { + for(core = 0; core < schedstat_cores_found; core++) { + cpuidle_charts[core].last_active_time = cpuidle_charts[core].active_time; + + int r = read_cpuidle_states(cpuidle_name_filename, cpuidle_time_filename, cpuidle_charts, core); + if(likely(r != -1 && (do_cpuidle == CONFIG_BOOLEAN_YES || r > 0))) { + do_cpuidle = CONFIG_BOOLEAN_YES; + + char cpuidle_chart_id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(cpuidle_chart_id, RRD_ID_LENGTH_MAX, "cpu%zu_cpuidle", core); + + if(unlikely(!cpuidle_charts[core].st)) { + cpuidle_charts[core].st = rrdset_create_localhost( + "cpu" + , cpuidle_chart_id + , NULL + , "cpuidle" + , "cpuidle.cpu_cstate_residency_time" + , "C-state residency time" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_CPUIDLE + core + , update_every + , RRDSET_TYPE_STACKED + ); + + char corebuf[50+1]; + snprintfz(corebuf, 50, "cpu%zu", core); + rrdlabels_add(cpuidle_charts[core].st->rrdlabels, "cpu", corebuf, RRDLABEL_SRC_AUTO); + + char cpuidle_dim_id[RRD_ID_LENGTH_MAX + 1]; + cpuidle_charts[core].active_time_rd = rrddim_add(cpuidle_charts[core].st, "active", "C0 (active)", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + for(state = 0; state < cpuidle_charts[core].cpuidle_state_len; state++) { + strncpyz(cpuidle_dim_id, cpuidle_charts[core].cpuidle_state[state].name, RRD_ID_LENGTH_MAX); + for(int i = 0; cpuidle_dim_id[i]; i++) + cpuidle_dim_id[i] = tolower(cpuidle_dim_id[i]); + cpuidle_charts[core].cpuidle_state[state].rd = rrddim_add(cpuidle_charts[core].st, cpuidle_dim_id, + cpuidle_charts[core].cpuidle_state[state].name, + 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + } + + rrddim_set_by_pointer(cpuidle_charts[core].st, cpuidle_charts[core].active_time_rd, cpuidle_charts[core].active_time); + for(state = 0; state < cpuidle_charts[core].cpuidle_state_len; state++) { + rrddim_set_by_pointer(cpuidle_charts[core].st, cpuidle_charts[core].cpuidle_state[state].rd, cpuidle_charts[core].cpuidle_state[state].value); + } + rrdset_done(cpuidle_charts[core].st); + } + } + } + } + + if(cpus_var) + rrdvar_custom_host_variable_set(localhost, cpus_var, cores_found); + + return 0; +} diff --git a/collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c b/collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c new file mode 100644 index 0000000..a04d430 --- /dev/null +++ b/collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/sys/kernel/random/entropy_avail"); + ff = procfile_open(config_get("plugin:proc:/proc/sys/kernel/random/entropy_avail", "filename to monitor", filename), "", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + unsigned long long entropy = str2ull(procfile_lineword(ff, 0, 0)); + + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "system" + , "entropy" + , NULL + , "entropy" + , NULL + , "Available Entropy" + , "entropy" + , PLUGIN_PROC_NAME + , "/proc/sys/kernel/random/entropy_avail" + , NETDATA_CHART_PRIO_SYSTEM_ENTROPY + , update_every + , RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "entropy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd, entropy); + rrdset_done(st); + return 0; +} diff --git a/collectors/proc.plugin/proc_uptime.c b/collectors/proc.plugin/proc_uptime.c new file mode 100644 index 0000000..ddab726 --- /dev/null +++ b/collectors/proc.plugin/proc_uptime.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +int do_proc_uptime(int update_every, usec_t dt) { + (void)dt; + + static char *uptime_filename = NULL; + if(!uptime_filename) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/uptime"); + + uptime_filename = config_get("plugin:proc:/proc/uptime", "filename to monitor", filename); + } + + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if(unlikely(!st)) { + + st = rrdset_create_localhost( + "system" + , "uptime" + , NULL + , "uptime" + , NULL + , "System Uptime" + , "seconds" + , PLUGIN_PROC_NAME + , "/proc/uptime" + , NETDATA_CHART_PRIO_SYSTEM_UPTIME + , update_every + , RRDSET_TYPE_LINE + ); + + rd = rrddim_add(st, "uptime", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd, uptime_msec(uptime_filename)); + rrdset_done(st); + return 0; +} diff --git a/collectors/proc.plugin/proc_vmstat.c b/collectors/proc.plugin/proc_vmstat.c new file mode 100644 index 0000000..b8defc4 --- /dev/null +++ b/collectors/proc.plugin/proc_vmstat.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_VMSTAT_NAME "/proc/vmstat" + +#define OOM_KILL_STRING "oom_kill" + +int do_proc_vmstat(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + static int do_swapio = -1, do_io = -1, do_pgfaults = -1, do_oom_kill = -1, do_numa = -1; + static int has_numa = -1; + + static ARL_BASE *arl_base = NULL; + static unsigned long long numa_foreign = 0ULL; + static unsigned long long numa_hint_faults = 0ULL; + static unsigned long long numa_hint_faults_local = 0ULL; + static unsigned long long numa_huge_pte_updates = 0ULL; + static unsigned long long numa_interleave = 0ULL; + static unsigned long long numa_local = 0ULL; + static unsigned long long numa_other = 0ULL; + static unsigned long long numa_pages_migrated = 0ULL; + static unsigned long long numa_pte_updates = 0ULL; + static unsigned long long pgfault = 0ULL; + static unsigned long long pgmajfault = 0ULL; + static unsigned long long pgpgin = 0ULL; + static unsigned long long pgpgout = 0ULL; + static unsigned long long pswpin = 0ULL; + static unsigned long long pswpout = 0ULL; + static unsigned long long oom_kill = 0ULL; + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/vmstat"); + ff = procfile_open(config_get("plugin:proc:/proc/vmstat", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + size_t lines = procfile_lines(ff), l; + + if(unlikely(!arl_base)) { + do_swapio = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "swap i/o", CONFIG_BOOLEAN_AUTO); + do_io = config_get_boolean("plugin:proc:/proc/vmstat", "disk i/o", CONFIG_BOOLEAN_YES); + do_pgfaults = config_get_boolean("plugin:proc:/proc/vmstat", "memory page faults", CONFIG_BOOLEAN_YES); + do_oom_kill = config_get_boolean("plugin:proc:/proc/vmstat", "out of memory kills", CONFIG_BOOLEAN_AUTO); + do_numa = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "system-wide numa metric summary", CONFIG_BOOLEAN_AUTO); + + + arl_base = arl_create("vmstat", NULL, 60); + arl_expect(arl_base, "pgfault", &pgfault); + arl_expect(arl_base, "pgmajfault", &pgmajfault); + arl_expect(arl_base, "pgpgin", &pgpgin); + arl_expect(arl_base, "pgpgout", &pgpgout); + arl_expect(arl_base, "pswpin", &pswpin); + arl_expect(arl_base, "pswpout", &pswpout); + + int has_oom_kill = 0; + + for (l = 0; l < lines; l++) { + if (!strcmp(procfile_lineword(ff, l, 0), OOM_KILL_STRING)) { + has_oom_kill = 1; + break; + } + } + + if (has_oom_kill) + arl_expect(arl_base, OOM_KILL_STRING, &oom_kill); + else + do_oom_kill = CONFIG_BOOLEAN_NO; + + if(do_numa == CONFIG_BOOLEAN_YES || (do_numa == CONFIG_BOOLEAN_AUTO && + (get_numa_node_count() >= 2 || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + arl_expect(arl_base, "numa_foreign", &numa_foreign); + arl_expect(arl_base, "numa_hint_faults_local", &numa_hint_faults_local); + arl_expect(arl_base, "numa_hint_faults", &numa_hint_faults); + arl_expect(arl_base, "numa_huge_pte_updates", &numa_huge_pte_updates); + arl_expect(arl_base, "numa_interleave", &numa_interleave); + arl_expect(arl_base, "numa_local", &numa_local); + arl_expect(arl_base, "numa_other", &numa_other); + arl_expect(arl_base, "numa_pages_migrated", &numa_pages_migrated); + arl_expect(arl_base, "numa_pte_updates", &numa_pte_updates); + } + else { + // Do not expect numa metrics when they are not needed. + // By not adding them, the ARL will stop processing the file + // when all the expected metrics are collected. + // Also ARL will not parse their values. + has_numa = 0; + do_numa = CONFIG_BOOLEAN_NO; + } + } + + arl_begin(arl_base); + for(l = 0; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + if(unlikely(words < 2)) { + if(unlikely(words)) error("Cannot read /proc/vmstat line %zu. Expected 2 params, read %zu.", l, words); + continue; + } + + if(unlikely(arl_check(arl_base, + procfile_lineword(ff, l, 0), + procfile_lineword(ff, l, 1)))) break; + } + + // -------------------------------------------------------------------- + + if(do_swapio == CONFIG_BOOLEAN_YES || (do_swapio == CONFIG_BOOLEAN_AUTO && + (pswpin || pswpout || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_swapio = CONFIG_BOOLEAN_YES; + + static RRDSET *st_swapio = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_swapio)) { + st_swapio = rrdset_create_localhost( + "system" + , "swapio" + , NULL + , "swap" + , NULL + , "Swap I/O" + , "KiB/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_VMSTAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_SWAPIO + , update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_swapio, "in", NULL, sysconf(_SC_PAGESIZE), 1024, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_swapio, "out", NULL, -sysconf(_SC_PAGESIZE), 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_swapio, rd_in, pswpin); + rrddim_set_by_pointer(st_swapio, rd_out, pswpout); + rrdset_done(st_swapio); + } + + // -------------------------------------------------------------------- + + if(do_io) { + static RRDSET *st_io = NULL; + static RRDDIM *rd_in = NULL, *rd_out = NULL; + + if(unlikely(!st_io)) { + st_io = rrdset_create_localhost( + "system" + , "pgpgio" + , NULL + , "disk" + , NULL + , "Memory Paged from/to disk" + , "KiB/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_VMSTAT_NAME + , NETDATA_CHART_PRIO_SYSTEM_PGPGIO + , update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_io, "in", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_io, "out", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_io, rd_in, pgpgin); + rrddim_set_by_pointer(st_io, rd_out, pgpgout); + rrdset_done(st_io); + } + + // -------------------------------------------------------------------- + + if(do_pgfaults) { + static RRDSET *st_pgfaults = NULL; + static RRDDIM *rd_minor = NULL, *rd_major = NULL; + + if(unlikely(!st_pgfaults)) { + st_pgfaults = rrdset_create_localhost( + "mem" + , "pgfaults" + , NULL + , "system" + , NULL + , "Memory Page Faults" + , "faults/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_VMSTAT_NAME + , NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_pgfaults, RRDSET_FLAG_DETAIL); + + rd_minor = rrddim_add(st_pgfaults, "minor", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_major = rrddim_add(st_pgfaults, "major", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_pgfaults, rd_minor, pgfault); + rrddim_set_by_pointer(st_pgfaults, rd_major, pgmajfault); + rrdset_done(st_pgfaults); + } + + // -------------------------------------------------------------------- + + if (do_oom_kill == CONFIG_BOOLEAN_YES || + (do_oom_kill == CONFIG_BOOLEAN_AUTO && (oom_kill || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + static RRDSET *st_oom_kill = NULL; + static RRDDIM *rd_oom_kill = NULL; + + do_oom_kill = CONFIG_BOOLEAN_YES; + + if(unlikely(!st_oom_kill)) { + st_oom_kill = rrdset_create_localhost( + "mem" + , "oom_kill" + , NULL + , "system" + , NULL + , "Out of Memory Kills" + , "kills/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_VMSTAT_NAME + , NETDATA_CHART_PRIO_MEM_SYSTEM_OOM_KILL + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_oom_kill, RRDSET_FLAG_DETAIL); + + rd_oom_kill = rrddim_add(st_oom_kill, "kills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_oom_kill, rd_oom_kill, oom_kill); + rrdset_done(st_oom_kill); + } + + // -------------------------------------------------------------------- + + // Ondemand criteria for NUMA. Since this won't change at run time, we + // check it only once. We check whether the node count is >= 2 because + // single-node systems have uninteresting statistics (since all accesses + // are local). + if(unlikely(has_numa == -1)) + + has_numa = (numa_local || numa_foreign || numa_interleave || numa_other || numa_pte_updates || + numa_huge_pte_updates || numa_hint_faults || numa_hint_faults_local || numa_pages_migrated) ? 1 : 0; + + if(do_numa == CONFIG_BOOLEAN_YES || (do_numa == CONFIG_BOOLEAN_AUTO && has_numa)) { + do_numa = CONFIG_BOOLEAN_YES; + + static RRDSET *st_numa = NULL; + static RRDDIM *rd_local = NULL, *rd_foreign = NULL, *rd_interleave = NULL, *rd_other = NULL, *rd_pte_updates = NULL, *rd_huge_pte_updates = NULL, *rd_hint_faults = NULL, *rd_hint_faults_local = NULL, *rd_pages_migrated = NULL; + + if(unlikely(!st_numa)) { + st_numa = rrdset_create_localhost( + "mem" + , "numa" + , NULL + , "numa" + , NULL + , "NUMA events" + , "events/s" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_VMSTAT_NAME + , NETDATA_CHART_PRIO_MEM_NUMA + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_numa, RRDSET_FLAG_DETAIL); + + // These depend on CONFIG_NUMA in the kernel. + rd_local = rrddim_add(st_numa, "local", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_foreign = rrddim_add(st_numa, "foreign", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_interleave = rrddim_add(st_numa, "interleave", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_other = rrddim_add(st_numa, "other", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + // The following stats depend on CONFIG_NUMA_BALANCING in the + // kernel. + rd_pte_updates = rrddim_add(st_numa, "pte_updates", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_huge_pte_updates = rrddim_add(st_numa, "huge_pte_updates", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_hint_faults = rrddim_add(st_numa, "hint_faults", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_hint_faults_local = rrddim_add(st_numa, "hint_faults_local", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_migrated = rrddim_add(st_numa, "pages_migrated", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_numa, rd_local, numa_local); + rrddim_set_by_pointer(st_numa, rd_foreign, numa_foreign); + rrddim_set_by_pointer(st_numa, rd_interleave, numa_interleave); + rrddim_set_by_pointer(st_numa, rd_other, numa_other); + + rrddim_set_by_pointer(st_numa, rd_pte_updates, numa_pte_updates); + rrddim_set_by_pointer(st_numa, rd_huge_pte_updates, numa_huge_pte_updates); + rrddim_set_by_pointer(st_numa, rd_hint_faults, numa_hint_faults); + rrddim_set_by_pointer(st_numa, rd_hint_faults_local, numa_hint_faults_local); + rrddim_set_by_pointer(st_numa, rd_pages_migrated, numa_pages_migrated); + + rrdset_done(st_numa); + } + + return 0; +} + diff --git a/collectors/proc.plugin/sys_block_zram.c b/collectors/proc.plugin/sys_block_zram.c new file mode 100644 index 0000000..6bae542 --- /dev/null +++ b/collectors/proc.plugin/sys_block_zram.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_ZRAM_NAME "/sys/block/zram" +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) + +typedef struct mm_stat { + unsigned long long orig_data_size; + unsigned long long compr_data_size; + unsigned long long mem_used_total; + unsigned long long mem_limit; + unsigned long long mem_used_max; + unsigned long long same_pages; + unsigned long long pages_compacted; +} MM_STAT; + +typedef struct zram_device { + procfile *file; + + RRDSET *st_usage; + RRDDIM *rd_compr_data_size; + RRDDIM *rd_metadata_size; + + RRDSET *st_savings; + RRDDIM *rd_original_size; + RRDDIM *rd_savings_size; + + RRDSET *st_comp_ratio; + RRDDIM *rd_comp_ratio; + + RRDSET *st_alloc_efficiency; + RRDDIM *rd_alloc_efficiency; +} ZRAM_DEVICE; + +static int try_get_zram_major_number(procfile *file) { + size_t i; + unsigned int lines = procfile_lines(file); + int id = -1; + char *name = NULL; + for (i = 0; i < lines; i++) + { + if (procfile_linewords(file, i) < 2) + continue; + name = procfile_lineword(file, i, 1); + if (strcmp(name, "zram") == 0) + { + id = str2i(procfile_lineword(file, i, 0)); + if (id == 0) + return -1; + return id; + } + } + return -1; +} + +static inline void init_rrd(const char *name, ZRAM_DEVICE *d, int update_every) { + char chart_name[RRD_ID_LENGTH_MAX + 1]; + + snprintfz(chart_name, RRD_ID_LENGTH_MAX, "zram_usage.%s", name); + d->st_usage = rrdset_create_localhost( + "mem" + , chart_name + , chart_name + , name + , "mem.zram_usage" + , "ZRAM Memory Usage" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_ZRAM_NAME + , NETDATA_CHART_PRIO_MEM_ZRAM + , update_every + , RRDSET_TYPE_AREA); + d->rd_compr_data_size = rrddim_add(d->st_usage, "compressed", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + d->rd_metadata_size = rrddim_add(d->st_usage, "metadata", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_usage->rrdlabels, "device", name, RRDLABEL_SRC_AUTO); + + snprintfz(chart_name, RRD_ID_LENGTH_MAX, "zram_savings.%s", name); + d->st_savings = rrdset_create_localhost( + "mem" + , chart_name + , chart_name + , name + , "mem.zram_savings" + , "ZRAM Memory Savings" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_ZRAM_NAME + , NETDATA_CHART_PRIO_MEM_ZRAM_SAVINGS + , update_every + , RRDSET_TYPE_AREA); + d->rd_savings_size = rrddim_add(d->st_savings, "savings", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + d->rd_original_size = rrddim_add(d->st_savings, "original", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_savings->rrdlabels, "device", name, RRDLABEL_SRC_AUTO); + + snprintfz(chart_name, RRD_ID_LENGTH_MAX, "zram_ratio.%s", name); + d->st_comp_ratio = rrdset_create_localhost( + "mem" + , chart_name + , chart_name + , name + , "mem.zram_ratio" + , "ZRAM Compression Ratio (original to compressed)" + , "ratio" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_ZRAM_NAME + , NETDATA_CHART_PRIO_MEM_ZRAM_RATIO + , update_every + , RRDSET_TYPE_LINE); + d->rd_comp_ratio = rrddim_add(d->st_comp_ratio, "ratio", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_comp_ratio->rrdlabels, "device", name, RRDLABEL_SRC_AUTO); + + snprintfz(chart_name, RRD_ID_LENGTH_MAX, "zram_efficiency.%s", name); + d->st_alloc_efficiency = rrdset_create_localhost( + "mem" + , chart_name + , chart_name + , name + , "mem.zram_efficiency" + , "ZRAM Efficiency" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_ZRAM_NAME + , NETDATA_CHART_PRIO_MEM_ZRAM_EFFICIENCY + , update_every + , RRDSET_TYPE_LINE); + d->rd_alloc_efficiency = rrddim_add(d->st_alloc_efficiency, "percent", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_alloc_efficiency->rrdlabels, "device", name, RRDLABEL_SRC_AUTO); +} + +static int init_devices(DICTIONARY *devices, unsigned int zram_id, int update_every) { + int count = 0; + DIR *dir = opendir("/dev"); + struct dirent *de; + struct stat st; + char filename[FILENAME_MAX + 1]; + procfile *ff = NULL; + ZRAM_DEVICE device; + + if (unlikely(!dir)) + return 0; + while ((de = readdir(dir))) + { + snprintfz(filename, FILENAME_MAX, "/dev/%s", de->d_name); + if (unlikely(stat(filename, &st) != 0)) + { + error("ZRAM : Unable to stat %s: %s", filename, strerror(errno)); + continue; + } + if (major(st.st_rdev) == zram_id) + { + info("ZRAM : Found device %s", filename); + snprintfz(filename, FILENAME_MAX, "/sys/block/%s/mm_stat", de->d_name); + ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); + if (ff == NULL) + { + error("ZRAM : Failed to open %s: %s", filename, strerror(errno)); + continue; + } + device.file = ff; + init_rrd(de->d_name, &device, update_every); + dictionary_set(devices, de->d_name, &device, sizeof(ZRAM_DEVICE)); + count++; + } + } + closedir(dir); + return count; +} + +static void free_device(DICTIONARY *dict, const char *name) +{ + ZRAM_DEVICE *d = (ZRAM_DEVICE*)dictionary_get(dict, name); + info("ZRAM : Disabling monitoring of device %s", name); + rrdset_obsolete_and_pointer_null(d->st_usage); + rrdset_obsolete_and_pointer_null(d->st_savings); + rrdset_obsolete_and_pointer_null(d->st_alloc_efficiency); + rrdset_obsolete_and_pointer_null(d->st_comp_ratio); + dictionary_del(dict, name); +} + +static inline int read_mm_stat(procfile *ff, MM_STAT *stats) { + ff = procfile_readall(ff); + if (!ff) + return -1; + if (procfile_lines(ff) < 1) { + procfile_close(ff); + return -1; + } + if (procfile_linewords(ff, 0) < 7) { + procfile_close(ff); + return -1; + } + + stats->orig_data_size = str2ull(procfile_word(ff, 0)); + stats->compr_data_size = str2ull(procfile_word(ff, 1)); + stats->mem_used_total = str2ull(procfile_word(ff, 2)); + stats->mem_limit = str2ull(procfile_word(ff, 3)); + stats->mem_used_max = str2ull(procfile_word(ff, 4)); + stats->same_pages = str2ull(procfile_word(ff, 5)); + stats->pages_compacted = str2ull(procfile_word(ff, 6)); + return 0; +} + +static int collect_zram_metrics(const DICTIONARY_ITEM *item, void *entry, void *data) { + const char *name = dictionary_acquired_item_name(item); + ZRAM_DEVICE *dev = entry; + DICTIONARY *dict = data; + + MM_STAT mm; + int value; + + if (unlikely(read_mm_stat(dev->file, &mm) < 0)) { + free_device(dict, name); + return -1; + } + + // zram_usage + rrddim_set_by_pointer(dev->st_usage, dev->rd_compr_data_size, mm.compr_data_size); + rrddim_set_by_pointer(dev->st_usage, dev->rd_metadata_size, mm.mem_used_total - mm.compr_data_size); + rrdset_done(dev->st_usage); + + // zram_savings + rrddim_set_by_pointer(dev->st_savings, dev->rd_savings_size, mm.compr_data_size - mm.orig_data_size); + rrddim_set_by_pointer(dev->st_savings, dev->rd_original_size, mm.orig_data_size); + rrdset_done(dev->st_savings); + + // zram_ratio + value = mm.compr_data_size == 0 ? 1 : mm.orig_data_size * 100 / mm.compr_data_size; + rrddim_set_by_pointer(dev->st_comp_ratio, dev->rd_comp_ratio, value); + rrdset_done(dev->st_comp_ratio); + + // zram_efficiency + value = mm.mem_used_total == 0 ? 100 : (mm.compr_data_size * 1000000 / mm.mem_used_total); + rrddim_set_by_pointer(dev->st_alloc_efficiency, dev->rd_alloc_efficiency, value); + rrdset_done(dev->st_alloc_efficiency); + + return 0; +} + +int do_sys_block_zram(int update_every, usec_t dt) { + static procfile *ff = NULL; + static DICTIONARY *devices = NULL; + static int initialized = 0; + static int device_count = 0; + int zram_id = -1; + + (void)dt; + + if (unlikely(!initialized)) + { + initialized = 1; + ff = procfile_open("/proc/devices", " \t:", PROCFILE_FLAG_DEFAULT); + if (ff == NULL) + { + error("Cannot read /proc/devices"); + return 1; + } + ff = procfile_readall(ff); + if (!ff) + return 1; + zram_id = try_get_zram_major_number(ff); + if (zram_id == -1) + { + if (ff != NULL) + procfile_close(ff); + return 1; + } + procfile_close(ff); + + devices = dictionary_create(DICT_OPTION_SINGLE_THREADED); + device_count = init_devices(devices, (unsigned int)zram_id, update_every); + } + + if (unlikely(device_count < 1)) + return 1; + + dictionary_walkthrough_write(devices, collect_zram_metrics, devices); + return 0; +} \ No newline at end of file diff --git a/collectors/proc.plugin/sys_class_infiniband.c b/collectors/proc.plugin/sys_class_infiniband.c new file mode 100644 index 0000000..fca0cb8 --- /dev/null +++ b/collectors/proc.plugin/sys_class_infiniband.c @@ -0,0 +1,703 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +// Heavily inspired from proc_net_dev.c +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_INFINIBAND_NAME "/sys/class/infiniband" +#define CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND \ + "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_INFINIBAND_NAME + +// ib_device::name[IB_DEVICE_NAME_MAX(64)] + "-" + ib_device::phys_port_cnt[u8 = 3 chars] +#define IBNAME_MAX 68 + +// ---------------------------------------------------------------------------- +// infiniband & omnipath standard counters + +// I use macro as there's no single file acting as summary, but a lot of different files, so can't use helpers like +// procfile(). Also, omnipath generates other counters, that are not provided by infiniband +#define FOREACH_COUNTER(GEN, ...) \ + FOREACH_COUNTER_BYTES(GEN, __VA_ARGS__) \ + FOREACH_COUNTER_PACKETS(GEN, __VA_ARGS__) \ + FOREACH_COUNTER_ERRORS(GEN, __VA_ARGS__) + +#define FOREACH_COUNTER_BYTES(GEN, ...) \ + GEN(port_rcv_data, bytes, "Received", 1, __VA_ARGS__) \ + GEN(port_xmit_data, bytes, "Sent", -1, __VA_ARGS__) + +#define FOREACH_COUNTER_PACKETS(GEN, ...) \ + GEN(port_rcv_packets, packets, "Received", 1, __VA_ARGS__) \ + GEN(port_xmit_packets, packets, "Sent", -1, __VA_ARGS__) \ + GEN(multicast_rcv_packets, packets, "Mcast rcvd", 1, __VA_ARGS__) \ + GEN(multicast_xmit_packets, packets, "Mcast sent", -1, __VA_ARGS__) \ + GEN(unicast_rcv_packets, packets, "Ucast rcvd", 1, __VA_ARGS__) \ + GEN(unicast_xmit_packets, packets, "Ucast sent", -1, __VA_ARGS__) + +#define FOREACH_COUNTER_ERRORS(GEN, ...) \ + GEN(port_rcv_errors, errors, "Pkts malformated", 1, __VA_ARGS__) \ + GEN(port_rcv_constraint_errors, errors, "Pkts rcvd discarded ", 1, __VA_ARGS__) \ + GEN(port_xmit_discards, errors, "Pkts sent discarded", 1, __VA_ARGS__) \ + GEN(port_xmit_wait, errors, "Tick Wait to send", 1, __VA_ARGS__) \ + GEN(VL15_dropped, errors, "Pkts missed resource", 1, __VA_ARGS__) \ + GEN(excessive_buffer_overrun_errors, errors, "Buffer overrun", 1, __VA_ARGS__) \ + GEN(link_downed, errors, "Link Downed", 1, __VA_ARGS__) \ + GEN(link_error_recovery, errors, "Link recovered", 1, __VA_ARGS__) \ + GEN(local_link_integrity_errors, errors, "Link integrity err", 1, __VA_ARGS__) \ + GEN(symbol_error, errors, "Link minor errors", 1, __VA_ARGS__) \ + GEN(port_rcv_remote_physical_errors, errors, "Pkts rcvd with EBP", 1, __VA_ARGS__) \ + GEN(port_rcv_switch_relay_errors, errors, "Pkts rcvd discarded by switch", 1, __VA_ARGS__) \ + GEN(port_xmit_constraint_errors, errors, "Pkts sent discarded by switch", 1, __VA_ARGS__) + +// +// Hardware Counters +// + +// IMPORTANT: These are vendor-specific fields. +// If you want to add a new vendor, search this for for 'VENDORS:' keyword and +// add your definition as 'VENDOR-:' where if the string part that +// is shown in /sys/class/infiniband/X_Y +// EG: for Mellanox, shown as mlx0_1, it's 'mlx' +// for Intel, shown as hfi1_1, it's 'hfi' + +// VENDORS: List of implemented hardware vendors +#define FOREACH_HWCOUNTER_NAME(GEN, ...) GEN(mlx, __VA_ARGS__) + +// VENDOR-MLX: HW Counters for Mellanox ConnectX Devices +#define FOREACH_HWCOUNTER_MLX(GEN, ...) \ + FOREACH_HWCOUNTER_MLX_PACKETS(GEN, __VA_ARGS__) \ + FOREACH_HWCOUNTER_MLX_ERRORS(GEN, __VA_ARGS__) + +#define FOREACH_HWCOUNTER_MLX_PACKETS(GEN, ...) \ + GEN(np_cnp_sent, hwpackets, "RoCEv2 Congestion sent", 1, __VA_ARGS__) \ + GEN(np_ecn_marked_roce_packets, hwpackets, "RoCEv2 Congestion rcvd", -1, __VA_ARGS__) \ + GEN(rp_cnp_handled, hwpackets, "IB Congestion handled", 1, __VA_ARGS__) \ + GEN(rx_atomic_requests, hwpackets, "ATOMIC req. rcvd", 1, __VA_ARGS__) \ + GEN(rx_dct_connect, hwpackets, "Connection req. rcvd", 1, __VA_ARGS__) \ + GEN(rx_read_requests, hwpackets, "Read req. rcvd", 1, __VA_ARGS__) \ + GEN(rx_write_requests, hwpackets, "Write req. rcvd", 1, __VA_ARGS__) \ + GEN(roce_adp_retrans, hwpackets, "RoCE retrans adaptive", 1, __VA_ARGS__) \ + GEN(roce_adp_retrans_to, hwpackets, "RoCE retrans timeout", 1, __VA_ARGS__) \ + GEN(roce_slow_restart, hwpackets, "RoCE slow restart", 1, __VA_ARGS__) \ + GEN(roce_slow_restart_cnps, hwpackets, "RoCE slow restart congestion", 1, __VA_ARGS__) \ + GEN(roce_slow_restart_trans, hwpackets, "RoCE slow restart count", 1, __VA_ARGS__) + +#define FOREACH_HWCOUNTER_MLX_ERRORS(GEN, ...) \ + GEN(duplicate_request, hwerrors, "Duplicated packets", -1, __VA_ARGS__) \ + GEN(implied_nak_seq_err, hwerrors, "Pkt Seq Num gap", 1, __VA_ARGS__) \ + GEN(local_ack_timeout_err, hwerrors, "Ack timer expired", 1, __VA_ARGS__) \ + GEN(out_of_buffer, hwerrors, "Drop missing buffer", 1, __VA_ARGS__) \ + GEN(out_of_sequence, hwerrors, "Drop out of sequence", 1, __VA_ARGS__) \ + GEN(packet_seq_err, hwerrors, "NAK sequence rcvd", 1, __VA_ARGS__) \ + GEN(req_cqe_error, hwerrors, "CQE err Req", 1, __VA_ARGS__) \ + GEN(resp_cqe_error, hwerrors, "CQE err Resp", 1, __VA_ARGS__) \ + GEN(req_cqe_flush_error, hwerrors, "CQE Flushed err Req", 1, __VA_ARGS__) \ + GEN(resp_cqe_flush_error, hwerrors, "CQE Flushed err Resp", 1, __VA_ARGS__) \ + GEN(req_remote_access_errors, hwerrors, "Remote access err Req", 1, __VA_ARGS__) \ + GEN(resp_remote_access_errors, hwerrors, "Remote access err Resp", 1, __VA_ARGS__) \ + GEN(req_remote_invalid_request, hwerrors, "Remote invalid req", 1, __VA_ARGS__) \ + GEN(resp_local_length_error, hwerrors, "Local length err Resp", 1, __VA_ARGS__) \ + GEN(rnr_nak_retry_err, hwerrors, "RNR NAK Packets", 1, __VA_ARGS__) \ + GEN(rp_cnp_ignored, hwerrors, "CNP Pkts ignored", 1, __VA_ARGS__) \ + GEN(rx_icrc_encapsulated, hwerrors, "RoCE ICRC Errors", 1, __VA_ARGS__) + +// Common definitions used more than once +#define GEN_RRD_DIM_ADD(NAME, GRP, DESC, DIR, PORT) \ + GEN_RRD_DIM_ADD_CUSTOM(NAME, GRP, DESC, DIR, PORT, 1, 1, RRD_ALGORITHM_INCREMENTAL) + +#define GEN_RRD_DIM_ADD_CUSTOM(NAME, GRP, DESC, DIR, PORT, MULT, DIV, ALGO) \ + PORT->rd_##NAME = rrddim_add(PORT->st_##GRP, DESC, NULL, DIR * MULT, DIV, ALGO); + +#define GEN_RRD_DIM_ADD_HW(NAME, GRP, DESC, DIR, PORT, HW) \ + HW->rd_##NAME = rrddim_add(PORT->st_##GRP, DESC, NULL, DIR, 1, RRD_ALGORITHM_INCREMENTAL); + +#define GEN_RRD_DIM_SETP(NAME, GRP, DESC, DIR, PORT) \ + rrddim_set_by_pointer(PORT->st_##GRP, PORT->rd_##NAME, (collected_number)PORT->NAME); + +#define GEN_RRD_DIM_SETP_HW(NAME, GRP, DESC, DIR, PORT, HW) \ + rrddim_set_by_pointer(PORT->st_##GRP, HW->rd_##NAME, (collected_number)HW->NAME); + +// https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters +// https://community.mellanox.com/s/article/infiniband-port-counters +static struct ibport { + char *name; + char *counters_path; + char *hwcounters_path; + int len; + + // flags + int configured; + int enabled; + int discovered; + + int do_bytes; + int do_packets; + int do_errors; + int do_hwpackets; + int do_hwerrors; + + const char *chart_type_bytes; + const char *chart_type_packets; + const char *chart_type_errors; + const char *chart_type_hwpackets; + const char *chart_type_hwerrors; + + const char *chart_id_bytes; + const char *chart_id_packets; + const char *chart_id_errors; + const char *chart_id_hwpackets; + const char *chart_id_hwerrors; + + const char *chart_family; + + unsigned long priority; + + // Port details using drivers/infiniband/core/sysfs.c :: rate_show() + RRDDIM *rd_speed; + uint64_t speed; + uint64_t width; + +// Stats from /$device/ports/$portid/counters +// as drivers/infiniband/hw/qib/qib_verbs.h +// All uint64 except vl15_dropped, local_link_integrity_errors, excessive_buffer_overrun_errors uint32 +// Will generate 2 elements for each counter: +// - uint64_t to store the value +// - char* to store the filename path +// - RRDDIM* to store the RRD Dimension +#define GEN_DEF_COUNTER(NAME, ...) \ + uint64_t NAME; \ + char *file_##NAME; \ + RRDDIM *rd_##NAME; + FOREACH_COUNTER(GEN_DEF_COUNTER) + +// Vendor specific hwcounters from /$device/ports/$portid/hw_counters +// We will generate one struct pointer per vendor to avoid future casting +#define GEN_DEF_HWCOUNTER_PTR(VENDOR, ...) struct ibporthw_##VENDOR *hwcounters_##VENDOR; + FOREACH_HWCOUNTER_NAME(GEN_DEF_HWCOUNTER_PTR) + + // Function pointer to the "infiniband_hwcounters_parse_" function + void (*hwcounters_parse)(struct ibport *); + void (*hwcounters_dorrd)(struct ibport *); + + // charts and dim + RRDSET *st_bytes; + RRDSET *st_packets; + RRDSET *st_errors; + RRDSET *st_hwpackets; + RRDSET *st_hwerrors; + + const RRDSETVAR_ACQUIRED *stv_speed; + + usec_t speed_last_collected_usec; + + struct ibport *next; +} *ibport_root = NULL, *ibport_last_used = NULL; + +// VENDORS: reading / calculation functions +#define GEN_DEF_HWCOUNTER(NAME, ...) \ + uint64_t NAME; \ + char *file_##NAME; \ + RRDDIM *rd_##NAME; + +#define GEN_DO_HWCOUNTER_READ(NAME, GRP, DESC, DIR, PORT, HW, ...) \ + if (HW->file_##NAME) { \ + if (read_single_number_file(HW->file_##NAME, (unsigned long long *)&HW->NAME)) { \ + error("cannot read iface '%s' hwcounter '" #HW "'", PORT->name); \ + HW->file_##NAME = NULL; \ + } \ + } + +// VENDOR-MLX: Mellanox +struct ibporthw_mlx { + FOREACH_HWCOUNTER_MLX(GEN_DEF_HWCOUNTER) +}; +void infiniband_hwcounters_parse_mlx(struct ibport *port) +{ + if (port->do_hwerrors != CONFIG_BOOLEAN_NO) + FOREACH_HWCOUNTER_MLX_ERRORS(GEN_DO_HWCOUNTER_READ, port, port->hwcounters_mlx) + if (port->do_hwpackets != CONFIG_BOOLEAN_NO) + FOREACH_HWCOUNTER_MLX_PACKETS(GEN_DO_HWCOUNTER_READ, port, port->hwcounters_mlx) +} +void infiniband_hwcounters_dorrd_mlx(struct ibport *port) +{ + if (port->do_hwerrors != CONFIG_BOOLEAN_NO) { + FOREACH_HWCOUNTER_MLX_ERRORS(GEN_RRD_DIM_SETP_HW, port, port->hwcounters_mlx) + rrdset_done(port->st_hwerrors); + } + if (port->do_hwpackets != CONFIG_BOOLEAN_NO) { + FOREACH_HWCOUNTER_MLX_PACKETS(GEN_RRD_DIM_SETP_HW, port, port->hwcounters_mlx) + rrdset_done(port->st_hwpackets); + } +} + +// ---------------------------------------------------------------------------- + +static struct ibport *get_ibport(const char *dev, const char *port) +{ + struct ibport *p; + + char name[IBNAME_MAX + 1]; + snprintfz(name, IBNAME_MAX, "%s-%s", dev, port); + + // search it, resuming from the last position in sequence + for (p = ibport_last_used; p; p = p->next) { + if (unlikely(!strcmp(name, p->name))) { + ibport_last_used = p->next; + return p; + } + } + + // new round, from the beginning to the last position used this time + for (p = ibport_root; p != ibport_last_used; p = p->next) { + if (unlikely(!strcmp(name, p->name))) { + ibport_last_used = p->next; + return p; + } + } + + // create a new one + p = callocz(1, sizeof(struct ibport)); + p->name = strdupz(name); + p->len = strlen(p->name); + + p->chart_type_bytes = strdupz("infiniband_cnt_bytes"); + p->chart_type_packets = strdupz("infiniband_cnt_packets"); + p->chart_type_errors = strdupz("infiniband_cnt_errors"); + p->chart_type_hwpackets = strdupz("infiniband_hwc_packets"); + p->chart_type_hwerrors = strdupz("infiniband_hwc_errors"); + + char buffer[RRD_ID_LENGTH_MAX + 1]; + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_cntbytes_%s", p->name); + p->chart_id_bytes = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_cntpackets_%s", p->name); + p->chart_id_packets = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_cnterrors_%s", p->name); + p->chart_id_errors = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_hwcntpackets_%s", p->name); + p->chart_id_hwpackets = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_hwcnterrors_%s", p->name); + p->chart_id_hwerrors = strdupz(buffer); + + p->chart_family = strdupz(p->name); + p->priority = NETDATA_CHART_PRIO_INFINIBAND; + + // Link current ibport to last one in the list + if (ibport_root) { + struct ibport *t; + for (t = ibport_root; t->next; t = t->next) + ; + t->next = p; + } else + ibport_root = p; + + return p; +} + +int do_sys_class_infiniband(int update_every, usec_t dt) +{ + (void)dt; + static SIMPLE_PATTERN *disabled_list = NULL; + static int initialized = 0; + static int enable_new_ports = -1, enable_only_active = CONFIG_BOOLEAN_YES; + static int do_bytes = -1, do_packets = -1, do_errors = -1, do_hwpackets = -1, do_hwerrors = -1; + static char *sys_class_infiniband_dirname = NULL; + + static long long int dt_to_refresh_ports = 0, last_refresh_ports_usec = 0; + + if (unlikely(enable_new_ports == -1)) { + char dirname[FILENAME_MAX + 1]; + + snprintfz(dirname, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/infiniband"); + sys_class_infiniband_dirname = + config_get(CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "dirname to monitor", dirname); + + do_bytes = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "bandwidth counters", CONFIG_BOOLEAN_YES); + do_packets = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "packets counters", CONFIG_BOOLEAN_YES); + do_errors = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "errors counters", CONFIG_BOOLEAN_YES); + do_hwpackets = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "hardware packets counters", CONFIG_BOOLEAN_AUTO); + do_hwerrors = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "hardware errors counters", CONFIG_BOOLEAN_AUTO); + + enable_only_active = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "monitor only active ports", CONFIG_BOOLEAN_AUTO); + disabled_list = simple_pattern_create( + config_get(CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "disable by default interfaces matching", ""), NULL, + SIMPLE_PATTERN_EXACT); + + dt_to_refresh_ports = + config_get_number(CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "refresh ports state every seconds", 30) * + USEC_PER_SEC; + if (dt_to_refresh_ports < 0) + dt_to_refresh_ports = 0; + } + + // init listing of /sys/class/infiniband/ (or rediscovery) + if (unlikely(!initialized) || unlikely(last_refresh_ports_usec >= dt_to_refresh_ports)) { + // If folder does not exists, return 1 to disable + DIR *devices_dir = opendir(sys_class_infiniband_dirname); + if (unlikely(!devices_dir)) + return 1; + + // Work on all device available + struct dirent *dev_dent; + while ((dev_dent = readdir(devices_dir))) { + // Skip special folders + if (!strcmp(dev_dent->d_name, "..") || !strcmp(dev_dent->d_name, ".")) + continue; + + // /sys/class/infiniband//ports + char ports_dirname[FILENAME_MAX + 1]; + snprintfz(ports_dirname, FILENAME_MAX, "%s/%s/%s", sys_class_infiniband_dirname, dev_dent->d_name, "ports"); + + DIR *ports_dir = opendir(ports_dirname); + if (unlikely(!ports_dir)) + continue; + + struct dirent *port_dent; + while ((port_dent = readdir(ports_dir))) { + // Skip special folders + if (!strcmp(port_dent->d_name, "..") || !strcmp(port_dent->d_name, ".")) + continue; + + char buffer[FILENAME_MAX + 1]; + + // Check if counters are available (mandatory) + // /sys/class/infiniband//ports//counters + char counters_dirname[FILENAME_MAX + 1]; + snprintfz(counters_dirname, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "counters"); + DIR *counters_dir = opendir(counters_dirname); + // Standard counters are mandatory + if (!counters_dir) + continue; + closedir(counters_dir); + + // Hardware Counters are optional, used later + char hwcounters_dirname[FILENAME_MAX + 1]; + snprintfz( + hwcounters_dirname, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "hw_counters"); + + // Get new or existing ibport + struct ibport *p = get_ibport(dev_dent->d_name, port_dent->d_name); + if (!p) + continue; + + // Prepare configuration + if (!p->configured) { + p->configured = 1; + + // Enable by default, will be filtered out later + p->enabled = 1; + + p->counters_path = strdupz(counters_dirname); + p->hwcounters_path = strdupz(hwcounters_dirname); + + snprintfz(buffer, FILENAME_MAX, "plugin:proc:/sys/class/infiniband:%s", p->name); + + // Standard counters + p->do_bytes = config_get_boolean_ondemand(buffer, "bytes", do_bytes); + p->do_packets = config_get_boolean_ondemand(buffer, "packets", do_packets); + p->do_errors = config_get_boolean_ondemand(buffer, "errors", do_errors); + +// Gen filename allocation and concatenation +#define GEN_DO_COUNTER_NAME(NAME, GRP, DESC, DIR, PORT, ...) \ + PORT->file_##NAME = callocz(1, strlen(PORT->counters_path) + sizeof(#NAME) + 3); \ + strcat(PORT->file_##NAME, PORT->counters_path); \ + strcat(PORT->file_##NAME, "/" #NAME); + FOREACH_COUNTER(GEN_DO_COUNTER_NAME, p) + + // Check HW Counters vendor dependent + DIR *hwcounters_dir = opendir(hwcounters_dirname); + if (hwcounters_dir) { + // By default set standard + p->do_hwpackets = config_get_boolean_ondemand(buffer, "hwpackets", do_hwpackets); + p->do_hwerrors = config_get_boolean_ondemand(buffer, "hwerrors", do_hwerrors); + +// VENDORS: Set your own + +// Allocate the chars to the filenames +#define GEN_DO_HWCOUNTER_NAME(NAME, GRP, DESC, DIR, PORT, HW, ...) \ + HW->file_##NAME = callocz(1, strlen(PORT->hwcounters_path) + sizeof(#NAME) + 3); \ + strcat(HW->file_##NAME, PORT->hwcounters_path); \ + strcat(HW->file_##NAME, "/" #NAME); + + // VENDOR-MLX: Mellanox + if (strncmp(dev_dent->d_name, "mlx", 3) == 0) { + // Allocate the vendor specific struct + p->hwcounters_mlx = callocz(1, sizeof(struct ibporthw_mlx)); + + FOREACH_HWCOUNTER_MLX(GEN_DO_HWCOUNTER_NAME, p, p->hwcounters_mlx) + + // Set the function pointer for hwcounter parsing + p->hwcounters_parse = &infiniband_hwcounters_parse_mlx; + p->hwcounters_dorrd = &infiniband_hwcounters_dorrd_mlx; + } + + // VENDOR: Unknown + else { + p->do_hwpackets = CONFIG_BOOLEAN_NO; + p->do_hwerrors = CONFIG_BOOLEAN_NO; + } + closedir(hwcounters_dir); + } + } + + // Check port state to keep activation + if (enable_only_active) { + snprintfz(buffer, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "state"); + unsigned long long active; + // File is "1: DOWN" or "4: ACTIVE", but str2ull will stop on first non-decimal char + read_single_number_file(buffer, &active); + + // Want "IB_PORT_ACTIVE" == "4", as defined by drivers/infiniband/core/sysfs.c::state_show() + if (active == 4) + p->enabled = 1; + else + p->enabled = 0; + } + + if (p->enabled) + p->enabled = !simple_pattern_matches(disabled_list, p->name); + + // Check / Update the link speed & width frm "rate" file + // Sample output: "100 Gb/sec (4X EDR)" + snprintfz(buffer, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "rate"); + char buffer_rate[65]; + if (read_file(buffer, buffer_rate, 64)) { + error("Unable to read '%s'", buffer); + p->width = 1; + } else { + char *buffer_width = strstr(buffer_rate, "("); + buffer_width++; + // str2ull will stop on first non-decimal value + p->speed = str2ull(buffer_rate); + p->width = str2ull(buffer_width); + } + + if (!p->discovered) + info( + "Infiniband card %s port %s at speed %" PRIu64 " width %" PRIu64 "", + dev_dent->d_name, + port_dent->d_name, + p->speed, + p->width); + + p->discovered = 1; + } + closedir(ports_dir); + } + closedir(devices_dir); + + initialized = 1; + last_refresh_ports_usec = 0; + } + last_refresh_ports_usec += dt; + + // Update all ports values + struct ibport *port; + for (port = ibport_root; port; port = port->next) { + if (!port->enabled) + continue; + // + // Read values from system to struct + // + +// counter from file and place it in ibport struct +#define GEN_DO_COUNTER_READ(NAME, GRP, DESC, DIR, PORT, ...) \ + if (PORT->file_##NAME) { \ + if (read_single_number_file(PORT->file_##NAME, (unsigned long long *)&PORT->NAME)) { \ + error("cannot read iface '%s' counter '" #NAME "'", PORT->name); \ + PORT->file_##NAME = NULL; \ + } \ + } + + // Update charts + if (port->do_bytes != CONFIG_BOOLEAN_NO) { + // Read values from sysfs + FOREACH_COUNTER_BYTES(GEN_DO_COUNTER_READ, port) + + // First creation of RRD Set (charts) + if (unlikely(!port->st_bytes)) { + port->st_bytes = rrdset_create_localhost( + "Infiniband", + port->chart_id_bytes, + NULL, + port->chart_family, + "ib.bytes", + "Bandwidth usage", + "kilobits/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 1, + update_every, + RRDSET_TYPE_AREA); + // Create Dimensions + rrdset_flag_set(port->st_bytes, RRDSET_FLAG_DETAIL); + // On this chart, we want to have a KB/s so the dashboard will autoscale it + // The reported values are also per-lane, so we must multiply it by the width + // x4 lanes multiplier as per Documentation/ABI/stable/sysfs-class-infiniband + FOREACH_COUNTER_BYTES(GEN_RRD_DIM_ADD_CUSTOM, port, 4 * 8 * port->width, 1024, RRD_ALGORITHM_INCREMENTAL) + + port->stv_speed = rrdsetvar_custom_chart_variable_add_and_acquire(port->st_bytes, "link_speed"); + } + + // Link read values to dimensions + FOREACH_COUNTER_BYTES(GEN_RRD_DIM_SETP, port) + + // For link speed set only variable + rrdsetvar_custom_chart_variable_set(port->st_bytes, port->stv_speed, port->speed); + + rrdset_done(port->st_bytes); + } + + if (port->do_packets != CONFIG_BOOLEAN_NO) { + // Read values from sysfs + FOREACH_COUNTER_PACKETS(GEN_DO_COUNTER_READ, port) + + // First creation of RRD Set (charts) + if (unlikely(!port->st_packets)) { + port->st_packets = rrdset_create_localhost( + "Infiniband", + port->chart_id_packets, + NULL, + port->chart_family, + "ib.packets", + "Packets Statistics", + "packets/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 2, + update_every, + RRDSET_TYPE_AREA); + // Create Dimensions + rrdset_flag_set(port->st_packets, RRDSET_FLAG_DETAIL); + FOREACH_COUNTER_PACKETS(GEN_RRD_DIM_ADD, port) + } + + // Link read values to dimensions + FOREACH_COUNTER_PACKETS(GEN_RRD_DIM_SETP, port) + rrdset_done(port->st_packets); + } + + if (port->do_errors != CONFIG_BOOLEAN_NO) { + // Read values from sysfs + FOREACH_COUNTER_ERRORS(GEN_DO_COUNTER_READ, port) + + // First creation of RRD Set (charts) + if (unlikely(!port->st_errors)) { + port->st_errors = rrdset_create_localhost( + "Infiniband", + port->chart_id_errors, + NULL, + port->chart_family, + "ib.errors", + "Error Counters", + "errors/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 3, + update_every, + RRDSET_TYPE_LINE); + // Create Dimensions + rrdset_flag_set(port->st_errors, RRDSET_FLAG_DETAIL); + FOREACH_COUNTER_ERRORS(GEN_RRD_DIM_ADD, port) + } + + // Link read values to dimensions + FOREACH_COUNTER_ERRORS(GEN_RRD_DIM_SETP, port) + rrdset_done(port->st_errors); + } + + // + // HW Counters + // + + // Call the function for parsing and setting hwcounters + if (port->hwcounters_parse && port->hwcounters_dorrd) { + // Read all values (done by vendor-specific function) + (*port->hwcounters_parse)(port); + + if (port->do_hwerrors != CONFIG_BOOLEAN_NO) { + // First creation of RRD Set (charts) + if (unlikely(!port->st_hwerrors)) { + port->st_hwerrors = rrdset_create_localhost( + "Infiniband", + port->chart_id_hwerrors, + NULL, + port->chart_family, + "ib.hwerrors", + "Hardware Errors", + "errors/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 4, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(port->st_hwerrors, RRDSET_FLAG_DETAIL); + + // VENDORS: Set your selection + + // VENDOR: Mellanox + if (strncmp(port->name, "mlx", 3) == 0) { + FOREACH_HWCOUNTER_MLX_ERRORS(GEN_RRD_DIM_ADD_HW, port, port->hwcounters_mlx) + } + + // Unknown vendor, should not happen + else { + error( + "Unmanaged vendor for '%s', do_hwerrors should have been set to no. Please report this bug", + port->name); + port->do_hwerrors = CONFIG_BOOLEAN_NO; + } + } + } + + if (port->do_hwpackets != CONFIG_BOOLEAN_NO) { + // First creation of RRD Set (charts) + if (unlikely(!port->st_hwpackets)) { + port->st_hwpackets = rrdset_create_localhost( + "Infiniband", + port->chart_id_hwpackets, + NULL, + port->chart_family, + "ib.hwpackets", + "Hardware Packets Statistics", + "packets/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 5, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(port->st_hwpackets, RRDSET_FLAG_DETAIL); + + // VENDORS: Set your selection + + // VENDOR: Mellanox + if (strncmp(port->name, "mlx", 3) == 0) { + FOREACH_HWCOUNTER_MLX_PACKETS(GEN_RRD_DIM_ADD_HW, port, port->hwcounters_mlx) + } + + // Unknown vendor, should not happen + else { + error( + "Unmanaged vendor for '%s', do_hwpackets should have been set to no. Please report this bug", + port->name); + port->do_hwpackets = CONFIG_BOOLEAN_NO; + } + } + } + + // Update values to rrd (done by vendor-specific function) + (*port->hwcounters_dorrd)(port); + } + } + + return 0; +} diff --git a/collectors/proc.plugin/sys_class_power_supply.c b/collectors/proc.plugin/sys_class_power_supply.c new file mode 100644 index 0000000..dde4215 --- /dev/null +++ b/collectors/proc.plugin/sys_class_power_supply.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_POWER_SUPPLY_NAME "/sys/class/power_supply" + +const char *ps_property_names[] = { "charge", "energy", "voltage"}; +const char *ps_property_titles[] = {"Battery charge", "Battery energy", "Power supply voltage"}; +const char *ps_property_units[] = { "Ah", "Wh", "V"}; + +const char *ps_property_dim_names[] = {"empty_design", "empty", "now", "full", "full_design", + "empty_design", "empty", "now", "full", "full_design", + "min_design", "min", "now", "max", "max_design"}; + +struct ps_property_dim { + char *name; + char *filename; + int fd; + + RRDDIM *rd; + unsigned long long value; + int always_zero; + + struct ps_property_dim *next; +}; + +struct ps_property { + char *name; + char *title; + char *units; + + RRDSET *st; + + struct ps_property_dim *property_dim_root; + + struct ps_property *next; +}; + +struct capacity { + char *filename; + int fd; + + RRDSET *st; + RRDDIM *rd; + unsigned long long value; +}; + +struct power_supply { + char *name; + uint32_t hash; + int found; + + struct capacity *capacity; + + struct ps_property *property_root; + + struct power_supply *next; +}; + +static struct power_supply *power_supply_root = NULL; +static int files_num = 0; + +void power_supply_free(struct power_supply *ps) { + if(likely(ps)) { + + // free capacity structure + if(likely(ps->capacity)) { + if(likely(ps->capacity->st)) rrdset_is_obsolete(ps->capacity->st); + freez(ps->capacity->filename); + if(likely(ps->capacity->fd != -1)) close(ps->capacity->fd); + files_num--; + freez(ps->capacity); + } + freez(ps->name); + + struct ps_property *pr = ps->property_root; + while(likely(pr)) { + + // free dimensions + struct ps_property_dim *pd = pr->property_dim_root; + while(likely(pd)) { + freez(pd->name); + freez(pd->filename); + if(likely(pd->fd != -1)) close(pd->fd); + files_num--; + struct ps_property_dim *d = pd; + pd = pd->next; + freez(d); + } + + // free properties + if(likely(pr->st)) rrdset_is_obsolete(pr->st); + freez(pr->name); + freez(pr->title); + freez(pr->units); + struct ps_property *p = pr; + pr = pr->next; + freez(p); + } + + // remove power supply from linked list + if(likely(ps == power_supply_root)) { + power_supply_root = ps->next; + } + else { + struct power_supply *last; + for(last = power_supply_root; last && last->next != ps; last = last->next); + if(likely(last)) last->next = ps->next; + } + + freez(ps); + } +} + +static void add_labels_to_power_supply(struct power_supply *ps, RRDSET *st) { + rrdlabels_add(st->rrdlabels, "device", ps->name, RRDLABEL_SRC_AUTO); +} + +int do_sys_class_power_supply(int update_every, usec_t dt) { + (void)dt; + static int do_capacity = -1, do_property[3] = {-1}; + static int keep_fds_open = CONFIG_BOOLEAN_NO, keep_fds_open_config = -1; + static char *dirname = NULL; + + if(unlikely(do_capacity == -1)) { + do_capacity = config_get_boolean("plugin:proc:/sys/class/power_supply", "battery capacity", CONFIG_BOOLEAN_YES); + do_property[0] = config_get_boolean("plugin:proc:/sys/class/power_supply", "battery charge", CONFIG_BOOLEAN_NO); + do_property[1] = config_get_boolean("plugin:proc:/sys/class/power_supply", "battery energy", CONFIG_BOOLEAN_NO); + do_property[2] = config_get_boolean("plugin:proc:/sys/class/power_supply", "power supply voltage", CONFIG_BOOLEAN_NO); + + keep_fds_open_config = config_get_boolean_ondemand("plugin:proc:/sys/class/power_supply", "keep files open", CONFIG_BOOLEAN_AUTO); + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/power_supply"); + dirname = config_get("plugin:proc:/sys/class/power_supply", "directory to monitor", filename); + } + + DIR *dir = opendir(dirname); + if(unlikely(!dir)) { + error("Cannot read directory '%s'", dirname); + return 1; + } + + struct dirent *de = NULL; + while(likely(de = readdir(dir))) { + if(likely(de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + ))) + continue; + + if(likely(de->d_type == DT_LNK || de->d_type == DT_DIR)) { + uint32_t hash = simple_hash(de->d_name); + + struct power_supply *ps; + for(ps = power_supply_root; ps; ps = ps->next) { + if(unlikely(ps->hash == hash && !strcmp(ps->name, de->d_name))) { + ps->found = 1; + break; + } + } + + // allocate memory for power supply and initialize it + if(unlikely(!ps)) { + ps = callocz(sizeof(struct power_supply), 1); + ps->name = strdupz(de->d_name); + ps->hash = simple_hash(de->d_name); + ps->found = 1; + ps->next = power_supply_root; + power_supply_root = ps; + + struct stat stbuf; + if(likely(do_capacity != CONFIG_BOOLEAN_NO)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s/%s", dirname, de->d_name, "capacity"); + if (stat(filename, &stbuf) == 0) { + ps->capacity = callocz(sizeof(struct capacity), 1); + ps->capacity->filename = strdupz(filename); + ps->capacity->fd = -1; + files_num++; + } + } + + // allocate memory and initialize structures for every property and file found + size_t pr_idx, pd_idx; + size_t prev_idx = 3; // there is no property with this index + + for(pr_idx = 0; pr_idx < 3; pr_idx++) { + if(unlikely(do_property[pr_idx] != CONFIG_BOOLEAN_NO)) { + struct ps_property *pr = NULL; + int min_value_found = 0, max_value_found = 0; + + for(pd_idx = pr_idx * 5; pd_idx < pr_idx * 5 + 5; pd_idx++) { + + // check if file exists + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s/%s_%s", dirname, de->d_name, + ps_property_names[pr_idx], ps_property_dim_names[pd_idx]); + if (stat(filename, &stbuf) == 0) { + + if(unlikely(pd_idx == pr_idx * 5 + 1)) + min_value_found = 1; + if(unlikely(pd_idx == pr_idx * 5 + 3)) + max_value_found = 1; + + // add chart + if(unlikely(prev_idx != pr_idx)) { + pr = callocz(sizeof(struct ps_property), 1); + pr->name = strdupz(ps_property_names[pr_idx]); + pr->title = strdupz(ps_property_titles[pr_idx]); + pr->units = strdupz(ps_property_units[pr_idx]); + prev_idx = pr_idx; + pr->next = ps->property_root; + ps->property_root = pr; + } + + // add dimension + struct ps_property_dim *pd; + pd= callocz(sizeof(struct ps_property_dim), 1); + pd->name = strdupz(ps_property_dim_names[pd_idx]); + pd->filename = strdupz(filename); + pd->fd = -1; + files_num++; + pd->next = pr->property_dim_root; + pr->property_dim_root = pd; + } + } + + // create a zero empty/min dimension + if(unlikely(max_value_found && !min_value_found)) { + struct ps_property_dim *pd; + pd= callocz(sizeof(struct ps_property_dim), 1); + pd->name = strdupz(ps_property_dim_names[pr_idx * 5 + 1]); + pd->always_zero = 1; + pd->next = pr->property_dim_root; + pr->property_dim_root = pd; + } + } + } + } + + // read capacity file + if(likely(ps->capacity)) { + char buffer[30 + 1]; + + if(unlikely(ps->capacity->fd == -1)) { + ps->capacity->fd = open(ps->capacity->filename, O_RDONLY, 0666); + if(unlikely(ps->capacity->fd == -1)) { + error("Cannot open file '%s'", ps->capacity->filename); + power_supply_free(ps); + ps = NULL; + } + } + + if (ps) + { + ssize_t r = read(ps->capacity->fd, buffer, 30); + if(unlikely(r < 1)) { + error("Cannot read file '%s'", ps->capacity->filename); + power_supply_free(ps); + ps = NULL; + } + else { + buffer[r] = '\0'; + ps->capacity->value = str2ull(buffer); + + if(unlikely(!keep_fds_open)) { + close(ps->capacity->fd); + ps->capacity->fd = -1; + } + else if(unlikely(lseek(ps->capacity->fd, 0, SEEK_SET) == -1)) { + error("Cannot seek in file '%s'", ps->capacity->filename); + close(ps->capacity->fd); + ps->capacity->fd = -1; + } + } + } + } + + // read property files + int read_error = 0; + struct ps_property *pr; + if (ps) + { + for(pr = ps->property_root; pr && !read_error; pr = pr->next) { + struct ps_property_dim *pd; + for(pd = pr->property_dim_root; pd; pd = pd->next) { + if(likely(!pd->always_zero)) { + char buffer[30 + 1]; + + if(unlikely(pd->fd == -1)) { + pd->fd = open(pd->filename, O_RDONLY, 0666); + if(unlikely(pd->fd == -1)) { + error("Cannot open file '%s'", pd->filename); + read_error = 1; + power_supply_free(ps); + break; + } + } + + ssize_t r = read(pd->fd, buffer, 30); + if(unlikely(r < 1)) { + error("Cannot read file '%s'", pd->filename); + read_error = 1; + power_supply_free(ps); + break; + } + buffer[r] = '\0'; + pd->value = str2ull(buffer); + + if(unlikely(!keep_fds_open)) { + close(pd->fd); + pd->fd = -1; + } + else if(unlikely(lseek(pd->fd, 0, SEEK_SET) == -1)) { + error("Cannot seek in file '%s'", pd->filename); + close(pd->fd); + pd->fd = -1; + } + } + } + } + } + } + } + + closedir(dir); + + keep_fds_open = keep_fds_open_config; + if(likely(keep_fds_open_config == CONFIG_BOOLEAN_AUTO)) { + if(unlikely(files_num > 32)) + keep_fds_open = CONFIG_BOOLEAN_NO; + else + keep_fds_open = CONFIG_BOOLEAN_YES; + } + + // -------------------------------------------------------------------- + + struct power_supply *ps = power_supply_root; + while(unlikely(ps)) { + if(unlikely(!ps->found)) { + struct power_supply *f = ps; + ps = ps->next; + power_supply_free(f); + continue; + } + + if(likely(ps->capacity)) { + if(unlikely(!ps->capacity->st)) { + ps->capacity->st = rrdset_create_localhost( + "powersupply_capacity" + , ps->name + , NULL + , ps->name + , "powersupply.capacity" + , "Battery capacity" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_POWER_SUPPLY_NAME + , NETDATA_CHART_PRIO_POWER_SUPPLY_CAPACITY + , update_every + , RRDSET_TYPE_LINE + ); + + add_labels_to_power_supply(ps, ps->capacity->st); + } + + if(unlikely(!ps->capacity->rd)) ps->capacity->rd = rrddim_add(ps->capacity->st, "capacity", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_set_by_pointer(ps->capacity->st, ps->capacity->rd, ps->capacity->value); + + rrdset_done(ps->capacity->st); + } + + struct ps_property *pr; + for(pr = ps->property_root; pr; pr = pr->next) { + if(unlikely(!pr->st)) { + char id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "powersupply_%s", pr->name); + snprintfz(context, RRD_ID_LENGTH_MAX, "powersupply.%s", pr->name); + + pr->st = rrdset_create_localhost( + id + , ps->name + , NULL + , ps->name + , context + , pr->title + , pr->units + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_POWER_SUPPLY_NAME + , NETDATA_CHART_PRIO_POWER_SUPPLY_CAPACITY + , update_every + , RRDSET_TYPE_LINE + ); + + add_labels_to_power_supply(ps, pr->st); + } + + struct ps_property_dim *pd; + for(pd = pr->property_dim_root; pd; pd = pd->next) { + if(unlikely(!pd->rd)) pd->rd = rrddim_add(pr->st, pd->name, NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); + rrddim_set_by_pointer(pr->st, pd->rd, pd->value); + } + + rrdset_done(pr->st); + } + + ps->found = 0; + ps = ps->next; + } + + return 0; +} diff --git a/collectors/proc.plugin/sys_devices_system_edac_mc.c b/collectors/proc.plugin/sys_devices_system_edac_mc.c new file mode 100644 index 0000000..13d2097 --- /dev/null +++ b/collectors/proc.plugin/sys_devices_system_edac_mc.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +struct mc { + char *name; + char ce_updated; + char ue_updated; + + char *ce_count_filename; + char *ue_count_filename; + + procfile *ce_ff; + procfile *ue_ff; + + collected_number ce_count; + collected_number ue_count; + + RRDDIM *ce_rd; + RRDDIM *ue_rd; + + struct mc *next; +}; +static struct mc *mc_root = NULL; + +static void find_all_mc() { + char name[FILENAME_MAX + 1]; + snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/edac/mc"); + char *dirname = config_get("plugin:proc:/sys/devices/system/edac/mc", "directory to monitor", name); + + DIR *dir = opendir(dirname); + if(unlikely(!dir)) { + error("Cannot read ECC memory errors directory '%s'", dirname); + return; + } + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR && de->d_name[0] == 'm' && de->d_name[1] == 'c' && isdigit(de->d_name[2])) { + struct mc *m = callocz(1, sizeof(struct mc)); + m->name = strdupz(de->d_name); + + struct stat st; + + snprintfz(name, FILENAME_MAX, "%s/%s/ce_count", dirname, de->d_name); + if(stat(name, &st) != -1) + m->ce_count_filename = strdupz(name); + + snprintfz(name, FILENAME_MAX, "%s/%s/ue_count", dirname, de->d_name); + if(stat(name, &st) != -1) + m->ue_count_filename = strdupz(name); + + if(!m->ce_count_filename && !m->ue_count_filename) { + freez(m->name); + freez(m); + } + else { + m->next = mc_root; + mc_root = m; + } + } + } + + closedir(dir); +} + +int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt) { + (void)dt; + + if(unlikely(mc_root == NULL)) { + find_all_mc(); + if(unlikely(mc_root == NULL)) + return 1; + } + + static int do_ce = -1, do_ue = -1; + NETDATA_DOUBLE ce_sum = 0, ue_sum = 0; + struct mc *m; + + if(unlikely(do_ce == -1)) { + do_ce = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory correctable errors", CONFIG_BOOLEAN_YES); + do_ue = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory uncorrectable errors", CONFIG_BOOLEAN_YES); + } + + if(do_ce != CONFIG_BOOLEAN_NO) { + for(m = mc_root; m; m = m->next) { + if(m->ce_count_filename) { + m->ce_updated = 0; + + if(unlikely(!m->ce_ff)) { + m->ce_ff = procfile_open(m->ce_count_filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!m->ce_ff)) + continue; + } + + m->ce_ff = procfile_readall(m->ce_ff); + if(unlikely(!m->ce_ff || procfile_lines(m->ce_ff) < 1 || procfile_linewords(m->ce_ff, 0) < 1)) + continue; + + m->ce_count = str2ull(procfile_lineword(m->ce_ff, 0, 0)); + ce_sum += m->ce_count; + m->ce_updated = 1; + } + } + } + + if(do_ue != CONFIG_BOOLEAN_NO) { + for(m = mc_root; m; m = m->next) { + if(m->ue_count_filename) { + m->ue_updated = 0; + + if(unlikely(!m->ue_ff)) { + m->ue_ff = procfile_open(m->ue_count_filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!m->ue_ff)) + continue; + } + + m->ue_ff = procfile_readall(m->ue_ff); + if(unlikely(!m->ue_ff || procfile_lines(m->ue_ff) < 1 || procfile_linewords(m->ue_ff, 0) < 1)) + continue; + + m->ue_count = str2ull(procfile_lineword(m->ue_ff, 0, 0)); + ue_sum += m->ue_count; + m->ue_updated = 1; + } + } + } + + // -------------------------------------------------------------------- + + if(do_ce == CONFIG_BOOLEAN_YES || (do_ce == CONFIG_BOOLEAN_AUTO && + (ce_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ce = CONFIG_BOOLEAN_YES; + + static RRDSET *ce_st = NULL; + + if(unlikely(!ce_st)) { + ce_st = rrdset_create_localhost( + "mem" + , "ecc_ce" + , NULL + , "ecc" + , NULL + , "ECC Memory Correctable Errors" + , "errors" + , PLUGIN_PROC_NAME + , "/sys/devices/system/edac/mc" + , NETDATA_CHART_PRIO_MEM_HW_ECC_CE + , update_every + , RRDSET_TYPE_LINE + ); + } + + for(m = mc_root; m; m = m->next) { + if (m->ce_count_filename && m->ce_updated) { + if(unlikely(!m->ce_rd)) + m->ce_rd = rrddim_add(ce_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(ce_st, m->ce_rd, m->ce_count); + } + } + + rrdset_done(ce_st); + } + + // -------------------------------------------------------------------- + + if(do_ue == CONFIG_BOOLEAN_YES || (do_ue == CONFIG_BOOLEAN_AUTO && + (ue_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_ue = CONFIG_BOOLEAN_YES; + + static RRDSET *ue_st = NULL; + + if(unlikely(!ue_st)) { + ue_st = rrdset_create_localhost( + "mem" + , "ecc_ue" + , NULL + , "ecc" + , NULL + , "ECC Memory Uncorrectable Errors" + , "errors" + , PLUGIN_PROC_NAME + , "/sys/devices/system/edac/mc" + , NETDATA_CHART_PRIO_MEM_HW_ECC_UE + , update_every + , RRDSET_TYPE_LINE + ); + } + + for(m = mc_root; m; m = m->next) { + if (m->ue_count_filename && m->ue_updated) { + if(unlikely(!m->ue_rd)) + m->ue_rd = rrddim_add(ue_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(ue_st, m->ue_rd, m->ue_count); + } + } + + rrdset_done(ue_st); + } + + return 0; +} diff --git a/collectors/proc.plugin/sys_devices_system_node.c b/collectors/proc.plugin/sys_devices_system_node.c new file mode 100644 index 0000000..90aafd5 --- /dev/null +++ b/collectors/proc.plugin/sys_devices_system_node.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +struct node { + char *name; + char *numastat_filename; + procfile *numastat_ff; + RRDSET *numastat_st; + struct node *next; +}; +static struct node *numa_root = NULL; + +static int find_all_nodes() { + int numa_node_count = 0; + char name[FILENAME_MAX + 1]; + snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/node"); + char *dirname = config_get("plugin:proc:/sys/devices/system/node", "directory to monitor", name); + + DIR *dir = opendir(dirname); + if(!dir) { + error("Cannot read NUMA node directory '%s'", dirname); + return 0; + } + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type != DT_DIR) + continue; + + if(strncmp(de->d_name, "node", 4) != 0) + continue; + + if(!isdigit(de->d_name[4])) + continue; + + numa_node_count++; + + struct node *m = callocz(1, sizeof(struct node)); + m->name = strdupz(de->d_name); + + struct stat st; + + snprintfz(name, FILENAME_MAX, "%s/%s/numastat", dirname, de->d_name); + if(stat(name, &st) == -1) { + freez(m->name); + freez(m); + continue; + } + + m->numastat_filename = strdupz(name); + + m->next = numa_root; + numa_root = m; + } + + closedir(dir); + + return numa_node_count; +} + +int do_proc_sys_devices_system_node(int update_every, usec_t dt) { + (void)dt; + + static uint32_t hash_local_node = 0, hash_numa_foreign = 0, hash_interleave_hit = 0, hash_other_node = 0, hash_numa_hit = 0, hash_numa_miss = 0; + static int do_numastat = -1, numa_node_count = 0; + struct node *m; + + if(unlikely(numa_root == NULL)) { + numa_node_count = find_all_nodes(); + if(unlikely(numa_root == NULL)) + return 1; + } + + if(unlikely(do_numastat == -1)) { + do_numastat = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/node", "enable per-node numa metrics", CONFIG_BOOLEAN_AUTO); + + hash_local_node = simple_hash("local_node"); + hash_numa_foreign = simple_hash("numa_foreign"); + hash_interleave_hit = simple_hash("interleave_hit"); + hash_other_node = simple_hash("other_node"); + hash_numa_hit = simple_hash("numa_hit"); + hash_numa_miss = simple_hash("numa_miss"); + } + + if(do_numastat == CONFIG_BOOLEAN_YES || (do_numastat == CONFIG_BOOLEAN_AUTO && + (numa_node_count >= 2 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + for(m = numa_root; m; m = m->next) { + if(m->numastat_filename) { + + if(unlikely(!m->numastat_ff)) { + m->numastat_ff = procfile_open(m->numastat_filename, " ", PROCFILE_FLAG_DEFAULT); + + if(unlikely(!m->numastat_ff)) + continue; + } + + m->numastat_ff = procfile_readall(m->numastat_ff); + if(unlikely(!m->numastat_ff || procfile_lines(m->numastat_ff) < 1 || procfile_linewords(m->numastat_ff, 0) < 1)) + continue; + + if(unlikely(!m->numastat_st)) { + m->numastat_st = rrdset_create_localhost( + "mem" + , m->name + , NULL + , "numa" + , NULL + , "NUMA events" + , "events/s" + , PLUGIN_PROC_NAME + , "/sys/devices/system/node" + , NETDATA_CHART_PRIO_MEM_NUMA_NODES + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(m->numastat_st->rrdlabels, "numa_node", m->name, RRDLABEL_SRC_AUTO); + + rrdset_flag_set(m->numastat_st, RRDSET_FLAG_DETAIL); + + rrddim_add(m->numastat_st, "numa_hit", "hit", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(m->numastat_st, "numa_miss", "miss", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(m->numastat_st, "local_node", "local", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(m->numastat_st, "numa_foreign", "foreign", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(m->numastat_st, "interleave_hit", "interleave", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(m->numastat_st, "other_node", "other", 1, 1, RRD_ALGORITHM_INCREMENTAL); + + } + + size_t lines = procfile_lines(m->numastat_ff), l; + for(l = 0; l < lines; l++) { + size_t words = procfile_linewords(m->numastat_ff, l); + + if(unlikely(words < 2)) { + if(unlikely(words)) + error("Cannot read %s numastat line %zu. Expected 2 params, read %zu.", m->name, l, words); + continue; + } + + char *name = procfile_lineword(m->numastat_ff, l, 0); + char *value = procfile_lineword(m->numastat_ff, l, 1); + + if (unlikely(!name || !*name || !value || !*value)) + continue; + + uint32_t hash = simple_hash(name); + if(likely( + (hash == hash_numa_hit && !strcmp(name, "numa_hit")) + || (hash == hash_numa_miss && !strcmp(name, "numa_miss")) + || (hash == hash_local_node && !strcmp(name, "local_node")) + || (hash == hash_numa_foreign && !strcmp(name, "numa_foreign")) + || (hash == hash_interleave_hit && !strcmp(name, "interleave_hit")) + || (hash == hash_other_node && !strcmp(name, "other_node")) + )) + rrddim_set(m->numastat_st, name, (collected_number)str2kernel_uint_t(value)); + } + + rrdset_done(m->numastat_st); + } + } + } + + return 0; +} diff --git a/collectors/proc.plugin/sys_fs_btrfs.c b/collectors/proc.plugin/sys_fs_btrfs.c new file mode 100644 index 0000000..3b9841f --- /dev/null +++ b/collectors/proc.plugin/sys_fs_btrfs.c @@ -0,0 +1,743 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_BTRFS_NAME "/sys/fs/btrfs" + +typedef struct btrfs_disk { + char *name; + uint32_t hash; + int exists; + + char *size_filename; + char *hw_sector_size_filename; + unsigned long long size; + unsigned long long hw_sector_size; + + struct btrfs_disk *next; +} BTRFS_DISK; + +typedef struct btrfs_node { + int exists; + int logged_error; + + char *id; + uint32_t hash; + + char *label; + + // unsigned long long int sectorsize; + // unsigned long long int nodesize; + // unsigned long long int quota_override; + + #define declare_btrfs_allocation_section_field(SECTION, FIELD) \ + char *allocation_ ## SECTION ## _ ## FIELD ## _filename; \ + unsigned long long int allocation_ ## SECTION ## _ ## FIELD; + + #define declare_btrfs_allocation_field(FIELD) \ + char *allocation_ ## FIELD ## _filename; \ + unsigned long long int allocation_ ## FIELD; + + RRDSET *st_allocation_disks; + RRDDIM *rd_allocation_disks_unallocated; + RRDDIM *rd_allocation_disks_data_used; + RRDDIM *rd_allocation_disks_data_free; + RRDDIM *rd_allocation_disks_metadata_used; + RRDDIM *rd_allocation_disks_metadata_free; + RRDDIM *rd_allocation_disks_system_used; + RRDDIM *rd_allocation_disks_system_free; + unsigned long long all_disks_total; + + RRDSET *st_allocation_data; + RRDDIM *rd_allocation_data_free; + RRDDIM *rd_allocation_data_used; + declare_btrfs_allocation_section_field(data, total_bytes) + declare_btrfs_allocation_section_field(data, bytes_used) + declare_btrfs_allocation_section_field(data, disk_total) + declare_btrfs_allocation_section_field(data, disk_used) + + RRDSET *st_allocation_metadata; + RRDDIM *rd_allocation_metadata_free; + RRDDIM *rd_allocation_metadata_used; + RRDDIM *rd_allocation_metadata_reserved; + declare_btrfs_allocation_section_field(metadata, total_bytes) + declare_btrfs_allocation_section_field(metadata, bytes_used) + declare_btrfs_allocation_section_field(metadata, disk_total) + declare_btrfs_allocation_section_field(metadata, disk_used) + //declare_btrfs_allocation_field(global_rsv_reserved) + declare_btrfs_allocation_field(global_rsv_size) + + RRDSET *st_allocation_system; + RRDDIM *rd_allocation_system_free; + RRDDIM *rd_allocation_system_used; + declare_btrfs_allocation_section_field(system, total_bytes) + declare_btrfs_allocation_section_field(system, bytes_used) + declare_btrfs_allocation_section_field(system, disk_total) + declare_btrfs_allocation_section_field(system, disk_used) + + BTRFS_DISK *disks; + + struct btrfs_node *next; +} BTRFS_NODE; + +static BTRFS_NODE *nodes = NULL; + +static inline void btrfs_free_disk(BTRFS_DISK *d) { + freez(d->name); + freez(d->size_filename); + freez(d->hw_sector_size_filename); + freez(d); +} + +static inline void btrfs_free_node(BTRFS_NODE *node) { + // info("BTRFS: destroying '%s'", node->id); + + if(node->st_allocation_disks) + rrdset_is_obsolete(node->st_allocation_disks); + + if(node->st_allocation_data) + rrdset_is_obsolete(node->st_allocation_data); + + if(node->st_allocation_metadata) + rrdset_is_obsolete(node->st_allocation_metadata); + + if(node->st_allocation_system) + rrdset_is_obsolete(node->st_allocation_system); + + freez(node->allocation_data_bytes_used_filename); + freez(node->allocation_data_total_bytes_filename); + + freez(node->allocation_metadata_bytes_used_filename); + freez(node->allocation_metadata_total_bytes_filename); + + freez(node->allocation_system_bytes_used_filename); + freez(node->allocation_system_total_bytes_filename); + + while(node->disks) { + BTRFS_DISK *d = node->disks; + node->disks = node->disks->next; + btrfs_free_disk(d); + } + + freez(node->label); + freez(node->id); + freez(node); +} + +static inline int find_btrfs_disks(BTRFS_NODE *node, const char *path) { + char filename[FILENAME_MAX + 1]; + + node->all_disks_total = 0; + + BTRFS_DISK *d; + for(d = node->disks ; d ; d = d->next) + d->exists = 0; + + DIR *dir = opendir(path); + if (!dir) { + if(!node->logged_error) { + error("BTRFS: Cannot open directory '%s'.", path); + node->logged_error = 1; + } + return 1; + } + node->logged_error = 0; + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type != DT_LNK + || !strcmp(de->d_name, ".") + || !strcmp(de->d_name, "..") + ) { + // info("BTRFS: ignoring '%s'", de->d_name); + continue; + } + + uint32_t hash = simple_hash(de->d_name); + + // -------------------------------------------------------------------- + // search for it + + for(d = node->disks ; d ; d = d->next) { + if(hash == d->hash && !strcmp(de->d_name, d->name)) + break; + } + + // -------------------------------------------------------------------- + // did we find it? + + if(!d) { + d = callocz(sizeof(BTRFS_DISK), 1); + + d->name = strdupz(de->d_name); + d->hash = simple_hash(d->name); + + snprintfz(filename, FILENAME_MAX, "%s/%s/size", path, de->d_name); + d->size_filename = strdupz(filename); + + // for bcache + snprintfz(filename, FILENAME_MAX, "%s/%s/bcache/../queue/hw_sector_size", path, de->d_name); + struct stat sb; + if(stat(filename, &sb) == -1) { + // for disks + snprintfz(filename, FILENAME_MAX, "%s/%s/queue/hw_sector_size", path, de->d_name); + if(stat(filename, &sb) == -1) + // for partitions + snprintfz(filename, FILENAME_MAX, "%s/%s/../queue/hw_sector_size", path, de->d_name); + } + + d->hw_sector_size_filename = strdupz(filename); + + // link it + d->next = node->disks; + node->disks = d; + } + + d->exists = 1; + + + // -------------------------------------------------------------------- + // update the values + + if(read_single_number_file(d->size_filename, &d->size) != 0) { + error("BTRFS: failed to read '%s'", d->size_filename); + d->exists = 0; + continue; + } + + if(read_single_number_file(d->hw_sector_size_filename, &d->hw_sector_size) != 0) { + error("BTRFS: failed to read '%s'", d->hw_sector_size_filename); + d->exists = 0; + continue; + } + + node->all_disks_total += d->size * d->hw_sector_size; + } + closedir(dir); + + // ------------------------------------------------------------------------ + // cleanup + + BTRFS_DISK *last = NULL; + d = node->disks; + + while(d) { + if(unlikely(!d->exists)) { + if(unlikely(node->disks == d)) { + node->disks = d->next; + btrfs_free_disk(d); + d = node->disks; + last = NULL; + } + else { + last->next = d->next; + btrfs_free_disk(d); + d = last->next; + } + + continue; + } + + last = d; + d = d->next; + } + + return 0; +} + + +static inline int find_all_btrfs_pools(const char *path) { + static int logged_error = 0; + char filename[FILENAME_MAX + 1]; + + BTRFS_NODE *node; + for(node = nodes ; node ; node = node->next) + node->exists = 0; + + DIR *dir = opendir(path); + if (!dir) { + if(!logged_error) { + error("BTRFS: Cannot open directory '%s'.", path); + logged_error = 1; + } + return 1; + } + logged_error = 0; + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if(de->d_type != DT_DIR + || !strcmp(de->d_name, ".") + || !strcmp(de->d_name, "..") + || !strcmp(de->d_name, "features") + ) { + // info("BTRFS: ignoring '%s'", de->d_name); + continue; + } + + uint32_t hash = simple_hash(de->d_name); + + // search for it + for(node = nodes ; node ; node = node->next) { + if(hash == node->hash && !strcmp(de->d_name, node->id)) + break; + } + + // did we find it? + if(node) { + // info("BTRFS: already exists '%s'", de->d_name); + node->exists = 1; + + // update the disk sizes + snprintfz(filename, FILENAME_MAX, "%s/%s/devices", path, de->d_name); + find_btrfs_disks(node, filename); + + continue; + } + + // info("BTRFS: adding '%s'", de->d_name); + + // not found, create it + node = callocz(sizeof(BTRFS_NODE), 1); + + node->id = strdupz(de->d_name); + node->hash = simple_hash(node->id); + node->exists = 1; + + { + char label[FILENAME_MAX + 1] = ""; + + snprintfz(filename, FILENAME_MAX, "%s/%s/label", path, de->d_name); + if(read_file(filename, label, FILENAME_MAX) != 0) { + error("BTRFS: failed to read '%s'", filename); + btrfs_free_node(node); + continue; + } + + char *s = label; + if (s[0]) + s = trim(label); + + if(s && s[0]) + node->label = strdupz(s); + else + node->label = strdupz(node->id); + } + + //snprintfz(filename, FILENAME_MAX, "%s/%s/sectorsize", path, de->d_name); + //if(read_single_number_file(filename, &node->sectorsize) != 0) { + // error("BTRFS: failed to read '%s'", filename); + // btrfs_free_node(node); + // continue; + //} + + //snprintfz(filename, FILENAME_MAX, "%s/%s/nodesize", path, de->d_name); + //if(read_single_number_file(filename, &node->nodesize) != 0) { + // error("BTRFS: failed to read '%s'", filename); + // btrfs_free_node(node); + // continue; + //} + + //snprintfz(filename, FILENAME_MAX, "%s/%s/quota_override", path, de->d_name); + //if(read_single_number_file(filename, &node->quota_override) != 0) { + // error("BTRFS: failed to read '%s'", filename); + // btrfs_free_node(node); + // continue; + //} + + // -------------------------------------------------------------------- + // macros to simplify our life + + #define init_btrfs_allocation_field(FIELD) {\ + snprintfz(filename, FILENAME_MAX, "%s/%s/allocation/" #FIELD, path, de->d_name); \ + if(read_single_number_file(filename, &node->allocation_ ## FIELD) != 0) {\ + error("BTRFS: failed to read '%s'", filename);\ + btrfs_free_node(node);\ + continue;\ + }\ + if(!node->allocation_ ## FIELD ## _filename)\ + node->allocation_ ## FIELD ## _filename = strdupz(filename);\ + } + + #define init_btrfs_allocation_section_field(SECTION, FIELD) {\ + snprintfz(filename, FILENAME_MAX, "%s/%s/allocation/" #SECTION "/" #FIELD, path, de->d_name); \ + if(read_single_number_file(filename, &node->allocation_ ## SECTION ## _ ## FIELD) != 0) {\ + error("BTRFS: failed to read '%s'", filename);\ + btrfs_free_node(node);\ + continue;\ + }\ + if(!node->allocation_ ## SECTION ## _ ## FIELD ## _filename)\ + node->allocation_ ## SECTION ## _ ## FIELD ## _filename = strdupz(filename);\ + } + + // -------------------------------------------------------------------- + // allocation/data + + init_btrfs_allocation_section_field(data, total_bytes); + init_btrfs_allocation_section_field(data, bytes_used); + init_btrfs_allocation_section_field(data, disk_total); + init_btrfs_allocation_section_field(data, disk_used); + + + // -------------------------------------------------------------------- + // allocation/metadata + + init_btrfs_allocation_section_field(metadata, total_bytes); + init_btrfs_allocation_section_field(metadata, bytes_used); + init_btrfs_allocation_section_field(metadata, disk_total); + init_btrfs_allocation_section_field(metadata, disk_used); + + init_btrfs_allocation_field(global_rsv_size); + // init_btrfs_allocation_field(global_rsv_reserved); + + + // -------------------------------------------------------------------- + // allocation/system + + init_btrfs_allocation_section_field(system, total_bytes); + init_btrfs_allocation_section_field(system, bytes_used); + init_btrfs_allocation_section_field(system, disk_total); + init_btrfs_allocation_section_field(system, disk_used); + + + // -------------------------------------------------------------------- + // find all disks related to this node + // and collect their sizes + + snprintfz(filename, FILENAME_MAX, "%s/%s/devices", path, de->d_name); + find_btrfs_disks(node, filename); + + + // -------------------------------------------------------------------- + // link it + + // info("BTRFS: linking '%s'", node->id); + node->next = nodes; + nodes = node; + } + closedir(dir); + + + // ------------------------------------------------------------------------ + // cleanup + + BTRFS_NODE *last = NULL; + node = nodes; + + while(node) { + if(unlikely(!node->exists)) { + if(unlikely(nodes == node)) { + nodes = node->next; + btrfs_free_node(node); + node = nodes; + last = NULL; + } + else { + last->next = node->next; + btrfs_free_node(node); + node = last->next; + } + + continue; + } + + last = node; + node = node->next; + } + + return 0; +} + +static void add_labels_to_btrfs(BTRFS_NODE *n, RRDSET *st) { + rrdlabels_add(st->rrdlabels, "device", n->id, RRDLABEL_SRC_AUTO); + rrdlabels_add(st->rrdlabels, "device_label", n->label, RRDLABEL_SRC_AUTO); +} + +int do_sys_fs_btrfs(int update_every, usec_t dt) { + static int initialized = 0 + , do_allocation_disks = CONFIG_BOOLEAN_AUTO + , do_allocation_system = CONFIG_BOOLEAN_AUTO + , do_allocation_data = CONFIG_BOOLEAN_AUTO + , do_allocation_metadata = CONFIG_BOOLEAN_AUTO; + + static usec_t refresh_delta = 0, refresh_every = 60 * USEC_PER_SEC; + static char *btrfs_path = NULL; + + (void)dt; + + if(unlikely(!initialized)) { + initialized = 1; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/fs/btrfs"); + btrfs_path = config_get("plugin:proc:/sys/fs/btrfs", "path to monitor", filename); + + refresh_every = config_get_number("plugin:proc:/sys/fs/btrfs", "check for btrfs changes every", refresh_every / USEC_PER_SEC) * USEC_PER_SEC; + refresh_delta = refresh_every; + + do_allocation_disks = config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "physical disks allocation", do_allocation_disks); + do_allocation_data = config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "data allocation", do_allocation_data); + do_allocation_metadata = config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "metadata allocation", do_allocation_metadata); + do_allocation_system = config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "system allocation", do_allocation_system); + } + + refresh_delta += dt; + if(refresh_delta >= refresh_every) { + refresh_delta = 0; + find_all_btrfs_pools(btrfs_path); + } + + BTRFS_NODE *node; + for(node = nodes; node ; node = node->next) { + // -------------------------------------------------------------------- + // allocation/system + + #define collect_btrfs_allocation_field(FIELD) \ + read_single_number_file(node->allocation_ ## FIELD ## _filename, &node->allocation_ ## FIELD) + + #define collect_btrfs_allocation_section_field(SECTION, FIELD) \ + read_single_number_file(node->allocation_ ## SECTION ## _ ## FIELD ## _filename, &node->allocation_ ## SECTION ## _ ## FIELD) + + if(do_allocation_disks != CONFIG_BOOLEAN_NO) { + if( collect_btrfs_allocation_section_field(data, disk_total) != 0 + || collect_btrfs_allocation_section_field(data, disk_used) != 0 + || collect_btrfs_allocation_section_field(metadata, disk_total) != 0 + || collect_btrfs_allocation_section_field(metadata, disk_used) != 0 + || collect_btrfs_allocation_section_field(system, disk_total) != 0 + || collect_btrfs_allocation_section_field(system, disk_used) != 0) { + error("BTRFS: failed to collect physical disks allocation for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + if(do_allocation_data != CONFIG_BOOLEAN_NO) { + if (collect_btrfs_allocation_section_field(data, total_bytes) != 0 + || collect_btrfs_allocation_section_field(data, bytes_used) != 0) { + error("BTRFS: failed to collect allocation/data for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + if(do_allocation_metadata != CONFIG_BOOLEAN_NO) { + if (collect_btrfs_allocation_section_field(metadata, total_bytes) != 0 + || collect_btrfs_allocation_section_field(metadata, bytes_used) != 0 + || collect_btrfs_allocation_field(global_rsv_size) != 0 + ) { + error("BTRFS: failed to collect allocation/metadata for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + if(do_allocation_system != CONFIG_BOOLEAN_NO) { + if (collect_btrfs_allocation_section_field(system, total_bytes) != 0 + || collect_btrfs_allocation_section_field(system, bytes_used) != 0) { + error("BTRFS: failed to collect allocation/system for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + // -------------------------------------------------------------------- + // allocation/disks + + if(do_allocation_disks == CONFIG_BOOLEAN_YES || (do_allocation_disks == CONFIG_BOOLEAN_AUTO && + ((node->all_disks_total && node->allocation_data_disk_total) || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_allocation_disks = CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_disks)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "disk_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "disk_%s", node->label); + snprintf(title, 200, "BTRFS Physical Disk Allocation"); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_disks = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.disk" + , title + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_BTRFS_NAME + , NETDATA_CHART_PRIO_BTRFS_DISK + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_disks_unallocated = rrddim_add(node->st_allocation_disks, "unallocated", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_data_free = rrddim_add(node->st_allocation_disks, "data_free", "data free", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_data_used = rrddim_add(node->st_allocation_disks, "data_used", "data used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_metadata_free = rrddim_add(node->st_allocation_disks, "meta_free", "meta free", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_metadata_used = rrddim_add(node->st_allocation_disks, "meta_used", "meta used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_system_free = rrddim_add(node->st_allocation_disks, "sys_free", "sys free", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_system_used = rrddim_add(node->st_allocation_disks, "sys_used", "sys used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_disks); + } + + // unsigned long long disk_used = node->allocation_data_disk_used + node->allocation_metadata_disk_used + node->allocation_system_disk_used; + unsigned long long disk_total = node->allocation_data_disk_total + node->allocation_metadata_disk_total + node->allocation_system_disk_total; + unsigned long long disk_unallocated = node->all_disks_total - disk_total; + + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_unallocated, disk_unallocated); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_data_used, node->allocation_data_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_data_free, node->allocation_data_disk_total - node->allocation_data_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_metadata_used, node->allocation_metadata_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_metadata_free, node->allocation_metadata_disk_total - node->allocation_metadata_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_system_used, node->allocation_system_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_system_free, node->allocation_system_disk_total - node->allocation_system_disk_used); + rrdset_done(node->st_allocation_disks); + } + + + // -------------------------------------------------------------------- + // allocation/data + + if(do_allocation_data == CONFIG_BOOLEAN_YES || (do_allocation_data == CONFIG_BOOLEAN_AUTO && + (node->allocation_data_total_bytes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_allocation_data = CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_data)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "data_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "data_%s", node->label); + snprintf(title, 200, "BTRFS Data Allocation"); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_data = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.data" + , title + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_BTRFS_NAME + , NETDATA_CHART_PRIO_BTRFS_DATA + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_data_free = rrddim_add(node->st_allocation_data, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_data_used = rrddim_add(node->st_allocation_data, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_data); + } + + rrddim_set_by_pointer(node->st_allocation_data, node->rd_allocation_data_free, node->allocation_data_total_bytes - node->allocation_data_bytes_used); + rrddim_set_by_pointer(node->st_allocation_data, node->rd_allocation_data_used, node->allocation_data_bytes_used); + rrdset_done(node->st_allocation_data); + } + + // -------------------------------------------------------------------- + // allocation/metadata + + if(do_allocation_metadata == CONFIG_BOOLEAN_YES || (do_allocation_metadata == CONFIG_BOOLEAN_AUTO && + (node->allocation_metadata_total_bytes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_allocation_metadata = CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_metadata)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "metadata_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "metadata_%s", node->label); + snprintf(title, 200, "BTRFS Metadata Allocation"); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_metadata = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.metadata" + , title + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_BTRFS_NAME + , NETDATA_CHART_PRIO_BTRFS_METADATA + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_metadata_free = rrddim_add(node->st_allocation_metadata, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_metadata_used = rrddim_add(node->st_allocation_metadata, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_metadata_reserved = rrddim_add(node->st_allocation_metadata, "reserved", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_metadata); + } + + rrddim_set_by_pointer(node->st_allocation_metadata, node->rd_allocation_metadata_free, node->allocation_metadata_total_bytes - node->allocation_metadata_bytes_used - node->allocation_global_rsv_size); + rrddim_set_by_pointer(node->st_allocation_metadata, node->rd_allocation_metadata_used, node->allocation_metadata_bytes_used); + rrddim_set_by_pointer(node->st_allocation_metadata, node->rd_allocation_metadata_reserved, node->allocation_global_rsv_size); + rrdset_done(node->st_allocation_metadata); + } + + // -------------------------------------------------------------------- + // allocation/system + + if(do_allocation_system == CONFIG_BOOLEAN_YES || (do_allocation_system == CONFIG_BOOLEAN_AUTO && + (node->allocation_system_total_bytes || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + do_allocation_system = CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_system)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "system_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "system_%s", node->label); + snprintf(title, 200, "BTRFS System Allocation"); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_system = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.system" + , title + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_BTRFS_NAME + , NETDATA_CHART_PRIO_BTRFS_SYSTEM + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_system_free = rrddim_add(node->st_allocation_system, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_system_used = rrddim_add(node->st_allocation_system, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_system); + } + + rrddim_set_by_pointer(node->st_allocation_system, node->rd_allocation_system_free, node->allocation_system_total_bytes - node->allocation_system_bytes_used); + rrddim_set_by_pointer(node->st_allocation_system, node->rd_allocation_system_used, node->allocation_system_bytes_used); + rrdset_done(node->st_allocation_system); + } + } + + return 0; +} + diff --git a/collectors/proc.plugin/sys_kernel_mm_ksm.c b/collectors/proc.plugin/sys_kernel_mm_ksm.c new file mode 100644 index 0000000..e586d55 --- /dev/null +++ b/collectors/proc.plugin/sys_kernel_mm_ksm.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_KSM_NAME "/sys/kernel/mm/ksm" + +typedef struct ksm_name_value { + char filename[FILENAME_MAX + 1]; + unsigned long long value; +} KSM_NAME_VALUE; + +#define PAGES_SHARED 0 +#define PAGES_SHARING 1 +#define PAGES_UNSHARED 2 +#define PAGES_VOLATILE 3 +#define PAGES_TO_SCAN 4 + +KSM_NAME_VALUE values[] = { + [PAGES_SHARED] = { "/sys/kernel/mm/ksm/pages_shared", 0ULL }, + [PAGES_SHARING] = { "/sys/kernel/mm/ksm/pages_sharing", 0ULL }, + [PAGES_UNSHARED] = { "/sys/kernel/mm/ksm/pages_unshared", 0ULL }, + [PAGES_VOLATILE] = { "/sys/kernel/mm/ksm/pages_volatile", 0ULL }, + // [PAGES_TO_SCAN] = { "/sys/kernel/mm/ksm/pages_to_scan", 0ULL }, +}; + +int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { + (void)dt; + static procfile *ff_pages_shared = NULL, *ff_pages_sharing = NULL, *ff_pages_unshared = NULL, *ff_pages_volatile = NULL/*, *ff_pages_to_scan = NULL*/; + static unsigned long page_size = 0; + + if(unlikely(page_size == 0)) + page_size = (unsigned long)sysconf(_SC_PAGESIZE); + + if(unlikely(!ff_pages_shared)) { + snprintfz(values[PAGES_SHARED].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_shared"); + snprintfz(values[PAGES_SHARED].filename, FILENAME_MAX, "%s", config_get("plugin:proc:/sys/kernel/mm/ksm", "/sys/kernel/mm/ksm/pages_shared", values[PAGES_SHARED].filename)); + ff_pages_shared = procfile_open(values[PAGES_SHARED].filename, " \t:", PROCFILE_FLAG_DEFAULT); + } + + if(unlikely(!ff_pages_sharing)) { + snprintfz(values[PAGES_SHARING].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_sharing"); + snprintfz(values[PAGES_SHARING].filename, FILENAME_MAX, "%s", config_get("plugin:proc:/sys/kernel/mm/ksm", "/sys/kernel/mm/ksm/pages_sharing", values[PAGES_SHARING].filename)); + ff_pages_sharing = procfile_open(values[PAGES_SHARING].filename, " \t:", PROCFILE_FLAG_DEFAULT); + } + + if(unlikely(!ff_pages_unshared)) { + snprintfz(values[PAGES_UNSHARED].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_unshared"); + snprintfz(values[PAGES_UNSHARED].filename, FILENAME_MAX, "%s", config_get("plugin:proc:/sys/kernel/mm/ksm", "/sys/kernel/mm/ksm/pages_unshared", values[PAGES_UNSHARED].filename)); + ff_pages_unshared = procfile_open(values[PAGES_UNSHARED].filename, " \t:", PROCFILE_FLAG_DEFAULT); + } + + if(unlikely(!ff_pages_volatile)) { + snprintfz(values[PAGES_VOLATILE].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_volatile"); + snprintfz(values[PAGES_VOLATILE].filename, FILENAME_MAX, "%s", config_get("plugin:proc:/sys/kernel/mm/ksm", "/sys/kernel/mm/ksm/pages_volatile", values[PAGES_VOLATILE].filename)); + ff_pages_volatile = procfile_open(values[PAGES_VOLATILE].filename, " \t:", PROCFILE_FLAG_DEFAULT); + } + + //if(unlikely(!ff_pages_to_scan)) { + // snprintfz(values[PAGES_TO_SCAN].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_to_scan"); + // snprintfz(values[PAGES_TO_SCAN].filename, FILENAME_MAX, "%s", config_get("plugin:proc:/sys/kernel/mm/ksm", "/sys/kernel/mm/ksm/pages_to_scan", values[PAGES_TO_SCAN].filename)); + // ff_pages_to_scan = procfile_open(values[PAGES_TO_SCAN].filename, " \t:", PROCFILE_FLAG_DEFAULT); + //} + + if(unlikely(!ff_pages_shared || !ff_pages_sharing || !ff_pages_unshared || !ff_pages_volatile /*|| !ff_pages_to_scan */)) + return 1; + + unsigned long long pages_shared = 0, pages_sharing = 0, pages_unshared = 0, pages_volatile = 0, /*pages_to_scan = 0,*/ offered = 0, saved = 0; + + ff_pages_shared = procfile_readall(ff_pages_shared); + if(unlikely(!ff_pages_shared)) return 0; // we return 0, so that we will retry to open it next time + pages_shared = str2ull(procfile_lineword(ff_pages_shared, 0, 0)); + + ff_pages_sharing = procfile_readall(ff_pages_sharing); + if(unlikely(!ff_pages_sharing)) return 0; // we return 0, so that we will retry to open it next time + pages_sharing = str2ull(procfile_lineword(ff_pages_sharing, 0, 0)); + + ff_pages_unshared = procfile_readall(ff_pages_unshared); + if(unlikely(!ff_pages_unshared)) return 0; // we return 0, so that we will retry to open it next time + pages_unshared = str2ull(procfile_lineword(ff_pages_unshared, 0, 0)); + + ff_pages_volatile = procfile_readall(ff_pages_volatile); + if(unlikely(!ff_pages_volatile)) return 0; // we return 0, so that we will retry to open it next time + pages_volatile = str2ull(procfile_lineword(ff_pages_volatile, 0, 0)); + + //ff_pages_to_scan = procfile_readall(ff_pages_to_scan); + //if(unlikely(!ff_pages_to_scan)) return 0; // we return 0, so that we will retry to open it next time + //pages_to_scan = str2ull(procfile_lineword(ff_pages_to_scan, 0, 0)); + + offered = pages_sharing + pages_shared + pages_unshared + pages_volatile; + saved = pages_sharing; + + if(unlikely(!offered /*|| !pages_to_scan*/ && netdata_zero_metrics_enabled == CONFIG_BOOLEAN_NO)) return 0; + + // -------------------------------------------------------------------- + + { + static RRDSET *st_mem_ksm = NULL; + static RRDDIM *rd_shared = NULL, *rd_unshared = NULL, *rd_sharing = NULL, *rd_volatile = NULL/*, *rd_to_scan = NULL*/; + + if (unlikely(!st_mem_ksm)) { + st_mem_ksm = rrdset_create_localhost( + "mem" + , "ksm" + , NULL + , "ksm" + , NULL + , "Kernel Same Page Merging" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_KSM_NAME + , NETDATA_CHART_PRIO_MEM_KSM + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_shared = rrddim_add(st_mem_ksm, "shared", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_unshared = rrddim_add(st_mem_ksm, "unshared", NULL, -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_sharing = rrddim_add(st_mem_ksm, "sharing", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_volatile = rrddim_add(st_mem_ksm, "volatile", NULL, -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + //rd_to_scan = rrddim_add(st_mem_ksm, "to_scan", "to scan", -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_ksm, rd_shared, pages_shared * page_size); + rrddim_set_by_pointer(st_mem_ksm, rd_unshared, pages_unshared * page_size); + rrddim_set_by_pointer(st_mem_ksm, rd_sharing, pages_sharing * page_size); + rrddim_set_by_pointer(st_mem_ksm, rd_volatile, pages_volatile * page_size); + //rrddim_set_by_pointer(st_mem_ksm, rd_to_scan, pages_to_scan * page_size); + + rrdset_done(st_mem_ksm); + } + + // -------------------------------------------------------------------- + + { + static RRDSET *st_mem_ksm_savings = NULL; + static RRDDIM *rd_savings = NULL, *rd_offered = NULL; + + if (unlikely(!st_mem_ksm_savings)) { + st_mem_ksm_savings = rrdset_create_localhost( + "mem" + , "ksm_savings" + , NULL + , "ksm" + , NULL + , "Kernel Same Page Merging Savings" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_KSM_NAME + , NETDATA_CHART_PRIO_MEM_KSM_SAVINGS + , update_every + , RRDSET_TYPE_AREA + ); + + rd_savings = rrddim_add(st_mem_ksm_savings, "savings", NULL, -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_offered = rrddim_add(st_mem_ksm_savings, "offered", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_ksm_savings, rd_savings, saved * page_size); + rrddim_set_by_pointer(st_mem_ksm_savings, rd_offered, offered * page_size); + + rrdset_done(st_mem_ksm_savings); + } + + // -------------------------------------------------------------------- + + { + static RRDSET *st_mem_ksm_ratios = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_mem_ksm_ratios)) { + st_mem_ksm_ratios = rrdset_create_localhost( + "mem" + , "ksm_ratios" + , NULL + , "ksm" + , NULL + , "Kernel Same Page Merging Effectiveness" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_KSM_NAME + , NETDATA_CHART_PRIO_MEM_KSM_RATIOS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_savings = rrddim_add(st_mem_ksm_ratios, "savings", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_ksm_ratios, rd_savings, offered ? (saved * 1000000) / offered : 0); + rrdset_done(st_mem_ksm_ratios); + } + + return 0; +} diff --git a/collectors/proc.plugin/zfs_common.c b/collectors/proc.plugin/zfs_common.c new file mode 100644 index 0000000..cca0ae0 --- /dev/null +++ b/collectors/proc.plugin/zfs_common.c @@ -0,0 +1,960 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "zfs_common.h" + +struct arcstats arcstats = { 0 }; + +void generate_charts_arcstats(const char *plugin, const char *module, int show_zero_charts, int update_every) { + static int do_arc_size = -1, do_l2_size = -1, do_reads = -1, do_l2bytes = -1, do_ahits = -1, do_dhits = -1, \ + do_phits = -1, do_mhits = -1, do_l2hits = -1, do_list_hits = -1; + + if(unlikely(do_arc_size == -1)) + do_arc_size = do_l2_size = do_reads = do_l2bytes = do_ahits = do_dhits = do_phits = do_mhits \ + = do_l2hits = do_list_hits = show_zero_charts; + + // ARC reads + unsigned long long aread = arcstats.hits + arcstats.misses; + + // Demand reads + unsigned long long dhit = arcstats.demand_data_hits + arcstats.demand_metadata_hits; + unsigned long long dmiss = arcstats.demand_data_misses + arcstats.demand_metadata_misses; + unsigned long long dread = dhit + dmiss; + + // Prefetch reads + unsigned long long phit = arcstats.prefetch_data_hits + arcstats.prefetch_metadata_hits; + unsigned long long pmiss = arcstats.prefetch_data_misses + arcstats.prefetch_metadata_misses; + unsigned long long pread = phit + pmiss; + + // Metadata reads + unsigned long long mhit = arcstats.prefetch_metadata_hits + arcstats.demand_metadata_hits; + unsigned long long mmiss = arcstats.prefetch_metadata_misses + arcstats.demand_metadata_misses; + unsigned long long mread = mhit + mmiss; + + // l2 reads + unsigned long long l2hit = arcstats.l2_hits; + unsigned long long l2miss = arcstats.l2_misses; + unsigned long long l2read = l2hit + l2miss; + + // -------------------------------------------------------------------- + + if(do_arc_size == CONFIG_BOOLEAN_YES || arcstats.size || arcstats.c || arcstats.c_min || arcstats.c_max) { + do_arc_size = CONFIG_BOOLEAN_YES; + + static RRDSET *st_arc_size = NULL; + static RRDDIM *rd_arc_size = NULL; + static RRDDIM *rd_arc_target_size = NULL; + static RRDDIM *rd_arc_target_min_size = NULL; + static RRDDIM *rd_arc_target_max_size = NULL; + + if (unlikely(!st_arc_size)) { + st_arc_size = rrdset_create_localhost( + "zfs" + , "arc_size" + , NULL + , ZFS_FAMILY_SIZE + , NULL + , "ZFS ARC Size" + , "MiB" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_ARC_SIZE + , update_every + , RRDSET_TYPE_AREA + ); + + rd_arc_size = rrddim_add(st_arc_size, "size", "arcsz", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_arc_target_size = rrddim_add(st_arc_size, "target", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_arc_target_min_size = rrddim_add(st_arc_size, "min", "min (hard limit)", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_arc_target_max_size = rrddim_add(st_arc_size, "max", "max (high water)", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_arc_size, rd_arc_size, arcstats.size); + rrddim_set_by_pointer(st_arc_size, rd_arc_target_size, arcstats.c); + rrddim_set_by_pointer(st_arc_size, rd_arc_target_min_size, arcstats.c_min); + rrddim_set_by_pointer(st_arc_size, rd_arc_target_max_size, arcstats.c_max); + rrdset_done(st_arc_size); + } + + // -------------------------------------------------------------------- + + if(likely(arcstats.l2exist) && (do_l2_size == CONFIG_BOOLEAN_YES || arcstats.l2_size || arcstats.l2_asize)) { + do_l2_size = CONFIG_BOOLEAN_YES; + + static RRDSET *st_l2_size = NULL; + static RRDDIM *rd_l2_size = NULL; + static RRDDIM *rd_l2_asize = NULL; + + if (unlikely(!st_l2_size)) { + st_l2_size = rrdset_create_localhost( + "zfs" + , "l2_size" + , NULL + , ZFS_FAMILY_SIZE + , NULL + , "ZFS L2 ARC Size" + , "MiB" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_L2_SIZE + , update_every + , RRDSET_TYPE_AREA + ); + + rd_l2_asize = rrddim_add(st_l2_size, "actual", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rd_l2_size = rrddim_add(st_l2_size, "size", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_l2_size, rd_l2_size, arcstats.l2_size); + rrddim_set_by_pointer(st_l2_size, rd_l2_asize, arcstats.l2_asize); + rrdset_done(st_l2_size); + } + + // -------------------------------------------------------------------- + + if(likely(do_reads == CONFIG_BOOLEAN_YES || aread || dread || pread || mread || l2read)) { + do_reads = CONFIG_BOOLEAN_YES; + + static RRDSET *st_reads = NULL; + static RRDDIM *rd_aread = NULL; + static RRDDIM *rd_dread = NULL; + static RRDDIM *rd_pread = NULL; + static RRDDIM *rd_mread = NULL; + static RRDDIM *rd_l2read = NULL; + + if (unlikely(!st_reads)) { + st_reads = rrdset_create_localhost( + "zfs" + , "reads" + , NULL + , ZFS_FAMILY_ACCESSES + , NULL + , "ZFS Reads" + , "reads/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_READS + , update_every + , RRDSET_TYPE_AREA + ); + + rd_aread = rrddim_add(st_reads, "areads", "arc", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_dread = rrddim_add(st_reads, "dreads", "demand", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pread = rrddim_add(st_reads, "preads", "prefetch", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mread = rrddim_add(st_reads, "mreads", "metadata", 1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(arcstats.l2exist) + rd_l2read = rrddim_add(st_reads, "l2reads", "l2", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_reads, rd_aread, aread); + rrddim_set_by_pointer(st_reads, rd_dread, dread); + rrddim_set_by_pointer(st_reads, rd_pread, pread); + rrddim_set_by_pointer(st_reads, rd_mread, mread); + + if(arcstats.l2exist) + rrddim_set_by_pointer(st_reads, rd_l2read, l2read); + + rrdset_done(st_reads); + } + + // -------------------------------------------------------------------- + + if(likely(arcstats.l2exist && (do_l2bytes == CONFIG_BOOLEAN_YES || arcstats.l2_read_bytes || arcstats.l2_write_bytes))) { + do_l2bytes = CONFIG_BOOLEAN_YES; + + static RRDSET *st_l2bytes = NULL; + static RRDDIM *rd_l2_read_bytes = NULL; + static RRDDIM *rd_l2_write_bytes = NULL; + + if (unlikely(!st_l2bytes)) { + st_l2bytes = rrdset_create_localhost( + "zfs" + , "bytes" + , NULL + , ZFS_FAMILY_ACCESSES + , NULL + , "ZFS ARC L2 Read/Write Rate" + , "KiB/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_IO + , update_every + , RRDSET_TYPE_AREA + ); + + rd_l2_read_bytes = rrddim_add(st_l2bytes, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rd_l2_write_bytes = rrddim_add(st_l2bytes, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_l2bytes, rd_l2_read_bytes, arcstats.l2_read_bytes); + rrddim_set_by_pointer(st_l2bytes, rd_l2_write_bytes, arcstats.l2_write_bytes); + rrdset_done(st_l2bytes); + } + + // -------------------------------------------------------------------- + + if(likely(do_ahits == CONFIG_BOOLEAN_YES || arcstats.hits || arcstats.misses)) { + do_ahits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_ahits = NULL; + static RRDDIM *rd_ahits = NULL; + static RRDDIM *rd_amisses = NULL; + + if (unlikely(!st_ahits)) { + st_ahits = rrdset_create_localhost( + "zfs" + , "hits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS ARC Hits" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_HITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_ahits = rrddim_add(st_ahits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_amisses = rrddim_add(st_ahits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_ahits, rd_ahits, arcstats.hits); + rrddim_set_by_pointer(st_ahits, rd_amisses, arcstats.misses); + rrdset_done(st_ahits); + + static RRDSET *st_ahits_rate = NULL; + static RRDDIM *rd_ahits_rate = NULL; + static RRDDIM *rd_amisses_rate = NULL; + + if (unlikely(!st_ahits_rate)) { + st_ahits_rate = rrdset_create_localhost( + "zfs" + , "hits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS ARC Hits Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_HITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_ahits_rate = rrddim_add(st_ahits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_amisses_rate = rrddim_add(st_ahits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ahits_rate, rd_ahits_rate, arcstats.hits); + rrddim_set_by_pointer(st_ahits_rate, rd_amisses_rate, arcstats.misses); + rrdset_done(st_ahits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(do_dhits == CONFIG_BOOLEAN_YES || dhit || dmiss)) { + do_dhits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_dhits = NULL; + static RRDDIM *rd_dhits = NULL; + static RRDDIM *rd_dmisses = NULL; + + if (unlikely(!st_dhits)) { + st_dhits = rrdset_create_localhost( + "zfs" + , "dhits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Demand Hits" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_DHITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_dhits = rrddim_add(st_dhits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_dmisses = rrddim_add(st_dhits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_dhits, rd_dhits, dhit); + rrddim_set_by_pointer(st_dhits, rd_dmisses, dmiss); + rrdset_done(st_dhits); + + static RRDSET *st_dhits_rate = NULL; + static RRDDIM *rd_dhits_rate = NULL; + static RRDDIM *rd_dmisses_rate = NULL; + + if (unlikely(!st_dhits_rate)) { + st_dhits_rate = rrdset_create_localhost( + "zfs" + , "dhits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Demand Hits Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_DHITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_dhits_rate = rrddim_add(st_dhits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_dmisses_rate = rrddim_add(st_dhits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_dhits_rate, rd_dhits_rate, dhit); + rrddim_set_by_pointer(st_dhits_rate, rd_dmisses_rate, dmiss); + rrdset_done(st_dhits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(do_phits == CONFIG_BOOLEAN_YES || phit || pmiss)) { + do_phits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_phits = NULL; + static RRDDIM *rd_phits = NULL; + static RRDDIM *rd_pmisses = NULL; + + if (unlikely(!st_phits)) { + st_phits = rrdset_create_localhost( + "zfs" + , "phits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Prefetch Hits" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_PHITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_phits = rrddim_add(st_phits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_pmisses = rrddim_add(st_phits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_phits, rd_phits, phit); + rrddim_set_by_pointer(st_phits, rd_pmisses, pmiss); + rrdset_done(st_phits); + + static RRDSET *st_phits_rate = NULL; + static RRDDIM *rd_phits_rate = NULL; + static RRDDIM *rd_pmisses_rate = NULL; + + if (unlikely(!st_phits_rate)) { + st_phits_rate = rrdset_create_localhost( + "zfs" + , "phits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Prefetch Hits Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_PHITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_phits_rate = rrddim_add(st_phits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pmisses_rate = rrddim_add(st_phits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_phits_rate, rd_phits_rate, phit); + rrddim_set_by_pointer(st_phits_rate, rd_pmisses_rate, pmiss); + rrdset_done(st_phits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(do_mhits == CONFIG_BOOLEAN_YES || mhit || mmiss)) { + do_mhits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_mhits = NULL; + static RRDDIM *rd_mhits = NULL; + static RRDDIM *rd_mmisses = NULL; + + if (unlikely(!st_mhits)) { + st_mhits = rrdset_create_localhost( + "zfs" + , "mhits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Metadata Hits" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_MHITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_mhits = rrddim_add(st_mhits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_mmisses = rrddim_add(st_mhits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_mhits, rd_mhits, mhit); + rrddim_set_by_pointer(st_mhits, rd_mmisses, mmiss); + rrdset_done(st_mhits); + + static RRDSET *st_mhits_rate = NULL; + static RRDDIM *rd_mhits_rate = NULL; + static RRDDIM *rd_mmisses_rate = NULL; + + if (unlikely(!st_mhits_rate)) { + st_mhits_rate = rrdset_create_localhost( + "zfs" + , "mhits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Metadata Hits Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_MHITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_mhits_rate = rrddim_add(st_mhits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mmisses_rate = rrddim_add(st_mhits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_mhits_rate, rd_mhits_rate, mhit); + rrddim_set_by_pointer(st_mhits_rate, rd_mmisses_rate, mmiss); + rrdset_done(st_mhits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(arcstats.l2exist && (do_l2hits == CONFIG_BOOLEAN_YES || l2hit || l2miss))) { + do_l2hits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_l2hits = NULL; + static RRDDIM *rd_l2hits = NULL; + static RRDDIM *rd_l2misses = NULL; + + if (unlikely(!st_l2hits)) { + st_l2hits = rrdset_create_localhost( + "zfs" + , "l2hits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS L2 Hits" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_L2HITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_l2hits = rrddim_add(st_l2hits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_l2misses = rrddim_add(st_l2hits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_l2hits, rd_l2hits, l2hit); + rrddim_set_by_pointer(st_l2hits, rd_l2misses, l2miss); + rrdset_done(st_l2hits); + + static RRDSET *st_l2hits_rate = NULL; + static RRDDIM *rd_l2hits_rate = NULL; + static RRDDIM *rd_l2misses_rate = NULL; + + if (unlikely(!st_l2hits_rate)) { + st_l2hits_rate = rrdset_create_localhost( + "zfs" + , "l2hits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS L2 Hits Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_L2HITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_l2hits_rate = rrddim_add(st_l2hits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_l2misses_rate = rrddim_add(st_l2hits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_l2hits_rate, rd_l2hits_rate, l2hit); + rrddim_set_by_pointer(st_l2hits_rate, rd_l2misses_rate, l2miss); + rrdset_done(st_l2hits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(do_list_hits == CONFIG_BOOLEAN_YES || arcstats.mfu_hits \ + || arcstats.mru_hits \ + || arcstats.mfu_ghost_hits \ + || arcstats.mru_ghost_hits)) { + do_list_hits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_list_hits = NULL; + static RRDDIM *rd_mfu = NULL; + static RRDDIM *rd_mru = NULL; + static RRDDIM *rd_mfug = NULL; + static RRDDIM *rd_mrug = NULL; + + if (unlikely(!st_list_hits)) { + st_list_hits = rrdset_create_localhost( + "zfs" + , "list_hits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS List Hits" + , "hits/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_LIST_HITS + , update_every + , RRDSET_TYPE_AREA + ); + + rd_mfu = rrddim_add(st_list_hits, "mfu", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mfug = rrddim_add(st_list_hits, "mfug", "mfu ghost", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mru = rrddim_add(st_list_hits, "mru", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mrug = rrddim_add(st_list_hits, "mrug", "mru ghost", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_list_hits, rd_mfu, arcstats.mfu_hits); + rrddim_set_by_pointer(st_list_hits, rd_mru, arcstats.mru_hits); + rrddim_set_by_pointer(st_list_hits, rd_mfug, arcstats.mfu_ghost_hits); + rrddim_set_by_pointer(st_list_hits, rd_mrug, arcstats.mru_ghost_hits); + rrdset_done(st_list_hits); + } +} + +void generate_charts_arc_summary(const char *plugin, const char *module, int show_zero_charts, int update_every) { + static int do_arc_size_breakdown = -1, do_memory = -1, do_important_ops = -1, do_actual_hits = -1, \ + do_demand_data_hits = -1, do_prefetch_data_hits = -1, do_hash_elements = -1, do_hash_chains = -1; + + if(unlikely(do_arc_size_breakdown == -1)) + do_arc_size_breakdown = do_memory = do_important_ops = do_actual_hits = do_demand_data_hits \ + = do_prefetch_data_hits = do_hash_elements = do_hash_chains = show_zero_charts; + + unsigned long long arc_accesses_total = arcstats.hits + arcstats.misses; + unsigned long long real_hits = arcstats.mfu_hits + arcstats.mru_hits; + unsigned long long real_misses = arc_accesses_total - real_hits; + + //unsigned long long anon_hits = arcstats.hits - (arcstats.mfu_hits + arcstats.mru_hits + arcstats.mfu_ghost_hits + arcstats.mru_ghost_hits); + + unsigned long long arc_size = arcstats.size; + unsigned long long mru_size = arcstats.p; + //unsigned long long target_min_size = arcstats.c_min; + //unsigned long long target_max_size = arcstats.c_max; + unsigned long long target_size = arcstats.c; + //unsigned long long target_size_ratio = (target_max_size / target_min_size); + + unsigned long long mfu_size; + if(arc_size > target_size) + mfu_size = arc_size - mru_size; + else + mfu_size = target_size - mru_size; + + // -------------------------------------------------------------------- + + if(likely(do_arc_size_breakdown == CONFIG_BOOLEAN_YES || mru_size || mfu_size)) { + do_arc_size_breakdown = CONFIG_BOOLEAN_YES; + + static RRDSET *st_arc_size_breakdown = NULL; + static RRDDIM *rd_most_recent = NULL; + static RRDDIM *rd_most_frequent = NULL; + + if (unlikely(!st_arc_size_breakdown)) { + st_arc_size_breakdown = rrdset_create_localhost( + "zfs" + , "arc_size_breakdown" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS ARC Size Breakdown" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_ARC_SIZE_BREAKDOWN + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_most_recent = rrddim_add(st_arc_size_breakdown, "recent", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL); + rd_most_frequent = rrddim_add(st_arc_size_breakdown, "frequent", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL); + } + + rrddim_set_by_pointer(st_arc_size_breakdown, rd_most_recent, mru_size); + rrddim_set_by_pointer(st_arc_size_breakdown, rd_most_frequent, mfu_size); + rrdset_done(st_arc_size_breakdown); + } + + // -------------------------------------------------------------------- + + if(likely(do_memory == CONFIG_BOOLEAN_YES || arcstats.memory_direct_count \ + || arcstats.memory_throttle_count \ + || arcstats.memory_indirect_count)) { + do_memory = CONFIG_BOOLEAN_YES; + + static RRDSET *st_memory = NULL; +#ifndef __FreeBSD__ + static RRDDIM *rd_direct = NULL; +#endif + static RRDDIM *rd_throttled = NULL; +#ifndef __FreeBSD__ + static RRDDIM *rd_indirect = NULL; +#endif + + if (unlikely(!st_memory)) { + st_memory = rrdset_create_localhost( + "zfs" + , "memory_ops" + , NULL + , ZFS_FAMILY_OPERATIONS + , NULL + , "ZFS Memory Operations" + , "operations/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_MEMORY_OPS + , update_every + , RRDSET_TYPE_LINE + ); + +#ifndef __FreeBSD__ + rd_direct = rrddim_add(st_memory, "direct", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#endif + rd_throttled = rrddim_add(st_memory, "throttled", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#ifndef __FreeBSD__ + rd_indirect = rrddim_add(st_memory, "indirect", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); +#endif + } + +#ifndef __FreeBSD__ + rrddim_set_by_pointer(st_memory, rd_direct, arcstats.memory_direct_count); +#endif + rrddim_set_by_pointer(st_memory, rd_throttled, arcstats.memory_throttle_count); +#ifndef __FreeBSD__ + rrddim_set_by_pointer(st_memory, rd_indirect, arcstats.memory_indirect_count); +#endif + rrdset_done(st_memory); + } + + // -------------------------------------------------------------------- + + if(likely(do_important_ops == CONFIG_BOOLEAN_YES || arcstats.deleted \ + || arcstats.evict_skip \ + || arcstats.mutex_miss \ + || arcstats.hash_collisions)) { + do_important_ops = CONFIG_BOOLEAN_YES; + + static RRDSET *st_important_ops = NULL; + static RRDDIM *rd_deleted = NULL; + static RRDDIM *rd_mutex_misses = NULL; + static RRDDIM *rd_evict_skips = NULL; + static RRDDIM *rd_hash_collisions = NULL; + + if (unlikely(!st_important_ops)) { + st_important_ops = rrdset_create_localhost( + "zfs" + , "important_ops" + , NULL + , ZFS_FAMILY_OPERATIONS + , NULL + , "ZFS Important Operations" + , "operations/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_IMPORTANT_OPS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_evict_skips = rrddim_add(st_important_ops, "eskip", "evict skip", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_deleted = rrddim_add(st_important_ops, "deleted", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mutex_misses = rrddim_add(st_important_ops, "mtxmis", "mutex miss", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_hash_collisions = rrddim_add(st_important_ops, "hash_collisions", "hash collisions", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_important_ops, rd_deleted, arcstats.deleted); + rrddim_set_by_pointer(st_important_ops, rd_evict_skips, arcstats.evict_skip); + rrddim_set_by_pointer(st_important_ops, rd_mutex_misses, arcstats.mutex_miss); + rrddim_set_by_pointer(st_important_ops, rd_hash_collisions, arcstats.hash_collisions); + rrdset_done(st_important_ops); + } + + // -------------------------------------------------------------------- + + if(likely(do_actual_hits == CONFIG_BOOLEAN_YES || real_hits || real_misses)) { + do_actual_hits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_actual_hits = NULL; + static RRDDIM *rd_actual_hits = NULL; + static RRDDIM *rd_actual_misses = NULL; + + if (unlikely(!st_actual_hits)) { + st_actual_hits = rrdset_create_localhost( + "zfs" + , "actual_hits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Actual Cache Hits" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_ACTUAL_HITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_actual_hits = rrddim_add(st_actual_hits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_actual_misses = rrddim_add(st_actual_hits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_actual_hits, rd_actual_hits, real_hits); + rrddim_set_by_pointer(st_actual_hits, rd_actual_misses, real_misses); + rrdset_done(st_actual_hits); + + static RRDSET *st_actual_hits_rate = NULL; + static RRDDIM *rd_actual_hits_rate = NULL; + static RRDDIM *rd_actual_misses_rate = NULL; + + if (unlikely(!st_actual_hits_rate)) { + st_actual_hits_rate = rrdset_create_localhost( + "zfs" + , "actual_hits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Actual Cache Hits Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_ACTUAL_HITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_actual_hits_rate = rrddim_add(st_actual_hits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_actual_misses_rate = rrddim_add(st_actual_hits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_actual_hits_rate, rd_actual_hits_rate, real_hits); + rrddim_set_by_pointer(st_actual_hits_rate, rd_actual_misses_rate, real_misses); + rrdset_done(st_actual_hits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(do_demand_data_hits == CONFIG_BOOLEAN_YES || arcstats.demand_data_hits || arcstats.demand_data_misses)) { + do_demand_data_hits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_demand_data_hits = NULL; + static RRDDIM *rd_demand_data_hits = NULL; + static RRDDIM *rd_demand_data_misses = NULL; + + if (unlikely(!st_demand_data_hits)) { + st_demand_data_hits = rrdset_create_localhost( + "zfs" + , "demand_data_hits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Data Demand Efficiency" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_DEMAND_DATA_HITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_demand_data_hits = rrddim_add(st_demand_data_hits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_demand_data_misses = rrddim_add(st_demand_data_hits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_demand_data_hits, rd_demand_data_hits, arcstats.demand_data_hits); + rrddim_set_by_pointer(st_demand_data_hits, rd_demand_data_misses, arcstats.demand_data_misses); + rrdset_done(st_demand_data_hits); + + static RRDSET *st_demand_data_hits_rate = NULL; + static RRDDIM *rd_demand_data_hits_rate = NULL; + static RRDDIM *rd_demand_data_misses_rate = NULL; + + if (unlikely(!st_demand_data_hits_rate)) { + st_demand_data_hits_rate = rrdset_create_localhost( + "zfs" + , "demand_data_hits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Data Demand Efficiency Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_DEMAND_DATA_HITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_demand_data_hits_rate = rrddim_add(st_demand_data_hits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_demand_data_misses_rate = rrddim_add(st_demand_data_hits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_demand_data_hits_rate, rd_demand_data_hits_rate, arcstats.demand_data_hits); + rrddim_set_by_pointer(st_demand_data_hits_rate, rd_demand_data_misses_rate, arcstats.demand_data_misses); + rrdset_done(st_demand_data_hits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(do_prefetch_data_hits == CONFIG_BOOLEAN_YES || arcstats.prefetch_data_hits \ + || arcstats.prefetch_data_misses)) { + do_prefetch_data_hits = CONFIG_BOOLEAN_YES; + + static RRDSET *st_prefetch_data_hits = NULL; + static RRDDIM *rd_prefetch_data_hits = NULL; + static RRDDIM *rd_prefetch_data_misses = NULL; + + if (unlikely(!st_prefetch_data_hits)) { + st_prefetch_data_hits = rrdset_create_localhost( + "zfs" + , "prefetch_data_hits" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Data Prefetch Efficiency" + , "percentage" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_PREFETCH_DATA_HITS + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_prefetch_data_hits = rrddim_add(st_prefetch_data_hits, "hits", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rd_prefetch_data_misses = rrddim_add(st_prefetch_data_hits, "misses", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + + rrddim_set_by_pointer(st_prefetch_data_hits, rd_prefetch_data_hits, arcstats.prefetch_data_hits); + rrddim_set_by_pointer(st_prefetch_data_hits, rd_prefetch_data_misses, arcstats.prefetch_data_misses); + rrdset_done(st_prefetch_data_hits); + + static RRDSET *st_prefetch_data_hits_rate = NULL; + static RRDDIM *rd_prefetch_data_hits_rate = NULL; + static RRDDIM *rd_prefetch_data_misses_rate = NULL; + + if (unlikely(!st_prefetch_data_hits_rate)) { + st_prefetch_data_hits_rate = rrdset_create_localhost( + "zfs" + , "prefetch_data_hits_rate" + , NULL + , ZFS_FAMILY_EFFICIENCY + , NULL + , "ZFS Data Prefetch Efficiency Rate" + , "events/s" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_PREFETCH_DATA_HITS + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_prefetch_data_hits_rate = rrddim_add(st_prefetch_data_hits_rate, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_prefetch_data_misses_rate = rrddim_add(st_prefetch_data_hits_rate, "misses", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_prefetch_data_hits_rate, rd_prefetch_data_hits_rate, arcstats.prefetch_data_hits); + rrddim_set_by_pointer(st_prefetch_data_hits_rate, rd_prefetch_data_misses_rate, arcstats.prefetch_data_misses); + rrdset_done(st_prefetch_data_hits_rate); + } + + // -------------------------------------------------------------------- + + if(likely(do_hash_elements == CONFIG_BOOLEAN_YES || arcstats.hash_elements || arcstats.hash_elements_max)) { + do_hash_elements = CONFIG_BOOLEAN_YES; + + static RRDSET *st_hash_elements = NULL; + static RRDDIM *rd_hash_elements_current = NULL; + static RRDDIM *rd_hash_elements_max = NULL; + + if (unlikely(!st_hash_elements)) { + st_hash_elements = rrdset_create_localhost( + "zfs" + , "hash_elements" + , NULL + , ZFS_FAMILY_HASH + , NULL + , "ZFS ARC Hash Elements" + , "elements" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_HASH_ELEMENTS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_hash_elements_current = rrddim_add(st_hash_elements, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_hash_elements_max = rrddim_add(st_hash_elements, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_hash_elements, rd_hash_elements_current, arcstats.hash_elements); + rrddim_set_by_pointer(st_hash_elements, rd_hash_elements_max, arcstats.hash_elements_max); + rrdset_done(st_hash_elements); + } + + // -------------------------------------------------------------------- + + if(likely(do_hash_chains == CONFIG_BOOLEAN_YES || arcstats.hash_chains || arcstats.hash_chain_max)) { + do_hash_chains = CONFIG_BOOLEAN_YES; + + static RRDSET *st_hash_chains = NULL; + static RRDDIM *rd_hash_chains_current = NULL; + static RRDDIM *rd_hash_chains_max = NULL; + + if (unlikely(!st_hash_chains)) { + st_hash_chains = rrdset_create_localhost( + "zfs" + , "hash_chains" + , NULL + , ZFS_FAMILY_HASH + , NULL + , "ZFS ARC Hash Chains" + , "chains" + , plugin + , module + , NETDATA_CHART_PRIO_ZFS_HASH_CHAINS + , update_every + , RRDSET_TYPE_LINE + ); + + rd_hash_chains_current = rrddim_add(st_hash_chains, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_hash_chains_max = rrddim_add(st_hash_chains, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_hash_chains, rd_hash_chains_current, arcstats.hash_chains); + rrddim_set_by_pointer(st_hash_chains, rd_hash_chains_max, arcstats.hash_chain_max); + rrdset_done(st_hash_chains); + } + + // -------------------------------------------------------------------- + +} diff --git a/collectors/proc.plugin/zfs_common.h b/collectors/proc.plugin/zfs_common.h new file mode 100644 index 0000000..9d61de2 --- /dev/null +++ b/collectors/proc.plugin/zfs_common.h @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_ZFS_COMMON_H +#define NETDATA_ZFS_COMMON_H 1 + +#include "daemon/common.h" + +#define ZFS_FAMILY_SIZE "size" +#define ZFS_FAMILY_EFFICIENCY "efficiency" +#define ZFS_FAMILY_ACCESSES "accesses" +#define ZFS_FAMILY_OPERATIONS "operations" +#define ZFS_FAMILY_HASH "hashes" + +struct arcstats { + // values + unsigned long long hits; + unsigned long long misses; + unsigned long long demand_data_hits; + unsigned long long demand_data_misses; + unsigned long long demand_metadata_hits; + unsigned long long demand_metadata_misses; + unsigned long long prefetch_data_hits; + unsigned long long prefetch_data_misses; + unsigned long long prefetch_metadata_hits; + unsigned long long prefetch_metadata_misses; + unsigned long long mru_hits; + unsigned long long mru_ghost_hits; + unsigned long long mfu_hits; + unsigned long long mfu_ghost_hits; + unsigned long long deleted; + unsigned long long mutex_miss; + unsigned long long evict_skip; + unsigned long long evict_not_enough; + unsigned long long evict_l2_cached; + unsigned long long evict_l2_eligible; + unsigned long long evict_l2_ineligible; + unsigned long long evict_l2_skip; + unsigned long long hash_elements; + unsigned long long hash_elements_max; + unsigned long long hash_collisions; + unsigned long long hash_chains; + unsigned long long hash_chain_max; + unsigned long long p; + unsigned long long c; + unsigned long long c_min; + unsigned long long c_max; + unsigned long long size; + unsigned long long hdr_size; + unsigned long long data_size; + unsigned long long metadata_size; + unsigned long long other_size; + unsigned long long anon_size; + unsigned long long anon_evictable_data; + unsigned long long anon_evictable_metadata; + unsigned long long mru_size; + unsigned long long mru_evictable_data; + unsigned long long mru_evictable_metadata; + unsigned long long mru_ghost_size; + unsigned long long mru_ghost_evictable_data; + unsigned long long mru_ghost_evictable_metadata; + unsigned long long mfu_size; + unsigned long long mfu_evictable_data; + unsigned long long mfu_evictable_metadata; + unsigned long long mfu_ghost_size; + unsigned long long mfu_ghost_evictable_data; + unsigned long long mfu_ghost_evictable_metadata; + unsigned long long l2_hits; + unsigned long long l2_misses; + unsigned long long l2_feeds; + unsigned long long l2_rw_clash; + unsigned long long l2_read_bytes; + unsigned long long l2_write_bytes; + unsigned long long l2_writes_sent; + unsigned long long l2_writes_done; + unsigned long long l2_writes_error; + unsigned long long l2_writes_lock_retry; + unsigned long long l2_evict_lock_retry; + unsigned long long l2_evict_reading; + unsigned long long l2_evict_l1cached; + unsigned long long l2_free_on_write; + unsigned long long l2_cdata_free_on_write; + unsigned long long l2_abort_lowmem; + unsigned long long l2_cksum_bad; + unsigned long long l2_io_error; + unsigned long long l2_size; + unsigned long long l2_asize; + unsigned long long l2_hdr_size; + unsigned long long l2_compress_successes; + unsigned long long l2_compress_zeros; + unsigned long long l2_compress_failures; + unsigned long long memory_throttle_count; + unsigned long long duplicate_buffers; + unsigned long long duplicate_buffers_size; + unsigned long long duplicate_reads; + unsigned long long memory_direct_count; + unsigned long long memory_indirect_count; + unsigned long long arc_no_grow; + unsigned long long arc_tempreserve; + unsigned long long arc_loaned_bytes; + unsigned long long arc_prune; + unsigned long long arc_meta_used; + unsigned long long arc_meta_limit; + unsigned long long arc_meta_max; + unsigned long long arc_meta_min; + unsigned long long arc_need_free; + unsigned long long arc_sys_free; + + // flags + int l2exist; +}; + +void generate_charts_arcstats(const char *plugin, const char *module, int show_zero_charts, int update_every); +void generate_charts_arc_summary(const char *plugin, const char *module, int show_zero_charts, int update_every); + +#endif //NETDATA_ZFS_COMMON_H diff --git a/collectors/python.d.plugin/Makefile.am b/collectors/python.d.plugin/Makefile.am new file mode 100644 index 0000000..1bbbf8c --- /dev/null +++ b/collectors/python.d.plugin/Makefile.am @@ -0,0 +1,236 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +CLEANFILES = \ + python.d.plugin \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_libconfig_DATA = \ + python.d.conf \ + $(NULL) + +dist_plugins_SCRIPTS = \ + python.d.plugin \ + $(NULL) + +dist_noinst_DATA = \ + python.d.plugin.in \ + README.md \ + $(NULL) + +dist_python_SCRIPTS = \ + $(NULL) + +dist_python_DATA = \ + $(NULL) + +userpythonconfigdir=$(configdir)/python.d +dist_userpythonconfig_DATA = \ + $(NULL) + +# Explicitly install directories to avoid permission issues due to umask +install-exec-local: + $(INSTALL) -d $(DESTDIR)$(userpythonconfigdir) + +pythonconfigdir=$(libconfigdir)/python.d +dist_pythonconfig_DATA = \ + $(NULL) + +include adaptec_raid/Makefile.inc +include alarms/Makefile.inc +include am2320/Makefile.inc +include anomalies/Makefile.inc +include beanstalk/Makefile.inc +include bind_rndc/Makefile.inc +include boinc/Makefile.inc +include ceph/Makefile.inc +include changefinder/Makefile.inc +include dockerd/Makefile.inc +include dovecot/Makefile.inc +include example/Makefile.inc +include exim/Makefile.inc +include fail2ban/Makefile.inc +include gearman/Makefile.inc +include go_expvar/Makefile.inc +include haproxy/Makefile.inc +include hddtemp/Makefile.inc +include hpssa/Makefile.inc +include icecast/Makefile.inc +include ipfs/Makefile.inc +include litespeed/Makefile.inc +include logind/Makefile.inc +include megacli/Makefile.inc +include memcached/Makefile.inc +include mongodb/Makefile.inc +include monit/Makefile.inc +include nvidia_smi/Makefile.inc +include nsd/Makefile.inc +include ntpd/Makefile.inc +include openldap/Makefile.inc +include oracledb/Makefile.inc +include pandas/Makefile.inc +include postfix/Makefile.inc +include proxysql/Makefile.inc +include puppet/Makefile.inc +include rabbitmq/Makefile.inc +include rethinkdbs/Makefile.inc +include retroshare/Makefile.inc +include riakkv/Makefile.inc +include samba/Makefile.inc +include sensors/Makefile.inc +include smartd_log/Makefile.inc +include spigotmc/Makefile.inc +include springboot/Makefile.inc +include squid/Makefile.inc +include tomcat/Makefile.inc +include tor/Makefile.inc +include traefik/Makefile.inc +include uwsgi/Makefile.inc +include varnish/Makefile.inc +include w1sensor/Makefile.inc +include zscores/Makefile.inc + +pythonmodulesdir=$(pythondir)/python_modules +dist_pythonmodules_DATA = \ + python_modules/__init__.py \ + $(NULL) + +basesdir=$(pythonmodulesdir)/bases +dist_bases_DATA = \ + python_modules/bases/__init__.py \ + python_modules/bases/charts.py \ + python_modules/bases/collection.py \ + python_modules/bases/loaders.py \ + python_modules/bases/loggers.py \ + $(NULL) + +bases_framework_servicesdir=$(basesdir)/FrameworkServices +dist_bases_framework_services_DATA = \ + python_modules/bases/FrameworkServices/__init__.py \ + python_modules/bases/FrameworkServices/ExecutableService.py \ + python_modules/bases/FrameworkServices/LogService.py \ + python_modules/bases/FrameworkServices/MySQLService.py \ + python_modules/bases/FrameworkServices/SimpleService.py \ + python_modules/bases/FrameworkServices/SocketService.py \ + python_modules/bases/FrameworkServices/UrlService.py \ + $(NULL) + +third_partydir=$(pythonmodulesdir)/third_party +dist_third_party_DATA = \ + python_modules/third_party/__init__.py \ + python_modules/third_party/ordereddict.py \ + python_modules/third_party/lm_sensors.py \ + python_modules/third_party/mcrcon.py \ + python_modules/third_party/boinc_client.py \ + python_modules/third_party/monotonic.py \ + python_modules/third_party/filelock.py \ + $(NULL) + +pythonyaml2dir=$(pythonmodulesdir)/pyyaml2 +dist_pythonyaml2_DATA = \ + python_modules/pyyaml2/__init__.py \ + python_modules/pyyaml2/composer.py \ + python_modules/pyyaml2/constructor.py \ + python_modules/pyyaml2/cyaml.py \ + python_modules/pyyaml2/dumper.py \ + python_modules/pyyaml2/emitter.py \ + python_modules/pyyaml2/error.py \ + python_modules/pyyaml2/events.py \ + python_modules/pyyaml2/loader.py \ + python_modules/pyyaml2/nodes.py \ + python_modules/pyyaml2/parser.py \ + python_modules/pyyaml2/reader.py \ + python_modules/pyyaml2/representer.py \ + python_modules/pyyaml2/resolver.py \ + python_modules/pyyaml2/scanner.py \ + python_modules/pyyaml2/serializer.py \ + python_modules/pyyaml2/tokens.py \ + $(NULL) + +pythonyaml3dir=$(pythonmodulesdir)/pyyaml3 +dist_pythonyaml3_DATA = \ + python_modules/pyyaml3/__init__.py \ + python_modules/pyyaml3/composer.py \ + python_modules/pyyaml3/constructor.py \ + python_modules/pyyaml3/cyaml.py \ + python_modules/pyyaml3/dumper.py \ + python_modules/pyyaml3/emitter.py \ + python_modules/pyyaml3/error.py \ + python_modules/pyyaml3/events.py \ + python_modules/pyyaml3/loader.py \ + python_modules/pyyaml3/nodes.py \ + python_modules/pyyaml3/parser.py \ + python_modules/pyyaml3/reader.py \ + python_modules/pyyaml3/representer.py \ + python_modules/pyyaml3/resolver.py \ + python_modules/pyyaml3/scanner.py \ + python_modules/pyyaml3/serializer.py \ + python_modules/pyyaml3/tokens.py \ + $(NULL) + +python_urllib3dir=$(pythonmodulesdir)/urllib3 +dist_python_urllib3_DATA = \ + python_modules/urllib3/__init__.py \ + python_modules/urllib3/_collections.py \ + python_modules/urllib3/connection.py \ + python_modules/urllib3/connectionpool.py \ + python_modules/urllib3/exceptions.py \ + python_modules/urllib3/fields.py \ + python_modules/urllib3/filepost.py \ + python_modules/urllib3/response.py \ + python_modules/urllib3/poolmanager.py \ + python_modules/urllib3/request.py \ + $(NULL) + +python_urllib3_utildir=$(python_urllib3dir)/util +dist_python_urllib3_util_DATA = \ + python_modules/urllib3/util/__init__.py \ + python_modules/urllib3/util/connection.py \ + python_modules/urllib3/util/request.py \ + python_modules/urllib3/util/response.py \ + python_modules/urllib3/util/retry.py \ + python_modules/urllib3/util/selectors.py \ + python_modules/urllib3/util/ssl_.py \ + python_modules/urllib3/util/timeout.py \ + python_modules/urllib3/util/url.py \ + python_modules/urllib3/util/wait.py \ + $(NULL) + +python_urllib3_packagesdir=$(python_urllib3dir)/packages +dist_python_urllib3_packages_DATA = \ + python_modules/urllib3/packages/__init__.py \ + python_modules/urllib3/packages/ordered_dict.py \ + python_modules/urllib3/packages/six.py \ + $(NULL) + +python_urllib3_backportsdir=$(python_urllib3_packagesdir)/backports +dist_python_urllib3_backports_DATA = \ + python_modules/urllib3/packages/backports/__init__.py \ + python_modules/urllib3/packages/backports/makefile.py \ + $(NULL) + +python_urllib3_ssl_match_hostnamedir=$(python_urllib3_packagesdir)/ssl_match_hostname +dist_python_urllib3_ssl_match_hostname_DATA = \ + python_modules/urllib3/packages/ssl_match_hostname/__init__.py \ + python_modules/urllib3/packages/ssl_match_hostname/_implementation.py \ + $(NULL) + +python_urllib3_contribdir=$(python_urllib3dir)/contrib +dist_python_urllib3_contrib_DATA = \ + python_modules/urllib3/contrib/__init__.py \ + python_modules/urllib3/contrib/appengine.py \ + python_modules/urllib3/contrib/ntlmpool.py \ + python_modules/urllib3/contrib/pyopenssl.py \ + python_modules/urllib3/contrib/securetransport.py \ + python_modules/urllib3/contrib/socks.py \ + $(NULL) + +python_urllib3_securetransportdir=$(python_urllib3_contribdir)/_securetransport +dist_python_urllib3_securetransport_DATA = \ + python_modules/urllib3/contrib/_securetransport/__init__.py \ + python_modules/urllib3/contrib/_securetransport/bindings.py \ + python_modules/urllib3/contrib/_securetransport/low_level.py \ + $(NULL) diff --git a/collectors/python.d.plugin/README.md b/collectors/python.d.plugin/README.md new file mode 100644 index 0000000..2f5ebfc --- /dev/null +++ b/collectors/python.d.plugin/README.md @@ -0,0 +1,270 @@ + + +# python.d.plugin + +`python.d.plugin` is a Netdata external plugin. It is an **orchestrator** for data collection modules written in `python`. + +1. It runs as an independent process `ps fax` shows it +2. It is started and stopped automatically by Netdata +3. It communicates with Netdata via a unidirectional pipe (sending data to the `netdata` daemon) +4. Supports any number of data collection **modules** +5. Allows each **module** to have one or more data collection **jobs** +6. Each **job** is collecting one or more metrics from a single data source + +## Disclaimer + +All third party libraries should be installed system-wide or in `python_modules` directory. +Module configurations are written in YAML and **pyYAML is required**. + +Every configuration file must have one of two formats: + +- Configuration for only one job: + +```yaml +update_every : 2 # update frequency +priority : 20000 # where it is shown on dashboard + +other_var1 : bla # variables passed to module +other_var2 : alb +``` + +- Configuration for many jobs (ex. mysql): + +```yaml +# module defaults: +update_every : 2 +priority : 20000 + +local: # job name + update_every : 5 # job update frequency + other_var1 : some_val # module specific variable + +other_job: + priority : 5 # job position on dashboard + other_var2 : val # module specific variable +``` + +`update_every` and `priority` are always optional. + +## How to debug a python module + +``` +# become user netdata +sudo su -s /bin/bash netdata +``` + +Depending on where Netdata was installed, execute one of the following commands to trace the execution of a python module: + +``` +# execute the plugin in debug mode, for a specific module +/opt/netdata/usr/libexec/netdata/plugins.d/python.d.plugin debug trace +/usr/libexec/netdata/plugins.d/python.d.plugin debug trace +``` + +Where `[module]` is the directory name under + +**Note**: If you would like execute a collector in debug mode while it is still running by Netdata, you can pass the `nolock` CLI option to the above commands. + +## How to write a new module + +Writing new python module is simple. You just need to remember to include 5 major things: + +- **ORDER** global list +- **CHART** global dictionary +- **Service** class +- **\_get_data** method + +If you plan to submit the module in a PR, make sure and go through the [PR checklist for new modules](#pull-request-checklist-for-python-plugins) beforehand to make sure you have updated all the files you need to. + +For a quick start, you can look at the [example +plugin](https://raw.githubusercontent.com/netdata/netdata/master/collectors/python.d.plugin/example/example.chart.py). + +**Note**: If you are working 'locally' on a new collector and would like to run it in an already installed and running +Netdata (as opposed to having to install Netdata from source again with your new changes) to can copy over the relevant +file to where Netdata expects it and then either `sudo systemctl restart netdata` to have it be picked up and used by +Netdata or you can just run the updated collector in debug mode by following a process like below (this assumes you have +[installed Netdata from a GitHub fork](https://learn.netdata.cloud/docs/agent/packaging/installer/methods/manual) you +have made to do your development on). + +```bash +# clone your fork (done once at the start but shown here for clarity) +#git clone --branch my-example-collector https://github.com/mygithubusername/netdata.git --depth=100 --recursive +# go into your netdata source folder +cd netdata +# git pull your latest changes (assuming you built from a fork you are using to develop on) +git pull +# instead of running the installer we can just copy over the updated collector files +#sudo ./netdata-installer.sh --dont-wait +# copy over the file you have updated locally (pretending we are working on the 'example' collector) +sudo cp collectors/python.d.plugin/example/example.chart.py /usr/libexec/netdata/python.d/ +# become user netdata +sudo su -s /bin/bash netdata +# run your updated collector in debug mode to see if it works without having to reinstall netdata +/usr/libexec/netdata/plugins.d/python.d.plugin example debug trace nolock +``` + +### Global variables `ORDER` and `CHART` + +`ORDER` list should contain the order of chart ids. Example: + +```py +ORDER = ['first_chart', 'second_chart', 'third_chart'] +``` + +`CHART` dictionary is a little bit trickier. It should contain the chart definition in following format: + +```py +CHART = { + id: { + 'options': [name, title, units, family, context, charttype], + 'lines': [ + [unique_dimension_name, name, algorithm, multiplier, divisor] + ]} +``` + +All names are better explained in the [External Plugins](/collectors/plugins.d/README.md) section. +Parameters like `priority` and `update_every` are handled by `python.d.plugin`. + +### `Service` class + +Every module needs to implement its own `Service` class. This class should inherit from one of the framework classes: + +- `SimpleService` +- `UrlService` +- `SocketService` +- `LogService` +- `ExecutableService` + +Also it needs to invoke the parent class constructor in a specific way as well as assign global variables to class variables. + +Simple example: + +```py +from base import UrlService +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS +``` + +### `_get_data` collector/parser + +This method should grab raw data from `_get_raw_data`, parse it, and return a dictionary where keys are unique dimension names or `None` if no data is collected. + +Example: + +```py +def _get_data(self): + try: + raw = self._get_raw_data().split(" ") + return {'active': int(raw[2])} + except (ValueError, AttributeError): + return None +``` + +# More about framework classes + +Every framework class has some user-configurable variables which are specific to this particular class. Those variables should have default values initialized in the child class constructor. + +If module needs some additional user-configurable variable, it can be accessed from the `self.configuration` list and assigned in constructor or custom `check` method. Example: + +```py +def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + try: + self.baseurl = str(self.configuration['baseurl']) + except (KeyError, TypeError): + self.baseurl = "http://localhost:5001" +``` + +Classes implement `_get_raw_data` which should be used to grab raw data. This method usually returns a list of strings. + +### `SimpleService` + +_This is last resort class, if a new module cannot be written by using other framework class this one can be used._ + +_Example: `ceph`, `sensors`_ + +It is the lowest-level class which implements most of module logic, like: + +- threading +- handling run times +- chart formatting +- logging +- chart creation and updating + +### `LogService` + +_Examples: `apache_cache`, `nginx_log`_ + +_Variable from config file_: `log_path`. + +Object created from this class reads new lines from file specified in `log_path` variable. It will check if file exists and is readable. Also `_get_raw_data` returns list of strings where each string is one line from file specified in `log_path`. + +### `ExecutableService` + +_Examples: `exim`, `postfix`_ + +_Variable from config file_: `command`. + +This allows to execute a shell command in a secure way. It will check for invalid characters in `command` variable and won't proceed if there is one of: + +- '&' +- '|' +- ';' +- '>' +- '\<' + +For additional security it uses python `subprocess.Popen` (without `shell=True` option) to execute command. Command can be specified with absolute or relative name. When using relative name, it will try to find `command` in `PATH` environment variable as well as in `/sbin` and `/usr/sbin`. + +`_get_raw_data` returns list of decoded lines returned by `command`. + +### UrlService + +_Examples: `apache`, `nginx`, `tomcat`_ + +_Multiple Endpoints (urls) Examples: [`rabbitmq`](/collectors/python.d.plugin/rabbitmq/README.md) (simpler). + + +_Variables from config file_: `url`, `user`, `pass`. + +If data is grabbed by accessing service via HTTP protocol, this class can be used. It can handle HTTP Basic Auth when specified with `user` and `pass` credentials. + +Please note that the config file can use different variables according to the specification of each module. + +`_get_raw_data` returns list of utf-8 decoded strings (lines). + +### SocketService + +_Examples: `dovecot`, `redis`_ + +_Variables from config file_: `unix_socket`, `host`, `port`, `request`. + +Object will try execute `request` using either `unix_socket` or TCP/IP socket with combination of `host` and `port`. This can access unix sockets with SOCK_STREAM or SOCK_DGRAM protocols and TCP/IP sockets in version 4 and 6 with SOCK_STREAM setting. + +Sockets are accessed in non-blocking mode with 15 second timeout. + +After every execution of `_get_raw_data` socket is closed, to prevent this module needs to set `_keep_alive` variable to `True` and implement custom `_check_raw_data` method. + +`_check_raw_data` should take raw data and return `True` if all data is received otherwise it should return `False`. Also it should do it in fast and efficient way. + +## Pull Request Checklist for Python Plugins + +This is a generic checklist for submitting a new Python plugin for Netdata. It is by no means comprehensive. + +At minimum, to be buildable and testable, the PR needs to include: + +- The module itself, following proper naming conventions: `collectors/python.d.plugin//.chart.py` +- A README.md file for the plugin under `collectors/python.d.plugin/`. +- The configuration file for the module: `collectors/python.d.plugin//.conf`. Python config files are in YAML format, and should include comments describing what options are present. The instructions are also needed in the configuration section of the README.md +- A basic configuration for the plugin in the appropriate global config file: `collectors/python.d.plugin/python.d.conf`, which is also in YAML format. Either add a line that reads `# : yes` if the module is to be enabled by default, or one that reads `: no` if it is to be disabled by default. +- A makefile for the plugin at `collectors/python.d.plugin//Makefile.inc`. Check an existing plugin for what this should look like. +- A line in `collectors/python.d.plugin/Makefile.am` including the above-mentioned makefile. Place it with the other plugin includes (please keep the includes sorted alphabetically). +- Optionally, chart information in `web/gui/dashboard_info.js`. This generally involves specifying a name and icon for the section, and may include descriptions for the section or individual charts. +- Optionally, some default alarm configurations for your collector in `health/health.d/.conf` and a line adding `.conf` in `health/Makefile.am`. + + diff --git a/collectors/python.d.plugin/adaptec_raid/Makefile.inc b/collectors/python.d.plugin/adaptec_raid/Makefile.inc new file mode 100644 index 0000000..716cdb2 --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += adaptec_raid/adaptec_raid.chart.py +dist_pythonconfig_DATA += adaptec_raid/adaptec_raid.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += adaptec_raid/README.md adaptec_raid/Makefile.inc + diff --git a/collectors/python.d.plugin/adaptec_raid/README.md b/collectors/python.d.plugin/adaptec_raid/README.md new file mode 100644 index 0000000..da5d13b --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/README.md @@ -0,0 +1,80 @@ + + +# Adaptec RAID controller monitoring with Netdata + +Collects logical and physical devices metrics using `arcconf` command-line utility. + +Executed commands: + +- `sudo -n arcconf GETCONFIG 1 LD` +- `sudo -n arcconf GETCONFIG 1 PD` + +## Requirements + +The module uses `arcconf`, which can only be executed by `root`. It uses +`sudo` and assumes that it is configured such that the `netdata` user can execute `arcconf` as root without a password. + +- Add to your `/etc/sudoers` file: + +`which arcconf` shows the full path to the binary. + +```bash +netdata ALL=(root) NOPASSWD: /path/to/arcconf +``` + +- Reset Netdata's systemd + unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux + distributions with systemd) + +The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `arcconf` using `sudo`. + + +As the `root` user, do the following: + +```cmd +mkdir /etc/systemd/system/netdata.service.d +echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf +systemctl daemon-reload +systemctl restart netdata.service +``` + +## Charts + +- Logical Device Status +- Physical Device State +- Physical Device S.M.A.R.T warnings +- Physical Device Temperature + +## Enable the collector + +The `adaptec_raid` collector is disabled by default. To enable it, use `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` +file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `adaptec_raid` setting to `yes`. Save the file and restart the Netdata Agent with `sudo +systemctl restart netdata`, or the [appropriate method](/docs/configure/start-stop-restart.md) for your system. + +## Configuration + +Edit the `python.d/adaptec_raid.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/adaptec_raid.conf +``` + +![image](https://user-images.githubusercontent.com/22274335/47278133-6d306680-d601-11e8-87c2-cc9c0f42d686.png) + +--- + + diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py new file mode 100644 index 0000000..bb59d88 --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +# Description: adaptec_raid netdata python.d module +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + + +import re +from copy import deepcopy + +from bases.FrameworkServices.ExecutableService import ExecutableService +from bases.collection import find_binary + +disabled_by_default = True + +update_every = 5 + +ORDER = [ + 'ld_status', + 'pd_state', + 'pd_smart_warnings', + 'pd_temperature', +] + +CHARTS = { + 'ld_status': { + 'options': [None, 'Status of logical devices (1: Failed or Degraded)', 'bool', 'logical devices', + 'adaptec_raid.ld_status', 'line'], + 'lines': [] + }, + 'pd_state': { + 'options': [None, 'State of physical devices (1: not Online)', 'bool', 'physical devices', + 'adaptec_raid.pd_state', 'line'], + 'lines': [] + }, + 'pd_smart_warnings': { + 'options': [None, 'S.M.A.R.T warnings', 'count', 'physical devices', + 'adaptec_raid.smart_warnings', 'line'], + 'lines': [] + }, + 'pd_temperature': { + 'options': [None, 'Temperature', 'celsius', 'physical devices', 'adaptec_raid.temperature', 'line'], + 'lines': [] + }, +} + +SUDO = 'sudo' +ARCCONF = 'arcconf' + +BAD_LD_STATUS = ( + 'Degraded', + 'Failed', +) + +GOOD_PD_STATUS = ( + 'Online', +) + +RE_LD = re.compile( + r'Logical [dD]evice number\s+([0-9]+).*?' + r'Status of [lL]ogical [dD]evice\s+: ([a-zA-Z]+)' +) + + +def find_lds(d): + d = ' '.join(v.strip() for v in d) + return [LD(*v) for v in RE_LD.findall(d)] + + +def find_pds(d): + pds = list() + pd = PD() + + for row in d: + row = row.strip() + if row.startswith('Device #'): + pd = PD() + pd.id = row.split('#')[-1] + elif not pd.id: + continue + + if row.startswith('State'): + v = row.split()[-1] + pd.state = v + elif row.startswith('S.M.A.R.T. warnings'): + v = row.split()[-1] + pd.smart_warnings = v + elif row.startswith('Temperature'): + v = row.split(':')[-1].split()[0] + pd.temperature = v + elif row.startswith('NCQ status'): + if pd.id and pd.state and pd.smart_warnings: + pds.append(pd) + pd = PD() + + return pds + + +class LD: + def __init__(self, ld_id, status): + self.id = ld_id + self.status = status + + def data(self): + return { + 'ld_{0}_status'.format(self.id): int(self.status in BAD_LD_STATUS) + } + + +class PD: + def __init__(self): + self.id = None + self.state = None + self.smart_warnings = None + self.temperature = None + + def data(self): + data = { + 'pd_{0}_state'.format(self.id): int(self.state not in GOOD_PD_STATUS), + 'pd_{0}_smart_warnings'.format(self.id): self.smart_warnings, + } + if self.temperature and self.temperature.isdigit(): + data['pd_{0}_temperature'.format(self.id)] = self.temperature + + return data + + +class Arcconf: + def __init__(self, arcconf): + self.arcconf = arcconf + + def ld_info(self): + return [self.arcconf, 'GETCONFIG', '1', 'LD'] + + def pd_info(self): + return [self.arcconf, 'GETCONFIG', '1', 'PD'] + + +# TODO: hardcoded sudo... +class SudoArcconf: + def __init__(self, arcconf, sudo): + self.arcconf = Arcconf(arcconf) + self.sudo = sudo + + def ld_info(self): + return [self.sudo, '-n'] + self.arcconf.ld_info() + + def pd_info(self): + return [self.sudo, '-n'] + self.arcconf.pd_info() + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.use_sudo = self.configuration.get('use_sudo', True) + self.arcconf = None + + def execute(self, command, stderr=False): + return self._get_raw_data(command=command, stderr=stderr) + + def check(self): + arcconf = find_binary(ARCCONF) + if not arcconf: + self.error('can\'t locate "{0}" binary'.format(ARCCONF)) + return False + + sudo = find_binary(SUDO) + if self.use_sudo: + if not sudo: + self.error('can\'t locate "{0}" binary'.format(SUDO)) + return False + err = self.execute([sudo, '-n', '-v'], True) + if err: + self.error(' '.join(err)) + return False + + if self.use_sudo: + self.arcconf = SudoArcconf(arcconf, sudo) + else: + self.arcconf = Arcconf(arcconf) + + lds = self.get_lds() + if not lds: + return False + + self.debug('discovered logical devices ids: {0}'.format([ld.id for ld in lds])) + + pds = self.get_pds() + if not pds: + return False + + self.debug('discovered physical devices ids: {0}'.format([pd.id for pd in pds])) + + self.update_charts(lds, pds) + return True + + def get_data(self): + data = dict() + + for ld in self.get_lds(): + data.update(ld.data()) + + for pd in self.get_pds(): + data.update(pd.data()) + + return data + + def get_lds(self): + raw_lds = self.execute(self.arcconf.ld_info()) + if not raw_lds: + return None + + lds = find_lds(raw_lds) + if not lds: + self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.ld_info()))) + self.debug('output: {0}'.format(raw_lds)) + return None + return lds + + def get_pds(self): + raw_pds = self.execute(self.arcconf.pd_info()) + if not raw_pds: + return None + + pds = find_pds(raw_pds) + if not pds: + self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.pd_info()))) + self.debug('output: {0}'.format(raw_pds)) + return None + return pds + + def update_charts(self, lds, pds): + charts = self.definitions + for ld in lds: + dim = ['ld_{0}_status'.format(ld.id), 'ld {0}'.format(ld.id)] + charts['ld_status']['lines'].append(dim) + + for pd in pds: + dim = ['pd_{0}_state'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_state']['lines'].append(dim) + + dim = ['pd_{0}_smart_warnings'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_smart_warnings']['lines'].append(dim) + + dim = ['pd_{0}_temperature'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_temperature']['lines'].append(dim) diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf new file mode 100644 index 0000000..fa462ec --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf @@ -0,0 +1,53 @@ +# netdata python.d.plugin configuration for adaptec raid +# +# This file is in YaML format. Generally the format is: +# +# name: value +# + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# ---------------------------------------------------------------------- diff --git a/collectors/python.d.plugin/alarms/Makefile.inc b/collectors/python.d.plugin/alarms/Makefile.inc new file mode 100644 index 0000000..c2de117 --- /dev/null +++ b/collectors/python.d.plugin/alarms/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += alarms/alarms.chart.py +dist_pythonconfig_DATA += alarms/alarms.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += alarms/README.md alarms/Makefile.inc + diff --git a/collectors/python.d.plugin/alarms/README.md b/collectors/python.d.plugin/alarms/README.md new file mode 100644 index 0000000..8dc666f --- /dev/null +++ b/collectors/python.d.plugin/alarms/README.md @@ -0,0 +1,66 @@ + + +# Alarms - graphing Netdata alarm states over time + +This collector creates an 'Alarms' menu with one line plot showing alarm states over time. Alarm states are mapped to integer values according to the below default mapping. Any alarm status types not in this mapping will be ignored (Note: This mapping can be changed by editing the `status_map` in the `alarms.conf` file). If you would like to learn more about the different alarm statuses check out the docs [here](https://learn.netdata.cloud/docs/agent/health/reference#alarm-statuses). + +``` +{ + 'CLEAR': 0, + 'WARNING': 1, + 'CRITICAL': 2 +} +``` + +## Charts + +Below is an example of the chart produced when running `stress-ng --all 2` for a few minutes. You can see the various warning and critical alarms raised. + +![alarms collector](https://user-images.githubusercontent.com/1153921/101641493-0b086a80-39ef-11eb-9f55-0713e5dfb19f.png) + +## Configuration + +Enable the collector and [restart Netdata](/docs/configure/start-stop-restart.md). + +```bash +cd /etc/netdata/ +sudo ./edit-config python.d.conf +# Set `alarms: no` to `alarms: yes` +sudo systemctl restart netdata +``` + +If needed, edit the `python.d/alarms.conf` configuration file using `edit-config` from the your agent's [config +directory](/docs/configure/nodes.md), which is usually at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/alarms.conf +``` + +The `alarms` specific part of the `alarms.conf` file should look like this: + +```yaml +# what url to pull data from +local: + url: 'http://127.0.0.1:19999/api/v1/alarms?all' + # define how to map alarm status to numbers for the chart + status_map: + CLEAR: 0 + WARNING: 1 + CRITICAL: 2 + # set to true to include a chart with calculated alarm values over time + collect_alarm_values: false + # define the type of chart for plotting status over time e.g. 'line' or 'stacked' + alarm_status_chart_type: 'line' + # a "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only + # alarms with "cpu" or "load" in alarm name. Default includes all. + alarm_contains_words: '' + # a "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude + # all alarms with "cpu" or "load" in alarm name. Default excludes None. + alarm_excludes_words: '' +``` + +It will default to pulling all alarms at each time step from the Netdata rest api at `http://127.0.0.1:19999/api/v1/alarms?all` diff --git a/collectors/python.d.plugin/alarms/alarms.chart.py b/collectors/python.d.plugin/alarms/alarms.chart.py new file mode 100644 index 0000000..d194273 --- /dev/null +++ b/collectors/python.d.plugin/alarms/alarms.chart.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Description: alarms netdata python.d module +# Author: andrewm4894 +# SPDX-License-Identifier: GPL-3.0-or-later + +from json import loads + +from bases.FrameworkServices.UrlService import UrlService + +update_every = 10 +disabled_by_default = True + + +def charts_template(sm, alarm_status_chart_type='line'): + order = [ + 'alarms', + 'values' + ] + + mappings = ', '.join(['{0}={1}'.format(k, v) for k, v in sm.items()]) + charts = { + 'alarms': { + 'options': [None, 'Alarms ({0})'.format(mappings), 'status', 'status', 'alarms.status', alarm_status_chart_type], + 'lines': [], + 'variables': [ + ['alarms_num'], + ] + }, + 'values': { + 'options': [None, 'Alarm Values', 'value', 'value', 'alarms.value', 'line'], + 'lines': [], + } + } + return order, charts + + +DEFAULT_STATUS_MAP = {'CLEAR': 0, 'WARNING': 1, 'CRITICAL': 2} +DEFAULT_URL = 'http://127.0.0.1:19999/api/v1/alarms?all' +DEFAULT_COLLECT_ALARM_VALUES = False +DEFAULT_ALARM_STATUS_CHART_TYPE = 'line' +DEFAULT_ALARM_CONTAINS_WORDS = '' +DEFAULT_ALARM_EXCLUDES_WORDS = '' + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.sm = self.configuration.get('status_map', DEFAULT_STATUS_MAP) + self.alarm_status_chart_type = self.configuration.get('alarm_status_chart_type', DEFAULT_ALARM_STATUS_CHART_TYPE) + self.order, self.definitions = charts_template(self.sm, self.alarm_status_chart_type) + self.url = self.configuration.get('url', DEFAULT_URL) + self.collect_alarm_values = bool(self.configuration.get('collect_alarm_values', DEFAULT_COLLECT_ALARM_VALUES)) + self.collected_dims = {'alarms': set(), 'values': set()} + self.alarm_contains_words = self.configuration.get('alarm_contains_words', DEFAULT_ALARM_CONTAINS_WORDS) + self.alarm_contains_words_list = [alarm_contains_word.lstrip(' ').rstrip(' ') for alarm_contains_word in self.alarm_contains_words.split(',')] + self.alarm_excludes_words = self.configuration.get('alarm_excludes_words', DEFAULT_ALARM_EXCLUDES_WORDS) + self.alarm_excludes_words_list = [alarm_excludes_word.lstrip(' ').rstrip(' ') for alarm_excludes_word in self.alarm_excludes_words.split(',')] + + def _get_data(self): + raw_data = self._get_raw_data() + if raw_data is None: + return None + + raw_data = loads(raw_data) + alarms = raw_data.get('alarms', {}) + if self.alarm_contains_words != '': + alarms = {alarm_name: alarms[alarm_name] for alarm_name in alarms for alarm_contains_word in + self.alarm_contains_words_list if alarm_contains_word in alarm_name} + if self.alarm_excludes_words != '': + alarms = {alarm_name: alarms[alarm_name] for alarm_name in alarms for alarm_excludes_word in + self.alarm_excludes_words_list if alarm_excludes_word not in alarm_name} + + data = {a: self.sm[alarms[a]['status']] for a in alarms if alarms[a]['status'] in self.sm} + self.update_charts('alarms', data) + data['alarms_num'] = len(data) + + if self.collect_alarm_values: + data_values = {'{}_value'.format(a): alarms[a]['value'] * 100 for a in alarms if 'value' in alarms[a] and alarms[a]['value'] is not None} + self.update_charts('values', data_values, divisor=100) + data.update(data_values) + + return data + + def update_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1): + if not self.charts: + return + + for dim in data: + if dim not in self.collected_dims[chart]: + self.collected_dims[chart].add(dim) + self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor]) + + for dim in list(self.collected_dims[chart]): + if dim not in data: + self.collected_dims[chart].remove(dim) + self.charts[chart].del_dimension(dim, hide=False) diff --git a/collectors/python.d.plugin/alarms/alarms.conf b/collectors/python.d.plugin/alarms/alarms.conf new file mode 100644 index 0000000..06d76c3 --- /dev/null +++ b/collectors/python.d.plugin/alarms/alarms.conf @@ -0,0 +1,60 @@ +# netdata python.d.plugin configuration for example +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 10 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) + +# what url to pull data from +local: + url: 'http://127.0.0.1:19999/api/v1/alarms?all' + # define how to map alarm status to numbers for the chart + status_map: + CLEAR: 0 + WARNING: 1 + CRITICAL: 2 + # set to true to include a chart with calculated alarm values over time + collect_alarm_values: false + # define the type of chart for plotting status over time e.g. 'line' or 'stacked' + alarm_status_chart_type: 'line' + # a "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only + # alarms with "cpu" or "load" in alarm name. Default includes all. + alarm_contains_words: '' + # a "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude + # all alarms with "cpu" or "load" in alarm name. Default excludes None. + alarm_excludes_words: '' diff --git a/collectors/python.d.plugin/am2320/Makefile.inc b/collectors/python.d.plugin/am2320/Makefile.inc new file mode 100644 index 0000000..48e5a88 --- /dev/null +++ b/collectors/python.d.plugin/am2320/Makefile.inc @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# install these files +dist_python_DATA += am2320/am2320.chart.py +dist_pythonconfig_DATA += am2320/am2320.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += am2320/README.md am2320/Makefile.inc diff --git a/collectors/python.d.plugin/am2320/README.md b/collectors/python.d.plugin/am2320/README.md new file mode 100644 index 0000000..3503d7c --- /dev/null +++ b/collectors/python.d.plugin/am2320/README.md @@ -0,0 +1,53 @@ + + +# AM2320 sensor monitoring with netdata + +Displays a graph of the temperature and humidity from a AM2320 sensor. + +## Requirements + - Adafruit Circuit Python AM2320 library + - Adafruit AM2320 I2C sensor + - Python 3 (Adafruit libraries are not Python 2.x compatible) + + +It produces the following charts: +1. **Temperature** +2. **Humidity** + +## Configuration + +Edit the `python.d/am2320.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/am2320.conf +``` + +Raspberry Pi Instructions: + +Hardware install: +Connect the am2320 to the Raspberry Pi I2C pins + +Raspberry Pi 3B/4 Pins: + +- Board 3.3V (pin 1) to sensor VIN (pin 1) +- Board SDA (pin 3) to sensor SDA (pin 2) +- Board GND (pin 6) to sensor GND (pin 3) +- Board SCL (pin 5) to sensor SCL (pin 4) + +You may also need to add two I2C pullup resistors if your board does not already have them. The Raspberry Pi does have internal pullup resistors but it doesn't hurt to add them anyway. You can use 2.2K - 10K but we will just use 10K. The resistors go from VDD to SCL and SDA each. + +Software install: +- `sudo pip3 install adafruit-circuitpython-am2320` +- edit `/etc/netdata/netdata.conf` +- find `[plugin:python.d]` +- add `command options = -ppython3` +- save the file. +- restart the netdata service. +- check the dashboard. + diff --git a/collectors/python.d.plugin/am2320/am2320.chart.py b/collectors/python.d.plugin/am2320/am2320.chart.py new file mode 100644 index 0000000..8e66544 --- /dev/null +++ b/collectors/python.d.plugin/am2320/am2320.chart.py @@ -0,0 +1,68 @@ +# _*_ coding: utf-8 _*_ +# Description: AM2320 netdata module +# Author: tommybuck +# SPDX-License-Identifier: GPL-3.0-or-Later + +try: + import board + import busio + import adafruit_am2320 + + HAS_AM2320 = True +except ImportError: + HAS_AM2320 = False + +from bases.FrameworkServices.SimpleService import SimpleService + +ORDER = [ + 'temperature', + 'humidity', +] + +CHARTS = { + 'temperature': { + 'options': [None, 'Temperature', 'celsius', 'temperature', 'am2320.temperature', 'line'], + 'lines': [ + ['temperature'] + ] + }, + 'humidity': { + 'options': [None, 'Relative Humidity', 'percentage', 'humidity', 'am2320.humidity', 'line'], + 'lines': [ + ['humidity'] + ] + } +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.am = None + + def check(self): + if not HAS_AM2320: + self.error("Could not find the adafruit-circuitpython-am2320 package.") + return False + + try: + i2c = busio.I2C(board.SCL, board.SDA) + self.am = adafruit_am2320.AM2320(i2c) + except ValueError as error: + self.error("error on creating I2C shared bus : {0}".format(error)) + return False + + return True + + def get_data(self): + try: + return { + 'temperature': self.am.temperature, + 'humidity': self.am.relative_humidity, + } + + except (OSError, RuntimeError) as error: + self.error(error) + return None diff --git a/collectors/python.d.plugin/am2320/am2320.conf b/collectors/python.d.plugin/am2320/am2320.conf new file mode 100644 index 0000000..c6b9885 --- /dev/null +++ b/collectors/python.d.plugin/am2320/am2320.conf @@ -0,0 +1,68 @@ +# netdata python.d.plugin configuration for am2320 temperature/humidity sensor +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, example also supports the following: +# +# - none +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) diff --git a/collectors/python.d.plugin/anomalies/Makefile.inc b/collectors/python.d.plugin/anomalies/Makefile.inc new file mode 100644 index 0000000..94937b3 --- /dev/null +++ b/collectors/python.d.plugin/anomalies/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += anomalies/anomalies.chart.py +dist_pythonconfig_DATA += anomalies/anomalies.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += anomalies/README.md anomalies/Makefile.inc + diff --git a/collectors/python.d.plugin/anomalies/README.md b/collectors/python.d.plugin/anomalies/README.md new file mode 100644 index 0000000..aaf39ab --- /dev/null +++ b/collectors/python.d.plugin/anomalies/README.md @@ -0,0 +1,245 @@ + + +# Anomaly detection with Netdata + +**Note**: Check out the [Netdata Anomaly Advisor](https://learn.netdata.cloud/docs/cloud/insights/anomaly-advisor) for a more native anomaly detection experience within Netdata. + +This collector uses the Python [PyOD](https://pyod.readthedocs.io/en/latest/index.html) library to perform unsupervised [anomaly detection](https://en.wikipedia.org/wiki/Anomaly_detection) on your Netdata charts and/or dimensions. + +Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return an anomaly probability and anomaly flag for each chart or custom model you define. This computation consists of a **train** function that runs every `train_n_secs` to train the ML models to learn what 'normal' typically looks like on your node. At each iteration there is also a **predict** function that uses the latest trained models and most recent metrics to produce an anomaly probability and anomaly flag for each chart or custom model you define. + +> As this is a somewhat unique collector and involves often subjective concepts like anomalies and anomaly probabilities, we would love to hear any feedback on it from the community. Please let us know on the [community forum](https://community.netdata.cloud/t/anomalies-collector-feedback-megathread/767) or drop us a note at [analytics-ml-team@netdata.cloud](mailto:analytics-ml-team@netdata.cloud) for any and all feedback, both positive and negative. This sort of feedback is priceless to help us make complex features more useful. + +## Charts + +Two charts are produced: + +- **Anomaly Probability** (`anomalies.probability`): This chart shows the probability that the latest observed data is anomalous based on the trained model for that chart (using the [`predict_proba()`](https://pyod.readthedocs.io/en/latest/api_cc.html#pyod.models.base.BaseDetector.predict_proba) method of the trained PyOD model). +- **Anomaly** (`anomalies.anomaly`): This chart shows `1` or `0` predictions of if the latest observed data is considered anomalous or not based on the trained model (using the [`predict()`](https://pyod.readthedocs.io/en/latest/api_cc.html#pyod.models.base.BaseDetector.predict) method of the trained PyOD model). + +Below is an example of the charts produced by this collector and how they might look when things are 'normal' on the node. The anomaly probabilities tend to bounce randomly around a typically low probability range, one or two might randomly jump or drift outside of this range every now and then and show up as anomalies on the anomaly chart. + +![netdata-anomalies-collector-normal](https://user-images.githubusercontent.com/2178292/100663699-99755000-334e-11eb-922f-0c41a0176484.jpg) + +If we then go onto the system and run a command like `stress-ng --all 2` to create some [stress](https://wiki.ubuntu.com/Kernel/Reference/stress-ng), we see some charts begin to have anomaly probabilities that jump outside the typical range. When the anomaly probabilities change enough, we will start seeing anomalies being flagged on the `anomalies.anomaly` chart. The idea is that these charts are the most anomalous right now so could be a good place to start your troubleshooting. + +![netdata-anomalies-collector-abnormal](https://user-images.githubusercontent.com/2178292/100663710-9bd7aa00-334e-11eb-9d14-76fda73bc309.jpg) + +Then, as the issue passes, the anomaly probabilities should settle back down into their 'normal' range again. + +![netdata-anomalies-collector-normal-again](https://user-images.githubusercontent.com/2178292/100666681-481a9000-3351-11eb-9979-64728ee2dfb6.jpg) + +## Requirements + +- This collector will only work with Python 3 and requires the packages below be installed. +- Typically you will not need to do this, but, if needed, to ensure Python 3 is used you can add the below line to the `[plugin:python.d]` section of `netdata.conf` + +```conf +[plugin:python.d] + # update every = 1 + command options = -ppython3 +``` + +Install the required python libraries. + +```bash +# become netdata user +sudo su -s /bin/bash netdata +# install required packages for the netdata user +pip3 install --user netdata-pandas==0.0.38 numba==0.50.1 scikit-learn==0.23.2 pyod==0.8.3 +``` + +## Configuration + +Install the Python requirements above, enable the collector and restart Netdata. + +```bash +cd /etc/netdata/ +sudo ./edit-config python.d.conf +# Set `anomalies: no` to `anomalies: yes` +sudo systemctl restart netdata +``` + +The configuration for the anomalies collector defines how it will behave on your system and might take some experimentation with over time to set it optimally for your node. Out of the box, the config comes with some [sane defaults](https://www.netdata.cloud/blog/redefining-monitoring-netdata/) to get you started that try to balance the flexibility and power of the ML models with the goal of being as cheap as possible in term of cost on the node resources. + +_**Note**: If you are unsure about any of the below configuration options then it's best to just ignore all this and leave the `anomalies.conf` file alone to begin with. Then you can return to it later if you would like to tune things a bit more once the collector is running for a while and you have a feeling for its performance on your node._ + +Edit the `python.d/anomalies.conf` configuration file using `edit-config` from the your agent's [config +directory](/docs/configure/nodes.md), which is usually at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/anomalies.conf +``` + +The default configuration should look something like this. Here you can see each parameter (with sane defaults) and some information about each one and what it does. + +```conf +# ---------------------------------------------------------------------- +# JOBS (data collection sources) + +# Pull data from local Netdata node. +anomalies: + name: 'Anomalies' + + # Host to pull data from. + host: '127.0.0.1:19999' + + # Username and Password for Netdata if using basic auth. + # username: '???' + # password: '???' + + # Use http or https to pull data + protocol: 'http' + + # SSL verify parameter for requests.get() calls + tls_verify: true + + # What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. + charts_regex: 'system\..*' + + # Charts to exclude, useful if you would like to exclude some specific charts. + # Note: should be a ',' separated string like 'chart.name,chart.name'. + charts_to_exclude: 'system.uptime,system.entropy' + + # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'. + # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html. + model: 'pca' + + # Max number of observations to train on, to help cap compute cost of training model if you set a very large train_n_secs. + train_max_n: 100000 + + # How often to re-train the model (assuming update_every=1 then train_every_n=1800 represents (re)training every 30 minutes). + # Note: If you want to turn off re-training set train_every_n=0 and after initial training the models will not be retrained. + train_every_n: 1800 + + # The length of the window of data to train on (14400 = last 4 hours). + train_n_secs: 14400 + + # How many prediction steps after a train event to just use previous prediction value for. + # Used to reduce possibility of the training step itself appearing as an anomaly on the charts. + train_no_prediction_n: 10 + + # If you would like to train the model for the first time on a specific window then you can define it using the below two variables. + # Start of training data for initial model. + # initial_train_data_after: 1604578857 + + # End of training data for initial model. + # initial_train_data_before: 1604593257 + + # If you would like to ignore recent data in training then you can offset it by offset_n_secs. + offset_n_secs: 0 + + # How many lagged values of each dimension to include in the 'feature vector' each model is trained on. + lags_n: 5 + + # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on. + smooth_n: 3 + + # How many differences to take in preprocessing your data. + # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing + # diffs_n=0 would mean training models on the raw values of each dimension. + # diffs_n=1 means everything is done in terms of differences. + diffs_n: 1 + + # What is the typical proportion of anomalies in your data on average? + # This parameter can control the sensitivity of your models to anomalies. + # Some discussion here: https://github.com/yzhao062/pyod/issues/144 + contamination: 0.001 + + # Set to true to include an "average_prob" dimension on anomalies probability chart which is + # just the average of all anomaly probabilities at each time step + include_average_prob: true + + # Define any custom models you would like to create anomaly probabilities for, some examples below to show how. + # For example below example creates two custom models, one to run anomaly detection user and system cpu for our demo servers + # and one on the cpu and mem apps metrics for the python.d.plugin. + # custom_models: + # - name: 'demos_cpu' + # dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system' + # - name: 'apps_python_d_plugin' + # dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin' + + # Set to true to normalize, using min-max standardization, features used for the custom models. + # Useful if your custom models contain dimensions on very different scales an model you use does + # not internally do its own normalization. Usually best to leave as false. + # custom_models_normalize: false +``` + +## Custom models + +In the `anomalies.conf` file you can also define some "custom models" which you can use to group one or more metrics into a single model much like is done by default for the charts you specify. This is useful if you have a handful of metrics that exist in different charts but perhaps are related to the same underlying thing you would like to perform anomaly detection on, for example a specific app or user. + +To define a custom model you would include configuration like below in `anomalies.conf`. By default there should already be some commented out examples in there. + +`name` is a name you give your custom model, this is what will appear alongside any other specified charts in the `anomalies.probability` and `anomalies.anomaly` charts. `dimensions` is a string of metrics you want to include in your custom model. By default the [netdata-pandas](https://github.com/netdata/netdata-pandas) library used to pull the data from Netdata uses a "chart.a|dim.1" type of naming convention in the pandas columns it returns, hence the `dimensions` string should look like "chart.name|dimension.name,chart.name|dimension.name". The examples below hopefully make this clear. + +```yaml +custom_models: + # a model for anomaly detection on the netdata user in terms of cpu, mem, threads, processes and sockets. + - name: 'user_netdata' + dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata' + # a model for anomaly detection on the netdata python.d.plugin app in terms of cpu, mem, threads, processes and sockets. + - name: 'apps_python_d_plugin' + dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin' + +custom_models_normalize: false +``` + +## Troubleshooting + +To see any relevant log messages you can use a command like below. + +```bash +`grep 'anomalies' /var/log/netdata/error.log` +``` + +If you would like to log in as `netdata` user and run the collector in debug mode to see more detail. + +```bash +# become netdata user +sudo su -s /bin/bash netdata +# run collector in debug using `nolock` option if netdata is already running the collector itself. +/usr/libexec/netdata/plugins.d/python.d.plugin anomalies debug trace nolock +``` + +## Deepdive tutorial + +If you would like to go deeper on what exactly the anomalies collector is doing under the hood then check out this [deepdive tutorial](https://github.com/netdata/community/blob/main/netdata-agent-api/netdata-pandas/anomalies_collector_deepdive.ipynb) in our community repo where you can play around with some data from our demo servers (or your own if its accessible to you) and work through the calculations step by step. + +(Note: as its a Jupyter Notebook it might render a little prettier on [nbviewer](https://nbviewer.jupyter.org/github/netdata/community/blob/main/netdata-agent-api/netdata-pandas/anomalies_collector_deepdive.ipynb)) + +## Notes + +- Python 3 is required as the [`netdata-pandas`](https://github.com/netdata/netdata-pandas) package uses Python async libraries ([asks](https://pypi.org/project/asks/) and [trio](https://pypi.org/project/trio/)) to make asynchronous calls to the [Netdata REST API](https://learn.netdata.cloud/docs/agent/web/api) to get the required data for each chart. +- Python 3 is also required for the underlying ML libraries of [numba](https://pypi.org/project/numba/), [scikit-learn](https://pypi.org/project/scikit-learn/), and [PyOD](https://pypi.org/project/pyod/). +- It may take a few hours or so (depending on your choice of `train_secs_n`) for the collector to 'settle' into it's typical behaviour in terms of the trained models and probabilities you will see in the normal running of your node. +- As this collector does most of the work in Python itself, with [PyOD](https://pyod.readthedocs.io/en/latest/) leveraging [numba](https://numba.pydata.org/) under the hood, you may want to try it out first on a test or development system to get a sense of its performance characteristics on a node similar to where you would like to use it. +- `lags_n`, `smooth_n`, and `diffs_n` together define the preprocessing done to the raw data before models are trained and before each prediction. This essentially creates a [feature vector](https://en.wikipedia.org/wiki/Feature_(machine_learning)#:~:text=In%20pattern%20recognition%20and%20machine,features%20that%20represent%20some%20object.&text=Feature%20vectors%20are%20often%20combined,score%20for%20making%20a%20prediction.) for each chart model (or each custom model). The default settings for these parameters aim to create a rolling matrix of recent smoothed [differenced](https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing) values for each chart. The aim of the model then is to score how unusual this 'matrix' of features is for each chart based on what it has learned as 'normal' from the training data. So as opposed to just looking at the single most recent value of a dimension and considering how strange it is, this approach looks at a recent smoothed window of all dimensions for a chart (or dimensions in a custom model) and asks how unusual the data as a whole looks. This should be more flexible in capturing a wider range of [anomaly types](https://andrewm4894.com/2020/10/19/different-types-of-time-series-anomalies/) and be somewhat more robust to temporary 'spikes' in the data that tend to always be happening somewhere in your metrics but often are not the most important type of anomaly (this is all covered in a lot more detail in the [deepdive tutorial](https://nbviewer.jupyter.org/github/netdata/community/blob/main/netdata-agent-api/netdata-pandas/anomalies_collector_deepdive.ipynb)). +- You can see how long model training is taking by looking in the logs for the collector `grep 'anomalies' /var/log/netdata/error.log | grep 'training'` and you should see lines like `2020-12-01 22:02:14: python.d INFO: anomalies[local] : training complete in 2.81 seconds (runs_counter=2700, model=pca, train_n_secs=14400, models=26, n_fit_success=26, n_fit_fails=0, after=1606845731, before=1606860131).`. + - This also gives counts of the number of models, if any, that failed to fit and so had to default back to the DefaultModel (which is currently [HBOS](https://pyod.readthedocs.io/en/latest/_modules/pyod/models/hbos.html)). + - `after` and `before` here refer to the start and end of the training data used to train the models. +- On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the typical performance characteristics we saw from running this collector (with defaults) were: + - A runtime (`netdata.runtime_anomalies`) of ~80ms when doing scoring and ~3 seconds when training or retraining the models. + - Typically ~3%-3.5% additional cpu usage from scoring, jumping to ~60% for a couple of seconds during model training. + - About ~150mb of ram (`apps.mem`) being continually used by the `python.d.plugin`. +- If you activate this collector on a fresh node, it might take a little while to build up enough data to calculate a realistic and useful model. +- Some models like `iforest` can be comparatively expensive (on same n1-standard-2 system above ~2s runtime during predict, ~40s training time, ~50% cpu on both train and predict) so if you would like to use it you might be advised to set a relatively high `update_every` maybe 10, 15 or 30 in `anomalies.conf`. +- Setting a higher `train_every_n` and `update_every` is an easy way to devote less resources on the node to anomaly detection. Specifying less charts and a lower `train_n_secs` will also help reduce resources at the expense of covering less charts and maybe a more noisy model if you set `train_n_secs` to be too small for how your node tends to behave. +- If you would like to enable this on a Rasberry Pi, then check out [this guide](https://learn.netdata.cloud/guides/monitor/raspberry-pi-anomaly-detection) which will guide you through first installing LLVM. + +## Useful links and further reading + +- [PyOD documentation](https://pyod.readthedocs.io/en/latest/), [PyOD Github](https://github.com/yzhao062/pyod). +- [Anomaly Detection](https://en.wikipedia.org/wiki/Anomaly_detection) wikipedia page. +- [Anomaly Detection YouTube playlist](https://www.youtube.com/playlist?list=PL6Zhl9mK2r0KxA6rB87oi4kWzoqGd5vp0) maintained by [andrewm4894](https://github.com/andrewm4894/) from Netdata. +- [awesome-TS-anomaly-detection](https://github.com/rob-med/awesome-TS-anomaly-detection) Github list of useful tools, libraries and resources. +- [Mendeley public group](https://www.mendeley.com/community/interesting-anomaly-detection-papers/) with some interesting anomaly detection papers we have been reading. +- Good [blog post](https://www.anodot.com/blog/what-is-anomaly-detection/) from Anodot on time series anomaly detection. Anodot also have some great whitepapers in this space too that some may find useful. +- Novelty and outlier detection in the [scikit-learn documentation](https://scikit-learn.org/stable/modules/outlier_detection.html). + diff --git a/collectors/python.d.plugin/anomalies/anomalies.chart.py b/collectors/python.d.plugin/anomalies/anomalies.chart.py new file mode 100644 index 0000000..8ca3df6 --- /dev/null +++ b/collectors/python.d.plugin/anomalies/anomalies.chart.py @@ -0,0 +1,426 @@ +# -*- coding: utf-8 -*- +# Description: anomalies netdata python.d module +# Author: andrewm4894 +# SPDX-License-Identifier: GPL-3.0-or-later + +import sys +import time +from datetime import datetime +import re +import warnings + +import requests +import numpy as np +import pandas as pd +from netdata_pandas.data import get_data, get_allmetrics_async +from pyod.models.hbos import HBOS +from pyod.models.pca import PCA +from pyod.models.loda import LODA +from pyod.models.iforest import IForest +from pyod.models.cblof import CBLOF +from pyod.models.feature_bagging import FeatureBagging +from pyod.models.copod import COPOD +from sklearn.preprocessing import MinMaxScaler + +from bases.FrameworkServices.SimpleService import SimpleService + +# ignore some sklearn/numpy warnings that are ok +warnings.filterwarnings('ignore', r'All-NaN slice encountered') +warnings.filterwarnings('ignore', r'invalid value encountered in true_divide') +warnings.filterwarnings('ignore', r'divide by zero encountered in true_divide') +warnings.filterwarnings('ignore', r'invalid value encountered in subtract') + +disabled_by_default = True + +ORDER = ['probability', 'anomaly'] + +CHARTS = { + 'probability': { + 'options': ['probability', 'Anomaly Probability', 'probability', 'anomalies', 'anomalies.probability', 'line'], + 'lines': [] + }, + 'anomaly': { + 'options': ['anomaly', 'Anomaly', 'count', 'anomalies', 'anomalies.anomaly', 'stacked'], + 'lines': [] + }, +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.basic_init() + self.charts_init() + self.custom_models_init() + self.data_init() + self.model_params_init() + self.models_init() + self.collected_dims = {'probability': set(), 'anomaly': set()} + + def check(self): + python_version = float('{}.{}'.format(sys.version_info[0], sys.version_info[1])) + if python_version < 3.6: + self.error("anomalies collector only works with Python>=3.6") + if len(self.host_charts_dict[self.host]) > 0: + _ = get_allmetrics_async(host_charts_dict=self.host_charts_dict, protocol=self.protocol, user=self.username, pwd=self.password) + return True + + def basic_init(self): + """Perform some basic initialization. + """ + self.order = ORDER + self.definitions = CHARTS + self.protocol = self.configuration.get('protocol', 'http') + self.host = self.configuration.get('host', '127.0.0.1:19999') + self.username = self.configuration.get('username', None) + self.password = self.configuration.get('password', None) + self.tls_verify = self.configuration.get('tls_verify', True) + self.fitted_at = {} + self.df_allmetrics = pd.DataFrame() + self.last_train_at = 0 + self.include_average_prob = bool(self.configuration.get('include_average_prob', True)) + self.reinitialize_at_every_step = bool(self.configuration.get('reinitialize_at_every_step', False)) + + def charts_init(self): + """Do some initialisation of charts in scope related variables. + """ + self.charts_regex = re.compile(self.configuration.get('charts_regex','None')) + self.charts_available = [c for c in list(requests.get(f'{self.protocol}://{self.host}/api/v1/charts', verify=self.tls_verify).json().get('charts', {}).keys())] + self.charts_in_scope = list(filter(self.charts_regex.match, self.charts_available)) + self.charts_to_exclude = self.configuration.get('charts_to_exclude', '').split(',') + if len(self.charts_to_exclude) > 0: + self.charts_in_scope = [c for c in self.charts_in_scope if c not in self.charts_to_exclude] + + def custom_models_init(self): + """Perform initialization steps related to custom models. + """ + self.custom_models = self.configuration.get('custom_models', None) + self.custom_models_normalize = bool(self.configuration.get('custom_models_normalize', False)) + if self.custom_models: + self.custom_models_names = [model['name'] for model in self.custom_models] + self.custom_models_dims = [i for s in [model['dimensions'].split(',') for model in self.custom_models] for i in s] + self.custom_models_dims = [dim if '::' in dim else f'{self.host}::{dim}' for dim in self.custom_models_dims] + self.custom_models_charts = list(set([dim.split('|')[0].split('::')[1] for dim in self.custom_models_dims])) + self.custom_models_hosts = list(set([dim.split('::')[0] for dim in self.custom_models_dims])) + self.custom_models_host_charts_dict = {} + for host in self.custom_models_hosts: + self.custom_models_host_charts_dict[host] = list(set([dim.split('::')[1].split('|')[0] for dim in self.custom_models_dims if dim.startswith(host)])) + self.custom_models_dims_renamed = [f"{model['name']}|{dim}" for model in self.custom_models for dim in model['dimensions'].split(',')] + self.models_in_scope = list(set([f'{self.host}::{c}' for c in self.charts_in_scope] + self.custom_models_names)) + self.charts_in_scope = list(set(self.charts_in_scope + self.custom_models_charts)) + self.host_charts_dict = {self.host: self.charts_in_scope} + for host in self.custom_models_host_charts_dict: + if host not in self.host_charts_dict: + self.host_charts_dict[host] = self.custom_models_host_charts_dict[host] + else: + for chart in self.custom_models_host_charts_dict[host]: + if chart not in self.host_charts_dict[host]: + self.host_charts_dict[host].extend(chart) + else: + self.models_in_scope = [f'{self.host}::{c}' for c in self.charts_in_scope] + self.host_charts_dict = {self.host: self.charts_in_scope} + self.model_display_names = {model: model.split('::')[1] if '::' in model else model for model in self.models_in_scope} + #self.info(f'self.host_charts_dict (len={len(self.host_charts_dict[self.host])}): {self.host_charts_dict}') + + def data_init(self): + """Initialize some empty data objects. + """ + self.data_probability_latest = {f'{m}_prob': 0 for m in self.charts_in_scope} + self.data_anomaly_latest = {f'{m}_anomaly': 0 for m in self.charts_in_scope} + self.data_latest = {**self.data_probability_latest, **self.data_anomaly_latest} + + def model_params_init(self): + """Model parameters initialisation. + """ + self.train_max_n = self.configuration.get('train_max_n', 100000) + self.train_n_secs = self.configuration.get('train_n_secs', 14400) + self.offset_n_secs = self.configuration.get('offset_n_secs', 0) + self.train_every_n = self.configuration.get('train_every_n', 1800) + self.train_no_prediction_n = self.configuration.get('train_no_prediction_n', 10) + self.initial_train_data_after = self.configuration.get('initial_train_data_after', 0) + self.initial_train_data_before = self.configuration.get('initial_train_data_before', 0) + self.contamination = self.configuration.get('contamination', 0.001) + self.lags_n = {model: self.configuration.get('lags_n', 5) for model in self.models_in_scope} + self.smooth_n = {model: self.configuration.get('smooth_n', 5) for model in self.models_in_scope} + self.diffs_n = {model: self.configuration.get('diffs_n', 5) for model in self.models_in_scope} + + def models_init(self): + """Models initialisation. + """ + self.model = self.configuration.get('model', 'pca') + if self.model == 'pca': + self.models = {model: PCA(contamination=self.contamination) for model in self.models_in_scope} + elif self.model == 'loda': + self.models = {model: LODA(contamination=self.contamination) for model in self.models_in_scope} + elif self.model == 'iforest': + self.models = {model: IForest(n_estimators=50, bootstrap=True, behaviour='new', contamination=self.contamination) for model in self.models_in_scope} + elif self.model == 'cblof': + self.models = {model: CBLOF(n_clusters=3, contamination=self.contamination) for model in self.models_in_scope} + elif self.model == 'feature_bagging': + self.models = {model: FeatureBagging(base_estimator=PCA(contamination=self.contamination), contamination=self.contamination) for model in self.models_in_scope} + elif self.model == 'copod': + self.models = {model: COPOD(contamination=self.contamination) for model in self.models_in_scope} + elif self.model == 'hbos': + self.models = {model: HBOS(contamination=self.contamination) for model in self.models_in_scope} + else: + self.models = {model: HBOS(contamination=self.contamination) for model in self.models_in_scope} + self.custom_model_scalers = {model: MinMaxScaler() for model in self.models_in_scope} + + def model_init(self, model): + """Model initialisation of a single model. + """ + if self.model == 'pca': + self.models[model] = PCA(contamination=self.contamination) + elif self.model == 'loda': + self.models[model] = LODA(contamination=self.contamination) + elif self.model == 'iforest': + self.models[model] = IForest(n_estimators=50, bootstrap=True, behaviour='new', contamination=self.contamination) + elif self.model == 'cblof': + self.models[model] = CBLOF(n_clusters=3, contamination=self.contamination) + elif self.model == 'feature_bagging': + self.models[model] = FeatureBagging(base_estimator=PCA(contamination=self.contamination), contamination=self.contamination) + elif self.model == 'copod': + self.models[model] = COPOD(contamination=self.contamination) + elif self.model == 'hbos': + self.models[model] = HBOS(contamination=self.contamination) + else: + self.models[model] = HBOS(contamination=self.contamination) + self.custom_model_scalers[model] = MinMaxScaler() + + def reinitialize(self): + """Reinitialize charts, models and data to a beginning state. + """ + self.charts_init() + self.custom_models_init() + self.data_init() + self.model_params_init() + self.models_init() + + def save_data_latest(self, data, data_probability, data_anomaly): + """Save the most recent data objects to be used if needed in the future. + """ + self.data_latest = data + self.data_probability_latest = data_probability + self.data_anomaly_latest = data_anomaly + + def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1): + """If dimension not in chart then add it. + """ + for dim in data: + if dim not in self.collected_dims[chart]: + self.collected_dims[chart].add(dim) + self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor]) + + for dim in list(self.collected_dims[chart]): + if dim not in data: + self.collected_dims[chart].remove(dim) + self.charts[chart].del_dimension(dim, hide=False) + + def add_custom_models_dims(self, df): + """Given a df, select columns used by custom models, add custom model name as prefix, and append to df. + + :param df : dataframe to append new renamed columns to. + :return: dataframe with additional columns added relating to the specified custom models. + """ + df_custom = df[self.custom_models_dims].copy() + df_custom.columns = self.custom_models_dims_renamed + df = df.join(df_custom) + + return df + + def make_features(self, arr, train=False, model=None): + """Take in numpy array and preprocess accordingly by taking diffs, smoothing and adding lags. + + :param arr : numpy array we want to make features from. + :param train : True if making features for training, in which case need to fit_transform scaler and maybe sample train_max_n. + :param model : model to make features for. + :return: transformed numpy array. + """ + + def lag(arr, n): + res = np.empty_like(arr) + res[:n] = np.nan + res[n:] = arr[:-n] + + return res + + arr = np.nan_to_num(arr) + + diffs_n = self.diffs_n[model] + smooth_n = self.smooth_n[model] + lags_n = self.lags_n[model] + + if self.custom_models_normalize and model in self.custom_models_names: + if train: + arr = self.custom_model_scalers[model].fit_transform(arr) + else: + arr = self.custom_model_scalers[model].transform(arr) + + if diffs_n > 0: + arr = np.diff(arr, diffs_n, axis=0) + arr = arr[~np.isnan(arr).any(axis=1)] + + if smooth_n > 1: + arr = np.cumsum(arr, axis=0, dtype=float) + arr[smooth_n:] = arr[smooth_n:] - arr[:-smooth_n] + arr = arr[smooth_n - 1:] / smooth_n + arr = arr[~np.isnan(arr).any(axis=1)] + + if lags_n > 0: + arr_orig = np.copy(arr) + for lag_n in range(1, lags_n + 1): + arr = np.concatenate((arr, lag(arr_orig, lag_n)), axis=1) + arr = arr[~np.isnan(arr).any(axis=1)] + + if train: + if len(arr) > self.train_max_n: + arr = arr[np.random.randint(arr.shape[0], size=self.train_max_n), :] + + arr = np.nan_to_num(arr) + + return arr + + def train(self, models_to_train=None, train_data_after=0, train_data_before=0): + """Pull required training data and train a model for each specified model. + + :param models_to_train : list of models to train on. + :param train_data_after : integer timestamp for start of train data. + :param train_data_before : integer timestamp for end of train data. + """ + now = datetime.now().timestamp() + if train_data_after > 0 and train_data_before > 0: + before = train_data_before + after = train_data_after + else: + before = int(now) - self.offset_n_secs + after = before - self.train_n_secs + + # get training data + df_train = get_data( + host_charts_dict=self.host_charts_dict, host_prefix=True, host_sep='::', after=after, before=before, + sort_cols=True, numeric_only=True, protocol=self.protocol, float_size='float32', user=self.username, pwd=self.password, + verify=self.tls_verify + ).ffill() + if self.custom_models: + df_train = self.add_custom_models_dims(df_train) + + # train model + self.try_fit(df_train, models_to_train=models_to_train) + self.info(f'training complete in {round(time.time() - now, 2)} seconds (runs_counter={self.runs_counter}, model={self.model}, train_n_secs={self.train_n_secs}, models={len(self.fitted_at)}, n_fit_success={self.n_fit_success}, n_fit_fails={self.n_fit_fail}, after={after}, before={before}).') + self.last_train_at = self.runs_counter + + def try_fit(self, df_train, models_to_train=None): + """Try fit each model and try to fallback to a default model if fit fails for any reason. + + :param df_train : data to train on. + :param models_to_train : list of models to train. + """ + if models_to_train is None: + models_to_train = list(self.models.keys()) + self.n_fit_fail, self.n_fit_success = 0, 0 + for model in models_to_train: + if model not in self.models: + self.model_init(model) + X_train = self.make_features( + df_train[df_train.columns[df_train.columns.str.startswith(f'{model}|')]].values, + train=True, model=model) + try: + self.models[model].fit(X_train) + self.n_fit_success += 1 + except Exception as e: + self.n_fit_fail += 1 + self.info(e) + self.info(f'training failed for {model} at run_counter {self.runs_counter}, defaulting to hbos model.') + self.models[model] = HBOS(contamination=self.contamination) + self.models[model].fit(X_train) + self.fitted_at[model] = self.runs_counter + + def predict(self): + """Get latest data, make it into a feature vector, and get predictions for each available model. + + :return: (,) tuple of dictionaries, one for probability scores and the other for anomaly predictions. + """ + # get recent data to predict on + df_allmetrics = get_allmetrics_async( + host_charts_dict=self.host_charts_dict, host_prefix=True, host_sep='::', wide=True, sort_cols=True, + protocol=self.protocol, numeric_only=True, float_size='float32', user=self.username, pwd=self.password + ) + if self.custom_models: + df_allmetrics = self.add_custom_models_dims(df_allmetrics) + self.df_allmetrics = self.df_allmetrics.append(df_allmetrics).ffill().tail((max(self.lags_n.values()) + max(self.smooth_n.values()) + max(self.diffs_n.values())) * 2) + + # get predictions + data_probability, data_anomaly = self.try_predict() + + return data_probability, data_anomaly + + def try_predict(self): + """Try make prediction and fall back to last known prediction if fails. + + :return: (,) tuple of dictionaries, one for probability scores and the other for anomaly predictions. + """ + data_probability, data_anomaly = {}, {} + for model in self.fitted_at.keys(): + model_display_name = self.model_display_names[model] + try: + X_model = np.nan_to_num( + self.make_features( + self.df_allmetrics[self.df_allmetrics.columns[self.df_allmetrics.columns.str.startswith(f'{model}|')]].values, + model=model + )[-1,:].reshape(1, -1) + ) + data_probability[model_display_name + '_prob'] = np.nan_to_num(self.models[model].predict_proba(X_model)[-1][1]) * 10000 + data_anomaly[model_display_name + '_anomaly'] = self.models[model].predict(X_model)[-1] + except Exception as _: + #self.info(e) + if model_display_name + '_prob' in self.data_latest: + #self.info(f'prediction failed for {model} at run_counter {self.runs_counter}, using last prediction instead.') + data_probability[model_display_name + '_prob'] = self.data_latest[model_display_name + '_prob'] + data_anomaly[model_display_name + '_anomaly'] = self.data_latest[model_display_name + '_anomaly'] + else: + #self.info(f'prediction failed for {model} at run_counter {self.runs_counter}, skipping as no previous prediction.') + continue + + return data_probability, data_anomaly + + def get_data(self): + + # initialize to what's available right now + if self.reinitialize_at_every_step or len(self.host_charts_dict[self.host]) == 0: + self.charts_init() + self.custom_models_init() + self.model_params_init() + + # if not all models have been trained then train those we need to + if len(self.fitted_at) < len(self.models_in_scope): + self.train( + models_to_train=[m for m in self.models_in_scope if m not in self.fitted_at], + train_data_after=self.initial_train_data_after, + train_data_before=self.initial_train_data_before + ) + # retrain all models as per schedule from config + elif self.train_every_n > 0 and self.runs_counter % self.train_every_n == 0: + self.reinitialize() + self.train() + + # roll forward previous predictions around a training step to avoid the possibility of having the training itself trigger an anomaly + if (self.runs_counter - self.last_train_at) <= self.train_no_prediction_n: + data_probability = self.data_probability_latest + data_anomaly = self.data_anomaly_latest + else: + data_probability, data_anomaly = self.predict() + if self.include_average_prob: + average_prob = np.mean(list(data_probability.values())) + data_probability['average_prob'] = 0 if np.isnan(average_prob) else average_prob + + data = {**data_probability, **data_anomaly} + + self.validate_charts('probability', data_probability, divisor=100) + self.validate_charts('anomaly', data_anomaly) + + self.save_data_latest(data, data_probability, data_anomaly) + + #self.info(f'len(data)={len(data)}') + #self.info(f'data') + + return data diff --git a/collectors/python.d.plugin/anomalies/anomalies.conf b/collectors/python.d.plugin/anomalies/anomalies.conf new file mode 100644 index 0000000..ef86770 --- /dev/null +++ b/collectors/python.d.plugin/anomalies/anomalies.conf @@ -0,0 +1,184 @@ +# netdata python.d.plugin configuration for anomalies +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 2 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) + +# Pull data from local Netdata node. +anomalies: + name: 'Anomalies' + + # Host to pull data from. + host: '127.0.0.1:19999' + + # Username and Password for Netdata if using basic auth. + # username: '???' + # password: '???' + + # Use http or https to pull data + protocol: 'http' + + # SSL verify parameter for requests.get() calls + tls_verify: true + + # What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. + charts_regex: 'system\..*' + + # Charts to exclude, useful if you would like to exclude some specific charts. + # Note: should be a ',' separated string like 'chart.name,chart.name'. + charts_to_exclude: 'system.uptime,system.entropy' + + # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'. + # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html. + model: 'pca' + + # Max number of observations to train on, to help cap compute cost of training model if you set a very large train_n_secs. + train_max_n: 100000 + + # How often to re-train the model (assuming update_every=1 then train_every_n=1800 represents (re)training every 30 minutes). + # Note: If you want to turn off re-training set train_every_n=0 and after initial training the models will not be retrained. + train_every_n: 1800 + + # The length of the window of data to train on (14400 = last 4 hours). + train_n_secs: 14400 + + # How many prediction steps after a train event to just use previous prediction value for. + # Used to reduce possibility of the training step itself appearing as an anomaly on the charts. + train_no_prediction_n: 10 + + # If you would like to train the model for the first time on a specific window then you can define it using the below two variables. + # Start of training data for initial model. + # initial_train_data_after: 1604578857 + + # End of training data for initial model. + # initial_train_data_before: 1604593257 + + # If you would like to ignore recent data in training then you can offset it by offset_n_secs. + offset_n_secs: 0 + + # How many lagged values of each dimension to include in the 'feature vector' each model is trained on. + lags_n: 5 + + # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on. + smooth_n: 3 + + # How many differences to take in preprocessing your data. + # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing + # diffs_n=0 would mean training models on the raw values of each dimension. + # diffs_n=1 means everything is done in terms of differences. + diffs_n: 1 + + # What is the typical proportion of anomalies in your data on average? + # This parameter can control the sensitivity of your models to anomalies. + # Some discussion here: https://github.com/yzhao062/pyod/issues/144 + contamination: 0.001 + + # Set to true to include an "average_prob" dimension on anomalies probability chart which is + # just the average of all anomaly probabilities at each time step + include_average_prob: true + + # Define any custom models you would like to create anomaly probabilities for, some examples below to show how. + # For example below example creates two custom models, one to run anomaly detection user and system cpu for our demo servers + # and one on the cpu and mem apps metrics for the python.d.plugin. + # custom_models: + # - name: 'demos_cpu' + # dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system' + # - name: 'apps_python_d_plugin' + # dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin' + + # Set to true to normalize, using min-max standardization, features used for the custom models. + # Useful if your custom models contain dimensions on very different scales an model you use does + # not internally do its own normalization. Usually best to leave as false. + # custom_models_normalize: false + +# Standalone Custom models example as an additional collector job. +# custom: +# name: 'custom' +# host: '127.0.0.1:19999' +# protocol: 'http' +# charts_regex: 'None' +# charts_to_exclude: 'None' +# model: 'pca' +# train_max_n: 100000 +# train_every_n: 1800 +# train_n_secs: 14400 +# offset_n_secs: 0 +# lags_n: 5 +# smooth_n: 3 +# diffs_n: 1 +# contamination: 0.001 +# custom_models: +# - name: 'user_netdata' +# dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata' +# - name: 'apps_python_d_plugin' +# dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin' + +# Pull data from some demo nodes for cross node custom models. +# demos: +# name: 'demos' +# host: '127.0.0.1:19999' +# protocol: 'http' +# charts_regex: 'None' +# charts_to_exclude: 'None' +# model: 'pca' +# train_max_n: 100000 +# train_every_n: 1800 +# train_n_secs: 14400 +# offset_n_secs: 0 +# lags_n: 5 +# smooth_n: 3 +# diffs_n: 1 +# contamination: 0.001 +# custom_models: +# - name: 'system.cpu' +# dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system' +# - name: 'system.ip' +# dimensions: 'london.my-netdata.io::system.ip|received,london.my-netdata.io::system.ip|sent,newyork.my-netdata.io::system.ip|received,newyork.my-netdata.io::system.ip|sent' +# - name: 'system.net' +# dimensions: 'london.my-netdata.io::system.net|received,london.my-netdata.io::system.net|sent,newyork.my-netdata.io::system.net|received,newyork.my-netdata.io::system.net|sent' +# - name: 'system.io' +# dimensions: 'london.my-netdata.io::system.io|in,london.my-netdata.io::system.io|out,newyork.my-netdata.io::system.io|in,newyork.my-netdata.io::system.io|out' + +# Example additional job if you want to also pull data from a child streaming to your +# local parent or even a remote node so long as the Netdata REST API is accessible. +# mychildnode1: +# name: 'mychildnode1' +# host: '127.0.0.1:19999/host/mychildnode1' +# protocol: 'http' +# charts_regex: 'system\..*' +# charts_to_exclude: 'None' +# model: 'pca' +# train_max_n: 100000 +# train_every_n: 1800 +# train_n_secs: 14400 +# offset_n_secs: 0 +# lags_n: 5 +# smooth_n: 3 +# diffs_n: 1 +# contamination: 0.001 diff --git a/collectors/python.d.plugin/beanstalk/Makefile.inc b/collectors/python.d.plugin/beanstalk/Makefile.inc new file mode 100644 index 0000000..4bbb708 --- /dev/null +++ b/collectors/python.d.plugin/beanstalk/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += beanstalk/beanstalk.chart.py +dist_pythonconfig_DATA += beanstalk/beanstalk.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += beanstalk/README.md beanstalk/Makefile.inc + diff --git a/collectors/python.d.plugin/beanstalk/README.md b/collectors/python.d.plugin/beanstalk/README.md new file mode 100644 index 0000000..3b63259 --- /dev/null +++ b/collectors/python.d.plugin/beanstalk/README.md @@ -0,0 +1,133 @@ + + +# Beanstalk monitoring with Netdata + +Provides server and tube-level statistics. + +## Requirements + +- `python-beanstalkc` + +**Server statistics:** + +1. **Cpu usage** in cpu time + + - user + - system + +2. **Jobs rate** in jobs/s + + - total + - timeouts + +3. **Connections rate** in connections/s + + - connections + +4. **Commands rate** in commands/s + + - put + - peek + - peek-ready + - peek-delayed + - peek-buried + - reserve + - use + - watch + - ignore + - delete + - release + - bury + - kick + - stats + - stats-job + - stats-tube + - list-tubes + - list-tube-used + - list-tubes-watched + - pause-tube + +5. **Current tubes** in tubes + + - tubes + +6. **Current jobs** in jobs + + - urgent + - ready + - reserved + - delayed + - buried + +7. **Current connections** in connections + + - written + - producers + - workers + - waiting + +8. **Binlog** in records/s + + - written + - migrated + +9. **Uptime** in seconds + + - uptime + +**Per tube statistics:** + +1. **Jobs rate** in jobs/s + + - jobs + +2. **Jobs** in jobs + + - using + - ready + - reserved + - delayed + - buried + +3. **Connections** in connections + + - using + - waiting + - watching + +4. **Commands** in commands/s + + - deletes + - pauses + +5. **Pause** in seconds + + - since + - left + +## Configuration + +Edit the `python.d/beanstalk.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/beanstalk.conf +``` + +Sample: + +```yaml +host : '127.0.0.1' +port : 11300 +``` + +If no configuration is given, module will attempt to connect to beanstalkd on `127.0.0.1:11300` address + +--- + + diff --git a/collectors/python.d.plugin/beanstalk/beanstalk.chart.py b/collectors/python.d.plugin/beanstalk/beanstalk.chart.py new file mode 100644 index 0000000..396543e --- /dev/null +++ b/collectors/python.d.plugin/beanstalk/beanstalk.chart.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# Description: beanstalk netdata python.d module +# Author: ilyam8 +# SPDX-License-Identifier: GPL-3.0-or-later + +try: + import beanstalkc + + BEANSTALKC = True +except ImportError: + BEANSTALKC = False + +from bases.FrameworkServices.SimpleService import SimpleService +from bases.loaders import load_yaml + +ORDER = [ + 'cpu_usage', + 'jobs_rate', + 'connections_rate', + 'commands_rate', + 'current_tubes', + 'current_jobs', + 'current_connections', + 'binlog', + 'uptime', +] + +CHARTS = { + 'cpu_usage': { + 'options': [None, 'Cpu Usage', 'cpu time', 'server statistics', 'beanstalk.cpu_usage', 'area'], + 'lines': [ + ['rusage-utime', 'user', 'incremental'], + ['rusage-stime', 'system', 'incremental'] + ] + }, + 'jobs_rate': { + 'options': [None, 'Jobs Rate', 'jobs/s', 'server statistics', 'beanstalk.jobs_rate', 'line'], + 'lines': [ + ['total-jobs', 'total', 'incremental'], + ['job-timeouts', 'timeouts', 'incremental'] + ] + }, + 'connections_rate': { + 'options': [None, 'Connections Rate', 'connections/s', 'server statistics', 'beanstalk.connections_rate', + 'area'], + 'lines': [ + ['total-connections', 'connections', 'incremental'] + ] + }, + 'commands_rate': { + 'options': [None, 'Commands Rate', 'commands/s', 'server statistics', 'beanstalk.commands_rate', 'stacked'], + 'lines': [ + ['cmd-put', 'put', 'incremental'], + ['cmd-peek', 'peek', 'incremental'], + ['cmd-peek-ready', 'peek-ready', 'incremental'], + ['cmd-peek-delayed', 'peek-delayed', 'incremental'], + ['cmd-peek-buried', 'peek-buried', 'incremental'], + ['cmd-reserve', 'reserve', 'incremental'], + ['cmd-use', 'use', 'incremental'], + ['cmd-watch', 'watch', 'incremental'], + ['cmd-ignore', 'ignore', 'incremental'], + ['cmd-delete', 'delete', 'incremental'], + ['cmd-release', 'release', 'incremental'], + ['cmd-bury', 'bury', 'incremental'], + ['cmd-kick', 'kick', 'incremental'], + ['cmd-stats', 'stats', 'incremental'], + ['cmd-stats-job', 'stats-job', 'incremental'], + ['cmd-stats-tube', 'stats-tube', 'incremental'], + ['cmd-list-tubes', 'list-tubes', 'incremental'], + ['cmd-list-tube-used', 'list-tube-used', 'incremental'], + ['cmd-list-tubes-watched', 'list-tubes-watched', 'incremental'], + ['cmd-pause-tube', 'pause-tube', 'incremental'] + ] + }, + 'current_tubes': { + 'options': [None, 'Current Tubes', 'tubes', 'server statistics', 'beanstalk.current_tubes', 'area'], + 'lines': [ + ['current-tubes', 'tubes'] + ] + }, + 'current_jobs': { + 'options': [None, 'Current Jobs', 'jobs', 'server statistics', 'beanstalk.current_jobs', 'stacked'], + 'lines': [ + ['current-jobs-urgent', 'urgent'], + ['current-jobs-ready', 'ready'], + ['current-jobs-reserved', 'reserved'], + ['current-jobs-delayed', 'delayed'], + ['current-jobs-buried', 'buried'] + ] + }, + 'current_connections': { + 'options': [None, 'Current Connections', 'connections', 'server statistics', + 'beanstalk.current_connections', 'line'], + 'lines': [ + ['current-connections', 'written'], + ['current-producers', 'producers'], + ['current-workers', 'workers'], + ['current-waiting', 'waiting'] + ] + }, + 'binlog': { + 'options': [None, 'Binlog', 'records/s', 'server statistics', 'beanstalk.binlog', 'line'], + 'lines': [ + ['binlog-records-written', 'written', 'incremental'], + ['binlog-records-migrated', 'migrated', 'incremental'] + ] + }, + 'uptime': { + 'options': [None, 'Uptime', 'seconds', 'server statistics', 'beanstalk.uptime', 'line'], + 'lines': [ + ['uptime'], + ] + } +} + + +def tube_chart_template(name): + order = [ + '{0}_jobs_rate'.format(name), + '{0}_jobs'.format(name), + '{0}_connections'.format(name), + '{0}_commands'.format(name), + '{0}_pause'.format(name) + ] + family = 'tube {0}'.format(name) + + charts = { + order[0]: { + 'options': [None, 'Job Rate', 'jobs/s', family, 'beanstalk.jobs_rate', 'area'], + 'lines': [ + ['_'.join([name, 'total-jobs']), 'jobs', 'incremental'] + ] + }, + order[1]: { + 'options': [None, 'Jobs', 'jobs', family, 'beanstalk.jobs', 'stacked'], + 'lines': [ + ['_'.join([name, 'current-jobs-urgent']), 'urgent'], + ['_'.join([name, 'current-jobs-ready']), 'ready'], + ['_'.join([name, 'current-jobs-reserved']), 'reserved'], + ['_'.join([name, 'current-jobs-delayed']), 'delayed'], + ['_'.join([name, 'current-jobs-buried']), 'buried'] + ] + }, + order[2]: { + 'options': [None, 'Connections', 'connections', family, 'beanstalk.connections', 'stacked'], + 'lines': [ + ['_'.join([name, 'current-using']), 'using'], + ['_'.join([name, 'current-waiting']), 'waiting'], + ['_'.join([name, 'current-watching']), 'watching'] + ] + }, + order[3]: { + 'options': [None, 'Commands', 'commands/s', family, 'beanstalk.commands', 'stacked'], + 'lines': [ + ['_'.join([name, 'cmd-delete']), 'deletes', 'incremental'], + ['_'.join([name, 'cmd-pause-tube']), 'pauses', 'incremental'] + ] + }, + order[4]: { + 'options': [None, 'Pause', 'seconds', family, 'beanstalk.pause', 'stacked'], + 'lines': [ + ['_'.join([name, 'pause']), 'since'], + ['_'.join([name, 'pause-time-left']), 'left'] + ] + } + } + + return order, charts + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.configuration = configuration + self.order = list(ORDER) + self.definitions = dict(CHARTS) + self.conn = None + self.alive = True + + def check(self): + if not BEANSTALKC: + self.error("'beanstalkc' module is needed to use beanstalk.chart.py") + return False + + self.conn = self.connect() + + return True if self.conn else False + + def get_data(self): + """ + :return: dict + """ + if not self.is_alive(): + return None + + active_charts = self.charts.active_charts() + data = dict() + + try: + data.update(self.conn.stats()) + + for tube in self.conn.tubes(): + stats = self.conn.stats_tube(tube) + + if tube + '_jobs_rate' not in active_charts: + self.create_new_tube_charts(tube) + + for stat in stats: + data['_'.join([tube, stat])] = stats[stat] + + except beanstalkc.SocketError: + self.alive = False + return None + + return data or None + + def create_new_tube_charts(self, tube): + order, charts = tube_chart_template(tube) + + for chart_name in order: + params = [chart_name] + charts[chart_name]['options'] + dimensions = charts[chart_name]['lines'] + + new_chart = self.charts.add_chart(params) + for dimension in dimensions: + new_chart.add_dimension(dimension) + + def connect(self): + host = self.configuration.get('host', '127.0.0.1') + port = self.configuration.get('port', 11300) + timeout = self.configuration.get('timeout', 1) + try: + return beanstalkc.Connection(host=host, + port=port, + connect_timeout=timeout, + parse_yaml=load_yaml) + except beanstalkc.SocketError as error: + self.error('Connection to {0}:{1} failed: {2}'.format(host, port, error)) + return None + + def reconnect(self): + try: + self.conn.reconnect() + self.alive = True + return True + except beanstalkc.SocketError: + return False + + def is_alive(self): + if not self.alive: + return self.reconnect() + return True diff --git a/collectors/python.d.plugin/beanstalk/beanstalk.conf b/collectors/python.d.plugin/beanstalk/beanstalk.conf new file mode 100644 index 0000000..6d9773a --- /dev/null +++ b/collectors/python.d.plugin/beanstalk/beanstalk.conf @@ -0,0 +1,78 @@ +# netdata python.d.plugin configuration for beanstalk +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# chart_cleanup sets the default chart cleanup interval in iterations. +# A chart is marked as obsolete if it has not been updated +# 'chart_cleanup' iterations in a row. +# When a plugin sends the obsolete flag, the charts are not deleted +# from netdata immediately. +# They will be hidden immediately (not offered to dashboard viewer, +# streamed upstream and archived to external databases) and deleted one hour +# later (configurable from netdata.conf). +# chart_cleanup: 10 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# chart_cleanup: 10 # the JOB's chart cleanup interval in iterations +# +# Additionally to the above, beanstalk also supports the following: +# +# host: 'host' # Server ip address or hostname. Default: 127.0.0.1 +# port: port # Beanstalkd port. Default: +# +# ---------------------------------------------------------------------- diff --git a/collectors/python.d.plugin/bind_rndc/Makefile.inc b/collectors/python.d.plugin/bind_rndc/Makefile.inc new file mode 100644 index 0000000..72f3914 --- /dev/null +++ b/collectors/python.d.plugin/bind_rndc/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += bind_rndc/bind_rndc.chart.py +dist_pythonconfig_DATA += bind_rndc/bind_rndc.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += bind_rndc/README.md bind_rndc/Makefile.inc + diff --git a/collectors/python.d.plugin/bind_rndc/README.md b/collectors/python.d.plugin/bind_rndc/README.md new file mode 100644 index 0000000..2d747f8 --- /dev/null +++ b/collectors/python.d.plugin/bind_rndc/README.md @@ -0,0 +1,79 @@ + + +# ISC Bind monitoring with Netdata + +Collects Name server summary performance statistics using `rndc` tool. + +## Requirements + +- Version of bind must be 9.6 + +- Netdata must have permissions to run `rndc stats` + +It produces: + +1. **Name server statistics** + + - requests + - responses + - success + - auth_answer + - nonauth_answer + - nxrrset + - failure + - nxdomain + - recursion + - duplicate + - rejections + +2. **Incoming queries** + + - RESERVED0 + - A + - NS + - CNAME + - SOA + - PTR + - MX + - TXT + - X25 + - AAAA + - SRV + - NAPTR + - A6 + - DS + - RSIG + - DNSKEY + - SPF + - ANY + - DLV + +3. **Outgoing queries** + +- Same as Incoming queries + +## Configuration + +Edit the `python.d/bind_rndc.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/bind_rndc.conf +``` + +Sample: + +```yaml +local: + named_stats_path : '/var/log/bind/named.stats' +``` + +If no configuration is given, module will attempt to read named.stats file at `/var/log/bind/named.stats` + +--- + + diff --git a/collectors/python.d.plugin/bind_rndc/bind_rndc.chart.py b/collectors/python.d.plugin/bind_rndc/bind_rndc.chart.py new file mode 100644 index 0000000..9d6c9fe --- /dev/null +++ b/collectors/python.d.plugin/bind_rndc/bind_rndc.chart.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# Description: bind rndc netdata python.d module +# Author: ilyam8 +# SPDX-License-Identifier: GPL-3.0-or-later + +import os +from collections import defaultdict +from subprocess import Popen + +from bases.FrameworkServices.SimpleService import SimpleService +from bases.collection import find_binary + +update_every = 30 + +ORDER = [ + 'name_server_statistics', + 'incoming_queries', + 'outgoing_queries', + 'named_stats_size', +] + +CHARTS = { + 'name_server_statistics': { + 'options': [None, 'Name Server Statistics', 'stats', 'name server statistics', + 'bind_rndc.name_server_statistics', 'line'], + 'lines': [ + ['nms_requests', 'requests', 'incremental'], + ['nms_rejected_queries', 'rejected_queries', 'incremental'], + ['nms_success', 'success', 'incremental'], + ['nms_failure', 'failure', 'incremental'], + ['nms_responses', 'responses', 'incremental'], + ['nms_duplicate', 'duplicate', 'incremental'], + ['nms_recursion', 'recursion', 'incremental'], + ['nms_nxrrset', 'nxrrset', 'incremental'], + ['nms_nxdomain', 'nxdomain', 'incremental'], + ['nms_non_auth_answer', 'non_auth_answer', 'incremental'], + ['nms_auth_answer', 'auth_answer', 'incremental'], + ['nms_dropped_queries', 'dropped_queries', 'incremental'], + ]}, + 'incoming_queries': { + 'options': [None, 'Incoming Queries', 'queries', 'incoming queries', 'bind_rndc.incoming_queries', 'line'], + 'lines': [ + ]}, + 'outgoing_queries': { + 'options': [None, 'Outgoing Queries', 'queries', 'outgoing queries', 'bind_rndc.outgoing_queries', 'line'], + 'lines': [ + ]}, + 'named_stats_size': { + 'options': [None, 'Named Stats File Size', 'MiB', 'file size', 'bind_rndc.stats_size', 'line'], + 'lines': [ + ['stats_size', None, 'absolute', 1, 1 << 20] + ] + } +} + +NMS = { + 'nms_requests': [ + 'IPv4 requests received', + 'IPv6 requests received', + 'TCP requests received', + 'requests with EDNS(0) receive' + ], + 'nms_responses': [ + 'responses sent', + 'truncated responses sent', + 'responses with EDNS(0) sent', + 'requests with unsupported EDNS version received' + ], + 'nms_failure': [ + 'other query failures', + 'queries resulted in SERVFAIL' + ], + 'nms_auth_answer': ['queries resulted in authoritative answer'], + 'nms_non_auth_answer': ['queries resulted in non authoritative answer'], + 'nms_nxrrset': ['queries resulted in nxrrset'], + 'nms_success': ['queries resulted in successful answer'], + 'nms_nxdomain': ['queries resulted in NXDOMAIN'], + 'nms_recursion': ['queries caused recursion'], + 'nms_duplicate': ['duplicate queries received'], + 'nms_rejected_queries': [ + 'auth queries rejected', + 'recursive queries rejected' + ], + 'nms_dropped_queries': ['queries dropped'] +} + +STATS = ['Name Server Statistics', 'Incoming Queries', 'Outgoing Queries'] + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.named_stats_path = self.configuration.get('named_stats_path', '/var/log/bind/named.stats') + self.rndc = find_binary('rndc') + self.data = dict( + nms_requests=0, + nms_responses=0, + nms_failure=0, + nms_auth=0, + nms_non_auth=0, + nms_nxrrset=0, + nms_success=0, + nms_nxdomain=0, + nms_recursion=0, + nms_duplicate=0, + nms_rejected_queries=0, + nms_dropped_queries=0, + ) + + def check(self): + if not self.rndc: + self.error('Can\'t locate "rndc" binary or binary is not executable by netdata') + return False + + if not (os.path.isfile(self.named_stats_path) and os.access(self.named_stats_path, os.R_OK)): + self.error('Cannot access file %s' % self.named_stats_path) + return False + + run_rndc = Popen([self.rndc, 'stats'], shell=False) + run_rndc.wait() + + if not run_rndc.returncode: + return True + self.error('Not enough permissions to run "%s stats"' % self.rndc) + return False + + def _get_raw_data(self): + """ + Run 'rndc stats' and read last dump from named.stats + :return: dict + """ + result = dict() + try: + current_size = os.path.getsize(self.named_stats_path) + run_rndc = Popen([self.rndc, 'stats'], shell=False) + run_rndc.wait() + + if run_rndc.returncode: + return None + with open(self.named_stats_path) as named_stats: + named_stats.seek(current_size) + result['stats'] = named_stats.readlines() + result['size'] = current_size + return result + except (OSError, IOError): + return None + + def _get_data(self): + """ + Parse data from _get_raw_data() + :return: dict + """ + + raw_data = self._get_raw_data() + + if raw_data is None: + return None + parsed = dict() + for stat in STATS: + parsed[stat] = parse_stats(field=stat, + named_stats=raw_data['stats']) + + self.data.update(nms_mapper(data=parsed['Name Server Statistics'])) + + for elem in zip(['Incoming Queries', 'Outgoing Queries'], ['incoming_queries', 'outgoing_queries']): + parsed_key, chart_name = elem[0], elem[1] + for dimension_id, value in queries_mapper(data=parsed[parsed_key], + add=chart_name[:9]).items(): + + if dimension_id not in self.data: + dimension = dimension_id.replace(chart_name[:9], '') + if dimension_id not in self.charts[chart_name]: + self.charts[chart_name].add_dimension([dimension_id, dimension, 'incremental']) + + self.data[dimension_id] = value + + self.data['stats_size'] = raw_data['size'] + return self.data + + +def parse_stats(field, named_stats): + """ + :param field: str: + :param named_stats: list: + :return: dict + + Example: + filed: 'Incoming Queries' + names_stats (list of lines): + ++ Incoming Requests ++ + 1405660 QUERY + 3 NOTIFY + ++ Incoming Queries ++ + 1214961 A + 75 NS + 2 CNAME + 2897 SOA + 35544 PTR + 14 MX + 5822 TXT + 145974 AAAA + 371 SRV + ++ Outgoing Queries ++ + ... + + result: + {'A', 1214961, 'NS': 75, 'CNAME': 2, 'SOA': 2897, ...} + """ + data = dict() + ns = iter(named_stats) + for line in ns: + if field not in line: + continue + while True: + try: + line = next(ns) + except StopIteration: + break + if '++' not in line: + if '[' in line: + continue + v, k = line.strip().split(' ', 1) + if k not in data: + data[k] = 0 + data[k] += int(v) + continue + break + break + return data + + +def nms_mapper(data): + """ + :param data: dict + :return: dict(defaultdict) + """ + result = defaultdict(int) + for k, v in NMS.items(): + for elem in v: + result[k] += data.get(elem, 0) + return result + + +def queries_mapper(data, add): + """ + :param data: dict + :param add: str + :return: dict + """ + return dict([(add + k, v) for k, v in data.items()]) diff --git a/collectors/python.d.plugin/bind_rndc/bind_rndc.conf b/collectors/python.d.plugin/bind_rndc/bind_rndc.conf new file mode 100644 index 0000000..3b7e9a2 --- /dev/null +++ b/collectors/python.d.plugin/bind_rndc/bind_rndc.conf @@ -0,0 +1,110 @@ +# netdata python.d.plugin configuration for bind_rndc +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, bind_rndc also supports the following: +# +# named_stats_path: 'path to named.stats' # Default: '/var/log/bind/named.stats' +#------------------------------------------------------------------------------------------------------------------ +# IMPORTANT Information +# +# BIND APPEND logs at EVERY RUN. Its NOT RECOMMENDED to set update_every below 30 sec. +# STRONGLY RECOMMENDED to create a bind-rndc conf file for logrotate +# +# To set up your BIND to dump stats do the following: +# +# 1. add to 'named.conf.options' options {}: +# statistics-file "/var/log/bind/named.stats"; +# +# 2. Create bind/ directory in /var/log +# cd /var/log/ && mkdir bind +# +# 3. Change owner of directory to 'bind' user +# chown bind bind/ +# +# 4. RELOAD (NOT restart) BIND +# systemctl reload bind9.service +# +# 5. Run as a root 'rndc stats' to dump (BIND will create named.stats in new directory) +# +# +# To ALLOW NETDATA TO RUN 'rndc stats' change '/etc/bind/rndc.key' group to netdata +# chown :netdata rndc.key +# +# The last BUT NOT least is to create bind-rndc.conf in logrotate.d/ +# The working one +# /var/log/bind/named.stats { +# +# daily +# rotate 4 +# compress +# delaycompress +# create 0644 bind bind +# missingok +# postrotate +# rndc reload > /dev/null +# endscript +# } +# +# To test your logrotate conf file run as root: +# +# logrotate /etc/logrotate.d/bind-rndc -d (debug dry-run mode) +# +# ---------------------------------------------------------------------- diff --git a/collectors/python.d.plugin/boinc/Makefile.inc b/collectors/python.d.plugin/boinc/Makefile.inc new file mode 100644 index 0000000..319e19c --- /dev/null +++ b/collectors/python.d.plugin/boinc/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += boinc/boinc.chart.py +dist_pythonconfig_DATA += boinc/boinc.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += boinc/README.md boinc/Makefile.inc + diff --git a/collectors/python.d.plugin/boinc/README.md b/collectors/python.d.plugin/boinc/README.md new file mode 100644 index 0000000..4da2d52 --- /dev/null +++ b/collectors/python.d.plugin/boinc/README.md @@ -0,0 +1,41 @@ + + +# BOINC monitoring with Netdata + +Monitors task counts for the Berkeley Open Infrastructure Networking Computing (BOINC) distributed computing client using the same RPC interface that the BOINC monitoring GUI does. + +It provides charts tracking the total number of tasks and active tasks, as well as ones tracking each of the possible states for tasks. + +## Configuration + +Edit the `python.d/boinc.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/boinc.conf +``` + +BOINC requires use of a password to access it's RPC interface. You can +find this password in the `gui_rpc_auth.cfg` file in your BOINC directory. + +By default, the module will try to auto-detect the password by looking +in `/var/lib/boinc` for this file (this is the location most Linux +distributions use for a system-wide BOINC installation), so things may +just work without needing configuration for the local system. + +You can monitor remote systems as well: + +```yaml +remote: + hostname: some-host + password: some-password +``` + +--- + + diff --git a/collectors/python.d.plugin/boinc/boinc.chart.py b/collectors/python.d.plugin/boinc/boinc.chart.py new file mode 100644 index 0000000..a31eda1 --- /dev/null +++ b/collectors/python.d.plugin/boinc/boinc.chart.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# Description: BOINC netdata python.d module +# Author: Austin S. Hemmelgarn (Ferroin) +# SPDX-License-Identifier: GPL-3.0-or-later + +import socket + +from bases.FrameworkServices.SimpleService import SimpleService +from third_party import boinc_client + +ORDER = [ + 'tasks', + 'states', + 'sched_states', + 'process_states', +] + +CHARTS = { + 'tasks': { + 'options': [None, 'Overall Tasks', 'tasks', 'boinc', 'boinc.tasks', 'line'], + 'lines': [ + ['total', 'Total', 'absolute', 1, 1], + ['active', 'Active', 'absolute', 1, 1] + ] + }, + 'states': { + 'options': [None, 'Tasks per State', 'tasks', 'boinc', 'boinc.states', 'line'], + 'lines': [ + ['new', 'New', 'absolute', 1, 1], + ['downloading', 'Downloading', 'absolute', 1, 1], + ['downloaded', 'Ready to Run', 'absolute', 1, 1], + ['comperror', 'Compute Errors', 'absolute', 1, 1], + ['uploading', 'Uploading', 'absolute', 1, 1], + ['uploaded', 'Uploaded', 'absolute', 1, 1], + ['aborted', 'Aborted', 'absolute', 1, 1], + ['upload_failed', 'Failed Uploads', 'absolute', 1, 1] + ] + }, + 'sched_states': { + 'options': [None, 'Tasks per Scheduler State', 'tasks', 'boinc', 'boinc.sched', 'line'], + 'lines': [ + ['uninit_sched', 'Uninitialized', 'absolute', 1, 1], + ['preempted', 'Preempted', 'absolute', 1, 1], + ['scheduled', 'Scheduled', 'absolute', 1, 1] + ] + }, + 'process_states': { + 'options': [None, 'Tasks per Process State', 'tasks', 'boinc', 'boinc.process', 'line'], + 'lines': [ + ['uninit_proc', 'Uninitialized', 'absolute', 1, 1], + ['executing', 'Executing', 'absolute', 1, 1], + ['suspended', 'Suspended', 'absolute', 1, 1], + ['aborting', 'Aborted', 'absolute', 1, 1], + ['quit', 'Quit', 'absolute', 1, 1], + ['copy_pending', 'Copy Pending', 'absolute', 1, 1] + ] + } +} + +# A simple template used for pre-loading the return dictionary to make +# the _get_data() method simpler. +_DATA_TEMPLATE = { + 'total': 0, + 'active': 0, + 'new': 0, + 'downloading': 0, + 'downloaded': 0, + 'comperror': 0, + 'uploading': 0, + 'uploaded': 0, + 'aborted': 0, + 'upload_failed': 0, + 'uninit_sched': 0, + 'preempted': 0, + 'scheduled': 0, + 'uninit_proc': 0, + 'executing': 0, + 'suspended': 0, + 'aborting': 0, + 'quit': 0, + 'copy_pending': 0 +} + +# Map task states to dimensions +_TASK_MAP = { + boinc_client.ResultState.NEW: 'new', + boinc_client.ResultState.FILES_DOWNLOADING: 'downloading', + boinc_client.ResultState.FILES_DOWNLOADED: 'downloaded', + boinc_client.ResultState.COMPUTE_ERROR: 'comperror', + boinc_client.ResultState.FILES_UPLOADING: 'uploading', + boinc_client.ResultState.FILES_UPLOADED: 'uploaded', + boinc_client.ResultState.ABORTED: 'aborted', + boinc_client.ResultState.UPLOAD_FAILED: 'upload_failed' +} + +# Map scheduler states to dimensions +_SCHED_MAP = { + boinc_client.CpuSched.UNINITIALIZED: 'uninit_sched', + boinc_client.CpuSched.PREEMPTED: 'preempted', + boinc_client.CpuSched.SCHEDULED: 'scheduled', +} + +# Maps process states to dimensions +_PROC_MAP = { + boinc_client.Process.UNINITIALIZED: 'uninit_proc', + boinc_client.Process.EXECUTING: 'executing', + boinc_client.Process.SUSPENDED: 'suspended', + boinc_client.Process.ABORT_PENDING: 'aborted', + boinc_client.Process.QUIT_PENDING: 'quit', + boinc_client.Process.COPY_PENDING: 'copy_pending' +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.host = self.configuration.get('host', 'localhost') + self.port = self.configuration.get('port', 0) + self.password = self.configuration.get('password', '') + self.client = boinc_client.BoincClient(host=self.host, port=self.port, passwd=self.password) + self.alive = False + + def check(self): + return self.connect() + + def connect(self): + self.client.connect() + self.alive = self.client.connected and self.client.authorized + return self.alive + + def reconnect(self): + # The client class itself actually disconnects existing + # connections when it is told to connect, so we don't need to + # explicitly disconnect when we're just trying to reconnect. + return self.connect() + + def is_alive(self): + if not self.alive: + return self.reconnect() + return True + + def _get_data(self): + if not self.is_alive(): + return None + + data = dict(_DATA_TEMPLATE) + + try: + results = self.client.get_tasks() + except socket.error: + self.error('Connection is dead') + self.alive = False + return None + + for task in results: + data['total'] += 1 + data[_TASK_MAP[task.state]] += 1 + try: + if task.active_task: + data['active'] += 1 + data[_SCHED_MAP[task.scheduler_state]] += 1 + data[_PROC_MAP[task.active_task_state]] += 1 + except AttributeError: + pass + + return data or None diff --git a/collectors/python.d.plugin/boinc/boinc.conf b/collectors/python.d.plugin/boinc/boinc.conf new file mode 100644 index 0000000..16edf55 --- /dev/null +++ b/collectors/python.d.plugin/boinc/boinc.conf @@ -0,0 +1,66 @@ +# netdata python.d.plugin configuration for boinc +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, boinc also supports the following: +# +# hostname: localhost # The host running the BOINC client +# port: 31416 # The remote GUI RPC port for BOINC +# password: '' # The remote GUI RPC password diff --git a/collectors/python.d.plugin/ceph/Makefile.inc b/collectors/python.d.plugin/ceph/Makefile.inc new file mode 100644 index 0000000..15b039e --- /dev/null +++ b/collectors/python.d.plugin/ceph/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += ceph/ceph.chart.py +dist_pythonconfig_DATA += ceph/ceph.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += ceph/README.md ceph/Makefile.inc + diff --git a/collectors/python.d.plugin/ceph/README.md b/collectors/python.d.plugin/ceph/README.md new file mode 100644 index 0000000..b75ba6d --- /dev/null +++ b/collectors/python.d.plugin/ceph/README.md @@ -0,0 +1,48 @@ + + +# CEPH monitoring with Netdata + +Monitors the ceph cluster usage and consumption data of a server, and produces: + +- Cluster statistics (usage, available, latency, objects, read/write rate) +- OSD usage +- OSD latency +- Pool usage +- Pool read/write operations +- Pool read/write rate +- number of objects per pool + +## Requirements + +- `rados` python module +- Granting read permissions to ceph group from keyring file + +```shell +# chmod 640 /etc/ceph/ceph.client.admin.keyring +``` + +## Configuration + +Edit the `python.d/ceph.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/ceph.conf +``` + +Sample: + +```yaml +local: + config_file: '/etc/ceph/ceph.conf' + keyring_file: '/etc/ceph/ceph.client.admin.keyring' +``` + +--- + + diff --git a/collectors/python.d.plugin/ceph/ceph.chart.py b/collectors/python.d.plugin/ceph/ceph.chart.py new file mode 100644 index 0000000..494eef4 --- /dev/null +++ b/collectors/python.d.plugin/ceph/ceph.chart.py @@ -0,0 +1,374 @@ +# -*- coding: utf-8 -*- +# Description: ceph netdata python.d module +# Author: Luis Eduardo (lets00) +# SPDX-License-Identifier: GPL-3.0-or-later + +try: + import rados + + CEPH = True +except ImportError: + CEPH = False + +import json +import os + +from bases.FrameworkServices.SimpleService import SimpleService + +# default module values (can be overridden per job in `config`) +update_every = 10 + +ORDER = [ + 'general_usage', + 'general_objects', + 'general_bytes', + 'general_operations', + 'general_latency', + 'pool_usage', + 'pool_objects', + 'pool_read_bytes', + 'pool_write_bytes', + 'pool_read_operations', + 'pool_write_operations', + 'osd_usage', + 'osd_size', + 'osd_apply_latency', + 'osd_commit_latency' +] + +CHARTS = { + 'general_usage': { + 'options': [None, 'Ceph General Space', 'KiB', 'general', 'ceph.general_usage', 'stacked'], + 'lines': [ + ['general_available', 'avail', 'absolute'], + ['general_usage', 'used', 'absolute'] + ] + }, + 'general_objects': { + 'options': [None, 'Ceph General Objects', 'objects', 'general', 'ceph.general_objects', 'area'], + 'lines': [ + ['general_objects', 'cluster', 'absolute'] + ] + }, + 'general_bytes': { + 'options': [None, 'Ceph General Read/Write Data/s', 'KiB/s', 'general', 'ceph.general_bytes', + 'area'], + 'lines': [ + ['general_read_bytes', 'read', 'absolute', 1, 1024], + ['general_write_bytes', 'write', 'absolute', -1, 1024] + ] + }, + 'general_operations': { + 'options': [None, 'Ceph General Read/Write Operations/s', 'operations', 'general', 'ceph.general_operations', + 'area'], + 'lines': [ + ['general_read_operations', 'read', 'absolute', 1], + ['general_write_operations', 'write', 'absolute', -1] + ] + }, + 'general_latency': { + 'options': [None, 'Ceph General Apply/Commit latency', 'milliseconds', 'general', 'ceph.general_latency', + 'area'], + 'lines': [ + ['general_apply_latency', 'apply', 'absolute'], + ['general_commit_latency', 'commit', 'absolute'] + ] + }, + 'pool_usage': { + 'options': [None, 'Ceph Pools', 'KiB', 'pool', 'ceph.pool_usage', 'line'], + 'lines': [] + }, + 'pool_objects': { + 'options': [None, 'Ceph Pools', 'objects', 'pool', 'ceph.pool_objects', 'line'], + 'lines': [] + }, + 'pool_read_bytes': { + 'options': [None, 'Ceph Read Pool Data/s', 'KiB/s', 'pool', 'ceph.pool_read_bytes', 'area'], + 'lines': [] + }, + 'pool_write_bytes': { + 'options': [None, 'Ceph Write Pool Data/s', 'KiB/s', 'pool', 'ceph.pool_write_bytes', 'area'], + 'lines': [] + }, + 'pool_read_operations': { + 'options': [None, 'Ceph Read Pool Operations/s', 'operations', 'pool', 'ceph.pool_read_operations', 'area'], + 'lines': [] + }, + 'pool_write_operations': { + 'options': [None, 'Ceph Write Pool Operations/s', 'operations', 'pool', 'ceph.pool_write_operations', 'area'], + 'lines': [] + }, + 'osd_usage': { + 'options': [None, 'Ceph OSDs', 'KiB', 'osd', 'ceph.osd_usage', 'line'], + 'lines': [] + }, + 'osd_size': { + 'options': [None, 'Ceph OSDs size', 'KiB', 'osd', 'ceph.osd_size', 'line'], + 'lines': [] + }, + 'osd_apply_latency': { + 'options': [None, 'Ceph OSDs apply latency', 'milliseconds', 'osd', 'ceph.apply_latency', 'line'], + 'lines': [] + }, + 'osd_commit_latency': { + 'options': [None, 'Ceph OSDs commit latency', 'milliseconds', 'osd', 'ceph.commit_latency', 'line'], + 'lines': [] + } + +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.config_file = self.configuration.get('config_file') + self.keyring_file = self.configuration.get('keyring_file') + self.rados_id = self.configuration.get('rados_id', 'admin') + + def check(self): + """ + Checks module + :return: + """ + if not CEPH: + self.error('rados module is needed to use ceph.chart.py') + return False + if not (self.config_file and self.keyring_file): + self.error('config_file and/or keyring_file is not defined') + return False + + # Verify files and permissions + if not (os.access(self.config_file, os.F_OK)): + self.error('{0} does not exist'.format(self.config_file)) + return False + if not (os.access(self.keyring_file, os.F_OK)): + self.error('{0} does not exist'.format(self.keyring_file)) + return False + if not (os.access(self.config_file, os.R_OK)): + self.error('Ceph plugin does not read {0}, define read permission.'.format(self.config_file)) + return False + if not (os.access(self.keyring_file, os.R_OK)): + self.error('Ceph plugin does not read {0}, define read permission.'.format(self.keyring_file)) + return False + try: + self.cluster = rados.Rados(conffile=self.config_file, + conf=dict(keyring=self.keyring_file), + rados_id=self.rados_id) + self.cluster.connect() + except rados.Error as error: + self.error(error) + return False + self.create_definitions() + return True + + def create_definitions(self): + """ + Create dynamically charts options + :return: None + """ + # Pool lines + for pool in sorted(self._get_df()['pools'], key=lambda x: sorted(x.keys())): + self.definitions['pool_usage']['lines'].append([pool['name'], + pool['name'], + 'absolute']) + self.definitions['pool_objects']['lines'].append(["obj_{0}".format(pool['name']), + pool['name'], + 'absolute']) + self.definitions['pool_read_bytes']['lines'].append(['read_{0}'.format(pool['name']), + pool['name'], + 'absolute', 1, 1024]) + self.definitions['pool_write_bytes']['lines'].append(['write_{0}'.format(pool['name']), + pool['name'], + 'absolute', 1, 1024]) + self.definitions['pool_read_operations']['lines'].append(['read_operations_{0}'.format(pool['name']), + pool['name'], + 'absolute']) + self.definitions['pool_write_operations']['lines'].append(['write_operations_{0}'.format(pool['name']), + pool['name'], + 'absolute']) + + # OSD lines + for osd in sorted(self._get_osd_df()['nodes'], key=lambda x: sorted(x.keys())): + self.definitions['osd_usage']['lines'].append([osd['name'], + osd['name'], + 'absolute']) + self.definitions['osd_size']['lines'].append(['size_{0}'.format(osd['name']), + osd['name'], + 'absolute']) + self.definitions['osd_apply_latency']['lines'].append(['apply_latency_{0}'.format(osd['name']), + osd['name'], + 'absolute']) + self.definitions['osd_commit_latency']['lines'].append(['commit_latency_{0}'.format(osd['name']), + osd['name'], + 'absolute']) + + def get_data(self): + """ + Catch all ceph data + :return: dict + """ + try: + data = {} + df = self._get_df() + osd_df = self._get_osd_df() + osd_perf = self._get_osd_perf() + osd_perf_infos = get_osd_perf_infos(osd_perf) + pool_stats = self._get_osd_pool_stats() + + data.update(self._get_general(osd_perf_infos, pool_stats)) + for pool in df['pools']: + data.update(self._get_pool_usage(pool)) + data.update(self._get_pool_objects(pool)) + for pool_io in pool_stats: + data.update(self._get_pool_rw(pool_io)) + for osd in osd_df['nodes']: + data.update(self._get_osd_usage(osd)) + data.update(self._get_osd_size(osd)) + for osd_apply_commit in osd_perf_infos: + data.update(self._get_osd_latency(osd_apply_commit)) + return data + except (ValueError, AttributeError) as error: + self.error(error) + return None + + def _get_general(self, osd_perf_infos, pool_stats): + """ + Get ceph's general usage + :return: dict + """ + status = self.cluster.get_cluster_stats() + read_bytes_sec = 0 + write_bytes_sec = 0 + read_op_per_sec = 0 + write_op_per_sec = 0 + apply_latency = 0 + commit_latency = 0 + + for pool_rw_io_b in pool_stats: + read_bytes_sec += pool_rw_io_b['client_io_rate'].get('read_bytes_sec', 0) + write_bytes_sec += pool_rw_io_b['client_io_rate'].get('write_bytes_sec', 0) + read_op_per_sec += pool_rw_io_b['client_io_rate'].get('read_op_per_sec', 0) + write_op_per_sec += pool_rw_io_b['client_io_rate'].get('write_op_per_sec', 0) + for perf in osd_perf_infos: + apply_latency += perf['perf_stats']['apply_latency_ms'] + commit_latency += perf['perf_stats']['commit_latency_ms'] + + return { + 'general_usage': int(status['kb_used']), + 'general_available': int(status['kb_avail']), + 'general_objects': int(status['num_objects']), + 'general_read_bytes': read_bytes_sec, + 'general_write_bytes': write_bytes_sec, + 'general_read_operations': read_op_per_sec, + 'general_write_operations': write_op_per_sec, + 'general_apply_latency': apply_latency, + 'general_commit_latency': commit_latency + } + + @staticmethod + def _get_pool_usage(pool): + """ + Process raw data into pool usage dict information + :return: A pool dict with pool name's key and usage bytes' value + """ + return {pool['name']: pool['stats']['kb_used']} + + @staticmethod + def _get_pool_objects(pool): + """ + Process raw data into pool usage dict information + :return: A pool dict with pool name's key and object numbers + """ + return {'obj_{0}'.format(pool['name']): pool['stats']['objects']} + + @staticmethod + def _get_pool_rw(pool): + """ + Get read/write kb and operations in a pool + :return: A pool dict with both read/write bytes and operations. + """ + return { + 'read_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('read_bytes_sec', 0)), + 'write_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('write_bytes_sec', 0)), + 'read_operations_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('read_op_per_sec', 0)), + 'write_operations_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('write_op_per_sec', 0)) + } + + @staticmethod + def _get_osd_usage(osd): + """ + Process raw data into osd dict information to get osd usage + :return: A osd dict with osd name's key and usage bytes' value + """ + return {osd['name']: float(osd['kb_used'])} + + @staticmethod + def _get_osd_size(osd): + """ + Process raw data into osd dict information to get osd size (kb) + :return: A osd dict with osd name's key and size bytes' value + """ + return {'size_{0}'.format(osd['name']): float(osd['kb'])} + + @staticmethod + def _get_osd_latency(osd): + """ + Get ceph osd apply and commit latency + :return: A osd dict with osd name's key with both apply and commit latency values + """ + return { + 'apply_latency_osd.{0}'.format(osd['id']): osd['perf_stats']['apply_latency_ms'], + 'commit_latency_osd.{0}'.format(osd['id']): osd['perf_stats']['commit_latency_ms'] + } + + def _get_df(self): + """ + Get ceph df output + :return: ceph df --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'df', + 'format': 'json' + }), '')[1].decode('utf-8')) + + def _get_osd_df(self): + """ + Get ceph osd df output + :return: ceph osd df --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'osd df', + 'format': 'json' + }), '')[1].decode('utf-8').replace('-nan', '"-nan"')) + + def _get_osd_perf(self): + """ + Get ceph osd performance + :return: ceph osd perf --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'osd perf', + 'format': 'json' + }), '')[1].decode('utf-8')) + + def _get_osd_pool_stats(self): + """ + Get ceph osd pool status. + This command is used to get information about both + read/write operation and bytes per second on each pool + :return: ceph osd pool stats --format json + """ + return json.loads(self.cluster.mon_command(json.dumps({ + 'prefix': 'osd pool stats', + 'format': 'json' + }), '')[1].decode('utf-8')) + + +def get_osd_perf_infos(osd_perf): + # https://github.com/netdata/netdata/issues/8247 + # module uses 'osd_perf_infos' data, its been moved under 'osdstats` since Ceph v14.2 + if 'osd_perf_infos' in osd_perf: + return osd_perf['osd_perf_infos'] + return osd_perf['osdstats']['osd_perf_infos'] diff --git a/collectors/python.d.plugin/ceph/ceph.conf b/collectors/python.d.plugin/ceph/ceph.conf new file mode 100644 index 0000000..81788e8 --- /dev/null +++ b/collectors/python.d.plugin/ceph/ceph.conf @@ -0,0 +1,75 @@ +# netdata python.d.plugin configuration for ceph stats +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 10 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 10 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, ceph plugin also supports the following: +# +# config_file: 'config_file' # Ceph config file. +# keyring_file: 'keyring_file' # Ceph keyring file. netdata user must be added into ceph group +# # and keyring file must be read group permission. +# rados_id: 'rados username' # ID used to connect to ceph cluster. Allows +# # creating a read only key for pulling data v.s. admin +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +config_file: '/etc/ceph/ceph.conf' +keyring_file: '/etc/ceph/ceph.client.admin.keyring' +rados_id: 'admin' diff --git a/collectors/python.d.plugin/changefinder/Makefile.inc b/collectors/python.d.plugin/changefinder/Makefile.inc new file mode 100644 index 0000000..01a9240 --- /dev/null +++ b/collectors/python.d.plugin/changefinder/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += changefinder/changefinder.chart.py +dist_pythonconfig_DATA += changefinder/changefinder.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += changefinder/README.md changefinder/Makefile.inc + diff --git a/collectors/python.d.plugin/changefinder/README.md b/collectors/python.d.plugin/changefinder/README.md new file mode 100644 index 0000000..7ec3a25 --- /dev/null +++ b/collectors/python.d.plugin/changefinder/README.md @@ -0,0 +1,217 @@ + + +# Online changepoint detection with Netdata + +This collector uses the Python [changefinder](https://github.com/shunsukeaihara/changefinder) library to +perform [online](https://en.wikipedia.org/wiki/Online_machine_learning) [changepoint detection](https://en.wikipedia.org/wiki/Change_detection) +on your Netdata charts and/or dimensions. + +Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a +changepoint score for each chart or dimension you configure it to work on. This is +an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step +to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap +to compute at each step of data collection (see the notes section below for more details) and it should scale fairly +well to work on lots of charts or hosts (if running on a parent node for example). + +> As this is a somewhat unique collector and involves often subjective concepts like changepoints and anomalies, we would love to hear any feedback on it from the community. Please let us know on the [community forum](https://community.netdata.cloud/t/changefinder-collector-feedback/972) or drop us a note at [analytics-ml-team@netdata.cloud](mailto:analytics-ml-team@netdata.cloud) for any and all feedback, both positive and negative. This sort of feedback is priceless to help us make complex features more useful. + +## Charts + +Two charts are available: + +### ChangeFinder Scores (`changefinder.scores`) + +This chart shows the percentile of the score that is output from the ChangeFinder library (it is turned off by default +but available with `show_scores: true`). + +A high observed score is more likely to be a valid changepoint worth exploring, even more so when multiple charts or +dimensions have high changepoint scores at the same time or very close together. + +### ChangeFinder Flags (`changefinder.flags`) + +This chart shows `1` or `0` if the latest score has a percentile value that exceeds the `cf_threshold` threshold. By +default, any scores that are in the 99th or above percentile will raise a flag on this chart. + +The raw changefinder score itself can be a little noisy and so limiting ourselves to just periods where it surpasses +the 99th percentile can help manage the "[signal to noise ratio](https://en.wikipedia.org/wiki/Signal-to-noise_ratio)" +better. + +The `cf_threshold` parameter might be one you want to play around with to tune things specifically for the workloads on +your node and the specific charts you want to monitor. For example, maybe the 95th percentile might work better for you +than the 99th percentile. + +Below is an example of the chart produced by this collector. The first 3/4 of the period looks normal in that we see a +few individual changes being picked up somewhat randomly over time. But then at around 14:59 towards the end of the +chart we see two periods with 'spikes' of multiple changes for a small period of time. This is the sort of pattern that +might be a sign something on the system that has changed sufficiently enough to merit some investigation. + +![changepoint-collector](https://user-images.githubusercontent.com/2178292/108773528-665de980-7556-11eb-895d-798669bcd695.png) + +## Requirements + +- This collector will only work with Python 3 and requires the packages below be installed. + +```bash +# become netdata user +sudo su -s /bin/bash netdata +# install required packages for the netdata user +pip3 install --user numpy==1.19.5 changefinder==0.03 scipy==1.5.4 +``` + +**Note**: if you need to tell Netdata to use Python 3 then you can pass the below command in the python plugin section +of your `netdata.conf` file. + +```yaml +[ plugin:python.d ] + # update every = 1 + command options = -ppython3 +``` + +## Configuration + +Install the Python requirements above, enable the collector and restart Netdata. + +```bash +cd /etc/netdata/ +sudo ./edit-config python.d.conf +# Set `changefinder: no` to `changefinder: yes` +sudo systemctl restart netdata +``` + +The configuration for the changefinder collector defines how it will behave on your system and might take some +experimentation with over time to set it optimally for your node. Out of the box, the config comes with +some [sane defaults](https://www.netdata.cloud/blog/redefining-monitoring-netdata/) to get you started that try to +balance the flexibility and power of the ML models with the goal of being as cheap as possible in term of cost on the +node resources. + +_**Note**: If you are unsure about any of the below configuration options then it's best to just ignore all this and +leave the `changefinder.conf` file alone to begin with. Then you can return to it later if you would like to tune things +a bit more once the collector is running for a while and you have a feeling for its performance on your node._ + +Edit the `python.d/changefinder.conf` configuration file using `edit-config` from the your +agent's [config directory](/docs/configure/nodes.md), which is usually at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/changefinder.conf +``` + +The default configuration should look something like this. Here you can see each parameter (with sane defaults) and some +information about each one and what it does. + +```yaml +# ---------------------------------------------------------------------- +# JOBS (data collection sources) + +# Pull data from local Netdata node. +local: + + # A friendly name for this job. + name: 'local' + + # What host to pull data from. + host: '127.0.0.1:19999' + + # What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. + charts_regex: 'system\..*' + + # Charts to exclude, useful if you would like to exclude some specific charts. + # Note: should be a ',' separated string like 'chart.name,chart.name'. + charts_to_exclude: '' + + # Get ChangeFinder scores 'per_dim' or 'per_chart'. + mode: 'per_chart' + + # Default parameters that can be passed to the changefinder library. + cf_r: 0.5 + cf_order: 1 + cf_smooth: 15 + + # The percentile above which scores will be flagged. + cf_threshold: 99 + + # The number of recent scores to use when calculating the percentile of the changefinder score. + n_score_samples: 14400 + + # Set to true if you also want to chart the percentile scores in addition to the flags. + # Mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time. + show_scores: false +``` + +## Troubleshooting + +To see any relevant log messages you can use a command like below. + +```bash +grep 'changefinder' /var/log/netdata/error.log +``` + +If you would like to log in as `netdata` user and run the collector in debug mode to see more detail. + +```bash +# become netdata user +sudo su -s /bin/bash netdata +# run collector in debug using `nolock` option if netdata is already running the collector itself. +/usr/libexec/netdata/plugins.d/python.d.plugin changefinder debug trace nolock +``` + +## Notes + +- It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into it's + typical behaviour in terms of the trained models and scores you will see in the normal running of your node. Mainly + this is because it can take a while to build up a proper distribution of previous scores in over to convert the raw + score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have + already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then + should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning + approaches which need some initial window of time before they can be useful. +- As this collector does most of the work in Python itself, you may want to try it out first on a test or development + system to get a sense of its performance characteristics on a node similar to where you would like to use it. +- On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the + typical performance characteristics we saw from running this collector (with defaults) were: + - A runtime (`netdata.runtime_changefinder`) of ~30ms. + - Typically ~1% additional cpu usage. + - About ~85mb of ram (`apps.mem`) being continually used by the `python.d.plugin` under default configuration. + +## Useful links and further reading + +- [PyPi changefinder](https://pypi.org/project/changefinder/) reference page. +- [GitHub repo](https://github.com/shunsukeaihara/changefinder) for the changefinder library. +- Relevant academic papers: + - Yamanishi K, Takeuchi J. A unifying framework for detecting outliers and change points from nonstationary time + series data. 8th ACM SIGKDD international conference on Knowledge discovery and data mining - KDD02. 2002: + 676. ([pdf](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.12.3469&rep=rep1&type=pdf)) + - Kawahara Y, Sugiyama M. Sequential Change-Point Detection Based on Direct Density-Ratio Estimation. SIAM + International Conference on Data Mining. 2009: + 389–400. ([pdf](https://onlinelibrary.wiley.com/doi/epdf/10.1002/sam.10124)) + - Liu S, Yamada M, Collier N, Sugiyama M. Change-point detection in time-series data by relative density-ratio + estimation. Neural Networks. Jul.2013 43:72–83. [PubMed: 23500502] ([pdf](https://arxiv.org/pdf/1203.0453.pdf)) + - T. Iwata, K. Nakamura, Y. Tokusashi, and H. Matsutani, “Accelerating Online Change-Point Detection Algorithm using + 10 GbE FPGA NIC,” Proc. International European Conference on Parallel and Distributed Computing (Euro-Par’18) + Workshops, vol.11339, pp.506–517, Aug. + 2018 ([pdf](https://www.arc.ics.keio.ac.jp/~matutani/papers/iwata_heteropar2018.pdf)) +- The [ruptures](https://github.com/deepcharles/ruptures) python package is also a good place to learn more about + changepoint detection (mostly offline as opposed to online but deals with similar concepts). +- A nice [blog post](https://techrando.com/2019/08/14/a-brief-introduction-to-change-point-detection-using-python/) + showing some of the other options and libraries for changepoint detection in Python. +- [Bayesian changepoint detection](https://github.com/hildensia/bayesian_changepoint_detection) library - we may explore + implementing a collector for this or integrating this approach into this collector at a future date if there is + interest and it proves computationaly feasible. +- You might also find the + Netdata [anomalies collector](https://github.com/netdata/netdata/tree/master/collectors/python.d.plugin/anomalies) + interesting. +- [Anomaly Detection](https://en.wikipedia.org/wiki/Anomaly_detection) wikipedia page. +- [Anomaly Detection YouTube playlist](https://www.youtube.com/playlist?list=PL6Zhl9mK2r0KxA6rB87oi4kWzoqGd5vp0) + maintained by [andrewm4894](https://github.com/andrewm4894/) from Netdata. +- [awesome-TS-anomaly-detection](https://github.com/rob-med/awesome-TS-anomaly-detection) Github list of useful tools, + libraries and resources. +- [Mendeley public group](https://www.mendeley.com/community/interesting-anomaly-detection-papers/) with some + interesting anomaly detection papers we have been reading. +- Good [blog post](https://www.anodot.com/blog/what-is-anomaly-detection/) from Anodot on time series anomaly detection. + Anodot also have some great whitepapers in this space too that some may find useful. +- Novelty and outlier detection in + the [scikit-learn documentation](https://scikit-learn.org/stable/modules/outlier_detection.html). + diff --git a/collectors/python.d.plugin/changefinder/changefinder.chart.py b/collectors/python.d.plugin/changefinder/changefinder.chart.py new file mode 100644 index 0000000..c18e560 --- /dev/null +++ b/collectors/python.d.plugin/changefinder/changefinder.chart.py @@ -0,0 +1,185 @@ +# -*- coding: utf-8 -*- +# Description: changefinder netdata python.d module +# Author: andrewm4894 +# SPDX-License-Identifier: GPL-3.0-or-later + +from json import loads +import re + +from bases.FrameworkServices.UrlService import UrlService + +import numpy as np +import changefinder +from scipy.stats import percentileofscore + +update_every = 5 +disabled_by_default = True + +ORDER = [ + 'scores', + 'flags' +] + +CHARTS = { + 'scores': { + 'options': [None, 'ChangeFinder', 'score', 'Scores', 'scores', 'line'], + 'lines': [] + }, + 'flags': { + 'options': [None, 'ChangeFinder', 'flag', 'Flags', 'flags', 'stacked'], + 'lines': [] + } +} + +DEFAULT_PROTOCOL = 'http' +DEFAULT_HOST = '127.0.0.1:19999' +DEFAULT_CHARTS_REGEX = 'system.*' +DEFAULT_MODE = 'per_chart' +DEFAULT_CF_R = 0.5 +DEFAULT_CF_ORDER = 1 +DEFAULT_CF_SMOOTH = 15 +DEFAULT_CF_DIFF = False +DEFAULT_CF_THRESHOLD = 99 +DEFAULT_N_SCORE_SAMPLES = 14400 +DEFAULT_SHOW_SCORES = False + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.protocol = self.configuration.get('protocol', DEFAULT_PROTOCOL) + self.host = self.configuration.get('host', DEFAULT_HOST) + self.url = '{}://{}/api/v1/allmetrics?format=json'.format(self.protocol, self.host) + self.charts_regex = re.compile(self.configuration.get('charts_regex', DEFAULT_CHARTS_REGEX)) + self.charts_to_exclude = self.configuration.get('charts_to_exclude', '').split(',') + self.mode = self.configuration.get('mode', DEFAULT_MODE) + self.n_score_samples = int(self.configuration.get('n_score_samples', DEFAULT_N_SCORE_SAMPLES)) + self.show_scores = int(self.configuration.get('show_scores', DEFAULT_SHOW_SCORES)) + self.cf_r = float(self.configuration.get('cf_r', DEFAULT_CF_R)) + self.cf_order = int(self.configuration.get('cf_order', DEFAULT_CF_ORDER)) + self.cf_smooth = int(self.configuration.get('cf_smooth', DEFAULT_CF_SMOOTH)) + self.cf_diff = bool(self.configuration.get('cf_diff', DEFAULT_CF_DIFF)) + self.cf_threshold = float(self.configuration.get('cf_threshold', DEFAULT_CF_THRESHOLD)) + self.collected_dims = {'scores': set(), 'flags': set()} + self.models = {} + self.x_latest = {} + self.scores_latest = {} + self.scores_samples = {} + + def get_score(self, x, model): + """Update the score for the model based on most recent data, flag if it's percentile passes self.cf_threshold. + """ + + # get score + if model not in self.models: + # initialise empty model if needed + self.models[model] = changefinder.ChangeFinder(r=self.cf_r, order=self.cf_order, smooth=self.cf_smooth) + # if the update for this step fails then just fallback to last known score + try: + score = self.models[model].update(x) + self.scores_latest[model] = score + except Exception as _: + score = self.scores_latest.get(model, 0) + score = 0 if np.isnan(score) else score + + # update sample scores used to calculate percentiles + if model in self.scores_samples: + self.scores_samples[model].append(score) + else: + self.scores_samples[model] = [score] + self.scores_samples[model] = self.scores_samples[model][-self.n_score_samples:] + + # convert score to percentile + score = percentileofscore(self.scores_samples[model], score) + + # flag based on score percentile + flag = 1 if score >= self.cf_threshold else 0 + + return score, flag + + def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1): + """If dimension not in chart then add it. + """ + if not self.charts: + return + + for dim in data: + if dim not in self.collected_dims[chart]: + self.collected_dims[chart].add(dim) + self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor]) + + for dim in list(self.collected_dims[chart]): + if dim not in data: + self.collected_dims[chart].remove(dim) + self.charts[chart].del_dimension(dim, hide=False) + + def diff(self, x, model): + """Take difference of data. + """ + x_diff = x - self.x_latest.get(model, 0) + self.x_latest[model] = x + x = x_diff + return x + + def _get_data(self): + + # pull data from self.url + raw_data = self._get_raw_data() + if raw_data is None: + return None + + raw_data = loads(raw_data) + + # filter to just the data for the charts specified + charts_in_scope = list(filter(self.charts_regex.match, raw_data.keys())) + charts_in_scope = [c for c in charts_in_scope if c not in self.charts_to_exclude] + + data_score = {} + data_flag = {} + + # process each chart + for chart in charts_in_scope: + + if self.mode == 'per_chart': + + # average dims on chart and run changefinder on that average + x = [raw_data[chart]['dimensions'][dim]['value'] for dim in raw_data[chart]['dimensions']] + x = [x for x in x if x is not None] + + if len(x) > 0: + + x = sum(x) / len(x) + x = self.diff(x, chart) if self.cf_diff else x + + score, flag = self.get_score(x, chart) + if self.show_scores: + data_score['{}_score'.format(chart)] = score * 100 + data_flag[chart] = flag + + else: + + # run changefinder on each individual dim + for dim in raw_data[chart]['dimensions']: + + chart_dim = '{}|{}'.format(chart, dim) + + x = raw_data[chart]['dimensions'][dim]['value'] + x = x if x else 0 + x = self.diff(x, chart_dim) if self.cf_diff else x + + score, flag = self.get_score(x, chart_dim) + if self.show_scores: + data_score['{}_score'.format(chart_dim)] = score * 100 + data_flag[chart_dim] = flag + + self.validate_charts('flags', data_flag) + + if self.show_scores & len(data_score) > 0: + data_score['average_score'] = sum(data_score.values()) / len(data_score) + self.validate_charts('scores', data_score, divisor=100) + + data = {**data_score, **data_flag} + + return data diff --git a/collectors/python.d.plugin/changefinder/changefinder.conf b/collectors/python.d.plugin/changefinder/changefinder.conf new file mode 100644 index 0000000..56a681f --- /dev/null +++ b/collectors/python.d.plugin/changefinder/changefinder.conf @@ -0,0 +1,74 @@ +# netdata python.d.plugin configuration for example +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 5 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) + +local: + + # A friendly name for this job. + name: 'local' + + # What host to pull data from. + host: '127.0.0.1:19999' + + # What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. + charts_regex: 'system\..*' + + # Charts to exclude, useful if you would like to exclude some specific charts. + # Note: should be a ',' separated string like 'chart.name,chart.name'. + charts_to_exclude: '' + + # Get ChangeFinder scores 'per_dim' or 'per_chart'. + mode: 'per_chart' + + # Default parameters that can be passed to the changefinder library. + cf_r: 0.5 + cf_order: 1 + cf_smooth: 15 + + # The percentile above which scores will be flagged. + cf_threshold: 99 + + # The number of recent scores to use when calculating the percentile of the changefinder score. + n_score_samples: 14400 + + # Set to true if you also want to chart the percentile scores in addition to the flags. + # Mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time. + show_scores: false diff --git a/collectors/python.d.plugin/dockerd/Makefile.inc b/collectors/python.d.plugin/dockerd/Makefile.inc new file mode 100644 index 0000000..b100bc6 --- /dev/null +++ b/collectors/python.d.plugin/dockerd/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += dockerd/dockerd.chart.py +dist_pythonconfig_DATA += dockerd/dockerd.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += dockerd/README.md dockerd/Makefile.inc + diff --git a/collectors/python.d.plugin/dockerd/README.md b/collectors/python.d.plugin/dockerd/README.md new file mode 100644 index 0000000..6470a7c --- /dev/null +++ b/collectors/python.d.plugin/dockerd/README.md @@ -0,0 +1,46 @@ + + +# Docker Engine monitoring with Netdata + +Collects docker container health metrics. + +**Requirement:** + +- `docker` package, required version 3.2.0+ + +Following charts are drawn: + +1. **running containers** + + - count + +2. **healthy containers** + + - count + +3. **unhealthy containers** + + - count + +## Configuration + +Edit the `python.d/dockerd.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/dockerd.conf +``` + +```yaml + update_every : 1 + priority : 60000 +``` + +--- + + diff --git a/collectors/python.d.plugin/dockerd/dockerd.chart.py b/collectors/python.d.plugin/dockerd/dockerd.chart.py new file mode 100644 index 0000000..bd9640b --- /dev/null +++ b/collectors/python.d.plugin/dockerd/dockerd.chart.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +# Description: docker netdata python.d module +# Author: Kévin Darcel (@tuxity) + +try: + import docker + + HAS_DOCKER = True +except ImportError: + HAS_DOCKER = False + +from distutils.version import StrictVersion + +from bases.FrameworkServices.SimpleService import SimpleService + +# charts order (can be overridden if you want less charts, or different order) +ORDER = [ + 'running_containers', + 'healthy_containers', + 'unhealthy_containers' +] + +CHARTS = { + 'running_containers': { + 'options': [None, 'Number of running containers', 'containers', 'running containers', + 'docker.running_containers', 'line'], + 'lines': [ + ['running_containers', 'running'] + ] + }, + 'healthy_containers': { + 'options': [None, 'Number of healthy containers', 'containers', 'healthy containers', + 'docker.healthy_containers', 'line'], + 'lines': [ + ['healthy_containers', 'healthy'] + ] + }, + 'unhealthy_containers': { + 'options': [None, 'Number of unhealthy containers', 'containers', 'unhealthy containers', + 'docker.unhealthy_containers', 'line'], + 'lines': [ + ['unhealthy_containers', 'unhealthy'] + ] + } +} + +MIN_REQUIRED_VERSION = '3.2.0' + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.client = None + + def check(self): + if not HAS_DOCKER: + self.error("'docker' package is needed to use dockerd module") + return False + + if StrictVersion(docker.__version__) < StrictVersion(MIN_REQUIRED_VERSION): + self.error("installed 'docker' package version {0}, minimum required version {1}, please upgrade".format( + docker.__version__, + MIN_REQUIRED_VERSION, + )) + return False + + self.client = docker.DockerClient(base_url=self.configuration.get('url', 'unix://var/run/docker.sock')) + + try: + self.client.ping() + except docker.errors.APIError as error: + self.error(error) + return False + + return True + + def get_data(self): + data = dict() + + data['running_containers'] = len(self.client.containers.list(sparse=True)) + data['healthy_containers'] = len(self.client.containers.list(filters={'health': 'healthy'}, sparse=True)) + data['unhealthy_containers'] = len(self.client.containers.list(filters={'health': 'unhealthy'}, sparse=True)) + + return data or None diff --git a/collectors/python.d.plugin/dockerd/dockerd.conf b/collectors/python.d.plugin/dockerd/dockerd.conf new file mode 100644 index 0000000..96c8ee0 --- /dev/null +++ b/collectors/python.d.plugin/dockerd/dockerd.conf @@ -0,0 +1,77 @@ +# netdata python.d.plugin configuration for dockerd health data API +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, dockerd plugin also supports the following: +# +# url: '://:/' +# # http://localhost:8080/health +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +local: + url: 'unix://var/run/docker.sock' diff --git a/collectors/python.d.plugin/dovecot/Makefile.inc b/collectors/python.d.plugin/dovecot/Makefile.inc new file mode 100644 index 0000000..fd7d13b --- /dev/null +++ b/collectors/python.d.plugin/dovecot/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += dovecot/dovecot.chart.py +dist_pythonconfig_DATA += dovecot/dovecot.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += dovecot/README.md dovecot/Makefile.inc + diff --git a/collectors/python.d.plugin/dovecot/README.md b/collectors/python.d.plugin/dovecot/README.md new file mode 100644 index 0000000..e6bbf0d --- /dev/null +++ b/collectors/python.d.plugin/dovecot/README.md @@ -0,0 +1,105 @@ + + +# Dovecot monitoring with Netdata + +Provides statistics information from Dovecot server. + +Statistics are taken from dovecot socket by executing `EXPORT global` command. +More information about dovecot stats can be found on [project wiki page.](http://wiki2.dovecot.org/Statistics) + +Module isn't compatible with new statistic api (v2.3), but you are still able to use the module with Dovecot v2.3 +by following [upgrading steps.](https://wiki2.dovecot.org/Upgrading/2.3). + +**Requirement:** +Dovecot UNIX socket with R/W permissions for user `netdata` or Dovecot with configured TCP/IP socket. + +Module gives information with following charts: + +1. **sessions** + + - active sessions + +2. **logins** + + - logins + +3. **commands** - number of IMAP commands + + - commands + +4. **Faults** + + - minor + - major + +5. **Context Switches** + + - voluntary + - involuntary + +6. **disk** in bytes/s + + - read + - write + +7. **bytes** in bytes/s + + - read + - write + +8. **number of syscalls** in syscalls/s + + - read + - write + +9. **lookups** - number of lookups per second + + - path + - attr + +10. **hits** - number of cache hits + + - hits + +11. **attempts** - authorization attempts + + - success + - failure + +12. **cache** - cached authorization hits + + - hit + - miss + +## Configuration + +Edit the `python.d/dovecot.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/dovecot.conf +``` + +Sample: + +```yaml +localtcpip: + name : 'local' + host : '127.0.0.1' + port : 24242 + +localsocket: + name : 'local' + socket : '/var/run/dovecot/stats' +``` + +If no configuration is given, module will attempt to connect to dovecot using unix socket localized in `/var/run/dovecot/stats` + +--- + + diff --git a/collectors/python.d.plugin/dovecot/dovecot.chart.py b/collectors/python.d.plugin/dovecot/dovecot.chart.py new file mode 100644 index 0000000..dfaef28 --- /dev/null +++ b/collectors/python.d.plugin/dovecot/dovecot.chart.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +# Description: dovecot netdata python.d module +# Author: Pawel Krupa (paulfantom) +# SPDX-License-Identifier: GPL-3.0-or-later + +from bases.FrameworkServices.SocketService import SocketService + +UNIX_SOCKET = '/var/run/dovecot/stats' + +ORDER = [ + 'sessions', + 'logins', + 'commands', + 'faults', + 'context_switches', + 'io', + 'net', + 'syscalls', + 'lookup', + 'cache', + 'auth', + 'auth_cache' +] + +CHARTS = { + 'sessions': { + 'options': [None, 'Dovecot Active Sessions', 'number', 'sessions', 'dovecot.sessions', 'line'], + 'lines': [ + ['num_connected_sessions', 'active sessions', 'absolute'] + ] + }, + 'logins': { + 'options': [None, 'Dovecot Logins', 'number', 'logins', 'dovecot.logins', 'line'], + 'lines': [ + ['num_logins', 'logins', 'absolute'] + ] + }, + 'commands': { + 'options': [None, 'Dovecot Commands', 'commands', 'commands', 'dovecot.commands', 'line'], + 'lines': [ + ['num_cmds', 'commands', 'absolute'] + ] + }, + 'faults': { + 'options': [None, 'Dovecot Page Faults', 'faults', 'page faults', 'dovecot.faults', 'line'], + 'lines': [ + ['min_faults', 'minor', 'absolute'], + ['maj_faults', 'major', 'absolute'] + ] + }, + 'context_switches': { + 'options': [None, 'Dovecot Context Switches', 'switches', 'context switches', 'dovecot.context_switches', + 'line'], + 'lines': [ + ['vol_cs', 'voluntary', 'absolute'], + ['invol_cs', 'involuntary', 'absolute'] + ] + }, + 'io': { + 'options': [None, 'Dovecot Disk I/O', 'KiB/s', 'disk', 'dovecot.io', 'area'], + 'lines': [ + ['disk_input', 'read', 'incremental', 1, 1024], + ['disk_output', 'write', 'incremental', -1, 1024] + ] + }, + 'net': { + 'options': [None, 'Dovecot Network Bandwidth', 'kilobits/s', 'network', 'dovecot.net', 'area'], + 'lines': [ + ['read_bytes', 'read', 'incremental', 8, 1000], + ['write_bytes', 'write', 'incremental', -8, 1000] + ] + }, + 'syscalls': { + 'options': [None, 'Dovecot Number of SysCalls', 'syscalls/s', 'system', 'dovecot.syscalls', 'line'], + 'lines': [ + ['read_count', 'read', 'incremental'], + ['write_count', 'write', 'incremental'] + ] + }, + 'lookup': { + 'options': [None, 'Dovecot Lookups', 'number/s', 'lookups', 'dovecot.lookup', 'stacked'], + 'lines': [ + ['mail_lookup_path', 'path', 'incremental'], + ['mail_lookup_attr', 'attr', 'incremental'] + ] + }, + 'cache': { + 'options': [None, 'Dovecot Cache Hits', 'hits/s', 'cache', 'dovecot.cache', 'line'], + 'lines': [ + ['mail_cache_hits', 'hits', 'incremental'] + ] + }, + 'auth': { + 'options': [None, 'Dovecot Authentications', 'attempts', 'logins', 'dovecot.auth', 'stacked'], + 'lines': [ + ['auth_successes', 'ok', 'absolute'], + ['auth_failures', 'failed', 'absolute'] + ] + }, + 'auth_cache': { + 'options': [None, 'Dovecot Authentication Cache', 'number', 'cache', 'dovecot.auth_cache', 'stacked'], + 'lines': [ + ['auth_cache_hits', 'hit', 'absolute'], + ['auth_cache_misses', 'miss', 'absolute'] + ] + } +} + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + SocketService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.host = None # localhost + self.port = None # 24242 + self.unix_socket = UNIX_SOCKET + self.request = 'EXPORT\tglobal\r\n' + + def _get_data(self): + """ + Format data received from socket + :return: dict + """ + try: + raw = self._get_raw_data() + except (ValueError, AttributeError): + return None + + if raw is None: + self.debug('dovecot returned no data') + return None + + data = raw.split('\n')[:2] + desc = data[0].split('\t') + vals = data[1].split('\t') + ret = dict() + for i, _ in enumerate(desc): + try: + ret[str(desc[i])] = int(vals[i]) + except ValueError: + continue + return ret or None diff --git a/collectors/python.d.plugin/dovecot/dovecot.conf b/collectors/python.d.plugin/dovecot/dovecot.conf new file mode 100644 index 0000000..451dbc9 --- /dev/null +++ b/collectors/python.d.plugin/dovecot/dovecot.conf @@ -0,0 +1,98 @@ +# netdata python.d.plugin configuration for dovecot +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, dovecot also supports the following: +# +# socket: 'path/to/dovecot/stats' +# +# or +# host: 'IP or HOSTNAME' # the host to connect to +# port: PORT # the port to connect to +# +# + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + host : 'localhost' + port : 24242 + +localipv4: + name : 'local' + host : '127.0.0.1' + port : 24242 + +localipv6: + name : 'local' + host : '::1' + port : 24242 + +localsocket: + name : 'local' + socket : '/var/run/dovecot/stats' + +localsocket_old: + name : 'local' + socket : '/var/run/dovecot/old-stats' + diff --git a/collectors/python.d.plugin/example/Makefile.inc b/collectors/python.d.plugin/example/Makefile.inc new file mode 100644 index 0000000..1b027d5 --- /dev/null +++ b/collectors/python.d.plugin/example/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += example/example.chart.py +dist_pythonconfig_DATA += example/example.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += example/README.md example/Makefile.inc + diff --git a/collectors/python.d.plugin/example/README.md b/collectors/python.d.plugin/example/README.md new file mode 100644 index 0000000..0b80aa9 --- /dev/null +++ b/collectors/python.d.plugin/example/README.md @@ -0,0 +1,14 @@ + + +# Example + +You can add custom data collectors using Python. + +Netdata provides an [example python data collection module](https://github.com/netdata/netdata/tree/master/collectors/python.d.plugin/example). + +If you want to write your own collector, read our [writing a new Python module](/collectors/python.d.plugin/README.md#how-to-write-a-new-module) tutorial. + + diff --git a/collectors/python.d.plugin/example/example.chart.py b/collectors/python.d.plugin/example/example.chart.py new file mode 100644 index 0000000..d6c0b66 --- /dev/null +++ b/collectors/python.d.plugin/example/example.chart.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Description: example netdata python.d module +# Author: Put your name here (your github login) +# SPDX-License-Identifier: GPL-3.0-or-later + +from random import SystemRandom + +from bases.FrameworkServices.SimpleService import SimpleService + +priority = 90000 + +ORDER = [ + 'random', +] + +CHARTS = { + 'random': { + 'options': [None, 'A random number', 'random number', 'random', 'random', 'line'], + 'lines': [ + ['random1'] + ] + } +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.random = SystemRandom() + self.num_lines = self.configuration.get('num_lines', 4) + self.lower = self.configuration.get('lower', 0) + self.upper = self.configuration.get('upper', 100) + + @staticmethod + def check(): + return True + + def get_data(self): + data = dict() + + for i in range(0, self.num_lines): + dimension_id = ''.join(['random', str(i)]) + + if dimension_id not in self.charts['random']: + self.charts['random'].add_dimension([dimension_id]) + + data[dimension_id] = self.random.randint(self.lower, self.upper) + + return data diff --git a/collectors/python.d.plugin/example/example.conf b/collectors/python.d.plugin/example/example.conf new file mode 100644 index 0000000..31261b8 --- /dev/null +++ b/collectors/python.d.plugin/example/example.conf @@ -0,0 +1,87 @@ +# netdata python.d.plugin configuration for example +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear on the dashboard +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, example also supports the following: +# +# num_lines: 4 # the number of lines to create +# lower: 0 # the lower bound of numbers to randomly sample from +# upper: 100 # the upper bound of numbers to randomly sample from +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS + +four_lines: + name: "Four Lines" # the JOB's name as it will appear on the dashboard + update_every: 1 # the JOB's data collection frequency + priority: 60000 # the JOB's order on the dashboard + penalty: yes # the JOB's penalty + autodetection_retry: 0 # the JOB's re-check interval in seconds + num_lines: 4 # the number of lines to create + lower: 0 # the lower bound of numbers to randomly sample from + upper: 100 # the upper bound of numbers to randomly sample from + +# if you wanted to make another job to run in addition to the one above then +# you would just uncomment the job configuration below. +# two_lines: +# name: "Two Lines" # the JOB's name as it will appear on the dashboard +# num_lines: 2 # the number of lines to create +# lower: 50 # the lower bound of numbers to randomly sample from +# upper: 75 # the upper bound of numbers to randomly sample from diff --git a/collectors/python.d.plugin/exim/Makefile.inc b/collectors/python.d.plugin/exim/Makefile.inc new file mode 100644 index 0000000..36ffa56 --- /dev/null +++ b/collectors/python.d.plugin/exim/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += exim/exim.chart.py +dist_pythonconfig_DATA += exim/exim.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += exim/README.md exim/Makefile.inc + diff --git a/collectors/python.d.plugin/exim/README.md b/collectors/python.d.plugin/exim/README.md new file mode 100644 index 0000000..92b2d7a --- /dev/null +++ b/collectors/python.d.plugin/exim/README.md @@ -0,0 +1,41 @@ + + +# Exim monitoring with Netdata + +Simple module executing `exim -bpc` to grab exim queue. +This command can take a lot of time to finish its execution thus it is not recommended to run it every second. + +## Requirements + +The module uses the `exim` binary, which can only be executed as root by default. We need to allow other users to `exim` binary. We solve that adding `queue_list_requires_admin` statement in exim configuration and set to `false`, because it is `true` by default. On many Linux distributions, the default location of `exim` configuration is in `/etc/exim.conf`. + +1. Edit the `exim` configuration with your preferred editor and add: +`queue_list_requires_admin = false` +2. Restart `exim` and Netdata + +*WHM (CPanel) server* + +On a WHM server, you can reconfigure `exim` over the WHM interface with the following steps. + +1. Login to WHM +2. Navigate to Service Configuration --> Exim Configuration Manager --> tab Advanced Editor +3. Scroll down to the button **Add additional configuration setting** and click on it. +4. In the new dropdown which will appear above we need to find and choose: +`queue_list_requires_admin` and set to `false` +5. Scroll to the end and click the **Save** button. + +It produces only one chart: + +1. **Exim Queue Emails** + + - emails + +Configuration is not needed. + +--- + + diff --git a/collectors/python.d.plugin/exim/exim.chart.py b/collectors/python.d.plugin/exim/exim.chart.py new file mode 100644 index 0000000..7238a1b --- /dev/null +++ b/collectors/python.d.plugin/exim/exim.chart.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# Description: exim netdata python.d module +# Author: Pawel Krupa (paulfantom) +# SPDX-License-Identifier: GPL-3.0-or-later + +from bases.FrameworkServices.ExecutableService import ExecutableService + +EXIM_COMMAND = 'exim -bpc' + +ORDER = [ + 'qemails', +] + +CHARTS = { + 'qemails': { + 'options': [None, 'Exim Queue Emails', 'emails', 'queue', 'exim.qemails', 'line'], + 'lines': [ + ['emails', None, 'absolute'] + ] + } +} + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.command = EXIM_COMMAND + + def _get_data(self): + """ + Format data received from shell command + :return: dict + """ + try: + return {'emails': int(self._get_raw_data()[0])} + except (ValueError, AttributeError): + return None diff --git a/collectors/python.d.plugin/exim/exim.conf b/collectors/python.d.plugin/exim/exim.conf new file mode 100644 index 0000000..3b7e659 --- /dev/null +++ b/collectors/python.d.plugin/exim/exim.conf @@ -0,0 +1,91 @@ +# netdata python.d.plugin configuration for exim +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# exim is slow, so once every 10 seconds +update_every: 10 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, exim also supports the following: +# +# command: 'exim -bpc' # the command to run +# + +# ---------------------------------------------------------------------- +# REQUIRED exim CONFIGURATION +# +# netdata will query exim as user netdata. +# By default exim will refuse to respond. +# +# To allow querying exim as non-admin user, please set the following +# to your exim configuration: +# +# queue_list_requires_admin = false +# +# Your exim configuration should be in +# +# /etc/exim/exim4.conf +# or +# /etc/exim4/conf.d/main/000_local_options +# +# Please consult your distribution information to find the exact file. + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS + +local: + command: 'exim -bpc' diff --git a/collectors/python.d.plugin/fail2ban/Makefile.inc b/collectors/python.d.plugin/fail2ban/Makefile.inc new file mode 100644 index 0000000..31e117e --- /dev/null +++ b/collectors/python.d.plugin/fail2ban/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += fail2ban/fail2ban.chart.py +dist_pythonconfig_DATA += fail2ban/fail2ban.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += fail2ban/README.md fail2ban/Makefile.inc + diff --git a/collectors/python.d.plugin/fail2ban/README.md b/collectors/python.d.plugin/fail2ban/README.md new file mode 100644 index 0000000..be09e18 --- /dev/null +++ b/collectors/python.d.plugin/fail2ban/README.md @@ -0,0 +1,82 @@ + + +# Fail2ban monitoring with Netdata + +Monitors the fail2ban log file to show all bans for all active jails. + +## Requirements + +The `fail2ban.log` file must be readable by the user `netdata`: + +- change the file ownership and access permissions. +- update `/etc/logrotate.d/fail2ban` to persists the changes after rotating the log file. + +
+ Click to expand the instruction. + +To change the file ownership and access permissions, execute the following: + +```shell +sudo chown root:netdata /var/log/fail2ban.log +sudo chmod 640 /var/log/fail2ban.log +``` + +To persist the changes after rotating the log file, add `create 640 root netdata` to the `/etc/logrotate.d/fail2ban`: + +```shell +/var/log/fail2ban.log { + + weekly + rotate 4 + compress + + delaycompress + missingok + postrotate + fail2ban-client flushlogs 1>/dev/null + endscript + + # If fail2ban runs as non-root it still needs to have write access + # to logfiles. + # create 640 fail2ban adm + create 640 root netdata +} +``` + +
+ +## Charts + +- Failed attempts in attempts/s +- Bans in bans/s +- Banned IP addresses (since the last restart of netdata) in ips + +## Configuration + +Edit the `python.d/fail2ban.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/fail2ban.conf +``` + +Sample: + +```yaml +local: + log_path: '/var/log/fail2ban.log' + conf_path: '/etc/fail2ban/jail.local' + exclude: 'dropbear apache' +``` + +If no configuration is given, module will attempt to read log file at `/var/log/fail2ban.log` and conf file +at `/etc/fail2ban/jail.local`. If conf file is not found default jail is `ssh`. + +--- + + diff --git a/collectors/python.d.plugin/fail2ban/fail2ban.chart.py b/collectors/python.d.plugin/fail2ban/fail2ban.chart.py new file mode 100644 index 0000000..76f6d92 --- /dev/null +++ b/collectors/python.d.plugin/fail2ban/fail2ban.chart.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +# Description: fail2ban log netdata python.d module +# Author: ilyam8 +# SPDX-License-Identifier: GPL-3.0-or-later + +import os +import re +from collections import defaultdict +from glob import glob + +from bases.FrameworkServices.LogService import LogService + +ORDER = [ + 'jails_failed_attempts', + 'jails_bans', + 'jails_banned_ips', +] + + +def charts(jails): + """ + Chart definitions creating + """ + + ch = { + ORDER[0]: { + 'options': [None, 'Failed attempts', 'attempts/s', 'failed attempts', 'fail2ban.failed_attempts', 'line'], + 'lines': [] + }, + ORDER[1]: { + 'options': [None, 'Bans', 'bans/s', 'bans', 'fail2ban.bans', 'line'], + 'lines': [] + }, + ORDER[2]: { + 'options': [None, 'Banned IP addresses (since the last restart of netdata)', 'ips', 'banned ips', + 'fail2ban.banned_ips', 'line'], + 'lines': [] + }, + } + for jail in jails: + dim = ['{0}_failed_attempts'.format(jail), jail, 'incremental'] + ch[ORDER[0]]['lines'].append(dim) + + dim = [jail, jail, 'incremental'] + ch[ORDER[1]]['lines'].append(dim) + + dim = ['{0}_in_jail'.format(jail), jail, 'absolute'] + ch[ORDER[2]]['lines'].append(dim) + + return ch + + +RE_JAILS = re.compile(r'\[([a-zA-Z0-9_-]+)\][^\[\]]+?enabled\s+= +(true|yes|false|no)') + +ACTION_BAN = 'Ban' +ACTION_UNBAN = 'Unban' +ACTION_RESTORE_BAN = 'Restore Ban' +ACTION_FOUND = 'Found' + +# Example: +# 2018-09-12 11:45:58,727 fail2ban.actions[25029]: WARNING [ssh] Found 203.0.113.1 +# 2018-09-12 11:45:58,727 fail2ban.actions[25029]: WARNING [ssh] Ban 203.0.113.1 +# 2018-09-12 11:45:58,727 fail2ban.actions[25029]: WARNING [ssh] Restore Ban 203.0.113.1 +# 2018-09-12 11:45:53,715 fail2ban.actions[25029]: WARNING [ssh] Unban 203.0.113.1 +RE_DATA = re.compile( + r'\[(?P[A-Za-z-_0-9]+)\] (?P{0}|{1}|{2}|{3}) (?P[a-f0-9.:]+)'.format( + ACTION_BAN, ACTION_UNBAN, ACTION_RESTORE_BAN, ACTION_FOUND + ) +) + +DEFAULT_JAILS = [ + 'ssh', +] + + +class Service(LogService): + def __init__(self, configuration=None, name=None): + LogService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = dict() + self.log_path = self.configuration.get('log_path', '/var/log/fail2ban.log') + self.conf_path = self.configuration.get('conf_path', '/etc/fail2ban/jail.local') + self.conf_dir = self.configuration.get('conf_dir', '/etc/fail2ban/jail.d/') + self.exclude = self.configuration.get('exclude', str()) + self.monitoring_jails = list() + self.banned_ips = defaultdict(set) + self.data = dict() + + def check(self): + """ + :return: bool + """ + if not self.conf_path.endswith(('.conf', '.local')): + self.error('{0} is a wrong conf path name, must be *.conf or *.local'.format(self.conf_path)) + return False + + if not os.access(self.log_path, os.R_OK): + self.error('{0} is not readable'.format(self.log_path)) + return False + + if os.path.getsize(self.log_path) == 0: + self.error('{0} is empty'.format(self.log_path)) + return False + + self.monitoring_jails = self.jails_auto_detection() + for jail in self.monitoring_jails: + self.data['{0}_failed_attempts'.format(jail)] = 0 + self.data[jail] = 0 + self.data['{0}_in_jail'.format(jail)] = 0 + + self.definitions = charts(self.monitoring_jails) + self.info('monitoring jails: {0}'.format(self.monitoring_jails)) + + return True + + def get_data(self): + """ + :return: dict + """ + raw = self._get_raw_data() + + if not raw: + return None if raw is None else self.data + + for row in raw: + match = RE_DATA.search(row) + + if not match: + continue + + match = match.groupdict() + + if match['jail'] not in self.monitoring_jails: + continue + + jail, action, ip = match['jail'], match['action'], match['ip'] + + if action == ACTION_FOUND: + self.data['{0}_failed_attempts'.format(jail)] += 1 + elif action in (ACTION_BAN, ACTION_RESTORE_BAN): + self.data[jail] += 1 + if ip not in self.banned_ips[jail]: + self.banned_ips[jail].add(ip) + self.data['{0}_in_jail'.format(jail)] += 1 + elif action == ACTION_UNBAN: + if ip in self.banned_ips[jail]: + self.banned_ips[jail].remove(ip) + self.data['{0}_in_jail'.format(jail)] -= 1 + + return self.data + + def get_files_from_dir(self, dir_path, suffix): + """ + :return: list + """ + if not os.path.isdir(dir_path): + self.error('{0} is not a directory'.format(dir_path)) + return list() + + return glob('{0}/*.{1}'.format(self.conf_dir, suffix)) + + def get_jails_from_file(self, file_path): + """ + :return: list + """ + if not os.access(file_path, os.R_OK): + self.error('{0} is not readable or not exist'.format(file_path)) + return list() + + with open(file_path, 'rt') as f: + lines = f.readlines() + raw = ' '.join(line for line in lines if line.startswith(('[', 'enabled'))) + + match = RE_JAILS.findall(raw) + # Result: [('ssh', 'true'), ('dropbear', 'true'), ('pam-generic', 'true'), ...] + + if not match: + self.debug('{0} parse failed'.format(file_path)) + return list() + + return match + + def jails_auto_detection(self): + """ + :return: list + + Parses jail configuration files. Returns list of enabled jails. + According man jail.conf parse order must be + * jail.conf + * jail.d/*.conf (in alphabetical order) + * jail.local + * jail.d/*.local (in alphabetical order) + """ + jails_files, all_jails, active_jails = list(), list(), list() + + jails_files.append('{0}.conf'.format(self.conf_path.rsplit('.')[0])) + jails_files.extend(self.get_files_from_dir(self.conf_dir, 'conf')) + jails_files.append('{0}.local'.format(self.conf_path.rsplit('.')[0])) + jails_files.extend(self.get_files_from_dir(self.conf_dir, 'local')) + + self.debug('config files to parse: {0}'.format(jails_files)) + + for f in jails_files: + all_jails.extend(self.get_jails_from_file(f)) + + exclude = self.exclude.split() + + for name, status in all_jails: + if name in exclude: + continue + + if status in ('true', 'yes') and name not in active_jails: + active_jails.append(name) + elif status in ('false', 'no') and name in active_jails: + active_jails.remove(name) + + return active_jails or DEFAULT_JAILS diff --git a/collectors/python.d.plugin/fail2ban/fail2ban.conf b/collectors/python.d.plugin/fail2ban/fail2ban.conf new file mode 100644 index 0000000..a36436b --- /dev/null +++ b/collectors/python.d.plugin/fail2ban/fail2ban.conf @@ -0,0 +1,68 @@ +# netdata python.d.plugin configuration for fail2ban +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, fail2ban also supports the following: +# +# log_path: 'path to fail2ban.log' # Default: '/var/log/fail2ban.log' +# conf_path: 'path to jail.local/jail.conf' # Default: '/etc/fail2ban/jail.local' +# conf_dir: 'path to jail.d/' # Default: '/etc/fail2ban/jail.d/' +# exclude: 'jails you want to exclude from autodetection' # Default: none +#------------------------------------------------------------------------------------------------------------------ diff --git a/collectors/python.d.plugin/gearman/Makefile.inc b/collectors/python.d.plugin/gearman/Makefile.inc new file mode 100644 index 0000000..275adf1 --- /dev/null +++ b/collectors/python.d.plugin/gearman/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += gearman/gearman.chart.py +dist_pythonconfig_DATA += gearman/gearman.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += gearman/README.md gearman/Makefile.inc + diff --git a/collectors/python.d.plugin/gearman/README.md b/collectors/python.d.plugin/gearman/README.md new file mode 100644 index 0000000..34ea584 --- /dev/null +++ b/collectors/python.d.plugin/gearman/README.md @@ -0,0 +1,50 @@ + + +# Gearman monitoring with Netdata + +Monitors Gearman worker statistics. A chart is shown for each job as well as one showing a summary of all workers. + +Note: Charts may show as a line graph rather than an area +graph if you load Netdata with no jobs running. To change +this go to "Settings" > "Which dimensions to show?" and +select "All". + +Plugin can obtain data from tcp socket **OR** unix socket. + +**Requirement:** +Socket MUST be readable by netdata user. + +It produces: + + * Workers queued + * Workers idle + * Workers running + +## Configuration + +Edit the `python.d/gearman.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/gearman.conf +``` + +```yaml +localhost: + name : 'local' + host : 'localhost' + port : 4730 + + # TLS information can be provided as well + tls : no + cert : /path/to/cert + key : /path/to/key +``` + +When no configuration file is found, module tries to connect to TCP/IP socket: `localhost:4730`. + diff --git a/collectors/python.d.plugin/gearman/gearman.chart.py b/collectors/python.d.plugin/gearman/gearman.chart.py new file mode 100644 index 0000000..5e280a4 --- /dev/null +++ b/collectors/python.d.plugin/gearman/gearman.chart.py @@ -0,0 +1,243 @@ +# Description: dovecot netdata python.d module +# Author: Kyle Agronick (agronick) +# SPDX-License-Identifier: GPL-3.0+ + +# Gearman Netdata Plugin + +from copy import deepcopy + +from bases.FrameworkServices.SocketService import SocketService + +CHARTS = { + 'total_workers': { + 'options': [None, 'Total Jobs', 'Jobs', 'Total Jobs', 'gearman.total_jobs', 'line'], + 'lines': [ + ['total_pending', 'Pending', 'absolute'], + ['total_running', 'Running', 'absolute'], + ] + }, +} + + +def job_chart_template(job_name): + return { + 'options': [None, job_name, 'Jobs', 'Activity by Job', 'gearman.single_job', 'stacked'], + 'lines': [ + ['{0}_pending'.format(job_name), 'Pending', 'absolute'], + ['{0}_idle'.format(job_name), 'Idle', 'absolute'], + ['{0}_running'.format(job_name), 'Running', 'absolute'], + ] + } + + +def build_result_dict(job): + """ + Get the status for each job + :return: dict + """ + + total, running, available = job['metrics'] + + idle = available - running + pending = total - running + + return { + '{0}_pending'.format(job['job_name']): pending, + '{0}_idle'.format(job['job_name']): idle, + '{0}_running'.format(job['job_name']): running, + } + + +def parse_worker_data(job): + job_name = job[0] + job_metrics = job[1:] + + return { + 'job_name': job_name, + 'metrics': job_metrics, + } + + +class GearmanReadException(BaseException): + pass + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + super(Service, self).__init__(configuration=configuration, name=name) + self.request = "status\n" + self._keep_alive = True + + self.host = self.configuration.get('host', 'localhost') + self.port = self.configuration.get('port', 4730) + + self.tls = self.configuration.get('tls', False) + self.cert = self.configuration.get('cert', None) + self.key = self.configuration.get('key', None) + + self.active_jobs = set() + self.definitions = deepcopy(CHARTS) + self.order = ['total_workers'] + + def _get_data(self): + """ + Format data received from socket + :return: dict + """ + + try: + active_jobs = self.get_active_jobs() + except GearmanReadException: + return None + + found_jobs, job_data = self.process_jobs(active_jobs) + self.remove_stale_jobs(found_jobs) + return job_data + + def get_active_jobs(self): + active_jobs = [] + + for job in self.get_worker_data(): + parsed_job = parse_worker_data(job) + + # Gearman does not clean up old jobs + # We only care about jobs that have + # some relevant data + if not any(parsed_job['metrics']): + continue + + active_jobs.append(parsed_job) + + return active_jobs + + def get_worker_data(self): + """ + Split the data returned from Gearman + into a list of lists + + This returns the same output that you + would get from a gearadmin --status + command. + + Example output returned from + _get_raw_data(): + prefix generic_worker4 78 78 500 + generic_worker2 78 78 500 + generic_worker3 0 0 760 + generic_worker1 0 0 500 + + :return: list + """ + + try: + raw = self._get_raw_data() + except (ValueError, AttributeError): + raise GearmanReadException() + + if raw is None: + self.debug("Gearman returned no data") + raise GearmanReadException() + + workers = list() + + for line in raw.splitlines()[:-1]: + parts = line.split() + if not parts: + continue + + name = '_'.join(parts[:-3]) + try: + values = [int(w) for w in parts[-3:]] + except ValueError: + continue + + w = [name] + w.extend(values) + workers.append(w) + + return workers + + def process_jobs(self, active_jobs): + + output = { + 'total_pending': 0, + 'total_idle': 0, + 'total_running': 0, + } + found_jobs = set() + + for parsed_job in active_jobs: + + job_name = self.add_job(parsed_job) + found_jobs.add(job_name) + job_data = build_result_dict(parsed_job) + + for sum_value in ('pending', 'running', 'idle'): + output['total_{0}'.format(sum_value)] += job_data['{0}_{1}'.format(job_name, sum_value)] + + output.update(job_data) + + return found_jobs, output + + def remove_stale_jobs(self, active_job_list): + """ + Removes jobs that have no workers, pending jobs, + or running jobs + :param active_job_list: The latest list of active jobs + :type active_job_list: iterable + :return: None + """ + + for to_remove in self.active_jobs - active_job_list: + self.remove_job(to_remove) + + def add_job(self, parsed_job): + """ + Adds a job to the list of active jobs + :param parsed_job: A parsed job dict + :type parsed_job: dict + :return: None + """ + + def add_chart(job_name): + """ + Adds a new job chart + :param job_name: The name of the job to add + :type job_name: string + :return: None + """ + + job_key = 'job_{0}'.format(job_name) + template = job_chart_template(job_name) + new_chart = self.charts.add_chart([job_key] + template['options']) + for dimension in template['lines']: + new_chart.add_dimension(dimension) + + if parsed_job['job_name'] not in self.active_jobs: + add_chart(parsed_job['job_name']) + self.active_jobs.add(parsed_job['job_name']) + + return parsed_job['job_name'] + + def remove_job(self, job_name): + """ + Removes a job to the list of active jobs + :param job_name: The name of the job to remove + :type job_name: string + :return: None + """ + + def remove_chart(job_name): + """ + Removes a job chart + :param job_name: The name of the job to remove + :type job_name: string + :return: None + """ + + job_key = 'job_{0}'.format(job_name) + self.charts[job_key].obsolete() + del self.charts[job_key] + + remove_chart(job_name) + self.active_jobs.remove(job_name) diff --git a/collectors/python.d.plugin/gearman/gearman.conf b/collectors/python.d.plugin/gearman/gearman.conf new file mode 100644 index 0000000..c41fd9f --- /dev/null +++ b/collectors/python.d.plugin/gearman/gearman.conf @@ -0,0 +1,72 @@ +# netdata python.d.plugin configuration for gearman +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, gearman also supports the following: +# +# hostname: localhost # The host running the Gearman server +# port: 4730 # Port of the Gearman server +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOB + +localhost: + name : 'local' + host : 'localhost' + port : 4730 \ No newline at end of file diff --git a/collectors/python.d.plugin/go_expvar/Makefile.inc b/collectors/python.d.plugin/go_expvar/Makefile.inc new file mode 100644 index 0000000..74f50d7 --- /dev/null +++ b/collectors/python.d.plugin/go_expvar/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += go_expvar/go_expvar.chart.py +dist_pythonconfig_DATA += go_expvar/go_expvar.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += go_expvar/README.md go_expvar/Makefile.inc + diff --git a/collectors/python.d.plugin/go_expvar/README.md b/collectors/python.d.plugin/go_expvar/README.md new file mode 100644 index 0000000..feb150d --- /dev/null +++ b/collectors/python.d.plugin/go_expvar/README.md @@ -0,0 +1,319 @@ + + +# Go applications monitoring with Netdata + +Monitors Go application that exposes its metrics with the use of `expvar` package from the Go standard library. The package produces charts for Go runtime memory statistics and optionally any number of custom charts. + +The `go_expvar` module produces the following charts: + +1. **Heap allocations** in kB + + - alloc: size of objects allocated on the heap + - inuse: size of allocated heap spans + +2. **Stack allocations** in kB + + - inuse: size of allocated stack spans + +3. **MSpan allocations** in kB + + - inuse: size of allocated mspan structures + +4. **MCache allocations** in kB + + - inuse: size of allocated mcache structures + +5. **Virtual memory** in kB + + - sys: size of reserved virtual address space + +6. **Live objects** + + - live: number of live objects in memory + +7. **GC pauses average** in ns + + - avg: average duration of all GC stop-the-world pauses + +## Monitoring Go applications + +Netdata can be used to monitor running Go applications that expose their metrics with +the use of the [expvar package](https://golang.org/pkg/expvar/) included in Go standard library. + +The `expvar` package exposes these metrics over HTTP and is very easy to use. +Consider this minimal sample below: + +```go +package main + +import ( + _ "expvar" + "net/http" +) + +func main() { + http.ListenAndServe("127.0.0.1:8080", nil) +} +``` + +When imported this way, the `expvar` package registers a HTTP handler at `/debug/vars` that +exposes Go runtime's memory statistics in JSON format. You can inspect the output by opening +the URL in your browser (or by using `wget` or `curl`). + +Sample output: + +```json +{ +"cmdline": ["./expvar-demo-binary"], +"memstats": {"Alloc":630856,"TotalAlloc":630856,"Sys":3346432,"Lookups":27, } +} +``` + +You can of course expose and monitor your own variables as well. +Here is a sample Go application that exposes a few custom variables: + +```go +package main + +import ( + "expvar" + "net/http" + "runtime" + "time" +) + +func main() { + + tick := time.NewTicker(1 * time.Second) + num_go := expvar.NewInt("runtime.goroutines") + counters := expvar.NewMap("counters") + counters.Set("cnt1", new(expvar.Int)) + counters.Set("cnt2", new(expvar.Float)) + + go http.ListenAndServe(":8080", nil) + + for { + select { + case <- tick.C: + num_go.Set(int64(runtime.NumGoroutine())) + counters.Add("cnt1", 1) + counters.AddFloat("cnt2", 1.452) + } + } +} +``` + +Apart from the runtime memory stats, this application publishes two counters and the +number of currently running Goroutines and updates these stats every second. + +In the next section, we will cover how to monitor and chart these exposed stats with +the use of `netdata`s `go_expvar` module. + +### Using Netdata go_expvar module + +The `go_expvar` module is disabled by default. To enable it, edit `python.d.conf` (to edit it on your system run +`/etc/netdata/edit-config python.d.conf`), and change the `go_expvar` variable to `yes`: + +``` +# Enable / Disable python.d.plugin modules +#default_run: yes +# +# If "default_run" = "yes" the default for all modules is enabled (yes). +# Setting any of these to "no" will disable it. +# +# If "default_run" = "no" the default for all modules is disabled (no). +# Setting any of these to "yes" will enable it. +... +go_expvar: yes +... +``` + +Next, we need to edit the module configuration file (found at `/etc/netdata/python.d/go_expvar.conf` by default) (to +edit it on your system run `/etc/netdata/edit-config python.d/go_expvar.conf`). The module configuration consists of +jobs, where each job can be used to monitor a separate Go application. Let's see a sample job configuration: + +``` +# /etc/netdata/python.d/go_expvar.conf + +app1: + name : 'app1' + url : 'http://127.0.0.1:8080/debug/vars' + collect_memstats: true + extra_charts: {} +``` + +Let's go over each of the defined options: + +``` +name: 'app1' +``` + +This is the job name that will appear at the Netdata dashboard. +If not defined, the job_name (top level key) will be used. + +``` +url: 'http://127.0.0.1:8080/debug/vars' +``` + +This is the URL of the expvar endpoint. As the expvar handler can be installed +in a custom path, the whole URL has to be specified. This value is mandatory. + +``` +collect_memstats: true +``` + +Whether to enable collecting stats about Go runtime's memory. You can find more +information about the exposed values at the [runtime package docs](https://golang.org/pkg/runtime/#MemStats). + +``` +extra_charts: {} +``` + +Enables the user to specify custom expvars to monitor and chart. +Will be explained in more detail below. + +**Note: if `collect_memstats` is disabled and no `extra_charts` are defined, the plugin will +disable itself, as there will be no data to collect!** + +Apart from these options, each job supports options inherited from Netdata's `python.d.plugin` +and its base `UrlService` class. These are: + +``` +update_every: 1 # the job's data collection frequency +priority: 60000 # the job's order on the dashboard +user: admin # use when the expvar endpoint is protected by HTTP Basic Auth +password: sekret # use when the expvar endpoint is protected by HTTP Basic Auth +``` + +### Monitoring custom vars with go_expvar + +Now, memory stats might be useful, but what if you want Netdata to monitor some custom values +that your Go application exposes? The `go_expvar` module can do that as well with the use of +the `extra_charts` configuration variable. + +The `extra_charts` variable is a YaML list of Netdata chart definitions. +Each chart definition has the following keys: + +``` +id: Netdata chart ID +options: a key-value mapping of chart options +lines: a list of line definitions +``` + +**Note: please do not use dots in the chart or line ID field. +See [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.** + +Please see these two links to the official Netdata documentation for more information about the values: + +- [External plugins - charts](/collectors/plugins.d/README.md#chart) +- [Chart variables](/collectors/python.d.plugin/README.md#global-variables-order-and-chart) + +**Line definitions** + +Each chart can define multiple lines (dimensions). +A line definition is a key-value mapping of line options. +Each line can have the following options: + +``` +# mandatory +expvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint +expvar_type: value type; supported are "float" or "int" +id: the id of this line/dimension in Netdata + +# optional - Netdata defaults are used if these options are not defined +name: '' +algorithm: absolute +multiplier: 1 +divisor: 100 if expvar_type == float, 1 if expvar_type == int +hidden: False +``` + +Please see the following link for more information about the options and their default values: +[External plugins - dimensions](/collectors/plugins.d/README.md#dimension) + +Apart from top-level expvars, this plugin can also parse expvars stored in a multi-level map; +All dicts in the resulting JSON document are then flattened to one level. +Expvar names are joined together with '.' when flattening. + +Example: + +``` +{ + "counters": {"cnt1": 1042, "cnt2": 1512.9839999999983}, + "runtime.goroutines": 5 +} +``` + +In the above case, the exported variables will be available under `runtime.goroutines`, +`counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision, +the first defined key wins and all subsequent keys with the same name are ignored. + +## Enable the collector + +The `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `go_expvar` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl +restart netdata`, or the appropriate method for your system, to finish enabling the `go_expvar` collector. + +## Configuration + +Edit the `python.d/go_expvar.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/go_expvar.conf +``` + +The configuration below matches the second Go application described above. +Netdata will monitor and chart memory stats for the application, as well as a custom chart of +running goroutines and two dummy counters. + +``` +app1: + name : 'app1' + url : 'http://127.0.0.1:8080/debug/vars' + collect_memstats: true + extra_charts: + - id: "runtime_goroutines" + options: + name: num_goroutines + title: "runtime: number of goroutines" + units: goroutines + family: runtime + context: expvar.runtime.goroutines + chart_type: line + lines: + - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines} + - id: "foo_counters" + options: + name: counters + title: "some random counters" + units: awesomeness + family: counters + context: expvar.foo.counters + chart_type: line + lines: + - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1} + - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2} +``` + +**Netdata charts example** + +The images below show how do the final charts in Netdata look. + +![Memory stats charts](https://cloud.githubusercontent.com/assets/15180106/26762052/62b4af58-493b-11e7-9e69-146705acfc2c.png) + +![Custom charts](https://cloud.githubusercontent.com/assets/15180106/26762051/62ae915e-493b-11e7-8518-bd25a3886650.png) + + diff --git a/collectors/python.d.plugin/go_expvar/go_expvar.chart.py b/collectors/python.d.plugin/go_expvar/go_expvar.chart.py new file mode 100644 index 0000000..dca0108 --- /dev/null +++ b/collectors/python.d.plugin/go_expvar/go_expvar.chart.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- +# Description: go_expvar netdata python.d module +# Author: Jan Kral (kralewitz) +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import division + +import json +from collections import namedtuple + +from bases.FrameworkServices.UrlService import UrlService + +MEMSTATS_ORDER = [ + 'memstats_heap', + 'memstats_stack', + 'memstats_mspan', + 'memstats_mcache', + 'memstats_sys', + 'memstats_live_objects', + 'memstats_gc_pauses', +] + +MEMSTATS_CHARTS = { + 'memstats_heap': { + 'options': ['heap', 'memory: size of heap memory structures', 'KiB', 'memstats', + 'expvar.memstats.heap', 'line'], + 'lines': [ + ['memstats_heap_alloc', 'alloc', 'absolute', 1, 1024], + ['memstats_heap_inuse', 'inuse', 'absolute', 1, 1024] + ] + }, + 'memstats_stack': { + 'options': ['stack', 'memory: size of stack memory structures', 'KiB', 'memstats', + 'expvar.memstats.stack', 'line'], + 'lines': [ + ['memstats_stack_inuse', 'inuse', 'absolute', 1, 1024] + ] + }, + 'memstats_mspan': { + 'options': ['mspan', 'memory: size of mspan memory structures', 'KiB', 'memstats', + 'expvar.memstats.mspan', 'line'], + 'lines': [ + ['memstats_mspan_inuse', 'inuse', 'absolute', 1, 1024] + ] + }, + 'memstats_mcache': { + 'options': ['mcache', 'memory: size of mcache memory structures', 'KiB', 'memstats', + 'expvar.memstats.mcache', 'line'], + 'lines': [ + ['memstats_mcache_inuse', 'inuse', 'absolute', 1, 1024] + ] + }, + 'memstats_live_objects': { + 'options': ['live_objects', 'memory: number of live objects', 'objects', 'memstats', + 'expvar.memstats.live_objects', 'line'], + 'lines': [ + ['memstats_live_objects', 'live'] + ] + }, + 'memstats_sys': { + 'options': ['sys', 'memory: size of reserved virtual address space', 'KiB', 'memstats', + 'expvar.memstats.sys', 'line'], + 'lines': [ + ['memstats_sys', 'sys', 'absolute', 1, 1024] + ] + }, + 'memstats_gc_pauses': { + 'options': ['gc_pauses', 'memory: average duration of GC pauses', 'ns', 'memstats', + 'expvar.memstats.gc_pauses', 'line'], + 'lines': [ + ['memstats_gc_pauses', 'avg'] + ] + } +} + +EXPVAR = namedtuple( + "EXPVAR", + [ + "key", + "type", + "id", + ] +) + + +def flatten(d, top='', sep='.'): + items = [] + for key, val in d.items(): + nkey = top + sep + key if top else key + if isinstance(val, dict): + items.extend(flatten(val, nkey, sep=sep).items()) + else: + items.append((nkey, val)) + return dict(items) + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + # if memstats collection is enabled, add the charts and their order + if self.configuration.get('collect_memstats'): + self.definitions = dict(MEMSTATS_CHARTS) + self.order = list(MEMSTATS_ORDER) + else: + self.definitions = dict() + self.order = list() + + # if extra charts are defined, parse their config + extra_charts = self.configuration.get('extra_charts') + if extra_charts: + self._parse_extra_charts_config(extra_charts) + + def check(self): + """ + Check if the module can collect data: + 1) At least one JOB configuration has to be specified + 2) The JOB configuration needs to define the URL and either collect_memstats must be enabled or at least one + extra_chart must be defined. + + The configuration and URL check is provided by the UrlService class. + """ + + if not (self.configuration.get('extra_charts') or self.configuration.get('collect_memstats')): + self.error('Memstats collection is disabled and no extra_charts are defined, disabling module.') + return False + + return UrlService.check(self) + + def _parse_extra_charts_config(self, extra_charts_config): + + # a place to store the expvar keys and their types + self.expvars = list() + + for chart in extra_charts_config: + + chart_dict = dict() + chart_id = chart.get('id') + chart_lines = chart.get('lines') + chart_opts = chart.get('options', dict()) + + if not all([chart_id, chart_lines]): + self.info('Chart {0} has no ID or no lines defined, skipping'.format(chart)) + continue + + chart_dict['options'] = [ + chart_opts.get('name', ''), + chart_opts.get('title', ''), + chart_opts.get('units', ''), + chart_opts.get('family', ''), + chart_opts.get('context', ''), + chart_opts.get('chart_type', 'line') + ] + chart_dict['lines'] = list() + + # add the lines to the chart + for line in chart_lines: + + ev_key = line.get('expvar_key') + ev_type = line.get('expvar_type') + line_id = line.get('id') + + if not all([ev_key, ev_type, line_id]): + self.info('Line missing expvar_key, expvar_type, or line_id, skipping: {0}'.format(line)) + continue + + if ev_type not in ['int', 'float']: + self.info('Unsupported expvar_type "{0}". Must be "int" or "float"'.format(ev_type)) + continue + + # self.expvars[ev_key] = (ev_type, line_id) + self.expvars.append(EXPVAR(ev_key, ev_type, line_id)) + + chart_dict['lines'].append( + [ + line.get('id', ''), + line.get('name', ''), + line.get('algorithm', ''), + line.get('multiplier', 1), + line.get('divisor', 100 if ev_type == 'float' else 1), + line.get('hidden', False) + ] + ) + + self.order.append(chart_id) + self.definitions[chart_id] = chart_dict + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + + raw_data = self._get_raw_data() + if not raw_data: + return None + + data = json.loads(raw_data) + + expvars = dict() + if self.configuration.get('collect_memstats'): + expvars.update(self._parse_memstats(data)) + + if self.configuration.get('extra_charts'): + # the memstats part of the data has been already parsed, so we remove it before flattening and checking + # the rest of the data, thus avoiding needless iterating over the multiply nested memstats dict. + del (data['memstats']) + flattened = flatten(data) + + for ev in self.expvars: + v = flattened.get(ev.key) + + if v is None: + continue + + try: + if ev.type == 'int': + expvars[ev.id] = int(v) + elif ev.type == 'float': + expvars[ev.id] = float(v) * 100 + except ValueError: + self.info('Failed to parse value for key {0} as {1}, ignoring key.'.format(ev.key, ev.type)) + return None + + return expvars + + @staticmethod + def _parse_memstats(data): + + memstats = data['memstats'] + + # calculate the number of live objects in memory + live_objs = int(memstats['Mallocs']) - int(memstats['Frees']) + + # calculate GC pause times average + # the Go runtime keeps the last 256 GC pause durations in a circular buffer, + # so we need to filter out the 0 values before the buffer is filled + gc_pauses = memstats['PauseNs'] + try: + gc_pause_avg = sum(gc_pauses) / len([x for x in gc_pauses if x > 0]) + # no GC cycles have occurred yet + except ZeroDivisionError: + gc_pause_avg = 0 + + return { + 'memstats_heap_alloc': memstats['HeapAlloc'], + 'memstats_heap_inuse': memstats['HeapInuse'], + 'memstats_stack_inuse': memstats['StackInuse'], + 'memstats_mspan_inuse': memstats['MSpanInuse'], + 'memstats_mcache_inuse': memstats['MCacheInuse'], + 'memstats_sys': memstats['Sys'], + 'memstats_live_objects': live_objs, + 'memstats_gc_pauses': gc_pause_avg, + } diff --git a/collectors/python.d.plugin/go_expvar/go_expvar.conf b/collectors/python.d.plugin/go_expvar/go_expvar.conf new file mode 100644 index 0000000..4b821cd --- /dev/null +++ b/collectors/python.d.plugin/go_expvar/go_expvar.conf @@ -0,0 +1,108 @@ +# netdata python.d.plugin configuration for go_expvar +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, this plugin also supports the following: +# +# url: 'http://127.0.0.1/debug/vars' # the URL of the expvar endpoint +# +# As the plugin cannot possibly know the port your application listens on, there is no default value. Please include +# the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# collect_memstats: true # enables charts for Go runtime's memory statistics +# extra_charts: {} # defines extra data/charts to monitor, please see the example below +# +# If collect_memstats is disabled and no extra charts are defined, this module will disable itself, as it has no data to +# collect. +# +# Please visit the module wiki page for more information on how to use the extra_charts variable: +# +# https://github.com/netdata/netdata/tree/master/collectors/python.d.plugin/go_expvar +# +# Configuration example +# --------------------- + +#app1: +# name : 'app1' +# url : 'http://127.0.0.1:8080/debug/vars' +# collect_memstats: true +# extra_charts: +# - id: "runtime_goroutines" +# options: +# name: num_goroutines +# title: "runtime: number of goroutines" +# units: goroutines +# family: runtime +# context: expvar.runtime.goroutines +# chart_type: line +# lines: +# - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines} +# - id: "foo_counters" +# options: +# name: counters +# title: "some random counters" +# units: awesomeness +# family: counters +# context: expvar.foo.counters +# chart_type: line +# lines: +# - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1} +# - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2} + diff --git a/collectors/python.d.plugin/haproxy/Makefile.inc b/collectors/python.d.plugin/haproxy/Makefile.inc new file mode 100644 index 0000000..ad24dea --- /dev/null +++ b/collectors/python.d.plugin/haproxy/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += haproxy/haproxy.chart.py +dist_pythonconfig_DATA += haproxy/haproxy.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += haproxy/README.md haproxy/Makefile.inc + diff --git a/collectors/python.d.plugin/haproxy/README.md b/collectors/python.d.plugin/haproxy/README.md new file mode 100644 index 0000000..f16e725 --- /dev/null +++ b/collectors/python.d.plugin/haproxy/README.md @@ -0,0 +1,67 @@ + + +# HAProxy monitoring with Netdata + +Monitors frontend and backend metrics such as bytes in, bytes out, sessions current, sessions in queue current. +And health metrics such as backend servers status (server check should be used). + +Plugin can obtain data from URL or Unix socket. + +Requirement: + +- Socket must be readable and writable by the `netdata` user. +- URL must have `stats uri ` present in the haproxy config, otherwise you will get HTTP 503 in the haproxy logs. + +It produces: + +1. **Frontend** family charts + + - Kilobytes in/s + - Kilobytes out/s + - Sessions current + - Sessions in queue current + +2. **Backend** family charts + + - Kilobytes in/s + - Kilobytes out/s + - Sessions current + - Sessions in queue current + +3. **Health** chart + + - number of failed servers for every backend (in DOWN state) + +## Configuration + +Edit the `python.d/haproxy.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/haproxy.conf +``` + +Sample: + +```yaml +via_url: + user: 'username' # ONLY IF stats auth is used + pass: 'password' # # ONLY IF stats auth is used + url: 'http://ip.address:port/url;csv;norefresh' +``` + +OR + +```yaml +via_socket: + socket: 'path/to/haproxy/sock' +``` + +If no configuration is given, module will fail to run. + +--- diff --git a/collectors/python.d.plugin/haproxy/haproxy.chart.py b/collectors/python.d.plugin/haproxy/haproxy.chart.py new file mode 100644 index 0000000..6f94c9a --- /dev/null +++ b/collectors/python.d.plugin/haproxy/haproxy.chart.py @@ -0,0 +1,360 @@ +# -*- coding: utf-8 -*- +# Description: haproxy netdata python.d module +# Author: ilyam8, ktarasz +# SPDX-License-Identifier: GPL-3.0-or-later + +from collections import defaultdict +from re import compile as re_compile + +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse + +from bases.FrameworkServices.SocketService import SocketService +from bases.FrameworkServices.UrlService import UrlService + +# charts order (can be overridden if you want less charts, or different order) +ORDER = [ + 'fbin', + 'fbout', + 'fscur', + 'fqcur', + 'fhrsp_1xx', + 'fhrsp_2xx', + 'fhrsp_3xx', + 'fhrsp_4xx', + 'fhrsp_5xx', + 'fhrsp_other', + 'fhrsp_total', + 'bbin', + 'bbout', + 'bscur', + 'bqcur', + 'bhrsp_1xx', + 'bhrsp_2xx', + 'bhrsp_3xx', + 'bhrsp_4xx', + 'bhrsp_5xx', + 'bhrsp_other', + 'bhrsp_total', + 'bqtime', + 'bttime', + 'brtime', + 'bctime', + 'health_sup', + 'health_sdown', + 'health_bdown', + 'health_idle' +] + +CHARTS = { + 'fbin': { + 'options': [None, 'Kilobytes In', 'KiB/s', 'frontend', 'haproxy_f.bin', 'line'], + 'lines': [] + }, + 'fbout': { + 'options': [None, 'Kilobytes Out', 'KiB/s', 'frontend', 'haproxy_f.bout', 'line'], + 'lines': [] + }, + 'fscur': { + 'options': [None, 'Sessions Active', 'sessions', 'frontend', 'haproxy_f.scur', 'line'], + 'lines': [] + }, + 'fqcur': { + 'options': [None, 'Session In Queue', 'sessions', 'frontend', 'haproxy_f.qcur', 'line'], + 'lines': [] + }, + 'fhrsp_1xx': { + 'options': [None, 'HTTP responses with 1xx code', 'responses/s', 'frontend', 'haproxy_f.hrsp_1xx', 'line'], + 'lines': [] + }, + 'fhrsp_2xx': { + 'options': [None, 'HTTP responses with 2xx code', 'responses/s', 'frontend', 'haproxy_f.hrsp_2xx', 'line'], + 'lines': [] + }, + 'fhrsp_3xx': { + 'options': [None, 'HTTP responses with 3xx code', 'responses/s', 'frontend', 'haproxy_f.hrsp_3xx', 'line'], + 'lines': [] + }, + 'fhrsp_4xx': { + 'options': [None, 'HTTP responses with 4xx code', 'responses/s', 'frontend', 'haproxy_f.hrsp_4xx', 'line'], + 'lines': [] + }, + 'fhrsp_5xx': { + 'options': [None, 'HTTP responses with 5xx code', 'responses/s', 'frontend', 'haproxy_f.hrsp_5xx', 'line'], + 'lines': [] + }, + 'fhrsp_other': { + 'options': [None, 'HTTP responses with other codes (protocol error)', 'responses/s', 'frontend', + 'haproxy_f.hrsp_other', 'line'], + 'lines': [] + }, + 'fhrsp_total': { + 'options': [None, 'HTTP responses', 'responses', 'frontend', 'haproxy_f.hrsp_total', 'line'], + 'lines': [] + }, + 'bbin': { + 'options': [None, 'Kilobytes In', 'KiB/s', 'backend', 'haproxy_b.bin', 'line'], + 'lines': [] + }, + 'bbout': { + 'options': [None, 'Kilobytes Out', 'KiB/s', 'backend', 'haproxy_b.bout', 'line'], + 'lines': [] + }, + 'bscur': { + 'options': [None, 'Sessions Active', 'sessions', 'backend', 'haproxy_b.scur', 'line'], + 'lines': [] + }, + 'bqcur': { + 'options': [None, 'Sessions In Queue', 'sessions', 'backend', 'haproxy_b.qcur', 'line'], + 'lines': [] + }, + 'bhrsp_1xx': { + 'options': [None, 'HTTP responses with 1xx code', 'responses/s', 'backend', 'haproxy_b.hrsp_1xx', 'line'], + 'lines': [] + }, + 'bhrsp_2xx': { + 'options': [None, 'HTTP responses with 2xx code', 'responses/s', 'backend', 'haproxy_b.hrsp_2xx', 'line'], + 'lines': [] + }, + 'bhrsp_3xx': { + 'options': [None, 'HTTP responses with 3xx code', 'responses/s', 'backend', 'haproxy_b.hrsp_3xx', 'line'], + 'lines': [] + }, + 'bhrsp_4xx': { + 'options': [None, 'HTTP responses with 4xx code', 'responses/s', 'backend', 'haproxy_b.hrsp_4xx', 'line'], + 'lines': [] + }, + 'bhrsp_5xx': { + 'options': [None, 'HTTP responses with 5xx code', 'responses/s', 'backend', 'haproxy_b.hrsp_5xx', 'line'], + 'lines': [] + }, + 'bhrsp_other': { + 'options': [None, 'HTTP responses with other codes (protocol error)', 'responses/s', 'backend', + 'haproxy_b.hrsp_other', 'line'], + 'lines': [] + }, + 'bhrsp_total': { + 'options': [None, 'HTTP responses (total)', 'responses/s', 'backend', 'haproxy_b.hrsp_total', 'line'], + 'lines': [] + }, + 'bqtime': { + 'options': [None, 'The average queue time over the 1024 last requests', 'milliseconds', 'backend', + 'haproxy_b.qtime', 'line'], + 'lines': [] + }, + 'bctime': { + 'options': [None, 'The average connect time over the 1024 last requests', 'milliseconds', 'backend', + 'haproxy_b.ctime', 'line'], + 'lines': [] + }, + 'brtime': { + 'options': [None, 'The average response time over the 1024 last requests', 'milliseconds', 'backend', + 'haproxy_b.rtime', 'line'], + 'lines': [] + }, + 'bttime': { + 'options': [None, 'The average total session time over the 1024 last requests', 'milliseconds', 'backend', + 'haproxy_b.ttime', 'line'], + 'lines': [] + }, + 'health_sdown': { + 'options': [None, 'Backend Servers In DOWN State', 'failed servers', 'health', 'haproxy_hs.down', 'line'], + 'lines': [] + }, + 'health_sup': { + 'options': [None, 'Backend Servers In UP State', 'health servers', 'health', 'haproxy_hs.up', 'line'], + 'lines': [] + }, + 'health_bdown': { + 'options': [None, 'Is Backend Failed?', 'boolean', 'health', 'haproxy_hb.down', 'line'], + 'lines': [] + }, + 'health_idle': { + 'options': [None, 'The Ratio Of Polling Time Vs Total Time', 'percentage', 'health', 'haproxy.idle', 'line'], + 'lines': [ + ['idle', None, 'absolute'] + ] + } +} + +METRICS = { + 'bin': {'algorithm': 'incremental', 'divisor': 1024}, + 'bout': {'algorithm': 'incremental', 'divisor': 1024}, + 'scur': {'algorithm': 'absolute', 'divisor': 1}, + 'qcur': {'algorithm': 'absolute', 'divisor': 1}, + 'hrsp_1xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_2xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_3xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_4xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_5xx': {'algorithm': 'incremental', 'divisor': 1}, + 'hrsp_other': {'algorithm': 'incremental', 'divisor': 1} +} + +BACKEND_METRICS = { + 'qtime': {'algorithm': 'absolute', 'divisor': 1}, + 'ctime': {'algorithm': 'absolute', 'divisor': 1}, + 'rtime': {'algorithm': 'absolute', 'divisor': 1}, + 'ttime': {'algorithm': 'absolute', 'divisor': 1} +} + +REGEX = dict(url=re_compile(r'idle = (?P[0-9]+)'), + socket=re_compile(r'Idle_pct: (?P[0-9]+)')) + + +# TODO: the code is unreadable +class Service(UrlService, SocketService): + def __init__(self, configuration=None, name=None): + if 'socket' in configuration: + SocketService.__init__(self, configuration=configuration, name=name) + self.poll = SocketService + self.options_ = dict(regex=REGEX['socket'], + stat='show stat\n'.encode(), + info='show info\n'.encode()) + else: + UrlService.__init__(self, configuration=configuration, name=name) + self.poll = UrlService + self.options_ = dict(regex=REGEX['url'], + stat=self.url, + info=url_remove_params(self.url)) + self.order = ORDER + self.definitions = CHARTS + + def check(self): + if self.poll.check(self): + self.create_charts() + self.info('We are using %s.' % self.poll.__name__) + return True + return False + + def _get_data(self): + to_netdata = dict() + self.request, self.url = self.options_['stat'], self.options_['stat'] + stat_data = self._get_stat_data() + self.request, self.url = self.options_['info'], self.options_['info'] + info_data = self._get_info_data(regex=self.options_['regex']) + + to_netdata.update(stat_data) + to_netdata.update(info_data) + return to_netdata or None + + def _get_stat_data(self): + """ + :return: dict + """ + raw_data = self.poll._get_raw_data(self) + + if not raw_data: + return dict() + + raw_data = raw_data.splitlines() + self.data = parse_data_([dict(zip(raw_data[0].split(','), raw_data[_].split(','))) + for _ in range(1, len(raw_data))]) + if not self.data: + return dict() + + stat_data = dict() + + for frontend in self.data['frontend']: + for metric in METRICS: + idx = frontend['# pxname'].replace('.', '_') + stat_data['_'.join(['frontend', metric, idx])] = frontend.get(metric) or 0 + + for backend in self.data['backend']: + name, idx = backend['# pxname'], backend['# pxname'].replace('.', '_') + stat_data['hsup_' + idx] = len([server for server in self.data['servers'] + if server_status(server, name, 'UP')]) + stat_data['hsdown_' + idx] = len([server for server in self.data['servers'] + if server_status(server, name, 'DOWN')]) + stat_data['hbdown_' + idx] = 1 if backend.get('status') == 'DOWN' else 0 + for metric in BACKEND_METRICS: + stat_data['_'.join(['backend', metric, idx])] = backend.get(metric) or 0 + hrsp_total = 0 + for metric in METRICS: + stat_data['_'.join(['backend', metric, idx])] = backend.get(metric) or 0 + if metric.startswith('hrsp_'): + hrsp_total += int(backend.get(metric) or 0) + stat_data['_'.join(['backend', 'hrsp_total', idx])] = hrsp_total + return stat_data + + def _get_info_data(self, regex): + """ + :return: dict + """ + raw_data = self.poll._get_raw_data(self) + if not raw_data: + return dict() + + match = regex.search(raw_data) + return match.groupdict() if match else dict() + + @staticmethod + def _check_raw_data(data): + """ + Check if all data has been gathered from socket + :param data: str + :return: boolean + """ + return not bool(data) + + def create_charts(self): + for front in self.data['frontend']: + name, idx = front['# pxname'], front['# pxname'].replace('.', '_') + for metric in METRICS: + self.definitions['f' + metric]['lines'].append(['_'.join(['frontend', metric, idx]), + name, METRICS[metric]['algorithm'], 1, + METRICS[metric]['divisor']]) + self.definitions['fhrsp_total']['lines'].append(['_'.join(['frontend', 'hrsp_total', idx]), + name, 'incremental', 1, 1]) + for back in self.data['backend']: + name, idx = back['# pxname'], back['# pxname'].replace('.', '_') + for metric in METRICS: + self.definitions['b' + metric]['lines'].append(['_'.join(['backend', metric, idx]), + name, METRICS[metric]['algorithm'], 1, + METRICS[metric]['divisor']]) + self.definitions['bhrsp_total']['lines'].append(['_'.join(['backend', 'hrsp_total', idx]), + name, 'incremental', 1, 1]) + for metric in BACKEND_METRICS: + self.definitions['b' + metric]['lines'].append(['_'.join(['backend', metric, idx]), + name, BACKEND_METRICS[metric]['algorithm'], 1, + BACKEND_METRICS[metric]['divisor']]) + self.definitions['health_sup']['lines'].append(['hsup_' + idx, name, 'absolute']) + self.definitions['health_sdown']['lines'].append(['hsdown_' + idx, name, 'absolute']) + self.definitions['health_bdown']['lines'].append(['hbdown_' + idx, name, 'absolute']) + + +def parse_data_(data): + def is_backend(backend): + return backend.get('svname') == 'BACKEND' and backend.get('# pxname') != 'stats' + + def is_frontend(frontend): + return frontend.get('svname') == 'FRONTEND' and frontend.get('# pxname') != 'stats' + + def is_server(server): + return not server.get('svname', '').startswith(('FRONTEND', 'BACKEND')) + + if not data: + return None + + result = defaultdict(list) + for elem in data: + if is_backend(elem): + result['backend'].append(elem) + continue + elif is_frontend(elem): + result['frontend'].append(elem) + continue + elif is_server(elem): + result['servers'].append(elem) + + return result or None + + +def server_status(server, backend_name, status='DOWN'): + return server.get('# pxname') == backend_name and server.get('status') == status + + +def url_remove_params(url): + parsed = urlparse(url or str()) + return '{scheme}://{netloc}{path}'.format(scheme=parsed.scheme, netloc=parsed.netloc, path=parsed.path) diff --git a/collectors/python.d.plugin/haproxy/haproxy.conf b/collectors/python.d.plugin/haproxy/haproxy.conf new file mode 100644 index 0000000..10a0df3 --- /dev/null +++ b/collectors/python.d.plugin/haproxy/haproxy.conf @@ -0,0 +1,83 @@ +# netdata python.d.plugin configuration for haproxy +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, haproxy also supports the following: +# +# IMPORTANT: socket MUST BE readable AND writable by netdata user +# +# socket: 'path/to/haproxy/sock' +# +# OR +# url: 'http://:/;csv;norefresh' +# [user: USERNAME] only if stats auth is used +# [pass: PASSWORD] only if stats auth is used + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +#via_url: +# user : 'admin' +# pass : 'password' +# url : 'http://127.0.0.1:7000/haproxy_stats;csv;norefresh' + +#via_socket: +# socket: '/var/run/haproxy/admin.sock' diff --git a/collectors/python.d.plugin/hddtemp/Makefile.inc b/collectors/python.d.plugin/hddtemp/Makefile.inc new file mode 100644 index 0000000..22852b6 --- /dev/null +++ b/collectors/python.d.plugin/hddtemp/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += hddtemp/hddtemp.chart.py +dist_pythonconfig_DATA += hddtemp/hddtemp.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += hddtemp/README.md hddtemp/Makefile.inc + diff --git a/collectors/python.d.plugin/hddtemp/README.md b/collectors/python.d.plugin/hddtemp/README.md new file mode 100644 index 0000000..d8aba62 --- /dev/null +++ b/collectors/python.d.plugin/hddtemp/README.md @@ -0,0 +1,38 @@ + + +# Hard drive temperature monitoring with Netdata + +Monitors disk temperatures from one or more `hddtemp` daemons. + +**Requirement:** +Running `hddtemp` in daemonized mode with access on tcp port + +It produces one chart **Temperature** with dynamic number of dimensions (one per disk) + +## Configuration + +Edit the `python.d/hddtemp.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/hddtemp.conf +``` + +Sample: + +```yaml +update_every: 3 +host: "127.0.0.1" +port: 7634 +``` + +If no configuration is given, module will attempt to connect to hddtemp daemon on `127.0.0.1:7634` address + +--- + + diff --git a/collectors/python.d.plugin/hddtemp/hddtemp.chart.py b/collectors/python.d.plugin/hddtemp/hddtemp.chart.py new file mode 100644 index 0000000..6427aa1 --- /dev/null +++ b/collectors/python.d.plugin/hddtemp/hddtemp.chart.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# Description: hddtemp netdata python.d module +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + + +import re +from copy import deepcopy + +from bases.FrameworkServices.SocketService import SocketService + +ORDER = [ + 'temperatures', +] + +CHARTS = { + 'temperatures': { + 'options': ['disks_temp', 'Disks Temperatures', 'Celsius', 'temperatures', 'hddtemp.temperatures', 'line'], + 'lines': [ + # lines are created dynamically in `check()` method + ]}} + +RE = re.compile(r'\/dev\/([^|]+)\|([^|]+)\|([0-9]+|SLP|UNK)\|') + + +class Disk: + def __init__(self, id_, name, temp): + self.id = id_.split('/')[-1] + self.name = name.replace(' ', '_') + self.temp = temp if temp.isdigit() else None + + def __repr__(self): + return self.id + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + SocketService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.do_only = self.configuration.get('devices') + self._keep_alive = False + self.request = "" + self.host = "127.0.0.1" + self.port = 7634 + + def get_disks(self): + r = self._get_raw_data() + + if not r: + return None + + m = RE.findall(r) + + if not m: + self.error("received data doesn't have needed records") + return None + + rv = [Disk(*d) for d in m] + self.debug('available disks: {0}'.format(rv)) + + if self.do_only: + return [v for v in rv if v.id in self.do_only] + return rv + + def get_data(self): + """ + Get data from TCP/IP socket + :return: dict + """ + + disks = self.get_disks() + + if not disks: + return None + + return dict((d.id, d.temp) for d in disks) + + def check(self): + """ + Parse configuration, check if hddtemp is available, and dynamically create chart lines data + :return: boolean + """ + self._parse_config() + disks = self.get_disks() + + if not disks: + return False + + for d in disks: + dim = [d.id] + self.definitions['temperatures']['lines'].append(dim) + + return True + + @staticmethod + def _check_raw_data(data): + return not bool(data) diff --git a/collectors/python.d.plugin/hddtemp/hddtemp.conf b/collectors/python.d.plugin/hddtemp/hddtemp.conf new file mode 100644 index 0000000..b2d7aef --- /dev/null +++ b/collectors/python.d.plugin/hddtemp/hddtemp.conf @@ -0,0 +1,95 @@ +# netdata python.d.plugin configuration for hddtemp +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, hddtemp also supports the following: +# +# host: 'IP or HOSTNAME' # the host to connect to +# port: PORT # the port to connect to +# + +# By default this module will try to autodetect disks +# (autodetection works only for disk which names start with "sd"). +# However this can be overridden by setting variable `disks` to +# array of desired disks. Example for two disks: +# +# devices: +# - sda +# - sdb +# + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name: 'local' + host: 'localhost' + port: 7634 + +localipv4: + name: 'local' + host: '127.0.0.1' + port: 7634 + +localipv6: + name: 'local' + host: '::1' + port: 7634 diff --git a/collectors/python.d.plugin/hpssa/Makefile.inc b/collectors/python.d.plugin/hpssa/Makefile.inc new file mode 100644 index 0000000..1c04aa4 --- /dev/null +++ b/collectors/python.d.plugin/hpssa/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += hpssa/hpssa.chart.py +dist_pythonconfig_DATA += hpssa/hpssa.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += hpssa/README.md hpssa/Makefile.inc + diff --git a/collectors/python.d.plugin/hpssa/README.md b/collectors/python.d.plugin/hpssa/README.md new file mode 100644 index 0000000..c1d2182 --- /dev/null +++ b/collectors/python.d.plugin/hpssa/README.md @@ -0,0 +1,83 @@ + + +# HP Smart Storage Arrays monitoring with Netdata + +Monitors controller, cache module, logical and physical drive state and temperature using `ssacli` tool. + +Executed commands: + +- `sudo -n ssacli ctrl all show config detail` + +## Requirements: + +This module uses `ssacli`, which can only be executed by root. It uses +`sudo` and assumes that it is configured such that the `netdata` user can execute `ssacli` as root without a password. + +- Add to your `/etc/sudoers` file: + +`which ssacli` shows the full path to the binary. + +```bash +netdata ALL=(root) NOPASSWD: /path/to/ssacli +``` + +- Reset Netdata's systemd + unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux + distributions with systemd) + +The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `ssacli` using `sudo`. + +As the `root` user, do the following: + +```cmd +mkdir /etc/systemd/system/netdata.service.d +echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf +systemctl daemon-reload +systemctl restart netdata.service +``` + +## Charts + +- Controller status +- Controller temperature +- Logical drive status +- Physical drive status +- Physical drive temperature + +## Enable the collector + +The `hpssa` collector is disabled by default. To enable it, use `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` +file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `hpssa` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl +restart netdata`, or the [appropriate method](/docs/configure/start-stop-restart.md) for your system. + +## Configuration + +Edit the `python.d/hpssa.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/hpssa.conf +``` + +If `ssacli` cannot be found in the `PATH`, configure it in `hpssa.conf`. + +```yaml +ssacli_path: /usr/sbin/ssacli +``` + +Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate +method](/docs/configure/start-stop-restart.md) for your system. + diff --git a/collectors/python.d.plugin/hpssa/hpssa.chart.py b/collectors/python.d.plugin/hpssa/hpssa.chart.py new file mode 100644 index 0000000..4da73dc --- /dev/null +++ b/collectors/python.d.plugin/hpssa/hpssa.chart.py @@ -0,0 +1,396 @@ +# -*- coding: utf-8 -*- +# Description: hpssa netdata python.d module +# Author: Peter Gnodde (gnoddep) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os +import re +from copy import deepcopy + +from bases.FrameworkServices.ExecutableService import ExecutableService +from bases.collection import find_binary + +disabled_by_default = True +update_every = 5 + +ORDER = [ + 'ctrl_status', + 'ctrl_temperature', + 'ld_status', + 'pd_status', + 'pd_temperature', +] + +CHARTS = { + 'ctrl_status': { + 'options': [ + None, + 'Status 1 is OK, Status 0 is not OK', + 'Status', + 'Controller', + 'hpssa.ctrl_status', + 'line' + ], + 'lines': [] + }, + 'ctrl_temperature': { + 'options': [ + None, + 'Temperature', + 'Celsius', + 'Controller', + 'hpssa.ctrl_temperature', + 'line' + ], + 'lines': [] + }, + 'ld_status': { + 'options': [ + None, + 'Status 1 is OK, Status 0 is not OK', + 'Status', + 'Logical drives', + 'hpssa.ld_status', + 'line' + ], + 'lines': [] + }, + 'pd_status': { + 'options': [ + None, + 'Status 1 is OK, Status 0 is not OK', + 'Status', + 'Physical drives', + 'hpssa.pd_status', + 'line' + ], + 'lines': [] + }, + 'pd_temperature': { + 'options': [ + None, + 'Temperature', + 'Celsius', + 'Physical drives', + 'hpssa.pd_temperature', + 'line' + ], + 'lines': [] + } +} + +adapter_regex = re.compile(r'^(?P.+) in Slot (?P\d+)') +ignored_sections_regex = re.compile( + r''' + ^ + Physical[ ]Drives + | None[ ]attached + | (?:Expander|Enclosure|SEP|Port[ ]Name:)[ ].+ + | .+[ ]at[ ]Port[ ]\S+,[ ]Box[ ]\d+,[ ].+ + | Mirror[ ]Group[ ]\d+: + $ + ''', + re.X +) +mirror_group_regex = re.compile(r'^Mirror Group \d+:$') +disk_partition_regex = re.compile(r'^Disk Partition Information$') +array_regex = re.compile(r'^Array: (?P[A-Z]+)$') +drive_regex = re.compile( + r''' + ^ + Logical[ ]Drive:[ ](?P\d+) + | physicaldrive[ ](?P[^:]+:\d+:\d+) + $ + ''', + re.X +) +key_value_regex = re.compile(r'^(?P[^:]+): ?(?P.*)$') +ld_status_regex = re.compile(r'^Status: (?P[^,]+)(?:, (?P[0-9.]+)% complete)?$') +error_match = re.compile(r'Error:') + + +class HPSSAException(Exception): + pass + + +class HPSSA(object): + def __init__(self, lines): + self.lines = [line.strip() for line in lines if line.strip()] + self.current_line = 0 + self.adapters = [] + self.parse() + + def __iter__(self): + return self + + def __next__(self): + if self.current_line == len(self.lines): + raise StopIteration + + line = self.lines[self.current_line] + self.current_line += 1 + + return line + + def next(self): + """ + This is for Python 2.7 compatibility + """ + return self.__next__() + + def rewind(self): + self.current_line = max(self.current_line - 1, 0) + + @staticmethod + def match_any(line, *regexes): + return any([regex.match(line) for regex in regexes]) + + def parse(self): + for line in self: + match = adapter_regex.match(line) + if match: + self.adapters.append(self.parse_adapter(**match.groupdict())) + + def parse_adapter(self, slot, adapter_type): + adapter = { + 'slot': int(slot), + 'type': adapter_type, + + 'controller': { + 'status': None, + 'temperature': None, + }, + 'cache': { + 'present': False, + 'status': None, + 'temperature': None, + }, + 'battery': { + 'status': None, + 'count': 0, + }, + + 'logical_drives': [], + 'physical_drives': [], + } + + for line in self: + if error_match.match(line): + raise HPSSAException('Error: {}'.format(line)) + elif adapter_regex.match(line): + self.rewind() + break + elif array_regex.match(line): + self.parse_array(adapter) + elif line == 'Unassigned' or line == 'HBA Drives': + self.parse_physical_drives(adapter) + elif ignored_sections_regex.match(line): + self.parse_ignored_section() + else: + match = key_value_regex.match(line) + if match: + key, value = match.group('key', 'value') + if key == 'Controller Status': + adapter['controller']['status'] = value == 'OK' + elif key == 'Controller Temperature (C)': + adapter['controller']['temperature'] = int(value) + elif key == 'Cache Board Present': + adapter['cache']['present'] = value == 'True' + elif key == 'Cache Status': + adapter['cache']['status'] = value == 'OK' + elif key == 'Cache Module Temperature (C)': + adapter['cache']['temperature'] = int(value) + elif key == 'Battery/Capacitor Count': + adapter['battery']['count'] = int(value) + elif key == 'Battery/Capacitor Status': + adapter['battery']['status'] = value == 'OK' + else: + raise HPSSAException('Cannot parse line: {}'.format(line)) + + return adapter + + def parse_array(self, adapter): + for line in self: + if HPSSA.match_any(line, adapter_regex, array_regex, ignored_sections_regex): + self.rewind() + break + + match = drive_regex.match(line) + if match: + data = match.groupdict() + if data['logical_drive_id']: + self.parse_logical_drive(adapter, int(data['logical_drive_id'])) + else: + self.parse_physical_drive(adapter, data['fqn']) + elif not key_value_regex.match(line): + self.rewind() + break + + def parse_physical_drives(self, adapter): + for line in self: + match = drive_regex.match(line) + if match: + self.parse_physical_drive(adapter, match.group('fqn')) + else: + self.rewind() + break + + def parse_logical_drive(self, adapter, logical_drive_id): + ld = { + 'id': logical_drive_id, + 'status': None, + 'status_complete': None, + } + + for line in self: + if HPSSA.match_any(line, mirror_group_regex, disk_partition_regex): + self.parse_ignored_section() + continue + + match = ld_status_regex.match(line) + if match: + ld['status'] = match.group('status') == 'OK' + + if match.group('percentage'): + ld['status_complete'] = float(match.group('percentage')) / 100 + elif HPSSA.match_any(line, adapter_regex, array_regex, drive_regex, ignored_sections_regex) \ + or not key_value_regex.match(line): + self.rewind() + break + + adapter['logical_drives'].append(ld) + + def parse_physical_drive(self, adapter, fqn): + pd = { + 'fqn': fqn, + 'status': None, + 'temperature': None, + } + + for line in self: + if HPSSA.match_any(line, adapter_regex, array_regex, drive_regex, ignored_sections_regex): + self.rewind() + break + + match = key_value_regex.match(line) + if match: + key, value = match.group('key', 'value') + if key == 'Status': + pd['status'] = value == 'OK' + elif key == 'Current Temperature (C)': + pd['temperature'] = int(value) + else: + self.rewind() + break + + adapter['physical_drives'].append(pd) + + def parse_ignored_section(self): + for line in self: + if HPSSA.match_any(line, adapter_regex, array_regex, drive_regex, ignored_sections_regex) \ + or not key_value_regex.match(line): + self.rewind() + break + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + super(Service, self).__init__(configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.ssacli_path = self.configuration.get('ssacli_path', 'ssacli') + self.use_sudo = self.configuration.get('use_sudo', True) + self.cmd = [] + + def get_adapters(self): + try: + adapters = HPSSA(self._get_raw_data(command=self.cmd)).adapters + if not adapters: + # If no adapters are returned, run the command again but capture stderr + err = self._get_raw_data(command=self.cmd, stderr=True) + if err: + raise HPSSAException('Error executing cmd {}: {}'.format(' '.join(self.cmd), '\n'.join(err))) + return adapters + except HPSSAException as ex: + self.error(ex) + return [] + + def check(self): + if not os.path.isfile(self.ssacli_path): + ssacli_path = find_binary(self.ssacli_path) + if ssacli_path: + self.ssacli_path = ssacli_path + else: + self.error('Cannot locate "{}" binary'.format(self.ssacli_path)) + return False + + if self.use_sudo: + sudo = find_binary('sudo') + if not sudo: + self.error('Cannot locate "{}" binary'.format('sudo')) + return False + + allowed = self._get_raw_data(command=[sudo, '-n', '-l', self.ssacli_path]) + if not allowed or allowed[0].strip() != os.path.realpath(self.ssacli_path): + self.error('Not allowed to run sudo for command {}'.format(self.ssacli_path)) + return False + + self.cmd = [sudo, '-n'] + + self.cmd.extend([self.ssacli_path, 'ctrl', 'all', 'show', 'config', 'detail']) + self.info('Command: {}'.format(self.cmd)) + + adapters = self.get_adapters() + + self.info('Discovered adapters: {}'.format([adapter['type'] for adapter in adapters])) + if not adapters: + self.error('No adapters discovered') + return False + + return True + + def get_data(self): + netdata = {} + + for adapter in self.get_adapters(): + status_key = '{}_status'.format(adapter['slot']) + temperature_key = '{}_temperature'.format(adapter['slot']) + ld_key = 'ld_{}_'.format(adapter['slot']) + + data = { + 'ctrl_status': { + 'ctrl_' + status_key: adapter['controller']['status'], + 'cache_' + status_key: adapter['cache']['present'] and adapter['cache']['status'], + 'battery_' + status_key: + adapter['battery']['status'] if adapter['battery']['count'] > 0 else None + }, + + 'ctrl_temperature': { + 'ctrl_' + temperature_key: adapter['controller']['temperature'], + 'cache_' + temperature_key: adapter['cache']['temperature'], + }, + + 'ld_status': { + ld_key + '{}_status'.format(ld['id']): ld['status'] for ld in adapter['logical_drives'] + }, + + 'pd_status': {}, + 'pd_temperature': {}, + } + + for pd in adapter['physical_drives']: + pd_key = 'pd_{}_{}'.format(adapter['slot'], pd['fqn']) + data['pd_status'][pd_key + '_status'] = pd['status'] + data['pd_temperature'][pd_key + '_temperature'] = pd['temperature'] + + for chart, dimension_data in data.items(): + for dimension_id, value in dimension_data.items(): + if value is None: + continue + + if dimension_id not in self.charts[chart]: + self.charts[chart].add_dimension([dimension_id]) + + netdata[dimension_id] = value + + return netdata diff --git a/collectors/python.d.plugin/hpssa/hpssa.conf b/collectors/python.d.plugin/hpssa/hpssa.conf new file mode 100644 index 0000000..cc50c98 --- /dev/null +++ b/collectors/python.d.plugin/hpssa/hpssa.conf @@ -0,0 +1,61 @@ +# netdata python.d.plugin configuration for hpssa +# +# This file is in YaML format. Generally the format is: +# +# name: value +# + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 5 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 5 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, hpssa also supports the following: +# +# ssacli_path: /usr/sbin/ssacli # The path to the ssacli executable +# use_sudo: True # Whether to use sudo or not +# ---------------------------------------------------------------------- + +# ssacli_path: /usr/sbin/ssacli +# use_sudo: True diff --git a/collectors/python.d.plugin/icecast/Makefile.inc b/collectors/python.d.plugin/icecast/Makefile.inc new file mode 100644 index 0000000..cb7c6fa --- /dev/null +++ b/collectors/python.d.plugin/icecast/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += icecast/icecast.chart.py +dist_pythonconfig_DATA += icecast/icecast.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += icecast/README.md icecast/Makefile.inc + diff --git a/collectors/python.d.plugin/icecast/README.md b/collectors/python.d.plugin/icecast/README.md new file mode 100644 index 0000000..c122f76 --- /dev/null +++ b/collectors/python.d.plugin/icecast/README.md @@ -0,0 +1,44 @@ + + +# Icecast monitoring with Netdata + +Monitors the number of listeners for active sources. + +## Requirements + +- icecast version >= 2.4.0 + +It produces the following charts: + +1. **Listeners** in listeners + +- source number + +## Configuration + +Edit the `python.d/icecast.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/icecast.conf +``` + +Needs only `url` to server's `/status-json.xsl` + +Here is an example for remote server: + +```yaml +remote: + url : 'http://1.2.3.4:8443/status-json.xsl' +``` + +Without configuration, module attempts to connect to `http://localhost:8443/status-json.xsl` + +--- + + diff --git a/collectors/python.d.plugin/icecast/icecast.chart.py b/collectors/python.d.plugin/icecast/icecast.chart.py new file mode 100644 index 0000000..a967d17 --- /dev/null +++ b/collectors/python.d.plugin/icecast/icecast.chart.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +# Description: icecast netdata python.d module +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import json + +from bases.FrameworkServices.UrlService import UrlService + +ORDER = [ + 'listeners', +] + +CHARTS = { + 'listeners': { + 'options': [None, 'Number Of Listeners', 'listeners', 'listeners', 'icecast.listeners', 'line'], + 'lines': [ + ] + } +} + + +class Source: + def __init__(self, idx, data): + self.name = 'source_{0}'.format(idx) + self.is_active = data.get('stream_start') and data.get('server_name') + self.listeners = data['listeners'] + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.url = self.configuration.get('url') + self._manager = self._build_manager() + + def check(self): + """ + Add active sources to the "listeners" chart + :return: bool + """ + sources = self.get_sources() + if not sources: + return None + + active_sources = 0 + for idx, raw_source in enumerate(sources): + if Source(idx, raw_source).is_active: + active_sources += 1 + dim_id = 'source_{0}'.format(idx) + dim = 'source {0}'.format(idx) + self.definitions['listeners']['lines'].append([dim_id, dim]) + + return bool(active_sources) + + def _get_data(self): + """ + Get number of listeners for every source + :return: dict + """ + sources = self.get_sources() + if not sources: + return None + + data = dict() + + for idx, raw_source in enumerate(sources): + source = Source(idx, raw_source) + data[source.name] = source.listeners + + return data + + def get_sources(self): + """ + Format data received from http request and return list of sources + :return: list + """ + + raw_data = self._get_raw_data() + if not raw_data: + return None + + try: + data = json.loads(raw_data) + except ValueError as error: + self.error('JSON decode error:', error) + return None + + sources = data['icestats'].get('source') + if not sources: + return None + + return sources if isinstance(sources, list) else [sources] diff --git a/collectors/python.d.plugin/icecast/icecast.conf b/collectors/python.d.plugin/icecast/icecast.conf new file mode 100644 index 0000000..a33074a --- /dev/null +++ b/collectors/python.d.plugin/icecast/icecast.conf @@ -0,0 +1,81 @@ +# netdata python.d.plugin configuration for icecast +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, icecast also supports the following: +# +# url: 'URL' # the URL to fetch icecast's stats +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + url : 'http://localhost:8443/status-json.xsl' + +localipv4: + name : 'local' + url : 'http://127.0.0.1:8443/status-json.xsl' \ No newline at end of file diff --git a/collectors/python.d.plugin/ipfs/Makefile.inc b/collectors/python.d.plugin/ipfs/Makefile.inc new file mode 100644 index 0000000..68458cb --- /dev/null +++ b/collectors/python.d.plugin/ipfs/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += ipfs/ipfs.chart.py +dist_pythonconfig_DATA += ipfs/ipfs.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += ipfs/README.md ipfs/Makefile.inc + diff --git a/collectors/python.d.plugin/ipfs/README.md b/collectors/python.d.plugin/ipfs/README.md new file mode 100644 index 0000000..3a7c436 --- /dev/null +++ b/collectors/python.d.plugin/ipfs/README.md @@ -0,0 +1,51 @@ + + +# IPFS monitoring with Netdata + +Collects [`IPFS`](https://ipfs.io) basic information like file system bandwidth, peers and repo metrics. + +## Charts + +It produces the following charts: + +- Bandwidth in `kilobits/s` +- Peers in `peers` +- Repo Size in `GiB` +- Repo Objects in `objects` + +## Configuration + +Edit the `python.d/ipfs.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/ipfs.conf +``` + +--- + +Calls to the following endpoints are disabled due to `IPFS` bugs: + +- `/api/v0/stats/repo` (https://github.com/ipfs/go-ipfs/issues/3874) +- `/api/v0/pin/ls` (https://github.com/ipfs/go-ipfs/issues/7528) + +Can be enabled in the collector configuration file. + +The configuration needs only `url` to `IPFS` server, here is an example for 2 `IPFS` instances: + +```yaml +localhost: + url: 'http://localhost:5001' + +remote: + url: 'http://203.0.113.10::5001' +``` + +--- + + diff --git a/collectors/python.d.plugin/ipfs/ipfs.chart.py b/collectors/python.d.plugin/ipfs/ipfs.chart.py new file mode 100644 index 0000000..abfc9c4 --- /dev/null +++ b/collectors/python.d.plugin/ipfs/ipfs.chart.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- +# Description: IPFS netdata python.d module +# Authors: davidak +# SPDX-License-Identifier: GPL-3.0-or-later + +import json + +from bases.FrameworkServices.UrlService import UrlService + +ORDER = [ + 'bandwidth', + 'peers', + 'repo_size', + 'repo_objects', +] + +CHARTS = { + 'bandwidth': { + 'options': [None, 'IPFS Bandwidth', 'kilobits/s', 'Bandwidth', 'ipfs.bandwidth', 'line'], + 'lines': [ + ['in', None, 'absolute', 8, 1000], + ['out', None, 'absolute', -8, 1000] + ] + }, + 'peers': { + 'options': [None, 'IPFS Peers', 'peers', 'Peers', 'ipfs.peers', 'line'], + 'lines': [ + ['peers', None, 'absolute'] + ] + }, + 'repo_size': { + 'options': [None, 'IPFS Repo Size', 'GiB', 'Size', 'ipfs.repo_size', 'area'], + 'lines': [ + ['avail', None, 'absolute', 1, 1 << 30], + ['size', None, 'absolute', 1, 1 << 30], + ] + }, + 'repo_objects': { + 'options': [None, 'IPFS Repo Objects', 'objects', 'Objects', 'ipfs.repo_objects', 'line'], + 'lines': [ + ['objects', None, 'absolute', 1, 1], + ['pinned', None, 'absolute', 1, 1], + ['recursive_pins', None, 'absolute', 1, 1] + ] + } +} + +SI_zeroes = { + 'k': 3, + 'm': 6, + 'g': 9, + 't': 12, + 'p': 15, + 'e': 18, + 'z': 21, + 'y': 24 +} + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.baseurl = self.configuration.get('url', 'http://localhost:5001') + self.method = "POST" + self.do_pinapi = self.configuration.get('pinapi') + self.do_repoapi = self.configuration.get('repoapi') + self.__storage_max = None + + def _get_json(self, sub_url): + """ + :return: json decoding of the specified url + """ + self.url = self.baseurl + sub_url + try: + return json.loads(self._get_raw_data()) + except (TypeError, ValueError): + return dict() + + @staticmethod + def _recursive_pins(keys): + return sum(1 for k in keys if keys[k]['Type'] == b'recursive') + + @staticmethod + def _dehumanize(store_max): + # convert from '10Gb' to 10000000000 + if not isinstance(store_max, int): + store_max = store_max.lower() + if store_max.endswith('b'): + val, units = store_max[:-2], store_max[-2] + if units in SI_zeroes: + val += '0' * SI_zeroes[units] + store_max = val + try: + store_max = int(store_max) + except (TypeError, ValueError): + store_max = None + return store_max + + def _storagemax(self, store_cfg): + if self.__storage_max is None: + self.__storage_max = self._dehumanize(store_cfg) + return self.__storage_max + + def _get_data(self): + """ + Get data from API + :return: dict + """ + # suburl : List of (result-key, original-key, transform-func) + cfg = { + '/api/v0/stats/bw': + [ + ('in', 'RateIn', int), + ('out', 'RateOut', int), + ], + '/api/v0/swarm/peers': + [ + ('peers', 'Peers', len), + ], + } + if self.do_repoapi: + cfg.update({ + '/api/v0/stats/repo': + [ + ('size', 'RepoSize', int), + ('objects', 'NumObjects', int), + ('avail', 'StorageMax', self._storagemax), + ], + }) + + if self.do_pinapi: + cfg.update({ + '/api/v0/pin/ls': + [ + ('pinned', 'Keys', len), + ('recursive_pins', 'Keys', self._recursive_pins), + ] + }) + r = dict() + for suburl in cfg: + in_json = self._get_json(suburl) + for new_key, orig_key, xmute in cfg[suburl]: + try: + r[new_key] = xmute(in_json[orig_key]) + except Exception as error: + self.debug(error) + return r or None diff --git a/collectors/python.d.plugin/ipfs/ipfs.conf b/collectors/python.d.plugin/ipfs/ipfs.conf new file mode 100644 index 0000000..8b167b3 --- /dev/null +++ b/collectors/python.d.plugin/ipfs/ipfs.conf @@ -0,0 +1,82 @@ +# netdata python.d.plugin configuration for ipfs +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, ipfs also supports the following: +# +# url: 'URL' # URL to the IPFS API +# repoapi: no # Collect repo metrics +# # Currently defaults to disabled due to IPFS Bug +# # https://github.com/ipfs/go-ipfs/issues/7528 +# # resulting in very high CPU Usage +# pinapi: no # Set status of IPFS pinned object polling +# # Currently defaults to disabled due to IPFS Bug +# # https://github.com/ipfs/go-ipfs/issues/3874 +# # resulting in very high CPU Usage +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name: 'local' + url: 'http://localhost:5001' + repoapi: no + pinapi: no diff --git a/collectors/python.d.plugin/litespeed/Makefile.inc b/collectors/python.d.plugin/litespeed/Makefile.inc new file mode 100644 index 0000000..5dd6450 --- /dev/null +++ b/collectors/python.d.plugin/litespeed/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += litespeed/litespeed.chart.py +dist_pythonconfig_DATA += litespeed/litespeed.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += litespeed/README.md litespeed/Makefile.inc + diff --git a/collectors/python.d.plugin/litespeed/README.md b/collectors/python.d.plugin/litespeed/README.md new file mode 100644 index 0000000..b58b23d --- /dev/null +++ b/collectors/python.d.plugin/litespeed/README.md @@ -0,0 +1,72 @@ + + +# LiteSpeed monitoring with Netdata + +Collects web server performance metrics for network, connection, requests, and cache. + +It produces: + +1. **Network Throughput HTTP** in kilobits/s + + - in + - out + +2. **Network Throughput HTTPS** in kilobits/s + + - in + - out + +3. **Connections HTTP** in connections + + - free + - used + +4. **Connections HTTPS** in connections + + - free + - used + +5. **Requests** in requests/s + + - requests + +6. **Requests In Processing** in requests + + - processing + +7. **Public Cache Hits** in hits/s + + - hits + +8. **Private Cache Hits** in hits/s + + - hits + +9. **Static Hits** in hits/s + + - hits + +## Configuration + +Edit the `python.d/litespeed.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/litespeed.conf +``` + +```yaml +local: + path : 'PATH' +``` + +If no configuration is given, module will use "/tmp/lshttpd/". + +--- + + diff --git a/collectors/python.d.plugin/litespeed/litespeed.chart.py b/collectors/python.d.plugin/litespeed/litespeed.chart.py new file mode 100644 index 0000000..7ef8189 --- /dev/null +++ b/collectors/python.d.plugin/litespeed/litespeed.chart.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +# Description: litespeed netdata python.d module +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import glob +import os +import re +from collections import namedtuple + +from bases.FrameworkServices.SimpleService import SimpleService + +update_every = 10 + +# charts order (can be overridden if you want less charts, or different order) +ORDER = [ + 'net_throughput_http', # net throughput + 'net_throughput_https', # net throughput + 'connections_http', # connections + 'connections_https', # connections + 'requests', # requests + 'requests_processing', # requests + 'pub_cache_hits', # cache + 'private_cache_hits', # cache + 'static_hits', # static +] + +CHARTS = { + 'net_throughput_http': { + 'options': [None, 'Network Throughput HTTP', 'kilobits/s', 'net throughput', + 'litespeed.net_throughput', 'area'], + 'lines': [ + ['bps_in', 'in', 'absolute'], + ['bps_out', 'out', 'absolute', -1] + ] + }, + 'net_throughput_https': { + 'options': [None, 'Network Throughput HTTPS', 'kilobits/s', 'net throughput', + 'litespeed.net_throughput', 'area'], + 'lines': [ + ['ssl_bps_in', 'in', 'absolute'], + ['ssl_bps_out', 'out', 'absolute', -1] + ] + }, + 'connections_http': { + 'options': [None, 'Connections HTTP', 'conns', 'connections', 'litespeed.connections', 'stacked'], + 'lines': [ + ['conn_free', 'free', 'absolute'], + ['conn_used', 'used', 'absolute'] + ] + }, + 'connections_https': { + 'options': [None, 'Connections HTTPS', 'conns', 'connections', 'litespeed.connections', 'stacked'], + 'lines': [ + ['ssl_conn_free', 'free', 'absolute'], + ['ssl_conn_used', 'used', 'absolute'] + ] + }, + 'requests': { + 'options': [None, 'Requests', 'requests/s', 'requests', 'litespeed.requests', 'line'], + 'lines': [ + ['requests', None, 'absolute', 1, 100] + ] + }, + 'requests_processing': { + 'options': [None, 'Requests In Processing', 'requests', 'requests', 'litespeed.requests_processing', 'line'], + 'lines': [ + ['requests_processing', 'processing', 'absolute'] + ] + }, + 'pub_cache_hits': { + 'options': [None, 'Public Cache Hits', 'hits/s', 'cache', 'litespeed.cache', 'line'], + 'lines': [ + ['pub_cache_hits', 'hits', 'absolute', 1, 100] + ] + }, + 'private_cache_hits': { + 'options': [None, 'Private Cache Hits', 'hits/s', 'cache', 'litespeed.cache', 'line'], + 'lines': [ + ['private_cache_hits', 'hits', 'absolute', 1, 100] + ] + }, + 'static_hits': { + 'options': [None, 'Static Hits', 'hits/s', 'static', 'litespeed.static', 'line'], + 'lines': [ + ['static_hits', 'hits', 'absolute', 1, 100] + ] + } +} + +t = namedtuple('T', ['key', 'id', 'mul']) + +T = [ + t('BPS_IN', 'bps_in', 8), + t('BPS_OUT', 'bps_out', 8), + t('SSL_BPS_IN', 'ssl_bps_in', 8), + t('SSL_BPS_OUT', 'ssl_bps_out', 8), + t('REQ_PER_SEC', 'requests', 100), + t('REQ_PROCESSING', 'requests_processing', 1), + t('PUB_CACHE_HITS_PER_SEC', 'pub_cache_hits', 100), + t('PRIVATE_CACHE_HITS_PER_SEC', 'private_cache_hits', 100), + t('STATIC_HITS_PER_SEC', 'static_hits', 100), + t('PLAINCONN', 'conn_used', 1), + t('AVAILCONN', 'conn_free', 1), + t('SSLCONN', 'ssl_conn_used', 1), + t('AVAILSSL', 'ssl_conn_free', 1), +] + +RE = re.compile(r'([A-Z_]+): ([0-9.]+)') + +ZERO_DATA = { + 'bps_in': 0, + 'bps_out': 0, + 'ssl_bps_in': 0, + 'ssl_bps_out': 0, + 'requests': 0, + 'requests_processing': 0, + 'pub_cache_hits': 0, + 'private_cache_hits': 0, + 'static_hits': 0, + 'conn_used': 0, + 'conn_free': 0, + 'ssl_conn_used': 0, + 'ssl_conn_free': 0, +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.path = self.configuration.get('path', '/tmp/lshttpd/') + self.files = list() + + def check(self): + if not self.path: + self.error('"path" not specified') + return False + + fs = glob.glob(os.path.join(self.path, '.rtreport*')) + + if not fs: + self.error('"{0}" has no "rtreport" files or dir is not readable'.format(self.path)) + return None + + self.debug('stats files:', fs) + + for f in fs: + if not is_readable_file(f): + self.error('{0} is not readable'.format(f)) + continue + self.files.append(f) + + return bool(self.files) + + def get_data(self): + """ + Format data received from http request + :return: dict + """ + data = dict(ZERO_DATA) + + for f in self.files: + try: + with open(f) as b: + lines = b.readlines() + except (OSError, IOError) as err: + self.error(err) + return None + else: + parse_file(data, lines) + + return data + + +def parse_file(data, lines): + for line in lines: + if not line.startswith(('BPS_IN:', 'MAXCONN:', 'PLAINCONN:', 'REQ_RATE []:')): + continue + m = dict(RE.findall(line)) + for v in T: + if v.key in m: + data[v.id] += float(m[v.key]) * v.mul + + +def is_readable_file(v): + return os.path.isfile(v) and os.access(v, os.R_OK) diff --git a/collectors/python.d.plugin/litespeed/litespeed.conf b/collectors/python.d.plugin/litespeed/litespeed.conf new file mode 100644 index 0000000..a326e18 --- /dev/null +++ b/collectors/python.d.plugin/litespeed/litespeed.conf @@ -0,0 +1,72 @@ +# netdata python.d.plugin configuration for litespeed +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, lightspeed also supports the following: +# +# path: 'PATH' # path to lightspeed stats files directory +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + path : '/tmp/lshttpd/' diff --git a/collectors/python.d.plugin/logind/Makefile.inc b/collectors/python.d.plugin/logind/Makefile.inc new file mode 100644 index 0000000..adadab1 --- /dev/null +++ b/collectors/python.d.plugin/logind/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += logind/logind.chart.py +dist_pythonconfig_DATA += logind/logind.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += logind/README.md logind/Makefile.inc + diff --git a/collectors/python.d.plugin/logind/README.md b/collectors/python.d.plugin/logind/README.md new file mode 100644 index 0000000..442d388 --- /dev/null +++ b/collectors/python.d.plugin/logind/README.md @@ -0,0 +1,86 @@ + + +# Systemd-Logind monitoring with Netdata + +Monitors active sessions, users, and seats tracked by `systemd-logind` or `elogind`. + +It provides the following charts: + +1. **Sessions** Tracks the total number of sessions. + + - Graphical: Local graphical sessions (running X11, or Wayland, or something else). + - Console: Local console sessions. + - Remote: Remote sessions. + +2. **Users** Tracks total number of unique user logins of each type. + + - Graphical + - Console + - Remote + +3. **Seats** Total number of seats in use. + + - Seats + +## Enable the collector + +The `logind` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `logind` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl +restart netdata`, or the appropriate method for your system, to finish enabling the `logind` collector. + +## Configuration + +This module needs no configuration. Just make sure the `netdata` user +can run the `loginctl` command and get a session list without having to +specify a path. + +This will work with any command that can output data in the _exact_ +same format as `loginctl list-sessions --no-legend`. If you have some +other command you want to use that outputs data in this format, you can +specify it using the `command` key like so: + +```yaml +command: '/path/to/other/command' +``` + +Edit the `python.d/logind.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/logind.conf +``` + +## Notes + +- This module's ability to track logins is dependent on what PAM services + are configured to register sessions with logind. In particular, for + most systems, it will only track TTY logins, local desktop logins, + and logins through remote shell connections. + +- The users chart counts _usernames_ not UID's. This is potentially + important in configurations where multiple users have the same UID. + +- The users chart counts any given user name up to once for _each_ type + of login. So if the same user has a graphical and a console login on a + system, they will show up once in the graphical count, and once in the + console count. + +- Because the data collection process is rather expensive, this plugin + is currently disabled by default, and needs to be explicitly enabled in + `/etc/netdata/python.d.conf` before it will run. + +--- + + diff --git a/collectors/python.d.plugin/logind/logind.chart.py b/collectors/python.d.plugin/logind/logind.chart.py new file mode 100644 index 0000000..7086686 --- /dev/null +++ b/collectors/python.d.plugin/logind/logind.chart.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# Description: logind netdata python.d module +# Author: Austin S. Hemmelgarn (Ferroin) +# SPDX-License-Identifier: GPL-3.0-or-later + +from bases.FrameworkServices.ExecutableService import ExecutableService + +priority = 59999 +disabled_by_default = True + +LOGINCTL_COMMAND = 'loginctl list-sessions --no-legend' + +ORDER = [ + 'sessions', + 'users', + 'seats', +] + +CHARTS = { + 'sessions': { + 'options': [None, 'Logind Sessions', 'sessions', 'sessions', 'logind.sessions', 'stacked'], + 'lines': [ + ['sessions_graphical', 'Graphical', 'absolute', 1, 1], + ['sessions_console', 'Console', 'absolute', 1, 1], + ['sessions_remote', 'Remote', 'absolute', 1, 1] + ] + }, + 'users': { + 'options': [None, 'Logind Users', 'users', 'users', 'logind.users', 'stacked'], + 'lines': [ + ['users_graphical', 'Graphical', 'absolute', 1, 1], + ['users_console', 'Console', 'absolute', 1, 1], + ['users_remote', 'Remote', 'absolute', 1, 1] + ] + }, + 'seats': { + 'options': [None, 'Logind Seats', 'seats', 'seats', 'logind.seats', 'line'], + 'lines': [ + ['seats', 'Active Seats', 'absolute', 1, 1] + ] + } +} + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.command = LOGINCTL_COMMAND + + def _get_data(self): + ret = { + 'sessions_graphical': 0, + 'sessions_console': 0, + 'sessions_remote': 0, + } + users = { + 'graphical': list(), + 'console': list(), + 'remote': list() + } + seats = list() + data = self._get_raw_data() + + for item in data: + fields = item.split() + if len(fields) == 3: + users['remote'].append(fields[2]) + ret['sessions_remote'] += 1 + elif len(fields) == 4: + users['graphical'].append(fields[2]) + ret['sessions_graphical'] += 1 + seats.append(fields[3]) + elif len(fields) == 5: + users['console'].append(fields[2]) + ret['sessions_console'] += 1 + seats.append(fields[3]) + + ret['users_graphical'] = len(set(users['graphical'])) + ret['users_console'] = len(set(users['console'])) + ret['users_remote'] = len(set(users['remote'])) + ret['seats'] = len(set(seats)) + + return ret diff --git a/collectors/python.d.plugin/logind/logind.conf b/collectors/python.d.plugin/logind/logind.conf new file mode 100644 index 0000000..01a859d --- /dev/null +++ b/collectors/python.d.plugin/logind/logind.conf @@ -0,0 +1,60 @@ +# netdata python.d.plugin configuration for logind +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds diff --git a/collectors/python.d.plugin/megacli/Makefile.inc b/collectors/python.d.plugin/megacli/Makefile.inc new file mode 100644 index 0000000..83680d7 --- /dev/null +++ b/collectors/python.d.plugin/megacli/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += megacli/megacli.chart.py +dist_pythonconfig_DATA += megacli/megacli.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += megacli/README.md megacli/Makefile.inc + diff --git a/collectors/python.d.plugin/megacli/README.md b/collectors/python.d.plugin/megacli/README.md new file mode 100644 index 0000000..3c99c3d --- /dev/null +++ b/collectors/python.d.plugin/megacli/README.md @@ -0,0 +1,86 @@ + + +# MegaRAID controller monitoring with Netdata + +Collects adapter, physical drives and battery stats using `megacli` command-line tool. + +Executed commands: + +- `sudo -n megacli -LDPDInfo -aAll` +- `sudo -n megacli -AdpBbuCmd -a0` + +## Requirements + +The module uses `megacli`, which can only be executed by `root`. It uses +`sudo` and assumes that it is configured such that the `netdata` user can execute `megacli` as root without a password. + +- Add to your `/etc/sudoers` file: + +`which megacli` shows the full path to the binary. + +```bash +netdata ALL=(root) NOPASSWD: /path/to/megacli +``` + +- Reset Netdata's systemd + unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux + distributions with systemd) + +The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `megacli` using `sudo`. + + +As the `root` user, do the following: + +```cmd +mkdir /etc/systemd/system/netdata.service.d +echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf +systemctl daemon-reload +systemctl restart netdata.service +``` + +## Charts + +- Adapter State +- Physical Drives Media Errors +- Physical Drives Predictive Failures +- Battery Relative State of Charge +- Battery Cycle Count + +## Enable the collector + +The `megacli` collector is disabled by default. To enable it, use `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` +file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `megacli` setting to `yes`. Save the file and restart the Netdata Agent +with `sudo systemctl restart netdata`, or the appropriate method for your system. + +## Configuration + +Edit the `python.d/megacli.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/megacli.conf +``` + +Battery stats disabled by default. To enable them, modify `megacli.conf`. + +```yaml +do_battery: yes +``` + +Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate +method](/docs/configure/start-stop-restart.md) for your system. + + diff --git a/collectors/python.d.plugin/megacli/megacli.chart.py b/collectors/python.d.plugin/megacli/megacli.chart.py new file mode 100644 index 0000000..ef35ff6 --- /dev/null +++ b/collectors/python.d.plugin/megacli/megacli.chart.py @@ -0,0 +1,278 @@ +# -*- coding: utf-8 -*- +# Description: megacli netdata python.d module +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + + +import re + +from bases.FrameworkServices.ExecutableService import ExecutableService +from bases.collection import find_binary + +disabled_by_default = True + +update_every = 5 + + +def adapter_charts(ads): + order = [ + 'adapter_degraded', + ] + + def dims(ad): + return [['adapter_{0}_degraded'.format(a.id), 'adapter {0}'.format(a.id)] for a in ad] + + charts = { + 'adapter_degraded': { + 'options': [None, 'Adapter State', 'is degraded', 'adapter', 'megacli.adapter_degraded', 'line'], + 'lines': dims(ads) + }, + } + + return order, charts + + +def pd_charts(pds): + order = [ + 'pd_media_error', + 'pd_predictive_failure', + ] + + def dims(k, pd): + return [['slot_{0}_{1}'.format(p.id, k), 'slot {0}'.format(p.id), 'incremental'] for p in pd] + + charts = { + 'pd_media_error': { + 'options': [None, 'Physical Drives Media Errors', 'errors/s', 'pd', 'megacli.pd_media_error', 'line'], + 'lines': dims('media_error', pds) + }, + 'pd_predictive_failure': { + 'options': [None, 'Physical Drives Predictive Failures', 'failures/s', 'pd', + 'megacli.pd_predictive_failure', 'line'], + 'lines': dims('predictive_failure', pds) + } + } + + return order, charts + + +def battery_charts(bats): + order = list() + charts = dict() + + for b in bats: + order.append('bbu_{0}_relative_charge'.format(b.id)) + charts.update( + { + 'bbu_{0}_relative_charge'.format(b.id): { + 'options': [None, 'Relative State of Charge', 'percentage', 'battery', + 'megacli.bbu_relative_charge', 'line'], + 'lines': [ + ['bbu_{0}_relative_charge'.format(b.id), 'adapter {0}'.format(b.id)], + ] + } + } + ) + + for b in bats: + order.append('bbu_{0}_cycle_count'.format(b.id)) + charts.update( + { + 'bbu_{0}_cycle_count'.format(b.id): { + 'options': [None, 'Cycle Count', 'cycle count', 'battery', 'megacli.bbu_cycle_count', 'line'], + 'lines': [ + ['bbu_{0}_cycle_count'.format(b.id), 'adapter {0}'.format(b.id)], + ] + } + } + ) + + return order, charts + + +RE_ADAPTER = re.compile( + r'Adapter #([0-9]+) State(?:\s+)?: ([a-zA-Z]+)' +) + +RE_VD = re.compile( + r'Slot Number: ([0-9]+) Media Error Count: ([0-9]+) Predictive Failure Count: ([0-9]+)' +) + +RE_BATTERY = re.compile( + r'BBU Capacity Info for Adapter: ([0-9]+) Relative State of Charge: ([0-9]+) % Cycle Count: ([0-9]+)' +) + + +def find_adapters(d): + keys = ('Adapter #', 'State') + d = ' '.join(v.strip() for v in d if v.startswith(keys)) + return [Adapter(*v) for v in RE_ADAPTER.findall(d)] + + +def find_pds(d): + keys = ('Slot Number', 'Media Error Count', 'Predictive Failure Count') + d = ' '.join(v.strip() for v in d if v.startswith(keys)) + return [PD(*v) for v in RE_VD.findall(d)] + + +def find_batteries(d): + keys = ('BBU Capacity Info for Adapter', 'Relative State of Charge', 'Cycle Count') + d = ' '.join(v.strip() for v in d if v.strip().startswith(keys)) + return [Battery(*v) for v in RE_BATTERY.findall(d)] + + +class Adapter: + def __init__(self, n, state): + self.id = n + self.state = int(state == 'Degraded') + + def data(self): + return { + 'adapter_{0}_degraded'.format(self.id): self.state, + } + + +class PD: + def __init__(self, n, media_err, predict_fail): + self.id = n + self.media_err = media_err + self.predict_fail = predict_fail + + def data(self): + return { + 'slot_{0}_media_error'.format(self.id): self.media_err, + 'slot_{0}_predictive_failure'.format(self.id): self.predict_fail, + } + + +class Battery: + def __init__(self, adapt_id, rel_charge, cycle_count): + self.id = adapt_id + self.rel_charge = rel_charge + self.cycle_count = cycle_count + + def data(self): + return { + 'bbu_{0}_relative_charge'.format(self.id): self.rel_charge, + 'bbu_{0}_cycle_count'.format(self.id): self.cycle_count, + } + + +# TODO: hardcoded sudo... +class Megacli: + def __init__(self): + self.s = find_binary('sudo') + self.m = find_binary('megacli') or find_binary('MegaCli') # Binary on FreeBSD is MegaCli + self.sudo_check = [self.s, '-n', '-l'] + self.disk_info = [self.s, '-n', self.m, '-LDPDInfo', '-aAll', '-NoLog'] + self.battery_info = [self.s, '-n', self.m, '-AdpBbuCmd', '-a0', '-NoLog'] + + def __bool__(self): + return bool(self.s and self.m) + + def __nonzero__(self): + return self.__bool__() + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = list() + self.definitions = dict() + self.do_battery = self.configuration.get('do_battery') + self.megacli = Megacli() + + def check_sudo(self): + err = self._get_raw_data(command=self.megacli.sudo_check, stderr=True) + if err: + self.error(''.join(err)) + return False + return True + + def check_disk_info(self): + d = self._get_raw_data(command=self.megacli.disk_info) + if not d: + return False + + ads = find_adapters(d) + pds = find_pds(d) + + if not (ads and pds): + self.error('failed to parse "{0}" output'.format(' '.join(self.megacli.disk_info))) + return False + + o, c = adapter_charts(ads) + self.order.extend(o) + self.definitions.update(c) + + o, c = pd_charts(pds) + self.order.extend(o) + self.definitions.update(c) + + return True + + def check_battery(self): + d = self._get_raw_data(command=self.megacli.battery_info) + if not d: + return False + + bats = find_batteries(d) + + if not bats: + self.error('failed to parse "{0}" output'.format(' '.join(self.megacli.battery_info))) + return False + + o, c = battery_charts(bats) + self.order.extend(o) + self.definitions.update(c) + return True + + def check(self): + if not self.megacli: + self.error('can\'t locate "sudo" or "megacli" binary') + return None + + if not (self.check_sudo() and self.check_disk_info()): + return False + + if self.do_battery: + self.do_battery = self.check_battery() + + return True + + def get_data(self): + data = dict() + + data.update(self.get_adapter_pd_data()) + + if self.do_battery: + data.update(self.get_battery_data()) + + return data or None + + def get_adapter_pd_data(self): + raw = self._get_raw_data(command=self.megacli.disk_info) + data = dict() + + if not raw: + return data + + for a in find_adapters(raw): + data.update(a.data()) + + for p in find_pds(raw): + data.update(p.data()) + + return data + + def get_battery_data(self): + raw = self._get_raw_data(command=self.megacli.battery_info) + data = dict() + + if not raw: + return data + + for b in find_batteries(raw): + data.update(b.data()) + + return data diff --git a/collectors/python.d.plugin/megacli/megacli.conf b/collectors/python.d.plugin/megacli/megacli.conf new file mode 100644 index 0000000..1af4292 --- /dev/null +++ b/collectors/python.d.plugin/megacli/megacli.conf @@ -0,0 +1,60 @@ +# netdata python.d.plugin configuration for megacli +# +# This file is in YaML format. Generally the format is: +# +# name: value +# + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, megacli also supports the following: +# +# do_battery: yes/no # default is no. Battery stats (adds additional call to megacli `megacli -AdpBbuCmd -a0`). +# +# ---------------------------------------------------------------------- +# uncomment the line below to collect battery statistics +# do_battery: yes diff --git a/collectors/python.d.plugin/memcached/Makefile.inc b/collectors/python.d.plugin/memcached/Makefile.inc new file mode 100644 index 0000000..e603571 --- /dev/null +++ b/collectors/python.d.plugin/memcached/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += memcached/memcached.chart.py +dist_pythonconfig_DATA += memcached/memcached.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += memcached/README.md memcached/Makefile.inc + diff --git a/collectors/python.d.plugin/memcached/README.md b/collectors/python.d.plugin/memcached/README.md new file mode 100644 index 0000000..19139ee --- /dev/null +++ b/collectors/python.d.plugin/memcached/README.md @@ -0,0 +1,99 @@ + + +# Memcached monitoring with Netdata + +Collects memory-caching system performance metrics. It reads server response to stats command ([stats interface](https://github.com/memcached/memcached/wiki/Commands#stats)). + + +1. **Network** in kilobytes/s + + - read + - written + +2. **Connections** per second + + - current + - rejected + - total + +3. **Items** in cluster + + - current + - total + +4. **Evicted and Reclaimed** items + + - evicted + - reclaimed + +5. **GET** requests/s + + - hits + - misses + +6. **GET rate** rate in requests/s + + - rate + +7. **SET rate** rate in requests/s + + - rate + +8. **DELETE** requests/s + + - hits + - misses + +9. **CAS** requests/s + + - hits + - misses + - bad value + +10. **Increment** requests/s + + - hits + - misses + +11. **Decrement** requests/s + + - hits + - misses + +12. **Touch** requests/s + + - hits + - misses + +13. **Touch rate** rate in requests/s + + - rate + +## Configuration + +Edit the `python.d/memcached.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/memcached.conf +``` + +Sample: + +```yaml +localtcpip: + name : 'local' + host : '127.0.0.1' + port : 24242 +``` + +If no configuration is given, module will attempt to connect to memcached instance on `127.0.0.1:11211` address. + +--- + + diff --git a/collectors/python.d.plugin/memcached/memcached.chart.py b/collectors/python.d.plugin/memcached/memcached.chart.py new file mode 100644 index 0000000..bb656a2 --- /dev/null +++ b/collectors/python.d.plugin/memcached/memcached.chart.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# Description: memcached netdata python.d module +# Author: Pawel Krupa (paulfantom) +# SPDX-License-Identifier: GPL-3.0-or-later + +from bases.FrameworkServices.SocketService import SocketService + +ORDER = [ + 'cache', + 'net', + 'connections', + 'items', + 'evicted_reclaimed', + 'get', + 'get_rate', + 'set_rate', + 'cas', + 'delete', + 'increment', + 'decrement', + 'touch', + 'touch_rate', +] + +CHARTS = { + 'cache': { + 'options': [None, 'Cache Size', 'MiB', 'cache', 'memcached.cache', 'stacked'], + 'lines': [ + ['avail', 'available', 'absolute', 1, 1 << 20], + ['used', 'used', 'absolute', 1, 1 << 20] + ] + }, + 'net': { + 'options': [None, 'Network', 'kilobits/s', 'network', 'memcached.net', 'area'], + 'lines': [ + ['bytes_read', 'in', 'incremental', 8, 1000], + ['bytes_written', 'out', 'incremental', -8, 1000], + ] + }, + 'connections': { + 'options': [None, 'Connections', 'connections/s', 'connections', 'memcached.connections', 'line'], + 'lines': [ + ['curr_connections', 'current', 'incremental'], + ['rejected_connections', 'rejected', 'incremental'], + ['total_connections', 'total', 'incremental'] + ] + }, + 'items': { + 'options': [None, 'Items', 'items', 'items', 'memcached.items', 'line'], + 'lines': [ + ['curr_items', 'current', 'absolute'], + ['total_items', 'total', 'absolute'] + ] + }, + 'evicted_reclaimed': { + 'options': [None, 'Items', 'items', 'items', 'memcached.evicted_reclaimed', 'line'], + 'lines': [ + ['reclaimed', 'reclaimed', 'absolute'], + ['evictions', 'evicted', 'absolute'] + ] + }, + 'get': { + 'options': [None, 'Requests', 'requests', 'get ops', 'memcached.get', 'stacked'], + 'lines': [ + ['get_hits', 'hits', 'percent-of-absolute-row'], + ['get_misses', 'misses', 'percent-of-absolute-row'] + ] + }, + 'get_rate': { + 'options': [None, 'Rate', 'requests/s', 'get ops', 'memcached.get_rate', 'line'], + 'lines': [ + ['cmd_get', 'rate', 'incremental'] + ] + }, + 'set_rate': { + 'options': [None, 'Rate', 'requests/s', 'set ops', 'memcached.set_rate', 'line'], + 'lines': [ + ['cmd_set', 'rate', 'incremental'] + ] + }, + 'delete': { + 'options': [None, 'Requests', 'requests', 'delete ops', 'memcached.delete', 'stacked'], + 'lines': [ + ['delete_hits', 'hits', 'percent-of-absolute-row'], + ['delete_misses', 'misses', 'percent-of-absolute-row'], + ] + }, + 'cas': { + 'options': [None, 'Requests', 'requests', 'check and set ops', 'memcached.cas', 'stacked'], + 'lines': [ + ['cas_hits', 'hits', 'percent-of-absolute-row'], + ['cas_misses', 'misses', 'percent-of-absolute-row'], + ['cas_badval', 'bad value', 'percent-of-absolute-row'] + ] + }, + 'increment': { + 'options': [None, 'Requests', 'requests', 'increment ops', 'memcached.increment', 'stacked'], + 'lines': [ + ['incr_hits', 'hits', 'percent-of-absolute-row'], + ['incr_misses', 'misses', 'percent-of-absolute-row'] + ] + }, + 'decrement': { + 'options': [None, 'Requests', 'requests', 'decrement ops', 'memcached.decrement', 'stacked'], + 'lines': [ + ['decr_hits', 'hits', 'percent-of-absolute-row'], + ['decr_misses', 'misses', 'percent-of-absolute-row'] + ] + }, + 'touch': { + 'options': [None, 'Requests', 'requests', 'touch ops', 'memcached.touch', 'stacked'], + 'lines': [ + ['touch_hits', 'hits', 'percent-of-absolute-row'], + ['touch_misses', 'misses', 'percent-of-absolute-row'] + ] + }, + 'touch_rate': { + 'options': [None, 'Rate', 'requests/s', 'touch ops', 'memcached.touch_rate', 'line'], + 'lines': [ + ['cmd_touch', 'rate', 'incremental'] + ] + } +} + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + SocketService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.request = 'stats\r\n' + self.host = 'localhost' + self.port = 11211 + self._keep_alive = True + self.unix_socket = None + + def _get_data(self): + """ + Get data from socket + :return: dict + """ + response = self._get_raw_data() + if response is None: + # error has already been logged + return None + + if response.startswith('ERROR'): + self.error('received ERROR') + return None + + try: + parsed = response.split('\n') + except AttributeError: + self.error('response is invalid/empty') + return None + + # split the response + data = {} + for line in parsed: + if line.startswith('STAT'): + try: + t = line[5:].split(' ') + data[t[0]] = t[1] + except (IndexError, ValueError): + self.debug('invalid line received: ' + str(line)) + + if not data: + self.error("received data doesn't have any records") + return None + + # custom calculations + try: + data['avail'] = int(data['limit_maxbytes']) - int(data['bytes']) + data['used'] = int(data['bytes']) + except (KeyError, ValueError, TypeError): + pass + + return data + + def _check_raw_data(self, data): + if data.endswith('END\r\n'): + self.debug('received full response from memcached') + return True + + self.debug('waiting more data from memcached') + return False + + def check(self): + """ + Parse configuration, check if memcached is available + :return: boolean + """ + self._parse_config() + data = self._get_data() + if data is None: + return False + return True diff --git a/collectors/python.d.plugin/memcached/memcached.conf b/collectors/python.d.plugin/memcached/memcached.conf new file mode 100644 index 0000000..3286b46 --- /dev/null +++ b/collectors/python.d.plugin/memcached/memcached.conf @@ -0,0 +1,90 @@ +# netdata python.d.plugin configuration for memcached +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, memcached also supports the following: +# +# socket: 'path/to/memcached.sock' +# +# or +# host: 'IP or HOSTNAME' # the host to connect to +# port: PORT # the port to connect to +# +# + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + host : 'localhost' + port : 11211 + +localipv4: + name : 'local' + host : '127.0.0.1' + port : 11211 + +localipv6: + name : 'local' + host : '::1' + port : 11211 + diff --git a/collectors/python.d.plugin/mongodb/Makefile.inc b/collectors/python.d.plugin/mongodb/Makefile.inc new file mode 100644 index 0000000..784945a --- /dev/null +++ b/collectors/python.d.plugin/mongodb/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += mongodb/mongodb.chart.py +dist_pythonconfig_DATA += mongodb/mongodb.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += mongodb/README.md mongodb/Makefile.inc + diff --git a/collectors/python.d.plugin/mongodb/README.md b/collectors/python.d.plugin/mongodb/README.md new file mode 100644 index 0000000..b6dd9c5 --- /dev/null +++ b/collectors/python.d.plugin/mongodb/README.md @@ -0,0 +1,210 @@ + + +# MongoDB monitoring with Netdata + +Monitors performance and health metrics of MongoDB. + +## Requirements + +- `python-pymongo` package v2.4+. + +You need to install it manually. + +Number of charts depends on mongodb version, storage engine and other features (replication): + +1. **Read requests**: + + - query + - getmore (operation the cursor executes to get additional data from query) + +2. **Write requests**: + + - insert + - delete + - update + +3. **Active clients**: + + - readers (number of clients with read operations in progress or queued) + - writers (number of clients with write operations in progress or queued) + +4. **Journal transactions**: + + - commits (count of transactions that have been written to the journal) + +5. **Data written to the journal**: + + - volume (volume of data) + +6. **Background flush** (MMAPv1): + + - average ms (average time taken by flushes to execute) + - last ms (time taken by the last flush) + +7. **Read tickets** (WiredTiger): + + - in use (number of read tickets in use) + - available (number of available read tickets remaining) + +8. **Write tickets** (WiredTiger): + + - in use (number of write tickets in use) + - available (number of available write tickets remaining) + +9. **Cursors**: + +- opened (number of cursors currently opened by MongoDB for clients) +- timedOut (number of cursors that have timed) +- noTimeout (number of open cursors with timeout disabled) + +10. **Connections**: + + - connected (number of clients currently connected to the database server) + - unused (number of unused connections available for new clients) + +11. **Memory usage metrics**: + + - virtual + - resident (amount of memory used by the database process) + - mapped + - non mapped + +12. **Page faults**: + + - page faults (number of times MongoDB had to request from disk) + +13. **Cache metrics** (WiredTiger): + + - percentage of bytes currently in the cache (amount of space taken by cached data) + - percentage of tracked dirty bytes in the cache (amount of space taken by dirty data) + +14. **Pages evicted from cache** (WiredTiger): + + - modified + - unmodified + +15. **Queued requests**: + + - readers (number of read request currently queued) + - writers (number of write request currently queued) + +16. **Errors**: + + - msg (number of message assertions raised) + - warning (number of warning assertions raised) + - regular (number of regular assertions raised) + - user (number of assertions corresponding to errors generated by users) + +17. **Storage metrics** (one chart for every database) + + - dataSize (size of all documents + padding in the database) + - indexSize (size of all indexes in the database) + - storageSize (size of all extents in the database) + +18. **Documents in the database** (one chart for all databases) + +- documents (number of objects in the database among all the collections) + +19. **tcmalloc metrics** + + - central cache free + - current total thread cache + - pageheap free + - pageheap unmapped + - thread cache free + - transfer cache free + - heap size + +20. **Commands total/failed rate** + + - count + - createIndex + - delete + - eval + - findAndModify + - insert + +21. **Locks metrics** (acquireCount metrics - number of times the lock was acquired in the specified mode) + + - Global lock + - Database lock + - Collection lock + - Metadata lock + - oplog lock + +22. **Replica set members state** + + - state + +23. **Oplog window** + + - window (interval of time between the oldest and the latest entries in the oplog) + +24. **Replication lag** + + - member (time when last entry from the oplog was applied for every member) + +25. **Replication set member heartbeat latency** + + - member (time when last heartbeat was received from replica set member) + +## Prerequisite + +Create a read-only user for Netdata in the admin database. + +1. Authenticate as the admin user. + +``` +use admin +db.auth("admin", "") +``` + +2. Create a user. + +``` +# MongoDB 2.x. +db.addUser("netdata", "", true) + +# MongoDB 3.x or higher. +db.createUser({ + "user":"netdata", + "pwd": "", + "roles" : [ + {role: 'read', db: 'admin' }, + {role: 'clusterMonitor', db: 'admin'}, + {role: 'read', db: 'local' } + ] +}) +``` + +## Configuration + +Edit the `python.d/mongodb.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/mongodb.conf +``` + +Sample: + +```yaml +local: + name : 'local' + authdb: 'admin' + host : '127.0.0.1' + port : 27017 + user : 'netdata' + pass : 'netdata' +``` + +If no configuration is given, module will attempt to connect to mongodb daemon on `127.0.0.1:27017` address + +--- + + diff --git a/collectors/python.d.plugin/mongodb/mongodb.chart.py b/collectors/python.d.plugin/mongodb/mongodb.chart.py new file mode 100644 index 0000000..5e8fec8 --- /dev/null +++ b/collectors/python.d.plugin/mongodb/mongodb.chart.py @@ -0,0 +1,786 @@ +# -*- coding: utf-8 -*- +# Description: mongodb netdata python.d module +# Author: ilyam8 +# SPDX-License-Identifier: GPL-3.0-or-later + +import ssl + +from copy import deepcopy +from datetime import datetime +from sys import exc_info + +try: + from pymongo import MongoClient, ASCENDING, DESCENDING, version_tuple + from pymongo.errors import PyMongoError + + PYMONGO = True +except ImportError: + PYMONGO = False + +from bases.FrameworkServices.SimpleService import SimpleService + +REPL_SET_STATES = [ + ('1', 'primary'), + ('8', 'down'), + ('2', 'secondary'), + ('3', 'recovering'), + ('5', 'startup2'), + ('4', 'fatal'), + ('7', 'arbiter'), + ('6', 'unknown'), + ('9', 'rollback'), + ('10', 'removed'), + ('0', 'startup') +] + + +def multiply_by_100(value): + return value * 100 + + +DEFAULT_METRICS = [ + ('opcounters.delete', None, None), + ('opcounters.update', None, None), + ('opcounters.insert', None, None), + ('opcounters.query', None, None), + ('opcounters.getmore', None, None), + ('globalLock.activeClients.readers', 'activeClients_readers', None), + ('globalLock.activeClients.writers', 'activeClients_writers', None), + ('connections.available', 'connections_available', None), + ('connections.current', 'connections_current', None), + ('mem.mapped', None, None), + ('mem.resident', None, None), + ('mem.virtual', None, None), + ('globalLock.currentQueue.readers', 'currentQueue_readers', None), + ('globalLock.currentQueue.writers', 'currentQueue_writers', None), + ('asserts.msg', None, None), + ('asserts.regular', None, None), + ('asserts.user', None, None), + ('asserts.warning', None, None), + ('extra_info.page_faults', None, None), + ('metrics.record.moves', None, None), + ('backgroundFlushing.average_ms', None, multiply_by_100), + ('backgroundFlushing.last_ms', None, multiply_by_100), + ('backgroundFlushing.flushes', None, multiply_by_100), + ('metrics.cursor.timedOut', None, None), + ('metrics.cursor.open.total', 'cursor_total', None), + ('metrics.cursor.open.noTimeout', None, None), + ('cursors.timedOut', None, None), + ('cursors.totalOpen', 'cursor_total', None) +] + +DUR = [ + ('dur.commits', None, None), + ('dur.journaledMB', None, multiply_by_100) +] + +WIREDTIGER = [ + ('wiredTiger.concurrentTransactions.read.available', 'wiredTigerRead_available', None), + ('wiredTiger.concurrentTransactions.read.out', 'wiredTigerRead_out', None), + ('wiredTiger.concurrentTransactions.write.available', 'wiredTigerWrite_available', None), + ('wiredTiger.concurrentTransactions.write.out', 'wiredTigerWrite_out', None), + ('wiredTiger.cache.bytes currently in the cache', None, None), + ('wiredTiger.cache.tracked dirty bytes in the cache', None, None), + ('wiredTiger.cache.maximum bytes configured', None, None), + ('wiredTiger.cache.unmodified pages evicted', 'unmodified', None), + ('wiredTiger.cache.modified pages evicted', 'modified', None) +] + +TCMALLOC = [ + ('tcmalloc.generic.current_allocated_bytes', None, None), + ('tcmalloc.generic.heap_size', None, None), + ('tcmalloc.tcmalloc.central_cache_free_bytes', None, None), + ('tcmalloc.tcmalloc.current_total_thread_cache_bytes', None, None), + ('tcmalloc.tcmalloc.pageheap_free_bytes', None, None), + ('tcmalloc.tcmalloc.pageheap_unmapped_bytes', None, None), + ('tcmalloc.tcmalloc.thread_cache_free_bytes', None, None), + ('tcmalloc.tcmalloc.transfer_cache_free_bytes', None, None) +] + +COMMANDS = [ + ('metrics.commands.count.total', 'count_total', None), + ('metrics.commands.createIndexes.total', 'createIndexes_total', None), + ('metrics.commands.delete.total', 'delete_total', None), + ('metrics.commands.eval.total', 'eval_total', None), + ('metrics.commands.findAndModify.total', 'findAndModify_total', None), + ('metrics.commands.insert.total', 'insert_total', None), + ('metrics.commands.delete.total', 'delete_total', None), + ('metrics.commands.count.failed', 'count_failed', None), + ('metrics.commands.createIndexes.failed', 'createIndexes_failed', None), + ('metrics.commands.delete.failed', 'delete_failed', None), + ('metrics.commands.eval.failed', 'eval_failed', None), + ('metrics.commands.findAndModify.failed', 'findAndModify_failed', None), + ('metrics.commands.insert.failed', 'insert_failed', None), + ('metrics.commands.delete.failed', 'delete_failed', None) +] + +LOCKS = [ + ('locks.Collection.acquireCount.R', 'Collection_R', None), + ('locks.Collection.acquireCount.r', 'Collection_r', None), + ('locks.Collection.acquireCount.W', 'Collection_W', None), + ('locks.Collection.acquireCount.w', 'Collection_w', None), + ('locks.Database.acquireCount.R', 'Database_R', None), + ('locks.Database.acquireCount.r', 'Database_r', None), + ('locks.Database.acquireCount.W', 'Database_W', None), + ('locks.Database.acquireCount.w', 'Database_w', None), + ('locks.Global.acquireCount.R', 'Global_R', None), + ('locks.Global.acquireCount.r', 'Global_r', None), + ('locks.Global.acquireCount.W', 'Global_W', None), + ('locks.Global.acquireCount.w', 'Global_w', None), + ('locks.Metadata.acquireCount.R', 'Metadata_R', None), + ('locks.Metadata.acquireCount.w', 'Metadata_w', None), + ('locks.oplog.acquireCount.r', 'oplog_r', None), + ('locks.oplog.acquireCount.w', 'oplog_w', None) +] + +DBSTATS = [ + 'dataSize', + 'indexSize', + 'storageSize', + 'objects' +] + +# charts order (can be overridden if you want less charts, or different order) +ORDER = [ + 'read_operations', + 'write_operations', + 'active_clients', + 'journaling_transactions', + 'journaling_volume', + 'background_flush_average', + 'background_flush_last', + 'background_flush_rate', + 'wiredtiger_read', + 'wiredtiger_write', + 'cursors', + 'connections', + 'memory', + 'page_faults', + 'queued_requests', + 'record_moves', + 'wiredtiger_cache', + 'wiredtiger_pages_evicted', + 'asserts', + 'locks_collection', + 'locks_database', + 'locks_global', + 'locks_metadata', + 'locks_oplog', + 'dbstats_objects', + 'tcmalloc_generic', + 'tcmalloc_metrics', + 'command_total_rate', + 'command_failed_rate' +] + +CHARTS = { + 'read_operations': { + 'options': [None, 'Received read requests', 'requests/s', 'throughput metrics', + 'mongodb.read_operations', 'line'], + 'lines': [ + ['query', None, 'incremental'], + ['getmore', None, 'incremental'] + ] + }, + 'write_operations': { + 'options': [None, 'Received write requests', 'requests/s', 'throughput metrics', + 'mongodb.write_operations', 'line'], + 'lines': [ + ['insert', None, 'incremental'], + ['update', None, 'incremental'], + ['delete', None, 'incremental'] + ] + }, + 'active_clients': { + 'options': [None, 'Clients with read or write operations in progress or queued', 'clients', + 'throughput metrics', 'mongodb.active_clients', 'line'], + 'lines': [ + ['activeClients_readers', 'readers', 'absolute'], + ['activeClients_writers', 'writers', 'absolute'] + ] + }, + 'journaling_transactions': { + 'options': [None, 'Transactions that have been written to the journal', 'commits', + 'database performance', 'mongodb.journaling_transactions', 'line'], + 'lines': [ + ['commits', None, 'absolute'] + ] + }, + 'journaling_volume': { + 'options': [None, 'Volume of data written to the journal', 'MiB', 'database performance', + 'mongodb.journaling_volume', 'line'], + 'lines': [ + ['journaledMB', 'volume', 'absolute', 1, 100] + ] + }, + 'background_flush_average': { + 'options': [None, 'Average time taken by flushes to execute', 'milliseconds', 'database performance', + 'mongodb.background_flush_average', 'line'], + 'lines': [ + ['average_ms', 'time', 'absolute', 1, 100] + ] + }, + 'background_flush_last': { + 'options': [None, 'Time taken by the last flush operation to execute', 'milliseconds', 'database performance', + 'mongodb.background_flush_last', 'line'], + 'lines': [ + ['last_ms', 'time', 'absolute', 1, 100] + ] + }, + 'background_flush_rate': { + 'options': [None, 'Flushes rate', 'flushes', 'database performance', 'mongodb.background_flush_rate', 'line'], + 'lines': [ + ['flushes', 'flushes', 'incremental', 1, 1] + ] + }, + 'wiredtiger_read': { + 'options': [None, 'Read tickets in use and remaining', 'tickets', 'database performance', + 'mongodb.wiredtiger_read', 'stacked'], + 'lines': [ + ['wiredTigerRead_available', 'available', 'absolute', 1, 1], + ['wiredTigerRead_out', 'inuse', 'absolute', 1, 1] + ] + }, + 'wiredtiger_write': { + 'options': [None, 'Write tickets in use and remaining', 'tickets', 'database performance', + 'mongodb.wiredtiger_write', 'stacked'], + 'lines': [ + ['wiredTigerWrite_available', 'available', 'absolute', 1, 1], + ['wiredTigerWrite_out', 'inuse', 'absolute', 1, 1] + ] + }, + 'cursors': { + 'options': [None, 'Currently opened cursors, cursors with timeout disabled and timed out cursors', + 'cursors', 'database performance', 'mongodb.cursors', 'stacked'], + 'lines': [ + ['cursor_total', 'opened', 'absolute', 1, 1], + ['noTimeout', None, 'absolute', 1, 1], + ['timedOut', None, 'incremental', 1, 1] + ] + }, + 'connections': { + 'options': [None, 'Currently connected clients and unused connections', 'connections', + 'resource utilization', 'mongodb.connections', 'stacked'], + 'lines': [ + ['connections_available', 'unused', 'absolute', 1, 1], + ['connections_current', 'connected', 'absolute', 1, 1] + ] + }, + 'memory': { + 'options': [None, 'Memory metrics', 'MiB', 'resource utilization', 'mongodb.memory', 'stacked'], + 'lines': [ + ['virtual', None, 'absolute', 1, 1], + ['resident', None, 'absolute', 1, 1], + ['nonmapped', None, 'absolute', 1, 1], + ['mapped', None, 'absolute', 1, 1] + ] + }, + 'page_faults': { + 'options': [None, 'Number of times MongoDB had to fetch data from disk', 'request/s', + 'resource utilization', 'mongodb.page_faults', 'line'], + 'lines': [ + ['page_faults', None, 'incremental', 1, 1] + ] + }, + 'queued_requests': { + 'options': [None, 'Currently queued read and write requests', 'requests', 'resource saturation', + 'mongodb.queued_requests', 'line'], + 'lines': [ + ['currentQueue_readers', 'readers', 'absolute', 1, 1], + ['currentQueue_writers', 'writers', 'absolute', 1, 1] + ] + }, + 'record_moves': { + 'options': [None, 'Number of times documents had to be moved on-disk', 'number', + 'resource saturation', 'mongodb.record_moves', 'line'], + 'lines': [ + ['moves', None, 'incremental', 1, 1] + ] + }, + 'asserts': { + 'options': [ + None, + 'Number of message, warning, regular, corresponding to errors generated by users assertions raised', + 'number', 'errors (asserts)', 'mongodb.asserts', 'line'], + 'lines': [ + ['msg', None, 'incremental', 1, 1], + ['warning', None, 'incremental', 1, 1], + ['regular', None, 'incremental', 1, 1], + ['user', None, 'incremental', 1, 1] + ] + }, + 'wiredtiger_cache': { + 'options': [None, 'The percentage of the wiredTiger cache that is in use and cache with dirty bytes', + 'percentage', 'resource utilization', 'mongodb.wiredtiger_cache', 'stacked'], + 'lines': [ + ['wiredTiger_percent_clean', 'inuse', 'absolute', 1, 1000], + ['wiredTiger_percent_dirty', 'dirty', 'absolute', 1, 1000] + ] + }, + 'wiredtiger_pages_evicted': { + 'options': [None, 'Pages evicted from the cache', + 'pages', 'resource utilization', 'mongodb.wiredtiger_pages_evicted', 'stacked'], + 'lines': [ + ['unmodified', None, 'absolute', 1, 1], + ['modified', None, 'absolute', 1, 1] + ] + }, + 'dbstats_objects': { + 'options': [None, 'Number of documents in the database among all the collections', 'documents', + 'storage size metrics', 'mongodb.dbstats_objects', 'stacked'], + 'lines': [] + }, + 'tcmalloc_generic': { + 'options': [None, 'Tcmalloc generic metrics', 'MiB', 'tcmalloc', 'mongodb.tcmalloc_generic', 'stacked'], + 'lines': [ + ['current_allocated_bytes', 'allocated', 'absolute', 1, 1 << 20], + ['heap_size', 'heap_size', 'absolute', 1, 1 << 20] + ] + }, + 'tcmalloc_metrics': { + 'options': [None, 'Tcmalloc metrics', 'KiB', 'tcmalloc', 'mongodb.tcmalloc_metrics', 'stacked'], + 'lines': [ + ['central_cache_free_bytes', 'central_cache_free', 'absolute', 1, 1024], + ['current_total_thread_cache_bytes', 'current_total_thread_cache', 'absolute', 1, 1024], + ['pageheap_free_bytes', 'pageheap_free', 'absolute', 1, 1024], + ['pageheap_unmapped_bytes', 'pageheap_unmapped', 'absolute', 1, 1024], + ['thread_cache_free_bytes', 'thread_cache_free', 'absolute', 1, 1024], + ['transfer_cache_free_bytes', 'transfer_cache_free', 'absolute', 1, 1024] + ] + }, + 'command_total_rate': { + 'options': [None, 'Commands total rate', 'commands/s', 'commands', 'mongodb.command_total_rate', 'stacked'], + 'lines': [ + ['count_total', 'count', 'incremental', 1, 1], + ['createIndexes_total', 'createIndexes', 'incremental', 1, 1], + ['delete_total', 'delete', 'incremental', 1, 1], + ['eval_total', 'eval', 'incremental', 1, 1], + ['findAndModify_total', 'findAndModify', 'incremental', 1, 1], + ['insert_total', 'insert', 'incremental', 1, 1], + ['update_total', 'update', 'incremental', 1, 1] + ] + }, + 'command_failed_rate': { + 'options': [None, 'Commands failed rate', 'commands/s', 'commands', 'mongodb.command_failed_rate', 'stacked'], + 'lines': [ + ['count_failed', 'count', 'incremental', 1, 1], + ['createIndexes_failed', 'createIndexes', 'incremental', 1, 1], + ['delete_failed', 'delete', 'incremental', 1, 1], + ['eval_failed', 'eval', 'incremental', 1, 1], + ['findAndModify_failed', 'findAndModify', 'incremental', 1, 1], + ['insert_failed', 'insert', 'incremental', 1, 1], + ['update_failed', 'update', 'incremental', 1, 1] + ] + }, + 'locks_collection': { + 'options': [None, 'Collection lock. Number of times the lock was acquired in the specified mode', + 'locks', 'locks metrics', 'mongodb.locks_collection', 'stacked'], + 'lines': [ + ['Collection_R', 'shared', 'incremental'], + ['Collection_W', 'exclusive', 'incremental'], + ['Collection_r', 'intent_shared', 'incremental'], + ['Collection_w', 'intent_exclusive', 'incremental'] + ] + }, + 'locks_database': { + 'options': [None, 'Database lock. Number of times the lock was acquired in the specified mode', + 'locks', 'locks metrics', 'mongodb.locks_database', 'stacked'], + 'lines': [ + ['Database_R', 'shared', 'incremental'], + ['Database_W', 'exclusive', 'incremental'], + ['Database_r', 'intent_shared', 'incremental'], + ['Database_w', 'intent_exclusive', 'incremental'] + ] + }, + 'locks_global': { + 'options': [None, 'Global lock. Number of times the lock was acquired in the specified mode', + 'locks', 'locks metrics', 'mongodb.locks_global', 'stacked'], + 'lines': [ + ['Global_R', 'shared', 'incremental'], + ['Global_W', 'exclusive', 'incremental'], + ['Global_r', 'intent_shared', 'incremental'], + ['Global_w', 'intent_exclusive', 'incremental'] + ] + }, + 'locks_metadata': { + 'options': [None, 'Metadata lock. Number of times the lock was acquired in the specified mode', + 'locks', 'locks metrics', 'mongodb.locks_metadata', 'stacked'], + 'lines': [ + ['Metadata_R', 'shared', 'incremental'], + ['Metadata_w', 'intent_exclusive', 'incremental'] + ] + }, + 'locks_oplog': { + 'options': [None, 'Lock on the oplog. Number of times the lock was acquired in the specified mode', + 'locks', 'locks metrics', 'mongodb.locks_oplog', 'stacked'], + 'lines': [ + ['oplog_r', 'intent_shared', 'incremental'], + ['oplog_w', 'intent_exclusive', 'incremental'] + ] + } +} + +DEFAULT_HOST = '127.0.0.1' +DEFAULT_PORT = 27017 +DEFAULT_TIMEOUT = 100 +DEFAULT_AUTHDB = 'admin' + +CONN_PARAM_HOST = 'host' +CONN_PARAM_PORT = 'port' +CONN_PARAM_SERVER_SELECTION_TIMEOUT_MS = 'serverselectiontimeoutms' +CONN_PARAM_SSL_SSL = 'ssl' +CONN_PARAM_SSL_CERT_REQS = 'ssl_cert_reqs' +CONN_PARAM_SSL_CA_CERTS = 'ssl_ca_certs' +CONN_PARAM_SSL_CRL_FILE = 'ssl_crlfile' +CONN_PARAM_SSL_CERT_FILE = 'ssl_certfile' +CONN_PARAM_SSL_KEY_FILE = 'ssl_keyfile' +CONN_PARAM_SSL_PEM_PASSPHRASE = 'ssl_pem_passphrase' + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER[:] + self.definitions = deepcopy(CHARTS) + self.authdb = self.configuration.get('authdb', DEFAULT_AUTHDB) + self.user = self.configuration.get('user') + self.password = self.configuration.get('pass') + self.metrics_to_collect = deepcopy(DEFAULT_METRICS) + self.connection = None + self.do_replica = None + self.databases = list() + + def check(self): + if not PYMONGO: + self.error('Pymongo package v2.4+ is needed to use mongodb.chart.py') + return False + self.connection, server_status, error = self._create_connection() + if error: + self.error(error) + return False + + self.build_metrics_to_collect_(server_status) + + try: + data = self._get_data() + except (LookupError, SyntaxError, AttributeError): + self.error('Type: %s, error: %s' % (str(exc_info()[0]), str(exc_info()[1]))) + return False + if isinstance(data, dict) and data: + self._data_from_check = data + self.create_charts_(server_status) + return True + self.error('_get_data() returned no data or type is not ') + return False + + def build_metrics_to_collect_(self, server_status): + + self.do_replica = 'repl' in server_status + if 'dur' in server_status: + self.metrics_to_collect.extend(DUR) + if 'tcmalloc' in server_status: + self.metrics_to_collect.extend(TCMALLOC) + if 'commands' in server_status['metrics']: + self.metrics_to_collect.extend(COMMANDS) + if 'wiredTiger' in server_status: + self.metrics_to_collect.extend(WIREDTIGER) + has_locks = 'locks' in server_status + if has_locks and 'Collection' in server_status['locks']: + self.metrics_to_collect.extend(LOCKS) + + def create_charts_(self, server_status): + + if 'dur' not in server_status: + self.order.remove('journaling_transactions') + self.order.remove('journaling_volume') + + if 'backgroundFlushing' not in server_status: + self.order.remove('background_flush_average') + self.order.remove('background_flush_last') + self.order.remove('background_flush_rate') + + if 'wiredTiger' not in server_status: + self.order.remove('wiredtiger_write') + self.order.remove('wiredtiger_read') + self.order.remove('wiredtiger_cache') + + if 'tcmalloc' not in server_status: + self.order.remove('tcmalloc_generic') + self.order.remove('tcmalloc_metrics') + + if 'commands' not in server_status['metrics']: + self.order.remove('command_total_rate') + self.order.remove('command_failed_rate') + + has_no_locks = 'locks' not in server_status + if has_no_locks or 'Collection' not in server_status['locks']: + self.order.remove('locks_collection') + self.order.remove('locks_database') + self.order.remove('locks_global') + self.order.remove('locks_metadata') + + if has_no_locks or 'oplog' not in server_status['locks']: + self.order.remove('locks_oplog') + + for dbase in self.databases: + self.order.append('_'.join([dbase, 'dbstats'])) + self.definitions['_'.join([dbase, 'dbstats'])] = { + 'options': [None, '%s: size of all documents, indexes, extents' % dbase, 'KB', + 'storage size metrics', 'mongodb.dbstats', 'line'], + 'lines': [ + ['_'.join([dbase, 'dataSize']), 'documents', 'absolute', 1, 1024], + ['_'.join([dbase, 'indexSize']), 'indexes', 'absolute', 1, 1024], + ['_'.join([dbase, 'storageSize']), 'extents', 'absolute', 1, 1024] + ]} + self.definitions['dbstats_objects']['lines'].append(['_'.join([dbase, 'objects']), dbase, 'absolute']) + + if self.do_replica: + def create_lines(hosts, string): + lines = list() + for host in hosts: + dim_id = '_'.join([host, string]) + lines.append([dim_id, host, 'absolute', 1, 1000]) + return lines + + def create_state_lines(states): + lines = list() + for state, description in states: + dim_id = '_'.join([host, 'state', state]) + lines.append([dim_id, description, 'absolute', 1, 1]) + return lines + + all_hosts = server_status['repl']['hosts'] + server_status['repl'].get('arbiters', list()) + this_host = server_status['repl']['me'] + other_hosts = [host for host in all_hosts if host != this_host] + + if 'local' in self.databases: + self.order.append('oplog_window') + self.definitions['oplog_window'] = { + 'options': [None, 'Interval of time between the oldest and the latest entries in the oplog', + 'seconds', 'replication and oplog', 'mongodb.oplog_window', 'line'], + 'lines': [['timeDiff', 'window', 'absolute', 1, 1000]]} + # Create "heartbeat delay" chart + self.order.append('heartbeat_delay') + self.definitions['heartbeat_delay'] = { + 'options': [ + None, + 'Time when last heartbeat was received from the replica set member (lastHeartbeatRecv)', + 'seconds ago', 'replication and oplog', 'mongodb.replication_heartbeat_delay', 'stacked'], + 'lines': create_lines(other_hosts, 'heartbeat_lag')} + # Create "optimedate delay" chart + self.order.append('optimedate_delay') + self.definitions['optimedate_delay'] = { + 'options': [None, 'Time when last entry from the oplog was applied (optimeDate)', + 'seconds ago', 'replication and oplog', 'mongodb.replication_optimedate_delay', 'stacked'], + 'lines': create_lines(all_hosts, 'optimedate')} + # Create "replica set members state" chart + for host in all_hosts: + chart_name = '_'.join([host, 'state']) + self.order.append(chart_name) + self.definitions[chart_name] = { + 'options': [None, 'Replica set member (%s) current state' % host, 'state', + 'replication and oplog', 'mongodb.replication_state', 'line'], + 'lines': create_state_lines(REPL_SET_STATES)} + + def _get_raw_data(self): + raw_data = dict() + + raw_data.update(self.get_server_status() or dict()) + raw_data.update(self.get_db_stats() or dict()) + raw_data.update(self.get_repl_set_get_status() or dict()) + raw_data.update(self.get_get_replication_info() or dict()) + + return raw_data or None + + def get_server_status(self): + raw_data = dict() + try: + raw_data['serverStatus'] = self.connection.admin.command('serverStatus') + except PyMongoError: + return None + else: + return raw_data + + def get_db_stats(self): + if not self.databases: + return None + + raw_data = dict() + raw_data['dbStats'] = dict() + try: + for dbase in self.databases: + raw_data['dbStats'][dbase] = self.connection[dbase].command('dbStats') + return raw_data + except PyMongoError: + return None + + def get_repl_set_get_status(self): + if not self.do_replica: + return None + + raw_data = dict() + try: + raw_data['replSetGetStatus'] = self.connection.admin.command('replSetGetStatus') + return raw_data + except PyMongoError: + return None + + def get_get_replication_info(self): + if not (self.do_replica and 'local' in self.databases): + return None + + raw_data = dict() + raw_data['getReplicationInfo'] = dict() + try: + raw_data['getReplicationInfo']['ASCENDING'] = self.connection.local.oplog.rs.find().sort( + '$natural', ASCENDING).limit(1)[0] + raw_data['getReplicationInfo']['DESCENDING'] = self.connection.local.oplog.rs.find().sort( + '$natural', DESCENDING).limit(1)[0] + return raw_data + except PyMongoError: + return None + + def _get_data(self): + """ + :return: dict + """ + raw_data = self._get_raw_data() + + if not raw_data: + return None + + data = dict() + serverStatus = raw_data['serverStatus'] + dbStats = raw_data.get('dbStats') + replSetGetStatus = raw_data.get('replSetGetStatus') + getReplicationInfo = raw_data.get('getReplicationInfo') + utc_now = datetime.utcnow() + + # serverStatus + for metric, new_name, func in self.metrics_to_collect: + value = serverStatus + for key in metric.split('.'): + try: + value = value[key] + except KeyError: + break + + if not isinstance(value, dict) and key: + data[new_name or key] = value if not func else func(value) + + if 'mapped' in serverStatus['mem']: + data['nonmapped'] = data['virtual'] - serverStatus['mem'].get('mappedWithJournal', data['mapped']) + + if data.get('maximum bytes configured'): + maximum = data['maximum bytes configured'] + data['wiredTiger_percent_clean'] = int(data['bytes currently in the cache'] * 100 / maximum * 1000) + data['wiredTiger_percent_dirty'] = int(data['tracked dirty bytes in the cache'] * 100 / maximum * 1000) + + # dbStats + if dbStats: + for dbase in dbStats: + for metric in DBSTATS: + key = '_'.join([dbase, metric]) + data[key] = dbStats[dbase][metric] + + # replSetGetStatus + if replSetGetStatus: + other_hosts = list() + members = replSetGetStatus['members'] + unix_epoch = datetime(1970, 1, 1, 0, 0) + + for member in members: + if not member.get('self'): + other_hosts.append(member) + + # Replica set time diff between current time and time when last entry from the oplog was applied + if member.get('optimeDate', unix_epoch) != unix_epoch: + member_optimedate = member['name'] + '_optimedate' + delta = utc_now - member['optimeDate'] + data[member_optimedate] = int(delta_calculation(delta=delta, multiplier=1000)) + + # Replica set members state + member_state = member['name'] + '_state' + for elem in REPL_SET_STATES: + state = elem[0] + data.update({'_'.join([member_state, state]): 0}) + data.update({'_'.join([member_state, str(member['state'])]): member['state']}) + + # Heartbeat lag calculation + for other in other_hosts: + if other['lastHeartbeatRecv'] != unix_epoch: + node = other['name'] + '_heartbeat_lag' + delta = utc_now - other['lastHeartbeatRecv'] + data[node] = int(delta_calculation(delta=delta, multiplier=1000)) + + if getReplicationInfo: + first_event = getReplicationInfo['ASCENDING']['ts'].as_datetime() + last_event = getReplicationInfo['DESCENDING']['ts'].as_datetime() + data['timeDiff'] = int(delta_calculation(delta=last_event - first_event, multiplier=1000)) + + return data + + def build_ssl_connection_params(self): + conf = self.configuration + + def cert_req(v): + if v is None: + return None + if not v: + return ssl.CERT_NONE + return ssl.CERT_REQUIRED + + ssl_params = { + CONN_PARAM_SSL_SSL: conf.get(CONN_PARAM_SSL_SSL), + CONN_PARAM_SSL_CERT_REQS: cert_req(conf.get(CONN_PARAM_SSL_CERT_REQS)), + CONN_PARAM_SSL_CA_CERTS: conf.get(CONN_PARAM_SSL_CA_CERTS), + CONN_PARAM_SSL_CRL_FILE: conf.get(CONN_PARAM_SSL_CRL_FILE), + CONN_PARAM_SSL_CERT_FILE: conf.get(CONN_PARAM_SSL_CERT_FILE), + CONN_PARAM_SSL_KEY_FILE: conf.get(CONN_PARAM_SSL_KEY_FILE), + CONN_PARAM_SSL_PEM_PASSPHRASE: conf.get(CONN_PARAM_SSL_PEM_PASSPHRASE), + } + + ssl_params = dict((k, v) for k, v in ssl_params.items() if v is not None) + + return ssl_params + + def build_connection_params(self): + conf = self.configuration + params = { + CONN_PARAM_HOST: conf.get(CONN_PARAM_HOST, DEFAULT_HOST), + CONN_PARAM_PORT: conf.get(CONN_PARAM_PORT, DEFAULT_PORT), + } + if hasattr(MongoClient, 'server_selection_timeout') or version_tuple[0] >= 4: + params[CONN_PARAM_SERVER_SELECTION_TIMEOUT_MS] = conf.get('timeout', DEFAULT_TIMEOUT) + + params.update(self.build_ssl_connection_params()) + return params + + def _create_connection(self): + params = self.build_connection_params() + self.debug('creating connection, connection params: {0}'.format(sorted(params))) + + try: + connection = MongoClient(**params) + if self.user and self.password: + self.debug('authenticating, user: {0}, password: {1}'.format(self.user, self.password)) + getattr(connection, self.authdb).authenticate(name=self.user, password=self.password) + else: + self.debug('skip authenticating, user and password are not set') + # elif self.user: + # connection.admin.authenticate(name=self.user, mechanism='MONGODB-X509') + server_status = connection.admin.command('serverStatus') + except PyMongoError as error: + return None, None, str(error) + else: + try: + self.databases = connection.database_names() + except PyMongoError as error: + self.info('Can\'t collect databases: %s' % str(error)) + return connection, server_status, None + + +def delta_calculation(delta, multiplier=1): + if hasattr(delta, 'total_seconds'): + return delta.total_seconds() * multiplier + return (delta.microseconds + (delta.seconds + delta.days * 24 * 3600) * 10 ** 6) / 10.0 ** 6 * multiplier diff --git a/collectors/python.d.plugin/mongodb/mongodb.conf b/collectors/python.d.plugin/mongodb/mongodb.conf new file mode 100644 index 0000000..9f660f5 --- /dev/null +++ b/collectors/python.d.plugin/mongodb/mongodb.conf @@ -0,0 +1,102 @@ +# netdata python.d.plugin configuration for mongodb +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, mongodb also supports the following: +# +# host: 'IP or HOSTNAME' # type the host to connect to +# port: PORT # type the port to connect to +# +# in all cases, the following can also be set: +# +# authdb: 'dbname' # database to authenticate the user against, +# # defaults to "admin". +# user: 'username' # the mongodb username to use +# pass: 'password' # the mongodb password to use +# +# SSL connection parameters (https://api.mongodb.com/python/current/examples/tls.html): +# +# ssl: yes # connect to the server using TLS +# ssl_cert_reqs: yes # require a certificate from the server when TLS is enabled +# ssl_ca_certs: '/path/to/ca.pem' # use a specific set of CA certificates +# ssl_crlfile: '/path/to/crl.pem' # use a certificate revocation lists +# ssl_certfile: '/path/to/client.pem' # use a client certificate +# ssl_keyfile: '/path/to/key.pem' # use a specific client certificate key +# ssl_pem_passphrase: 'passphrase' # use a passphrase to decrypt encrypted private keys +# + +# ---------------------------------------------------------------------- +# to connect to the mongodb on localhost, without a password: +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +local: + name : 'local' + host : '127.0.0.1' + port : 27017 + +# authsample: +# name : 'secure' +# host : 'mongodb.example.com' +# port : 27017 +# authdb : 'admin' +# user : 'monitor' +# pass : 'supersecret' diff --git a/collectors/python.d.plugin/monit/Makefile.inc b/collectors/python.d.plugin/monit/Makefile.inc new file mode 100644 index 0000000..4a3673f --- /dev/null +++ b/collectors/python.d.plugin/monit/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += monit/monit.chart.py +dist_pythonconfig_DATA += monit/monit.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += monit/README.md monit/Makefile.inc + diff --git a/collectors/python.d.plugin/monit/README.md b/collectors/python.d.plugin/monit/README.md new file mode 100644 index 0000000..1396025 --- /dev/null +++ b/collectors/python.d.plugin/monit/README.md @@ -0,0 +1,52 @@ + + +# Monit monitoring with Netdata + +Monit monitoring module. Data is grabbed from stats XML interface (exists for a long time, but not mentioned in official documentation). Mostly this plugin shows statuses of monit targets, i.e. [statuses of specified checks](https://mmonit.com/monit/documentation/monit.html#Service-checks). + +1. **Filesystems** + + - Filesystems + - Directories + - Files + - Pipes + +2. **Applications** + + - Processes (+threads/childs) + - Programs + +3. **Network** + + - Hosts (+latency) + - Network interfaces + +## Configuration + +Edit the `python.d/monit.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/monit.conf +``` + +Sample: + +```yaml +local: + name : 'local' + url : 'http://localhost:2812' + user: : admin + pass: : monit +``` + +If no configuration is given, module will attempt to connect to monit as `http://localhost:2812`. + +--- + + diff --git a/collectors/python.d.plugin/monit/monit.chart.py b/collectors/python.d.plugin/monit/monit.chart.py new file mode 100644 index 0000000..bfc1823 --- /dev/null +++ b/collectors/python.d.plugin/monit/monit.chart.py @@ -0,0 +1,360 @@ +# -*- coding: utf-8 -*- +# Description: monit netdata python.d module +# Author: Evgeniy K. (n0guest) +# SPDX-License-Identifier: GPL-3.0-or-later + +import xml.etree.ElementTree as ET +from collections import namedtuple + +from bases.FrameworkServices.UrlService import UrlService + +MonitType = namedtuple('MonitType', ('index', 'name')) + +# see enum Service_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h) +# typedef enum { +# Service_Filesystem = 0, +# Service_Directory, +# Service_File, +# Service_Process, +# Service_Host, +# Service_System, +# Service_Fifo, +# Service_Program, +# Service_Net, +# Service_Last = Service_Net +# } __attribute__((__packed__)) Service_Type; + +TYPE_FILESYSTEM = MonitType(0, 'filesystem') +TYPE_DIRECTORY = MonitType(1, 'directory') +TYPE_FILE = MonitType(2, 'file') +TYPE_PROCESS = MonitType(3, 'process') +TYPE_HOST = MonitType(4, 'host') +TYPE_SYSTEM = MonitType(5, 'system') +TYPE_FIFO = MonitType(6, 'fifo') +TYPE_PROGRAM = MonitType(7, 'program') +TYPE_NET = MonitType(8, 'net') + +TYPES = ( + TYPE_FILESYSTEM, + TYPE_DIRECTORY, + TYPE_FILE, + TYPE_PROCESS, + TYPE_HOST, + TYPE_SYSTEM, + TYPE_FIFO, + TYPE_PROGRAM, + TYPE_NET, +) + +# charts order (can be overridden if you want less charts, or different order) +ORDER = [ + 'filesystem', + 'directory', + 'file', + 'process', + 'process_uptime', + 'process_threads', + 'process_children', + 'host', + 'host_latency', + 'system', + 'fifo', + 'program', + 'net' +] + +CHARTS = { + 'filesystem': { + 'options': ['filesystems', 'Filesystems', 'filesystems', 'filesystem', 'monit.filesystems', 'line'], + 'lines': [] + }, + 'directory': { + 'options': ['directories', 'Directories', 'directories', 'filesystem', 'monit.directories', 'line'], + 'lines': [] + }, + 'file': { + 'options': ['files', 'Files', 'files', 'filesystem', 'monit.files', 'line'], + 'lines': [] + }, + 'fifo': { + 'options': ['fifos', 'Pipes (fifo)', 'pipes', 'filesystem', 'monit.fifos', 'line'], + 'lines': [] + }, + 'program': { + 'options': ['programs', 'Programs statuses', 'programs', 'applications', 'monit.programs', 'line'], + 'lines': [] + }, + 'process': { + 'options': ['processes', 'Processes statuses', 'processes', 'applications', 'monit.services', 'line'], + 'lines': [] + }, + 'process_uptime': { + 'options': ['processes uptime', 'Processes uptime', 'seconds', 'applications', + 'monit.process_uptime', 'line', 'hidden'], + 'lines': [] + }, + 'process_threads': { + 'options': ['processes threads', 'Processes threads', 'threads', 'applications', + 'monit.process_threads', 'line'], + 'lines': [] + }, + 'process_children': { + 'options': ['processes childrens', 'Child processes', 'childrens', 'applications', + 'monit.process_childrens', 'line'], + 'lines': [] + }, + 'host': { + 'options': ['hosts', 'Hosts', 'hosts', 'network', 'monit.hosts', 'line'], + 'lines': [] + }, + 'host_latency': { + 'options': ['hosts latency', 'Hosts latency', 'milliseconds', 'network', 'monit.host_latency', 'line'], + 'lines': [] + }, + 'net': { + 'options': ['interfaces', 'Network interfaces and addresses', 'interfaces', 'network', + 'monit.networks', 'line'], + 'lines': [] + }, +} + + +class BaseMonitService(object): + def __init__(self, typ, name, status, monitor): + self.type = typ + self.name = name + self.status = status + self.monitor = monitor + + def __repr__(self): + return 'MonitService({0}:{1})'.format(self.type.name, self.name) + + def __eq__(self, other): + if not isinstance(other, BaseMonitService): + return False + return self.type == other.type and self.name == other.name + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) + + def is_running(self): + return self.status == '0' and self.monitor == '1' + + def key(self): + return '{0}_{1}'.format(self.type.name, self.name) + + def data(self): + return {self.key(): int(self.is_running())} + + +class ProcessMonitService(BaseMonitService): + def __init__(self, typ, name, status, monitor): + super(ProcessMonitService, self).__init__(typ, name, status, monitor) + self.uptime = None + self.threads = None + self.children = None + + def __eq__(self, other): + return super(ProcessMonitService, self).__eq__(other) + + def __ne__(self, other): + return super(ProcessMonitService, self).__ne__(other) + + def __hash__(self): + return super(ProcessMonitService, self).__hash__() + + def uptime_key(self): + return 'process_uptime_{0}'.format(self.name) + + def threads_key(self): + return 'process_threads_{0}'.format(self.name) + + def children_key(self): + return 'process_children_{0}'.format(self.name) + + def data(self): + base_data = super(ProcessMonitService, self).data() + # skipping bugged metrics with negative uptime (monit before v5.16) + uptime = self.uptime if self.uptime and int(self.uptime) >= 0 else None + data = { + self.uptime_key(): uptime, + self.threads_key(): self.threads, + self.children_key(): self.children, + } + data.update(base_data) + + return data + + +class HostMonitService(BaseMonitService): + def __init__(self, typ, name, status, monitor): + super(HostMonitService, self).__init__(typ, name, status, monitor) + self.latency = None + + def __eq__(self, other): + return super(HostMonitService, self).__eq__(other) + + def __ne__(self, other): + return super(HostMonitService, self).__ne__(other) + + def __hash__(self): + return super(HostMonitService, self).__hash__() + + def latency_key(self): + return 'host_latency_{0}'.format(self.name) + + def data(self): + base_data = super(HostMonitService, self).data() + latency = float(self.latency) * 1000000 if self.latency else None + data = {self.latency_key(): latency} + data.update(base_data) + + return data + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + base_url = self.configuration.get('url', "http://localhost:2812") + self.url = '{0}/_status?format=xml&level=full'.format(base_url) + self.active_services = list() + + def parse(self, raw): + try: + root = ET.fromstring(raw) + except ET.ParseError: + self.error("URL {0} didn't return a valid XML page. Please check your settings.".format(self.url)) + return None + return root + + def _get_data(self): + raw = self._get_raw_data() + if not raw: + return None + + root = self.parse(raw) + if root is None: + return None + + services = self.get_services(root) + if not services: + return None + + if len(self.charts) > 0: + self.update_charts(services) + + data = dict() + + for svc in services: + data.update(svc.data()) + + return data + + def get_services(self, root): + services = list() + + for typ in TYPES: + if typ == TYPE_SYSTEM: + self.debug("skipping service from '{0}' category, it's useless in graphs".format(TYPE_SYSTEM.name)) + continue + + xpath_query = "./service[@type='{0}']".format(typ.index) + self.debug('Searching for {0} as {1}'.format(typ.name, xpath_query)) + + for svc_root in root.findall(xpath_query): + svc = create_service(svc_root, typ) + self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format( + svc.name, svc.type.name, svc.status, svc.monitor)) + + services.append(svc) + + return services + + def update_charts(self, services): + remove = [svc for svc in self.active_services if svc not in services] + add = [svc for svc in services if svc not in self.active_services] + + self.remove_services_from_charts(remove) + self.add_services_to_charts(add) + + self.active_services = services + + def add_services_to_charts(self, services): + for svc in services: + if svc.type == TYPE_HOST: + self.charts['host_latency'].add_dimension([svc.latency_key(), svc.name, 'absolute', 1000, 1000000]) + if svc.type == TYPE_PROCESS: + self.charts['process_uptime'].add_dimension([svc.uptime_key(), svc.name]) + self.charts['process_threads'].add_dimension([svc.threads_key(), svc.name]) + self.charts['process_children'].add_dimension([svc.children_key(), svc.name]) + self.charts[svc.type.name].add_dimension([svc.key(), svc.name]) + + def remove_services_from_charts(self, services): + for svc in services: + if svc.type == TYPE_HOST: + self.charts['host_latency'].del_dimension(svc.latency_key(), False) + if svc.type == TYPE_PROCESS: + self.charts['process_uptime'].del_dimension(svc.uptime_key(), False) + self.charts['process_threads'].del_dimension(svc.threads_key(), False) + self.charts['process_children'].del_dimension(svc.children_key(), False) + self.charts[svc.type.name].del_dimension(svc.key(), False) + + +def create_service(root, typ): + if typ == TYPE_HOST: + return create_host_service(root) + elif typ == TYPE_PROCESS: + return create_process_service(root) + return create_base_service(root, typ) + + +def create_host_service(root): + svc = HostMonitService( + TYPE_HOST, + root.find('name').text, + root.find('status').text, + root.find('monitor').text, + ) + + latency = root.find('./icmp/responsetime') + if latency is not None: + svc.latency = latency.text + + return svc + + +def create_process_service(root): + svc = ProcessMonitService( + TYPE_PROCESS, + root.find('name').text, + root.find('status').text, + root.find('monitor').text, + ) + + uptime = root.find('uptime') + if uptime is not None: + svc.uptime = uptime.text + + threads = root.find('threads') + if threads is not None: + svc.threads = threads.text + + children = root.find('children') + if children is not None: + svc.children = children.text + + return svc + + +def create_base_service(root, typ): + return BaseMonitService( + typ, + root.find('name').text, + root.find('status').text, + root.find('monitor').text, + ) diff --git a/collectors/python.d.plugin/monit/monit.conf b/collectors/python.d.plugin/monit/monit.conf new file mode 100644 index 0000000..9a3fb69 --- /dev/null +++ b/collectors/python.d.plugin/monit/monit.conf @@ -0,0 +1,86 @@ +# netdata python.d.plugin configuration for monit +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, this plugin also supports the following: +# +# url: 'URL' # the URL to fetch monit's status stats +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# Example +# +# local: +# name : 'Local Monit' +# url : 'http://localhost:2812' +# +# "local" will show up in Netdata logs. "Reverse Proxy" will show up in the menu +# in the monit section. + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + url : 'http://localhost:2812' diff --git a/collectors/python.d.plugin/nsd/Makefile.inc b/collectors/python.d.plugin/nsd/Makefile.inc new file mode 100644 index 0000000..58e9fd6 --- /dev/null +++ b/collectors/python.d.plugin/nsd/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += nsd/nsd.chart.py +dist_pythonconfig_DATA += nsd/nsd.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += nsd/README.md nsd/Makefile.inc + diff --git a/collectors/python.d.plugin/nsd/README.md b/collectors/python.d.plugin/nsd/README.md new file mode 100644 index 0000000..e5183ae --- /dev/null +++ b/collectors/python.d.plugin/nsd/README.md @@ -0,0 +1,68 @@ + + +# NSD monitoring with Netdata + +Uses the `nsd-control stats_noreset` command to provide `nsd` statistics. + +## Requirements + +- Version of `nsd` must be 4.0+ +- Netdata must have permissions to run `nsd-control stats_noreset` + +It produces: + +1. **Queries** + + - queries + +2. **Zones** + + - master + - slave + +3. **Protocol** + + - udp + - udp6 + - tcp + - tcp6 + +4. **Query Type** + + - A + - NS + - CNAME + - SOA + - PTR + - HINFO + - MX + - NAPTR + - TXT + - AAAA + - SRV + - ANY + +5. **Transfer** + + - NOTIFY + - AXFR + +6. **Return Code** + + - NOERROR + - FORMERR + - SERVFAIL + - NXDOMAIN + - NOTIMP + - REFUSED + - YXDOMAIN + +Configuration is not needed. + +--- + + diff --git a/collectors/python.d.plugin/nsd/nsd.chart.py b/collectors/python.d.plugin/nsd/nsd.chart.py new file mode 100644 index 0000000..6f9b2ce --- /dev/null +++ b/collectors/python.d.plugin/nsd/nsd.chart.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +# Description: NSD `nsd-control stats_noreset` netdata python.d module +# Author: <383c57 at gmail.com> +# SPDX-License-Identifier: GPL-3.0-or-later + +import re + +from bases.FrameworkServices.ExecutableService import ExecutableService + +update_every = 30 + +NSD_CONTROL_COMMAND = 'nsd-control stats_noreset' +REGEX = re.compile(r'([A-Za-z0-9.]+)=(\d+)') + +ORDER = [ + 'queries', + 'zones', + 'protocol', + 'type', + 'transfer', + 'rcode', +] + +CHARTS = { + 'queries': { + 'options': [None, 'queries', 'queries/s', 'queries', 'nsd.queries', 'line'], + 'lines': [ + ['num_queries', 'queries', 'incremental'] + ] + }, + 'zones': { + 'options': [None, 'zones', 'zones', 'zones', 'nsd.zones', 'stacked'], + 'lines': [ + ['zone_master', 'master', 'absolute'], + ['zone_slave', 'slave', 'absolute'] + ] + }, + 'protocol': { + 'options': [None, 'protocol', 'queries/s', 'protocol', 'nsd.protocols', 'stacked'], + 'lines': [ + ['num_udp', 'udp', 'incremental'], + ['num_udp6', 'udp6', 'incremental'], + ['num_tcp', 'tcp', 'incremental'], + ['num_tcp6', 'tcp6', 'incremental'] + ] + }, + 'type': { + 'options': [None, 'query type', 'queries/s', 'query type', 'nsd.type', 'stacked'], + 'lines': [ + ['num_type_A', 'A', 'incremental'], + ['num_type_NS', 'NS', 'incremental'], + ['num_type_CNAME', 'CNAME', 'incremental'], + ['num_type_SOA', 'SOA', 'incremental'], + ['num_type_PTR', 'PTR', 'incremental'], + ['num_type_HINFO', 'HINFO', 'incremental'], + ['num_type_MX', 'MX', 'incremental'], + ['num_type_NAPTR', 'NAPTR', 'incremental'], + ['num_type_TXT', 'TXT', 'incremental'], + ['num_type_AAAA', 'AAAA', 'incremental'], + ['num_type_SRV', 'SRV', 'incremental'], + ['num_type_TYPE255', 'ANY', 'incremental'] + ] + }, + 'transfer': { + 'options': [None, 'transfer', 'queries/s', 'transfer', 'nsd.transfer', 'stacked'], + 'lines': [ + ['num_opcode_NOTIFY', 'NOTIFY', 'incremental'], + ['num_type_TYPE252', 'AXFR', 'incremental'] + ] + }, + 'rcode': { + 'options': [None, 'return code', 'queries/s', 'return code', 'nsd.rcode', 'stacked'], + 'lines': [ + ['num_rcode_NOERROR', 'NOERROR', 'incremental'], + ['num_rcode_FORMERR', 'FORMERR', 'incremental'], + ['num_rcode_SERVFAIL', 'SERVFAIL', 'incremental'], + ['num_rcode_NXDOMAIN', 'NXDOMAIN', 'incremental'], + ['num_rcode_NOTIMP', 'NOTIMP', 'incremental'], + ['num_rcode_REFUSED', 'REFUSED', 'incremental'], + ['num_rcode_YXDOMAIN', 'YXDOMAIN', 'incremental'] + ] + } +} + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.command = NSD_CONTROL_COMMAND + + def _get_data(self): + lines = self._get_raw_data() + if not lines: + return None + + stats = dict( + (k.replace('.', '_'), int(v)) for k, v in REGEX.findall(''.join(lines)) + ) + stats.setdefault('num_opcode_NOTIFY', 0) + stats.setdefault('num_type_TYPE252', 0) + stats.setdefault('num_type_TYPE255', 0) + + return stats diff --git a/collectors/python.d.plugin/nsd/nsd.conf b/collectors/python.d.plugin/nsd/nsd.conf new file mode 100644 index 0000000..77a8a31 --- /dev/null +++ b/collectors/python.d.plugin/nsd/nsd.conf @@ -0,0 +1,91 @@ +# netdata python.d.plugin configuration for nsd +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# nsd-control is slow, so once every 30 seconds +# update_every: 30 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, nsd also supports the following: +# +# command: 'nsd-control stats_noreset' # the command to run +# + +# ---------------------------------------------------------------------- +# IMPORTANT Information +# +# Netdata must have permissions to run `nsd-control stats_noreset` command +# +# - Example-1 (use "sudo") +# 1. sudoers (e.g. visudo -f /etc/sudoers.d/netdata) +# Defaults:netdata !requiretty +# netdata ALL=(ALL) NOPASSWD: /usr/sbin/nsd-control stats_noreset +# 2. etc/netdata/python.d/nsd.conf +# local: +# update_every: 30 +# command: 'sudo /usr/sbin/nsd-control stats_noreset' +# +# - Example-2 (add "netdata" user to "nsd" group) +# usermod -aG nsd netdata +# + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS + +local: + update_every: 30 + command: 'nsd-control stats_noreset' diff --git a/collectors/python.d.plugin/ntpd/Makefile.inc b/collectors/python.d.plugin/ntpd/Makefile.inc new file mode 100644 index 0000000..81210eb --- /dev/null +++ b/collectors/python.d.plugin/ntpd/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += ntpd/ntpd.chart.py +dist_pythonconfig_DATA += ntpd/ntpd.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += ntpd/README.md ntpd/Makefile.inc + diff --git a/collectors/python.d.plugin/ntpd/README.md b/collectors/python.d.plugin/ntpd/README.md new file mode 100644 index 0000000..9832707 --- /dev/null +++ b/collectors/python.d.plugin/ntpd/README.md @@ -0,0 +1,90 @@ + + +# NTP daemon monitoring with Netdata + +Monitors the system variables of the local `ntpd` daemon (optional incl. variables of the polled peers) using the NTP Control Message Protocol via UDP socket, similar to `ntpq`, the [standard NTP query program](http://doc.ntp.org/current-stable/ntpq.html). + +## Requirements + +- Version: `NTPv4` +- Local interrogation allowed in `/etc/ntp.conf` (default): + +``` +# Local users may interrogate the ntp server more closely. +restrict 127.0.0.1 +restrict ::1 +``` + +It produces: + +1. system + + - offset + - jitter + - frequency + - delay + - dispersion + - stratum + - tc + - precision + +2. peers + + - offset + - delay + - dispersion + - jitter + - rootdelay + - rootdispersion + - stratum + - hmode + - pmode + - hpoll + - ppoll + - precision + +## Configuration + +Edit the `python.d/ntpd.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/ntpd.conf +``` + +Sample: + +```yaml +update_every: 10 + +host: 'localhost' +port: '123' +show_peers: yes +# hide peers with source address in ranges 127.0.0.0/8 and 192.168.0.0/16 +peer_filter: '(127\..*)|(192\.168\..*)' +# check for new/changed peers every 60 updates +peer_rescan: 60 +``` + +Sample (multiple jobs): + +Note: `ntp.conf` on the host `otherhost` must be configured to allow queries from our local host by including a line like `restrict nomodify notrap nopeer`. + +```yaml +local: + host: 'localhost' + +otherhost: + host: 'otherhost' +``` + +If no configuration is given, module will attempt to connect to `ntpd` on `::1:123` or `127.0.0.1:123` and show charts for the systemvars. Use `show_peers: yes` to also show the charts for configured peers. Local peers in the range `127.0.0.0/8` are hidden by default, use `peer_filter: ''` to show all peers. + +--- + + diff --git a/collectors/python.d.plugin/ntpd/ntpd.chart.py b/collectors/python.d.plugin/ntpd/ntpd.chart.py new file mode 100644 index 0000000..275d227 --- /dev/null +++ b/collectors/python.d.plugin/ntpd/ntpd.chart.py @@ -0,0 +1,385 @@ +# -*- coding: utf-8 -*- +# Description: ntpd netdata python.d module +# Author: Sven Mäder (rda0) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import re +import struct + +from bases.FrameworkServices.SocketService import SocketService + +# NTP Control Message Protocol constants +MODE = 6 +HEADER_FORMAT = '!BBHHHHH' +HEADER_LEN = 12 +OPCODES = { + 'readstat': 1, + 'readvar': 2 +} + +# Maximal dimension precision +PRECISION = 1000000 + +# Static charts +ORDER = [ + 'sys_offset', + 'sys_jitter', + 'sys_frequency', + 'sys_wander', + 'sys_rootdelay', + 'sys_rootdisp', + 'sys_stratum', + 'sys_tc', + 'sys_precision', + 'peer_offset', + 'peer_delay', + 'peer_dispersion', + 'peer_jitter', + 'peer_xleave', + 'peer_rootdelay', + 'peer_rootdisp', + 'peer_stratum', + 'peer_hmode', + 'peer_pmode', + 'peer_hpoll', + 'peer_ppoll', + 'peer_precision' +] + +CHARTS = { + 'sys_offset': { + 'options': [None, 'Combined offset of server relative to this host', 'milliseconds', + 'system', 'ntpd.sys_offset', 'area'], + 'lines': [ + ['offset', 'offset', 'absolute', 1, PRECISION] + ] + }, + 'sys_jitter': { + 'options': [None, 'Combined system jitter and clock jitter', 'milliseconds', + 'system', 'ntpd.sys_jitter', 'line'], + 'lines': [ + ['sys_jitter', 'system', 'absolute', 1, PRECISION], + ['clk_jitter', 'clock', 'absolute', 1, PRECISION] + ] + }, + 'sys_frequency': { + 'options': [None, 'Frequency offset relative to hardware clock', 'ppm', 'system', 'ntpd.sys_frequency', 'area'], + 'lines': [ + ['frequency', 'frequency', 'absolute', 1, PRECISION] + ] + }, + 'sys_wander': { + 'options': [None, 'Clock frequency wander', 'ppm', 'system', 'ntpd.sys_wander', 'area'], + 'lines': [ + ['clk_wander', 'clock', 'absolute', 1, PRECISION] + ] + }, + 'sys_rootdelay': { + 'options': [None, 'Total roundtrip delay to the primary reference clock', 'milliseconds', 'system', + 'ntpd.sys_rootdelay', 'area'], + 'lines': [ + ['rootdelay', 'delay', 'absolute', 1, PRECISION] + ] + }, + 'sys_rootdisp': { + 'options': [None, 'Total root dispersion to the primary reference clock', 'milliseconds', 'system', + 'ntpd.sys_rootdisp', 'area'], + 'lines': [ + ['rootdisp', 'dispersion', 'absolute', 1, PRECISION] + ] + }, + 'sys_stratum': { + 'options': [None, 'Stratum (1-15)', 'stratum', 'system', 'ntpd.sys_stratum', 'line'], + 'lines': [ + ['stratum', 'stratum', 'absolute', 1, PRECISION] + ] + }, + 'sys_tc': { + 'options': [None, 'Time constant and poll exponent (3-17)', 'log2 s', 'system', 'ntpd.sys_tc', 'line'], + 'lines': [ + ['tc', 'current', 'absolute', 1, PRECISION], + ['mintc', 'minimum', 'absolute', 1, PRECISION] + ] + }, + 'sys_precision': { + 'options': [None, 'Precision', 'log2 s', 'system', 'ntpd.sys_precision', 'line'], + 'lines': [ + ['precision', 'precision', 'absolute', 1, PRECISION] + ] + } +} + +PEER_CHARTS = { + 'peer_offset': { + 'options': [None, 'Filter offset', 'milliseconds', 'peers', 'ntpd.peer_offset', 'line'], + 'lines': [] + }, + 'peer_delay': { + 'options': [None, 'Filter delay', 'milliseconds', 'peers', 'ntpd.peer_delay', 'line'], + 'lines': [] + }, + 'peer_dispersion': { + 'options': [None, 'Filter dispersion', 'milliseconds', 'peers', 'ntpd.peer_dispersion', 'line'], + 'lines': [] + }, + 'peer_jitter': { + 'options': [None, 'Filter jitter', 'milliseconds', 'peers', 'ntpd.peer_jitter', 'line'], + 'lines': [] + }, + 'peer_xleave': { + 'options': [None, 'Interleave delay', 'milliseconds', 'peers', 'ntpd.peer_xleave', 'line'], + 'lines': [] + }, + 'peer_rootdelay': { + 'options': [None, 'Total roundtrip delay to the primary reference clock', 'milliseconds', 'peers', + 'ntpd.peer_rootdelay', 'line'], + 'lines': [] + }, + 'peer_rootdisp': { + 'options': [None, 'Total root dispersion to the primary reference clock', 'ms', 'peers', + 'ntpd.peer_rootdisp', 'line'], + 'lines': [] + }, + 'peer_stratum': { + 'options': [None, 'Stratum (1-15)', 'stratum', 'peers', 'ntpd.peer_stratum', 'line'], + 'lines': [] + }, + 'peer_hmode': { + 'options': [None, 'Host mode (1-6)', 'hmode', 'peers', 'ntpd.peer_hmode', 'line'], + 'lines': [] + }, + 'peer_pmode': { + 'options': [None, 'Peer mode (1-5)', 'pmode', 'peers', 'ntpd.peer_pmode', 'line'], + 'lines': [] + }, + 'peer_hpoll': { + 'options': [None, 'Host poll exponent', 'log2 s', 'peers', 'ntpd.peer_hpoll', 'line'], + 'lines': [] + }, + 'peer_ppoll': { + 'options': [None, 'Peer poll exponent', 'log2 s', 'peers', 'ntpd.peer_ppoll', 'line'], + 'lines': [] + }, + 'peer_precision': { + 'options': [None, 'Precision', 'log2 s', 'peers', 'ntpd.peer_precision', 'line'], + 'lines': [] + } +} + + +class Base: + regex = re.compile(r'([a-z_]+)=((?:-)?[0-9]+(?:\.[0-9]+)?)') + + @staticmethod + def get_header(associd=0, operation='readvar'): + """ + Constructs the NTP Control Message header: + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |LI | VN |Mode |R|E|M| OpCode | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Status | Association ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Offset | Count | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + """ + version = 2 + sequence = 1 + status = 0 + offset = 0 + count = 0 + header = struct.pack(HEADER_FORMAT, (version << 3 | MODE), OPCODES[operation], + sequence, status, associd, offset, count) + return header + + +class System(Base): + def __init__(self): + self.request = self.get_header() + + def get_data(self, raw): + """ + Extracts key=value pairs with float/integer from ntp response packet data. + """ + data = dict() + for key, value in self.regex.findall(raw): + data[key] = float(value) * PRECISION + return data + + +class Peer(Base): + def __init__(self, idx, name): + self.id = idx + self.real_name = name + self.name = name.replace('.', '_') + self.request = self.get_header(self.id) + + def get_data(self, raw): + """ + Extracts key=value pairs with float/integer from ntp response packet data. + """ + data = dict() + for key, value in self.regex.findall(raw): + dimension = '_'.join([self.name, key]) + data[dimension] = float(value) * PRECISION + return data + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + SocketService.__init__(self, configuration=configuration, name=name) + self.order = list(ORDER) + self.definitions = dict(CHARTS) + self.port = 'ntp' + self.dgram_socket = True + self.system = System() + self.peers = dict() + self.request = str() + self.retries = 0 + self.show_peers = self.configuration.get('show_peers', False) + self.peer_rescan = self.configuration.get('peer_rescan', 60) + if self.show_peers: + self.definitions.update(PEER_CHARTS) + + def check(self): + """ + Checks if we can get valid systemvars. + If not, returns None to disable module. + """ + self._parse_config() + + peer_filter = self.configuration.get('peer_filter', r'127\..*') + try: + self.peer_filter = re.compile(r'^((0\.0\.0\.0)|({0}))$'.format(peer_filter)) + except re.error as error: + self.error('Compile pattern error (peer_filter) : {0}'.format(error)) + return None + + self.request = self.system.request + raw_systemvars = self._get_raw_data() + + if not self.system.get_data(raw_systemvars): + return None + + return True + + def get_data(self): + """ + Gets systemvars data on each update. + Gets peervars data for all peers on each update. + """ + data = dict() + + self.request = self.system.request + raw = self._get_raw_data() + if not raw: + return None + + data.update(self.system.get_data(raw)) + + if not self.show_peers: + return data + + if not self.peers or self.runs_counter % self.peer_rescan == 0 or self.retries > 8: + self.find_new_peers() + + for peer in self.peers.values(): + self.request = peer.request + peer_data = peer.get_data(self._get_raw_data()) + if peer_data: + data.update(peer_data) + else: + self.retries += 1 + + return data + + def find_new_peers(self): + new_peers = dict((p.real_name, p) for p in self.get_peers()) + if new_peers: + + peers_to_remove = set(self.peers) - set(new_peers) + peers_to_add = set(new_peers) - set(self.peers) + + for peer_name in peers_to_remove: + self.hide_old_peer_from_charts(self.peers[peer_name]) + del self.peers[peer_name] + + for peer_name in peers_to_add: + self.add_new_peer_to_charts(new_peers[peer_name]) + + self.peers.update(new_peers) + self.retries = 0 + + def add_new_peer_to_charts(self, peer): + for chart_id in set(self.charts.charts) & set(PEER_CHARTS): + dim_id = peer.name + chart_id[4:] + if dim_id not in self.charts[chart_id]: + self.charts[chart_id].add_dimension([dim_id, peer.real_name, 'absolute', 1, PRECISION]) + else: + self.charts[chart_id].hide_dimension(dim_id, reverse=True) + + def hide_old_peer_from_charts(self, peer): + for chart_id in set(self.charts.charts) & set(PEER_CHARTS): + dim_id = peer.name + chart_id[4:] + self.charts[chart_id].hide_dimension(dim_id) + + def get_peers(self): + self.request = Base.get_header(operation='readstat') + + raw_data = self._get_raw_data(raw=True) + if not raw_data: + return list() + + peer_ids = self.get_peer_ids(raw_data) + if not peer_ids: + return list() + + new_peers = list() + for peer_id in peer_ids: + self.request = Base.get_header(peer_id) + raw_peer_data = self._get_raw_data() + if not raw_peer_data: + continue + srcadr = re.search(r'(srcadr)=([^,]+)', raw_peer_data) + if not srcadr: + continue + srcadr = srcadr.group(2) + if self.peer_filter.search(srcadr): + continue + stratum = re.search(r'(stratum)=([^,]+)', raw_peer_data) + if not stratum: + continue + if int(stratum.group(2)) > 15: + continue + + new_peer = Peer(idx=peer_id, name=srcadr) + new_peers.append(new_peer) + return new_peers + + def get_peer_ids(self, res): + """ + Unpack the NTP Control Message header + Get data length from header + Get list of association ids returned in the readstat response + """ + + try: + count = struct.unpack(HEADER_FORMAT, res[:HEADER_LEN])[6] + except struct.error as error: + self.error('error unpacking header: {0}'.format(error)) + return None + if not count: + self.error('empty data field in NTP control packet') + return None + + data_end = HEADER_LEN + count + data = res[HEADER_LEN:data_end] + data_format = ''.join(['!', 'H' * int(count / 2)]) + try: + peer_ids = list(struct.unpack(data_format, data))[::2] + except struct.error as error: + self.error('error unpacking data: {0}'.format(error)) + return None + return peer_ids diff --git a/collectors/python.d.plugin/ntpd/ntpd.conf b/collectors/python.d.plugin/ntpd/ntpd.conf new file mode 100644 index 0000000..80bd468 --- /dev/null +++ b/collectors/python.d.plugin/ntpd/ntpd.conf @@ -0,0 +1,89 @@ +# netdata python.d.plugin configuration for ntpd +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# +# Additionally to the above, ntp also supports the following: +# +# host: 'localhost' # the host to query +# port: '123' # the UDP port where `ntpd` listens +# show_peers: no # use `yes` to show peer charts. enabling this +# # option is recommended only for debugging, as +# # it could possibly imply memory leaks if the +# # peers change frequently. +# peer_filter: '127\..*' # regex to exclude peers +# # by default local peers are hidden +# # use `''` to show all peers. +# peer_rescan: 60 # interval (>0) to check for new/changed peers +# # use `1` to check on every update +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name: 'local' + host: 'localhost' + port: '123' + show_peers: no + +localhost_ipv4: + name: 'local' + host: '127.0.0.1' + port: '123' + show_peers: no + +localhost_ipv6: + name: 'local' + host: '::1' + port: '123' + show_peers: no diff --git a/collectors/python.d.plugin/nvidia_smi/Makefile.inc b/collectors/python.d.plugin/nvidia_smi/Makefile.inc new file mode 100644 index 0000000..52fb25a --- /dev/null +++ b/collectors/python.d.plugin/nvidia_smi/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += nvidia_smi/nvidia_smi.chart.py +dist_pythonconfig_DATA += nvidia_smi/nvidia_smi.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += nvidia_smi/README.md nvidia_smi/Makefile.inc + diff --git a/collectors/python.d.plugin/nvidia_smi/README.md b/collectors/python.d.plugin/nvidia_smi/README.md new file mode 100644 index 0000000..bb41694 --- /dev/null +++ b/collectors/python.d.plugin/nvidia_smi/README.md @@ -0,0 +1,66 @@ + + +# Nvidia GPU monitoring with Netdata + +Monitors performance metrics (memory usage, fan speed, pcie bandwidth utilization, temperature, etc.) using `nvidia-smi` cli tool. + +> **Warning**: this collector does not work when the Netdata Agent is [running in a container](https://learn.netdata.cloud/docs/agent/packaging/docker). + + +## Requirements and Notes + +- You must have the `nvidia-smi` tool installed and your NVIDIA GPU(s) must support the tool. Mostly the newer high end models used for AI / ML and Crypto or Pro range, read more about [nvidia_smi](https://developer.nvidia.com/nvidia-system-management-interface). +- You must enable this plugin, as its disabled by default due to minor performance issues: + ```bash + cd /etc/netdata # Replace this path with your Netdata config directory, if different + sudo ./edit-config python.d.conf + ``` + Remove the '#' before nvidia_smi so it reads: `nvidia_smi: yes`. + +- On some systems when the GPU is idle the `nvidia-smi` tool unloads and there is added latency again when it is next queried. If you are running GPUs under constant workload this isn't likely to be an issue. +- Currently the `nvidia-smi` tool is being queried via cli. Updating the plugin to use the nvidia c/c++ API directly should resolve this issue. See discussion here: +- Contributions are welcome. +- Make sure `netdata` user can execute `/usr/bin/nvidia-smi` or wherever your binary is. +- If `nvidia-smi` process [is not killed after netdata restart](https://github.com/netdata/netdata/issues/7143) you need to off `loop_mode`. +- `poll_seconds` is how often in seconds the tool is polled for as an integer. + +## Charts + +It produces the following charts: + +- PCI Express Bandwidth Utilization in `KiB/s` +- Fan Speed in `percentage` +- GPU Utilization in `percentage` +- Memory Bandwidth Utilization in `percentage` +- Encoder/Decoder Utilization in `percentage` +- Memory Usage in `MiB` +- Temperature in `celsius` +- Clock Frequencies in `MHz` +- Power Utilization in `Watts` +- Memory Used by Each Process in `MiB` +- Memory Used by Each User in `MiB` +- Number of User on GPU in `num` + +## Configuration + +Edit the `python.d/nvidia_smi.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/nvidia_smi.conf +``` + +Sample: + +```yaml +loop_mode : yes +poll_seconds : 1 +exclude_zero_memory_users : yes +``` + + diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py new file mode 100644 index 0000000..23e90e6 --- /dev/null +++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py @@ -0,0 +1,589 @@ +# -*- coding: utf-8 -*- +# Description: nvidia-smi netdata python.d module +# Original Author: Steven Noonan (tycho) +# Author: Ilya Mashchenko (ilyam8) +# User Memory Stat Author: Guido Scatena (scatenag) + +import os +import pwd +import subprocess +import threading +import xml.etree.ElementTree as et + +from bases.FrameworkServices.SimpleService import SimpleService +from bases.collection import find_binary + +disabled_by_default = True + +NVIDIA_SMI = 'nvidia-smi' + +EMPTY_ROW = '' +EMPTY_ROW_LIMIT = 500 +POLLER_BREAK_ROW = '' + +PCI_BANDWIDTH = 'pci_bandwidth' +FAN_SPEED = 'fan_speed' +GPU_UTIL = 'gpu_utilization' +MEM_UTIL = 'mem_utilization' +ENCODER_UTIL = 'encoder_utilization' +MEM_USAGE = 'mem_usage' +BAR_USAGE = 'bar1_mem_usage' +TEMPERATURE = 'temperature' +CLOCKS = 'clocks' +POWER = 'power' +POWER_STATE = 'power_state' +PROCESSES_MEM = 'processes_mem' +USER_MEM = 'user_mem' +USER_NUM = 'user_num' + +ORDER = [ + PCI_BANDWIDTH, + FAN_SPEED, + GPU_UTIL, + MEM_UTIL, + ENCODER_UTIL, + MEM_USAGE, + BAR_USAGE, + TEMPERATURE, + CLOCKS, + POWER, + POWER_STATE, + PROCESSES_MEM, + USER_MEM, + USER_NUM, +] + +# https://docs.nvidia.com/gameworks/content/gameworkslibrary/coresdk/nvapi/group__gpupstate.html +POWER_STATES = ['P' + str(i) for i in range(0, 16)] + + +def gpu_charts(gpu): + fam = gpu.full_name() + + charts = { + PCI_BANDWIDTH: { + 'options': [None, 'PCI Express Bandwidth Utilization', 'KiB/s', fam, 'nvidia_smi.pci_bandwidth', 'area'], + 'lines': [ + ['rx_util', 'rx', 'absolute', 1, 1], + ['tx_util', 'tx', 'absolute', 1, -1], + ] + }, + FAN_SPEED: { + 'options': [None, 'Fan Speed', 'percentage', fam, 'nvidia_smi.fan_speed', 'line'], + 'lines': [ + ['fan_speed', 'speed'], + ] + }, + GPU_UTIL: { + 'options': [None, 'GPU Utilization', 'percentage', fam, 'nvidia_smi.gpu_utilization', 'line'], + 'lines': [ + ['gpu_util', 'utilization'], + ] + }, + MEM_UTIL: { + 'options': [None, 'Memory Bandwidth Utilization', 'percentage', fam, 'nvidia_smi.mem_utilization', 'line'], + 'lines': [ + ['memory_util', 'utilization'], + ] + }, + ENCODER_UTIL: { + 'options': [None, 'Encoder/Decoder Utilization', 'percentage', fam, 'nvidia_smi.encoder_utilization', + 'line'], + 'lines': [ + ['encoder_util', 'encoder'], + ['decoder_util', 'decoder'], + ] + }, + MEM_USAGE: { + 'options': [None, 'Memory Usage', 'MiB', fam, 'nvidia_smi.memory_allocated', 'stacked'], + 'lines': [ + ['fb_memory_free', 'free'], + ['fb_memory_used', 'used'], + ] + }, + BAR_USAGE: { + 'options': [None, 'Bar1 Memory Usage', 'MiB', fam, 'nvidia_smi.bar1_memory_usage', 'stacked'], + 'lines': [ + ['bar1_memory_free', 'free'], + ['bar1_memory_used', 'used'], + ] + }, + TEMPERATURE: { + 'options': [None, 'Temperature', 'celsius', fam, 'nvidia_smi.temperature', 'line'], + 'lines': [ + ['gpu_temp', 'temp'], + ] + }, + CLOCKS: { + 'options': [None, 'Clock Frequencies', 'MHz', fam, 'nvidia_smi.clocks', 'line'], + 'lines': [ + ['graphics_clock', 'graphics'], + ['video_clock', 'video'], + ['sm_clock', 'sm'], + ['mem_clock', 'mem'], + ] + }, + POWER: { + 'options': [None, 'Power Utilization', 'Watts', fam, 'nvidia_smi.power', 'line'], + 'lines': [ + ['power_draw', 'power', 'absolute', 1, 100], + ] + }, + POWER_STATE: { + 'options': [None, 'Power State', 'state', fam, 'nvidia_smi.power_state', 'line'], + 'lines': [['power_state_' + v.lower(), v, 'absolute'] for v in POWER_STATES] + }, + PROCESSES_MEM: { + 'options': [None, 'Memory Used by Each Process', 'MiB', fam, 'nvidia_smi.processes_mem', 'stacked'], + 'lines': [] + }, + USER_MEM: { + 'options': [None, 'Memory Used by Each User', 'MiB', fam, 'nvidia_smi.user_mem', 'stacked'], + 'lines': [] + }, + USER_NUM: { + 'options': [None, 'Number of User on GPU', 'num', fam, 'nvidia_smi.user_num', 'line'], + 'lines': [ + ['user_num', 'users'], + ] + }, + } + + idx = gpu.num + + order = ['gpu{0}_{1}'.format(idx, v) for v in ORDER] + charts = dict(('gpu{0}_{1}'.format(idx, k), v) for k, v in charts.items()) + + for chart in charts.values(): + for line in chart['lines']: + line[0] = 'gpu{0}_{1}'.format(idx, line[0]) + + return order, charts + + +class NvidiaSMI: + def __init__(self): + self.command = find_binary(NVIDIA_SMI) + self.active_proc = None + + def run_once(self): + proc = subprocess.Popen([self.command, '-x', '-q'], stdout=subprocess.PIPE) + stdout, _ = proc.communicate() + return stdout + + def run_loop(self, interval): + if self.active_proc: + self.kill() + proc = subprocess.Popen([self.command, '-x', '-q', '-l', str(interval)], stdout=subprocess.PIPE) + self.active_proc = proc + return proc.stdout + + def kill(self): + if self.active_proc: + self.active_proc.kill() + self.active_proc = None + + +class NvidiaSMIPoller(threading.Thread): + def __init__(self, poll_interval): + threading.Thread.__init__(self) + self.daemon = True + + self.smi = NvidiaSMI() + self.interval = poll_interval + + self.lock = threading.RLock() + self.last_data = str() + self.exit = False + self.empty_rows = 0 + self.rows = list() + + def has_smi(self): + return bool(self.smi.command) + + def run_once(self): + return self.smi.run_once() + + def run(self): + out = self.smi.run_loop(self.interval) + + for row in out: + if self.exit or self.empty_rows > EMPTY_ROW_LIMIT: + break + self.process_row(row) + self.smi.kill() + + def process_row(self, row): + row = row.decode() + self.empty_rows += (row == EMPTY_ROW) + self.rows.append(row) + + if POLLER_BREAK_ROW in row: + self.lock.acquire() + self.last_data = '\n'.join(self.rows) + self.lock.release() + + self.rows = list() + self.empty_rows = 0 + + def is_started(self): + return self.ident is not None + + def shutdown(self): + self.exit = True + + def data(self): + self.lock.acquire() + data = self.last_data + self.lock.release() + return data + + +def handle_attr_error(method): + def on_call(*args, **kwargs): + try: + return method(*args, **kwargs) + except AttributeError: + return None + + return on_call + + +def handle_value_error(method): + def on_call(*args, **kwargs): + try: + return method(*args, **kwargs) + except ValueError: + return None + + return on_call + + +HOST_PREFIX = os.getenv('NETDATA_HOST_PREFIX') +ETC_PASSWD_PATH = '/etc/passwd' +PROC_PATH = '/proc' + +IS_INSIDE_DOCKER = False + +if HOST_PREFIX: + ETC_PASSWD_PATH = os.path.join(HOST_PREFIX, ETC_PASSWD_PATH[1:]) + PROC_PATH = os.path.join(HOST_PREFIX, PROC_PATH[1:]) + IS_INSIDE_DOCKER = True + + +def read_passwd_file(): + data = dict() + with open(ETC_PASSWD_PATH, 'r') as f: + for line in f: + line = line.strip() + if line.startswith("#"): + continue + fields = line.split(":") + # name, passwd, uid, gid, comment, home_dir, shell + if len(fields) != 7: + continue + # uid, guid + fields[2], fields[3] = int(fields[2]), int(fields[3]) + data[fields[2]] = fields + return data + + +def read_passwd_file_safe(): + try: + if IS_INSIDE_DOCKER: + return read_passwd_file() + return dict((k[2], k) for k in pwd.getpwall()) + except (OSError, IOError): + return dict() + + +def get_username_by_pid_safe(pid, passwd_file): + path = os.path.join(PROC_PATH, pid) + try: + uid = os.stat(path).st_uid + except (OSError, IOError): + return '' + try: + if IS_INSIDE_DOCKER: + return passwd_file[uid][0] + return pwd.getpwuid(uid)[0] + except KeyError: + return str(uid) + + +class GPU: + def __init__(self, num, root, exclude_zero_memory_users=False): + self.num = num + self.root = root + self.exclude_zero_memory_users = exclude_zero_memory_users + + def id(self): + return self.root.get('id') + + def name(self): + return self.root.find('product_name').text + + def full_name(self): + return 'gpu{0} {1}'.format(self.num, self.name()) + + @handle_attr_error + def rx_util(self): + return self.root.find('pci').find('rx_util').text.split()[0] + + @handle_attr_error + def tx_util(self): + return self.root.find('pci').find('tx_util').text.split()[0] + + @handle_attr_error + def fan_speed(self): + return self.root.find('fan_speed').text.split()[0] + + @handle_attr_error + def gpu_util(self): + return self.root.find('utilization').find('gpu_util').text.split()[0] + + @handle_attr_error + def memory_util(self): + return self.root.find('utilization').find('memory_util').text.split()[0] + + @handle_attr_error + def encoder_util(self): + return self.root.find('utilization').find('encoder_util').text.split()[0] + + @handle_attr_error + def decoder_util(self): + return self.root.find('utilization').find('decoder_util').text.split()[0] + + @handle_attr_error + def fb_memory_used(self): + return self.root.find('fb_memory_usage').find('used').text.split()[0] + + @handle_attr_error + def fb_memory_free(self): + return self.root.find('fb_memory_usage').find('free').text.split()[0] + + @handle_attr_error + def bar1_memory_used(self): + return self.root.find('bar1_memory_usage').find('used').text.split()[0] + + @handle_attr_error + def bar1_memory_free(self): + return self.root.find('bar1_memory_usage').find('free').text.split()[0] + + @handle_attr_error + def temperature(self): + return self.root.find('temperature').find('gpu_temp').text.split()[0] + + @handle_attr_error + def graphics_clock(self): + return self.root.find('clocks').find('graphics_clock').text.split()[0] + + @handle_attr_error + def video_clock(self): + return self.root.find('clocks').find('video_clock').text.split()[0] + + @handle_attr_error + def sm_clock(self): + return self.root.find('clocks').find('sm_clock').text.split()[0] + + @handle_attr_error + def mem_clock(self): + return self.root.find('clocks').find('mem_clock').text.split()[0] + + @handle_attr_error + def power_state(self): + return str(self.root.find('power_readings').find('power_state').text.split()[0]) + + @handle_value_error + @handle_attr_error + def power_draw(self): + return float(self.root.find('power_readings').find('power_draw').text.split()[0]) * 100 + + @handle_attr_error + def processes(self): + processes_info = self.root.find('processes').findall('process_info') + if not processes_info: + return list() + + passwd_file = read_passwd_file_safe() + processes = list() + + for info in processes_info: + pid = info.find('pid').text + processes.append({ + 'pid': int(pid), + 'process_name': info.find('process_name').text, + 'used_memory': int(info.find('used_memory').text.split()[0]), + 'username': get_username_by_pid_safe(pid, passwd_file), + }) + return processes + + def data(self): + data = { + 'rx_util': self.rx_util(), + 'tx_util': self.tx_util(), + 'fan_speed': self.fan_speed(), + 'gpu_util': self.gpu_util(), + 'memory_util': self.memory_util(), + 'encoder_util': self.encoder_util(), + 'decoder_util': self.decoder_util(), + 'fb_memory_used': self.fb_memory_used(), + 'fb_memory_free': self.fb_memory_free(), + 'bar1_memory_used': self.bar1_memory_used(), + 'bar1_memory_free': self.bar1_memory_free(), + 'gpu_temp': self.temperature(), + 'graphics_clock': self.graphics_clock(), + 'video_clock': self.video_clock(), + 'sm_clock': self.sm_clock(), + 'mem_clock': self.mem_clock(), + 'power_draw': self.power_draw(), + } + + for v in POWER_STATES: + data['power_state_' + v.lower()] = 0 + p_state = self.power_state() + if p_state: + data['power_state_' + p_state.lower()] = 1 + + processes = self.processes() or [] + users = set() + for p in processes: + data['process_mem_{0}'.format(p['pid'])] = p['used_memory'] + if p['username']: + if self.exclude_zero_memory_users and p['used_memory'] == 0: + continue + users.add(p['username']) + key = 'user_mem_{0}'.format(p['username']) + if key in data: + data[key] += p['used_memory'] + else: + data[key] = p['used_memory'] + data['user_num'] = len(users) + + return dict(('gpu{0}_{1}'.format(self.num, k), v) for k, v in data.items()) + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + super(Service, self).__init__(configuration=configuration, name=name) + self.order = list() + self.definitions = dict() + self.loop_mode = configuration.get('loop_mode', True) + poll = int(configuration.get('poll_seconds', self.get_update_every())) + self.exclude_zero_memory_users = configuration.get('exclude_zero_memory_users', False) + self.poller = NvidiaSMIPoller(poll) + + def get_data_loop_mode(self): + if not self.poller.is_started(): + self.poller.start() + + if not self.poller.is_alive(): + self.debug('poller is off') + return None + + return self.poller.data() + + def get_data_normal_mode(self): + return self.poller.run_once() + + def get_data(self): + if self.loop_mode: + last_data = self.get_data_loop_mode() + else: + last_data = self.get_data_normal_mode() + + if not last_data: + return None + + parsed = self.parse_xml(last_data) + if parsed is None: + return None + + data = dict() + for idx, root in enumerate(parsed.findall('gpu')): + gpu = GPU(idx, root, self.exclude_zero_memory_users) + gpu_data = gpu.data() + # self.debug(gpu_data) + gpu_data = dict((k, v) for k, v in gpu_data.items() if is_gpu_data_value_valid(v)) + data.update(gpu_data) + self.update_processes_mem_chart(gpu) + self.update_processes_user_mem_chart(gpu) + + return data or None + + def update_processes_mem_chart(self, gpu): + ps = gpu.processes() + if not ps: + return + chart = self.charts['gpu{0}_{1}'.format(gpu.num, PROCESSES_MEM)] + active_dim_ids = [] + for p in ps: + dim_id = 'gpu{0}_process_mem_{1}'.format(gpu.num, p['pid']) + active_dim_ids.append(dim_id) + if dim_id not in chart: + chart.add_dimension([dim_id, '{0} {1}'.format(p['pid'], p['process_name'])]) + for dim in chart: + if dim.id not in active_dim_ids: + chart.del_dimension(dim.id, hide=False) + + def update_processes_user_mem_chart(self, gpu): + ps = gpu.processes() + if not ps: + return + chart = self.charts['gpu{0}_{1}'.format(gpu.num, USER_MEM)] + active_dim_ids = [] + for p in ps: + if not p.get('username'): + continue + dim_id = 'gpu{0}_user_mem_{1}'.format(gpu.num, p['username']) + active_dim_ids.append(dim_id) + if dim_id not in chart: + chart.add_dimension([dim_id, '{0}'.format(p['username'])]) + + for dim in chart: + if dim.id not in active_dim_ids: + chart.del_dimension(dim.id, hide=False) + + def check(self): + if not self.poller.has_smi(): + self.error("couldn't find '{0}' binary".format(NVIDIA_SMI)) + return False + + raw_data = self.poller.run_once() + if not raw_data: + self.error("failed to invoke '{0}' binary".format(NVIDIA_SMI)) + return False + + parsed = self.parse_xml(raw_data) + if parsed is None: + return False + + gpus = parsed.findall('gpu') + if not gpus: + return False + + self.create_charts(gpus) + + return True + + def parse_xml(self, data): + try: + return et.fromstring(data) + except et.ParseError as error: + self.error('xml parse failed: "{0}", error: {1}'.format(data, error)) + + return None + + def create_charts(self, gpus): + for idx, root in enumerate(gpus): + order, charts = gpu_charts(GPU(idx, root)) + self.order.extend(order) + self.definitions.update(charts) + + +def is_gpu_data_value_valid(value): + try: + int(value) + except (TypeError, ValueError): + return False + return True diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf new file mode 100644 index 0000000..3d2a30d --- /dev/null +++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf @@ -0,0 +1,68 @@ +# netdata python.d.plugin configuration for nvidia_smi +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, example also supports the following: +# +# loop_mode: yes/no # default is yes. If set to yes `nvidia-smi` is executed in a separate thread using `-l` option. +# poll_seconds: SECONDS # default is 1. Sets the frequency of seconds the nvidia-smi tool is polled in loop mode. +# exclude_zero_memory_users: yes/no # default is no. Whether to collect users metrics with 0Mb memory allocation. +# +# ---------------------------------------------------------------------- diff --git a/collectors/python.d.plugin/openldap/Makefile.inc b/collectors/python.d.plugin/openldap/Makefile.inc new file mode 100644 index 0000000..dc947e2 --- /dev/null +++ b/collectors/python.d.plugin/openldap/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += openldap/openldap.chart.py +dist_pythonconfig_DATA += openldap/openldap.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += openldap/README.md openldap/Makefile.inc + diff --git a/collectors/python.d.plugin/openldap/README.md b/collectors/python.d.plugin/openldap/README.md new file mode 100644 index 0000000..b0cd1db --- /dev/null +++ b/collectors/python.d.plugin/openldap/README.md @@ -0,0 +1,79 @@ + + +# OpenLDAP monitoring with Netdata + +Provides statistics information from openldap (slapd) server. +Statistics are taken from LDAP monitoring interface. Manual page, slapd-monitor(5) is available. + +**Requirement:** + +- Follow instructions from to activate monitoring interface. +- Install python ldap module `pip install ldap` or `yum install python-ldap` +- Modify openldap.conf with your credentials + +### Module gives information with following charts: + +1. **connections** + + - total connections number + +2. **Bytes** + + - sent + +3. **operations** + + - completed + - initiated + +4. **referrals** + + - sent + +5. **entries** + + - sent + +6. **ldap operations** + + - bind + - search + - unbind + - add + - delete + - modify + - compare + +7. **waiters** + + - read + - write + +## Configuration + +Edit the `python.d/openldap.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/openldap.conf +``` + +Sample: + +```yaml +openldap: + name : 'local' + username : "cn=monitor,dc=superb,dc=eu" + password : "testpass" + server : 'localhost' + port : 389 +``` + +--- + + diff --git a/collectors/python.d.plugin/openldap/openldap.chart.py b/collectors/python.d.plugin/openldap/openldap.chart.py new file mode 100644 index 0000000..aba1439 --- /dev/null +++ b/collectors/python.d.plugin/openldap/openldap.chart.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +# Description: openldap netdata python.d module +# Author: Manolis Kartsonakis (ekartsonakis) +# SPDX-License-Identifier: GPL-3.0+ + +try: + import ldap + + HAS_LDAP = True +except ImportError: + HAS_LDAP = False + +from bases.FrameworkServices.SimpleService import SimpleService + +DEFAULT_SERVER = 'localhost' +DEFAULT_PORT = '389' +DEFAULT_TLS = False +DEFAULT_CERT_CHECK = True +DEFAULT_TIMEOUT = 1 +DEFAULT_START_TLS = False + +ORDER = [ + 'total_connections', + 'bytes_sent', + 'operations', + 'referrals_sent', + 'entries_sent', + 'ldap_operations', + 'waiters' +] + +CHARTS = { + 'total_connections': { + 'options': [None, 'Total Connections', 'connections/s', 'ldap', 'openldap.total_connections', 'line'], + 'lines': [ + ['total_connections', 'connections', 'incremental'] + ] + }, + 'bytes_sent': { + 'options': [None, 'Traffic', 'KiB/s', 'ldap', 'openldap.traffic_stats', 'line'], + 'lines': [ + ['bytes_sent', 'sent', 'incremental', 1, 1024] + ] + }, + 'operations': { + 'options': [None, 'Operations Status', 'ops/s', 'ldap', 'openldap.operations_status', 'line'], + 'lines': [ + ['completed_operations', 'completed', 'incremental'], + ['initiated_operations', 'initiated', 'incremental'] + ] + }, + 'referrals_sent': { + 'options': [None, 'Referrals', 'referrals/s', 'ldap', 'openldap.referrals', 'line'], + 'lines': [ + ['referrals_sent', 'sent', 'incremental'] + ] + }, + 'entries_sent': { + 'options': [None, 'Entries', 'entries/s', 'ldap', 'openldap.entries', 'line'], + 'lines': [ + ['entries_sent', 'sent', 'incremental'] + ] + }, + 'ldap_operations': { + 'options': [None, 'Operations', 'ops/s', 'ldap', 'openldap.ldap_operations', 'line'], + 'lines': [ + ['bind_operations', 'bind', 'incremental'], + ['search_operations', 'search', 'incremental'], + ['unbind_operations', 'unbind', 'incremental'], + ['add_operations', 'add', 'incremental'], + ['delete_operations', 'delete', 'incremental'], + ['modify_operations', 'modify', 'incremental'], + ['compare_operations', 'compare', 'incremental'] + ] + }, + 'waiters': { + 'options': [None, 'Waiters', 'waiters/s', 'ldap', 'openldap.waiters', 'line'], + 'lines': [ + ['write_waiters', 'write', 'incremental'], + ['read_waiters', 'read', 'incremental'] + ] + }, +} + +# Stuff to gather - make tuples of DN dn and attrib to get +SEARCH_LIST = { + 'total_connections': ( + 'cn=Total,cn=Connections,cn=Monitor', 'monitorCounter', + ), + 'bytes_sent': ( + 'cn=Bytes,cn=Statistics,cn=Monitor', 'monitorCounter', + ), + 'completed_operations': ( + 'cn=Operations,cn=Monitor', 'monitorOpCompleted', + ), + 'initiated_operations': ( + 'cn=Operations,cn=Monitor', 'monitorOpInitiated', + ), + 'referrals_sent': ( + 'cn=Referrals,cn=Statistics,cn=Monitor', 'monitorCounter', + ), + 'entries_sent': ( + 'cn=Entries,cn=Statistics,cn=Monitor', 'monitorCounter', + ), + 'bind_operations': ( + 'cn=Bind,cn=Operations,cn=Monitor', 'monitorOpCompleted', + ), + 'unbind_operations': ( + 'cn=Unbind,cn=Operations,cn=Monitor', 'monitorOpCompleted', + ), + 'add_operations': ( + 'cn=Add,cn=Operations,cn=Monitor', 'monitorOpInitiated', + ), + 'delete_operations': ( + 'cn=Delete,cn=Operations,cn=Monitor', 'monitorOpCompleted', + ), + 'modify_operations': ( + 'cn=Modify,cn=Operations,cn=Monitor', 'monitorOpCompleted', + ), + 'compare_operations': ( + 'cn=Compare,cn=Operations,cn=Monitor', 'monitorOpCompleted', + ), + 'search_operations': ( + 'cn=Search,cn=Operations,cn=Monitor', 'monitorOpCompleted', + ), + 'write_waiters': ( + 'cn=Write,cn=Waiters,cn=Monitor', 'monitorCounter', + ), + 'read_waiters': ( + 'cn=Read,cn=Waiters,cn=Monitor', 'monitorCounter', + ), +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.server = configuration.get('server', DEFAULT_SERVER) + self.port = configuration.get('port', DEFAULT_PORT) + self.username = configuration.get('username') + self.password = configuration.get('password') + self.timeout = configuration.get('timeout', DEFAULT_TIMEOUT) + self.use_tls = configuration.get('use_tls', DEFAULT_TLS) + self.cert_check = configuration.get('cert_check', DEFAULT_CERT_CHECK) + self.use_start_tls = configuration.get('use_start_tls', DEFAULT_START_TLS) + self.alive = False + self.conn = None + + def disconnect(self): + if self.conn: + self.conn.unbind() + self.conn = None + self.alive = False + + def connect(self): + try: + if self.use_tls: + self.conn = ldap.initialize('ldaps://%s:%s' % (self.server, self.port)) + else: + self.conn = ldap.initialize('ldap://%s:%s' % (self.server, self.port)) + self.conn.set_option(ldap.OPT_NETWORK_TIMEOUT, self.timeout) + if (self.use_tls or self.use_start_tls) and not self.cert_check: + self.conn.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER) + if self.use_start_tls or self.use_tls: + self.conn.set_option(ldap.OPT_X_TLS_NEWCTX, 0) + if self.use_start_tls: + self.conn.protocol_version = ldap.VERSION3 + self.conn.start_tls_s() + if self.username and self.password: + self.conn.simple_bind(self.username, self.password) + except ldap.LDAPError as error: + self.error(error) + return False + + self.alive = True + return True + + def reconnect(self): + self.disconnect() + return self.connect() + + def check(self): + if not HAS_LDAP: + self.error("'python-ldap' package is needed") + return None + + return self.connect() and self.get_data() + + def get_data(self): + if not self.alive and not self.reconnect(): + return None + + data = dict() + for key in SEARCH_LIST: + dn = SEARCH_LIST[key][0] + attr = SEARCH_LIST[key][1] + try: + num = self.conn.search(dn, ldap.SCOPE_BASE, 'objectClass=*', [attr, ]) + result_type, result_data = self.conn.result(num, 1) + except ldap.LDAPError as error: + self.error("Empty result. Check bind username/password. Message: ", error) + self.alive = False + return None + + if result_type != 101: + continue + + try: + data[key] = int(list(result_data[0][1].values())[0][0]) + except (ValueError, IndexError) as error: + self.debug(error) + continue + + return data diff --git a/collectors/python.d.plugin/openldap/openldap.conf b/collectors/python.d.plugin/openldap/openldap.conf new file mode 100644 index 0000000..5fd99a5 --- /dev/null +++ b/collectors/python.d.plugin/openldap/openldap.conf @@ -0,0 +1,75 @@ +# netdata python.d.plugin configuration for openldap +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# postfix is slow, so once every 10 seconds +update_every: 10 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# ---------------------------------------------------------------------- +# OPENLDAP EXTRA PARAMETERS + +# Set here your LDAP connection settings + +#username : "cn=admin,dc=example,dc=com" # The bind user with right to access monitor statistics +#password : "yourpass" # The password for the binded user +#server : 'localhost' # The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. +#port : 389 # The listening port of the LDAP server. Change to 636 port in case of TLS connection +#use_tls : False # Make True if a TLS connection is used over ldaps:// +#use_start_tls: False # Make True if a TLS connection is used over ldap:// +#cert_check : True # False if you want to ignore certificate check +#timeout : 1 # Seconds to timeout if no connection exi diff --git a/collectors/python.d.plugin/oracledb/Makefile.inc b/collectors/python.d.plugin/oracledb/Makefile.inc new file mode 100644 index 0000000..ea3a824 --- /dev/null +++ b/collectors/python.d.plugin/oracledb/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += oracledb/oracledb.chart.py +dist_pythonconfig_DATA += oracledb/oracledb.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += oracledb/README.md oracledb/Makefile.inc + diff --git a/collectors/python.d.plugin/oracledb/README.md b/collectors/python.d.plugin/oracledb/README.md new file mode 100644 index 0000000..88024f8 --- /dev/null +++ b/collectors/python.d.plugin/oracledb/README.md @@ -0,0 +1,97 @@ + + +# OracleDB monitoring with Netdata + +Monitors the performance and health metrics of the Oracle database. + +## Requirements + +- `cx_Oracle` package. +- Oracle Client (using `cx_Oracle` requires Oracle Client libraries to be installed). + +It produces following charts: + +- session activity + - Session Count + - Session Limit Usage + - Logons +- disk activity + - Physical Disk Reads/Writes + - Sorts On Disk + - Full Table Scans +- database and buffer activity + - Database Wait Time Ratio + - Shared Pool Free Memory + - In-Memory Sorts Ratio + - SQL Service Response Time + - User Rollbacks + - Enqueue Timeouts +- cache + - Cache Hit Ratio + - Global Cache Blocks Events +- activities + - Activities +- wait time + - Wait Time +- tablespace + - Size + - Usage + - Usage In Percent +- allocated space + - Size + - Usage + - Usage In Percent + +## prerequisite + +To use the Oracle module do the following: + +1. Install `cx_Oracle` package ([link](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html)). + +2. Install Oracle Client libraries + ([link](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html#install-oracle-client)). + +3. Create a read-only `netdata` user with proper access to your Oracle Database Server. + +Connect to your Oracle database with an administrative user and execute: + +``` +ALTER SESSION SET "_ORACLE_SCRIPT"=true; + +CREATE USER netdata IDENTIFIED BY ; + +GRANT CONNECT TO netdata; +GRANT SELECT_CATALOG_ROLE TO netdata; +``` + +## Configuration + +Edit the `python.d/oracledb.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/oracledb.conf +``` + +```yaml +local: + user: 'netdata' + password: 'secret' + server: 'localhost:1521' + service: 'XE' + +remote: + user: 'netdata' + password: 'secret' + server: '10.0.0.1:1521' + service: 'XE' +``` + +All parameters are required. Without them module will fail to start. + + diff --git a/collectors/python.d.plugin/oracledb/oracledb.chart.py b/collectors/python.d.plugin/oracledb/oracledb.chart.py new file mode 100644 index 0000000..28ef8db --- /dev/null +++ b/collectors/python.d.plugin/oracledb/oracledb.chart.py @@ -0,0 +1,831 @@ +# -*- coding: utf-8 -*- +# Description: oracledb netdata python.d module +# Author: ilyam8 (Ilya Mashchenko) +# SPDX-License-Identifier: GPL-3.0-or-later + +from copy import deepcopy + +from bases.FrameworkServices.SimpleService import SimpleService + +try: + import cx_Oracle + + HAS_ORACLE = True +except ImportError: + HAS_ORACLE = False + +ORDER = [ + 'session_count', + 'session_limit_usage', + 'logons', + 'physical_disk_read_write', + 'sorts_on_disk', + 'full_table_scans', + 'database_wait_time_ratio', + 'shared_pool_free_memory', + 'in_memory_sorts_ratio', + 'sql_service_response_time', + 'user_rollbacks', + 'enqueue_timeouts', + 'cache_hit_ratio', + 'global_cache_blocks', + 'activity', + 'wait_time', + 'tablespace_size', + 'tablespace_usage', + 'tablespace_usage_in_percent', + 'allocated_size', + 'allocated_usage', + 'allocated_usage_in_percent', +] + +CHARTS = { + 'session_count': { + 'options': [None, 'Session Count', 'sessions', 'session activity', 'oracledb.session_count', 'line'], + 'lines': [ + ['session_count', 'total', 'absolute', 1, 1000], + ['average_active_sessions', 'active', 'absolute', 1, 1000], + ] + }, + 'session_limit_usage': { + 'options': [None, 'Session Limit Usage', '%', 'session activity', 'oracledb.session_limit_usage', 'area'], + 'lines': [ + ['session_limit_percent', 'usage', 'absolute', 1, 1000], + ] + }, + 'logons': { + 'options': [None, 'Logons', 'events/s', 'session activity', 'oracledb.logons', 'area'], + 'lines': [ + ['logons_per_sec', 'logons', 'absolute', 1, 1000], + ] + }, + 'physical_disk_read_write': { + 'options': [None, 'Physical Disk Reads/Writes', 'events/s', 'disk activity', + 'oracledb.physical_disk_read_writes', 'area'], + 'lines': [ + ['physical_reads_per_sec', 'reads', 'absolute', 1, 1000], + ['physical_writes_per_sec', 'writes', 'absolute', -1, 1000], + ] + }, + 'sorts_on_disk': { + 'options': [None, 'Sorts On Disk', 'events/s', 'disk activity', 'oracledb.sorts_on_disks', 'line'], + 'lines': [ + ['disk_sort_per_sec', 'sorts', 'absolute', 1, 1000], + ] + }, + 'full_table_scans': { + 'options': [None, 'Full Table Scans', 'events/s', 'disk activity', 'oracledb.full_table_scans', 'line'], + 'lines': [ + ['long_table_scans_per_sec', 'full table scans', 'absolute', 1, 1000], + ] + }, + 'database_wait_time_ratio': { + 'options': [None, 'Database Wait Time Ratio', '%', 'database and buffer activity', + 'oracledb.database_wait_time_ratio', 'line'], + 'lines': [ + ['database_wait_time_ratio', 'wait time ratio', 'absolute', 1, 1000], + ] + }, + 'shared_pool_free_memory': { + 'options': [None, 'Shared Pool Free Memory', '%', 'database and buffer activity', + 'oracledb.shared_pool_free_memory', 'line'], + 'lines': [ + ['shared_pool_free_percent', 'free memory', 'absolute', 1, 1000], + ] + }, + 'in_memory_sorts_ratio': { + 'options': [None, 'In-Memory Sorts Ratio', '%', 'database and buffer activity', + 'oracledb.in_memory_sorts_ratio', 'line'], + 'lines': [ + ['memory_sorts_ratio', 'in-memory sorts', 'absolute', 1, 1000], + ] + }, + 'sql_service_response_time': { + 'options': [None, 'SQL Service Response Time', 'seconds', 'database and buffer activity', + 'oracledb.sql_service_response_time', 'line'], + 'lines': [ + ['sql_service_response_time', 'time', 'absolute', 1, 1000], + ] + }, + 'user_rollbacks': { + 'options': [None, 'User Rollbacks', 'events/s', 'database and buffer activity', + 'oracledb.user_rollbacks', 'line'], + 'lines': [ + ['user_rollbacks_per_sec', 'rollbacks', 'absolute', 1, 1000], + ] + }, + 'enqueue_timeouts': { + 'options': [None, 'Enqueue Timeouts', 'events/s', 'database and buffer activity', + 'oracledb.enqueue_timeouts', 'line'], + 'lines': [ + ['enqueue_timeouts_per_sec', 'enqueue timeouts', 'absolute', 1, 1000], + ] + }, + 'cache_hit_ratio': { + 'options': [None, 'Cache Hit Ratio', '%', 'cache', 'oracledb.cache_hit_ration', 'stacked'], + 'lines': [ + ['buffer_cache_hit_ratio', 'buffer', 'absolute', 1, 1000], + ['cursor_cache_hit_ratio', 'cursor', 'absolute', 1, 1000], + ['library_cache_hit_ratio', 'library', 'absolute', 1, 1000], + ['row_cache_hit_ratio', 'row', 'absolute', 1, 1000], + ] + }, + 'global_cache_blocks': { + 'options': [None, 'Global Cache Blocks Events', 'events/s', 'cache', 'oracledb.global_cache_blocks', 'area'], + 'lines': [ + ['global_cache_blocks_corrupted', 'corrupted', 'incremental', 1, 1000], + ['global_cache_blocks_lost', 'lost', 'incremental', 1, 1000], + ] + }, + 'activity': { + 'options': [None, 'Activities', 'events/s', 'activities', 'oracledb.activity', 'stacked'], + 'lines': [ + ['activity_parse_count_total', 'parse count', 'incremental', 1, 1000], + ['activity_execute_count', 'execute count', 'incremental', 1, 1000], + ['activity_user_commits', 'user commits', 'incremental', 1, 1000], + ['activity_user_rollbacks', 'user rollbacks', 'incremental', 1, 1000], + ] + }, + 'wait_time': { + 'options': [None, 'Wait Time', 'ms', 'wait time', 'oracledb.wait_time', 'stacked'], + 'lines': [ + ['wait_time_application', 'application', 'absolute', 1, 1000], + ['wait_time_configuration', 'configuration', 'absolute', 1, 1000], + ['wait_time_administrative', 'administrative', 'absolute', 1, 1000], + ['wait_time_concurrency', 'concurrency', 'absolute', 1, 1000], + ['wait_time_commit', 'commit', 'absolute', 1, 1000], + ['wait_time_network', 'network', 'absolute', 1, 1000], + ['wait_time_user_io', 'user I/O', 'absolute', 1, 1000], + ['wait_time_system_io', 'system I/O', 'absolute', 1, 1000], + ['wait_time_scheduler', 'scheduler', 'absolute', 1, 1000], + ['wait_time_other', 'other', 'absolute', 1, 1000], + ] + }, + 'tablespace_size': { + 'options': [None, 'Size', 'KiB', 'tablespace', 'oracledb.tablespace_size', 'line'], + 'lines': [], + }, + 'tablespace_usage': { + 'options': [None, 'Usage', 'KiB', 'tablespace', 'oracledb.tablespace_usage', 'line'], + 'lines': [], + }, + 'tablespace_usage_in_percent': { + 'options': [None, 'Usage', '%', 'tablespace', 'oracledb.tablespace_usage_in_percent', 'line'], + 'lines': [], + }, + 'allocated_size': { + 'options': [None, 'Size', 'B', 'tablespace', 'oracledb.allocated_size', 'line'], + 'lines': [], + }, + 'allocated_usage': { + 'options': [None, 'Usage', 'B', 'tablespace', 'oracledb.allocated_usage', 'line'], + 'lines': [], + }, + 'allocated_usage_in_percent': { + 'options': [None, 'Usage', '%', 'tablespace', 'oracledb.allocated_usage_in_percent', 'line'], + 'lines': [], + }, +} + +CX_CONNECT_STRING = "{0}/{1}@//{2}/{3}" + +QUERY_SYSTEM = ''' +SELECT + metric_name, + value +FROM + gv$sysmetric +ORDER BY + begin_time +''' +QUERY_TABLESPACE = ''' +SELECT + m.tablespace_name, + m.used_space * t.block_size AS used_bytes, + m.tablespace_size * t.block_size AS max_bytes, + m.used_percent +FROM + dba_tablespace_usage_metrics m + JOIN dba_tablespaces t ON m.tablespace_name = t.tablespace_name +''' +QUERY_ALLOCATED = ''' +SELECT + nvl(b.tablespace_name,nvl(a.tablespace_name,'UNKNOWN')) tablespace_name, + bytes_alloc used_bytes, + bytes_alloc-nvl(bytes_free,0) max_bytes, + ((bytes_alloc-nvl(bytes_free,0))/ bytes_alloc)*100 used_percent +FROM + (SELECT + sum(bytes) bytes_free, + tablespace_name + FROM sys.dba_free_space + GROUP BY tablespace_name + ) a, + (SELECT + sum(bytes) bytes_alloc, + tablespace_name + FROM sys.dba_data_files + GROUP BY tablespace_name + ) b +WHERE a.tablespace_name (+) = b.tablespace_name +''' +QUERY_ACTIVITIES_COUNT = ''' +SELECT + name, + value +FROM + v$sysstat +WHERE + name IN ( + 'parse count (total)', + 'execute count', + 'user commits', + 'user rollbacks' + ) +''' +QUERY_WAIT_TIME = ''' +SELECT + n.wait_class, + round(m.time_waited / m.INTSIZE_CSEC, 3) +FROM + v$waitclassmetric m, + v$system_wait_class n +WHERE + m.wait_class_id = n.wait_class_id + AND n.wait_class != 'Idle' +''' +# QUERY_SESSION_COUNT = ''' +# SELECT +# status, +# type +# FROM +# v$session +# GROUP BY +# status, +# type +# ''' +# QUERY_PROCESSES_COUNT = ''' +# SELECT +# COUNT(*) +# FROM +# v$process +# ''' +# QUERY_PROCESS = ''' +# SELECT +# program, +# pga_used_mem, +# pga_alloc_mem, +# pga_freeable_mem, +# pga_max_mem +# FROM +# gv$process +# ''' + +# PROCESS_METRICS = [ +# 'pga_used_memory', +# 'pga_allocated_memory', +# 'pga_freeable_memory', +# 'pga_maximum_memory', +# ] + + +SYS_METRICS = { + 'Average Active Sessions': 'average_active_sessions', + 'Session Count': 'session_count', + 'Session Limit %': 'session_limit_percent', + 'Logons Per Sec': 'logons_per_sec', + 'Physical Reads Per Sec': 'physical_reads_per_sec', + 'Physical Writes Per Sec': 'physical_writes_per_sec', + 'Disk Sort Per Sec': 'disk_sort_per_sec', + 'Long Table Scans Per Sec': 'long_table_scans_per_sec', + 'Database Wait Time Ratio': 'database_wait_time_ratio', + 'Shared Pool Free %': 'shared_pool_free_percent', + 'Memory Sorts Ratio': 'memory_sorts_ratio', + 'SQL Service Response Time': 'sql_service_response_time', + 'User Rollbacks Per Sec': 'user_rollbacks_per_sec', + 'Enqueue Timeouts Per Sec': 'enqueue_timeouts_per_sec', + 'Buffer Cache Hit Ratio': 'buffer_cache_hit_ratio', + 'Cursor Cache Hit Ratio': 'cursor_cache_hit_ratio', + 'Library Cache Hit Ratio': 'library_cache_hit_ratio', + 'Row Cache Hit Ratio': 'row_cache_hit_ratio', + 'Global Cache Blocks Corrupted': 'global_cache_blocks_corrupted', + 'Global Cache Blocks Lost': 'global_cache_blocks_lost', +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.user = configuration.get('user') + self.password = configuration.get('password') + self.server = configuration.get('server') + self.service = configuration.get('service') + self.alive = False + self.conn = None + self.active_tablespaces = set() + + def connect(self): + if self.conn: + self.conn.close() + self.conn = None + + try: + self.conn = cx_Oracle.connect( + CX_CONNECT_STRING.format( + self.user, + self.password, + self.server, + self.service, + )) + except cx_Oracle.DatabaseError as error: + self.error(error) + return False + + self.alive = True + return True + + def reconnect(self): + return self.connect() + + def check(self): + if not HAS_ORACLE: + self.error("'cx_Oracle' package is needed to use oracledb module") + return False + + if not all([ + self.user, + self.password, + self.server, + self.service, + ]): + self.error("one of these parameters is not specified: user, password, server, service") + return False + + if not self.connect(): + return False + + return bool(self.get_data()) + + def get_data(self): + if not self.alive and not self.reconnect(): + return None + + data = dict() + + # SYSTEM + try: + rv = self.gather_system_metrics() + except cx_Oracle.Error as error: + self.error(error) + self.alive = False + return None + else: + for name, value in rv: + if name not in SYS_METRICS: + continue + data[SYS_METRICS[name]] = int(float(value) * 1000) + + # ACTIVITIES COUNT + try: + rv = self.gather_activities_count() + except cx_Oracle.Error as error: + self.error(error) + self.alive = False + return None + else: + for name, amount in rv: + cleaned = name.replace(' ', '_').replace('(', '').replace(')', '') + new_name = 'activity_{0}'.format(cleaned) + data[new_name] = int(float(amount) * 1000) + + # WAIT TIME + try: + rv = self.gather_wait_time_metrics() + except cx_Oracle.Error as error: + self.error(error) + self.alive = False + return None + else: + for name, amount in rv: + cleaned = name.replace(' ', '_').replace('/', '').lower() + new_name = 'wait_time_{0}'.format(cleaned) + data[new_name] = amount + + # TABLESPACE + try: + rv = self.gather_tablespace_metrics() + except cx_Oracle.Error as error: + self.error(error) + self.alive = False + return None + else: + for name, offline, size, used, used_in_percent in rv: + # TODO: skip offline? + if not (not offline and self.charts): + continue + # TODO: remove inactive? + if name not in self.active_tablespaces: + self.active_tablespaces.add(name) + self.add_tablespace_to_charts(name) + data['{0}_tablespace_size'.format(name)] = int(size * 1000) + data['{0}_tablespace_used'.format(name)] = int(used * 1000) + data['{0}_tablespace_used_in_percent'.format(name)] = int(used_in_percent * 1000) + + # ALLOCATED SPACE + try: + rv = self.gather_allocated_metrics() + except cx_Oracle.Error as error: + self.error(error) + self.alive = False + return None + else: + for name, offline, size, used, used_in_percent in rv: + # TODO: skip offline? + if not (not offline and self.charts): + continue + # TODO: remove inactive? + if name not in self.active_tablespaces: + self.active_tablespaces.add(name) + self.add_tablespace_to_charts(name) + data['{0}_allocated_size'.format(name)] = int(size * 1000) + data['{0}_allocated_used'.format(name)] = int(used * 1000) + data['{0}_allocated_used_in_percent'.format(name)] = int(used_in_percent * 1000) + + return data or None + + def gather_system_metrics(self): + + """ + :return: + + [['Buffer Cache Hit Ratio', 100], + ['Memory Sorts Ratio', 100], + ['Redo Allocation Hit Ratio', 100], + ['User Transaction Per Sec', 0], + ['Physical Reads Per Sec', 0], + ['Physical Reads Per Txn', 0], + ['Physical Writes Per Sec', 0], + ['Physical Writes Per Txn', 0], + ['Physical Reads Direct Per Sec', 0], + ['Physical Reads Direct Per Txn', 0], + ['Physical Writes Direct Per Sec', 0], + ['Physical Writes Direct Per Txn', 0], + ['Physical Reads Direct Lobs Per Sec', 0], + ['Physical Reads Direct Lobs Per Txn', 0], + ['Physical Writes Direct Lobs Per Sec', 0], + ['Physical Writes Direct Lobs Per Txn', 0], + ['Redo Generated Per Sec', Decimal('4.66666666666667')], + ['Redo Generated Per Txn', 280], + ['Logons Per Sec', Decimal('0.0166666666666667')], + ['Logons Per Txn', 1], + ['Open Cursors Per Sec', 0.35], + ['Open Cursors Per Txn', 21], + ['User Commits Per Sec', 0], + ['User Commits Percentage', 0], + ['User Rollbacks Per Sec', 0], + ['User Rollbacks Percentage', 0], + ['User Calls Per Sec', Decimal('0.0333333333333333')], + ['User Calls Per Txn', 2], + ['Recursive Calls Per Sec', 14.15], + ['Recursive Calls Per Txn', 849], + ['Logical Reads Per Sec', Decimal('0.683333333333333')], + ['Logical Reads Per Txn', 41], + ['DBWR Checkpoints Per Sec', 0], + ['Background Checkpoints Per Sec', 0], + ['Redo Writes Per Sec', Decimal('0.0333333333333333')], + ['Redo Writes Per Txn', 2], + ['Long Table Scans Per Sec', 0], + ['Long Table Scans Per Txn', 0], + ['Total Table Scans Per Sec', Decimal('0.0166666666666667')], + ['Total Table Scans Per Txn', 1], + ['Full Index Scans Per Sec', 0], + ['Full Index Scans Per Txn', 0], + ['Total Index Scans Per Sec', Decimal('0.216666666666667')], + ['Total Index Scans Per Txn', 13], + ['Total Parse Count Per Sec', 0.35], + ['Total Parse Count Per Txn', 21], + ['Hard Parse Count Per Sec', 0], + ['Hard Parse Count Per Txn', 0], + ['Parse Failure Count Per Sec', 0], + ['Parse Failure Count Per Txn', 0], + ['Cursor Cache Hit Ratio', Decimal('52.3809523809524')], + ['Disk Sort Per Sec', 0], + ['Disk Sort Per Txn', 0], + ['Rows Per Sort', 8.6], + ['Execute Without Parse Ratio', Decimal('27.5862068965517')], + ['Soft Parse Ratio', 100], + ['User Calls Ratio', Decimal('0.235017626321974')], + ['Host CPU Utilization (%)', Decimal('0.124311845142959')], + ['Network Traffic Volume Per Sec', 0], + ['Enqueue Timeouts Per Sec', 0], + ['Enqueue Timeouts Per Txn', 0], + ['Enqueue Waits Per Sec', 0], + ['Enqueue Waits Per Txn', 0], + ['Enqueue Deadlocks Per Sec', 0], + ['Enqueue Deadlocks Per Txn', 0], + ['Enqueue Requests Per Sec', Decimal('216.683333333333')], + ['Enqueue Requests Per Txn', 13001], + ['DB Block Gets Per Sec', 0], + ['DB Block Gets Per Txn', 0], + ['Consistent Read Gets Per Sec', Decimal('0.683333333333333')], + ['Consistent Read Gets Per Txn', 41], + ['DB Block Changes Per Sec', 0], + ['DB Block Changes Per Txn', 0], + ['Consistent Read Changes Per Sec', 0], + ['Consistent Read Changes Per Txn', 0], + ['CPU Usage Per Sec', 0], + ['CPU Usage Per Txn', 0], + ['CR Blocks Created Per Sec', 0], + ['CR Blocks Created Per Txn', 0], + ['CR Undo Records Applied Per Sec', 0], + ['CR Undo Records Applied Per Txn', 0], + ['User Rollback UndoRec Applied Per Sec', 0], + ['User Rollback Undo Records Applied Per Txn', 0], + ['Leaf Node Splits Per Sec', 0], + ['Leaf Node Splits Per Txn', 0], + ['Branch Node Splits Per Sec', 0], + ['Branch Node Splits Per Txn', 0], + ['PX downgraded 1 to 25% Per Sec', 0], + ['PX downgraded 25 to 50% Per Sec', 0], + ['PX downgraded 50 to 75% Per Sec', 0], + ['PX downgraded 75 to 99% Per Sec', 0], + ['PX downgraded to serial Per Sec', 0], + ['Physical Read Total IO Requests Per Sec', Decimal('2.16666666666667')], + ['Physical Read Total Bytes Per Sec', Decimal('35498.6666666667')], + ['GC CR Block Received Per Second', 0], + ['GC CR Block Received Per Txn', 0], + ['GC Current Block Received Per Second', 0], + ['GC Current Block Received Per Txn', 0], + ['Global Cache Average CR Get Time', 0], + ['Global Cache Average Current Get Time', 0], + ['Physical Write Total IO Requests Per Sec', Decimal('0.966666666666667')], + ['Global Cache Blocks Corrupted', 0], + ['Global Cache Blocks Lost', 0], + ['Current Logons Count', 49], + ['Current Open Cursors Count', 64], + ['User Limit %', Decimal('0.00000114087015416959')], + ['SQL Service Response Time', 0], + ['Database Wait Time Ratio', 0], + ['Database CPU Time Ratio', 0], + ['Response Time Per Txn', 0], + ['Row Cache Hit Ratio', 100], + ['Row Cache Miss Ratio', 0], + ['Library Cache Hit Ratio', 100], + ['Library Cache Miss Ratio', 0], + ['Shared Pool Free %', Decimal('7.82380268491548')], + ['PGA Cache Hit %', Decimal('98.0399767109115')], + ['Process Limit %', Decimal('17.6666666666667')], + ['Session Limit %', Decimal('15.2542372881356')], + ['Executions Per Txn', 29], + ['Executions Per Sec', Decimal('0.483333333333333')], + ['Txns Per Logon', 0], + ['Database Time Per Sec', 0], + ['Physical Write Total Bytes Per Sec', 15308.8], + ['Physical Read IO Requests Per Sec', 0], + ['Physical Read Bytes Per Sec', 0], + ['Physical Write IO Requests Per Sec', 0], + ['Physical Write Bytes Per Sec', 0], + ['DB Block Changes Per User Call', 0], + ['DB Block Gets Per User Call', 0], + ['Executions Per User Call', 14.5], + ['Logical Reads Per User Call', 20.5], + ['Total Sorts Per User Call', 2.5], + ['Total Table Scans Per User Call', 0.5], + ['Current OS Load', 0.0390625], + ['Streams Pool Usage Percentage', 0], + ['PQ QC Session Count', 0], + ['PQ Slave Session Count', 0], + ['Queries parallelized Per Sec', 0], + ['DML statements parallelized Per Sec', 0], + ['DDL statements parallelized Per Sec', 0], + ['PX operations not downgraded Per Sec', 0], + ['Session Count', 72], + ['Average Synchronous Single-Block Read Latency', 0], + ['I/O Megabytes per Second', 0.05], + ['I/O Requests per Second', Decimal('3.13333333333333')], + ['Average Active Sessions', 0], + ['Active Serial Sessions', 1], + ['Active Parallel Sessions', 0], + ['Captured user calls', 0], + ['Replayed user calls', 0], + ['Workload Capture and Replay status', 0], + ['Background CPU Usage Per Sec', Decimal('1.22578833333333')], + ['Background Time Per Sec', 0.0147551], + ['Host CPU Usage Per Sec', Decimal('0.116666666666667')], + ['Cell Physical IO Interconnect Bytes', 3048448], + ['Temp Space Used', 0], + ['Total PGA Allocated', 200657920], + ['Total PGA Used by SQL Workareas', 0], + ['Run Queue Per Sec', 0], + ['VM in bytes Per Sec', 0], + ['VM out bytes Per Sec', 0]] + """ + + metrics = list() + with self.conn.cursor() as cursor: + cursor.execute(QUERY_SYSTEM) + for metric_name, value in cursor.fetchall(): + metrics.append([metric_name, value]) + return metrics + + def gather_tablespace_metrics(self): + """ + :return: + + [['SYSTEM', 874250240.0, 3233169408.0, 27.040038107400033, 0], + ['SYSAUX', 498860032.0, 3233169408.0, 15.429443033997678, 0], + ['TEMP', 0.0, 3233177600.0, 0.0, 0], + ['USERS', 1048576.0, 3233169408.0, 0.03243182981397305, 0]] + """ + metrics = list() + with self.conn.cursor() as cursor: + cursor.execute(QUERY_TABLESPACE) + for tablespace_name, used_bytes, max_bytes, used_percent in cursor.fetchall(): + if used_bytes is None: + offline = True + used = 0 + else: + offline = False + used = float(used_bytes) + if max_bytes is None: + size = 0 + else: + size = float(max_bytes) + if used_percent is None: + used_percent = 0 + else: + used_percent = float(used_percent) + metrics.append( + [ + tablespace_name, + offline, + size, + used, + used_percent, + ] + ) + return metrics + + def gather_allocated_metrics(self): + """ + :return: + + [['SYSTEM', 874250240.0, 3233169408.0, 27.040038107400033, 0], + ['SYSAUX', 498860032.0, 3233169408.0, 15.429443033997678, 0], + ['TEMP', 0.0, 3233177600.0, 0.0, 0], + ['USERS', 1048576.0, 3233169408.0, 0.03243182981397305, 0]] + """ + metrics = list() + with self.conn.cursor() as cursor: + cursor.execute(QUERY_ALLOCATED) + for tablespace_name, used_bytes, max_bytes, used_percent in cursor.fetchall(): + if used_bytes is None: + offline = True + used = 0 + else: + offline = False + used = float(used_bytes) + if max_bytes is None: + size = 0 + else: + size = float(max_bytes) + if used_percent is None: + used_percent = 0 + else: + used_percent = float(used_percent) + metrics.append( + [ + tablespace_name, + offline, + size, + used, + used_percent, + ] + ) + return metrics + + def gather_wait_time_metrics(self): + """ + :return: + + [['Other', 0], + ['Application', 0], + ['Configuration', 0], + ['Administrative', 0], + ['Concurrency', 0], + ['Commit', 0], + ['Network', 0], + ['User I/O', 0], + ['System I/O', 0.002], + ['Scheduler', 0]] + """ + metrics = list() + with self.conn.cursor() as cursor: + cursor.execute(QUERY_WAIT_TIME) + for wait_class_name, value in cursor.fetchall(): + metrics.append([wait_class_name, value]) + return metrics + + def gather_activities_count(self): + """ + :return: + + [('user commits', 9104), + ('user rollbacks', 17), + ('parse count (total)', 483695), + ('execute count', 2020356)] + """ + with self.conn.cursor() as cursor: + cursor.execute(QUERY_ACTIVITIES_COUNT) + return cursor.fetchall() + + # def gather_process_metrics(self): + # """ + # :return: + # + # [['PSEUDO', 'pga_used_memory', 0], + # ['PSEUDO', 'pga_allocated_memory', 0], + # ['PSEUDO', 'pga_freeable_memory', 0], + # ['PSEUDO', 'pga_maximum_memory', 0], + # ['oracle@localhost.localdomain (PMON)', 'pga_used_memory', 1793827], + # ['oracle@localhost.localdomain (PMON)', 'pga_allocated_memory', 1888651], + # ['oracle@localhost.localdomain (PMON)', 'pga_freeable_memory', 0], + # ['oracle@localhost.localdomain (PMON)', 'pga_maximum_memory', 1888651], + # ... + # ... + # """ + # + # metrics = list() + # with self.conn.cursor() as cursor: + # cursor.execute(QUERY_PROCESS) + # for row in cursor.fetchall(): + # for i, name in enumerate(PROCESS_METRICS, 1): + # metrics.append([row[0], name, row[i]]) + # return metrics + + # def gather_processes_count(self): + # with self.conn.cursor() as cursor: + # cursor.execute(QUERY_PROCESSES_COUNT) + # return cursor.fetchone()[0] # 53 + + # def gather_sessions_count(self): + # with self.conn.cursor() as cursor: + # cursor.execute(QUERY_SESSION_COUNT) + # total, active, inactive = 0, 0, 0 + # for status, _ in cursor.fetchall(): + # total += 1 + # active += status == 'ACTIVE' + # inactive += status == 'INACTIVE' + # return [total, active, inactive] + + def add_tablespace_to_charts(self, name): + self.charts['tablespace_size'].add_dimension( + [ + '{0}_tablespace_size'.format(name), + name, + 'absolute', + 1, + 1024 * 1000, + ]) + self.charts['tablespace_usage'].add_dimension( + [ + '{0}_tablespace_used'.format(name), + name, + 'absolute', + 1, + 1024 * 1000, + ]) + self.charts['tablespace_usage_in_percent'].add_dimension( + [ + '{0}_tablespace_used_in_percent'.format(name), + name, + 'absolute', + 1, + 1000, + ]) + self.charts['allocated_size'].add_dimension( + [ + '{0}_allocated_size'.format(name), + name, + 'absolute', + 1, + 1000, + ]) + self.charts['allocated_usage'].add_dimension( + [ + '{0}_allocated_used'.format(name), + name, + 'absolute', + 1, + 1000, + ]) + self.charts['allocated_usage_in_percent'].add_dimension( + [ + '{0}_allocated_used_in_percent'.format(name), + name, + 'absolute', + 1, + 1000, + ]) diff --git a/collectors/python.d.plugin/oracledb/oracledb.conf b/collectors/python.d.plugin/oracledb/oracledb.conf new file mode 100644 index 0000000..6257172 --- /dev/null +++ b/collectors/python.d.plugin/oracledb/oracledb.conf @@ -0,0 +1,84 @@ +# netdata python.d.plugin configuration for oracledb +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, oracledb also supports the following: +# +# user: username # the username for the user account. Required. +# password: password # the password for the user account. Required. +# server: localhost:1521 # the IP address or hostname of the Oracle Database Server. Required. +# service: XE # the Oracle Database service name. Required. To view the services available on your server, +# run this query: `SELECT value FROM v$parameter WHERE name='service_names'`. +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +#local: +# user: 'netdata' +# password: 'secret' +# server: 'localhost:1521' +# service: 'XE' + +#remote: +# user: 'netdata' +# password: 'secret' +# server: '10.0.0.1:1521' +# service: 'XE' \ No newline at end of file diff --git a/collectors/python.d.plugin/pandas/Makefile.inc b/collectors/python.d.plugin/pandas/Makefile.inc new file mode 100644 index 0000000..9f4f9b3 --- /dev/null +++ b/collectors/python.d.plugin/pandas/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += pandas/pandas.chart.py +dist_pythonconfig_DATA += pandas/pandas.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += pandas/README.md pandas/Makefile.inc + diff --git a/collectors/python.d.plugin/pandas/README.md b/collectors/python.d.plugin/pandas/README.md new file mode 100644 index 0000000..1415494 --- /dev/null +++ b/collectors/python.d.plugin/pandas/README.md @@ -0,0 +1,92 @@ + + +# Pandas Netdata Collector + + + Pandas + + +A python collector using [pandas](https://pandas.pydata.org/) to pull data and do pandas based +preprocessing before feeding to Netdata. + +## Requirements + +This collector depends on some Python (Python 3 only) packages that can usually be installed via `pip` or `pip3`. + +```bash +sudo pip install pandas requests +``` + +## Configuration + +Below is an example configuration to query some json weather data from [Open-Meteo](https://open-meteo.com), +do some data wrangling on it and save in format as expected by Netdata. + +```yaml +# example pulling some hourly temperature data +temperature: + name: "temperature" + update_every: 3 + chart_configs: + - name: "temperature_by_city" + title: "Temperature By City" + family: "temperature.today" + context: "pandas.temperature" + type: "line" + units: "Celsius" + df_steps: > + pd.DataFrame.from_dict( + {city: requests.get( + f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&hourly=temperature_2m' + ).json()['hourly']['temperature_2m'] + for (city,lat,lng) + in [ + ('dublin', 53.3441, -6.2675), + ('athens', 37.9792, 23.7166), + ('london', 51.5002, -0.1262), + ('berlin', 52.5235, 13.4115), + ('paris', 48.8567, 2.3510), + ] + } + ); # use dictionary comprehension to make multiple requests; + df.describe(); # get aggregate stats for each city; + df.transpose()[['mean', 'max', 'min']].reset_index(); # just take mean, min, max; + df.rename(columns={'index':'city'}); # some column renaming; + df.pivot(columns='city').mean().to_frame().reset_index(); # force to be one row per city; + df.rename(columns={0:'degrees'}); # some column renaming; + pd.concat([df, df['city']+'_'+df['level_0']], axis=1); # add new column combining city and summary measurement label; + df.rename(columns={0:'measurement'}); # some column renaming; + df[['measurement', 'degrees']].set_index('measurement'); # just take two columns we want; + df.sort_index(); # sort by city name; + df.transpose(); # transpose so its just one wide row; +``` + +`chart_configs` is a list of dictionary objects where each one defines the sequence of `df_steps` to be run using [`pandas`](https://pandas.pydata.org/), +and the `name`, `title` etc to define the +[CHART variables](https://learn.netdata.cloud/docs/agent/collectors/python.d.plugin#global-variables-order-and-chart) +that will control how the results will look in netdata. + +The example configuration above would result in a `data` dictionary like the below being collected by Netdata +at each time step. They keys in this dictionary will be the +[dimension](https://learn.netdata.cloud/docs/agent/web#dimensions) names on the chart. + +```javascript +{'athens_max': 26.2, 'athens_mean': 19.45952380952381, 'athens_min': 12.2, 'berlin_max': 17.4, 'berlin_mean': 10.764285714285714, 'berlin_min': 5.7, 'dublin_max': 15.3, 'dublin_mean': 12.008928571428571, 'dublin_min': 6.6, 'london_max': 18.9, 'london_mean': 12.510714285714286, 'london_min': 5.2, 'paris_max': 19.4, 'paris_mean': 12.054166666666665, 'paris_min': 4.8} +``` + +Which, given the above configuration would end up as a chart like below in Netdata. + +![pandas collector temperature example chart](https://user-images.githubusercontent.com/2178292/195075312-8ce8cf68-5172-48e3-af09-104ffecfcdd6.png) + +## Notes +- Each line in `df_steps` must return a pandas +[DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) object (`df`) at each step. +- You can use +[this colab notebook](https://colab.research.google.com/drive/1VYrddSegZqGtkWGFuiUbMbUk5f3rW6Hi?usp=sharing) +to mock up and work on your `df_steps` iteratively before adding them to your config. +- This collector is expecting one row in the final pandas DataFrame. It is that first row that will be taken +as the most recent values for each dimension on each chart using (`df.to_dict(orient='records')[0]`). +See [pd.to_dict()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_dict.html). diff --git a/collectors/python.d.plugin/pandas/pandas.chart.py b/collectors/python.d.plugin/pandas/pandas.chart.py new file mode 100644 index 0000000..8eb4452 --- /dev/null +++ b/collectors/python.d.plugin/pandas/pandas.chart.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# Description: pandas netdata python.d module +# Author: Andrew Maguire (andrewm4894) +# SPDX-License-Identifier: GPL-3.0-or-later + +import pandas as pd + +try: + import requests + HAS_REQUESTS = True +except ImportError: + HAS_REQUESTS = False + +from bases.FrameworkServices.SimpleService import SimpleService + +ORDER = [] + +CHARTS = {} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.chart_configs = self.configuration.get('chart_configs', None) + self.line_sep = self.configuration.get('line_sep', ';') + + def run_code(self, df_steps): + """eval() each line of code and ensure the result is a pandas dataframe""" + + # process each line of code + lines = df_steps.split(self.line_sep) + for line in lines: + line_clean = line.strip('\n').strip(' ') + if line_clean != '' and line_clean[0] != '#': + df = eval(line_clean) + assert isinstance(df, pd.DataFrame), 'The result of each evaluated line of `df_steps` must be of type `pd.DataFrame`' + + # take top row of final df as data to be collected by netdata + data = df.to_dict(orient='records')[0] + + return data + + def check(self): + """ensure charts and dims all configured and that we can get data""" + + if not HAS_REQUESTS: + self.warn('requests library could not be imported') + + if not self.chart_configs: + self.error('chart_configs must be defined') + + data = dict() + + # add each chart as defined by the config + for chart_config in self.chart_configs: + if chart_config['name'] not in self.charts: + chart_template = { + 'options': [ + chart_config['name'], + chart_config['title'], + chart_config['units'], + chart_config['family'], + chart_config['context'], + chart_config['type'] + ], + 'lines': [] + } + self.charts.add_chart([chart_config['name']] + chart_template['options']) + + data_tmp = self.run_code(chart_config['df_steps']) + data.update(data_tmp) + + for dim in data_tmp: + self.charts[chart_config['name']].add_dimension([dim, dim, 'absolute', 1, 1]) + + return True + + def get_data(self): + """get data for each chart config""" + + data = dict() + + for chart_config in self.chart_configs: + data_tmp = self.run_code(chart_config['df_steps']) + data.update(data_tmp) + + return data diff --git a/collectors/python.d.plugin/pandas/pandas.conf b/collectors/python.d.plugin/pandas/pandas.conf new file mode 100644 index 0000000..6684af9 --- /dev/null +++ b/collectors/python.d.plugin/pandas/pandas.conf @@ -0,0 +1,191 @@ +# netdata python.d.plugin configuration for pandas +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +update_every: 5 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear on the dashboard +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, example also supports the following: +# +# num_lines: 4 # the number of lines to create +# lower: 0 # the lower bound of numbers to randomly sample from +# upper: 100 # the upper bound of numbers to randomly sample from +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS + +# Some example configurations, enable this collector, uncomment and example below and restart netdata to enable. + +# example pulling some hourly temperature data, a chart for today forecast (mean,min,max) and another chart for current. +# temperature: +# name: "temperature" +# update_every: 5 +# chart_configs: +# - name: "temperature_forecast_by_city" +# title: "Temperature By City - Today Forecast" +# family: "temperature.today" +# context: "pandas.temperature" +# type: "line" +# units: "Celsius" +# df_steps: > +# pd.DataFrame.from_dict( +# {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&hourly=temperature_2m').json()['hourly']['temperature_2m'] +# for (city,lat,lng) +# in [ +# ('dublin', 53.3441, -6.2675), +# ('athens', 37.9792, 23.7166), +# ('london', 51.5002, -0.1262), +# ('berlin', 52.5235, 13.4115), +# ('paris', 48.8567, 2.3510), +# ('madrid', 40.4167, -3.7033), +# ('new_york', 40.71, -74.01), +# ('los_angeles', 34.05, -118.24), +# ] +# } +# ); +# df.describe(); # get aggregate stats for each city; +# df.transpose()[['mean', 'max', 'min']].reset_index(); # just take mean, min, max; +# df.rename(columns={'index':'city'}); # some column renaming; +# df.pivot(columns='city').mean().to_frame().reset_index(); # force to be one row per city; +# df.rename(columns={0:'degrees'}); # some column renaming; +# pd.concat([df, df['city']+'_'+df['level_0']], axis=1); # add new column combining city and summary measurement label; +# df.rename(columns={0:'measurement'}); # some column renaming; +# df[['measurement', 'degrees']].set_index('measurement'); # just take two columns we want; +# df.sort_index(); # sort by city name; +# df.transpose(); # transpose so its just one wide row; +# - name: "temperature_current_by_city" +# title: "Temperature By City - Current" +# family: "temperature.current" +# context: "pandas.temperature" +# type: "line" +# units: "Celsius" +# df_steps: > +# pd.DataFrame.from_dict( +# {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}¤t_weather=true').json()['current_weather'] +# for (city,lat,lng) +# in [ +# ('dublin', 53.3441, -6.2675), +# ('athens', 37.9792, 23.7166), +# ('london', 51.5002, -0.1262), +# ('berlin', 52.5235, 13.4115), +# ('paris', 48.8567, 2.3510), +# ('madrid', 40.4167, -3.7033), +# ('new_york', 40.71, -74.01), +# ('los_angeles', 34.05, -118.24), +# ] +# } +# ); +# df.transpose(); +# df[['temperature']]; +# df.transpose(); + +# example showing a read_csv from a url and some light pandas data wrangling. +# pull data in csv format from london demo server and then ratio of user cpus over system cpu averaged over last 60 seconds. +# example_csv: +# name: "example_csv" +# update_every: 2 +# chart_configs: +# - name: "london_system_cpu" +# title: "London System CPU - Ratios" +# family: "london_system_cpu" +# context: "pandas" +# type: "line" +# units: "n" +# df_steps: > +# pd.read_csv('https://london.my-netdata.io/api/v1/data?chart=system.cpu&format=csv&after=-60', storage_options={'User-Agent': 'netdata'}); +# df.drop('time', axis=1); +# df.mean().to_frame().transpose(); +# df.apply(lambda row: (row.user / row.system), axis = 1).to_frame(); +# df.rename(columns={0:'average_user_system_ratio'}); +# df*100; + +# example showing a read_json from a url and some light pandas data wrangling. +# pull data in json format (using requests.get() if json data is too complex for pd.read_json() ) from london demo server and work out 'total_bandwidth'. +# example_json: +# name: "example_json" +# update_every: 2 +# chart_configs: +# - name: "london_system_net" +# title: "London System Net - Total Bandwidth" +# family: "london_system_net" +# context: "pandas" +# type: "area" +# units: "kilobits/s" +# df_steps: > +# pd.DataFrame(requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['data'], columns=requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['labels']); +# df.drop('time', axis=1); +# abs(df); +# df.sum(axis=1).to_frame(); +# df.rename(columns={0:'total_bandwidth'}); + +# example showing a read_xml from a url and some light pandas data wrangling. +# pull weather forecast data in xml format, use xpath to pull out temperature forecast. +# example_xml: +# name: "example_xml" +# update_every: 2 +# line_sep: "|" +# chart_configs: +# - name: "temperature_forcast" +# title: "Temperature Forecast" +# family: "temp" +# context: "pandas.temp" +# type: "line" +# units: "celsius" +# df_steps: > +# pd.read_xml('http://metwdb-openaccess.ichec.ie/metno-wdb2ts/locationforecast?lat=54.7210798611;long=-8.7237392806', xpath='./product/time[1]/location/temperature', parser='etree')| +# df.rename(columns={'value': 'dublin'})| +# df[['dublin']]| \ No newline at end of file diff --git a/collectors/python.d.plugin/postfix/Makefile.inc b/collectors/python.d.plugin/postfix/Makefile.inc new file mode 100644 index 0000000..f4091b2 --- /dev/null +++ b/collectors/python.d.plugin/postfix/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += postfix/postfix.chart.py +dist_pythonconfig_DATA += postfix/postfix.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += postfix/README.md postfix/Makefile.inc + diff --git a/collectors/python.d.plugin/postfix/README.md b/collectors/python.d.plugin/postfix/README.md new file mode 100644 index 0000000..1a546c6 --- /dev/null +++ b/collectors/python.d.plugin/postfix/README.md @@ -0,0 +1,36 @@ + + +# Postfix monitoring with Netdata + +Monitors MTA email queue statistics using [postqueue](http://www.postfix.org/postqueue.1.html) tool. + +The collector executes `postqueue -p` to get Postfix queue statistics. + +## Requirements + +Postfix has internal access controls that limit activities on the mail queue. By default, all users are allowed to view +the queue. If your system is configured with stricter access controls, you need to grant the `netdata` user access to +view the mail queue. In order to do it, add `netdata` to `authorized_mailq_users` in the `/etc/postfix/main.cf` file. + +See the `authorized_mailq_users` setting in +the [Postfix documentation](https://www.postfix.org/postconf.5.html) for more details. + +## Charts + +It produces only two charts: + +1. **Postfix Queue Emails** + + - emails + +2. **Postfix Queue Emails Size** in KB + + - size + +## Configuration + +Configuration is not needed. diff --git a/collectors/python.d.plugin/postfix/postfix.chart.py b/collectors/python.d.plugin/postfix/postfix.chart.py new file mode 100644 index 0000000..b650514 --- /dev/null +++ b/collectors/python.d.plugin/postfix/postfix.chart.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# Description: postfix netdata python.d module +# Author: Pawel Krupa (paulfantom) +# SPDX-License-Identifier: GPL-3.0-or-later + +from bases.FrameworkServices.ExecutableService import ExecutableService + +POSTQUEUE_COMMAND = 'postqueue -p' + +ORDER = [ + 'qemails', + 'qsize', +] + +CHARTS = { + 'qemails': { + 'options': [None, 'Postfix Queue Emails', 'emails', 'queue', 'postfix.qemails', 'line'], + 'lines': [ + ['emails', None, 'absolute'] + ] + }, + 'qsize': { + 'options': [None, 'Postfix Queue Emails Size', 'KiB', 'queue', 'postfix.qsize', 'area'], + 'lines': [ + ['size', None, 'absolute'] + ] + } +} + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.command = POSTQUEUE_COMMAND + + def _get_data(self): + """ + Format data received from shell command + :return: dict + """ + try: + raw = self._get_raw_data()[-1].split(' ') + if raw[0] == 'Mail' and raw[1] == 'queue': + return {'emails': 0, + 'size': 0} + + return {'emails': raw[4], + 'size': raw[1]} + except (ValueError, AttributeError): + return None diff --git a/collectors/python.d.plugin/postfix/postfix.conf b/collectors/python.d.plugin/postfix/postfix.conf new file mode 100644 index 0000000..a4d2472 --- /dev/null +++ b/collectors/python.d.plugin/postfix/postfix.conf @@ -0,0 +1,72 @@ +# netdata python.d.plugin configuration for postfix +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# postfix is slow, so once every 10 seconds +update_every: 10 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, postfix also supports the following: +# +# command: 'postqueue -p' # the command to run +# + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS + +local: + command: 'postqueue -p' diff --git a/collectors/python.d.plugin/proxysql/Makefile.inc b/collectors/python.d.plugin/proxysql/Makefile.inc new file mode 100644 index 0000000..66be372 --- /dev/null +++ b/collectors/python.d.plugin/proxysql/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += proxysql/proxysql.chart.py +dist_pythonconfig_DATA += proxysql/proxysql.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += proxysql/README.md proxysql/Makefile.inc + diff --git a/collectors/python.d.plugin/proxysql/README.md b/collectors/python.d.plugin/proxysql/README.md new file mode 100644 index 0000000..8c6a394 --- /dev/null +++ b/collectors/python.d.plugin/proxysql/README.md @@ -0,0 +1,106 @@ + + +# ProxySQL monitoring with Netdata + +Monitors database backend and frontend performance metrics. + +## Requirements + +- python library [MySQLdb](https://github.com/PyMySQL/mysqlclient-python) (faster) or [PyMySQL](https://github.com/PyMySQL/PyMySQL) (slower) +- `netdata` local user to connect to the ProxySQL server. + +To create the `netdata` user, follow [the documentation](https://github.com/sysown/proxysql/wiki/Users-configuration#creating-a-new-user). + +## Charts + +It produces: + +1. **Connections (frontend)** + + - connected: number of frontend connections currently connected + - aborted: number of frontend connections aborted due to invalid credential or max_connections reached + - non_idle: number of frontend connections that are not currently idle + - created: number of frontend connections created + +2. **Questions (frontend)** + + - questions: total number of queries sent from frontends + - slow_queries: number of queries that ran for longer than the threshold in milliseconds defined in global variable `mysql-long_query_time` + +3. **Overall Bandwidth (backends)** + + - in + - out + +4. **Status (backends)** + + - Backends + - `1=ONLINE`: backend server is fully operational + - `2=SHUNNED`: backend sever is temporarily taken out of use because of either too many connection errors in a time that was too short, or replication lag exceeded the allowed threshold + - `3=OFFLINE_SOFT`: when a server is put into OFFLINE_SOFT mode, new incoming connections aren't accepted anymore, while the existing connections are kept until they became inactive. In other words, connections are kept in use until the current transaction is completed. This allows to gracefully detach a backend + - `4=OFFLINE_HARD`: when a server is put into OFFLINE_HARD mode, the existing connections are dropped, while new incoming connections aren't accepted either. This is equivalent to deleting the server from a hostgroup, or temporarily taking it out of the hostgroup for maintenance work + - `-1`: Unknown status + +5. **Bandwidth (backends)** + + - Backends + - in + - out + +6. **Queries (backends)** + + - Backends + - queries + +7. **Latency (backends)** + + - Backends + - ping time + +8. **Pool connections (backends)** + + - Backends + - Used: The number of connections are currently used by ProxySQL for sending queries to the backend server. + - Free: The number of connections are currently free. + - Established/OK: The number of connections were established successfully. + - Error: The number of connections weren't established successfully. + +9. **Commands** + + - Commands + - Count + - Duration (Total duration for each command) + +10. **Commands Histogram** + + - Commands + - 100us, 500us, ..., 10s, inf: the total number of commands of the given type which executed within the specified time limit and the previous one. + +## Configuration + +Edit the `python.d/proxysql.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/proxysql.conf +``` + +```yaml +tcpipv4: + name : 'local' + user : 'stats' + pass : 'stats' + host : '127.0.0.1' + port : '6032' +``` + +If no configuration is given, module will fail to run. + +--- + + diff --git a/collectors/python.d.plugin/proxysql/proxysql.chart.py b/collectors/python.d.plugin/proxysql/proxysql.chart.py new file mode 100644 index 0000000..982c28e --- /dev/null +++ b/collectors/python.d.plugin/proxysql/proxysql.chart.py @@ -0,0 +1,352 @@ +# -*- coding: utf-8 -*- +# Description: Proxysql netdata python.d module +# Author: Ali Borhani (alibo) +# SPDX-License-Identifier: GPL-3.0+ + +from bases.FrameworkServices.MySQLService import MySQLService + + +def query(table, *params): + return 'SELECT {params} FROM {table}'.format(table=table, params=', '.join(params)) + + +# https://github.com/sysown/proxysql/blob/master/doc/admin_tables.md#stats_mysql_global +QUERY_GLOBAL = query( + "stats_mysql_global", + "Variable_Name", + "Variable_Value" +) + +# https://github.com/sysown/proxysql/blob/master/doc/admin_tables.md#stats_mysql_connection_pool +QUERY_CONNECTION_POOL = query( + "stats_mysql_connection_pool", + "hostgroup", + "srv_host", + "srv_port", + "status", + "ConnUsed", + "ConnFree", + "ConnOK", + "ConnERR", + "Queries", + "Bytes_data_sent", + "Bytes_data_recv", + "Latency_us" +) + +# https://github.com/sysown/proxysql/blob/master/doc/admin_tables.md#stats_mysql_commands_counters +QUERY_COMMANDS = query( + "stats_mysql_commands_counters", + "Command", + "Total_Time_us", + "Total_cnt", + "cnt_100us", + "cnt_500us", + "cnt_1ms", + "cnt_5ms", + "cnt_10ms", + "cnt_50ms", + "cnt_100ms", + "cnt_500ms", + "cnt_1s", + "cnt_5s", + "cnt_10s", + "cnt_INFs" +) + +GLOBAL_STATS = [ + 'client_connections_aborted', + 'client_connections_connected', + 'client_connections_created', + 'client_connections_non_idle', + 'proxysql_uptime', + 'questions', + 'slow_queries' +] + +CONNECTION_POOL_STATS = [ + 'status', + 'connused', + 'connfree', + 'connok', + 'connerr', + 'queries', + 'bytes_data_sent', + 'bytes_data_recv', + 'latency_us' +] + +ORDER = [ + 'connections', + 'active_transactions', + 'questions', + 'pool_overall_net', + 'commands_count', + 'commands_duration', + 'pool_status', + 'pool_net', + 'pool_queries', + 'pool_latency', + 'pool_connection_used', + 'pool_connection_free', + 'pool_connection_ok', + 'pool_connection_error' +] + +HISTOGRAM_ORDER = [ + '100us', + '500us', + '1ms', + '5ms', + '10ms', + '50ms', + '100ms', + '500ms', + '1s', + '5s', + '10s', + 'inf' +] + +STATUS = { + "ONLINE": 1, + "SHUNNED": 2, + "OFFLINE_SOFT": 3, + "OFFLINE_HARD": 4 +} + +CHARTS = { + 'pool_status': { + 'options': [None, 'ProxySQL Backend Status', 'status', 'status', 'proxysql.pool_status', 'line'], + 'lines': [] + }, + 'pool_net': { + 'options': [None, 'ProxySQL Backend Bandwidth', 'kilobits/s', 'bandwidth', 'proxysql.pool_net', 'area'], + 'lines': [] + }, + 'pool_overall_net': { + 'options': [None, 'ProxySQL Backend Overall Bandwidth', 'kilobits/s', 'overall_bandwidth', + 'proxysql.pool_overall_net', 'area'], + 'lines': [ + ['bytes_data_recv', 'in', 'incremental', 8, 1000], + ['bytes_data_sent', 'out', 'incremental', -8, 1000] + ] + }, + 'questions': { + 'options': [None, 'ProxySQL Frontend Questions', 'questions/s', 'questions', 'proxysql.questions', 'line'], + 'lines': [ + ['questions', 'questions', 'incremental'], + ['slow_queries', 'slow_queries', 'incremental'] + ] + }, + 'pool_queries': { + 'options': [None, 'ProxySQL Backend Queries', 'queries/s', 'queries', 'proxysql.queries', 'line'], + 'lines': [] + }, + 'active_transactions': { + 'options': [None, 'ProxySQL Frontend Active Transactions', 'transactions/s', 'active_transactions', + 'proxysql.active_transactions', 'line'], + 'lines': [ + ['active_transactions', 'active_transactions', 'absolute'] + ] + }, + 'pool_latency': { + 'options': [None, 'ProxySQL Backend Latency', 'milliseconds', 'latency', 'proxysql.latency', 'line'], + 'lines': [] + }, + 'connections': { + 'options': [None, 'ProxySQL Frontend Connections', 'connections/s', 'connections', 'proxysql.connections', + 'line'], + 'lines': [ + ['client_connections_connected', 'connected', 'absolute'], + ['client_connections_created', 'created', 'incremental'], + ['client_connections_aborted', 'aborted', 'incremental'], + ['client_connections_non_idle', 'non_idle', 'absolute'] + ] + }, + 'pool_connection_used': { + 'options': [None, 'ProxySQL Used Connections', 'connections', 'pool_connections', + 'proxysql.pool_used_connections', 'line'], + 'lines': [] + }, + 'pool_connection_free': { + 'options': [None, 'ProxySQL Free Connections', 'connections', 'pool_connections', + 'proxysql.pool_free_connections', 'line'], + 'lines': [] + }, + 'pool_connection_ok': { + 'options': [None, 'ProxySQL Established Connections', 'connections', 'pool_connections', + 'proxysql.pool_ok_connections', 'line'], + 'lines': [] + }, + 'pool_connection_error': { + 'options': [None, 'ProxySQL Error Connections', 'connections', 'pool_connections', + 'proxysql.pool_error_connections', 'line'], + 'lines': [] + }, + 'commands_count': { + 'options': [None, 'ProxySQL Commands', 'commands', 'commands', 'proxysql.commands_count', 'line'], + 'lines': [] + }, + 'commands_duration': { + 'options': [None, 'ProxySQL Commands Duration', 'milliseconds', 'commands', 'proxysql.commands_duration', + 'line'], + 'lines': [] + } +} + + +class Service(MySQLService): + def __init__(self, configuration=None, name=None): + MySQLService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.queries = dict( + global_status=QUERY_GLOBAL, + connection_pool_status=QUERY_CONNECTION_POOL, + commands_status=QUERY_COMMANDS + ) + + def _get_data(self): + raw_data = self._get_raw_data(description=True) + + if not raw_data: + return None + + to_netdata = dict() + + if 'global_status' in raw_data: + global_status = dict(raw_data['global_status'][0]) + for key in global_status: + if key.lower() in GLOBAL_STATS: + to_netdata[key.lower()] = global_status[key] + + if 'connection_pool_status' in raw_data: + + to_netdata['bytes_data_recv'] = 0 + to_netdata['bytes_data_sent'] = 0 + + for record in raw_data['connection_pool_status'][0]: + backend = self.generate_backend(record) + name = self.generate_backend_name(backend) + + for key in backend: + if key in CONNECTION_POOL_STATS: + if key == 'status': + backend[key] = self.convert_status(backend[key]) + + if len(self.charts) > 0: + if (name + '_status') not in self.charts['pool_status']: + self.add_backend_dimensions(name) + + to_netdata["{0}_{1}".format(name, key)] = backend[key] + + if key == 'bytes_data_recv': + to_netdata['bytes_data_recv'] += int(backend[key]) + + if key == 'bytes_data_sent': + to_netdata['bytes_data_sent'] += int(backend[key]) + + if 'commands_status' in raw_data: + for record in raw_data['commands_status'][0]: + cmd = self.generate_command_stats(record) + name = cmd['name'] + + if len(self.charts) > 0: + if (name + '_count') not in self.charts['commands_count']: + self.add_command_dimensions(name) + self.add_histogram_chart(cmd) + + to_netdata[name + '_count'] = cmd['count'] + to_netdata[name + '_duration'] = cmd['duration'] + for histogram in cmd['histogram']: + dimId = 'commands_histogram_{0}_{1}'.format(name, histogram) + to_netdata[dimId] = cmd['histogram'][histogram] + + return to_netdata or None + + def add_backend_dimensions(self, name): + self.charts['pool_status'].add_dimension([name + '_status', name, 'absolute']) + self.charts['pool_net'].add_dimension([name + '_bytes_data_recv', 'from_' + name, 'incremental', 8, 1024]) + self.charts['pool_net'].add_dimension([name + '_bytes_data_sent', 'to_' + name, 'incremental', -8, 1024]) + self.charts['pool_queries'].add_dimension([name + '_queries', name, 'incremental']) + self.charts['pool_latency'].add_dimension([name + '_latency_us', name, 'absolute', 1, 1000]) + self.charts['pool_connection_used'].add_dimension([name + '_connused', name, 'absolute']) + self.charts['pool_connection_free'].add_dimension([name + '_connfree', name, 'absolute']) + self.charts['pool_connection_ok'].add_dimension([name + '_connok', name, 'incremental']) + self.charts['pool_connection_error'].add_dimension([name + '_connerr', name, 'incremental']) + + def add_command_dimensions(self, cmd): + self.charts['commands_count'].add_dimension([cmd + '_count', cmd, 'incremental']) + self.charts['commands_duration'].add_dimension([cmd + '_duration', cmd, 'incremental', 1, 1000]) + + def add_histogram_chart(self, cmd): + chart = self.charts.add_chart(self.histogram_chart(cmd)) + + for histogram in HISTOGRAM_ORDER: + dimId = 'commands_histogram_{0}_{1}'.format(cmd['name'], histogram) + chart.add_dimension([dimId, histogram, 'incremental']) + + @staticmethod + def histogram_chart(cmd): + return [ + 'commands_histogram_' + cmd['name'], + None, + 'ProxySQL {0} Command Histogram'.format(cmd['name'].title()), + 'commands', + 'commands_histogram', + 'proxysql.commands_histogram_' + cmd['name'], + 'stacked' + ] + + @staticmethod + def generate_backend(data): + return { + 'hostgroup': data[0], + 'srv_host': data[1], + 'srv_port': data[2], + 'status': data[3], + 'connused': data[4], + 'connfree': data[5], + 'connok': data[6], + 'connerr': data[7], + 'queries': data[8], + 'bytes_data_sent': data[9], + 'bytes_data_recv': data[10], + 'latency_us': data[11] + } + + @staticmethod + def generate_command_stats(data): + return { + 'name': data[0].lower(), + 'duration': data[1], + 'count': data[2], + 'histogram': { + '100us': data[3], + '500us': data[4], + '1ms': data[5], + '5ms': data[6], + '10ms': data[7], + '50ms': data[8], + '100ms': data[9], + '500ms': data[10], + '1s': data[11], + '5s': data[12], + '10s': data[13], + 'inf': data[14] + } + } + + @staticmethod + def generate_backend_name(backend): + hostgroup = backend['hostgroup'].replace(' ', '_').lower() + host = backend['srv_host'].replace('.', '_') + + return "{0}_{1}_{2}".format(hostgroup, host, backend['srv_port']) + + @staticmethod + def convert_status(status): + if status in STATUS: + return STATUS[status] + return -1 diff --git a/collectors/python.d.plugin/proxysql/proxysql.conf b/collectors/python.d.plugin/proxysql/proxysql.conf new file mode 100644 index 0000000..3c503a8 --- /dev/null +++ b/collectors/python.d.plugin/proxysql/proxysql.conf @@ -0,0 +1,116 @@ +# netdata python.d.plugin configuration for ProxySQL +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, proxysql also supports the following: +# +# host: 'IP or HOSTNAME' # the host to connect to +# port: PORT # the port to connect to +# +# in all cases, the following can also be set: +# +# user: 'username' # the proxysql username to use +# pass: 'password' # the proxysql password to use +# + +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +tcp: + name : 'local' + user : 'stats' + pass : 'stats' + host : 'localhost' + port : '6032' + +tcpipv4: + name : 'local' + user : 'stats' + pass : 'stats' + host : '127.0.0.1' + port : '6032' + +tcpipv6: + name : 'local' + user : 'stats' + pass : 'stats' + host : '::1' + port : '6032' + +tcp_admin: + name : 'local' + user : 'admin' + pass : 'admin' + host : 'localhost' + port : '6032' + +tcpipv4_admin: + name : 'local' + user : 'admin' + pass : 'admin' + host : '127.0.0.1' + port : '6032' + +tcpipv6_admin: + name : 'local' + user : 'admin' + pass : 'admin' + host : '::1' + port : '6032' diff --git a/collectors/python.d.plugin/puppet/Makefile.inc b/collectors/python.d.plugin/puppet/Makefile.inc new file mode 100644 index 0000000..fe94b92 --- /dev/null +++ b/collectors/python.d.plugin/puppet/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += puppet/puppet.chart.py +dist_pythonconfig_DATA += puppet/puppet.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += puppet/README.md puppet/Makefile.inc + diff --git a/collectors/python.d.plugin/puppet/README.md b/collectors/python.d.plugin/puppet/README.md new file mode 100644 index 0000000..1b06d18 --- /dev/null +++ b/collectors/python.d.plugin/puppet/README.md @@ -0,0 +1,67 @@ + + +# Puppet monitoring with Netdata + +Monitor status of Puppet Server and Puppet DB. + +Following charts are drawn: + +1. **JVM Heap** + + - committed (allocated from OS) + - used (actual use) + +2. **JVM Non-Heap** + + - committed (allocated from OS) + - used (actual use) + +3. **CPU Usage** + + - execution + - GC (taken by garbage collection) + +4. **File Descriptors** + + - max + - used + +## Configuration + +Edit the `python.d/puppet.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/puppet.conf +``` + +```yaml +puppetdb: + url: 'https://fqdn.example.com:8081' + tls_cert_file: /path/to/client.crt + tls_key_file: /path/to/client.key + autodetection_retry: 1 + +puppetserver: + url: 'https://fqdn.example.com:8140' + autodetection_retry: 1 +``` + +When no configuration is given, module uses `https://fqdn.example.com:8140`. + +### notes + +- Exact Fully Qualified Domain Name of the node should be used. +- Usually Puppet Server/DB startup time is VERY long. So, there should + be quite reasonable retry count. +- Secure PuppetDB config may require client certificate. Not applies + to default PuppetDB configuration though. + +--- + + diff --git a/collectors/python.d.plugin/puppet/puppet.chart.py b/collectors/python.d.plugin/puppet/puppet.chart.py new file mode 100644 index 0000000..f8adf60 --- /dev/null +++ b/collectors/python.d.plugin/puppet/puppet.chart.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +# Description: puppet netdata python.d module +# Author: Andrey Galkin (andvgal) +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This module should work both with OpenSource and PE versions +# of PuppetServer and PuppetDB. +# +# NOTE: PuppetDB may be configured to require proper TLS +# client certificate for security reasons. Use tls_key_file +# and tls_cert_file options then. +# + +import socket +from json import loads + +from bases.FrameworkServices.UrlService import UrlService + +update_every = 5 + +MiB = 1 << 20 +CPU_SCALE = 1000 + +ORDER = [ + 'jvm_heap', + 'jvm_nonheap', + 'cpu', + 'fd_open', +] + +CHARTS = { + 'jvm_heap': { + 'options': [None, 'JVM Heap', 'MiB', 'resources', 'puppet.jvm', 'area'], + 'lines': [ + ['jvm_heap_committed', 'committed', 'absolute', 1, MiB], + ['jvm_heap_used', 'used', 'absolute', 1, MiB], + ], + 'variables': [ + ['jvm_heap_max'], + ['jvm_heap_init'], + ], + }, + 'jvm_nonheap': { + 'options': [None, 'JVM Non-Heap', 'MiB', 'resources', 'puppet.jvm', 'area'], + 'lines': [ + ['jvm_nonheap_committed', 'committed', 'absolute', 1, MiB], + ['jvm_nonheap_used', 'used', 'absolute', 1, MiB], + ], + 'variables': [ + ['jvm_nonheap_max'], + ['jvm_nonheap_init'], + ], + }, + 'cpu': { + 'options': [None, 'CPU usage', 'percentage', 'resources', 'puppet.cpu', 'stacked'], + 'lines': [ + ['cpu_time', 'execution', 'absolute', 1, CPU_SCALE], + ['gc_time', 'GC', 'absolute', 1, CPU_SCALE], + ] + }, + 'fd_open': { + 'options': [None, 'File Descriptors', 'descriptors', 'resources', 'puppet.fdopen', 'line'], + 'lines': [ + ['fd_used', 'used', 'absolute'], + ], + 'variables': [ + ['fd_max'], + ], + }, +} + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.url = 'https://{0}:8140'.format(socket.getfqdn()) + + def _get_data(self): + # NOTE: there are several ways to retrieve data + # 1. Only PE versions: + # https://puppet.com/docs/pe/2018.1/api_status/status_api_metrics_endpoints.html + # 2. Individual Metrics API (JMX): + # https://puppet.com/docs/pe/2018.1/api_status/metrics_api.html + # 3. Extended status at debug level: + # https://puppet.com/docs/pe/2018.1/api_status/status_api_json_endpoints.html + # + # For sake of simplicity and efficiency the status one is used.. + + raw_data = self._get_raw_data(self.url + '/status/v1/services?level=debug') + + if raw_data is None: + return None + + raw_data = loads(raw_data) + data = {} + + try: + try: + jvm_metrics = raw_data['status-service']['status']['experimental']['jvm-metrics'] + except KeyError: + jvm_metrics = raw_data['status-service']['status']['jvm-metrics'] + + heap_mem = jvm_metrics['heap-memory'] + non_heap_mem = jvm_metrics['non-heap-memory'] + + for k in ['max', 'committed', 'used', 'init']: + data['jvm_heap_' + k] = heap_mem[k] + data['jvm_nonheap_' + k] = non_heap_mem[k] + + fd_open = jvm_metrics['file-descriptors'] + data['fd_max'] = fd_open['max'] + data['fd_used'] = fd_open['used'] + + data['cpu_time'] = int(jvm_metrics['cpu-usage'] * CPU_SCALE) + data['gc_time'] = int(jvm_metrics['gc-cpu-usage'] * CPU_SCALE) + except KeyError: + pass + + return data or None diff --git a/collectors/python.d.plugin/puppet/puppet.conf b/collectors/python.d.plugin/puppet/puppet.conf new file mode 100644 index 0000000..ff5c3d0 --- /dev/null +++ b/collectors/python.d.plugin/puppet/puppet.conf @@ -0,0 +1,94 @@ +# netdata python.d.plugin configuration for Puppet Server and Puppet DB +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# These configuration comes from UrlService base: +# url: # HTTP or HTTPS URL +# tls_verify: False # Control HTTPS server certificate verification +# tls_ca_file: # Optional CA (bundle) file to use +# tls_cert_file: # Optional client certificate file +# tls_key_file: # Optional client key file +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# puppet: +# url: 'https://:8140' +# + +# +# Production configuration should look like below. +# +# NOTE: usually Puppet Server/DB startup time is VERY long. So, there should +# be quite reasonable retry count. +# +# NOTE: secure PuppetDB config may require client certificate. +# Not applies to default PuppetDB configuration though. +# +# puppetdb: +# url: 'https://fqdn.example.com:8081' +# tls_cert_file: /path/to/client.crt +# tls_key_file: /path/to/client.key +# autodetection_retry: 1 +# +# puppetserver: +# url: 'https://fqdn.example.com:8140' +# autodetection_retry: 1 +# diff --git a/collectors/python.d.plugin/python.d.conf b/collectors/python.d.plugin/python.d.conf new file mode 100644 index 0000000..7b43ee2 --- /dev/null +++ b/collectors/python.d.plugin/python.d.conf @@ -0,0 +1,84 @@ +# netdata python.d.plugin configuration +# +# This file is in YaML format. +# Generally the format is: +# +# name: value +# + +# Enable / disable the whole python.d.plugin (all its modules) +enabled: yes + +# ---------------------------------------------------------------------- +# Enable / Disable python.d.plugin modules +#default_run: yes +# +# If "default_run" = "yes" the default for all modules is enabled (yes). +# Setting any of these to "no" will disable it. +# +# If "default_run" = "no" the default for all modules is disabled (no). +# Setting any of these to "yes" will enable it. + +# Enable / Disable explicit garbage collection (full collection run). Default is enabled. +gc_run: yes + +# Garbage collection interval in seconds. Default is 300. +gc_interval: 300 + +# adaptec_raid: yes +# alarms: yes +# am2320: yes +# anomalies: no +# beanstalk: yes +# bind_rndc: yes +# boinc: yes +# ceph: yes +# changefinder: no +# dockerd: yes +# dovecot: yes + +# this is just an example +example: no + +# exim: yes +# fail2ban: yes +# gearman: yes +go_expvar: no + +# haproxy: yes +# hddtemp: yes +hpssa: no +# icecast: yes +# ipfs: yes +# litespeed: yes +logind: no +# megacli: yes +# memcached: yes +# mongodb: yes +# monit: yes +# nvidia_smi: yes +# nsd: yes +# ntpd: yes +# openldap: yes +# oracledb: yes +# pandas: yes +# postfix: yes +# proxysql: yes +# puppet: yes +# rabbitmq: yes +# rethinkdbs: yes +# retroshare: yes +# riakkv: yes +# samba: yes +# sensors: yes +# smartd_log: yes +# spigotmc: yes +# springboot: yes +# squid: yes +# traefik: yes +# tomcat: yes +# tor: yes +# uwsgi: yes +# varnish: yes +# w1sensor: yes +# zscores: no diff --git a/collectors/python.d.plugin/python.d.plugin.in b/collectors/python.d.plugin/python.d.plugin.in new file mode 100644 index 0000000..681ceb4 --- /dev/null +++ b/collectors/python.d.plugin/python.d.plugin.in @@ -0,0 +1,922 @@ +#!/usr/bin/env bash +'''':; +pybinary=$(which python3 || which python || which python2) +filtered=() +for arg in "$@" +do + case $arg in + -p*) pybinary=${arg:2} + shift 1 ;; + *) filtered+=("$arg") ;; + esac +done +if [ "$pybinary" = "" ] +then + echo "ERROR python IS NOT AVAILABLE IN THIS SYSTEM" + exit 1 +fi +exec "$pybinary" "$0" "${filtered[@]}" # ''' + +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (l2isbad) +# SPDX-License-Identifier: GPL-3.0-or-later + +import collections +import copy +import gc +import json +import os +import pprint +import re +import sys +import threading +import time +import types + +try: + from queue import Queue +except ImportError: + from Queue import Queue + +PY_VERSION = sys.version_info[:2] # (major=3, minor=7, micro=3, releaselevel='final', serial=0) + +if PY_VERSION > (3, 1): + from importlib.machinery import SourceFileLoader +else: + from imp import load_source as SourceFileLoader + +ENV_NETDATA_USER_CONFIG_DIR = 'NETDATA_USER_CONFIG_DIR' +ENV_NETDATA_STOCK_CONFIG_DIR = 'NETDATA_STOCK_CONFIG_DIR' +ENV_NETDATA_PLUGINS_DIR = 'NETDATA_PLUGINS_DIR' +ENV_NETDATA_USER_PLUGINS_DIRS = 'NETDATA_USER_PLUGINS_DIRS' +ENV_NETDATA_LIB_DIR = 'NETDATA_LIB_DIR' +ENV_NETDATA_UPDATE_EVERY = 'NETDATA_UPDATE_EVERY' +ENV_NETDATA_LOCK_DIR = 'NETDATA_LOCK_DIR' + + +def add_pythond_packages(): + pluginsd = os.getenv(ENV_NETDATA_PLUGINS_DIR, os.path.dirname(__file__)) + pythond = os.path.abspath(pluginsd + '/../python.d') + packages = os.path.join(pythond, 'python_modules') + sys.path.append(packages) + + +add_pythond_packages() + +from bases.collection import safe_print +from bases.loggers import PythonDLogger +from bases.loaders import load_config +from third_party import filelock + +try: + from collections import OrderedDict +except ImportError: + from third_party.ordereddict import OrderedDict + + +def dirs(): + var_lib = os.getenv( + ENV_NETDATA_LIB_DIR, + '@varlibdir_POST@', + ) + plugin_user_config = os.getenv( + ENV_NETDATA_USER_CONFIG_DIR, + '@configdir_POST@', + ) + plugin_stock_config = os.getenv( + ENV_NETDATA_STOCK_CONFIG_DIR, + '@libconfigdir_POST@', + ) + pluginsd = os.getenv( + ENV_NETDATA_PLUGINS_DIR, + os.path.dirname(__file__), + ) + locks = os.getenv( + ENV_NETDATA_LOCK_DIR, + os.path.join('@varlibdir_POST@', 'lock') + ) + modules_user_config = os.path.join(plugin_user_config, 'python.d') + modules_stock_config = os.path.join(plugin_stock_config, 'python.d') + modules = os.path.abspath(pluginsd + '/../python.d') + user_modules = [os.path.join(p, 'python.d') for p in + os.getenv(ENV_NETDATA_USER_PLUGINS_DIRS, "").split(" ") if + p] + + Dirs = collections.namedtuple( + 'Dirs', + [ + 'plugin_user_config', + 'plugin_stock_config', + 'modules_user_config', + 'modules_stock_config', + 'modules', + 'user_modules', + 'var_lib', + 'locks', + ] + ) + return Dirs( + plugin_user_config, + plugin_stock_config, + modules_user_config, + modules_stock_config, + modules, + user_modules, + var_lib, + locks, + ) + + +DIRS = dirs() + +IS_ATTY = sys.stdout.isatty() or sys.stderr.isatty() + +MODULE_SUFFIX = '.chart.py' + + +def find_available_modules(*directories): + AvailableModule = collections.namedtuple( + 'AvailableModule', + [ + 'filepath', + 'name', + ] + ) + available = list() + for d in directories: + try: + if not os.path.isdir(d): + continue + files = sorted(os.listdir(d)) + except OSError: + continue + modules = [m for m in files if m.endswith(MODULE_SUFFIX)] + available.extend([AvailableModule(os.path.join(d, m), m[:-len(MODULE_SUFFIX)]) for m in modules]) + + return available + + +def available_modules(): + obsolete = ( + 'apache_cache', # replaced by web_log + 'cpuidle', # rewritten in C + 'cpufreq', # rewritten in C + 'gunicorn_log', # replaced by web_log + 'linux_power_supply', # rewritten in C + 'nginx_log', # replaced by web_log + 'mdstat', # rewritten in C + 'sslcheck', # rewritten in Go, memory leak bug https://github.com/netdata/netdata/issues/5624 + 'unbound', # rewritten in Go + ) + + stock = [m for m in find_available_modules(DIRS.modules) if m.name not in obsolete] + user = find_available_modules(*DIRS.user_modules) + + available, seen = list(), set() + for m in user + stock: + if m.name in seen: + continue + seen.add(m.name) + available.append(m) + + return available + + +AVAILABLE_MODULES = available_modules() + +JOB_BASE_CONF = { + 'update_every': int(os.getenv(ENV_NETDATA_UPDATE_EVERY, 1)), + 'priority': 60000, + 'autodetection_retry': 0, + 'chart_cleanup': 10, + 'penalty': True, + 'name': str(), +} + +PLUGIN_BASE_CONF = { + 'enabled': True, + 'default_run': True, + 'gc_run': True, + 'gc_interval': 300, +} + + +def multi_path_find(name, *paths): + for path in paths: + abs_name = os.path.join(path, name) + if os.path.isfile(abs_name): + return abs_name + return str() + + +def load_module(name, filepath): + module = SourceFileLoader('pythond_' + name, filepath) + if isinstance(module, types.ModuleType): + return module + return module.load_module() + + +class ModuleConfig: + def __init__(self, name, config=None): + self.name = name + self.config = config or OrderedDict() + + def load(self, abs_path): + self.config.update(load_config(abs_path) or dict()) + + def defaults(self): + keys = ( + 'update_every', + 'priority', + 'autodetection_retry', + 'chart_cleanup', + 'penalty', + ) + return dict((k, self.config[k]) for k in keys if k in self.config) + + def create_job(self, job_name, job_config=None): + job_config = job_config or dict() + + config = OrderedDict() + config.update(job_config) + config['job_name'] = job_name + for k, v in self.defaults().items(): + config.setdefault(k, v) + + return config + + def job_names(self): + return [v for v in self.config if isinstance(self.config.get(v), dict)] + + def single_job(self): + return [self.create_job(self.name, self.config)] + + def multi_job(self): + return [self.create_job(n, self.config[n]) for n in self.job_names()] + + def create_jobs(self): + return self.multi_job() or self.single_job() + + +class JobsConfigsBuilder: + def __init__(self, config_dirs): + self.config_dirs = config_dirs + self.log = PythonDLogger() + self.job_defaults = None + self.module_defaults = None + self.min_update_every = None + + def load_module_config(self, module_name): + name = '{0}.conf'.format(module_name) + self.log.debug("[{0}] looking for '{1}' in {2}".format(module_name, name, self.config_dirs)) + config = ModuleConfig(module_name) + + abs_path = multi_path_find(name, *self.config_dirs) + if not abs_path: + self.log.warning("[{0}] '{1}' was not found".format(module_name, name)) + return config + + self.log.debug("[{0}] loading '{1}'".format(module_name, abs_path)) + try: + config.load(abs_path) + except Exception as error: + self.log.error("[{0}] error on loading '{1}' : {2}".format(module_name, abs_path, repr(error))) + return None + + self.log.debug("[{0}] '{1}' is loaded".format(module_name, abs_path)) + return config + + @staticmethod + def apply_defaults(jobs, defaults): + if defaults is None: + return + for k, v in defaults.items(): + for job in jobs: + job.setdefault(k, v) + + def set_min_update_every(self, jobs, min_update_every): + if min_update_every is None: + return + for job in jobs: + if 'update_every' in job and job['update_every'] < self.min_update_every: + job['update_every'] = self.min_update_every + + def build(self, module_name): + config = self.load_module_config(module_name) + if config is None: + return None + + configs = config.create_jobs() + self.log.info("[{0}] built {1} job(s) configs".format(module_name, len(configs))) + + self.apply_defaults(configs, self.module_defaults) + self.apply_defaults(configs, self.job_defaults) + self.set_min_update_every(configs, self.min_update_every) + + return configs + + +JOB_STATUS_ACTIVE = 'active' +JOB_STATUS_RECOVERING = 'recovering' +JOB_STATUS_DROPPED = 'dropped' +JOB_STATUS_INIT = 'initial' + + +class Job(threading.Thread): + inf = -1 + + def __init__(self, service, module_name, config): + threading.Thread.__init__(self) + self.daemon = True + self.service = service + self.module_name = module_name + self.config = config + self.real_name = config['job_name'] + self.actual_name = config['override_name'] or self.real_name + self.autodetection_retry = config['autodetection_retry'] + self.checks = self.inf + self.job = None + self.status = JOB_STATUS_INIT + + def is_inited(self): + return self.job is not None + + def init(self): + self.job = self.service(configuration=copy.deepcopy(self.config)) + + def full_name(self): + return self.job.name + + def check(self): + ok = self.job.check() + self.checks -= self.checks != self.inf and not ok + return ok + + def create(self): + self.job.create() + + def need_to_recheck(self): + return self.autodetection_retry != 0 and self.checks != 0 + + def run(self): + self.job.run() + + +class ModuleSrc: + def __init__(self, m): + self.name = m.name + self.filepath = m.filepath + self.src = None + + def load(self): + self.src = load_module(self.name, self.filepath) + + def get(self, key): + return getattr(self.src, key, None) + + def service(self): + return self.get('Service') + + def defaults(self): + keys = ( + 'update_every', + 'priority', + 'autodetection_retry', + 'chart_cleanup', + 'penalty', + ) + return dict((k, self.get(k)) for k in keys if self.get(k) is not None) + + def is_disabled_by_default(self): + return bool(self.get('disabled_by_default')) + + +class JobsStatuses: + def __init__(self): + self.items = OrderedDict() + + def dump(self): + return json.dumps(self.items, indent=2) + + def get(self, module_name, job_name): + if module_name not in self.items: + return None + return self.items[module_name].get(job_name) + + def has(self, module_name, job_name): + return self.get(module_name, job_name) is not None + + def from_file(self, path): + with open(path) as f: + data = json.load(f) + return self.from_json(data) + + @staticmethod + def from_json(items): + if not isinstance(items, dict): + raise Exception('items obj has wrong type : {0}'.format(type(items))) + if not items: + return JobsStatuses() + + v = OrderedDict() + for mod_name in sorted(items): + if not items[mod_name]: + continue + v[mod_name] = OrderedDict() + for job_name in sorted(items[mod_name]): + v[mod_name][job_name] = items[mod_name][job_name] + + rv = JobsStatuses() + rv.items = v + return rv + + @staticmethod + def from_jobs(jobs): + v = OrderedDict() + for job in jobs: + status = job.status + if status not in (JOB_STATUS_ACTIVE, JOB_STATUS_RECOVERING): + continue + if job.module_name not in v: + v[job.module_name] = OrderedDict() + v[job.module_name][job.real_name] = status + + rv = JobsStatuses() + rv.items = v + return rv + + +class StdoutSaver: + @staticmethod + def save(dump): + print(dump) + + +class CachedFileSaver: + def __init__(self, path): + self.last_save_success = False + self.last_saved_dump = str() + self.path = path + + def save(self, dump): + if self.last_save_success and self.last_saved_dump == dump: + return + try: + with open(self.path, 'w') as out: + out.write(dump) + except Exception: + self.last_save_success = False + raise + self.last_saved_dump = dump + self.last_save_success = True + + +class PluginConfig(dict): + def __init__(self, *args): + dict.__init__(self, *args) + + def is_module_explicitly_enabled(self, module_name): + return self._is_module_enabled(module_name, True) + + def is_module_enabled(self, module_name): + return self._is_module_enabled(module_name, False) + + def _is_module_enabled(self, module_name, explicit): + if module_name in self: + return self[module_name] + if explicit: + return False + return self['default_run'] + + +class FileLockRegistry: + def __init__(self, path): + self.path = path + self.locks = dict() + + @staticmethod + def rename(name): + # go version name is 'docker' + if name.startswith("dockerd"): + name = "docker" + name[7:] + return name + + + def register(self, name): + name = self.rename(name) + if name in self.locks: + return + file = os.path.join(self.path, '{0}.collector.lock'.format(name)) + lock = filelock.FileLock(file) + lock.acquire(timeout=0) + self.locks[name] = lock + + def unregister(self, name): + name = self.rename(name) + if name not in self.locks: + return + lock = self.locks[name] + lock.release() + del self.locks[name] + + +class DummyRegistry: + def register(self, name): + pass + + def unregister(self, name): + pass + + +class Plugin: + config_name = 'python.d.conf' + jobs_status_dump_name = 'pythond-jobs-statuses.json' + + def __init__(self, modules_to_run, min_update_every, registry): + self.modules_to_run = modules_to_run + self.min_update_every = min_update_every + self.config = PluginConfig(PLUGIN_BASE_CONF) + self.log = PythonDLogger() + self.registry = registry + self.started_jobs = collections.defaultdict(dict) + self.jobs = list() + self.saver = None + self.runs = 0 + + def load_config_file(self, filepath, expected): + self.log.debug("looking for '{0}'".format(filepath)) + if not os.path.isfile(filepath): + log = self.log.info if not expected else self.log.error + log("'{0}' was not found".format(filepath)) + return dict() + try: + config = load_config(filepath) + except Exception as error: + self.log.error("error on loading '{0}' : {1}".format(filepath, repr(error))) + return dict() + self.log.debug("'{0}' is loaded".format(filepath)) + return config + + def load_config(self): + user_config = self.load_config_file( + filepath=os.path.join(DIRS.plugin_user_config, self.config_name), + expected=False, + ) + stock_config = self.load_config_file( + filepath=os.path.join(DIRS.plugin_stock_config, self.config_name), + expected=True, + ) + self.config.update(stock_config) + self.config.update(user_config) + + def load_job_statuses(self): + self.log.debug("looking for '{0}' in {1}".format(self.jobs_status_dump_name, DIRS.var_lib)) + abs_path = multi_path_find(self.jobs_status_dump_name, DIRS.var_lib) + if not abs_path: + self.log.warning("'{0}' was not found".format(self.jobs_status_dump_name)) + return + + self.log.debug("loading '{0}'".format(abs_path)) + try: + statuses = JobsStatuses().from_file(abs_path) + except Exception as error: + self.log.error("[{0}] config file invalid YAML format: {1}".format( + module_name, ' '.join([v.strip() for v in str(error).split('\n')]))) + return None + self.log.debug("'{0}' is loaded".format(abs_path)) + return statuses + + def create_jobs(self, job_statuses=None): + paths = [ + DIRS.modules_user_config, + DIRS.modules_stock_config, + ] + + builder = JobsConfigsBuilder(paths) + builder.job_defaults = JOB_BASE_CONF + builder.min_update_every = self.min_update_every + + jobs = list() + for m in self.modules_to_run: + if not self.config.is_module_enabled(m.name): + self.log.info("[{0}] is disabled in the configuration file, skipping it".format(m.name)) + continue + + src = ModuleSrc(m) + try: + src.load() + except Exception as error: + self.log.warning("[{0}] error on loading source : {1}, skipping it".format(m.name, repr(error))) + continue + self.log.debug("[{0}] loaded module source : '{1}'".format(m.name, m.filepath)) + + if not (src.service() and callable(src.service())): + self.log.warning("[{0}] has no callable Service object, skipping it".format(m.name)) + continue + + if src.is_disabled_by_default() and not self.config.is_module_explicitly_enabled(m.name): + self.log.info("[{0}] is disabled by default, skipping it".format(m.name)) + continue + + builder.module_defaults = src.defaults() + configs = builder.build(m.name) + if not configs: + self.log.info("[{0}] has no job configs, skipping it".format(m.name)) + continue + + for config in configs: + config['job_name'] = re.sub(r'\s+', '_', config['job_name']) + config['override_name'] = re.sub(r'\s+', '_', config.pop('name')) + + job = Job(src.service(), m.name, config) + + was_previously_active = job_statuses and job_statuses.has(job.module_name, job.real_name) + if was_previously_active and job.autodetection_retry == 0: + self.log.debug('{0}[{1}] was previously active, applying recovering settings'.format( + job.module_name, job.real_name)) + job.checks = 11 + job.autodetection_retry = 30 + + jobs.append(job) + + return jobs + + def setup(self): + self.load_config() + + if not self.config['enabled']: + self.log.info('disabled in the configuration file') + return False + + statuses = self.load_job_statuses() + + self.jobs = self.create_jobs(statuses) + if not self.jobs: + self.log.info('no jobs to run') + return False + + if not IS_ATTY: + abs_path = os.path.join(DIRS.var_lib, self.jobs_status_dump_name) + self.saver = CachedFileSaver(abs_path) + return True + + def start_jobs(self, *jobs): + for job in jobs: + if job.status not in (JOB_STATUS_INIT, JOB_STATUS_RECOVERING): + continue + + if job.actual_name in self.started_jobs[job.module_name]: + self.log.info('{0}[{1}] : already served by another job, skipping it'.format( + job.module_name, job.real_name)) + job.status = JOB_STATUS_DROPPED + continue + + if not job.is_inited(): + try: + job.init() + except Exception as error: + self.log.warning("{0}[{1}] : unhandled exception on init : {2}, skipping the job".format( + job.module_name, job.real_name, repr(error))) + job.status = JOB_STATUS_DROPPED + continue + + try: + ok = job.check() + except Exception as error: + self.log.warning("{0}[{1}] : unhandled exception on check : {2}, skipping the job".format( + job.module_name, job.real_name, repr(error))) + job.status = JOB_STATUS_DROPPED + continue + if not ok: + self.log.info('{0}[{1}] : check failed'.format(job.module_name, job.real_name)) + job.status = JOB_STATUS_RECOVERING if job.need_to_recheck() else JOB_STATUS_DROPPED + continue + self.log.info('{0}[{1}] : check success'.format(job.module_name, job.real_name)) + + try: + self.registry.register(job.full_name()) + except filelock.Timeout as error: + self.log.info('{0}[{1}] : already registered by another process, skipping the job ({2})'.format( + job.module_name, job.real_name, error)) + job.status = JOB_STATUS_DROPPED + continue + except Exception as error: + self.log.warning('{0}[{1}] : registration failed: {2}, skipping the job'.format( + job.module_name, job.real_name, error)) + job.status = JOB_STATUS_DROPPED + continue + + try: + job.create() + except Exception as error: + self.log.warning("{0}[{1}] : unhandled exception on create : {2}, skipping the job".format( + job.module_name, job.real_name, repr(error))) + job.status = JOB_STATUS_DROPPED + try: + self.registry.unregister(job.full_name()) + except Exception as error: + self.log.warning('{0}[{1}] : deregistration failed: {2}'.format( + job.module_name, job.real_name, error)) + continue + + self.started_jobs[job.module_name] = job.actual_name + job.status = JOB_STATUS_ACTIVE + job.start() + + @staticmethod + def keep_alive(): + if not IS_ATTY: + safe_print('\n') + + def garbage_collection(self): + if self.config['gc_run'] and self.runs % self.config['gc_interval'] == 0: + v = gc.collect() + self.log.debug('GC collection run result: {0}'.format(v)) + + def restart_recovering_jobs(self): + for job in self.jobs: + if job.status != JOB_STATUS_RECOVERING: + continue + if self.runs % job.autodetection_retry != 0: + continue + self.start_jobs(job) + + def cleanup_jobs(self): + self.jobs = [j for j in self.jobs if j.status != JOB_STATUS_DROPPED] + + def have_alive_jobs(self): + return next( + (True for job in self.jobs if job.status in (JOB_STATUS_RECOVERING, JOB_STATUS_ACTIVE)), + False, + ) + + def save_job_statuses(self): + if self.saver is None: + return + if self.runs % 10 != 0: + return + dump = JobsStatuses().from_jobs(self.jobs).dump() + try: + self.saver.save(dump) + except Exception as error: + self.log.error("error on saving jobs statuses dump : {0}".format(repr(error))) + + def serve_once(self): + if not self.have_alive_jobs(): + self.log.info('no jobs to serve') + return False + + time.sleep(1) + self.runs += 1 + + self.keep_alive() + self.garbage_collection() + self.cleanup_jobs() + self.restart_recovering_jobs() + self.save_job_statuses() + return True + + def serve(self): + while self.serve_once(): + pass + + def run(self): + self.start_jobs(*self.jobs) + self.serve() + + +def parse_command_line(): + opts = sys.argv[:][1:] + + debug = False + trace = False + nolock = False + update_every = 1 + modules_to_run = list() + + def find_first_positive_int(values): + return next((v for v in values if v.isdigit() and int(v) >= 1), None) + + u = find_first_positive_int(opts) + if u is not None: + update_every = int(u) + opts.remove(u) + if 'debug' in opts: + debug = True + opts.remove('debug') + if 'trace' in opts: + trace = True + opts.remove('trace') + if 'nolock' in opts: + nolock = True + opts.remove('nolock') + if opts: + modules_to_run = list(opts) + + cmd = collections.namedtuple( + 'CMD', + [ + 'update_every', + 'debug', + 'trace', + 'nolock', + 'modules_to_run', + ]) + return cmd( + update_every, + debug, + trace, + nolock, + modules_to_run, + ) + + +def guess_module(modules, *names): + def guess(n): + found = None + for i, _ in enumerate(n): + cur = [x for x in modules if x.startswith(name[:i + 1])] + if not cur: + return found + found = cur + return found + + guessed = list() + for name in names: + name = name.lower() + m = guess(name) + if m: + guessed.extend(m) + return sorted(set(guessed)) + + +def disable(): + if not IS_ATTY: + safe_print('DISABLE') + exit(0) + + +def get_modules_to_run(cmd): + if not cmd.modules_to_run: + return AVAILABLE_MODULES + + modules_to_run, seen = list(), set() + for m in AVAILABLE_MODULES: + if m.name not in cmd.modules_to_run or m.name in seen: + continue + seen.add(m.name) + modules_to_run.append(m) + + return modules_to_run + + +def main(): + cmd = parse_command_line() + log = PythonDLogger() + + if cmd.debug: + log.logger.severity = 'DEBUG' + if cmd.trace: + log.log_traceback = True + + log.info('using python v{0}'.format(PY_VERSION[0])) + + if DIRS.locks and not cmd.nolock: + registry = FileLockRegistry(DIRS.locks) + else: + registry = DummyRegistry() + + unique_avail_module_names = set([m.name for m in AVAILABLE_MODULES]) + unknown = set(cmd.modules_to_run) - unique_avail_module_names + if unknown: + log.error('unknown modules : {0}'.format(sorted(list(unknown)))) + guessed = guess_module(unique_avail_module_names, *cmd.modules_to_run) + if guessed: + log.info('probably you meant : \n{0}'.format(pprint.pformat(guessed, width=1))) + return + + p = Plugin( + get_modules_to_run(cmd), + cmd.update_every, + registry, + ) + + # cheap attempt to reduce chance of python.d job running before go.d + # TODO: better implementation needed + if not IS_ATTY: + time.sleep(1.5) + + try: + if not p.setup(): + return + p.run() + except KeyboardInterrupt: + pass + log.info('exiting from main...') + + +if __name__ == "__main__": + main() + disable() diff --git a/collectors/python.d.plugin/python_modules/__init__.py b/collectors/python.d.plugin/python_modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/ExecutableService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/ExecutableService.py new file mode 100644 index 0000000..a74b423 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/ExecutableService.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from subprocess import Popen, PIPE + +from bases.FrameworkServices.SimpleService import SimpleService +from bases.collection import find_binary + + +class ExecutableService(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.command = None + + def _get_raw_data(self, stderr=False, command=None): + """ + Get raw data from executed command + :return: + """ + command = command or self.command + self.debug("Executing command '{0}'".format(' '.join(command))) + try: + p = Popen(command, stdout=PIPE, stderr=PIPE) + except Exception as error: + self.error('Executing command {0} resulted in error: {1}'.format(command, error)) + return None + + data = list() + std = p.stderr if stderr else p.stdout + for line in std: + try: + data.append(line.decode('utf-8')) + except (TypeError, UnicodeDecodeError): + continue + + return data + + def check(self): + """ + Parse basic configuration, check if command is whitelisted and is returning values + :return: + """ + # Preference: 1. "command" from configuration file 2. "command" from plugin (if specified) + if 'command' in self.configuration: + self.command = self.configuration['command'] + + # "command" must be: 1.not None 2. type + if not (self.command and isinstance(self.command, str)): + self.error('Command is not defined or command type is not ') + return False + + # Split "command" into: 1. command 2. options + command, opts = self.command.split()[0], self.command.split()[1:] + + # Check for "bad" symbols in options. No pipes, redirects etc. + opts_list = ['&', '|', ';', '>', '<'] + bad_opts = set(''.join(opts)) & set(opts_list) + if bad_opts: + self.error("Bad command argument(s): {opts}".format(opts=bad_opts)) + return False + + # Find absolute path ('echo' => '/bin/echo') + if '/' not in command: + command = find_binary(command) + if not command: + self.error('Can\'t locate "{command}" binary'.format(command=self.command)) + return False + # Check if binary exist and executable + else: + if not os.access(command, os.X_OK): + self.error('"{binary}" is not executable'.format(binary=command)) + return False + + self.command = [command] + opts if opts else [command] + + try: + data = self._get_data() + except Exception as error: + self.error('_get_data() failed. Command: {command}. Error: {error}'.format(command=self.command, + error=error)) + return False + + if isinstance(data, dict) and data: + return True + self.error('Command "{command}" returned no data'.format(command=self.command)) + return False diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/LogService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/LogService.py new file mode 100644 index 0000000..a55e33f --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/LogService.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +from glob import glob +import sys +import os + +from bases.FrameworkServices.SimpleService import SimpleService + + +class LogService(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.log_path = self.configuration.get('path') + self.__glob_path = self.log_path + self._last_position = 0 + self.__re_find = dict(current=0, run=0, maximum=60) + self.__open_args = {'errors': 'replace'} if sys.version_info[0] > 2 else {} + + def _get_raw_data(self): + """ + Get log lines since last poll + :return: list + """ + lines = list() + try: + if self.__re_find['current'] == self.__re_find['run']: + self._find_recent_log_file() + size = os.path.getsize(self.log_path) + if size == self._last_position: + self.__re_find['current'] += 1 + return list() # return empty list if nothing has changed + elif size < self._last_position: + self._last_position = 0 # read from beginning if file has shrunk + + with open(self.log_path, **self.__open_args) as fp: + fp.seek(self._last_position) + for line in fp: + lines.append(line) + self._last_position = fp.tell() + self.__re_find['current'] = 0 + except (OSError, IOError) as error: + self.__re_find['current'] += 1 + self.error(str(error)) + + return lines or None + + def _find_recent_log_file(self): + """ + :return: + """ + self.__re_find['run'] = self.__re_find['maximum'] + self.__re_find['current'] = 0 + self.__glob_path = self.__glob_path or self.log_path # workaround for modules w/o config files + path_list = glob(self.__glob_path) + if path_list: + self.log_path = max(path_list) + return True + return False + + def check(self): + """ + Parse basic configuration and check if log file exists + :return: boolean + """ + if not self.log_path: + self.error('No path to log specified') + return None + + if self._find_recent_log_file() and os.access(self.log_path, os.R_OK) and os.path.isfile(self.log_path): + return True + self.error('Cannot access {0}'.format(self.log_path)) + return False + + def create(self): + # set cursor at last byte of log file + self._last_position = os.path.getsize(self.log_path) + status = SimpleService.create(self) + return status diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/MySQLService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/MySQLService.py new file mode 100644 index 0000000..7f5c7d2 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/MySQLService.py @@ -0,0 +1,163 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +from sys import exc_info + +try: + import MySQLdb + + PY_MYSQL = True +except ImportError: + try: + import pymysql as MySQLdb + + PY_MYSQL = True + except ImportError: + PY_MYSQL = False + +from bases.FrameworkServices.SimpleService import SimpleService + + +class MySQLService(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.__connection = None + self.__conn_properties = dict() + self.extra_conn_properties = dict() + self.__queries = self.configuration.get('queries', dict()) + self.queries = dict() + + def __connect(self): + try: + connection = MySQLdb.connect(connect_timeout=self.update_every, **self.__conn_properties) + except (MySQLdb.MySQLError, TypeError, AttributeError) as error: + return None, str(error) + else: + return connection, None + + def check(self): + def get_connection_properties(conf, extra_conf): + properties = dict() + if conf.get('user'): + properties['user'] = conf['user'] + if conf.get('pass'): + properties['passwd'] = conf['pass'] + + if conf.get('socket'): + properties['unix_socket'] = conf['socket'] + elif conf.get('host'): + properties['host'] = conf['host'] + properties['port'] = int(conf.get('port', 3306)) + elif conf.get('my.cnf'): + properties['read_default_file'] = conf['my.cnf'] + + if conf.get('ssl'): + properties['ssl'] = conf['ssl'] + + if isinstance(extra_conf, dict) and extra_conf: + properties.update(extra_conf) + + return properties or None + + def is_valid_queries_dict(raw_queries, log_error): + """ + :param raw_queries: dict: + :param log_error: function: + :return: dict or None + + raw_queries is valid when: type and not empty after is_valid_query(for all queries) + """ + + def is_valid_query(query): + return all([isinstance(query, str), + query.startswith(('SELECT', 'select', 'SHOW', 'show'))]) + + if hasattr(raw_queries, 'keys') and raw_queries: + valid_queries = dict([(n, q) for n, q in raw_queries.items() if is_valid_query(q)]) + bad_queries = set(raw_queries) - set(valid_queries) + + if bad_queries: + log_error('Removed query(s): {queries}'.format(queries=bad_queries)) + return valid_queries + else: + log_error('Unsupported "queries" format. Must be not empty ') + return None + + if not PY_MYSQL: + self.error('MySQLdb or PyMySQL module is needed to use mysql.chart.py plugin') + return False + + # Preference: 1. "queries" from the configuration file 2. "queries" from the module + self.queries = self.__queries or self.queries + # Check if "self.queries" exist, not empty and all queries are in valid format + self.queries = is_valid_queries_dict(self.queries, self.error) + if not self.queries: + return None + + # Get connection properties + self.__conn_properties = get_connection_properties(self.configuration, self.extra_conn_properties) + if not self.__conn_properties: + self.error('Connection properties are missing') + return False + + # Create connection to the database + self.__connection, error = self.__connect() + if error: + self.error('Can\'t establish connection to MySQL: {error}'.format(error=error)) + return False + + try: + data = self._get_data() + except Exception as error: + self.error('_get_data() failed. Error: {error}'.format(error=error)) + return False + + if isinstance(data, dict) and data: + return True + self.error("_get_data() returned no data or type is not ") + return False + + def _get_raw_data(self, description=None): + """ + Get raw data from MySQL server + :return: dict: fetchall() or (fetchall(), description) + """ + + if not self.__connection: + self.__connection, error = self.__connect() + if error: + return None + + raw_data = dict() + queries = dict(self.queries) + try: + cursor = self.__connection.cursor() + for name, query in queries.items(): + try: + cursor.execute(query) + except (MySQLdb.ProgrammingError, MySQLdb.OperationalError) as error: + if self.__is_error_critical(err_class=exc_info()[0], err_text=str(error)): + cursor.close() + raise RuntimeError + self.error('Removed query: {name}[{query}]. Error: error'.format(name=name, + query=query, + error=error)) + self.queries.pop(name) + continue + else: + raw_data[name] = (cursor.fetchall(), cursor.description) if description else cursor.fetchall() + cursor.close() + self.__connection.commit() + except (MySQLdb.MySQLError, RuntimeError, TypeError, AttributeError): + self.__connection.close() + self.__connection = None + return None + else: + return raw_data or None + + @staticmethod + def __is_error_critical(err_class, err_text): + return err_class == MySQLdb.OperationalError and all(['denied' not in err_text, + 'Unknown column' not in err_text]) diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py new file mode 100644 index 0000000..a7acc23 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from bases.charts import Charts, ChartError, create_runtime_chart +from bases.collection import safe_print +from bases.loggers import PythonDLimitedLogger +from third_party.monotonic import monotonic +from time import sleep, time + +RUNTIME_CHART_UPDATE = 'BEGIN netdata.runtime_{job_name} {since_last}\n' \ + 'SET run_time = {elapsed}\n' \ + 'END\n' + +PENALTY_EVERY = 5 +MAX_PENALTY = 10 * 60 # 10 minutes + +ND_INTERNAL_MONITORING_DISABLED = os.getenv("NETDATA_INTERNALS_MONITORING") == "NO" + + +class RuntimeCounters: + def __init__(self, configuration): + """ + :param configuration: + """ + self.update_every = int(configuration.pop('update_every')) + self.do_penalty = configuration.pop('penalty') + + self.start_mono = 0 + self.start_real = 0 + self.retries = 0 + self.penalty = 0 + self.elapsed = 0 + self.prev_update = 0 + + self.runs = 1 + + def calc_next(self): + self.start_mono = monotonic() + return self.start_mono - (self.start_mono % self.update_every) + self.update_every + self.penalty + + def sleep_until_next(self): + next_time = self.calc_next() + while self.start_mono < next_time: + sleep(next_time - self.start_mono) + self.start_mono = monotonic() + self.start_real = time() + + def handle_retries(self): + self.retries += 1 + if self.do_penalty and self.retries % PENALTY_EVERY == 0: + self.penalty = round(min(self.retries * self.update_every / 2, MAX_PENALTY)) + + +def clean_module_name(name): + if name.startswith('pythond_'): + return name[8:] + return name + + +class SimpleService(PythonDLimitedLogger, object): + """ + Prototype of Service class. + Implemented basic functionality to run jobs by `python.d.plugin` + """ + + def __init__(self, configuration, name=''): + """ + :param configuration: + :param name: + """ + PythonDLimitedLogger.__init__(self) + self.configuration = configuration + self.order = list() + self.definitions = dict() + + self.module_name = clean_module_name(self.__module__) + self.job_name = configuration.pop('job_name') + self.actual_job_name = self.job_name or self.module_name + self.override_name = configuration.pop('override_name') + self.fake_name = None + + self._runtime_counters = RuntimeCounters(configuration=configuration) + self.charts = Charts(job_name=self.actual_name, + actual_job_name=self.actual_job_name, + priority=configuration.pop('priority'), + cleanup=configuration.pop('chart_cleanup'), + get_update_every=self.get_update_every, + module_name=self.module_name) + + def __repr__(self): + return '<{cls_bases}: {name}>'.format(cls_bases=', '.join(c.__name__ for c in self.__class__.__bases__), + name=self.name) + + @property + def name(self): + name = self.override_name or self.job_name + if name and name != self.module_name: + return '_'.join([self.module_name, name]) + return self.module_name + + def actual_name(self): + return self.fake_name or self.name + + @property + def runs_counter(self): + return self._runtime_counters.runs + + @property + def update_every(self): + return self._runtime_counters.update_every + + @update_every.setter + def update_every(self, value): + """ + :param value: + :return: + """ + self._runtime_counters.update_every = value + + def get_update_every(self): + return self.update_every + + def check(self): + """ + check() prototype + :return: boolean + """ + self.debug("job doesn't implement check() method. Using default which simply invokes get_data().") + data = self.get_data() + if data and isinstance(data, dict): + return True + self.debug('returned value is wrong: {0}'.format(data)) + return False + + @create_runtime_chart + def create(self): + for chart_name in self.order: + chart_config = self.definitions.get(chart_name) + + if not chart_config: + self.debug("create() => [NOT ADDED] chart '{chart_name}' not in definitions. " + "Skipping it.".format(chart_name=chart_name)) + continue + + # create chart + chart_params = [chart_name] + chart_config['options'] + try: + self.charts.add_chart(params=chart_params) + except ChartError as error: + self.error("create() => [NOT ADDED] (chart '{chart}': {error})".format(chart=chart_name, + error=error)) + continue + + # add dimensions to chart + for dimension in chart_config['lines']: + try: + self.charts[chart_name].add_dimension(dimension) + except ChartError as error: + self.error("create() => [NOT ADDED] (dimension '{dimension}': {error})".format(dimension=dimension, + error=error)) + continue + + # add variables to chart + if 'variables' in chart_config: + for variable in chart_config['variables']: + try: + self.charts[chart_name].add_variable(variable) + except ChartError as error: + self.error("create() => [NOT ADDED] (variable '{var}': {error})".format(var=variable, + error=error)) + continue + + del self.order + del self.definitions + + # True if job has at least 1 chart else False + return bool(self.charts) + + def run(self): + """ + Runs job in thread. Handles retries. + Exits when job failed or timed out. + :return: None + """ + job = self._runtime_counters + self.debug('started, update frequency: {freq}'.format(freq=job.update_every)) + + while True: + job.sleep_until_next() + + since = 0 + if job.prev_update: + since = int((job.start_real - job.prev_update) * 1e6) + + try: + updated = self.update(interval=since) + except Exception as error: + self.error('update() unhandled exception: {error}'.format(error=error)) + updated = False + + job.runs += 1 + + if not updated: + job.handle_retries() + else: + job.elapsed = int((monotonic() - job.start_mono) * 1e3) + job.prev_update = job.start_real + job.retries, job.penalty = 0, 0 + if not ND_INTERNAL_MONITORING_DISABLED: + safe_print(RUNTIME_CHART_UPDATE.format(job_name=self.name, + since_last=since, + elapsed=job.elapsed)) + self.debug('update => [{status}] (elapsed time: {elapsed}, failed retries in a row: {retries})'.format( + status='OK' if updated else 'FAILED', + elapsed=job.elapsed if updated else '-', + retries=job.retries)) + + def update(self, interval): + """ + :return: + """ + data = self.get_data() + if not data: + self.debug('get_data() returned no data') + return False + elif not isinstance(data, dict): + self.debug('get_data() returned incorrect type data') + return False + + updated = False + + for chart in self.charts: + if chart.flags.obsoleted: + if chart.can_be_updated(data): + chart.refresh() + else: + continue + elif self.charts.cleanup and chart.penalty >= self.charts.cleanup: + chart.obsolete() + self.info("chart '{0}' was suppressed due to non updating".format(chart.name)) + continue + + ok = chart.update(data, interval) + if ok: + updated = True + + if not updated: + self.debug('none of the charts has been updated') + + return updated + + def get_data(self): + return self._get_data() + + def _get_data(self): + raise NotImplementedError diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py new file mode 100644 index 0000000..d6c7550 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py @@ -0,0 +1,336 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import errno +import socket + +try: + import ssl +except ImportError: + _TLS_SUPPORT = False +else: + _TLS_SUPPORT = True + +if _TLS_SUPPORT: + try: + PROTOCOL_TLS = ssl.PROTOCOL_TLS + except AttributeError: + PROTOCOL_TLS = ssl.PROTOCOL_SSLv23 + +from bases.FrameworkServices.SimpleService import SimpleService + + +DEFAULT_CONNECT_TIMEOUT = 2.0 +DEFAULT_READ_TIMEOUT = 2.0 +DEFAULT_WRITE_TIMEOUT = 2.0 + + +class SocketService(SimpleService): + def __init__(self, configuration=None, name=None): + self._sock = None + self._keep_alive = False + self.host = 'localhost' + self.port = None + self.unix_socket = None + self.dgram_socket = False + self.request = '' + self.tls = False + self.cert = None + self.key = None + self.__socket_config = None + self.__empty_request = "".encode() + SimpleService.__init__(self, configuration=configuration, name=name) + self.connect_timeout = configuration.get('connect_timeout', DEFAULT_CONNECT_TIMEOUT) + self.read_timeout = configuration.get('read_timeout', DEFAULT_READ_TIMEOUT) + self.write_timeout = configuration.get('write_timeout', DEFAULT_WRITE_TIMEOUT) + + def _socket_error(self, message=None): + if self.unix_socket is not None: + self.error('unix socket "{socket}": {message}'.format(socket=self.unix_socket, + message=message)) + else: + if self.__socket_config is not None: + _, _, _, _, sa = self.__socket_config + self.error('socket to "{address}" port {port}: {message}'.format(address=sa[0], + port=sa[1], + message=message)) + else: + self.error('unknown socket: {0}'.format(message)) + + def _connect2socket(self, res=None): + """ + Connect to a socket, passing the result of getaddrinfo() + :return: boolean + """ + if res is None: + res = self.__socket_config + if res is None: + self.error("Cannot create socket to 'None':") + return False + + af, sock_type, proto, _, sa = res + try: + self.debug('Creating socket to "{address}", port {port}'.format(address=sa[0], port=sa[1])) + self._sock = socket.socket(af, sock_type, proto) + except socket.error as error: + self.error('Failed to create socket "{address}", port {port}, error: {error}'.format(address=sa[0], + port=sa[1], + error=error)) + self._sock = None + self.__socket_config = None + return False + + if self.tls: + try: + self.debug('Encapsulating socket with TLS') + self.debug('Using keyfile: {0}, certfile: {1}, cert_reqs: {2}, ssl_version: {3}'.format( + self.key, self.cert, ssl.CERT_NONE, PROTOCOL_TLS + )) + self._sock = ssl.wrap_socket(self._sock, + keyfile=self.key, + certfile=self.cert, + server_side=False, + cert_reqs=ssl.CERT_NONE, + ssl_version=PROTOCOL_TLS, + ) + except (socket.error, ssl.SSLError, IOError, OSError) as error: + self.error('failed to wrap socket : {0}'.format(repr(error))) + self._disconnect() + self.__socket_config = None + return False + + try: + self.debug('connecting socket to "{address}", port {port}'.format(address=sa[0], port=sa[1])) + self._sock.settimeout(self.connect_timeout) + self.debug('set socket connect timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.connect(sa) + except (socket.error, ssl.SSLError) as error: + self.error('Failed to connect to "{address}", port {port}, error: {error}'.format(address=sa[0], + port=sa[1], + error=error)) + self._disconnect() + self.__socket_config = None + return False + + self.debug('connected to "{address}", port {port}'.format(address=sa[0], port=sa[1])) + self.__socket_config = res + return True + + def _connect2unixsocket(self): + """ + Connect to a unix socket, given its filename + :return: boolean + """ + if self.unix_socket is None: + self.error("cannot connect to unix socket 'None'") + return False + + try: + self.debug('attempting DGRAM unix socket "{0}"'.format(self.unix_socket)) + self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) + self._sock.settimeout(self.connect_timeout) + self.debug('set socket connect timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.connect(self.unix_socket) + self.debug('connected DGRAM unix socket "{0}"'.format(self.unix_socket)) + return True + except socket.error as error: + self.debug('Failed to connect DGRAM unix socket "{socket}": {error}'.format(socket=self.unix_socket, + error=error)) + + try: + self.debug('attempting STREAM unix socket "{0}"'.format(self.unix_socket)) + self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + self._sock.settimeout(self.connect_timeout) + self.debug('set socket connect timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.connect(self.unix_socket) + self.debug('connected STREAM unix socket "{0}"'.format(self.unix_socket)) + return True + except socket.error as error: + self.debug('Failed to connect STREAM unix socket "{socket}": {error}'.format(socket=self.unix_socket, + error=error)) + self._sock = None + return False + + def _connect(self): + """ + Recreate socket and connect to it since sockets cannot be reused after closing + Available configurations are IPv6, IPv4 or UNIX socket + :return: + """ + try: + if self.unix_socket is not None: + self._connect2unixsocket() + + else: + if self.__socket_config is not None: + self._connect2socket() + else: + if self.dgram_socket: + sock_type = socket.SOCK_DGRAM + else: + sock_type = socket.SOCK_STREAM + for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, sock_type): + if self._connect2socket(res): + break + + except Exception as error: + self.error('unhandled exception during connect : {0}'.format(repr(error))) + self._sock = None + self.__socket_config = None + + def _disconnect(self): + """ + Close socket connection + :return: + """ + if self._sock is not None: + try: + self.debug('closing socket') + self._sock.shutdown(2) # 0 - read, 1 - write, 2 - all + self._sock.close() + except Exception as error: + if not (hasattr(error, 'errno') and error.errno == errno.ENOTCONN): + self.error(error) + self._sock = None + + def _send(self, request=None): + """ + Send request. + :return: boolean + """ + # Send request if it is needed + if self.request != self.__empty_request: + try: + self.debug('set socket write timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.settimeout(self.write_timeout) + self.debug('sending request: {0}'.format(request or self.request)) + self._sock.send(request or self.request) + except Exception as error: + self._socket_error('error sending request: {0}'.format(error)) + self._disconnect() + return False + return True + + def _receive(self, raw=False): + """ + Receive data from socket + :param raw: set `True` to return bytes + :type raw: bool + :return: decoded str or raw bytes + :rtype: str/bytes + """ + data = "" if not raw else b"" + while True: + self.debug('receiving response') + try: + self.debug('set socket read timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.settimeout(self.read_timeout) + buf = self._sock.recv(4096) + except Exception as error: + self._socket_error('failed to receive response: {0}'.format(error)) + self._disconnect() + break + + if buf is None or len(buf) == 0: # handle server disconnect + if data == "" or data == b"": + self._socket_error('unexpectedly disconnected') + else: + self.debug('server closed the connection') + self._disconnect() + break + + self.debug('received data') + data += buf.decode('utf-8', 'ignore') if not raw else buf + if self._check_raw_data(data): + break + + self.debug(u'final response: {0}'.format(data if not raw else u'binary data')) + return data + + def _get_raw_data(self, raw=False, request=None): + """ + Get raw data with low-level "socket" module. + :param raw: set `True` to return bytes + :type raw: bool + :return: decoded data (str) or raw data (bytes) + :rtype: str/bytes + """ + if self._sock is None: + self._connect() + if self._sock is None: + return None + + # Send request if it is needed + if not self._send(request): + return None + + data = self._receive(raw) + + if not self._keep_alive: + self._disconnect() + + return data + + @staticmethod + def _check_raw_data(data): + """ + Check if all data has been gathered from socket + :param data: str + :return: boolean + """ + return bool(data) + + def _parse_config(self): + """ + Parse configuration data + :return: boolean + """ + try: + self.unix_socket = str(self.configuration['socket']) + except (KeyError, TypeError): + self.debug('No unix socket specified. Trying TCP/IP socket.') + self.unix_socket = None + try: + self.host = str(self.configuration['host']) + except (KeyError, TypeError): + self.debug('No host specified. Using: "{0}"'.format(self.host)) + try: + self.port = int(self.configuration['port']) + except (KeyError, TypeError): + self.debug('No port specified. Using: "{0}"'.format(self.port)) + + self.tls = bool(self.configuration.get('tls', self.tls)) + if self.tls and not _TLS_SUPPORT: + self.warning('TLS requested but no TLS module found, disabling TLS support.') + self.tls = False + if _TLS_SUPPORT and not self.tls: + self.debug('No TLS preference specified, not using TLS.') + + if self.tls and _TLS_SUPPORT: + self.key = self.configuration.get('tls_key_file') + self.cert = self.configuration.get('tls_cert_file') + if not self.cert: + # If there's not a valid certificate, clear the key too. + self.debug('No valid TLS client certificate configuration found.') + self.key = None + self.cert = None + elif not self.key: + # If a key isn't listed, the config may still be + # valid, because there may be a key attached to the + # certificate. + self.info('No TLS client key specified, assuming it\'s attached to the certificate.') + self.key = None + + try: + self.request = str(self.configuration['request']) + except (KeyError, TypeError): + self.debug('No request specified. Using: "{0}"'.format(self.request)) + + self.request = self.request.encode() + + def check(self): + self._parse_config() + return SimpleService.check(self) diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py new file mode 100644 index 0000000..1faf036 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py @@ -0,0 +1,207 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import urllib3 + +from distutils.version import StrictVersion as version + +from bases.FrameworkServices.SimpleService import SimpleService + +try: + urllib3.disable_warnings() +except AttributeError: + pass + +# https://github.com/urllib3/urllib3/blob/master/CHANGES.rst#19-2014-07-04 +# New retry logic and urllib3.util.retry.Retry configuration object. (Issue https://github.com/urllib3/urllib3/pull/326) +URLLIB3_MIN_REQUIRED_VERSION = '1.9' +URLLIB3_VERSION = urllib3.__version__ +URLLIB3 = 'urllib3' + + +def version_check(): + if version(URLLIB3_VERSION) >= version(URLLIB3_MIN_REQUIRED_VERSION): + return + + err = '{0} version: {1}, minimum required version: {2}, please upgrade'.format( + URLLIB3, + URLLIB3_VERSION, + URLLIB3_MIN_REQUIRED_VERSION, + ) + raise Exception(err) + + +class UrlService(SimpleService): + def __init__(self, configuration=None, name=None): + version_check() + SimpleService.__init__(self, configuration=configuration, name=name) + self.debug("{0} version: {1}".format(URLLIB3, URLLIB3_VERSION)) + self.url = self.configuration.get('url') + self.user = self.configuration.get('user') + self.password = self.configuration.get('pass') + self.proxy_user = self.configuration.get('proxy_user') + self.proxy_password = self.configuration.get('proxy_pass') + self.proxy_url = self.configuration.get('proxy_url') + self.method = self.configuration.get('method', 'GET') + self.header = self.configuration.get('header') + self.body = self.configuration.get('body') + self.request_timeout = self.configuration.get('timeout', 1) + self.respect_retry_after_header = self.configuration.get('respect_retry_after_header') + self.tls_verify = self.configuration.get('tls_verify') + self.tls_ca_file = self.configuration.get('tls_ca_file') + self.tls_key_file = self.configuration.get('tls_key_file') + self.tls_cert_file = self.configuration.get('tls_cert_file') + self._manager = None + + def __make_headers(self, **header_kw): + user = header_kw.get('user') or self.user + password = header_kw.get('pass') or self.password + proxy_user = header_kw.get('proxy_user') or self.proxy_user + proxy_password = header_kw.get('proxy_pass') or self.proxy_password + custom_header = header_kw.get('header') or self.header + header_params = dict(keep_alive=True) + proxy_header_params = dict() + if user and password: + header_params['basic_auth'] = '{user}:{password}'.format(user=user, + password=password) + if proxy_user and proxy_password: + proxy_header_params['proxy_basic_auth'] = '{user}:{password}'.format(user=proxy_user, + password=proxy_password) + try: + header, proxy_header = urllib3.make_headers(**header_params), urllib3.make_headers(**proxy_header_params) + except TypeError as error: + self.error('build_header() error: {error}'.format(error=error)) + return None, None + else: + header.update(custom_header or dict()) + return header, proxy_header + + def _build_manager(self, **header_kw): + header, proxy_header = self.__make_headers(**header_kw) + if header is None or proxy_header is None: + return None + proxy_url = header_kw.get('proxy_url') or self.proxy_url + if proxy_url: + manager = urllib3.ProxyManager + params = dict(proxy_url=proxy_url, headers=header, proxy_headers=proxy_header) + else: + manager = urllib3.PoolManager + params = dict(headers=header) + tls_cert_file = self.tls_cert_file + if tls_cert_file: + params['cert_file'] = tls_cert_file + # NOTE: key_file is useless without cert_file, but + # cert_file may include the key as well. + tls_key_file = self.tls_key_file + if tls_key_file: + params['key_file'] = tls_key_file + tls_ca_file = self.tls_ca_file + if tls_ca_file: + params['ca_certs'] = tls_ca_file + try: + url = header_kw.get('url') or self.url + is_https = url.startswith('https') + if skip_tls_verify(is_https, self.tls_verify, tls_ca_file): + params['ca_certs'] = None + params['cert_reqs'] = 'CERT_NONE' + if is_https: + params['assert_hostname'] = False + return manager(**params) + except (urllib3.exceptions.ProxySchemeUnknown, TypeError) as error: + self.error('build_manager() error:', str(error)) + return None + + def _get_raw_data(self, url=None, manager=None, **kwargs): + """ + Get raw data from http request + :return: str + """ + try: + response = self._do_request(url, manager, **kwargs) + except Exception as error: + self.error('Url: {url}. Error: {error}'.format(url=url or self.url, error=error)) + return None + + if response.status == 200: + if isinstance(response.data, str): + return response.data + return response.data.decode(errors='ignore') + else: + self.debug('Url: {url}. Http response status code: {code}'.format(url=url or self.url, code=response.status)) + return None + + def _get_raw_data_with_status(self, url=None, manager=None, retries=1, redirect=True, **kwargs): + """ + Get status and response body content from http request. Does not catch exceptions + :return: int, str + """ + response = self._do_request(url, manager, retries, redirect, **kwargs) + + if isinstance(response.data, str): + return response.status, response.data + return response.status, response.data.decode(errors='ignore') + + def _do_request(self, url=None, manager=None, retries=1, redirect=True, **kwargs): + """ + Get response from http request. Does not catch exceptions + :return: HTTPResponse + """ + url = url or self.url + manager = manager or self._manager + retry = urllib3.Retry(retries) + if hasattr(retry, 'respect_retry_after_header'): + retry.respect_retry_after_header = bool(self.respect_retry_after_header) + + if self.body: + kwargs['body'] = self.body + + response = manager.request( + method=self.method, + url=url, + timeout=self.request_timeout, + retries=retry, + headers=manager.headers, + redirect=redirect, + **kwargs + ) + return response + + def check(self): + """ + Format configuration data and try to connect to server + :return: boolean + """ + if not (self.url and isinstance(self.url, str)): + self.error('URL is not defined or type is not ') + return False + + self._manager = self._build_manager() + if not self._manager: + return False + + try: + data = self._get_data() + except Exception as error: + self.error('_get_data() failed. Url: {url}. Error: {error}'.format(url=self.url, error=error)) + return False + + if isinstance(data, dict) and data: + return True + self.error('_get_data() returned no data or type is not ') + return False + + +def skip_tls_verify(is_https, tls_verify, tls_ca_file): + # default 'tls_verify' value is None + # logic is: + # - never skip if there is 'tls_ca_file' file + # - skip by default for https + # - do not skip by default for http + if tls_ca_file: + return False + if is_https and not tls_verify: + return True + return tls_verify is False diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/__init__.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/python.d.plugin/python_modules/bases/__init__.py b/collectors/python.d.plugin/python_modules/bases/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/python.d.plugin/python_modules/bases/charts.py b/collectors/python.d.plugin/python_modules/bases/charts.py new file mode 100644 index 0000000..203ad16 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/charts.py @@ -0,0 +1,431 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from bases.collection import safe_print + +CHART_PARAMS = ['type', 'id', 'name', 'title', 'units', 'family', 'context', 'chart_type', 'hidden'] +DIMENSION_PARAMS = ['id', 'name', 'algorithm', 'multiplier', 'divisor', 'hidden'] +VARIABLE_PARAMS = ['id', 'value'] + +CHART_TYPES = ['line', 'area', 'stacked'] +DIMENSION_ALGORITHMS = ['absolute', 'incremental', 'percentage-of-absolute-row', 'percentage-of-incremental-row'] + +CHART_BEGIN = 'BEGIN {type}.{id} {since_last}\n' +CHART_CREATE = "CHART {type}.{id} '{name}' '{title}' '{units}' '{family}' '{context}' " \ + "{chart_type} {priority} {update_every} '{hidden}' 'python.d.plugin' '{module_name}'\n" +CHART_OBSOLETE = "CHART {type}.{id} '{name}' '{title}' '{units}' '{family}' '{context}' " \ + "{chart_type} {priority} {update_every} '{hidden} obsolete'\n" + +CLABEL_COLLECT_JOB = "CLABEL '_collect_job' '{actual_job_name}' '0'\n" +CLABEL_COMMIT = "CLABEL_COMMIT\n" + +DIMENSION_CREATE = "DIMENSION '{id}' '{name}' {algorithm} {multiplier} {divisor} '{hidden} {obsolete}'\n" +DIMENSION_SET = "SET '{id}' = {value}\n" + +CHART_VARIABLE_SET = "VARIABLE CHART '{id}' = {value}\n" + +# 1 is label source auto +# https://github.com/netdata/netdata/blob/cc2586de697702f86a3c34e60e23652dd4ddcb42/database/rrd.h#L205 +RUNTIME_CHART_CREATE = "CHART netdata.runtime_{job_name} '' 'Execution time' 'ms' 'python.d' " \ + "netdata.pythond_runtime line 145000 {update_every} '' 'python.d.plugin' '{module_name}'\n" \ + "CLABEL '_collect_job' '{actual_job_name}' '1'\n" \ + "CLABEL_COMMIT\n" \ + "DIMENSION run_time 'run time' absolute 1 1\n" + +ND_INTERNAL_MONITORING_DISABLED = os.getenv("NETDATA_INTERNALS_MONITORING") == "NO" + + +def create_runtime_chart(func): + """ + Calls a wrapped function, then prints runtime chart to stdout. + + Used as a decorator for SimpleService.create() method. + The whole point of making 'create runtime chart' functionality as a decorator was + to help users who re-implements create() in theirs classes. + + :param func: class method + :return: + """ + + def wrapper(*args, **kwargs): + self = args[0] + if not ND_INTERNAL_MONITORING_DISABLED: + chart = RUNTIME_CHART_CREATE.format( + job_name=self.name, + actual_job_name=self.actual_job_name, + update_every=self._runtime_counters.update_every, + module_name=self.module_name, + ) + safe_print(chart) + ok = func(*args, **kwargs) + return ok + + return wrapper + + +class ChartError(Exception): + """Base-class for all exceptions raised by this module""" + + +class DuplicateItemError(ChartError): + """Occurs when user re-adds a chart or a dimension that has already been added""" + + +class ItemTypeError(ChartError): + """Occurs when user passes value of wrong type to Chart, Dimension or ChartVariable class""" + + +class ItemValueError(ChartError): + """Occurs when user passes inappropriate value to Chart, Dimension or ChartVariable class""" + + +class Charts: + """Represent a collection of charts + + All charts stored in a dict. + Chart is a instance of Chart class. + Charts adding must be done using Charts.add_chart() method only""" + + def __init__(self, job_name, actual_job_name, priority, cleanup, get_update_every, module_name): + """ + :param job_name: + :param priority: + :param get_update_every: + """ + self.job_name = job_name + self.actual_job_name = actual_job_name + self.priority = priority + self.cleanup = cleanup + self.get_update_every = get_update_every + self.module_name = module_name + self.charts = dict() + + def __len__(self): + return len(self.charts) + + def __iter__(self): + return iter(self.charts.values()) + + def __repr__(self): + return 'Charts({0})'.format(self) + + def __str__(self): + return str([chart for chart in self.charts]) + + def __contains__(self, item): + return item in self.charts + + def __getitem__(self, item): + return self.charts[item] + + def __delitem__(self, key): + del self.charts[key] + + def __bool__(self): + return bool(self.charts) + + def __nonzero__(self): + return self.__bool__() + + def add_chart(self, params): + """ + Create Chart instance and add it to the dict + + Manually adds job name, priority and update_every to params. + :param params: + :return: + """ + params = [self.job_name()] + params + new_chart = Chart(params) + + new_chart.params['update_every'] = self.get_update_every() + new_chart.params['priority'] = self.priority + new_chart.params['module_name'] = self.module_name + new_chart.params['actual_job_name'] = self.actual_job_name + + self.priority += 1 + self.charts[new_chart.id] = new_chart + + return new_chart + + def active_charts(self): + return [chart.id for chart in self if not chart.flags.obsoleted] + + +class Chart: + """Represent a chart""" + + def __init__(self, params): + """ + :param params: + """ + if not isinstance(params, list): + raise ItemTypeError("'chart' must be a list type") + if not len(params) >= 8: + raise ItemValueError("invalid value for 'chart', must be {0}".format(CHART_PARAMS)) + + self.params = dict(zip(CHART_PARAMS, (p or str() for p in params))) + self.name = '{type}.{id}'.format(type=self.params['type'], + id=self.params['id']) + if self.params.get('chart_type') not in CHART_TYPES: + self.params['chart_type'] = 'absolute' + hidden = str(self.params.get('hidden', '')) + self.params['hidden'] = 'hidden' if hidden == 'hidden' else '' + + self.dimensions = list() + self.variables = set() + self.flags = ChartFlags() + self.penalty = 0 + + def __getattr__(self, item): + try: + return self.params[item] + except KeyError: + raise AttributeError("'{instance}' has no attribute '{attr}'".format(instance=repr(self), + attr=item)) + + def __repr__(self): + return 'Chart({0})'.format(self.id) + + def __str__(self): + return self.id + + def __iter__(self): + return iter(self.dimensions) + + def __contains__(self, item): + return item in [dimension.id for dimension in self.dimensions] + + def add_variable(self, variable): + """ + :param variable: + :return: + """ + self.variables.add(ChartVariable(variable)) + + def add_dimension(self, dimension): + """ + :param dimension: + :return: + """ + dim = Dimension(dimension) + + if dim.id in self: + raise DuplicateItemError("'{dimension}' already in '{chart}' dimensions".format(dimension=dim.id, + chart=self.name)) + self.refresh() + self.dimensions.append(dim) + return dim + + def del_dimension(self, dimension_id, hide=True): + if dimension_id not in self: + return + idx = self.dimensions.index(dimension_id) + dimension = self.dimensions[idx] + if hide: + dimension.params['hidden'] = 'hidden' + dimension.params['obsolete'] = 'obsolete' + self.create() + self.dimensions.remove(dimension) + + def hide_dimension(self, dimension_id, reverse=False): + if dimension_id not in self: + return + idx = self.dimensions.index(dimension_id) + dimension = self.dimensions[idx] + dimension.params['hidden'] = 'hidden' if not reverse else str() + self.refresh() + + def create(self): + """ + :return: + """ + chart = CHART_CREATE.format(**self.params) + labels = CLABEL_COLLECT_JOB.format(**self.params) + CLABEL_COMMIT + dimensions = ''.join([dimension.create() for dimension in self.dimensions]) + variables = ''.join([var.set(var.value) for var in self.variables if var]) + + self.flags.push = False + self.flags.created = True + + safe_print(chart + labels + dimensions + variables) + + def can_be_updated(self, data): + for dim in self.dimensions: + if dim.get_value(data) is not None: + return True + return False + + def update(self, data, interval): + updated_dimensions, updated_variables = str(), str() + + for dim in self.dimensions: + value = dim.get_value(data) + if value is not None: + updated_dimensions += dim.set(value) + + for var in self.variables: + value = var.get_value(data) + if value is not None: + updated_variables += var.set(value) + + if updated_dimensions: + since_last = interval if self.flags.updated else 0 + + if self.flags.push: + self.create() + + chart_begin = CHART_BEGIN.format(type=self.type, id=self.id, since_last=since_last) + safe_print(chart_begin, updated_dimensions, updated_variables, 'END\n') + + self.flags.updated = True + self.penalty = 0 + else: + self.penalty += 1 + self.flags.updated = False + + return bool(updated_dimensions) + + def obsolete(self): + self.flags.obsoleted = True + if self.flags.created: + safe_print(CHART_OBSOLETE.format(**self.params)) + + def refresh(self): + self.penalty = 0 + self.flags.push = True + self.flags.obsoleted = False + + +class Dimension: + """Represent a dimension""" + + def __init__(self, params): + """ + :param params: + """ + if not isinstance(params, list): + raise ItemTypeError("'dimension' must be a list type") + if not params: + raise ItemValueError("invalid value for 'dimension', must be {0}".format(DIMENSION_PARAMS)) + + self.params = dict(zip(DIMENSION_PARAMS, (p or str() for p in params))) + self.params['name'] = self.params.get('name') or self.params['id'] + + if self.params.get('algorithm') not in DIMENSION_ALGORITHMS: + self.params['algorithm'] = 'absolute' + if not isinstance(self.params.get('multiplier'), int): + self.params['multiplier'] = 1 + if not isinstance(self.params.get('divisor'), int): + self.params['divisor'] = 1 + self.params.setdefault('hidden', '') + self.params.setdefault('obsolete', '') + + def __getattr__(self, item): + try: + return self.params[item] + except KeyError: + raise AttributeError("'{instance}' has no attribute '{attr}'".format(instance=repr(self), + attr=item)) + + def __repr__(self): + return 'Dimension({0})'.format(self.id) + + def __str__(self): + return self.id + + def __eq__(self, other): + if not isinstance(other, Dimension): + return self.id == other + return self.id == other.id + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) + + def create(self): + return DIMENSION_CREATE.format(**self.params) + + def set(self, value): + """ + :param value: : must be a digit + :return: + """ + return DIMENSION_SET.format(id=self.id, + value=value) + + def get_value(self, data): + try: + return int(data[self.id]) + except (KeyError, TypeError): + return None + + +class ChartVariable: + """Represent a chart variable""" + + def __init__(self, params): + """ + :param params: + """ + if not isinstance(params, list): + raise ItemTypeError("'variable' must be a list type") + if not params: + raise ItemValueError("invalid value for 'variable' must be: {0}".format(VARIABLE_PARAMS)) + + self.params = dict(zip(VARIABLE_PARAMS, params)) + self.params.setdefault('value', None) + + def __getattr__(self, item): + try: + return self.params[item] + except KeyError: + raise AttributeError("'{instance}' has no attribute '{attr}'".format(instance=repr(self), + attr=item)) + + def __bool__(self): + return self.value is not None + + def __nonzero__(self): + return self.__bool__() + + def __repr__(self): + return 'ChartVariable({0})'.format(self.id) + + def __str__(self): + return self.id + + def __eq__(self, other): + if isinstance(other, ChartVariable): + return self.id == other.id + return False + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) + + def set(self, value): + return CHART_VARIABLE_SET.format(id=self.id, + value=value) + + def get_value(self, data): + try: + return int(data[self.id]) + except (KeyError, TypeError): + return None + + +class ChartFlags: + def __init__(self): + self.push = True + self.created = False + self.updated = False + self.obsoleted = False diff --git a/collectors/python.d.plugin/python_modules/bases/collection.py b/collectors/python.d.plugin/python_modules/bases/collection.py new file mode 100644 index 0000000..93bf8cf --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/collection.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from threading import Lock + +PATH = os.getenv('PATH', '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin').split(':') + +CHART_BEGIN = 'BEGIN {0} {1}\n' +CHART_CREATE = "CHART {0} '{1}' '{2}' '{3}' '{4}' '{5}' {6} {7} {8}\n" +DIMENSION_CREATE = "DIMENSION '{0}' '{1}' {2} {3} {4} '{5}'\n" +DIMENSION_SET = "SET '{0}' = {1}\n" + +print_lock = Lock() + + +def setdefault_values(config, base_dict): + for key, value in base_dict.items(): + config.setdefault(key, value) + return config + + +def run_and_exit(func): + def wrapper(*args, **kwargs): + func(*args, **kwargs) + exit(1) + + return wrapper + + +def on_try_except_finally(on_except=(None,), on_finally=(None,)): + except_func = on_except[0] + finally_func = on_finally[0] + + def decorator(func): + def wrapper(*args, **kwargs): + try: + func(*args, **kwargs) + except Exception: + if except_func: + except_func(*on_except[1:]) + finally: + if finally_func: + finally_func(*on_finally[1:]) + + return wrapper + + return decorator + + +def static_vars(**kwargs): + def decorate(func): + for k in kwargs: + setattr(func, k, kwargs[k]) + return func + + return decorate + + +@on_try_except_finally(on_except=(exit, 1)) +def safe_print(*msg): + """ + :param msg: + :return: + """ + print_lock.acquire() + print(''.join(msg)) + print_lock.release() + + +def find_binary(binary): + """ + :param binary: + :return: + """ + for directory in PATH: + binary_name = os.path.join(directory, binary) + if os.path.isfile(binary_name) and os.access(binary_name, os.X_OK): + return binary_name + return None + + +def read_last_line(f): + with open(f, 'rb') as opened: + opened.seek(-2, 2) + while opened.read(1) != b'\n': + opened.seek(-2, 1) + if opened.tell() == 0: + break + result = opened.readline() + return result.decode() + + +def unicode_str(arg): + """Return the argument as a unicode string. + + The `unicode` function has been removed from Python3 and `str` takes its + place. This function is a helper which will try using Python 2's `unicode` + and if it doesn't exist, assume we're using Python 3 and use `str`. + + :param arg: + :return: + """ + # TODO: fix + try: + # https://github.com/netdata/netdata/issues/7613 + if isinstance(arg, unicode): + return arg + return unicode(arg, errors='ignore') + # https://github.com/netdata/netdata/issues/7642 + except TypeError: + return unicode(arg) + except NameError: + return str(arg) diff --git a/collectors/python.d.plugin/python_modules/bases/loaders.py b/collectors/python.d.plugin/python_modules/bases/loaders.py new file mode 100644 index 0000000..095f3a3 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/loaders.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + + +from sys import version_info + +PY_VERSION = version_info[:2] + +try: + if PY_VERSION > (3, 1): + from pyyaml3 import SafeLoader as YamlSafeLoader + else: + from pyyaml2 import SafeLoader as YamlSafeLoader +except ImportError: + from yaml import SafeLoader as YamlSafeLoader + + +try: + from collections import OrderedDict +except ImportError: + from third_party.ordereddict import OrderedDict + + +DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' if PY_VERSION > (3, 1) else u'tag:yaml.org,2002:map' + + +def dict_constructor(loader, node): + return OrderedDict(loader.construct_pairs(node)) + + +YamlSafeLoader.add_constructor(DEFAULT_MAPPING_TAG, dict_constructor) + + +def load_yaml(stream): + loader = YamlSafeLoader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_config(file_name): + with open(file_name, 'r') as stream: + return load_yaml(stream) diff --git a/collectors/python.d.plugin/python_modules/bases/loggers.py b/collectors/python.d.plugin/python_modules/bases/loggers.py new file mode 100644 index 0000000..47f196a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/loggers.py @@ -0,0 +1,206 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import logging +import traceback + +from sys import exc_info + +try: + from time import monotonic as time +except ImportError: + from time import time + +from bases.collection import on_try_except_finally, unicode_str + + +LOGGING_LEVELS = {'CRITICAL': 50, + 'ERROR': 40, + 'WARNING': 30, + 'INFO': 20, + 'DEBUG': 10, + 'NOTSET': 0} + +DEFAULT_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s : %(message)s' +DEFAULT_LOG_TIME_FORMAT = '%Y-%m-%d %H:%M:%S' + +PYTHON_D_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s' +PYTHON_D_LOG_NAME = 'python.d' + + +def limiter(log_max_count=30, allowed_in_seconds=60): + def on_decorator(func): + + def on_call(*args): + current_time = args[0]._runtime_counters.start_mono + lc = args[0]._logger_counters + + if lc.logged and lc.logged % log_max_count == 0: + if current_time - lc.time_to_compare <= allowed_in_seconds: + lc.dropped += 1 + return + lc.time_to_compare = current_time + + lc.logged += 1 + func(*args) + + return on_call + return on_decorator + + +def add_traceback(func): + def on_call(*args): + self = args[0] + + if not self.log_traceback: + func(*args) + else: + if exc_info()[0]: + func(*args) + func(self, traceback.format_exc()) + else: + func(*args) + + return on_call + + +class LoggerCounters: + def __init__(self): + self.logged = 0 + self.dropped = 0 + self.time_to_compare = time() + + def __repr__(self): + return 'LoggerCounter(logged: {logged}, dropped: {dropped})'.format(logged=self.logged, + dropped=self.dropped) + + +class BaseLogger(object): + def __init__(self, logger_name, log_fmt=DEFAULT_LOG_LINE_FORMAT, date_fmt=DEFAULT_LOG_TIME_FORMAT, + handler=logging.StreamHandler): + """ + :param logger_name: + :param log_fmt: + :param date_fmt: + :param handler: + """ + self.logger = logging.getLogger(logger_name) + if not self.has_handlers(): + self.severity = 'INFO' + self.logger.addHandler(handler()) + self.set_formatter(fmt=log_fmt, date_fmt=date_fmt) + + def __repr__(self): + return ''.format(name=self.logger.name) + + def set_formatter(self, fmt, date_fmt=DEFAULT_LOG_TIME_FORMAT): + """ + :param fmt: + :param date_fmt: + :return: + """ + if self.has_handlers(): + self.logger.handlers[0].setFormatter(logging.Formatter(fmt=fmt, datefmt=date_fmt)) + + def has_handlers(self): + return self.logger.handlers + + @property + def severity(self): + return self.logger.getEffectiveLevel() + + @severity.setter + def severity(self, level): + """ + :param level: or + :return: + """ + if level in LOGGING_LEVELS: + self.logger.setLevel(LOGGING_LEVELS[level]) + + def debug(self, *msg, **kwargs): + self.logger.debug(' '.join(map(unicode_str, msg)), **kwargs) + + def info(self, *msg, **kwargs): + self.logger.info(' '.join(map(unicode_str, msg)), **kwargs) + + def warning(self, *msg, **kwargs): + self.logger.warning(' '.join(map(unicode_str, msg)), **kwargs) + + def error(self, *msg, **kwargs): + self.logger.error(' '.join(map(unicode_str, msg)), **kwargs) + + def alert(self, *msg, **kwargs): + self.logger.critical(' '.join(map(unicode_str, msg)), **kwargs) + + @on_try_except_finally(on_finally=(exit, 1)) + def fatal(self, *msg, **kwargs): + self.logger.critical(' '.join(map(unicode_str, msg)), **kwargs) + + +class PythonDLogger(object): + def __init__(self, logger_name=PYTHON_D_LOG_NAME, log_fmt=PYTHON_D_LOG_LINE_FORMAT): + """ + :param logger_name: + :param log_fmt: + """ + self.logger = BaseLogger(logger_name, log_fmt=log_fmt) + self.module_name = 'plugin' + self.job_name = 'main' + self._logger_counters = LoggerCounters() + + _LOG_TRACEBACK = False + + @property + def log_traceback(self): + return PythonDLogger._LOG_TRACEBACK + + @log_traceback.setter + def log_traceback(self, value): + PythonDLogger._LOG_TRACEBACK = value + + def debug(self, *msg): + self.logger.debug(*msg, extra={'module_name': self.module_name, + 'job_name': self.job_name or self.module_name}) + + def info(self, *msg): + self.logger.info(*msg, extra={'module_name': self.module_name, + 'job_name': self.job_name or self.module_name}) + + def warning(self, *msg): + self.logger.warning(*msg, extra={'module_name': self.module_name, + 'job_name': self.job_name or self.module_name}) + + @add_traceback + def error(self, *msg): + self.logger.error(*msg, extra={'module_name': self.module_name, + 'job_name': self.job_name or self.module_name}) + + @add_traceback + def alert(self, *msg): + self.logger.alert(*msg, extra={'module_name': self.module_name, + 'job_name': self.job_name or self.module_name}) + + def fatal(self, *msg): + self.logger.fatal(*msg, extra={'module_name': self.module_name, + 'job_name': self.job_name or self.module_name}) + + +class PythonDLimitedLogger(PythonDLogger): + @limiter() + def info(self, *msg): + PythonDLogger.info(self, *msg) + + @limiter() + def warning(self, *msg): + PythonDLogger.warning(self, *msg) + + @limiter() + def error(self, *msg): + PythonDLogger.error(self, *msg) + + @limiter() + def alert(self, *msg): + PythonDLogger.alert(self, *msg) diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/__init__.py b/collectors/python.d.plugin/python_modules/pyyaml2/__init__.py new file mode 100644 index 0000000..4d560e4 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/__init__.py @@ -0,0 +1,316 @@ +# SPDX-License-Identifier: MIT + +from error import * + +from tokens import * +from events import * +from nodes import * + +from loader import * +from dumper import * + +__version__ = '3.11' + +try: + from cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + from StringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(object): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __metaclass__ = YAMLObjectMetaclass + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + from_yaml = classmethod(from_yaml) + + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + to_yaml = classmethod(to_yaml) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/composer.py b/collectors/python.d.plugin/python_modules/pyyaml2/composer.py new file mode 100644 index 0000000..6b41b80 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/composer.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Composer', 'ComposerError'] + +from error import MarkedYAMLError +from events import * +from nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer(object): + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor.encode('utf-8'), event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurence" + % anchor.encode('utf-8'), self.anchors[anchor].start_mark, + "second occurence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == u'!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/constructor.py b/collectors/python.d.plugin/python_modules/pyyaml2/constructor.py new file mode 100644 index 0000000..8ad1b90 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/constructor.py @@ -0,0 +1,676 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError'] + +from error import * +from nodes import * + +import datetime + +import binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor(object): + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = generator.next() + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + try: + hash(key) + except TypeError, exc: + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unacceptable key (%s)" % exc, key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + add_constructor = classmethod(add_constructor) + + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + add_multi_constructor = classmethod(add_multi_constructor) + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == u'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return BaseConstructor.construct_scalar(self, node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == u'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == u'tag:yaml.org,2002:value': + key_node.tag = u'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return BaseConstructor.construct_mapping(self, node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + u'yes': True, + u'no': False, + u'true': True, + u'false': False, + u'on': True, + u'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + value = self.construct_scalar(node) + try: + return str(value).decode('base64') + except (binascii.Error, UnicodeEncodeError), exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + ur'''^(?P[0-9][0-9][0-9][0-9]) + -(?P[0-9][0-9]?) + -(?P[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P[0-9][0-9]?) + :(?P[0-9][0-9]) + :(?P[0-9][0-9]) + (?:\.(?P[0-9]*))? + (?:[ \t]*(?PZ|(?P[-+])(?P[0-9][0-9]?) + (?::(?P[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + value = self.construct_scalar(node) + try: + return value.encode('ascii') + except UnicodeEncodeError: + return value + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), + node.start_mark) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return self.construct_scalar(node).encode('utf-8') + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_long(self, node): + return long(self.construct_yaml_int(node)) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError, exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) + return sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if u'.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = '__builtin__' + object_name = name + try: + __import__(module_name) + except ImportError, exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" % (object_name.encode('utf-8'), + module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + class classobj: pass + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type(self.classobj)) \ + and not args and not kwds: + instance = self.classobj() + instance.__class__ = cls + return instance + elif newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/cyaml.py b/collectors/python.d.plugin/python_modules/pyyaml2/cyaml.py new file mode 100644 index 0000000..2858ab4 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/cyaml.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from constructor import * + +from serializer import * +from representer import * + +from resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/dumper.py b/collectors/python.d.plugin/python_modules/pyyaml2/dumper.py new file mode 100644 index 0000000..3685cbe --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/dumper.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from emitter import * +from serializer import * +from representer import * +from resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/emitter.py b/collectors/python.d.plugin/python_modules/pyyaml2/emitter.py new file mode 100644 index 0000000..9a460a0 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/emitter.py @@ -0,0 +1,1141 @@ +# SPDX-License-Identifier: MIT + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from error import YAMLError +from events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis(object): + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter(object): + + DEFAULT_TAG_PREFIXES = { + u'!' : u'!', + u'tag:yaml.org,2002:' : u'!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = u'\n' + if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not getattr(self.stream, 'encoding', None): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. + + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = self.event.tags.keys() + handles.sort() + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator(u'---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator(u'...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. + + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor(u'&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor(u'*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator(u'[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. + + def expect_flow_mapping(self): + self.write_indicator(u'{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(u':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator(u'-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(u':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == u'') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. + + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = u'!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return u'%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != u'!' or handle[-1] != u'!': + raise EmitterError("tag handle must start and end with '!': %r" + % (handle.encode('utf-8'))) + for ch in handle[1:-1]: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch.encode('utf-8'), handle.encode('utf-8'))) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == u'!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return u''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == u'!': + return tag + handle = None + suffix = tag + prefixes = self.tag_prefixes.keys() + prefixes.sort() + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == u'!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.~*\'()[]' \ + or (ch == u'!' and handle != u'!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = u''.join(chunks) + if handle: + return u'%s%s' % (handle, suffix_text) + else: + return u'!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch.encode('utf-8'), anchor.encode('utf-8'))) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith(u'---') or scalar.startswith(u'...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in u'#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in u'?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in u',?[]{}': + flow_indicators = True + if ch == u':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in u'\n\x85\u2028\u2029': + line_breaks = True + if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): + if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == u' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in u'\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write(u'\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = u' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = u' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = u'%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = u'%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator(u'\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != u' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == u'\'': + data = u'\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + self.write_indicator(u'\'', False) + + ESCAPE_REPLACEMENTS = { + u'\0': u'0', + u'\x07': u'a', + u'\x08': u'b', + u'\x09': u't', + u'\x0A': u'n', + u'\x0B': u'v', + u'\x0C': u'f', + u'\x0D': u'r', + u'\x1B': u'e', + u'\"': u'\"', + u'\\': u'\\', + u'\x85': u'N', + u'\xA0': u'_', + u'\u2028': u'L', + u'\u2029': u'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator(u'"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ + or not (u'\x20' <= ch <= u'\x7E' + or (self.allow_unicode + and (u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= u'\xFF': + data = u'\\x%02X' % ord(ch) + elif ch <= u'\uFFFF': + data = u'\\u%04X' % ord(ch) + else: + data = u'\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+u'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == u' ': + data = u'\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator(u'"', False) + + def determine_block_hints(self, text): + hints = u'' + if text: + if text[0] in u' \n\x85\u2028\u2029': + hints += unicode(self.best_indent) + if text[-1] not in u'\n\x85\u2028\u2029': + hints += u'-' + elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': + hints += u'+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'>'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != u' ' \ + and text[start] == u'\n': + self.write_line_break() + leading_space = (ch == u' ') + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == u' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'|'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in u'\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = u' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/error.py b/collectors/python.d.plugin/python_modules/pyyaml2/error.py new file mode 100644 index 0000000..5466be7 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/error.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark(object): + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... ' + end -= 5 + break + snippet = self.buffer[start:end].encode('utf-8') + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/events.py b/collectors/python.d.plugin/python_modules/pyyaml2/events.py new file mode 100644 index 0000000..283452a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/events.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: MIT + +# Abstract classes. + +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass + +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/loader.py b/collectors/python.d.plugin/python_modules/pyyaml2/loader.py new file mode 100644 index 0000000..1c19553 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/loader.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from reader import * +from scanner import * +from parser import * +from composer import * +from constructor import * +from resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/nodes.py b/collectors/python.d.plugin/python_modules/pyyaml2/nodes.py new file mode 100644 index 0000000..ed2a1b4 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/nodes.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: MIT + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/parser.py b/collectors/python.d.plugin/python_modules/pyyaml2/parser.py new file mode 100644 index 0000000..97ba083 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/parser.py @@ -0,0 +1,590 @@ +# SPDX-License-Identifier: MIT + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from error import MarkedYAMLError +from tokens import * +from events import * +from scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser(object): + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + u'!': u'!', + u'!!': u'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == u'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == u'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle.encode('utf-8'), + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? + # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle.encode('utf-8'), + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == u'!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == u'!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == u'!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, (implicit, False), u'', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), u'', mark, mark) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/reader.py b/collectors/python.d.plugin/python_modules/pyyaml2/reader.py new file mode 100644 index 0000000..8d42295 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/reader.py @@ -0,0 +1,191 @@ +# SPDX-License-Identifier: MIT +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. + +__all__ = ['Reader', 'ReaderError'] + +from error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, str): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to unicode, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `str` object, + # - a `unicode` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = u'' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, unicode): + self.name = "" + self.check_printable(stream) + self.buffer = stream+u'\0' + elif isinstance(stream, str): + self.name = "" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "") + self.eof = False + self.raw_buffer = '' + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in u'\n\x85\u2028\u2029' \ + or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + self.line += 1 + self.column = 0 + elif ch != u'\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and len(self.raw_buffer) < 2: + self.update_raw() + if not isinstance(self.raw_buffer, unicode): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError, exc: + character = exc.object[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += u'\0' + self.raw_buffer = None + break + + def update_raw(self, size=1024): + data = self.stream.read(size) + if data: + self.raw_buffer += data + self.stream_pointer += len(data) + else: + self.eof = True + +#try: +# import psyco +# psyco.bind(Reader) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/representer.py b/collectors/python.d.plugin/python_modules/pyyaml2/representer.py new file mode 100644 index 0000000..0a1404e --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/representer.py @@ -0,0 +1,485 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from error import * +from nodes import * + +import datetime + +import sys, copy_reg, types + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter(object): + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=None): + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def get_classobj_bases(self, cls): + bases = [cls] + for base in cls.__bases__: + bases.extend(self.get_classobj_bases(base)) + return bases + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if type(data) is types.InstanceType: + data_types = self.get_classobj_bases(data.__class__)+list(data_types) + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, unicode(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + add_representer = classmethod(add_representer) + + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + add_multi_representer = classmethod(add_multi_representer) + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = mapping.items() + mapping.sort() + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data in [None, ()]: + return True + if isinstance(data, (str, unicode, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:null', + u'null') + + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:str', data) + + def represent_bool(self, data): + if data: + value = u'true' + else: + value = u'false' + return self.represent_scalar(u'tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + + def represent_long(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = u'.nan' + elif data == self.inf_value: + value = u'.inf' + elif data == -self.inf_value: + value = u'-.inf' + else: + value = unicode(repr(data)).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if u'.' not in value and u'e' in value: + value = value.replace(u'e', u'.0e', 1) + return self.represent_scalar(u'tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence(u'tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping(u'tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping(u'tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = unicode(data.isoformat()) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = unicode(data.isoformat(' ')) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(unicode, + SafeRepresenter.represent_unicode) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(long, + SafeRepresenter.represent_long) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:python/str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + tag = None + try: + data.encode('ascii') + tag = u'tag:yaml.org,2002:python/unicode' + except UnicodeEncodeError: + tag = u'tag:yaml.org,2002:str' + return self.represent_scalar(tag, data) + + def represent_long(self, data): + tag = u'tag:yaml.org,2002:int' + if int(data) is not data: + tag = u'tag:yaml.org,2002:python/long' + return self.represent_scalar(tag, unicode(data)) + + def represent_complex(self, data): + if data.imag == 0.0: + data = u'%r' % data.real + elif data.real == 0.0: + data = u'%rj' % data.imag + elif data.imag > 0: + data = u'%r+%rj' % (data.real, data.imag) + else: + data = u'%r%rj' % (data.real, data.imag) + return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = u'%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'') + + def represent_module(self, data): + return self.represent_scalar( + u'tag:yaml.org,2002:python/module:'+data.__name__, u'') + + def represent_instance(self, data): + # For instances of classic classes, we use __getinitargs__ and + # __getstate__ to serialize the data. + + # If data.__getinitargs__ exists, the object must be reconstructed by + # calling cls(**args), where args is a tuple returned by + # __getinitargs__. Otherwise, the cls.__init__ method should never be + # called and the class instance is created by instantiating a trivial + # class and assigning to the instance's __class__ variable. + + # If data.__getstate__ exists, it returns the state of the object. + # Otherwise, the state of the object is data.__dict__. + + # We produce either a !!python/object or !!python/object/new node. + # If data.__getinitargs__ does not exist and state is a dictionary, we + # produce a !!python/object node . Otherwise we produce a + # !!python/object/new node. + + cls = data.__class__ + class_name = u'%s.%s' % (cls.__module__, cls.__name__) + args = None + state = None + if hasattr(data, '__getinitargs__'): + args = list(data.__getinitargs__()) + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__ + if args is None and isinstance(state, dict): + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+class_name, state) + if isinstance(state, dict) and not state: + return self.represent_sequence( + u'tag:yaml.org,2002:python/object/new:'+class_name, args) + value = {} + if args: + value['args'] = args + value['state'] = state + return self.represent_mapping( + u'tag:yaml.org,2002:python/object/new:'+class_name, value) + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copy_reg.dispatch_table: + reduce = copy_reg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent object: %r" % data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = u'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = u'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = u'%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + +Representer.add_representer(str, + Representer.represent_str) + +Representer.add_representer(unicode, + Representer.represent_unicode) + +Representer.add_representer(long, + Representer.represent_long) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_representer(type, + Representer.represent_name) + +Representer.add_representer(types.ClassType, + Representer.represent_name) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(types.InstanceType, + Representer.represent_instance) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/resolver.py b/collectors/python.d.plugin/python_modules/pyyaml2/resolver.py new file mode 100644 index 0000000..49922de --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/resolver.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseResolver', 'Resolver'] + +from error import * +from nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver(object): + + DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + add_implicit_resolver = classmethod(add_implicit_resolver) + + def add_path_resolver(cls, tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, basestring) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (basestring, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + add_path_resolver = classmethod(add_path_resolver) + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, basestring): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, basestring): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == u'': + resolvers = self.yaml_implicit_resolvers.get(u'', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:bool', + re.compile(ur'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list(u'yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:float', + re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list(u'-+0123456789.')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:int', + re.compile(ur'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list(u'-+0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:merge', + re.compile(ur'^(?:<<)$'), + [u'<']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:null', + re.compile(ur'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + [u'~', u'n', u'N', u'']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:timestamp', + re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? + (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list(u'0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:value', + re.compile(ur'^(?:=)$'), + [u'=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:yaml', + re.compile(ur'^(?:!|&|\*)$'), + list(u'!&*')) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/scanner.py b/collectors/python.d.plugin/python_modules/pyyaml2/scanner.py new file mode 100644 index 0000000..971da61 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/scanner.py @@ -0,0 +1,1458 @@ +# SPDX-License-Identifier: MIT + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from error import MarkedYAMLError +from tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey(object): + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner(object): + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == u'\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == u'%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == u'-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == u'.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + #if ch == u'\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == u'[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == u'{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == u']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == u'}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == u',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == u'-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == u'?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == u':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == u'*': + return self.fetch_alias() + + # Is it an anchor? + if ch == u'&': + return self.fetch_anchor() + + # Is it a tag? + if ch == u'!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == u'|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == u'>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == u'\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == u'\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" + % ch.encode('utf-8'), self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in self.possible_simple_keys.keys(): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # A simple key is required only if it is the first token in the current + # line. Therefore it is always allowed. + assert self.allow_simple_key or not required + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_directive(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not nessesary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be catched by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'---' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'...' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' + and (ch == u'-' or (not self.flow_level and ch in u'?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if : + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == u'\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == u'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == u'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" + % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" + % self.peek().encode('utf-8'), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not (u'0' <= ch <= u'9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch.encode('utf-8'), + self.get_mark()) + length = 0 + while u'0' <= self.peek(length) <= u'9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == u' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != u' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + start_mark = self.get_mark() + indicator = self.peek() + if indicator == u'*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == u'<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != u'>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() + elif ch in u'\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = u'!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in u'\0 \r\n\x85\u2028\u2029': + if ch == u'!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = u'!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = u'!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = u'' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != u'\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in u' \t' + length = 0 + while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != u'\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == u'\n' \ + and leading_non_space and self.peek() not in u' \t': + if not breaks: + chunks.append(u' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == u'\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(u' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch.encode('utf-8'), self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() != u' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + while self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + u'0': u'\0', + u'a': u'\x07', + u'b': u'\x08', + u't': u'\x09', + u'\t': u'\x09', + u'n': u'\x0A', + u'v': u'\x0B', + u'f': u'\x0C', + u'r': u'\x0D', + u'e': u'\x1B', + u' ': u'\x20', + u'\"': u'\"', + u'\\': u'\\', + u'N': u'\x85', + u'_': u'\xA0', + u'L': u'\u2028', + u'P': u'\u2029', + } + + ESCAPE_CODES = { + u'x': 2, + u'u': 4, + u'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == u'\'' and self.peek(1) == u'\'': + chunks.append(u'\'') + self.forward(2) + elif (double and ch == u'\'') or (not double and ch in u'\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == u'\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexdecimal numbers, but found %r" % + (length, self.peek(k).encode('utf-8')), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(unichr(code)) + self.forward(length) + elif ch in u'\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in u' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == u'\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in u' \t': + self.forward() + if self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == u'#': + break + while True: + ch = self.peek(length) + if ch in u'\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == u':' and + self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in u',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == u':' + and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError("while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == u'#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(u''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + chunks = [] + length = 0 + while self.peek(length) in u' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != u'!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != u' ': + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if ch != u'!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.!~*\'()[]%': + if ch == u'%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch.encode('utf-8'), + self.get_mark()) + return u''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + bytes = [] + mark = self.get_mark() + while self.peek() == u'%': + self.forward() + for k in range(2): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % + (self.peek(k).encode('utf-8')), self.get_mark()) + bytes.append(chr(int(self.prefix(2), 16))) + self.forward(2) + try: + value = unicode(''.join(bytes), 'utf-8') + except UnicodeDecodeError, exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in u'\r\n\x85': + if self.prefix(2) == u'\r\n': + self.forward(2) + else: + self.forward() + return u'\n' + elif ch in u'\u2028\u2029': + self.forward() + return ch + return u'' + +#try: +# import psyco +# psyco.bind(Scanner) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/serializer.py b/collectors/python.d.plugin/python_modules/pyyaml2/serializer.py new file mode 100644 index 0000000..15fdbb0 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/serializer.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Serializer', 'SerializerError'] + +from error import YAMLError +from events import * +from nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer(object): + + ANCHOR_TEMPLATE = u'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/tokens.py b/collectors/python.d.plugin/python_modules/pyyaml2/tokens.py new file mode 100644 index 0000000..c5c4fb1 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/tokens.py @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: MIT + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '' + +class DirectiveToken(Token): + id = '' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '' + +class DocumentEndToken(Token): + id = '' + +class StreamStartToken(Token): + id = '' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '' + +class BlockSequenceStartToken(Token): + id = '' + +class BlockMappingStartToken(Token): + id = '' + +class BlockEndToken(Token): + id = '' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/__init__.py b/collectors/python.d.plugin/python_modules/pyyaml3/__init__.py new file mode 100644 index 0000000..a884b33 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/__init__.py @@ -0,0 +1,313 @@ +# SPDX-License-Identifier: MIT + +from .error import * + +from .tokens import * +from .events import * +from .nodes import * + +from .loader import * +from .dumper import * + +__version__ = '3.11' +try: + from .cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +import io + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + stream = io.StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(metaclass=YAMLObjectMetaclass): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + @classmethod + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + + @classmethod + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/composer.py b/collectors/python.d.plugin/python_modules/pyyaml3/composer.py new file mode 100644 index 0000000..c418bba --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/composer.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Composer', 'ComposerError'] + +from .error import MarkedYAMLError +from .events import * +from .nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer: + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor, event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurence" + % anchor, self.anchors[anchor].start_mark, + "second occurence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == '!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/constructor.py b/collectors/python.d.plugin/python_modules/pyyaml3/constructor.py new file mode 100644 index 0000000..ee09a7a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/constructor.py @@ -0,0 +1,687 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError'] + +from .error import * +from .nodes import * + +import collections, datetime, base64, binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor: + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = next(generator) + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if not isinstance(key, collections.Hashable): + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + @classmethod + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + + @classmethod + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == 'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return super().construct_scalar(node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return super().construct_mapping(node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + 'yes': True, + 'no': False, + 'true': True, + 'false': False, + 'on': True, + 'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + r'''^(?P[0-9][0-9][0-9][0-9]) + -(?P[0-9][0-9]?) + -(?P[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P[0-9][0-9]?) + :(?P[0-9][0-9]) + :(?P[0-9][0-9]) + (?:\.(?P[0-9]*))? + (?:[ \t]*(?PZ|(?P[-+])(?P[0-9][0-9]?) + (?::(?P[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag, + node.start_mark) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return self.construct_scalar(node) + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + def construct_python_long(self, node): + return self.construct_yaml_int(node) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError as exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name, exc), mark) + return sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if '.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = 'builtins' + object_name = name + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name, exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" + % (object_name, module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/bytes', + Constructor.construct_python_bytes) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/cyaml.py b/collectors/python.d.plugin/python_modules/pyyaml3/cyaml.py new file mode 100644 index 0000000..e6c16d8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/cyaml.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from .constructor import * + +from .serializer import * +from .representer import * + +from .resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/dumper.py b/collectors/python.d.plugin/python_modules/pyyaml3/dumper.py new file mode 100644 index 0000000..ba590c6 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/dumper.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/emitter.py b/collectors/python.d.plugin/python_modules/pyyaml3/emitter.py new file mode 100644 index 0000000..d4be65a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/emitter.py @@ -0,0 +1,1138 @@ +# SPDX-License-Identifier: MIT + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from .error import YAMLError +from .events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis: + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter: + + DEFAULT_TAG_PREFIXES = { + '!' : '!', + 'tag:yaml.org,2002:' : '!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = '\n' + if line_break in ['\r', '\n', '\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not hasattr(self.stream, 'encoding'): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. + + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator('...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = sorted(self.event.tags.keys()) + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator('---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator('...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator('...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. + + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor('&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor('*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator('[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator(']', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. + + def expect_flow_mapping(self): + self.write_indicator('{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator('}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator('}', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator('-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == '') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. + + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = '!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return '%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != '!' or handle[-1] != '!': + raise EmitterError("tag handle must start and end with '!': %r" % handle) + for ch in handle[1:-1]: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch, handle)) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == '!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return ''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == '!': + return tag + handle = None + suffix = tag + prefixes = sorted(self.tag_prefixes.keys()) + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == '!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.~*\'()[]' \ + or (ch == '!' and handle != '!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = ''.join(chunks) + if handle: + return '%s%s' % (handle, suffix_text) + else: + return '!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch, anchor)) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith('---') or scalar.startswith('...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in '\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in '#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in '?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in ',?[]{}': + flow_indicators = True + if ch == ':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in '\n\x85\u2028\u2029': + line_breaks = True + if not (ch == '\n' or '\x20' <= ch <= '\x7E'): + if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == ' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in '\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write('\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = ' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = ' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = '%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = '%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator('\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != ' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == '\'': + data = '\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + self.write_indicator('\'', False) + + ESCAPE_REPLACEMENTS = { + '\0': '0', + '\x07': 'a', + '\x08': 'b', + '\x09': 't', + '\x0A': 'n', + '\x0B': 'v', + '\x0C': 'f', + '\x0D': 'r', + '\x1B': 'e', + '\"': '\"', + '\\': '\\', + '\x85': 'N', + '\xA0': '_', + '\u2028': 'L', + '\u2029': 'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator('"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ + or not ('\x20' <= ch <= '\x7E' + or (self.allow_unicode + and ('\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = '\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) + else: + data = '\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == ' ': + data = '\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator('"', False) + + def determine_block_hints(self, text): + hints = '' + if text: + if text[0] in ' \n\x85\u2028\u2029': + hints += str(self.best_indent) + if text[-1] not in '\n\x85\u2028\u2029': + hints += '-' + elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': + hints += '+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('>'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != ' ' \ + and text[start] == '\n': + self.write_line_break() + leading_space = (ch == ' ') + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + spaces = (ch == ' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('|'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in '\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = ' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/error.py b/collectors/python.d.plugin/python_modules/pyyaml3/error.py new file mode 100644 index 0000000..5fec7d4 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/error.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark: + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... ' + end -= 5 + break + snippet = self.buffer[start:end] + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/events.py b/collectors/python.d.plugin/python_modules/pyyaml3/events.py new file mode 100644 index 0000000..283452a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/events.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: MIT + +# Abstract classes. + +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass + +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/loader.py b/collectors/python.d.plugin/python_modules/pyyaml3/loader.py new file mode 100644 index 0000000..7ef6cf8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/loader.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from .reader import * +from .scanner import * +from .parser import * +from .composer import * +from .constructor import * +from .resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/nodes.py b/collectors/python.d.plugin/python_modules/pyyaml3/nodes.py new file mode 100644 index 0000000..ed2a1b4 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/nodes.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: MIT + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/parser.py b/collectors/python.d.plugin/python_modules/pyyaml3/parser.py new file mode 100644 index 0000000..bcec7f9 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/parser.py @@ -0,0 +1,590 @@ +# SPDX-License-Identifier: MIT + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from .error import MarkedYAMLError +from .tokens import * +from .events import * +from .scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser: + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + '!': '!', + '!!': 'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == 'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == 'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle, + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? + # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle, + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == '!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == '!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == '!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, (implicit, False), '', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), '', mark, mark) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/reader.py b/collectors/python.d.plugin/python_modules/pyyaml3/reader.py new file mode 100644 index 0000000..0a515fd --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/reader.py @@ -0,0 +1,193 @@ +# SPDX-License-Identifier: MIT +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. + +__all__ = ['Reader', 'ReaderError'] + +from .error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, bytes): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `bytes` object, + # - a `str` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = '' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, str): + self.name = "" + self.check_printable(stream) + self.buffer = stream+'\0' + elif isinstance(stream, bytes): + self.name = "" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "") + self.eof = False + self.raw_buffer = None + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in '\n\x85\u2028\u2029' \ + or (ch == '\r' and self.buffer[self.pointer] != '\n'): + self.line += 1 + self.column = 0 + elif ch != '\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): + self.update_raw() + if isinstance(self.raw_buffer, bytes): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += '\0' + self.raw_buffer = None + break + + def update_raw(self, size=4096): + data = self.stream.read(size) + if self.raw_buffer is None: + self.raw_buffer = data + else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: + self.eof = True + +#try: +# import psyco +# psyco.bind(Reader) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/representer.py b/collectors/python.d.plugin/python_modules/pyyaml3/representer.py new file mode 100644 index 0000000..756a18d --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/representer.py @@ -0,0 +1,375 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from .error import * +from .nodes import * + +import datetime, sys, copyreg, types, base64 + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter: + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=None): + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, str(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + @classmethod + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + + @classmethod + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = list(mapping.items()) + try: + mapping = sorted(mapping) + except TypeError: + pass + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data in [None, ()]: + return True + if isinstance(data, (str, bytes, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar('tag:yaml.org,2002:null', 'null') + + def represent_str(self, data): + return self.represent_scalar('tag:yaml.org,2002:str', data) + + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') + + def represent_bool(self, data): + if data: + value = 'true' + else: + value = 'false' + return self.represent_scalar('tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar('tag:yaml.org,2002:int', str(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = '.nan' + elif data == self.inf_value: + value = '.inf' + elif data == -self.inf_value: + value = '-.inf' + else: + value = repr(data).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if '.' not in value and 'e' in value: + value = value.replace('e', '.0e', 1) + return self.represent_scalar('tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence('tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping('tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping('tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = data.isoformat() + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = data.isoformat(' ') + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_complex(self, data): + if data.imag == 0.0: + data = '%r' % data.real + elif data.real == 0.0: + data = '%rj' % data.imag + elif data.imag > 0: + data = '%r+%rj' % (data.real, data.imag) + else: + data = '%r%rj' % (data.real, data.imag) + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = '%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') + + def represent_module(self, data): + return self.represent_scalar( + 'tag:yaml.org,2002:python/module:'+data.__name__, '') + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent object: %r" % data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = 'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = 'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = '%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + 'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_representer(type, + Representer.represent_name) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/resolver.py b/collectors/python.d.plugin/python_modules/pyyaml3/resolver.py new file mode 100644 index 0000000..50945e0 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/resolver.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseResolver', 'Resolver'] + +from .error import * +from .nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver: + + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + @classmethod + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + + @classmethod + def add_path_resolver(cls, tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, str) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (str, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, str): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, str): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list('yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:float', + re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list('-+0123456789.')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:int', + re.compile(r'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list('-+0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:merge', + re.compile(r'^(?:<<)$'), + ['<']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:null', + re.compile(r'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + ['~', 'n', 'N', '']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:timestamp', + re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? + (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list('0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:value', + re.compile(r'^(?:=)$'), + ['=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:yaml', + re.compile(r'^(?:!|&|\*)$'), + list('!&*')) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/scanner.py b/collectors/python.d.plugin/python_modules/pyyaml3/scanner.py new file mode 100644 index 0000000..b55854e --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/scanner.py @@ -0,0 +1,1449 @@ +# SPDX-License-Identifier: MIT + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from .error import MarkedYAMLError +from .tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey: + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner: + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == '\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == '%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == '-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == '.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + #if ch == '\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == '[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == '{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == ']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == '}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == ',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == '-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == '?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == ':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == '*': + return self.fetch_alias() + + # Is it an anchor? + if ch == '&': + return self.fetch_anchor() + + # Is it a tag? + if ch == '!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == '|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == '>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == '\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == '\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" % ch, + self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # A simple key is required only if it is the first token in the current + # line. Therefore it is always allowed. + assert self.allow_simple_key or not required + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_directive(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not nessesary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be catched by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '---' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '...' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' + and (ch == '-' or (not self.flow_level and ch in '?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if : + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == '\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == 'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == 'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" % self.peek(), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" % self.peek(), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not ('0' <= ch <= '9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch, self.get_mark()) + length = 0 + while '0' <= self.peek(length) <= '9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == ' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != ' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch, self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + start_mark = self.get_mark() + indicator = self.peek() + if indicator == '*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == '<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != '>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek(), + self.get_mark()) + self.forward() + elif ch in '\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = '!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = '!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = '!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = '' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != '\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in ' \t' + length = 0 + while self.peek(length) not in '\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != '\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == '\n' \ + and leading_non_space and self.peek() not in ' \t': + if not breaks: + chunks.append(' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == '\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch, self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" % ch, + self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() != ' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + while self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '\"': '\"', + '\\': '\\', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', + } + + ESCAPE_CODES = { + 'x': 2, + 'u': 4, + 'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == '\'' and self.peek(1) == '\'': + chunks.append('\'') + self.forward(2) + elif (double and ch == '\'') or (not double and ch in '\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == '\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexdecimal numbers, but found %r" % + (length, self.peek(k)), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(chr(code)) + self.forward(length) + elif ch in '\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch, self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in ' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == '\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in ' \t': + self.forward() + if self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == '#': + break + while True: + ch = self.peek(length) + if ch in '\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == ':' and + self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in ',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == ':' + and self.peek(length+1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError("while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == '#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + chunks = [] + length = 0 + while self.peek(length) in ' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != '!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if ch != '!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.!~*\'()[]%': + if ch == '%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch, self.get_mark()) + return ''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + codes = [] + mark = self.get_mark() + while self.peek() == '%': + self.forward() + for k in range(2): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" + % self.peek(k), self.get_mark()) + codes.append(int(self.prefix(2), 16)) + self.forward(2) + try: + value = bytes(codes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in '\r\n\x85': + if self.prefix(2) == '\r\n': + self.forward(2) + else: + self.forward() + return '\n' + elif ch in '\u2028\u2029': + self.forward() + return ch + return '' + +#try: +# import psyco +# psyco.bind(Scanner) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/serializer.py b/collectors/python.d.plugin/python_modules/pyyaml3/serializer.py new file mode 100644 index 0000000..1ba2f7f --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/serializer.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Serializer', 'SerializerError'] + +from .error import YAMLError +from .events import * +from .nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer: + + ANCHOR_TEMPLATE = 'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/tokens.py b/collectors/python.d.plugin/python_modules/pyyaml3/tokens.py new file mode 100644 index 0000000..c5c4fb1 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/tokens.py @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: MIT + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '' + +class DirectiveToken(Token): + id = '' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '' + +class DocumentEndToken(Token): + id = '' + +class StreamStartToken(Token): + id = '' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '' + +class BlockSequenceStartToken(Token): + id = '' + +class BlockMappingStartToken(Token): + id = '' + +class BlockEndToken(Token): + id = '' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + diff --git a/collectors/python.d.plugin/python_modules/third_party/__init__.py b/collectors/python.d.plugin/python_modules/third_party/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/python.d.plugin/python_modules/third_party/boinc_client.py b/collectors/python.d.plugin/python_modules/third_party/boinc_client.py new file mode 100644 index 0000000..ec21779 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/boinc_client.py @@ -0,0 +1,515 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# client.py - Somewhat higher-level GUI_RPC API for BOINC core client +# +# Copyright (C) 2013 Rodrigo Silva (MestreLion) +# Copyright (C) 2017 Austin S. Hemmelgarn +# +# SPDX-License-Identifier: GPL-3.0 + +# Based on client/boinc_cmd.cpp + +import hashlib +import socket +import sys +import time +from functools import total_ordering +from xml.etree import ElementTree + +GUI_RPC_PASSWD_FILE = "/var/lib/boinc/gui_rpc_auth.cfg" + +GUI_RPC_HOSTNAME = None # localhost +GUI_RPC_PORT = 31416 +GUI_RPC_TIMEOUT = 1 + +class Rpc(object): + ''' Class to perform GUI RPC calls to a BOINC core client. + Usage in a context manager ('with' block) is recommended to ensure + disconnect() is called. Using the same instance for all calls is also + recommended so it reuses the same socket connection + ''' + def __init__(self, hostname="", port=0, timeout=0, text_output=False): + self.hostname = hostname + self.port = port + self.timeout = timeout + self.sock = None + self.text_output = text_output + + @property + def sockargs(self): + return (self.hostname, self.port, self.timeout) + + def __enter__(self): self.connect(*self.sockargs); return self + def __exit__(self, *args): self.disconnect() + + def connect(self, hostname="", port=0, timeout=0): + ''' Connect to (hostname, port) with timeout in seconds. + Hostname defaults to None (localhost), and port to 31416 + Calling multiple times will disconnect previous connection (if any), + and (re-)connect to host. + ''' + if self.sock: + self.disconnect() + + self.hostname = hostname or GUI_RPC_HOSTNAME + self.port = port or GUI_RPC_PORT + self.timeout = timeout or GUI_RPC_TIMEOUT + + self.sock = socket.create_connection(self.sockargs[0:2], self.sockargs[2]) + + def disconnect(self): + ''' Disconnect from host. Calling multiple times is OK (idempotent) + ''' + if self.sock: + self.sock.close() + self.sock = None + + def call(self, request, text_output=None): + ''' Do an RPC call. Pack and send the XML request and return the + unpacked reply. request can be either plain XML text or a + xml.etree.ElementTree.Element object. Return ElementTree.Element + or XML text according to text_output flag. + Will auto-connect if not connected. + ''' + if text_output is None: + text_output = self.text_output + + if not self.sock: + self.connect(*self.sockargs) + + if not isinstance(request, ElementTree.Element): + request = ElementTree.fromstring(request) + + # pack request + end = '\003' + if sys.version_info[0] < 3: + req = "\n{0}\n\n{1}".format(ElementTree.tostring(request).replace(' />', '/>'), end) + else: + req = "\n{0}\n\n{1}".format(ElementTree.tostring(request, encoding='unicode').replace(' />', '/>'), end).encode() + + try: + self.sock.sendall(req) + except (socket.error, socket.herror, socket.gaierror, socket.timeout): + raise + + req = "" + while True: + try: + buf = self.sock.recv(8192) + if not buf: + raise socket.error("No data from socket") + if sys.version_info[0] >= 3: + buf = buf.decode() + except socket.error: + raise + n = buf.find(end) + if not n == -1: break + req += buf + req += buf[:n] + + # unpack reply (remove root tag, ie: first and last lines) + req = '\n'.join(req.strip().rsplit('\n')[1:-1]) + + if text_output: + return req + else: + return ElementTree.fromstring(req) + +def setattrs_from_xml(obj, xml, attrfuncdict={}): + ''' Helper to set values for attributes of a class instance by mapping + matching tags from a XML file. + attrfuncdict is a dict of functions to customize value data type of + each attribute. It falls back to simple int/float/bool/str detection + based on values defined in __init__(). This would not be needed if + Boinc used standard RPC protocol, which includes data type in XML. + ''' + if not isinstance(xml, ElementTree.Element): + xml = ElementTree.fromstring(xml) + for e in list(xml): + if hasattr(obj, e.tag): + attr = getattr(obj, e.tag) + attrfunc = attrfuncdict.get(e.tag, None) + if attrfunc is None: + if isinstance(attr, bool): attrfunc = parse_bool + elif isinstance(attr, int): attrfunc = parse_int + elif isinstance(attr, float): attrfunc = parse_float + elif isinstance(attr, str): attrfunc = parse_str + elif isinstance(attr, list): attrfunc = parse_list + else: attrfunc = lambda x: x + setattr(obj, e.tag, attrfunc(e)) + else: + pass + #print "class missing attribute '%s': %r" % (e.tag, obj) + return obj + + +def parse_bool(e): + ''' Helper to convert ElementTree.Element.text to boolean. + Treat '' (and '[[:blank:]]') as True + Treat '0' and 'false' as False + ''' + if e.text is None: + return True + else: + return bool(e.text) and not e.text.strip().lower() in ('0', 'false') + + +def parse_int(e): + ''' Helper to convert ElementTree.Element.text to integer. + Treat '' (and '') as 0 + ''' + # int(float()) allows casting to int a value expressed as float in XML + return 0 if e.text is None else int(float(e.text.strip())) + + +def parse_float(e): + ''' Helper to convert ElementTree.Element.text to float. ''' + return 0.0 if e.text is None else float(e.text.strip()) + + +def parse_str(e): + ''' Helper to convert ElementTree.Element.text to string. ''' + return "" if e.text is None else e.text.strip() + + +def parse_list(e): + ''' Helper to convert ElementTree.Element to list. For now, simply return + the list of root element's children + ''' + return list(e) + + +class Enum(object): + UNKNOWN = -1 # Not in original API + + @classmethod + def name(cls, value): + ''' Quick-and-dirty fallback for getting the "name" of an enum item ''' + + # value as string, if it matches an enum attribute. + # Allows short usage as Enum.name("VALUE") besides Enum.name(Enum.VALUE) + if hasattr(cls, str(value)): + return cls.name(getattr(cls, value, None)) + + # value not handled in subclass name() + for k, v in cls.__dict__.items(): + if v == value: + return k.lower().replace('_', ' ') + + # value not found + return cls.name(Enum.UNKNOWN) + + +class CpuSched(Enum): + ''' values of ACTIVE_TASK::scheduler_state and ACTIVE_TASK::next_scheduler_state + "SCHEDULED" is synonymous with "executing" except when CPU throttling + is in use. + ''' + UNINITIALIZED = 0 + PREEMPTED = 1 + SCHEDULED = 2 + + +class ResultState(Enum): + ''' Values of RESULT::state in client. + THESE MUST BE IN NUMERICAL ORDER + (because of the > comparison in RESULT::computing_done()) + see html/inc/common_defs.inc + ''' + NEW = 0 + #// New result + FILES_DOWNLOADING = 1 + #// Input files for result (WU, app version) are being downloaded + FILES_DOWNLOADED = 2 + #// Files are downloaded, result can be (or is being) computed + COMPUTE_ERROR = 3 + #// computation failed; no file upload + FILES_UPLOADING = 4 + #// Output files for result are being uploaded + FILES_UPLOADED = 5 + #// Files are uploaded, notify scheduling server at some point + ABORTED = 6 + #// result was aborted + UPLOAD_FAILED = 7 + #// some output file permanent failure + + +class Process(Enum): + ''' values of ACTIVE_TASK::task_state ''' + UNINITIALIZED = 0 + #// process doesn't exist yet + EXECUTING = 1 + #// process is running, as far as we know + SUSPENDED = 9 + #// we've sent it a "suspend" message + ABORT_PENDING = 5 + #// process exceeded limits; send "abort" message, waiting to exit + QUIT_PENDING = 8 + #// we've sent it a "quit" message, waiting to exit + COPY_PENDING = 10 + #// waiting for async file copies to finish + + +class _Struct(object): + ''' base helper class with common methods for all classes derived from + BOINC's C++ structs + ''' + @classmethod + def parse(cls, xml): + return setattrs_from_xml(cls(), xml) + + def __str__(self, indent=0): + buf = '{0}{1}:\n'.format('\t' * indent, self.__class__.__name__) + for attr in self.__dict__: + value = getattr(self, attr) + if isinstance(value, list): + buf += '{0}\t{1} [\n'.format('\t' * indent, attr) + for v in value: buf += '\t\t{0}\t\t,\n'.format(v) + buf += '\t]\n' + else: + buf += '{0}\t{1}\t{2}\n'.format('\t' * indent, + attr, + value.__str__(indent+2) + if isinstance(value, _Struct) + else repr(value)) + return buf + + +@total_ordering +class VersionInfo(_Struct): + def __init__(self, major=0, minor=0, release=0): + self.major = major + self.minor = minor + self.release = release + + @property + def _tuple(self): + return (self.major, self.minor, self.release) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self._tuple == other._tuple + + def __ne__(self, other): + return not self.__eq__(other) + + def __gt__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self._tuple > other._tuple + + def __str__(self): + return "{0}.{1}.{2}".format(self.major, self.minor, self.release) + + def __repr__(self): + return "{0}{1}".format(self.__class__.__name__, self._tuple) + + +class Result(_Struct): + ''' Also called "task" in some contexts ''' + def __init__(self): + # Names and values follow lib/gui_rpc_client.h @ RESULT + # Order too, except when grouping contradicts client/result.cpp + # RESULT::write_gui(), then XML order is used. + + self.name = "" + self.wu_name = "" + self.version_num = 0 + #// identifies the app used + self.plan_class = "" + self.project_url = "" # from PROJECT.master_url + self.report_deadline = 0.0 # seconds since epoch + self.received_time = 0.0 # seconds since epoch + #// when we got this from server + self.ready_to_report = False + #// we're ready to report this result to the server; + #// either computation is done and all the files have been uploaded + #// or there was an error + self.got_server_ack = False + #// we've received the ack for this result from the server + self.final_cpu_time = 0.0 + self.final_elapsed_time = 0.0 + self.state = ResultState.NEW + self.estimated_cpu_time_remaining = 0.0 + #// actually, estimated elapsed time remaining + self.exit_status = 0 + #// return value from the application + self.suspended_via_gui = False + self.project_suspended_via_gui = False + self.edf_scheduled = False + #// temporary used to tell GUI that this result is deadline-scheduled + self.coproc_missing = False + #// a coproc needed by this job is missing + #// (e.g. because user removed their GPU board). + self.scheduler_wait = False + self.scheduler_wait_reason = "" + self.network_wait = False + self.resources = "" + #// textual description of resources used + + #// the following defined if active + # XML is generated in client/app.cpp ACTIVE_TASK::write_gui() + self.active_task = False + self.active_task_state = Process.UNINITIALIZED + self.app_version_num = 0 + self.slot = -1 + self.pid = 0 + self.scheduler_state = CpuSched.UNINITIALIZED + self.checkpoint_cpu_time = 0.0 + self.current_cpu_time = 0.0 + self.fraction_done = 0.0 + self.elapsed_time = 0.0 + self.swap_size = 0 + self.working_set_size_smoothed = 0.0 + self.too_large = False + self.needs_shmem = False + self.graphics_exec_path = "" + self.web_graphics_url = "" + self.remote_desktop_addr = "" + self.slot_path = "" + #// only present if graphics_exec_path is + + # The following are not in original API, but are present in RPC XML reply + self.completed_time = 0.0 + #// time when ready_to_report was set + self.report_immediately = False + self.working_set_size = 0 + self.page_fault_rate = 0.0 + #// derived by higher-level code + + # The following are in API, but are NEVER in RPC XML reply. Go figure + self.signal = 0 + + self.app = None # APP* + self.wup = None # WORKUNIT* + self.project = None # PROJECT* + self.avp = None # APP_VERSION* + + @classmethod + def parse(cls, xml): + if not isinstance(xml, ElementTree.Element): + xml = ElementTree.fromstring(xml) + + # parse main XML + result = super(Result, cls).parse(xml) + + # parse '' children + active_task = xml.find('active_task') + if active_task is None: + result.active_task = False # already the default after __init__() + else: + result.active_task = True # already the default after main parse + result = setattrs_from_xml(result, active_task) + + #// if CPU time is nonzero but elapsed time is zero, + #// we must be talking to an old client. + #// Set elapsed = CPU + #// (easier to deal with this here than in the manager) + if result.current_cpu_time != 0 and result.elapsed_time == 0: + result.elapsed_time = result.current_cpu_time + + if result.final_cpu_time != 0 and result.final_elapsed_time == 0: + result.final_elapsed_time = result.final_cpu_time + + return result + + def __str__(self): + buf = '{0}:\n'.format(self.__class__.__name__) + for attr in self.__dict__: + value = getattr(self, attr) + if attr in ['received_time', 'report_deadline']: + value = time.ctime(value) + buf += '\t{0}\t{1}\n'.format(attr, value) + return buf + + +class BoincClient(object): + + def __init__(self, host="", port=0, passwd=None): + self.hostname = host + self.port = port + self.passwd = passwd + self.rpc = Rpc(text_output=False) + self.version = None + self.authorized = False + + # Informative, not authoritative. Records status of *last* RPC call, + # but does not infer success about the *next* one. + # Thus, it should be read *after* an RPC call, not prior to one + self.connected = False + + def __enter__(self): self.connect(); return self + def __exit__(self, *args): self.disconnect() + + def connect(self): + try: + self.rpc.connect(self.hostname, self.port) + self.connected = True + except socket.error: + self.connected = False + return + self.authorized = self.authorize(self.passwd) + self.version = self.exchange_versions() + + def disconnect(self): + self.rpc.disconnect() + + def authorize(self, password): + ''' Request authorization. If password is None and we are connecting + to localhost, try to read password from the local config file + GUI_RPC_PASSWD_FILE. If file can't be read (not found or no + permission to read), try to authorize with a blank password. + If authorization is requested and fails, all subsequent calls + will be refused with socket.error 'Connection reset by peer' (104). + Since most local calls do no require authorization, do not attempt + it if you're not sure about the password. + ''' + if password is None and not self.hostname: + password = read_gui_rpc_password() or "" + nonce = self.rpc.call('').text + authhash = hashlib.md5('{0}{1}'.format(nonce, password).encode()).hexdigest().lower() + reply = self.rpc.call('{0}'.format(authhash)) + + if reply.tag == 'authorized': + return True + else: + return False + + def exchange_versions(self): + ''' Return VersionInfo instance with core client version info ''' + return VersionInfo.parse(self.rpc.call('')) + + def get_tasks(self): + ''' Same as get_results(active_only=False) ''' + return self.get_results(False) + + def get_results(self, active_only=False): + ''' Get a list of results. + Those that are in progress will have information such as CPU time + and fraction done. Each result includes a name; + Use CC_STATE::lookup_result() to find this result in the current static state; + if it's not there, call get_state() again. + ''' + reply = self.rpc.call("{0}".format(1 if active_only else 0)) + if not reply.tag == 'results': + return [] + + results = [] + for item in list(reply): + results.append(Result.parse(item)) + + return results + + +def read_gui_rpc_password(): + ''' Read password string from GUI_RPC_PASSWD_FILE file, trim the last CR + (if any), and return it + ''' + try: + with open(GUI_RPC_PASSWD_FILE, 'r') as f: + buf = f.read() + if buf.endswith('\n'): return buf[:-1] # trim last CR + else: return buf + except IOError: + # Permission denied or File not found. + pass diff --git a/collectors/python.d.plugin/python_modules/third_party/filelock.py b/collectors/python.d.plugin/python_modules/third_party/filelock.py new file mode 100644 index 0000000..4c98167 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/filelock.py @@ -0,0 +1,451 @@ +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to + +""" +A platform independent file lock that supports the with-statement. +""" + + +# Modules +# ------------------------------------------------ +import logging +import os +import threading +import time +try: + import warnings +except ImportError: + warnings = None + +try: + import msvcrt +except ImportError: + msvcrt = None + +try: + import fcntl +except ImportError: + fcntl = None + + +# Backward compatibility +# ------------------------------------------------ +try: + TimeoutError +except NameError: + TimeoutError = OSError + + +# Data +# ------------------------------------------------ +__all__ = [ + "Timeout", + "BaseFileLock", + "WindowsFileLock", + "UnixFileLock", + "SoftFileLock", + "FileLock" +] + +__version__ = "3.0.12" + + +_logger = None +def logger(): + """Returns the logger instance used in this module.""" + global _logger + _logger = _logger or logging.getLogger(__name__) + return _logger + + +# Exceptions +# ------------------------------------------------ +class Timeout(TimeoutError): + """ + Raised when the lock could not be acquired in *timeout* + seconds. + """ + + def __init__(self, lock_file): + """ + """ + #: The path of the file lock. + self.lock_file = lock_file + return None + + def __str__(self): + temp = "The file lock '{}' could not be acquired."\ + .format(self.lock_file) + return temp + + +# Classes +# ------------------------------------------------ + +# This is a helper class which is returned by :meth:`BaseFileLock.acquire` +# and wraps the lock to make sure __enter__ is not called twice when entering +# the with statement. +# If we would simply return *self*, the lock would be acquired again +# in the *__enter__* method of the BaseFileLock, but not released again +# automatically. +# +# :seealso: issue #37 (memory leak) +class _Acquire_ReturnProxy(object): + + def __init__(self, lock): + self.lock = lock + return None + + def __enter__(self): + return self.lock + + def __exit__(self, exc_type, exc_value, traceback): + self.lock.release() + return None + + +class BaseFileLock(object): + """ + Implements the base class of a file lock. + """ + + def __init__(self, lock_file, timeout = -1): + """ + """ + # The path to the lock file. + self._lock_file = lock_file + + # The file descriptor for the *_lock_file* as it is returned by the + # os.open() function. + # This file lock is only NOT None, if the object currently holds the + # lock. + self._lock_file_fd = None + + # The default timeout value. + self.timeout = timeout + + # We use this lock primarily for the lock counter. + self._thread_lock = threading.Lock() + + # The lock counter is used for implementing the nested locking + # mechanism. Whenever the lock is acquired, the counter is increased and + # the lock is only released, when this value is 0 again. + self._lock_counter = 0 + return None + + @property + def lock_file(self): + """ + The path to the lock file. + """ + return self._lock_file + + @property + def timeout(self): + """ + You can set a default timeout for the filelock. It will be used as + fallback value in the acquire method, if no timeout value (*None*) is + given. + + If you want to disable the timeout, set it to a negative value. + + A timeout of 0 means, that there is exactly one attempt to acquire the + file lock. + + .. versionadded:: 2.0.0 + """ + return self._timeout + + @timeout.setter + def timeout(self, value): + """ + """ + self._timeout = float(value) + return None + + # Platform dependent locking + # -------------------------------------------- + + def _acquire(self): + """ + Platform dependent. If the file lock could be + acquired, self._lock_file_fd holds the file descriptor + of the lock file. + """ + raise NotImplementedError() + + def _release(self): + """ + Releases the lock and sets self._lock_file_fd to None. + """ + raise NotImplementedError() + + # Platform independent methods + # -------------------------------------------- + + @property + def is_locked(self): + """ + True, if the object holds the file lock. + + .. versionchanged:: 2.0.0 + + This was previously a method and is now a property. + """ + return self._lock_file_fd is not None + + def acquire(self, timeout=None, poll_intervall=0.05): + """ + Acquires the file lock or fails with a :exc:`Timeout` error. + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + :arg float timeout: + The maximum time waited for the file lock. + If ``timeout < 0``, there is no timeout and this method will + block until the lock could be acquired. + If ``timeout`` is None, the default :attr:`~timeout` is used. + + :arg float poll_intervall: + We check once in *poll_intervall* seconds if we can acquire the + file lock. + + :raises Timeout: + if the lock could not be acquired in *timeout* seconds. + + .. versionchanged:: 2.0.0 + + This method returns now a *proxy* object instead of *self*, + so that it can be used in a with statement without side effects. + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self.timeout + + # Increment the number right at the beginning. + # We can still undo it, if something fails. + with self._thread_lock: + self._lock_counter += 1 + + lock_id = id(self) + lock_filename = self._lock_file + start_time = time.time() + try: + while True: + with self._thread_lock: + if not self.is_locked: + logger().debug('Attempting to acquire lock %s on %s', lock_id, lock_filename) + self._acquire() + + if self.is_locked: + logger().info('Lock %s acquired on %s', lock_id, lock_filename) + break + elif timeout >= 0 and time.time() - start_time > timeout: + logger().debug('Timeout on acquiring lock %s on %s', lock_id, lock_filename) + raise Timeout(self._lock_file) + else: + logger().debug( + 'Lock %s not acquired on %s, waiting %s seconds ...', + lock_id, lock_filename, poll_intervall + ) + time.sleep(poll_intervall) + except: + # Something did go wrong, so decrement the counter. + with self._thread_lock: + self._lock_counter = max(0, self._lock_counter - 1) + + raise + return _Acquire_ReturnProxy(lock = self) + + def release(self, force = False): + """ + Releases the file lock. + + Please note, that the lock is only completly released, if the lock + counter is 0. + + Also note, that the lock file itself is not automatically deleted. + + :arg bool force: + If true, the lock counter is ignored and the lock is released in + every case. + """ + with self._thread_lock: + + if self.is_locked: + self._lock_counter -= 1 + + if self._lock_counter == 0 or force: + lock_id = id(self) + lock_filename = self._lock_file + + logger().debug('Attempting to release lock %s on %s', lock_id, lock_filename) + self._release() + self._lock_counter = 0 + logger().info('Lock %s released on %s', lock_id, lock_filename) + + return None + + def __enter__(self): + self.acquire() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.release() + return None + + def __del__(self): + self.release(force = True) + return None + + +# Windows locking mechanism +# ~~~~~~~~~~~~~~~~~~~~~~~~~ + +class WindowsFileLock(BaseFileLock): + """ + Uses the :func:`msvcrt.locking` function to hard lock the lock file on + windows systems. + """ + + def _acquire(self): + open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC + + try: + fd = os.open(self._lock_file, open_mode) + except OSError: + pass + else: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + except (IOError, OSError): + os.close(fd) + else: + self._lock_file_fd = fd + return None + + def _release(self): + fd = self._lock_file_fd + self._lock_file_fd = None + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + os.close(fd) + + try: + os.remove(self._lock_file) + # Probably another instance of the application + # that acquired the file lock. + except OSError: + pass + return None + +# Unix locking mechanism +# ~~~~~~~~~~~~~~~~~~~~~~ + +class UnixFileLock(BaseFileLock): + """ + Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems. + """ + + def _acquire(self): + open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC + fd = os.open(self._lock_file, open_mode) + + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except (IOError, OSError): + os.close(fd) + else: + self._lock_file_fd = fd + return None + + def _release(self): + # Do not remove the lockfile: + # + # https://github.com/benediktschmitt/py-filelock/issues/31 + # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition + fd = self._lock_file_fd + self._lock_file_fd = None + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + return None + +# Soft lock +# ~~~~~~~~~ + +class SoftFileLock(BaseFileLock): + """ + Simply watches the existence of the lock file. + """ + + def _acquire(self): + open_mode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_TRUNC + try: + fd = os.open(self._lock_file, open_mode) + except (IOError, OSError): + pass + else: + self._lock_file_fd = fd + return None + + def _release(self): + os.close(self._lock_file_fd) + self._lock_file_fd = None + + try: + os.remove(self._lock_file) + # The file is already deleted and that's what we want. + except OSError: + pass + return None + + +# Platform filelock +# ~~~~~~~~~~~~~~~~~ + +#: Alias for the lock, which should be used for the current platform. On +#: Windows, this is an alias for :class:`WindowsFileLock`, on Unix for +#: :class:`UnixFileLock` and otherwise for :class:`SoftFileLock`. +FileLock = None + +if msvcrt: + FileLock = WindowsFileLock +elif fcntl: + FileLock = UnixFileLock +else: + FileLock = SoftFileLock + + if warnings is not None: + warnings.warn("only soft file lock is available") diff --git a/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py b/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py new file mode 100644 index 0000000..f873eac --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py @@ -0,0 +1,327 @@ +# SPDX-License-Identifier: LGPL-2.1 +""" +@package sensors.py +Python Bindings for libsensors3 + +use the documentation of libsensors for the low level API. +see example.py for high level API usage. + +@author: Pavel Rojtberg (http://www.rojtberg.net) +@see: https://github.com/paroj/sensors.py +@copyright: LGPLv2 (same as libsensors) +""" + +from ctypes import * +import ctypes.util + +_libc = cdll.LoadLibrary(ctypes.util.find_library("c")) +# see https://github.com/paroj/sensors.py/issues/1 +_libc.free.argtypes = [c_void_p] + +_hdl = cdll.LoadLibrary(ctypes.util.find_library("sensors")) + +version = c_char_p.in_dll(_hdl, "libsensors_version").value.decode("ascii") + + +class SensorsError(Exception): + pass + + +class ErrorWildcards(SensorsError): + pass + + +class ErrorNoEntry(SensorsError): + pass + + +class ErrorAccessRead(SensorsError, OSError): + pass + + +class ErrorKernel(SensorsError, OSError): + pass + + +class ErrorDivZero(SensorsError, ZeroDivisionError): + pass + + +class ErrorChipName(SensorsError): + pass + + +class ErrorBusName(SensorsError): + pass + + +class ErrorParse(SensorsError): + pass + + +class ErrorAccessWrite(SensorsError, OSError): + pass + + +class ErrorIO(SensorsError, IOError): + pass + + +class ErrorRecursion(SensorsError): + pass + + +_ERR_MAP = { + 1: ErrorWildcards, + 2: ErrorNoEntry, + 3: ErrorAccessRead, + 4: ErrorKernel, + 5: ErrorDivZero, + 6: ErrorChipName, + 7: ErrorBusName, + 8: ErrorParse, + 9: ErrorAccessWrite, + 10: ErrorIO, + 11: ErrorRecursion +} + + +def raise_sensor_error(errno, message=''): + raise _ERR_MAP[abs(errno)](message) + + +class bus_id(Structure): + _fields_ = [("type", c_short), + ("nr", c_short)] + + +class chip_name(Structure): + _fields_ = [("prefix", c_char_p), + ("bus", bus_id), + ("addr", c_int), + ("path", c_char_p)] + + +class feature(Structure): + _fields_ = [("name", c_char_p), + ("number", c_int), + ("type", c_int)] + + # sensors_feature_type + IN = 0x00 + FAN = 0x01 + TEMP = 0x02 + POWER = 0x03 + ENERGY = 0x04 + CURR = 0x05 + HUMIDITY = 0x06 + MAX_MAIN = 0x7 + VID = 0x10 + INTRUSION = 0x11 + MAX_OTHER = 0x12 + BEEP_ENABLE = 0x18 + + +class subfeature(Structure): + _fields_ = [("name", c_char_p), + ("number", c_int), + ("type", c_int), + ("mapping", c_int), + ("flags", c_uint)] + + +_hdl.sensors_get_detected_chips.restype = POINTER(chip_name) +_hdl.sensors_get_features.restype = POINTER(feature) +_hdl.sensors_get_all_subfeatures.restype = POINTER(subfeature) +_hdl.sensors_get_label.restype = c_void_p # return pointer instead of str so we can free it +_hdl.sensors_get_adapter_name.restype = c_char_p # docs do not say whether to free this or not +_hdl.sensors_strerror.restype = c_char_p + +### RAW API ### +MODE_R = 1 +MODE_W = 2 +COMPUTE_MAPPING = 4 + + +def init(cfg_file=None): + file = _libc.fopen(cfg_file.encode("utf-8"), "r") if cfg_file is not None else None + + result = _hdl.sensors_init(file) + if result != 0: + raise_sensor_error(result, "sensors_init failed") + + if file is not None: + _libc.fclose(file) + + +def cleanup(): + _hdl.sensors_cleanup() + + +def parse_chip_name(orig_name): + ret = chip_name() + err = _hdl.sensors_parse_chip_name(orig_name.encode("utf-8"), byref(ret)) + + if err < 0: + raise_sensor_error(err, strerror(err)) + + return ret + + +def strerror(errnum): + return _hdl.sensors_strerror(errnum).decode("utf-8") + + +def free_chip_name(chip): + _hdl.sensors_free_chip_name(byref(chip)) + + +def get_detected_chips(match, nr): + """ + @return: (chip, next nr to query) + """ + _nr = c_int(nr) + + if match is not None: + match = byref(match) + + chip = _hdl.sensors_get_detected_chips(match, byref(_nr)) + chip = chip.contents if bool(chip) else None + return chip, _nr.value + + +def chip_snprintf_name(chip, buffer_size=200): + """ + @param buffer_size defaults to the size used in the sensors utility + """ + ret = create_string_buffer(buffer_size) + err = _hdl.sensors_snprintf_chip_name(ret, buffer_size, byref(chip)) + + if err < 0: + raise_sensor_error(err, strerror(err)) + + return ret.value.decode("utf-8") + + +def do_chip_sets(chip): + """ + @attention this function was not tested + """ + err = _hdl.sensors_do_chip_sets(byref(chip)) + if err < 0: + raise_sensor_error(err, strerror(err)) + + +def get_adapter_name(bus): + return _hdl.sensors_get_adapter_name(byref(bus)).decode("utf-8") + + +def get_features(chip, nr): + """ + @return: (feature, next nr to query) + """ + _nr = c_int(nr) + feature = _hdl.sensors_get_features(byref(chip), byref(_nr)) + feature = feature.contents if bool(feature) else None + return feature, _nr.value + + +def get_label(chip, feature): + ptr = _hdl.sensors_get_label(byref(chip), byref(feature)) + val = cast(ptr, c_char_p).value.decode("utf-8") + _libc.free(ptr) + return val + + +def get_all_subfeatures(chip, feature, nr): + """ + @return: (subfeature, next nr to query) + """ + _nr = c_int(nr) + subfeature = _hdl.sensors_get_all_subfeatures(byref(chip), byref(feature), byref(_nr)) + subfeature = subfeature.contents if bool(subfeature) else None + return subfeature, _nr.value + + +def get_value(chip, subfeature_nr): + val = c_double() + err = _hdl.sensors_get_value(byref(chip), subfeature_nr, byref(val)) + if err < 0: + raise_sensor_error(err, strerror(err)) + return val.value + + +def set_value(chip, subfeature_nr, value): + """ + @attention this function was not tested + """ + val = c_double(value) + err = _hdl.sensors_set_value(byref(chip), subfeature_nr, byref(val)) + if err < 0: + raise_sensor_error(err, strerror(err)) + + +### Convenience API ### +class ChipIterator: + def __init__(self, match=None): + self.match = parse_chip_name(match) if match is not None else None + self.nr = 0 + + def __iter__(self): + return self + + def __next__(self): + chip, self.nr = get_detected_chips(self.match, self.nr) + + if chip is None: + raise StopIteration + + return chip + + def __del__(self): + if self.match is not None: + free_chip_name(self.match) + + def next(self): # python2 compability + return self.__next__() + + +class FeatureIterator: + def __init__(self, chip): + self.chip = chip + self.nr = 0 + + def __iter__(self): + return self + + def __next__(self): + feature, self.nr = get_features(self.chip, self.nr) + + if feature is None: + raise StopIteration + + return feature + + def next(self): # python2 compability + return self.__next__() + + +class SubFeatureIterator: + def __init__(self, chip, feature): + self.chip = chip + self.feature = feature + self.nr = 0 + + def __iter__(self): + return self + + def __next__(self): + subfeature, self.nr = get_all_subfeatures(self.chip, self.feature, self.nr) + + if subfeature is None: + raise StopIteration + + return subfeature + + def next(self): # python2 compability + return self.__next__() diff --git a/collectors/python.d.plugin/python_modules/third_party/mcrcon.py b/collectors/python.d.plugin/python_modules/third_party/mcrcon.py new file mode 100644 index 0000000..a65a304 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/mcrcon.py @@ -0,0 +1,74 @@ +# Minecraft Remote Console module. +# +# Copyright (C) 2015 Barnaby Gale +# +# SPDX-License-Identifier: MIT + +import socket +import select +import struct +import time + + +class MCRconException(Exception): + pass + + +class MCRcon(object): + socket = None + + def connect(self, host, port, password): + if self.socket is not None: + raise MCRconException("Already connected") + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.settimeout(0.9) + self.socket.connect((host, port)) + self.send(3, password) + + def disconnect(self): + if self.socket is None: + raise MCRconException("Already disconnected") + self.socket.close() + self.socket = None + + def read(self, length): + data = b"" + while len(data) < length: + data += self.socket.recv(length - len(data)) + return data + + def send(self, out_type, out_data): + if self.socket is None: + raise MCRconException("Must connect before sending data") + + # Send a request packet + out_payload = struct.pack(' + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +""" +import time + + +__all__ = ('monotonic',) + + +try: + monotonic = time.monotonic +except AttributeError: + import ctypes + import ctypes.util + import os + import sys + import threading + + + def clock_clock_gettime_c_library(): + return ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True).clock_gettime + + + def clock_clock_gettime_rt_library(): + return ctypes.CDLL(ctypes.util.find_library('rt'), use_errno=True).clock_gettime + + + def clock_clock_gettime_c_library_synology6(): + return ctypes.CDLL('/usr/lib/libc.so.6', use_errno=True).clock_gettime + + + def clock_clock_gettime_rt_library_synology6(): + return ctypes.CDLL('/usr/lib/librt.so.1', use_errno=True).clock_gettime + + + def clock_gettime_linux(): + # see https://github.com/netdata/netdata/issues/7976 + order = [ + clock_clock_gettime_c_library, + clock_clock_gettime_rt_library, + clock_clock_gettime_c_library_synology6, + clock_clock_gettime_rt_library_synology6, + ] + + for gettime in order: + try: + return gettime() + except (RuntimeError, AttributeError, OSError): + continue + raise RuntimeError('can not find c and rt libraries') + + + try: + if sys.platform == 'darwin': # OS X, iOS + # See Technical Q&A QA1398 of the Mac Developer Library: + # + libc = ctypes.CDLL('/usr/lib/libc.dylib', use_errno=True) + + class mach_timebase_info_data_t(ctypes.Structure): + """System timebase info. Defined in .""" + _fields_ = (('numer', ctypes.c_uint32), + ('denom', ctypes.c_uint32)) + + mach_absolute_time = libc.mach_absolute_time + mach_absolute_time.restype = ctypes.c_uint64 + + timebase = mach_timebase_info_data_t() + libc.mach_timebase_info(ctypes.byref(timebase)) + ticks_per_second = timebase.numer / timebase.denom * 1.0e9 + + def monotonic(): + """Monotonic clock, cannot go backward.""" + return mach_absolute_time() / ticks_per_second + + elif sys.platform.startswith('win32') or sys.platform.startswith('cygwin'): + if sys.platform.startswith('cygwin'): + # Note: cygwin implements clock_gettime (CLOCK_MONOTONIC = 4) since + # version 1.7.6. Using raw WinAPI for maximum version compatibility. + + # Ugly hack using the wrong calling convention (in 32-bit mode) + # because ctypes has no windll under cygwin (and it also seems that + # the code letting you select stdcall in _ctypes doesn't exist under + # the preprocessor definitions relevant to cygwin). + # This is 'safe' because: + # 1. The ABI of GetTickCount and GetTickCount64 is identical for + # both calling conventions because they both have no parameters. + # 2. libffi masks the problem because after making the call it doesn't + # touch anything through esp and epilogue code restores a correct + # esp from ebp afterwards. + try: + kernel32 = ctypes.cdll.kernel32 + except OSError: # 'No such file or directory' + kernel32 = ctypes.cdll.LoadLibrary('kernel32.dll') + else: + kernel32 = ctypes.windll.kernel32 + + GetTickCount64 = getattr(kernel32, 'GetTickCount64', None) + if GetTickCount64: + # Windows Vista / Windows Server 2008 or newer. + GetTickCount64.restype = ctypes.c_ulonglong + + def monotonic(): + """Monotonic clock, cannot go backward.""" + return GetTickCount64() / 1000.0 + + else: + # Before Windows Vista. + GetTickCount = kernel32.GetTickCount + GetTickCount.restype = ctypes.c_uint32 + + get_tick_count_lock = threading.Lock() + get_tick_count_last_sample = 0 + get_tick_count_wraparounds = 0 + + def monotonic(): + """Monotonic clock, cannot go backward.""" + global get_tick_count_last_sample + global get_tick_count_wraparounds + + with get_tick_count_lock: + current_sample = GetTickCount() + if current_sample < get_tick_count_last_sample: + get_tick_count_wraparounds += 1 + get_tick_count_last_sample = current_sample + + final_milliseconds = get_tick_count_wraparounds << 32 + final_milliseconds += get_tick_count_last_sample + return final_milliseconds / 1000.0 + + else: + clock_gettime = clock_gettime_linux() + + class timespec(ctypes.Structure): + """Time specification, as described in clock_gettime(3).""" + _fields_ = (('tv_sec', ctypes.c_long), + ('tv_nsec', ctypes.c_long)) + + if sys.platform.startswith('linux'): + CLOCK_MONOTONIC = 1 + elif sys.platform.startswith('freebsd'): + CLOCK_MONOTONIC = 4 + elif sys.platform.startswith('sunos5'): + CLOCK_MONOTONIC = 4 + elif 'bsd' in sys.platform: + CLOCK_MONOTONIC = 3 + elif sys.platform.startswith('aix'): + CLOCK_MONOTONIC = ctypes.c_longlong(10) + + def monotonic(): + """Monotonic clock, cannot go backward.""" + ts = timespec() + if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(ts)): + errno = ctypes.get_errno() + raise OSError(errno, os.strerror(errno)) + return ts.tv_sec + ts.tv_nsec / 1.0e9 + + # Perform a sanity-check. + if monotonic() - monotonic() > 0: + raise ValueError('monotonic() is not monotonic!') + + except Exception as e: + raise RuntimeError('no suitable implementation for this system: ' + repr(e)) diff --git a/collectors/python.d.plugin/python_modules/third_party/ordereddict.py b/collectors/python.d.plugin/python_modules/third_party/ordereddict.py new file mode 100644 index 0000000..589401b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/ordereddict.py @@ -0,0 +1,110 @@ +# Copyright (c) 2009 Raymond Hettinger +# +# SPDX-License-Identifier: MIT + +from UserDict import DictMixin + + +class OrderedDict(dict, DictMixin): + + def __init__(self, *args, **kwds): + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__end + except AttributeError: + self.clear() + self.update(*args, **kwds) + + def clear(self): + self.__end = end = [] + end += [None, end, end] # sentinel node for doubly linked list + self.__map = {} # key --> [key, prev, next] + dict.clear(self) + + def __setitem__(self, key, value): + if key not in self: + end = self.__end + curr = end[1] + curr[2] = end[1] = self.__map[key] = [key, curr, end] + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + dict.__delitem__(self, key) + key, prev, next = self.__map.pop(key) + prev[2] = next + next[1] = prev + + def __iter__(self): + end = self.__end + curr = end[2] + while curr is not end: + yield curr[0] + curr = curr[2] + + def __reversed__(self): + end = self.__end + curr = end[1] + while curr is not end: + yield curr[0] + curr = curr[1] + + def popitem(self, last=True): + if not self: + raise KeyError('dictionary is empty') + if last: + key = reversed(self).next() + else: + key = iter(self).next() + value = self.pop(key) + return key, value + + def __reduce__(self): + items = [[k, self[k]] for k in self] + tmp = self.__map, self.__end + del self.__map, self.__end + inst_dict = vars(self).copy() + self.__map, self.__end = tmp + if inst_dict: + return self.__class__, (items,), inst_dict + return self.__class__, (items,) + + def keys(self): + return list(self) + + setdefault = DictMixin.setdefault + update = DictMixin.update + pop = DictMixin.pop + values = DictMixin.values + items = DictMixin.items + iterkeys = DictMixin.iterkeys + itervalues = DictMixin.itervalues + iteritems = DictMixin.iteritems + + def __repr__(self): + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + + def copy(self): + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + if isinstance(other, OrderedDict): + if len(self) != len(other): + return False + for p, q in zip(self.items(), other.items()): + if p != q: + return False + return True + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other diff --git a/collectors/python.d.plugin/python_modules/urllib3/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/__init__.py new file mode 100644 index 0000000..3add848 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/__init__.py @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: MIT +""" +urllib3 - Thread-safe connection pooling and re-using. +""" + +from __future__ import absolute_import +import warnings + +from .connectionpool import ( + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url +) + +from . import exceptions +from .filepost import encode_multipart_formdata +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry + + +# Set default logging handler to avoid "No handler found" warnings. +import logging +try: # Python 2.7+ + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' +__license__ = 'MIT' +__version__ = '1.21.1' + +__all__ = ( + 'HTTPConnectionPool', + 'HTTPSConnectionPool', + 'PoolManager', + 'ProxyManager', + 'HTTPResponse', + 'Retry', + 'Timeout', + 'add_stderr_logger', + 'connection_from_url', + 'disable_warnings', + 'encode_multipart_formdata', + 'get_host', + 'make_headers', + 'proxy_from_url', +) + +logging.getLogger(__name__).addHandler(NullHandler()) + + +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. + + Returns the handler after adding it. + """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug('Added a stderr logging handler to logger: %s', __name__) + return handler + + +# ... Clean up. +del NullHandler + + +# All warning filters *must* be appended unless you're really certain that they +# shouldn't be: otherwise, it's very hard for users to use most Python +# mechanisms to silence them. +# SecurityWarning's always go off by default. +warnings.simplefilter('always', exceptions.SecurityWarning, append=True) +# SubjectAltNameWarning's should go off once per host +warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True) +# InsecurePlatformWarning's don't vary between requests, so we keep it default. +warnings.simplefilter('default', exceptions.InsecurePlatformWarning, + append=True) +# SNIMissingWarnings should go off only once. +warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True) + + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. + """ + warnings.simplefilter('ignore', category) diff --git a/collectors/python.d.plugin/python_modules/urllib3/_collections.py b/collectors/python.d.plugin/python_modules/urllib3/_collections.py new file mode 100644 index 0000000..2a6b3ec --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/_collections.py @@ -0,0 +1,320 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import + +try: + from collections import Mapping, MutableMapping +except ImportError: + from collections.abc import Mapping, MutableMapping + +try: + from threading import RLock +except ImportError: # Platform-specific: No threads available + class RLock: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + pass + + +try: # Python 2.7+ + from collections import OrderedDict +except ImportError: + from .packages.ordered_dict import OrderedDict +from .packages.six import iterkeys, itervalues, PY3 + + +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] + + +_Null = object() + + +class RecentlyUsedContainer(MutableMapping): + """ + Provides a thread-safe dict-like container which maintains up to + ``maxsize`` keys while throwing away the least-recently-used keys beyond + ``maxsize``. + + :param maxsize: + Maximum number of recent elements to retain. + + :param dispose_func: + Every time an item is evicted from the container, + ``dispose_func(value)`` is called. Callback which will get called + """ + + ContainerCls = OrderedDict + + def __init__(self, maxsize=10, dispose_func=None): + self._maxsize = maxsize + self.dispose_func = dispose_func + + self._container = self.ContainerCls() + self.lock = RLock() + + def __getitem__(self, key): + # Re-insert the item, moving it to the end of the eviction line. + with self.lock: + item = self._container.pop(key) + self._container[key] = item + return item + + def __setitem__(self, key, value): + evicted_value = _Null + with self.lock: + # Possibly evict the existing value of 'key' + evicted_value = self._container.get(key, _Null) + self._container[key] = value + + # If we didn't evict an existing value, we might have to evict the + # least recently used item from the beginning of the container. + if len(self._container) > self._maxsize: + _key, evicted_value = self._container.popitem(last=False) + + if self.dispose_func and evicted_value is not _Null: + self.dispose_func(evicted_value) + + def __delitem__(self, key): + with self.lock: + value = self._container.pop(key) + + if self.dispose_func: + self.dispose_func(value) + + def __len__(self): + with self.lock: + return len(self._container) + + def __iter__(self): + raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') + + def clear(self): + with self.lock: + # Copy pointers to all values, then wipe the mapping + values = list(itervalues(self._container)) + self._container.clear() + + if self.dispose_func: + for value in values: + self.dispose_func(value) + + def keys(self): + with self.lock: + return list(iterkeys(self._container)) + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 7230. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + """ + + def __init__(self, headers=None, **kwargs): + super(HTTPHeaderDict, self).__init__() + self._container = OrderedDict() + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) + + def __setitem__(self, key, val): + self._container[key.lower()] = [key, val] + return self._container[key.lower()] + + def __getitem__(self, key): + val = self._container[key.lower()] + return ', '.join(val[1:]) + + def __delitem__(self, key): + del self._container[key.lower()] + + def __contains__(self, key): + return key.lower() in self._container + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, 'keys'): + return False + if not isinstance(other, type(self)): + other = type(self)(other) + return (dict((k.lower(), v) for k, v in self.itermerged()) == + dict((k.lower(), v) for k, v in other.itermerged())) + + def __ne__(self, other): + return not self.__eq__(other) + + if not PY3: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + + __marker = object() + + def __len__(self): + return len(self._container) + + def __iter__(self): + # Only provide the originally cased names + for vals in self._container.values(): + yield vals[0] + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + key_lower = key.lower() + new_vals = [key, val] + # Keep the common case aka no item present as fast as possible + vals = self._container.setdefault(key_lower, new_vals) + if new_vals is not vals: + vals.append(val) + + def extend(self, *args, **kwargs): + """Generic import function for any type of header-like object. + Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 1: + raise TypeError("extend() takes at most 1 positional " + "arguments ({0} given)".format(len(args))) + other = args[0] if len(args) >= 1 else () + + if isinstance(other, HTTPHeaderDict): + for key, val in other.iteritems(): + self.add(key, val) + elif isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + + for key, value in kwargs.items(): + self.add(key, value) + + def getlist(self, key): + """Returns a list of all the values for the named field. Returns an + empty list if the key doesn't exist.""" + try: + vals = self._container[key.lower()] + except KeyError: + return [] + else: + return vals[1:] + + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist + + def __repr__(self): + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) + + def _copy_from(self, other): + for key in other: + val = other.getlist(key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + self._container[key.lower()] = [key] + val + + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = self._container[key.lower()] + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = self._container[key.lower()] + yield val[0], ', '.join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message): # Python 2 + """Read headers from a Python 2 httplib message object.""" + # python2.7 does not expose a proper API for exporting multiheaders + # efficiently. This function re-reads raw lines from the message + # object and extracts the multiheaders properly. + headers = [] + + for line in message.headers: + if line.startswith((' ', '\t')): + key, value = headers[-1] + headers[-1] = (key, value + '\r\n' + line.rstrip()) + continue + + key, value = line.split(':', 1) + headers.append((key, value.strip())) + + return cls(headers) diff --git a/collectors/python.d.plugin/python_modules/urllib3/connection.py b/collectors/python.d.plugin/python_modules/urllib3/connection.py new file mode 100644 index 0000000..f757493 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/connection.py @@ -0,0 +1,374 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import datetime +import logging +import os +import sys +import socket +from socket import error as SocketError, timeout as SocketTimeout +import warnings +from .packages import six +from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection +from .packages.six.moves.http_client import HTTPException # noqa: F401 + +try: # Compiled with SSL? + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None + + class BaseSSLError(BaseException): + pass + + +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: # Python 2: + class ConnectionError(Exception): + pass + + +from .exceptions import ( + NewConnectionError, + ConnectTimeoutError, + SubjectAltNameWarning, + SystemTimeWarning, +) +from .packages.ssl_match_hostname import match_hostname, CertificateError + +from .util.ssl_ import ( + resolve_cert_reqs, + resolve_ssl_version, + assert_fingerprint, + create_urllib3_context, + ssl_wrap_socket +) + + +from .util import connection + +from ._collections import HTTPHeaderDict + +log = logging.getLogger(__name__) + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + +# When updating RECENT_DATE, move it to +# within two years of the current date, and no +# earlier than 6 months ago. +RECENT_DATE = datetime.date(2016, 1, 1) + + +class DummyConnection(object): + """Used to detect a failed ConnectionCls import.""" + pass + + +class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). + """ + + default_port = port_by_scheme['http'] + + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + #: Whether this connection verifies the host's certificate. + is_verified = False + + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') + + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + + # Superclass also sets self.source_address in Python 2.7+. + _HTTPConnection.__init__(self, *args, **kw) + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it. + + :return: New socket connection. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + conn = connection.create_connection( + (self.host, self.port), self.timeout, **extra_kw) + + except SocketTimeout as e: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + except SocketError as e: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e) + + return conn + + def _prepare_conn(self, conn): + self.sock = conn + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): + # TODO: Fix tunnel so it doesn't depend on self.sock state. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + def request_chunked(self, method, url, body=None, headers=None): + """ + Alternative to the common request method, which sends the + body with chunked encoding and not as one block + """ + headers = HTTPHeaderDict(headers if headers is not None else {}) + skip_accept_encoding = 'accept-encoding' in headers + skip_host = 'host' in headers + self.putrequest( + method, + url, + skip_accept_encoding=skip_accept_encoding, + skip_host=skip_host + ) + for header, value in headers.items(): + self.putheader(header, value) + if 'transfer-encoding' not in headers: + self.putheader('Transfer-Encoding', 'chunked') + self.endheaders() + + if body is not None: + stringish_types = six.string_types + (six.binary_type,) + if isinstance(body, stringish_types): + body = (body,) + for chunk in body: + if not chunk: + continue + if not isinstance(chunk, six.binary_type): + chunk = chunk.encode('utf8') + len_str = hex(len(chunk))[2:] + self.send(len_str.encode('utf-8')) + self.send(b'\r\n') + self.send(chunk) + self.send(b'\r\n') + + # After the if clause, to always have a closed body + self.send(b'0\r\n\r\n') + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + + ssl_version = None + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + ssl_context=None, **kw): + + HTTPConnection.__init__(self, host, port, strict=strict, + timeout=timeout, **kw) + + self.key_file = key_file + self.cert_file = cert_file + self.ssl_context = ssl_context + + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = 'https' + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + if self.ssl_context is None: + self.ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(None), + cert_reqs=resolve_cert_reqs(None), + ) + + self.sock = ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + ssl_context=self.ssl_context, + ) + + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + ca_cert_dir = None + ssl_version = None + assert_fingerprint = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None, + ca_cert_dir=None): + """ + This method should only be called once, before the connection is used. + """ + # If cert_reqs is not provided, we can try to guess. If the user gave + # us a cert database, we assume they want to use it: otherwise, if + # they gave us an SSL Context object we should use whatever is set for + # it. + if cert_reqs is None: + if ca_certs or ca_cert_dir: + cert_reqs = 'CERT_REQUIRED' + elif self.ssl_context is not None: + cert_reqs = self.ssl_context.verify_mode + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + + def connect(self): + # Add certificate verification + conn = self._new_conn() + + hostname = self.host + if getattr(self, '_tunnel_host', None): + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + + self.sock = conn + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host + + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn(( + 'System time is way off (before {0}). This will probably ' + 'lead to SSL verification errors').format(RECENT_DATE), + SystemTimeWarning + ) + + # Wrap socket using verification with the root certs in + # trusted_root_certs + if self.ssl_context is None: + self.ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(self.ssl_version), + cert_reqs=resolve_cert_reqs(self.cert_reqs), + ) + + context = self.ssl_context + context.verify_mode = resolve_cert_reqs(self.cert_reqs) + self.sock = ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + server_hostname=hostname, + ssl_context=context) + + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif context.verify_mode != ssl.CERT_NONE \ + and not getattr(context, 'check_hostname', False) \ + and self.assert_hostname is not False: + # While urllib3 attempts to always turn off hostname matching from + # the TLS library, this cannot always be done. So we check whether + # the TLS Library still thinks it's matching hostnames. + cert = self.sock.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn(( + 'Certificate for {0} has no `subjectAltName`, falling back to check for a ' + '`commonName` for now. This feature is being removed by major browsers and ' + 'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 ' + 'for details.)'.format(hostname)), + SubjectAltNameWarning + ) + _match_hostname(cert, self.assert_hostname or hostname) + + self.is_verified = ( + context.verify_mode == ssl.CERT_REQUIRED or + self.assert_fingerprint is not None + ) + + +def _match_hostname(cert, asserted_hostname): + try: + match_hostname(cert, asserted_hostname) + except CertificateError as e: + log.error( + 'Certificate did not match expected hostname: %s. ' + 'Certificate: %s', asserted_hostname, cert + ) + # Add cert to exception and reraise so client code can inspect + # the cert when catching the exception, if they want to + e._peer_cert = cert + raise + + +if ssl: + # Make a copy for testing. + UnverifiedHTTPSConnection = HTTPSConnection + HTTPSConnection = VerifiedHTTPSConnection +else: + HTTPSConnection = DummyConnection diff --git a/collectors/python.d.plugin/python_modules/urllib3/connectionpool.py b/collectors/python.d.plugin/python_modules/urllib3/connectionpool.py new file mode 100644 index 0000000..90e4c86 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/connectionpool.py @@ -0,0 +1,900 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import errno +import logging +import sys +import warnings + +from socket import error as SocketError, timeout as SocketTimeout +import socket + + +from .exceptions import ( + ClosedPoolError, + ProtocolError, + EmptyPoolError, + HeaderParsingError, + HostChangedError, + LocationValueError, + MaxRetryError, + ProxyError, + ReadTimeoutError, + SSLError, + TimeoutError, + InsecureRequestWarning, + NewConnectionError, +) +from .packages.ssl_match_hostname import CertificateError +from .packages import six +from .packages.six.moves import queue +from .connection import ( + port_by_scheme, + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) +from .request import RequestMethods +from .response import HTTPResponse + +from .util.connection import is_connection_dropped +from .util.request import set_file_position +from .util.response import assert_header_parsing +from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import get_host, Url + + +if six.PY2: + # Queue is imported for side effects on MS Windows + import Queue as _unused_module_Queue # noqa: F401 + +xrange = six.moves.xrange + +log = logging.getLogger(__name__) + +_Default = object() + + +# Pool objects +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. + """ + + scheme = None + QueueCls = queue.LifoQueue + + def __init__(self, host, port=None): + if not host: + raise LocationValueError("No host specified.") + + self.host = _ipv6_host(host).lower() + self.port = port + + def __str__(self): + return '%s(host=%r, port=%r)' % (type(self).__name__, + self.host, self.port) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + pass + + +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`httplib.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`httplib.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`httplib.HTTPConnection`. + + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + + :param timeout: + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to False, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param retries: + Retry configuration to use by default with requests in this pool. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \\**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. + """ + + scheme = 'http' + ConnectionCls = HTTPConnection + ResponseCls = HTTPResponse + + def __init__(self, host, port=None, strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, + headers=None, retries=None, + _proxy=None, _proxy_headers=None, + **conn_kw): + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) + + self.strict = strict + + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + + if retries is None: + retries = Retry.DEFAULT + + self.timeout = timeout + self.retries = retries + + self.pool = self.QueueCls(maxsize) + self.block = block + + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + self.conn_kw = conn_kw + + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + + def _new_conn(self): + """ + Return a fresh :class:`HTTPConnection`. + """ + self.num_connections += 1 + log.debug("Starting new HTTP connection (%d): %s", + self.num_connections, self.host) + + conn = self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + strict=self.strict, **self.conn_kw) + return conn + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") + + except queue.Empty: + if self.block: + raise EmptyPoolError(self, + "Pool reached maximum size and no more " + "connections are allowed.") + pass # Oh well, we'll create a new connection then + + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.debug("Resetting dropped connection: %s", self.host) + conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None + + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. + """ + try: + self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. + pass + except queue.Full: + # This should never happen if self.block == True + log.warning( + "Connection pool is full, discarding connection: %s", + self.host) + + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + pass + + def _prepare_proxy(self, conn): + # Nothing to do for HTTP connections. + pass + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) + + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + def _make_request(self, conn, method, url, timeout=_Default, chunked=False, + **httplib_request_kw): + """ + Perform a request on a given urllib connection object taken from our + pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. + """ + self.num_requests += 1 + + timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + + # Trigger any extra validation we need to do. + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise + + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + if chunked: + conn.request_chunked(method, url, **httplib_request_kw) + else: + conn.request(method, url, **httplib_request_kw) + + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + + # App Engine doesn't have a sock attr + if getattr(conn, 'sock', None): + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) + + # Receive the response from the server + try: + try: # Python 2.7, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older, Python 3 + try: + httplib_response = conn.getresponse() + except Exception as e: + # Remove the TypeError from the exception chain in Python 3; + # otherwise it looks like a programming error was the cause. + six.raise_from(e, None) + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) + raise + + # AppEngine doesn't have a version attr. + http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') + log.debug("%s://%s:%s \"%s %s %s\" %s %s", self.scheme, self.host, self.port, + method, url, http_version, httplib_response.status, + httplib_response.length) + + try: + assert_header_parsing(httplib_response.msg) + except HeaderParsingError as hpe: # Platform-specific: Python 3 + log.warning( + 'Failed to parse headers (url=%s): %s', + self._absolute_url(url), hpe, exc_info=True) + + return httplib_response + + def _absolute_url(self, path): + return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + # Disable access to the pool + old_pool, self.pool = self.pool, None + + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + + except queue.Empty: + pass # Done. + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + if url.startswith('/'): + return True + + # TODO: Add optional support for socket.gethostbyname checking. + scheme, host, port = get_host(url) + + host = _ipv6_host(host).lower() + + # Use explicit default port for comparison when none is given + if self.port and not port: + port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None + + return (scheme, host, port) == (self.scheme, self.host, self.port) + + def urlopen(self, method, url, body=None, headers=None, retries=None, + redirect=True, assert_same_host=True, timeout=_Default, + pool_timeout=None, release_conn=None, chunked=False, + body_pos=None, **response_kw): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`request`. + + .. note:: + + `release_conn` will only behave as expected if + `preload_content=False` because we want to make + `preload_content=False` the default behaviour someday soon without + breaking backwards compatibility. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param body: + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param redirect: + If True, automatically handle redirects (status codes 301, 302, + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. + + :param assert_same_host: + If ``True``, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When False, you can + use the pool on an HTTP proxy and request foreign hosts. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param release_conn: + If False, then the urlopen call will not release the connection + back into the pool once a response is received (but will release if + you read the entire contents of the response such as when + `preload_content=True`). This is useful if you're not preloading + the response's content immediately. You will need to call + ``r.release_conn()`` on the response ``r`` to return the connection + back into the pool. If None, it takes the value of + ``response_kw.get('preload_content', True)``. + + :param chunked: + If True, urllib3 will send the body using chunked transfer + encoding. Otherwise, urllib3 will send the body using the standard + content-length form. Defaults to False. + + :param int body_pos: + Position to seek to in file-like body in the event of a retry or + redirect. Typically this won't need to be set because urllib3 will + auto-populate the value when needed. + + :param \\**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + if headers is None: + headers = self.headers + + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) + + if release_conn is None: + release_conn = response_kw.get('preload_content', True) + + # Check host + if assert_same_host and not self.is_same_host(url): + raise HostChangedError(self, url, retries) + + conn = None + + # Track whether `conn` needs to be released before + # returning/raising/recursing. Update this variable if necessary, and + # leave `release_conn` constant throughout the function. That way, if + # the function recurses, the original value of `release_conn` will be + # passed down into the recursive call, and its value will be respected. + # + # See issue #651 [1] for details. + # + # [1] + release_this_conn = release_conn + + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + + # Keep track of whether we cleanly exited the except block. This + # ensures we do proper cleanup in finally. + clean_exit = False + + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body_pos = set_file_position(body, body_pos) + + try: + # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) + conn = self._get_conn(timeout=pool_timeout) + + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None) + if is_new_proxy_conn: + self._prepare_proxy(conn) + + # Make the request on the httplib connection object. + httplib_response = self._make_request(conn, method, url, + timeout=timeout_obj, + body=body, headers=headers, + chunked=chunked) + + # If we're going to release the connection in ``finally:``, then + # the response doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess. + response_conn = conn if not release_conn else None + + # Pass method to Response for length checking + response_kw['request_method'] = method + + # Import httplib's response into our own wrapper object + response = self.ResponseCls.from_httplib(httplib_response, + pool=self, + connection=response_conn, + retries=retries, + **response_kw) + + # Everything went great! + clean_exit = True + + except queue.Empty: + # Timed out by queue. + raise EmptyPoolError(self, "No pool connections are available.") + + except (BaseSSLError, CertificateError) as e: + # Close the connection. If a connection is reused on which there + # was a Certificate error, the next request will certainly raise + # another Certificate error. + clean_exit = False + raise SSLError(e) + + except SSLError: + # Treat SSLError separately from BaseSSLError to preserve + # traceback. + clean_exit = False + raise + + except (TimeoutError, HTTPException, SocketError, ProtocolError) as e: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + clean_exit = False + + if isinstance(e, (SocketError, NewConnectionError)) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError('Connection aborted.', e) + + retries = retries.increment(method, url, error=e, _pool=self, + _stacktrace=sys.exc_info()[2]) + retries.sleep() + + # Keep track of the error for the retry warning. + err = e + + finally: + if not clean_exit: + # We hit some kind of exception, handled or otherwise. We need + # to throw the connection away unless explicitly told not to. + # Close the connection, set the variable to None, and make sure + # we put the None back in the pool to avoid leaking it. + conn = conn and conn.close() + release_this_conn = True + + if release_this_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. + self._put_conn(conn) + + if not conn: + # Try again + log.warning("Retrying (%r) after connection " + "broken by '%r': %s", retries, err, url) + return self.urlopen(method, url, body, headers, retries, + redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, body_pos=body_pos, + **response_kw) + + # Handle redirect? + redirect_location = redirect and response.get_redirect_location() + if redirect_location: + if response.status == 303: + method = 'GET' + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() + raise + return response + + retries.sleep_for_retry(response) + log.debug("Redirecting %s -> %s", url, redirect_location) + return self.urlopen( + method, redirect_location, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, body_pos=body_pos, + **response_kw) + + # Check if we should retry the HTTP response. + has_retry_after = bool(response.getheader('Retry-After')) + if retries.is_retry(method, response.status, has_retry_after): + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_status: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() + raise + return response + retries.sleep(response) + log.debug("Retry: %s", url) + return self.urlopen( + method, url, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, + body_pos=body_pos, **response_kw) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + When Python is compiled with the :mod:`ssl` module, then + :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, + instead of :class:`.HTTPSConnection`. + + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, + ``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is + available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade + the connection socket into an SSL socket. + """ + + scheme = 'https' + ConnectionCls = HTTPSConnection + + def __init__(self, host, port=None, + strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, + block=False, headers=None, retries=None, + _proxy=None, _proxy_headers=None, + key_file=None, cert_file=None, cert_reqs=None, + ca_certs=None, ssl_version=None, + assert_hostname=None, assert_fingerprint=None, + ca_cert_dir=None, **conn_kw): + + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, + block, headers, retries, _proxy, _proxy_headers, + **conn_kw) + + if ca_certs and cert_reqs is None: + cert_reqs = 'CERT_REQUIRED' + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + self.ca_cert_dir = ca_cert_dir + self.ssl_version = ssl_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def _prepare_conn(self, conn): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version + return conn + + def _prepare_proxy(self, conn): + """ + Establish tunnel connection early, because otherwise httplib + would improperly set Host: header to proxy's IP:port. + """ + # Python 2.7+ + try: + set_tunnel = conn.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = conn._set_tunnel + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + + conn.connect() + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPSConnection`. + """ + self.num_connections += 1 + log.debug("Starting new HTTPS connection (%d): %s", + self.num_connections, self.host) + + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + strict=self.strict, **self.conn_kw) + + return self._prepare_conn(conn) + + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not getattr(conn, 'sock', None): # AppEngine might not have `.sock` + conn.connect() + + if not conn.is_verified: + warnings.warn(( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings'), + InsecureRequestWarning) + + +def connection_from_url(url, **kw): + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \\**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. + + Example:: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + port = port or port_by_scheme.get(scheme, 80) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + else: + return HTTPConnectionPool(host, port=port, **kw) + + +def _ipv6_host(host): + """ + Process IPv6 address literals + """ + + # httplib doesn't like it when we include brackets in IPv6 addresses + # Specifically, if we include brackets but also pass the port then + # httplib crazily doubles up the square brackets on the Host header. + # Instead, we need to make sure we never pass ``None`` as the port. + # However, for backward compatibility reasons we can't actually + # *assert* that. See http://bugs.python.org/issue28539 + # + # Also if an IPv6 address literal has a zone identifier, the + # percent sign might be URIencoded, convert it back into ASCII + if host.startswith('[') and host.endswith(']'): + host = host.replace('%25', '%').strip('[]') + return host diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/bindings.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/bindings.py new file mode 100644 index 0000000..bb82667 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/bindings.py @@ -0,0 +1,591 @@ +# SPDX-License-Identifier: MIT +""" +This module uses ctypes to bind a whole bunch of functions and constants from +SecureTransport. The goal here is to provide the low-level API to +SecureTransport. These are essentially the C-level functions and constants, and +they're pretty gross to work with. + +This code is a bastardised version of the code found in Will Bond's oscrypto +library. An enormous debt is owed to him for blazing this trail for us. For +that reason, this code should be considered to be covered both by urllib3's +license and by oscrypto's: + + Copyright (c) 2015-2016 Will Bond + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +""" +from __future__ import absolute_import + +import platform +from ctypes.util import find_library +from ctypes import ( + c_void_p, c_int32, c_char_p, c_size_t, c_byte, c_uint32, c_ulong, c_long, + c_bool +) +from ctypes import CDLL, POINTER, CFUNCTYPE + + +security_path = find_library('Security') +if not security_path: + raise ImportError('The library Security could not be found') + + +core_foundation_path = find_library('CoreFoundation') +if not core_foundation_path: + raise ImportError('The library CoreFoundation could not be found') + + +version = platform.mac_ver()[0] +version_info = tuple(map(int, version.split('.'))) +if version_info < (10, 8): + raise OSError( + 'Only OS X 10.8 and newer are supported, not %s.%s' % ( + version_info[0], version_info[1] + ) + ) + +Security = CDLL(security_path, use_errno=True) +CoreFoundation = CDLL(core_foundation_path, use_errno=True) + +Boolean = c_bool +CFIndex = c_long +CFStringEncoding = c_uint32 +CFData = c_void_p +CFString = c_void_p +CFArray = c_void_p +CFMutableArray = c_void_p +CFDictionary = c_void_p +CFError = c_void_p +CFType = c_void_p +CFTypeID = c_ulong + +CFTypeRef = POINTER(CFType) +CFAllocatorRef = c_void_p + +OSStatus = c_int32 + +CFDataRef = POINTER(CFData) +CFStringRef = POINTER(CFString) +CFArrayRef = POINTER(CFArray) +CFMutableArrayRef = POINTER(CFMutableArray) +CFDictionaryRef = POINTER(CFDictionary) +CFArrayCallBacks = c_void_p +CFDictionaryKeyCallBacks = c_void_p +CFDictionaryValueCallBacks = c_void_p + +SecCertificateRef = POINTER(c_void_p) +SecExternalFormat = c_uint32 +SecExternalItemType = c_uint32 +SecIdentityRef = POINTER(c_void_p) +SecItemImportExportFlags = c_uint32 +SecItemImportExportKeyParameters = c_void_p +SecKeychainRef = POINTER(c_void_p) +SSLProtocol = c_uint32 +SSLCipherSuite = c_uint32 +SSLContextRef = POINTER(c_void_p) +SecTrustRef = POINTER(c_void_p) +SSLConnectionRef = c_uint32 +SecTrustResultType = c_uint32 +SecTrustOptionFlags = c_uint32 +SSLProtocolSide = c_uint32 +SSLConnectionType = c_uint32 +SSLSessionOption = c_uint32 + + +try: + Security.SecItemImport.argtypes = [ + CFDataRef, + CFStringRef, + POINTER(SecExternalFormat), + POINTER(SecExternalItemType), + SecItemImportExportFlags, + POINTER(SecItemImportExportKeyParameters), + SecKeychainRef, + POINTER(CFArrayRef), + ] + Security.SecItemImport.restype = OSStatus + + Security.SecCertificateGetTypeID.argtypes = [] + Security.SecCertificateGetTypeID.restype = CFTypeID + + Security.SecIdentityGetTypeID.argtypes = [] + Security.SecIdentityGetTypeID.restype = CFTypeID + + Security.SecKeyGetTypeID.argtypes = [] + Security.SecKeyGetTypeID.restype = CFTypeID + + Security.SecCertificateCreateWithData.argtypes = [ + CFAllocatorRef, + CFDataRef + ] + Security.SecCertificateCreateWithData.restype = SecCertificateRef + + Security.SecCertificateCopyData.argtypes = [ + SecCertificateRef + ] + Security.SecCertificateCopyData.restype = CFDataRef + + Security.SecCopyErrorMessageString.argtypes = [ + OSStatus, + c_void_p + ] + Security.SecCopyErrorMessageString.restype = CFStringRef + + Security.SecIdentityCreateWithCertificate.argtypes = [ + CFTypeRef, + SecCertificateRef, + POINTER(SecIdentityRef) + ] + Security.SecIdentityCreateWithCertificate.restype = OSStatus + + Security.SecKeychainCreate.argtypes = [ + c_char_p, + c_uint32, + c_void_p, + Boolean, + c_void_p, + POINTER(SecKeychainRef) + ] + Security.SecKeychainCreate.restype = OSStatus + + Security.SecKeychainDelete.argtypes = [ + SecKeychainRef + ] + Security.SecKeychainDelete.restype = OSStatus + + Security.SecPKCS12Import.argtypes = [ + CFDataRef, + CFDictionaryRef, + POINTER(CFArrayRef) + ] + Security.SecPKCS12Import.restype = OSStatus + + SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t)) + SSLWriteFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t)) + + Security.SSLSetIOFuncs.argtypes = [ + SSLContextRef, + SSLReadFunc, + SSLWriteFunc + ] + Security.SSLSetIOFuncs.restype = OSStatus + + Security.SSLSetPeerID.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t + ] + Security.SSLSetPeerID.restype = OSStatus + + Security.SSLSetCertificate.argtypes = [ + SSLContextRef, + CFArrayRef + ] + Security.SSLSetCertificate.restype = OSStatus + + Security.SSLSetCertificateAuthorities.argtypes = [ + SSLContextRef, + CFTypeRef, + Boolean + ] + Security.SSLSetCertificateAuthorities.restype = OSStatus + + Security.SSLSetConnection.argtypes = [ + SSLContextRef, + SSLConnectionRef + ] + Security.SSLSetConnection.restype = OSStatus + + Security.SSLSetPeerDomainName.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t + ] + Security.SSLSetPeerDomainName.restype = OSStatus + + Security.SSLHandshake.argtypes = [ + SSLContextRef + ] + Security.SSLHandshake.restype = OSStatus + + Security.SSLRead.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t, + POINTER(c_size_t) + ] + Security.SSLRead.restype = OSStatus + + Security.SSLWrite.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t, + POINTER(c_size_t) + ] + Security.SSLWrite.restype = OSStatus + + Security.SSLClose.argtypes = [ + SSLContextRef + ] + Security.SSLClose.restype = OSStatus + + Security.SSLGetNumberSupportedCiphers.argtypes = [ + SSLContextRef, + POINTER(c_size_t) + ] + Security.SSLGetNumberSupportedCiphers.restype = OSStatus + + Security.SSLGetSupportedCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + POINTER(c_size_t) + ] + Security.SSLGetSupportedCiphers.restype = OSStatus + + Security.SSLSetEnabledCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + c_size_t + ] + Security.SSLSetEnabledCiphers.restype = OSStatus + + Security.SSLGetNumberEnabledCiphers.argtype = [ + SSLContextRef, + POINTER(c_size_t) + ] + Security.SSLGetNumberEnabledCiphers.restype = OSStatus + + Security.SSLGetEnabledCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + POINTER(c_size_t) + ] + Security.SSLGetEnabledCiphers.restype = OSStatus + + Security.SSLGetNegotiatedCipher.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite) + ] + Security.SSLGetNegotiatedCipher.restype = OSStatus + + Security.SSLGetNegotiatedProtocolVersion.argtypes = [ + SSLContextRef, + POINTER(SSLProtocol) + ] + Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus + + Security.SSLCopyPeerTrust.argtypes = [ + SSLContextRef, + POINTER(SecTrustRef) + ] + Security.SSLCopyPeerTrust.restype = OSStatus + + Security.SecTrustSetAnchorCertificates.argtypes = [ + SecTrustRef, + CFArrayRef + ] + Security.SecTrustSetAnchorCertificates.restype = OSStatus + + Security.SecTrustSetAnchorCertificatesOnly.argstypes = [ + SecTrustRef, + Boolean + ] + Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus + + Security.SecTrustEvaluate.argtypes = [ + SecTrustRef, + POINTER(SecTrustResultType) + ] + Security.SecTrustEvaluate.restype = OSStatus + + Security.SecTrustGetCertificateCount.argtypes = [ + SecTrustRef + ] + Security.SecTrustGetCertificateCount.restype = CFIndex + + Security.SecTrustGetCertificateAtIndex.argtypes = [ + SecTrustRef, + CFIndex + ] + Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef + + Security.SSLCreateContext.argtypes = [ + CFAllocatorRef, + SSLProtocolSide, + SSLConnectionType + ] + Security.SSLCreateContext.restype = SSLContextRef + + Security.SSLSetSessionOption.argtypes = [ + SSLContextRef, + SSLSessionOption, + Boolean + ] + Security.SSLSetSessionOption.restype = OSStatus + + Security.SSLSetProtocolVersionMin.argtypes = [ + SSLContextRef, + SSLProtocol + ] + Security.SSLSetProtocolVersionMin.restype = OSStatus + + Security.SSLSetProtocolVersionMax.argtypes = [ + SSLContextRef, + SSLProtocol + ] + Security.SSLSetProtocolVersionMax.restype = OSStatus + + Security.SecCopyErrorMessageString.argtypes = [ + OSStatus, + c_void_p + ] + Security.SecCopyErrorMessageString.restype = CFStringRef + + Security.SSLReadFunc = SSLReadFunc + Security.SSLWriteFunc = SSLWriteFunc + Security.SSLContextRef = SSLContextRef + Security.SSLProtocol = SSLProtocol + Security.SSLCipherSuite = SSLCipherSuite + Security.SecIdentityRef = SecIdentityRef + Security.SecKeychainRef = SecKeychainRef + Security.SecTrustRef = SecTrustRef + Security.SecTrustResultType = SecTrustResultType + Security.SecExternalFormat = SecExternalFormat + Security.OSStatus = OSStatus + + Security.kSecImportExportPassphrase = CFStringRef.in_dll( + Security, 'kSecImportExportPassphrase' + ) + Security.kSecImportItemIdentity = CFStringRef.in_dll( + Security, 'kSecImportItemIdentity' + ) + + # CoreFoundation time! + CoreFoundation.CFRetain.argtypes = [ + CFTypeRef + ] + CoreFoundation.CFRetain.restype = CFTypeRef + + CoreFoundation.CFRelease.argtypes = [ + CFTypeRef + ] + CoreFoundation.CFRelease.restype = None + + CoreFoundation.CFGetTypeID.argtypes = [ + CFTypeRef + ] + CoreFoundation.CFGetTypeID.restype = CFTypeID + + CoreFoundation.CFStringCreateWithCString.argtypes = [ + CFAllocatorRef, + c_char_p, + CFStringEncoding + ] + CoreFoundation.CFStringCreateWithCString.restype = CFStringRef + + CoreFoundation.CFStringGetCStringPtr.argtypes = [ + CFStringRef, + CFStringEncoding + ] + CoreFoundation.CFStringGetCStringPtr.restype = c_char_p + + CoreFoundation.CFStringGetCString.argtypes = [ + CFStringRef, + c_char_p, + CFIndex, + CFStringEncoding + ] + CoreFoundation.CFStringGetCString.restype = c_bool + + CoreFoundation.CFDataCreate.argtypes = [ + CFAllocatorRef, + c_char_p, + CFIndex + ] + CoreFoundation.CFDataCreate.restype = CFDataRef + + CoreFoundation.CFDataGetLength.argtypes = [ + CFDataRef + ] + CoreFoundation.CFDataGetLength.restype = CFIndex + + CoreFoundation.CFDataGetBytePtr.argtypes = [ + CFDataRef + ] + CoreFoundation.CFDataGetBytePtr.restype = c_void_p + + CoreFoundation.CFDictionaryCreate.argtypes = [ + CFAllocatorRef, + POINTER(CFTypeRef), + POINTER(CFTypeRef), + CFIndex, + CFDictionaryKeyCallBacks, + CFDictionaryValueCallBacks + ] + CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef + + CoreFoundation.CFDictionaryGetValue.argtypes = [ + CFDictionaryRef, + CFTypeRef + ] + CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef + + CoreFoundation.CFArrayCreate.argtypes = [ + CFAllocatorRef, + POINTER(CFTypeRef), + CFIndex, + CFArrayCallBacks, + ] + CoreFoundation.CFArrayCreate.restype = CFArrayRef + + CoreFoundation.CFArrayCreateMutable.argtypes = [ + CFAllocatorRef, + CFIndex, + CFArrayCallBacks + ] + CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef + + CoreFoundation.CFArrayAppendValue.argtypes = [ + CFMutableArrayRef, + c_void_p + ] + CoreFoundation.CFArrayAppendValue.restype = None + + CoreFoundation.CFArrayGetCount.argtypes = [ + CFArrayRef + ] + CoreFoundation.CFArrayGetCount.restype = CFIndex + + CoreFoundation.CFArrayGetValueAtIndex.argtypes = [ + CFArrayRef, + CFIndex + ] + CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p + + CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll( + CoreFoundation, 'kCFAllocatorDefault' + ) + CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll(CoreFoundation, 'kCFTypeArrayCallBacks') + CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll( + CoreFoundation, 'kCFTypeDictionaryKeyCallBacks' + ) + CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll( + CoreFoundation, 'kCFTypeDictionaryValueCallBacks' + ) + + CoreFoundation.CFTypeRef = CFTypeRef + CoreFoundation.CFArrayRef = CFArrayRef + CoreFoundation.CFStringRef = CFStringRef + CoreFoundation.CFDictionaryRef = CFDictionaryRef + +except (AttributeError): + raise ImportError('Error initializing ctypes') + + +class CFConst(object): + """ + A class object that acts as essentially a namespace for CoreFoundation + constants. + """ + kCFStringEncodingUTF8 = CFStringEncoding(0x08000100) + + +class SecurityConst(object): + """ + A class object that acts as essentially a namespace for Security constants. + """ + kSSLSessionOptionBreakOnServerAuth = 0 + + kSSLProtocol2 = 1 + kSSLProtocol3 = 2 + kTLSProtocol1 = 4 + kTLSProtocol11 = 7 + kTLSProtocol12 = 8 + + kSSLClientSide = 1 + kSSLStreamType = 0 + + kSecFormatPEMSequence = 10 + + kSecTrustResultInvalid = 0 + kSecTrustResultProceed = 1 + # This gap is present on purpose: this was kSecTrustResultConfirm, which + # is deprecated. + kSecTrustResultDeny = 3 + kSecTrustResultUnspecified = 4 + kSecTrustResultRecoverableTrustFailure = 5 + kSecTrustResultFatalTrustFailure = 6 + kSecTrustResultOtherError = 7 + + errSSLProtocol = -9800 + errSSLWouldBlock = -9803 + errSSLClosedGraceful = -9805 + errSSLClosedNoNotify = -9816 + errSSLClosedAbort = -9806 + + errSSLXCertChainInvalid = -9807 + errSSLCrypto = -9809 + errSSLInternal = -9810 + errSSLCertExpired = -9814 + errSSLCertNotYetValid = -9815 + errSSLUnknownRootCert = -9812 + errSSLNoRootCert = -9813 + errSSLHostNameMismatch = -9843 + errSSLPeerHandshakeFail = -9824 + errSSLPeerUserCancelled = -9839 + errSSLWeakPeerEphemeralDHKey = -9850 + errSSLServerAuthCompleted = -9841 + errSSLRecordOverflow = -9847 + + errSecVerifyFailed = -67808 + errSecNoTrustSettings = -25263 + errSecItemNotFound = -25300 + errSecInvalidTrustSettings = -25262 + + # Cipher suites. We only pick the ones our default cipher string allows. + TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C + TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030 + TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B + TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F + TLS_DHE_DSS_WITH_AES_256_GCM_SHA384 = 0x00A3 + TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F + TLS_DHE_DSS_WITH_AES_128_GCM_SHA256 = 0x00A2 + TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024 + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028 + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014 + TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B + TLS_DHE_DSS_WITH_AES_256_CBC_SHA256 = 0x006A + TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039 + TLS_DHE_DSS_WITH_AES_256_CBC_SHA = 0x0038 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067 + TLS_DHE_DSS_WITH_AES_128_CBC_SHA256 = 0x0040 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033 + TLS_DHE_DSS_WITH_AES_128_CBC_SHA = 0x0032 + TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D + TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C + TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D + TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C + TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035 + TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/low_level.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/low_level.py new file mode 100644 index 0000000..0f79a13 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/low_level.py @@ -0,0 +1,344 @@ +# SPDX-License-Identifier: MIT +""" +Low-level helpers for the SecureTransport bindings. + +These are Python functions that are not directly related to the high-level APIs +but are necessary to get them to work. They include a whole bunch of low-level +CoreFoundation messing about and memory management. The concerns in this module +are almost entirely about trying to avoid memory leaks and providing +appropriate and useful assistance to the higher-level code. +""" +import base64 +import ctypes +import itertools +import re +import os +import ssl +import tempfile + +from .bindings import Security, CoreFoundation, CFConst + + +# This regular expression is used to grab PEM data out of a PEM bundle. +_PEM_CERTS_RE = re.compile( + b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL +) + + +def _cf_data_from_bytes(bytestring): + """ + Given a bytestring, create a CFData object from it. This CFData object must + be CFReleased by the caller. + """ + return CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring) + ) + + +def _cf_dictionary_from_tuples(tuples): + """ + Given a list of Python tuples, create an associated CFDictionary. + """ + dictionary_size = len(tuples) + + # We need to get the dictionary keys and values out in the same order. + keys = (t[0] for t in tuples) + values = (t[1] for t in tuples) + cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys) + cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values) + + return CoreFoundation.CFDictionaryCreate( + CoreFoundation.kCFAllocatorDefault, + cf_keys, + cf_values, + dictionary_size, + CoreFoundation.kCFTypeDictionaryKeyCallBacks, + CoreFoundation.kCFTypeDictionaryValueCallBacks, + ) + + +def _cf_string_to_unicode(value): + """ + Creates a Unicode string from a CFString object. Used entirely for error + reporting. + + Yes, it annoys me quite a lot that this function is this complex. + """ + value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p)) + + string = CoreFoundation.CFStringGetCStringPtr( + value_as_void_p, + CFConst.kCFStringEncodingUTF8 + ) + if string is None: + buffer = ctypes.create_string_buffer(1024) + result = CoreFoundation.CFStringGetCString( + value_as_void_p, + buffer, + 1024, + CFConst.kCFStringEncodingUTF8 + ) + if not result: + raise OSError('Error copying C string from CFStringRef') + string = buffer.value + if string is not None: + string = string.decode('utf-8') + return string + + +def _assert_no_error(error, exception_class=None): + """ + Checks the return code and throws an exception if there is an error to + report + """ + if error == 0: + return + + cf_error_string = Security.SecCopyErrorMessageString(error, None) + output = _cf_string_to_unicode(cf_error_string) + CoreFoundation.CFRelease(cf_error_string) + + if output is None or output == u'': + output = u'OSStatus %s' % error + + if exception_class is None: + exception_class = ssl.SSLError + + raise exception_class(output) + + +def _cert_array_from_pem(pem_bundle): + """ + Given a bundle of certs in PEM format, turns them into a CFArray of certs + that can be used to validate a cert chain. + """ + der_certs = [ + base64.b64decode(match.group(1)) + for match in _PEM_CERTS_RE.finditer(pem_bundle) + ] + if not der_certs: + raise ssl.SSLError("No root certificates specified") + + cert_array = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks) + ) + if not cert_array: + raise ssl.SSLError("Unable to allocate memory!") + + try: + for der_bytes in der_certs: + certdata = _cf_data_from_bytes(der_bytes) + if not certdata: + raise ssl.SSLError("Unable to allocate memory!") + cert = Security.SecCertificateCreateWithData( + CoreFoundation.kCFAllocatorDefault, certdata + ) + CoreFoundation.CFRelease(certdata) + if not cert: + raise ssl.SSLError("Unable to build cert object!") + + CoreFoundation.CFArrayAppendValue(cert_array, cert) + CoreFoundation.CFRelease(cert) + except Exception: + # We need to free the array before the exception bubbles further. + # We only want to do that if an error occurs: otherwise, the caller + # should free. + CoreFoundation.CFRelease(cert_array) + + return cert_array + + +def _is_cert(item): + """ + Returns True if a given CFTypeRef is a certificate. + """ + expected = Security.SecCertificateGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _is_identity(item): + """ + Returns True if a given CFTypeRef is an identity. + """ + expected = Security.SecIdentityGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _temporary_keychain(): + """ + This function creates a temporary Mac keychain that we can use to work with + credentials. This keychain uses a one-time password and a temporary file to + store the data. We expect to have one keychain per socket. The returned + SecKeychainRef must be freed by the caller, including calling + SecKeychainDelete. + + Returns a tuple of the SecKeychainRef and the path to the temporary + directory that contains it. + """ + # Unfortunately, SecKeychainCreate requires a path to a keychain. This + # means we cannot use mkstemp to use a generic temporary file. Instead, + # we're going to create a temporary directory and a filename to use there. + # This filename will be 8 random bytes expanded into base64. We also need + # some random bytes to password-protect the keychain we're creating, so we + # ask for 40 random bytes. + random_bytes = os.urandom(40) + filename = base64.b64encode(random_bytes[:8]).decode('utf-8') + password = base64.b64encode(random_bytes[8:]) # Must be valid UTF-8 + tempdirectory = tempfile.mkdtemp() + + keychain_path = os.path.join(tempdirectory, filename).encode('utf-8') + + # We now want to create the keychain itself. + keychain = Security.SecKeychainRef() + status = Security.SecKeychainCreate( + keychain_path, + len(password), + password, + False, + None, + ctypes.byref(keychain) + ) + _assert_no_error(status) + + # Having created the keychain, we want to pass it off to the caller. + return keychain, tempdirectory + + +def _load_items_from_file(keychain, path): + """ + Given a single file, loads all the trust objects from it into arrays and + the keychain. + Returns a tuple of lists: the first list is a list of identities, the + second a list of certs. + """ + certificates = [] + identities = [] + result_array = None + + with open(path, 'rb') as f: + raw_filedata = f.read() + + try: + filedata = CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, + raw_filedata, + len(raw_filedata) + ) + result_array = CoreFoundation.CFArrayRef() + result = Security.SecItemImport( + filedata, # cert data + None, # Filename, leaving it out for now + None, # What the type of the file is, we don't care + None, # what's in the file, we don't care + 0, # import flags + None, # key params, can include passphrase in the future + keychain, # The keychain to insert into + ctypes.byref(result_array) # Results + ) + _assert_no_error(result) + + # A CFArray is not very useful to us as an intermediary + # representation, so we are going to extract the objects we want + # and then free the array. We don't need to keep hold of keys: the + # keychain already has them! + result_count = CoreFoundation.CFArrayGetCount(result_array) + for index in range(result_count): + item = CoreFoundation.CFArrayGetValueAtIndex( + result_array, index + ) + item = ctypes.cast(item, CoreFoundation.CFTypeRef) + + if _is_cert(item): + CoreFoundation.CFRetain(item) + certificates.append(item) + elif _is_identity(item): + CoreFoundation.CFRetain(item) + identities.append(item) + finally: + if result_array: + CoreFoundation.CFRelease(result_array) + + CoreFoundation.CFRelease(filedata) + + return (identities, certificates) + + +def _load_client_cert_chain(keychain, *paths): + """ + Load certificates and maybe keys from a number of files. Has the end goal + of returning a CFArray containing one SecIdentityRef, and then zero or more + SecCertificateRef objects, suitable for use as a client certificate trust + chain. + """ + # Ok, the strategy. + # + # This relies on knowing that macOS will not give you a SecIdentityRef + # unless you have imported a key into a keychain. This is a somewhat + # artificial limitation of macOS (for example, it doesn't necessarily + # affect iOS), but there is nothing inside Security.framework that lets you + # get a SecIdentityRef without having a key in a keychain. + # + # So the policy here is we take all the files and iterate them in order. + # Each one will use SecItemImport to have one or more objects loaded from + # it. We will also point at a keychain that macOS can use to work with the + # private key. + # + # Once we have all the objects, we'll check what we actually have. If we + # already have a SecIdentityRef in hand, fab: we'll use that. Otherwise, + # we'll take the first certificate (which we assume to be our leaf) and + # ask the keychain to give us a SecIdentityRef with that cert's associated + # key. + # + # We'll then return a CFArray containing the trust chain: one + # SecIdentityRef and then zero-or-more SecCertificateRef objects. The + # responsibility for freeing this CFArray will be with the caller. This + # CFArray must remain alive for the entire connection, so in practice it + # will be stored with a single SSLSocket, along with the reference to the + # keychain. + certificates = [] + identities = [] + + # Filter out bad paths. + paths = (path for path in paths if path) + + try: + for file_path in paths: + new_identities, new_certs = _load_items_from_file( + keychain, file_path + ) + identities.extend(new_identities) + certificates.extend(new_certs) + + # Ok, we have everything. The question is: do we have an identity? If + # not, we want to grab one from the first cert we have. + if not identities: + new_identity = Security.SecIdentityRef() + status = Security.SecIdentityCreateWithCertificate( + keychain, + certificates[0], + ctypes.byref(new_identity) + ) + _assert_no_error(status) + identities.append(new_identity) + + # We now want to release the original certificate, as we no longer + # need it. + CoreFoundation.CFRelease(certificates.pop(0)) + + # We now need to build a new CFArray that holds the trust chain. + trust_chain = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + for item in itertools.chain(identities, certificates): + # ArrayAppendValue does a CFRetain on the item. That's fine, + # because the finally block will release our other refs to them. + CoreFoundation.CFArrayAppendValue(trust_chain, item) + + return trust_chain + finally: + for obj in itertools.chain(identities, certificates): + CoreFoundation.CFRelease(obj) diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/appengine.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/appengine.py new file mode 100644 index 0000000..e74589f --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/appengine.py @@ -0,0 +1,297 @@ +# SPDX-License-Identifier: MIT +""" +This module provides a pool manager that uses Google App Engine's +`URLFetch Service `_. + +Example usage:: + + from urllib3 import PoolManager + from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox + + if is_appengine_sandbox(): + # AppEngineManager uses AppEngine's URLFetch API behind the scenes + http = AppEngineManager() + else: + # PoolManager uses a socket-level API behind the scenes + http = PoolManager() + + r = http.request('GET', 'https://google.com/') + +There are `limitations `_ to the URLFetch service and it may not be +the best choice for your application. There are three options for using +urllib3 on Google App Engine: + +1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is + cost-effective in many circumstances as long as your usage is within the + limitations. +2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets. + Sockets also have `limitations and restrictions + `_ and have a lower free quota than URLFetch. + To use sockets, be sure to specify the following in your ``app.yaml``:: + + env_variables: + GAE_USE_SOCKETS_HTTPLIB : 'true' + +3. If you are using `App Engine Flexible +`_, you can use the standard +:class:`PoolManager` without any configuration or special environment variables. +""" + +from __future__ import absolute_import +import logging +import os +import warnings +from ..packages.six.moves.urllib.parse import urljoin + +from ..exceptions import ( + HTTPError, + HTTPWarning, + MaxRetryError, + ProtocolError, + TimeoutError, + SSLError +) + +from ..packages.six import BytesIO +from ..request import RequestMethods +from ..response import HTTPResponse +from ..util.timeout import Timeout +from ..util.retry import Retry + +try: + from google.appengine.api import urlfetch +except ImportError: + urlfetch = None + + +log = logging.getLogger(__name__) + + +class AppEnginePlatformWarning(HTTPWarning): + pass + + +class AppEnginePlatformError(HTTPError): + pass + + +class AppEngineManager(RequestMethods): + """ + Connection manager for Google App Engine sandbox applications. + + This manager uses the URLFetch service directly instead of using the + emulated httplib, and is subject to URLFetch limitations as described in + the App Engine documentation `here + `_. + + Notably it will raise an :class:`AppEnginePlatformError` if: + * URLFetch is not available. + * If you attempt to use this on App Engine Flexible, as full socket + support is available. + * If a request size is more than 10 megabytes. + * If a response size is more than 32 megabtyes. + * If you use an unsupported request method such as OPTIONS. + + Beyond those cases, it will raise normal urllib3 errors. + """ + + def __init__(self, headers=None, retries=None, validate_certificate=True, + urlfetch_retries=True): + if not urlfetch: + raise AppEnginePlatformError( + "URLFetch is not available in this environment.") + + if is_prod_appengine_mvms(): + raise AppEnginePlatformError( + "Use normal urllib3.PoolManager instead of AppEngineManager" + "on Managed VMs, as using URLFetch is not necessary in " + "this environment.") + + warnings.warn( + "urllib3 is using URLFetch on Google App Engine sandbox instead " + "of sockets. To use sockets directly instead of URLFetch see " + "https://urllib3.readthedocs.io/en/latest/reference/urllib3.contrib.html.", + AppEnginePlatformWarning) + + RequestMethods.__init__(self, headers) + self.validate_certificate = validate_certificate + self.urlfetch_retries = urlfetch_retries + + self.retries = retries or Retry.DEFAULT + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Return False to re-raise any potential exceptions + return False + + def urlopen(self, method, url, body=None, headers=None, + retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT, + **response_kw): + + retries = self._get_retries(retries, redirect) + + try: + follow_redirects = ( + redirect and + retries.redirect != 0 and + retries.total) + response = urlfetch.fetch( + url, + payload=body, + method=method, + headers=headers or {}, + allow_truncated=False, + follow_redirects=self.urlfetch_retries and follow_redirects, + deadline=self._get_absolute_timeout(timeout), + validate_certificate=self.validate_certificate, + ) + except urlfetch.DeadlineExceededError as e: + raise TimeoutError(self, e) + + except urlfetch.InvalidURLError as e: + if 'too large' in str(e): + raise AppEnginePlatformError( + "URLFetch request too large, URLFetch only " + "supports requests up to 10mb in size.", e) + raise ProtocolError(e) + + except urlfetch.DownloadError as e: + if 'Too many redirects' in str(e): + raise MaxRetryError(self, url, reason=e) + raise ProtocolError(e) + + except urlfetch.ResponseTooLargeError as e: + raise AppEnginePlatformError( + "URLFetch response too large, URLFetch only supports" + "responses up to 32mb in size.", e) + + except urlfetch.SSLCertificateError as e: + raise SSLError(e) + + except urlfetch.InvalidMethodError as e: + raise AppEnginePlatformError( + "URLFetch does not support method: %s" % method, e) + + http_response = self._urlfetch_response_to_http_response( + response, retries=retries, **response_kw) + + # Handle redirect? + redirect_location = redirect and http_response.get_redirect_location() + if redirect_location: + # Check for redirect response + if (self.urlfetch_retries and retries.raise_on_redirect): + raise MaxRetryError(self, url, "too many redirects") + else: + if http_response.status == 303: + method = 'GET' + + try: + retries = retries.increment(method, url, response=http_response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + raise MaxRetryError(self, url, "too many redirects") + return http_response + + retries.sleep_for_retry(http_response) + log.debug("Redirecting %s -> %s", url, redirect_location) + redirect_url = urljoin(url, redirect_location) + return self.urlopen( + method, redirect_url, body, headers, + retries=retries, redirect=redirect, + timeout=timeout, **response_kw) + + # Check if we should retry the HTTP response. + has_retry_after = bool(http_response.getheader('Retry-After')) + if retries.is_retry(method, http_response.status, has_retry_after): + retries = retries.increment( + method, url, response=http_response, _pool=self) + log.debug("Retry: %s", url) + retries.sleep(http_response) + return self.urlopen( + method, url, + body=body, headers=headers, + retries=retries, redirect=redirect, + timeout=timeout, **response_kw) + + return http_response + + def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw): + + if is_prod_appengine(): + # Production GAE handles deflate encoding automatically, but does + # not remove the encoding header. + content_encoding = urlfetch_resp.headers.get('content-encoding') + + if content_encoding == 'deflate': + del urlfetch_resp.headers['content-encoding'] + + transfer_encoding = urlfetch_resp.headers.get('transfer-encoding') + # We have a full response's content, + # so let's make sure we don't report ourselves as chunked data. + if transfer_encoding == 'chunked': + encodings = transfer_encoding.split(",") + encodings.remove('chunked') + urlfetch_resp.headers['transfer-encoding'] = ','.join(encodings) + + return HTTPResponse( + # In order for decoding to work, we must present the content as + # a file-like object. + body=BytesIO(urlfetch_resp.content), + headers=urlfetch_resp.headers, + status=urlfetch_resp.status_code, + **response_kw + ) + + def _get_absolute_timeout(self, timeout): + if timeout is Timeout.DEFAULT_TIMEOUT: + return None # Defer to URLFetch's default. + if isinstance(timeout, Timeout): + if timeout._read is not None or timeout._connect is not None: + warnings.warn( + "URLFetch does not support granular timeout settings, " + "reverting to total or default URLFetch timeout.", + AppEnginePlatformWarning) + return timeout.total + return timeout + + def _get_retries(self, retries, redirect): + if not isinstance(retries, Retry): + retries = Retry.from_int( + retries, redirect=redirect, default=self.retries) + + if retries.connect or retries.read or retries.redirect: + warnings.warn( + "URLFetch only supports total retries and does not " + "recognize connect, read, or redirect retry parameters.", + AppEnginePlatformWarning) + + return retries + + +def is_appengine(): + return (is_local_appengine() or + is_prod_appengine() or + is_prod_appengine_mvms()) + + +def is_appengine_sandbox(): + return is_appengine() and not is_prod_appengine_mvms() + + +def is_local_appengine(): + return ('APPENGINE_RUNTIME' in os.environ and + 'Development/' in os.environ['SERVER_SOFTWARE']) + + +def is_prod_appengine(): + return ('APPENGINE_RUNTIME' in os.environ and + 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and + not is_prod_appengine_mvms()) + + +def is_prod_appengine_mvms(): + return os.environ.get('GAE_VM', False) == 'true' diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/ntlmpool.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/ntlmpool.py new file mode 100644 index 0000000..3f8c9eb --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/ntlmpool.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: MIT +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" +from __future__ import absolute_import + +from logging import getLogger +from ntlm import ntlm + +from .. import HTTPSConnectionPool +from ..packages.six.moves.http_client import HTTPSConnection + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s', + self.num_connections, self.host, self.authurl) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s', headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s', res.status, res.reason) + log.debug('Response headers: %s', reshdr) + log.debug('Response data: %s [...]', res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s', headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s', res.status, res.reason) + log.debug('Response headers: %s', dict(res.getheaders())) + log.debug('Response data: %s [...]', res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/pyopenssl.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/pyopenssl.py new file mode 100644 index 0000000..8d37350 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/pyopenssl.py @@ -0,0 +1,458 @@ +# SPDX-License-Identifier: MIT +""" +SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. + +This needs the following packages installed: + +* pyOpenSSL (tested with 16.0.0) +* cryptography (minimum 1.3.4, from pyopenssl) +* idna (minimum 2.0, from cryptography) + +However, pyopenssl depends on cryptography, which depends on idna, so while we +use all three directly here we end up having relatively few packages required. + +You can install them with the following command: + + pip install pyopenssl cryptography idna + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +Now you can use :mod:`urllib3` as you normally would, and it will support SNI +when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +compression in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) +""" +from __future__ import absolute_import + +import OpenSSL.SSL +from cryptography import x509 +from cryptography.hazmat.backends.openssl import backend as openssl_backend +from cryptography.hazmat.backends.openssl.x509 import _Certificate + +from socket import timeout, error as SocketError +from io import BytesIO + +try: # Platform-specific: Python 2 + from socket import _fileobject +except ImportError: # Platform-specific: Python 3 + _fileobject = None + from ..packages.backports.makefile import backport_makefile + +import logging +import ssl + +try: + import six +except ImportError: + from ..packages import six + +import sys + +from .. import util + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI always works. +HAS_SNI = True + +# Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} + +if hasattr(ssl, 'PROTOCOL_TLSv1_1') and hasattr(OpenSSL.SSL, 'TLSv1_1_METHOD'): + _openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD + +if hasattr(ssl, 'PROTOCOL_TLSv1_2') and hasattr(OpenSSL.SSL, 'TLSv1_2_METHOD'): + _openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD + +try: + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) +except AttributeError: + pass + +_stdlib_to_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: + OpenSSL.SSL.VERIFY_PEER + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} +_openssl_to_stdlib_verify = dict( + (v, k) for k, v in _stdlib_to_openssl_verify.items() +) + +# OpenSSL will only write 16K at a time +SSL_WRITE_BLOCKSIZE = 16384 + +orig_util_HAS_SNI = util.HAS_SNI +orig_util_SSLContext = util.ssl_.SSLContext + + +log = logging.getLogger(__name__) + + +def inject_into_urllib3(): + 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' + + _validate_dependencies_met() + + util.ssl_.SSLContext = PyOpenSSLContext + util.HAS_SNI = HAS_SNI + util.ssl_.HAS_SNI = HAS_SNI + util.IS_PYOPENSSL = True + util.ssl_.IS_PYOPENSSL = True + + +def extract_from_urllib3(): + 'Undo monkey-patching by :func:`inject_into_urllib3`.' + + util.ssl_.SSLContext = orig_util_SSLContext + util.HAS_SNI = orig_util_HAS_SNI + util.ssl_.HAS_SNI = orig_util_HAS_SNI + util.IS_PYOPENSSL = False + util.ssl_.IS_PYOPENSSL = False + + +def _validate_dependencies_met(): + """ + Verifies that PyOpenSSL's package-level dependencies have been met. + Throws `ImportError` if they are not met. + """ + # Method added in `cryptography==1.1`; not available in older versions + from cryptography.x509.extensions import Extensions + if getattr(Extensions, "get_extension_for_class", None) is None: + raise ImportError("'cryptography' module missing required functionality. " + "Try upgrading to v1.3.4 or newer.") + + # pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509 + # attribute is only present on those versions. + from OpenSSL.crypto import X509 + x509 = X509() + if getattr(x509, "_x509", None) is None: + raise ImportError("'pyOpenSSL' module missing required functionality. " + "Try upgrading to v0.14 or newer.") + + +def _dnsname_to_stdlib(name): + """ + Converts a dNSName SubjectAlternativeName field to the form used by the + standard library on the given Python version. + + Cryptography produces a dNSName as a unicode string that was idna-decoded + from ASCII bytes. We need to idna-encode that string to get it back, and + then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib + uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8). + """ + def idna_encode(name): + """ + Borrowed wholesale from the Python Cryptography Project. It turns out + that we can't just safely call `idna.encode`: it can explode for + wildcard names. This avoids that problem. + """ + import idna + + for prefix in [u'*.', u'.']: + if name.startswith(prefix): + name = name[len(prefix):] + return prefix.encode('ascii') + idna.encode(name) + return idna.encode(name) + + name = idna_encode(name) + if sys.version_info >= (3, 0): + name = name.decode('utf-8') + return name + + +def get_subj_alt_name(peer_cert): + """ + Given an PyOpenSSL certificate, provides all the subject alternative names. + """ + # Pass the cert to cryptography, which has much better APIs for this. + # This is technically using private APIs, but should work across all + # relevant versions until PyOpenSSL gets something proper for this. + cert = _Certificate(openssl_backend, peer_cert._x509) + + # We want to find the SAN extension. Ask Cryptography to locate it (it's + # faster than looping in Python) + try: + ext = cert.extensions.get_extension_for_class( + x509.SubjectAlternativeName + ).value + except x509.ExtensionNotFound: + # No such extension, return the empty list. + return [] + except (x509.DuplicateExtension, x509.UnsupportedExtension, + x509.UnsupportedGeneralNameType, UnicodeError) as e: + # A problem has been found with the quality of the certificate. Assume + # no SAN field is present. + log.warning( + "A problem was encountered with the certificate that prevented " + "urllib3 from finding the SubjectAlternativeName field. This can " + "affect certificate validation. The error was %s", + e, + ) + return [] + + # We want to return dNSName and iPAddress fields. We need to cast the IPs + # back to strings because the match_hostname function wants them as + # strings. + # Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8 + # decoded. This is pretty frustrating, but that's what the standard library + # does with certificates, and so we need to attempt to do the same. + names = [ + ('DNS', _dnsname_to_stdlib(name)) + for name in ext.get_values_for_type(x509.DNSName) + ] + names.extend( + ('IP Address', str(name)) + for name in ext.get_values_for_type(x509.IPAddress) + ) + + return names + + +class WrappedSocket(object): + '''API-compatibility wrapper for Python OpenSSL's Connection-class. + + Note: _makefile_refs, _drop() and _reuse() are needed for the garbage + collector of pypy. + ''' + + def __init__(self, connection, socket, suppress_ragged_eofs=True): + self.connection = connection + self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs + self._makefile_refs = 0 + self._closed = False + + def fileno(self): + return self.socket.fileno() + + # Copy-pasted from Python 3.5 source code + def _decref_socketios(self): + if self._makefile_refs > 0: + self._makefile_refs -= 1 + if self._closed: + self.close() + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + else: + raise SocketError(str(e)) + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + else: + raise + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(self.socket, self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + else: + return self.recv(*args, **kwargs) + else: + return data + + def recv_into(self, *args, **kwargs): + try: + return self.connection.recv_into(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return 0 + else: + raise SocketError(str(e)) + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return 0 + else: + raise + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(self.socket, self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + else: + return self.recv_into(*args, **kwargs) + + def settimeout(self, timeout): + return self.socket.settimeout(timeout) + + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + wr = util.wait_for_write(self.socket, self.socket.gettimeout()) + if not wr: + raise timeout() + continue + except OpenSSL.SSL.SysCallError as e: + raise SocketError(str(e)) + + def sendall(self, data): + total_sent = 0 + while total_sent < len(data): + sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE]) + total_sent += sent + + def shutdown(self): + # FIXME rethrow compatible exceptions should we ever use this + self.connection.shutdown() + + def close(self): + if self._makefile_refs < 1: + try: + self._closed = True + return self.connection.close() + except OpenSSL.SSL.Error: + return + else: + self._makefile_refs -= 1 + + def getpeercert(self, binary_form=False): + x509 = self.connection.get_peer_certificate() + + if not x509: + return x509 + + if binary_form: + return OpenSSL.crypto.dump_certificate( + OpenSSL.crypto.FILETYPE_ASN1, + x509) + + return { + 'subject': ( + (('commonName', x509.get_subject().CN),), + ), + 'subjectAltName': get_subj_alt_name(x509) + } + + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + + +if _fileobject: # Platform-specific: Python 2 + def makefile(self, mode, bufsize=-1): + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) +else: # Platform-specific: Python 3 + makefile = backport_makefile + +WrappedSocket.makefile = makefile + + +class PyOpenSSLContext(object): + """ + I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible + for translating the interface of the standard library ``SSLContext`` object + to calls into PyOpenSSL. + """ + def __init__(self, protocol): + self.protocol = _openssl_versions[protocol] + self._ctx = OpenSSL.SSL.Context(self.protocol) + self._options = 0 + self.check_hostname = False + + @property + def options(self): + return self._options + + @options.setter + def options(self, value): + self._options = value + self._ctx.set_options(value) + + @property + def verify_mode(self): + return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()] + + @verify_mode.setter + def verify_mode(self, value): + self._ctx.set_verify( + _stdlib_to_openssl_verify[value], + _verify_callback + ) + + def set_default_verify_paths(self): + self._ctx.set_default_verify_paths() + + def set_ciphers(self, ciphers): + if isinstance(ciphers, six.text_type): + ciphers = ciphers.encode('utf-8') + self._ctx.set_cipher_list(ciphers) + + def load_verify_locations(self, cafile=None, capath=None, cadata=None): + if cafile is not None: + cafile = cafile.encode('utf-8') + if capath is not None: + capath = capath.encode('utf-8') + self._ctx.load_verify_locations(cafile, capath) + if cadata is not None: + self._ctx.load_verify_locations(BytesIO(cadata)) + + def load_cert_chain(self, certfile, keyfile=None, password=None): + self._ctx.use_certificate_file(certfile) + if password is not None: + self._ctx.set_passwd_cb(lambda max_length, prompt_twice, userdata: password) + self._ctx.use_privatekey_file(keyfile or certfile) + + def wrap_socket(self, sock, server_side=False, + do_handshake_on_connect=True, suppress_ragged_eofs=True, + server_hostname=None): + cnx = OpenSSL.SSL.Connection(self._ctx, sock) + + if isinstance(server_hostname, six.text_type): # Platform-specific: Python 3 + server_hostname = server_hostname.encode('utf-8') + + if server_hostname is not None: + cnx.set_tlsext_host_name(server_hostname) + + cnx.set_connect_state() + + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(sock, sock.gettimeout()) + if not rd: + raise timeout('select timed out') + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake: %r' % e) + break + + return WrappedSocket(cnx, sock) + + +def _verify_callback(cnx, x509, err_no, err_depth, return_code): + return err_no == 0 diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/securetransport.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/securetransport.py new file mode 100644 index 0000000..fcc3011 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/securetransport.py @@ -0,0 +1,808 @@ +# SPDX-License-Identifier: MIT +""" +SecureTranport support for urllib3 via ctypes. + +This makes platform-native TLS available to urllib3 users on macOS without the +use of a compiler. This is an important feature because the Python Package +Index is moving to become a TLSv1.2-or-higher server, and the default OpenSSL +that ships with macOS is not capable of doing TLSv1.2. The only way to resolve +this is to give macOS users an alternative solution to the problem, and that +solution is to use SecureTransport. + +We use ctypes here because this solution must not require a compiler. That's +because pip is not allowed to require a compiler either. + +This is not intended to be a seriously long-term solution to this problem. +The hope is that PEP 543 will eventually solve this issue for us, at which +point we can retire this contrib module. But in the short term, we need to +solve the impending tire fire that is Python on Mac without this kind of +contrib module. So...here we are. + +To use this module, simply import and inject it:: + + import urllib3.contrib.securetransport + urllib3.contrib.securetransport.inject_into_urllib3() + +Happy TLSing! +""" +from __future__ import absolute_import + +import contextlib +import ctypes +import errno +import os.path +import shutil +import socket +import ssl +import threading +import weakref + +from .. import util +from ._securetransport.bindings import ( + Security, SecurityConst, CoreFoundation +) +from ._securetransport.low_level import ( + _assert_no_error, _cert_array_from_pem, _temporary_keychain, + _load_client_cert_chain +) + +try: # Platform-specific: Python 2 + from socket import _fileobject +except ImportError: # Platform-specific: Python 3 + _fileobject = None + from ..packages.backports.makefile import backport_makefile + +try: + memoryview(b'') +except NameError: + raise ImportError("SecureTransport only works on Pythons with memoryview") + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI always works +HAS_SNI = True + +orig_util_HAS_SNI = util.HAS_SNI +orig_util_SSLContext = util.ssl_.SSLContext + +# This dictionary is used by the read callback to obtain a handle to the +# calling wrapped socket. This is a pretty silly approach, but for now it'll +# do. I feel like I should be able to smuggle a handle to the wrapped socket +# directly in the SSLConnectionRef, but for now this approach will work I +# guess. +# +# We need to lock around this structure for inserts, but we don't do it for +# reads/writes in the callbacks. The reasoning here goes as follows: +# +# 1. It is not possible to call into the callbacks before the dictionary is +# populated, so once in the callback the id must be in the dictionary. +# 2. The callbacks don't mutate the dictionary, they only read from it, and +# so cannot conflict with any of the insertions. +# +# This is good: if we had to lock in the callbacks we'd drastically slow down +# the performance of this code. +_connection_refs = weakref.WeakValueDictionary() +_connection_ref_lock = threading.Lock() + +# Limit writes to 16kB. This is OpenSSL's limit, but we'll cargo-cult it over +# for no better reason than we need *a* limit, and this one is right there. +SSL_WRITE_BLOCKSIZE = 16384 + +# This is our equivalent of util.ssl_.DEFAULT_CIPHERS, but expanded out to +# individual cipher suites. We need to do this becuase this is how +# SecureTransport wants them. +CIPHER_SUITES = [ + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA, +] + +# Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of +# TLSv1 and a high of TLSv1.2. For everything else, we pin to that version. +_protocol_to_min_max = { + ssl.PROTOCOL_SSLv23: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12), +} + +if hasattr(ssl, "PROTOCOL_SSLv2"): + _protocol_to_min_max[ssl.PROTOCOL_SSLv2] = ( + SecurityConst.kSSLProtocol2, SecurityConst.kSSLProtocol2 + ) +if hasattr(ssl, "PROTOCOL_SSLv3"): + _protocol_to_min_max[ssl.PROTOCOL_SSLv3] = ( + SecurityConst.kSSLProtocol3, SecurityConst.kSSLProtocol3 + ) +if hasattr(ssl, "PROTOCOL_TLSv1"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1] = ( + SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol1 + ) +if hasattr(ssl, "PROTOCOL_TLSv1_1"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1_1] = ( + SecurityConst.kTLSProtocol11, SecurityConst.kTLSProtocol11 + ) +if hasattr(ssl, "PROTOCOL_TLSv1_2"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1_2] = ( + SecurityConst.kTLSProtocol12, SecurityConst.kTLSProtocol12 + ) +if hasattr(ssl, "PROTOCOL_TLS"): + _protocol_to_min_max[ssl.PROTOCOL_TLS] = _protocol_to_min_max[ssl.PROTOCOL_SSLv23] + + +def inject_into_urllib3(): + """ + Monkey-patch urllib3 with SecureTransport-backed SSL-support. + """ + util.ssl_.SSLContext = SecureTransportContext + util.HAS_SNI = HAS_SNI + util.ssl_.HAS_SNI = HAS_SNI + util.IS_SECURETRANSPORT = True + util.ssl_.IS_SECURETRANSPORT = True + + +def extract_from_urllib3(): + """ + Undo monkey-patching by :func:`inject_into_urllib3`. + """ + util.ssl_.SSLContext = orig_util_SSLContext + util.HAS_SNI = orig_util_HAS_SNI + util.ssl_.HAS_SNI = orig_util_HAS_SNI + util.IS_SECURETRANSPORT = False + util.ssl_.IS_SECURETRANSPORT = False + + +def _read_callback(connection_id, data_buffer, data_length_pointer): + """ + SecureTransport read callback. This is called by ST to request that data + be returned from the socket. + """ + wrapped_socket = None + try: + wrapped_socket = _connection_refs.get(connection_id) + if wrapped_socket is None: + return SecurityConst.errSSLInternal + base_socket = wrapped_socket.socket + + requested_length = data_length_pointer[0] + + timeout = wrapped_socket.gettimeout() + error = None + read_count = 0 + buffer = (ctypes.c_char * requested_length).from_address(data_buffer) + buffer_view = memoryview(buffer) + + try: + while read_count < requested_length: + if timeout is None or timeout >= 0: + readables = util.wait_for_read([base_socket], timeout) + if not readables: + raise socket.error(errno.EAGAIN, 'timed out') + + # We need to tell ctypes that we have a buffer that can be + # written to. Upsettingly, we do that like this: + chunk_size = base_socket.recv_into( + buffer_view[read_count:requested_length] + ) + read_count += chunk_size + if not chunk_size: + if not read_count: + return SecurityConst.errSSLClosedGraceful + break + except (socket.error) as e: + error = e.errno + + if error is not None and error != errno.EAGAIN: + if error == errno.ECONNRESET: + return SecurityConst.errSSLClosedAbort + raise + + data_length_pointer[0] = read_count + + if read_count != requested_length: + return SecurityConst.errSSLWouldBlock + + return 0 + except Exception as e: + if wrapped_socket is not None: + wrapped_socket._exception = e + return SecurityConst.errSSLInternal + + +def _write_callback(connection_id, data_buffer, data_length_pointer): + """ + SecureTransport write callback. This is called by ST to request that data + actually be sent on the network. + """ + wrapped_socket = None + try: + wrapped_socket = _connection_refs.get(connection_id) + if wrapped_socket is None: + return SecurityConst.errSSLInternal + base_socket = wrapped_socket.socket + + bytes_to_write = data_length_pointer[0] + data = ctypes.string_at(data_buffer, bytes_to_write) + + timeout = wrapped_socket.gettimeout() + error = None + sent = 0 + + try: + while sent < bytes_to_write: + if timeout is None or timeout >= 0: + writables = util.wait_for_write([base_socket], timeout) + if not writables: + raise socket.error(errno.EAGAIN, 'timed out') + chunk_sent = base_socket.send(data) + sent += chunk_sent + + # This has some needless copying here, but I'm not sure there's + # much value in optimising this data path. + data = data[chunk_sent:] + except (socket.error) as e: + error = e.errno + + if error is not None and error != errno.EAGAIN: + if error == errno.ECONNRESET: + return SecurityConst.errSSLClosedAbort + raise + + data_length_pointer[0] = sent + if sent != bytes_to_write: + return SecurityConst.errSSLWouldBlock + + return 0 + except Exception as e: + if wrapped_socket is not None: + wrapped_socket._exception = e + return SecurityConst.errSSLInternal + + +# We need to keep these two objects references alive: if they get GC'd while +# in use then SecureTransport could attempt to call a function that is in freed +# memory. That would be...uh...bad. Yeah, that's the word. Bad. +_read_callback_pointer = Security.SSLReadFunc(_read_callback) +_write_callback_pointer = Security.SSLWriteFunc(_write_callback) + + +class WrappedSocket(object): + """ + API-compatibility wrapper for Python's OpenSSL wrapped socket object. + + Note: _makefile_refs, _drop(), and _reuse() are needed for the garbage + collector of PyPy. + """ + def __init__(self, socket): + self.socket = socket + self.context = None + self._makefile_refs = 0 + self._closed = False + self._exception = None + self._keychain = None + self._keychain_dir = None + self._client_cert_chain = None + + # We save off the previously-configured timeout and then set it to + # zero. This is done because we use select and friends to handle the + # timeouts, but if we leave the timeout set on the lower socket then + # Python will "kindly" call select on that socket again for us. Avoid + # that by forcing the timeout to zero. + self._timeout = self.socket.gettimeout() + self.socket.settimeout(0) + + @contextlib.contextmanager + def _raise_on_error(self): + """ + A context manager that can be used to wrap calls that do I/O from + SecureTransport. If any of the I/O callbacks hit an exception, this + context manager will correctly propagate the exception after the fact. + This avoids silently swallowing those exceptions. + + It also correctly forces the socket closed. + """ + self._exception = None + + # We explicitly don't catch around this yield because in the unlikely + # event that an exception was hit in the block we don't want to swallow + # it. + yield + if self._exception is not None: + exception, self._exception = self._exception, None + self.close() + raise exception + + def _set_ciphers(self): + """ + Sets up the allowed ciphers. By default this matches the set in + util.ssl_.DEFAULT_CIPHERS, at least as supported by macOS. This is done + custom and doesn't allow changing at this time, mostly because parsing + OpenSSL cipher strings is going to be a freaking nightmare. + """ + ciphers = (Security.SSLCipherSuite * len(CIPHER_SUITES))(*CIPHER_SUITES) + result = Security.SSLSetEnabledCiphers( + self.context, ciphers, len(CIPHER_SUITES) + ) + _assert_no_error(result) + + def _custom_validate(self, verify, trust_bundle): + """ + Called when we have set custom validation. We do this in two cases: + first, when cert validation is entirely disabled; and second, when + using a custom trust DB. + """ + # If we disabled cert validation, just say: cool. + if not verify: + return + + # We want data in memory, so load it up. + if os.path.isfile(trust_bundle): + with open(trust_bundle, 'rb') as f: + trust_bundle = f.read() + + cert_array = None + trust = Security.SecTrustRef() + + try: + # Get a CFArray that contains the certs we want. + cert_array = _cert_array_from_pem(trust_bundle) + + # Ok, now the hard part. We want to get the SecTrustRef that ST has + # created for this connection, shove our CAs into it, tell ST to + # ignore everything else it knows, and then ask if it can build a + # chain. This is a buuuunch of code. + result = Security.SSLCopyPeerTrust( + self.context, ctypes.byref(trust) + ) + _assert_no_error(result) + if not trust: + raise ssl.SSLError("Failed to copy trust reference") + + result = Security.SecTrustSetAnchorCertificates(trust, cert_array) + _assert_no_error(result) + + result = Security.SecTrustSetAnchorCertificatesOnly(trust, True) + _assert_no_error(result) + + trust_result = Security.SecTrustResultType() + result = Security.SecTrustEvaluate( + trust, ctypes.byref(trust_result) + ) + _assert_no_error(result) + finally: + if trust: + CoreFoundation.CFRelease(trust) + + if cert_array is None: + CoreFoundation.CFRelease(cert_array) + + # Ok, now we can look at what the result was. + successes = ( + SecurityConst.kSecTrustResultUnspecified, + SecurityConst.kSecTrustResultProceed + ) + if trust_result.value not in successes: + raise ssl.SSLError( + "certificate verify failed, error code: %d" % + trust_result.value + ) + + def handshake(self, + server_hostname, + verify, + trust_bundle, + min_version, + max_version, + client_cert, + client_key, + client_key_passphrase): + """ + Actually performs the TLS handshake. This is run automatically by + wrapped socket, and shouldn't be needed in user code. + """ + # First, we do the initial bits of connection setup. We need to create + # a context, set its I/O funcs, and set the connection reference. + self.context = Security.SSLCreateContext( + None, SecurityConst.kSSLClientSide, SecurityConst.kSSLStreamType + ) + result = Security.SSLSetIOFuncs( + self.context, _read_callback_pointer, _write_callback_pointer + ) + _assert_no_error(result) + + # Here we need to compute the handle to use. We do this by taking the + # id of self modulo 2**31 - 1. If this is already in the dictionary, we + # just keep incrementing by one until we find a free space. + with _connection_ref_lock: + handle = id(self) % 2147483647 + while handle in _connection_refs: + handle = (handle + 1) % 2147483647 + _connection_refs[handle] = self + + result = Security.SSLSetConnection(self.context, handle) + _assert_no_error(result) + + # If we have a server hostname, we should set that too. + if server_hostname: + if not isinstance(server_hostname, bytes): + server_hostname = server_hostname.encode('utf-8') + + result = Security.SSLSetPeerDomainName( + self.context, server_hostname, len(server_hostname) + ) + _assert_no_error(result) + + # Setup the ciphers. + self._set_ciphers() + + # Set the minimum and maximum TLS versions. + result = Security.SSLSetProtocolVersionMin(self.context, min_version) + _assert_no_error(result) + result = Security.SSLSetProtocolVersionMax(self.context, max_version) + _assert_no_error(result) + + # If there's a trust DB, we need to use it. We do that by telling + # SecureTransport to break on server auth. We also do that if we don't + # want to validate the certs at all: we just won't actually do any + # authing in that case. + if not verify or trust_bundle is not None: + result = Security.SSLSetSessionOption( + self.context, + SecurityConst.kSSLSessionOptionBreakOnServerAuth, + True + ) + _assert_no_error(result) + + # If there's a client cert, we need to use it. + if client_cert: + self._keychain, self._keychain_dir = _temporary_keychain() + self._client_cert_chain = _load_client_cert_chain( + self._keychain, client_cert, client_key + ) + result = Security.SSLSetCertificate( + self.context, self._client_cert_chain + ) + _assert_no_error(result) + + while True: + with self._raise_on_error(): + result = Security.SSLHandshake(self.context) + + if result == SecurityConst.errSSLWouldBlock: + raise socket.timeout("handshake timed out") + elif result == SecurityConst.errSSLServerAuthCompleted: + self._custom_validate(verify, trust_bundle) + continue + else: + _assert_no_error(result) + break + + def fileno(self): + return self.socket.fileno() + + # Copy-pasted from Python 3.5 source code + def _decref_socketios(self): + if self._makefile_refs > 0: + self._makefile_refs -= 1 + if self._closed: + self.close() + + def recv(self, bufsiz): + buffer = ctypes.create_string_buffer(bufsiz) + bytes_read = self.recv_into(buffer, bufsiz) + data = buffer[:bytes_read] + return data + + def recv_into(self, buffer, nbytes=None): + # Read short on EOF. + if self._closed: + return 0 + + if nbytes is None: + nbytes = len(buffer) + + buffer = (ctypes.c_char * nbytes).from_buffer(buffer) + processed_bytes = ctypes.c_size_t(0) + + with self._raise_on_error(): + result = Security.SSLRead( + self.context, buffer, nbytes, ctypes.byref(processed_bytes) + ) + + # There are some result codes that we want to treat as "not always + # errors". Specifically, those are errSSLWouldBlock, + # errSSLClosedGraceful, and errSSLClosedNoNotify. + if (result == SecurityConst.errSSLWouldBlock): + # If we didn't process any bytes, then this was just a time out. + # However, we can get errSSLWouldBlock in situations when we *did* + # read some data, and in those cases we should just read "short" + # and return. + if processed_bytes.value == 0: + # Timed out, no data read. + raise socket.timeout("recv timed out") + elif result in (SecurityConst.errSSLClosedGraceful, SecurityConst.errSSLClosedNoNotify): + # The remote peer has closed this connection. We should do so as + # well. Note that we don't actually return here because in + # principle this could actually be fired along with return data. + # It's unlikely though. + self.close() + else: + _assert_no_error(result) + + # Ok, we read and probably succeeded. We should return whatever data + # was actually read. + return processed_bytes.value + + def settimeout(self, timeout): + self._timeout = timeout + + def gettimeout(self): + return self._timeout + + def send(self, data): + processed_bytes = ctypes.c_size_t(0) + + with self._raise_on_error(): + result = Security.SSLWrite( + self.context, data, len(data), ctypes.byref(processed_bytes) + ) + + if result == SecurityConst.errSSLWouldBlock and processed_bytes.value == 0: + # Timed out + raise socket.timeout("send timed out") + else: + _assert_no_error(result) + + # We sent, and probably succeeded. Tell them how much we sent. + return processed_bytes.value + + def sendall(self, data): + total_sent = 0 + while total_sent < len(data): + sent = self.send(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE]) + total_sent += sent + + def shutdown(self): + with self._raise_on_error(): + Security.SSLClose(self.context) + + def close(self): + # TODO: should I do clean shutdown here? Do I have to? + if self._makefile_refs < 1: + self._closed = True + if self.context: + CoreFoundation.CFRelease(self.context) + self.context = None + if self._client_cert_chain: + CoreFoundation.CFRelease(self._client_cert_chain) + self._client_cert_chain = None + if self._keychain: + Security.SecKeychainDelete(self._keychain) + CoreFoundation.CFRelease(self._keychain) + shutil.rmtree(self._keychain_dir) + self._keychain = self._keychain_dir = None + return self.socket.close() + else: + self._makefile_refs -= 1 + + def getpeercert(self, binary_form=False): + # Urgh, annoying. + # + # Here's how we do this: + # + # 1. Call SSLCopyPeerTrust to get hold of the trust object for this + # connection. + # 2. Call SecTrustGetCertificateAtIndex for index 0 to get the leaf. + # 3. To get the CN, call SecCertificateCopyCommonName and process that + # string so that it's of the appropriate type. + # 4. To get the SAN, we need to do something a bit more complex: + # a. Call SecCertificateCopyValues to get the data, requesting + # kSecOIDSubjectAltName. + # b. Mess about with this dictionary to try to get the SANs out. + # + # This is gross. Really gross. It's going to be a few hundred LoC extra + # just to repeat something that SecureTransport can *already do*. So my + # operating assumption at this time is that what we want to do is + # instead to just flag to urllib3 that it shouldn't do its own hostname + # validation when using SecureTransport. + if not binary_form: + raise ValueError( + "SecureTransport only supports dumping binary certs" + ) + trust = Security.SecTrustRef() + certdata = None + der_bytes = None + + try: + # Grab the trust store. + result = Security.SSLCopyPeerTrust( + self.context, ctypes.byref(trust) + ) + _assert_no_error(result) + if not trust: + # Probably we haven't done the handshake yet. No biggie. + return None + + cert_count = Security.SecTrustGetCertificateCount(trust) + if not cert_count: + # Also a case that might happen if we haven't handshaked. + # Handshook? Handshaken? + return None + + leaf = Security.SecTrustGetCertificateAtIndex(trust, 0) + assert leaf + + # Ok, now we want the DER bytes. + certdata = Security.SecCertificateCopyData(leaf) + assert certdata + + data_length = CoreFoundation.CFDataGetLength(certdata) + data_buffer = CoreFoundation.CFDataGetBytePtr(certdata) + der_bytes = ctypes.string_at(data_buffer, data_length) + finally: + if certdata: + CoreFoundation.CFRelease(certdata) + if trust: + CoreFoundation.CFRelease(trust) + + return der_bytes + + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + + +if _fileobject: # Platform-specific: Python 2 + def makefile(self, mode, bufsize=-1): + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) +else: # Platform-specific: Python 3 + def makefile(self, mode="r", buffering=None, *args, **kwargs): + # We disable buffering with SecureTransport because it conflicts with + # the buffering that ST does internally (see issue #1153 for more). + buffering = 0 + return backport_makefile(self, mode, buffering, *args, **kwargs) + +WrappedSocket.makefile = makefile + + +class SecureTransportContext(object): + """ + I am a wrapper class for the SecureTransport library, to translate the + interface of the standard library ``SSLContext`` object to calls into + SecureTransport. + """ + def __init__(self, protocol): + self._min_version, self._max_version = _protocol_to_min_max[protocol] + self._options = 0 + self._verify = False + self._trust_bundle = None + self._client_cert = None + self._client_key = None + self._client_key_passphrase = None + + @property + def check_hostname(self): + """ + SecureTransport cannot have its hostname checking disabled. For more, + see the comment on getpeercert() in this file. + """ + return True + + @check_hostname.setter + def check_hostname(self, value): + """ + SecureTransport cannot have its hostname checking disabled. For more, + see the comment on getpeercert() in this file. + """ + pass + + @property + def options(self): + # TODO: Well, crap. + # + # So this is the bit of the code that is the most likely to cause us + # trouble. Essentially we need to enumerate all of the SSL options that + # users might want to use and try to see if we can sensibly translate + # them, or whether we should just ignore them. + return self._options + + @options.setter + def options(self, value): + # TODO: Update in line with above. + self._options = value + + @property + def verify_mode(self): + return ssl.CERT_REQUIRED if self._verify else ssl.CERT_NONE + + @verify_mode.setter + def verify_mode(self, value): + self._verify = True if value == ssl.CERT_REQUIRED else False + + def set_default_verify_paths(self): + # So, this has to do something a bit weird. Specifically, what it does + # is nothing. + # + # This means that, if we had previously had load_verify_locations + # called, this does not undo that. We need to do that because it turns + # out that the rest of the urllib3 code will attempt to load the + # default verify paths if it hasn't been told about any paths, even if + # the context itself was sometime earlier. We resolve that by just + # ignoring it. + pass + + def load_default_certs(self): + return self.set_default_verify_paths() + + def set_ciphers(self, ciphers): + # For now, we just require the default cipher string. + if ciphers != util.ssl_.DEFAULT_CIPHERS: + raise ValueError( + "SecureTransport doesn't support custom cipher strings" + ) + + def load_verify_locations(self, cafile=None, capath=None, cadata=None): + # OK, we only really support cadata and cafile. + if capath is not None: + raise ValueError( + "SecureTransport does not support cert directories" + ) + + self._trust_bundle = cafile or cadata + + def load_cert_chain(self, certfile, keyfile=None, password=None): + self._client_cert = certfile + self._client_key = keyfile + self._client_cert_passphrase = password + + def wrap_socket(self, sock, server_side=False, + do_handshake_on_connect=True, suppress_ragged_eofs=True, + server_hostname=None): + # So, what do we do here? Firstly, we assert some properties. This is a + # stripped down shim, so there is some functionality we don't support. + # See PEP 543 for the real deal. + assert not server_side + assert do_handshake_on_connect + assert suppress_ragged_eofs + + # Ok, we're good to go. Now we want to create the wrapped socket object + # and store it in the appropriate place. + wrapped_socket = WrappedSocket(sock) + + # Now we can handshake + wrapped_socket.handshake( + server_hostname, self._verify, self._trust_bundle, + self._min_version, self._max_version, self._client_cert, + self._client_key, self._client_key_passphrase + ) + return wrapped_socket diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/socks.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/socks.py new file mode 100644 index 0000000..1cb7928 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/socks.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: MIT +""" +This module contains provisional support for SOCKS proxies from within +urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and +SOCKS5. To enable its functionality, either install PySocks or install this +module with the ``socks`` extra. + +The SOCKS implementation supports the full range of urllib3 features. It also +supports the following SOCKS features: + +- SOCKS4 +- SOCKS4a +- SOCKS5 +- Usernames and passwords for the SOCKS proxy + +Known Limitations: + +- Currently PySocks does not support contacting remote websites via literal + IPv6 addresses. Any such connection attempt will fail. You must use a domain + name. +- Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any + such connection attempt will fail. +""" +from __future__ import absolute_import + +try: + import socks +except ImportError: + import warnings + from ..exceptions import DependencyWarning + + warnings.warn(( + 'SOCKS support in urllib3 requires the installation of optional ' + 'dependencies: specifically, PySocks. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies' + ), + DependencyWarning + ) + raise + +from socket import error as SocketError, timeout as SocketTimeout + +from ..connection import ( + HTTPConnection, HTTPSConnection +) +from ..connectionpool import ( + HTTPConnectionPool, HTTPSConnectionPool +) +from ..exceptions import ConnectTimeoutError, NewConnectionError +from ..poolmanager import PoolManager +from ..util.url import parse_url + +try: + import ssl +except ImportError: + ssl = None + + +class SOCKSConnection(HTTPConnection): + """ + A plain-text HTTP connection that connects via a SOCKS proxy. + """ + def __init__(self, *args, **kwargs): + self._socks_options = kwargs.pop('_socks_options') + super(SOCKSConnection, self).__init__(*args, **kwargs) + + def _new_conn(self): + """ + Establish a new connection via the SOCKS proxy. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + conn = socks.create_connection( + (self.host, self.port), + proxy_type=self._socks_options['socks_version'], + proxy_addr=self._socks_options['proxy_host'], + proxy_port=self._socks_options['proxy_port'], + proxy_username=self._socks_options['username'], + proxy_password=self._socks_options['password'], + proxy_rdns=self._socks_options['rdns'], + timeout=self.timeout, + **extra_kw + ) + + except SocketTimeout as e: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + except socks.ProxyError as e: + # This is fragile as hell, but it seems to be the only way to raise + # useful errors here. + if e.socket_err: + error = e.socket_err + if isinstance(error, SocketTimeout): + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout) + ) + else: + raise NewConnectionError( + self, + "Failed to establish a new connection: %s" % error + ) + else: + raise NewConnectionError( + self, + "Failed to establish a new connection: %s" % e + ) + + except SocketError as e: # Defensive: PySocks should catch all these. + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e) + + return conn + + +# We don't need to duplicate the Verified/Unverified distinction from +# urllib3/connection.py here because the HTTPSConnection will already have been +# correctly set to either the Verified or Unverified form by that module. This +# means the SOCKSHTTPSConnection will automatically be the correct type. +class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection): + pass + + +class SOCKSHTTPConnectionPool(HTTPConnectionPool): + ConnectionCls = SOCKSConnection + + +class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): + ConnectionCls = SOCKSHTTPSConnection + + +class SOCKSProxyManager(PoolManager): + """ + A version of the urllib3 ProxyManager that routes connections via the + defined SOCKS proxy. + """ + pool_classes_by_scheme = { + 'http': SOCKSHTTPConnectionPool, + 'https': SOCKSHTTPSConnectionPool, + } + + def __init__(self, proxy_url, username=None, password=None, + num_pools=10, headers=None, **connection_pool_kw): + parsed = parse_url(proxy_url) + + if parsed.scheme == 'socks5': + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = False + elif parsed.scheme == 'socks5h': + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = True + elif parsed.scheme == 'socks4': + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = False + elif parsed.scheme == 'socks4a': + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = True + else: + raise ValueError( + "Unable to determine SOCKS version from %s" % proxy_url + ) + + self.proxy_url = proxy_url + + socks_options = { + 'socks_version': socks_version, + 'proxy_host': parsed.host, + 'proxy_port': parsed.port, + 'username': username, + 'password': password, + 'rdns': rdns + } + connection_pool_kw['_socks_options'] = socks_options + + super(SOCKSProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw + ) + + self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme diff --git a/collectors/python.d.plugin/python_modules/urllib3/exceptions.py b/collectors/python.d.plugin/python_modules/urllib3/exceptions.py new file mode 100644 index 0000000..a71cabe --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/exceptions.py @@ -0,0 +1,247 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from .packages.six.moves.http_client import ( + IncompleteRead as httplib_IncompleteRead +) +# Base Exceptions + + +class HTTPError(Exception): + "Base exception used by this module." + pass + + +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + +class PoolError(HTTPError): + "Base exception for errors caused within a pool." + def __init__(self, pool, message): + self.pool = pool + HTTPError.__init__(self, "%s: %s" % (pool, message)) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, None) + + +class RequestError(PoolError): + "Base exception for PoolErrors that have associated URLs." + def __init__(self, pool, url, message): + self.url = url + PoolError.__init__(self, pool, message) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url, None) + + +class SSLError(HTTPError): + "Raised when SSL certificate fails in an HTTPS connection." + pass + + +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass + + +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." + pass + + +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." + pass + + +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + +# Leaf Exceptions + +class MaxRetryError(RequestError): + """Raised when the maximum number of retries is exceeded. + + :param pool: The connection pool + :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` + :param string url: The requested Url + :param exceptions.Exception reason: The underlying error + + """ + + def __init__(self, pool, url, reason=None): + self.reason = reason + + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) + + RequestError.__init__(self, pool, url, message) + + +class HostChangedError(RequestError): + "Raised when an existing pool gets a request for a foreign host." + + def __init__(self, pool, url, retries=3): + message = "Tried to open a foreign host with url: %s" % url + RequestError.__init__(self, pool, url, message) + self.retries = retries + + +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. + + Catching this error will catch both :exc:`ReadTimeoutErrors + ` and :exc:`ConnectTimeoutErrors `. + """ + pass + + +class ReadTimeoutError(TimeoutError, RequestError): + "Raised when a socket timeout occurs while receiving data from a server" + pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): + "Raised when a socket timeout occurs while connecting to a server" + pass + + +class NewConnectionError(ConnectTimeoutError, PoolError): + "Raised when we fail to establish a new connection. Usually ECONNREFUSED." + pass + + +class EmptyPoolError(PoolError): + "Raised when a pool runs out of connections and no more are allowed." + pass + + +class ClosedPoolError(PoolError): + "Raised when a request enters a pool after the pool has been closed." + pass + + +class LocationValueError(ValueError, HTTPError): + "Raised when there is something wrong with a given URL input." + pass + + +class LocationParseError(LocationValueError): + "Raised when get_host or similar fails to parse the URL input." + + def __init__(self, location): + message = "Failed to parse: %s" % location + HTTPError.__init__(self, message) + + self.location = location + + +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + +class SecurityWarning(HTTPWarning): + "Warned when perfoming security reducing actions" + pass + + +class SubjectAltNameWarning(SecurityWarning): + "Warned when connecting to a host with a certificate missing a SAN." + pass + + +class InsecureRequestWarning(SecurityWarning): + "Warned when making an unverified HTTPS request." + pass + + +class SystemTimeWarning(SecurityWarning): + "Warned when system time is suspected to be wrong" + pass + + +class InsecurePlatformWarning(SecurityWarning): + "Warned when certain SSL configuration is not available on a platform." + pass + + +class SNIMissingWarning(HTTPWarning): + "Warned when making a HTTPS request without SNI available." + pass + + +class DependencyWarning(HTTPWarning): + """ + Warned when an attempt is made to import a module with missing optional + dependencies. + """ + pass + + +class ResponseNotChunked(ProtocolError, ValueError): + "Response needs to be chunked in order to read it as chunks." + pass + + +class BodyNotHttplibCompatible(HTTPError): + """ + Body should be httplib.HTTPResponse like (have an fp attribute which + returns raw chunks) for read_chunked(). + """ + pass + + +class IncompleteRead(HTTPError, httplib_IncompleteRead): + """ + Response length doesn't match expected Content-Length + + Subclass of http_client.IncompleteRead to allow int value + for `partial` to avoid creating large objects on streamed + reads. + """ + def __init__(self, partial, expected): + super(IncompleteRead, self).__init__(partial, expected) + + def __repr__(self): + return ('IncompleteRead(%i bytes read, ' + '%i more expected)' % (self.partial, self.expected)) + + +class InvalidHeader(HTTPError): + "The header provided was somehow invalid." + pass + + +class ProxySchemeUnknown(AssertionError, ValueError): + "ProxyManager does not support the supplied scheme" + # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. + + def __init__(self, scheme): + message = "Not supported proxy scheme %s" % scheme + super(ProxySchemeUnknown, self).__init__(message) + + +class HeaderParsingError(HTTPError): + "Raised by assert_header_parsing, but we convert it to a log.warning statement." + def __init__(self, defects, unparsed_data): + message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data) + super(HeaderParsingError, self).__init__(message) + + +class UnrewindableBodyError(HTTPError): + "urllib3 encountered an error when trying to rewind a body" + pass diff --git a/collectors/python.d.plugin/python_modules/urllib3/fields.py b/collectors/python.d.plugin/python_modules/urllib3/fields.py new file mode 100644 index 0000000..de7577b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/fields.py @@ -0,0 +1,179 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetypes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except (UnicodeEncodeError, UnicodeDecodeError): + pass + else: + return result + if not six.PY3 and isinstance(value, six.text_type): # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example:: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + return format_header_param(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value is not None: + parts.append(self._render_part(name, value)) + + return '; '.join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append('%s: %s' % (header_name, header_value)) + + lines.append('\r\n') + return '\r\n'.join(lines) + + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): + """ + Makes this request field into a multipart request field. + + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. + + """ + self.headers['Content-Disposition'] = content_disposition or 'form-data' + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) + self.headers['Content-Type'] = content_type + self.headers['Content-Location'] = content_location diff --git a/collectors/python.d.plugin/python_modules/urllib3/filepost.py b/collectors/python.d.plugin/python_modules/urllib3/filepost.py new file mode 100644 index 0000000..3febc9c --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/filepost.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import codecs + +from uuid import uuid4 +from io import BytesIO + +from .packages import six +from .packages.six import b +from .fields import RequestField + +writer = codecs.lookup('utf-8')[3] + + +def choose_boundary(): + """ + Our embarrassingly-simple replacement for mimetools.choose_boundary. + """ + return uuid4().hex + + +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) + + +def iter_fields(fields): + """ + .. deprecated:: 1.6 + + Iterate over fields. + + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. + + Supports list of (k, v) tuples and dicts. + """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) + + +def encode_multipart_formdata(fields, boundary=None): + """ + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. + + :param fields: + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). + + :param boundary: + If not specified, then a random boundary will be generated using + :func:`mimetools.choose_boundary`. + """ + body = BytesIO() + if boundary is None: + boundary = choose_boundary() + + for field in iter_field_objects(fields): + body.write(b('--%s\r\n' % (boundary))) + + writer(body).write(field.render_headers()) + data = field.data + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, six.text_type): + writer(body).write(data) + else: + body.write(data) + + body.write(b'\r\n') + + body.write(b('--%s--\r\n' % (boundary))) + + content_type = str('multipart/form-data; boundary=%s' % boundary) + + return body.getvalue(), content_type diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/packages/__init__.py new file mode 100644 index 0000000..170e974 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +from . import ssl_match_hostname + +__all__ = ('ssl_match_hostname', ) diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/backports/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/packages/backports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/backports/makefile.py b/collectors/python.d.plugin/python_modules/urllib3/packages/backports/makefile.py new file mode 100644 index 0000000..8ab122f --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/backports/makefile.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: MIT +""" +backports.makefile +~~~~~~~~~~~~~~~~~~ + +Backports the Python 3 ``socket.makefile`` method for use with anything that +wants to create a "fake" socket object. +""" +import io + +from socket import SocketIO + + +def backport_makefile(self, mode="r", buffering=None, encoding=None, + errors=None, newline=None): + """ + Backport of ``socket.makefile`` from Python 3.5. + """ + if not set(mode) <= set(["r", "w", "b"]): + raise ValueError( + "invalid mode %r (only r, w, b allowed)" % (mode,) + ) + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = SocketIO(self, rawmode) + self._makefile_refs += 1 + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + return raw + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode + return text diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/ordered_dict.py b/collectors/python.d.plugin/python_modules/urllib3/packages/ordered_dict.py new file mode 100644 index 0000000..9f7c0e6 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/ordered_dict.py @@ -0,0 +1,260 @@ +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. +# Copyright 2009 Raymond Hettinger, released under the MIT License. +# http://code.activestate.com/recipes/576693/ +# SPDX-License-Identifier: MIT +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/six.py b/collectors/python.d.plugin/python_modules/urllib3/packages/six.py new file mode 100644 index 0000000..31df501 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/six.py @@ -0,0 +1,852 @@ +"""Utilities for writing code that runs on Python 2 and 3""" + +# Copyright (c) 2010-2015 Benjamin Peterson +# +# SPDX-License-Identifier: MIT + +from __future__ import absolute_import + +import functools +import itertools +import operator +import sys +import types + +__author__ = "Benjamin Peterson " +__version__ = "1.10.0" + + +# Useful for very coarse version differentiation. +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 +PY34 = sys.version_info[0:2] >= (3, 4) + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) # Invokes __set__. + try: + # This is a bit ugly, but it avoids running this again by + # removing this descriptor. + delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." + fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." + fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. + + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + get_source = get_code # same as get_code + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), + MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", "collections"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), + MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", "thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), + MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), + MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), +] +# Add windows specific modules. +if sys.platform == "win32": + _moved_attributes += [ + MovedModule("winreg", "_winreg"), + ] + +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." + attr.name) +del attr + +_MovedItems._moved_attributes = _moved_attributes + +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr + +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes + +_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", "moves.urllib.parse") + + +class Module_six_moves_urllib_error(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr + +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes + +_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", "moves.urllib.error") + + +class Module_six_moves_urllib_request(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("URLopener", "urllib", "urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), +] +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr + +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes + +_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", "moves.urllib.request") + + +class Module_six_moves_urllib_response(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr + +Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes + +_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", "moves.urllib.response") + + +class Module_six_moves_urllib_robotparser(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr + +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes + +_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), + "moves.urllib_robotparser", "moves.urllib.robotparser") + + +class Module_six_moves_urllib(types.ModuleType): + + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = _importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + +_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), + "moves.urllib") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +try: + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + def get_unbound_function(unbound): + return unbound + + create_bound_method = types.MethodType + + def create_unbound_method(func, cls): + return func + + Iterator = object +else: + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + def create_unbound_method(func, cls): + return types.MethodType(func, None, cls) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) + + +if PY3: + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + + viewvalues = operator.methodcaller("values") + + viewitems = operator.methodcaller("items") +else: + def iterkeys(d, **kw): + return d.iterkeys(**kw) + + def itervalues(d, **kw): + return d.itervalues(**kw) + + def iteritems(d, **kw): + return d.iteritems(**kw) + + def iterlists(d, **kw): + return d.iterlists(**kw) + + viewkeys = operator.methodcaller("viewkeys") + + viewvalues = operator.methodcaller("viewvalues") + + viewitems = operator.methodcaller("viewitems") + +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, "Return an iterator over the values of a dictionary.") +_add_doc(iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.") +_add_doc(iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.") + + +if PY3: + def b(s): + return s.encode("latin-1") + + def u(s): + return s + unichr = chr + import struct + int2byte = struct.Struct(">B").pack + del struct + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + _assertCountEqual = "assertCountEqual" + if sys.version_info[1] <= 1: + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + else: + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" +else: + def b(s): + return s + # Workaround for standalone backslash + + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + unichr = unichr + int2byte = chr + + def byte2int(bs): + return ord(bs[0]) + + def indexbytes(buf, i): + return ord(buf[i]) + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, _assertRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + def reraise(tp, value, tb=None): + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + +else: + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") + + exec_("""def reraise(tp, value, tb=None): + raise tp, value, tb +""") + + +if sys.version_info[:2] == (3, 2): + exec_("""def raise_from(value, from_value): + if from_value is None: + raise value + raise value from from_value +""") +elif sys.version_info[:2] > (3, 2): + exec_("""def raise_from(value, from_value): + raise value from from_value +""") +else: + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. + if (isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) +if sys.version_info[:2] < (3, 3): + _print = print_ + + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + +_add_doc(reraise, """Reraise an exception.""") + +if sys.version_info[0:2] < (3, 4): + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + def wrapper(f): + f = functools.wraps(wrapped, assigned, updated)(f) + f.__wrapped__ = wrapped + return f + return wrapper +else: + wraps = functools.wraps + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(meta): + + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + return metaclass(cls.__name__, cls.__bases__, orig_vars) + return wrapper + + +def python_2_unicode_compatible(klass): + """ + A decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError("@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % + klass.__name__) + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. +__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) +if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if (type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__): + del sys.meta_path[i] + break + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/__init__.py new file mode 100644 index 0000000..2aeeeff --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/__init__.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: MIT +import sys + +try: + # Our match_hostname function is the same as 3.5's, so we only want to + # import the match_hostname function if it's at least that good. + if sys.version_info < (3, 5): + raise ImportError("Fallback to vendored code") + + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from ._implementation import CertificateError, match_hostname + +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/_implementation.py b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 0000000..647e081 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,156 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# SPDX-License-Identifier: Python-2.0 + +import re +import sys + +# ipaddress has been backported to 2.6+ in pypi. If it is installed on the +# system, use it to handle IPAddress ServerAltnames (this was added in +# python-3.5) otherwise only do DNS matching. This allows +# backports.ssl_match_hostname to continue to be used all the way back to +# python-2.4. +try: + import ipaddress +except ImportError: + ipaddress = None + +__version__ = '3.5.0.1' + + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def _to_unicode(obj): + if isinstance(obj, str) and sys.version_info < (3,): + obj = unicode(obj, encoding='ascii', errors='strict') + return obj + +def _ipaddress_match(ipname, host_ip): + """Exact matching of IP addresses. + + RFC 6125 explicitly doesn't define an algorithm for this + (section 1.7.2 - "Out of Scope"). + """ + # OpenSSL may add a trailing newline to a subjectAltName's IP address + # Divergence from upstream: ipaddress can't handle byte str + ip = ipaddress.ip_address(_to_unicode(ipname).rstrip()) + return ip == host_ip + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED") + try: + # Divergence from upstream: ipaddress can't handle byte str + host_ip = ipaddress.ip_address(_to_unicode(hostname)) + except ValueError: + # Not an IP address (common case) + host_ip = None + except UnicodeError: + # Divergence from upstream: Have to deal with ipaddress not taking + # byte strings. addresses should be all ascii, so we consider it not + # an ipaddress in this case + host_ip = None + except AttributeError: + # Divergence from upstream: Make ipaddress library optional + if ipaddress is None: + host_ip = None + else: + raise + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if host_ip is None and _dnsname_match(value, hostname): + return + dnsnames.append(value) + elif key == 'IP Address': + if host_ip is not None and _ipaddress_match(value, host_ip): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/collectors/python.d.plugin/python_modules/urllib3/poolmanager.py b/collectors/python.d.plugin/python_modules/urllib3/poolmanager.py new file mode 100644 index 0000000..adea9bc --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/poolmanager.py @@ -0,0 +1,441 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import collections +import functools +import logging + +from ._collections import RecentlyUsedContainer +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from .connectionpool import port_by_scheme +from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown +from .packages.six.moves.urllib.parse import urljoin +from .request import RequestMethods +from .util.url import parse_url +from .util.retry import Retry + + +__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] + + +log = logging.getLogger(__name__) + +SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', + 'ssl_version', 'ca_cert_dir', 'ssl_context') + +# All known keyword arguments that could be provided to the pool manager, its +# pools, or the underlying connections. This is used to construct a pool key. +_key_fields = ( + 'key_scheme', # str + 'key_host', # str + 'key_port', # int + 'key_timeout', # int or float or Timeout + 'key_retries', # int or Retry + 'key_strict', # bool + 'key_block', # bool + 'key_source_address', # str + 'key_key_file', # str + 'key_cert_file', # str + 'key_cert_reqs', # str + 'key_ca_certs', # str + 'key_ssl_version', # str + 'key_ca_cert_dir', # str + 'key_ssl_context', # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext + 'key_maxsize', # int + 'key_headers', # dict + 'key__proxy', # parsed proxy url + 'key__proxy_headers', # dict + 'key_socket_options', # list of (level (int), optname (int), value (int or str)) tuples + 'key__socks_options', # dict + 'key_assert_hostname', # bool or string + 'key_assert_fingerprint', # str +) + +#: The namedtuple class used to construct keys for the connection pool. +#: All custom key schemes should include the fields in this key at a minimum. +PoolKey = collections.namedtuple('PoolKey', _key_fields) + + +def _default_key_normalizer(key_class, request_context): + """ + Create a pool key out of a request context dictionary. + + According to RFC 3986, both the scheme and host are case-insensitive. + Therefore, this function normalizes both before constructing the pool + key for an HTTPS request. If you wish to change this behaviour, provide + alternate callables to ``key_fn_by_scheme``. + + :param key_class: + The class to use when constructing the key. This should be a namedtuple + with the ``scheme`` and ``host`` keys at a minimum. + :type key_class: namedtuple + :param request_context: + A dictionary-like object that contain the context for a request. + :type request_context: dict + + :return: A namedtuple that can be used as a connection pool key. + :rtype: PoolKey + """ + # Since we mutate the dictionary, make a copy first + context = request_context.copy() + context['scheme'] = context['scheme'].lower() + context['host'] = context['host'].lower() + + # These are both dictionaries and need to be transformed into frozensets + for key in ('headers', '_proxy_headers', '_socks_options'): + if key in context and context[key] is not None: + context[key] = frozenset(context[key].items()) + + # The socket_options key may be a list and needs to be transformed into a + # tuple. + socket_opts = context.get('socket_options') + if socket_opts is not None: + context['socket_options'] = tuple(socket_opts) + + # Map the kwargs to the names in the namedtuple - this is necessary since + # namedtuples can't have fields starting with '_'. + for key in list(context.keys()): + context['key_' + key] = context.pop(key) + + # Default to ``None`` for keys missing from the context + for field in key_class._fields: + if field not in context: + context[field] = None + + return key_class(**context) + + +#: A dictionary that maps a scheme to a callable that creates a pool key. +#: This can be used to alter the way pool keys are constructed, if desired. +#: Each PoolManager makes a copy of this dictionary so they can be configured +#: globally here, or individually on the instance. +key_fn_by_scheme = { + 'http': functools.partial(_default_key_normalizer, PoolKey), + 'https': functools.partial(_default_key_normalizer, PoolKey), +} + +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, + 'https': HTTPSConnectionPool, +} + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param \\**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. + + Example:: + + >>> manager = PoolManager(num_pools=2) + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') + >>> len(manager.pools) + 2 + + """ + + proxy = None + + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): + RequestMethods.__init__(self, headers) + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer(num_pools, + dispose_func=lambda p: p.close()) + + # Locally set the pool classes and keys so other PoolManagers can + # override them. + self.pool_classes_by_scheme = pool_classes_by_scheme + self.key_fn_by_scheme = key_fn_by_scheme.copy() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + + def _new_pool(self, scheme, host, port, request_context=None): + """ + Create a new :class:`ConnectionPool` based on host, port, scheme, and + any additional pool keyword arguments. + + If ``request_context`` is provided, it is provided as keyword arguments + to the pool class used. This method is used to actually create the + connection pools handed out by :meth:`connection_from_url` and + companion methods. It is intended to be overridden for customization. + """ + pool_cls = self.pool_classes_by_scheme[scheme] + if request_context is None: + request_context = self.connection_pool_kw.copy() + + # Although the context has everything necessary to create the pool, + # this function has historically only used the scheme, host, and port + # in the positional args. When an API change is acceptable these can + # be removed. + for key in ('scheme', 'host', 'port'): + request_context.pop(key, None) + + if scheme == 'http': + for kw in SSL_KEYWORDS: + request_context.pop(kw, None) + + return pool_cls(host, port, **request_context) + + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() + + def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None): + """ + Get a :class:`ConnectionPool` based on the host, port, and scheme. + + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is + provided, it is merged with the instance's ``connection_pool_kw`` + variable and used to create the new connection pool, if one is + needed. + """ + + if not host: + raise LocationValueError("No host specified.") + + request_context = self._merge_pool_kwargs(pool_kwargs) + request_context['scheme'] = scheme or 'http' + if not port: + port = port_by_scheme.get(request_context['scheme'].lower(), 80) + request_context['port'] = port + request_context['host'] = host + + return self.connection_from_context(request_context) + + def connection_from_context(self, request_context): + """ + Get a :class:`ConnectionPool` based on the request context. + + ``request_context`` must at least contain the ``scheme`` key and its + value must be a key in ``key_fn_by_scheme`` instance variable. + """ + scheme = request_context['scheme'].lower() + pool_key_constructor = self.key_fn_by_scheme[scheme] + pool_key = pool_key_constructor(request_context) + + return self.connection_from_pool_key(pool_key, request_context=request_context) + + def connection_from_pool_key(self, pool_key, request_context=None): + """ + Get a :class:`ConnectionPool` based on the provided pool key. + + ``pool_key`` should be a namedtuple that only contains immutable + objects. At a minimum it must have the ``scheme``, ``host``, and + ``port`` fields. + """ + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + scheme = request_context['scheme'] + host = request_context['host'] + port = request_context['port'] + pool = self._new_pool(scheme, host, port, request_context=request_context) + self.pools[pool_key] = pool + + return pool + + def connection_from_url(self, url, pool_kwargs=None): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url`. + + If ``pool_kwargs`` is not provided and a new pool needs to be + constructed, ``self.connection_pool_kw`` is used to initialize + the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs`` + is provided, it is used instead. Note that if a new pool does not + need to be created for the request, the provided ``pool_kwargs`` are + not used. + """ + u = parse_url(url) + return self.connection_from_host(u.host, port=u.port, scheme=u.scheme, + pool_kwargs=pool_kwargs) + + def _merge_pool_kwargs(self, override): + """ + Merge a dictionary of override values for self.connection_pool_kw. + + This does not modify self.connection_pool_kw and returns a new dict. + Any keys in the override dictionary with a value of ``None`` are + removed from the merged dictionary. + """ + base_pool_kwargs = self.connection_pool_kw.copy() + if override: + for key, value in override.items(): + if value is None: + try: + del base_pool_kwargs[key] + except KeyError: + pass + else: + base_pool_kwargs[key] = value + return base_pool_kwargs + + def urlopen(self, method, url, redirect=True, **kw): + """ + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with custom cross-host redirect logic and only sends the request-uri + portion of the ``url``. + + The given ``url`` parameter must be absolute, such that an appropriate + :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + """ + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + + kw['assert_same_host'] = False + kw['redirect'] = False + if 'headers' not in kw: + kw['headers'] = self.headers + + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) + + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) + + # RFC 7231, Section 6.4.4 + if response.status == 303: + method = 'GET' + + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + try: + retries = retries.increment(method, url, response=response, _pool=conn) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + + kw['retries'] = retries + kw['redirect'] = redirect + + log.info("Redirecting %s -> %s", url, redirect_location) + return self.urlopen(method, redirect_location, **kw) + + +class ProxyManager(PoolManager): + """ + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param proxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + + """ + + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + + if proxy.scheme not in ("http", "https"): + raise ProxySchemeUnknown(proxy.scheme) + + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme, pool_kwargs=pool_kwargs) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs) + + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. + """ + headers_ = {'Accept': '*/*'} + + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc + + if headers: + headers_.update(headers) + return headers_ + + def urlopen(self, method, url, redirect=True, **kw): + "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." + u = parse_url(url) + + if u.scheme == "http": + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. + headers = kw.get('headers', self.headers) + kw['headers'] = self._set_proxy_headers(url, headers) + + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) + + +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/collectors/python.d.plugin/python_modules/urllib3/request.py b/collectors/python.d.plugin/python_modules/urllib3/request.py new file mode 100644 index 0000000..f783319 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/request.py @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import + +from .filepost import encode_multipart_formdata +from .packages.six.moves.urllib.parse import urlencode + + +__all__ = ['RequestMethods'] + + +class RequestMethods(object): + """ + Convenience mixin for classes who implement a :meth:`urlopen` method, such + as :class:`~urllib3.connectionpool.HTTPConnectionPool` and + :class:`~urllib3.poolmanager.PoolManager`. + + Provides behavior for making common types of HTTP request methods and + decides which type of request field encoding to use. + + Specifically, + + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). + + :meth:`.request_encode_body` is for sending requests whose fields are + encoded in the *body* of the request using multipart or www-form-urlencoded + (such as for POST, PUT, PATCH). + + :meth:`.request` is for making any kind of request, it will look up the + appropriate encoding format and use one of the above two methods to make + the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + """ + + _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) + + def __init__(self, headers=None): + self.headers = headers or {} + + def urlopen(self, method, url, body=None, headers=None, + encode_multipart=True, multipart_boundary=None, + **kw): # Abstract + raise NotImplemented("Classes extending RequestMethods must implement " + "their own ``urlopen`` method.") + + def request(self, method, url, fields=None, headers=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the appropriate encoding of + ``fields`` based on the ``method`` used. + + This is a convenience method that requires the least amount of manual + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as + :meth:`request_encode_url`, :meth:`request_encode_body`, + or even the lowest level :meth:`urlopen`. + """ + method = method.upper() + + if method in self._encode_url_methods: + return self.request_encode_url(method, url, fields=fields, + headers=headers, + **urlopen_kw) + else: + return self.request_encode_body(method, url, fields=fields, + headers=headers, + **urlopen_kw) + + def request_encode_url(self, method, url, fields=None, headers=None, + **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the url. This is useful for request methods like GET, HEAD, DELETE, etc. + """ + if headers is None: + headers = self.headers + + extra_kw = {'headers': headers} + extra_kw.update(urlopen_kw) + + if fields: + url += '?' + urlencode(fields) + + return self.urlopen(method, url, **extra_kw) + + def request_encode_body(self, method, url, fields=None, headers=None, + encode_multipart=True, multipart_boundary=None, + **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the body. This is useful for request methods like POST, PUT, PATCH, etc. + + When ``encode_multipart=True`` (default), then + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise + :meth:`urllib.urlencode` is used with the + 'application/x-www-form-urlencoded' content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it in other times too. However, it may break request + signing, such as with OAuth. + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. For example:: + + fields = { + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + } + + When uploading a file, providing a filename (the first parameter of the + tuple) is optional but recommended to best mimick behavior of browsers. + + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. The random boundary + string can be explicitly set with the ``multipart_boundary`` parameter. + """ + if headers is None: + headers = self.headers + + extra_kw = {'headers': {}} + + if fields: + if 'body' in urlopen_kw: + raise TypeError( + "request got values for both 'fields' and 'body', can only specify one.") + + if encode_multipart: + body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) + else: + body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' + + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) + + return self.urlopen(method, url, **extra_kw) diff --git a/collectors/python.d.plugin/python_modules/urllib3/response.py b/collectors/python.d.plugin/python_modules/urllib3/response.py new file mode 100644 index 0000000..cf14a30 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/response.py @@ -0,0 +1,623 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from contextlib import contextmanager +import zlib +import io +import logging +from socket import timeout as SocketTimeout +from socket import error as SocketError + +from ._collections import HTTPHeaderDict +from .exceptions import ( + BodyNotHttplibCompatible, ProtocolError, DecodeError, ReadTimeoutError, + ResponseNotChunked, IncompleteRead, InvalidHeader +) +from .packages.six import string_types as basestring, binary_type, PY3 +from .packages.six.moves import http_client as httplib +from .connection import HTTPException, BaseSSLError +from .util.response import is_fp_closed, is_response_to_head + +log = logging.getLogger(__name__) + + +class DeflateDecoder(object): + + def __init__(self): + self._first_try = True + self._data = binary_type() + self._obj = zlib.decompressobj() + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + decompressed = self._obj.decompress(data) + if decompressed: + self._first_try = False + self._data = None + return decompressed + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None + + +class GzipDecoder(object): + + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + return self._obj.decompress(data) + + +def _get_decoder(mode): + if mode == 'gzip': + return GzipDecoder() + + return DeflateDecoder() + + +class HTTPResponse(io.IOBase): + """ + HTTP Response container. + + Backwards-compatible to httplib's HTTPResponse but the response ``body`` is + loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. + + Extra parameters for behaviour not present in httplib.HTTPResponse: + + :param preload_content: + If True, the response's body will be preloaded during construction. + + :param decode_content: + If True, attempts to decode specific content-encoding's based on headers + (like 'gzip' and 'deflate') will be skipped and raw data will be used + instead. + + :param original_response: + When this HTTPResponse wrapper is generated from an httplib.HTTPResponse + object, it's convenient to include the original for debug purposes. It's + otherwise unused. + + :param retries: + The retries contains the last :class:`~urllib3.util.retry.Retry` that + was used during the request. + + :param enforce_content_length: + Enforce content length checking. Body returned by server must match + value of Content-Length header, if present. Otherwise, raise error. + """ + + CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] + + def __init__(self, body='', headers=None, status=0, version=0, reason=None, + strict=0, preload_content=True, decode_content=True, + original_response=None, pool=None, connection=None, + retries=None, enforce_content_length=False, request_method=None): + + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) + self.status = status + self.version = version + self.reason = reason + self.strict = strict + self.decode_content = decode_content + self.retries = retries + self.enforce_content_length = enforce_content_length + + self._decoder = None + self._body = None + self._fp = None + self._original_response = original_response + self._fp_bytes_read = 0 + + if body and isinstance(body, (basestring, binary_type)): + self._body = body + + self._pool = pool + self._connection = connection + + if hasattr(body, 'read'): + self._fp = body + + # Are we using the chunked-style of transfer encoding? + self.chunked = False + self.chunk_left = None + tr_enc = self.headers.get('transfer-encoding', '').lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: + self.chunked = True + + # Determine length of response + self.length_remaining = self._init_length(request_method) + + # If requested, preload the body. + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) + + def get_redirect_location(self): + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in self.REDIRECT_STATUSES: + return self.headers.get('location') + + return False + + def release_conn(self): + if not self._pool or not self._connection: + return + + self._pool._put_conn(self._connection) + self._connection = None + + @property + def data(self): + # For backwords-compat with earlier urllib3 0.4 and earlier. + if self._body: + return self._body + + if self._fp: + return self.read(cache_content=True) + + @property + def connection(self): + return self._connection + + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + + def _init_length(self, request_method): + """ + Set initial length value for Response content if available. + """ + length = self.headers.get('content-length') + + if length is not None and self.chunked: + # This Response will fail with an IncompleteRead if it can't be + # received as chunked. This method falls back to attempt reading + # the response before raising an exception. + log.warning("Received response with both Content-Length and " + "Transfer-Encoding set. This is expressly forbidden " + "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " + "attempting to process response as Transfer-Encoding: " + "chunked.") + return None + + elif length is not None: + try: + # RFC 7230 section 3.3.2 specifies multiple content lengths can + # be sent in a single Content-Length header + # (e.g. Content-Length: 42, 42). This line ensures the values + # are all valid ints and that as long as the `set` length is 1, + # all values are the same. Otherwise, the header is invalid. + lengths = set([int(val) for val in length.split(',')]) + if len(lengths) > 1: + raise InvalidHeader("Content-Length contained multiple " + "unmatching values (%s)" % length) + length = lengths.pop() + except ValueError: + length = None + else: + if length < 0: + length = None + + # Convert status to int for comparison + # In some cases, httplib returns a status of "_UNKNOWN" + try: + status = int(self.status) + except ValueError: + status = 0 + + # Check for responses that shouldn't include a body + if status in (204, 304) or 100 <= status < 200 or request_method == 'HEAD': + length = 0 + + return length + + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessary. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, e) + + if flush_decoder and decode_content: + data += self._flush_decoder() + + return data + + def _flush_decoder(self): + """ + Flushes the decoder. Should only be called if the decoder is actually + being used. + """ + if self._decoder: + buf = self._decoder.decompress(b'') + return buf + self._decoder.flush() + + return b'' + + @contextmanager + def _error_catcher(self): + """ + Catch low-level python exceptions, instead re-raising urllib3 + variants, so that low-level exceptions are not leaked in the + high-level api. + + On exit, release the connection back to the pool. + """ + clean_exit = False + + try: + try: + yield + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if 'read operation timed out' not in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except (HTTPException, SocketError) as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) + + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + # The response may not be closed but we're not going to use it + # anymore so close it now to ensure that the connection is + # released back to the pool. + if self._original_response: + self._original_response.close() + + # Closing the response may not actually be sufficient to close + # everything, so if we have a hold of the connection close that + # too. + if self._connection: + self._connection.close() + + # If we hold the original response but it's closed now, we should + # return the connection back to the pool. + if self._original_response and self._original_response.isclosed(): + self.release_conn() + + def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + self._init_decoder() + if decode_content is None: + decode_content = self.decode_content + + if self._fp is None: + return + + flush_decoder = False + data = None + + with self._error_catcher(): + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() + flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + if self.enforce_content_length and self.length_remaining not in (0, None): + # This is an edge case that httplib failed to cover due + # to concerns of backward compatibility. We're + # addressing it here to make sure IncompleteRead is + # raised during streaming, so all calls with incorrect + # Content-Length are caught. + raise IncompleteRead(self._fp_bytes_read, self.length_remaining) + + if data: + self._fp_bytes_read += len(data) + if self.length_remaining is not None: + self.length_remaining -= len(data) + + data = self._decode(data, decode_content, flush_decoder) + + if cache_content: + self._body = data + + return data + + def stream(self, amt=2**16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + if self.chunked and self.supports_chunked_reads(): + for line in self.read_chunked(amt, decode_content=decode_content): + yield line + else: + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod + def from_httplib(ResponseCls, r, **response_kw): + """ + Given an :class:`httplib.HTTPResponse` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. + """ + headers = r.msg + + if not isinstance(headers, HTTPHeaderDict): + if PY3: # Python 3 + headers = HTTPHeaderDict(headers.items()) + else: # Python 2 + headers = HTTPHeaderDict.from_httplib(headers) + + # HTTPResponse objects in Python 3 don't have a .strict attribute + strict = getattr(r, 'strict', 0) + resp = ResponseCls(body=r, + headers=headers, + status=r.status, + version=r.version, + reason=r.reason, + strict=strict, + original_response=r, + **response_kw) + return resp + + # Backwards-compatibility methods for httplib.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + if self._connection: + self._connection.close() + + @property + def closed(self): + if self._fp is None: + return True + elif hasattr(self._fp, 'isclosed'): + return self._fp.isclosed() + elif hasattr(self._fp, 'closed'): + return self._fp.closed + else: + return True + + def fileno(self): + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise IOError("The file-like object this HTTPResponse is wrapped " + "around has no file descriptor") + + def flush(self): + if self._fp is not None and hasattr(self._fp, 'flush'): + return self._fp.flush() + + def readable(self): + # This method is required for `io` module compatibility. + return True + + def readinto(self, b): + # This method is required for `io` module compatibility. + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[:len(temp)] = temp + return len(temp) + + def supports_chunked_reads(self): + """ + Checks if the underlying file-like object looks like a + httplib.HTTPResponse object. We do this by testing for the fp + attribute. If it is present we assume it returns raw chunks as + processed by read_chunked(). + """ + return hasattr(self._fp, 'fp') + + def _update_chunk_length(self): + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is not None: + return + line = self._fp.fp.readline() + line = line.split(b';', 1)[0] + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise httplib.IncompleteRead(line) + + def _handle_chunk(self, amt): + returned_chunk = None + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + returned_chunk = chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + returned_chunk = value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + returned_chunk = value + else: # amt > self.chunk_left + returned_chunk = self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + return returned_chunk + + def read_chunked(self, amt=None, decode_content=None): + """ + Similar to :meth:`HTTPResponse.read`, but with an additional + parameter: ``decode_content``. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + self._init_decoder() + # FIXME: Rewrite this method and make it a class with a better structured logic. + if not self.chunked: + raise ResponseNotChunked( + "Response is not chunked. " + "Header 'transfer-encoding: chunked' is missing.") + if not self.supports_chunked_reads(): + raise BodyNotHttplibCompatible( + "Body should be httplib.HTTPResponse like. " + "It should have have an fp attribute which returns raw chunks.") + + # Don't bother reading the body of a HEAD request. + if self._original_response and is_response_to_head(self._original_response): + self._original_response.close() + return + + with self._error_catcher(): + while True: + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + decoded = self._decode(chunk, decode_content=decode_content, + flush_decoder=False) + if decoded: + yield decoded + + if decode_content: + # On CPython and PyPy, we should never need to flush the + # decoder. However, on Jython we *might* need to, so + # lets defensively do it anyway. + decoded = self._flush_decoder() + if decoded: # Platform-specific: Jython. + yield decoded + + # Chunk content ends with \r\n: discard it. + while True: + line = self._fp.fp.readline() + if not line: + # Some sites may not end with '\r\n'. + break + if line == b'\r\n': + break + + # We read everything; close the "file". + if self._original_response: + self._original_response.close() diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/util/__init__.py new file mode 100644 index 0000000..bba628d --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/__init__.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +# For backwards compatibility, provide imports that used to be here. +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + IS_PYOPENSSL, + IS_SECURETRANSPORT, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import ( + current_time, + Timeout, +) + +from .retry import Retry +from .url import ( + get_host, + parse_url, + split_first, + Url, +) +from .wait import ( + wait_for_read, + wait_for_write +) + +__all__ = ( + 'HAS_SNI', + 'IS_PYOPENSSL', + 'IS_SECURETRANSPORT', + 'SSLContext', + 'Retry', + 'Timeout', + 'Url', + 'assert_fingerprint', + 'current_time', + 'is_connection_dropped', + 'is_fp_closed', + 'get_host', + 'parse_url', + 'make_headers', + 'resolve_cert_reqs', + 'resolve_ssl_version', + 'split_first', + 'ssl_wrap_socket', + 'wait_for_read', + 'wait_for_write' +) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/connection.py b/collectors/python.d.plugin/python_modules/urllib3/util/connection.py new file mode 100644 index 0000000..3bd69e8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/connection.py @@ -0,0 +1,131 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import socket +from .wait import wait_for_read +from .selectors import HAS_SELECT, SelectorError + + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if sock is False: # Platform-specific: AppEngine + return False + if sock is None: # Connection already closed (such as by httplib). + return True + + if not HAS_SELECT: + return False + + try: + return bool(wait_for_read(sock, timeout=0.0)) + except SelectorError: + return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +# One additional modification is that we avoid binding to IPv6 servers +# discovered in DNS if the system doesn't have IPv6 functionality. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + if host.startswith('['): + host = host.strip('[]') + err = None + + # Using the value from allowed_gai_family() in the context of getaddrinfo lets + # us select whether to work with IPv4 DNS records, IPv6 records, or both. + # The original create_connection function always returns all records. + family = allowed_gai_family() + + for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + _set_socket_options(sock, socket_options) + + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as e: + err = e + if sock is not None: + sock.close() + sock = None + + if err is not None: + raise err + + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) + + +def allowed_gai_family(): + """This function is designed to work in the context of + getaddrinfo, where family=socket.AF_UNSPEC is the default and + will perform a DNS search for both IPv6 and IPv4 records.""" + + family = socket.AF_INET + if HAS_IPV6: + family = socket.AF_UNSPEC + return family + + +def _has_ipv6(host): + """ Returns True if the system can bind an IPv6 address. """ + sock = None + has_ipv6 = False + + if socket.has_ipv6: + # has_ipv6 returns true if cPython was compiled with IPv6 support. + # It does not tell us if the system has IPv6 support enabled. To + # determine that we must bind to an IPv6 address. + # https://github.com/shazow/urllib3/pull/611 + # https://bugs.python.org/issue658327 + try: + sock = socket.socket(socket.AF_INET6) + sock.bind((host, 0)) + has_ipv6 = True + except Exception: + pass + + if sock: + sock.close() + return has_ipv6 + + +HAS_IPV6 = _has_ipv6('::1') diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/request.py b/collectors/python.d.plugin/python_modules/urllib3/util/request.py new file mode 100644 index 0000000..18f27b0 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/request.py @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from base64 import b64encode + +from ..packages.six import b, integer_types +from ..exceptions import UnrewindableBodyError + +ACCEPT_ENCODING = 'gzip,deflate' +_FAILEDTELL = object() + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None, disable_cache=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. + + Example:: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(b(proxy_basic_auth)).decode('utf-8') + + if disable_cache: + headers['cache-control'] = 'no-cache' + + return headers + + +def set_file_position(body, pos): + """ + If a position is provided, move file to that point. + Otherwise, we'll attempt to record a position for future use. + """ + if pos is not None: + rewind_body(body, pos) + elif getattr(body, 'tell', None) is not None: + try: + pos = body.tell() + except (IOError, OSError): + # This differentiates from None, allowing us to catch + # a failed `tell()` later when trying to rewind the body. + pos = _FAILEDTELL + + return pos + + +def rewind_body(body, body_pos): + """ + Attempt to rewind body to a certain position. + Primarily used for request redirects and retries. + + :param body: + File-like object that supports seek. + + :param int pos: + Position to seek to in file. + """ + body_seek = getattr(body, 'seek', None) + if body_seek is not None and isinstance(body_pos, integer_types): + try: + body_seek(body_pos) + except (IOError, OSError): + raise UnrewindableBodyError("An error occurred when rewinding request " + "body for redirect/retry.") + elif body_pos is _FAILEDTELL: + raise UnrewindableBodyError("Unable to record file position for rewinding " + "request body during a redirect/retry.") + else: + raise ValueError("body_pos must be of type integer, " + "instead it was %s." % type(body_pos)) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/response.py b/collectors/python.d.plugin/python_modules/urllib3/util/response.py new file mode 100644 index 0000000..e4cda93 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/response.py @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from ..packages.six.moves import http_client as httplib + +from ..exceptions import HeaderParsingError + + +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + + try: + # Check `isclosed()` first, in case Python3 doesn't set `closed`. + # GH Issue #928 + return obj.isclosed() + except AttributeError: + pass + + try: + # Check via the official file-like-object way. + return obj.closed + except AttributeError: + pass + + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). + return obj.fp is None + except AttributeError: + pass + + raise ValueError("Unable to determine whether fp is closed.") + + +def assert_header_parsing(headers): + """ + Asserts whether all headers have been successfully parsed. + Extracts encountered errors from the result of parsing headers. + + Only works on Python 3. + + :param headers: Headers to verify. + :type headers: `httplib.HTTPMessage`. + + :raises urllib3.exceptions.HeaderParsingError: + If parsing errors are found. + """ + + # This will fail silently if we pass in the wrong kind of parameter. + # To make debugging easier add an explicit check. + if not isinstance(headers, httplib.HTTPMessage): + raise TypeError('expected httplib.Message, got {0}.'.format( + type(headers))) + + defects = getattr(headers, 'defects', None) + get_payload = getattr(headers, 'get_payload', None) + + unparsed_data = None + if get_payload: # Platform-specific: Python 3. + unparsed_data = get_payload() + + if defects or unparsed_data: + raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) + + +def is_response_to_head(response): + """ + Checks whether the request of a response has been a HEAD-request. + Handles the quirks of AppEngine. + + :param conn: + :type conn: :class:`httplib.HTTPResponse` + """ + # FIXME: Can we do this somehow without accessing private httplib _method? + method = response._method + if isinstance(method, int): # Platform-specific: Appengine + return method == 3 + return method.upper() == 'HEAD' diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/retry.py b/collectors/python.d.plugin/python_modules/urllib3/util/retry.py new file mode 100644 index 0000000..61e63af --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/retry.py @@ -0,0 +1,402 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import time +import logging +from collections import namedtuple +from itertools import takewhile +import email +import re + +from ..exceptions import ( + ConnectTimeoutError, + MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, + InvalidHeader, +) +from ..packages import six + + +log = logging.getLogger(__name__) + +# Data structure for representing the metadata of requests that result in a retry. +RequestHistory = namedtuple('RequestHistory', ["method", "url", "error", + "status", "redirect_location"]) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int status: + How many times to retry on bad status codes. + + These are retries made on responses, where status code matches + ``status_forcelist``. + + Set to ``0`` to fail on the first retry of this type. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + idempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + Set to a ``False`` value to retry on any verb. + + :param iterable status_forcelist: + A set of integer HTTP status codes that we should force a retry on. + A retry is initiated if the request method is in ``method_whitelist`` + and the response status code is in ``status_forcelist``. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts after the second try + (most errors are resolved immediately by a second try without a + delay). urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.BACKOFF_MAX`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + + :param bool raise_on_status: Similar meaning to ``raise_on_redirect``: + whether we should raise an exception, or return a response, + if status falls in ``status_forcelist`` range and retries have + been exhausted. + + :param tuple history: The history of the request encountered during + each call to :meth:`~Retry.increment`. The list is in the order + the requests occurred. Each list item is of class :class:`RequestHistory`. + + :param bool respect_retry_after_header: + Whether to respect Retry-After header on status codes defined as + :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not. + + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503]) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, status=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, raise_on_status=True, + history=None, respect_retry_after_header=True): + + self.total = total + self.connect = connect + self.read = read + self.status = status + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self.raise_on_status = raise_on_status + self.history = history or tuple() + self.respect_retry_after_header = respect_retry_after_header + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, status=self.status, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + raise_on_status=self.raise_on_status, + history=self.history, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r", retries, new_retries) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + # We want to consider only the last consecutive errors sequence (Ignore redirects). + consecutive_errors_len = len(list(takewhile(lambda x: x.redirect_location is None, + reversed(self.history)))) + if consecutive_errors_len <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def parse_retry_after(self, retry_after): + # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4 + if re.match(r"^\s*[0-9]+\s*$", retry_after): + seconds = int(retry_after) + else: + retry_date_tuple = email.utils.parsedate(retry_after) + if retry_date_tuple is None: + raise InvalidHeader("Invalid Retry-After header: %s" % retry_after) + retry_date = time.mktime(retry_date_tuple) + seconds = retry_date - time.time() + + if seconds < 0: + seconds = 0 + + return seconds + + def get_retry_after(self, response): + """ Get the value of Retry-After in seconds. """ + + retry_after = response.getheader("Retry-After") + + if retry_after is None: + return None + + return self.parse_retry_after(retry_after) + + def sleep_for_retry(self, response=None): + retry_after = self.get_retry_after(response) + if retry_after: + time.sleep(retry_after) + return True + + return False + + def _sleep_backoff(self): + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def sleep(self, response=None): + """ Sleep between retry attempts. + + This method will respect a server's ``Retry-After`` response header + and sleep the duration of the time requested. If that is not present, it + will use an exponential backoff. By default, the backoff factor is 0 and + this method will return immediately. + """ + + if response: + slept = self.sleep_for_retry(response) + if slept: + return + + self._sleep_backoff() + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def _is_method_retryable(self, method): + """ Checks if a given HTTP method should be retried upon, depending if + it is included on the method whitelist. + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return True + + def is_retry(self, method, status_code, has_retry_after=False): + """ Is this method/status code retryable? (Based on whitelists and control + variables such as the number of total retries to allow, whether to + respect the Retry-After header, whether this header is present, and + whether the returned status code is on the list of status codes to + be retried upon on the presence of the aforementioned header) + """ + if not self._is_method_retryable(method): + return False + + if self.status_forcelist and status_code in self.status_forcelist: + return True + + return (self.total and self.respect_retry_after_header and + has_retry_after and (status_code in self.RETRY_AFTER_STATUS_CODES)) + + def is_exhausted(self): + """ Are we out of retries? """ + retry_counts = (self.total, self.connect, self.read, self.redirect, self.status) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, + _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + connect = self.connect + read = self.read + redirect = self.redirect + status_count = self.status + cause = 'unknown' + status = None + redirect_location = None + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False or not self._is_method_retryable(method): + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + cause = 'too many redirects' + redirect_location = response.get_redirect_location() + status = response.status + + else: + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist + cause = ResponseError.GENERIC_ERROR + if response and response.status: + if status_count is not None: + status_count -= 1 + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) + status = response.status + + history = self.history + (RequestHistory(method, url, error, status, redirect_location),) + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, status=status_count, + history=history) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error or ResponseError(cause)) + + log.debug("Incremented Retry for (url='%s'): %r", url, new_retry) + + return new_retry + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect}, status={self.status})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py b/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py new file mode 100644 index 0000000..de5e498 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py @@ -0,0 +1,588 @@ +# SPDX-License-Identifier: MIT +# Backport of selectors.py from Python 3.5+ to support Python < 3.4 +# Also has the behavior specified in PEP 475 which is to retry syscalls +# in the case of an EINTR error. This module is required because selectors34 +# does not follow this behavior and instead returns that no dile descriptor +# events have occurred rather than retry the syscall. The decision to drop +# support for select.devpoll is made to maintain 100% test coverage. + +import errno +import math +import select +import socket +import sys +import time + +from collections import namedtuple + +try: + from collections import Mapping +except ImportError: + from collections.abc import Mapping + +try: + monotonic = time.monotonic +except (AttributeError, ImportError): # Python 3.3< + monotonic = time.time + +EVENT_READ = (1 << 0) +EVENT_WRITE = (1 << 1) + +HAS_SELECT = True # Variable that shows whether the platform has a selector. +_SYSCALL_SENTINEL = object() # Sentinel in case a system call returns None. +_DEFAULT_SELECTOR = None + + +class SelectorError(Exception): + def __init__(self, errcode): + super(SelectorError, self).__init__() + self.errno = errcode + + def __repr__(self): + return "".format(self.errno) + + def __str__(self): + return self.__repr__() + + +def _fileobj_to_fd(fileobj): + """ Return a file descriptor from a file object. If + given an integer will simply return that integer back. """ + if isinstance(fileobj, int): + fd = fileobj + else: + try: + fd = int(fileobj.fileno()) + except (AttributeError, TypeError, ValueError): + raise ValueError("Invalid file object: {0!r}".format(fileobj)) + if fd < 0: + raise ValueError("Invalid file descriptor: {0}".format(fd)) + return fd + + +# Determine which function to use to wrap system calls because Python 3.5+ +# already handles the case when system calls are interrupted. +if sys.version_info >= (3, 5): + def _syscall_wrapper(func, _, *args, **kwargs): + """ This is the short-circuit version of the below logic + because in Python 3.5+ all system calls automatically restart + and recalculate their timeouts. """ + try: + return func(*args, **kwargs) + except (OSError, IOError, select.error) as e: + errcode = None + if hasattr(e, "errno"): + errcode = e.errno + raise SelectorError(errcode) +else: + def _syscall_wrapper(func, recalc_timeout, *args, **kwargs): + """ Wrapper function for syscalls that could fail due to EINTR. + All functions should be retried if there is time left in the timeout + in accordance with PEP 475. """ + timeout = kwargs.get("timeout", None) + if timeout is None: + expires = None + recalc_timeout = False + else: + timeout = float(timeout) + if timeout < 0.0: # Timeout less than 0 treated as no timeout. + expires = None + else: + expires = monotonic() + timeout + + args = list(args) + if recalc_timeout and "timeout" not in kwargs: + raise ValueError( + "Timeout must be in args or kwargs to be recalculated") + + result = _SYSCALL_SENTINEL + while result is _SYSCALL_SENTINEL: + try: + result = func(*args, **kwargs) + # OSError is thrown by select.select + # IOError is thrown by select.epoll.poll + # select.error is thrown by select.poll.poll + # Aren't we thankful for Python 3.x rework for exceptions? + except (OSError, IOError, select.error) as e: + # select.error wasn't a subclass of OSError in the past. + errcode = None + if hasattr(e, "errno"): + errcode = e.errno + elif hasattr(e, "args"): + errcode = e.args[0] + + # Also test for the Windows equivalent of EINTR. + is_interrupt = (errcode == errno.EINTR or (hasattr(errno, "WSAEINTR") and + errcode == errno.WSAEINTR)) + + if is_interrupt: + if expires is not None: + current_time = monotonic() + if current_time > expires: + raise OSError(errno=errno.ETIMEDOUT) + if recalc_timeout: + if "timeout" in kwargs: + kwargs["timeout"] = expires - current_time + continue + if errcode: + raise SelectorError(errcode) + else: + raise + return result + + +SelectorKey = namedtuple('SelectorKey', ['fileobj', 'fd', 'events', 'data']) + + +class _SelectorMapping(Mapping): + """ Mapping of file objects to selector keys """ + + def __init__(self, selector): + self._selector = selector + + def __len__(self): + return len(self._selector._fd_to_key) + + def __getitem__(self, fileobj): + try: + fd = self._selector._fileobj_lookup(fileobj) + return self._selector._fd_to_key[fd] + except KeyError: + raise KeyError("{0!r} is not registered.".format(fileobj)) + + def __iter__(self): + return iter(self._selector._fd_to_key) + + +class BaseSelector(object): + """ Abstract Selector class + + A selector supports registering file objects to be monitored + for specific I/O events. + + A file object is a file descriptor or any object with a + `fileno()` method. An arbitrary object can be attached to the + file object which can be used for example to store context info, + a callback, etc. + + A selector can use various implementations (select(), poll(), epoll(), + and kqueue()) depending on the platform. The 'DefaultSelector' class uses + the most efficient implementation for the current platform. + """ + def __init__(self): + # Maps file descriptors to keys. + self._fd_to_key = {} + + # Read-only mapping returned by get_map() + self._map = _SelectorMapping(self) + + def _fileobj_lookup(self, fileobj): + """ Return a file descriptor from a file object. + This wraps _fileobj_to_fd() to do an exhaustive + search in case the object is invalid but we still + have it in our map. Used by unregister() so we can + unregister an object that was previously registered + even if it is closed. It is also used by _SelectorMapping + """ + try: + return _fileobj_to_fd(fileobj) + except ValueError: + + # Search through all our mapped keys. + for key in self._fd_to_key.values(): + if key.fileobj is fileobj: + return key.fd + + # Raise ValueError after all. + raise + + def register(self, fileobj, events, data=None): + """ Register a file object for a set of events to monitor. """ + if (not events) or (events & ~(EVENT_READ | EVENT_WRITE)): + raise ValueError("Invalid events: {0!r}".format(events)) + + key = SelectorKey(fileobj, self._fileobj_lookup(fileobj), events, data) + + if key.fd in self._fd_to_key: + raise KeyError("{0!r} (FD {1}) is already registered" + .format(fileobj, key.fd)) + + self._fd_to_key[key.fd] = key + return key + + def unregister(self, fileobj): + """ Unregister a file object from being monitored. """ + try: + key = self._fd_to_key.pop(self._fileobj_lookup(fileobj)) + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + # Getting the fileno of a closed socket on Windows errors with EBADF. + except socket.error as e: # Platform-specific: Windows. + if e.errno != errno.EBADF: + raise + else: + for key in self._fd_to_key.values(): + if key.fileobj is fileobj: + self._fd_to_key.pop(key.fd) + break + else: + raise KeyError("{0!r} is not registered".format(fileobj)) + return key + + def modify(self, fileobj, events, data=None): + """ Change a registered file object monitored events and data. """ + # NOTE: Some subclasses optimize this operation even further. + try: + key = self._fd_to_key[self._fileobj_lookup(fileobj)] + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + if events != key.events: + self.unregister(fileobj) + key = self.register(fileobj, events, data) + + elif data != key.data: + # Use a shortcut to update the data. + key = key._replace(data=data) + self._fd_to_key[key.fd] = key + + return key + + def select(self, timeout=None): + """ Perform the actual selection until some monitored file objects + are ready or the timeout expires. """ + raise NotImplementedError() + + def close(self): + """ Close the selector. This must be called to ensure that all + underlying resources are freed. """ + self._fd_to_key.clear() + self._map = None + + def get_key(self, fileobj): + """ Return the key associated with a registered file object. """ + mapping = self.get_map() + if mapping is None: + raise RuntimeError("Selector is closed") + try: + return mapping[fileobj] + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + def get_map(self): + """ Return a mapping of file objects to selector keys """ + return self._map + + def _key_from_fd(self, fd): + """ Return the key associated to a given file descriptor + Return None if it is not found. """ + try: + return self._fd_to_key[fd] + except KeyError: + return None + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +# Almost all platforms have select.select() +if hasattr(select, "select"): + class SelectSelector(BaseSelector): + """ Select-based selector. """ + def __init__(self): + super(SelectSelector, self).__init__() + self._readers = set() + self._writers = set() + + def register(self, fileobj, events, data=None): + key = super(SelectSelector, self).register(fileobj, events, data) + if events & EVENT_READ: + self._readers.add(key.fd) + if events & EVENT_WRITE: + self._writers.add(key.fd) + return key + + def unregister(self, fileobj): + key = super(SelectSelector, self).unregister(fileobj) + self._readers.discard(key.fd) + self._writers.discard(key.fd) + return key + + def _select(self, r, w, timeout=None): + """ Wrapper for select.select because timeout is a positional arg """ + return select.select(r, w, [], timeout) + + def select(self, timeout=None): + # Selecting on empty lists on Windows errors out. + if not len(self._readers) and not len(self._writers): + return [] + + timeout = None if timeout is None else max(timeout, 0.0) + ready = [] + r, w, _ = _syscall_wrapper(self._select, True, self._readers, + self._writers, timeout) + r = set(r) + w = set(w) + for fd in r | w: + events = 0 + if fd in r: + events |= EVENT_READ + if fd in w: + events |= EVENT_WRITE + + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + return ready + + +if hasattr(select, "poll"): + class PollSelector(BaseSelector): + """ Poll-based selector """ + def __init__(self): + super(PollSelector, self).__init__() + self._poll = select.poll() + + def register(self, fileobj, events, data=None): + key = super(PollSelector, self).register(fileobj, events, data) + event_mask = 0 + if events & EVENT_READ: + event_mask |= select.POLLIN + if events & EVENT_WRITE: + event_mask |= select.POLLOUT + self._poll.register(key.fd, event_mask) + return key + + def unregister(self, fileobj): + key = super(PollSelector, self).unregister(fileobj) + self._poll.unregister(key.fd) + return key + + def _wrap_poll(self, timeout=None): + """ Wrapper function for select.poll.poll() so that + _syscall_wrapper can work with only seconds. """ + if timeout is not None: + if timeout <= 0: + timeout = 0 + else: + # select.poll.poll() has a resolution of 1 millisecond, + # round away from zero to wait *at least* timeout seconds. + timeout = math.ceil(timeout * 1e3) + + result = self._poll.poll(timeout) + return result + + def select(self, timeout=None): + ready = [] + fd_events = _syscall_wrapper(self._wrap_poll, True, timeout=timeout) + for fd, event_mask in fd_events: + events = 0 + if event_mask & ~select.POLLIN: + events |= EVENT_WRITE + if event_mask & ~select.POLLOUT: + events |= EVENT_READ + + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + + return ready + + +if hasattr(select, "epoll"): + class EpollSelector(BaseSelector): + """ Epoll-based selector """ + def __init__(self): + super(EpollSelector, self).__init__() + self._epoll = select.epoll() + + def fileno(self): + return self._epoll.fileno() + + def register(self, fileobj, events, data=None): + key = super(EpollSelector, self).register(fileobj, events, data) + events_mask = 0 + if events & EVENT_READ: + events_mask |= select.EPOLLIN + if events & EVENT_WRITE: + events_mask |= select.EPOLLOUT + _syscall_wrapper(self._epoll.register, False, key.fd, events_mask) + return key + + def unregister(self, fileobj): + key = super(EpollSelector, self).unregister(fileobj) + try: + _syscall_wrapper(self._epoll.unregister, False, key.fd) + except SelectorError: + # This can occur when the fd was closed since registry. + pass + return key + + def select(self, timeout=None): + if timeout is not None: + if timeout <= 0: + timeout = 0.0 + else: + # select.epoll.poll() has a resolution of 1 millisecond + # but luckily takes seconds so we don't need a wrapper + # like PollSelector. Just for better rounding. + timeout = math.ceil(timeout * 1e3) * 1e-3 + timeout = float(timeout) + else: + timeout = -1.0 # epoll.poll() must have a float. + + # We always want at least 1 to ensure that select can be called + # with no file descriptors registered. Otherwise will fail. + max_events = max(len(self._fd_to_key), 1) + + ready = [] + fd_events = _syscall_wrapper(self._epoll.poll, True, + timeout=timeout, + maxevents=max_events) + for fd, event_mask in fd_events: + events = 0 + if event_mask & ~select.EPOLLIN: + events |= EVENT_WRITE + if event_mask & ~select.EPOLLOUT: + events |= EVENT_READ + + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + return ready + + def close(self): + self._epoll.close() + super(EpollSelector, self).close() + + +if hasattr(select, "kqueue"): + class KqueueSelector(BaseSelector): + """ Kqueue / Kevent-based selector """ + def __init__(self): + super(KqueueSelector, self).__init__() + self._kqueue = select.kqueue() + + def fileno(self): + return self._kqueue.fileno() + + def register(self, fileobj, events, data=None): + key = super(KqueueSelector, self).register(fileobj, events, data) + if events & EVENT_READ: + kevent = select.kevent(key.fd, + select.KQ_FILTER_READ, + select.KQ_EV_ADD) + + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + + if events & EVENT_WRITE: + kevent = select.kevent(key.fd, + select.KQ_FILTER_WRITE, + select.KQ_EV_ADD) + + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + + return key + + def unregister(self, fileobj): + key = super(KqueueSelector, self).unregister(fileobj) + if key.events & EVENT_READ: + kevent = select.kevent(key.fd, + select.KQ_FILTER_READ, + select.KQ_EV_DELETE) + try: + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + except SelectorError: + pass + if key.events & EVENT_WRITE: + kevent = select.kevent(key.fd, + select.KQ_FILTER_WRITE, + select.KQ_EV_DELETE) + try: + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + except SelectorError: + pass + + return key + + def select(self, timeout=None): + if timeout is not None: + timeout = max(timeout, 0) + + max_events = len(self._fd_to_key) * 2 + ready_fds = {} + + kevent_list = _syscall_wrapper(self._kqueue.control, True, + None, max_events, timeout) + + for kevent in kevent_list: + fd = kevent.ident + event_mask = kevent.filter + events = 0 + if event_mask == select.KQ_FILTER_READ: + events |= EVENT_READ + if event_mask == select.KQ_FILTER_WRITE: + events |= EVENT_WRITE + + key = self._key_from_fd(fd) + if key: + if key.fd not in ready_fds: + ready_fds[key.fd] = (key, events & key.events) + else: + old_events = ready_fds[key.fd][1] + ready_fds[key.fd] = (key, (events | old_events) & key.events) + + return list(ready_fds.values()) + + def close(self): + self._kqueue.close() + super(KqueueSelector, self).close() + + +if not hasattr(select, 'select'): # Platform-specific: AppEngine + HAS_SELECT = False + + +def _can_allocate(struct): + """ Checks that select structs can be allocated by the underlying + operating system, not just advertised by the select module. We don't + check select() because we'll be hopeful that most platforms that + don't have it available will not advertise it. (ie: GAE) """ + try: + # select.poll() objects won't fail until used. + if struct == 'poll': + p = select.poll() + p.poll(0) + + # All others will fail on allocation. + else: + getattr(select, struct)().close() + return True + except (OSError, AttributeError) as e: + return False + + +# Choose the best implementation, roughly: +# kqueue == epoll > poll > select. Devpoll not supported. (See above) +# select() also can't accept a FD > FD_SETSIZE (usually around 1024) +def DefaultSelector(): + """ This function serves as a first call for DefaultSelector to + detect if the select module is being monkey-patched incorrectly + by eventlet, greenlet, and preserve proper behavior. """ + global _DEFAULT_SELECTOR + if _DEFAULT_SELECTOR is None: + if _can_allocate('kqueue'): + _DEFAULT_SELECTOR = KqueueSelector + elif _can_allocate('epoll'): + _DEFAULT_SELECTOR = EpollSelector + elif _can_allocate('poll'): + _DEFAULT_SELECTOR = PollSelector + elif hasattr(select, 'select'): + _DEFAULT_SELECTOR = SelectSelector + else: # Platform-specific: AppEngine + raise ValueError('Platform does not have a selector') + return _DEFAULT_SELECTOR() diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/ssl_.py b/collectors/python.d.plugin/python_modules/urllib3/util/ssl_.py new file mode 100644 index 0000000..ece3ec3 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/ssl_.py @@ -0,0 +1,338 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import errno +import warnings +import hmac + +from binascii import hexlify, unhexlify +from hashlib import md5, sha1, sha256 + +from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning + + +SSLContext = None +HAS_SNI = False +IS_PYOPENSSL = False +IS_SECURETRANSPORT = False + +# Maps the length of a digest to a possible hash function producing this digest +HASHFUNC_MAP = { + 32: md5, + 40: sha1, + 64: sha256, +} + + +def _const_compare_digest_backport(a, b): + """ + Compare two digests of equal length in constant time. + + The digests must be of type str/bytes. + Returns True if the digests match, and False otherwise. + """ + result = abs(len(a) - len(b)) + for l, r in zip(bytearray(a), bytearray(b)): + result |= l ^ r + return result == 0 + + +_const_compare_digest = getattr(hmac, 'compare_digest', + _const_compare_digest_backport) + + +try: # Test for SSL features + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and +# security, +# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_CIPHERS = ':'.join([ + 'ECDH+AESGCM', + 'ECDH+CHACHA20', + 'DH+AESGCM', + 'DH+CHACHA20', + 'ECDH+AES256', + 'DH+AES256', + 'ECDH+AES128', + 'DH+AES', + 'RSA+AESGCM', + 'RSA+AES', + '!aNULL', + '!eNULL', + '!MD5', +]) + +try: + from ssl import SSLContext # Modern SSL? +except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or + (3, 2) <= sys.version_info) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, cafile=None, capath=None): + self.ca_certs = cafile + + if capath is not None: + raise SSLError("CA directories not supported in older Pythons") + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None, server_side=False): + warnings.warn( + 'A true SSLContext object is not available. This prevents ' + 'urllib3 from configuring SSL appropriately and may cause ' + 'certain SSL connections to fail. You can upgrade to a newer ' + 'version of Python to solve this. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings', + InsecurePlatformWarning + ) + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + 'server_side': server_side, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + fingerprint = fingerprint.replace(':', '').lower() + digest_length = len(fingerprint) + hashfunc = HASHFUNC_MAP.get(digest_length) + if not hashfunc: + raise SSLError( + 'Fingerprint of invalid length: {0}'.format(fingerprint)) + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + cert_digest = hashfunc(cert).digest() + + if not _const_compare_digest(cert_digest, fingerprint_bytes): + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(fingerprint, hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def create_urllib3_context(ssl_version=None, cert_reqs=None, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. + :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + # Setting the default here, as we may have no ssl module on import + cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None, + ca_cert_dir=None): + """ + All arguments except for server_hostname, ssl_context, and ca_cert_dir have + the same meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. + :param ca_cert_dir: + A directory containing CA certificates in multiple separate files, as + supported by OpenSSL's -CApath flag or the capath argument to + SSLContext.load_verify_locations(). + """ + context = ssl_context + if context is None: + # Note: This branch of code and all the variables in it are no longer + # used by urllib3 itself. We should consider deprecating and removing + # this code. + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs or ca_cert_dir: + try: + context.load_verify_locations(ca_certs, ca_cert_dir) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: + raise SSLError(e) + raise + elif getattr(context, 'load_default_certs', None) is not None: + # try to load OS default certs; works well on Windows (require Python3.4+) + context.load_default_certs() + + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + + warnings.warn( + 'An HTTPS request has been made, but the SNI (Subject Name ' + 'Indication) extension to TLS is not available on this platform. ' + 'This may cause the server to present an incorrect TLS ' + 'certificate, which can cause validation failures. You can upgrade to ' + 'a newer version of Python to solve this. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings', + SNIMissingWarning + ) + return context.wrap_socket(sock) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/timeout.py b/collectors/python.d.plugin/python_modules/urllib3/util/timeout.py new file mode 100644 index 0000000..4041cf9 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/timeout.py @@ -0,0 +1,243 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() + + +# Use time.monotonic if available. +current_time = getattr(time, "monotonic", time.time) + + +class Timeout(object): + """ Timeout configuration. + + Timeouts can be defined as a default for a pool:: + + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not trigger, even though the request will take + several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid. + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If it is a numeric value less than or equal to + zero, or the type is not an integer, float, or None. + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + if isinstance(value, bool): + raise ValueError("Timeout cannot be a boolean value. It must " + "be an int, float or None.") + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int, float or None." % (name, value)) + + try: + if value <= 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than or equal to 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int, float or None." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value + passed to this function. + + :param timeout: The legacy timeout value. + :type timeout: integer, float, sentinel default object, or None + :return: Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: Elapsed time. + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: Connect timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: Value to use for the read timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # In case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/url.py b/collectors/python.d.plugin/python_modules/urllib3/util/url.py new file mode 100644 index 0000000..99fd653 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/url.py @@ -0,0 +1,231 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from collections import namedtuple + +from ..exceptions import LocationParseError + + +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] + +# We only want to normalize urls with an HTTP(S) scheme. +# urllib3 infers URLs without a scheme (None) to be http. +NORMALIZABLE_SCHEMES = ('http', 'https', None) + + +class Url(namedtuple('Url', url_attrs)): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. Both the scheme and host are normalized as they are + both case-insensitive according to RFC 3986. + """ + __slots__ = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + if path and not path.startswith('/'): + path = '/' + path + if scheme: + scheme = scheme.lower() + if host and scheme in NORMALIZABLE_SCHEMES: + host = host.lower() + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example:: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx + 1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example:: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + if not url: + # Empty + return Url() + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. No whitespace, no plus or + # minus prefixes, no non-integer digits such as ^2 (superscript). + if not port.isdigit(): + raise LocationParseError(url) + try: + port = int(port) + except ValueError: + raise LocationParseError(url) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/wait.py b/collectors/python.d.plugin/python_modules/urllib3/util/wait.py new file mode 100644 index 0000000..21e7297 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/wait.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: MIT +from .selectors import ( + HAS_SELECT, + DefaultSelector, + EVENT_READ, + EVENT_WRITE +) + + +def _wait_for_io_events(socks, events, timeout=None): + """ Waits for IO events to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be interacted with immediately. """ + if not HAS_SELECT: + raise ValueError('Platform does not have a selector') + if not isinstance(socks, list): + # Probably just a single socket. + if hasattr(socks, "fileno"): + socks = [socks] + # Otherwise it might be a non-list iterable. + else: + socks = list(socks) + with DefaultSelector() as selector: + for sock in socks: + selector.register(sock, events) + return [key[0].fileobj for key in + selector.select(timeout) if key[1] & events] + + +def wait_for_read(socks, timeout=None): + """ Waits for reading to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be read from immediately. """ + return _wait_for_io_events(socks, EVENT_READ, timeout) + + +def wait_for_write(socks, timeout=None): + """ Waits for writing to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be written to immediately. """ + return _wait_for_io_events(socks, EVENT_WRITE, timeout) diff --git a/collectors/python.d.plugin/rabbitmq/Makefile.inc b/collectors/python.d.plugin/rabbitmq/Makefile.inc new file mode 100644 index 0000000..7e67ef5 --- /dev/null +++ b/collectors/python.d.plugin/rabbitmq/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += rabbitmq/rabbitmq.chart.py +dist_pythonconfig_DATA += rabbitmq/rabbitmq.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += rabbitmq/README.md rabbitmq/Makefile.inc + diff --git a/collectors/python.d.plugin/rabbitmq/README.md b/collectors/python.d.plugin/rabbitmq/README.md new file mode 100644 index 0000000..927adcc --- /dev/null +++ b/collectors/python.d.plugin/rabbitmq/README.md @@ -0,0 +1,138 @@ + + +# RabbitMQ monitoring with Netdata + +Collects message broker global and per virtual host metrics. + + +Following charts are drawn: + +1. **Queued Messages** + + - ready + - unacknowledged + +2. **Message Rates** + + - ack + - redelivered + - deliver + - publish + +3. **Global Counts** + + - channels + - consumers + - connections + - queues + - exchanges + +4. **File Descriptors** + + - used descriptors + +5. **Socket Descriptors** + + - used descriptors + +6. **Erlang processes** + + - used processes + +7. **Erlang run queue** + + - Erlang run queue + +8. **Memory** + + - free memory in megabytes + +9. **Disk Space** + + - free disk space in gigabytes + + +Per Vhost charts: + +1. **Vhost Messages** + + - ack + - confirm + - deliver + - get + - get_no_ack + - publish + - redeliver + - return_unroutable + +2. Per Queue charts: + + 1. **Queued Messages** + + - messages + - paged_out + - persistent + - ready + - unacknowledged + + 2. **Queue Messages stats** + + - ack + - confirm + - deliver + - get + - get_no_ack + - publish + - redeliver + - return_unroutable + +## Configuration + +Edit the `python.d/rabbitmq.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/rabbitmq.conf +``` + +When no configuration file is found, module tries to connect to: `localhost:15672`. + +```yaml +socket: + name : 'local' + host : '127.0.0.1' + port : 15672 + user : 'guest' + pass : 'guest' +``` + +--- + +### Per-Queue Chart configuration + +RabbitMQ users with the "monitoring" tag cannot see all queue data. You'll need a user with read permissions. +To create a dedicated user for netdata: + +```bash +rabbitmqctl add_user netdata ChangeThisSuperSecretPassword +rabbitmqctl set_permissions netdata "^$" "^$" ".*" +``` + +See [set_permissions](https://www.rabbitmq.com/rabbitmqctl.8.html#set_permissions) for details. + +Once the user is set up, add `collect_queues_metrics: yes` to your `rabbitmq.conf`: + +```yaml +local: + name : 'local' + host : '127.0.0.1' + port : 15672 + user : 'netdata' + pass : 'ChangeThisSuperSecretPassword' + collect_queues_metrics : 'yes' +``` diff --git a/collectors/python.d.plugin/rabbitmq/rabbitmq.chart.py b/collectors/python.d.plugin/rabbitmq/rabbitmq.chart.py new file mode 100644 index 0000000..866b777 --- /dev/null +++ b/collectors/python.d.plugin/rabbitmq/rabbitmq.chart.py @@ -0,0 +1,443 @@ +# -*- coding: utf-8 -*- +# Description: rabbitmq netdata python.d module +# Author: ilyam8 +# SPDX-License-Identifier: GPL-3.0-or-later + +from json import loads + +from bases.FrameworkServices.UrlService import UrlService + +API_NODE = 'api/nodes' +API_OVERVIEW = 'api/overview' +API_QUEUES = 'api/queues' +API_VHOSTS = 'api/vhosts' + +NODE_STATS = [ + 'fd_used', + 'mem_used', + 'sockets_used', + 'proc_used', + 'disk_free', + 'run_queue' +] + +OVERVIEW_STATS = [ + 'object_totals.channels', + 'object_totals.consumers', + 'object_totals.connections', + 'object_totals.queues', + 'object_totals.exchanges', + 'queue_totals.messages_ready', + 'queue_totals.messages_unacknowledged', + 'message_stats.ack', + 'message_stats.redeliver', + 'message_stats.deliver', + 'message_stats.publish', + 'churn_rates.connection_created_details.rate', + 'churn_rates.connection_closed_details.rate', + 'churn_rates.channel_created_details.rate', + 'churn_rates.channel_closed_details.rate', + 'churn_rates.queue_created_details.rate', + 'churn_rates.queue_declared_details.rate', + 'churn_rates.queue_deleted_details.rate' +] + +QUEUE_STATS = [ + 'messages', + 'messages_paged_out', + 'messages_persistent', + 'messages_ready', + 'messages_unacknowledged', + 'message_stats.ack', + 'message_stats.confirm', + 'message_stats.deliver', + 'message_stats.get', + 'message_stats.get_no_ack', + 'message_stats.publish', + 'message_stats.redeliver', + 'message_stats.return_unroutable', +] + +VHOST_MESSAGE_STATS = [ + 'message_stats.ack', + 'message_stats.confirm', + 'message_stats.deliver', + 'message_stats.get', + 'message_stats.get_no_ack', + 'message_stats.publish', + 'message_stats.redeliver', + 'message_stats.return_unroutable', +] + +ORDER = [ + 'queued_messages', + 'connection_churn_rates', + 'channel_churn_rates', + 'queue_churn_rates', + 'message_rates', + 'global_counts', + 'file_descriptors', + 'socket_descriptors', + 'erlang_processes', + 'erlang_run_queue', + 'memory', + 'disk_space' +] + +CHARTS = { + 'file_descriptors': { + 'options': [None, 'File Descriptors', 'descriptors', 'overview', 'rabbitmq.file_descriptors', 'line'], + 'lines': [ + ['fd_used', 'used', 'absolute'] + ] + }, + 'memory': { + 'options': [None, 'Memory', 'MiB', 'overview', 'rabbitmq.memory', 'area'], + 'lines': [ + ['mem_used', 'used', 'absolute', 1, 1 << 20] + ] + }, + 'disk_space': { + 'options': [None, 'Disk Space', 'GiB', 'overview', 'rabbitmq.disk_space', 'area'], + 'lines': [ + ['disk_free', 'free', 'absolute', 1, 1 << 30] + ] + }, + 'socket_descriptors': { + 'options': [None, 'Socket Descriptors', 'descriptors', 'overview', 'rabbitmq.sockets', 'line'], + 'lines': [ + ['sockets_used', 'used', 'absolute'] + ] + }, + 'erlang_processes': { + 'options': [None, 'Erlang Processes', 'processes', 'overview', 'rabbitmq.processes', 'line'], + 'lines': [ + ['proc_used', 'used', 'absolute'] + ] + }, + 'erlang_run_queue': { + 'options': [None, 'Erlang Run Queue', 'processes', 'overview', 'rabbitmq.erlang_run_queue', 'line'], + 'lines': [ + ['run_queue', 'length', 'absolute'] + ] + }, + 'global_counts': { + 'options': [None, 'Global Counts', 'counts', 'overview', 'rabbitmq.global_counts', 'line'], + 'lines': [ + ['object_totals_channels', 'channels', 'absolute'], + ['object_totals_consumers', 'consumers', 'absolute'], + ['object_totals_connections', 'connections', 'absolute'], + ['object_totals_queues', 'queues', 'absolute'], + ['object_totals_exchanges', 'exchanges', 'absolute'] + ] + }, + 'connection_churn_rates': { + 'options': [None, 'Connection Churn Rates', 'operations/s', 'overview', 'rabbitmq.connection_churn_rates', 'line'], + 'lines': [ + ['churn_rates_connection_created_details_rate', 'created', 'absolute'], + ['churn_rates_connection_closed_details_rate', 'closed', 'absolute'] + ] + }, + 'channel_churn_rates': { + 'options': [None, 'Channel Churn Rates', 'operations/s', 'overview', 'rabbitmq.channel_churn_rates', 'line'], + 'lines': [ + ['churn_rates_channel_created_details_rate', 'created', 'absolute'], + ['churn_rates_channel_closed_details_rate', 'closed', 'absolute'] + ] + }, + 'queue_churn_rates': { + 'options': [None, 'Queue Churn Rates', 'operations/s', 'overview', 'rabbitmq.queue_churn_rates', 'line'], + 'lines': [ + ['churn_rates_queue_created_details_rate', 'created', 'absolute'], + ['churn_rates_queue_declared_details_rate', 'declared', 'absolute'], + ['churn_rates_queue_deleted_details_rate', 'deleted', 'absolute'] + ] + }, + 'queued_messages': { + 'options': [None, 'Queued Messages', 'messages', 'overview', 'rabbitmq.queued_messages', 'stacked'], + 'lines': [ + ['queue_totals_messages_ready', 'ready', 'absolute'], + ['queue_totals_messages_unacknowledged', 'unacknowledged', 'absolute'] + ] + }, + 'message_rates': { + 'options': [None, 'Message Rates', 'messages/s', 'overview', 'rabbitmq.message_rates', 'line'], + 'lines': [ + ['message_stats_ack', 'ack', 'incremental'], + ['message_stats_redeliver', 'redeliver', 'incremental'], + ['message_stats_deliver', 'deliver', 'incremental'], + ['message_stats_publish', 'publish', 'incremental'] + ] + } +} + + +def vhost_chart_template(name): + order = [ + 'vhost_{0}_message_stats'.format(name), + ] + family = 'vhost {0}'.format(name) + + charts = { + order[0]: { + 'options': [ + None, 'Vhost "{0}" Messages'.format(name), 'messages/s', family, 'rabbitmq.vhost_messages', 'stacked'], + 'lines': [ + ['vhost_{0}_message_stats_ack'.format(name), 'ack', 'incremental'], + ['vhost_{0}_message_stats_confirm'.format(name), 'confirm', 'incremental'], + ['vhost_{0}_message_stats_deliver'.format(name), 'deliver', 'incremental'], + ['vhost_{0}_message_stats_get'.format(name), 'get', 'incremental'], + ['vhost_{0}_message_stats_get_no_ack'.format(name), 'get_no_ack', 'incremental'], + ['vhost_{0}_message_stats_publish'.format(name), 'publish', 'incremental'], + ['vhost_{0}_message_stats_redeliver'.format(name), 'redeliver', 'incremental'], + ['vhost_{0}_message_stats_return_unroutable'.format(name), 'return_unroutable', 'incremental'], + ] + }, + } + + return order, charts + +def queue_chart_template(queue_id): + vhost, name = queue_id + order = [ + 'vhost_{0}_queue_{1}_queued_message'.format(vhost, name), + 'vhost_{0}_queue_{1}_messages_stats'.format(vhost, name), + ] + family = 'vhost {0}'.format(vhost) + + charts = { + order[0]: { + 'options': [ + None, 'Queue "{0}" in "{1}" queued messages'.format(name, vhost), 'messages', family, 'rabbitmq.queue_messages', 'line'], + 'lines': [ + ['vhost_{0}_queue_{1}_messages'.format(vhost, name), 'messages', 'absolute'], + ['vhost_{0}_queue_{1}_messages_paged_out'.format(vhost, name), 'paged_out', 'absolute'], + ['vhost_{0}_queue_{1}_messages_persistent'.format(vhost, name), 'persistent', 'absolute'], + ['vhost_{0}_queue_{1}_messages_ready'.format(vhost, name), 'ready', 'absolute'], + ['vhost_{0}_queue_{1}_messages_unacknowledged'.format(vhost, name), 'unack', 'absolute'], + ] + }, + order[1]: { + 'options': [ + None, 'Queue "{0}" in "{1}" messages stats'.format(name, vhost), 'messages/s', family, 'rabbitmq.queue_messages_stats', 'line'], + 'lines': [ + ['vhost_{0}_queue_{1}_message_stats_ack'.format(vhost, name), 'ack', 'incremental'], + ['vhost_{0}_queue_{1}_message_stats_confirm'.format(vhost, name), 'confirm', 'incremental'], + ['vhost_{0}_queue_{1}_message_stats_deliver'.format(vhost, name), 'deliver', 'incremental'], + ['vhost_{0}_queue_{1}_message_stats_get'.format(vhost, name), 'get', 'incremental'], + ['vhost_{0}_queue_{1}_message_stats_get_no_ack'.format(vhost, name), 'get_no_ack', 'incremental'], + ['vhost_{0}_queue_{1}_message_stats_publish'.format(vhost, name), 'publish', 'incremental'], + ['vhost_{0}_queue_{1}_message_stats_redeliver'.format(vhost, name), 'redeliver', 'incremental'], + ['vhost_{0}_queue_{1}_message_stats_return_unroutable'.format(vhost, name), 'return_unroutable', 'incremental'], + ] + }, + } + + return order, charts + + +class VhostStatsBuilder: + def __init__(self): + self.stats = None + + def set(self, raw_stats): + self.stats = raw_stats + + def name(self): + return self.stats['name'] + + def has_msg_stats(self): + return bool(self.stats.get('message_stats')) + + def msg_stats(self): + name = self.name() + stats = fetch_data(raw_data=self.stats, metrics=VHOST_MESSAGE_STATS) + return dict(('vhost_{0}_{1}'.format(name, k), v) for k, v in stats.items()) + +class QueueStatsBuilder: + def __init__(self): + self.stats = None + + def set(self, raw_stats): + self.stats = raw_stats + + def id(self): + return self.stats['vhost'], self.stats['name'] + + def queue_stats(self): + vhost, name = self.id() + stats = fetch_data(raw_data=self.stats, metrics=QUEUE_STATS) + return dict(('vhost_{0}_queue_{1}_{2}'.format(vhost, name, k), v) for k, v in stats.items()) + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.url = '{0}://{1}:{2}'.format( + configuration.get('scheme', 'http'), + configuration.get('host', '127.0.0.1'), + configuration.get('port', 15672), + ) + self.node_name = str() + self.vhost = VhostStatsBuilder() + self.collected_vhosts = set() + self.collect_queues_metrics = configuration.get('collect_queues_metrics', False) + self.debug("collect_queues_metrics is {0}".format("enabled" if self.collect_queues_metrics else "disabled")) + if self.collect_queues_metrics: + self.queue = QueueStatsBuilder() + self.collected_queues = set() + + def _get_data(self): + data = dict() + + stats = self.get_overview_stats() + if not stats: + return None + + data.update(stats) + + stats = self.get_nodes_stats() + if not stats: + return None + + data.update(stats) + + stats = self.get_vhosts_stats() + if stats: + data.update(stats) + + if self.collect_queues_metrics: + stats = self.get_queues_stats() + if stats: + data.update(stats) + + return data or None + + def get_overview_stats(self): + url = '{0}/{1}'.format(self.url, API_OVERVIEW) + self.debug("doing http request to '{0}'".format(url)) + raw = self._get_raw_data(url) + if not raw: + return None + + data = loads(raw) + self.node_name = data['node'] + self.debug("found node name: '{0}'".format(self.node_name)) + + stats = fetch_data(raw_data=data, metrics=OVERVIEW_STATS) + self.debug("number of metrics: {0}".format(len(stats))) + return stats + + def get_nodes_stats(self): + if self.node_name == "": + self.error("trying to get node stats, but node name is not set") + return None + + url = '{0}/{1}/{2}'.format(self.url, API_NODE, self.node_name) + self.debug("doing http request to '{0}'".format(url)) + raw = self._get_raw_data(url) + if not raw: + return None + + data = loads(raw) + stats = fetch_data(raw_data=data, metrics=NODE_STATS) + handle_disabled_disk_monitoring(stats) + self.debug("number of metrics: {0}".format(len(stats))) + return stats + + def get_vhosts_stats(self): + url = '{0}/{1}'.format(self.url, API_VHOSTS) + self.debug("doing http request to '{0}'".format(url)) + raw = self._get_raw_data(url) + if not raw: + return None + + data = dict() + vhosts = loads(raw) + charts_initialized = len(self.charts) > 0 + + for vhost in vhosts: + self.vhost.set(vhost) + if not self.vhost.has_msg_stats(): + continue + + if charts_initialized and self.vhost.name() not in self.collected_vhosts: + self.collected_vhosts.add(self.vhost.name()) + self.add_vhost_charts(self.vhost.name()) + + data.update(self.vhost.msg_stats()) + + self.debug("number of vhosts: {0}, metrics: {1}".format(len(vhosts), len(data))) + return data + + def get_queues_stats(self): + url = '{0}/{1}'.format(self.url, API_QUEUES) + self.debug("doing http request to '{0}'".format(url)) + raw = self._get_raw_data(url) + if not raw: + return None + + data = dict() + queues = loads(raw) + charts_initialized = len(self.charts) > 0 + + for queue in queues: + self.queue.set(queue) + if self.queue.id()[0] not in self.collected_vhosts: + continue + + if charts_initialized and self.queue.id() not in self.collected_queues: + self.collected_queues.add(self.queue.id()) + self.add_queue_charts(self.queue.id()) + + data.update(self.queue.queue_stats()) + + self.debug("number of queues: {0}, metrics: {1}".format(len(queues), len(data))) + return data + + def add_vhost_charts(self, vhost_name): + order, charts = vhost_chart_template(vhost_name) + + for chart_name in order: + params = [chart_name] + charts[chart_name]['options'] + dimensions = charts[chart_name]['lines'] + + new_chart = self.charts.add_chart(params) + for dimension in dimensions: + new_chart.add_dimension(dimension) + + def add_queue_charts(self, queue_id): + order, charts = queue_chart_template(queue_id) + + for chart_name in order: + params = [chart_name] + charts[chart_name]['options'] + dimensions = charts[chart_name]['lines'] + + new_chart = self.charts.add_chart(params) + for dimension in dimensions: + new_chart.add_dimension(dimension) + + +def fetch_data(raw_data, metrics): + data = dict() + for metric in metrics: + value = raw_data + metrics_list = metric.split('.') + try: + for m in metrics_list: + value = value[m] + except (KeyError, TypeError): + continue + data['_'.join(metrics_list)] = value + + return data + + +def handle_disabled_disk_monitoring(node_stats): + # https://github.com/netdata/netdata/issues/7218 + # can be "disk_free": "disk_free_monitoring_disabled" + v = node_stats.get('disk_free') + if v and not isinstance(v, int): + del node_stats['disk_free'] diff --git a/collectors/python.d.plugin/rabbitmq/rabbitmq.conf b/collectors/python.d.plugin/rabbitmq/rabbitmq.conf new file mode 100644 index 0000000..47d47a1 --- /dev/null +++ b/collectors/python.d.plugin/rabbitmq/rabbitmq.conf @@ -0,0 +1,86 @@ +# netdata python.d.plugin configuration for rabbitmq +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, rabbitmq plugin also supports the following: +# +# host: 'ipaddress' # Server ip address or hostname. Default: 127.0.0.1 +# port: 'port' # Rabbitmq port. Default: 15672 +# scheme: 'scheme' # http or https. Default: http +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# Rabbitmq plugin can also collect stats per vhost per queues, which is disabled +# by default. Please note that enabling this can induced a serious overhead on +# both netdata and rabbitmq if a look of queues are configured and used. +# +# collect_queues_metrics: 'yes/no' +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +local: + host: '127.0.0.1' + user: 'guest' + pass: 'guest' diff --git a/collectors/python.d.plugin/rethinkdbs/Makefile.inc b/collectors/python.d.plugin/rethinkdbs/Makefile.inc new file mode 100644 index 0000000..dec6044 --- /dev/null +++ b/collectors/python.d.plugin/rethinkdbs/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += rethinkdbs/rethinkdbs.chart.py +dist_pythonconfig_DATA += rethinkdbs/rethinkdbs.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += rethinkdbs/README.md rethinkdbs/Makefile.inc + diff --git a/collectors/python.d.plugin/rethinkdbs/README.md b/collectors/python.d.plugin/rethinkdbs/README.md new file mode 100644 index 0000000..d3fa355 --- /dev/null +++ b/collectors/python.d.plugin/rethinkdbs/README.md @@ -0,0 +1,53 @@ + + +# RethinkDB monitoring with Netdata + +Collects database server and cluster statistics. + +Following charts are drawn: + +1. **Connected Servers** + + - connected + - missing + +2. **Active Clients** + + - active + +3. **Queries** per second + + - queries + +4. **Documents** per second + + - documents + +## Configuration + +Edit the `python.d/rethinkdbs.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/rethinkdbs.conf +``` + +```yaml +localhost: + name : 'local' + host : '127.0.0.1' + port : 28015 + user : "user" + password : "pass" +``` + +When no configuration file is found, module tries to connect to `127.0.0.1:28015`. + +--- + + diff --git a/collectors/python.d.plugin/rethinkdbs/rethinkdbs.chart.py b/collectors/python.d.plugin/rethinkdbs/rethinkdbs.chart.py new file mode 100644 index 0000000..e3fbc36 --- /dev/null +++ b/collectors/python.d.plugin/rethinkdbs/rethinkdbs.chart.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +# Description: rethinkdb netdata python.d module +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +try: + import rethinkdb as rdb + + HAS_RETHINKDB = True +except ImportError: + HAS_RETHINKDB = False + +from bases.FrameworkServices.SimpleService import SimpleService + +ORDER = [ + 'cluster_connected_servers', + 'cluster_clients_active', + 'cluster_queries', + 'cluster_documents', +] + + +def cluster_charts(): + return { + 'cluster_connected_servers': { + 'options': [None, 'Connected Servers', 'servers', 'cluster', 'rethinkdb.cluster_connected_servers', + 'stacked'], + 'lines': [ + ['cluster_servers_connected', 'connected'], + ['cluster_servers_missing', 'missing'], + ] + }, + 'cluster_clients_active': { + 'options': [None, 'Active Clients', 'clients', 'cluster', 'rethinkdb.cluster_clients_active', + 'line'], + 'lines': [ + ['cluster_clients_active', 'active'], + ] + }, + 'cluster_queries': { + 'options': [None, 'Queries', 'queries/s', 'cluster', 'rethinkdb.cluster_queries', 'line'], + 'lines': [ + ['cluster_queries_per_sec', 'queries'], + ] + }, + 'cluster_documents': { + 'options': [None, 'Documents', 'documents/s', 'cluster', 'rethinkdb.cluster_documents', 'line'], + 'lines': [ + ['cluster_read_docs_per_sec', 'reads'], + ['cluster_written_docs_per_sec', 'writes'], + ] + }, + } + + +def server_charts(n): + o = [ + '{0}_client_connections'.format(n), + '{0}_clients_active'.format(n), + '{0}_queries'.format(n), + '{0}_documents'.format(n), + ] + f = 'server {0}'.format(n) + + c = { + o[0]: { + 'options': [None, 'Client Connections', 'connections', f, 'rethinkdb.client_connections', 'line'], + 'lines': [ + ['{0}_client_connections'.format(n), 'connections'], + ] + }, + o[1]: { + 'options': [None, 'Active Clients', 'clients', f, 'rethinkdb.clients_active', 'line'], + 'lines': [ + ['{0}_clients_active'.format(n), 'active'], + ] + }, + o[2]: { + 'options': [None, 'Queries', 'queries/s', f, 'rethinkdb.queries', 'line'], + 'lines': [ + ['{0}_queries_total'.format(n), 'queries', 'incremental'], + ] + }, + o[3]: { + 'options': [None, 'Documents', 'documents/s', f, 'rethinkdb.documents', 'line'], + 'lines': [ + ['{0}_read_docs_total'.format(n), 'reads', 'incremental'], + ['{0}_written_docs_total'.format(n), 'writes', 'incremental'], + ] + }, + } + + return o, c + + +class Cluster: + def __init__(self, raw): + self.raw = raw + + def data(self): + qe = self.raw['query_engine'] + + return { + 'cluster_clients_active': qe['clients_active'], + 'cluster_queries_per_sec': qe['queries_per_sec'], + 'cluster_read_docs_per_sec': qe['read_docs_per_sec'], + 'cluster_written_docs_per_sec': qe['written_docs_per_sec'], + 'cluster_servers_connected': 0, + 'cluster_servers_missing': 0, + } + + +class Server: + def __init__(self, raw): + self.name = raw['server'] + self.raw = raw + + def error(self): + return self.raw.get('error') + + def data(self): + qe = self.raw['query_engine'] + + d = { + 'client_connections': qe['client_connections'], + 'clients_active': qe['clients_active'], + 'queries_total': qe['queries_total'], + 'read_docs_total': qe['read_docs_total'], + 'written_docs_total': qe['written_docs_total'], + } + + return dict(('{0}_{1}'.format(self.name, k), d[k]) for k in d) + + +# https://pypi.org/project/rethinkdb/2.4.0/ +# rdb.RethinkDB() can be used as rdb drop in replacement. +# https://github.com/rethinkdb/rethinkdb-python#quickstart +def get_rethinkdb(): + if hasattr(rdb, 'RethinkDB'): + return rdb.RethinkDB() + return rdb + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = list(ORDER) + self.definitions = cluster_charts() + self.host = self.configuration.get('host', '127.0.0.1') + self.port = self.configuration.get('port', 28015) + self.user = self.configuration.get('user', 'admin') + self.password = self.configuration.get('password') + self.timeout = self.configuration.get('timeout', 2) + self.rdb = None + self.conn = None + self.alive = True + + def check(self): + if not HAS_RETHINKDB: + self.error('"rethinkdb" module is needed to use rethinkdbs.py') + return False + + self.debug("rethinkdb driver version {0}".format(rdb.__version__)) + self.rdb = get_rethinkdb() + + if not self.connect(): + return None + + stats = self.get_stats() + + if not stats: + return None + + for v in stats[1:]: + if get_id(v) == 'server': + o, c = server_charts(v['server']) + self.order.extend(o) + self.definitions.update(c) + + return True + + def get_data(self): + if not self.is_alive(): + return None + + stats = self.get_stats() + + if not stats: + return None + + data = dict() + + # cluster + data.update(Cluster(stats[0]).data()) + + # servers + for v in stats[1:]: + if get_id(v) != 'server': + continue + + s = Server(v) + + if s.error(): + data['cluster_servers_missing'] += 1 + else: + data['cluster_servers_connected'] += 1 + data.update(s.data()) + + return data + + def get_stats(self): + try: + return list(self.rdb.db('rethinkdb').table('stats').run(self.conn).items) + except rdb.errors.ReqlError: + self.alive = False + return None + + def connect(self): + try: + self.conn = self.rdb.connect( + host=self.host, + port=self.port, + user=self.user, + password=self.password, + timeout=self.timeout, + ) + self.alive = True + return True + except rdb.errors.ReqlError as error: + self.error('Connection to {0}:{1} failed: {2}'.format(self.host, self.port, error)) + return False + + def reconnect(self): + # The connection is already closed after rdb.errors.ReqlError, + # so we do not need to call conn.close() + if self.connect(): + return True + return False + + def is_alive(self): + if not self.alive: + return self.reconnect() + return True + + +def get_id(v): + return v['id'][0] diff --git a/collectors/python.d.plugin/rethinkdbs/rethinkdbs.conf b/collectors/python.d.plugin/rethinkdbs/rethinkdbs.conf new file mode 100644 index 0000000..d671acb --- /dev/null +++ b/collectors/python.d.plugin/rethinkdbs/rethinkdbs.conf @@ -0,0 +1,76 @@ +# netdata python.d.plugin configuration for rethinkdb +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, rethinkdb also supports the following: +# +# host: IP or HOSTNAME # default is 'localhost' +# port: PORT # default is 28015 +# user: USERNAME # default is 'admin' +# password: PASSWORD # not set by default +# timeout: TIMEOUT # default is 2 + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +local: + name: 'local' + host: 'localhost' diff --git a/collectors/python.d.plugin/retroshare/Makefile.inc b/collectors/python.d.plugin/retroshare/Makefile.inc new file mode 100644 index 0000000..891193e --- /dev/null +++ b/collectors/python.d.plugin/retroshare/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += retroshare/retroshare.chart.py +dist_pythonconfig_DATA += retroshare/retroshare.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += retroshare/README.md retroshare/Makefile.inc + diff --git a/collectors/python.d.plugin/retroshare/README.md b/collectors/python.d.plugin/retroshare/README.md new file mode 100644 index 0000000..297df9f --- /dev/null +++ b/collectors/python.d.plugin/retroshare/README.md @@ -0,0 +1,47 @@ + + +# RetroShare monitoring with Netdata + +Monitors application bandwidth, peers and DHT metrics. + +This module will monitor one or more `RetroShare` applications, depending on your configuration. + +## Charts + +This module produces the following charts: + +- Bandwidth in `kilobits/s` +- Peers in `peers` +- DHT in `peers` + + +## Configuration + +Edit the `python.d/retroshare.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/retroshare.conf +``` + +Here is an example for 2 servers: + +```yaml +localhost: + url : 'http://localhost:9090' + user : "user" + password : "pass" + +remote: + url : 'http://203.0.113.1:9090' + user : "user" + password : "pass" +``` +--- + + diff --git a/collectors/python.d.plugin/retroshare/retroshare.chart.py b/collectors/python.d.plugin/retroshare/retroshare.chart.py new file mode 100644 index 0000000..3f9593e --- /dev/null +++ b/collectors/python.d.plugin/retroshare/retroshare.chart.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Description: RetroShare netdata python.d module +# Authors: sehraf +# SPDX-License-Identifier: GPL-3.0-or-later + +import json + +from bases.FrameworkServices.UrlService import UrlService + +ORDER = [ + 'bandwidth', + 'peers', + 'dht', +] + +CHARTS = { + 'bandwidth': { + 'options': [None, 'RetroShare Bandwidth', 'kilobits/s', 'RetroShare', 'retroshare.bandwidth', 'area'], + 'lines': [ + ['bandwidth_up_kb', 'Upload'], + ['bandwidth_down_kb', 'Download'] + ] + }, + 'peers': { + 'options': [None, 'RetroShare Peers', 'peers', 'RetroShare', 'retroshare.peers', 'line'], + 'lines': [ + ['peers_all', 'All friends'], + ['peers_connected', 'Connected friends'] + ] + }, + 'dht': { + 'options': [None, 'Retroshare DHT', 'peers', 'RetroShare', 'retroshare.dht', 'line'], + 'lines': [ + ['dht_size_all', 'DHT nodes estimated'], + ['dht_size_rs', 'RS nodes estimated'] + ] + } +} + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.baseurl = self.configuration.get('url', 'http://localhost:9090') + + def _get_stats(self): + """ + Format data received from http request + :return: dict + """ + try: + raw = self._get_raw_data() + parsed = json.loads(raw) + if str(parsed['returncode']) != 'ok': + return None + except (TypeError, ValueError): + return None + + return parsed['data'][0] + + def _get_data(self): + """ + Get data from API + :return: dict + """ + self.url = self.baseurl + '/api/v2/stats' + data = self._get_stats() + if data is None: + return None + + data['bandwidth_up_kb'] = data['bandwidth_up_kb'] * -1 + if data['dht_active'] is False: + data['dht_size_all'] = None + data['dht_size_rs'] = None + + return data diff --git a/collectors/python.d.plugin/retroshare/retroshare.conf b/collectors/python.d.plugin/retroshare/retroshare.conf new file mode 100644 index 0000000..3d0af53 --- /dev/null +++ b/collectors/python.d.plugin/retroshare/retroshare.conf @@ -0,0 +1,72 @@ +# netdata python.d.plugin configuration for RetroShare +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, RetroShare also supports the following: +# +# - url: 'url' # the URL to the WebUI +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name: 'local' + url: 'http://localhost:9090' diff --git a/collectors/python.d.plugin/riakkv/Makefile.inc b/collectors/python.d.plugin/riakkv/Makefile.inc new file mode 100644 index 0000000..87d29f8 --- /dev/null +++ b/collectors/python.d.plugin/riakkv/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += riakkv/riakkv.chart.py +dist_pythonconfig_DATA += riakkv/riakkv.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += riakkv/README.md riakkv/Makefile.inc + diff --git a/collectors/python.d.plugin/riakkv/README.md b/collectors/python.d.plugin/riakkv/README.md new file mode 100644 index 0000000..fe62c67 --- /dev/null +++ b/collectors/python.d.plugin/riakkv/README.md @@ -0,0 +1,126 @@ + + +# Riak KV monitoring with Netdata + +Collects database stats from `/stats` endpoint. + +## Requirements + +- An accessible `/stats` endpoint. See [the Riak KV configuration reference documentation](https://docs.riak.com/riak/kv/2.2.3/configuring/reference/#client-interfaces) + for how to enable this. + +The following charts are included, which are mostly derived from the metrics +listed +[here](https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#riak-metrics-to-graph). + +1. **Throughput** in operations/s + +- **KV operations** + - gets + - puts + +- **Data type updates** + - counters + - sets + - maps + +- **Search queries** + - queries + +- **Search documents** + - indexed + +- **Strong consistency operations** + - gets + - puts + +2. **Latency** in milliseconds + +- **KV latency** of the past minute + - get (mean, median, 95th / 99th / 100th percentile) + - put (mean, median, 95th / 99th / 100th percentile) + +- **Data type latency** of the past minute + - counter_merge (mean, median, 95th / 99th / 100th percentile) + - set_merge (mean, median, 95th / 99th / 100th percentile) + - map_merge (mean, median, 95th / 99th / 100th percentile) + +- **Search latency** of the past minute + - query (median, min, max, 95th / 99th percentile) + - index (median, min, max, 95th / 99th percentile) + +- **Strong consistency latency** of the past minute + - get (mean, median, 95th / 99th / 100th percentile) + - put (mean, median, 95th / 99th / 100th percentile) + +3. **Erlang VM metrics** + +- **System counters** + - processes + +- **Memory allocation** in MB + - processes.allocated + - processes.used + +4. **General load / health metrics** + +- **Siblings encountered in KV operations** during the past minute + - get (mean, median, 95th / 99th / 100th percentile) + +- **Object size in KV operations** during the past minute in KB + - get (mean, median, 95th / 99th / 100th percentile) + +- **Message queue length** in unprocessed messages + - vnodeq_size (mean, median, 95th / 99th / 100th percentile) + +- **Index operations** encountered by Search + - errors + +- **Protocol buffer connections** + - active + +- **Repair operations coordinated by this node** + - read + +- **Active finite state machines by kind** + - get + - put + - secondary_index + - list_keys + +- **Rejected finite state machines** + - get + - put + +- **Number of writes to Search failed due to bad data format by reason** + - bad_entry + - extract_fail + +## Configuration + +Edit the `python.d/riakkv.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/riakkv.conf +``` + +The module needs to be passed the full URL to Riak's stats endpoint. +For example: + +```yaml +myriak: + url: http://myriak.example.com:8098/stats +``` + +With no explicit configuration given, the module will attempt to connect to +`http://localhost:8098/stats`. + +The default update frequency for the plugin is set to 2 seconds as Riak +internally updates the metrics every second. If we were to update the metrics +every second, the resulting graph would contain odd jitter. diff --git a/collectors/python.d.plugin/riakkv/riakkv.chart.py b/collectors/python.d.plugin/riakkv/riakkv.chart.py new file mode 100644 index 0000000..c390c8b --- /dev/null +++ b/collectors/python.d.plugin/riakkv/riakkv.chart.py @@ -0,0 +1,334 @@ +# -*- coding: utf-8 -*- +# Description: riak netdata python.d module +# +# See also: +# https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html + +from json import loads + +from bases.FrameworkServices.UrlService import UrlService + +# Riak updates the metrics at the /stats endpoint every 1 second. +# If we use `update_every = 1` here, that means we might get weird jitter in the graph, +# so the default is set to 2 seconds to prevent it. +update_every = 2 + +# charts order (can be overridden if you want less charts, or different order) +ORDER = [ + # Throughput metrics + # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#throughput-metrics + # Collected in totals. + "kv.node_operations", # K/V node operations. + "dt.vnode_updates", # Data type vnode updates. + "search.queries", # Search queries on the node. + "search.documents", # Documents indexed by Search. + "consistent.operations", # Consistent node operations. + + # Latency metrics + # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#throughput-metrics + # Collected for the past minute in milliseconds, + # returned from riak in microseconds. + "kv.latency.get", # K/V GET FSM traversal latency. + "kv.latency.put", # K/V PUT FSM traversal latency. + "dt.latency.counter", # Update Counter Data type latency. + "dt.latency.set", # Update Set Data type latency. + "dt.latency.map", # Update Map Data type latency. + "search.latency.query", # Search query latency. + "search.latency.index", # Time it takes for search to index a new document. + "consistent.latency.get", # Strong consistent read latency. + "consistent.latency.put", # Strong consistent write latency. + + # Erlang resource usage metrics + # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#erlang-resource-usage-metrics + # Processes collected as a gauge, + # memory collected as Megabytes, returned as bytes from Riak. + "vm.processes", # Number of processes currently running in the Erlang VM. + "vm.memory.processes", # Total amount of memory allocated & used for Erlang processes. + + # General Riak Load / Health metrics + # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#general-riak-load-health-metrics + # The following are collected by Riak over the past minute: + "kv.siblings_encountered.get", # Siblings encountered during GET operations by this node. + "kv.objsize.get", # Object size encountered by this node. + "search.vnodeq_size", # Number of unprocessed messages in the vnode message queues (Search). + # The following are calculated in total, or as gauges: + "search.index_errors", # Errors of the search subsystem while indexing documents. + "core.pbc", # Number of currently active protocol buffer connections. + "core.repairs", # Total read repair operations coordinated by this node. + "core.fsm_active", # Active finite state machines by kind. + "core.fsm_rejected", # Rejected finite state machines by kind. + + # General Riak Search Load / Health metrics + # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#general-riak-search-load-health-metrics + # Reported as counters. + "search.errors", # Write and read errors of the Search subsystem. +] + +CHARTS = { + # Throughput metrics + "kv.node_operations": { + "options": [None, "Reads & writes coordinated by this node", "operations/s", "throughput", "riak.kv.throughput", + "line"], + "lines": [ + ["node_gets_total", "gets", "incremental"], + ["node_puts_total", "puts", "incremental"] + ] + }, + "dt.vnode_updates": { + "options": [None, "Update operations coordinated by local vnodes by data type", "operations/s", "throughput", + "riak.dt.vnode_updates", "line"], + "lines": [ + ["vnode_counter_update_total", "counters", "incremental"], + ["vnode_set_update_total", "sets", "incremental"], + ["vnode_map_update_total", "maps", "incremental"], + ] + }, + "search.queries": { + "options": [None, "Search queries on the node", "queries/s", "throughput", "riak.search", "line"], + "lines": [ + ["search_query_throughput_count", "queries", "incremental"] + ] + }, + "search.documents": { + "options": [None, "Documents indexed by search", "documents/s", "throughput", "riak.search.documents", "line"], + "lines": [ + ["search_index_throughput_count", "indexed", "incremental"] + ] + }, + "consistent.operations": { + "options": [None, "Consistent node operations", "operations/s", "throughput", "riak.consistent.operations", + "line"], + "lines": [ + ["consistent_gets_total", "gets", "incremental"], + ["consistent_puts_total", "puts", "incremental"], + ] + }, + + # Latency metrics + "kv.latency.get": { + "options": [None, "Time between reception of a client GET request and subsequent response to client", "ms", + "latency", "riak.kv.latency.get", "line"], + "lines": [ + ["node_get_fsm_time_mean", "mean", "absolute", 1, 1000], + ["node_get_fsm_time_median", "median", "absolute", 1, 1000], + ["node_get_fsm_time_95", "95", "absolute", 1, 1000], + ["node_get_fsm_time_99", "99", "absolute", 1, 1000], + ["node_get_fsm_time_100", "100", "absolute", 1, 1000], + ] + }, + "kv.latency.put": { + "options": [None, "Time between reception of a client PUT request and subsequent response to client", "ms", + "latency", "riak.kv.latency.put", "line"], + "lines": [ + ["node_put_fsm_time_mean", "mean", "absolute", 1, 1000], + ["node_put_fsm_time_median", "median", "absolute", 1, 1000], + ["node_put_fsm_time_95", "95", "absolute", 1, 1000], + ["node_put_fsm_time_99", "99", "absolute", 1, 1000], + ["node_put_fsm_time_100", "100", "absolute", 1, 1000], + ] + }, + "dt.latency.counter": { + "options": [None, "Time it takes to perform an Update Counter operation", "ms", "latency", + "riak.dt.latency.counter_merge", "line"], + "lines": [ + ["object_counter_merge_time_mean", "mean", "absolute", 1, 1000], + ["object_counter_merge_time_median", "median", "absolute", 1, 1000], + ["object_counter_merge_time_95", "95", "absolute", 1, 1000], + ["object_counter_merge_time_99", "99", "absolute", 1, 1000], + ["object_counter_merge_time_100", "100", "absolute", 1, 1000], + ] + }, + "dt.latency.set": { + "options": [None, "Time it takes to perform an Update Set operation", "ms", "latency", + "riak.dt.latency.set_merge", "line"], + "lines": [ + ["object_set_merge_time_mean", "mean", "absolute", 1, 1000], + ["object_set_merge_time_median", "median", "absolute", 1, 1000], + ["object_set_merge_time_95", "95", "absolute", 1, 1000], + ["object_set_merge_time_99", "99", "absolute", 1, 1000], + ["object_set_merge_time_100", "100", "absolute", 1, 1000], + ] + }, + "dt.latency.map": { + "options": [None, "Time it takes to perform an Update Map operation", "ms", "latency", + "riak.dt.latency.map_merge", "line"], + "lines": [ + ["object_map_merge_time_mean", "mean", "absolute", 1, 1000], + ["object_map_merge_time_median", "median", "absolute", 1, 1000], + ["object_map_merge_time_95", "95", "absolute", 1, 1000], + ["object_map_merge_time_99", "99", "absolute", 1, 1000], + ["object_map_merge_time_100", "100", "absolute", 1, 1000], + ] + }, + "search.latency.query": { + "options": [None, "Search query latency", "ms", "latency", "riak.search.latency.query", "line"], + "lines": [ + ["search_query_latency_median", "median", "absolute", 1, 1000], + ["search_query_latency_min", "min", "absolute", 1, 1000], + ["search_query_latency_95", "95", "absolute", 1, 1000], + ["search_query_latency_99", "99", "absolute", 1, 1000], + ["search_query_latency_999", "999", "absolute", 1, 1000], + ["search_query_latency_max", "max", "absolute", 1, 1000], + ] + }, + "search.latency.index": { + "options": [None, "Time it takes Search to index a new document", "ms", "latency", "riak.search.latency.index", + "line"], + "lines": [ + ["search_index_latency_median", "median", "absolute", 1, 1000], + ["search_index_latency_min", "min", "absolute", 1, 1000], + ["search_index_latency_95", "95", "absolute", 1, 1000], + ["search_index_latency_99", "99", "absolute", 1, 1000], + ["search_index_latency_999", "999", "absolute", 1, 1000], + ["search_index_latency_max", "max", "absolute", 1, 1000], + ] + }, + + # Riak Strong Consistency metrics + "consistent.latency.get": { + "options": [None, "Strongly consistent read latency", "ms", "latency", "riak.consistent.latency.get", "line"], + "lines": [ + ["consistent_get_time_mean", "mean", "absolute", 1, 1000], + ["consistent_get_time_median", "median", "absolute", 1, 1000], + ["consistent_get_time_95", "95", "absolute", 1, 1000], + ["consistent_get_time_99", "99", "absolute", 1, 1000], + ["consistent_get_time_100", "100", "absolute", 1, 1000], + ] + }, + "consistent.latency.put": { + "options": [None, "Strongly consistent write latency", "ms", "latency", "riak.consistent.latency.put", "line"], + "lines": [ + ["consistent_put_time_mean", "mean", "absolute", 1, 1000], + ["consistent_put_time_median", "median", "absolute", 1, 1000], + ["consistent_put_time_95", "95", "absolute", 1, 1000], + ["consistent_put_time_99", "99", "absolute", 1, 1000], + ["consistent_put_time_100", "100", "absolute", 1, 1000], + ] + }, + + # BEAM metrics + "vm.processes": { + "options": [None, "Total processes running in the Erlang VM", "total", "vm", "riak.vm", "line"], + "lines": [ + ["sys_process_count", "processes", "absolute"], + ] + }, + "vm.memory.processes": { + "options": [None, "Memory allocated & used by Erlang processes", "MB", "vm", "riak.vm.memory.processes", + "line"], + "lines": [ + ["memory_processes", "allocated", "absolute", 1, 1024 * 1024], + ["memory_processes_used", "used", "absolute", 1, 1024 * 1024] + ] + }, + + # General Riak Load/Health metrics + "kv.siblings_encountered.get": { + "options": [None, "Number of siblings encountered during GET operations by this node during the past minute", + "siblings", "load", "riak.kv.siblings_encountered.get", "line"], + "lines": [ + ["node_get_fsm_siblings_mean", "mean", "absolute"], + ["node_get_fsm_siblings_median", "median", "absolute"], + ["node_get_fsm_siblings_95", "95", "absolute"], + ["node_get_fsm_siblings_99", "99", "absolute"], + ["node_get_fsm_siblings_100", "100", "absolute"], + ] + }, + "kv.objsize.get": { + "options": [None, "Object size encountered by this node during the past minute", "KB", "load", + "riak.kv.objsize.get", "line"], + "lines": [ + ["node_get_fsm_objsize_mean", "mean", "absolute", 1, 1024], + ["node_get_fsm_objsize_median", "median", "absolute", 1, 1024], + ["node_get_fsm_objsize_95", "95", "absolute", 1, 1024], + ["node_get_fsm_objsize_99", "99", "absolute", 1, 1024], + ["node_get_fsm_objsize_100", "100", "absolute", 1, 1024], + ] + }, + "search.vnodeq_size": { + "options": [None, + "Number of unprocessed messages in the vnode message queues of Search on this node in the past minute", + "messages", "load", "riak.search.vnodeq_size", "line"], + "lines": [ + ["riak_search_vnodeq_mean", "mean", "absolute"], + ["riak_search_vnodeq_median", "median", "absolute"], + ["riak_search_vnodeq_95", "95", "absolute"], + ["riak_search_vnodeq_99", "99", "absolute"], + ["riak_search_vnodeq_100", "100", "absolute"], + ] + }, + "search.index_errors": { + "options": [None, "Number of document index errors encountered by Search", "errors", "load", + "riak.search.index", "line"], + "lines": [ + ["search_index_fail_count", "errors", "absolute"] + ] + }, + "core.pbc": { + "options": [None, "Protocol buffer connections by status", "connections", "load", + "riak.core.protobuf_connections", "line"], + "lines": [ + ["pbc_active", "active", "absolute"], + # ["pbc_connects", "established_pastmin", "absolute"] + ] + }, + "core.repairs": { + "options": [None, "Number of repair operations this node has coordinated", "repairs", "load", + "riak.core.repairs", "line"], + "lines": [ + ["read_repairs", "read", "absolute"] + ] + }, + "core.fsm_active": { + "options": [None, "Active finite state machines by kind", "fsms", "load", "riak.core.fsm_active", "line"], + "lines": [ + ["node_get_fsm_active", "get", "absolute"], + ["node_put_fsm_active", "put", "absolute"], + ["index_fsm_active", "secondary index", "absolute"], + ["list_fsm_active", "list keys", "absolute"] + ] + }, + "core.fsm_rejected": { + # Writing "Sidejob's" here seems to cause some weird issues: it results in this chart being rendered in + # its own context and additionally, moves the entire Riak graph all the way up to the top of the Netdata + # dashboard for some reason. + "options": [None, "Finite state machines being rejected by Sidejobs overload protection", "fsms", "load", + "riak.core.fsm_rejected", "line"], + "lines": [ + ["node_get_fsm_rejected", "get", "absolute"], + ["node_put_fsm_rejected", "put", "absolute"] + ] + }, + + # General Riak Search Load / Health metrics + "search.errors": { + "options": [None, "Number of writes to Search failed due to bad data format by reason", "writes", "load", + "riak.search.index", "line"], + "lines": [ + ["search_index_bad_entry_count", "bad_entry", "absolute"], + ["search_index_extract_fail_count", "extract_fail", "absolute"], + ] + } +} + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + raw = self._get_raw_data() + if not raw: + return None + + try: + return loads(raw) + except (TypeError, ValueError) as err: + self.error(err) + return None diff --git a/collectors/python.d.plugin/riakkv/riakkv.conf b/collectors/python.d.plugin/riakkv/riakkv.conf new file mode 100644 index 0000000..be01c48 --- /dev/null +++ b/collectors/python.d.plugin/riakkv/riakkv.conf @@ -0,0 +1,68 @@ +# netdata python.d.plugin configuration for riak +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +local: + url : 'http://localhost:8098/stats' diff --git a/collectors/python.d.plugin/samba/Makefile.inc b/collectors/python.d.plugin/samba/Makefile.inc new file mode 100644 index 0000000..230a8ba --- /dev/null +++ b/collectors/python.d.plugin/samba/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += samba/samba.chart.py +dist_pythonconfig_DATA += samba/samba.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += samba/README.md samba/Makefile.inc + diff --git a/collectors/python.d.plugin/samba/README.md b/collectors/python.d.plugin/samba/README.md new file mode 100644 index 0000000..767df12 --- /dev/null +++ b/collectors/python.d.plugin/samba/README.md @@ -0,0 +1,121 @@ + + +# Samba monitoring with Netdata + +Monitors the performance metrics of Samba file sharing using `smbstatus` command-line tool. + +Executed commands: + +- `sudo -n smbstatus -P` + +## Requirements + +- `smbstatus` program +- `sudo` program +- `smbd` must be compiled with profiling enabled +- `smbd` must be started either with the `-P 1` option or inside `smb.conf` using `smbd profiling level` + +The module uses `smbstatus`, which can only be executed by `root`. It uses +`sudo` and assumes that it is configured such that the `netdata` user can execute `smbstatus` as root without a +password. + +- Add to your `/etc/sudoers` file: + +`which smbstatus` shows the full path to the binary. + +```bash +netdata ALL=(root) NOPASSWD: /path/to/smbstatus +``` + +- Reset Netdata's systemd + unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux + distributions with systemd) + +The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `smbstatus` using `sudo`. + + +As the `root` user, do the following: + +```cmd +mkdir /etc/systemd/system/netdata.service.d +echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf +systemctl daemon-reload +systemctl restart netdata.service +``` + +## Charts + +1. **Syscall R/Ws** in kilobytes/s + + - sendfile + - recvfile + +2. **Smb2 R/Ws** in kilobytes/s + + - readout + - writein + - readin + - writeout + +3. **Smb2 Create/Close** in operations/s + + - create + - close + +4. **Smb2 Info** in operations/s + + - getinfo + - setinfo + +5. **Smb2 Find** in operations/s + + - find + +6. **Smb2 Notify** in operations/s + + - notify + +7. **Smb2 Lesser Ops** as counters + + - tcon + - negprot + - tdis + - cancel + - logoff + - flush + - lock + - keepalive + - break + - sessetup + +## Enable the collector + +The `samba` collector is disabled by default. To enable it, use `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` +file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `samba` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl +restart netdata`, or the [appropriate method](/docs/configure/start-stop-restart.md) for your system. + +## Configuration + +Edit the `python.d/samba.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/samba.conf +``` + +--- + + diff --git a/collectors/python.d.plugin/samba/samba.chart.py b/collectors/python.d.plugin/samba/samba.chart.py new file mode 100644 index 0000000..8eebcd6 --- /dev/null +++ b/collectors/python.d.plugin/samba/samba.chart.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +# Description: samba netdata python.d module +# Author: Christopher Cox +# SPDX-License-Identifier: GPL-3.0-or-later +# +# The netdata user needs to be able to be able to sudo the smbstatus program +# without password: +# netdata ALL=(ALL) NOPASSWD: /usr/bin/smbstatus -P +# +# This makes calls to smbstatus -P +# +# This just looks at a couple of values out of syscall, and some from smb2. +# +# The Lesser Ops chart is merely a display of current counter values. They +# didn't seem to change much to me. However, if you notice something changing +# a lot there, bring one or more out into its own chart and make it incremental +# (like find and notify... good examples). + +import re +import os + +from bases.FrameworkServices.ExecutableService import ExecutableService +from bases.collection import find_binary + +disabled_by_default = True + +update_every = 5 + +ORDER = [ + 'syscall_rw', + 'smb2_rw', + 'smb2_create_close', + 'smb2_info', + 'smb2_find', + 'smb2_notify', + 'smb2_sm_count' +] + +CHARTS = { + 'syscall_rw': { + 'options': [None, 'R/Ws', 'KiB/s', 'syscall', 'syscall.rw', 'area'], + 'lines': [ + ['syscall_sendfile_bytes', 'sendfile', 'incremental', 1, 1024], + ['syscall_recvfile_bytes', 'recvfile', 'incremental', -1, 1024] + ] + }, + 'smb2_rw': { + 'options': [None, 'R/Ws', 'KiB/s', 'smb2', 'smb2.rw', 'area'], + 'lines': [ + ['smb2_read_outbytes', 'readout', 'incremental', 1, 1024], + ['smb2_write_inbytes', 'writein', 'incremental', -1, 1024], + ['smb2_read_inbytes', 'readin', 'incremental', 1, 1024], + ['smb2_write_outbytes', 'writeout', 'incremental', -1, 1024] + ] + }, + 'smb2_create_close': { + 'options': [None, 'Create/Close', 'operations/s', 'smb2', 'smb2.create_close', 'line'], + 'lines': [ + ['smb2_create_count', 'create', 'incremental', 1, 1], + ['smb2_close_count', 'close', 'incremental', -1, 1] + ] + }, + 'smb2_info': { + 'options': [None, 'Info', 'operations/s', 'smb2', 'smb2.get_set_info', 'line'], + 'lines': [ + ['smb2_getinfo_count', 'getinfo', 'incremental', 1, 1], + ['smb2_setinfo_count', 'setinfo', 'incremental', -1, 1] + ] + }, + 'smb2_find': { + 'options': [None, 'Find', 'operations/s', 'smb2', 'smb2.find', 'line'], + 'lines': [ + ['smb2_find_count', 'find', 'incremental', 1, 1] + ] + }, + 'smb2_notify': { + 'options': [None, 'Notify', 'operations/s', 'smb2', 'smb2.notify', 'line'], + 'lines': [ + ['smb2_notify_count', 'notify', 'incremental', 1, 1] + ] + }, + 'smb2_sm_count': { + 'options': [None, 'Lesser Ops', 'count', 'smb2', 'smb2.sm_counters', 'stacked'], + 'lines': [ + ['smb2_tcon_count', 'tcon', 'absolute', 1, 1], + ['smb2_negprot_count', 'negprot', 'absolute', 1, 1], + ['smb2_tdis_count', 'tdis', 'absolute', 1, 1], + ['smb2_cancel_count', 'cancel', 'absolute', 1, 1], + ['smb2_logoff_count', 'logoff', 'absolute', 1, 1], + ['smb2_flush_count', 'flush', 'absolute', 1, 1], + ['smb2_lock_count', 'lock', 'absolute', 1, 1], + ['smb2_keepalive_count', 'keepalive', 'absolute', 1, 1], + ['smb2_break_count', 'break', 'absolute', 1, 1], + ['smb2_sessetup_count', 'sessetup', 'absolute', 1, 1] + ] + } +} + +SUDO = 'sudo' +SMBSTATUS = 'smbstatus' + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.rgx_smb2 = re.compile(r'(smb2_[^:]+|syscall_.*file_bytes):\s+(\d+)') + + def check(self): + smbstatus_binary = find_binary(SMBSTATUS) + if not smbstatus_binary: + self.error("can't locate '{0}' binary".format(SMBSTATUS)) + return False + + if os.getuid() == 0: + self.command = ' '.join([smbstatus_binary, '-P']) + return ExecutableService.check(self) + + sudo_binary = find_binary(SUDO) + if not sudo_binary: + self.error("can't locate '{0}' binary".format(SUDO)) + return False + command = [sudo_binary, '-n', '-l', smbstatus_binary, '-P'] + smbstatus = '{0} -P'.format(smbstatus_binary) + allowed = self._get_raw_data(command=command) + if not (allowed and allowed[0].strip() == smbstatus): + self.error("not allowed to run sudo for command '{0}'".format(smbstatus)) + return False + self.command = ' '.join([sudo_binary, '-n', smbstatus_binary, '-P']) + return ExecutableService.check(self) + + def _get_data(self): + """ + Format data received from shell command + :return: dict + """ + raw_data = self._get_raw_data() + if not raw_data: + return None + + parsed = self.rgx_smb2.findall(' '.join(raw_data)) + + return dict(parsed) or None diff --git a/collectors/python.d.plugin/samba/samba.conf b/collectors/python.d.plugin/samba/samba.conf new file mode 100644 index 0000000..db15d4e --- /dev/null +++ b/collectors/python.d.plugin/samba/samba.conf @@ -0,0 +1,60 @@ +# netdata python.d.plugin configuration for samba +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +update_every: 5 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds \ No newline at end of file diff --git a/collectors/python.d.plugin/sensors/Makefile.inc b/collectors/python.d.plugin/sensors/Makefile.inc new file mode 100644 index 0000000..5fb26e1 --- /dev/null +++ b/collectors/python.d.plugin/sensors/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += sensors/sensors.chart.py +dist_pythonconfig_DATA += sensors/sensors.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += sensors/README.md sensors/Makefile.inc + diff --git a/collectors/python.d.plugin/sensors/README.md b/collectors/python.d.plugin/sensors/README.md new file mode 100644 index 0000000..e791195 --- /dev/null +++ b/collectors/python.d.plugin/sensors/README.md @@ -0,0 +1,33 @@ + + +# Linux machine sensors monitoring with Netdata + +Reads system sensors information (temperature, voltage, electric current, power, etc.). + +Charts are created dynamically. + +## Configuration + +Edit the `python.d/sensors.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/sensors.conf +``` + +### possible issues + +There have been reports from users that on certain servers, ACPI ring buffer errors are printed by the kernel (`dmesg`) when ACPI sensors are being accessed. +We are tracking such cases in issue [#827](https://github.com/netdata/netdata/issues/827). +Please join this discussion for help. + +When `lm-sensors` doesn't work on your device (e.g. for RPi temperatures), use [the legacy bash collector](https://learn.netdata.cloud/docs/agent/collectors/charts.d.plugin/sensors) + +--- + + diff --git a/collectors/python.d.plugin/sensors/sensors.chart.py b/collectors/python.d.plugin/sensors/sensors.chart.py new file mode 100644 index 0000000..701bf64 --- /dev/null +++ b/collectors/python.d.plugin/sensors/sensors.chart.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Description: sensors netdata python.d plugin +# Author: Pawel Krupa (paulfantom) +# SPDX-License-Identifier: GPL-3.0-or-later + +from collections import defaultdict + +from bases.FrameworkServices.SimpleService import SimpleService +from third_party import lm_sensors as sensors + +ORDER = [ + 'temperature', + 'fan', + 'voltage', + 'current', + 'power', + 'energy', + 'humidity', +] + +# This is a prototype of chart definition which is used to dynamically create self.definitions +CHARTS = { + 'temperature': { + 'options': [None, 'Temperature', 'Celsius', 'temperature', 'sensors.temperature', 'line'], + 'lines': [ + [None, None, 'absolute', 1, 1000] + ] + }, + 'voltage': { + 'options': [None, 'Voltage', 'Volts', 'voltage', 'sensors.voltage', 'line'], + 'lines': [ + [None, None, 'absolute', 1, 1000] + ] + }, + 'current': { + 'options': [None, 'Current', 'Ampere', 'current', 'sensors.current', 'line'], + 'lines': [ + [None, None, 'absolute', 1, 1000] + ] + }, + 'power': { + 'options': [None, 'Power', 'Watt', 'power', 'sensors.power', 'line'], + 'lines': [ + [None, None, 'absolute', 1, 1000] + ] + }, + 'fan': { + 'options': [None, 'Fans speed', 'Rotations/min', 'fans', 'sensors.fan', 'line'], + 'lines': [ + [None, None, 'absolute', 1, 1000] + ] + }, + 'energy': { + 'options': [None, 'Energy', 'Joule', 'energy', 'sensors.energy', 'line'], + 'lines': [ + [None, None, 'incremental', 1, 1000] + ] + }, + 'humidity': { + 'options': [None, 'Humidity', 'Percent', 'humidity', 'sensors.humidity', 'line'], + 'lines': [ + [None, None, 'absolute', 1, 1000] + ] + } +} + +LIMITS = { + 'temperature': [-127, 1000], + 'voltage': [-127, 127], + 'current': [-127, 127], + 'fan': [0, 65535] +} + +TYPE_MAP = { + 0: 'voltage', + 1: 'fan', + 2: 'temperature', + 3: 'power', + 4: 'energy', + 5: 'current', + 6: 'humidity', + # 7: 'max_main', + # 16: 'vid', + # 17: 'intrusion', + # 18: 'max_other', + # 24: 'beep_enable' +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = list() + self.definitions = dict() + self.chips = configuration.get('chips') + self.priority = 60000 + + def get_data(self): + seen, data = dict(), dict() + try: + for chip in sensors.ChipIterator(): + chip_name = sensors.chip_snprintf_name(chip) + seen[chip_name] = defaultdict(list) + + for feat in sensors.FeatureIterator(chip): + if feat.type not in TYPE_MAP: + continue + + feat_type = TYPE_MAP[feat.type] + feat_name = str(feat.name.decode()) + feat_label = sensors.get_label(chip, feat) + feat_limits = LIMITS.get(feat_type) + sub_feat = next(sensors.SubFeatureIterator(chip, feat)) # current value + + if not sub_feat: + continue + + try: + v = sensors.get_value(chip, sub_feat.number) + except sensors.SensorsError: + continue + + if v is None: + continue + + seen[chip_name][feat_type].append((feat_name, feat_label)) + + if feat_limits and (v < feat_limits[0] or v > feat_limits[1]): + continue + + data[chip_name + '_' + feat_name] = int(v * 1000) + + except sensors.SensorsError as error: + self.error(error) + return None + + self.update_sensors_charts(seen) + + return data or None + + def update_sensors_charts(self, seen): + for chip_name, feat in seen.items(): + if self.chips and not any([chip_name.startswith(ex) for ex in self.chips]): + continue + + for feat_type, sub_feat in feat.items(): + if feat_type not in ORDER or feat_type not in CHARTS: + continue + + chart_id = '{}_{}'.format(chip_name, feat_type) + if chart_id in self.charts: + continue + + params = [chart_id] + list(CHARTS[feat_type]['options']) + new_chart = self.charts.add_chart(params) + new_chart.params['priority'] = self.get_chart_priority(feat_type) + + for name, label in sub_feat: + lines = list(CHARTS[feat_type]['lines'][0]) + lines[0] = chip_name + '_' + name + lines[1] = label + new_chart.add_dimension(lines) + + def check(self): + try: + sensors.init() + except sensors.SensorsError as error: + self.error(error) + return False + + self.priority = self.charts.priority + + return bool(self.get_data() and self.charts) + + def get_chart_priority(self, feat_type): + for i, v in enumerate(ORDER): + if v == feat_type: + return self.priority + i + return self.priority diff --git a/collectors/python.d.plugin/sensors/sensors.conf b/collectors/python.d.plugin/sensors/sensors.conf new file mode 100644 index 0000000..d3369ba --- /dev/null +++ b/collectors/python.d.plugin/sensors/sensors.conf @@ -0,0 +1,61 @@ +# netdata python.d.plugin configuration for sensors +# +# This file is in YaML format. Generally the format is: +# +# name: value +# + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# Limit the number of sensors types. +# Comment the ones you want to disable. +# Also, re-arranging this list controls the order of the charts at the +# netdata dashboard. + +types: + - temperature + - fan + - voltage + - current + - power + - energy + - humidity + +# ---------------------------------------------------------------------- +# Limit the number of sensors chips. +# Uncomment the first line (chips:) and add chip names below it. +# The chip names that start with like that will be matched. +# You can find the chip names using the sensors command. + +#chips: +# - i8k +# - coretemp +# +# chip names can be found using the sensors shell command +# the prefix is matched (anything that starts like that) +# +#---------------------------------------------------------------------- + diff --git a/collectors/python.d.plugin/smartd_log/Makefile.inc b/collectors/python.d.plugin/smartd_log/Makefile.inc new file mode 100644 index 0000000..dc1d0f3 --- /dev/null +++ b/collectors/python.d.plugin/smartd_log/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += smartd_log/smartd_log.chart.py +dist_pythonconfig_DATA += smartd_log/smartd_log.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += smartd_log/README.md smartd_log/Makefile.inc + diff --git a/collectors/python.d.plugin/smartd_log/README.md b/collectors/python.d.plugin/smartd_log/README.md new file mode 100644 index 0000000..eef34ce --- /dev/null +++ b/collectors/python.d.plugin/smartd_log/README.md @@ -0,0 +1,125 @@ + + +# Storage devices monitoring with Netdata + +Monitors `smartd` log files to collect HDD/SSD S.M.A.R.T attributes. + +## Requirements + +- `smartmontools` + +It produces following charts for SCSI devices: + +1. **Read Error Corrected** + +2. **Read Error Uncorrected** + +3. **Write Error Corrected** + +4. **Write Error Uncorrected** + +5. **Verify Error Corrected** + +6. **Verify Error Uncorrected** + +7. **Temperature** + +For ATA devices: + +1. **Read Error Rate** + +2. **Seek Error Rate** + +3. **Soft Read Error Rate** + +4. **Write Error Rate** + +5. **SATA Interface Downshift** + +6. **UDMA CRC Error Count** + +7. **Throughput Performance** + +8. **Seek Time Performance** + +9. **Start/Stop Count** + +10. **Power-On Hours Count** + +11. **Power Cycle Count** + +12. **Unexpected Power Loss** + +13. **Spin-Up Time** + +14. **Spin-up Retries** + +15. **Calibration Retries** + +16. **Temperature** + +17. **Reallocated Sectors Count** + +18. **Reserved Block Count** + +19. **Program Fail Count** + +20. **Erase Fail Count** + +21. **Wear Leveller Worst Case Erase Count** + +22. **Unused Reserved NAND Blocks** + +23. **Reallocation Event Count** + +24. **Current Pending Sector Count** + +25. **Offline Uncorrectable Sector Count** + +26. **Percent Lifetime Used** + +## prerequisite + +`smartd` must be running with `-A` option to write smartd attribute information to files. + +For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_ content) in `/etc/default/smartmontools`: + +``` +# dump smartd attrs info every 600 seconds +smartd_opts="-A /var/log/smartd/ -i 600" +``` + +You may need to create the smartd directory before smartd will write to it: + +```sh +mkdir -p /var/log/smartd +``` + +Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (default location). See also for more info on the `-A --attributelog=PREFIX` command. + +`smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files. + +## Configuration + +Edit the `python.d/smartd_log.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/smartd_log.conf +``` + +```yaml +local: + log_path : '/var/log/smartd/' +``` + +If no configuration is given, module will attempt to read log files in `/var/log/smartd/` directory. + +--- + + diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py new file mode 100644 index 0000000..dc4e95d --- /dev/null +++ b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py @@ -0,0 +1,772 @@ +# -*- coding: utf-8 -*- +# Description: smart netdata python.d module +# Author: ilyam8, vorph1 +# SPDX-License-Identifier: GPL-3.0-or-later + +import os +import re +from copy import deepcopy +from time import time + +from bases.FrameworkServices.SimpleService import SimpleService +from bases.collection import read_last_line + +INCREMENTAL = 'incremental' +ABSOLUTE = 'absolute' + +ATA = 'ata' +SCSI = 'scsi' +CSV = '.csv' + +DEF_RESCAN_INTERVAL = 60 +DEF_AGE = 30 +DEF_PATH = '/var/log/smartd' + +ATTR1 = '1' +ATTR2 = '2' +ATTR3 = '3' +ATTR4 = '4' +ATTR5 = '5' +ATTR7 = '7' +ATTR8 = '8' +ATTR9 = '9' +ATTR10 = '10' +ATTR11 = '11' +ATTR12 = '12' +ATTR13 = '13' +ATTR170 = '170' +ATTR171 = '171' +ATTR172 = '172' +ATTR173 = '173' +ATTR174 = '174' +ATTR180 = '180' +ATTR183 = '183' +ATTR190 = '190' +ATTR194 = '194' +ATTR196 = '196' +ATTR197 = '197' +ATTR198 = '198' +ATTR199 = '199' +ATTR202 = '202' +ATTR206 = '206' +ATTR233 = '233' +ATTR249 = '249' +ATTR_READ_ERR_COR = 'read-total-err-corrected' +ATTR_READ_ERR_UNC = 'read-total-unc-errors' +ATTR_WRITE_ERR_COR = 'write-total-err-corrected' +ATTR_WRITE_ERR_UNC = 'write-total-unc-errors' +ATTR_VERIFY_ERR_COR = 'verify-total-err-corrected' +ATTR_VERIFY_ERR_UNC = 'verify-total-unc-errors' +ATTR_TEMPERATURE = 'temperature' + +RE_ATA = re.compile( + '(\d+);' # attribute + '(\d+);' # normalized value + '(\d+)', # raw value + re.X +) + +RE_SCSI = re.compile( + '([a-z-]+);' # attribute + '([0-9.]+)', # raw value + re.X +) + +ORDER = [ + # errors + 'read_error_rate', + 'seek_error_rate', + 'soft_read_error_rate', + 'write_error_rate', + 'read_total_err_corrected', + 'read_total_unc_errors', + 'write_total_err_corrected', + 'write_total_unc_errors', + 'verify_total_err_corrected', + 'verify_total_unc_errors', + # external failure + 'sata_interface_downshift', + 'udma_crc_error_count', + # performance + 'throughput_performance', + 'seek_time_performance', + # power + 'start_stop_count', + 'power_on_hours_count', + 'power_cycle_count', + 'unexpected_power_loss', + # spin + 'spin_up_time', + 'spin_up_retries', + 'calibration_retries', + # temperature + 'airflow_temperature_celsius', + 'temperature_celsius', + # wear + 'reallocated_sectors_count', + 'reserved_block_count', + 'program_fail_count', + 'erase_fail_count', + 'wear_leveller_worst_case_erase_count', + 'unused_reserved_nand_blocks', + 'reallocation_event_count', + 'current_pending_sector_count', + 'offline_uncorrectable_sector_count', + 'percent_lifetime_used', + 'media_wearout_indicator', +] + +CHARTS = { + 'read_error_rate': { + 'options': [None, 'Read Error Rate', 'value', 'errors', 'smartd_log.read_error_rate', 'line'], + 'lines': [], + 'attrs': [ATTR1], + 'algo': ABSOLUTE, + }, + 'seek_error_rate': { + 'options': [None, 'Seek Error Rate', 'value', 'errors', 'smartd_log.seek_error_rate', 'line'], + 'lines': [], + 'attrs': [ATTR7], + 'algo': ABSOLUTE, + }, + 'soft_read_error_rate': { + 'options': [None, 'Soft Read Error Rate', 'errors', 'errors', 'smartd_log.soft_read_error_rate', 'line'], + 'lines': [], + 'attrs': [ATTR13], + 'algo': INCREMENTAL, + }, + 'write_error_rate': { + 'options': [None, 'Write Error Rate', 'value', 'errors', 'smartd_log.write_error_rate', 'line'], + 'lines': [], + 'attrs': [ATTR206], + 'algo': ABSOLUTE, + }, + 'read_total_err_corrected': { + 'options': [None, 'Read Error Corrected', 'errors', 'errors', 'smartd_log.read_total_err_corrected', 'line'], + 'lines': [], + 'attrs': [ATTR_READ_ERR_COR], + 'algo': INCREMENTAL, + }, + 'read_total_unc_errors': { + 'options': [None, 'Read Error Uncorrected', 'errors', 'errors', 'smartd_log.read_total_unc_errors', 'line'], + 'lines': [], + 'attrs': [ATTR_READ_ERR_UNC], + 'algo': INCREMENTAL, + }, + 'write_total_err_corrected': { + 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.write_total_err_corrected', 'line'], + 'lines': [], + 'attrs': [ATTR_WRITE_ERR_COR], + 'algo': INCREMENTAL, + }, + 'write_total_unc_errors': { + 'options': [None, 'Write Error Uncorrected', 'errors', 'errors', 'smartd_log.write_total_unc_errors', 'line'], + 'lines': [], + 'attrs': [ATTR_WRITE_ERR_UNC], + 'algo': INCREMENTAL, + }, + 'verify_total_err_corrected': { + 'options': [None, 'Verify Error Corrected', 'errors', 'errors', 'smartd_log.verify_total_err_corrected', + 'line'], + 'lines': [], + 'attrs': [ATTR_VERIFY_ERR_COR], + 'algo': INCREMENTAL, + }, + 'verify_total_unc_errors': { + 'options': [None, 'Verify Error Uncorrected', 'errors', 'errors', 'smartd_log.verify_total_unc_errors', 'line'], + 'lines': [], + 'attrs': [ATTR_VERIFY_ERR_UNC], + 'algo': INCREMENTAL, + }, + 'sata_interface_downshift': { + 'options': [None, 'SATA Interface Downshift', 'events', 'external failure', + 'smartd_log.sata_interface_downshift', 'line'], + 'lines': [], + 'attrs': [ATTR183], + 'algo': INCREMENTAL, + }, + 'udma_crc_error_count': { + 'options': [None, 'UDMA CRC Error Count', 'errors', 'external failure', 'smartd_log.udma_crc_error_count', + 'line'], + 'lines': [], + 'attrs': [ATTR199], + 'algo': INCREMENTAL, + }, + 'throughput_performance': { + 'options': [None, 'Throughput Performance', 'value', 'performance', 'smartd_log.throughput_performance', + 'line'], + 'lines': [], + 'attrs': [ATTR2], + 'algo': ABSOLUTE, + }, + 'seek_time_performance': { + 'options': [None, 'Seek Time Performance', 'value', 'performance', 'smartd_log.seek_time_performance', 'line'], + 'lines': [], + 'attrs': [ATTR8], + 'algo': ABSOLUTE, + }, + 'start_stop_count': { + 'options': [None, 'Start/Stop Count', 'events', 'power', 'smartd_log.start_stop_count', 'line'], + 'lines': [], + 'attrs': [ATTR4], + 'algo': ABSOLUTE, + }, + 'power_on_hours_count': { + 'options': [None, 'Power-On Hours Count', 'hours', 'power', 'smartd_log.power_on_hours_count', 'line'], + 'lines': [], + 'attrs': [ATTR9], + 'algo': ABSOLUTE, + }, + 'power_cycle_count': { + 'options': [None, 'Power Cycle Count', 'events', 'power', 'smartd_log.power_cycle_count', 'line'], + 'lines': [], + 'attrs': [ATTR12], + 'algo': ABSOLUTE, + }, + 'unexpected_power_loss': { + 'options': [None, 'Unexpected Power Loss', 'events', 'power', 'smartd_log.unexpected_power_loss', 'line'], + 'lines': [], + 'attrs': [ATTR174], + 'algo': ABSOLUTE, + }, + 'spin_up_time': { + 'options': [None, 'Spin-Up Time', 'ms', 'spin', 'smartd_log.spin_up_time', 'line'], + 'lines': [], + 'attrs': [ATTR3], + 'algo': ABSOLUTE, + }, + 'spin_up_retries': { + 'options': [None, 'Spin-up Retries', 'retries', 'spin', 'smartd_log.spin_up_retries', 'line'], + 'lines': [], + 'attrs': [ATTR10], + 'algo': INCREMENTAL, + }, + 'calibration_retries': { + 'options': [None, 'Calibration Retries', 'retries', 'spin', 'smartd_log.calibration_retries', 'line'], + 'lines': [], + 'attrs': [ATTR11], + 'algo': INCREMENTAL, + }, + 'airflow_temperature_celsius': { + 'options': [None, 'Airflow Temperature Celsius', 'celsius', 'temperature', + 'smartd_log.airflow_temperature_celsius', 'line'], + 'lines': [], + 'attrs': [ATTR190], + 'algo': ABSOLUTE, + }, + 'temperature_celsius': { + 'options': [None, 'Temperature', 'celsius', 'temperature', 'smartd_log.temperature_celsius', 'line'], + 'lines': [], + 'attrs': [ATTR194, ATTR_TEMPERATURE], + 'algo': ABSOLUTE, + }, + 'reallocated_sectors_count': { + 'options': [None, 'Reallocated Sectors Count', 'sectors', 'wear', 'smartd_log.reallocated_sectors_count', + 'line'], + 'lines': [], + 'attrs': [ATTR5], + 'algo': ABSOLUTE, + }, + 'reserved_block_count': { + 'options': [None, 'Reserved Block Count', 'percentage', 'wear', 'smartd_log.reserved_block_count', 'line'], + 'lines': [], + 'attrs': [ATTR170], + 'algo': ABSOLUTE, + }, + 'program_fail_count': { + 'options': [None, 'Program Fail Count', 'errors', 'wear', 'smartd_log.program_fail_count', 'line'], + 'lines': [], + 'attrs': [ATTR171], + 'algo': INCREMENTAL, + }, + 'erase_fail_count': { + 'options': [None, 'Erase Fail Count', 'failures', 'wear', 'smartd_log.erase_fail_count', 'line'], + 'lines': [], + 'attrs': [ATTR172], + 'algo': INCREMENTAL, + }, + 'wear_leveller_worst_case_erase_count': { + 'options': [None, 'Wear Leveller Worst Case Erase Count', 'erases', 'wear', + 'smartd_log.wear_leveller_worst_case_erase_count', 'line'], + 'lines': [], + 'attrs': [ATTR173], + 'algo': ABSOLUTE, + }, + 'unused_reserved_nand_blocks': { + 'options': [None, 'Unused Reserved NAND Blocks', 'blocks', 'wear', 'smartd_log.unused_reserved_nand_blocks', + 'line'], + 'lines': [], + 'attrs': [ATTR180], + 'algo': ABSOLUTE, + }, + 'reallocation_event_count': { + 'options': [None, 'Reallocation Event Count', 'events', 'wear', 'smartd_log.reallocation_event_count', 'line'], + 'lines': [], + 'attrs': [ATTR196], + 'algo': INCREMENTAL, + }, + 'current_pending_sector_count': { + 'options': [None, 'Current Pending Sector Count', 'sectors', 'wear', 'smartd_log.current_pending_sector_count', + 'line'], + 'lines': [], + 'attrs': [ATTR197], + 'algo': ABSOLUTE, + }, + 'offline_uncorrectable_sector_count': { + 'options': [None, 'Offline Uncorrectable Sector Count', 'sectors', 'wear', + 'smartd_log.offline_uncorrectable_sector_count', 'line'], + 'lines': [], + 'attrs': [ATTR198], + 'algo': ABSOLUTE, + + }, + 'percent_lifetime_used': { + 'options': [None, 'Percent Lifetime Used', 'percentage', 'wear', 'smartd_log.percent_lifetime_used', 'line'], + 'lines': [], + 'attrs': [ATTR202], + 'algo': ABSOLUTE, + }, + 'media_wearout_indicator': { + 'options': [None, 'Media Wearout Indicator', 'percentage', 'wear', 'smartd_log.media_wearout_indicator', 'line'], + 'lines': [], + 'attrs': [ATTR233], + 'algo': ABSOLUTE, + }, + 'nand_writes_1gib': { + 'options': [None, 'NAND Writes', 'GiB', 'wear', 'smartd_log.nand_writes_1gib', 'line'], + 'lines': [], + 'attrs': [ATTR249], + 'algo': ABSOLUTE, + }, +} + +# NOTE: 'parse_temp' decodes ATA 194 raw value. Not heavily tested. Written by @Ferroin +# C code: +# https://github.com/smartmontools/smartmontools/blob/master/smartmontools/atacmds.cpp#L2051 +# +# Calling 'parse_temp' on the raw value will return a 4-tuple, containing +# * temperature +# * minimum +# * maximum +# * over-temperature count +# substituting None for values it can't decode. +# +# Example: +# >>> parse_temp(42952491042) +# >>> (34, 10, 43, None) +# +# +# def check_temp_word(i): +# if i <= 0x7F: +# return 0x11 +# elif i <= 0xFF: +# return 0x01 +# elif 0xFF80 <= i: +# return 0x10 +# return 0x00 +# +# +# def check_temp_range(t, b0, b1): +# if b0 > b1: +# t0, t1 = b1, b0 +# else: +# t0, t1 = b0, b1 +# +# if all([ +# -60 <= t0, +# t0 <= t, +# t <= t1, +# t1 <= 120, +# not (t0 == -1 and t1 <= 0) +# ]): +# return t0, t1 +# return None, None +# +# +# def parse_temp(raw): +# byte = list() +# word = list() +# for i in range(0, 6): +# byte.append(0xFF & (raw >> (i * 8))) +# for i in range(0, 3): +# word.append(0xFFFF & (raw >> (i * 16))) +# +# ctwd = check_temp_word(word[0]) +# +# if not word[2]: +# if ctwd and not word[1]: +# # byte[0] is temp, no other data +# return byte[0], None, None, None +# +# if ctwd and all(check_temp_range(byte[0], byte[2], byte[3])): +# # byte[0] is temp, byte[2] is max or min, byte[3] is min or max +# trange = check_temp_range(byte[0], byte[2], byte[3]) +# return byte[0], trange[0], trange[1], None +# +# if ctwd and all(check_temp_range(byte[0], byte[1], byte[2])): +# # byte[0] is temp, byte[1] is max or min, byte[2] is min or max +# trange = check_temp_range(byte[0], byte[1], byte[2]) +# return byte[0], trange[0], trange[1], None +# +# return None, None, None, None +# +# if ctwd: +# if all( +# [ +# ctwd & check_temp_word(word[1]) & check_temp_word(word[2]) != 0x00, +# all(check_temp_range(byte[0], byte[2], byte[4])), +# ] +# ): +# # byte[0] is temp, byte[2] is max or min, byte[4] is min or max +# trange = check_temp_range(byte[0], byte[2], byte[4]) +# return byte[0], trange[0], trange[1], None +# else: +# trange = check_temp_range(byte[0], byte[2], byte[3]) +# if word[2] < 0x7FFF and all(trange) and trange[1] >= 40: +# # byte[0] is temp, byte[2] is max or min, byte[3] is min or max, word[2] is overtemp count +# return byte[0], trange[0], trange[1], word[2] +# # no data +# return None, None, None, None + + +CHARTED_ATTRS = dict((attr, k) for k, v in CHARTS.items() for attr in v['attrs']) + + +class BaseAtaSmartAttribute: + def __init__(self, name, normalized_value, raw_value): + self.name = name + self.normalized_value = normalized_value + self.raw_value = raw_value + + def value(self): + raise NotImplementedError + + +class AtaRaw(BaseAtaSmartAttribute): + def value(self): + return self.raw_value + + +class AtaNormalized(BaseAtaSmartAttribute): + def value(self): + return self.normalized_value + + +class Ata3(BaseAtaSmartAttribute): + def value(self): + value = int(self.raw_value) + # https://github.com/netdata/netdata/issues/5919 + # + # 3;151;38684000679; + # 423 (Average 447) + # 38684000679 & 0xFFF -> 423 + # (38684000679 & 0xFFF0000) >> 16 -> 447 + if value > 1e6: + return value & 0xFFF + return value + + +class Ata9(BaseAtaSmartAttribute): + def value(self): + value = int(self.raw_value) + if value > 1e6: + return value & 0xFFFF + return value + + +class Ata190(BaseAtaSmartAttribute): + def value(self): + return 100 - int(self.normalized_value) + + +class Ata194(BaseAtaSmartAttribute): + # https://github.com/netdata/netdata/issues/3041 + # https://github.com/netdata/netdata/issues/5919 + # + # The low byte is the current temperature, the third lowest is the maximum, and the fifth lowest is the minimum + def value(self): + value = int(self.raw_value) + if value > 1e6: + return value & 0xFF + return min(int(self.normalized_value), int(self.raw_value)) + + +class BaseSCSISmartAttribute: + def __init__(self, name, raw_value): + self.name = name + self.raw_value = raw_value + + def value(self): + raise NotImplementedError + + +class SCSIRaw(BaseSCSISmartAttribute): + def value(self): + return self.raw_value + + +def ata_attribute_factory(value): + name = value[0] + + if name == ATTR3: + return Ata3(*value) + elif name == ATTR9: + return Ata9(*value) + elif name == ATTR190: + return Ata190(*value) + elif name == ATTR194: + return Ata194(*value) + elif name in [ + ATTR1, + ATTR7, + ATTR202, + ATTR206, + ATTR233, + ]: + return AtaNormalized(*value) + + return AtaRaw(*value) + + +def scsi_attribute_factory(value): + return SCSIRaw(*value) + + +def attribute_factory(value): + name = value[0] + if name.isdigit(): + return ata_attribute_factory(value) + return scsi_attribute_factory(value) + + +def handle_error(*errors): + def on_method(method): + def on_call(*args): + try: + return method(*args) + except errors: + return None + + return on_call + + return on_method + + +class DiskLogFile: + def __init__(self, full_path): + self.path = full_path + self.size = os.path.getsize(full_path) + + @handle_error(OSError) + def is_changed(self): + return self.size != os.path.getsize(self.path) + + @handle_error(OSError) + def is_active(self, current_time, limit): + return (current_time - os.path.getmtime(self.path)) / 60 < limit + + @handle_error(OSError) + def read(self): + self.size = os.path.getsize(self.path) + return read_last_line(self.path) + + +class BaseDisk: + def __init__(self, name, log_file): + self.raw_name = name + self.name = re.sub(r'_+', '_', name) + self.log_file = log_file + self.attrs = list() + self.alive = True + self.charted = False + + def __eq__(self, other): + if isinstance(other, BaseDisk): + return self.raw_name == other.raw_name + return self.raw_name == other + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) + + def parser(self, data): + raise NotImplementedError + + @handle_error(TypeError) + def populate_attrs(self): + self.attrs = list() + line = self.log_file.read() + for value in self.parser(line): + self.attrs.append(attribute_factory(value)) + + return len(self.attrs) + + def data(self): + data = dict() + for attr in self.attrs: + data['{0}_{1}'.format(self.name, attr.name)] = attr.value() + return data + + +class ATADisk(BaseDisk): + def parser(self, data): + return RE_ATA.findall(data) + + +class SCSIDisk(BaseDisk): + def parser(self, data): + return RE_SCSI.findall(data) + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.log_path = configuration.get('log_path', DEF_PATH) + self.age = configuration.get('age', DEF_AGE) + self.exclude = configuration.get('exclude_disks', str()).split() + self.disks = list() + self.runs = 0 + self.do_force_rescan = False + + def check(self): + return self.scan() > 0 + + def get_data(self): + self.runs += 1 + + if self.do_force_rescan or self.runs % DEF_RESCAN_INTERVAL == 0: + self.cleanup() + self.scan() + self.do_force_rescan = False + + data = dict() + + for disk in self.disks: + if not disk.alive: + continue + + if not disk.charted: + self.add_disk_to_charts(disk) + + changed = disk.log_file.is_changed() + + if changed is None: + disk.alive = False + self.do_force_rescan = True + continue + + if changed and disk.populate_attrs() is None: + disk.alive = False + self.do_force_rescan = True + continue + + data.update(disk.data()) + + return data + + def cleanup(self): + current_time = time() + for disk in self.disks[:]: + if any( + [ + not disk.alive, + not disk.log_file.is_active(current_time, self.age), + ] + ): + self.disks.remove(disk.raw_name) + self.remove_disk_from_charts(disk) + + def scan(self): + self.debug('scanning {0}'.format(self.log_path)) + current_time = time() + + for full_name in os.listdir(self.log_path): + disk = self.create_disk_from_file(full_name, current_time) + if not disk: + continue + self.disks.append(disk) + + return len(self.disks) + + def create_disk_from_file(self, full_name, current_time): + if not full_name.endswith(CSV): + self.debug('skipping {0}: not a csv file'.format(full_name)) + return None + + name = os.path.basename(full_name).split('.')[-3] + path = os.path.join(self.log_path, full_name) + + if name in self.disks: + self.debug('skipping {0}: already in disks'.format(full_name)) + return None + + if [p for p in self.exclude if p in name]: + self.debug('skipping {0}: filtered by `exclude` option'.format(full_name)) + return None + + if not os.access(path, os.R_OK): + self.debug('skipping {0}: not readable'.format(full_name)) + return None + + if os.path.getsize(path) == 0: + self.debug('skipping {0}: zero size'.format(full_name)) + return None + + if (current_time - os.path.getmtime(path)) / 60 > self.age: + self.debug('skipping {0}: haven\'t been updated for last {1} minutes'.format(full_name, self.age)) + return None + + if ATA in full_name: + disk = ATADisk(name, DiskLogFile(path)) + elif SCSI in full_name: + disk = SCSIDisk(name, DiskLogFile(path)) + else: + self.debug('skipping {0}: unknown type'.format(full_name)) + return None + + disk.populate_attrs() + if not disk.attrs: + self.error('skipping {0}: parsing failed'.format(full_name)) + return None + + self.debug('added {0}'.format(full_name)) + return disk + + def add_disk_to_charts(self, disk): + if len(self.charts) == 0 or disk.charted: + return + disk.charted = True + + for attr in disk.attrs: + chart_id = CHARTED_ATTRS.get(attr.name) + + if not chart_id or chart_id not in self.charts: + continue + + chart = self.charts[chart_id] + dim = [ + '{0}_{1}'.format(disk.name, attr.name), + disk.name, + CHARTS[chart_id]['algo'], + ] + + if dim[0] in self.charts[chart_id].dimensions: + chart.hide_dimension(dim[0], reverse=True) + else: + chart.add_dimension(dim) + + def remove_disk_from_charts(self, disk): + if len(self.charts) == 0 or not disk.charted: + return + + for attr in disk.attrs: + chart_id = CHARTED_ATTRS.get(attr.name) + + if not chart_id or chart_id not in self.charts: + continue + + self.charts[chart_id].del_dimension('{0}_{1}'.format(disk.name, attr.name)) diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.conf b/collectors/python.d.plugin/smartd_log/smartd_log.conf new file mode 100644 index 0000000..6c01d95 --- /dev/null +++ b/collectors/python.d.plugin/smartd_log/smartd_log.conf @@ -0,0 +1,75 @@ +# netdata python.d.plugin configuration for smartd log +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, smartd_log also supports the following: +# +# log_path: '/path/to/smartd_logs' # path to smartd log files. Default is /var/log/smartd +# exclude_disks: 'PATTERN1 PATTERN2' # space separated patterns. If the pattern is in the drive name, the module will not collect data for it. +# +# ---------------------------------------------------------------------- + +custom: + name: smartd_log + log_path: '/var/log/smartd/' + +debian: + name: smartd_log + log_path: '/var/lib/smartmontools/' diff --git a/collectors/python.d.plugin/spigotmc/Makefile.inc b/collectors/python.d.plugin/spigotmc/Makefile.inc new file mode 100644 index 0000000..f9fa8b6 --- /dev/null +++ b/collectors/python.d.plugin/spigotmc/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += spigotmc/spigotmc.chart.py +dist_pythonconfig_DATA += spigotmc/spigotmc.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += spigotmc/README.md spigotmc/Makefile.inc + diff --git a/collectors/python.d.plugin/spigotmc/README.md b/collectors/python.d.plugin/spigotmc/README.md new file mode 100644 index 0000000..0648318 --- /dev/null +++ b/collectors/python.d.plugin/spigotmc/README.md @@ -0,0 +1,38 @@ + + +# SpigotMC monitoring with Netdata + +Performs basic monitoring for Spigot Minecraft servers. + +It provides two charts, one tracking server-side ticks-per-second in +1, 5 and 15 minute averages, and one tracking the number of currently +active users. + +This is not compatible with Spigot plugins which change the format of +the data returned by the `tps` or `list` console commands. + +## Configuration + +Edit the `python.d/spigotmc.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/spigotmc.conf +``` + +```yaml +host: localhost +port: 25575 +password: pass +``` + +By default, a connection to port 25575 on the local system is attempted with an empty password. + +--- + + diff --git a/collectors/python.d.plugin/spigotmc/spigotmc.chart.py b/collectors/python.d.plugin/spigotmc/spigotmc.chart.py new file mode 100644 index 0000000..81370fb --- /dev/null +++ b/collectors/python.d.plugin/spigotmc/spigotmc.chart.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# Description: spigotmc netdata python.d module +# Author: Austin S. Hemmelgarn (Ferroin) +# SPDX-License-Identifier: GPL-3.0-or-later + +import platform +import re +import socket + +from bases.FrameworkServices.SimpleService import SimpleService +from third_party import mcrcon + +# Update only every 5 seconds because collection takes in excess of +# 100ms sometimes, and most people won't care about second-by-second data. +update_every = 5 + +PRECISION = 100 + +COMMAND_TPS = 'tps' +COMMAND_LIST = 'list' +COMMAND_ONLINE = 'online' + +ORDER = [ + 'tps', + 'mem', + 'users', +] + +CHARTS = { + 'tps': { + 'options': [None, 'Spigot Ticks Per Second', 'ticks', 'spigotmc', 'spigotmc.tps', 'line'], + 'lines': [ + ['tps1', '1 Minute Average', 'absolute', 1, PRECISION], + ['tps5', '5 Minute Average', 'absolute', 1, PRECISION], + ['tps15', '15 Minute Average', 'absolute', 1, PRECISION] + ] + }, + 'users': { + 'options': [None, 'Minecraft Users', 'users', 'spigotmc', 'spigotmc.users', 'area'], + 'lines': [ + ['users', 'Users', 'absolute', 1, 1] + ] + }, + 'mem': { + 'options': [None, 'Minecraft Memory Usage', 'MiB', 'spigotmc', 'spigotmc.mem', 'line'], + 'lines': [ + ['mem_used', 'used', 'absolute', 1, 1], + ['mem_alloc', 'allocated', 'absolute', 1, 1], + ['mem_max', 'max', 'absolute', 1, 1] + ] + } +} + +_TPS_REGEX = re.compile( + # Examples: + # §6TPS from last 1m, 5m, 15m: §a*20.0, §a*20.0, §a*20.0 + # §6Current Memory Usage: §a936/65536 mb (Max: 65536 mb) + r'^.*: .*?' # Message lead-in + r'(\d{1,2}.\d+), .*?' # 1-minute TPS value + r'(\d{1,2}.\d+), .*?' # 5-minute TPS value + r'(\d{1,2}\.\d+).*?' # 15-minute TPS value + r'(\s.*?(\d+)\/(\d+).*?: (\d+).*)?', # Current Memory Usage / Total Memory (Max Memory) + re.MULTILINE +) +_LIST_REGEX = re.compile( + # Examples: + # There are 4 of a max 50 players online: player1, player2, player3, player4 + # §6There are §c4§6 out of maximum §c50§6 players online. + # §6There are §c3§6/§c1§6 out of maximum §c50§6 players online. + # §6当前有 §c4§6 个玩家在线,最大在线人数为 §c50§6 个玩家. + # §c4§6 人のプレイヤーが接続中です。最大接続可能人数\:§c 50 + r'[^§](\d+)(?:.*?(?=/).*?[^§](\d+))?', # Current user count. + re.X +) + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.host = self.configuration.get('host', 'localhost') + self.port = self.configuration.get('port', 25575) + self.password = self.configuration.get('password', '') + self.console = mcrcon.MCRcon() + self.alive = True + + def check(self): + if platform.system() != 'Linux': + self.error('Only supported on Linux.') + return False + try: + self.connect() + except (mcrcon.MCRconException, socket.error) as err: + self.error('Error connecting.') + self.error(repr(err)) + return False + + return self._get_data() + + def connect(self): + self.console.connect(self.host, self.port, self.password) + + def reconnect(self): + self.error('try reconnect.') + try: + try: + self.console.disconnect() + except mcrcon.MCRconException: + pass + self.console.connect(self.host, self.port, self.password) + self.alive = True + except (mcrcon.MCRconException, socket.error) as err: + self.error('Error connecting.') + self.error(repr(err)) + return False + return True + + def is_alive(self): + if any( + [ + not self.alive, + self.console.socket.getsockopt(socket.IPPROTO_TCP, socket.TCP_INFO, 0) != 1 + ] + ): + return self.reconnect() + return True + + def _get_data(self): + if not self.is_alive(): + return None + + data = {} + + try: + raw = self.console.command(COMMAND_TPS) + match = _TPS_REGEX.match(raw) + if match: + data['tps1'] = int(float(match.group(1)) * PRECISION) + data['tps5'] = int(float(match.group(2)) * PRECISION) + data['tps15'] = int(float(match.group(3)) * PRECISION) + if match.group(4): + data['mem_used'] = int(match.group(5)) + data['mem_alloc'] = int(match.group(6)) + data['mem_max'] = int(match.group(7)) + else: + self.error('Unable to process TPS values.') + if not raw: + self.error( + "'{0}' command returned no value, make sure you set correct password".format(COMMAND_TPS)) + except mcrcon.MCRconException: + self.error('Unable to fetch TPS values.') + except socket.error: + self.error('Connection is dead.') + self.alive = False + return None + + try: + raw = self.console.command(COMMAND_LIST) + match = _LIST_REGEX.search(raw) + if not match: + raw = self.console.command(COMMAND_ONLINE) + match = _LIST_REGEX.search(raw) + if match: + users = int(match.group(1)) + hidden_users = match.group(2) + if hidden_users: + hidden_users = int(hidden_users) + else: + hidden_users = 0 + data['users'] = users + hidden_users + else: + if not raw: + self.error("'{0}' and '{1}' commands returned no value, make sure you set correct password".format( + COMMAND_LIST, COMMAND_ONLINE)) + self.error('Unable to process user counts.') + except mcrcon.MCRconException: + self.error('Unable to fetch user counts.') + except socket.error: + self.error('Connection is dead.') + self.alive = False + return None + + return data diff --git a/collectors/python.d.plugin/spigotmc/spigotmc.conf b/collectors/python.d.plugin/spigotmc/spigotmc.conf new file mode 100644 index 0000000..f0064ea --- /dev/null +++ b/collectors/python.d.plugin/spigotmc/spigotmc.conf @@ -0,0 +1,66 @@ +# netdata python.d.plugin configuration for spigotmc +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# In addition to the above, spigotmc supports the following: +# +# host: localhost # The host to connect to. Defaults to the local system. +# port: 25575 # The port the remote console is listening on. +# password: '' # The remote console password. Most be set correctly. diff --git a/collectors/python.d.plugin/springboot/Makefile.inc b/collectors/python.d.plugin/springboot/Makefile.inc new file mode 100644 index 0000000..06775f9 --- /dev/null +++ b/collectors/python.d.plugin/springboot/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += springboot/springboot.chart.py +dist_pythonconfig_DATA += springboot/springboot.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += springboot/README.md springboot/Makefile.inc + diff --git a/collectors/python.d.plugin/springboot/README.md b/collectors/python.d.plugin/springboot/README.md new file mode 100644 index 0000000..cdbc9a9 --- /dev/null +++ b/collectors/python.d.plugin/springboot/README.md @@ -0,0 +1,145 @@ + + +# Java Spring Boot 2 application monitoring with Netdata + +Monitors one or more Java Spring-boot applications depending on configuration. +Netdata can be used to monitor running Java [Spring Boot](https://spring.io/) applications that expose their metrics with the use of the **Spring Boot Actuator** included in Spring Boot library. + +## Configuration + +The Spring Boot Actuator exposes these metrics over HTTP and is very easy to use: + +- add `org.springframework.boot:spring-boot-starter-actuator` to your application dependencies +- set `endpoints.metrics.sensitive=false` in your `application.properties` + +You can create custom Metrics by add and inject a PublicMetrics in your application. +This is a example to add custom metrics: + +```java +package com.example; + +import org.springframework.boot.actuate.endpoint.PublicMetrics; +import org.springframework.boot.actuate.metrics.Metric; +import org.springframework.stereotype.Service; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryPoolMXBean; +import java.util.ArrayList; +import java.util.Collection; + +@Service +public class HeapPoolMetrics implements PublicMetrics { + + private static final String PREFIX = "mempool."; + private static final String KEY_EDEN = PREFIX + "eden"; + private static final String KEY_SURVIVOR = PREFIX + "survivor"; + private static final String KEY_TENURED = PREFIX + "tenured"; + + @Override + public Collection> metrics() { + Collection> result = new ArrayList<>(4); + for (MemoryPoolMXBean mem : ManagementFactory.getMemoryPoolMXBeans()) { + String poolName = mem.getName(); + String name = null; + if (poolName.indexOf("Eden Space") != -1) { + name = KEY_EDEN; + } else if (poolName.indexOf("Survivor Space") != -1) { + name = KEY_SURVIVOR; + } else if (poolName.indexOf("Tenured Gen") != -1 || poolName.indexOf("Old Gen") != -1) { + name = KEY_TENURED; + } + + if (name != null) { + result.add(newMemoryMetric(name, mem.getUsage().getMax())); + result.add(newMemoryMetric(name + ".init", mem.getUsage().getInit())); + result.add(newMemoryMetric(name + ".committed", mem.getUsage().getCommitted())); + result.add(newMemoryMetric(name + ".used", mem.getUsage().getUsed())); + } + } + return result; + } + + private Metric newMemoryMetric(String name, long bytes) { + return new Metric<>(name, bytes / 1024); + } +} +``` + +Please refer [Spring Boot Actuator: Production-ready Features](https://docs.spring.io/spring-boot/docs/current/reference/html/production-ready-features.html#production-ready) and [81. Actuator - Part IX. ‘How-to’ guides](https://docs.spring.io/spring-boot/docs/current/reference/html/howto.html#howto-actuator) for more information. + +## Charts + +1. **Response Codes** in requests/s + + - 1xx + - 2xx + - 3xx + - 4xx + - 5xx + - others + +2. **Threads** + + - daemon + - total + +3. **GC Time** in milliseconds and **GC Operations** in operations/s + + - Copy + - MarkSweep + - ... + +4. **Heap Memory Usage** in KB + + - used + - committed + +## Usage + +Edit the `python.d/springboot.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/springboot.conf +``` + +This module defines some common charts, and you can add custom charts by change the configurations. + +The configuration format is like: + +```yaml +: + name: '' + url: '' # ex. http://localhost:8080/metrics + user: '' # optional + pass: '' # optional + defaults: + []: true|false + extras: + - id: '' + options: + title: '***' + units: '***' + family: '***' + context: 'springboot.***' + charttype: 'stacked' | 'area' | 'line' + lines: + - { dimension: 'myapp_ok', name: 'ok', algorithm: 'absolute', multiplier: 1, divisor: 1} # it shows "myapp.ok" metrics + - { dimension: 'myapp_ng', name: 'ng', algorithm: 'absolute', multiplier: 1, divisor: 1} # it shows "myapp.ng" metrics +``` + +By default, it creates `response_code`, `threads`, `gc_time`, `gc_ope` abd `heap` charts. +You can disable the default charts by set `defaults.: false`. + +The dimension name of extras charts should replace `.` to `_`. + +Please check +[springboot.conf](https://raw.githubusercontent.com/netdata/netdata/master/collectors/python.d.plugin/springboot/springboot.conf) +for more examples. + + diff --git a/collectors/python.d.plugin/springboot/springboot.chart.py b/collectors/python.d.plugin/springboot/springboot.chart.py new file mode 100644 index 0000000..dbe11d6 --- /dev/null +++ b/collectors/python.d.plugin/springboot/springboot.chart.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +# Description: tomcat netdata python.d module +# Author: Wing924 +# SPDX-License-Identifier: GPL-3.0-or-later + +import json + +from bases.FrameworkServices.UrlService import UrlService + +DEFAULT_ORDER = [ + 'response_code', + 'threads', + 'gc_time', + 'gc_ope', + 'heap', +] + +DEFAULT_CHARTS = { + 'response_code': { + 'options': [None, "Response Codes", "requests/s", "response", "springboot.response_code", "stacked"], + 'lines': [ + ["resp_other", 'Other', 'incremental'], + ["resp_1xx", '1xx', 'incremental'], + ["resp_2xx", '2xx', 'incremental'], + ["resp_3xx", '3xx', 'incremental'], + ["resp_4xx", '4xx', 'incremental'], + ["resp_5xx", '5xx', 'incremental'], + ] + }, + 'threads': { + 'options': [None, "Threads", "current threads", "threads", "springboot.threads", "area"], + 'lines': [ + ["threads_daemon", 'daemon', 'absolute'], + ["threads", 'total', 'absolute'], + ] + }, + 'gc_time': { + 'options': [None, "GC Time", "milliseconds", "garbage collection", "springboot.gc_time", "stacked"], + 'lines': [ + ["gc_copy_time", 'Copy', 'incremental'], + ["gc_marksweepcompact_time", 'MarkSweepCompact', 'incremental'], + ["gc_parnew_time", 'ParNew', 'incremental'], + ["gc_concurrentmarksweep_time", 'ConcurrentMarkSweep', 'incremental'], + ["gc_ps_scavenge_time", 'PS Scavenge', 'incremental'], + ["gc_ps_marksweep_time", 'PS MarkSweep', 'incremental'], + ["gc_g1_young_generation_time", 'G1 Young Generation', 'incremental'], + ["gc_g1_old_generation_time", 'G1 Old Generation', 'incremental'], + ] + }, + 'gc_ope': { + 'options': [None, "GC Operations", "operations/s", "garbage collection", "springboot.gc_ope", "stacked"], + 'lines': [ + ["gc_copy_count", 'Copy', 'incremental'], + ["gc_marksweepcompact_count", 'MarkSweepCompact', 'incremental'], + ["gc_parnew_count", 'ParNew', 'incremental'], + ["gc_concurrentmarksweep_count", 'ConcurrentMarkSweep', 'incremental'], + ["gc_ps_scavenge_count", 'PS Scavenge', 'incremental'], + ["gc_ps_marksweep_count", 'PS MarkSweep', 'incremental'], + ["gc_g1_young_generation_count", 'G1 Young Generation', 'incremental'], + ["gc_g1_old_generation_count", 'G1 Old Generation', 'incremental'], + ] + }, + 'heap': { + 'options': [None, "Heap Memory Usage", "KiB", "heap memory", "springboot.heap", "area"], + 'lines': [ + ["heap_committed", 'committed', "absolute"], + ["heap_used", 'used', "absolute"], + ] + } +} + + +class ExtraChartError(ValueError): + pass + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.url = self.configuration.get('url', "http://localhost:8080/metrics") + self._setup_charts() + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + raw_data = self._get_raw_data() + if not raw_data: + return None + + try: + data = json.loads(raw_data) + except ValueError: + self.debug('%s is not a valid JSON page' % self.url) + return None + + result = { + 'resp_1xx': 0, + 'resp_2xx': 0, + 'resp_3xx': 0, + 'resp_4xx': 0, + 'resp_5xx': 0, + 'resp_other': 0, + } + + for key, value in data.iteritems(): + if 'counter.status.' in key: + status_type = key[15:16] + 'xx' + if status_type[0] not in '12345': + status_type = 'other' + result['resp_' + status_type] += value + else: + result[key.replace('.', '_')] = value + + return result or None + + def _setup_charts(self): + self.order = [] + self.definitions = {} + defaults = self.configuration.get('defaults', {}) + + for chart in DEFAULT_ORDER: + if defaults.get(chart, True): + self.order.append(chart) + self.definitions[chart] = DEFAULT_CHARTS[chart] + + for extra in self.configuration.get('extras', []): + self._add_extra_chart(extra) + self.order.append(extra['id']) + + def _add_extra_chart(self, chart): + chart_id = chart.get('id', None) or self.die('id is not defined in extra chart') + options = chart.get('options', None) or self.die('option is not defined in extra chart: %s' % chart_id) + lines = chart.get('lines', None) or self.die('lines is not defined in extra chart: %s' % chart_id) + + title = options.get('title', None) or self.die('title is missing: %s' % chart_id) + units = options.get('units', None) or self.die('units is missing: %s' % chart_id) + family = options.get('family', title) + context = options.get('context', 'springboot.' + title) + charttype = options.get('charttype', 'line') + + result = { + 'options': [None, title, units, family, context, charttype], + 'lines': [], + } + + for line in lines: + dimension = line.get('dimension', None) or self.die('dimension is missing: %s' % chart_id) + name = line.get('name', dimension) + algorithm = line.get('algorithm', 'absolute') + multiplier = line.get('multiplier', 1) + divisor = line.get('divisor', 1) + result['lines'].append([dimension, name, algorithm, multiplier, divisor]) + + self.definitions[chart_id] = result + + @staticmethod + def die(error_message): + raise ExtraChartError(error_message) diff --git a/collectors/python.d.plugin/springboot/springboot.conf b/collectors/python.d.plugin/springboot/springboot.conf new file mode 100644 index 0000000..0cb369c --- /dev/null +++ b/collectors/python.d.plugin/springboot/springboot.conf @@ -0,0 +1,118 @@ +# netdata python.d.plugin configuration for springboot +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, this plugin also supports the following: +# +# url: 'http://127.0.0.1/metrics' # the URL of the spring boot actuator metrics +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# defaults: +# [chart_id]: true | false # enables/disables default charts, defaults true. +# extras: {} # defines extra charts to monitor, please see the example below +# - id: [chart_id] +# options: {} +# lines: [] +# +# If all defaults is disabled and no extra charts are defined, this module will disable itself, as it has no data to +# collect. +# +# Configuration example +# --------------------- +# example: +# name: 'example' +# url: 'http://localhost:8080/metrics' +# defaults: +# response_code: true +# threads: true +# gc_time: true +# gc_ope: true +# heap: false +# extras: +# - id: 'heap' +# options: { title: 'Heap Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap', charttype: 'stacked' } +# lines: +# - { dimension: 'mem_free', name: 'free'} +# - { dimension: 'mempool_eden_used', name: 'eden', algorithm: 'absolute', multiplier: 1, divisor: 1} +# - { dimension: 'mempool_survivor_used', name: 'survivor', algorithm: 'absolute', multiplier: 1, divisor: 1} +# - { dimension: 'mempool_tenured_used', name: 'tenured', algorithm: 'absolute', multiplier: 1, divisor: 1} +# - id: 'heap_eden' +# options: { title: 'Eden Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_eden', charttype: 'area' } +# lines: +# - { dimension: 'mempool_eden_used', name: 'used'} +# - { dimension: 'mempool_eden_committed', name: 'committed'} +# - id: 'heap_survivor' +# options: { title: 'Survivor Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_survivor', charttype: 'area' } +# lines: +# - { dimension: 'mempool_survivor_used', name: 'used'} +# - { dimension: 'mempool_survivor_committed', name: 'committed'} +# - id: 'heap_tenured' +# options: { title: 'Tenured Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_tenured', charttype: 'area' } +# lines: +# - { dimension: 'mempool_tenured_used', name: 'used'} +# - { dimension: 'mempool_tenured_committed', name: 'committed'} + + +local: + name: 'local' + url: 'http://localhost:8080/metrics' + +local_ip: + name: 'local' + url: 'http://127.0.0.1:8080/metrics' diff --git a/collectors/python.d.plugin/squid/Makefile.inc b/collectors/python.d.plugin/squid/Makefile.inc new file mode 100644 index 0000000..76ecff8 --- /dev/null +++ b/collectors/python.d.plugin/squid/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += squid/squid.chart.py +dist_pythonconfig_DATA += squid/squid.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += squid/README.md squid/Makefile.inc + diff --git a/collectors/python.d.plugin/squid/README.md b/collectors/python.d.plugin/squid/README.md new file mode 100644 index 0000000..c29b69a --- /dev/null +++ b/collectors/python.d.plugin/squid/README.md @@ -0,0 +1,58 @@ + + +# Squid monitoring with Netdata + +Monitors one or more squid instances depending on configuration. + +It produces following charts: + +1. **Client Bandwidth** in kilobits/s + + - in + - out + - hits + +2. **Client Requests** in requests/s + + - requests + - hits + - errors + +3. **Server Bandwidth** in kilobits/s + + - in + - out + +4. **Server Requests** in requests/s + + - requests + - errors + +## Configuration + +Edit the `python.d/squid.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/squid.conf +``` + +```yaml +priority : 50000 + +local: + request : 'cache_object://localhost:3128/counters' + host : 'localhost' + port : 3128 +``` + +Without any configuration module will try to autodetect where squid presents its `counters` data + +--- + + diff --git a/collectors/python.d.plugin/squid/squid.chart.py b/collectors/python.d.plugin/squid/squid.chart.py new file mode 100644 index 0000000..bcae2d8 --- /dev/null +++ b/collectors/python.d.plugin/squid/squid.chart.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +# Description: squid netdata python.d module +# Author: Pawel Krupa (paulfantom) +# SPDX-License-Identifier: GPL-3.0-or-later + +from bases.FrameworkServices.SocketService import SocketService + +ORDER = [ + 'clients_net', + 'clients_requests', + 'servers_net', + 'servers_requests', +] + +CHARTS = { + 'clients_net': { + 'options': [None, 'Squid Client Bandwidth', 'kilobits/s', 'clients', 'squid.clients_net', 'area'], + 'lines': [ + ['client_http_kbytes_in', 'in', 'incremental', 8, 1], + ['client_http_kbytes_out', 'out', 'incremental', -8, 1], + ['client_http_hit_kbytes_out', 'hits', 'incremental', -8, 1] + ] + }, + 'clients_requests': { + 'options': [None, 'Squid Client Requests', 'requests/s', 'clients', 'squid.clients_requests', 'line'], + 'lines': [ + ['client_http_requests', 'requests', 'incremental'], + ['client_http_hits', 'hits', 'incremental'], + ['client_http_errors', 'errors', 'incremental', -1, 1] + ] + }, + 'servers_net': { + 'options': [None, 'Squid Server Bandwidth', 'kilobits/s', 'servers', 'squid.servers_net', 'area'], + 'lines': [ + ['server_all_kbytes_in', 'in', 'incremental', 8, 1], + ['server_all_kbytes_out', 'out', 'incremental', -8, 1] + ] + }, + 'servers_requests': { + 'options': [None, 'Squid Server Requests', 'requests/s', 'servers', 'squid.servers_requests', 'line'], + 'lines': [ + ['server_all_requests', 'requests', 'incremental'], + ['server_all_errors', 'errors', 'incremental', -1, 1] + ] + } +} + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + SocketService.__init__(self, configuration=configuration, name=name) + self._keep_alive = True + self.request = '' + self.host = 'localhost' + self.port = 3128 + self.order = ORDER + self.definitions = CHARTS + + def _get_data(self): + """ + Get data via http request + :return: dict + """ + response = self._get_raw_data() + + data = dict() + try: + raw = '' + for tmp in response.split('\r\n'): + if tmp.startswith('sample_time'): + raw = tmp + break + + if raw.startswith('<'): + self.error('invalid data received') + return None + + for row in raw.split('\n'): + if row.startswith(('client', 'server.all')): + tmp = row.split('=') + data[tmp[0].replace('.', '_').strip(' ')] = int(tmp[1]) + + except (ValueError, AttributeError, TypeError): + self.error('invalid data received') + return None + + if not data: + self.error('no data received') + return None + return data + + def _check_raw_data(self, data): + header = data[:1024].lower() + + if 'connection: keep-alive' in header: + self._keep_alive = True + else: + self._keep_alive = False + + if data[-7:] == '\r\n0\r\n\r\n' and 'transfer-encoding: chunked' in header: # HTTP/1.1 response + self.debug('received full response from squid') + return True + + self.debug('waiting more data from squid') + return False + + def check(self): + """ + Parse essential configuration, autodetect squid configuration (if needed), and check if data is available + :return: boolean + """ + self._parse_config() + # format request + req = self.request.decode() + if not req.startswith('GET'): + req = 'GET ' + req + if not req.endswith(' HTTP/1.1\r\n\r\n'): + req += ' HTTP/1.1\r\n\r\n' + self.request = req.encode() + if self._get_data() is not None: + return True + else: + return False diff --git a/collectors/python.d.plugin/squid/squid.conf b/collectors/python.d.plugin/squid/squid.conf new file mode 100644 index 0000000..b90a52c --- /dev/null +++ b/collectors/python.d.plugin/squid/squid.conf @@ -0,0 +1,167 @@ +# netdata python.d.plugin configuration for squid +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, squid also supports the following: +# +# host : 'IP or HOSTNAME' # the host to connect to +# port : PORT # the port to connect to +# request: 'URL' # the URL to request from squid +# + +# ---------------------------------------------------------------------- +# SQUID CONFIGURATION +# +# See: +# http://wiki.squid-cache.org/Features/CacheManager +# +# In short, add to your squid configuration these: +# +# http_access allow localhost manager +# http_access deny manager +# +# To remotely monitor a squid: +# +# acl managerAdmin src 192.0.2.1 +# http_access allow localhost manager +# http_access allow managerAdmin manager +# http_access deny manager +# + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +tcp3128old: + name : 'local' + host : 'localhost' + port : 3128 + request : 'cache_object://localhost:3128/counters' + +tcp8080old: + name : 'local' + host : 'localhost' + port : 8080 + request : 'cache_object://localhost:3128/counters' + +tcp3128new: + name : 'local' + host : 'localhost' + port : 3128 + request : '/squid-internal-mgr/counters' + +tcp8080new: + name : 'local' + host : 'localhost' + port : 8080 + request : '/squid-internal-mgr/counters' + +# IPv4 + +tcp3128oldipv4: + name : 'local' + host : '127.0.0.1' + port : 3128 + request : 'cache_object://127.0.0.1:3128/counters' + +tcp8080oldipv4: + name : 'local' + host : '127.0.0.1' + port : 8080 + request : 'cache_object://127.0.0.1:3128/counters' + +tcp3128newipv4: + name : 'local' + host : '127.0.0.1' + port : 3128 + request : '/squid-internal-mgr/counters' + +tcp8080newipv4: + name : 'local' + host : '127.0.0.1' + port : 8080 + request : '/squid-internal-mgr/counters' + +# IPv6 + +tcp3128oldipv6: + name : 'local' + host : '::1' + port : 3128 + request : 'cache_object://[::1]:3128/counters' + +tcp8080oldipv6: + name : 'local' + host : '::1' + port : 8080 + request : 'cache_object://[::1]:3128/counters' + +tcp3128newipv6: + name : 'local' + host : '::1' + port : 3128 + request : '/squid-internal-mgr/counters' + +tcp8080newipv6: + name : 'local' + host : '::1' + port : 8080 + request : '/squid-internal-mgr/counters' + diff --git a/collectors/python.d.plugin/tomcat/Makefile.inc b/collectors/python.d.plugin/tomcat/Makefile.inc new file mode 100644 index 0000000..940a783 --- /dev/null +++ b/collectors/python.d.plugin/tomcat/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += tomcat/tomcat.chart.py +dist_pythonconfig_DATA += tomcat/tomcat.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += tomcat/README.md tomcat/Makefile.inc + diff --git a/collectors/python.d.plugin/tomcat/README.md b/collectors/python.d.plugin/tomcat/README.md new file mode 100644 index 0000000..b7525b8 --- /dev/null +++ b/collectors/python.d.plugin/tomcat/README.md @@ -0,0 +1,53 @@ + + +# Apache Tomcat monitoring with Netdata + +Presents memory utilization of tomcat containers. + +Charts: + +1. **Requests** per second + + - accesses + +2. **Volume** in KB/s + + - volume + +3. **Threads** + + - current + - busy + +4. **JVM Free Memory** in MB + + - jvm + +## Configuration + +Edit the `python.d/tomcat.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/tomcat.conf +``` + +```yaml +localhost: + name : 'local' + url : 'http://127.0.0.1:8080/manager/status?XML=true' + user : 'tomcat_username' + pass : 'secret_tomcat_password' +``` + +Without configuration, module attempts to connect to `http://localhost:8080/manager/status?XML=true`, without any credentials. +So it will probably fail. + +--- + + diff --git a/collectors/python.d.plugin/tomcat/tomcat.chart.py b/collectors/python.d.plugin/tomcat/tomcat.chart.py new file mode 100644 index 0000000..90315f8 --- /dev/null +++ b/collectors/python.d.plugin/tomcat/tomcat.chart.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +# Description: tomcat netdata python.d module +# Author: Pawel Krupa (paulfantom) +# Author: Wei He (Wing924) +# SPDX-License-Identifier: GPL-3.0-or-later + +import re +import xml.etree.ElementTree as ET + +from bases.FrameworkServices.UrlService import UrlService + +MiB = 1 << 20 + +# Regex fix for Tomcat single quote XML attributes +# affecting Tomcat < 8.5.24 & 9.0.2 running with Java > 9 +# cf. https://bz.apache.org/bugzilla/show_bug.cgi?id=61603 +single_quote_regex = re.compile(r"='([^']+)'([^']+)''") + +ORDER = [ + 'accesses', + 'bandwidth', + 'processing_time', + 'threads', + 'jvm', + 'jvm_eden', + 'jvm_survivor', + 'jvm_tenured', +] + +CHARTS = { + 'accesses': { + 'options': [None, 'Requests', 'requests/s', 'statistics', 'tomcat.accesses', 'area'], + 'lines': [ + ['requestCount', 'accesses', 'incremental'], + ['errorCount', 'errors', 'incremental'], + ] + }, + 'bandwidth': { + 'options': [None, 'Bandwidth', 'KiB/s', 'statistics', 'tomcat.bandwidth', 'area'], + 'lines': [ + ['bytesSent', 'sent', 'incremental', 1, 1024], + ['bytesReceived', 'received', 'incremental', 1, 1024], + ] + }, + 'processing_time': { + 'options': [None, 'processing time', 'seconds', 'statistics', 'tomcat.processing_time', 'area'], + 'lines': [ + ['processingTime', 'processing time', 'incremental', 1, 1000] + ] + }, + 'threads': { + 'options': [None, 'Threads', 'current threads', 'statistics', 'tomcat.threads', 'area'], + 'lines': [ + ['currentThreadCount', 'current', 'absolute'], + ['currentThreadsBusy', 'busy', 'absolute'] + ] + }, + 'jvm': { + 'options': [None, 'JVM Memory Pool Usage', 'MiB', 'memory', 'tomcat.jvm', 'stacked'], + 'lines': [ + ['free', 'free', 'absolute', 1, MiB], + ['eden_used', 'eden', 'absolute', 1, MiB], + ['survivor_used', 'survivor', 'absolute', 1, MiB], + ['tenured_used', 'tenured', 'absolute', 1, MiB], + ['code_cache_used', 'code cache', 'absolute', 1, MiB], + ['compressed_used', 'compressed', 'absolute', 1, MiB], + ['metaspace_used', 'metaspace', 'absolute', 1, MiB], + ] + }, + 'jvm_eden': { + 'options': [None, 'Eden Memory Usage', 'MiB', 'memory', 'tomcat.jvm_eden', 'area'], + 'lines': [ + ['eden_used', 'used', 'absolute', 1, MiB], + ['eden_committed', 'committed', 'absolute', 1, MiB], + ['eden_max', 'max', 'absolute', 1, MiB] + ] + }, + 'jvm_survivor': { + 'options': [None, 'Survivor Memory Usage', 'MiB', 'memory', 'tomcat.jvm_survivor', 'area'], + 'lines': [ + ['survivor_used', 'used', 'absolute', 1, MiB], + ['survivor_committed', 'committed', 'absolute', 1, MiB], + ['survivor_max', 'max', 'absolute', 1, MiB], + ] + }, + 'jvm_tenured': { + 'options': [None, 'Tenured Memory Usage', 'MiB', 'memory', 'tomcat.jvm_tenured', 'area'], + 'lines': [ + ['tenured_used', 'used', 'absolute', 1, MiB], + ['tenured_committed', 'committed', 'absolute', 1, MiB], + ['tenured_max', 'max', 'absolute', 1, MiB] + ] + } +} + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.url = self.configuration.get('url', 'http://127.0.0.1:8080/manager/status?XML=true') + self.connector_name = self.configuration.get('connector_name', None) + self.parse = self.xml_parse + + def xml_parse(self, data): + try: + return ET.fromstring(data) + except ET.ParseError: + self.debug('%s is not a valid XML page. Please add "?XML=true" to tomcat status page.' % self.url) + return None + + def xml_single_quote_fix_parse(self, data): + data = single_quote_regex.sub(r"='\g<1>\g<2>'", data) + return self.xml_parse(data) + + def check(self): + self._manager = self._build_manager() + + raw_data = self._get_raw_data() + if not raw_data: + return False + + if single_quote_regex.search(raw_data): + self.warning('Tomcat status page is returning invalid single quote XML, please consider upgrading ' + 'your Tomcat installation. See https://bz.apache.org/bugzilla/show_bug.cgi?id=61603') + self.parse = self.xml_single_quote_fix_parse + + return self.parse(raw_data) is not None + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + data = None + raw_data = self._get_raw_data() + if raw_data: + xml = self.parse(raw_data) + if xml is None: + return None + + data = {} + + jvm = xml.find('jvm') + + connector = None + if self.connector_name: + for conn in xml.findall('connector'): + if self.connector_name in conn.get('name'): + connector = conn + break + else: + connector = xml.find('connector') + + memory = jvm.find('memory') + data['free'] = memory.get('free') + data['total'] = memory.get('total') + + for pool in jvm.findall('memorypool'): + name = pool.get('name') + if 'Eden Space' in name: + data['eden_used'] = pool.get('usageUsed') + data['eden_committed'] = pool.get('usageCommitted') + data['eden_max'] = pool.get('usageMax') + elif 'Survivor Space' in name: + data['survivor_used'] = pool.get('usageUsed') + data['survivor_committed'] = pool.get('usageCommitted') + data['survivor_max'] = pool.get('usageMax') + elif 'Tenured Gen' in name or 'Old Gen' in name: + data['tenured_used'] = pool.get('usageUsed') + data['tenured_committed'] = pool.get('usageCommitted') + data['tenured_max'] = pool.get('usageMax') + elif name == 'Code Cache': + data['code_cache_used'] = pool.get('usageUsed') + data['code_cache_committed'] = pool.get('usageCommitted') + data['code_cache_max'] = pool.get('usageMax') + elif name == 'Compressed': + data['compressed_used'] = pool.get('usageUsed') + data['compressed_committed'] = pool.get('usageCommitted') + data['compressed_max'] = pool.get('usageMax') + elif name == 'Metaspace': + data['metaspace_used'] = pool.get('usageUsed') + data['metaspace_committed'] = pool.get('usageCommitted') + data['metaspace_max'] = pool.get('usageMax') + + if connector is not None: + thread_info = connector.find('threadInfo') + data['currentThreadsBusy'] = thread_info.get('currentThreadsBusy') + data['currentThreadCount'] = thread_info.get('currentThreadCount') + + request_info = connector.find('requestInfo') + data['processingTime'] = request_info.get('processingTime') + data['requestCount'] = request_info.get('requestCount') + data['errorCount'] = request_info.get('errorCount') + data['bytesReceived'] = request_info.get('bytesReceived') + data['bytesSent'] = request_info.get('bytesSent') + + return data or None diff --git a/collectors/python.d.plugin/tomcat/tomcat.conf b/collectors/python.d.plugin/tomcat/tomcat.conf new file mode 100644 index 0000000..009591b --- /dev/null +++ b/collectors/python.d.plugin/tomcat/tomcat.conf @@ -0,0 +1,89 @@ +# netdata python.d.plugin configuration for tomcat +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, tomcat also supports the following: +# +# url: 'URL' # the URL to fetch nginx's status stats +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# if you have multiple connectors, the following are supported: +# +# connector_name: 'ajp-bio-8009' # default is null, which use first connector in status XML +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + url : 'http://localhost:8080/manager/status?XML=true' + +localipv4: + name : 'local' + url : 'http://127.0.0.1:8080/manager/status?XML=true' + +localipv6: + name : 'local' + url : 'http://[::1]:8080/manager/status?XML=true' diff --git a/collectors/python.d.plugin/tor/Makefile.inc b/collectors/python.d.plugin/tor/Makefile.inc new file mode 100644 index 0000000..5a45f9b --- /dev/null +++ b/collectors/python.d.plugin/tor/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += tor/tor.chart.py +dist_pythonconfig_DATA += tor/tor.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += tor/README.md tor/Makefile.inc + diff --git a/collectors/python.d.plugin/tor/README.md b/collectors/python.d.plugin/tor/README.md new file mode 100644 index 0000000..b57d77c --- /dev/null +++ b/collectors/python.d.plugin/tor/README.md @@ -0,0 +1,66 @@ + + +# Tor monitoring with Netdata + +Connects to the Tor control port to collect traffic statistics. + +## Requirements + +- `tor` program +- `stem` python package + +It produces only one chart: + +1. **Traffic** + + - read + - write + +## Configuration + +Edit the `python.d/tor.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/tor.conf +``` + +Needs only `control_port`. + +Here is an example for local server: + +```yaml +update_every : 1 +priority : 60000 + +local_tcp: + name: 'local' + control_port: 9051 + password: # if required + +local_socket: + name: 'local' + control_port: '/var/run/tor/control' + password: # if required +``` + +### prerequisite + +Add to `/etc/tor/torrc`: + +``` +ControlPort 9051 +``` + +For more options please read the manual. + +Without configuration, module attempts to connect to `127.0.0.1:9051`. + +--- + + diff --git a/collectors/python.d.plugin/tor/tor.chart.py b/collectors/python.d.plugin/tor/tor.chart.py new file mode 100644 index 0000000..8dc021a --- /dev/null +++ b/collectors/python.d.plugin/tor/tor.chart.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +# Description: adaptec_raid netdata python.d module +# Author: Federico Ceratto +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + + +from bases.FrameworkServices.SimpleService import SimpleService + +try: + import stem + import stem.connection + import stem.control + + STEM_AVAILABLE = True +except ImportError: + STEM_AVAILABLE = False + +DEF_PORT = 'default' + +ORDER = [ + 'traffic', +] + +CHARTS = { + 'traffic': { + 'options': [None, 'Tor Traffic', 'KiB/s', 'traffic', 'tor.traffic', 'area'], + 'lines': [ + ['read', 'read', 'incremental', 1, 1024], + ['write', 'write', 'incremental', 1, -1024], + ] + } +} + + +class Service(SimpleService): + """Provide netdata service for Tor""" + + def __init__(self, configuration=None, name=None): + super(Service, self).__init__(configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.port = self.configuration.get('control_port', DEF_PORT) + self.password = self.configuration.get('password') + self.use_socket = isinstance(self.port, str) and self.port != DEF_PORT and not self.port.isdigit() + self.conn = None + self.alive = False + + def check(self): + if not STEM_AVAILABLE: + self.error('the stem library is missing') + return False + + return self.connect() + + def get_data(self): + if not self.alive and not self.reconnect(): + return None + + data = dict() + + try: + data['read'] = self.conn.get_info('traffic/read') + data['write'] = self.conn.get_info('traffic/written') + except stem.ControllerError as error: + self.debug(error) + self.alive = False + + return data or None + + def authenticate(self): + try: + self.conn.authenticate(password=self.password) + except stem.connection.AuthenticationFailure as error: + self.error('authentication error: {0}'.format(error)) + return False + return True + + def connect_via_port(self): + try: + self.conn = stem.control.Controller.from_port(port=self.port) + except (stem.SocketError, ValueError) as error: + self.error(error) + + def connect_via_socket(self): + try: + self.conn = stem.control.Controller.from_socket_file(path=self.port) + except (stem.SocketError, ValueError) as error: + self.error(error) + + def connect(self): + if self.conn: + self.conn.close() + self.conn = None + + if self.use_socket: + self.connect_via_socket() + else: + self.connect_via_port() + + if self.conn and self.authenticate(): + self.alive = True + + return self.alive + + def reconnect(self): + return self.connect() diff --git a/collectors/python.d.plugin/tor/tor.conf b/collectors/python.d.plugin/tor/tor.conf new file mode 100644 index 0000000..bf09b21 --- /dev/null +++ b/collectors/python.d.plugin/tor/tor.conf @@ -0,0 +1,79 @@ +# netdata python.d.plugin configuration for tor +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, tor plugin also supports the following: +# +# control_port: 'port' # tor control port +# password: 'password' # tor control password +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +# local_tcp: +# name: 'local' +# control_port: 9051 +# password: +# +# local_socket: +# name: 'local' +# control_port: '/var/run/tor/control' +# password: diff --git a/collectors/python.d.plugin/traefik/Makefile.inc b/collectors/python.d.plugin/traefik/Makefile.inc new file mode 100644 index 0000000..926d56d --- /dev/null +++ b/collectors/python.d.plugin/traefik/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += traefik/traefik.chart.py +dist_pythonconfig_DATA += traefik/traefik.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += traefik/README.md traefik/Makefile.inc + diff --git a/collectors/python.d.plugin/traefik/README.md b/collectors/python.d.plugin/traefik/README.md new file mode 100644 index 0000000..251cdf2 --- /dev/null +++ b/collectors/python.d.plugin/traefik/README.md @@ -0,0 +1,74 @@ + + +# Traefik monitoring with Netdata + +Uses the `health` API to provide statistics. + +It produces: + +1. **Responses** by statuses + + - success (1xx, 2xx, 304) + - error (5xx) + - redirect (3xx except 304) + - bad (4xx) + - other (all other responses) + +2. **Responses** by codes + + - 2xx (successful) + - 5xx (internal server errors) + - 3xx (redirect) + - 4xx (bad) + - 1xx (informational) + - other (non-standart responses) + +3. **Detailed Response Codes** requests/s (number of responses for each response code family individually) + +4. **Requests**/s + + - request statistics + +5. **Total response time** + + - sum of all response time + +6. **Average response time** + +7. **Average response time per iteration** + +8. **Uptime** + + - Traefik server uptime + +## Configuration + +Edit the `python.d/traefik.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/traefik.conf +``` + +Needs only `url` to server's `health` + +Here is an example for local server: + +```yaml +update_every : 1 +priority : 60000 + +local: + url : 'http://localhost:8080/health' +``` + +Without configuration, module attempts to connect to `http://localhost:8080/health`. + +--- + + diff --git a/collectors/python.d.plugin/traefik/traefik.chart.py b/collectors/python.d.plugin/traefik/traefik.chart.py new file mode 100644 index 0000000..5a49846 --- /dev/null +++ b/collectors/python.d.plugin/traefik/traefik.chart.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +# Description: traefik netdata python.d module +# Author: Alexandre Menezes (@ale_menezes) +# SPDX-License-Identifier: GPL-3.0-or-later + +from collections import defaultdict +from json import loads + +from bases.FrameworkServices.UrlService import UrlService + +ORDER = [ + 'response_statuses', + 'response_codes', + 'detailed_response_codes', + 'requests', + 'total_response_time', + 'average_response_time', + 'average_response_time_per_iteration', + 'uptime' +] + +CHARTS = { + 'response_statuses': { + 'options': [None, 'Response statuses', 'requests/s', 'responses', 'traefik.response_statuses', 'stacked'], + 'lines': [ + ['successful_requests', 'success', 'incremental'], + ['server_errors', 'error', 'incremental'], + ['redirects', 'redirect', 'incremental'], + ['bad_requests', 'bad', 'incremental'], + ['other_requests', 'other', 'incremental'] + ] + }, + 'response_codes': { + 'options': [None, 'Responses by codes', 'requests/s', 'responses', 'traefik.response_codes', 'stacked'], + 'lines': [ + ['2xx', None, 'incremental'], + ['5xx', None, 'incremental'], + ['3xx', None, 'incremental'], + ['4xx', None, 'incremental'], + ['1xx', None, 'incremental'], + ['other', None, 'incremental'] + ] + }, + 'detailed_response_codes': { + 'options': [None, 'Detailed response codes', 'requests/s', 'responses', 'traefik.detailed_response_codes', + 'stacked'], + 'lines': [] + }, + 'requests': { + 'options': [None, 'Requests', 'requests/s', 'requests', 'traefik.requests', 'line'], + 'lines': [ + ['total_count', 'requests', 'incremental'] + ] + }, + 'total_response_time': { + 'options': [None, 'Total response time', 'seconds', 'timings', 'traefik.total_response_time', 'line'], + 'lines': [ + ['total_response_time_sec', 'response', 'absolute', 1, 10000] + ] + }, + 'average_response_time': { + 'options': [None, 'Average response time', 'milliseconds', 'timings', 'traefik.average_response_time', 'line'], + 'lines': [ + ['average_response_time_sec', 'response', 'absolute', 1, 1000] + ] + }, + 'average_response_time_per_iteration': { + 'options': [None, 'Average response time per iteration', 'milliseconds', 'timings', + 'traefik.average_response_time_per_iteration', 'line'], + 'lines': [ + ['average_response_time_per_iteration_sec', 'response', 'incremental', 1, 10000] + ] + }, + 'uptime': { + 'options': [None, 'Uptime', 'seconds', 'uptime', 'traefik.uptime', 'line'], + 'lines': [ + ['uptime_sec', 'uptime', 'absolute'] + ] + } +} + +HEALTH_STATS = [ + 'uptime_sec', + 'average_response_time_sec', + 'total_response_time_sec', + 'total_count', + 'total_status_code_count' +] + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.url = self.configuration.get('url', 'http://localhost:8080/health') + self.order = ORDER + self.definitions = CHARTS + self.last_total_response_time = 0 + self.last_total_count = 0 + self.data = { + 'successful_requests': 0, + 'redirects': 0, + 'bad_requests': 0, + 'server_errors': 0, + 'other_requests': 0, + '1xx': 0, + '2xx': 0, + '3xx': 0, + '4xx': 0, + '5xx': 0, + 'other': 0, + 'average_response_time_per_iteration_sec': 0, + } + + def _get_data(self): + data = self._get_raw_data() + + if not data: + return None + + data = loads(data) + + self.get_data_per_code_status(raw_data=data) + + self.get_data_per_code_family(raw_data=data) + + self.get_data_per_code(raw_data=data) + + self.data.update(fetch_data_(raw_data=data, metrics=HEALTH_STATS)) + + self.data['average_response_time_sec'] *= 1000000 + self.data['total_response_time_sec'] *= 10000 + if data['total_count'] != self.last_total_count: + self.data['average_response_time_per_iteration_sec'] = \ + (data['total_response_time_sec'] - self.last_total_response_time) * \ + 1000000 / (data['total_count'] - self.last_total_count) + else: + self.data['average_response_time_per_iteration_sec'] = 0 + self.last_total_response_time = data['total_response_time_sec'] + self.last_total_count = data['total_count'] + + return self.data or None + + def get_data_per_code_status(self, raw_data): + data = defaultdict(int) + for code, value in raw_data['total_status_code_count'].items(): + code_prefix = code[0] + if code_prefix == '1' or code_prefix == '2' or code == '304': + data['successful_requests'] += value + elif code_prefix == '3': + data['redirects'] += value + elif code_prefix == '4': + data['bad_requests'] += value + elif code_prefix == '5': + data['server_errors'] += value + else: + data['other_requests'] += value + self.data.update(data) + + def get_data_per_code_family(self, raw_data): + data = defaultdict(int) + for code, value in raw_data['total_status_code_count'].items(): + code_prefix = code[0] + if code_prefix == '1': + data['1xx'] += value + elif code_prefix == '2': + data['2xx'] += value + elif code_prefix == '3': + data['3xx'] += value + elif code_prefix == '4': + data['4xx'] += value + elif code_prefix == '5': + data['5xx'] += value + else: + data['other'] += value + self.data.update(data) + + def get_data_per_code(self, raw_data): + for code, value in raw_data['total_status_code_count'].items(): + if self.charts: + if code not in self.data: + self.charts['detailed_response_codes'].add_dimension([code, code, 'incremental']) + self.data[code] = value + + +def fetch_data_(raw_data, metrics): + data = dict() + + for metric in metrics: + value = raw_data + metrics_list = metric.split('.') + try: + for m in metrics_list: + value = value[m] + except KeyError: + continue + data['_'.join(metrics_list)] = value + + return data diff --git a/collectors/python.d.plugin/traefik/traefik.conf b/collectors/python.d.plugin/traefik/traefik.conf new file mode 100644 index 0000000..e3f182d --- /dev/null +++ b/collectors/python.d.plugin/traefik/traefik.conf @@ -0,0 +1,77 @@ +# netdata python.d.plugin configuration for traefik health data API +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, traefik plugin also supports the following: +# +# url: '://:/' +# # http://localhost:8080/health +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +local: + url: 'http://localhost:8080/health' diff --git a/collectors/python.d.plugin/uwsgi/Makefile.inc b/collectors/python.d.plugin/uwsgi/Makefile.inc new file mode 100644 index 0000000..75d96de --- /dev/null +++ b/collectors/python.d.plugin/uwsgi/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += uwsgi/uwsgi.chart.py +dist_pythonconfig_DATA += uwsgi/uwsgi.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += uwsgi/README.md uwsgi/Makefile.inc + diff --git a/collectors/python.d.plugin/uwsgi/README.md b/collectors/python.d.plugin/uwsgi/README.md new file mode 100644 index 0000000..58db1a4 --- /dev/null +++ b/collectors/python.d.plugin/uwsgi/README.md @@ -0,0 +1,52 @@ + + +# uWSGI monitoring with Netdata + +Monitors performance metrics exposed by [`Stats Server`](https://uwsgi-docs.readthedocs.io/en/latest/StatsServer.html). + + +Following charts are drawn: + +1. **Requests** + + - requests per second + - transmitted data + - average request time + +2. **Memory** + + - rss + - vsz + +3. **Exceptions** +4. **Harakiris** +5. **Respawns** + +## Configuration + +Edit the `python.d/uwsgi.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/uwsgi.conf +``` + +```yaml +socket: + name : 'local' + socket : '/tmp/stats.socket' + +localhost: + name : 'local' + host : 'localhost' + port : 1717 +``` + +When no configuration file is found, module tries to connect to TCP/IP socket: `localhost:1717`. + + diff --git a/collectors/python.d.plugin/uwsgi/uwsgi.chart.py b/collectors/python.d.plugin/uwsgi/uwsgi.chart.py new file mode 100644 index 0000000..e4d9000 --- /dev/null +++ b/collectors/python.d.plugin/uwsgi/uwsgi.chart.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +# Description: uwsgi netdata python.d module +# Author: Robbert Segeren (robbert-ef) +# SPDX-License-Identifier: GPL-3.0-or-later + +import json +from copy import deepcopy + +from bases.FrameworkServices.SocketService import SocketService + +ORDER = [ + 'requests', + 'tx', + 'avg_rt', + 'memory_rss', + 'memory_vsz', + 'exceptions', + 'harakiri', + 'respawn', +] + +DYNAMIC_CHARTS = [ + 'requests', + 'tx', + 'avg_rt', + 'memory_rss', + 'memory_vsz', +] + +# NOTE: lines are created dynamically in `check()` method +CHARTS = { + 'requests': { + 'options': [None, 'Requests', 'requests/s', 'requests', 'uwsgi.requests', 'stacked'], + 'lines': [ + ['requests', 'requests', 'incremental'] + ] + }, + 'tx': { + 'options': [None, 'Transmitted data', 'KiB/s', 'requests', 'uwsgi.tx', 'stacked'], + 'lines': [ + ['tx', 'tx', 'incremental'] + ] + }, + 'avg_rt': { + 'options': [None, 'Average request time', 'milliseconds', 'requests', 'uwsgi.avg_rt', 'line'], + 'lines': [ + ['avg_rt', 'avg_rt', 'absolute'] + ] + }, + 'memory_rss': { + 'options': [None, 'RSS (Resident Set Size)', 'MiB', 'memory', 'uwsgi.memory_rss', 'stacked'], + 'lines': [ + ['memory_rss', 'memory_rss', 'absolute', 1, 1 << 20] + ] + }, + 'memory_vsz': { + 'options': [None, 'VSZ (Virtual Memory Size)', 'MiB', 'memory', 'uwsgi.memory_vsz', 'stacked'], + 'lines': [ + ['memory_vsz', 'memory_vsz', 'absolute', 1, 1 << 20] + ] + }, + 'exceptions': { + 'options': [None, 'Exceptions', 'exceptions', 'exceptions', 'uwsgi.exceptions', 'line'], + 'lines': [ + ['exceptions', 'exceptions', 'incremental'] + ] + }, + 'harakiri': { + 'options': [None, 'Harakiris', 'harakiris', 'harakiris', 'uwsgi.harakiris', 'line'], + 'lines': [ + ['harakiri_count', 'harakiris', 'incremental'] + ] + }, + 'respawn': { + 'options': [None, 'Respawns', 'respawns', 'respawns', 'uwsgi.respawns', 'line'], + 'lines': [ + ['respawn_count', 'respawns', 'incremental'] + ] + }, +} + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + super(Service, self).__init__(configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.url = self.configuration.get('host', 'localhost') + self.port = self.configuration.get('port', 1717) + # Clear dynamic dimensions, these are added during `_get_data()` to allow adding workers at run-time + for chart in DYNAMIC_CHARTS: + self.definitions[chart]['lines'] = [] + self.last_result = {} + self.workers = [] + + def read_data(self): + """ + Read data from socket and parse as JSON. + :return: (dict) stats + """ + raw_data = self._get_raw_data() + if not raw_data: + return None + try: + return json.loads(raw_data) + except ValueError as err: + self.error(err) + return None + + def check(self): + """ + Parse configuration and check if we can read data. + :return: boolean + """ + self._parse_config() + return bool(self.read_data()) + + def add_worker_dimensions(self, key): + """ + Helper to add dimensions for a worker. + :param key: (int or str) worker identifier + :return: + """ + for chart in DYNAMIC_CHARTS: + for line in CHARTS[chart]['lines']: + dimension_id = '{}_{}'.format(line[0], key) + dimension_name = str(key) + + dimension = [dimension_id, dimension_name] + line[2:] + self.charts[chart].add_dimension(dimension) + + @staticmethod + def _check_raw_data(data): + # The server will close the connection when it's done sending + # data, so just keep looping until that happens. + return False + + def _get_data(self): + """ + Read data from socket + :return: dict + """ + stats = self.read_data() + if not stats: + return None + + result = { + 'exceptions': 0, + 'harakiri_count': 0, + 'respawn_count': 0, + } + + for worker in stats['workers']: + key = worker['pid'] + + # Add dimensions for new workers + if key not in self.workers: + self.add_worker_dimensions(key) + self.workers.append(key) + + result['requests_{}'.format(key)] = worker['requests'] + result['tx_{}'.format(key)] = worker['tx'] + result['avg_rt_{}'.format(key)] = worker['avg_rt'] + + # avg_rt is not reset by uwsgi, so reset here + if self.last_result.get('requests_{}'.format(key)) == worker['requests']: + result['avg_rt_{}'.format(key)] = 0 + + result['memory_rss_{}'.format(key)] = worker['rss'] + result['memory_vsz_{}'.format(key)] = worker['vsz'] + + result['exceptions'] += worker['exceptions'] + result['harakiri_count'] += worker['harakiri_count'] + result['respawn_count'] += worker['respawn_count'] + + self.last_result = result + return result diff --git a/collectors/python.d.plugin/uwsgi/uwsgi.conf b/collectors/python.d.plugin/uwsgi/uwsgi.conf new file mode 100644 index 0000000..7d09e73 --- /dev/null +++ b/collectors/python.d.plugin/uwsgi/uwsgi.conf @@ -0,0 +1,92 @@ +# netdata python.d.plugin configuration for uwsgi +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, uwsgi also supports the following: +# +# socket: 'path/to/uwsgistats.sock' +# +# or +# host: 'IP or HOSTNAME' # the host to connect to +# port: PORT # the port to connect to +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# + +socket: + name : 'local' + socket : '/tmp/stats.socket' + +localhost: + name : 'local' + host : 'localhost' + port : 1717 + +localipv4: + name : 'local' + host : '127.0.0.1' + port : 1717 + +localipv6: + name : 'local' + host : '::1' + port : 1717 diff --git a/collectors/python.d.plugin/varnish/Makefile.inc b/collectors/python.d.plugin/varnish/Makefile.inc new file mode 100644 index 0000000..2469b05 --- /dev/null +++ b/collectors/python.d.plugin/varnish/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += varnish/varnish.chart.py +dist_pythonconfig_DATA += varnish/varnish.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += varnish/README.md varnish/Makefile.inc + diff --git a/collectors/python.d.plugin/varnish/README.md b/collectors/python.d.plugin/varnish/README.md new file mode 100644 index 0000000..018905f --- /dev/null +++ b/collectors/python.d.plugin/varnish/README.md @@ -0,0 +1,65 @@ + + +# Varnish Cache monitoring with Netdata + +Provides HTTP accelerator global, Backends (VBE) and Storages (SMF, SMA, MSE) statistics using `varnishstat` tool. + +Note that both, Varnish-Cache (free and open source) and Varnish-Plus (Commercial/Enterprise version), are supported. + +## Requirements + +- `netdata` user must be a member of the `varnish` group + +## Charts + +This module produces the following charts: + +- Connections Statistics in `connections/s` +- Client Requests in `requests/s` +- All History Hit Rate Ratio in `percent` +- Current Poll Hit Rate Ratio in `percent` +- Expired Objects in `expired/s` +- Least Recently Used Nuked Objects in `nuked/s` +- Number Of Threads In All Pools in `pools` +- Threads Statistics in `threads/s` +- Current Queue Length in `requests` +- Backend Connections Statistics in `connections/s` +- Requests To The Backend in `requests/s` +- ESI Statistics in `problems/s` +- Memory Usage in `MiB` +- Uptime in `seconds` + +For every backend (VBE): + +- Backend Response Statistics in `kilobits/s` + +For every storage (SMF, SMA, or MSE): + +- Storage Usage in `KiB` +- Storage Allocated Objects + +## Configuration + +Edit the `python.d/varnish.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/varnish.conf +``` + +Only one parameter is supported: + +```yaml +instance_name: 'name' +``` + +The name of the `varnishd` instance to get logs from. If not specified, the host name is used. + +--- + + diff --git a/collectors/python.d.plugin/varnish/varnish.chart.py b/collectors/python.d.plugin/varnish/varnish.chart.py new file mode 100644 index 0000000..506ad02 --- /dev/null +++ b/collectors/python.d.plugin/varnish/varnish.chart.py @@ -0,0 +1,385 @@ +# -*- coding: utf-8 -*- +# Description: varnish netdata python.d module +# Author: ilyam8 +# SPDX-License-Identifier: GPL-3.0-or-later + +import re + +from bases.FrameworkServices.ExecutableService import ExecutableService +from bases.collection import find_binary + +ORDER = [ + 'session_connections', + 'client_requests', + 'all_time_hit_rate', + 'current_poll_hit_rate', + 'cached_objects_expired', + 'cached_objects_nuked', + 'threads_total', + 'threads_statistics', + 'threads_queue_len', + 'backend_connections', + 'backend_requests', + 'esi_statistics', + 'memory_usage', + 'uptime' +] + +CHARTS = { + 'session_connections': { + 'options': [None, 'Connections Statistics', 'connections/s', + 'client metrics', 'varnish.session_connection', 'line'], + 'lines': [ + ['sess_conn', 'accepted', 'incremental'], + ['sess_dropped', 'dropped', 'incremental'] + ] + }, + 'client_requests': { + 'options': [None, 'Client Requests', 'requests/s', + 'client metrics', 'varnish.client_requests', 'line'], + 'lines': [ + ['client_req', 'received', 'incremental'] + ] + }, + 'all_time_hit_rate': { + 'options': [None, 'All History Hit Rate Ratio', 'percentage', 'cache performance', + 'varnish.all_time_hit_rate', 'stacked'], + 'lines': [ + ['cache_hit', 'hit', 'percentage-of-absolute-row'], + ['cache_miss', 'miss', 'percentage-of-absolute-row'], + ['cache_hitpass', 'hitpass', 'percentage-of-absolute-row']] + }, + 'current_poll_hit_rate': { + 'options': [None, 'Current Poll Hit Rate Ratio', 'percentage', 'cache performance', + 'varnish.current_poll_hit_rate', 'stacked'], + 'lines': [ + ['cache_hit', 'hit', 'percentage-of-incremental-row'], + ['cache_miss', 'miss', 'percentage-of-incremental-row'], + ['cache_hitpass', 'hitpass', 'percentage-of-incremental-row'] + ] + }, + 'cached_objects_expired': { + 'options': [None, 'Expired Objects', 'expired/s', 'cache performance', + 'varnish.cached_objects_expired', 'line'], + 'lines': [ + ['n_expired', 'objects', 'incremental'] + ] + }, + 'cached_objects_nuked': { + 'options': [None, 'Least Recently Used Nuked Objects', 'nuked/s', 'cache performance', + 'varnish.cached_objects_nuked', 'line'], + 'lines': [ + ['n_lru_nuked', 'objects', 'incremental'] + ] + }, + 'threads_total': { + 'options': [None, 'Number Of Threads In All Pools', 'number', 'thread related metrics', + 'varnish.threads_total', 'line'], + 'lines': [ + ['threads', None, 'absolute'] + ] + }, + 'threads_statistics': { + 'options': [None, 'Threads Statistics', 'threads/s', 'thread related metrics', + 'varnish.threads_statistics', 'line'], + 'lines': [ + ['threads_created', 'created', 'incremental'], + ['threads_failed', 'failed', 'incremental'], + ['threads_limited', 'limited', 'incremental'] + ] + }, + 'threads_queue_len': { + 'options': [None, 'Current Queue Length', 'requests', 'thread related metrics', + 'varnish.threads_queue_len', 'line'], + 'lines': [ + ['thread_queue_len', 'in queue'] + ] + }, + 'backend_connections': { + 'options': [None, 'Backend Connections Statistics', 'connections/s', 'backend metrics', + 'varnish.backend_connections', 'line'], + 'lines': [ + ['backend_conn', 'successful', 'incremental'], + ['backend_unhealthy', 'unhealthy', 'incremental'], + ['backend_reuse', 'reused', 'incremental'], + ['backend_toolate', 'closed', 'incremental'], + ['backend_recycle', 'recycled', 'incremental'], + ['backend_fail', 'failed', 'incremental'] + ] + }, + 'backend_requests': { + 'options': [None, 'Requests To The Backend', 'requests/s', 'backend metrics', + 'varnish.backend_requests', 'line'], + 'lines': [ + ['backend_req', 'sent', 'incremental'] + ] + }, + 'esi_statistics': { + 'options': [None, 'ESI Statistics', 'problems/s', 'esi related metrics', 'varnish.esi_statistics', 'line'], + 'lines': [ + ['esi_errors', 'errors', 'incremental'], + ['esi_warnings', 'warnings', 'incremental'] + ] + }, + 'memory_usage': { + 'options': [None, 'Memory Usage', 'MiB', 'memory usage', 'varnish.memory_usage', 'stacked'], + 'lines': [ + ['memory_free', 'free', 'absolute', 1, 1 << 20], + ['memory_allocated', 'allocated', 'absolute', 1, 1 << 20]] + }, + 'uptime': { + 'lines': [ + ['uptime', None, 'absolute'] + ], + 'options': [None, 'Uptime', 'seconds', 'uptime', 'varnish.uptime', 'line'] + } +} + + +def backend_charts_template(name): + order = [ + '{0}_response_statistics'.format(name), + ] + + charts = { + order[0]: { + 'options': [None, 'Backend "{0}"'.format(name), 'kilobits/s', 'backend response statistics', + 'varnish.backend', 'area'], + 'lines': [ + ['{0}_beresp_hdrbytes'.format(name), 'header', 'incremental', 8, 1000], + ['{0}_beresp_bodybytes'.format(name), 'body', 'incremental', -8, 1000] + ] + }, + } + + return order, charts + + +def storage_charts_template(name): + order = [ + 'storage_{0}_usage'.format(name), + 'storage_{0}_alloc_objs'.format(name) + ] + + charts = { + order[0]: { + 'options': [None, 'Storage "{0}" Usage'.format(name), 'KiB', 'storage usage', 'varnish.storage_usage', 'stacked'], + 'lines': [ + ['{0}.g_space'.format(name), 'free', 'absolute', 1, 1 << 10], + ['{0}.g_bytes'.format(name), 'allocated', 'absolute', 1, 1 << 10] + ] + }, + order[1]: { + 'options': [None, 'Storage "{0}" Allocated Objects'.format(name), 'objects', 'storage usage', 'varnish.storage_alloc_objs', 'line'], + 'lines': [ + ['{0}.g_alloc'.format(name), 'allocated', 'absolute'] + ] + } + } + + return order, charts + + +VARNISHSTAT = 'varnishstat' + +re_version = re.compile(r'varnish-(?:plus-)?(?P\d+)\.(?P\d+)\.(?P\d+)') + + +class VarnishVersion: + def __init__(self, major, minor, patch): + self.major = major + self.minor = minor + self.patch = patch + + def __str__(self): + return '{0}.{1}.{2}'.format(self.major, self.minor, self.patch) + + +class Parser: + _backend_new = re.compile(r'VBE.([\d\w_.]+)\(.*?\).(beresp[\w_]+)\s+(\d+)') + _backend_old = re.compile(r'VBE\.[\d\w-]+\.([\w\d_-]+).(beresp[\w_]+)\s+(\d+)') + _default = re.compile(r'([A-Z]+\.)?([\d\w_.]+)\s+(\d+)') + + def __init__(self): + self.re_default = None + self.re_backend = None + + def init(self, data): + data = ''.join(data) + parsed_main = Parser._default.findall(data) + if parsed_main: + self.re_default = Parser._default + + parsed_backend = Parser._backend_new.findall(data) + if parsed_backend: + self.re_backend = Parser._backend_new + else: + parsed_backend = Parser._backend_old.findall(data) + if parsed_backend: + self.re_backend = Parser._backend_old + + def server_stats(self, data): + return self.re_default.findall(''.join(data)) + + def backend_stats(self, data): + return self.re_backend.findall(''.join(data)) + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.instance_name = configuration.get('instance_name') + self.parser = Parser() + self.command = None + self.collected_vbe = set() + self.collected_storages = set() + + def create_command(self): + varnishstat = find_binary(VARNISHSTAT) + + if not varnishstat: + self.error("can't locate '{0}' binary or binary is not executable by user netdata".format(VARNISHSTAT)) + return False + + command = [varnishstat, '-V'] + reply = self._get_raw_data(stderr=True, command=command) + if not reply: + self.error( + "no output from '{0}'. Is varnish running? Not enough privileges?".format(' '.join(self.command))) + return False + + ver = parse_varnish_version(reply) + if not ver: + self.error("failed to parse reply from '{0}', used regex :'{1}', reply : {2}".format( + ' '.join(command), re_version.pattern, reply)) + return False + + if self.instance_name: + self.command = [varnishstat, '-1', '-n', self.instance_name] + else: + self.command = [varnishstat, '-1'] + + if ver.major > 4: + self.command.extend(['-t', '1']) + + self.info("varnish version: {0}, will use command: '{1}'".format(ver, ' '.join(self.command))) + + return True + + def check(self): + if not self.create_command(): + return False + + # STDOUT is not empty + reply = self._get_raw_data() + if not reply: + self.error("no output from '{0}'. Is it running? Not enough privileges?".format(' '.join(self.command))) + return False + + self.parser.init(reply) + + # Output is parsable + if not self.parser.re_default: + self.error('cant parse the output...') + return False + + return True + + def get_data(self): + """ + Format data received from shell command + :return: dict + """ + raw = self._get_raw_data() + if not raw: + return None + + data = dict() + server_stats = self.parser.server_stats(raw) + if not server_stats: + return None + + stats = dict((param, value) for _, param, value in server_stats) + data.update(stats) + + self.get_vbe_backends(data, raw) + self.get_storages(server_stats) + + # varnish 5 uses default.g_bytes and default.g_space + data['memory_allocated'] = data.get('s0.g_bytes') or data.get('default.g_bytes') + data['memory_free'] = data.get('s0.g_space') or data.get('default.g_space') + + return data + + def get_vbe_backends(self, data, raw): + if not self.parser.re_backend: + return + stats = self.parser.backend_stats(raw) + if not stats: + return + + for (name, param, value) in stats: + data['_'.join([name, param])] = value + if name in self.collected_vbe: + continue + self.collected_vbe.add(name) + self.add_backend_charts(name) + + def get_storages(self, server_stats): + # Storage types: + # - SMF: File Storage + # - SMA: Malloc Storage + # - MSE: Massive Storage Engine (Varnish-Plus only) + # + # Stats example: + # [('SMF.', 'ssdStorage.c_req', '47686'), + # ('SMF.', 'ssdStorage.c_fail', '0'), + # ('SMF.', 'ssdStorage.c_bytes', '668102656'), + # ('SMF.', 'ssdStorage.c_freed', '140980224'), + # ('SMF.', 'ssdStorage.g_alloc', '39753'), + # ('SMF.', 'ssdStorage.g_bytes', '527122432'), + # ('SMF.', 'ssdStorage.g_space', '53159968768'), + # ('SMF.', 'ssdStorage.g_smf', '40130'), + # ('SMF.', 'ssdStorage.g_smf_frag', '311'), + # ('SMF.', 'ssdStorage.g_smf_large', '66')] + storages = [name for typ, name, _ in server_stats if typ.startswith(('SMF', 'SMA', 'MSE')) and name.endswith('g_space')] + if not storages: + return + for storage in storages: + storage = storage.split('.')[0] + if storage in self.collected_storages: + continue + self.collected_storages.add(storage) + self.add_storage_charts(storage) + + def add_backend_charts(self, backend_name): + self.add_charts(backend_name, backend_charts_template) + + def add_storage_charts(self, storage_name): + self.add_charts(storage_name, storage_charts_template) + + def add_charts(self, name, charts_template): + order, charts = charts_template(name) + + for chart_name in order: + params = [chart_name] + charts[chart_name]['options'] + dimensions = charts[chart_name]['lines'] + + new_chart = self.charts.add_chart(params) + for dimension in dimensions: + new_chart.add_dimension(dimension) + + +def parse_varnish_version(lines): + m = re_version.search(lines[0]) + if not m: + return None + + m = m.groupdict() + return VarnishVersion( + int(m['major']), + int(m['minor']), + int(m['patch']), + ) diff --git a/collectors/python.d.plugin/varnish/varnish.conf b/collectors/python.d.plugin/varnish/varnish.conf new file mode 100644 index 0000000..54bfe4d --- /dev/null +++ b/collectors/python.d.plugin/varnish/varnish.conf @@ -0,0 +1,66 @@ +# netdata python.d.plugin configuration for varnish +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, varnish also supports the following: +# +# instance_name: 'name' # the name of the varnishd instance to get logs from. If not specified, the host name is used. +# +# ---------------------------------------------------------------------- diff --git a/collectors/python.d.plugin/w1sensor/Makefile.inc b/collectors/python.d.plugin/w1sensor/Makefile.inc new file mode 100644 index 0000000..bddf146 --- /dev/null +++ b/collectors/python.d.plugin/w1sensor/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += w1sensor/w1sensor.chart.py +dist_pythonconfig_DATA += w1sensor/w1sensor.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += w1sensor/README.md w1sensor/Makefile.inc + diff --git a/collectors/python.d.plugin/w1sensor/README.md b/collectors/python.d.plugin/w1sensor/README.md new file mode 100644 index 0000000..b6d2b2d --- /dev/null +++ b/collectors/python.d.plugin/w1sensor/README.md @@ -0,0 +1,28 @@ + + +# 1-Wire Sensors monitoring with Netdata + +Monitors sensor temperature. + +On Linux these are supported by the wire, w1_gpio, and w1_therm modules. +Currently temperature sensors are supported and automatically detected. + +Charts are created dynamically based on the number of detected sensors. + +## Configuration + +Edit the `python.d/w1sensor.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/w1sensor.conf +``` + +--- + + diff --git a/collectors/python.d.plugin/w1sensor/w1sensor.chart.py b/collectors/python.d.plugin/w1sensor/w1sensor.chart.py new file mode 100644 index 0000000..c4f847b --- /dev/null +++ b/collectors/python.d.plugin/w1sensor/w1sensor.chart.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +# Description: 1-wire temperature monitor netdata python.d module +# Author: Diomidis Spinellis +# SPDX-License-Identifier: GPL-3.0-or-later + +import os +import re + +from bases.FrameworkServices.SimpleService import SimpleService + +# default module values (can be overridden per job in `config`) +update_every = 5 + +# Location where 1-Wire devices can be found +W1_DIR = '/sys/bus/w1/devices/' + +# Lines matching the following regular expression contain a temperature value +RE_TEMP = re.compile(r' t=(\d+)') + +ORDER = [ + 'temp', +] + +CHARTS = { + 'temp': { + 'options': [None, '1-Wire Temperature Sensor', 'Celsius', 'Temperature', 'w1sensor.temp', 'line'], + 'lines': [] + } +} + +# Known and supported family members +# Based on linux/drivers/w1/w1_family.h and w1/slaves/w1_therm.c +THERM_FAMILY = { + '10': 'W1_THERM_DS18S20', + '22': 'W1_THERM_DS1822', + '28': 'W1_THERM_DS18B20', + '3b': 'W1_THERM_DS1825', + '42': 'W1_THERM_DS28EA00', +} + + +class Service(SimpleService): + """Provide netdata service for 1-Wire sensors""" + + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + self.probes = [] + + def check(self): + """Auto-detect available 1-Wire sensors, setting line definitions + and probes to be monitored.""" + try: + file_names = os.listdir(W1_DIR) + except OSError as err: + self.error(err) + return False + + lines = [] + for file_name in file_names: + if file_name[2] != '-': + continue + if not file_name[0:2] in THERM_FAMILY: + continue + + self.probes.append(file_name) + identifier = file_name[3:] + name = identifier + config_name = self.configuration.get('name_' + identifier) + if config_name: + name = config_name + lines.append(['w1sensor_temp_' + identifier, name, 'absolute', + 1, 10]) + self.definitions['temp']['lines'] = lines + return len(self.probes) > 0 + + def get_data(self): + """Return data read from sensors.""" + data = dict() + + for file_name in self.probes: + file_path = W1_DIR + file_name + '/w1_slave' + identifier = file_name[3:] + try: + with open(file_path, 'r') as device_file: + for line in device_file: + matched = RE_TEMP.search(line) + if matched: + # Round to one decimal digit to filter-out noise + value = round(int(matched.group(1)) / 1000., 1) + value = int(value * 10) + data['w1sensor_temp_' + identifier] = value + except (OSError, IOError) as err: + self.error(err) + continue + return data or None diff --git a/collectors/python.d.plugin/w1sensor/w1sensor.conf b/collectors/python.d.plugin/w1sensor/w1sensor.conf new file mode 100644 index 0000000..1727100 --- /dev/null +++ b/collectors/python.d.plugin/w1sensor/w1sensor.conf @@ -0,0 +1,72 @@ +# netdata python.d.plugin configuration for w1sensor +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 5 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 5 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, example also supports the following: +# +# name_<1-Wire id>: '' +# This allows associating a human readable name with a sensor's 1-Wire +# identifier. Example: +# name_00000022276e: 'Machine room' +# name_00000022298f: 'Rack 12' +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) diff --git a/collectors/python.d.plugin/zscores/Makefile.inc b/collectors/python.d.plugin/zscores/Makefile.inc new file mode 100644 index 0000000..d8b1824 --- /dev/null +++ b/collectors/python.d.plugin/zscores/Makefile.inc @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += zscores/zscores.chart.py +dist_pythonconfig_DATA += zscores/zscores.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += zscores/README.md zscores/Makefile.inc diff --git a/collectors/python.d.plugin/zscores/README.md b/collectors/python.d.plugin/zscores/README.md new file mode 100644 index 0000000..4f84a6c --- /dev/null +++ b/collectors/python.d.plugin/zscores/README.md @@ -0,0 +1,144 @@ + + +# Z-Scores - basic anomaly detection for your key metrics and charts + +Smoothed, rolling [Z-Scores](https://en.wikipedia.org/wiki/Standard_score) for selected metrics or charts. + +This collector uses the [Netdata rest api](https://learn.netdata.cloud/docs/agent/web/api) to get the `mean` and `stddev` +for each dimension on specified charts over a time range (defined by `train_secs` and `offset_secs`). For each dimension +it will calculate a Z-Score as `z = (x - mean) / stddev` (clipped at `z_clip`). Scores are then smoothed over +time (`z_smooth_n`) and, if `mode: 'per_chart'`, aggregated across dimensions to a smoothed, rolling chart level Z-Score +at each time step. + +## Charts + +Two charts are produced: + +- **Z-Score** (`zscores.z`): This chart shows the calculated Z-Score per chart (or dimension if `mode='per_dim'`). +- **Z-Score >3** (`zscores.3stddev`): This chart shows a `1` if the absolute value of the Z-Score is greater than 3 or + a `0` otherwise. + +Below is an example of the charts produced by this collector and a typical example of how they would look when things +are 'normal' on the system. Most of the zscores tend to bounce randomly around a range typically between 0 to +3 (or -3 +to +3 if `z_abs: 'false'`), a few charts might stay steady at a more constant higher value depending on your +configuration and the typical workload on your system (typically those charts that do not change that much have a +smaller range of values on which to calculate a zscore and so tend to have a higher typical zscore). + +So really its a combination of the zscores values themselves plus, perhaps more importantly, how they change when +something strange occurs on your system which can be most useful. + +![zscores-collector-normal](https://user-images.githubusercontent.com/2178292/108776300-21d44d00-755a-11eb-92a4-ecb8f7d2f175.png) + +For example, if we go onto the system and run a command +like [`stress-ng --all 2`](https://wiki.ubuntu.com/Kernel/Reference/stress-ng) to create some stress, we see many charts +begin to have zscores that jump outside the typical range. When the absolute zscore for a chart is greater than 3 you +will see a corresponding line appear on the `zscores.3stddev` chart to make it a bit clearer what charts might be worth +looking at first (for more background information on why 3 stddev +see [here](https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule#:~:text=In%20the%20empirical%20sciences%20the,99.7%25%20probability%20as%20near%20certainty.)) +. + +In the example below we basically took a sledge hammer to our system so its not surprising that lots of charts light up +after we run the stress command. In a more realistic setting you might just see a handful of charts with strange zscores +and that could be a good indication of where to look first. + +![zscores-collector-abnormal](https://user-images.githubusercontent.com/2178292/108776316-28fb5b00-755a-11eb-80de-ec5d38089ecc.png) + +Then as the issue passes the zscores should settle back down into their normal range again as they are calculated in a +rolling and smoothed way (as defined by your `zscores.conf` file). + +![zscores-collector-normal-again](https://user-images.githubusercontent.com/2178292/108776439-4fb99180-755a-11eb-8bb7-b4df144cb44c.png) + +## Requirements + +This collector will only work with Python 3 and requires the below packages be installed. + +```bash +# become netdata user +sudo su -s /bin/bash netdata +# install required packages +pip3 install numpy pandas requests netdata-pandas==0.0.38 +``` + +## Configuration + +Install the underlying Python requirements, Enable the collector and restart Netdata. + +```bash +cd /etc/netdata/ +sudo ./edit-config python.d.conf +# Set `zscores: no` to `zscores: yes` +sudo systemctl restart netdata +``` + +The configuration for the zscores collector defines how it will behave on your system and might take some +experimentation with over time to set it optimally. Out of the box, the config comes with +some [sane defaults](https://www.netdata.cloud/blog/redefining-monitoring-netdata/) to get you started. + +If you are unsure about any of the below configuration options then it's best to just ignore all this and leave +the `zscores.conf` files alone to begin with. Then you can return to it later if you would like to tune things a bit +more once the collector is running for a while. + +Edit the `python.d/zscores.conf` configuration file using `edit-config` from the your +agent's [config directory](https://learn.netdata.cloud/guides/step-by-step/step-04#find-your-netdataconf-file), which is +usually at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/zscores.conf +``` + +The default configuration should look something like this. Here you can see each parameter (with sane defaults) and some +information about each one and what it does. + +```bash +# what host to pull data from +host: '127.0.0.1:19999' +# What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. +charts_regex: 'system\..*' +# length of time to base calculations off for mean and stddev +train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore +# offset preceding latest data to ignore when calculating mean and stddev +offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev +# recalculate the mean and stddev every n steps of the collector +train_every_n: 900 # recalculate mean and stddev every 15 minutes +# smooth the z score by averaging it over last n values +z_smooth_n: 15 # take a rolling average of the last 15 zscore values to reduce sensitivity to temporary 'spikes' +# cap absolute value of zscore (before smoothing) for better stability +z_clip: 10 # cap each zscore at 10 so as to avoid really large individual zscores swamping any rolling average +# set z_abs: 'true' to make all zscores be absolute values only. +z_abs: 'true' +# burn in period in which to initially calculate mean and stddev on every step +burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return +# mode can be to get a zscore 'per_dim' or 'per_chart' +mode: 'per_chart' # 'per_chart' means individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step +# per_chart_agg is how you aggregate from dimension to chart when mode='per_chart' +per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average. +``` + +## Notes + +- Python 3 is required as the [`netdata-pandas`](https://github.com/netdata/netdata-pandas) package uses python async + libraries ([asks](https://pypi.org/project/asks/) and [trio](https://pypi.org/project/trio/)) to make asynchronous + calls to the netdata rest api to get the required data for each chart when calculating the mean and stddev. +- It may take a few hours or so for the collector to 'settle' into it's typical behaviour in terms of the scores you + will see in the normal running of your system. +- The zscore you see for each chart when using `mode: 'per_chart'` as actually an aggregated zscore across all the + dimensions on the underlying chart. +- If you set `mode: 'per_dim'` then you will see a zscore for each dimension on each chart as opposed to one per chart. +- As this collector does some calculations itself in python you may want to try it out first on a test or development + system to get a sense of its performance characteristics. Most of the work in calculating the mean and stddev will be + pushed down to the underlying Netdata C libraries via the rest api. But some data wrangling and calculations are then + done using [Pandas](https://pandas.pydata.org/) and [Numpy](https://numpy.org/) within the collector itself. +- On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the + typical performance characteristics we saw from running this collector were: + - A runtime (`netdata.runtime_zscores`) of ~50ms when doing scoring and ~500ms when recalculating the mean and + stddev. + - Typically 3%-3.5% cpu usage from scoring, jumping to ~35% for one second when recalculating the mean and stddev. + - About ~50mb of ram (`apps.mem`) being continually used by the `python.d.plugin`. +- If you activate this collector on a fresh node, it might take a little while to build up enough data to calculate a + proper zscore. So until you actually have `train_secs` of available data the mean and stddev calculated will be subject + to more noise. diff --git a/collectors/python.d.plugin/zscores/zscores.chart.py b/collectors/python.d.plugin/zscores/zscores.chart.py new file mode 100644 index 0000000..1099b93 --- /dev/null +++ b/collectors/python.d.plugin/zscores/zscores.chart.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# Description: zscores netdata python.d module +# Author: andrewm4894 +# SPDX-License-Identifier: GPL-3.0-or-later + +from datetime import datetime +import re + +import requests +import numpy as np +import pandas as pd + +from bases.FrameworkServices.SimpleService import SimpleService +from netdata_pandas.data import get_data, get_allmetrics + +priority = 60000 +update_every = 5 +disabled_by_default = True + +ORDER = [ + 'z', + '3stddev' +] + +CHARTS = { + 'z': { + 'options': ['z', 'Z Score', 'z', 'Z Score', 'zscores.z', 'line'], + 'lines': [] + }, + '3stddev': { + 'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', 'zscores.3stddev', 'stacked'], + 'lines': [] + }, +} + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.host = self.configuration.get('host', '127.0.0.1:19999') + self.charts_regex = re.compile(self.configuration.get('charts_regex', 'system.*')) + self.charts_to_exclude = self.configuration.get('charts_to_exclude', '').split(',') + self.charts_in_scope = [ + c for c in + list(filter(self.charts_regex.match, + requests.get(f'http://{self.host}/api/v1/charts').json()['charts'].keys())) + if c not in self.charts_to_exclude + ] + self.train_secs = self.configuration.get('train_secs', 14400) + self.offset_secs = self.configuration.get('offset_secs', 300) + self.train_every_n = self.configuration.get('train_every_n', 900) + self.z_smooth_n = self.configuration.get('z_smooth_n', 15) + self.z_clip = self.configuration.get('z_clip', 10) + self.z_abs = bool(self.configuration.get('z_abs', True)) + self.burn_in = self.configuration.get('burn_in', 2) + self.mode = self.configuration.get('mode', 'per_chart') + self.per_chart_agg = self.configuration.get('per_chart_agg', 'mean') + self.order = ORDER + self.definitions = CHARTS + self.collected_dims = {'z': set(), '3stddev': set()} + self.df_mean = pd.DataFrame() + self.df_std = pd.DataFrame() + self.df_z_history = pd.DataFrame() + + def check(self): + _ = get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.') + return True + + def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1): + """If dimension not in chart then add it. + """ + for dim in data: + if dim not in self.collected_dims[chart]: + self.collected_dims[chart].add(dim) + self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor]) + + for dim in list(self.collected_dims[chart]): + if dim not in data: + self.collected_dims[chart].remove(dim) + self.charts[chart].del_dimension(dim, hide=False) + + def train_model(self): + """Calculate the mean and stddev for all relevant metrics and store them for use in calulcating zscore at each timestep. + """ + before = int(datetime.now().timestamp()) - self.offset_secs + after = before - self.train_secs + + self.df_mean = get_data( + self.host, self.charts_in_scope, after, before, points=10, group='average', col_sep='.' + ).mean().to_frame().rename(columns={0: "mean"}) + + self.df_std = get_data( + self.host, self.charts_in_scope, after, before, points=10, group='stddev', col_sep='.' + ).mean().to_frame().rename(columns={0: "std"}) + + def create_data(self, df_allmetrics): + """Use x, mean, stddev to generate z scores and 3stddev flags via some pandas manipulation. + Returning two dictionaries of dimensions and measures, one for each chart. + + :param df_allmetrics : pandas dataframe with latest data from api/v1/allmetrics. + :return: (,) tuple of dictionaries, one for zscores and the other for a flag if abs(z)>3. + """ + # calculate clipped z score for each available metric + df_z = pd.concat([self.df_mean, self.df_std, df_allmetrics], axis=1, join='inner') + df_z['z'] = ((df_z['value'] - df_z['mean']) / df_z['std']).clip(-self.z_clip, self.z_clip).fillna(0) * 100 + if self.z_abs: + df_z['z'] = df_z['z'].abs() + + # append last z_smooth_n rows of zscores to history table in wide format + self.df_z_history = self.df_z_history.append( + df_z[['z']].reset_index().pivot_table(values='z', columns='index'), sort=True + ).tail(self.z_smooth_n) + + # get average zscore for last z_smooth_n for each metric + df_z_smooth = self.df_z_history.melt(value_name='z').groupby('index')['z'].mean().to_frame() + df_z_smooth['3stddev'] = np.where(abs(df_z_smooth['z']) > 300, 1, 0) + data_z = df_z_smooth['z'].add_suffix('_z').to_dict() + + # aggregate to chart level if specified + if self.mode == 'per_chart': + df_z_smooth['chart'] = ['.'.join(x[0:2]) + '_z' for x in df_z_smooth.index.str.split('.').to_list()] + if self.per_chart_agg == 'absmax': + data_z = \ + list(df_z_smooth.groupby('chart').agg({'z': lambda x: max(x, key=abs)})['z'].to_dict().values())[0] + else: + data_z = list(df_z_smooth.groupby('chart').agg({'z': [self.per_chart_agg]})['z'].to_dict().values())[0] + + data_3stddev = {} + for k in data_z: + data_3stddev[k.replace('_z', '')] = 1 if abs(data_z[k]) > 300 else 0 + + return data_z, data_3stddev + + def get_data(self): + + if self.runs_counter <= self.burn_in or self.runs_counter % self.train_every_n == 0: + self.train_model() + + data_z, data_3stddev = self.create_data( + get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.').transpose()) + data = {**data_z, **data_3stddev} + + self.validate_charts('z', data_z, divisor=100) + self.validate_charts('3stddev', data_3stddev) + + return data diff --git a/collectors/python.d.plugin/zscores/zscores.conf b/collectors/python.d.plugin/zscores/zscores.conf new file mode 100644 index 0000000..07d62eb --- /dev/null +++ b/collectors/python.d.plugin/zscores/zscores.conf @@ -0,0 +1,108 @@ +# netdata python.d.plugin configuration for example +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +update_every: 5 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, example also supports the following: +# +# - none +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +local: + name: 'local' + + # what host to pull data from + host: '127.0.0.1:19999' + + # what charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. + charts_regex: 'system\..*' + + # Charts to exclude, useful if you would like to exclude some specific charts. + # Note: should be a ',' separated string like 'chart.name,chart.name'. + charts_to_exclude: 'system.uptime' + + # length of time to base calculations off for mean and stddev + train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore + + # offset preceding latest data to ignore when calculating mean and stddev + offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev + + # recalculate the mean and stddev every n steps of the collector + train_every_n: 900 # recalculate mean and stddev every 15 minutes + + # smooth the z score by averaging it over last n values + z_smooth_n: 15 # take a rolling average of the last 15 zscore values to reduce sensitivity to temporary 'spikes' + + # cap absolute value of zscore (before smoothing) for better stability + z_clip: 10 # cap each zscore at 10 so as to avoid really large individual zscores swamping any rolling average + + # set z_abs: 'true' to make all zscores be absolute values only. + z_abs: 'true' + + # burn in period in which to initially calculate mean and stddev on every step + burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return + + # mode can be to get a zscore 'per_dim' or 'per_chart' + mode: 'per_chart' # 'per_chart' means individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step + + # per_chart_agg is how you aggregate from dimension to chart when mode='per_chart' + per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average. diff --git a/collectors/slabinfo.plugin/Makefile.am b/collectors/slabinfo.plugin/Makefile.am new file mode 100644 index 0000000..07796ea --- /dev/null +++ b/collectors/slabinfo.plugin/Makefile.am @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +CLEANFILES = \ + slabinfo.plugin \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/slabinfo.plugin/README.md b/collectors/slabinfo.plugin/README.md new file mode 100644 index 0000000..2f49046 --- /dev/null +++ b/collectors/slabinfo.plugin/README.md @@ -0,0 +1,29 @@ + + +# slabinfo.plugin + +SLAB is a cache mechanism used by the Kernel to avoid fragmentation. + +Each internal structure (process, file descriptor, inode...) is stored within a SLAB. + +## configuring Netdata for slabinfo + +The plugin is disabled by default because it collects and displays a huge amount of metrics. +To enable it set `slabinfo = yes` in the `plugins` section of the `netdata.conf` configuration file. + +There is currently no configuration needed for the plugin itself. + +As `/proc/slabinfo` is only readable by root, this plugin is setuid root. + +## For what use + +This slabinfo details allows to have clues on actions done on your system. +In the following screenshot, you can clearly see a `find` done on a ext4 filesystem (the number of `ext4_inode_cache` & `dentry` are rising fast), and a few seconds later, an admin issued a `echo 3 > /proc/sys/vm/drop_cached` as their count dropped. + +![netdata_slabinfo](https://user-images.githubusercontent.com/9157986/64433811-7f06e500-d0bf-11e9-8e1e-087497e61033.png) + + + diff --git a/collectors/slabinfo.plugin/slabinfo.c b/collectors/slabinfo.plugin/slabinfo.c new file mode 100644 index 0000000..2e47ee2 --- /dev/null +++ b/collectors/slabinfo.plugin/slabinfo.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "daemon/common.h" +#include "libnetdata/required_dummies.h" + +#define PLUGIN_SLABINFO_NAME "slabinfo.plugin" +#define PLUGIN_SLABINFO_PROCFILE "/proc/slabinfo" + +#define CHART_TYPE "mem" +#define CHART_FAMILY "slab" +#define CHART_PRIO 3000 + +// #define slabdebug(...) if (debug) { fprintf(stderr, __VA_ARGS__); } +#define slabdebug(args...) if (debug) { \ + fprintf(stderr, "slabinfo.plugin DEBUG (%04d@%-10.10s:%-15.15s)::", __LINE__, __FILE__, __FUNCTION__); \ + fprintf(stderr, ##args); \ + fprintf(stderr, "\n"); } + +int running = 1; +int debug = 0; +size_t lines_discovered = 0; +int redraw_chart = 0; + +// ---------------------------------------------------------------------------- + +// Slabinfo format : +// format 2.1 Was provided by 57ed3eda977a215f054102b460ab0eb5d8d112e6 (2.6.24-rc6) as: +// seq_puts(m, "# name "); +// seq_puts(m, " : tunables "); +// seq_puts(m, " : slabdata "); +// +// With max values: +// seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", +// cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size, sinfo.objects_per_slab, (1 << sinfo.cache_order)); +// seq_printf(m, " : tunables %4u %4u %4u", +// sinfo.limit, sinfo.batchcount, sinfo.shared); +// seq_printf(m, " : slabdata %6lu %6lu %6lu", +// sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail); +// +// If CONFIG_DEBUG_SLAB is set, it will also add columns from slabinfo_show_stats (for SLAB only): +// seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu", +// allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees, overflows); +// seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", +// allochit, allocmiss, freehit, freemiss); +// +// Implementation choices: +// - Iterates through a linked list of kmem_cache. +// - Name is a char* from struct kmem_cache (mm/slab.h). +// - max name size found is 24: +// grep -roP 'kmem_cache_create\(".+"'| awk '{split($0,a,"\""); print a[2],length(a[2]); }' | sort -k2 -n +// - Using uint64 everywhere, as types fits and allows to use standard helpers + +struct slabinfo { + // procfile fields + const char *name; + uint64_t active_objs; + uint64_t num_objs; + uint64_t obj_size; + uint64_t obj_per_slab; + uint64_t pages_per_slab; + uint64_t tune_limit; + uint64_t tune_batchcnt; + uint64_t tune_shared_factor; + uint64_t data_active_slabs; + uint64_t data_num_slabs; + uint64_t data_shared_avail; + + // Calculated fields + uint64_t mem_usage; + uint64_t mem_waste; + uint8_t obj_filling; + + uint32_t hash; + struct slabinfo *next; +} *slabinfo_root = NULL, *slabinfo_next = NULL, *slabinfo_last_used = NULL; + +// The code is very inspired from "proc_net_dev.c" and "perf_plugin.c" + +// Get the existing object, or create a new one +static struct slabinfo *get_slabstruct(const char *name) { + struct slabinfo *s; + + slabdebug("--> Requested slabstruct %s", name); + + uint32_t hash = simple_hash(name); + + // Search it, from the next to the end + for (s = slabinfo_next; s; s = s->next) { + if ((hash = s->hash) && !strcmp(name, s->name)) { + slabdebug("<-- Found existing slabstruct after %s", slabinfo_last_used->name); + // Prepare the next run + slabinfo_next = s->next; + slabinfo_last_used = s; + return s; + } + } + + // Search it from the beginning to the last position we used + for (s = slabinfo_root; s != slabinfo_last_used; s = s->next) { + if (hash == s->hash && !strcmp(name, s->name)) { + slabdebug("<-- Found existing slabstruct after root %s", slabinfo_root->name); + slabinfo_next = s->next; + slabinfo_last_used = s; + return s; + } + } + + // Create a new one + s = callocz(1, sizeof(struct slabinfo)); + s->name = strdupz(name); + s->hash = hash; + + // Add it to the current position + if (slabinfo_root) { + slabdebug("<-- Creating new slabstruct after %s", slabinfo_last_used->name); + s->next = slabinfo_last_used->next; + slabinfo_last_used->next = s; + slabinfo_last_used = s; + } + else { + slabdebug("<-- Creating new slabstruct as root"); + slabinfo_root = slabinfo_last_used = s; + } + + return s; +} + + +// Read a full pass of slabinfo to update the structs +struct slabinfo *read_file_slabinfo() { + + slabdebug("-> Reading procfile %s", PLUGIN_SLABINFO_PROCFILE); + + static procfile *ff = NULL; + static long slab_pagesize = 0; + + if (unlikely(!slab_pagesize)) { + slab_pagesize = sysconf(_SC_PAGESIZE); + slabdebug(" Discovered pagesize: %ld", slab_pagesize); + } + + if(unlikely(!ff)) { + ff = procfile_reopen(ff, PLUGIN_SLABINFO_PROCFILE, " ,:" , PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + error("<- Cannot open file '%s", PLUGIN_SLABINFO_PROCFILE); + exit(1); + } + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + error("<- Cannot read file '%s'", PLUGIN_SLABINFO_PROCFILE); + exit(0); + } + + + // Iterate on all lines to populate / update the slabinfo struct + size_t lines = procfile_lines(ff), l; + if (unlikely(lines != lines_discovered)) { + lines_discovered = lines; + redraw_chart = 1; + } + + slabdebug(" Read %lu lines from procfile", (unsigned long)lines); + for(l = 2; l < lines; l++) { + if (unlikely(procfile_linewords(ff, l) < 14)) { + slabdebug(" Line %zu has only %zu words, skipping", l, procfile_linewords(ff,l)); + continue; + } + + char *name = procfile_lineword(ff, l, 0); + struct slabinfo *s = get_slabstruct(name); + + s->active_objs = str2uint64_t(procfile_lineword(ff, l, 1)); + s->num_objs = str2uint64_t(procfile_lineword(ff, l, 2)); + s->obj_size = str2uint64_t(procfile_lineword(ff, l, 3)); + s->obj_per_slab = str2uint64_t(procfile_lineword(ff, l, 4)); + s->pages_per_slab = str2uint64_t(procfile_lineword(ff, l, 5)); + + s->tune_limit = str2uint64_t(procfile_lineword(ff, l, 7)); + s->tune_batchcnt = str2uint64_t(procfile_lineword(ff, l, 8)); + s->tune_shared_factor = str2uint64_t(procfile_lineword(ff, l, 9)); + + s->data_active_slabs = str2uint64_t(procfile_lineword(ff, l, 11)); + s->data_num_slabs = str2uint64_t(procfile_lineword(ff, l, 12)); + s->data_shared_avail = str2uint64_t(procfile_lineword(ff, l, 13)); + + uint32_t memperslab = s->pages_per_slab * slab_pagesize; + // Internal fragmentation: loss per slab, due to objects not being a multiple of pagesize + //uint32_t lossperslab = memperslab - s->obj_per_slab * s->obj_size; + + // Total usage = slabs * pages per slab * page size + s->mem_usage = (uint64_t)(s->data_num_slabs * memperslab); + + // Wasted memory (filling): slabs allocated but not filled: sum total slab - sum total objects + s->mem_waste = s->mem_usage - (uint64_t)(s->active_objs * s->obj_size); + //if (s->data_num_slabs > 1) + // s->mem_waste += s->data_num_slabs * lossperslab; + + + // Slab filling efficiency + if (s->num_objs > 0) + s->obj_filling = 100 * s->active_objs / s->num_objs; + else + s->obj_filling = 0; + + slabdebug(" Updated slab %s: %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" / %"PRIu64" %"PRIu64" %"PRIu64" / %"PRIu64" %"PRIu64" %"PRIu64" / %"PRIu64" %"PRIu64" %hhu", + name, s->active_objs, s->num_objs, s->obj_size, s->obj_per_slab, s->pages_per_slab, + s->tune_limit, s->tune_batchcnt, s->tune_shared_factor, + s->data_active_slabs, s->data_num_slabs, s->data_shared_avail, + s->mem_usage, s->mem_waste, s->obj_filling); + } + + return slabinfo_root; +} + + + +unsigned int do_slab_stats(int update_every) { + + static unsigned int loops = 0; + struct slabinfo *sactive = NULL, *s = NULL; + + // Main processing loop + while (running) { + + sactive = read_file_slabinfo(); + + // Init Charts + if (unlikely(redraw_chart)) { + redraw_chart = 0; + // Memory Usage + printf("CHART %s.%s '' 'Memory Usage' 'B' '%s' '' line %d %d %s\n" + , CHART_TYPE + , "slabmemory" + , CHART_FAMILY + , CHART_PRIO + , update_every + , PLUGIN_SLABINFO_NAME + ); + for (s = sactive; s; s = s->next) { + printf("DIMENSION %s '' absolute 1 1\n", s->name); + } + + // Slab active usage (filling) + printf("CHART %s.%s '' 'Object Filling' '%%' '%s' '' line %d %d %s\n" + , CHART_TYPE + , "slabfilling" + , CHART_FAMILY + , CHART_PRIO + 1 + , update_every + , PLUGIN_SLABINFO_NAME + ); + for (s = sactive; s; s = s->next) { + printf("DIMENSION %s '' absolute 1 1\n", s->name); + } + + // Memory waste + printf("CHART %s.%s '' 'Memory waste' 'B' '%s' '' line %d %d %s\n" + , CHART_TYPE + , "slabwaste" + , CHART_FAMILY + , CHART_PRIO + 2 + , update_every + , PLUGIN_SLABINFO_NAME + ); + for (s = sactive; s; s = s->next) { + printf("DIMENSION %s '' absolute 1 1\n", s->name); + } + } + + + // + // Memory usage + // + printf("BEGIN %s.%s\n" + , CHART_TYPE + , "slabmemory" + ); + for (s = sactive; s; s = s->next) { + printf("SET %s = %"PRIu64"\n" + , s->name + , s->mem_usage + ); + } + printf("END\n"); + + // + // Slab active usage + // + printf("BEGIN %s.%s\n" + , CHART_TYPE + , "slabfilling" + ); + for (s = sactive; s; s = s->next) { + printf("SET %s = %u\n" + , s->name + , s->obj_filling + ); + } + printf("END\n"); + + // + // Memory waste + // + printf("BEGIN %s.%s\n" + , CHART_TYPE + , "slabwaste" + ); + for (s = sactive; s; s = s->next) { + printf("SET %s = %"PRIu64"\n" + , s->name + , s->mem_waste + ); + } + printf("END\n"); + + + loops++; + + sleep(update_every); + } + + return loops; +} + + + + +// ---------------------------------------------------------------------------- +// main + +void usage(void) { + fprintf(stderr, "%s\n", program_name); + exit(1); +} + +int main(int argc, char **argv) { + clocks_init(); + + program_name = argv[0]; + program_version = "0.1"; + error_log_syslog = 0; + + int update_every = 1, i, n, freq = 0; + + for (i = 1; i < argc; i++) { + // Frequency parsing + if(isdigit(*argv[i]) && !freq) { + n = (int) str2l(argv[i]); + if (n > 0) { + if (n >= UPDATE_EVERY_MAX) { + error("Invalid interval value: %s", argv[i]); + exit(1); + } + freq = n; + } + } + else if (strcmp("debug", argv[i]) == 0) { + debug = 1; + continue; + } + else { + fprintf(stderr, + "netdata slabinfo.plugin %s\n" + "This program is a data collector plugin for netdata.\n" + "\n" + "Available command line options:\n" + "\n" + " COLLECTION_FREQUENCY data collection frequency in seconds\n" + " minimum: %d\n" + "\n" + " debug enable verbose output\n" + " default: disabled\n" + "\n", + program_version, + update_every + ); + exit(1); + } + } + + if(freq >= update_every) + update_every = freq; + else if(freq) + error("update frequency %d seconds is too small for slabinfo. Using %d.", freq, update_every); + + + // Call the main function. Time drift to be added + do_slab_stats(update_every); + + return 0; +} diff --git a/collectors/statsd.plugin/Makefile.am b/collectors/statsd.plugin/Makefile.am new file mode 100644 index 0000000..c8144c1 --- /dev/null +++ b/collectors/statsd.plugin/Makefile.am @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) + +statsdconfigdir=$(libconfigdir)/statsd.d +dist_statsdconfig_DATA = \ + example.conf \ + k6.conf \ + asterisk.conf \ + $(NULL) + +userstatsdconfigdir=$(configdir)/statsd.d +dist_userstatsdconfig_DATA = \ + $(NULL) + +# Explicitly install directories to avoid permission issues due to umask +install-exec-local: + $(INSTALL) -d $(DESTDIR)$(userstatsdconfigdir) diff --git a/collectors/statsd.plugin/README.md b/collectors/statsd.plugin/README.md new file mode 100644 index 0000000..b46ca28 --- /dev/null +++ b/collectors/statsd.plugin/README.md @@ -0,0 +1,699 @@ + + +StatsD is a system to collect data from any application. Applications send metrics to it, usually via non-blocking UDP communication, and StatsD servers collect these metrics, perform a few simple calculations on them and push them to backend time-series databases. + +If you want to learn more about the StatsD protocol, we have written a [blog post](https://www.netdata.cloud/blog/introduction-to-statsd/) about it! + + +Netdata is a fully featured statsd server. It can collect statsd formatted metrics, visualize them on its dashboards and store them in it's database for long-term retention. + +Netdata statsd is inside Netdata (an internal plugin, running inside the Netdata daemon), it is configured via `netdata.conf` and by-default listens on standard statsd port 8125. Netdata supports both TCP and UDP packets at the same time. + +Since statsd is embedded in Netdata, it means you now have a statsd server embedded on all your servers. + +Netdata statsd is fast. It can collect several millions of metrics per second on modern hardware, using just 1 CPU core. The implementation uses two threads: one thread collects metrics, another thread updates the charts from the collected data. + +## Available StatsD synthetic application charts + +Netdata ships with a few synthetic chart definitions to automatically present application metrics into a more uniform way. These synthetic charts are configuration files (you can create your own) that re-arrange statsd metrics into a more meaningful way. + +On synthetic charts, we can have alarms as with any metric and chart. + +- [K6 load testing tool](https://k6.io) + - **Description:** k6 is a developer-centric, free and open-source load testing tool built for making performance testing a productive and enjoyable experience. + - [Documentation](/collectors/statsd.plugin/k6.md) + - [Configuration](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/k6.conf) +- [Asterisk](https://www.asterisk.org/) + - **Description:** Asterisk is an Open Source PBX and telephony toolkit. + - [Documentation](/collectors/statsd.plugin/asterisk.md) + - [Configuration](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/asterisk.conf) + +## Metrics supported by Netdata + +Netdata fully supports the StatsD protocol and also extends it to support more advanced Netdata specific use cases. All StatsD client libraries can be used with Netdata too. + +- **Gauges** + + The application sends `name:value|g`, where `value` is any **decimal/fractional** number, StatsD reports the latest value collected and the number of times it was updated (events). + + The application may increment or decrement a previous value, by setting the first character of the value to `+` or `-` (so, the only way to set a gauge to an absolute negative value, is to first set it to zero). + + [Sampling rate](#sampling-rates) is supported. + [Tags](#tags) are supported for changing chart units, family and dimension name. + + When a gauge is not collected and the setting is not to show gaps on the charts (the default), the last value will be shown, until a data collection event changes it. + +- **Counters** and **Meters** + + The application sends `name:value|c`, `name:value|C` or `name:value|m`, where `value` is a positive or negative **integer** number of events occurred, StatsD reports the **rate** and the number of times it was updated (events). + + `:value` can be omitted and StatsD will assume it is `1`. `|c`, `|C` and `|m` can be omitted and StatsD will assume it is `|m`. So, the application may send just `name` and StatsD will parse it as `name:1|m`. + + - Counters use `|c` (etsy/StatsD compatible) or `|C` (brubeck compatible) + - Meters use `|m` + + [Sampling rate](#sampling-rates) is supported. + [Tags](#tags) are supported for changing chart units, family and dimension name. + + When a counter or meter is not collected, StatsD **defaults** to showing a zero value, until a data collection event changes the value. + +- **Timers** and **Histograms** + + The application sends `name:value|ms` or `name:value|h`, where `value` is any **decimal/fractional** number, StatsD reports **min**, **max**, **average**, **95th percentile**, **median** and **standard deviation** and the total number of times it was updated (events). Internally it also calculates the **sum**, which is available for synthetic charts. + + - Timers use `|ms` + - Histograms use `|h` + + The only difference between the two, is the `units` of the charts, as timers report *milliseconds*. + + [Sampling rate](#sampling-rates) is supported. + [Tags](#tags) are supported for changing chart units and family. + + When a counter or meter is not collected, StatsD **defaults** to showing a zero value, until a data collection event changes the value. + +- **Sets** + + The application sends `name:value|s`, where `value` is anything (**number or text**, leading and trailing spaces are removed), StatsD reports the number of unique values sent and the number of times it was updated (events). + + Sampling rate is **not** supported for Sets. `value` is always considered text (so `01` and `1` are considered different). + + [Tags](#tags) are supported for changing chart units and family. + + When a set is not collected, Netdata **defaults** to showing a zero value, until a data collection event changes the value. + +- **Dictionaries** + + The application sends `name:value|d`, where `value` is anything (**number or text**, leading and trailing spaces are removed), StatsD reports the number of events sent for each `value` and the total times `name` was updated (events). + + Sampling rate is **not** supported for Dictionaries. `value` is always considered text (so `01` and `1` are considered different). + + [Tags](#tags) are supported for changing chart units and family. + + When a set is not collected, Netdata **defaults** to showing a zero value, until a data collection event changes the value. + +#### Sampling Rates + +The application may append `|@sampling_rate`, where `sampling_rate` is a number from `0.0` to `1.0` in order for StatD to extrapolate the value and predict the total for the entire period. If the application reports to StatsD a value for 1/10th of the time, it can append `|@0.1` to the metrics it sends to statsd. + +#### Tags + +The application may append `|#tag1:value1,tag2:value2,tag3:value3` etc, where `tagX` and `valueX` are strings. `:valueX` can be omitted. + +Currently, Netdata uses only 2 tags: + + * `units=string` which sets the units of the chart that is automatically generated + * `family=string` which sets the family of the chart that is automatically generated (the family is the submenu of the dashboard) + * `name=string` which sets the name of the dimension of the chart that is automatically generated (only for counters, meters, gauges) + +Other tags are parsed, but currently are ignored. + +Charts are not updated to change units or dimension names once they are created. So, either send the tags on every event, or use the special `zinit` value to initiaze the charts at the beginning. `zinit` is a special value that can be used on any chart, to have netdata initialize the charts, without actually setting any values to them. So, instead of sending `my.metric:VALUE|c|#units=bytes,name=size` every time, the application can send at the beginning `my.metric:zinit|c|#units=bytes,name=size` and then `my.metric:VALUE|c`. + +#### Overlapping metrics + +Netdata's StatsD server maintains different indexes for each of the metric types supported. This means the same metric `name` may exist under different types concurrently. + +#### How to name your metrics + +A good practice is to name your metrics like `application.operation.metric`, where: + +- `application` is the application name - Netdata will automatically create a dashboard section based on the first keyword of the metrics, so you can have all your applications in different sections. +- `operation` is the operation your application is executing, like `dbquery`, `request`, `response`, etc. +- `metric` is anything you want to name your metric as. Netdata will automatically append the metric type (meter, counter, gauge, set, dictionary, timer, histogram) to the generated chart. + +Using [Tags](#tags) you can also change the submenus of the dashboard, the units of the charts and for meters, counters and gauges, the name of dimension. So, you can have a usable default view without using [Synthetic StatsD charts](#synthetic-statsd-charts) + +#### Multiple metrics per packet + +Netdata accepts multiple metrics per packet if each is terminated with a newline (`\n`) at the end. + +#### TCP packets + +Netdata listens for both TCP and UDP packets. For TCP, is it important to always append `\n` on each metric, as Netdata will use the newline character to detect if a metric is split into multiple TCP packets. + + +#### UDP packets + +When sending multiple metrics over a single UDP message, it is important not to exceed the network MTU, which is usually 1500 bytes. + +Netdata will accept UDP packets up to 9000 bytes, but the underlying network will not exceed MTU. + +> You can read more about the network maximum transmission unit(MTU) in this cloudflare [article](https://www.cloudflare.com/en-gb/learning/network-layer/what-is-mtu/). + +## Configuration + +You can find the configuration at `/etc/netdata/netdata.conf`: + +``` +[statsd] + # enabled = yes + # decimal detail = 1000 + # update every (flushInterval) = 1 + # udp messages to process at once = 10 + # create private charts for metrics matching = * + # max private charts allowed = 200 + # max private charts hard limit = 1000 + # private charts memory mode = save + # private charts history = 3996 + # histograms and timers percentile (percentThreshold) = 95.00000 + # add dimension for number of events received = no + # gaps on gauges (deleteGauges) = no + # gaps on counters (deleteCounters) = no + # gaps on meters (deleteMeters) = no + # gaps on sets (deleteSets) = no + # gaps on histograms (deleteHistograms) = no + # gaps on timers (deleteTimers) = no + # listen backlog = 4096 + # default port = 8125 + # bind to = udp:localhost:8125 tcp:localhost:8125 +``` + +### StatsD main config options + +- `enabled = yes|no` + + controls if StatsD will be enabled for this Netdata. The default is enabled. + +- `default port = 8125` + + controls the default port StatsD will use if no port is defined in the following setting. + +- `bind to = udp:localhost tcp:localhost` + + is a space separated list of IPs and ports to listen to. The format is `PROTOCOL:IP:PORT` - if `PORT` is omitted, the `default port` will be used. If `IP` is IPv6, it needs to be enclosed in `[]`. `IP` can also be `*` (to listen on all IPs) or even a hostname. + +- `update every (flushInterval) = 1` seconds, controls the frequency StatsD will push the collected metrics to Netdata charts. + +- `decimal detail = 1000` controls the number of fractional digits in gauges and histograms. Netdata collects metrics using signed 64-bit integers and their fractional detail is controlled using multipliers and divisors. This setting is used to multiply all collected values to convert them to integers and is also set as the divisors, so that the final data will be a floating point number with this fractional detail (1000 = X.0 - X.999, 10000 = X.0 - X.9999, etc). + +The rest of the settings are discussed below. + +## StatsD charts + +Netdata can visualize StatsD collected metrics in 2 ways: + +1. Each metric gets its own **private chart**. This is the default and does not require any configuration. You can adjust the default parameters. + +2. **Synthetic charts** can be created, combining multiple metrics, independently of their metric types. For this type of charts, special configuration is required, to define the chart title, type, units, its dimensions, etc. + +### Private metric charts + +Private charts are controlled with `create private charts for metrics matching = *`. This setting accepts a space-separated list of [simple patterns](/libnetdata/simple_pattern/README.md). Netdata will create private charts for all metrics **by default**. + +For example, to render charts for all `myapp.*` metrics, except `myapp.*.badmetric`, use: + +``` +create private charts for metrics matching = !myapp.*.badmetric myapp.* +``` + +You can specify Netdata StatsD to have a different `memory mode` than the rest of the Netdata Agent. You can read more about `memory mode` in the [documentation](/database/README.md). + +The default behavior is to use the same settings as the rest of the Netdata Agent. If you wish to change them, edit the following settings: +- `private charts memory mode` +- `private charts history` + +### Optimize private metric charts visualization and storage + +If you have thousands of metrics, each with its own private chart, you may notice that your web browser becomes slow when you view the Netdata dashboard (this is a web browser issue we need to address at the Netdata UI). So, Netdata has a protection to stop creating charts when `max private charts allowed = 200` (soft limit) is reached. + +The metrics above this soft limit are still processed by Netdata, can be used in synthetic charts and will be available to be sent to backend time-series databases, up to `max private charts hard limit = 1000`. So, between 200 and 1000 charts, Netdata will still generate charts, but they will automatically be created with `memory mode = none` (Netdata will not maintain a database for them). These metrics will be sent to backend time series databases, if the backend configuration is set to `as collected`. + +Metrics above the hard limit are still collected, but they can only be used in synthetic charts (once a metric is added to chart, it will be sent to backend servers too). + +Example private charts (automatically generated without any configuration): + +#### Counters + +- Scope: **count the events of something** (e.g. number of file downloads) +- Format: `name:INTEGER|c` or `name:INTEGER|C` or `name|c` +- StatsD increments the counter by the `INTEGER` number supplied (positive, or negative). + +![image](https://cloud.githubusercontent.com/assets/2662304/26131553/4a26d19c-3aa3-11e7-94e8-c53b5ed6ebc3.png) + +#### Gauges + +- Scope: **report the value of something** (e.g. cache memory used by the application server) +- Format: `name:FLOAT|g` +- StatsD remembers the last value supplied, and can increment or decrement the latest value if `FLOAT` begins with `+` or `-`. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131575/5d54e6f0-3aa3-11e7-9099-bc4440cd4592.png) + +#### histograms + +- Scope: **statistics on a size of events** (e.g. statistics on the sizes of files downloaded) +- Format: `name:FLOAT|h` +- StatsD maintains a list of all the values supplied and provides statistics on them. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131587/704de72a-3aa3-11e7-9ea9-0d2bb778c150.png) + +The same chart with `sum` unselected, to show the detail of the dimensions supported: +![image](https://cloud.githubusercontent.com/assets/2662304/26131598/8076443a-3aa3-11e7-9ffa-ea535aee9c9f.png) + +#### Meters + +This is identical to `counter`. + +- Scope: **count the events of something** (e.g. number of file downloads) +- Format: `name:INTEGER|m` or `name|m` or just `name` +- StatsD increments the counter by the `INTEGER` number supplied (positive, or negative). + +![image](https://cloud.githubusercontent.com/assets/2662304/26131605/8fdf5a06-3aa3-11e7-963f-7ecf207d1dbc.png) + +#### Sets + +- Scope: **count the unique occurrences of something** (e.g. unique filenames downloaded, or unique users that downloaded files) +- Format: `name:TEXT|s` +- StatsD maintains a unique index of all values supplied, and reports the unique entries in it. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131612/9eaa7b1a-3aa3-11e7-903b-d881e9a35be2.png) + +#### Timers + +- Scope: **statistics on the duration of events** (e.g. statistics for the duration of file downloads) +- Format: `name:FLOAT|ms` +- StatsD maintains a list of all the values supplied and provides statistics on them. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131629/bc34f2d2-3aa3-11e7-8a07-f2fc94ba4352.png) + +### Synthetic StatsD charts + +Use synthetic charts to create dedicated sections on the dashboard to render your StatsD charts. + +Synthetic charts are organized in + +- **application** aka section in Netdata Dashboard. +- **charts for each application** aka family in Netdata Dashboard. +- **StatsD metrics for each chart** /aka charts and context Netdata Dashboard. + +> You can read more about how the Netdata Agent organizes information in the relevant [documentation](/web/README.md) + +For each application you need to create a `.conf` file in `/etc/netdata/statsd.d`. + +For example, if you want to monitor the application `myapp` using StatsD and Netdata, create the file `/etc/netdata/statsd.d/myapp.conf`, with this content: +``` +[app] + name = myapp + metrics = myapp.* + private charts = no + gaps when not collected = no + history = 60 +# memory mode = ram + +[dictionary] + m1 = metric1 + m2 = metric2 + +# replace 'mychart' with the chart id +# the chart will be named: myapp.mychart +[mychart] + name = mychart + title = my chart title + family = my family + context = chart.context + units = tests/s + priority = 91000 + type = area + dimension = myapp.metric1 m1 + dimension = myapp.metric2 m2 +``` + +Using the above configuration `myapp` should get its own section on the dashboard, having one chart with 2 dimensions. + +`[app]` starts a new application definition. The supported settings in this section are: + +- `name` defines the name of the app. +- `metrics` is a Netdata [simple pattern](/libnetdata/simple_pattern/README.md). This pattern should match all the possible StatsD metrics that will be participating in the application `myapp`. +- `private charts = yes|no`, enables or disables private charts for the metrics matched. +- `gaps when not collected = yes|no`, enables or disables gaps on the charts of the application in case that no metrics are collected. +- `memory mode` sets the memory mode for all charts of the application. The default is the global default for Netdata (not the global default for StatsD private charts). We suggest not to use this (we have commented it out in the example) and let your app use the global default for Netdata, which is our dbengine. + +- `history` sets the size of the round robin database for this application. The default is the global default for Netdata (not the global default for StatsD private charts). This is only relevant if you use `memory mode = save`. Read more on our [metrics storage(]/docs/store/change-metrics-storage.md) doc. + +`[dictionary]` defines name-value associations. These are used to renaming metrics, when added to synthetic charts. Metric names are also defined at each `dimension` line. However, using the dictionary dimension names can be declared globally, for each app and is the only way to rename dimensions when using patterns. Of course the dictionary can be empty or missing. + +Then, add any number of charts. Each chart should start with `[id]`. The chart will be called `app_name.id`. `family` controls the submenu on the dashboard. `context` controls the alarm templates. `priority` controls the ordering of the charts on the dashboard. The rest of the settings are informational. + +Add any number of metrics to a chart, using `dimension` lines. These lines accept 5 space separated parameters: + +1. the metric name, as it is collected (it has to be matched by the `metrics =` pattern of the app) +2. the dimension name, as it should be shown on the chart +3. an optional selector (type) of the value to shown (see below) +4. an optional multiplier +5. an optional divider +6. optional flags, space separated and enclosed in quotes. All the external plugins `DIMENSION` flags can be used. Currently the only usable flag is `hidden`, to add the dimension, but not show it on the dashboard. This is usually needed to have the values available for percentage calculation, or use them in alarms. + +So, the format is this: + +``` +dimension = [pattern] METRIC NAME TYPE MULTIPLIER DIVIDER OPTIONS +``` + +`pattern` is a keyword. When set, `METRIC` is expected to be a Netdata [simple pattern](/libnetdata/simple_pattern/README.md) that will be used to match all the StatsD metrics to be added to the chart. So, `pattern` automatically matches any number of StatsD metrics, all of which will be added as separate chart dimensions. + +`TYPE`, `MULTIPLIER`, `DIVIDER` and `OPTIONS` are optional. + +`TYPE` can be: + +- `events` to show the number of events received by StatsD for this metric +- `last` to show the last value, as calculated at the flush interval of the metric (the default) + +Then for histograms and timers the following types are also supported: + +- `min`, show the minimum value +- `max`, show the maximum value +- `sum`, show the sum of all values +- `average` (same as `last`) +- `percentile`, show the 95th percentile (or any other percentile, as configured at StatsD global config) +- `median`, show the median of all values (i.e. sort all values and get the middle value) +- `stddev`, show the standard deviation of the values + +#### Example synthetic charts + +StatsD metrics: `foo` and `bar`. + +Contents of file `/etc/netdata/stats.d/foobar.conf`: + +``` +[app] + name = foobarapp + metrics = foo bar + private charts = yes + +[foobar_chart1] + title = Hey, foo and bar together + family = foobar_family + context = foobarapp.foobars + units = foobars + type = area + dimension = foo 'foo me' last 1 1 + dimension = bar 'bar me' last 1 1 +``` + +Metrics sent to statsd: `foo:10|g` and `bar:20|g`. + +Private charts: + +![screenshot from 2017-08-03 23-28-19](https://user-images.githubusercontent.com/2662304/28942295-7c3a73a8-78a3-11e7-88e5-a9a006bb7465.png) + +Synthetic chart: + +![screenshot from 2017-08-03 23-29-14](https://user-images.githubusercontent.com/2662304/28942317-958a2c68-78a3-11e7-853f-32850141dd36.png) + +#### Renaming StatsD synthetic charts' metrics + +You can define a dictionary to rename metrics sent by StatsD clients. This enables you to send response `"200"` and Netdata visualize it as `succesful connection` + +The `[dictionary]` section accepts any number of `name = value` pairs. + +Netdata uses this dictionary as follows: + +1. When a `dimension` has a non-empty `NAME`, that name is looked up at the dictionary. + +2. If the above lookup gives nothing, or the `dimension` has an empty `NAME`, the original StatsD metric name is looked up at the dictionary. + +3. If any of the above succeeds, Netdata uses the `value` of the dictionary, to set the name of the dimension. The dimensions will have as ID the original StatsD metric name, and as name, the dictionary value. + +Use the dictionary in 2 ways: + +1. set `dimension = myapp.metric1 ''` and have at the dictionary `myapp.metric1 = metric1 name` +2. set `dimension = myapp.metric1 'm1'` and have at the dictionary `m1 = metric1 name` + +In both cases, the dimension will be added with ID `myapp.metric1` and will be named `metric1 name`. So, in alarms use either of the 2 as `${myapp.metric1}` or `${metric1 name}`. + +> keep in mind that if you add multiple times the same StatsD metric to a chart, Netdata will append `TYPE` to the dimension ID, so `myapp.metric1` will be added as `myapp.metric1_last` or `myapp.metric1_events`, etc. If you add multiple times the same metric with the same `TYPE` to a chart, Netdata will also append an incremental counter to the dimension ID, i.e. `myapp.metric1_last1`, `myapp.metric1_last2`, etc. + +#### Dimension patterns + +Netdata allows adding multiple dimensions to a chart, by matching the StatsD metrics with a Netdata simple pattern. + +Assume we have an API that provides StatsD metrics for each response code per method it supports, like these: + +``` +myapp.api.get.200 +myapp.api.get.400 +myapp.api.get.500 +myapp.api.del.200 +myapp.api.del.400 +myapp.api.del.500 +myapp.api.post.200 +myapp.api.post.400 +myapp.api.post.500 +myapp.api.all.200 +myapp.api.all.400 +myapp.api.all.500 +``` + +In order to add all the response codes of `myapp.api.get` to a chart, we simply make the following configuration: + +``` +[api_get_responses] + ... + dimension = pattern 'myapp.api.get.* '' last 1 1 +``` + +The above will add dimension named `200`, `400` and `500`. Netdata extracts the wildcard part of the metric name - so the dimensions will be named with whatever the `*` matched. + +You can rename the dimensions with this: + +``` +[dictionary] + get.200 = 200 ok + get.400 = 400 bad request + get.500 = 500 cannot connect to db + +[api_get_responses] + ... + dimension = pattern 'myapp.api.get.* 'get.' last 1 1 +``` + +Note that we added a `NAME` to the dimension line with `get.`. This is prefixed to the wildcarded part of the metric name, to compose the key for looking up the dictionary. So `500` became `get.500` which was looked up to the dictionary to find value `500 cannot connect to db`. This way we can have different dimension names, for each of the API methods (i.e. `get.500 = 500 cannot connect to db` while `post.500 = 500 cannot write to disk`). + +To add all 200s across all API methods to a chart, you can do this: + +``` +[ok_by_method] + ... + dimension = pattern 'myapp.api.*.200 '' last 1 1 +``` + +The above will add `get`, `post`, `del` and `all` to the chart. + +If `all` is not wanted (a `stacked` chart does not need the `all` dimension, since the sum of the dimensions provides the total), the line should be: + +``` +[ok_by_method] + ... + dimension = pattern '!myapp.api.all.* myapp.api.*.200 '' last 1 1 +``` + +With the above, all methods except `all` will be added to the chart. + +To automatically rename the methods, you can use this: + +``` +[dictionary] + method.get = GET + method.post = ADD + method.del = DELETE + +[ok_by_method] + ... + dimension = pattern '!myapp.api.all.* myapp.api.*.200 'method.' last 1 1 +``` + +Using the above, the dimensions will be added as `GET`, `ADD` and `DELETE`. + +## StatsD examples + +### Python + +It's really easy to instrument your python application with StatsD, for example using [jsocol/pystatsd](https://github.com/jsocol/pystatsd). + +```python +import statsd +c = statsd.StatsClient('localhost', 8125) +c.incr('foo') # Increment the 'foo' counter. +for i in range(100000000): + c.incr('bar') + c.incr('foo') + if i % 3: + c.decr('bar') + c.timing('stats.timed', 320) # Record a 320ms 'stats.timed'. +``` + +You can find detailed documentation in their [documentation page](https://statsd.readthedocs.io/en/v3.3/). + +### Javascript and Node.js + +Using the client library by [sivy/node-statsd](https://github.com/sivy/node-statsd), you can easily embed StatsD into your Node.js project. + +```javascript + var StatsD = require('node-statsd'), + client = new StatsD(); + + // Timing: sends a timing command with the specified milliseconds + client.timing('response_time', 42); + + // Increment: Increments a stat by a value (default is 1) + client.increment('my_counter'); + + // Decrement: Decrements a stat by a value (default is -1) + client.decrement('my_counter'); + + // Using the callback + client.set(['foo', 'bar'], 42, function(error, bytes){ + //this only gets called once after all messages have been sent + if(error){ + console.error('Oh noes! There was an error:', error); + } else { + console.log('Successfully sent', bytes, 'bytes'); + } + }); + + // Sampling, tags and callback are optional and could be used in any combination + client.histogram('my_histogram', 42, 0.25); // 25% Sample Rate + client.histogram('my_histogram', 42, ['tag']); // User-defined tag + client.histogram('my_histogram', 42, next); // Callback + client.histogram('my_histogram', 42, 0.25, ['tag']); + client.histogram('my_histogram', 42, 0.25, next); + client.histogram('my_histogram', 42, ['tag'], next); + client.histogram('my_histogram', 42, 0.25, ['tag'], next); +``` +### Other languages + +You can also use StatsD with: +- Golang, thanks to [alexcesaro/statsd](https://github.com/alexcesaro/statsd) +- Ruby, thanks to [reinh/statsd](https://github.com/reinh/statsd) +- Java, thanks to [DataDog/java-dogstatsd-client](https://github.com/DataDog/java-dogstatsd-client) + + +### Shell + +Getting the proper support for a programming language is not always easy, but the Unix shell is available on most Unix systems. You can use shell and `nc` to instrument your systems and send metric data to Netdata's StatsD implementation. + +Using the method you can send metrics from any script. You can generate events like: backup.started, backup.ended, backup.time, or even tail logs and convert them to metrics. + +> **IMPORTANT**: +> +> To send StatsD messages you need from the `netcat` package, the `nc` command. +> There are multiple versions of this package. Please try to experiment with the `nc` command you have available on your right system, to find the right parameters. +> +> In the examples below, we assume the `openbsd-netcat` is installed. + +If you plan to send short StatsD events at sporadic occasions, use UDP. The messages should not be too long (remember, most networks support up to 1500 bytes MTU, which is also the limit for StatsD messages over UDP). The good thing is that using UDP will not block your script, even if the StatsD server is not there (UDP messages are "fire-and-forget"). + + +For UDP use this: + +```sh +echo "APPLICATION.METRIC:VALUE|TYPE" | nc -u -w 0 localhost 8125 +``` + +`-u` turns on UDP, `-w 0` tells `nc` not to wait for a response from StatsD (idle time to close the connection). + +where: + +- `APPLICATION` is any name for your application +- `METRIC` is the name for the specific metric +- `VALUE` is the value for that metric (**meters**, **counters**, **gauges**, **timers** and **histograms** accept integer/decimal/fractional numbers, **sets** and **dictionaries** accept strings) +- `TYPE` is one of `m`, `c`, `g`, `ms`, `h`, `s`, `d` to define the metric type. + +For tailing a log and converting it to metrics, do something like this: + +```sh +tail -f some.log | awk 'awk commands to parse the log and format statsd metrics' | nc -N -w 120 localhost 8125 +``` + +`-N` tells `nc` to close the socket once it receives EOF on its input. `-w 120` tells `nc` to stop if the connection is idle for 120 seconds. The timeout is needed to stop the `nc` command if you restart Netdata while `nc` is connected to it. Without it, `nc` will sit idle forever. + +When you embed the above commands to a script, you may notice that all the metrics are sent to StatsD with a delay. They are buffered in the pipes `|`. You can turn them to real-time by prepending each command with `stdbuf -i0 -oL -eL command to be run`, like this: + +```sh +stdbuf -i0 -oL -eL tail -f some.log |\ + stdbuf -i0 -oL -eL awk 'awk commands to parse the log and format statsd metrics' |\ + stdbuf -i0 -oL -eL nc -N -w 120 localhost 8125 +``` + +If you use `mawk` you also need to run awk with `-W interactive`. + +Examples: + +To set `myapp.used_memory` as gauge to value `123456`, use: + +```sh +echo "myapp.used_memory:123456|g|#units:bytes" | nc -u -w 0 localhost 8125 +``` + +To increment `myapp.files_sent` by `10`, as a counter, use: + +```sh +echo "myapp.files_sent:10|c|#units:files" | nc -u -w 0 localhost 8125 +``` + +You can send multiple metrics like this: + +```sh +# send multiple metrics via UDP +printf "myapp.used_memory:123456|g|#units:bytes\nmyapp.files_sent:10|c|#units:files\n" | nc -u -w 0 localhost 8125 +``` + +Remember, for UDP communication each packet should not exceed the MTU. So, if you plan to push too many metrics at once, prefer TCP communication: + +```sh +# send multiple metrics via TCP +cat /tmp/statsd.metrics.txt | nc -N -w 120 localhost 8125 +``` + +You can also use this little function to take care of all the details: + +```sh +#!/usr/bin/env bash + +# we assume nc is from the openbsd-netcat package + +STATSD_HOST="localhost" +STATSD_PORT="8125" +statsd() { + local options="-u -w 0" all="${*}" + + # replace all spaces with newlines + all="${all// /\\n}" + + # if the string length of all parameters given is above 1000, use TCP + [ "${#all}" -gt 1000 ] && options="-N -w 0" + + # send the metrics to statsd + printf "${all}\n" | nc ${options} ${STATSD_HOST} ${STATSD_PORT} || return 1 + + return 0 +} + +if [ ! -z "${*}" ] +then + statsd "${@}" +fi +``` + +You can use it like this: + +```sh +# first, source it in your script +source statsd.sh + +# then, at any point: +statsd "myapp.used_memory:123456|g|#units:bytes" "myapp.files_sent:10|c|#units:files" ... +``` + +or even at a terminal prompt, like this: + +```sh +./statsd.sh "myapp.used_memory:123456|g|#units:bytes" "myapp.files_sent:10|c|#units:files" ... +``` + +The function is smart enough to call `nc` just once and pass all the metrics to it. It will also automatically switch to TCP if the metrics to send are above 1000 bytes. + +If you have gotten thus far, make sure to check out our [community forums](https://community.netdata.cloud) to share your experience using Netdata with StatsD. diff --git a/collectors/statsd.plugin/asterisk.conf b/collectors/statsd.plugin/asterisk.conf new file mode 100644 index 0000000..160b80f --- /dev/null +++ b/collectors/statsd.plugin/asterisk.conf @@ -0,0 +1,208 @@ +[app] + name = asterisk + metrics = asterisk.* + private charts = yes + gaps when not collected = no + +[dictionary] + # https://www.voip-info.org/asterisk-variable-hangupcause/ + q931.1 = unallocated 1 + q931.2 = no route transit net 2 + q931.3 = no route destination 3 + q931.6 = channel unacceptable 6 + q931.7 = call awarded delivered 7 + q931.16 = normal 16 + q931.17 = busy 17 + q931.18 = no response 18 + q931.19 = no answer 19 + q931.21 = rejected call 21 + q931.22 = number changed 22 + q931.27 = dst out of order 27 + q931.28 = invalid number 28 + q931.29 = rejected facility 29 + q931.30 = response to status 30 + q931.31 = normal unspecified 31 + q931.34 = congestion circuit 34 + q931.38 = net out of order 38 + q931.41 = normal tmp fail 41 + q931.42 = congestion switch 42 + q931.43 = access info discarded 43 + q931.44 = requested chan unavail 44 + q931.45 = pre empted 45 + q931.47 = resource unavailable, unspecified 47 + q931.50 = facility not subscribed 50 + q931.52 = outgoing call barred 52 + q931.54 = incoming call barred 54 + q931.57 = bearer capability not auth 57 + q931.58 = bearer capability not avail 58 + q931.65 = bearer capability not implemented 65 + q931.66 = chan not implemented 66 + q931.69 = facility not implemented 67 + q931.81 = invalid call reference 81 + q931.88 = incompatible destination 88 + q931.95 = invalid msg specified 95 + q931.96 = mandatory ie missing 96 + q931.97 = message type non exist 97 + q931.98 = wrong message 98 + q931.99 = ie non exist 99 + q931.100 = invalid ie contents 100 + q931.101 = wrong call state 101 + q931.102 = recovery on timer expire 102 + q931.103 = mandatory ie length error 103 + q931.111 = protocol error 111 + q931.127 = interworking 127 + + +[channels] + name = channels + title = Active Channels + family = channels + context = asterisk.channels + units = channels + priority = 91000 + type = stacked + dimension = pattern asterisk.channels.count 'channels' last 1 1 + # FIXME: netdata needs to prevent this from going negative + +[endpoints] + name = endpoints + title = Active Endpoints + family = endpoints + context = asterisk.endpoints + units = endpoints + priority = 91005 + type = stacked + dimension = pattern asterisk.endpoints.count 'endpoints' last 1 1 + +[endpoints_by_status] + name = endpoints_by_status + title = Active Endpoints by Status + family = endpoints + context = asterisk.endpoints_by_status + units = endpoints + priority = 91006 + type = stacked + dimension = pattern asterisk.endpoints.state.* '' last 1 1 + +[sip_channels_by_endpoint] + name = sip_channels_by_endpoint + title = Active SIP channels by endpoint + family = channels + context = asterisk.sip_channels_by_endpoint + units = channels + priority = 91110 + type = stacked + dimension = pattern asterisk.endpoints.SIP.*.channels '' last 1 1 + +[pjsip_channels_by_endpoint] + name = pjsip_channels_by_endpoint + title = Active PJSIP channels by endpoint + family = channels + context = asterisk.pjsip_channels_by_endpoint + units = channels + priority = 91111 + type = stacked + dimension = pattern asterisk.endpoints.PJSIP.*.channels '' last 1 1 + +[dialstatuses] + name = dialstatuses + title = Distribution of Dial Statuses + family = dial_statuses + context = asterisk.dialstatus + units = calls + priority = 91150 + type = stacked + dimension = pattern 'asterisk.dialstatus.*' '' last 1 1 + +[calltime] + name = calltime + title = Asterisk Channels Call Duration + family = calltime + context = asterisk.calltime + units = milliseconds + priority = 91160 + type = stacked + dimension = asterisk.channels.calltime 'calltime' average 1 1 + dimension = asterisk.channels.calltime 'sum' sum 1 1 hidden + dimension = asterisk.channels.calltime 'count' events 1 1 hidden + +[hangupcause] + name = hangupcause + title = Distribution of Hangup Causes + family = hangup_causes + context = asterisk.hangupcause + units = calls + priority = 91200 + type = stacked + dimension = pattern 'asterisk.hangupcause.*' 'q931.' last 1 1 + +[hangupcause_answer] + name = hangupcause_answer + title = Distribution of Hangup Causes for ANSWERed calls + family = hangup_causes + context = asterisk.hangupcause_answer + units = calls + priority = 91210 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.ANSWER.*' 'q931.' last 1 1 + +[hangupcause_busy] + name = hangupcause_busy + title = Distribution of Hangup Causes for BUSY calls + family = hangup_causes + context = asterisk.hangupcause_busy + units = calls + priority = 91215 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.BUSY.*' 'q931.' last 1 1 + +[hangupcause_cancel] + name = hangupcause_cancel + title = Distribution of Hangup Causes for CANCELled calls + family = hangup_causes + context = asterisk.hangupcause_cancel + units = calls + priority = 91220 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.CANCEL.*' 'q931.' last 1 1 + +[hangupcause_chanunavail] + name = hangupcause_chanunavail + title = Distribution of Hangup Causes for CHANUNVAILed calls + family = hangup_causes + context = asterisk.hangupcause_chanunavail + units = calls + priority = 91230 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.CHANUNAVAIL.*' 'q931.' last 1 1 + +[hangupcause_congestion] + name = hangupcause_congestion + title = Distribution of Hangup Causes for CONGESTIONed calls + family = hangup_causes + context = asterisk.hangupcause_congestion + units = calls + priority = 91240 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.CONGESTION.*' 'q931.' last 1 1 + +[events] + name = events + title = Asterisk Dialplan Events + family = events + context = asterisk.events + units = events/s + priority = 91400 + type = stacked + dimension = pattern 'asterisk.stasis.message.ast_channel_*_type' '' last 1 1 + +[qualify] + name = qualify + title = Asterisk PJSIP Peers Qualify + family = qualify + context = asterisk.qualify + units = milliseconds + priority = 91500 + type = stacked + dimension = pattern 'asterisk.PJSIP.contacts.*.rtt' '' max 1 1 + # FIXME: netdata needs to set update every = 15 on this diff --git a/collectors/statsd.plugin/asterisk.md b/collectors/statsd.plugin/asterisk.md new file mode 100644 index 0000000..94da94e --- /dev/null +++ b/collectors/statsd.plugin/asterisk.md @@ -0,0 +1,61 @@ + + +# Asterisk monitoring with Netdata + +Monitors [Asterisk](https://www.asterisk.org/) dialplan application's statistics. + +## Requirements + +- Asterisk [integrated with StatsD](https://www.asterisk.org/integrating-asterisk-with-statsd/). + +## Configuration + +Netdata ships +with [asterisk.conf](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/asterisk.conf) with +preconfigured charts. + +To receive Asterisk metrics in Netdata, uncomment the following lines in the `/etc/asterisk/statsd.conf` file: + +```ini +[general] +enabled = yes ; When set to yes, statsd support is enabled +server = 127.0.0.1 ; server[:port] of statsd server to use. + ; If not specified, the port is 8125 +prefix = asterisk ; Prefix to prepend to all metrics +``` + +> See [statsd.conf.sample](https://github.com/asterisk/asterisk/blob/master/configs/samples/statsd.conf.sample) for all available options. + +## Charts and metrics + +
Click to see screenshots of the charts. + +![image](https://user-images.githubusercontent.com/2662304/158055351-fcc7a7fb-9b95-4656-bdc6-2e5f5a909215.png) +![image](https://user-images.githubusercontent.com/2662304/158055367-cfd25cd5-d71a-4bab-8cd1-bfcc47bc7312.png) + +
+ +Mapping Asterisk StatsD metrics and Netdata charts. + +| Chart | Metrics | +|------------------------------------------------------|--------------------------------------------| +| Active Channels | asterisk.channels.count | +| Active Endpoints | asterisk.endpoints.count | +| Active Endpoints by Status | asterisk.endpoints.state.* | +| Active SIP channels by endpoint | asterisk.endpoints.SIP.*.channels | +| Active PJSIP channels by endpoint | asterisk.endpoints.PJSIP.*.channels | +| Distribution of Dial Statuses | asterisk.dialstatus.* | +| Asterisk Channels Call Duration | asterisk.channels.calltime | +| Distribution of Hangup Causes | asterisk.hangupcause.* | +| Distribution of Hangup Causes for ANSWERed calls | asterisk.dialhangupcause.ANSWER.* | +| Distribution of Hangup Causes for BUSY calls | asterisk.dialhangupcause.BUSY.* | +| Distribution of Hangup Causes for CANCELled calls | asterisk.dialhangupcause.CANCEL.* | +| Distribution of Hangup Causes for CHANUNVAILed calls | asterisk.dialhangupcause.CHANUNAVAIL.* | +| Distribution of Hangup Causes for CONGESTIONed calls | asterisk.dialhangupcause.CONGESTION.* | +| Asterisk Dialplan Events | asterisk.stasis.message.ast_channel_*_type | +| Asterisk PJSIP Peers Qualify | asterisk.PJSIP.contacts.*.rtt | diff --git a/collectors/statsd.plugin/example.conf b/collectors/statsd.plugin/example.conf new file mode 100644 index 0000000..2c7de6c --- /dev/null +++ b/collectors/statsd.plugin/example.conf @@ -0,0 +1,64 @@ +# statsd synthetic charts configuration + +# You can add many .conf files in /etc/netdata/statsd.d/, +# one for each of your apps. + +# start a new app - you can add many apps in the same file +[app] + # give a name for this app + # this controls the main menu on the dashboard + # and will be the prefix for all charts of the app + name = myexampleapp + + # match all the metrics of the app + metrics = myexampleapp.* + + # shall private charts of these metrics be created? + private charts = no + + # shall gaps be shown when metrics are not collected? + gaps when not collected = no + + # the memory mode for the charts of this app: none|map|save + # the default is to use the global memory mode + #memory mode = ram + + # the history size for the charts of this app, in seconds + # the default is to use the global history + #history = 3600 + +# create a chart +# this is its id - the chart will be named myexampleapp.myexamplechart +[myexamplechart] + # a name for the chart, similar to the id (2 names for each chart) + name = myexamplechart + + # the chart title + title = my chart title + + # the submenu of the dashboard + family = my family + + # the context for alarm templates + context = chart.context + + # the units of the chart + units = tests/s + + # the sorting priority of the chart on the dashboard + priority = 91000 + + # the type of chart to create: line | area | stacked + type = area + + # one or more dimensions for the chart + # type = events | last | min | max | sum | average | percentile | median | stddev + # events = the number of events for this metric + # last = the last value collected + # all the others are only valid for histograms and timers + dimension = myexampleapp.metric1 avg average 1 1 + dimension = myexampleapp.metric1 lower min 1 1 + dimension = myexampleapp.metric1 upper max 1 1 + dimension = myexampleapp.metric2 other last 1 1 + +# You can add as many charts as needed diff --git a/collectors/statsd.plugin/k6.conf b/collectors/statsd.plugin/k6.conf new file mode 100644 index 0000000..3bef00c --- /dev/null +++ b/collectors/statsd.plugin/k6.conf @@ -0,0 +1,110 @@ +[app] + name = k6 + metrics = k6* + private charts = no + gaps when not collected = yes + +[dictionary] + http_reqs = HTTP Requests + http_reqs_failed = Failed HTTP Requests + vus = Virtual active users + vus_max = max Virtual active users + iteration_duration = iteration duration + iteration_duration_max = max iteration duration + iteration_duration_min = min iteration duration + iteration_duration_avg = avg iteration duration + dropped_iterations = Dropped iterations + http_req_blocked = Blocked HTTP requests + http_req_connecting = Connecting HTTP requests + http_req_sending = Sending HTTP requests + http_req_receiving = Receiving HTTP requests + http_req_waiting = Waiting HTTP requests + http_req_duration_median = Median HTTP req duration + http_req_duration_average = Avg HTTP req duration + http_req_duration = HTTP req duration + http_req_duration_max = max HTTP req duration + http_req_duration_min = min HTTP req duration + http_req_duration_p95 = 95 percentile of HTTP req duration + data_received = Received data + data_sent = Sent data + + +[http_reqs] + name = http_reqs + title = HTTP Requests rate + family = http requests + context = k6.http_requests + dimension = k6.http_reqs http_reqs last 1 1 sum + type = line + units = requests/s + +[http_reqs] + name = http_reqs_failed + title = Failed HTTP Requests rate + family = http requests + context = k6.http_requests + dimension = k6.http_reqs_failed http_reqs_failed last 1 1 sum + type = line + units = requests/s + +[vus] + name = vus + title = Virtual Active Users + family = k6_metrics + dimension = k6.vus vus last 1 1 + dimension = k6.vus_max vus_max last 1 1 + type = line + units = vus + +[iteration_duration] + name = iteration_duration_2 + title = Iteration duration + family = k6_metrics + dimension = k6.iteration_duration iteration_duration last 1 1 + dimension = k6.iteration_duration iteration_duration_max max 1 1 + dimension = k6.iteration_duration iteration_duration_min min 1 1 + dimension = k6.iteration_duration iteration_duration_avg average 1 1 + type = line + units = s + +[dropped_iterations] + name = dropped_iterations + title = Dropped Iterations + family = k6_metrics + dimension = k6.dropped_iterations dropped_iterations last 1 1 + units = iterations + type = line + +[data] + name = data + title = K6 Data + family = k6_metrics + dimension = k6.data_received data_received last 1 1 + dimension = k6.data_sent data_sent last -1 1 + units = kb/s + type = area + +[http_req_duration_types] + name = http_req_duration_types + title = HTTP Requests total duration + family = http requests + dimension = k6.http_req_sending http_req_sending last 1 1 + dimension = k6.http_req_waiting http_req_waiting last 1 1 + dimension = k6.http_req_receiving http_req_receiving last 1 1 + dimension = k6.http_req_blocked http_req_blocked last 1 1 + dimension = k6.http_req_connecting http_req_connecting last 1 1 + units = ms + type = stacked + +[http_req_duration] + name = http_req_duration + title = HTTP duration metrics + family = http requests + dimension = k6.http_req_duration http_req_duration_median median 1 1 + dimension = k6.http_req_duration http_req_duration_max max 1 1 + dimension = k6.http_req_duration http_req_duration_average average 1 1 + dimension = k6.http_req_duration http_req_duration_min min 1 1 + dimension = k6.http_req_duration http_req_duration_p95 percentile 1 1 + dimension = k6.http_req_duration http_req_duration last 1 1 + units = ms + type = line diff --git a/collectors/statsd.plugin/k6.md b/collectors/statsd.plugin/k6.md new file mode 100644 index 0000000..4f8c701 --- /dev/null +++ b/collectors/statsd.plugin/k6.md @@ -0,0 +1,76 @@ + + +# K6 Load Testing monitoring with Netdata + +Monitors the impact of load testing experiments performed with [K6](https://k6.io/). + +You can read more about the metrics that K6 sends in the [K6 documentation](https://k6.io/docs/using-k6/metrics/). + +## Requirements + +- When running the k6 experiment, specify a [StatsD output](https://k6.io/docs/results-visualization/statsd/). + - Tip: K6 currently supports tags only with [datadog output](https://k6.io/docs/results-visualization/datadog/), which is in essence StatsD. Netdata can be used with both. + +## Metrics + +![image](https://user-images.githubusercontent.com/13405632/117691411-8a7baf00-b1c4-11eb-9d87-8e9e7214871f.png) + + +### HTTP Requests + +Number of HTTP requests that K6 generates, per second. + +### Failed HTTP Requests + +Number of failed HTTP requests that K6 generates, per second. + +### Virtual Active Users +Current number of active virtual users. + +### Iteration Duration + +The time it took K6 to complete one full iteration of the main function. + +### Dropped Iterations + +The number of iterations that could not be started either due to lack of Virtual Users or lack of time. + +### Data + +The amount of data received and sent. + +### TTP Requests total duration + +The total duration it took for a round-trip of an HTTP request. It includes: +- Blocked HTTP requests: time spent blocked before initiating the request +- Connecting HTTP requests: time spent establishing TCP connection to the remote host +- Sending HTTP requests: time spent sending data to the remote host +- Receiving HTTP requests: time spent receiving data from the remote host +- Waiting HTTP requests: time spent waiting for response from the remote host + +### HTTP duration metrics + +Different metrics on the HTTP request as defined by K6. The HTTP request duration is defined by K6 as: `HTTP sending request` + `HTTP receiving request` + `HTTP waiting request`. + +Metrics: +- Median +- Average +- Max +- Min +- 95th percentile +- absolute (the value as it is, without any computation) + +## Configuration + +The collector is preconfigured and defined in `statsd.plugin/k6.conf`. + +Due to being a StatsD collector, you only need to define the configuration file and then send data to Netdata using the StatsD protocol. + +If Netdata is running on the same machine as K6, no further configuration is required. Otherwise, you will have to [point K6](https://k6.io/docs/results-visualization/statsd/) to your node and make sure that the K6 process can reach Netdata. + +The default namespace that is used in the configuration is `k6`. If you change it in K6, you will have to change it as well in the configuration file `k6.conf`. diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c new file mode 100644 index 0000000..67d7ed2 --- /dev/null +++ b/collectors/statsd.plugin/statsd.c @@ -0,0 +1,2844 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "daemon/common.h" + +#define STATSD_CHART_PREFIX "statsd" + +#define PLUGIN_STATSD_NAME "statsd.plugin" + +#define STATSD_LISTEN_PORT 8125 +#define STATSD_LISTEN_BACKLOG 4096 + +#define WORKER_JOB_TYPE_TCP_CONNECTED 0 +#define WORKER_JOB_TYPE_TCP_DISCONNECTED 1 +#define WORKER_JOB_TYPE_RCV_DATA 2 +#define WORKER_JOB_TYPE_SND_DATA 3 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 4 +#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least 4 +#endif + +// -------------------------------------------------------------------------------------- + +// DO NOT ENABLE MULTITHREADING - IT IS NOT WELL TESTED +// #define STATSD_MULTITHREADED 1 + +#define STATSD_DICTIONARY_OPTIONS (DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_ADD_IN_FRONT) +#define STATSD_DECIMAL_DETAIL 1000 // floating point values get multiplied by this, with the same divisor + +// -------------------------------------------------------------------------------------------------------------------- +// data specific to each metric type + +typedef struct statsd_metric_gauge { + NETDATA_DOUBLE value; +} STATSD_METRIC_GAUGE; + +typedef struct statsd_metric_counter { // counter and meter + long long value; +} STATSD_METRIC_COUNTER; + +typedef struct statsd_histogram_extensions { + netdata_mutex_t mutex; + + // average is stored in metric->last + collected_number last_min; + collected_number last_max; + collected_number last_percentile; + collected_number last_median; + collected_number last_stddev; + collected_number last_sum; + + int zeroed; + + RRDDIM *rd_min; + RRDDIM *rd_max; + RRDDIM *rd_percentile; + RRDDIM *rd_median; + RRDDIM *rd_stddev; + //RRDDIM *rd_sum; + + size_t size; + size_t used; + NETDATA_DOUBLE *values; // dynamic array of values collected +} STATSD_METRIC_HISTOGRAM_EXTENSIONS; + +typedef struct statsd_metric_histogram { // histogram and timer + STATSD_METRIC_HISTOGRAM_EXTENSIONS *ext; +} STATSD_METRIC_HISTOGRAM; + +typedef struct statsd_metric_set { + DICTIONARY *dict; + size_t unique; +} STATSD_METRIC_SET; + +typedef struct statsd_metric_dictionary_item { + size_t count; + RRDDIM *rd; +} STATSD_METRIC_DICTIONARY_ITEM; + +typedef struct statsd_metric_dictionary { + DICTIONARY *dict; + size_t unique; +} STATSD_METRIC_DICTIONARY; + + +// -------------------------------------------------------------------------------------------------------------------- +// this is a metric - for all types of metrics + +typedef enum statsd_metric_options { + STATSD_METRIC_OPTION_NONE = 0x00000000, // no options set + STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED = 0x00000001, // do not update the chart dimension, when this metric is not collected + STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED = 0x00000002, // render a private chart for this metric + STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED = 0x00000004, // the metric has been checked if it should get private chart or not + STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT = 0x00000008, // show the count of events for this private chart + STATSD_METRIC_OPTION_CHECKED_IN_APPS = 0x00000010, // set when this metric has been checked against apps + STATSD_METRIC_OPTION_USED_IN_APPS = 0x00000020, // set when this metric is used in apps + STATSD_METRIC_OPTION_CHECKED = 0x00000040, // set when the charting thread checks this metric for use in charts (its usefulness) + STATSD_METRIC_OPTION_USEFUL = 0x00000080, // set when the charting thread finds the metric useful (i.e. used in a chart) + STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED = 0x00000100, // set when the collection is full for this metric + STATSD_METRIC_OPTION_UPDATED_CHART_METADATA = 0x00000200, // set when the private chart metadata have been updated via tags +} STATS_METRIC_OPTIONS; + +typedef enum statsd_metric_type { + STATSD_METRIC_TYPE_GAUGE, + STATSD_METRIC_TYPE_COUNTER, + STATSD_METRIC_TYPE_METER, + STATSD_METRIC_TYPE_TIMER, + STATSD_METRIC_TYPE_HISTOGRAM, + STATSD_METRIC_TYPE_SET, + STATSD_METRIC_TYPE_DICTIONARY +} STATSD_METRIC_TYPE; + + +typedef struct statsd_metric { + const char *name; // the name of the metric - linked to dictionary name + uint32_t hash; // hash of the name + + STATSD_METRIC_TYPE type; + + // metadata about data collection + collected_number events; // the number of times this metric has been collected (never resets) + size_t count; // the number of times this metric has been collected since the last flush + + // the actual collected data + union { + STATSD_METRIC_GAUGE gauge; + STATSD_METRIC_COUNTER counter; + STATSD_METRIC_HISTOGRAM histogram; + STATSD_METRIC_SET set; + STATSD_METRIC_DICTIONARY dictionary; + }; + + char *units; + char *dimname; + char *family; + + // chart related members + STATS_METRIC_OPTIONS options; // STATSD_METRIC_OPTION_* (bitfield) + char reset; // set to 1 by the charting thread to instruct the collector thread(s) to reset this metric + collected_number last; // the last value sent to netdata + RRDSET *st; // the private chart of this metric + RRDDIM *rd_value; // the dimension of this metric value + RRDDIM *rd_count; // the dimension for the number of events received + + // linking, used for walking through all metrics + struct statsd_metric *next_useful; +} STATSD_METRIC; + + +// -------------------------------------------------------------------------------------------------------------------- +// each type of metric has its own index + +typedef struct statsd_index { + char *name; // the name of the index of metrics + size_t events; // the number of events processed for this index + size_t metrics; // the number of metrics in this index + size_t useful; // the number of useful metrics in this index + + STATSD_METRIC_TYPE type; // the type of index + DICTIONARY *dict; + + STATSD_METRIC *first_useful; // the linked list of useful metrics (new metrics are added in front) + + STATS_METRIC_OPTIONS default_options; // default options for all metrics in this index +} STATSD_INDEX; + +// -------------------------------------------------------------------------------------------------------------------- +// synthetic charts + +typedef enum statsd_app_chart_dimension_value_type { + STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS, + STATSD_APP_CHART_DIM_VALUE_TYPE_LAST, + STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE, + STATSD_APP_CHART_DIM_VALUE_TYPE_SUM, + STATSD_APP_CHART_DIM_VALUE_TYPE_MIN, + STATSD_APP_CHART_DIM_VALUE_TYPE_MAX, + STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE, + STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN, + STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV +} STATSD_APP_CHART_DIM_VALUE_TYPE; + +typedef struct statsd_app_chart_dimension { + const char *name; // the name of this dimension + const char *metric; // the source metric name of this dimension + uint32_t metric_hash; // hash for fast string comparisons + + SIMPLE_PATTERN *metric_pattern; // set when the 'metric' is a simple pattern + + collected_number multiplier; // the multiplier of the dimension + collected_number divisor; // the divisor of the dimension + RRDDIM_FLAGS flags; // the RRDDIM flags for this dimension + RRDDIM_OPTIONS options; // the RRDDIM options for this dimension + + STATSD_APP_CHART_DIM_VALUE_TYPE value_type; // which value to use of the source metric + + RRDDIM *rd; // a pointer to the RRDDIM that has been created for this dimension + collected_number *value_ptr; // a pointer to the source metric value + RRD_ALGORITHM algorithm; // the algorithm of this dimension + + struct statsd_app_chart_dimension *next; // the next dimension for this chart +} STATSD_APP_CHART_DIM; + +typedef struct statsd_app_chart { + const char *id; + const char *name; + const char *title; + const char *family; + const char *context; + const char *units; + const char *module; + long priority; + RRDSET_TYPE chart_type; + STATSD_APP_CHART_DIM *dimensions; + size_t dimensions_count; + size_t dimensions_linked_count; + + RRDSET *st; + struct statsd_app_chart *next; +} STATSD_APP_CHART; + +typedef struct statsd_app { + const char *name; + SIMPLE_PATTERN *metrics; + STATS_METRIC_OPTIONS default_options; + RRD_MEMORY_MODE rrd_memory_mode; + DICTIONARY *dict; + long rrd_history_entries; + + const char *source; + STATSD_APP_CHART *charts; + struct statsd_app *next; +} STATSD_APP; + +// -------------------------------------------------------------------------------------------------------------------- +// global statsd data + +struct collection_thread_status { + int status; + size_t max_sockets; + + netdata_thread_t thread; +}; + +static struct statsd { + STATSD_INDEX gauges; + STATSD_INDEX counters; + STATSD_INDEX timers; + STATSD_INDEX histograms; + STATSD_INDEX meters; + STATSD_INDEX sets; + STATSD_INDEX dictionaries; + + size_t unknown_types; + size_t socket_errors; + size_t tcp_socket_connects; + size_t tcp_socket_disconnects; + size_t tcp_socket_connected; + size_t tcp_socket_reads; + size_t tcp_packets_received; + size_t tcp_bytes_read; + size_t udp_socket_reads; + size_t udp_packets_received; + size_t udp_bytes_read; + + int enabled; + int update_every; + SIMPLE_PATTERN *charts_for; + + size_t tcp_idle_timeout; + collected_number decimal_detail; + size_t private_charts; + size_t max_private_charts_hard; + long private_charts_rrd_history_entries; + unsigned int private_charts_hidden:1; + + STATSD_APP *apps; + size_t recvmmsg_size; + size_t histogram_increase_step; + double histogram_percentile; + char *histogram_percentile_str; + size_t dictionary_max_unique; + + int threads; + struct collection_thread_status *collection_threads_status; + + LISTEN_SOCKETS sockets; +} statsd = { + .enabled = 1, + .max_private_charts_hard = 1000, + .private_charts_hidden = 0, + .recvmmsg_size = 10, + .decimal_detail = STATSD_DECIMAL_DETAIL, + + .gauges = { + .name = "gauge", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_GAUGE, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .counters = { + .name = "counter", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_COUNTER, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .timers = { + .name = "timer", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_TIMER, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .histograms = { + .name = "histogram", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_HISTOGRAM, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .meters = { + .name = "meter", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_METER, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .sets = { + .name = "set", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_SET, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .dictionaries = { + .name = "dictionary", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_DICTIONARY, + .default_options = STATSD_METRIC_OPTION_NONE + }, + + .tcp_idle_timeout = 600, + + .apps = NULL, + .histogram_percentile = 95.0, + .histogram_increase_step = 10, + .dictionary_max_unique = 200, + .threads = 0, + .collection_threads_status = NULL, + .sockets = { + .config = &netdata_config, + .config_section = CONFIG_SECTION_STATSD, + .default_bind_to = "udp:localhost tcp:localhost", + .default_port = STATSD_LISTEN_PORT, + .backlog = STATSD_LISTEN_BACKLOG + }, +}; + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd index management - add/find metrics + +static void dictionary_metric_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + STATSD_INDEX *index = (STATSD_INDEX *)data; + STATSD_METRIC *m = (STATSD_METRIC *)value; + const char *name = dictionary_acquired_item_name(item); + + debug(D_STATSD, "Creating new %s metric '%s'", index->name, name); + + m->name = name; + m->hash = simple_hash(name); + m->type = index->type; + m->options = index->default_options; + + if (m->type == STATSD_METRIC_TYPE_HISTOGRAM || m->type == STATSD_METRIC_TYPE_TIMER) { + m->histogram.ext = callocz(1,sizeof(STATSD_METRIC_HISTOGRAM_EXTENSIONS)); + netdata_mutex_init(&m->histogram.ext->mutex); + } + + __atomic_fetch_add(&index->metrics, 1, __ATOMIC_RELAXED); +} + +static void dictionary_metric_delete_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + (void)data; // STATSD_INDEX *index = (STATSD_INDEX *)data; + (void)item; + STATSD_METRIC *m = (STATSD_METRIC *)value; + + if(m->type == STATSD_METRIC_TYPE_HISTOGRAM || m->type == STATSD_METRIC_TYPE_TIMER) { + freez(m->histogram.ext); + m->histogram.ext = NULL; + } + + freez(m->units); + freez(m->family); + freez(m->dimname); +} + +static inline STATSD_METRIC *statsd_find_or_add_metric(STATSD_INDEX *index, const char *name) { + debug(D_STATSD, "searching for metric '%s' under '%s'", name, index->name); + +#ifdef STATSD_MULTITHREADED + // avoid the write lock of dictionary_set() for existing metrics + STATSD_METRIC *m = dictionary_get(index->dict, name); + if(!m) m = dictionary_set(index->dict, name, NULL, sizeof(STATSD_METRIC)); +#else + // no locks here, go faster + // this will call the dictionary_metric_insert_callback() if an item + // is inserted, otherwise it will return the existing one. + // We used the flag DICT_OPTION_DONT_OVERWRITE_VALUE to support this. + STATSD_METRIC *m = dictionary_set(index->dict, name, NULL, sizeof(STATSD_METRIC)); +#endif + + index->events++; + return m; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd parsing numbers + +static inline NETDATA_DOUBLE statsd_parse_float(const char *v, NETDATA_DOUBLE def) { + NETDATA_DOUBLE value; + + if(likely(v && *v)) { + char *e = NULL; + value = str2ndd(v, &e); + if(unlikely(e && *e)) + error("STATSD: excess data '%s' after value '%s'", e, v); + } + else + value = def; + + return value; +} + +static inline NETDATA_DOUBLE statsd_parse_sampling_rate(const char *v) { + NETDATA_DOUBLE sampling_rate = statsd_parse_float(v, 1.0); + if(unlikely(isless(sampling_rate, 0.001))) sampling_rate = 0.001; + if(unlikely(isgreater(sampling_rate, 1.0))) sampling_rate = 1.0; + return sampling_rate; +} + +static inline long long statsd_parse_int(const char *v, long long def) { + long long value; + + if(likely(v && *v)) { + char *e = NULL; + value = str2ll(v, &e); + if(unlikely(e && *e)) + error("STATSD: excess data '%s' after value '%s'", e, v); + } + else + value = def; + + return value; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd processors per metric type + +static inline void statsd_reset_metric(STATSD_METRIC *m) { + m->reset = 0; + m->count = 0; +} + +static inline int value_is_zinit(const char *value) { + return (value && *value == 'z' && *++value == 'i' && *++value == 'n' && *++value == 'i' && *++value == 't' && *++value == '\0'); +} + +#define is_metric_checked(m) ((m)->options & STATSD_METRIC_OPTION_CHECKED) +#define is_metric_useful_for_collection(m) (!is_metric_checked(m) || ((m)->options & STATSD_METRIC_OPTION_USEFUL)) + +static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, const char *sampling) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric '%s' of type gauge, with empty value is ignored.", m->name); + return; + } + + if(unlikely(m->reset)) { + // no need to reset anything specific for gauges + statsd_reset_metric(m); + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + if (unlikely(*value == '+' || *value == '-')) + m->gauge.value += statsd_parse_float(value, 1.0) / statsd_parse_sampling_rate(sampling); + else + m->gauge.value = statsd_parse_float(value, 1.0); + + m->events++; + m->count++; + } +} + +static inline void statsd_process_counter_or_meter(STATSD_METRIC *m, const char *value, const char *sampling) { + if(!is_metric_useful_for_collection(m)) return; + + // we accept empty values for counters + + if(unlikely(m->reset)) statsd_reset_metric(m); + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + m->counter.value += llrintndd((NETDATA_DOUBLE) statsd_parse_int(value, 1) / statsd_parse_sampling_rate(sampling)); + + m->events++; + m->count++; + } +} + +#define statsd_process_counter(m, value, sampling) statsd_process_counter_or_meter(m, value, sampling) +#define statsd_process_meter(m, value, sampling) statsd_process_counter_or_meter(m, value, sampling) + +static inline void statsd_process_histogram_or_timer(STATSD_METRIC *m, const char *value, const char *sampling, const char *type) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric of type %s, with empty value is ignored.", type); + return; + } + + if(unlikely(m->reset)) { + m->histogram.ext->used = 0; + statsd_reset_metric(m); + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + NETDATA_DOUBLE v = statsd_parse_float(value, 1.0); + NETDATA_DOUBLE sampling_rate = statsd_parse_sampling_rate(sampling); + if(unlikely(isless(sampling_rate, 0.01))) sampling_rate = 0.01; + if(unlikely(isgreater(sampling_rate, 1.0))) sampling_rate = 1.0; + + long long samples = llrintndd(1.0 / sampling_rate); + while(samples-- > 0) { + + if(unlikely(m->histogram.ext->used == m->histogram.ext->size)) { + netdata_mutex_lock(&m->histogram.ext->mutex); + m->histogram.ext->size += statsd.histogram_increase_step; + m->histogram.ext->values = reallocz(m->histogram.ext->values, sizeof(NETDATA_DOUBLE) * m->histogram.ext->size); + netdata_mutex_unlock(&m->histogram.ext->mutex); + } + + m->histogram.ext->values[m->histogram.ext->used++] = v; + } + + m->events++; + m->count++; + } +} + +#define statsd_process_timer(m, value, sampling) statsd_process_histogram_or_timer(m, value, sampling, "timer") +#define statsd_process_histogram(m, value, sampling) statsd_process_histogram_or_timer(m, value, sampling, "histogram") + +static void dictionary_metric_set_value_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + (void)item; + (void)value; + STATSD_METRIC *m = (STATSD_METRIC *)data; + m->set.unique++; +} + +static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric of type set, with empty value is ignored."); + return; + } + + if(unlikely(m->reset)) { + if(likely(m->set.dict)) { + dictionary_destroy(m->set.dict); + m->set.dict = NULL; + } + statsd_reset_metric(m); + } + + if (unlikely(!m->set.dict)) { + m->set.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + dictionary_register_insert_callback(m->set.dict, dictionary_metric_set_value_insert_callback, m); + m->set.unique = 0; + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { +#ifdef STATSD_MULTITHREADED + // avoid the write lock to check if something is already there + if(!dictionary_get(m->set.dict, value)) + dictionary_set(m->set.dict, value, NULL, 0); +#else + dictionary_set(m->set.dict, value, NULL, 0); +#endif + m->events++; + m->count++; + } +} + +static void dictionary_metric_dict_value_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + (void)item; + (void)value; + STATSD_METRIC *m = (STATSD_METRIC *)data; + m->dictionary.unique++; +} + +static inline void statsd_process_dictionary(STATSD_METRIC *m, const char *value) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric of type set, with empty value is ignored."); + return; + } + + if(unlikely(m->reset)) + statsd_reset_metric(m); + + if (unlikely(!m->dictionary.dict)) { + m->dictionary.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + dictionary_register_insert_callback(m->dictionary.dict, dictionary_metric_dict_value_insert_callback, m); + m->dictionary.unique = 0; + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + STATSD_METRIC_DICTIONARY_ITEM *t = (STATSD_METRIC_DICTIONARY_ITEM *)dictionary_get(m->dictionary.dict, value); + + if (unlikely(!t)) { + if(!t && m->dictionary.unique >= statsd.dictionary_max_unique) + value = "other"; + + t = (STATSD_METRIC_DICTIONARY_ITEM *)dictionary_set(m->dictionary.dict, value, NULL, sizeof(STATSD_METRIC_DICTIONARY_ITEM)); + } + + t->count++; + m->events++; + m->count++; + } +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd parsing + +static inline const char *statsd_parse_skip_up_to(const char *s, char d1, char d2, char d3) { + char c; + + for(c = *s; c && c != d1 && c != d2 && c != d3 && c != '\r' && c != '\n'; c = *++s) ; + + return s; +} + +const char *statsd_parse_skip_spaces(const char *s) { + char c; + + for(c = *s; c && ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ); c = *++s) ; + + return s; +} + +static inline const char *statsd_parse_field_trim(const char *start, char *end) { + if(unlikely(!start || !*start)) { + start = end; + return start; + } + + while(start <= end && (*start == ' ' || *start == '\t')) + start++; + + *end = '\0'; + end--; + while(end >= start && (*end == ' ' || *end == '\t')) + *end-- = '\0'; + + return start; +} + +static void statsd_process_metric(const char *name, const char *value, const char *type, const char *sampling, const char *tags) { + debug(D_STATSD, "STATSD: raw metric '%s', value '%s', type '%s', sampling '%s', tags '%s'", name?name:"(null)", value?value:"(null)", type?type:"(null)", sampling?sampling:"(null)", tags?tags:"(null)"); + + if(unlikely(!name || !*name)) return; + if(unlikely(!type || !*type)) type = "m"; + + STATSD_METRIC *m = NULL; + + char t0 = type[0], t1 = type[1]; + if(unlikely(t0 == 'g' && t1 == '\0')) { + statsd_process_gauge( + m = statsd_find_or_add_metric(&statsd.gauges, name), + value, sampling); + } + else if(unlikely((t0 == 'c' || t0 == 'C') && t1 == '\0')) { + // etsy/statsd uses 'c' + // brubeck uses 'C' + statsd_process_counter( + m = statsd_find_or_add_metric(&statsd.counters, name), + value, sampling); + } + else if(unlikely(t0 == 'm' && t1 == '\0')) { + statsd_process_meter( + m = statsd_find_or_add_metric(&statsd.meters, name), + value, sampling); + } + else if(unlikely(t0 == 'h' && t1 == '\0')) { + statsd_process_histogram( + m = statsd_find_or_add_metric(&statsd.histograms, name), + value, sampling); + } + else if(unlikely(t0 == 's' && t1 == '\0')) { + statsd_process_set( + m = statsd_find_or_add_metric(&statsd.sets, name), + value); + } + else if(unlikely(t0 == 'd' && t1 == '\0')) { + statsd_process_dictionary( + m = statsd_find_or_add_metric(&statsd.dictionaries, name), + value); + } + else if(unlikely(t0 == 'm' && t1 == 's' && type[2] == '\0')) { + statsd_process_timer( + m = statsd_find_or_add_metric(&statsd.timers, name), + value, sampling); + } + else { + statsd.unknown_types++; + error("STATSD: metric '%s' with value '%s' is sent with unknown metric type '%s'", name, value?value:"", type); + } + + if(m && tags && *tags) { + const char *s = tags; + while(*s) { + const char *tagkey = NULL, *tagvalue = NULL; + char *tagkey_end = NULL, *tagvalue_end = NULL; + + s = tagkey_end = (char *)statsd_parse_skip_up_to(tagkey = s, ':', '=', ','); + if(tagkey == tagkey_end) { + if (*s) { + s++; + s = statsd_parse_skip_spaces(s); + } + continue; + } + + if(likely(*s == ':' || *s == '=')) + s = tagvalue_end = (char *) statsd_parse_skip_up_to(tagvalue = ++s, ',', '\0', '\0'); + + if(*s == ',') s++; + + statsd_parse_field_trim(tagkey, tagkey_end); + statsd_parse_field_trim(tagvalue, tagvalue_end); + + if(tagkey && *tagkey && tagvalue && *tagvalue) { + if (strcmp(tagkey, "units") == 0 && (!m->units || strcmp(m->units, tagvalue) != 0)) { + m->units = strdupz(tagvalue); + m->options |= STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + } + + if (strcmp(tagkey, "name") == 0 && (!m->dimname || strcmp(m->dimname, tagvalue) != 0)) { + m->dimname = strdupz(tagvalue); + m->options |= STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + } + + if (strcmp(tagkey, "family") == 0 && (!m->family || strcmp(m->family, tagvalue) != 0)) { + m->family = strdupz(tagvalue); + m->options |= STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + } + } + } + } +} + +static inline size_t statsd_process(char *buffer, size_t size, int require_newlines) { + buffer[size] = '\0'; + debug(D_STATSD, "RECEIVED: %zu bytes: '%s'", size, buffer); + + const char *s = buffer; + while(*s) { + const char *name = NULL, *value = NULL, *type = NULL, *sampling = NULL, *tags = NULL; + char *name_end = NULL, *value_end = NULL, *type_end = NULL, *sampling_end = NULL, *tags_end = NULL; + + s = name_end = (char *)statsd_parse_skip_up_to(name = s, ':', '=', '|'); + if(name == name_end) { + if (*s) { + s++; + s = statsd_parse_skip_spaces(s); + } + continue; + } + + if(likely(*s == ':' || *s == '=')) + s = value_end = (char *) statsd_parse_skip_up_to(value = ++s, '|', '@', '#'); + + if(likely(*s == '|')) + s = type_end = (char *) statsd_parse_skip_up_to(type = ++s, '|', '@', '#'); + + while(*s == '|' || *s == '@' || *s == '#') { + // parse all the fields that may be appended + + if ((*s == '|' && s[1] == '@') || *s == '@') { + s = sampling_end = (char *)statsd_parse_skip_up_to(sampling = ++s, '|', '@', '#'); + if (*sampling == '@') sampling++; + } + else if ((*s == '|' && s[1] == '#') || *s == '#') { + s = tags_end = (char *)statsd_parse_skip_up_to(tags = ++s, '|', '@', '#'); + if (*tags == '#') tags++; + } + else { + // unknown field, skip it + s = (char *)statsd_parse_skip_up_to(++s, '|', '@', '#'); + } + } + + // skip everything until the end of the line + while(*s && *s != '\n') s++; + + if(unlikely(require_newlines && *s != '\n' && s > buffer)) { + // move the remaining data to the beginning + size -= (name - buffer); + memmove(buffer, name, size); + return size; + } + else + s = statsd_parse_skip_spaces(s); + + statsd_process_metric( + statsd_parse_field_trim(name, name_end) + , statsd_parse_field_trim(value, value_end) + , statsd_parse_field_trim(type, type_end) + , statsd_parse_field_trim(sampling, sampling_end) + , statsd_parse_field_trim(tags, tags_end) + ); + } + + return 0; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd pollfd interface + +#define STATSD_TCP_BUFFER_SIZE 65536 // minimize tcp reads +#define STATSD_UDP_BUFFER_SIZE 9000 // this should be up to MTU + +typedef enum { + STATSD_SOCKET_DATA_TYPE_TCP, + STATSD_SOCKET_DATA_TYPE_UDP +} STATSD_SOCKET_DATA_TYPE; + +struct statsd_tcp { + STATSD_SOCKET_DATA_TYPE type; + size_t size; + size_t len; + char buffer[]; +}; + +#ifdef HAVE_RECVMMSG +struct statsd_udp { + int *running; + STATSD_SOCKET_DATA_TYPE type; + size_t size; + struct iovec *iovecs; + struct mmsghdr *msgs; +}; +#else +struct statsd_udp { + int *running; + STATSD_SOCKET_DATA_TYPE type; + char buffer[STATSD_UDP_BUFFER_SIZE]; +}; +#endif + +// new TCP client connected +static void *statsd_add_callback(POLLINFO *pi, short int *events, void *data) { + (void)pi; + (void)data; + + worker_is_busy(WORKER_JOB_TYPE_TCP_CONNECTED); + *events = POLLIN; + + struct statsd_tcp *t = (struct statsd_tcp *)callocz(sizeof(struct statsd_tcp) + STATSD_TCP_BUFFER_SIZE, 1); + t->type = STATSD_SOCKET_DATA_TYPE_TCP; + t->size = STATSD_TCP_BUFFER_SIZE - 1; + statsd.tcp_socket_connects++; + statsd.tcp_socket_connected++; + + worker_is_idle(); + return t; +} + +// TCP client disconnected +static void statsd_del_callback(POLLINFO *pi) { + worker_is_busy(WORKER_JOB_TYPE_TCP_DISCONNECTED); + + struct statsd_tcp *t = pi->data; + + if(likely(t)) { + if(t->type == STATSD_SOCKET_DATA_TYPE_TCP) { + if(t->len != 0) { + statsd.socket_errors++; + error("STATSD: client is probably sending unterminated metrics. Closed socket left with '%s'. Trying to process it.", t->buffer); + statsd_process(t->buffer, t->len, 0); + } + statsd.tcp_socket_disconnects++; + statsd.tcp_socket_connected--; + } + else + error("STATSD: internal error: received socket data type is %d, but expected %d", (int)t->type, (int)STATSD_SOCKET_DATA_TYPE_TCP); + + freez(t); + } + + worker_is_idle(); +} + +// Receive data +static int statsd_rcv_callback(POLLINFO *pi, short int *events) { + int retval = -1; + worker_is_busy(WORKER_JOB_TYPE_RCV_DATA); + + *events = POLLIN; + + int fd = pi->fd; + + switch(pi->socktype) { + case SOCK_STREAM: { + struct statsd_tcp *d = (struct statsd_tcp *)pi->data; + if(unlikely(!d)) { + error("STATSD: internal error: expected TCP data pointer is NULL"); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_TCP)) { + error("STATSD: internal error: socket data type should be %d, but it is %d", (int)STATSD_SOCKET_DATA_TYPE_TCP, (int)d->type); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } +#endif + + int ret = 0; + ssize_t rc; + do { + rc = recv(fd, &d->buffer[d->len], d->size - d->len, MSG_DONTWAIT); + if (rc < 0) { + // read failed + if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { + error("STATSD: recv() on TCP socket %d failed.", fd); + statsd.socket_errors++; + ret = -1; + } + } + else if (!rc) { + // connection closed + debug(D_STATSD, "STATSD: client disconnected."); + ret = -1; + } + else { + // data received + d->len += rc; + statsd.tcp_socket_reads++; + statsd.tcp_bytes_read += rc; + } + + if(likely(d->len > 0)) { + statsd.tcp_packets_received++; + d->len = statsd_process(d->buffer, d->len, 1); + } + + if(unlikely(ret == -1)) { + retval = -1; + goto cleanup; + } + + } while (rc != -1); + break; + } + + case SOCK_DGRAM: { + struct statsd_udp *d = (struct statsd_udp *)pi->data; + if(unlikely(!d)) { + error("STATSD: internal error: expected UDP data pointer is NULL"); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_UDP)) { + error("STATSD: internal error: socket data should be %d, but it is %d", (int)d->type, (int)STATSD_SOCKET_DATA_TYPE_UDP); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } +#endif + +#ifdef HAVE_RECVMMSG + ssize_t rc; + do { + rc = recvmmsg(fd, d->msgs, (unsigned int)d->size, MSG_DONTWAIT, NULL); + if (rc < 0) { + // read failed + if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { + error("STATSD: recvmmsg() on UDP socket %d failed.", fd); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + } else if (rc) { + // data received + statsd.udp_socket_reads++; + statsd.udp_packets_received += rc; + + size_t i; + for (i = 0; i < (size_t)rc; ++i) { + size_t len = (size_t)d->msgs[i].msg_len; + statsd.udp_bytes_read += len; + statsd_process(d->msgs[i].msg_hdr.msg_iov->iov_base, len, 0); + } + } + } while (rc != -1); + +#else // !HAVE_RECVMMSG + ssize_t rc; + do { + rc = recv(fd, d->buffer, STATSD_UDP_BUFFER_SIZE - 1, MSG_DONTWAIT); + if (rc < 0) { + // read failed + if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { + error("STATSD: recv() on UDP socket %d failed.", fd); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + } else if (rc) { + // data received + statsd.udp_socket_reads++; + statsd.udp_packets_received++; + statsd.udp_bytes_read += rc; + statsd_process(d->buffer, (size_t) rc, 0); + } + } while (rc != -1); +#endif + + break; + } + + default: { + error("STATSD: internal error: unknown socktype %d on socket %d", pi->socktype, fd); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + } + + retval = 0; +cleanup: + worker_is_idle(); + return retval; +} + +static int statsd_snd_callback(POLLINFO *pi, short int *events) { + (void)pi; + (void)events; + + worker_is_busy(WORKER_JOB_TYPE_SND_DATA); + error("STATSD: snd_callback() called, but we never requested to send data to statsd clients."); + worker_is_idle(); + + return -1; +} + +// -------------------------------------------------------------------------------------------------------------------- +// statsd child thread to collect metrics from network + +void statsd_collector_thread_cleanup(void *data) { + struct statsd_udp *d = data; + *d->running = 0; + + info("cleaning up..."); + +#ifdef HAVE_RECVMMSG + size_t i; + for (i = 0; i < d->size; i++) + freez(d->iovecs[i].iov_base); + + freez(d->iovecs); + freez(d->msgs); +#endif + + freez(d); + worker_unregister(); +} + +void *statsd_collector_thread(void *ptr) { + struct collection_thread_status *status = ptr; + status->status = 1; + + worker_register("STATSD"); + worker_register_job_name(WORKER_JOB_TYPE_TCP_CONNECTED, "tcp connect"); + worker_register_job_name(WORKER_JOB_TYPE_TCP_DISCONNECTED, "tcp disconnect"); + worker_register_job_name(WORKER_JOB_TYPE_RCV_DATA, "receive"); + worker_register_job_name(WORKER_JOB_TYPE_SND_DATA, "send"); + + info("STATSD collector thread started with taskid %d", gettid()); + + struct statsd_udp *d = callocz(sizeof(struct statsd_udp), 1); + d->running = &status->status; + + netdata_thread_cleanup_push(statsd_collector_thread_cleanup, d); + +#ifdef HAVE_RECVMMSG + d->type = STATSD_SOCKET_DATA_TYPE_UDP; + d->size = statsd.recvmmsg_size; + d->iovecs = callocz(sizeof(struct iovec), d->size); + d->msgs = callocz(sizeof(struct mmsghdr), d->size); + + size_t i; + for (i = 0; i < d->size; i++) { + d->iovecs[i].iov_base = mallocz(STATSD_UDP_BUFFER_SIZE); + d->iovecs[i].iov_len = STATSD_UDP_BUFFER_SIZE - 1; + d->msgs[i].msg_hdr.msg_iov = &d->iovecs[i]; + d->msgs[i].msg_hdr.msg_iovlen = 1; + } +#endif + + poll_events(&statsd.sockets + , statsd_add_callback + , statsd_del_callback + , statsd_rcv_callback + , statsd_snd_callback + , NULL + , NULL // No access control pattern + , 0 // No dns lookups for access control pattern + , (void *)d + , 0 // tcp request timeout, 0 = disabled + , statsd.tcp_idle_timeout // tcp idle timeout, 0 = disabled + , statsd.update_every * 1000 + , ptr // timer_data + , status->max_sockets + ); + + netdata_thread_cleanup_pop(1); + return NULL; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd applications configuration files parsing + +#define STATSD_CONF_LINE_MAX 8192 + +static STATSD_APP_CHART_DIM_VALUE_TYPE string2valuetype(const char *type, size_t line, const char *filename) { + if(!type || !*type) type = "last"; + + if(!strcmp(type, "events")) return STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS; + else if(!strcmp(type, "last")) return STATSD_APP_CHART_DIM_VALUE_TYPE_LAST; + else if(!strcmp(type, "min")) return STATSD_APP_CHART_DIM_VALUE_TYPE_MIN; + else if(!strcmp(type, "max")) return STATSD_APP_CHART_DIM_VALUE_TYPE_MAX; + else if(!strcmp(type, "sum")) return STATSD_APP_CHART_DIM_VALUE_TYPE_SUM; + else if(!strcmp(type, "average")) return STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE; + else if(!strcmp(type, "median")) return STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN; + else if(!strcmp(type, "stddev")) return STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV; + else if(!strcmp(type, "percentile")) return STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE; + + error("STATSD: invalid type '%s' at line %zu of file '%s'. Using 'last'.", type, line, filename); + return STATSD_APP_CHART_DIM_VALUE_TYPE_LAST; +} + +static const char *valuetype2string(STATSD_APP_CHART_DIM_VALUE_TYPE type) { + switch(type) { + case STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS: return "events"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_LAST: return "last"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_MIN: return "min"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_MAX: return "max"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_SUM: return "sum"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE: return "average"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN: return "median"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV: return "stddev"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE: return "percentile"; + } + + return "unknown"; +} + +static STATSD_APP_CHART_DIM *add_dimension_to_app_chart( + STATSD_APP *app __maybe_unused + , STATSD_APP_CHART *chart + , const char *metric_name + , const char *dim_name + , collected_number multiplier + , collected_number divisor + , RRDDIM_FLAGS flags + , RRDDIM_OPTIONS options + , STATSD_APP_CHART_DIM_VALUE_TYPE value_type +) { + STATSD_APP_CHART_DIM *dim = callocz(sizeof(STATSD_APP_CHART_DIM), 1); + + dim->metric = strdupz(metric_name); + dim->metric_hash = simple_hash(dim->metric); + + dim->name = strdupz((dim_name)?dim_name:""); + dim->multiplier = multiplier; + dim->divisor = divisor; + dim->value_type = value_type; + dim->flags = flags; + dim->options = options; + + if(!dim->multiplier) + dim->multiplier = 1; + + if(!dim->divisor) + dim->divisor = 1; + + // append it to the list of dimension + STATSD_APP_CHART_DIM *tdim; + for(tdim = chart->dimensions; tdim && tdim->next ; tdim = tdim->next) ; + if(!tdim) { + dim->next = chart->dimensions; + chart->dimensions = dim; + } + else { + dim->next = tdim->next; + tdim->next = dim; + } + chart->dimensions_count++; + + debug(D_STATSD, "Added dimension '%s' to chart '%s' of app '%s', for metric '%s', with type %u, multiplier " COLLECTED_NUMBER_FORMAT ", divisor " COLLECTED_NUMBER_FORMAT, + dim->name, chart->id, app->name, dim->metric, dim->value_type, dim->multiplier, dim->divisor); + + return dim; +} + +static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHART *chart, DICTIONARY *dict) { + debug(D_STATSD, "STATSD configuration reading file '%s'", filename); + + char *buffer = mallocz(STATSD_CONF_LINE_MAX + 1); + + FILE *fp = fopen(filename, "r"); + if(!fp) { + error("STATSD: cannot open file '%s'.", filename); + freez(buffer); + return -1; + } + + size_t line = 0; + char *s; + while(fgets(buffer, STATSD_CONF_LINE_MAX, fp) != NULL) { + buffer[STATSD_CONF_LINE_MAX] = '\0'; + line++; + + s = trim(buffer); + if (!s || *s == '#') { + debug(D_STATSD, "STATSD: ignoring line %zu of file '%s', it is empty.", line, filename); + continue; + } + + debug(D_STATSD, "STATSD: processing line %zu of file '%s': %s", line, filename, buffer); + + if(*s == 'i' && strncmp(s, "include", 7) == 0) { + s = trim(&s[7]); + if(s && *s) { + char *tmp; + if(*s == '/') + tmp = strdupz(s); + else { + // the file to be included is relative to current file + // find the directory name from the file we already read + char *filename2 = strdupz(filename); // copy filename, since dirname() will change it + char *dir = dirname(filename2); // find the directory part of the filename + tmp = strdupz_path_subpath(dir, s); // compose the new filename to read; + freez(filename2); // free the filename we copied + } + statsd_readfile(tmp, app, chart, dict); + freez(tmp); + } + else + error("STATSD: ignoring line %zu of file '%s', include filename is empty", line, filename); + + continue; + } + + int len = (int) strlen(s); + if (*s == '[' && s[len - 1] == ']') { + // new section + s[len - 1] = '\0'; + s++; + + if (!strcmp(s, "app")) { + // a new app + app = callocz(sizeof(STATSD_APP), 1); + app->name = strdupz("unnamed"); + app->rrd_memory_mode = localhost->rrd_memory_mode; + app->rrd_history_entries = localhost->rrd_history_entries; + + app->next = statsd.apps; + statsd.apps = app; + chart = NULL; + dict = NULL; + + { + char lineandfile[FILENAME_MAX + 1]; + snprintfz(lineandfile, FILENAME_MAX, "%zu@%s", line, filename); + app->source = strdupz(lineandfile); + } + } + else if(app) { + if(!strcmp(s, "dictionary")) { + if(!app->dict) + app->dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + + dict = app->dict; + } + else { + dict = NULL; + + // a new chart + chart = callocz(sizeof(STATSD_APP_CHART), 1); + netdata_fix_chart_id(s); + chart->id = strdupz(s); + chart->name = strdupz(s); + chart->title = strdupz("Statsd chart"); + chart->context = strdupz(s); + chart->family = strdupz("overview"); + chart->units = strdupz("value"); + chart->priority = NETDATA_CHART_PRIO_STATSD_PRIVATE; + chart->chart_type = RRDSET_TYPE_LINE; + + chart->next = app->charts; + app->charts = chart; + + if (!strncmp( + filename, + netdata_configured_stock_config_dir, + strlen(netdata_configured_stock_config_dir))) { + char tmpfilename[FILENAME_MAX + 1]; + strncpyz(tmpfilename, filename, FILENAME_MAX); + chart->module = strdupz(basename(tmpfilename)); + } else { + chart->module = strdupz("synthetic_chart"); + } + } + } + else + error("STATSD: ignoring line %zu ('%s') of file '%s', [app] is not defined.", line, s, filename); + + continue; + } + + if(!app) { + error("STATSD: ignoring line %zu ('%s') of file '%s', it is outside all sections.", line, s, filename); + continue; + } + + char *name = s; + char *value = strchr(s, '='); + if(!value) { + error("STATSD: ignoring line %zu ('%s') of file '%s', there is no = in it.", line, s, filename); + continue; + } + *value = '\0'; + value++; + + name = trim(name); + value = trim(value); + + if(!name || *name == '#') { + error("STATSD: ignoring line %zu of file '%s', name is empty.", line, filename); + continue; + } + if(!value) { + debug(D_CONFIG, "STATSD: ignoring line %zu of file '%s', value is empty.", line, filename); + continue; + } + + if(unlikely(dict)) { + // parse [dictionary] members + + dictionary_set(dict, name, value, strlen(value) + 1); + } + else if(!chart) { + // parse [app] members + + if(!strcmp(name, "name")) { + freez((void *)app->name); + netdata_fix_chart_name(value); + app->name = strdupz(value); + } + else if (!strcmp(name, "metrics")) { + simple_pattern_free(app->metrics); + app->metrics = simple_pattern_create(value, NULL, SIMPLE_PATTERN_EXACT); + } + else if (!strcmp(name, "private charts")) { + if (!strcmp(value, "yes") || !strcmp(value, "on")) + app->default_options |= STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + else + app->default_options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } + else if (!strcmp(name, "gaps when not collected")) { + if (!strcmp(value, "yes") || !strcmp(value, "on")) + app->default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + } + else if (!strcmp(name, "memory mode")) { + // this is not supported anymore + // with the implementation of storage engines, all charts have the same storage engine always + // app->rrd_memory_mode = rrd_memory_mode_id(value); + ; + } + else if (!strcmp(name, "history")) { + app->rrd_history_entries = atol(value); + if (app->rrd_history_entries < 5) + app->rrd_history_entries = 5; + } + else { + error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [app] section.", line, name, filename); + continue; + } + } + else { + // parse [chart] members + + if(!strcmp(name, "name")) { + freez((void *)chart->name); + netdata_fix_chart_id(value); + chart->name = strdupz(value); + } + else if(!strcmp(name, "title")) { + freez((void *)chart->title); + chart->title = strdupz(value); + } + else if (!strcmp(name, "family")) { + freez((void *)chart->family); + chart->family = strdupz(value); + } + else if (!strcmp(name, "context")) { + freez((void *)chart->context); + netdata_fix_chart_id(value); + chart->context = strdupz(value); + } + else if (!strcmp(name, "units")) { + freez((void *)chart->units); + chart->units = strdupz(value); + } + else if (!strcmp(name, "priority")) { + chart->priority = atol(value); + } + else if (!strcmp(name, "type")) { + chart->chart_type = rrdset_type_id(value); + } + else if (!strcmp(name, "dimension")) { + // metric [name [type [multiplier [divisor]]]] + char *words[10] = { NULL }; + size_t num_words = pluginsd_split_words(value, words, 10, NULL, NULL, 0); + + int pattern = 0; + size_t i = 0; + char *metric_name = get_word(words, num_words, i++); + + if(strcmp(metric_name, "pattern") == 0) { + metric_name = get_word(words, num_words, i++); + pattern = 1; + } + + char *dim_name = get_word(words, num_words, i++); + char *type = get_word(words, num_words, i++); + char *multiplier = get_word(words, num_words, i++); + char *divisor = get_word(words, num_words, i++); + char *opts = get_word(words, num_words, i++); + + RRDDIM_FLAGS flags = RRDDIM_FLAG_NONE; + RRDDIM_OPTIONS options = RRDDIM_OPTION_NONE; + if(opts && *opts) { + if(strstr(opts, "hidden") != NULL) options |= RRDDIM_OPTION_HIDDEN; + if(strstr(opts, "noreset") != NULL) options |= RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS; + if(strstr(opts, "nooverflow") != NULL) options |= RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS; + } + + if(!pattern) { + if(app->dict) { + if(dim_name && *dim_name) { + char *n = dictionary_get(app->dict, dim_name); + if(n) dim_name = n; + } + else { + dim_name = dictionary_get(app->dict, metric_name); + } + } + + if(!dim_name || !*dim_name) + dim_name = metric_name; + } + + STATSD_APP_CHART_DIM *dim = add_dimension_to_app_chart( + app + , chart + , metric_name + , dim_name + , (multiplier && *multiplier)?str2l(multiplier):1 + , (divisor && *divisor)?str2l(divisor):1 + , flags + , + options, string2valuetype(type, line, filename) + ); + + if(pattern) + dim->metric_pattern = simple_pattern_create(dim->metric, NULL, SIMPLE_PATTERN_EXACT); + } + else { + error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [%s] section.", line, name, filename, chart->id); + continue; + } + } + } + + freez(buffer); + fclose(fp); + return 0; +} + +static int statsd_file_callback(const char *filename, void *data) { + (void)data; + return statsd_readfile(filename, NULL, NULL, NULL); +} + +static inline void statsd_readdir(const char *user_path, const char *stock_path, const char *subpath) { + recursive_config_double_dir_load(user_path, stock_path, subpath, statsd_file_callback, NULL, 0); +} + +// -------------------------------------------------------------------------------------------------------------------- +// send metrics to netdata - in private charts - called from the main thread + +// extract chart type and chart id from metric name +static inline void statsd_get_metric_type_and_id(STATSD_METRIC *m, char *type, char *id, char *context, const char *metrictype, size_t len) { + + // The full chart type.id looks like this: + // ${STATSD_CHART_PREFIX} + "_" + ${METRIC_NAME} + "_" + ${METRIC_TYPE} + // + // where: + // STATSD_CHART_PREFIX = "statsd" as defined above + // METRIC_NAME = whatever the user gave to statsd + // METRIC_TYPE = "gauge", "counter", "meter", "timer", "histogram", "set", "dictionary" + + // for chart type, we want: + // ${STATSD_CHART_PREFIX} + "_" + the first word of ${METRIC_NAME} + + // find the first word of ${METRIC_NAME} + char firstword[len + 1], *s = ""; + strncpyz(firstword, m->name, len); + for (s = firstword; *s ; s++) { + if (unlikely(*s == '.' || *s == '_')) { + *s = '\0'; + s++; + break; + } + } + // firstword has the first word of ${METRIC_NAME} + // s has the remaining, if any + + // create the chart type: + snprintfz(type, len, STATSD_CHART_PREFIX "_%s", firstword); + + // for chart id, we want: + // the remaining of the words of ${METRIC_NAME} + "_" + ${METRIC_TYPE} + // or the ${METRIC_NAME} has no remaining words, the ${METRIC_TYPE} alone + if(*s) + snprintfz(id, len, "%s_%s", s, metrictype); + else + snprintfz(id, len, "%s", metrictype); + + // for the context, we want the full of both the above, separated with a dot (type.id): + snprintfz(context, RRD_ID_LENGTH_MAX, "%s.%s", type, id); + + // make sure they don't have illegal characters + netdata_fix_chart_id(type); + netdata_fix_chart_id(id); + netdata_fix_chart_id(context); +} + +static inline RRDSET *statsd_private_rrdset_create( + STATSD_METRIC *m __maybe_unused + , const char *type + , const char *id + , const char *name + , const char *family + , const char *context + , const char *title + , const char *units + , long priority + , int update_every + , RRDSET_TYPE chart_type +) { + if(!m->st) + statsd.private_charts++; + + RRDSET *st = rrdset_create_custom( + localhost // host + , type // type + , id // id + , name // name + , family // family + , context // context + , title // title + , units // units + , PLUGIN_STATSD_NAME // plugin + , "private_chart" // module + , priority // priority + , update_every // update every + , chart_type // chart type + , default_rrd_memory_mode // memory mode + , default_rrd_history_entries // history + ); + rrdset_flag_set(st, RRDSET_FLAG_STORE_FIRST); + + if(statsd.private_charts_hidden) + rrdset_flag_set(st, RRDSET_FLAG_HIDDEN); + + // rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + return st; +} + +static inline void statsd_private_chart_gauge(STATSD_METRIC *m) { + debug(D_STATSD, "updating private chart for gauge metric '%s'", m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, "gauge", RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for gauge %s", m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:"gauges" // family (submenu) + , context // context + , title // title + , m->units?m->units:"value" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_LINE + ); + + m->rd_value = rrddim_add(m->st, "gauge", m->dimname?m->dimname:NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) { + debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, dim, RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for %s %s", dim, m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:family // family (submenu) + , context // context + , title // title + , m->units?m->units:"events/s" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_AREA + ); + + m->rd_value = rrddim_add(m->st, dim, m->dimname?m->dimname:NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_set(STATSD_METRIC *m) { + debug(D_STATSD, "updating private chart for set metric '%s'", m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, "set", RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for set %s", m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:"sets" // family (submenu) + , context // context + , title // title + , m->units?m->units:"entries" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_LINE + ); + + m->rd_value = rrddim_add(m->st, "set", m->dimname?m->dimname:"unique", 1, 1, RRD_ALGORITHM_ABSOLUTE); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) { + debug(D_STATSD, "updating private chart for dictionary metric '%s'", m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, "dictionary", RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for dictionary %s", m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:"dictionaries" // family (submenu) + , context // context + , title // title + , m->units?m->units:"events/s" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_STACKED + ); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + STATSD_METRIC_DICTIONARY_ITEM *t; + dfe_start_read(m->dictionary.dict, t) { + if (!t->rd) t->rd = rrddim_add(m->st, t_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_set_by_pointer(m->st, t->rd, (collected_number)t->count); + } + dfe_done(t); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) { + debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, dim, RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for %s %s", dim, m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:family // family (submenu) + , context // context + , title // title + , m->units?m->units:units // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_AREA + ); + + m->histogram.ext->rd_min = rrddim_add(m->st, "min", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_max = rrddim_add(m->st, "max", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->rd_value = rrddim_add(m->st, "average", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_percentile = rrddim_add(m->st, statsd.histogram_percentile_str, NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_median = rrddim_add(m->st, "median", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_stddev = rrddim_add(m->st, "stddev", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + //m->histogram.ext->rd_sum = rrddim_add(m->st, "sum", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_min, m->histogram.ext->last_min); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_max, m->histogram.ext->last_max); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_percentile, m->histogram.ext->last_percentile); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_median, m->histogram.ext->last_median); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_stddev, m->histogram.ext->last_stddev); + //rrddim_set_by_pointer(m->st, m->histogram.ext->rd_sum, m->histogram.ext->last_sum); + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +// -------------------------------------------------------------------------------------------------------------------- +// statsd flush metrics + +static inline void statsd_flush_gauge(STATSD_METRIC *m) { + debug(D_STATSD, "flushing gauge metric '%s'", m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = (collected_number) (m->gauge.value * statsd.decimal_detail); + + m->reset = 1; + updated = 1; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_gauge(m); +} + +static inline void statsd_flush_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) { + debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = m->counter.value; + + m->reset = 1; + updated = 1; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_counter_or_meter(m, dim, family); +} + +static inline void statsd_flush_counter(STATSD_METRIC *m) { + statsd_flush_counter_or_meter(m, "counter", "counters"); +} + +static inline void statsd_flush_meter(STATSD_METRIC *m) { + statsd_flush_counter_or_meter(m, "meter", "meters"); +} + +static inline void statsd_flush_set(STATSD_METRIC *m) { + debug(D_STATSD, "flushing set metric '%s'", m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = (collected_number)m->set.unique; + + m->reset = 1; + updated = 1; + } + else { + m->last = 0; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_set(m); +} + +static inline void statsd_flush_dictionary(STATSD_METRIC *m) { + debug(D_STATSD, "flushing dictionary metric '%s'", m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = (collected_number)m->dictionary.unique; + + m->reset = 1; + updated = 1; + } + else { + m->last = 0; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_dictionary(m); + + if(m->dictionary.unique >= statsd.dictionary_max_unique) { + if(!(m->options & STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED)) { + m->options |= STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED; + info( + "STATSD dictionary '%s' reach max of %zu items - try increasing 'dictionaries max unique dimensions' in netdata.conf", + m->name, + m->dictionary.unique); + } + } +} + +static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) { + debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count && m->histogram.ext->used > 0)) { + netdata_mutex_lock(&m->histogram.ext->mutex); + + size_t len = m->histogram.ext->used; + NETDATA_DOUBLE *series = m->histogram.ext->values; + sort_series(series, len); + + m->histogram.ext->last_min = (collected_number)roundndd(series[0] * statsd.decimal_detail); + m->histogram.ext->last_max = (collected_number)roundndd(series[len - 1] * statsd.decimal_detail); + m->last = (collected_number)roundndd(average(series, len) * statsd.decimal_detail); + m->histogram.ext->last_median = (collected_number)roundndd(median_on_sorted_series(series, len) * statsd.decimal_detail); + m->histogram.ext->last_stddev = (collected_number)roundndd(standard_deviation(series, len) * statsd.decimal_detail); + m->histogram.ext->last_sum = (collected_number)roundndd(sum(series, len) * statsd.decimal_detail); + + size_t pct_len = (size_t)floor((double)len * statsd.histogram_percentile / 100.0); + if(pct_len < 1) + m->histogram.ext->last_percentile = (collected_number)(series[0] * statsd.decimal_detail); + else + m->histogram.ext->last_percentile = (collected_number)roundndd(series[pct_len - 1] * statsd.decimal_detail); + + netdata_mutex_unlock(&m->histogram.ext->mutex); + + debug(D_STATSD, "STATSD %s metric %s: min " COLLECTED_NUMBER_FORMAT ", max " COLLECTED_NUMBER_FORMAT ", last " COLLECTED_NUMBER_FORMAT ", pcent " COLLECTED_NUMBER_FORMAT ", median " COLLECTED_NUMBER_FORMAT ", stddev " COLLECTED_NUMBER_FORMAT ", sum " COLLECTED_NUMBER_FORMAT, + dim, m->name, m->histogram.ext->last_min, m->histogram.ext->last_max, m->last, m->histogram.ext->last_percentile, m->histogram.ext->last_median, m->histogram.ext->last_stddev, m->histogram.ext->last_sum); + + m->histogram.ext->zeroed = 0; + m->reset = 1; + updated = 1; + } + else if(unlikely(!m->histogram.ext->zeroed)) { + // reset the metrics + // if we collected anything, they will be updated below + // this ensures that we report zeros if nothing is collected + + m->histogram.ext->last_min = 0; + m->histogram.ext->last_max = 0; + m->last = 0; + m->histogram.ext->last_median = 0; + m->histogram.ext->last_stddev = 0; + m->histogram.ext->last_sum = 0; + m->histogram.ext->last_percentile = 0; + + m->histogram.ext->zeroed = 1; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_timer_or_histogram(m, dim, family, units); +} + +static inline void statsd_flush_timer(STATSD_METRIC *m) { + statsd_flush_timer_or_histogram(m, "timer", "timers", "milliseconds"); +} + +static inline void statsd_flush_histogram(STATSD_METRIC *m) { + statsd_flush_timer_or_histogram(m, "histogram", "histograms", "value"); +} + +static inline RRD_ALGORITHM statsd_algorithm_for_metric(STATSD_METRIC *m) { + switch(m->type) { + default: + case STATSD_METRIC_TYPE_GAUGE: + case STATSD_METRIC_TYPE_SET: + case STATSD_METRIC_TYPE_TIMER: + case STATSD_METRIC_TYPE_HISTOGRAM: + return RRD_ALGORITHM_ABSOLUTE; + + case STATSD_METRIC_TYPE_METER: + case STATSD_METRIC_TYPE_COUNTER: + case STATSD_METRIC_TYPE_DICTIONARY: + return RRD_ALGORITHM_INCREMENTAL; + } +} + +static inline void link_metric_to_app_dimension(STATSD_APP *app, STATSD_METRIC *m, STATSD_APP_CHART *chart, STATSD_APP_CHART_DIM *dim) { + if(dim->value_type == STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS) { + dim->value_ptr = &m->events; + dim->algorithm = RRD_ALGORITHM_INCREMENTAL; + } + else if(m->type == STATSD_METRIC_TYPE_HISTOGRAM || m->type == STATSD_METRIC_TYPE_TIMER) { + dim->algorithm = RRD_ALGORITHM_ABSOLUTE; + dim->divisor *= statsd.decimal_detail; + + switch(dim->value_type) { + case STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS: + // will never match - added to avoid warning + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_LAST: + case STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE: + dim->value_ptr = &m->last; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_SUM: + dim->value_ptr = &m->histogram.ext->last_sum; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_MIN: + dim->value_ptr = &m->histogram.ext->last_min; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_MAX: + dim->value_ptr = &m->histogram.ext->last_max; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN: + dim->value_ptr = &m->histogram.ext->last_median; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE: + dim->value_ptr = &m->histogram.ext->last_percentile; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV: + dim->value_ptr = &m->histogram.ext->last_stddev; + break; + } + } + else { + if (dim->value_type != STATSD_APP_CHART_DIM_VALUE_TYPE_LAST) + error("STATSD: unsupported value type for dimension '%s' of chart '%s' of app '%s' on metric '%s'", dim->name, chart->id, app->name, m->name); + + dim->value_ptr = &m->last; + dim->algorithm = statsd_algorithm_for_metric(m); + + if(m->type == STATSD_METRIC_TYPE_GAUGE) + dim->divisor *= statsd.decimal_detail; + } + + if(unlikely(chart->st && dim->rd)) { + rrddim_set_algorithm(chart->st, dim->rd, dim->algorithm); + rrddim_set_multiplier(chart->st, dim->rd, dim->multiplier); + rrddim_set_divisor(chart->st, dim->rd, dim->divisor); + } + + chart->dimensions_linked_count++; + m->options |= STATSD_METRIC_OPTION_USED_IN_APPS; + debug(D_STATSD, "metric '%s' of type %u linked with app '%s', chart '%s', dimension '%s', algorithm '%s'", m->name, m->type, app->name, chart->id, dim->name, rrd_algorithm_name(dim->algorithm)); +} + +static inline void check_if_metric_is_for_app(STATSD_INDEX *index, STATSD_METRIC *m) { + (void)index; + + STATSD_APP *app; + for(app = statsd.apps; app ;app = app->next) { + if(unlikely(simple_pattern_matches(app->metrics, m->name))) { + debug(D_STATSD, "metric '%s' matches app '%s'", m->name, app->name); + + // the metric should get the options from the app + + if(app->default_options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED) + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + else + m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + + if(app->default_options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED) + m->options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + else + m->options &= ~STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED; + + // check if there is a chart in this app, willing to get this metric + STATSD_APP_CHART *chart; + for(chart = app->charts; chart; chart = chart->next) { + + STATSD_APP_CHART_DIM *dim; + for(dim = chart->dimensions; dim ; dim = dim->next) { + if(unlikely(dim->metric_pattern)) { + size_t dim_name_len = strlen(dim->name); + size_t wildcarded_len = dim_name_len + strlen(m->name) + 1; + char wildcarded[wildcarded_len]; + + strcpy(wildcarded, dim->name); + char *ws = &wildcarded[dim_name_len]; + + if(simple_pattern_matches_extract(dim->metric_pattern, m->name, ws, wildcarded_len - dim_name_len)) { + + char *final_name = NULL; + + if(app->dict) { + if(likely(*wildcarded)) { + // use the name of the wildcarded string + final_name = dictionary_get(app->dict, wildcarded); + } + + if(unlikely(!final_name)) { + // use the name of the metric + final_name = dictionary_get(app->dict, m->name); + } + } + + if(unlikely(!final_name)) + final_name = wildcarded; + + add_dimension_to_app_chart( + app + , chart + , m->name + , final_name + , dim->multiplier + , dim->divisor + , dim->flags + , dim->options + , dim->value_type + ); + + // the new dimension is appended to the list + // so, it will be matched and linked later too + } + } + else if(!dim->value_ptr && dim->metric_hash == m->hash && !strcmp(dim->metric, m->name)) { + // we have a match - this metric should be linked to this dimension + link_metric_to_app_dimension(app, m, chart, dim); + } + } + + } + } + } +} + +static inline RRDDIM *statsd_add_dim_to_app_chart(STATSD_APP *app, STATSD_APP_CHART *chart, STATSD_APP_CHART_DIM *dim) { + (void)app; + + // allow the same statsd metric to be added multiple times to the same chart + + STATSD_APP_CHART_DIM *tdim; + size_t count_same_metric = 0, count_same_metric_value_type = 0; + size_t pos_same_metric_value_type = 0; + + for (tdim = chart->dimensions; tdim && tdim->next; tdim = tdim->next) { + if (dim->metric_hash == tdim->metric_hash && !strcmp(dim->metric, tdim->metric)) { + count_same_metric++; + + if(dim->value_type == tdim->value_type) { + count_same_metric_value_type++; + if (tdim == dim) + pos_same_metric_value_type = count_same_metric_value_type; + } + } + } + + if(count_same_metric > 1) { + // the same metric is found multiple times + + size_t len = strlen(dim->metric) + 100; + char metric[ len + 1 ]; + + if(count_same_metric_value_type > 1) { + // the same metric, with the same value type, is added multiple times + snprintfz(metric, len, "%s_%s%zu", dim->metric, valuetype2string(dim->value_type), pos_same_metric_value_type); + } + else { + // the same metric, with different value type is added + snprintfz(metric, len, "%s_%s", dim->metric, valuetype2string(dim->value_type)); + } + + dim->rd = rrddim_add(chart->st, metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); + if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; + if(dim->options != RRDDIM_OPTION_NONE) dim->rd->options |= dim->options; + return dim->rd; + } + + dim->rd = rrddim_add(chart->st, dim->metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); + if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; + if(dim->options != RRDDIM_OPTION_NONE) dim->rd->options |= dim->options; + return dim->rd; +} + +static inline void statsd_update_app_chart(STATSD_APP *app, STATSD_APP_CHART *chart) { + debug(D_STATSD, "updating chart '%s' for app '%s'", chart->id, app->name); + + if(!chart->st) { + chart->st = rrdset_create_custom( + localhost // host + , app->name // type + , chart->id // id + , chart->name // name + , chart->family // family + , chart->context // context + , chart->title // title + , chart->units // units + , PLUGIN_STATSD_NAME // plugin + , chart->module // module + , chart->priority // priority + , statsd.update_every // update every + , chart->chart_type // chart type + , app->rrd_memory_mode // memory mode + , app->rrd_history_entries // history + ); + + rrdset_flag_set(chart->st, RRDSET_FLAG_STORE_FIRST); + // rrdset_flag_set(chart->st, RRDSET_FLAG_DEBUG); + } + + STATSD_APP_CHART_DIM *dim; + for(dim = chart->dimensions; dim ;dim = dim->next) { + if(likely(!dim->metric_pattern)) { + if (unlikely(!dim->rd)) + statsd_add_dim_to_app_chart(app, chart, dim); + + if (unlikely(dim->value_ptr)) { + debug(D_STATSD, "updating dimension '%s' (%s) of chart '%s' (%s) for app '%s' with value " COLLECTED_NUMBER_FORMAT, dim->name, rrddim_id(dim->rd), chart->id, rrdset_id(chart->st), app->name, *dim->value_ptr); + rrddim_set_by_pointer(chart->st, dim->rd, *dim->value_ptr); + } + } + } + + rrdset_done(chart->st); + debug(D_STATSD, "completed update of chart '%s' for app '%s'", chart->id, app->name); +} + +static inline void statsd_update_all_app_charts(void) { + // debug(D_STATSD, "updating app charts"); + + STATSD_APP *app; + for(app = statsd.apps; app ;app = app->next) { + // debug(D_STATSD, "updating charts for app '%s'", app->name); + + STATSD_APP_CHART *chart; + for(chart = app->charts; chart ;chart = chart->next) { + if(unlikely(chart->dimensions_linked_count)) { + statsd_update_app_chart(app, chart); + } + } + } + + // debug(D_STATSD, "completed update of app charts"); +} + +const char *statsd_metric_type_string(STATSD_METRIC_TYPE type) { + switch(type) { + case STATSD_METRIC_TYPE_COUNTER: return "counter"; + case STATSD_METRIC_TYPE_GAUGE: return "gauge"; + case STATSD_METRIC_TYPE_HISTOGRAM: return "histogram"; + case STATSD_METRIC_TYPE_METER: return "meter"; + case STATSD_METRIC_TYPE_SET: return "set"; + case STATSD_METRIC_TYPE_DICTIONARY: return "dictionary"; + case STATSD_METRIC_TYPE_TIMER: return "timer"; + default: return "unknown"; + } +} + +static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_metric)(STATSD_METRIC *)) { + STATSD_METRIC *m; + + // find the useful metrics (incremental = each time we are called, we check the new metrics only) + dfe_start_read(index->dict, m) { + // since we add new metrics at the beginning + // check for useful charts, until the point we last checked + if(unlikely(is_metric_checked(m))) break; + + if(unlikely(!(m->options & STATSD_METRIC_OPTION_CHECKED_IN_APPS))) { + log_access("NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name); + check_if_metric_is_for_app(index, m); + m->options |= STATSD_METRIC_OPTION_CHECKED_IN_APPS; + } + + if(unlikely(!(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED))) { + if(unlikely(statsd.private_charts >= statsd.max_private_charts_hard)) { + debug(D_STATSD, "STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts has been reached.", m->name); + info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts_hard); + m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } + else { + if (simple_pattern_matches(statsd.charts_for, m->name)) { + debug(D_STATSD, "STATSD: metric '%s' will be charted.", m->name); + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } else { + debug(D_STATSD, "STATSD: metric '%s' will not be charted.", m->name); + m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } + } + + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED; + } + + // mark it as checked + m->options |= STATSD_METRIC_OPTION_CHECKED; + + // check if it is used in charts + if((m->options & (STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED|STATSD_METRIC_OPTION_USED_IN_APPS)) && !(m->options & STATSD_METRIC_OPTION_USEFUL)) { + m->options |= STATSD_METRIC_OPTION_USEFUL; + index->useful++; + m->next_useful = index->first_useful; + index->first_useful = m; + } + } + dfe_done(m); + + // flush all the useful metrics + for(m = index->first_useful; m ; m = m->next_useful) { + flush_metric(m); + } +} + + +// -------------------------------------------------------------------------------------- +// statsd main thread + +static int statsd_listen_sockets_setup(void) { + return listen_sockets_setup(&statsd.sockets); +} + +static void statsd_main_cleanup(void *data) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + info("cleaning up..."); + + if (statsd.collection_threads_status) { + int i; + for (i = 0; i < statsd.threads; i++) { + if(statsd.collection_threads_status[i].status) { + info("STATSD: stopping data collection thread %d...", i + 1); + netdata_thread_cancel(statsd.collection_threads_status[i].thread); + } + else { + info("STATSD: data collection thread %d found stopped.", i + 1); + } + } + } + + info("STATSD: closing sockets..."); + listen_sockets_close(&statsd.sockets); + + // destroy the dictionaries + dictionary_destroy(statsd.gauges.dict); + dictionary_destroy(statsd.meters.dict); + dictionary_destroy(statsd.counters.dict); + dictionary_destroy(statsd.histograms.dict); + dictionary_destroy(statsd.dictionaries.dict); + dictionary_destroy(statsd.sets.dict); + dictionary_destroy(statsd.timers.dict); + + info("STATSD: cleanup completed."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; + + worker_unregister(); +} + +#define WORKER_STATSD_FLUSH_GAUGES 0 +#define WORKER_STATSD_FLUSH_COUNTERS 1 +#define WORKER_STATSD_FLUSH_METERS 2 +#define WORKER_STATSD_FLUSH_TIMERS 3 +#define WORKER_STATSD_FLUSH_HISTOGRAMS 4 +#define WORKER_STATSD_FLUSH_SETS 5 +#define WORKER_STATSD_FLUSH_DICTIONARIES 6 +#define WORKER_STATSD_FLUSH_STATS 7 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8 +#endif + +void *statsd_main(void *ptr) { + worker_register("STATSDFLUSH"); + worker_register_job_name(WORKER_STATSD_FLUSH_GAUGES, "gauges"); + worker_register_job_name(WORKER_STATSD_FLUSH_COUNTERS, "counters"); + worker_register_job_name(WORKER_STATSD_FLUSH_METERS, "meters"); + worker_register_job_name(WORKER_STATSD_FLUSH_TIMERS, "timers"); + worker_register_job_name(WORKER_STATSD_FLUSH_HISTOGRAMS, "histograms"); + worker_register_job_name(WORKER_STATSD_FLUSH_SETS, "sets"); + worker_register_job_name(WORKER_STATSD_FLUSH_DICTIONARIES, "dictionaries"); + worker_register_job_name(WORKER_STATSD_FLUSH_STATS, "statistics"); + + netdata_thread_cleanup_push(statsd_main_cleanup, ptr); + + statsd.gauges.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.meters.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.counters.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.histograms.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.dictionaries.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.sets.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.timers.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + + dictionary_register_insert_callback(statsd.gauges.dict, dictionary_metric_insert_callback, &statsd.gauges); + dictionary_register_insert_callback(statsd.meters.dict, dictionary_metric_insert_callback, &statsd.meters); + dictionary_register_insert_callback(statsd.counters.dict, dictionary_metric_insert_callback, &statsd.counters); + dictionary_register_insert_callback(statsd.histograms.dict, dictionary_metric_insert_callback, &statsd.histograms); + dictionary_register_insert_callback(statsd.dictionaries.dict, dictionary_metric_insert_callback, &statsd.dictionaries); + dictionary_register_insert_callback(statsd.sets.dict, dictionary_metric_insert_callback, &statsd.sets); + dictionary_register_insert_callback(statsd.timers.dict, dictionary_metric_insert_callback, &statsd.timers); + + dictionary_register_delete_callback(statsd.gauges.dict, dictionary_metric_delete_callback, &statsd.gauges); + dictionary_register_delete_callback(statsd.meters.dict, dictionary_metric_delete_callback, &statsd.meters); + dictionary_register_delete_callback(statsd.counters.dict, dictionary_metric_delete_callback, &statsd.counters); + dictionary_register_delete_callback(statsd.histograms.dict, dictionary_metric_delete_callback, &statsd.histograms); + dictionary_register_delete_callback(statsd.dictionaries.dict, dictionary_metric_delete_callback, &statsd.dictionaries); + dictionary_register_delete_callback(statsd.sets.dict, dictionary_metric_delete_callback, &statsd.sets); + dictionary_register_delete_callback(statsd.timers.dict, dictionary_metric_delete_callback, &statsd.timers); + + // ---------------------------------------------------------------------------------------------------------------- + // statsd configuration + + statsd.enabled = config_get_boolean(CONFIG_SECTION_PLUGINS, "statsd", statsd.enabled); + + statsd.update_every = default_rrd_update_every; + statsd.update_every = (int)config_get_number(CONFIG_SECTION_STATSD, "update every (flushInterval)", statsd.update_every); + if(statsd.update_every < default_rrd_update_every) { + error("STATSD: minimum flush interval %d given, but the minimum is the update every of netdata. Using %d", statsd.update_every, default_rrd_update_every); + statsd.update_every = default_rrd_update_every; + } + +#ifdef HAVE_RECVMMSG + statsd.recvmmsg_size = (size_t)config_get_number(CONFIG_SECTION_STATSD, "udp messages to process at once", (long long)statsd.recvmmsg_size); +#endif + + statsd.charts_for = simple_pattern_create(config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT); + statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard); + statsd.private_charts_rrd_history_entries = (int)config_get_number(CONFIG_SECTION_STATSD, "private charts history", default_rrd_history_entries); + statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail); + statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle tcp clients after seconds", (long long int)statsd.tcp_idle_timeout); + statsd.private_charts_hidden = (unsigned int)config_get_boolean(CONFIG_SECTION_STATSD, "private charts hidden", statsd.private_charts_hidden); + + statsd.histogram_percentile = (double)config_get_float(CONFIG_SECTION_STATSD, "histograms and timers percentile (percentThreshold)", statsd.histogram_percentile); + if(isless(statsd.histogram_percentile, 0) || isgreater(statsd.histogram_percentile, 100)) { + error("STATSD: invalid histograms and timers percentile %0.5f given", statsd.histogram_percentile); + statsd.histogram_percentile = 95.0; + } + { + char buffer[314 + 1]; + snprintfz(buffer, 314, "%0.1f%%", statsd.histogram_percentile); + statsd.histogram_percentile_str = strdupz(buffer); + } + + statsd.dictionary_max_unique = config_get_number(CONFIG_SECTION_STATSD, "dictionaries max unique dimensions", statsd.dictionary_max_unique); + + if(config_get_boolean(CONFIG_SECTION_STATSD, "add dimension for number of events received", 0)) { + statsd.gauges.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.counters.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.meters.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.sets.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.histograms.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.timers.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.dictionaries.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + } + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on gauges (deleteGauges)", 0)) + statsd.gauges.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on counters (deleteCounters)", 0)) + statsd.counters.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on meters (deleteMeters)", 0)) + statsd.meters.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on sets (deleteSets)", 0)) + statsd.sets.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on histograms (deleteHistograms)", 0)) + statsd.histograms.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on timers (deleteTimers)", 0)) + statsd.timers.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on dictionaries (deleteDictionaries)", 0)) + statsd.dictionaries.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + size_t max_sockets = (size_t)config_get_number(CONFIG_SECTION_STATSD, "statsd server max TCP sockets", (long long int)(rlimit_nofile.rlim_cur / 4)); + +#ifdef STATSD_MULTITHREADED + statsd.threads = (int)config_get_number(CONFIG_SECTION_STATSD, "threads", processors); + if(statsd.threads < 1) { + error("STATSD: Invalid number of threads %d, using %d", statsd.threads, processors); + statsd.threads = processors; + config_set_number(CONFIG_SECTION_STATSD, "collector threads", statsd.threads); + } +#else + statsd.threads = 1; +#endif + + // read custom application definitions + statsd_readdir(netdata_configured_user_config_dir, netdata_configured_stock_config_dir, "statsd.d"); + + // ---------------------------------------------------------------------------------------------------------------- + // statsd setup + + if(!statsd.enabled) goto cleanup; + + statsd_listen_sockets_setup(); + if(!statsd.sockets.opened) { + error("STATSD: No statsd sockets to listen to. statsd will be disabled."); + goto cleanup; + } + + statsd.collection_threads_status = callocz((size_t)statsd.threads, sizeof(struct collection_thread_status)); + + int i; + for(i = 0; i < statsd.threads ;i++) { + statsd.collection_threads_status[i].max_sockets = max_sockets / statsd.threads; + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STATSD_COLLECTOR[%d]", i + 1); + netdata_thread_create(&statsd.collection_threads_status[i].thread, tag, NETDATA_THREAD_OPTION_DEFAULT, statsd_collector_thread, &statsd.collection_threads_status[i]); + } + + // ---------------------------------------------------------------------------------------------------------------- + // statsd monitoring charts + + RRDSET *st_metrics = NULL; + RRDDIM *rd_metrics_gauge = NULL; + RRDDIM *rd_metrics_counter = NULL; + RRDDIM *rd_metrics_timer = NULL; + RRDDIM *rd_metrics_meter = NULL; + RRDDIM *rd_metrics_histogram = NULL; + RRDDIM *rd_metrics_set = NULL; + RRDDIM *rd_metrics_dictionary = NULL; + RRDSET *st_useful_metrics = NULL; + RRDDIM *rd_useful_metrics_gauge = NULL; + RRDDIM *rd_useful_metrics_counter = NULL; + RRDDIM *rd_useful_metrics_timer = NULL; + RRDDIM *rd_useful_metrics_meter = NULL; + RRDDIM *rd_useful_metrics_histogram = NULL; + RRDDIM *rd_useful_metrics_set = NULL; + RRDDIM *rd_useful_metrics_dictionary = NULL; + RRDSET *st_events = NULL; + RRDDIM *rd_events_gauge = NULL; + RRDDIM *rd_events_counter = NULL; + RRDDIM *rd_events_timer = NULL; + RRDDIM *rd_events_meter = NULL; + RRDDIM *rd_events_histogram = NULL; + RRDDIM *rd_events_set = NULL; + RRDDIM *rd_events_dictionary = NULL; + RRDDIM *rd_events_unknown = NULL; + RRDDIM *rd_events_errors = NULL; + RRDSET *st_reads = NULL; + RRDDIM *rd_reads_tcp = NULL; + RRDDIM *rd_reads_udp = NULL; + RRDSET *st_bytes = NULL; + RRDDIM *rd_bytes_tcp = NULL; + RRDDIM *rd_bytes_udp = NULL; + RRDSET *st_packets = NULL; + RRDDIM *rd_packets_tcp = NULL; + RRDDIM *rd_packets_udp = NULL; + RRDSET *st_tcp_connects = NULL; + RRDDIM *rd_tcp_connects = NULL; + RRDDIM *rd_tcp_disconnects = NULL; + RRDSET *st_tcp_connected = NULL; + RRDDIM *rd_tcp_connected = NULL; + RRDSET *st_pcharts = NULL; + RRDDIM *rd_pcharts = NULL; + + if(global_statistics_enabled) { + st_metrics = rrdset_create_localhost( + "netdata", + "statsd_metrics", + NULL, + "statsd", + NULL, + "Metrics in the netdata statsd database", + "metrics", + PLUGIN_STATSD_NAME, + "stats", + 132010, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_metrics_gauge = rrddim_add(st_metrics, "gauges", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_counter = rrddim_add(st_metrics, "counters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_timer = rrddim_add(st_metrics, "timers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_meter = rrddim_add(st_metrics, "meters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_histogram = rrddim_add(st_metrics, "histograms", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_set = rrddim_add(st_metrics, "sets", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_dictionary = rrddim_add(st_metrics, "dictionaries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + st_useful_metrics = rrdset_create_localhost( + "netdata", + "statsd_useful_metrics", + NULL, + "statsd", + NULL, + "Useful metrics in the netdata statsd database", + "metrics", + PLUGIN_STATSD_NAME, + "stats", + 132010, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_useful_metrics_gauge = rrddim_add(st_useful_metrics, "gauges", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_counter = rrddim_add(st_useful_metrics, "counters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_timer = rrddim_add(st_useful_metrics, "timers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_meter = rrddim_add(st_useful_metrics, "meters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_histogram = rrddim_add(st_useful_metrics, "histograms", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_set = rrddim_add(st_useful_metrics, "sets", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_dictionary = rrddim_add(st_useful_metrics, "dictionaries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + st_events = rrdset_create_localhost( + "netdata", + "statsd_events", + NULL, + "statsd", + NULL, + "Events processed by the netdata statsd server", + "events/s", + PLUGIN_STATSD_NAME, + "stats", + 132011, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_events_gauge = rrddim_add(st_events, "gauges", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_counter = rrddim_add(st_events, "counters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_timer = rrddim_add(st_events, "timers", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_meter = rrddim_add(st_events, "meters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_histogram = rrddim_add(st_events, "histograms", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_set = rrddim_add(st_events, "sets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_dictionary = rrddim_add(st_events, "dictionaries", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_unknown = rrddim_add(st_events, "unknown", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_errors = rrddim_add(st_events, "errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_reads = rrdset_create_localhost( + "netdata", + "statsd_reads", + NULL, + "statsd", + NULL, + "Read operations made by the netdata statsd server", + "reads/s", + PLUGIN_STATSD_NAME, + "stats", + 132012, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_reads_tcp = rrddim_add(st_reads, "tcp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_reads_udp = rrddim_add(st_reads, "udp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_bytes = rrdset_create_localhost( + "netdata", + "statsd_bytes", + NULL, + "statsd", + NULL, + "Bytes read by the netdata statsd server", + "kilobits/s", + PLUGIN_STATSD_NAME, + "stats", + 132013, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_bytes_tcp = rrddim_add(st_bytes, "tcp", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_bytes_udp = rrddim_add(st_bytes, "udp", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + + st_packets = rrdset_create_localhost( + "netdata", + "statsd_packets", + NULL, + "statsd", + NULL, + "Network packets processed by the netdata statsd server", + "packets/s", + PLUGIN_STATSD_NAME, + "stats", + 132014, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_packets_tcp = rrddim_add(st_packets, "tcp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_packets_udp = rrddim_add(st_packets, "udp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_tcp_connects = rrdset_create_localhost( + "netdata", + "tcp_connects", + NULL, + "statsd", + NULL, + "statsd server TCP connects and disconnects", + "events", + PLUGIN_STATSD_NAME, + "stats", + 132015, + statsd.update_every, + RRDSET_TYPE_LINE); + rd_tcp_connects = rrddim_add(st_tcp_connects, "connects", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_tcp_disconnects = rrddim_add(st_tcp_connects, "disconnects", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_tcp_connected = rrdset_create_localhost( + "netdata", + "tcp_connected", + NULL, + "statsd", + NULL, + "statsd server TCP connected sockets", + "sockets", + PLUGIN_STATSD_NAME, + "stats", + 132016, + statsd.update_every, + RRDSET_TYPE_LINE); + rd_tcp_connected = rrddim_add(st_tcp_connected, "connected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + st_pcharts = rrdset_create_localhost( + "netdata", + "private_charts", + NULL, + "statsd", + NULL, + "Private metric charts created by the netdata statsd server", + "charts", + PLUGIN_STATSD_NAME, + "stats", + 132020, + statsd.update_every, + RRDSET_TYPE_AREA); + rd_pcharts = rrddim_add(st_pcharts, "charts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + // ---------------------------------------------------------------------------------------------------------------- + // statsd thread to turn metrics into charts + + usec_t step = statsd.update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + while(!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_STATSD_FLUSH_GAUGES); + statsd_flush_index_metrics(&statsd.gauges, statsd_flush_gauge); + + worker_is_busy(WORKER_STATSD_FLUSH_COUNTERS); + statsd_flush_index_metrics(&statsd.counters, statsd_flush_counter); + + worker_is_busy(WORKER_STATSD_FLUSH_METERS); + statsd_flush_index_metrics(&statsd.meters, statsd_flush_meter); + + worker_is_busy(WORKER_STATSD_FLUSH_TIMERS); + statsd_flush_index_metrics(&statsd.timers, statsd_flush_timer); + + worker_is_busy(WORKER_STATSD_FLUSH_HISTOGRAMS); + statsd_flush_index_metrics(&statsd.histograms, statsd_flush_histogram); + + worker_is_busy(WORKER_STATSD_FLUSH_SETS); + statsd_flush_index_metrics(&statsd.sets, statsd_flush_set); + + worker_is_busy(WORKER_STATSD_FLUSH_DICTIONARIES); + statsd_flush_index_metrics(&statsd.dictionaries,statsd_flush_dictionary); + + worker_is_busy(WORKER_STATSD_FLUSH_STATS); + statsd_update_all_app_charts(); + + if(unlikely(netdata_exit)) + break; + + if(global_statistics_enabled) { + rrddim_set_by_pointer(st_metrics, rd_metrics_gauge, (collected_number)statsd.gauges.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_counter, (collected_number)statsd.counters.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_timer, (collected_number)statsd.timers.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_meter, (collected_number)statsd.meters.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_histogram, (collected_number)statsd.histograms.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_set, (collected_number)statsd.sets.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_dictionary, (collected_number)statsd.dictionaries.metrics); + rrdset_done(st_metrics); + + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_gauge, (collected_number)statsd.gauges.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_counter, (collected_number)statsd.counters.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_timer, (collected_number)statsd.timers.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_meter, (collected_number)statsd.meters.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_histogram, (collected_number)statsd.histograms.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_set, (collected_number)statsd.sets.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_dictionary, (collected_number)statsd.dictionaries.useful); + rrdset_done(st_useful_metrics); + + rrddim_set_by_pointer(st_events, rd_events_gauge, (collected_number)statsd.gauges.events); + rrddim_set_by_pointer(st_events, rd_events_counter, (collected_number)statsd.counters.events); + rrddim_set_by_pointer(st_events, rd_events_timer, (collected_number)statsd.timers.events); + rrddim_set_by_pointer(st_events, rd_events_meter, (collected_number)statsd.meters.events); + rrddim_set_by_pointer(st_events, rd_events_histogram, (collected_number)statsd.histograms.events); + rrddim_set_by_pointer(st_events, rd_events_set, (collected_number)statsd.sets.events); + rrddim_set_by_pointer(st_events, rd_events_dictionary, (collected_number)statsd.dictionaries.events); + rrddim_set_by_pointer(st_events, rd_events_unknown, (collected_number)statsd.unknown_types); + rrddim_set_by_pointer(st_events, rd_events_errors, (collected_number)statsd.socket_errors); + rrdset_done(st_events); + + rrddim_set_by_pointer(st_reads, rd_reads_tcp, (collected_number)statsd.tcp_socket_reads); + rrddim_set_by_pointer(st_reads, rd_reads_udp, (collected_number)statsd.udp_socket_reads); + rrdset_done(st_reads); + + rrddim_set_by_pointer(st_bytes, rd_bytes_tcp, (collected_number)statsd.tcp_bytes_read); + rrddim_set_by_pointer(st_bytes, rd_bytes_udp, (collected_number)statsd.udp_bytes_read); + rrdset_done(st_bytes); + + rrddim_set_by_pointer(st_packets, rd_packets_tcp, (collected_number)statsd.tcp_packets_received); + rrddim_set_by_pointer(st_packets, rd_packets_udp, (collected_number)statsd.udp_packets_received); + rrdset_done(st_packets); + + rrddim_set_by_pointer(st_tcp_connects, rd_tcp_connects, (collected_number)statsd.tcp_socket_connects); + rrddim_set_by_pointer(st_tcp_connects, rd_tcp_disconnects, (collected_number)statsd.tcp_socket_disconnects); + rrdset_done(st_tcp_connects); + + rrddim_set_by_pointer(st_tcp_connected, rd_tcp_connected, (collected_number)statsd.tcp_socket_connected); + rrdset_done(st_tcp_connected); + + rrddim_set_by_pointer(st_pcharts, rd_pcharts, (collected_number)statsd.private_charts); + rrdset_done(st_pcharts); + } + } + +cleanup: ; // added semi-colon to prevent older gcc error: label at end of compound statement + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/tc.plugin/Makefile.am b/collectors/tc.plugin/Makefile.am new file mode 100644 index 0000000..9bc6cf2 --- /dev/null +++ b/collectors/tc.plugin/Makefile.am @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +CLEANFILES = \ + tc-qos-helper.sh \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_plugins_SCRIPTS = \ + tc-qos-helper.sh \ + $(NULL) + +dist_noinst_DATA = \ + tc-qos-helper.sh.in \ + README.md \ + $(NULL) diff --git a/collectors/tc.plugin/README.md b/collectors/tc.plugin/README.md new file mode 100644 index 0000000..32c20f4 --- /dev/null +++ b/collectors/tc.plugin/README.md @@ -0,0 +1,205 @@ + + +# tc.plugin + +Live demo - **[see it in action here](https://registry.my-netdata.io/#menu_tc)** ! + +![qos](https://cloud.githubusercontent.com/assets/2662304/14439411/b7f36254-0033-11e6-93f0-c739bb6a1c3a.gif) + +Netdata monitors `tc` QoS classes for all interfaces. + +If you also use [FireQOS](http://firehol.org/tutorial/fireqos-new-user/) it will collect interface and class names. + +There is a [shell helper](https://raw.githubusercontent.com/netdata/netdata/master/collectors/tc.plugin/tc-qos-helper.sh.in) for this (all parsing is done by the plugin in `C` code - this shell script is just a configuration for the command to run to get `tc` output). + +The source of the tc plugin is [here](https://raw.githubusercontent.com/netdata/netdata/master/collectors/tc.plugin/plugin_tc.c). It is somewhat complex, because a state machine was needed to keep track of all the `tc` classes, including the pseudo classes tc dynamically creates. + +## Motivation + +One category of metrics missing in Linux monitoring, is bandwidth consumption for each open socket (inbound and outbound traffic). So, you cannot tell how much bandwidth your web server, your database server, your backup, your ssh sessions, etc are using. + +To solve this problem, the most *adventurous* Linux monitoring tools install kernel modules to capture all traffic, analyze it and provide reports per application. A lot of work, CPU intensive and with a great degree of risk (due to the kernel modules involved which might affect the stability of the whole system). Not to mention that such solutions are probably better suited for a core linux router in your network. + +Others use NFACCT, the netfilter accounting module which is already part of the Linux firewall. However, this would require configuring a firewall on every system you want to measure bandwidth (just FYI, I do install a firewall on every server - and I strongly advise you to do so too - but configuring accounting on all servers seems overkill when you don't really need it for billing purposes). + +**There is however a much simpler approach**. + +## QoS + +One of the features the Linux kernel has, but it is rarely used, is its ability to **apply QoS on traffic**. Even most interesting is that it can apply QoS to **both inbound and outbound traffic**. + +QoS is about 2 features: + +1. **Classify traffic** + + Classification is the process of organizing traffic in groups, called **classes**. Classification can evaluate every aspect of network packets, like source and destination ports, source and destination IPs, netfilter marks, etc. + + When you classify traffic, you just assign a label to it. Of course classes have some properties themselves (like queuing mechanisms), but let's say it is that simple: **a label**. For example **I call `web server` traffic, the traffic from my server's tcp/80, tcp/443 and to my server's tcp/80, tcp/443, while I call `web surfing` all other tcp/80 and tcp/443 traffic**. You can use any combinations you like. There is no limit. + +2. **Apply traffic shaping rules to these classes** + + Traffic shaping is used to control how network interface bandwidth should be shared among the classes. Normally, you need to do this, when there is not enough bandwidth to satisfy all the demand, or when you want to control the supply of bandwidth to certain services. Of course classification is sufficient for monitoring traffic, but traffic shaping is also quite important, as we will explain in the next section. + +## Why you want QoS + +1. **Monitoring the bandwidth used by services** + + Netdata provides wonderful real-time charts, like this one (wait to see the orange `rsync` part): + + ![qos3](https://cloud.githubusercontent.com/assets/2662304/14474189/713ede84-0104-11e6-8c9c-8dca5c2abd63.gif) + +2. **Ensure sensitive administrative tasks will not starve for bandwidth** + + Have you tried to ssh to a server when the network is congested? If you have, you already know it does not work very well. QoS can guarantee that services like ssh, dns, ntp, etc will always have a small supply of bandwidth. So, no matter what happens, you will be able to ssh to your server and DNS will always work. + +3. **Ensure administrative tasks will not monopolize all the bandwidth** + + Services like backups, file copies, database dumps, etc can easily monopolize all the available bandwidth. It is common for example a nightly backup, or a huge file transfer to negatively influence the end-user experience. QoS can fix that. + +4. **Ensure each end-user connection will get a fair cut of the available bandwidth.** + + Several QoS queuing disciplines in Linux do this automatically, without any configuration from you. The result is that new sockets are favored over older ones, so that users will get a snappier experience, while others are transferring large amounts of traffic. + +5. **Protect the servers from DDoS attacks.** + + When your system is under a DDoS attack, it will get a lot more bandwidth compared to the one it can handle and probably your applications will crash. Setting a limit on the inbound traffic using QoS, will protect your servers (throttle the requests) and depending on the size of the attack may allow your legitimate users to access the server, while the attack is taking place. + + Using QoS together with a [SYNPROXY](/collectors/proc.plugin/README.md) will provide a great degree of protection against most DDoS attacks. Actually when I wrote that article, a few folks tried to DDoS the Netdata demo site to see in real-time the SYNPROXY operation. They did not do it right, but anyway a great deal of requests reached the Netdata server. What saved Netdata was QoS. The Netdata demo server has QoS installed, so the requests were throttled and the server did not even reach the point of resource starvation. Read about it [here](/collectors/proc.plugin/README.md). + +On top of all these, QoS is extremely light. You will configure it once, and this is it. It will not bother you again and it will not use any noticeable CPU resources, especially on application and database servers. + +``` +- ensure administrative tasks (like ssh, dns, etc) will always have a small but guaranteed bandwidth. So, no matter what happens, I will be able to ssh to my server and DNS will work. + +- ensure other administrative tasks will not monopolize all the available bandwidth. So, my nightly backup will not hurt my users, a developer that is copying files over the net will not get all the available bandwidth, etc. + +- ensure each end-user connection will get a fair cut of the available bandwidth. +``` + +Once **traffic classification** is applied, we can use **[netdata](https://github.com/netdata/netdata)** to visualize the bandwidth consumption per class in real-time (no configuration is needed for Netdata - it will figure it out). + +QoS, is extremely light. You will configure it once, and this is it. It will not bother you again and it will not use any noticeable CPU resources, especially on application and database servers. + +This is QoS from a home linux router. Check these features: + +1. It is real-time (per second updates) +2. QoS really works in Linux - check that the `background` traffic is squeezed when `surfing` needs it. + +![test2](https://cloud.githubusercontent.com/assets/2662304/14093004/68966020-f553-11e5-98fe-ffee2086fafd.gif) + +--- + +## QoS in Linux? + +Of course, `tc` is probably **the most undocumented, complicated and unfriendly** command in Linux. + +For example, do you know that for matching a simple port range in `tc`, e.g. all the high ports, from 1025 to 65535 inclusive, you have to match these: + +``` +1025/0xffff +1026/0xfffe +1028/0xfffc +1032/0xfff8 +1040/0xfff0 +1056/0xffe0 +1088/0xffc0 +1152/0xff80 +1280/0xff00 +1536/0xfe00 +2048/0xf800 +4096/0xf000 +8192/0xe000 +16384/0xc000 +32768/0x8000 +``` + +To do it the hard way, you can go through the [tc configuration steps](#qos-configuration-with-tc). An easier way is to use **[FireQOS](https://firehol.org/tutorial/fireqos-new-user/)**, a tool that simplifies QoS management in Linux. + +## Qos Configuration with FireHOL + +The **[FireHOL](https://firehol.org/)** package already distributes **[FireQOS](https://firehol.org/tutorial/fireqos-new-user/)**. Check the **[FireQOS tutorial](https://firehol.org/tutorial/fireqos-new-user/)** to learn how to write your own QoS configuration. + +With **[FireQOS](https://firehol.org/tutorial/fireqos-new-user/)**, it is **really simple for everyone to use QoS in Linux**. Just install the package `firehol`. It should already be available for your distribution. If not, check the **[FireHOL Installation Guide](https://firehol.org/installing/)**. After that, you will have the `fireqos` command which uses a configuration like the following `/etc/firehol/fireqos.conf`, used at the Netdata demo site: + +```sh + # configure the Netdata ports + server_netdata_ports="tcp/19999" + + interface eth0 world bidirectional ethernet balanced rate 50Mbit + class arp + match arp + + class icmp + match icmp + + class dns commit 1Mbit + server dns + client dns + + class ntp + server ntp + client ntp + + class ssh commit 2Mbit + server ssh + client ssh + + class rsync commit 2Mbit max 10Mbit + server rsync + client rsync + + class web_server commit 40Mbit + server http + server netdata + + class client + client surfing + + class nms commit 1Mbit + match input src 10.2.3.5 +``` + +Nothing more is needed. You just run `fireqos start` to apply this configuration, restart Netdata and you have real-time visualization of the bandwidth consumption of your applications. FireQOS is not a daemon. It will just convert the configuration to `tc` commands. It will run them and it will exit. + +**IMPORTANT**: If you copy this configuration to apply it to your system, please adapt the speeds - experiment in non-production environments to learn the tool, before applying it on your servers. + +And this is what you are going to get: + +![image](https://cloud.githubusercontent.com/assets/2662304/14436322/c91d90a4-0024-11e6-9fb1-57cdef1580df.png) + +## QoS Configuration with tc + +First, setup the tc rules in rc.local using commands to assign different QoS markings to different classids. You can see one such example in [github issue #4563](https://github.com/netdata/netdata/issues/4563#issuecomment-455711973). + +Then, map the classids to names by creating `/etc/iproute2/tc_cls`. For example: + +``` +2:1 Standard +2:8 LowPriorityData +2:10 HighThroughputData +2:16 OAM +2:18 LowLatencyData +2:24 BroadcastVideo +2:26 MultimediaStreaming +2:32 RealTimeInteractive +2:34 MultimediaConferencing +2:40 Signalling +2:46 Telephony +2:48 NetworkControl +``` + +Add the following configuration option in `/etc/netdata.conf`: + +```\[plugin:tc] + enable show all classes and qdiscs for all interfaces = yes +``` + +Finally, create `/etc/netdata/tc-qos-helper.conf` with this content: +`tc_show="class"` + +Please note, that by default Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a chart instead of `auto` to enable it permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. + + diff --git a/collectors/tc.plugin/plugin_tc.c b/collectors/tc.plugin/plugin_tc.c new file mode 100644 index 0000000..a2e72ee --- /dev/null +++ b/collectors/tc.plugin/plugin_tc.c @@ -0,0 +1,1182 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "daemon/common.h" + +#define RRD_TYPE_TC "tc" +#define PLUGIN_TC_NAME "tc.plugin" + +// ---------------------------------------------------------------------------- +// /sbin/tc processor +// this requires the script plugins.d/tc-qos-helper.sh + +#define TC_LINE_MAX 1024 + +struct tc_class { + STRING *id; + STRING *name; + STRING *leafid; + STRING *parentid; + + bool hasparent; + bool isleaf; + bool isqdisc; + bool render; + bool name_updated; + bool updated; + + int unupdated; // the number of times, this has been found un-updated + + unsigned long long bytes; + unsigned long long packets; + unsigned long long dropped; + unsigned long long tokens; + unsigned long long ctokens; + + //unsigned long long overlimits; + //unsigned long long requeues; + //unsigned long long lended; + //unsigned long long borrowed; + //unsigned long long giants; + + RRDDIM *rd_bytes; + RRDDIM *rd_packets; + RRDDIM *rd_dropped; + RRDDIM *rd_tokens; + RRDDIM *rd_ctokens; +}; + +struct tc_device { + STRING *id; + STRING *name; + STRING *family; + + bool name_updated; + bool family_updated; + + char enabled; + char enabled_bytes; + char enabled_packets; + char enabled_dropped; + char enabled_tokens; + char enabled_ctokens; + char enabled_all_classes_qdiscs; + + RRDSET *st_bytes; + RRDSET *st_packets; + RRDSET *st_dropped; + RRDSET *st_tokens; + RRDSET *st_ctokens; + + DICTIONARY *classes; +}; + + +// ---------------------------------------------------------------------------- +// tc_class index + +static void tc_class_free_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + // struct tc_device *d = data; + struct tc_class *c = value; + + string_freez(c->id); + string_freez(c->name); + string_freez(c->leafid); + string_freez(c->parentid); +} + +static bool tc_class_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) { + struct tc_device *d = data; (void)d; + struct tc_class *c = old_value; (void)c; + struct tc_class *new_c = new_value; (void)new_c; + + error("TC: class '%s' is already in device '%s'. Ignoring duplicate.", dictionary_acquired_item_name(item), string2str(d->id)); + + tc_class_free_callback(item, new_value, data); + + return true; +} + +static void tc_class_index_init(struct tc_device *d) { + if(!d->classes) { + d->classes = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_SINGLE_THREADED); + + dictionary_register_delete_callback(d->classes, tc_class_free_callback, d); + dictionary_register_conflict_callback(d->classes, tc_class_conflict_callback, d); + } +} + +static void tc_class_index_destroy(struct tc_device *d) { + dictionary_destroy(d->classes); + d->classes = NULL; +} + +static struct tc_class *tc_class_index_add(struct tc_device *d, struct tc_class *c) { + return dictionary_set(d->classes, string2str(c->id), c, sizeof(*c)); +} + +static void tc_class_index_del(struct tc_device *d, struct tc_class *c) { + dictionary_del(d->classes, string2str(c->id)); +} + +static inline struct tc_class *tc_class_index_find(struct tc_device *d, const char *id) { + return dictionary_get(d->classes, id); +} + +// ---------------------------------------------------------------------------- +// tc_device index + +static DICTIONARY *tc_device_root_index = NULL; + +static void tc_device_add_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct tc_device *d = value; + tc_class_index_init(d); +} + +static void tc_device_free_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct tc_device *d = value; + + tc_class_index_destroy(d); + + string_freez(d->id); + string_freez(d->name); + string_freez(d->family); +} + +static void tc_device_index_init() { + if(!tc_device_root_index) { + tc_device_root_index = dictionary_create( + DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_SINGLE_THREADED | DICT_OPTION_ADD_IN_FRONT); + + dictionary_register_insert_callback(tc_device_root_index, tc_device_add_callback, NULL); + dictionary_register_delete_callback(tc_device_root_index, tc_device_free_callback, NULL); + } +} + +static void tc_device_index_destroy() { + dictionary_destroy(tc_device_root_index); + tc_device_root_index = NULL; +} + +static struct tc_device *tc_device_index_add(struct tc_device *d) { + return dictionary_set(tc_device_root_index, string2str(d->id), d, sizeof(*d)); +} + +//static struct tc_device *tc_device_index_del(struct tc_device *d) { +// dictionary_del(tc_device_root_index, string2str(d->id)); +// return d; +//} + +static inline struct tc_device *tc_device_index_find(const char *id) { + return dictionary_get(tc_device_root_index, id); +} + +// ---------------------------------------------------------------------------- + +static inline void tc_class_free(struct tc_device *n, struct tc_class *c) { + debug(D_TC_LOOP, "Removing from device '%s' class '%s', parentid '%s', leafid '%s', unused=%d", + string2str(n->id), string2str(c->id), string2str(c->parentid), string2str(c->leafid), + c->unupdated); + + tc_class_index_del(n, c); +} + +static inline void tc_device_classes_cleanup(struct tc_device *d) { + static int cleanup_every = 999; + + if(unlikely(cleanup_every > 0)) { + cleanup_every = (int) config_get_number("plugin:tc", "cleanup unused classes every", 120); + if(cleanup_every < 0) cleanup_every = -cleanup_every; + } + + d->name_updated = false; + d->family_updated = false; + + struct tc_class *c; + dfe_start_write(d->classes, c) { + if(unlikely(cleanup_every && c->unupdated >= cleanup_every)) + tc_class_free(d, c); + + else { + c->updated = false; + c->name_updated = false; + } + } + dfe_done(c); +} + +static inline void tc_device_commit(struct tc_device *d) { + static int enable_new_interfaces = -1, enable_bytes = -1, enable_packets = -1, enable_dropped = -1, enable_tokens = -1, enable_ctokens = -1, enabled_all_classes_qdiscs = -1; + + if(unlikely(enable_new_interfaces == -1)) { + enable_new_interfaces = config_get_boolean_ondemand("plugin:tc", "enable new interfaces detected at runtime", CONFIG_BOOLEAN_YES); + enable_bytes = config_get_boolean_ondemand("plugin:tc", "enable traffic charts for all interfaces", CONFIG_BOOLEAN_AUTO); + enable_packets = config_get_boolean_ondemand("plugin:tc", "enable packets charts for all interfaces", CONFIG_BOOLEAN_AUTO); + enable_dropped = config_get_boolean_ondemand("plugin:tc", "enable dropped charts for all interfaces", CONFIG_BOOLEAN_AUTO); + enable_tokens = config_get_boolean_ondemand("plugin:tc", "enable tokens charts for all interfaces", CONFIG_BOOLEAN_NO); + enable_ctokens = config_get_boolean_ondemand("plugin:tc", "enable ctokens charts for all interfaces", CONFIG_BOOLEAN_NO); + enabled_all_classes_qdiscs = config_get_boolean_ondemand("plugin:tc", "enable show all classes and qdiscs for all interfaces", CONFIG_BOOLEAN_NO); + } + + if(unlikely(d->enabled == (char)-1)) { + char var_name[CONFIG_MAX_NAME + 1]; + snprintfz(var_name, CONFIG_MAX_NAME, "qos for %s", string2str(d->id)); + + d->enabled = (char)config_get_boolean_ondemand("plugin:tc", var_name, enable_new_interfaces); + + snprintfz(var_name, CONFIG_MAX_NAME, "traffic chart for %s", string2str(d->id)); + d->enabled_bytes = (char)config_get_boolean_ondemand("plugin:tc", var_name, enable_bytes); + + snprintfz(var_name, CONFIG_MAX_NAME, "packets chart for %s", string2str(d->id)); + d->enabled_packets = (char)config_get_boolean_ondemand("plugin:tc", var_name, enable_packets); + + snprintfz(var_name, CONFIG_MAX_NAME, "dropped packets chart for %s", string2str(d->id)); + d->enabled_dropped = (char)config_get_boolean_ondemand("plugin:tc", var_name, enable_dropped); + + snprintfz(var_name, CONFIG_MAX_NAME, "tokens chart for %s", string2str(d->id)); + d->enabled_tokens = (char)config_get_boolean_ondemand("plugin:tc", var_name, enable_tokens); + + snprintfz(var_name, CONFIG_MAX_NAME, "ctokens chart for %s", string2str(d->id)); + d->enabled_ctokens = (char)config_get_boolean_ondemand("plugin:tc", var_name, enable_ctokens); + + snprintfz(var_name, CONFIG_MAX_NAME, "show all classes for %s", string2str(d->id)); + d->enabled_all_classes_qdiscs = (char)config_get_boolean_ondemand("plugin:tc", var_name, enabled_all_classes_qdiscs); + } + + // we only need to add leaf classes + struct tc_class *c, *x /*, *root = NULL */; + unsigned long long bytes_sum = 0, packets_sum = 0, dropped_sum = 0, tokens_sum = 0, ctokens_sum = 0; + int active_nodes = 0, updated_classes = 0, updated_qdiscs = 0; + + // prepare all classes + // we set reasonable defaults for the rest of the code below + + dfe_start_read(d->classes, c) { + c->render = false; // do not render this class + c->isleaf = true; // this is a leaf class + c->hasparent = false; // without a parent + + if(unlikely(!c->updated)) + c->unupdated++; // increase its unupdated counter + else { + c->unupdated = 0; // reset its unupdated counter + + // count how many of each kind + if(c->isqdisc) + updated_qdiscs++; + else + updated_classes++; + } + } + dfe_done(c); + + if(unlikely(!d->enabled || (!updated_classes && !updated_qdiscs))) { + debug(D_TC_LOOP, "TC: Ignoring TC device '%s'. It is not enabled/updated.", string2str(d->name?d->name:d->id)); + tc_device_classes_cleanup(d); + return; + } + + if(unlikely(updated_classes && updated_qdiscs)) { + error("TC: device '%s' has active both classes (%d) and qdiscs (%d). Will render only qdiscs.", string2str(d->id), updated_classes, updated_qdiscs); + + // set all classes to !updated + dfe_start_read(d->classes, c) { + if (unlikely(!c->isqdisc && c->updated)) + c->updated = false; + } + dfe_done(c); + updated_classes = 0; + } + + // mark the classes as leafs and parents + // + // TC is hierarchical: + // - classes can have other classes in them + // - the same is true for qdiscs (i.e. qdiscs have classes, that have other qdiscs) + // + // we need to present a chart with leaf nodes only, so that the sum + // of all dimensions of the chart, will be the total utilization + // of the interface. + // + // here we try to find the ones we need to report + // by default all nodes are marked with: isleaf = 1 (see above) + // + // so, here we remove the isleaf flag from nodes in the middle + // and we add the hasparent flag to leaf nodes we found their parent + if(likely(!d->enabled_all_classes_qdiscs)) { + dfe_start_read(d->classes, c) { + if(unlikely(!c->updated)) + continue; + + //debug(D_TC_LOOP, "TC: In device '%s', %s '%s' has leafid: '%s' and parentid '%s'.", + // d->id, + // c->isqdisc?"qdisc":"class", + // c->id, + // c->leafid?c->leafid:"NULL", + // c->parentid?c->parentid:"NULL"); + + // find if c is leaf or not + dfe_start_read(d->classes, x) { + if(unlikely(!x->updated || c == x || !x->parentid)) + continue; + + // classes have both parentid and leafid + // qdiscs have only parentid + // the following works for both (it is an OR) + + if((x->parentid && c->id == x->parentid) || + (c->leafid && x->parentid && c->leafid == x->parentid)) { + // debug(D_TC_LOOP, "TC: In device '%s', %s '%s' (leafid: '%s') has as leaf %s '%s' (parentid: '%s').", d->name?d->name:d->id, c->isqdisc?"qdisc":"class", c->name?c->name:c->id, c->leafid?c->leafid:c->id, x->isqdisc?"qdisc":"class", x->name?x->name:x->id, x->parentid?x->parentid:x->id); + c->isleaf = false; + x->hasparent = true; + } + } + dfe_done(x); + } + dfe_done(c); + } + + dfe_start_read(d->classes, c) { + if(unlikely(!c->updated)) + continue; + + // debug(D_TC_LOOP, "TC: device '%s', %s '%s' isleaf=%d, hasparent=%d", d->id, (c->isqdisc)?"qdisc":"class", c->id, c->isleaf, c->hasparent); + + if(unlikely((c->isleaf && c->hasparent) || d->enabled_all_classes_qdiscs)) { + c->render = true; + active_nodes++; + bytes_sum += c->bytes; + packets_sum += c->packets; + dropped_sum += c->dropped; + tokens_sum += c->tokens; + ctokens_sum += c->ctokens; + } + + //if(unlikely(!c->hasparent)) { + // if(root) error("TC: multiple root class/qdisc for device '%s' (old: '%s', new: '%s')", d->id, root->id, c->id); + // root = c; + // debug(D_TC_LOOP, "TC: found root class/qdisc '%s'", root->id); + //} + } + dfe_done(c); + +#ifdef NETDATA_INTERNAL_CHECKS + // dump all the list to see what we know + + if(unlikely(debug_flags & D_TC_LOOP)) { + dfe_start_read(d->classes, c) { + if(c->render) debug(D_TC_LOOP, "TC: final nodes dump for '%s': class %s, OK", string2str(d->name), string2str(c->id)); + else debug(D_TC_LOOP, "TC: final nodes dump for '%s': class '%s', IGNORE (updated: %d, isleaf: %d, hasparent: %d, parent: '%s')", + string2str(d->name?d->name:d->id), string2str(c->id), c->updated, c->isleaf, c->hasparent, string2str(c->parentid)); + } + dfe_done(c); + } +#endif + + if(unlikely(!active_nodes)) { + debug(D_TC_LOOP, "TC: Ignoring TC device '%s'. No useful classes/qdiscs.", string2str(d->name?d->name:d->id)); + tc_device_classes_cleanup(d); + return; + } + + debug(D_TC_LOOP, "TC: evaluating TC device '%s'. enabled = %d/%d (bytes: %d/%d, packets: %d/%d, dropped: %d/%d, tokens: %d/%d, ctokens: %d/%d, all_classes_qdiscs: %d/%d), classes: (bytes = %llu, packets = %llu, dropped = %llu, tokens = %llu, ctokens = %llu).", + string2str(d->name?d->name:d->id), + d->enabled, enable_new_interfaces, + d->enabled_bytes, enable_bytes, + d->enabled_packets, enable_packets, + d->enabled_dropped, enable_dropped, + d->enabled_tokens, enable_tokens, + d->enabled_ctokens, enable_ctokens, + d->enabled_all_classes_qdiscs, enabled_all_classes_qdiscs, + bytes_sum, + packets_sum, + dropped_sum, + tokens_sum, + ctokens_sum + ); + + // -------------------------------------------------------------------- + // bytes + + if(d->enabled_bytes == CONFIG_BOOLEAN_YES || (d->enabled_bytes == CONFIG_BOOLEAN_AUTO && + (bytes_sum || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->enabled_bytes = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_bytes)) { + d->st_bytes = rrdset_create_localhost( + RRD_TYPE_TC, + string2str(d->id), + string2str(d->name ? d->name : d->id), + string2str(d->family ? d->family : d->id), + RRD_TYPE_TC ".qos", + "Class Usage", + "kilobits/s", + PLUGIN_TC_NAME, + NULL, + NETDATA_CHART_PRIO_TC_QOS, + localhost->rrd_update_every, + d->enabled_all_classes_qdiscs ? RRDSET_TYPE_LINE : RRDSET_TYPE_STACKED); + + rrdlabels_add(d->st_bytes->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + } + else { + if(unlikely(d->name_updated)) + rrdset_reset_name(d->st_bytes, string2str(d->name)); + + if(d->name && d->name_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + + if(d->family && d->family_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + + // TODO + // update the family + } + + dfe_start_read(d->classes, c) { + if(unlikely(!c->render)) continue; + + if(unlikely(!c->rd_bytes)) + c->rd_bytes = rrddim_add(d->st_bytes, string2str(c->id), string2str(c->name?c->name:c->id), 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + else if(unlikely(c->name_updated)) + rrddim_reset_name(d->st_bytes, c->rd_bytes, string2str(c->name)); + + rrddim_set_by_pointer(d->st_bytes, c->rd_bytes, c->bytes); + } + dfe_done(c); + + rrdset_done(d->st_bytes); + } + + // -------------------------------------------------------------------- + // packets + + if(d->enabled_packets == CONFIG_BOOLEAN_YES || (d->enabled_packets == CONFIG_BOOLEAN_AUTO && + (packets_sum || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->enabled_packets = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_packets)) { + char id[RRD_ID_LENGTH_MAX + 1]; + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s_packets", string2str(d->id)); + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_packets", string2str(d->name ? d->name : d->id)); + + d->st_packets = rrdset_create_localhost( + RRD_TYPE_TC, + id, + name, + string2str(d->family ? d->family : d->id), + RRD_TYPE_TC ".qos_packets", + "Class Packets", + "packets/s", + PLUGIN_TC_NAME, + NULL, + NETDATA_CHART_PRIO_TC_QOS_PACKETS, + localhost->rrd_update_every, + d->enabled_all_classes_qdiscs ? RRDSET_TYPE_LINE : RRDSET_TYPE_STACKED); + + rrdlabels_add(d->st_bytes->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + } + else { + if(unlikely(d->name_updated)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_packets", string2str(d->name?d->name:d->id)); + rrdset_reset_name(d->st_packets, name); + } + + if(d->name && d->name_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + + if(d->family && d->family_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + + // TODO + // update the family + } + + dfe_start_read(d->classes, c) { + if(unlikely(!c->render)) continue; + + if(unlikely(!c->rd_packets)) + c->rd_packets = rrddim_add(d->st_packets, string2str(c->id), string2str(c->name?c->name:c->id), 1, 1, RRD_ALGORITHM_INCREMENTAL); + else if(unlikely(c->name_updated)) + rrddim_reset_name(d->st_packets, c->rd_packets, string2str(c->name)); + + rrddim_set_by_pointer(d->st_packets, c->rd_packets, c->packets); + } + dfe_done(c); + + rrdset_done(d->st_packets); + } + + // -------------------------------------------------------------------- + // dropped + + if(d->enabled_dropped == CONFIG_BOOLEAN_YES || (d->enabled_dropped == CONFIG_BOOLEAN_AUTO && + (dropped_sum || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->enabled_dropped = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_dropped)) { + char id[RRD_ID_LENGTH_MAX + 1]; + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s_dropped", string2str(d->id)); + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_dropped", string2str(d->name ? d->name : d->id)); + + d->st_dropped = rrdset_create_localhost( + RRD_TYPE_TC, + id, + name, + string2str(d->family ? d->family : d->id), + RRD_TYPE_TC ".qos_dropped", + "Class Dropped Packets", + "packets/s", + PLUGIN_TC_NAME, + NULL, + NETDATA_CHART_PRIO_TC_QOS_DROPPED, + localhost->rrd_update_every, + d->enabled_all_classes_qdiscs ? RRDSET_TYPE_LINE : RRDSET_TYPE_STACKED); + + rrdlabels_add(d->st_bytes->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + } + else { + if(unlikely(d->name_updated)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_dropped", string2str(d->name?d->name:d->id)); + rrdset_reset_name(d->st_dropped, name); + } + + if(d->name && d->name_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + + if(d->family && d->family_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + + // TODO + // update the family + } + + dfe_start_read(d->classes, c) { + if(unlikely(!c->render)) continue; + + if(unlikely(!c->rd_dropped)) + c->rd_dropped = rrddim_add(d->st_dropped, string2str(c->id), string2str(c->name?c->name:c->id), 1, 1, RRD_ALGORITHM_INCREMENTAL); + else if(unlikely(c->name_updated)) + rrddim_reset_name(d->st_dropped, c->rd_dropped, string2str(c->name)); + + rrddim_set_by_pointer(d->st_dropped, c->rd_dropped, c->dropped); + } + dfe_done(c); + + rrdset_done(d->st_dropped); + } + + // -------------------------------------------------------------------- + // tokens + + if(d->enabled_tokens == CONFIG_BOOLEAN_YES || (d->enabled_tokens == CONFIG_BOOLEAN_AUTO && + (tokens_sum || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->enabled_tokens = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_tokens)) { + char id[RRD_ID_LENGTH_MAX + 1]; + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s_tokens", string2str(d->id)); + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_tokens", string2str(d->name ? d->name : d->id)); + + d->st_tokens = rrdset_create_localhost( + RRD_TYPE_TC, + id, + name, + string2str(d->family ? d->family : d->id), + RRD_TYPE_TC ".qos_tokens", + "Class Tokens", + "tokens", + PLUGIN_TC_NAME, + NULL, + NETDATA_CHART_PRIO_TC_QOS_TOKENS, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rrdlabels_add(d->st_bytes->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + } + else { + if(unlikely(d->name_updated)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_tokens", string2str(d->name?d->name:d->id)); + rrdset_reset_name(d->st_tokens, name); + } + + if(d->name && d->name_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + + if(d->family && d->family_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + + // TODO + // update the family + } + + dfe_start_read(d->classes, c) { + if(unlikely(!c->render)) continue; + + if(unlikely(!c->rd_tokens)) { + c->rd_tokens = rrddim_add(d->st_tokens, string2str(c->id), string2str(c->name?c->name:c->id), 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + else if(unlikely(c->name_updated)) + rrddim_reset_name(d->st_tokens, c->rd_tokens, string2str(c->name)); + + rrddim_set_by_pointer(d->st_tokens, c->rd_tokens, c->tokens); + } + dfe_done(c); + + rrdset_done(d->st_tokens); + } + + // -------------------------------------------------------------------- + // ctokens + + if(d->enabled_ctokens == CONFIG_BOOLEAN_YES || (d->enabled_ctokens == CONFIG_BOOLEAN_AUTO && + (ctokens_sum || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + d->enabled_ctokens = CONFIG_BOOLEAN_YES; + + if(unlikely(!d->st_ctokens)) { + char id[RRD_ID_LENGTH_MAX + 1]; + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s_ctokens", string2str(d->id)); + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_ctokens", string2str(d->name ? d->name : d->id)); + + d->st_ctokens = rrdset_create_localhost( + RRD_TYPE_TC, + id, + name, + string2str(d->family ? d->family : d->id), + RRD_TYPE_TC ".qos_ctokens", + "Class cTokens", + "ctokens", + PLUGIN_TC_NAME, + NULL, + NETDATA_CHART_PRIO_TC_QOS_CTOKENS, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rrdlabels_add(d->st_bytes->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + } + else { + debug(D_TC_LOOP, "TC: Updating _ctokens chart for device '%s'", string2str(d->name?d->name:d->id)); + + if(unlikely(d->name_updated)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_ctokens", string2str(d->name?d->name:d->id)); + rrdset_reset_name(d->st_ctokens, name); + } + + if(d->name && d->name_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + + if(d->family && d->family_updated) + rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + + // TODO + // update the family + } + + dfe_start_read(d->classes, c) { + if(unlikely(!c->render)) continue; + + if(unlikely(!c->rd_ctokens)) + c->rd_ctokens = rrddim_add(d->st_ctokens, string2str(c->id), string2str(c->name?c->name:c->id), 1, 1, RRD_ALGORITHM_ABSOLUTE); + else if(unlikely(c->name_updated)) + rrddim_reset_name(d->st_ctokens, c->rd_ctokens, string2str(c->name)); + + rrddim_set_by_pointer(d->st_ctokens, c->rd_ctokens, c->ctokens); + } + dfe_done(c); + + rrdset_done(d->st_ctokens); + } + + tc_device_classes_cleanup(d); +} + +static inline void tc_device_set_class_name(struct tc_device *d, char *id, char *name) { + if(unlikely(!name || !*name)) return; + + struct tc_class *c = tc_class_index_find(d, id); + if(likely(c)) { + if(likely(c->name)) { + if(!strcmp(string2str(c->name), name)) return; + string_freez(c->name); + c->name = NULL; + } + + if(likely(name && *name && strcmp(string2str(c->id), name) != 0)) { + debug(D_TC_LOOP, "TC: Setting device '%s', class '%s' name to '%s'", string2str(d->id), id, name); + c->name = string_strdupz(name); + c->name_updated = true; + } + } +} + +static inline void tc_device_set_device_name(struct tc_device *d, char *name) { + if(unlikely(!name || !*name)) return; + + if(d->name) { + if(!strcmp(string2str(d->name), name)) return; + string_freez(d->name); + d->name = NULL; + } + + if(likely(name && *name && strcmp(string2str(d->id), name) != 0)) { + debug(D_TC_LOOP, "TC: Setting device '%s' name to '%s'", string2str(d->id), name); + d->name = string_strdupz(name); + d->name_updated = true; + } +} + +static inline void tc_device_set_device_family(struct tc_device *d, char *family) { + string_freez(d->family); + d->family = NULL; + + if(likely(family && *family && strcmp(string2str(d->id), family) != 0)) { + debug(D_TC_LOOP, "TC: Setting device '%s' family to '%s'", string2str(d->id), family); + d->family = string_strdupz(family); + d->family_updated = true; + } + // no need for null termination - it is already null +} + +static inline struct tc_device *tc_device_create(char *id) { + struct tc_device *d = tc_device_index_find(id); + + if(!d) { + debug(D_TC_LOOP, "TC: Creating device '%s'", id); + + struct tc_device tmp = { + .id = string_strdupz(id), + .enabled = (char)-1, + }; + d = tc_device_index_add(&tmp); + } + + return(d); +} + +static inline struct tc_class *tc_class_add(struct tc_device *n, char *id, bool qdisc, char *parentid, char *leafid) { + struct tc_class *c = tc_class_index_find(n, id); + + if(!c) { + debug(D_TC_LOOP, "TC: Creating in device '%s', class id '%s', parentid '%s', leafid '%s'", + string2str(n->id), id, parentid?parentid:"", leafid?leafid:""); + + struct tc_class tmp = { + .id = string_strdupz(id), + .isqdisc = qdisc, + .parentid = string_strdupz(parentid), + .leafid = string_strdupz(leafid), + }; + + tc_class_index_add(n, &tmp); + } + return(c); +} + +//static inline void tc_device_free(struct tc_device *d) { +// tc_device_index_del(d); +//} + +static inline int tc_space(char c) { + switch(c) { + case ' ': + case '\t': + case '\r': + case '\n': + return 1; + + default: + return 0; + } +} + +static inline void tc_split_words(char *str, char **words, int max_words) { + char *s = str; + int i = 0; + + // skip all white space + while(tc_space(*s)) s++; + + // store the first word + words[i++] = s; + + // while we have something + while(*s) { + // if it is a space + if(unlikely(tc_space(*s))) { + + // terminate the word + *s++ = '\0'; + + // skip all white space + while(tc_space(*s)) s++; + + // if we reached the end, stop + if(!*s) break; + + // store the next word + if(i < max_words) words[i++] = s; + else break; + } + else s++; + } + + // terminate the words + while(i < max_words) words[i++] = NULL; +} + +static pid_t tc_child_pid = 0; + +static void tc_main_cleanup(void *ptr) { + worker_unregister(); + + tc_device_index_destroy(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + if(tc_child_pid) { + info("TC: killing with SIGTERM tc-qos-helper process %d", tc_child_pid); + if(killpid(tc_child_pid) != -1) { + siginfo_t info; + + info("TC: waiting for tc plugin child process pid %d to exit...", tc_child_pid); + waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED); + } + + tc_child_pid = 0; + } + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +#define WORKER_TC_CLASS 0 +#define WORKER_TC_BEGIN 1 +#define WORKER_TC_END 2 +#define WORKER_TC_SENT 3 +#define WORKER_TC_LENDED 4 +#define WORKER_TC_TOKENS 5 +#define WORKER_TC_SETDEVICENAME 6 +#define WORKER_TC_SETDEVICEGROUP 7 +#define WORKER_TC_SETCLASSNAME 8 +#define WORKER_TC_WORKTIME 9 +#define WORKER_TC_PLUGIN_TIME 10 +#define WORKER_TC_DEVICES 11 +#define WORKER_TC_CLASSES 12 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 13 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 10 +#endif + +void *tc_main(void *ptr) { + worker_register("TC"); + worker_register_job_name(WORKER_TC_CLASS, "class"); + worker_register_job_name(WORKER_TC_BEGIN, "begin"); + worker_register_job_name(WORKER_TC_END, "end"); + worker_register_job_name(WORKER_TC_SENT, "sent"); + worker_register_job_name(WORKER_TC_LENDED, "lended"); + worker_register_job_name(WORKER_TC_TOKENS, "tokens"); + worker_register_job_name(WORKER_TC_SETDEVICENAME, "devicename"); + worker_register_job_name(WORKER_TC_SETDEVICEGROUP, "devicegroup"); + worker_register_job_name(WORKER_TC_SETCLASSNAME, "classname"); + worker_register_job_name(WORKER_TC_WORKTIME, "worktime"); + + worker_register_job_custom_metric(WORKER_TC_PLUGIN_TIME, "tc script execution time", "milliseconds/run", WORKER_METRIC_ABSOLUTE); + worker_register_job_custom_metric(WORKER_TC_DEVICES, "number of devices", "devices", WORKER_METRIC_ABSOLUTE); + worker_register_job_custom_metric(WORKER_TC_CLASSES, "number of classes", "classes", WORKER_METRIC_ABSOLUTE); + + tc_device_index_init(); + netdata_thread_cleanup_push(tc_main_cleanup, ptr); + + char command[FILENAME_MAX + 1]; + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + + uint32_t BEGIN_HASH = simple_hash("BEGIN"); + uint32_t END_HASH = simple_hash("END"); + uint32_t QDISC_HASH = simple_hash("qdisc"); + uint32_t CLASS_HASH = simple_hash("class"); + uint32_t SENT_HASH = simple_hash("Sent"); + uint32_t LENDED_HASH = simple_hash("lended:"); + uint32_t TOKENS_HASH = simple_hash("tokens:"); + uint32_t SETDEVICENAME_HASH = simple_hash("SETDEVICENAME"); + uint32_t SETDEVICEGROUP_HASH = simple_hash("SETDEVICEGROUP"); + uint32_t SETCLASSNAME_HASH = simple_hash("SETCLASSNAME"); + uint32_t WORKTIME_HASH = simple_hash("WORKTIME"); + uint32_t first_hash; + + snprintfz(command, TC_LINE_MAX, "%s/tc-qos-helper.sh", netdata_configured_primary_plugins_dir); + char *tc_script = config_get("plugin:tc", "script to run to get tc values", command); + + while(!netdata_exit) { + FILE *fp_child_input, *fp_child_output; + struct tc_device *device = NULL; + struct tc_class *class = NULL; + + snprintfz(command, TC_LINE_MAX, "exec %s %d", tc_script, localhost->rrd_update_every); + debug(D_TC_LOOP, "executing '%s'", command); + + fp_child_output = netdata_popen(command, (pid_t *)&tc_child_pid, &fp_child_input); + if(unlikely(!fp_child_output)) { + error("TC: Cannot popen(\"%s\", \"r\").", command); + goto cleanup; + } + + char buffer[TC_LINE_MAX+1] = ""; + while(fgets(buffer, TC_LINE_MAX, fp_child_output) != NULL) { + if(unlikely(netdata_exit)) break; + + buffer[TC_LINE_MAX] = '\0'; + // debug(D_TC_LOOP, "TC: read '%s'", buffer); + + tc_split_words(buffer, words, PLUGINSD_MAX_WORDS); + + if(unlikely(!words[0] || !*words[0])) { + // debug(D_TC_LOOP, "empty line"); + worker_is_idle(); + continue; + } + // else debug(D_TC_LOOP, "First word is '%s'", words[0]); + + first_hash = simple_hash(words[0]); + + if(unlikely(device && ((first_hash == CLASS_HASH && strcmp(words[0], "class") == 0) || (first_hash == QDISC_HASH && strcmp(words[0], "qdisc") == 0)))) { + worker_is_busy(WORKER_TC_CLASS); + + // debug(D_TC_LOOP, "CLASS line on class id='%s', parent='%s', parentid='%s', leaf='%s', leafid='%s'", words[2], words[3], words[4], words[5], words[6]); + + char *type = words[1]; // the class/qdisc type: htb, fq_codel, etc + char *id = words[2]; // the class/qdisc major:minor + char *parent = words[3]; // the word 'parent' or 'root' + char *parentid = words[4]; // parentid + char *leaf = words[5]; // the word 'leaf' + char *leafid = words[6]; // leafid + + int parent_is_root = 0; + int parent_is_parent = 0; + if(likely(parent)) { + parent_is_parent = !strcmp(parent, "parent"); + + if(!parent_is_parent) + parent_is_root = !strcmp(parent, "root"); + } + + if(likely(type && id && (parent_is_root || parent_is_parent))) { + bool qdisc = false; + + if(first_hash == QDISC_HASH) { + qdisc = true; + + if(!strcmp(type, "ingress")) { + // we don't want to get the ingress qdisc + // there should be an IFB interface for this + + class = NULL; + worker_is_idle(); + continue; + } + + if(parent_is_parent && parentid) { + // eliminate the minor number from parentid + // why: parentid is the id of the parent class + // but major: is also the id of the parent qdisc + + char *s = parentid; + while(*s && *s != ':') s++; + if(*s == ':') s[1] = '\0'; + } + } + + if(parent_is_root) { + parentid = NULL; + leafid = NULL; + } + else if(!leaf || strcmp(leaf, "leaf") != 0) + leafid = NULL; + + char leafbuf[20 + 1] = ""; + if(leafid && leafid[strlen(leafid) - 1] == ':') { + strncpyz(leafbuf, leafid, 20 - 1); + strcat(leafbuf, "1"); + leafid = leafbuf; + } + + class = tc_class_add(device, id, qdisc, parentid, leafid); + } + else { + // clear the last class + class = NULL; + } + } + else if(unlikely(first_hash == END_HASH && strcmp(words[0], "END") == 0)) { + worker_is_busy(WORKER_TC_END); + + // debug(D_TC_LOOP, "END line"); + + if(likely(device)) { + netdata_thread_disable_cancelability(); + tc_device_commit(device); + // tc_device_free(device); + netdata_thread_enable_cancelability(); + } + + device = NULL; + class = NULL; + } + else if(unlikely(first_hash == BEGIN_HASH && strcmp(words[0], "BEGIN") == 0)) { + worker_is_busy(WORKER_TC_BEGIN); + + // debug(D_TC_LOOP, "BEGIN line on device '%s'", words[1]); + + if(likely(words[1] && *words[1])) { + device = tc_device_create(words[1]); + } + else { + // tc_device_free(device); + device = NULL; + } + + class = NULL; + } + else if(unlikely(device && class && first_hash == SENT_HASH && strcmp(words[0], "Sent") == 0)) { + worker_is_busy(WORKER_TC_SENT); + + // debug(D_TC_LOOP, "SENT line '%s'", words[1]); + if(likely(words[1] && *words[1])) { + class->bytes = str2ull(words[1]); + class->updated = true; + } + else { + class->updated = false; + } + + if(likely(words[3] && *words[3])) + class->packets = str2ull(words[3]); + + if(likely(words[6] && *words[6])) + class->dropped = str2ull(words[6]); + + //if(likely(words[8] && *words[8])) + // class->overlimits = str2ull(words[8]); + + //if(likely(words[10] && *words[10])) + // class->requeues = str2ull(words[8]); + } + else if(unlikely(device && class && class->updated && first_hash == LENDED_HASH && strcmp(words[0], "lended:") == 0)) { + worker_is_busy(WORKER_TC_LENDED); + + // debug(D_TC_LOOP, "LENDED line '%s'", words[1]); + //if(likely(words[1] && *words[1])) + // class->lended = str2ull(words[1]); + + //if(likely(words[3] && *words[3])) + // class->borrowed = str2ull(words[3]); + + //if(likely(words[5] && *words[5])) + // class->giants = str2ull(words[5]); + } + else if(unlikely(device && class && class->updated && first_hash == TOKENS_HASH && strcmp(words[0], "tokens:") == 0)) { + worker_is_busy(WORKER_TC_TOKENS); + + // debug(D_TC_LOOP, "TOKENS line '%s'", words[1]); + if(likely(words[1] && *words[1])) + class->tokens = str2ull(words[1]); + + if(likely(words[3] && *words[3])) + class->ctokens = str2ull(words[3]); + } + else if(unlikely(device && first_hash == SETDEVICENAME_HASH && strcmp(words[0], "SETDEVICENAME") == 0)) { + worker_is_busy(WORKER_TC_SETDEVICENAME); + + // debug(D_TC_LOOP, "SETDEVICENAME line '%s'", words[1]); + if(likely(words[1] && *words[1])) + tc_device_set_device_name(device, words[1]); + } + else if(unlikely(device && first_hash == SETDEVICEGROUP_HASH && strcmp(words[0], "SETDEVICEGROUP") == 0)) { + worker_is_busy(WORKER_TC_SETDEVICEGROUP); + + // debug(D_TC_LOOP, "SETDEVICEGROUP line '%s'", words[1]); + if(likely(words[1] && *words[1])) + tc_device_set_device_family(device, words[1]); + } + else if(unlikely(device && first_hash == SETCLASSNAME_HASH && strcmp(words[0], "SETCLASSNAME") == 0)) { + worker_is_busy(WORKER_TC_SETCLASSNAME); + + // debug(D_TC_LOOP, "SETCLASSNAME line '%s' '%s'", words[1], words[2]); + char *id = words[1]; + char *path = words[2]; + if(likely(id && *id && path && *path)) + tc_device_set_class_name(device, id, path); + } + else if(unlikely(first_hash == WORKTIME_HASH && strcmp(words[0], "WORKTIME") == 0)) { + worker_is_busy(WORKER_TC_WORKTIME); + worker_set_metric(WORKER_TC_PLUGIN_TIME, str2ll(words[1], NULL)); + + size_t number_of_devices = dictionary_entries(tc_device_root_index); + size_t number_of_classes = 0; + + struct tc_device *d; + dfe_start_read(tc_device_root_index, d) { + number_of_classes += dictionary_entries(d->classes); + } + dfe_done(d); + + worker_set_metric(WORKER_TC_DEVICES, number_of_devices); + worker_set_metric(WORKER_TC_CLASSES, number_of_classes); + } + //else { + // debug(D_TC_LOOP, "IGNORED line"); + //} + + worker_is_idle(); + } + + // fgets() failed or loop broke + int code = netdata_pclose(fp_child_input, fp_child_output, (pid_t)tc_child_pid); + tc_child_pid = 0; + + if(unlikely(device)) { + // tc_device_free(device); + device = NULL; + class = NULL; + } + + if(unlikely(netdata_exit)) + goto cleanup; + + if(code == 1 || code == 127) { + // 1 = DISABLE + // 127 = cannot even run it + error("TC: tc-qos-helper.sh exited with code %d. Disabling it.", code); + goto cleanup; + } + + sleep((unsigned int) localhost->rrd_update_every); + } + +cleanup: ; // added semi-colon to prevent older gcc error: label at end of compound statement + worker_unregister(); + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/tc.plugin/tc-qos-helper.sh.in b/collectors/tc.plugin/tc-qos-helper.sh.in new file mode 100755 index 0000000..97d4d01 --- /dev/null +++ b/collectors/tc.plugin/tc-qos-helper.sh.in @@ -0,0 +1,297 @@ +#!/usr/bin/env bash + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2017 Costa Tsaousis +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This script is a helper to allow netdata collect tc data. +# tc output parsing has been implemented in C, inside netdata +# This script allows setting names to dimensions. + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +# ----------------------------------------------------------------------------- +# logging functions + +PROGRAM_NAME="$(basename "$0")" +PROGRAM_NAME="${PROGRAM_NAME/.plugin/}" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +debug=0 +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- +# find /var/run/fireqos + +# the default +fireqos_run_dir="/var/run/fireqos" + +function realdir() { + local r + local t + r="$1" + t="$(readlink "$r")" + + while [ "$t" ]; do + r=$(cd "$(dirname "$r")" && cd "$(dirname "$t")" && pwd -P)/$(basename "$t") + t=$(readlink "$r") + done + + dirname "$r" +} + +if [ ! -d "${fireqos_run_dir}" ]; then + + # the fireqos executable - we will use it to find its config + fireqos="$(command -v fireqos 2>/dev/null)" + + if [ -n "${fireqos}" ]; then + + fireqos_exec_dir="$(realdir "${fireqos}")" + + if [ -n "${fireqos_exec_dir}" ] && [ "${fireqos_exec_dir}" != "." ] && [ -f "${fireqos_exec_dir}/install.config" ]; then + LOCALSTATEDIR= + #shellcheck source=/dev/null + source "${fireqos_exec_dir}/install.config" + + if [ -d "${LOCALSTATEDIR}/run/fireqos" ]; then + fireqos_run_dir="${LOCALSTATEDIR}/run/fireqos" + else + warning "FireQOS is installed as '${fireqos}', its installation config at '${fireqos_exec_dir}/install.config' specifies local state data at '${LOCALSTATEDIR}/run/fireqos', but this directory is not found or is not readable (check the permissions of its parents)." + fi + else + warning "Although FireQOS is installed on this system as '${fireqos}', I cannot find/read its installation configuration at '${fireqos_exec_dir}/install.config'." + fi + else + warning "FireQOS is not installed on this system. Use FireQOS to apply traffic QoS and expose the class names to netdata. Check https://github.com/netdata/netdata/tree/master/collectors/tc.plugin#tcplugin" + fi +fi + +# ----------------------------------------------------------------------------- + +[ -z "${NETDATA_PLUGINS_DIR}" ] && NETDATA_PLUGINS_DIR="$(dirname "${0}")" +[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@" +[ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" + +plugins_dir="${NETDATA_PLUGINS_DIR}" +tc="$(command -v tc 2>/dev/null)" + +# ----------------------------------------------------------------------------- +# user configuration + +# time in seconds to refresh QoS class/qdisc names +qos_get_class_names_every=120 + +# time in seconds to exit - netdata will restart the script +qos_exit_every=3600 + +# what to use? classes or qdiscs? +tc_show="qdisc" # can also be "class" + +# ----------------------------------------------------------------------------- +# check if we have a valid number for interval + +t=${1} +update_every=$((t)) +[ $((update_every)) -lt 1 ] && update_every=${NETDATA_UPDATE_EVERY} +[ $((update_every)) -lt 1 ] && update_every=1 + +# ----------------------------------------------------------------------------- +# allow the user to override our defaults + +for CONFIG in "${NETDATA_STOCK_CONFIG_DIR}/tc-qos-helper.conf" "${NETDATA_USER_CONFIG_DIR}/tc-qos-helper.conf"; do + if [ -f "${CONFIG}" ]; then + info "Loading config file '${CONFIG}'..." + #shellcheck source=/dev/null + source "${CONFIG}" || error "Failed to load config file '${CONFIG}'." + else + warning "Cannot find file '${CONFIG}'." + fi +done + +case "${tc_show}" in +qdisc | class) ;; + +*) + error "tc_show variable can be either 'qdisc' or 'class' but is set to '${tc_show}'. Assuming it is 'qdisc'." + tc_show="qdisc" + ;; +esac + +# ----------------------------------------------------------------------------- +# default sleep function + +LOOPSLEEPMS_LASTWORK=0 +loopsleepms() { + sleep "$1" +} + +# if found and included, this file overwrites loopsleepms() +# with a high resolution timer function for precise looping. +#shellcheck source=/dev/null +. "${plugins_dir}/loopsleepms.sh.inc" + +# ----------------------------------------------------------------------------- +# final checks we can run + +if [ -z "${tc}" ] || [ ! -x "${tc}" ]; then + fatal "cannot find command 'tc' in this system." +fi + +tc_devices= +fix_names= + +# ----------------------------------------------------------------------------- + +setclassname() { + if [ "${tc_show}" = "qdisc" ]; then + echo "SETCLASSNAME $4 $2" + else + echo "SETCLASSNAME $3 $2" + fi +} + +show_tc_cls() { + [ "${tc_show}" = "qdisc" ] && return 1 + + local x="${1}" + + if [ -f /etc/iproute2/tc_cls ]; then + local classid name rest + while read -r classid name rest; do + if [ -z "${classid}" ] || + [ -z "${name}" ] || + [ "${classid}" = "#" ] || + [ "${name}" = "#" ] || + [ "${classid:0:1}" = "#" ] || + [ "${name:0:1}" = "#" ]; then + continue + fi + setclassname "" "${name}" "${classid}" + done /dev/null)" + [ -n "${l}" ] && tc_devices="${tc_devices} ${dev}" + done +} + +# update devices and class names +# once every 2 minutes +names_every=$((qos_get_class_names_every / update_every)) + +# exit this script every hour +# it will be restarted automatically +exit_after=$((qos_exit_every / update_every)) + +c=0 +gc=0 +while true; do + fix_names= + c=$((c + 1)) + gc=$((gc + 1)) + + if [ ${c} -le 1 ] || [ ${c} -ge ${names_every} ]; then + c=1 + fix_names="YES" + find_tc_devices + fi + + for d in ${tc_devices}; do + show_tc "${d}" + done + + echo "WORKTIME ${LOOPSLEEPMS_LASTWORK}" || exit + + loopsleepms ${update_every} + + [ ${gc} -gt ${exit_after} ] && exit 0 +done diff --git a/collectors/timex.plugin/Makefile.am b/collectors/timex.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/timex.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/timex.plugin/README.md b/collectors/timex.plugin/README.md new file mode 100644 index 0000000..18665f8 --- /dev/null +++ b/collectors/timex.plugin/README.md @@ -0,0 +1,31 @@ + + +# timex.plugin + +This plugin monitors the system kernel clock synchronization state. + +This plugin creates the following charts: + +- System clock synchronization state according to the system kernel +- System clock status which gives the value of the `time_status` variable in the kernel +- Computed time offset between local system and reference clock + +This is obtained from the information provided by the [ntp_adjtime()](https://man7.org/linux/man-pages/man2/adjtimex.2.html) system call. +An unsynchronized clock may indicate a hardware clock error, or an issue with UTC synchronization. + +## Configuration + +Edit the `netdata.conf` configuration file using [`edit-config`](/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) from the [Netdata config directory](/docs/configure/nodes.md#the-netdata-config-directory), which is typically at `/etc/netdata`. + +Scroll down to the `[plugin:timex]` section to find the available options: + +```ini +[plugin:timex] + # update every = 1 + # clock synchronization state = yes + # time offset = yes +``` diff --git a/collectors/timex.plugin/plugin_timex.c b/collectors/timex.plugin/plugin_timex.c new file mode 100644 index 0000000..46cfc57 --- /dev/null +++ b/collectors/timex.plugin/plugin_timex.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "daemon/common.h" +#include "libnetdata/os.h" + +#define PLUGIN_TIMEX_NAME "timex.plugin" + +#define CONFIG_SECTION_TIMEX "plugin:timex" + +struct status_codes { + char *name; + int code; + RRDDIM *rd; +} sta_codes[] = { + // {"pll", STA_PLL, NULL}, + // {"ppsfreq", STA_PPSFREQ, NULL}, + // {"ppstime", STA_PPSTIME, NULL}, + // {"fll", STA_FLL, NULL}, + // {"ins", STA_INS, NULL}, + // {"del", STA_DEL, NULL}, + {"unsync", STA_UNSYNC, NULL}, + // {"freqhold", STA_FREQHOLD, NULL}, + // {"ppssignal", STA_PPSSIGNAL, NULL}, + // {"ppsjitter", STA_PPSJITTER, NULL}, + // {"ppswander", STA_PPSWANDER, NULL}, + // {"ppserror", STA_PPSERROR, NULL}, + {"clockerr", STA_CLOCKERR, NULL}, + // {"nano", STA_NANO, NULL}, + // {"clk", STA_CLK, NULL}, + {NULL, 0, NULL}, +}; + +static void timex_main_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *timex_main(void *ptr) +{ + worker_register("TIMEX"); + worker_register_job_name(0, "clock check"); + + netdata_thread_cleanup_push(timex_main_cleanup, ptr); + + int update_every = (int)config_get_number(CONFIG_SECTION_TIMEX, "update every", 10); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + int do_sync = config_get_boolean(CONFIG_SECTION_TIMEX, "clock synchronization state", CONFIG_BOOLEAN_YES); + int do_offset = config_get_boolean(CONFIG_SECTION_TIMEX, "time offset", CONFIG_BOOLEAN_YES); + + if (unlikely(do_sync == CONFIG_BOOLEAN_NO && do_offset == CONFIG_BOOLEAN_NO)) { + info("No charts to show"); + goto exit; + } + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + worker_is_busy(0); + + struct timex timex_buf = {}; + int sync_state = 0; + static int prev_sync_state = 0; + + sync_state = ADJUST_TIMEX(&timex_buf); + + int non_seq_failure = (sync_state == -1 && prev_sync_state != -1); + prev_sync_state = sync_state; + + if (non_seq_failure) { + error("Cannot get clock synchronization state"); + continue; + } + + collected_number divisor = USEC_PER_MS; + if (timex_buf.status & STA_NANO) + divisor = NSEC_PER_MSEC; + + // ---------------------------------------------------------------- + + if (do_sync) { + static RRDSET *st_sync_state = NULL; + static RRDDIM *rd_sync_state; + + if (unlikely(!st_sync_state)) { + st_sync_state = rrdset_create_localhost( + "system", + "clock_sync_state", + NULL, + "clock synchronization", + NULL, + "System Clock Synchronization State", + "state", + PLUGIN_TIMEX_NAME, + NULL, + NETDATA_CHART_PRIO_CLOCK_SYNC_STATE, + update_every, + RRDSET_TYPE_LINE); + + rd_sync_state = rrddim_add(st_sync_state, "state", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_sync_state, rd_sync_state, sync_state != TIME_ERROR ? 1 : 0); + rrdset_done(st_sync_state); + + static RRDSET *st_clock_status = NULL; + + if (unlikely(!st_clock_status)) { + st_clock_status = rrdset_create_localhost( + "system", + "clock_status", + NULL, + "clock synchronization", + NULL, + "System Clock Status", + "status", + PLUGIN_TIMEX_NAME, + NULL, + NETDATA_CHART_PRIO_CLOCK_STATUS, + update_every, + RRDSET_TYPE_LINE); + + for (int i = 0; sta_codes[i].name != NULL; i++) { + sta_codes[i].rd = + rrddim_add(st_clock_status, sta_codes[i].name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + } + + for (int i = 0; sta_codes[i].name != NULL; i++) + rrddim_set_by_pointer(st_clock_status, sta_codes[i].rd, timex_buf.status & sta_codes[i].code ? 1 : 0); + + rrdset_done(st_clock_status); + } + + if (do_offset) { + static RRDSET *st_offset = NULL; + static RRDDIM *rd_offset; + + if (unlikely(!st_offset)) { + st_offset = rrdset_create_localhost( + "system", + "clock_sync_offset", + NULL, + "clock synchronization", + NULL, + "Computed Time Offset Between Local System and Reference Clock", + "milliseconds", + PLUGIN_TIMEX_NAME, + NULL, + NETDATA_CHART_PRIO_CLOCK_SYNC_OFFSET, + update_every, + RRDSET_TYPE_LINE); + + rd_offset = rrddim_add(st_offset, "offset", NULL, 1, divisor, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_offset, rd_offset, timex_buf.offset); + rrdset_done(st_offset); + } + } + +exit: + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/xenstat.plugin/Makefile.am b/collectors/xenstat.plugin/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/collectors/xenstat.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/xenstat.plugin/README.md b/collectors/xenstat.plugin/README.md new file mode 100644 index 0000000..8cbe086 --- /dev/null +++ b/collectors/xenstat.plugin/README.md @@ -0,0 +1,53 @@ + + +# xenstat.plugin + +`xenstat.plugin` collects XenServer and XCP-ng statistics. + +## Prerequisites + +1. install `xen-dom0-libs-devel` and `yajl-devel` using the package manager of your system. + Note: On Cent-OS systems you will need `centos-release-xen` repository and the required package for xen is `xen-devel` + +2. re-install Netdata from source. The installer will detect that the required libraries are now available and will also build xenstat.plugin. + +Keep in mind that `libxenstat` requires root access, so the plugin is setuid to root. + +## Charts + +The plugin provides XenServer and XCP-ng host and domains statistics: + +Host: + +1. Number of domains. + +Domain: + +1. CPU. +2. Memory. +3. Networks. +4. VBDs. + +## Configuration + +If you need to disable xenstat for Netdata, edit /etc/netdata/netdata.conf and set: + +``` +[plugins] + xenstat = no +``` + +## Debugging + +You can run the plugin by hand: + +``` +sudo /usr/libexec/netdata/plugins.d/xenstat.plugin 1 debug +``` + +You will get verbose output on what the plugin does. + + diff --git a/collectors/xenstat.plugin/xenstat_plugin.c b/collectors/xenstat.plugin/xenstat_plugin.c new file mode 100644 index 0000000..ea98b9b --- /dev/null +++ b/collectors/xenstat.plugin/xenstat_plugin.c @@ -0,0 +1,1071 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#include +#include + +#define PLUGIN_XENSTAT_NAME "xenstat.plugin" + +#define NETDATA_CHART_PRIO_XENSTAT_NODE_CPUS 30001 +#define NETDATA_CHART_PRIO_XENSTAT_NODE_CPU_FREQ 30002 +#define NETDATA_CHART_PRIO_XENSTAT_NODE_MEM 30003 +#define NETDATA_CHART_PRIO_XENSTAT_NODE_TMEM 30004 +#define NETDATA_CHART_PRIO_XENSTAT_NODE_DOMAINS 30005 + +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_STATES 30101 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_CPU 30102 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VCPU 30103 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_MEM 30104 + +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_TMEM_PAGES 30104 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_TMEM_OPERATIONS 30105 + +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VBD_OO_REQ 30200 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VBD_REQUESTS 30300 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VBD_SECTORS 30400 + +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_BYTES 30500 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_PACKETS 30600 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_ERRORS 30700 +#define NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_DROPS 30800 + +#define TYPE_LENGTH_MAX 200 + +#define CHART_IS_OBSOLETE 1 +#define CHART_IS_NOT_OBSOLETE 0 + +// Variables +static int debug = 0; +static int netdata_update_every = 1; + +struct vcpu_metrics { + unsigned int id; + + unsigned int online; + unsigned long long ns; + + int chart_generated; + int updated; + + struct vcpu_metrics *next; +}; + +struct vbd_metrics { + unsigned int id; + + unsigned int error; + unsigned long long oo_reqs; + unsigned long long rd_reqs; + unsigned long long wr_reqs; + unsigned long long rd_sects; + unsigned long long wr_sects; + + int oo_req_chart_generated; + int requests_chart_generated; + int sectors_chart_generated; + int updated; + + struct vbd_metrics *next; +}; + +struct network_metrics { + unsigned int id; + + unsigned long long rbytes; + unsigned long long rpackets; + unsigned long long rerrs; + unsigned long long rdrops; + + unsigned long long tbytes; + unsigned long long tpackets; + unsigned long long terrs; + unsigned long long tdrops; + + int bytes_chart_generated; + int packets_chart_generated; + int errors_chart_generated; + int drops_chart_generated; + int updated; + + struct network_metrics *next; +}; + +struct domain_metrics { + char *uuid; + uint32_t hash; + + unsigned int id; + char *name; + + // states + unsigned int running; + unsigned int blocked; + unsigned int paused; + unsigned int shutdown; + unsigned int crashed; + unsigned int dying; + unsigned int cur_vcpus; + + unsigned long long cpu_ns; + unsigned long long cur_mem; + unsigned long long max_mem; + + struct vcpu_metrics *vcpu_root; + struct vbd_metrics *vbd_root; + struct network_metrics *network_root; + + int states_chart_generated; + int cpu_chart_generated; + int vcpu_chart_generated; + int num_vcpus_changed; + int mem_chart_generated; + int updated; + + struct domain_metrics *next; +}; + +struct node_metrics{ + unsigned long long tot_mem; + unsigned long long free_mem; + int num_domains; + unsigned int num_cpus; + unsigned long long node_cpu_hz; + + struct domain_metrics *domain_root; +}; + +static struct node_metrics node_metrics = { + .domain_root = NULL +}; + +static inline struct domain_metrics *domain_metrics_get(const char *uuid, uint32_t hash) { + struct domain_metrics *d = NULL, *last = NULL; + for(d = node_metrics.domain_root; d ; last = d, d = d->next) { + if(unlikely(d->hash == hash && !strcmp(d->uuid, uuid))) + return d; + } + + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: allocating memory for domain with uuid %s\n", uuid); + + d = callocz(1, sizeof(struct domain_metrics)); + d->uuid = strdupz(uuid); + d->hash = hash; + + if(unlikely(!last)) { + d->next = node_metrics.domain_root; + node_metrics.domain_root = d; + } + else { + d->next = last->next; + last->next = d; + } + + return d; +} + +static struct domain_metrics *domain_metrics_free(struct domain_metrics *d) { + struct domain_metrics *cur = NULL, *last = NULL; + struct vcpu_metrics *vcpu, *vcpu_f; + struct vbd_metrics *vbd, *vbd_f; + struct network_metrics *network, *network_f; + + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: freeing memory for domain '%s' id %u, uuid %s\n", d->name, d->id, d->uuid); + + for(cur = node_metrics.domain_root; cur ; last = cur, cur = cur->next) { + if(unlikely(cur->hash == d->hash && !strcmp(cur->uuid, d->uuid))) break; + } + + if(unlikely(!cur)) { + error("XENSTAT: failed to free domain metrics."); + return NULL; + } + + if(likely(last)) + last->next = cur->next; + else + node_metrics.domain_root = NULL; + + freez(cur->uuid); + freez(cur->name); + + vcpu = cur->vcpu_root; + while(vcpu) { + vcpu_f = vcpu; + vcpu = vcpu->next; + freez(vcpu_f); + } + + vbd = cur->vbd_root; + while(vbd) { + vbd_f = vbd; + vbd = vbd->next; + freez(vbd_f); + } + + network = cur->network_root; + while(network) { + network_f = network; + network = network->next; + freez(network_f); + } + + freez(cur); + + return last ? last : NULL; +} + +static int vcpu_metrics_collect(struct domain_metrics *d, xenstat_domain *domain) { + unsigned int num_vcpus = 0; + xenstat_vcpu *vcpu = NULL; + struct vcpu_metrics *vcpu_m = NULL, *last_vcpu_m = NULL; + + num_vcpus = xenstat_domain_num_vcpus(domain); + + for(vcpu_m = d->vcpu_root; vcpu_m ; vcpu_m = vcpu_m->next) + vcpu_m->updated = 0; + + vcpu_m = d->vcpu_root; + + unsigned int i, num_online_vcpus=0; + for(i = 0; i < num_vcpus; i++) { + if(unlikely(!vcpu_m)) { + vcpu_m = callocz(1, sizeof(struct vcpu_metrics)); + + if(unlikely(i == 0)) d->vcpu_root = vcpu_m; + else last_vcpu_m->next = vcpu_m; + } + + vcpu_m->id = i; + + vcpu = xenstat_domain_vcpu(domain, i); + + if(unlikely(!vcpu)) { + error("XENSTAT: cannot get VCPU statistics."); + return 1; + } + + vcpu_m->online = xenstat_vcpu_online(vcpu); + if(likely(vcpu_m->online)) { num_online_vcpus++; } + vcpu_m->ns = xenstat_vcpu_ns(vcpu); + + vcpu_m->updated = 1; + + last_vcpu_m = vcpu_m; + vcpu_m = vcpu_m->next; + } + + if(unlikely(num_online_vcpus != d->cur_vcpus)) { + d->num_vcpus_changed = 1; + d->cur_vcpus = num_online_vcpus; + } + + return 0; +} + +static int vbd_metrics_collect(struct domain_metrics *d, xenstat_domain *domain) { + unsigned int num_vbds = xenstat_domain_num_vbds(domain); + xenstat_vbd *vbd = NULL; + struct vbd_metrics *vbd_m = NULL, *last_vbd_m = NULL; + + for(vbd_m = d->vbd_root; vbd_m ; vbd_m = vbd_m->next) + vbd_m->updated = 0; + + vbd_m = d->vbd_root; + + unsigned int i; + for(i = 0; i < num_vbds; i++) { + if(unlikely(!vbd_m)) { + vbd_m = callocz(1, sizeof(struct vbd_metrics)); + + if(unlikely(i == 0)) d->vbd_root = vbd_m; + else last_vbd_m->next = vbd_m; + } + + vbd_m->id = i; + + vbd = xenstat_domain_vbd(domain, i); + + if(unlikely(!vbd)) { + error("XENSTAT: cannot get VBD statistics."); + return 1; + } + +#ifdef HAVE_XENSTAT_VBD_ERROR + vbd_m->error = xenstat_vbd_error(vbd); +#else + vbd_m->error = 0; +#endif + vbd_m->oo_reqs = xenstat_vbd_oo_reqs(vbd); + vbd_m->rd_reqs = xenstat_vbd_rd_reqs(vbd); + vbd_m->wr_reqs = xenstat_vbd_wr_reqs(vbd); + vbd_m->rd_sects = xenstat_vbd_rd_sects(vbd); + vbd_m->wr_sects = xenstat_vbd_wr_sects(vbd); + + vbd_m->updated = 1; + + last_vbd_m = vbd_m; + vbd_m = vbd_m->next; + } + + return 0; +} + +static int network_metrics_collect(struct domain_metrics *d, xenstat_domain *domain) { + unsigned int num_networks = xenstat_domain_num_networks(domain); + xenstat_network *network = NULL; + struct network_metrics *network_m = NULL, *last_network_m = NULL; + + for(network_m = d->network_root; network_m ; network_m = network_m->next) + network_m->updated = 0; + + network_m = d->network_root; + + unsigned int i; + for(i = 0; i < num_networks; i++) { + if(unlikely(!network_m)) { + network_m = callocz(1, sizeof(struct network_metrics)); + + if(unlikely(i == 0)) d->network_root = network_m; + else last_network_m->next = network_m; + } + + network_m->id = i; + + network = xenstat_domain_network(domain, i); + + if(unlikely(!network)) { + error("XENSTAT: cannot get network statistics."); + return 1; + } + + network_m->rbytes = xenstat_network_rbytes(network); + network_m->rpackets = xenstat_network_rpackets(network); + network_m->rerrs = xenstat_network_rerrs(network); + network_m->rdrops = xenstat_network_rdrop(network); + + network_m->tbytes = xenstat_network_tbytes(network); + network_m->tpackets = xenstat_network_tpackets(network); + network_m->terrs = xenstat_network_terrs(network); + network_m->tdrops = xenstat_network_tdrop(network); + + network_m->updated = 1; + + last_network_m = network_m; + network_m = network_m->next; + } + + return 0; +} + +static int xenstat_collect(xenstat_handle *xhandle, libxl_ctx *ctx, libxl_dominfo *info) { + + // mark all old metrics as not-updated + struct domain_metrics *d; + for(d = node_metrics.domain_root; d ; d = d->next) + d->updated = 0; + + xenstat_node *node = xenstat_get_node(xhandle, XENSTAT_ALL); + if (unlikely(!node)) { + error("XENSTAT: failed to retrieve statistics from libxenstat."); + return 1; + } + + node_metrics.tot_mem = xenstat_node_tot_mem(node); + node_metrics.free_mem = xenstat_node_free_mem(node); + node_metrics.num_domains = xenstat_node_num_domains(node); + node_metrics.num_cpus = xenstat_node_num_cpus(node); + node_metrics.node_cpu_hz = xenstat_node_cpu_hz(node); + + int i; + for(i = 0; i < node_metrics.num_domains; i++) { + xenstat_domain *domain = NULL; + char uuid[LIBXL_UUID_FMTLEN + 1]; + + domain = xenstat_node_domain_by_index(node, i); + + // get domain UUID + unsigned int id = xenstat_domain_id(domain); + if(unlikely(libxl_domain_info(ctx, info, id))) { + error("XENSTAT: cannot get domain info."); + } + else { + snprintfz(uuid, LIBXL_UUID_FMTLEN, LIBXL_UUID_FMT "\n", LIBXL_UUID_BYTES(info->uuid)); + } + + uint32_t hash = simple_hash(uuid); + d = domain_metrics_get(uuid, hash); + + d->id = id; + if(unlikely(!d->name)) { + d->name = strdupz(xenstat_domain_name(domain)); + netdata_fix_chart_id(d->name); + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: domain id %u, uuid %s has name '%s'\n", d->id, d->uuid, d->name); + } + + d->running = xenstat_domain_running(domain); + d->blocked = xenstat_domain_blocked(domain); + d->paused = xenstat_domain_paused(domain); + d->shutdown = xenstat_domain_shutdown(domain); + d->crashed = xenstat_domain_crashed(domain); + d->dying = xenstat_domain_dying(domain); + + d->cpu_ns = xenstat_domain_cpu_ns(domain); + d->cur_mem = xenstat_domain_cur_mem(domain); + d->max_mem = xenstat_domain_max_mem(domain); + + if(unlikely(vcpu_metrics_collect(d, domain) || vbd_metrics_collect(d, domain) || network_metrics_collect(d, domain))) { + xenstat_free_node(node); + return 1; + } + + d->updated = 1; + } + + xenstat_free_node(node); + + return 0; +} + +static void xenstat_send_node_metrics() { + static int mem_chart_generated = 0, domains_chart_generated = 0, cpus_chart_generated = 0, cpu_freq_chart_generated = 0; + + // ---------------------------------------------------------------- + + if(unlikely(!mem_chart_generated)) { + printf("CHART xenstat.mem '' 'Memory Usage' 'MiB' 'memory' '' stacked %d %d '' %s\n" + , NETDATA_CHART_PRIO_XENSTAT_NODE_MEM + , netdata_update_every + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION %s '' absolute 1 %d\n", "free", netdata_update_every * 1024 * 1024); + printf("DIMENSION %s '' absolute 1 %d\n", "used", netdata_update_every * 1024 * 1024); + mem_chart_generated = 1; + } + + printf( + "BEGIN xenstat.mem\n" + "SET free = %lld\n" + "SET used = %lld\n" + "END\n" + , (collected_number) node_metrics.free_mem + , (collected_number) (node_metrics.tot_mem - node_metrics.free_mem) + ); + + // ---------------------------------------------------------------- + + if(unlikely(!domains_chart_generated)) { + printf("CHART xenstat.domains '' 'Number of Domains' 'domains' 'domains' '' line %d %d '' %s\n" + , NETDATA_CHART_PRIO_XENSTAT_NODE_DOMAINS + , netdata_update_every + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION %s '' absolute 1 %d\n", "domains", netdata_update_every); + domains_chart_generated = 1; + } + + printf( + "BEGIN xenstat.domains\n" + "SET domains = %lld\n" + "END\n" + , (collected_number) node_metrics.num_domains + ); + + // ---------------------------------------------------------------- + + if(unlikely(!cpus_chart_generated)) { + printf("CHART xenstat.cpus '' 'Number of CPUs' 'cpus' 'cpu' '' line %d %d '' %s\n" + , NETDATA_CHART_PRIO_XENSTAT_NODE_CPUS + , netdata_update_every + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION %s '' absolute 1 %d\n", "cpus", netdata_update_every); + cpus_chart_generated = 1; + } + + printf( + "BEGIN xenstat.cpus\n" + "SET cpus = %lld\n" + "END\n" + , (collected_number) node_metrics.num_cpus + ); + + // ---------------------------------------------------------------- + + if(unlikely(!cpu_freq_chart_generated)) { + printf("CHART xenstat.cpu_freq '' 'CPU Frequency' 'MHz' 'cpu' '' line %d %d '' %s\n" + , NETDATA_CHART_PRIO_XENSTAT_NODE_CPU_FREQ + , netdata_update_every + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION %s '' absolute 1 %d\n", "frequency", netdata_update_every * 1024 * 1024); + cpu_freq_chart_generated = 1; + } + + printf( + "BEGIN xenstat.cpu_freq\n" + "SET frequency = %lld\n" + "END\n" + , (collected_number) node_metrics.node_cpu_hz + ); +} + +static void print_domain_states_chart_definition(char *type, int obsolete_flag) { + printf("CHART %s.states '' 'Domain States' 'boolean' 'states' 'xendomain.states' line %d %d %s %s\n" + , type + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_STATES + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION running '' absolute 1 %d\n", netdata_update_every); + printf("DIMENSION blocked '' absolute 1 %d\n", netdata_update_every); + printf("DIMENSION paused '' absolute 1 %d\n", netdata_update_every); + printf("DIMENSION shutdown '' absolute 1 %d\n", netdata_update_every); + printf("DIMENSION crashed '' absolute 1 %d\n", netdata_update_every); + printf("DIMENSION dying '' absolute 1 %d\n", netdata_update_every); +} + +static void print_domain_cpu_chart_definition(char *type, int obsolete_flag) { + printf("CHART %s.cpu '' 'CPU Usage (100%% = 1 core)' 'percentage' 'cpu' 'xendomain.cpu' line %d %d %s %s\n" + , type + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_CPU + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION used '' incremental 100 %d\n", netdata_update_every * 1000000000); +} + +static void print_domain_mem_chart_definition(char *type, int obsolete_flag) { + printf("CHART %s.mem '' 'Memory Reservation' 'MiB' 'memory' 'xendomain.mem' line %d %d %s %s\n" + , type + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_MEM + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION maximum '' absolute 1 %d\n", netdata_update_every * 1024 * 1024); + printf("DIMENSION current '' absolute 1 %d\n", netdata_update_every * 1024 * 1024); +} + +static void print_domain_vcpu_chart_definition(char *type, struct domain_metrics *d, int obsolete_flag) { + struct vcpu_metrics *vcpu_m; + + printf("CHART %s.vcpu '' 'CPU Usage per VCPU' 'percentage' 'cpu' 'xendomain.vcpu' line %d %d %s %s\n" + , type + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VCPU + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + + for(vcpu_m = d->vcpu_root; vcpu_m; vcpu_m = vcpu_m->next) { + if(likely(vcpu_m->updated && vcpu_m->online)) { + printf("DIMENSION vcpu%u '' incremental 100 %d\n", vcpu_m->id, netdata_update_every * 1000000000); + } + } +} + +static void print_domain_vbd_oo_chart_definition(char *type, unsigned int vbd, int obsolete_flag) { + printf("CHART %s.oo_req_vbd%u '' 'VBD%u \"Out Of\" Requests' 'requests/s' 'vbd' 'xendomain.oo_req_vbd' line %u %d %s %s\n" + , type + , vbd + , vbd + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VBD_OO_REQ + vbd + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION requests '' incremental 1 %d\n", netdata_update_every); +} + +static void print_domain_vbd_requests_chart_definition(char *type, unsigned int vbd, int obsolete_flag) { + printf("CHART %s.requests_vbd%u '' 'VBD%u Requests' 'requests/s' 'vbd' 'xendomain.requests_vbd' line %u %d %s %s\n" + , type + , vbd + , vbd + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VBD_REQUESTS + vbd + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION read '' incremental 1 %d\n", netdata_update_every); + printf("DIMENSION write '' incremental -1 %d\n", netdata_update_every); +} + +static void print_domain_vbd_sectors_chart_definition(char *type, unsigned int vbd, int obsolete_flag) { + printf("CHART %s.sectors_vbd%u '' 'VBD%u Read/Written Sectors' 'sectors/s' 'vbd' 'xendomain.sectors_vbd' line %u %d %s %s\n" + , type + , vbd + , vbd + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_VBD_SECTORS + vbd + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION read '' incremental 1 %d\n", netdata_update_every); + printf("DIMENSION write '' incremental -1 %d\n", netdata_update_every); +} + +static void print_domain_network_bytes_chart_definition(char *type, unsigned int network, int obsolete_flag) { + printf("CHART %s.bytes_network%u '' 'Network%u Received/Sent Bytes' 'kilobits/s' 'network' 'xendomain.bytes_network' line %u %d %s %s\n" + , type + , network + , network + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_BYTES + network + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION received '' incremental 8 %d\n", netdata_update_every * 1000); + printf("DIMENSION sent '' incremental -8 %d\n", netdata_update_every * 1000); +} + +static void print_domain_network_packets_chart_definition(char *type, unsigned int network, int obsolete_flag) { + printf("CHART %s.packets_network%u '' 'Network%u Received/Sent Packets' 'packets/s' 'network' 'xendomain.packets_network' line %u %d %s %s\n" + , type + , network + , network + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_PACKETS + network + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION received '' incremental 1 %d\n", netdata_update_every); + printf("DIMENSION sent '' incremental -1 %d\n", netdata_update_every); +} + +static void print_domain_network_errors_chart_definition(char *type, unsigned int network, int obsolete_flag) { + printf("CHART %s.errors_network%u '' 'Network%u Receive/Transmit Errors' 'errors/s' 'network' 'xendomain.errors_network' line %u %d %s %s\n" + , type + , network + , network + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_PACKETS + network + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION received '' incremental 1 %d\n", netdata_update_every); + printf("DIMENSION sent '' incremental -1 %d\n", netdata_update_every); +} + +static void print_domain_network_drops_chart_definition(char *type, unsigned int network, int obsolete_flag) { + printf("CHART %s.drops_network%u '' 'Network%u Receive/Transmit Drops' 'drops/s' 'network' 'xendomain.drops_network' line %u %d %s %s\n" + , type + , network + , network + , NETDATA_CHART_PRIO_XENSTAT_DOMAIN_NET_PACKETS + network + , netdata_update_every + , obsolete_flag ? "obsolete": "''" + , PLUGIN_XENSTAT_NAME + ); + printf("DIMENSION received '' incremental 1 %d\n", netdata_update_every); + printf("DIMENSION sent '' incremental -1 %d\n", netdata_update_every); +} + +static void xenstat_send_domain_metrics() { + + if(unlikely(!node_metrics.domain_root)) return; + struct domain_metrics *d; + + for(d = node_metrics.domain_root; d; d = d->next) { + char type[TYPE_LENGTH_MAX + 1]; + snprintfz(type, TYPE_LENGTH_MAX, "xendomain_%s_%s", d->name, d->uuid); + + if(likely(d->updated)) { + + // ---------------------------------------------------------------- + + if(unlikely(!d->states_chart_generated)) { + print_domain_states_chart_definition(type, CHART_IS_NOT_OBSOLETE); + d->states_chart_generated = 1; + } + printf( + "BEGIN %s.states\n" + "SET running = %lld\n" + "SET blocked = %lld\n" + "SET paused = %lld\n" + "SET shutdown = %lld\n" + "SET crashed = %lld\n" + "SET dying = %lld\n" + "END\n" + , type + , (collected_number)d->running + , (collected_number)d->blocked + , (collected_number)d->paused + , (collected_number)d->shutdown + , (collected_number)d->crashed + , (collected_number)d->dying + ); + + // ---------------------------------------------------------------- + + if(unlikely(!d->cpu_chart_generated)) { + print_domain_cpu_chart_definition(type, CHART_IS_NOT_OBSOLETE); + d->cpu_chart_generated = 1; + } + printf( + "BEGIN %s.cpu\n" + "SET used = %lld\n" + "END\n" + , type + , (collected_number)d->cpu_ns + ); + + // ---------------------------------------------------------------- + + struct vcpu_metrics *vcpu_m; + + if(unlikely(!d->vcpu_chart_generated || d->num_vcpus_changed)) { + print_domain_vcpu_chart_definition(type, d, CHART_IS_NOT_OBSOLETE); + d->num_vcpus_changed = 0; + d->vcpu_chart_generated = 1; + } + + printf("BEGIN %s.vcpu\n", type); + for(vcpu_m = d->vcpu_root; vcpu_m; vcpu_m = vcpu_m->next) { + if(likely(vcpu_m->updated && vcpu_m->online)) { + printf( + "SET vcpu%u = %lld\n" + , vcpu_m->id + , (collected_number)vcpu_m->ns + ); + } + } + printf("END\n"); + + // ---------------------------------------------------------------- + + if(unlikely(!d->mem_chart_generated)) { + print_domain_mem_chart_definition(type, CHART_IS_NOT_OBSOLETE); + d->mem_chart_generated = 1; + } + printf( + "BEGIN %s.mem\n" + "SET maximum = %lld\n" + "SET current = %lld\n" + "END\n" + , type + , (collected_number)d->max_mem + , (collected_number)d->cur_mem + ); + + // ---------------------------------------------------------------- + + struct vbd_metrics *vbd_m; + for(vbd_m = d->vbd_root; vbd_m; vbd_m = vbd_m->next) { + if(likely(vbd_m->updated && !vbd_m->error)) { + if(unlikely(!vbd_m->oo_req_chart_generated)) { + print_domain_vbd_oo_chart_definition(type, vbd_m->id, CHART_IS_NOT_OBSOLETE); + vbd_m->oo_req_chart_generated = 1; + } + printf( + "BEGIN %s.oo_req_vbd%u\n" + "SET requests = %lld\n" + "END\n" + , type + , vbd_m->id + , (collected_number)vbd_m->oo_reqs + ); + + // ---------------------------------------------------------------- + + if(unlikely(!vbd_m->requests_chart_generated)) { + print_domain_vbd_requests_chart_definition(type, vbd_m->id, CHART_IS_NOT_OBSOLETE); + vbd_m->requests_chart_generated = 1; + } + printf( + "BEGIN %s.requests_vbd%u\n" + "SET read = %lld\n" + "SET write = %lld\n" + "END\n" + , type + , vbd_m->id + , (collected_number)vbd_m->rd_reqs + , (collected_number)vbd_m->wr_reqs + ); + + // ---------------------------------------------------------------- + + if(unlikely(!vbd_m->sectors_chart_generated)) { + print_domain_vbd_sectors_chart_definition(type, vbd_m->id, CHART_IS_NOT_OBSOLETE); + vbd_m->sectors_chart_generated = 1; + } + printf( + "BEGIN %s.sectors_vbd%u\n" + "SET read = %lld\n" + "SET write = %lld\n" + "END\n" + , type + , vbd_m->id + , (collected_number)vbd_m->rd_sects + , (collected_number)vbd_m->wr_sects + ); + } + else { + if(unlikely(vbd_m->oo_req_chart_generated + || vbd_m->requests_chart_generated + || vbd_m->sectors_chart_generated)) { + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: mark charts as obsolete for vbd %u, domain '%s', id %u, uuid %s\n", vbd_m->id, d->name, d->id, d->uuid); + print_domain_vbd_oo_chart_definition(type, vbd_m->id, CHART_IS_OBSOLETE); + print_domain_vbd_requests_chart_definition(type, vbd_m->id, CHART_IS_OBSOLETE); + print_domain_vbd_sectors_chart_definition(type, vbd_m->id, CHART_IS_OBSOLETE); + vbd_m->oo_req_chart_generated = 0; + vbd_m->requests_chart_generated = 0; + vbd_m->sectors_chart_generated = 0; + } + } + } + + // ---------------------------------------------------------------- + + struct network_metrics *network_m; + for(network_m = d->network_root; network_m; network_m = network_m->next) { + if(likely(network_m->updated)) { + if(unlikely(!network_m->bytes_chart_generated)) { + print_domain_network_bytes_chart_definition(type, network_m->id, CHART_IS_NOT_OBSOLETE); + network_m->bytes_chart_generated = 1; + } + printf( + "BEGIN %s.bytes_network%u\n" + "SET received = %lld\n" + "SET sent = %lld\n" + "END\n" + , type + , network_m->id + , (collected_number)network_m->rbytes + , (collected_number)network_m->tbytes + ); + + // ---------------------------------------------------------------- + + if(unlikely(!network_m->packets_chart_generated)) { + print_domain_network_packets_chart_definition(type, network_m->id, CHART_IS_NOT_OBSOLETE); + network_m->packets_chart_generated = 1; + } + printf( + "BEGIN %s.packets_network%u\n" + "SET received = %lld\n" + "SET sent = %lld\n" + "END\n" + , type + , network_m->id + , (collected_number)network_m->rpackets + , (collected_number)network_m->tpackets + ); + + // ---------------------------------------------------------------- + + if(unlikely(!network_m->errors_chart_generated)) { + print_domain_network_errors_chart_definition(type, network_m->id, CHART_IS_NOT_OBSOLETE); + network_m->errors_chart_generated = 1; + } + printf( + "BEGIN %s.errors_network%u\n" + "SET received = %lld\n" + "SET sent = %lld\n" + "END\n" + , type + , network_m->id + , (collected_number)network_m->rerrs + , (collected_number)network_m->terrs + ); + + // ---------------------------------------------------------------- + + if(unlikely(!network_m->drops_chart_generated)) { + print_domain_network_drops_chart_definition(type, network_m->id, CHART_IS_NOT_OBSOLETE); + network_m->drops_chart_generated = 1; + } + printf( + "BEGIN %s.drops_network%u\n" + "SET received = %lld\n" + "SET sent = %lld\n" + "END\n" + , type + , network_m->id + , (collected_number)network_m->rdrops + , (collected_number)network_m->tdrops + ); + } + else { + if(unlikely(network_m->bytes_chart_generated + || network_m->packets_chart_generated + || network_m->errors_chart_generated + || network_m->drops_chart_generated)) + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: mark charts as obsolete for network %u, domain '%s', id %u, uuid %s\n", network_m->id, d->name, d->id, d->uuid); + print_domain_network_bytes_chart_definition(type, network_m->id, CHART_IS_OBSOLETE); + print_domain_network_packets_chart_definition(type, network_m->id, CHART_IS_OBSOLETE); + print_domain_network_errors_chart_definition(type, network_m->id, CHART_IS_OBSOLETE); + print_domain_network_drops_chart_definition(type, network_m->id, CHART_IS_OBSOLETE); + network_m->bytes_chart_generated = 0; + network_m->packets_chart_generated = 0; + network_m->errors_chart_generated = 0; + network_m->drops_chart_generated = 0; + } + } + } + else{ + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: mark charts as obsolete for domain '%s', id %u, uuid %s\n", d->name, d->id, d->uuid); + print_domain_states_chart_definition(type, CHART_IS_OBSOLETE); + print_domain_cpu_chart_definition(type, CHART_IS_OBSOLETE); + print_domain_vcpu_chart_definition(type, d, CHART_IS_OBSOLETE); + print_domain_mem_chart_definition(type, CHART_IS_OBSOLETE); + + d = domain_metrics_free(d); + } + } +} + +int main(int argc, char **argv) { + clocks_init(); + + // ------------------------------------------------------------------------ + // initialization of netdata plugin + + program_name = "xenstat.plugin"; + + // disable syslog + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + // ------------------------------------------------------------------------ + // parse command line parameters + + int i, freq = 0; + for(i = 1; i < argc ; i++) { + if(isdigit(*argv[i]) && !freq) { + int n = str2i(argv[i]); + if(n > 0 && n < 86400) { + freq = n; + continue; + } + } + else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) { + printf("xenstat.plugin %s\n", VERSION); + exit(0); + } + else if(strcmp("debug", argv[i]) == 0) { + debug = 1; + continue; + } + else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { + fprintf(stderr, + "\n" + " netdata xenstat.plugin %s\n" + " Copyright (C) 2019 Netdata Inc.\n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " This program is a data collector plugin for netdata.\n" + "\n" + " Available command line options:\n" + "\n" + " COLLECTION_FREQUENCY data collection frequency in seconds\n" + " minimum: %d\n" + "\n" + " debug enable verbose output\n" + " default: disabled\n" + "\n" + " -v\n" + " -V\n" + " --version print version and exit\n" + "\n" + " -h\n" + " --help print this message and exit\n" + "\n" + " For more information:\n" + " https://github.com/netdata/netdata/tree/master/collectors/xenstat.plugin\n" + "\n" + , VERSION + , netdata_update_every + ); + exit(1); + } + + error("xenstat.plugin: ignoring parameter '%s'", argv[i]); + } + + errno = 0; + + if(freq >= netdata_update_every) + netdata_update_every = freq; + else if(freq) + error("update frequency %d seconds is too small for XENSTAT. Using %d.", freq, netdata_update_every); + + // ------------------------------------------------------------------------ + // initialize xen API handles + xenstat_handle *xhandle = NULL; + libxl_ctx *ctx = NULL; + libxl_dominfo info; + + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: calling xenstat_init()\n"); + xhandle = xenstat_init(); + if (xhandle == NULL) { + error("XENSTAT: failed to initialize xenstat library."); + return 1; + } + + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: calling libxl_ctx_alloc()\n"); + if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, NULL)) { + error("XENSTAT: failed to initialize xl context."); + xenstat_uninit(xhandle); + return 1; + } + libxl_dominfo_init(&info); + + // ------------------------------------------------------------------------ + // the main loop + + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: starting data collection\n"); + + time_t started_t = now_monotonic_sec(); + + size_t iteration; + usec_t step = netdata_update_every * USEC_PER_SEC; + + heartbeat_t hb; + heartbeat_init(&hb); + for(iteration = 0; 1; iteration++) { + usec_t dt = heartbeat_next(&hb, step); + + if(unlikely(netdata_exit)) break; + + if(unlikely(debug && iteration)) + fprintf(stderr, "xenstat.plugin: iteration %zu, dt %llu usec\n" + , iteration + , dt + ); + + if(likely(xhandle)) { + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: calling xenstat_collect()\n"); + int ret = xenstat_collect(xhandle, ctx, &info); + + if(likely(!ret)) { + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: calling xenstat_send_node_metrics()\n"); + xenstat_send_node_metrics(); + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: calling xenstat_send_domain_metrics()\n"); + xenstat_send_domain_metrics(); + } + else { + if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: can't collect data\n"); + } + } + + fflush(stdout); + + // restart check (14400 seconds) + if(unlikely(now_monotonic_sec() - started_t > 14400)) break; + } + + libxl_ctx_free(ctx); + xenstat_uninit(xhandle); + info("XENSTAT process exiting"); + + return 0; +} diff --git a/config.cmake.h.in b/config.cmake.h.in new file mode 100644 index 0000000..e0faaa4 --- /dev/null +++ b/config.cmake.h.in @@ -0,0 +1,56 @@ +/* This file was generated by CMAKE from config.cmake.h.in */ + +#define VERSION "@NETDATA_VERSION@" + +#define CONFIGURE_COMMAND "cmake" + +#define NETDATA_USER "@NETDATA_USER@" + +/* Using a bundled copy of protobuf */ +// #undef BUNDLED_PROTOBUF +#define HAVE_PROTOBUF 1 + +#define likely(x) @LIKELY_MACRO@ +#define unlikely(x) @UNLIKELY_MACRO@ + +/* La boring stuff */ +#cmakedefine HAVE_STRUCT_TIMESPEC + +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif + +#cmakedefine HAVE_SYS_STATFS_H +#cmakedefine HAVE_SYS_STATVFS_H +#cmakedefine HAVE_INTTYPES_H +#cmakedefine HAVE_STDINT_H +#cmakedefine STRERROR_R_CHAR_P + +#cmakedefine MAJOR_IN_MKDEV +#cmakedefine MAJOR_IN_SYSMACROS + +#cmakedefine HAVE_CRYPTO + +#cmakedefine ENABLE_PROMETHEUS_REMOTE_WRITE + +/* they are defined as REQUIRED in CMakeLists.txt */ +#define NETDATA_WITH_ZLIB 1 +#define ENABLE_JSONC 1 + +#cmakedefine ENABLE_ML + +#cmakedefine HAVE_LIBBPF + +/* NSA spy stuff */ +#define ENABLE_HTTPS 1 +#cmakedefine01 HAVE_X509_VERIFY_PARAM_set1_host + +#cmakedefine ENABLE_ACLK +#define ENABLE_DBENGINE +#define ENABLE_COMPRESSION // pkg_check_modules(LIBLZ4 REQUIRED liblz4) +#cmakedefine ENABLE_APPS_PLUGIN + + +#define __always_unused @ALWAYS_UNUSED_MACRO@ +#define __maybe_unused @MAYBE_UNUSED_MACRO@ diff --git a/configs.signatures b/configs.signatures new file mode 100644 index 0000000..994b62a --- /dev/null +++ b/configs.signatures @@ -0,0 +1,764 @@ +declare -A configs_signatures=( + ['00049600c2f9237e6c46b8b0f703c13c']='health.d/bcache.conf' + ['00403e687213f3b7db9bf4563a5a92cc']='python.d/isc_dhcpd.conf' + ['0056936ce99788ed9ae1c611c87aa6d8']='apps_groups.conf' + ['007fc019fb32e952b509d455c016a002']='health.d/tcp_resets.conf' + ['0083884a6cec6a48ee61665fbe131142']='charts.d/sensors.conf' + ['0102351817595a85d01ebd54a5f2f36b']='python.d/ovpn_status_log.conf' + ['01302e01162d465614276de43fad7546']='python.d.conf' + ['0147c7e8f8f57e37c5dade4e8aacacf9']='python.d/example.conf' + ['017036c1dc32c9312b2704b839bd078f']='python.d/haproxy.conf' + ['01c54057e0ca55b5bb49df1662d6b8c3']='python.d/web_log.conf' + ['024f4a6a431bcbc6acdb4184aa9661f3']='python.d/httpcheck.conf' + ['02fa10fa85ab88e9723998de48d1aca0']='health.d/disks.conf' + ['0314f0f1f88773c0ed9e9a908335e7ca']='health.d/tcp_mem.conf' + ['032ee2b3b6cb200bfdf1a0698c2457d6']='health.d/boinc.conf' + ['03510c8b3a2a6e8535320cfb9ebef06a']='python.d/httpcheck.conf' + ['036dc300bd7b0e0ef229b9822686d63e']='python.d/isc_dhcpd.conf' + ['0388b873d0d7e47c19005b7241db77d8']='python.d/tomcat.conf' + ['04138a3d8e907c75329fe60ce2e27c1c']='health.d/tcp_resets.conf' + ['0433d98a19d3b08e6f13884e46d39b47']='health.d/disks.conf' + ['043f0a35dde85837fabeb85b990a41c1']='health.d/swap.conf' + ['044496086420b531487d3c57600ca673']='apps_groups.conf' + ['0529b679d3c0e7e6332753c7f6484731']='health.d/net.conf' + ['054a2eece27ee2f5928b8167f5989b65']='python.d/dockerd.conf' + ['057d12aaff0467e64529e839a258806b']='health.d/entropy.conf' + ['05809c6662ba39f19cbec90234433d62']='health_alarm_notify.conf' + ['059d98d0c562e1c81653d1e64673deab']='python.d/web_log.conf' + ['05a8f39f134850c1e8d6267dbe706273']='health.d/web_log.conf' + ['061c45b0e34170d357e47883166ecf40']='python.d/nginx.conf' + ['06f055543a6b4d038d92c226952d777f']='health.d/isc_dhcpd.conf' + ['074d618a7e9c72f9bdfda7611e01e0ca']='python.d/redis.conf' + ['074df527cc70b5f38c0714f08f20e57c']='health.d/apache.conf' + ['0787e67357804b934d2866f1b7c60b14']='health.d/ipc.conf' + ['08042325ab27256b938575deafee8ecf']='python.d/nginx.conf' + ['0847d54a7a0c7e0381c52e9d4d3fa7db']='health.d/mdstat.conf' + ['084ee72d64760f2641b0720e79c922f3']='health.d/cpu.conf' + ['0856124b1eecf01681b4fdf4e21efb3f']='health.d/net.conf' + ['0862e7cf3d32ef48795702c6aefd27e0']='python.d/fail2ban.conf' + ['08de9ae0765a4161abe24c09e47b3454']='python.d/go_expvar.conf' + ['08ff5218f938fc48e09e718821169d14']='health.d/redis.conf' + ['091572888425bc3b8b559c3c53367ec7']='apps_groups.conf' + ['09225283977a6584f8063016091cc4f2']='health.d/tcp_resets.conf' + ['09264cec953ae1c4c2985e6446abb386']='health.d/mysql.conf' + ['093540fdc2a228e976ce5d48a3adf9fc']='health.d/disks.conf' + ['09e030d26be08a13fa3560e47fa27825']='apps_groups.conf' + ['0a5bc649295ba08f7e22e175901c1380']='python.d/unbound.conf' + ['0a7039ecc7a86b480d9d499b12b02763']='python.d/freeradius.conf' + ['0ad10fa896346202aee99384b0ec968d']='health.d/cpu.conf' + ['0b6903f981cdb018c17802ac4e295609']='health.d/btrfs.conf' + ['0bd66be0e8d99abc3a1d816036343f0a']='health_alarm_notify.conf' + ['0c5e0fa364d7bdf7c16e8459a0544572']='health.d/netfilter.conf' + ['0cd4e1fb57497e4d4c2451a9e58f724d']='python.d/redis.conf' + ['0d29fe9919a2975107db1f2583344e7a']='health.d/mdstat.conf' + ['0dd38dcd2473ddb9f8b1b41147432d10']='health_alarm_notify.conf' + ['0e59bc11d0a869ea0247c04c08c8d72e']='python.d/ipfs.conf' + ['0ee63c201892b21abc8bf6c712e815e3']='python.d/mysql.conf' + ['0ef8af1f358741afa7fd5d0ffabefaac']='charts.d/mysql.conf' + ['0f65b08edebedd06e376274021196a6b']='health.d/lighttpd.conf' + ['0ff6d725acde27a53de4646d69e9e3d1']='health.d/net.conf' + ['107de0cfeafcb6ab22fe7dd4a25d200d']='health.d/udp_errors.conf' + ['107e6ac69b30fb9837ac64c35f891ec7']='health.d/tcp_resets.conf' + ['10ac8106a109fdabdcc0405e9f43dbe1']='charts.d/mem_apps.conf' + ['10c3b525850a1cb9de760a8ee96fbc6e']='charts.d/opensips.conf' + ['1112c848ef91ebb9c622020d09712d67']='health.d/net.conf' + ['111401c47f94015cd12ad0b8a4393c4f']='health.d/softnet.conf' + ['111ead4b350593dd69b6f7ac0307b49b']='python.d/httpcheck.conf' + ['12a4c7803ae79506a14ea784fea60dce']='health.d/net.conf' + ['12d27b9f4d1696c2d49a77ed71d68e6f']='python.d/w1sensor.conf' + ['12e57bea1127933a4fe49ce2e9674f4d']='statsd.d/example.conf' + ['13141998a5d71308d9c119834c27bfd3']='python.d.conf' + ['13ccf65fd879795f0fcea89ade27c2d0']='health.d/swap.conf' + ['13e861a3d2f3075de883994ab54df658']='health.d/megacli.conf' + ['1423e4f8c25a66c316be37da0d5c9c54']='health.d/btrfs.conf' + ['142a5b693d34b0308bb0b8aec71fad79']='python.d/postfix.conf' + ['14783e051650442ec9e2ed38d81d667e']='charts.d/exim.conf' + ['156fe032bfd9da822060d0f515b326d9']='health.d/isc_dhcpd.conf' + ['15d8401b56a74120f9f832873ec9c578']='health.d/postgres.conf' + ['15e32114994b92be7853b88091e7c6fb']='python.d/exim.conf' + ['167a2ce21035ac6b5a8720c7c2b4413c']='health.d/web_log.conf' + ['174c21a6ce5de97bda83d502aa47a9f8']='health.d/apache.conf' + ['17555c7418c801ceb6c93adbe485d6f9']='apps_groups.conf' + ['178281aa2241d4a3e6b798bb9c4ae577']='python.d/haproxy.conf' + ['17dc745a76ab4c37ee31a3224f644fc1']='charts.d/postfix.conf' + ['18710ef6523cef8630d644ab270bfe02']='health.d/varnish.conf' + ['18c46f55d45a17d60c72877807d9a3d2']='health.d/udp_errors.conf' + ['18ee1c6197a4381b1c1631ef6129824f']='apps_groups.conf' + ['1972e48345e6c3f0d65f94a03317622b']='health_alarm_notify.conf' + ['1bc518219377499d1d05d6ee770f1a87']='python.d/go_expvar.conf' + ['1be2d92f2934601e18e6d709590569b7']='charts.d/apcupsd.conf' + ['1c12b678ab65f271a96da1bbd0a1ab1c']='health.d/softnet.conf' + ['1c3168c95b53e999df3d45162b3f50b8']='health.d/fping.conf' + ['1c71a8792c5c0ed035dd97af93a04838']='health_alarm_notify.conf' + ['1d6efba856acaaaf3b50bc6d66611b92']='python.d/web_log.conf' + ['1e09f326178acf07d361c08a44d8b1f3']='python.d/rabbitmq.conf' + ['1e0bc6a0ff701d16225383e5de76585b']='python.d/spigotmc.conf' + ['1ea8e8ef1fa8a3a0fcdfba236f4cb195']='python.d/mysql.conf' + ['1eb0bc80934a3166fcde4d153c476d14']='health.d/fping.conf' + ['1ef0fd38e7969c023bc3fa6d89eaf6d6']='python.d/mdstat.conf' + ['1f43a9a820c02e0525de594299b55b15']='python.d.conf' + ['1f5545b3ff52b3eb75ee05401f67a9bc']='fping.conf' + ['1fa47f32ab52a22f8e7087cae85dd25e']='health.d/net.conf' + ['203678a5be09d65993dcb3a9d475d187']='health.d/ipfs.conf' + ['20be73f473e59bc7de1fe61d53466aba']='health.d/ram.conf' + ['21913b96f540333094a972614f5da8f1']='charts.d/tomcat.conf' + ['21924a6ab8008d16ffac340f226ebad9']='python.d/nginx.conf' + ['219c5bb81965fa17d4940d4aa343c282']='health.d/mysql.conf' + ['225792e33ddeea72992ffa5ab36d505f']='python.d/ntpd.conf' + ['22952dbf42647c583b005054b23b545f']='health.d/disks.conf' + ['22ceb822983134a7ca67343241f30341']='health.d/disks.conf' + ['2320314191d0f8e7548b9273b77ac5e3']='apps_groups.conf' + ['2385e5d35b440619621c4af62492d91b']='health.d/disks.conf' + ['23989e04f9c7694b7eab646f8949cd52']='python.d/portcheck.conf' + ['23a5afe5260a7ad388e447709cb009df']='python.d/web_log.conf' + ['23ae815aefa221b1929f96752a1f7556']='health.d/squid.conf' + ['243503ceee1d5b4e1e55a28768a116ae']='health.d/net.conf' + ['2472e49550326f7142e2c425ccbca005']='health.d/softnet.conf' + ['24d02e4086fd60943c45d8de2e52a4fb']='python.d/springboot.conf' + ['254de8ec49602bea2da3631676d7cfec']='health.d/cpu.conf' + ['256a7f06f7e579a61752fc64418cffe5']='charts.d/nut.conf' + ['25a35a7c3c6092a839865e9be250c024']='health.d/ram.conf' + ['262f98b3d88b98978cb08d566ce85a9d']='charts.d/squid.conf' + ['27a1dbd43abc7394dcd72efe797ee9af']='python.d.conf' + ['2827de41cf34a91b7a8e4d8724f59668']='health.d/net.conf' + ['28df44a90e8ea4c6156314c03e88bf44']='health.d/softnet.conf' + ['292c6cbbb5c819bb91f87c02a45890c1']='health.d/swap.conf' + ['29485dc362202095d3d80e4f744d0538']='health_alarm_notify.conf' + ['297160ae7ee01a547ed14f857b4f2c8d']='health.d/memcached.conf' + ['298504f331c55dff4055321ff3a7b5cc']='health.d/web_log.conf' + ['29c37d59e8801dffd18617738c1b4b71']='python.d.conf' + ['29f97e10b92333790fbe0d2a3617b736']='health_alarm_notify.conf' + ['2a0794fd43eadf30a51805bc9ba2c64d']='python.d/hddtemp.conf' + ['2acae80dbdbe536a160f5b216bac84bc']='python.d/samba.conf' + ['2ad55a5d1e885cf142849a78d4b00401']='health.d/net.conf' + ['2b0106e89ce622da2869cb0d201246d1']='python.d/unbound.conf' + ['2bbbebf52f84fd27fbefecd2a8a8076f']='health.d/memcached.conf' + ['2c2b7e8df922b2bf121fb7db32bbc3bd']='health.d/udp_errors.conf' + ['2d1d7498c72f4245cf32902c2b7e71e0']='health.d/entropy.conf' + ['2da0a2e7117292ece11d69723a294bd7']='python.d/mongodb.conf' + ['2ee5df033fe9c65a45566b6760b856e3']='python.d/web_log.conf' + ['2f05e09b69ea20cda56d8f8b6fd3e86d']='health.d/couchdb.conf' + ['2f13a6b7d11eda826ff26569b2a77080']='health.d/apcupsd.conf' + ['2f3a8e33df83f14e0af8ca2465697215']='python.d/exim.conf' + ['2f4a85fedecce1bf425fa1039f6b021e']='apps_groups.conf' + ['2fa8fb929fd597f2ab97b6efc540a043']='health_alarm_notify.conf' + ['307ac41f6c67fcf007d6f7135fac314c']='stream.conf' + ['312b4b8e2805e19cf9be554b319567d6']='health.d/softnet.conf' + ['31471ee7eb6cfbb412587a837ffcfe6f']='python.d.conf' + ['3161290af7c1909768253e714ea2c3de']='python.d/ceph.conf' + ['318bb45755726a25120bb33413d4b582']='health.d/net.conf' + ['318db50a701442890c269ab547041e97']='health.d/tcp_orphans.conf' + ['31e4058cfe0a01dd9ce4ae425fd7b4f1']='python.d/web_log.conf' + ['322ec5e7095912221110623c9d7130cf']='health_alarm_notify.conf' + ['325617412a628e3bc776e3fbb777a2a6']='health.d/redis.conf' + ['326e1477131e0f73304711135f70a2a5']='health.d/memcached.conf' + ['32fde0057c790964f2c743cb3c9aad29']='health.d/nginx.conf' + ['33486497112127badc4c47ed2008969c']='python.d/freeradius.conf' + ['33b135e28aeaef2b8224ba69a0fde245']='health.d/cpu.conf' + ['343bc919a2fbc93f687f9d1339ec5f79']='health.d/net.conf' + ['34f6cf10f8da18ddd7edd2a28a4fe5f9']='python.d/sensors.conf' + ['35024ebd94542484c0866e6ee6b950cb']='health.d/net.conf' + ['35ac63a2f08b2c6dd901c542629ae5df']='python.d/postgres.conf' + ['35eb9785c844afd43fa7931915e2d566']='python.d/elasticsearch.conf' + ['3634d5eddc46fb0d50cf47f370670c2c']='health.d/redis.conf' + ['364b6e0081b116c9ec073b4d329a6dcc']='health_alarm_notify.conf' + ['367d1463e520eb9dc89223bab161c6d1']='python.d/postgres.conf' + ['36fdd55665cf10b0db164c2a0cca5e57']='health.d/qos.conf' + ['373160658e7d5f1a129de397b9347365']='health.d/entropy.conf' + ['373c1276dc9e65884ff2b26e1f08afe7']='health.d/named.conf' + ['3798445a7faaf45c7a8047908678e690']='python.d/varnish.conf' + ['37a5218f42e0ffd1becfb7db14cae568']='health.d/fronius.conf' + ['37bc2b50ade9f334da4775dfea59f785']='python.d.conf' + ['3807c37ac57046ae867e34dcfe6dbfd9']='health.d/httpcheck.conf' + ['3848172053221b95279ba9bf789cd4e0']='health.d/apache.conf' + ['3866efafd38e161136428d0f818cac43']='health.d/net.conf' + ['38d1bf04fe9901481dd6febcc0404a86']='python.d.conf' + ['392ab65af875a8daf0041113b1b40c2f']='python.d.conf' + ['39304b2570611c3acb35b72762b46778']='charts.d/sensors.conf' + ['394b7e91c97b7adb776460d309b335ff']='python.d/nginx.conf' + ['39571e9fad9b759200c5d5b2ee13feb4']='python.d/redis.conf' + ['39b65042cafdd9b849a44ec81aa66cac']='health_alarm_notify.conf' + ['39f9422b0f0c3eec11a31aff79d89514']='health.d/retroshare.conf' + ['3a04a3bc66c49d0c24f65a44fd9caa80']='python.d/postgres.conf' + ['3a0f1f988d2111ba003199deca722d9b']='health_alarm_notify.conf' + ['3a278ef6c66c122a407a0236c251119d']='python.d/nginx_plus.conf' + ['3af522d65b50a5e447607ffb28c81ff5']='apps_groups.conf' + ['3b1bfa40a4ff6a200bb2fc00bc51a664']='apps_groups.conf' + ['3b535da82cf2ff53e03d735d02fb2357']='python.d/squid.conf' + ['3bc2776623889744a98178bad6fb3b79']='health.d/disks.conf' + ['3bc2c4423b19779d49ee7935b2ea1431']='health.d/stiebeleltron.conf' + ['3bc65e997ab59b9de390fdf63d77f5e1']='python.d/postgres.conf' + ['3c60691eb05d4d5bf78a41ed46303bb6']='python.d.conf' + ['3c9c47163e9d4dbcb0079b6232398f2f']='apps_groups.conf' + ['3ca696189911fb38a0319ddd71e9a395']='python.d/phpfpm.conf' + ['3cc6255457d4cba881ae0554ae5d9190']='health.d/squid.conf' + ['3d974ac9fdaa44d4527d6503bec35e34']='stream.conf' + ['3d9b33da0f40c2ceecd006ddfd44fd14']='python.d.conf' + ['3f170e3343cd784983b019163393f5af']='health.d/nginx.conf' + ['3f7b669fde5c63bd55cb6dd88866d306']='python.d/ceph.conf' + ['3fbe85671efd5d07e51584ab8262b48b']='health.d/tcp_listen.conf' + ['3fc45cc18e884c22482524dff6d27833']='python.d/hddtemp.conf' + ['3fcc3c449ce8e0388f9c23ca07cab608']='health.d/backend.conf' + ['40225ee41bc3a85ce9b7f7af4d90e3e9']='charts.d/cpu_apps.conf' + ['4063a01bffb43b0423425d1ba3004967']='health.d/tcp_resets.conf' + ['41fa6bb109763561be59d7bcd07bbe82']='python.d/dnsdist.conf' + ['421d5dc6c2fce22d0816b6e6363bea57']='python.d/hddtemp.conf' + ['42ad0e70b1365b6e7244cc305dbaa529']='health_alarm_notify.conf' + ['42bf1c7c64ed77038a0aa094d792a9e2']='python.d/mysql.conf' + ['4332dee96e4f38fc73c962df3494ab7c']='health_alarm_notify.conf' + ['43739017b6195a6abec14a70fe0df224']='python.d/rethinkdbs.conf' + ['43ebb7f224c3b232d8ad044d7e9508b6']='health.d/net.conf' + ['43ef8c1e77054f53f9be9f381eb6cd67']='python.d/portcheck.conf' + ['4401f0c6a101d35d2cb833e7b0aeb421']='health.d/qos.conf' + ['444e20cf75e2cd019e8d412d5d1f4a7f']='charts.d/cpu_apps.conf' + ['4461bfacf9a3da47770fb3ca31f4c91f']='health.d/net.conf' + ['450667c552ab7a7d8d4a2c214fdacca5']='health.d/entropy.conf' + ['459e57e6acb389f4243f695a1e53ab2b']='health.d/boinc.conf' + ['45a77ac36ba9f1898144b902de17204b']='health.d/memcached.conf' + ['46798cda21e1a5faa769abf4e5d27c48']='health.d/disks.conf' + ['46dfa2b6a7e7c76532e00c1344d5d171']='python.d/logind.conf' + ['46ef6c1b638e40a7dfd62defdc5f99a3']='health.d/retroshare.conf' + ['47180421d580baeaedf8c0ef3d647fb5']='python.d/hddtemp.conf' + ['48195c5c8c0476a49b714b4c76bdb570']='python.d/squid.conf' + ['48eef63bcf744bae114b502b6dacb4a1']='charts.d/phpfpm.conf' + ['4960852f8951b54ca2fe10065752143e']='python.d.conf' + ['4a448831776de8acf2e0bdc4cc994cb4']='apps_groups.conf' + ['4aba3b6a28ccd75faf5326aca997ee0d']='health_alarm_notify.conf' + ['4b775fb31342f1478b3773d041a72911']='python.d.conf' + ['4ccb06fff1ce06dc5bc80e0a9f568f6e']='charts.d.conf' + ['4cd585f5dfdacaf287413ad037b4e60a']='apps_groups.conf' + ['4d13684cadfa90e73ab465409bf7263b']='health.d/mysql.conf' + ['4d91ee6fe4c887ea3865ef36ac63da3c']='health.d/mysql.conf' + ['4da1c0f009d87995ed66d84fae07f09a']='health.d/memory.conf' + ['4dee2390e0bc89938dafa34a390dcf36']='charts.d/squid.conf' + ['4e07ea46dd54eb0bbb4f1c0982a71973']='python.d/cpuidle.conf' + ['4e37502fdf1944d094dd8be1e1f5e9e6']='health.d/cpu.conf' + ['4e59e91d800059183028bbb44cf5afd2']='health.d/httpcheck.conf' + ['4e995acb0d6fd77403a2a9dca984b55b']='charts.d.conf' + ['4f6a5b47a13f5912cc89e9286701dd08']='health.d/redis.conf' + ['4f6f4d39c19d7d954f769d3f9d3b4da5']='health.d/memcached.conf' + ['4fc3fa3dc89b789c8820ce109ea6e385']='python.d/httpcheck.conf' + ['4fdf72784296326e0b46cb526a5d77a1']='python.d.conf' + ['4fef19afccd9a591165b72f0b1a2ac2e']='python.d/freeradius.conf' + ['501eb2484b459b410b3f792c2dbaa955']='health.d/swap.conf' + ['5050b5963599f13ad5dc0263fa39a906']='python.d/fail2ban.conf' + ['508771d8e4611a058991a1bc11039dea']='health.d/disks.conf' + ['5120492fa26be3749192607f62dc05f8']='health.d/mdstat.conf' + ['5271cf9fc0fd10915a9759add70f7d78']='health.d/swap.conf' + ['5278ebbae19c60db600f0a119cb3664e']='python.d/apache.conf' + ['52d230aff57850a5aacc4e0420fcd8f5']='python.d.conf' + ['52d4131cf9df84e2550b1a5d899ec61d']='health.d/swap.conf' + ['5306b64a1e6baacd9de721e1f56961a8']='health_alarm_notify.conf' + ['53160707fdc6ce46c195b1b55bb0bcb1']='health.d/swap.conf' + ['535e5113b07b0fc6f3abd59546c276f6']='charts.d.conf' + ['5379cdc26d7725e2b0d688d785816cef']='python.d/mysql.conf' + ['5452eccad2f220d1191411737f6f4b2b']='python.d/isc_dhcpd.conf' + ['54614490a14e1a4b7b3d9fecb6b4cfa5']='python.d/exim.conf' + ['547779cdc460a926980de1590294b96b']='health.d/softnet.conf' + ['54c3934a03453453b8d7d0e8b84a7cd8']='health_alarm_notify.conf' + ['5523c092be7d667b228c70aeda6f44eb']='stream.conf' + ['55608bdd908a3806df1468f6ee318b2b']='health.d/qos.conf' + ['5598b83e915e31f68027afe324a427cd']='apps_groups.conf' + ['55cc7e3fe365a77f8e92d01d7a428276']='health.d/ram.conf' + ['56324751bae48308f155c079ee7ed43f']='python.d/megacli.conf' + ['565f11c38ae6bd5cc9d3c2adb542bc1b']='health.d/softnet.conf' + ['5664a814f9351b55da76edd472169a73']='health_alarm_notify.conf' + ['56b689031cdcf138064825f31474b37d']='apps_groups.conf' + ['56d072c4756b898d0c91f143b89e366b']='python.d.conf' + ['573398335c0c71c075fa57f702bce287']='health.d/disks.conf' + ['579c7c41c756745f57afd11c94a879d7']='python.d.conf' + ['57be306944cb09b7f024079728fd04b9']='apps_groups.conf' + ['5829812db29598db5857c9f433e96fef']='python.d/apache.conf' + ['58439d9c1253e33c74b399e6853143ab']='health.d/apcupsd.conf' + ['5855dd70d71c8497e5591b0690162c9c']='health.d/tcp_resets.conf' + ['58660dfcc260f77deec94b328b3838e8']='health_alarm_notify.conf' + ['58e835b7176865ec5a6f59f7aba832bf']='health.d/named.conf' + ['598f9814966a9e2fe48e8218151d3fa6']='stream.conf' + ['59dded33e3adfe622f36c557a4f4bed7']='health.d/net.conf' + ['59dea5e3872e5fe4e6c535b216c516b4']='health.d/disks.conf' + ['5b5588b00d6829908c2c5ea3220cfa1c']='health.d/load.conf' + ['5b917d894bb6a755d59264e9d48e9d56']='fping.conf' + ['5bbef0708f5eff4d4a53aaf35fc48a62']='health.d/disks.conf' + ['5bf51bb24fb41db9b1e448bd060d3f8c']='apps_groups.conf' + ['5c1694184557813a6948db0872556bf0']='charts.d/libreswan.conf' + ['5da15d6e17a15213a720749045e5d419']='health.d/disks.conf' + ['5dddb6c9670f4aa605abe4b0d901acc4']='python.d/bind_rndc.conf' + ['5e6fd588ef6934cf04ddb5e662aa02ea']='health.d/postgres.conf' + ['5eb670b6fe39da5fec2523d910b0dd1e']='health.d/cpu.conf' + ['5f05d4b248ab2637ada319b4e8c4e4c3']='python.d/varnish.conf' + ['5f109df927d5f20409c81f4bfca0c83e']='python.d/web_log.conf' + ['5ff1bcaa58695754e2f6980bfe19f579']='health.d/entropy.conf' + ['609c6c57605033da96ea65e50c90201c']='charts.d/apache.conf' + ['60a13375b3072300bd7552cb5ee9762b']='health.d/netfilter.conf' + ['611130db85bad90f966b52055147c81e']='python.d/httpcheck.conf' + ['61b7ed36f35e7bd930f5f7f91694a112']='charts.d/postfix.conf' + ['621f10b257a11add5ff5aff41e9662e3']='health.d/memcached.conf' + ['623771eecb3c277fc728b5304793f93b']='health.d/cpu.conf' + ['6265b7465e38839c3543190e638156aa']='python.d/ntpd.conf' + ['6319e4ae3810e9eabb61e852e1305785']='python.d.conf' + ['632c28d714c87a4969d11cf36a5edaa8']='health.d/web_log.conf' + ['636d032928ea0f4741eab264fb49c099']='apps_groups.conf' + ['6398ef37a15cb6a0bc921f58948d2b39']='health.d/softnet.conf' + ['63c626bc64b3d7bc46a72fbccf9b1926']='health.d/net.conf' + ['64070d856ab1b47a18ec871e49bbc13b']='python.d/squid.conf' + ['647361e99b5f4e0d73470c569bb9461c']='apps_groups.conf' + ['64ac37868097a462e5ee6905c350267e']='python.d/postgres.conf' + ['64c48f9726ab987baec9c617a9fef7a6']='health.d/nginx.conf' + ['64ffc1b6878c81b87564b0f48642c790']='health.d/elasticsearch.conf' + ['650b5fc9da23b25ee7ee1481e4aa2851']='health_alarm_notify.conf' + ['653e0c014c8fcfb4db6cd3351d87d720']='python.d.conf' + ['6546909d10cc5efcef9dd873bea85956']='python.d/mysql.conf' + ['65a59d96c039d0180603ffd945a8968c']='apps_groups.conf' + ['65c6933a17fb6b7f8e6baeab73431c17']='charts.d/apcupsd.conf' + ['6608c6546b3c6bde084fc1d34b1163c1']='health.d/retroshare.conf' + ['66628e70f70c6e991f4fe641b8e9bdde']='python.d/nginx_plus.conf' + ['669ebef43ee341f6889d382e86d0e200']='health.d/named.conf' + ['66c068eaa3672fbe4e2448e330b3511c']='python.d/web_log.conf' + ['66dfe138058ca26a31a118007eb31f35']='health.d/nginx.conf' + ['6814b9bc84483db428f6a479ba221855']='python.d/mysql.conf' + ['6848e78a5a1c349c6c42d6245d6530ad']='python.d/boinc.conf' + ['68607aef1802ed3dc0cd593bf6073beb']='python.d/postfix.conf' + ['6a18f61a595c0d48c3363bcc0dbfa6b9']='health_alarm_notify.conf' + ['6a47af861ad3dd112124c37fbf09672b']='apps_groups.conf' + ['6aa4507f86657383917a0407f2a9cc0d']='python.d.conf' + ['6acad8ce5c33e642742825db0eb9bb56']='python.d/web_log.conf' + ['6b39de5d85db45115db236347a6896d4']='health.d/named.conf' + ['6b598533309e08d71023e46801d45d7e']='apps_groups.conf' + ['6bb278bd9e171c4cb5c0fe639231288b']='python.d/web_log.conf' + ['6bf0de6e3b251b765b10a71d8c5c319d']='python.d/apache.conf' + ['6c9f2f0abe49a6f1a69db052ebcef1bf']='python.d/elasticsearch.conf' + ['6ca08ea2a238cad26578b8b85edae160']='health.d/udp_errors.conf' + ['6d02c2dd0863e09ad9dbba53e3b58116']='health.d/mysql.conf' + ['6df13c6ad582ef339a2a93901b6f0196']='health_alarm_notify.conf' + ['6e8366993709652fe7fc00e5d6a0a136']='charts.d/mysql.conf' + ['6ea958ca521e0514af57c08b518d8c5c']='health.d/backend.conf' + ['6f303ccfdc21c7b122758cea8c15e249']='python.d.conf' + ['6f54474c885234af0c792d135644d230']='python.d.conf' + ['7005feb3eb5d06416d07cdf7e7c54425']='python.d/ntpd.conf' + ['70105b1744a8e13f49083d7f1981aea2']='python.d/ipfs.conf' + ['707a63f53f4b32e01d134ae90ba94aad']='health_alarm_notify.conf' + ['707a63f53f4b32e01d134ae90ba94aad']='health_email_recipients.conf' + ['70d82dabecb09a1da4684f293abef0c9']='health_alarm_notify.conf' + ['7117b7067ac2b712aa4c9e92a6cdbf5a']='python.d/couchdb.conf' + ['7120cba2f55b1c0a97a0e10d4f6ef751']='health.d/ipmi.conf' + ['72246c32511197d87b004e67e4c8da36']='python.d/portcheck.conf' + ['729b3e24a72f7d566fd429617d51a21b']='health.d/web_log.conf' + ['72ea87f658483f47c38994291af488e8']='health_alarm_notify.conf' + ['73125ae64d5c6e9361944cd9bd14844e']='python.d/exim.conf' + ['731a1fcfe9b2da1b9d685056a59541b8']='python.d/hddtemp.conf' + ['73a8e10dfe4183aca751e9e2a80dabe3']='node.d.conf' + ['7454ed74511d7b9819dfe173f9020786']='python.d/redis.conf' + ['749fe31362969d75f1ea66d15231d98d']='python.d/retroshare.conf' + ['74e5e8d3a4b324f1770f61f78ee4b0e6']='health.d/beanstalkd.conf' + ['7502c931aa9acbb92f54c67978d75983']='stream.conf' + ['751f15371d0987018abc4d4ad60819f5']='apps_groups.conf' + ['7596ae54d46ce199ac599429ef753caf']='health.d/cpu.conf' + ['75a9c4b0b1c73956df55585eb0619f6c']='charts.d/ap.conf' + ['75ddb2b9bc38a5306bceb5acb0422fe3']='python.d/icecast.conf' + ['76205037196767f6877392862eb00d7b']='health.d/ram.conf' + ['763e24621c63f5aa05fd6dddf0c855ba']='health.d/nginx_plus.conf' + ['7673ea6afe0a286a77a390b9d042c191']='python.d/httpcheck.conf' + ['769aa4cdcdc3d78d0328d1f9e4edcdf9']='python.d/mysql.conf' + ['76a0c1b21e49850442a43efddb15a81e']='health.d/tcp_orphans.conf' + ['76a31091f42f2be1fab3bb56bb7ea400']='health_alarm_notify.conf' + ['76edb4cc11935aadaff53129c63457aa']='python.d.conf' + ['777f4da70f461ef675bde07fb3644312']='python.d/redis.conf' + ['777f55a95c5c25cf6176fece1ebbf4b8']='apps_groups.conf' + ['77b256144293ebfabad31779a5326948']='python.d/phpfpm.conf' + ['7808ba2ca26bd0642270740cf6a8ee59']='charts.d/mem_apps.conf' + ['7830066c46a7e5f9682b8d3f4566b4e5']='python.d/cpufreq.conf' + ['78bb08809dffcb62e9bc493840f9c039']='python.d/squid.conf' + ['78e0065738394f5bf15023f41d66ed4b']='python.d/squid.conf' + ['79a37756869d9b4629285922572d6b9b']='apps_groups.conf' + ['7a21ccc76be2968ce5d0b52ec1166788']='python.d.conf' + ['7a985528cc9176564640001aa73e3492']='health.d/nginx.conf' + ['7aa209fa287c95b3ca04c23681b40770']='health.d/disks.conf' + ['7ad46e684775d186251eb71b1e9be530']='charts.d/ap.conf' + ['7b3281b6dbdbbc0b48c53fd76033e0db']='health.d/disks.conf' + ['7bac18d8d5ff8f117be8d489a21c0c65']='python.d/mysql.conf' + ['7cf6402b51e5070f2be3ad6fe059ff89']='charts.d.conf' + ['7d8bd884ec26cb35d16c4fc05f969799']='python.d/squid.conf' + ['7deb236ec68a512b9bdd18e6a51d76f7']='python.d/mysql.conf' + ['7e5fc1644aa7a54f9dbb1bd102521b09']='health.d/memcached.conf' + ['7f13631183fbdf79c21c8e5a171e9b34']='health.d/zfs.conf' + ['82f1dc0a477a175ae31d7b815411e44e']='health.d/dbengine.conf' + ['7fb8184d56a27040e73261ed9c6fc76f']='health_alarm_notify.conf' + ['80266bddd3df374923c750a6de91d120']='health.d/apache.conf' + ['803a7f9dcb942eeac0fd764b9e3e38ca']='fping.conf' + ['80d242d619eb7e91cebfdbf58d79b0f8']='health.d/disks.conf' + ['80df37b89e852d585209b8c02bb94312']='python.d/bind_rndc.conf' + ['80f109ff293ac94222bf3959432751bd']='health.d/qos.conf' + ['81255035f6d53534938085df72cdef23']='health.d/nginx.conf' + ['8170ba3ae507cf9322bd60350348552e']='health.d/net.conf' + ['81af92c7050873c7de2fb42e0c3f04f4']='python.d/tomcat.conf' + ['81f7c857a9a7bcf12f500166bd6c7499']='health.d/linux_power_supply.conf' + ['81fd16f29d5f3d422fe1cee82dc8ed9d']='health.d/cpu.conf' + ['8213d921b6a8382e27052fb42d81db3d']='python.d/freeradius.conf' + ['8214bb8f4b005aa4691fcd38f7331e8f']='health.d/swap.conf' + ['830c4e7307b58a4c4bb5034f091e008d']='charts.d/nginx.conf' + ['8320bf7600afcaa3d419d268d5563133']='python.d/web_log.conf' + ['837480f77ba1a85677a36747fbc2cd2e']='python.d/sensors.conf' + ['8422e71761d22e817e3cfcb1befc6080']='health.d/mongodb.conf' + ['8425a60ea3d28ed40bb0bac4c3f182e8']='python.d/sensors.conf' + ['842b1ad5b89bfa5f421d9c5b72e001a4']='health.d/apache.conf' + ['845023f9b4a526aa0e6493756dbe6034']='health.d/squid.conf' + ['846ce94bfeeb90c0dc6a89e8d25f1a68']='health.d/named.conf' + ['846f6039460aa317f165d91a54cd8b07']='health.d/stiebeleltron.conf' + ['8490f690d97adacc4e2096df82e7e8a5']='charts.d/cpufreq.conf' + ['86a0d8d5619b5134e2d050805b45d6c3']='python.d/unbound.conf' + ['87155bea7383028b0c1846c802cfdd81']='python.d/mdstat.conf' + ['871bbeea33b83ea9755600b6d574919c']='python.d/web_log.conf' + ['87224d2f2b87646f3c0d38cc1eb30112']='python.d/nsd.conf' + ['87615ae5ac2412d853c717383fa53781']='python.d/chrony.conf' + ['87642c568093daf3b2c30c5beffe2225']='python.d/elasticsearch.conf' + ['8810140ce9c09af1d18b9602c4003904']='health_alarm_notify.conf' + ['886975ecb9a4e856151dc71024b122e6']='health.d/apcupsd.conf' + ['8891fb423f6b987281d7913bb6c1c024']='health.d/ipc.conf' + ['88e3b51b6b3fe8f317df82a2d4fbb990']='python.d.conf' + ['88f77865f75c9fb61c97d700bd4561ee']='python.d/mysql.conf' + ['8989b5e2f4ef9cd278ef58be0fae4074']='health.d/disks.conf' + ['899bcb0b3f4375b0a1280296be930201']='health.d/named.conf' + ['89fb3cbb223be4fa0cb676cfa3b07055']='health.d/backend.conf' + ['8a1b95d375992d7b11330a0ac46f369c']='health.d/disks.conf' + ['8a66a3085ad8892a002ff39b18b2cb07']='python.d/fail2ban.conf' + ['8abc7f66746b201b5b0af45c419d53bc']='health.d/bind_rndc.conf' + ['8b834a0f343a8e620dbb639270a84cce']='health.d/mdstat.conf' + ['8c0f037f8ad506c41acdbc4f9f6cead6']='health_alarm_notify.conf' + ['8c1d41e2c88aeca78bc319ed74c8748c']='python.d/phpfpm.conf' + ['8d0552371a7c9725a04196fa560813d1']='health.d/cpu.conf' + ['8d24873bb25c195026918f15626310ea']='health.d/softnet.conf' + ['8d736f551571675244d853bb2f53b3da']='health.d/load.conf' + ['8dc0bd0a70b5117454bd5f5b98f91c2c']='health.d/disks.conf' + ['8dc6a32b8e2995cbdd527c621a72c4fb']='health.d/ram.conf' + ['8ec636a4f96158044d2cec0fd1ff8452']='python.d/rabbitmq.conf' + ['8ed596c4f6f85b24a890cfe95f10ce9a']='python.d/ntpd.conf' + ['8f4f925c1e97dd164007495ec5135ffc']='health.d/fping.conf' + ['8f520e787d995943e61a777c826bddf7']='python.d/litespeed.conf' + ['8f7b734ea0f89abf8acbb47c50234477']='health.d/web_log.conf' + ['8fd472a854b0996327e8ed3562161182']='health_alarm_notify.conf' + ['919911d13901d60a7580f5dfd7fc87bb']='health.d/ram.conf' + ['91c377e7d26a1120cfbbd488332f0398']='python.d/dns_query_time.conf' + ['91c757ef6be3abdb86906d9dbb9c217a']='fping.conf' + ['91cf3b3d42cac969b8b3fd4f531ecfb3']='python.d/squid.conf' + ['91e1a9703debbdc64edf124419fdc14b']='python.d/elasticsearch.conf' + ['91f0a626c19f76241cadf9dbf28fb5a7']='health.d/beanstalkd.conf' + ['92024bbe088e55251665fb666305ff66']='python.d/mysql.conf' + ['920574fcfe56d5c9c11a583905e9db62']='health.d/tcp_conn.conf' + ['9347bcce0b3574ac5193d43248d2e3cc']='python.d/chrony.conf' + ['93c7c00103f63ea3b4eea1951dd16c95']='health_alarm_notify.conf' + ['94bb961f83ec724cf86239328f73a3db']='health.d/redis.conf' + ['94e567bbefd37db0c55d880ff61188a6']='health_alarm_notify.conf' + ['9542f80def48ba105190f6cdaa18248e']='health.d/mysql.conf' + ['95a27691df972832a5e7626ae59b0af6']='python.d/portcheck.conf' + ['96997c8bf3a65b9eac848cafa8c127d2']='python.d/portcheck.conf' + ['978daf0777ffe774e5a9576d33972e97']='python.d/smartd_log.conf' + ['97eee7a30e6419df4537242e9d4a719d']='health.d/mysql.conf' + ['97f337eb96213f3ede05e522e3743a6c']='python.d/memcached.conf' + ['98e4dd6ba71bf76767bc59c63a51b617']='apps_groups.conf' + ['98f6f917138949228b9fb88c61e5aea8']='charts.d/cpufreq.conf' + ['9962e20641036566279ac800861b7963']='python.d.conf' + ['99a3de85d1e7826ed64a5f8576712e5d']='python.d.conf' + ['99b06e68f1da5917ae4cf60e901439f6']='health.d/ram.conf' + ['99b6030ce25c8fee4598179c0f95fb0b']='health.d/redis.conf' + ['99c1617448abbdc493976ab9bda5ce02']='apps_groups.conf' + ['9a525125e705ca5a3146b3399be4510a']='python.d/nginx_plus.conf' + ['9a89bbdf5d9732b9a29a0aa82714059b']='health.d/dockerd.conf' + ['9a8a459a3841b78d4c6ef07428ad2fe1']='health.d/entropy.conf' + ['9b6eee7f2febb29efac2b7ea9fcab9be']='charts.d/nut.conf' + ['9c0185ceff15415bc59b2ce2c1f04367']='apps_groups.conf' + ['9c457056c9ee0d50f9717da647bbd444']='health_alarm_notify.conf' + ['9c8ddfa810d83ae58c8614ee5229e66b']='health.d/disks.conf' + ['9c981c75bdf4b1637f7113e7e45eb2bf']='health.d/memcached.conf' + ['9d304e41e32721224a743f25534263d9']='python.d/retroshare.conf' + ['9e0553ebdc21b64295873fc104cfa79d']='python.d.conf' + ['9e07d51bb83a38dcc37a39ca92fe4865']='charts.d/opensips.conf' + ['9e33f51e56d258e7f4336048edde2f5c']='health.d/httpcheck.conf' + ['9eb3326ae2ee9badeaad31d8dd2eaa2b']='python.d/isc_dhcpd.conf' + ['a02d14124b19c635c1426cee2e98bac5']='charts.d.conf' + ['a03f3e38378385bf87d4c0f81eb1f108']='health.d/tcp_resets.conf' + ['a09714b5942cf25a89ec3da1dbc18063']='health.d/ram.conf' + ['a0b3a12389c9c56dfe35964b20b59836']='health.d/bind_rndc.conf' + ['a0c0ef7ca9671f4b5e797d4276e5c0dd']='health.d/disks.conf' + ['a0ee8f351f213c0e8af9eb7a4a09cb95']='apps_groups.conf' + ['a1b53a225f225911cd8ac892bba8118b']='python.d/powerdns.conf' + ['a1b6dfe312b896b0b1ba471e8ac07f95']='python.d/isc_dhcpd.conf' + ['a1bb5823c4926b65ef4b2dae467fc847']='python.d/couchdb.conf' + ['a250e12f1ab4c18796fdaff5b0ba8968']='python.d/varnish.conf' + ['a2944a309f8ce1a3195451856478d6ae']='python.d.conf' + ['a2a647dc492dc2d6ed1f5c0fdc97a96e']='python.d/mongodb.conf' + ['a305b400378d6492efd15f9940c2779b']='health.d/softnet.conf' + ['a41885acf112563e3446f9d937362c9b']='python.d/chrony.conf' + ['a4407787e4beb23a701a8a614dca461d']='health.d/disks.conf' + ['a44899a5795bed2863c1d11aa3e85586']='health.d/swap.conf' + ['a4a8660728c6afcb528cc6b378897d6b']='health.d/squid.conf' + ['a4be524cc5b7192878c292a17c767c28']='health.d/redis.conf' + ['a4e8c35f8973049f4db5c8900e9a2354']='health_alarm_notify.conf' + ['a5114d5b0d3816dba75024b9444f4b40']='health.d/disks.conf' + ['a5134d7cfbe27f5791e788c2add51abb']='apps_groups.conf' + ['a55133f1b0be0a4255057849dd451b09']='health_alarm_notify.conf' + ['a6d5ce2572bf7a1dce9e545fcd29273e']='health.d/apache.conf' + ['a70e14bda17b076d2486232355652ae6']='apps_groups.conf' + ['a71d9082410200bf92e823675d78121c']='python.d/retroshare.conf' + ['a731b7b164f42717c1c9a778ee637ff3']='health.d/memcached.conf' + ['a7320c6f26191b9599ec3bc4be007a93']='health.d/swap.conf' + ['a752e51d923e15add4a11fa8f3be935a']='health_email_recipients.conf' + ['a78d59c2ad14a17b9b8c7fa5d796b427']='python.d.conf' + ['a7cceeafb1e6ef1ead503ab65f687902']='apps_groups.conf' + ['a8167dafeac0b66696a1d9b08e815cda']='health.d/disks.conf' + ['a837986be634fd7648bcdf939019424a']='apps_groups.conf' + ['a89c516a1144435a88decf25509318ac']='health_alarm_notify.conf' + ['a8bb4e1d0525f59692778ad8f675a77a']='python.d/example.conf' + ['a8feb36776005bf419c90278787a1be8']='health.d/entropy.conf' + ['a9150a0c61e1b360cf8c265ea2413d02']='python.d/couchdb.conf' + ['a94af1c808aafdf00537d85ff2197ec8']='python.d/exim.conf' + ['a9827518560ae7e811ef74e08cd4d3a6']='charts.d/load_average.conf' + ['a9ab68845db2fb695b7060273a6ac68e']='health_alarm_notify.conf' + ['a9cd91675467c5426f5b51c47602c889']='apps_groups.conf' + ['aa4bee249bfc0c4a88ac8c2ffb97aa0d']='health.d/squid.conf' + ['aa620b7017c8b864d80aa6c8acab01cf']='python.d/smartd_log.conf' + ['aa6c4a270e6276f2deddf127ee1a24f6']='statsd.d/example.conf' + ['aa8b57a733c2035917acf81a8ebdfbe7']='health.d/haproxy.conf' + ['aac44691a1cf95fa8f8990a79bab4ce1']='python.d/web_log.conf' + ['ab3902bf769ed35219691c95a3954ebb']='python.d/portcheck.conf' + ['abaf2e021f9f6ee5d1c4e4726f47348e']='health.d/ipc.conf' + ['abe1a80ac6d6f97bd324e72f31e8256e']='health.d/ram.conf' + ['ac8a91f0297bf7ebb8970f8cae4b3477']='health.d/ipc.conf' + ['acaa6731a272f6d251afb357e99b518f']='apps_groups.conf' + ['ad15b251b93f8b16bb33ec508f44a598']='health.d/netfilter.conf' + ['ade389c1b6efe0cff47c33e662731f0a']='python.d/squid.conf' + ['adf69efd83cb5079d0a5746e3568032f']='charts.d/exim.conf' + ['ae5ac0a3521e50aa6f6eda2a330b4075']='python.d/example.conf' + ['aee501b7f9b122b962521c45893371bb']='python.d/smartd_log.conf' + ['af12051cf57dd4e484ef8e64502b7549']='health.d/net.conf' + ['af14667ee7993acea810f6d50923bdc9']='health.d/web_log.conf' + ['af44cc53aa2bc5cc8935667119567522']='python.d.conf' + ['afdae4646c755ff2d117527fbf761c8e']='health.d/disks.conf' + ['b06d1063bc2200bb2d864021fa1a9cbd']='python.d.conf' + ['b07eebc6f58d19721ac069171b911d2a']='health_alarm_notify.conf' + ['b0c59b2bd7a10f6a3f2be6b4b27857db']='health.d/haproxy.conf' + ['b0f0a0ac415e4b1a82187b80d211e83b']='python.d/mysql.conf' + ['b181dcca01a258d9792ad703583baed2']='statsd.d/example.conf' + ['b185914d4f795e1732273dc4c7a35845']='health.d/memory.conf' + ['b210982cac9accfe43173cef5f46b361']='health.d/beanstalkd.conf' + ['b27f10a38a95edbbec20f44a4728b7c4']='python.d.conf' + ['b28c77dceeb398ca4ceec44c646f5431']='stream.conf' + ['b32164929eda7449a9677044e11151bf']='python.d.conf' + ['b3d48935ab7f44a57d40ad349df0033d']='python.d/postgres.conf' + ['b3fc4749b132e55ac0d3a0f92859237e']='health.d/tcp_resets.conf' + ['b44e33ba5c7a7306b467ac9c9b698895']='health.d/bcache.conf' + ['b4825f731cc7eb03b374eade14a453c1']='health.d/net.conf' + ['b5246eed059e33e0903a819fa5460ce0']='python.d/ipfs.conf' + ['b544e5934ac79a9548b5af6756c042a6']='apps_groups.conf' + ['b5b5a8d6d991fb1cef8d80afa23ba114']='python.d/cpufreq.conf' + ['b636e5e603f9d93e52c7577ac8c6bf0c']='health.d/entropy.conf' + ['b68706bb8101ef85192db92f865a5d80']='health_alarm_notify.conf' + ['b6ee82968de8fbf974c0d35b55fe6fae']='python.d/web_log.conf' + ['b735732fbe993d8191d6b3317082efa2']='health.d/qos.conf' + ['b75e2d3e69c1fe89c2f900bc201f7390']='health_alarm_notify.conf' + ['b7d769ce86a7aebba01315da5c0799e6']='health.d/ram.conf' + ['b81b8f331161b0d48e03f6fbf6b6d062']='health.d/memcached.conf' + ['b846ca1f99fa6a65303b58186f47d7a4']='python.d/squid.conf' + ['b854fcb711ee4d052741de5fc888682e']='health.d/backend.conf' + ['b8969be5b3ceb4a99477937119bd4323']='python.d.conf' + ['b8aff60806fb6829a4e72a824e655375']='health.d/beanstalkd.conf' + ['b8b87574fd496a66ede884c5336493bd']='python.d/phpfpm.conf' + ['b8ca1449d142b7f1cd202d875d400882']='health.d/apcupsd.conf' + ['b915126262d08aa9da81de539a58a3fb']='python.d/redis.conf' + ['ba11ea2d2f632b2de4b1224bcdc54f07']='python.d/smartd_log.conf' + ['bb51112d01ff20053196a57632df8962']='apps_groups.conf' + ['bba2f3886587f137ea08a6e63dd3d376']='python.d.conf' + ['bcaba2347951b301127fd502a219b26a']='python.d/apache.conf' + ['bcd94c4fa2f89c710ff807de061ab11c']='health.d/net.conf' + ['bd12233b529e3066d5b4a78da20c495e']='python.d/ntpd.conf' + ['bda5517ea01640cfdfa0a27549619d6a']='health.d/memcached.conf' + ['bdec19a255367f22b6fb652d0bef6bad']='python.d/httpcheck.conf' + ['bf66f113b2dd8d8fb444cbd5650f284c']='health_alarm_notify.conf' + ['bfa2f469e83cf2961963841e143049e6']='health.d/tcp_listen.conf' + ['bfd35a87c77c3a1dbe218fd02b529208']='charts.d/example.conf' + ['bff38dfe6c879f93ac49b77990fce1cc']='python.d/ipfs.conf' + ['c004430f55310ae9ed489c4905ed02cb']='charts.d/apache.conf' + ['c0385cdf9e87aca01f5dee2a5d89c467']='health_alarm_notify.conf' + ['c080e006f544c949baca33cc24a9c126']='health_alarm_notify.conf' + ['c0c4c63384ef408f0715331e7615aa60']='python.d/ceph.conf' + ['c132d2e257fc4df2925be7ad75100d5b']='health.d/entropy.conf' + ['c1a7e634b5b8aad523a0d115a93379cd']='health.d/memcached.conf' + ['c1d014ffaebfa0952968aeaf330e5337']='python.d.conf' + ['c30ee008173ba9f77adfcacbf138143e']='python.d/ovpn_status_log.conf' + ['c3296c08260bcd556e74711c820817be']='health.d/cpu.conf' + ['c3661b68232e06de90bb5e63e725b8b6']='health_alarm_notify.conf' + ['c45ab106725e94615bccf8be4b136d0f']='python.d.conf' + ['c482676558420c4b47162651a24b8baf']='python.d/httpcheck.conf' + ['c4f203b4b12c40640dd578af16a49bb1']='health.d/portcheck.conf' + ['c61948101e0e6846679682794ee48c5b']='python.d/nginx.conf' + ['c6403d8b1bcfa52d3abb941be155fc03']='python.d.conf' + ['c6b9f31e14adca433f82054f62388c47']='python.d/web_log.conf' + ['c84fd3292710091802e443c8e688dee1']='health_alarm_notify.conf' + ['c878060687b85c46006e9041f3632d88']='health_alarm_notify.conf' + ['c88fb430f35b7d8f08775d84debffbd2']='python.d/phpfpm.conf' + ['c8e339491a83df22decbdf5f1f8a037f']='python.d.conf' + ['c94cb4f4eeaa13c1dcee6248deb01829']='python.d/postgres.conf' + ['c9a16df512b4a9ce7fa65f5a69bda20a']='python.d/web_log.conf' + ['c9b792755de59d842ba95f8c315d94c8']='health.d/swap.conf' + ['c9d102c49da0cb57886c42d1016fa163']='python.d/httpcheck.conf' + ['ca026d7c779f0a7cb7787713c5be5c47']='charts.d.conf' + ['ca08a9b18d38ae0a0f5081a7cdc96863']='health.d/swap.conf' + ['ca0eb92bdd3de67582ea6db37462895f']='health.d/tcp_resets.conf' + ['ca249db7a0637d55abb938d969f9b486']='python.d/postfix.conf' + ['ca761cbf8a28317abe526ab3c2428472']='health.d/portcheck.conf' + ['ca9e52b3ee3c71d3d042dc531753a1fd']='apps_groups.conf' + ['cad263c67f779029a663b620d6c34704']='charts.d/libreswan.conf' + ['cb178b15427274d7def5b14bc4c09441']='health.d/net.conf' + ['cb60badf376d246ad8ec9d3f524db430']='health.d/disks.conf' + ['cb7f80cd2768c649d7448e01f8aa6579']='python.d.conf' + ['cc4d31a0d1ff9c339892c1f8a0c5fcd3']='charts.d/load_average.conf' + ['cca26b4d2384043f1737e0ed4a995600']='python.d/bind_rndc.conf' + ['ccde91d209aeb02c4a6be0e43a8d92b3']='health.d/apache.conf' + ['cce5176664d29d137fa7575b77de01e4']='health.d/tcp_resets.conf' + ['cd08e5534c94bf1f2cd28396c76b8bbc']='health.d/ram.conf' + ['cd15a9a77a46d66ca0beb55f2acb7538']='health.d/mysql.conf' + ['cd9a7de356d6424c4a71d87053726c86']='python.d/bind_rndc.conf' + ['cdd504812ff93073c57d02209d4d0f69']='health.d/cpu.conf' + ['cde652b15742e377e98e79fb9eb2acab']='health_alarm_notify.conf' + ['ce0fa3485a0d8d3aa80b25ab0c70cc5a']='charts.d/apcupsd.conf' + ['ce2e8768964a936f58c4c2144aee8a01']='health_alarm_notify.conf' + ['ce3b65eac6c472b21905f7f72104f4c9']='python.d/nginx.conf' + ['ce937f8b9ab7820b61ce9fcde6b946e8']='charts.d/nut.conf' + ['cf2c9096b3a8c506a3ec76fa52574395']='charts.d/phpfpm.conf' + ['cf46545065f7698c4d529fdc77955274']='python.d/puppet.conf' + ['cf48dfd828af70bea04db7a809f94358']='health.d/haproxy.conf' + ['cf8b87ede2d3233b6f55f4690af7fb08']='python.d/smartd_log.conf' + ['cfecf298bdafaa7e0a3a263548e82132']='python.d/sensors.conf' + ['d11711b3647bc2bdd0292dd7deebbeb1']='health.d/net.conf' + ['d1596fe068c8674efade49a4a8e22b5d']='health.d/isc_dhcpd.conf' + ['d162b7465a56151312e60151c1d74fba']='health.d/squid.conf' + ['d1e79707cd9b51a14288e8dd40694fcc']='fping.conf' + ['d297104e43ce2b544003271181e26ff6']='python.d/cpufreq.conf' + ['d29c5fa5faf74b86d01c2270a79388d8']='health.d/disks.conf' + ['d2b2ad30e277a69d8713e620dabc18bc']='python.d/phpfpm.conf' + ['d3bccdfe06c099673592d5375994c329']='charts.d/hddtemp.conf' + ['d3f397ead7f2ac8f88a99d7c5b8cff1d']='python.d/dovecot.conf' + ['d41d8cd98f00b204e9800998ecf8427e']='python.d/portcheck.conf' + ['d4adcebadc4c86332df247922b85aadc']='python.d/freeradius.conf' + ['d54f9652d6510d04339682d97cd7b6e4']='python.d/httpcheck.conf' + ['d55bdb83b9ff606852f6a97c1430258c']='health.d/ram.conf' + ['d55be5bb5e108da1e7645da007c53cd4']='python.d.conf' + ['d56c28ece8354850011f213d94d02fe0']='python.d/hddtemp.conf' + ['d5dab509d8792f795bece27de39dd476']='health.d/mysql.conf' + ['d69eba15d3e968187a938a7b98e22dda']='python.d.conf' + ['d6cd34c96e47a8a63732a6f1512f5c39']='python.d/ovpn_status_log.conf' + ['d712df81b17971884443a4a9bc996c9e']='health_alarm_notify.conf' + ['d74dc63fbe631dab9a2ff1b0f5d71719']='python.d/hddtemp.conf' + ['d7e0bd12d4a60a761dcab3531a841711']='python.d/phpfpm.conf' + ['d86e0502e394e0a16c0ca574db462653']='health.d/megacli.conf' + ['d8dc489e32f7114c6298fce94e86a8ef']='health.d/entropy.conf' + ['d9036091e2232fc2b8bfa8c7484dea28']='apps_groups.conf' + ['d9258e671d0d0b6498af1ce16ef030d2']='apps_groups.conf' + ['d9fa0290cdfe4153188bb52dd31191df']='apps_groups.conf' + ['da29d2ab1ab7b8fda189960c840e5144']='health.d/swap.conf' + ['dad303c5cca7a69345811a01a74f5892']='health.d/net.conf' + ['db305937e884c9f871bb076d5ed2946f']='health_alarm_notify.conf' + ['dc0d2b96378f290eec3fcf98b89ad824']='python.d/cpufreq.conf' + ['dc9c2a66778623a759706c14c3d91983']='health.d/net.conf' + ['dd220677c42c487549952048ee1f7750']='python.d/postgres.conf' + ['dd221c29dfb7c5586fc906748aa7c831']='health.d/tcp_listen.conf' + ['dd7764507804a2296bfd091a58ad4ad7']='health.d/memcached.conf' + ['dd8254ef74509a3e38cb2838e30f7e63']='health.d/disks.conf' + ['ddda2bb1c88be03b637d3285406f7910']='health.d/named.conf' + ['dddc4f93e6187fe4220eb6bf5e20f095']='health.d/ram.conf' + ['de02f899a61f21b86adb646940f0bcae']='health.d/net.conf' + ['de581daa11e267a583776bd8b8179884']='python.d/postgres.conf' + ['de5fe159e14b481d6bd69856eaddd242']='health_alarm_notify.conf' + ['def883f35986c9d25de63b1a8e7d0f46']='health.d/entropy.conf' + ['df381f3a7ca9fb2b4b43ae7cb7a4c492']='python.d/mysql.conf' + ['df7e8044902b5e155fad8430c2ddcfa8']='health.d/fping.conf' + ['dfd5431b11cf2f3852a40d390c1d5a92']='python.d/varnish.conf' + ['e0242003fd2e3f9ac1b9314e802ada79']='python.d/hddtemp.conf' + ['e0ba3bc216ffc9933b4741dbb6b1f8c8']='health.d/web_log.conf' + ['e0ffc0c34424b35666fddf7f61e05def']='health.d/tcp_resets.conf' + ['e100d98f3ed1eff59678f035b3b8daf2']='python.d/beanstalk.conf' + ['e12ab150198467aaa56b1091ba219587']='charts.d/nut.conf' + ['e1822c48067954e26649f7ad5fdb71f5']='health.d/softnet.conf' + ['e1a8bf99d36683c10225100f207a2b59']='python.d/web_log.conf' + ['e22f30680148a29d9738bd4bfe8b252c']='health_alarm_notify.conf' + ['e2e7adf66a28b8277f55e246b007f25a']='python.d/ntpd.conf' + ['e2f3388c06726154c10ec22bad5bc7ec']='fping.conf' + ['e3023092e3b2bbb5351e0fe6682f4fe9']='health_alarm_notify.conf' + ['e3112d8e06fa77888aab02e8fcd22e25']='apps_groups.conf' + ['e3996f70a4b09315b4a64e3df7d34d43']='python.d/rabbitmq.conf' + ['e3d100c2d0347c08efbf6245e05620c6']='python.d/fail2ban.conf' + ['e3e0c742427c9609ce923e845a0c8532']='health.d/ceph.conf' + ['e3e5bc57335c489f01b8559f5c70e112']='python.d/squid.conf' + ['e40947d22f7ed5359f12fc89e3512963']='python.d/dovecot.conf' + ['e445de5a4d6953bddec36d85b1b2771e']='python.d/linux_power_supply.conf' + ['e449e5582279742496550df14b6fca95']='health.d/entropy.conf' + ['e4ed13f996434ac17b40a2228c96283b']='python.d/tomcat.conf' + ['e5f32f54d6d6728f21f9ac26f37d6573']='python.d/example.conf' + ['e707ad89a146004ae281d66a4e01e5c1']='health.d/load.conf' + ['e70a7ee4999f30c6ceb75f31088a3a34']='python.d/powerdns.conf' + ['e734c5951a8764d4d9de046dd7cf7407']='health.d/softnet.conf' + ['e7ae3f2b00b9e5178acfe4f5e46228b7']='health.d/tcp_resets.conf' + ['e7bc22a1942cffbd2b1b0cfd119ee328']='health.d/ipfs.conf' + ['e8656d72dbd3b6fe603048ded751499a']='python.d/memcached.conf' + ['e8ec8046c7007af6ca3e8c51e62c99f8']='health.d/disks.conf' + ['ea031c1c0c36edee3bd08fae559c4203']='health_alarm_notify.conf' + ['ea1a96c42ad464c354fb250e3408c3e8']='stream.conf' + ['eaa7beb935cae9c48a40fb934eb105a7']='health.d/web_log.conf' + ['eb5168f0b516bc982aac45e59da6e52e']='health.d/nginx.conf' + ['eb748d6fb69d11b0d29c5794657e206c']='health.d/qos.conf' + ['eb9fedc3c1dface77312d9bf48f673a8']='stream.conf' + ['ebd0612ccc5807524ebb2b647e3e56c9']='apps_groups.conf' + ['eca875b2e4402ee07972589bad003e01']='python.d/traefik.conf' + ['ecb6c01fae255d369748406945a50435']='apps_groups.conf' + ['ecd3aa97e2581f88eb466d6612690ef2']='charts.d/nginx.conf' + ['ed43efac299c31f8fd5e2abccff30071']='python.d/samba.conf' + ['ed80e6b2cfc8b08adea7027fc03daa68']='python.d.conf' + ['edb48efc8f446624001e07d04f6cad1a']='apps_groups.conf' + ['ee5343881744e6a97e6ee5cdd329cfb8']='health.d/retroshare.conf' + ['eee974cea7534aeed2d38bcf0edf3f9e']='python.d/springboot.conf' + ['ef067629c7456cb934f110ce15200131']='stream.conf' + ['ef1861bf5725d91e773cbdba05687597']='python.d.conf' + ['ef9916ea144878a9f37cbb6b1b29da10']='health.d/squid.conf' + ['f075be84c5bfac7e34de2a091841360c']='statsd.d/example.conf' + ['f0a86c5bae3c4b32b266dacbf74ca4a3']='python.d/web_log.conf' + ['f1446cb3f1a905ee06defa2aa15ee806']='python.d/web_log.conf' + ['f1682835e3414f60284c13bf1662e50f']='health.d/web_log.conf' + ['f1f114647ed185c4812c361b1d870b44']='python.d/sensors.conf' + ['f2622abcee86b514976a053b528553d4']='python.d/web_log.conf' + ['f2f1b8656f5011e965ac45b818cf668d']='apps_groups.conf' + ['f3c56a769ffdf811ec13467d8f7cd3c0']='python.d/apache.conf' + ['f42389e5497a28205ba6fef4f716db4f']='python.d/nginx.conf' + ['f42df9f13abfae2426519c6728b34882']='charts.d/example.conf' + ['f4609cbd5e748f41ad4f1ea3d2dcfde0']='python.d.conf' + ['f4c5d88c34d3fb853498124177cc77f1']='python.d.conf' + ['f5736e0b2945182cb659cb0713eff923']='apps_groups.conf' + ['f66e5236ba1245bb2e5fd99191f114c6']='charts.d/hddtemp.conf' + ['f68ac0fca6b4ffc96097779344cabac6']='health.d/tcp_listen.conf' + ['f6c6656f900ff52d159dca12d624016a']='python.d/postgres.conf' + ['f72e44a305567c9b21a244ebd6da6800']='health_alarm_notify.conf' + ['f7401a6e7c7d4fe2e0e2be7f7f523275']='health.d/web_log.conf' + ['f7a99e94231beda85c6254912d8d31c1']='python.d/tomcat.conf' + ['f82924563e41d99cdae5431f0af69155']='python.d.conf' + ['f8c30f22df92765e2c0fab3c8174e2fc']='health.d/memcached.conf' + ['f8dade4484f1b6a48655388502df7d5a']='health_alarm_notify.conf' + ['f8e7d23a83fc8ee58f403da7bdbe7f8a']='python.d/phpfpm.conf' + ['f96acba4b14b0c1b50d0187a04416151']='health_alarm_notify.conf' + ['f9be549a849d023595d19d5d74263e0f']='health.d/tcp_resets.conf' + ['fa4396513b358d6ec6a7f5bfb08439b8']='health.d/net.conf' + ['fb1c75159f855f4b4671835f5bca1ef6']='python.d.conf' + ['fbdb6f5d3906d3d8ea4e28f6ba6965a6']='python.d/go_expvar.conf' + ['fc11bd9255ac382f442f31c1f1a32532']='health_alarm_notify.conf' + ['fc40b83f173bc4676d686867a8369a62']='python.d/dns_query_time.conf' + ['fc64f44eb19a8b5a88ba8dc28de355f6']='python.d/puppet.conf' + ['fc987459f82e251e31c41d822e5e8202']='python.d/nsd.conf' + ['fd3164e6e8cb6726706267eae49aa082']='health_alarm_notify.conf' + ['fdd11640ba626cc2064c2fe3ea3eee4c']='health.d/cpu.conf' + ['fde44f62c8d7e52f09705cd273fae6b1']='charts.d/tomcat.conf' + ['fdea185e0e52b459b48852aa37f20e0f']='apps_groups.conf' + ['fe069e4d6579ecdda7f36ac2318ffefc']='python.d/exim.conf' + ['fe2b15369de13b83b18e2ff5c7594a57']='python.d/monit.conf' + ['fe478efe2e721724edb1fe2ef1addf93']='health_alarm_notify.conf' + ['feb8bcf828aa2529a7ee4a140feeb12d']='health.d/net.conf' + ['ff1b3d8ae8b2149c711d8da9b7a9c4bd']='health_alarm_notify.conf' + ['ff3f0a9b1bf488a5075850cc16de3c26']='python.d/monit.conf' + ['ff940c5396f16d05deb5c5859832ee48']='health.d/swap.conf' +) diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..7ab7db4 --- /dev/null +++ b/configure.ac @@ -0,0 +1,1809 @@ +# +# Copyright (C) 2015 Alon Bar-Lev +# SPDX-License-Identifier: GPL-3.0-or-later +# +AC_PREREQ(2.60) + +# We do not use m4_esyscmd_s to support older autoconf. +define([VERSION_STRING], m4_esyscmd([git describe 2>/dev/null | tr -d '\n'])) +define([VERSION_FROM_FILE], m4_esyscmd([cat packaging/version | tr -d '\n'])) +m4_ifval(VERSION_STRING, [], [define([VERSION_STRING], VERSION_FROM_FILE)]) + +AC_INIT([netdata], VERSION_STRING[]) + +AM_MAINTAINER_MODE([disable]) +if test x"$USE_MAINTAINER_MODE" = xyes; then +AC_MSG_NOTICE(***************** MAINTAINER MODE *****************) +fi + +PACKAGE_RPM_VERSION="VERSION_STRING" +AC_SUBST([PACKAGE_RPM_VERSION]) + +# ----------------------------------------------------------------------------- +# autoconf initialization + +AC_CONFIG_AUX_DIR([.]) +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_MACRO_DIR([build/m4]) +AC_CONFIG_SRCDIR([daemon/main.c]) +define([AUTOMATE_INIT_OPTIONS], [tar-pax subdir-objects]) +m4_ifdef([AM_SILENT_RULES], [ + define([AUTOMATE_INIT_OPTIONS], [tar-pax silent-rules subdir-objects]) + ]) +AM_INIT_AUTOMAKE(AUTOMATE_INIT_OPTIONS) +m4_ifdef([AM_SILENT_RULES], [ + AM_SILENT_RULES([yes]) + ]) +AC_CANONICAL_HOST +AC_PROG_CC +AC_PROG_CC_C99 +AM_PROG_CC_C_O +AC_PROG_CXX +AC_PROG_INSTALL +PKG_PROG_PKG_CONFIG +AC_USE_SYSTEM_EXTENSIONS + +AC_LANG_PUSH([C]) +AX_COMPILER_VENDOR +AC_LANG_POP([C]) +if test "$ax_cv_c_compiler_vendor" = "clang"; then + AC_CHECK_PROG([AR], [llvm-ar], [llvm-ar], [ar]) + AC_CHECK_PROG([RANLIB], [llvm-ranlib], [llvm-ranlib], [ranlib]) +else + AC_CHECK_TOOL([RANLIB], [ranlib]) + AC_CHECK_TOOL([AR], [ar]) +fi + +# ----------------------------------------------------------------------------- +# configurable options + +AC_ARG_ENABLE( + [plugin-nfacct], + [AS_HELP_STRING([--enable-plugin-nfacct], [enable nfacct plugin @<:@default autodetect@:>@])], + , + [enable_plugin_nfacct="detect"] +) +AC_ARG_ENABLE( + [plugin-freeipmi], + [AS_HELP_STRING([--enable-plugin-freeipmi], [enable freeipmi plugin @<:@default autodetect@:>@])], + , + [enable_plugin_freeipmi="detect"] +) +AC_ARG_ENABLE( + [plugin-cups], + [AS_HELP_STRING([--enable-plugin-cups], [enable cups plugin @<:@default autodetect@:>@])], + , + [enable_plugin_cups="detect"] +) +AC_ARG_ENABLE( + [plugin-xenstat], + [AS_HELP_STRING([--enable-plugin-xenstat], [enable xenstat plugin @<:@default autodetect@:>@])], + , + [enable_plugin_xenstat="detect"] +) +AC_ARG_ENABLE( + [exporting-kinesis], + [AS_HELP_STRING([--enable-exporting-kinesis], [enable kinesis exporting connector @<:@default autodetect@:>@])], + , + [enable_exporting_kinesis="detect"] +) +AC_ARG_ENABLE( + [exporting-pubsub], + [AS_HELP_STRING([--enable-exporting-pubsub], [enable pubsub exporting connector @<:@default autodetect@:>@])], + , + [enable_exporting_pubsub="detect"] +) +AC_ARG_ENABLE( + [exporting-prometheus-remote-write], + [AS_HELP_STRING([--enable-exporting-prometheus-remote-write], [enable prometheus remote write exporting connector @<:@default autodetect@:>@])], + , + [enable_exporting_prometheus_remote_write="detect"] +) +AC_ARG_ENABLE( + [exporting-mongodb], + [AS_HELP_STRING([--enable-exporting-mongodb], [enable mongodb exporting @<:@default autodetect@:>@])], + , + [enable_exporting_mongodb="detect"] +) +AC_ARG_ENABLE( + [pedantic], + [AS_HELP_STRING([--enable-pedantic], [enable pedantic compiler warnings @<:@default disabled@:>@])], + , + [enable_pedantic="no"] +) +AC_ARG_ENABLE( + [accept4], + [AS_HELP_STRING([--disable-accept4], [System does not have accept4 @<:@default autodetect@:>@])], + , + [enable_accept4="detect"] +) +AC_ARG_WITH( + [webdir], + [AS_HELP_STRING([--with-webdir], [location of webdir @<:@PKGDATADIR/web@:>@])], + [webdir="${withval}"], + [webdir="\$(pkgdatadir)/web"] +) +AC_ARG_WITH( + [libcap], + [AS_HELP_STRING([--with-libcap], [build with libcap @<:@default autodetect@:>@])], + , + [with_libcap="detect"] +) +AC_ARG_WITH( + [zlib], + [AS_HELP_STRING([--without-zlib], [build without zlib @<:@default enabled@:>@])], + , + [with_zlib="yes"] +) +AC_ARG_WITH( + [math], + [AS_HELP_STRING([--without-math], [build without math @<:@default enabled@:>@])], + , + [with_math="yes"] +) +AC_ARG_WITH( + [user], + [AS_HELP_STRING([--with-user], [use this user to drop privilege @<:@default nobody@:>@])], + , + [with_user="nobody"] +) +AC_ARG_ENABLE( + [x86-sse], + [AS_HELP_STRING([--disable-x86-sse], [SSE/SS2 optimizations on x86 @<:@default enabled@:>@])], + , + [enable_x86_sse="yes"] +) +AC_ARG_ENABLE( + [lto], + [AS_HELP_STRING([--enable-lto], [Link Time Optimizations @<:@default disabled@:>@])], + , + [enable_lto="no"] +) +AC_ARG_ENABLE( + [https], + [AS_HELP_STRING([--enable-https], [Enable SSL support @<:@default autodetect@:>@])], + , + [enable_https="detect"] +) +AC_ARG_ENABLE( + [compression], + [AS_HELP_STRING([--enable-compression], [Enable LZ4 compression support @<:@default autodetect@:>@])], + , + [enable_compression="detect"] +) +AC_ARG_ENABLE( + [dbengine], + [AS_HELP_STRING([--disable-dbengine], [disable netdata dbengine @<:@default autodetect@:>@])], + , + [enable_dbengine="detect"] +) +AC_ARG_ENABLE( + [jsonc], + [AS_HELP_STRING([--enable-jsonc], [Enable JSON-C support @<:@default autodetect@:>@])], + , + [enable_jsonc="detect"] +) +AC_ARG_ENABLE( + [ebpf], + [AS_HELP_STRING([--disable-ebpf], [Disable eBPF support @<:@default autodetect@:>@])], + , + [enable_ebpf="detect"] +) +AC_ARG_WITH( + [bundled-lws], + [AS_HELP_STRING([--with-bundled-lws], [Use the bundled version of libwebsockets library @<:@default use system library@:>@])], + [with_bundled_lws="yes"], [with_bundled_lws="no"] +) +AC_ARG_WITH( + [bundled-protobuf], + [AS_HELP_STRING([--with-bundled-protobuf], + [Uses the bundled version of Google Protocol Buffers @<:@default bundled if present@:>@])], + [with_bundled_protobuf="$withval"], + [with_bundled_protobuf="detect"] +) +AC_ARG_ENABLE( + [ml], + [AS_HELP_STRING([--enable-ml], [Enable anomaly detection @<:@default autodetect@:>@])], + , + [enable_ml="detect"] +) +AC_ARG_ENABLE( + [ml_tests], + [AS_HELP_STRING([--enable-ml-tests], [Enable anomaly detection tests @<:@no@:>@])], + [enable_ml_tests="yes"], + [enable_ml_tests="no"] +) + +# ----------------------------------------------------------------------------- +# Enforce building with C99, bail early if we can't. +test "${ac_cv_prog_cc_c99}" = "no" && AC_MSG_ERROR([Netdata requires a compiler that supports C99 to build]) + +# ----------------------------------------------------------------------------- +# Check if cloud is enabled and if the functionality is available + +AC_ARG_ENABLE( + [cloud], + [AS_HELP_STRING([--disable-cloud], + [Disables all cloud functionality])], + [ enable_cloud="$enableval" ], + [ enable_cloud="detect" ] +) + +if test "${enable_cloud}" = "no"; then + AC_DEFINE([DISABLE_CLOUD], [1], [disable netdata cloud functionality]) +fi + +# ----------------------------------------------------------------------------- +# netdata required checks + +# fails on centos6 +#AX_CHECK_ENABLE_DEBUG() + +AX_GCC_FUNC_ATTRIBUTE([returns_nonnull]) +AX_GCC_FUNC_ATTRIBUTE([malloc]) +AX_GCC_FUNC_ATTRIBUTE([noreturn]) +AX_GCC_FUNC_ATTRIBUTE([noinline]) +AX_GCC_FUNC_ATTRIBUTE([format]) +AX_GCC_FUNC_ATTRIBUTE([warn_unused_result]) + +AC_CHECK_TYPES([struct timespec, clockid_t], [], [], [[#include ]]) +AC_SEARCH_LIBS([clock_gettime], [rt posix4]) +AC_CHECK_FUNCS([clock_gettime]) +AC_CHECK_FUNCS([sched_setscheduler sched_getscheduler sched_getparam sched_get_priority_min sched_get_priority_max getpriority setpriority nice]) +AC_CHECK_FUNCS([recvmmsg]) + +AC_TYPE_INT8_T +AC_TYPE_INT16_T +AC_TYPE_INT32_T +AC_TYPE_INT64_T +AC_TYPE_UINT8_T +AC_TYPE_UINT16_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_C_INLINE +AC_FUNC_STRERROR_R +AC_C__GENERIC +# AC_C_STMT_EXPR +AC_CANONICAL_HOST +AC_HEADER_MAJOR +AC_HEADER_RESOLV + +AC_CHECK_HEADERS_ONCE([sys/prctl.h]) +AC_CHECK_HEADERS_ONCE([sys/vfs.h]) +AC_CHECK_HEADERS_ONCE([sys/statfs.h]) +AC_CHECK_HEADERS_ONCE([linux/magic.h]) +AC_CHECK_HEADERS_ONCE([sys/statvfs.h]) +AC_CHECK_HEADERS_ONCE([sys/mount.h]) + +if test "${enable_accept4}" != "no"; then + AC_CHECK_FUNCS_ONCE(accept4) +fi + +AC_CHECK_FUNCS_ONCE(malloc_usable_size) + +# ----------------------------------------------------------------------------- +# operating system detection + +originalCFLAGS="${CFLAGS}" +AC_MSG_CHECKING([operating system]) +case "$host_os" in +freebsd*) + build_target=freebsd + OPTIONAL_OS_DEP_CFLAGS="-I/usr/local/include" + CFLAGS="${CFLAGS} ${OPTIONAL_OS_DEP_CFLAGS}" + LDFLAGS="${LDFLAGS} -L/usr/local/lib" + ;; +darwin*) + build_target=macos + LDFLAGS="${LDFLAGS} -framework CoreFoundation -framework IOKit" + # ----------------------------------------------------------------------------- + # Pull in OpenSSL properly if on macOS + if brew --prefix > /dev/null 2>&1; then + if brew --prefix --installed openssl > /dev/null 2>&1; then + HOMEBREW_OPENSSL_PREFIX=$(brew --prefix --installed openssl) + elif brew --prefix --installed openssl@3 > /dev/null 2>&1; then + HOMEBREW_OPENSSL_PREFIX=$(brew --prefix --installed openssl@3) + elif brew --prefix --installed openssl@1.1 > /dev/null 2>&1; then + HOMEBREW_OPENSSL_PREFIX=$(brew --prefix --installed openssl@1.1) + fi + if test -n "${HOMEBREW_OPENSSL_PREFIX}"; then + OPTIONAL_OS_DEP_CFLAGS="-I${HOMEBREW_OPENSSL_PREFIX}/include" + CFLAGS="${CFLAGS} ${OPTIONAL_OS_DEP_CFLAGS}" + LDFLAGS="${LDFLAGS} -L${HOMEBREW_OPENSSL_PREFIX}/lib" + fi + HOMEBREW_PREFIX=$(brew --prefix) + OPTIONAL_OS_DEP_CFLAGS="${OPTIONAL_OS_DEP_CFLAGS} -I${HOMEBREW_PREFIX}/include" + CFLAGS="${CFLAGS} -I${HOMEBREW_PREFIX}/include" + LDFLAGS="${LDFLAGS} -L${HOMEBREW_PREFIX}/lib" + fi + ;; +*) + build_target=linux + ;; +esac + +AM_CONDITIONAL([FREEBSD], [test "${build_target}" = "freebsd"]) +AM_CONDITIONAL([MACOS], [test "${build_target}" = "macos"]) +AM_CONDITIONAL([LINUX], [test "${build_target}" = "linux"]) +AC_MSG_RESULT([Host OS: ${build_target}]) + +# ----------------------------------------------------------------------------- +# backtrace + +AC_SEARCH_LIBS([backtrace], [execinfo], [AC_DEFINE([HAVE_BACKTRACE], [1], [backtrace availability])]) + +# ----------------------------------------------------------------------------- +# pthreads + +ACX_PTHREAD(, [AC_MSG_ERROR([Cannot initialize pthread environment])]) +LIBS="${PTHREAD_LIBS} ${LIBS}" +CFLAGS="${CFLAGS} ${PTHREAD_CFLAGS}" +CC="${PTHREAD_CC}" + +AC_CHECK_LIB( +[pthread], +[pthread_getname_np], +[AC_DEFINE([HAVE_PTHREAD_GETNAME_NP], [1], [Is set if pthread_getname_np is available])] +) + + +# ----------------------------------------------------------------------------- +# libm + +AC_ARG_VAR([MATH_CFLAGS], [C compiler flags for math]) +AC_ARG_VAR([MATH_LIBS], [linker flags for math]) +if test -z "${MATH_LIBS}"; then + AC_CHECK_LIB( + [m], + [sin], + [MATH_LIBS="-lm"] + ) +fi +test "${with_math}" = "yes" -a -z "${MATH_LIBS}" && AC_MSG_ERROR([math required but not found]) + +AC_MSG_CHECKING([if libm should be used]) +if test "${with_math}" != "no" -a ! -z "${MATH_LIBS}"; then + with_math="yes" + AC_DEFINE([STORAGE_WITH_MATH], [1], [math usability]) + OPTIONAL_MATH_CFLAGS="${MATH_CFLAGS}" + OPTIONAL_MATH_LIBS="${MATH_LIBS}" +else + with_math="no" +fi +AC_MSG_RESULT([${with_math}]) + +AC_CHECK_FUNCS([isfinite]) +AC_CHECK_FUNCS([finite]) + +# ----------------------------------------------------------------------------- +# libuv multi-platform support library with a focus on asynchronous I/O +# TODO: check version, uv_fs_scandir_next only available in version >= 1.0 + +AC_CHECK_LIB( + [uv], + [uv_fs_scandir_next], + [UV_LIBS="-luv"] +) +test -z "${UV_LIBS}" && \ + AC_MSG_ERROR([libuv required but not found. Try installing 'libuv1-dev' or 'libuv-devel'.]) +OPTIONAL_UV_CFLAGS="${UV_CFLAGS}" +OPTIONAL_UV_LIBS="${UV_LIBS}" + + +# ----------------------------------------------------------------------------- +# lz4 Extremely Fast Compression algorithm + +AC_CHECK_LIB( + [lz4], + [LZ4_initStream], + [LZ4_LIBS_FAST="-llz4"] +) + +AC_CHECK_LIB( + [lz4], + [LZ4_compress_default], + [LZ4_LIBS="-llz4"] +) + +# ----------------------------------------------------------------------------- +# zlib + +PKG_CHECK_MODULES( + [ZLIB], + [zlib], + [have_zlib=yes], + [have_zlib=no] +) +test "${with_zlib}" = "yes" -a "${have_zlib}" != "yes" && AC_MSG_ERROR([zlib required but not found. Try installing 'zlib1g-dev' or 'zlib-devel'.]) + +AC_MSG_CHECKING([if zlib should be used]) +if test "${with_zlib}" != "no" -a "${have_zlib}" = "yes"; then + with_zlib="yes" + AC_DEFINE([NETDATA_WITH_ZLIB], [1], [zlib usability]) + OPTIONAL_ZLIB_CFLAGS="${ZLIB_CFLAGS}" + OPTIONAL_ZLIB_LIBS="${ZLIB_LIBS}" +else + with_zlib="no" +fi +AC_MSG_RESULT([${with_zlib}]) + + +# ----------------------------------------------------------------------------- +# libuuid + +PKG_CHECK_MODULES( + [UUID], + [uuid], + [have_uuid=yes], + [AC_MSG_ERROR([libuuid required but not found. Try installing 'uuid-dev' or 'libuuid-devel'.])] +) +OPTIONAL_UUID_CFLAGS="${UUID_CFLAGS}" +OPTIONAL_UUID_LIBS="${UUID_LIBS}" + +# ----------------------------------------------------------------------------- +# OpenSSL Cryptography and SSL/TLS Toolkit + +AC_CHECK_LIB( + [crypto], + [SHA256_Init], + [SSL_LIBS="-lcrypto -lssl"] +) + +AC_CHECK_LIB( + [crypto], + [X509_VERIFY_PARAM_set1_host], + [ssl_host_validation="yes"], + [ssl_host_validation="no"] +) + +test -z "${SSL_LIBS}" || \ + AC_DEFINE([HAVE_CRYPTO], [1], [libcrypto availability]) + +if test "${ssl_host_validation}" = "no"; then + AC_DEFINE([HAVE_X509_VERIFY_PARAM_set1_host], [0], [ssl host validation]) + AC_MSG_WARN([DISABLING SSL HOSTNAME VALIDATION BECAUSE IT IS NOT AVAILABLE ON THIS SYSTEM.]) +else + AC_DEFINE([HAVE_X509_VERIFY_PARAM_set1_host], [1], [ssl host validation]) +fi + +# ----------------------------------------------------------------------------- +# JSON-C library + +PKG_CHECK_MODULES([JSON],[json-c],AC_CHECK_LIB( + [json-c], + [json_object_get_type], + [JSONC_LIBS="-ljson-c"]),AC_CHECK_LIB( + [json], + [json_object_get_type], + [JSONC_LIBS="-ljson"]) + ) + +OPTIONAL_JSONC_LIBS="${JSONC_LIBS}" + +# ----------------------------------------------------------------------------- +# DB engine and HTTPS +test "${enable_dbengine}" = "yes" -a -z "${LZ4_LIBS}" && \ + AC_MSG_ERROR([liblz4 required but not found. Try installing 'liblz4-dev' or 'lz4-devel'.]) + +AC_C_BIGENDIAN([], + [LIBJUDY_CFLAGS="-DJU_LITTLE_ENDIAN"], + [AC_MSG_ERROR([Could not find out system endiannnes])]) + +AC_CHECK_SIZEOF(void *) +if test "$ac_cv_sizeof_void_p" = 8; then + AC_MSG_RESULT(Detected 64-bit Build Environment) + LIBJUDY_CFLAGS="$LIBJUDY_CFLAGS -DJU_64BIT" +else + AC_MSG_RESULT(Detected 32-bit Build Environment) + LIBJUDY_CFLAGS="$LIBJUDY_CFLAGS -UJU_64BIT" +fi + +AC_SUBST([LIBJUDY_CFLAGS]) + +JUDY_CFLAGS="-I \$(abs_top_srcdir)/libnetdata/libjudy/src" + +test "${enable_https}" = "yes" -a -z "${SSL_LIBS}" && \ + AC_MSG_ERROR([OpenSSL required for HTTPS but not found. Try installing 'libssl-dev' or 'openssl-devel'.]) + +test "${enable_dbengine}" = "yes" -a -z "${SSL_LIBS}" && \ + AC_MSG_ERROR([OpenSSL required for DBENGINE but not found. Try installing 'libssl-dev' or 'openssl-devel'.]) + +AC_MSG_CHECKING([if netdata dbengine should be used]) +if test "${enable_dbengine}" != "no" -a "${UV_LIBS}" -a "${LZ4_LIBS}" -a "${SSL_LIBS}"; then + enable_dbengine="yes" + AC_DEFINE([ENABLE_DBENGINE], [1], [netdata dbengine usability]) + OPTIONAL_LZ4_CFLAGS="${LZ4_CFLAGS}" + OPTIONAL_LZ4_LIBS="${LZ4_LIBS}" + OPTIONAL_SSL_CFLAGS="${SSL_CFLAGS}" + OPTIONAL_SSL_LIBS="${SSL_LIBS}" +else + enable_dbengine="no" +fi +AC_MSG_RESULT([${enable_dbengine}]) +AM_CONDITIONAL([ENABLE_DBENGINE], [test "${enable_dbengine}" = "yes"]) + +AC_MSG_CHECKING([if netdata https should be used]) +if test "${enable_https}" != "no" -a "${SSL_LIBS}"; then + enable_https="yes" + AC_DEFINE([ENABLE_HTTPS], [1], [netdata HTTPS usability]) + OPTIONAL_SSL_CFLAGS="${SSL_CFLAGS}" + OPTIONAL_SSL_LIBS="${SSL_LIBS}" +else + enable_https="no" +fi +AC_MSG_RESULT([${enable_https}]) +AM_CONDITIONAL([ENABLE_HTTPS], [test "${enable_https}" = "yes"]) + +AC_MSG_CHECKING([if netdata compression should be used]) +if test "${enable_compression}" != "no"; then + if test "${LZ4_LIBS_FAST}"; then + LIBS_BKP="${LIBS}" + LIBS="${LZ4_LIBS_FAST}" + AC_TRY_LINK( + [ #include ], + [ + LZ4_stream_t* stream = LZ4_initStream(NULL, 0); + ], + [ enable_compression="yes"], + [ enable_compression="no" ] + ) + LIBS="${LIBS_BKP}" + if test "${enable_compression}" == "yes"; then + OPTIONAL_LZ4_LIBS="${LZ4_LIBS_FAST}" + AC_DEFINE([ENABLE_COMPRESSION], [1], [netdata compression usability]) + fi + else + if test "${enable_compression}" == "yes"; then + AC_MSG_ERROR([liblz4 with version >= 1.9.0 required to enable_compression. Try installing the required version of 'liblz4-dev' or 'liblz4-devel'.]) + enable_compression="no" + fi + enable_compression="no" + fi +else + enable_compression="no" +fi +AC_MSG_RESULT([${enable_compression}]) +AM_CONDITIONAL([ENABLE_COMPRESSION], [test "${enable_compression}" = "yes"]) + +# ----------------------------------------------------------------------------- +# JSON-C + +if test "${enable_jsonc}" != "no" -a -z "${JSONC_LIBS}"; then + # Try and detect manual static build presence (from netdata-installer.sh) + AC_MSG_CHECKING([if statically built json-c is present]) + HAVE_libjson_c_a="no" + if test -f "externaldeps/jsonc/libjson-c.a"; then + LIBS_BKP="${LIBS}" + LIBS="externaldeps/jsonc/libjson-c.a" + AC_LINK_IFELSE([AC_LANG_SOURCE([[#include "externaldeps/jsonc/json-c/json.h" + int main (int argc, char **argv) { + struct json_object *jobj; + char *str = "{ \"msg-type\": \"random\" }"; + jobj = json_tokener_parse(str); + json_object_get_type(jobj); + }]])], + [HAVE_libjson_c_a="yes"], + [HAVE_libjson_c_a="no"]) + LIBS="${LIBS_BKP}" + fi + + if test "${HAVE_libjson_c_a}" = "yes"; then + AC_DEFINE([LINK_STATIC_JSONC], [1], [static json-c should be used]) + JSONC_LIBS="static" + OPTIONAL_JSONC_STATIC_CFLAGS="-I \$(abs_top_srcdir)/externaldeps/jsonc" + fi + AC_MSG_RESULT([${HAVE_libjson_c_a}]) +fi +AM_CONDITIONAL([LINK_STATIC_JSONC], [test "${JSONC_LIBS}" = "static"]) + +test "${enable_jsonc}" = "yes" -a -z "${JSONC_LIBS}" && \ + AC_MSG_ERROR([JSON-C required but not found. Try installing 'libjson-c-dev' or 'json-c'.]) + +AC_MSG_CHECKING([if json-c should be used]) +if test "${enable_jsonc}" != "no" -a "${JSONC_LIBS}"; then + enable_jsonc="yes" + AC_DEFINE([ENABLE_JSONC], [1], [netdata json-c usability]) +else + enable_jsonc="no" +fi +AC_MSG_RESULT([${enable_jsonc}]) +AM_CONDITIONAL([ENABLE_JSONC], [test "${enable_jsonc}" = "yes"]) + +# ----------------------------------------------------------------------------- +# compiler options + +AC_ARG_VAR([SSE_CANDIDATE], [C compiler flags for SSE]) +AS_CASE([$host_cpu], + [i?86], [SSE_CANDIDATE="yes"] +) +AC_SUBST([SSE_CANDIDATE]) +if test "${SSE_CANDIDATE}" = "yes" -a "${enable_x86_sse}" = "yes"; then + opt="-msse2 -mfpmath=sse" + AX_CHECK_COMPILE_FLAG(${opt}, [CFLAGS="${CFLAGS} ${opt}"], []) +fi + +if test "${GCC}" = "yes"; then + AC_DEFINE_UNQUOTED([likely(x)], [__builtin_expect(!!(x), 1)], [gcc branch optimization]) + AC_DEFINE_UNQUOTED([unlikely(x)], [__builtin_expect(!!(x), 0)], [gcc branch optimization]) +else + AC_DEFINE_UNQUOTED([likely(x)], [(x)], [gcc branch optimization]) + AC_DEFINE_UNQUOTED([unlikely(x)], [(x)], [gcc branch optimization]) +fi + +if test "${GCC}" = "yes"; then + AC_DEFINE([__always_unused], [__attribute__((unused))], [gcc unused attribute]) + AC_DEFINE([__maybe_unused], [__attribute__((unused))], [gcc unused attribute]) +else + AC_DEFINE([__always_unused], [], [dummy unused attribute]) + AC_DEFINE([__maybe_unused], [], [dummy unused attribute]) +fi + +if test "${enable_pedantic}" = "yes"; then + enable_strict="yes" + CFLAGS="${CFLAGS} -pedantic -Wall -Wextra -Wno-long-long" +fi + + +# ----------------------------------------------------------------------------- +# memory allocation library + +AC_MSG_CHECKING([for memory allocator]) +TS_CHECK_JEMALLOC +if test "$has_jemalloc" = "1"; then + AC_DEFINE([ENABLE_JEMALLOC], [1], [compile and link with jemalloc]) + AC_MSG_RESULT([jemalloc]) +else + TS_CHECK_TCMALLOC + if test "$has_tcmalloc" = "1"; then + AC_DEFINE([ENABLE_TCMALLOC], [1], [compile and link with tcmalloc]) + AC_MSG_RESULT([tcmalloc]) + else + AC_MSG_RESULT([system]) + AC_C_MALLOPT + AC_C_MALLINFO + fi +fi + + +# ----------------------------------------------------------------------------- +# libcap + +PKG_CHECK_MODULES( + [LIBCAP], + [libcap], + [AC_CHECK_LIB([cap], [cap_get_proc, cap_set_proc], + [AC_CHECK_HEADER( + [sys/capability.h], + [have_libcap=yes], + [have_libcap=no] + )], + [have_libcap=no] + )], + [have_libcap=no] +) +test "${with_libcap}" = "yes" -a "${have_libcap}" != "yes" && AC_MSG_ERROR([libcap required but not found.]) + +AC_MSG_CHECKING([if libcap should be used]) +if test "${with_libcap}" != "no" -a "${have_libcap}" = "yes"; then + with_libcap="yes" + AC_DEFINE([HAVE_CAPABILITY], [1], [libcap usability]) + OPTIONAL_LIBCAP_CFLAGS="${LIBCAP_CFLAGS}" + OPTIONAL_LIBCAP_LIBS="${LIBCAP_LIBS}" +else + with_libcap="no" +fi +AC_MSG_RESULT([${with_libcap}]) +AM_CONDITIONAL([ENABLE_CAPABILITY], [test "${with_libcap}" = "yes"]) + +# ----------------------------------------------------------------------------- +# ACLK + +bundled_proto_avail="no" +if test "${with_bundled_protobuf}" != "no"; then + AC_MSG_CHECKING([is bundled protobuf available]) + if test -f "externaldeps/protobuf/src/protoc"; then + bundled_proto_avail="yes" + fi + AC_MSG_RESULT([${bundled_proto_avail}]) + if test "${with_bundled_protobuf}" == "yes" -a "${bundled_proto_avail}" != "yes"; then + AC_MSG_ERROR([Bundled protobuf requested using --with-bundled-protobuf but it cannot be used/found]) + fi + if test "${with_bundled_protobuf}" == "detect" -a "${bundled_proto_avail}" == "yes"; then + with_bundled_protobuf="yes" + fi +fi + +if test "${with_bundled_protobuf}" != "yes"; then +PKG_CHECK_MODULES( + [PROTOBUF], + [protobuf >= 3], + [have_libprotobuf=yes], + [have_libprotobuf=no] +) + +AC_PATH_PROG([PROTOC], [protoc], [no]) +AS_IF( + [test x"${PROTOC}" == x"no"], + [have_protoc=no], + [have_protoc=yes] +) +else + AC_MSG_NOTICE([using bundled protobuf]) + AC_DEFINE([BUNDLED_PROTOBUF], [1], [Using a bundled copy of protobuf]) + PROTOC="\$(abs_top_srcdir)/externaldeps/protobuf/src/protoc" + PROTOBUF_CFLAGS="-I \$(abs_top_srcdir)/externaldeps/protobuf/src" + PROTOBUF_LIBS="\$(abs_top_srcdir)/externaldeps/protobuf/src/.libs/libprotobuf.a" + have_libprotobuf="yes" + have_protoc="yes" +fi + +AC_PATH_PROG([CXX_BINARY], [${CXX}], [no]) +AS_IF( + [test x"${CXX_BINARY}" == x"no"], + [have_CXX_compiler=no], + [have_CXX_compiler=yes] +) + +if test "${have_libprotobuf}" == "yes" && test "${have_CXX_compiler}" == "yes"; then + AC_DEFINE([HAVE_PROTOBUF], [1], [Protobuf is available]) +fi + +AC_MSG_CHECKING([if Cloud functionality should be enabled]) +AC_MSG_RESULT([${enable_cloud}]) + +if test "$enable_cloud" != "no"; then + AC_MSG_NOTICE([Checking if ACLK can be built]) + can_enable_ng="yes" + AC_MSG_CHECKING([if git submodules present for ACLK]) + if test -f "mqtt_websockets/src/mqtt_wss_client.c"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + can_enable_ng="no" + fi + AC_MSG_CHECKING([if SSL available for ACLK]) + if test -n "${SSL_LIBS}"; then + AC_MSG_RESULT([yes]) + OPTIONAL_SSL_CFLAGS="${SSL_CFLAGS}" + OPTIONAL_SSL_LIBS="${SSL_LIBS}" + else + AC_MSG_RESULT([no]) + fi + AC_MSG_CHECKING([if JSON-C available for ACLK]) + if test "$enable_jsonc" != "yes"; then + AC_MSG_RESULT([no]) + can_enable_ng="no" + else + AC_MSG_RESULT([yes]) + fi + + AC_MSG_CHECKING([if protobuf available for ACLK New Cloud Protocol]) + if test "${have_libprotobuf}" != "yes"; then + AC_MSG_RESULT([no]) + can_enable_ng="no" + else + AC_MSG_RESULT([yes]) + fi + AC_MSG_CHECKING([if protoc available for ACLK New Cloud Protocol]) + if test "${have_protoc}" != "yes"; then + AC_MSG_RESULT([no]) + can_enable_ng="no" + else + AC_MSG_RESULT([yes]) + fi + AC_MSG_CHECKING([if C++ compiler available for ACLK New Cloud Protocol]) + if test "${have_CXX_compiler}" != "yes"; then + AC_MSG_RESULT([no]) + can_enable_ng="no" + else + AC_MSG_RESULT([yes]) + fi + + AC_MSG_CHECKING([ACLK Next Generation can be built]) + AC_MSG_RESULT([${can_enable_ng}]) + if test "$can_enable_ng" = "no" -a "$enable_cloud" = "yes"; then + AC_MSG_ERROR([You have requested --with-cloud but ACLK can't be built. See reasons in lines above]) + fi + if test "$can_enable_ng" = "yes"; then + enable_aclk="yes" + AC_DEFINE([ENABLE_ACLK], [1], [netdata ACLK]) + OPTIONAL_ACLK_CFLAGS="-I \$(abs_top_srcdir)/mqtt_websockets/src/include -I \$(abs_top_srcdir)/mqtt_websockets/c-rbuf/include -I \$(abs_top_srcdir)/mqtt_websockets/MQTT-C/include -I \$(abs_top_srcdir)/aclk/aclk-schemas" + OPTIONAL_PROTOBUF_CFLAGS="${PROTOBUF_CFLAGS}" + CXX11FLAG="-std=c++11" + OPTIONAL_PROTOBUF_LIBS="${PROTOBUF_LIBS}" + fi +fi + +if test "$enable_cloud" = "yes" -a "$enable_aclk" != "yes"; then + AC_MSG_ERROR([ACLK can't be built but --enable-cloud was requested]) +fi + +AC_SUBST([enable_cloud]) +AC_SUBST([enable_aclk]) +AM_CONDITIONAL([ENABLE_ACLK], [test "${enable_aclk}" = "yes"]) + +# ----------------------------------------------------------------------------- +# apps.plugin + +AC_MSG_CHECKING([if apps.plugin should be enabled]) +if test "${build_target}" != "macos"; then + AC_DEFINE([ENABLE_APPS_PLUGIN], [1], [apps.plugin]) + enable_plugin_apps="yes" +else + enable_plugin_apps="no" +fi +AC_MSG_RESULT([${enable_plugin_apps}]) +AM_CONDITIONAL([ENABLE_PLUGIN_APPS], [test "${enable_plugin_apps}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# freeipmi.plugin - libipmimonitoring + +PKG_CHECK_MODULES( + [IPMIMONITORING], + [libipmimonitoring], + [AC_CHECK_LIB([ipmimonitoring], [ + ipmi_monitoring_sensor_readings_by_record_id, + ipmi_monitoring_sensor_readings_by_sensor_type, + ipmi_monitoring_sensor_read_sensor_number, + ipmi_monitoring_sensor_read_sensor_name, + ipmi_monitoring_sensor_read_sensor_state, + ipmi_monitoring_sensor_read_sensor_units, + ipmi_monitoring_sensor_iterator_next, + ipmi_monitoring_ctx_sensor_config_file, + ipmi_monitoring_ctx_sdr_cache_directory, + ipmi_monitoring_ctx_errormsg, + ipmi_monitoring_ctx_create + ], + [AC_CHECK_HEADER( + [ipmi_monitoring.h], + [AC_CHECK_HEADER( + [ipmi_monitoring_bitmasks.h], + [have_ipmimonitoring=yes], + [have_ipmimonitoring=no] + )], + [have_ipmimonitoring=no] + )], + [have_ipmimonitoring=no] + )], + [have_ipmimonitoring=no] +) +test "${enable_plugin_freeipmi}" = "yes" -a "${have_ipmimonitoring}" != "yes" && \ + AC_MSG_ERROR([ipmimonitoring required but not found. Try installing 'libipmimonitoring-dev' or 'libipmimonitoring-devel']) + +AC_MSG_CHECKING([if freeipmi.plugin should be enabled]) +if test "${enable_plugin_freeipmi}" != "no" -a "${have_ipmimonitoring}" = "yes"; then + enable_plugin_freeipmi="yes" + AC_DEFINE([HAVE_FREEIPMI], [1], [ipmimonitoring usability]) + OPTIONAL_IPMIMONITORING_CFLAGS="${IPMIMONITORING_CFLAGS}" + OPTIONAL_IPMIMONITORING_LIBS="${IPMIMONITORING_LIBS}" +else + enable_plugin_freeipmi="no" +fi +AC_MSG_RESULT([${enable_plugin_freeipmi}]) +AM_CONDITIONAL([ENABLE_PLUGIN_FREEIPMI], [test "${enable_plugin_freeipmi}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# cups.plugin - libcups + +# Only check most recently added method of cups +AC_CHECK_LIB([cups], [httpConnect2], + [AC_CHECK_HEADER( + [cups/cups.h], + [have_cups=yes], + [have_cups=no] + )], + [have_cups=no] +) + +test "${enable_plugin_cups}" = "yes" -a "${have_cups}" != "yes" && \ + AC_MSG_ERROR([cups required but not found. Try installing 'cups']) + +AC_ARG_WITH([cups-config], + [AS_HELP_STRING([--with-cups-config=path], [Specify path to cups-config executable.])], + [with_cups_config="$withval"], + [with_cups_config=system] + ) + +AS_IF([test "x$with_cups_config" != "xsystem"], [ + CUPSCONFIG=$with_cups_config +], [ + AC_PATH_TOOL(CUPSCONFIG, [cups-config]) + AS_IF([test -z "$CUPSCONFIG"], [ + have_cups=no + ]) +]) + +AC_MSG_CHECKING([if cups.plugin should be enabled]) +if test "${enable_plugin_cups}" != "no" -a "${have_cups}" = "yes"; then + enable_plugin_cups="yes" + AC_DEFINE([HAVE_CUPS], [1], [cups usability]) + + CUPS_CFLAGS="${CUPS_CFLAGS} `$CUPSCONFIG --cflags`" + CUPS_LIBS="${CUPS_LIBS} `$CUPSCONFIG --libs`" + + OPTIONAL_CUPS_CFLAGS="${CUPS_CFLAGS}" + OPTIONAL_CUPS_LIBS="${CUPS_LIBS}" +else + enable_plugin_cups="no" +fi +AC_MSG_RESULT([${enable_plugin_cups}]) +AM_CONDITIONAL([ENABLE_PLUGIN_CUPS], [test "${enable_plugin_cups}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# nfacct.plugin - libmnl, libnetfilter_acct + +AC_CHECK_HEADER( + [linux/netfilter/nfnetlink_conntrack.h], + [AC_CHECK_DECL( + [CTA_STATS_MAX], + [have_nfnetlink_conntrack=yes], + [have_nfnetlink_conntrack=no], + [#include ] + )], + [have_nfnetlink_conntrack=no] +) + +PKG_CHECK_MODULES( + [NFACCT], + [libnetfilter_acct], + [AC_CHECK_LIB( + [netfilter_acct], + [nfacct_alloc], + [have_libnetfilter_acct=yes], + [have_libnetfilter_acct=no] + )], + [have_libnetfilter_acct=no] +) + +PKG_CHECK_MODULES( + [LIBMNL], + [libmnl], + [AC_CHECK_LIB( + [mnl], + [mnl_socket_open], + [have_libmnl=yes], + [have_libmnl=no] + )], + [have_libmnl=no] +) + +test "${enable_plugin_nfacct}" = "yes" -a "${have_nfnetlink_conntrack}" != "yes" && \ + AC_MSG_ERROR([nfnetlink_conntrack.h required but not found or too old]) + +test "${enable_plugin_nfacct}" = "yes" -a "${have_libnetfilter_acct}" != "yes" && \ + AC_MSG_ERROR([netfilter_acct required but not found]) + +test "${enable_plugin_nfacct}" = "yes" -a "${have_libmnl}" != "yes" && \ + AC_MSG_ERROR([libmnl required but not found. Try installing 'libmnl-dev' or 'libmnl-devel']) + +AC_MSG_CHECKING([if nfacct.plugin should be enabled]) +if test "${enable_plugin_nfacct}" != "no" -a "${have_libnetfilter_acct}" = "yes" \ + -a "${have_libmnl}" = "yes" \ + -a "${have_nfnetlink_conntrack}" = "yes"; then + enable_plugin_nfacct="yes" + AC_DEFINE([HAVE_NFACCT], [1], [netfilter accounting usability]) + OPTIONAL_NFACCT_CFLAGS="${NFACCT_CFLAGS} ${LIBMNL_CFLAGS}" + OPTIONAL_NFACCT_LIBS="${NFACCT_LIBS} ${LIBMNL_LIBS}" +else + enable_plugin_nfacct="no" +fi +AC_MSG_RESULT([${enable_plugin_nfacct}]) +AM_CONDITIONAL([ENABLE_PLUGIN_NFACCT], [test "${enable_plugin_nfacct}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# xenstat.plugin - libxenstat + +PKG_CHECK_MODULES( + [YAJL], + [yajl], + [AC_CHECK_LIB( + [yajl], + [yajl_tree_get], + [have_libyajl=yes], + [have_libyajl=no] + )], + [have_libyajl=no] +) + +AC_CHECK_LIB( + [xenstat], + [xenstat_init], + [AC_CHECK_HEADER( + [xenstat.h], + [have_libxenstat=yes], + [have_libxenstat=no] + )], + [have_libxenstat=no], + [-lyajl] +) + +PKG_CHECK_MODULES( + [XENLIGHT], + [xenlight], + [AC_CHECK_LIB( + [xenlight], + [libxl_domain_info], + [AC_CHECK_HEADER( + [libxl.h], + [have_libxenlight=yes], + [have_libxenlight=no] + )], + [have_libxenlight=no] + )], + [have_libxenlight=no] +) + +test "${enable_plugin_xenstat}" = "yes" -a "${have_libxenstat}" != "yes" && \ + AC_MSG_ERROR([libxenstat required but not found. try installing 'xen-dom0-libs-devel']) + +test "${enable_plugin_xenstat}" = "yes" -a "${have_libxenlight}" != "yes" && \ + AC_MSG_ERROR([libxenlight required but not found. try installing 'xen-dom0-libs-devel']) + +test "${enable_plugin_xenstat}" = "yes" -a "${have_libyajl}" != "yes" && \ + AC_MSG_ERROR([libyajl required but not found. Try installing 'yajl-devel']) + +AC_MSG_CHECKING([if xenstat.plugin should be enabled]) +if test "${enable_plugin_xenstat}" != "no" -a "${have_libxenstat}" = "yes" -a "${have_libxenlight}" = "yes" -a "${have_libyajl}" = "yes"; then + enable_plugin_xenstat="yes" + AC_DEFINE([HAVE_LIBXENSTAT], [1], [libxenstat usability]) + OPTIONAL_XENSTAT_CFLAGS="${XENLIGHT_CFLAGS} ${YAJL_CFLAGS}" + OPTIONAL_XENSTAT_LIBS="-lxenstat ${XENLIGHT_LIBS} ${YAJL_LIBS}" +else + enable_plugin_xenstat="no" +fi +AC_MSG_RESULT([${enable_plugin_xenstat}]) +AM_CONDITIONAL([ENABLE_PLUGIN_XENSTAT], [test "${enable_plugin_xenstat}" = "yes"]) + +if test "${enable_plugin_xenstat}" == "yes"; then + AC_MSG_CHECKING([for xenstat_vbd_error in -lxenstat]) + AC_TRY_LINK( + [ #include ], + [ + xenstat_vbd * vbd; + int out = xenstat_vbd_error(vbd); + ], + [ + have_xenstat_vbd_error=yes + AC_DEFINE([HAVE_XENSTAT_VBD_ERROR], [1], [xenstat_vbd_error usability]) + ], + [ have_xenstat_vbd_error=no ] + ) + AC_MSG_RESULT([${have_xenstat_vbd_error}]) +fi + +# ----------------------------------------------------------------------------- +# perf.plugin + +AC_CHECK_HEADER( + [linux/perf_event.h], + [AC_CHECK_DECL( + [PERF_COUNT_HW_REF_CPU_CYCLES], + [have_perf_event=yes], + [have_perf_event=no], + [#include ] + )], + [have_perf_event=no] +) + +AC_MSG_CHECKING([if perf.plugin should be enabled]) +if test "${build_target}" == "linux" -a "${have_perf_event}" = "yes"; then + AC_DEFINE([ENABLE_PERF_PLUGIN], [1], [perf.plugin]) + enable_plugin_perf="yes" +else + enable_plugin_perf="no" +fi +AC_MSG_RESULT([${enable_plugin_perf}]) +AM_CONDITIONAL([ENABLE_PLUGIN_PERF], [test "${enable_plugin_perf}" = "yes"]) + +# ----------------------------------------------------------------------------- +# gtest/gmock + +AC_MSG_CHECKING([if gtest and gmock can be found]) + +PKG_CHECK_MODULES([GTEST], [gtest], [have_gtest=yes], [have_gtest=no]) +PKG_CHECK_MODULES([GMOCK], [gmock], [have_gmock=yes], [have_gmock=no]) + +if test "${have_gtest}" = "yes" -a "${have_gmock}" = "yes"; then + OPTIONAL_GTEST_CFLAGS="${GTEST_CFLAGS} ${GMOCK_CFLAGS}" + OPTIONAL_GTEST_LIBS="${GTEST_LIBS} ${GMOCK_LIBS}" + have_gtest="yes" +else + have_gtest="no" +fi + +# ----------------------------------------------------------------------------- +# ml - anomaly detection + +# Check if uuid is available. Fail if ML was explicitly requested. +if test "${enable_ml}" = "yes" -a "${have_uuid}" != "yes"; then + AC_MSG_ERROR([You have explicitly requested --enable-ml functionality but libuuid can not be found."]) +fi + +# Check if submodules have not been fetched. Fail if ML was explicitly requested. +AC_MSG_CHECKING([if git submodules are present for machine learning functionality]) +if test -f "ml/dlib/dlib/all/source.cpp" -a -f "ml/json/single_include/nlohmann/json.hpp"; then + AC_MSG_RESULT([yes]) + have_ml_submodules="yes" +else + AC_MSG_RESULT([no]) + have_ml_submodules="no" +fi + +if test "${enable_ml}" = "yes" -a "${have_ml_submodules}" = "no"; then + AC_MSG_ERROR([You have explicitly requested --enable-ml functionality but it cannot be built because the required git submodules are missing.]) +fi + +# Check if C++ toolchain does not support C++11. Fail if ML was explicitly requested. +AC_LANG_PUSH([C++]) +AX_CHECK_COMPILE_FLAG([-std=c++11], [have_cxx11=yes], [have_cxx11=no]) +AC_LANG_POP([C++]) + +# PPC64LE needs -std=gnu++11 in order to build dlib. However, the rest of +# the agent's components use and have been tested only with -std=c++11. +# Skip ML compilation on that CPU until we reorganize and test the C++ flags. +if test "${host_cpu}" = "powerpc64le"; then + have_cxx11="no" +fi + +if test "${enable_ml}" = "yes" -a "${have_cxx11}" = "no"; then + AC_MSG_ERROR([You have explicitly requested --enable-ml functionality but it cannot be built without a C++11 toolchain.]) +else + CXX11FLAG="$CXX11FLAG -std=c++11" +fi + +# Decide if we should build ML +if test "${enable_ml}" != "no" -a "${have_ml_submodules}" = "yes" -a "${have_cxx11}" = "yes" -a "${have_uuid}" = "yes"; then + build_ml="yes" +else + build_ml="no" +fi + +AM_CONDITIONAL([ENABLE_ML], [test "${build_ml}" = "yes"]) +if test "${build_ml}" = "yes"; then + AC_DEFINE([ENABLE_ML], [1], [anomaly detection usability]) + OPTIONAL_ML_CFLAGS="-DDLIB_NO_GUI_SUPPORT -I \$(abs_top_srcdir)/ml/dlib" + OPTIONAL_ML_LIBS="" +fi + +# Decide if we should build ML tests. +if test "${build_ml}" = "yes" -a "${enable_ml_tests}" = "yes" -a "${have_gtest}" = "yes"; then + build_ml_tests="yes" +else + build_ml_tests="no" +fi + +AM_CONDITIONAL([ENABLE_ML_TESTS], [test "${build_ml_tests}" = "yes"]) +if test "${build_ml_tests}" = "yes"; then + AC_DEFINE([ENABLE_ML_TESTS], [1], [anomaly detection tests]) + OPTIONAL_ML_TESTS_CFLAGS="${OPTIONAL_GTEST_CFLAGS}" + OPTIONAL_ML_TESTS_LIBS="${OPTIONAL_GTEST_LIBS}" +fi + +# ----------------------------------------------------------------------------- +# ebpf.plugin + +if test "${build_target}" = "linux" -a "${enable_ebpf}" != "no"; then + PKG_CHECK_MODULES( + [LIBELF], + [libelf], + [have_libelf=yes], + [have_libelf=no] + ) + + AC_CHECK_TYPE( + [struct bpf_prog_info], + [have_bpf=yes], + [have_bpf=no], + [#include ] + ) + + AC_CHECK_FILE( + externaldeps/libbpf/libbpf.a, + [have_libbpf=yes], + [have_libbpf=no] + ) + + if test "${have_libelf}" = "yes" -a \ + "${have_bpf}" = "yes" -a \ + "${have_libbpf}" = "yes"; then + OPTIONAL_BPF_CFLAGS="${LIBELF_CFLAGS} -I \$(abs_top_srcdir)/externaldeps/libbpf/include -I \$(abs_top_srcdir)/externaldeps/libbpf/include/uapi" + OPTIONAL_BPF_LIBS="\$(abs_top_srcdir)/externaldeps/libbpf/libbpf.a ${LIBELF_LIBS}" + AC_DEFINE([HAVE_LIBBPF], [1], [libbpf usability]) + enable_ebpf="yes" + else + enable_ebpf="no" + fi +else + enable_ebpf="no" +fi +AC_MSG_CHECKING([if ebpf.plugin should be enabled]) +AC_MSG_RESULT([${enable_ebpf}]) +AM_CONDITIONAL([ENABLE_PLUGIN_EBPF], [test "${enable_ebpf}" = "yes"]) + +# ----------------------------------------------------------------------------- +# slabinfo.plugin + +AC_MSG_CHECKING([if slabinfo.plugin should be enabled]) +if test "${build_target}" == "linux"; then + AC_DEFINE([ENABLE_SLABINFO], [1], [slabinfo plugin]) + enable_plugin_slabinfo="yes" +else + enable_plugin_slabinfo="no" +fi +AC_MSG_RESULT([${enable_plugin_slabinfo}]) +AM_CONDITIONAL([ENABLE_PLUGIN_SLABINFO], [test "${enable_plugin_slabinfo}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# AWS Kinesis exporting connector - libaws-cpp-sdk-kinesis, libaws-cpp-sdk-core, libssl, libcrypto, libcurl + +PKG_CHECK_MODULES( + [LIBCRYPTO], + [libcrypto], + [AC_CHECK_LIB( + [crypto], + [CRYPTO_new_ex_data], + [have_libcrypto=yes], + [have_libcrypto=no] + )], + [have_libcrypto=no] +) + +PKG_CHECK_MODULES( + [LIBSSL], + [libssl], + [AC_CHECK_LIB( + [ssl], + [SSL_connect], + [have_libssl=yes], + [have_libssl=no] + )], + [have_libssl=no] +) + +PKG_CHECK_MODULES( + [LIBCURL], + [libcurl], + [AC_CHECK_LIB( + [curl], + [curl_easy_init], + [have_libcurl=yes], + [have_libcurl=no] + )], + [have_libcurl=no] +) + +PKG_CHECK_MODULES( + [AWS_CPP_SDK_CORE], + [aws-cpp-sdk-core], + [have_libaws_cpp_sdk_core=yes], + [have_libaws_cpp_sdk_core=no] +) + +PKG_CHECK_MODULES( + [AWS_CPP_SDK_KINESIS], + [aws-cpp-sdk-kinesis], + [have_libaws_cpp_sdk_kinesis=yes], + [have_libaws_cpp_sdk_kinesis=no] +) + +test "${enable_exporting_kinesis}" = "yes" -a "${have_libaws_cpp_sdk_kinesis}" != "yes" && \ + AC_MSG_ERROR([libaws-cpp-sdk-kinesis required but not found. try installing AWS C++ SDK]) + +test "${enable_exporting_kinesis}" = "yes" -a "${have_libaws_cpp_sdk_core}" != "yes" && \ + AC_MSG_ERROR([libaws-cpp-sdk-core required but not found. try installing AWS C++ SDK]) + +test "${enable_exporting_kinesis}" = "yes" -a "${have_libcurl}" != "yes" && \ + AC_MSG_ERROR([libcurl required but not found]) + +test "${enable_exporting_kinesis}" = "yes" -a "${have_libssl}" != "yes" && \ + AC_MSG_ERROR([libssl required but not found]) + +test "${enable_exporting_kinesis}" = "yes" -a "${have_libcrypto}" != "yes" && \ + AC_MSG_ERROR([libcrypto required but not found]) + +AC_MSG_CHECKING([if kinesis exporting connector should be enabled]) +if test "${enable_exporting_kinesis}" != "no" -a "${have_libaws_cpp_sdk_kinesis}" = "yes" \ + -a "${have_libaws_cpp_sdk_core}" = "yes" \ + -a "${have_libcurl}" = "yes" \ + -a "${have_libssl}" = "yes" \ + -a "${have_libcrypto}" = "yes"; then + enable_exporting_kinesis="yes" + AC_DEFINE([HAVE_KINESIS], [1], [libaws-cpp-sdk-kinesis usability]) + OPTIONAL_KINESIS_CFLAGS="${LIBCRYPTO_CFLAGS} ${LIBSSL_CFLAGS} ${LIBCURL_CFLAGS}" + CXX11FLAG="${AWS_CPP_SDK_KINESIS_CFLAGS} ${AWS_CPP_SDK_CORE_CFLAGS}" + OPTIONAL_KINESIS_LIBS="${AWS_CPP_SDK_KINESIS_LIBS} ${AWS_CPP_SDK_CORE_LIBS} \ + ${LIBCRYPTO_LIBS} ${LIBSSL_LIBS} ${LIBCURL_LIBS}" +else + enable_exporting_kinesis="no" +fi + +AC_MSG_RESULT([${enable_exporting_kinesis}]) +AM_CONDITIONAL([ENABLE_EXPORTING_KINESIS], [test "${enable_exporting_kinesis}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# Pub/Sub exporting connector - googleapis + +PKG_CHECK_MODULES( + [GRPC], + [grpc], + [have_libgrpc=yes], + [have_libgrpc=no] +) + +if test "${enable_exporting_pubsub}" != "no"; then + PKG_CHECK_MODULES( + [PUBSUB], + [google_cloud_cpp_pubsub_protos], + [have_pubsub_protos=yes], + [have_pubsub_protos=no] + ) +fi + +AC_PATH_PROG([CXX_BINARY], [${CXX}], [no]) +AS_IF( + [test x"${CXX_BINARY}" == x"no"], + [have_CXX_compiler=no], + [have_CXX_compiler=yes] +) + +test "${enable_pubsub}" = "yes" -a "${have_grpc}" != "yes" && \ + AC_MSG_ERROR([libgrpc required but not found. try installing grpc]) + +test "${enable_pubsub}" = "yes" -a "${have_pubsub_protos}" != "yes" && \ + AC_MSG_ERROR([libgoogleapis_cpp_pubsub_protos required but not found. try installing googleapis]) + +test "${enable_exporting_prometheus_remote_write}" = "yes" -a "${have_CXX_compiler}" != "yes" && \ + AC_MSG_ERROR([C++ compiler required but not found. try installing g++]) + +AC_MSG_CHECKING([if pubsub exporting connector should be enabled]) +if test "${enable_exporting_pubsub}" != "no" -a "${have_pubsub_protos}" = "yes" -a "${have_CXX_compiler}" = "yes"; then + enable_exporting_pubsub="yes" + AC_DEFINE([ENABLE_EXPORTING_PUBSUB], [1], [Pub/Sub API usability]) + OPTIONAL_PUBSUB_CFLAGS="${GRPC_CFLAGS} ${PUBSUB_CFLAGS}" + CXX11FLAG="-std=c++11" + OPTIONAL_PUBSUB_LIBS="${GRPC_LIBS} ${PUBSUB_LIBS}" +else + enable_pubsub="no" +fi + +AC_MSG_RESULT([${enable_exporting_pubsub}]) +AM_CONDITIONAL([ENABLE_EXPORTING_PUBSUB], [test "${enable_exporting_pubsub}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# Prometheus remote write exporting connector - libprotobuf, libsnappy, protoc + +AC_MSG_CHECKING([for snappy::RawCompress in -lsnappy]) + + AC_LANG_SAVE + AC_LANG_CPLUSPLUS + save_LIBS="${LIBS}" + LIBS="-lsnappy" + save_CXXFLAGS="${CXXFLAGS}" + CXXFLAGS="${CXXFLAGS} -std=c++11" + + AC_TRY_LINK( + [ + #include + #include + ], + [ + const char *input = "test"; + size_t compressed_length; + char *buffer = (char *)malloc(5 * sizeof(char)); + snappy::RawCompress(input, 4, buffer, &compressed_length); + free(buffer); + ], + [ + have_libsnappy=yes + SNAPPY_CFLAGS="" + SNAPPY_LIBS="-lsnappy" + ], + [have_libsnappy=no] + ) + + LIBS="${save_LIBS}" + CXXFLAGS="${save_CXXFLAGS}" + AC_LANG_RESTORE + +AC_MSG_RESULT([${have_libsnappy}]) + +test "${enable_exporting_prometheus_remote_write}" = "yes" -a "${have_libprotobuf}" != "yes" && \ + AC_MSG_ERROR([libprotobuf required but not found. try installing protobuf]) + +test "${enable_exporting_prometheus_remote_write}" = "yes" -a "${have_libsnappy}" != "yes" && \ + AC_MSG_ERROR([libsnappy required but not found. try installing snappy]) + +test "${enable_exporting_prometheus_remote_write}" = "yes" -a "${have_protoc}" != "yes" && \ + AC_MSG_ERROR([protoc compiler required but not found. try installing protobuf]) + +test "${enable_exporting_prometheus_remote_write}" = "yes" -a "${have_CXX_compiler}" != "yes" && \ + AC_MSG_ERROR([C++ compiler required but not found. try installing g++]) + +AC_MSG_CHECKING([if prometheus remote write exporting connector should be enabled]) +if test "${enable_exporting_prometheus_remote_write}" != "no" -a "${have_libprotobuf}" = "yes" -a "${have_libsnappy}" = "yes" \ + -a "${have_protoc}" = "yes" -a "${have_CXX_compiler}" = "yes"; then + enable_exporting_prometheus_remote_write="yes" + AC_DEFINE([ENABLE_PROMETHEUS_REMOTE_WRITE], [1], [Prometheus remote write API usability]) + OPTIONAL_PROMETHEUS_REMOTE_WRITE_CFLAGS="${SNAPPY_CFLAGS} -I \$(abs_top_srcdir)/exporting/prometheus/remote_write" + CXX11FLAG="-std=c++11" + OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS="${SNAPPY_LIBS}" + OPTIONAL_PROTOBUF_CFLAGS="${PROTOBUF_CFLAGS}" + OPTIONAL_PROTOBUF_LIBS="${PROTOBUF_LIBS}" +else + enable_exporting_prometheus_remote_write="no" +fi + +AC_MSG_RESULT([${enable_exporting_prometheus_remote_write}]) +AM_CONDITIONAL([ENABLE_EXPORTING_PROMETHEUS_REMOTE_WRITE], [test "${enable_exporting_prometheus_remote_write}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# MongoDB exporting connector - libmongoc + +PKG_CHECK_MODULES( + [LIBMONGOC], + [libmongoc-1.0 >= 1.7], + [have_libmongoc=yes], + [have_libmongoc=no] +) + +test "${enable_exporting_mongodb}" = "yes" -a "${have_libmongoc}" != "yes" && \ + AC_MSG_ERROR([libmongoc required but not found. Try installing `mongoc`.]) + +AC_MSG_CHECKING([if mongodb exporting connector should be enabled]) +if test "${enable_exporting_mongodb}" != "no" -a "${have_libmongoc}" = "yes"; then + enable_exporting_mongodb="yes" + AC_DEFINE([HAVE_MONGOC], [1], [libmongoc usability]) + OPTIONAL_MONGOC_CFLAGS="${LIBMONGOC_CFLAGS}" + OPTIONAL_MONGOC_LIBS="${LIBMONGOC_LIBS}" +else + enable_exporting_mongodb="no" +fi + +AC_MSG_RESULT([${enable_exporting_mongodb}]) +AM_CONDITIONAL([ENABLE_EXPORTING_MONGODB], [test "${enable_exporting_mongodb}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# check for setns() - cgroup-network + +AC_CHECK_FUNC([setns]) +AC_MSG_CHECKING([if cgroup-network can be enabled]) +if test "$ac_cv_func_setns" = "yes" ; then + have_setns="yes" + AC_DEFINE([HAVE_SETNS], [1], [Define 1 if you have setns() function]) +else + have_setns="no" +fi +AC_MSG_RESULT([${have_setns}]) +AM_CONDITIONAL([ENABLE_PLUGIN_CGROUP_NETWORK], [test "${have_setns}" = "yes"]) + + +# ----------------------------------------------------------------------------- +# Link-Time-Optimization + +if test "${enable_lto}" != "no"; then + opt="-flto" + AX_CHECK_COMPILE_FLAG(${opt}, [have_lto=yes], [have_lto=no]) +fi +if test "${have_lto}" = "yes"; then + oCFLAGS="${CFLAGS}" + CFLAGS="${CFLAGS} -flto" + ac_cv_c_lto_cross_compile="${enable_lto}" + test "${ac_cv_c_lto_cross_compile}" != "yes" && ac_cv_c_lto_cross_compile="no" + AC_C_LTO + CFLAGS="${oCFLAGS}" + test "${ac_cv_c_lto}" != "yes" && have_lto="no" +fi +test "${enable_lto}" = "yes" -a "${have_lto}" != "yes" && \ + AC_MSG_ERROR([LTO is required but is not available.]) +AC_MSG_CHECKING([if LTO should be enabled]) +if test "${enable_lto}" != "no" -a "${have_lto}" = "yes"; then + enable_lto="yes" + OPTIONAL_LTO_CFLAGS="-flto" +else + enable_lto="no" +fi +AC_MSG_RESULT([${enable_lto}]) + + +# ----------------------------------------------------------------------------- + +# Try to unconditionally link with -latomic. If the compiler can satisfy +# all the atomic ops with builtins then, the library will be left unused. +# Otherwise, some ops will be covered by the compiler's intrinsics and some +# will be picked up by the linker from -latomic. In the later case, if +# -latomic is not available there will be a build failure, which would +# have happened either way before this change. +AC_LANG_PUSH([C++]) + +AC_MSG_CHECKING(whether we can use -latomic) +OLD_LIBS="${LIBS}" +LIBS="-latomic" +AC_LINK_IFELSE([AC_LANG_SOURCE([[ + #include + #include + std::atomic v; + int main() { + return v; + } +]])], CAN_USE_LIBATOMIC=yes, CAN_USE_LIBATOMIC=no) +LIBS="${OLD_LIBS}" +AC_MSG_RESULT($CAN_USE_LIBATOMIC) + +if test "x$CAN_USE_LIBATOMIC" = xyes; then + OPTIONAL_ATOMIC_LIBS="-latomic" +fi +AC_SUBST([OPTIONAL_ATOMIC_LIBS]) + +AC_LANG_POP([C++]) + +# ----------------------------------------------------------------------------- + +AC_MSG_CHECKING(whether we can use dlsym) +OLD_LIBS="${LIBS}" +LIBS="-ldl" +AC_LINK_IFELSE([AC_LANG_SOURCE([[ + #include + static void *(*libc_malloc)(size_t); + int main() { + libc_malloc = dlsym(RTLD_NEXT, "malloc"); + } +]])], CAN_USE_DLSYM=yes, CAN_USE_DLSYM=no) +LIBS="${OLD_LIBS}" +AC_MSG_RESULT($CAN_USE_DLSYM) + +if test "x$CAN_USE_DLSYM" = xyes; then + AC_DEFINE([HAVE_DLSYM], [1], [dlsym usability]) + OPTIONAL_DL_LIBS="-ldl" +fi +AC_SUBST([OPTIONAL_DL_LIBS]) + +# ----------------------------------------------------------------------------- + + +AC_DEFINE_UNQUOTED([NETDATA_USER], ["${with_user}"], [use this user to drop privileged]) + +varlibdir="${localstatedir}/lib/netdata" +registrydir="${localstatedir}/lib/netdata/registry" +cachedir="${localstatedir}/cache/netdata" +chartsdir="${libexecdir}/netdata/charts.d" +pythondir="${libexecdir}/netdata/python.d" +configdir="${sysconfdir}/netdata" +libconfigdir="${libdir}/netdata/conf.d" +logdir="${localstatedir}/log/netdata" +pluginsdir="${libexecdir}/netdata/plugins.d" +netdata_user="${with_user}" +libsysdir="${libdir}/netdata/system" + +AC_SUBST([varlibdir]) +AC_SUBST([registrydir]) +AC_SUBST([cachedir]) +AC_SUBST([chartsdir]) +AC_SUBST([pythondir]) +AC_SUBST([configdir]) +AC_SUBST([libconfigdir]) +AC_SUBST([logdir]) +AC_SUBST([pluginsdir]) +AC_SUBST([webdir]) +AC_SUBST([netdata_user]) +AC_SUBST([libsysdir]) + +CFLAGS="${originalCFLAGS} ${OPTIONAL_LTO_CFLAGS} ${OPTIONAL_PROTOBUF_CFLAGS} ${OPTIONAL_MATH_CFLAGS} ${OPTIONAL_NFACCT_CFLAGS} \ + ${OPTIONAL_ZLIB_CFLAGS} ${OPTIONAL_UUID_CFLAGS} \ + ${OPTIONAL_LIBCAP_CFLAGS} ${OPTIONAL_IPMIMONITORING_CFLAGS} ${OPTIONAL_CUPS_CFLAGS} ${OPTIONAL_XENSTAT_FLAGS} \ + ${OPTIONAL_KINESIS_CFLAGS} ${OPTIONAL_PUBSUB_CFLAGS} ${OPTIONAL_PROMETHEUS_REMOTE_WRITE_CFLAGS} \ + ${OPTIONAL_MONGOC_CFLAGS} ${LWS_CFLAGS} ${OPTIONAL_JSONC_STATIC_CFLAGS} ${OPTIONAL_BPF_CFLAGS} ${JUDY_CFLAGS} \ + ${OPTIONAL_ACLK_CFLAGS} ${OPTIONAL_ML_CFLAGS} ${OPTIONAL_ML_TESTS_CFLAGS} ${OPTIONAL_OS_DEP_CFLAGS}" + +CXXFLAGS="${CFLAGS} ${CXX11FLAG}" + +CPPFLAGS="\ + -DVARLIB_DIR=\"\\\"${varlibdir}\\\"\" \ + -DCACHE_DIR=\"\\\"${cachedir}\\\"\" \ + -DCONFIG_DIR=\"\\\"${configdir}\\\"\" \ + -DLIBCONFIG_DIR=\"\\\"${libconfigdir}\\\"\" \ + -DLOG_DIR=\"\\\"${logdir}\\\"\" \ + -DPLUGINS_DIR=\"\\\"${pluginsdir}\\\"\" \ + -DRUN_DIR=\"\\\"${localstatedir}/run/netdata\\\"\" \ + -DWEB_DIR=\"\\\"${webdir}\\\"\" \ +" + +AC_SUBST([OPTIONAL_MATH_CFLAGS]) +AC_SUBST([OPTIONAL_MATH_LIBS]) +AC_SUBST([OPTIONAL_UV_LIBS]) +AC_SUBST([OPTIONAL_LZ4_LIBS]) +AC_SUBST([OPTIONAL_SSL_LIBS]) +AC_SUBST([OPTIONAL_JSONC_LIBS]) +AC_SUBST([OPTIONAL_NFACCT_CFLAGS]) +AC_SUBST([OPTIONAL_NFACCT_LIBS]) +AC_SUBST([OPTIONAL_ZLIB_CFLAGS]) +AC_SUBST([OPTIONAL_ZLIB_LIBS]) +AC_SUBST([OPTIONAL_UUID_CFLAGS]) +AC_SUBST([OPTIONAL_UUID_LIBS]) +AC_SUBST([OPTIONAL_BPF_CFLAGS]) +AC_SUBST([OPTIONAL_BPF_LIBS]) +AC_SUBST([OPTIONAL_MQTT_LIBS]) +AC_SUBST([OPTIONAL_LIBCAP_CFLAGS]) +AC_SUBST([OPTIONAL_LIBCAP_LIBS]) +AC_SUBST([OPTIONAL_IPMIMONITORING_CFLAGS]) +AC_SUBST([OPTIONAL_IPMIMONITORING_LIBS]) +AC_SUBST([OPTIONAL_CUPS_CFLAGS]) +AC_SUBST([OPTIONAL_CUPS_LIBS]) +AC_SUBST([OPTIONAL_XENSTAT_CFLAGS]) +AC_SUBST([OPTIONAL_XENSTAT_LIBS]) +AC_SUBST([OPTIONAL_KINESIS_CFLAGS]) +AC_SUBST([OPTIONAL_KINESIS_LIBS]) +AC_SUBST([OPTIONAL_PUBSUB_CFLAGS]) +AC_SUBST([OPTIONAL_PUBSUB_LIBS]) +AC_SUBST([OPTIONAL_PROMETHEUS_REMOTE_WRITE_CFLAGS]) +AC_SUBST([OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS]) +AC_SUBST([OPTIONAL_MONGOC_CFLAGS]) +AC_SUBST([OPTIONAL_MONGOC_LIBS]) +AC_SUBST([OPTIONAL_LWS_LIBS]) +AC_SUBST([OPTIONAL_ACLK_CFLAGS]) +AC_SUBST([OPTIONAL_PROTOBUF_CFLAGS]) +AC_SUBST([OPTIONAL_PROTOBUF_LIBS]) +AC_SUBST([OPTIONAL_GTEST_CFLAGS]) +AC_SUBST([OPTIONAL_GTEST_LIBS]) +AC_SUBST([OPTIONAL_ML_CFLAGS]) +AC_SUBST([OPTIONAL_ML_LIBS]) +AC_SUBST([OPTIONAL_ML_TESTS_CFLAGS]) +AC_SUBST([OPTIONAL_ML_TESTS_LIBS]) + +# ----------------------------------------------------------------------------- +# Check if cmocka is available - needed for unit testing + +AC_ARG_ENABLE( + [unit-tests], + [AS_HELP_STRING([--disable-unit-tests], + [Disables building and running the unit tests suite])], + [], + [enable_unit_tests="yes"] +) + + +PKG_CHECK_MODULES( + [CMOCKA], + [cmocka], + [have_cmocka="yes"], + [AC_MSG_NOTICE([CMocka not found on the system. Unit tests disabled])] +) +AM_CONDITIONAL([ENABLE_UNITTESTS], [test "${enable_unit_tests}" = "yes" -a "${have_cmocka}" = "yes" ]) +AC_SUBST([ENABLE_UNITTESTS]) + +TEST_CFLAGS="${CFLAGS} ${CMOCKA_CFLAGS}" +TEST_LIBS="${CMOCKA_LIBS}" + +AC_SUBST([TEST_CFLAGS]) +AC_SUBST([TEST_LIBS]) + +# ----------------------------------------------------------------------------- +# save configure options for build info +AC_DEFINE_UNQUOTED( + [CONFIGURE_COMMAND], + ["$ac_configure_args"], + [options passed to configure script] +) + +AC_CONFIG_FILES([ + Makefile + netdata.spec + collectors/Makefile + collectors/apps.plugin/Makefile + collectors/cgroups.plugin/Makefile + collectors/charts.d.plugin/Makefile + collectors/diskspace.plugin/Makefile + collectors/timex.plugin/Makefile + collectors/fping.plugin/Makefile + collectors/ioping.plugin/Makefile + collectors/freebsd.plugin/Makefile + collectors/freeipmi.plugin/Makefile + collectors/cups.plugin/Makefile + collectors/idlejitter.plugin/Makefile + collectors/macos.plugin/Makefile + collectors/nfacct.plugin/Makefile + collectors/plugins.d/Makefile + collectors/proc.plugin/Makefile + collectors/python.d.plugin/Makefile + collectors/slabinfo.plugin/Makefile + collectors/statsd.plugin/Makefile + collectors/ebpf.plugin/Makefile + collectors/tc.plugin/Makefile + collectors/xenstat.plugin/Makefile + collectors/perf.plugin/Makefile + daemon/Makefile + database/Makefile + database/engine/Makefile + database/ram/Makefile + database/sqlite/Makefile + diagrams/Makefile + exporting/Makefile + exporting/graphite/Makefile + exporting/json/Makefile + exporting/opentsdb/Makefile + exporting/prometheus/Makefile + exporting/prometheus/remote_write/Makefile + exporting/aws_kinesis/Makefile + exporting/pubsub/Makefile + exporting/mongodb/Makefile + exporting/tests/Makefile + health/Makefile + health/notifications/Makefile + libnetdata/Makefile + libnetdata/tests/Makefile + libnetdata/adaptive_resortable_list/Makefile + libnetdata/arrayalloc/Makefile + libnetdata/avl/Makefile + libnetdata/buffer/Makefile + libnetdata/clocks/Makefile + libnetdata/completion/Makefile + libnetdata/config/Makefile + libnetdata/dictionary/Makefile + libnetdata/ebpf/Makefile + libnetdata/eval/Makefile + libnetdata/locks/Makefile + libnetdata/log/Makefile + libnetdata/onewayalloc/Makefile + libnetdata/popen/Makefile + libnetdata/procfile/Makefile + libnetdata/simple_pattern/Makefile + libnetdata/socket/Makefile + libnetdata/statistical/Makefile + libnetdata/string/Makefile + libnetdata/storage_number/Makefile + libnetdata/storage_number/tests/Makefile + libnetdata/threads/Makefile + libnetdata/url/Makefile + libnetdata/json/Makefile + libnetdata/health/Makefile + libnetdata/worker_utilization/Makefile + registry/Makefile + streaming/Makefile + system/Makefile + tests/Makefile + web/Makefile + web/api/Makefile + web/api/badges/Makefile + web/api/exporters/Makefile + web/api/exporters/shell/Makefile + web/api/exporters/prometheus/Makefile + web/api/formatters/Makefile + web/api/formatters/csv/Makefile + web/api/formatters/json/Makefile + web/api/formatters/ssv/Makefile + web/api/formatters/value/Makefile + web/api/queries/Makefile + web/api/queries/average/Makefile + web/api/queries/countif/Makefile + web/api/queries/des/Makefile + web/api/queries/incremental_sum/Makefile + web/api/queries/max/Makefile + web/api/queries/median/Makefile + web/api/queries/min/Makefile + web/api/queries/percentile/Makefile + web/api/queries/ses/Makefile + web/api/queries/stddev/Makefile + web/api/queries/sum/Makefile + web/api/queries/trimmed_mean/Makefile + web/api/health/Makefile + web/gui/Makefile + web/gui/dashboard/Makefile + web/server/Makefile + web/server/static/Makefile + claim/Makefile + spawn/Makefile + parser/Makefile +]) + +AC_OUTPUT + +test "${with_math}" != "yes" && AC_MSG_WARN([You are building without math. math allows accurate calculations. It should be enabled.]) || : +test "${with_zlib}" != "yes" && AC_MSG_WARN([You are building without zlib. zlib allows netdata to transfer a lot less data with web clients. It should be enabled.]) || : diff --git a/contrib/README.md b/contrib/README.md new file mode 100644 index 0000000..36abd3b --- /dev/null +++ b/contrib/README.md @@ -0,0 +1,61 @@ + + +# Netdata contrib + +## Building .deb packages + +The `contrib/debian/` directory contains basic rules to build a +Debian package. It has been tested on Debian Jessie and Wheezy, +but should work, possibly with minor changes, if you have other +dpkg-based systems such as Ubuntu or Mint. + +To build Netdata for a Debian Jessie system, the debian directory +has to be available in the root of the Netdata source. The easiest +way to do this is with a symlink: + +```sh +ln -s contrib/debian +``` + +Edit the `debian/changelog` file to reflect the package version and +the build time: + +```sh +netdata (1.21.0) unstable; urgency=medium + + * Initial Release + + -- Netdata Builder Tue, 12 May 2020 10:36:52 +0200 +``` + +Then build the debian package: + +```sh +dpkg-buildpackage -us -uc -rfakeroot +``` + +This should give a package that can be installed in the parent +directory, which you can install manually with dpkg. + +```sh +ls -1 ../*.deb +../netdata_1.21.0_amd64.deb +../netdata-dbgsym_1.21.0_amd64.deb +../netdata-plugin-cups_1.21.0_amd64.deb +../netdata-plugin-cups-dbgsym_1.21.0_amd64.deb +../netdata-plugin-freeipmi_1.21.0_amd64.deb +../netdata-plugin-freeipmi-dbgsym_1.21.0_amd64.deb +sudo dpkg -i ../netdata_1.21.0_amd64.deb +``` + +### Reinstalling Netdata + +The recommended way to upgrade Netdata packages built from this +source is to remove the current package from your system, then +install the new package. Upgrading on wheezy is known to not +work cleanly; Jessie may behave as expected. + + diff --git a/contrib/debian/changelog b/contrib/debian/changelog new file mode 100644 index 0000000..5ac1edb --- /dev/null +++ b/contrib/debian/changelog @@ -0,0 +1,6 @@ +netdata (PREVIOUS_PACKAGE_VERSION) unstable; urgency=medium + + * Initial Release + + -- Netdata Builder PREVIOUS_PACKAGE_DATE + diff --git a/contrib/debian/compat b/contrib/debian/compat new file mode 100644 index 0000000..f599e28 --- /dev/null +++ b/contrib/debian/compat @@ -0,0 +1 @@ +10 diff --git a/contrib/debian/control b/contrib/debian/control new file mode 100644 index 0000000..c5e5791 --- /dev/null +++ b/contrib/debian/control @@ -0,0 +1,66 @@ +Source: netdata +Build-Depends: debhelper (>= 9.20160709), + dh-autoreconf, + dpkg-dev (>= 1.13.19), + zlib1g-dev, + uuid-dev, + libelf-dev, + libuv1-dev, + liblz4-dev, + libssl-dev, + libmnl-dev, + libjson-c-dev, + libcups2-dev, + libipmimonitoring-dev, + libnetfilter-acct-dev, + libsnappy-dev, + libprotobuf-dev, + libprotoc-dev, + cmake, + autogen, + autoconf, + automake, + pkg-config, + curl, + protobuf-compiler +Section: net +Priority: optional +Maintainer: Netdata Builder +Standards-Version: 3.9.6 +Homepage: https://netdata.cloud + +Package: netdata +Architecture: any +Depends: adduser, + libcap2-bin (>= 1:2.0), + lsb-base (>= 3.1-23.2), + openssl, + ${misc:Depends}, + ${shlibs:Depends} +Conflicts: netdata-core, + netdata-plugins-bash, + netdata-plugins-python, + netdata-web +Pre-Depends: dpkg (>= 1.17.14) +Description: real-time charts for system monitoring + Netdata is a daemon that collects data in realtime (per second) + and presents a web site to view and analyze them. The presentation + is also real-time and full of interactive charts that precisely + render all collected values. + +Package: netdata-plugin-cups +Architecture: any +Depends: cups, + ${shlibs:Depends}, + netdata (>= ${source:Version}) +Description: The Common Unix Printing System plugin for metrics collection from cupsd + +Package: netdata-plugin-freeipmi +Architecture: any +Depends: freeipmi, + ${shlibs:Depends}, + netdata (= ${source:Version}) +Description: FreeIPMI - The Intelligent Platform Management System. + The IPMI specification defines a set of interfaces for platform management. + It is implemented by a number vendors for system management. The features of IPMI that most users will be interested in + are sensor monitoring, system event monitoring, power control, and serial-over-LAN (SOL). diff --git a/contrib/debian/control.xenial b/contrib/debian/control.xenial new file mode 100644 index 0000000..43246d7 --- /dev/null +++ b/contrib/debian/control.xenial @@ -0,0 +1,61 @@ +Source: netdata +Build-Depends: debhelper (>= 9), + dh-autoreconf, + dh-systemd (>= 1.5), + dpkg-dev (>= 1.13.19), + zlib1g-dev, + uuid-dev, + libelf-dev, + libuv1-dev, + liblz4-dev, + libssl-dev, + libmnl-dev, + libjson-c-dev, + libcups2-dev, + libipmimonitoring-dev, + libnetfilter-acct-dev, + libsnappy-dev, + libprotobuf-dev, + libprotoc-dev, + cmake, + autogen, + autoconf, + automake, + pkg-config, + curl, + protobuf-compiler +Section: net +Priority: optional +Maintainer: Netdata Builder +Standards-Version: 3.9.6 +Homepage: https://netdata.cloud + +Package: netdata +Architecture: any +Depends: adduser, + libcap2-bin (>= 1:2.0), + lsb-base (>= 3.1-23.2), + openssl, + ${misc:Depends}, + ${shlibs:Depends} +Pre-Depends: dpkg (>= 1.17.14) +Description: real-time charts for system monitoring + Netdata is a daemon that collects data in realtime (per second) + and presents a web site to view and analyze them. The presentation + is also real-time and full of interactive charts that precisely + render all collected values. + +Package: netdata-plugin-cups +Architecture: any +Depends: cups, + netdata (>= ${source:Version}) +Description: The Common Unix Printing System plugin for metrics collection from cupsd + +Package: netdata-plugin-freeipmi +Architecture: any +Depends: freeipmi, + netdata (= ${source:Version}) +Description: FreeIPMI - The Intelligent Platform Management System. + The IPMI specification defines a set of interfaces for platform management. + It is implemented by a number vendors for system management. The features of IPMI that most users will be interested in + are sensor monitoring, system event monitoring, power control, and serial-over-LAN (SOL). diff --git a/contrib/debian/copyright b/contrib/debian/copyright new file mode 100644 index 0000000..085580e --- /dev/null +++ b/contrib/debian/copyright @@ -0,0 +1,10 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: Netdata +Upstream-Contact: Costa Tsaousis +Source: https://github.com/netdata/netdata + +Files: * +Copyright: 2014-2016, Costa Tsaousis +License: GPL-3+ + On Debian systems, the complete text of the GNU General Public + License version 3 can be found in /usr/share/common-licenses/GPL-3. diff --git a/contrib/debian/install_go.sh b/contrib/debian/install_go.sh new file mode 100755 index 0000000..4c4ee93 --- /dev/null +++ b/contrib/debian/install_go.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +GO_PACKAGE_VERSION="$1" +LIB_DIR="$2" +LIBEXEC_DIR="$3" + +# ############################################################ +# Package Go within netdata (TBD: Package it separately) +safe_sha256sum() { + # Within the context of the installer, we only use -c option that is common between the two commands + # We will have to reconsider if we start non-common options + if command -v sha256sum >/dev/null 2>&1; then + sha256sum $@ + elif command -v shasum >/dev/null 2>&1; then + shasum -a 256 $@ + else + fatal "I could not find a suitable checksum binary to use" + fi +} + +download_go() { + url="${1}" + dest="${2}" + + if command -v curl >/dev/null 2>&1; then + curl -sSL --connect-timeout 10 --retry 3 "${url}" > "${dest}" + elif command -v wget >/dev/null 2>&1; then + wget -T 15 -O - "${url}" > "${dest}" + else + echo >&2 + echo >&2 "Downloading go.d plugin from '${url}' failed because of missing mandatory packages." + echo >&2 "Either add packages or disable it by issuing '--disable-go' in the installer" + echo >&2 + exit 1 + fi +} + +install_go() { + ARCH_MAP=( + 'i386::386' + 'i686::386' + 'x86_64::amd64' + 'aarch64::arm64' + 'armv64::arm64' + 'armv6l::arm' + 'armv7l::arm' + 'armv5tel::arm' + ) + + if [ -z "${NETDATA_DISABLE_GO+x}" ]; then + echo >&2 "Install go.d.plugin" + ARCH=$(uname -m) + OS=$(uname -s | tr '[:upper:]' '[:lower:]') + + for index in "${ARCH_MAP[@]}" ; do + KEY="${index%%::*}" + VALUE="${index##*::}" + if [ "$KEY" = "$ARCH" ]; then + ARCH="${VALUE}" + break + fi + done + tmp=$(mktemp -d /tmp/netdata-go-XXXXXX) + GO_PACKAGE_BASENAME="go.d.plugin-${GO_PACKAGE_VERSION}.${OS}-${ARCH}.tar.gz" + download_go "https://github.com/netdata/go.d.plugin/releases/download/${GO_PACKAGE_VERSION}/${GO_PACKAGE_BASENAME}" "${tmp}/${GO_PACKAGE_BASENAME}" + download_go "https://github.com/netdata/go.d.plugin/releases/download/${GO_PACKAGE_VERSION}/config.tar.gz" "${tmp}/config.tar.gz" + + if [ ! -f "${tmp}/${GO_PACKAGE_BASENAME}" ] || [ ! -f "${tmp}/config.tar.gz" ] || [ ! -s "${tmp}/config.tar.gz" ] || [ ! -s "${tmp}/${GO_PACKAGE_BASENAME}" ]; then + echo >&2 "Either check the error or consider disabling it by issuing '--disable-go' in the installer" + echo >&2 + return 1 + fi + + grep "${GO_PACKAGE_BASENAME}\$" "packaging/go.d.checksums" > "${tmp}/sha256sums.txt" 2>/dev/null + grep "config.tar.gz" "packaging/go.d.checksums" >> "${tmp}/sha256sums.txt" 2>/dev/null + + # Checksum validation + if ! (cd "${tmp}" && safe_sha256sum -c "sha256sums.txt"); then + + echo >&2 "go.d plugin checksum validation failure." + echo >&2 "Either check the error or consider disabling it by issuing '--disable-go' in the installer" + echo >&2 + + echo "go.d.plugin package files checksum validation failed." + exit 1 + fi + + # Install files + tar -xf "${tmp}/config.tar.gz" -C "${LIB_DIR}/conf.d/" + tar xf "${tmp}/${GO_PACKAGE_BASENAME}" + mv "${GO_PACKAGE_BASENAME/\.tar\.gz/}" "${LIBEXEC_DIR}/plugins.d/go.d.plugin" + + rm -rf "${tmp}" + fi + return 0 +} + +install_go diff --git a/contrib/debian/netdata.default b/contrib/debian/netdata.default new file mode 100644 index 0000000..0bc847f --- /dev/null +++ b/contrib/debian/netdata.default @@ -0,0 +1,3 @@ +# Extra arguments to pass to netdata +# +EXTRA_OPTS="-P /var/run/netdata/netdata.pid" diff --git a/contrib/debian/netdata.docs b/contrib/debian/netdata.docs new file mode 100644 index 0000000..1b763b1 --- /dev/null +++ b/contrib/debian/netdata.docs @@ -0,0 +1 @@ +CHANGELOG.md diff --git a/contrib/debian/netdata.init b/contrib/debian/netdata.init new file mode 100755 index 0000000..c2706ca --- /dev/null +++ b/contrib/debian/netdata.init @@ -0,0 +1,56 @@ +#!/bin/sh +# Start/stop the netdata daemon. +# +### BEGIN INIT INFO +# Provides: netdata +# Required-Start: $remote_fs +# Required-Stop: $remote_fs +# Should-Start: $network +# Should-Stop: $network +# Default-Start: 2 3 4 5 +# Default-Stop: +# Short-Description: Real-time charts for system monitoring +# Description: Netdata is a daemon that collects data in realtime (per second) +# and presents a web site to view and analyze them. The presentation +# is also real-time and full of interactive charts that precisely +# render all collected values. +### END INIT INFO + +PATH=/bin:/usr/bin:/sbin:/usr/sbin +DESC="netdata daemon" +NAME=netdata +DAEMON=/usr/sbin/netdata +PIDFILE=/var/run/netdata/netdata.pid +SCRIPTNAME=/etc/init.d/"$NAME" + +test -f $DAEMON || exit 0 + +. /lib/lsb/init-functions + +[ -r /etc/default/netdata ] && . /etc/default/netdata + +case "$1" in +start) log_daemon_msg "Starting real-time system monitoring" "netdata" + start_daemon -p $PIDFILE $DAEMON -P $PIDFILE $EXTRA_OPTS + log_end_msg $? + ;; +stop) log_daemon_msg "Stopping real-time system monitoring" "netdata" + killproc -p $PIDFILE $DAEMON + RETVAL=$? + [ $RETVAL -eq 0 ] && [ -e "$PIDFILE" ] && rm -f $PIDFILE + log_end_msg $RETVAL + # wait for plugins to exit + sleep 1 + ;; +restart|force-reload) log_daemon_msg "Restarting real-time system monitoring" "netdata" + $0 stop + $0 start + ;; +status) + status_of_proc -p $PIDFILE $DAEMON $NAME && exit 0 || exit $? + ;; +*) log_action_msg "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" + exit 2 + ;; +esac +exit 0 diff --git a/contrib/debian/netdata.install b/contrib/debian/netdata.install new file mode 100644 index 0000000..45d42b6 --- /dev/null +++ b/contrib/debian/netdata.install @@ -0,0 +1 @@ +debian/netdata.conf /etc/netdata/ diff --git a/contrib/debian/netdata.lintian-overrides b/contrib/debian/netdata.lintian-overrides new file mode 100644 index 0000000..45b2d86 --- /dev/null +++ b/contrib/debian/netdata.lintian-overrides @@ -0,0 +1,16 @@ + +# See Debian policy 10.9. apps.plugin has extra capabilities, so don't let +# normal users run it. +netdata: non-standard-executable-perm usr/lib/*/netdata/plugins.d/apps.plugin 0754 != 0755 + + +# FontAwesome is at least in the fonts-font-awesome package, but this is +# not available in wheezy. glyphicons-halflings-regular isn't currently in +# a Debian package. Therefore don't complain about shipping them with netdata +# for the time being. +netdata: duplicate-font-file usr/share/netdata/fonts/* +netdata: font-in-non-font-package usr/share/netdata/fonts/* + +# Files here are marked as conffiles so that local updates to the html files +# isn't clobbered on upgrade. +netdata: non-etc-file-marked-as-conffile var/lib/netdata/www/* diff --git a/contrib/debian/netdata.postinst b/contrib/debian/netdata.postinst new file mode 100644 index 0000000..daea8cb --- /dev/null +++ b/contrib/debian/netdata.postinst @@ -0,0 +1,85 @@ +#!/bin/sh + +set -e + +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/.well-known /usr/share/netdata/www/.well-known 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/css /usr/share/netdata/www/css 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/fonts /usr/share/netdata/www/fonts 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/images /usr/share/netdata/www/images 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/lib /usr/share/netdata/www/lib 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/static /usr/share/netdata/www/static 1.18.1~ netdata -- "$@" + +case "$1" in + configure|recnfigure) + if ! getent group netdata > /dev/null; then + addgroup --quiet --system netdata + fi + + if ! getent passwd netdata > /dev/null; then + adduser --quiet --system --ingroup netdata --home /var/lib/netdata --no-create-home netdata + fi + + for item in docker nginx varnish haproxy adm nsd proxy squid ceph nobody I2C; do + if getent group $item > /dev/null 2>&1; then + usermod -a -G $item netdata + fi + done + + if ! dpkg-statoverride --list /var/lib/netdata > /dev/null 2>&1; then + dpkg-statoverride --update --add netdata netdata 0755 /var/lib/netdata + fi + + if ! dpkg-statoverride --list /var/lib/netdata/www > /dev/null 2>&1; then + dpkg-statoverride --update --add root netdata 0755 /var/lib/netdata/www + fi + + if ! dpkg-statoverride --list /var/cache/netdata > /dev/null 2>&1; then + dpkg-statoverride --update --add netdata netdata 0755 /var/cache/netdata + fi + + if ! dpkg-statoverride --list /var/run/netdata > /dev/null 2>&1; then + dpkg-statoverride --update --add netdata netdata 0755 /var/run/netdata + fi + + if ! dpkg-statoverride --list /var/log/netdata > /dev/null 2>&1; then + dpkg-statoverride --update --add netdata adm 02750 /var/log/netdata + fi + + dpkg-statoverride --force --update --add root netdata 0775 /var/lib/netdata/registry > /dev/null 2>&1 + + chown -R root:netdata /usr/libexec/netdata/plugins.d + setcap cap_dac_read_search,cap_sys_ptrace+ep /usr/libexec/netdata/plugins.d/apps.plugin + setcap cap_dac_read_search+ep /usr/libexec/netdata/plugins.d/slabinfo.plugin + + if capsh --supports=cap_perfmon 2>/dev/null; then + setcap cap_perfmon+ep /usr/libexec/netdata/plugins.d/perf.plugin + else + setcap cap_sys_admin+ep /usr/libexec/netdata/plugins.d/perf.plugin + fi + + if [ -f "/usr/libexec/netdata/plugins.d/go.d.plugin" ]; then + setcap "cap_net_admin+epi cap_net_raw=eip" /usr/libexec/netdata/plugins.d/go.d.plugin + fi + + chmod 4750 /usr/libexec/netdata/plugins.d/cgroup-network + chmod 4750 /usr/libexec/netdata/plugins.d/nfacct.plugin + + # Workaround if system does not have ebpf.plugin + chmod -f 4750 /usr/libexec/netdata/plugins.d/ebpf.plugin || true + + # Workaround for other plugins not installed directly by this package + chmod -f 4750 /usr/libexec/netdata/plugins.d/freeipmi.plugin || true + chmod -f 4750 /usr/libexec/netdata/plugins.d/ioping || true + + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata.postrm b/contrib/debian/netdata.postrm new file mode 100644 index 0000000..5644e22 --- /dev/null +++ b/contrib/debian/netdata.postrm @@ -0,0 +1,53 @@ +#!/bin/sh + +set -e + +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/.well-known /usr/share/netdata/www/.well-known 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/css /usr/share/netdata/www/css 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/fonts /usr/share/netdata/www/fonts 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/images /usr/share/netdata/www/images 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/lib /usr/share/netdata/www/lib 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/static /usr/share/netdata/www/static 1.18.1~ netdata -- "$@" + +case "$1" in + remove) ;; + + purge) + if dpkg-statoverride --list | grep -qw /var/cache/netdata; then + dpkg-statoverride --remove /var/cache/netdata + fi + + if dpkg-statoverride --list | grep -qw /var/lib/netdata/www; then + dpkg-statoverride --remove /var/lib/netdata/www + fi + + if dpkg-statoverride --list | grep -qw /var/lib/netdata/registry; then + dpkg-statoverride --remove /var/lib/netdata/registry + fi + + if dpkg-statoverride --list | grep -qw /var/lib/netdata; then + dpkg-statoverride --remove /var/lib/netdata + fi + + if dpkg-statoverride --list | grep -qw /var/run/netdata; then + dpkg-statoverride --remove /var/run/netdata + fi + + if dpkg-statoverride --list | grep -qw /var/log/netdata; then + dpkg-statoverride --remove /var/log/netdata + fi + ;; + + *) ;; + +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata.preinst b/contrib/debian/netdata.preinst new file mode 100644 index 0000000..3bbdea0 --- /dev/null +++ b/contrib/debian/netdata.preinst @@ -0,0 +1,18 @@ +#!/bin/sh + +set -e + +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/.well-known /usr/share/netdata/www/.well-known 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/css /usr/share/netdata/www/css 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/fonts /usr/share/netdata/www/fonts 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/images /usr/share/netdata/www/images 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/lib /usr/share/netdata/www/lib 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/static /usr/share/netdata/www/static 1.18.1~ netdata -- "$@" + +#DEBHELPER# diff --git a/contrib/debian/rules b/contrib/debian/rules new file mode 100755 index 0000000..5be3e81 --- /dev/null +++ b/contrib/debian/rules @@ -0,0 +1,137 @@ +#!/usr/bin/make -f + +# Find the arch we are building for, as this determines +# the location of plugins in /usr/lib +TOP = $(CURDIR)/debian/netdata +TEMPTOP = $(CURDIR)/debian/tmp + +BASE_CONFIG = system/netdata.conf + +SYSTEMD_VERSION = $(shell /bin/sh -c "systemd --version 2>&1 | head -n 1 | cut -f 2 -d ' '") + +ifeq ($(shell test $(SYSTEMD_VERSION) -ge 235 && echo "1"), 1) +SYSTEMD_UNIT = system/netdata.service.v235 +else +SYSTEMD_UNIT = system/netdata.service +endif + +ifeq ($(shell test `uname -m` != "x86_64" && echo "1"), 1) +HAVE_EBPF = 0 +EBPF_CONFIG = --disable-ebpf +else +HAVE_EBPF = 1 +endif + +%: + dh $@ + +override_dh_installinit: + echo "SystemD Version: $(SYSTEMD_VERSION)" + echo "Using SystemD Unit: $(SYSTEMD_UNIT)" + cp -v $(SYSTEMD_UNIT) debian/netdata.service + + dh_systemd_enable + dh_installinit + +override_dh_auto_configure: + if [ $(HAVE_EBPF) -eq 1 ]; then \ + packaging/bundle-libbpf.sh . ${TOP}/usr/libexec/netdata/plugins.d; \ + packaging/bundle-ebpf-co-re.sh . ${TOP}/usr/libexec/netdata/plugins.d; \ + fi + autoreconf -ivf + dh_auto_configure -- --prefix=/usr --sysconfdir=/etc --localstatedir=/var --libdir=/usr/lib \ + --libexecdir=/usr/libexec --with-user=netdata --with-math --with-zlib --with-webdir=/var/lib/netdata/www \ + --disable-dependency-tracking $(EBPF_CONFIG) + +override_dh_install: + cp -v $(BASE_CONFIG) debian/netdata.conf + + dh_install + + # Set the CUPS plugin install rule + # + mkdir -p $(TOP)-plugin-cups/usr/libexec/netdata/plugins.d + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/cups.plugin \ + $(TOP)-plugin-cups/usr/libexec/netdata/plugins.d/cups.plugin + + # Add free IPMI plugin install rules + # + mkdir -p $(TOP)-plugin-freeipmi/usr/libexec/netdata/plugins.d + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/freeipmi.plugin \ + $(TOP)-plugin-freeipmi/usr/libexec/netdata/plugins.d/freeipmi.plugin + + # Set the rest of the software in the main package + # + cp -rp $(TEMPTOP)/usr $(TOP) + cp -rp $(TEMPTOP)/var $(TOP) + cp -rp $(TEMPTOP)/etc $(TOP) + mkdir -p "$(TOP)/var/log/netdata" + mkdir -p "$(TOP)/var/cache/netdata" + mkdir -p "$(TOP)/var/run/netdata" + + # Copy the updater script + # + cp -v packaging/installer/netdata-updater.sh $(TOP)/usr/libexec/netdata/netdata-updater.sh + + # Move files that local user shouldn't be editing to /usr/share/netdata + # + mkdir -p "$(TOP)/usr/share/netdata/www" + for D in $$(find "$(TOP)/var/lib/netdata/www/" -maxdepth 1 -type d -printf '%f '); do \ + echo Relocating $$D; \ + mv "$(TOP)/var/lib/netdata/www/$$D" "$(TOP)/usr/share/netdata/www/$$D"; \ + ln -s "/usr/share/netdata/www/$$D" "$(TOP)/var/lib/netdata/www/$$D"; \ + done + + if [ $(HAVE_EBPF) -eq 1 ]; then \ + packaging/bundle-ebpf.sh . ${TOP}/usr/libexec/netdata/plugins.d; \ + fi + + # Install go + # + debian/install_go.sh $$(cat ${CURDIR}/packaging/go.d.version) $(TOP)/usr/lib/netdata $(TOP)/usr/libexec/netdata + +override_dh_installdocs: + dh_installdocs + + find . \ + -name README.md \ + -not -path './.travis/*' \ + -not -path './debian/*' \ + -not -path './contrib/*' \ + -exec cp \ + --parents \ + --target $(TOP)/usr/share/doc/netdata/ \ + {} \; + +override_dh_fixperms: + dh_fixperms + + # Updater script should be executable + # + chmod 0755 $(TOP)/usr/libexec/netdata/netdata-updater.sh + + # apps.plugin should only be runnable by the netdata user. It will be + # given extra capabilities in the postinst script. + # + chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/apps.plugin + chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/perf.plugin + chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/slabinfo.plugin + chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/go.d.plugin + + # CUPS plugin package + chmod 0750 $(TOP)-plugin-cups/usr/libexec/netdata/plugins.d/cups.plugin + + # freeIPMI plugin package + chmod 4750 $(TOP)-plugin-freeipmi/usr/libexec/netdata/plugins.d/freeipmi.plugin + +override_dh_installlogrotate: + cp system/netdata.logrotate debian/netdata.logrotate + dh_installlogrotate + +override_dh_clean: + dh_clean + + # Tidy up copied/generated files + # + -[ -r $(CURDIR)/debian/netdata.logrotate ] && rm $(CURDIR)/debian/netdata.logrotate + -[ -r $(CURDIR)/debian/netdata.conffiles ] && rm $(CURDIR)/debian/netdata.conffiles diff --git a/contrib/debian/source/format b/contrib/debian/source/format new file mode 100644 index 0000000..89ae9db --- /dev/null +++ b/contrib/debian/source/format @@ -0,0 +1 @@ +3.0 (native) diff --git a/contrib/rhel/build-netdata-rpm.sh b/contrib/rhel/build-netdata-rpm.sh new file mode 100755 index 0000000..e48c3c1 --- /dev/null +++ b/contrib/rhel/build-netdata-rpm.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# docker run -it --rm centos:6.9 /bin/sh +# yum -y install rpm-build redhat-rpm-config yum-utils autoconf automake curl gcc git libmnl-devel libuuid-devel make pkgconfig zlib-devel + +cd "$(dirname "$0")/../../" || exit 1 +# shellcheck disable=SC1091 +source "packaging/installer/functions.sh" || exit 1 + +set -e + +run autoreconf -ivf +run ./configure --enable-maintainer-mode +run make dist + +typeset version="$(grep PACKAGE_VERSION < config.h | cut -d '"' -f 2)" +if [[ -z "${version}" ]]; then + run_failed "Cannot find netdata version." + exit 1 +fi + +if [[ "${version//-/}" != "$version" ]]; then + # Remove all -* and _* suffixes to be as close as netdata release + typeset versionfix="${version%%-*}"; versionfix="${versionfix%%_*}" + # Append the current datetime fox a 'unique' build + versionfix+="_$(date '+%m%d%H%M%S')" + # And issue hints & details on why this failed, and how to fix it + run_failed "Current version contains '-' which is forbidden by rpm. You must create a git annotated tag and rerun this script. Example:" + run_failed " git tag -a $versionfix -m 'Release by $(id -nu) on $(uname -n)' && $0" + exit 1 +fi + + +typeset tgz="netdata-${version}.tar.gz" +if [[ ! -f "${tgz}" ]]; then + run_failed "Cannot find the generated tar.gz file '${tgz}'" + exit 1 +fi + +typeset srpm="$(run rpmbuild -ts "${tgz}" | cut -d ' ' -f 2)" +if [[ -z "${srpm}" ]] || ! [[ -f "${srpm}" ]]; then + run_failed "Cannot find the generated SRPM file '${srpm}'" + exit 1 +fi + +#if which yum-builddep 2>/dev/null +#then +# run yum-builddep "${srpm}" +#elif which dnf 2>/dev/null +#then +# [ "${UID}" = 0 ] && run dnf builddep "${srpm}" +#fi + +run rpmbuild --rebuild "${srpm}" + +run_ok "All done! Packages created in '$(rpm -E '%_rpmdir/%_arch')'" diff --git a/coverity-scan.sh b/coverity-scan.sh new file mode 100755 index 0000000..9737545 --- /dev/null +++ b/coverity-scan.sh @@ -0,0 +1,208 @@ +#!/usr/bin/env bash +# +# Coverity scan script +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Costa Tsaousis (costa@netdata.cloud) +# Author : Pawel Krupa (paulfantom) +# Author : Pavlos Emm. Katsoulakis (paul@netdata.cloud) +# shellcheck disable=SC1091,SC2230,SC2086 + +# To run manually, save configuration to .coverity-scan.conf like this: +# +# the repository to report to coverity - devs can set here their own fork +# REPOSITORY="netdata/netdata" +# +# the email of the developer, as given to coverity +# COVERITY_SCAN_SUBMIT_MAIL="you@example.com" +# +# the token given by coverity to the developer +# COVERITY_SCAN_TOKEN="TOKEN taken from Coverity site" +# +# the absolute path of the cov-build - optional +# COVERITY_BUILD_PATH="/opt/cov-analysis-linux64-2021.12/bin/cov-build" +# +# when set, the script will print on screen the curl command that submits the build to coverity +# this includes the token, so the default is not to print it. +# COVERITY_SUBMIT_DEBUG=1 +# +# Override the standard coverity build version we know is supported +# COVERITY_BUILD_VERSION="cov-analysis-linux64-2019.03" +# +# All these variables can also be exported before running this script. +# +# If the first parameter of this script is "install", +# coverity build tools will be downloaded and installed in /opt/coverity + +set -e + +INSTALL_DIR="/opt" + +# the version of coverity to use +COVERITY_BUILD_VERSION="${COVERITY_BUILD_VERSION:-cov-analysis-linux64-2022.06}" + +# TODO: For some reasons this does not fully load on Debian 10 (Haven't checked if it happens on other distros yet), it breaks +source packaging/installer/functions.sh || echo "Failed to fully load the functions library" + +cpus=$(find_processors) +[ -z "${cpus}" ] && cpus=1 + +if [ -f ".coverity-scan.conf" ]; then + source ".coverity-scan.conf" +fi + +repo="${REPOSITORY}" +if [ -z "${repo}" ]; then + fatal "export variable REPOSITORY or set it in .coverity-scan.conf" +fi +repo="${repo//\//%2F}" + +email="${COVERITY_SCAN_SUBMIT_MAIL}" +if [ -z "${email}" ]; then + fatal "export variable COVERITY_SCAN_SUBMIT_MAIL or set it in .coverity-scan.conf" +fi + +token="${COVERITY_SCAN_TOKEN}" +if [ -z "${token}" ]; then + fatal "export variable COVERITY_SCAN_TOKEN or set it in .coverity-scan.conf" +fi + +if ! command -v curl > /dev/null 2>&1; then + fatal "CURL is required for coverity scan to work" +fi + +# only print the output of a command +# when debugging is enabled +# used to hide the token when debugging is not enabled +debugrun() { + if [ "${COVERITY_SUBMIT_DEBUG}" = "1" ]; then + run "${@}" + return $? + else + "${@}" + return $? + fi +} + +scanit() { + progress "Scanning using coverity" + COVERITY_PATH=$(find "${INSTALL_DIR}" -maxdepth 1 -name 'cov*linux*') + export PATH=${PATH}:${COVERITY_PATH}/bin/ + covbuild="${COVERITY_BUILD_PATH}" + [ -z "${covbuild}" ] && covbuild="$(which cov-build 2> /dev/null || command -v cov-build 2> /dev/null)" + + if [ -z "${covbuild}" ]; then + fatal "Cannot find 'cov-build' binary in \$PATH. Export variable COVERITY_BUILD_PATH or set it in .coverity-scan.conf" + elif [ ! -x "${covbuild}" ]; then + fatal "The command '${covbuild}' is not executable. Export variable COVERITY_BUILD_PATH or set it in .coverity-scan.conf" + fi + + version="$(grep "^#define PACKAGE_VERSION" config.h | cut -d '"' -f 2)" + progress "Working on netdata version: ${version}" + + progress "Cleaning up old builds..." + run make clean || echo >&2 "Nothing to clean" + + [ -d "cov-int" ] && rm -rf "cov-int" + + [ -f netdata-coverity-analysis.tgz ] && run rm netdata-coverity-analysis.tgz + + progress "Configuring netdata source..." + + run autoreconf -ivf + run ./configure ${OTHER_OPTIONS} + + progress "Analyzing netdata..." + run "${covbuild}" --dir cov-int make -j${cpus} + + echo >&2 "Compressing analysis..." + run tar czvf netdata-coverity-analysis.tgz cov-int + + echo >&2 "Sending analysis to coverity for netdata version ${version} ..." + COVERITY_SUBMIT_RESULT=$(debugrun curl --progress-bar \ + --form token="${token}" \ + --form email="${email}" \ + --form file=@netdata-coverity-analysis.tgz \ + --form version="${version}" \ + --form description="netdata, monitor everything, in real-time." \ + https://scan.coverity.com/builds?project="${repo}") + + echo "${COVERITY_SUBMIT_RESULT}" | grep -q -e 'Build successfully submitted' || echo >&2 "scan results were not pushed to coverity. Message was: ${COVERITY_SUBMIT_RESULT}" + + progress "Coverity scan completed" +} + +installit() { + ORIGINAL_DIR="${PWD}" + TMP_DIR="$(mktemp -d /tmp/netdata-coverity-scan-XXXXX)" + progress "Downloading coverity in ${TMP_DIR}..." + cd "${TMP_DIR}" + + debugrun curl --remote-name --remote-header-name --show-error --location --data "token=${token}&project=${repo}" https://scan.coverity.com/download/linux64 + + if [ -f "${COVERITY_BUILD_VERSION}.tar.gz" ]; then + progress "Installing coverity..." + cd "${INSTALL_DIR}" + + run sudo tar -z -x -f "${TMP_DIR}/${COVERITY_BUILD_VERSION}.tar.gz" || exit 1 + rm "${TMP_DIR}/${COVERITY_BUILD_VERSION}.tar.gz" + COVERITY_PATH=$(find "${INSTALL_DIR}" -maxdepth 1 -name 'cov*linux*') + export PATH=${PATH}:${COVERITY_PATH}/bin/ + elif find . -name "*.tar.gz" > /dev/null 2>&1; then + fatal "Downloaded coverity tool tarball does not appear to be the version we were expecting, exiting." + else + fatal "Failed to download coverity tool tarball!" + fi + + # Validate the installation + covbuild="$(which cov-build 2> /dev/null || command -v cov-build 2> /dev/null)" + if [ -z "$covbuild" ]; then + fatal "Failed to install coverity." + fi + + progress "Coverity scan tools are installed." + cd "$ORIGINAL_DIR" + + # Clean temp directory + [ -n "${TMP_DIR}" ] && rm -rf "${TMP_DIR}" + return 0 +} + +OTHER_OPTIONS="--disable-lto" +OTHER_OPTIONS+=" --with-zlib" +OTHER_OPTIONS+=" --with-math" +OTHER_OPTIONS+=" --enable-https" +OTHER_OPTIONS+=" --enable-jsonc" +OTHER_OPTIONS+=" --enable-plugin-nfacct" +OTHER_OPTIONS+=" --enable-plugin-freeipmi" +OTHER_OPTIONS+=" --enable-plugin-cups" +OTHER_OPTIONS+=" --enable-exporting-prometheus-remote-write" +# TODO: enable these plugins too +#OTHER_OPTIONS+=" --enable-plugin-xenstat" +#OTHER_OPTIONS+=" --enable-exporting-kinesis" +#OTHER_OPTIONS+=" --enable-exporting-mongodb" + +FOUND_OPTS="NO" +while [ -n "${1}" ]; do + if [ "${1}" = "--with-install" ]; then + progress "Running coverity install" + installit + shift 1 + elif [ -n "${1}" ]; then + # Clear the default arguments, once you bump into the first argument + if [ "${FOUND_OPTS}" = "NO" ]; then + OTHER_OPTIONS="${1}" + FOUND_OPTS="YES" + else + OTHER_OPTIONS+=" ${1}" + fi + + shift 1 + else + break + fi +done + +echo "Running coverity scan with extra options ${OTHER_OPTIONS}" +scanit "${OTHER_OPTIONS}" diff --git a/cppcheck.sh b/cppcheck.sh new file mode 100755 index 0000000..ebbeeaf --- /dev/null +++ b/cppcheck.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# echo >>/tmp/cppcheck.log "cppcheck ${*}" + +# shellcheck disable=SC2230 +cppcheck=$(which cppcheck 2>/dev/null || command -v cppcheck 2>/dev/null) +[ -z "${cppcheck}" ] && echo >&2 "install cppcheck." && exit 1 + +processors=$(grep -c ^processor /proc/cpuinfo) +[ $(( processors )) -lt 1 ] && processors=1 + +base="$(dirname "${0}")" +[ "${base}" = "." ] && base="${PWD}" + +cd "${base}" || exit 1 + +[ ! -d "cppcheck-build" ] && mkdir "cppcheck-build" + +file="${1}" +shift +# shellcheck disable=SC2235 +([ "${file}" = "${base}" ] || [ -z "${file}" ]) && file="${base}" + +"${cppcheck}" \ + -j ${processors} \ + --cppcheck-build-dir="cppcheck-build" \ + -I .. \ + --force \ + --enable=warning,performance,portability,information \ + --library=gnu \ + --library=posix \ + --suppress="unusedFunction:*" \ + --suppress="nullPointerRedundantCheck:*" \ + --suppress="readdirCalled:*" \ + "${file}" "${@}" diff --git a/daemon/Makefile.am b/daemon/Makefile.am new file mode 100644 index 0000000..d3102f6 --- /dev/null +++ b/daemon/Makefile.am @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +CLEANFILES = \ + anonymous-statistics.sh \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_noinst_DATA = \ + README.md \ + config/README.md \ + anonymous-statistics.sh.in \ + get-kubernetes-labels.sh.in \ + $(NULL) + +dist_plugins_SCRIPTS = \ + anonymous-statistics.sh \ + system-info.sh \ + get-kubernetes-labels.sh \ + $(NULL) diff --git a/daemon/README.md b/daemon/README.md new file mode 100644 index 0000000..c5951c6 --- /dev/null +++ b/daemon/README.md @@ -0,0 +1,520 @@ + + +# Netdata daemon + +## Starting netdata + +- You can start Netdata by executing it with `/usr/sbin/netdata` (the installer will also start it). + +- You can stop Netdata by killing it with `killall netdata`. You can stop and start Netdata at any point. When + exiting, the [database engine](/database/engine/README.md) saves metrics to `/var/cache/netdata/dbengine/` so that + it can continue when started again. + +Access to the web site, for all graphs, is by default on port `19999`, so go to: + +```sh +http://127.0.0.1:19999/ +``` + +You can get the running config file at any time, by accessing `http://127.0.0.1:19999/netdata.conf`. + +### Starting Netdata at boot + +In the `system` directory you can find scripts and configurations for the +various distros. + +#### systemd + +The installer already installs `netdata.service` if it detects a systemd system. + +To install `netdata.service` by hand, run: + +```sh +# stop Netdata +killall netdata + +# copy netdata.service to systemd +cp system/netdata.service /etc/systemd/system/ + +# let systemd know there is a new service +systemctl daemon-reload + +# enable Netdata at boot +systemctl enable netdata + +# start Netdata +systemctl start netdata +``` + +#### init.d + +In the system directory you can find `netdata-lsb`. Copy it to the proper place according to your distribution +documentation. For Ubuntu, this can be done via running the following commands as root. + +```sh +# copy the Netdata startup file to /etc/init.d +cp system/netdata-lsb /etc/init.d/netdata + +# make sure it is executable +chmod +x /etc/init.d/netdata + +# enable it +update-rc.d netdata defaults +``` + +#### openrc (gentoo) + +In the `system` directory you can find `netdata-openrc`. Copy it to the proper +place according to your distribution documentation. + +#### CentOS / Red Hat Enterprise Linux + +For older versions of RHEL/CentOS that don't have systemd, an init script is included in the system directory. This can +be installed by running the following commands as root. + +```sh +# copy the Netdata startup file to /etc/init.d +cp system/netdata-init-d /etc/init.d/netdata + +# make sure it is executable +chmod +x /etc/init.d/netdata + +# enable it +chkconfig --add netdata +``` + +_There have been some recent work on the init script, see PR +_ + +#### other systems + +You can start Netdata by running it from `/etc/rc.local` or equivalent. + +## Command line options + +Normally you don't need to supply any command line arguments to netdata. + +If you do though, they override the configuration equivalent options. + +To get a list of all command line parameters supported, run: + +```sh +netdata -h +``` + +The program will print the supported command line parameters. + +The command line options of the Netdata 1.10.0 version are the following: + +```sh + ^ + |.-. .-. .-. .-. . netdata + | '-' '-' '-' '-' real-time performance monitoring, done right! + +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---> + + Copyright (C) 2016-2022, Netdata, Inc. + Released under GNU General Public License v3 or later. + All rights reserved. + + Home Page : https://netdata.cloud + Source Code: https://github.com/netdata/netdata + Docs : https://learn.netdata.cloud + Support : https://github.com/netdata/netdata/issues + License : https://github.com/netdata/netdata/blob/master/LICENSE.md + + Twitter : https://twitter.com/linuxnetdata + LinkedIn : https://linkedin.com/company/netdata-cloud/ + Facebook : https://facebook.com/linuxnetdata/ + + + SYNOPSIS: netdata [options] + + Options: + + -c filename Configuration file to load. + Default: /etc/netdata/netdata.conf + + -D Do not fork. Run in the foreground. + Default: run in the background + + -h Display this help message. + + -P filename File to save a pid while running. + Default: do not save pid to a file + + -i IP The IP address to listen to. + Default: all IP addresses IPv4 and IPv6 + + -p port API/Web port to use. + Default: 19999 + + -s path Prefix for /proc and /sys (for containers). + Default: no prefix + + -t seconds The internal clock of netdata. + Default: 1 + + -u username Run as user. + Default: netdata + + -v Print netdata version and exit. + + -V Print netdata version and exit. + + -W options See Advanced options below. + + + Advanced options: + + -W stacksize=N Set the stacksize (in bytes). + + -W debug_flags=N Set runtime tracing to debug.log. + + -W unittest Run internal unittests and exit. + + -W createdataset=N Create a DB engine dataset of N seconds and exit. + + -W set section option value + set netdata.conf option from the command line. + + -W buildinfo Print the version, the configure options, + a list of optional features, and whether they + are enabled or not. + + -W buildinfojson Print the version, the configure options, + a list of optional features, and whether they + are enabled or not, in JSON format. + + -W simple-pattern pattern string + Check if string matches pattern and exit. + + -W "claim -token=TOKEN -rooms=ROOM1,ROOM2 url=https://api.netdata.cloud" + Connect the agent to the workspace rooms pointed to by TOKEN and ROOM*. + + Signals netdata handles: + + - HUP Close and reopen log files. + - USR1 Save internal DB to disk. + - USR2 Reload health configuration. +``` + +You can send commands during runtime via [netdatacli](/cli/README.md). + +## Log files + +Netdata uses 3 log files: + +1. `error.log` +2. `access.log` +3. `debug.log` + +Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log` and +`access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled (Netdata needs to be compiled +with debugging enabled). + +Log files are stored in `/var/log/netdata/` by default. + +### error.log + +The `error.log` is the `stderr` of the `netdata` daemon and all external plugins +run by `netdata`. + +So if any process, in the Netdata process tree, writes anything to its standard error, +it will appear in `error.log`. + +For most Netdata programs (including standard external plugins shipped by netdata), the following lines may appear: + +| tag | description | +|:-:|:----------| +| `INFO` | Something important the user should know. | +| `ERROR` | Something that might disable a part of netdata.
The log line includes `errno` (if it is not zero). | +| `FATAL` | Something prevented a program from running.
The log line includes `errno` (if it is not zero) and the program exited. | + +So, when auto-detection of data collection fail, `ERROR` lines are logged and the relevant modules are disabled, but the +program continues to run. + +When a Netdata program cannot run at all, a `FATAL` line is logged. + +### access.log + +The `access.log` logs web requests. The format is: + +```txt +DATE: ID: (sent/all = SENT_BYTES/ALL_BYTES bytes PERCENT_COMPRESSION%, prep/sent/total PREP_TIME/SENT_TIME/TOTAL_TIME ms): ACTION CODE URL +``` + +where: + +- `ID` is the client ID. Client IDs are auto-incremented every time a client connects to netdata. +- `SENT_BYTES` is the number of bytes sent to the client, without the HTTP response header. +- `ALL_BYTES` is the number of bytes of the response, before compression. +- `PERCENT_COMPRESSION` is the percentage of traffic saved due to compression. +- `PREP_TIME` is the time in milliseconds needed to prepared the response. +- `SENT_TIME` is the time in milliseconds needed to sent the response to the client. +- `TOTAL_TIME` is the total time the request was inside Netdata (from the first byte of the request to the last byte + of the response). +- `ACTION` can be `filecopy`, `options` (used in CORS), `data` (API call). + +### debug.log + +See [debugging](#debugging). + +## Netdata process scheduling policy + +By default Netdata versions prior to 1.34.0 run with the `idle` process scheduling policy, so that it uses CPU +resources, only when there is idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps on +the charts. + +Starting with version 1.34.0, Netdata instead uses the `batch` scheduling policy by default. This largely eliminates +issues with gaps in charts on busy systems while still keeping the impact on the rest of the system low. + +You can set Netdata scheduling policy in `netdata.conf`, like this: + +```conf +[global] + process scheduling policy = idle +``` + +You can use the following: + +| policy | description | +| :-----------------------: | :---------- | +| `idle` | use CPU only when there is spare - this is lower than nice 19 - it is the default for Netdata and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps at the charts. | +| `other`
or
`nice` | this is the default policy for all processes under Linux. It provides dynamic priorities based on the `nice` level of each process. Check below for setting this `nice` level for netdata. | +| `batch` | This policy is similar to `other` in that it schedules the thread according to its dynamic priority (based on the `nice` value). The difference is that this policy will cause the scheduler to always assume that the thread is CPU-intensive. Consequently, the scheduler will apply a small scheduling penalty with respect to wake-up behavior, so that this thread is mildly disfavored in scheduling decisions. | +| `fifo` | `fifo` can be used only with static priorities higher than 0, which means that when a `fifo` threads becomes runnable, it will always immediately preempt any currently running `other`, `batch`, or `idle` thread. `fifo` is a simple scheduling algorithm without time slicing. | +| `rr` | a simple enhancement of `fifo`. Everything described above for `fifo` also applies to `rr`, except that each thread is allowed to run only for a maximum time quantum. | +| `keep`
or
`none` | do not set scheduling policy, priority or nice level - i.e. keep running with whatever it is set already (e.g. by systemd). | + +For more information see `man sched`. + +### scheduling priority for `rr` and `fifo` + +Once the policy is set to one of `rr` or `fifo`, the following will appear: + +```conf +[global] + process scheduling priority = 0 +``` + +These priorities are usually from 0 to 99. Higher numbers make the process more +important. + +### nice level for policies `other` or `batch` + +When the policy is set to `other`, `nice`, or `batch`, the following will appear: + +```conf +[global] + process nice level = 19 +``` + +## scheduling settings and systemd + +Netdata will not be able to set its scheduling policy and priority to more important values when it is started as the +`netdata` user (systemd case). + +You can set these settings at `/etc/systemd/system/netdata.service`: + +```sh +[Service] +# By default Netdata switches to scheduling policy idle, which makes it use CPU, only +# when there is spare available. +# Valid policies: other (the system default) | batch | idle | fifo | rr +#CPUSchedulingPolicy=other + +# This sets the maximum scheduling priority Netdata can set (for policies: rr and fifo). +# Netdata (via [global].process scheduling priority in netdata.conf) can only lower this value. +# Priority gets values 1 (lowest) to 99 (highest). +#CPUSchedulingPriority=1 + +# For scheduling policy 'other' and 'batch', this sets the lowest niceness of netdata. +# Netdata (via [global].process nice level in netdata.conf) can only increase the value set here. +#Nice=0 +``` + +Run `systemctl daemon-reload` to reload these changes. + +Now, tell Netdata to keep these settings, as set by systemd, by editing +`netdata.conf` and setting: + +```conf +[global] + process scheduling policy = keep +``` + +Using the above, whatever scheduling settings you have set at `netdata.service` +will be maintained by netdata. + +### Example 1: Netdata with nice -1 on non-systemd systems + +On a system that is not based on systemd, to make Netdata run with nice level -1 (a little bit higher to the default for +all programs), edit `netdata.conf` and set: + +```conf +[global] + process scheduling policy = other + process nice level = -1 +``` + +then execute this to [restart Netdata](/docs/configure/start-stop-restart.md): + +```sh +sudo systemctl restart netdata +``` + +#### Example 2: Netdata with nice -1 on systemd systems + +On a system that is based on systemd, to make Netdata run with nice level -1 (a little bit higher to the default for all +programs), edit `netdata.conf` and set: + +```conf +[global] + process scheduling policy = keep +``` + +edit /etc/systemd/system/netdata.service and set: + +```sh +[Service] +CPUSchedulingPolicy=other +Nice=-1 +``` + +then execute: + +```sh +sudo systemctl daemon-reload +sudo systemctl restart netdata +``` + +## Virtual memory + +You may notice that netdata's virtual memory size, as reported by `ps` or `/proc/pid/status` (or even netdata's +applications virtual memory chart) is unrealistically high. + +For example, it may be reported to be 150+MB, even if the resident memory size is just 25MB. Similar values may be +reported for Netdata plugins too. + +Check this for example: A Netdata installation with default settings on Ubuntu +16.04LTS. The top chart is **real memory used**, while the bottom one is +**virtual memory**: + +![image](https://cloud.githubusercontent.com/assets/2662304/19013772/5eb7173e-87e3-11e6-8f2b-a2ccfeb06faf.png) + +### Why does this happen? + +The system memory allocator allocates virtual memory arenas, per thread running. On Linux systems this defaults to 16MB +per thread on 64 bit machines. So, if you get the difference between real and virtual memory and divide it by 16MB you +will roughly get the number of threads running. + +The system does this for speed. Having a separate memory arena for each thread, allows the threads to run in parallel in +multi-core systems, without any locks between them. + +This behaviour is system specific. For example, the chart above when running +Netdata on Alpine Linux (that uses **musl** instead of **glibc**) is this: + +![image](https://cloud.githubusercontent.com/assets/2662304/19013807/7cf5878e-87e4-11e6-9651-082e68701eab.png) + +### Can we do anything to lower it? + +Since Netdata already uses minimal memory allocations while it runs (i.e. it adapts its memory on start, so that while +repeatedly collects data it does not do memory allocations), it already instructs the system memory allocator to +minimize the memory arenas for each thread. We have also added [2 configuration +options](https://github.com/netdata/netdata/blob/5645b1ee35248d94e6931b64a8688f7f0d865ec6/src/main.c#L410-L418) to allow +you tweak these settings: `glibc malloc arena max for plugins` and `glibc malloc arena max for netdata`. + +However, even if we instructed the memory allocator to use just one arena, it +seems it allocates an arena per thread. + +Netdata also supports `jemalloc` and `tcmalloc`, however both behave exactly the +same to the glibc memory allocator in this aspect. + +### Is this a problem? + +No, it is not. + +Linux reserves real memory (physical RAM) in pages (on x86 machines pages are 4KB each). So even if the system memory +allocator is allocating huge amounts of virtual memory, only the 4KB pages that are actually used are reserving physical +RAM. The **real memory** chart on Netdata application section, shows the amount of physical memory these pages occupy(it +accounts the whole pages, even if parts of them are actually used). + +## Debugging + +When you compile Netdata with debugging: + +1. compiler optimizations for your CPU are disabled (Netdata will run somewhat slower) + +2. a lot of code is added all over netdata, to log debug messages to `/var/log/netdata/debug.log`. However, nothing is + printed by default. Netdata allows you to select which sections of Netdata you want to trace. Tracing is activated + via the config option `debug flags`. It accepts a hex number, to enable or disable specific sections. You can find + the options supported at [log.h](https://raw.githubusercontent.com/netdata/netdata/master/libnetdata/log/log.h). + They are the `D_*` defines. The value `0xffffffffffffffff` will enable all possible debug flags. + +Once Netdata is compiled with debugging and tracing is enabled for a few sections, the file `/var/log/netdata/debug.log` +will contain the messages. + +> Do not forget to disable tracing (`debug flags = 0`) when you are done tracing. The file `debug.log` can grow too +> fast. + +### compiling Netdata with debugging + +To compile Netdata with debugging, use this: + +```sh +# step into the Netdata source directory +cd /usr/src/netdata.git + +# run the installer with debugging enabled +CFLAGS="-O1 -ggdb -DNETDATA_INTERNAL_CHECKS=1" ./netdata-installer.sh +``` + +The above will compile and install Netdata with debugging info embedded. You can now use `debug flags` to set the +section(s) you need to trace. + +### debugging crashes + +We have made the most to make Netdata crash free. If however, Netdata crashes on your system, it would be very helpful +to provide stack traces of the crash. Without them, is will be almost impossible to find the issue (the code base is +quite large to find such an issue by just observing it). + +To provide stack traces, **you need to have Netdata compiled with debugging**. There is no need to enable any tracing +(`debug flags`). + +Then you need to be in one of the following 2 cases: + +1. Netdata crashes and you have a core dump + +2. you can reproduce the crash + +If you are not on these cases, you need to find a way to be (i.e. if your system does not produce core dumps, check your +distro documentation to enable them). + +### Netdata crashes and you have a core dump + +> you need to have Netdata compiled with debugging info for this to work (check above) + +Run the following command and post the output on a github issue. + +```sh +gdb $(which netdata) /path/to/core/dump +``` + +### you can reproduce a Netdata crash on your system + +> you need to have Netdata compiled with debugging info for this to work (check above) + +Install the package `valgrind` and run: + +```sh +valgrind $(which netdata) -D +``` + +Netdata will start and it will be a lot slower. Now reproduce the crash and `valgrind` will dump on your console the +stack trace. Open a new github issue and post the output. + + diff --git a/daemon/analytics.c b/daemon/analytics.c new file mode 100644 index 0000000..3d0e514 --- /dev/null +++ b/daemon/analytics.c @@ -0,0 +1,1034 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include "buildinfo.h" + +struct analytics_data analytics_data; +extern void analytics_exporting_connectors (BUFFER *b); +extern void analytics_exporting_connectors_ssl (BUFFER *b); +extern void analytics_build_info (BUFFER *b); +extern int aclk_connected; + +struct collector { + const char *plugin; + const char *module; +}; + +struct array_printer { + int c; + BUFFER *both; +}; + +/* + * Debug logging + */ +void analytics_log_data(void) +{ + debug(D_ANALYTICS, "NETDATA_CONFIG_STREAM_ENABLED : [%s]", analytics_data.netdata_config_stream_enabled); + debug(D_ANALYTICS, "NETDATA_CONFIG_MEMORY_MODE : [%s]", analytics_data.netdata_config_memory_mode); + debug(D_ANALYTICS, "NETDATA_CONFIG_EXPORTING_ENABLED : [%s]", analytics_data.netdata_config_exporting_enabled); + debug(D_ANALYTICS, "NETDATA_EXPORTING_CONNECTORS : [%s]", analytics_data.netdata_exporting_connectors); + debug(D_ANALYTICS, "NETDATA_ALLMETRICS_PROMETHEUS_USED : [%s]", analytics_data.netdata_allmetrics_prometheus_used); + debug(D_ANALYTICS, "NETDATA_ALLMETRICS_SHELL_USED : [%s]", analytics_data.netdata_allmetrics_shell_used); + debug(D_ANALYTICS, "NETDATA_ALLMETRICS_JSON_USED : [%s]", analytics_data.netdata_allmetrics_json_used); + debug(D_ANALYTICS, "NETDATA_DASHBOARD_USED : [%s]", analytics_data.netdata_dashboard_used); + debug(D_ANALYTICS, "NETDATA_COLLECTORS : [%s]", analytics_data.netdata_collectors); + debug(D_ANALYTICS, "NETDATA_COLLECTORS_COUNT : [%s]", analytics_data.netdata_collectors_count); + debug(D_ANALYTICS, "NETDATA_BUILDINFO : [%s]", analytics_data.netdata_buildinfo); + debug(D_ANALYTICS, "NETDATA_CONFIG_PAGE_CACHE_SIZE : [%s]", analytics_data.netdata_config_page_cache_size); + debug(D_ANALYTICS, "NETDATA_CONFIG_MULTIDB_DISK_QUOTA : [%s]", analytics_data.netdata_config_multidb_disk_quota); + debug(D_ANALYTICS, "NETDATA_CONFIG_HTTPS_ENABLED : [%s]", analytics_data.netdata_config_https_enabled); + debug(D_ANALYTICS, "NETDATA_CONFIG_WEB_ENABLED : [%s]", analytics_data.netdata_config_web_enabled); + debug(D_ANALYTICS, "NETDATA_CONFIG_RELEASE_CHANNEL : [%s]", analytics_data.netdata_config_release_channel); + debug(D_ANALYTICS, "NETDATA_MIRRORED_HOST_COUNT : [%s]", analytics_data.netdata_mirrored_host_count); + debug(D_ANALYTICS, "NETDATA_MIRRORED_HOSTS_REACHABLE : [%s]", analytics_data.netdata_mirrored_hosts_reachable); + debug(D_ANALYTICS, "NETDATA_MIRRORED_HOSTS_UNREACHABLE : [%s]", analytics_data.netdata_mirrored_hosts_unreachable); + debug(D_ANALYTICS, "NETDATA_NOTIFICATION_METHODS : [%s]", analytics_data.netdata_notification_methods); + debug(D_ANALYTICS, "NETDATA_ALARMS_NORMAL : [%s]", analytics_data.netdata_alarms_normal); + debug(D_ANALYTICS, "NETDATA_ALARMS_WARNING : [%s]", analytics_data.netdata_alarms_warning); + debug(D_ANALYTICS, "NETDATA_ALARMS_CRITICAL : [%s]", analytics_data.netdata_alarms_critical); + debug(D_ANALYTICS, "NETDATA_CHARTS_COUNT : [%s]", analytics_data.netdata_charts_count); + debug(D_ANALYTICS, "NETDATA_METRICS_COUNT : [%s]", analytics_data.netdata_metrics_count); + debug(D_ANALYTICS, "NETDATA_CONFIG_IS_PARENT : [%s]", analytics_data.netdata_config_is_parent); + debug(D_ANALYTICS, "NETDATA_CONFIG_HOSTS_AVAILABLE : [%s]", analytics_data.netdata_config_hosts_available); + debug(D_ANALYTICS, "NETDATA_HOST_CLOUD_AVAILABLE : [%s]", analytics_data.netdata_host_cloud_available); + debug(D_ANALYTICS, "NETDATA_HOST_ACLK_AVAILABLE : [%s]", analytics_data.netdata_host_aclk_available); + debug(D_ANALYTICS, "NETDATA_HOST_ACLK_PROTOCOL : [%s]", analytics_data.netdata_host_aclk_protocol); + debug(D_ANALYTICS, "NETDATA_HOST_ACLK_IMPLEMENTATION : [%s]", analytics_data.netdata_host_aclk_implementation); + debug(D_ANALYTICS, "NETDATA_HOST_AGENT_CLAIMED : [%s]", analytics_data.netdata_host_agent_claimed); + debug(D_ANALYTICS, "NETDATA_HOST_CLOUD_ENABLED : [%s]", analytics_data.netdata_host_cloud_enabled); + debug(D_ANALYTICS, "NETDATA_CONFIG_HTTPS_AVAILABLE : [%s]", analytics_data.netdata_config_https_available); + debug(D_ANALYTICS, "NETDATA_INSTALL_TYPE : [%s]", analytics_data.netdata_install_type); + debug(D_ANALYTICS, "NETDATA_PREBUILT_DISTRO : [%s]", analytics_data.netdata_prebuilt_distro); + debug(D_ANALYTICS, "NETDATA_CONFIG_IS_PRIVATE_REGISTRY : [%s]", analytics_data.netdata_config_is_private_registry); + debug(D_ANALYTICS, "NETDATA_CONFIG_USE_PRIVATE_REGISTRY: [%s]", analytics_data.netdata_config_use_private_registry); + debug(D_ANALYTICS, "NETDATA_CONFIG_OOM_SCORE : [%s]", analytics_data.netdata_config_oom_score); +} + +/* + * Free data + */ +void analytics_free_data(void) +{ + freez(analytics_data.netdata_config_stream_enabled); + freez(analytics_data.netdata_config_memory_mode); + freez(analytics_data.netdata_config_exporting_enabled); + freez(analytics_data.netdata_exporting_connectors); + freez(analytics_data.netdata_allmetrics_prometheus_used); + freez(analytics_data.netdata_allmetrics_shell_used); + freez(analytics_data.netdata_allmetrics_json_used); + freez(analytics_data.netdata_dashboard_used); + freez(analytics_data.netdata_collectors); + freez(analytics_data.netdata_collectors_count); + freez(analytics_data.netdata_buildinfo); + freez(analytics_data.netdata_config_page_cache_size); + freez(analytics_data.netdata_config_multidb_disk_quota); + freez(analytics_data.netdata_config_https_enabled); + freez(analytics_data.netdata_config_web_enabled); + freez(analytics_data.netdata_config_release_channel); + freez(analytics_data.netdata_mirrored_host_count); + freez(analytics_data.netdata_mirrored_hosts_reachable); + freez(analytics_data.netdata_mirrored_hosts_unreachable); + freez(analytics_data.netdata_notification_methods); + freez(analytics_data.netdata_alarms_normal); + freez(analytics_data.netdata_alarms_warning); + freez(analytics_data.netdata_alarms_critical); + freez(analytics_data.netdata_charts_count); + freez(analytics_data.netdata_metrics_count); + freez(analytics_data.netdata_config_is_parent); + freez(analytics_data.netdata_config_hosts_available); + freez(analytics_data.netdata_host_cloud_available); + freez(analytics_data.netdata_host_aclk_available); + freez(analytics_data.netdata_host_aclk_protocol); + freez(analytics_data.netdata_host_aclk_implementation); + freez(analytics_data.netdata_host_agent_claimed); + freez(analytics_data.netdata_host_cloud_enabled); + freez(analytics_data.netdata_config_https_available); + freez(analytics_data.netdata_install_type); + freez(analytics_data.netdata_config_is_private_registry); + freez(analytics_data.netdata_config_use_private_registry); + freez(analytics_data.netdata_config_oom_score); + freez(analytics_data.netdata_prebuilt_distro); +} + +/* + * Set a numeric/boolean data with a value + */ +void analytics_set_data(char **name, char *value) +{ + if (*name) { + analytics_data.data_length -= strlen(*name); + freez(*name); + } + *name = strdupz(value); + analytics_data.data_length += strlen(*name); +} + +/* + * Set a string data with a value + */ +void analytics_set_data_str(char **name, char *value) +{ + size_t value_string_len; + if (*name) { + analytics_data.data_length -= strlen(*name); + freez(*name); + } + value_string_len = strlen(value) + 4; + *name = mallocz(sizeof(char) * value_string_len); + snprintfz(*name, value_string_len - 1, "\"%s\"", value); + analytics_data.data_length += strlen(*name); +} + +/* + * Get data, used by web api v1 + */ +void analytics_get_data(char *name, BUFFER *wb) +{ + buffer_strcat(wb, name); +} + +/* + * Log hits on the allmetrics page, with prometheus parameter + */ +void analytics_log_prometheus(void) +{ + if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.prometheus_hits < ANALYTICS_MAX_PROMETHEUS_HITS)) { + analytics_data.prometheus_hits++; + char b[7]; + snprintfz(b, 6, "%d", analytics_data.prometheus_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b); + } +} + +/* + * Log hits on the allmetrics page, with shell parameter (or default) + */ +void analytics_log_shell(void) +{ + if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.shell_hits < ANALYTICS_MAX_SHELL_HITS)) { + analytics_data.shell_hits++; + char b[7]; + snprintfz(b, 6, "%d", analytics_data.shell_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b); + } +} + +/* + * Log hits on the allmetrics page, with json parameter + */ +void analytics_log_json(void) +{ + if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.json_hits < ANALYTICS_MAX_JSON_HITS)) { + analytics_data.json_hits++; + char b[7]; + snprintfz(b, 6, "%d", analytics_data.json_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b); + } +} + +/* + * Log hits on the dashboard, (when calling HELLO). + */ +void analytics_log_dashboard(void) +{ + if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.dashboard_hits < ANALYTICS_MAX_DASHBOARD_HITS)) { + analytics_data.dashboard_hits++; + char b[7]; + snprintfz(b, 6, "%d", analytics_data.dashboard_hits); + analytics_set_data(&analytics_data.netdata_dashboard_used, b); + } +} + +/* + * Called when setting the oom score + */ +void analytics_report_oom_score(long long int score){ + char b[7]; + snprintfz(b, 6, "%d", (int)score); + analytics_set_data(&analytics_data.netdata_config_oom_score, b); +} + +void analytics_mirrored_hosts(void) +{ + RRDHOST *host; + int count = 0; + int reachable = 0; + int unreachable = 0; + char b[11]; + + rrd_rdlock(); + rrdhost_foreach_read(host) + { + if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) + continue; + + netdata_mutex_lock(&host->receiver_lock); + ((host->receiver || host == localhost) ? reachable++ : unreachable++); + netdata_mutex_unlock(&host->receiver_lock); + + count++; + } + rrd_unlock(); + + snprintfz(b, 10, "%d", count); + analytics_set_data(&analytics_data.netdata_mirrored_host_count, b); + snprintfz(b, 10, "%d", reachable); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, b); + snprintfz(b, 10, "%d", unreachable); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, b); +} + +void analytics_exporters(void) +{ + //when no exporters are available, an empty string will be sent + //decide if something else is more suitable (but probably not null) + BUFFER *bi = buffer_create(1000); + analytics_exporting_connectors(bi); + analytics_set_data_str(&analytics_data.netdata_exporting_connectors, (char *)buffer_tostring(bi)); + buffer_free(bi); +} + +int collector_counter_callb(const DICTIONARY_ITEM *item __maybe_unused, void *entry, void *data) { + + struct array_printer *ap = (struct array_printer *)data; + struct collector *col = (struct collector *)entry; + + BUFFER *bt = ap->both; + + if (likely(ap->c)) { + buffer_strcat(bt, ","); + } + + buffer_strcat(bt, "{"); + buffer_strcat(bt, " \"plugin\": \""); + buffer_strcat(bt, col->plugin); + buffer_strcat(bt, "\", \"module\":\""); + buffer_strcat(bt, col->module); + buffer_strcat(bt, "\" }"); + + (ap->c)++; + + return 0; +} + +/* + * Create a JSON array of available collectors, same as in api/v1/info + */ +void analytics_collectors(void) +{ + RRDSET *st; + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + char name[500]; + BUFFER *bt = buffer_create(1000); + + rrdset_foreach_read(st, localhost) { + if(!rrdset_is_available_for_viewers(st)) + continue; + + struct collector col = { + .plugin = rrdset_plugin_name(st), + .module = rrdset_module_name(st) + }; + snprintfz(name, 499, "%s:%s", col.plugin, col.module); + dictionary_set(dict, name, &col, sizeof(struct collector)); + } + rrdset_foreach_done(st); + + struct array_printer ap; + ap.c = 0; + ap.both = bt; + + dictionary_walkthrough_read(dict, collector_counter_callb, &ap); + dictionary_destroy(dict); + + analytics_set_data(&analytics_data.netdata_collectors, (char *)buffer_tostring(ap.both)); + + { + char b[7]; + snprintfz(b, 6, "%d", ap.c); + analytics_set_data(&analytics_data.netdata_collectors_count, b); + } + + buffer_free(bt); +} + +/* + * Run alarm-notify.sh script using the dump_methods parameter + * SEND_CUSTOM is always available + */ +void analytics_alarms_notifications(void) +{ + char *script; + script = mallocz( + sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("alarm-notify.sh dump_methods") + 2)); + sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "alarm-notify.sh"); + if (unlikely(access(script, R_OK) != 0)) { + info("Alarm notify script %s not found.", script); + freez(script); + return; + } + + strcat(script, " dump_methods"); + + pid_t command_pid; + + debug(D_ANALYTICS, "Executing %s", script); + + BUFFER *b = buffer_create(1000); + int cnt = 0; + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); + if (fp_child_output) { + char line[200 + 1]; + + while (fgets(line, 200, fp_child_output) != NULL) { + char *end = line; + while (*end && *end != '\n') + end++; + *end = '\0'; + + if (likely(cnt)) + buffer_strcat(b, "|"); + + buffer_strcat(b, line); + + cnt++; + } + netdata_pclose(fp_child_input, fp_child_output, command_pid); + } + freez(script); + + analytics_set_data_str(&analytics_data.netdata_notification_methods, (char *)buffer_tostring(b)); + + buffer_free(b); +} + +void analytics_get_install_type(void) +{ + if (localhost->system_info->install_type == NULL) { + analytics_set_data_str(&analytics_data.netdata_install_type, "unknown"); + } else { + analytics_set_data_str(&analytics_data.netdata_install_type, localhost->system_info->install_type); + } + + if (localhost->system_info->prebuilt_dist != NULL) { + analytics_set_data_str(&analytics_data.netdata_prebuilt_distro, localhost->system_info->prebuilt_dist); + } +} + +/* + * Pick up if https is actually used + */ +void analytics_https(void) +{ + BUFFER *b = buffer_create(30); +#ifdef ENABLE_HTTPS + analytics_exporting_connectors_ssl(b); + buffer_strcat(b, netdata_ssl_client_ctx && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) && localhost->sender->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE ? "streaming|" : "|"); + buffer_strcat(b, netdata_ssl_srv_ctx ? "web" : ""); +#else + buffer_strcat(b, "||"); +#endif + + analytics_set_data_str(&analytics_data.netdata_config_https_available, (char *)buffer_tostring(b)); + buffer_free(b); +} + +void analytics_charts(void) +{ + RRDSET *st; + int c = 0; + + rrdset_foreach_read(st, localhost) + if(rrdset_is_available_for_viewers(st)) c++; + rrdset_foreach_done(st); + + { + char b[7]; + snprintfz(b, 6, "%d", c); + analytics_set_data(&analytics_data.netdata_charts_count, b); + } +} + +void analytics_metrics(void) +{ + RRDSET *st; + long int dimensions = 0; + rrdset_foreach_read(st, localhost) { + if (rrdset_is_available_for_viewers(st)) { + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN) || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) + continue; + dimensions++; + } + rrddim_foreach_done(rd); + } + } + rrdset_foreach_done(st); + + { + char b[7]; + snprintfz(b, 6, "%ld", dimensions); + analytics_set_data(&analytics_data.netdata_metrics_count, b); + } +} + +void analytics_alarms(void) +{ + int alarm_warn = 0, alarm_crit = 0, alarm_normal = 0; + char b[10]; + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_read(localhost, rc) { + if (unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) + continue; + + switch (rc->status) { + case RRDCALC_STATUS_WARNING: + alarm_warn++; + break; + case RRDCALC_STATUS_CRITICAL: + alarm_crit++; + break; + default: + alarm_normal++; + } + } + foreach_rrdcalc_in_rrdhost_done(rc); + + snprintfz(b, 9, "%d", alarm_normal); + analytics_set_data(&analytics_data.netdata_alarms_normal, b); + snprintfz(b, 9, "%d", alarm_warn); + analytics_set_data(&analytics_data.netdata_alarms_warning, b); + snprintfz(b, 9, "%d", alarm_crit); + analytics_set_data(&analytics_data.netdata_alarms_critical, b); +} + +/* + * Misc attributes to get (run from start) + */ +void analytics_misc(void) +{ +#ifdef ENABLE_ACLK + analytics_set_data(&analytics_data.netdata_host_cloud_available, "true"); + analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, "Next Generation"); +#else + analytics_set_data(&analytics_data.netdata_host_cloud_available, "false"); + analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, ""); +#endif + + analytics_set_data(&analytics_data.netdata_config_exporting_enabled, appconfig_get_boolean(&exporting_config, CONFIG_SECTION_EXPORTING, "enabled", CONFIG_BOOLEAN_NO) ? "true" : "false"); + + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "false"); + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "false"); + + if (strcmp( + config_get(CONFIG_SECTION_REGISTRY, "registry to announce", "https://registry.my-netdata.io"), + "https://registry.my-netdata.io")) + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "true"); + + //do we need both registry to announce and enabled to indicate that this is a private registry ? + if (config_get_boolean(CONFIG_SECTION_REGISTRY, "enabled", CONFIG_BOOLEAN_NO) && + web_server_mode != WEB_SERVER_MODE_NONE) + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "true"); +} + +void analytics_aclk(void) +{ +#ifdef ENABLE_ACLK + if (aclk_connected) { + analytics_set_data(&analytics_data.netdata_host_aclk_available, "true"); + analytics_set_data_str(&analytics_data.netdata_host_aclk_protocol, "New"); + } + else +#endif + analytics_set_data(&analytics_data.netdata_host_aclk_available, "false"); +} + +/* + * Get the meta data, called from the thread once after the original delay + * These are values that won't change during agent runtime, and therefore + * don't try to read them on each META event send + */ +void analytics_gather_immutable_meta_data(void) +{ + analytics_misc(); + analytics_exporters(); + analytics_https(); +} + +/* + * Get the meta data, called from the thread on every heartbeat, and right before the EXIT event + * These are values that can change between agent restarts, and therefore + * try to read them on each META event send + */ +void analytics_gather_mutable_meta_data(void) +{ + analytics_collectors(); + analytics_alarms(); + analytics_charts(); + analytics_metrics(); + analytics_aclk(); + analytics_mirrored_hosts(); + analytics_alarms_notifications(); + + analytics_set_data( + &analytics_data.netdata_config_is_parent, (rrdhost_hosts_available() > 1 || configured_as_parent()) ? "true" : "false"); + + char *claim_id = get_agent_claimid(); + analytics_set_data(&analytics_data.netdata_host_agent_claimed, claim_id ? "true" : "false"); + freez(claim_id); + + { + char b[7]; + snprintfz(b, 6, "%d", analytics_data.prometheus_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b); + + snprintfz(b, 6, "%d", analytics_data.shell_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b); + + snprintfz(b, 6, "%d", analytics_data.json_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b); + + snprintfz(b, 6, "%d", analytics_data.dashboard_hits); + analytics_set_data(&analytics_data.netdata_dashboard_used, b); + + snprintfz(b, 6, "%zu", rrd_hosts_available); + analytics_set_data(&analytics_data.netdata_config_hosts_available, b); + } +} + +void analytics_main_cleanup(void *ptr) +{ + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + debug(D_ANALYTICS, "Cleaning up..."); + analytics_free_data(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/* + * The analytics thread. Sleep for ANALYTICS_INIT_SLEEP_SEC, + * gather the data, and then go to a loop where every ANALYTICS_HEARTBEAT + * it will send a new META event after gathering data that could be changed + * while the agent is running + */ +void *analytics_main(void *ptr) +{ + netdata_thread_cleanup_push(analytics_main_cleanup, ptr); + unsigned int sec = 0; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step_ut = USEC_PER_SEC; + + debug(D_ANALYTICS, "Analytics thread starts"); + + //first delay after agent start + while (!netdata_exit && likely(sec <= ANALYTICS_INIT_SLEEP_SEC)) { + heartbeat_next(&hb, step_ut); + sec++; + } + + if (unlikely(netdata_exit)) + goto cleanup; + + analytics_gather_immutable_meta_data(); + analytics_gather_mutable_meta_data(); + send_statistics("META_START", "-", "-"); + analytics_log_data(); + + sec = 0; + while (1) { + heartbeat_next(&hb, step_ut * 2); + sec += 2; + + if (unlikely(netdata_exit)) + break; + + if (likely(sec < ANALYTICS_HEARTBEAT)) + continue; + + analytics_gather_mutable_meta_data(); + send_statistics("META", "-", "-"); + analytics_log_data(); + sec = 0; + } + +cleanup: + netdata_thread_cleanup_pop(1); + return NULL; +} + +static const char *verify_required_directory(const char *dir) +{ + if (chdir(dir) == -1) + fatal("Cannot change directory to '%s'", dir); + + DIR *d = opendir(dir); + if (!d) + fatal("Cannot examine the contents of directory '%s'", dir); + closedir(d); + + return dir; +} + +/* + * This is called after the rrdinit + * These values will be sent on the START event + */ +void set_late_global_environment() +{ + analytics_set_data(&analytics_data.netdata_config_stream_enabled, default_rrdpush_enabled ? "true" : "false"); + analytics_set_data_str(&analytics_data.netdata_config_memory_mode, (char *)rrd_memory_mode_name(default_rrd_memory_mode)); + +#ifdef DISABLE_CLOUD + analytics_set_data(&analytics_data.netdata_host_cloud_enabled, "false"); +#else + analytics_set_data( + &analytics_data.netdata_host_cloud_enabled, + appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", CONFIG_BOOLEAN_YES) ? "true" : "false"); +#endif + +#ifdef ENABLE_DBENGINE + { + char b[16]; + snprintfz(b, 15, "%d", default_rrdeng_page_cache_mb); + analytics_set_data(&analytics_data.netdata_config_page_cache_size, b); + + snprintfz(b, 15, "%d", default_multidb_disk_quota_mb); + analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, b); + } +#endif + +#ifdef ENABLE_HTTPS + analytics_set_data(&analytics_data.netdata_config_https_enabled, "true"); +#else + analytics_set_data(&analytics_data.netdata_config_https_enabled, "false"); +#endif + + if (web_server_mode == WEB_SERVER_MODE_NONE) + analytics_set_data(&analytics_data.netdata_config_web_enabled, "false"); + else + analytics_set_data(&analytics_data.netdata_config_web_enabled, "true"); + + analytics_set_data_str(&analytics_data.netdata_config_release_channel, (char *)get_release_channel()); + + { + BUFFER *bi = buffer_create(1000); + analytics_build_info(bi); + analytics_set_data_str(&analytics_data.netdata_buildinfo, (char *)buffer_tostring(bi)); + buffer_free(bi); + } + + analytics_get_install_type(); +} + +void get_system_timezone(void) +{ + // avoid flood calls to stat(/etc/localtime) + // http://stackoverflow.com/questions/4554271/how-to-avoid-excessive-stat-etc-localtime-calls-in-strftime-on-linux + const char *tz = getenv("TZ"); + if (!tz || !*tz) + setenv("TZ", config_get(CONFIG_SECTION_ENV_VARS, "TZ", ":/etc/localtime"), 0); + + char buffer[FILENAME_MAX + 1] = ""; + const char *timezone = NULL; + ssize_t ret; + + // use the TZ variable + if (tz && *tz && *tz != ':') { + timezone = tz; + info("TIMEZONE: using TZ variable '%s'", timezone); + } + + // use the contents of /etc/timezone + if (!timezone && !read_file("/etc/timezone", buffer, FILENAME_MAX)) { + timezone = buffer; + info("TIMEZONE: using the contents of /etc/timezone"); + } + + // read the link /etc/localtime + if (!timezone) { + ret = readlink("/etc/localtime", buffer, FILENAME_MAX); + + if (ret > 0) { + buffer[ret] = '\0'; + + char *cmp = "/usr/share/zoneinfo/"; + size_t cmp_len = strlen(cmp); + + char *s = strstr(buffer, cmp); + if (s && s[cmp_len]) { + timezone = &s[cmp_len]; + info("TIMEZONE: using the link of /etc/localtime: '%s'", timezone); + } + } else + buffer[0] = '\0'; + } + + // find the timezone from strftime() + if (!timezone) { + time_t t; + struct tm *tmp, tmbuf; + + t = now_realtime_sec(); + tmp = localtime_r(&t, &tmbuf); + + if (tmp != NULL) { + if (strftime(buffer, FILENAME_MAX, "%Z", tmp) == 0) + buffer[0] = '\0'; + else { + buffer[FILENAME_MAX] = '\0'; + timezone = buffer; + info("TIMEZONE: using strftime(): '%s'", timezone); + } + } + } + + if (timezone && *timezone) { + // make sure it does not have illegal characters + // info("TIMEZONE: fixing '%s'", timezone); + + size_t len = strlen(timezone); + char tmp[len + 1]; + char *d = tmp; + *d = '\0'; + + while (*timezone) { + if (isalnum(*timezone) || *timezone == '_' || *timezone == '/') + *d++ = *timezone++; + else + timezone++; + } + *d = '\0'; + strncpyz(buffer, tmp, len); + timezone = buffer; + info("TIMEZONE: fixed as '%s'", timezone); + } + + if (!timezone || !*timezone) + timezone = "unknown"; + + netdata_configured_timezone = config_get(CONFIG_SECTION_GLOBAL, "timezone", timezone); + + //get the utc offset, and the timezone as returned by strftime + //will be sent to the cloud + //Note: This will need an agent restart to get new offset on time change (dst, etc). + { + time_t t; + struct tm *tmp, tmbuf; + char zone[FILENAME_MAX + 1]; + char sign[2], hh[3], mm[3]; + + t = now_realtime_sec(); + tmp = localtime_r(&t, &tmbuf); + + if (tmp != NULL) { + if (strftime(zone, FILENAME_MAX, "%Z", tmp) == 0) { + netdata_configured_abbrev_timezone = strdupz("UTC"); + } else + netdata_configured_abbrev_timezone = strdupz(zone); + + if (strftime(zone, FILENAME_MAX, "%z", tmp) == 0) { + netdata_configured_utc_offset = 0; + } else { + sign[0] = zone[0] == '-' || zone[0] == '+' ? zone[0] : '0'; + sign[1] = '\0'; + hh[0] = isdigit(zone[1]) ? zone[1] : '0'; + hh[1] = isdigit(zone[2]) ? zone[2] : '0'; + hh[2] = '\0'; + mm[0] = isdigit(zone[3]) ? zone[3] : '0'; + mm[1] = isdigit(zone[4]) ? zone[4] : '0'; + mm[2] = '\0'; + + netdata_configured_utc_offset = (str2i(hh) * 3600) + (str2i(mm) * 60); + netdata_configured_utc_offset = + sign[0] == '-' ? -netdata_configured_utc_offset : netdata_configured_utc_offset; + } + } else { + netdata_configured_abbrev_timezone = strdupz("UTC"); + netdata_configured_utc_offset = 0; + } + } +} + +void set_global_environment() +{ + { + char b[16]; + snprintfz(b, 15, "%d", default_rrd_update_every); + setenv("NETDATA_UPDATE_EVERY", b, 1); + } + + setenv("NETDATA_VERSION", program_version, 1); + setenv("NETDATA_HOSTNAME", netdata_configured_hostname, 1); + setenv("NETDATA_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1); + setenv("NETDATA_USER_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1); + setenv("NETDATA_STOCK_CONFIG_DIR", verify_required_directory(netdata_configured_stock_config_dir), 1); + setenv("NETDATA_PLUGINS_DIR", verify_required_directory(netdata_configured_primary_plugins_dir), 1); + setenv("NETDATA_WEB_DIR", verify_required_directory(netdata_configured_web_dir), 1); + setenv("NETDATA_CACHE_DIR", verify_required_directory(netdata_configured_cache_dir), 1); + setenv("NETDATA_LIB_DIR", verify_required_directory(netdata_configured_varlib_dir), 1); + setenv("NETDATA_LOCK_DIR", netdata_configured_lock_dir, 1); + setenv("NETDATA_LOG_DIR", verify_required_directory(netdata_configured_log_dir), 1); + setenv("HOME", verify_required_directory(netdata_configured_home_dir), 1); + setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1); + + { + BUFFER *user_plugins_dirs = buffer_create(FILENAME_MAX); + + for (size_t i = 1; i < PLUGINSD_MAX_DIRECTORIES && plugin_directories[i]; i++) { + if (i > 1) + buffer_strcat(user_plugins_dirs, " "); + buffer_strcat(user_plugins_dirs, plugin_directories[i]); + } + + setenv("NETDATA_USER_PLUGINS_DIRS", buffer_tostring(user_plugins_dirs), 1); + + buffer_free(user_plugins_dirs); + } + + analytics_data.data_length = 0; + analytics_set_data(&analytics_data.netdata_config_stream_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_memory_mode, "null"); + analytics_set_data(&analytics_data.netdata_config_exporting_enabled, "null"); + analytics_set_data(&analytics_data.netdata_exporting_connectors, "null"); + analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, "null"); + analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, "null"); + analytics_set_data(&analytics_data.netdata_allmetrics_json_used, "null"); + analytics_set_data(&analytics_data.netdata_dashboard_used, "null"); + analytics_set_data(&analytics_data.netdata_collectors, "null"); + analytics_set_data(&analytics_data.netdata_collectors_count, "null"); + analytics_set_data(&analytics_data.netdata_buildinfo, "null"); + analytics_set_data(&analytics_data.netdata_config_page_cache_size, "null"); + analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, "null"); + analytics_set_data(&analytics_data.netdata_config_https_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_web_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_release_channel, "null"); + analytics_set_data(&analytics_data.netdata_mirrored_host_count, "null"); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, "null"); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, "null"); + analytics_set_data(&analytics_data.netdata_notification_methods, "null"); + analytics_set_data(&analytics_data.netdata_alarms_normal, "null"); + analytics_set_data(&analytics_data.netdata_alarms_warning, "null"); + analytics_set_data(&analytics_data.netdata_alarms_critical, "null"); + analytics_set_data(&analytics_data.netdata_charts_count, "null"); + analytics_set_data(&analytics_data.netdata_metrics_count, "null"); + analytics_set_data(&analytics_data.netdata_config_is_parent, "null"); + analytics_set_data(&analytics_data.netdata_config_hosts_available, "null"); + analytics_set_data(&analytics_data.netdata_host_cloud_available, "null"); + analytics_set_data(&analytics_data.netdata_host_aclk_implementation, "null"); + analytics_set_data(&analytics_data.netdata_host_aclk_available, "null"); + analytics_set_data(&analytics_data.netdata_host_aclk_protocol, "null"); + analytics_set_data(&analytics_data.netdata_host_agent_claimed, "null"); + analytics_set_data(&analytics_data.netdata_host_cloud_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_https_available, "null"); + analytics_set_data(&analytics_data.netdata_install_type, "null"); + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "null"); + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "null"); + analytics_set_data(&analytics_data.netdata_config_oom_score, "null"); + analytics_set_data(&analytics_data.netdata_prebuilt_distro, "null"); + + analytics_data.prometheus_hits = 0; + analytics_data.shell_hits = 0; + analytics_data.json_hits = 0; + analytics_data.dashboard_hits = 0; + + char *default_port = appconfig_get(&netdata_config, CONFIG_SECTION_WEB, "default port", NULL); + int clean = 0; + if (!default_port) { + default_port = strdupz("19999"); + clean = 1; + } + + setenv("NETDATA_LISTEN_PORT", default_port, 1); + if (clean) + freez(default_port); + + // set the path we need + char path[1024 + 1], *p = getenv("PATH"); + if (!p) + p = "/bin:/usr/bin"; + snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"); + setenv("PATH", config_get(CONFIG_SECTION_ENV_VARS, "PATH", path), 1); + + // python options + p = getenv("PYTHONPATH"); + if (!p) + p = ""; + setenv("PYTHONPATH", config_get(CONFIG_SECTION_ENV_VARS, "PYTHONPATH", p), 1); + + // disable buffering for python plugins + setenv("PYTHONUNBUFFERED", "1", 1); + + // switch to standard locale for plugins + setenv("LC_ALL", "C", 1); +} + +void send_statistics(const char *action, const char *action_result, const char *action_data) +{ + static char *as_script; + + if (netdata_anonymous_statistics_enabled == -1) { + char *optout_file = mallocz( + sizeof(char) * + (strlen(netdata_configured_user_config_dir) + strlen(".opt-out-from-anonymous-statistics") + 2)); + sprintf(optout_file, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics"); + if (likely(access(optout_file, R_OK) != 0)) { + as_script = mallocz( + sizeof(char) * + (strlen(netdata_configured_primary_plugins_dir) + strlen("anonymous-statistics.sh") + 2)); + sprintf(as_script, "%s/%s", netdata_configured_primary_plugins_dir, "anonymous-statistics.sh"); + if (unlikely(access(as_script, R_OK) != 0)) { + netdata_anonymous_statistics_enabled = 0; + info("Anonymous statistics script %s not found.", as_script); + freez(as_script); + } else { + netdata_anonymous_statistics_enabled = 1; + } + } else { + netdata_anonymous_statistics_enabled = 0; + as_script = NULL; + } + freez(optout_file); + } + if (!netdata_anonymous_statistics_enabled) + return; + if (!action) + return; + if (!action_result) + action_result = ""; + if (!action_data) + action_data = ""; + char *command_to_run = mallocz( + sizeof(char) * (strlen(action) + strlen(action_result) + strlen(action_data) + strlen(as_script) + + analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15)); + pid_t command_pid; + + sprintf( + command_to_run, + "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", + as_script, + action, + action_result, + action_data, + analytics_data.netdata_config_stream_enabled, + analytics_data.netdata_config_memory_mode, + analytics_data.netdata_config_exporting_enabled, + analytics_data.netdata_exporting_connectors, + analytics_data.netdata_allmetrics_prometheus_used, + analytics_data.netdata_allmetrics_shell_used, + analytics_data.netdata_allmetrics_json_used, + analytics_data.netdata_dashboard_used, + analytics_data.netdata_collectors, + analytics_data.netdata_collectors_count, + analytics_data.netdata_buildinfo, + analytics_data.netdata_config_page_cache_size, + analytics_data.netdata_config_multidb_disk_quota, + analytics_data.netdata_config_https_enabled, + analytics_data.netdata_config_web_enabled, + analytics_data.netdata_config_release_channel, + analytics_data.netdata_mirrored_host_count, + analytics_data.netdata_mirrored_hosts_reachable, + analytics_data.netdata_mirrored_hosts_unreachable, + analytics_data.netdata_notification_methods, + analytics_data.netdata_alarms_normal, + analytics_data.netdata_alarms_warning, + analytics_data.netdata_alarms_critical, + analytics_data.netdata_charts_count, + analytics_data.netdata_metrics_count, + analytics_data.netdata_config_is_parent, + analytics_data.netdata_config_hosts_available, + analytics_data.netdata_host_cloud_available, + analytics_data.netdata_host_aclk_available, + analytics_data.netdata_host_aclk_protocol, + analytics_data.netdata_host_aclk_implementation, + analytics_data.netdata_host_agent_claimed, + analytics_data.netdata_host_cloud_enabled, + analytics_data.netdata_config_https_available, + analytics_data.netdata_install_type, + analytics_data.netdata_config_is_private_registry, + analytics_data.netdata_config_use_private_registry, + analytics_data.netdata_config_oom_score, + analytics_data.netdata_prebuilt_distro); + + info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data); + + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input); + if (fp_child_output) { + char buffer[4 + 1]; + char *s = fgets(buffer, 4, fp_child_output); + int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid); + if (exit_code) + error("Execution of anonymous statistics script returned %d.", exit_code); + if (s && strncmp(buffer, "200", 3)) + error("Execution of anonymous statistics script returned http code %s.", buffer); + } else { + error("Failed to run anonymous statistics script %s.", as_script); + } + freez(command_to_run); +} diff --git a/daemon/analytics.h b/daemon/analytics.h new file mode 100644 index 0000000..d1ffcec --- /dev/null +++ b/daemon/analytics.h @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_ANALYTICS_H +#define NETDATA_ANALYTICS_H 1 + +#include "daemon/common.h" + +/* Max number of seconds before the first META analytics is sent */ +#define ANALYTICS_INIT_SLEEP_SEC 120 + +/* Send a META event every X seconds */ +#define ANALYTICS_HEARTBEAT 7200 + +/* Maximum number of hits to log */ +#define ANALYTICS_MAX_PROMETHEUS_HITS 255 +#define ANALYTICS_MAX_SHELL_HITS 255 +#define ANALYTICS_MAX_JSON_HITS 255 +#define ANALYTICS_MAX_DASHBOARD_HITS 255 + +/* Needed to calculate the space needed for parameters */ +#define ANALYTICS_NO_OF_ITEMS 39 + +struct analytics_data { + char *netdata_config_stream_enabled; + char *netdata_config_memory_mode; + char *netdata_exporting_connectors; + char *netdata_config_exporting_enabled; + char *netdata_allmetrics_prometheus_used; + char *netdata_allmetrics_shell_used; + char *netdata_allmetrics_json_used; + char *netdata_dashboard_used; + char *netdata_collectors; + char *netdata_collectors_count; + char *netdata_buildinfo; + char *netdata_config_page_cache_size; + char *netdata_config_multidb_disk_quota; + char *netdata_config_https_enabled; + char *netdata_config_web_enabled; + char *netdata_config_release_channel; + char *netdata_mirrored_host_count; + char *netdata_mirrored_hosts_reachable; + char *netdata_mirrored_hosts_unreachable; + char *netdata_notification_methods; + char *netdata_alarms_normal; + char *netdata_alarms_warning; + char *netdata_alarms_critical; + char *netdata_charts_count; + char *netdata_metrics_count; + char *netdata_config_is_parent; + char *netdata_config_hosts_available; + char *netdata_host_cloud_available; + char *netdata_host_aclk_available; + char *netdata_host_aclk_protocol; + char *netdata_host_aclk_implementation; + char *netdata_host_agent_claimed; + char *netdata_host_cloud_enabled; + char *netdata_config_https_available; + char *netdata_install_type; + char *netdata_config_is_private_registry; + char *netdata_config_use_private_registry; + char *netdata_config_oom_score; + char *netdata_prebuilt_distro; + + size_t data_length; + + uint8_t prometheus_hits; + uint8_t shell_hits; + uint8_t json_hits; + uint8_t dashboard_hits; +}; + +void analytics_get_data(char *name, BUFFER *wb); +void set_late_global_environment(void); +void analytics_free_data(void); +void set_global_environment(void); +void send_statistics(const char *action, const char *action_result, const char *action_data); +void analytics_log_shell(void); +void analytics_log_json(void); +void analytics_log_prometheus(void); +void analytics_log_dashboard(void); +void analytics_gather_mutable_meta_data(void); +void analytics_report_oom_score(long long int score); +void get_system_timezone(void); + +extern struct analytics_data analytics_data; + +#endif //NETDATA_ANALYTICS_H diff --git a/daemon/anonymous-statistics.sh.in b/daemon/anonymous-statistics.sh.in new file mode 100755 index 0000000..9f8df18 --- /dev/null +++ b/daemon/anonymous-statistics.sh.in @@ -0,0 +1,177 @@ +#!/usr/bin/env sh + +# Valid actions: + +# - FATAL - netdata exited due to a fatal condition +# ACTION_RESULT -- program name and thread tag +# ACTION_DATA -- fmt, args passed to fatal +# - START - netdata started +# ACTION_DATA -- nan +# - EXIT - installation action +# ACTION_DATA -- ret value of + +ACTION="${1}" +ACTION_RESULT="${2}" +ACTION_DATA="${3}" +ACTION_DATA=$(echo "${ACTION_DATA}" | tr '"' "'") + +# ------------------------------------------------------------------------------------------------- +# check opt-out + +if [ -f "@configdir_POST@/.opt-out-from-anonymous-statistics" ] || + [ ! "${DISABLE_TELEMETRY:-0}" -eq 0 ] || + [ -n "$DISABLE_TELEMETRY" ] || + [ ! "${DO_NOT_TRACK:-0}" -eq 0 ] || + [ -n "$DO_NOT_TRACK" ]; then + exit 0 +fi + +# ------------------------------------------------------------------------------------------------- +# Get the extra variables + +NETDATA_CONFIG_STREAM_ENABLED="${4}" +NETDATA_CONFIG_MEMORY_MODE="${5}" +NETDATA_CONFIG_EXPORTING_ENABLED="${6}" +NETDATA_EXPORTING_CONNECTORS="${7}" +NETDATA_ALLMETRICS_PROMETHEUS_USED="${8}" +NETDATA_ALLMETRICS_SHELL_USED="${9}" +NETDATA_ALLMETRICS_JSON_USED="${10}" +NETDATA_DASHBOARD_USED="${11}" +NETDATA_COLLECTORS="${12}" +NETDATA_COLLECTORS_COUNT="${13}" +NETDATA_BUILDINFO="${14}" +NETDATA_CONFIG_PAGE_CACHE_SIZE="${15}" +NETDATA_CONFIG_MULTIDB_DISK_QUOTA="${16}" +NETDATA_CONFIG_HTTPS_ENABLED="${17}" +NETDATA_CONFIG_WEB_ENABLED="${18}" +NETDATA_CONFIG_RELEASE_CHANNEL="${19}" +NETDATA_MIRRORED_HOST_COUNT="${20}" +NETDATA_MIRRORED_HOSTS_REACHABLE="${21}" +NETDATA_MIRRORED_HOSTS_UNREACHABLE="${22}" +NETDATA_NOTIFICATION_METHODS="${23}" +NETDATA_ALARMS_NORMAL="${24}" +NETDATA_ALARMS_WARNING="${25}" +NETDATA_ALARMS_CRITICAL="${26}" +NETDATA_CHARTS_COUNT="${27}" +NETDATA_METRICS_COUNT="${28}" +NETDATA_CONFIG_IS_PARENT="${29}" +NETDATA_CONFIG_HOSTS_AVAILABLE="${30}" +NETDATA_HOST_CLOUD_AVAILABLE="${31}" +NETDATA_HOST_ACLK_AVAILABLE="${32}" +NETDATA_HOST_ACLK_PROTOCOL="${33}" +NETDATA_HOST_ACLK_IMPLEMENTATION="${34}" +NETDATA_HOST_AGENT_CLAIMED="${35}" +NETDATA_HOST_CLOUD_ENABLED="${36}" +NETDATA_CONFIG_HTTPS_AVAILABLE="${37}" +NETDATA_INSTALL_TYPE="${38}" +NETDATA_IS_PRIVATE_REGISTRY="${39}" +NETDATA_USE_PRIVATE_REGISTRY="${40}" +NETDATA_CONFIG_OOM_SCORE="${41}" +NETDATA_PREBUILT_DISTRO="${42}" + + +# define body of request to be sent +REQ_BODY="$(cat << EOF +{ + "api_key": "mqkwGT0JNFqO-zX2t0mW6Tec9yooaVu7xCBlXtHnt5Y", + "event": "${ACTION} ${ACTION_RESULT}", + "properties": { + "distinct_id": "${NETDATA_REGISTRY_UNIQUE_ID}", + "\$current_url": "agent backend", + "\$pathname": "netdata-backend", + "\$host": "backend.netdata.io", + "\$ip": "127.0.0.1", + "event_source": "agent backend", + "action": "${ACTION}", + "action_result": "${ACTION_RESULT}", + "action_data": "${ACTION_DATA}", + "netdata_machine_guid": "${NETDATA_REGISTRY_UNIQUE_ID}", + "netdata_version": "${NETDATA_VERSION}", + "netdata_buildinfo": ${NETDATA_BUILDINFO}, + "netdata_release_channel": ${NETDATA_CONFIG_RELEASE_CHANNEL}, + "netdata_install_type": ${NETDATA_INSTALL_TYPE}, + "netdata_prebuilt_distro": ${NETDATA_PREBUILT_DISTRO}, + "host_os_name": "${NETDATA_HOST_OS_NAME}", + "host_os_id": "${NETDATA_HOST_OS_ID}", + "host_os_id_like": "${NETDATA_HOST_OS_ID_LIKE}", + "host_os_version": "${NETDATA_HOST_OS_VERSION}", + "host_os_version_id": "${NETDATA_HOST_OS_VERSION_ID}", + "host_os_detection": "${NETDATA_HOST_OS_DETECTION}", + "host_is_k8s_node": "${NETDATA_HOST_IS_K8S_NODE}", + "system_kernel_name": "${NETDATA_SYSTEM_KERNEL_NAME}", + "system_kernel_version": "${NETDATA_SYSTEM_KERNEL_VERSION}", + "system_architecture": "${NETDATA_SYSTEM_ARCHITECTURE}", + "system_virtualization": "${NETDATA_SYSTEM_VIRTUALIZATION}", + "system_virt_detection": "${NETDATA_SYSTEM_VIRT_DETECTION}", + "system_container": "${NETDATA_SYSTEM_CONTAINER}", + "system_container_detection": "${NETDATA_SYSTEM_CONTAINER_DETECTION}", + "container_os_name": "${NETDATA_CONTAINER_OS_NAME}", + "container_os_id": "${NETDATA_CONTAINER_OS_ID}", + "container_os_id_like": "${NETDATA_CONTAINER_OS_ID_LIKE}", + "container_os_version": "${NETDATA_CONTAINER_OS_VERSION}", + "container_os_version_id": "${NETDATA_CONTAINER_OS_VERSION_ID}", + "container_os_detection": "${NETDATA_CONTAINER_OS_DETECTION}", + "container_is_official_image": ${NETDATA_CONTAINER_IS_OFFICIAL_IMAGE}, + "system_cpu_detection": "${NETDATA_SYSTEM_CPU_DETECTION}", + "system_cpu_freq": "${NETDATA_SYSTEM_CPU_FREQ}", + "system_cpu_logical_cpu_count": "${NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT}", + "system_cpu_model": "${NETDATA_SYSTEM_CPU_MODEL}", + "system_cpu_vendor": "${NETDATA_SYSTEM_CPU_VENDOR}", + "system_disk_detection": "${NETDATA_SYSTEM_DISK_DETECTION}", + "system_ram_detection": "${NETDATA_SYSTEM_RAM_DETECTION}", + "system_total_disk_size": "${NETDATA_SYSTEM_TOTAL_DISK_SIZE}", + "system_total_ram": "${NETDATA_SYSTEM_TOTAL_RAM}", + "config_stream_enabled": ${NETDATA_CONFIG_STREAM_ENABLED}, + "config_memory_mode": ${NETDATA_CONFIG_MEMORY_MODE}, + "config_page_cache_size": ${NETDATA_CONFIG_PAGE_CACHE_SIZE}, + "config_multidb_disk_quota": ${NETDATA_CONFIG_MULTIDB_DISK_QUOTA}, + "config_https_enabled": ${NETDATA_CONFIG_HTTPS_ENABLED}, + "config_https_available": ${NETDATA_CONFIG_HTTPS_AVAILABLE}, + "config_web_enabled": ${NETDATA_CONFIG_WEB_ENABLED}, + "config_exporting_enabled": ${NETDATA_CONFIG_EXPORTING_ENABLED}, + "config_is_parent": ${NETDATA_CONFIG_IS_PARENT}, + "config_is_private_registry": ${NETDATA_IS_PRIVATE_REGISTRY}, + "config_private_registry_used": ${NETDATA_USE_PRIVATE_REGISTRY}, + "config_hosts_available": ${NETDATA_CONFIG_HOSTS_AVAILABLE}, + "config_oom_score": ${NETDATA_CONFIG_OOM_SCORE}, + "alarms_normal": ${NETDATA_ALARMS_NORMAL}, + "alarms_warning": ${NETDATA_ALARMS_WARNING}, + "alarms_critical": ${NETDATA_ALARMS_CRITICAL}, + "host_charts_count": ${NETDATA_CHARTS_COUNT}, + "host_metrics_count": ${NETDATA_METRICS_COUNT}, + "host_collectors":[ + ${NETDATA_COLLECTORS} + ], + "host_collectors_count": ${NETDATA_COLLECTORS_COUNT}, + "host_notification_methods": ${NETDATA_NOTIFICATION_METHODS}, + "host_allmetrics_prometheus_used": ${NETDATA_ALLMETRICS_PROMETHEUS_USED}, + "host_allmetrics_shell_used": ${NETDATA_ALLMETRICS_SHELL_USED}, + "host_allmetrics_json_used": ${NETDATA_ALLMETRICS_JSON_USED}, + "host_dashboard_used": ${NETDATA_DASHBOARD_USED}, + "host_cloud_available": ${NETDATA_HOST_CLOUD_AVAILABLE}, + "host_cloud_enabled": ${NETDATA_HOST_CLOUD_ENABLED}, + "host_agent_claimed": ${NETDATA_HOST_AGENT_CLAIMED}, + "host_aclk_available": ${NETDATA_HOST_ACLK_AVAILABLE}, + "host_aclk_protocol": ${NETDATA_HOST_ACLK_PROTOCOL}, + "host_aclk_implementation": ${NETDATA_HOST_ACLK_IMPLEMENTATION}, + "mirrored_host_count": ${NETDATA_MIRRORED_HOST_COUNT}, + "mirrored_hosts_reachable": ${NETDATA_MIRRORED_HOSTS_REACHABLE}, + "mirrored_hosts_unreachable": ${NETDATA_MIRRORED_HOSTS_UNREACHABLE}, + "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS} + } +} +EOF +)" + +# send the anonymous statistics to the Netdata PostHog +if [ -n "$(command -v curl 2> /dev/null)" ]; then + curl --silent -o /dev/null --write-out '%{http_code}' -X POST --max-time 2 --header "Content-Type: application/json" -d "${REQ_BODY}" https://posthog.netdata.cloud/capture/ +else + wget -q -O - --no-check-certificate \ + --server-response \ + --method POST \ + --timeout=1 \ + --header 'Content-Type: application/json' \ + --body-data "${REQ_BODY}" \ + 'https://posthog.netdata.cloud/capture/' 2>&1 | awk '/^ HTTP/{print $2}' +fi diff --git a/daemon/buildinfo.c b/daemon/buildinfo.c new file mode 100644 index 0000000..ef813a9 --- /dev/null +++ b/daemon/buildinfo.c @@ -0,0 +1,470 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include "./config.h" +#include "common.h" +#include "buildinfo.h" + +// Optional features + +#ifdef ENABLE_ACLK +#define FEAT_CLOUD 1 +#define FEAT_CLOUD_MSG "" +#else +#ifdef DISABLE_CLOUD +#define FEAT_CLOUD 0 +#define FEAT_CLOUD_MSG "(by user request)" +#else +#define FEAT_CLOUD 0 +#define FEAT_CLOUD_MSG "" +#endif +#endif + +#ifdef ENABLE_DBENGINE +#define FEAT_DBENGINE 1 +#else +#define FEAT_DBENGINE 0 +#endif + +#if defined(HAVE_X509_VERIFY_PARAM_set1_host) && HAVE_X509_VERIFY_PARAM_set1_host == 1 +#define FEAT_TLS_HOST_VERIFY 1 +#else +#define FEAT_TLS_HOST_VERIFY 0 +#endif + +#ifdef ENABLE_HTTPS +#define FEAT_NATIVE_HTTPS 1 +#else +#define FEAT_NATIVE_HTTPS 0 +#endif + +#ifdef ENABLE_ML +#define FEAT_ML 1 +#else +#define FEAT_ML 0 +#endif + +#ifdef ENABLE_COMPRESSION +#define FEAT_STREAM_COMPRESSION 1 +#else +#define FEAT_STREAM_COMPRESSION 0 +#endif //ENABLE_COMPRESSION + + +// Optional libraries + +#ifdef HAVE_PROTOBUF +#define FEAT_PROTOBUF 1 +#ifdef BUNDLED_PROTOBUF +#define FEAT_PROTOBUF_BUNDLED " (bundled)" +#else +#define FEAT_PROTOBUF_BUNDLED " (system)" +#endif +#else +#define FEAT_PROTOBUF 0 +#define FEAT_PROTOBUF_BUNDLED "" +#endif + +#ifdef ENABLE_JSONC +#define FEAT_JSONC 1 +#else +#define FEAT_JSONC 0 +#endif + +#ifdef ENABLE_JEMALLOC +#define FEAT_JEMALLOC 1 +#else +#define FEAT_JEMALLOC 0 +#endif + +#ifdef ENABLE_TCMALLOC +#define FEAT_TCMALLOC 1 +#else +#define FEAT_TCMALLOC 0 +#endif + +#ifdef HAVE_CAPABILITY +#define FEAT_LIBCAP 1 +#else +#define FEAT_LIBCAP 0 +#endif + +#ifdef NETDATA_WITH_ZLIB +#define FEAT_ZLIB 1 +#else +#define FEAT_ZLIB 0 +#endif + +#ifdef STORAGE_WITH_MATH +#define FEAT_LIBM 1 +#else +#define FEAT_LIBM 0 +#endif + +#ifdef HAVE_CRYPTO +#define FEAT_CRYPTO 1 +#else +#define FEAT_CRYPTO 0 +#endif + +// Optional plugins + +#ifdef ENABLE_APPS_PLUGIN +#define FEAT_APPS_PLUGIN 1 +#else +#define FEAT_APPS_PLUGIN 0 +#endif + +#ifdef HAVE_FREEIPMI +#define FEAT_IPMI 1 +#else +#define FEAT_IPMI 0 +#endif + +#ifdef HAVE_CUPS +#define FEAT_CUPS 1 +#else +#define FEAT_CUPS 0 +#endif + +#ifdef HAVE_NFACCT +#define FEAT_NFACCT 1 +#else +#define FEAT_NFACCT 0 +#endif + +#ifdef HAVE_LIBXENSTAT +#define FEAT_XEN 1 +#else +#define FEAT_XEN 0 +#endif + +#ifdef HAVE_XENSTAT_VBD_ERROR +#define FEAT_XEN_VBD_ERROR 1 +#else +#define FEAT_XEN_VBD_ERROR 0 +#endif + +#ifdef HAVE_LIBBPF +#define FEAT_EBPF 1 +#else +#define FEAT_EBPF 0 +#endif + +#ifdef HAVE_SETNS +#define FEAT_CGROUP_NET 1 +#else +#define FEAT_CGROUP_NET 0 +#endif + +#ifdef ENABLE_PERF_PLUGIN +#define FEAT_PERF 1 +#else +#define FEAT_PERF 0 +#endif + +#ifdef ENABLE_SLABINFO +#define FEAT_SLABINFO 1 +#else +#define FEAT_SLABINFO 0 +#endif + +// Optional Exporters + +#ifdef HAVE_KINESIS +#define FEAT_KINESIS 1 +#else +#define FEAT_KINESIS 0 +#endif + +#ifdef ENABLE_EXPORTING_PUBSUB +#define FEAT_PUBSUB 1 +#else +#define FEAT_PUBSUB 0 +#endif + +#ifdef HAVE_MONGOC +#define FEAT_MONGO 1 +#else +#define FEAT_MONGO 0 +#endif + +#ifdef ENABLE_PROMETHEUS_REMOTE_WRITE +#define FEAT_REMOTE_WRITE 1 +#else +#define FEAT_REMOTE_WRITE 0 +#endif + +#define FEAT_YES_NO(x) ((x) ? "YES" : "NO") + +#ifdef NETDATA_TRACE_ALLOCATIONS +#define FEAT_TRACE_ALLOC 1 +#else +#define FEAT_TRACE_ALLOC 0 +#endif + +char *get_value_from_key(char *buffer, char *key) { + char *s = NULL, *t = NULL; + s = t = buffer + strlen(key) + 2; + if (s) { + while (*s == '\'') + s++; + while (*++t != '\0'); + while (--t > s && *t == '\'') + *t = '\0'; + } + return s; +} + +void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist) { + char *install_type_filename; + + int install_type_filename_len = (strlen(netdata_configured_user_config_dir) + strlen(".install-type") + 3); + install_type_filename = mallocz(sizeof(char) * install_type_filename_len); + snprintfz(install_type_filename, install_type_filename_len - 1, "%s/%s", netdata_configured_user_config_dir, ".install-type"); + + FILE *fp = fopen(install_type_filename, "r"); + if (fp) { + char *s, buf[256 + 1]; + size_t len = 0; + + while ((s = fgets_trim_len(buf, 256, fp, &len))) { + if (!strncmp(buf, "INSTALL_TYPE='", 14)) + *install_type = strdupz((char *)get_value_from_key(buf, "INSTALL_TYPE")); + else if (!strncmp(buf, "PREBUILT_ARCH='", 15)) + *prebuilt_arch = strdupz((char *)get_value_from_key(buf, "PREBUILT_ARCH")); + else if (!strncmp(buf, "PREBUILT_DISTRO='", 17)) + *prebuilt_dist = strdupz((char *)get_value_from_key(buf, "PREBUILT_DISTRO")); + } + fclose(fp); + } + freez(install_type_filename); +} + +void print_build_info(void) { + char *install_type = NULL; + char *prebuilt_arch = NULL; + char *prebuilt_distro = NULL; + get_install_type(&install_type, &prebuilt_arch, &prebuilt_distro); + + printf("Configure options: %s\n", CONFIGURE_COMMAND); + + if (install_type == NULL) { + printf("Install type: unknown\n"); + } else { + printf("Install type: %s\n", install_type); + } + + if (prebuilt_arch != NULL) { + printf(" Binary architecture: %s\n", prebuilt_arch); + } + + if (prebuilt_distro != NULL) { + printf(" Packaging distro: %s\n", prebuilt_distro); + } + + freez(install_type); + freez(prebuilt_arch); + freez(prebuilt_distro); + + printf("Features:\n"); + printf(" dbengine: %s\n", FEAT_YES_NO(FEAT_DBENGINE)); + printf(" Native HTTPS: %s\n", FEAT_YES_NO(FEAT_NATIVE_HTTPS)); + printf(" Netdata Cloud: %s %s\n", FEAT_YES_NO(FEAT_CLOUD), FEAT_CLOUD_MSG); + printf(" ACLK: %s\n", FEAT_YES_NO(FEAT_CLOUD)); + printf(" TLS Host Verification: %s\n", FEAT_YES_NO(FEAT_TLS_HOST_VERIFY)); + printf(" Machine Learning: %s\n", FEAT_YES_NO(FEAT_ML)); + printf(" Stream Compression: %s\n", FEAT_YES_NO(FEAT_STREAM_COMPRESSION)); + + printf("Libraries:\n"); + printf(" protobuf: %s%s\n", FEAT_YES_NO(FEAT_PROTOBUF), FEAT_PROTOBUF_BUNDLED); + printf(" jemalloc: %s\n", FEAT_YES_NO(FEAT_JEMALLOC)); + printf(" JSON-C: %s\n", FEAT_YES_NO(FEAT_JSONC)); + printf(" libcap: %s\n", FEAT_YES_NO(FEAT_LIBCAP)); + printf(" libcrypto: %s\n", FEAT_YES_NO(FEAT_CRYPTO)); + printf(" libm: %s\n", FEAT_YES_NO(FEAT_LIBM)); + printf(" tcalloc: %s\n", FEAT_YES_NO(FEAT_TCMALLOC)); + printf(" zlib: %s\n", FEAT_YES_NO(FEAT_ZLIB)); + + printf("Plugins:\n"); + printf(" apps: %s\n", FEAT_YES_NO(FEAT_APPS_PLUGIN)); + printf(" cgroup Network Tracking: %s\n", FEAT_YES_NO(FEAT_CGROUP_NET)); + printf(" CUPS: %s\n", FEAT_YES_NO(FEAT_CUPS)); + printf(" EBPF: %s\n", FEAT_YES_NO(FEAT_EBPF)); + printf(" IPMI: %s\n", FEAT_YES_NO(FEAT_IPMI)); + printf(" NFACCT: %s\n", FEAT_YES_NO(FEAT_NFACCT)); + printf(" perf: %s\n", FEAT_YES_NO(FEAT_PERF)); + printf(" slabinfo: %s\n", FEAT_YES_NO(FEAT_SLABINFO)); + printf(" Xen: %s\n", FEAT_YES_NO(FEAT_XEN)); + printf(" Xen VBD Error Tracking: %s\n", FEAT_YES_NO(FEAT_XEN_VBD_ERROR)); + + printf("Exporters:\n"); + printf(" AWS Kinesis: %s\n", FEAT_YES_NO(FEAT_KINESIS)); + printf(" GCP PubSub: %s\n", FEAT_YES_NO(FEAT_PUBSUB)); + printf(" MongoDB: %s\n", FEAT_YES_NO(FEAT_MONGO)); + printf(" Prometheus Remote Write: %s\n", FEAT_YES_NO(FEAT_REMOTE_WRITE)); + + printf("Debug/Developer Features:\n"); + printf(" Trace Allocations: %s\n", FEAT_YES_NO(FEAT_TRACE_ALLOC)); +}; + +#define FEAT_JSON_BOOL(x) ((x) ? "true" : "false") +// This intentionally does not use JSON-C so it works even if JSON-C is not present +// This is used for anonymous statistics reporting, so it intentionally +// does not include the configure options, which would be very easy to use +// for tracking custom builds (and complicate outputting valid JSON). +void print_build_info_json(void) { + printf("{\n"); + printf(" \"features\": {\n"); + printf(" \"dbengine\": %s,\n", FEAT_JSON_BOOL(FEAT_DBENGINE)); + printf(" \"native-https\": %s,\n", FEAT_JSON_BOOL(FEAT_NATIVE_HTTPS)); + printf(" \"cloud\": %s,\n", FEAT_JSON_BOOL(FEAT_CLOUD)); +#ifdef DISABLE_CLOUD + printf(" \"cloud-disabled\": true,\n"); +#else + printf(" \"cloud-disabled\": false,\n"); +#endif + printf(" \"aclk\": %s,\n", FEAT_JSON_BOOL(FEAT_CLOUD)); + + printf(" \"tls-host-verify\": %s,\n", FEAT_JSON_BOOL(FEAT_TLS_HOST_VERIFY)); + printf(" \"machine-learning\": %s\n", FEAT_JSON_BOOL(FEAT_ML)); + printf(" \"stream-compression\": %s\n", FEAT_JSON_BOOL(FEAT_STREAM_COMPRESSION)); + printf(" },\n"); + + printf(" \"libs\": {\n"); + printf(" \"protobuf\": %s,\n", FEAT_JSON_BOOL(FEAT_PROTOBUF)); + printf(" \"protobuf-source\": \"%s\",\n", FEAT_PROTOBUF_BUNDLED); + printf(" \"jemalloc\": %s,\n", FEAT_JSON_BOOL(FEAT_JEMALLOC)); + printf(" \"jsonc\": %s,\n", FEAT_JSON_BOOL(FEAT_JSONC)); + printf(" \"libcap\": %s,\n", FEAT_JSON_BOOL(FEAT_LIBCAP)); + printf(" \"libcrypto\": %s,\n", FEAT_JSON_BOOL(FEAT_CRYPTO)); + printf(" \"libm\": %s,\n", FEAT_JSON_BOOL(FEAT_LIBM)); + printf(" \"tcmalloc\": %s,\n", FEAT_JSON_BOOL(FEAT_TCMALLOC)); + printf(" \"zlib\": %s\n", FEAT_JSON_BOOL(FEAT_ZLIB)); + printf(" },\n"); + + printf(" \"plugins\": {\n"); + printf(" \"apps\": %s,\n", FEAT_JSON_BOOL(FEAT_APPS_PLUGIN)); + printf(" \"cgroup-net\": %s,\n", FEAT_JSON_BOOL(FEAT_CGROUP_NET)); + printf(" \"cups\": %s,\n", FEAT_JSON_BOOL(FEAT_CUPS)); + printf(" \"ebpf\": %s,\n", FEAT_JSON_BOOL(FEAT_EBPF)); + printf(" \"ipmi\": %s,\n", FEAT_JSON_BOOL(FEAT_IPMI)); + printf(" \"nfacct\": %s,\n", FEAT_JSON_BOOL(FEAT_NFACCT)); + printf(" \"perf\": %s,\n", FEAT_JSON_BOOL(FEAT_PERF)); + printf(" \"slabinfo\": %s,\n", FEAT_JSON_BOOL(FEAT_SLABINFO)); + printf(" \"xen\": %s,\n", FEAT_JSON_BOOL(FEAT_XEN)); + printf(" \"xen-vbd-error\": %s\n", FEAT_JSON_BOOL(FEAT_XEN_VBD_ERROR)); + printf(" },\n"); + + printf(" \"exporters\": {\n"); + printf(" \"kinesis\": %s,\n", FEAT_JSON_BOOL(FEAT_KINESIS)); + printf(" \"pubsub\": %s,\n", FEAT_JSON_BOOL(FEAT_PUBSUB)); + printf(" \"mongodb\": %s,\n", FEAT_JSON_BOOL(FEAT_MONGO)); + printf(" \"prom-remote-write\": %s\n", FEAT_JSON_BOOL(FEAT_REMOTE_WRITE)); + printf(" }\n"); + printf(" \"debug-n-devel\": {\n"); + printf(" \"trace-allocations\": %s\n }\n",FEAT_JSON_BOOL(FEAT_TRACE_ALLOC)); + printf("}\n"); +}; + +#define add_to_bi(buffer, str) \ + { if(first) { \ + buffer_strcat (b, str); \ + first = 0; \ + } else \ + buffer_strcat (b, "|" str); } + +void analytics_build_info(BUFFER *b) { + int first = 1; +#ifdef ENABLE_DBENGINE + add_to_bi(b, "dbengine"); +#endif +#ifdef ENABLE_HTTPS + add_to_bi(b, "Native HTTPS"); +#endif +#ifdef ENABLE_ACLK + add_to_bi(b, "Netdata Cloud"); +#endif +#if (FEAT_TLS_HOST_VERIFY!=0) + add_to_bi(b, "TLS Host Verification"); +#endif +#ifdef ENABLE_ML + add_to_bi(b, "Machine Learning"); +#endif +#ifdef ENABLE_COMPRESSION + add_to_bi(b, "Stream Compression"); +#endif + +#ifdef HAVE_PROTOBUF + add_to_bi(b, "protobuf"); +#endif +#ifdef ENABLE_JEMALLOC + add_to_bi(b, "jemalloc"); +#endif +#ifdef ENABLE_JSONC + add_to_bi(b, "JSON-C"); +#endif +#ifdef HAVE_CAPABILITY + add_to_bi(b, "libcap"); +#endif +#ifdef HAVE_CRYPTO + add_to_bi(b, "libcrypto"); +#endif +#ifdef STORAGE_WITH_MATH + add_to_bi(b, "libm"); +#endif + +#ifdef ENABLE_TCMALLOC + add_to_bi(b, "tcalloc"); +#endif +#ifdef NETDATA_WITH_ZLIB + add_to_bi(b, "zlib"); +#endif + +#ifdef ENABLE_APPS_PLUGIN + add_to_bi(b, "apps"); +#endif +#ifdef HAVE_SETNS + add_to_bi(b, "cgroup Network Tracking"); +#endif +#ifdef HAVE_CUPS + add_to_bi(b, "CUPS"); +#endif +#ifdef HAVE_LIBBPF + add_to_bi(b, "EBPF"); +#endif +#ifdef HAVE_FREEIPMI + add_to_bi(b, "IPMI"); +#endif +#ifdef HAVE_NFACCT + add_to_bi(b, "NFACCT"); +#endif +#ifdef ENABLE_PERF_PLUGIN + add_to_bi(b, "perf"); +#endif +#ifdef ENABLE_SLABINFO + add_to_bi(b, "slabinfo"); +#endif +#ifdef HAVE_LIBXENSTAT + add_to_bi(b, "Xen"); +#endif +#ifdef HAVE_XENSTAT_VBD_ERROR + add_to_bi(b, "Xen VBD Error Tracking"); +#endif + +#ifdef HAVE_KINESIS + add_to_bi(b, "AWS Kinesis"); +#endif +#ifdef ENABLE_EXPORTING_PUBSUB + add_to_bi(b, "GCP PubSub"); +#endif +#ifdef HAVE_MONGOC + add_to_bi(b, "MongoDB"); +#endif +#ifdef ENABLE_PROMETHEUS_REMOTE_WRITE + add_to_bi(b, "Prometheus Remote Write"); +#endif +#ifdef NETDATA_TRACE_ALLOCATIONS + add_to_bi(b, "DebugTraceAlloc"); +#endif +} diff --git a/daemon/buildinfo.h b/daemon/buildinfo.h new file mode 100644 index 0000000..d3b439f --- /dev/null +++ b/daemon/buildinfo.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_BUILDINFO_H +#define NETDATA_BUILDINFO_H 1 + +void print_build_info(void); + +void print_build_info_json(void); + +char *get_value_from_key(char *buffer, char *key); + +void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist); + +#endif // NETDATA_BUILDINFO_H diff --git a/daemon/commands.c b/daemon/commands.c new file mode 100644 index 0000000..6288ee5 --- /dev/null +++ b/daemon/commands.c @@ -0,0 +1,736 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +static uv_thread_t thread; +static uv_loop_t* loop; +static uv_async_t async; +static struct completion completion; +static uv_pipe_t server_pipe; + +char cmd_prefix_by_status[] = { + CMD_PREFIX_INFO, + CMD_PREFIX_ERROR, + CMD_PREFIX_ERROR +}; + +static int command_server_initialized = 0; +static int command_thread_error; +static int command_thread_shutdown; +static unsigned clients = 0; + +struct command_context { + /* embedded client pipe structure at address 0 */ + uv_pipe_t client; + + uv_work_t work; + uv_write_t write_req; + cmd_t idx; + char *args; + char *message; + cmd_status_t status; + char command_string[MAX_COMMAND_LENGTH]; + unsigned command_string_size; +}; + +/* Forward declarations */ +static cmd_status_t cmd_help_execute(char *args, char **message); +static cmd_status_t cmd_reload_health_execute(char *args, char **message); +static cmd_status_t cmd_save_database_execute(char *args, char **message); +static cmd_status_t cmd_reopen_logs_execute(char *args, char **message); +static cmd_status_t cmd_exit_execute(char *args, char **message); +static cmd_status_t cmd_fatal_execute(char *args, char **message); +static cmd_status_t cmd_reload_claiming_state_execute(char *args, char **message); +static cmd_status_t cmd_reload_labels_execute(char *args, char **message); +static cmd_status_t cmd_read_config_execute(char *args, char **message); +static cmd_status_t cmd_write_config_execute(char *args, char **message); +static cmd_status_t cmd_ping_execute(char *args, char **message); +static cmd_status_t cmd_aclk_state(char *args, char **message); + +static command_info_t command_info_array[] = { + {"help", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY}, // show help menu + {"reload-health", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL}, // reload health configuration + {"save-database", cmd_save_database_execute, CMD_TYPE_ORTHOGONAL}, // save database for memory mode save + {"reopen-logs", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL}, // Close and reopen log files + {"shutdown-agent", cmd_exit_execute, CMD_TYPE_EXCLUSIVE}, // exit cleanly + {"fatal-agent", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY}, // exit with fatal error + {"reload-claiming-state", cmd_reload_claiming_state_execute, CMD_TYPE_ORTHOGONAL}, // reload claiming state + {"reload-labels", cmd_reload_labels_execute, CMD_TYPE_ORTHOGONAL}, // reload the labels + {"read-config", cmd_read_config_execute, CMD_TYPE_CONCURRENT}, + {"write-config", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL}, + {"ping", cmd_ping_execute, CMD_TYPE_ORTHOGONAL}, + {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL} +}; + +/* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */ +static uv_mutex_t command_lock_array[CMD_TOTAL_COMMANDS]; +/* Commands of type CMD_TYPE_EXCLUSIVE are writers */ +static uv_rwlock_t exclusive_rwlock; +/* + * Locking order: + * 1. exclusive_rwlock + * 2. command_lock_array[] + */ + +/* Forward declarations */ +static void cmd_lock_exclusive(unsigned index); +static void cmd_lock_orthogonal(unsigned index); +static void cmd_lock_idempotent(unsigned index); +static void cmd_lock_high_priority(unsigned index); + +static command_lock_t *cmd_lock_by_type[] = { + cmd_lock_exclusive, + cmd_lock_orthogonal, + cmd_lock_idempotent, + cmd_lock_high_priority +}; + +/* Forward declarations */ +static void cmd_unlock_exclusive(unsigned index); +static void cmd_unlock_orthogonal(unsigned index); +static void cmd_unlock_idempotent(unsigned index); +static void cmd_unlock_high_priority(unsigned index); + +static command_lock_t *cmd_unlock_by_type[] = { + cmd_unlock_exclusive, + cmd_unlock_orthogonal, + cmd_unlock_idempotent, + cmd_unlock_high_priority +}; + +static cmd_status_t cmd_help_execute(char *args, char **message) +{ + (void)args; + + *message = mallocz(MAX_COMMAND_LENGTH); + strncpyz(*message, + "\nThe commands are (arguments are in brackets):\n" + "help\n" + " Show this help menu.\n" + "reload-health\n" + " Reload health configuration.\n" + "reload-labels\n" + " Reload all labels.\n" + "save-database\n" + " Save internal DB to disk for memory mode save.\n" + "reopen-logs\n" + " Close and reopen log files.\n" + "shutdown-agent\n" + " Cleanup and exit the netdata agent.\n" + "fatal-agent\n" + " Log the state and halt the netdata agent.\n" + "reload-claiming-state\n" + " Reload agent claiming state from disk.\n" + "ping\n" + " Return with 'pong' if agent is alive.\n" + "aclk-state [json]\n" + " Returns current state of ACLK and Cloud connection. (optionally in json)\n", + MAX_COMMAND_LENGTH - 1); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reload_health_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Reloading HEALTH configuration."); + health_reload(); + error_log_limit_reset(); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_save_database_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Saving databases."); + rrdhost_save_all(); + info("COMMAND: Databases saved."); + error_log_limit_reset(); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reopen_logs_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Reopening all log files."); + reopen_all_log_files(); + error_log_limit_reset(); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_exit_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Cleaning up to exit."); + netdata_cleanup_and_exit(0); + exit(0); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_fatal_execute(char *args, char **message) +{ + (void)args; + (void)message; + + fatal("COMMAND: netdata now exits."); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reload_claiming_state_execute(char *args, char **message) +{ + (void)args; + (void)message; +#if defined(DISABLE_CLOUD) || !defined(ENABLE_ACLK) + info("The claiming feature has been explicitly disabled"); + *message = strdupz("This agent cannot be claimed, it was built without support for Cloud"); + return CMD_STATUS_FAILURE; +#endif + error_log_limit_unlimited(); + info("COMMAND: Reloading Agent Claiming configuration."); + load_claiming_state(); + registry_update_cloud_base_url(); + rrdpush_claimed_id(localhost); + error_log_limit_reset(); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reload_labels_execute(char *args, char **message) +{ + (void)args; + info("COMMAND: reloading host labels."); + reload_host_labels(); + + BUFFER *wb = buffer_create(10); + rrdlabels_log_to_buffer(localhost->rrdlabels, wb); + (*message)=strdupz(buffer_tostring(wb)); + buffer_free(wb); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_read_config_execute(char *args, char **message) +{ + size_t n = strlen(args); + char *separator = strchr(args,'|'); + if (separator == NULL) + return CMD_STATUS_FAILURE; + char *separator2 = strchr(separator + 1,'|'); + if (separator2 == NULL) + return CMD_STATUS_FAILURE; + + char *temp = callocz(n + 1, 1); + strcpy(temp, args); + size_t offset = separator - args; + temp[offset] = 0; + size_t offset2 = separator2 - args; + temp[offset2] = 0; + + const char *conf_file = temp; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + + char *value = appconfig_get(tmp_config, temp + offset + 1, temp + offset2 + 1, NULL); + if (value == NULL) + { + error("Cannot execute read-config conf_file=%s section=%s / key=%s because no value set", conf_file, + temp + offset + 1, temp + offset2 + 1); + freez(temp); + return CMD_STATUS_FAILURE; + } + else + { + (*message) = strdupz(value); + freez(temp); + return CMD_STATUS_SUCCESS; + } + +} + +static cmd_status_t cmd_write_config_execute(char *args, char **message) +{ + UNUSED(message); + info("write-config %s", args); + size_t n = strlen(args); + char *separator = strchr(args,'|'); + if (separator == NULL) + return CMD_STATUS_FAILURE; + char *separator2 = strchr(separator + 1,'|'); + if (separator2 == NULL) + return CMD_STATUS_FAILURE; + char *separator3 = strchr(separator2 + 1,'|'); + if (separator3 == NULL) + return CMD_STATUS_FAILURE; + char *temp = callocz(n + 1, 1); + strcpy(temp, args); + size_t offset = separator - args; + temp[offset] = 0; + size_t offset2 = separator2 - args; + temp[offset2] = 0; + size_t offset3 = separator3 - args; + temp[offset3] = 0; + + const char *conf_file = temp; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + + appconfig_set(tmp_config, temp + offset + 1, temp + offset2 + 1, temp + offset3 + 1); + info("write-config conf_file=%s section=%s key=%s value=%s",conf_file, temp + offset + 1, temp + offset2 + 1, + temp + offset3 + 1); + freez(temp); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_ping_execute(char *args, char **message) +{ + (void)args; + + *message = strdupz("pong"); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_aclk_state(char *args, char **message) +{ + info("COMMAND: Reopening aclk/cloud state."); + if (strstr(args, "json")) + *message = aclk_state_json(); + else + *message = aclk_state(); + + return CMD_STATUS_SUCCESS; +} + +static void cmd_lock_exclusive(unsigned index) +{ + (void)index; + + uv_rwlock_wrlock(&exclusive_rwlock); +} + +static void cmd_lock_orthogonal(unsigned index) +{ + uv_rwlock_rdlock(&exclusive_rwlock); + uv_mutex_lock(&command_lock_array[index]); +} + +static void cmd_lock_idempotent(unsigned index) +{ + (void)index; + + uv_rwlock_rdlock(&exclusive_rwlock); +} + +static void cmd_lock_high_priority(unsigned index) +{ + (void)index; +} + +static void cmd_unlock_exclusive(unsigned index) +{ + (void)index; + + uv_rwlock_wrunlock(&exclusive_rwlock); +} + +static void cmd_unlock_orthogonal(unsigned index) +{ + uv_rwlock_rdunlock(&exclusive_rwlock); + uv_mutex_unlock(&command_lock_array[index]); +} + +static void cmd_unlock_idempotent(unsigned index) +{ + (void)index; + + uv_rwlock_rdunlock(&exclusive_rwlock); +} + +static void cmd_unlock_high_priority(unsigned index) +{ + (void)index; +} + +static void pipe_close_cb(uv_handle_t* handle) +{ + /* Also frees command context */ + freez(handle); +} + +static void pipe_write_cb(uv_write_t* req, int status) +{ + (void)status; + uv_pipe_t *client = req->data; + + uv_close((uv_handle_t *)client, pipe_close_cb); + --clients; + freez(client->data); + info("Command Clients = %u\n", clients); +} + +static inline void add_char_to_command_reply(char *reply_string, unsigned *reply_string_size, char character) +{ + reply_string[(*reply_string_size)++] = character; +} + +static inline void add_string_to_command_reply(char *reply_string, unsigned *reply_string_size, char *str) +{ + unsigned len; + + len = strlen(str); + + if (MAX_COMMAND_LENGTH - 1 < len + *reply_string_size) + len = MAX_COMMAND_LENGTH - *reply_string_size - 1; + + strncpyz(reply_string + *reply_string_size, str, len); + *reply_string_size += len; +} + +static void send_command_reply(struct command_context *cmd_ctx, cmd_status_t status, char *message) +{ + int ret; + char *reply_string = mallocz(MAX_COMMAND_LENGTH); + char exit_status_string[MAX_EXIT_STATUS_LENGTH + 1] = {'\0', }; + unsigned reply_string_size = 0; + uv_buf_t write_buf; + uv_stream_t *client = (uv_stream_t *)(uv_pipe_t *)cmd_ctx; + + snprintfz(exit_status_string, MAX_EXIT_STATUS_LENGTH, "%u", status); + add_char_to_command_reply(reply_string, &reply_string_size, CMD_PREFIX_EXIT_CODE); + add_string_to_command_reply(reply_string, &reply_string_size, exit_status_string); + add_char_to_command_reply(reply_string, &reply_string_size, '\0'); + + if (message) { + add_char_to_command_reply(reply_string, &reply_string_size, cmd_prefix_by_status[status]); + add_string_to_command_reply(reply_string, &reply_string_size, message); + } + + cmd_ctx->write_req.data = client; + client->data = reply_string; + write_buf.base = reply_string; + write_buf.len = reply_string_size; + ret = uv_write(&cmd_ctx->write_req, (uv_stream_t *)client, &write_buf, 1, pipe_write_cb); + if (ret) { + error("uv_write(): %s", uv_strerror(ret)); + } + info("COMMAND: Sending reply: \"%s\"", reply_string); +} + +cmd_status_t execute_command(cmd_t idx, char *args, char **message) +{ + cmd_status_t status; + cmd_type_t type = command_info_array[idx].type; + + cmd_lock_by_type[type](idx); + status = command_info_array[idx].func(args, message); + cmd_unlock_by_type[type](idx); + + return status; +} + +static void after_schedule_command(uv_work_t *req, int status) +{ + struct command_context *cmd_ctx = req->data; + + (void)status; + + send_command_reply(cmd_ctx, cmd_ctx->status, cmd_ctx->message); + if (cmd_ctx->message) + freez(cmd_ctx->message); +} + +static void schedule_command(uv_work_t *req) +{ + struct command_context *cmd_ctx = req->data; + + cmd_ctx->status = execute_command(cmd_ctx->idx, cmd_ctx->args, &cmd_ctx->message); +} + +/* This will alter the state of the command_info_array.cmd_str +*/ +static void parse_commands(struct command_context *cmd_ctx) +{ + char *message = NULL, *pos, *lstrip, *rstrip; + cmd_t i; + cmd_status_t status; + + status = CMD_STATUS_FAILURE; + + /* Skip white-space characters */ + for (pos = cmd_ctx->command_string ; isspace(*pos) && ('\0' != *pos) ; ++pos) {;} + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + if (!strncmp(pos, command_info_array[i].cmd_str, strlen(command_info_array[i].cmd_str))) { + if (CMD_EXIT == i) { + /* musl C does not like libuv workqueues calling exit() */ + execute_command(CMD_EXIT, NULL, NULL); + } + for (lstrip=pos + strlen(command_info_array[i].cmd_str); isspace(*lstrip) && ('\0' != *lstrip); ++lstrip) {;} + for (rstrip=lstrip+strlen(lstrip)-1; rstrip>lstrip && isspace(*rstrip); *(rstrip--) = 0 ); + + cmd_ctx->work.data = cmd_ctx; + cmd_ctx->idx = i; + cmd_ctx->args = lstrip; + cmd_ctx->message = NULL; + + fatal_assert(0 == uv_queue_work(loop, &cmd_ctx->work, schedule_command, after_schedule_command)); + break; + } + } + if (CMD_TOTAL_COMMANDS == i) { + /* no command found */ + message = strdupz("Illegal command. Please type \"help\" for instructions."); + send_command_reply(cmd_ctx, status, message); + freez(message); + } +} + +static void pipe_read_cb(uv_stream_t *client, ssize_t nread, const uv_buf_t *buf) +{ + struct command_context *cmd_ctx = (struct command_context *)client; + + if (0 == nread) { + info("%s: Zero bytes read by command pipe.", __func__); + } else if (UV_EOF == nread) { + info("EOF found in command pipe."); + parse_commands(cmd_ctx); + } else if (nread < 0) { + error("%s: %s", __func__, uv_strerror(nread)); + } + + if (nread < 0) { /* stop stream due to EOF or error */ + (void)uv_read_stop((uv_stream_t *)client); + } else if (nread) { + size_t to_copy; + + to_copy = MIN((size_t) nread, MAX_COMMAND_LENGTH - 1 - cmd_ctx->command_string_size); + memcpy(cmd_ctx->command_string + cmd_ctx->command_string_size, buf->base, to_copy); + cmd_ctx->command_string_size += to_copy; + cmd_ctx->command_string[cmd_ctx->command_string_size] = '\0'; + } + if (buf && buf->len) { + freez(buf->base); + } + + if (nread < 0 && UV_EOF != nread) { + uv_close((uv_handle_t *)client, pipe_close_cb); + --clients; + info("Command Clients = %u\n", clients); + } +} + +static void alloc_cb(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf) +{ + (void)handle; + + buf->base = mallocz(suggested_size); + buf->len = suggested_size; +} + +static void connection_cb(uv_stream_t *server, int status) +{ + int ret; + uv_pipe_t *client; + struct command_context *cmd_ctx; + fatal_assert(status == 0); + + /* combined allocation of client pipe and command context */ + cmd_ctx = mallocz(sizeof(*cmd_ctx)); + client = (uv_pipe_t *)cmd_ctx; + ret = uv_pipe_init(server->loop, client, 1); + if (ret) { + error("uv_pipe_init(): %s", uv_strerror(ret)); + freez(cmd_ctx); + return; + } + ret = uv_accept(server, (uv_stream_t *)client); + if (ret) { + error("uv_accept(): %s", uv_strerror(ret)); + uv_close((uv_handle_t *)client, pipe_close_cb); + return; + } + + ++clients; + info("Command Clients = %u\n", clients); + /* Start parsing a new command */ + cmd_ctx->command_string_size = 0; + cmd_ctx->command_string[0] = '\0'; + + ret = uv_read_start((uv_stream_t*)client, alloc_cb, pipe_read_cb); + if (ret) { + error("uv_read_start(): %s", uv_strerror(ret)); + uv_close((uv_handle_t *)client, pipe_close_cb); + --clients; + info("Command Clients = %u\n", clients); + return; + } +} + +static void async_cb(uv_async_t *handle) +{ + uv_stop(handle->loop); +} + +static void command_thread(void *arg) +{ + int ret; + uv_fs_t req; + + (void) arg; + loop = mallocz(sizeof(uv_loop_t)); + ret = uv_loop_init(loop); + if (ret) { + error("uv_loop_init(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_loop_init; + } + loop->data = NULL; + + ret = uv_async_init(loop, &async, async_cb); + if (ret) { + error("uv_async_init(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_async_init; + } + async.data = NULL; + + ret = uv_pipe_init(loop, &server_pipe, 0); + if (ret) { + error("uv_pipe_init(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_pipe_init; + } + (void)uv_fs_unlink(loop, &req, PIPENAME, NULL); + uv_fs_req_cleanup(&req); + ret = uv_pipe_bind(&server_pipe, PIPENAME); + if (ret) { + error("uv_pipe_bind(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_pipe_bind; + } + ret = uv_listen((uv_stream_t *)&server_pipe, SOMAXCONN, connection_cb); + if (ret) { + /* Fallback to backlog of 1 */ + info("uv_listen() failed with backlog = %d, falling back to backlog = 1.", SOMAXCONN); + ret = uv_listen((uv_stream_t *)&server_pipe, 1, connection_cb); + } + if (ret) { + error("uv_listen(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_uv_listen; + } + + command_thread_error = 0; + command_thread_shutdown = 0; + /* wake up initialization thread */ + completion_mark_complete(&completion); + + while (command_thread_shutdown == 0) { + uv_run(loop, UV_RUN_DEFAULT); + } + /* cleanup operations of the event loop */ + info("Shutting down command event loop."); + uv_close((uv_handle_t *)&async, NULL); + uv_close((uv_handle_t*)&server_pipe, NULL); + uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ + + info("Shutting down command loop complete."); + fatal_assert(0 == uv_loop_close(loop)); + freez(loop); + + return; + +error_after_uv_listen: +error_after_pipe_bind: + uv_close((uv_handle_t*)&server_pipe, NULL); +error_after_pipe_init: + uv_close((uv_handle_t *)&async, NULL); +error_after_async_init: + uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ + fatal_assert(0 == uv_loop_close(loop)); +error_after_loop_init: + freez(loop); + + /* wake up initialization thread */ + completion_mark_complete(&completion); +} + +static void sanity_check(void) +{ + /* The size of command_info_array must be CMD_TOTAL_COMMANDS elements */ + BUILD_BUG_ON(CMD_TOTAL_COMMANDS != sizeof(command_info_array) / sizeof(command_info_array[0])); +} + +void commands_init(void) +{ + cmd_t i; + int error; + + sanity_check(); + if (command_server_initialized) + return; + + info("Initializing command server."); + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + fatal_assert(0 == uv_mutex_init(&command_lock_array[i])); + } + fatal_assert(0 == uv_rwlock_init(&exclusive_rwlock)); + + completion_init(&completion); + error = uv_thread_create(&thread, command_thread, NULL); + if (error) { + error("uv_thread_create(): %s", uv_strerror(error)); + goto after_error; + } + /* wait for worker thread to initialize */ + completion_wait_for(&completion); + completion_destroy(&completion); + uv_thread_set_name_np(thread, "DAEMON_COMMAND"); + + if (command_thread_error) { + error = uv_thread_join(&thread); + if (error) { + error("uv_thread_create(): %s", uv_strerror(error)); + } + goto after_error; + } + + command_server_initialized = 1; + return; + +after_error: + error("Failed to initialize command server. The netdata cli tool will be unable to send commands."); +} + +void commands_exit(void) +{ + cmd_t i; + + if (!command_server_initialized) + return; + + command_thread_shutdown = 1; + info("Shutting down command server."); + /* wake up event loop */ + fatal_assert(0 == uv_async_send(&async)); + fatal_assert(0 == uv_thread_join(&thread)); + + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + uv_mutex_destroy(&command_lock_array[i]); + } + uv_rwlock_destroy(&exclusive_rwlock); + info("Command server has stopped."); + command_server_initialized = 0; +} diff --git a/daemon/commands.h b/daemon/commands.h new file mode 100644 index 0000000..f0e38ce --- /dev/null +++ b/daemon/commands.h @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COMMANDS_H +#define NETDATA_COMMANDS_H 1 + +#ifdef _WIN32 +# define PIPENAME "\\\\?\\pipe\\netdata-cli" +#else +# define PIPENAME "/tmp/netdata-ipc" +#endif + +#define MAX_COMMAND_LENGTH 4096 +#define MAX_EXIT_STATUS_LENGTH 23 /* Can't ever be bigger than "X-18446744073709551616" */ + +typedef enum cmd { + CMD_HELP = 0, + CMD_RELOAD_HEALTH, + CMD_SAVE_DATABASE, + CMD_REOPEN_LOGS, + CMD_EXIT, + CMD_FATAL, + CMD_RELOAD_CLAIMING_STATE, + CMD_RELOAD_LABELS, + CMD_READ_CONFIG, + CMD_WRITE_CONFIG, + CMD_PING, + CMD_ACLK_STATE, + CMD_TOTAL_COMMANDS +} cmd_t; + +typedef enum cmd_status { + CMD_STATUS_SUCCESS = 0, + CMD_STATUS_FAILURE, + CMD_STATUS_BUSY +} cmd_status_t; + +#define CMD_PREFIX_INFO 'O' /* Following string should go to cli stdout */ +#define CMD_PREFIX_ERROR 'E' /* Following string should go to cli stderr */ +#define CMD_PREFIX_EXIT_CODE 'X' /* Following string is cli integer exit code */ + +typedef enum cmd_type { + /* + * No other command is allowed to run at the same time (except for CMD_TYPE_HIGH_PRIORITY). + */ + CMD_TYPE_EXCLUSIVE = 0, + /* + * Other commands are allowed to run concurrently (except for CMD_TYPE_EXCLUSIVE) but calls to this command are + * serialized. + */ + CMD_TYPE_ORTHOGONAL, + /* + * Other commands are allowed to run concurrently (except for CMD_TYPE_EXCLUSIVE) as are calls to this command. + */ + CMD_TYPE_CONCURRENT, + /* + * Those commands are always allowed to run. + */ + CMD_TYPE_HIGH_PRIORITY +} cmd_type_t; + +/** + * Executes a command and returns the status. + * + * @param args a string that may contain additional parameters to be parsed + * @param message allocate and return a message if need be (up to MAX_COMMAND_LENGTH bytes) + * @return CMD_FAILURE or CMD_SUCCESS + */ +typedef cmd_status_t (command_action_t) (char *args, char **message); + +typedef struct command_info { + char *cmd_str; // the command string + command_action_t *func; // the function that executes the command + cmd_type_t type; // Concurrency control information for the command +} command_info_t; + +typedef void (command_lock_t) (unsigned index); + +cmd_status_t execute_command(cmd_t idx, char *args, char **message); +void commands_init(void); +void commands_exit(void); + +#endif //NETDATA_COMMANDS_H diff --git a/daemon/common.c b/daemon/common.c new file mode 100644 index 0000000..85d6386 --- /dev/null +++ b/daemon/common.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +char *netdata_configured_hostname = NULL; +char *netdata_configured_user_config_dir = CONFIG_DIR; +char *netdata_configured_stock_config_dir = LIBCONFIG_DIR; +char *netdata_configured_log_dir = LOG_DIR; +char *netdata_configured_primary_plugins_dir = NULL; +char *netdata_configured_web_dir = WEB_DIR; +char *netdata_configured_cache_dir = CACHE_DIR; +char *netdata_configured_varlib_dir = VARLIB_DIR; +char *netdata_configured_lock_dir = NULL; +char *netdata_configured_home_dir = VARLIB_DIR; +char *netdata_configured_host_prefix = NULL; +char *netdata_configured_timezone = NULL; +char *netdata_configured_abbrev_timezone = NULL; +int32_t netdata_configured_utc_offset = 0; +int netdata_ready; +int netdata_cloud_setting; + diff --git a/daemon/common.h b/daemon/common.h new file mode 100644 index 0000000..f3d8686 --- /dev/null +++ b/daemon/common.h @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COMMON_H +#define NETDATA_COMMON_H 1 + +#include "libnetdata/libnetdata.h" + +// ---------------------------------------------------------------------------- +// shortcuts for the default netdata configuration + +#define config_load(filename, overwrite_used, section) appconfig_load(&netdata_config, filename, overwrite_used, section) +#define config_get(section, name, default_value) appconfig_get(&netdata_config, section, name, default_value) +#define config_get_number(section, name, value) appconfig_get_number(&netdata_config, section, name, value) +#define config_get_float(section, name, value) appconfig_get_float(&netdata_config, section, name, value) +#define config_get_boolean(section, name, value) appconfig_get_boolean(&netdata_config, section, name, value) +#define config_get_boolean_ondemand(section, name, value) appconfig_get_boolean_ondemand(&netdata_config, section, name, value) +#define config_get_duration(section, name, value) appconfig_get_duration(&netdata_config, section, name, value) + +#define config_set(section, name, default_value) appconfig_set(&netdata_config, section, name, default_value) +#define config_set_default(section, name, value) appconfig_set_default(&netdata_config, section, name, value) +#define config_set_number(section, name, value) appconfig_set_number(&netdata_config, section, name, value) +#define config_set_float(section, name, value) appconfig_set_float(&netdata_config, section, name, value) +#define config_set_boolean(section, name, value) appconfig_set_boolean(&netdata_config, section, name, value) + +#define config_exists(section, name) appconfig_exists(&netdata_config, section, name) +#define config_move(section_old, name_old, section_new, name_new) appconfig_move(&netdata_config, section_old, name_old, section_new, name_new) + +#define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed) + +#define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name) + +// ---------------------------------------------------------------------------- +// netdata include files + +#include "global_statistics.h" + +// the netdata database +#include "database/rrd.h" + +// the netdata webserver(s) +#include "web/server/web_server.h" + +// streaming metrics between netdata servers +#include "streaming/rrdpush.h" + +// health monitoring and alarm notifications +#include "health/health.h" + +// anomaly detection +#include "ml/ml.h" + +// the netdata registry +// the registry is actually an API feature +#include "registry/registry.h" + +// exporting engine for archiving the metrics +#include "exporting/exporting_engine.h" + +// the netdata API +#include "web/api/web_api_v1.h" + +// all data collection plugins +#include "collectors/all.h" + +// netdata unit tests +#include "unit_test.h" + +// netdata agent claiming +#include "claim/claim.h" + +// netdata agent cloud link +#include "aclk/aclk.h" + +// global GUID map functions + +// netdata agent spawn server +#include "spawn/spawn.h" + +// the netdata daemon +#include "daemon.h" +#include "main.h" +#include "static_threads.h" +#include "signals.h" +#include "commands.h" +#include "analytics.h" + +// global netdata daemon variables +extern char *netdata_configured_hostname; +extern char *netdata_configured_user_config_dir; +extern char *netdata_configured_stock_config_dir; +extern char *netdata_configured_log_dir; +extern char *netdata_configured_primary_plugins_dir; +extern char *netdata_configured_web_dir; +extern char *netdata_configured_cache_dir; +extern char *netdata_configured_varlib_dir; +extern char *netdata_configured_lock_dir; +extern char *netdata_configured_home_dir; +extern char *netdata_configured_host_prefix; +extern char *netdata_configured_timezone; +extern char *netdata_configured_abbrev_timezone; +extern int32_t netdata_configured_utc_offset; +extern int netdata_zero_metrics_enabled; +extern int netdata_anonymous_statistics_enabled; + +extern int netdata_ready; +extern int netdata_cloud_setting; + +#endif /* NETDATA_COMMON_H */ diff --git a/daemon/config/README.md b/daemon/config/README.md new file mode 100644 index 0000000..7b4d27e --- /dev/null +++ b/daemon/config/README.md @@ -0,0 +1,230 @@ + + +# Daemon configuration + +
+The daemon configuration file is read from /etc/netdata/netdata.conf. + +Depending on your installation method, Netdata will have been installed either directly under `/`, or +under `/opt/netdata`. The paths mentioned here and in the documentation in general assume that your installation is +under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well. (i.e. `/etc/netdata` will +be `/opt/netdata/etc/netdata`). + +
+ +This config file **is not needed by default**. Netdata works fine out of the box without it. But it does allow you to +adapt the general behavior of Netdata, in great detail. You can find all these settings, with their default values, by +accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example check the configuration file +of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to +[private IPs](https://en.wikipedia.org/wiki/Private_network), via +the [web server access lists](/web/server/README.md#access-lists). + +`netdata.conf` has sections stated with `[section]`. You will see the following sections: + +1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/daemon/README.md). +2. `[db]` to [configure](#db-section-options) the database of Netdata. +3. `[directories]` to [configure](#directories-section-options) the directories used by Netdata. +4. `[logs]` to [configure](#logs-section-options) the Netdata logging. +5. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used + Netdata. +6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](/daemon/README.md) SQLite settings. +7. `[ml]` to configure settings for [machine learning](/ml/README.md). +8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/health/README.md). +9. `[web]` to [configure the web server](/web/server/README.md). +10. `[registry]` for the [Netdata registry](/registry/README.md). +11. `[global statistics]` for the [Netdata registry](/registry/README.md). +12. `[statsd]` for the general settings of the [stats.d.plugin](/collectors/statsd.plugin/README.md). +13. `[plugins]` to [configure](#plugins-section-options) which [collectors](/collectors/README.md) to use and PATH + settings. +14. `[plugin:NAME]` sections for each collector plugin, under the + comment [Per plugin configuration](#per-plugin-configuration). + +The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When +you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a +comment on settings it does not currently use. + +## Applying changes + +After `netdata.conf` has been modified, Netdata needs to be [restarted](/docs/configure/start-stop-restart.md) for +changes to apply: + +```bash +sudo systemctl restart netdata +``` + +If the above does not work, try the following: + +```bash +sudo killall netdata; sleep 10; sudo netdata +``` + +Please note that your data history will be lost if you have modified `history` parameter in section `[global]`. + +## Sections + +### [global] section options + +| setting | default | info | +|:-------------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| process scheduling policy | `keep` | See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy) | +| OOM score | `0` | | +| glibc malloc arena max for plugins | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). | +| glibc malloc arena max for Netdata | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). | +| hostname | auto-detected | The hostname of the computer running Netdata. | +| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). | +| timezone | auto-detected | The timezone retrieved from the environment variable | +| run as user | `netdata` | The user Netdata will run as. | +| pthread stack size | auto-detected | | + +### [db] section options + +| setting | default | info | +|:---------------------------------------------:|:----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`.
`save`: Netdata will save its round robin database on exit and load it on startup.
`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`).
`ram`: The round-robin database will be temporary and it will be lost when Netdata exits.
`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. | +| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/database/README.md) for more information. | +| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. | +| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. | +| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier.
`N belongs to [1..4]` || + | dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). | +| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. | +| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well.
`N belongs to [1..4]` | +| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). | +| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`.
`N belongs to [1..4]` | +| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier.
`New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window).
`none`: No back filling is applied.
`N belongs to [1..4]` | +| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm) | +| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | +| gap when lost iterations above | `1` | | +| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. | +| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions | +| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. | +| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | + +:::info + +The multiplication of all the **enabled** tiers `dbengine tier N update every iterations` values must be less than `65535`. + +::: + + +### [directories] section options + +| setting | default | info | +|:-------------------:|:------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| config | `/etc/netdata` | The directory configuration files are kept. | +| stock config | `/usr/lib/netdata/conf.d` | | +| log | `/var/log/netdata` | The directory in which the [log files](/daemon/README.md#log-files) are kept. | +| web | `/usr/share/netdata/web` | The directory the web static files are kept. | +| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. | +| lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. | +| home | `/var/cache/netdata` | Contains the db files for the collected metrics. | +| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. | +| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. | +| health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files, to override the stock configurations | +| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector | +| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | + +### [logs] section options + +| setting | default | info | +|:----------------------------------:|:-----------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging). | +| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging). | +| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. | +| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. | +| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. | +| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. | +| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. | + +### [environment variables] section options + +| setting | default | info | +|:----------:|:-----------------:|:-----------------------------------------------------------| +| TZ | `:/etc/localtime` | Where to find the timezone | +| PATH | `auto-detected` | Specifies the directories to be searched to find a command | +| PYTHONPATH | | Used to set a custom python path | + +### [sqlite] section options + +| setting | default | info | +|:------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| auto vacuum | `INCREMENTAL` | The [auto-vacuum status](https://www.sqlite.org/pragma.html#pragma_auto_vacuum) in the database | +| synchronous | `NORMAL` | The setting of the ["synchronous"](https://www.sqlite.org/pragma.html#pragma_synchronous) flag | +| journal mode | `WAL` | The [journal mode](https://www.sqlite.org/pragma.html#pragma_journal_mode) for databases | +| temp store | `MEMORY` | Used to determine where [temporary tables and indices are stored](https://www.sqlite.org/pragma.html#pragma_temp_store) | +| journal size limit | `16777216` | Used to set a new [limit in bytes for the database](https://www.sqlite.org/pragma.html#pragma_journal_size_limit) | +| cache size | `-2000` | Used to [suggest the maximum number of database disk pages](https://www.sqlite.org/pragma.html#pragma_cache_size) that SQLite will hold in memory at once per open database file | + +### [health] section options + +This section controls the general behavior of the health monitoring capabilities of Netdata. + +Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health +monitoring](/health/README.md). + +[Alarm notifications](/health/notifications/README.md) are configured in `health_alarm_notify.conf`. + +| setting | default | info | +|:----------------------------------------------:|:------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enabled | `yes` | Set to `no` to disable all alarms and notifications | +| in memory max health log entries | 1000 | Size of the alarm history held in RAM | +| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alarm notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). | +| run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. | +| postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. | +| rotate log every lines | 2000 | Controls the number of alarm log entries stored in `/health-log.db`, where `` is the one configured in the [\[global\] section](#global-section-options) | + +### [web] section options + +Refer to the [web server documentation](/web/server/README.md) + +### [plugins] section options + +In this section you will see be a boolean (`yes`/`no`) option for each plugin (e.g. tc, cgroups, apps, proc etc.). Note +that the configuration options in this section for the orchestrator plugins `python.d` and `charts.d` control **all the +modules** written for that orchestrator. For instance, setting `python.d = no` means that all Python modules +under `collectors/python.d.plugin` will be disabled. + +Additionally, there will be the following options: + +| setting | default | info | +|:-------------------------------:|:---------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enable running new plugins | `yes` | When set to `yes`, Netdata will enable detected plugins, even if they are not configured explicitly. Setting this to `no` will only enable plugins explicitly configured in this file with a `yes` | +| check for new plugins every | 60 | The time in seconds to check for new plugins in the plugins directory. This allows having other applications dynamically creating plugins for Netdata. | +| checks | `no` | This is a debugging plugin for the internal latency | + +### [registry] section options + +To understand what this section is and how it should be configured, please refer to +the [registry documentation](/registry/README.md). + +## Per-plugin configuration + +The configuration options for plugins appear in sections following the pattern `[plugin:NAME]`. + +### Internal plugins + +Most internal plugins will provide additional options. Check [Internal Plugins](/collectors/README.md) for more +information. + +Please note, that by default Netdata will enable monitoring metrics for disks, memory, and network only when they are +not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, +will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them +to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You +can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics +for all internal Netdata plugins. + +### External plugins + +External plugins will have only 2 options at `netdata.conf`: + +| setting | default | info | +|:---------------:|:--------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------| +| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](/docs/guides/configure/performance.md). | +| command options | - | Additional command line options to pass to the plugin. | | + +External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their +documentation. + diff --git a/daemon/daemon.c b/daemon/daemon.c new file mode 100644 index 0000000..2b8a655 --- /dev/null +++ b/daemon/daemon.c @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include + +char pidfile[FILENAME_MAX + 1] = ""; +char claimingdirectory[FILENAME_MAX + 1]; +char exepath[FILENAME_MAX + 1]; + +void get_netdata_execution_path(void) +{ + int ret; + size_t exepath_size = 0; + struct passwd *passwd = NULL; + char *user = NULL; + + passwd = getpwuid(getuid()); + user = (passwd && passwd->pw_name) ? passwd->pw_name : ""; + + exepath_size = sizeof(exepath) - 1; + ret = uv_exepath(exepath, &exepath_size); + if (0 != ret) { + error("uv_exepath(\"%s\", %u) (user: %s) failed (%s).", exepath, (unsigned)exepath_size, user, + uv_strerror(ret)); + fatal("Cannot start netdata without getting execution path."); + } + exepath[exepath_size] = '\0'; +} + +static void chown_open_file(int fd, uid_t uid, gid_t gid) { + if(fd == -1) return; + + struct stat buf; + + if(fstat(fd, &buf) == -1) { + error("Cannot fstat() fd %d", fd); + return; + } + + if((buf.st_uid != uid || buf.st_gid != gid) && S_ISREG(buf.st_mode)) { + if(fchown(fd, uid, gid) == -1) + error("Cannot fchown() fd %d.", fd); + } +} + +void create_needed_dir(const char *dir, uid_t uid, gid_t gid) +{ + // attempt to create the directory + if(mkdir(dir, 0755) == 0) { + // we created it + + // chown it to match the required user + if(chown(dir, uid, gid) == -1) + error("Cannot chown directory '%s' to %u:%u", dir, (unsigned int)uid, (unsigned int)gid); + } + else if(errno != EEXIST) + // log an error only if the directory does not exist + error("Cannot create directory '%s'", dir); +} + +void clean_directory(char *dirname) +{ + DIR *dir = opendir(dirname); + if(!dir) return; + + int dir_fd = dirfd(dir); + struct dirent *de = NULL; + + while((de = readdir(dir))) + if(de->d_type == DT_REG) + if (unlinkat(dir_fd, de->d_name, 0)) + error("Cannot delete %s/%s", dirname, de->d_name); + + closedir(dir); +} + +int become_user(const char *username, int pid_fd) { + int am_i_root = (getuid() == 0)?1:0; + + struct passwd *pw = getpwnam(username); + if(!pw) { + error("User %s is not present.", username); + return -1; + } + + uid_t uid = pw->pw_uid; + gid_t gid = pw->pw_gid; + + create_needed_dir(netdata_configured_cache_dir, uid, gid); + create_needed_dir(netdata_configured_varlib_dir, uid, gid); + create_needed_dir(netdata_configured_lock_dir, uid, gid); + create_needed_dir(claimingdirectory, uid, gid); + + clean_directory(netdata_configured_lock_dir); + + if(pidfile[0]) { + if(chown(pidfile, uid, gid) == -1) + error("Cannot chown '%s' to %u:%u", pidfile, (unsigned int)uid, (unsigned int)gid); + } + + int ngroups = (int)sysconf(_SC_NGROUPS_MAX); + gid_t *supplementary_groups = NULL; + if(ngroups > 0) { + supplementary_groups = mallocz(sizeof(gid_t) * ngroups); +#ifdef __APPLE__ + if(getgrouplist(username, gid, (int *)supplementary_groups, &ngroups) == -1) { +#else + if(getgrouplist(username, gid, supplementary_groups, &ngroups) == -1) { +#endif /* __APPLE__ */ + if(am_i_root) + error("Cannot get supplementary groups of user '%s'.", username); + + ngroups = 0; + } + } + + chown_open_file(STDOUT_FILENO, uid, gid); + chown_open_file(STDERR_FILENO, uid, gid); + chown_open_file(stdaccess_fd, uid, gid); + chown_open_file(pid_fd, uid, gid); + + if(supplementary_groups && ngroups > 0) { + if(setgroups((size_t)ngroups, supplementary_groups) == -1) { + if(am_i_root) + error("Cannot set supplementary groups for user '%s'", username); + } + ngroups = 0; + } + + if(supplementary_groups) + freez(supplementary_groups); + +#ifdef __APPLE__ + if(setregid(gid, gid) != 0) { +#else + if(setresgid(gid, gid, gid) != 0) { +#endif /* __APPLE__ */ + error("Cannot switch to user's %s group (gid: %u).", username, gid); + return -1; + } + +#ifdef __APPLE__ + if(setreuid(uid, uid) != 0) { +#else + if(setresuid(uid, uid, uid) != 0) { +#endif /* __APPLE__ */ + error("Cannot switch to user %s (uid: %u).", username, uid); + return -1; + } + + if(setgid(gid) != 0) { + error("Cannot switch to user's %s group (gid: %u).", username, gid); + return -1; + } + if(setegid(gid) != 0) { + error("Cannot effectively switch to user's %s group (gid: %u).", username, gid); + return -1; + } + if(setuid(uid) != 0) { + error("Cannot switch to user %s (uid: %u).", username, uid); + return -1; + } + if(seteuid(uid) != 0) { + error("Cannot effectively switch to user %s (uid: %u).", username, uid); + return -1; + } + + return(0); +} + +#ifndef OOM_SCORE_ADJ_MAX +#define OOM_SCORE_ADJ_MAX (1000) +#endif +#ifndef OOM_SCORE_ADJ_MIN +#define OOM_SCORE_ADJ_MIN (-1000) +#endif + +static void oom_score_adj(void) { + char buf[30 + 1]; + long long int old_score, wanted_score = 0, final_score = 0; + + // read the existing score + if(read_single_signed_number_file("/proc/self/oom_score_adj", &old_score)) { + error("Out-Of-Memory (OOM) score setting is not supported on this system."); + return; + } + + if (old_score != 0) { + wanted_score = old_score; + analytics_report_oom_score(old_score); + } + + // check the environment + char *s = getenv("OOMScoreAdjust"); + if(!s || !*s) { + snprintfz(buf, 30, "%d", (int)wanted_score); + s = buf; + } + + // check netdata.conf configuration + s = config_get(CONFIG_SECTION_GLOBAL, "OOM score", s); + if(s && *s && (isdigit(*s) || *s == '-' || *s == '+')) + wanted_score = atoll(s); + else if(s && !strcmp(s, "keep")) { + info("Out-Of-Memory (OOM) kept as-is (running with %d)", (int) old_score); + return; + } + else { + info("Out-Of-Memory (OOM) score not changed due to non-numeric setting: '%s' (running with %d)", s, (int)old_score); + return; + } + + if(wanted_score < OOM_SCORE_ADJ_MIN) { + error("Wanted Out-Of-Memory (OOM) score %d is too small. Using %d", (int)wanted_score, (int)OOM_SCORE_ADJ_MIN); + wanted_score = OOM_SCORE_ADJ_MIN; + } + + if(wanted_score > OOM_SCORE_ADJ_MAX) { + error("Wanted Out-Of-Memory (OOM) score %d is too big. Using %d", (int)wanted_score, (int)OOM_SCORE_ADJ_MAX); + wanted_score = OOM_SCORE_ADJ_MAX; + } + + if(old_score == wanted_score) { + info("Out-Of-Memory (OOM) score is already set to the wanted value %d", (int)old_score); + return; + } + + int written = 0; + int fd = open("/proc/self/oom_score_adj", O_WRONLY); + if(fd != -1) { + snprintfz(buf, 30, "%d", (int)wanted_score); + ssize_t len = strlen(buf); + if(len > 0 && write(fd, buf, (size_t)len) == len) written = 1; + close(fd); + + if(written) { + if(read_single_signed_number_file("/proc/self/oom_score_adj", &final_score)) + error("Adjusted my Out-Of-Memory (OOM) score to %d, but cannot verify it.", (int)wanted_score); + else if(final_score == wanted_score) + info("Adjusted my Out-Of-Memory (OOM) score from %d to %d.", (int)old_score, (int)final_score); + else + error("Adjusted my Out-Of-Memory (OOM) score from %d to %d, but it has been set to %d.", (int)old_score, (int)wanted_score, (int)final_score); + analytics_report_oom_score(final_score); + } + else + error("Failed to adjust my Out-Of-Memory (OOM) score to %d. Running with %d. (systemd systems may change it via netdata.service)", (int)wanted_score, (int)old_score); + } + else + error("Failed to adjust my Out-Of-Memory (OOM) score. Cannot open /proc/self/oom_score_adj for writing."); +} + +static void process_nice_level(void) { +#ifdef HAVE_NICE + int nice_level = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process nice level", 19); + if(nice(nice_level) == -1) error("Cannot set netdata CPU nice level to %d.", nice_level); + else debug(D_SYSTEM, "Set netdata nice level to %d.", nice_level); +#endif // HAVE_NICE +}; + +#define SCHED_FLAG_NONE 0x00 +#define SCHED_FLAG_PRIORITY_CONFIGURABLE 0x01 // the priority is user configurable +#define SCHED_FLAG_KEEP_AS_IS 0x04 // do not attempt to set policy, priority or nice() +#define SCHED_FLAG_USE_NICE 0x08 // use nice() after setting this policy + +struct sched_def { + char *name; + int policy; + int priority; + uint8_t flags; +} scheduler_defaults[] = { + + // the order of array members is important! + // the first defined is the default used by netdata + + // the available members are important too! + // these are all the possible scheduling policies supported by netdata + +#ifdef SCHED_BATCH + { "batch", SCHED_BATCH, 0, SCHED_FLAG_USE_NICE }, +#endif + +#ifdef SCHED_OTHER + { "other", SCHED_OTHER, 0, SCHED_FLAG_USE_NICE }, + { "nice", SCHED_OTHER, 0, SCHED_FLAG_USE_NICE }, +#endif + +#ifdef SCHED_IDLE + { "idle", SCHED_IDLE, 0, SCHED_FLAG_NONE }, +#endif + +#ifdef SCHED_RR + { "rr", SCHED_RR, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, +#endif + +#ifdef SCHED_FIFO + { "fifo", SCHED_FIFO, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, +#endif + + // do not change the scheduling priority + { "keep", 0, 0, SCHED_FLAG_KEEP_AS_IS }, + { "none", 0, 0, SCHED_FLAG_KEEP_AS_IS }, + + // array termination + { NULL, 0, 0, 0 } +}; + + +#ifdef HAVE_SCHED_GETSCHEDULER +static void sched_getscheduler_report(void) { + int sched = sched_getscheduler(0); + if(sched == -1) { + error("Cannot get my current process scheduling policy."); + return; + } + else { + int i; + for(i = 0 ; scheduler_defaults[i].name ; i++) { + if(scheduler_defaults[i].policy == sched) { + if(scheduler_defaults[i].flags & SCHED_FLAG_PRIORITY_CONFIGURABLE) { + struct sched_param param; + if(sched_getparam(0, ¶m) == -1) { + error("Cannot get the process scheduling priority for my policy '%s'", scheduler_defaults[i].name); + return; + } + else { + info("Running with process scheduling policy '%s', priority %d", scheduler_defaults[i].name, param.sched_priority); + } + } + else if(scheduler_defaults[i].flags & SCHED_FLAG_USE_NICE) { + #ifdef HAVE_GETPRIORITY + int n = getpriority(PRIO_PROCESS, 0); + info("Running with process scheduling policy '%s', nice level %d", scheduler_defaults[i].name, n); + #else // !HAVE_GETPRIORITY + info("Running with process scheduling policy '%s'", scheduler_defaults[i].name); + #endif // !HAVE_GETPRIORITY + } + else { + info("Running with process scheduling policy '%s'", scheduler_defaults[i].name); + } + + return; + } + } + } +} +#endif /* HAVE_SCHED_GETSCHEDULER */ + +#ifdef HAVE_SCHED_SETSCHEDULER + +static void sched_setscheduler_set(void) { + + if(scheduler_defaults[0].name) { + const char *name = scheduler_defaults[0].name; + int policy = scheduler_defaults[0].policy, priority = scheduler_defaults[0].priority; + uint8_t flags = scheduler_defaults[0].flags; + int found = 0; + + // read the configuration + name = config_get(CONFIG_SECTION_GLOBAL, "process scheduling policy", name); + int i; + for(i = 0 ; scheduler_defaults[i].name ; i++) { + if(!strcmp(name, scheduler_defaults[i].name)) { + found = 1; + policy = scheduler_defaults[i].policy; + priority = scheduler_defaults[i].priority; + flags = scheduler_defaults[i].flags; + + if(flags & SCHED_FLAG_KEEP_AS_IS) + goto report; + + if(flags & SCHED_FLAG_PRIORITY_CONFIGURABLE) + priority = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process scheduling priority", priority); + +#ifdef HAVE_SCHED_GET_PRIORITY_MIN + errno = 0; + if(priority < sched_get_priority_min(policy)) { + error("scheduler %s (%d) priority %d is below the minimum %d. Using the minimum.", name, policy, priority, sched_get_priority_min(policy)); + priority = sched_get_priority_min(policy); + } +#endif +#ifdef HAVE_SCHED_GET_PRIORITY_MAX + errno = 0; + if(priority > sched_get_priority_max(policy)) { + error("scheduler %s (%d) priority %d is above the maximum %d. Using the maximum.", name, policy, priority, sched_get_priority_max(policy)); + priority = sched_get_priority_max(policy); + } +#endif + break; + } + } + + if(!found) { + error("Unknown scheduling policy '%s' - falling back to nice", name); + goto fallback; + } + + const struct sched_param param = { + .sched_priority = priority + }; + + errno = 0; + i = sched_setscheduler(0, policy, ¶m); + if(i != 0) { + error("Cannot adjust netdata scheduling policy to %s (%d), with priority %d. Falling back to nice.", name, policy, priority); + } + else { + info("Adjusted netdata scheduling policy to %s (%d), with priority %d.", name, policy, priority); + if(!(flags & SCHED_FLAG_USE_NICE)) + goto report; + } + } + +fallback: + process_nice_level(); + +report: + sched_getscheduler_report(); +} +#else /* HAVE_SCHED_SETSCHEDULER */ +static void sched_setscheduler_set(void) { + process_nice_level(); +} +#endif /* HAVE_SCHED_SETSCHEDULER */ + +int become_daemon(int dont_fork, const char *user) +{ + if(!dont_fork) { + int i = fork(); + if(i == -1) { + perror("cannot fork"); + exit(1); + } + if(i != 0) { + exit(0); // the parent + } + + // become session leader + if (setsid() < 0) { + perror("Cannot become session leader."); + exit(2); + } + + // fork() again + i = fork(); + if(i == -1) { + perror("cannot fork"); + exit(1); + } + if(i != 0) { + exit(0); // the parent + } + } + + // generate our pid file + int pidfd = -1; + if(pidfile[0]) { + pidfd = open(pidfile, O_WRONLY | O_CREAT, 0644); + if(pidfd >= 0) { + if(ftruncate(pidfd, 0) != 0) + error("Cannot truncate pidfile '%s'.", pidfile); + + char b[100]; + sprintf(b, "%d\n", getpid()); + ssize_t i = write(pidfd, b, strlen(b)); + if(i <= 0) + error("Cannot write pidfile '%s'.", pidfile); + } + else error("Failed to open pidfile '%s'.", pidfile); + } + + // Set new file permissions + umask(0007); + + // adjust my Out-Of-Memory score + oom_score_adj(); + + // never become a problem + sched_setscheduler_set(); + + // Set claiming directory based on user config directory with correct ownership + snprintfz(claimingdirectory, FILENAME_MAX, "%s/cloud.d", netdata_configured_varlib_dir); + + if(user && *user) { + if(become_user(user, pidfd) != 0) { + error("Cannot become user '%s'. Continuing as we are.", user); + } + else debug(D_SYSTEM, "Successfully became user '%s'.", user); + } + else { + create_needed_dir(netdata_configured_cache_dir, getuid(), getgid()); + create_needed_dir(netdata_configured_varlib_dir, getuid(), getgid()); + create_needed_dir(netdata_configured_lock_dir, getuid(), getgid()); + create_needed_dir(claimingdirectory, getuid(), getgid()); + + clean_directory(netdata_configured_lock_dir); + } + + if(pidfd != -1) + close(pidfd); + + return(0); +} diff --git a/daemon/daemon.h b/daemon/daemon.h new file mode 100644 index 0000000..2a8a58e --- /dev/null +++ b/daemon/daemon.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DAEMON_H +#define NETDATA_DAEMON_H 1 + +int become_user(const char *username, int pid_fd); + +int become_daemon(int dont_fork, const char *user); + +void netdata_cleanup_and_exit(int i); +void send_statistics(const char *action, const char *action_result, const char *action_data); + +void get_netdata_execution_path(void); + +extern char pidfile[]; +extern char exepath[]; + +#endif /* NETDATA_DAEMON_H */ diff --git a/daemon/get-kubernetes-labels.sh.in b/daemon/get-kubernetes-labels.sh.in new file mode 100644 index 0000000..bc82c2a --- /dev/null +++ b/daemon/get-kubernetes-labels.sh.in @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +me="$(basename "${0}")" + +# Checks if netdata is running in a kubernetes pod and fetches: +# - pod's labels +# - kubernetes cluster name (GKE only) + +if [ -z "${KUBERNETES_SERVICE_HOST}" ] || [ -z "${KUBERNETES_PORT_443_TCP_PORT}" ] || [ -z "${MY_POD_NAMESPACE}" ] || [ -z "${MY_POD_NAME}" ]; then + exit 0 +fi + +if ! command -v jq >/dev/null 2>&1; then + echo >&2 "${me}: jq command not available. Please install jq to get host labels for kubernetes pods." + exit 1 +fi + +TOKEN="$(< /var/run/secrets/kubernetes.io/serviceaccount/token)" +HEADER="Authorization: Bearer $TOKEN" +HOST="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT" + +URL="https://$HOST/api/v1/namespaces/$MY_POD_NAMESPACE/pods/$MY_POD_NAME" +if ! POD_DATA=$(curl --fail -sSk -H "$HEADER" "$URL" 2>&1); then + echo >&2 "${me}: error on curl '${URL}': ${POD_DATA}." + exit 1 +fi + +URL="https://$HOST/api/v1/namespaces/kube-system" +if ! KUBE_SYSTEM_NS_DATA=$(curl --fail -sSk -H "$HEADER" "$URL" 2>&1); then + echo >&2 "${me}: error on curl '${URL}': ${KUBE_SYSTEM_NS_DATA}." + exit 1 +fi + +if ! POD_LABELS=$(jq -r '.metadata.labels' <<< "$POD_DATA" | grep ':' | tr -d '," ' 2>&1); then + echo >&2 "${me}: error on 'jq' parse pod data: ${POD_LABELS}." + exit 1 +fi + +if ! KUBE_SYSTEM_NS_UID=$(jq -r '.metadata.uid' <<< "$KUBE_SYSTEM_NS_DATA" 2>&1); then + echo >&2 "${me}: error on 'jq' parse kube_system_ns: ${KUBE_SYSTEM_NS_UID}." + exit 1 +fi + +LABELS="$POD_LABELS\nk8s_cluster_id:$KUBE_SYSTEM_NS_UID" + +GCP_META_HEADER="Metadata-Flavor: Google" +GCP_META_URL="http://metadata/computeMetadata/v1" +GKE_CLUSTER_NAME="" + +if id=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/project/project-id"); then + loc=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-location") + name=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-name") + [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ] && GKE_CLUSTER_NAME="gke_${id}_${loc}_${name}" +fi + +[ -n "$GKE_CLUSTER_NAME" ] && LABELS+="\nk8s_cluster_name:$GKE_CLUSTER_NAME" + +echo -e "$LABELS" + +exit 0 diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c new file mode 100644 index 0000000..a4e9d32 --- /dev/null +++ b/daemon/global_statistics.c @@ -0,0 +1,2894 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +#define GLOBAL_STATS_RESET_WEB_USEC_MAX 0x01 + +#define WORKER_JOB_GLOBAL 0 +#define WORKER_JOB_REGISTRY 1 +#define WORKER_JOB_WORKERS 2 +#define WORKER_JOB_DBENGINE 3 +#define WORKER_JOB_HEARTBEAT 4 +#define WORKER_JOB_STRINGS 5 +#define WORKER_JOB_DICTIONARIES 6 +#define WORKER_JOB_MALLOC_TRACE 7 +#define WORKER_JOB_SQLITE3 8 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 9 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 9 +#endif + +bool global_statistics_enabled = true; + +static struct global_statistics { + uint16_t connected_clients; + + uint64_t web_requests; + uint64_t web_usec; + uint64_t web_usec_max; + uint64_t bytes_received; + uint64_t bytes_sent; + uint64_t content_size; + uint64_t compressed_content_size; + + uint64_t web_client_count; + + uint64_t api_data_queries_made; + uint64_t api_data_db_points_read; + uint64_t api_data_result_points_generated; + + uint64_t api_weights_queries_made; + uint64_t api_weights_db_points_read; + uint64_t api_weights_result_points_generated; + + uint64_t api_badges_queries_made; + uint64_t api_badges_db_points_read; + uint64_t api_badges_result_points_generated; + + uint64_t health_queries_made; + uint64_t health_db_points_read; + uint64_t health_result_points_generated; + + uint64_t ml_queries_made; + uint64_t ml_db_points_read; + uint64_t ml_result_points_generated; + + uint64_t exporters_queries_made; + uint64_t exporters_db_points_read; + + uint64_t backfill_queries_made; + uint64_t backfill_db_points_read; + + uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS]; + +} global_statistics = { + .connected_clients = 0, + .web_requests = 0, + .web_usec = 0, + .bytes_received = 0, + .bytes_sent = 0, + .content_size = 0, + .compressed_content_size = 0, + .web_client_count = 1, + + .api_data_queries_made = 0, + .api_data_db_points_read = 0, + .api_data_result_points_generated = 0, +}; + +void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { + __atomic_fetch_add(&global_statistics.db_points_stored_per_tier[tier], points_read_per_tier_array[tier], __ATOMIC_RELAXED); + points_read_per_tier_array[tier] = 0; + } +} + +void global_statistics_ml_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.ml_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_exporters_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.exporters_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.exporters_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_backfill_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.backfill_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) { + switch(query_source) { + case QUERY_SOURCE_API_DATA: + __atomic_fetch_add(&global_statistics.api_data_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_ML: + __atomic_fetch_add(&global_statistics.ml_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_WEIGHTS: + __atomic_fetch_add(&global_statistics.api_weights_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_BADGE: + __atomic_fetch_add(&global_statistics.api_badges_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_HEALTH: + __atomic_fetch_add(&global_statistics.health_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + default: + case QUERY_SOURCE_UNITTEST: + case QUERY_SOURCE_UNKNOWN: + break; + } +} + +void global_statistics_web_request_completed(uint64_t dt, + uint64_t bytes_received, + uint64_t bytes_sent, + uint64_t content_size, + uint64_t compressed_content_size) { + uint64_t old_web_usec_max = global_statistics.web_usec_max; + while(dt > old_web_usec_max) + __atomic_compare_exchange(&global_statistics.web_usec_max, &old_web_usec_max, &dt, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + __atomic_fetch_add(&global_statistics.web_requests, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.web_usec, dt, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.bytes_received, bytes_received, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.bytes_sent, bytes_sent, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.content_size, content_size, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.compressed_content_size, compressed_content_size, __ATOMIC_RELAXED); +} + +uint64_t global_statistics_web_client_connected(void) { + __atomic_fetch_add(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); + return __atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_RELAXED); +} + +void global_statistics_web_client_disconnected(void) { + __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); +} + +static inline void global_statistics_copy(struct global_statistics *gs, uint8_t options) { + gs->connected_clients = __atomic_load_n(&global_statistics.connected_clients, __ATOMIC_RELAXED); + gs->web_requests = __atomic_load_n(&global_statistics.web_requests, __ATOMIC_RELAXED); + gs->web_usec = __atomic_load_n(&global_statistics.web_usec, __ATOMIC_RELAXED); + gs->web_usec_max = __atomic_load_n(&global_statistics.web_usec_max, __ATOMIC_RELAXED); + gs->bytes_received = __atomic_load_n(&global_statistics.bytes_received, __ATOMIC_RELAXED); + gs->bytes_sent = __atomic_load_n(&global_statistics.bytes_sent, __ATOMIC_RELAXED); + gs->content_size = __atomic_load_n(&global_statistics.content_size, __ATOMIC_RELAXED); + gs->compressed_content_size = __atomic_load_n(&global_statistics.compressed_content_size, __ATOMIC_RELAXED); + gs->web_client_count = __atomic_load_n(&global_statistics.web_client_count, __ATOMIC_RELAXED); + + gs->api_data_queries_made = __atomic_load_n(&global_statistics.api_data_queries_made, __ATOMIC_RELAXED); + gs->api_data_db_points_read = __atomic_load_n(&global_statistics.api_data_db_points_read, __ATOMIC_RELAXED); + gs->api_data_result_points_generated = __atomic_load_n(&global_statistics.api_data_result_points_generated, __ATOMIC_RELAXED); + + gs->api_weights_queries_made = __atomic_load_n(&global_statistics.api_weights_queries_made, __ATOMIC_RELAXED); + gs->api_weights_db_points_read = __atomic_load_n(&global_statistics.api_weights_db_points_read, __ATOMIC_RELAXED); + gs->api_weights_result_points_generated = __atomic_load_n(&global_statistics.api_weights_result_points_generated, __ATOMIC_RELAXED); + + gs->api_badges_queries_made = __atomic_load_n(&global_statistics.api_badges_queries_made, __ATOMIC_RELAXED); + gs->api_badges_db_points_read = __atomic_load_n(&global_statistics.api_badges_db_points_read, __ATOMIC_RELAXED); + gs->api_badges_result_points_generated = __atomic_load_n(&global_statistics.api_badges_result_points_generated, __ATOMIC_RELAXED); + + gs->health_queries_made = __atomic_load_n(&global_statistics.health_queries_made, __ATOMIC_RELAXED); + gs->health_db_points_read = __atomic_load_n(&global_statistics.health_db_points_read, __ATOMIC_RELAXED); + gs->health_result_points_generated = __atomic_load_n(&global_statistics.health_result_points_generated, __ATOMIC_RELAXED); + + gs->ml_queries_made = __atomic_load_n(&global_statistics.ml_queries_made, __ATOMIC_RELAXED); + gs->ml_db_points_read = __atomic_load_n(&global_statistics.ml_db_points_read, __ATOMIC_RELAXED); + gs->ml_result_points_generated = __atomic_load_n(&global_statistics.ml_result_points_generated, __ATOMIC_RELAXED); + + gs->exporters_queries_made = __atomic_load_n(&global_statistics.exporters_queries_made, __ATOMIC_RELAXED); + gs->exporters_db_points_read = __atomic_load_n(&global_statistics.exporters_db_points_read, __ATOMIC_RELAXED); + gs->backfill_queries_made = __atomic_load_n(&global_statistics.backfill_queries_made, __ATOMIC_RELAXED); + gs->backfill_db_points_read = __atomic_load_n(&global_statistics.backfill_db_points_read, __ATOMIC_RELAXED); + + for(size_t tier = 0; tier < storage_tiers ;tier++) + gs->db_points_stored_per_tier[tier] = __atomic_load_n(&global_statistics.db_points_stored_per_tier[tier], __ATOMIC_RELAXED); + + if(options & GLOBAL_STATS_RESET_WEB_USEC_MAX) { + uint64_t n = 0; + __atomic_compare_exchange(&global_statistics.web_usec_max, (uint64_t *) &gs->web_usec_max, &n, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } +} + +static void global_statistics_charts(void) { + static unsigned long long old_web_requests = 0, + old_web_usec = 0, + old_content_size = 0, + old_compressed_content_size = 0; + + static collected_number compression_ratio = -1, + average_response_time = -1; + + static time_t netdata_start_time = 0; + if (!netdata_start_time) + netdata_start_time = now_boottime_sec(); + time_t netdata_uptime = now_boottime_sec() - netdata_start_time; + + struct global_statistics gs; + struct rusage me; + + struct replication_query_statistics replication = replication_get_query_statistics(); + global_statistics_copy(&gs, GLOBAL_STATS_RESET_WEB_USEC_MAX); + getrusage(RUSAGE_SELF, &me); + + // ---------------------------------------------------------------- + + { + static RRDSET *st_cpu = NULL; + static RRDDIM *rd_cpu_user = NULL, + *rd_cpu_system = NULL; + + if (unlikely(!st_cpu)) { + st_cpu = rrdset_create_localhost( + "netdata" + , "server_cpu" + , NULL + , "netdata" + , NULL + , "Netdata CPU usage" + , "milliseconds/s" + , "netdata" + , "stats" + , 130000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_cpu_user = rrddim_add(st_cpu, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + rd_cpu_system = rrddim_add(st_cpu, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_cpu, rd_cpu_user, me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec); + rrddim_set_by_pointer(st_cpu, rd_cpu_system, me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec); + rrdset_done(st_cpu); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_uptime = NULL; + static RRDDIM *rd_uptime = NULL; + + if (unlikely(!st_uptime)) { + st_uptime = rrdset_create_localhost( + "netdata", + "uptime", + NULL, + "netdata", + NULL, + "Netdata uptime", + "seconds", + "netdata", + "stats", + 130100, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_uptime = rrddim_add(st_uptime, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_uptime, rd_uptime, netdata_uptime); + rrdset_done(st_uptime); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_clients = NULL; + static RRDDIM *rd_clients = NULL; + + if (unlikely(!st_clients)) { + st_clients = rrdset_create_localhost( + "netdata" + , "clients" + , NULL + , "api" + , NULL + , "Netdata Web Clients" + , "connected clients" + , "netdata" + , "stats" + , 130200 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_clients = rrddim_add(st_clients, "clients", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_clients, rd_clients, gs.connected_clients); + rrdset_done(st_clients); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_reqs = NULL; + static RRDDIM *rd_requests = NULL; + + if (unlikely(!st_reqs)) { + st_reqs = rrdset_create_localhost( + "netdata" + , "requests" + , NULL + , "api" + , NULL + , "Netdata Web Requests" + , "requests/s" + , "netdata" + , "stats" + , 130300 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_requests = rrddim_add(st_reqs, "requests", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_reqs, rd_requests, (collected_number) gs.web_requests); + rrdset_done(st_reqs); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_bytes = NULL; + static RRDDIM *rd_in = NULL, + *rd_out = NULL; + + if (unlikely(!st_bytes)) { + st_bytes = rrdset_create_localhost( + "netdata" + , "net" + , NULL + , "api" + , NULL + , "Netdata Network Traffic" + , "kilobits/s" + , "netdata" + , "stats" + , 130400 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_bytes, "in", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_bytes, "out", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_bytes, rd_in, (collected_number) gs.bytes_received); + rrddim_set_by_pointer(st_bytes, rd_out, (collected_number) gs.bytes_sent); + rrdset_done(st_bytes); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_duration = NULL; + static RRDDIM *rd_average = NULL, + *rd_max = NULL; + + if (unlikely(!st_duration)) { + st_duration = rrdset_create_localhost( + "netdata" + , "response_time" + , NULL + , "api" + , NULL + , "Netdata API Response Time" + , "milliseconds/request" + , "netdata" + , "stats" + , 130500 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_average = rrddim_add(st_duration, "average", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rd_max = rrddim_add(st_duration, "max", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + uint64_t gweb_usec = gs.web_usec; + uint64_t gweb_requests = gs.web_requests; + + uint64_t web_usec = (gweb_usec >= old_web_usec) ? gweb_usec - old_web_usec : 0; + uint64_t web_requests = (gweb_requests >= old_web_requests) ? gweb_requests - old_web_requests : 0; + + old_web_usec = gweb_usec; + old_web_requests = gweb_requests; + + if (web_requests) + average_response_time = (collected_number) (web_usec / web_requests); + + if (unlikely(average_response_time != -1)) + rrddim_set_by_pointer(st_duration, rd_average, average_response_time); + else + rrddim_set_by_pointer(st_duration, rd_average, 0); + + rrddim_set_by_pointer(st_duration, rd_max, ((gs.web_usec_max)?(collected_number)gs.web_usec_max:average_response_time)); + rrdset_done(st_duration); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_compression = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_compression)) { + st_compression = rrdset_create_localhost( + "netdata" + , "compression_ratio" + , NULL + , "api" + , NULL + , "Netdata API Responses Compression Savings Ratio" + , "percentage" + , "netdata" + , "stats" + , 130600 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + // since we don't lock here to read the global statistics + // read the smaller value first + unsigned long long gcompressed_content_size = gs.compressed_content_size; + unsigned long long gcontent_size = gs.content_size; + + unsigned long long compressed_content_size = gcompressed_content_size - old_compressed_content_size; + unsigned long long content_size = gcontent_size - old_content_size; + + old_compressed_content_size = gcompressed_content_size; + old_content_size = gcontent_size; + + if (content_size && content_size >= compressed_content_size) + compression_ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; + + if (compression_ratio != -1) + rrddim_set_by_pointer(st_compression, rd_savings, compression_ratio); + + rrdset_done(st_compression); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_queries = NULL; + static RRDDIM *rd_api_data_queries = NULL; + static RRDDIM *rd_api_weights_queries = NULL; + static RRDDIM *rd_api_badges_queries = NULL; + static RRDDIM *rd_health_queries = NULL; + static RRDDIM *rd_ml_queries = NULL; + static RRDDIM *rd_exporters_queries = NULL; + static RRDDIM *rd_backfill_queries = NULL; + static RRDDIM *rd_replication_queries = NULL; + + if (unlikely(!st_queries)) { + st_queries = rrdset_create_localhost( + "netdata" + , "queries" + , NULL + , "queries" + , NULL + , "Netdata DB Queries" + , "queries/s" + , "netdata" + , "stats" + , 131000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_queries = rrddim_add(st_queries, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_queries = rrddim_add(st_queries, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_queries = rrddim_add(st_queries, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_queries = rrddim_add(st_queries, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_queries = rrddim_add(st_queries, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_exporters_queries = rrddim_add(st_queries, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_queries = rrddim_add(st_queries, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_queries = rrddim_add(st_queries, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_queries, rd_api_data_queries, (collected_number)gs.api_data_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_weights_queries, (collected_number)gs.api_weights_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_badges_queries, (collected_number)gs.api_badges_queries_made); + rrddim_set_by_pointer(st_queries, rd_health_queries, (collected_number)gs.health_queries_made); + rrddim_set_by_pointer(st_queries, rd_ml_queries, (collected_number)gs.ml_queries_made); + rrddim_set_by_pointer(st_queries, rd_exporters_queries, (collected_number)gs.exporters_queries_made); + rrddim_set_by_pointer(st_queries, rd_backfill_queries, (collected_number)gs.backfill_queries_made); + rrddim_set_by_pointer(st_queries, rd_replication_queries, (collected_number)replication.queries_finished); + + rrdset_done(st_queries); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_points_read = NULL; + static RRDDIM *rd_api_data_points_read = NULL; + static RRDDIM *rd_api_weights_points_read = NULL; + static RRDDIM *rd_api_badges_points_read = NULL; + static RRDDIM *rd_health_points_read = NULL; + static RRDDIM *rd_ml_points_read = NULL; + static RRDDIM *rd_exporters_points_read = NULL; + static RRDDIM *rd_backfill_points_read = NULL; + static RRDDIM *rd_replication_points_read = NULL; + + if (unlikely(!st_points_read)) { + st_points_read = rrdset_create_localhost( + "netdata" + , "db_points_read" + , NULL + , "queries" + , NULL + , "Netdata DB Points Query Read" + , "points/s" + , "netdata" + , "stats" + , 131001 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_points_read = rrddim_add(st_points_read, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_read = rrddim_add(st_points_read, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_read = rrddim_add(st_points_read, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_read = rrddim_add(st_points_read, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_read = rrddim_add(st_points_read, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_exporters_points_read = rrddim_add(st_points_read, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_points_read = rrddim_add(st_points_read, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_read = rrddim_add(st_points_read, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_read, rd_api_data_points_read, (collected_number)gs.api_data_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_weights_points_read, (collected_number)gs.api_weights_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_badges_points_read, (collected_number)gs.api_badges_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_health_points_read, (collected_number)gs.health_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_ml_points_read, (collected_number)gs.ml_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_exporters_points_read, (collected_number)gs.exporters_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_backfill_points_read, (collected_number)gs.backfill_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_replication_points_read, (collected_number)replication.points_read); + + rrdset_done(st_points_read); + } + + // ---------------------------------------------------------------- + + if(gs.api_data_result_points_generated || replication.points_generated) { + static RRDSET *st_points_generated = NULL; + static RRDDIM *rd_api_data_points_generated = NULL; + static RRDDIM *rd_api_weights_points_generated = NULL; + static RRDDIM *rd_api_badges_points_generated = NULL; + static RRDDIM *rd_health_points_generated = NULL; + static RRDDIM *rd_ml_points_generated = NULL; + static RRDDIM *rd_replication_points_generated = NULL; + + if (unlikely(!st_points_generated)) { + st_points_generated = rrdset_create_localhost( + "netdata" + , "db_points_results" + , NULL + , "queries" + , NULL + , "Netdata Points in Query Results" + , "points/s" + , "netdata" + , "stats" + , 131002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_points_generated = rrddim_add(st_points_generated, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_generated = rrddim_add(st_points_generated, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_generated = rrddim_add(st_points_generated, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_generated = rrddim_add(st_points_generated, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_generated = rrddim_add(st_points_generated, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_generated = rrddim_add(st_points_generated, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_generated, rd_api_data_points_generated, (collected_number)gs.api_data_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_api_weights_points_generated, (collected_number)gs.api_weights_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_api_badges_points_generated, (collected_number)gs.api_badges_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_health_points_generated, (collected_number)gs.health_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_ml_points_generated, (collected_number)gs.ml_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_replication_points_generated, (collected_number)replication.points_generated); + + rrdset_done(st_points_generated); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_points_stored = NULL; + static RRDDIM *rds[RRD_STORAGE_TIERS] = {}; + + if (unlikely(!st_points_stored)) { + st_points_stored = rrdset_create_localhost( + "netdata" + , "db_points_stored" + , NULL + , "queries" + , NULL + , "Netdata DB Points Stored" + , "points/s" + , "netdata" + , "stats" + , 131003 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + char buf[30 + 1]; + snprintfz(buf, 30, "tier%zu", tier); + rds[tier] = rrddim_add(st_points_stored, buf, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + } + + for(size_t tier = 0; tier < storage_tiers ;tier++) + rrddim_set_by_pointer(st_points_stored, rds[tier], (collected_number)gs.db_points_stored_per_tier[tier]); + + rrdset_done(st_points_stored); + } +} + +// ---------------------------------------------------------------------------- +// sqlite3 statistics + +struct sqlite3_statistics { + uint64_t sqlite3_queries_made; + uint64_t sqlite3_queries_ok; + uint64_t sqlite3_queries_failed; + uint64_t sqlite3_queries_failed_busy; + uint64_t sqlite3_queries_failed_locked; + uint64_t sqlite3_rows; + uint64_t sqlite3_metadata_cache_hit; + uint64_t sqlite3_context_cache_hit; + uint64_t sqlite3_metadata_cache_miss; + uint64_t sqlite3_context_cache_miss; + uint64_t sqlite3_metadata_cache_spill; + uint64_t sqlite3_context_cache_spill; + uint64_t sqlite3_metadata_cache_write; + uint64_t sqlite3_context_cache_write; + +} sqlite3_statistics = { }; + +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED); + + if(success) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED); + } + else { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED); + + if(busy) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED); + + if(locked) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED); + } +} + +void global_statistics_sqlite3_row_completed(void) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED); +} + +static inline void sqlite3_statistics_copy(struct sqlite3_statistics *gs) { + static usec_t last_run = 0; + + gs->sqlite3_queries_made = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_made, __ATOMIC_RELAXED); + gs->sqlite3_queries_ok = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_busy = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_locked = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED); + gs->sqlite3_rows = __atomic_load_n(&sqlite3_statistics.sqlite3_rows, __ATOMIC_RELAXED); + + usec_t timeout = default_rrd_update_every * USEC_PER_SEC + default_rrd_update_every * USEC_PER_SEC / 3; + usec_t now = now_monotonic_usec(); + if(!last_run) + last_run = now; + usec_t delta = now - last_run; + bool query_sqlite3 = delta < timeout; + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + gs->sqlite3_metadata_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + gs->sqlite3_context_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_metadata_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_context_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_metadata_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_context_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_metadata_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_context_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + last_run = now_monotonic_usec(); +} + +static void sqlite3_statistics_charts(void) { + struct sqlite3_statistics gs; + sqlite3_statistics_copy(&gs); + + if(gs.sqlite3_queries_made) { + static RRDSET *st_sqlite3_queries = NULL; + static RRDDIM *rd_queries = NULL; + + if (unlikely(!st_sqlite3_queries)) { + st_sqlite3_queries = rrdset_create_localhost( + "netdata" + , "sqlite3_queries" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Queries" + , "queries/s" + , "netdata" + , "stats" + , 131100 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_queries = rrddim_add(st_sqlite3_queries, "queries", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_sqlite3_queries, rd_queries, (collected_number)gs.sqlite3_queries_made); + + rrdset_done(st_sqlite3_queries); + } + + // ---------------------------------------------------------------- + + if(gs.sqlite3_queries_ok || gs.sqlite3_queries_failed) { + static RRDSET *st_sqlite3_queries_by_status = NULL; + static RRDDIM *rd_ok = NULL, *rd_failed = NULL, *rd_busy = NULL, *rd_locked = NULL; + + if (unlikely(!st_sqlite3_queries_by_status)) { + st_sqlite3_queries_by_status = rrdset_create_localhost( + "netdata" + , "sqlite3_queries_by_status" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Queries by status" + , "queries/s" + , "netdata" + , "stats" + , 131101 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_ok = rrddim_add(st_sqlite3_queries_by_status, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st_sqlite3_queries_by_status, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_busy = rrddim_add(st_sqlite3_queries_by_status, "busy", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_locked = rrddim_add(st_sqlite3_queries_by_status, "locked", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_ok, (collected_number)gs.sqlite3_queries_made); + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_failed, (collected_number)gs.sqlite3_queries_failed); + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_busy, (collected_number)gs.sqlite3_queries_failed_busy); + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_locked, (collected_number)gs.sqlite3_queries_failed_locked); + + rrdset_done(st_sqlite3_queries_by_status); + } + + // ---------------------------------------------------------------- + + if(gs.sqlite3_rows) { + static RRDSET *st_sqlite3_rows = NULL; + static RRDDIM *rd_rows = NULL; + + if (unlikely(!st_sqlite3_rows)) { + st_sqlite3_rows = rrdset_create_localhost( + "netdata" + , "sqlite3_rows" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Rows" + , "rows/s" + , "netdata" + , "stats" + , 131102 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_rows = rrddim_add(st_sqlite3_rows, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_sqlite3_rows, rd_rows, (collected_number)gs.sqlite3_rows); + + rrdset_done(st_sqlite3_rows); + } + + if(gs.sqlite3_metadata_cache_hit) { + static RRDSET *st_sqlite3_cache = NULL; + static RRDDIM *rd_cache_hit = NULL; + static RRDDIM *rd_cache_miss= NULL; + static RRDDIM *rd_cache_spill= NULL; + static RRDDIM *rd_cache_write= NULL; + + if (unlikely(!st_sqlite3_cache)) { + st_sqlite3_cache = rrdset_create_localhost( + "netdata" + , "sqlite3_metatada_cache" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 metadata cache" + , "ops/s" + , "netdata" + , "stats" + , 131103 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + if(gs.sqlite3_metadata_cache_hit != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit); + + if(gs.sqlite3_metadata_cache_miss != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss); + + if(gs.sqlite3_metadata_cache_spill != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill); + + if(gs.sqlite3_metadata_cache_write != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write); + + rrdset_done(st_sqlite3_cache); + } + + if(gs.sqlite3_context_cache_hit) { + static RRDSET *st_sqlite3_cache = NULL; + static RRDDIM *rd_cache_hit = NULL; + static RRDDIM *rd_cache_miss= NULL; + static RRDDIM *rd_cache_spill= NULL; + static RRDDIM *rd_cache_write= NULL; + + if (unlikely(!st_sqlite3_cache)) { + st_sqlite3_cache = rrdset_create_localhost( + "netdata" + , "sqlite3_context_cache" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 context cache" + , "ops/s" + , "netdata" + , "stats" + , 131104 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + if(gs.sqlite3_context_cache_hit != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit); + + if(gs.sqlite3_context_cache_miss != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss); + + if(gs.sqlite3_context_cache_spill != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill); + + if(gs.sqlite3_context_cache_write != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write); + + rrdset_done(st_sqlite3_cache); + } + + // ---------------------------------------------------------------- +} + +static void dbengine_statistics_charts(void) { +#ifdef ENABLE_DBENGINE + if(netdata_rwlock_tryrdlock(&rrd_rwlock) == 0) { + RRDHOST *host; + unsigned long long stats_array[RRDENG_NR_STATS] = {0}; + unsigned long long local_stats_array[RRDENG_NR_STATS]; + unsigned dbengine_contexts = 0, counted_multihost_db[RRD_STORAGE_TIERS] = { 0 }, i; + + rrdhost_foreach_read(host) { + if (!rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { + + /* get localhost's DB engine's statistics for each tier */ + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode != RRD_MEMORY_MODE_DBENGINE) continue; + if(!host->db[tier].instance) continue; + + if(is_storage_engine_shared(host->db[tier].instance)) { + if(counted_multihost_db[tier]) + continue; + else + counted_multihost_db[tier] = 1; + } + + ++dbengine_contexts; + rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].instance, local_stats_array); + for (i = 0; i < RRDENG_NR_STATS; ++i) { + /* aggregate statistics across hosts */ + stats_array[i] += local_stats_array[i]; + } + } + } + } + rrd_unlock(); + + if (dbengine_contexts) { + /* deduplicate global statistics by getting the ones from the last context */ + stats_array[30] = local_stats_array[30]; + stats_array[31] = local_stats_array[31]; + stats_array[32] = local_stats_array[32]; + stats_array[34] = local_stats_array[34]; + stats_array[36] = local_stats_array[36]; + + // ---------------------------------------------------------------- + + { + static RRDSET *st_compression = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_compression)) { + st_compression = rrdset_create_localhost( + "netdata", + "dbengine_compression_ratio", + NULL, + "dbengine", + NULL, + "Netdata DB engine data extents' compression savings ratio", + "percentage", + "netdata", + "stats", + 132000, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + unsigned long long ratio; + unsigned long long compressed_content_size = stats_array[12]; + unsigned long long content_size = stats_array[11]; + + if (content_size) { + // allow negative savings + ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; + } else { + ratio = 0; + } + rrddim_set_by_pointer(st_compression, rd_savings, ratio); + + rrdset_done(st_compression); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_pg_cache_hit_ratio = NULL; + static RRDDIM *rd_hit_ratio = NULL; + + if (unlikely(!st_pg_cache_hit_ratio)) { + st_pg_cache_hit_ratio = rrdset_create_localhost( + "netdata", + "page_cache_hit_ratio", + NULL, + "dbengine", + NULL, + "Netdata DB engine page cache hit ratio", + "percentage", + "netdata", + "stats", + 132003, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + static unsigned long long old_hits = 0; + static unsigned long long old_misses = 0; + unsigned long long hits = stats_array[7]; + unsigned long long misses = stats_array[8]; + unsigned long long hits_delta; + unsigned long long misses_delta; + unsigned long long ratio; + + hits_delta = hits - old_hits; + misses_delta = misses - old_misses; + old_hits = hits; + old_misses = misses; + + if (hits_delta + misses_delta) { + ratio = (hits_delta * 100 * 1000) / (hits_delta + misses_delta); + } else { + ratio = 0; + } + rrddim_set_by_pointer(st_pg_cache_hit_ratio, rd_hit_ratio, ratio); + + rrdset_done(st_pg_cache_hit_ratio); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_pg_cache_pages = NULL; + static RRDDIM *rd_descriptors = NULL; + static RRDDIM *rd_populated = NULL; + static RRDDIM *rd_dirty = NULL; + static RRDDIM *rd_backfills = NULL; + static RRDDIM *rd_evictions = NULL; + static RRDDIM *rd_used_by_collectors = NULL; + + if (unlikely(!st_pg_cache_pages)) { + st_pg_cache_pages = rrdset_create_localhost( + "netdata", + "page_cache_stats", + NULL, + "dbengine", + NULL, + "Netdata dbengine page cache statistics", + "pages", + "netdata", + "stats", + 132004, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_descriptors = rrddim_add(st_pg_cache_pages, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_populated = rrddim_add(st_pg_cache_pages, "populated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_dirty = rrddim_add(st_pg_cache_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_backfills = rrddim_add(st_pg_cache_pages, "backfills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_evictions = rrddim_add(st_pg_cache_pages, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_used_by_collectors = + rrddim_add(st_pg_cache_pages, "used_by_collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_pg_cache_pages, rd_descriptors, (collected_number)stats_array[27]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_dirty, (collected_number)stats_array[0] + stats_array[4]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_backfills, (collected_number)stats_array[9]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_evictions, (collected_number)stats_array[10]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_used_by_collectors, (collected_number)stats_array[0]); + rrdset_done(st_pg_cache_pages); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_long_term_pages = NULL; + static RRDDIM *rd_total = NULL; + static RRDDIM *rd_insertions = NULL; + static RRDDIM *rd_deletions = NULL; + static RRDDIM *rd_flushing_pressure_deletions = NULL; + + if (unlikely(!st_long_term_pages)) { + st_long_term_pages = rrdset_create_localhost( + "netdata", + "dbengine_long_term_page_stats", + NULL, + "dbengine", + NULL, + "Netdata dbengine long-term page statistics", + "pages", + "netdata", + "stats", + 132005, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_total = rrddim_add(st_long_term_pages, "total", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_insertions = rrddim_add(st_long_term_pages, "insertions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_deletions = rrddim_add(st_long_term_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_flushing_pressure_deletions = rrddim_add( + st_long_term_pages, "flushing_pressure_deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_long_term_pages, rd_total, (collected_number)stats_array[2]); + rrddim_set_by_pointer(st_long_term_pages, rd_insertions, (collected_number)stats_array[5]); + rrddim_set_by_pointer(st_long_term_pages, rd_deletions, (collected_number)stats_array[6]); + rrddim_set_by_pointer( + st_long_term_pages, rd_flushing_pressure_deletions, (collected_number)stats_array[36]); + rrdset_done(st_long_term_pages); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_throughput", + NULL, + "dbengine", + NULL, + "Netdata DB engine I/O throughput", + "MiB/s", + "netdata", + "stats", + 132006, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]); + rrdset_done(st_io_stats); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_operations", + NULL, + "dbengine", + NULL, + "Netdata DB engine I/O operations", + "operations/s", + "netdata", + "stats", + 132007, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]); + rrdset_done(st_io_stats); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_errors = NULL; + static RRDDIM *rd_fs_errors = NULL; + static RRDDIM *rd_io_errors = NULL; + static RRDDIM *pg_cache_over_half_dirty_events = NULL; + + if (unlikely(!st_errors)) { + st_errors = rrdset_create_localhost( + "netdata", + "dbengine_global_errors", + NULL, + "dbengine", + NULL, + "Netdata DB engine errors", + "errors/s", + "netdata", + "stats", + 132008, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_io_errors = rrddim_add(st_errors, "io_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + pg_cache_over_half_dirty_events = + rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]); + rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]); + rrddim_set_by_pointer(st_errors, pg_cache_over_half_dirty_events, (collected_number)stats_array[34]); + rrdset_done(st_errors); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_fd = NULL; + static RRDDIM *rd_fd_current = NULL; + static RRDDIM *rd_fd_max = NULL; + + if (unlikely(!st_fd)) { + st_fd = rrdset_create_localhost( + "netdata", + "dbengine_global_file_descriptors", + NULL, + "dbengine", + NULL, + "Netdata DB engine File Descriptors", + "descriptors", + "netdata", + "stats", + 132009, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_fd_current = rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]); + /* Careful here, modify this accordingly if the File-Descriptor budget ever changes */ + rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4); + rrdset_done(st_fd); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_ram_usage = NULL; + static RRDDIM *rd_cached = NULL; + static RRDDIM *rd_pinned = NULL; + static RRDDIM *rd_cache_metadata = NULL; + static RRDDIM *rd_index_metadata = NULL; + static RRDDIM *rd_pages_metadata = NULL; + + collected_number API_producers, populated_pages, cache_metadata, pages_on_disk, + page_cache_descriptors, index_metadata, pages_metadata; + + if (unlikely(!st_ram_usage)) { + st_ram_usage = rrdset_create_localhost( + "netdata", + "dbengine_ram", + NULL, + "dbengine", + NULL, + "Netdata DB engine RAM usage", + "MiB", + "netdata", + "stats", + 132010, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_cached = rrddim_add(st_ram_usage, "cache", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_pinned = rrddim_add(st_ram_usage, "collectors", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_cache_metadata = rrddim_add(st_ram_usage, "cache metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_pages_metadata = rrddim_add(st_ram_usage, "pages metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_index_metadata = rrddim_add(st_ram_usage, "index metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + } + + API_producers = (collected_number)stats_array[0]; + pages_on_disk = (collected_number)stats_array[2]; + populated_pages = (collected_number)stats_array[3]; + page_cache_descriptors = (collected_number)stats_array[27]; + + cache_metadata = page_cache_descriptors * sizeof(struct page_cache_descr); + + pages_metadata = pages_on_disk * sizeof(struct rrdeng_page_descr); + + /* This is an empirical estimation for Judy array indexing and extent structures */ + index_metadata = pages_on_disk * 58; + + rrddim_set_by_pointer(st_ram_usage, rd_cached, populated_pages - API_producers); + rrddim_set_by_pointer(st_ram_usage, rd_pinned, API_producers); + rrddim_set_by_pointer(st_ram_usage, rd_cache_metadata, cache_metadata); + rrddim_set_by_pointer(st_ram_usage, rd_pages_metadata, pages_metadata); + rrddim_set_by_pointer(st_ram_usage, rd_index_metadata, index_metadata); + rrdset_done(st_ram_usage); + } + } + } +#endif +} + +static void update_strings_charts() { + static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL; + static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL, *rd_ops_searches = NULL, *rd_ops_duplications = NULL, *rd_ops_releases = NULL; + static RRDDIM *rd_entries_entries = NULL, *rd_entries_refs = NULL; + static RRDDIM *rd_mem = NULL; + + size_t inserts, deletes, searches, entries, references, memory, duplications, releases; + + string_statistics(&inserts, &deletes, &searches, &entries, &references, &memory, &duplications, &releases); + + if (unlikely(!st_ops)) { + st_ops = rrdset_create_localhost( + "netdata" + , "strings_ops" + , NULL + , "strings" + , NULL + , "Strings operations" + , "ops/s" + , "netdata" + , "stats" + , 910000 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE); + + rd_ops_inserts = rrddim_add(st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ops_deletes = rrddim_add(st_ops, "deletes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ops_searches = rrddim_add(st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ops_duplications = rrddim_add(st_ops, "duplications", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ops_releases = rrddim_add(st_ops, "releases", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_ops, rd_ops_inserts, (collected_number)inserts); + rrddim_set_by_pointer(st_ops, rd_ops_deletes, (collected_number)deletes); + rrddim_set_by_pointer(st_ops, rd_ops_searches, (collected_number)searches); + rrddim_set_by_pointer(st_ops, rd_ops_duplications, (collected_number)duplications); + rrddim_set_by_pointer(st_ops, rd_ops_releases, (collected_number)releases); + rrdset_done(st_ops); + + if (unlikely(!st_entries)) { + st_entries = rrdset_create_localhost( + "netdata" + , "strings_entries" + , NULL + , "strings" + , NULL + , "Strings entries" + , "entries" + , "netdata" + , "stats" + , 910001 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA); + + rd_entries_entries = rrddim_add(st_entries, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_entries_refs = rrddim_add(st_entries, "references", NULL, 1, -1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_entries, rd_entries_entries, (collected_number)entries); + rrddim_set_by_pointer(st_entries, rd_entries_refs, (collected_number)references); + rrdset_done(st_entries); + + if (unlikely(!st_mem)) { + st_mem = rrdset_create_localhost( + "netdata" + , "strings_memory" + , NULL + , "strings" + , NULL + , "Strings memory" + , "bytes" + , "netdata" + , "stats" + , 910001 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA); + + rd_mem = rrddim_add(st_mem, "memory", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem, rd_mem, (collected_number)memory); + rrdset_done(st_mem); +} + +static void update_heartbeat_charts() { + static RRDSET *st_heartbeat = NULL; + static RRDDIM *rd_heartbeat_min = NULL; + static RRDDIM *rd_heartbeat_max = NULL; + static RRDDIM *rd_heartbeat_avg = NULL; + + if (unlikely(!st_heartbeat)) { + st_heartbeat = rrdset_create_localhost( + "netdata" + , "heartbeat" + , NULL + , "heartbeat" + , NULL + , "System clock jitter" + , "microseconds" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA); + + rd_heartbeat_min = rrddim_add(st_heartbeat, "min", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_heartbeat_max = rrddim_add(st_heartbeat, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_heartbeat_avg = rrddim_add(st_heartbeat, "average", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + usec_t min, max, average; + size_t count; + + heartbeat_statistics(&min, &max, &average, &count); + + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_min, (collected_number)min); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_max, (collected_number)max); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_avg, (collected_number)average); + + rrdset_done(st_heartbeat); +} + +// --------------------------------------------------------------------------------------------------------------------- +// dictionary statistics + +struct dictionary_categories { + struct dictionary_stats *stats; + const char *family; + const char *context_prefix; + int priority; + + RRDSET *st_dicts; + RRDDIM *rd_dicts_active; + RRDDIM *rd_dicts_deleted; + + RRDSET *st_items; + RRDDIM *rd_items_entries; + RRDDIM *rd_items_referenced; + RRDDIM *rd_items_pending_deletion; + + RRDSET *st_ops; + RRDDIM *rd_ops_creations; + RRDDIM *rd_ops_destructions; + RRDDIM *rd_ops_flushes; + RRDDIM *rd_ops_traversals; + RRDDIM *rd_ops_walkthroughs; + RRDDIM *rd_ops_garbage_collections; + RRDDIM *rd_ops_searches; + RRDDIM *rd_ops_inserts; + RRDDIM *rd_ops_resets; + RRDDIM *rd_ops_deletes; + + RRDSET *st_callbacks; + RRDDIM *rd_callbacks_inserts; + RRDDIM *rd_callbacks_conflicts; + RRDDIM *rd_callbacks_reacts; + RRDDIM *rd_callbacks_deletes; + + RRDSET *st_memory; + RRDDIM *rd_memory_indexed; + RRDDIM *rd_memory_values; + RRDDIM *rd_memory_dict; + + RRDSET *st_spins; + RRDDIM *rd_spins_use; + RRDDIM *rd_spins_search; + RRDDIM *rd_spins_insert; + RRDDIM *rd_spins_delete; + +} dictionary_categories[] = { + { .stats = &dictionary_stats_category_other, "dictionaries", "dictionaries", 900000 }, + + // terminator + { .stats = NULL, NULL, NULL, 0 }, +}; + +#define load_dictionary_stats_entry(x) total += (size_t)(stats.x = __atomic_load_n(&c->stats->x, __ATOMIC_RELAXED)) + +static void update_dictionary_category_charts(struct dictionary_categories *c) { + struct dictionary_stats stats; + stats.name = c->stats->name; + + // ------------------------------------------------------------------------ + + size_t total = 0; + load_dictionary_stats_entry(dictionaries.active); + load_dictionary_stats_entry(dictionaries.deleted); + + if(c->st_dicts || total != 0) { + if (unlikely(!c->st_dicts)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.dictionaries", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.dictionaries", c->context_prefix); + + c->st_dicts = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionaries" + , "dictionaries" + , "netdata" + , "stats" + , c->priority + 0 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_dicts_active = rrddim_add(c->st_dicts, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_dicts_deleted = rrddim_add(c->st_dicts, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_dicts->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_active, (collected_number)stats.dictionaries.active); + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_deleted, (collected_number)stats.dictionaries.deleted); + rrdset_done(c->st_dicts); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(items.entries); + load_dictionary_stats_entry(items.referenced); + load_dictionary_stats_entry(items.pending_deletion); + + if(c->st_items || total != 0) { + if (unlikely(!c->st_items)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.items", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.items", c->context_prefix); + + c->st_items = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Items" + , "items" + , "netdata" + , "stats" + , c->priority + 1 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_items_entries = rrddim_add(c->st_items, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_items_pending_deletion = rrddim_add(c->st_items, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_items_referenced = rrddim_add(c->st_items, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_items->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_items, c->rd_items_entries, stats.items.entries); + rrddim_set_by_pointer(c->st_items, c->rd_items_pending_deletion, stats.items.pending_deletion); + rrddim_set_by_pointer(c->st_items, c->rd_items_referenced, stats.items.referenced); + rrdset_done(c->st_items); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(ops.creations); + load_dictionary_stats_entry(ops.destructions); + load_dictionary_stats_entry(ops.flushes); + load_dictionary_stats_entry(ops.traversals); + load_dictionary_stats_entry(ops.walkthroughs); + load_dictionary_stats_entry(ops.garbage_collections); + load_dictionary_stats_entry(ops.searches); + load_dictionary_stats_entry(ops.inserts); + load_dictionary_stats_entry(ops.resets); + load_dictionary_stats_entry(ops.deletes); + + if(c->st_ops || total != 0) { + if (unlikely(!c->st_ops)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.ops", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.ops", c->context_prefix); + + c->st_ops = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Operations" + , "ops/s" + , "netdata" + , "stats" + , c->priority + 2 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_ops_creations = rrddim_add(c->st_ops, "creations", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_destructions = rrddim_add(c->st_ops, "destructions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_flushes = rrddim_add(c->st_ops, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_traversals = rrddim_add(c->st_ops, "traversals", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_walkthroughs = rrddim_add(c->st_ops, "walkthroughs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_garbage_collections = rrddim_add(c->st_ops, "garbage_collections", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_searches = rrddim_add(c->st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_inserts = rrddim_add(c->st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_resets = rrddim_add(c->st_ops, "resets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_deletes = rrddim_add(c->st_ops, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_ops->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_ops, c->rd_ops_creations, (collected_number)stats.ops.creations); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_destructions, (collected_number)stats.ops.destructions); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_flushes, (collected_number)stats.ops.flushes); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_traversals, (collected_number)stats.ops.traversals); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_walkthroughs, (collected_number)stats.ops.walkthroughs); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_garbage_collections, (collected_number)stats.ops.garbage_collections); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_searches, (collected_number)stats.ops.searches); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_inserts, (collected_number)stats.ops.inserts); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_resets, (collected_number)stats.ops.resets); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_deletes, (collected_number)stats.ops.deletes); + + rrdset_done(c->st_ops); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(callbacks.inserts); + load_dictionary_stats_entry(callbacks.conflicts); + load_dictionary_stats_entry(callbacks.reacts); + load_dictionary_stats_entry(callbacks.deletes); + + if(c->st_callbacks || total != 0) { + if (unlikely(!c->st_callbacks)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.callbacks", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.callbacks", c->context_prefix); + + c->st_callbacks = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Callbacks" + , "callbacks/s" + , "netdata" + , "stats" + , c->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_callbacks_inserts = rrddim_add(c->st_callbacks, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_deletes = rrddim_add(c->st_callbacks, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_conflicts = rrddim_add(c->st_callbacks, "conflicts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_reacts = rrddim_add(c->st_callbacks, "reacts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_callbacks->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_inserts, (collected_number)stats.callbacks.inserts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_conflicts, (collected_number)stats.callbacks.conflicts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_reacts, (collected_number)stats.callbacks.reacts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_deletes, (collected_number)stats.callbacks.deletes); + + rrdset_done(c->st_callbacks); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(memory.indexed); + load_dictionary_stats_entry(memory.values); + load_dictionary_stats_entry(memory.dict); + + if(c->st_memory || total != 0) { + if (unlikely(!c->st_memory)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.memory", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.memory", c->context_prefix); + + c->st_memory = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Memory" + , "bytes" + , "netdata" + , "stats" + , c->priority + 4 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + c->rd_memory_indexed = rrddim_add(c->st_memory, "index", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_values = rrddim_add(c->st_memory, "data", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_dict = rrddim_add(c->st_memory, "structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_memory->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.indexed); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_values, (collected_number)stats.memory.values); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_dict, (collected_number)stats.memory.dict); + + rrdset_done(c->st_memory); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(spin_locks.use_spins); + load_dictionary_stats_entry(spin_locks.search_spins); + load_dictionary_stats_entry(spin_locks.insert_spins); + load_dictionary_stats_entry(spin_locks.delete_spins); + + if(c->st_spins || total != 0) { + if (unlikely(!c->st_spins)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.spins", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.spins", c->context_prefix); + + c->st_spins = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Spins" + , "count" + , "netdata" + , "stats" + , c->priority + 5 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_spins_use = rrddim_add(c->st_spins, "use", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_search = rrddim_add(c->st_spins, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_insert = rrddim_add(c->st_spins, "insert", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_delete = rrddim_add(c->st_spins, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_spins->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_spins, c->rd_spins_use, (collected_number)stats.spin_locks.use_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_search, (collected_number)stats.spin_locks.search_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_insert, (collected_number)stats.spin_locks.insert_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_delete, (collected_number)stats.spin_locks.delete_spins); + + rrdset_done(c->st_spins); + } +} + +#ifdef NETDATA_TRACE_ALLOCATIONS + +struct memory_trace_data { + RRDSET *st_memory; + RRDSET *st_allocations; + RRDSET *st_avg_alloc; + RRDSET *st_ops; +}; + +static int do_memory_trace_item(void *item, void *data) { + struct memory_trace_data *tmp = data; + struct malloc_trace *p = item; + + // ------------------------------------------------------------------------ + + if(!p->rd_bytes) + p->rd_bytes = rrddim_add(tmp->st_memory, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number bytes = (collected_number)__atomic_load_n(&p->bytes, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_memory, p->rd_bytes, bytes); + + // ------------------------------------------------------------------------ + + if(!p->rd_allocations) + p->rd_allocations = rrddim_add(tmp->st_allocations, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number allocs = (collected_number)__atomic_load_n(&p->allocations, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_allocations, p->rd_allocations, allocs); + + // ------------------------------------------------------------------------ + + if(!p->rd_avg_alloc) + p->rd_avg_alloc = rrddim_add(tmp->st_avg_alloc, p->function, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + collected_number avg_alloc = (allocs)?(bytes * 100 / allocs):0; + rrddim_set_by_pointer(tmp->st_avg_alloc, p->rd_avg_alloc, avg_alloc); + + // ------------------------------------------------------------------------ + + if(!p->rd_ops) + p->rd_ops = rrddim_add(tmp->st_ops, p->function, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + collected_number ops = 0; + ops += (collected_number)__atomic_load_n(&p->malloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->calloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->realloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->strdup_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->free_calls, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_ops, p->rd_ops, ops); + + // ------------------------------------------------------------------------ + + return 1; +} +static void malloc_trace_statistics(void) { + static struct memory_trace_data tmp = { + .st_memory = NULL, + .st_allocations = NULL, + .st_avg_alloc = NULL, + .st_ops = NULL, + }; + + if(!tmp.st_memory) { + tmp.st_memory = rrdset_create_localhost( + "netdata" + , "memory_size" + , NULL + , "memory" + , "netdata.memory.size" + , "Netdata Memory Used by Function" + , "bytes" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_ops) { + tmp.st_ops = rrdset_create_localhost( + "netdata" + , "memory_operations" + , NULL + , "memory" + , "netdata.memory.operations" + , "Netdata Memory Operations by Function" + , "ops/s" + , "netdata" + , "stats" + , 900001 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + if(!tmp.st_allocations) { + tmp.st_allocations = rrdset_create_localhost( + "netdata" + , "memory_allocations" + , NULL + , "memory" + , "netdata.memory.allocations" + , "Netdata Memory Allocations by Function" + , "allocations" + , "netdata" + , "stats" + , 900002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_avg_alloc) { + tmp.st_avg_alloc = rrdset_create_localhost( + "netdata" + , "memory_avg_alloc" + , NULL + , "memory" + , "netdata.memory.avg_alloc" + , "Netdata Average Allocation Size by Function" + , "bytes" + , "netdata" + , "stats" + , 900003 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + malloc_trace_walkthrough(do_memory_trace_item, &tmp); + + rrdset_done(tmp.st_memory); + rrdset_done(tmp.st_ops); + rrdset_done(tmp.st_allocations); + rrdset_done(tmp.st_avg_alloc); +} +#endif + +static void dictionary_statistics(void) { + for(int i = 0; dictionary_categories[i].stats ;i++) { + update_dictionary_category_charts(&dictionary_categories[i]); + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// worker utilization + +#define WORKERS_MIN_PERCENT_DEFAULT 10000.0 + +struct worker_job_type_gs { + STRING *name; + STRING *units; + + size_t jobs_started; + usec_t busy_time; + + RRDDIM *rd_jobs_started; + RRDDIM *rd_busy_time; + + WORKER_METRIC_TYPE type; + NETDATA_DOUBLE min_value; + NETDATA_DOUBLE max_value; + NETDATA_DOUBLE sum_value; + size_t count_value; + + RRDSET *st; + RRDDIM *rd_min; + RRDDIM *rd_max; + RRDDIM *rd_avg; +}; + +struct worker_thread { + pid_t pid; + bool enabled; + + bool cpu_enabled; + double cpu; + + kernel_uint_t utime; + kernel_uint_t stime; + + kernel_uint_t utime_old; + kernel_uint_t stime_old; + + usec_t collected_time; + usec_t collected_time_old; + + size_t jobs_started; + usec_t busy_time; + + struct worker_thread *next; + struct worker_thread *prev; +}; + +struct worker_utilization { + const char *name; + const char *family; + size_t priority; + uint32_t flags; + + char *name_lowercase; + + struct worker_job_type_gs per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + + size_t workers_max_job_id; + size_t workers_registered; + size_t workers_busy; + usec_t workers_total_busy_time; + usec_t workers_total_duration; + size_t workers_total_jobs_started; + double workers_min_busy_time; + double workers_max_busy_time; + + size_t workers_cpu_registered; + double workers_cpu_min; + double workers_cpu_max; + double workers_cpu_total; + + struct worker_thread *threads; + + RRDSET *st_workers_time; + RRDDIM *rd_workers_time_avg; + RRDDIM *rd_workers_time_min; + RRDDIM *rd_workers_time_max; + + RRDSET *st_workers_cpu; + RRDDIM *rd_workers_cpu_avg; + RRDDIM *rd_workers_cpu_min; + RRDDIM *rd_workers_cpu_max; + + RRDSET *st_workers_threads; + RRDDIM *rd_workers_threads_free; + RRDDIM *rd_workers_threads_busy; + + RRDSET *st_workers_jobs_per_job_type; + RRDSET *st_workers_busy_per_job_type; + + RRDDIM *rd_total_cpu_utilizaton; +}; + +static struct worker_utilization all_workers_utilization[] = { + { .name = "STATS", .family = "workers global statistics", .priority = 1000000 }, + { .name = "HEALTH", .family = "workers health alarms", .priority = 1000000 }, + { .name = "MLTRAIN", .family = "workers ML training", .priority = 1000000 }, + { .name = "MLDETECT", .family = "workers ML detection", .priority = 1000000 }, + { .name = "STREAMRCV", .family = "workers streaming receive", .priority = 1000000 }, + { .name = "STREAMSND", .family = "workers streaming send", .priority = 1000000 }, + { .name = "DBENGINE", .family = "workers dbengine instances", .priority = 1000000 }, + { .name = "WEB", .family = "workers web server", .priority = 1000000 }, + { .name = "ACLKQUERY", .family = "workers aclk query", .priority = 1000000 }, + { .name = "ACLKSYNC", .family = "workers aclk host sync", .priority = 1000000 }, + { .name = "METASYNC", .family = "workers metadata sync", .priority = 1000000 }, + { .name = "PLUGINSD", .family = "workers plugins.d", .priority = 1000000 }, + { .name = "STATSD", .family = "workers plugin statsd", .priority = 1000000 }, + { .name = "STATSDFLUSH", .family = "workers plugin statsd flush", .priority = 1000000 }, + { .name = "PROC", .family = "workers plugin proc", .priority = 1000000 }, + { .name = "NETDEV", .family = "workers plugin proc netdev", .priority = 1000000 }, + { .name = "FREEBSD", .family = "workers plugin freebsd", .priority = 1000000 }, + { .name = "MACOS", .family = "workers plugin macos", .priority = 1000000 }, + { .name = "CGROUPS", .family = "workers plugin cgroups", .priority = 1000000 }, + { .name = "CGROUPSDISC", .family = "workers plugin cgroups find", .priority = 1000000 }, + { .name = "DISKSPACE", .family = "workers plugin diskspace", .priority = 1000000 }, + { .name = "TC", .family = "workers plugin tc", .priority = 1000000 }, + { .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 }, + { .name = "IDLEJITTER", .family = "workers plugin idlejitter", .priority = 1000000 }, + { .name = "RRDCONTEXT", .family = "workers contexts", .priority = 1000000 }, + { .name = "REPLICATION", .family = "workers replication sender", .priority = 1000000 }, + { .name = "SERVICE", .family = "workers service", .priority = 1000000 }, + + // has to be terminated with a NULL + { .name = NULL, .family = NULL } +}; + +static void workers_total_cpu_utilization_chart(void) { + size_t i, cpu_enabled = 0; + for(i = 0; all_workers_utilization[i].name ;i++) + if(all_workers_utilization[i].workers_cpu_registered) cpu_enabled++; + + if(!cpu_enabled) return; + + static RRDSET *st = NULL; + + if(!st) { + st = rrdset_create_localhost( + "netdata", + "workers_cpu", + NULL, + "workers", + "netdata.workers.cpu_total", + "Netdata Workers CPU Utilization (100% = 1 core)", + "%", + "netdata", + "stats", + 999000, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + } + + for(i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + if(!wu->workers_cpu_registered) continue; + + if(!wu->rd_total_cpu_utilizaton) + wu->rd_total_cpu_utilizaton = rrddim_add(st, wu->name_lowercase, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st, wu->rd_total_cpu_utilizaton, (collected_number)((double)wu->workers_cpu_total * 100.0)); + } + + rrdset_done(st); +} + +#define WORKER_CHART_DECIMAL_PRECISION 100 + +static void workers_utilization_update_chart(struct worker_utilization *wu) { + if(!wu->workers_registered) return; + + //fprintf(stderr, "%-12s WORKER UTILIZATION: %-3.2f%%, %zu jobs done, %zu running, on %zu workers, min %-3.02f%%, max %-3.02f%%.\n", + // wu->name, + // (double)wu->workers_total_busy_time * 100.0 / (double)wu->workers_total_duration, + // wu->workers_total_jobs_started, wu->workers_busy, wu->workers_registered, + // wu->workers_min_busy_time, wu->workers_max_busy_time); + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_time)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_time_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time", wu->name_lowercase); + + wu->st_workers_time = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time (100% = all workers busy)" + , "%" + , "netdata" + , "stats" + , wu->priority + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + } + + // we add the min and max dimensions only when we have multiple workers + + if(unlikely(!wu->rd_workers_time_min && wu->workers_registered > 1)) + wu->rd_workers_time_min = rrddim_add(wu->st_workers_time, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_max && wu->workers_registered > 1)) + wu->rd_workers_time_max = rrddim_add(wu->st_workers_time, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_avg)) + wu->rd_workers_time_avg = rrddim_add(wu->st_workers_time, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(wu->workers_min_busy_time == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_min_busy_time = 0.0; + + if(wu->rd_workers_time_min) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_min, (collected_number)((double)wu->workers_min_busy_time * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->rd_workers_time_max) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_max, (collected_number)((double)wu->workers_max_busy_time * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->workers_total_duration == 0) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, (collected_number)((double)wu->workers_total_busy_time * 100.0 * WORKER_CHART_DECIMAL_PRECISION / (double)wu->workers_total_duration)); + + rrdset_done(wu->st_workers_time); + + // ---------------------------------------------------------------------- + +#ifdef __linux__ + if(wu->workers_cpu_registered || wu->st_workers_cpu) { + if(unlikely(!wu->st_workers_cpu)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_cpu_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.cpu", wu->name_lowercase); + + wu->st_workers_cpu = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers CPU Utilization (100% = all workers busy)" + , "%" + , "netdata" + , "stats" + , wu->priority + 1 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + } + + if (unlikely(!wu->rd_workers_cpu_min && wu->workers_registered > 1)) + wu->rd_workers_cpu_min = rrddim_add(wu->st_workers_cpu, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if (unlikely(!wu->rd_workers_cpu_max && wu->workers_registered > 1)) + wu->rd_workers_cpu_max = rrddim_add(wu->st_workers_cpu, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_cpu_avg)) + wu->rd_workers_cpu_avg = rrddim_add(wu->st_workers_cpu, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(wu->workers_cpu_min == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_cpu_min = 0.0; + + if(wu->rd_workers_cpu_min) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_min, (collected_number)(wu->workers_cpu_min * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->rd_workers_cpu_max) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_max, (collected_number)(wu->workers_cpu_max * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->workers_cpu_registered == 0) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, (collected_number)( wu->workers_cpu_total * WORKER_CHART_DECIMAL_PRECISION / (NETDATA_DOUBLE)wu->workers_cpu_registered )); + + rrdset_done(wu->st_workers_cpu); + } +#endif + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_jobs_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_jobs_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.jobs_started_by_type", wu->name_lowercase); + + wu->st_workers_jobs_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Jobs Started by Type" + , "jobs" + , "netdata" + , "stats" + , wu->priority + 2 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + { + size_t i; + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { + + if(unlikely(!wu->per_job_type[i].rd_jobs_started)) + wu->per_job_type[i].rd_jobs_started = rrddim_add(wu->st_workers_jobs_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_jobs_per_job_type, wu->per_job_type[i].rd_jobs_started, (collected_number)(wu->per_job_type[i].jobs_started)); + } + } + } + + rrdset_done(wu->st_workers_jobs_per_job_type); + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_busy_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_busy_time_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time_by_type", wu->name_lowercase); + + wu->st_workers_busy_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time by Type" + , "ms" + , "netdata" + , "stats" + , wu->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + { + size_t i; + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { + + if(unlikely(!wu->per_job_type[i].rd_busy_time)) + wu->per_job_type[i].rd_busy_time = rrddim_add(wu->st_workers_busy_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_busy_per_job_type, wu->per_job_type[i].rd_busy_time, (collected_number)(wu->per_job_type[i].busy_time)); + } + } + } + + rrdset_done(wu->st_workers_busy_per_job_type); + + // ---------------------------------------------------------------------- + + if(wu->st_workers_threads || wu->workers_registered > 1) { + if(unlikely(!wu->st_workers_threads)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_threads_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.threads", wu->name_lowercase); + + wu->st_workers_threads = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Threads" + , "threads" + , "netdata" + , "stats" + , wu->priority + 4 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + wu->rd_workers_threads_free = rrddim_add(wu->st_workers_threads, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_threads_busy = rrddim_add(wu->st_workers_threads, "busy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_free, (collected_number)(wu->workers_registered - wu->workers_busy)); + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_busy, (collected_number)(wu->workers_busy)); + rrdset_done(wu->st_workers_threads); + } + + // ---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_ABSOLUTE + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_ABSOLUTE) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_value_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.value.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"value" + , "netdata" + , "stats" + , wu->priority + 5 + i + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION)); + + rrdset_done(wu->per_job_type[i].st); + } + } + + // ---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_INCREMENTAL + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id ; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_INCREMENT && wu->per_job_type[i].type != WORKER_METRIC_INCREMENTAL_TOTAL) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_rate_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.rate.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"rate" + , "netdata" + , "stats" + , wu->priority + 5 + i + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION)); + + rrdset_done(wu->per_job_type[i].st); + } + } +} + +static void workers_utilization_reset_statistics(struct worker_utilization *wu) { + wu->workers_registered = 0; + wu->workers_busy = 0; + wu->workers_total_busy_time = 0; + wu->workers_total_duration = 0; + wu->workers_total_jobs_started = 0; + wu->workers_min_busy_time = WORKERS_MIN_PERCENT_DEFAULT; + wu->workers_max_busy_time = 0; + + wu->workers_cpu_registered = 0; + wu->workers_cpu_min = WORKERS_MIN_PERCENT_DEFAULT; + wu->workers_cpu_max = 0; + wu->workers_cpu_total = 0; + + size_t i; + for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + if(unlikely(!wu->name_lowercase)) { + wu->name_lowercase = strdupz(wu->name); + char *s = wu->name_lowercase; + for( ; *s ; s++) *s = tolower(*s); + } + + wu->per_job_type[i].jobs_started = 0; + wu->per_job_type[i].busy_time = 0; + + wu->per_job_type[i].min_value = NAN; + wu->per_job_type[i].max_value = NAN; + wu->per_job_type[i].sum_value = NAN; + wu->per_job_type[i].count_value = 0; + } + + struct worker_thread *wt; + for(wt = wu->threads; wt ; wt = wt->next) { + wt->enabled = false; + wt->cpu_enabled = false; + } +} + +#define TASK_STAT_PREFIX "/proc/self/task/" +#define TASK_STAT_SUFFIX "/stat" + +static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_uint_t *utime __maybe_unused, kernel_uint_t *stime __maybe_unused) { +#ifdef __linux__ + static char filename[sizeof(TASK_STAT_PREFIX) + sizeof(TASK_STAT_SUFFIX) + 20] = TASK_STAT_PREFIX; + static size_t start_pos = sizeof(TASK_STAT_PREFIX) - 1; + static procfile *ff = NULL; + + // construct the filename + size_t end_pos = snprintfz(&filename[start_pos], 20, "%d", pid); + strcpy(&filename[start_pos + end_pos], TASK_STAT_SUFFIX); + + // (re)open the procfile to the new filename + bool set_quotes = (ff == NULL) ? true : false; + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return -1; + + if(set_quotes) + procfile_set_open_close(ff, "(", ")"); + + // read the entire file and split it to lines and words + ff = procfile_readall(ff); + if(unlikely(!ff)) return -1; + + // parse the numbers we are interested + *utime = str2kernel_uint_t(procfile_lineword(ff, 0, 13)); + *stime = str2kernel_uint_t(procfile_lineword(ff, 0, 14)); + + // leave the file open for the next iteration + + return 0; +#else + // TODO: add here cpu time detection per thread, for FreeBSD and MacOS + *utime = 0; + *stime = 0; + return 1; +#endif +} + +static Pvoid_t workers_by_pid_JudyL_array = NULL; + +static void workers_threads_cleanup(struct worker_utilization *wu) { + struct worker_thread *t = wu->threads; + while(t) { + struct worker_thread *next = t->next; + + if(!t->enabled) { + JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0); + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next); + freez(t); + } + t = next; + } + } + +static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) { + struct worker_thread *wt = NULL; + + Pvoid_t *PValue = JudyLGet(workers_by_pid_JudyL_array, pid, PJE0); + if(PValue) + wt = *PValue; + + return wt; +} + +static struct worker_thread *worker_thread_create(struct worker_utilization *wu, pid_t pid) { + struct worker_thread *wt; + + wt = (struct worker_thread *)callocz(1, sizeof(struct worker_thread)); + wt->pid = pid; + + Pvoid_t *PValue = JudyLIns(&workers_by_pid_JudyL_array, pid, PJE0); + *PValue = wt; + + // link it + DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next); + + return wt; +} + +static struct worker_thread *worker_thread_find_or_create(struct worker_utilization *wu, pid_t pid) { + struct worker_thread *wt; + wt = worker_thread_find(wu, pid); + if(!wt) wt = worker_thread_create(wu, pid); + + return wt; +} + +static void worker_utilization_charts_callback(void *ptr + , pid_t pid __maybe_unused + , const char *thread_tag __maybe_unused + , size_t max_job_id __maybe_unused + , size_t utilization_usec __maybe_unused + , size_t duration_usec __maybe_unused + , size_t jobs_started __maybe_unused + , size_t is_running __maybe_unused + , STRING **job_types_names __maybe_unused + , STRING **job_types_units __maybe_unused + , WORKER_METRIC_TYPE *job_types_metric_types __maybe_unused + , size_t *job_types_jobs_started __maybe_unused + , usec_t *job_types_busy_time __maybe_unused + , NETDATA_DOUBLE *job_types_custom_metrics __maybe_unused + ) { + struct worker_utilization *wu = (struct worker_utilization *)ptr; + + // find the worker_thread in the list + struct worker_thread *wt = worker_thread_find_or_create(wu, pid); + + wt->enabled = true; + wt->busy_time = utilization_usec; + wt->jobs_started = jobs_started; + + wt->utime_old = wt->utime; + wt->stime_old = wt->stime; + wt->collected_time_old = wt->collected_time; + + if(max_job_id > wu->workers_max_job_id) + wu->workers_max_job_id = max_job_id; + + wu->workers_total_busy_time += utilization_usec; + wu->workers_total_duration += duration_usec; + wu->workers_total_jobs_started += jobs_started; + wu->workers_busy += is_running; + wu->workers_registered++; + + double util = (double)utilization_usec * 100.0 / (double)duration_usec; + if(util > wu->workers_max_busy_time) + wu->workers_max_busy_time = util; + + if(util < wu->workers_min_busy_time) + wu->workers_min_busy_time = util; + + // accumulate per job type statistics + size_t i; + for(i = 0; i <= max_job_id ;i++) { + if(!wu->per_job_type[i].name && job_types_names[i]) + wu->per_job_type[i].name = string_dup(job_types_names[i]); + + if(!wu->per_job_type[i].units && job_types_units[i]) + wu->per_job_type[i].units = string_dup(job_types_units[i]); + + wu->per_job_type[i].type = job_types_metric_types[i]; + + wu->per_job_type[i].jobs_started += job_types_jobs_started[i]; + wu->per_job_type[i].busy_time += job_types_busy_time[i]; + + NETDATA_DOUBLE value = job_types_custom_metrics[i]; + if(netdata_double_isnumber(value)) { + if(!wu->per_job_type[i].count_value) { + wu->per_job_type[i].count_value = 1; + wu->per_job_type[i].min_value = value; + wu->per_job_type[i].max_value = value; + wu->per_job_type[i].sum_value = value; + } + else { + wu->per_job_type[i].count_value++; + wu->per_job_type[i].sum_value += value; + if(value < wu->per_job_type[i].min_value) wu->per_job_type[i].min_value = value; + if(value > wu->per_job_type[i].max_value) wu->per_job_type[i].max_value = value; + } + } + } + + // find its CPU utilization + if((!read_thread_cpu_time_from_proc_stat(pid, &wt->utime, &wt->stime))) { + wt->collected_time = now_realtime_usec(); + usec_t delta = wt->collected_time - wt->collected_time_old; + + double utime = (double)(wt->utime - wt->utime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; + double stime = (double)(wt->stime - wt->stime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; + double cpu = utime + stime; + wt->cpu = cpu; + wt->cpu_enabled = true; + + wu->workers_cpu_total += cpu; + if(cpu < wu->workers_cpu_min) wu->workers_cpu_min = cpu; + if(cpu > wu->workers_cpu_max) wu->workers_cpu_max = cpu; + } + wu->workers_cpu_registered += (wt->cpu_enabled) ? 1 : 0; +} + +static void worker_utilization_charts(void) { + static size_t iterations = 0; + iterations++; + + for(int i = 0; all_workers_utilization[i].name ;i++) { + workers_utilization_reset_statistics(&all_workers_utilization[i]); + + netdata_thread_disable_cancelability(); + workers_foreach(all_workers_utilization[i].name, worker_utilization_charts_callback, &all_workers_utilization[i]); + netdata_thread_enable_cancelability(); + + // skip the first iteration, so that we don't accumulate startup utilization to our charts + if(likely(iterations > 1)) + workers_utilization_update_chart(&all_workers_utilization[i]); + + netdata_thread_disable_cancelability(); + workers_threads_cleanup(&all_workers_utilization[i]); + netdata_thread_enable_cancelability(); + } + + workers_total_cpu_utilization_chart(); +} + +static void worker_utilization_finish(void) { + int i, j; + for(i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + + if(wu->name_lowercase) { + freez(wu->name_lowercase); + wu->name_lowercase = NULL; + } + + for(j = 0; j < WORKER_UTILIZATION_MAX_JOB_TYPES ;j++) { + string_freez(wu->per_job_type[j].name); + wu->per_job_type[j].name = NULL; + + string_freez(wu->per_job_type[j].units); + wu->per_job_type[j].units = NULL; + } + + // mark all threads as not enabled + struct worker_thread *t; + for(t = wu->threads; t ; t = t->next) + t->enabled = false; + + // let the cleanup job free them + workers_threads_cleanup(wu); + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// global statistics thread + + +static void global_statistics_register_workers(void) { + worker_register("STATS"); + worker_register_job_name(WORKER_JOB_GLOBAL, "global"); + worker_register_job_name(WORKER_JOB_REGISTRY, "registry"); + worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine"); + worker_register_job_name(WORKER_JOB_STRINGS, "strings"); + worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries"); + worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace"); + worker_register_job_name(WORKER_JOB_WORKERS, "workers"); + worker_register_job_name(WORKER_JOB_SQLITE3, "sqlite3"); +} + +static void global_statistics_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + worker_utilization_finish(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_main(void *ptr) +{ + global_statistics_register_workers(); + + netdata_thread_cleanup_push(global_statistics_cleanup, ptr); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + // keep the randomness at zero + // to make sure we are not close to any other thread + hb.randomness = 0; + + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_GLOBAL); + global_statistics_charts(); + + worker_is_busy(WORKER_JOB_SQLITE3); + sqlite3_statistics_charts(); + + worker_is_busy(WORKER_JOB_REGISTRY); + registry_statistics(); + + if(dbengine_enabled) { + worker_is_busy(WORKER_JOB_DBENGINE); + dbengine_statistics_charts(); + } + + worker_is_busy(WORKER_JOB_HEARTBEAT); + update_heartbeat_charts(); + + worker_is_busy(WORKER_JOB_STRINGS); + update_strings_charts(); + + worker_is_busy(WORKER_JOB_DICTIONARIES); + dictionary_statistics(); + +#ifdef NETDATA_TRACE_ALLOCATIONS + worker_is_busy(WORKER_JOB_MALLOC_TRACE); + malloc_trace_statistics(); +#endif + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + + +// --------------------------------------------------------------------------------------------------------------------- +// workers thread + +static void global_statistics_workers_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + worker_utilization_finish(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_workers_main(void *ptr) +{ + global_statistics_register_workers(); + + netdata_thread_cleanup_push(global_statistics_workers_cleanup, ptr); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_WORKERS); + worker_utilization_charts(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h new file mode 100644 index 0000000..f7d6775 --- /dev/null +++ b/daemon/global_statistics.h @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_GLOBAL_STATISTICS_H +#define NETDATA_GLOBAL_STATISTICS_H 1 + +#include "database/rrd.h" + +// ---------------------------------------------------------------------------- +// global statistics + +void global_statistics_ml_query_completed(size_t points_read); +void global_statistics_exporters_query_completed(size_t points_read); +void global_statistics_backfill_query_completed(size_t points_read); +void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source); +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked); +void global_statistics_sqlite3_row_completed(void); +void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array); + +void global_statistics_web_request_completed(uint64_t dt, + uint64_t bytes_received, + uint64_t bytes_sent, + uint64_t content_size, + uint64_t compressed_content_size); + +uint64_t global_statistics_web_client_connected(void); +void global_statistics_web_client_disconnected(void); + +extern bool global_statistics_enabled; + +#endif /* NETDATA_GLOBAL_STATISTICS_H */ diff --git a/daemon/main.c b/daemon/main.c new file mode 100644 index 0000000..6b59138 --- /dev/null +++ b/daemon/main.c @@ -0,0 +1,1617 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include "buildinfo.h" +#include "static_threads.h" + +bool unittest_running = false; +int netdata_zero_metrics_enabled; +int netdata_anonymous_statistics_enabled; + +struct netdata_static_thread *static_threads; + +struct config netdata_config = { + .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { + .avl_tree = { + .root = NULL, + .compar = appconfig_section_compare + }, + .rwlock = AVL_LOCK_INITIALIZER + } +}; + +void netdata_cleanup_and_exit(int ret) { + // enabling this, is wrong + // because the threads will be cancelled while cleaning up + // netdata_exit = 1; + + error_log_limit_unlimited(); + info("EXIT: netdata prepares to exit with code %d...", ret); + + send_statistics("EXIT", ret?"ERROR":"OK","-"); + + char agent_crash_file[FILENAME_MAX + 1]; + char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; + snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); + snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); + (void) rename(agent_crash_file, agent_incomplete_shutdown_file); + + // cleanup/save the database and exit + info("EXIT: cleaning up the database..."); + rrdhost_cleanup_all(); + + if(!ret) { + // exit cleanly + + // stop everything + info("EXIT: stopping static threads..."); +#ifdef ENABLE_ACLK + aclk_sync_exit_all(); +#endif + cancel_main_threads(); + + // free the database + info("EXIT: freeing database memory..."); +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_prepare_exit(multidb_ctx[tier]); + } +#endif + metadata_sync_shutdown_prepare(); + rrdhost_free_all(); + metadata_sync_shutdown(); +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit(multidb_ctx[tier]); + } +#endif + } + sql_close_context_database(); + sql_close_database(); + + // unlink the pid + if(pidfile[0]) { + info("EXIT: removing netdata PID file '%s'...", pidfile); + if(unlink(pidfile) != 0) + error("EXIT: cannot unlink pidfile '%s'.", pidfile); + } + +#ifdef ENABLE_HTTPS + security_clean_openssl(); +#endif + info("EXIT: all done - netdata is now exiting - bye bye..."); + (void) unlink(agent_incomplete_shutdown_file); + exit(ret); +} + +void web_server_threading_selection(void) { + web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode))); + + int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED); + + int i; + for (i = 0; static_threads[i].name; i++) { + if (static_threads[i].start_routine == socket_listen_main_static_threaded) + static_threads[i].enabled = static_threaded; + } +} + +int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p) +{ + char *value = config_get(section_name,config_name,default_value); + if(!strcmp("yes",value)) + return 1; + if(!strcmp("no",value)) + return 0; + if(strcmp("heuristic",value)) + error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'", + value, section_name, config_name); + + return simple_pattern_is_potential_name(p); +} + +void web_server_config_options(void) +{ + web_client_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout); + web_client_first_request_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout); + web_client_streaming_rate_t = + config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t); + + respect_web_browser_do_not_track_policy = + config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy); + web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", ""); + if(!*web_x_frame_options) + web_x_frame_options = NULL; + + web_allow_connections_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_connections_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from); + web_allow_dashboard_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_dashboard_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from); + web_allow_badges_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_badges_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from); + web_allow_registry_from = + simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic", + web_allow_registry_from); + web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_streaming_dns = make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic", + web_allow_streaming_from); + // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses. + web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from", + "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*" + " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*" + " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*" + " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_netdataconf_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from); + web_allow_mgmt_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_mgmt_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from); + + +#ifdef NETDATA_WITH_ZLIB + web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip); + + char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default"); + if(!strcmp(s, "default")) + web_gzip_strategy = Z_DEFAULT_STRATEGY; + else if(!strcmp(s, "filtered")) + web_gzip_strategy = Z_FILTERED; + else if(!strcmp(s, "huffman only")) + web_gzip_strategy = Z_HUFFMAN_ONLY; + else if(!strcmp(s, "rle")) + web_gzip_strategy = Z_RLE; + else if(!strcmp(s, "fixed")) + web_gzip_strategy = Z_FIXED; + else { + error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. Proceeding with 'default'.", s); + web_gzip_strategy = Z_DEFAULT_STRATEGY; + } + + web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3); + if(web_gzip_level < 1) { + error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level); + web_gzip_level = 1; + } + else if(web_gzip_level > 9) { + error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level); + web_gzip_level = 9; + } +#endif /* NETDATA_WITH_ZLIB */ +} + + +// killpid kills pid with SIGTERM. +int killpid(pid_t pid) { + int ret; + debug(D_EXIT, "Request to kill pid %d", pid); + + errno = 0; + ret = kill(pid, SIGTERM); + if (ret == -1) { + switch(errno) { + case ESRCH: + // We wanted the process to exit so just let the caller handle. + return ret; + + case EPERM: + error("Cannot kill pid %d, but I do not have enough permissions.", pid); + break; + + default: + error("Cannot kill pid %d, but I received an error.", pid); + break; + } + } + + return ret; +} + +void cancel_main_threads() { + error_log_limit_unlimited(); + + int i, found = 0; + usec_t max = 5 * USEC_PER_SEC, step = 100000; + for (i = 0; static_threads[i].name != NULL ; i++) { + if(static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) { + info("EXIT: Stopping main thread: %s", static_threads[i].name); + netdata_thread_cancel(*static_threads[i].thread); + found++; + } + } + + netdata_exit = 1; + + while(found && max > 0) { + max -= step; + info("Waiting %d threads to finish...", found); + sleep_usec(step); + found = 0; + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + found++; + } + } + + if(found) { + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + error("Main thread %s takes too long to exit. Giving up...", static_threads[i].name); + } + } + else + info("All threads finished."); + + for (i = 0; static_threads[i].name != NULL ; i++) + freez(static_threads[i].thread); + + freez(static_threads); +} + +struct option_def option_definitions[] = { + // opt description arg name default value + { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, + { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, + { 'd', "Fork. Run in the background.", NULL, "run in the background"}, + { 'h', "Display this help message.", NULL, NULL}, + { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, + { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, + { 'p', "API/Web port to use.", "port", "19999"}, + { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, + { 't', "The internal clock of netdata.", "seconds", "1"}, + { 'u', "Run as user.", "username", "netdata"}, + { 'v', "Print netdata version and exit.", NULL, NULL}, + { 'V', "Print netdata version and exit.", NULL, NULL}, + { 'W', "See Advanced options below.", "options", NULL}, +}; + +int help(int exitcode) { + FILE *stream; + if(exitcode == 0) + stream = stdout; + else + stream = stderr; + + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + int i; + int max_len_arg = 0; + + // Compute maximum argument length + for( i = 0; i < num_opts; i++ ) { + if(option_definitions[i].arg_name) { + int len_arg = (int)strlen(option_definitions[i].arg_name); + if(len_arg > max_len_arg) max_len_arg = len_arg; + } + } + + if(max_len_arg > 30) max_len_arg = 30; + if(max_len_arg < 20) max_len_arg = 20; + + fprintf(stream, "%s", "\n" + " ^\n" + " |.-. .-. .-. .-. . netdata \n" + " | '-' '-' '-' '-' real-time performance monitoring, done right! \n" + " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n" + "\n" + " Copyright (C) 2016-2022, Netdata, Inc. \n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " Home Page : https://netdata.cloud\n" + " Source Code: https://github.com/netdata/netdata\n" + " Docs : https://learn.netdata.cloud\n" + " Support : https://github.com/netdata/netdata/issues\n" + " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n" + "\n" + " Twitter : https://twitter.com/linuxnetdata\n" + " LinkedIn : https://linkedin.com/company/netdata-cloud/\n" + " Facebook : https://facebook.com/linuxnetdata/\n" + "\n" + "\n" + ); + + fprintf(stream, " SYNOPSIS: netdata [options]\n"); + fprintf(stream, "\n"); + fprintf(stream, " Options:\n\n"); + + // Output options description. + for( i = 0; i < num_opts; i++ ) { + fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? option_definitions[i].arg_name : "", option_definitions[i].description); + if(option_definitions[i].default_value) { + fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value); + } else { + fprintf(stream, "\n"); + } + fprintf(stream, "\n"); + } + + fprintf(stream, "\n Advanced options:\n\n" + " -W stacksize=N Set the stacksize (in bytes).\n\n" + " -W debug_flags=N Set runtime tracing to debug.log.\n\n" + " -W unittest Run internal unittests and exit.\n\n" + " -W sqlite-check Check metadata database integrity and exit.\n\n" + " -W sqlite-fix Check metadata database integrity, fix if needed and exit.\n\n" + " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n" +#ifdef ENABLE_DBENGINE + " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" + " -W stresstest=A,B,C,D,E,F,G\n" + " Run a DB engine stress test for A seconds,\n" + " with B writers and C readers, with a ramp up\n" + " time of D seconds for writers, a page cache\n" + " size of E MiB, an optional disk space limit\n" + " of F MiB, G libuv workers (default 16) and exit.\n\n" +#endif + " -W set section option value\n" + " set netdata.conf option from the command line.\n\n" + " -W buildinfo Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not.\n\n" + " -W buildinfojson Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not, in JSON format.\n\n" + " -W simple-pattern pattern string\n" + " Check if string matches pattern and exit.\n\n" + " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n" + " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n" + ); + + fprintf(stream, "\n Signals netdata handles:\n\n" + " - HUP Close and reopen log files.\n" + " - USR1 Save internal DB to disk.\n" + " - USR2 Reload health configuration.\n" + "\n" + ); + + fflush(stream); + return exitcode; +} + +#ifdef ENABLE_HTTPS +static void security_init(){ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir); + netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename); + + snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir); + netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename); + + tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3"); + tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none"); + + security_openssl_library(); +} +#endif + +static void log_init(void) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir); + stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename); + + snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir); + stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename); + + snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); + stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename); + + snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir); + stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename); + +#ifdef ENABLE_ACLK + aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO); + if (aclklog_enabled) { + snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir); + aclklog_filename = config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename); + } +#endif + + char deffacility[8]; + snprintfz(deffacility,7,"%s","daemon"); + facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility); + + error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period); + error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period); + error_log_errors_per_period_backup = error_log_errors_per_period; + + setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1); + setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1); +} + +char *initialize_lock_directory_path(char *prefix) +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/lock", prefix); + + return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename); +} + +static void backwards_compatible_config() { + // move [global] options to the [web] section + config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog", + CONFIG_SECTION_WEB, "listen backlog"); + + config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "bind to", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "default port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds", + CONFIG_SECTION_WEB, "disconnect idle clients after seconds"); + + config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy", + CONFIG_SECTION_WEB, "respect do not track policy"); + + config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header", + CONFIG_SECTION_WEB, "x-frame-options response header"); + + config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression", + CONFIG_SECTION_WEB, "enable gzip compression"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression strategy", + CONFIG_SECTION_WEB, "gzip compression strategy"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression level", + CONFIG_SECTION_WEB, "gzip compression level"); + + config_move(CONFIG_SECTION_GLOBAL, "config directory", + CONFIG_SECTION_DIRECTORIES, "config"); + + config_move(CONFIG_SECTION_GLOBAL, "stock config directory", + CONFIG_SECTION_DIRECTORIES, "stock config"); + + config_move(CONFIG_SECTION_GLOBAL, "log directory", + CONFIG_SECTION_DIRECTORIES, "log"); + + config_move(CONFIG_SECTION_GLOBAL, "web files directory", + CONFIG_SECTION_DIRECTORIES, "web"); + + config_move(CONFIG_SECTION_GLOBAL, "cache directory", + CONFIG_SECTION_DIRECTORIES, "cache"); + + config_move(CONFIG_SECTION_GLOBAL, "lib directory", + CONFIG_SECTION_DIRECTORIES, "lib"); + + config_move(CONFIG_SECTION_GLOBAL, "home directory", + CONFIG_SECTION_DIRECTORIES, "home"); + + config_move(CONFIG_SECTION_GLOBAL, "lock directory", + CONFIG_SECTION_DIRECTORIES, "lock"); + + config_move(CONFIG_SECTION_GLOBAL, "plugins directory", + CONFIG_SECTION_DIRECTORIES, "plugins"); + + config_move(CONFIG_SECTION_HEALTH, "health configuration directory", + CONFIG_SECTION_DIRECTORIES, "health config"); + + config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory", + CONFIG_SECTION_DIRECTORIES, "stock health config"); + + config_move(CONFIG_SECTION_REGISTRY, "registry db directory", + CONFIG_SECTION_DIRECTORIES, "registry"); + + config_move(CONFIG_SECTION_GLOBAL, "debug log", + CONFIG_SECTION_LOGS, "debug"); + + config_move(CONFIG_SECTION_GLOBAL, "error log", + CONFIG_SECTION_LOGS, "error"); + + config_move(CONFIG_SECTION_GLOBAL, "access log", + CONFIG_SECTION_LOGS, "access"); + + config_move(CONFIG_SECTION_GLOBAL, "facility log", + CONFIG_SECTION_LOGS, "facility"); + + config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period", + CONFIG_SECTION_LOGS, "errors flood protection period"); + + config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "errors to trigger flood protection"); + + config_move(CONFIG_SECTION_GLOBAL, "debug flags", + CONFIG_SECTION_LOGS, "debug flags"); + + config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable", + CONFIG_SECTION_ENV_VARS, "TZ"); + + config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable", + CONFIG_SECTION_ENV_VARS, "PATH"); + + config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable", + CONFIG_SECTION_ENV_VARS, "PYTHONPATH"); + + config_move(CONFIG_SECTION_STATSD, "enabled", + CONFIG_SECTION_PLUGINS, "statsd"); + + config_move(CONFIG_SECTION_GLOBAL, "memory mode", + CONFIG_SECTION_DB, "mode"); + + config_move(CONFIG_SECTION_GLOBAL, "history", + CONFIG_SECTION_DB, "retention"); + + config_move(CONFIG_SECTION_GLOBAL, "update every", + CONFIG_SECTION_DB, "update every"); + + config_move(CONFIG_SECTION_GLOBAL, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_DB, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_DB, "page cache with malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space", + CONFIG_SECTION_DB, "dbengine disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space", + CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)", + CONFIG_SECTION_DB, "memory deduplication (ksm)"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout", + CONFIG_SECTION_DB, "dbengine page fetch timeout secs"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries", + CONFIG_SECTION_DB, "dbengine page fetch retries"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages", + CONFIG_SECTION_DB, "dbengine pages per extent"); + + config_move(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", + CONFIG_SECTION_DB, "cleanup obsolete charts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", + CONFIG_SECTION_DB, "gap when lost iterations above"); + + config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", + CONFIG_SECTION_DB, "cleanup orphan hosts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", + CONFIG_SECTION_DB, "delete obsolete charts files"); + + config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", + CONFIG_SECTION_DB, "delete orphan hosts files"); + + config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics", + CONFIG_SECTION_DB, "enable zero metrics"); + +} + +static void get_netdata_configured_variables() { + backwards_compatible_config(); + + // ------------------------------------------------------------------------ + // get the hostname + + char buf[HOSTNAME_MAX + 1]; + if(gethostname(buf, HOSTNAME_MAX) == -1){ + error("Cannot get machine hostname."); + } + + netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf); + debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname); + + // ------------------------------------------------------------------------ + // get default database update frequency + + default_rrd_update_every = (int) config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY); + if(default_rrd_update_every < 1 || default_rrd_update_every > 600) { + error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY); + default_rrd_update_every = UPDATE_EVERY; + config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every); + } + + // ------------------------------------------------------------------------ + // get default memory mode for the database + + { + const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + default_rrd_memory_mode = rrd_memory_mode_id(mode); + if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) { + error("Invalid memory mode '%s' given. Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode)); + config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + } + } + + // ------------------------------------------------------------------------ + // get default database size + + if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) { + default_rrd_history_entries = (int)config_get_number( + CONFIG_SECTION_DB, "retention", + align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES)); + + long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries); + if (h != default_rrd_history_entries) { + config_set_number(CONFIG_SECTION_DB, "retention", h); + default_rrd_history_entries = (int)h; + } + } + + // ------------------------------------------------------------------------ + // get system paths + + netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir); + netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir); + netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir); + netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir); + netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir); + netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir); + char *env_home=getenv("HOME"); + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir); + + netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir); + + { + pluginsd_initialize_plugin_directories(); + netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH]; + } + +#ifdef ENABLE_DBENGINE + // ------------------------------------------------------------------------ + // get default Database Engine page cache size in MiB + + db_engine_use_malloc = config_get_boolean(CONFIG_SECTION_DB, "dbengine page cache with malloc", CONFIG_BOOLEAN_YES); + default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) { + error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB); + default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB; + config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + } + + // ------------------------------------------------------------------------ + // get default Database Engine disk space quota in MiB + + default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); + if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { + error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB); + default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB; + config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); + } + + default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace()); + if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { + error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb); + default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb; + config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb); + } +#else + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead."); + default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; + } +#endif + // ------------------------------------------------------------------------ + + netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", ""); + verify_netdata_host_prefix(); + + // -------------------------------------------------------------------- + // get KSM settings + +#ifdef MADV_MERGEABLE + enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm); +#endif + + // -------------------------------------------------------------------- + // metric correlations + + enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations); + default_metric_correlations_method = weights_string_to_method(config_get( + CONFIG_SECTION_GLOBAL, "metric correlations method", + weights_method_to_string(default_metric_correlations_method))); + + // -------------------------------------------------------------------- + + rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time); + // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short + // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at + // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information. + if (rrdset_free_obsolete_time < 10) { + rrdset_free_obsolete_time = 10; + info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds."); + config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time); + } + + gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); + if (gap_when_lost_iterations_above < 1) { + gap_when_lost_iterations_above = 1; + config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); + } + + // -------------------------------------------------------------------- + // get various system parameters + + get_system_HZ(); + get_system_cpus(); + get_system_pid_max(); + + +} + +int load_netdata_conf(char *filename, char overwrite_used) { + errno = 0; + + int ret = 0; + + if(filename && *filename) { + ret = config_load(filename, overwrite_used, NULL); + if(!ret) + error("CONFIG: cannot load config file '%s'.", filename); + } + else { + filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf"); + + ret = config_load(filename, overwrite_used, NULL); + if(!ret) { + info("CONFIG: cannot load user config '%s'. Will try the stock version.", filename); + freez(filename); + + filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf"); + ret = config_load(filename, overwrite_used, NULL); + if(!ret) + info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename); + } + + freez(filename); + } + + return ret; +} + +// coverity[ +tainted_string_sanitize_content : arg-0 ] +static inline void coverity_remove_taint(char *s) +{ + (void)s; +} + +int get_system_info(struct rrdhost_system_info *system_info) { + char *script; + script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2)); + sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh"); + if (unlikely(access(script, R_OK) != 0)) { + info("System info script %s not found.",script); + freez(script); + return 1; + } + + pid_t command_pid; + + info("Executing %s", script); + + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); + if(fp_child_output) { + char line[200 + 1]; + // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert. + // One time init code, but I'm curious about the warning... + while (fgets(line, 200, fp_child_output) != NULL) { + char *value=line; + while (*value && *value != '=') value++; + if (*value=='=') { + *value='\0'; + value++; + char *end = value; + while (*end && *end != '\n') end++; + *end = '\0'; // Overwrite newline if present + coverity_remove_taint(line); // I/O is controlled result of system_info.sh - not tainted + coverity_remove_taint(value); + + if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) { + info("Unexpected environment variable %s=%s", line, value); + } + else { + info("%s=%s", line, value); + setenv(line, value, 1); + } + } + } + netdata_pclose(fp_child_input, fp_child_output, command_pid); + } + freez(script); + return 0; +} + +void set_silencers_filename() { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir); + silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename); +} + +/* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST* + be set in this procedure to be called in all the relevant code paths. +*/ +void post_conf_load(char **user) +{ + // -------------------------------------------------------------------- + // get the user we should run + + // IMPORTANT: this is required before web_files_uid() + if(getuid() == 0) { + *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER); + } + else { + struct passwd *passwd = getpwuid(getuid()); + *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:""); + } + + // -------------------------------------------------------------------- + // Check if the cloud is enabled +#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK ) + netdata_cloud_setting = 0; +#else + netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1); +#endif + // This must be set before any point in the code that accesses it. Do not move it from this function. + appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL); +} + +int main(int argc, char **argv) { + int i; + int config_loaded = 0; + int dont_fork = 0; + bool close_open_fds = true; + size_t default_stacksize; + char *user = NULL; + + static_threads = static_threads_get(); + + netdata_ready=0; + // set the name for logging + program_name = "netdata"; + + if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) { + // don't run netdata, this is the spawn server + spawn_server(); + exit(0); + } + + // parse options + { + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + char optstring[(num_opts * 2) + 1]; + + int string_i = 0; + for( i = 0; i < num_opts; i++ ) { + optstring[string_i] = option_definitions[i].val; + string_i++; + if(option_definitions[i].arg_name) { + optstring[string_i] = ':'; + string_i++; + } + } + // terminate optstring + optstring[string_i] ='\0'; + optstring[(num_opts *2)] ='\0'; + + int opt; + while( (opt = getopt(argc, argv, optstring)) != -1 ) { + switch(opt) { + case 'c': + if(load_netdata_conf(optarg, 1) != 1) { + error("Cannot load configuration file %s.", optarg); + return 1; + } + else { + debug(D_OPTIONS, "Configuration loaded from %s.", optarg); + post_conf_load(&user); + load_cloud_conf(1); + config_loaded = 1; + } + break; + case 'D': + dont_fork = 1; + break; + case 'd': + dont_fork = 0; + break; + case 'h': + return help(0); + case 'i': + config_set(CONFIG_SECTION_WEB, "bind to", optarg); + break; + case 'P': + strncpy(pidfile, optarg, FILENAME_MAX); + pidfile[FILENAME_MAX] = '\0'; + break; + case 'p': + config_set(CONFIG_SECTION_GLOBAL, "default port", optarg); + break; + case 's': + config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg); + break; + case 't': + config_set(CONFIG_SECTION_GLOBAL, "update every", optarg); + break; + case 'u': + config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg); + break; + case 'v': + case 'V': + printf("%s %s\n", program_name, program_version); + return 0; + case 'W': + { + char* stacksize_string = "stacksize="; + char* debug_flags_string = "debug_flags="; + char* claim_string = "claim"; +#ifdef ENABLE_DBENGINE + char* createdataset_string = "createdataset="; + char* stresstest_string = "stresstest="; +#endif + if(strcmp(optarg, "sqlite-check") == 0) { + sql_init_database(DB_CHECK_INTEGRITY, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-fix") == 0) { + sql_init_database(DB_CHECK_FIX_DB, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-compact") == 0) { + sql_init_database(DB_CHECK_RECLAIM_SPACE, 0); + return 0; + } + + if(strcmp(optarg, "unittest") == 0) { + unittest_running = true; + + if (unit_test_static_threads()) + return 1; + if (unit_test_buffer()) + return 1; + if (unit_test_str2ld()) + return 1; + if (unit_test_bitmap256()) + return 1; + // No call to load the config file on this code-path + post_conf_load(&user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + default_health_enabled = 0; + storage_tiers = 1; + registry_init(); + if(rrd_init("unittest", NULL)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + default_rrdpush_enabled = 0; + if(run_all_mockup_tests()) return 1; + if(unit_test_storage()) return 1; +#ifdef ENABLE_DBENGINE + if(test_dbengine()) return 1; +#endif + if(test_sqlite()) return 1; + if(string_unittest(10000)) return 1; + if (dictionary_unittest(10000)) + return 1; + if(aral_unittest(10000)) + return 1; + if (rrdlabels_unittest()) + return 1; + if (ctx_unittest()) + return 1; + fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); + return 0; + } + else if(strcmp(optarg, "escapetest") == 0) { + return command_argument_sanitization_tests(); + } +#ifdef ENABLE_ML_TESTS + else if(strcmp(optarg, "mltest") == 0) { + return test_ml(argc, argv); + } +#endif +#ifdef ENABLE_DBENGINE + else if(strcmp(optarg, "mctest") == 0) { + unittest_running = true; + return mc_unittest(); + } + else if(strcmp(optarg, "ctxtest") == 0) { + unittest_running = true; + return ctx_unittest(); + } + else if(strcmp(optarg, "dicttest") == 0) { + unittest_running = true; + return dictionary_unittest(10000); + } + else if(strcmp(optarg, "araltest") == 0) { + unittest_running = true; + return aral_unittest(10000); + } + else if(strcmp(optarg, "stringtest") == 0) { + unittest_running = true; + return string_unittest(10000); + } + else if(strcmp(optarg, "rrdlabelstest") == 0) { + unittest_running = true; + return rrdlabels_unittest(); + } + else if(strcmp(optarg, "metatest") == 0) { + unittest_running = true; + return metadata_unittest(); + } + else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { + optarg += strlen(createdataset_string); + unsigned history_seconds = strtoul(optarg, NULL, 0); + generate_dbengine_dataset(history_seconds); + return 0; + } + else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) { + char *endptr; + unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0, + page_cache_mb = 0, disk_space_mb = 0, workers = 16; + + optarg += strlen(stresstest_string); + test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0); + if (',' == *endptr) + dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + workers = (unsigned)strtoul(endptr + 1, &endptr, 0); + + if (workers > 1024) + workers = 1024; + + char workers_str[16]; + snprintf(workers_str, 15, "%u", workers); + setenv("UV_THREADPOOL_SIZE", workers_str, 1); + dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds, + page_cache_mb, disk_space_mb); + return 0; + } +#endif + else if(strcmp(optarg, "simple-pattern") == 0) { + if(optind + 2 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n" + " Checks if 'pattern' matches the given 'string'.\n" + " - 'pattern' can be one or more space separated words.\n" + " - each 'word' can contain one or more asterisks.\n" + " - words starting with '!' give negative matches.\n" + " - words are processed left to right\n" + "\n" + "Examples:\n" + "\n" + " > match all veth interfaces, except veth0:\n" + "\n" + " -W simple-pattern '!veth0 veth*' 'veth12'\n" + "\n" + "\n" + " > match all *.ext files directly in /path/:\n" + " (this will not match *.ext files in a subdir of /path/)\n" + "\n" + " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n" + "\n" + ); + return 1; + } + + const char *haystack = argv[optind]; + const char *needle = argv[optind + 1]; + size_t len = strlen(needle) + 1; + char wildcarded[len]; + + SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT); + int ret = simple_pattern_matches_extract(p, needle, wildcarded, len); + simple_pattern_free(p); + + if(ret) { + fprintf(stdout, "RESULT: MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 0; + } + else { + fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 1; + } + } + else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) { + optarg += strlen(stacksize_string); + config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg); + } + else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) { + optarg += strlen(debug_flags_string); + config_set(CONFIG_SECTION_LOGS, "debug flags", optarg); + debug_flags = strtoull(optarg, NULL, 0); + } + else if(strcmp(optarg, "set") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command line,\n" + " before -W set... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set... the user cannot overwrite the command line\n" + " parameters." + "\n" + ); + return 1; + } + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *value = argv[optind + 2]; + optind += 3; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + config_set_default(section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + } + else if(strcmp(optarg, "set2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set 'conf_file' 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command line,\n" + " before -W set... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set... the user cannot overwrite the command line\n" + " parameters." + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *value = argv[optind + 3]; + optind += 4; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + appconfig_set_default(tmp_config, section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + } + else if(strcmp(optarg, "get") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'value'\n\n" + " Prints settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get.\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n"); + load_netdata_conf(NULL, 0); + post_conf_load(&user); + } + + get_netdata_configured_variables(); + + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *def = argv[optind + 2]; + const char *value = config_get(section, key, def); + printf("%s\n", value); + return 0; + } + else if(strcmp(optarg, "get2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'value'\n\n" + " Prints settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get2.\n" + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n"); + load_netdata_conf(NULL, 0); + post_conf_load(&user); + load_cloud_conf(1); + } + + get_netdata_configured_variables(); + + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *def = argv[optind + 3]; + const char *value = appconfig_get(tmp_config, section, key, def); + printf("%s\n", value); + return 0; + } + else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) { + /* will trigger a claiming attempt when the agent is initialized */ + claiming_pending_arguments = optarg + strlen(claim_string); + } + else if(strcmp(optarg, "buildinfo") == 0) { + printf("Version: %s %s\n", program_name, program_version); + print_build_info(); + return 0; + } + else if(strcmp(optarg, "buildinfojson") == 0) { + print_build_info_json(); + return 0; + } + else if(strcmp(optarg, "keepopenfds") == 0) { + // Internal dev option to skip closing inherited + // open FDs. Useful, when we want to run the agent + // under profiling tools that open/maintain their + // own FDs. + close_open_fds = false; + } else { + fprintf(stderr, "Unknown -W parameter '%s'\n", optarg); + return help(1); + } + } + break; + + default: /* ? */ + fprintf(stderr, "Unknown parameter '%c'\n", opt); + return help(1); + } + } + } + +#ifdef _SC_OPEN_MAX + if (close_open_fds == true) { + // close all open file descriptors, except the standard ones + // the caller may have left open files (lxc-attach has this issue) + for(int fd = (int) (sysconf(_SC_OPEN_MAX) - 1); fd > 2; fd--) + if(fd_is_valid(fd)) + close(fd); + } +#endif + + + if(!config_loaded) + { + load_netdata_conf(NULL, 0); + post_conf_load(&user); + load_cloud_conf(0); + } + + char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD"); + if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1)) { + appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", "false"); + } + + + // ------------------------------------------------------------------------ + // initialize netdata + { + char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1"); + if(pmax && *pmax) + setenv("MALLOC_ARENA_MAX", pmax, 1); + +#if defined(HAVE_C_MALLOPT) + i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1); + if(i > 0) + mallopt(M_ARENA_MAX, 1); + + +#ifdef NETDATA_INTERNAL_CHECKS + mallopt(M_PERTURB, 0x5A); + // mallopt(M_MXFAST, 0); +#endif +#endif + + // initialize the system clocks + clocks_init(); + + // prepare configuration environment variables for the plugins + + setenv("UV_THREADPOOL_SIZE", config_get(CONFIG_SECTION_GLOBAL, "libuv worker threads", "16"), 1); + get_netdata_configured_variables(); + set_global_environment(); + + // work while we are cd into config_dir + // to allow the plugins refer to their config + // files using relative filenames + if(chdir(netdata_configured_user_config_dir) == -1) + fatal("Cannot cd to '%s'", netdata_configured_user_config_dir); + + // Get execution path before switching user to avoid permission issues + get_netdata_execution_path(); + } + + { + // -------------------------------------------------------------------- + // get the debugging flags from the configuration file + + char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000"); + setenv("NETDATA_DEBUG_FLAGS", flags, 1); + + debug_flags = strtoull(flags, NULL, 0); + debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags); + + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + error("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); + +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } + + + // -------------------------------------------------------------------- + // get log filenames and settings + + log_init(); + error_log_limit_unlimited(); + + // initialize the log files + open_all_log_files(); + + get_system_timezone(); + + // -------------------------------------------------------------------- + // get the certificate and start security + +#ifdef ENABLE_HTTPS + security_init(); +#endif + + // -------------------------------------------------------------------- + // This is the safest place to start the SILENCERS structure + + set_silencers_filename(); + health_initialize_global_silencers(); + + // -------------------------------------------------------------------- + // Initialize ML configuration + + ml_init(); + + // -------------------------------------------------------------------- + // setup process signals + + // block signals while initializing threads. + // this causes the threads to block signals. + + signals_block(); + + // setup the signals we want to use + + signals_init(); + + // setup threads configs + default_stacksize = netdata_threads_init(); + + + // -------------------------------------------------------------------- + // check which threads are enabled and initialize them + + for (i = 0; static_threads[i].name != NULL ; i++) { + struct netdata_static_thread *st = &static_threads[i]; + + if(st->config_name) + st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled); + + if(st->enabled && st->init_routine) + st->init_routine(); + + if(st->env_name) + setenv(st->env_name, st->enabled?"YES":"NO", 1); + + if(st->global_variable) + *st->global_variable = (st->enabled) ? true : false; + } + + // -------------------------------------------------------------------- + // create the listening sockets + + web_client_api_v1_init(); + web_server_threading_selection(); + + if(web_server_mode != WEB_SERVER_MODE_NONE) + api_listen_sockets_setup(); + + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + error("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } +#endif /* NETDATA_INTERNAL_CHECKS */ + + // get the max file limit + if(getrlimit(RLIMIT_NOFILE, &rlimit_nofile) != 0) + error("getrlimit(RLIMIT_NOFILE) failed"); + else + info("resources control: allowed file descriptors: soft = %zu, max = %zu", (size_t)rlimit_nofile.rlim_cur, (size_t)rlimit_nofile.rlim_max); + + // fork, switch user, create pid file, set process priority + if(become_daemon(dont_fork, user) == -1) + fatal("Cannot daemonize myself."); + + info("netdata started on pid %d.", getpid()); + + netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize)); + + // initialize internal registry + registry_init(); + // fork the spawn server + spawn_init(); + /* + * Libuv uv_spawn() uses SIGCHLD internally: + * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485 + * and inadvertently replaces the netdata signal handler which was setup during initialization. + * Thusly, we must explicitly restore the signal handler for SIGCHLD. + * Warning: extreme care is needed when mixing and matching POSIX and libuv. + */ + signals_restore_SIGCHLD(); + + // ------------------------------------------------------------------------ + // initialize rrd, registry, health, rrdpush, etc. + + netdata_anonymous_statistics_enabled=-1; + struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); + get_system_info(system_info); + system_info->hops = 0; + get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist); + + if(rrd_init(netdata_configured_hostname, system_info)) + fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname); + + char agent_crash_file[FILENAME_MAX + 1]; + char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; + snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); + int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0); + snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); + int crash_detected = (unlink(agent_crash_file) == 0); + int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 444); + if (fd >= 0) + close(fd); + + + // ------------------------------------------------------------------------ + // Claim netdata agent to a cloud endpoint + + if (claiming_pending_arguments) + claim_agent(claiming_pending_arguments); + load_claiming_state(); + + // ------------------------------------------------------------------------ + // enable log flood protection + + error_log_limit_reset(); + + // Load host labels + reload_host_labels(); + + // ------------------------------------------------------------------------ + // spawn the threads + + web_server_config_options(); + + netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO); + + set_late_global_environment(); + + for (i = 0; static_threads[i].name != NULL ; i++) { + struct netdata_static_thread *st = &static_threads[i]; + + if(st->enabled) { + st->thread = mallocz(sizeof(netdata_thread_t)); + debug(D_SYSTEM, "Starting thread %s.", st->name); + netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st); + } + else debug(D_SYSTEM, "Not starting thread %s.", st->name); + } + + // ------------------------------------------------------------------------ + // Initialize netdata agent command serving from cli and signals + + commands_init(); + + info("netdata initialization completed. Enjoy real-time performance monitoring!"); + netdata_ready = 1; + + send_statistics("START", "-", "-"); + if (crash_detected) + send_statistics("CRASH", "-", "-"); + if (incomplete_shutdown_detected) + send_statistics("INCOMPLETE_SHUTDOWN", "-", "-"); + + //check if ANALYTICS needs to start + if (netdata_anonymous_statistics_enabled == 1) { + for (i = 0; static_threads[i].name != NULL; i++) { + if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) { + struct netdata_static_thread *st = &static_threads[i]; + st->thread = mallocz(sizeof(netdata_thread_t)); + st->enabled = 1; + debug(D_SYSTEM, "Starting thread %s.", st->name); + netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st); + } + } + } + + // ------------------------------------------------------------------------ + // Report ACLK build failure +#ifndef ENABLE_ACLK + error("This agent doesn't have ACLK."); + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir); + if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized + send_statistics("ACLK_DISABLED", "-", "-"); + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444); + if (fd == -1) + error("Cannot create file '%s'. Please fix this.", filename); + else + close(fd); + } +#endif + + // ------------------------------------------------------------------------ + // unblock signals + + signals_unblock(); + + // ------------------------------------------------------------------------ + // Handle signals + + signals_handle(); + + // should never reach this point + // but we need it for rpmlint #2752 + return 1; +} diff --git a/daemon/main.h b/daemon/main.h new file mode 100644 index 0000000..a4e2b3a --- /dev/null +++ b/daemon/main.h @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_MAIN_H +#define NETDATA_MAIN_H 1 + +#include "common.h" + +extern struct config netdata_config; + +/** + * This struct contains information about command line options. + */ +struct option_def { + /** The option character */ + const char val; + /** The name of the long option. */ + const char *description; + /** Short description what the option does */ + /** Name of the argument displayed in SYNOPSIS */ + const char *arg_name; + /** Default value if not set */ + const char *default_value; +}; + +void cancel_main_threads(void); +int killpid(pid_t pid); +void netdata_cleanup_and_exit(int ret) NORETURN; +void send_statistics(const char *action, const char *action_result, const char *action_data); + +#endif /* NETDATA_MAIN_H */ diff --git a/daemon/service.c b/daemon/service.c new file mode 100644 index 0000000..6db2ef6 --- /dev/null +++ b/daemon/service.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +/* Run service jobs every X seconds */ +#define SERVICE_HEARTBEAT 10 + +#define TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT (3600 / 2) +#define ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT 60 + +#define WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK 1 +#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS 2 +#define WORKER_JOB_ARCHIVE_CHART 3 +#define WORKER_JOB_ARCHIVE_CHART_DIMENSIONS 4 +#define WORKER_JOB_ARCHIVE_DIMENSION 5 +#define WORKER_JOB_CLEANUP_ORPHAN_HOSTS 6 +#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS 7 +#define WORKER_JOB_FREE_HOST 9 +#define WORKER_JOB_SAVE_HOST_CHARTS 10 +#define WORKER_JOB_DELETE_HOST_CHARTS 11 +#define WORKER_JOB_FREE_CHART 12 +#define WORKER_JOB_SAVE_CHART 13 +#define WORKER_JOB_DELETE_CHART 14 +#define WORKER_JOB_FREE_DIMENSION 15 + +static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { + RRDSET *st = rd->rrdset; + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED) || !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) + return; + + worker_is_busy(WORKER_JOB_ARCHIVE_DIMENSION); + + rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED); + rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); + + const char *cache_filename = rrddim_cache_filename(rd); + if(cache_filename) { + info("Deleting dimension file '%s'.", cache_filename); + if (unlikely(unlink(cache_filename) == -1)) + error("Cannot delete dimension file '%s'", cache_filename); + } + + if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + rrddimvar_delete_all(rd); + + /* only a collector can mark a chart as obsolete, so we must remove the reference */ + + size_t tiers_available = 0, tiers_said_no_retention = 0; + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(rd->tiers[tier]) { + tiers_available++; + + if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle)) + tiers_said_no_retention++; + + rd->tiers[tier]->db_collection_handle = NULL; + } + } + + if (tiers_available == tiers_said_no_retention && tiers_said_no_retention) { + /* This metric has no data and no references */ + metaqueue_delete_dimension_uuid(&rd->metric_uuid); + } + else { + /* Do not delete this dimension */ + return; + } + } + + worker_is_busy(WORKER_JOB_FREE_DIMENSION); + rrddim_free(st, rd); +} + +static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) { + worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS); + + RRDDIM *rd; + time_t now = now_realtime_sec(); + + bool done_all_dimensions = true; + + dfe_start_write(st->rrddim_root_index, rd) { + if(unlikely( + all_dimensions || + (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time < now)) + )) { + + if(dictionary_acquired_item_references(rd_dfe.item) == 1) { + info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st)); + svc_rrddim_obsolete_to_archive(rd); + } + else + done_all_dimensions = false; + } + else + done_all_dimensions = false; + } + dfe_done(rd); + + return done_all_dimensions; +} + +static void svc_rrdset_obsolete_to_archive(RRDSET *st) { + worker_is_busy(WORKER_JOB_ARCHIVE_CHART); + + if(!svc_rrdset_archive_obsolete_dimensions(st, true)) + return; + + rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED); + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); + + rrdcalc_unlink_all_rrdset_alerts(st); + + rrdsetvar_release_and_delete_all(st); + + // has to be run after all dimensions are archived - or use-after-free will occur + rrdvar_delete_all(st->rrdvars); + + if(st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) { + if(rrdhost_option_check(st->rrdhost, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS)) { + worker_is_busy(WORKER_JOB_DELETE_CHART); + rrdset_delete_files(st); + } + else { + worker_is_busy(WORKER_JOB_SAVE_CHART); + rrdset_save(st); + } + + worker_is_busy(WORKER_JOB_FREE_CHART); + rrdset_free(st); + } +} + +static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS); + + time_t now = now_realtime_sec(); + RRDSET *st; + rrdset_foreach_reentrant(st, host) { + if(rrdset_is_replicating(st)) + continue; + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) + && st->last_accessed_time + rrdset_free_obsolete_time < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now + )) { + svc_rrdset_obsolete_to_archive(st); + } + else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) { + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + svc_rrdset_archive_obsolete_dimensions(st, false); + } + } + rrdset_foreach_done(st); +} + +static void svc_rrdset_check_obsoletion(RRDHOST *host) { + worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK); + + time_t now = now_realtime_sec(); + time_t last_entry_t; + RRDSET *st; + rrdset_foreach_read(st, host) { + if(rrdset_is_replicating(st)) + continue; + + last_entry_t = rrdset_last_entry_t(st); + + if(last_entry_t && last_entry_t < host->senders_connect_time && + host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every + < now) + + rrdset_is_obsolete(st); + } + rrdset_foreach_done(st); +} + +static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) { + if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) + continue; + + if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) { + rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + svc_rrdhost_cleanup_obsolete_charts(host); + } + + if(host != localhost + && host->trigger_chart_obsoletion_check + && ( + ( + host->senders_last_chart_command + && host->senders_last_chart_command + host->health_delay_up_to < now_realtime_sec() + ) + || (host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec()) + ) + ) { + svc_rrdset_check_obsoletion(host); + host->trigger_chart_obsoletion_check = 0; + } + } + + rrd_unlock(); +} + +static void svc_rrdhost_cleanup_orphan_hosts(RRDHOST *protected_host) { + worker_is_busy(WORKER_JOB_CLEANUP_ORPHAN_HOSTS); + rrd_wrlock(); + + time_t now = now_realtime_sec(); + + RRDHOST *host; + +restart_after_removal: + rrdhost_foreach_write(host) { + if(!rrdhost_should_be_removed(host, protected_host, now)) + continue; + + info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); + + if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) + /* don't delete multi-host DB host files */ + && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) + ) { + worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); + rrdhost_delete_charts(host); + } + else { + worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); + rrdhost_save_charts(host); + } + + worker_is_busy(WORKER_JOB_FREE_HOST); + rrdhost_free(host, 0); + goto restart_after_removal; + } + + rrd_unlock(); +} + +static void service_main_cleanup(void *ptr) +{ + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + debug(D_SYSTEM, "Cleaning up..."); + worker_unregister(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/* + * The service thread. + */ +void *service_main(void *ptr) +{ + worker_register("SERVICE"); + worker_register_job_name(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK, "child chart obsoletion check"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS, "cleanup obsolete charts"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART, "archive chart"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS, "archive chart dimensions"); + worker_register_job_name(WORKER_JOB_ARCHIVE_DIMENSION, "archive dimension"); + worker_register_job_name(WORKER_JOB_CLEANUP_ORPHAN_HOSTS, "cleanup orphan hosts"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS, "cleanup obsolete charts on all hosts"); + worker_register_job_name(WORKER_JOB_FREE_HOST, "free host"); + worker_register_job_name(WORKER_JOB_SAVE_HOST_CHARTS, "save host charts"); + worker_register_job_name(WORKER_JOB_DELETE_HOST_CHARTS, "delete host charts"); + worker_register_job_name(WORKER_JOB_FREE_CHART, "free chart"); + worker_register_job_name(WORKER_JOB_SAVE_CHART, "save chart"); + worker_register_job_name(WORKER_JOB_DELETE_CHART, "delete chart"); + worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension"); + + netdata_thread_cleanup_push(service_main_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = USEC_PER_SEC * SERVICE_HEARTBEAT; + + debug(D_SYSTEM, "Service thread starts"); + + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + svc_rrd_cleanup_obsolete_charts_from_all_hosts(); + svc_rrdhost_cleanup_orphan_hosts(localhost); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/daemon/signals.c b/daemon/signals.c new file mode 100644 index 0000000..c857a9b --- /dev/null +++ b/daemon/signals.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +static int reaper_enabled = 0; + +typedef enum signal_action { + NETDATA_SIGNAL_END_OF_LIST, + NETDATA_SIGNAL_IGNORE, + NETDATA_SIGNAL_EXIT_CLEANLY, + NETDATA_SIGNAL_SAVE_DATABASE, + NETDATA_SIGNAL_REOPEN_LOGS, + NETDATA_SIGNAL_RELOAD_HEALTH, + NETDATA_SIGNAL_FATAL, + NETDATA_SIGNAL_CHILD, +} SIGNAL_ACTION; + +static struct { + int signo; // the signal + const char *name; // the name of the signal + size_t count; // the number of signals received + SIGNAL_ACTION action; // the action to take +} signals_waiting[] = { + { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE }, + { SIGINT , "SIGINT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS }, + { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE }, + { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH }, + { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL }, + { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD }, + + // terminator + { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST } +}; + +static void signal_handler(int signo) { + // find the entry in the list + int i; + for(i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST ; i++) { + if(unlikely(signals_waiting[i].signo == signo)) { + signals_waiting[i].count++; + + if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) { + char buffer[200 + 1]; + snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name); + if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) { + // nothing to do - we cannot write but there is no way to complain about it + ; + } + } + + return; + } + } +} + +void signals_block(void) { + sigset_t sigset; + sigfillset(&sigset); + + if(pthread_sigmask(SIG_BLOCK, &sigset, NULL) == -1) + error("SIGNAL: Could not block signals for threads"); +} + +void signals_unblock(void) { + sigset_t sigset; + sigfillset(&sigset); + + if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) { + error("SIGNAL: Could not unblock signals for threads"); + } +} + +void signals_init(void) { + // Catch signals which we want to use + struct sigaction sa; + sa.sa_flags = 0; + + // Enable process tracking / reaper if running as init (pid == 1). + // This prevents zombie processes when running in a container. + if (getpid() == 1) { + info("SIGNAL: Enabling reaper"); + netdata_popen_tracking_init(); + reaper_enabled = 1; + } else { + info("SIGNAL: Not enabling reaper"); + } + + // ignore all signals while we run in a signal handler + sigfillset(&sa.sa_mask); + + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + switch (signals_waiting[i].action) { + case NETDATA_SIGNAL_IGNORE: + sa.sa_handler = SIG_IGN; + break; + case NETDATA_SIGNAL_CHILD: + if (reaper_enabled == 0) + continue; + // FALLTHROUGH + default: + sa.sa_handler = signal_handler; + break; + } + + if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) + error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name); + } +} + +void signals_restore_SIGCHLD(void) +{ + struct sigaction sa; + + if (reaper_enabled == 0) + return; + + sa.sa_flags = 0; + sigfillset(&sa.sa_mask); + sa.sa_handler = signal_handler; + + if(sigaction(SIGCHLD, &sa, NULL) == -1) + error("SIGNAL: Failed to change signal handler for: SIGCHLD"); +} + +void signals_reset(void) { + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_handler = SIG_DFL; + sa.sa_flags = 0; + + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) + error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name); + } + + if (reaper_enabled == 1) + netdata_popen_tracking_cleanup(); +} + +// reap_child reaps the child identified by pid. +static void reap_child(pid_t pid) { + siginfo_t i; + + errno = 0; + debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid); + if (waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) { + if (errno != ECHILD) + error("SIGNAL: Failed to wait for: %d", pid); + else + debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid); + return; + } else if (i.si_pid == 0) { + // Process didn't exit, this shouldn't happen. + return; + } + + switch (i.si_code) { + case CLD_EXITED: + debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, i.si_status); + break; + case CLD_KILLED: + debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, i.si_status); + break; + case CLD_DUMPED: + debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, i.si_status); + break; + case CLD_STOPPED: + debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, i.si_status); + break; + case CLD_TRAPPED: + debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, i.si_status); + break; + case CLD_CONTINUED: + debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, i.si_status); + break; + default: + debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status); + } +} + +// reap_children reaps all pending children which are not managed by myp. +static void reap_children() { + siginfo_t i; + + while (1 == 1) { + // Identify which process caused the signal so we can determine + // if we need to reap a re-parented process. + i.si_pid = 0; + if (waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1) { + if (errno != ECHILD) // This shouldn't happen with WNOHANG but does. + error("SIGNAL: Failed to wait"); + return; + } else if (i.si_pid == 0) { + // No child exited. + return; + } else if (netdata_popen_tracking_pid_shoud_be_reaped(i.si_pid) == 0) { + // myp managed, sleep for a short time to avoid busy wait while + // this is handled by myp. + usleep(10000); + } else { + // Unknown process, likely a re-parented child, reap it. + reap_child(i.si_pid); + } + } +} + +void signals_handle(void) { + while(1) { + + // pause() causes the calling process (or thread) to sleep until a signal + // is delivered that either terminates the process or causes the invocation + // of a signal-catching function. + if(pause() == -1 && errno == EINTR) { + + // loop once, but keep looping while signals are coming in + // this is needed because a few operations may take some time + // so we need to check for new signals before pausing again + int found = 1; + while(found) { + found = 0; + + // execute the actions of the signals + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + if (signals_waiting[i].count) { + found = 1; + signals_waiting[i].count = 0; + const char *name = signals_waiting[i].name; + + switch (signals_waiting[i].action) { + case NETDATA_SIGNAL_RELOAD_HEALTH: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. Reloading HEALTH configuration...", name); + error_log_limit_reset(); + execute_command(CMD_RELOAD_HEALTH, NULL, NULL); + break; + + case NETDATA_SIGNAL_SAVE_DATABASE: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. Saving databases...", name); + error_log_limit_reset(); + execute_command(CMD_SAVE_DATABASE, NULL, NULL); + break; + + case NETDATA_SIGNAL_REOPEN_LOGS: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. Reopening all log files...", name); + error_log_limit_reset(); + execute_command(CMD_REOPEN_LOGS, NULL, NULL); + break; + + case NETDATA_SIGNAL_EXIT_CLEANLY: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. Cleaning up to exit...", name); + commands_exit(); + netdata_cleanup_and_exit(0); + exit(0); + break; + + case NETDATA_SIGNAL_FATAL: + fatal("SIGNAL: Received %s. netdata now exits.", name); + break; + + case NETDATA_SIGNAL_CHILD: + debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", name); + reap_children(); + break; + + default: + info("SIGNAL: Received %s. No signal handler configured. Ignoring it.", name); + break; + } + } + } + } + } + else + error("SIGNAL: pause() returned but it was not interrupted by a signal."); + } +} diff --git a/daemon/signals.h b/daemon/signals.h new file mode 100644 index 0000000..12b1ed1 --- /dev/null +++ b/daemon/signals.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SIGNALS_H +#define NETDATA_SIGNALS_H 1 + +void signals_init(void); +void signals_block(void); +void signals_unblock(void); +void signals_restore_SIGCHLD(void); +void signals_reset(void); +void signals_handle(void) NORETURN; + +#endif //NETDATA_SIGNALS_H diff --git a/daemon/static_threads.c b/daemon/static_threads.c new file mode 100644 index 0000000..b7730bc --- /dev/null +++ b/daemon/static_threads.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *aclk_main(void *ptr); +void *analytics_main(void *ptr); +void *cpuidlejitter_main(void *ptr); +void *global_statistics_main(void *ptr); +void *global_statistics_workers_main(void *ptr); +void *health_main(void *ptr); +void *pluginsd_main(void *ptr); +void *service_main(void *ptr); +void *statsd_main(void *ptr); +void *timex_main(void *ptr); +void *replication_thread_main(void *ptr __maybe_unused); + +extern bool global_statistics_enabled; + +const struct netdata_static_thread static_threads_common[] = { + { + .name = "PLUGIN[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, + { + .name = "PLUGIN[idlejitter]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "idlejitter", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = cpuidlejitter_main + }, + { + .name = "ANALYTICS", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = analytics_main + }, + { + .name = "GLOBAL_STATS", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_main + }, + { + .name = "WORKERS_STATS", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_workers_main + }, + { + .name = "PLUGINSD", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = pluginsd_main + }, + { + .name = "SERVICE", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = service_main + }, + { + .name = "STATSD", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = statsd_main + }, + { + .name = "EXPORTING", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = exporting_main + }, + { + .name = "STREAM", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = rrdpush_sender_thread + }, + { + .name = "WEB_SERVER[static1]", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = socket_listen_main_static_threaded + }, + +#ifdef ENABLE_ACLK + { + .name = "ACLK_Main", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = aclk_main + }, +#endif + + { + .name = "RRDCONTEXT", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = rrdcontext_main + }, + + { + .name = "REPLICATION", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = replication_thread_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread * +static_threads_concat(const struct netdata_static_thread *lhs, + const struct netdata_static_thread *rhs) +{ + struct netdata_static_thread *res; + + int lhs_size = 0; + for (; lhs[lhs_size].name; lhs_size++) {} + + int rhs_size = 0; + for (; rhs[rhs_size].name; rhs_size++) {} + + res = callocz(lhs_size + rhs_size + 1, sizeof(struct netdata_static_thread)); + + for (int i = 0; i != lhs_size; i++) + memcpy(&res[i], &lhs[i], sizeof(struct netdata_static_thread)); + + for (int i = 0; i != rhs_size; i++) + memcpy(&res[lhs_size + i], &rhs[i], sizeof(struct netdata_static_thread)); + + return res; +} diff --git a/daemon/static_threads.h b/daemon/static_threads.h new file mode 100644 index 0000000..9597da7 --- /dev/null +++ b/daemon/static_threads.h @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STATIC_THREADS_H +#define NETDATA_STATIC_THREADS_H + +#include "common.h" + +struct netdata_static_thread { + // the name of the thread as it should appear in the logs + char *name; + + // the section of netdata.conf to check if this is enabled or not + char *config_section; + + // the name of the config option to check if it is true or false + char *config_name; + + // the current status of the thread + volatile sig_atomic_t enabled; + + // internal use, to maintain a pointer to the created thread + netdata_thread_t *thread; + + // an initialization function to run before spawning the thread + void (*init_routine) (void); + + // the threaded worker + void *(*start_routine) (void *); + + // the environment variable to create + char *env_name; + + // global variable + bool *global_variable; +}; + +#define NETDATA_MAIN_THREAD_RUNNING CONFIG_BOOLEAN_YES +#define NETDATA_MAIN_THREAD_EXITING (CONFIG_BOOLEAN_YES + 1) +#define NETDATA_MAIN_THREAD_EXITED CONFIG_BOOLEAN_NO + +extern const struct netdata_static_thread static_threads_common[]; +extern const struct netdata_static_thread static_threads_linux[]; +extern const struct netdata_static_thread static_threads_freebsd[]; +extern const struct netdata_static_thread static_threads_macos[]; + +struct netdata_static_thread * +static_threads_concat(const struct netdata_static_thread *lhs, + const struct netdata_static_thread *rhs); + +struct netdata_static_thread *static_threads_get(); + +#endif /* NETDATA_STATIC_THREADS_H */ diff --git a/daemon/static_threads_freebsd.c b/daemon/static_threads_freebsd.c new file mode 100644 index 0000000..48066bf --- /dev/null +++ b/daemon/static_threads_freebsd.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +extern void *freebsd_main(void *ptr); + +const struct netdata_static_thread static_threads_freebsd[] = { + { + .name = "PLUGIN[freebsd]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "freebsd", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = freebsd_main + }, + + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +const struct netdata_static_thread static_threads_linux[] = { + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +const struct netdata_static_thread static_threads_macos[] = { + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_freebsd); +} diff --git a/daemon/static_threads_linux.c b/daemon/static_threads_linux.c new file mode 100644 index 0000000..260b2c1 --- /dev/null +++ b/daemon/static_threads_linux.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +extern void *cgroups_main(void *ptr); +extern void *proc_main(void *ptr); +extern void *diskspace_main(void *ptr); +extern void *tc_main(void *ptr); +extern void *timex_main(void *ptr); + +const struct netdata_static_thread static_threads_linux[] = { + { + .name = "PLUGIN[tc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "tc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = tc_main + }, + { + .name = "PLUGIN[diskspace]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "diskspace", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = diskspace_main + }, + { + .name = "PLUGIN[proc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "proc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = proc_main + }, + { + .name = "PLUGIN[cgroups]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "cgroups", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = cgroups_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +const struct netdata_static_thread static_threads_freebsd[] = { + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +const struct netdata_static_thread static_threads_macos[] = { + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_linux); +} diff --git a/daemon/static_threads_macos.c b/daemon/static_threads_macos.c new file mode 100644 index 0000000..72c0324 --- /dev/null +++ b/daemon/static_threads_macos.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +extern void *macos_main(void *ptr); + +const struct netdata_static_thread static_threads_macos[] = { + { + .name = "PLUGIN[macos]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "macos", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = macos_main, + .env_name = NULL, + .global_variable = NULL, + }, + + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} +}; + +const struct netdata_static_thread static_threads_freebsd[] = { + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} +}; + +const struct netdata_static_thread static_threads_linux[] = { + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_macos); +} diff --git a/daemon/system-info.sh b/daemon/system-info.sh new file mode 100755 index 0000000..68cdc48 --- /dev/null +++ b/daemon/system-info.sh @@ -0,0 +1,511 @@ +#!/usr/bin/env sh + +# ------------------------------------------------------------------------------------------------- +# detect the kernel + +KERNEL_NAME="$(uname -s)" +KERNEL_VERSION="$(uname -r)" +ARCHITECTURE="$(uname -m)" + +# ------------------------------------------------------------------------------------------------- +# detect the virtualization and possibly the container technology + +# systemd-detect-virt: https://github.com/systemd/systemd/blob/df423851fcc05cf02281d11aab6aee7b476c1c3b/src/basic/virt.c#L999 +# lscpu: https://github.com/util-linux/util-linux/blob/581b77da7aa4a5205902857184d555bed367e3e0/sys-utils/lscpu.c#L52 +virtualization_normalize_name() { + vname="$1" + case "$vname" in + "User-mode Linux") vname="uml" ;; + "Windows Subsystem for Linux") vname="wsl" ;; + esac + + echo "$vname" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g' +} + +CONTAINER="unknown" +CONT_DETECTION="none" +CONTAINER_IS_OFFICIAL_IMAGE="${NETDATA_OFFICIAL_IMAGE:-false}" + +if [ -z "${VIRTUALIZATION}" ]; then + VIRTUALIZATION="unknown" + VIRT_DETECTION="none" + + if command -v systemd-detect-virt >/dev/null 2>&1; then + VIRTUALIZATION="$(systemd-detect-virt -v)" + VIRT_DETECTION="systemd-detect-virt" + CONTAINER_DETECT_TMP="$(systemd-detect-virt -c)" + [ -n "$CONTAINER_DETECT_TMP" ] && CONTAINER="$CONTAINER_DETECT_TMP" + CONT_DETECTION="systemd-detect-virt" + elif command -v lscpu >/dev/null 2>&1; then + VIRTUALIZATION=$(lscpu | grep "Hypervisor vendor:" | cut -d: -f 2 | awk '{$1=$1};1') + [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="lscpu" + [ -z "$VIRTUALIZATION" ] && lscpu | grep -q "Virtualization:" && VIRTUALIZATION="none" + elif command -v dmidecode >/dev/null 2>&1; then + VIRTUALIZATION=$(dmidecode -s system-product-name 2>/dev/null | grep "VMware\|Virtual\|KVM\|Bochs") + [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="dmidecode" + fi + + if [ -z "${VIRTUALIZATION}" ] || [ "$VIRTUALIZATION" = "unknown" ]; then + if [ "${KERNEL_NAME}" = "FreeBSD" ]; then + VIRTUALIZATION=$(sysctl kern.vm_guest 2>/dev/null | cut -d: -f 2 | awk '{$1=$1};1') + [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="sysctl" + fi + fi + + if [ -z "${VIRTUALIZATION}" ]; then + # Output from the command is outside of spec + VIRTUALIZATION="unknown" + VIRT_DETECTION="none" + elif [ "$VIRTUALIZATION" != "none" ] && [ "$VIRTUALIZATION" != "unknown" ]; then + VIRTUALIZATION=$(virtualization_normalize_name $VIRTUALIZATION) + fi +else + # Passed from outside - probably in docker run + VIRT_DETECTION="provided" +fi + +# ------------------------------------------------------------------------------------------------- +# detect containers with heuristics + +if [ "${CONTAINER}" = "unknown" ]; then + if [ -f /proc/1/sched ]; then + IFS='(, ' read -r process _ < /proc/1/sched + if [ "${process}" = "netdata" ]; then + CONTAINER="container" + CONT_DETECTION="process" + fi + fi + # ubuntu and debian supply /bin/running-in-container + # https://www.apt-browse.org/browse/ubuntu/trusty/main/i386/upstart/1.12.1-0ubuntu4/file/bin/running-in-container + if /bin/running-in-container > /dev/null 2>&1; then + CONTAINER="container" + CONT_DETECTION="/bin/running-in-container" + fi + + # lxc sets environment variable 'container' + #shellcheck disable=SC2154 + if [ -n "${container}" ]; then + CONTAINER="lxc" + CONT_DETECTION="containerenv" + fi + + # docker creates /.dockerenv + # http://stackoverflow.com/a/25518345 + if [ -f "/.dockerenv" ]; then + CONTAINER="docker" + CONT_DETECTION="dockerenv" + fi + +fi + +# ------------------------------------------------------------------------------------------------- +# detect the operating system + +# Initially assume all OS detection values are for a container, these are moved later if we are bare-metal + +CONTAINER_OS_DETECTION="unknown" +CONTAINER_NAME="unknown" +CONTAINER_VERSION="unknown" +CONTAINER_VERSION_ID="unknown" +CONTAINER_ID="unknown" +CONTAINER_ID_LIKE="unknown" + +if [ "${KERNEL_NAME}" = "Darwin" ]; then + CONTAINER_ID=$(sw_vers -productName) + CONTAINER_ID_LIKE="mac" + CONTAINER_NAME="mac" + CONTAINER_VERSION=$(sw_vers -productVersion) + CONTAINER_OS_DETECTION="sw_vers" +elif [ "${KERNEL_NAME}" = "FreeBSD" ]; then + CONTAINER_ID="FreeBSD" + CONTAINER_ID_LIKE="FreeBSD" + CONTAINER_NAME="FreeBSD" + CONTAINER_OS_DETECTION="uname" + CONTAINER_VERSION=$(uname -r) + KERNEL_VERSION=$(uname -K) +else + if [ -f "/etc/os-release" ]; then + eval "$(grep -E "^(NAME|ID|ID_LIKE|VERSION|VERSION_ID)=" < /etc/os-release | sed 's/^/CONTAINER_/')" + CONTAINER_OS_DETECTION="/etc/os-release" + fi + + # shellcheck disable=SC2153 + if [ "${CONTAINER_NAME}" = "unknown" ] || [ "${CONTAINER_VERSION}" = "unknown" ] || [ "${CONTAINER_ID}" = "unknown" ]; then + if [ -f "/etc/lsb-release" ]; then + if [ "${CONTAINER_OS_DETECTION}" = "unknown" ]; then + CONTAINER_OS_DETECTION="/etc/lsb-release" + else + CONTAINER_OS_DETECTION="Mixed" + fi + DISTRIB_ID="unknown" + DISTRIB_RELEASE="unknown" + DISTRIB_CODENAME="unknown" + eval "$(grep -E "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=" < /etc/lsb-release)" + if [ "${CONTAINER_NAME}" = "unknown" ]; then CONTAINER_NAME="${DISTRIB_ID}"; fi + if [ "${CONTAINER_VERSION}" = "unknown" ]; then CONTAINER_VERSION="${DISTRIB_RELEASE}"; fi + if [ "${CONTAINER_ID}" = "unknown" ]; then CONTAINER_ID="${DISTRIB_CODENAME}"; fi + fi + if [ -n "$(command -v lsb_release 2> /dev/null)" ]; then + if [ "${CONTAINER_OS_DETECTION}" = "unknown" ]; then + CONTAINER_OS_DETECTION="lsb_release" + else + CONTAINER_OS_DETECTION="Mixed" + fi + if [ "${CONTAINER_NAME}" = "unknown" ]; then CONTAINER_NAME="$(lsb_release -is 2> /dev/null)"; fi + if [ "${CONTAINER_VERSION}" = "unknown" ]; then CONTAINER_VERSION="$(lsb_release -rs 2> /dev/null)"; fi + if [ "${CONTAINER_ID}" = "unknown" ]; then CONTAINER_ID="$(lsb_release -cs 2> /dev/null)"; fi + fi + fi +fi + +# If Netdata is not running in a container then use the local detection as the host +HOST_OS_DETECTION="unknown" +HOST_NAME="unknown" +HOST_VERSION="unknown" +HOST_VERSION_ID="unknown" +HOST_ID="unknown" +HOST_ID_LIKE="unknown" + +# 'systemd-detect-virt' returns 'none' if there is no hardware/container virtualization. +if [ "${CONTAINER}" = "unknown" ] || [ "${CONTAINER}" = "none" ]; then + for v in NAME ID ID_LIKE VERSION VERSION_ID OS_DETECTION; do + eval "HOST_$v=\$CONTAINER_$v; CONTAINER_$v=none" + done +else + # Otherwise try and use a user-supplied bind-mount into the container to resolve the host details + if [ -e "/host/etc/os-release" ]; then + eval "$(grep -E "^(NAME|ID|ID_LIKE|VERSION|VERSION_ID)=" < /host/etc/os-release | sed 's/^/HOST_/')" + HOST_OS_DETECTION="/host/etc/os-release" + fi + if [ "${HOST_NAME}" = "unknown" ] || [ "${HOST_VERSION}" = "unknown" ] || [ "${HOST_ID}" = "unknown" ]; then + if [ -f "/host/etc/lsb-release" ]; then + if [ "${HOST_OS_DETECTION}" = "unknown" ]; then + HOST_OS_DETECTION="/etc/lsb-release" + else + HOST_OS_DETECTION="Mixed" + fi + DISTRIB_ID="unknown" + DISTRIB_RELEASE="unknown" + DISTRIB_CODENAME="unknown" + eval "$(grep -E "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=" < /etc/lsb-release)" + if [ "${HOST_NAME}" = "unknown" ]; then HOST_NAME="${DISTRIB_ID}"; fi + if [ "${HOST_VERSION}" = "unknown" ]; then HOST_VERSION="${DISTRIB_RELEASE}"; fi + if [ "${HOST_ID}" = "unknown" ]; then HOST_ID="${DISTRIB_CODENAME}"; fi + fi + fi +fi + +# ------------------------------------------------------------------------------------------------- +# Detect information about the CPU + +LCPU_COUNT="unknown" +CPU_MODEL="unknown" +CPU_VENDOR="unknown" +CPU_FREQ="unknown" +CPU_INFO_SOURCE="none" + +possible_cpu_freq="" +nproc="$(command -v nproc)" +lscpu="$(command -v lscpu)" +lscpu_output="" +dmidecode="$(command -v dmidecode)" +dmidecode_output="" + +if [ -n "${lscpu}" ] && lscpu > /dev/null 2>&1; then + lscpu_output="$(LC_NUMERIC=C ${lscpu} 2> /dev/null)" + CPU_INFO_SOURCE="lscpu" + LCPU_COUNT="$(echo "${lscpu_output}" | grep "^CPU(s):" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(echo "${lscpu_output}" | grep "^Vendor ID:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" + fi + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep "^Model name:" | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}')" + fi + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" +elif [ -n "${dmidecode}" ] && dmidecode -t processor > /dev/null 2>&1; then + dmidecode_output="$(${dmidecode} -t processor 2> /dev/null)" + CPU_INFO_SOURCE="dmidecode" + LCPU_COUNT="$(echo "${dmidecode_output}" | grep -F "Thread Count:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(echo "${dmidecode_output}" | grep -F "Manufacturer:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_MODEL="$(echo "${dmidecode_output}" | grep -F "Version:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + possible_cpu_freq="$(echo "${dmidecode_output}" | grep -F "Current Speed:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" +else + if [ -n "${nproc}" ]; then + CPU_INFO_SOURCE="nproc" + LCPU_COUNT="$(${nproc})" + elif [ "${KERNEL_NAME}" = FreeBSD ]; then + CPU_INFO_SOURCE="sysctl" + LCPU_COUNT="$(sysctl -n kern.smp.cpus)" + if ! possible_cpu_freq=$(sysctl -n machdep.tsc_freq 2> /dev/null); then + possible_cpu_freq=$(sysctl -n hw.model 2> /dev/null | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}') + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" + fi + elif [ "${KERNEL_NAME}" = Darwin ]; then + CPU_INFO_SOURCE="sysctl" + LCPU_COUNT="$(sysctl -n hw.logicalcpu)" + elif [ -d /sys/devices/system/cpu ]; then + CPU_INFO_SOURCE="sysfs" + # This is potentially more accurate than checking `/proc/cpuinfo`. + LCPU_COUNT="$(find /sys/devices/system/cpu -mindepth 1 -maxdepth 1 -type d -name 'cpu*' | grep -cEv 'idle|freq')" + elif [ -r /proc/cpuinfo ]; then + CPU_INFO_SOURCE="procfs" + LCPU_COUNT="$(grep -c ^processor /proc/cpuinfo)" + fi + + if [ "${KERNEL_NAME}" = Darwin ]; then + CPU_MODEL="$(sysctl -n machdep.cpu.brand_string)" + if [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_VENDOR="$(sysctl -n machdep.cpu.vendor)" + else + CPU_VENDOR="Apple" + fi + echo "${CPU_INFO_SOURCE}" | grep -qv sysctl && CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysctl" + elif uname --version 2> /dev/null | grep -qF 'GNU coreutils'; then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} uname" + CPU_MODEL="$(uname -p)" + CPU_VENDOR="$(uname -i)" + elif [ "${KERNEL_NAME}" = FreeBSD ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysctl); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysctl" + fi + + CPU_MODEL="$(sysctl -n hw.model)" + elif [ -r /proc/cpuinfo ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv procfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} procfs" + fi + + CPU_MODEL="$(grep -F "model name" /proc/cpuinfo | head -n 1 | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(grep -F "vendor_id" /proc/cpuinfo | head -n 1 | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + fi +fi + +if [ "${KERNEL_NAME}" = Darwin ] && [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_FREQ="$(sysctl -n hw.cpufrequency)" +elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/base_frequency ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" + fi + + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency)" + CPU_FREQ="$((value * 1000))" +elif [ -n "${possible_cpu_freq}" ]; then + CPU_FREQ="${possible_cpu_freq}" +elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" + fi + + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq)" + CPU_FREQ="$((value * 1000))" +elif [ -r /proc/cpuinfo ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv procfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} procfs" + fi + value=$(grep "cpu MHz" /proc/cpuinfo 2>/dev/null | grep -o "[0-9]*" | head -n 1 | awk '{print int($0*1000000)}') + [ -n "$value" ] && CPU_FREQ="$value" +fi + +freq_units="$(echo "${CPU_FREQ}" | cut -f 2 -d ' ')" + +case "${freq_units}" in + GHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000 * 1000 * 1000))" + ;; + MHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000 * 1000))" + ;; + KHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000))" + ;; + *) ;; + +esac + +# ------------------------------------------------------------------------------------------------- +# Detect the total system RAM + +TOTAL_RAM="unknown" +RAM_DETECTION="none" + +if [ "${KERNEL_NAME}" = FreeBSD ]; then + RAM_DETECTION="sysctl" + TOTAL_RAM="$(sysctl -n hw.physmem)" +elif [ "${KERNEL_NAME}" = Darwin ]; then + RAM_DETECTION="sysctl" + TOTAL_RAM="$(sysctl -n hw.memsize)" +elif [ -r /proc/meminfo ]; then + RAM_DETECTION="procfs" + TOTAL_RAM="$(grep -F MemTotal /proc/meminfo | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | cut -f 1 -d ' ')" + TOTAL_RAM="$((TOTAL_RAM * 1024))" +fi + +# ------------------------------------------------------------------------------------------------- +# Detect the total system disk space + +DISK_SIZE="unknown" +DISK_DETECTION="none" + +if [ "${KERNEL_NAME}" = "Darwin" ]; then + if DISK_SIZE=$(diskutil info / 2>/dev/null | awk '/Disk Size/ {total += substr($5,2,length($5))} END { print total }') && + [ -n "$DISK_SIZE" ] && [ "$DISK_SIZE" != "0" ]; then + DISK_DETECTION="diskutil" + else + types='hfs' + + if (lsvfs | grep -q apfs); then + types="${types},apfs" + fi + + if (lsvfs | grep -q ufs); then + types="${types},ufs" + fi + + DISK_DETECTION="df" + DISK_SIZE=$(($(/bin/df -k -t ${types} | tail -n +2 | sed -E 's/\/dev\/disk([[:digit:]]*)s[[:digit:]]*/\/dev\/disk\1/g' | sort -k 1 | awk -F ' ' '{s=$NF;for(i=NF-1;i>=1;i--)s=s FS $i;print s}' | uniq -f 9 | awk '{print $8}' | tr '\n' '+' | rev | cut -f 2- -d '+' | rev) * 1024)) + fi +elif [ "${KERNEL_NAME}" = FreeBSD ]; then + types='ufs' + + if (lsvfs | grep -q zfs); then + types="${types},zfs" + fi + + DISK_DETECTION="df" + total="$(df -t ${types} -c -k | tail -n 1 | awk '{print $2}')" + DISK_SIZE="$((total * 1024))" +else + if [ -d /sys/block ] && [ -r /proc/devices ]; then + dev_major_whitelist='' + + # This is a list of device names used for block storage devices. + # These translate to the prefixs of files in `/dev` indicating the device type. + # They are sorted by lowest used device major number, with dynamically assigned ones at the end. + # We use this to look up device major numbers in `/proc/devices` + device_names='hd sd mfm ad ftl pd nftl dasd intfl mmcblk ub xvd rfd vbd nvme virtblk blkext' + + for name in ${device_names}; do + if grep -qE " ${name}\$" /proc/devices; then + dev_major_whitelist="${dev_major_whitelist}:$(grep -E "${name}\$" /proc/devices | sed -e 's/^[[:space:]]*//' | cut -f 1 -d ' ' | tr '\n' ':'):" + fi + done + + DISK_DETECTION="sysfs" + DISK_SIZE="0" + for disk in /sys/block/*; do + if [ -r "${disk}/size" ] \ + && (echo "${dev_major_whitelist}" | grep -q ":$(cut -f 1 -d ':' "${disk}/dev"):") \ + && grep -qv 1 "${disk}/removable"; then + size="$(($(cat "${disk}/size") * 512))" + DISK_SIZE="$((DISK_SIZE + size))" + fi + done + elif df --version 2> /dev/null | grep -qF "GNU coreutils"; then + DISK_DETECTION="df" + DISK_SIZE=$(($(df -x tmpfs -x devtmpfs -x squashfs -l -B1 --output=source,size | tail -n +2 | sort -u -k 1 | awk '{print $2}' | tr '\n' '+' | head -c -1))) + else + DISK_DETECTION="df" + include_fs_types="ext*|btrfs|xfs|jfs|reiser*|zfs" + DISK_SIZE=$(($(df -T -P | tail -n +2 | sort -u -k 1 | grep "${include_fs_types}" | awk '{print $3}' | tr '\n' '+' | head -c -1) * 1024)) + fi +fi + +# ------------------------------------------------------------------------------------------------- +# Detect whether the node is kubernetes node + +HOST_IS_K8S_NODE="false" + +if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_SERVICE_PORT}" ]; then + # These env vars are set for every container managed by k8s. + HOST_IS_K8S_NODE="true" +elif pgrep "kubelet"; then + # The kubelet is the primary "node agent" that runs on each node. + HOST_IS_K8S_NODE="true" +fi + +# ------------------------------------------------------------------------------------------------ +# Detect instance metadata for VMs running on cloud providers + +CLOUD_TYPE="unknown" +CLOUD_INSTANCE_TYPE="unknown" +CLOUD_INSTANCE_REGION="unknown" + +if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then + # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404. + curl --fail -s -m 1 --noproxy "*" http://169.254.169.254 >/dev/null 2>&1 + ret=$? + # anything but operation timeout. + if [ "$ret" != 28 ]; then + # Try AWS IMDSv2 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + AWS_IMDS_TOKEN="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")" + if [ -n "${AWS_IMDS_TOKEN}" ]; then + CLOUD_TYPE="AWS" + CLOUD_INSTANCE_TYPE="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/instance-type" 2>/dev/null)" + CLOUD_INSTANCE_REGION="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/placement/region" 2>/dev/null)" + fi + fi + + # Try GCE computeMetadata v1 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + if [ -n "$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1")" ]; then + CLOUD_TYPE="GCP" + CLOUD_INSTANCE_TYPE="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/machine-type")" + [ -n "$CLOUD_INSTANCE_TYPE" ] && CLOUD_INSTANCE_TYPE=$(basename "$CLOUD_INSTANCE_TYPE") + CLOUD_INSTANCE_REGION="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/zone")" + [ -n "$CLOUD_INSTANCE_REGION" ] && CLOUD_INSTANCE_REGION=$(basename "$CLOUD_INSTANCE_REGION") && CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION%-*} + fi + fi + + # TODO: needs to be tested in Microsoft Azure + # Try Azure IMDS + # if [ "${CLOUD_TYPE}" = "unknown" ]; then + # AZURE_IMDS_DATA="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance?version=2021-10-01")" + # if [ -n "${AZURE_IMDS_DATA}" ]; then + # CLOUD_TYPE="Azure" + # CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/vmSize?version=2021-10-01&format=text")" + # CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/location?version=2021-10-01&format=text")" + # fi + # fi + fi +fi + +echo "NETDATA_CONTAINER_OS_NAME=${CONTAINER_NAME}" +echo "NETDATA_CONTAINER_OS_ID=${CONTAINER_ID}" +echo "NETDATA_CONTAINER_OS_ID_LIKE=${CONTAINER_ID_LIKE}" +echo "NETDATA_CONTAINER_OS_VERSION=${CONTAINER_VERSION}" +echo "NETDATA_CONTAINER_OS_VERSION_ID=${CONTAINER_VERSION_ID}" +echo "NETDATA_CONTAINER_OS_DETECTION=${CONTAINER_OS_DETECTION}" +echo "NETDATA_CONTAINER_IS_OFFICIAL_IMAGE=${CONTAINER_IS_OFFICIAL_IMAGE}" +echo "NETDATA_HOST_OS_NAME=${HOST_NAME}" +echo "NETDATA_HOST_OS_ID=${HOST_ID}" +echo "NETDATA_HOST_OS_ID_LIKE=${HOST_ID_LIKE}" +echo "NETDATA_HOST_OS_VERSION=${HOST_VERSION}" +echo "NETDATA_HOST_OS_VERSION_ID=${HOST_VERSION_ID}" +echo "NETDATA_HOST_OS_DETECTION=${HOST_OS_DETECTION}" +echo "NETDATA_HOST_IS_K8S_NODE=${HOST_IS_K8S_NODE}" +echo "NETDATA_SYSTEM_KERNEL_NAME=${KERNEL_NAME}" +echo "NETDATA_SYSTEM_KERNEL_VERSION=${KERNEL_VERSION}" +echo "NETDATA_SYSTEM_ARCHITECTURE=${ARCHITECTURE}" +echo "NETDATA_SYSTEM_VIRTUALIZATION=${VIRTUALIZATION}" +echo "NETDATA_SYSTEM_VIRT_DETECTION=${VIRT_DETECTION}" +echo "NETDATA_SYSTEM_CONTAINER=${CONTAINER}" +echo "NETDATA_SYSTEM_CONTAINER_DETECTION=${CONT_DETECTION}" +echo "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT=${LCPU_COUNT}" +echo "NETDATA_SYSTEM_CPU_VENDOR=${CPU_VENDOR}" +echo "NETDATA_SYSTEM_CPU_MODEL=${CPU_MODEL}" +echo "NETDATA_SYSTEM_CPU_FREQ=${CPU_FREQ}" +echo "NETDATA_SYSTEM_CPU_DETECTION=${CPU_INFO_SOURCE}" +echo "NETDATA_SYSTEM_TOTAL_RAM=${TOTAL_RAM}" +echo "NETDATA_SYSTEM_RAM_DETECTION=${RAM_DETECTION}" +echo "NETDATA_SYSTEM_TOTAL_DISK_SIZE=${DISK_SIZE}" +echo "NETDATA_SYSTEM_DISK_DETECTION=${DISK_DETECTION}" +echo "NETDATA_INSTANCE_CLOUD_TYPE=${CLOUD_TYPE}" +echo "NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE=${CLOUD_INSTANCE_TYPE}" +echo "NETDATA_INSTANCE_CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION}" diff --git a/daemon/unit_test.c b/daemon/unit_test.c new file mode 100644 index 0000000..f698618 --- /dev/null +++ b/daemon/unit_test.c @@ -0,0 +1,2600 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +static bool cmd_arg_sanitization_test(const char *expected, const char *src, char *dst, size_t dst_size) { + bool ok = sanitize_command_argument_string(dst, src, dst_size); + + if (!expected) + return ok == false; + + return strcmp(expected, dst) == 0; +} + +bool command_argument_sanitization_tests() { + char dst[1024]; + + for (size_t i = 0; i != 5; i++) { + const char *expected = i == 4 ? "'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 9; i++) { + const char *expected = i == 8 ? "'\\'''\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "''", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "'\\''a" : NULL; + if (cmd_arg_sanitization_test(expected, "'a", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "a'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "a'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 22; i++) { + const char *expected = i == 21 ? "foo'\\''a'\\'''\\'''\\''b" : NULL; + if (cmd_arg_sanitization_test(expected, "--foo'a'''b", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n length: %zu\n", expected, dst, strlen(dst)); + return 1; + } + } + + return 0; +} + +static int check_number_printing(void) { + struct { + NETDATA_DOUBLE n; + const char *correct; + } values[] = { + { .n = 0, .correct = "0" }, + { .n = 0.0000001, .correct = "0.0000001" }, + { .n = 0.00000009, .correct = "0.0000001" }, + { .n = 0.000000001, .correct = "0" }, + { .n = 99.99999999999999999, .correct = "100" }, + { .n = -99.99999999999999999, .correct = "-100" }, + { .n = 123.4567890123456789, .correct = "123.456789" }, + { .n = 9999.9999999, .correct = "9999.9999999" }, + { .n = -9999.9999999, .correct = "-9999.9999999" }, + { .n = 0, .correct = NULL }, + }; + + char netdata[50], system[50]; + int i, failed = 0; + for(i = 0; values[i].correct ; i++) { + print_netdata_double(netdata, values[i].n); + snprintfz(system, 49, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n); + + int ok = 1; + if(strcmp(netdata, values[i].correct) != 0) { + ok = 0; + failed++; + } + + fprintf(stderr, "'%s' (system) printed as '%s' (netdata): %s\n", system, netdata, ok?"OK":"FAILED"); + } + + if(failed) return 1; + return 0; +} + +static int check_rrdcalc_comparisons(void) { + RRDCALC_STATUS a, b; + + // make sure calloc() sets the status to UNINITIALIZED + memset(&a, 0, sizeof(RRDCALC_STATUS)); + if(a != RRDCALC_STATUS_UNINITIALIZED) { + fprintf(stderr, "%s is not zero.\n", rrdcalc_status2string(RRDCALC_STATUS_UNINITIALIZED)); + return 1; + } + + a = RRDCALC_STATUS_REMOVED; + b = RRDCALC_STATUS_UNDEFINED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNDEFINED; + b = RRDCALC_STATUS_UNINITIALIZED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNINITIALIZED; + b = RRDCALC_STATUS_CLEAR; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_CLEAR; + b = RRDCALC_STATUS_RAISED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_RAISED; + b = RRDCALC_STATUS_WARNING; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_WARNING; + b = RRDCALC_STATUS_CRITICAL; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + fprintf(stderr, "RRDCALC_STATUSes are sortable.\n"); + + return 0; +} + +int check_storage_number(NETDATA_DOUBLE n, int debug) { + char buffer[100]; + uint32_t flags = SN_DEFAULT_FLAGS; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(!does_storage_number_exist(s)) { + fprintf(stderr, "Exists flags missing for number " NETDATA_DOUBLE_FORMAT "!\n", n); + return 5; + } + + NETDATA_DOUBLE ddiff = d - n; + NETDATA_DOUBLE dcdiff = ddiff * 100.0 / n; + + if(dcdiff < 0) dcdiff = -dcdiff; + + size_t len = (size_t)print_netdata_double(buffer, d); + NETDATA_DOUBLE p = str2ndd(buffer, NULL); + NETDATA_DOUBLE pdiff = n - p; + NETDATA_DOUBLE pcdiff = pdiff * 100.0 / n; + if(pcdiff < 0) pcdiff = -pcdiff; + + if(debug) { + fprintf(stderr, + NETDATA_DOUBLE_FORMAT + " original\n" NETDATA_DOUBLE_FORMAT " packed and unpacked, (stored as 0x%08X, diff " NETDATA_DOUBLE_FORMAT + ", " NETDATA_DOUBLE_FORMAT "%%)\n" + "%s printed after unpacked (%zu bytes)\n" NETDATA_DOUBLE_FORMAT + " re-parsed from printed (diff " NETDATA_DOUBLE_FORMAT ", " NETDATA_DOUBLE_FORMAT "%%)\n\n", + n, + d, s, ddiff, dcdiff, + buffer, len, + p, pdiff, pcdiff + ); + if(len != strlen(buffer)) fprintf(stderr, "ERROR: printed number %s is reported to have length %zu but it has %zu\n", buffer, len, strlen(buffer)); + + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: packing number " NETDATA_DOUBLE_FORMAT " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, dcdiff); + + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: re-parsing the packed, unpacked and printed number " NETDATA_DOUBLE_FORMAT + " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, pcdiff); + } + + if(len != strlen(buffer)) return 1; + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 3; + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 4; + return 0; +} + +NETDATA_DOUBLE storage_number_min(NETDATA_DOUBLE n) { + NETDATA_DOUBLE r = 1, last; + + do { + last = n; + n /= 2.0; + storage_number t = pack_storage_number(n, SN_DEFAULT_FLAGS); + r = unpack_storage_number(t); + } while(r != 0.0 && r != last); + + return last; +} + +void benchmark_storage_number(int loop, int multiplier) { + int i, j; + NETDATA_DOUBLE n, d; + storage_number s; + unsigned long long user, system, total, mine, their; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_positive_max = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW); + + char buffer[100]; + + struct rusage now, last; + + fprintf(stderr, "\n\nBenchmarking %d numbers, please wait...\n\n", loop); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE SIZE: %zu bytes\n", sizeof(NETDATA_DOUBLE)); + fprintf(stderr, "NETDATA FLOATING POINT SIZE: %zu bytes\n", sizeof(storage_number)); + + mine = (NETDATA_DOUBLE)sizeof(storage_number) * (NETDATA_DOUBLE)loop; + their = (NETDATA_DOUBLE)sizeof(NETDATA_DOUBLE) * (NETDATA_DOUBLE)loop; + + if(mine > their) { + fprintf(stderr, "\nNETDATA NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES MORE MEMORY. Sorry!\n", (NETDATA_DOUBLE)(mine / their)); + } + else { + fprintf(stderr, "\nNETDATA INTERNAL FLOATING POINT ARITHMETICS NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES LESS MEMORY.\n", (NETDATA_DOUBLE)(their / mine)); + } + + fprintf(stderr, "\nNETDATA FLOATING POINT\n"); + fprintf(stderr, "MIN POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW)); + fprintf(stderr, "MAX POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW)); + fprintf(stderr, "MIN NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MIN_RAW)); + fprintf(stderr, "MAX NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW)); + fprintf(stderr, "Maximum accuracy loss accepted: " NETDATA_DOUBLE_FORMAT "%%\n\n\n", (NETDATA_DOUBLE)ACCURACY_LOSS_ACCEPTED_PERCENT); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "INTERNAL LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + print_netdata_double(buffer, n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + snprintfz(buffer, 100, NETDATA_DOUBLE_FORMAT, n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + their = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > total) { + fprintf(stderr, "NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + + fprintf(stderr, "\nINTERNAL LONG DOUBLE PRINTING WITH PACK / UNPACK: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + s = pack_storage_number(n, SN_DEFAULT_FLAGS); + d = unpack_storage_number(s); + print_netdata_double(buffer, d); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > their) { + fprintf(stderr, "WITH PACKING UNPACKING NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "EVEN WITH PACKING AND UNPACKING, NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + +} + +static int check_storage_number_exists() { + uint32_t flags = SN_DEFAULT_FLAGS; + NETDATA_DOUBLE n = 0.0; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(n != d) { + fprintf(stderr, "Wrong number returned. Expected " NETDATA_DOUBLE_FORMAT ", returned " NETDATA_DOUBLE_FORMAT "!\n", n, d); + return 1; + } + + return 0; +} + +int unit_test_storage() { + if(check_storage_number_exists()) return 0; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_negative_max = unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW); + + NETDATA_DOUBLE c, a = 0; + int i, j, g, r = 0; + + for(g = -1; g <= 1 ; g++) { + a = 0; + + if(!g) continue; + + for(j = 0; j < 9 ;j++) { + a += 0.0000001; + c = a * g; + for(i = 0; i < 21 ;i++, c *= 10) { + if(c > 0 && c < storage_number_positive_min) continue; + if(c < 0 && c > storage_number_negative_max) continue; + + if(check_storage_number(c, 1)) return 1; + } + } + } + + // if(check_storage_number(858993459.1234567, 1)) return 1; + benchmark_storage_number(1000000, 2); + return r; +} + +int unit_test_str2ld() { + char *values[] = { + "1.2345678", "-35.6", "0.00123", "23842384234234.2", ".1", "1.2e-10", + "hello", "1wrong", "nan", "inf", NULL + }; + + int i; + for(i = 0; values[i] ; i++) { + char *e_mine = "hello", *e_sys = "world"; + NETDATA_DOUBLE mine = str2ndd(values[i], &e_mine); + NETDATA_DOUBLE sys = strtondd(values[i], &e_sys); + + if(isnan(mine)) { + if(!isnan(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(isinf(mine)) { + if(!isinf(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(mine != sys && ABS(mine-sys) > 0.000001) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ", delta %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys, sys-mine); + return -1; + } + + if(e_mine != e_sys) { + fprintf(stderr, "Value '%s' is parsed correctly, but endptr is not right\n", values[i]); + return -1; + } + + fprintf(stderr, "str2ndd() parsed value '%s' exactly the same way with strtold(), returned %" NETDATA_DOUBLE_MODIFIER + " vs %" NETDATA_DOUBLE_MODIFIER "\n", values[i], mine, sys); + } + + return 0; +} + +int unit_test_buffer() { + BUFFER *wb = buffer_create(1); + char string[2048 + 1]; + char final[9000 + 1]; + int i; + + for(i = 0; i < 2048; i++) + string[i] = (char)((i % 24) + 'a'); + string[2048] = '\0'; + + const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s"; + buffer_sprintf(wb, fmt, string, string, string, string); + snprintfz(final, 9000, fmt, string, string, string, string); + + const char *s = buffer_tostring(wb); + + if(buffer_strlen(wb) != strlen(final) || strcmp(s, final) != 0) { + fprintf(stderr, "\nbuffer_sprintf() is faulty.\n"); + fprintf(stderr, "\nstring : %s (length %zu)\n", string, strlen(string)); + fprintf(stderr, "\nbuffer : %s (length %zu)\n", s, buffer_strlen(wb)); + fprintf(stderr, "\nexpected: %s (length %zu)\n", final, strlen(final)); + buffer_free(wb); + return -1; + } + + fprintf(stderr, "buffer_sprintf() works as expected.\n"); + buffer_free(wb); + return 0; +} + +int unit_test_static_threads() { + struct netdata_static_thread *static_threads = static_threads_get(); + + /* + * make sure enough static threads have been registered + */ + if (!static_threads) { + fprintf(stderr, "empty static_threads array\n"); + return 1; + } + + int n; + for (n = 0; static_threads[n].start_routine != NULL; n++) {} + + if (n < 2) { + fprintf(stderr, "only %d static threads registered", n); + freez(static_threads); + return 1; + } + + /* + * verify that each thread's start routine is unique. + */ + for (int i = 0; i != n - 1; i++) { + for (int j = i + 1; j != n; j++) { + if (static_threads[i].start_routine != static_threads[j].start_routine) + continue; + + fprintf(stderr, "Found duplicate threads with name: %s\n", static_threads[i].name); + freez(static_threads); + return 1; + } + } + + freez(static_threads); + return 0; +} + +// -------------------------------------------------------------------------------------------------------------------- + +struct feed_values { + unsigned long long microseconds; + collected_number value; +}; + +struct test { + char name[100]; + char description[1024]; + + int update_every; + unsigned long long multiplier; + unsigned long long divisor; + RRD_ALGORITHM algorithm; + + unsigned long feed_entries; + unsigned long result_entries; + struct feed_values *feed; + NETDATA_DOUBLE *results; + + collected_number *feed2; + NETDATA_DOUBLE *results2; +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test1 +// test absolute values stored + +struct feed_values test1_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test1_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test1 = { + "test1", // name + "test absolute values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test1_feed, // feed + test1_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test2 +// test absolute values stored in the middle of second boundaries + +struct feed_values test2_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test2_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test2 = { + "test2", // name + "test absolute values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test2_feed, // feed + test2_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test3 + +struct feed_values test3_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test3_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test3 = { + "test3", // name + "test incremental values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test3_feed, // feed + test3_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test4 + +struct feed_values test4_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test4_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test4 = { + "test4", // name + "test incremental values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test4_feed, // feed + test4_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5 - 32 bit overflows + +struct feed_values test5_feed[] = { + { 0, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5_results[] = { + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, +}; + +struct test test5 = { + "test5", // name + "test 32-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5_feed, // feed + test5_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5b - 64 bit overflows + +struct feed_values test5b_feed[] = { + { 0, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5b_results[] = { + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, +}; + +struct test test5b = { + "test5b", // name + "test 64-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5b_feed, // feed + test5b_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test6 + +struct feed_values test6_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test6_results[] = { + 4000, 4000, 4000, 4000 +}; + +struct test test6 = { + "test6", // name + "test incremental values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 16, // feed entries + 4, // result entries + test6_feed, // feed + test6_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test7 + +struct feed_values test7_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, + { 2000000, 7000 }, + { 2000000, 8000 }, + { 2000000, 9000 }, + { 2000000, 10000 }, +}; + +NETDATA_DOUBLE test7_results[] = { + 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500 +}; + +struct test test7 = { + "test7", // name + "test incremental values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 18, // result entries + test7_feed, // feed + test7_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test8 + +struct feed_values test8_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, +}; + +NETDATA_DOUBLE test8_results[] = { + 1250, 2000, 2250, 3000, 3250, 4000, 4250, 5000, 5250, 6000 +}; + +struct test test8 = { + "test8", // name + "test absolute values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 6, // feed entries + 10, // result entries + test8_feed, // feed + test8_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test9 + +struct feed_values test9_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test9_results[] = { + 4000, 8000, 12000, 16000 +}; + +struct test test9 = { + "test9", // name + "test absolute values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 16, // feed entries + 4, // result entries + test9_feed, // feed + test9_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test10 + +struct feed_values test10_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test10_results[] = { + 1000, 1000, 1000, 1000, 1000, 1000, 1000 +}; + +struct test test10 = { + "test10", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 7, // result entries + test10_feed, // feed + test10_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test11 + +struct feed_values test11_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test11_feed2[] = { + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +NETDATA_DOUBLE test11_results[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +NETDATA_DOUBLE test11_results2[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +struct test test11 = { + "test11", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test11_feed, // feed + test11_results, // results + test11_feed2, // feed2 + test11_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test12 + +struct feed_values test12_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test12_feed2[] = { + 10*3, 20*3, 30*3, 40*3, 50*3, 60*3, 70*3, 80*3, 90*3, 100*3 +}; + +NETDATA_DOUBLE test12_results[] = { + 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; + +NETDATA_DOUBLE test12_results2[] = { + 75, 75, 75, 75, 75, 75, 75, 75, 75 +}; + +struct test test12 = { + "test12", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test12_feed, // feed + test12_results, // results + test12_feed2, // feed2 + test12_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test13 + +struct feed_values test13_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test13_results[] = { + 83.3333300, 100, 100, 100, 100, 100, 100 +}; + +struct test test13 = { + "test13", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 7, // result entries + test13_feed, // feed + test13_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test14 + +struct feed_values test14_feed[] = { + { 0, 0x015397dc42151c41ULL }, + { 13573000, 0x015397e612e3ff5dULL }, + { 29969000, 0x015397f905ecdaa8ULL }, + { 29958000, 0x0153980c2a6cb5e4ULL }, + { 30054000, 0x0153981f4032fb83ULL }, + { 34952000, 0x015398355efadaccULL }, + { 25046000, 0x01539845ba4b09f8ULL }, + { 29947000, 0x0153985948bf381dULL }, + { 30054000, 0x0153986c5b9c27e2ULL }, + { 29942000, 0x0153987f888982d0ULL }, +}; + +NETDATA_DOUBLE test14_results[] = { + 23.1383300, 21.8515600, 21.8804600, 21.7788000, 22.0112200, 22.4386100, 22.0906100, 21.9150800 +}; + +struct test test14 = { + "test14", // name + "issue #981 with real data", + 30, // update_every + 8, // multiplier + 1000000000, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14_feed, // feed + test14_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14b_feed[] = { + { 0, 0 }, + { 13573000, 13573000 }, + { 29969000, 13573000 + 29969000 }, + { 29958000, 13573000 + 29969000 + 29958000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 }, + { 34952000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 }, + { 25046000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 }, + { 29947000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 }, + { 29942000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 + 29942000 }, +}; + +NETDATA_DOUBLE test14b_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14b = { + "test14b", // name + "issue #981 with dummy data", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14b_feed, // feed + test14b_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14c_feed[] = { + { 29000000, 29000000 }, + { 1000000, 29000000 + 1000000 }, + { 30000000, 29000000 + 1000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, +}; + +NETDATA_DOUBLE test14c_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14c = { + "test14c", // name + "issue #981 with dummy data, checking for late start", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test14c_feed, // feed + test14c_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test15 + +struct feed_values test15_feed[] = { + { 0, 1068066388 }, + { 1008752, 1068822698 }, + { 993809, 1069573072 }, + { 995911, 1070324135 }, + { 1014562, 1071078166 }, + { 994684, 1071831349 }, + { 993128, 1072235739 }, + { 1010332, 1072958871 }, + { 1003394, 1073707019 }, + { 995201, 1074460255 }, +}; + +collected_number test15_feed2[] = { + 178825286, 178825286, 178825286, 178825286, 178825498, 178825498, 179165652, 179202964, 179203282, 179204130 +}; + +NETDATA_DOUBLE test15_results[] = { + 5857.4080000, 5898.4540000, 5891.6590000, 5806.3160000, 5914.2640000, 3202.2630000, 5589.6560000, 5822.5260000, 5911.7520000 +}; + +NETDATA_DOUBLE test15_results2[] = { + 0.0000000, 0.0000000, 0.0024944, 1.6324779, 0.0212777, 2655.1890000, 290.5387000, 5.6733610, 6.5960220 +}; + +struct test test15 = { + "test15", // name + "test incremental with 2 dimensions", + 1, // update_every + 8, // multiplier + 1024, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test15_feed, // feed + test15_results, // results + test15_feed2, // feed2 + test15_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- + +int run_test(struct test *test) +{ + fprintf(stderr, "\nRunning test '%s':\n%s\n", test->name, test->description); + + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = test->update_every; + + char name[101]; + snprintfz(name, 100, "unittest-%s", test->name); + + // create the chart + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1 + , test->update_every, RRDSET_TYPE_LINE); + RRDDIM *rd = rrddim_add(st, "dim1", NULL, test->multiplier, test->divisor, test->algorithm); + + RRDDIM *rd2 = NULL; + if(test->feed2) + rd2 = rrddim_add(st, "dim2", NULL, test->multiplier, test->divisor, test->algorithm); + + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + // feed it with the test data + time_t time_now = 0, time_start = now_realtime_sec(); + unsigned long c; + collected_number last = 0; + for(c = 0; c < test->feed_entries; c++) { + if(debug_flags) fprintf(stderr, "\n\n"); + + if(c) { + time_now += test->feed[c].microseconds; + fprintf(stderr, " > %s: feeding position %lu, after %0.3f seconds (%0.3f seconds from start), delta " NETDATA_DOUBLE_FORMAT + ", rate " NETDATA_DOUBLE_FORMAT "\n", + test->name, c+1, + (float)test->feed[c].microseconds / 1000000.0, + (float)time_now / 1000000.0, + ((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor, + (((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor) / (NETDATA_DOUBLE)test->feed[c].microseconds * (NETDATA_DOUBLE)1000000); + + // rrdset_next_usec_unfiltered(st, test->feed[c].microseconds); + st->usec_since_last_update = test->feed[c].microseconds; + } + else { + fprintf(stderr, " > %s: feeding position %lu\n", test->name, c+1); + } + + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd), test->feed[c].value); + rrddim_set(st, "dim1", test->feed[c].value); + last = test->feed[c].value; + + if(rd2) { + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd2), test->feed2[c]); + rrddim_set(st, "dim2", test->feed2[c]); + } + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st, now, false); + + // align the first entry to second boundary + if(!c) { + fprintf(stderr, " > %s: fixing first collection time to be %llu microseconds to second boundary\n", test->name, test->feed[c].microseconds); + rd->last_collected_time.tv_usec = st->last_collected_time.tv_usec = st->last_updated.tv_usec = test->feed[c].microseconds; + // time_start = st->last_collected_time.tv_sec; + } + } + + // check the result + int errors = 0; + + if(st->counter != test->result_entries) { + fprintf(stderr, " %s stored %zu entries, but we were expecting %lu, ### E R R O R ###\n", test->name, st->counter, test->result_entries); + errors++; + } + + unsigned long max = (st->counter < test->result_entries)?st->counter:test->result_entries; + for(c = 0 ; c < max ; c++) { + NETDATA_DOUBLE v = unpack_storage_number(rd->db[c]); + NETDATA_DOUBLE n = unpack_storage_number(pack_storage_number(test->results[c], SN_DEFAULT_FLAGS)); + int same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd), c+1, + (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + + if(!same) errors++; + + if(rd2) { + v = unpack_storage_number(rd2->db[c]); + n = test->results2[c]; + same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd2), c+1, + (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + if(!same) errors++; + } + } + + return errors; +} + +static int test_variable_renames(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + + fprintf(stderr, "Creating chart\n"); + RRDSET *st = rrdset_create_localhost("chart", "ID", NULL, "family", "context", "Unit Testing", "a value", "unittest", NULL, 1, 1, RRDSET_TYPE_LINE); + fprintf(stderr, "Created chart with id '%s', name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Creating dimension DIM1\n"); + RRDDIM *rd1 = rrddim_add(st, "DIM1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Creating dimension DIM2\n"); + RRDDIM *rd2 = rrddim_add(st, "DIM2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming chart to CHARTNAME1\n"); + rrdset_reset_name(st, "CHARTNAME1"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming chart to CHARTNAME2\n"); + rrdset_reset_name(st, "CHARTNAME2"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME1\n"); + rrddim_reset_name(st, rd1, "DIM1NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME2\n"); + rrddim_reset_name(st, rd1, "DIM1NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME1\n"); + rrddim_reset_name(st, rd2, "DIM2NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME2\n"); + rrddim_reset_name(st, rd2, "DIM2NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + BUFFER *buf = buffer_create(1); + health_api_v1_chart_variables2json(st, buf); + fprintf(stderr, "%s", buffer_tostring(buf)); + buffer_free(buf); + return 1; +} + +int check_strdupz_path_subpath() { + + struct strdupz_path_subpath_checks { + const char *path; + const char *subpath; + const char *result; + } checks[] = { + { "", "", "." }, + { "/", "", "/" }, + { "/etc/netdata", "", "/etc/netdata" }, + { "/etc/netdata///", "", "/etc/netdata" }, + { "/etc/netdata///", "health.d", "/etc/netdata/health.d" }, + { "/etc/netdata///", "///health.d", "/etc/netdata/health.d" }, + { "/etc/netdata", "///health.d", "/etc/netdata/health.d" }, + { "", "///health.d", "./health.d" }, + { "/", "///health.d", "/health.d" }, + + // terminator + { NULL, NULL, NULL } + }; + + size_t i; + for(i = 0; checks[i].result ; i++) { + char *s = strdupz_path_subpath(checks[i].path, checks[i].subpath); + fprintf(stderr, "strdupz_path_subpath(\"%s\", \"%s\") = \"%s\": ", checks[i].path, checks[i].subpath, s); + if(!s || strcmp(s, checks[i].result) != 0) { + freez(s); + fprintf(stderr, "FAILED\n"); + return 1; + } + else { + freez(s); + fprintf(stderr, "OK\n"); + } + } + + return 0; +} + +int run_all_mockup_tests(void) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + if(check_strdupz_path_subpath()) + return 1; + + if(check_number_printing()) + return 1; + + if(check_rrdcalc_comparisons()) + return 1; + + if(!test_variable_renames()) + return 1; + + if(run_test(&test1)) + return 1; + + if(run_test(&test2)) + return 1; + + if(run_test(&test3)) + return 1; + + if(run_test(&test4)) + return 1; + + if(run_test(&test5)) + return 1; + + if(run_test(&test5b)) + return 1; + + if(run_test(&test6)) + return 1; + + if(run_test(&test7)) + return 1; + + if(run_test(&test8)) + return 1; + + if(run_test(&test9)) + return 1; + + if(run_test(&test10)) + return 1; + + if(run_test(&test11)) + return 1; + + if(run_test(&test12)) + return 1; + + if(run_test(&test13)) + return 1; + + if(run_test(&test14)) + return 1; + + if(run_test(&test14b)) + return 1; + + if(run_test(&test14c)) + return 1; + + if(run_test(&test15)) + return 1; + + + + return 0; +} + +int unit_test(long delay, long shift) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + static int repeat = 0; + repeat++; + + char name[101]; + snprintfz(name, 100, "unittest-%d-%ld-%ld", repeat, delay, shift); + + //debug_flags = 0xffffffff; + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = 1; + + int do_abs = 1; + int do_inc = 1; + int do_abst = 0; + int do_absi = 0; + + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1, 1 + , RRDSET_TYPE_LINE); + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + RRDDIM *rdabs = NULL; + RRDDIM *rdinc = NULL; + RRDDIM *rdabst = NULL; + RRDDIM *rdabsi = NULL; + + if(do_abs) rdabs = rrddim_add(st, "absolute", "absolute", 1, 1, RRD_ALGORITHM_ABSOLUTE); + if(do_inc) rdinc = rrddim_add(st, "incremental", "incremental", 1, 1, RRD_ALGORITHM_INCREMENTAL); + if(do_abst) rdabst = rrddim_add(st, "percentage-of-absolute-row", "percentage-of-absolute-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL); + if(do_absi) rdabsi = rrddim_add(st, "percentage-of-incremental-row", "percentage-of-incremental-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + + long increment = 1000; + collected_number i = 0; + + unsigned long c, dimensions = rrdset_number_of_dimensions(st); + RRDDIM *rd; + + for(c = 0; c < 20 ;c++) { + i += increment; + + fprintf(stderr, "\n\nLOOP = %lu, DELAY = %ld, VALUE = " COLLECTED_NUMBER_FORMAT "\n", c, delay, i); + if(c) { + // rrdset_next_usec_unfiltered(st, delay); + st->usec_since_last_update = delay; + } + if(do_abs) rrddim_set(st, "absolute", i); + if(do_inc) rrddim_set(st, "incremental", i); + if(do_abst) rrddim_set(st, "percentage-of-absolute-row", i); + if(do_absi) rrddim_set(st, "percentage-of-incremental-row", i); + + if(!c) { + now_realtime_timeval(&st->last_collected_time); + st->last_collected_time.tv_usec = shift; + } + + // prevent it from deleting the dimensions + rrddim_foreach_read(rd, st) { + rd->last_collected_time.tv_sec = st->last_collected_time.tv_sec; + } + rrddim_foreach_done(rd); + + rrdset_done(st); + } + + unsigned long oincrement = increment; + increment = increment * st->update_every * 1000000 / delay; + fprintf(stderr, "\n\nORIGINAL INCREMENT: %lu, INCREMENT %ld, DELAY %ld, SHIFT %ld\n", oincrement * 10, increment * 10, delay, shift); + + int ret = 0; + storage_number sn; + NETDATA_DOUBLE cn, v; + for(c = 0 ; c < st->counter ; c++) { + fprintf(stderr, "\nPOSITION: c = %lu, EXPECTED VALUE %lu\n", c, (oincrement + c * increment + increment * (1000000 - shift) / 1000000 )* 10); + + rrddim_foreach_read(rd, st) { + sn = rd->db[c]; + cn = unpack_storage_number(sn); + fprintf(stderr, "\t %s " NETDATA_DOUBLE_FORMAT " (PACKED AS " STORAGE_NUMBER_FORMAT ") -> ", rrddim_id(rd), cn, sn); + + if(rd == rdabs) v = + ( oincrement + // + (increment * (1000000 - shift) / 1000000) + + (c + 1) * increment + ); + + else if(rd == rdinc) v = (c?(increment):(increment * (1000000 - shift) / 1000000)); + else if(rd == rdabst) v = oincrement / dimensions / 10; + else if(rd == rdabsi) v = oincrement / dimensions / 10; + else v = 0; + + if(v == cn) fprintf(stderr, "passed.\n"); + else { + fprintf(stderr, "ERROR! (expected " NETDATA_DOUBLE_FORMAT ")\n", v); + ret = 1; + } + } + rrddim_foreach_done(rd); + } + + if(ret) + fprintf(stderr, "\n\nUNIT TEST(%ld, %ld) FAILED\n\n", delay, shift); + + return ret; +} + +int test_sqlite(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + sqlite3 *db_meta; + fprintf(stderr, "Testing SQLIte\n"); + + int rc = sqlite3_open(":memory:", &db_meta); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: DB init failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_meta, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Create table failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_meta, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Delete with LIMIT failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_meta, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Update with LIMIT failed\n"); + return 1; + } + + BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); + char *uuid_str = "0000_000"; + + buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); + rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); + if (rc != SQLITE_OK) + goto error; + buffer_flush(sql); + + buffer_sprintf(sql, INDEX_ACLK_ALERT, uuid_str, uuid_str); + rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); + if (rc != SQLITE_OK) + goto error; + buffer_flush(sql); + + buffer_free(sql); + fprintf(stderr,"SQLite is OK\n"); + rc = sqlite3_close_v2(db_meta); + return 0; +error: + rc = sqlite3_close_v2(db_meta); + fprintf(stderr,"SQLite statement failed: %s\n", buffer_tostring(sql)); + buffer_free(sql); + fprintf(stderr,"SQLite tests failed\n"); + return 1; +} + +int unit_test_bitmap256(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + + BITMAP256 test_bitmap = {0}; + + bitmap256_set_bit(&test_bitmap, 0, 1); + bitmap256_set_bit(&test_bitmap, 64, 1); + bitmap256_set_bit(&test_bitmap, 128, 1); + bitmap256_set_bit(&test_bitmap, 192, 1); + if (test_bitmap.data[0] == 1) + fprintf(stderr, "%s() INDEX 1 is OK\n", __FUNCTION__ ); + if (test_bitmap.data[1] == 1) + fprintf(stderr, "%s() INDEX 65 is OK\n", __FUNCTION__ ); + if (test_bitmap.data[2] == 1) + fprintf(stderr, "%s() INDEX 129 is OK\n", __FUNCTION__ ); + if (test_bitmap.data[3] == 1) + fprintf(stderr, "%s() INDEX 192 is OK\n", __FUNCTION__ ); + + uint8_t i=0; + int j = 0; + do { + bitmap256_set_bit(&test_bitmap, i++, 1); + j++; + } while (j < 256); + + if (test_bitmap.data[0] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 0 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 0 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[1] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 1 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 1 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[2] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 2 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 2 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[3] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 3 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 3 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + i = 0; + j = 0; + do { + bitmap256_set_bit(&test_bitmap, i++, 0); + j++; + } while (j < 256); + + if (test_bitmap.data[0] == 0) + fprintf(stderr, "%s() INDEX 0 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 0 is not reset FAILED\n", __FUNCTION__); + return 1; + } + if (test_bitmap.data[1] == 0) + fprintf(stderr, "%s() INDEX 1 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 1 is not reset FAILED\n", __FUNCTION__); + return 1; + } + + if (test_bitmap.data[2] == 0) + fprintf(stderr, "%s() INDEX 2 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 2 is not reset FAILED\n", __FUNCTION__); + return 1; + } + + if (test_bitmap.data[3] == 0) + fprintf(stderr, "%s() INDEX 3 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 3 is not reset FAILED\n", __FUNCTION__); + return 1; + } + + i=0; + j = 0; + do { + bitmap256_set_bit(&test_bitmap, i, 1); + i += 4; + j += 4; + } while (j < 256); + + if (test_bitmap.data[0] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 0 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 0 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[1] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 1 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 1 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[1]); + return 1; + } + + if (test_bitmap.data[2] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 2 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 2 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[2]); + return 1; + } + + if (test_bitmap.data[3] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 3 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 3 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[3]); + return 1; + } + + fprintf(stderr, "%s() tests passed\n", __FUNCTION__); + return 0; +} + +#ifdef ENABLE_DBENGINE +static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number value, time_t now) +{ + rd->last_collected_time.tv_sec = now; + rd->last_collected_time.tv_usec = 0; + rd->collected_value = value; + rd->updated = 1; + + rd->collections_counter++; + + collected_number v = (value >= 0) ? value : -value; + if(unlikely(v > rd->collected_value_max)) rd->collected_value_max = v; +} + +static RRDHOST *dbengine_rrdhost_find_or_create(char *name) +{ + /* We don't want to drop metrics when generating load, we prefer to block data generation itself */ + rrdeng_drop_metrics_under_page_cache_pressure = 0; + + return rrdhost_find_or_create( + name + , name + , name + , os_type + , netdata_configured_timezone + , netdata_configured_abbrev_timezone + , netdata_configured_utc_offset + , "" + , program_name + , program_version + , default_rrd_update_every + , default_rrd_history_entries + , RRD_MEMORY_MODE_DBENGINE + , default_health_enabled + , default_rrdpush_enabled + , default_rrdpush_destination + , default_rrdpush_api_key + , default_rrdpush_send_charts_matching + , default_rrdpush_enable_replication + , default_rrdpush_seconds_to_replicate + , default_rrdpush_replication_step + , NULL + , 0 + ); +} + +// constants for test_dbengine +static const int CHARTS = 64; +static const int DIMS = 16; // That gives us 64 * 16 = 1024 metrics +#define REGIONS (3) // 3 regions of update_every +// first region update_every is 2, second is 3, third is 1 +static const int REGION_UPDATE_EVERY[REGIONS] = {2, 3, 1}; +static const int REGION_POINTS[REGIONS] = { + 16384, // This produces 64MiB of metric data for the first region: update_every = 2 + 16384, // This produces 64MiB of metric data for the second region: update_every = 3 + 16384, // This produces 64MiB of metric data for the third region: update_every = 1 +}; +static const int QUERY_BATCH = 4096; + +static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int update_every) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + int i, j; + char name[101]; + + for (i = 0 ; i < CHARTS ; ++i) { + snprintfz(name, 100, "dbengine-chart-%d", i); + + // create the chart + st[i] = rrdset_create(host, "netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", + NULL, 1, update_every, RRDSET_TYPE_LINE); + rrdset_flag_set(st[i], RRDSET_FLAG_DEBUG); + rrdset_flag_set(st[i], RRDSET_FLAG_STORE_FIRST); + for (j = 0 ; j < DIMS ; ++j) { + snprintfz(name, 100, "dim-%d", j); + + rd[i][j] = rrddim_add(st[i], name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + } + + // Initialize DB with the very first entries + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0 ; j < DIMS ; ++j) { + rd[i][j]->last_collected_time.tv_sec = + st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = 2 * API_RELATIVE_TIME_MAX - 1; + rd[i][j]->last_collected_time.tv_usec = + st[i]->last_collected_time.tv_usec = st[i]->last_updated.tv_usec = 0; + } + } + for (i = 0 ; i < CHARTS ; ++i) { + st[i]->usec_since_last_update = USEC_PER_SEC; + + for (j = 0; j < DIMS; ++j) { + rrddim_set_by_pointer_fake_time(rd[i][j], 69, 2 * API_RELATIVE_TIME_MAX); // set first value to 69 + } + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st[i], now, false); + } + // Fluh pages for subsequent real values + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0; j < DIMS; ++j) { + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle); + } + } +} + +// Feeds the database region with test data, returns last timestamp of region +static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int current_region, time_t time_start) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + time_t time_now; + int i, j, c, update_every; + collected_number next; + + update_every = REGION_UPDATE_EVERY[current_region]; + time_now = time_start; + // feed it with the test data + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0 ; j < DIMS ; ++j) { + rd[i][j]->tiers[0]->collect_ops->change_collection_frequency(rd[i][j]->tiers[0]->db_collection_handle, update_every); + + rd[i][j]->last_collected_time.tv_sec = + st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = time_now; + rd[i][j]->last_collected_time.tv_usec = + st[i]->last_collected_time.tv_usec = st[i]->last_updated.tv_usec = 0; + } + } + for (c = 0; c < REGION_POINTS[current_region] ; ++c) { + time_now += update_every; // time_now = start + (c + 1) * update_every + + for (i = 0 ; i < CHARTS ; ++i) { + st[i]->usec_since_last_update = USEC_PER_SEC * update_every; + + for (j = 0; j < DIMS; ++j) { + next = ((collected_number)i * DIMS) * REGION_POINTS[current_region] + + j * REGION_POINTS[current_region] + c; + rrddim_set_by_pointer_fake_time(rd[i][j], next, time_now); + } + + struct timeval now; + now.tv_sec = time_now; + now.tv_usec = 0; + + rrdset_timed_done(st[i], now, false); + } + } + return time_now; //time_end +} + +// Checks the metric data for the given region, returns number of errors +static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int current_region, time_t time_start) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + uint8_t same; + time_t time_now, time_retrieved, end_time; + int i, j, k, c, errors, update_every; + collected_number last; + NETDATA_DOUBLE value, expected; + struct storage_engine_query_handle handle; + size_t value_errors = 0, time_errors = 0; + + update_every = REGION_UPDATE_EVERY[current_region]; + errors = 0; + + // check the result + for (c = 0; c < REGION_POINTS[current_region] ; c += QUERY_BATCH) { + time_now = time_start + (c + 1) * update_every; + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0; j < DIMS; ++j) { + rd[i][j]->tiers[0]->query_ops->init(rd[i][j]->tiers[0]->db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every); + for (k = 0; k < QUERY_BATCH; ++k) { + last = ((collected_number)i * DIMS) * REGION_POINTS[current_region] + + j * REGION_POINTS[current_region] + c + k; + expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); + + STORAGE_POINT sp = rd[i][j]->tiers[0]->query_ops->next_metric(&handle); + value = sp.sum; + time_retrieved = sp.start_time; + end_time = sp.end_time; + + same = (roundndd(value) == roundndd(expected)) ? 1 : 0; + if(!same) { + if(!value_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now + k * update_every, expected, value); + value_errors++; + errors++; + } + if(end_time != time_now + k * update_every) { + if(!time_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now + k * update_every, (unsigned long)time_retrieved); + time_errors++; + errors++; + } + } + rd[i][j]->tiers[0]->query_ops->finalize(&handle); + } + } + } + + if(value_errors) + fprintf(stderr, "%zu value errors encountered\n", value_errors); + + if(time_errors) + fprintf(stderr, "%zu time errors encountered\n", time_errors); + + return errors; +} + +// Check rrdr transformations +static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int current_region, time_t time_start, time_t time_end) +{ + int update_every = REGION_UPDATE_EVERY[current_region]; + fprintf(stderr, "%s() running on region %d, start time %lld, end time %lld, update every %d...\n", __FUNCTION__, current_region, (long long)time_start, (long long)time_end, update_every); + uint8_t same; + time_t time_now, time_retrieved; + int i, j, errors, value_errors = 0, time_errors = 0; + long c; + collected_number last; + NETDATA_DOUBLE value, expected; + + errors = 0; + long points = (time_end - time_start) / update_every; + for (i = 0 ; i < CHARTS ; ++i) { + ONEWAYALLOC *owa = onewayalloc_create(0); + RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start, time_end, + RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS, + NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); + if (!r) { + fprintf(stderr, " DB-engine unittest %s: empty RRDR on region %d ### E R R O R ###\n", rrdset_name(st[i]), current_region); + return ++errors; + } else { + assert(r->internal.qt->request.st == st[i]); + for (c = 0; c != (long)rrdr_rows(r) ; ++c) { + RRDDIM *d; + time_now = time_start + (c + 1) * update_every; + time_retrieved = r->t[c]; + + // for each dimension + rrddim_foreach_read(d, r->internal.qt->request.st) { + if(unlikely(d_dfe.counter >= r->d)) break; // d_counter is provided by the dictionary dfe + + j = (int)d_dfe.counter; + + NETDATA_DOUBLE *cn = &r->v[ c * r->d ]; + value = cn[j]; + assert(rd[i][j] == d); + + last = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c; + expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); + + same = (roundndd(value) == roundndd(expected)) ? 1 : 0; + if(!same) { + if(value_errors < 20) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value); + value_errors++; + } + if(time_retrieved != time_now) { + if(time_errors < 20) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved); + time_errors++; + } + } + rrddim_foreach_done(d); + } + rrdr_free(owa, r); + } + onewayalloc_destroy(owa); + } + + if(value_errors) + fprintf(stderr, "%d value errors encountered\n", value_errors); + + if(time_errors) + fprintf(stderr, "%d time errors encountered\n", time_errors); + + return errors + value_errors + time_errors; +} + +int test_dbengine(void) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + int i, j, errors, value_errors = 0, time_errors = 0, update_every, current_region; + RRDHOST *host = NULL; + RRDSET *st[CHARTS]; + RRDDIM *rd[CHARTS][DIMS]; + time_t time_start[REGIONS], time_end[REGIONS]; + + error_log_limit_unlimited(); + fprintf(stderr, "\nRunning DB-engine test\n"); + + default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + + fprintf(stderr, "Initializing localhost with hostname 'unittest-dbengine'"); + host = dbengine_rrdhost_find_or_create("unittest-dbengine"); + if (NULL == host) + return 1; + + current_region = 0; // this is the first region of data + update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 2 seconds + test_dbengine_create_charts(host, st, rd, update_every); + + time_start[current_region] = 2 * API_RELATIVE_TIME_MAX; + time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); + + errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); + if (errors) + goto error_out; + + current_region = 1; //this is the second region of data + update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 3 seconds + // Align pages for frequency change + for (i = 0 ; i < CHARTS ; ++i) { + st[i]->update_every = update_every; + for (j = 0; j < DIMS; ++j) { + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle); + } + } + + time_start[current_region] = time_end[current_region - 1] + update_every; + if (0 != time_start[current_region] % update_every) // align to update_every + time_start[current_region] += update_every - time_start[current_region] % update_every; + time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); + + errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); + if (errors) + goto error_out; + + current_region = 2; //this is the third region of data + update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 1 seconds + // Align pages for frequency change + for (i = 0 ; i < CHARTS ; ++i) { + st[i]->update_every = update_every; + for (j = 0; j < DIMS; ++j) { + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle); + } + } + + time_start[current_region] = time_end[current_region - 1] + update_every; + if (0 != time_start[current_region] % update_every) // align to update_every + time_start[current_region] += update_every - time_start[current_region] % update_every; + time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); + + errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); + if (errors) + goto error_out; + + for (current_region = 0 ; current_region < REGIONS ; ++current_region) { + errors = test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]); + if (errors) + goto error_out; + } + + current_region = 1; + update_every = REGION_UPDATE_EVERY[current_region]; // use the maximum update_every = 3 + errors = 0; + long points = (time_end[REGIONS - 1] - time_start[0]) / update_every; // cover all time regions with RRDR + long point_offset = (time_start[current_region] - time_start[0]) / update_every; + for (i = 0 ; i < CHARTS ; ++i) { + ONEWAYALLOC *owa = onewayalloc_create(0); + RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start[0] + update_every, + time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, + RRDR_OPTION_NATURAL_POINTS, NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); + + if (!r) { + fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", rrdset_name(st[i])); + ++errors; + } else { + long c; + + assert(r->internal.qt->request.st == st[i]); + // test current region values only, since they must be left unchanged + for (c = point_offset ; c < (long)(point_offset + rrdr_rows(r) / REGIONS / 2) ; ++c) { + RRDDIM *d; + time_t time_now = time_start[current_region] + (c - point_offset + 2) * update_every; + time_t time_retrieved = r->t[c]; + + // for each dimension + rrddim_foreach_read(d, r->internal.qt->request.st) { + if(unlikely(d_dfe.counter >= r->d)) break; // d_counter is provided by the dictionary dfe + + j = (int)d_dfe.counter; + + NETDATA_DOUBLE *cn = &r->v[ c * r->d ]; + NETDATA_DOUBLE value = cn[j]; + assert(rd[i][j] == d); + + collected_number last = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c - point_offset + 1; + NETDATA_DOUBLE expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); + + uint8_t same = (roundndd(value) == roundndd(expected)) ? 1 : 0; + if(!same) { + if(!value_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value); + value_errors++; + } + if(time_retrieved != time_now) { + if(!time_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved); + time_errors++; + } + } + rrddim_foreach_done(d); + } + rrdr_free(owa, r); + } + onewayalloc_destroy(owa); + } +error_out: + rrd_wrlock(); + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); + rrdhost_delete_charts(host); + rrdeng_exit((struct rrdengine_instance *)host->db[0].instance); + rrd_unlock(); + + return errors + value_errors + time_errors; +} + +struct dbengine_chart_thread { + uv_thread_t thread; + RRDHOST *host; + char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." etc */ + unsigned dset_charts; /* number of charts */ + unsigned dset_dims; /* dimensions per chart */ + unsigned chart_i; /* current chart offset */ + time_t time_present; /* current virtual time of the benchmark */ + volatile time_t time_max; /* latest timestamp of stored values */ + unsigned history_seconds; /* how far back in the past to go */ + + volatile long done; /* initialize to 0, set to 1 to stop thread */ + struct completion charts_initialized; + unsigned long errors, stored_metrics_nr; /* statistics */ + + RRDSET *st; + RRDDIM *rd[]; /* dset_dims elements */ +}; + +collected_number generate_dbengine_chart_value(int chart_i, int dim_i, time_t time_current) +{ + collected_number value; + + value = ((collected_number)time_current) * (chart_i + 1); + value += ((collected_number)time_current) * (dim_i + 1); + value %= 1024LLU; + + return value; +} + +static void generate_dbengine_chart(void *arg) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + struct dbengine_chart_thread *thread_info = (struct dbengine_chart_thread *)arg; + RRDHOST *host = thread_info->host; + char *chartname = thread_info->chartname; + const unsigned DSET_DIMS = thread_info->dset_dims; + unsigned history_seconds = thread_info->history_seconds; + time_t time_present = thread_info->time_present; + + unsigned j, update_every = 1; + RRDSET *st; + RRDDIM *rd[DSET_DIMS]; + char name[RRD_ID_LENGTH_MAX + 1]; + time_t time_current; + + // create the chart + snprintfz(name, RRD_ID_LENGTH_MAX, "example_local%u", thread_info->chart_i + 1); + thread_info->st = st = rrdset_create(host, name, chartname, chartname, "example", NULL, chartname, chartname, + chartname, NULL, 1, update_every, RRDSET_TYPE_LINE); + for (j = 0 ; j < DSET_DIMS ; ++j) { + snprintfz(name, RRD_ID_LENGTH_MAX, "%s%u", chartname, j + 1); + + thread_info->rd[j] = rd[j] = rrddim_add(st, name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + completion_mark_complete(&thread_info->charts_initialized); + + // feed it with the test data + time_current = time_present - history_seconds; + for (j = 0 ; j < DSET_DIMS ; ++j) { + rd[j]->last_collected_time.tv_sec = + st->last_collected_time.tv_sec = st->last_updated.tv_sec = time_current - update_every; + rd[j]->last_collected_time.tv_usec = + st->last_collected_time.tv_usec = st->last_updated.tv_usec = 0; + } + for( ; !thread_info->done && time_current < time_present ; time_current += update_every) { + st->usec_since_last_update = USEC_PER_SEC * update_every; + + for (j = 0; j < DSET_DIMS; ++j) { + collected_number value; + + value = generate_dbengine_chart_value(thread_info->chart_i, j, time_current); + rrddim_set_by_pointer_fake_time(rd[j], value, time_current); + ++thread_info->stored_metrics_nr; + } + rrdset_done(st); + thread_info->time_max = time_current; + } + for (j = 0; j < DSET_DIMS; ++j) { + rrdeng_store_metric_finalize((rd[j])->tiers[0]->db_collection_handle); + } +} + +void generate_dbengine_dataset(unsigned history_seconds) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + const int DSET_CHARTS = 16; + const int DSET_DIMS = 128; + const uint64_t EXPECTED_COMPRESSION_RATIO = 20; + RRDHOST *host = NULL; + struct dbengine_chart_thread **thread_info; + int i; + time_t time_present; + + default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + default_rrdeng_page_cache_mb = 128; + // Worst case for uncompressible data + default_rrdeng_disk_quota_mb = (((uint64_t)DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * history_seconds) / + (1024 * 1024); + default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; + + error_log_limit_unlimited(); + fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'"); + + host = dbengine_rrdhost_find_or_create("dbengine-dataset"); + if (NULL == host) + return; + + thread_info = mallocz(sizeof(*thread_info) * DSET_CHARTS); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + thread_info[i] = mallocz(sizeof(*thread_info[i]) + sizeof(RRDDIM *) * DSET_DIMS); + } + fprintf(stderr, "\nRunning DB-engine workload generator\n"); + + time_present = now_realtime_sec(); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + thread_info[i]->host = host; + thread_info[i]->chartname = "random"; + thread_info[i]->dset_charts = DSET_CHARTS; + thread_info[i]->chart_i = i; + thread_info[i]->dset_dims = DSET_DIMS; + thread_info[i]->history_seconds = history_seconds; + thread_info[i]->time_present = time_present; + thread_info[i]->time_max = 0; + thread_info[i]->done = 0; + completion_init(&thread_info[i]->charts_initialized); + assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i])); + completion_wait_for(&thread_info[i]->charts_initialized); + completion_destroy(&thread_info[i]->charts_initialized); + } + for (i = 0 ; i < DSET_CHARTS ; ++i) { + assert(0 == uv_thread_join(&thread_info[i]->thread)); + } + + for (i = 0 ; i < DSET_CHARTS ; ++i) { + freez(thread_info[i]); + } + freez(thread_info); + rrd_wrlock(); + rrdhost_free(host, 1); + rrd_unlock(); +} + +struct dbengine_query_thread { + uv_thread_t thread; + RRDHOST *host; + char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." etc */ + unsigned dset_charts; /* number of charts */ + unsigned dset_dims; /* dimensions per chart */ + time_t time_present; /* current virtual time of the benchmark */ + unsigned history_seconds; /* how far back in the past to go */ + volatile long done; /* initialize to 0, set to 1 to stop thread */ + unsigned long errors, queries_nr, queried_metrics_nr; /* statistics */ + uint8_t delete_old_data; /* if non zero then data are deleted when disk space is exhausted */ + + struct dbengine_chart_thread *chart_threads[]; /* dset_charts elements */ +}; + +static void query_dbengine_chart(void *arg) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + struct dbengine_query_thread *thread_info = (struct dbengine_query_thread *)arg; + const int DSET_CHARTS = thread_info->dset_charts; + const int DSET_DIMS = thread_info->dset_dims; + time_t time_after, time_before, time_min, time_approx_min, time_max, duration; + int i, j, update_every = 1; + RRDSET *st; + RRDDIM *rd; + uint8_t same; + time_t time_now, time_retrieved, end_time; + collected_number generatedv; + NETDATA_DOUBLE value, expected; + struct storage_engine_query_handle handle; + size_t value_errors = 0, time_errors = 0; + + do { + // pick a chart and dimension + i = random() % DSET_CHARTS; + st = thread_info->chart_threads[i]->st; + j = random() % DSET_DIMS; + rd = thread_info->chart_threads[i]->rd[j]; + + time_min = thread_info->time_present - thread_info->history_seconds + 1; + time_max = thread_info->chart_threads[i]->time_max; + + if (thread_info->delete_old_data) { + /* A time window of twice the disk space is sufficient for compression space savings of up to 50% */ + time_approx_min = time_max - (default_rrdeng_disk_quota_mb * 2 * 1024 * 1024) / + (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number)); + time_min = MAX(time_min, time_approx_min); + } + if (!time_max) { + time_before = time_after = time_min; + } else { + time_after = time_min + random() % (MAX(time_max - time_min, 1)); + duration = random() % 3600; + time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */ + } + + rd->tiers[0]->query_ops->init(rd->tiers[0]->db_metric_handle, &handle, time_after, time_before); + ++thread_info->queries_nr; + for (time_now = time_after ; time_now <= time_before ; time_now += update_every) { + generatedv = generate_dbengine_chart_value(i, j, time_now); + expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS)); + + if (unlikely(rd->tiers[0]->query_ops->is_finished(&handle))) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found data gap, ### E R R O R ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected); + ++thread_info->errors; + } + break; + } + + STORAGE_POINT sp = rd->tiers[0]->query_ops->next_metric(&handle); + value = sp.sum; + time_retrieved = sp.start_time; + end_time = sp.end_time; + + if (!netdata_double_isnumber(value)) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found data gap, ### E R R O R ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected); + ++thread_info->errors; + } + break; + } + ++thread_info->queried_metrics_nr; + + same = (roundndd(value) == roundndd(expected)) ? 1 : 0; + if (!same) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + if(!value_errors) + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected, value); + value_errors++; + thread_info->errors++; + } + } + if (end_time != time_now) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + if(!time_errors) + fprintf(stderr, + " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, (unsigned long) time_retrieved); + time_errors++; + thread_info->errors++; + } + } + } + rd->tiers[0]->query_ops->finalize(&handle); + } while(!thread_info->done); + + if(value_errors) + fprintf(stderr, "%zu value errors encountered\n", value_errors); + + if(time_errors) + fprintf(stderr, "%zu time errors encountered\n", time_errors); +} + +void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS, + unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + const unsigned DSET_DIMS = 128; + const uint64_t EXPECTED_COMPRESSION_RATIO = 20; + const unsigned HISTORY_SECONDS = 3600 * 24 * 365 * 50; /* 50 year of history */ + RRDHOST *host = NULL; + struct dbengine_chart_thread **chart_threads; + struct dbengine_query_thread **query_threads; + unsigned i, j; + time_t time_start, test_duration; + + error_log_limit_unlimited(); + + if (!TEST_DURATION_SEC) + TEST_DURATION_SEC = 10; + if (!DSET_CHARTS) + DSET_CHARTS = 1; + if (!QUERY_THREADS) + QUERY_THREADS = 1; + if (PAGE_CACHE_MB < RRDENG_MIN_PAGE_CACHE_SIZE_MB) + PAGE_CACHE_MB = RRDENG_MIN_PAGE_CACHE_SIZE_MB; + + default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + default_rrdeng_page_cache_mb = PAGE_CACHE_MB; + if (DISK_SPACE_MB) { + fprintf(stderr, "By setting disk space limit data are allowed to be deleted. " + "Data validation is turned off for this run.\n"); + default_rrdeng_disk_quota_mb = DISK_SPACE_MB; + } else { + // Worst case for uncompressible data + default_rrdeng_disk_quota_mb = + (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) / (1024 * 1024); + default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; + } + + fprintf(stderr, "Initializing localhost with hostname 'dbengine-stress-test'\n"); + + (void) sql_init_database(DB_CHECK_NONE, 1); + host = dbengine_rrdhost_find_or_create("dbengine-stress-test"); + if (NULL == host) + return; + + chart_threads = mallocz(sizeof(*chart_threads) * DSET_CHARTS); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + chart_threads[i] = mallocz(sizeof(*chart_threads[i]) + sizeof(RRDDIM *) * DSET_DIMS); + } + query_threads = mallocz(sizeof(*query_threads) * QUERY_THREADS); + for (i = 0 ; i < QUERY_THREADS ; ++i) { + query_threads[i] = mallocz(sizeof(*query_threads[i]) + sizeof(struct dbengine_chart_thread *) * DSET_CHARTS); + } + fprintf(stderr, "\nRunning DB-engine stress test, %u seconds writers ramp-up time,\n" + "%u seconds of concurrent readers and writers, %u writer threads, %u reader threads,\n" + "%u MiB of page cache.\n", + RAMP_UP_SECONDS, TEST_DURATION_SEC, DSET_CHARTS, QUERY_THREADS, PAGE_CACHE_MB); + + time_start = now_realtime_sec() + HISTORY_SECONDS; /* move history to the future */ + for (i = 0 ; i < DSET_CHARTS ; ++i) { + chart_threads[i]->host = host; + chart_threads[i]->chartname = "random"; + chart_threads[i]->dset_charts = DSET_CHARTS; + chart_threads[i]->chart_i = i; + chart_threads[i]->dset_dims = DSET_DIMS; + chart_threads[i]->history_seconds = HISTORY_SECONDS; + chart_threads[i]->time_present = time_start; + chart_threads[i]->time_max = 0; + chart_threads[i]->done = 0; + chart_threads[i]->errors = chart_threads[i]->stored_metrics_nr = 0; + completion_init(&chart_threads[i]->charts_initialized); + assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i])); + } + /* barrier so that subsequent queries can access valid chart data */ + for (i = 0 ; i < DSET_CHARTS ; ++i) { + completion_wait_for(&chart_threads[i]->charts_initialized); + completion_destroy(&chart_threads[i]->charts_initialized); + } + sleep(RAMP_UP_SECONDS); + /* at this point data have already began being written to the database */ + for (i = 0 ; i < QUERY_THREADS ; ++i) { + query_threads[i]->host = host; + query_threads[i]->chartname = "random"; + query_threads[i]->dset_charts = DSET_CHARTS; + query_threads[i]->dset_dims = DSET_DIMS; + query_threads[i]->history_seconds = HISTORY_SECONDS; + query_threads[i]->time_present = time_start; + query_threads[i]->done = 0; + query_threads[i]->errors = query_threads[i]->queries_nr = query_threads[i]->queried_metrics_nr = 0; + for (j = 0 ; j < DSET_CHARTS ; ++j) { + query_threads[i]->chart_threads[j] = chart_threads[j]; + } + query_threads[i]->delete_old_data = DISK_SPACE_MB ? 1 : 0; + assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i])); + } + sleep(TEST_DURATION_SEC); + /* stop workload */ + for (i = 0 ; i < DSET_CHARTS ; ++i) { + chart_threads[i]->done = 1; + } + for (i = 0 ; i < QUERY_THREADS ; ++i) { + query_threads[i]->done = 1; + } + for (i = 0 ; i < DSET_CHARTS ; ++i) { + assert(0 == uv_thread_join(&chart_threads[i]->thread)); + } + for (i = 0 ; i < QUERY_THREADS ; ++i) { + assert(0 == uv_thread_join(&query_threads[i]->thread)); + } + test_duration = now_realtime_sec() - (time_start - HISTORY_SECONDS); + if (!test_duration) + test_duration = 1; + fprintf(stderr, "\nDB-engine stress test finished in %lld seconds.\n", (long long)test_duration); + unsigned long stored_metrics_nr = 0; + for (i = 0 ; i < DSET_CHARTS ; ++i) { + stored_metrics_nr += chart_threads[i]->stored_metrics_nr; + } + unsigned long queried_metrics_nr = 0; + for (i = 0 ; i < QUERY_THREADS ; ++i) { + queried_metrics_nr += query_threads[i]->queried_metrics_nr; + } + fprintf(stderr, "%u metrics were stored (dataset size of %lu MiB) in %u charts by 1 writer thread per chart.\n", + DSET_CHARTS * DSET_DIMS, stored_metrics_nr * sizeof(storage_number) / (1024 * 1024), DSET_CHARTS); + fprintf(stderr, "Metrics were being generated per 1 emulated second and time was accelerated.\n"); + fprintf(stderr, "%lu metric data points were queried by %u reader threads.\n", queried_metrics_nr, QUERY_THREADS); + fprintf(stderr, "Query starting time is randomly chosen from the beginning of the time-series up to the time of\n" + "the latest data point, and ending time from 1 second up to 1 hour after the starting time.\n"); + fprintf(stderr, "Performance is %lld written data points/sec and %lld read data points/sec.\n", + (long long)(stored_metrics_nr / test_duration), (long long)(queried_metrics_nr / test_duration)); + + for (i = 0 ; i < DSET_CHARTS ; ++i) { + freez(chart_threads[i]); + } + freez(chart_threads); + for (i = 0 ; i < QUERY_THREADS ; ++i) { + freez(query_threads[i]); + } + freez(query_threads); + rrd_wrlock(); + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); + rrdhost_delete_charts(host); + rrdeng_exit((struct rrdengine_instance *)host->db[0].instance); + rrd_unlock(); +} + +#endif diff --git a/daemon/unit_test.h b/daemon/unit_test.h new file mode 100644 index 0000000..f79bd5c --- /dev/null +++ b/daemon/unit_test.h @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_UNIT_TEST_H +#define NETDATA_UNIT_TEST_H 1 + +#include "stdbool.h" + +int unit_test_storage(void); +int unit_test(long delay, long shift); +int run_all_mockup_tests(void); +int unit_test_str2ld(void); +int unit_test_buffer(void); +int unit_test_static_threads(void); +int test_sqlite(void); +int unit_test_bitmap256(void); +#ifdef ENABLE_DBENGINE +int test_dbengine(void); +void generate_dbengine_dataset(unsigned history_seconds); +void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS, + unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB); + +#endif + +bool command_argument_sanitization_tests(); + +#endif /* NETDATA_UNIT_TEST_H */ diff --git a/database/KolmogorovSmirnovDist.c b/database/KolmogorovSmirnovDist.c new file mode 100644 index 0000000..1486abc --- /dev/null +++ b/database/KolmogorovSmirnovDist.c @@ -0,0 +1,788 @@ +// SPDX-License-Identifier: GPL-3.0 + +/******************************************************************** + * + * File: KolmogorovSmirnovDist.c + * Environment: ISO C99 or ANSI C89 + * Author: Richard Simard + * Organization: DIRO, Université de Montréal + * Date: 1 February 2012 + * Version 1.1 + + * Copyright 1 march 2010 by Université de Montréal, + Richard Simard and Pierre L'Ecuyer + ===================================================================== + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, version 3 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + =====================================================================*/ + +#include "KolmogorovSmirnovDist.h" +#include +#include + +#define num_Pi 3.14159265358979323846 /* PI */ +#define num_Ln2 0.69314718055994530941 /* log(2) */ + +/* For x close to 0 or 1, we use the exact formulae of Ruben-Gambino in all + cases. For n <= NEXACT, we use exact algorithms: the Durbin matrix and + the Pomeranz algorithms. For n > NEXACT, we use asymptotic methods + except for x close to 0 where we still use the method of Durbin + for n <= NKOLMO. For n > NKOLMO, we use asymptotic methods only and + so the precision is less for x close to 0. + We could increase the limit NKOLMO to 10^6 to get better precision + for x close to 0, but at the price of a slower speed. */ +#define NEXACT 500 +#define NKOLMO 100000 + +/* The Durbin matrix algorithm for the Kolmogorov-Smirnov distribution */ +static double DurbinMatrix (int n, double d); + + +/*========================================================================*/ +#if 0 + +/* For ANSI C89 only, not for ISO C99 */ +#define MAXI 50 +#define EPSILON 1.0e-15 + +double log1p (double x) +{ + /* returns a value equivalent to log(1 + x) accurate also for small x. */ + if (fabs (x) > 0.1) { + return log (1.0 + x); + } else { + double term = x; + double sum = x; + int s = 2; + while ((fabs (term) > EPSILON * fabs (sum)) && (s < MAXI)) { + term *= -x; + sum += term / s; + s++; + } + return sum; + } +} + +#undef MAXI +#undef EPSILON + +#endif + +/*========================================================================*/ +#define MFACT 30 + +/* The natural logarithm of factorial n! for 0 <= n <= MFACT */ +static double LnFactorial[MFACT + 1] = { + 0., + 0., + 0.6931471805599453, + 1.791759469228055, + 3.178053830347946, + 4.787491742782046, + 6.579251212010101, + 8.525161361065415, + 10.60460290274525, + 12.80182748008147, + 15.10441257307552, + 17.50230784587389, + 19.98721449566188, + 22.55216385312342, + 25.19122118273868, + 27.89927138384088, + 30.67186010608066, + 33.50507345013688, + 36.39544520803305, + 39.33988418719949, + 42.33561646075348, + 45.3801388984769, + 48.47118135183522, + 51.60667556776437, + 54.7847293981123, + 58.00360522298051, + 61.26170176100199, + 64.55753862700632, + 67.88974313718154, + 71.257038967168, + 74.65823634883016 +}; + +/*------------------------------------------------------------------------*/ + +static double getLogFactorial (int n) +{ + /* Returns the natural logarithm of factorial n! */ + if (n <= MFACT) { + return LnFactorial[n]; + + } else { + double x = (double) (n + 1); + double y = 1.0 / (x * x); + double z = ((-(5.95238095238E-4 * y) + 7.936500793651E-4) * y - + 2.7777777777778E-3) * y + 8.3333333333333E-2; + z = ((x - 0.5) * log (x) - x) + 9.1893853320467E-1 + z / x; + return z; + } +} + +/*------------------------------------------------------------------------*/ + +static double rapfac (int n) +{ + /* Computes n! / n^n */ + int i; + double res = 1.0 / n; + for (i = 2; i <= n; i++) { + res *= (double) i / n; + } + return res; +} + + +/*========================================================================*/ + +static double **CreateMatrixD (int N, int M) +{ + int i; + double **T2; + + T2 = (double **) malloc (N * sizeof (double *)); + T2[0] = (double *) malloc ((size_t) N * M * sizeof (double)); + for (i = 1; i < N; i++) + T2[i] = T2[0] + i * M; + return T2; +} + + +static void DeleteMatrixD (double **T) +{ + free (T[0]); + free (T); +} + + +/*========================================================================*/ + +static double KSPlusbarAsymp (int n, double x) +{ + /* Compute the probability of the KS+ distribution using an asymptotic + formula */ + double t = (6.0 * n * x + 1); + double z = t * t / (18.0 * n); + double v = 1.0 - (2.0 * z * z - 4.0 * z - 1.0) / (18.0 * n); + if (v <= 0.0) + return 0.0; + v = v * exp (-z); + if (v >= 1.0) + return 1.0; + return v; +} + + +/*-------------------------------------------------------------------------*/ + +static double KSPlusbarUpper (int n, double x) +{ + /* Compute the probability of the KS+ distribution in the upper tail using + Smirnov's stable formula */ + const double EPSILON = 1.0E-12; + double q; + double Sum = 0.0; + double term; + double t; + double LogCom; + double LOGJMAX; + int j; + int jdiv; + int jmax = (int) (n * (1.0 - x)); + + if (n > 200000) + return KSPlusbarAsymp (n, x); + + /* Avoid log(0) for j = jmax and q ~ 1.0 */ + if ((1.0 - x - (double) jmax / n) <= 0.0) + jmax--; + + if (n > 3000) + jdiv = 2; + else + jdiv = 3; + + j = jmax / jdiv + 1; + LogCom = getLogFactorial (n) - getLogFactorial (j) - + getLogFactorial (n - j); + LOGJMAX = LogCom; + + while (j <= jmax) { + q = (double) j / n + x; + term = LogCom + (j - 1) * log (q) + (n - j) * log1p (-q); + t = exp (term); + Sum += t; + LogCom += log ((double) (n - j) / (j + 1)); + if (t <= Sum * EPSILON) + break; + j++; + } + + j = jmax / jdiv; + LogCom = LOGJMAX + log ((double) (j + 1) / (n - j)); + + while (j > 0) { + q = (double) j / n + x; + term = LogCom + (j - 1) * log (q) + (n - j) * log1p (-q); + t = exp (term); + Sum += t; + LogCom += log ((double) j / (n - j + 1)); + if (t <= Sum * EPSILON) + break; + j--; + } + + Sum *= x; + /* add the term j = 0 */ + Sum += exp (n * log1p (-x)); + return Sum; +} + + +/*========================================================================*/ + +static double Pelz (int n, double x) +{ + /* Approximating the Lower Tail-Areas of the Kolmogorov-Smirnov One-Sample + Statistic, + Wolfgang Pelz and I. J. Good, + Journal of the Royal Statistical Society, Series B. + Vol. 38, No. 2 (1976), pp. 152-156 + */ + + const int JMAX = 20; + const double EPS = 1.0e-10; + const double C = 2.506628274631001; /* sqrt(2*Pi) */ + const double C2 = 1.2533141373155001; /* sqrt(Pi/2) */ + const double PI2 = num_Pi * num_Pi; + const double PI4 = PI2 * PI2; + const double RACN = sqrt ((double) n); + const double z = RACN * x; + const double z2 = z * z; + const double z4 = z2 * z2; + const double z6 = z4 * z2; + const double w = PI2 / (2.0 * z * z); + double ti, term, tom; + double sum; + int j; + + term = 1; + j = 0; + sum = 0; + while (j <= JMAX && term > EPS * sum) { + ti = j + 0.5; + term = exp (-ti * ti * w); + sum += term; + j++; + } + sum *= C / z; + + term = 1; + tom = 0; + j = 0; + while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { + ti = j + 0.5; + term = (PI2 * ti * ti - z2) * exp (-ti * ti * w); + tom += term; + j++; + } + sum += tom * C2 / (RACN * 3.0 * z4); + + term = 1; + tom = 0; + j = 0; + while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { + ti = j + 0.5; + term = 6 * z6 + 2 * z4 + PI2 * (2 * z4 - 5 * z2) * ti * ti + + PI4 * (1 - 2 * z2) * ti * ti * ti * ti; + term *= exp (-ti * ti * w); + tom += term; + j++; + } + sum += tom * C2 / (n * 36.0 * z * z6); + + term = 1; + tom = 0; + j = 1; + while (j <= JMAX && term > EPS * tom) { + ti = j; + term = PI2 * ti * ti * exp (-ti * ti * w); + tom += term; + j++; + } + sum -= tom * C2 / (n * 18.0 * z * z2); + + term = 1; + tom = 0; + j = 0; + while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { + ti = j + 0.5; + ti = ti * ti; + term = -30 * z6 - 90 * z6 * z2 + PI2 * (135 * z4 - 96 * z6) * ti + + PI4 * (212 * z4 - 60 * z2) * ti * ti + PI2 * PI4 * ti * ti * ti * (5 - + 30 * z2); + term *= exp (-ti * w); + tom += term; + j++; + } + sum += tom * C2 / (RACN * n * 3240.0 * z4 * z6); + + term = 1; + tom = 0; + j = 1; + while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { + ti = j * j; + term = (3 * PI2 * ti * z2 - PI4 * ti * ti) * exp (-ti * w); + tom += term; + j++; + } + sum += tom * C2 / (RACN * n * 108.0 * z6); + + return sum; +} + + +/*=========================================================================*/ + +static void CalcFloorCeil ( + int n, /* sample size */ + double t, /* = nx */ + double *A, /* A_i */ + double *Atflo, /* floor (A_i - t) */ + double *Atcei /* ceiling (A_i + t) */ + ) +{ + /* Precompute A_i, floors, and ceilings for limits of sums in the Pomeranz + algorithm */ + int i; + int ell = (int) t; /* floor (t) */ + double z = t - ell; /* t - floor (t) */ + double w = ceil (t) - t; + + if (z > 0.5) { + for (i = 2; i <= 2 * n + 2; i += 2) + Atflo[i] = i / 2 - 2 - ell; + for (i = 1; i <= 2 * n + 2; i += 2) + Atflo[i] = i / 2 - 1 - ell; + + for (i = 2; i <= 2 * n + 2; i += 2) + Atcei[i] = i / 2 + ell; + for (i = 1; i <= 2 * n + 2; i += 2) + Atcei[i] = i / 2 + 1 + ell; + + } else if (z > 0.0) { + for (i = 1; i <= 2 * n + 2; i++) + Atflo[i] = i / 2 - 1 - ell; + + for (i = 2; i <= 2 * n + 2; i++) + Atcei[i] = i / 2 + ell; + Atcei[1] = 1 + ell; + + } else { /* z == 0 */ + for (i = 2; i <= 2 * n + 2; i += 2) + Atflo[i] = i / 2 - 1 - ell; + for (i = 1; i <= 2 * n + 2; i += 2) + Atflo[i] = i / 2 - ell; + + for (i = 2; i <= 2 * n + 2; i += 2) + Atcei[i] = i / 2 - 1 + ell; + for (i = 1; i <= 2 * n + 2; i += 2) + Atcei[i] = i / 2 + ell; + } + + if (w < z) + z = w; + A[0] = A[1] = 0; + A[2] = z; + A[3] = 1 - A[2]; + for (i = 4; i <= 2 * n + 1; i++) + A[i] = A[i - 2] + 1; + A[2 * n + 2] = n; +} + + +/*========================================================================*/ + +static double Pomeranz (int n, double x) +{ + /* The Pomeranz algorithm to compute the KS distribution */ + const double EPS = 1.0e-15; + const int ENO = 350; + const double RENO = ldexp (1.0, ENO); /* for renormalization of V */ + int coreno; /* counter: how many renormalizations */ + const double t = n * x; + double w, sum, minsum; + int i, j, k, s; + int r1, r2; /* Indices i and i-1 for V[i][] */ + int jlow, jup, klow, kup, kup0; + double *A; + double *Atflo; + double *Atcei; + double **V; + double **H; /* = pow(w, j) / Factorial(j) */ + + A = (double *) calloc ((size_t) (2 * n + 3), sizeof (double)); + Atflo = (double *) calloc ((size_t) (2 * n + 3), sizeof (double)); + Atcei = (double *) calloc ((size_t) (2 * n + 3), sizeof (double)); + V = (double **) CreateMatrixD (2, n + 2); + H = (double **) CreateMatrixD (4, n + 2); + + CalcFloorCeil (n, t, A, Atflo, Atcei); + + for (j = 1; j <= n + 1; j++) + V[0][j] = 0; + for (j = 2; j <= n + 1; j++) + V[1][j] = 0; + V[1][1] = RENO; + coreno = 1; + + /* Precompute H[][] = (A[j] - A[j-1]^k / k! for speed */ + H[0][0] = 1; + w = 2.0 * A[2] / n; + for (j = 1; j <= n + 1; j++) + H[0][j] = w * H[0][j - 1] / j; + + H[1][0] = 1; + w = (1.0 - 2.0 * A[2]) / n; + for (j = 1; j <= n + 1; j++) + H[1][j] = w * H[1][j - 1] / j; + + H[2][0] = 1; + w = A[2] / n; + for (j = 1; j <= n + 1; j++) + H[2][j] = w * H[2][j - 1] / j; + + H[3][0] = 1; + for (j = 1; j <= n + 1; j++) + H[3][j] = 0; + + r1 = 0; + r2 = 1; + for (i = 2; i <= 2 * n + 2; i++) { + jlow = 2 + (int) Atflo[i]; + if (jlow < 1) + jlow = 1; + jup = (int) Atcei[i]; + if (jup > n + 1) + jup = n + 1; + + klow = 2 + (int) Atflo[i - 1]; + if (klow < 1) + klow = 1; + kup0 = (int) Atcei[i - 1]; + + /* Find to which case it corresponds */ + w = (A[i] - A[i - 1]) / n; + s = -1; + for (j = 0; j < 4; j++) { + if (fabs (w - H[j][1]) <= EPS) { + s = j; + break; + } + } + /* assert (s >= 0, "Pomeranz: s < 0"); */ + + minsum = RENO; + r1 = (r1 + 1) & 1; /* i - 1 */ + r2 = (r2 + 1) & 1; /* i */ + + for (j = jlow; j <= jup; j++) { + kup = kup0; + if (kup > j) + kup = j; + sum = 0; + for (k = kup; k >= klow; k--) + sum += V[r1][k] * H[s][j - k]; + V[r2][j] = sum; + if (sum < minsum) + minsum = sum; + } + + if (minsum < 1.0e-280) { + /* V is too small: renormalize to avoid underflow of probabilities */ + for (j = jlow; j <= jup; j++) + V[r2][j] *= RENO; + coreno++; /* keep track of log of RENO */ + } + } + + sum = V[r2][n + 1]; + free (A); + free (Atflo); + free (Atcei); + DeleteMatrixD (H); + DeleteMatrixD (V); + w = getLogFactorial (n) - coreno * ENO * num_Ln2 + log (sum); + if (w >= 0.) + return 1.; + return exp (w); +} + + +/*========================================================================*/ + +static double cdfSpecial (int n, double x) +{ + /* The KS distribution is known exactly for these cases */ + + /* For nx^2 > 18, KSfbar(n, x) is smaller than 5e-16 */ + if ((n * x * x >= 18.0) || (x >= 1.0)) + return 1.0; + + if (x <= 0.5 / n) + return 0.0; + + if (n == 1) + return 2.0 * x - 1.0; + + if (x <= 1.0 / n) { + double t = 2.0 * x * n - 1.0; + double w; + if (n <= NEXACT) { + w = rapfac (n); + return w * pow (t, (double) n); + } + w = getLogFactorial (n) + n * log (t / n); + return exp (w); + } + + if (x >= 1.0 - 1.0 / n) { + return 1.0 - 2.0 * pow (1.0 - x, (double) n); + } + + return -1.0; +} + + +/*========================================================================*/ + +double KScdf (int n, double x) +{ + const double w = n * x * x; + double u = cdfSpecial (n, x); + if (u >= 0.0) + return u; + + if (n <= NEXACT) { + if (w < 0.754693) + return DurbinMatrix (n, x); + if (w < 4.0) + return Pomeranz (n, x); + return 1.0 - KSfbar (n, x); + } + + if ((w * x * n <= 7.0) && (n <= NKOLMO)) + return DurbinMatrix (n, x); + + return Pelz (n, x); +} + + +/*=========================================================================*/ + +static double fbarSpecial (int n, double x) +{ + const double w = n * x * x; + + if ((w >= 370.0) || (x >= 1.0)) + return 0.0; + if ((w <= 0.0274) || (x <= 0.5 / n)) + return 1.0; + if (n == 1) + return 2.0 - 2.0 * x; + + if (x <= 1.0 / n) { + double z; + double t = 2.0 * x * n - 1.0; + if (n <= NEXACT) { + z = rapfac (n); + return 1.0 - z * pow (t, (double) n); + } + z = getLogFactorial (n) + n * log (t / n); + return 1.0 - exp (z); + } + + if (x >= 1.0 - 1.0 / n) { + return 2.0 * pow (1.0 - x, (double) n); + } + return -1.0; +} + + +/*========================================================================*/ + +double KSfbar (int n, double x) +{ + const double w = n * x * x; + double v = fbarSpecial (n, x); + if (v >= 0.0) + return v; + + if (n <= NEXACT) { + if (w < 4.0) + return 1.0 - KScdf (n, x); + else + return 2.0 * KSPlusbarUpper (n, x); + } + + if (w >= 2.65) + return 2.0 * KSPlusbarUpper (n, x); + + return 1.0 - KScdf (n, x); +} + + +/*========================================================================= + +The following implements the Durbin matrix algorithm and was programmed by +G. Marsaglia, Wai Wan Tsang and Jingbo Wong. + +I have made small modifications in their program. (Richard Simard) + + + +=========================================================================*/ + +/* + The C program to compute Kolmogorov's distribution + + K(n,d) = Prob(D_n < d), where + + D_n = max(x_1-0/n,x_2-1/n...,x_n-(n-1)/n,1/n-x_1,2/n-x_2,...,n/n-x_n) + + with x_17 DIGIT ACCURACY IN THE RIGHT TAIL */ +#if 0 + s = d * d * n; + if (s > 7.24 || (s > 3.76 && n > 99)) + return 1 - 2 * exp (-(2.000071 + .331 / sqrt (n) + 1.409 / n) * s); +#endif + k = (int) (n * d) + 1; + m = 2 * k - 1; + h = k - n * d; + H = (double *) malloc ((m * m) * sizeof (double)); + Q = (double *) malloc ((m * m) * sizeof (double)); + for (i = 0; i < m; i++) + for (j = 0; j < m; j++) + if (i - j + 1 < 0) + H[i * m + j] = 0; + else + H[i * m + j] = 1; + for (i = 0; i < m; i++) { + H[i * m] -= pow (h, (double) (i + 1)); + H[(m - 1) * m + i] -= pow (h, (double) (m - i)); + } + H[(m - 1) * m] += (2 * h - 1 > 0 ? pow (2 * h - 1, (double) m) : 0); + for (i = 0; i < m; i++) + for (j = 0; j < m; j++) + if (i - j + 1 > 0) + for (g = 1; g <= i - j + 1; g++) + H[i * m + j] /= g; + eH = 0; + mPower (H, eH, Q, &eQ, m, n); + s = Q[(k - 1) * m + k - 1]; + + for (i = 1; i <= n; i++) { + s = s * (double) i / n; + if (s < INORM) { + s *= NORM; + eQ -= LOGNORM; + } + } + s *= pow (10., (double) eQ); + free (H); + free (Q); + return s; +} + + +static void mMultiply (double *A, double *B, double *C, int m) +{ + int i, j, k; + double s; + for (i = 0; i < m; i++) + for (j = 0; j < m; j++) { + s = 0.; + for (k = 0; k < m; k++) + s += A[i * m + k] * B[k * m + j]; + C[i * m + j] = s; + } +} + + +static void renormalize (double *V, int m, int *p) +{ + int i; + for (i = 0; i < m * m; i++) + V[i] *= INORM; + *p += LOGNORM; +} + + +static void mPower (double *A, int eA, double *V, int *eV, int m, int n) +{ + double *B; + int eB, i; + if (n == 1) { + for (i = 0; i < m * m; i++) + V[i] = A[i]; + *eV = eA; + return; + } + mPower (A, eA, V, eV, m, n / 2); + B = (double *) malloc ((m * m) * sizeof (double)); + mMultiply (V, V, B, m); + eB = 2 * (*eV); + if (B[(m / 2) * m + (m / 2)] > NORM) + renormalize (B, m, &eB); + + if (n % 2 == 0) { + for (i = 0; i < m * m; i++) + V[i] = B[i]; + *eV = eB; + } else { + mMultiply (A, B, V, m); + *eV = eA + eB; + } + + if (V[(m / 2) * m + (m / 2)] > NORM) + renormalize (V, m, eV); + free (B); +} diff --git a/database/KolmogorovSmirnovDist.h b/database/KolmogorovSmirnovDist.h new file mode 100644 index 0000000..cf45504 --- /dev/null +++ b/database/KolmogorovSmirnovDist.h @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-3.0 + +#ifndef KOLMOGOROVSMIRNOVDIST_H +#define KOLMOGOROVSMIRNOVDIST_H + +#ifdef __cplusplus +extern "C" { +#endif + + +/******************************************************************** + * + * File: KolmogorovSmirnovDist.h + * Environment: ISO C99 or ANSI C89 + * Author: Richard Simard + * Organization: DIRO, Université de Montréal + * Date: 1 February 2012 + * Version 1.1 + * + * Copyright March 2010 by Université de Montréal, + Richard Simard and Pierre L'Ecuyer + ===================================================================== + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, version 3 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + =====================================================================*/ +/* + * + * The Kolmogorov-Smirnov test statistic D_n is defined by + * + * D_n = sup_x |F(x) - S_n(x)| + * + * where n is the sample size, F(x) is a completely specified theoretical + * distribution, and S_n(x) is an empirical distribution function. + * + * + * The function + * + * double KScdf (int n, double x); + * + * computes the cumulative probability P[D_n <= x] of the 2-sided 1-sample + * Kolmogorov-Smirnov distribution with sample size n at x. + * It returns at least 13 decimal digits of precision for n <= 500, + * at least 7 decimal digits of precision for 500 < n <= 100000, + * and a few correct decimal digits for n > 100000. + * + */ + +double KScdf (int n, double x); + + +/* + * The function + * + * double KSfbar (int n, double x); + * + * computes the complementary cumulative probability P[D_n >= x] of the + * 2-sided 1-sample Kolmogorov-Smirnov distribution with sample size n at x. + * It returns at least 10 decimal digits of precision for n <= 500, + * at least 6 decimal digits of precision for 500 < n <= 200000, + * and a few correct decimal digits for n > 200000. + * + */ + +double KSfbar (int n, double x); + + +/* + * NOTE: + * The ISO C99 function log1p of the standard math library does not exist in + * ANSI C89. Here, it is programmed explicitly in KolmogorovSmirnovDist.c. + + * For ANSI C89 compilers, change the preprocessor condition to make it + * available. + */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/database/Makefile.am b/database/Makefile.am new file mode 100644 index 0000000..dc87e61 --- /dev/null +++ b/database/Makefile.am @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + engine \ + ram \ + sqlite \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/database/README.md b/database/README.md new file mode 100644 index 0000000..1453f9b --- /dev/null +++ b/database/README.md @@ -0,0 +1,147 @@ + + +# Database + +Netdata is fully capable of long-term metrics storage, at per-second granularity, via its default database engine +(`dbengine`). But to remain as flexible as possible, Netdata supports several storage options: + +1. `dbengine`, (the default) data are in database files. The [Database Engine](/database/engine/README.md) works like a + traditional database. There is some amount of RAM dedicated to data caching and indexing and the rest of the data + reside compressed on disk. The number of history entries is not fixed in this case, but depends on the configured + disk space and the effective compression ratio of the data stored. This is the **only mode** that supports changing + the data collection update frequency (`update every`) **without losing** the previously stored metrics. For more + details see [here](/database/engine/README.md). + +2. `ram`, data are purely in memory. Data are never saved on disk. This mode uses `mmap()` and supports [KSM](#ksm). + +3. `save`, data are only in RAM while Netdata runs and are saved to / loaded from disk on Netdata restart. It also + uses `mmap()` and supports [KSM](#ksm). + +4. `map`, data are in memory mapped files. This works like the swap. When Netdata writes data on its memory, the Linux + kernel marks the related memory pages as dirty and automatically starts updating them on disk. Unfortunately we + cannot control how frequently this works. The Linux kernel uses exactly the same algorithm it uses for its swap + memory. This mode uses `mmap()` but does not support [KSM](#ksm). _Keep in mind though, this option will have a + constant write on your disk._ + +5. `alloc`, like `ram` but it uses `calloc()` and does not support [KSM](#ksm). This mode is the fallback for all others + except `none`. + +6. `none`, without a database (collected metrics can only be streamed to another Netdata). + +## Which database mode to use + +The default mode `[db].mode = dbengine` has been designed to scale for longer retentions and is the only mode suitable +for parent Agents in the _Parent - Child_ setups + +The other available database modes are designed to minimize resource utilization and should only be considered on +[Parent - Child](/docs/metrics-storage-management/how-streaming-works.mdx) setups at the children side and only when the +resource constraints are very strict. + +So, + +- On a single node setup, use `[db].mode = dbengine`. +- On a [Parent - Child](/docs/metrics-storage-management/how-streaming-works.mdx) setup, use `[db].mode = dbengine` on the + parent to increase retention, a more resource efficient mode like, `dbengine` with light retention settings, and + `save`, `ram` or `none` modes for the children to minimize resource utilization. + +## Choose your database mode + +You can select the database mode by editing `netdata.conf` and setting: + +```conf +[db] + # dbengine (default), ram, save (the default if dbengine not available), map (swap like), none, alloc + mode = dbengine +``` + +## Netdata Longer Metrics Retention + +Metrics retention is controlled only by the disk space allocated to storing metrics. But it also affects the memory and +CPU required by the agent to query longer timeframes. + +Since Netdata Agents usually run on the edge, on production systems, Netdata Agent **parents** should be considered. +When having a [**parent - child**](/docs/metrics-storage-management/how-streaming-works.mdx) setup, the child (the +Netdata Agent running on a production system) delegates all of its functions, including longer metrics retention and +querying, to the parent node that can dedicate more resources to this task. A single Netdata Agent parent can centralize +multiple children Netdata Agents (dozens, hundreds, or even thousands depending on its available resources). + +## Running Netdata on embedded devices + +Embedded devices typically have very limited RAM resources available. + +There are two settings for you to configure: + +1. `[db].update every`, which controls the data collection frequency +2. `[db].retention`, which controls the size of the database in memory (except for `[db].mode = dbengine`) + +By default `[db].update every = 1` and `[db].retention = 3600`. This gives you an hour of data with per second updates. + +If you set `[db].update every = 2` and `[db].retention = 1800`, you will still have an hour of data, but collected once +every 2 seconds. This will **cut in half** both CPU and RAM resources consumed by Netdata. Of course experiment a bit to find the right setting. +On very weak devices you might have to use `[db].update every = 5` and `[db].retention = 720` (still 1 hour of data, but +1/5 of the CPU and RAM resources). + +You can also disable [data collection plugins](/collectors/README.md) that you don't need. Disabling such plugins will also +free both CPU and RAM resources. + +## Memory optimizations + +### KSM + +KSM performs memory deduplication by scanning through main memory for physical pages that have identical content, and +identifies the virtual pages that are mapped to those physical pages. It leaves one page unchanged, and re-maps each +duplicate page to point to the same physical page. Netdata offers all of its in-memory database to kernel for +deduplication. + +In the past, KSM has been criticized for consuming a lot of CPU resources. This is true when KSM is used for +deduplicating certain applications, but it is not true for Netdata. Agent's memory is written very infrequently +(if you have 24 hours of metrics in Netdata, each byte at the in-memory database will be updated just once per day). KSM +is a solution that will provide 60+% memory savings to Netdata. + +### Enable KSM in kernel + +To enable KSM in kernel, you need to run a kernel compiled with the following: + +```sh +CONFIG_KSM=y +``` + +When KSM is enabled at the kernel, it is just available for the user to enable it. + +If you build a kernel with `CONFIG_KSM=y`, you will just get a few files in `/sys/kernel/mm/ksm`. Nothing else +happens. There is no performance penalty (apart from the memory this code occupies into the kernel). + +The files that `CONFIG_KSM=y` offers include: + +- `/sys/kernel/mm/ksm/run` by default `0`. You have to set this to `1` for the kernel to spawn `ksmd`. +- `/sys/kernel/mm/ksm/sleep_millisecs`, by default `20`. The frequency ksmd should evaluate memory for deduplication. +- `/sys/kernel/mm/ksm/pages_to_scan`, by default `100`. The amount of pages ksmd will evaluate on each run. + +So, by default `ksmd` is just disabled. It will not harm performance and the user/admin can control the CPU resources +they are willing to have used by `ksmd`. + +### Run `ksmd` kernel daemon + +To activate / run `ksmd,` you need to run the following: + +```sh +echo 1 >/sys/kernel/mm/ksm/run +echo 1000 >/sys/kernel/mm/ksm/sleep_millisecs +``` + +With these settings, ksmd does not even appear in the running process list (it will run once per second and evaluate 100 +pages for de-duplication). + +Put the above lines in your boot sequence (`/etc/rc.local` or equivalent) to have `ksmd` run at boot. + +### Monitoring Kernel Memory de-duplication performance + +Netdata will create charts for kernel memory de-duplication performance, like this: + +![image](https://cloud.githubusercontent.com/assets/2662304/11998786/eb23ae54-aab6-11e5-94d4-e848e8a5c56a.png) + + diff --git a/database/engine/Makefile.am b/database/engine/Makefile.am new file mode 100644 index 0000000..59250a9 --- /dev/null +++ b/database/engine/Makefile.am @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/database/engine/README.md b/database/engine/README.md new file mode 100644 index 0000000..c67e400 --- /dev/null +++ b/database/engine/README.md @@ -0,0 +1,302 @@ + + +# Database engine + +The Database Engine works like a traditional time series database. Unlike other [database modes](/database/README.md), +the amount of historical metrics stored is based on the amount of disk space you allocate and the effective compression +ratio, not a fixed number of metrics collected. + +## Tiering + +Tiering is a mechanism of providing multiple tiers of data with +different [granularity on metrics](/docs/store/distributed-data-architecture.md#granularity-of-metrics). + +For Netdata Agents with version `netdata-1.35.0.138.nightly` and greater, `dbengine` supports Tiering, allowing almost +unlimited retention of data. + + +### Metric size + +Every Tier down samples the exact lower tier (lower tiers have greater resolution). You can have up to 5 +Tiers **[0. . 4]** of data (including the Tier 0, which has the highest resolution) + +Tier 0 is the default that was always available in `dbengine` mode. Tier 1 is the first level of aggregation, Tier 2 is +the second, and so on. + +Metrics on all tiers except of the _Tier 0_ also store the following five additional values for every point for accurate +representation: + +1. The `sum` of the points aggregated +2. The `min` of the points aggregated +3. The `max` of the points aggregated +4. The `count` of the points aggregated (could be constant, but it may not be due to gaps in data collection) +5. The `anomaly_count` of the points aggregated (how many of the aggregated points found anomalous) + +Among `min`, `max` and `sum`, the correct value is chosen based on the user query. `average` is calculated on the fly at +query time. + +### Tiering in a nutshell + +The `dbengine` is capable of retaining metrics for years. To further understand the `dbengine` tiering mechanism let's +explore the following configuration. + +``` +[db] + mode = dbengine + + # per second data collection + update every = 1 + + # enables Tier 1 and Tier 2, Tier 0 is always enabled in dbengine mode + storage tiers = 3 + + # Tier 0, per second data for a week + dbengine multihost disk space MB = 1100 + + # Tier 1, per minute data for a month + dbengine tier 1 multihost disk space MB = 330 + + # Tier 2, per hour data for a year + dbengine tier 2 multihost disk space MB = 67 +``` + +For 2000 metrics, collected every second and retained for a week, Tier 0 needs: 1 byte x 2000 metrics x 3600 secs per +hour x 24 hours per day x 7 days per week = 1100MB. + +By setting `dbengine multihost disk space MB` to `1100`, this node will start maintaining about a week of data. But pay +attention to the number of metrics. If you have more than 2000 metrics on a node, or you need more that a week of high +resolution metrics, you may need to adjust this setting accordingly. + +Tier 1 is by default sampling the data every **60 points of Tier 0**. In our case, Tier 0 is per second, if we want to +transform this information in terms of time then the Tier 1 "resolution" is per minute. + +Tier 1 needs four times more storage per point compared to Tier 0. So, for 2000 metrics, with per minute resolution, +retained for a month, Tier 1 needs: 4 bytes x 2000 metrics x 60 minutes per hour x 24 hours per day x 30 days per month += 330MB. + +Tier 2 is by default sampling data every 3600 points of Tier 0 (60 of Tier 1, which is the previous exact Tier). Again +in term of "time" (Tier 0 is per second), then Tier 2 is per hour. + +The storage requirements are the same to Tier 1. + +For 2000 metrics, with per hour resolution, retained for a year, Tier 2 needs: 4 bytes x 2000 metrics x 24 hours per day +x 365 days per year = 67MB. + +## Legacy configuration + +### v1.35.1 and prior + +These versions of the Agent do not support [Tiering](#Tiering). You could change the metric retention for the parent and +all of its children only with the `dbengine multihost disk space MB` setting. This setting accounts the space allocation +for the parent node and all of its children. + +To configure the database engine, look for the `page cache size MB` and `dbengine multihost disk space MB` settings in +the `[db]` section of your `netdata.conf`. + +```conf +[db] + dbengine page cache size MB = 32 + dbengine multihost disk space MB = 256 +``` + +### v1.23.2 and prior + +_For Netdata Agents earlier than v1.23.2_, the Agent on the parent node uses one dbengine instance for itself, and +another instance for every child node it receives metrics from. If you had four streaming nodes, you would have five +instances in total (`1 parent + 4 child nodes = 5 instances`). + +The Agent allocates resources for each instance separately using the `dbengine disk space MB` (**deprecated**) setting. +If +`dbengine disk space MB`(**deprecated**) is set to the default `256`, each instance is given 256 MiB in disk space, +which means the total disk space required to store all instances is, +roughly, `256 MiB * 1 parent * 4 child nodes = 1280 MiB`. + +#### Backward compatibility + +All existing metrics belonging to child nodes are automatically converted to legacy dbengine instances and the localhost +metrics are transferred to the multihost dbengine instance. + +All new child nodes are automatically transferred to the multihost dbengine instance and share its page cache and disk +space. If you want to migrate a child node from its legacy dbengine instance to the multihost dbengine instance, you +must delete the instance's directory, which is located in `/var/cache/netdata/MACHINE_GUID/dbengine`, after stopping the +Agent. + +##### Information + +For more information about setting `[db].mode` on your nodes, in addition to other streaming configurations, see +[streaming](/streaming/README.md). + +## Requirements & limitations + +### Memory + +Using database mode `dbengine` we can overcome most memory restrictions and store a dataset that is much larger than the +available memory. + +There are explicit memory requirements **per** DB engine **instance**: + +- The total page cache memory footprint will be an additional `#dimensions-being-collected x 4096 x 2` bytes over what + the user configured with `dbengine page cache size MB`. + + +- an additional `#pages-on-disk x 4096 x 0.03` bytes of RAM are allocated for metadata. + + - roughly speaking this is 3% of the uncompressed disk space taken by the DB files. + + - for very highly compressible data (compression ratio > 90%) this RAM overhead is comparable to the disk space + footprint. + +An important observation is that RAM usage depends on both the `page cache size` and the `dbengine multihost disk space` +options. + +You can use +our [database engine calculator](/docs/store/change-metrics-storage.md#calculate-the-system-resources-ram-disk-space-needed-to-store-metrics) +to validate the memory requirements for your particular system(s) and configuration (**out-of-date**). + +### Disk space + +There are explicit disk space requirements **per** DB engine **instance**: + +- The total disk space footprint will be the maximum between `#dimensions-being-collected x 4096 x 2` bytes or what the + user configured with `dbengine multihost disk space` or `dbengine disk space`. + +### File descriptor + +The Database Engine may keep a **significant** amount of files open per instance (e.g. per streaming child or parent +server). When configuring your system you should make sure there are at least 50 file descriptors available per +`dbengine` instance. + +Netdata allocates 25% of the available file descriptors to its Database Engine instances. This means that only 25% of +the file descriptors that are available to the Netdata service are accessible by dbengine instances. You should take +that into account when configuring your service or system-wide file descriptor limits. You can roughly estimate that the +Netdata service needs 2048 file descriptors for every 10 streaming child hosts when streaming is configured to use +`[db].mode = dbengine`. + +If for example one wants to allocate 65536 file descriptors to the Netdata service on a systemd system one needs to +override the Netdata service by running `sudo systemctl edit netdata` and creating a file with contents: + +```sh +[Service] +LimitNOFILE=65536 +``` + +For other types of services one can add the line: + +```sh +ulimit -n 65536 +``` + +at the beginning of the service file. Alternatively you can change the system-wide limits of the kernel by changing +`/etc/sysctl.conf`. For linux that would be: + +```conf +fs.file-max = 65536 +``` + +In FreeBSD and OS X you change the lines like this: + +```conf +kern.maxfilesperproc=65536 +kern.maxfiles=65536 +``` + +You can apply the settings by running `sysctl -p` or by rebooting. + +## Files + +With the DB engine mode the metric data are stored in database files. These files are organized in pairs, the datafiles +and their corresponding journalfiles, e.g.: + +```sh +datafile-1-0000000001.ndf +journalfile-1-0000000001.njf +datafile-1-0000000002.ndf +journalfile-1-0000000002.njf +datafile-1-0000000003.ndf +journalfile-1-0000000003.njf +... +``` + +They are located under their host's cache directory in the directory `./dbengine` (e.g. for localhost the default +location is `/var/cache/netdata/dbengine/*`). The higher numbered filenames contain more recent metric data. The user +can safely delete some pairs of files when Netdata is stopped to manually free up some space. + +_Users should_ **back up** _their `./dbengine` folders if they consider this data to be important._ You can also set up +one or more [exporting connectors](/exporting/README.md) to send your Netdata metrics to other databases for long-term +storage at lower granularity. + +## Operation + +The DB engine stores chart metric values in 4096-byte pages in memory. Each chart dimension gets its own page to store +consecutive values generated from the data collectors. Those pages comprise the **Page Cache**. + +When those pages fill up, they are slowly compressed and flushed to disk. It can +take `4096 / 4 = 1024 seconds = 17 minutes`, for a chart dimension that is being collected every 1 second, to fill a +page. Pages can be cut short when we stop Netdata or the DB engine instance so as to not lose the data. When we query +the DB engine for data we trigger disk read I/O requests that fill the Page Cache with the requested pages and +potentially evict cold (not recently used) +pages. + +When the disk quota is exceeded the oldest values are removed from the DB engine at real time, by automatically deleting +the oldest datafile and journalfile pair. Any corresponding pages residing in the Page Cache will also be invalidated +and removed. The DB engine logic will try to maintain between 10 and 20 file pairs at any point in time. + +The Database Engine uses direct I/O to avoid polluting the OS filesystem caches and does not generate excessive I/O +traffic so as to create the minimum possible interference with other applications. + +## Evaluation + +We have evaluated the performance of the `dbengine` API that the netdata daemon uses internally. This is **not** the web +API of netdata. Our benchmarks ran on a **single** `dbengine` instance, multiple of which can be running in a Netdata +parent node. We used a server with an AMD Ryzen Threadripper 2950X 16-Core Processor and 2 disk drives, a Seagate +Constellation ES.3 2TB magnetic HDD and a SAMSUNG MZQLB960HAJR-00007 960GB NAND Flash SSD. + +For our workload, we defined 32 charts with 128 metrics each, giving us a total of 4096 metrics. We defined 1 worker +thread per chart (32 threads) that generates new data points with a data generation interval of 1 second. The time axis +of the time-series is emulated and accelerated so that the worker threads can generate as many data points as possible +without delays. + +We also defined 32 worker threads that perform queries on random metrics with semi-random time ranges. The starting time +of the query is randomly selected between the beginning of the time-series and the time of the latest data point. The +ending time is randomly selected between 1 second and 1 hour after the starting time. The pseudo-random numbers are +generated with a uniform distribution. + +The data are written to the database at the same time as they are read from it. This is a concurrent read/write mixed +workload with a duration of 60 seconds. The faster `dbengine` runs, the bigger the dataset size becomes since more data +points will be generated. We set a page cache size of 64MiB for the two disk-bound scenarios. This way, the dataset size +of the metric data is much bigger than the RAM that is being used for caching so as to trigger I/O requests most of the +time. In our final scenario, we set the page cache size to 16 GiB. That way, the dataset fits in the page cache so as to +avoid all disk bottlenecks. + +The reported numbers are the following: + +| device | page cache | dataset | reads/sec | writes/sec | +|:------:|:----------:|--------:|----------:|-----------:| +| HDD | 64 MiB | 4.1 GiB | 813K | 18.0M | +| SSD | 64 MiB | 9.8 GiB | 1.7M | 43.0M | +| N/A | 16 GiB | 6.8 GiB | 118.2M | 30.2M | + +where "reads/sec" is the number of metric data points being read from the database via its API per second and +"writes/sec" is the number of metric data points being written to the database per second. + +Notice that the HDD numbers are pretty high and not much slower than the SSD numbers. This is thanks to the database +engine design being optimized for rotating media. In the database engine disk I/O requests are: + +- asynchronous to mask the high I/O latency of HDDs. +- mostly large to reduce the amount of HDD seeking time. +- mostly sequential to reduce the amount of HDD seeking time. +- compressed to reduce the amount of required throughput. + +As a result, the HDD is not thousands of times slower than the SSD, which is typical for other workloads. + +An interesting observation to make is that the CPU-bound run (16 GiB page cache) generates fewer data than the SSD run +(6.8 GiB vs 9.8 GiB). The reason is that the 32 reader threads in the SSD scenario are more frequently blocked by I/O, +and generate a read load of 1.7M/sec, whereas in the CPU-bound scenario the read load is 70 times higher at 118M/sec. +Consequently, there is a significant degree of interference by the reader threads, that slow down the writer threads. +This is also possible because the interference effects are greater than the SSD impact on data generation throughput. + + diff --git a/database/engine/datafile.c b/database/engine/datafile.c new file mode 100644 index 0000000..9c70068 --- /dev/null +++ b/database/engine/datafile.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#include "rrdengine.h" + +void df_extent_insert(struct extent_info *extent) +{ + struct rrdengine_datafile *datafile = extent->datafile; + + if (likely(NULL != datafile->extents.last)) { + datafile->extents.last->next = extent; + } + if (unlikely(NULL == datafile->extents.first)) { + datafile->extents.first = extent; + } + datafile->extents.last = extent; +} + +void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile) +{ + if (likely(NULL != ctx->datafiles.last)) { + ctx->datafiles.last->next = datafile; + } + if (unlikely(NULL == ctx->datafiles.first)) { + ctx->datafiles.first = datafile; + } + ctx->datafiles.last = datafile; +} + +void datafile_list_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile) +{ + struct rrdengine_datafile *next; + + next = datafile->next; + fatal_assert((NULL != next) && (ctx->datafiles.first == datafile) && (ctx->datafiles.last != datafile)); + ctx->datafiles.first = next; +} + + +static void datafile_init(struct rrdengine_datafile *datafile, struct rrdengine_instance *ctx, + unsigned tier, unsigned fileno) +{ + fatal_assert(tier == 1); + datafile->tier = tier; + datafile->fileno = fileno; + datafile->file = (uv_file)0; + datafile->pos = 0; + datafile->extents.first = datafile->extents.last = NULL; /* will be populated by journalfile */ + datafile->journalfile = NULL; + datafile->next = NULL; + datafile->ctx = ctx; +} + +void generate_datafilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen) +{ + (void) snprintfz(str, maxlen, "%s/" DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION, + datafile->ctx->dbfiles_path, datafile->tier, datafile->fileno); +} + +int close_data_file(struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + int ret; + char path[RRDENG_PATH_MAX]; + + generate_datafilepath(datafile, path, sizeof(path)); + + ret = uv_fs_close(NULL, &req, datafile->file, NULL); + if (ret < 0) { + error("uv_fs_close(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + return ret; +} + +int unlink_data_file(struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + int ret; + char path[RRDENG_PATH_MAX]; + + generate_datafilepath(datafile, path, sizeof(path)); + + ret = uv_fs_unlink(NULL, &req, path, NULL); + if (ret < 0) { + error("uv_fs_fsunlink(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ++ctx->stats.datafile_deletions; + + return ret; +} + +int destroy_data_file(struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + int ret; + char path[RRDENG_PATH_MAX]; + + generate_datafilepath(datafile, path, sizeof(path)); + + ret = uv_fs_ftruncate(NULL, &req, datafile->file, 0, NULL); + if (ret < 0) { + error("uv_fs_ftruncate(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ret = uv_fs_close(NULL, &req, datafile->file, NULL); + if (ret < 0) { + error("uv_fs_close(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ret = uv_fs_unlink(NULL, &req, path, NULL); + if (ret < 0) { + error("uv_fs_fsunlink(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ++ctx->stats.datafile_deletions; + + return ret; +} + +int create_data_file(struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + uv_file file; + int ret, fd; + struct rrdeng_df_sb *superblock; + uv_buf_t iov; + char path[RRDENG_PATH_MAX]; + + generate_datafilepath(datafile, path, sizeof(path)); + fd = open_file_direct_io(path, O_CREAT | O_RDWR | O_TRUNC, &file); + if (fd < 0) { + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + return fd; + } + datafile->file = file; + ++ctx->stats.datafile_creations; + + ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + } + memset(superblock, 0, sizeof(*superblock)); + (void) strncpy(superblock->magic_number, RRDENG_DF_MAGIC, RRDENG_MAGIC_SZ); + (void) strncpy(superblock->version, RRDENG_DF_VER, RRDENG_VER_SZ); + superblock->tier = 1; + + iov = uv_buf_init((void *)superblock, sizeof(*superblock)); + + ret = uv_fs_write(NULL, &req, file, &iov, 1, 0, NULL); + if (ret < 0) { + fatal_assert(req.result < 0); + error("uv_fs_write: %s", uv_strerror(ret)); + ++ctx->stats.io_errors; + rrd_stat_atomic_add(&global_io_errors, 1); + } + uv_fs_req_cleanup(&req); + posix_memfree(superblock); + if (ret < 0) { + destroy_data_file(datafile); + return ret; + } + + datafile->pos = sizeof(*superblock); + ctx->stats.io_write_bytes += sizeof(*superblock); + ++ctx->stats.io_write_requests; + + return 0; +} + +static int check_data_file_superblock(uv_file file) +{ + int ret; + struct rrdeng_df_sb *superblock; + uv_buf_t iov; + uv_fs_t req; + + ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + } + iov = uv_buf_init((void *)superblock, sizeof(*superblock)); + + ret = uv_fs_read(NULL, &req, file, &iov, 1, 0, NULL); + if (ret < 0) { + error("uv_fs_read: %s", uv_strerror(ret)); + uv_fs_req_cleanup(&req); + goto error; + } + fatal_assert(req.result >= 0); + uv_fs_req_cleanup(&req); + + if (strncmp(superblock->magic_number, RRDENG_DF_MAGIC, RRDENG_MAGIC_SZ) || + strncmp(superblock->version, RRDENG_DF_VER, RRDENG_VER_SZ) || + superblock->tier != 1) { + error("File has invalid superblock."); + ret = UV_EINVAL; + } else { + ret = 0; + } + error: + posix_memfree(superblock); + return ret; +} + +static int load_data_file(struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + uv_file file; + int ret, fd, error; + uint64_t file_size; + char path[RRDENG_PATH_MAX]; + + generate_datafilepath(datafile, path, sizeof(path)); + fd = open_file_direct_io(path, O_RDWR, &file); + if (fd < 0) { + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + return fd; + } + info("Initializing data file \"%s\".", path); + + ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_df_sb)); + if (ret) + goto error; + file_size = ALIGN_BYTES_CEILING(file_size); + + ret = check_data_file_superblock(file); + if (ret) + goto error; + ctx->stats.io_read_bytes += sizeof(struct rrdeng_df_sb); + ++ctx->stats.io_read_requests; + + datafile->file = file; + datafile->pos = file_size; + + info("Data file \"%s\" initialized (size:%"PRIu64").", path, file_size); + return 0; + + error: + error = ret; + ret = uv_fs_close(NULL, &req, file, NULL); + if (ret < 0) { + error("uv_fs_close(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + return error; +} + +static int scan_data_files_cmp(const void *a, const void *b) +{ + struct rrdengine_datafile *file1, *file2; + char path1[RRDENG_PATH_MAX], path2[RRDENG_PATH_MAX]; + + file1 = *(struct rrdengine_datafile **)a; + file2 = *(struct rrdengine_datafile **)b; + generate_datafilepath(file1, path1, sizeof(path1)); + generate_datafilepath(file2, path2, sizeof(path2)); + return strcmp(path1, path2); +} + +/* Returns number of datafiles that were loaded or < 0 on error */ +static int scan_data_files(struct rrdengine_instance *ctx) +{ + int ret; + unsigned tier, no, matched_files, i,failed_to_load; + static uv_fs_t req; + uv_dirent_t dent; + struct rrdengine_datafile **datafiles, *datafile; + struct rrdengine_journalfile *journalfile; + + ret = uv_fs_scandir(NULL, &req, ctx->dbfiles_path, 0, NULL); + if (ret < 0) { + fatal_assert(req.result < 0); + uv_fs_req_cleanup(&req); + error("uv_fs_scandir(%s): %s", ctx->dbfiles_path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + return ret; + } + info("Found %d files in path %s", ret, ctx->dbfiles_path); + + datafiles = callocz(MIN(ret, MAX_DATAFILES), sizeof(*datafiles)); + for (matched_files = 0 ; UV_EOF != uv_fs_scandir_next(&req, &dent) && matched_files < MAX_DATAFILES ; ) { + info("Scanning file \"%s/%s\"", ctx->dbfiles_path, dent.name); + ret = sscanf(dent.name, DATAFILE_PREFIX RRDENG_FILE_NUMBER_SCAN_TMPL DATAFILE_EXTENSION, &tier, &no); + if (2 == ret) { + info("Matched file \"%s/%s\"", ctx->dbfiles_path, dent.name); + datafile = mallocz(sizeof(*datafile)); + datafile_init(datafile, ctx, tier, no); + datafiles[matched_files++] = datafile; + } + } + uv_fs_req_cleanup(&req); + + if (0 == matched_files) { + freez(datafiles); + return 0; + } + if (matched_files == MAX_DATAFILES) { + error("Warning: hit maximum database engine file limit of %d files", MAX_DATAFILES); + } + qsort(datafiles, matched_files, sizeof(*datafiles), scan_data_files_cmp); + /* TODO: change this when tiering is implemented */ + ctx->last_fileno = datafiles[matched_files - 1]->fileno; + + for (failed_to_load = 0, i = 0 ; i < matched_files ; ++i) { + uint8_t must_delete_pair = 0; + + datafile = datafiles[i]; + ret = load_data_file(datafile); + if (0 != ret) { + must_delete_pair = 1; + } + journalfile = mallocz(sizeof(*journalfile)); + datafile->journalfile = journalfile; + journalfile_init(journalfile, datafile); + ret = load_journal_file(ctx, journalfile, datafile); + if (0 != ret) { + if (!must_delete_pair) /* If datafile is still open close it */ + close_data_file(datafile); + must_delete_pair = 1; + } + if (must_delete_pair) { + char path[RRDENG_PATH_MAX]; + + error("Deleting invalid data and journal file pair."); + ret = unlink_journal_file(journalfile); + if (!ret) { + generate_journalfilepath(datafile, path, sizeof(path)); + info("Deleted journal file \"%s\".", path); + } + ret = unlink_data_file(datafile); + if (!ret) { + generate_datafilepath(datafile, path, sizeof(path)); + info("Deleted data file \"%s\".", path); + } + freez(journalfile); + freez(datafile); + ++failed_to_load; + continue; + } + + datafile_list_insert(ctx, datafile); + ctx->disk_space += datafile->pos + journalfile->pos; + } + matched_files -= failed_to_load; + freez(datafiles); + + return matched_files; +} + +/* Creates a datafile and a journalfile pair */ +int create_new_datafile_pair(struct rrdengine_instance *ctx, unsigned tier, unsigned fileno) +{ + struct rrdengine_datafile *datafile; + struct rrdengine_journalfile *journalfile; + int ret; + char path[RRDENG_PATH_MAX]; + + info("Creating new data and journal files in path %s", ctx->dbfiles_path); + datafile = mallocz(sizeof(*datafile)); + datafile_init(datafile, ctx, tier, fileno); + ret = create_data_file(datafile); + if (!ret) { + generate_datafilepath(datafile, path, sizeof(path)); + info("Created data file \"%s\".", path); + } else { + goto error_after_datafile; + } + + journalfile = mallocz(sizeof(*journalfile)); + datafile->journalfile = journalfile; + journalfile_init(journalfile, datafile); + ret = create_journal_file(journalfile, datafile); + if (!ret) { + generate_journalfilepath(datafile, path, sizeof(path)); + info("Created journal file \"%s\".", path); + } else { + goto error_after_journalfile; + } + datafile_list_insert(ctx, datafile); + ctx->disk_space += datafile->pos + journalfile->pos; + + return 0; + +error_after_journalfile: + destroy_data_file(datafile); + freez(journalfile); +error_after_datafile: + freez(datafile); + return ret; +} + +/* Page cache must already be initialized. + * Return 0 on success. + */ +int init_data_files(struct rrdengine_instance *ctx) +{ + int ret; + + ret = scan_data_files(ctx); + if (ret < 0) { + error("Failed to scan path \"%s\".", ctx->dbfiles_path); + return ret; + } else if (0 == ret) { + info("Data files not found, creating in path \"%s\".", ctx->dbfiles_path); + ret = create_new_datafile_pair(ctx, 1, 1); + if (ret) { + error("Failed to create data and journal files in path \"%s\".", ctx->dbfiles_path); + return ret; + } + ctx->last_fileno = 1; + } + + return 0; +} + +void finalize_data_files(struct rrdengine_instance *ctx) +{ + struct rrdengine_datafile *datafile, *next_datafile; + struct rrdengine_journalfile *journalfile; + struct extent_info *extent, *next_extent; + + for (datafile = ctx->datafiles.first ; datafile != NULL ; datafile = next_datafile) { + journalfile = datafile->journalfile; + next_datafile = datafile->next; + + for (extent = datafile->extents.first ; extent != NULL ; extent = next_extent) { + next_extent = extent->next; + freez(extent); + } + close_journal_file(journalfile, datafile); + close_data_file(datafile); + freez(journalfile); + freez(datafile); + } +} diff --git a/database/engine/datafile.h b/database/engine/datafile.h new file mode 100644 index 0000000..1cf256a --- /dev/null +++ b/database/engine/datafile.h @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DATAFILE_H +#define NETDATA_DATAFILE_H + +#include "rrdengine.h" + +/* Forward declarations */ +struct rrdengine_datafile; +struct rrdengine_journalfile; +struct rrdengine_instance; + +#define DATAFILE_PREFIX "datafile-" +#define DATAFILE_EXTENSION ".ndf" + +#define MAX_DATAFILE_SIZE (1073741824LU) +#define MIN_DATAFILE_SIZE (4194304LU) +#define MAX_DATAFILES (65536) /* Supports up to 64TiB for now */ +#define TARGET_DATAFILES (20) + +#define DATAFILE_IDEAL_IO_SIZE (1048576U) + +struct extent_info { + uint64_t offset; + uint32_t size; + uint8_t number_of_pages; + struct rrdengine_datafile *datafile; + struct extent_info *next; + struct rrdeng_page_descr *pages[]; +}; + +struct rrdengine_df_extents { + /* the extent list is sorted based on disk offset */ + struct extent_info *first; + struct extent_info *last; +}; + +/* only one event loop is supported for now */ +struct rrdengine_datafile { + unsigned tier; + unsigned fileno; + uv_file file; + uint64_t pos; + struct rrdengine_instance *ctx; + struct rrdengine_df_extents extents; + struct rrdengine_journalfile *journalfile; + struct rrdengine_datafile *next; +}; + +struct rrdengine_datafile_list { + struct rrdengine_datafile *first; /* oldest */ + struct rrdengine_datafile *last; /* newest */ +}; + +void df_extent_insert(struct extent_info *extent); +void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile); +void datafile_list_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile); +void generate_datafilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen); +int close_data_file(struct rrdengine_datafile *datafile); +int unlink_data_file(struct rrdengine_datafile *datafile); +int destroy_data_file(struct rrdengine_datafile *datafile); +int create_data_file(struct rrdengine_datafile *datafile); +int create_new_datafile_pair(struct rrdengine_instance *ctx, unsigned tier, unsigned fileno); +int init_data_files(struct rrdengine_instance *ctx); +void finalize_data_files(struct rrdengine_instance *ctx); + +#endif /* NETDATA_DATAFILE_H */ \ No newline at end of file diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c new file mode 100644 index 0000000..500dd78 --- /dev/null +++ b/database/engine/journalfile.c @@ -0,0 +1,587 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#include "rrdengine.h" + +static void flush_transaction_buffer_cb(uv_fs_t* req) +{ + struct generic_io_descriptor *io_descr = req->data; + struct rrdengine_worker_config* wc = req->loop->data; + struct rrdengine_instance *ctx = wc->ctx; + + debug(D_RRDENGINE, "%s: Journal block was written to disk.", __func__); + if (req->result < 0) { + ++ctx->stats.io_errors; + rrd_stat_atomic_add(&global_io_errors, 1); + error("%s: uv_fs_write: %s", __func__, uv_strerror((int)req->result)); + } else { + debug(D_RRDENGINE, "%s: Journal block was written to disk.", __func__); + } + + uv_fs_req_cleanup(req); + posix_memfree(io_descr->buf); + freez(io_descr); +} + +/* Careful to always call this before creating a new journal file */ +void wal_flush_transaction_buffer(struct rrdengine_worker_config* wc) +{ + struct rrdengine_instance *ctx = wc->ctx; + int ret; + struct generic_io_descriptor *io_descr; + unsigned pos, size; + struct rrdengine_journalfile *journalfile; + + if (unlikely(NULL == ctx->commit_log.buf || 0 == ctx->commit_log.buf_pos)) { + return; + } + /* care with outstanding transactions when switching journal files */ + journalfile = ctx->datafiles.last->journalfile; + + io_descr = mallocz(sizeof(*io_descr)); + pos = ctx->commit_log.buf_pos; + size = ctx->commit_log.buf_size; + if (pos < size) { + /* simulate an empty transaction to skip the rest of the block */ + *(uint8_t *) (ctx->commit_log.buf + pos) = STORE_PADDING; + } + io_descr->buf = ctx->commit_log.buf; + io_descr->bytes = size; + io_descr->pos = journalfile->pos; + io_descr->req.data = io_descr; + io_descr->completion = NULL; + + io_descr->iov = uv_buf_init((void *)io_descr->buf, size); + ret = uv_fs_write(wc->loop, &io_descr->req, journalfile->file, &io_descr->iov, 1, + journalfile->pos, flush_transaction_buffer_cb); + fatal_assert(-1 != ret); + journalfile->pos += RRDENG_BLOCK_SIZE; + ctx->disk_space += RRDENG_BLOCK_SIZE; + ctx->commit_log.buf = NULL; + ctx->stats.io_write_bytes += RRDENG_BLOCK_SIZE; + ++ctx->stats.io_write_requests; +} + +void * wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned size) +{ + struct rrdengine_instance *ctx = wc->ctx; + int ret; + unsigned buf_pos = 0, buf_size; + + fatal_assert(size); + if (ctx->commit_log.buf) { + unsigned remaining; + + buf_pos = ctx->commit_log.buf_pos; + buf_size = ctx->commit_log.buf_size; + remaining = buf_size - buf_pos; + if (size > remaining) { + /* we need a new buffer */ + wal_flush_transaction_buffer(wc); + } + } + if (NULL == ctx->commit_log.buf) { + buf_size = ALIGN_BYTES_CEILING(size); + ret = posix_memalign((void *)&ctx->commit_log.buf, RRDFILE_ALIGNMENT, buf_size); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + } + memset(ctx->commit_log.buf, 0, buf_size); + buf_pos = ctx->commit_log.buf_pos = 0; + ctx->commit_log.buf_size = buf_size; + } + ctx->commit_log.buf_pos += size; + + return ctx->commit_log.buf + buf_pos; +} + +void generate_journalfilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen) +{ + (void) snprintfz(str, maxlen, "%s/" WALFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL WALFILE_EXTENSION, + datafile->ctx->dbfiles_path, datafile->tier, datafile->fileno); +} + +void journalfile_init(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile) +{ + journalfile->file = (uv_file)0; + journalfile->pos = 0; + journalfile->datafile = datafile; +} + +int close_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + int ret; + char path[RRDENG_PATH_MAX]; + + generate_journalfilepath(datafile, path, sizeof(path)); + + ret = uv_fs_close(NULL, &req, journalfile->file, NULL); + if (ret < 0) { + error("uv_fs_close(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + return ret; +} + +int unlink_journal_file(struct rrdengine_journalfile *journalfile) +{ + struct rrdengine_datafile *datafile = journalfile->datafile; + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + int ret; + char path[RRDENG_PATH_MAX]; + + generate_journalfilepath(datafile, path, sizeof(path)); + + ret = uv_fs_unlink(NULL, &req, path, NULL); + if (ret < 0) { + error("uv_fs_fsunlink(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ++ctx->stats.journalfile_deletions; + + return ret; +} + +int destroy_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + int ret; + char path[RRDENG_PATH_MAX]; + + generate_journalfilepath(datafile, path, sizeof(path)); + + ret = uv_fs_ftruncate(NULL, &req, journalfile->file, 0, NULL); + if (ret < 0) { + error("uv_fs_ftruncate(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ret = uv_fs_close(NULL, &req, journalfile->file, NULL); + if (ret < 0) { + error("uv_fs_close(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ret = uv_fs_unlink(NULL, &req, path, NULL); + if (ret < 0) { + error("uv_fs_fsunlink(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + + ++ctx->stats.journalfile_deletions; + + return ret; +} + +int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile) +{ + struct rrdengine_instance *ctx = datafile->ctx; + uv_fs_t req; + uv_file file; + int ret, fd; + struct rrdeng_jf_sb *superblock; + uv_buf_t iov; + char path[RRDENG_PATH_MAX]; + + generate_journalfilepath(datafile, path, sizeof(path)); + fd = open_file_direct_io(path, O_CREAT | O_RDWR | O_TRUNC, &file); + if (fd < 0) { + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + return fd; + } + journalfile->file = file; + ++ctx->stats.journalfile_creations; + + ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + } + memset(superblock, 0, sizeof(*superblock)); + (void) strncpy(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ); + (void) strncpy(superblock->version, RRDENG_JF_VER, RRDENG_VER_SZ); + + iov = uv_buf_init((void *)superblock, sizeof(*superblock)); + + ret = uv_fs_write(NULL, &req, file, &iov, 1, 0, NULL); + if (ret < 0) { + fatal_assert(req.result < 0); + error("uv_fs_write: %s", uv_strerror(ret)); + ++ctx->stats.io_errors; + rrd_stat_atomic_add(&global_io_errors, 1); + } + uv_fs_req_cleanup(&req); + posix_memfree(superblock); + if (ret < 0) { + destroy_journal_file(journalfile, datafile); + return ret; + } + + journalfile->pos = sizeof(*superblock); + ctx->stats.io_write_bytes += sizeof(*superblock); + ++ctx->stats.io_write_requests; + + return 0; +} + +static int check_journal_file_superblock(uv_file file) +{ + int ret; + struct rrdeng_jf_sb *superblock; + uv_buf_t iov; + uv_fs_t req; + + ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + } + iov = uv_buf_init((void *)superblock, sizeof(*superblock)); + + ret = uv_fs_read(NULL, &req, file, &iov, 1, 0, NULL); + if (ret < 0) { + error("uv_fs_read: %s", uv_strerror(ret)); + uv_fs_req_cleanup(&req); + goto error; + } + fatal_assert(req.result >= 0); + uv_fs_req_cleanup(&req); + + if (strncmp(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ) || + strncmp(superblock->version, RRDENG_JF_VER, RRDENG_VER_SZ)) { + error("File has invalid superblock."); + ret = UV_EINVAL; + } else { + ret = 0; + } + error: + posix_memfree(superblock); + return ret; +} + +static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, + void *buf, unsigned max_size) +{ + static BITMAP256 page_error_map; + struct page_cache *pg_cache = &ctx->pg_cache; + unsigned i, count, payload_length, descr_size, valid_pages; + struct rrdeng_page_descr *descr; + struct extent_info *extent; + /* persistent structures */ + struct rrdeng_jf_store_data *jf_metric_data; + + jf_metric_data = buf; + count = jf_metric_data->number_of_pages; + descr_size = sizeof(*jf_metric_data->descr) * count; + payload_length = sizeof(*jf_metric_data) + descr_size; + if (payload_length > max_size) { + error("Corrupted transaction payload."); + return; + } + + extent = mallocz(sizeof(*extent) + count * sizeof(extent->pages[0])); + extent->offset = jf_metric_data->extent_offset; + extent->size = jf_metric_data->extent_size; + extent->datafile = journalfile->datafile; + extent->next = NULL; + + for (i = 0, valid_pages = 0 ; i < count ; ++i) { + uuid_t *temp_id; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + uint8_t page_type = jf_metric_data->descr[i].type; + + if (page_type > PAGE_TYPE_MAX) { + if (!bitmap256_get_bit(&page_error_map, page_type)) { + error("Unknown page type %d encountered.", page_type); + bitmap256_set_bit(&page_error_map, page_type, 1); + } + continue; + } + uint64_t start_time_ut = jf_metric_data->descr[i].start_time_ut; + uint64_t end_time_ut = jf_metric_data->descr[i].end_time_ut; + size_t entries = jf_metric_data->descr[i].page_length / page_type_size[page_type]; + time_t update_every_s = (entries > 1) ? ((end_time_ut - start_time_ut) / USEC_PER_SEC / (entries - 1)) : 0; + + if (unlikely(start_time_ut > end_time_ut)) { + ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut = end_time_ut; + continue; + } + + if (unlikely(start_time_ut == end_time_ut && entries != 1)) { + ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut = end_time_ut; + continue; + } + + if (unlikely(!entries)) { + ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut = end_time_ut; + continue; + } + + if(entries > 1 && update_every_s == 0) { + ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut = end_time_ut; + continue; + } + + if(start_time_ut + update_every_s * USEC_PER_SEC * (entries - 1) != end_time_ut) { + ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut = end_time_ut; + + // let this be + // end_time_ut = start_time_ut + update_every_s * USEC_PER_SEC * (entries - 1); + } + + temp_id = (uuid_t *)jf_metric_data->descr[i].uuid; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, temp_id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + if (NULL == PValue) { + /* First time we see the UUID */ + uv_rwlock_wrlock(&pg_cache->metrics_index.lock); + PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, temp_id, sizeof(uuid_t), PJE0); + fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */ + *PValue = page_index = create_page_index(temp_id, ctx); + page_index->prev = pg_cache->metrics_index.last_page_index; + pg_cache->metrics_index.last_page_index = page_index; + uv_rwlock_wrunlock(&pg_cache->metrics_index.lock); + } + + descr = pg_cache_create_descr(); + descr->page_length = jf_metric_data->descr[i].page_length; + descr->start_time_ut = start_time_ut; + descr->end_time_ut = end_time_ut; + descr->update_every_s = (update_every_s > 0) ? (uint32_t)update_every_s : (page_index->latest_update_every_s); + descr->id = &page_index->id; + descr->extent = extent; + descr->type = page_type; + extent->pages[valid_pages++] = descr; + pg_cache_insert(ctx, page_index, descr); + + if(page_index->latest_time_ut == descr->end_time_ut) + page_index->latest_update_every_s = descr->update_every_s; + + if(descr->update_every_s == 0) + fatal( + "DBENGINE: page descriptor update every is zero, end_time_ut = %llu, start_time_ut = %llu, entries = %zu", + (unsigned long long)end_time_ut, (unsigned long long)start_time_ut, entries); + } + + extent->number_of_pages = valid_pages; + + if (likely(valid_pages)) + df_extent_insert(extent); + else { + freez(extent); + ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter++; + } +} + +/* + * Replays transaction by interpreting up to max_size bytes from buf. + * Sets id to the current transaction id or to 0 if unknown. + * Returns size of transaction record or 0 for unknown size. + */ +static unsigned replay_transaction(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, + void *buf, uint64_t *id, unsigned max_size) +{ + unsigned payload_length, size_bytes; + int ret; + /* persistent structures */ + struct rrdeng_jf_transaction_header *jf_header; + struct rrdeng_jf_transaction_trailer *jf_trailer; + uLong crc; + + *id = 0; + jf_header = buf; + if (STORE_PADDING == jf_header->type) { + debug(D_RRDENGINE, "Skipping padding."); + return 0; + } + if (sizeof(*jf_header) > max_size) { + error("Corrupted transaction record, skipping."); + return 0; + } + *id = jf_header->id; + payload_length = jf_header->payload_length; + size_bytes = sizeof(*jf_header) + payload_length + sizeof(*jf_trailer); + if (size_bytes > max_size) { + error("Corrupted transaction record, skipping."); + return 0; + } + jf_trailer = buf + sizeof(*jf_header) + payload_length; + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, buf, sizeof(*jf_header) + payload_length); + ret = crc32cmp(jf_trailer->checksum, crc); + debug(D_RRDENGINE, "Transaction %"PRIu64" was read from disk. CRC32 check: %s", *id, ret ? "FAILED" : "SUCCEEDED"); + if (unlikely(ret)) { + error("Transaction %"PRIu64" was read from disk. CRC32 check: FAILED", *id); + return size_bytes; + } + switch (jf_header->type) { + case STORE_DATA: + debug(D_RRDENGINE, "Replaying transaction %"PRIu64"", jf_header->id); + restore_extent_metadata(ctx, journalfile, buf + sizeof(*jf_header), payload_length); + break; + default: + error("Unknown transaction type. Skipping record."); + break; + } + + return size_bytes; +} + + +#define READAHEAD_BYTES (RRDENG_BLOCK_SIZE * 256) +/* + * Iterates journal file transactions and populates the page cache. + * Page cache must already be initialized. + * Returns the maximum transaction id it discovered. + */ +static uint64_t iterate_transactions(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile) +{ + uv_file file; + uint64_t file_size;//, data_file_size; + int ret; + uint64_t pos, pos_i, max_id, id; + unsigned size_bytes; + void *buf; + uv_buf_t iov; + uv_fs_t req; + + file = journalfile->file; + file_size = journalfile->pos; + //data_file_size = journalfile->datafile->pos; TODO: utilize this? + + max_id = 1; + bool journal_is_mmapped = (journalfile->data != NULL); + if (unlikely(!journal_is_mmapped)) { + ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES); + if (unlikely(ret)) + fatal("posix_memalign:%s", strerror(ret)); + } + else + buf = journalfile->data + sizeof(struct rrdeng_jf_sb); + for (pos = sizeof(struct rrdeng_jf_sb) ; pos < file_size ; pos += READAHEAD_BYTES) { + size_bytes = MIN(READAHEAD_BYTES, file_size - pos); + if (unlikely(!journal_is_mmapped)) { + iov = uv_buf_init(buf, size_bytes); + ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL); + if (ret < 0) { + error("uv_fs_read: pos=%" PRIu64 ", %s", pos, uv_strerror(ret)); + uv_fs_req_cleanup(&req); + goto skip_file; + } + fatal_assert(req.result >= 0); + uv_fs_req_cleanup(&req); + ++ctx->stats.io_read_requests; + ctx->stats.io_read_bytes += size_bytes; + } + + for (pos_i = 0 ; pos_i < size_bytes ; ) { + unsigned max_size; + + max_size = pos + size_bytes - pos_i; + ret = replay_transaction(ctx, journalfile, buf + pos_i, &id, max_size); + if (!ret) /* TODO: support transactions bigger than 4K */ + /* unknown transaction size, move on to the next block */ + pos_i = ALIGN_BYTES_FLOOR(pos_i + RRDENG_BLOCK_SIZE); + else + pos_i += ret; + max_id = MAX(max_id, id); + } + if (likely(journal_is_mmapped)) + buf += size_bytes; + } +skip_file: + if (unlikely(!journal_is_mmapped)) + posix_memfree(buf); + return max_id; +} + +int load_journal_file(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, + struct rrdengine_datafile *datafile) +{ + uv_fs_t req; + uv_file file; + int ret, fd, error; + uint64_t file_size, max_id; + char path[RRDENG_PATH_MAX]; + + generate_journalfilepath(datafile, path, sizeof(path)); + fd = open_file_direct_io(path, O_RDWR, &file); + if (fd < 0) { + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + return fd; + } + info("Loading journal file \"%s\".", path); + + ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_df_sb)); + if (ret) + goto error; + file_size = ALIGN_BYTES_FLOOR(file_size); + + ret = check_journal_file_superblock(file); + if (ret) + goto error; + ctx->stats.io_read_bytes += sizeof(struct rrdeng_jf_sb); + ++ctx->stats.io_read_requests; + + journalfile->file = file; + journalfile->pos = file_size; + journalfile->data = netdata_mmap(path, file_size, MAP_SHARED, 0); + info("Loading journal file \"%s\" using %s.", path, journalfile->data?"MMAP":"uv_fs_read"); + + max_id = iterate_transactions(ctx, journalfile); + + ctx->commit_log.transaction_id = MAX(ctx->commit_log.transaction_id, max_id + 1); + + info("Journal file \"%s\" loaded (size:%"PRIu64").", path, file_size); + if (likely(journalfile->data)) + netdata_munmap(journalfile->data, file_size); + return 0; + + error: + error = ret; + ret = uv_fs_close(NULL, &req, file, NULL); + if (ret < 0) { + error("uv_fs_close(%s): %s", path, uv_strerror(ret)); + ++ctx->stats.fs_errors; + rrd_stat_atomic_add(&global_fs_errors, 1); + } + uv_fs_req_cleanup(&req); + return error; +} + +void init_commit_log(struct rrdengine_instance *ctx) +{ + ctx->commit_log.buf = NULL; + ctx->commit_log.buf_pos = 0; + ctx->commit_log.transaction_id = 1; +} diff --git a/database/engine/journalfile.h b/database/engine/journalfile.h new file mode 100644 index 0000000..011c506 --- /dev/null +++ b/database/engine/journalfile.h @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_JOURNALFILE_H +#define NETDATA_JOURNALFILE_H + +#include "rrdengine.h" + +/* Forward declarations */ +struct rrdengine_instance; +struct rrdengine_worker_config; +struct rrdengine_datafile; +struct rrdengine_journalfile; + +#define WALFILE_PREFIX "journalfile-" +#define WALFILE_EXTENSION ".njf" + + +/* only one event loop is supported for now */ +struct rrdengine_journalfile { + uv_file file; + uint64_t pos; + void *data; + struct rrdengine_datafile *datafile; +}; + +/* only one event loop is supported for now */ +struct transaction_commit_log { + uint64_t transaction_id; + + /* outstanding transaction buffer */ + void *buf; + unsigned buf_pos; + unsigned buf_size; +}; + +void generate_journalfilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen); +void journalfile_init(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +void *wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned size); +void wal_flush_transaction_buffer(struct rrdengine_worker_config* wc); +int close_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +int unlink_journal_file(struct rrdengine_journalfile *journalfile); +int destroy_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +int load_journal_file(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, + struct rrdengine_datafile *datafile); +void init_commit_log(struct rrdengine_instance *ctx); + + +#endif /* NETDATA_JOURNALFILE_H */ \ No newline at end of file diff --git a/database/engine/metadata_log/README.md b/database/engine/metadata_log/README.md new file mode 100644 index 0000000..e69de29 diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c new file mode 100644 index 0000000..4f5da70 --- /dev/null +++ b/database/engine/pagecache.c @@ -0,0 +1,1313 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#define NETDATA_RRD_INTERNALS + +#include "rrdengine.h" + +ARAL page_descr_aral = { + .requested_element_size = sizeof(struct rrdeng_page_descr), + .initial_elements = 20000, + .filename = "page_descriptors", + .cache_dir = &netdata_configured_cache_dir, + .use_mmap = false, + .internal.initialized = false +}; + +void rrdeng_page_descr_aral_go_singlethreaded(void) { + page_descr_aral.internal.lockless = true; +} +void rrdeng_page_descr_aral_go_multithreaded(void) { + page_descr_aral.internal.lockless = false; +} + +struct rrdeng_page_descr *rrdeng_page_descr_mallocz(void) { + struct rrdeng_page_descr *descr; + descr = arrayalloc_mallocz(&page_descr_aral); + return descr; +} + +void rrdeng_page_descr_freez(struct rrdeng_page_descr *descr) { + arrayalloc_freez(&page_descr_aral, descr); +} + +void rrdeng_page_descr_use_malloc(void) { + if(page_descr_aral.internal.initialized) + error("DBENGINE: cannot change ARAL allocation policy after it has been initialized."); + else + page_descr_aral.use_mmap = false; +} + +void rrdeng_page_descr_use_mmap(void) { + if(page_descr_aral.internal.initialized) + error("DBENGINE: cannot change ARAL allocation policy after it has been initialized."); + else + page_descr_aral.use_mmap = true; +} + +bool rrdeng_page_descr_is_mmap(void) { + return page_descr_aral.use_mmap; +} + +/* Forward declarations */ +static int pg_cache_try_evict_one_page_unsafe(struct rrdengine_instance *ctx); + +/* always inserts into tail */ +static inline void pg_cache_replaceQ_insert_unsafe(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + if (likely(NULL != pg_cache->replaceQ.tail)) { + pg_cache_descr->prev = pg_cache->replaceQ.tail; + pg_cache->replaceQ.tail->next = pg_cache_descr; + } + if (unlikely(NULL == pg_cache->replaceQ.head)) { + pg_cache->replaceQ.head = pg_cache_descr; + } + pg_cache->replaceQ.tail = pg_cache_descr; +} + +static inline void pg_cache_replaceQ_delete_unsafe(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr, *prev, *next; + + prev = pg_cache_descr->prev; + next = pg_cache_descr->next; + + if (likely(NULL != prev)) { + prev->next = next; + } + if (likely(NULL != next)) { + next->prev = prev; + } + if (unlikely(pg_cache_descr == pg_cache->replaceQ.head)) { + pg_cache->replaceQ.head = next; + } + if (unlikely(pg_cache_descr == pg_cache->replaceQ.tail)) { + pg_cache->replaceQ.tail = prev; + } + pg_cache_descr->prev = pg_cache_descr->next = NULL; +} + +void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + uv_rwlock_wrlock(&pg_cache->replaceQ.lock); + pg_cache_replaceQ_insert_unsafe(ctx, descr); + uv_rwlock_wrunlock(&pg_cache->replaceQ.lock); +} + +void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + uv_rwlock_wrlock(&pg_cache->replaceQ.lock); + pg_cache_replaceQ_delete_unsafe(ctx, descr); + uv_rwlock_wrunlock(&pg_cache->replaceQ.lock); +} +void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + uv_rwlock_wrlock(&pg_cache->replaceQ.lock); + pg_cache_replaceQ_delete_unsafe(ctx, descr); + pg_cache_replaceQ_insert_unsafe(ctx, descr); + uv_rwlock_wrunlock(&pg_cache->replaceQ.lock); +} + +struct rrdeng_page_descr *pg_cache_create_descr(void) +{ + struct rrdeng_page_descr *descr; + + descr = rrdeng_page_descr_mallocz(); + descr->page_length = 0; + descr->start_time_ut = INVALID_TIME; + descr->end_time_ut = INVALID_TIME; + descr->id = NULL; + descr->extent = NULL; + descr->pg_cache_descr_state = 0; + descr->pg_cache_descr = NULL; + descr->update_every_s = 0; + + return descr; +} + +/* The caller must hold page descriptor lock. */ +void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr) +{ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + if (pg_cache_descr->waiters) + uv_cond_broadcast(&pg_cache_descr->cond); +} + +void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_wake_up_waiters_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); +} + +/* + * The caller must hold page descriptor lock. + * The lock will be released and re-acquired. The descriptor is not guaranteed + * to exist after this function returns. + */ +void pg_cache_wait_event_unsafe(struct rrdeng_page_descr *descr) +{ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + ++pg_cache_descr->waiters; + uv_cond_wait(&pg_cache_descr->cond, &pg_cache_descr->mutex); + --pg_cache_descr->waiters; +} + +/* + * The caller must hold page descriptor lock. + * The lock will be released and re-acquired. The descriptor is not guaranteed + * to exist after this function returns. + * Returns UV_ETIMEDOUT if timeout_sec seconds pass. + */ +int pg_cache_timedwait_event_unsafe(struct rrdeng_page_descr *descr, uint64_t timeout_sec) +{ + int ret; + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + ++pg_cache_descr->waiters; + ret = uv_cond_timedwait(&pg_cache_descr->cond, &pg_cache_descr->mutex, timeout_sec * NSEC_PER_SEC); + --pg_cache_descr->waiters; + + return ret; +} + +/* + * Returns page flags. + * The lock will be released and re-acquired. The descriptor is not guaranteed + * to exist after this function returns. + */ +unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + unsigned long flags; + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_wait_event_unsafe(descr); + flags = pg_cache_descr->flags; + rrdeng_page_descr_mutex_unlock(ctx, descr); + + return flags; +} + +/* + * The caller must hold page descriptor lock. + */ +int pg_cache_can_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access) +{ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + if ((pg_cache_descr->flags & (RRD_PAGE_LOCKED | RRD_PAGE_READ_PENDING)) || + (exclusive_access && pg_cache_descr->refcnt)) { + return 0; + } + + return 1; +} + +/* + * The caller must hold page descriptor lock. + * Gets a reference to the page descriptor. + * Returns 1 on success and 0 on failure. + */ +int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access) +{ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + if (!pg_cache_can_get_unsafe(descr, exclusive_access)) + return 0; + + if (exclusive_access) + pg_cache_descr->flags |= RRD_PAGE_LOCKED; + ++pg_cache_descr->refcnt; + + return 1; +} + +/* + * The caller must hold the page descriptor lock. + * This function may block doing cleanup. + */ +void pg_cache_put_unsafe(struct rrdeng_page_descr *descr) +{ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + pg_cache_descr->flags &= ~RRD_PAGE_LOCKED; + if (0 == --pg_cache_descr->refcnt) { + pg_cache_wake_up_waiters_unsafe(descr); + } +} + +/* + * This function may block doing cleanup. + */ +void pg_cache_put(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_put_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); +} + +/* The caller must hold the page cache lock */ +static void pg_cache_release_pages_unsafe(struct rrdengine_instance *ctx, unsigned number) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + pg_cache->populated_pages -= number; +} + +static void pg_cache_release_pages(struct rrdengine_instance *ctx, unsigned number) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); + pg_cache_release_pages_unsafe(ctx, number); + uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); +} + +/* + * This function returns the maximum number of pages allowed in the page cache. + */ +unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx) +{ + return ctx->max_cache_pages + (unsigned long)ctx->metric_API_max_producers; +} + +/* + * This function returns the low watermark number of pages in the page cache. The page cache should strive to keep the + * number of pages below that number. + */ +unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx) +{ + return ctx->cache_pages_low_watermark + (unsigned long)ctx->metric_API_max_producers; +} + +/* + * This function returns the maximum number of dirty pages that are committed to be written to disk allowed in the page + * cache. + */ +unsigned long pg_cache_committed_hard_limit(struct rrdengine_instance *ctx) +{ + /* We remove the active pages of the producers from the calculation and only allow the extra pinned pages */ + return ctx->cache_pages_low_watermark + (unsigned long)ctx->metric_API_max_producers; +} + +/* + * This function will block until it reserves #number populated pages. + * It will trigger evictions or dirty page flushing if the pg_cache_hard_limit() limit is hit. + */ +static void pg_cache_reserve_pages(struct rrdengine_instance *ctx, unsigned number) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + unsigned failures = 0; + const unsigned FAILURES_CEILING = 10; /* truncates exponential backoff to (2^FAILURES_CEILING x slot) */ + unsigned long exp_backoff_slot_usec = USEC_PER_MS * 10; + + assert(number < ctx->max_cache_pages); + + uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); + if (pg_cache->populated_pages + number >= pg_cache_hard_limit(ctx) + 1) + debug(D_RRDENGINE, "==Page cache full. Reserving %u pages.==", + number); + while (pg_cache->populated_pages + number >= pg_cache_hard_limit(ctx) + 1) { + + if (!pg_cache_try_evict_one_page_unsafe(ctx)) { + /* failed to evict */ + struct completion compl; + struct rrdeng_cmd cmd; + + ++failures; + uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); + + completion_init(&compl); + cmd.opcode = RRDENG_FLUSH_PAGES; + cmd.completion = &compl; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + /* wait for some pages to be flushed */ + debug(D_RRDENGINE, "%s: waiting for pages to be written to disk before evicting.", __func__); + completion_wait_for(&compl); + completion_destroy(&compl); + + if (unlikely(failures > 1)) { + unsigned long slots, usecs_to_sleep; + /* exponential backoff */ + slots = random() % (2LU << MIN(failures, FAILURES_CEILING)); + usecs_to_sleep = slots * exp_backoff_slot_usec; + + if (usecs_to_sleep >= USEC_PER_SEC) + error("Page cache is full. Sleeping for %llu second(s).", usecs_to_sleep / USEC_PER_SEC); + + (void)sleep_usec(usecs_to_sleep); + } + uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); + } + } + pg_cache->populated_pages += number; + uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); +} + +/* + * This function will attempt to reserve #number populated pages. + * It may trigger evictions if the pg_cache_soft_limit() limit is hit. + * Returns 0 on failure and 1 on success. + */ +static int pg_cache_try_reserve_pages(struct rrdengine_instance *ctx, unsigned number) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + unsigned count = 0; + int ret = 0; + + assert(number < ctx->max_cache_pages); + + uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); + if (pg_cache->populated_pages + number >= pg_cache_soft_limit(ctx) + 1) { + debug(D_RRDENGINE, + "==Page cache full. Trying to reserve %u pages.==", + number); + do { + if (!pg_cache_try_evict_one_page_unsafe(ctx)) + break; + ++count; + } while (pg_cache->populated_pages + number >= pg_cache_soft_limit(ctx) + 1); + debug(D_RRDENGINE, "Evicted %u pages.", count); + } + + if (pg_cache->populated_pages + number < pg_cache_hard_limit(ctx) + 1) { + pg_cache->populated_pages += number; + ret = 1; /* success */ + } + uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); + + return ret; +} + +/* The caller must hold the page cache and the page descriptor locks in that order */ +static void pg_cache_evict_unsafe(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + dbengine_page_free(pg_cache_descr->page); + pg_cache_descr->page = NULL; + pg_cache_descr->flags &= ~RRD_PAGE_POPULATED; + pg_cache_release_pages_unsafe(ctx, 1); + ++ctx->stats.pg_cache_evictions; +} + +/* + * The caller must hold the page cache lock. + * Lock order: page cache -> replaceQ -> page descriptor + * This function iterates all pages and tries to evict one. + * If it fails it sets in_flight_descr to the oldest descriptor that has write-back in progress, + * or it sets it to NULL if no write-back is in progress. + * + * Returns 1 on success and 0 on failure. + */ +static int pg_cache_try_evict_one_page_unsafe(struct rrdengine_instance *ctx) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + unsigned long old_flags; + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr = NULL; + + uv_rwlock_wrlock(&pg_cache->replaceQ.lock); + for (pg_cache_descr = pg_cache->replaceQ.head ; NULL != pg_cache_descr ; pg_cache_descr = pg_cache_descr->next) { + descr = pg_cache_descr->descr; + + rrdeng_page_descr_mutex_lock(ctx, descr); + old_flags = pg_cache_descr->flags; + if ((old_flags & RRD_PAGE_POPULATED) && !(old_flags & RRD_PAGE_DIRTY) && pg_cache_try_get_unsafe(descr, 1)) { + /* must evict */ + pg_cache_evict_unsafe(ctx, descr); + pg_cache_put_unsafe(descr); + pg_cache_replaceQ_delete_unsafe(ctx, descr); + + rrdeng_page_descr_mutex_unlock(ctx, descr); + uv_rwlock_wrunlock(&pg_cache->replaceQ.lock); + + rrdeng_try_deallocate_pg_cache_descr(ctx, descr); + + return 1; + } + rrdeng_page_descr_mutex_unlock(ctx, descr); + } + uv_rwlock_wrunlock(&pg_cache->replaceQ.lock); + + /* failed to evict */ + return 0; +} + +/** + * Deletes a page from the database. + * Callers of this function need to make sure they're not deleting the same descriptor concurrently. + * @param ctx is the database instance. + * @param descr is the page descriptor. + * @param remove_dirty must be non-zero if the page to be deleted is dirty. + * @param is_exclusive_holder must be non-zero if the caller holds an exclusive page reference. + * @param metric_id is set to the metric the page belongs to, if it's safe to delete the metric and metric_id is not + * NULL. Otherwise, metric_id is not set. + * @return 1 if it's safe to delete the metric, 0 otherwise. + */ +uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, uint8_t remove_dirty, + uint8_t is_exclusive_holder, uuid_t *metric_id) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct page_cache_descr *pg_cache_descr = NULL; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + int ret; + uint8_t can_delete_metric = 0; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, descr->id, sizeof(uuid_t)); + fatal_assert(NULL != PValue); + page_index = *PValue; + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + + uv_rwlock_wrlock(&page_index->lock); + ret = JudyLDel(&page_index->JudyL_array, (Word_t)(descr->start_time_ut / USEC_PER_SEC), PJE0); + if (unlikely(0 == ret)) { + uv_rwlock_wrunlock(&page_index->lock); + if (unlikely(debug_flags & D_RRDENGINE)) { + print_page_descr(descr); + } + goto destroy; + } + --page_index->page_count; + if (!page_index->writers && !page_index->page_count) { + can_delete_metric = 1; + if (metric_id) { + memcpy(metric_id, page_index->id, sizeof(uuid_t)); + } + } + uv_rwlock_wrunlock(&page_index->lock); + fatal_assert(1 == ret); + + uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); + ++ctx->stats.pg_cache_deletions; + --pg_cache->page_descriptors; + uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + if (!is_exclusive_holder) { + /* If we don't hold an exclusive page reference get one */ + while (!pg_cache_try_get_unsafe(descr, 1)) { + debug(D_RRDENGINE, "%s: Waiting for locked page:", __func__); + if (unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + pg_cache_wait_event_unsafe(descr); + } + } + if (remove_dirty) { + pg_cache_descr->flags &= ~RRD_PAGE_DIRTY; + } else { + /* even a locked page could be dirty */ + while (unlikely(pg_cache_descr->flags & RRD_PAGE_DIRTY)) { + debug(D_RRDENGINE, "%s: Found dirty page, waiting for it to be flushed:", __func__); + if (unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + pg_cache_wait_event_unsafe(descr); + } + } + rrdeng_page_descr_mutex_unlock(ctx, descr); + + while (unlikely(pg_cache_descr->flags & RRD_PAGE_READ_PENDING)) { + error_limit_static_global_var(erl, 1, 0); + error_limit(&erl, "%s: Found page with READ PENDING, waiting for read to complete", __func__); + if (unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + pg_cache_wait_event_unsafe(descr); + } + + if (pg_cache_descr->flags & RRD_PAGE_POPULATED) { + /* only after locking can it be safely deleted from LRU */ + pg_cache_replaceQ_delete(ctx, descr); + + uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); + pg_cache_evict_unsafe(ctx, descr); + uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); + } + pg_cache_put(ctx, descr); + rrdeng_try_deallocate_pg_cache_descr(ctx, descr); + while (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) { + rrdeng_try_deallocate_pg_cache_descr(ctx, descr); /* spin */ + (void)sleep_usec(1000); /* 1 msec */ + } +destroy: + rrdeng_page_descr_freez(descr); + pg_cache_update_metric_times(page_index); + + return can_delete_metric; +} + +static inline int is_page_in_time_range(struct rrdeng_page_descr *descr, usec_t start_time, usec_t end_time) +{ + usec_t pg_start, pg_end; + + pg_start = descr->start_time_ut; + pg_end = descr->end_time_ut; + + return (pg_start < start_time && pg_end >= start_time) || + (pg_start >= start_time && pg_start <= end_time); +} + +static inline int is_point_in_time_in_page(struct rrdeng_page_descr *descr, usec_t point_in_time) +{ + return (point_in_time >= descr->start_time_ut && point_in_time <= descr->end_time_ut); +} + +/* The caller must hold the page index lock */ +static inline struct rrdeng_page_descr * + find_first_page_in_time_range(struct pg_cache_page_index *page_index, usec_t start_time, usec_t end_time) +{ + struct rrdeng_page_descr *descr = NULL; + Pvoid_t *PValue; + Word_t Index; + + Index = (Word_t)(start_time / USEC_PER_SEC); + PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0); + if (likely(NULL != PValue)) { + descr = *PValue; + if (is_page_in_time_range(descr, start_time, end_time)) { + return descr; + } + } + + Index = (Word_t)(start_time / USEC_PER_SEC); + PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0); + if (likely(NULL != PValue)) { + descr = *PValue; + if (is_page_in_time_range(descr, start_time, end_time)) { + return descr; + } + } + + return NULL; +} + +/* Update metric oldest and latest timestamps efficiently when adding new values */ +void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr) +{ + usec_t oldest_time = page_index->oldest_time_ut; + usec_t latest_time = page_index->latest_time_ut; + + if (unlikely(oldest_time == INVALID_TIME || descr->start_time_ut < oldest_time)) { + page_index->oldest_time_ut = descr->start_time_ut; + } + if (likely(descr->end_time_ut > latest_time || latest_time == INVALID_TIME)) { + page_index->latest_time_ut = descr->end_time_ut; + } +} + +/* Update metric oldest and latest timestamps when removing old values */ +void pg_cache_update_metric_times(struct pg_cache_page_index *page_index) +{ + Pvoid_t *firstPValue, *lastPValue; + Word_t firstIndex, lastIndex; + struct rrdeng_page_descr *descr; + usec_t oldest_time = INVALID_TIME; + usec_t latest_time = INVALID_TIME; + + uv_rwlock_rdlock(&page_index->lock); + /* Find first page in range */ + firstIndex = (Word_t)0; + firstPValue = JudyLFirst(page_index->JudyL_array, &firstIndex, PJE0); + if (likely(NULL != firstPValue)) { + descr = *firstPValue; + oldest_time = descr->start_time_ut; + } + lastIndex = (Word_t)-1; + lastPValue = JudyLLast(page_index->JudyL_array, &lastIndex, PJE0); + if (likely(NULL != lastPValue)) { + descr = *lastPValue; + latest_time = descr->end_time_ut; + } + uv_rwlock_rdunlock(&page_index->lock); + + if (unlikely(NULL == firstPValue)) { + fatal_assert(NULL == lastPValue); + page_index->oldest_time_ut = page_index->latest_time_ut = INVALID_TIME; + return; + } + page_index->oldest_time_ut = oldest_time; + page_index->latest_time_ut = latest_time; +} + +/* If index is NULL lookup by UUID (descr->id) */ +void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, + struct rrdeng_page_descr *descr) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index; + unsigned long pg_cache_descr_state = descr->pg_cache_descr_state; + + if (0 != pg_cache_descr_state) { + /* there is page cache descriptor pre-allocated state */ + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + + fatal_assert(pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED); + if (pg_cache_descr->flags & RRD_PAGE_POPULATED) { + pg_cache_reserve_pages(ctx, 1); + if (!(pg_cache_descr->flags & RRD_PAGE_DIRTY)) + pg_cache_replaceQ_insert(ctx, descr); + } + } + + if (unlikely(NULL == index)) { + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, descr->id, sizeof(uuid_t)); + fatal_assert(NULL != PValue); + page_index = *PValue; + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + } else { + page_index = index; + } + + uv_rwlock_wrlock(&page_index->lock); + PValue = JudyLIns(&page_index->JudyL_array, (Word_t)(descr->start_time_ut / USEC_PER_SEC), PJE0); + *PValue = descr; + ++page_index->page_count; + pg_cache_add_new_metric_time(page_index, descr); + uv_rwlock_wrunlock(&page_index->lock); + + uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); + ++ctx->stats.pg_cache_insertions; + ++pg_cache->page_descriptors; + uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); +} + +usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time_ut, usec_t end_time_ut) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + if (NULL == PValue) { + return INVALID_TIME; + } + + uv_rwlock_rdlock(&page_index->lock); + descr = find_first_page_in_time_range(page_index, start_time_ut, end_time_ut); + if (NULL == descr) { + uv_rwlock_rdunlock(&page_index->lock); + return INVALID_TIME; + } + uv_rwlock_rdunlock(&page_index->lock); + return descr->start_time_ut; +} + +/** + * Return page information for the first page before point_in_time that satisfies the filter. + * @param ctx DB context + * @param page_index page index of a metric + * @param point_in_time_ut the pages that are searched must be older than this timestamp + * @param filter decides if the page satisfies the caller's criteria + * @param page_info the result of the search is set in this pointer + */ +void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index, + usec_t point_in_time_ut, pg_cache_page_info_filter_t *filter, + struct rrdeng_page_info *page_info) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + Pvoid_t *PValue; + Word_t Index; + + (void)pg_cache; + fatal_assert(NULL != page_index); + + Index = (Word_t)(point_in_time_ut / USEC_PER_SEC); + uv_rwlock_rdlock(&page_index->lock); + do { + PValue = JudyLPrev(page_index->JudyL_array, &Index, PJE0); + descr = unlikely(NULL == PValue) ? NULL : *PValue; + } while (descr != NULL && !filter(descr)); + if (unlikely(NULL == descr)) { + page_info->page_length = 0; + page_info->start_time_ut = INVALID_TIME; + page_info->end_time_ut = INVALID_TIME; + } else { + page_info->page_length = descr->page_length; + page_info->start_time_ut = descr->start_time_ut; + page_info->end_time_ut = descr->end_time_ut; + } + uv_rwlock_rdunlock(&page_index->lock); +} + +/** + * Searches for an unallocated page without triggering disk I/O. Attempts to reserve the page and get a reference. + * @param ctx DB context + * @param id lookup by UUID + * @param start_time_ut exact starting time in usec + * @param ret_page_indexp Sets the page index pointer (*ret_page_indexp) for the given UUID. + * @return the page descriptor or NULL on failure. It can fail if: + * 1. The page is already allocated to the page cache. + * 2. It did not succeed to get a reference. + * 3. It did not succeed to reserve a spot in the page cache. + */ +struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_instance *ctx, uuid_t *id, + usec_t start_time_ut) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + struct page_cache_descr *pg_cache_descr = NULL; + unsigned long flags; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + Word_t Index; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + + if ((NULL == PValue) || !pg_cache_try_reserve_pages(ctx, 1)) { + /* Failed to find page or failed to reserve a spot in the cache */ + return NULL; + } + + uv_rwlock_rdlock(&page_index->lock); + Index = (Word_t)(start_time_ut / USEC_PER_SEC); + PValue = JudyLGet(page_index->JudyL_array, Index, PJE0); + if (likely(NULL != PValue)) { + descr = *PValue; + } + if (NULL == PValue || 0 == descr->page_length) { + /* Failed to find non-empty page */ + uv_rwlock_rdunlock(&page_index->lock); + + pg_cache_release_pages(ctx, 1); + return NULL; + } + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + flags = pg_cache_descr->flags; + uv_rwlock_rdunlock(&page_index->lock); + + if ((flags & RRD_PAGE_POPULATED) || !pg_cache_try_get_unsafe(descr, 1)) { + /* Failed to get reference or page is already populated */ + rrdeng_page_descr_mutex_unlock(ctx, descr); + + pg_cache_release_pages(ctx, 1); + return NULL; + } + /* success */ + rrdeng_page_descr_mutex_unlock(ctx, descr); + rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1); + + return descr; +} + +/** + * Searches for pages in a time range and triggers disk I/O if necessary and possible. + * Does not get a reference. + * @param ctx DB context + * @param id UUID + * @param start_time_ut inclusive starting time in usec + * @param end_time_ut inclusive ending time in usec + * @param page_info_arrayp It allocates (*page_arrayp) and populates it with information of pages that overlap + * with the time range [start_time,end_time]. The caller must free (*page_info_arrayp) with freez(). + * If page_info_arrayp is set to NULL nothing was allocated. + * @param ret_page_indexp Sets the page index pointer (*ret_page_indexp) for the given UUID. + * @return the number of pages that overlap with the time range [start_time,end_time]. + */ +unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time_ut, usec_t end_time_ut, + struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL, *preload_array[PAGE_CACHE_MAX_PRELOAD_PAGES]; + struct page_cache_descr *pg_cache_descr = NULL; + unsigned i, j, k, preload_count, count, page_info_array_max_size; + unsigned long flags; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + Word_t Index; + uint8_t failed_to_reserve; + + fatal_assert(NULL != ret_page_indexp); + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + *ret_page_indexp = page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + if (NULL == PValue) { + debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__); + *ret_page_indexp = NULL; + return 0; + } + + uv_rwlock_rdlock(&page_index->lock); + descr = find_first_page_in_time_range(page_index, start_time_ut, end_time_ut); + if (NULL == descr) { + uv_rwlock_rdunlock(&page_index->lock); + debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__); + *ret_page_indexp = NULL; + return 0; + } else { + Index = (Word_t)(descr->start_time_ut / USEC_PER_SEC); + } + if (page_info_arrayp) { + page_info_array_max_size = PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info); + *page_info_arrayp = mallocz(page_info_array_max_size); + } + + for (count = 0, preload_count = 0 ; + descr != NULL && is_page_in_time_range(descr, start_time_ut, end_time_ut) ; + PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0), + descr = unlikely(NULL == PValue) ? NULL : *PValue) { + /* Iterate all pages in range */ + + if (unlikely(0 == descr->page_length)) + continue; + if (page_info_arrayp) { + if (unlikely(count >= page_info_array_max_size / sizeof(struct rrdeng_page_info))) { + page_info_array_max_size += PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info); + *page_info_arrayp = reallocz(*page_info_arrayp, page_info_array_max_size); + } + (*page_info_arrayp)[count].start_time_ut = descr->start_time_ut; + (*page_info_arrayp)[count].end_time_ut = descr->end_time_ut; + (*page_info_arrayp)[count].page_length = descr->page_length; + } + ++count; + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + flags = pg_cache_descr->flags; + if (pg_cache_can_get_unsafe(descr, 0)) { + if (flags & RRD_PAGE_POPULATED) { + /* success */ + rrdeng_page_descr_mutex_unlock(ctx, descr); + debug(D_RRDENGINE, "%s: Page was found in memory.", __func__); + continue; + } + } + if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) { + preload_array[preload_count++] = descr; + if (PAGE_CACHE_MAX_PRELOAD_PAGES == preload_count) { + rrdeng_page_descr_mutex_unlock(ctx, descr); + break; + } + } + rrdeng_page_descr_mutex_unlock(ctx, descr); + + } + uv_rwlock_rdunlock(&page_index->lock); + + failed_to_reserve = 0; + for (i = 0 ; i < preload_count && !failed_to_reserve ; ++i) { + struct rrdeng_cmd cmd; + struct rrdeng_page_descr *next; + + descr = preload_array[i]; + if (NULL == descr) { + continue; + } + if (!pg_cache_try_reserve_pages(ctx, 1)) { + failed_to_reserve = 1; + break; + } + cmd.opcode = RRDENG_READ_EXTENT; + cmd.read_extent.page_cache_descr[0] = descr; + /* don't use this page again */ + preload_array[i] = NULL; + for (j = 0, k = 1 ; j < preload_count ; ++j) { + next = preload_array[j]; + if (NULL == next) { + continue; + } + if (descr->extent == next->extent) { + /* same extent, consolidate */ + if (!pg_cache_try_reserve_pages(ctx, 1)) { + failed_to_reserve = 1; + break; + } + cmd.read_extent.page_cache_descr[k++] = next; + /* don't use this page again */ + preload_array[j] = NULL; + } + } + cmd.read_extent.page_count = k; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + } + if (failed_to_reserve) { + debug(D_RRDENGINE, "%s: Failed to reserve enough memory, canceling I/O.", __func__); + for (i = 0 ; i < preload_count ; ++i) { + descr = preload_array[i]; + if (NULL == descr) { + continue; + } + pg_cache_put(ctx, descr); + } + } + if (!preload_count) { + /* no such page */ + debug(D_RRDENGINE, "%s: No page was eligible to attempt preload.", __func__); + } + if (unlikely(0 == count && page_info_arrayp)) { + freez(*page_info_arrayp); + *page_info_arrayp = NULL; + } + return count; +} + +/* + * Searches for a page and gets a reference. + * When point_in_time is INVALID_TIME get any page. + * If index is NULL lookup by UUID (id). + */ +struct rrdeng_page_descr * + pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, + usec_t point_in_time_ut) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + struct page_cache_descr *pg_cache_descr = NULL; + unsigned long flags; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + Word_t Index; + uint8_t page_not_in_cache; + + if (unlikely(NULL == index)) { + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + if (NULL == PValue) { + return NULL; + } + } else { + page_index = index; + } + pg_cache_reserve_pages(ctx, 1); + + page_not_in_cache = 0; + uv_rwlock_rdlock(&page_index->lock); + while (1) { + Index = (Word_t)(point_in_time_ut / USEC_PER_SEC); + PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0); + if (likely(NULL != PValue)) { + descr = *PValue; + } + if (NULL == PValue || + 0 == descr->page_length || + (INVALID_TIME != point_in_time_ut && + !is_point_in_time_in_page(descr, point_in_time_ut))) { + /* non-empty page not found */ + uv_rwlock_rdunlock(&page_index->lock); + + pg_cache_release_pages(ctx, 1); + return NULL; + } + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + flags = pg_cache_descr->flags; + if ((flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 0)) { + /* success */ + rrdeng_page_descr_mutex_unlock(ctx, descr); + debug(D_RRDENGINE, "%s: Page was found in memory.", __func__); + break; + } + if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) { + struct rrdeng_cmd cmd; + + uv_rwlock_rdunlock(&page_index->lock); + + cmd.opcode = RRDENG_READ_PAGE; + cmd.read_page.page_cache_descr = descr; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + + debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__); + if(unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + while (!(pg_cache_descr->flags & RRD_PAGE_POPULATED)) { + pg_cache_wait_event_unsafe(descr); + } + /* success */ + /* Downgrade exclusive reference to allow other readers */ + pg_cache_descr->flags &= ~RRD_PAGE_LOCKED; + pg_cache_wake_up_waiters_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); + rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1); + return descr; + } + uv_rwlock_rdunlock(&page_index->lock); + debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__); + if(unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + if (!(flags & RRD_PAGE_POPULATED)) + page_not_in_cache = 1; + pg_cache_wait_event_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); + + /* reset scan to find again */ + uv_rwlock_rdlock(&page_index->lock); + } + uv_rwlock_rdunlock(&page_index->lock); + + if (!(flags & RRD_PAGE_DIRTY)) + pg_cache_replaceQ_set_hot(ctx, descr); + pg_cache_release_pages(ctx, 1); + if (page_not_in_cache) + rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1); + else + rrd_stat_atomic_add(&ctx->stats.pg_cache_hits, 1); + return descr; +} + +/* + * Searches for the first page between start_time and end_time and gets a reference. + * start_time and end_time are inclusive. + * If index is NULL lookup by UUID (id). + */ +struct rrdeng_page_descr * +pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, + usec_t start_time_ut, usec_t end_time_ut) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + struct page_cache_descr *pg_cache_descr = NULL; + unsigned long flags; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + uint8_t page_not_in_cache; + + if (unlikely(NULL == index)) { + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + if (NULL == PValue) { + return NULL; + } + } else { + page_index = index; + } + pg_cache_reserve_pages(ctx, 1); + + page_not_in_cache = 0; + uv_rwlock_rdlock(&page_index->lock); + int retry_count = 0; + while (1) { + descr = find_first_page_in_time_range(page_index, start_time_ut, end_time_ut); + if (NULL == descr || 0 == descr->page_length || retry_count == default_rrdeng_page_fetch_retries) { + /* non-empty page not found */ + if (retry_count == default_rrdeng_page_fetch_retries) + error_report("Page cache timeout while waiting for page %p : returning FAIL", descr); + uv_rwlock_rdunlock(&page_index->lock); + + pg_cache_release_pages(ctx, 1); + return NULL; + } + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + flags = pg_cache_descr->flags; + if ((flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 0)) { + /* success */ + rrdeng_page_descr_mutex_unlock(ctx, descr); + debug(D_RRDENGINE, "%s: Page was found in memory.", __func__); + break; + } + if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) { + struct rrdeng_cmd cmd; + + uv_rwlock_rdunlock(&page_index->lock); + + cmd.opcode = RRDENG_READ_PAGE; + cmd.read_page.page_cache_descr = descr; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + + debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__); + if(unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + while (!(pg_cache_descr->flags & RRD_PAGE_POPULATED)) { + pg_cache_wait_event_unsafe(descr); + } + /* success */ + /* Downgrade exclusive reference to allow other readers */ + pg_cache_descr->flags &= ~RRD_PAGE_LOCKED; + pg_cache_wake_up_waiters_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); + rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1); + return descr; + } + uv_rwlock_rdunlock(&page_index->lock); + debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__); + if(unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + if (!(flags & RRD_PAGE_POPULATED)) + page_not_in_cache = 1; + + if (pg_cache_timedwait_event_unsafe(descr, default_rrdeng_page_fetch_timeout) == UV_ETIMEDOUT) { + error_report("Page cache timeout while waiting for page %p : retry count = %d", descr, retry_count); + ++retry_count; + } + rrdeng_page_descr_mutex_unlock(ctx, descr); + + /* reset scan to find again */ + uv_rwlock_rdlock(&page_index->lock); + } + uv_rwlock_rdunlock(&page_index->lock); + + if (!(flags & RRD_PAGE_DIRTY)) + pg_cache_replaceQ_set_hot(ctx, descr); + pg_cache_release_pages(ctx, 1); + if (page_not_in_cache) + rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1); + else + rrd_stat_atomic_add(&ctx->stats.pg_cache_hits, 1); + return descr; +} + +struct pg_cache_page_index *create_page_index(uuid_t *id, struct rrdengine_instance *ctx) +{ + struct pg_cache_page_index *page_index; + + page_index = mallocz(sizeof(*page_index)); + page_index->JudyL_array = (Pvoid_t) NULL; + uuid_copy(page_index->id, *id); + fatal_assert(0 == uv_rwlock_init(&page_index->lock)); + page_index->oldest_time_ut = INVALID_TIME; + page_index->latest_time_ut = INVALID_TIME; + page_index->prev = NULL; + page_index->page_count = 0; + page_index->refcount = 0; + page_index->writers = 0; + page_index->ctx = ctx; + page_index->latest_update_every_s = default_rrd_update_every; + + return page_index; +} + +static void init_metrics_index(struct rrdengine_instance *ctx) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + pg_cache->metrics_index.JudyHS_array = (Pvoid_t) NULL; + pg_cache->metrics_index.last_page_index = NULL; + fatal_assert(0 == uv_rwlock_init(&pg_cache->metrics_index.lock)); +} + +static void init_replaceQ(struct rrdengine_instance *ctx) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + pg_cache->replaceQ.head = NULL; + pg_cache->replaceQ.tail = NULL; + fatal_assert(0 == uv_rwlock_init(&pg_cache->replaceQ.lock)); +} + +static void init_committed_page_index(struct rrdengine_instance *ctx) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + pg_cache->committed_page_index.JudyL_array = (Pvoid_t) NULL; + fatal_assert(0 == uv_rwlock_init(&pg_cache->committed_page_index.lock)); + pg_cache->committed_page_index.latest_corr_id = 0; + pg_cache->committed_page_index.nr_committed_pages = 0; +} + +void init_page_cache(struct rrdengine_instance *ctx) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + + pg_cache->page_descriptors = 0; + pg_cache->populated_pages = 0; + fatal_assert(0 == uv_rwlock_init(&pg_cache->pg_cache_rwlock)); + + init_metrics_index(ctx); + init_replaceQ(ctx); + init_committed_page_index(ctx); +} + +void free_page_cache(struct rrdengine_instance *ctx) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index, *prev_page_index; + Word_t Index; + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + + // if we are exiting, the OS will recover all memory so do not slow down the shutdown process + // Do the cleanup if we are compiling with NETDATA_INTERNAL_CHECKS + // This affects the reporting of dbengine statistics which are available in real time + // via the /api/v1/dbengine_stats endpoint +#ifndef NETDATA_DBENGINE_FREE + if (netdata_exit) + return; +#endif + Word_t metrics_index_bytes = 0, pages_index_bytes = 0, pages_dirty_index_bytes = 0; + + /* Free committed page index */ + pages_dirty_index_bytes = JudyLFreeArray(&pg_cache->committed_page_index.JudyL_array, PJE0); + fatal_assert(NULL == pg_cache->committed_page_index.JudyL_array); + + for (page_index = pg_cache->metrics_index.last_page_index ; + page_index != NULL ; + page_index = prev_page_index) { + + prev_page_index = page_index->prev; + + /* Find first page in range */ + Index = (Word_t) 0; + PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0); + descr = unlikely(NULL == PValue) ? NULL : *PValue; + + while (descr != NULL) { + /* Iterate all page descriptors of this metric */ + + if (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) { + /* Check rrdenglocking.c */ + pg_cache_descr = descr->pg_cache_descr; + if (pg_cache_descr->flags & RRD_PAGE_POPULATED) { + dbengine_page_free(pg_cache_descr->page); + } + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); + } + rrdeng_page_descr_freez(descr); + + PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0); + descr = unlikely(NULL == PValue) ? NULL : *PValue; + } + + /* Free page index */ + pages_index_bytes += JudyLFreeArray(&page_index->JudyL_array, PJE0); + fatal_assert(NULL == page_index->JudyL_array); + freez(page_index); + } + /* Free metrics index */ + metrics_index_bytes = JudyHSFreeArray(&pg_cache->metrics_index.JudyHS_array, PJE0); + fatal_assert(NULL == pg_cache->metrics_index.JudyHS_array); + info("Freed %lu bytes of memory from page cache.", pages_dirty_index_bytes + pages_index_bytes + metrics_index_bytes); +} diff --git a/database/engine/pagecache.h b/database/engine/pagecache.h new file mode 100644 index 0000000..635b021 --- /dev/null +++ b/database/engine/pagecache.h @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PAGECACHE_H +#define NETDATA_PAGECACHE_H + +#include "rrdengine.h" + +/* Forward declarations */ +struct rrdengine_instance; +struct extent_info; +struct rrdeng_page_descr; + +#define INVALID_TIME (0) +#define MAX_PAGE_CACHE_FETCH_RETRIES (3) +#define PAGE_CACHE_FETCH_WAIT_TIMEOUT (3) + +/* Page flags */ +#define RRD_PAGE_DIRTY (1LU << 0) +#define RRD_PAGE_LOCKED (1LU << 1) +#define RRD_PAGE_READ_PENDING (1LU << 2) +#define RRD_PAGE_WRITE_PENDING (1LU << 3) +#define RRD_PAGE_POPULATED (1LU << 4) + +struct page_cache_descr { + struct rrdeng_page_descr *descr; /* parent descriptor */ + void *page; + unsigned long flags; + struct page_cache_descr *prev; /* LRU */ + struct page_cache_descr *next; /* LRU */ + + unsigned refcnt; + uv_mutex_t mutex; /* always take it after the page cache lock or after the commit lock */ + uv_cond_t cond; + unsigned waiters; +}; + +/* Page cache descriptor flags, state = 0 means no descriptor */ +#define PG_CACHE_DESCR_ALLOCATED (1LU << 0) +#define PG_CACHE_DESCR_DESTROY (1LU << 1) +#define PG_CACHE_DESCR_LOCKED (1LU << 2) +#define PG_CACHE_DESCR_SHIFT (3) +#define PG_CACHE_DESCR_USERS_MASK (((unsigned long)-1) << PG_CACHE_DESCR_SHIFT) +#define PG_CACHE_DESCR_FLAGS_MASK (((unsigned long)-1) >> (BITS_PER_ULONG - PG_CACHE_DESCR_SHIFT)) + +/* + * Page cache descriptor state bits (works for both 32-bit and 64-bit architectures): + * + * 63 ... 31 ... 3 | 2 | 1 | 0| + * -----------------------------+------------+------------+-----------| + * number of descriptor users | DESTROY | LOCKED | ALLOCATED | + */ +struct rrdeng_page_descr { + uuid_t *id; /* never changes */ + struct extent_info *extent; + + /* points to ephemeral page cache descriptor if the page resides in the cache */ + struct page_cache_descr *pg_cache_descr; + + /* Compare-And-Swap target for page cache descriptor allocation algorithm */ + volatile unsigned long pg_cache_descr_state; + + /* page information */ + usec_t start_time_ut; + usec_t end_time_ut; + uint32_t update_every_s:24; + uint8_t type; + uint32_t page_length; +}; + +#define PAGE_INFO_SCRATCH_SZ (8) +struct rrdeng_page_info { + uint8_t scratch[PAGE_INFO_SCRATCH_SZ]; /* scratch area to be used by page-cache users */ + + usec_t start_time_ut; + usec_t end_time_ut; + uint32_t page_length; +}; + +/* returns 1 for success, 0 for failure */ +typedef int pg_cache_page_info_filter_t(struct rrdeng_page_descr *); + +#define PAGE_CACHE_MAX_PRELOAD_PAGES (256) + +struct pg_alignment { + uint32_t page_length; + uint32_t refcount; +}; + +/* maps time ranges to pages */ +struct pg_cache_page_index { + uuid_t id; + /* + * care: JudyL_array indices are converted from useconds to seconds to fit in one word in 32-bit architectures + * TODO: examine if we want to support better granularity than seconds + */ + Pvoid_t JudyL_array; + Word_t page_count; + unsigned short refcount; + unsigned short writers; + uv_rwlock_t lock; + + /* + * Only one effective writer, data deletion workqueue. + * It's also written during the DB loading phase. + */ + usec_t oldest_time_ut; + + /* + * Only one effective writer, data collection thread. + * It's also written by the data deletion workqueue when data collection is disabled for this metric. + */ + usec_t latest_time_ut; + + struct rrdengine_instance *ctx; + uint32_t latest_update_every_s; + + struct pg_cache_page_index *prev; +}; + +/* maps UUIDs to page indices */ +struct pg_cache_metrics_index { + uv_rwlock_t lock; + Pvoid_t JudyHS_array; + struct pg_cache_page_index *last_page_index; +}; + +/* gathers dirty pages to be written on disk */ +struct pg_cache_committed_page_index { + uv_rwlock_t lock; + + Pvoid_t JudyL_array; + + /* + * Dirty page correlation ID is a hint. Dirty pages that are correlated should have + * a small correlation ID difference. Dirty pages in memory should never have the + * same ID at the same time for correctness. + */ + Word_t latest_corr_id; + + unsigned nr_committed_pages; +}; + +/* + * Gathers populated pages to be evicted. + * Relies on page cache descriptors being there as it uses their memory. + */ +struct pg_cache_replaceQ { + uv_rwlock_t lock; /* LRU lock */ + + struct page_cache_descr *head; /* LRU */ + struct page_cache_descr *tail; /* MRU */ +}; + +struct page_cache { /* TODO: add statistics */ + uv_rwlock_t pg_cache_rwlock; /* page cache lock */ + + struct pg_cache_metrics_index metrics_index; + struct pg_cache_committed_page_index committed_page_index; + struct pg_cache_replaceQ replaceQ; + + unsigned page_descriptors; + unsigned populated_pages; +}; + +void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr); +void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void pg_cache_wait_event_unsafe(struct rrdeng_page_descr *descr); +unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr); +void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr); +void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx, + struct rrdeng_page_descr *descr); +struct rrdeng_page_descr *pg_cache_create_descr(void); +int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access); +void pg_cache_put_unsafe(struct rrdeng_page_descr *descr); +void pg_cache_put(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, + struct rrdeng_page_descr *descr); +uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, + uint8_t remove_dirty, uint8_t is_exclusive_holder, uuid_t *metric_id); +usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, + usec_t start_time_ut, usec_t end_time_ut); +void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index, + usec_t point_in_time_ut, pg_cache_page_info_filter_t *filter, + struct rrdeng_page_info *page_info); +struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_instance *ctx, uuid_t *id, + usec_t start_time_ut); +unsigned + pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time_ut, usec_t end_time_ut, + struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp); +struct rrdeng_page_descr * + pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, + usec_t point_in_time_ut); +struct rrdeng_page_descr * + pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, + usec_t start_time_ut, usec_t end_time_ut); +struct pg_cache_page_index *create_page_index(uuid_t *id, struct rrdengine_instance *ctx); +void init_page_cache(struct rrdengine_instance *ctx); +void free_page_cache(struct rrdengine_instance *ctx); +void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr); +void pg_cache_update_metric_times(struct pg_cache_page_index *page_index); +unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx); +unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx); +unsigned long pg_cache_committed_hard_limit(struct rrdengine_instance *ctx); + +void rrdeng_page_descr_aral_go_singlethreaded(void); +void rrdeng_page_descr_aral_go_multithreaded(void); +void rrdeng_page_descr_use_malloc(void); +void rrdeng_page_descr_use_mmap(void); +bool rrdeng_page_descr_is_mmap(void); +struct rrdeng_page_descr *rrdeng_page_descr_mallocz(void); +void rrdeng_page_descr_freez(struct rrdeng_page_descr *descr); + +static inline void + pg_cache_atomic_get_pg_info(struct rrdeng_page_descr *descr, usec_t *end_time_ut_p, uint32_t *page_lengthp) +{ + usec_t end_time_ut, old_end_time_ut; + uint32_t page_length; + + if (NULL == descr->extent) { + /* this page is currently being modified, get consistent info locklessly */ + do { + end_time_ut = descr->end_time_ut; + __sync_synchronize(); + old_end_time_ut = end_time_ut; + page_length = descr->page_length; + __sync_synchronize(); + end_time_ut = descr->end_time_ut; + __sync_synchronize(); + } while ((end_time_ut != old_end_time_ut || (end_time_ut & 1) != 0)); + + *end_time_ut_p = end_time_ut; + *page_lengthp = page_length; + } else { + *end_time_ut_p = descr->end_time_ut; + *page_lengthp = descr->page_length; + } +} + +/* The caller must hold a reference to the page and must have already set the new data */ +static inline void pg_cache_atomic_set_pg_info(struct rrdeng_page_descr *descr, usec_t end_time_ut, uint32_t page_length) +{ + fatal_assert(!(end_time_ut & 1)); + __sync_synchronize(); + descr->end_time_ut |= 1; /* mark start of uncertainty period by adding 1 microsecond */ + __sync_synchronize(); + descr->page_length = page_length; + __sync_synchronize(); + descr->end_time_ut = end_time_ut; /* mark end of uncertainty period */ +} + +#endif /* NETDATA_PAGECACHE_H */ diff --git a/database/engine/rrddiskprotocol.h b/database/engine/rrddiskprotocol.h new file mode 100644 index 0000000..5b4be94 --- /dev/null +++ b/database/engine/rrddiskprotocol.h @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDDISKPROTOCOL_H +#define NETDATA_RRDDISKPROTOCOL_H + +#define RRDENG_BLOCK_SIZE (4096) +#define RRDFILE_ALIGNMENT RRDENG_BLOCK_SIZE + +#define RRDENG_MAGIC_SZ (32) +#define RRDENG_DF_MAGIC "netdata-data-file" +#define RRDENG_JF_MAGIC "netdata-journal-file" + +#define RRDENG_VER_SZ (16) +#define RRDENG_DF_VER "1.0" +#define RRDENG_JF_VER "1.0" + +#define UUID_SZ (16) +#define CHECKSUM_SZ (4) /* CRC32 */ + +#define RRD_NO_COMPRESSION (0) +#define RRD_LZ4 (1) + +#define RRDENG_DF_SB_PADDING_SZ (RRDENG_BLOCK_SIZE - (RRDENG_MAGIC_SZ + RRDENG_VER_SZ + sizeof(uint8_t))) +/* + * Data file persistent super-block + */ +struct rrdeng_df_sb { + char magic_number[RRDENG_MAGIC_SZ]; + char version[RRDENG_VER_SZ]; + uint8_t tier; + uint8_t padding[RRDENG_DF_SB_PADDING_SZ]; +} __attribute__ ((packed)); + +/* + * Page types + */ +#define PAGE_METRICS (0) +#define PAGE_TIER (1) +#define PAGE_TYPE_MAX 1 // Maximum page type (inclusive) + +/* + * Data file page descriptor + */ +struct rrdeng_extent_page_descr { + uint8_t type; + + uint8_t uuid[UUID_SZ]; + uint32_t page_length; + uint64_t start_time_ut; + uint64_t end_time_ut; +} __attribute__ ((packed)); + +/* + * Data file extent header + */ +struct rrdeng_df_extent_header { + uint32_t payload_length; + uint8_t compression_algorithm; + uint8_t number_of_pages; + /* #number_of_pages page descriptors follow */ + struct rrdeng_extent_page_descr descr[]; +} __attribute__ ((packed)); + +/* + * Data file extent trailer + */ +struct rrdeng_df_extent_trailer { + uint8_t checksum[CHECKSUM_SZ]; /* CRC32 */ +} __attribute__ ((packed)); + +#define RRDENG_JF_SB_PADDING_SZ (RRDENG_BLOCK_SIZE - (RRDENG_MAGIC_SZ + RRDENG_VER_SZ)) +/* + * Journal file super-block + */ +struct rrdeng_jf_sb { + char magic_number[RRDENG_MAGIC_SZ]; + char version[RRDENG_VER_SZ]; + uint8_t padding[RRDENG_JF_SB_PADDING_SZ]; +} __attribute__ ((packed)); + +/* + * Transaction record types + */ +#define STORE_PADDING (0) +#define STORE_DATA (1) +#define STORE_LOGS (2) /* reserved */ + +/* + * Journal file transaction record header + */ +struct rrdeng_jf_transaction_header { + /* when set to STORE_PADDING jump to start of next block */ + uint8_t type; + + uint32_t reserved; /* reserved for future use */ + uint64_t id; + uint16_t payload_length; +} __attribute__ ((packed)); + +/* + * Journal file transaction record trailer + */ +struct rrdeng_jf_transaction_trailer { + uint8_t checksum[CHECKSUM_SZ]; /* CRC32 */ +} __attribute__ ((packed)); + +/* + * Journal file STORE_DATA action + */ +struct rrdeng_jf_store_data { + /* data file extent information */ + uint64_t extent_offset; + uint32_t extent_size; + + uint8_t number_of_pages; + /* #number_of_pages page descriptors follow */ + struct rrdeng_extent_page_descr descr[]; +} __attribute__ ((packed)); + +#endif /* NETDATA_RRDDISKPROTOCOL_H */ \ No newline at end of file diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c new file mode 100644 index 0000000..a6840f3 --- /dev/null +++ b/database/engine/rrdengine.c @@ -0,0 +1,1508 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#define NETDATA_RRD_INTERNALS + +#include "rrdengine.h" + +rrdeng_stats_t global_io_errors = 0; +rrdeng_stats_t global_fs_errors = 0; +rrdeng_stats_t rrdeng_reserved_file_descriptors = 0; +rrdeng_stats_t global_pg_cache_over_half_dirty_events = 0; +rrdeng_stats_t global_flushing_pressure_page_deletions = 0; + +unsigned rrdeng_pages_per_extent = MAX_PAGES_PER_EXTENT; + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < (RRDENG_MAX_OPCODE + 2) +#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least (RRDENG_MAX_OPCODE + 2) +#endif + +void *dbengine_page_alloc() { + void *page = NULL; + if (unlikely(db_engine_use_malloc)) + page = mallocz(RRDENG_BLOCK_SIZE); + else { + page = netdata_mmap(NULL, RRDENG_BLOCK_SIZE, MAP_PRIVATE, enable_ksm); + if(!page) fatal("Cannot allocate dbengine page cache page, with mmap()"); + } + return page; +} + +void dbengine_page_free(void *page) { + if (unlikely(db_engine_use_malloc)) + freez(page); + else + netdata_munmap(page, RRDENG_BLOCK_SIZE); +} + +static void sanity_check(void) +{ + BUILD_BUG_ON(WORKER_UTILIZATION_MAX_JOB_TYPES < (RRDENG_MAX_OPCODE + 2)); + + /* Magic numbers must fit in the super-blocks */ + BUILD_BUG_ON(strlen(RRDENG_DF_MAGIC) > RRDENG_MAGIC_SZ); + BUILD_BUG_ON(strlen(RRDENG_JF_MAGIC) > RRDENG_MAGIC_SZ); + + /* Version strings must fit in the super-blocks */ + BUILD_BUG_ON(strlen(RRDENG_DF_VER) > RRDENG_VER_SZ); + BUILD_BUG_ON(strlen(RRDENG_JF_VER) > RRDENG_VER_SZ); + + /* Data file super-block cannot be larger than RRDENG_BLOCK_SIZE */ + BUILD_BUG_ON(RRDENG_DF_SB_PADDING_SZ < 0); + + BUILD_BUG_ON(sizeof(uuid_t) != UUID_SZ); /* check UUID size */ + + /* page count must fit in 8 bits */ + BUILD_BUG_ON(MAX_PAGES_PER_EXTENT > 255); + + /* extent cache count must fit in 32 bits */ + BUILD_BUG_ON(MAX_CACHED_EXTENTS > 32); + + /* page info scratch space must be able to hold 2 32-bit integers */ + BUILD_BUG_ON(sizeof(((struct rrdeng_page_info *)0)->scratch) < 2 * sizeof(uint32_t)); +} + +/* always inserts into tail */ +static inline void xt_cache_replaceQ_insert(struct rrdengine_worker_config* wc, + struct extent_cache_element *xt_cache_elem) +{ + struct extent_cache *xt_cache = &wc->xt_cache; + + xt_cache_elem->prev = NULL; + xt_cache_elem->next = NULL; + + if (likely(NULL != xt_cache->replaceQ_tail)) { + xt_cache_elem->prev = xt_cache->replaceQ_tail; + xt_cache->replaceQ_tail->next = xt_cache_elem; + } + if (unlikely(NULL == xt_cache->replaceQ_head)) { + xt_cache->replaceQ_head = xt_cache_elem; + } + xt_cache->replaceQ_tail = xt_cache_elem; +} + +static inline void xt_cache_replaceQ_delete(struct rrdengine_worker_config* wc, + struct extent_cache_element *xt_cache_elem) +{ + struct extent_cache *xt_cache = &wc->xt_cache; + struct extent_cache_element *prev, *next; + + prev = xt_cache_elem->prev; + next = xt_cache_elem->next; + + if (likely(NULL != prev)) { + prev->next = next; + } + if (likely(NULL != next)) { + next->prev = prev; + } + if (unlikely(xt_cache_elem == xt_cache->replaceQ_head)) { + xt_cache->replaceQ_head = next; + } + if (unlikely(xt_cache_elem == xt_cache->replaceQ_tail)) { + xt_cache->replaceQ_tail = prev; + } + xt_cache_elem->prev = xt_cache_elem->next = NULL; +} + +static inline void xt_cache_replaceQ_set_hot(struct rrdengine_worker_config* wc, + struct extent_cache_element *xt_cache_elem) +{ + xt_cache_replaceQ_delete(wc, xt_cache_elem); + xt_cache_replaceQ_insert(wc, xt_cache_elem); +} + +/* Returns the index of the cached extent if it was successfully inserted in the extent cache, otherwise -1 */ +static int try_insert_into_xt_cache(struct rrdengine_worker_config* wc, struct extent_info *extent) +{ + struct extent_cache *xt_cache = &wc->xt_cache; + struct extent_cache_element *xt_cache_elem; + unsigned idx; + int ret; + + ret = find_first_zero(xt_cache->allocation_bitmap); + if (-1 == ret || ret >= MAX_CACHED_EXTENTS) { + for (xt_cache_elem = xt_cache->replaceQ_head ; NULL != xt_cache_elem ; xt_cache_elem = xt_cache_elem->next) { + idx = xt_cache_elem - xt_cache->extent_array; + if (!check_bit(xt_cache->inflight_bitmap, idx)) { + xt_cache_replaceQ_delete(wc, xt_cache_elem); + break; + } + } + if (NULL == xt_cache_elem) + return -1; + } else { + idx = (unsigned)ret; + xt_cache_elem = &xt_cache->extent_array[idx]; + } + xt_cache_elem->extent = extent; + xt_cache_elem->fileno = extent->datafile->fileno; + xt_cache_elem->inflight_io_descr = NULL; + xt_cache_replaceQ_insert(wc, xt_cache_elem); + modify_bit(&xt_cache->allocation_bitmap, idx, 1); + + return (int)idx; +} + +/** + * Returns 0 if the cached extent was found in the extent cache, 1 otherwise. + * Sets *idx to point to the position of the extent inside the cache. + **/ +static uint8_t lookup_in_xt_cache(struct rrdengine_worker_config* wc, struct extent_info *extent, unsigned *idx) +{ + struct extent_cache *xt_cache = &wc->xt_cache; + struct extent_cache_element *xt_cache_elem; + unsigned i; + + for (i = 0 ; i < MAX_CACHED_EXTENTS ; ++i) { + xt_cache_elem = &xt_cache->extent_array[i]; + if (check_bit(xt_cache->allocation_bitmap, i) && xt_cache_elem->extent == extent && + xt_cache_elem->fileno == extent->datafile->fileno) { + *idx = i; + return 0; + } + } + return 1; +} + +#if 0 /* disabled code */ +static void delete_from_xt_cache(struct rrdengine_worker_config* wc, unsigned idx) +{ + struct extent_cache *xt_cache = &wc->xt_cache; + struct extent_cache_element *xt_cache_elem; + + xt_cache_elem = &xt_cache->extent_array[idx]; + xt_cache_replaceQ_delete(wc, xt_cache_elem); + xt_cache_elem->extent = NULL; + modify_bit(&wc->xt_cache.allocation_bitmap, idx, 0); /* invalidate it */ + modify_bit(&wc->xt_cache.inflight_bitmap, idx, 0); /* not in-flight anymore */ +} +#endif + +void enqueue_inflight_read_to_xt_cache(struct rrdengine_worker_config* wc, unsigned idx, + struct extent_io_descriptor *xt_io_descr) +{ + struct extent_cache *xt_cache = &wc->xt_cache; + struct extent_cache_element *xt_cache_elem; + struct extent_io_descriptor *old_next; + + xt_cache_elem = &xt_cache->extent_array[idx]; + old_next = xt_cache_elem->inflight_io_descr->next; + xt_cache_elem->inflight_io_descr->next = xt_io_descr; + xt_io_descr->next = old_next; +} + +void read_cached_extent_cb(struct rrdengine_worker_config* wc, unsigned idx, struct extent_io_descriptor *xt_io_descr) +{ + unsigned i, j, page_offset; + struct rrdengine_instance *ctx = wc->ctx; + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + void *page; + struct extent_info *extent = xt_io_descr->descr_array[0]->extent; + + for (i = 0 ; i < xt_io_descr->descr_count; ++i) { + page = dbengine_page_alloc(); + descr = xt_io_descr->descr_array[i]; + for (j = 0, page_offset = 0 ; j < extent->number_of_pages ; ++j) { + /* care, we don't hold the descriptor mutex */ + if (!uuid_compare(*extent->pages[j]->id, *descr->id) && + extent->pages[j]->page_length == descr->page_length && + extent->pages[j]->start_time_ut == descr->start_time_ut && + extent->pages[j]->end_time_ut == descr->end_time_ut) { + break; + } + page_offset += extent->pages[j]->page_length; + + } + /* care, we don't hold the descriptor mutex */ + (void) memcpy(page, wc->xt_cache.extent_array[idx].pages + page_offset, descr->page_length); + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + pg_cache_descr->page = page; + pg_cache_descr->flags |= RRD_PAGE_POPULATED; + pg_cache_descr->flags &= ~RRD_PAGE_READ_PENDING; + rrdeng_page_descr_mutex_unlock(ctx, descr); + pg_cache_replaceQ_insert(ctx, descr); + if (xt_io_descr->release_descr) { + pg_cache_put(ctx, descr); + } else { + debug(D_RRDENGINE, "%s: Waking up waiters.", __func__); + pg_cache_wake_up_waiters(ctx, descr); + } + } + if (xt_io_descr->completion) + completion_mark_complete(xt_io_descr->completion); + freez(xt_io_descr); +} + +static void fill_page_with_nulls(void *page, uint32_t page_length, uint8_t type) { + switch(type) { + case PAGE_METRICS: { + storage_number n = pack_storage_number(NAN, SN_FLAG_NONE); + storage_number *array = (storage_number *)page; + size_t slots = page_length / sizeof(n); + for(size_t i = 0; i < slots ; i++) + array[i] = n; + } + break; + + case PAGE_TIER: { + storage_number_tier1_t n = { + .min_value = NAN, + .max_value = NAN, + .sum_value = NAN, + .count = 1, + .anomaly_count = 0, + }; + storage_number_tier1_t *array = (storage_number_tier1_t *)page; + size_t slots = page_length / sizeof(n); + for(size_t i = 0; i < slots ; i++) + array[i] = n; + } + break; + + default: { + static bool logged = false; + if(!logged) { + error("DBENGINE: cannot fill page with nulls on unknown page type id %d", type); + logged = true; + } + memset(page, 0, page_length); + } + } +} + +struct rrdeng_page_descr *get_descriptor(struct pg_cache_page_index *page_index, time_t start_time_s) +{ + uv_rwlock_rdlock(&page_index->lock); + Pvoid_t *PValue = JudyLGet(page_index->JudyL_array, start_time_s, PJE0); + struct rrdeng_page_descr *descr = unlikely(NULL == PValue) ? NULL : *PValue; + uv_rwlock_rdunlock(&page_index->lock); + return descr; +}; + +static void do_extent_processing (struct rrdengine_worker_config *wc, struct extent_io_descriptor *xt_io_descr, bool read_failed) +{ + struct rrdengine_instance *ctx = wc->ctx; + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + int ret; + unsigned i, j, count; + void *page, *uncompressed_buf = NULL; + uint32_t payload_length, payload_offset, page_offset, uncompressed_payload_length = 0; + uint8_t have_read_error = 0; + /* persistent structures */ + struct rrdeng_df_extent_header *header; + struct rrdeng_df_extent_trailer *trailer; + uLong crc; + + header = xt_io_descr->buf; + payload_length = header->payload_length; + count = header->number_of_pages; + payload_offset = sizeof(*header) + sizeof(header->descr[0]) * count; + trailer = xt_io_descr->buf + xt_io_descr->bytes - sizeof(*trailer); + + if (unlikely(read_failed)) { + struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile; + + ++ctx->stats.io_errors; + rrd_stat_atomic_add(&global_io_errors, 1); + have_read_error = 1; + error("%s: uv_fs_read - extent at offset %"PRIu64"(%u) in datafile %u-%u.", __func__, xt_io_descr->pos, + xt_io_descr->bytes, datafile->tier, datafile->fileno); + goto after_crc_check; + } + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, xt_io_descr->buf, xt_io_descr->bytes - sizeof(*trailer)); + ret = crc32cmp(trailer->checksum, crc); +#ifdef NETDATA_INTERNAL_CHECKS + { + struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile; + debug(D_RRDENGINE, "%s: Extent at offset %"PRIu64"(%u) was read from datafile %u-%u. CRC32 check: %s", __func__, + xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno, ret ? "FAILED" : "SUCCEEDED"); + } +#endif + if (unlikely(ret)) { + struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile; + + ++ctx->stats.io_errors; + rrd_stat_atomic_add(&global_io_errors, 1); + have_read_error = 1; + error("%s: Extent at offset %"PRIu64"(%u) was read from datafile %u-%u. CRC32 check: FAILED", __func__, + xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno); + } + +after_crc_check: + if (!have_read_error && RRD_NO_COMPRESSION != header->compression_algorithm) { + uncompressed_payload_length = 0; + for (i = 0 ; i < count ; ++i) { + uncompressed_payload_length += header->descr[i].page_length; + } + uncompressed_buf = mallocz(uncompressed_payload_length); + ret = LZ4_decompress_safe(xt_io_descr->buf + payload_offset, uncompressed_buf, + payload_length, uncompressed_payload_length); + ctx->stats.before_decompress_bytes += payload_length; + ctx->stats.after_decompress_bytes += ret; + debug(D_RRDENGINE, "LZ4 decompressed %u bytes to %d bytes.", payload_length, ret); + /* care, we don't hold the descriptor mutex */ + } + { + uint8_t xt_is_cached = 0; + unsigned xt_idx; + struct extent_info *extent = xt_io_descr->descr_array[0]->extent; + + xt_is_cached = !lookup_in_xt_cache(wc, extent, &xt_idx); + if (xt_is_cached && check_bit(wc->xt_cache.inflight_bitmap, xt_idx)) { + struct extent_cache *xt_cache = &wc->xt_cache; + struct extent_cache_element *xt_cache_elem = &xt_cache->extent_array[xt_idx]; + struct extent_io_descriptor *curr, *next; + + if (have_read_error) { + memset(xt_cache_elem->pages, 0, sizeof(xt_cache_elem->pages)); + } else if (RRD_NO_COMPRESSION == header->compression_algorithm) { + (void)memcpy(xt_cache_elem->pages, xt_io_descr->buf + payload_offset, payload_length); + } else { + (void)memcpy(xt_cache_elem->pages, uncompressed_buf, uncompressed_payload_length); + } + /* complete all connected in-flight read requests */ + for (curr = xt_cache_elem->inflight_io_descr->next ; curr ; curr = next) { + next = curr->next; + read_cached_extent_cb(wc, xt_idx, curr); + } + xt_cache_elem->inflight_io_descr = NULL; + modify_bit(&xt_cache->inflight_bitmap, xt_idx, 0); /* not in-flight anymore */ + } + } + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + Pvoid_t *PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, xt_io_descr->descr_array[0]->id, sizeof(uuid_t)); + struct pg_cache_page_index *page_index = likely( NULL != PValue) ? *PValue : NULL; + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + + + for (i = 0, page_offset = 0; i < count; page_offset += header->descr[i++].page_length) { + uint8_t is_prefetched_page; + descr = NULL; + for (j = 0 ; j < xt_io_descr->descr_count; ++j) { + struct rrdeng_page_descr descrj; + + descrj = xt_io_descr->descr_read_array[j]; + /* care, we don't hold the descriptor mutex */ + if (!uuid_compare(*(uuid_t *) header->descr[i].uuid, *descrj.id) && + header->descr[i].page_length == descrj.page_length && + header->descr[i].start_time_ut == descrj.start_time_ut && + header->descr[i].end_time_ut == descrj.end_time_ut) { + //descr = descrj; + descr = get_descriptor(page_index, (time_t) (descrj.start_time_ut / USEC_PER_SEC)); + if (unlikely(!descr)) { + error_limit_static_thread_var(erl, 1, 0); + error_limit(&erl, "%s: Required descriptor is not in the page index anymore", __FUNCTION__); + } + break; + } + } + is_prefetched_page = 0; + if (!descr) { /* This extent page has not been requested. Try populating it for locality (best effort). */ + descr = pg_cache_lookup_unpopulated_and_lock(ctx, (uuid_t *)header->descr[i].uuid, + header->descr[i].start_time_ut); + if (!descr) + continue; /* Failed to reserve a suitable page */ + is_prefetched_page = 1; + } + page = dbengine_page_alloc(); + + /* care, we don't hold the descriptor mutex */ + if (have_read_error) { + fill_page_with_nulls(page, descr->page_length, descr->type); + } else if (RRD_NO_COMPRESSION == header->compression_algorithm) { + (void) memcpy(page, xt_io_descr->buf + payload_offset + page_offset, descr->page_length); + } else { + (void) memcpy(page, uncompressed_buf + page_offset, descr->page_length); + } + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + pg_cache_descr->page = page; + pg_cache_descr->flags |= RRD_PAGE_POPULATED; + pg_cache_descr->flags &= ~RRD_PAGE_READ_PENDING; + rrdeng_page_descr_mutex_unlock(ctx, descr); + pg_cache_replaceQ_insert(ctx, descr); + if (xt_io_descr->release_descr || is_prefetched_page) { + pg_cache_put(ctx, descr); + } else { + debug(D_RRDENGINE, "%s: Waking up waiters.", __func__); + pg_cache_wake_up_waiters(ctx, descr); + } + } + if (!have_read_error && RRD_NO_COMPRESSION != header->compression_algorithm) { + freez(uncompressed_buf); + } + if (xt_io_descr->completion) + completion_mark_complete(xt_io_descr->completion); +} + +static void read_extent_cb(uv_fs_t *req) +{ + struct rrdengine_worker_config *wc = req->loop->data; + struct extent_io_descriptor *xt_io_descr; + + xt_io_descr = req->data; + do_extent_processing(wc, xt_io_descr, req->result < 0); + uv_fs_req_cleanup(req); + posix_memfree(xt_io_descr->buf); + freez(xt_io_descr); +} + +static void read_mmap_extent_cb(uv_work_t *req, int status __maybe_unused) +{ + struct rrdengine_worker_config *wc = req->loop->data; + struct rrdengine_instance *ctx = wc->ctx; + struct extent_io_descriptor *xt_io_descr; + xt_io_descr = req->data; + + if (likely(xt_io_descr->map_base)) { + do_extent_processing(wc, xt_io_descr, false); + munmap(xt_io_descr->map_base, xt_io_descr->map_length); + freez(xt_io_descr); + return; + } + + // MMAP failed, so do uv_fs_read + int ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(xt_io_descr->bytes)); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + } + unsigned real_io_size = ALIGN_BYTES_CEILING( xt_io_descr->bytes); + xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size); + xt_io_descr->req.data = xt_io_descr; + ret = uv_fs_read(req->loop, &xt_io_descr->req, xt_io_descr->file, &xt_io_descr->iov, 1, (unsigned) xt_io_descr->pos, read_extent_cb); + fatal_assert(-1 != ret); + ctx->stats.io_read_bytes += real_io_size; + ctx->stats.io_read_extent_bytes += real_io_size; +} + +static void do_mmap_read_extent(uv_work_t *req) +{ + struct extent_io_descriptor *xt_io_descr = (struct extent_io_descriptor * )req->data; + struct rrdengine_worker_config *wc = req->loop->data; + struct rrdengine_instance *ctx = wc->ctx; + + off_t map_start = ALIGN_BYTES_FLOOR(xt_io_descr->pos); + size_t length = ALIGN_BYTES_CEILING(xt_io_descr->pos + xt_io_descr->bytes) - map_start; + unsigned real_io_size = xt_io_descr->bytes; + + void *data = mmap(NULL, length, PROT_READ, MAP_SHARED, xt_io_descr->file, map_start); + if (likely(data != MAP_FAILED)) { + xt_io_descr->map_base = data; + xt_io_descr->map_length = length; + xt_io_descr->buf = data + (xt_io_descr->pos - map_start); + ctx->stats.io_read_bytes += real_io_size; + ctx->stats.io_read_extent_bytes += real_io_size; + } +} + +static void do_read_extent(struct rrdengine_worker_config* wc, + struct rrdeng_page_descr **descr, + unsigned count, + uint8_t release_descr) +{ + struct rrdengine_instance *ctx = wc->ctx; + struct page_cache_descr *pg_cache_descr; + int ret; + unsigned i, size_bytes, pos; + struct extent_io_descriptor *xt_io_descr; + struct rrdengine_datafile *datafile; + struct extent_info *extent = descr[0]->extent; + uint8_t xt_is_cached = 0, xt_is_inflight = 0; + unsigned xt_idx; + + datafile = extent->datafile; + pos = extent->offset; + size_bytes = extent->size; + + xt_io_descr = callocz(1, sizeof(*xt_io_descr)); + for (i = 0 ; i < count; ++i) { + rrdeng_page_descr_mutex_lock(ctx, descr[i]); + pg_cache_descr = descr[i]->pg_cache_descr; + pg_cache_descr->flags |= RRD_PAGE_READ_PENDING; + rrdeng_page_descr_mutex_unlock(ctx, descr[i]); + xt_io_descr->descr_array[i] = descr[i]; + xt_io_descr->descr_read_array[i] = *(descr[i]); + } + xt_io_descr->descr_count = count; + xt_io_descr->file = datafile->file; + xt_io_descr->bytes = size_bytes; + xt_io_descr->pos = pos; + xt_io_descr->req_worker.data = xt_io_descr; + xt_io_descr->completion = NULL; + xt_io_descr->release_descr = release_descr; + xt_io_descr->buf = NULL; + + xt_is_cached = !lookup_in_xt_cache(wc, extent, &xt_idx); + if (xt_is_cached) { + xt_cache_replaceQ_set_hot(wc, &wc->xt_cache.extent_array[xt_idx]); + xt_is_inflight = check_bit(wc->xt_cache.inflight_bitmap, xt_idx); + if (xt_is_inflight) { + enqueue_inflight_read_to_xt_cache(wc, xt_idx, xt_io_descr); + return; + } + return read_cached_extent_cb(wc, xt_idx, xt_io_descr); + } else { + ret = try_insert_into_xt_cache(wc, extent); + if (-1 != ret) { + xt_idx = (unsigned)ret; + modify_bit(&wc->xt_cache.inflight_bitmap, xt_idx, 1); + wc->xt_cache.extent_array[xt_idx].inflight_io_descr = xt_io_descr; + } + } + + ret = uv_queue_work(wc->loop, &xt_io_descr->req_worker, do_mmap_read_extent, read_mmap_extent_cb); + fatal_assert(-1 != ret); + + ++ctx->stats.io_read_requests; + ++ctx->stats.io_read_extents; + ctx->stats.pg_cache_backfills += count; +} + +static void commit_data_extent(struct rrdengine_worker_config* wc, struct extent_io_descriptor *xt_io_descr) +{ + struct rrdengine_instance *ctx = wc->ctx; + unsigned count, payload_length, descr_size, size_bytes; + void *buf; + /* persistent structures */ + struct rrdeng_df_extent_header *df_header; + struct rrdeng_jf_transaction_header *jf_header; + struct rrdeng_jf_store_data *jf_metric_data; + struct rrdeng_jf_transaction_trailer *jf_trailer; + uLong crc; + + df_header = xt_io_descr->buf; + count = df_header->number_of_pages; + descr_size = sizeof(*jf_metric_data->descr) * count; + payload_length = sizeof(*jf_metric_data) + descr_size; + size_bytes = sizeof(*jf_header) + payload_length + sizeof(*jf_trailer); + + buf = wal_get_transaction_buffer(wc, size_bytes); + + jf_header = buf; + jf_header->type = STORE_DATA; + jf_header->reserved = 0; + jf_header->id = ctx->commit_log.transaction_id++; + jf_header->payload_length = payload_length; + + jf_metric_data = buf + sizeof(*jf_header); + jf_metric_data->extent_offset = xt_io_descr->pos; + jf_metric_data->extent_size = xt_io_descr->bytes; + jf_metric_data->number_of_pages = count; + memcpy(jf_metric_data->descr, df_header->descr, descr_size); + + jf_trailer = buf + sizeof(*jf_header) + payload_length; + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, buf, sizeof(*jf_header) + payload_length); + crc32set(jf_trailer->checksum, crc); +} + +static void do_commit_transaction(struct rrdengine_worker_config* wc, uint8_t type, void *data) +{ + switch (type) { + case STORE_DATA: + commit_data_extent(wc, (struct extent_io_descriptor *)data); + break; + default: + fatal_assert(type == STORE_DATA); + break; + } +} + +static void after_invalidate_oldest_committed(struct rrdengine_worker_config* wc) +{ + int error; + + error = uv_thread_join(wc->now_invalidating_dirty_pages); + if (error) { + error("uv_thread_join(): %s", uv_strerror(error)); + } + freez(wc->now_invalidating_dirty_pages); + wc->now_invalidating_dirty_pages = NULL; + wc->cleanup_thread_invalidating_dirty_pages = 0; +} + +static void invalidate_oldest_committed(void *arg) +{ + struct rrdengine_instance *ctx = arg; + struct rrdengine_worker_config *wc = &ctx->worker_config; + struct page_cache *pg_cache = &ctx->pg_cache; + int ret; + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + Pvoid_t *PValue; + Word_t Index; + unsigned nr_committed_pages; + + do { + uv_rwlock_wrlock(&pg_cache->committed_page_index.lock); + for (Index = 0, + PValue = JudyLFirst(pg_cache->committed_page_index.JudyL_array, &Index, PJE0), + descr = unlikely(NULL == PValue) ? NULL : *PValue; + + descr != NULL; + + PValue = JudyLNext(pg_cache->committed_page_index.JudyL_array, &Index, PJE0), + descr = unlikely(NULL == PValue) ? NULL : *PValue) { + fatal_assert(0 != descr->page_length); + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + if (!(pg_cache_descr->flags & RRD_PAGE_WRITE_PENDING) && pg_cache_try_get_unsafe(descr, 1)) { + rrdeng_page_descr_mutex_unlock(ctx, descr); + + ret = JudyLDel(&pg_cache->committed_page_index.JudyL_array, Index, PJE0); + fatal_assert(1 == ret); + break; + } + rrdeng_page_descr_mutex_unlock(ctx, descr); + } + uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock); + + if (!descr) { + info("Failed to invalidate any dirty pages to relieve page cache pressure."); + + goto out; + } + pg_cache_punch_hole(ctx, descr, 1, 1, NULL); + + uv_rwlock_wrlock(&pg_cache->committed_page_index.lock); + nr_committed_pages = --pg_cache->committed_page_index.nr_committed_pages; + uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock); + rrd_stat_atomic_add(&ctx->stats.flushing_pressure_page_deletions, 1); + rrd_stat_atomic_add(&global_flushing_pressure_page_deletions, 1); + + } while (nr_committed_pages >= pg_cache_committed_hard_limit(ctx)); +out: + wc->cleanup_thread_invalidating_dirty_pages = 1; + /* wake up event loop */ + fatal_assert(0 == uv_async_send(&wc->async)); +} + +void rrdeng_invalidate_oldest_committed(struct rrdengine_worker_config* wc) +{ + struct rrdengine_instance *ctx = wc->ctx; + struct page_cache *pg_cache = &ctx->pg_cache; + unsigned nr_committed_pages; + int error; + + if (unlikely(ctx->quiesce != NO_QUIESCE)) /* Shutting down */ + return; + + uv_rwlock_rdlock(&pg_cache->committed_page_index.lock); + nr_committed_pages = pg_cache->committed_page_index.nr_committed_pages; + uv_rwlock_rdunlock(&pg_cache->committed_page_index.lock); + + if (nr_committed_pages >= pg_cache_committed_hard_limit(ctx)) { + /* delete the oldest page in memory */ + if (wc->now_invalidating_dirty_pages) { + /* already deleting a page */ + return; + } + errno = 0; + error("Failed to flush dirty buffers quickly enough in dbengine instance \"%s\". " + "Metric data are being deleted, please reduce disk load or use a faster disk.", ctx->dbfiles_path); + + wc->now_invalidating_dirty_pages = mallocz(sizeof(*wc->now_invalidating_dirty_pages)); + wc->cleanup_thread_invalidating_dirty_pages = 0; + + error = uv_thread_create(wc->now_invalidating_dirty_pages, invalidate_oldest_committed, ctx); + if (error) { + error("uv_thread_create(): %s", uv_strerror(error)); + freez(wc->now_invalidating_dirty_pages); + wc->now_invalidating_dirty_pages = NULL; + } + } +} + +void flush_pages_cb(uv_fs_t* req) +{ + struct rrdengine_worker_config* wc = req->loop->data; + struct rrdengine_instance *ctx = wc->ctx; + struct page_cache *pg_cache = &ctx->pg_cache; + struct extent_io_descriptor *xt_io_descr; + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + unsigned i, count; + + xt_io_descr = req->data; + if (req->result < 0) { + ++ctx->stats.io_errors; + rrd_stat_atomic_add(&global_io_errors, 1); + error("%s: uv_fs_write: %s", __func__, uv_strerror((int)req->result)); + } +#ifdef NETDATA_INTERNAL_CHECKS + { + struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile; + debug(D_RRDENGINE, "%s: Extent at offset %"PRIu64"(%u) was written to datafile %u-%u. Waking up waiters.", + __func__, xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno); + } +#endif + count = xt_io_descr->descr_count; + for (i = 0 ; i < count ; ++i) { + /* care, we don't hold the descriptor mutex */ + descr = xt_io_descr->descr_array[i]; + + pg_cache_replaceQ_insert(ctx, descr); + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + pg_cache_descr->flags &= ~(RRD_PAGE_DIRTY | RRD_PAGE_WRITE_PENDING); + /* wake up waiters, care no reference being held */ + pg_cache_wake_up_waiters_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); + } + if (xt_io_descr->completion) + completion_mark_complete(xt_io_descr->completion); + uv_fs_req_cleanup(req); + posix_memfree(xt_io_descr->buf); + freez(xt_io_descr); + + uv_rwlock_wrlock(&pg_cache->committed_page_index.lock); + pg_cache->committed_page_index.nr_committed_pages -= count; + uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock); + wc->inflight_dirty_pages -= count; +} + +/* + * completion must be NULL or valid. + * Returns 0 when no flushing can take place. + * Returns datafile bytes to be written on successful flushing initiation. + */ +static int do_flush_pages(struct rrdengine_worker_config* wc, int force, struct completion *completion) +{ + struct rrdengine_instance *ctx = wc->ctx; + struct page_cache *pg_cache = &ctx->pg_cache; + int ret; + int compressed_size, max_compressed_size = 0; + unsigned i, count, size_bytes, pos, real_io_size; + uint32_t uncompressed_payload_length, payload_offset; + struct rrdeng_page_descr *descr, *eligible_pages[MAX_PAGES_PER_EXTENT]; + struct page_cache_descr *pg_cache_descr; + struct extent_io_descriptor *xt_io_descr; + void *compressed_buf = NULL; + Word_t descr_commit_idx_array[MAX_PAGES_PER_EXTENT]; + Pvoid_t *PValue; + Word_t Index; + uint8_t compression_algorithm = ctx->global_compress_alg; + struct extent_info *extent; + struct rrdengine_datafile *datafile; + /* persistent structures */ + struct rrdeng_df_extent_header *header; + struct rrdeng_df_extent_trailer *trailer; + uLong crc; + + if (force) { + debug(D_RRDENGINE, "Asynchronous flushing of extent has been forced by page pressure."); + } + uv_rwlock_wrlock(&pg_cache->committed_page_index.lock); + for (Index = 0, count = 0, uncompressed_payload_length = 0, + PValue = JudyLFirst(pg_cache->committed_page_index.JudyL_array, &Index, PJE0), + descr = unlikely(NULL == PValue) ? NULL : *PValue ; + + descr != NULL && count != rrdeng_pages_per_extent; + + PValue = JudyLNext(pg_cache->committed_page_index.JudyL_array, &Index, PJE0), + descr = unlikely(NULL == PValue) ? NULL : *PValue) { + uint8_t page_write_pending; + + fatal_assert(0 != descr->page_length); + page_write_pending = 0; + + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + if (!(pg_cache_descr->flags & RRD_PAGE_WRITE_PENDING)) { + page_write_pending = 1; + /* care, no reference being held */ + pg_cache_descr->flags |= RRD_PAGE_WRITE_PENDING; + uncompressed_payload_length += descr->page_length; + descr_commit_idx_array[count] = Index; + eligible_pages[count++] = descr; + } + rrdeng_page_descr_mutex_unlock(ctx, descr); + + if (page_write_pending) { + ret = JudyLDel(&pg_cache->committed_page_index.JudyL_array, Index, PJE0); + fatal_assert(1 == ret); + } + } + uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock); + + if (!count) { + debug(D_RRDENGINE, "%s: no pages eligible for flushing.", __func__); + if (completion) + completion_mark_complete(completion); + return 0; + } + wc->inflight_dirty_pages += count; + + xt_io_descr = mallocz(sizeof(*xt_io_descr)); + payload_offset = sizeof(*header) + count * sizeof(header->descr[0]); + switch (compression_algorithm) { + case RRD_NO_COMPRESSION: + size_bytes = payload_offset + uncompressed_payload_length + sizeof(*trailer); + break; + default: /* Compress */ + fatal_assert(uncompressed_payload_length < LZ4_MAX_INPUT_SIZE); + max_compressed_size = LZ4_compressBound(uncompressed_payload_length); + compressed_buf = mallocz(max_compressed_size); + size_bytes = payload_offset + MAX(uncompressed_payload_length, (unsigned)max_compressed_size) + sizeof(*trailer); + break; + } + ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes)); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + /* freez(xt_io_descr);*/ + } + memset(xt_io_descr->buf, 0, ALIGN_BYTES_CEILING(size_bytes)); + (void) memcpy(xt_io_descr->descr_array, eligible_pages, sizeof(struct rrdeng_page_descr *) * count); + xt_io_descr->descr_count = count; + + pos = 0; + header = xt_io_descr->buf; + header->compression_algorithm = compression_algorithm; + header->number_of_pages = count; + pos += sizeof(*header); + + extent = mallocz(sizeof(*extent) + count * sizeof(extent->pages[0])); + datafile = ctx->datafiles.last; /* TODO: check for exceeded size quota */ + extent->offset = datafile->pos; + extent->number_of_pages = count; + extent->datafile = datafile; + extent->next = NULL; + + for (i = 0 ; i < count ; ++i) { + /* This is here for performance reasons */ + xt_io_descr->descr_commit_idx_array[i] = descr_commit_idx_array[i]; + + descr = xt_io_descr->descr_array[i]; + header->descr[i].type = descr->type; + uuid_copy(*(uuid_t *)header->descr[i].uuid, *descr->id); + header->descr[i].page_length = descr->page_length; + header->descr[i].start_time_ut = descr->start_time_ut; + header->descr[i].end_time_ut = descr->end_time_ut; + pos += sizeof(header->descr[i]); + } + for (i = 0 ; i < count ; ++i) { + descr = xt_io_descr->descr_array[i]; + /* care, we don't hold the descriptor mutex */ + (void) memcpy(xt_io_descr->buf + pos, descr->pg_cache_descr->page, descr->page_length); + descr->extent = extent; + extent->pages[i] = descr; + + pos += descr->page_length; + } + df_extent_insert(extent); + + switch (compression_algorithm) { + case RRD_NO_COMPRESSION: + header->payload_length = uncompressed_payload_length; + break; + default: /* Compress */ + compressed_size = LZ4_compress_default(xt_io_descr->buf + payload_offset, compressed_buf, + uncompressed_payload_length, max_compressed_size); + ctx->stats.before_compress_bytes += uncompressed_payload_length; + ctx->stats.after_compress_bytes += compressed_size; + debug(D_RRDENGINE, "LZ4 compressed %"PRIu32" bytes to %d bytes.", uncompressed_payload_length, compressed_size); + (void) memcpy(xt_io_descr->buf + payload_offset, compressed_buf, compressed_size); + freez(compressed_buf); + size_bytes = payload_offset + compressed_size + sizeof(*trailer); + header->payload_length = compressed_size; + break; + } + extent->size = size_bytes; + xt_io_descr->bytes = size_bytes; + xt_io_descr->pos = datafile->pos; + xt_io_descr->req.data = xt_io_descr; + xt_io_descr->completion = completion; + + trailer = xt_io_descr->buf + size_bytes - sizeof(*trailer); + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, xt_io_descr->buf, size_bytes - sizeof(*trailer)); + crc32set(trailer->checksum, crc); + + real_io_size = ALIGN_BYTES_CEILING(size_bytes); + xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size); + ret = uv_fs_write(wc->loop, &xt_io_descr->req, datafile->file, &xt_io_descr->iov, 1, datafile->pos, flush_pages_cb); + fatal_assert(-1 != ret); + ctx->stats.io_write_bytes += real_io_size; + ++ctx->stats.io_write_requests; + ctx->stats.io_write_extent_bytes += real_io_size; + ++ctx->stats.io_write_extents; + do_commit_transaction(wc, STORE_DATA, xt_io_descr); + datafile->pos += ALIGN_BYTES_CEILING(size_bytes); + ctx->disk_space += ALIGN_BYTES_CEILING(size_bytes); + rrdeng_test_quota(wc); + + return ALIGN_BYTES_CEILING(size_bytes); +} + +static void after_delete_old_data(struct rrdengine_worker_config* wc) +{ + struct rrdengine_instance *ctx = wc->ctx; + struct rrdengine_datafile *datafile; + struct rrdengine_journalfile *journalfile; + unsigned deleted_bytes, journalfile_bytes, datafile_bytes; + int ret, error; + char path[RRDENG_PATH_MAX]; + + datafile = ctx->datafiles.first; + journalfile = datafile->journalfile; + datafile_bytes = datafile->pos; + journalfile_bytes = journalfile->pos; + deleted_bytes = 0; + + info("Deleting data and journal file pair."); + datafile_list_delete(ctx, datafile); + ret = destroy_journal_file(journalfile, datafile); + if (!ret) { + generate_journalfilepath(datafile, path, sizeof(path)); + info("Deleted journal file \"%s\".", path); + deleted_bytes += journalfile_bytes; + } + ret = destroy_data_file(datafile); + if (!ret) { + generate_datafilepath(datafile, path, sizeof(path)); + info("Deleted data file \"%s\".", path); + deleted_bytes += datafile_bytes; + } + freez(journalfile); + freez(datafile); + + ctx->disk_space -= deleted_bytes; + info("Reclaimed %u bytes of disk space.", deleted_bytes); + + error = uv_thread_join(wc->now_deleting_files); + if (error) { + error("uv_thread_join(): %s", uv_strerror(error)); + } + freez(wc->now_deleting_files); + /* unfreeze command processing */ + wc->now_deleting_files = NULL; + + wc->cleanup_thread_deleting_files = 0; + rrdcontext_db_rotation(); + + /* interrupt event loop */ + uv_stop(wc->loop); +} + +static void delete_old_data(void *arg) +{ + struct rrdengine_instance *ctx = arg; + struct rrdengine_worker_config* wc = &ctx->worker_config; + struct rrdengine_datafile *datafile; + struct extent_info *extent, *next; + struct rrdeng_page_descr *descr; + unsigned count, i; + uint8_t can_delete_metric; + uuid_t metric_id; + + /* Safe to use since it will be deleted after we are done */ + datafile = ctx->datafiles.first; + + for (extent = datafile->extents.first ; extent != NULL ; extent = next) { + count = extent->number_of_pages; + for (i = 0 ; i < count ; ++i) { + descr = extent->pages[i]; + can_delete_metric = pg_cache_punch_hole(ctx, descr, 0, 0, &metric_id); + if (unlikely(can_delete_metric)) { + /* + * If the metric is empty, has no active writers and if the metadata log has been initialized then + * attempt to delete the corresponding netdata dimension. + */ + metaqueue_delete_dimension_uuid(&metric_id); + } + } + next = extent->next; + freez(extent); + } + wc->cleanup_thread_deleting_files = 1; + /* wake up event loop */ + fatal_assert(0 == uv_async_send(&wc->async)); +} + +void rrdeng_test_quota(struct rrdengine_worker_config* wc) +{ + struct rrdengine_instance *ctx = wc->ctx; + struct rrdengine_datafile *datafile; + unsigned current_size, target_size; + uint8_t out_of_space, only_one_datafile; + int ret, error; + + out_of_space = 0; + /* Do not allow the pinned pages to exceed the disk space quota to avoid deadlocks */ + if (unlikely(ctx->disk_space > MAX(ctx->max_disk_space, 2 * ctx->metric_API_max_producers * RRDENG_BLOCK_SIZE))) { + out_of_space = 1; + } + datafile = ctx->datafiles.last; + current_size = datafile->pos; + target_size = ctx->max_disk_space / TARGET_DATAFILES; + target_size = MIN(target_size, MAX_DATAFILE_SIZE); + target_size = MAX(target_size, MIN_DATAFILE_SIZE); + only_one_datafile = (datafile == ctx->datafiles.first) ? 1 : 0; + if (unlikely(current_size >= target_size || (out_of_space && only_one_datafile))) { + /* Finalize data and journal file and create a new pair */ + wal_flush_transaction_buffer(wc); + ret = create_new_datafile_pair(ctx, 1, ctx->last_fileno + 1); + if (likely(!ret)) { + ++ctx->last_fileno; + } + } + if (unlikely(out_of_space && NO_QUIESCE == ctx->quiesce)) { + /* delete old data */ + if (wc->now_deleting_files) { + /* already deleting data */ + return; + } + if (NULL == ctx->datafiles.first->next) { + error("Cannot delete data file \"%s/"DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION"\"" + " to reclaim space, there are no other file pairs left.", + ctx->dbfiles_path, ctx->datafiles.first->tier, ctx->datafiles.first->fileno); + return; + } + info("Deleting data file \"%s/"DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION"\".", + ctx->dbfiles_path, ctx->datafiles.first->tier, ctx->datafiles.first->fileno); + wc->now_deleting_files = mallocz(sizeof(*wc->now_deleting_files)); + wc->cleanup_thread_deleting_files = 0; + + error = uv_thread_create(wc->now_deleting_files, delete_old_data, ctx); + if (error) { + error("uv_thread_create(): %s", uv_strerror(error)); + freez(wc->now_deleting_files); + wc->now_deleting_files = NULL; + } + } +} + +static inline int rrdeng_threads_alive(struct rrdengine_worker_config* wc) +{ + if (wc->now_invalidating_dirty_pages || wc->now_deleting_files) { + return 1; + } + return 0; +} + +static void rrdeng_cleanup_finished_threads(struct rrdengine_worker_config* wc) +{ + struct rrdengine_instance *ctx = wc->ctx; + + if (unlikely(wc->cleanup_thread_invalidating_dirty_pages)) { + after_invalidate_oldest_committed(wc); + } + if (unlikely(wc->cleanup_thread_deleting_files)) { + after_delete_old_data(wc); + } + if (unlikely(SET_QUIESCE == ctx->quiesce && !rrdeng_threads_alive(wc))) { + ctx->quiesce = QUIESCED; + completion_mark_complete(&ctx->rrdengine_completion); + } +} + +/* return 0 on success */ +int init_rrd_files(struct rrdengine_instance *ctx) +{ + int ret = init_data_files(ctx); + + BUFFER *wb = buffer_create(1000); + size_t all_errors = 0; + usec_t now = now_realtime_usec(); + + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter) { + buffer_sprintf(wb, "%s%zu pages had start time > end time (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter) { + buffer_sprintf(wb, "%s%zu pages had start time = end time with more than 1 entries (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter) { + buffer_sprintf(wb, "%s%zu pages had zero points (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter) { + buffer_sprintf(wb, "%s%zu pages had update every == 0 with entries > 1 (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter) { + buffer_sprintf(wb, "%s%zu pages had a different number of points compared to their timestamps (latest: %llu secs ago; these page have been loaded)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter) { + buffer_sprintf(wb, "%s%zu extents have been dropped because they didn't have any valid pages" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter + ); + all_errors += ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter; + } + + if(all_errors) + info("DBENGINE: tier %d: %s", ctx->tier, buffer_tostring(wb)); + + buffer_free(wb); + return ret; +} + +void finalize_rrd_files(struct rrdengine_instance *ctx) +{ + return finalize_data_files(ctx); +} + +void rrdeng_init_cmd_queue(struct rrdengine_worker_config* wc) +{ + wc->cmd_queue.head = wc->cmd_queue.tail = 0; + wc->queue_size = 0; + fatal_assert(0 == uv_cond_init(&wc->cmd_cond)); + fatal_assert(0 == uv_mutex_init(&wc->cmd_mutex)); +} + +void rrdeng_enq_cmd(struct rrdengine_worker_config* wc, struct rrdeng_cmd *cmd) +{ + unsigned queue_size; + + /* wait for free space in queue */ + uv_mutex_lock(&wc->cmd_mutex); + while ((queue_size = wc->queue_size) == RRDENG_CMD_Q_MAX_SIZE) { + uv_cond_wait(&wc->cmd_cond, &wc->cmd_mutex); + } + fatal_assert(queue_size < RRDENG_CMD_Q_MAX_SIZE); + /* enqueue command */ + wc->cmd_queue.cmd_array[wc->cmd_queue.tail] = *cmd; + wc->cmd_queue.tail = wc->cmd_queue.tail != RRDENG_CMD_Q_MAX_SIZE - 1 ? + wc->cmd_queue.tail + 1 : 0; + wc->queue_size = queue_size + 1; + uv_mutex_unlock(&wc->cmd_mutex); + + /* wake up event loop */ + fatal_assert(0 == uv_async_send(&wc->async)); +} + +struct rrdeng_cmd rrdeng_deq_cmd(struct rrdengine_worker_config* wc) +{ + struct rrdeng_cmd ret; + unsigned queue_size; + + uv_mutex_lock(&wc->cmd_mutex); + queue_size = wc->queue_size; + if (queue_size == 0) { + ret.opcode = RRDENG_NOOP; + } else { + /* dequeue command */ + ret = wc->cmd_queue.cmd_array[wc->cmd_queue.head]; + if (queue_size == 1) { + wc->cmd_queue.head = wc->cmd_queue.tail = 0; + } else { + wc->cmd_queue.head = wc->cmd_queue.head != RRDENG_CMD_Q_MAX_SIZE - 1 ? + wc->cmd_queue.head + 1 : 0; + } + wc->queue_size = queue_size - 1; + + /* wake up producers */ + uv_cond_signal(&wc->cmd_cond); + } + uv_mutex_unlock(&wc->cmd_mutex); + + return ret; +} + +static void load_configuration_dynamic(void) +{ + unsigned read_num = (unsigned)config_get_number(CONFIG_SECTION_DB, "dbengine pages per extent", MAX_PAGES_PER_EXTENT); + if (read_num > 0 && read_num <= MAX_PAGES_PER_EXTENT) + rrdeng_pages_per_extent = read_num; + else { + error("Invalid dbengine pages per extent %u given. Using %u.", read_num, rrdeng_pages_per_extent); + config_set_number(CONFIG_SECTION_DB, "dbengine pages per extent", rrdeng_pages_per_extent); + } +} + +void async_cb(uv_async_t *handle) +{ + uv_stop(handle->loop); + uv_update_time(handle->loop); + debug(D_RRDENGINE, "%s called, active=%d.", __func__, uv_is_active((uv_handle_t *)handle)); +} + +/* Flushes dirty pages when timer expires */ +#define TIMER_PERIOD_MS (1000) + +void timer_cb(uv_timer_t* handle) +{ + worker_is_busy(RRDENG_MAX_OPCODE + 1); + + struct rrdengine_worker_config* wc = handle->data; + struct rrdengine_instance *ctx = wc->ctx; + + uv_stop(handle->loop); + uv_update_time(handle->loop); + rrdeng_test_quota(wc); + debug(D_RRDENGINE, "%s: timeout reached.", __func__); + if (likely(!wc->now_deleting_files && !wc->now_invalidating_dirty_pages)) { + /* There is free space so we can write to disk and we are not actively deleting dirty buffers */ + struct page_cache *pg_cache = &ctx->pg_cache; + unsigned long total_bytes, bytes_written, nr_committed_pages, bytes_to_write = 0, producers, low_watermark, + high_watermark; + + uv_rwlock_rdlock(&pg_cache->committed_page_index.lock); + nr_committed_pages = pg_cache->committed_page_index.nr_committed_pages; + uv_rwlock_rdunlock(&pg_cache->committed_page_index.lock); + producers = ctx->metric_API_max_producers; + /* are flushable pages more than 25% of the maximum page cache size */ + high_watermark = (ctx->max_cache_pages * 25LLU) / 100; + low_watermark = (ctx->max_cache_pages * 5LLU) / 100; /* 5%, must be smaller than high_watermark */ + + /* Flush more pages only if disk can keep up */ + if (wc->inflight_dirty_pages < high_watermark + producers) { + if (nr_committed_pages > producers && + /* committed to be written pages are more than the produced number */ + nr_committed_pages - producers > high_watermark) { + /* Flushing speed must increase to stop page cache from filling with dirty pages */ + bytes_to_write = (nr_committed_pages - producers - low_watermark) * RRDENG_BLOCK_SIZE; + } + bytes_to_write = MAX(DATAFILE_IDEAL_IO_SIZE, bytes_to_write); + + debug(D_RRDENGINE, "Flushing pages to disk."); + for (total_bytes = bytes_written = do_flush_pages(wc, 0, NULL); + bytes_written && (total_bytes < bytes_to_write); + total_bytes += bytes_written) { + bytes_written = do_flush_pages(wc, 0, NULL); + } + } + } + load_configuration_dynamic(); +#ifdef NETDATA_INTERNAL_CHECKS + { + char buf[4096]; + debug(D_RRDENGINE, "%s", get_rrdeng_statistics(wc->ctx, buf, sizeof(buf))); + } +#endif + + worker_is_idle(); +} + +#define MAX_CMD_BATCH_SIZE (256) + +void rrdeng_worker(void* arg) +{ + worker_register("DBENGINE"); + worker_register_job_name(RRDENG_NOOP, "noop"); + worker_register_job_name(RRDENG_READ_PAGE, "page read"); + worker_register_job_name(RRDENG_READ_EXTENT, "extent read"); + worker_register_job_name(RRDENG_COMMIT_PAGE, "commit"); + worker_register_job_name(RRDENG_FLUSH_PAGES, "flush"); + worker_register_job_name(RRDENG_SHUTDOWN, "shutdown"); + worker_register_job_name(RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE, "page lru"); + worker_register_job_name(RRDENG_QUIESCE, "quiesce"); + worker_register_job_name(RRDENG_MAX_OPCODE, "cleanup"); + worker_register_job_name(RRDENG_MAX_OPCODE + 1, "timer"); + + struct rrdengine_worker_config* wc = arg; + struct rrdengine_instance *ctx = wc->ctx; + uv_loop_t* loop; + int shutdown, ret; + enum rrdeng_opcode opcode; + uv_timer_t timer_req; + struct rrdeng_cmd cmd; + unsigned cmd_batch_size; + + rrdeng_init_cmd_queue(wc); + + loop = wc->loop = mallocz(sizeof(uv_loop_t)); + ret = uv_loop_init(loop); + if (ret) { + error("uv_loop_init(): %s", uv_strerror(ret)); + goto error_after_loop_init; + } + loop->data = wc; + + ret = uv_async_init(wc->loop, &wc->async, async_cb); + if (ret) { + error("uv_async_init(): %s", uv_strerror(ret)); + goto error_after_async_init; + } + wc->async.data = wc; + + wc->now_deleting_files = NULL; + wc->cleanup_thread_deleting_files = 0; + + wc->now_invalidating_dirty_pages = NULL; + wc->cleanup_thread_invalidating_dirty_pages = 0; + wc->inflight_dirty_pages = 0; + + /* dirty page flushing timer */ + ret = uv_timer_init(loop, &timer_req); + if (ret) { + error("uv_timer_init(): %s", uv_strerror(ret)); + goto error_after_timer_init; + } + timer_req.data = wc; + + wc->error = 0; + /* wake up initialization thread */ + completion_mark_complete(&ctx->rrdengine_completion); + + fatal_assert(0 == uv_timer_start(&timer_req, timer_cb, TIMER_PERIOD_MS, TIMER_PERIOD_MS)); + shutdown = 0; + int set_name = 0; + while (likely(shutdown == 0 || rrdeng_threads_alive(wc))) { + worker_is_idle(); + uv_run(loop, UV_RUN_DEFAULT); + worker_is_busy(RRDENG_MAX_OPCODE); + rrdeng_cleanup_finished_threads(wc); + + /* wait for commands */ + cmd_batch_size = 0; + do { + /* + * Avoid starving the loop when there are too many commands coming in. + * timer_cb will interrupt the loop again to allow serving more commands. + */ + if (unlikely(cmd_batch_size >= MAX_CMD_BATCH_SIZE)) + break; + + cmd = rrdeng_deq_cmd(wc); + opcode = cmd.opcode; + ++cmd_batch_size; + + if(likely(opcode != RRDENG_NOOP)) + worker_is_busy(opcode); + + switch (opcode) { + case RRDENG_NOOP: + /* the command queue was empty, do nothing */ + break; + case RRDENG_SHUTDOWN: + shutdown = 1; + break; + case RRDENG_QUIESCE: + ctx->drop_metrics_under_page_cache_pressure = 0; + ctx->quiesce = SET_QUIESCE; + fatal_assert(0 == uv_timer_stop(&timer_req)); + uv_close((uv_handle_t *)&timer_req, NULL); + while (do_flush_pages(wc, 1, NULL)) { + ; /* Force flushing of all committed pages. */ + } + wal_flush_transaction_buffer(wc); + if (!rrdeng_threads_alive(wc)) { + ctx->quiesce = QUIESCED; + completion_mark_complete(&ctx->rrdengine_completion); + } + break; + case RRDENG_READ_PAGE: + do_read_extent(wc, &cmd.read_page.page_cache_descr, 1, 0); + break; + case RRDENG_READ_EXTENT: + do_read_extent(wc, cmd.read_extent.page_cache_descr, cmd.read_extent.page_count, 1); + if (unlikely(!set_name)) { + set_name = 1; + uv_thread_set_name_np(ctx->worker_config.thread, "DBENGINE"); + } + break; + case RRDENG_COMMIT_PAGE: + do_commit_transaction(wc, STORE_DATA, NULL); + break; + case RRDENG_FLUSH_PAGES: { + if (wc->now_invalidating_dirty_pages) { + /* Do not flush if the disk cannot keep up */ + completion_mark_complete(cmd.completion); + } else { + (void)do_flush_pages(wc, 1, cmd.completion); + } + break; + case RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE: + rrdeng_invalidate_oldest_committed(wc); + break; + } + default: + debug(D_RRDENGINE, "%s: default.", __func__); + break; + } + } while (opcode != RRDENG_NOOP); + } + + /* cleanup operations of the event loop */ + info("Shutting down RRD engine event loop for tier %d", ctx->tier); + + /* + * uv_async_send after uv_close does not seem to crash in linux at the moment, + * it is however undocumented behaviour and we need to be aware if this becomes + * an issue in the future. + */ + uv_close((uv_handle_t *)&wc->async, NULL); + + while (do_flush_pages(wc, 1, NULL)) { + ; /* Force flushing of all committed pages. */ + } + wal_flush_transaction_buffer(wc); + uv_run(loop, UV_RUN_DEFAULT); + + info("Shutting down RRD engine event loop for tier %d complete", ctx->tier); + /* TODO: don't let the API block by waiting to enqueue commands */ + uv_cond_destroy(&wc->cmd_cond); +/* uv_mutex_destroy(&wc->cmd_mutex); */ + fatal_assert(0 == uv_loop_close(loop)); + freez(loop); + + worker_unregister(); + return; + +error_after_timer_init: + uv_close((uv_handle_t *)&wc->async, NULL); +error_after_async_init: + fatal_assert(0 == uv_loop_close(loop)); +error_after_loop_init: + freez(loop); + + wc->error = UV_EAGAIN; + /* wake up initialization thread */ + completion_mark_complete(&ctx->rrdengine_completion); + worker_unregister(); +} + +/* C entry point for development purposes + * make "LDFLAGS=-errdengine_main" + */ +void rrdengine_main(void) +{ + int ret; + struct rrdengine_instance *ctx; + + sanity_check(); + ret = rrdeng_init(NULL, &ctx, "/tmp", RRDENG_MIN_PAGE_CACHE_SIZE_MB, RRDENG_MIN_DISK_SPACE_MB, 0); + if (ret) { + exit(ret); + } + rrdeng_exit(ctx); + fprintf(stderr, "Hello world!"); + exit(0); +} diff --git a/database/engine/rrdengine.h b/database/engine/rrdengine.h new file mode 100644 index 0000000..521d252 --- /dev/null +++ b/database/engine/rrdengine.h @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDENGINE_H +#define NETDATA_RRDENGINE_H + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include "daemon/common.h" +#include "../rrd.h" +#include "rrddiskprotocol.h" +#include "rrdenginelib.h" +#include "datafile.h" +#include "journalfile.h" +#include "rrdengineapi.h" +#include "pagecache.h" +#include "rrdenglocking.h" + +#ifdef NETDATA_RRD_INTERNALS + +#endif /* NETDATA_RRD_INTERNALS */ + +extern unsigned rrdeng_pages_per_extent; + +/* Forward declarations */ +struct rrdengine_instance; + +#define MAX_PAGES_PER_EXTENT (64) /* TODO: can go higher only when journal supports bigger than 4KiB transactions */ + +#define RRDENG_FILE_NUMBER_SCAN_TMPL "%1u-%10u" +#define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u" + +struct rrdeng_collect_handle { + struct pg_cache_page_index *page_index; + struct rrdeng_page_descr *descr; + unsigned long page_correlation_id; + // set to 1 when this dimension is not page aligned with the other dimensions in the chart + uint8_t unaligned_page; + struct pg_alignment *alignment; +}; + +struct rrdeng_query_handle { + struct rrdeng_page_descr *descr; + struct rrdengine_instance *ctx; + struct pg_cache_page_index *page_index; + time_t wanted_start_time_s; + time_t now_s; + unsigned position; + unsigned entries; + storage_number *page; + usec_t page_end_time_ut; + uint32_t page_length; + time_t dt_s; +}; + +typedef enum { + RRDENGINE_STATUS_UNINITIALIZED = 0, + RRDENGINE_STATUS_INITIALIZING, + RRDENGINE_STATUS_INITIALIZED +} rrdengine_state_t; + +enum rrdeng_opcode { + /* can be used to return empty status or flush the command queue */ + RRDENG_NOOP = 0, + + RRDENG_READ_PAGE, + RRDENG_READ_EXTENT, + RRDENG_COMMIT_PAGE, + RRDENG_FLUSH_PAGES, + RRDENG_SHUTDOWN, + RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE, + RRDENG_QUIESCE, + + RRDENG_MAX_OPCODE +}; + +struct rrdeng_read_page { + struct rrdeng_page_descr *page_cache_descr; +}; + +struct rrdeng_read_extent { + struct rrdeng_page_descr *page_cache_descr[MAX_PAGES_PER_EXTENT]; + int page_count; +}; + +struct rrdeng_cmd { + enum rrdeng_opcode opcode; + union { + struct rrdeng_read_page read_page; + struct rrdeng_read_extent read_extent; + struct completion *completion; + }; +}; + +#define RRDENG_CMD_Q_MAX_SIZE (2048) + +struct rrdeng_cmdqueue { + unsigned head, tail; + struct rrdeng_cmd cmd_array[RRDENG_CMD_Q_MAX_SIZE]; +}; + +struct extent_io_descriptor { + uv_fs_t req; + uv_work_t req_worker; + uv_buf_t iov; + uv_file file; + void *buf; + void *map_base; + size_t map_length; + uint64_t pos; + unsigned bytes; + struct completion *completion; + unsigned descr_count; + int release_descr; + struct rrdeng_page_descr *descr_array[MAX_PAGES_PER_EXTENT]; + struct rrdeng_page_descr descr_read_array[MAX_PAGES_PER_EXTENT]; + Word_t descr_commit_idx_array[MAX_PAGES_PER_EXTENT]; + struct extent_io_descriptor *next; /* multiple requests to be served by the same cached extent */ +}; + +struct generic_io_descriptor { + uv_fs_t req; + uv_buf_t iov; + void *buf; + uint64_t pos; + unsigned bytes; + struct completion *completion; +}; + +struct extent_cache_element { + struct extent_info *extent; /* The ABA problem is avoided with the help of fileno below */ + unsigned fileno; + struct extent_cache_element *prev; /* LRU */ + struct extent_cache_element *next; /* LRU */ + struct extent_io_descriptor *inflight_io_descr; /* I/O descriptor for in-flight extent */ + uint8_t pages[MAX_PAGES_PER_EXTENT * RRDENG_BLOCK_SIZE]; +}; + +#define MAX_CACHED_EXTENTS 16 /* cannot be over 32 to fit in 32-bit architectures */ + +/* Initialize by setting the structure to zero */ +struct extent_cache { + struct extent_cache_element extent_array[MAX_CACHED_EXTENTS]; + unsigned allocation_bitmap; /* 1 if the corresponding position in the extent_array is allocated */ + unsigned inflight_bitmap; /* 1 if the corresponding position in the extent_array is waiting for I/O */ + + struct extent_cache_element *replaceQ_head; /* LRU */ + struct extent_cache_element *replaceQ_tail; /* MRU */ +}; + +struct rrdengine_worker_config { + struct rrdengine_instance *ctx; + + uv_thread_t thread; + uv_loop_t* loop; + uv_async_t async; + + /* file deletion thread */ + uv_thread_t *now_deleting_files; + unsigned long cleanup_thread_deleting_files; /* set to 0 when now_deleting_files is still running */ + + /* dirty page deletion thread */ + uv_thread_t *now_invalidating_dirty_pages; + /* set to 0 when now_invalidating_dirty_pages is still running */ + unsigned long cleanup_thread_invalidating_dirty_pages; + unsigned inflight_dirty_pages; + + /* FIFO command queue */ + uv_mutex_t cmd_mutex; + uv_cond_t cmd_cond; + volatile unsigned queue_size; + struct rrdeng_cmdqueue cmd_queue; + + struct extent_cache xt_cache; + + int error; +}; + +/* + * Debug statistics not used by code logic. + * They only describe operations since DB engine instance load time. + */ +struct rrdengine_statistics { + rrdeng_stats_t metric_API_producers; + rrdeng_stats_t metric_API_consumers; + rrdeng_stats_t pg_cache_insertions; + rrdeng_stats_t pg_cache_deletions; + rrdeng_stats_t pg_cache_hits; + rrdeng_stats_t pg_cache_misses; + rrdeng_stats_t pg_cache_backfills; + rrdeng_stats_t pg_cache_evictions; + rrdeng_stats_t before_decompress_bytes; + rrdeng_stats_t after_decompress_bytes; + rrdeng_stats_t before_compress_bytes; + rrdeng_stats_t after_compress_bytes; + rrdeng_stats_t io_write_bytes; + rrdeng_stats_t io_write_requests; + rrdeng_stats_t io_read_bytes; + rrdeng_stats_t io_read_requests; + rrdeng_stats_t io_write_extent_bytes; + rrdeng_stats_t io_write_extents; + rrdeng_stats_t io_read_extent_bytes; + rrdeng_stats_t io_read_extents; + rrdeng_stats_t datafile_creations; + rrdeng_stats_t datafile_deletions; + rrdeng_stats_t journalfile_creations; + rrdeng_stats_t journalfile_deletions; + rrdeng_stats_t page_cache_descriptors; + rrdeng_stats_t io_errors; + rrdeng_stats_t fs_errors; + rrdeng_stats_t pg_cache_over_half_dirty_events; + rrdeng_stats_t flushing_pressure_page_deletions; +}; + +/* I/O errors global counter */ +extern rrdeng_stats_t global_io_errors; +/* File-System errors global counter */ +extern rrdeng_stats_t global_fs_errors; +/* number of File-Descriptors that have been reserved by dbengine */ +extern rrdeng_stats_t rrdeng_reserved_file_descriptors; +/* inability to flush global counters */ +extern rrdeng_stats_t global_pg_cache_over_half_dirty_events; +extern rrdeng_stats_t global_flushing_pressure_page_deletions; /* number of deleted pages */ + +#define NO_QUIESCE (0) /* initial state when all operations function normally */ +#define SET_QUIESCE (1) /* set it before shutting down the instance, quiesce long running operations */ +#define QUIESCED (2) /* is set after all threads have finished running */ + +typedef enum { + LOAD_ERRORS_PAGE_FLIPPED_TIME = 0, + LOAD_ERRORS_PAGE_EQUAL_TIME = 1, + LOAD_ERRORS_PAGE_ZERO_ENTRIES = 2, + LOAD_ERRORS_PAGE_UPDATE_ZERO = 3, + LOAD_ERRORS_PAGE_FLEXY_TIME = 4, + LOAD_ERRORS_DROPPED_EXTENT = 5, +} INVALID_PAGE_ID; + +struct rrdengine_instance { + struct rrdengine_worker_config worker_config; + struct completion rrdengine_completion; + struct page_cache pg_cache; + uint8_t drop_metrics_under_page_cache_pressure; /* boolean */ + uint8_t global_compress_alg; + struct transaction_commit_log commit_log; + struct rrdengine_datafile_list datafiles; + RRDHOST *host; /* the legacy host, or NULL for multi-host DB */ + char dbfiles_path[FILENAME_MAX + 1]; + char machine_guid[GUID_LEN + 1]; /* the unique ID of the corresponding host, or localhost for multihost DB */ + uint64_t disk_space; + uint64_t max_disk_space; + int tier; + unsigned last_fileno; /* newest index of datafile and journalfile */ + unsigned long max_cache_pages; + unsigned long cache_pages_low_watermark; + unsigned long metric_API_max_producers; + + uint8_t quiesce; /* set to SET_QUIESCE before shutdown of the engine */ + uint8_t page_type; /* Default page type for this context */ + + struct rrdengine_statistics stats; + + struct { + size_t counter; + usec_t latest_end_time_ut; + } load_errors[6]; +}; + +void *dbengine_page_alloc(void); +void dbengine_page_free(void *page); + +int init_rrd_files(struct rrdengine_instance *ctx); +void finalize_rrd_files(struct rrdengine_instance *ctx); +void rrdeng_test_quota(struct rrdengine_worker_config* wc); +void rrdeng_worker(void* arg); +void rrdeng_enq_cmd(struct rrdengine_worker_config* wc, struct rrdeng_cmd *cmd); +struct rrdeng_cmd rrdeng_deq_cmd(struct rrdengine_worker_config* wc); + +#endif /* NETDATA_RRDENGINE_H */ diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c new file mode 100755 index 0000000..4525b04 --- /dev/null +++ b/database/engine/rrdengineapi.c @@ -0,0 +1,1272 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#include "rrdengine.h" +#include "../storage_engine.h" + +/* Default global database instance */ +struct rrdengine_instance multidb_ctx_storage_tier0; +struct rrdengine_instance multidb_ctx_storage_tier1; +struct rrdengine_instance multidb_ctx_storage_tier2; +struct rrdengine_instance multidb_ctx_storage_tier3; +struct rrdengine_instance multidb_ctx_storage_tier4; +#if RRD_STORAGE_TIERS != 5 +#error RRD_STORAGE_TIERS is not 5 - you need to add allocations here +#endif +struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS]; +uint8_t tier_page_type[RRD_STORAGE_TIERS] = {PAGE_METRICS, PAGE_TIER, PAGE_TIER, PAGE_TIER, PAGE_TIER}; + +#if PAGE_TYPE_MAX != 1 +#error PAGE_TYPE_MAX is not 1 - you need to add allocations here +#endif +size_t page_type_size[256] = {sizeof(storage_number), sizeof(storage_number_tier1_t)}; + +__attribute__((constructor)) void initialize_multidb_ctx(void) { + multidb_ctx[0] = &multidb_ctx_storage_tier0; + multidb_ctx[1] = &multidb_ctx_storage_tier1; + multidb_ctx[2] = &multidb_ctx_storage_tier2; + multidb_ctx[3] = &multidb_ctx_storage_tier3; + multidb_ctx[4] = &multidb_ctx_storage_tier4; +} + +int db_engine_use_malloc = 0; +int default_rrdeng_page_fetch_timeout = 3; +int default_rrdeng_page_fetch_retries = 3; +int default_rrdeng_page_cache_mb = 32; +int default_rrdeng_disk_quota_mb = 256; +int default_multidb_disk_quota_mb = 256; +/* Default behaviour is to unblock data collection if the page cache is full of dirty pages by dropping metrics */ +uint8_t rrdeng_drop_metrics_under_page_cache_pressure = 1; + +// ---------------------------------------------------------------------------- +// metrics groups + +static inline void rrdeng_page_alignment_acquire(struct pg_alignment *pa) { + if(unlikely(!pa)) return; + __atomic_add_fetch(&pa->refcount, 1, __ATOMIC_SEQ_CST); +} + +static inline bool rrdeng_page_alignment_release(struct pg_alignment *pa) { + if(unlikely(!pa)) return true; + + if(__atomic_sub_fetch(&pa->refcount, 1, __ATOMIC_SEQ_CST) == 0) { + freez(pa); + return true; + } + + return false; +} + +// charts call this +STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) { + struct pg_alignment *pa = callocz(1, sizeof(struct pg_alignment)); + rrdeng_page_alignment_acquire(pa); + return (STORAGE_METRICS_GROUP *)pa; +} + +// charts call this +void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg) { + if(unlikely(!smg)) return; + + struct pg_alignment *pa = (struct pg_alignment *)smg; + rrdeng_page_alignment_release(pa); +} + +// ---------------------------------------------------------------------------- +// metric handle for legacy dbs + +/* This UUID is not unique across hosts */ +void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid) +{ + EVP_MD_CTX *evpctx; + unsigned char hash_value[EVP_MAX_MD_SIZE]; + unsigned int hash_len; + + evpctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL); + EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id)); + EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id)); + EVP_DigestFinal_ex(evpctx, hash_value, &hash_len); + EVP_MD_CTX_destroy(evpctx); + fatal_assert(hash_len > sizeof(uuid_t)); + memcpy(ret_uuid, hash_value, sizeof(uuid_t)); +} + +/* Transform legacy UUID to be unique across hosts deterministically */ +void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, uuid_t *ret_uuid) +{ + EVP_MD_CTX *evpctx; + unsigned char hash_value[EVP_MAX_MD_SIZE]; + unsigned int hash_len; + + evpctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL); + EVP_DigestUpdate(evpctx, machine_guid, GUID_LEN); + EVP_DigestUpdate(evpctx, *legacy_uuid, sizeof(uuid_t)); + EVP_DigestFinal_ex(evpctx, hash_value, &hash_len); + EVP_MD_CTX_destroy(evpctx); + fatal_assert(hash_len > sizeof(uuid_t)); + memcpy(ret_uuid, hash_value, sizeof(uuid_t)); +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id) { + uuid_t legacy_uuid; + rrdeng_generate_legacy_uuid(rd_id, st_id, &legacy_uuid); + return rrdeng_metric_get(db_instance, &legacy_uuid); +} + +// ---------------------------------------------------------------------------- +// metric handle + +void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + + __atomic_sub_fetch(&page_index->refcount, 1, __ATOMIC_SEQ_CST); +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + __atomic_add_fetch(&page_index->refcount, 1, __ATOMIC_SEQ_CST); + return db_metric_handle; +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + struct page_cache *pg_cache = &ctx->pg_cache; + struct pg_cache_page_index *page_index = NULL; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + Pvoid_t *PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, uuid, sizeof(uuid_t)); + if (likely(NULL != PValue)) + page_index = *PValue; + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + + if (likely(page_index)) + __atomic_add_fetch(&page_index->refcount, 1, __ATOMIC_SEQ_CST); + + return (STORAGE_METRIC_HANDLE *)page_index; +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid) { + internal_fatal(!db_instance, "DBENGINE: db_instance is NULL"); + + struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + struct pg_cache_page_index *page_index; + struct page_cache *pg_cache = &ctx->pg_cache; + + uv_rwlock_wrlock(&pg_cache->metrics_index.lock); + Pvoid_t *PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, uuid, sizeof(uuid_t), PJE0); + fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */ + *PValue = page_index = create_page_index(uuid, ctx); + page_index->prev = pg_cache->metrics_index.last_page_index; + pg_cache->metrics_index.last_page_index = page_index; + page_index->refcount = 1; + uv_rwlock_wrunlock(&pg_cache->metrics_index.lock); + + return (STORAGE_METRIC_HANDLE *)page_index; +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance) { + STORAGE_METRIC_HANDLE *db_metric_handle; + + db_metric_handle = rrdeng_metric_get(db_instance, &rd->metric_uuid); + if(!db_metric_handle) { + db_metric_handle = rrdeng_metric_get_legacy(db_instance, rrddim_id(rd), rrdset_id(rd->rrdset)); + if(db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + uuid_copy(rd->metric_uuid, page_index->id); + } + } + if(!db_metric_handle) + db_metric_handle = rrdeng_metric_create(db_instance, &rd->metric_uuid); + +#ifdef NETDATA_INTERNAL_CHECKS + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + if(uuid_compare(rd->metric_uuid, page_index->id) != 0) { + char uuid1[UUID_STR_LEN + 1]; + char uuid2[UUID_STR_LEN + 1]; + + uuid_unparse(rd->metric_uuid, uuid1); + uuid_unparse(page_index->id, uuid2); + fatal("DBENGINE: uuids do not match, asked for metric '%s', but got page_index of metric '%s'", uuid1, uuid2); + } + + struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + if(page_index->ctx != ctx) + fatal("DBENGINE: mixed up rrdengine instances, asked for metric from %p, got from %p", ctx, page_index->ctx); +#endif + + return db_metric_handle; +} + + +// ---------------------------------------------------------------------------- +// collect ops + +/* + * Gets a handle for storing metrics to the database. + * The handle must be released with rrdeng_store_metric_final(). + */ +STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + struct rrdeng_collect_handle *handle; + + handle = callocz(1, sizeof(struct rrdeng_collect_handle)); + handle->page_index = page_index; + handle->descr = NULL; + handle->unaligned_page = 0; + page_index->latest_update_every_s = update_every; + + handle->alignment = (struct pg_alignment *)smg; + rrdeng_page_alignment_acquire(handle->alignment); + + uv_rwlock_wrlock(&page_index->lock); + ++page_index->writers; + uv_rwlock_wrunlock(&page_index->lock); + + return (STORAGE_COLLECT_HANDLE *)handle; +} + +/* The page must be populated and referenced */ +static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr) +{ + switch(descr->type) { + case PAGE_METRICS: { + size_t slots = descr->page_length / PAGE_POINT_SIZE_BYTES(descr); + storage_number *array = (storage_number *)descr->pg_cache_descr->page; + for (size_t i = 0 ; i < slots; ++i) { + if(does_storage_number_exist(array[i])) + return 0; + } + } + break; + + case PAGE_TIER: { + size_t slots = descr->page_length / PAGE_POINT_SIZE_BYTES(descr); + storage_number_tier1_t *array = (storage_number_tier1_t *)descr->pg_cache_descr->page; + for (size_t i = 0 ; i < slots; ++i) { + if(fpclassify(array[i].sum_value) != FP_NAN) + return 0; + } + } + break; + + default: { + static bool logged = false; + if(!logged) { + error("DBENGINE: cannot check page for nulls on unknown page type id %d", descr->type); + logged = true; + } + return 0; + } + } + + return 1; +} + +void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) { + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + // struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle; + struct rrdengine_instance *ctx = handle->page_index->ctx; + struct rrdeng_page_descr *descr = handle->descr; + + if (unlikely(!ctx)) return; + if (unlikely(!descr)) return; + + if (likely(descr->page_length)) { + int page_is_empty; + + rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1); + + page_is_empty = page_has_only_empty_metrics(descr); + if (page_is_empty) { + print_page_cache_descr(descr, "Page has empty metrics only, deleting", true); + pg_cache_put(ctx, descr); + pg_cache_punch_hole(ctx, descr, 1, 0, NULL); + } else + rrdeng_commit_page(ctx, descr, handle->page_correlation_id); + } else { + dbengine_page_free(descr->pg_cache_descr->page); + rrdeng_destroy_pg_cache_descr(ctx, descr->pg_cache_descr); + rrdeng_page_descr_freez(descr); + } + handle->descr = NULL; +} + +static void rrdeng_store_metric_next_internal(STORAGE_COLLECT_HANDLE *collection_handle, + usec_t point_in_time_ut, + NETDATA_DOUBLE n, + NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, + uint16_t count, + uint16_t anomaly_count, + SN_FLAGS flags) +{ + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + struct pg_cache_page_index *page_index = handle->page_index; + struct rrdengine_instance *ctx = handle->page_index->ctx; + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = handle->descr; + + void *page; + uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0; + + if (descr) { + /* Make alignment decisions */ + +#ifdef NETDATA_INTERNAL_CHECKS + if(descr->end_time_ut + page_index->latest_update_every_s * USEC_PER_SEC != point_in_time_ut) { + char buffer[200 + 1]; + snprintfz(buffer, 200, + "metrics collected are %s, end_time_ut = %llu, point_in_time_ut = %llu, update_every = %u, delta = %llu", + (point_in_time_ut / USEC_PER_SEC - descr->end_time_ut / USEC_PER_SEC > page_index->latest_update_every_s)?"far apart":"not aligned", + descr->end_time_ut / USEC_PER_SEC, + point_in_time_ut / USEC_PER_SEC, + page_index->latest_update_every_s, + point_in_time_ut / USEC_PER_SEC - descr->end_time_ut / USEC_PER_SEC); + print_page_cache_descr(descr, buffer, false); + } +#endif + + if (descr->page_length == handle->alignment->page_length) { + /* this is the leading dimension that defines chart alignment */ + perfect_page_alignment = 1; + } + /* is the metric far enough out of alignment with the others? */ + if (unlikely(descr->page_length + PAGE_POINT_SIZE_BYTES(descr) < handle->alignment->page_length)) { + handle->unaligned_page = 1; + print_page_cache_descr(descr, "Metric page is not aligned with chart", true); + } + if (unlikely(handle->unaligned_page && + /* did the other metrics change page? */ + handle->alignment->page_length <= PAGE_POINT_SIZE_BYTES(descr))) { + print_page_cache_descr(descr, "must_flush_unaligned_page = 1", true); + must_flush_unaligned_page = 1; + handle->unaligned_page = 0; + } + } + if (unlikely(NULL == descr || + descr->page_length + PAGE_POINT_SIZE_BYTES(descr) > RRDENG_BLOCK_SIZE || + must_flush_unaligned_page)) { + + if(descr) { + print_page_cache_descr(descr, "flushing metric", true); + rrdeng_store_metric_flush_current_page(collection_handle); + } + + page = rrdeng_create_page(ctx, &page_index->id, &descr); + fatal_assert(page); + + descr->update_every_s = page_index->latest_update_every_s; + handle->descr = descr; + + handle->page_correlation_id = rrd_atomic_fetch_add(&pg_cache->committed_page_index.latest_corr_id, 1); + + if (0 == handle->alignment->page_length) { + /* this is the leading dimension that defines chart alignment */ + perfect_page_alignment = 1; + } + } + + page = descr->pg_cache_descr->page; + + switch (descr->type) { + case PAGE_METRICS: { + ((storage_number *)page)[descr->page_length / PAGE_POINT_SIZE_BYTES(descr)] = pack_storage_number(n, flags); + } + break; + + case PAGE_TIER: { + storage_number_tier1_t number_tier1; + number_tier1.sum_value = (float)n; + number_tier1.min_value = (float)min_value; + number_tier1.max_value = (float)max_value; + number_tier1.anomaly_count = anomaly_count; + number_tier1.count = count; + ((storage_number_tier1_t *)page)[descr->page_length / PAGE_POINT_SIZE_BYTES(descr)] = number_tier1; + } + break; + + default: { + static bool logged = false; + if(!logged) { + error("DBENGINE: cannot store metric on unknown page type id %d", descr->type); + logged = true; + } + } + break; + } + + pg_cache_atomic_set_pg_info(descr, point_in_time_ut, descr->page_length + PAGE_POINT_SIZE_BYTES(descr)); + + if (perfect_page_alignment) + handle->alignment->page_length = descr->page_length; + if (unlikely(INVALID_TIME == descr->start_time_ut)) { + unsigned long new_metric_API_producers, old_metric_API_max_producers, ret_metric_API_max_producers; + descr->start_time_ut = point_in_time_ut; + + new_metric_API_producers = rrd_atomic_add_fetch(&ctx->stats.metric_API_producers, 1); + while (unlikely(new_metric_API_producers > (old_metric_API_max_producers = ctx->metric_API_max_producers))) { + /* Increase ctx->metric_API_max_producers */ + ret_metric_API_max_producers = ulong_compare_and_swap(&ctx->metric_API_max_producers, + old_metric_API_max_producers, + new_metric_API_producers); + if (old_metric_API_max_producers == ret_metric_API_max_producers) { + /* success */ + break; + } + } + + pg_cache_insert(ctx, page_index, descr); + } else { + pg_cache_add_new_metric_time(page_index, descr); + } + +// { +// unsigned char u[16] = { 0x0C, 0x0A, 0x40, 0xD6, 0x2A, 0x43, 0x4A, 0x7C, 0x95, 0xF7, 0xD1, 0x1E, 0x0C, 0x9E, 0x8A, 0xE7 }; +// if(uuid_compare(u, page_index->id) == 0) { +// char buffer[100]; +// snprintfz(buffer, 100, "store system.cpu, collect:%u, page_index first:%u, last:%u", +// (uint32_t)(point_in_time / USEC_PER_SEC), +// (uint32_t)(page_index->oldest_time / USEC_PER_SEC), +// (uint32_t)(page_index->latest_time / USEC_PER_SEC)); +// +// print_page_cache_descr(descr, buffer, false); +// } +// } +} + +void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, + usec_t point_in_time_ut, + NETDATA_DOUBLE n, + NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, + uint16_t count, + uint16_t anomaly_count, + SN_FLAGS flags) +{ + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + struct pg_cache_page_index *page_index = handle->page_index; + struct rrdeng_page_descr *descr = handle->descr; + + if(likely(descr)) { + usec_t last_point_in_time_ut = descr->end_time_ut; + usec_t update_every_ut = page_index->latest_update_every_s * USEC_PER_SEC; + size_t points_gap = (point_in_time_ut <= last_point_in_time_ut) ? + (size_t)0 : + (size_t)((point_in_time_ut - last_point_in_time_ut) / update_every_ut); + + if(unlikely(points_gap != 1)) { + if (unlikely(points_gap <= 0)) { + time_t now = now_realtime_sec(); + static __thread size_t counter = 0; + static __thread time_t last_time_logged = 0; + counter++; + + if(now - last_time_logged > 600) { + error("DBENGINE: collected point is in the past (repeated %zu times in the last %zu secs). Ignoring these data collection points.", + counter, (size_t)(last_time_logged?(now - last_time_logged):0)); + + last_time_logged = now; + counter = 0; + } + return; + } + + size_t point_size = PAGE_POINT_SIZE_BYTES(descr); + size_t page_size_in_points = RRDENG_BLOCK_SIZE / point_size; + size_t used_points = descr->page_length / point_size; + size_t remaining_points_in_page = page_size_in_points - used_points; + + bool new_point_is_aligned = true; + if(unlikely((point_in_time_ut - last_point_in_time_ut) / points_gap != update_every_ut)) + new_point_is_aligned = false; + + if(unlikely(points_gap > remaining_points_in_page || !new_point_is_aligned)) { +// char buffer[200]; +// snprintfz(buffer, 200, "data collection skipped %zu points, last stored point %llu, new point %llu, update every %d. Cutting page.", +// points_gap, last_point_in_time_ut / USEC_PER_SEC, point_in_time_ut / USEC_PER_SEC, page_index->latest_update_every_s); +// print_page_cache_descr(descr, buffer, false); + + rrdeng_store_metric_flush_current_page(collection_handle); + } + else { +// char buffer[200]; +// snprintfz(buffer, 200, "data collection skipped %zu points, last stored point %llu, new point %llu, update every %d. Filling the gap.", +// points_gap, last_point_in_time_ut / USEC_PER_SEC, point_in_time_ut / USEC_PER_SEC, page_index->latest_update_every_s); +// print_page_cache_descr(descr, buffer, false); + + // loop to fill the gap + usec_t step_ut = page_index->latest_update_every_s * USEC_PER_SEC; + usec_t last_point_filled_ut = last_point_in_time_ut + step_ut; + + while (last_point_filled_ut < point_in_time_ut) { + rrdeng_store_metric_next_internal( + collection_handle, last_point_filled_ut, NAN, NAN, NAN, + 1, 0, SN_EMPTY_SLOT); + + last_point_filled_ut += step_ut; + } + } + } + } + + rrdeng_store_metric_next_internal(collection_handle, point_in_time_ut, n, min_value, max_value, count, anomaly_count, flags); +} + + +/* + * Releases the database reference from the handle for storing metrics. + * Returns 1 if it's safe to delete the dimension. + */ +int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + struct pg_cache_page_index *page_index = handle->page_index; + + uint8_t can_delete_metric = 0; + + rrdeng_store_metric_flush_current_page(collection_handle); + uv_rwlock_wrlock(&page_index->lock); + + if (!--page_index->writers && !page_index->page_count) + can_delete_metric = 1; + + uv_rwlock_wrunlock(&page_index->lock); + + rrdeng_page_alignment_release(handle->alignment); + freez(handle); + + return can_delete_metric; +} + +void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every) { + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + struct pg_cache_page_index *page_index = handle->page_index; + rrdeng_store_metric_flush_current_page(collection_handle); + uv_rwlock_rdlock(&page_index->lock); + page_index->latest_update_every_s = update_every; + uv_rwlock_rdunlock(&page_index->lock); +} + +// ---------------------------------------------------------------------------- +// query ops + +//static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info) +//{ +// return (uint32_t *)&page_info->scratch[0]; +//} +// +//static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info) +//{ +// return (uint32_t *)&page_info->scratch[sizeof(uint32_t)]; +//} +// +/* + * Gets a handle for loading metrics from the database. + * The handle must be released with rrdeng_load_metric_final(). + */ +void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrdimm_handle, time_t start_time_s, time_t end_time_s) +{ + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + struct rrdengine_instance *ctx = page_index->ctx; + + // fprintf(stderr, "%s: %s/%s start time %ld, end time %ld\n", __FUNCTION__ , rd->rrdset->name, rd->name, start_time, end_time); + + struct rrdeng_query_handle *handle; + unsigned pages_nr; + + if(!page_index->latest_update_every_s) + page_index->latest_update_every_s = default_rrd_update_every; + + rrdimm_handle->start_time_s = start_time_s; + rrdimm_handle->end_time_s = end_time_s; + + handle = callocz(1, sizeof(struct rrdeng_query_handle)); + handle->wanted_start_time_s = start_time_s; + handle->now_s = start_time_s; + handle->position = 0; + handle->ctx = ctx; + handle->descr = NULL; + handle->dt_s = page_index->latest_update_every_s; + rrdimm_handle->handle = (STORAGE_QUERY_HANDLE *)handle; + pages_nr = pg_cache_preload(ctx, &page_index->id, start_time_s * USEC_PER_SEC, end_time_s * USEC_PER_SEC, + NULL, &handle->page_index); + if (unlikely(NULL == handle->page_index || 0 == pages_nr)) + // there are no metrics to load + handle->wanted_start_time_s = INVALID_TIME; +} + +static int rrdeng_load_page_next(struct storage_engine_query_handle *rrdimm_handle, bool debug_this __maybe_unused) { + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle; + + struct rrdengine_instance *ctx = handle->ctx; + struct rrdeng_page_descr *descr = handle->descr; + + uint32_t page_length; + usec_t page_end_time_ut; + unsigned position; + + if (likely(descr)) { + // Drop old page's reference + +#ifdef NETDATA_INTERNAL_CHECKS + rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1); +#endif + + pg_cache_put(ctx, descr); + handle->descr = NULL; + handle->wanted_start_time_s = (time_t)((handle->page_end_time_ut / USEC_PER_SEC) + handle->dt_s); + + if (unlikely(handle->wanted_start_time_s > rrdimm_handle->end_time_s)) + return 1; + } + + usec_t wanted_start_time_ut = handle->wanted_start_time_s * USEC_PER_SEC; + descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id, + wanted_start_time_ut, rrdimm_handle->end_time_s * USEC_PER_SEC); + if (NULL == descr) + return 1; + +#ifdef NETDATA_INTERNAL_CHECKS + rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1); +#endif + + handle->descr = descr; + pg_cache_atomic_get_pg_info(descr, &page_end_time_ut, &page_length); + if (unlikely(INVALID_TIME == descr->start_time_ut || INVALID_TIME == page_end_time_ut || 0 == descr->update_every_s)) { + error("DBENGINE: discarding invalid page descriptor (start_time = %llu, end_time = %llu, update_every_s = %d)", + descr->start_time_ut, page_end_time_ut, descr->update_every_s); + return 1; + } + + if (unlikely(descr->start_time_ut != page_end_time_ut && wanted_start_time_ut > descr->start_time_ut)) { + // we're in the middle of the page somewhere + unsigned entries = page_length / PAGE_POINT_SIZE_BYTES(descr); + position = ((uint64_t)(wanted_start_time_ut - descr->start_time_ut)) * (entries - 1) / + (page_end_time_ut - descr->start_time_ut); + } + else + position = 0; + + handle->page_end_time_ut = page_end_time_ut; + handle->page_length = page_length; + handle->entries = page_length / PAGE_POINT_SIZE_BYTES(descr); + handle->page = descr->pg_cache_descr->page; + handle->dt_s = descr->update_every_s; + handle->position = position; + +// if(debug_this) +// info("DBENGINE: rrdeng_load_page_next(), " +// "position:%d, " +// "start_time_ut:%llu, " +// "page_end_time_ut:%llu, " +// "next_page_time_ut:%llu, " +// "in_out:%s" +// , position +// , descr->start_time_ut +// , page_end_time_ut +// , +// wanted_start_time_ut, in_out?"true":"false" +// ); + + return 0; +} + +// Returns the metric and sets its timestamp into current_time +// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags) +// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES +STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle) { + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle; + // struct rrdeng_metric_handle *metric_handle = handle->metric_handle; + + struct rrdeng_page_descr *descr = handle->descr; + time_t now = handle->now_s + handle->dt_s; + +// bool debug_this = false; +// { +// unsigned char u[16] = { 0x0C, 0x0A, 0x40, 0xD6, 0x2A, 0x43, 0x4A, 0x7C, 0x95, 0xF7, 0xD1, 0x1E, 0x0C, 0x9E, 0x8A, 0xE7 }; +// if(uuid_compare(u, handle->page_index->id) == 0) { +// char buffer[100]; +// snprintfz(buffer, 100, "load system.cpu, now:%u, dt:%u, position:%u page_index first:%u, last:%u", +// (uint32_t)(now), +// (uint32_t)(handle->dt_s), +// (uint32_t)(handle->position), +// (uint32_t)(handle->page_index->oldest_time / USEC_PER_SEC), +// (uint32_t)(handle->page_index->latest_time / USEC_PER_SEC)); +// +// print_page_cache_descr(descr, buffer, false); +// debug_this = true; +// } +// } + + STORAGE_POINT sp; + unsigned position = handle->position + 1; + storage_number_tier1_t tier1_value; + + if (unlikely(INVALID_TIME == handle->wanted_start_time_s)) { + handle->wanted_start_time_s = INVALID_TIME; + handle->now_s = now; + storage_point_empty(sp, now - handle->dt_s, now); + return sp; + } + + if (unlikely(!descr || position >= handle->entries)) { + // We need to get a new page + if(rrdeng_load_page_next(rrddim_handle, false)) { + // next calls will not load any more metrics + handle->wanted_start_time_s = INVALID_TIME; + handle->now_s = now; + storage_point_empty(sp, now - handle->dt_s, now); + return sp; + } + + descr = handle->descr; + position = handle->position; + now = (time_t)((descr->start_time_ut / USEC_PER_SEC) + position * descr->update_every_s); + +// if(debug_this) { +// char buffer[100]; +// snprintfz(buffer, 100, "NEW PAGE system.cpu, now:%u, dt:%u, position:%u page_index first:%u, last:%u", +// (uint32_t)(now), +// (uint32_t)(handle->dt_s), +// (uint32_t)(handle->position), +// (uint32_t)(handle->page_index->oldest_time / USEC_PER_SEC), +// (uint32_t)(handle->page_index->latest_time / USEC_PER_SEC)); +// +// print_page_cache_descr(descr, buffer, false); +// } + } + + sp.start_time = now - handle->dt_s; + sp.end_time = now; + + handle->position = position; + handle->now_s = now; + + switch(descr->type) { + case PAGE_METRICS: { + storage_number n = handle->page[position]; + sp.min = sp.max = sp.sum = unpack_storage_number(n); + sp.flags = n & SN_USER_FLAGS; + sp.count = 1; + sp.anomaly_count = is_storage_number_anomalous(n) ? 1 : 0; + } + break; + + case PAGE_TIER: { + tier1_value = ((storage_number_tier1_t *)handle->page)[position]; + sp.flags = tier1_value.anomaly_count ? SN_FLAG_NONE : SN_FLAG_NOT_ANOMALOUS; + sp.count = tier1_value.count; + sp.anomaly_count = tier1_value.anomaly_count; + sp.min = tier1_value.min_value; + sp.max = tier1_value.max_value; + sp.sum = tier1_value.sum_value; + } + break; + + // we don't know this page type + default: { + static bool logged = false; + if(!logged) { + error("DBENGINE: unknown page type %d found. Cannot decode it. Ignoring its metrics.", descr->type); + logged = true; + } + storage_point_empty(sp, sp.start_time, sp.end_time); + } + break; + } + + if (unlikely(now >= rrddim_handle->end_time_s)) { + // next calls will not load any more metrics + handle->wanted_start_time_s = INVALID_TIME; + } + +// if(debug_this) +// info("DBENGINE: returning point: " +// "time from %ld to %ld // query from %ld to %ld // wanted_start_time_s %ld" +// , sp.start_time, sp.end_time +// , rrddim_handle->start_time_s, rrddim_handle->end_time_s +// , handle->wanted_start_time_s +// ); + + return sp; +} + +int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrdimm_handle) +{ + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle; + return (INVALID_TIME == handle->wanted_start_time_s); +} + +/* + * Releases the database reference from the handle for loading metrics. + */ +void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrdimm_handle) +{ + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle; + struct rrdengine_instance *ctx = handle->ctx; + struct rrdeng_page_descr *descr = handle->descr; + + if (descr) { +#ifdef NETDATA_INTERNAL_CHECKS + rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1); +#endif + pg_cache_put(ctx, descr); + } + + // whatever is allocated at rrdeng_load_metric_init() should be freed here + freez(handle); + rrdimm_handle->handle = NULL; +} + +time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + return (time_t)(page_index->latest_time_ut / USEC_PER_SEC); +} +time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + return (time_t)(page_index->oldest_time_ut / USEC_PER_SEC); +} + +int rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t) +{ + struct page_cache *pg_cache; + struct rrdengine_instance *ctx; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index = NULL; + + ctx = (struct rrdengine_instance *)si; + if (unlikely(!ctx)) { + error("DBENGINE: invalid STORAGE INSTANCE to %s()", __FUNCTION__); + return 1; + } + pg_cache = &ctx->pg_cache; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, dim_uuid, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + + if (likely(page_index)) { + *first_entry_t = page_index->oldest_time_ut / USEC_PER_SEC; + *last_entry_t = page_index->latest_time_ut / USEC_PER_SEC; + return 0; + } + + return 1; +} + +/* Also gets a reference for the page */ +void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr) +{ + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + void *page; + /* TODO: check maximum number of pages in page cache limit */ + + descr = pg_cache_create_descr(); + descr->id = id; /* TODO: add page type: metric, log, something? */ + descr->type = ctx->page_type; + page = dbengine_page_alloc(); /*TODO: add page size */ + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + pg_cache_descr->page = page; + pg_cache_descr->flags = RRD_PAGE_DIRTY /*| RRD_PAGE_LOCKED */ | RRD_PAGE_POPULATED /* | BEING_COLLECTED */; + pg_cache_descr->refcnt = 1; + + debug(D_RRDENGINE, "Created new page:"); + if (unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr, "", true); + rrdeng_page_descr_mutex_unlock(ctx, descr); + *ret_descr = descr; + return page; +} + +/* The page must not be empty */ +void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, + Word_t page_correlation_id) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + Pvoid_t *PValue; + unsigned nr_committed_pages; + + if (unlikely(NULL == descr)) { + debug(D_RRDENGINE, "%s: page descriptor is NULL, page has already been force-committed.", __func__); + return; + } + fatal_assert(descr->page_length); + + uv_rwlock_wrlock(&pg_cache->committed_page_index.lock); + PValue = JudyLIns(&pg_cache->committed_page_index.JudyL_array, page_correlation_id, PJE0); + *PValue = descr; + nr_committed_pages = ++pg_cache->committed_page_index.nr_committed_pages; + uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock); + + if (nr_committed_pages >= pg_cache_hard_limit(ctx) / 2) { + /* over 50% of pages have not been committed yet */ + + if (ctx->drop_metrics_under_page_cache_pressure && + nr_committed_pages >= pg_cache_committed_hard_limit(ctx)) { + /* 100% of pages are dirty */ + struct rrdeng_cmd cmd; + + cmd.opcode = RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + } else { + if (0 == (unsigned long) ctx->stats.pg_cache_over_half_dirty_events) { + /* only print the first time */ + errno = 0; + error("Failed to flush dirty buffers quickly enough in dbengine instance \"%s\". " + "Metric data at risk of not being stored in the database, " + "please reduce disk load or use a faster disk.", ctx->dbfiles_path); + } + rrd_stat_atomic_add(&ctx->stats.pg_cache_over_half_dirty_events, 1); + rrd_stat_atomic_add(&global_pg_cache_over_half_dirty_events, 1); + } + } + + pg_cache_put(ctx, descr); +} + +/* Gets a reference for the page */ +void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle) +{ + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + + debug(D_RRDENGINE, "Reading existing page:"); + descr = pg_cache_lookup(ctx, NULL, id, INVALID_TIME); + if (NULL == descr) { + *handle = NULL; + + return NULL; + } + *handle = descr; + pg_cache_descr = descr->pg_cache_descr; + + return pg_cache_descr->page; +} + +/* Gets a reference for the page */ +void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time_ut, void **handle) +{ + struct rrdeng_page_descr *descr; + struct page_cache_descr *pg_cache_descr; + + debug(D_RRDENGINE, "Reading existing page:"); + descr = pg_cache_lookup(ctx, NULL, id, point_in_time_ut); + if (NULL == descr) { + *handle = NULL; + + return NULL; + } + *handle = descr; + pg_cache_descr = descr->pg_cache_descr; + + return pg_cache_descr->page; +} + +/* + * Gathers Database Engine statistics. + * Careful when modifying this function. + * You must not change the indices of the statistics or user code will break. + * You must not exceed RRDENG_NR_STATS or it will crash. + */ +void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array) +{ + if (ctx == NULL) + return; + + struct page_cache *pg_cache = &ctx->pg_cache; + + array[0] = (uint64_t)ctx->stats.metric_API_producers; + array[1] = (uint64_t)ctx->stats.metric_API_consumers; + array[2] = (uint64_t)pg_cache->page_descriptors; + array[3] = (uint64_t)pg_cache->populated_pages; + array[4] = (uint64_t)pg_cache->committed_page_index.nr_committed_pages; + array[5] = (uint64_t)ctx->stats.pg_cache_insertions; + array[6] = (uint64_t)ctx->stats.pg_cache_deletions; + array[7] = (uint64_t)ctx->stats.pg_cache_hits; + array[8] = (uint64_t)ctx->stats.pg_cache_misses; + array[9] = (uint64_t)ctx->stats.pg_cache_backfills; + array[10] = (uint64_t)ctx->stats.pg_cache_evictions; + array[11] = (uint64_t)ctx->stats.before_compress_bytes; + array[12] = (uint64_t)ctx->stats.after_compress_bytes; + array[13] = (uint64_t)ctx->stats.before_decompress_bytes; + array[14] = (uint64_t)ctx->stats.after_decompress_bytes; + array[15] = (uint64_t)ctx->stats.io_write_bytes; + array[16] = (uint64_t)ctx->stats.io_write_requests; + array[17] = (uint64_t)ctx->stats.io_read_bytes; + array[18] = (uint64_t)ctx->stats.io_read_requests; + array[19] = (uint64_t)ctx->stats.io_write_extent_bytes; + array[20] = (uint64_t)ctx->stats.io_write_extents; + array[21] = (uint64_t)ctx->stats.io_read_extent_bytes; + array[22] = (uint64_t)ctx->stats.io_read_extents; + array[23] = (uint64_t)ctx->stats.datafile_creations; + array[24] = (uint64_t)ctx->stats.datafile_deletions; + array[25] = (uint64_t)ctx->stats.journalfile_creations; + array[26] = (uint64_t)ctx->stats.journalfile_deletions; + array[27] = (uint64_t)ctx->stats.page_cache_descriptors; + array[28] = (uint64_t)ctx->stats.io_errors; + array[29] = (uint64_t)ctx->stats.fs_errors; + array[30] = (uint64_t)global_io_errors; + array[31] = (uint64_t)global_fs_errors; + array[32] = (uint64_t)rrdeng_reserved_file_descriptors; + array[33] = (uint64_t)ctx->stats.pg_cache_over_half_dirty_events; + array[34] = (uint64_t)global_pg_cache_over_half_dirty_events; + array[35] = (uint64_t)ctx->stats.flushing_pressure_page_deletions; + array[36] = (uint64_t)global_flushing_pressure_page_deletions; + fatal_assert(RRDENG_NR_STATS == 37); +} + +/* Releases reference to page */ +void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle) +{ + (void)ctx; + pg_cache_put(ctx, (struct rrdeng_page_descr *)handle); +} + +/* + * Returns 0 on success, negative on error + */ +int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb, + unsigned disk_space_mb, size_t tier) { + struct rrdengine_instance *ctx; + int error; + uint32_t max_open_files; + + max_open_files = rlimit_nofile.rlim_cur / 4; + + /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */ + rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE); + if (rrdeng_reserved_file_descriptors > max_open_files) { + error( + "Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.", + (unsigned)rrdeng_reserved_file_descriptors, + (unsigned)max_open_files); + + rrd_stat_atomic_add(&global_fs_errors, 1); + rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); + return UV_EMFILE; + } + + if(NULL == ctxp) { + ctx = multidb_ctx[tier]; + memset(ctx, 0, sizeof(*ctx)); + } + else { + *ctxp = ctx = callocz(1, sizeof(*ctx)); + } + ctx->tier = tier; + ctx->page_type = tier_page_type[tier]; + ctx->global_compress_alg = RRD_LZ4; + if (page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) + page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB; + ctx->max_cache_pages = page_cache_mb * (1048576LU / RRDENG_BLOCK_SIZE); + /* try to keep 5% of the page cache free */ + ctx->cache_pages_low_watermark = (ctx->max_cache_pages * 95LLU) / 100; + if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB) + disk_space_mb = RRDENG_MIN_DISK_SPACE_MB; + ctx->max_disk_space = disk_space_mb * 1048576LLU; + strncpyz(ctx->dbfiles_path, dbfiles_path, sizeof(ctx->dbfiles_path) - 1); + ctx->dbfiles_path[sizeof(ctx->dbfiles_path) - 1] = '\0'; + if (NULL == host) + strncpyz(ctx->machine_guid, registry_get_this_machine_guid(), GUID_LEN); + else + strncpyz(ctx->machine_guid, host->machine_guid, GUID_LEN); + + ctx->drop_metrics_under_page_cache_pressure = rrdeng_drop_metrics_under_page_cache_pressure; + ctx->metric_API_max_producers = 0; + ctx->quiesce = NO_QUIESCE; + ctx->host = host; + + memset(&ctx->worker_config, 0, sizeof(ctx->worker_config)); + ctx->worker_config.ctx = ctx; + init_page_cache(ctx); + init_commit_log(ctx); + error = init_rrd_files(ctx); + if (error) { + goto error_after_init_rrd_files; + } + + completion_init(&ctx->rrdengine_completion); + fatal_assert(0 == uv_thread_create(&ctx->worker_config.thread, rrdeng_worker, &ctx->worker_config)); + /* wait for worker thread to initialize */ + completion_wait_for(&ctx->rrdengine_completion); + completion_destroy(&ctx->rrdengine_completion); + uv_thread_set_name_np(ctx->worker_config.thread, "LIBUV_WORKER"); + if (ctx->worker_config.error) { + goto error_after_rrdeng_worker; + } +// error = metalog_init(ctx); +// if (error) { +// error("Failed to initialize metadata log file event loop."); +// goto error_after_rrdeng_worker; +// } + + return 0; + +error_after_rrdeng_worker: + finalize_rrd_files(ctx); +error_after_init_rrd_files: + free_page_cache(ctx); + if (!is_storage_engine_shared((STORAGE_INSTANCE *)ctx)) { + freez(ctx); + if (ctxp) + *ctxp = NULL; + } + rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); + return UV_EIO; +} + +/* + * Returns 0 on success, 1 on error + */ +int rrdeng_exit(struct rrdengine_instance *ctx) +{ + struct rrdeng_cmd cmd; + + if (NULL == ctx) { + return 1; + } + + /* TODO: add page to page cache */ + cmd.opcode = RRDENG_SHUTDOWN; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + + fatal_assert(0 == uv_thread_join(&ctx->worker_config.thread)); + + finalize_rrd_files(ctx); + //metalog_exit(ctx->metalog_ctx); + free_page_cache(ctx); + + if(!is_storage_engine_shared((STORAGE_INSTANCE *)ctx)) + freez(ctx); + + rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); + return 0; +} + +void rrdeng_prepare_exit(struct rrdengine_instance *ctx) +{ + struct rrdeng_cmd cmd; + + if (NULL == ctx) { + return; + } + + completion_init(&ctx->rrdengine_completion); + cmd.opcode = RRDENG_QUIESCE; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + + /* wait for dbengine to quiesce */ + completion_wait_for(&ctx->rrdengine_completion); + completion_destroy(&ctx->rrdengine_completion); + + //metalog_prepare_exit(ctx->metalog_ctx); +} + +RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx) { + RRDENG_SIZE_STATS stats = { 0 }; + + for(struct pg_cache_page_index *page_index = ctx->pg_cache.metrics_index.last_page_index; + page_index != NULL ;page_index = page_index->prev) { + stats.metrics++; + stats.metrics_pages += page_index->page_count; + } + + for(struct rrdengine_datafile *df = ctx->datafiles.first; df ;df = df->next) { + stats.datafiles++; + + for(struct extent_info *ei = df->extents.first; ei ; ei = ei->next) { + stats.extents++; + stats.extents_compressed_bytes += ei->size; + + for(int p = 0; p < ei->number_of_pages ;p++) { + struct rrdeng_page_descr *descr = ei->pages[p]; + + usec_t update_every_usec; + + size_t points = descr->page_length / PAGE_POINT_SIZE_BYTES(descr); + + if(likely(points > 1)) + update_every_usec = (descr->end_time_ut - descr->start_time_ut) / (points - 1); + else { + update_every_usec = default_rrd_update_every * get_tier_grouping(ctx->tier) * USEC_PER_SEC; + stats.single_point_pages++; + } + + time_t duration_secs = (time_t)((descr->end_time_ut - descr->start_time_ut + update_every_usec)/USEC_PER_SEC); + + stats.extents_pages++; + stats.pages_uncompressed_bytes += descr->page_length; + stats.pages_duration_secs += duration_secs; + stats.points += points; + + stats.page_types[descr->type].pages++; + stats.page_types[descr->type].pages_uncompressed_bytes += descr->page_length; + stats.page_types[descr->type].pages_duration_secs += duration_secs; + stats.page_types[descr->type].points += points; + + if(!stats.first_t || (descr->start_time_ut - update_every_usec) < stats.first_t) + stats.first_t = (descr->start_time_ut - update_every_usec) / USEC_PER_SEC; + + if(!stats.last_t || descr->end_time_ut > stats.last_t) + stats.last_t = descr->end_time_ut / USEC_PER_SEC; + } + } + } + + + stats.currently_collected_metrics = ctx->stats.metric_API_producers; + stats.max_concurrently_collected_metrics = ctx->metric_API_max_producers; + + internal_error(stats.metrics_pages != stats.extents_pages + stats.currently_collected_metrics, + "DBENGINE: metrics pages is %zu, but extents pages is %zu and API consumers is %zu", + stats.metrics_pages, stats.extents_pages, stats.currently_collected_metrics); + + stats.disk_space = ctx->disk_space; + stats.max_disk_space = ctx->max_disk_space; + + stats.database_retention_secs = (time_t)(stats.last_t - stats.first_t); + + if(stats.extents_pages) + stats.average_page_size_bytes = (double)stats.pages_uncompressed_bytes / (double)stats.extents_pages; + + if(stats.pages_uncompressed_bytes > 0) + stats.average_compression_savings = 100.0 - ((double)stats.extents_compressed_bytes * 100.0 / (double)stats.pages_uncompressed_bytes); + + if(stats.points) + stats.average_point_duration_secs = (double)stats.pages_duration_secs / (double)stats.points; + + if(stats.metrics) { + stats.average_metric_retention_secs = (double)stats.pages_duration_secs / (double)stats.metrics; + + if(stats.database_retention_secs) { + double metric_coverage = stats.average_metric_retention_secs / (double)stats.database_retention_secs; + double db_retention_days = (double)stats.database_retention_secs / 86400.0; + + stats.estimated_concurrently_collected_metrics = stats.metrics * metric_coverage; + + stats.ephemeral_metrics_per_day_percent = ((double)stats.metrics * 100.0 / (double)stats.estimated_concurrently_collected_metrics - 100.0) / (double)db_retention_days; + } + } + + stats.sizeof_metric = struct_natural_alignment(sizeof(struct pg_cache_page_index) + sizeof(struct pg_alignment)); + stats.sizeof_page = struct_natural_alignment(sizeof(struct rrdeng_page_descr)); + stats.sizeof_datafile = struct_natural_alignment(sizeof(struct rrdengine_datafile)) + struct_natural_alignment(sizeof(struct rrdengine_journalfile)); + stats.sizeof_page_in_cache = struct_natural_alignment(sizeof(struct page_cache_descr)); + stats.sizeof_point_data = page_type_size[ctx->page_type]; + stats.sizeof_page_data = RRDENG_BLOCK_SIZE; + stats.pages_per_extent = rrdeng_pages_per_extent; + + stats.sizeof_extent = sizeof(struct extent_info); + stats.sizeof_page_in_extent = sizeof(struct rrdeng_page_descr *); + + stats.sizeof_metric_in_index = 40; + stats.sizeof_page_in_index = 24; + + stats.default_granularity_secs = (size_t)default_rrd_update_every * get_tier_grouping(ctx->tier); + + return stats; +} diff --git a/database/engine/rrdengineapi.h b/database/engine/rrdengineapi.h new file mode 100644 index 0000000..3acee4e --- /dev/null +++ b/database/engine/rrdengineapi.h @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDENGINEAPI_H +#define NETDATA_RRDENGINEAPI_H + +#include "rrdengine.h" + +#define RRDENG_MIN_PAGE_CACHE_SIZE_MB (8) +#define RRDENG_MIN_DISK_SPACE_MB (64) + +#define RRDENG_NR_STATS (37) + +#define RRDENG_FD_BUDGET_PER_INSTANCE (50) + +extern int db_engine_use_malloc; +extern int default_rrdeng_page_fetch_timeout; +extern int default_rrdeng_page_fetch_retries; +extern int default_rrdeng_page_cache_mb; +extern int default_rrdeng_disk_quota_mb; +extern int default_multidb_disk_quota_mb; +extern uint8_t rrdeng_drop_metrics_under_page_cache_pressure; +extern struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS]; +extern size_t page_type_size[]; + +#define PAGE_POINT_SIZE_BYTES(x) page_type_size[(x)->type] + +struct rrdeng_region_info { + time_t start_time_s; + int update_every; + unsigned points; +}; + +void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr); +void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, + Word_t page_correlation_id); +void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle); +void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time_ut, void **handle); +void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle); + +void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid); +void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, + uuid_t *ret_uuid); + + +STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance); +STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); +STORAGE_METRIC_HANDLE *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid); +STORAGE_METRIC_HANDLE *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id); +void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle); +STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle); + +STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg); +void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle); +void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); +void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time_ut, NETDATA_DOUBLE n, + NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, + uint16_t count, + uint16_t anomaly_count, + SN_FLAGS flags); +int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle); + +void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrdimm_handle, + time_t start_time_s, time_t end_time_s); +STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle); + + +int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrdimm_handle); +void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrdimm_handle); +time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); +time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); + +void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array); + +/* must call once before using anything */ +int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb, + unsigned disk_space_mb, size_t tier); + +int rrdeng_exit(struct rrdengine_instance *ctx); +void rrdeng_prepare_exit(struct rrdengine_instance *ctx); +int rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t); + +extern STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); +extern void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); + +typedef struct rrdengine_size_statistics { + size_t default_granularity_secs; + + size_t sizeof_metric; + size_t sizeof_metric_in_index; + size_t sizeof_page; + size_t sizeof_page_in_index; + size_t sizeof_extent; + size_t sizeof_page_in_extent; + size_t sizeof_datafile; + size_t sizeof_page_in_cache; + size_t sizeof_point_data; + size_t sizeof_page_data; + + size_t pages_per_extent; + + size_t datafiles; + size_t extents; + size_t extents_pages; + size_t points; + size_t metrics; + size_t metrics_pages; + + size_t extents_compressed_bytes; + size_t pages_uncompressed_bytes; + time_t pages_duration_secs; + + struct { + size_t pages; + size_t pages_uncompressed_bytes; + time_t pages_duration_secs; + size_t points; + } page_types[256]; + + size_t single_point_pages; + + usec_t first_t; + usec_t last_t; + + size_t currently_collected_metrics; + size_t max_concurrently_collected_metrics; + size_t estimated_concurrently_collected_metrics; + + size_t disk_space; + size_t max_disk_space; + + time_t database_retention_secs; + double average_compression_savings; + double average_point_duration_secs; + double average_metric_retention_secs; + + double ephemeral_metrics_per_day_percent; + + double average_page_size_bytes; +} RRDENG_SIZE_STATS; + +RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx); + +#endif /* NETDATA_RRDENGINEAPI_H */ diff --git a/database/engine/rrdenginelib.c b/database/engine/rrdenginelib.c new file mode 100644 index 0000000..58bd9c4 --- /dev/null +++ b/database/engine/rrdenginelib.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#include "rrdengine.h" + +#define BUFSIZE (512) + +/* Caller must hold descriptor lock */ +void print_page_cache_descr(struct rrdeng_page_descr *descr, const char *msg, bool log_debug) +{ + if(log_debug && !(debug_flags & D_RRDENGINE)) + return; + + BUFFER *wb = buffer_create(512); + + if(!descr) { + buffer_sprintf(wb, "DBENGINE: %s : descr is NULL", msg); + } + else { + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + char uuid_str[UUID_STR_LEN]; + + uuid_unparse_lower(*descr->id, uuid_str); + buffer_sprintf(wb, "DBENGINE: %s : page(%p) metric:%s, len:%"PRIu32", time:%"PRIu64"->%"PRIu64", update_every:%u, type:%u, xt_offset:", + msg, + pg_cache_descr->page, uuid_str, + descr->page_length, + (uint64_t)descr->start_time_ut, + (uint64_t)descr->end_time_ut, + (uint32_t)descr->update_every_s, + (uint32_t)descr->type + ); + if (!descr->extent) { + buffer_strcat(wb, "N/A"); + } else { + buffer_sprintf(wb, "%"PRIu64, descr->extent->offset); + } + + buffer_sprintf(wb, ", flags:0x%2.2lX refcnt:%u", pg_cache_descr->flags, pg_cache_descr->refcnt); + } + + if(log_debug) + debug(D_RRDENGINE, "%s", buffer_tostring(wb)); + else + internal_error(true, "%s", buffer_tostring(wb)); + + buffer_free(wb); +} + +void print_page_descr(struct rrdeng_page_descr *descr) +{ + char uuid_str[UUID_STR_LEN]; + char str[BUFSIZE + 1]; + int pos = 0; + + uuid_unparse_lower(*descr->id, uuid_str); + pos += snprintfz(str, BUFSIZE - pos, "id=%s\n" + "--->len:%"PRIu32" time:%"PRIu64"->%"PRIu64" xt_offset:", + uuid_str, + descr->page_length, + (uint64_t)descr->start_time_ut, + (uint64_t)descr->end_time_ut); + if (!descr->extent) { + pos += snprintfz(str + pos, BUFSIZE - pos, "N/A"); + } else { + pos += snprintfz(str + pos, BUFSIZE - pos, "%"PRIu64, descr->extent->offset); + } + snprintfz(str + pos, BUFSIZE - pos, "\n\n"); + fputs(str, stderr); +} + +int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size) +{ + int ret; + uv_fs_t req; + uv_stat_t* s; + + ret = uv_fs_fstat(NULL, &req, file, NULL); + if (ret < 0) { + fatal("uv_fs_fstat: %s\n", uv_strerror(ret)); + } + fatal_assert(req.result == 0); + s = req.ptr; + if (!(s->st_mode & S_IFREG)) { + error("Not a regular file.\n"); + uv_fs_req_cleanup(&req); + return UV_EINVAL; + } + if (s->st_size < min_size) { + error("File length is too short.\n"); + uv_fs_req_cleanup(&req); + return UV_EINVAL; + } + *file_size = s->st_size; + uv_fs_req_cleanup(&req); + + return 0; +} + +/** + * Open file for I/O. + * + * @param path The full path of the file. + * @param flags Same flags as the open() system call uses. + * @param file On success sets (*file) to be the uv_file that was opened. + * @param direct Tries to open a file in direct I/O mode when direct=1, falls back to buffered mode if not possible. + * @return Returns UV error number that is < 0 on failure. 0 on success. + */ +int open_file_for_io(char *path, int flags, uv_file *file, int direct) +{ + uv_fs_t req; + int fd = -1, current_flags; + + fatal_assert(0 == direct || 1 == direct); + for ( ; direct >= 0 ; --direct) { +#ifdef __APPLE__ + /* Apple OS does not support O_DIRECT */ + direct = 0; +#endif + current_flags = flags; + if (direct) { + current_flags |= O_DIRECT; + } + fd = uv_fs_open(NULL, &req, path, current_flags, S_IRUSR | S_IWUSR, NULL); + if (fd < 0) { + if ((direct) && (UV_EINVAL == fd)) { + error("File \"%s\" does not support direct I/O, falling back to buffered I/O.", path); + } else { + error("Failed to open file \"%s\".", path); + --direct; /* break the loop */ + } + } else { + fatal_assert(req.result >= 0); + *file = req.result; +#ifdef __APPLE__ + info("Disabling OS X caching for file \"%s\".", path); + fcntl(fd, F_NOCACHE, 1); +#endif + --direct; /* break the loop */ + } + uv_fs_req_cleanup(&req); + } + + return fd; +} + +char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size) +{ + struct page_cache *pg_cache; + + pg_cache = &ctx->pg_cache; + snprintfz(str, size, + "metric_API_producers: %ld\n" + "metric_API_consumers: %ld\n" + "page_cache_total_pages: %ld\n" + "page_cache_descriptors: %ld\n" + "page_cache_populated_pages: %ld\n" + "page_cache_committed_pages: %ld\n" + "page_cache_insertions: %ld\n" + "page_cache_deletions: %ld\n" + "page_cache_hits: %ld\n" + "page_cache_misses: %ld\n" + "page_cache_backfills: %ld\n" + "page_cache_evictions: %ld\n" + "compress_before_bytes: %ld\n" + "compress_after_bytes: %ld\n" + "decompress_before_bytes: %ld\n" + "decompress_after_bytes: %ld\n" + "io_write_bytes: %ld\n" + "io_write_requests: %ld\n" + "io_read_bytes: %ld\n" + "io_read_requests: %ld\n" + "io_write_extent_bytes: %ld\n" + "io_write_extents: %ld\n" + "io_read_extent_bytes: %ld\n" + "io_read_extents: %ld\n" + "datafile_creations: %ld\n" + "datafile_deletions: %ld\n" + "journalfile_creations: %ld\n" + "journalfile_deletions: %ld\n" + "io_errors: %ld\n" + "fs_errors: %ld\n" + "global_io_errors: %ld\n" + "global_fs_errors: %ld\n" + "rrdeng_reserved_file_descriptors: %ld\n" + "pg_cache_over_half_dirty_events: %ld\n" + "global_pg_cache_over_half_dirty_events: %ld\n" + "flushing_pressure_page_deletions: %ld\n" + "global_flushing_pressure_page_deletions: %ld\n", + (long)ctx->stats.metric_API_producers, + (long)ctx->stats.metric_API_consumers, + (long)pg_cache->page_descriptors, + (long)ctx->stats.page_cache_descriptors, + (long)pg_cache->populated_pages, + (long)pg_cache->committed_page_index.nr_committed_pages, + (long)ctx->stats.pg_cache_insertions, + (long)ctx->stats.pg_cache_deletions, + (long)ctx->stats.pg_cache_hits, + (long)ctx->stats.pg_cache_misses, + (long)ctx->stats.pg_cache_backfills, + (long)ctx->stats.pg_cache_evictions, + (long)ctx->stats.before_compress_bytes, + (long)ctx->stats.after_compress_bytes, + (long)ctx->stats.before_decompress_bytes, + (long)ctx->stats.after_decompress_bytes, + (long)ctx->stats.io_write_bytes, + (long)ctx->stats.io_write_requests, + (long)ctx->stats.io_read_bytes, + (long)ctx->stats.io_read_requests, + (long)ctx->stats.io_write_extent_bytes, + (long)ctx->stats.io_write_extents, + (long)ctx->stats.io_read_extent_bytes, + (long)ctx->stats.io_read_extents, + (long)ctx->stats.datafile_creations, + (long)ctx->stats.datafile_deletions, + (long)ctx->stats.journalfile_creations, + (long)ctx->stats.journalfile_deletions, + (long)ctx->stats.io_errors, + (long)ctx->stats.fs_errors, + (long)global_io_errors, + (long)global_fs_errors, + (long)rrdeng_reserved_file_descriptors, + (long)ctx->stats.pg_cache_over_half_dirty_events, + (long)global_pg_cache_over_half_dirty_events, + (long)ctx->stats.flushing_pressure_page_deletions, + (long)global_flushing_pressure_page_deletions + ); + return str; +} + +int is_legacy_child(const char *machine_guid) +{ + uuid_t uuid; + char dbengine_file[FILENAME_MAX+1]; + + if (unlikely(!strcmp(machine_guid, "unittest-dbengine") || !strcmp(machine_guid, "dbengine-dataset") || + !strcmp(machine_guid, "dbengine-stress-test"))) { + return 1; + } + if (!uuid_parse(machine_guid, uuid)) { + uv_fs_t stat_req; + snprintfz(dbengine_file, FILENAME_MAX, "%s/%s/dbengine", netdata_configured_cache_dir, machine_guid); + int rc = uv_fs_stat(NULL, &stat_req, dbengine_file, NULL); + if (likely(rc == 0 && ((stat_req.statbuf.st_mode & S_IFMT) == S_IFDIR))) { + //info("Found legacy engine folder \"%s\"", dbengine_file); + return 1; + } + } + return 0; +} + +int count_legacy_children(char *dbfiles_path) +{ + int ret; + uv_fs_t req; + uv_dirent_t dent; + int legacy_engines = 0; + + ret = uv_fs_scandir(NULL, &req, dbfiles_path, 0, NULL); + if (ret < 0) { + uv_fs_req_cleanup(&req); + error("uv_fs_scandir(%s): %s", dbfiles_path, uv_strerror(ret)); + return ret; + } + + while(UV_EOF != uv_fs_scandir_next(&req, &dent)) { + if (dent.type == UV_DIRENT_DIR) { + if (is_legacy_child(dent.name)) + legacy_engines++; + } + } + uv_fs_req_cleanup(&req); + return legacy_engines; +} + +int compute_multidb_diskspace() +{ + char multidb_disk_space_file[FILENAME_MAX + 1]; + FILE *fp; + int computed_multidb_disk_quota_mb = -1; + + snprintfz(multidb_disk_space_file, FILENAME_MAX, "%s/dbengine_multihost_size", netdata_configured_varlib_dir); + fp = fopen(multidb_disk_space_file, "r"); + if (likely(fp)) { + int rc = fscanf(fp, "%d", &computed_multidb_disk_quota_mb); + fclose(fp); + if (unlikely(rc != 1 || computed_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB)) { + errno = 0; + error("File '%s' contains invalid input, it will be rebuild", multidb_disk_space_file); + computed_multidb_disk_quota_mb = -1; + } + } + + if (computed_multidb_disk_quota_mb == -1) { + int rc = count_legacy_children(netdata_configured_cache_dir); + if (likely(rc >= 0)) { + computed_multidb_disk_quota_mb = (rc + 1) * default_rrdeng_disk_quota_mb; + info("Found %d legacy dbengines, setting multidb diskspace to %dMB", rc, computed_multidb_disk_quota_mb); + + fp = fopen(multidb_disk_space_file, "w"); + if (likely(fp)) { + fprintf(fp, "%d", computed_multidb_disk_quota_mb); + info("Created file '%s' to store the computed value", multidb_disk_space_file); + fclose(fp); + } else + error("Failed to store the default multidb disk quota size on '%s'", multidb_disk_space_file); + } + else + computed_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb; + } + + return computed_multidb_disk_quota_mb; +} diff --git a/database/engine/rrdenginelib.h b/database/engine/rrdenginelib.h new file mode 100644 index 0000000..6b1a15f --- /dev/null +++ b/database/engine/rrdenginelib.h @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDENGINELIB_H +#define NETDATA_RRDENGINELIB_H + +#include "libnetdata/libnetdata.h" + +/* Forward declarations */ +struct rrdeng_page_descr; +struct rrdengine_instance; + +#define STR_HELPER(x) #x +#define STR(x) STR_HELPER(x) + +#define BITS_PER_ULONG (sizeof(unsigned long) * 8) + +#define ALIGN_BYTES_FLOOR(x) (((x) / RRDENG_BLOCK_SIZE) * RRDENG_BLOCK_SIZE) +#define ALIGN_BYTES_CEILING(x) ((((x) + RRDENG_BLOCK_SIZE - 1) / RRDENG_BLOCK_SIZE) * RRDENG_BLOCK_SIZE) + +#define ROUND_USEC_TO_SEC(x) (((x) + USEC_PER_SEC / 2 - 1) / USEC_PER_SEC) + +typedef uintptr_t rrdeng_stats_t; + +#ifdef __ATOMIC_RELAXED +#define rrd_atomic_fetch_add(p, n) __atomic_fetch_add(p, n, __ATOMIC_RELAXED) +#define rrd_atomic_add_fetch(p, n) __atomic_add_fetch(p, n, __ATOMIC_RELAXED) +#else +#define rrd_atomic_fetch_add(p, n) __sync_fetch_and_add(p, n) +#define rrd_atomic_add_fetch(p, n) __sync_add_and_fetch(p, n) +#endif + +#define rrd_stat_atomic_add(p, n) rrd_atomic_fetch_add(p, n) + +/* returns -1 if it didn't find the first cleared bit, the position otherwise. Starts from LSB. */ +static inline int find_first_zero(unsigned x) +{ + return ffs((int)(~x)) - 1; +} + +/* Starts from LSB. */ +static inline uint8_t check_bit(unsigned x, size_t pos) +{ + return !!(x & (1 << pos)); +} + +/* Starts from LSB. val is 0 or 1 */ +static inline void modify_bit(unsigned *x, unsigned pos, uint8_t val) +{ + switch(val) { + case 0: + *x &= ~(1U << pos); + break; + case 1: + *x |= 1U << pos; + break; + default: + error("modify_bit() called with invalid argument."); + break; + } +} + +#define RRDENG_PATH_MAX (4096) + +/* returns old *ptr value */ +static inline unsigned long ulong_compare_and_swap(volatile unsigned long *ptr, + unsigned long oldval, unsigned long newval) +{ + return __sync_val_compare_and_swap(ptr, oldval, newval); +} + +#ifndef O_DIRECT +/* Workaround for OS X */ +#define O_DIRECT (0) +#endif + +static inline int crc32cmp(void *crcp, uLong crc) +{ + return (*(uint32_t *)crcp != crc); +} + +static inline void crc32set(void *crcp, uLong crc) +{ + *(uint32_t *)crcp = crc; +} + +void print_page_cache_descr(struct rrdeng_page_descr *descr, const char *msg, bool log_debug); +void print_page_descr(struct rrdeng_page_descr *descr); +int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size); +int open_file_for_io(char *path, int flags, uv_file *file, int direct); +static inline int open_file_direct_io(char *path, int flags, uv_file *file) +{ + return open_file_for_io(path, flags, file, 1); +} +static inline int open_file_buffered_io(char *path, int flags, uv_file *file) +{ + return open_file_for_io(path, flags, file, 0); +} +char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size); +int compute_multidb_diskspace(); +int is_legacy_child(const char *machine_guid); + +#endif /* NETDATA_RRDENGINELIB_H */ diff --git a/database/engine/rrdenglocking.c b/database/engine/rrdenglocking.c new file mode 100644 index 0000000..a23abf3 --- /dev/null +++ b/database/engine/rrdenglocking.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#include "rrdengine.h" + +struct page_cache_descr *rrdeng_create_pg_cache_descr(struct rrdengine_instance *ctx) +{ + struct page_cache_descr *pg_cache_descr; + + pg_cache_descr = mallocz(sizeof(*pg_cache_descr)); + rrd_stat_atomic_add(&ctx->stats.page_cache_descriptors, 1); + pg_cache_descr->page = NULL; + pg_cache_descr->flags = 0; + pg_cache_descr->prev = pg_cache_descr->next = NULL; + pg_cache_descr->refcnt = 0; + pg_cache_descr->waiters = 0; + fatal_assert(0 == uv_cond_init(&pg_cache_descr->cond)); + fatal_assert(0 == uv_mutex_init(&pg_cache_descr->mutex)); + + return pg_cache_descr; +} + +void rrdeng_destroy_pg_cache_descr(struct rrdengine_instance *ctx, struct page_cache_descr *pg_cache_descr) +{ + uv_cond_destroy(&pg_cache_descr->cond); + uv_mutex_destroy(&pg_cache_descr->mutex); + freez(pg_cache_descr); + rrd_stat_atomic_add(&ctx->stats.page_cache_descriptors, -1); +} + +/* also allocates page cache descriptor if missing */ +void rrdeng_page_descr_mutex_lock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + unsigned long old_state, old_users, new_state, ret_state; + struct page_cache_descr *pg_cache_descr = NULL; + uint8_t we_locked; + + we_locked = 0; + while (1) { /* spin */ + old_state = descr->pg_cache_descr_state; + old_users = old_state >> PG_CACHE_DESCR_SHIFT; + + if (unlikely(we_locked)) { + fatal_assert(old_state & PG_CACHE_DESCR_LOCKED); + new_state = (1 << PG_CACHE_DESCR_SHIFT) | PG_CACHE_DESCR_ALLOCATED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + break; + } + continue; /* spin */ + } + if (old_state & PG_CACHE_DESCR_LOCKED) { + fatal_assert(0 == old_users); + continue; /* spin */ + } + if (0 == old_state) { + /* no page cache descriptor has been allocated */ + + if (NULL == pg_cache_descr) { + pg_cache_descr = rrdeng_create_pg_cache_descr(ctx); + } + new_state = PG_CACHE_DESCR_LOCKED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, 0, new_state); + if (0 == ret_state) { + we_locked = 1; + descr->pg_cache_descr = pg_cache_descr; + pg_cache_descr->descr = descr; + pg_cache_descr = NULL; /* make sure we don't free pg_cache_descr */ + /* retry */ + continue; + } + continue; /* spin */ + } + /* page cache descriptor is already allocated */ + if (unlikely(!(old_state & PG_CACHE_DESCR_ALLOCATED))) { + fatal("Invalid page cache descriptor locking state:%#lX", old_state); + } + new_state = (old_users + 1) << PG_CACHE_DESCR_SHIFT; + new_state |= old_state & PG_CACHE_DESCR_FLAGS_MASK; + + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + break; + } + /* spin */ + } + + if (pg_cache_descr) { + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); + } + pg_cache_descr = descr->pg_cache_descr; + uv_mutex_lock(&pg_cache_descr->mutex); +} + +void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + unsigned long old_state, new_state, ret_state, old_users; + struct page_cache_descr *pg_cache_descr, *delete_pg_cache_descr = NULL; + uint8_t we_locked; + + uv_mutex_unlock(&descr->pg_cache_descr->mutex); + + we_locked = 0; + while (1) { /* spin */ + old_state = descr->pg_cache_descr_state; + old_users = old_state >> PG_CACHE_DESCR_SHIFT; + + if (unlikely(we_locked)) { + fatal_assert(0 == old_users); + + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, 0); + if (old_state == ret_state) { + /* success */ + rrdeng_destroy_pg_cache_descr(ctx, delete_pg_cache_descr); + return; + } + continue; /* spin */ + } + if (old_state & PG_CACHE_DESCR_LOCKED) { + fatal_assert(0 == old_users); + continue; /* spin */ + } + fatal_assert(old_state & PG_CACHE_DESCR_ALLOCATED); + pg_cache_descr = descr->pg_cache_descr; + /* caller is the only page cache descriptor user and there are no pending references on the page */ + if ((old_state & PG_CACHE_DESCR_DESTROY) && (1 == old_users) && + !pg_cache_descr->flags && !pg_cache_descr->refcnt) { + fatal_assert(!pg_cache_descr->waiters); + + new_state = PG_CACHE_DESCR_LOCKED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + we_locked = 1; + delete_pg_cache_descr = pg_cache_descr; + descr->pg_cache_descr = NULL; + /* retry */ + continue; + } + continue; /* spin */ + } + fatal_assert(old_users > 0); + new_state = (old_users - 1) << PG_CACHE_DESCR_SHIFT; + new_state |= old_state & PG_CACHE_DESCR_FLAGS_MASK; + + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + break; + } + /* spin */ + } +} + +/* + * Tries to deallocate page cache descriptor. If it fails, it postpones deallocation by setting the + * PG_CACHE_DESCR_DESTROY flag which will be eventually cleared by a different context after doing + * the deallocation. + */ +void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + unsigned long old_state, new_state, ret_state, old_users; + struct page_cache_descr *pg_cache_descr = NULL; + uint8_t just_locked, can_free, must_unlock; + + just_locked = 0; + can_free = 0; + must_unlock = 0; + while (1) { /* spin */ + old_state = descr->pg_cache_descr_state; + old_users = old_state >> PG_CACHE_DESCR_SHIFT; + + if (unlikely(just_locked)) { + fatal_assert(0 == old_users); + + must_unlock = 1; + just_locked = 0; + /* Try deallocate if there are no pending references on the page */ + if (!pg_cache_descr->flags && !pg_cache_descr->refcnt) { + fatal_assert(!pg_cache_descr->waiters); + + descr->pg_cache_descr = NULL; + can_free = 1; + /* success */ + continue; + } + continue; /* spin */ + } + if (unlikely(must_unlock)) { + fatal_assert(0 == old_users); + + if (can_free) { + /* success */ + new_state = 0; + } else { + new_state = old_state | PG_CACHE_DESCR_DESTROY; + new_state &= ~PG_CACHE_DESCR_LOCKED; + } + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* unlocked */ + if (can_free) + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); + return; + } + continue; /* spin */ + } + if (!(old_state & PG_CACHE_DESCR_ALLOCATED)) { + /* don't do anything */ + return; + } + if (old_state & PG_CACHE_DESCR_LOCKED) { + fatal_assert(0 == old_users); + continue; /* spin */ + } + /* caller is the only page cache descriptor user */ + if (0 == old_users) { + new_state = old_state | PG_CACHE_DESCR_LOCKED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + just_locked = 1; + pg_cache_descr = descr->pg_cache_descr; + /* retry */ + continue; + } + continue; /* spin */ + } + if (old_state & PG_CACHE_DESCR_DESTROY) { + /* don't do anything */ + return; + } + /* plant PG_CACHE_DESCR_DESTROY so that other contexts eventually free the page cache descriptor */ + new_state = old_state | PG_CACHE_DESCR_DESTROY; + + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + return; + } + /* spin */ + } +} \ No newline at end of file diff --git a/database/engine/rrdenglocking.h b/database/engine/rrdenglocking.h new file mode 100644 index 0000000..078eab3 --- /dev/null +++ b/database/engine/rrdenglocking.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDENGLOCKING_H +#define NETDATA_RRDENGLOCKING_H + +#include "rrdengine.h" + +/* Forward declarations */ +struct page_cache_descr; + +struct page_cache_descr *rrdeng_create_pg_cache_descr(struct rrdengine_instance *ctx); +void rrdeng_destroy_pg_cache_descr(struct rrdengine_instance *ctx, struct page_cache_descr *pg_cache_descr); +void rrdeng_page_descr_mutex_lock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); + +#endif /* NETDATA_RRDENGLOCKING_H */ \ No newline at end of file diff --git a/database/ram/Makefile.am b/database/ram/Makefile.am new file mode 100644 index 0000000..59250a9 --- /dev/null +++ b/database/ram/Makefile.am @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/database/ram/README.md b/database/ram/README.md new file mode 100644 index 0000000..73562f0 --- /dev/null +++ b/database/ram/README.md @@ -0,0 +1,7 @@ + + +# RAM modes \ No newline at end of file diff --git a/database/ram/rrddim_mem.c b/database/ram/rrddim_mem.c new file mode 100644 index 0000000..299b655 --- /dev/null +++ b/database/ram/rrddim_mem.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrddim_mem.h" +#include "Judy.h" + +static Pvoid_t rrddim_JudyHS_array = NULL; +static netdata_rwlock_t rrddim_JudyHS_rwlock = NETDATA_RWLOCK_INITIALIZER; + +// ---------------------------------------------------------------------------- +// metrics groups + +STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) { + return NULL; +} + +void rrddim_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg __maybe_unused) { + // if(!smg) return; // smg may be NULL + ; +} + +// ---------------------------------------------------------------------------- +// RRDDIM legacy data collection functions + +STORAGE_METRIC_HANDLE * +rrddim_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance __maybe_unused) { + STORAGE_METRIC_HANDLE *t = rrddim_metric_get(db_instance, &rd->metric_uuid); + if(!t) { + netdata_rwlock_wrlock(&rrddim_JudyHS_rwlock); + Pvoid_t *PValue = JudyHSIns(&rrddim_JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0); + fatal_assert(NULL == *PValue); + *PValue = rd; + t = (STORAGE_METRIC_HANDLE *)rd; + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); + } + + if((RRDDIM *)t != rd) + fatal("RRDDIM_MEM: incorrect pointer returned from index."); + + return (STORAGE_METRIC_HANDLE *)rd; +} + +STORAGE_METRIC_HANDLE * +rrddim_metric_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid) { + RRDDIM *rd = NULL; + netdata_rwlock_rdlock(&rrddim_JudyHS_rwlock); + Pvoid_t *PValue = JudyHSGet(rrddim_JudyHS_array, uuid, sizeof(uuid_t)); + if (likely(NULL != PValue)) + rd = *PValue; + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); + + return (STORAGE_METRIC_HANDLE *)rd; +} + +STORAGE_METRIC_HANDLE *rrddim_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) { + return db_metric_handle; +} + +void rrddim_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle __maybe_unused) { + RRDDIM *rd = (RRDDIM *)db_metric_handle; + + netdata_rwlock_wrlock(&rrddim_JudyHS_rwlock); + JudyHSDel(&rrddim_JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0); + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); +} + +void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every __maybe_unused) { + rrddim_store_metric_flush(collection_handle); +} + +STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every __maybe_unused, STORAGE_METRICS_GROUP *smg __maybe_unused) { + RRDDIM *rd = (RRDDIM *)db_metric_handle; + rd->db[rd->rrdset->current_entry] = pack_storage_number(NAN, SN_FLAG_NONE); + struct mem_collect_handle *ch = callocz(1, sizeof(struct mem_collect_handle)); + ch->rd = rd; + return (STORAGE_COLLECT_HANDLE *)ch; +} + +void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, + NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, + uint16_t count, + uint16_t anomaly_count, + SN_FLAGS flags) +{ + UNUSED(point_in_time); + UNUSED(min_value); + UNUSED(max_value); + UNUSED(count); + UNUSED(anomaly_count); + + struct mem_collect_handle *ch = (struct mem_collect_handle *)collection_handle; + RRDDIM *rd = ch->rd; + rd->db[rd->rrdset->current_entry] = pack_storage_number(number, flags); +} + +void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle) { + struct mem_collect_handle *ch = (struct mem_collect_handle *)collection_handle; + + RRDDIM *rd = ch->rd; + for(int i = 0; i < rd->rrdset->entries ;i++) + rd->db[i] = SN_EMPTY_SLOT; + +} + +int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { + freez(collection_handle); + return 0; +} + +// ---------------------------------------------------------------------------- + +// get the total duration in seconds of the round robin database +#define rrddim_duration(st) (( (time_t)(rd)->rrdset->counter >= (time_t)(rd)->rrdset->entries ? (time_t)(rd)->rrdset->entries : (time_t)(rd)->rrdset->counter ) * (time_t)(rd)->rrdset->update_every) + +// get the last slot updated in the round robin database +#define rrddim_last_slot(rd) ((size_t)(((rd)->rrdset->current_entry == 0) ? (rd)->rrdset->entries - 1 : (rd)->rrdset->current_entry - 1)) + +// return the slot that has the oldest value +#define rrddim_first_slot(rd) ((size_t)((rd)->rrdset->counter >= (size_t)(rd)->rrdset->entries ? (rd)->rrdset->current_entry : 0)) + +// get the slot of the round robin database, for the given timestamp (t) +// it always returns a valid slot, although may not be for the time requested if the time is outside the round robin database +// only valid when not using dbengine +static inline size_t rrddim_time2slot(RRDDIM *rd, time_t t) { + size_t ret = 0; + time_t last_entry_t = rrddim_query_latest_time((STORAGE_METRIC_HANDLE *)rd); + time_t first_entry_t = rrddim_query_oldest_time((STORAGE_METRIC_HANDLE *)rd); + size_t entries = rd->rrdset->entries; + size_t first_slot = rrddim_first_slot(rd); + size_t last_slot = rrddim_last_slot(rd); + size_t update_every = rd->rrdset->update_every; + + if(t >= last_entry_t) { + // the requested time is after the last entry we have + ret = last_slot; + } + else { + if(t <= first_entry_t) { + // the requested time is before the first entry we have + ret = first_slot; + } + else { + if(last_slot >= (size_t)((last_entry_t - t) / update_every)) + ret = last_slot - ((last_entry_t - t) / update_every); + else + ret = last_slot - ((last_entry_t - t) / update_every) + entries; + } + } + + if(unlikely(ret >= entries)) { + error("INTERNAL ERROR: rrddim_time2slot() on %s returns values outside entries", rrddim_name(rd)); + ret = entries - 1; + } + + return ret; +} + +// get the timestamp of a specific slot in the round robin database +// only valid when not using dbengine +static inline time_t rrddim_slot2time(RRDDIM *rd, size_t slot) { + time_t ret; + time_t last_entry_t = rrddim_query_latest_time((STORAGE_METRIC_HANDLE *)rd); + time_t first_entry_t = rrddim_query_oldest_time((STORAGE_METRIC_HANDLE *)rd); + size_t entries = rd->rrdset->entries; + size_t last_slot = rrddim_last_slot(rd); + size_t update_every = rd->rrdset->update_every; + + if(slot >= entries) { + error("INTERNAL ERROR: caller of rrddim_slot2time() gives invalid slot %zu", slot); + slot = entries - 1; + } + + if(slot > last_slot) + ret = last_entry_t - (time_t)(update_every * (last_slot - slot + entries)); + else + ret = last_entry_t - (time_t)(update_every * (last_slot - slot)); + + if(unlikely(ret < first_entry_t)) { + error("INTERNAL ERROR: rrddim_slot2time() on %s returns time too far in the past", rrddim_name(rd)); + ret = first_entry_t; + } + + if(unlikely(ret > last_entry_t)) { + error("INTERNAL ERROR: rrddim_slot2time() on %s returns time into the future", rrddim_name(rd)); + ret = last_entry_t; + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// RRDDIM legacy database query functions + +void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, time_t start_time, time_t end_time) { + RRDDIM *rd = (RRDDIM *)db_metric_handle; + + handle->rd = rd; + handle->start_time_s = start_time; + handle->end_time_s = end_time; + struct mem_query_handle* h = mallocz(sizeof(struct mem_query_handle)); + h->slot = rrddim_time2slot(rd, start_time); + h->last_slot = rrddim_time2slot(rd, end_time); + h->dt = rd->rrdset->update_every; + + h->next_timestamp = start_time; + h->slot_timestamp = rrddim_slot2time(rd, h->slot); + h->last_timestamp = rrddim_slot2time(rd, h->last_slot); + + // info("RRDDIM QUERY INIT: start %ld, end %ld, next %ld, first %ld, last %ld, dt %ld", start_time, end_time, h->next_timestamp, h->slot_timestamp, h->last_timestamp, h->dt); + + handle->handle = (STORAGE_QUERY_HANDLE *)h; +} + +// Returns the metric and sets its timestamp into current_time +// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags) +// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES +STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *handle) { + RRDDIM *rd = handle->rd; + struct mem_query_handle* h = (struct mem_query_handle*)handle->handle; + size_t entries = rd->rrdset->entries; + size_t slot = h->slot; + + STORAGE_POINT sp; + sp.count = 1; + + time_t this_timestamp = h->next_timestamp; + h->next_timestamp += h->dt; + + // set this timestamp for our caller + sp.start_time = this_timestamp - h->dt; + sp.end_time = this_timestamp; + + if(unlikely(this_timestamp < h->slot_timestamp)) { + storage_point_empty(sp, sp.start_time, sp.end_time); + return sp; + } + + if(unlikely(this_timestamp > h->last_timestamp)) { + storage_point_empty(sp, sp.start_time, sp.end_time); + return sp; + } + + storage_number n = rd->db[slot++]; + if(unlikely(slot >= entries)) slot = 0; + + h->slot = slot; + h->slot_timestamp += h->dt; + + sp.anomaly_count = is_storage_number_anomalous(n) ? 1 : 0; + sp.flags = (n & SN_USER_FLAGS); + sp.min = sp.max = sp.sum = unpack_storage_number(n); + + return sp; +} + +int rrddim_query_is_finished(struct storage_engine_query_handle *handle) { + struct mem_query_handle* h = (struct mem_query_handle*)handle->handle; + return (h->next_timestamp > handle->end_time_s); +} + +void rrddim_query_finalize(struct storage_engine_query_handle *handle) { +#ifdef NETDATA_INTERNAL_CHECKS + if(!rrddim_query_is_finished(handle)) + error("QUERY: query for chart '%s' dimension '%s' has been stopped unfinished", rrdset_id(handle->rd->rrdset), rrddim_name(handle->rd)); +#endif + freez(handle->handle); +} + +time_t rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { + RRDDIM *rd = (RRDDIM *)db_metric_handle; + return rd->rrdset->last_updated.tv_sec; +} + +time_t rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { + RRDDIM *rd = (RRDDIM *)db_metric_handle; + return (time_t)(rd->rrdset->last_updated.tv_sec - rrddim_duration(rd)); +} diff --git a/database/ram/rrddim_mem.h b/database/ram/rrddim_mem.h new file mode 100644 index 0000000..79c59f1 --- /dev/null +++ b/database/ram/rrddim_mem.h @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDDIMMEM_H +#define NETDATA_RRDDIMMEM_H + +#include "database/rrd.h" + +struct mem_collect_handle { + RRDDIM *rd; + long slot; + long entries; +}; + +struct mem_query_handle { + time_t dt; + time_t next_timestamp; + time_t last_timestamp; + time_t slot_timestamp; + size_t slot; + size_t last_slot; +}; + +STORAGE_METRIC_HANDLE *rrddim_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance); +STORAGE_METRIC_HANDLE *rrddim_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); +STORAGE_METRIC_HANDLE *rrddim_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle); +void rrddim_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle); + +STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); +void rrddim_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); + +STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg); +void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); +void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, + NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, + uint16_t count, + uint16_t anomaly_count, + SN_FLAGS flags); +void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle); +int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle); + +void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, time_t start_time, time_t end_time); +STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *handle); +int rrddim_query_is_finished(struct storage_engine_query_handle *handle); +void rrddim_query_finalize(struct storage_engine_query_handle *handle); +time_t rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); +time_t rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); + +#endif diff --git a/database/rrd.c b/database/rrd.c new file mode 100644 index 0000000..df36441 --- /dev/null +++ b/database/rrd.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#define NETDATA_RRD_INTERNALS 1 + +#include "rrd.h" +#include "storage_engine.h" + +// ---------------------------------------------------------------------------- +// globals + +/* +// if not zero it gives the time (in seconds) to remove un-updated dimensions +// DO NOT ENABLE +// if dimensions are removed, the chart generation will have to run again +int rrd_delete_unupdated_dimensions = 0; +*/ + +int default_rrd_update_every = UPDATE_EVERY; +int default_rrd_history_entries = RRD_DEFAULT_HISTORY_ENTRIES; +#ifdef ENABLE_DBENGINE +RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; +#else +RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; +#endif +int gap_when_lost_iterations_above = 1; + + +// ---------------------------------------------------------------------------- +// RRD - memory modes + +inline const char *rrd_memory_mode_name(RRD_MEMORY_MODE id) { + switch(id) { + case RRD_MEMORY_MODE_RAM: + return RRD_MEMORY_MODE_RAM_NAME; + + case RRD_MEMORY_MODE_MAP: + return RRD_MEMORY_MODE_MAP_NAME; + + case RRD_MEMORY_MODE_NONE: + return RRD_MEMORY_MODE_NONE_NAME; + + case RRD_MEMORY_MODE_SAVE: + return RRD_MEMORY_MODE_SAVE_NAME; + + case RRD_MEMORY_MODE_ALLOC: + return RRD_MEMORY_MODE_ALLOC_NAME; + + case RRD_MEMORY_MODE_DBENGINE: + return RRD_MEMORY_MODE_DBENGINE_NAME; + } + + STORAGE_ENGINE* eng = storage_engine_get(id); + if (eng) { + return eng->name; + } + + return RRD_MEMORY_MODE_SAVE_NAME; +} + +RRD_MEMORY_MODE rrd_memory_mode_id(const char *name) { + STORAGE_ENGINE* eng = storage_engine_find(name); + if (eng) { + return eng->id; + } + + return RRD_MEMORY_MODE_SAVE; +} + + +// ---------------------------------------------------------------------------- +// RRD - algorithms types + +RRD_ALGORITHM rrd_algorithm_id(const char *name) { + if(strcmp(name, RRD_ALGORITHM_INCREMENTAL_NAME) == 0) + return RRD_ALGORITHM_INCREMENTAL; + + else if(strcmp(name, RRD_ALGORITHM_ABSOLUTE_NAME) == 0) + return RRD_ALGORITHM_ABSOLUTE; + + else if(strcmp(name, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL_NAME) == 0) + return RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL; + + else if(strcmp(name, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL_NAME) == 0) + return RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL; + + else + return RRD_ALGORITHM_ABSOLUTE; +} + +const char *rrd_algorithm_name(RRD_ALGORITHM algorithm) { + switch(algorithm) { + case RRD_ALGORITHM_ABSOLUTE: + default: + return RRD_ALGORITHM_ABSOLUTE_NAME; + + case RRD_ALGORITHM_INCREMENTAL: + return RRD_ALGORITHM_INCREMENTAL_NAME; + + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + return RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL_NAME; + + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + return RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL_NAME; + } +} + + +// ---------------------------------------------------------------------------- +// RRD - chart types + +inline RRDSET_TYPE rrdset_type_id(const char *name) { + if(unlikely(strcmp(name, RRDSET_TYPE_AREA_NAME) == 0)) + return RRDSET_TYPE_AREA; + + else if(unlikely(strcmp(name, RRDSET_TYPE_STACKED_NAME) == 0)) + return RRDSET_TYPE_STACKED; + + else // if(unlikely(strcmp(name, RRDSET_TYPE_LINE_NAME) == 0)) + return RRDSET_TYPE_LINE; +} + +const char *rrdset_type_name(RRDSET_TYPE chart_type) { + switch(chart_type) { + case RRDSET_TYPE_LINE: + default: + return RRDSET_TYPE_LINE_NAME; + + case RRDSET_TYPE_AREA: + return RRDSET_TYPE_AREA_NAME; + + case RRDSET_TYPE_STACKED: + return RRDSET_TYPE_STACKED_NAME; + } +} + +// ---------------------------------------------------------------------------- +// RRD - cache directory + +char *rrdset_cache_dir(RRDHOST *host, const char *id) { + char *ret = NULL; + + char b[FILENAME_MAX + 1]; + char n[FILENAME_MAX + 1]; + rrdset_strncpyz_name(b, id, FILENAME_MAX); + + snprintfz(n, FILENAME_MAX, "%s/%s", host->cache_dir, b); + ret = strdupz(n); + + if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) { + int r = mkdir(ret, 0775); + if(r != 0 && errno != EEXIST) + error("Cannot create directory '%s'", ret); + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// RRD - string management + +STRING *rrd_string_strdupz(const char *s) { + if(unlikely(!s || !*s)) return string_strdupz(s); + + char *tmp = strdupz(s); + json_fix_string(tmp); + STRING *ret = string_strdupz(tmp); + freez(tmp); + return ret; +} diff --git a/database/rrd.h b/database/rrd.h new file mode 100644 index 0000000..0796ff9 --- /dev/null +++ b/database/rrd.h @@ -0,0 +1,1413 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRD_H +#define NETDATA_RRD_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +// non-existing structs instead of voids +// to enable type checking at compile time +typedef struct storage_instance STORAGE_INSTANCE; +typedef struct storage_metric_handle STORAGE_METRIC_HANDLE; +typedef struct storage_alignment STORAGE_METRICS_GROUP; + +// forward typedefs +typedef struct rrdhost RRDHOST; +typedef struct rrddim RRDDIM; +typedef struct rrdset RRDSET; +typedef struct rrdcalc RRDCALC; +typedef struct rrdcalctemplate RRDCALCTEMPLATE; +typedef struct alarm_entry ALARM_ENTRY; + +typedef struct rrdfamily_acquired RRDFAMILY_ACQUIRED; +typedef struct rrdvar_acquired RRDVAR_ACQUIRED; +typedef struct rrdsetvar_acquired RRDSETVAR_ACQUIRED; +typedef struct rrdcalc_acquired RRDCALC_ACQUIRED; + +typedef struct rrdhost_acquired RRDHOST_ACQUIRED; +typedef struct rrdset_acquired RRDSET_ACQUIRED; +typedef struct rrddim_acquired RRDDIM_ACQUIRED; + +typedef void *ml_host_t; +typedef void *ml_dimension_t; + +typedef enum { + QUERY_SOURCE_UNKNOWN, + QUERY_SOURCE_API_DATA, + QUERY_SOURCE_API_BADGE, + QUERY_SOURCE_API_WEIGHTS, + QUERY_SOURCE_HEALTH, + QUERY_SOURCE_ML, + QUERY_SOURCE_UNITTEST, +} QUERY_SOURCE; + +// forward declarations +struct rrddim_tier; + +#ifdef ENABLE_DBENGINE +struct rrdeng_page_descr; +struct rrdengine_instance; +struct pg_cache_page_index; +#endif + +#include "daemon/common.h" +#include "web/api/queries/query.h" +#include "web/api/queries/rrdr.h" +#include "rrdvar.h" +#include "rrdsetvar.h" +#include "rrddimvar.h" +#include "rrdcalc.h" +#include "rrdcalctemplate.h" +#include "streaming/rrdpush.h" +#include "aclk/aclk_rrdhost_state.h" +#include "sqlite/sqlite_health.h" +#include "rrdcontext.h" + +extern bool unittest_running; +extern bool dbengine_enabled; +extern size_t storage_tiers; +extern size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS]; + +typedef enum { + RRD_BACKFILL_NONE, + RRD_BACKFILL_FULL, + RRD_BACKFILL_NEW +} RRD_BACKFILL; + +extern RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS]; + +enum { + CONTEXT_FLAGS_ARCHIVE = 0x01, + CONTEXT_FLAGS_CHART = 0x02, + CONTEXT_FLAGS_CONTEXT = 0x04 +}; + +struct context_param { + RRDDIM *rd; + time_t first_entry_t; + time_t last_entry_t; + uint8_t flags; +}; + +#define UPDATE_EVERY 1 +#define UPDATE_EVERY_MAX 3600 + +#define RRD_DEFAULT_HISTORY_ENTRIES 3600 +#define RRD_HISTORY_ENTRIES_MAX (86400*365) + +extern int default_rrd_update_every; +extern int default_rrd_history_entries; +extern int gap_when_lost_iterations_above; +extern time_t rrdset_free_obsolete_time; + +#define RRD_ID_LENGTH_MAX 200 + +typedef long long total_number; +#define TOTAL_NUMBER_FORMAT "%lld" + +// ---------------------------------------------------------------------------- +// chart types + +typedef enum rrdset_type { + RRDSET_TYPE_LINE = 0, + RRDSET_TYPE_AREA = 1, + RRDSET_TYPE_STACKED = 2 +} RRDSET_TYPE; + +#define RRDSET_TYPE_LINE_NAME "line" +#define RRDSET_TYPE_AREA_NAME "area" +#define RRDSET_TYPE_STACKED_NAME "stacked" + +RRDSET_TYPE rrdset_type_id(const char *name); +const char *rrdset_type_name(RRDSET_TYPE chart_type); + + +// ---------------------------------------------------------------------------- +// memory mode + +typedef enum rrd_memory_mode { + RRD_MEMORY_MODE_NONE = 0, + RRD_MEMORY_MODE_RAM = 1, + RRD_MEMORY_MODE_MAP = 2, + RRD_MEMORY_MODE_SAVE = 3, + RRD_MEMORY_MODE_ALLOC = 4, + RRD_MEMORY_MODE_DBENGINE = 5, + + // this is 8-bit +} RRD_MEMORY_MODE; + +#define RRD_MEMORY_MODE_NONE_NAME "none" +#define RRD_MEMORY_MODE_RAM_NAME "ram" +#define RRD_MEMORY_MODE_MAP_NAME "map" +#define RRD_MEMORY_MODE_SAVE_NAME "save" +#define RRD_MEMORY_MODE_ALLOC_NAME "alloc" +#define RRD_MEMORY_MODE_DBENGINE_NAME "dbengine" + +extern RRD_MEMORY_MODE default_rrd_memory_mode; + +const char *rrd_memory_mode_name(RRD_MEMORY_MODE id); +RRD_MEMORY_MODE rrd_memory_mode_id(const char *name); + + +// ---------------------------------------------------------------------------- +// algorithms types + +typedef enum rrd_algorithm { + RRD_ALGORITHM_ABSOLUTE = 0, + RRD_ALGORITHM_INCREMENTAL = 1, + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL = 2, + RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL = 3, + + // this is 8-bit +} RRD_ALGORITHM; + +#define RRD_ALGORITHM_ABSOLUTE_NAME "absolute" +#define RRD_ALGORITHM_INCREMENTAL_NAME "incremental" +#define RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL_NAME "percentage-of-incremental-row" +#define RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL_NAME "percentage-of-absolute-row" + +RRD_ALGORITHM rrd_algorithm_id(const char *name); +const char *rrd_algorithm_name(RRD_ALGORITHM algorithm); + +// ---------------------------------------------------------------------------- +// RRD FAMILY + +const RRDFAMILY_ACQUIRED *rrdfamily_add_and_acquire(RRDHOST *host, const char *id); +void rrdfamily_release(RRDHOST *host, const RRDFAMILY_ACQUIRED *rfa); +void rrdfamily_index_init(RRDHOST *host); +void rrdfamily_index_destroy(RRDHOST *host); +DICTIONARY *rrdfamily_rrdvars_dict(const RRDFAMILY_ACQUIRED *rf); + + +// ---------------------------------------------------------------------------- +// flags & options + +// options are permanent configuration options (no atomics to alter/access them) +typedef enum rrddim_options { + RRDDIM_OPTION_NONE = 0, + RRDDIM_OPTION_HIDDEN = (1 << 0), // this dimension will not be offered to callers + RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS = (1 << 1), // do not offer RESET or OVERFLOW info to callers + RRDDIM_OPTION_BACKFILLED_HIGH_TIERS = (1 << 2), // when set, we have backfilled higher tiers + + // this is 8-bit +} RRDDIM_OPTIONS; + +#define rrddim_option_check(rd, option) ((rd)->options & (option)) +#define rrddim_option_set(rd, option) (rd)->options |= (option) +#define rrddim_option_clear(rd, option) (rd)->options &= ~(option) + +// flags are runtime changing status flags (atomics are required to alter/access them) +typedef enum rrddim_flags { + RRDDIM_FLAG_NONE = 0, + RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 0), + + RRDDIM_FLAG_OBSOLETE = (1 << 2), // this is marked by the collector/module as obsolete + // No new values have been collected for this dimension since agent start, or it was marked RRDDIM_FLAG_OBSOLETE at + // least rrdset_free_obsolete_time seconds ago. + RRDDIM_FLAG_ARCHIVED = (1 << 3), + RRDDIM_FLAG_METADATA_UPDATE = (1 << 4), // Metadata needs to go to the database + + RRDDIM_FLAG_META_HIDDEN = (1 << 6), // Status of hidden option in the metadata database + + // this is 8 bit +} RRDDIM_FLAGS; + +#define rrddim_flag_check(rd, flag) (__atomic_load_n(&((rd)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define rrddim_flag_set(rd, flag) __atomic_or_fetch(&((rd)->flags), (flag), __ATOMIC_SEQ_CST) +#define rrddim_flag_clear(rd, flag) __atomic_and_fetch(&((rd)->flags), ~(flag), __ATOMIC_SEQ_CST) + +typedef enum rrdlabel_source { + RRDLABEL_SRC_AUTO = (1 << 0), // set when Netdata found the label by some automation + RRDLABEL_SRC_CONFIG = (1 << 1), // set when the user configured the label + RRDLABEL_SRC_K8S = (1 << 2), // set when this label is found from k8s (RRDLABEL_SRC_AUTO should also be set) + RRDLABEL_SRC_ACLK = (1 << 3), // set when this label is found from ACLK (RRDLABEL_SRC_AUTO should also be set) + + // more sources can be added here + + RRDLABEL_FLAG_PERMANENT = (1 << 29), // set when this label should never be removed (can be overwritten though) + RRDLABEL_FLAG_OLD = (1 << 30), // marks for rrdlabels internal use - they are not exposed outside rrdlabels + RRDLABEL_FLAG_NEW = (1 << 31) // marks for rrdlabels internal use - they are not exposed outside rrdlabels +} RRDLABEL_SRC; + +#define RRDLABEL_FLAG_INTERNAL (RRDLABEL_FLAG_OLD | RRDLABEL_FLAG_NEW | RRDLABEL_FLAG_PERMANENT) + +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length); + +DICTIONARY *rrdlabels_create(void); +void rrdlabels_destroy(DICTIONARY *labels_dict); +void rrdlabels_add(DICTIONARY *dict, const char *name, const char *value, RRDLABEL_SRC ls); +void rrdlabels_add_pair(DICTIONARY *dict, const char *string, RRDLABEL_SRC ls); +void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const char *key, const char *quote, const char *null); +void rrdlabels_get_value_to_char_or_null(DICTIONARY *labels, char **value, const char *key); +void rrdlabels_flush(DICTIONARY *labels_dict); + +void rrdlabels_unmark_all(DICTIONARY *labels); +void rrdlabels_remove_all_unmarked(DICTIONARY *labels); + +int rrdlabels_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data); +int rrdlabels_sorted_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data); + +void rrdlabels_log_to_buffer(DICTIONARY *labels, BUFFER *wb); +bool rrdlabels_match_simple_pattern(DICTIONARY *labels, const char *simple_pattern_txt); +bool rrdlabels_match_simple_pattern_parsed(DICTIONARY *labels, SIMPLE_PATTERN *pattern, char equal); +int rrdlabels_to_buffer(DICTIONARY *labels, BUFFER *wb, const char *before_each, const char *equal, const char *quote, const char *between_them, bool (*filter_callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *filter_data, void (*name_sanitizer)(char *dst, const char *src, size_t dst_size), void (*value_sanitizer)(char *dst, const char *src, size_t dst_size)); + +void rrdlabels_migrate_to_these(DICTIONARY *dst, DICTIONARY *src); +void rrdlabels_copy(DICTIONARY *dst, DICTIONARY *src); + +void reload_host_labels(void); +void rrdset_update_rrdlabels(RRDSET *st, DICTIONARY *new_rrdlabels); +void rrdset_save_rrdlabels_to_sql(RRDSET *st); +void rrdhost_set_is_parent_label(int count); +int rrdlabels_unittest(void); + +// unfortunately this break when defined in exporting_engine.h +bool exporting_labels_filter_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data); + +// ---------------------------------------------------------------------------- +// RRD DIMENSION - this is a metric + +struct rrddim { + uuid_t metric_uuid; // global UUID for this metric (unique_across hosts) + + // ------------------------------------------------------------------------ + // dimension definition + + STRING *id; // the id of this dimension (for internal identification) + STRING *name; // the name of this dimension (as presented to user) + + RRD_ALGORITHM algorithm:8; // the algorithm that is applied to add new collected values + RRDDIM_OPTIONS options:8; // permanent configuration options + RRD_MEMORY_MODE rrd_memory_mode:8; // the memory mode for this dimension + /*RRDDIM_FLAGS*/ uint8_t flags; // run time changing status flags + + bool updated; // 1 when the dimension has been updated since the last processing + bool exposed; // 1 when set what have sent this dimension to the central netdata + + collected_number multiplier; // the multiplier of the collected values + collected_number divisor; // the divider of the collected values + + int update_every; // every how many seconds is this updated + // TODO - remove update_every from rrddim + // it is always the same in rrdset + + // ------------------------------------------------------------------------ + // operational state members + + ml_dimension_t ml_dimension; // machine learning data about this dimension + + // ------------------------------------------------------------------------ + // linking to siblings and parents + + struct rrdset *rrdset; + + RRDMETRIC_ACQUIRED *rrdmetric; // the rrdmetric of this dimension + + // ------------------------------------------------------------------------ + // data collection members + + struct rrddim_tier *tiers[RRD_STORAGE_TIERS]; // our tiers of databases + + struct timeval last_collected_time; // when was this dimension last updated + // this is actual date time we updated the last_collected_value + // THIS IS DIFFERENT FROM THE SAME MEMBER OF RRDSET + + size_t collections_counter; // the number of times we added values to this rrddim + collected_number collected_value_max; // the absolute maximum of the collected value + + NETDATA_DOUBLE calculated_value; // the current calculated value, after applying the algorithm - resets to zero after being used + NETDATA_DOUBLE last_calculated_value; // the last calculated value processed + NETDATA_DOUBLE last_stored_value; // the last value as stored in the database (after interpolation) + + collected_number collected_value; // the current value, as collected - resets to 0 after being used + collected_number last_collected_value; // the last value that was collected, after being processed + +#ifdef NETDATA_LOG_COLLECTION_ERRORS + usec_t rrddim_store_metric_last_ut; // the timestamp we last called rrddim_store_metric() + size_t rrddim_store_metric_count; // the rrddim_store_metric() counter + const char *rrddim_store_metric_last_caller; // the name of the function that last called rrddim_store_metric() +#endif + + // ------------------------------------------------------------------------ + // db mode RAM, SAVE, MAP, ALLOC, NONE specifics + // TODO - they should be managed by storage engine + // (RRDDIM_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) + + size_t memsize; // the memory allocated for this dimension (without RRDDIM) + void *rd_on_file; // pointer to the header written on disk + storage_number *db; // the array of values +}; + +#define rrddim_id(rd) string2str((rd)->id) +#define rrddim_name(rd) string2str((rd) ->name) + +// returns the RRDDIM cache filename, or NULL if it does not exist +const char *rrddim_cache_filename(RRDDIM *rd); + +// updated the header with the latest RRDDIM value, for memory mode MAP and SAVE +void rrddim_memory_file_update(RRDDIM *rd); + +// free the memory file structures for memory mode MAP and SAVE +void rrddim_memory_file_free(RRDDIM *rd); + +bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MODE memory_mode); + +// return the v019 header size of RRDDIM files +size_t rrddim_memory_file_header_size(void); + +void rrddim_memory_file_save(RRDDIM *rd); + +// ---------------------------------------------------------------------------- + +typedef struct storage_point { + NETDATA_DOUBLE min; // when count > 1, this is the minimum among them + NETDATA_DOUBLE max; // when count > 1, this is the maximum among them + NETDATA_DOUBLE sum; // the point sum - divided by count gives the average + + // end_time - start_time = point duration + time_t start_time; // the time the point starts + time_t end_time; // the time the point ends + + unsigned count; // the number of original points aggregated + unsigned anomaly_count; // the number of original points found anomalous + + SN_FLAGS flags; // flags stored with the point +} STORAGE_POINT; + +#define storage_point_unset(x) do { \ + (x).min = (x).max = (x).sum = NAN; \ + (x).count = 0; \ + (x).anomaly_count = 0; \ + (x).flags = SN_FLAG_NONE; \ + (x).start_time = 0; \ + (x).end_time = 0; \ + } while(0) + +#define storage_point_empty(x, start_t, end_t) do { \ + (x).min = (x).max = (x).sum = NAN; \ + (x).count = 1; \ + (x).anomaly_count = 0; \ + (x).flags = SN_FLAG_NONE; \ + (x).start_time = start_t; \ + (x).end_time = end_t; \ + } while(0) + +#define storage_point_is_unset(x) (!(x).count) +#define storage_point_is_empty(x) (!netdata_double_isnumber((x).sum)) + +// ---------------------------------------------------------------------------- +// engine-specific iterator state for dimension data collection +typedef struct storage_collect_handle STORAGE_COLLECT_HANDLE; + +// ---------------------------------------------------------------------------- +// engine-specific iterator state for dimension data queries +typedef struct storage_query_handle STORAGE_QUERY_HANDLE; + +// ------------------------------------------------------------------------ +// function pointers that handle data collection +struct storage_engine_collect_ops { + // an initialization function to run before starting collection + STORAGE_COLLECT_HANDLE *(*init)(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg); + + // run this to store each metric into the database + void (*store_metric)(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags); + + // run this to flush / reset the current data collection sequence + void (*flush)(STORAGE_COLLECT_HANDLE *collection_handle); + + // a finalization function to run after collection is over + // returns 1 if it's safe to delete the dimension + int (*finalize)(STORAGE_COLLECT_HANDLE *collection_handle); + + void (*change_collection_frequency)(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); + + STORAGE_METRICS_GROUP *(*metrics_group_get)(STORAGE_INSTANCE *db_instance, uuid_t *uuid); + void (*metrics_group_release)(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *sa); +}; + +// ---------------------------------------------------------------------------- +// iterator state for RRD dimension data queries +struct storage_engine_query_handle { + RRDDIM *rd; + time_t start_time_s; + time_t end_time_s; + STORAGE_QUERY_HANDLE* handle; +}; + +// function pointers that handle database queries +struct storage_engine_query_ops { + // run this before starting a series of next_metric() database queries + void (*init)(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, time_t start_time, time_t end_time); + + // run this to load each metric number from the database + STORAGE_POINT (*next_metric)(struct storage_engine_query_handle *handle); + + // run this to test if the series of next_metric() database queries is finished + int (*is_finished)(struct storage_engine_query_handle *handle); + + // run this after finishing a series of load_metric() database queries + void (*finalize)(struct storage_engine_query_handle *handle); + + // get the timestamp of the last entry of this metric + time_t (*latest_time)(STORAGE_METRIC_HANDLE *db_metric_handle); + + // get the timestamp of the first entry of this metric + time_t (*oldest_time)(STORAGE_METRIC_HANDLE *db_metric_handle); +}; + +typedef struct storage_engine STORAGE_ENGINE; + +// ------------------------------------------------------------------------ +// function pointers for all APIs provided by a storage engine +typedef struct storage_engine_api { + // metric management + STORAGE_METRIC_HANDLE *(*metric_get)(STORAGE_INSTANCE *instance, uuid_t *uuid); + STORAGE_METRIC_HANDLE *(*metric_get_or_create)(RRDDIM *rd, STORAGE_INSTANCE *instance); + void (*metric_release)(STORAGE_METRIC_HANDLE *); + STORAGE_METRIC_HANDLE *(*metric_dup)(STORAGE_METRIC_HANDLE *); + + // operations + struct storage_engine_collect_ops collect_ops; + struct storage_engine_query_ops query_ops; +} STORAGE_ENGINE_API; + +struct storage_engine { + RRD_MEMORY_MODE id; + const char* name; + STORAGE_ENGINE_API api; +}; + +STORAGE_ENGINE* storage_engine_get(RRD_MEMORY_MODE mmode); +STORAGE_ENGINE* storage_engine_find(const char* name); + +// ---------------------------------------------------------------------------- +// Storage tier data for every dimension + +struct rrddim_tier { + size_t tier_grouping; + STORAGE_METRIC_HANDLE *db_metric_handle; // the metric handle inside the database + STORAGE_COLLECT_HANDLE *db_collection_handle; // the data collection handle + STORAGE_POINT virtual_point; + time_t next_point_time; + struct storage_engine_collect_ops *collect_ops; + struct storage_engine_query_ops *query_ops; +}; + +void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now); + +// ---------------------------------------------------------------------------- +// these loop macros make sure the linked list is accessed with the right lock + +#define rrddim_foreach_read(rd, st) \ + dfe_start_read((st)->rrddim_root_index, rd) + +#define rrddim_foreach_write(rd, st) \ + dfe_start_write((st)->rrddim_root_index, rd) + +#define rrddim_foreach_reentrant(rd, st) \ + dfe_start_reentrant((st)->rrddim_root_index, rd) + +#define rrddim_foreach_done(rd) \ + dfe_done(rd) + +// ---------------------------------------------------------------------------- +// RRDSET - this is a chart + +// use this for configuration flags, not for state control +// flags are set/unset in a manner that is not thread safe +// and may lead to missing information. + +typedef enum rrdset_flags { + RRDSET_FLAG_DETAIL = (1 << 1), // if set, the data set should be considered as a detail of another + // (the master data set should be the one that has the same family and is not detail) + RRDSET_FLAG_DEBUG = (1 << 2), // enables or disables debugging for a chart + RRDSET_FLAG_OBSOLETE = (1 << 3), // this is marked by the collector/module as obsolete + RRDSET_FLAG_EXPORTING_SEND = (1 << 4), // if set, this chart should be sent to Prometheus web API and external databases + RRDSET_FLAG_EXPORTING_IGNORE = (1 << 5), // if set, this chart should not be sent to Prometheus web API and external databases + + RRDSET_FLAG_UPSTREAM_SEND = (1 << 6), // if set, this chart should be sent upstream (streaming) + RRDSET_FLAG_UPSTREAM_IGNORE = (1 << 7), // if set, this chart should not be sent upstream (streaming) + RRDSET_FLAG_UPSTREAM_EXPOSED = (1 << 8), // if set, we have sent this chart definition to netdata parent (streaming) + + RRDSET_FLAG_STORE_FIRST = (1 << 9), // if set, do not eliminate the first collection during interpolation + RRDSET_FLAG_HETEROGENEOUS = (1 << 10), // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) + RRDSET_FLAG_HOMOGENEOUS_CHECK = (1 << 11), // if set, the chart should be checked to determine if the dimensions are homogeneous + RRDSET_FLAG_HIDDEN = (1 << 12), // if set, do not show this chart on the dashboard, but use it for exporting + RRDSET_FLAG_SYNC_CLOCK = (1 << 13), // if set, microseconds on next data collection will be ignored (the chart will be synced to now) + RRDSET_FLAG_OBSOLETE_DIMENSIONS = (1 << 14), // this is marked by the collector/module when a chart has obsolete dimensions + // No new values have been collected for this chart since agent start, or it was marked RRDSET_FLAG_OBSOLETE at + // least rrdset_free_obsolete_time seconds ago. + RRDSET_FLAG_ARCHIVED = (1 << 15), + RRDSET_FLAG_METADATA_UPDATE = (1 << 16), // Mark that metadata needs to be stored + RRDSET_FLAG_ANOMALY_DETECTION = (1 << 18), // flag to identify anomaly detection charts. + RRDSET_FLAG_INDEXED_ID = (1 << 19), // the rrdset is indexed by its id + RRDSET_FLAG_INDEXED_NAME = (1 << 20), // the rrdset is indexed by its name + + RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 21), + + RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS = (1 << 22), // the sending side has replication in progress + RRDSET_FLAG_SENDER_REPLICATION_FINISHED = (1 << 23), // the sending side has completed replication + RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS = (1 << 24), // the receiving side has replication in progress + RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED = (1 << 25), // the receiving side has completed replication + + RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 26), // a custom variable has been updated and needs to be exposed to parent +} RRDSET_FLAGS; + +#define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define rrdset_flag_set(st, flag) __atomic_or_fetch(&((st)->flags), flag, __ATOMIC_SEQ_CST) +#define rrdset_flag_clear(st, flag) __atomic_and_fetch(&((st)->flags), ~(flag), __ATOMIC_SEQ_CST) + +#define rrdset_is_replicating(st) (rrdset_flag_check(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS|RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS) \ + && !rrdset_flag_check(st, RRDSET_FLAG_SENDER_REPLICATION_FINISHED|RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED)) + +struct rrdset { + uuid_t chart_uuid; // the global UUID for this chart + + // ------------------------------------------------------------------------ + // chart configuration + + struct { + STRING *type; // the type of {type}.{id} + STRING *id; // the id of {type}.{id} + STRING *name; // the name of {type}.{name} + } parts; + + STRING *id; // the unique ID of the rrdset as {type}.{id} + STRING *name; // the unique name of the rrdset as {type}.{name} + STRING *family; // grouping sets under the same family + STRING *title; // title shown to user + STRING *units; // units of measurement + STRING *context; // the template of this data set + STRING *plugin_name; // the name of the plugin that generated this + STRING *module_name; // the name of the plugin module that generated this + + RRDSET_TYPE chart_type; // line, area, stacked + + long priority; // the sorting priority of this chart + + int update_every; // data collection frequency + + DICTIONARY *rrdlabels; // chart labels + DICTIONARY *rrdsetvar_root_index; // chart variables + DICTIONARY *rrddimvar_root_index; // dimension variables + // we use this dictionary to manage their allocation + + // ------------------------------------------------------------------------ + // operational state members + + RRDSET_FLAGS flags; // flags + RRD_MEMORY_MODE rrd_memory_mode; // the db mode of this rrdset + + DICTIONARY *rrddim_root_index; // dimensions index + + int gap_when_lost_iterations_above; // after how many lost iterations a gap should be stored + // netdata will interpolate values for gaps lower than this + // TODO - use the global - all charts have the same value + + STORAGE_METRICS_GROUP *storage_metrics_groups[RRD_STORAGE_TIERS]; + + // ------------------------------------------------------------------------ + // linking to siblings and parents + + RRDHOST *rrdhost; // pointer to RRDHOST this chart belongs to + + RRDINSTANCE_ACQUIRED *rrdinstance; // the rrdinstance of this chart + RRDCONTEXT_ACQUIRED *rrdcontext; // the rrdcontext this chart belongs to + + // ------------------------------------------------------------------------ + // data collection members + + size_t counter; // the number of times we added values to this database + size_t counter_done; // the number of times rrdset_done() has been called + + time_t last_accessed_time; // the last time this RRDSET has been accessed + + usec_t usec_since_last_update; // the time in microseconds since the last collection of data + + struct timeval last_updated; // when this data set was last updated (updated every time the rrd_stats_done() function) + struct timeval last_collected_time; // when did this data set last collected values + + size_t rrdlabels_last_saved_version; + + DICTIONARY *functions_view; // collector functions this rrdset supports, can be NULL + + // ------------------------------------------------------------------------ + // data collection - streaming to parents, temp variables + + time_t upstream_resync_time; // the timestamp up to which we should resync clock upstream + + // ------------------------------------------------------------------------ + // db mode SAVE, MAP specifics + // TODO - they should be managed by storage engine + // (RRDSET_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) + + char *cache_dir; // the directory to store dimensions + unsigned long memsize; // how much mem we have allocated for this (without dimensions) + void *st_on_file; // compatibility with V019 RRDSET files + + // ------------------------------------------------------------------------ + // db mode RAM, SAVE, MAP, ALLOC, NONE specifics + // TODO - they should be managed by storage engine + // (RRDSET_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) + + long entries; // total number of entries in the data set + + long current_entry; // the entry that is currently being updated + // it goes around in a round-robin fashion + + // ------------------------------------------------------------------------ + // exporting to 3rd party time-series members + // TODO - they should be managed by exporting engine + // (RRDSET_EXPORTING_STATE ptr to an undefined structure, and a call to clean this up during destruction) + + RRDSET_FLAGS *exporting_flags; // array of flags for exporting connector instances + + // ------------------------------------------------------------------------ + // health monitoring members + // TODO - they should be managed by health + // (RRDSET_HEALTH_STATE ptr to an undefined structure, and a call to clean this up during destruction) + + NETDATA_DOUBLE green; // green threshold for this chart + NETDATA_DOUBLE red; // red threshold for this chart + + DICTIONARY *rrdvars; // RRDVAR index for this chart + const RRDFAMILY_ACQUIRED *rrdfamily; // pointer to RRDFAMILY dictionary item, this chart belongs to + + struct { + netdata_rwlock_t rwlock; // protection for RRDCALC *base + RRDCALC *base; // double linked list of RRDCALC related to this RRDSET + } alerts; + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + struct { + bool log_next_data_collection; + bool start_streaming; + time_t after; + time_t before; + } replay; +#endif // NETDATA_LOG_REPLICATION_REQUESTS +}; + +#define rrdset_plugin_name(st) string2str((st)->plugin_name) +#define rrdset_module_name(st) string2str((st)->module_name) +#define rrdset_units(st) string2str((st)->units) +#define rrdset_parts_type(st) string2str((st)->parts.type) +#define rrdset_family(st) string2str((st)->family) +#define rrdset_title(st) string2str((st)->title) +#define rrdset_context(st) string2str((st)->context) +#define rrdset_name(st) string2str((st)->name) +#define rrdset_id(st) string2str((st)->id) + +STRING *rrd_string_strdupz(const char *s); + +// ---------------------------------------------------------------------------- +// these loop macros make sure the linked list is accessed with the right lock + +#define rrdset_foreach_read(st, host) \ + dfe_start_read((host)->rrdset_root_index, st) + +#define rrdset_foreach_write(st, host) \ + dfe_start_write((host)->rrdset_root_index, st) + +#define rrdset_foreach_reentrant(st, host) \ + dfe_start_reentrant((host)->rrdset_root_index, st) + +#define rrdset_foreach_done(st) \ + dfe_done(st) + +#define rrdset_number_of_dimensions(st) \ + dictionary_entries((st)->rrddim_root_index) + +void rrdset_memory_file_save(RRDSET *st); +void rrdset_memory_file_free(RRDSET *st); +void rrdset_memory_file_update(RRDSET *st); +const char *rrdset_cache_filename(RRDSET *st); +bool rrdset_memory_load_or_create_map_save(RRDSET *st_on_file, RRD_MEMORY_MODE memory_mode); + +#include "rrdfunctions.h" + +// ---------------------------------------------------------------------------- +// RRDHOST flags +// use this for configuration flags, not for state control +// flags are set/unset in a manner that is not thread safe +// and may lead to missing information. + +typedef enum rrdhost_flags { + // Orphan, Archived and Obsolete flags + RRDHOST_FLAG_ORPHAN = (1 << 10), // this host is orphan (not receiving data) + RRDHOST_FLAG_ARCHIVED = (1 << 11), // The host is archived, no collected charts yet + RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS = (1 << 12), // the host has pending chart obsoletions + RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS = (1 << 13), // the host has pending dimension obsoletions + + // Streaming sender + RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED = (1 << 14), // the host has initialized rrdpush structures + RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN = (1 << 15), // When set, the sender thread is running + RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED = (1 << 16), // When set, the host is connected to a parent + RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS = (1 << 17), // when set, rrdset_done() should push metrics to parent + RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS = (1 << 18), // when set, we have logged the status of metrics streaming + RRDHOST_FLAG_RRDPUSH_SENDER_JOIN = (1 << 19), // When set, we want to join the sender thread + + // Health + RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 20), // contains charts and dims with uninitialized variables + RRDHOST_FLAG_INITIALIZED_HEALTH = (1 << 21), // the host has initialized health structures + + // Exporting + RRDHOST_FLAG_EXPORTING_SEND = (1 << 22), // send it to external databases + RRDHOST_FLAG_EXPORTING_DONT_SEND = (1 << 23), // don't send it to external databases + + // ACLK + RRDHOST_FLAG_ACLK_STREAM_CONTEXTS = (1 << 24), // when set, we should send ACLK stream context updates + // Metadata + RRDHOST_FLAG_METADATA_UPDATE = (1 << 25), // metadata needs to be stored in the database + + RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED = ( 1 << 26), // set when the receiver part is disconnected +} RRDHOST_FLAGS; + +#define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define rrdhost_flag_set(host, flag) __atomic_or_fetch(&((host)->flags), flag, __ATOMIC_SEQ_CST) +#define rrdhost_flag_clear(host, flag) __atomic_and_fetch(&((host)->flags), ~(flag), __ATOMIC_SEQ_CST) + +#ifdef NETDATA_INTERNAL_CHECKS +#define rrdset_debug(st, fmt, args...) do { if(unlikely(debug_flags & D_RRD_STATS && rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) \ + debug_int(__FILE__, __FUNCTION__, __LINE__, "%s: " fmt, rrdset_name(st), ##args); } while(0) +#else +#define rrdset_debug(st, fmt, args...) debug_dummy() +#endif + +typedef enum { + // Indexing + RRDHOST_OPTION_INDEXED_MACHINE_GUID = (1 << 0), // when set, we have indexed its machine guid + RRDHOST_OPTION_INDEXED_HOSTNAME = (1 << 1), // when set, we have indexed its hostname + + // Streaming configuration + RRDHOST_OPTION_SENDER_ENABLED = (1 << 2), // set when the host is configured to send metrics to a parent + + // Configuration options + RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS = (1 << 3), // delete files of obsolete charts + RRDHOST_OPTION_DELETE_ORPHAN_HOST = (1 << 4), // delete the entire host when orphan + + RRDHOST_OPTION_REPLICATION = (1 << 5), // when set, we support replication for this host +} RRDHOST_OPTIONS; + +#define rrdhost_option_check(host, flag) ((host)->options & (flag)) +#define rrdhost_option_set(host, flag) (host)->options |= flag +#define rrdhost_option_clear(host, flag) (host)->options &= ~(flag) + +#define rrdhost_has_rrdpush_sender_enabled(host) (rrdhost_option_check(host, RRDHOST_OPTION_SENDER_ENABLED) && (host)->sender) + +#define rrdhost_can_send_definitions_to_parent(host) (rrdhost_has_rrdpush_sender_enabled(host) && rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED)) + +// ---------------------------------------------------------------------------- +// Health data + +struct alarm_entry { + uint32_t unique_id; + uint32_t alarm_id; + uint32_t alarm_event_id; + uuid_t config_hash_id; + + time_t when; + time_t duration; + time_t non_clear_duration; + + STRING *name; + STRING *chart; + STRING *chart_context; + STRING *family; + + STRING *classification; + STRING *component; + STRING *type; + + STRING *exec; + STRING *recipient; + time_t exec_run_timestamp; + int exec_code; + uint64_t exec_spawn_serial; + + STRING *source; + STRING *units; + STRING *info; + + NETDATA_DOUBLE old_value; + NETDATA_DOUBLE new_value; + + STRING *old_value_string; + STRING *new_value_string; + + RRDCALC_STATUS old_status; + RRDCALC_STATUS new_status; + + uint32_t flags; + + int delay; + time_t delay_up_to_timestamp; + + uint32_t updated_by_id; + uint32_t updates_id; + + time_t last_repeat; + + struct alarm_entry *next; + struct alarm_entry *next_in_progress; + struct alarm_entry *prev_in_progress; +}; + +#define ae_name(ae) string2str((ae)->name) +#define ae_chart_name(ae) string2str((ae)->chart) +#define ae_chart_context(ae) string2str((ae)->chart_context) +#define ae_family(ae) string2str((ae)->family) +#define ae_classification(ae) string2str((ae)->classification) +#define ae_component(ae) string2str((ae)->component) +#define ae_type(ae) string2str((ae)->type) +#define ae_exec(ae) string2str((ae)->exec) +#define ae_recipient(ae) string2str((ae)->recipient) +#define ae_source(ae) string2str((ae)->source) +#define ae_units(ae) string2str((ae)->units) +#define ae_info(ae) string2str((ae)->info) +#define ae_old_value_string(ae) string2str((ae)->old_value_string) +#define ae_new_value_string(ae) string2str((ae)->new_value_string) + +typedef struct alarm_log { + uint32_t next_log_id; + uint32_t next_alarm_id; + unsigned int count; + unsigned int max; + ALARM_ENTRY *alarms; + netdata_rwlock_t alarm_log_rwlock; +} ALARM_LOG; + + +// ---------------------------------------------------------------------------- +// RRD HOST + +struct rrdhost_system_info { + char *cloud_provider_type; + char *cloud_instance_type; + char *cloud_instance_region; + + char *host_os_name; + char *host_os_id; + char *host_os_id_like; + char *host_os_version; + char *host_os_version_id; + char *host_os_detection; + char *host_cores; + char *host_cpu_freq; + char *host_ram_total; + char *host_disk_space; + char *container_os_name; + char *container_os_id; + char *container_os_id_like; + char *container_os_version; + char *container_os_version_id; + char *container_os_detection; + char *kernel_name; + char *kernel_version; + char *architecture; + char *virtualization; + char *virt_detection; + char *container; + char *container_detection; + char *is_k8s_node; + uint16_t hops; + bool ml_capable; + bool ml_enabled; + char *install_type; + char *prebuilt_arch; + char *prebuilt_dist; + int mc_version; +}; + +struct rrdhost { + char machine_guid[GUID_LEN + 1]; // the unique ID of this host + + // ------------------------------------------------------------------------ + // host information + + STRING *hostname; // the hostname of this host + STRING *registry_hostname; // the registry hostname for this host + STRING *os; // the O/S type of the host + STRING *tags; // tags for this host + STRING *timezone; // the timezone of the host + STRING *abbrev_timezone; // the abbriviated timezone of the host + STRING *program_name; // the program name that collects metrics for this host + STRING *program_version; // the program version that collects metrics for this host + + int32_t utc_offset; // the offset in seconds from utc + + RRDHOST_OPTIONS options; // configuration option for this RRDHOST (no atomics on this) + RRDHOST_FLAGS flags; // runtime flags about this RRDHOST (atomics on this) + RRDHOST_FLAGS *exporting_flags; // array of flags for exporting connector instances + + int rrd_update_every; // the update frequency of the host + long rrd_history_entries; // the number of history entries for the host's charts + + RRD_MEMORY_MODE rrd_memory_mode; // the configured memory more for the charts of this host + // the actual per tier is at .db[tier].mode + + char *cache_dir; // the directory to save RRD cache files + char *varlib_dir; // the directory to save health log + + struct { + RRD_MEMORY_MODE mode; // the db mode for this tier + STORAGE_ENGINE *eng; // the storage engine API for this tier + STORAGE_INSTANCE *instance; // the db instance for this tier + size_t tier_grouping; // tier 0 iterations aggregated on this tier + } db[RRD_STORAGE_TIERS]; + + struct rrdhost_system_info *system_info; // information collected from the host environment + + // ------------------------------------------------------------------------ + // streaming of data to remote hosts - rrdpush sender + + char *rrdpush_send_destination; // where to send metrics to + char *rrdpush_send_api_key; // the api key at the receiving netdata + struct rrdpush_destinations *destinations; // a linked list of possible destinations + struct rrdpush_destinations *destination; // the current destination from the above list + SIMPLE_PATTERN *rrdpush_send_charts_matching; // pattern to match the charts to be sent + + time_t rrdpush_seconds_to_replicate; // max time we want to replicate from the child + time_t rrdpush_replication_step; // seconds per replication step + size_t rrdpush_receiver_replicating_charts; // the number of charts currently being replicated from a child + + // the following are state information for the threading + // streaming metrics from this netdata to an upstream netdata + struct sender_state *sender; + netdata_thread_t rrdpush_sender_thread; // the sender thread + size_t rrdpush_sender_replicating_charts; // the number of charts currently being replicated to a parent + void *dbsync_worker; + + // ------------------------------------------------------------------------ + // streaming of data from remote hosts - rrdpush receiver + + time_t senders_connect_time; // the time the last sender was connected + time_t senders_last_chart_command; // the time of the last CHART streaming command + time_t senders_disconnected_time; // the time the last sender was disconnected + int senders_count; // number of senders currently streaming + + struct receiver_state *receiver; + netdata_mutex_t receiver_lock; + int trigger_chart_obsoletion_check; // set when child connects, will instruct parent to + // trigger a check for obsoleted charts since previous connect + + // ------------------------------------------------------------------------ + // health monitoring options + + unsigned int health_enabled; // 1 when this host has health enabled + bool health_spawn; // true when health thread is running + netdata_thread_t health_thread; // the health thread + unsigned int aclk_alert_reloaded; // 1 on thread start and health reload, 0 after removed are sent + time_t health_delay_up_to; // a timestamp to delay alarms processing up to + STRING *health_default_exec; // the full path of the alarms notifications program + STRING *health_default_recipient; // the default recipient for all alarms + char *health_log_filename; // the alarms event log filename + size_t health_log_entries_written; // the number of alarm events written to the alarms event log + FILE *health_log_fp; // the FILE pointer to the open alarms event log file + uint32_t health_default_warn_repeat_every; // the default value for the interval between repeating warning notifications + uint32_t health_default_crit_repeat_every; // the default value for the interval between repeating critical notifications + + // all RRDCALCs are primarily allocated and linked here + DICTIONARY *rrdcalc_root_index; + + // templates of alarms + DICTIONARY *rrdcalctemplate_root_index; + + ALARM_LOG health_log; // alarms historical events (event log) + uint32_t health_last_processed_id; // the last processed health id from the log + uint32_t health_max_unique_id; // the max alarm log unique id given for the host + uint32_t health_max_alarm_id; // the max alarm id given for the host + + // ------------------------------------------------------------------------ + // locks + + netdata_rwlock_t rrdhost_rwlock; // lock for this RRDHOST (protects rrdset_root linked list) + + // ------------------------------------------------------------------------ + // ML handle + ml_host_t ml_host; + + // ------------------------------------------------------------------------ + // Support for host-level labels + DICTIONARY *rrdlabels; + + // ------------------------------------------------------------------------ + // Support for functions + DICTIONARY *functions; // collector functions this rrdset supports, can be NULL + + // ------------------------------------------------------------------------ + // indexes + + DICTIONARY *rrdset_root_index; // the host's charts index (by id) + DICTIONARY *rrdset_root_index_name; // the host's charts index (by name) + + DICTIONARY *rrdfamily_root_index; // the host's chart families index + DICTIONARY *rrdvars; // the host's chart variables index + // this includes custom host variables + + RRDCONTEXTS *rrdctx_hub_queue; + RRDCONTEXTS *rrdctx_post_processing_queue; + RRDCONTEXTS *rrdctx; + + uuid_t host_uuid; // Global GUID for this host + uuid_t *node_id; // Cloud node_id + + netdata_mutex_t aclk_state_lock; + aclk_rrdhost_state aclk_state; + + struct rrdhost *next; + struct rrdhost *prev; +}; +extern RRDHOST *localhost; + +#define rrdhost_hostname(host) string2str((host)->hostname) +#define rrdhost_registry_hostname(host) string2str((host)->registry_hostname) +#define rrdhost_os(host) string2str((host)->os) +#define rrdhost_tags(host) string2str((host)->tags) +#define rrdhost_timezone(host) string2str((host)->timezone) +#define rrdhost_abbrev_timezone(host) string2str((host)->abbrev_timezone) +#define rrdhost_program_name(host) string2str((host)->program_name) +#define rrdhost_program_version(host) string2str((host)->program_version) + +#define rrdhost_rdlock(host) netdata_rwlock_rdlock(&((host)->rrdhost_rwlock)) +#define rrdhost_wrlock(host) netdata_rwlock_wrlock(&((host)->rrdhost_rwlock)) +#define rrdhost_unlock(host) netdata_rwlock_unlock(&((host)->rrdhost_rwlock)) + +#define rrdhost_aclk_state_lock(host) netdata_mutex_lock(&((host)->aclk_state_lock)) +#define rrdhost_aclk_state_unlock(host) netdata_mutex_unlock(&((host)->aclk_state_lock)) + +#define rrdhost_receiver_replicating_charts(host) (__atomic_load_n(&((host)->rrdpush_receiver_replicating_charts), __ATOMIC_RELAXED)) +#define rrdhost_receiver_replicating_charts_plus_one(host) (__atomic_add_fetch(&((host)->rrdpush_receiver_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_receiver_replicating_charts_minus_one(host) (__atomic_sub_fetch(&((host)->rrdpush_receiver_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_receiver_replicating_charts_zero(host) (__atomic_store_n(&((host)->rrdpush_receiver_replicating_charts), 0, __ATOMIC_RELAXED)) + +#define rrdhost_sender_replicating_charts(host) (__atomic_load_n(&((host)->rrdpush_sender_replicating_charts), __ATOMIC_RELAXED)) +#define rrdhost_sender_replicating_charts_plus_one(host) (__atomic_add_fetch(&((host)->rrdpush_sender_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_sender_replicating_charts_minus_one(host) (__atomic_sub_fetch(&((host)->rrdpush_sender_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_sender_replicating_charts_zero(host) (__atomic_store_n(&((host)->rrdpush_sender_replicating_charts), 0, __ATOMIC_RELAXED)) + +extern DICTIONARY *rrdhost_root_index; +long rrdhost_hosts_available(void); + +// ---------------------------------------------------------------------------- +// these loop macros make sure the linked list is accessed with the right lock + +#define rrdhost_foreach_read(var) \ + for((var) = localhost, rrd_check_rdlock(); var ; (var) = (var)->next) + +#define rrdhost_foreach_write(var) \ + for((var) = localhost, rrd_check_wrlock(); var ; (var) = (var)->next) + + +// ---------------------------------------------------------------------------- +// global lock for all RRDHOSTs + +extern netdata_rwlock_t rrd_rwlock; + +#define rrd_rdlock() netdata_rwlock_rdlock(&rrd_rwlock) +#define rrd_wrlock() netdata_rwlock_wrlock(&rrd_rwlock) +#define rrd_unlock() netdata_rwlock_unlock(&rrd_rwlock) + +// ---------------------------------------------------------------------------- + +bool is_storage_engine_shared(STORAGE_INSTANCE *engine); +void rrdset_index_init(RRDHOST *host); +void rrdset_index_destroy(RRDHOST *host); + +void rrddim_index_init(RRDSET *st); +void rrddim_index_destroy(RRDSET *st); + +// ---------------------------------------------------------------------------- + +extern size_t rrd_hosts_available; +extern time_t rrdhost_free_orphan_time; + +int rrd_init(char *hostname, struct rrdhost_system_info *system_info); + +RRDHOST *rrdhost_find_by_hostname(const char *hostname); +RRDHOST *rrdhost_find_by_guid(const char *guid); + +RRDHOST *rrdhost_find_or_create( + const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *abbrev_timezone + , int32_t utc_offset + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step + , struct rrdhost_system_info *system_info + , bool is_archived +); + +void rrdhost_update(RRDHOST *host + , const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *abbrev_timezone + , int32_t utc_offset + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step + , struct rrdhost_system_info *system_info +); + +int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value); + +#if defined(NETDATA_INTERNAL_CHECKS) && defined(NETDATA_VERIFY_LOCKS) +void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); +void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); +void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line); +void __rrdset_check_wrlock(RRDSET *st, const char *file, const char *function, const unsigned long line); +void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line); +void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line); + +#define rrdhost_check_rdlock(host) __rrdhost_check_rdlock(host, __FILE__, __FUNCTION__, __LINE__) +#define rrdhost_check_wrlock(host) __rrdhost_check_wrlock(host, __FILE__, __FUNCTION__, __LINE__) +#define rrdset_check_rdlock(st) __rrdset_check_rdlock(st, __FILE__, __FUNCTION__, __LINE__) +#define rrdset_check_wrlock(st) __rrdset_check_wrlock(st, __FILE__, __FUNCTION__, __LINE__) +#define rrd_check_rdlock() __rrd_check_rdlock(__FILE__, __FUNCTION__, __LINE__) +#define rrd_check_wrlock() __rrd_check_wrlock(__FILE__, __FUNCTION__, __LINE__) + +#else +#define rrdhost_check_rdlock(host) (void)0 +#define rrdhost_check_wrlock(host) (void)0 +#define rrdset_check_rdlock(st) (void)0 +#define rrdset_check_wrlock(st) (void)0 +#define rrd_check_rdlock() (void)0 +#define rrd_check_wrlock() (void)0 +#endif + +// ---------------------------------------------------------------------------- +// RRDSET functions + +int rrdset_reset_name(RRDSET *st, const char *name); + +RRDSET *rrdset_create_custom(RRDHOST *host + , const char *type + , const char *id + , const char *name + , const char *family + , const char *context + , const char *title + , const char *units + , const char *plugin + , const char *module + , long priority + , int update_every + , RRDSET_TYPE chart_type + , RRD_MEMORY_MODE memory_mode + , long history_entries); + +#define rrdset_create(host, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) \ + rrdset_create_custom(host, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type, (host)->rrd_memory_mode, (host)->rrd_history_entries) + +#define rrdset_create_localhost(type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) \ + rrdset_create(localhost, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) + +void rrdhost_free_all(void); +void rrdhost_save_all(void); +void rrdhost_cleanup_all(void); + +void rrdhost_system_info_free(struct rrdhost_system_info *system_info); +void rrdhost_free(RRDHOST *host, bool force); +void rrdhost_save_charts(RRDHOST *host); +void rrdhost_delete_charts(RRDHOST *host); + +int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now); + +void rrdset_update_heterogeneous_flag(RRDSET *st); + +time_t rrdset_set_update_every(RRDSET *st, time_t update_every); + +RRDSET *rrdset_find(RRDHOST *host, const char *id); +#define rrdset_find_localhost(id) rrdset_find(localhost, id) +/* This will not return charts that are archived */ +static inline RRDSET *rrdset_find_active_localhost(const char *id) +{ + RRDSET *st = rrdset_find_localhost(id); + if (unlikely(st && rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED))) + return NULL; + return st; +} + +RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *id); +#define rrdset_find_bytype_localhost(type, id) rrdset_find_bytype(localhost, type, id) +/* This will not return charts that are archived */ +static inline RRDSET *rrdset_find_active_bytype_localhost(const char *type, const char *id) +{ + RRDSET *st = rrdset_find_bytype_localhost(type, id); + if (unlikely(st && rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED))) + return NULL; + return st; +} + +RRDSET *rrdset_find_byname(RRDHOST *host, const char *name); +#define rrdset_find_byname_localhost(name) rrdset_find_byname(localhost, name) +/* This will not return charts that are archived */ +static inline RRDSET *rrdset_find_active_byname_localhost(const char *name) +{ + RRDSET *st = rrdset_find_byname_localhost(name); + if (unlikely(st && rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED))) + return NULL; + return st; +} + +void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds); +void rrdset_next_usec(RRDSET *st, usec_t microseconds); +void rrdset_timed_next(RRDSET *st, struct timeval now, usec_t microseconds); +#define rrdset_next(st) rrdset_next_usec(st, 0ULL) + +void rrdset_timed_done(RRDSET *st, struct timeval now, bool pending_rrdset_next); +void rrdset_done(RRDSET *st); + +void rrdset_is_obsolete(RRDSET *st); +void rrdset_isnot_obsolete(RRDSET *st); + +// checks if the RRDSET should be offered to viewers +#define rrdset_is_available_for_viewers(st) (!rrdset_flag_check(st, RRDSET_FLAG_HIDDEN) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && rrdset_number_of_dimensions(st) && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE) +#define rrdset_is_available_for_exporting_and_alarms(st) (!rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && rrdset_number_of_dimensions(st)) +#define rrdset_is_archived(st) (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && rrdset_number_of_dimensions(st)) + +time_t rrddim_first_entry_t(RRDDIM *rd); +time_t rrddim_first_entry_t_of_tier(RRDDIM *rd, size_t tier); +time_t rrddim_last_entry_t(RRDDIM *rd); +time_t rrdset_last_entry_t(RRDSET *st); +time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier); +time_t rrdset_first_entry_t(RRDSET *st); +time_t rrdhost_last_entry_t(RRDHOST *h); + +// ---------------------------------------------------------------------------- +// RRD DIMENSION functions + +RRDDIM *rrddim_add_custom(RRDSET *st + , const char *id + , const char *name + , collected_number multiplier + , collected_number divisor + , RRD_ALGORITHM algorithm + , RRD_MEMORY_MODE memory_mode + ); + +#define rrddim_add(st, id, name, multiplier, divisor, algorithm) \ + rrddim_add_custom(st, id, name, multiplier, divisor, algorithm, (st)->rrd_memory_mode) + +int rrddim_reset_name(RRDSET *st, RRDDIM *rd, const char *name); +int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm); +int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier); +int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor); + +RRDDIM *rrddim_find(RRDSET *st, const char *id); +RRDDIM_ACQUIRED *rrddim_find_and_acquire(RRDSET *st, const char *id); +RRDDIM *rrddim_acquired_to_rrddim(RRDDIM_ACQUIRED *rda); +void rrddim_acquired_release(RRDDIM_ACQUIRED *rda); +RRDDIM *rrddim_find_active(RRDSET *st, const char *id); + +int rrddim_hide(RRDSET *st, const char *id); +int rrddim_unhide(RRDSET *st, const char *id); + +void rrddim_is_obsolete(RRDSET *st, RRDDIM *rd); +void rrddim_isnot_obsolete(RRDSET *st, RRDDIM *rd); + +collected_number rrddim_timed_set_by_pointer(RRDSET *st, RRDDIM *rd, struct timeval collected_time, collected_number value); +collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value); +collected_number rrddim_set(RRDSET *st, const char *id, collected_number value); + +#ifdef ENABLE_ACLK +time_t calc_dimension_liveness(RRDDIM *rd, time_t now); +#endif +long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries); + +#ifdef NETDATA_LOG_COLLECTION_ERRORS +#define rrddim_store_metric(rd, point_end_time_ut, n, flags) rrddim_store_metric_with_trace(rd, point_end_time_ut, n, flags, __FUNCTION__) +void rrddim_store_metric_with_trace(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags, const char *function); +#else +void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags); +#endif + +// ---------------------------------------------------------------------------- +// Miscellaneous functions + +char *rrdset_strncpyz_name(char *to, const char *from, size_t length); + +// ---------------------------------------------------------------------------- +// RRD internal functions + +void rrdset_delete_files(RRDSET *st); +void rrdset_save(RRDSET *st); +void rrdset_free(RRDSET *st); + +#ifdef NETDATA_RRD_INTERNALS + +char *rrdset_cache_dir(RRDHOST *host, const char *id); + +void rrddim_free(RRDSET *st, RRDDIM *rd); + +void rrdset_reset(RRDSET *st); +void rrdset_delete_obsolete_dimensions(RRDSET *st); + +RRDHOST *rrdhost_create( + const char *hostname, const char *registry_hostname, const char *guid, const char *os, const char *timezone, + const char *abbrev_timezone, int32_t utc_offset,const char *tags, const char *program_name, const char *program_version, + int update_every, long entries, RRD_MEMORY_MODE memory_mode, unsigned int health_enabled, unsigned int rrdpush_enabled, + char *rrdpush_destination, char *rrdpush_api_key, char *rrdpush_send_charts_matching, + bool rrdpush_enable_replication, time_t rrdpush_seconds_to_replicate, time_t rrdpush_replication_step, + struct rrdhost_system_info *system_info, int is_localhost, bool is_archived); + +#endif /* NETDATA_RRD_INTERNALS */ + +void set_host_properties( + RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *registry_hostname, + const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset, + const char *program_name, const char *program_version); + +size_t get_tier_grouping(size_t tier); + +// ---------------------------------------------------------------------------- +// RRD DB engine declarations + +#ifdef ENABLE_DBENGINE +#include "database/engine/rrdengineapi.h" +#endif +#include "sqlite/sqlite_functions.h" +#include "sqlite/sqlite_context.h" +#include "sqlite/sqlite_metadata.h" +#include "sqlite/sqlite_aclk.h" +#include "sqlite/sqlite_aclk_alert.h" +#include "sqlite/sqlite_aclk_node.h" +#include "sqlite/sqlite_health.h" + +#ifdef __cplusplus +} +#endif + +#endif /* NETDATA_RRD_H */ diff --git a/database/rrdcalc.c b/database/rrdcalc.c new file mode 100644 index 0000000..aad945a --- /dev/null +++ b/database/rrdcalc.c @@ -0,0 +1,800 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDCALC helpers + +inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { + switch(status) { + case RRDCALC_STATUS_REMOVED: + return "REMOVED"; + + case RRDCALC_STATUS_UNDEFINED: + return "UNDEFINED"; + + case RRDCALC_STATUS_UNINITIALIZED: + return "UNINITIALIZED"; + + case RRDCALC_STATUS_CLEAR: + return "CLEAR"; + + case RRDCALC_STATUS_RAISED: + return "RAISED"; + + case RRDCALC_STATUS_WARNING: + return "WARNING"; + + case RRDCALC_STATUS_CRITICAL: + return "CRITICAL"; + + default: + error("Unknown alarm status %d", status); + return "UNKNOWN"; + } +} + +uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id) { + netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock); + + // re-use old IDs, by looking them up in the alarm log + ALARM_ENTRY *ae = NULL; + for(ae = host->health_log.alarms; ae ;ae = ae->next) { + if(unlikely(name == ae->name && chart == ae->chart)) { + if(next_event_id) *next_event_id = ae->alarm_event_id + 1; + break; + } + } + + uint32_t alarm_id; + + if(ae) + alarm_id = ae->alarm_id; + + else { + if (unlikely(!host->health_log.next_alarm_id)) + host->health_log.next_alarm_id = (uint32_t)now_realtime_sec(); + + alarm_id = host->health_log.next_alarm_id++; + } + + netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock); + return alarm_id; +} + +// ---------------------------------------------------------------------------- +// RRDCALC replacing info text variables with RRDSET labels + +static STRING *rrdcalc_replace_variables_with_rrdset_labels(const char *line, RRDCALC *rc) { + if (!line || !*line) + return NULL; + + size_t pos = 0; + char *temp = strdupz(line); + char var[RRDCALC_VAR_MAX]; + char *m, *lbl_value = NULL; + + while ((m = strchr(temp + pos, '$'))) { + int i = 0; + char *e = m; + while (*e) { + + if (*e == ' ' || i == RRDCALC_VAR_MAX - 1) + break; + else + var[i] = *e; + + e++; + i++; + } + + var[i] = '\0'; + pos = m - temp + 1; + + if (!strcmp(var, RRDCALC_VAR_FAMILY)) { + char *buf = find_and_replace(temp, var, (rc->rrdset && rc->rrdset->family) ? rrdset_family(rc->rrdset) : "", m); + freez(temp); + temp = buf; + } + else if (!strncmp(var, RRDCALC_VAR_LABEL, RRDCALC_VAR_LABEL_LEN)) { + if(likely(rc->rrdset && rc->rrdset->rrdlabels)) { + rrdlabels_get_value_to_char_or_null(rc->rrdset->rrdlabels, &lbl_value, var+RRDCALC_VAR_LABEL_LEN); + if (lbl_value) { + char *buf = find_and_replace(temp, var, lbl_value, m); + freez(temp); + temp = buf; + freez(lbl_value); + } + } + } + } + + STRING *ret = string_strdupz(temp); + freez(temp); + + return ret; +} + +void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc) { + if(!rc->rrdset || !rc->original_info || !rc->rrdset->rrdlabels) return; + + size_t labels_version = dictionary_version(rc->rrdset->rrdlabels); + if(rc->labels_version != labels_version) { + + STRING *old = rc->info; + rc->info = rrdcalc_replace_variables_with_rrdset_labels(rrdcalc_original_info(rc), rc); + string_freez(old); + + rc->labels_version = labels_version; + } +} + +// ---------------------------------------------------------------------------- +// RRDCALC index management for RRDSET + +// the dictionary requires a unique key for every item +// we use {chart id}.{alert name} for both the RRDHOST and RRDSET alert indexes. + +#define RRDCALC_MAX_KEY_SIZE 1024 +static size_t rrdcalc_key(char *dst, size_t dst_len, const char *chart, const char *alert) { + return snprintfz(dst, dst_len, "%s/%s", chart, alert); +} + +const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name) { + char key[RRDCALC_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), alert_name); + + const RRDCALC_ACQUIRED *rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)(key_len + 1)); + + if(!rca) { + key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_name(st), alert_name); + rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)(key_len + 1)); + } + + return rca; +} + +void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca) { + if(!rca) return; + + dictionary_acquired_item_release(st->rrdhost->rrdcalc_root_index, (const DICTIONARY_ITEM *)rca); +} + +RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca) { + if(rca) + return dictionary_acquired_item_value((const DICTIONARY_ITEM *)rca); + + return NULL; +} + +// ---------------------------------------------------------------------------- +// RRDCALC managing the linking with RRDSET + +static void rrdcalc_link_to_rrdset(RRDSET *st, RRDCALC *rc) { + RRDHOST *host = st->rrdhost; + + debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host)); + + rc->last_status_change = now_realtime_sec(); + rc->rrdset = st; + + netdata_rwlock_wrlock(&st->alerts.rwlock); + DOUBLE_LINKED_LIST_APPEND_UNSAFE(st->alerts.base, rc, prev, next); + netdata_rwlock_unlock(&st->alerts.rwlock); + + if(rc->update_every < rc->rrdset->update_every) { + error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every); + rc->update_every = rc->rrdset->update_every; + } + + if(!isnan(rc->green) && isnan(st->green)) { + debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " NETDATA_DOUBLE_FORMAT_AUTO + " to " NETDATA_DOUBLE_FORMAT_AUTO ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->green, rc->green); + st->green = rc->green; + } + + if(!isnan(rc->red) && isnan(st->red)) { + debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " NETDATA_DOUBLE_FORMAT_AUTO " to " NETDATA_DOUBLE_FORMAT_AUTO + ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->red, rc->red); + st->red = rc->red; + } + + char buf[RRDVAR_MAX_LENGTH + 1]; + snprintfz(buf, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), rrdcalc_name(rc)); + STRING *rrdset_name_rrdcalc_name = string_strdupz(buf); + snprintfz(buf, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), rrdcalc_name(rc)); + STRING *rrdset_id_rrdcalc_name = string_strdupz(buf); + + rc->rrdvar_local = rrdvar_add_and_acquire( + "local", + st->rrdvars, + rc->name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_LOCAL_VAR, + &rc->value); + + rc->rrdvar_family = rrdvar_add_and_acquire( + "family", + rrdfamily_rrdvars_dict(st->rrdfamily), + rc->name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_FAMILY_VAR, + &rc->value); + + rc->rrdvar_host_chart_name = rrdvar_add_and_acquire( + "host", + host->rrdvars, + rrdset_name_rrdcalc_name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR, + &rc->value); + + rc->rrdvar_host_chart_id = rrdvar_add_and_acquire( + "host", + host->rrdvars, + rrdset_id_rrdcalc_name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_HOST_CHARTID_VAR | ((rc->rrdvar_host_chart_name) ? 0 : RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR), + &rc->value); + + string_freez(rrdset_id_rrdcalc_name); + string_freez(rrdset_name_rrdcalc_name); + + if(!rc->units) + rc->units = string_dup(st->units); + + rrdcalc_update_info_using_rrdset_labels(rc); + + time_t now = now_realtime_sec(); + + ALARM_ENTRY *ae = health_create_alarm_entry( + host, + rc->id, + rc->next_event_id++, + rc->config_hash_id, + now, + rc->name, + rc->rrdset->id, + rc->rrdset->context, + rc->rrdset->family, + rc->classification, + rc->component, + rc->type, + rc->exec, + rc->recipient, + now - rc->last_status_change, + rc->old_value, + rc->value, + rc->status, + RRDCALC_STATUS_UNINITIALIZED, + rc->source, + rc->units, + rc->info, + 0, + rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0); + + health_alarm_log_add_entry(host, ae); +} + +static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) { + RRDSET *st = rc->rrdset; + + if(!st) { + debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc)); + error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc)); + return; + } + + RRDHOST *host = st->rrdhost; + + time_t now = now_realtime_sec(); + + if (likely(rc->status != RRDCALC_STATUS_REMOVED)) { + ALARM_ENTRY *ae = health_create_alarm_entry( + host, + rc->id, + rc->next_event_id++, + rc->config_hash_id, + now, + rc->name, + rc->rrdset->id, + rc->rrdset->context, + rc->rrdset->family, + rc->classification, + rc->component, + rc->type, + rc->exec, + rc->recipient, + now - rc->last_status_change, + rc->old_value, + rc->value, + rc->status, + RRDCALC_STATUS_REMOVED, + rc->source, + rc->units, + rc->info, + 0, + 0); + + health_alarm_log_add_entry(host, ae); + } + + debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host)); + + // unlink it + + if(!having_ll_wrlock) + netdata_rwlock_wrlock(&st->alerts.rwlock); + + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(st->alerts.base, rc, prev, next); + + if(!having_ll_wrlock) + netdata_rwlock_unlock(&st->alerts.rwlock); + + rc->rrdset = NULL; + + rrdvar_release_and_del(st->rrdvars, rc->rrdvar_local); + rc->rrdvar_local = NULL; + + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rc->rrdvar_family); + rc->rrdvar_family = NULL; + + rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_id); + rc->rrdvar_host_chart_id = NULL; + + rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_name); + rc->rrdvar_host_chart_name = NULL; + + // RRDCALC will remain in RRDHOST + // so that if the matching chart is found in the future + // it will be applied automatically +} + +static inline bool rrdcalc_check_if_it_matches_rrdset(RRDCALC *rc, RRDSET *st) { + if ( (rc->chart != st->id) + && (rc->chart != st->name)) + return false; + + if (rc->module_pattern && !simple_pattern_matches(rc->module_pattern, rrdset_module_name(st))) + return false; + + if (rc->plugin_pattern && !simple_pattern_matches(rc->plugin_pattern, rrdset_plugin_name(st))) + return false; + + if (st->rrdhost->rrdlabels && rc->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(st->rrdhost->rrdlabels, rc->host_labels_pattern, '=')) + return false; + + return true; +} + +void rrdcalc_link_matching_alerts_to_rrdset(RRDSET *st) { + RRDHOST *host = st->rrdhost; + // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); + + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_read(host, rc) { + if(rc->rrdset) + continue; + + if(unlikely(rrdcalc_check_if_it_matches_rrdset(rc, st))) + rrdcalc_link_to_rrdset(st, rc); + } + foreach_rrdcalc_in_rrdhost_done(rc); +} + +static inline int rrdcalc_check_and_link_rrdset_callback(RRDSET *st, void *rrdcalc) { + RRDCALC *rc = rrdcalc; + + if(unlikely(rrdcalc_check_if_it_matches_rrdset(rc, st))) { + rrdcalc_link_to_rrdset(st, rc); + return -1; + } + + return 0; +} + +// ---------------------------------------------------------------------------- +// RRDCALC rrdhost index management - constructor + +struct rrdcalc_constructor { + RRDHOST *rrdhost; // the host we operate upon + RRDCALC *from_config; // points to the original RRDCALC, as loaded from the config + RRDCALCTEMPLATE *from_rrdcalctemplate; // the template this alert is generated from + RRDSET *rrdset; // when this comes from rrdcalctemplate, we have a matching rrdset + const char *overwrite_alert_name; // when we have a dimension foreach, the alert is renamed + const char *overwrite_dimensions; // when we have a dimension foreach, the dimensions filter is renamed + + enum { + RRDCALC_REACT_NONE, + RRDCALC_REACT_NEW, + } react_action; + + bool existing_from_template; +}; + +static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) { + RRDCALC *rc = rrdcalc; + struct rrdcalc_constructor *ctr = constructor_data; + RRDHOST *host = ctr->rrdhost; + + rc->key = string_strdupz(dictionary_acquired_item_name(item)); + + if(ctr->from_rrdcalctemplate) { + rc->run_flags |= RRDCALC_FLAG_FROM_TEMPLATE; + + RRDCALCTEMPLATE *rt = ctr->from_rrdcalctemplate; + RRDSET *st = ctr->rrdset; + + rc->next_event_id = 1; + rc->name = (ctr->overwrite_alert_name) ? string_strdupz(ctr->overwrite_alert_name) : string_dup(rt->name); + rc->chart = string_dup(st->id); + uuid_copy(rc->config_hash_id, rt->config_hash_id); + + rc->dimensions = (ctr->overwrite_dimensions) ? string_strdupz(ctr->overwrite_dimensions) : string_dup(rt->dimensions); + rc->foreach_dimension = NULL; + rc->foreach_dimension_pattern = NULL; + + rc->green = rt->green; + rc->red = rt->red; + rc->value = NAN; + rc->old_value = NAN; + + rc->delay_up_duration = rt->delay_up_duration; + rc->delay_down_duration = rt->delay_down_duration; + rc->delay_max_duration = rt->delay_max_duration; + rc->delay_multiplier = rt->delay_multiplier; + + rc->last_repeat = 0; + rc->times_repeat = 0; + rc->warn_repeat_every = rt->warn_repeat_every; + rc->crit_repeat_every = rt->crit_repeat_every; + + rc->group = rt->group; + rc->after = rt->after; + rc->before = rt->before; + rc->update_every = rt->update_every; + rc->options = rt->options; + + rc->exec = string_dup(rt->exec); + rc->recipient = string_dup(rt->recipient); + rc->source = string_dup(rt->source); + rc->units = string_dup(rt->units); + rc->info = string_dup(rt->info); + rc->original_info = string_dup(rt->info); + + rc->classification = string_dup(rt->classification); + rc->component = string_dup(rt->component); + rc->type = string_dup(rt->type); + + if(rt->calculation) { + rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); + if(!rc->calculation) + error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->calculation->source); + } + if(rt->warning) { + rc->warning = expression_parse(rt->warning->source, NULL, NULL); + if(!rc->warning) + error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->warning->source); + } + if(rt->critical) { + rc->critical = expression_parse(rt->critical->source, NULL, NULL); + if(!rc->critical) + error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->critical->source); + } + } + else if(ctr->from_config) { + // dictionary has already copied all the members values and pointers + // no need for additional work in this case + ; + } + + rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); + + if(rc->calculation) { + rc->calculation->status = &rc->status; + rc->calculation->myself = &rc->value; + rc->calculation->after = &rc->db_after; + rc->calculation->before = &rc->db_before; + rc->calculation->rrdcalc = rc; + } + + if(rc->warning) { + rc->warning->status = &rc->status; + rc->warning->myself = &rc->value; + rc->warning->after = &rc->db_after; + rc->warning->before = &rc->db_before; + rc->warning->rrdcalc = rc; + } + + if(rc->critical) { + rc->critical->status = &rc->status; + rc->critical->myself = &rc->value; + rc->critical->after = &rc->db_after; + rc->critical->before = &rc->db_before; + rc->critical->rrdcalc = rc; + } + + debug(D_HEALTH, "Health added alarm '%s.%s': exec '%s', recipient '%s', green " NETDATA_DOUBLE_FORMAT_AUTO + ", red " NETDATA_DOUBLE_FORMAT_AUTO + ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", + rrdcalc_chart_name(rc), + rrdcalc_name(rc), + (rc->exec)?rrdcalc_exec(rc):"DEFAULT", + (rc->recipient)?rrdcalc_recipient(rc):"DEFAULT", + rc->green, + rc->red, + (int)rc->group, + rc->after, + rc->before, + rc->options, + (rc->dimensions)?rrdcalc_dimensions(rc):"NONE", + (rc->foreach_dimension)?rrdcalc_foreachdim(rc):"NONE", + rc->update_every, + (rc->calculation)?rc->calculation->parsed_as:"NONE", + (rc->warning)?rc->warning->parsed_as:"NONE", + (rc->critical)?rc->critical->parsed_as:"NONE", + rrdcalc_source(rc), + rc->delay_up_duration, + rc->delay_down_duration, + rc->delay_max_duration, + rc->delay_multiplier, + rc->warn_repeat_every, + rc->crit_repeat_every + ); + + ctr->react_action = RRDCALC_REACT_NEW; +} + +static bool rrdcalc_rrdhost_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdcalc_new __maybe_unused, void *constructor_data ) { + RRDCALC *rc = rrdcalc; + struct rrdcalc_constructor *ctr = constructor_data; + + if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) + ctr->existing_from_template = true; + else + ctr->existing_from_template = false; + + ctr->react_action = RRDCALC_REACT_NONE; + + return false; +} + +static void rrdcalc_rrdhost_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) { + RRDCALC *rc = rrdcalc; + struct rrdcalc_constructor *ctr = constructor_data; + RRDHOST *host = ctr->rrdhost; + + if(ctr->react_action == RRDCALC_REACT_NEW) { + if(ctr->rrdset) + rrdcalc_link_to_rrdset(ctr->rrdset, rc); + + else if (ctr->from_rrdcalctemplate) + rrdcontext_foreach_instance_with_rrdset_in_context(host, string2str(ctr->from_rrdcalctemplate->context), rrdcalc_check_and_link_rrdset_callback, rc); + } +} + +// ---------------------------------------------------------------------------- +// RRDCALC rrdhost index management - destructor + +static void rrdcalc_free_internals(RRDCALC *rc) { + if(unlikely(!rc)) return; + + expression_free(rc->calculation); + expression_free(rc->warning); + expression_free(rc->critical); + + string_freez(rc->key); + string_freez(rc->name); + string_freez(rc->chart); + string_freez(rc->dimensions); + string_freez(rc->foreach_dimension); + string_freez(rc->exec); + string_freez(rc->recipient); + string_freez(rc->source); + string_freez(rc->units); + string_freez(rc->info); + string_freez(rc->original_info); + string_freez(rc->classification); + string_freez(rc->component); + string_freez(rc->type); + string_freez(rc->host_labels); + string_freez(rc->module_match); + string_freez(rc->plugin_match); + + simple_pattern_free(rc->foreach_dimension_pattern); + simple_pattern_free(rc->host_labels_pattern); + simple_pattern_free(rc->module_pattern); + simple_pattern_free(rc->plugin_pattern); +} + +static void rrdcalc_rrdhost_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdhost __maybe_unused) { + RRDCALC *rc = rrdcalc; + //RRDHOST *host = rrdhost; + + if(unlikely(rc->rrdset)) + rrdcalc_unlink_from_rrdset(rc, false); + + // any destruction actions that require other locks + // have to be placed in rrdcalc_del(), because the object is actually locked for deletion + + rrdcalc_free_internals(rc); +} + +// ---------------------------------------------------------------------------- +// RRDCALC rrdhost index management - index API + +void rrdcalc_rrdhost_index_init(RRDHOST *host) { + if(!host->rrdcalc_root_index) { + host->rrdcalc_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_insert_callback, NULL); + dictionary_register_conflict_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_conflict_callback, NULL); + dictionary_register_react_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_react_callback, NULL); + dictionary_register_delete_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_delete_callback, host); + } +} + +void rrdcalc_rrdhost_index_destroy(RRDHOST *host) { + dictionary_destroy(host->rrdcalc_root_index); + host->rrdcalc_root_index = NULL; +} + +void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions) { + char key[RRDCALC_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), + overwrite_alert_name?overwrite_alert_name:string2str(rt->name)); + + struct rrdcalc_constructor tmp = { + .rrdhost = host, + .from_config = NULL, + .from_rrdcalctemplate = rt, + .rrdset = st, + .overwrite_alert_name = overwrite_alert_name, + .overwrite_dimensions = overwrite_dimensions, + .react_action = RRDCALC_REACT_NONE, + .existing_from_template = false, + }; + + dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), NULL, sizeof(RRDCALC), &tmp); + if(tmp.react_action != RRDCALC_REACT_NEW && tmp.existing_from_template == false) + error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It is manually configured.", + string2str(rt->name), rrdset_id(st), key, rrdhost_hostname(host)); +} + +int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc) { + if(!rc->chart) { + error("Health configuration for alarm '%s' does not have a chart", rrdcalc_name(rc)); + return 0; + } + + if(!rc->update_every) { + error("Health configuration for alarm '%s.%s' has no frequency (parameter 'every'). Ignoring it.", rrdcalc_chart_name(rc), rrdcalc_name(rc)); + return 0; + } + + if(!RRDCALC_HAS_DB_LOOKUP(rc) && !rc->calculation && !rc->warning && !rc->critical) { + error("Health configuration for alarm '%s.%s' is useless (no db lookup, no calculation, no warning and no critical expressions)", rrdcalc_chart_name(rc), rrdcalc_name(rc)); + return 0; + } + + char key[RRDCALC_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, string2str(rc->chart), string2str(rc->name)); + + struct rrdcalc_constructor tmp = { + .rrdhost = host, + .from_config = rc, + .from_rrdcalctemplate = NULL, + .rrdset = NULL, + .react_action = RRDCALC_REACT_NONE, + }; + + int ret = 1; + RRDCALC *t = dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), rc, sizeof(RRDCALC), &tmp); + if(tmp.react_action == RRDCALC_REACT_NEW) { + // we copied rc into the dictionary, so we have to free the container here + freez(rc); + rc = t; + + // since we loaded this config from configuration, we need to check if we can link it to alarms + RRDSET *st; + rrdset_foreach_read(st, host) { + if (unlikely(rrdcalc_check_and_link_rrdset_callback(st, rc) == -1)) + break; + } + rrdset_foreach_done(st); + } + else { + error( + "RRDCALC: from config '%s' on chart '%s' failed to be added to host '%s'. It already exists.", + string2str(rc->name), + string2str(rc->chart), + rrdhost_hostname(host)); + + ret = 0; + + // free all of it, internals and the container + rrdcalc_free_unused_rrdcalc_loaded_from_config(rc); + } + + return ret; +} + +static void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock) { + if(rc->rrdset) + rrdcalc_unlink_from_rrdset(rc, having_ll_wrlock); + + dictionary_del_advanced(host->rrdcalc_root_index, string2str(rc->key), (ssize_t)string_strlen(rc->key) + 1); +} + + +// ---------------------------------------------------------------------------- +// RRDCALC cleanup API functions + +void rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(RRDHOST *host) { + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_reentrant(host, rc) { + if (!rc->host_labels) + continue; + + if(!rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rc->host_labels_pattern, '=')) { + info("Health configuration for alarm '%s' cannot be applied, because the host %s does not have the label(s) '%s'", + rrdcalc_name(rc), + rrdhost_hostname(host), + rrdcalc_host_labels(rc)); + + rrdcalc_unlink_and_delete(host, rc, false); + } + } + foreach_rrdcalc_in_rrdhost_done(rc); +} + +void rrdcalc_delete_alerts_not_matching_host_labels_from_all_hosts() { + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) { + if (unlikely(!host->health_enabled)) + continue; + + if (host->rrdlabels) + rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(host); + } + + rrd_unlock(); +} + +void rrdcalc_unlink_all_rrdset_alerts(RRDSET *st) { + RRDCALC *rc, *last = NULL; + netdata_rwlock_wrlock(&st->alerts.rwlock); + while((rc = st->alerts.base)) { + if(last == rc) { + error("RRDCALC: malformed list of alerts linked to chart - cannot cleanup - giving up."); + break; + } + last = rc; + + if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) { + // if the alert comes from a template we can just delete it + rrdcalc_unlink_and_delete(st->rrdhost, rc, true); + } + else { + // this is a configuration for a specific chart + // it should stay in the list + rrdcalc_unlink_from_rrdset(rc, true); + } + + } + netdata_rwlock_unlock(&st->alerts.rwlock); +} + +void rrdcalc_delete_all(RRDHOST *host) { + dictionary_flush(host->rrdcalc_root_index); +} + +void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc) { + if(rc->rrdset) + rrdcalc_unlink_from_rrdset(rc, false); + + rrdcalc_free_internals(rc); + freez(rc); +} diff --git a/database/rrdcalc.h b/database/rrdcalc.h new file mode 100644 index 0000000..a25c05c --- /dev/null +++ b/database/rrdcalc.h @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrd.h" + +#ifndef NETDATA_RRDCALC_H +#define NETDATA_RRDCALC_H 1 + +// calculated variables (defined in health configuration) +// These aggregate time-series data at fixed intervals +// (defined in their update_every member below) +// They increase the overhead of netdata. +// +// These calculations are stored under RRDHOST. +// Then are also linked to RRDSET (of course only when a +// matching chart is found). + +typedef enum { + RRDCALC_FLAG_DB_ERROR = (1 << 0), + RRDCALC_FLAG_DB_NAN = (1 << 1), + // RRDCALC_FLAG_DB_STALE = (1 << 2), + RRDCALC_FLAG_CALC_ERROR = (1 << 3), + RRDCALC_FLAG_WARN_ERROR = (1 << 4), + RRDCALC_FLAG_CRIT_ERROR = (1 << 5), + RRDCALC_FLAG_RUNNABLE = (1 << 6), + RRDCALC_FLAG_DISABLED = (1 << 7), + RRDCALC_FLAG_SILENCED = (1 << 8), + RRDCALC_FLAG_RUN_ONCE = (1 << 9), + RRDCALC_FLAG_FROM_TEMPLATE = (1 << 10), // the rrdcalc has been created from a template +} RRDCALC_FLAGS; + +typedef enum { + // This list uses several other options from RRDR_OPTIONS for db lookups. + // To add an item here, you need to reserve a bit in RRDR_OPTIONS. + RRDCALC_OPTION_NO_CLEAR_NOTIFICATION = 0x80000000, +} RRDCALC_OPTIONS; + +#define RRDCALC_ALL_OPTIONS_EXCLUDING_THE_RRDR_ONES (RRDCALC_OPTION_NO_CLEAR_NOTIFICATION) + +struct rrdcalc { + STRING *key; // the unique key in the host's rrdcalc_root_index + + uint32_t id; // the unique id of this alarm + uint32_t next_event_id; // the next event id that will be used for this alarm + + uuid_t config_hash_id; // a predictable hash_id based on specific alert configuration + + STRING *name; // the name of this alarm + STRING *chart; // the chart id this should be linked to + + STRING *exec; // the command to execute when this alarm switches state + STRING *recipient; // the recipient of the alarm (the first parameter to exec) + + STRING *classification; // the class that this alarm belongs + STRING *component; // the component that this alarm refers to + STRING *type; // type of the alarm + + STRING *plugin_match; // the plugin name that should be linked to + SIMPLE_PATTERN *plugin_pattern; + + STRING *module_match; // the module name that should be linked to + SIMPLE_PATTERN *module_pattern; + + STRING *source; // the source of this alarm + STRING *units; // the units of the alarm + STRING *original_info; // the original info field before any variable replacement + STRING *info; // a short description of the alarm + + int update_every; // update frequency for the alarm + + // the red and green threshold of this alarm (to be set to the chart) + NETDATA_DOUBLE green; + NETDATA_DOUBLE red; + + // ------------------------------------------------------------------------ + // database lookup settings + + STRING *dimensions; // the chart dimensions + STRING *foreach_dimension; // the group of dimensions that the `foreach` will be applied. + SIMPLE_PATTERN *foreach_dimension_pattern; // used if and only if there is a simple pattern for the chart. + RRDR_GROUPING group; // grouping method: average, max, etc. + int before; // ending point in time-series + int after; // starting point in time-series + RRDCALC_OPTIONS options; // configuration options + + // ------------------------------------------------------------------------ + // expressions related to the alarm + + EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm + EVAL_EXPRESSION *warning; // expression to check the warning condition + EVAL_EXPRESSION *critical; // expression to check the critical condition + + // ------------------------------------------------------------------------ + // notification delay settings + + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + // while now < delay_up_to + + // ------------------------------------------------------------------------ + // notification repeat settings + + uint32_t warn_repeat_every; // interval between repeating warning notifications + uint32_t crit_repeat_every; // interval between repeating critical notifications + + // ------------------------------------------------------------------------ + // Labels settings + STRING *host_labels; // the label read from an alarm file + SIMPLE_PATTERN *host_labels_pattern; // the simple pattern of labels + + // ------------------------------------------------------------------------ + // runtime information + + RRDCALC_STATUS old_status; // the old status of the alarm + RRDCALC_STATUS status; // the current status of the alarm + + NETDATA_DOUBLE value; // the current value of the alarm + NETDATA_DOUBLE old_value; // the previous value of the alarm + + RRDCALC_FLAGS run_flags; // check RRDCALC_FLAG_* + + time_t last_updated; // the last update timestamp of the alarm + time_t next_update; // the next update timestamp of the alarm + time_t last_status_change; // the timestamp of the last time this alarm changed status + time_t last_repeat; // the last time the alarm got repeated + uint32_t times_repeat; // number of times the alarm got repeated + + time_t db_after; // the first timestamp evaluated by the db lookup + time_t db_before; // the last timestamp evaluated by the db lookup + + time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications + int delay_up_current; // the current up notification delay duration + int delay_down_current; // the current down notification delay duration + int delay_last; // the last delay we used + + // ------------------------------------------------------------------------ + // variables this alarm exposes to the rest of the alarms + + const RRDVAR_ACQUIRED *rrdvar_local; + const RRDVAR_ACQUIRED *rrdvar_family; + const RRDVAR_ACQUIRED *rrdvar_host_chart_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name; + + // ------------------------------------------------------------------------ + // the chart this alarm it is linked to + + size_t labels_version; + struct rrdset *rrdset; + + struct rrdcalc *next; + struct rrdcalc *prev; +}; + +#define rrdcalc_name(rc) string2str((rc)->name) +#define rrdcalc_chart_name(rc) string2str((rc)->chart) +#define rrdcalc_exec(rc) string2str((rc)->exec) +#define rrdcalc_recipient(rc) string2str((rc)->recipient) +#define rrdcalc_classification(rc) string2str((rc)->classification) +#define rrdcalc_component(rc) string2str((rc)->component) +#define rrdcalc_type(rc) string2str((rc)->type) +#define rrdcalc_plugin_match(rc) string2str((rc)->plugin_match) +#define rrdcalc_module_match(rc) string2str((rc)->module_match) +#define rrdcalc_source(rc) string2str((rc)->source) +#define rrdcalc_units(rc) string2str((rc)->units) +#define rrdcalc_original_info(rc) string2str((rc)->original_info) +#define rrdcalc_info(rc) string2str((rc)->info) +#define rrdcalc_dimensions(rc) string2str((rc)->dimensions) +#define rrdcalc_foreachdim(rc) string2str((rc)->foreach_dimension) +#define rrdcalc_host_labels(rc) string2str((rc)->host_labels) + +#define foreach_rrdcalc_in_rrdhost_read(host, rc) \ + dfe_start_read((host)->rrdcalc_root_index, rc) \ + +#define foreach_rrdcalc_in_rrdhost_reentrant(host, rc) \ + dfe_start_reentrant((host)->rrdcalc_root_index, rc) + +#define foreach_rrdcalc_in_rrdhost_done(rc) \ + dfe_done(rc) + +struct alert_config { + STRING *alarm; + STRING *template_key; + STRING *os; + STRING *host; + STRING *on; + STRING *families; + STRING *plugin; + STRING *module; + STRING *charts; + STRING *lookup; + STRING *calc; + STRING *warn; + STRING *crit; + STRING *every; + STRING *green; + STRING *red; + STRING *exec; + STRING *to; + STRING *units; + STRING *info; + STRING *classification; + STRING *component; + STRING *type; + STRING *delay; + STRING *options; + STRING *repeat; + STRING *host_labels; + + STRING *p_db_lookup_dimensions; + STRING *p_db_lookup_method; + + uint32_t p_db_lookup_options; + int32_t p_db_lookup_after; + int32_t p_db_lookup_before; + int32_t p_update_every; +}; + +#define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after) + +void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc); + +void rrdcalc_link_matching_alerts_to_rrdset(RRDSET *st); + +const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name); +void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca); +RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca); + +const char *rrdcalc_status2string(RRDCALC_STATUS status); + +void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc); + +uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id); +void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions); +int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc); + +void rrdcalc_delete_alerts_not_matching_host_labels_from_all_hosts(); +void rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(RRDHOST *host); + +static inline int rrdcalc_isrepeating(RRDCALC *rc) { + if (unlikely(rc->warn_repeat_every > 0 || rc->crit_repeat_every > 0)) { + return 1; + } + return 0; +} + +void rrdcalc_unlink_all_rrdset_alerts(RRDSET *st); +void rrdcalc_delete_all(RRDHOST *host); + +void rrdcalc_rrdhost_index_init(RRDHOST *host); +void rrdcalc_rrdhost_index_destroy(RRDHOST *host); + +#define RRDCALC_VAR_MAX 100 +#define RRDCALC_VAR_FAMILY "$family" +#define RRDCALC_VAR_LABEL "$label:" +#define RRDCALC_VAR_LABEL_LEN (sizeof(RRDCALC_VAR_LABEL)-1) + +#endif //NETDATA_RRDCALC_H diff --git a/database/rrdcalctemplate.c b/database/rrdcalctemplate.c new file mode 100644 index 0000000..87e085c --- /dev/null +++ b/database/rrdcalctemplate.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDCALCTEMPLATE management +/** + * RRDCALC TEMPLATE LINK MATCHING + * + * @param rt is the template used to create the chart. + * @param st is the chart where the alarm will be attached. + */ + +static char *rrdcalc_alert_name_with_dimension(const char *name, size_t namelen, const char *dim, size_t dimlen) { + char *newname,*move; + + newname = mallocz(namelen + dimlen + 2); + move = newname; + memcpy(move, name, namelen); + move += namelen; + + *move++ = '_'; + memcpy(move, dim, dimlen); + move += dimlen; + *move = '\0'; + + return newname; +} + +bool rrdcalctemplate_check_rrdset_conditions(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host) { + if(rt->context != st->context) + return false; + + if(rt->foreach_dimension_pattern && !rrdset_number_of_dimensions(st)) + return false; + + if (rt->charts_pattern && !simple_pattern_matches(rt->charts_pattern, rrdset_name(st)) && !simple_pattern_matches(rt->charts_pattern, rrdset_id(st))) + return false; + + if (rt->family_pattern && !simple_pattern_matches(rt->family_pattern, rrdset_family(st))) + return false; + + if (rt->module_pattern && !simple_pattern_matches(rt->module_pattern, rrdset_module_name(st))) + return false; + + if (rt->plugin_pattern && !simple_pattern_matches(rt->plugin_pattern, rrdset_plugin_name(st))) + return false; + + if(host->rrdlabels && rt->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rt->host_labels_pattern, '=')) + return false; + + return true; +} + +void rrdcalctemplate_check_rrddim_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDDIM *rd, RRDHOST *host) { + if (simple_pattern_matches(rt->foreach_dimension_pattern, rrddim_id(rd)) || simple_pattern_matches(rt->foreach_dimension_pattern, rrddim_name(rd))) { + char *overwrite_alert_name = rrdcalc_alert_name_with_dimension( + rrdcalctemplate_name(rt), string_strlen(rt->name), rrddim_name(rd), string_strlen(rd->name)); + rrdcalc_add_from_rrdcalctemplate(host, rt, st, overwrite_alert_name, rrddim_name(rd)); + freez(overwrite_alert_name); + } +} + +void rrdcalctemplate_check_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host) { + if(!rrdcalctemplate_check_rrdset_conditions(rt, st, host)) + return; + + if(!rt->foreach_dimension_pattern) { + rrdcalc_add_from_rrdcalctemplate(host, rt, st, NULL, NULL); + return; + } + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + rrdcalctemplate_check_rrddim_conditions_and_link(rt, st, rd, host); + } + rrddim_foreach_done(rd); +} + +void rrdcalctemplate_link_matching_templates_to_rrdset(RRDSET *st) { + RRDHOST *host = st->rrdhost; + + RRDCALCTEMPLATE *rt; + foreach_rrdcalctemplate_read(host, rt) { + rrdcalctemplate_check_conditions_and_link(rt, st, host); + } + foreach_rrdcalctemplate_done(rt); +} + +static void rrdcalctemplate_free_internals(RRDCALCTEMPLATE *rt) { + expression_free(rt->calculation); + expression_free(rt->warning); + expression_free(rt->critical); + + string_freez(rt->family_match); + simple_pattern_free(rt->family_pattern); + + string_freez(rt->plugin_match); + simple_pattern_free(rt->plugin_pattern); + + string_freez(rt->module_match); + simple_pattern_free(rt->module_pattern); + + string_freez(rt->charts_match); + simple_pattern_free(rt->charts_pattern); + + string_freez(rt->name); + string_freez(rt->exec); + string_freez(rt->recipient); + string_freez(rt->classification); + string_freez(rt->component); + string_freez(rt->type); + string_freez(rt->context); + string_freez(rt->source); + string_freez(rt->units); + string_freez(rt->info); + string_freez(rt->dimensions); + string_freez(rt->foreach_dimension); + string_freez(rt->host_labels); + simple_pattern_free(rt->foreach_dimension_pattern); + simple_pattern_free(rt->host_labels_pattern); +} + +void rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(RRDCALCTEMPLATE *rt) { + if(unlikely(!rt)) return; + + rrdcalctemplate_free_internals(rt); + freez(rt); +} +static void rrdcalctemplate_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalctemplate, void *added_bool) { + RRDCALCTEMPLATE *rt = rrdcalctemplate; (void)rt; + + bool *added = added_bool; + *added = true; + + debug(D_HEALTH, "Health configuration adding template '%s'" + ": context '%s'" + ", exec '%s'" + ", recipient '%s'" + ", green " NETDATA_DOUBLE_FORMAT_AUTO + ", red " NETDATA_DOUBLE_FORMAT_AUTO + ", lookup: group %d" + ", after %d" + ", before %d" + ", options %u" + ", dimensions '%s'" + ", for each dimension '%s'" + ", update every %d" + ", calculation '%s'" + ", warning '%s'" + ", critical '%s'" + ", source '%s'" + ", delay up %d" + ", delay down %d" + ", delay max %d" + ", delay_multiplier %f" + ", warn_repeat_every %u" + ", crit_repeat_every %u", + rrdcalctemplate_name(rt), + (rt->context)?string2str(rt->context):"NONE", + (rt->exec)?rrdcalctemplate_exec(rt):"DEFAULT", + (rt->recipient)?rrdcalctemplate_recipient(rt):"DEFAULT", + rt->green, + rt->red, + (int)rt->group, + rt->after, + rt->before, + rt->options, + (rt->dimensions)?rrdcalctemplate_dimensions(rt):"NONE", + (rt->foreach_dimension)?rrdcalctemplate_foreachdim(rt):"NONE", + rt->update_every, + (rt->calculation)?rt->calculation->parsed_as:"NONE", + (rt->warning)?rt->warning->parsed_as:"NONE", + (rt->critical)?rt->critical->parsed_as:"NONE", + rrdcalctemplate_source(rt), + rt->delay_up_duration, + rt->delay_down_duration, + rt->delay_max_duration, + rt->delay_multiplier, + rt->warn_repeat_every, + rt->crit_repeat_every + ); +} + +static void rrdcalctemplate_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalctemplate, void *rrdhost __maybe_unused) { + RRDCALCTEMPLATE *rt = rrdcalctemplate; + rrdcalctemplate_free_internals(rt); +} + +void rrdcalctemplate_index_init(RRDHOST *host) { + if(!host->rrdcalctemplate_root_index) { + host->rrdcalctemplate_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdcalctemplate_root_index, rrdcalctemplate_insert_callback, NULL); + dictionary_register_delete_callback(host->rrdcalctemplate_root_index, rrdcalctemplate_delete_callback, host); + } +} + +void rrdcalctemplate_index_destroy(RRDHOST *host) { + dictionary_destroy(host->rrdcalctemplate_root_index); + host->rrdcalctemplate_root_index = NULL; +} + +inline void rrdcalctemplate_delete_all(RRDHOST *host) { + dictionary_flush(host->rrdcalctemplate_root_index); +} + +#define RRDCALCTEMPLATE_MAX_KEY_SIZE 1024 +static size_t rrdcalctemplate_key(char *dst, size_t dst_len, const char *name, const char *family_match) { + return snprintfz(dst, dst_len, "%s/%s", name, (family_match && *family_match)?family_match:"*"); +} + +void rrdcalctemplate_add_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt) { + if(unlikely(!rt->context)) { + error("Health configuration for template '%s' does not have a context", rrdcalctemplate_name(rt)); + return; + } + + if(unlikely(!rt->update_every)) { + error("Health configuration for template '%s' has no frequency (parameter 'every'). Ignoring it.", rrdcalctemplate_name(rt)); + return; + } + + if(unlikely(!RRDCALCTEMPLATE_HAS_DB_LOOKUP(rt) && !rt->calculation && !rt->warning && !rt->critical)) { + error("Health configuration for template '%s' is useless (no calculation, no warning and no critical evaluation)", rrdcalctemplate_name(rt)); + return; + } + + char key[RRDCALCTEMPLATE_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalctemplate_key(key, RRDCALCTEMPLATE_MAX_KEY_SIZE, rrdcalctemplate_name(rt), rrdcalctemplate_family_match(rt)); + + bool added = false; + dictionary_set_advanced(host->rrdcalctemplate_root_index, key, (ssize_t)(key_len + 1), rt, sizeof(*rt), &added); + + if(added) + freez(rt); + else { + info("Health configuration template '%s' already exists for host '%s'.", rrdcalctemplate_name(rt), rrdhost_hostname(host)); + rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(rt); + } +} diff --git a/database/rrdcalctemplate.h b/database/rrdcalctemplate.h new file mode 100644 index 0000000..6212a42 --- /dev/null +++ b/database/rrdcalctemplate.h @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDCALCTEMPLATE_H +#define NETDATA_RRDCALCTEMPLATE_H 1 + +#include "rrd.h" + +// RRDCALCTEMPLATE +// these are to be applied to charts found dynamically +// based on their context. +struct rrdcalctemplate { + uuid_t config_hash_id; + + STRING *name; + + STRING *exec; + STRING *recipient; + + STRING *classification; + STRING *component; + STRING *type; + + STRING *context; + + STRING *family_match; + SIMPLE_PATTERN *family_pattern; + + STRING *plugin_match; + SIMPLE_PATTERN *plugin_pattern; + + STRING *module_match; + SIMPLE_PATTERN *module_pattern; + + STRING *charts_match; + SIMPLE_PATTERN *charts_pattern; + + STRING *source; // the source of this alarm + STRING *units; // the units of the alarm + STRING *info; // a short description of the alarm + + int update_every; // update frequency for the alarm + + // the red and green threshold of this alarm (to be set to the chart) + NETDATA_DOUBLE green; + NETDATA_DOUBLE red; + + // ------------------------------------------------------------------------ + // database lookup settings + + STRING *dimensions; // the chart dimensions + STRING *foreach_dimension; // the group of dimensions that the lookup will be applied. + SIMPLE_PATTERN *foreach_dimension_pattern; // used if and only if there is a simple pattern for the chart. + RRDR_GROUPING group; // grouping method: average, max, etc. + int before; // ending point in time-series + int after; // starting point in time-series + RRDCALC_OPTIONS options; // configuration options + + // ------------------------------------------------------------------------ + // notification delay settings + + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + + // ------------------------------------------------------------------------ + // notification repeat settings + + uint32_t warn_repeat_every; // interval between repeating warning notifications + uint32_t crit_repeat_every; // interval between repeating critical notifications + + // ------------------------------------------------------------------------ + // Labels settings + STRING *host_labels; // the label read from an alarm file + SIMPLE_PATTERN *host_labels_pattern; // the simple pattern of labels + + // ------------------------------------------------------------------------ + // expressions related to the alarm + + EVAL_EXPRESSION *calculation; + EVAL_EXPRESSION *warning; + EVAL_EXPRESSION *critical; + + struct rrdcalctemplate *next; + struct rrdcalctemplate *prev; +}; + +#define foreach_rrdcalctemplate_read(host, rt) \ + dfe_start_read((host)->rrdcalctemplate_root_index, rt) + +#define foreach_rrdcalctemplate_done(rt) \ + dfe_done(rt) + +#define rrdcalctemplate_name(rt) string2str((rt)->name) +#define rrdcalctemplate_exec(rt) string2str((rt)->exec) +#define rrdcalctemplate_recipient(rt) string2str((rt)->recipient) +#define rrdcalctemplate_classification(rt) string2str((rt)->classification) +#define rrdcalctemplate_component(rt) string2str((rt)->component) +#define rrdcalctemplate_type(rt) string2str((rt)->type) +#define rrdcalctemplate_family_match(rt) string2str((rt)->family_match) +#define rrdcalctemplate_plugin_match(rt) string2str((rt)->plugin_match) +#define rrdcalctemplate_module_match(rt) string2str((rt)->module_match) +#define rrdcalctemplate_charts_match(rt) string2str((rt)->charts_match) +#define rrdcalctemplate_units(rt) string2str((rt)->units) +#define rrdcalctemplate_info(rt) string2str((rt)->info) +#define rrdcalctemplate_source(rt) string2str((rt)->source) +#define rrdcalctemplate_dimensions(rt) string2str((rt)->dimensions) +#define rrdcalctemplate_foreachdim(rt) string2str((rt)->foreach_dimension) +#define rrdcalctemplate_host_labels(rt) string2str((rt)->host_labels) + +#define RRDCALCTEMPLATE_HAS_DB_LOOKUP(rt) ((rt)->after) + +void rrdcalctemplate_link_matching_templates_to_rrdset(RRDSET *st); + +void rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(RRDCALCTEMPLATE *rt); +void rrdcalctemplate_delete_all(RRDHOST *host); +void rrdcalctemplate_add_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt); + +void rrdcalctemplate_check_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host); + +bool rrdcalctemplate_check_rrdset_conditions(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host); +void rrdcalctemplate_check_rrddim_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDDIM *rd, RRDHOST *host); + + +void rrdcalctemplate_index_init(RRDHOST *host); +void rrdcalctemplate_index_destroy(RRDHOST *host); + +#endif //NETDATA_RRDCALCTEMPLATE_H diff --git a/database/rrdcontext.c b/database/rrdcontext.c new file mode 100644 index 0000000..3413d1e --- /dev/null +++ b/database/rrdcontext.c @@ -0,0 +1,3937 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdcontext.h" +#include "sqlite/sqlite_context.h" +#include "aclk/schema-wrappers/context.h" +#include "aclk/aclk_contexts_api.h" +#include "aclk/aclk.h" +#include "storage_engine.h" + +#define MESSAGES_PER_BUNDLE_TO_SEND_TO_HUB_PER_HOST 5000 +#define FULL_RETENTION_SCAN_DELAY_AFTER_DB_ROTATION_SECS 120 +#define RRDCONTEXT_WORKER_THREAD_HEARTBEAT_USEC (1000 * USEC_PER_MS) +#define RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY 10 + +#define LOG_TRANSITIONS false + +#define WORKER_JOB_HOSTS 1 +#define WORKER_JOB_CHECK 2 +#define WORKER_JOB_SEND 3 +#define WORKER_JOB_DEQUEUE 4 +#define WORKER_JOB_RETENTION 5 +#define WORKER_JOB_QUEUED 6 +#define WORKER_JOB_CLEANUP 7 +#define WORKER_JOB_CLEANUP_DELETE 8 +#define WORKER_JOB_PP_METRIC 9 // post-processing metrics +#define WORKER_JOB_PP_INSTANCE 10 // post-processing instances +#define WORKER_JOB_PP_CONTEXT 11 // post-processing contexts +#define WORKER_JOB_HUB_QUEUE_SIZE 12 +#define WORKER_JOB_PP_QUEUE_SIZE 13 + + +typedef enum __attribute__ ((__packed__)) { + RRD_FLAG_NONE = 0, + RRD_FLAG_DELETED = (1 << 0), // this is a deleted object (metrics, instances, contexts) + RRD_FLAG_COLLECTED = (1 << 1), // this object is currently being collected + RRD_FLAG_UPDATED = (1 << 2), // this object has updates to propagate + RRD_FLAG_ARCHIVED = (1 << 3), // this object is not currently being collected + RRD_FLAG_OWN_LABELS = (1 << 4), // this instance has its own labels - not linked to an RRDSET + RRD_FLAG_LIVE_RETENTION = (1 << 5), // we have got live retention from the database + RRD_FLAG_QUEUED_FOR_HUB = (1 << 6), // this context is currently queued to be dispatched to hub + RRD_FLAG_QUEUED_FOR_PP = (1 << 7), // this context is currently queued to be post-processed + RRD_FLAG_HIDDEN = (1 << 8), // don't expose this to the hub or the API + + RRD_FLAG_UPDATE_REASON_TRIGGERED = (1 << 9), // the update was triggered by the child object + RRD_FLAG_UPDATE_REASON_LOAD_SQL = (1 << 10), // this object has just been loaded from SQL + RRD_FLAG_UPDATE_REASON_NEW_OBJECT = (1 << 11), // this object has just been created + RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT = (1 << 12), // we received an update on this object + RRD_FLAG_UPDATE_REASON_CHANGED_LINKING = (1 << 13), // an instance or a metric switched RRDSET or RRDDIM + RRD_FLAG_UPDATE_REASON_CHANGED_METADATA = (1 << 14), // this context or instance changed uuid, name, units, title, family, chart type, priority, update every, rrd changed flags + RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 15), // this object has no retention + RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 16), // this object changed its oldest time in the db + RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 17), // this object change its latest time in the db + RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 18), // this object has stopped being collected + RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 19), // this object has started being collected + RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 20), // this context belongs to a host that just disconnected + RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 21), // this context is not used anymore + RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 22), // this context changed because of a db rotation + + // action to perform on an object + RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION = (1 << 30), // this object has to update its retention from the db +} RRD_FLAGS; + +#define RRD_FLAG_ALL_UPDATE_REASONS ( \ + RRD_FLAG_UPDATE_REASON_TRIGGERED \ + |RRD_FLAG_UPDATE_REASON_LOAD_SQL \ + |RRD_FLAG_UPDATE_REASON_NEW_OBJECT \ + |RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT \ + |RRD_FLAG_UPDATE_REASON_CHANGED_LINKING \ + |RRD_FLAG_UPDATE_REASON_CHANGED_METADATA \ + |RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + |RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T \ + |RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T \ + |RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED \ + |RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED \ + |RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \ + |RRD_FLAG_UPDATE_REASON_DB_ROTATION \ + |RRD_FLAG_UPDATE_REASON_UNUSED \ + ) + +#define RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS ( \ + RRD_FLAG_ARCHIVED \ + |RRD_FLAG_HIDDEN \ + |RRD_FLAG_ALL_UPDATE_REASONS \ + ) + +#define RRD_FLAGS_REQUIRED_FOR_DELETIONS ( \ + RRD_FLAG_DELETED \ + |RRD_FLAG_LIVE_RETENTION \ +) + +#define RRD_FLAGS_PREVENTING_DELETIONS ( \ + RRD_FLAG_QUEUED_FOR_HUB \ + |RRD_FLAG_COLLECTED \ + |RRD_FLAG_QUEUED_FOR_PP \ +) + +// get all the flags of an object +#define rrd_flags_get(obj) __atomic_load_n(&((obj)->flags), __ATOMIC_SEQ_CST) + +// check if ANY of the given flags (bits) is set +#define rrd_flag_check(obj, flag) (rrd_flags_get(obj) & (flag)) + +// check if ALL the given flags (bits) are set +#define rrd_flag_check_all(obj, flag) (rrd_flag_check(obj, flag) == (flag)) + +// set one or more flags (bits) +#define rrd_flag_set(obj, flag) __atomic_or_fetch(&((obj)->flags), flag, __ATOMIC_SEQ_CST) + +// clear one or more flags (bits) +#define rrd_flag_clear(obj, flag) __atomic_and_fetch(&((obj)->flags), ~(flag), __ATOMIC_SEQ_CST) + +// replace the flags of an object, with the supplied ones +#define rrd_flags_replace(obj, all_flags) __atomic_store_n(&((obj)->flags), all_flags, __ATOMIC_SEQ_CST) + +static inline void +rrd_flag_add_remove_atomic(RRD_FLAGS *flags, RRD_FLAGS check, RRD_FLAGS conditionally_add, RRD_FLAGS always_remove) { + RRD_FLAGS expected, desired; + + do { + expected = *flags; + + desired = expected; + desired &= ~(always_remove); + + if(!(expected & check)) + desired |= (check | conditionally_add); + + } while(!__atomic_compare_exchange_n(flags, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); +} + +#define rrd_flag_set_collected(obj) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_COLLECTED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED | RRD_FLAG_UPDATED \ + \ + /* always remove these flags */ \ + , RRD_FLAG_ARCHIVED \ + | RRD_FLAG_DELETED \ + | RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED \ + | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + | RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \ + ) + +#define rrd_flag_set_archived(obj) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_ARCHIVED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED | RRD_FLAG_UPDATED \ + \ + /* always remove these flags */ \ + , RRD_FLAG_COLLECTED \ + | RRD_FLAG_DELETED \ + | RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED \ + | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + ) + +#define rrd_flag_set_deleted(obj, reason) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_DELETED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_ZERO_RETENTION | RRD_FLAG_UPDATED | (reason) \ + \ + /* always remove these flags */ \ + , RRD_FLAG_ARCHIVED \ + | RRD_FLAG_COLLECTED \ + ) + +#define rrd_flag_is_collected(obj) rrd_flag_check(obj, RRD_FLAG_COLLECTED) +#define rrd_flag_is_archived(obj) rrd_flag_check(obj, RRD_FLAG_ARCHIVED) +#define rrd_flag_is_deleted(obj) rrd_flag_check(obj, RRD_FLAG_DELETED) +#define rrd_flag_is_updated(obj) rrd_flag_check(obj, RRD_FLAG_UPDATED) + +// mark an object as updated, providing reasons (additional bits) +#define rrd_flag_set_updated(obj, reason) rrd_flag_set(obj, RRD_FLAG_UPDATED | (reason)) + +// clear an object as being updated, clearing also all the reasons +#define rrd_flag_unset_updated(obj) rrd_flag_clear(obj, RRD_FLAG_UPDATED | RRD_FLAG_ALL_UPDATE_REASONS) + + +static struct rrdcontext_reason { + RRD_FLAGS flag; + const char *name; + usec_t delay_ut; +} rrdcontext_reasons[] = { + // context related + {RRD_FLAG_UPDATE_REASON_TRIGGERED, "triggered transition", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_METADATA, "changed metadata", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 5 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 5 * USEC_PER_SEC }, + + // not context related + {RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION, "updated retention", 65 * USEC_PER_SEC }, + + // terminator + {0, NULL, 0 }, +}; + + +typedef struct rrdmetric { + uuid_t uuid; + + STRING *id; + STRING *name; + + RRDDIM *rrddim; + + time_t first_time_t; + time_t last_time_t; + RRD_FLAGS flags; + + struct rrdinstance *ri; +} RRDMETRIC; + +typedef struct rrdinstance { + uuid_t uuid; + + STRING *id; + STRING *name; + STRING *title; + STRING *units; + STRING *family; + uint32_t priority; + RRDSET_TYPE chart_type; + + RRD_FLAGS flags; // flags related to this instance + time_t first_time_t; + time_t last_time_t; + + int update_every; // data collection frequency + RRDSET *rrdset; // pointer to RRDSET when collected, or NULL + + DICTIONARY *rrdlabels; // linked to RRDSET->chart_labels or own version + + struct rrdcontext *rc; + DICTIONARY *rrdmetrics; + + struct { + uint32_t collected_metrics_count; // a temporary variable to detect BEGIN/END without SET + // don't use it for other purposes + // it goes up and then resets to zero, on every iteration + } internal; +} RRDINSTANCE; + +typedef struct rrdcontext { + uint64_t version; + + STRING *id; + STRING *title; + STRING *units; + STRING *family; + uint32_t priority; + RRDSET_TYPE chart_type; + + RRD_FLAGS flags; + time_t first_time_t; + time_t last_time_t; + + VERSIONED_CONTEXT_DATA hub; + + DICTIONARY *rrdinstances; + RRDHOST *rrdhost; + + struct { + RRD_FLAGS queued_flags; // the last flags that triggered the post-processing + usec_t queued_ut; // the last time this was queued + usec_t dequeued_ut; // the last time we sent (or deduplicated) this context + size_t executions; // how many times this context has been processed + } pp; + + struct { + RRD_FLAGS queued_flags; // the last flags that triggered the queueing + usec_t queued_ut; // the last time this was queued + usec_t delay_calc_ut; // the last time we calculated the scheduled_dispatched_ut + usec_t scheduled_dispatch_ut; // the time it was/is scheduled to be sent + usec_t dequeued_ut; // the last time we sent (or deduplicated) this context + size_t dispatches; // the number of times this has been dispatched to hub + } queue; + + netdata_mutex_t mutex; +} RRDCONTEXT; + +// ---------------------------------------------------------------------------- +// helper one-liners for RRDMETRIC + +static bool rrdmetric_update_retention(RRDMETRIC *rm); + +static inline RRDMETRIC *rrdmetric_acquired_value(RRDMETRIC_ACQUIRED *rma) { + return dictionary_acquired_item_value((DICTIONARY_ITEM *)rma); +} + +static inline RRDMETRIC_ACQUIRED *rrdmetric_acquired_dup(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return (RRDMETRIC_ACQUIRED *)dictionary_acquired_item_dup(rm->ri->rrdmetrics, (DICTIONARY_ITEM *)rma); +} + +static inline void rrdmetric_release(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + dictionary_acquired_item_release(rm->ri->rrdmetrics, (DICTIONARY_ITEM *)rma); +} + +const char *rrdmetric_acquired_id(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string2str(rm->id); +} + +const char *rrdmetric_acquired_name(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string2str(rm->name); +} + +NETDATA_DOUBLE rrdmetric_acquired_last_stored_value(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + + if(rm->rrddim) + return rm->rrddim->last_stored_value; + + return NAN; +} + +// ---------------------------------------------------------------------------- +// helper one-liners for RRDINSTANCE + +static inline RRDINSTANCE *rrdinstance_acquired_value(RRDINSTANCE_ACQUIRED *ria) { + return dictionary_acquired_item_value((DICTIONARY_ITEM *)ria); +} + +static inline RRDINSTANCE_ACQUIRED *rrdinstance_acquired_dup(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return (RRDINSTANCE_ACQUIRED *)dictionary_acquired_item_dup(ri->rc->rrdinstances, (DICTIONARY_ITEM *)ria); +} + +static inline void rrdinstance_release(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + dictionary_acquired_item_release(ri->rc->rrdinstances, (DICTIONARY_ITEM *)ria); +} + +const char *rrdinstance_acquired_id(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string2str(ri->id); +} + +const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string2str(ri->name); +} + +DICTIONARY *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return ri->rrdlabels; +} + +DICTIONARY *rrdinstance_acquired_functions(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + if(!ri->rrdset) return NULL; + return ri->rrdset->functions_view; +} + +// ---------------------------------------------------------------------------- +// helper one-liners for RRDCONTEXT + +static inline RRDCONTEXT *rrdcontext_acquired_value(RRDCONTEXT_ACQUIRED *rca) { + return dictionary_acquired_item_value((DICTIONARY_ITEM *)rca); +} + +const char *rrdcontext_acquired_id(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return string2str(rc->id); +} + +static inline RRDCONTEXT_ACQUIRED *rrdcontext_acquired_dup(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return (RRDCONTEXT_ACQUIRED *)dictionary_acquired_item_dup((DICTIONARY *)rc->rrdhost->rrdctx, (DICTIONARY_ITEM *)rca); +} + +static inline void rrdcontext_release(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + dictionary_acquired_item_release((DICTIONARY *)rc->rrdhost->rrdctx, (DICTIONARY_ITEM *)rca); +} + +static void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, bool worker_jobs); +static void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, bool worker_jobs); + +#define rrdcontext_version_hash(host) rrdcontext_version_hash_with_callback(host, NULL, false, NULL) +static uint64_t rrdcontext_version_hash_with_callback(RRDHOST *host, void (*callback)(RRDCONTEXT *, bool, void *), bool snapshot, void *bundle); + +static void rrdcontext_garbage_collect_single_host(RRDHOST *host, bool worker_jobs); +static void rrdcontext_garbage_collect_for_all_hosts(void); + +#define rrdcontext_lock(rc) netdata_mutex_lock(&((rc)->mutex)) +#define rrdcontext_unlock(rc) netdata_mutex_unlock(&((rc)->mutex)) + +// ---------------------------------------------------------------------------- +// Forward definitions + +static uint64_t rrdcontext_get_next_version(RRDCONTEXT *rc); +static bool check_if_cloud_version_changed_unsafe(RRDCONTEXT *rc, bool sending __maybe_unused); +static void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused, void *bundle __maybe_unused); + +static void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc); + +static void rrdcontext_dequeue_from_post_processing(RRDCONTEXT *rc); +static void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function, RRD_FLAGS flags); +static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAGS reason, bool worker_jobs); + +static void rrdmetric_trigger_updates(RRDMETRIC *rm, const char *function); +static void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function); +static void rrdcontext_trigger_updates(RRDCONTEXT *rc, const char *function); + +// ---------------------------------------------------------------------------- +// visualizing flags + +static void rrd_flags_to_buffer(RRD_FLAGS flags, BUFFER *wb) { + if(flags & RRD_FLAG_QUEUED_FOR_HUB) + buffer_strcat(wb, "QUEUED "); + + if(flags & RRD_FLAG_DELETED) + buffer_strcat(wb, "DELETED "); + + if(flags & RRD_FLAG_COLLECTED) + buffer_strcat(wb, "COLLECTED "); + + if(flags & RRD_FLAG_UPDATED) + buffer_strcat(wb, "UPDATED "); + + if(flags & RRD_FLAG_ARCHIVED) + buffer_strcat(wb, "ARCHIVED "); + + if(flags & RRD_FLAG_OWN_LABELS) + buffer_strcat(wb, "OWN_LABELS "); + + if(flags & RRD_FLAG_LIVE_RETENTION) + buffer_strcat(wb, "LIVE_RETENTION "); + + if(flags & RRD_FLAG_HIDDEN) + buffer_strcat(wb, "HIDDEN "); + + if(flags & RRD_FLAG_QUEUED_FOR_PP) + buffer_strcat(wb, "PENDING_UPDATES "); +} + +static void rrd_reasons_to_buffer(RRD_FLAGS flags, BUFFER *wb) { + for(int i = 0, added = 0; rrdcontext_reasons[i].name ; i++) { + if (flags & rrdcontext_reasons[i].flag) { + if (added) + buffer_strcat(wb, ", "); + buffer_strcat(wb, rrdcontext_reasons[i].name); + added++; + } + } +} + +// ---------------------------------------------------------------------------- +// RRDMETRIC + +// free the contents of RRDMETRIC. +// RRDMETRIC itself is managed by DICTIONARY - no need to free it here. +static void rrdmetric_free(RRDMETRIC *rm) { + string_freez(rm->id); + string_freez(rm->name); + + rm->id = NULL; + rm->name = NULL; + rm->ri = NULL; +} + +// called when this rrdmetric is inserted to the rrdmetrics dictionary of a rrdinstance +// the constructor of the rrdmetric object +static void rrdmetric_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance) { + RRDMETRIC *rm = value; + + // link it to its parent + rm->ri = rrdinstance; + + // remove flags that we need to figure out at runtime + rm->flags = rm->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics + + // signal the react callback to do the job + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); +} + +// called when this rrdmetric is deleted from the rrdmetrics dictionary of a rrdinstance +// the destructor of the rrdmetric object +static void rrdmetric_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance __maybe_unused) { + RRDMETRIC *rm = value; + + internal_error(rm->rrddim, "RRDMETRIC: '%s' is freed but there is a RRDDIM linked to it.", string2str(rm->id)); + + // free the resources + rrdmetric_free(rm); +} + +// called when the same rrdmetric is inserted again to the rrdmetrics dictionary of a rrdinstance +// while this is called, the dictionary is write locked, but there may be other users of the object +static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdinstance __maybe_unused) { + RRDMETRIC *rm = old_value; + RRDMETRIC *rm_new = new_value; + + internal_error(rm->id != rm_new->id, + "RRDMETRIC: '%s' cannot change id to '%s'", + string2str(rm->id), string2str(rm_new->id)); + + if(uuid_compare(rm->uuid, rm_new->uuid) != 0) { +#ifdef NETDATA_INTERNAL_CHECKS + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(rm->uuid, uuid1); + uuid_unparse(rm_new->uuid, uuid2); + + time_t old_first_time_t = 0; + time_t old_last_time_t = 0; + if(rrdmetric_update_retention(rm)) { + old_first_time_t = rm->first_time_t; + old_last_time_t = rm->last_time_t; + } + + uuid_copy(rm->uuid, rm_new->uuid); + + time_t new_first_time_t = 0; + time_t new_last_time_t = 0; + if(rrdmetric_update_retention(rm)) { + new_first_time_t = rm->first_time_t; + new_last_time_t = rm->last_time_t; + } + + internal_error(true, + "RRDMETRIC: '%s' of instance '%s' of host '%s' changed UUID from '%s' (retention %ld to %ld, %ld secs) to '%s' (retention %ld to %ld, %ld secs)" + , string2str(rm->id) + , string2str(rm->ri->id) + , rrdhost_hostname(rm->ri->rc->rrdhost) + , uuid1, old_first_time_t, old_last_time_t, old_last_time_t - old_first_time_t + , uuid2, new_first_time_t, new_last_time_t, new_last_time_t - new_first_time_t + ); +#else + uuid_copy(rm->uuid, rm_new->uuid); +#endif + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rm->rrddim && rm_new->rrddim && rm->rrddim != rm_new->rrddim) { + rm->rrddim = rm_new->rrddim; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(rm->rrddim && uuid_compare(rm->uuid, rm->rrddim->metric_uuid) != 0) { + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(rm->uuid, uuid1); + uuid_unparse(rm_new->uuid, uuid2); + internal_error(true, "RRDMETRIC: '%s' is linked to RRDDIM '%s' but they have different UUIDs. RRDMETRIC has '%s', RRDDIM has '%s'", string2str(rm->id), rrddim_id(rm->rrddim), uuid1, uuid2); + } +#endif + + if(rm->rrddim != rm_new->rrddim) + rm->rrddim = rm_new->rrddim; + + if(rm->name != rm_new->name) { + STRING *old = rm->name; + rm->name = string_dup(rm_new->name); + string_freez(old); + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(!rm->first_time_t || (rm_new->first_time_t && rm_new->first_time_t < rm->first_time_t)) { + rm->first_time_t = rm_new->first_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(!rm->last_time_t || (rm_new->last_time_t && rm_new->last_time_t > rm->last_time_t)) { + rm->last_time_t = rm_new->last_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set(rm, rm_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no needs for atomics on rm_new + + if(rrd_flag_is_collected(rm) && rrd_flag_is_archived(rm)) + rrd_flag_set_collected(rm); + + if(rrd_flag_check(rm, RRD_FLAG_UPDATED)) + rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); + + rrdmetric_free(rm_new); + + // the react callback will continue from here + return rrd_flag_is_updated(rm); +} + +// this is called after the insert or the conflict callbacks, +// but the dictionary is now unlocked +static void rrdmetric_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance __maybe_unused) { + RRDMETRIC *rm = value; + rrdmetric_trigger_updates(rm, __FUNCTION__ ); +} + +static void rrdmetrics_create_in_rrdinstance(RRDINSTANCE *ri) { + if(unlikely(!ri)) return; + if(likely(ri->rrdmetrics)) return; + + ri->rrdmetrics = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(ri->rrdmetrics, rrdmetric_insert_callback, ri); + dictionary_register_delete_callback(ri->rrdmetrics, rrdmetric_delete_callback, ri); + dictionary_register_conflict_callback(ri->rrdmetrics, rrdmetric_conflict_callback, ri); + dictionary_register_react_callback(ri->rrdmetrics, rrdmetric_react_callback, ri); +} + +static void rrdmetrics_destroy_from_rrdinstance(RRDINSTANCE *ri) { + if(unlikely(!ri || !ri->rrdmetrics)) return; + dictionary_destroy(ri->rrdmetrics); + ri->rrdmetrics = NULL; +} + +// trigger post-processing of the rrdmetric, escalating changes to the rrdinstance it belongs +static void rrdmetric_trigger_updates(RRDMETRIC *rm, const char *function) { + if(unlikely(rrd_flag_is_collected(rm)) && (!rm->rrddim || rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD))) + rrd_flag_set_archived(rm); + + if(rrd_flag_is_updated(rm) || !rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION)) { + rrd_flag_set_updated(rm->ri, RRD_FLAG_UPDATE_REASON_TRIGGERED); + rrdcontext_queue_for_post_processing(rm->ri->rc, function, rm->flags); + } +} + +// ---------------------------------------------------------------------------- +// RRDMETRIC HOOKS ON RRDDIM + +static inline void rrdmetric_from_rrddim(RRDDIM *rd) { + if(unlikely(!rd->rrdset)) + fatal("RRDMETRIC: rrddim '%s' does not have a rrdset.", rrddim_id(rd)); + + if(unlikely(!rd->rrdset->rrdhost)) + fatal("RRDMETRIC: rrdset '%s' does not have a rrdhost", rrdset_id(rd->rrdset)); + + if(unlikely(!rd->rrdset->rrdinstance)) + fatal("RRDMETRIC: rrdset '%s' does not have a rrdinstance", rrdset_id(rd->rrdset)); + + RRDINSTANCE *ri = rrdinstance_acquired_value(rd->rrdset->rrdinstance); + + RRDMETRIC trm = { + .id = string_dup(rd->id), + .name = string_dup(rd->name), + .flags = RRD_FLAG_NONE, // no need for atomics + .rrddim = rd, + }; + uuid_copy(trm.uuid, rd->metric_uuid); + + RRDMETRIC_ACQUIRED *rma = (RRDMETRIC_ACQUIRED *)dictionary_set_and_acquire_item(ri->rrdmetrics, string2str(trm.id), &trm, sizeof(trm)); + + if(rd->rrdmetric) + rrdmetric_release(rd->rrdmetric); + + rd->rrdmetric = rma; +} + +#define rrddim_get_rrdmetric(rd) rrddim_get_rrdmetric_with_trace(rd, __FUNCTION__) +static inline RRDMETRIC *rrddim_get_rrdmetric_with_trace(RRDDIM *rd, const char *function) { + if(unlikely(!rd->rrdmetric)) { + error("RRDMETRIC: RRDDIM '%s' is not linked to an RRDMETRIC at %s()", rrddim_id(rd), function); + return NULL; + } + + RRDMETRIC *rm = rrdmetric_acquired_value(rd->rrdmetric); + if(unlikely(!rm)) { + error("RRDMETRIC: RRDDIM '%s' lost the link to its RRDMETRIC at %s()", rrddim_id(rd), function); + return NULL; + } + + if(unlikely(rm->rrddim != rd)) + fatal("RRDMETRIC: '%s' is not linked to RRDDIM '%s' at %s()", string2str(rm->id), rrddim_id(rd), function); + + return rm; +} + +static inline void rrdmetric_rrddim_is_freed(RRDDIM *rd) { + RRDMETRIC *rm = rrddim_get_rrdmetric(rd); + if(unlikely(!rm)) return; + + if(unlikely(rrd_flag_is_collected(rm))) + rrd_flag_set_archived(rm); + + rm->rrddim = NULL; + rrdmetric_trigger_updates(rm, __FUNCTION__ ); + rrdmetric_release(rd->rrdmetric); + rd->rrdmetric = NULL; +} + +static inline void rrdmetric_updated_rrddim_flags(RRDDIM *rd) { + RRDMETRIC *rm = rrddim_get_rrdmetric(rd); + if(unlikely(!rm)) return; + + if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED|RRDDIM_FLAG_OBSOLETE))) { + if(unlikely(rrd_flag_is_collected(rm))) + rrd_flag_set_archived(rm); + } + + rrdmetric_trigger_updates(rm, __FUNCTION__ ); +} + +static inline void rrdmetric_collected_rrddim(RRDDIM *rd) { + RRDMETRIC *rm = rrddim_get_rrdmetric(rd); + if(unlikely(!rm)) return; + + if(unlikely(!rrd_flag_is_collected(rm))) + rrd_flag_set_collected(rm); + + // we use this variable to detect BEGIN/END without SET + rm->ri->internal.collected_metrics_count++; + + rrdmetric_trigger_updates(rm, __FUNCTION__ ); +} + +// ---------------------------------------------------------------------------- +// RRDINSTANCE + +static void rrdinstance_free(RRDINSTANCE *ri) { + + if(rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) + dictionary_destroy(ri->rrdlabels); + + rrdmetrics_destroy_from_rrdinstance(ri); + string_freez(ri->id); + string_freez(ri->name); + string_freez(ri->title); + string_freez(ri->units); + string_freez(ri->family); + + ri->id = NULL; + ri->name = NULL; + ri->title = NULL; + ri->units = NULL; + ri->family = NULL; + ri->rc = NULL; + ri->rrdlabels = NULL; + ri->rrdmetrics = NULL; + ri->rrdset = NULL; +} + +static void rrdinstance_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext) { + static STRING *ml_anomaly_rates_id = NULL; + + if(unlikely(!ml_anomaly_rates_id)) + ml_anomaly_rates_id = string_strdupz(ML_ANOMALY_RATES_CHART_ID); + + RRDINSTANCE *ri = value; + + // link it to its parent + ri->rc = rrdcontext; + + ri->flags = ri->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics + + if(!ri->name) + ri->name = string_dup(ri->id); + + if(ri->rrdset) { + ri->rrdlabels = ri->rrdset->rrdlabels; + ri->flags &= ~RRD_FLAG_OWN_LABELS; // no need of atomics at the constructor + } + else { + ri->rrdlabels = rrdlabels_create(); + ri->flags |= RRD_FLAG_OWN_LABELS; // no need of atomics at the constructor + } + + if(ri->rrdset) { + if(unlikely(rrdset_flag_check(ri->rrdset, RRDSET_FLAG_HIDDEN))) + ri->flags |= RRD_FLAG_HIDDEN; // no need of atomics at the constructor + else + ri->flags &= ~RRD_FLAG_HIDDEN; // no need of atomics at the constructor + } + + // we need this when loading from SQL + if(unlikely(ri->id == ml_anomaly_rates_id)) + ri->flags |= RRD_FLAG_HIDDEN; // no need of atomics at the constructor + + rrdmetrics_create_in_rrdinstance(ri); + + // signal the react callback to do the job + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); +} + +static void rrdinstance_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext __maybe_unused) { + RRDINSTANCE *ri = (RRDINSTANCE *)value; + + internal_error(ri->rrdset, "RRDINSTANCE: '%s' is freed but there is a RRDSET linked to it.", string2str(ri->id)); + + rrdinstance_free(ri); +} + +static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdcontext __maybe_unused) { + RRDINSTANCE *ri = (RRDINSTANCE *)old_value; + RRDINSTANCE *ri_new = (RRDINSTANCE *)new_value; + + internal_error(ri->id != ri_new->id, + "RRDINSTANCE: '%s' cannot change id to '%s'", + string2str(ri->id), string2str(ri_new->id)); + + if(uuid_compare(ri->uuid, ri_new->uuid) != 0) { +#ifdef NETDATA_INTERNAL_CHECKS + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid1); + uuid_unparse(ri_new->uuid, uuid2); + internal_error(true, "RRDINSTANCE: '%s' of host '%s' changed UUID from '%s' to '%s'", + string2str(ri->id), rrdhost_hostname(ri->rc->rrdhost), uuid1, uuid2); +#endif + + uuid_copy(ri->uuid, ri_new->uuid); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->rrdset && ri_new->rrdset && ri->rrdset != ri_new->rrdset) { + ri->rrdset = ri_new->rrdset; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(ri->rrdset && uuid_compare(ri->uuid, ri->rrdset->chart_uuid) != 0) { + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid1); + uuid_unparse(ri->rrdset->chart_uuid, uuid2); + internal_error(true, "RRDINSTANCE: '%s' is linked to RRDSET '%s' but they have different UUIDs. RRDINSTANCE has '%s', RRDSET has '%s'", string2str(ri->id), rrdset_id(ri->rrdset), uuid1, uuid2); + } +#endif + + if(ri->name != ri_new->name) { + STRING *old = ri->name; + ri->name = string_dup(ri_new->name); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->title != ri_new->title) { + STRING *old = ri->title; + ri->title = string_dup(ri_new->title); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->units != ri_new->units) { + STRING *old = ri->units; + ri->units = string_dup(ri_new->units); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->family != ri_new->family) { + STRING *old = ri->family; + ri->family = string_dup(ri_new->family); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->chart_type != ri_new->chart_type) { + ri->chart_type = ri_new->chart_type; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->priority != ri_new->priority) { + ri->priority = ri_new->priority; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->update_every != ri_new->update_every) { + ri->update_every = ri_new->update_every; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->rrdset != ri_new->rrdset) { + ri->rrdset = ri_new->rrdset; + + if(ri->rrdset && rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { + DICTIONARY *old = ri->rrdlabels; + ri->rrdlabels = ri->rrdset->rrdlabels; + rrd_flag_clear(ri, RRD_FLAG_OWN_LABELS); + rrdlabels_destroy(old); + } + else if(!ri->rrdset && !rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { + ri->rrdlabels = rrdlabels_create(); + rrd_flag_set(ri, RRD_FLAG_OWN_LABELS); + } + } + + if(ri->rrdset) { + if(unlikely(rrdset_flag_check(ri->rrdset, RRDSET_FLAG_HIDDEN))) + rrd_flag_set(ri, RRD_FLAG_HIDDEN); + else + rrd_flag_clear(ri, RRD_FLAG_HIDDEN); + } + + rrd_flag_set(ri, ri_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on ri_new + + if(rrd_flag_is_collected(ri) && rrd_flag_is_archived(ri)) + rrd_flag_set_collected(ri); + + if(rrd_flag_is_updated(ri)) + rrd_flag_set(ri, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); + + // free the new one + rrdinstance_free(ri_new); + + // the react callback will continue from here + return rrd_flag_is_updated(ri); +} + +static void rrdinstance_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext __maybe_unused) { + RRDINSTANCE *ri = value; + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + +void rrdinstances_create_in_rrdcontext(RRDCONTEXT *rc) { + if(unlikely(!rc || rc->rrdinstances)) return; + + rc->rrdinstances = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(rc->rrdinstances, rrdinstance_insert_callback, rc); + dictionary_register_delete_callback(rc->rrdinstances, rrdinstance_delete_callback, rc); + dictionary_register_conflict_callback(rc->rrdinstances, rrdinstance_conflict_callback, rc); + dictionary_register_react_callback(rc->rrdinstances, rrdinstance_react_callback, rc); +} + +void rrdinstances_destroy_from_rrdcontext(RRDCONTEXT *rc) { + if(unlikely(!rc || !rc->rrdinstances)) return; + + dictionary_destroy(rc->rrdinstances); + rc->rrdinstances = NULL; +} + +static void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function) { + RRDSET *st = ri->rrdset; + + if(likely(st)) { + if(unlikely((unsigned int) st->priority != ri->priority)) { + ri->priority = st->priority; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + if(unlikely(st->update_every != ri->update_every)) { + ri->update_every = st->update_every; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + } + else if(unlikely(rrd_flag_is_collected(ri))) { + // there is no rrdset, but we have it as collected! + + rrd_flag_set_archived(ri); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); + } + + if(rrd_flag_is_updated(ri) || !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION)) { + rrd_flag_set_updated(ri->rc, RRD_FLAG_UPDATE_REASON_TRIGGERED); + rrdcontext_queue_for_post_processing(ri->rc, function, ri->flags); + } +} + +// ---------------------------------------------------------------------------- +// RRDINSTANCE HOOKS ON RRDSET + +static inline void rrdinstance_from_rrdset(RRDSET *st) { + RRDCONTEXT trc = { + .id = string_dup(st->context), + .title = string_dup(st->title), + .units = string_dup(st->units), + .family = string_dup(st->family), + .priority = st->priority, + .chart_type = st->chart_type, + .flags = RRD_FLAG_NONE, // no need for atomics + .rrdhost = st->rrdhost, + }; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_set_and_acquire_item((DICTIONARY *)st->rrdhost->rrdctx, string2str(trc.id), &trc, sizeof(trc)); + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE tri = { + .id = string_dup(st->id), + .name = string_dup(st->name), + .units = string_dup(st->units), + .family = string_dup(st->family), + .title = string_dup(st->title), + .chart_type = st->chart_type, + .priority = st->priority, + .update_every = st->update_every, + .flags = RRD_FLAG_NONE, // no need for atomics + .rrdset = st, + }; + uuid_copy(tri.uuid, st->chart_uuid); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_set_and_acquire_item(rc->rrdinstances, string2str(tri.id), &tri, sizeof(tri)); + + RRDCONTEXT_ACQUIRED *rca_old = st->rrdcontext; + RRDINSTANCE_ACQUIRED *ria_old = st->rrdinstance; + + st->rrdcontext = rca; + st->rrdinstance = ria; + + if(rca == rca_old) { + rrdcontext_release(rca_old); + rca_old = NULL; + } + + if(ria == ria_old) { + rrdinstance_release(ria_old); + ria_old = NULL; + } + + if(rca_old && ria_old) { + // Oops! The chart changed context! + + // RRDCONTEXT *rc_old = rrdcontext_acquired_value(rca_old); + RRDINSTANCE *ri_old = rrdinstance_acquired_value(ria_old); + + // migrate all dimensions to the new metrics + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if (!rd->rrdmetric) continue; + + RRDMETRIC *rm_old = rrdmetric_acquired_value(rd->rrdmetric); + rrd_flags_replace(rm_old, RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + rm_old->rrddim = NULL; + rm_old->first_time_t = 0; + rm_old->last_time_t = 0; + + rrdmetric_release(rd->rrdmetric); + rd->rrdmetric = NULL; + + rrdmetric_from_rrddim(rd); + } + rrddim_foreach_done(rd); + + // mark the old instance, ready to be deleted + if(!rrd_flag_check(ri_old, RRD_FLAG_OWN_LABELS)) + ri_old->rrdlabels = rrdlabels_create(); + + rrd_flags_replace(ri_old, RRD_FLAG_OWN_LABELS|RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + ri_old->rrdset = NULL; + ri_old->first_time_t = 0; + ri_old->last_time_t = 0; + + rrdinstance_trigger_updates(ri_old, __FUNCTION__ ); + rrdinstance_release(ria_old); + + /* + // trigger updates on the old context + if(!dictionary_entries(rc_old->rrdinstances) && !dictionary_stats_referenced_items(rc_old->rrdinstances)) { + rrdcontext_lock(rc_old); + rc_old->flags = ((rc_old->flags & RRD_FLAG_QUEUED)?RRD_FLAG_QUEUED:RRD_FLAG_NONE)|RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION; + rc_old->first_time_t = 0; + rc_old->last_time_t = 0; + rrdcontext_unlock(rc_old); + rrdcontext_trigger_updates(rc_old, __FUNCTION__ ); + } + else + rrdcontext_trigger_updates(rc_old, __FUNCTION__ ); + */ + + rrdcontext_release(rca_old); + rca_old = NULL; + ria_old = NULL; + } + + if(rca_old || ria_old) + fatal("RRDCONTEXT: cannot switch rrdcontext without switching rrdinstance too"); +} + +#define rrdset_get_rrdinstance(st) rrdset_get_rrdinstance_with_trace(st, __FUNCTION__); +static inline RRDINSTANCE *rrdset_get_rrdinstance_with_trace(RRDSET *st, const char *function) { + if(unlikely(!st->rrdinstance)) { + error("RRDINSTANCE: RRDSET '%s' is not linked to an RRDINSTANCE at %s()", rrdset_id(st), function); + return NULL; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(st->rrdinstance); + if(unlikely(!ri)) { + error("RRDINSTANCE: RRDSET '%s' lost its link to an RRDINSTANCE at %s()", rrdset_id(st), function); + return NULL; + } + + if(unlikely(ri->rrdset != st)) + fatal("RRDINSTANCE: '%s' is not linked to RRDSET '%s' at %s()", string2str(ri->id), rrdset_id(st), function); + + return ri; +} + +static inline void rrdinstance_rrdset_is_freed(RRDSET *st) { + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + rrd_flag_set_archived(ri); + + if(!rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { + ri->rrdlabels = rrdlabels_create(); + rrdlabels_copy(ri->rrdlabels, st->rrdlabels); + rrd_flag_set(ri, RRD_FLAG_OWN_LABELS); + } + + ri->rrdset = NULL; + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); + + rrdinstance_release(st->rrdinstance); + st->rrdinstance = NULL; + + rrdcontext_release(st->rrdcontext); + st->rrdcontext = NULL; +} + +static inline void rrdinstance_rrdset_has_updated_retention(RRDSET *st) { + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + +static inline void rrdinstance_updated_rrdset_name(RRDSET *st) { + // the chart may not be initialized when this is called + if(unlikely(!st->rrdinstance)) return; + + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + if(st->name != ri->name) { + STRING *old = ri->name; + ri->name = string_dup(st->name); + string_freez(old); + + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); + } +} + +static inline void rrdinstance_updated_rrdset_flags_no_action(RRDINSTANCE *ri, RRDSET *st) { + if(unlikely(ri->rrdset != st)) + fatal("RRDCONTEXT: instance '%s' is not linked to chart '%s' on host '%s'", + string2str(ri->id), rrdset_id(st), rrdhost_hostname(st->rrdhost)); + + bool st_is_hidden = rrdset_flag_check(st, RRDSET_FLAG_HIDDEN); + bool ri_is_hidden = rrd_flag_check(ri, RRD_FLAG_HIDDEN); + + if(unlikely(st_is_hidden != ri_is_hidden)) { + if (unlikely(st_is_hidden && !ri_is_hidden)) + rrd_flag_set_updated(ri, RRD_FLAG_HIDDEN | RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + + else if (unlikely(!st_is_hidden && ri_is_hidden)) { + rrd_flag_clear(ri, RRD_FLAG_HIDDEN); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + } +} + +static inline void rrdinstance_updated_rrdset_flags(RRDSET *st) { + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED|RRDSET_FLAG_OBSOLETE))) + rrd_flag_set_archived(ri); + + rrdinstance_updated_rrdset_flags_no_action(ri, st); + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + +static inline void rrdinstance_collected_rrdset(RRDSET *st) { + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + rrdinstance_updated_rrdset_flags_no_action(ri, st); + + if(unlikely(ri->internal.collected_metrics_count && !rrd_flag_is_collected(ri))) + rrd_flag_set_collected(ri); + + // we use this variable to detect BEGIN/END without SET + ri->internal.collected_metrics_count = 0; + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + +// ---------------------------------------------------------------------------- +// RRDCONTEXT + +static void rrdcontext_freez(RRDCONTEXT *rc) { + string_freez(rc->id); + string_freez(rc->title); + string_freez(rc->units); + string_freez(rc->family); +} + +static void rrdcontext_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost) { + RRDHOST *host = (RRDHOST *)rrdhost; + RRDCONTEXT *rc = (RRDCONTEXT *)value; + + rc->rrdhost = host; + rc->flags = rc->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics at constructor + + if(rc->hub.version) { + // we are loading data from the SQL database + + if(rc->version) + error("RRDCONTEXT: context '%s' is already initialized with version %"PRIu64", but it is loaded again from SQL with version %"PRIu64"", string2str(rc->id), rc->version, rc->hub.version); + + // IMPORTANT + // replace all string pointers in rc->hub with our own versions + // the originals are coming from a tmp allocation of sqlite + + string_freez(rc->id); + rc->id = string_strdupz(rc->hub.id); + rc->hub.id = string2str(rc->id); + + string_freez(rc->title); + rc->title = string_strdupz(rc->hub.title); + rc->hub.title = string2str(rc->title); + + string_freez(rc->units); + rc->units = string_strdupz(rc->hub.units); + rc->hub.units = string2str(rc->units); + + string_freez(rc->family); + rc->family = string_strdupz(rc->hub.family); + rc->hub.family = string2str(rc->family); + + rc->chart_type = rrdset_type_id(rc->hub.chart_type); + rc->hub.chart_type = rrdset_type_name(rc->chart_type); + + rc->version = rc->hub.version; + rc->priority = rc->hub.priority; + rc->first_time_t = (time_t)rc->hub.first_time_t; + rc->last_time_t = (time_t)rc->hub.last_time_t; + + if(rc->hub.deleted || !rc->hub.first_time_t) + rrd_flag_set_deleted(rc, RRD_FLAG_NONE); + else { + if (rc->last_time_t == 0) + rrd_flag_set_collected(rc); + else + rrd_flag_set_archived(rc); + } + + rc->flags |= RRD_FLAG_UPDATE_REASON_LOAD_SQL; // no need for atomics at constructor + } + else { + // we are adding this context now for the first time + rc->version = now_realtime_sec(); + } + + rrdinstances_create_in_rrdcontext(rc); + netdata_mutex_init(&rc->mutex); + + // signal the react callback to do the job + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); +} + +static void rrdcontext_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost __maybe_unused) { + + RRDCONTEXT *rc = (RRDCONTEXT *)value; + + rrdinstances_destroy_from_rrdcontext(rc); + netdata_mutex_destroy(&rc->mutex); + rrdcontext_freez(rc); +} + +static bool rrdcontext_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdhost __maybe_unused) { + RRDCONTEXT *rc = (RRDCONTEXT *)old_value; + RRDCONTEXT *rc_new = (RRDCONTEXT *)new_value; + + //current rc is not archived, new_rc is archived, don't merge + if (!rrd_flag_is_archived(rc) && rrd_flag_is_archived(rc_new)) { + rrdcontext_freez(rc_new); + return false; + } + + rrdcontext_lock(rc); + + if(rc->title != rc_new->title) { + STRING *old_title = rc->title; + if (rrd_flag_is_archived(rc) && !rrd_flag_is_archived(rc_new)) + rc->title = string_dup(rc_new->title); + else + rc->title = string_2way_merge(rc->title, rc_new->title); + string_freez(old_title); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rc->units != rc_new->units) { + STRING *old_units = rc->units; + rc->units = string_dup(rc_new->units); + string_freez(old_units); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rc->family != rc_new->family) { + STRING *old_family = rc->family; + if (rrd_flag_is_archived(rc) && !rrd_flag_is_archived(rc_new)) + rc->family = string_dup(rc_new->family); + else + rc->family = string_2way_merge(rc->family, rc_new->family); + string_freez(old_family); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rc->chart_type != rc_new->chart_type) { + rc->chart_type = rc_new->chart_type; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rc->priority != rc_new->priority) { + rc->priority = rc_new->priority; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + rrd_flag_set(rc, rc_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on rc_new + + if(rrd_flag_is_collected(rc) && rrd_flag_is_archived(rc)) + rrd_flag_set_collected(rc); + + if(rrd_flag_is_updated(rc)) + rrd_flag_set(rc, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); + + rrdcontext_unlock(rc); + + // free the resources of the new one + rrdcontext_freez(rc_new); + + // the react callback will continue from here + return rrd_flag_is_updated(rc); +} + +static void rrdcontext_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost __maybe_unused) { + RRDCONTEXT *rc = (RRDCONTEXT *)value; + rrdcontext_trigger_updates(rc, __FUNCTION__ ); +} + +static void rrdcontext_trigger_updates(RRDCONTEXT *rc, const char *function) { + if(rrd_flag_is_updated(rc) || !rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrdcontext_queue_for_post_processing(rc, function, rc->flags); +} + +static void rrdcontext_hub_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_HUB); + rc->queue.queued_ut = now_realtime_usec(); + rc->queue.queued_flags = rrd_flags_get(rc); +} + +static void rrdcontext_hub_queue_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_HUB); +} + +static bool rrdcontext_hub_queue_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *new_context __maybe_unused, void *nothing __maybe_unused) { + // context and new_context are the same + // we just need to update the timings + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_HUB); + rc->queue.queued_ut = now_realtime_usec(); + rc->queue.queued_flags |= rrd_flags_get(rc); + + return true; +} + +static void rrdcontext_post_processing_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); + rc->pp.queued_flags = rc->flags; + rc->pp.queued_ut = now_realtime_usec(); +} + +static void rrdcontext_post_processing_queue_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_PP); + rc->pp.dequeued_ut = now_realtime_usec(); +} + +static bool rrdcontext_post_processing_queue_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *new_context __maybe_unused, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + bool changed = false; + + if(!(rc->flags & RRD_FLAG_QUEUED_FOR_PP)) { + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); + changed = true; + } + + if(rc->pp.queued_flags != rc->flags) { + rc->pp.queued_flags |= rc->flags; + changed = true; + } + + return changed; +} + +void rrdhost_create_rrdcontexts(RRDHOST *host) { + if(unlikely(!host)) return; + if(likely(host->rrdctx)) return; + + host->rrdctx = (RRDCONTEXTS *)dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback((DICTIONARY *)host->rrdctx, rrdcontext_insert_callback, host); + dictionary_register_delete_callback((DICTIONARY *)host->rrdctx, rrdcontext_delete_callback, host); + dictionary_register_conflict_callback((DICTIONARY *)host->rrdctx, rrdcontext_conflict_callback, host); + dictionary_register_react_callback((DICTIONARY *)host->rrdctx, rrdcontext_react_callback, host); + + host->rrdctx_hub_queue = (RRDCONTEXTS *)dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_VALUE_LINK_DONT_CLONE); + dictionary_register_insert_callback((DICTIONARY *)host->rrdctx_hub_queue, rrdcontext_hub_queue_insert_callback, NULL); + dictionary_register_delete_callback((DICTIONARY *)host->rrdctx_hub_queue, rrdcontext_hub_queue_delete_callback, NULL); + dictionary_register_conflict_callback((DICTIONARY *)host->rrdctx_hub_queue, rrdcontext_hub_queue_conflict_callback, NULL); + + host->rrdctx_post_processing_queue = (RRDCONTEXTS *)dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_VALUE_LINK_DONT_CLONE); + dictionary_register_insert_callback((DICTIONARY *)host->rrdctx_post_processing_queue, rrdcontext_post_processing_queue_insert_callback, NULL); + dictionary_register_delete_callback((DICTIONARY *)host->rrdctx_post_processing_queue, rrdcontext_post_processing_queue_delete_callback, NULL); + dictionary_register_conflict_callback((DICTIONARY *)host->rrdctx_post_processing_queue, rrdcontext_post_processing_queue_conflict_callback, NULL); +} + +void rrdhost_destroy_rrdcontexts(RRDHOST *host) { + if(unlikely(!host)) return; + if(unlikely(!host->rrdctx)) return; + + DICTIONARY *old; + + if(host->rrdctx_hub_queue) { + old = (DICTIONARY *)host->rrdctx_hub_queue; + host->rrdctx_hub_queue = NULL; + + RRDCONTEXT *rc; + dfe_start_write(old, rc) { + dictionary_del(old, string2str(rc->id)); + } + dfe_done(rc); + dictionary_destroy(old); + } + + if(host->rrdctx_post_processing_queue) { + old = (DICTIONARY *)host->rrdctx_post_processing_queue; + host->rrdctx_post_processing_queue = NULL; + + RRDCONTEXT *rc; + dfe_start_write(old, rc) { + dictionary_del(old, string2str(rc->id)); + } + dfe_done(rc); + dictionary_destroy(old); + } + + old = (DICTIONARY *)host->rrdctx; + host->rrdctx = NULL; + dictionary_destroy(old); +} + +// ---------------------------------------------------------------------------- +// public API + +void rrdcontext_updated_rrddim(RRDDIM *rd) { + rrdmetric_from_rrddim(rd); +} + +void rrdcontext_removed_rrddim(RRDDIM *rd) { + rrdmetric_rrddim_is_freed(rd); +} + +void rrdcontext_updated_rrddim_algorithm(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_updated_rrddim_multiplier(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_updated_rrddim_divisor(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_updated_rrddim_flags(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_collected_rrddim(RRDDIM *rd) { + rrdmetric_collected_rrddim(rd); +} + +void rrdcontext_updated_rrdset(RRDSET *st) { + rrdinstance_from_rrdset(st); +} + +void rrdcontext_removed_rrdset(RRDSET *st) { + rrdinstance_rrdset_is_freed(st); +} + +void rrdcontext_updated_retention_rrdset(RRDSET *st) { + rrdinstance_rrdset_has_updated_retention(st); +} + +void rrdcontext_updated_rrdset_name(RRDSET *st) { + rrdinstance_updated_rrdset_name(st); +} + +void rrdcontext_updated_rrdset_flags(RRDSET *st) { + rrdinstance_updated_rrdset_flags(st); +} + +void rrdcontext_collected_rrdset(RRDSET *st) { + rrdinstance_collected_rrdset(st); +} + +void rrdcontext_host_child_connected(RRDHOST *host) { + (void)host; + + // no need to do anything here + ; +} + +int rrdcontext_find_dimension_uuid(RRDSET *st, const char *id, uuid_t *store_uuid) { + if(!st->rrdhost) return 1; + if(!st->context) return 2; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)st->rrdhost->rrdctx, string2str(st->context)); + if(!rca) return 3; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_get_and_acquire_item(rc->rrdinstances, string2str(st->id)); + if(!ria) { + rrdcontext_release(rca); + return 4; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + + RRDMETRIC_ACQUIRED *rma = (RRDMETRIC_ACQUIRED *)dictionary_get_and_acquire_item(ri->rrdmetrics, id); + if(!rma) { + rrdinstance_release(ria); + rrdcontext_release(rca); + return 5; + } + + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + + uuid_copy(*store_uuid, rm->uuid); + + rrdmetric_release(rma); + rrdinstance_release(ria); + rrdcontext_release(rca); + return 0; +} + +int rrdcontext_find_chart_uuid(RRDSET *st, uuid_t *store_uuid) { + if(!st->rrdhost) return 1; + if(!st->context) return 2; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)st->rrdhost->rrdctx, string2str(st->context)); + if(!rca) return 3; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_get_and_acquire_item(rc->rrdinstances, string2str(st->id)); + if(!ria) { + rrdcontext_release(rca); + return 4; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + uuid_copy(*store_uuid, ri->uuid); + + rrdinstance_release(ria); + rrdcontext_release(rca); + return 0; +} + +void rrdcontext_host_child_disconnected(RRDHOST *host) { + rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, false); +} + +static usec_t rrdcontext_next_db_rotation_ut = 0; +void rrdcontext_db_rotation(void) { + // called when the db rotates its database + rrdcontext_next_db_rotation_ut = now_realtime_usec() + FULL_RETENTION_SCAN_DELAY_AFTER_DB_ROTATION_SECS * USEC_PER_SEC; +} + +int rrdcontext_foreach_instance_with_rrdset_in_context(RRDHOST *host, const char *context, int (*callback)(RRDSET *st, void *data), void *data) { + if(unlikely(!host || !context || !*context || !callback)) + return -1; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)host->rrdctx, context); + if(unlikely(!rca)) return -1; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(unlikely(!rc)) return -1; + + int ret = 0; + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(ri->rrdset) { + int r = callback(ri->rrdset, data); + if(r >= 0) ret += r; + else { + ret = r; + break; + } + } + } + dfe_done(ri); + + rrdcontext_release(rca); + + return ret; +} + +// ---------------------------------------------------------------------------- +// ACLK interface + +static bool rrdhost_check_our_claim_id(const char *claim_id) { + if(!localhost->aclk_state.claimed_id) return false; + return (strcasecmp(claim_id, localhost->aclk_state.claimed_id) == 0) ? true : false; +} + +static RRDHOST *rrdhost_find_by_node_id(const char *node_id) { + uuid_t uuid; + if (uuid_parse(node_id, uuid)) + return NULL; + + RRDHOST *host = NULL; + + rrd_rdlock(); + rrdhost_foreach_read(host) { + if(!host->node_id) continue; + + if(uuid_compare(uuid, *host->node_id) == 0) + break; + } + rrd_unlock(); + + return host; +} + +void rrdcontext_hub_checkpoint_command(void *ptr) { + struct ctxs_checkpoint *cmd = ptr; + + if(!rrdhost_check_our_claim_id(cmd->claim_id)) { + error("RRDCONTEXT: received checkpoint command for claim_id '%s', node id '%s', but this is not our claim id. Ours '%s', received '%s'. Ignoring command.", + cmd->claim_id, cmd->node_id, + localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET", + cmd->claim_id); + + return; + } + + RRDHOST *host = rrdhost_find_by_node_id(cmd->node_id); + if(!host) { + error("RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', but there is no node with such node id here. Ignoring command.", + cmd->claim_id, cmd->node_id); + + return; + } + + if(rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { + info("RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', while node '%s' has an active context streaming.", + cmd->claim_id, cmd->node_id, rrdhost_hostname(host)); + + // disable it temporarily, so that our worker will not attempt to send messages in parallel + rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); + } + + uint64_t our_version_hash = rrdcontext_version_hash(host); + + if(cmd->version_hash != our_version_hash) { + error("RRDCONTEXT: received version hash %"PRIu64" for host '%s', does not match our version hash %"PRIu64". Sending snapshot of all contexts.", + cmd->version_hash, rrdhost_hostname(host), our_version_hash); + +#ifdef ENABLE_ACLK + // prepare the snapshot + char uuid[UUID_STR_LEN]; + uuid_unparse_lower(*host->node_id, uuid); + contexts_snapshot_t bundle = contexts_snapshot_new(cmd->claim_id, uuid, our_version_hash); + + // do a deep scan on every metric of the host to make sure all our data are updated + rrdcontext_recalculate_host_retention(host, RRD_FLAG_NONE, false); + + // calculate version hash and pack all the messages together in one go + our_version_hash = rrdcontext_version_hash_with_callback(host, rrdcontext_message_send_unsafe, true, bundle); + + // update the version + contexts_snapshot_set_version(bundle, our_version_hash); + + // send it + aclk_send_contexts_snapshot(bundle); +#endif + } + + internal_error(true, "RRDCONTEXT: host '%s' enabling streaming of contexts", rrdhost_hostname(host)); + rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); + char node_str[UUID_STR_LEN]; + uuid_unparse_lower(*host->node_id, node_str); + log_access("ACLK REQ [%s (%s)]: STREAM CONTEXTS ENABLED", node_str, rrdhost_hostname(host)); +} + +void rrdcontext_hub_stop_streaming_command(void *ptr) { + struct stop_streaming_ctxs *cmd = ptr; + + if(!rrdhost_check_our_claim_id(cmd->claim_id)) { + error("RRDCONTEXT: received stop streaming command for claim_id '%s', node id '%s', but this is not our claim id. Ours '%s', received '%s'. Ignoring command.", + cmd->claim_id, cmd->node_id, + localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET", + cmd->claim_id); + + return; + } + + RRDHOST *host = rrdhost_find_by_node_id(cmd->node_id); + if(!host) { + error("RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', but there is no node with such node id here. Ignoring command.", + cmd->claim_id, cmd->node_id); + + return; + } + + if(!rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { + error("RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', but node '%s' does not have active context streaming. Ignoring command.", + cmd->claim_id, cmd->node_id, rrdhost_hostname(host)); + + return; + } + + internal_error(true, "RRDCONTEXT: host '%s' disabling streaming of contexts", rrdhost_hostname(host)); + rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); +} + +// ---------------------------------------------------------------------------- +// web API + +struct rrdcontext_to_json { + BUFFER *wb; + RRDCONTEXT_TO_JSON_OPTIONS options; + time_t after; + time_t before; + SIMPLE_PATTERN *chart_label_key; + SIMPLE_PATTERN *chart_labels_filter; + SIMPLE_PATTERN *chart_dimensions; + size_t written; + time_t now; + time_t combined_first_time_t; + time_t combined_last_time_t; + RRD_FLAGS combined_flags; +}; + +static inline int rrdmetric_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); + struct rrdcontext_to_json * t = data; + RRDMETRIC *rm = value; + BUFFER *wb = t->wb; + RRDCONTEXT_TO_JSON_OPTIONS options = t->options; + time_t after = t->after; + time_t before = t->before; + + if(unlikely(rrd_flag_is_deleted(rm) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) + return 0; + + if(after && (!rm->last_time_t || after > rm->last_time_t)) + return 0; + + if(before && (!rm->first_time_t || before < rm->first_time_t)) + return 0; + + if(t->chart_dimensions + && !simple_pattern_matches(t->chart_dimensions, string2str(rm->id)) + && !simple_pattern_matches(t->chart_dimensions, string2str(rm->name))) + return 0; + + if(t->written) { + buffer_strcat(wb, ",\n"); + t->combined_first_time_t = MIN(t->combined_first_time_t, rm->first_time_t); + t->combined_last_time_t = MAX(t->combined_last_time_t, rm->last_time_t); + t->combined_flags |= rrd_flags_get(rm); + } + else { + buffer_strcat(wb, "\n"); + t->combined_first_time_t = rm->first_time_t; + t->combined_last_time_t = rm->last_time_t; + t->combined_flags = rrd_flags_get(rm); + } + + buffer_sprintf(wb, "\t\t\t\t\t\t\"%s\": {", id); + + if(options & RRDCONTEXT_OPTION_SHOW_UUIDS) { + char uuid[UUID_STR_LEN]; + uuid_unparse(rm->uuid, uuid); + buffer_sprintf(wb, "\n\t\t\t\t\t\t\t\"uuid\":\"%s\",", uuid); + } + + buffer_sprintf(wb, + "\n\t\t\t\t\t\t\t\"name\":\"%s\"" + ",\n\t\t\t\t\t\t\t\"first_time_t\":%lld" + ",\n\t\t\t\t\t\t\t\"last_time_t\":%lld" + ",\n\t\t\t\t\t\t\t\"collected\":%s" + , string2str(rm->name) + , (long long)rm->first_time_t + , rrd_flag_is_collected(rm) ? (long long)t->now : (long long)rm->last_time_t + , rrd_flag_is_collected(rm) ? "true" : "false" + ); + + if(options & RRDCONTEXT_OPTION_SHOW_DELETED) { + buffer_sprintf(wb, + ",\n\t\t\t\t\t\t\t\"deleted\":%s" + , rrd_flag_is_deleted(rm) ? "true" : "false" + ); + } + + if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { + buffer_strcat(wb, ",\n\t\t\t\t\t\t\t\"flags\":\""); + rrd_flags_to_buffer(rrd_flags_get(rm), wb); + buffer_strcat(wb, "\""); + } + + buffer_strcat(wb, "\n\t\t\t\t\t\t}"); + t->written++; + return 1; +} + +static inline int rrdinstance_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); + + struct rrdcontext_to_json *t_parent = data; + RRDINSTANCE *ri = value; + BUFFER *wb = t_parent->wb; + RRDCONTEXT_TO_JSON_OPTIONS options = t_parent->options; + time_t after = t_parent->after; + time_t before = t_parent->before; + bool has_filter = t_parent->chart_label_key || t_parent->chart_labels_filter || t_parent->chart_dimensions; + + if(unlikely(rrd_flag_is_deleted(ri) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) + return 0; + + if(after && (!ri->last_time_t || after > ri->last_time_t)) + return 0; + + if(before && (!ri->first_time_t || before < ri->first_time_t)) + return 0; + + if(t_parent->chart_label_key && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, t_parent->chart_label_key, '\0')) + return 0; + + if(t_parent->chart_labels_filter && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, t_parent->chart_labels_filter, ':')) + return 0; + + time_t first_time_t = ri->first_time_t; + time_t last_time_t = ri->last_time_t; + RRD_FLAGS flags = rrd_flags_get(ri); + + BUFFER *wb_metrics = NULL; + if(options & RRDCONTEXT_OPTION_SHOW_METRICS || t_parent->chart_dimensions) { + + wb_metrics = buffer_create(4096); + + struct rrdcontext_to_json t_metrics = { + .wb = wb_metrics, + .options = options, + .chart_label_key = t_parent->chart_label_key, + .chart_labels_filter = t_parent->chart_labels_filter, + .chart_dimensions = t_parent->chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = t_parent->now, + }; + dictionary_walkthrough_read(ri->rrdmetrics, rrdmetric_to_json_callback, &t_metrics); + + if(has_filter && !t_metrics.written) { + buffer_free(wb_metrics); + return 0; + } + + first_time_t = t_metrics.combined_first_time_t; + last_time_t = t_metrics.combined_last_time_t; + flags = t_metrics.combined_flags; + } + + if(t_parent->written) { + buffer_strcat(wb, ",\n"); + t_parent->combined_first_time_t = MIN(t_parent->combined_first_time_t, first_time_t); + t_parent->combined_last_time_t = MAX(t_parent->combined_last_time_t, last_time_t); + t_parent->combined_flags |= flags; + } + else { + buffer_strcat(wb, "\n"); + t_parent->combined_first_time_t = first_time_t; + t_parent->combined_last_time_t = last_time_t; + t_parent->combined_flags = flags; + } + + buffer_sprintf(wb, "\t\t\t\t\"%s\": {", id); + + if(options & RRDCONTEXT_OPTION_SHOW_UUIDS) { + char uuid[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid); + buffer_sprintf(wb,"\n\t\t\t\t\t\"uuid\":\"%s\",", uuid); + } + + buffer_sprintf(wb, + "\n\t\t\t\t\t\"name\":\"%s\"" + ",\n\t\t\t\t\t\"context\":\"%s\"" + ",\n\t\t\t\t\t\"title\":\"%s\"" + ",\n\t\t\t\t\t\"units\":\"%s\"" + ",\n\t\t\t\t\t\"family\":\"%s\"" + ",\n\t\t\t\t\t\"chart_type\":\"%s\"" + ",\n\t\t\t\t\t\"priority\":%u" + ",\n\t\t\t\t\t\"update_every\":%d" + ",\n\t\t\t\t\t\"first_time_t\":%lld" + ",\n\t\t\t\t\t\"last_time_t\":%lld" + ",\n\t\t\t\t\t\"collected\":%s" + , string2str(ri->name) + , string2str(ri->rc->id) + , string2str(ri->title) + , string2str(ri->units) + , string2str(ri->family) + , rrdset_type_name(ri->chart_type) + , ri->priority + , ri->update_every + , (long long)first_time_t + , (flags & RRD_FLAG_COLLECTED) ? (long long)t_parent->now : (long long)last_time_t + , (flags & RRD_FLAG_COLLECTED) ? "true" : "false" + ); + + if(options & RRDCONTEXT_OPTION_SHOW_DELETED) { + buffer_sprintf(wb, + ",\n\t\t\t\t\t\"deleted\":%s" + , rrd_flag_is_deleted(ri) ? "true" : "false" + ); + } + + if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { + buffer_strcat(wb, ",\n\t\t\t\t\t\"flags\":\""); + rrd_flags_to_buffer(rrd_flags_get(ri), wb); + buffer_strcat(wb, "\""); + } + + if(options & RRDCONTEXT_OPTION_SHOW_LABELS && ri->rrdlabels && dictionary_entries(ri->rrdlabels)) { + buffer_sprintf(wb, ",\n\t\t\t\t\t\"labels\": {\n"); + rrdlabels_to_buffer(ri->rrdlabels, wb, "\t\t\t\t\t\t", ":", "\"", ",\n", NULL, NULL, NULL, NULL); + buffer_strcat(wb, "\n\t\t\t\t\t}"); + } + + if(wb_metrics) { + buffer_sprintf(wb, ",\n\t\t\t\t\t\"dimensions\": {"); + buffer_fast_strcat(wb, buffer_tostring(wb_metrics), buffer_strlen(wb_metrics)); + buffer_strcat(wb, "\n\t\t\t\t\t}"); + + buffer_free(wb_metrics); + } + + buffer_strcat(wb, "\n\t\t\t\t}"); + t_parent->written++; + return 1; +} + +static inline int rrdcontext_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); + struct rrdcontext_to_json *t_parent = data; + RRDCONTEXT *rc = value; + BUFFER *wb = t_parent->wb; + RRDCONTEXT_TO_JSON_OPTIONS options = t_parent->options; + time_t after = t_parent->after; + time_t before = t_parent->before; + bool has_filter = t_parent->chart_label_key || t_parent->chart_labels_filter || t_parent->chart_dimensions; + + if(unlikely(rrd_flag_check(rc, RRD_FLAG_HIDDEN) && !(options & RRDCONTEXT_OPTION_SHOW_HIDDEN))) + return 0; + + if(unlikely(rrd_flag_is_deleted(rc) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) + return 0; + + if(options & RRDCONTEXT_OPTION_DEEPSCAN) + rrdcontext_recalculate_context_retention(rc, RRD_FLAG_NONE, false); + + if(after && (!rc->last_time_t || after > rc->last_time_t)) + return 0; + + if(before && (!rc->first_time_t || before < rc->first_time_t)) + return 0; + + time_t first_time_t = rc->first_time_t; + time_t last_time_t = rc->last_time_t; + RRD_FLAGS flags = rrd_flags_get(rc); + + BUFFER *wb_instances = NULL; + if((options & (RRDCONTEXT_OPTION_SHOW_LABELS|RRDCONTEXT_OPTION_SHOW_INSTANCES|RRDCONTEXT_OPTION_SHOW_METRICS)) + || t_parent->chart_label_key + || t_parent->chart_labels_filter + || t_parent->chart_dimensions) { + + wb_instances = buffer_create(4096); + + struct rrdcontext_to_json t_instances = { + .wb = wb_instances, + .options = options, + .chart_label_key = t_parent->chart_label_key, + .chart_labels_filter = t_parent->chart_labels_filter, + .chart_dimensions = t_parent->chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = t_parent->now, + }; + dictionary_walkthrough_read(rc->rrdinstances, rrdinstance_to_json_callback, &t_instances); + + if(has_filter && !t_instances.written) { + buffer_free(wb_instances); + return 0; + } + + first_time_t = t_instances.combined_first_time_t; + last_time_t = t_instances.combined_last_time_t; + flags = t_instances.combined_flags; + } + + if(t_parent->written) + buffer_strcat(wb, ",\n"); + else + buffer_strcat(wb, "\n"); + + if(options & RRDCONTEXT_OPTION_SKIP_ID) + buffer_sprintf(wb, "\t\t\{"); + else + buffer_sprintf(wb, "\t\t\"%s\": {", id); + + rrdcontext_lock(rc); + + buffer_sprintf(wb, + "\n\t\t\t\"title\":\"%s\"" + ",\n\t\t\t\"units\":\"%s\"" + ",\n\t\t\t\"family\":\"%s\"" + ",\n\t\t\t\"chart_type\":\"%s\"" + ",\n\t\t\t\"priority\":%u" + ",\n\t\t\t\"first_time_t\":%lld" + ",\n\t\t\t\"last_time_t\":%lld" + ",\n\t\t\t\"collected\":%s" + , string2str(rc->title) + , string2str(rc->units) + , string2str(rc->family) + , rrdset_type_name(rc->chart_type) + , rc->priority + , (long long)first_time_t + , (flags & RRD_FLAG_COLLECTED) ? (long long)t_parent->now : (long long)last_time_t + , (flags & RRD_FLAG_COLLECTED) ? "true" : "false" + ); + + if(options & RRDCONTEXT_OPTION_SHOW_DELETED) { + buffer_sprintf(wb, + ",\n\t\t\t\"deleted\":%s" + , rrd_flag_is_deleted(rc) ? "true" : "false" + ); + } + + if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { + buffer_strcat(wb, ",\n\t\t\t\"flags\":\""); + rrd_flags_to_buffer(rrd_flags_get(rc), wb); + buffer_strcat(wb, "\""); + } + + if(options & RRDCONTEXT_OPTION_SHOW_QUEUED) { + buffer_strcat(wb, ",\n\t\t\t\"queued_reasons\":\""); + rrd_reasons_to_buffer(rc->queue.queued_flags, wb); + buffer_strcat(wb, "\""); + + buffer_sprintf(wb, + ",\n\t\t\t\"last_queued\":%llu" + ",\n\t\t\t\"scheduled_dispatch\":%llu" + ",\n\t\t\t\"last_dequeued\":%llu" + ",\n\t\t\t\"dispatches\":%zu" + ",\n\t\t\t\"hub_version\":%"PRIu64"" + ",\n\t\t\t\"version\":%"PRIu64"" + , rc->queue.queued_ut / USEC_PER_SEC + , rc->queue.scheduled_dispatch_ut / USEC_PER_SEC + , rc->queue.dequeued_ut / USEC_PER_SEC + , rc->queue.dispatches + , rc->hub.version + , rc->version + ); + + buffer_strcat(wb, ",\n\t\t\t\"pp_reasons\":\""); + rrd_reasons_to_buffer(rc->pp.queued_flags, wb); + buffer_strcat(wb, "\""); + + buffer_sprintf(wb, + ",\n\t\t\t\"pp_last_queued\":%llu" + ",\n\t\t\t\"pp_last_dequeued\":%llu" + ",\n\t\t\t\"pp_executed\":%zu" + , rc->pp.queued_ut / USEC_PER_SEC + , rc->pp.dequeued_ut / USEC_PER_SEC + , rc->pp.executions + ); + } + + rrdcontext_unlock(rc); + + if(wb_instances) { + buffer_sprintf(wb, ",\n\t\t\t\"charts\": {"); + buffer_fast_strcat(wb, buffer_tostring(wb_instances), buffer_strlen(wb_instances)); + buffer_strcat(wb, "\n\t\t\t}"); + + buffer_free(wb_instances); + } + + buffer_strcat(wb, "\n\t\t}"); + t_parent->written++; + return 1; +} + +int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, const char *context, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions) { + if(!host->rrdctx) { + error("%s(): request for host '%s' that does not have rrdcontexts initialized.", __FUNCTION__, rrdhost_hostname(host)); + return HTTP_RESP_NOT_FOUND; + } + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)host->rrdctx, context); + if(!rca) return HTTP_RESP_NOT_FOUND; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + if(after != 0 && before != 0) + rrdr_relative_window_to_absolute(&after, &before); + + struct rrdcontext_to_json t_contexts = { + .wb = wb, + .options = options|RRDCONTEXT_OPTION_SKIP_ID, + .chart_label_key = chart_label_key, + .chart_labels_filter = chart_labels_filter, + .chart_dimensions = chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = now_realtime_sec(), + }; + rrdcontext_to_json_callback((DICTIONARY_ITEM *)rca, rc, &t_contexts); + + rrdcontext_release(rca); + + if(!t_contexts.written) + return HTTP_RESP_NOT_FOUND; + + return HTTP_RESP_OK; +} + +int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions) { + if(!host->rrdctx) { + error("%s(): request for host '%s' that does not have rrdcontexts initialized.", __FUNCTION__, rrdhost_hostname(host)); + return HTTP_RESP_NOT_FOUND; + } + + char node_uuid[UUID_STR_LEN] = ""; + + if(host->node_id) + uuid_unparse(*host->node_id, node_uuid); + + if(after != 0 && before != 0) + rrdr_relative_window_to_absolute(&after, &before); + + buffer_sprintf(wb, "{\n" + "\t\"hostname\": \"%s\"" + ",\n\t\"machine_guid\": \"%s\"" + ",\n\t\"node_id\": \"%s\"" + ",\n\t\"claim_id\": \"%s\"" + , rrdhost_hostname(host) + , host->machine_guid + , node_uuid + , host->aclk_state.claimed_id ? host->aclk_state.claimed_id : "" + ); + + if(options & RRDCONTEXT_OPTION_SHOW_LABELS) { + buffer_sprintf(wb, ",\n\t\"host_labels\": {\n"); + rrdlabels_to_buffer(host->rrdlabels, wb, "\t\t", ":", "\"", ",\n", NULL, NULL, NULL, NULL); + buffer_strcat(wb, "\n\t}"); + } + + buffer_sprintf(wb, ",\n\t\"contexts\": {"); + struct rrdcontext_to_json t_contexts = { + .wb = wb, + .options = options, + .chart_label_key = chart_label_key, + .chart_labels_filter = chart_labels_filter, + .chart_dimensions = chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = now_realtime_sec(), + }; + dictionary_walkthrough_read((DICTIONARY *)host->rrdctx, rrdcontext_to_json_callback, &t_contexts); + + // close contexts, close main + buffer_strcat(wb, "\n\t}\n}"); + + return HTTP_RESP_OK; +} + +// ---------------------------------------------------------------------------- +// weights API + +static void metric_entry_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct metric_entry *t = value; + t->rca = rrdcontext_acquired_dup(t->rca); + t->ria = rrdinstance_acquired_dup(t->ria); + t->rma = rrdmetric_acquired_dup(t->rma); +} +static void metric_entry_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct metric_entry *t = value; + rrdcontext_release(t->rca); + rrdinstance_release(t->ria); + rrdmetric_release(t->rma); +} +static bool metric_entry_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value __maybe_unused, void *new_value __maybe_unused, void *data __maybe_unused) { + fatal("RRDCONTEXT: %s() detected a conflict on a metric pointer!", __FUNCTION__); + return false; +} + +DICTIONARY *rrdcontext_all_metrics_to_dict(RRDHOST *host, SIMPLE_PATTERN *contexts) { + if(!host || !host->rrdctx) + return NULL; + + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(dict, metric_entry_insert_callback, NULL); + dictionary_register_delete_callback(dict, metric_entry_delete_callback, NULL); + dictionary_register_conflict_callback(dict, metric_entry_conflict_callback, NULL); + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx, rc) { + if(rrd_flag_is_deleted(rc)) + continue; + + if(contexts && !simple_pattern_matches(contexts, string2str(rc->id))) + continue; + + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(rrd_flag_is_deleted(ri)) + continue; + + RRDMETRIC *rm; + dfe_start_read(ri->rrdmetrics, rm) { + if(rrd_flag_is_deleted(rm)) + continue; + + struct metric_entry tmp = { + .rca = (RRDCONTEXT_ACQUIRED *)rc_dfe.item, + .ria = (RRDINSTANCE_ACQUIRED *)ri_dfe.item, + .rma = (RRDMETRIC_ACQUIRED *)rm_dfe.item, + }; + + char buffer[20 + 1]; + ssize_t len = snprintfz(buffer, 20, "%p", rm); + dictionary_set_advanced(dict, buffer, len + 1, &tmp, sizeof(struct metric_entry), NULL); + } + dfe_done(rm); + } + dfe_done(ri); + } + dfe_done(rc); + + return dict; +} + +// ---------------------------------------------------------------------------- +// query API + +typedef struct query_target_locals { + time_t start_s; + + QUERY_TARGET *qt; + + RRDSET *st; + + const char *hosts; + const char *contexts; + const char *charts; + const char *dimensions; + const char *chart_label_key; + const char *charts_labels_filter; + + long long after; + long long before; + bool match_ids; + bool match_names; + + RRDHOST *host; + RRDCONTEXT_ACQUIRED *rca; + RRDINSTANCE_ACQUIRED *ria; + + size_t metrics_skipped_due_to_not_matching_timeframe; +} QUERY_TARGET_LOCALS; + +static __thread QUERY_TARGET thread_query_target = {}; +void query_target_release(QUERY_TARGET *qt) { + if(unlikely(!qt)) return; + if(unlikely(!qt->used)) return; + + simple_pattern_free(qt->hosts.pattern); + qt->hosts.pattern = NULL; + + simple_pattern_free(qt->contexts.pattern); + qt->contexts.pattern = NULL; + + simple_pattern_free(qt->instances.pattern); + qt->instances.pattern = NULL; + + simple_pattern_free(qt->instances.chart_label_key_pattern); + qt->instances.chart_label_key_pattern = NULL; + + simple_pattern_free(qt->instances.charts_labels_filter_pattern); + qt->instances.charts_labels_filter_pattern = NULL; + + simple_pattern_free(qt->query.pattern); + qt->query.pattern = NULL; + + // release the query + for(size_t i = 0, used = qt->query.used; i < used ;i++) { + string_freez(qt->query.array[i].dimension.id); + qt->query.array[i].dimension.id = NULL; + + string_freez(qt->query.array[i].dimension.name); + qt->query.array[i].dimension.name = NULL; + + string_freez(qt->query.array[i].chart.id); + qt->query.array[i].chart.id = NULL; + + string_freez(qt->query.array[i].chart.name); + qt->query.array[i].chart.name = NULL; + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(qt->query.array[i].tiers[tier].db_metric_handle) { + STORAGE_ENGINE *eng = qt->query.array[i].tiers[tier].eng; + eng->api.metric_release(qt->query.array[i].tiers[tier].db_metric_handle); + qt->query.array[i].tiers[tier].db_metric_handle = NULL; + } + } + } + + // release the metrics + for(size_t i = 0, used = qt->metrics.used; i < used ;i++) { + rrdmetric_release(qt->metrics.array[i]); + qt->metrics.array[i] = NULL; + } + + // release the instances + for(size_t i = 0, used = qt->instances.used; i < used ;i++) { + rrdinstance_release(qt->instances.array[i]); + qt->instances.array[i] = NULL; + } + + // release the contexts + for(size_t i = 0, used = qt->contexts.used; i < used ;i++) { + rrdcontext_release(qt->contexts.array[i]); + qt->contexts.array[i] = NULL; + } + + // release the hosts + for(size_t i = 0, used = qt->hosts.used; i < used ;i++) { + qt->hosts.array[i] = NULL; + } + + qt->query.used = 0; + qt->metrics.used = 0; + qt->instances.used = 0; + qt->contexts.used = 0; + qt->hosts.used = 0; + + qt->db.minimum_latest_update_every = 0; + qt->db.first_time_t = 0; + qt->db.last_time_t = 0; + + qt->id[0] = '\0'; + + qt->used = false; +} +void query_target_free(void) { + if(thread_query_target.used) + query_target_release(&thread_query_target); + + freez(thread_query_target.query.array); + thread_query_target.query.array = NULL; + thread_query_target.query.size = 0; + + freez(thread_query_target.metrics.array); + thread_query_target.metrics.array = NULL; + thread_query_target.metrics.size = 0; + + freez(thread_query_target.instances.array); + thread_query_target.instances.array = NULL; + thread_query_target.instances.size = 0; + + freez(thread_query_target.contexts.array); + thread_query_target.contexts.array = NULL; + thread_query_target.contexts.size = 0; + + freez(thread_query_target.hosts.array); + thread_query_target.hosts.array = NULL; + thread_query_target.hosts.size = 0; +} + +static void query_target_add_metric(QUERY_TARGET_LOCALS *qtl, RRDMETRIC_ACQUIRED *rma, RRDINSTANCE *ri, + bool queryable_instance) { + QUERY_TARGET *qt = qtl->qt; + + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + if(rrd_flag_is_deleted(rm)) + return; + + if(qt->metrics.used == qt->metrics.size) { + qt->metrics.size = (qt->metrics.size) ? qt->metrics.size * 2 : 1; + qt->metrics.array = reallocz(qt->metrics.array, qt->metrics.size * sizeof(RRDMETRIC_ACQUIRED *)); + } + qt->metrics.array[qt->metrics.used++] = rrdmetric_acquired_dup(rma); + + if(!queryable_instance) + return; + + time_t common_first_time_t = 0; + time_t common_last_time_t = 0; + time_t common_update_every = 0; + size_t tiers_added = 0; + struct { + STORAGE_ENGINE *eng; + STORAGE_METRIC_HANDLE *db_metric_handle; + time_t db_first_time_t; + time_t db_last_time_t; + time_t db_update_every; + } tier_retention[storage_tiers]; + + for (size_t tier = 0; tier < storage_tiers; tier++) { + STORAGE_ENGINE *eng = qtl->host->db[tier].eng; + tier_retention[tier].eng = eng; + tier_retention[tier].db_update_every = (time_t) (qtl->host->db[tier].tier_grouping * ri->update_every); + + if(rm->rrddim && rm->rrddim->tiers[tier] && rm->rrddim->tiers[tier]->db_metric_handle) + tier_retention[tier].db_metric_handle = eng->api.metric_dup(rm->rrddim->tiers[tier]->db_metric_handle); + else + tier_retention[tier].db_metric_handle = eng->api.metric_get(qtl->host->db[tier].instance, &rm->uuid); + + if(tier_retention[tier].db_metric_handle) { + tier_retention[tier].db_first_time_t = tier_retention[tier].eng->api.query_ops.oldest_time(tier_retention[tier].db_metric_handle); + tier_retention[tier].db_last_time_t = tier_retention[tier].eng->api.query_ops.latest_time(tier_retention[tier].db_metric_handle); + + if(!common_first_time_t) + common_first_time_t = tier_retention[tier].db_first_time_t; + else if(tier_retention[tier].db_first_time_t) + common_first_time_t = MIN(common_first_time_t, tier_retention[tier].db_first_time_t); + + if(!common_last_time_t) + common_last_time_t = tier_retention[tier].db_last_time_t; + else + common_last_time_t = MAX(common_last_time_t, tier_retention[tier].db_last_time_t); + + if(!common_update_every) + common_update_every = tier_retention[tier].db_update_every; + else if(tier_retention[tier].db_update_every) + common_update_every = MIN(common_update_every, tier_retention[tier].db_update_every); + + tiers_added++; + } + else { + tier_retention[tier].db_first_time_t = 0; + tier_retention[tier].db_last_time_t = 0; + tier_retention[tier].db_update_every = 0; + } + } + + bool release_retention = true; + bool timeframe_matches = + (tiers_added + && (common_first_time_t - common_update_every * 2) <= qt->window.before + && (common_last_time_t + common_update_every * 2) >= qt->window.after + ) ? true : false; + + if(timeframe_matches) { + RRDR_DIMENSION_FLAGS options = RRDR_DIMENSION_DEFAULT; + + if (rrd_flag_check(rm, RRD_FLAG_HIDDEN) + || (rm->rrddim && rrddim_option_check(rm->rrddim, RRDDIM_OPTION_HIDDEN))) { + options |= RRDR_DIMENSION_HIDDEN; + options &= ~RRDR_DIMENSION_SELECTED; + } + + if (qt->query.pattern) { + // we have a dimensions pattern + // lets see if this dimension is selected + + if ((qtl->match_ids && simple_pattern_matches(qt->query.pattern, string2str(rm->id))) + || (qtl->match_names && simple_pattern_matches(qt->query.pattern, string2str(rm->name))) + ) { + // it matches the pattern + options |= (RRDR_DIMENSION_SELECTED | RRDR_DIMENSION_NONZERO); + options &= ~RRDR_DIMENSION_HIDDEN; + } + else { + // it does not match the pattern + options |= RRDR_DIMENSION_HIDDEN; + options &= ~RRDR_DIMENSION_SELECTED; + } + } + else { + // we don't have a dimensions pattern + // so this is a selected dimension + // if it is not hidden + if(!(options & RRDR_DIMENSION_HIDDEN)) + options |= RRDR_DIMENSION_SELECTED; + } + + if((options & RRDR_DIMENSION_HIDDEN) && (options & RRDR_DIMENSION_SELECTED)) + options &= ~RRDR_DIMENSION_HIDDEN; + + if(!(options & RRDR_DIMENSION_HIDDEN) || (qt->request.options & RRDR_OPTION_PERCENTAGE)) { + // we have a non-hidden dimension + // let's add it to the query metrics + + if(ri->rrdset) + ri->rrdset->last_accessed_time = qtl->start_s; + + if (qt->query.used == qt->query.size) { + qt->query.size = (qt->query.size) ? qt->query.size * 2 : 1; + qt->query.array = reallocz(qt->query.array, qt->query.size * sizeof(QUERY_METRIC)); + } + QUERY_METRIC *qm = &qt->query.array[qt->query.used++]; + + qm->dimension.options = options; + + qm->link.host = qtl->host; + qm->link.rca = qtl->rca; + qm->link.ria = qtl->ria; + qm->link.rma = rma; + + qm->chart.id = string_dup(ri->id); + qm->chart.name = string_dup(ri->name); + + qm->dimension.id = string_dup(rm->id); + qm->dimension.name = string_dup(rm->name); + + if (!qt->db.first_time_t || common_first_time_t < qt->db.first_time_t) + qt->db.first_time_t = common_first_time_t; + + if (!qt->db.last_time_t || common_last_time_t > qt->db.last_time_t) + qt->db.last_time_t = common_last_time_t; + + for (size_t tier = 0; tier < storage_tiers; tier++) { + qm->tiers[tier].eng = tier_retention[tier].eng; + qm->tiers[tier].db_metric_handle = tier_retention[tier].db_metric_handle; + qm->tiers[tier].db_first_time_t = tier_retention[tier].db_first_time_t; + qm->tiers[tier].db_last_time_t = tier_retention[tier].db_last_time_t; + qm->tiers[tier].db_update_every = tier_retention[tier].db_update_every; + } + release_retention = false; + } + } + else + qtl->metrics_skipped_due_to_not_matching_timeframe++; + + if(release_retention) { + // cleanup anything we allocated to the retention we will not use + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if (tier_retention[tier].db_metric_handle) + tier_retention[tier].eng->api.metric_release(tier_retention[tier].db_metric_handle); + } + } +} + +static void query_target_add_instance(QUERY_TARGET_LOCALS *qtl, RRDINSTANCE_ACQUIRED *ria, bool queryable_instance) { + QUERY_TARGET *qt = qtl->qt; + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + if(rrd_flag_is_deleted(ri)) + return; + + if(qt->instances.used == qt->instances.size) { + qt->instances.size = (qt->instances.size) ? qt->instances.size * 2 : 1; + qt->instances.array = reallocz(qt->instances.array, qt->instances.size * sizeof(RRDINSTANCE_ACQUIRED *)); + } + + qtl->ria = qt->instances.array[qt->instances.used++] = rrdinstance_acquired_dup(ria); + + if(qt->db.minimum_latest_update_every == 0 || ri->update_every < qt->db.minimum_latest_update_every) + qt->db.minimum_latest_update_every = ri->update_every; + + if(queryable_instance) { + if ((qt->instances.chart_label_key_pattern && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, qt->instances.chart_label_key_pattern, ':')) || + (qt->instances.charts_labels_filter_pattern && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, qt->instances.charts_labels_filter_pattern, ':'))) + queryable_instance = false; + } + + size_t added = 0; + + if(unlikely(qt->request.rma)) { + query_target_add_metric(qtl, qt->request.rma, ri, queryable_instance); + added++; + } + else { + RRDMETRIC *rm; + dfe_start_read(ri->rrdmetrics, rm) { + query_target_add_metric(qtl, (RRDMETRIC_ACQUIRED *) rm_dfe.item, ri, queryable_instance); + added++; + } + dfe_done(rm); + } + + if(!added) { + qt->instances.used--; + rrdinstance_release(ria); + } +} + +static void query_target_add_context(QUERY_TARGET_LOCALS *qtl, RRDCONTEXT_ACQUIRED *rca) { + QUERY_TARGET *qt = qtl->qt; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(rrd_flag_is_deleted(rc)) + return; + + if(qt->contexts.used == qt->contexts.size) { + qt->contexts.size = (qt->contexts.size) ? qt->contexts.size * 2 : 1; + qt->contexts.array = reallocz(qt->contexts.array, qt->contexts.size * sizeof(RRDCONTEXT_ACQUIRED *)); + } + qtl->rca = qt->contexts.array[qt->contexts.used++] = rrdcontext_acquired_dup(rca); + + size_t added = 0; + if(unlikely(qt->request.ria)) { + query_target_add_instance(qtl, qt->request.ria, true); + added++; + } + else if(unlikely(qtl->st && qtl->st->rrdcontext == rca && qtl->st->rrdinstance)) { + query_target_add_instance(qtl, qtl->st->rrdinstance, true); + added++; + } + else { + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + bool queryable_instance = false; + if(!qt->instances.pattern + || (qtl->match_ids && simple_pattern_matches(qt->instances.pattern, string2str(ri->id))) + || (qtl->match_names && simple_pattern_matches(qt->instances.pattern, string2str(ri->name))) + ) + queryable_instance = true; + + query_target_add_instance(qtl, (RRDINSTANCE_ACQUIRED *)ri_dfe.item, queryable_instance); + added++; + } + dfe_done(ri); + } + + if(!added) { + qt->contexts.used--; + rrdcontext_release(rca); + } +} + +static void query_target_add_host(QUERY_TARGET_LOCALS *qtl, RRDHOST *host) { + QUERY_TARGET *qt = qtl->qt; + + if(qt->hosts.used == qt->hosts.size) { + qt->hosts.size = (qt->hosts.size) ? qt->hosts.size * 2 : 1; + qt->hosts.array = reallocz(qt->hosts.array, qt->hosts.size * sizeof(RRDHOST *)); + } + qtl->host = qt->hosts.array[qt->hosts.used++] = host; + + // is the chart given valid? + if(unlikely(qtl->st && (!qtl->st->rrdinstance || !qtl->st->rrdcontext))) { + error("QUERY TARGET: RRDSET '%s' given, because it is not linked to rrdcontext structures. Switching to context query.", rrdset_name(qtl->st)); + + if(!is_valid_sp(qtl->charts)) + qtl->charts = rrdset_name(qtl->st); + + qtl->st = NULL; + } + + size_t added = 0; + if(unlikely(qt->request.rca)) { + query_target_add_context(qtl, qt->request.rca); + added++; + } + else if(unlikely(qtl->st)) { + // single chart data queries + query_target_add_context(qtl, qtl->st->rrdcontext); + added++; + } + else { + // context pattern queries + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)qtl->host->rrdctx, qtl->contexts); + if(likely(rca)) { + // we found it! + query_target_add_context(qtl, rca); + rrdcontext_release(rca); + added++; + } + else { + // Probably it is a pattern, we need to search for it... + RRDCONTEXT *rc; + dfe_start_read((DICTIONARY *)qtl->host->rrdctx, rc) { + if(qt->contexts.pattern && !simple_pattern_matches(qt->contexts.pattern, string2str(rc->id))) + continue; + + query_target_add_context(qtl, (RRDCONTEXT_ACQUIRED *)rc_dfe.item); + added++; + } + dfe_done(rc); + } + } + + if(!added) { + qt->hosts.used--; + } +} + +void query_target_generate_name(QUERY_TARGET *qt) { + char options_buffer[100 + 1]; + web_client_api_request_v1_data_options_to_string(options_buffer, 100, qt->request.options); + + char resampling_buffer[20 + 1] = ""; + if(qt->request.resampling_time > 1) + snprintfz(resampling_buffer, 20, "/resampling:%lld", (long long)qt->request.resampling_time); + + char tier_buffer[20 + 1] = ""; + if(qt->request.options & RRDR_OPTION_SELECTED_TIER) + snprintfz(tier_buffer, 20, "/tier:%zu", qt->request.tier); + + if(qt->request.st) + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "chart://host:%s/instance:%s/dimensions:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , rrdhost_hostname(qt->request.st->rrdhost) + , rrdset_name(qt->request.st) + , (qt->request.dimensions) ? qt->request.dimensions : "*" + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , web_client_api_request_v1_data_group_to_string(qt->request.group_method) + , qt->request.group_options?qt->request.group_options:"" + , options_buffer + , resampling_buffer + , tier_buffer + ); + else if(qt->request.host && qt->request.rca && qt->request.ria && qt->request.rma) + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "metric://host:%s/context:%s/instance:%s/dimension:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , rrdhost_hostname(qt->request.host) + , rrdcontext_acquired_id(qt->request.rca) + , rrdinstance_acquired_id(qt->request.ria) + , rrdmetric_acquired_id(qt->request.rma) + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , web_client_api_request_v1_data_group_to_string(qt->request.group_method) + , qt->request.group_options?qt->request.group_options:"" + , options_buffer + , resampling_buffer + , tier_buffer + ); + else + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "context://host:%s/contexts:%s/instances:%s/dimensions:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , (qt->request.host) ? rrdhost_hostname(qt->request.host) : ((qt->request.hosts) ? qt->request.hosts : "*") + , (qt->request.contexts) ? qt->request.contexts : "*" + , (qt->request.charts) ? qt->request.charts : "*" + , (qt->request.dimensions) ? qt->request.dimensions : "*" + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , web_client_api_request_v1_data_group_to_string(qt->request.group_method) + , qt->request.group_options?qt->request.group_options:"" + , options_buffer + , resampling_buffer + , tier_buffer + ); + + json_fix_string(qt->id); +} + +QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) { + QUERY_TARGET *qt = &thread_query_target; + + if(qt->used) + fatal("QUERY TARGET: this query target is already used (%zu queries made with this QUERY_TARGET so far).", qt->queries); + + qt->used = true; + qt->queries++; + + // copy the request into query_thread_target + qt->request = *qtr; + + query_target_generate_name(qt); + qt->window.after = qt->request.after; + qt->window.before = qt->request.before; + rrdr_relative_window_to_absolute(&qt->window.after, &qt->window.before); + + // prepare our local variables - we need these across all these functions + QUERY_TARGET_LOCALS qtl = { + .qt = qt, + .start_s = now_realtime_sec(), + .host = qt->request.host, + .st = qt->request.st, + .hosts = qt->request.hosts, + .contexts = qt->request.contexts, + .charts = qt->request.charts, + .dimensions = qt->request.dimensions, + .chart_label_key = qt->request.chart_label_key, + .charts_labels_filter = qt->request.charts_labels_filter, + }; + + qt->db.minimum_latest_update_every = 0; // it will be updated by query_target_add_query() + + // prepare all the patterns + qt->hosts.pattern = is_valid_sp(qtl.hosts) ? simple_pattern_create(qtl.hosts, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->contexts.pattern = is_valid_sp(qtl.contexts) ? simple_pattern_create(qtl.contexts, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->instances.pattern = is_valid_sp(qtl.charts) ? simple_pattern_create(qtl.charts, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->query.pattern = is_valid_sp(qtl.dimensions) ? simple_pattern_create(qtl.dimensions, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->instances.chart_label_key_pattern = is_valid_sp(qtl.chart_label_key) ? simple_pattern_create(qtl.chart_label_key, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->instances.charts_labels_filter_pattern = is_valid_sp(qtl.charts_labels_filter) ? simple_pattern_create(qtl.charts_labels_filter, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + + qtl.match_ids = qt->request.options & RRDR_OPTION_MATCH_IDS; + qtl.match_names = qt->request.options & RRDR_OPTION_MATCH_NAMES; + if(likely(!qtl.match_ids && !qtl.match_names)) + qtl.match_ids = qtl.match_names = true; + + // verify that the chart belongs to the host we are interested + if(qtl.st) { + if (!qtl.host) { + // It is NULL, set it ourselves. + qtl.host = qtl.st->rrdhost; + } + else if (unlikely(qtl.host != qtl.st->rrdhost)) { + // Oops! A different host! + error("QUERY TARGET: RRDSET '%s' given does not belong to host '%s'. Switching query host to '%s'", + rrdset_name(qtl.st), rrdhost_hostname(qtl.host), rrdhost_hostname(qtl.st->rrdhost)); + qtl.host = qtl.st->rrdhost; + } + } + + if(qtl.host) { + // single host query + query_target_add_host(&qtl, qtl.host); + qtl.hosts = rrdhost_hostname(qtl.host); + } + else { + // multi host query + rrd_rdlock(); + rrdhost_foreach_read(qtl.host) { + if(!qt->hosts.pattern || simple_pattern_matches(qt->hosts.pattern, rrdhost_hostname(qtl.host))) + query_target_add_host(&qtl, qtl.host); + } + rrd_unlock(); + } + + // make sure everything is good + if(!qt->query.used || !qt->metrics.used || !qt->instances.used || !qt->contexts.used || !qt->hosts.used) { + internal_error( + true + , "QUERY TARGET: query '%s' does not have all the data required. " + "Matched %u hosts, %u contexts, %u instances, %u dimensions, %u metrics to query, " + "%zu metrics skipped because they don't have data in the desired time-frame. " + "Aborting it." + , qt->id + , qt->hosts.used + , qt->contexts.used + , qt->instances.used + , qt->metrics.used + , qt->query.used + , qtl.metrics_skipped_due_to_not_matching_timeframe + ); + + query_target_release(qt); + return NULL; + } + + if(!query_target_calculate_window(qt)) { + query_target_release(qt); + return NULL; + } + + return qt; +} + + +// ---------------------------------------------------------------------------- +// load from SQL + +static void rrdinstance_load_clabel(SQL_CLABEL_DATA *sld, void *data) { + RRDINSTANCE *ri = data; + rrdlabels_add(ri->rrdlabels, sld->label_key, sld->label_value, sld->label_source); +} + +static void rrdinstance_load_dimension(SQL_DIMENSION_DATA *sd, void *data) { + RRDINSTANCE *ri = data; + + RRDMETRIC trm = { + .id = string_strdupz(sd->id), + .name = string_strdupz(sd->name), + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomic + }; + if(sd->hidden) trm.flags |= RRD_FLAG_HIDDEN; + + uuid_copy(trm.uuid, sd->dim_id); + + dictionary_set(ri->rrdmetrics, string2str(trm.id), &trm, sizeof(trm)); +} + +static void rrdinstance_load_chart_callback(SQL_CHART_DATA *sc, void *data) { + RRDHOST *host = data; + + RRDCONTEXT tc = { + .id = string_strdupz(sc->context), + .title = string_strdupz(sc->title), + .units = string_strdupz(sc->units), + .family = string_strdupz(sc->family), + .priority = sc->priority, + .chart_type = sc->chart_type, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics + .rrdhost = host, + }; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_set_and_acquire_item((DICTIONARY *)host->rrdctx, string2str(tc.id), &tc, sizeof(tc)); + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE tri = { + .id = string_strdupz(sc->id), + .name = string_strdupz(sc->name), + .title = string_strdupz(sc->title), + .units = string_strdupz(sc->units), + .family = string_strdupz(sc->family), + .chart_type = sc->chart_type, + .priority = sc->priority, + .update_every = sc->update_every, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics + }; + uuid_copy(tri.uuid, sc->chart_id); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_set_and_acquire_item(rc->rrdinstances, sc->id, &tri, sizeof(tri)); + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + + ctx_get_dimension_list(&ri->uuid, rrdinstance_load_dimension, ri); + ctx_get_label_list(&ri->uuid, rrdinstance_load_clabel, ri); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); + rrdinstance_release(ria); + rrdcontext_release(rca); +} + +static void rrdcontext_load_context_callback(VERSIONED_CONTEXT_DATA *ctx_data, void *data) { + RRDHOST *host = data; + (void)host; + + RRDCONTEXT trc = { + .id = string_strdupz(ctx_data->id), + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics + + // no need to set more data here + // we only need the hub data + + .hub = *ctx_data, + }; + dictionary_set((DICTIONARY *)host->rrdctx, string2str(trc.id), &trc, sizeof(trc)); +} + +void rrdhost_load_rrdcontext_data(RRDHOST *host) { + if(host->rrdctx) return; + + rrdhost_create_rrdcontexts(host); + ctx_get_context_list(&host->host_uuid, rrdcontext_load_context_callback, host); + ctx_get_chart_list(&host->host_uuid, rrdinstance_load_chart_callback, host); + + RRDCONTEXT *rc; + dfe_start_read((DICTIONARY *)host->rrdctx, rc) { + rrdcontext_trigger_updates(rc, __FUNCTION__ ); + } + dfe_done(rc); + + rrdcontext_garbage_collect_single_host(host, false); +} + +// ---------------------------------------------------------------------------- +// version hash calculation + +static uint64_t rrdcontext_version_hash_with_callback( + RRDHOST *host, + void (*callback)(RRDCONTEXT *, bool, void *), + bool snapshot, + void *bundle) { + + if(unlikely(!host || !host->rrdctx)) return 0; + + RRDCONTEXT *rc; + uint64_t hash = 0; + + // loop through all contexts of the host + dfe_start_read((DICTIONARY *)host->rrdctx, rc) { + + rrdcontext_lock(rc); + + if(unlikely(rrd_flag_check(rc, RRD_FLAG_HIDDEN))) { + rrdcontext_unlock(rc); + continue; + } + + if(unlikely(callback)) + callback(rc, snapshot, bundle); + + // skip any deleted contexts + if(unlikely(rrd_flag_is_deleted(rc))) { + rrdcontext_unlock(rc); + continue; + } + + // we use rc->hub.* which has the latest + // metadata we have sent to the hub + + // if a context is currently queued, rc->hub.* does NOT + // reflect the queued changes. rc->hub.* is updated with + // their metadata, after messages are dispatched to hub. + + // when the context is being collected, + // rc->hub.last_time_t is already zero + + hash += rc->hub.version + rc->hub.last_time_t - rc->hub.first_time_t; + + rrdcontext_unlock(rc); + + } + dfe_done(rc); + + return hash; +} + +// ---------------------------------------------------------------------------- +// retention recalculation + +static void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, bool worker_jobs) { + rrdcontext_post_process_updates(rc, true, reason, worker_jobs); +} + +static void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, bool worker_jobs) { + if(unlikely(!host || !host->rrdctx)) return; + + RRDCONTEXT *rc; + dfe_start_read((DICTIONARY *)host->rrdctx, rc) { + rrdcontext_recalculate_context_retention(rc, reason, worker_jobs); + } + dfe_done(rc); +} + +static void rrdcontext_recalculate_retention_all_hosts(void) { + rrdcontext_next_db_rotation_ut = 0; + rrd_rdlock(); + RRDHOST *host; + rrdhost_foreach_read(host) { + worker_is_busy(WORKER_JOB_RETENTION); + rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DB_ROTATION, true); + } + rrd_unlock(); +} + +// ---------------------------------------------------------------------------- +// garbage collector + +static bool rrdmetric_update_retention(RRDMETRIC *rm) { + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + + if(rm->rrddim) { + min_first_time_t = rrddim_first_entry_t(rm->rrddim); + max_last_time_t = rrddim_last_entry_t(rm->rrddim); + } +#ifdef ENABLE_DBENGINE + else if (dbengine_enabled) { + RRDHOST *rrdhost = rm->ri->rc->rrdhost; + for (size_t tier = 0; tier < storage_tiers; tier++) { + if(!rrdhost->db[tier].instance) continue; + + time_t first_time_t, last_time_t; + if (rrdeng_metric_retention_by_uuid(rrdhost->db[tier].instance, &rm->uuid, &first_time_t, &last_time_t) == 0) { + if (first_time_t < min_first_time_t) + min_first_time_t = first_time_t; + + if (last_time_t > max_last_time_t) + max_last_time_t = last_time_t; + } + } + } + else { + // cannot get retention + return false; + } +#endif + + if(min_first_time_t == LONG_MAX) + min_first_time_t = 0; + + if(min_first_time_t > max_last_time_t) { + internal_error(true, "RRDMETRIC: retention of '%s' is flipped", string2str(rm->id)); + time_t tmp = min_first_time_t; + min_first_time_t = max_last_time_t; + max_last_time_t = tmp; + } + + // check if retention changed + + if (min_first_time_t != rm->first_time_t) { + rm->first_time_t = min_first_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (max_last_time_t != rm->last_time_t) { + rm->last_time_t = max_last_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(unlikely(!rm->first_time_t && !rm->last_time_t)) + rrd_flag_set_deleted(rm, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + rrd_flag_set(rm, RRD_FLAG_LIVE_RETENTION); + + return true; +} + +static inline bool rrdmetric_should_be_deleted(RRDMETRIC *rm) { + if(likely(!rrd_flag_check(rm, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(rm, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(likely(rm->rrddim)) + return false; + + rrdmetric_update_retention(rm); + if(rm->first_time_t || rm->last_time_t) + return false; + + return true; +} + +static inline bool rrdinstance_should_be_deleted(RRDINSTANCE *ri) { + if(likely(!rrd_flag_check(ri, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(ri, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(likely(ri->rrdset)) + return false; + + if(unlikely(dictionary_referenced_items(ri->rrdmetrics) != 0)) + return false; + + if(unlikely(dictionary_entries(ri->rrdmetrics) != 0)) + return false; + + if(ri->first_time_t || ri->last_time_t) + return false; + + return true; +} + +static inline bool rrdcontext_should_be_deleted(RRDCONTEXT *rc) { + if(likely(!rrd_flag_check(rc, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(rc, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(unlikely(dictionary_referenced_items(rc->rrdinstances) != 0)) + return false; + + if(unlikely(dictionary_entries(rc->rrdinstances) != 0)) + return false; + + if(unlikely(rc->first_time_t || rc->last_time_t)) + return false; + + return true; +} + +void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc) { + // we need to refresh the string pointers in rc->hub + // in case the context changed values + rc->hub.id = string2str(rc->id); + rc->hub.title = string2str(rc->title); + rc->hub.units = string2str(rc->units); + rc->hub.family = string2str(rc->family); + + // delete it from SQL + if(ctx_delete_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) + error("RRDCONTEXT: failed to delete context '%s' version %"PRIu64" from SQL.", rc->hub.id, rc->hub.version); +} + +static void rrdcontext_garbage_collect_single_host(RRDHOST *host, bool worker_jobs) { + + internal_error(true, "RRDCONTEXT: garbage collecting context structures of host '%s'", rrdhost_hostname(host)); + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx, rc) { + if(unlikely(netdata_exit)) break; + + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP); + + rrdcontext_lock(rc); + + RRDINSTANCE *ri; + dfe_start_reentrant(rc->rrdinstances, ri) { + if(unlikely(netdata_exit)) break; + + RRDMETRIC *rm; + dfe_start_write(ri->rrdmetrics, rm) { + if(rrdmetric_should_be_deleted(rm)) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + if(!dictionary_del(ri->rrdmetrics, string2str(rm->id))) + error("RRDCONTEXT: metric '%s' of instance '%s' of context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(rm->id), + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: metric '%s' of instance '%s' of context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(rm->id), + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + } + } + dfe_done(rm); + + if(rrdinstance_should_be_deleted(ri)) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + if(!dictionary_del(rc->rrdinstances, string2str(ri->id))) + error("RRDCONTEXT: instance '%s' of context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: instance '%s' of context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + } + } + dfe_done(ri); + + if(unlikely(rrdcontext_should_be_deleted(rc))) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_delete_from_sql_unsafe(rc); + + if(!dictionary_del((DICTIONARY *)host->rrdctx, string2str(rc->id))) + error("RRDCONTEXT: context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(rc->id), + rrdhost_hostname(host)); + } + + // the item is referenced in the dictionary + // so, it is still here to unlock, even if we have deleted it + rrdcontext_unlock(rc); + } + dfe_done(rc); +} + +static void rrdcontext_garbage_collect_for_all_hosts(void) { + rrd_rdlock(); + RRDHOST *host; + rrdhost_foreach_read(host) { + rrdcontext_garbage_collect_single_host(host, true); + } + rrd_unlock(); +} + +// ---------------------------------------------------------------------------- +// post processing + +static void rrdmetric_process_updates(RRDMETRIC *rm, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(rm, reason); + + if(!force && !rrd_flag_is_updated(rm) && rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION) && !rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + return; + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_METRIC); + + if(reason & RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) { + rrd_flag_set_archived(rm); + rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD); + } + if(rrd_flag_is_deleted(rm) && (reason & RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + rrd_flag_set_archived(rm); + + rrdmetric_update_retention(rm); + + rrd_flag_unset_updated(rm); +} + +static void rrdinstance_post_process_updates(RRDINSTANCE *ri, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(ri, reason); + + if(!force && !rrd_flag_is_updated(ri) && rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION)) + return; + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_INSTANCE); + + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + size_t metrics_active = 0, metrics_deleted = 0; + bool live_retention = true, currently_collected = false; + if(dictionary_entries(ri->rrdmetrics) > 0) { + RRDMETRIC *rm; + dfe_start_read((DICTIONARY *)ri->rrdmetrics, rm) { + if(unlikely(netdata_exit)) break; + + RRD_FLAGS reason_to_pass = reason; + if(rrd_flag_check(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; + + rrdmetric_process_updates(rm, force, reason_to_pass, worker_jobs); + + if(unlikely(!rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION))) + live_retention = false; + + if (unlikely((rrdmetric_should_be_deleted(rm)))) { + metrics_deleted++; + continue; + } + + if(!currently_collected && rrd_flag_check(rm, RRD_FLAG_COLLECTED) && rm->first_time_t) + currently_collected = true; + + metrics_active++; + + if (rm->first_time_t && rm->first_time_t < min_first_time_t) + min_first_time_t = rm->first_time_t; + + if (rm->last_time_t && rm->last_time_t > max_last_time_t) + max_last_time_t = rm->last_time_t; + } + dfe_done(rm); + } + + if(unlikely(live_retention && !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + rrd_flag_set(ri, RRD_FLAG_LIVE_RETENTION); + else if(unlikely(!live_retention && rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + rrd_flag_clear(ri, RRD_FLAG_LIVE_RETENTION); + + if(unlikely(!metrics_active)) { + // no metrics available + + if(ri->first_time_t) { + ri->first_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(ri->last_time_t) { + ri->last_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + // we have active metrics... + + if (unlikely(min_first_time_t == LONG_MAX)) + min_first_time_t = 0; + + if (unlikely(min_first_time_t == 0 || max_last_time_t == 0)) { + if(ri->first_time_t) { + ri->first_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(ri->last_time_t) { + ri->last_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(live_retention)) + rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + rrd_flag_clear(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + if (unlikely(ri->first_time_t != min_first_time_t)) { + ri->first_time_t = min_first_time_t; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (unlikely(ri->last_time_t != max_last_time_t)) { + ri->last_time_t = max_last_time_t; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(currently_collected)) + rrd_flag_set_collected(ri); + else + rrd_flag_set_archived(ri); + } + } + + rrd_flag_unset_updated(ri); +} + +static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(rc, reason); + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_CONTEXT); + + size_t min_priority = LONG_MAX; + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + size_t instances_active = 0, instances_deleted = 0; + bool live_retention = true, currently_collected = false, hidden = true; + if(dictionary_entries(rc->rrdinstances) > 0) { + RRDINSTANCE *ri; + dfe_start_reentrant(rc->rrdinstances, ri) { + if(unlikely(netdata_exit)) break; + + RRD_FLAGS reason_to_pass = reason; + if(rrd_flag_check(rc, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; + + rrdinstance_post_process_updates(ri, force, reason_to_pass, worker_jobs); + + if(unlikely(hidden && !rrd_flag_check(ri, RRD_FLAG_HIDDEN))) + hidden = false; + + if(unlikely(live_retention && !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + live_retention = false; + + if (unlikely(rrdinstance_should_be_deleted(ri))) { + instances_deleted++; + continue; + } + + if(unlikely(!currently_collected && rrd_flag_is_collected(ri) && ri->first_time_t)) + currently_collected = true; + + internal_error(rc->units != ri->units, + "RRDCONTEXT: '%s' rrdinstance '%s' has different units, context '%s', instance '%s'", + string2str(rc->id), string2str(ri->id), + string2str(rc->units), string2str(ri->units)); + + instances_active++; + + if (ri->priority >= RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY && ri->priority < min_priority) + min_priority = ri->priority; + + if (ri->first_time_t && ri->first_time_t < min_first_time_t) + min_first_time_t = ri->first_time_t; + + if (ri->last_time_t && ri->last_time_t > max_last_time_t) + max_last_time_t = ri->last_time_t; + } + dfe_done(ri); + } + + { + bool previous_hidden = rrd_flag_check(rc, RRD_FLAG_HIDDEN); + if (hidden != previous_hidden) { + if (hidden && !rrd_flag_check(rc, RRD_FLAG_HIDDEN)) + rrd_flag_set(rc, RRD_FLAG_HIDDEN); + else if (!hidden && rrd_flag_check(rc, RRD_FLAG_HIDDEN)) + rrd_flag_clear(rc, RRD_FLAG_HIDDEN); + } + + bool previous_live_retention = rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION); + if (live_retention != previous_live_retention) { + if (live_retention && !rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrd_flag_set(rc, RRD_FLAG_LIVE_RETENTION); + else if (!live_retention && rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrd_flag_clear(rc, RRD_FLAG_LIVE_RETENTION); + } + } + + rrdcontext_lock(rc); + rc->pp.executions++; + + if(unlikely(!instances_active)) { + // we had some instances, but they are gone now... + + if(rc->first_time_t) { + rc->first_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(rc->last_time_t) { + rc->last_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + // we have some active instances... + + if (unlikely(min_first_time_t == LONG_MAX)) + min_first_time_t = 0; + + if (unlikely(min_first_time_t == 0 && max_last_time_t == 0)) { + if(rc->first_time_t) { + rc->first_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(rc->last_time_t) { + rc->last_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + rrd_flag_clear(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + if (unlikely(rc->first_time_t != min_first_time_t)) { + rc->first_time_t = min_first_time_t; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (rc->last_time_t != max_last_time_t) { + rc->last_time_t = max_last_time_t; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(currently_collected)) + rrd_flag_set_collected(rc); + else + rrd_flag_set_archived(rc); + } + + if (min_priority != LONG_MAX && rc->priority != min_priority) { + rc->priority = min_priority; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + } + + if(unlikely(rrd_flag_is_updated(rc) && rc->rrdhost->rrdctx_hub_queue)) { + if(check_if_cloud_version_changed_unsafe(rc, false)) { + rc->version = rrdcontext_get_next_version(rc); + dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx_hub_queue, + string2str(rc->id), rc, sizeof(*rc)); + } + } + + rrd_flag_unset_updated(rc); + rrdcontext_unlock(rc); +} + +static void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function __maybe_unused, RRD_FLAGS flags __maybe_unused) { + if(unlikely(!rc->rrdhost->rrdctx_post_processing_queue)) return; + + if(!rrd_flag_check(rc, RRD_FLAG_QUEUED_FOR_PP)) { + dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx_post_processing_queue, + string2str(rc->id), + rc, + sizeof(*rc)); + +#if(defined(NETDATA_INTERNAL_CHECKS) && defined(LOG_POST_PROCESSING_QUEUE_INSERTIONS)) + { + BUFFER *wb_flags = buffer_create(1000); + rrd_flags_to_buffer(flags, wb_flags); + + BUFFER *wb_reasons = buffer_create(1000); + rrd_reasons_to_buffer(flags, wb_reasons); + + internal_error(true, "RRDCONTEXT: '%s' update triggered by function %s(), due to flags: %s, reasons: %s", + string2str(rc->id), function, + buffer_tostring(wb_flags), + buffer_tostring(wb_reasons)); + + buffer_free(wb_reasons); + buffer_free(wb_flags); + } +#endif + } +} + +static void rrdcontext_dequeue_from_post_processing(RRDCONTEXT *rc) { + if(unlikely(!rc->rrdhost->rrdctx_post_processing_queue)) return; + dictionary_del((DICTIONARY *)rc->rrdhost->rrdctx_post_processing_queue, string2str(rc->id)); +} + +static void rrdcontext_post_process_queued_contexts(RRDHOST *host) { + if(unlikely(!host->rrdctx_post_processing_queue)) return; + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx_post_processing_queue, rc) { + if(unlikely(netdata_exit)) break; + + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_post_process_updates(rc, false, RRD_FLAG_NONE, true); + } + dfe_done(rc); +} + +// ---------------------------------------------------------------------------- +// dispatching contexts to cloud + +static uint64_t rrdcontext_get_next_version(RRDCONTEXT *rc) { + time_t now = now_realtime_sec(); + uint64_t version = MAX(rc->version, rc->hub.version); + version = MAX((uint64_t)now, version); + version++; + return version; +} + +static void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused, void *bundle __maybe_unused) { + + // save it, so that we know the last version we sent to hub + rc->version = rc->hub.version = rrdcontext_get_next_version(rc); + rc->hub.id = string2str(rc->id); + rc->hub.title = string2str(rc->title); + rc->hub.units = string2str(rc->units); + rc->hub.family = string2str(rc->family); + rc->hub.chart_type = rrdset_type_name(rc->chart_type); + rc->hub.priority = rc->priority; + rc->hub.first_time_t = rc->first_time_t; + rc->hub.last_time_t = rrd_flag_is_collected(rc) ? 0 : rc->last_time_t; + rc->hub.deleted = rrd_flag_is_deleted(rc) ? true : false; + +#ifdef ENABLE_ACLK + struct context_updated message = { + .id = rc->hub.id, + .version = rc->hub.version, + .title = rc->hub.title, + .units = rc->hub.units, + .family = rc->hub.family, + .chart_type = rc->hub.chart_type, + .priority = rc->hub.priority, + .first_entry = rc->hub.first_time_t, + .last_entry = rc->hub.last_time_t, + .deleted = rc->hub.deleted, + }; + + if(likely(!rrd_flag_check(rc, RRD_FLAG_HIDDEN))) { + if (snapshot) { + if (!rc->hub.deleted) + contexts_snapshot_add_ctx_update(bundle, &message); + } + else + contexts_updated_add_ctx_update(bundle, &message); + } +#endif + + // store it to SQL + + if(rrd_flag_is_deleted(rc)) + rrdcontext_delete_from_sql_unsafe(rc); + + else if (ctx_store_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) + error("RRDCONTEXT: failed to save context '%s' version %"PRIu64" to SQL.", rc->hub.id, rc->hub.version); +} + +static bool check_if_cloud_version_changed_unsafe(RRDCONTEXT *rc, bool sending __maybe_unused) { + bool id_changed = false, + title_changed = false, + units_changed = false, + family_changed = false, + chart_type_changed = false, + priority_changed = false, + first_time_changed = false, + last_time_changed = false, + deleted_changed = false; + + RRD_FLAGS flags = rrd_flags_get(rc); + + if(unlikely(string2str(rc->id) != rc->hub.id)) + id_changed = true; + + if(unlikely(string2str(rc->title) != rc->hub.title)) + title_changed = true; + + if(unlikely(string2str(rc->units) != rc->hub.units)) + units_changed = true; + + if(unlikely(string2str(rc->family) != rc->hub.family)) + family_changed = true; + + if(unlikely(rrdset_type_name(rc->chart_type) != rc->hub.chart_type)) + chart_type_changed = true; + + if(unlikely(rc->priority != rc->hub.priority)) + priority_changed = true; + + if(unlikely((uint64_t)rc->first_time_t != rc->hub.first_time_t)) + first_time_changed = true; + + if(unlikely((uint64_t)((flags & RRD_FLAG_COLLECTED) ? 0 : rc->last_time_t) != rc->hub.last_time_t)) + last_time_changed = true; + + if(unlikely(((flags & RRD_FLAG_DELETED) ? true : false) != rc->hub.deleted)) + deleted_changed = true; + + if(unlikely(id_changed || title_changed || units_changed || family_changed || chart_type_changed || priority_changed || first_time_changed || last_time_changed || deleted_changed)) { + + internal_error(LOG_TRANSITIONS, + "RRDCONTEXT: %s NEW VERSION '%s'%s of host '%s', version %"PRIu64", title '%s'%s, units '%s'%s, family '%s'%s, chart type '%s'%s, priority %u%s, first_time_t %ld%s, last_time_t %ld%s, deleted '%s'%s, (queued for %llu ms, expected %llu ms)", + sending?"SENDING":"QUEUE", + string2str(rc->id), id_changed ? " (CHANGED)" : "", + rrdhost_hostname(rc->rrdhost), + rc->version, + string2str(rc->title), title_changed ? " (CHANGED)" : "", + string2str(rc->units), units_changed ? " (CHANGED)" : "", + string2str(rc->family), family_changed ? " (CHANGED)" : "", + rrdset_type_name(rc->chart_type), chart_type_changed ? " (CHANGED)" : "", + rc->priority, priority_changed ? " (CHANGED)" : "", + rc->first_time_t, first_time_changed ? " (CHANGED)" : "", + (flags & RRD_FLAG_COLLECTED) ? 0 : rc->last_time_t, last_time_changed ? " (CHANGED)" : "", + (flags & RRD_FLAG_DELETED) ? "true" : "false", deleted_changed ? " (CHANGED)" : "", + sending ? (now_realtime_usec() - rc->queue.queued_ut) / USEC_PER_MS : 0, + sending ? (rc->queue.scheduled_dispatch_ut - rc->queue.queued_ut) / USEC_PER_MS : 0 + ); + + return true; + } + + return false; +} + +static inline usec_t rrdcontext_calculate_queued_dispatch_time_ut(RRDCONTEXT *rc, usec_t now_ut) { + + if(likely(rc->queue.delay_calc_ut >= rc->queue.queued_ut)) + return rc->queue.scheduled_dispatch_ut; + + RRD_FLAGS flags = rc->queue.queued_flags; + + usec_t delay = LONG_MAX; + int i; + struct rrdcontext_reason *reason; + for(i = 0, reason = &rrdcontext_reasons[i]; reason->name ; reason = &rrdcontext_reasons[++i]) { + if(unlikely(flags & reason->flag)) { + if(reason->delay_ut < delay) + delay = reason->delay_ut; + } + } + + if(unlikely(delay == LONG_MAX)) { + internal_error(true, "RRDCONTEXT: '%s', cannot find minimum delay of flags %x", string2str(rc->id), (unsigned int)flags); + delay = 60 * USEC_PER_SEC; + } + + rc->queue.delay_calc_ut = now_ut; + usec_t dispatch_ut = rc->queue.scheduled_dispatch_ut = rc->queue.queued_ut + delay; + return dispatch_ut; +} + +static void rrdcontext_dequeue_from_hub_queue(RRDCONTEXT *rc) { + dictionary_del((DICTIONARY *)rc->rrdhost->rrdctx_hub_queue, string2str(rc->id)); +} + +static void rrdcontext_dispatch_queued_contexts_to_hub(RRDHOST *host, usec_t now_ut) { + + // check if we have received a streaming command for this host + if(!rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS) || !aclk_connected || !host->rrdctx_hub_queue) + return; + + // check if there are queued items to send + if(!dictionary_entries((DICTIONARY *)host->rrdctx_hub_queue)) + return; + + if(!host->node_id) + return; + + size_t messages_added = 0; + contexts_updated_t bundle = NULL; + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx_hub_queue, rc) { + if(unlikely(netdata_exit)) break; + + if(unlikely(messages_added >= MESSAGES_PER_BUNDLE_TO_SEND_TO_HUB_PER_HOST)) + break; + + worker_is_busy(WORKER_JOB_QUEUED); + usec_t dispatch_ut = rrdcontext_calculate_queued_dispatch_time_ut(rc, now_ut); + char *claim_id = get_agent_claimid(); + + if(unlikely(now_ut >= dispatch_ut) && claim_id) { + worker_is_busy(WORKER_JOB_CHECK); + + rrdcontext_lock(rc); + + if(check_if_cloud_version_changed_unsafe(rc, true)) { + worker_is_busy(WORKER_JOB_SEND); + +#ifdef ENABLE_ACLK + if(!bundle) { + // prepare the bundle to send the messages + char uuid[UUID_STR_LEN]; + uuid_unparse_lower(*host->node_id, uuid); + + bundle = contexts_updated_new(claim_id, uuid, 0, now_ut); + } +#endif + // update the hub data of the context, give a new version, pack the message + // and save an update to SQL + rrdcontext_message_send_unsafe(rc, false, bundle); + messages_added++; + + rc->queue.dispatches++; + rc->queue.dequeued_ut = now_ut; + } + else + rc->version = rc->hub.version; + + // remove it from the queue + worker_is_busy(WORKER_JOB_DEQUEUE); + rrdcontext_dequeue_from_hub_queue(rc); + + if(unlikely(rrdcontext_should_be_deleted(rc))) { + // this is a deleted context - delete it forever... + + worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_delete_from_sql_unsafe(rc); + + STRING *id = string_dup(rc->id); + rrdcontext_unlock(rc); + + // delete it from the master dictionary + if(!dictionary_del((DICTIONARY *)host->rrdctx, string2str(rc->id))) + error("RRDCONTEXT: '%s' of host '%s' failed to be deleted from rrdcontext dictionary.", + string2str(id), rrdhost_hostname(host)); + + string_freez(id); + } + else + rrdcontext_unlock(rc); + } + freez(claim_id); + } + dfe_done(rc); + +#ifdef ENABLE_ACLK + if(!netdata_exit && bundle) { + // we have a bundle to send messages + + // update the version hash + contexts_updated_update_version_hash(bundle, rrdcontext_version_hash(host)); + + // send it + aclk_send_contexts_updated(bundle); + } + else if(bundle) + contexts_updated_delete(bundle); +#endif + +} + +// ---------------------------------------------------------------------------- +// worker thread + +static void rrdcontext_main_cleanup(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + // custom code + worker_unregister(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *rrdcontext_main(void *ptr) { + netdata_thread_cleanup_push(rrdcontext_main_cleanup, ptr); + + worker_register("RRDCONTEXT"); + worker_register_job_name(WORKER_JOB_HOSTS, "hosts"); + worker_register_job_name(WORKER_JOB_CHECK, "dedup checks"); + worker_register_job_name(WORKER_JOB_SEND, "sent contexts"); + worker_register_job_name(WORKER_JOB_DEQUEUE, "deduplicated contexts"); + worker_register_job_name(WORKER_JOB_RETENTION, "metrics retention"); + worker_register_job_name(WORKER_JOB_QUEUED, "queued contexts"); + worker_register_job_name(WORKER_JOB_CLEANUP, "cleanups"); + worker_register_job_name(WORKER_JOB_CLEANUP_DELETE, "deletes"); + worker_register_job_name(WORKER_JOB_PP_METRIC, "check metrics"); + worker_register_job_name(WORKER_JOB_PP_INSTANCE, "check instances"); + worker_register_job_name(WORKER_JOB_PP_CONTEXT, "check contexts"); + + worker_register_job_custom_metric(WORKER_JOB_HUB_QUEUE_SIZE, "hub queue size", "contexts", WORKER_METRIC_ABSOLUTE); + worker_register_job_custom_metric(WORKER_JOB_PP_QUEUE_SIZE, "post processing queue size", "contexts", WORKER_METRIC_ABSOLUTE); + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = RRDCONTEXT_WORKER_THREAD_HEARTBEAT_USEC; + + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + if(unlikely(netdata_exit)) break; + + usec_t now_ut = now_realtime_usec(); + + if(rrdcontext_next_db_rotation_ut && now_ut > rrdcontext_next_db_rotation_ut) { + rrdcontext_recalculate_retention_all_hosts(); + rrdcontext_garbage_collect_for_all_hosts(); + rrdcontext_next_db_rotation_ut = 0; + } + + size_t hub_queued_contexts_for_all_hosts = 0; + size_t pp_queued_contexts_for_all_hosts = 0; + + rrd_rdlock(); + RRDHOST *host; + rrdhost_foreach_read(host) { + if(unlikely(netdata_exit)) break; + + worker_is_busy(WORKER_JOB_HOSTS); + + if(host->rrdctx_post_processing_queue) { + pp_queued_contexts_for_all_hosts += + dictionary_entries((DICTIONARY *)host->rrdctx_post_processing_queue); + rrdcontext_post_process_queued_contexts(host); + } + + if(host->rrdctx_hub_queue) { + hub_queued_contexts_for_all_hosts += dictionary_entries((DICTIONARY *)host->rrdctx_hub_queue); + rrdcontext_dispatch_queued_contexts_to_hub(host, now_ut); + } + } + rrd_unlock(); + + worker_set_metric(WORKER_JOB_HUB_QUEUE_SIZE, (NETDATA_DOUBLE)hub_queued_contexts_for_all_hosts); + worker_set_metric(WORKER_JOB_PP_QUEUE_SIZE, (NETDATA_DOUBLE)pp_queued_contexts_for_all_hosts); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/database/rrdcontext.h b/database/rrdcontext.h new file mode 100644 index 0000000..67e6cf3 --- /dev/null +++ b/database/rrdcontext.h @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDCONTEXT_H +#define NETDATA_RRDCONTEXT_H 1 + +// ---------------------------------------------------------------------------- +// RRDMETRIC + +typedef struct rrdmetric_acquired RRDMETRIC_ACQUIRED; + +// ---------------------------------------------------------------------------- +// RRDINSTANCE + +typedef struct rrdinstance_acquired RRDINSTANCE_ACQUIRED; + +// ---------------------------------------------------------------------------- +// RRDCONTEXT + +typedef struct rrdcontexts_dictionary RRDCONTEXTS; +typedef struct rrdcontext_acquired RRDCONTEXT_ACQUIRED; + +// ---------------------------------------------------------------------------- + +#include "rrd.h" + +const char *rrdmetric_acquired_id(RRDMETRIC_ACQUIRED *rma); +const char *rrdmetric_acquired_name(RRDMETRIC_ACQUIRED *rma); +NETDATA_DOUBLE rrdmetric_acquired_last_stored_value(RRDMETRIC_ACQUIRED *rma); + +const char *rrdinstance_acquired_id(RRDINSTANCE_ACQUIRED *ria); +const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria); +DICTIONARY *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria); +DICTIONARY *rrdinstance_acquired_functions(RRDINSTANCE_ACQUIRED *ria); + +// ---------------------------------------------------------------------------- +// public API for rrdhost + +void rrdhost_load_rrdcontext_data(RRDHOST *host); +void rrdhost_create_rrdcontexts(RRDHOST *host); +void rrdhost_destroy_rrdcontexts(RRDHOST *host); + +void rrdcontext_host_child_connected(RRDHOST *host); +void rrdcontext_host_child_disconnected(RRDHOST *host); + +int rrdcontext_foreach_instance_with_rrdset_in_context(RRDHOST *host, const char *context, int (*callback)(RRDSET *st, void *data), void *data); + +typedef enum { + RRDCONTEXT_OPTION_NONE = 0, + RRDCONTEXT_OPTION_SHOW_METRICS = (1 << 0), + RRDCONTEXT_OPTION_SHOW_INSTANCES = (1 << 1), + RRDCONTEXT_OPTION_SHOW_LABELS = (1 << 2), + RRDCONTEXT_OPTION_SHOW_QUEUED = (1 << 3), + RRDCONTEXT_OPTION_SHOW_FLAGS = (1 << 4), + RRDCONTEXT_OPTION_SHOW_DELETED = (1 << 5), + RRDCONTEXT_OPTION_DEEPSCAN = (1 << 6), + RRDCONTEXT_OPTION_SHOW_UUIDS = (1 << 7), + RRDCONTEXT_OPTION_SHOW_HIDDEN = (1 << 8), + RRDCONTEXT_OPTION_SKIP_ID = (1 << 31), // internal use +} RRDCONTEXT_TO_JSON_OPTIONS; + +#define RRDCONTEXT_OPTIONS_ALL (RRDCONTEXT_OPTION_SHOW_METRICS|RRDCONTEXT_OPTION_SHOW_INSTANCES|RRDCONTEXT_OPTION_SHOW_LABELS|RRDCONTEXT_OPTION_SHOW_QUEUED|RRDCONTEXT_OPTION_SHOW_FLAGS|RRDCONTEXT_OPTION_SHOW_DELETED|RRDCONTEXT_OPTION_SHOW_UUIDS|RRDCONTEXT_OPTION_SHOW_HIDDEN) + +int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, const char *context, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); +int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); + +// ---------------------------------------------------------------------------- +// public API for rrdcontexts + +const char *rrdcontext_acquired_id(RRDCONTEXT_ACQUIRED *rca); + +// ---------------------------------------------------------------------------- +// public API for rrddims + +void rrdcontext_updated_rrddim(RRDDIM *rd); +void rrdcontext_removed_rrddim(RRDDIM *rd); +void rrdcontext_updated_rrddim_algorithm(RRDDIM *rd); +void rrdcontext_updated_rrddim_multiplier(RRDDIM *rd); +void rrdcontext_updated_rrddim_divisor(RRDDIM *rd); +void rrdcontext_updated_rrddim_flags(RRDDIM *rd); +void rrdcontext_collected_rrddim(RRDDIM *rd); +int rrdcontext_find_dimension_uuid(RRDSET *st, const char *id, uuid_t *store_uuid); + +// ---------------------------------------------------------------------------- +// public API for rrdsets + +void rrdcontext_updated_rrdset(RRDSET *st); +void rrdcontext_removed_rrdset(RRDSET *st); +void rrdcontext_updated_rrdset_name(RRDSET *st); +void rrdcontext_updated_rrdset_flags(RRDSET *st); +void rrdcontext_updated_retention_rrdset(RRDSET *st); +void rrdcontext_collected_rrdset(RRDSET *st); +int rrdcontext_find_chart_uuid(RRDSET *st, uuid_t *store_uuid); + +// ---------------------------------------------------------------------------- +// public API for ACLK + +void rrdcontext_hub_checkpoint_command(void *cmd); +void rrdcontext_hub_stop_streaming_command(void *cmd); + + +// ---------------------------------------------------------------------------- +// public API for threads + +void rrdcontext_db_rotation(void); +void *rrdcontext_main(void *); + +// ---------------------------------------------------------------------------- +// public API for weights + +struct metric_entry { + RRDCONTEXT_ACQUIRED *rca; + RRDINSTANCE_ACQUIRED *ria; + RRDMETRIC_ACQUIRED *rma; +}; + +DICTIONARY *rrdcontext_all_metrics_to_dict(RRDHOST *host, SIMPLE_PATTERN *contexts); + +// ---------------------------------------------------------------------------- +// public API for queries + +typedef struct query_metric { + struct query_metric_tier { + struct storage_engine *eng; + STORAGE_METRIC_HANDLE *db_metric_handle; + time_t db_first_time_t; // the oldest timestamp available for this tier + time_t db_last_time_t; // the latest timestamp available for this tier + time_t db_update_every; // latest update every for this tier + } tiers[RRD_STORAGE_TIERS]; + + struct { + RRDHOST *host; + RRDCONTEXT_ACQUIRED *rca; + RRDINSTANCE_ACQUIRED *ria; + RRDMETRIC_ACQUIRED *rma; + } link; + + struct { + STRING *id; + STRING *name; + RRDR_DIMENSION_FLAGS options; + } dimension; + + struct { + STRING *id; + STRING *name; + } chart; + +} QUERY_METRIC; + +#define MAX_QUERY_TARGET_ID_LENGTH 255 + +typedef struct query_target_request { + RRDHOST *host; // the host to be queried (can be NULL, hosts will be used) + RRDCONTEXT_ACQUIRED *rca; // the context to be queried (can be NULL) + RRDINSTANCE_ACQUIRED *ria; // the instance to be queried (can be NULL) + RRDMETRIC_ACQUIRED *rma; // the metric to be queried (can be NULL) + RRDSET *st; // the chart to be queried (NULL, for context queries) + const char *hosts; // hosts simple pattern + const char *contexts; // contexts simple pattern (context queries) + const char *charts; // charts simple pattern (for context queries) + const char *dimensions; // dimensions simple pattern + const char *chart_label_key; // select only the chart having this label key + const char *charts_labels_filter; // select only the charts having this combo of label key:value + time_t after; // the requested timeframe + time_t before; // the requested timeframe + size_t points; // the requested number of points + time_t timeout; // the timeout of the query in seconds + uint32_t format; // DATASOURCE_FORMAT + RRDR_OPTIONS options; + RRDR_GROUPING group_method; + const char *group_options; + time_t resampling_time; + size_t tier; + QUERY_SOURCE query_source; +} QUERY_TARGET_REQUEST; + +typedef struct query_target { + char id[MAX_QUERY_TARGET_ID_LENGTH + 1]; // query identifier (for logging) + QUERY_TARGET_REQUEST request; + + bool used; // when true, this query is currently being used + size_t queries; // how many query we have done so far + + struct { + bool relative; // true when the request made with relative timestamps, true if it was absolute + bool aligned; + time_t after; // the absolute timestamp this query is about + time_t before; // the absolute timestamp this query is about + time_t query_granularity; + size_t points; // the number of points the query will return (maybe different from the request) + size_t group; + RRDR_GROUPING group_method; + const char *group_options; + size_t resampling_group; + NETDATA_DOUBLE resampling_divisor; + RRDR_OPTIONS options; + size_t tier; + } window; + + struct { + time_t first_time_t; // the combined first_time_t of all metrics in the query, across all tiers + time_t last_time_t; // the combined last_time_T of all metrics in the query, across all tiers + time_t minimum_latest_update_every; // the min update every of the metrics in the query + } db; + + struct { + QUERY_METRIC *array; // the metrics to be queried (all of them should be queried, no exceptions) + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + } query; + + struct { + RRDMETRIC_ACQUIRED **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + } metrics; + + struct { + RRDINSTANCE_ACQUIRED **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + SIMPLE_PATTERN *chart_label_key_pattern; + SIMPLE_PATTERN *charts_labels_filter_pattern; + } instances; + + struct { + RRDCONTEXT_ACQUIRED **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + } contexts; + + struct { + RRDHOST **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + } hosts; + +} QUERY_TARGET; + +void query_target_free(void); +void query_target_release(QUERY_TARGET *qt); + +QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr); + +#endif // NETDATA_RRDCONTEXT_H + diff --git a/database/rrddim.c b/database/rrddim.c new file mode 100644 index 0000000..2d909a7 --- /dev/null +++ b/database/rrddim.c @@ -0,0 +1,756 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" +#include "storage_engine.h" + +// ---------------------------------------------------------------------------- +// RRDDIM index + +struct rrddim_constructor { + RRDSET *st; + const char *id; + const char *name; + collected_number multiplier; + collected_number divisor; + RRD_ALGORITHM algorithm; + RRD_MEMORY_MODE memory_mode; + + enum { + RRDDIM_REACT_NONE = 0, + RRDDIM_REACT_NEW = (1 << 0), + RRDDIM_REACT_UPDATED = (1 << 2), + } react_action; + +}; + +// isolated call to appear +// separate in statistics +static void *rrddim_alloc_db(size_t entries) { + return callocz(entries, sizeof(storage_number)); +} + +static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *constructor_data) { + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + RRDHOST *host = st->rrdhost; + + rd->flags = RRDDIM_FLAG_NONE; + + rd->id = string_strdupz(ctr->id); + rd->name = (ctr->name && *ctr->name)?rrd_string_strdupz(ctr->name):string_dup(rd->id); + + rd->algorithm = ctr->algorithm; + rd->multiplier = ctr->multiplier; + rd->divisor = ctr->divisor; + if(!rd->divisor) rd->divisor = 1; + + rd->update_every = st->update_every; + + rd->rrdset = st; + + if(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)) + rd->collections_counter = 1; + + if(ctr->memory_mode == RRD_MEMORY_MODE_MAP || ctr->memory_mode == RRD_MEMORY_MODE_SAVE) { + if(!rrddim_memory_load_or_create_map_save(st, rd, ctr->memory_mode)) { + info("Failed to use memory mode %s for chart '%s', dimension '%s', falling back to ram", (ctr->memory_mode == RRD_MEMORY_MODE_MAP)?"map":"save", rrdset_name(st), rrddim_name(rd)); + ctr->memory_mode = RRD_MEMORY_MODE_RAM; + } + } + + if(ctr->memory_mode == RRD_MEMORY_MODE_RAM) { + size_t entries = st->entries; + if(!entries) entries = 5; + + rd->db = netdata_mmap(NULL, entries * sizeof(storage_number), MAP_PRIVATE, 1); + if(!rd->db) { + info("Failed to use memory mode ram for chart '%s', dimension '%s', falling back to alloc", rrdset_name(st), rrddim_name(rd)); + ctr->memory_mode = RRD_MEMORY_MODE_ALLOC; + } + else rd->memsize = entries * sizeof(storage_number); + } + + if(ctr->memory_mode == RRD_MEMORY_MODE_ALLOC || ctr->memory_mode == RRD_MEMORY_MODE_NONE) { + size_t entries = st->entries; + if(entries < 5) entries = 5; + + rd->db = rrddim_alloc_db(entries); + rd->memsize = entries * sizeof(storage_number); + } + + rd->rrd_memory_mode = ctr->memory_mode; + + if (unlikely(rrdcontext_find_dimension_uuid(st, rrddim_id(rd), &(rd->metric_uuid)))) { + uuid_generate(rd->metric_uuid); + bool found_in_sql = false; (void)found_in_sql; + +// bool found_in_sql = true; +// if(unlikely(sql_find_dimension_uuid(st, rd, &rd->metric_uuid))) { +// found_in_sql = false; +// uuid_generate(rd->metric_uuid); +// } + +#ifdef NETDATA_INTERNAL_CHECKS + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(rd->metric_uuid, uuid_str); + error_report("Dimension UUID for host %s chart [%s] dimension [%s] not found in context. It is now set to %s (%s)", + string2str(host->hostname), + string2str(st->name), + string2str(rd->name), + uuid_str, found_in_sql ? "found in sqlite" : "newly generated"); +#endif + } + + // initialize the db tiers + { + size_t initialized = 0; + for(size_t tier = 0; tier < storage_tiers ; tier++) { + STORAGE_ENGINE *eng = host->db[tier].eng; + rd->tiers[tier] = callocz(1, sizeof(struct rrddim_tier)); + rd->tiers[tier]->tier_grouping = host->db[tier].tier_grouping; + rd->tiers[tier]->collect_ops = &eng->api.collect_ops; + rd->tiers[tier]->query_ops = &eng->api.query_ops; + rd->tiers[tier]->db_metric_handle = eng->api.metric_get_or_create(rd, host->db[tier].instance); + storage_point_unset(rd->tiers[tier]->virtual_point); + initialized++; + + // internal_error(true, "TIER GROUPING of chart '%s', dimension '%s' for tier %d is set to %d", rd->rrdset->name, rd->name, tier, rd->tiers[tier]->tier_grouping); + } + + if(!initialized) + error("Failed to initialize all db tiers for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); + + if(!rd->tiers[0]) + error("Failed to initialize the first db tier for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); + } + + // initialize data collection for all tiers + { + size_t initialized = 0; + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (rd->tiers[tier]) { + rd->tiers[tier]->db_collection_handle = rd->tiers[tier]->collect_ops->init(rd->tiers[tier]->db_metric_handle, st->rrdhost->db[tier].tier_grouping * st->update_every, rd->rrdset->storage_metrics_groups[tier]); + initialized++; + } + } + + if(!initialized) + error("Failed to initialize data collection for all db tiers for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); + } + + if(rrdset_number_of_dimensions(st) != 0) { + RRDDIM *td; + dfe_start_write(st->rrddim_root_index, td) { + if(!td) break; + } + dfe_done(td); + + if(td && (td->algorithm != rd->algorithm || ABS(td->multiplier) != ABS(rd->multiplier) || ABS(td->divisor) != ABS(rd->divisor))) { + if(!rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) { +#ifdef NETDATA_INTERNAL_CHECKS + info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", + rrddim_name(rd), + rrdset_name(st), + rrdhost_hostname(host), + rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(td->algorithm), + rd->multiplier, td->multiplier, + rd->divisor, td->divisor + ); +#endif + rrdset_flag_set(st, RRDSET_FLAG_HETEROGENEOUS); + } + } + } + + rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); + + // let the chart resync + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + ml_new_dimension(rd); + + ctr->react_action = RRDDIM_REACT_NEW; + + internal_error(false, "RRDDIM: inserted dimension '%s' of chart '%s' of host '%s'", + rrddim_name(rd), rrdset_name(st), rrdhost_hostname(st->rrdhost)); + +} + +static void rrddim_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *rrdset) { + RRDDIM *rd = rrddim; + RRDSET *st = rrdset; + RRDHOST *host = st->rrdhost; + + internal_error(false, "RRDDIM: deleting dimension '%s' of chart '%s' of host '%s'", + rrddim_name(rd), rrdset_name(st), rrdhost_hostname(host)); + + rrdcontext_removed_rrddim(rd); + + ml_delete_dimension(rd); + + debug(D_RRD_CALLS, "rrddim_free() %s.%s", rrdset_name(st), rrddim_name(rd)); + + size_t tiers_available = 0, tiers_said_no_retention = 0; + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(rd->tiers[tier] && rd->tiers[tier]->db_collection_handle) { + tiers_available++; + + if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle)) + tiers_said_no_retention++; + + rd->tiers[tier]->db_collection_handle = NULL; + } + } + + if (tiers_available == tiers_said_no_retention && tiers_said_no_retention && rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + /* This metric has no data and no references */ + metaqueue_delete_dimension_uuid(&rd->metric_uuid); + } + + rrddimvar_delete_all(rd); + + // free(rd->annotations); + //#ifdef ENABLE_ACLK + // if (!netdata_exit) + // aclk_send_dimension_update(rd); + //#endif + + // this will free MEMORY_MODE_SAVE and MEMORY_MODE_MAP structures + rrddim_memory_file_free(rd); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(!rd->tiers[tier]) continue; + + STORAGE_ENGINE* eng = host->db[tier].eng; + eng->api.metric_release(rd->tiers[tier]->db_metric_handle); + + freez(rd->tiers[tier]); + rd->tiers[tier] = NULL; + } + + if(rd->db) { + if(rd->rrd_memory_mode == RRD_MEMORY_MODE_RAM) + netdata_munmap(rd->db, rd->memsize); + else + freez(rd->db); + } + + string_freez(rd->id); + string_freez(rd->name); +} + +static bool rrddim_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *new_rrddim, void *constructor_data) { + (void)new_rrddim; // it is NULL + + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + + ctr->react_action = RRDDIM_REACT_NONE; + + int rc = rrddim_reset_name(st, rd, ctr->name); + rc += rrddim_set_algorithm(st, rd, ctr->algorithm); + rc += rrddim_set_multiplier(st, rd, ctr->multiplier); + rc += rrddim_set_divisor(st, rd, ctr->divisor); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if (rd->tiers[tier] && !rd->tiers[tier]->db_collection_handle) + rd->tiers[tier]->db_collection_handle = + rd->tiers[tier]->collect_ops->init(rd->tiers[tier]->db_metric_handle, st->rrdhost->db[tier].tier_grouping * st->update_every, rd->rrdset->storage_metrics_groups[tier]); + } + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_ARCHIVED); + + rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); + } + + if(unlikely(rc)) + ctr->react_action = RRDDIM_REACT_UPDATED; + + return ctr->react_action == RRDDIM_REACT_UPDATED; +} + +static void rrddim_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *constructor_data) { + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + + if(ctr->react_action & (RRDDIM_REACT_UPDATED | RRDDIM_REACT_NEW)) { + rrddim_flag_set(rd, RRDDIM_FLAG_METADATA_UPDATE); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_METADATA_UPDATE); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + } + + if(ctr->react_action == RRDDIM_REACT_UPDATED) { + // the chart needs to be updated to the parent + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + } + + rrdcontext_updated_rrddim(rd); +} + +void rrddim_index_init(RRDSET *st) { + if(!st->rrddim_root_index) { + st->rrddim_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(st->rrddim_root_index, rrddim_insert_callback, NULL); + dictionary_register_conflict_callback(st->rrddim_root_index, rrddim_conflict_callback, NULL); + dictionary_register_delete_callback(st->rrddim_root_index, rrddim_delete_callback, st); + dictionary_register_react_callback(st->rrddim_root_index, rrddim_react_callback, st); + } +} + +void rrddim_index_destroy(RRDSET *st) { + dictionary_destroy(st->rrddim_root_index); + st->rrddim_root_index = NULL; +} + +static inline RRDDIM *rrddim_index_find(RRDSET *st, const char *id) { + return dictionary_get(st->rrddim_root_index, id); +} + +// ---------------------------------------------------------------------------- +// RRDDIM - find a dimension + +inline RRDDIM *rrddim_find(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_find() for chart %s, dimension %s", rrdset_name(st), id); + + return rrddim_index_find(st, id); +} + +inline RRDDIM_ACQUIRED *rrddim_find_and_acquire(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_find() for chart %s, dimension %s", rrdset_name(st), id); + + return (RRDDIM_ACQUIRED *)dictionary_get_and_acquire_item(st->rrddim_root_index, id); +} + +RRDDIM *rrddim_acquired_to_rrddim(RRDDIM_ACQUIRED *rda) { + if(unlikely(!rda)) + return NULL; + + return (RRDDIM *) dictionary_acquired_item_value((const DICTIONARY_ITEM *)rda); +} + +void rrddim_acquired_release(RRDDIM_ACQUIRED *rda) { + if(unlikely(!rda)) + return; + + RRDDIM *rd = rrddim_acquired_to_rrddim(rda); + dictionary_acquired_item_release(rd->rrdset->rrddim_root_index, (const DICTIONARY_ITEM *)rda); +} + +// This will not return dimensions that are archived +RRDDIM *rrddim_find_active(RRDSET *st, const char *id) { + RRDDIM *rd = rrddim_find(st, id); + + if (unlikely(rd && rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED))) + return NULL; + + return rd; +} + +// ---------------------------------------------------------------------------- +// RRDDIM rename a dimension + +inline int rrddim_reset_name(RRDSET *st, RRDDIM *rd, const char *name) { + if(unlikely(!name || !*name || !strcmp(rrddim_name(rd), name))) + return 0; + + debug(D_RRD_CALLS, "rrddim_reset_name() from %s.%s to %s.%s", rrdset_name(st), rrddim_name(rd), rrdset_name(st), name); + + STRING *old = rd->name; + rd->name = rrd_string_strdupz(name); + string_freez(old); + + rrddimvar_rename_all(rd); + + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + return 1; +} + +inline int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm) { + if(unlikely(rd->algorithm == algorithm)) + return 0; + + debug(D_RRD_CALLS, "Updating algorithm of dimension '%s/%s' from %s to %s", rrdset_id(st), rrddim_name(rd), rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm)); + rd->algorithm = algorithm; + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_algorithm(rd); + return 1; +} + +inline int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier) { + if(unlikely(rd->multiplier == multiplier)) + return 0; + + debug(D_RRD_CALLS, "Updating multiplier of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, rrdset_id(st), rrddim_name(rd), rd->multiplier, multiplier); + rd->multiplier = multiplier; + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_multiplier(rd); + return 1; +} + +inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor) { + if(unlikely(rd->divisor == divisor)) + return 0; + + debug(D_RRD_CALLS, "Updating divisor of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, rrdset_id(st), rrddim_name(rd), rd->divisor, divisor); + rd->divisor = divisor; + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_divisor(rd); + return 1; +} + +// ---------------------------------------------------------------------------- + +// get the timestamp of the last entry in the round-robin database +time_t rrddim_last_entry_t(RRDDIM *rd) { + time_t latest = rd->tiers[0]->query_ops->latest_time(rd->tiers[0]->db_metric_handle); + + for(size_t tier = 1; tier < storage_tiers ;tier++) { + if(unlikely(!rd->tiers[tier])) continue; + + time_t t = rd->tiers[tier]->query_ops->latest_time(rd->tiers[tier]->db_metric_handle); + if(t > latest) + latest = t; + } + + return latest; +} + +time_t rrddim_first_entry_t_of_tier(RRDDIM *rd, size_t tier) { + if(unlikely(tier > storage_tiers || !rd->tiers[tier])) + return 0; + + return rd->tiers[tier]->query_ops->oldest_time(rd->tiers[tier]->db_metric_handle); +} + +time_t rrddim_first_entry_t(RRDDIM *rd) { + time_t oldest = 0; + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + time_t t = rrddim_first_entry_t_of_tier(rd, tier); + if(t != 0 && (oldest == 0 || t < oldest)) + oldest = t; + } + + return oldest; +} + +RRDDIM *rrddim_add_custom(RRDSET *st + , const char *id + , const char *name + , collected_number multiplier + , collected_number divisor + , RRD_ALGORITHM algorithm + , RRD_MEMORY_MODE memory_mode + ) { + struct rrddim_constructor tmp = { + .st = st, + .id = id, + .name = name, + .multiplier = multiplier, + .divisor = divisor, + .algorithm = algorithm, + .memory_mode = memory_mode, + }; + + RRDDIM *rd = dictionary_set_advanced(st->rrddim_root_index, tmp.id, -1, NULL, sizeof(RRDDIM), &tmp); + return(rd); +} + +// ---------------------------------------------------------------------------- +// RRDDIM remove / free a dimension + +void rrddim_free(RRDSET *st, RRDDIM *rd) { + dictionary_del(st->rrddim_root_index, string2str(rd->id)); +} + + +// ---------------------------------------------------------------------------- +// RRDDIM - set dimension options + +int rrddim_hide(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_hide() for chart %s, dimension %s", rrdset_name(st), id); + + RRDHOST *host = st->rrdhost; + + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); + return 1; + } + if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN); + metaqueue_dimension_update_flags(rd); + } + + rrddim_option_set(rd, RRDDIM_OPTION_HIDDEN); + rrdcontext_updated_rrddim_flags(rd); + return 0; +} + +int rrddim_unhide(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_unhide() for chart %s, dimension %s", rrdset_name(st), id); + + RRDHOST *host = st->rrdhost; + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); + return 1; + } + if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN); + metaqueue_dimension_update_flags(rd); + } + + rrddim_option_clear(rd, RRDDIM_OPTION_HIDDEN); + rrdcontext_updated_rrddim_flags(rd); + return 0; +} + +inline void rrddim_is_obsolete(RRDSET *st, RRDDIM *rd) { + debug(D_RRD_CALLS, "rrddim_is_obsolete() for chart %s, dimension %s", rrdset_name(st), rrddim_name(rd)); + + if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED))) { + info("Cannot obsolete already archived dimension %s from chart %s", rrddim_name(rd), rrdset_name(st)); + return; + } + rrddim_flag_set(rd, RRDDIM_FLAG_OBSOLETE); + rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + rrdcontext_updated_rrddim_flags(rd); +} + +inline void rrddim_isnot_obsolete(RRDSET *st __maybe_unused, RRDDIM *rd) { + debug(D_RRD_CALLS, "rrddim_isnot_obsolete() for chart %s, dimension %s", rrdset_name(st), rrddim_name(rd)); + + rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); + rrdcontext_updated_rrddim_flags(rd); +} + +// ---------------------------------------------------------------------------- +// RRDDIM - collect values for a dimension + +inline collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value) { + struct timeval now; + now_realtime_timeval(&now); + + return rrddim_timed_set_by_pointer(st, rd, now, value); +} + +collected_number rrddim_timed_set_by_pointer(RRDSET *st __maybe_unused, RRDDIM *rd, struct timeval collected_time, collected_number value) { + debug(D_RRD_CALLS, "rrddim_set_by_pointer() for chart %s, dimension %s, value " COLLECTED_NUMBER_FORMAT, rrdset_name(st), rrddim_name(rd), value); + + rd->last_collected_time = collected_time; + rd->collected_value = value; + rd->updated = 1; + rd->collections_counter++; + + collected_number v = (value >= 0) ? value : -value; + if (unlikely(v > rd->collected_value_max)) + rd->collected_value_max = v; + + return rd->last_collected_value; +} + + +collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) { + RRDHOST *host = st->rrdhost; + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); + return 0; + } + + return rrddim_set_by_pointer(st, rd, value); +} + + +// ---------------------------------------------------------------------------- +// compatibility layer for RRDDIM files v019 + +#define RRDDIMENSION_MAGIC_V019 "NETDATA RRD DIMENSION FILE V019" + +struct avl_element_v019 { + void *avl_link[2]; + signed char avl_balance; +}; + +struct rrddim_map_save_v019 { + struct avl_element_v019 avl; // ignored + void *id; // ignored + void *name; // ignored + uint32_t algorithm; // print warning on mismatch - update on load + uint32_t rrd_memory_mode; // ignored + long long multiplier; // print warning on mismatch - update on load + long long divisor; // print warning on mismatch - update on load + uint32_t flags; // ignored + uint32_t hash; // ignored + uint32_t hash_name; // ignored + void *cache_filename; // ignored - we use it to keep the filename to save back + size_t collections_counter; // ignored + void *state; // ignored + size_t unused[8]; // ignored + long long collected_value_max; // ignored + unsigned int updated:1; // ignored + unsigned int exposed:1; // ignored + struct timeval last_collected_time; // check to reset all - ignored after load + long double calculated_value; // ignored + long double last_calculated_value; // ignored + long double last_stored_value; // ignored + long long collected_value; // ignored + long long last_collected_value; // load and save + long double collected_volume; // ignored + long double stored_volume; // ignored + void *next; // ignored + void *rrdset; // ignored + long entries; // check to reset all - update on load + int update_every; // check to reset all - update on load + size_t memsize; // check to reset all - update on load + char magic[sizeof(RRDDIMENSION_MAGIC_V019) + 1];// check to reset all - update on load + void *variables; // ignored + storage_number values[]; // the array of values +}; + +size_t rrddim_memory_file_header_size(void) { + return sizeof(struct rrddim_map_save_v019); +} + +void rrddim_memory_file_update(RRDDIM *rd) { + if(!rd || !rd->rd_on_file) return; + struct rrddim_map_save_v019 *rd_on_file = rd->rd_on_file; + + rd_on_file->last_collected_time.tv_sec = rd->last_collected_time.tv_sec; + rd_on_file->last_collected_time.tv_usec = rd->last_collected_time.tv_usec; + rd_on_file->last_collected_value = rd->last_collected_value; +} + +void rrddim_memory_file_free(RRDDIM *rd) { + if(!rd || !rd->rd_on_file) return; + + // needed for memory mode map, to save the latest state + rrddim_memory_file_update(rd); + + struct rrddim_map_save_v019 *rd_on_file = rd->rd_on_file; + freez(rd_on_file->cache_filename); + netdata_munmap(rd_on_file, rd_on_file->memsize); + + // remove the pointers from the RRDDIM + rd->rd_on_file = NULL; + rd->db = NULL; +} + +const char *rrddim_cache_filename(RRDDIM *rd) { + if(!rd || !rd->rd_on_file) return NULL; + struct rrddim_map_save_v019 *rd_on_file = rd->rd_on_file; + return rd_on_file->cache_filename; +} + +void rrddim_memory_file_save(RRDDIM *rd) { + if(!rd || !rd->rd_on_file) return; + + rrddim_memory_file_update(rd); + + struct rrddim_map_save_v019 *rd_on_file = rd->rd_on_file; + if(rd_on_file->rrd_memory_mode != RRD_MEMORY_MODE_SAVE) return; + + memory_file_save(rd_on_file->cache_filename, rd_on_file, rd_on_file->memsize); +} + +bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MODE memory_mode) { + if(memory_mode != RRD_MEMORY_MODE_SAVE && memory_mode != RRD_MEMORY_MODE_MAP) + return false; + + struct rrddim_map_save_v019 *rd_on_file = NULL; + + unsigned long size = sizeof(struct rrddim_map_save_v019) + (st->entries * sizeof(storage_number)); + + char filename[FILENAME_MAX + 1]; + char fullfilename[FILENAME_MAX + 1]; + rrdset_strncpyz_name(filename, rrddim_id(rd), FILENAME_MAX); + snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", st->cache_dir, filename); + + rd_on_file = (struct rrddim_map_save_v019 *)netdata_mmap(fullfilename, size, + ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE), 1); + + if(unlikely(!rd_on_file)) return false; + + struct timeval now; + now_realtime_timeval(&now); + + int reset = 0; + rd_on_file->magic[sizeof(RRDDIMENSION_MAGIC_V019)] = '\0'; + if(strcmp(rd_on_file->magic, RRDDIMENSION_MAGIC_V019) != 0) { + info("Initializing file %s.", fullfilename); + memset(rd_on_file, 0, size); + reset = 1; + } + else if(rd_on_file->memsize != size) { + error("File %s does not have the desired size, expected %lu but found %lu. Clearing it.", fullfilename, size, (unsigned long int) rd_on_file->memsize); + memset(rd_on_file, 0, size); + reset = 1; + } + else if(rd_on_file->update_every != st->update_every) { + error("File %s does not have the same update frequency, expected %d but found %d. Clearing it.", fullfilename, st->update_every, rd_on_file->update_every); + memset(rd_on_file, 0, size); + reset = 1; + } + else if(dt_usec(&now, &rd_on_file->last_collected_time) > (rd_on_file->entries * rd_on_file->update_every * USEC_PER_SEC)) { + info("File %s is too old (last collected %llu seconds ago, but the database is %ld seconds). Clearing it.", fullfilename, dt_usec(&now, &rd_on_file->last_collected_time) / USEC_PER_SEC, rd_on_file->entries * rd_on_file->update_every); + memset(rd_on_file, 0, size); + reset = 1; + } + + if(!reset) { + rd->last_collected_value = rd_on_file->last_collected_value; + + if(rd_on_file->algorithm != rd->algorithm) + info("File %s does not have the expected algorithm (expected %u '%s', found %u '%s'). Previous values may be wrong.", + fullfilename, rd->algorithm, rrd_algorithm_name(rd->algorithm), rd_on_file->algorithm, rrd_algorithm_name(rd_on_file->algorithm)); + + if(rd_on_file->multiplier != rd->multiplier) + info("File %s does not have the expected multiplier (expected " COLLECTED_NUMBER_FORMAT ", found " COLLECTED_NUMBER_FORMAT "). Previous values may be wrong.", fullfilename, rd->multiplier, rd_on_file->multiplier); + + if(rd_on_file->divisor != rd->divisor) + info("File %s does not have the expected divisor (expected " COLLECTED_NUMBER_FORMAT ", found " COLLECTED_NUMBER_FORMAT "). Previous values may be wrong.", fullfilename, rd->divisor, rd_on_file->divisor); + } + + // zero the entire header + memset(rd_on_file, 0, sizeof(struct rrddim_map_save_v019)); + + // set the important fields + strcpy(rd_on_file->magic, RRDDIMENSION_MAGIC_V019); + rd_on_file->algorithm = rd->algorithm; + rd_on_file->multiplier = rd->multiplier; + rd_on_file->divisor = rd->divisor; + rd_on_file->entries = st->entries; + rd_on_file->update_every = rd->update_every; + rd_on_file->memsize = size; + rd_on_file->rrd_memory_mode = memory_mode; + rd_on_file->cache_filename = strdupz(fullfilename); + + rd->db = &rd_on_file->values[0]; + rd->rd_on_file = rd_on_file; + rd->memsize = size; + rrddim_memory_file_update(rd); + + return true; +} diff --git a/database/rrddimvar.c b/database/rrddimvar.c new file mode 100644 index 0000000..449ceeb --- /dev/null +++ b/database/rrddimvar.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrd.h" + +typedef struct rrddimvar { + struct rrddim *rrddim; + + STRING *prefix; + STRING *suffix; + void *value; + + const RRDVAR_ACQUIRED *rrdvar_local_dim_id; + const RRDVAR_ACQUIRED *rrdvar_local_dim_name; + + const RRDVAR_ACQUIRED *rrdvar_family_id; + const RRDVAR_ACQUIRED *rrdvar_family_name; + const RRDVAR_ACQUIRED *rrdvar_family_context_dim_id; + const RRDVAR_ACQUIRED *rrdvar_family_context_dim_name; + + const RRDVAR_ACQUIRED *rrdvar_host_chart_id_dim_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_id_dim_name; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name_dim_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name_dim_name; + + RRDVAR_FLAGS flags:24; + RRDVAR_TYPE type:8; +} RRDDIMVAR; + +// ---------------------------------------------------------------------------- +// RRDDIMVAR management +// DIMENSION VARIABLES + +#define RRDDIMVAR_ID_MAX 1024 + +static inline void rrddimvar_free_variables_unsafe(RRDDIMVAR *rs) { + RRDDIM *rd = rs->rrddim; + RRDSET *st = rd->rrdset; + RRDHOST *host = st->rrdhost; + + // CHART VARIABLES FOR THIS DIMENSION + + if(st->rrdvars) { + rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local_dim_id); + rs->rrdvar_local_dim_id = NULL; + + rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local_dim_name); + rs->rrdvar_local_dim_name = NULL; + } + + // FAMILY VARIABLES FOR THIS DIMENSION + + if(st->rrdfamily) { + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_id); + rs->rrdvar_family_id = NULL; + + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_name); + rs->rrdvar_family_name = NULL; + + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_context_dim_id); + rs->rrdvar_family_context_dim_id = NULL; + + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_context_dim_name); + rs->rrdvar_family_context_dim_name = NULL; + } + + // HOST VARIABLES FOR THIS DIMENSION + + if(host->rrdvars && host->health_enabled) { + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id_dim_id); + rs->rrdvar_host_chart_id_dim_id = NULL; + + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id_dim_name); + rs->rrdvar_host_chart_id_dim_name = NULL; + + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name_dim_id); + rs->rrdvar_host_chart_name_dim_id = NULL; + + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name_dim_name); + rs->rrdvar_host_chart_name_dim_name = NULL; + } +} + +static inline void rrddimvar_update_variables_unsafe(RRDDIMVAR *rs) { + rrddimvar_free_variables_unsafe(rs); + + RRDDIM *rd = rs->rrddim; + RRDSET *st = rd->rrdset; + RRDHOST *host = st->rrdhost; + + char buffer[RRDDIMVAR_ID_MAX + 1]; + + // KEYS + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", string2str(rs->prefix), rrddim_id(rd), string2str(rs->suffix)); + STRING *key_dim_id = string_strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", string2str(rs->prefix), rrddim_name(rd), string2str(rs->suffix)); + STRING *key_dim_name = string_strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_id(st), string2str(key_dim_id)); + STRING *key_chart_id_dim_id = string_strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_id(st), string2str(key_dim_name)); + STRING *key_chart_id_dim_name = string_strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_context(st), string2str(key_dim_id)); + STRING *key_context_dim_id = string_strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_context(st), string2str(key_dim_name)); + STRING *key_context_dim_name = string_strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_name(st), string2str(key_dim_id)); + STRING *key_chart_name_dim_id = string_strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_name(st), string2str(key_dim_name)); + STRING *key_chart_name_dim_name = string_strdupz(buffer); + + // CHART VARIABLES FOR THIS DIMENSION + // ----------------------------------- + // + // dimensions are available as: + // - $id + // - $name + + if(st->rrdvars) { + rs->rrdvar_local_dim_id = rrdvar_add_and_acquire("local", st->rrdvars, key_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_local_dim_name = rrdvar_add_and_acquire("local", st->rrdvars, key_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + } + + // FAMILY VARIABLES FOR THIS DIMENSION + // ----------------------------------- + // + // dimensions are available as: + // - $id (only the first, when multiple overlap) + // - $name (only the first, when multiple overlap) + // - $chart-context.id + // - $chart-context.name + + if(st->rrdfamily) { + rs->rrdvar_family_id = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_family_name = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_family_context_dim_id = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_context_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_family_context_dim_name = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_context_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + } + + // HOST VARIABLES FOR THIS DIMENSION + // ----------------------------------- + // + // dimensions are available as: + // - $chart-id.id + // - $chart-id.name + // - $chart-name.id + // - $chart-name.name + + if(host->rrdvars && host->health_enabled) { + rs->rrdvar_host_chart_id_dim_id = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_id_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_host_chart_id_dim_name = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_id_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_host_chart_name_dim_id = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_name_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_host_chart_name_dim_name = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_name_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + } + + // free the keys + + string_freez(key_dim_id); + string_freez(key_dim_name); + string_freez(key_chart_id_dim_id); + string_freez(key_chart_id_dim_name); + string_freez(key_context_dim_id); + string_freez(key_context_dim_name); + string_freez(key_chart_name_dim_id); + string_freez(key_chart_name_dim_name); +} + +struct rrddimvar_constructor { + RRDDIM *rrddim; + const char *prefix; + const char *suffix; + void *value; + RRDVAR_FLAGS flags :16; + RRDVAR_TYPE type:8; +}; + +static void rrddimvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *constructor_data) { + RRDDIMVAR *rs = rrddimvar; + struct rrddimvar_constructor *ctr = constructor_data; + + if(!ctr->prefix) ctr->prefix = ""; + if(!ctr->suffix) ctr->suffix = ""; + + rs->prefix = string_strdupz(ctr->prefix); + rs->suffix = string_strdupz(ctr->suffix); + + rs->type = ctr->type; + rs->value = ctr->value; + rs->flags = ctr->flags; + rs->rrddim = ctr->rrddim; + + rrddimvar_update_variables_unsafe(rs); +} + +static bool rrddimvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *new_rrddimvar __maybe_unused, void *constructor_data __maybe_unused) { + RRDDIMVAR *rs = rrddimvar; + rrddimvar_update_variables_unsafe(rs); + + return true; +} + +static void rrddimvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *rrdset __maybe_unused) { + RRDDIMVAR *rs = rrddimvar; + rrddimvar_free_variables_unsafe(rs); + string_freez(rs->prefix); + string_freez(rs->suffix); +} + +void rrddimvar_index_init(RRDSET *st) { + if(!st->rrddimvar_root_index) { + st->rrddimvar_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(st->rrddimvar_root_index, rrddimvar_insert_callback, NULL); + dictionary_register_conflict_callback(st->rrddimvar_root_index, rrddimvar_conflict_callback, NULL); + dictionary_register_delete_callback(st->rrddimvar_root_index, rrddimvar_delete_callback, st); + } +} + +void rrddimvar_index_destroy(RRDSET *st) { + dictionary_destroy(st->rrddimvar_root_index); + st->rrddimvar_root_index = NULL; +} + +void rrddimvar_add_and_leave_released(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_FLAGS flags) { + if(!prefix) prefix = ""; + if(!suffix) suffix = ""; + + char key[RRDDIMVAR_ID_MAX + 1]; + size_t key_len = snprintfz(key, RRDDIMVAR_ID_MAX, "%s_%s_%s", prefix, rrddim_id(rd), suffix); + + struct rrddimvar_constructor tmp = { + .suffix = suffix, + .prefix = prefix, + .type = type, + .flags = flags, + .value = value, + .rrddim = rd + }; + dictionary_set_advanced(rd->rrdset->rrddimvar_root_index, key, (ssize_t)(key_len + 1), NULL, sizeof(RRDDIMVAR), &tmp); +} + +void rrddimvar_rename_all(RRDDIM *rd) { + RRDSET *st = rd->rrdset; + + debug(D_VARIABLES, "RRDDIMVAR rename for chart id '%s' name '%s', dimension id '%s', name '%s'", rrdset_id(st), rrdset_name(st), rrddim_id(rd), rrddim_name(rd)); + + RRDDIMVAR *rs; + dfe_start_write(st->rrddimvar_root_index, rs) { + if(unlikely(rs->rrddim == rd)) + rrddimvar_update_variables_unsafe(rs); + } + dfe_done(rs); +} + +void rrddimvar_delete_all(RRDDIM *rd) { + RRDSET *st = rd->rrdset; + + debug(D_VARIABLES, "RRDDIMVAR delete for chart id '%s' name '%s', dimension id '%s', name '%s'", rrdset_id(st), rrdset_name(st), rrddim_id(rd), rrddim_name(rd)); + + RRDDIMVAR *rs; + dfe_start_write(st->rrddimvar_root_index, rs) { + if(unlikely(rs->rrddim == rd)) + dictionary_del(st->rrddimvar_root_index, rs_dfe.name); + } + dfe_done(rs); +} diff --git a/database/rrddimvar.h b/database/rrddimvar.h new file mode 100644 index 0000000..a803ea7 --- /dev/null +++ b/database/rrddimvar.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDDIMVAR_H +#define NETDATA_RRDDIMVAR_H 1 + +#include "rrd.h" + +// variables linked to individual dimensions +// We link variables to point the values that are already +// calculated / processed by the normal data collection process +// This means, there will be no speed penalty for using +// these variables + +void rrddimvar_rename_all(RRDDIM *rd); +void rrddimvar_add_and_leave_released(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_FLAGS flags); +void rrddimvar_delete_all(RRDDIM *rd); + +void rrddimvar_index_init(RRDSET *st); +void rrddimvar_index_destroy(RRDSET *st); + +#endif //NETDATA_RRDDIMVAR_H diff --git a/database/rrdfamily.c b/database/rrdfamily.c new file mode 100644 index 0000000..e7d1536 --- /dev/null +++ b/database/rrdfamily.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +typedef struct rrdfamily { + STRING *family; + DICTIONARY *rrdvars; +} RRDFAMILY; + +// ---------------------------------------------------------------------------- +// RRDFAMILY index + +struct rrdfamily_constructor { + const char *family; +}; + +static void rrdfamily_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdfamily, void *constructor_data) { + RRDFAMILY *rf = rrdfamily; + struct rrdfamily_constructor *ctr = constructor_data; + + rf->family = string_strdupz(ctr->family); + rf->rrdvars = rrdvariables_create(); +} + +static void rrdfamily_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdfamily, void *rrdhost __maybe_unused) { + RRDFAMILY *rf = rrdfamily; + string_freez(rf->family); + rrdvariables_destroy(rf->rrdvars); + rf->family = NULL; + rf->rrdvars = NULL; +} + +void rrdfamily_index_init(RRDHOST *host) { + if(!host->rrdfamily_root_index) { + host->rrdfamily_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdfamily_root_index, rrdfamily_insert_callback, NULL); + dictionary_register_delete_callback(host->rrdfamily_root_index, rrdfamily_delete_callback, host); + } +} + +void rrdfamily_index_destroy(RRDHOST *host) { + dictionary_destroy(host->rrdfamily_root_index); + host->rrdfamily_root_index = NULL; +} + + +// ---------------------------------------------------------------------------- +// RRDFAMILY management + +const RRDFAMILY_ACQUIRED *rrdfamily_add_and_acquire(RRDHOST *host, const char *id) { + struct rrdfamily_constructor tmp = { + .family = id, + }; + return (const RRDFAMILY_ACQUIRED *)dictionary_set_and_acquire_item_advanced(host->rrdfamily_root_index, id, -1, NULL, sizeof(RRDFAMILY), &tmp); +} + +void rrdfamily_release(RRDHOST *host, const RRDFAMILY_ACQUIRED *rfa) { + if(unlikely(!rfa)) return; + dictionary_acquired_item_release(host->rrdfamily_root_index, (const DICTIONARY_ITEM *)rfa); +} + +DICTIONARY *rrdfamily_rrdvars_dict(const RRDFAMILY_ACQUIRED *rfa) { + if(unlikely(!rfa)) return NULL; + RRDFAMILY *rf = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rfa); + return(rf->rrdvars); +} diff --git a/database/rrdfunctions.c b/database/rrdfunctions.c new file mode 100644 index 0000000..fb847a3 --- /dev/null +++ b/database/rrdfunctions.c @@ -0,0 +1,758 @@ +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +#define MAX_FUNCTION_LENGTH (PLUGINSD_LINE_MAX - 512) // we need some space for the rest of the line + +static unsigned char functions_allowed_chars[256] = { + [0] = '\0', // + [1] = '_', // + [2] = '_', // + [3] = '_', // + [4] = '_', // + [5] = '_', // + [6] = '_', // + [7] = '_', // + [8] = '_', // + [9] = ' ', // Horizontal Tab + [10] = ' ', // Line Feed + [11] = ' ', // Vertical Tab + [12] = ' ', // Form Feed + [13] = ' ', // Carriage Return + [14] = '_', // + [15] = '_', // + [16] = '_', // + [17] = '_', // + [18] = '_', // + [19] = '_', // + [20] = '_', // + [21] = '_', // + [22] = '_', // + [23] = '_', // + [24] = '_', // + [25] = '_', // + [26] = '_', // + [27] = '_', // + [28] = '_', // + [29] = '_', // + [30] = '_', // + [31] = '_', // + [32] = ' ', // SPACE keep + [33] = '_', // ! + [34] = '_', // " + [35] = '_', // # + [36] = '_', // $ + [37] = '_', // % + [38] = '_', // & + [39] = '_', // ' + [40] = '_', // ( + [41] = '_', // ) + [42] = '_', // * + [43] = '_', // + + [44] = ',', // , keep + [45] = '-', // - keep + [46] = '.', // . keep + [47] = '/', // / keep + [48] = '0', // 0 keep + [49] = '1', // 1 keep + [50] = '2', // 2 keep + [51] = '3', // 3 keep + [52] = '4', // 4 keep + [53] = '5', // 5 keep + [54] = '6', // 6 keep + [55] = '7', // 7 keep + [56] = '8', // 8 keep + [57] = '9', // 9 keep + [58] = ':', // : keep + [59] = ':', // ; convert ; to : + [60] = '_', // < + [61] = ':', // = convert = to : + [62] = '_', // > + [63] = '_', // ? + [64] = '_', // @ + [65] = 'A', // A keep + [66] = 'B', // B keep + [67] = 'C', // C keep + [68] = 'D', // D keep + [69] = 'E', // E keep + [70] = 'F', // F keep + [71] = 'G', // G keep + [72] = 'H', // H keep + [73] = 'I', // I keep + [74] = 'J', // J keep + [75] = 'K', // K keep + [76] = 'L', // L keep + [77] = 'M', // M keep + [78] = 'N', // N keep + [79] = 'O', // O keep + [80] = 'P', // P keep + [81] = 'Q', // Q keep + [82] = 'R', // R keep + [83] = 'S', // S keep + [84] = 'T', // T keep + [85] = 'U', // U keep + [86] = 'V', // V keep + [87] = 'W', // W keep + [88] = 'X', // X keep + [89] = 'Y', // Y keep + [90] = 'Z', // Z keep + [91] = '_', // [ + [92] = '/', // backslash convert \ to / + [93] = '_', // ] + [94] = '_', // ^ + [95] = '_', // _ keep + [96] = '_', // ` + [97] = 'a', // a keep + [98] = 'b', // b keep + [99] = 'c', // c keep + [100] = 'd', // d keep + [101] = 'e', // e keep + [102] = 'f', // f keep + [103] = 'g', // g keep + [104] = 'h', // h keep + [105] = 'i', // i keep + [106] = 'j', // j keep + [107] = 'k', // k keep + [108] = 'l', // l keep + [109] = 'm', // m keep + [110] = 'n', // n keep + [111] = 'o', // o keep + [112] = 'p', // p keep + [113] = 'q', // q keep + [114] = 'r', // r keep + [115] = 's', // s keep + [116] = 't', // t keep + [117] = 'u', // u keep + [118] = 'v', // v keep + [119] = 'w', // w keep + [120] = 'x', // x keep + [121] = 'y', // y keep + [122] = 'z', // z keep + [123] = '_', // { + [124] = '_', // | + [125] = '_', // } + [126] = '_', // ~ + [127] = '_', // + [128] = '_', // + [129] = '_', // + [130] = '_', // + [131] = '_', // + [132] = '_', // + [133] = '_', // + [134] = '_', // + [135] = '_', // + [136] = '_', // + [137] = '_', // + [138] = '_', // + [139] = '_', // + [140] = '_', // + [141] = '_', // + [142] = '_', // + [143] = '_', // + [144] = '_', // + [145] = '_', // + [146] = '_', // + [147] = '_', // + [148] = '_', // + [149] = '_', // + [150] = '_', // + [151] = '_', // + [152] = '_', // + [153] = '_', // + [154] = '_', // + [155] = '_', // + [156] = '_', // + [157] = '_', // + [158] = '_', // + [159] = '_', // + [160] = '_', // + [161] = '_', // + [162] = '_', // + [163] = '_', // + [164] = '_', // + [165] = '_', // + [166] = '_', // + [167] = '_', // + [168] = '_', // + [169] = '_', // + [170] = '_', // + [171] = '_', // + [172] = '_', // + [173] = '_', // + [174] = '_', // + [175] = '_', // + [176] = '_', // + [177] = '_', // + [178] = '_', // + [179] = '_', // + [180] = '_', // + [181] = '_', // + [182] = '_', // + [183] = '_', // + [184] = '_', // + [185] = '_', // + [186] = '_', // + [187] = '_', // + [188] = '_', // + [189] = '_', // + [190] = '_', // + [191] = '_', // + [192] = '_', // + [193] = '_', // + [194] = '_', // + [195] = '_', // + [196] = '_', // + [197] = '_', // + [198] = '_', // + [199] = '_', // + [200] = '_', // + [201] = '_', // + [202] = '_', // + [203] = '_', // + [204] = '_', // + [205] = '_', // + [206] = '_', // + [207] = '_', // + [208] = '_', // + [209] = '_', // + [210] = '_', // + [211] = '_', // + [212] = '_', // + [213] = '_', // + [214] = '_', // + [215] = '_', // + [216] = '_', // + [217] = '_', // + [218] = '_', // + [219] = '_', // + [220] = '_', // + [221] = '_', // + [222] = '_', // + [223] = '_', // + [224] = '_', // + [225] = '_', // + [226] = '_', // + [227] = '_', // + [228] = '_', // + [229] = '_', // + [230] = '_', // + [231] = '_', // + [232] = '_', // + [233] = '_', // + [234] = '_', // + [235] = '_', // + [236] = '_', // + [237] = '_', // + [238] = '_', // + [239] = '_', // + [240] = '_', // + [241] = '_', // + [242] = '_', // + [243] = '_', // + [244] = '_', // + [245] = '_', // + [246] = '_', // + [247] = '_', // + [248] = '_', // + [249] = '_', // + [250] = '_', // + [251] = '_', // + [252] = '_', // + [253] = '_', // + [254] = '_', // + [255] = '_' // +}; + +static inline size_t sanitize_function_text(char *dst, const char *src, size_t dst_len) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_len, + functions_allowed_chars, true, "", NULL); +} + +// we keep a dictionary per RRDSET with these functions +// the dictionary is created on demand (only when a function is added to an RRDSET) + +typedef enum { + RRD_FUNCTION_LOCAL = (1 << 0), + RRD_FUNCTION_GLOBAL = (1 << 1), + + // this is 8-bit +} RRD_FUNCTION_OPTIONS; + +struct rrd_collector_function { + bool sync; // when true, the function is called synchronously + uint8_t options; // RRD_FUNCTION_OPTIONS + STRING *help; + int timeout; // the default timeout of the function + + int (*function)(BUFFER *wb, int timeout, const char *function, void *collector_data, + function_data_ready_callback callback, void *callback_data); + + void *collector_data; + struct rrd_collector *collector; +}; + +// Each function points to this collector structure +// so that when the collector exits, all of them will +// be invalidated (running == false) +// The last function that is using this collector +// frees the structure too (or when the collector calls +// rrdset_collector_finished()). + +struct rrd_collector { + int32_t refcount; + pid_t tid; + bool running; +}; + +// Each thread that adds RRDSET functions, has to call +// rrdset_collector_started() and rrdset_collector_finished() +// to create the collector structure. + +static __thread struct rrd_collector *thread_rrd_collector = NULL; + +static void rrd_collector_free(struct rrd_collector *rdc) { + int32_t expected = 0; + if(likely(!__atomic_compare_exchange_n(&rdc->refcount, &expected, -1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))) { + // the collector is still referenced by charts. + // leave it hanging there, the last chart will actually free it. + return; + } + + // we can free it now + freez(rdc); +} + +// called once per collector +void rrd_collector_started(void) { + if(likely(thread_rrd_collector)) return; + + thread_rrd_collector = callocz(1, sizeof(struct rrd_collector)); + thread_rrd_collector->tid = gettid(); + thread_rrd_collector->running = true; +} + +// called once per collector +void rrd_collector_finished(void) { + if(!thread_rrd_collector) + return; + + thread_rrd_collector->running = false; + rrd_collector_free(thread_rrd_collector); + thread_rrd_collector = NULL; +} + +static struct rrd_collector *rrd_collector_acquire(void) { + __atomic_add_fetch(&thread_rrd_collector->refcount, 1, __ATOMIC_SEQ_CST); + return thread_rrd_collector; +} + +static void rrd_collector_release(struct rrd_collector *rdc) { + if(unlikely(!rdc)) return; + + int32_t refcount = __atomic_sub_fetch(&rdc->refcount, 1, __ATOMIC_SEQ_CST); + if(refcount == 0 && !rdc->running) + rrd_collector_free(rdc); +} + +static void rrd_functions_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, + void *rrdhost __maybe_unused) { + struct rrd_collector_function *rdcf = func; + + if(!thread_rrd_collector) + fatal("RRDSET_COLLECTOR: called %s() for function '%s' without calling rrd_collector_started() first.", + __FUNCTION__, dictionary_acquired_item_name(item)); + + rdcf->collector = rrd_collector_acquire(); +} + +static void rrd_functions_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, + void *rrdhost __maybe_unused) { + struct rrd_collector_function *rdcf = func; + rrd_collector_release(rdcf->collector); +} + +static bool rrd_functions_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, + void *new_func __maybe_unused, void *rrdhost __maybe_unused) { + struct rrd_collector_function *rdcf = func; + struct rrd_collector_function *new_rdcf = new_func; + + if(!thread_rrd_collector) + fatal("RRDSET_COLLECTOR: called %s() for function '%s' without calling rrd_collector_started() first.", + __FUNCTION__, dictionary_acquired_item_name(item)); + + bool changed = false; + + if(rdcf->collector != thread_rrd_collector) { + struct rrd_collector *old_rdc = rdcf->collector; + rdcf->collector = rrd_collector_acquire(); + rrd_collector_release(old_rdc); + changed = true; + } + + if(rdcf->function != new_rdcf->function) { + rdcf->function = new_rdcf->function; + changed = true; + } + + if(rdcf->help != new_rdcf->help) { + STRING *old = rdcf->help; + rdcf->help = new_rdcf->help; + string_freez(old); + changed = true; + } + else + string_freez(new_rdcf->help); + + if(rdcf->timeout != new_rdcf->timeout) { + rdcf->timeout = new_rdcf->timeout; + changed = true; + } + + if(rdcf->sync != new_rdcf->sync) { + rdcf->sync = new_rdcf->sync; + changed = true; + } + + if(rdcf->collector_data != new_rdcf->collector_data) { + rdcf->collector_data = new_rdcf->collector_data; + changed = true; + } + + return changed; +} + + +void rrdfunctions_init(RRDHOST *host) { + if(host->functions) return; + + host->functions = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(host->functions, rrd_functions_insert_callback, host); + dictionary_register_delete_callback(host->functions, rrd_functions_delete_callback, host); + dictionary_register_conflict_callback(host->functions, rrd_functions_conflict_callback, host); +} + +void rrdfunctions_destroy(RRDHOST *host) { + dictionary_destroy(host->functions); +} + +void rrd_collector_add_function(RRDHOST *host, RRDSET *st, const char *name, int timeout, const char *help, + bool sync, function_execute_at_collector function, void *collector_data) { + + // RRDSET *st may be NULL in this function + // to create a GLOBAL function + + if(st && !st->functions_view) + st->functions_view = dictionary_create_view(host->functions); + + char key[PLUGINSD_LINE_MAX + 1]; + sanitize_function_text(key, name, PLUGINSD_LINE_MAX); + + struct rrd_collector_function tmp = { + .sync = sync, + .timeout = timeout, + .options = (st)?RRD_FUNCTION_LOCAL:RRD_FUNCTION_GLOBAL, + .function = function, + .collector_data = collector_data, + .help = string_strdupz(help), + }; + const DICTIONARY_ITEM *item = dictionary_set_and_acquire_item(host->functions, key, &tmp, sizeof(tmp)); + + if(st) + dictionary_view_set(st->functions_view, key, item); + + dictionary_acquired_item_release(host->functions, item); +} + +void rrd_functions_expose_rrdpush(RRDSET *st, BUFFER *wb) { + if(!st->functions_view) + return; + + struct rrd_collector_function *tmp; + dfe_start_read(st->functions_view, tmp) { + buffer_sprintf(wb + , PLUGINSD_KEYWORD_FUNCTION " \"%s\" %d \"%s\"\n" + , tmp_dfe.name + , tmp->timeout + , string2str(tmp->help) + ); + } + dfe_done(tmp); +} + +struct rrd_function_call_wait { + bool free_with_signal; + bool data_are_ready; + netdata_mutex_t mutex; + pthread_cond_t cond; + int code; +}; + +static void rrd_function_call_wait_free(struct rrd_function_call_wait *tmp) { + pthread_cond_destroy(&tmp->cond); + netdata_mutex_destroy(&tmp->mutex); + freez(tmp); +} + +struct { + const char *format; + uint8_t content_type; +} function_formats[] = { + { .format = "application/json", CT_APPLICATION_JSON }, + { .format = "text/plain", CT_TEXT_PLAIN }, + { .format = "application/xml", CT_APPLICATION_XML }, + { .format = "prometheus", CT_PROMETHEUS }, + { .format = "text", CT_TEXT_PLAIN }, + { .format = "txt", CT_TEXT_PLAIN }, + { .format = "json", CT_APPLICATION_JSON }, + { .format = "html", CT_TEXT_HTML }, + { .format = "text/html", CT_TEXT_HTML }, + { .format = "xml", CT_APPLICATION_XML }, + + // terminator + { .format = NULL, CT_TEXT_PLAIN }, +}; + +uint8_t functions_format_to_content_type(const char *format) { + if(format && *format) { + for (int i = 0; function_formats[i].format; i++) + if (strcmp(function_formats[i].format, format) == 0) + return function_formats[i].content_type; + } + + return CT_TEXT_PLAIN; +} + +const char *functions_content_type_to_format(uint8_t content_type) { + for (int i = 0; function_formats[i].format; i++) + if (function_formats[i].content_type == content_type) + return function_formats[i].format; + + return "text/plain"; +} + +int rrd_call_function_error(BUFFER *wb, const char *msg, int code) { + char buffer[PLUGINSD_LINE_MAX]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + buffer_flush(wb); + buffer_sprintf(wb, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + wb->contenttype = CT_APPLICATION_JSON; + buffer_no_cacheable(wb); + return code; +} + +static int rrd_call_function_find(RRDHOST *host, BUFFER *wb, const char *name, size_t key_length, struct rrd_collector_function **rdcf) { + char buffer[MAX_FUNCTION_LENGTH + 1]; + + strncpyz(buffer, name, MAX_FUNCTION_LENGTH); + char *s = NULL; + + *rdcf = NULL; + while(!(*rdcf) && buffer[0]) { + *rdcf = dictionary_get(host->functions, buffer); + if(*rdcf) break; + + // if s == NULL, set it to the end of the buffer + // this should happen only the first time + if(unlikely(!s)) + s = &buffer[key_length - 1]; + + // skip a word from the end + while(s >= buffer && !isspace(*s)) *s-- = '\0'; + + // skip all spaces + while(s >= buffer && isspace(*s)) *s-- = '\0'; + } + + buffer_flush(wb); + + if(!(*rdcf)) + return rrd_call_function_error(wb, "No collector is supplying this function on this host at this time.", HTTP_RESP_NOT_FOUND); + + if(!(*rdcf)->collector->running) + return rrd_call_function_error(wb, "The collector that registered this function, is not currently running.", HTTP_RESP_BACKEND_FETCH_FAILED); + + return HTTP_RESP_OK; +} + +static void rrd_call_function_signal_when_ready(BUFFER *temp_wb __maybe_unused, int code, void *callback_data) { + struct rrd_function_call_wait *tmp = callback_data; + bool we_should_free = false; + + netdata_mutex_lock(&tmp->mutex); + + // since we got the mutex, + // the waiting thread is either in pthread_cond_timedwait() + // or gave up and left. + + tmp->code = code; + tmp->data_are_ready = true; + + if(tmp->free_with_signal) + we_should_free = true; + + pthread_cond_signal(&tmp->cond); + + netdata_mutex_unlock(&tmp->mutex); + + if(we_should_free) { + buffer_free(temp_wb); + rrd_function_call_wait_free(tmp); + } +} + +int rrd_call_function_and_wait(RRDHOST *host, BUFFER *wb, int timeout, const char *name) { + int code; + + struct rrd_collector_function *rdcf = NULL; + + char key[PLUGINSD_LINE_MAX + 1]; + size_t key_length = sanitize_function_text(key, name, PLUGINSD_LINE_MAX); + code = rrd_call_function_find(host, wb, key, key_length, &rdcf); + if(code != HTTP_RESP_OK) + return code; + + if(timeout <= 0) + timeout = rdcf->timeout; + + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + tp.tv_sec += (time_t)timeout; + + if(rdcf->sync) { + code = rdcf->function(wb, timeout, key, rdcf->collector_data, NULL, NULL); + } + else { + struct rrd_function_call_wait *tmp = mallocz(sizeof(struct rrd_function_call_wait)); + tmp->free_with_signal = false; + tmp->data_are_ready = false; + netdata_mutex_init(&tmp->mutex); + pthread_cond_init(&tmp->cond, NULL); + + bool we_should_free = true; + BUFFER *temp_wb = buffer_create(PLUGINSD_LINE_MAX + 1); // we need it because we may give up on it + temp_wb->contenttype = wb->contenttype; + code = rdcf->function(temp_wb, timeout, key, rdcf->collector_data, rrd_call_function_signal_when_ready, tmp); + if (code == HTTP_RESP_OK) { + netdata_mutex_lock(&tmp->mutex); + + int rc = 0; + while (rc == 0 && !tmp->data_are_ready) { + // the mutex is unlocked within pthread_cond_timedwait() + rc = pthread_cond_timedwait(&tmp->cond, &tmp->mutex, &tp); + // the mutex is again ours + } + + if (tmp->data_are_ready) { + // we have a response + buffer_fast_strcat(wb, buffer_tostring(temp_wb), buffer_strlen(temp_wb)); + wb->contenttype = temp_wb->contenttype; + wb->expires = temp_wb->expires; + + if(wb->expires) + buffer_cacheable(wb); + else + buffer_no_cacheable(wb); + + code = tmp->code; + } + else if (rc == ETIMEDOUT) { + // timeout + // we will go away and let the callback free the structure + tmp->free_with_signal = true; + we_should_free = false; + code = rrd_call_function_error(wb, "Timeout while waiting for a response from the collector.", HTTP_RESP_GATEWAY_TIMEOUT); + } + else + code = rrd_call_function_error(wb, "Failed to get the response from the collector.", HTTP_RESP_INTERNAL_SERVER_ERROR); + + netdata_mutex_unlock(&tmp->mutex); + } + else { + buffer_free(temp_wb); + if(!buffer_strlen(wb)) + rrd_call_function_error(wb, "Failed to send request to the collector.", code); + } + + if (we_should_free) + rrd_function_call_wait_free(tmp); + } + + return code; +} + +int rrd_call_function_async(RRDHOST *host, BUFFER *wb, int timeout, const char *name, + rrd_call_function_async_callback callback, void *callback_data) { + int code; + + struct rrd_collector_function *rdcf = NULL; + char key[PLUGINSD_LINE_MAX + 1]; + size_t key_length = sanitize_function_text(key, name, PLUGINSD_LINE_MAX); + code = rrd_call_function_find(host, wb, key, key_length, &rdcf); + if(code != HTTP_RESP_OK) + return code; + + if(timeout <= 0) + timeout = rdcf->timeout; + + code = rdcf->function(wb, timeout, key, rdcf->collector_data, callback, callback_data); + + if(code != HTTP_RESP_OK) { + if (!buffer_strlen(wb)) + rrd_call_function_error(wb, "Failed to send request to the collector.", code); + } + + return code; +} + +static void functions2json(DICTIONARY *functions, BUFFER *wb, const char *ident, const char *kq, const char *sq) { + struct rrd_collector_function *t; + dfe_start_read(functions, t) { + if(!t->collector->running) continue; + + if(t_dfe.counter) + buffer_strcat(wb, ",\n"); + + buffer_sprintf(wb, "%s%s%s%s: {", ident, kq, t_dfe.name, kq); + buffer_sprintf(wb, "\n\t%s%shelp%s: %s%s%s", ident, kq, kq, sq, string2str(t->help), sq); + buffer_sprintf(wb, ",\n\t%s%stimeout%s: %d", ident, kq, kq, t->timeout); + buffer_sprintf(wb, ",\n\t%s%soptions%s: \"%s%s\"", ident, kq, kq + , (t->options & RRD_FUNCTION_LOCAL)?"LOCAL ":"" + , (t->options & RRD_FUNCTION_GLOBAL)?"GLOBAL ":"" + ); + buffer_sprintf(wb, "\n%s}", ident); + } + dfe_done(t); + buffer_strcat(wb, "\n"); +} + +void chart_functions2json(RRDSET *st, BUFFER *wb, int tabs, const char *kq, const char *sq) { + if(!st || !st->functions_view) return; + + char ident[tabs + 1]; + ident[tabs] = '\0'; + while(tabs) ident[--tabs] = '\t'; + + functions2json(st->functions_view, wb, ident, kq, sq); +} + +void host_functions2json(RRDHOST *host, BUFFER *wb, int tabs, const char *kq, const char *sq) { + if(!host || !host->functions) return; + + char ident[tabs + 1]; + ident[tabs] = '\0'; + while(tabs) ident[--tabs] = '\t'; + + functions2json(host->functions, wb, ident, kq, sq); +} + +void chart_functions_to_dict(DICTIONARY *rrdset_functions_view, DICTIONARY *dst) { + if(!rrdset_functions_view || !dst) return; + + struct rrd_collector_function *t; + dfe_start_read(rrdset_functions_view, t) { + if(!t->collector->running) continue; + + dictionary_set(dst, t_dfe.name, NULL, 0); + } + dfe_done(t); +} diff --git a/database/rrdfunctions.h b/database/rrdfunctions.h new file mode 100644 index 0000000..f031ec3 --- /dev/null +++ b/database/rrdfunctions.h @@ -0,0 +1,35 @@ +#ifndef NETDATA_RRDFUNCTIONS_H +#define NETDATA_RRDFUNCTIONS_H 1 + +#include "rrd.h" + +void rrdfunctions_init(RRDHOST *host); +void rrdfunctions_destroy(RRDHOST *host); + +void rrd_collector_started(void); +void rrd_collector_finished(void); + +typedef void (*function_data_ready_callback)(BUFFER *wb, int code, void *callback_data); + +typedef int (*function_execute_at_collector)(BUFFER *wb, int timeout, const char *function, void *collector_data, + function_data_ready_callback callback, void *callback_data); + +void rrd_collector_add_function(RRDHOST *host, RRDSET *st, const char *name, int timeout, const char *help, + bool sync, function_execute_at_collector function, void *collector_data); + +int rrd_call_function_and_wait(RRDHOST *host, BUFFER *wb, int timeout, const char *name); + +typedef void (*rrd_call_function_async_callback)(BUFFER *wb, int code, void *callback_data); +int rrd_call_function_async(RRDHOST *host, BUFFER *wb, int timeout, const char *name, rrd_call_function_async_callback, void *callback_data); + +void rrd_functions_expose_rrdpush(RRDSET *st, BUFFER *wb); + +void chart_functions2json(RRDSET *st, BUFFER *wb, int tabs, const char *kq, const char *sq); +void chart_functions_to_dict(DICTIONARY *rrdset_functions_view, DICTIONARY *dst); +void host_functions2json(RRDHOST *host, BUFFER *wb, int tabs, const char *kq, const char *sq); + +uint8_t functions_format_to_content_type(const char *format); +const char *functions_content_type_to_format(uint8_t content_type); +int rrd_call_function_error(BUFFER *wb, const char *msg, int code); + +#endif // NETDATA_RRDFUNCTIONS_H diff --git a/database/rrdhost.c b/database/rrdhost.c new file mode 100644 index 0000000..5ba13d4 --- /dev/null +++ b/database/rrdhost.c @@ -0,0 +1,1640 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +bool dbengine_enabled = false; // will become true if and when dbengine is initialized +size_t storage_tiers = 3; +size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 }; +RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS] = { RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW }; + +#if RRD_STORAGE_TIERS != 5 +#error RRD_STORAGE_TIERS is not 5 - you need to update the grouping iterations per tier +#endif + +size_t get_tier_grouping(size_t tier) { + if(unlikely(tier >= storage_tiers)) tier = storage_tiers - 1; + + size_t grouping = 1; + // first tier is always 1 iteration of whatever update every the chart has + for(size_t i = 1; i <= tier ;i++) + grouping *= storage_tiers_grouping_iterations[i]; + + return grouping; +} + +RRDHOST *localhost = NULL; +size_t rrd_hosts_available = 0; +netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER; + +time_t rrdset_free_obsolete_time = 3600; +time_t rrdhost_free_orphan_time = 3600; + +bool is_storage_engine_shared(STORAGE_INSTANCE *engine) { +#ifdef ENABLE_DBENGINE + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if (engine == (STORAGE_INSTANCE *)multidb_ctx[tier]) + return true; + } +#endif + + return false; +} + + +// ---------------------------------------------------------------------------- +// RRDHOST indexes management + +DICTIONARY *rrdhost_root_index = NULL; +static DICTIONARY *rrdhost_root_index_hostname = NULL; + +static inline void rrdhost_init() { + if(unlikely(!rrdhost_root_index)) { + rrdhost_root_index = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + } + + if(unlikely(!rrdhost_root_index_hostname)) { + rrdhost_root_index_hostname = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + } +} + +// ---------------------------------------------------------------------------- +// RRDHOST index by UUID + +inline long rrdhost_hosts_available(void) { + return dictionary_entries(rrdhost_root_index); +} + +inline RRDHOST *rrdhost_find_by_guid(const char *guid) { + return dictionary_get(rrdhost_root_index, guid); +} + +static inline RRDHOST *rrdhost_index_add_by_guid(RRDHOST *host) { + RRDHOST *ret_machine_guid = dictionary_set(rrdhost_root_index, host->machine_guid, host, sizeof(RRDHOST)); + if(ret_machine_guid == host) + rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID); + else { + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID); + error("RRDHOST: %s() host with machine guid '%s' is already indexed", __FUNCTION__, host->machine_guid); + } + + return host; +} + +static void rrdhost_index_del_by_guid(RRDHOST *host) { + if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID)) { + if(!dictionary_del(rrdhost_root_index, host->machine_guid)) + error("RRDHOST: %s() failed to delete machine guid '%s' from index", __FUNCTION__, host->machine_guid); + + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID); + } +} + +// ---------------------------------------------------------------------------- +// RRDHOST index by hostname + +inline RRDHOST *rrdhost_find_by_hostname(const char *hostname) { + if(unlikely(!strcmp(hostname, "localhost"))) + return localhost; + + return dictionary_get(rrdhost_root_index_hostname, hostname); +} + +static inline RRDHOST *rrdhost_index_add_hostname(RRDHOST *host) { + if(!host->hostname) return host; + + RRDHOST *ret_hostname = dictionary_set(rrdhost_root_index_hostname, rrdhost_hostname(host), host, sizeof(RRDHOST)); + if(ret_hostname == host) + rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_HOSTNAME); + else { + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_HOSTNAME); + error("RRDHOST: %s() host with hostname '%s' is already indexed", __FUNCTION__, rrdhost_hostname(host)); + } + + return host; +} + +static inline void rrdhost_index_del_hostname(RRDHOST *host) { + if(unlikely(!host->hostname)) return; + + if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_HOSTNAME)) { + if(!dictionary_del(rrdhost_root_index_hostname, rrdhost_hostname(host))) + error("RRDHOST: %s() failed to delete hostname '%s' from index", __FUNCTION__, rrdhost_hostname(host)); + + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_HOSTNAME); + } +} + +// ---------------------------------------------------------------------------- +// RRDHOST - internal helpers + +static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) { + if(host->tags && tags && !strcmp(rrdhost_tags(host), tags)) + return; + + STRING *old = host->tags; + host->tags = string_strdupz((tags && *tags)?tags:NULL); + string_freez(old); +} + +static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) { + if(unlikely(hostname && !*hostname)) hostname = NULL; + + if(host->hostname && hostname && !strcmp(rrdhost_hostname(host), hostname)) + return; + + rrdhost_index_del_hostname(host); + + STRING *old = host->hostname; + host->hostname = string_strdupz(hostname?hostname:"localhost"); + string_freez(old); + + rrdhost_index_add_hostname(host); +} + +static inline void rrdhost_init_os(RRDHOST *host, const char *os) { + if(host->os && os && !strcmp(rrdhost_os(host), os)) + return; + + STRING *old = host->os; + host->os = string_strdupz(os?os:"unknown"); + string_freez(old); +} + +static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone, const char *abbrev_timezone, int32_t utc_offset) { + if (host->timezone && timezone && !strcmp(rrdhost_timezone(host), timezone) && host->abbrev_timezone && abbrev_timezone && + !strcmp(rrdhost_abbrev_timezone(host), abbrev_timezone) && host->utc_offset == utc_offset) + return; + + STRING *old = host->timezone; + host->timezone = string_strdupz((timezone && *timezone)?timezone:"unknown"); + string_freez(old); + + old = (void *)host->abbrev_timezone; + host->abbrev_timezone = string_strdupz((abbrev_timezone && *abbrev_timezone) ? abbrev_timezone : "UTC"); + string_freez(old); + + host->utc_offset = utc_offset; +} + +void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, + const char *registry_hostname, const char *os, const char *tags, + const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name, + const char *program_version) +{ + + host->rrd_update_every = update_every; + host->rrd_memory_mode = memory_mode; + + rrdhost_init_os(host, os); + rrdhost_init_timezone(host, tzone, abbrev_tzone, utc_offset); + rrdhost_init_tags(host, tags); + + host->program_name = string_strdupz((program_name && *program_name) ? program_name : "unknown"); + host->program_version = string_strdupz((program_version && *program_version) ? program_version : "unknown"); + host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname) ? registry_hostname : rrdhost_hostname(host)); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - add a host + +static void rrdhost_initialize_rrdpush_sender(RRDHOST *host, + unsigned int rrdpush_enabled, + char *rrdpush_destination, + char *rrdpush_api_key, + char *rrdpush_send_charts_matching +) { + if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED)) return; + + if(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) { + rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED); + + sender_init(host); + +#ifdef ENABLE_HTTPS + host->sender->ssl.conn = NULL; + host->sender->ssl.flags = NETDATA_SSL_START; +#endif + + host->rrdpush_send_destination = strdupz(rrdpush_destination); + rrdpush_destinations_init(host); + + host->rrdpush_send_api_key = strdupz(rrdpush_api_key); + host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT); + + rrdhost_option_set(host, RRDHOST_OPTION_SENDER_ENABLED); + } + else + rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED); +} + +RRDHOST *rrdhost_create(const char *hostname, + const char *registry_hostname, + const char *guid, + const char *os, + const char *timezone, + const char *abbrev_timezone, + int32_t utc_offset, + const char *tags, + const char *program_name, + const char *program_version, + int update_every, + long entries, + RRD_MEMORY_MODE memory_mode, + unsigned int health_enabled, + unsigned int rrdpush_enabled, + char *rrdpush_destination, + char *rrdpush_api_key, + char *rrdpush_send_charts_matching, + bool rrdpush_enable_replication, + time_t rrdpush_seconds_to_replicate, + time_t rrdpush_replication_step, + struct rrdhost_system_info *system_info, + int is_localhost, + bool archived +) { + debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid); + + rrd_check_wrlock(); + + if(memory_mode == RRD_MEMORY_MODE_DBENGINE && !dbengine_enabled) { + error("memory mode 'dbengine' is not enabled, but host '%s' is configured for it. Falling back to 'alloc'", hostname); + memory_mode = RRD_MEMORY_MODE_ALLOC; + } + +#ifdef ENABLE_DBENGINE + int is_legacy = (memory_mode == RRD_MEMORY_MODE_DBENGINE) && is_legacy_child(guid); +#else +int is_legacy = 1; +#endif + + int is_in_multihost = (memory_mode == RRD_MEMORY_MODE_DBENGINE && !is_legacy); + RRDHOST *host = callocz(1, sizeof(RRDHOST)); + + strncpyz(host->machine_guid, guid, GUID_LEN + 1); + + set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, registry_hostname, os, + tags, timezone, abbrev_timezone, utc_offset, program_name, program_version); + + rrdhost_init_hostname(host, hostname); + + host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries); + host->health_enabled = ((memory_mode == RRD_MEMORY_MODE_NONE)) ? 0 : health_enabled; + + if (likely(!archived)) { + rrdfunctions_init(host); + host->rrdlabels = rrdlabels_create(); + rrdhost_initialize_rrdpush_sender( + host, rrdpush_enabled, rrdpush_destination, rrdpush_api_key, rrdpush_send_charts_matching); + } + + if(rrdpush_enable_replication) + rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION); + else + rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION); + + host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate; + host->rrdpush_replication_step = rrdpush_replication_step; + + switch(memory_mode) { + default: + case RRD_MEMORY_MODE_ALLOC: + case RRD_MEMORY_MODE_MAP: + case RRD_MEMORY_MODE_SAVE: + case RRD_MEMORY_MODE_RAM: + if(host->rrdpush_seconds_to_replicate > host->rrd_history_entries * host->rrd_update_every) + host->rrdpush_seconds_to_replicate = host->rrd_history_entries * host->rrd_update_every; + break; + + case RRD_MEMORY_MODE_DBENGINE: + break; + } + + netdata_rwlock_init(&host->rrdhost_rwlock); + netdata_mutex_init(&host->aclk_state_lock); + netdata_mutex_init(&host->receiver_lock); + + host->system_info = system_info; + + rrdset_index_init(host); + + if(config_get_boolean(CONFIG_SECTION_DB, "delete obsolete charts files", 1)) + rrdhost_option_set(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS); + + if(config_get_boolean(CONFIG_SECTION_DB, "delete orphan hosts files", 1) && !is_localhost) + rrdhost_option_set(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST); + + char filename[FILENAME_MAX + 1]; + if(is_localhost) { + host->cache_dir = strdupz(netdata_configured_cache_dir); + host->varlib_dir = strdupz(netdata_configured_varlib_dir); + } + else { + // this is not localhost - append our GUID to localhost path + if (is_in_multihost) { // don't append to cache dir in multihost + host->cache_dir = strdupz(netdata_configured_cache_dir); + } + else { + snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid); + host->cache_dir = strdupz(filename); + } + + if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || + (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) { + int r = mkdir(host->cache_dir, 0775); + if(r != 0 && errno != EEXIST) + error("Host '%s': cannot create directory '%s'", rrdhost_hostname(host), host->cache_dir); + } + + snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid); + host->varlib_dir = strdupz(filename); + } + + // this is also needed for custom host variables - not only health + if(!host->rrdvars) + host->rrdvars = rrdvariables_create(); + + RRDHOST *t = rrdhost_index_add_by_guid(host); + if(t != host) { + error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", rrdhost_hostname(host), host->machine_guid, rrdhost_hostname(t), t->machine_guid); + rrdhost_free(host, 1); + return NULL; + } + + if (likely(!uuid_parse(host->machine_guid, host->host_uuid))) { + if(!archived) + metaqueue_host_update_info(host->machine_guid); + sql_load_node_id(host); + } + else + error_report("Host machine GUID %s is not valid", host->machine_guid); + + rrdfamily_index_init(host); + rrdcalctemplate_index_init(host); + rrdcalc_rrdhost_index_init(host); + + if (health_enabled) + health_thread_spawn(host); + + if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { +#ifdef ENABLE_DBENGINE + char dbenginepath[FILENAME_MAX + 1]; + int ret; + + snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir); + ret = mkdir(dbenginepath, 0775); + if (ret != 0 && errno != EEXIST) + error("Host '%s': cannot create directory '%s'", rrdhost_hostname(host), dbenginepath); + else ret = 0; // succeed + if (is_legacy) { + // initialize legacy dbengine instance as needed + + host->db[0].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[0].eng = storage_engine_get(host->db[0].mode); + host->db[0].tier_grouping = get_tier_grouping(0); + + ret = rrdeng_init( + host, + (struct rrdengine_instance **)&host->db[0].instance, + dbenginepath, + default_rrdeng_page_cache_mb, + default_rrdeng_disk_quota_mb, + 0); // may fail here for legacy dbengine initialization + + if(ret == 0) { + // assign the rest of the shared storage instances to it + // to allow them collect its metrics too + for(size_t tier = 1; tier < storage_tiers ; tier++) { + host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[tier].eng = storage_engine_get(host->db[tier].mode); + host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier]; + host->db[tier].tier_grouping = get_tier_grouping(tier); + } + } + } + else { + for(size_t tier = 0; tier < storage_tiers ; tier++) { + host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[tier].eng = storage_engine_get(host->db[tier].mode); + host->db[tier].instance = (STORAGE_INSTANCE *)multidb_ctx[tier]; + host->db[tier].tier_grouping = get_tier_grouping(tier); + } + } + if (ret) { // check legacy or multihost initialization success + error( + "Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.", + rrdhost_hostname(host), host->machine_guid, host->cache_dir); + rrdhost_free(host, 1); + host = NULL; + //rrd_hosts_available++; //TODO: maybe we want this? + + return host; + } + +#else + fatal("RRD_MEMORY_MODE_DBENGINE is not supported in this platform."); +#endif + } + else { + host->db[0].mode = host->rrd_memory_mode; + host->db[0].eng = storage_engine_get(host->db[0].mode); + host->db[0].instance = NULL; + host->db[0].tier_grouping = get_tier_grouping(0); + +#ifdef ENABLE_DBENGINE + // the first tier is reserved for the non-dbengine modes + for(size_t tier = 1; tier < storage_tiers ; tier++) { + host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[tier].eng = storage_engine_get(host->db[tier].mode); + host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier]; + host->db[tier].tier_grouping = get_tier_grouping(tier); + } +#endif + } + + // ------------------------------------------------------------------------ + // link it and add it to the index + + if(is_localhost) + DOUBLE_LINKED_LIST_PREPEND_UNSAFE(localhost, host, prev, next); + else + DOUBLE_LINKED_LIST_APPEND_UNSAFE(localhost, host, prev, next); + + // ------------------------------------------------------------------------ + // init new ML host and update system_info to let upstreams know + // about ML functionality + // + + if (is_localhost && host->system_info) { + host->system_info->ml_capable = ml_capable(); + host->system_info->ml_enabled = ml_enabled(host); + host->system_info->mc_version = enable_metric_correlations ? metric_correlations_version : 0; + } + + info("Host '%s' (at registry as '%s') with guid '%s' initialized" + ", os '%s'" + ", timezone '%s'" + ", tags '%s'" + ", program_name '%s'" + ", program_version '%s'" + ", update every %d" + ", memory mode %s" + ", history entries %ld" + ", streaming %s" + " (to '%s' with api key '%s')" + ", health %s" + ", cache_dir '%s'" + ", varlib_dir '%s'" + ", health_log '%s'" + ", alarms default handler '%s'" + ", alarms default recipient '%s'" + , rrdhost_hostname(host) + , rrdhost_registry_hostname(host) + , host->machine_guid + , rrdhost_os(host) + , rrdhost_timezone(host) + , rrdhost_tags(host) + , rrdhost_program_name(host) + , rrdhost_program_version(host) + , host->rrd_update_every + , rrd_memory_mode_name(host->rrd_memory_mode) + , host->rrd_history_entries + , rrdhost_has_rrdpush_sender_enabled(host)?"enabled":"disabled" + , host->rrdpush_send_destination?host->rrdpush_send_destination:"" + , host->rrdpush_send_api_key?host->rrdpush_send_api_key:"" + , host->health_enabled?"enabled":"disabled" + , host->cache_dir + , host->varlib_dir + , host->health_log_filename + , string2str(host->health_default_exec) + , string2str(host->health_default_recipient) + ); + if(!archived) + metaqueue_host_update_system_info(host); + + rrd_hosts_available++; + + rrdhost_load_rrdcontext_data(host); + if (!archived) + ml_new_host(host); + else + rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED); + + + return host; +} + +void rrdhost_update(RRDHOST *host + , const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *abbrev_timezone + , int32_t utc_offset + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step + , struct rrdhost_system_info *system_info +) +{ + UNUSED(guid); + + host->health_enabled = (mode == RRD_MEMORY_MODE_NONE) ? 0 : health_enabled; + + rrdhost_system_info_free(host->system_info); + host->system_info = system_info; + metaqueue_host_update_system_info(host); + + rrdhost_init_os(host, os); + rrdhost_init_timezone(host, timezone, abbrev_timezone, utc_offset); + + string_freez(host->registry_hostname); + host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname); + + if(strcmp(rrdhost_hostname(host), hostname) != 0) { + info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", rrdhost_hostname(host), hostname); + rrdhost_init_hostname(host, hostname); + } + + if(strcmp(rrdhost_program_name(host), program_name) != 0) { + info("Host '%s' switched program name from '%s' to '%s'", rrdhost_hostname(host), rrdhost_program_name(host), program_name); + STRING *t = host->program_name; + host->program_name = string_strdupz(program_name); + string_freez(t); + } + + if(strcmp(rrdhost_program_version(host), program_version) != 0) { + info("Host '%s' switched program version from '%s' to '%s'", rrdhost_hostname(host), rrdhost_program_version(host), program_version); + STRING *t = host->program_version; + host->program_version = string_strdupz(program_version); + string_freez(t); + } + + if(host->rrd_update_every != update_every) + error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", rrdhost_hostname(host), host->rrd_update_every, update_every); + + if(host->rrd_memory_mode != mode) + error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", rrdhost_hostname(host), rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); + + else if(host->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && host->rrd_history_entries < history) + error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", rrdhost_hostname(host), host->rrd_history_entries, history); + + // update host tags + rrdhost_init_tags(host, tags); + + if(!host->rrdvars) + host->rrdvars = rrdvariables_create(); + + if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { + rrdhost_flag_clear(host, RRDHOST_FLAG_ARCHIVED); + + rrdfunctions_init(host); + + if(!host->rrdlabels) + host->rrdlabels = rrdlabels_create(); + + if (!host->rrdset_root_index) + rrdset_index_init(host); + + rrdhost_initialize_rrdpush_sender(host, + rrdpush_enabled, + rrdpush_destination, + rrdpush_api_key, + rrdpush_send_charts_matching); + + rrdfamily_index_init(host); + rrdcalctemplate_index_init(host); + rrdcalc_rrdhost_index_init(host); + + if(rrdpush_enable_replication) + rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION); + else + rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION); + + host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate; + host->rrdpush_replication_step = rrdpush_replication_step; + + rrd_hosts_available++; + ml_new_host(host); + rrdhost_load_rrdcontext_data(host); + info("Host %s is not in archived mode anymore", rrdhost_hostname(host)); + } + + if (health_enabled) + health_thread_spawn(host); +} + +RRDHOST *rrdhost_find_or_create( + const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *abbrev_timezone + , int32_t utc_offset + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step + , struct rrdhost_system_info *system_info + , bool archived +) { + debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid); + + rrd_wrlock(); + RRDHOST *host = rrdhost_find_by_guid(guid); + if (unlikely(host && host->rrd_memory_mode != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) { + /* If a legacy memory mode instantiates all dbengine state must be discarded to avoid inconsistencies */ + error("Archived host '%s' has memory mode '%s', but the wanted one is '%s'. Discarding archived state.", + rrdhost_hostname(host), rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); + rrdhost_free(host, 1); + host = NULL; + } + if(!host) { + host = rrdhost_create( + hostname + , registry_hostname + , guid + , os + , timezone + , abbrev_timezone + , utc_offset + , tags + , program_name + , program_version + , update_every + , history + , mode + , health_enabled + , rrdpush_enabled + , rrdpush_destination + , rrdpush_api_key + , rrdpush_send_charts_matching + , rrdpush_enable_replication + , rrdpush_seconds_to_replicate + , rrdpush_replication_step + , system_info + , 0 + , archived + ); + } + else { + rrdhost_update(host + , hostname + , registry_hostname + , guid + , os + , timezone + , abbrev_timezone + , utc_offset + , tags + , program_name + , program_version + , update_every + , history + , mode + , health_enabled + , rrdpush_enabled + , rrdpush_destination + , rrdpush_api_key + , rrdpush_send_charts_matching + , rrdpush_enable_replication + , rrdpush_seconds_to_replicate + , rrdpush_replication_step + , system_info); + } + if (host) { + rrdhost_wrlock(host); + rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN); + host->senders_disconnected_time = 0; + rrdhost_unlock(host); + } + + rrd_unlock(); + + return host; +} +inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now) { + if(host != protected_host + && host != localhost + && rrdhost_receiver_replicating_charts(host) == 0 + && rrdhost_sender_replicating_charts(host) == 0 + && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN) + && !rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED) + && !host->receiver + && host->senders_disconnected_time + && host->senders_disconnected_time + rrdhost_free_orphan_time < now) + return 1; + + return 0; +} + +// ---------------------------------------------------------------------------- +// RRDHOST global / startup initialization + +void dbengine_init(char *hostname) { +#ifdef ENABLE_DBENGINE + storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers); + if(storage_tiers < 1) { + error("At least 1 storage tier is required. Assuming 1."); + storage_tiers = 1; + config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers); + } + if(storage_tiers > RRD_STORAGE_TIERS) { + error("Up to %d storage tier are supported. Assuming %d.", RRD_STORAGE_TIERS, RRD_STORAGE_TIERS); + storage_tiers = RRD_STORAGE_TIERS; + config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers); + } + + default_rrdeng_page_fetch_timeout = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", PAGE_CACHE_FETCH_WAIT_TIMEOUT); + if (default_rrdeng_page_fetch_timeout < 1) { + info("'dbengine page fetch timeout secs' cannot be %d, using 1", default_rrdeng_page_fetch_timeout); + default_rrdeng_page_fetch_timeout = 1; + config_set_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", default_rrdeng_page_fetch_timeout); + } + + default_rrdeng_page_fetch_retries = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch retries", MAX_PAGE_CACHE_FETCH_RETRIES); + if (default_rrdeng_page_fetch_retries < 1) { + info("\"dbengine page fetch retries\" found in netdata.conf cannot be %d, using 1", default_rrdeng_page_fetch_retries); + default_rrdeng_page_fetch_retries = 1; + config_set_number(CONFIG_SECTION_DB, "dbengine page fetch retries", default_rrdeng_page_fetch_retries); + } + + if(config_get_boolean(CONFIG_SECTION_DB, "dbengine page descriptors in file mapped memory", rrdeng_page_descr_is_mmap()) == CONFIG_BOOLEAN_YES) + rrdeng_page_descr_use_mmap(); + else + rrdeng_page_descr_use_malloc(); + + size_t created_tiers = 0; + char dbenginepath[FILENAME_MAX + 1]; + char dbengineconfig[200 + 1]; + int divisor = 1; + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(tier == 0) + snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", netdata_configured_cache_dir); + else + snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%zu", netdata_configured_cache_dir, tier); + + int ret = mkdir(dbenginepath, 0775); + if (ret != 0 && errno != EEXIST) { + error("DBENGINE on '%s': cannot create directory '%s'", hostname, dbenginepath); + break; + } + + if(tier > 0) + divisor *= 2; + + int page_cache_mb = default_rrdeng_page_cache_mb / divisor; + int disk_space_mb = default_multidb_disk_quota_mb / divisor; + size_t grouping_iterations = storage_tiers_grouping_iterations[tier]; + RRD_BACKFILL backfill = storage_tiers_backfill[tier]; + + if(tier > 0) { + snprintfz(dbengineconfig, 200, "dbengine tier %zu page cache size MB", tier); + page_cache_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, page_cache_mb); + + snprintfz(dbengineconfig, 200, "dbengine tier %zu multihost disk space MB", tier); + disk_space_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, disk_space_mb); + + snprintfz(dbengineconfig, 200, "dbengine tier %zu update every iterations", tier); + grouping_iterations = config_get_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations); + if(grouping_iterations < 2) { + grouping_iterations = 2; + config_set_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations); + error("DBENGINE on '%s': 'dbegnine tier %zu update every iterations' cannot be less than 2. Assuming 2.", hostname, tier); + } + + snprintfz(dbengineconfig, 200, "dbengine tier %zu backfill", tier); + const char *bf = config_get(CONFIG_SECTION_DB, dbengineconfig, backfill == RRD_BACKFILL_NEW ? "new" : backfill == RRD_BACKFILL_FULL ? "full" : "none"); + if(strcmp(bf, "new") == 0) backfill = RRD_BACKFILL_NEW; + else if(strcmp(bf, "full") == 0) backfill = RRD_BACKFILL_FULL; + else if(strcmp(bf, "none") == 0) backfill = RRD_BACKFILL_NONE; + else { + error("DBENGINE: unknown backfill value '%s', assuming 'new'", bf); + config_set(CONFIG_SECTION_DB, dbengineconfig, "new"); + backfill = RRD_BACKFILL_NEW; + } + } + + storage_tiers_grouping_iterations[tier] = grouping_iterations; + storage_tiers_backfill[tier] = backfill; + + if(tier > 0 && get_tier_grouping(tier) > 65535) { + storage_tiers_grouping_iterations[tier] = 1; + error("DBENGINE on '%s': dbengine tier %zu gives aggregation of more than 65535 points of tier 0. Disabling tiers above %zu", hostname, tier, tier); + break; + } + + internal_error(true, "DBENGINE tier %zu grouping iterations is set to %zu", tier, storage_tiers_grouping_iterations[tier]); + ret = rrdeng_init(NULL, NULL, dbenginepath, page_cache_mb, disk_space_mb, tier); + if(ret != 0) { + error("DBENGINE on '%s': Failed to initialize multi-host database tier %zu on path '%s'", + hostname, tier, dbenginepath); + break; + } + else + created_tiers++; + } + + if(created_tiers && created_tiers < storage_tiers) { + error("DBENGINE on '%s': Managed to create %zu tiers instead of %zu. Continuing with %zu available.", + hostname, created_tiers, storage_tiers, created_tiers); + storage_tiers = created_tiers; + } + else if(!created_tiers) + fatal("DBENGINE on '%s', failed to initialize databases at '%s'.", hostname, netdata_configured_cache_dir); + + dbengine_enabled = true; +#else + storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", 1); + if(storage_tiers != 1) { + error("DBENGINE is not available on '%s', so only 1 database tier can be supported.", hostname); + storage_tiers = 1; + config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers); + } + dbengine_enabled = false; +#endif +} + +int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { + rrdhost_init(); + + if (unlikely(sql_init_database(DB_CHECK_NONE, system_info ? 0 : 1))) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + fatal("Failed to initialize SQLite"); + info("Skipping SQLITE metadata initialization since memory mode is not dbengine"); + } + + if (unlikely(sql_init_context_database(system_info ? 0 : 1))) { + error_report("Failed to initialize context metadata database"); + } + + if (unlikely(strcmp(hostname, "unittest") == 0)) { + dbengine_enabled = true; + } + else { + health_init(); + rrdpush_init(); + + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || rrdpush_receiver_needs_dbengine()) { + info("Initializing dbengine..."); + dbengine_init(hostname); + } + else { + info("Not initializing dbengine..."); + storage_tiers = 1; + } + + if (!dbengine_enabled) { + if (storage_tiers > 1) { + error("dbengine is not enabled, but %zu tiers have been requested. Resetting tiers to 1", + storage_tiers); + storage_tiers = 1; + } + + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + error("dbengine is not enabled, but it has been given as the default db mode. Resetting db mode to alloc"); + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + } + } + } + + metadata_sync_init(); + debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname); + rrd_wrlock(); + localhost = rrdhost_create( + hostname + , registry_get_this_machine_hostname() + , registry_get_this_machine_guid() + , os_type + , netdata_configured_timezone + , netdata_configured_abbrev_timezone + , netdata_configured_utc_offset + , "" + , program_name + , program_version + , default_rrd_update_every + , default_rrd_history_entries + , default_rrd_memory_mode + , default_health_enabled + , default_rrdpush_enabled + , default_rrdpush_destination + , default_rrdpush_api_key + , default_rrdpush_send_charts_matching + , default_rrdpush_enable_replication + , default_rrdpush_seconds_to_replicate + , default_rrdpush_replication_step + , system_info + , 1 + , 0 + ); + if (unlikely(!localhost)) { + rrd_unlock(); + return 1; + } + + rrd_unlock(); + + if (likely(system_info)) { + migrate_localhost(&localhost->host_uuid); + sql_aclk_sync_init(); + web_client_api_v1_management_init(); + } + return localhost==NULL; +} + +// ---------------------------------------------------------------------------- +// RRDHOST - lock validations +// there are only used when NETDATA_INTERNAL_CHECKS is set + +void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking read lock on host '%s'", rrdhost_hostname(host)); + + int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock); + if(ret == 0) + fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", rrdhost_hostname(host), function, line, file); +} + +void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking write lock on host '%s'", rrdhost_hostname(host)); + + int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock); + if(ret == 0) + fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", rrdhost_hostname(host), function, line, file); +} + +void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking read lock on all RRDs"); + + int ret = netdata_rwlock_trywrlock(&rrd_rwlock); + if(ret == 0) + fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file); +} + +void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking write lock on all RRDs"); + + int ret = netdata_rwlock_tryrdlock(&rrd_rwlock); + if(ret == 0) + fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - free + +void rrdhost_system_info_free(struct rrdhost_system_info *system_info) { + if(likely(system_info)) { + freez(system_info->cloud_provider_type); + freez(system_info->cloud_instance_type); + freez(system_info->cloud_instance_region); + freez(system_info->host_os_name); + freez(system_info->host_os_id); + freez(system_info->host_os_id_like); + freez(system_info->host_os_version); + freez(system_info->host_os_version_id); + freez(system_info->host_os_detection); + freez(system_info->host_cores); + freez(system_info->host_cpu_freq); + freez(system_info->host_ram_total); + freez(system_info->host_disk_space); + freez(system_info->container_os_name); + freez(system_info->container_os_id); + freez(system_info->container_os_id_like); + freez(system_info->container_os_version); + freez(system_info->container_os_version_id); + freez(system_info->container_os_detection); + freez(system_info->kernel_name); + freez(system_info->kernel_version); + freez(system_info->architecture); + freez(system_info->virtualization); + freez(system_info->virt_detection); + freez(system_info->container); + freez(system_info->container_detection); + freez(system_info->is_k8s_node); + freez(system_info->install_type); + freez(system_info->prebuilt_arch); + freez(system_info->prebuilt_dist); + freez(system_info); + } +} + +void destroy_receiver_state(struct receiver_state *rpt); + +void stop_streaming_sender(RRDHOST *host) +{ + rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED); + + if (unlikely(!host->sender)) + return; + + rrdpush_sender_thread_stop(host); // stop a possibly running thread + cbuffer_free(host->sender->buffer); +#ifdef ENABLE_COMPRESSION + if (host->sender->compressor) + host->sender->compressor->destroy(&host->sender->compressor); +#endif + replication_cleanup_sender(host->sender); + freez(host->sender); + host->sender = NULL; + rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED); +} + +void stop_streaming_receiver(RRDHOST *host) +{ + netdata_mutex_lock(&host->receiver_lock); + if (host->receiver) { + if (!host->receiver->exited) + netdata_thread_cancel(host->receiver->thread); + netdata_mutex_unlock(&host->receiver_lock); + struct receiver_state *rpt = host->receiver; + while (host->receiver && !rpt->exited) + sleep_usec(50 * USEC_PER_MS); + // If the receiver detached from the host then its thread will destroy the state + if (host->receiver == rpt) + destroy_receiver_state(host->receiver); + } else + netdata_mutex_unlock(&host->receiver_lock); +} + +void rrdhost_free(RRDHOST *host, bool force) { + if(!host) return; + + if (netdata_exit || force) + info("Freeing all memory for host '%s'...", rrdhost_hostname(host)); + + rrd_check_wrlock(); // make sure the RRDs are write locked + + rrdhost_wrlock(host); + ml_delete_host(host); + rrdhost_unlock(host); + + // ------------------------------------------------------------------------ + // clean up streaming + + stop_streaming_sender(host); + + if (netdata_exit || force) + stop_streaming_receiver(host); + + + // ------------------------------------------------------------------------ + // clean up alarms + + rrdcalc_delete_all(host); + + + rrdhost_wrlock(host); // lock this RRDHOST + + // ------------------------------------------------------------------------ + // release its children resources + +#ifdef ENABLE_DBENGINE + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE + && host->db[tier].instance + && !is_storage_engine_shared(host->db[tier].instance)) + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[tier].instance); + } +#endif + + // delete all the RRDSETs of the host + rrdset_index_destroy(host); + rrdcalc_rrdhost_index_destroy(host); + rrdcalctemplate_index_destroy(host); + + freez(host->exporting_flags); + + health_alarm_log_free(host); + +#ifdef ENABLE_DBENGINE + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE + && host->db[tier].instance + && !is_storage_engine_shared(host->db[tier].instance)) + rrdeng_exit((struct rrdengine_instance *)host->db[tier].instance); + } +#endif + + if (!netdata_exit && !force) { + info("Setting archive mode for host '%s'...", rrdhost_hostname(host)); + rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED); + rrdhost_unlock(host); + return; + } + +#ifdef ENABLE_ACLK + struct aclk_database_worker_config *wc = host->dbsync_worker; + if (wc && !netdata_exit) { + struct aclk_database_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = ACLK_DATABASE_ORPHAN_HOST; + struct aclk_completion compl ; + init_aclk_completion(&compl ); + cmd.completion = &compl ; + aclk_database_enq_cmd(wc, &cmd); + wait_for_aclk_completion(&compl ); + destroy_aclk_completion(&compl ); + } +#endif + + // ------------------------------------------------------------------------ + // remove it from the indexes + + rrdhost_index_del_hostname(host); + rrdhost_index_del_by_guid(host); + + // ------------------------------------------------------------------------ + // unlink it from the host + + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(localhost, host, prev, next); + + // ------------------------------------------------------------------------ + // free it + + pthread_mutex_destroy(&host->aclk_state_lock); + freez(host->aclk_state.claimed_id); + freez(host->aclk_state.prev_claimed_id); + string_freez(host->tags); + rrdlabels_destroy(host->rrdlabels); + string_freez(host->os); + string_freez(host->timezone); + string_freez(host->abbrev_timezone); + string_freez(host->program_name); + string_freez(host->program_version); + rrdhost_system_info_free(host->system_info); + freez(host->cache_dir); + freez(host->varlib_dir); + freez(host->rrdpush_send_api_key); + freez(host->rrdpush_send_destination); + rrdpush_destinations_free(host); + string_freez(host->health_default_exec); + string_freez(host->health_default_recipient); + freez(host->health_log_filename); + string_freez(host->registry_hostname); + simple_pattern_free(host->rrdpush_send_charts_matching); + rrdhost_unlock(host); + netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock); + netdata_rwlock_destroy(&host->rrdhost_rwlock); + freez(host->node_id); + + rrdfamily_index_destroy(host); + rrdfunctions_destroy(host); + rrdvariables_destroy(host->rrdvars); + + rrdhost_destroy_rrdcontexts(host); + + string_freez(host->hostname); + freez(host); +#ifdef ENABLE_ACLK + if (wc) + wc->is_orphan = 0; +#endif + rrd_hosts_available--; +} + +void rrdhost_free_all(void) { + rrd_wrlock(); + + /* Make sure child-hosts are released before the localhost. */ + while(localhost && localhost->next) + rrdhost_free(localhost->next, 1); + + if(localhost) + rrdhost_free(localhost, 1); + + rrd_unlock(); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - save host files + +void rrdhost_save_charts(RRDHOST *host) { + if(!host) return; + + info("Saving/Closing database of host '%s'...", rrdhost_hostname(host)); + + RRDSET *st; + + // we get a write lock + // to ensure only one thread is saving the database + rrdset_foreach_write(st, host) { + rrdset_save(st); + } + rrdset_foreach_done(st); +} + +static void rrdhost_load_auto_labels(void) { + DICTIONARY *labels = localhost->rrdlabels; + + if (localhost->system_info->cloud_provider_type) + rrdlabels_add(labels, "_cloud_provider_type", localhost->system_info->cloud_provider_type, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->cloud_instance_type) + rrdlabels_add(labels, "_cloud_instance_type", localhost->system_info->cloud_instance_type, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->cloud_instance_region) + rrdlabels_add( + labels, "_cloud_instance_region", localhost->system_info->cloud_instance_region, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->host_os_name) + rrdlabels_add(labels, "_os_name", localhost->system_info->host_os_name, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->host_os_version) + rrdlabels_add(labels, "_os_version", localhost->system_info->host_os_version, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->kernel_version) + rrdlabels_add(labels, "_kernel_version", localhost->system_info->kernel_version, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->host_cores) + rrdlabels_add(labels, "_system_cores", localhost->system_info->host_cores, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->host_cpu_freq) + rrdlabels_add(labels, "_system_cpu_freq", localhost->system_info->host_cpu_freq, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->host_ram_total) + rrdlabels_add(labels, "_system_ram_total", localhost->system_info->host_ram_total, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->host_disk_space) + rrdlabels_add(labels, "_system_disk_space", localhost->system_info->host_disk_space, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->architecture) + rrdlabels_add(labels, "_architecture", localhost->system_info->architecture, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->virtualization) + rrdlabels_add(labels, "_virtualization", localhost->system_info->virtualization, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->container) + rrdlabels_add(labels, "_container", localhost->system_info->container, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->container_detection) + rrdlabels_add(labels, "_container_detection", localhost->system_info->container_detection, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->virt_detection) + rrdlabels_add(labels, "_virt_detection", localhost->system_info->virt_detection, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->is_k8s_node) + rrdlabels_add(labels, "_is_k8s_node", localhost->system_info->is_k8s_node, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->install_type) + rrdlabels_add(labels, "_install_type", localhost->system_info->install_type, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->prebuilt_arch) + rrdlabels_add(labels, "_prebuilt_arch", localhost->system_info->prebuilt_arch, RRDLABEL_SRC_AUTO); + + if (localhost->system_info->prebuilt_dist) + rrdlabels_add(labels, "_prebuilt_dist", localhost->system_info->prebuilt_dist, RRDLABEL_SRC_AUTO); + + add_aclk_host_labels(); + + health_add_host_labels(); + + rrdlabels_add( + labels, "_is_parent", (localhost->senders_count > 0) ? "true" : "false", RRDLABEL_SRC_AUTO); + + if (localhost->rrdpush_send_destination) + rrdlabels_add(labels, "_streams_to", localhost->rrdpush_send_destination, RRDLABEL_SRC_AUTO); +} + +void rrdhost_set_is_parent_label(int count) { + DICTIONARY *labels = localhost->rrdlabels; + + if (count == 0 || count == 1) { + rrdlabels_add( + labels, "_is_parent", (count) ? "true" : "false", RRDLABEL_SRC_AUTO); + + //queue a node info +#ifdef ENABLE_ACLK + if (netdata_cloud_setting) { + aclk_queue_node_info(localhost); + } +#endif + } +} + +static void rrdhost_load_config_labels(void) { + int status = config_load(NULL, 1, CONFIG_SECTION_HOST_LABEL); + if(!status) { + char *filename = CONFIG_DIR "/" CONFIG_FILENAME; + error("RRDLABEL: Cannot reload the configuration file '%s', using labels in memory", filename); + } + + struct section *co = appconfig_get_section(&netdata_config, CONFIG_SECTION_HOST_LABEL); + if(co) { + config_section_wrlock(co); + struct config_option *cv; + for(cv = co->values; cv ; cv = cv->next) { + rrdlabels_add(localhost->rrdlabels, cv->name, cv->value, RRDLABEL_SRC_CONFIG); + cv->flags |= CONFIG_VALUE_USED; + } + config_section_unlock(co); + } +} + +static void rrdhost_load_kubernetes_labels(void) { + char label_script[sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("get-kubernetes-labels.sh") + 2)]; + sprintf(label_script, "%s/%s", netdata_configured_primary_plugins_dir, "get-kubernetes-labels.sh"); + + if (unlikely(access(label_script, R_OK) != 0)) { + error("Kubernetes pod label fetching script %s not found.",label_script); + return; + } + + debug(D_RRDHOST, "Attempting to fetch external labels via %s", label_script); + + pid_t pid; + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(label_script, &pid, &fp_child_input); + if(!fp_child_output) return; + + char buffer[1000 + 1]; + while (fgets(buffer, 1000, fp_child_output) != NULL) + rrdlabels_add_pair(localhost->rrdlabels, buffer, RRDLABEL_SRC_AUTO|RRDLABEL_SRC_K8S); + + // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':' + // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null + int rc = netdata_pclose(fp_child_input, fp_child_output, pid); + if(rc) error("%s exited abnormally. Failed to get kubernetes labels.", label_script); +} + +void reload_host_labels(void) { + if(!localhost->rrdlabels) + localhost->rrdlabels = rrdlabels_create(); + + rrdlabels_unmark_all(localhost->rrdlabels); + + // priority is important here + rrdhost_load_config_labels(); + rrdhost_load_kubernetes_labels(); + rrdhost_load_auto_labels(); + + rrdlabels_remove_all_unmarked(localhost->rrdlabels); + metaqueue_store_host_labels(localhost->machine_guid); + + health_label_log_save(localhost); + + rrdpush_send_host_labels(localhost); + health_reload(); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - delete host files + +void rrdhost_delete_charts(RRDHOST *host) { + if(!host) return; + + info("Deleting database of host '%s'...", rrdhost_hostname(host)); + + RRDSET *st; + + // we get a write lock + // to ensure only one thread is saving the database + rrdset_foreach_write(st, host) { + rrdset_delete_files(st); + } + rrdset_foreach_done(st); + + recursively_delete_dir(host->cache_dir, "left over host"); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - cleanup host files + +void rrdhost_cleanup_charts(RRDHOST *host) { + if(!host) return; + + info("Cleaning up database of host '%s'...", rrdhost_hostname(host)); + + RRDSET *st; + uint32_t rrdhost_delete_obsolete_charts = rrdhost_option_check(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS); + + // we get a write lock + // to ensure only one thread is saving the database + rrdset_foreach_write(st, host) { + + if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)) + rrdset_delete_files(st); + + else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) + rrdset_delete_obsolete_dimensions(st); + + else + rrdset_save(st); + + } + rrdset_foreach_done(st); +} + + +// ---------------------------------------------------------------------------- +// RRDHOST - save all hosts to disk + +void rrdhost_save_all(void) { + info("Saving database [%zu hosts(s)]...", rrd_hosts_available); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) + rrdhost_save_charts(host); + + rrd_unlock(); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - save or delete all hosts from disk + +void rrdhost_cleanup_all(void) { + info("Cleaning up database [%zu hosts(s)]...", rrd_hosts_available); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) { + if (host != localhost && rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) && !host->receiver + /* don't delete multi-host DB host files */ + && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) + ) + rrdhost_delete_charts(host); + else + rrdhost_cleanup_charts(host); + } + + rrd_unlock(); +} + + +// ---------------------------------------------------------------------------- +// RRDHOST - set system info from environment variables +// system_info fields must be heap allocated or NULL +int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) { + int res = 0; + + if (!strcmp(name, "NETDATA_PROTOCOL_VERSION")) + return res; + else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_TYPE")){ + freez(system_info->cloud_provider_type); + system_info->cloud_provider_type = strdupz(value); + } + else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE")){ + freez(system_info->cloud_instance_type); + system_info->cloud_instance_type = strdupz(value); + } + else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_REGION")){ + freez(system_info->cloud_instance_region); + system_info->cloud_instance_region = strdupz(value); + } + else if(!strcmp(name, "NETDATA_CONTAINER_OS_NAME")){ + freez(system_info->container_os_name); + system_info->container_os_name = strdupz(value); + } + else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID")){ + freez(system_info->container_os_id); + system_info->container_os_id = strdupz(value); + } + else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID_LIKE")){ + freez(system_info->container_os_id_like); + system_info->container_os_id_like = strdupz(value); + } + else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION")){ + freez(system_info->container_os_version); + system_info->container_os_version = strdupz(value); + } + else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION_ID")){ + freez(system_info->container_os_version_id); + system_info->container_os_version_id = strdupz(value); + } + else if(!strcmp(name, "NETDATA_CONTAINER_OS_DETECTION")){ + freez(system_info->container_os_detection); + system_info->container_os_detection = strdupz(value); + } + else if(!strcmp(name, "NETDATA_HOST_OS_NAME")){ + freez(system_info->host_os_name); + system_info->host_os_name = strdupz(value); + json_fix_string(system_info->host_os_name); + } + else if(!strcmp(name, "NETDATA_HOST_OS_ID")){ + freez(system_info->host_os_id); + system_info->host_os_id = strdupz(value); + } + else if(!strcmp(name, "NETDATA_HOST_OS_ID_LIKE")){ + freez(system_info->host_os_id_like); + system_info->host_os_id_like = strdupz(value); + } + else if(!strcmp(name, "NETDATA_HOST_OS_VERSION")){ + freez(system_info->host_os_version); + system_info->host_os_version = strdupz(value); + } + else if(!strcmp(name, "NETDATA_HOST_OS_VERSION_ID")){ + freez(system_info->host_os_version_id); + system_info->host_os_version_id = strdupz(value); + } + else if(!strcmp(name, "NETDATA_HOST_OS_DETECTION")){ + freez(system_info->host_os_detection); + system_info->host_os_detection = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_NAME")){ + freez(system_info->kernel_name); + system_info->kernel_name = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT")){ + freez(system_info->host_cores); + system_info->host_cores = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_CPU_FREQ")){ + freez(system_info->host_cpu_freq); + system_info->host_cpu_freq = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_RAM")){ + freez(system_info->host_ram_total); + system_info->host_ram_total = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_DISK_SIZE")){ + freez(system_info->host_disk_space); + system_info->host_disk_space = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_VERSION")){ + freez(system_info->kernel_version); + system_info->kernel_version = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_ARCHITECTURE")){ + freez(system_info->architecture); + system_info->architecture = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_VIRTUALIZATION")){ + freez(system_info->virtualization); + system_info->virtualization = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_VIRT_DETECTION")){ + freez(system_info->virt_detection); + system_info->virt_detection = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER")){ + freez(system_info->container); + system_info->container = strdupz(value); + } + else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER_DETECTION")){ + freez(system_info->container_detection); + system_info->container_detection = strdupz(value); + } + else if(!strcmp(name, "NETDATA_HOST_IS_K8S_NODE")){ + freez(system_info->is_k8s_node); + system_info->is_k8s_node = strdupz(value); + } + else if (!strcmp(name, "NETDATA_SYSTEM_CPU_VENDOR")) + return res; + else if (!strcmp(name, "NETDATA_SYSTEM_CPU_MODEL")) + return res; + else if (!strcmp(name, "NETDATA_SYSTEM_CPU_DETECTION")) + return res; + else if (!strcmp(name, "NETDATA_SYSTEM_RAM_DETECTION")) + return res; + else if (!strcmp(name, "NETDATA_SYSTEM_DISK_DETECTION")) + return res; + else if (!strcmp(name, "NETDATA_CONTAINER_IS_OFFICIAL_IMAGE")) + return res; + else { + res = 1; + } + + return res; +} + +// Added for gap-filling, if this proves to be a bottleneck in large-scale systems then we will need to cache +// the last entry times as the metric updates, but let's see if it is a problem first. +time_t rrdhost_last_entry_t(RRDHOST *h) { + RRDSET *st; + time_t result = 0; + + rrdset_foreach_read(st, h) { + time_t st_last = rrdset_last_entry_t(st); + + if (st_last > result) + result = st_last; + } + rrdset_foreach_done(st); + return result; +} diff --git a/database/rrdlabels.c b/database/rrdlabels.c new file mode 100644 index 0000000..743499a --- /dev/null +++ b/database/rrdlabels.c @@ -0,0 +1,1183 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// labels sanitization + +/* + * All labels follow these rules: + * + * Character Symbol Values Names + * UTF-8 characters UTF-8 yes -> _ + * Lower case letter [a-z] yes yes + * Upper case letter [A-Z] yes -> [a-z] + * Digit [0-9] yes yes + * Underscore _ yes yes + * Minus - yes yes + * Plus + yes -> _ + * Colon : yes -> _ + * Semicolon ; -> : -> _ + * Equal = -> : -> _ + * Period . yes yes + * Comma , -> . -> . + * Slash / yes yes + * Backslash \ -> / -> / + * At @ yes -> _ + * Space yes -> _ + * Opening parenthesis ( yes -> _ + * Closing parenthesis ) yes -> _ + * anything else -> _ -> _ +* + * The above rules should allow users to set in tags (indicative): + * + * 1. hostnames and domain names as-is + * 2. email addresses as-is + * 3. floating point numbers, converted to always use a dot as the decimal point + * + * Leading and trailing spaces and control characters are removed from both label + * names and values. + * + * Multiple spaces inside the label name or the value are removed (only 1 is retained). + * In names spaces are also converted to underscores. + * + * Names that are only underscores are rejected (they do not enter the dictionary). + * + * The above rules do not require any conversion to be included in JSON strings. + * + * Label names and values are truncated to LABELS_MAX_LENGTH (200) characters. + * + * When parsing, label key and value are separated by the first colon (:) found. + * So label:value1:value2 is parsed as key = "label", value = "value1:value2" + * + * This means a label key cannot contain a colon (:) - it is converted to + * underscore if it does. + * + */ + +#define RRDLABELS_MAX_NAME_LENGTH 200 +#define RRDLABELS_MAX_VALUE_LENGTH 800 // 800 in bytes, up to 200 UTF-8 characters + +static unsigned char label_spaces_char_map[256]; +static unsigned char label_names_char_map[256]; +static unsigned char label_values_char_map[256] = { + [0] = '\0', // + [1] = '_', // + [2] = '_', // + [3] = '_', // + [4] = '_', // + [5] = '_', // + [6] = '_', // + [7] = '_', // + [8] = '_', // + [9] = '_', // + [10] = '_', // + [11] = '_', // + [12] = '_', // + [13] = '_', // + [14] = '_', // + [15] = '_', // + [16] = '_', // + [17] = '_', // + [18] = '_', // + [19] = '_', // + [20] = '_', // + [21] = '_', // + [22] = '_', // + [23] = '_', // + [24] = '_', // + [25] = '_', // + [26] = '_', // + [27] = '_', // + [28] = '_', // + [29] = '_', // + [30] = '_', // + [31] = '_', // + [32] = ' ', // SPACE keep + [33] = '_', // ! + [34] = '_', // " + [35] = '_', // # + [36] = '_', // $ + [37] = '_', // % + [38] = '_', // & + [39] = '_', // ' + [40] = '(', // ( keep + [41] = ')', // ) keep + [42] = '_', // * + [43] = '+', // + keep + [44] = '.', // , convert , to . + [45] = '-', // - keep + [46] = '.', // . keep + [47] = '/', // / keep + [48] = '0', // 0 keep + [49] = '1', // 1 keep + [50] = '2', // 2 keep + [51] = '3', // 3 keep + [52] = '4', // 4 keep + [53] = '5', // 5 keep + [54] = '6', // 6 keep + [55] = '7', // 7 keep + [56] = '8', // 8 keep + [57] = '9', // 9 keep + [58] = ':', // : keep + [59] = ':', // ; convert ; to : + [60] = '_', // < + [61] = ':', // = convert = to : + [62] = '_', // > + [63] = '_', // ? + [64] = '@', // @ + [65] = 'A', // A keep + [66] = 'B', // B keep + [67] = 'C', // C keep + [68] = 'D', // D keep + [69] = 'E', // E keep + [70] = 'F', // F keep + [71] = 'G', // G keep + [72] = 'H', // H keep + [73] = 'I', // I keep + [74] = 'J', // J keep + [75] = 'K', // K keep + [76] = 'L', // L keep + [77] = 'M', // M keep + [78] = 'N', // N keep + [79] = 'O', // O keep + [80] = 'P', // P keep + [81] = 'Q', // Q keep + [82] = 'R', // R keep + [83] = 'S', // S keep + [84] = 'T', // T keep + [85] = 'U', // U keep + [86] = 'V', // V keep + [87] = 'W', // W keep + [88] = 'X', // X keep + [89] = 'Y', // Y keep + [90] = 'Z', // Z keep + [91] = '_', // [ + [92] = '/', // backslash convert \ to / + [93] = '_', // ] + [94] = '_', // ^ + [95] = '_', // _ keep + [96] = '_', // ` + [97] = 'a', // a keep + [98] = 'b', // b keep + [99] = 'c', // c keep + [100] = 'd', // d keep + [101] = 'e', // e keep + [102] = 'f', // f keep + [103] = 'g', // g keep + [104] = 'h', // h keep + [105] = 'i', // i keep + [106] = 'j', // j keep + [107] = 'k', // k keep + [108] = 'l', // l keep + [109] = 'm', // m keep + [110] = 'n', // n keep + [111] = 'o', // o keep + [112] = 'p', // p keep + [113] = 'q', // q keep + [114] = 'r', // r keep + [115] = 's', // s keep + [116] = 't', // t keep + [117] = 'u', // u keep + [118] = 'v', // v keep + [119] = 'w', // w keep + [120] = 'x', // x keep + [121] = 'y', // y keep + [122] = 'z', // z keep + [123] = '_', // { + [124] = '_', // | + [125] = '_', // } + [126] = '_', // ~ + [127] = '_', // + [128] = '_', // + [129] = '_', // + [130] = '_', // + [131] = '_', // + [132] = '_', // + [133] = '_', // + [134] = '_', // + [135] = '_', // + [136] = '_', // + [137] = '_', // + [138] = '_', // + [139] = '_', // + [140] = '_', // + [141] = '_', // + [142] = '_', // + [143] = '_', // + [144] = '_', // + [145] = '_', // + [146] = '_', // + [147] = '_', // + [148] = '_', // + [149] = '_', // + [150] = '_', // + [151] = '_', // + [152] = '_', // + [153] = '_', // + [154] = '_', // + [155] = '_', // + [156] = '_', // + [157] = '_', // + [158] = '_', // + [159] = '_', // + [160] = '_', // + [161] = '_', // + [162] = '_', // + [163] = '_', // + [164] = '_', // + [165] = '_', // + [166] = '_', // + [167] = '_', // + [168] = '_', // + [169] = '_', // + [170] = '_', // + [171] = '_', // + [172] = '_', // + [173] = '_', // + [174] = '_', // + [175] = '_', // + [176] = '_', // + [177] = '_', // + [178] = '_', // + [179] = '_', // + [180] = '_', // + [181] = '_', // + [182] = '_', // + [183] = '_', // + [184] = '_', // + [185] = '_', // + [186] = '_', // + [187] = '_', // + [188] = '_', // + [189] = '_', // + [190] = '_', // + [191] = '_', // + [192] = '_', // + [193] = '_', // + [194] = '_', // + [195] = '_', // + [196] = '_', // + [197] = '_', // + [198] = '_', // + [199] = '_', // + [200] = '_', // + [201] = '_', // + [202] = '_', // + [203] = '_', // + [204] = '_', // + [205] = '_', // + [206] = '_', // + [207] = '_', // + [208] = '_', // + [209] = '_', // + [210] = '_', // + [211] = '_', // + [212] = '_', // + [213] = '_', // + [214] = '_', // + [215] = '_', // + [216] = '_', // + [217] = '_', // + [218] = '_', // + [219] = '_', // + [220] = '_', // + [221] = '_', // + [222] = '_', // + [223] = '_', // + [224] = '_', // + [225] = '_', // + [226] = '_', // + [227] = '_', // + [228] = '_', // + [229] = '_', // + [230] = '_', // + [231] = '_', // + [232] = '_', // + [233] = '_', // + [234] = '_', // + [235] = '_', // + [236] = '_', // + [237] = '_', // + [238] = '_', // + [239] = '_', // + [240] = '_', // + [241] = '_', // + [242] = '_', // + [243] = '_', // + [244] = '_', // + [245] = '_', // + [246] = '_', // + [247] = '_', // + [248] = '_', // + [249] = '_', // + [250] = '_', // + [251] = '_', // + [252] = '_', // + [253] = '_', // + [254] = '_', // + [255] = '_' // +}; + +__attribute__((constructor)) void initialize_labels_keys_char_map(void) { + // copy the values char map to the names char map + size_t i; + for(i = 0; i < 256 ;i++) + label_names_char_map[i] = label_values_char_map[i]; + + // apply overrides to the label names map + label_names_char_map['A'] = 'a'; + label_names_char_map['B'] = 'b'; + label_names_char_map['C'] = 'c'; + label_names_char_map['D'] = 'd'; + label_names_char_map['E'] = 'e'; + label_names_char_map['F'] = 'f'; + label_names_char_map['G'] = 'g'; + label_names_char_map['H'] = 'h'; + label_names_char_map['I'] = 'i'; + label_names_char_map['J'] = 'j'; + label_names_char_map['K'] = 'k'; + label_names_char_map['L'] = 'l'; + label_names_char_map['M'] = 'm'; + label_names_char_map['N'] = 'n'; + label_names_char_map['O'] = 'o'; + label_names_char_map['P'] = 'p'; + label_names_char_map['Q'] = 'q'; + label_names_char_map['R'] = 'r'; + label_names_char_map['S'] = 's'; + label_names_char_map['T'] = 't'; + label_names_char_map['U'] = 'u'; + label_names_char_map['V'] = 'v'; + label_names_char_map['W'] = 'w'; + label_names_char_map['X'] = 'x'; + label_names_char_map['Y'] = 'y'; + label_names_char_map['Z'] = 'z'; + label_names_char_map['='] = '_'; + label_names_char_map[':'] = '_'; + label_names_char_map['+'] = '_'; + label_names_char_map[';'] = '_'; + label_names_char_map['@'] = '_'; + label_names_char_map['('] = '_'; + label_names_char_map[')'] = '_'; + label_names_char_map[' '] = '_'; + label_names_char_map['\\'] = '/'; + + // create the spaces map + for(i = 0; i < 256 ;i++) + label_spaces_char_map[i] = (isspace(i) || iscntrl(i) || !isprint(i))?1:0; + +} + +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length) { + if(unlikely(!dst_size)) return 0; + + if(unlikely(!src || !*src)) { + strncpyz((char *)dst, empty, dst_size); + dst[dst_size - 1] = '\0'; + size_t len = strlen((char *)dst); + if(multibyte_length) *multibyte_length = len; + return len; + } + + unsigned char *d = dst; + + // make room for the final string termination + unsigned char *end = &d[dst_size - 1]; + + // copy while converting, but keep only one white space + // we start wil last_is_space = 1 to skip leading spaces + int last_is_space = 1; + + size_t mblen = 0; + + while(*src && d < end) { + unsigned char c = *src; + + if(IS_UTF8_STARTBYTE(c) && IS_UTF8_BYTE(src[1]) && d + 2 < end) { + // UTF-8 multi-byte encoded character + + // find how big this character is (2-4 bytes) + size_t utf_character_size = 2; + while(utf_character_size <= 4 && src[utf_character_size] && IS_UTF8_BYTE(src[utf_character_size]) && !IS_UTF8_STARTBYTE(src[utf_character_size])) + utf_character_size++; + + if(utf) { + while(utf_character_size) { + utf_character_size--; + *d++ = *src++; + } + } + else { + // UTF-8 characters are not allowed. + // Assume it is an underscore + // and skip all except the first byte + *d++ = '_'; + src += (utf_character_size - 1); + } + + last_is_space = 0; + mblen++; + continue; + } + + if(label_spaces_char_map[c]) { + // a space character + + if(!last_is_space) { + // add one space + *d++ = char_map[c]; + mblen++; + } + + last_is_space++; + } + else { + *d++ = char_map[c]; + last_is_space = 0; + mblen++; + } + + src++; + } + + // remove the last trailing space + if(last_is_space && d > dst) { + d--; + mblen--; + } + + // put a termination at the end of what we copied + *d = '\0'; + + // check if dst is all underscores and empty it if it is + if(*dst == '_') { + unsigned char *t = dst; + while (*t == '_') t++; + if (unlikely(*t == '\0')) { + *dst = '\0'; + mblen = 0; + } + } + + if(unlikely(*dst == '\0')) { + strncpyz((char *)dst, empty, dst_size); + dst[dst_size - 1] = '\0'; + mblen = strlen((char *)dst); + if(multibyte_length) *multibyte_length = mblen; + return mblen; + } + + if(multibyte_length) *multibyte_length = mblen; + + return d - dst; +} + +static inline size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_names_char_map, 0, "", NULL); +} + +static inline size_t rrdlabels_sanitize_value(char *dst, const char *src, size_t dst_size) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_values_char_map, 1, "[none]", NULL); +} + +// ---------------------------------------------------------------------------- +// rrdlabels_create() + +typedef struct rrdlabel { + STRING *label_value; + RRDLABEL_SRC label_source; +} RRDLABEL; + +static void rrdlabel_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *dict_ptr __maybe_unused) { + RRDLABEL *lb = (RRDLABEL *)value; + + // label_value is already allocated by the STRING + lb->label_source |= RRDLABEL_FLAG_NEW; + lb->label_source &= ~RRDLABEL_FLAG_OLD; +} + +static void rrdlabel_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *dict_ptr __maybe_unused) { + RRDLABEL *lb = (RRDLABEL *)value; + + string_freez(lb->label_value); + lb->label_value = NULL; +} + +static bool rrdlabel_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *oldvalue, void *newvalue, void *dict_ptr __maybe_unused) { + RRDLABEL *lbold = (RRDLABEL *)oldvalue; + RRDLABEL *lbnew = (RRDLABEL *)newvalue; + + if(lbold->label_value == lbnew->label_value) { + // they are the same + + lbold->label_source |= lbnew->label_source; + lbold->label_source |= RRDLABEL_FLAG_OLD; + lbold->label_source &= ~RRDLABEL_FLAG_NEW; + + // free the new one + string_freez(lbnew->label_value); + + return false; + } + + // they are different + + string_freez(lbold->label_value); + lbold->label_value = lbnew->label_value; + lbold->label_source = lbnew->label_source; + lbold->label_source |= RRDLABEL_FLAG_NEW; + lbold->label_source &= ~RRDLABEL_FLAG_OLD; + + return true; +} + +DICTIONARY *rrdlabels_create(void) { + DICTIONARY *dict = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(dict, rrdlabel_insert_callback, dict); + dictionary_register_delete_callback(dict, rrdlabel_delete_callback, dict); + dictionary_register_conflict_callback(dict, rrdlabel_conflict_callback, dict); + return dict; +} + + +// ---------------------------------------------------------------------------- +// rrdlabels_destroy() + +void rrdlabels_destroy(DICTIONARY *labels_dict) { + dictionary_destroy(labels_dict); +} + +void rrdlabels_flush(DICTIONARY *labels_dict) { + dictionary_flush(labels_dict); +} + +// ---------------------------------------------------------------------------- +// rrdlabels_add() + +static void labels_add_already_sanitized(DICTIONARY *dict, const char *key, const char *value, RRDLABEL_SRC ls) { + if(ls & RRDLABEL_FLAG_NEW) ls &= ~RRDLABEL_FLAG_NEW; + if(ls & RRDLABEL_FLAG_OLD) ls &= ~RRDLABEL_FLAG_OLD; + + RRDLABEL tmp = { + .label_source = ls, + .label_value = string_strdupz(value) + }; + dictionary_set(dict, key, &tmp, sizeof(RRDLABEL)); +} + + +void rrdlabels_add(DICTIONARY *dict, const char *name, const char *value, RRDLABEL_SRC ls) { + if(!dict) { + error("%s(): called with NULL dictionary.", __FUNCTION__ ); + return; + } + + char n[RRDLABELS_MAX_NAME_LENGTH + 1], v[RRDLABELS_MAX_VALUE_LENGTH + 1]; + rrdlabels_sanitize_name(n, name, RRDLABELS_MAX_NAME_LENGTH); + rrdlabels_sanitize_value(v, value, RRDLABELS_MAX_VALUE_LENGTH); + + if(!*n) { + error("%s: cannot add name '%s' (value '%s') which is sanitized as empty string", __FUNCTION__, name, value); + return; + } + + labels_add_already_sanitized(dict, n, v, ls); +} + +static const char *get_quoted_string_up_to(char *dst, size_t dst_size, const char *string, char upto1, char upto2) { + size_t len = 0; + char *d = dst, quote = 0; + while(*string && len++ < dst_size) { + if(unlikely(!quote && (*string == '\'' || *string == '"'))) { + quote = *string++; + continue; + } + + if(unlikely(quote && *string == quote)) { + quote = 0; + string++; + continue; + } + + if(unlikely(quote && *string == '\\' && string[1])) { + string++; + *d++ = *string++; + continue; + } + + if(unlikely(!quote && (*string == upto1 || *string == upto2))) break; + + *d++ = *string++; + } + *d = '\0'; + + if(*string) string++; + + return string; +} + +void rrdlabels_add_pair(DICTIONARY *dict, const char *string, RRDLABEL_SRC ls) { + if(!dict) { + error("%s(): called with NULL dictionary.", __FUNCTION__ ); + return; + } + + char name[RRDLABELS_MAX_NAME_LENGTH + 1]; + string = get_quoted_string_up_to(name, RRDLABELS_MAX_NAME_LENGTH, string, '=', ':'); + + char value[RRDLABELS_MAX_VALUE_LENGTH + 1]; + get_quoted_string_up_to(value, RRDLABELS_MAX_VALUE_LENGTH, string, '\0', '\0'); + + rrdlabels_add(dict, name, value, ls); +} + +// ---------------------------------------------------------------------------- +// rrdlabels_get_value_to_buffer_or_null() + +void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const char *key, const char *quote, const char *null) { + if(!labels) return; + + const DICTIONARY_ITEM *acquired_item = dictionary_get_and_acquire_item(labels, key); + RRDLABEL *lb = dictionary_acquired_item_value(acquired_item); + + if(lb && lb->label_value) + buffer_sprintf(wb, "%s%s%s", quote, string2str(lb->label_value), quote); + else + buffer_strcat(wb, null); + + dictionary_acquired_item_release(labels, acquired_item); +} + +// ---------------------------------------------------------------------------- +// rrdlabels_get_value_to_char_or_null() + +void rrdlabels_get_value_to_char_or_null(DICTIONARY *labels, char **value, const char *key) { + const DICTIONARY_ITEM *acquired_item = dictionary_get_and_acquire_item(labels, key); + RRDLABEL *lb = dictionary_acquired_item_value(acquired_item); + + *value = (lb && lb->label_value) ? strdupz(string2str(lb->label_value)) : NULL; + + dictionary_acquired_item_release(labels, acquired_item); +} + +// ---------------------------------------------------------------------------- +// rrdlabels_unmark_all() +// remove labels RRDLABEL_FLAG_OLD and RRDLABEL_FLAG_NEW from all dictionary items + +static int remove_flags_old_new(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + RRDLABEL *lb = (RRDLABEL *)value; + + if(lb->label_source & RRDLABEL_FLAG_OLD) lb->label_source &= ~RRDLABEL_FLAG_OLD; + if(lb->label_source & RRDLABEL_FLAG_NEW) lb->label_source &= ~RRDLABEL_FLAG_NEW; + + return 1; +} + +void rrdlabels_unmark_all(DICTIONARY *labels) { + dictionary_walkthrough_read(labels, remove_flags_old_new, NULL); +} + + +// ---------------------------------------------------------------------------- +// rrdlabels_remove_all_unmarked() +// remove dictionary items that are neither old, nor new + +static int remove_not_old_not_new_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + DICTIONARY *dict = (DICTIONARY *)data; + RRDLABEL *lb = (RRDLABEL *)value; + + if(!(lb->label_source & (RRDLABEL_FLAG_OLD | RRDLABEL_FLAG_NEW | RRDLABEL_FLAG_PERMANENT))) { + dictionary_del(dict, name); + return 1; + } + + return 0; +} + +void rrdlabels_remove_all_unmarked(DICTIONARY *labels) { + dictionary_walkthrough_write(labels, remove_not_old_not_new_callback, labels); +} + + +// ---------------------------------------------------------------------------- +// rrdlabels_walkthrough_read() + +struct labels_walkthrough { + int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data); + void *data; +}; + +static int labels_walkthrough_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + struct labels_walkthrough *d = (struct labels_walkthrough *)data; + RRDLABEL *lb = (RRDLABEL *)value; + + RRDLABEL_SRC ls = lb->label_source; + if(ls & RRDLABEL_FLAG_NEW) ls &= ~RRDLABEL_FLAG_NEW; + if(ls & RRDLABEL_FLAG_OLD) ls &= ~RRDLABEL_FLAG_OLD; + + return d->callback(name, string2str(lb->label_value), ls, d->data); +} + +int rrdlabels_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data) { + struct labels_walkthrough d = { + .callback = callback, + .data = data + }; + return dictionary_walkthrough_read(labels, labels_walkthrough_callback, &d); +} + +int rrdlabels_sorted_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data) { + struct labels_walkthrough d = { + .callback = callback, + .data = data + }; + return dictionary_sorted_walkthrough_read(labels, labels_walkthrough_callback, &d); +} + + +// ---------------------------------------------------------------------------- +// rrdlabels_migrate_to_these() +// migrate an existing label list to a new list, INPLACE + +static int copy_label_to_dictionary_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + DICTIONARY *dst = (DICTIONARY *)data; + RRDLABEL *lb = (RRDLABEL *)value; + labels_add_already_sanitized(dst, name, string2str(lb->label_value), lb->label_source); + return 1; +} + +void rrdlabels_migrate_to_these(DICTIONARY *dst, DICTIONARY *src) { + if(!dst || !src) return; + + // remove the RRDLABEL_FLAG_OLD and RRDLABEL_FLAG_NEW from all items + rrdlabels_unmark_all(dst); + + // Mark the existing ones as RRDLABEL_FLAG_OLD, + // or the newly added ones as RRDLABEL_FLAG_NEW + dictionary_walkthrough_read(src, copy_label_to_dictionary_callback, dst); + + // remove the unmarked dst + rrdlabels_remove_all_unmarked(dst); +} + +void rrdlabels_copy(DICTIONARY *dst, DICTIONARY *src) { + if(!dst || !src) return; + + dictionary_walkthrough_read(src, copy_label_to_dictionary_callback, dst); +} + + +// ---------------------------------------------------------------------------- +// rrdlabels_match_simple_pattern() +// returns true when there are keys in the dictionary matching a simple pattern + +struct simple_pattern_match_name_value { + SIMPLE_PATTERN *pattern; + char equal; +}; + +static int simple_pattern_match_name_only_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + struct simple_pattern_match_name_value *t = (struct simple_pattern_match_name_value *)data; + (void)value; + + // we return -1 to stop the walkthrough on first match + if(simple_pattern_matches(t->pattern, name)) return -1; + + return 0; +} + +static int simple_pattern_match_name_and_value_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + struct simple_pattern_match_name_value *t = (struct simple_pattern_match_name_value *)data; + RRDLABEL *lb = (RRDLABEL *)value; + + // we return -1 to stop the walkthrough on first match + if(simple_pattern_matches(t->pattern, name)) return -1; + + size_t len = RRDLABELS_MAX_NAME_LENGTH + RRDLABELS_MAX_VALUE_LENGTH + 2; // +1 for =, +1 for \0 + char tmp[len], *dst = &tmp[0]; + const char *v = string2str(lb->label_value); + + // copy the name + while(*name) *dst++ = *name++; + + // add the equal + *dst++ = t->equal; + + // add the value + while(*v) *dst++ = *v++; + + // terminate it + *dst = '\0'; + + if(simple_pattern_matches(t->pattern, tmp)) return -1; + + return 0; +} + +bool rrdlabels_match_simple_pattern_parsed(DICTIONARY *labels, SIMPLE_PATTERN *pattern, char equal) { + if (!labels) return false; + + struct simple_pattern_match_name_value t = { + .pattern = pattern, + .equal = equal + }; + + int ret = dictionary_walkthrough_read(labels, equal?simple_pattern_match_name_and_value_callback:simple_pattern_match_name_only_callback, &t); + + return (ret == -1)?true:false; +} + +bool rrdlabels_match_simple_pattern(DICTIONARY *labels, const char *simple_pattern_txt) { + if (!labels) return false; + + SIMPLE_PATTERN *pattern = simple_pattern_create(simple_pattern_txt, " ,|\t\r\n\f\v", SIMPLE_PATTERN_EXACT); + char equal = '\0'; + + const char *s; + for(s = simple_pattern_txt; *s ; s++) { + if (*s == '=' || *s == ':') { + equal = *s; + break; + } + } + + bool ret = rrdlabels_match_simple_pattern_parsed(labels, pattern, equal); + + simple_pattern_free(pattern); + + return ret; +} + + +// ---------------------------------------------------------------------------- +// Log all labels + +static int rrdlabels_log_label_to_buffer_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + + BUFFER *wb = (BUFFER *)data; + RRDLABEL *lb = (RRDLABEL *)value; + + buffer_sprintf(wb, "Label: %s: \"%s\" (", name, string2str(lb->label_value)); + + size_t sources = 0; + if(lb->label_source & RRDLABEL_SRC_AUTO) { + buffer_sprintf(wb, "auto"); + sources++; + } + + if(lb->label_source & RRDLABEL_SRC_CONFIG) + buffer_sprintf(wb, "%snetdata.conf", sources++?",":""); + + if(lb->label_source & RRDLABEL_SRC_K8S) + buffer_sprintf(wb, "%sk8s", sources++?",":""); + + if(lb->label_source & RRDLABEL_SRC_ACLK) + buffer_sprintf(wb, "%saclk", sources++?",":""); + + if(!sources) + buffer_strcat(wb, "unknown"); + + buffer_strcat(wb, ")\n"); + + return 1; +} + +void rrdlabels_log_to_buffer(DICTIONARY *labels, BUFFER *wb) { + dictionary_sorted_walkthrough_read(labels, rrdlabels_log_label_to_buffer_callback, wb); +} + + +// ---------------------------------------------------------------------------- +// rrdlabels_to_buffer() + +struct labels_to_buffer { + BUFFER *wb; + bool (*filter_callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data); + void *filter_data; + void (*name_sanitizer)(char *dst, const char *src, size_t dst_size); + void (*value_sanitizer)(char *dst, const char *src, size_t dst_size); + const char *before_each; + const char *quote; + const char *equal; + const char *between_them; + size_t count; +}; + +static int label_to_buffer_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + struct labels_to_buffer *t = (struct labels_to_buffer *)data; + RRDLABEL *lb = (RRDLABEL *)value; + + size_t n_size = (t->name_sanitizer ) ? ( RRDLABELS_MAX_NAME_LENGTH * 2 ) : 1; + size_t v_size = (t->value_sanitizer) ? ( RRDLABELS_MAX_VALUE_LENGTH * 2 ) : 1; + + char n[n_size]; + char v[v_size]; + + const char *nn = name, *vv = string2str(lb->label_value); + + if(t->name_sanitizer) { + t->name_sanitizer(n, name, n_size); + nn = n; + } + + if(t->value_sanitizer) { + t->value_sanitizer(v, string2str(lb->label_value), v_size); + vv = v; + } + + if(!t->filter_callback || t->filter_callback(name, string2str(lb->label_value), lb->label_source, t->filter_data)) { + buffer_sprintf(t->wb, "%s%s%s%s%s%s%s%s%s", t->count++?t->between_them:"", t->before_each, t->quote, nn, t->quote, t->equal, t->quote, vv, t->quote); + return 1; + } + + return 0; +} + +int rrdlabels_to_buffer(DICTIONARY *labels, BUFFER *wb, const char *before_each, const char *equal, const char *quote, const char *between_them, bool (*filter_callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *filter_data, void (*name_sanitizer)(char *dst, const char *src, size_t dst_size), void (*value_sanitizer)(char *dst, const char *src, size_t dst_size)) { + struct labels_to_buffer tmp = { + .wb = wb, + .filter_callback = filter_callback, + .filter_data = filter_data, + .name_sanitizer = name_sanitizer, + .value_sanitizer = value_sanitizer, + .before_each = before_each, + .equal = equal, + .quote = quote, + .between_them = between_them, + .count = 0 + }; + return dictionary_walkthrough_read(labels, label_to_buffer_callback, (void *)&tmp); +} + +void rrdset_update_rrdlabels(RRDSET *st, DICTIONARY *new_rrdlabels) { + if(!st->rrdlabels) + st->rrdlabels = rrdlabels_create(); + + if (new_rrdlabels) + rrdlabels_migrate_to_these(st->rrdlabels, new_rrdlabels); + + metaqueue_chart_labels(st); +} + + +// ---------------------------------------------------------------------------- +// rrdlabels unit test + +struct rrdlabels_unittest_add_a_pair { + const char *pair; + const char *expected_name; + const char *expected_value; + const char *name; + const char *value; + RRDLABEL_SRC ls; + int errors; +}; + +int rrdlabels_unittest_add_a_pair_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct rrdlabels_unittest_add_a_pair *t = (struct rrdlabels_unittest_add_a_pair *)data; + + t->name = name; + t->value = value; + t->ls = ls; + + if(strcmp(name, t->expected_name) != 0) { + fprintf(stderr, "name is wrong, found \"%s\", expected \"%s\"", name, t->expected_name); + t->errors++; + } + + if(value == NULL && t->expected_value == NULL) { + ; + } + else if(value == NULL || t->expected_value == NULL) { + fprintf(stderr, "value is wrong, found \"%s\", expected \"%s\"", value?value:"(null)", t->expected_value?t->expected_value:"(null)"); + t->errors++; + } + else if(strcmp(value, t->expected_value) != 0) { + fprintf(stderr, "values don't match, found \"%s\", expected \"%s\"", value, t->expected_value); + t->errors++; + } + + return 1; +} + +int rrdlabels_unittest_add_a_pair(const char *pair, const char *name, const char *value) { + DICTIONARY *labels = rrdlabels_create(); + int errors; + + fprintf(stderr, "rrdlabels_add_pair(labels, %s) ... ", pair); + + rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_CONFIG); + + struct rrdlabels_unittest_add_a_pair tmp = { + .pair = pair, + .expected_name = name, + .expected_value = value, + .errors = 0 + }; + int ret = rrdlabels_walkthrough_read(labels, rrdlabels_unittest_add_a_pair_callback, &tmp); + errors = tmp.errors; + if(ret != 1) { + fprintf(stderr, "failed to get \"%s\" label", name); + errors++; + } + + if(!errors) + fprintf(stderr, " OK, name='%s' and value='%s'\n", tmp.name, tmp.value?tmp.value:"(null)"); + else + fprintf(stderr, " FAILED\n"); + + rrdlabels_destroy(labels); + return errors; +} + +int rrdlabels_unittest_add_pairs() { + fprintf(stderr, "\n%s() tests\n", __FUNCTION__); + + int errors = 0; + + // basic test + errors += rrdlabels_unittest_add_a_pair("tag=value", "tag", "value"); + errors += rrdlabels_unittest_add_a_pair("tag:value", "tag", "value"); + + // test newlines + errors += rrdlabels_unittest_add_a_pair(" tag = \t value \r\n", "tag", "value"); + + // test : in values + errors += rrdlabels_unittest_add_a_pair("tag=:value", "tag", ":value"); + errors += rrdlabels_unittest_add_a_pair("tag::value", "tag", ":value"); + errors += rrdlabels_unittest_add_a_pair(" tag = :value ", "tag", ":value"); + errors += rrdlabels_unittest_add_a_pair(" tag : :value ", "tag", ":value"); + errors += rrdlabels_unittest_add_a_pair("tag:5", "tag", "5"); + errors += rrdlabels_unittest_add_a_pair("tag:55", "tag", "55"); + errors += rrdlabels_unittest_add_a_pair("tag:aa", "tag", "aa"); + errors += rrdlabels_unittest_add_a_pair("tag:a", "tag", "a"); + + // test empty values + errors += rrdlabels_unittest_add_a_pair("tag", "tag", "[none]"); + errors += rrdlabels_unittest_add_a_pair("tag:", "tag", "[none]"); + errors += rrdlabels_unittest_add_a_pair("tag:\"\"", "tag", "[none]"); + errors += rrdlabels_unittest_add_a_pair("tag:''", "tag", "[none]"); + errors += rrdlabels_unittest_add_a_pair("tag:\r\n", "tag", "[none]"); + errors += rrdlabels_unittest_add_a_pair("tag\r\n", "tag", "[none]"); + + // test UTF-8 in values + errors += rrdlabels_unittest_add_a_pair("tag: country:Ελλάδα", "tag", "country:Ελλάδα"); + errors += rrdlabels_unittest_add_a_pair("\"tag\": \"country:Ελλάδα\"", "tag", "country:Ελλάδα"); + errors += rrdlabels_unittest_add_a_pair("\"tag\": country:\"Ελλάδα\"", "tag", "country:Ελλάδα"); + errors += rrdlabels_unittest_add_a_pair("\"tag=1\": country:\"Gre\\\"ece\"", "tag_1", "country:Gre_ece"); + errors += rrdlabels_unittest_add_a_pair("\"tag=1\" = country:\"Gre\\\"ece\"", "tag_1", "country:Gre_ece"); + + errors += rrdlabels_unittest_add_a_pair("\t'LABE=L'\t=\t\"World\" peace", "labe_l", "World peace"); + errors += rrdlabels_unittest_add_a_pair("\t'LA\\'B:EL'\t=\tcountry:\"World\":\"Europe\":\"Greece\"", "la_b_el", "country:World:Europe:Greece"); + errors += rrdlabels_unittest_add_a_pair("\t'LA\\'B:EL'\t=\tcountry\\\"World\"\\\"Europe\"\\\"Greece\"", "la_b_el", "country/World/Europe/Greece"); + + errors += rrdlabels_unittest_add_a_pair("NAME=\"VALUE\"", "name", "VALUE"); + errors += rrdlabels_unittest_add_a_pair("\"NAME\" : \"VALUE\"", "name", "VALUE"); + errors += rrdlabels_unittest_add_a_pair("NAME: \"VALUE\"", "name", "VALUE"); + + return errors; +} + +int rrdlabels_unittest_check_simple_pattern(DICTIONARY *labels, const char *pattern, bool expected) { + fprintf(stderr, "rrdlabels_match_simple_pattern(labels, \"%s\") ... ", pattern); + + bool ret = rrdlabels_match_simple_pattern(labels, pattern); + fprintf(stderr, "%s, got %s expected %s\n", (ret == expected)?"OK":"FAILED", ret?"true":"false", expected?"true":"false"); + + return (ret == expected)?0:1; +} + +int rrdlabels_unittest_simple_pattern() { + fprintf(stderr, "\n%s() tests\n", __FUNCTION__); + + int errors = 0; + + DICTIONARY *labels = rrdlabels_create(); + rrdlabels_add(labels, "tag1", "value1", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "tag2", "value2", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "tag3", "value3", RRDLABEL_SRC_CONFIG); + + errors += rrdlabels_unittest_check_simple_pattern(labels, "*", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "tag", false); + errors += rrdlabels_unittest_check_simple_pattern(labels, "tag*", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "*1", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "value*", false); + errors += rrdlabels_unittest_check_simple_pattern(labels, "*=value*", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "*:value*", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "*2", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "*2 *3", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "!tag3 *2", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "tag1 tag2", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "tag1tag2", false); + errors += rrdlabels_unittest_check_simple_pattern(labels, "invalid1 invalid2 tag3", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "!tag1 tag4", false); + errors += rrdlabels_unittest_check_simple_pattern(labels, "tag1=value1", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "tag1=value2", false); + errors += rrdlabels_unittest_check_simple_pattern(labels, "tag*=value*", true); + errors += rrdlabels_unittest_check_simple_pattern(labels, "!tag*=value*", false); + errors += rrdlabels_unittest_check_simple_pattern(labels, "!tag2=something2 tag2=*2", true); + + rrdlabels_destroy(labels); + + return errors; +} + +int rrdlabels_unittest_sanitize_value(const char *src, const char *expected) { + char buf[RRDLABELS_MAX_VALUE_LENGTH + 1]; + size_t len = rrdlabels_sanitize_value(buf, src, RRDLABELS_MAX_VALUE_LENGTH); + size_t expected_len = strlen(expected); + + int err = 0; + if(strcmp(buf, expected) != 0) err = 1; + if(len != expected_len) err = 1; + + fprintf(stderr, "%s(%s): %s, expected '%s', got '%s', expected bytes = %zu, got bytes = %zu\n", __FUNCTION__, src, (err==1)?"FAILED":"OK", expected, buf, expected_len, strlen(buf)); + return err; +} + +int rrdlabels_unittest_sanitization() { + int errors = 0; + + errors += rrdlabels_unittest_sanitize_value("", "[none]"); + errors += rrdlabels_unittest_sanitize_value("1", "1"); + errors += rrdlabels_unittest_sanitize_value(" hello world ", "hello world"); + + // 2-byte UTF-8 + errors += rrdlabels_unittest_sanitize_value(" Ελλάδα ", "Ελλάδα"); + errors += rrdlabels_unittest_sanitize_value("aŰbŲcŴ", "aŰbŲcŴ"); + errors += rrdlabels_unittest_sanitize_value("Ű b Ų c Ŵ", "Ű b Ų c Ŵ"); + + // 3-byte UTF-8 + errors += rrdlabels_unittest_sanitize_value("‱", "‱"); + errors += rrdlabels_unittest_sanitize_value("a‱b", "a‱b"); + errors += rrdlabels_unittest_sanitize_value("a ‱ b", "a ‱ b"); + + // 4-byte UTF-8 + errors += rrdlabels_unittest_sanitize_value("𩸽", "𩸽"); + errors += rrdlabels_unittest_sanitize_value("a𩸽b", "a𩸽b"); + errors += rrdlabels_unittest_sanitize_value("a 𩸽 b", "a 𩸽 b"); + + // mixed multi-byte + errors += rrdlabels_unittest_sanitize_value("Ű‱𩸽‱Ű", "Ű‱𩸽‱Ű"); + + return errors; +} + +int rrdlabels_unittest(void) { + int errors = 0; + + errors += rrdlabels_unittest_sanitization(); + errors += rrdlabels_unittest_add_pairs(); + errors += rrdlabels_unittest_simple_pattern(); + + fprintf(stderr, "%d errors found\n", errors); + return errors; +} diff --git a/database/rrdset.c b/database/rrdset.c new file mode 100644 index 0000000..6eb3c71 --- /dev/null +++ b/database/rrdset.c @@ -0,0 +1,2173 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" +#include +#include "storage_engine.h" + +// ---------------------------------------------------------------------------- +// RRDSET name index + +static void rrdset_name_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *rrdhost __maybe_unused) { + RRDSET *st = rrdset; + rrdset_flag_set(st, RRDSET_FLAG_INDEXED_NAME); +} +static void rrdset_name_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *rrdhost __maybe_unused) { + RRDSET *st = rrdset; + rrdset_flag_clear(st, RRDSET_FLAG_INDEXED_NAME); +} + +static inline void rrdset_index_add_name(RRDHOST *host, RRDSET *st) { + if(!st->name) return; + dictionary_set(host->rrdset_root_index_name, rrdset_name(st), st, sizeof(RRDSET)); +} + +static inline void rrdset_index_del_name(RRDHOST *host, RRDSET *st) { + if(rrdset_flag_check(st, RRDSET_FLAG_INDEXED_NAME)) + dictionary_del(host->rrdset_root_index_name, rrdset_name(st)); +} + +static inline RRDSET *rrdset_index_find_name(RRDHOST *host, const char *name) { + return dictionary_get(host->rrdset_root_index_name, name); +} + +// ---------------------------------------------------------------------------- +// RRDSET index + +static inline void rrdset_update_permanent_labels(RRDSET *st) { + if(!st->rrdlabels) return; + + rrdlabels_add(st->rrdlabels, "_collect_plugin", rrdset_plugin_name(st), RRDLABEL_SRC_AUTO| RRDLABEL_FLAG_PERMANENT); + rrdlabels_add(st->rrdlabels, "_collect_module", rrdset_module_name(st), RRDLABEL_SRC_AUTO| RRDLABEL_FLAG_PERMANENT); +} + +static STRING *rrdset_fix_name(RRDHOST *host, const char *chart_full_id, const char *type, const char *current_name, const char *name) { + if(!name || !*name) return NULL; + + char full_name[RRD_ID_LENGTH_MAX + 1]; + char sanitized_name[CONFIG_MAX_VALUE + 1]; + char new_name[CONFIG_MAX_VALUE + 1]; + + snprintfz(full_name, RRD_ID_LENGTH_MAX, "%s.%s", type, name); + rrdset_strncpyz_name(sanitized_name, full_name, CONFIG_MAX_VALUE); + strncpyz(new_name, sanitized_name, CONFIG_MAX_VALUE); + + if(rrdset_index_find_name(host, new_name)) { + debug(D_RRD_CALLS, "RRDSET: chart name '%s' on host '%s' already exists.", new_name, rrdhost_hostname(host)); + if(!strcmp(chart_full_id, full_name) && (!current_name || !*current_name)) { + unsigned i = 1; + + do { + snprintfz(new_name, CONFIG_MAX_VALUE, "%s_%u", sanitized_name, i); + i++; + } while (rrdset_index_find_name(host, new_name)); + + info("RRDSET: using name '%s' for chart '%s' on host '%s'.", new_name, full_name, rrdhost_hostname(host)); + } + else + return NULL; + } + + return string_strdupz(new_name); +} + +struct rrdset_constructor { + RRDHOST *host; + const char *type; + const char *id; + const char *name; + const char *family; + const char *context; + const char *title; + const char *units; + const char *plugin; + const char *module; + long priority; + int update_every; + RRDSET_TYPE chart_type; + RRD_MEMORY_MODE memory_mode; + long history_entries; + + enum { + RRDSET_REACT_NONE = 0, + RRDSET_REACT_NEW = (1 << 0), + RRDSET_REACT_UPDATED = (1 << 1), + RRDSET_REACT_PLUGIN_UPDATED = (1 << 2), + RRDSET_REACT_MODULE_UPDATED = (1 << 3), + RRDSET_REACT_CHART_ACTIVATED = (1 << 4), + } react_action; +}; + +// the constructor - the dictionary is write locked while this runs +static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *constructor_data) { + struct rrdset_constructor *ctr = constructor_data; + RRDHOST *host = ctr->host; + RRDSET *st = rrdset; + + const char *chart_full_id = dictionary_acquired_item_name(item); + + st->id = string_strdupz(chart_full_id); + + st->name = rrdset_fix_name(host, chart_full_id, ctr->type, NULL, ctr->name); + if(!st->name) + st->name = rrdset_fix_name(host, chart_full_id, ctr->type, NULL, ctr->id); + rrdset_index_add_name(host, st); + + st->parts.id = string_strdupz(ctr->id); + st->parts.type = string_strdupz(ctr->type); + st->parts.name = string_strdupz(ctr->name); + + st->family = (ctr->family && *ctr->family) ? rrd_string_strdupz(ctr->family) : rrd_string_strdupz(ctr->type); + st->context = (ctr->context && *ctr->context) ? rrd_string_strdupz(ctr->context) : rrd_string_strdupz(chart_full_id); + + st->units = rrd_string_strdupz(ctr->units); + st->title = rrd_string_strdupz(ctr->title); + st->plugin_name = rrd_string_strdupz(ctr->plugin); + st->module_name = rrd_string_strdupz(ctr->module); + st->priority = ctr->priority; + + st->cache_dir = rrdset_cache_dir(host, chart_full_id); + st->entries = (ctr->memory_mode != RRD_MEMORY_MODE_DBENGINE) ? align_entries_to_pagesize(ctr->memory_mode, ctr->history_entries) : 5; + st->update_every = ctr->update_every; + st->rrd_memory_mode = ctr->memory_mode; + + st->chart_type = ctr->chart_type; + st->gap_when_lost_iterations_above = (int) (gap_when_lost_iterations_above + 2); + st->rrdhost = host; + + st->flags = RRDSET_FLAG_SYNC_CLOCK + | RRDSET_FLAG_INDEXED_ID + | RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED + | RRDSET_FLAG_SENDER_REPLICATION_FINISHED + ; + + netdata_rwlock_init(&st->alerts.rwlock); + + if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || st->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { + if(!rrdset_memory_load_or_create_map_save(st, st->rrd_memory_mode)) { + info("Failed to use db mode %s for chart '%s', falling back to ram mode.", (st->rrd_memory_mode == RRD_MEMORY_MODE_MAP)?"map":"save", rrdset_name(st)); + st->rrd_memory_mode = RRD_MEMORY_MODE_RAM; + } + } + + // initialize the db tiers + { + for(size_t tier = 0; tier < storage_tiers ; tier++) { + STORAGE_ENGINE *eng = st->rrdhost->db[tier].eng; + if(!eng) continue; + + st->storage_metrics_groups[tier] = eng->api.collect_ops.metrics_group_get(host->db[tier].instance, &st->chart_uuid); + } + } + + rrddim_index_init(st); + + // chart variables - we need this for data collection to work (collector given chart variables) - not only health + rrdsetvar_index_init(st); + + if (host->health_enabled) { + st->rrdfamily = rrdfamily_add_and_acquire(host, rrdset_family(st)); + st->rrdvars = rrdvariables_create(); + rrddimvar_index_init(st); + } + + st->rrdlabels = rrdlabels_create(); + rrdset_update_permanent_labels(st); + + st->green = NAN; + st->red = NAN; + + ctr->react_action = RRDSET_REACT_NEW; +} + +// the destructor - the dictionary is write locked while this runs +static void rrdset_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *rrdhost) { + RRDHOST *host = rrdhost; + RRDSET *st = rrdset; + + rrdset_flag_clear(st, RRDSET_FLAG_INDEXED_ID); + + // cleanup storage engines + { + for(size_t tier = 0; tier < storage_tiers ; tier++) { + STORAGE_ENGINE *eng = st->rrdhost->db[tier].eng; + if(!eng) continue; + + eng->api.collect_ops.metrics_group_release(host->db[tier].instance, st->storage_metrics_groups[tier]); + } + } + + // remove it from the name index + rrdset_index_del_name(host, st); + + // release the collector info + dictionary_destroy(st->functions_view); + + rrdcalc_unlink_all_rrdset_alerts(st); + + // ------------------------------------------------------------------------ + // the order of destruction is important here + + // 1. delete RRDDIMVAR index - this will speed up the destruction of RRDDIMs + // because each dimension loops to find its own variables in this index. + // There are no references to the items on this index from the dimensions. + // To find their own, they have to walk-through the dictionary. + rrddimvar_index_destroy(st); // destroy the rrddimvar index + + // 2. delete RRDSETVAR index + rrdsetvar_index_destroy(st); // destroy the rrdsetvar index + + // 3. delete RRDVAR index after the above, to avoid triggering its garbage collector (they have references on this) + rrdvariables_destroy(st->rrdvars); // free all variables and destroy the rrdvar dictionary + + // 4. delete RRDFAMILY - this has to be last, because RRDDIMVAR and RRDSETVAR need the reference counter + rrdfamily_release(host, st->rrdfamily); // release the acquired rrdfamily -- has to be after all variables + + // 5. delete RRDDIMs, now their variables are not existing, so this is fast + rrddim_index_destroy(st); // free all the dimensions and destroy the dimensions index + + // 6. this has to be after the dimensions are freed, but before labels are freed (contexts need the labels) + rrdcontext_removed_rrdset(st); // let contexts know + + // 7. destroy the chart labels + rrdlabels_destroy(st->rrdlabels); // destroy the labels, after letting the contexts know + + rrdset_memory_file_free(st); // remove files of db mode save and map + + // ------------------------------------------------------------------------ + // free it + + netdata_rwlock_destroy(&st->alerts.rwlock); + + string_freez(st->id); + string_freez(st->name); + string_freez(st->parts.id); + string_freez(st->parts.type); + string_freez(st->parts.name); + string_freez(st->family); + string_freez(st->title); + string_freez(st->units); + string_freez(st->context); + string_freez(st->plugin_name); + string_freez(st->module_name); + + freez(st->exporting_flags); + freez(st->cache_dir); +} + +// the item to be inserted, is already in the dictionary +// this callback deals with the situation, migrating the existing object to the new values +// the dictionary is write locked while this runs +static bool rrdset_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *new_rrdset, void *constructor_data) { + (void)new_rrdset; // it is NULL + + struct rrdset_constructor *ctr = constructor_data; + RRDSET *st = rrdset; + + rrdset_isnot_obsolete(st); + + ctr->react_action = RRDSET_REACT_NONE; + + if (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED)) { + rrdset_flag_clear(st, RRDSET_FLAG_ARCHIVED); + ctr->react_action |= RRDSET_REACT_CHART_ACTIVATED; + } + + if (rrdset_reset_name(st, (ctr->name && *ctr->name) ? ctr->name : ctr->id) == 2) + ctr->react_action |= RRDSET_REACT_UPDATED; + + if (unlikely(st->priority != ctr->priority)) { + st->priority = ctr->priority; + ctr->react_action |= RRDSET_REACT_UPDATED; + } + + if (unlikely(st->update_every != ctr->update_every)) { + rrdset_set_update_every(st, ctr->update_every); + ctr->react_action |= RRDSET_REACT_UPDATED; + } + + if(ctr->plugin && *ctr->plugin) { + STRING *old_plugin = st->plugin_name; + st->plugin_name = rrd_string_strdupz(ctr->plugin); + if (old_plugin != st->plugin_name) + ctr->react_action |= RRDSET_REACT_PLUGIN_UPDATED; + string_freez(old_plugin); + } + + if(ctr->module && *ctr->module) { + STRING *old_module = st->module_name; + st->module_name = rrd_string_strdupz(ctr->module); + if (old_module != st->module_name) + ctr->react_action |= RRDSET_REACT_MODULE_UPDATED; + string_freez(old_module); + } + + if(ctr->title && *ctr->title) { + STRING *old_title = st->title; + st->title = rrd_string_strdupz(ctr->title); + if(old_title != st->title) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_title); + } + + if(ctr->units && *ctr->units) { + STRING *old_units = st->units; + st->units = rrd_string_strdupz(ctr->units); + if(old_units != st->units) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_units); + } + + if(ctr->family && *ctr->family) { + STRING *old_family = st->family; + st->family = rrd_string_strdupz(ctr->family); + if(old_family != st->family) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_family); + + // TODO - we should rename RRDFAMILY variables + } + + if(ctr->context && *ctr->context) { + STRING *old_context = st->context; + st->context = rrd_string_strdupz(ctr->context); + if(old_context != st->context) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_context); + } + + if(st->chart_type != ctr->chart_type) { + st->chart_type = ctr->chart_type; + ctr->react_action |= RRDSET_REACT_UPDATED; + } + + rrdset_update_permanent_labels(st); + + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + return ctr->react_action != RRDSET_REACT_NONE; +} + +// this is called after all insertions/conflicts, with the dictionary unlocked, with a reference to RRDSET +// so, any actions requiring locks on other objects, should be placed here +static void rrdset_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *constructor_data) { + struct rrdset_constructor *ctr = constructor_data; + RRDSET *st = rrdset; + RRDHOST *host = st->rrdhost; + + st->last_accessed_time = now_realtime_sec(); + + if(host->health_enabled && (ctr->react_action & (RRDSET_REACT_NEW | RRDSET_REACT_CHART_ACTIVATED))) { + rrdset_flag_set(st, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); + } + + if(ctr->react_action & (RRDSET_REACT_NEW | RRDSET_REACT_PLUGIN_UPDATED | RRDSET_REACT_MODULE_UPDATED)) { + if (ctr->react_action & RRDSET_REACT_NEW) { + if(unlikely(rrdcontext_find_chart_uuid(st, &st->chart_uuid))) { + uuid_generate(st->chart_uuid); + bool found_in_sql = false; (void)found_in_sql; + +// bool found_in_sql = true; +// if(unlikely(sql_find_chart_uuid(host, st, &st->chart_uuid))) { +// uuid_generate(st->chart_uuid); +// found_in_sql = false; +// } + +#ifdef NETDATA_INTERNAL_CHECKS + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(st->chart_uuid, uuid_str); + error_report("Chart UUID for host %s chart [%s] not found in context. It is now set to %s (%s)", + string2str(host->hostname), + string2str(st->name), uuid_str, found_in_sql ? "found in sqlite" : "newly generated"); +#endif + } + } + rrdset_flag_set(st, RRDSET_FLAG_METADATA_UPDATE); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + } + + rrdcontext_updated_rrdset(st); +} + +void rrdset_index_init(RRDHOST *host) { + if(!host->rrdset_root_index) { + host->rrdset_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdset_root_index, rrdset_insert_callback, NULL); + dictionary_register_conflict_callback(host->rrdset_root_index, rrdset_conflict_callback, NULL); + dictionary_register_react_callback(host->rrdset_root_index, rrdset_react_callback, NULL); + dictionary_register_delete_callback(host->rrdset_root_index, rrdset_delete_callback, host); + } + + if(!host->rrdset_root_index_name) { + host->rrdset_root_index_name = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdset_root_index_name, rrdset_name_insert_callback, host); + dictionary_register_delete_callback(host->rrdset_root_index_name, rrdset_name_delete_callback, host); + } +} + +void rrdset_index_destroy(RRDHOST *host) { + // destroy the name index first + dictionary_destroy(host->rrdset_root_index_name); + host->rrdset_root_index_name = NULL; + + // destroy the id index last + dictionary_destroy(host->rrdset_root_index); + host->rrdset_root_index = NULL; +} + +static inline RRDSET *rrdset_index_add(RRDHOST *host, const char *id, struct rrdset_constructor *st_ctr) { + return dictionary_set_advanced(host->rrdset_root_index, id, -1, NULL, sizeof(RRDSET), st_ctr); +} + +static inline void rrdset_index_del(RRDHOST *host, RRDSET *st) { + if(rrdset_flag_check(st, RRDSET_FLAG_INDEXED_ID)) + dictionary_del(host->rrdset_root_index, rrdset_id(st)); +} + +static RRDSET *rrdset_index_find(RRDHOST *host, const char *id) { + // TODO - the name index should have an acquired dictionary item, not just a pointer to RRDSET + return dictionary_get(host->rrdset_root_index, id); +} + +// ---------------------------------------------------------------------------- +// RRDSET - find charts + +inline RRDSET *rrdset_find(RRDHOST *host, const char *id) { + debug(D_RRD_CALLS, "rrdset_find() for chart '%s' in host '%s'", id, rrdhost_hostname(host)); + RRDSET *st = rrdset_index_find(host, id); + + if(st) + st->last_accessed_time = now_realtime_sec(); + + return(st); +} + +inline RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *id) { + debug(D_RRD_CALLS, "rrdset_find_bytype() for chart '%s.%s' in host '%s'", type, id, rrdhost_hostname(host)); + + char buf[RRD_ID_LENGTH_MAX + 1]; + strncpyz(buf, type, RRD_ID_LENGTH_MAX - 1); + strcat(buf, "."); + int len = (int) strlen(buf); + strncpyz(&buf[len], id, (size_t) (RRD_ID_LENGTH_MAX - len)); + + return(rrdset_find(host, buf)); +} + +inline RRDSET *rrdset_find_byname(RRDHOST *host, const char *name) { + debug(D_RRD_CALLS, "rrdset_find_byname() for chart '%s' in host '%s'", name, rrdhost_hostname(host)); + RRDSET *st = rrdset_index_find_name(host, name); + return(st); +} + +// ---------------------------------------------------------------------------- +// RRDSET - rename charts + +char *rrdset_strncpyz_name(char *to, const char *from, size_t length) { + char c, *p = to; + + while (length-- && (c = *from++)) { + if(c != '.' && c != '-' && !isalnum(c)) + c = '_'; + + *p++ = c; + } + + *p = '\0'; + + return to; +} + +int rrdset_reset_name(RRDSET *st, const char *name) { + if(unlikely(!strcmp(rrdset_name(st), name))) + return 1; + + RRDHOST *host = st->rrdhost; + + debug(D_RRD_CALLS, "rrdset_reset_name() old: '%s', new: '%s'", rrdset_name(st), name); + + STRING *name_string = rrdset_fix_name(host, rrdset_id(st), rrdset_parts_type(st), string2str(st->name), name); + if(!name_string) return 0; + + if(st->name) { + rrdset_index_del_name(host, st); + string_freez(st->name); + st->name = name_string; + rrdsetvar_rename_all(st); + } + else + st->name = name_string; + + RRDDIM *rd; + rrddim_foreach_read(rd, st) + rrddimvar_rename_all(rd); + rrddim_foreach_done(rd); + + rrdset_index_add_name(host, st); + + rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_SEND); + rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_IGNORE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_IGNORE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + rrdcontext_updated_rrdset_name(st); + return 2; +} + +// get the timestamp of the last entry in the round-robin database +time_t rrdset_last_entry_t(RRDSET *st) { + RRDDIM *rd; + time_t last_entry_t = 0; + + rrddim_foreach_read(rd, st) { + time_t t = rrddim_last_entry_t(rd); + if(t > last_entry_t) last_entry_t = t; + } + rrddim_foreach_done(rd); + + return last_entry_t; +} + +// get the timestamp of first entry in the round-robin database +time_t rrdset_first_entry_t(RRDSET *st) { + RRDDIM *rd; + time_t first_entry_t = LONG_MAX; + + rrddim_foreach_read(rd, st) { + time_t t = rrddim_first_entry_t(rd); + if(t < first_entry_t) + first_entry_t = t; + } + rrddim_foreach_done(rd); + + if (unlikely(LONG_MAX == first_entry_t)) return 0; + return first_entry_t; +} + +time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier) { + if(unlikely(tier > storage_tiers)) + return 0; + + RRDDIM *rd; + time_t first_entry_t = LONG_MAX; + + rrddim_foreach_read(rd, st) { + time_t t = rrddim_first_entry_t_of_tier(rd, tier); + if(t && t < first_entry_t) + first_entry_t = t; + } + rrddim_foreach_done(rd); + + if (unlikely(LONG_MAX == first_entry_t)) return 0; + return first_entry_t; +} + +inline void rrdset_is_obsolete(RRDSET *st) { + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED))) { + info("Cannot obsolete already archived chart %s", rrdset_name(st)); + return; + } + + if(unlikely(!(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))) { + rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); + + st->last_accessed_time = now_realtime_sec(); + + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + // the chart will not get more updates (data collection) + // so, we have to push its definition now + rrdset_push_chart_definition_now(st); + rrdcontext_updated_rrdset_flags(st); + } +} + +inline void rrdset_isnot_obsolete(RRDSET *st) { + if(unlikely((rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))) { + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); + st->last_accessed_time = now_realtime_sec(); + + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + // the chart will be pushed upstream automatically + // due to data collection + rrdcontext_updated_rrdset_flags(st); + } +} + +inline void rrdset_update_heterogeneous_flag(RRDSET *st) { + RRDHOST *host = st->rrdhost; + (void)host; + + RRDDIM *rd; + + rrdset_flag_clear(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + + bool init = false, is_heterogeneous = false; + RRD_ALGORITHM algorithm; + collected_number multiplier; + collected_number divisor; + + rrddim_foreach_read(rd, st) { + if(!init) { + algorithm = rd->algorithm; + multiplier = rd->multiplier; + divisor = ABS(rd->divisor); + init = true; + continue; + } + + if(algorithm != rd->algorithm || multiplier != ABS(rd->multiplier) || divisor != ABS(rd->divisor)) { + if(!rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) { + #ifdef NETDATA_INTERNAL_CHECKS + info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", + rrddim_name(rd), + rrdset_name(st), + rrdhost_hostname(host), + rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm), + rd->multiplier, multiplier, + rd->divisor, divisor + ); + #endif + rrdset_flag_set(st, RRDSET_FLAG_HETEROGENEOUS); + } + + is_heterogeneous = true; + break; + } + } + rrddim_foreach_done(rd); + + if(!is_heterogeneous) { + rrdset_flag_clear(st, RRDSET_FLAG_HETEROGENEOUS); + rrdcontext_updated_rrdset_flags(st); + } +} + +// ---------------------------------------------------------------------------- +// RRDSET - reset a chart + +void rrdset_reset(RRDSET *st) { + debug(D_RRD_CALLS, "rrdset_reset() %s", rrdset_name(st)); + + st->last_collected_time.tv_sec = 0; + st->last_collected_time.tv_usec = 0; + st->last_updated.tv_sec = 0; + st->last_updated.tv_usec = 0; + st->current_entry = 0; + st->counter = 0; + st->counter_done = 0; + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + rd->last_collected_time.tv_sec = 0; + rd->last_collected_time.tv_usec = 0; + rd->collections_counter = 0; + + if(!rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(rd->tiers[tier]) + rd->tiers[tier]->collect_ops->flush(rd->tiers[tier]->db_collection_handle); + } + } + } + rrddim_foreach_done(rd); +} + +// ---------------------------------------------------------------------------- +// RRDSET - helpers for rrdset_create() + +inline long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries) { + if(mode == RRD_MEMORY_MODE_DBENGINE) return 0; + if(mode == RRD_MEMORY_MODE_NONE) return 5; + + if(entries < 5) entries = 5; + if(entries > RRD_HISTORY_ENTRIES_MAX) entries = RRD_HISTORY_ENTRIES_MAX; + + if(mode == RRD_MEMORY_MODE_MAP || mode == RRD_MEMORY_MODE_SAVE || mode == RRD_MEMORY_MODE_RAM) { + long header_size = 0; + + if(mode == RRD_MEMORY_MODE_MAP || mode == RRD_MEMORY_MODE_SAVE) + header_size = (long)rrddim_memory_file_header_size(); + + long page = (long)sysconf(_SC_PAGESIZE); + long size = (long)(header_size + entries * sizeof(storage_number)); + if (unlikely(size % page)) { + size -= (size % page); + size += page; + + long n = (long)((size - header_size) / sizeof(storage_number)); + return n; + } + } + + return entries; +} + +static inline void last_collected_time_align(RRDSET *st) { + st->last_collected_time.tv_sec -= st->last_collected_time.tv_sec % st->update_every; + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST))) + st->last_collected_time.tv_usec = 0; + else + st->last_collected_time.tv_usec = 500000; +} + +static inline void last_updated_time_align(RRDSET *st) { + st->last_updated.tv_sec -= st->last_updated.tv_sec % st->update_every; + st->last_updated.tv_usec = 0; +} + +// ---------------------------------------------------------------------------- +// RRDSET - free a chart + +void rrdset_free(RRDSET *st) { + if(unlikely(!st)) return; + rrdset_index_del(st->rrdhost, st); +} + +void rrdset_save(RRDSET *st) { + rrdset_memory_file_save(st); + + RRDDIM *rd; + rrddim_foreach_read(rd, st) + rrddim_memory_file_save(rd); + rrddim_foreach_done(rd); +} + +void rrdset_delete_files(RRDSET *st) { + RRDDIM *rd; + + info("Deleting chart '%s' ('%s') from disk...", rrdset_id(st), rrdset_name(st)); + + if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || st->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { + const char *cache_filename = rrdset_cache_filename(st); + if(cache_filename) { + info("Deleting chart header file '%s'.", cache_filename); + if (unlikely(unlink(cache_filename) == -1)) + error("Cannot delete chart header file '%s'", cache_filename); + } + else + error("Cannot find the cache filename of chart '%s'", rrdset_id(st)); + } + + rrddim_foreach_read(rd, st) { + const char *cache_filename = rrddim_cache_filename(rd); + if(!cache_filename) continue; + + info("Deleting dimension file '%s'.", cache_filename); + if(unlikely(unlink(cache_filename) == -1)) + error("Cannot delete dimension file '%s'", cache_filename); + } + rrddim_foreach_done(rd); + + recursively_delete_dir(st->cache_dir, "left-over chart"); +} + +void rrdset_delete_obsolete_dimensions(RRDSET *st) { + RRDDIM *rd; + + info("Deleting dimensions of chart '%s' ('%s') from disk...", rrdset_id(st), rrdset_name(st)); + + rrddim_foreach_read(rd, st) { + if(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { + const char *cache_filename = rrddim_cache_filename(rd); + if(!cache_filename) continue; + info("Deleting dimension file '%s'.", cache_filename); + if(unlikely(unlink(cache_filename) == -1)) + error("Cannot delete dimension file '%s'", cache_filename); + } + } + rrddim_foreach_done(rd); +} + +// ---------------------------------------------------------------------------- +// RRDSET - create a chart + +RRDSET *rrdset_create_custom( + RRDHOST *host + , const char *type + , const char *id + , const char *name + , const char *family + , const char *context + , const char *title + , const char *units + , const char *plugin + , const char *module + , long priority + , int update_every + , RRDSET_TYPE chart_type + , RRD_MEMORY_MODE memory_mode + , long history_entries +) { + if (host != localhost) + host->senders_last_chart_command = now_realtime_sec(); + + if(!type || !type[0]) + fatal("Cannot create rrd stats without a type: id '%s', name '%s', family '%s', context '%s', title '%s', units '%s', plugin '%s', module '%s'." + , (id && *id)?id:"" + , (name && *name)?name:"" + , (family && *family)?family:"" + , (context && *context)?context:"" + , (title && *title)?title:"" + , (units && *units)?units:"" + , (plugin && *plugin)?plugin:"" + , (module && *module)?module:"" + ); + + if(!id || !id[0]) + fatal("Cannot create rrd stats without an id: type '%s', name '%s', family '%s', context '%s', title '%s', units '%s', plugin '%s', module '%s'." + , type + , (name && *name)?name:"" + , (family && *family)?family:"" + , (context && *context)?context:"" + , (title && *title)?title:"" + , (units && *units)?units:"" + , (plugin && *plugin)?plugin:"" + , (module && *module)?module:"" + ); + + // ------------------------------------------------------------------------ + // check if it already exists + + char full_id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(full_id, RRD_ID_LENGTH_MAX, "%s.%s", type, id); + + // ------------------------------------------------------------------------ + // allocate it + + debug(D_RRD_CALLS, "Creating RRD_STATS for '%s.%s'.", type, id); + + struct rrdset_constructor tmp = { + .host = host, + .type = type, + .id = id, + .name = name, + .family = family, + .context = context, + .title = title, + .units = units, + .plugin = plugin, + .module = module, + .priority = priority, + .update_every = update_every, + .chart_type = chart_type, + .memory_mode = memory_mode, + .history_entries = history_entries, + }; + + RRDSET *st = rrdset_index_add(host, full_id, &tmp); + return(st); +} + +// ---------------------------------------------------------------------------- +// RRDSET - data collection iteration control + +void rrdset_timed_next(RRDSET *st, struct timeval now, usec_t duration_since_last_update) { + #ifdef NETDATA_INTERNAL_CHECKS + char *discard_reason = NULL; + usec_t discarded = duration_since_last_update; + #endif + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_SYNC_CLOCK))) { + // the chart needs to be re-synced to current time + rrdset_flag_clear(st, RRDSET_FLAG_SYNC_CLOCK); + + // discard the duration supplied + duration_since_last_update = 0; + + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "SYNC CLOCK FLAG"; + #endif + } + + if(unlikely(!st->last_collected_time.tv_sec)) { + // the first entry + duration_since_last_update = st->update_every * USEC_PER_SEC; + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "FIRST DATA COLLECTION"; + #endif + } + else if(unlikely(!duration_since_last_update)) { + // no dt given by the plugin + duration_since_last_update = dt_usec(&now, &st->last_collected_time); + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "NO USEC GIVEN BY COLLECTOR"; + #endif + } + else { + // microseconds has the time since the last collection + susec_t since_last_usec = dt_usec_signed(&now, &st->last_collected_time); + + if(unlikely(since_last_usec < 0)) { + // oops! the database is in the future + #ifdef NETDATA_INTERNAL_CHECKS + info("RRD database for chart '%s' on host '%s' is %0.5" NETDATA_DOUBLE_MODIFIER + " secs in the future (counter #%zu, update #%zu). Adjusting it to current time." + , rrdset_id(st) + , rrdhost_hostname(st->rrdhost) + , (NETDATA_DOUBLE)-since_last_usec / USEC_PER_SEC + , st->counter + , st->counter_done + ); + #endif + + st->last_collected_time.tv_sec = now.tv_sec - st->update_every; + st->last_collected_time.tv_usec = now.tv_usec; + last_collected_time_align(st); + + st->last_updated.tv_sec = now.tv_sec - st->update_every; + st->last_updated.tv_usec = now.tv_usec; + last_updated_time_align(st); + + duration_since_last_update = st->update_every * USEC_PER_SEC; + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "COLLECTION TIME IN FUTURE"; + #endif + } + else if(unlikely((usec_t)since_last_usec > (usec_t)(st->update_every * 5 * USEC_PER_SEC))) { + // oops! the database is too far behind + #ifdef NETDATA_INTERNAL_CHECKS + info("RRD database for chart '%s' on host '%s' is %0.5" NETDATA_DOUBLE_MODIFIER + " secs in the past (counter #%zu, update #%zu). Adjusting it to current time.", rrdset_id(st), rrdhost_hostname(st->rrdhost), (NETDATA_DOUBLE)since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); + #endif + + duration_since_last_update = (usec_t)since_last_usec; + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "COLLECTION TIME TOO FAR IN THE PAST"; + #endif + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(since_last_usec > 0 && (susec_t) duration_since_last_update < since_last_usec) { + static __thread susec_t min_delta = USEC_PER_SEC * 3600, permanent_min_delta = 0; + static __thread time_t last_t = 0; + + // the first time initialize it so that it will make the check later + if(last_t == 0) last_t = now.tv_sec + 60; + + susec_t delta = since_last_usec - (susec_t) duration_since_last_update; + if(delta < min_delta) min_delta = delta; + + if(now.tv_sec >= last_t + 60) { + last_t = now.tv_sec; + + if(min_delta > permanent_min_delta) { + info("MINIMUM MICROSECONDS DELTA of thread %d increased from %lld to %lld (+%lld)", gettid(), permanent_min_delta, min_delta, min_delta - permanent_min_delta); + permanent_min_delta = min_delta; + } + + min_delta = USEC_PER_SEC * 3600; + } + } +#endif + } + + debug(D_RRD_CALLS, "rrdset_timed_next() for chart %s with duration since last update %llu usec", rrdset_name(st), duration_since_last_update); + rrdset_debug(st, "NEXT: %llu microseconds", duration_since_last_update); + + internal_error(discarded && discarded != duration_since_last_update, + "host '%s', chart '%s': discarded data collection time of %llu usec, " + "replaced with %llu usec, reason: '%s'" + , rrdhost_hostname(st->rrdhost) + , rrdset_id(st) + , discarded + , duration_since_last_update + , discard_reason?discard_reason:"UNDEFINED" + ); + + st->usec_since_last_update = duration_since_last_update; +} + +inline void rrdset_next_usec_unfiltered(RRDSET *st, usec_t duration_since_last_update) { + if(unlikely(!st->last_collected_time.tv_sec || !duration_since_last_update || (rrdset_flag_check(st, RRDSET_FLAG_SYNC_CLOCK)))) { + // call the full next_usec() function + rrdset_next_usec(st, duration_since_last_update); + return; + } + + st->usec_since_last_update = duration_since_last_update; +} + +inline void rrdset_next_usec(RRDSET *st, usec_t duration_since_last_update) { + struct timeval now; + + now_realtime_timeval(&now); + rrdset_timed_next(st, now, duration_since_last_update); +} + +// ---------------------------------------------------------------------------- +// RRDSET - process the collected values for all dimensions of a chart + +static inline usec_t rrdset_init_last_collected_time(RRDSET *st, struct timeval now) { + st->last_collected_time = now; + last_collected_time_align(st); + + usec_t last_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + + rrdset_debug(st, "initialized last collected time to %0.3" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)last_collect_ut / USEC_PER_SEC); + + return last_collect_ut; +} + +static inline usec_t rrdset_update_last_collected_time(RRDSET *st) { + usec_t last_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + usec_t ut = last_collect_ut + st->usec_since_last_update; + st->last_collected_time.tv_sec = (time_t) (ut / USEC_PER_SEC); + st->last_collected_time.tv_usec = (suseconds_t) (ut % USEC_PER_SEC); + + rrdset_debug(st, "updated last collected time to %0.3" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)last_collect_ut / USEC_PER_SEC); + + return last_collect_ut; +} + +static inline usec_t rrdset_init_last_updated_time(RRDSET *st) { + // copy the last collected time to last updated time + st->last_updated.tv_sec = st->last_collected_time.tv_sec; + st->last_updated.tv_usec = st->last_collected_time.tv_usec; + + if(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)) + st->last_updated.tv_sec -= st->update_every; + + last_updated_time_align(st); + + usec_t last_updated_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; + + rrdset_debug(st, "initialized last updated time to %0.3" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)last_updated_ut / USEC_PER_SEC); + + return last_updated_ut; +} + +static __thread size_t rrdset_done_statistics_points_stored_per_tier[RRD_STORAGE_TIERS]; + +static inline time_t tier_next_point_time(RRDDIM *rd, struct rrddim_tier *t, time_t now) { + time_t loop = (time_t)rd->update_every * (time_t)t->tier_grouping; + return now + loop - ((now + loop) % loop); +} + +void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut __maybe_unused) { + if (unlikely(!t->next_point_time)) + t->next_point_time = tier_next_point_time(rd, t, sp.end_time); + + if(unlikely(sp.start_time > t->next_point_time)) { + if (likely(!storage_point_is_unset(t->virtual_point))) { + + t->collect_ops->store_metric( + t->db_collection_handle, + t->next_point_time * USEC_PER_SEC, + t->virtual_point.sum, + t->virtual_point.min, + t->virtual_point.max, + t->virtual_point.count, + t->virtual_point.anomaly_count, + t->virtual_point.flags); + } + else { + t->collect_ops->store_metric( + t->db_collection_handle, + t->next_point_time * USEC_PER_SEC, + NAN, + NAN, + NAN, + 0, + 0, SN_FLAG_NONE); + } + + rrdset_done_statistics_points_stored_per_tier[tier]++; + t->virtual_point.count = 0; // make the point unset + t->next_point_time = tier_next_point_time(rd, t, sp.end_time); + } + + // merge the dates into our virtual point + if (unlikely(sp.start_time < t->virtual_point.start_time)) + t->virtual_point.start_time = sp.start_time; + + if (likely(sp.end_time > t->virtual_point.end_time)) + t->virtual_point.end_time = sp.end_time; + + // merge the values into our virtual point + if (likely(!storage_point_is_empty(sp))) { + // we aggregate only non NULLs into higher tiers + + if (likely(!storage_point_is_unset(t->virtual_point))) { + // merge the collected point to our virtual one + t->virtual_point.sum += sp.sum; + t->virtual_point.min = MIN(t->virtual_point.min, sp.min); + t->virtual_point.max = MAX(t->virtual_point.max, sp.max); + t->virtual_point.count += sp.count; + t->virtual_point.anomaly_count += sp.anomaly_count; + t->virtual_point.flags |= sp.flags; + } + else { + // reset our virtual point to this one + t->virtual_point = sp; + } + } +} +#ifdef NETDATA_LOG_COLLECTION_ERRORS +void rrddim_store_metric_with_trace(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags, const char *function) { +#else // !NETDATA_LOG_COLLECTION_ERRORS +void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags) { +#endif // !NETDATA_LOG_COLLECTION_ERRORS +#ifdef NETDATA_LOG_COLLECTION_ERRORS + rd->rrddim_store_metric_count++; + + if(likely(rd->rrddim_store_metric_count > 1)) { + usec_t expected = rd->rrddim_store_metric_last_ut + rd->update_every * USEC_PER_SEC; + + if(point_end_time_ut != rd->rrddim_store_metric_last_ut) { + internal_error(true, + "%s COLLECTION: 'host:%s/chart:%s/dim:%s' granularity %d, collection %zu, expected to store at tier 0 a value at %llu, but it gave %llu [%s%llu usec] (called from %s(), previously by %s())", + (point_end_time_ut < rd->rrddim_store_metric_last_ut) ? "**PAST**" : "GAP", + rrdhost_hostname(rd->rrdset->rrdhost), rrdset_id(rd->rrdset), rrddim_id(rd), + rd->update_every, + rd->rrddim_store_metric_count, + expected, point_end_time_ut, + (point_end_time_ut < rd->rrddim_store_metric_last_ut)?"by -" : "gap ", + expected - point_end_time_ut, + function, + rd->rrddim_store_metric_last_caller?rd->rrddim_store_metric_last_caller:"none"); + } + } + + rd->rrddim_store_metric_last_ut = point_end_time_ut; + rd->rrddim_store_metric_last_caller = function; +#endif // NETDATA_LOG_COLLECTION_ERRORS + + // store the metric on tier 0 + rd->tiers[0]->collect_ops->store_metric(rd->tiers[0]->db_collection_handle, point_end_time_ut, n, 0, 0, 1, 0, flags); + rrdset_done_statistics_points_stored_per_tier[0]++; + + time_t now = (time_t)(point_end_time_ut / USEC_PER_SEC); + + STORAGE_POINT sp = { + .start_time = now - rd->update_every, + .end_time = now, + .min = n, + .max = n, + .sum = n, + .count = 1, + .anomaly_count = (flags & SN_FLAG_NOT_ANOMALOUS) ? 0 : 1, + .flags = flags + }; + + for(size_t tier = 1; tier < storage_tiers ;tier++) { + if(unlikely(!rd->tiers[tier])) continue; + + struct rrddim_tier *t = rd->tiers[tier]; + + if(!rrddim_option_check(rd, RRDDIM_OPTION_BACKFILLED_HIGH_TIERS)) { + // we have not collected this tier before + // let's fill any gap that may exist + rrdr_fill_tier_gap_from_smaller_tiers(rd, tier, now); + rrddim_option_set(rd, RRDDIM_OPTION_BACKFILLED_HIGH_TIERS); + } + + store_metric_at_tier(rd, tier, t, sp, point_end_time_ut); + } +} + +void store_metric_collection_completed() { + global_statistics_rrdset_done_chart_collection_completed(rrdset_done_statistics_points_stored_per_tier); +} + +// caching of dimensions rrdset_done() and rrdset_done_interpolate() loop through +struct rda_item { + const DICTIONARY_ITEM *item; + RRDDIM *rd; +}; + +static __thread struct rda_item *thread_rda = NULL; +static __thread size_t thread_rda_entries = 0; + +struct rda_item *rrdset_thread_rda(size_t *dimensions) { + + if(unlikely(!thread_rda || (*dimensions) > thread_rda_entries)) { + freez(thread_rda); + thread_rda = mallocz((*dimensions) * sizeof(struct rda_item)); + thread_rda_entries = *dimensions; + } + + *dimensions = thread_rda_entries; + return thread_rda; +} + +void rrdset_thread_rda_free(void) { + freez(thread_rda); + thread_rda = NULL; + thread_rda_entries = 0; +} + +static inline size_t rrdset_done_interpolate( + RRDSET *st + , struct rda_item *rda_base + , size_t rda_slots + , usec_t update_every_ut + , usec_t last_stored_ut + , usec_t next_store_ut + , usec_t last_collect_ut + , usec_t now_collect_ut + , char store_this_entry + , uint32_t has_reset_value +) { + RRDDIM *rd; + + size_t stored_entries = 0; // the number of entries we have stored in the db, during this call to rrdset_done() + + usec_t first_ut = last_stored_ut, last_ut = 0; + (void)first_ut; + + ssize_t iterations = (ssize_t)((now_collect_ut - last_stored_ut) / (update_every_ut)); + if((now_collect_ut % (update_every_ut)) == 0) iterations++; + + size_t counter = st->counter; + long current_entry = st->current_entry; + + SN_FLAGS storage_flags = SN_DEFAULT_FLAGS; + + if (has_reset_value) + storage_flags |= SN_FLAG_RESET; + + for( ; next_store_ut <= now_collect_ut ; last_collect_ut = next_store_ut, next_store_ut += update_every_ut, iterations-- ) { + + internal_error(iterations < 0, + "RRDSET: '%s': iterations calculation wrapped! " + "first_ut = %llu, last_stored_ut = %llu, next_store_ut = %llu, now_collect_ut = %llu" + , rrdset_id(st) + , first_ut + , last_stored_ut + , next_store_ut + , now_collect_ut + ); + + rrdset_debug(st, "last_stored_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last updated time)", (NETDATA_DOUBLE)last_stored_ut/USEC_PER_SEC); + rrdset_debug(st, "next_store_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (next interpolation point)", (NETDATA_DOUBLE)next_store_ut/USEC_PER_SEC); + + last_ut = next_store_ut; + + struct rda_item *rda; + size_t dim_id; + for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; + + NETDATA_DOUBLE new_value; + + switch(rd->algorithm) { + case RRD_ALGORITHM_INCREMENTAL: + new_value = (NETDATA_DOUBLE) + ( rd->calculated_value + * (NETDATA_DOUBLE)(next_store_ut - last_collect_ut) + / (NETDATA_DOUBLE)(now_collect_ut - last_collect_ut) + ); + + rrdset_debug(st, "%s: CALC2 INC " NETDATA_DOUBLE_FORMAT " = " + NETDATA_DOUBLE_FORMAT + " * (%llu - %llu)" + " / (%llu - %llu)" + , rrddim_name(rd) + , new_value + , rd->calculated_value + , next_store_ut, last_collect_ut + , now_collect_ut, last_collect_ut + ); + + rd->calculated_value -= new_value; + new_value += rd->last_calculated_value; + rd->last_calculated_value = 0; + new_value /= (NETDATA_DOUBLE)st->update_every; + + if(unlikely(next_store_ut - last_stored_ut < update_every_ut)) { + + rrdset_debug(st, "%s: COLLECTION POINT IS SHORT " NETDATA_DOUBLE_FORMAT " - EXTRAPOLATING", + rrddim_name(rd) + , (NETDATA_DOUBLE)(next_store_ut - last_stored_ut) + ); + + new_value = new_value * (NETDATA_DOUBLE)(st->update_every * USEC_PER_SEC) / (NETDATA_DOUBLE)(next_store_ut - last_stored_ut); + } + break; + + case RRD_ALGORITHM_ABSOLUTE: + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + default: + if(iterations == 1) { + // this is the last iteration + // do not interpolate + // just show the calculated value + + new_value = rd->calculated_value; + } + else { + // we have missed an update + // interpolate in the middle values + + new_value = (NETDATA_DOUBLE) + ( ( (rd->calculated_value - rd->last_calculated_value) + * (NETDATA_DOUBLE)(next_store_ut - last_collect_ut) + / (NETDATA_DOUBLE)(now_collect_ut - last_collect_ut) + ) + + rd->last_calculated_value + ); + + rrdset_debug(st, "%s: CALC2 DEF " NETDATA_DOUBLE_FORMAT " = (((" + "(" NETDATA_DOUBLE_FORMAT " - " NETDATA_DOUBLE_FORMAT ")" + " * %llu" + " / %llu) + " NETDATA_DOUBLE_FORMAT, rrddim_name(rd) + , new_value + , rd->calculated_value, rd->last_calculated_value + , (next_store_ut - first_ut) + , (now_collect_ut - first_ut), rd->last_calculated_value + ); + } + break; + } + + if(unlikely(!store_this_entry)) { + (void) ml_is_anomalous(rd, 0, false); + rrddim_store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE); + rrdcontext_collected_rrddim(rd); + continue; + } + + if(likely(rd->updated && rd->collections_counter > 1 && iterations < st->gap_when_lost_iterations_above)) { + uint32_t dim_storage_flags = storage_flags; + + if (ml_is_anomalous(rd, new_value, true)) { + // clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous + dim_storage_flags &= ~((storage_number)SN_FLAG_NOT_ANOMALOUS); + } + + rrddim_store_metric(rd, next_store_ut, new_value, dim_storage_flags); + rrdcontext_collected_rrddim(rd); + rd->last_stored_value = new_value; + } + else { + (void) ml_is_anomalous(rd, 0, false); + + rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING ", rrddim_name(rd), current_entry); + + rrddim_store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE); + rrdcontext_collected_rrddim(rd); + rd->last_stored_value = NAN; + } + + stored_entries++; + } + + // reset the storage flags for the next point, if any; + storage_flags = SN_DEFAULT_FLAGS; + + st->counter = ++counter; + st->current_entry = current_entry = ((current_entry + 1) >= st->entries) ? 0 : current_entry + 1; + + st->last_updated.tv_sec = (time_t) (last_ut / USEC_PER_SEC); + st->last_updated.tv_usec = 0; + + last_stored_ut = next_store_ut; + } + +/* + st->counter = counter; + st->current_entry = current_entry; + + if(likely(last_ut)) { + st->last_updated.tv_sec = (time_t) (last_ut / USEC_PER_SEC); + st->last_updated.tv_usec = 0; + } +*/ + + return stored_entries; +} + +static inline void rrdset_done_fill_the_gap(RRDSET *st) { + usec_t update_every_ut = st->update_every * USEC_PER_SEC; + usec_t now_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + + long c = 0, entries = st->entries; + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + usec_t next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC; + long current_entry = st->current_entry; + + for(c = 0; c < entries && next_store_ut <= now_collect_ut ; next_store_ut += update_every_ut, c++) { + rd->db[current_entry] = pack_storage_number(NAN, SN_FLAG_NONE); + current_entry = ((current_entry + 1) >= entries) ? 0 : current_entry + 1; + + rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING (FILLED THE GAP)", rrddim_name(rd), current_entry); + } + } + rrddim_foreach_done(rd); + + if(c > 0) { + c--; + st->last_updated.tv_sec += c * st->update_every; + + st->current_entry += c; + st->counter += c; + if(st->current_entry >= st->entries) + st->current_entry -= st->entries; + } +} + +void rrdset_done(RRDSET *st) { + struct timeval now; + + now_realtime_timeval(&now); + rrdset_timed_done(st, now, /* pending_rrdset_next = */ st->counter_done != 0); +} + +void rrdset_timed_done(RRDSET *st, struct timeval now, bool pending_rrdset_next) { + if(unlikely(netdata_exit)) return; + + if (pending_rrdset_next) + rrdset_next(st); + + debug(D_RRD_CALLS, "rrdset_done() for chart '%s'", rrdset_name(st)); + + RRDDIM *rd; + + char + store_this_entry = 1, // boolean: 1 = store this entry, 0 = don't store this entry + first_entry = 0; // boolean: 1 = this is the first entry seen for this chart, 0 = all other entries + + usec_t + last_collect_ut = 0, // the timestamp in microseconds, of the last collected value + now_collect_ut = 0, // the timestamp in microseconds, of this collected value (this is NOW) + last_stored_ut = 0, // the timestamp in microseconds, of the last stored entry in the db + next_store_ut = 0, // the timestamp in microseconds, of the next entry to store in the db + update_every_ut = st->update_every * USEC_PER_SEC; // st->update_every in microseconds + + netdata_thread_disable_cancelability(); + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) { + error("Chart '%s' has the OBSOLETE flag set, but it is collected.", rrdset_id(st)); + rrdset_isnot_obsolete(st); + } + + // check if the chart has a long time to be updated + if(unlikely(st->usec_since_last_update > st->entries * update_every_ut && + st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && st->rrd_memory_mode != RRD_MEMORY_MODE_NONE)) { + info("host '%s', chart '%s': took too long to be updated (counter #%zu, update #%zu, %0.3" NETDATA_DOUBLE_MODIFIER + " secs). Resetting it.", rrdhost_hostname(st->rrdhost), rrdset_id(st), st->counter, st->counter_done, (NETDATA_DOUBLE)st->usec_since_last_update / USEC_PER_SEC); + rrdset_reset(st); + st->usec_since_last_update = update_every_ut; + store_this_entry = 0; + first_entry = 1; + } + + rrdset_debug(st, "microseconds since last update: %llu", st->usec_since_last_update); + + // set last_collected_time + if(unlikely(!st->last_collected_time.tv_sec)) { + // it is the first entry + // set the last_collected_time to now + last_collect_ut = rrdset_init_last_collected_time(st, now) - update_every_ut; + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } + else { + // it is not the first entry + // calculate the proper last_collected_time, using usec_since_last_update + last_collect_ut = rrdset_update_last_collected_time(st); + } + if (unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_NONE)) { + goto after_first_database_work; + } + + // if this set has not been updated in the past + // we fake the last_update time to be = now - usec_since_last_update + if(unlikely(!st->last_updated.tv_sec)) { + // it has never been updated before + // set a fake last_updated, in the past using usec_since_last_update + rrdset_init_last_updated_time(st); + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } + + // check if we will re-write the entire data set + if(unlikely(dt_usec(&st->last_collected_time, &st->last_updated) > st->entries * update_every_ut && + st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)) { + info( + "'%s': too old data (last updated at %"PRId64".%"PRId64", last collected at %"PRId64".%"PRId64"). " + "Resetting it. Will not store the next entry.", + rrdset_id(st), + (int64_t)st->last_updated.tv_sec, + (int64_t)st->last_updated.tv_usec, + (int64_t)st->last_collected_time.tv_sec, + (int64_t)st->last_collected_time.tv_usec); + rrdset_reset(st); + rrdset_init_last_updated_time(st); + + st->usec_since_last_update = update_every_ut; + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } + +#ifdef ENABLE_DBENGINE + // check if we will re-write the entire page + if(unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && + dt_usec(&st->last_collected_time, &st->last_updated) > (RRDENG_BLOCK_SIZE / sizeof(storage_number)) * update_every_ut)) { + info( + "'%s': too old data (last updated at %" PRId64 ".%" PRId64 ", last collected at %" PRId64 ".%" PRId64 "). " + "Resetting it. Will not store the next entry.", + rrdset_id(st), + (int64_t)st->last_updated.tv_sec, + (int64_t)st->last_updated.tv_usec, + (int64_t)st->last_collected_time.tv_sec, + (int64_t)st->last_collected_time.tv_usec); + rrdset_reset(st); + rrdset_init_last_updated_time(st); + + st->usec_since_last_update = update_every_ut; + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } +#endif + + // these are the 3 variables that will help us in interpolation + // last_stored_ut = the last time we added a value to the storage + // now_collect_ut = the time the current value has been collected + // next_store_ut = the time of the next interpolation point + now_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + last_stored_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; + next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC; + + if(unlikely(!st->counter_done)) { + // if we have not collected metrics this session (st->counter_done == 0) + // and we have collected metrics for this chart in the past (st->counter != 0) + // fill the gap (the chart has been just loaded from disk) + if(unlikely(st->counter) && st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) { + // TODO this should be inside the storage engine + rrdset_done_fill_the_gap(st); + last_stored_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; + next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC; + } + if (st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + // set a fake last_updated to jump to current time + rrdset_init_last_updated_time(st); + last_stored_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; + next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC; + } + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST))) { + store_this_entry = 1; + last_collect_ut = next_store_ut - update_every_ut; + + rrdset_debug(st, "Fixed first entry."); + } + else { + store_this_entry = 0; + + rrdset_debug(st, "Will not store the next entry."); + } + } + +after_first_database_work: + st->counter_done++; + + if(unlikely(rrdhost_has_rrdpush_sender_enabled(st->rrdhost))) + rrdset_done_push(st); + + uint32_t has_reset_value = 0; + + size_t rda_slots = dictionary_entries(st->rrddim_root_index); + struct rda_item *rda_base = rrdset_thread_rda(&rda_slots); + + size_t dim_id; + size_t dimensions = 0; + struct rda_item *rda = rda_base; + total_number collected_total = 0; + total_number last_collected_total = 0; + rrddim_foreach_read(rd, st) { + if(rd_dfe.counter >= rda_slots) + break; + + rda = &rda_base[dimensions++]; + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { + rda->item = NULL; + rda->rd = NULL; + continue; + } + + // store the dimension in the array + rda->item = dictionary_acquired_item_dup(st->rrddim_root_index, rd_dfe.item); + rda->rd = dictionary_acquired_item_value(rda->item); + + // calculate totals + if(likely(rd->updated)) { + // if the new is smaller than the old (an overflow, or reset), set the old equal to the new + // to reset the calculation (it will give zero as the calculation for this second) + if(unlikely(rd->algorithm == RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL && rd->last_collected_value > rd->collected_value)) { + debug(D_RRD_STATS, "'%s' / '%s': RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT + , rrdset_id(st) + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + ); + + if(!(rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS))) + has_reset_value = 1; + + rd->last_collected_value = rd->collected_value; + } + + last_collected_total += rd->last_collected_value; + collected_total += rd->collected_value; + + if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE))) { + error("Dimension %s in chart '%s' has the OBSOLETE flag set, but it is collected.", rrddim_name(rd), rrdset_id(st)); + rrddim_isnot_obsolete(st, rd); + } + } + } + rrddim_foreach_done(rd); + rda_slots = dimensions; + + if (unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_NONE)) + goto after_second_database_work; + + rrdset_debug(st, "last_collect_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last collection time)", (NETDATA_DOUBLE)last_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "now_collect_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (current collection time)", (NETDATA_DOUBLE)now_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "last_stored_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last updated time)", (NETDATA_DOUBLE)last_stored_ut/USEC_PER_SEC); + rrdset_debug(st, "next_store_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (next interpolation point)", (NETDATA_DOUBLE)next_store_ut/USEC_PER_SEC); + + // process all dimensions to calculate their values + // based on the collected figures only + // at this stage we do not interpolate anything + for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; + + if(unlikely(!rd->updated)) { + rd->calculated_value = 0; + continue; + } + + rrdset_debug(st, "%s: START " + " last_collected_value = " COLLECTED_NUMBER_FORMAT + " collected_value = " COLLECTED_NUMBER_FORMAT + " last_calculated_value = " NETDATA_DOUBLE_FORMAT + " calculated_value = " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value + ); + + switch(rd->algorithm) { + case RRD_ALGORITHM_ABSOLUTE: + rd->calculated_value = (NETDATA_DOUBLE)rd->collected_value + * (NETDATA_DOUBLE)rd->multiplier + / (NETDATA_DOUBLE)rd->divisor; + + rrdset_debug(st, "%s: CALC ABS/ABS-NO-IN " NETDATA_DOUBLE_FORMAT " = " + COLLECTED_NUMBER_FORMAT + " * " NETDATA_DOUBLE_FORMAT + " / " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->calculated_value + , rd->collected_value + , (NETDATA_DOUBLE)rd->multiplier + , (NETDATA_DOUBLE)rd->divisor + ); + break; + + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + if(unlikely(!collected_total)) + rd->calculated_value = 0; + else + // the percentage of the current value + // over the total of all dimensions + rd->calculated_value = + (NETDATA_DOUBLE)100 + * (NETDATA_DOUBLE)rd->collected_value + / (NETDATA_DOUBLE)collected_total; + + rrdset_debug(st, "%s: CALC PCENT-ROW " NETDATA_DOUBLE_FORMAT " = 100" + " * " COLLECTED_NUMBER_FORMAT + " / " COLLECTED_NUMBER_FORMAT + , rrddim_name(rd) + , rd->calculated_value + , rd->collected_value + , collected_total + ); + break; + + case RRD_ALGORITHM_INCREMENTAL: + if(unlikely(rd->collections_counter <= 1)) { + rd->calculated_value = 0; + continue; + } + + // If the new is smaller than the old (an overflow, or reset), set the old equal to the new + // to reset the calculation (it will give zero as the calculation for this second). + // It is imperative to set the comparison to uint64_t since type collected_number is signed and + // produces wrong results as far as incremental counters are concerned. + if(unlikely((uint64_t)rd->last_collected_value > (uint64_t)rd->collected_value)) { + debug(D_RRD_STATS, "'%s' / '%s': RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT + , rrdset_id(st) + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value); + + if(!(rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS))) + has_reset_value = 1; + + uint64_t last = (uint64_t)rd->last_collected_value; + uint64_t new = (uint64_t)rd->collected_value; + uint64_t max = (uint64_t)rd->collected_value_max; + uint64_t cap = 0; + + // Signed values are handled by exploiting two's complement which will produce positive deltas + if (max > 0x00000000FFFFFFFFULL) + cap = 0xFFFFFFFFFFFFFFFFULL; // handles signed and unsigned 64-bit counters + else + cap = 0x00000000FFFFFFFFULL; // handles signed and unsigned 32-bit counters + + uint64_t delta = cap - last + new; + uint64_t max_acceptable_rate = (cap / 100) * MAX_INCREMENTAL_PERCENT_RATE; + + // If the delta is less than the maximum acceptable rate and the previous value was near the cap + // then this is an overflow. There can be false positives such that a reset is detected as an + // overflow. + // TODO: remember recent history of rates and compare with current rate to reduce this chance. + if (delta < max_acceptable_rate) { + rd->calculated_value += + (NETDATA_DOUBLE) delta + * (NETDATA_DOUBLE) rd->multiplier + / (NETDATA_DOUBLE) rd->divisor; + } else { + // This is a reset. Any overflow with a rate greater than MAX_INCREMENTAL_PERCENT_RATE will also + // be detected as a reset instead. + rd->calculated_value += (NETDATA_DOUBLE)0; + } + } + else { + rd->calculated_value += + (NETDATA_DOUBLE) (rd->collected_value - rd->last_collected_value) + * (NETDATA_DOUBLE) rd->multiplier + / (NETDATA_DOUBLE) rd->divisor; + } + + rrdset_debug(st, "%s: CALC INC PRE " NETDATA_DOUBLE_FORMAT " = (" + COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT + ")" + " * " NETDATA_DOUBLE_FORMAT + " / " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->calculated_value + , rd->collected_value, rd->last_collected_value + , (NETDATA_DOUBLE)rd->multiplier + , (NETDATA_DOUBLE)rd->divisor + ); + break; + + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + if(unlikely(rd->collections_counter <= 1)) { + rd->calculated_value = 0; + continue; + } + + // the percentage of the current increment + // over the increment of all dimensions together + if(unlikely(collected_total == last_collected_total)) + rd->calculated_value = 0; + else + rd->calculated_value = + (NETDATA_DOUBLE)100 + * (NETDATA_DOUBLE)(rd->collected_value - rd->last_collected_value) + / (NETDATA_DOUBLE)(collected_total - last_collected_total); + + rrdset_debug(st, "%s: CALC PCENT-DIFF " NETDATA_DOUBLE_FORMAT " = 100" + " * (" COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT ")" + " / (" COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT ")" + , rrddim_name(rd) + , rd->calculated_value + , rd->collected_value, rd->last_collected_value + , collected_total, last_collected_total + ); + break; + + default: + // make the default zero, to make sure + // it gets noticed when we add new types + rd->calculated_value = 0; + + rrdset_debug(st, "%s: CALC " NETDATA_DOUBLE_FORMAT " = 0" + , rrddim_name(rd) + , rd->calculated_value + ); + break; + } + + rrdset_debug(st, "%s: PHASE2 " + " last_collected_value = " COLLECTED_NUMBER_FORMAT + " collected_value = " COLLECTED_NUMBER_FORMAT + " last_calculated_value = " NETDATA_DOUBLE_FORMAT + " calculated_value = " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value + ); + } + + // at this point we have all the calculated values ready + // it is now time to interpolate values on a second boundary + +// #ifdef NETDATA_INTERNAL_CHECKS +// if(unlikely(now_collect_ut < next_store_ut && st->counter_done > 1)) { +// // this is collected in the same interpolation point +// rrdset_debug(st, "THIS IS IN THE SAME INTERPOLATION POINT"); +// info("INTERNAL CHECK: host '%s', chart '%s' collection %zu is in the same interpolation point: short by %llu microseconds", st->rrdhost->hostname, rrdset_name(st), st->counter_done, next_store_ut - now_collect_ut); +// } +// #endif + + rrdset_done_interpolate( + st + , rda_base + , rda_slots + , update_every_ut + , last_stored_ut + , next_store_ut + , last_collect_ut + , now_collect_ut + , store_this_entry + , has_reset_value + ); + +after_second_database_work: + for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; + + if(unlikely(!rd->updated)) + continue; + + rrdset_debug(st, "%s: setting last_collected_value (old: " COLLECTED_NUMBER_FORMAT ") to last_collected_value (new: " COLLECTED_NUMBER_FORMAT ")", rrddim_name(rd), rd->last_collected_value, rd->collected_value); + + rd->last_collected_value = rd->collected_value; + + switch(rd->algorithm) { + case RRD_ALGORITHM_INCREMENTAL: + if(unlikely(!first_entry)) { + rrdset_debug(st, "%s: setting last_calculated_value (old: " NETDATA_DOUBLE_FORMAT ") to " + "last_calculated_value (new: " NETDATA_DOUBLE_FORMAT ")" + , rrddim_name(rd) + , rd->last_calculated_value + rd->calculated_value + , rd->calculated_value); + + rd->last_calculated_value += rd->calculated_value; + } + else { + rrdset_debug(st, "THIS IS THE FIRST POINT"); + } + break; + + case RRD_ALGORITHM_ABSOLUTE: + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + rrdset_debug(st, "%s: setting last_calculated_value (old: " NETDATA_DOUBLE_FORMAT ") to " + "last_calculated_value (new: " NETDATA_DOUBLE_FORMAT ")" + , rrddim_name(rd) + , rd->last_calculated_value + , rd->calculated_value); + + rd->last_calculated_value = rd->calculated_value; + break; + } + + rd->calculated_value = 0; + rd->collected_value = 0; + rd->updated = 0; + + rrdset_debug(st, "%s: END " + " last_collected_value = " COLLECTED_NUMBER_FORMAT + " collected_value = " COLLECTED_NUMBER_FORMAT + " last_calculated_value = " NETDATA_DOUBLE_FORMAT + " calculated_value = " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value + ); + } + + // ALL DONE ABOUT THE DATA UPDATE + // -------------------------------------------------------------------- + + if(unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_MAP)) { + // update the memory mapped files with the latest values + + rrdset_memory_file_update(st); + + for(dim_id = 0, rda = rda_base; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; + rrddim_memory_file_update(rd); + } + } + + for(dim_id = 0, rda = rda_base; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; + + dictionary_acquired_item_release(st->rrddim_root_index, rda->item); + rda->item = NULL; + rda->rd = NULL; + } + + rrdcontext_collected_rrdset(st); + + netdata_thread_enable_cancelability(); + + store_metric_collection_completed(); +} + +time_t rrdset_set_update_every(RRDSET *st, time_t update_every) { + + internal_error(true, "RRDSET '%s' switching update every from %d to %d", + rrdset_id(st), (int)st->update_every, (int)update_every); + + time_t prev_update_every = st->update_every; + st->update_every = update_every; + + // switch update every to the storage engine + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (rd->tiers[tier] && rd->tiers[tier]->db_collection_handle) + rd->tiers[tier]->collect_ops->change_collection_frequency(rd->tiers[tier]->db_collection_handle, (int)(st->rrdhost->db[tier].tier_grouping * st->update_every)); + } + + assert(rd->update_every == prev_update_every && + "chart's update every differs from the update every of its dimensions"); + rd->update_every = st->update_every; + } + rrddim_foreach_done(rd); + + return prev_update_every; +} + +// ---------------------------------------------------------------------------- +// compatibility layer for RRDSET files v019 + +#define RRDSET_MAGIC_V019 "NETDATA RRD SET FILE V019" +#define RRD_ID_LENGTH_MAX_V019 200 + +struct avl_element_v019 { + void *avl_link[2]; + signed char avl_balance; +}; +struct avl_tree_type_v019 { + void *root; + int (*compar)(void *a, void *b); +}; +struct avl_tree_lock_v019 { + struct avl_tree_type_v019 avl_tree; + pthread_rwlock_t rwlock; +}; +struct rrdset_map_save_v019 { + struct avl_element_v019 avl; // ignored + struct avl_element_v019 avlname; // ignored + char id[RRD_ID_LENGTH_MAX_V019 + 1]; // check to reset all - update on load + void *name; // ignored + void *unused_ptr; // ignored + void *type; // ignored + void *family; // ignored + void *title; // ignored + void *units; // ignored + void *context; // ignored + uint32_t hash_context; // ignored + uint32_t chart_type; // ignored + int update_every; // check to reset all - update on load + long entries; // check to reset all - update on load + long current_entry; // NEEDS TO BE UPDATED - FIXED ON LOAD + uint32_t flags; // ignored + void *exporting_flags; // ignored + int gap_when_lost_iterations_above; // ignored + long priority; // ignored + uint32_t rrd_memory_mode; // ignored + void *cache_dir; // ignored + char cache_filename[FILENAME_MAX+1]; // ignored - update on load + pthread_rwlock_t rrdset_rwlock; // ignored + size_t counter; // NEEDS TO BE UPDATED - maintained on load + size_t counter_done; // ignored + union { // + time_t last_accessed_time; // ignored + time_t last_entry_t; // ignored + }; // + time_t upstream_resync_time; // ignored + void *plugin_name; // ignored + void *module_name; // ignored + void *chart_uuid; // ignored + void *state; // ignored + size_t unused[3]; // ignored + size_t rrddim_page_alignment; // ignored + uint32_t hash; // ignored + uint32_t hash_name; // ignored + usec_t usec_since_last_update; // NEEDS TO BE UPDATED - maintained on load + struct timeval last_updated; // NEEDS TO BE UPDATED - check to reset all - fixed on load + struct timeval last_collected_time; // ignored + long long collected_total; // ignored + long long last_collected_total; // ignored + void *rrdfamily; // ignored + void *rrdhost; // ignored + void *next; // ignored + long double green; // ignored + long double red; // ignored + struct avl_tree_lock_v019 rrdvar_root_index; // ignored + void *variables; // ignored + void *alarms; // ignored + unsigned long memsize; // check to reset all - update on load + char magic[sizeof(RRDSET_MAGIC_V019) + 1]; // check to reset all - update on load + struct avl_tree_lock_v019 dimensions_index; // ignored + void *dimensions; // ignored +}; + +void rrdset_memory_file_update(RRDSET *st) { + if(!st->st_on_file) return; + struct rrdset_map_save_v019 *st_on_file = st->st_on_file; + + st_on_file->current_entry = st->current_entry; + st_on_file->counter = st->counter; + st_on_file->usec_since_last_update = st->usec_since_last_update; + st_on_file->last_updated.tv_sec = st->last_updated.tv_sec; + st_on_file->last_updated.tv_usec = st->last_updated.tv_usec; +} + +const char *rrdset_cache_filename(RRDSET *st) { + if(!st->st_on_file) return NULL; + struct rrdset_map_save_v019 *st_on_file = st->st_on_file; + return st_on_file->cache_filename; +} + +void rrdset_memory_file_free(RRDSET *st) { + if(!st->st_on_file) return; + + // needed for memory mode map, to save the latest state + rrdset_memory_file_update(st); + + struct rrdset_map_save_v019 *st_on_file = st->st_on_file; + netdata_munmap(st_on_file, st_on_file->memsize); + + // remove the pointers from the RRDDIM + st->st_on_file = NULL; +} + +void rrdset_memory_file_save(RRDSET *st) { + if(!st->st_on_file) return; + + rrdset_memory_file_update(st); + + struct rrdset_map_save_v019 *st_on_file = st->st_on_file; + if(st_on_file->rrd_memory_mode != RRD_MEMORY_MODE_SAVE) return; + + memory_file_save(st_on_file->cache_filename, st->st_on_file, st_on_file->memsize); +} + +bool rrdset_memory_load_or_create_map_save(RRDSET *st, RRD_MEMORY_MODE memory_mode) { + if(memory_mode != RRD_MEMORY_MODE_SAVE && memory_mode != RRD_MEMORY_MODE_MAP) + return false; + + char fullfilename[FILENAME_MAX + 1]; + snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", st->cache_dir); + + unsigned long size = sizeof(struct rrdset_map_save_v019); + struct rrdset_map_save_v019 *st_on_file = (struct rrdset_map_save_v019 *)netdata_mmap( + fullfilename, size, + ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE), + 0); + + if(!st_on_file) return false; + + time_t now = now_realtime_sec(); + + st_on_file->magic[sizeof(RRDSET_MAGIC_V019)] = '\0'; + if(strcmp(st_on_file->magic, RRDSET_MAGIC_V019) != 0) { + info("Initializing file '%s'.", fullfilename); + memset(st_on_file, 0, size); + } + else if(strncmp(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019) != 0) { + error("File '%s' contents are not for chart '%s'. Clearing it.", fullfilename, rrdset_id(st)); + memset(st_on_file, 0, size); + } + else if(st_on_file->memsize != size || st_on_file->entries != st->entries) { + error("File '%s' does not have the desired size. Clearing it.", fullfilename); + memset(st_on_file, 0, size); + } + else if(st_on_file->update_every != st->update_every) { + error("File '%s' does not have the desired granularity. Clearing it.", fullfilename); + memset(st_on_file, 0, size); + } + else if((now - st_on_file->last_updated.tv_sec) > st->update_every * st->entries) { + info("File '%s' is too old. Clearing it.", fullfilename); + memset(st_on_file, 0, size); + } + else if(st_on_file->last_updated.tv_sec > now + st->update_every) { + error("File '%s' refers to the future by %zd secs. Resetting it to now.", fullfilename, (ssize_t)(st_on_file->last_updated.tv_sec - now)); + st_on_file->last_updated.tv_sec = now; + } + + if(st_on_file->current_entry >= st_on_file->entries) + st_on_file->current_entry = 0; + + // make sure the database is aligned + bool align_last_updated = false; + if(st_on_file->last_updated.tv_sec) { + st_on_file->update_every = st->update_every; + align_last_updated = true; + } + + // copy the useful values to st + st->current_entry = st_on_file->current_entry; + st->counter = st_on_file->counter; + st->usec_since_last_update = st_on_file->usec_since_last_update; + st->last_updated.tv_sec = st_on_file->last_updated.tv_sec; + st->last_updated.tv_usec = st_on_file->last_updated.tv_usec; + + // link it to st + st->st_on_file = st_on_file; + + // clear everything + memset(st_on_file, 0, size); + + // set the values we need + strncpyz(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019 + 1); + strcpy(st_on_file->cache_filename, fullfilename); + strcpy(st_on_file->magic, RRDSET_MAGIC_V019); + st_on_file->memsize = size; + st_on_file->entries = st->entries; + st_on_file->update_every = st->update_every; + st_on_file->rrd_memory_mode = memory_mode; + + if(align_last_updated) + last_updated_time_align(st); + + // copy the useful values back to st_on_file + rrdset_memory_file_update(st); + + return true; +} diff --git a/database/rrdsetvar.c b/database/rrdsetvar.c new file mode 100644 index 0000000..22cf8a1 --- /dev/null +++ b/database/rrdsetvar.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrd.h" + +typedef struct rrdsetvar { + STRING *name; // variable name + void *value; // we need this to maintain the allocation for custom chart variables + + const RRDVAR_ACQUIRED *rrdvar_local; + const RRDVAR_ACQUIRED *rrdvar_family_chart_id; + const RRDVAR_ACQUIRED *rrdvar_family_chart_name; + const RRDVAR_ACQUIRED *rrdvar_host_chart_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name; + + RRDVAR_FLAGS flags:24; + RRDVAR_TYPE type:8; +} RRDSETVAR; + +// should only be called while the rrdsetvar dict is write locked +// otherwise, 2+ threads may be setting the same variables at the same time +static inline void rrdsetvar_free_rrdvars_unsafe(RRDSET *st, RRDSETVAR *rs) { + RRDHOST *host = st->rrdhost; + + // ------------------------------------------------------------------------ + // CHART + + if(st->rrdvars) { + rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local); + rs->rrdvar_local = NULL; + } + + // ------------------------------------------------------------------------ + // FAMILY + + if(st->rrdfamily) { + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_chart_id); + rs->rrdvar_family_chart_id = NULL; + + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_chart_name); + rs->rrdvar_family_chart_name = NULL; + } + + // ------------------------------------------------------------------------ + // HOST + + if(host->rrdvars && host->health_enabled) { + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id); + rs->rrdvar_host_chart_id = NULL; + + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name); + rs->rrdvar_host_chart_name = NULL; + } +} + +// should only be called while the rrdsetvar dict is write locked +// otherwise, 2+ threads may be setting the same variables at the same time +static inline void rrdsetvar_update_rrdvars_unsafe(RRDSET *st, RRDSETVAR *rs) { + RRDHOST *host = st->rrdhost; + + RRDVAR_FLAGS options = rs->flags; + options &= ~RRDVAR_OPTIONS_REMOVED_WHEN_PROPAGATING_TO_RRDVAR; + + // ------------------------------------------------------------------------ + // free the old ones (if any) + + rrdsetvar_free_rrdvars_unsafe(st, rs); + + // ------------------------------------------------------------------------ + // KEYS + + char buffer[RRDVAR_MAX_LENGTH + 1]; + snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), string2str(rs->name)); + STRING *key_chart_id = string_strdupz(buffer); + + snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), string2str(rs->name)); + STRING *key_chart_name = string_strdupz(buffer); + + // ------------------------------------------------------------------------ + // CHART + + if(st->rrdvars) { + rs->rrdvar_local = rrdvar_add_and_acquire("local", st->rrdvars, rs->name, rs->type, options, rs->value); + } + + // ------------------------------------------------------------------------ + // FAMILY + + if(st->rrdfamily) { + rs->rrdvar_family_chart_id = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_chart_id, rs->type, options, rs->value); + rs->rrdvar_family_chart_name = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_chart_name, rs->type, options, rs->value); + } + + // ------------------------------------------------------------------------ + // HOST + + if(host->rrdvars && host->health_enabled) { + rs->rrdvar_host_chart_id = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_id, rs->type, options, rs->value); + rs->rrdvar_host_chart_name = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_name, rs->type, options, rs->value); + } + + // free the keys + string_freez(key_chart_id); + string_freez(key_chart_name); +} + +static void rrdsetvar_free_value_unsafe(RRDSETVAR *rs) { + if(rs->flags & RRDVAR_FLAG_ALLOCATED) { + void *old = rs->value; + rs->value = NULL; + rs->flags &= ~RRDVAR_FLAG_ALLOCATED; + freez(old); + } +} + +static void rrdsetvar_set_value_unsafe(RRDSETVAR *rs, void *new_value) { + rrdsetvar_free_value_unsafe(rs); + + if(new_value) + rs->value = new_value; + else { + NETDATA_DOUBLE *n = mallocz(sizeof(NETDATA_DOUBLE)); + *n = NAN; + rs->value = n; + rs->flags |= RRDVAR_FLAG_ALLOCATED; + } +} + +struct rrdsetvar_constructor { + RRDSET *rrdset; + const char *name; + void *value; + RRDVAR_FLAGS flags :16; + RRDVAR_TYPE type:8; +}; + +static void rrdsetvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *constructor_data) { + RRDSETVAR *rs = rrdsetvar; + struct rrdsetvar_constructor *ctr = constructor_data; + + ctr->flags &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; + + rs->name = string_strdupz(ctr->name); + rs->type = ctr->type; + rs->flags = ctr->flags; + rrdsetvar_set_value_unsafe(rs, ctr->value); + + // create the rrdvariables while we are having a write lock to the dictionary + rrdsetvar_update_rrdvars_unsafe(ctr->rrdset, rs); +} + +static bool rrdsetvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *new_rrdsetvar __maybe_unused, void *constructor_data) { + RRDSETVAR *rs = rrdsetvar; + struct rrdsetvar_constructor *ctr = constructor_data; + + ctr->flags &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; + + RRDVAR_FLAGS options = rs->flags; + options &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; + + if(((ctr->value == NULL && rs->value != NULL && rs->flags & RRDVAR_FLAG_ALLOCATED) || (rs->value == ctr->value)) + && ctr->flags == options && rs->type == ctr->type) { + // don't reset it - everything is the same, or as it should... + return false; + } + + internal_error(true, "RRDSETVAR: resetting variable '%s' of chart '%s' of host '%s', options from 0x%x to 0x%x, type from %d to %d", + string2str(rs->name), rrdset_id(ctr->rrdset), rrdhost_hostname(ctr->rrdset->rrdhost), + options, ctr->flags, rs->type, ctr->type); + + rrdsetvar_free_value_unsafe(rs); // we are going to change the options, so free it before setting it + rs->flags = ctr->flags; + rs->type = ctr->type; + rrdsetvar_set_value_unsafe(rs, ctr->value); + + // recreate the rrdvariables while we are having a write lock to the dictionary + rrdsetvar_update_rrdvars_unsafe(ctr->rrdset, rs); + return true; +} + +static void rrdsetvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *rrdset __maybe_unused) { + RRDSET *st = rrdset; + RRDSETVAR *rs = rrdsetvar; + + rrdsetvar_free_rrdvars_unsafe(st, rs); + rrdsetvar_free_value_unsafe(rs); + string_freez(rs->name); + rs->name = NULL; +} + +void rrdsetvar_index_init(RRDSET *st) { + if(!st->rrdsetvar_root_index) { + st->rrdsetvar_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(st->rrdsetvar_root_index, rrdsetvar_insert_callback, NULL); + dictionary_register_conflict_callback(st->rrdsetvar_root_index, rrdsetvar_conflict_callback, NULL); + dictionary_register_delete_callback(st->rrdsetvar_root_index, rrdsetvar_delete_callback, st); + } +} + +void rrdsetvar_index_destroy(RRDSET *st) { + dictionary_destroy(st->rrdsetvar_root_index); + st->rrdsetvar_root_index = NULL; +} + +const RRDSETVAR_ACQUIRED *rrdsetvar_add_and_acquire(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags) { + struct rrdsetvar_constructor tmp = { + .name = name, + .type = type, + .value = value, + .flags = flags, + .rrdset = st, + }; + + const RRDSETVAR_ACQUIRED *rsa = (const RRDSETVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(st->rrdsetvar_root_index, name, -1, NULL, sizeof(RRDSETVAR), &tmp); + return rsa; +} + +void rrdsetvar_add_and_leave_released(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags) { + const RRDSETVAR_ACQUIRED *rsa = rrdsetvar_add_and_acquire(st, name, type, value, flags); + dictionary_acquired_item_release(st->rrdsetvar_root_index, (const DICTIONARY_ITEM *)rsa); +} + +void rrdsetvar_rename_all(RRDSET *st) { + debug(D_VARIABLES, "RRDSETVAR rename for chart id '%s' name '%s'", rrdset_id(st), rrdset_name(st)); + + RRDSETVAR *rs; + dfe_start_write(st->rrdsetvar_root_index, rs) { + // should only be called while the rrdsetvar dict is write locked + rrdsetvar_update_rrdvars_unsafe(st, rs); + } + dfe_done(rs); + + rrdcalc_link_matching_alerts_to_rrdset(st); +} + +void rrdsetvar_release_and_delete_all(RRDSET *st) { + RRDSETVAR *rs; + dfe_start_write(st->rrdsetvar_root_index, rs) { + dictionary_del_advanced(st->rrdsetvar_root_index, string2str(rs->name), (ssize_t)string_strlen(rs->name) + 1); + } + dfe_done(rs); +} + +void rrdsetvar_release(DICTIONARY *dict, const RRDSETVAR_ACQUIRED *rsa) { + dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rsa); +} + +// -------------------------------------------------------------------------------------------------------------------- +// custom chart variables + +const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name) { + STRING *name_string = rrdvar_name_to_string(name); + const RRDSETVAR_ACQUIRED *rs = rrdsetvar_add_and_acquire(st, string2str(name_string), RRDVAR_TYPE_CALCULATED, NULL, RRDVAR_FLAG_CUSTOM_CHART_VAR); + string_freez(name_string); + return rs; +} + +void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value) { + if(!rsa) return; + + RRDSETVAR *rs = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rsa); + + if(rs->type != RRDVAR_TYPE_CALCULATED || !(rs->flags & RRDVAR_FLAG_CUSTOM_CHART_VAR) || !(rs->flags & RRDVAR_FLAG_ALLOCATED)) { + error("RRDSETVAR: requested to set variable '%s' of chart '%s' on host '%s' to value " NETDATA_DOUBLE_FORMAT + " but the variable is not a custom chart one (it has options 0x%x, value pointer %p). Ignoring request.", string2str(rs->name), rrdset_id(st), rrdhost_hostname(st->rrdhost), value, (uint32_t)rs->flags, rs->value); + } + else { + NETDATA_DOUBLE *v = rs->value; + if(*v != value) { + *v = value; + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES); + } + } +} + +void rrdsetvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb) { + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES); + + // send the chart local custom variables + RRDSETVAR *rs; + dfe_start_read(st->rrdsetvar_root_index, rs) { + if(unlikely(rs->type == RRDVAR_TYPE_CALCULATED && rs->flags & RRDVAR_FLAG_CUSTOM_CHART_VAR)) { + NETDATA_DOUBLE *value = (NETDATA_DOUBLE *) rs->value; + + buffer_sprintf(wb + , "VARIABLE CHART %s = " NETDATA_DOUBLE_FORMAT "\n" + , string2str(rs->name) + , *value + ); + } + } + dfe_done(rs); +} diff --git a/database/rrdsetvar.h b/database/rrdsetvar.h new file mode 100644 index 0000000..0c2e667 --- /dev/null +++ b/database/rrdsetvar.h @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDSETVAR_H +#define NETDATA_RRDSETVAR_H 1 + +#include "rrd.h" + +// variables linked to charts +// We link variables to point to the values that are already +// calculated / processed by the normal data collection process +// This means, there will be no speed penalty for using +// these variables + +void rrdsetvar_index_init(RRDSET *st); +void rrdsetvar_index_destroy(RRDSET *st); +void rrdsetvar_release_and_delete_all(RRDSET *st); + +#define rrdsetvar_custom_chart_variable_release(st, rsa) rrdsetvar_release((st)->rrdsetvar_root_index, rsa) +void rrdsetvar_release(DICTIONARY *dict, const RRDSETVAR_ACQUIRED *rsa); + +const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name); +void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value); + +void rrdsetvar_rename_all(RRDSET *st); +const RRDSETVAR_ACQUIRED *rrdsetvar_add_and_acquire(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags); +void rrdsetvar_add_and_leave_released(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags); + +void rrdsetvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb); + +#endif //NETDATA_RRDSETVAR_H diff --git a/database/rrdvar.c b/database/rrdvar.c new file mode 100644 index 0000000..28be4f6 --- /dev/null +++ b/database/rrdvar.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrd.h" + +// the variables as stored in the variables indexes +// there are 3 indexes: +// 1. at each chart (RRDSET.rrdvar_root_index) +// 2. at each context (RRDFAMILY.rrdvar_root_index) +// 3. at each host (RRDHOST.rrdvar_root_index) +typedef struct rrdvar { + STRING *name; + void *value; + RRDVAR_FLAGS flags:24; + RRDVAR_TYPE type:8; +} RRDVAR; + +// ---------------------------------------------------------------------------- +// RRDVAR management + +inline int rrdvar_fix_name(char *variable) { + int fixed = 0; + while(*variable) { + if (!isalnum(*variable) && *variable != '.' && *variable != '_') { + *variable++ = '_'; + fixed++; + } + else + variable++; + } + + return fixed; +} + +inline STRING *rrdvar_name_to_string(const char *name) { + char *variable = strdupz(name); + rrdvar_fix_name(variable); + STRING *name_string = string_strdupz(variable); + freez(variable); + return name_string; +} + +struct rrdvar_constructor { + STRING *name; + void *value; + RRDVAR_FLAGS options:16; + RRDVAR_TYPE type:8; + + enum { + RRDVAR_REACT_NONE = 0, + RRDVAR_REACT_NEW = (1 << 0), + } react_action; +}; + +static void rrdvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar, void *constructor_data) { + RRDVAR *rv = rrdvar; + struct rrdvar_constructor *ctr = constructor_data; + + ctr->options &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; + + rv->name = string_dup(ctr->name); + rv->type = ctr->type; + rv->flags = ctr->options; + + if(!ctr->value) { + NETDATA_DOUBLE *v = mallocz(sizeof(NETDATA_DOUBLE)); + *v = NAN; + rv->value = v; + rv->flags |= RRDVAR_FLAG_ALLOCATED; + } + else + rv->value = ctr->value; + + ctr->react_action = RRDVAR_REACT_NEW; +} + +static void rrdvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar, void *nothing __maybe_unused) { + RRDVAR *rv = rrdvar; + + if(rv->flags & RRDVAR_FLAG_ALLOCATED) + freez(rv->value); + + string_freez(rv->name); + rv->name = NULL; +} + +DICTIONARY *rrdvariables_create(void) { + DICTIONARY *dict = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(dict, rrdvar_insert_callback, NULL); + dictionary_register_delete_callback(dict, rrdvar_delete_callback, NULL); + + return dict; +} + +void rrdvariables_destroy(DICTIONARY *dict) { + dictionary_destroy(dict); +} + +static inline const RRDVAR_ACQUIRED *rrdvar_get_and_acquire(DICTIONARY *dict, STRING *name) { + return (const RRDVAR_ACQUIRED *)dictionary_get_and_acquire_item_advanced(dict, string2str(name), (ssize_t)string_strlen(name) + 1); +} + +inline void rrdvar_release_and_del(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) { + if(unlikely(!dict || !rva)) return; + + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + + dictionary_del_advanced(dict, string2str(rv->name), (ssize_t)string_strlen(rv->name) + 1); + + dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva); +} + +inline const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(const char *scope __maybe_unused, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value) { + if(unlikely(!dict || !name)) return NULL; + + struct rrdvar_constructor tmp = { + .name = name, + .value = value, + .type = type, + .options = options, + .react_action = RRDVAR_REACT_NONE, + }; + return (const RRDVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(dict, string2str(name), (ssize_t)string_strlen(name) + 1, NULL, sizeof(RRDVAR), &tmp); +} + +void rrdvar_delete_all(DICTIONARY *dict) { + dictionary_flush(dict); +} + + +// ---------------------------------------------------------------------------- +// CUSTOM HOST VARIABLES + +inline int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data) { + if(unlikely(!dict)) return 0; // when health is not enabled + return dictionary_walkthrough_read(dict, callback, data); +} + +const RRDVAR_ACQUIRED *rrdvar_custom_host_variable_add_and_acquire(RRDHOST *host, const char *name) { + DICTIONARY *dict = host->rrdvars; + if(unlikely(!dict)) return NULL; // when health is not enabled + + STRING *name_string = rrdvar_name_to_string(name); + + const RRDVAR_ACQUIRED *rva = rrdvar_add_and_acquire("host", dict, name_string, RRDVAR_TYPE_CALCULATED, RRDVAR_FLAG_CUSTOM_HOST_VAR, NULL); + + string_freez(name_string); + return rva; +} + +void rrdvar_custom_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) { + if(unlikely(!host->rrdvars || !rva)) return; // when health is not enabled + + if(rrdvar_type(rva) != RRDVAR_TYPE_CALCULATED || !(rrdvar_flags(rva) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_ALLOCATED))) + error("requested to set variable '%s' to value " NETDATA_DOUBLE_FORMAT " but the variable is not a custom one.", rrdvar_name(rva), value); + else { + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + NETDATA_DOUBLE *v = rv->value; + if(*v != value) { + *v = value; + + // if the host is streaming, send this variable upstream immediately + rrdpush_sender_send_this_host_variable_now(host, rva); + } + } +} + +void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) { + if(unlikely(!dict || !rva)) return; // when health is not enabled + dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva); +} + +// ---------------------------------------------------------------------------- +// RRDVAR lookup + +NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva) { + if(unlikely(!rva)) return NAN; + + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + + switch(rv->type) { + case RRDVAR_TYPE_CALCULATED: { + NETDATA_DOUBLE *n = (NETDATA_DOUBLE *)rv->value; + return *n; + } + + case RRDVAR_TYPE_TIME_T: { + time_t *n = (time_t *)rv->value; + return (NETDATA_DOUBLE)*n; + } + + case RRDVAR_TYPE_COLLECTED: { + collected_number *n = (collected_number *)rv->value; + return (NETDATA_DOUBLE)*n; + } + + case RRDVAR_TYPE_TOTAL: { + total_number *n = (total_number *)rv->value; + return (NETDATA_DOUBLE)*n; + } + + case RRDVAR_TYPE_INT: { + int *n = (int *)rv->value; + return *n; + } + + default: + error("I don't know how to convert RRDVAR type %u to NETDATA_DOUBLE", rv->type); + return NAN; + } +} + +int health_variable_lookup(STRING *variable, RRDCALC *rc, NETDATA_DOUBLE *result) { + RRDSET *st = rc->rrdset; + if(!st) return 0; + + RRDHOST *host = st->rrdhost; + const RRDVAR_ACQUIRED *rva; + + rva = rrdvar_get_and_acquire(st->rrdvars, variable); + if(rva) { + *result = rrdvar2number(rva); + dictionary_acquired_item_release(st->rrdvars, (const DICTIONARY_ITEM *)rva); + return 1; + } + + rva = rrdvar_get_and_acquire(rrdfamily_rrdvars_dict(st->rrdfamily), variable); + if(rva) { + *result = rrdvar2number(rva); + dictionary_acquired_item_release(rrdfamily_rrdvars_dict(st->rrdfamily), (const DICTIONARY_ITEM *)rva); + return 1; + } + + rva = rrdvar_get_and_acquire(host->rrdvars, variable); + if(rva) { + *result = rrdvar2number(rva); + dictionary_acquired_item_release(host->rrdvars, (const DICTIONARY_ITEM *)rva); + return 1; + } + + return 0; +} + +// ---------------------------------------------------------------------------- +// RRDVAR to JSON + +struct variable2json_helper { + BUFFER *buf; + size_t counter; + RRDVAR_FLAGS options; +}; + +static int single_variable2json_callback(const DICTIONARY_ITEM *item __maybe_unused, void *entry __maybe_unused, void *helper_data) { + struct variable2json_helper *helper = (struct variable2json_helper *)helper_data; + const RRDVAR_ACQUIRED *rva = (const RRDVAR_ACQUIRED *)item; + NETDATA_DOUBLE value = rrdvar2number(rva); + + if (helper->options == RRDVAR_FLAG_NONE || rrdvar_flags(rva) & helper->options) { + if(unlikely(isnan(value) || isinf(value))) + buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": null", helper->counter?",":"", rrdvar_name(rva)); + else + buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": %0.5" NETDATA_DOUBLE_MODIFIER, helper->counter?",":"", rrdvar_name(rva), (NETDATA_DOUBLE)value); + + helper->counter++; + } + + return 0; +} + +void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf) { + struct variable2json_helper helper = { + .buf = buf, + .counter = 0, + .options = RRDVAR_FLAG_CUSTOM_CHART_VAR}; + + buffer_sprintf(buf, "{"); + rrdvar_walkthrough_read(st->rrdvars, single_variable2json_callback, &helper); + buffer_strcat(buf, "\n\t\t\t}"); +} + +void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf) { + RRDHOST *host = st->rrdhost; + + struct variable2json_helper helper = { + .buf = buf, + .counter = 0, + .options = RRDVAR_FLAG_NONE}; + + buffer_sprintf(buf, "{\n\t\"chart\": \"%s\",\n\t\"chart_name\": \"%s\",\n\t\"chart_context\": \"%s\",\n\t\"chart_variables\": {", rrdset_id(st), rrdset_name(st), rrdset_context(st)); + rrdvar_walkthrough_read(st->rrdvars, single_variable2json_callback, &helper); + + buffer_sprintf(buf, "\n\t},\n\t\"family\": \"%s\",\n\t\"family_variables\": {", rrdset_family(st)); + helper.counter = 0; + rrdvar_walkthrough_read(rrdfamily_rrdvars_dict(st->rrdfamily), single_variable2json_callback, &helper); + + buffer_sprintf(buf, "\n\t},\n\t\"host\": \"%s\",\n\t\"host_variables\": {", rrdhost_hostname(host)); + helper.counter = 0; + rrdvar_walkthrough_read(host->rrdvars, single_variable2json_callback, &helper); + + buffer_strcat(buf, "\n\t}\n}\n"); +} + +// ---------------------------------------------------------------------------- +// RRDVAR private members examination + +const char *rrdvar_name(const RRDVAR_ACQUIRED *rva) { + return dictionary_acquired_item_name((const DICTIONARY_ITEM *)rva); +} + +RRDVAR_FLAGS rrdvar_flags(const RRDVAR_ACQUIRED *rva) { + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + return rv->flags; +} +RRDVAR_TYPE rrdvar_type(const RRDVAR_ACQUIRED *rva) { + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + return rv->type; +} diff --git a/database/rrdvar.h b/database/rrdvar.h new file mode 100644 index 0000000..a511c73 --- /dev/null +++ b/database/rrdvar.h @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDVAR_H +#define NETDATA_RRDVAR_H 1 + +#include "libnetdata/libnetdata.h" + +typedef enum rrdvar_type { + RRDVAR_TYPE_CALCULATED = 1, + RRDVAR_TYPE_TIME_T = 2, + RRDVAR_TYPE_COLLECTED = 3, + RRDVAR_TYPE_TOTAL = 4, + RRDVAR_TYPE_INT = 5 + + // this is 8 bit + // to increase it you have to set change the bitfield in + // rrdvar, rrdsetvar, rrddimvar +} RRDVAR_TYPE; + +typedef enum rrdvar_options { + RRDVAR_FLAG_NONE = 0, + RRDVAR_FLAG_ALLOCATED = (1 << 0), // the value ptr is allocated (not a reference) + RRDVAR_FLAG_CUSTOM_HOST_VAR = (1 << 1), // this is a custom host variable, not associated with a dimension + RRDVAR_FLAG_CUSTOM_CHART_VAR = (1 << 2), // this is a custom chart variable, not associated with a dimension + RRDVAR_FLAG_RRDCALC_LOCAL_VAR = (1 << 3), // this is a an alarm variable, attached to a chart + RRDVAR_FLAG_RRDCALC_FAMILY_VAR = (1 << 4), // this is a an alarm variable, attached to a family + RRDVAR_FLAG_RRDCALC_HOST_CHARTID_VAR = (1 << 5), // this is a an alarm variable, attached to the host, using the chart id + RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR = (1 << 6), // this is a an alarm variable, attached to the host, using the chart name + + // this is 24 bit + // to increase it you have to set change the bitfield in + // rrdvar, rrdsetvar, rrddimvar +} RRDVAR_FLAGS; + +#define RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS \ + (RRDVAR_FLAG_ALLOCATED) + +#define RRDVAR_OPTIONS_REMOVED_WHEN_PROPAGATING_TO_RRDVAR \ + (RRDVAR_FLAG_ALLOCATED) + +#define RRDVAR_MAX_LENGTH 1024 + +int rrdvar_fix_name(char *variable); + +#include "rrd.h" + +STRING *rrdvar_name_to_string(const char *name); + +const RRDVAR_ACQUIRED *rrdvar_custom_host_variable_add_and_acquire(RRDHOST *host, const char *name); +void rrdvar_custom_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value); + +int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data); + +#define rrdvar_custom_host_variable_release(host, rva) rrdvar_release((host)->rrdvars, rva) +void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva); + +NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva); + +const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(const char *scope, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value); +void rrdvar_release_and_del(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva); + +DICTIONARY *rrdvariables_create(void); +void rrdvariables_destroy(DICTIONARY *dict); + +void rrdvar_delete_all(DICTIONARY *dict); + +const char *rrdvar_name(const RRDVAR_ACQUIRED *rva); +RRDVAR_FLAGS rrdvar_flags(const RRDVAR_ACQUIRED *rva); +RRDVAR_TYPE rrdvar_type(const RRDVAR_ACQUIRED *rva); + +#endif //NETDATA_RRDVAR_H diff --git a/database/sqlite/Makefile.am b/database/sqlite/Makefile.am new file mode 100644 index 0000000..babdcf0 --- /dev/null +++ b/database/sqlite/Makefile.am @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in diff --git a/database/sqlite/sqlite3.c b/database/sqlite/sqlite3.c new file mode 100644 index 0000000..296de3e --- /dev/null +++ b/database/sqlite/sqlite3.c @@ -0,0 +1,239479 @@ +/****************************************************************************** +** This file is an amalgamation of many separate C source files from SQLite +** version 3.38.5. By combining all the individual C code files into this +** single large file, the entire code can be compiled as a single translation +** unit. This allows many compilers to do optimizations that would not be +** possible if the files were compiled separately. Performance improvements +** of 5% or more are commonly seen when SQLite is compiled as a single +** translation unit. +** +** This file is all you need to compile SQLite. To use SQLite in other +** programs, you need this file and the "sqlite3.h" header file that defines +** the programming interface to the SQLite library. (If you do not have +** the "sqlite3.h" header file at hand, you will find a copy embedded within +** the text of this file. Search for "Begin file sqlite3.h" to find the start +** of the embedded sqlite3.h header file.) Additional code files may be needed +** if you want a wrapper to interface SQLite with your choice of programming +** language. The code for the "sqlite3" command-line shell is also in a +** separate file. This file contains only code for the core SQLite library. +*/ +#define __maybe_unused __attribute__((unused)) +#define SQLITE_CORE 1 +#define SQLITE_AMALGAMATION 1 +#ifndef SQLITE_PRIVATE +# define SQLITE_PRIVATE static +#endif +#define SQLITE_UDL_CAPABLE_PARSER 1 +#define SQLITE_ENABLE_UPDATE_DELETE_LIMIT 1 +#define SQLITE_OMIT_LOAD_EXTENSION 1 +#define SQLITE_ENABLE_DBSTAT_VTAB 1 + +/************** Begin file sqliteInt.h ***************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Internal interface definitions for SQLite. +** +*/ +#ifndef SQLITEINT_H +#define SQLITEINT_H + +/* Special Comments: +** +** Some comments have special meaning to the tools that measure test +** coverage: +** +** NO_TEST - The branches on this line are not +** measured by branch coverage. This is +** used on lines of code that actually +** implement parts of coverage testing. +** +** OPTIMIZATION-IF-TRUE - This branch is allowed to alway be false +** and the correct answer is still obtained, +** though perhaps more slowly. +** +** OPTIMIZATION-IF-FALSE - This branch is allowed to alway be true +** and the correct answer is still obtained, +** though perhaps more slowly. +** +** PREVENTS-HARMLESS-OVERREAD - This branch prevents a buffer overread +** that would be harmless and undetectable +** if it did occur. +** +** In all cases, the special comment must be enclosed in the usual +** slash-asterisk...asterisk-slash comment marks, with no spaces between the +** asterisks and the comment text. +*/ + +/* +** Make sure the Tcl calling convention macro is defined. This macro is +** only used by test code and Tcl integration code. +*/ +#ifndef SQLITE_TCLAPI +# define SQLITE_TCLAPI +#endif + +/* +** Include the header file used to customize the compiler options for MSVC. +** This should be done first so that it can successfully prevent spurious +** compiler warnings due to subsequent content in this file and other files +** that are included by this file. +*/ +/************** Include msvc.h in the middle of sqliteInt.h ******************/ +/************** Begin file msvc.h ********************************************/ +/* +** 2015 January 12 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to MSVC. +*/ +#ifndef SQLITE_MSVC_H +#define SQLITE_MSVC_H + +#if defined(_MSC_VER) +#pragma warning(disable : 4054) +#pragma warning(disable : 4055) +#pragma warning(disable : 4100) +#pragma warning(disable : 4127) +#pragma warning(disable : 4130) +#pragma warning(disable : 4152) +#pragma warning(disable : 4189) +#pragma warning(disable : 4206) +#pragma warning(disable : 4210) +#pragma warning(disable : 4232) +#pragma warning(disable : 4244) +#pragma warning(disable : 4305) +#pragma warning(disable : 4306) +#pragma warning(disable : 4702) +#pragma warning(disable : 4706) +#endif /* defined(_MSC_VER) */ + +#if defined(_MSC_VER) && !defined(_WIN64) +#undef SQLITE_4_BYTE_ALIGNED_MALLOC +#define SQLITE_4_BYTE_ALIGNED_MALLOC +#endif /* defined(_MSC_VER) && !defined(_WIN64) */ + +#endif /* SQLITE_MSVC_H */ + +/************** End of msvc.h ************************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* +** Special setup for VxWorks +*/ +/************** Include vxworks.h in the middle of sqliteInt.h ***************/ +/************** Begin file vxworks.h *****************************************/ +/* +** 2015-03-02 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to Wind River's VxWorks +*/ +#if defined(__RTP__) || defined(_WRS_KERNEL) +/* This is VxWorks. Set up things specially for that OS +*/ +#include +#include /* amalgamator: dontcache */ +#define OS_VXWORKS 1 +#define SQLITE_OS_OTHER 0 +#define SQLITE_HOMEGROWN_RECURSIVE_MUTEX 1 +#define SQLITE_OMIT_LOAD_EXTENSION 1 +#define SQLITE_ENABLE_LOCKING_STYLE 0 +#define HAVE_UTIME 1 +#else +/* This is not VxWorks. */ +#define OS_VXWORKS 0 +#define HAVE_FCHOWN 1 +#define HAVE_READLINK 1 +#define HAVE_LSTAT 1 +#endif /* defined(_WRS_KERNEL) */ + +/************** End of vxworks.h *********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* +** These #defines should enable >2GB file support on POSIX if the +** underlying operating system supports it. If the OS lacks +** large file support, or if the OS is windows, these should be no-ops. +** +** Ticket #2739: The _LARGEFILE_SOURCE macro must appear before any +** system #includes. Hence, this block of code must be the very first +** code in all source files. +** +** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch +** on the compiler command line. This is necessary if you are compiling +** on a recent machine (ex: Red Hat 7.2) but you want your code to work +** on an older machine (ex: Red Hat 6.0). If you compile on Red Hat 7.2 +** without this option, LFS is enable. But LFS does not exist in the kernel +** in Red Hat 6.0, so the code won't work. Hence, for maximum binary +** portability you should omit LFS. +** +** The previous paragraph was written in 2005. (This paragraph is written +** on 2008-11-28.) These days, all Linux kernels support large files, so +** you should probably leave LFS enabled. But some embedded platforms might +** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful. +** +** Similar is true for Mac OS X. LFS is only supported on Mac OS X 9 and later. +*/ +#ifndef SQLITE_DISABLE_LFS +# define _LARGE_FILE 1 +# ifndef _FILE_OFFSET_BITS +# define _FILE_OFFSET_BITS 64 +# endif +# define _LARGEFILE_SOURCE 1 +#endif + +/* The GCC_VERSION and MSVC_VERSION macros are used to +** conditionally include optimizations for each of these compilers. A +** value of 0 means that compiler is not being used. The +** SQLITE_DISABLE_INTRINSIC macro means do not use any compiler-specific +** optimizations, and hence set all compiler macros to 0 +** +** There was once also a CLANG_VERSION macro. However, we learn that the +** version numbers in clang are for "marketing" only and are inconsistent +** and unreliable. Fortunately, all versions of clang also recognize the +** gcc version numbers and have reasonable settings for gcc version numbers, +** so the GCC_VERSION macro will be set to a correct non-zero value even +** when compiling with clang. +*/ +#if defined(__GNUC__) && !defined(SQLITE_DISABLE_INTRINSIC) +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif +#if defined(_MSC_VER) && !defined(SQLITE_DISABLE_INTRINSIC) +# define MSVC_VERSION _MSC_VER +#else +# define MSVC_VERSION 0 +#endif + +/* +** Some C99 functions in "math.h" are only present for MSVC when its version +** is associated with Visual Studio 2013 or higher. +*/ +#ifndef SQLITE_HAVE_C99_MATH_FUNCS +# if MSVC_VERSION==0 || MSVC_VERSION>=1800 +# define SQLITE_HAVE_C99_MATH_FUNCS (1) +# else +# define SQLITE_HAVE_C99_MATH_FUNCS (0) +# endif +#endif + +/* Needed for various definitions... */ +#if defined(__GNUC__) && !defined(_GNU_SOURCE) +# define _GNU_SOURCE +#endif + +#if defined(__OpenBSD__) && !defined(_BSD_SOURCE) +# define _BSD_SOURCE +#endif + +/* +** Macro to disable warnings about missing "break" at the end of a "case". +*/ +#if GCC_VERSION>=7000000 +# define deliberate_fall_through __attribute__((fallthrough)); +#else +# define deliberate_fall_through +#endif + +/* +** For MinGW, check to see if we can include the header file containing its +** version information, among other things. Normally, this internal MinGW +** header file would [only] be included automatically by other MinGW header +** files; however, the contained version information is now required by this +** header file to work around binary compatibility issues (see below) and +** this is the only known way to reliably obtain it. This entire #if block +** would be completely unnecessary if there was any other way of detecting +** MinGW via their preprocessor (e.g. if they customized their GCC to define +** some MinGW-specific macros). When compiling for MinGW, either the +** _HAVE_MINGW_H or _HAVE__MINGW_H (note the extra underscore) macro must be +** defined; otherwise, detection of conditions specific to MinGW will be +** disabled. +*/ +#if defined(_HAVE_MINGW_H) +# include "mingw.h" +#elif defined(_HAVE__MINGW_H) +# include "_mingw.h" +#endif + +/* +** For MinGW version 4.x (and higher), check to see if the _USE_32BIT_TIME_T +** define is required to maintain binary compatibility with the MSVC runtime +** library in use (e.g. for Windows XP). +*/ +#if !defined(_USE_32BIT_TIME_T) && !defined(_USE_64BIT_TIME_T) && \ + defined(_WIN32) && !defined(_WIN64) && \ + defined(__MINGW_MAJOR_VERSION) && __MINGW_MAJOR_VERSION >= 4 && \ + defined(__MSVCRT__) +# define _USE_32BIT_TIME_T +#endif + +/* Optionally #include a user-defined header, whereby compilation options +** may be set prior to where they take effect, but after platform setup. +** If SQLITE_CUSTOM_INCLUDE=? is defined, its value names the #include +** file. +*/ +#ifdef SQLITE_CUSTOM_INCLUDE +# define INC_STRINGIFY_(f) #f +# define INC_STRINGIFY(f) INC_STRINGIFY_(f) +# include INC_STRINGIFY(SQLITE_CUSTOM_INCLUDE) +#endif + +/* The public SQLite interface. The _FILE_OFFSET_BITS macro must appear +** first in QNX. Also, the _USE_32BIT_TIME_T macro must appear first for +** MinGW. +*/ +/************** Include sqlite3.h in the middle of sqliteInt.h ***************/ +/************** Begin file sqlite3.h *****************************************/ +/* +** 2001-09-15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface that the SQLite library +** presents to client programs. If a C-function, structure, datatype, +** or constant definition does not appear in this file, then it is +** not a published API of SQLite, is subject to change without +** notice, and should not be referenced by programs that use SQLite. +** +** Some of the definitions that are in this file are marked as +** "experimental". Experimental interfaces are normally new +** features recently added to SQLite. We do not anticipate changes +** to experimental interfaces but reserve the right to make minor changes +** if experience from use "in the wild" suggest such changes are prudent. +** +** The official C-language API documentation for SQLite is derived +** from comments in this file. This file is the authoritative source +** on how SQLite interfaces are supposed to operate. +** +** The name of this file under configuration management is "sqlite.h.in". +** The makefile makes some minor changes to this file (such as inserting +** the version number) and changes its name to "sqlite3.h" as +** part of the build process. +*/ +#ifndef SQLITE3_H +#define SQLITE3_H +#include /* Needed for the definition of va_list */ + +/* +** Make sure we can call this stuff from C++. +*/ +#if 0 +extern "C" { +#endif + + +/* +** Facilitate override of interface linkage and calling conventions. +** Be aware that these macros may not be used within this particular +** translation of the amalgamation and its associated header file. +** +** The SQLITE_EXTERN and SQLITE_API macros are used to instruct the +** compiler that the target identifier should have external linkage. +** +** The SQLITE_CDECL macro is used to set the calling convention for +** public functions that accept a variable number of arguments. +** +** The SQLITE_APICALL macro is used to set the calling convention for +** public functions that accept a fixed number of arguments. +** +** The SQLITE_STDCALL macro is no longer used and is now deprecated. +** +** The SQLITE_CALLBACK macro is used to set the calling convention for +** function pointers. +** +** The SQLITE_SYSAPI macro is used to set the calling convention for +** functions provided by the operating system. +** +** Currently, the SQLITE_CDECL, SQLITE_APICALL, SQLITE_CALLBACK, and +** SQLITE_SYSAPI macros are used only when building for environments +** that require non-default calling conventions. +*/ +#ifndef SQLITE_EXTERN +# define SQLITE_EXTERN extern +#endif +#ifndef SQLITE_API +# define SQLITE_API +#endif +#ifndef SQLITE_CDECL +# define SQLITE_CDECL +#endif +#ifndef SQLITE_APICALL +# define SQLITE_APICALL +#endif +#ifndef SQLITE_STDCALL +# define SQLITE_STDCALL SQLITE_APICALL +#endif +#ifndef SQLITE_CALLBACK +# define SQLITE_CALLBACK +#endif +#ifndef SQLITE_SYSAPI +# define SQLITE_SYSAPI +#endif + +/* +** These no-op macros are used in front of interfaces to mark those +** interfaces as either deprecated or experimental. New applications +** should not use deprecated interfaces - they are supported for backwards +** compatibility only. Application writers should be aware that +** experimental interfaces are subject to change in point releases. +** +** These macros used to resolve to various kinds of compiler magic that +** would generate warning messages when they were used. But that +** compiler magic ended up generating such a flurry of bug reports +** that we have taken it all out and gone back to using simple +** noop macros. +*/ +#define SQLITE_DEPRECATED +#define SQLITE_EXPERIMENTAL + +/* +** Ensure these symbols were not defined by some previous header file. +*/ +#ifdef SQLITE_VERSION +# undef SQLITE_VERSION +#endif +#ifdef SQLITE_VERSION_NUMBER +# undef SQLITE_VERSION_NUMBER +#endif + +/* +** CAPI3REF: Compile-Time Library Version Numbers +** +** ^(The [SQLITE_VERSION] C preprocessor macro in the sqlite3.h header +** evaluates to a string literal that is the SQLite version in the +** format "X.Y.Z" where X is the major version number (always 3 for +** SQLite3) and Y is the minor version number and Z is the release number.)^ +** ^(The [SQLITE_VERSION_NUMBER] C preprocessor macro resolves to an integer +** with the value (X*1000000 + Y*1000 + Z) where X, Y, and Z are the same +** numbers used in [SQLITE_VERSION].)^ +** The SQLITE_VERSION_NUMBER for any given release of SQLite will also +** be larger than the release from which it is derived. Either Y will +** be held constant and Z will be incremented or else Y will be incremented +** and Z will be reset to zero. +** +** Since [version 3.6.18] ([dateof:3.6.18]), +** SQLite source code has been stored in the +** Fossil configuration management +** system. ^The SQLITE_SOURCE_ID macro evaluates to +** a string which identifies a particular check-in of SQLite +** within its configuration management system. ^The SQLITE_SOURCE_ID +** string contains the date and time of the check-in (UTC) and a SHA1 +** or SHA3-256 hash of the entire source tree. If the source code has +** been edited in any way since it was last checked in, then the last +** four hexadecimal digits of the hash may be modified. +** +** See also: [sqlite3_libversion()], +** [sqlite3_libversion_number()], [sqlite3_sourceid()], +** [sqlite_version()] and [sqlite_source_id()]. +*/ +#define SQLITE_VERSION "3.38.5" +#define SQLITE_VERSION_NUMBER 3038005 +#define SQLITE_SOURCE_ID "2022-05-06 15:25:27 78d9c993d404cdfaa7fdd2973fa1052e3da9f66215cff9c5540ebe55c407d9fe" + +/* +** CAPI3REF: Run-Time Library Version Numbers +** KEYWORDS: sqlite3_version sqlite3_sourceid +** +** These interfaces provide the same information as the [SQLITE_VERSION], +** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros +** but are associated with the library instead of the header file. ^(Cautious +** programmers might include assert() statements in their application to +** verify that values returned by these interfaces match the macros in +** the header, and thus ensure that the application is +** compiled with matching library and header files. +** +**
+** assert( sqlite3_libversion_number()==SQLITE_VERSION_NUMBER );
+** assert( strncmp(sqlite3_sourceid(),SQLITE_SOURCE_ID,80)==0 );
+** assert( strcmp(sqlite3_libversion(),SQLITE_VERSION)==0 );
+** 
)^ +** +** ^The sqlite3_version[] string constant contains the text of [SQLITE_VERSION] +** macro. ^The sqlite3_libversion() function returns a pointer to the +** to the sqlite3_version[] string constant. The sqlite3_libversion() +** function is provided for use in DLLs since DLL users usually do not have +** direct access to string constants within the DLL. ^The +** sqlite3_libversion_number() function returns an integer equal to +** [SQLITE_VERSION_NUMBER]. ^(The sqlite3_sourceid() function returns +** a pointer to a string constant whose value is the same as the +** [SQLITE_SOURCE_ID] C preprocessor macro. Except if SQLite is built +** using an edited copy of [the amalgamation], then the last four characters +** of the hash might be different from [SQLITE_SOURCE_ID].)^ +** +** See also: [sqlite_version()] and [sqlite_source_id()]. +*/ +SQLITE_API const char sqlite3_version[] = SQLITE_VERSION; +SQLITE_API const char *sqlite3_libversion(void); +SQLITE_API const char *sqlite3_sourceid(void); +SQLITE_API int sqlite3_libversion_number(void); + +/* +** CAPI3REF: Run-Time Library Compilation Options Diagnostics +** +** ^The sqlite3_compileoption_used() function returns 0 or 1 +** indicating whether the specified option was defined at +** compile time. ^The SQLITE_ prefix may be omitted from the +** option name passed to sqlite3_compileoption_used(). +** +** ^The sqlite3_compileoption_get() function allows iterating +** over the list of options that were defined at compile time by +** returning the N-th compile time option string. ^If N is out of range, +** sqlite3_compileoption_get() returns a NULL pointer. ^The SQLITE_ +** prefix is omitted from any strings returned by +** sqlite3_compileoption_get(). +** +** ^Support for the diagnostic functions sqlite3_compileoption_used() +** and sqlite3_compileoption_get() may be omitted by specifying the +** [SQLITE_OMIT_COMPILEOPTION_DIAGS] option at compile time. +** +** See also: SQL functions [sqlite_compileoption_used()] and +** [sqlite_compileoption_get()] and the [compile_options pragma]. +*/ +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS +SQLITE_API int sqlite3_compileoption_used(const char *zOptName); +SQLITE_API const char *sqlite3_compileoption_get(int N); +#else +# define sqlite3_compileoption_used(X) 0 +# define sqlite3_compileoption_get(X) ((void*)0) +#endif + +/* +** CAPI3REF: Test To See If The Library Is Threadsafe +** +** ^The sqlite3_threadsafe() function returns zero if and only if +** SQLite was compiled with mutexing code omitted due to the +** [SQLITE_THREADSAFE] compile-time option being set to 0. +** +** SQLite can be compiled with or without mutexes. When +** the [SQLITE_THREADSAFE] C preprocessor macro is 1 or 2, mutexes +** are enabled and SQLite is threadsafe. When the +** [SQLITE_THREADSAFE] macro is 0, +** the mutexes are omitted. Without the mutexes, it is not safe +** to use SQLite concurrently from more than one thread. +** +** Enabling mutexes incurs a measurable performance penalty. +** So if speed is of utmost importance, it makes sense to disable +** the mutexes. But for maximum safety, mutexes should be enabled. +** ^The default behavior is for mutexes to be enabled. +** +** This interface can be used by an application to make sure that the +** version of SQLite that it is linking against was compiled with +** the desired setting of the [SQLITE_THREADSAFE] macro. +** +** This interface only reports on the compile-time mutex setting +** of the [SQLITE_THREADSAFE] flag. If SQLite is compiled with +** SQLITE_THREADSAFE=1 or =2 then mutexes are enabled by default but +** can be fully or partially disabled using a call to [sqlite3_config()] +** with the verbs [SQLITE_CONFIG_SINGLETHREAD], [SQLITE_CONFIG_MULTITHREAD], +** or [SQLITE_CONFIG_SERIALIZED]. ^(The return value of the +** sqlite3_threadsafe() function shows only the compile-time setting of +** thread safety, not any run-time changes to that setting made by +** sqlite3_config(). In other words, the return value from sqlite3_threadsafe() +** is unchanged by calls to sqlite3_config().)^ +** +** See the [threading mode] documentation for additional information. +*/ +SQLITE_API int sqlite3_threadsafe(void); + +/* +** CAPI3REF: Database Connection Handle +** KEYWORDS: {database connection} {database connections} +** +** Each open SQLite database is represented by a pointer to an instance of +** the opaque structure named "sqlite3". It is useful to think of an sqlite3 +** pointer as an object. The [sqlite3_open()], [sqlite3_open16()], and +** [sqlite3_open_v2()] interfaces are its constructors, and [sqlite3_close()] +** and [sqlite3_close_v2()] are its destructors. There are many other +** interfaces (such as +** [sqlite3_prepare_v2()], [sqlite3_create_function()], and +** [sqlite3_busy_timeout()] to name but three) that are methods on an +** sqlite3 object. +*/ +typedef struct sqlite3 sqlite3; + +/* +** CAPI3REF: 64-Bit Integer Types +** KEYWORDS: sqlite_int64 sqlite_uint64 +** +** Because there is no cross-platform way to specify 64-bit integer types +** SQLite includes typedefs for 64-bit signed and unsigned integers. +** +** The sqlite3_int64 and sqlite3_uint64 are the preferred type definitions. +** The sqlite_int64 and sqlite_uint64 types are supported for backwards +** compatibility only. +** +** ^The sqlite3_int64 and sqlite_int64 types can store integer values +** between -9223372036854775808 and +9223372036854775807 inclusive. ^The +** sqlite3_uint64 and sqlite_uint64 types can store integer values +** between 0 and +18446744073709551615 inclusive. +*/ +#ifdef SQLITE_INT64_TYPE + typedef SQLITE_INT64_TYPE sqlite_int64; +# ifdef SQLITE_UINT64_TYPE + typedef SQLITE_UINT64_TYPE sqlite_uint64; +# else + typedef unsigned SQLITE_INT64_TYPE sqlite_uint64; +# endif +#elif defined(_MSC_VER) || defined(__BORLANDC__) + typedef __int64 sqlite_int64; + typedef unsigned __int64 sqlite_uint64; +#else + typedef long long int sqlite_int64; + typedef unsigned long long int sqlite_uint64; +#endif +typedef sqlite_int64 sqlite3_int64; +typedef sqlite_uint64 sqlite3_uint64; + +/* +** If compiling for a processor that lacks floating point support, +** substitute integer for floating-point. +*/ +#ifdef SQLITE_OMIT_FLOATING_POINT +# define double sqlite3_int64 +#endif + +/* +** CAPI3REF: Closing A Database Connection +** DESTRUCTOR: sqlite3 +** +** ^The sqlite3_close() and sqlite3_close_v2() routines are destructors +** for the [sqlite3] object. +** ^Calls to sqlite3_close() and sqlite3_close_v2() return [SQLITE_OK] if +** the [sqlite3] object is successfully destroyed and all associated +** resources are deallocated. +** +** Ideally, applications should [sqlite3_finalize | finalize] all +** [prepared statements], [sqlite3_blob_close | close] all [BLOB handles], and +** [sqlite3_backup_finish | finish] all [sqlite3_backup] objects associated +** with the [sqlite3] object prior to attempting to close the object. +** ^If the database connection is associated with unfinalized prepared +** statements, BLOB handlers, and/or unfinished sqlite3_backup objects then +** sqlite3_close() will leave the database connection open and return +** [SQLITE_BUSY]. ^If sqlite3_close_v2() is called with unfinalized prepared +** statements, unclosed BLOB handlers, and/or unfinished sqlite3_backups, +** it returns [SQLITE_OK] regardless, but instead of deallocating the database +** connection immediately, it marks the database connection as an unusable +** "zombie" and makes arrangements to automatically deallocate the database +** connection after all prepared statements are finalized, all BLOB handles +** are closed, and all backups have finished. The sqlite3_close_v2() interface +** is intended for use with host languages that are garbage collected, and +** where the order in which destructors are called is arbitrary. +** +** ^If an [sqlite3] object is destroyed while a transaction is open, +** the transaction is automatically rolled back. +** +** The C parameter to [sqlite3_close(C)] and [sqlite3_close_v2(C)] +** must be either a NULL +** pointer or an [sqlite3] object pointer obtained +** from [sqlite3_open()], [sqlite3_open16()], or +** [sqlite3_open_v2()], and not previously closed. +** ^Calling sqlite3_close() or sqlite3_close_v2() with a NULL pointer +** argument is a harmless no-op. +*/ +SQLITE_API int sqlite3_close(sqlite3*); +SQLITE_API int sqlite3_close_v2(sqlite3*); + +/* +** The type for a callback function. +** This is legacy and deprecated. It is included for historical +** compatibility and is not documented. +*/ +typedef int (*sqlite3_callback)(void*,int,char**, char**); + +/* +** CAPI3REF: One-Step Query Execution Interface +** METHOD: sqlite3 +** +** The sqlite3_exec() interface is a convenience wrapper around +** [sqlite3_prepare_v2()], [sqlite3_step()], and [sqlite3_finalize()], +** that allows an application to run multiple statements of SQL +** without having to use a lot of C code. +** +** ^The sqlite3_exec() interface runs zero or more UTF-8 encoded, +** semicolon-separate SQL statements passed into its 2nd argument, +** in the context of the [database connection] passed in as its 1st +** argument. ^If the callback function of the 3rd argument to +** sqlite3_exec() is not NULL, then it is invoked for each result row +** coming out of the evaluated SQL statements. ^The 4th argument to +** sqlite3_exec() is relayed through to the 1st argument of each +** callback invocation. ^If the callback pointer to sqlite3_exec() +** is NULL, then no callback is ever invoked and result rows are +** ignored. +** +** ^If an error occurs while evaluating the SQL statements passed into +** sqlite3_exec(), then execution of the current statement stops and +** subsequent statements are skipped. ^If the 5th parameter to sqlite3_exec() +** is not NULL then any error message is written into memory obtained +** from [sqlite3_malloc()] and passed back through the 5th parameter. +** To avoid memory leaks, the application should invoke [sqlite3_free()] +** on error message strings returned through the 5th parameter of +** sqlite3_exec() after the error message string is no longer needed. +** ^If the 5th parameter to sqlite3_exec() is not NULL and no errors +** occur, then sqlite3_exec() sets the pointer in its 5th parameter to +** NULL before returning. +** +** ^If an sqlite3_exec() callback returns non-zero, the sqlite3_exec() +** routine returns SQLITE_ABORT without invoking the callback again and +** without running any subsequent SQL statements. +** +** ^The 2nd argument to the sqlite3_exec() callback function is the +** number of columns in the result. ^The 3rd argument to the sqlite3_exec() +** callback is an array of pointers to strings obtained as if from +** [sqlite3_column_text()], one for each column. ^If an element of a +** result row is NULL then the corresponding string pointer for the +** sqlite3_exec() callback is a NULL pointer. ^The 4th argument to the +** sqlite3_exec() callback is an array of pointers to strings where each +** entry represents the name of corresponding result column as obtained +** from [sqlite3_column_name()]. +** +** ^If the 2nd parameter to sqlite3_exec() is a NULL pointer, a pointer +** to an empty string, or a pointer that contains only whitespace and/or +** SQL comments, then no SQL statements are evaluated and the database +** is not changed. +** +** Restrictions: +** +**
    +**
  • The application must ensure that the 1st parameter to sqlite3_exec() +** is a valid and open [database connection]. +**
  • The application must not close the [database connection] specified by +** the 1st parameter to sqlite3_exec() while sqlite3_exec() is running. +**
  • The application must not modify the SQL statement text passed into +** the 2nd parameter of sqlite3_exec() while sqlite3_exec() is running. +**
+*/ +SQLITE_API int sqlite3_exec( + sqlite3*, /* An open database */ + const char *sql, /* SQL to be evaluated */ + int (*callback)(void*,int,char**,char**), /* Callback function */ + void *, /* 1st argument to callback */ + char **errmsg /* Error msg written here */ +); + +/* +** CAPI3REF: Result Codes +** KEYWORDS: {result code definitions} +** +** Many SQLite functions return an integer result code from the set shown +** here in order to indicate success or failure. +** +** New error codes may be added in future versions of SQLite. +** +** See also: [extended result code definitions] +*/ +#define SQLITE_OK 0 /* Successful result */ +/* beginning-of-error-codes */ +#define SQLITE_ERROR 1 /* Generic error */ +#define SQLITE_INTERNAL 2 /* Internal logic error in SQLite */ +#define SQLITE_PERM 3 /* Access permission denied */ +#define SQLITE_ABORT 4 /* Callback routine requested an abort */ +#define SQLITE_BUSY 5 /* The database file is locked */ +#define SQLITE_LOCKED 6 /* A table in the database is locked */ +#define SQLITE_NOMEM 7 /* A malloc() failed */ +#define SQLITE_READONLY 8 /* Attempt to write a readonly database */ +#define SQLITE_INTERRUPT 9 /* Operation terminated by sqlite3_interrupt()*/ +#define SQLITE_IOERR 10 /* Some kind of disk I/O error occurred */ +#define SQLITE_CORRUPT 11 /* The database disk image is malformed */ +#define SQLITE_NOTFOUND 12 /* Unknown opcode in sqlite3_file_control() */ +#define SQLITE_FULL 13 /* Insertion failed because database is full */ +#define SQLITE_CANTOPEN 14 /* Unable to open the database file */ +#define SQLITE_PROTOCOL 15 /* Database lock protocol error */ +#define SQLITE_EMPTY 16 /* Internal use only */ +#define SQLITE_SCHEMA 17 /* The database schema changed */ +#define SQLITE_TOOBIG 18 /* String or BLOB exceeds size limit */ +#define SQLITE_CONSTRAINT 19 /* Abort due to constraint violation */ +#define SQLITE_MISMATCH 20 /* Data type mismatch */ +#define SQLITE_MISUSE 21 /* Library used incorrectly */ +#define SQLITE_NOLFS 22 /* Uses OS features not supported on host */ +#define SQLITE_AUTH 23 /* Authorization denied */ +#define SQLITE_FORMAT 24 /* Not used */ +#define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */ +#define SQLITE_NOTADB 26 /* File opened that is not a database file */ +#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */ +#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */ +#define SQLITE_ROW 100 /* sqlite3_step() has another row ready */ +#define SQLITE_DONE 101 /* sqlite3_step() has finished executing */ +/* end-of-error-codes */ + +/* +** CAPI3REF: Extended Result Codes +** KEYWORDS: {extended result code definitions} +** +** In its default configuration, SQLite API routines return one of 30 integer +** [result codes]. However, experience has shown that many of +** these result codes are too coarse-grained. They do not provide as +** much information about problems as programmers might like. In an effort to +** address this, newer versions of SQLite (version 3.3.8 [dateof:3.3.8] +** and later) include +** support for additional result codes that provide more detailed information +** about errors. These [extended result codes] are enabled or disabled +** on a per database connection basis using the +** [sqlite3_extended_result_codes()] API. Or, the extended code for +** the most recent error can be obtained using +** [sqlite3_extended_errcode()]. +*/ +#define SQLITE_ERROR_MISSING_COLLSEQ (SQLITE_ERROR | (1<<8)) +#define SQLITE_ERROR_RETRY (SQLITE_ERROR | (2<<8)) +#define SQLITE_ERROR_SNAPSHOT (SQLITE_ERROR | (3<<8)) +#define SQLITE_IOERR_READ (SQLITE_IOERR | (1<<8)) +#define SQLITE_IOERR_SHORT_READ (SQLITE_IOERR | (2<<8)) +#define SQLITE_IOERR_WRITE (SQLITE_IOERR | (3<<8)) +#define SQLITE_IOERR_FSYNC (SQLITE_IOERR | (4<<8)) +#define SQLITE_IOERR_DIR_FSYNC (SQLITE_IOERR | (5<<8)) +#define SQLITE_IOERR_TRUNCATE (SQLITE_IOERR | (6<<8)) +#define SQLITE_IOERR_FSTAT (SQLITE_IOERR | (7<<8)) +#define SQLITE_IOERR_UNLOCK (SQLITE_IOERR | (8<<8)) +#define SQLITE_IOERR_RDLOCK (SQLITE_IOERR | (9<<8)) +#define SQLITE_IOERR_DELETE (SQLITE_IOERR | (10<<8)) +#define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8)) +#define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8)) +#define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8)) +#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8)) +#define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8)) +#define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8)) +#define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8)) +#define SQLITE_IOERR_SHMOPEN (SQLITE_IOERR | (18<<8)) +#define SQLITE_IOERR_SHMSIZE (SQLITE_IOERR | (19<<8)) +#define SQLITE_IOERR_SHMLOCK (SQLITE_IOERR | (20<<8)) +#define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8)) +#define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8)) +#define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8)) +#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) +#define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8)) +#define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8)) +#define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8)) +#define SQLITE_IOERR_AUTH (SQLITE_IOERR | (28<<8)) +#define SQLITE_IOERR_BEGIN_ATOMIC (SQLITE_IOERR | (29<<8)) +#define SQLITE_IOERR_COMMIT_ATOMIC (SQLITE_IOERR | (30<<8)) +#define SQLITE_IOERR_ROLLBACK_ATOMIC (SQLITE_IOERR | (31<<8)) +#define SQLITE_IOERR_DATA (SQLITE_IOERR | (32<<8)) +#define SQLITE_IOERR_CORRUPTFS (SQLITE_IOERR | (33<<8)) +#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) +#define SQLITE_LOCKED_VTAB (SQLITE_LOCKED | (2<<8)) +#define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) +#define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8)) +#define SQLITE_BUSY_TIMEOUT (SQLITE_BUSY | (3<<8)) +#define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) +#define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8)) +#define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8)) +#define SQLITE_CANTOPEN_CONVPATH (SQLITE_CANTOPEN | (4<<8)) +#define SQLITE_CANTOPEN_DIRTYWAL (SQLITE_CANTOPEN | (5<<8)) /* Not Used */ +#define SQLITE_CANTOPEN_SYMLINK (SQLITE_CANTOPEN | (6<<8)) +#define SQLITE_CORRUPT_VTAB (SQLITE_CORRUPT | (1<<8)) +#define SQLITE_CORRUPT_SEQUENCE (SQLITE_CORRUPT | (2<<8)) +#define SQLITE_CORRUPT_INDEX (SQLITE_CORRUPT | (3<<8)) +#define SQLITE_READONLY_RECOVERY (SQLITE_READONLY | (1<<8)) +#define SQLITE_READONLY_CANTLOCK (SQLITE_READONLY | (2<<8)) +#define SQLITE_READONLY_ROLLBACK (SQLITE_READONLY | (3<<8)) +#define SQLITE_READONLY_DBMOVED (SQLITE_READONLY | (4<<8)) +#define SQLITE_READONLY_CANTINIT (SQLITE_READONLY | (5<<8)) +#define SQLITE_READONLY_DIRECTORY (SQLITE_READONLY | (6<<8)) +#define SQLITE_ABORT_ROLLBACK (SQLITE_ABORT | (2<<8)) +#define SQLITE_CONSTRAINT_CHECK (SQLITE_CONSTRAINT | (1<<8)) +#define SQLITE_CONSTRAINT_COMMITHOOK (SQLITE_CONSTRAINT | (2<<8)) +#define SQLITE_CONSTRAINT_FOREIGNKEY (SQLITE_CONSTRAINT | (3<<8)) +#define SQLITE_CONSTRAINT_FUNCTION (SQLITE_CONSTRAINT | (4<<8)) +#define SQLITE_CONSTRAINT_NOTNULL (SQLITE_CONSTRAINT | (5<<8)) +#define SQLITE_CONSTRAINT_PRIMARYKEY (SQLITE_CONSTRAINT | (6<<8)) +#define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8)) +#define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8)) +#define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8)) +#define SQLITE_CONSTRAINT_ROWID (SQLITE_CONSTRAINT |(10<<8)) +#define SQLITE_CONSTRAINT_PINNED (SQLITE_CONSTRAINT |(11<<8)) +#define SQLITE_CONSTRAINT_DATATYPE (SQLITE_CONSTRAINT |(12<<8)) +#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) +#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) +#define SQLITE_WARNING_AUTOINDEX (SQLITE_WARNING | (1<<8)) +#define SQLITE_AUTH_USER (SQLITE_AUTH | (1<<8)) +#define SQLITE_OK_LOAD_PERMANENTLY (SQLITE_OK | (1<<8)) +#define SQLITE_OK_SYMLINK (SQLITE_OK | (2<<8)) /* internal use only */ + +/* +** CAPI3REF: Flags For File Open Operations +** +** These bit values are intended for use in the +** 3rd parameter to the [sqlite3_open_v2()] interface and +** in the 4th parameter to the [sqlite3_vfs.xOpen] method. +** +** Only those flags marked as "Ok for sqlite3_open_v2()" may be +** used as the third argument to the [sqlite3_open_v2()] interface. +** The other flags have historically been ignored by sqlite3_open_v2(), +** though future versions of SQLite might change so that an error is +** raised if any of the disallowed bits are passed into sqlite3_open_v2(). +** Applications should not depend on the historical behavior. +** +** Note in particular that passing the SQLITE_OPEN_EXCLUSIVE flag into +** [sqlite3_open_v2()] does *not* cause the underlying database file +** to be opened using O_EXCL. Passing SQLITE_OPEN_EXCLUSIVE into +** [sqlite3_open_v2()] has historically be a no-op and might become an +** error in future versions of SQLite. +*/ +#define SQLITE_OPEN_READONLY 0x00000001 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_READWRITE 0x00000002 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_CREATE 0x00000004 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_DELETEONCLOSE 0x00000008 /* VFS only */ +#define SQLITE_OPEN_EXCLUSIVE 0x00000010 /* VFS only */ +#define SQLITE_OPEN_AUTOPROXY 0x00000020 /* VFS only */ +#define SQLITE_OPEN_URI 0x00000040 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_MEMORY 0x00000080 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_MAIN_DB 0x00000100 /* VFS only */ +#define SQLITE_OPEN_TEMP_DB 0x00000200 /* VFS only */ +#define SQLITE_OPEN_TRANSIENT_DB 0x00000400 /* VFS only */ +#define SQLITE_OPEN_MAIN_JOURNAL 0x00000800 /* VFS only */ +#define SQLITE_OPEN_TEMP_JOURNAL 0x00001000 /* VFS only */ +#define SQLITE_OPEN_SUBJOURNAL 0x00002000 /* VFS only */ +#define SQLITE_OPEN_SUPER_JOURNAL 0x00004000 /* VFS only */ +#define SQLITE_OPEN_NOMUTEX 0x00008000 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_FULLMUTEX 0x00010000 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_SHAREDCACHE 0x00020000 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_PRIVATECACHE 0x00040000 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_WAL 0x00080000 /* VFS only */ +#define SQLITE_OPEN_NOFOLLOW 0x01000000 /* Ok for sqlite3_open_v2() */ +#define SQLITE_OPEN_EXRESCODE 0x02000000 /* Extended result codes */ + +/* Reserved: 0x00F00000 */ +/* Legacy compatibility: */ +#define SQLITE_OPEN_MASTER_JOURNAL 0x00004000 /* VFS only */ + + +/* +** CAPI3REF: Device Characteristics +** +** The xDeviceCharacteristics method of the [sqlite3_io_methods] +** object returns an integer which is a vector of these +** bit values expressing I/O characteristics of the mass storage +** device that holds the file that the [sqlite3_io_methods] +** refers to. +** +** The SQLITE_IOCAP_ATOMIC property means that all writes of +** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values +** mean that writes of blocks that are nnn bytes in size and +** are aligned to an address which is an integer multiple of +** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means +** that when data is appended to a file, the data is appended +** first then the size of the file is extended, never the other +** way around. The SQLITE_IOCAP_SEQUENTIAL property means that +** information is written to disk in the same order as calls +** to xWrite(). The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that +** after reboot following a crash or power loss, the only bytes in a +** file that were written at the application level might have changed +** and that adjacent bytes, even bytes within the same sector are +** guaranteed to be unchanged. The SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN +** flag indicates that a file cannot be deleted when open. The +** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on +** read-only media and cannot be changed even by processes with +** elevated privileges. +** +** The SQLITE_IOCAP_BATCH_ATOMIC property means that the underlying +** filesystem supports doing multiple write operations atomically when those +** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and +** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. +*/ +#define SQLITE_IOCAP_ATOMIC 0x00000001 +#define SQLITE_IOCAP_ATOMIC512 0x00000002 +#define SQLITE_IOCAP_ATOMIC1K 0x00000004 +#define SQLITE_IOCAP_ATOMIC2K 0x00000008 +#define SQLITE_IOCAP_ATOMIC4K 0x00000010 +#define SQLITE_IOCAP_ATOMIC8K 0x00000020 +#define SQLITE_IOCAP_ATOMIC16K 0x00000040 +#define SQLITE_IOCAP_ATOMIC32K 0x00000080 +#define SQLITE_IOCAP_ATOMIC64K 0x00000100 +#define SQLITE_IOCAP_SAFE_APPEND 0x00000200 +#define SQLITE_IOCAP_SEQUENTIAL 0x00000400 +#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 +#define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 +#define SQLITE_IOCAP_IMMUTABLE 0x00002000 +#define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 + +/* +** CAPI3REF: File Locking Levels +** +** SQLite uses one of these integer values as the second +** argument to calls it makes to the xLock() and xUnlock() methods +** of an [sqlite3_io_methods] object. +*/ +#define SQLITE_LOCK_NONE 0 +#define SQLITE_LOCK_SHARED 1 +#define SQLITE_LOCK_RESERVED 2 +#define SQLITE_LOCK_PENDING 3 +#define SQLITE_LOCK_EXCLUSIVE 4 + +/* +** CAPI3REF: Synchronization Type Flags +** +** When SQLite invokes the xSync() method of an +** [sqlite3_io_methods] object it uses a combination of +** these integer values as the second argument. +** +** When the SQLITE_SYNC_DATAONLY flag is used, it means that the +** sync operation only needs to flush data to mass storage. Inode +** information need not be flushed. If the lower four bits of the flag +** equal SQLITE_SYNC_NORMAL, that means to use normal fsync() semantics. +** If the lower four bits equal SQLITE_SYNC_FULL, that means +** to use Mac OS X style fullsync instead of fsync(). +** +** Do not confuse the SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags +** with the [PRAGMA synchronous]=NORMAL and [PRAGMA synchronous]=FULL +** settings. The [synchronous pragma] determines when calls to the +** xSync VFS method occur and applies uniformly across all platforms. +** The SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags determine how +** energetic or rigorous or forceful the sync operations are and +** only make a difference on Mac OSX for the default SQLite code. +** (Third-party VFS implementations might also make the distinction +** between SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL, but among the +** operating systems natively supported by SQLite, only Mac OSX +** cares about the difference.) +*/ +#define SQLITE_SYNC_NORMAL 0x00002 +#define SQLITE_SYNC_FULL 0x00003 +#define SQLITE_SYNC_DATAONLY 0x00010 + +/* +** CAPI3REF: OS Interface Open File Handle +** +** An [sqlite3_file] object represents an open file in the +** [sqlite3_vfs | OS interface layer]. Individual OS interface +** implementations will +** want to subclass this object by appending additional fields +** for their own use. The pMethods entry is a pointer to an +** [sqlite3_io_methods] object that defines methods for performing +** I/O operations on the open file. +*/ +typedef struct sqlite3_file sqlite3_file; +struct sqlite3_file { + const struct sqlite3_io_methods *pMethods; /* Methods for an open file */ +}; + +/* +** CAPI3REF: OS Interface File Virtual Methods Object +** +** Every file opened by the [sqlite3_vfs.xOpen] method populates an +** [sqlite3_file] object (or, more commonly, a subclass of the +** [sqlite3_file] object) with a pointer to an instance of this object. +** This object defines the methods used to perform various operations +** against the open file represented by the [sqlite3_file] object. +** +** If the [sqlite3_vfs.xOpen] method sets the sqlite3_file.pMethods element +** to a non-NULL pointer, then the sqlite3_io_methods.xClose method +** may be invoked even if the [sqlite3_vfs.xOpen] reported that it failed. The +** only way to prevent a call to xClose following a failed [sqlite3_vfs.xOpen] +** is for the [sqlite3_vfs.xOpen] to set the sqlite3_file.pMethods element +** to NULL. +** +** The flags argument to xSync may be one of [SQLITE_SYNC_NORMAL] or +** [SQLITE_SYNC_FULL]. The first choice is the normal fsync(). +** The second choice is a Mac OS X style fullsync. The [SQLITE_SYNC_DATAONLY] +** flag may be ORed in to indicate that only the data of the file +** and not its inode needs to be synced. +** +** The integer values to xLock() and xUnlock() are one of +**
    +**
  • [SQLITE_LOCK_NONE], +**
  • [SQLITE_LOCK_SHARED], +**
  • [SQLITE_LOCK_RESERVED], +**
  • [SQLITE_LOCK_PENDING], or +**
  • [SQLITE_LOCK_EXCLUSIVE]. +**
+** xLock() increases the lock. xUnlock() decreases the lock. +** The xCheckReservedLock() method checks whether any database connection, +** either in this process or in some other process, is holding a RESERVED, +** PENDING, or EXCLUSIVE lock on the file. It returns true +** if such a lock exists and false otherwise. +** +** The xFileControl() method is a generic interface that allows custom +** VFS implementations to directly control an open file using the +** [sqlite3_file_control()] interface. The second "op" argument is an +** integer opcode. The third argument is a generic pointer intended to +** point to a structure that may contain arguments or space in which to +** write return values. Potential uses for xFileControl() might be +** functions to enable blocking locks with timeouts, to change the +** locking strategy (for example to use dot-file locks), to inquire +** about the status of a lock, or to break stale locks. The SQLite +** core reserves all opcodes less than 100 for its own use. +** A [file control opcodes | list of opcodes] less than 100 is available. +** Applications that define a custom xFileControl method should use opcodes +** greater than 100 to avoid conflicts. VFS implementations should +** return [SQLITE_NOTFOUND] for file control opcodes that they do not +** recognize. +** +** The xSectorSize() method returns the sector size of the +** device that underlies the file. The sector size is the +** minimum write that can be performed without disturbing +** other bytes in the file. The xDeviceCharacteristics() +** method returns a bit vector describing behaviors of the +** underlying device: +** +**
    +**
  • [SQLITE_IOCAP_ATOMIC] +**
  • [SQLITE_IOCAP_ATOMIC512] +**
  • [SQLITE_IOCAP_ATOMIC1K] +**
  • [SQLITE_IOCAP_ATOMIC2K] +**
  • [SQLITE_IOCAP_ATOMIC4K] +**
  • [SQLITE_IOCAP_ATOMIC8K] +**
  • [SQLITE_IOCAP_ATOMIC16K] +**
  • [SQLITE_IOCAP_ATOMIC32K] +**
  • [SQLITE_IOCAP_ATOMIC64K] +**
  • [SQLITE_IOCAP_SAFE_APPEND] +**
  • [SQLITE_IOCAP_SEQUENTIAL] +**
  • [SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN] +**
  • [SQLITE_IOCAP_POWERSAFE_OVERWRITE] +**
  • [SQLITE_IOCAP_IMMUTABLE] +**
  • [SQLITE_IOCAP_BATCH_ATOMIC] +**
+** +** The SQLITE_IOCAP_ATOMIC property means that all writes of +** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values +** mean that writes of blocks that are nnn bytes in size and +** are aligned to an address which is an integer multiple of +** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means +** that when data is appended to a file, the data is appended +** first then the size of the file is extended, never the other +** way around. The SQLITE_IOCAP_SEQUENTIAL property means that +** information is written to disk in the same order as calls +** to xWrite(). +** +** If xRead() returns SQLITE_IOERR_SHORT_READ it must also fill +** in the unread portions of the buffer with zeros. A VFS that +** fails to zero-fill short reads might seem to work. However, +** failure to zero-fill short reads will eventually lead to +** database corruption. +*/ +typedef struct sqlite3_io_methods sqlite3_io_methods; +struct sqlite3_io_methods { + int iVersion; + int (*xClose)(sqlite3_file*); + int (*xRead)(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); + int (*xWrite)(sqlite3_file*, const void*, int iAmt, sqlite3_int64 iOfst); + int (*xTruncate)(sqlite3_file*, sqlite3_int64 size); + int (*xSync)(sqlite3_file*, int flags); + int (*xFileSize)(sqlite3_file*, sqlite3_int64 *pSize); + int (*xLock)(sqlite3_file*, int); + int (*xUnlock)(sqlite3_file*, int); + int (*xCheckReservedLock)(sqlite3_file*, int *pResOut); + int (*xFileControl)(sqlite3_file*, int op, void *pArg); + int (*xSectorSize)(sqlite3_file*); + int (*xDeviceCharacteristics)(sqlite3_file*); + /* Methods above are valid for version 1 */ + int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**); + int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); + void (*xShmBarrier)(sqlite3_file*); + int (*xShmUnmap)(sqlite3_file*, int deleteFlag); + /* Methods above are valid for version 2 */ + int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); + int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); + /* Methods above are valid for version 3 */ + /* Additional methods may be added in future releases */ +}; + +/* +** CAPI3REF: Standard File Control Opcodes +** KEYWORDS: {file control opcodes} {file control opcode} +** +** These integer constants are opcodes for the xFileControl method +** of the [sqlite3_io_methods] object and for the [sqlite3_file_control()] +** interface. +** +**
    +**
  • [[SQLITE_FCNTL_LOCKSTATE]] +** The [SQLITE_FCNTL_LOCKSTATE] opcode is used for debugging. This +** opcode causes the xFileControl method to write the current state of +** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED], +** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE]) +** into an integer that the pArg argument points to. This capability +** is used during testing and is only available when the SQLITE_TEST +** compile-time option is used. +** +**
  • [[SQLITE_FCNTL_SIZE_HINT]] +** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS +** layer a hint of how large the database file will grow to be during the +** current transaction. This hint is not guaranteed to be accurate but it +** is often close. The underlying VFS might choose to preallocate database +** file space based on this hint in order to help writes to the database +** file run faster. +** +**
  • [[SQLITE_FCNTL_SIZE_LIMIT]] +** The [SQLITE_FCNTL_SIZE_LIMIT] opcode is used by in-memory VFS that +** implements [sqlite3_deserialize()] to set an upper bound on the size +** of the in-memory database. The argument is a pointer to a [sqlite3_int64]. +** If the integer pointed to is negative, then it is filled in with the +** current limit. Otherwise the limit is set to the larger of the value +** of the integer pointed to and the current database size. The integer +** pointed to is set to the new limit. +** +**
  • [[SQLITE_FCNTL_CHUNK_SIZE]] +** The [SQLITE_FCNTL_CHUNK_SIZE] opcode is used to request that the VFS +** extends and truncates the database file in chunks of a size specified +** by the user. The fourth argument to [sqlite3_file_control()] should +** point to an integer (type int) containing the new chunk-size to use +** for the nominated database. Allocating database file space in large +** chunks (say 1MB at a time), may reduce file-system fragmentation and +** improve performance on some systems. +** +**
  • [[SQLITE_FCNTL_FILE_POINTER]] +** The [SQLITE_FCNTL_FILE_POINTER] opcode is used to obtain a pointer +** to the [sqlite3_file] object associated with a particular database +** connection. See also [SQLITE_FCNTL_JOURNAL_POINTER]. +** +**
  • [[SQLITE_FCNTL_JOURNAL_POINTER]] +** The [SQLITE_FCNTL_JOURNAL_POINTER] opcode is used to obtain a pointer +** to the [sqlite3_file] object associated with the journal file (either +** the [rollback journal] or the [write-ahead log]) for a particular database +** connection. See also [SQLITE_FCNTL_FILE_POINTER]. +** +**
  • [[SQLITE_FCNTL_SYNC_OMITTED]] +** No longer in use. +** +**
  • [[SQLITE_FCNTL_SYNC]] +** The [SQLITE_FCNTL_SYNC] opcode is generated internally by SQLite and +** sent to the VFS immediately before the xSync method is invoked on a +** database file descriptor. Or, if the xSync method is not invoked +** because the user has configured SQLite with +** [PRAGMA synchronous | PRAGMA synchronous=OFF] it is invoked in place +** of the xSync method. In most cases, the pointer argument passed with +** this file-control is NULL. However, if the database file is being synced +** as part of a multi-database commit, the argument points to a nul-terminated +** string containing the transactions super-journal file name. VFSes that +** do not need this signal should silently ignore this opcode. Applications +** should not call [sqlite3_file_control()] with this opcode as doing so may +** disrupt the operation of the specialized VFSes that do require it. +** +**
  • [[SQLITE_FCNTL_COMMIT_PHASETWO]] +** The [SQLITE_FCNTL_COMMIT_PHASETWO] opcode is generated internally by SQLite +** and sent to the VFS after a transaction has been committed immediately +** but before the database is unlocked. VFSes that do not need this signal +** should silently ignore this opcode. Applications should not call +** [sqlite3_file_control()] with this opcode as doing so may disrupt the +** operation of the specialized VFSes that do require it. +** +**
  • [[SQLITE_FCNTL_WIN32_AV_RETRY]] +** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic +** retry counts and intervals for certain disk I/O operations for the +** windows [VFS] in order to provide robustness in the presence of +** anti-virus programs. By default, the windows VFS will retry file read, +** file write, and file delete operations up to 10 times, with a delay +** of 25 milliseconds before the first retry and with the delay increasing +** by an additional 25 milliseconds with each subsequent retry. This +** opcode allows these two values (10 retries and 25 milliseconds of delay) +** to be adjusted. The values are changed for all database connections +** within the same process. The argument is a pointer to an array of two +** integers where the first integer is the new retry count and the second +** integer is the delay. If either integer is negative, then the setting +** is not changed but instead the prior value of that setting is written +** into the array entry, allowing the current retry settings to be +** interrogated. The zDbName parameter is ignored. +** +**
  • [[SQLITE_FCNTL_PERSIST_WAL]] +** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the +** persistent [WAL | Write Ahead Log] setting. By default, the auxiliary +** write ahead log ([WAL file]) and shared memory +** files used for transaction control +** are automatically deleted when the latest connection to the database +** closes. Setting persistent WAL mode causes those files to persist after +** close. Persisting the files is useful when other processes that do not +** have write permission on the directory containing the database file want +** to read the database file, as the WAL and shared memory files must exist +** in order for the database to be readable. The fourth parameter to +** [sqlite3_file_control()] for this opcode should be a pointer to an integer. +** That integer is 0 to disable persistent WAL mode or 1 to enable persistent +** WAL mode. If the integer is -1, then it is overwritten with the current +** WAL persistence setting. +** +**
  • [[SQLITE_FCNTL_POWERSAFE_OVERWRITE]] +** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the +** persistent "powersafe-overwrite" or "PSOW" setting. The PSOW setting +** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the +** xDeviceCharacteristics methods. The fourth parameter to +** [sqlite3_file_control()] for this opcode should be a pointer to an integer. +** That integer is 0 to disable zero-damage mode or 1 to enable zero-damage +** mode. If the integer is -1, then it is overwritten with the current +** zero-damage mode setting. +** +**
  • [[SQLITE_FCNTL_OVERWRITE]] +** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening +** a write transaction to indicate that, unless it is rolled back for some +** reason, the entire database file will be overwritten by the current +** transaction. This is used by VACUUM operations. +** +**
  • [[SQLITE_FCNTL_VFSNAME]] +** ^The [SQLITE_FCNTL_VFSNAME] opcode can be used to obtain the names of +** all [VFSes] in the VFS stack. The names are of all VFS shims and the +** final bottom-level VFS are written into memory obtained from +** [sqlite3_malloc()] and the result is stored in the char* variable +** that the fourth parameter of [sqlite3_file_control()] points to. +** The caller is responsible for freeing the memory when done. As with +** all file-control actions, there is no guarantee that this will actually +** do anything. Callers should initialize the char* variable to a NULL +** pointer in case this file-control is not implemented. This file-control +** is intended for diagnostic use only. +** +**
  • [[SQLITE_FCNTL_VFS_POINTER]] +** ^The [SQLITE_FCNTL_VFS_POINTER] opcode finds a pointer to the top-level +** [VFSes] currently in use. ^(The argument X in +** sqlite3_file_control(db,SQLITE_FCNTL_VFS_POINTER,X) must be +** of type "[sqlite3_vfs] **". This opcodes will set *X +** to a pointer to the top-level VFS.)^ +** ^When there are multiple VFS shims in the stack, this opcode finds the +** upper-most shim only. +** +**
  • [[SQLITE_FCNTL_PRAGMA]] +** ^Whenever a [PRAGMA] statement is parsed, an [SQLITE_FCNTL_PRAGMA] +** file control is sent to the open [sqlite3_file] object corresponding +** to the database file to which the pragma statement refers. ^The argument +** to the [SQLITE_FCNTL_PRAGMA] file control is an array of +** pointers to strings (char**) in which the second element of the array +** is the name of the pragma and the third element is the argument to the +** pragma or NULL if the pragma has no argument. ^The handler for an +** [SQLITE_FCNTL_PRAGMA] file control can optionally make the first element +** of the char** argument point to a string obtained from [sqlite3_mprintf()] +** or the equivalent and that string will become the result of the pragma or +** the error message if the pragma fails. ^If the +** [SQLITE_FCNTL_PRAGMA] file control returns [SQLITE_NOTFOUND], then normal +** [PRAGMA] processing continues. ^If the [SQLITE_FCNTL_PRAGMA] +** file control returns [SQLITE_OK], then the parser assumes that the +** VFS has handled the PRAGMA itself and the parser generates a no-op +** prepared statement if result string is NULL, or that returns a copy +** of the result string if the string is non-NULL. +** ^If the [SQLITE_FCNTL_PRAGMA] file control returns +** any result code other than [SQLITE_OK] or [SQLITE_NOTFOUND], that means +** that the VFS encountered an error while handling the [PRAGMA] and the +** compilation of the PRAGMA fails with an error. ^The [SQLITE_FCNTL_PRAGMA] +** file control occurs at the beginning of pragma statement analysis and so +** it is able to override built-in [PRAGMA] statements. +** +**
  • [[SQLITE_FCNTL_BUSYHANDLER]] +** ^The [SQLITE_FCNTL_BUSYHANDLER] +** file-control may be invoked by SQLite on the database file handle +** shortly after it is opened in order to provide a custom VFS with access +** to the connection's busy-handler callback. The argument is of type (void**) +** - an array of two (void *) values. The first (void *) actually points +** to a function of type (int (*)(void *)). In order to invoke the connection's +** busy-handler, this function should be invoked with the second (void *) in +** the array as the only argument. If it returns non-zero, then the operation +** should be retried. If it returns zero, the custom VFS should abandon the +** current operation. +** +**
  • [[SQLITE_FCNTL_TEMPFILENAME]] +** ^Applications can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control +** to have SQLite generate a +** temporary filename using the same algorithm that is followed to generate +** temporary filenames for TEMP tables and other internal uses. The +** argument should be a char** which will be filled with the filename +** written into memory obtained from [sqlite3_malloc()]. The caller should +** invoke [sqlite3_free()] on the result to avoid a memory leak. +** +**
  • [[SQLITE_FCNTL_MMAP_SIZE]] +** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the +** maximum number of bytes that will be used for memory-mapped I/O. +** The argument is a pointer to a value of type sqlite3_int64 that +** is an advisory maximum number of bytes in the file to memory map. The +** pointer is overwritten with the old value. The limit is not changed if +** the value originally pointed to is negative, and so the current limit +** can be queried by passing in a pointer to a negative number. This +** file-control is used internally to implement [PRAGMA mmap_size]. +** +**
  • [[SQLITE_FCNTL_TRACE]] +** The [SQLITE_FCNTL_TRACE] file control provides advisory information +** to the VFS about what the higher layers of the SQLite stack are doing. +** This file control is used by some VFS activity tracing [shims]. +** The argument is a zero-terminated string. Higher layers in the +** SQLite stack may generate instances of this file control if +** the [SQLITE_USE_FCNTL_TRACE] compile-time option is enabled. +** +**
  • [[SQLITE_FCNTL_HAS_MOVED]] +** The [SQLITE_FCNTL_HAS_MOVED] file control interprets its argument as a +** pointer to an integer and it writes a boolean into that integer depending +** on whether or not the file has been renamed, moved, or deleted since it +** was first opened. +** +**
  • [[SQLITE_FCNTL_WIN32_GET_HANDLE]] +** The [SQLITE_FCNTL_WIN32_GET_HANDLE] opcode can be used to obtain the +** underlying native file handle associated with a file handle. This file +** control interprets its argument as a pointer to a native file handle and +** writes the resulting value there. +** +**
  • [[SQLITE_FCNTL_WIN32_SET_HANDLE]] +** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging. This +** opcode causes the xFileControl method to swap the file handle with the one +** pointed to by the pArg argument. This capability is used during testing +** and only needs to be supported when SQLITE_TEST is defined. +** +**
  • [[SQLITE_FCNTL_WAL_BLOCK]] +** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might +** be advantageous to block on the next WAL lock if the lock is not immediately +** available. The WAL subsystem issues this signal during rare +** circumstances in order to fix a problem with priority inversion. +** Applications should not use this file-control. +** +**
  • [[SQLITE_FCNTL_ZIPVFS]] +** The [SQLITE_FCNTL_ZIPVFS] opcode is implemented by zipvfs only. All other +** VFS should return SQLITE_NOTFOUND for this opcode. +** +**
  • [[SQLITE_FCNTL_RBU]] +** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by +** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for +** this opcode. +** +**
  • [[SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]] +** If the [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] opcode returns SQLITE_OK, then +** the file descriptor is placed in "batch write mode", which +** means all subsequent write operations will be deferred and done +** atomically at the next [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. Systems +** that do not support batch atomic writes will return SQLITE_NOTFOUND. +** ^Following a successful SQLITE_FCNTL_BEGIN_ATOMIC_WRITE and prior to +** the closing [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE] or +** [SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE], SQLite will make +** no VFS interface calls on the same [sqlite3_file] file descriptor +** except for calls to the xWrite method and the xFileControl method +** with [SQLITE_FCNTL_SIZE_HINT]. +** +**
  • [[SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]] +** The [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE] opcode causes all write +** operations since the previous successful call to +** [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] to be performed atomically. +** This file control returns [SQLITE_OK] if and only if the writes were +** all performed successfully and have been committed to persistent storage. +** ^Regardless of whether or not it is successful, this file control takes +** the file descriptor out of batch write mode so that all subsequent +** write operations are independent. +** ^SQLite will never invoke SQLITE_FCNTL_COMMIT_ATOMIC_WRITE without +** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]. +** +**
  • [[SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE]] +** The [SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE] opcode causes all write +** operations since the previous successful call to +** [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] to be rolled back. +** ^This file control takes the file descriptor out of batch write mode +** so that all subsequent write operations are independent. +** ^SQLite will never invoke SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE without +** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]. +** +**
  • [[SQLITE_FCNTL_LOCK_TIMEOUT]] +** The [SQLITE_FCNTL_LOCK_TIMEOUT] opcode is used to configure a VFS +** to block for up to M milliseconds before failing when attempting to +** obtain a file lock using the xLock or xShmLock methods of the VFS. +** The parameter is a pointer to a 32-bit signed integer that contains +** the value that M is to be set to. Before returning, the 32-bit signed +** integer is overwritten with the previous value of M. +** +**
  • [[SQLITE_FCNTL_DATA_VERSION]] +** The [SQLITE_FCNTL_DATA_VERSION] opcode is used to detect changes to +** a database file. The argument is a pointer to a 32-bit unsigned integer. +** The "data version" for the pager is written into the pointer. The +** "data version" changes whenever any change occurs to the corresponding +** database file, either through SQL statements on the same database +** connection or through transactions committed by separate database +** connections possibly in other processes. The [sqlite3_total_changes()] +** interface can be used to find if any database on the connection has changed, +** but that interface responds to changes on TEMP as well as MAIN and does +** not provide a mechanism to detect changes to MAIN only. Also, the +** [sqlite3_total_changes()] interface responds to internal changes only and +** omits changes made by other database connections. The +** [PRAGMA data_version] command provides a mechanism to detect changes to +** a single attached database that occur due to other database connections, +** but omits changes implemented by the database connection on which it is +** called. This file control is the only mechanism to detect changes that +** happen either internally or externally and that are associated with +** a particular attached database. +** +**
  • [[SQLITE_FCNTL_CKPT_START]] +** The [SQLITE_FCNTL_CKPT_START] opcode is invoked from within a checkpoint +** in wal mode before the client starts to copy pages from the wal +** file to the database file. +** +**
  • [[SQLITE_FCNTL_CKPT_DONE]] +** The [SQLITE_FCNTL_CKPT_DONE] opcode is invoked from within a checkpoint +** in wal mode after the client has finished copying pages from the wal +** file to the database file, but before the *-shm file is updated to +** record the fact that the pages have been checkpointed. +**
+** +**
  • [[SQLITE_FCNTL_EXTERNAL_READER]] +** The EXPERIMENTAL [SQLITE_FCNTL_EXTERNAL_READER] opcode is used to detect +** whether or not there is a database client in another process with a wal-mode +** transaction open on the database or not. It is only available on unix.The +** (void*) argument passed with this file-control should be a pointer to a +** value of type (int). The integer value is set to 1 if the database is a wal +** mode database and there exists at least one client in another process that +** currently has an SQL transaction open on the database. It is set to 0 if +** the database is not a wal-mode db, or if there is no such connection in any +** other process. This opcode cannot be used to detect transactions opened +** by clients within the current process, only within other processes. +** +** +**
  • [[SQLITE_FCNTL_CKSM_FILE]] +** Used by the cksmvfs VFS module only. +** +*/ +#define SQLITE_FCNTL_LOCKSTATE 1 +#define SQLITE_FCNTL_GET_LOCKPROXYFILE 2 +#define SQLITE_FCNTL_SET_LOCKPROXYFILE 3 +#define SQLITE_FCNTL_LAST_ERRNO 4 +#define SQLITE_FCNTL_SIZE_HINT 5 +#define SQLITE_FCNTL_CHUNK_SIZE 6 +#define SQLITE_FCNTL_FILE_POINTER 7 +#define SQLITE_FCNTL_SYNC_OMITTED 8 +#define SQLITE_FCNTL_WIN32_AV_RETRY 9 +#define SQLITE_FCNTL_PERSIST_WAL 10 +#define SQLITE_FCNTL_OVERWRITE 11 +#define SQLITE_FCNTL_VFSNAME 12 +#define SQLITE_FCNTL_POWERSAFE_OVERWRITE 13 +#define SQLITE_FCNTL_PRAGMA 14 +#define SQLITE_FCNTL_BUSYHANDLER 15 +#define SQLITE_FCNTL_TEMPFILENAME 16 +#define SQLITE_FCNTL_MMAP_SIZE 18 +#define SQLITE_FCNTL_TRACE 19 +#define SQLITE_FCNTL_HAS_MOVED 20 +#define SQLITE_FCNTL_SYNC 21 +#define SQLITE_FCNTL_COMMIT_PHASETWO 22 +#define SQLITE_FCNTL_WIN32_SET_HANDLE 23 +#define SQLITE_FCNTL_WAL_BLOCK 24 +#define SQLITE_FCNTL_ZIPVFS 25 +#define SQLITE_FCNTL_RBU 26 +#define SQLITE_FCNTL_VFS_POINTER 27 +#define SQLITE_FCNTL_JOURNAL_POINTER 28 +#define SQLITE_FCNTL_WIN32_GET_HANDLE 29 +#define SQLITE_FCNTL_PDB 30 +#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE 31 +#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE 32 +#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE 33 +#define SQLITE_FCNTL_LOCK_TIMEOUT 34 +#define SQLITE_FCNTL_DATA_VERSION 35 +#define SQLITE_FCNTL_SIZE_LIMIT 36 +#define SQLITE_FCNTL_CKPT_DONE 37 +#define SQLITE_FCNTL_RESERVE_BYTES 38 +#define SQLITE_FCNTL_CKPT_START 39 +#define SQLITE_FCNTL_EXTERNAL_READER 40 +#define SQLITE_FCNTL_CKSM_FILE 41 + +/* deprecated names */ +#define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE +#define SQLITE_SET_LOCKPROXYFILE SQLITE_FCNTL_SET_LOCKPROXYFILE +#define SQLITE_LAST_ERRNO SQLITE_FCNTL_LAST_ERRNO + + +/* +** CAPI3REF: Mutex Handle +** +** The mutex module within SQLite defines [sqlite3_mutex] to be an +** abstract type for a mutex object. The SQLite core never looks +** at the internal representation of an [sqlite3_mutex]. It only +** deals with pointers to the [sqlite3_mutex] object. +** +** Mutexes are created using [sqlite3_mutex_alloc()]. +*/ +typedef struct sqlite3_mutex sqlite3_mutex; + +/* +** CAPI3REF: Loadable Extension Thunk +** +** A pointer to the opaque sqlite3_api_routines structure is passed as +** the third parameter to entry points of [loadable extensions]. This +** structure must be typedefed in order to work around compiler warnings +** on some platforms. +*/ +typedef struct sqlite3_api_routines sqlite3_api_routines; + +/* +** CAPI3REF: OS Interface Object +** +** An instance of the sqlite3_vfs object defines the interface between +** the SQLite core and the underlying operating system. The "vfs" +** in the name of the object stands for "virtual file system". See +** the [VFS | VFS documentation] for further information. +** +** The VFS interface is sometimes extended by adding new methods onto +** the end. Each time such an extension occurs, the iVersion field +** is incremented. The iVersion value started out as 1 in +** SQLite [version 3.5.0] on [dateof:3.5.0], then increased to 2 +** with SQLite [version 3.7.0] on [dateof:3.7.0], and then increased +** to 3 with SQLite [version 3.7.6] on [dateof:3.7.6]. Additional fields +** may be appended to the sqlite3_vfs object and the iVersion value +** may increase again in future versions of SQLite. +** Note that due to an oversight, the structure +** of the sqlite3_vfs object changed in the transition from +** SQLite [version 3.5.9] to [version 3.6.0] on [dateof:3.6.0] +** and yet the iVersion field was not increased. +** +** The szOsFile field is the size of the subclassed [sqlite3_file] +** structure used by this VFS. mxPathname is the maximum length of +** a pathname in this VFS. +** +** Registered sqlite3_vfs objects are kept on a linked list formed by +** the pNext pointer. The [sqlite3_vfs_register()] +** and [sqlite3_vfs_unregister()] interfaces manage this list +** in a thread-safe way. The [sqlite3_vfs_find()] interface +** searches the list. Neither the application code nor the VFS +** implementation should use the pNext pointer. +** +** The pNext field is the only field in the sqlite3_vfs +** structure that SQLite will ever modify. SQLite will only access +** or modify this field while holding a particular static mutex. +** The application should never modify anything within the sqlite3_vfs +** object once the object has been registered. +** +** The zName field holds the name of the VFS module. The name must +** be unique across all VFS modules. +** +** [[sqlite3_vfs.xOpen]] +** ^SQLite guarantees that the zFilename parameter to xOpen +** is either a NULL pointer or string obtained +** from xFullPathname() with an optional suffix added. +** ^If a suffix is added to the zFilename parameter, it will +** consist of a single "-" character followed by no more than +** 11 alphanumeric and/or "-" characters. +** ^SQLite further guarantees that +** the string will be valid and unchanged until xClose() is +** called. Because of the previous sentence, +** the [sqlite3_file] can safely store a pointer to the +** filename if it needs to remember the filename for some reason. +** If the zFilename parameter to xOpen is a NULL pointer then xOpen +** must invent its own temporary name for the file. ^Whenever the +** xFilename parameter is NULL it will also be the case that the +** flags parameter will include [SQLITE_OPEN_DELETEONCLOSE]. +** +** The flags argument to xOpen() includes all bits set in +** the flags argument to [sqlite3_open_v2()]. Or if [sqlite3_open()] +** or [sqlite3_open16()] is used, then flags includes at least +** [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]. +** If xOpen() opens a file read-only then it sets *pOutFlags to +** include [SQLITE_OPEN_READONLY]. Other bits in *pOutFlags may be set. +** +** ^(SQLite will also add one of the following flags to the xOpen() +** call, depending on the object being opened: +** +**
      +**
    • [SQLITE_OPEN_MAIN_DB] +**
    • [SQLITE_OPEN_MAIN_JOURNAL] +**
    • [SQLITE_OPEN_TEMP_DB] +**
    • [SQLITE_OPEN_TEMP_JOURNAL] +**
    • [SQLITE_OPEN_TRANSIENT_DB] +**
    • [SQLITE_OPEN_SUBJOURNAL] +**
    • [SQLITE_OPEN_SUPER_JOURNAL] +**
    • [SQLITE_OPEN_WAL] +**
    )^ +** +** The file I/O implementation can use the object type flags to +** change the way it deals with files. For example, an application +** that does not care about crash recovery or rollback might make +** the open of a journal file a no-op. Writes to this journal would +** also be no-ops, and any attempt to read the journal would return +** SQLITE_IOERR. Or the implementation might recognize that a database +** file will be doing page-aligned sector reads and writes in a random +** order and set up its I/O subsystem accordingly. +** +** SQLite might also add one of the following flags to the xOpen method: +** +**
      +**
    • [SQLITE_OPEN_DELETEONCLOSE] +**
    • [SQLITE_OPEN_EXCLUSIVE] +**
    +** +** The [SQLITE_OPEN_DELETEONCLOSE] flag means the file should be +** deleted when it is closed. ^The [SQLITE_OPEN_DELETEONCLOSE] +** will be set for TEMP databases and their journals, transient +** databases, and subjournals. +** +** ^The [SQLITE_OPEN_EXCLUSIVE] flag is always used in conjunction +** with the [SQLITE_OPEN_CREATE] flag, which are both directly +** analogous to the O_EXCL and O_CREAT flags of the POSIX open() +** API. The SQLITE_OPEN_EXCLUSIVE flag, when paired with the +** SQLITE_OPEN_CREATE, is used to indicate that file should always +** be created, and that it is an error if it already exists. +** It is not used to indicate the file should be opened +** for exclusive access. +** +** ^At least szOsFile bytes of memory are allocated by SQLite +** to hold the [sqlite3_file] structure passed as the third +** argument to xOpen. The xOpen method does not have to +** allocate the structure; it should just fill it in. Note that +** the xOpen method must set the sqlite3_file.pMethods to either +** a valid [sqlite3_io_methods] object or to NULL. xOpen must do +** this even if the open fails. SQLite expects that the sqlite3_file.pMethods +** element will be valid after xOpen returns regardless of the success +** or failure of the xOpen call. +** +** [[sqlite3_vfs.xAccess]] +** ^The flags argument to xAccess() may be [SQLITE_ACCESS_EXISTS] +** to test for the existence of a file, or [SQLITE_ACCESS_READWRITE] to +** test whether a file is readable and writable, or [SQLITE_ACCESS_READ] +** to test whether a file is at least readable. The SQLITE_ACCESS_READ +** flag is never actually used and is not implemented in the built-in +** VFSes of SQLite. The file is named by the second argument and can be a +** directory. The xAccess method returns [SQLITE_OK] on success or some +** non-zero error code if there is an I/O error or if the name of +** the file given in the second argument is illegal. If SQLITE_OK +** is returned, then non-zero or zero is written into *pResOut to indicate +** whether or not the file is accessible. +** +** ^SQLite will always allocate at least mxPathname+1 bytes for the +** output buffer xFullPathname. The exact size of the output buffer +** is also passed as a parameter to both methods. If the output buffer +** is not large enough, [SQLITE_CANTOPEN] should be returned. Since this is +** handled as a fatal error by SQLite, vfs implementations should endeavor +** to prevent this by setting mxPathname to a sufficiently large value. +** +** The xRandomness(), xSleep(), xCurrentTime(), and xCurrentTimeInt64() +** interfaces are not strictly a part of the filesystem, but they are +** included in the VFS structure for completeness. +** The xRandomness() function attempts to return nBytes bytes +** of good-quality randomness into zOut. The return value is +** the actual number of bytes of randomness obtained. +** The xSleep() method causes the calling thread to sleep for at +** least the number of microseconds given. ^The xCurrentTime() +** method returns a Julian Day Number for the current date and time as +** a floating point value. +** ^The xCurrentTimeInt64() method returns, as an integer, the Julian +** Day Number multiplied by 86400000 (the number of milliseconds in +** a 24-hour day). +** ^SQLite will use the xCurrentTimeInt64() method to get the current +** date and time if that method is available (if iVersion is 2 or +** greater and the function pointer is not NULL) and will fall back +** to xCurrentTime() if xCurrentTimeInt64() is unavailable. +** +** ^The xSetSystemCall(), xGetSystemCall(), and xNestSystemCall() interfaces +** are not used by the SQLite core. These optional interfaces are provided +** by some VFSes to facilitate testing of the VFS code. By overriding +** system calls with functions under its control, a test program can +** simulate faults and error conditions that would otherwise be difficult +** or impossible to induce. The set of system calls that can be overridden +** varies from one VFS to another, and from one version of the same VFS to the +** next. Applications that use these interfaces must be prepared for any +** or all of these interfaces to be NULL or for their behavior to change +** from one release to the next. Applications must not attempt to access +** any of these methods if the iVersion of the VFS is less than 3. +*/ +typedef struct sqlite3_vfs sqlite3_vfs; +typedef void (*sqlite3_syscall_ptr)(void); +struct sqlite3_vfs { + int iVersion; /* Structure version number (currently 3) */ + int szOsFile; /* Size of subclassed sqlite3_file */ + int mxPathname; /* Maximum file pathname length */ + sqlite3_vfs *pNext; /* Next registered VFS */ + const char *zName; /* Name of this virtual file system */ + void *pAppData; /* Pointer to application-specific data */ + int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*, + int flags, int *pOutFlags); + int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir); + int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut); + int (*xFullPathname)(sqlite3_vfs*, const char *zName, int nOut, char *zOut); + void *(*xDlOpen)(sqlite3_vfs*, const char *zFilename); + void (*xDlError)(sqlite3_vfs*, int nByte, char *zErrMsg); + void (*(*xDlSym)(sqlite3_vfs*,void*, const char *zSymbol))(void); + void (*xDlClose)(sqlite3_vfs*, void*); + int (*xRandomness)(sqlite3_vfs*, int nByte, char *zOut); + int (*xSleep)(sqlite3_vfs*, int microseconds); + int (*xCurrentTime)(sqlite3_vfs*, double*); + int (*xGetLastError)(sqlite3_vfs*, int, char *); + /* + ** The methods above are in version 1 of the sqlite_vfs object + ** definition. Those that follow are added in version 2 or later + */ + int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*); + /* + ** The methods above are in versions 1 and 2 of the sqlite_vfs object. + ** Those below are for version 3 and greater. + */ + int (*xSetSystemCall)(sqlite3_vfs*, const char *zName, sqlite3_syscall_ptr); + sqlite3_syscall_ptr (*xGetSystemCall)(sqlite3_vfs*, const char *zName); + const char *(*xNextSystemCall)(sqlite3_vfs*, const char *zName); + /* + ** The methods above are in versions 1 through 3 of the sqlite_vfs object. + ** New fields may be appended in future versions. The iVersion + ** value will increment whenever this happens. + */ +}; + +/* +** CAPI3REF: Flags for the xAccess VFS method +** +** These integer constants can be used as the third parameter to +** the xAccess method of an [sqlite3_vfs] object. They determine +** what kind of permissions the xAccess method is looking for. +** With SQLITE_ACCESS_EXISTS, the xAccess method +** simply checks whether the file exists. +** With SQLITE_ACCESS_READWRITE, the xAccess method +** checks whether the named directory is both readable and writable +** (in other words, if files can be added, removed, and renamed within +** the directory). +** The SQLITE_ACCESS_READWRITE constant is currently used only by the +** [temp_store_directory pragma], though this could change in a future +** release of SQLite. +** With SQLITE_ACCESS_READ, the xAccess method +** checks whether the file is readable. The SQLITE_ACCESS_READ constant is +** currently unused, though it might be used in a future release of +** SQLite. +*/ +#define SQLITE_ACCESS_EXISTS 0 +#define SQLITE_ACCESS_READWRITE 1 /* Used by PRAGMA temp_store_directory */ +#define SQLITE_ACCESS_READ 2 /* Unused */ + +/* +** CAPI3REF: Flags for the xShmLock VFS method +** +** These integer constants define the various locking operations +** allowed by the xShmLock method of [sqlite3_io_methods]. The +** following are the only legal combinations of flags to the +** xShmLock method: +** +**
      +**
    • SQLITE_SHM_LOCK | SQLITE_SHM_SHARED +**
    • SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE +**
    • SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED +**
    • SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE +**
    +** +** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as +** was given on the corresponding lock. +** +** The xShmLock method can transition between unlocked and SHARED or +** between unlocked and EXCLUSIVE. It cannot transition between SHARED +** and EXCLUSIVE. +*/ +#define SQLITE_SHM_UNLOCK 1 +#define SQLITE_SHM_LOCK 2 +#define SQLITE_SHM_SHARED 4 +#define SQLITE_SHM_EXCLUSIVE 8 + +/* +** CAPI3REF: Maximum xShmLock index +** +** The xShmLock method on [sqlite3_io_methods] may use values +** between 0 and this upper bound as its "offset" argument. +** The SQLite core will never attempt to acquire or release a +** lock outside of this range +*/ +#define SQLITE_SHM_NLOCK 8 + + +/* +** CAPI3REF: Initialize The SQLite Library +** +** ^The sqlite3_initialize() routine initializes the +** SQLite library. ^The sqlite3_shutdown() routine +** deallocates any resources that were allocated by sqlite3_initialize(). +** These routines are designed to aid in process initialization and +** shutdown on embedded systems. Workstation applications using +** SQLite normally do not need to invoke either of these routines. +** +** A call to sqlite3_initialize() is an "effective" call if it is +** the first time sqlite3_initialize() is invoked during the lifetime of +** the process, or if it is the first time sqlite3_initialize() is invoked +** following a call to sqlite3_shutdown(). ^(Only an effective call +** of sqlite3_initialize() does any initialization. All other calls +** are harmless no-ops.)^ +** +** A call to sqlite3_shutdown() is an "effective" call if it is the first +** call to sqlite3_shutdown() since the last sqlite3_initialize(). ^(Only +** an effective call to sqlite3_shutdown() does any deinitialization. +** All other valid calls to sqlite3_shutdown() are harmless no-ops.)^ +** +** The sqlite3_initialize() interface is threadsafe, but sqlite3_shutdown() +** is not. The sqlite3_shutdown() interface must only be called from a +** single thread. All open [database connections] must be closed and all +** other SQLite resources must be deallocated prior to invoking +** sqlite3_shutdown(). +** +** Among other things, ^sqlite3_initialize() will invoke +** sqlite3_os_init(). Similarly, ^sqlite3_shutdown() +** will invoke sqlite3_os_end(). +** +** ^The sqlite3_initialize() routine returns [SQLITE_OK] on success. +** ^If for some reason, sqlite3_initialize() is unable to initialize +** the library (perhaps it is unable to allocate a needed resource such +** as a mutex) it returns an [error code] other than [SQLITE_OK]. +** +** ^The sqlite3_initialize() routine is called internally by many other +** SQLite interfaces so that an application usually does not need to +** invoke sqlite3_initialize() directly. For example, [sqlite3_open()] +** calls sqlite3_initialize() so the SQLite library will be automatically +** initialized when [sqlite3_open()] is called if it has not be initialized +** already. ^However, if SQLite is compiled with the [SQLITE_OMIT_AUTOINIT] +** compile-time option, then the automatic calls to sqlite3_initialize() +** are omitted and the application must call sqlite3_initialize() directly +** prior to using any other SQLite interface. For maximum portability, +** it is recommended that applications always invoke sqlite3_initialize() +** directly prior to using any other SQLite interface. Future releases +** of SQLite may require this. In other words, the behavior exhibited +** when SQLite is compiled with [SQLITE_OMIT_AUTOINIT] might become the +** default behavior in some future release of SQLite. +** +** The sqlite3_os_init() routine does operating-system specific +** initialization of the SQLite library. The sqlite3_os_end() +** routine undoes the effect of sqlite3_os_init(). Typical tasks +** performed by these routines include allocation or deallocation +** of static resources, initialization of global variables, +** setting up a default [sqlite3_vfs] module, or setting up +** a default configuration using [sqlite3_config()]. +** +** The application should never invoke either sqlite3_os_init() +** or sqlite3_os_end() directly. The application should only invoke +** sqlite3_initialize() and sqlite3_shutdown(). The sqlite3_os_init() +** interface is called automatically by sqlite3_initialize() and +** sqlite3_os_end() is called by sqlite3_shutdown(). Appropriate +** implementations for sqlite3_os_init() and sqlite3_os_end() +** are built into SQLite when it is compiled for Unix, Windows, or OS/2. +** When [custom builds | built for other platforms] +** (using the [SQLITE_OS_OTHER=1] compile-time +** option) the application must supply a suitable implementation for +** sqlite3_os_init() and sqlite3_os_end(). An application-supplied +** implementation of sqlite3_os_init() or sqlite3_os_end() +** must return [SQLITE_OK] on success and some other [error code] upon +** failure. +*/ +SQLITE_API int sqlite3_initialize(void); +SQLITE_API int sqlite3_shutdown(void); +SQLITE_API int sqlite3_os_init(void); +SQLITE_API int sqlite3_os_end(void); + +/* +** CAPI3REF: Configuring The SQLite Library +** +** The sqlite3_config() interface is used to make global configuration +** changes to SQLite in order to tune SQLite to the specific needs of +** the application. The default configuration is recommended for most +** applications and so this routine is usually not necessary. It is +** provided to support rare applications with unusual needs. +** +** The sqlite3_config() interface is not threadsafe. The application +** must ensure that no other SQLite interfaces are invoked by other +** threads while sqlite3_config() is running. +** +** The sqlite3_config() interface +** may only be invoked prior to library initialization using +** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()]. +** ^If sqlite3_config() is called after [sqlite3_initialize()] and before +** [sqlite3_shutdown()] then it will return SQLITE_MISUSE. +** Note, however, that ^sqlite3_config() can be called as part of the +** implementation of an application-defined [sqlite3_os_init()]. +** +** The first argument to sqlite3_config() is an integer +** [configuration option] that determines +** what property of SQLite is to be configured. Subsequent arguments +** vary depending on the [configuration option] +** in the first argument. +** +** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK]. +** ^If the option is unknown or SQLite is unable to set the option +** then this routine returns a non-zero [error code]. +*/ +SQLITE_API int sqlite3_config(int, ...); + +/* +** CAPI3REF: Configure database connections +** METHOD: sqlite3 +** +** The sqlite3_db_config() interface is used to make configuration +** changes to a [database connection]. The interface is similar to +** [sqlite3_config()] except that the changes apply to a single +** [database connection] (specified in the first argument). +** +** The second argument to sqlite3_db_config(D,V,...) is the +** [SQLITE_DBCONFIG_LOOKASIDE | configuration verb] - an integer code +** that indicates what aspect of the [database connection] is being configured. +** Subsequent arguments vary depending on the configuration verb. +** +** ^Calls to sqlite3_db_config() return SQLITE_OK if and only if +** the call is considered successful. +*/ +SQLITE_API int sqlite3_db_config(sqlite3*, int op, ...); + +/* +** CAPI3REF: Memory Allocation Routines +** +** An instance of this object defines the interface between SQLite +** and low-level memory allocation routines. +** +** This object is used in only one place in the SQLite interface. +** A pointer to an instance of this object is the argument to +** [sqlite3_config()] when the configuration option is +** [SQLITE_CONFIG_MALLOC] or [SQLITE_CONFIG_GETMALLOC]. +** By creating an instance of this object +** and passing it to [sqlite3_config]([SQLITE_CONFIG_MALLOC]) +** during configuration, an application can specify an alternative +** memory allocation subsystem for SQLite to use for all of its +** dynamic memory needs. +** +** Note that SQLite comes with several [built-in memory allocators] +** that are perfectly adequate for the overwhelming majority of applications +** and that this object is only useful to a tiny minority of applications +** with specialized memory allocation requirements. This object is +** also used during testing of SQLite in order to specify an alternative +** memory allocator that simulates memory out-of-memory conditions in +** order to verify that SQLite recovers gracefully from such +** conditions. +** +** The xMalloc, xRealloc, and xFree methods must work like the +** malloc(), realloc() and free() functions from the standard C library. +** ^SQLite guarantees that the second argument to +** xRealloc is always a value returned by a prior call to xRoundup. +** +** xSize should return the allocated size of a memory allocation +** previously obtained from xMalloc or xRealloc. The allocated size +** is always at least as big as the requested size but may be larger. +** +** The xRoundup method returns what would be the allocated size of +** a memory allocation given a particular requested size. Most memory +** allocators round up memory allocations at least to the next multiple +** of 8. Some allocators round up to a larger multiple or to a power of 2. +** Every memory allocation request coming in through [sqlite3_malloc()] +** or [sqlite3_realloc()] first calls xRoundup. If xRoundup returns 0, +** that causes the corresponding memory allocation to fail. +** +** The xInit method initializes the memory allocator. For example, +** it might allocate any required mutexes or initialize internal data +** structures. The xShutdown method is invoked (indirectly) by +** [sqlite3_shutdown()] and should deallocate any resources acquired +** by xInit. The pAppData pointer is used as the only parameter to +** xInit and xShutdown. +** +** SQLite holds the [SQLITE_MUTEX_STATIC_MAIN] mutex when it invokes +** the xInit method, so the xInit method need not be threadsafe. The +** xShutdown method is only called from [sqlite3_shutdown()] so it does +** not need to be threadsafe either. For all other methods, SQLite +** holds the [SQLITE_MUTEX_STATIC_MEM] mutex as long as the +** [SQLITE_CONFIG_MEMSTATUS] configuration option is turned on (which +** it is by default) and so the methods are automatically serialized. +** However, if [SQLITE_CONFIG_MEMSTATUS] is disabled, then the other +** methods must be threadsafe or else make their own arrangements for +** serialization. +** +** SQLite will never invoke xInit() more than once without an intervening +** call to xShutdown(). +*/ +typedef struct sqlite3_mem_methods sqlite3_mem_methods; +struct sqlite3_mem_methods { + void *(*xMalloc)(int); /* Memory allocation function */ + void (*xFree)(void*); /* Free a prior allocation */ + void *(*xRealloc)(void*,int); /* Resize an allocation */ + int (*xSize)(void*); /* Return the size of an allocation */ + int (*xRoundup)(int); /* Round up request size to allocation size */ + int (*xInit)(void*); /* Initialize the memory allocator */ + void (*xShutdown)(void*); /* Deinitialize the memory allocator */ + void *pAppData; /* Argument to xInit() and xShutdown() */ +}; + +/* +** CAPI3REF: Configuration Options +** KEYWORDS: {configuration option} +** +** These constants are the available integer configuration options that +** can be passed as the first argument to the [sqlite3_config()] interface. +** +** New configuration options may be added in future releases of SQLite. +** Existing configuration options might be discontinued. Applications +** should check the return code from [sqlite3_config()] to make sure that +** the call worked. The [sqlite3_config()] interface will return a +** non-zero [error code] if a discontinued or unsupported configuration option +** is invoked. +** +**
    +** [[SQLITE_CONFIG_SINGLETHREAD]]
    SQLITE_CONFIG_SINGLETHREAD
    +**
    There are no arguments to this option. ^This option sets the +** [threading mode] to Single-thread. In other words, it disables +** all mutexing and puts SQLite into a mode where it can only be used +** by a single thread. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** it is not possible to change the [threading mode] from its default +** value of Single-thread and so [sqlite3_config()] will return +** [SQLITE_ERROR] if called with the SQLITE_CONFIG_SINGLETHREAD +** configuration option.
    +** +** [[SQLITE_CONFIG_MULTITHREAD]]
    SQLITE_CONFIG_MULTITHREAD
    +**
    There are no arguments to this option. ^This option sets the +** [threading mode] to Multi-thread. In other words, it disables +** mutexing on [database connection] and [prepared statement] objects. +** The application is responsible for serializing access to +** [database connections] and [prepared statements]. But other mutexes +** are enabled so that SQLite will be safe to use in a multi-threaded +** environment as long as no two threads attempt to use the same +** [database connection] at the same time. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** it is not possible to set the Multi-thread [threading mode] and +** [sqlite3_config()] will return [SQLITE_ERROR] if called with the +** SQLITE_CONFIG_MULTITHREAD configuration option.
    +** +** [[SQLITE_CONFIG_SERIALIZED]]
    SQLITE_CONFIG_SERIALIZED
    +**
    There are no arguments to this option. ^This option sets the +** [threading mode] to Serialized. In other words, this option enables +** all mutexes including the recursive +** mutexes on [database connection] and [prepared statement] objects. +** In this mode (which is the default when SQLite is compiled with +** [SQLITE_THREADSAFE=1]) the SQLite library will itself serialize access +** to [database connections] and [prepared statements] so that the +** application is free to use the same [database connection] or the +** same [prepared statement] in different threads at the same time. +** ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** it is not possible to set the Serialized [threading mode] and +** [sqlite3_config()] will return [SQLITE_ERROR] if called with the +** SQLITE_CONFIG_SERIALIZED configuration option.
    +** +** [[SQLITE_CONFIG_MALLOC]]
    SQLITE_CONFIG_MALLOC
    +**
    ^(The SQLITE_CONFIG_MALLOC option takes a single argument which is +** a pointer to an instance of the [sqlite3_mem_methods] structure. +** The argument specifies +** alternative low-level memory allocation routines to be used in place of +** the memory allocation routines built into SQLite.)^ ^SQLite makes +** its own private copy of the content of the [sqlite3_mem_methods] structure +** before the [sqlite3_config()] call returns.
    +** +** [[SQLITE_CONFIG_GETMALLOC]]
    SQLITE_CONFIG_GETMALLOC
    +**
    ^(The SQLITE_CONFIG_GETMALLOC option takes a single argument which +** is a pointer to an instance of the [sqlite3_mem_methods] structure. +** The [sqlite3_mem_methods] +** structure is filled with the currently defined memory allocation routines.)^ +** This option can be used to overload the default memory allocation +** routines with a wrapper that simulations memory allocation failure or +** tracks memory usage, for example.
    +** +** [[SQLITE_CONFIG_SMALL_MALLOC]]
    SQLITE_CONFIG_SMALL_MALLOC
    +**
    ^The SQLITE_CONFIG_SMALL_MALLOC option takes single argument of +** type int, interpreted as a boolean, which if true provides a hint to +** SQLite that it should avoid large memory allocations if possible. +** SQLite will run faster if it is free to make large memory allocations, +** but some application might prefer to run slower in exchange for +** guarantees about memory fragmentation that are possible if large +** allocations are avoided. This hint is normally off. +**
    +** +** [[SQLITE_CONFIG_MEMSTATUS]]
    SQLITE_CONFIG_MEMSTATUS
    +**
    ^The SQLITE_CONFIG_MEMSTATUS option takes single argument of type int, +** interpreted as a boolean, which enables or disables the collection of +** memory allocation statistics. ^(When memory allocation statistics are +** disabled, the following SQLite interfaces become non-operational: +**
      +**
    • [sqlite3_hard_heap_limit64()] +**
    • [sqlite3_memory_used()] +**
    • [sqlite3_memory_highwater()] +**
    • [sqlite3_soft_heap_limit64()] +**
    • [sqlite3_status64()] +**
    )^ +** ^Memory allocation statistics are enabled by default unless SQLite is +** compiled with [SQLITE_DEFAULT_MEMSTATUS]=0 in which case memory +** allocation statistics are disabled by default. +**
    +** +** [[SQLITE_CONFIG_SCRATCH]]
    SQLITE_CONFIG_SCRATCH
    +**
    The SQLITE_CONFIG_SCRATCH option is no longer used. +**
    +** +** [[SQLITE_CONFIG_PAGECACHE]]
    SQLITE_CONFIG_PAGECACHE
    +**
    ^The SQLITE_CONFIG_PAGECACHE option specifies a memory pool +** that SQLite can use for the database page cache with the default page +** cache implementation. +** This configuration option is a no-op if an application-defined page +** cache implementation is loaded using the [SQLITE_CONFIG_PCACHE2]. +** ^There are three arguments to SQLITE_CONFIG_PAGECACHE: A pointer to +** 8-byte aligned memory (pMem), the size of each page cache line (sz), +** and the number of cache lines (N). +** The sz argument should be the size of the largest database page +** (a power of two between 512 and 65536) plus some extra bytes for each +** page header. ^The number of extra bytes needed by the page header +** can be determined using [SQLITE_CONFIG_PCACHE_HDRSZ]. +** ^It is harmless, apart from the wasted memory, +** for the sz parameter to be larger than necessary. The pMem +** argument must be either a NULL pointer or a pointer to an 8-byte +** aligned block of memory of at least sz*N bytes, otherwise +** subsequent behavior is undefined. +** ^When pMem is not NULL, SQLite will strive to use the memory provided +** to satisfy page cache needs, falling back to [sqlite3_malloc()] if +** a page cache line is larger than sz bytes or if all of the pMem buffer +** is exhausted. +** ^If pMem is NULL and N is non-zero, then each database connection +** does an initial bulk allocation for page cache memory +** from [sqlite3_malloc()] sufficient for N cache lines if N is positive or +** of -1024*N bytes if N is negative, . ^If additional +** page cache memory is needed beyond what is provided by the initial +** allocation, then SQLite goes to [sqlite3_malloc()] separately for each +** additional cache line.
    +** +** [[SQLITE_CONFIG_HEAP]]
    SQLITE_CONFIG_HEAP
    +**
    ^The SQLITE_CONFIG_HEAP option specifies a static memory buffer +** that SQLite will use for all of its dynamic memory allocation needs +** beyond those provided for by [SQLITE_CONFIG_PAGECACHE]. +** ^The SQLITE_CONFIG_HEAP option is only available if SQLite is compiled +** with either [SQLITE_ENABLE_MEMSYS3] or [SQLITE_ENABLE_MEMSYS5] and returns +** [SQLITE_ERROR] if invoked otherwise. +** ^There are three arguments to SQLITE_CONFIG_HEAP: +** An 8-byte aligned pointer to the memory, +** the number of bytes in the memory buffer, and the minimum allocation size. +** ^If the first pointer (the memory pointer) is NULL, then SQLite reverts +** to using its default memory allocator (the system malloc() implementation), +** undoing any prior invocation of [SQLITE_CONFIG_MALLOC]. ^If the +** memory pointer is not NULL then the alternative memory +** allocator is engaged to handle all of SQLites memory allocation needs. +** The first pointer (the memory pointer) must be aligned to an 8-byte +** boundary or subsequent behavior of SQLite will be undefined. +** The minimum allocation size is capped at 2**12. Reasonable values +** for the minimum allocation size are 2**5 through 2**8.
    +** +** [[SQLITE_CONFIG_MUTEX]]
    SQLITE_CONFIG_MUTEX
    +**
    ^(The SQLITE_CONFIG_MUTEX option takes a single argument which is a +** pointer to an instance of the [sqlite3_mutex_methods] structure. +** The argument specifies alternative low-level mutex routines to be used +** in place the mutex routines built into SQLite.)^ ^SQLite makes a copy of +** the content of the [sqlite3_mutex_methods] structure before the call to +** [sqlite3_config()] returns. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** the entire mutexing subsystem is omitted from the build and hence calls to +** [sqlite3_config()] with the SQLITE_CONFIG_MUTEX configuration option will +** return [SQLITE_ERROR].
    +** +** [[SQLITE_CONFIG_GETMUTEX]]
    SQLITE_CONFIG_GETMUTEX
    +**
    ^(The SQLITE_CONFIG_GETMUTEX option takes a single argument which +** is a pointer to an instance of the [sqlite3_mutex_methods] structure. The +** [sqlite3_mutex_methods] +** structure is filled with the currently defined mutex routines.)^ +** This option can be used to overload the default mutex allocation +** routines with a wrapper used to track mutex usage for performance +** profiling or testing, for example. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** the entire mutexing subsystem is omitted from the build and hence calls to +** [sqlite3_config()] with the SQLITE_CONFIG_GETMUTEX configuration option will +** return [SQLITE_ERROR].
    +** +** [[SQLITE_CONFIG_LOOKASIDE]]
    SQLITE_CONFIG_LOOKASIDE
    +**
    ^(The SQLITE_CONFIG_LOOKASIDE option takes two arguments that determine +** the default size of lookaside memory on each [database connection]. +** The first argument is the +** size of each lookaside buffer slot and the second is the number of +** slots allocated to each database connection.)^ ^(SQLITE_CONFIG_LOOKASIDE +** sets the default lookaside size. The [SQLITE_DBCONFIG_LOOKASIDE] +** option to [sqlite3_db_config()] can be used to change the lookaside +** configuration on individual connections.)^
    +** +** [[SQLITE_CONFIG_PCACHE2]]
    SQLITE_CONFIG_PCACHE2
    +**
    ^(The SQLITE_CONFIG_PCACHE2 option takes a single argument which is +** a pointer to an [sqlite3_pcache_methods2] object. This object specifies +** the interface to a custom page cache implementation.)^ +** ^SQLite makes a copy of the [sqlite3_pcache_methods2] object.
    +** +** [[SQLITE_CONFIG_GETPCACHE2]]
    SQLITE_CONFIG_GETPCACHE2
    +**
    ^(The SQLITE_CONFIG_GETPCACHE2 option takes a single argument which +** is a pointer to an [sqlite3_pcache_methods2] object. SQLite copies of +** the current page cache implementation into that object.)^
    +** +** [[SQLITE_CONFIG_LOG]]
    SQLITE_CONFIG_LOG
    +**
    The SQLITE_CONFIG_LOG option is used to configure the SQLite +** global [error log]. +** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a +** function with a call signature of void(*)(void*,int,const char*), +** and a pointer to void. ^If the function pointer is not NULL, it is +** invoked by [sqlite3_log()] to process each logging event. ^If the +** function pointer is NULL, the [sqlite3_log()] interface becomes a no-op. +** ^The void pointer that is the second argument to SQLITE_CONFIG_LOG is +** passed through as the first parameter to the application-defined logger +** function whenever that function is invoked. ^The second parameter to +** the logger function is a copy of the first parameter to the corresponding +** [sqlite3_log()] call and is intended to be a [result code] or an +** [extended result code]. ^The third parameter passed to the logger is +** log message after formatting via [sqlite3_snprintf()]. +** The SQLite logging interface is not reentrant; the logger function +** supplied by the application must not invoke any SQLite interface. +** In a multi-threaded application, the application-defined logger +** function must be threadsafe.
    +** +** [[SQLITE_CONFIG_URI]]
    SQLITE_CONFIG_URI +**
    ^(The SQLITE_CONFIG_URI option takes a single argument of type int. +** If non-zero, then URI handling is globally enabled. If the parameter is zero, +** then URI handling is globally disabled.)^ ^If URI handling is globally +** enabled, all filenames passed to [sqlite3_open()], [sqlite3_open_v2()], +** [sqlite3_open16()] or +** specified as part of [ATTACH] commands are interpreted as URIs, regardless +** of whether or not the [SQLITE_OPEN_URI] flag is set when the database +** connection is opened. ^If it is globally disabled, filenames are +** only interpreted as URIs if the SQLITE_OPEN_URI flag is set when the +** database connection is opened. ^(By default, URI handling is globally +** disabled. The default value may be changed by compiling with the +** [SQLITE_USE_URI] symbol defined.)^ +** +** [[SQLITE_CONFIG_COVERING_INDEX_SCAN]]
    SQLITE_CONFIG_COVERING_INDEX_SCAN +**
    ^The SQLITE_CONFIG_COVERING_INDEX_SCAN option takes a single integer +** argument which is interpreted as a boolean in order to enable or disable +** the use of covering indices for full table scans in the query optimizer. +** ^The default setting is determined +** by the [SQLITE_ALLOW_COVERING_INDEX_SCAN] compile-time option, or is "on" +** if that compile-time option is omitted. +** The ability to disable the use of covering indices for full table scans +** is because some incorrectly coded legacy applications might malfunction +** when the optimization is enabled. Providing the ability to +** disable the optimization allows the older, buggy application code to work +** without change even with newer versions of SQLite. +** +** [[SQLITE_CONFIG_PCACHE]] [[SQLITE_CONFIG_GETPCACHE]] +**
    SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE +**
    These options are obsolete and should not be used by new code. +** They are retained for backwards compatibility but are now no-ops. +**
    +** +** [[SQLITE_CONFIG_SQLLOG]] +**
    SQLITE_CONFIG_SQLLOG +**
    This option is only available if sqlite is compiled with the +** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined. The first argument should +** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int). +** The second should be of type (void*). The callback is invoked by the library +** in three separate circumstances, identified by the value passed as the +** fourth parameter. If the fourth parameter is 0, then the database connection +** passed as the second argument has just been opened. The third argument +** points to a buffer containing the name of the main database file. If the +** fourth parameter is 1, then the SQL statement that the third parameter +** points to has just been executed. Or, if the fourth parameter is 2, then +** the connection being passed as the second parameter is being closed. The +** third parameter is passed NULL In this case. An example of using this +** configuration option can be seen in the "test_sqllog.c" source file in +** the canonical SQLite source tree.
    +** +** [[SQLITE_CONFIG_MMAP_SIZE]] +**
    SQLITE_CONFIG_MMAP_SIZE +**
    ^SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values +** that are the default mmap size limit (the default setting for +** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. +** ^The default setting can be overridden by each database connection using +** either the [PRAGMA mmap_size] command, or by using the +** [SQLITE_FCNTL_MMAP_SIZE] file control. ^(The maximum allowed mmap size +** will be silently truncated if necessary so that it does not exceed the +** compile-time maximum mmap size set by the +** [SQLITE_MAX_MMAP_SIZE] compile-time option.)^ +** ^If either argument to this option is negative, then that argument is +** changed to its compile-time default. +** +** [[SQLITE_CONFIG_WIN32_HEAPSIZE]] +**
    SQLITE_CONFIG_WIN32_HEAPSIZE +**
    ^The SQLITE_CONFIG_WIN32_HEAPSIZE option is only available if SQLite is +** compiled for Windows with the [SQLITE_WIN32_MALLOC] pre-processor macro +** defined. ^SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit unsigned integer value +** that specifies the maximum size of the created heap. +** +** [[SQLITE_CONFIG_PCACHE_HDRSZ]] +**
    SQLITE_CONFIG_PCACHE_HDRSZ +**
    ^The SQLITE_CONFIG_PCACHE_HDRSZ option takes a single parameter which +** is a pointer to an integer and writes into that integer the number of extra +** bytes per page required for each page in [SQLITE_CONFIG_PAGECACHE]. +** The amount of extra space required can change depending on the compiler, +** target platform, and SQLite version. +** +** [[SQLITE_CONFIG_PMASZ]] +**
    SQLITE_CONFIG_PMASZ +**
    ^The SQLITE_CONFIG_PMASZ option takes a single parameter which +** is an unsigned integer and sets the "Minimum PMA Size" for the multithreaded +** sorter to that integer. The default minimum PMA Size is set by the +** [SQLITE_SORTER_PMASZ] compile-time option. New threads are launched +** to help with sort operations when multithreaded sorting +** is enabled (using the [PRAGMA threads] command) and the amount of content +** to be sorted exceeds the page size times the minimum of the +** [PRAGMA cache_size] setting and this value. +** +** [[SQLITE_CONFIG_STMTJRNL_SPILL]] +**
    SQLITE_CONFIG_STMTJRNL_SPILL +**
    ^The SQLITE_CONFIG_STMTJRNL_SPILL option takes a single parameter which +** becomes the [statement journal] spill-to-disk threshold. +** [Statement journals] are held in memory until their size (in bytes) +** exceeds this threshold, at which point they are written to disk. +** Or if the threshold is -1, statement journals are always held +** exclusively in memory. +** Since many statement journals never become large, setting the spill +** threshold to a value such as 64KiB can greatly reduce the amount of +** I/O required to support statement rollback. +** The default value for this setting is controlled by the +** [SQLITE_STMTJRNL_SPILL] compile-time option. +** +** [[SQLITE_CONFIG_SORTERREF_SIZE]] +**
    SQLITE_CONFIG_SORTERREF_SIZE +**
    The SQLITE_CONFIG_SORTERREF_SIZE option accepts a single parameter +** of type (int) - the new value of the sorter-reference size threshold. +** Usually, when SQLite uses an external sort to order records according +** to an ORDER BY clause, all fields required by the caller are present in the +** sorted records. However, if SQLite determines based on the declared type +** of a table column that its values are likely to be very large - larger +** than the configured sorter-reference size threshold - then a reference +** is stored in each sorted record and the required column values loaded +** from the database as records are returned in sorted order. The default +** value for this option is to never use this optimization. Specifying a +** negative value for this option restores the default behaviour. +** This option is only available if SQLite is compiled with the +** [SQLITE_ENABLE_SORTER_REFERENCES] compile-time option. +** +** [[SQLITE_CONFIG_MEMDB_MAXSIZE]] +**
    SQLITE_CONFIG_MEMDB_MAXSIZE +**
    The SQLITE_CONFIG_MEMDB_MAXSIZE option accepts a single parameter +** [sqlite3_int64] parameter which is the default maximum size for an in-memory +** database created using [sqlite3_deserialize()]. This default maximum +** size can be adjusted up or down for individual databases using the +** [SQLITE_FCNTL_SIZE_LIMIT] [sqlite3_file_control|file-control]. If this +** configuration setting is never used, then the default maximum is determined +** by the [SQLITE_MEMDB_DEFAULT_MAXSIZE] compile-time option. If that +** compile-time option is not set, then the default maximum is 1073741824. +**
    +*/ +#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ +#define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ +#define SQLITE_CONFIG_SERIALIZED 3 /* nil */ +#define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_SCRATCH 6 /* No longer used */ +#define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ +#define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ +#define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ +#define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ +#define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ +/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ +#define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ +#define SQLITE_CONFIG_PCACHE 14 /* no-op */ +#define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ +#define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ +#define SQLITE_CONFIG_URI 17 /* int */ +#define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ +#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ +#define SQLITE_CONFIG_WIN32_HEAPSIZE 23 /* int nByte */ +#define SQLITE_CONFIG_PCACHE_HDRSZ 24 /* int *psz */ +#define SQLITE_CONFIG_PMASZ 25 /* unsigned int szPma */ +#define SQLITE_CONFIG_STMTJRNL_SPILL 26 /* int nByte */ +#define SQLITE_CONFIG_SMALL_MALLOC 27 /* boolean */ +#define SQLITE_CONFIG_SORTERREF_SIZE 28 /* int nByte */ +#define SQLITE_CONFIG_MEMDB_MAXSIZE 29 /* sqlite3_int64 */ + +/* +** CAPI3REF: Database Connection Configuration Options +** +** These constants are the available integer configuration options that +** can be passed as the second argument to the [sqlite3_db_config()] interface. +** +** New configuration options may be added in future releases of SQLite. +** Existing configuration options might be discontinued. Applications +** should check the return code from [sqlite3_db_config()] to make sure that +** the call worked. ^The [sqlite3_db_config()] interface will return a +** non-zero [error code] if a discontinued or unsupported configuration option +** is invoked. +** +**
    +** [[SQLITE_DBCONFIG_LOOKASIDE]] +**
    SQLITE_DBCONFIG_LOOKASIDE
    +**
    ^This option takes three additional arguments that determine the +** [lookaside memory allocator] configuration for the [database connection]. +** ^The first argument (the third parameter to [sqlite3_db_config()] is a +** pointer to a memory buffer to use for lookaside memory. +** ^The first argument after the SQLITE_DBCONFIG_LOOKASIDE verb +** may be NULL in which case SQLite will allocate the +** lookaside buffer itself using [sqlite3_malloc()]. ^The second argument is the +** size of each lookaside buffer slot. ^The third argument is the number of +** slots. The size of the buffer in the first argument must be greater than +** or equal to the product of the second and third arguments. The buffer +** must be aligned to an 8-byte boundary. ^If the second argument to +** SQLITE_DBCONFIG_LOOKASIDE is not a multiple of 8, it is internally +** rounded down to the next smaller multiple of 8. ^(The lookaside memory +** configuration for a database connection can only be changed when that +** connection is not currently using lookaside memory, or in other words +** when the "current value" returned by +** [sqlite3_db_status](D,[SQLITE_CONFIG_LOOKASIDE],...) is zero. +** Any attempt to change the lookaside memory configuration when lookaside +** memory is in use leaves the configuration unchanged and returns +** [SQLITE_BUSY].)^
    +** +** [[SQLITE_DBCONFIG_ENABLE_FKEY]] +**
    SQLITE_DBCONFIG_ENABLE_FKEY
    +**
    ^This option is used to enable or disable the enforcement of +** [foreign key constraints]. There should be two additional arguments. +** The first argument is an integer which is 0 to disable FK enforcement, +** positive to enable FK enforcement or negative to leave FK enforcement +** unchanged. The second parameter is a pointer to an integer into which +** is written 0 or 1 to indicate whether FK enforcement is off or on +** following this call. The second parameter may be a NULL pointer, in +** which case the FK enforcement setting is not reported back.
    +** +** [[SQLITE_DBCONFIG_ENABLE_TRIGGER]] +**
    SQLITE_DBCONFIG_ENABLE_TRIGGER
    +**
    ^This option is used to enable or disable [CREATE TRIGGER | triggers]. +** There should be two additional arguments. +** The first argument is an integer which is 0 to disable triggers, +** positive to enable triggers or negative to leave the setting unchanged. +** The second parameter is a pointer to an integer into which +** is written 0 or 1 to indicate whether triggers are disabled or enabled +** following this call. The second parameter may be a NULL pointer, in +** which case the trigger setting is not reported back. +** +**

    Originally this option disabled all triggers. ^(However, since +** SQLite version 3.35.0, TEMP triggers are still allowed even if +** this option is off. So, in other words, this option now only disables +** triggers in the main database schema or in the schemas of ATTACH-ed +** databases.)^

    +** +** [[SQLITE_DBCONFIG_ENABLE_VIEW]] +**
    SQLITE_DBCONFIG_ENABLE_VIEW
    +**
    ^This option is used to enable or disable [CREATE VIEW | views]. +** There should be two additional arguments. +** The first argument is an integer which is 0 to disable views, +** positive to enable views or negative to leave the setting unchanged. +** The second parameter is a pointer to an integer into which +** is written 0 or 1 to indicate whether views are disabled or enabled +** following this call. The second parameter may be a NULL pointer, in +** which case the view setting is not reported back. +** +**

    Originally this option disabled all views. ^(However, since +** SQLite version 3.35.0, TEMP views are still allowed even if +** this option is off. So, in other words, this option now only disables +** views in the main database schema or in the schemas of ATTACH-ed +** databases.)^

    +** +** [[SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER]] +**
    SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER
    +**
    ^This option is used to enable or disable the +** [fts3_tokenizer()] function which is part of the +** [FTS3] full-text search engine extension. +** There should be two additional arguments. +** The first argument is an integer which is 0 to disable fts3_tokenizer() or +** positive to enable fts3_tokenizer() or negative to leave the setting +** unchanged. +** The second parameter is a pointer to an integer into which +** is written 0 or 1 to indicate whether fts3_tokenizer is disabled or enabled +** following this call. The second parameter may be a NULL pointer, in +** which case the new setting is not reported back.
    +** +** [[SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION]] +**
    SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION
    +**
    ^This option is used to enable or disable the [sqlite3_load_extension()] +** interface independently of the [load_extension()] SQL function. +** The [sqlite3_enable_load_extension()] API enables or disables both the +** C-API [sqlite3_load_extension()] and the SQL function [load_extension()]. +** There should be two additional arguments. +** When the first argument to this interface is 1, then only the C-API is +** enabled and the SQL function remains disabled. If the first argument to +** this interface is 0, then both the C-API and the SQL function are disabled. +** If the first argument is -1, then no changes are made to state of either the +** C-API or the SQL function. +** The second parameter is a pointer to an integer into which +** is written 0 or 1 to indicate whether [sqlite3_load_extension()] interface +** is disabled or enabled following this call. The second parameter may +** be a NULL pointer, in which case the new setting is not reported back. +**
    +** +** [[SQLITE_DBCONFIG_MAINDBNAME]]
    SQLITE_DBCONFIG_MAINDBNAME
    +**
    ^This option is used to change the name of the "main" database +** schema. ^The sole argument is a pointer to a constant UTF8 string +** which will become the new schema name in place of "main". ^SQLite +** does not make a copy of the new main schema name string, so the application +** must ensure that the argument passed into this DBCONFIG option is unchanged +** until after the database connection closes. +**
    +** +** [[SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE]] +**
    SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE
    +**
    Usually, when a database in wal mode is closed or detached from a +** database handle, SQLite checks if this will mean that there are now no +** connections at all to the database. If so, it performs a checkpoint +** operation before closing the connection. This option may be used to +** override this behaviour. The first parameter passed to this operation +** is an integer - positive to disable checkpoints-on-close, or zero (the +** default) to enable them, and negative to leave the setting unchanged. +** The second parameter is a pointer to an integer +** into which is written 0 or 1 to indicate whether checkpoints-on-close +** have been disabled - 0 if they are not disabled, 1 if they are. +**
    +** +** [[SQLITE_DBCONFIG_ENABLE_QPSG]]
    SQLITE_DBCONFIG_ENABLE_QPSG
    +**
    ^(The SQLITE_DBCONFIG_ENABLE_QPSG option activates or deactivates +** the [query planner stability guarantee] (QPSG). When the QPSG is active, +** a single SQL query statement will always use the same algorithm regardless +** of values of [bound parameters].)^ The QPSG disables some query optimizations +** that look at the values of bound parameters, which can make some queries +** slower. But the QPSG has the advantage of more predictable behavior. With +** the QPSG active, SQLite will always use the same query plan in the field as +** was used during testing in the lab. +** The first argument to this setting is an integer which is 0 to disable +** the QPSG, positive to enable QPSG, or negative to leave the setting +** unchanged. The second parameter is a pointer to an integer into which +** is written 0 or 1 to indicate whether the QPSG is disabled or enabled +** following this call. +**
    +** +** [[SQLITE_DBCONFIG_TRIGGER_EQP]]
    SQLITE_DBCONFIG_TRIGGER_EQP
    +**
    By default, the output of EXPLAIN QUERY PLAN commands does not +** include output for any operations performed by trigger programs. This +** option is used to set or clear (the default) a flag that governs this +** behavior. The first parameter passed to this operation is an integer - +** positive to enable output for trigger programs, or zero to disable it, +** or negative to leave the setting unchanged. +** The second parameter is a pointer to an integer into which is written +** 0 or 1 to indicate whether output-for-triggers has been disabled - 0 if +** it is not disabled, 1 if it is. +**
    +** +** [[SQLITE_DBCONFIG_RESET_DATABASE]]
    SQLITE_DBCONFIG_RESET_DATABASE
    +**
    Set the SQLITE_DBCONFIG_RESET_DATABASE flag and then run +** [VACUUM] in order to reset a database back to an empty database +** with no schema and no content. The following process works even for +** a badly corrupted database file: +**
      +**
    1. If the database connection is newly opened, make sure it has read the +** database schema by preparing then discarding some query against the +** database, or calling sqlite3_table_column_metadata(), ignoring any +** errors. This step is only necessary if the application desires to keep +** the database in WAL mode after the reset if it was in WAL mode before +** the reset. +**
    2. sqlite3_db_config(db, SQLITE_DBCONFIG_RESET_DATABASE, 1, 0); +**
    3. [sqlite3_exec](db, "[VACUUM]", 0, 0, 0); +**
    4. sqlite3_db_config(db, SQLITE_DBCONFIG_RESET_DATABASE, 0, 0); +**
    +** Because resetting a database is destructive and irreversible, the +** process requires the use of this obscure API and multiple steps to help +** ensure that it does not happen by accident. +** +** [[SQLITE_DBCONFIG_DEFENSIVE]]
    SQLITE_DBCONFIG_DEFENSIVE
    +**
    The SQLITE_DBCONFIG_DEFENSIVE option activates or deactivates the +** "defensive" flag for a database connection. When the defensive +** flag is enabled, language features that allow ordinary SQL to +** deliberately corrupt the database file are disabled. The disabled +** features include but are not limited to the following: +**
      +**
    • The [PRAGMA writable_schema=ON] statement. +**
    • The [PRAGMA journal_mode=OFF] statement. +**
    • Writes to the [sqlite_dbpage] virtual table. +**
    • Direct writes to [shadow tables]. +**
    +**
    +** +** [[SQLITE_DBCONFIG_WRITABLE_SCHEMA]]
    SQLITE_DBCONFIG_WRITABLE_SCHEMA
    +**
    The SQLITE_DBCONFIG_WRITABLE_SCHEMA option activates or deactivates the +** "writable_schema" flag. This has the same effect and is logically equivalent +** to setting [PRAGMA writable_schema=ON] or [PRAGMA writable_schema=OFF]. +** The first argument to this setting is an integer which is 0 to disable +** the writable_schema, positive to enable writable_schema, or negative to +** leave the setting unchanged. The second parameter is a pointer to an +** integer into which is written 0 or 1 to indicate whether the writable_schema +** is enabled or disabled following this call. +**
    +** +** [[SQLITE_DBCONFIG_LEGACY_ALTER_TABLE]] +**
    SQLITE_DBCONFIG_LEGACY_ALTER_TABLE
    +**
    The SQLITE_DBCONFIG_LEGACY_ALTER_TABLE option activates or deactivates +** the legacy behavior of the [ALTER TABLE RENAME] command such it +** behaves as it did prior to [version 3.24.0] (2018-06-04). See the +** "Compatibility Notice" on the [ALTER TABLE RENAME documentation] for +** additional information. This feature can also be turned on and off +** using the [PRAGMA legacy_alter_table] statement. +**
    +** +** [[SQLITE_DBCONFIG_DQS_DML]] +**
    SQLITE_DBCONFIG_DQS_DML +**
    The SQLITE_DBCONFIG_DQS_DML option activates or deactivates +** the legacy [double-quoted string literal] misfeature for DML statements +** only, that is DELETE, INSERT, SELECT, and UPDATE statements. The +** default value of this setting is determined by the [-DSQLITE_DQS] +** compile-time option. +**
    +** +** [[SQLITE_DBCONFIG_DQS_DDL]] +**
    SQLITE_DBCONFIG_DQS_DDL +**
    The SQLITE_DBCONFIG_DQS option activates or deactivates +** the legacy [double-quoted string literal] misfeature for DDL statements, +** such as CREATE TABLE and CREATE INDEX. The +** default value of this setting is determined by the [-DSQLITE_DQS] +** compile-time option. +**
    +** +** [[SQLITE_DBCONFIG_TRUSTED_SCHEMA]] +**
    SQLITE_DBCONFIG_TRUSTED_SCHEMA +**
    The SQLITE_DBCONFIG_TRUSTED_SCHEMA option tells SQLite to +** assume that database schemas are untainted by malicious content. +** When the SQLITE_DBCONFIG_TRUSTED_SCHEMA option is disabled, SQLite +** takes additional defensive steps to protect the application from harm +** including: +**
      +**
    • Prohibit the use of SQL functions inside triggers, views, +** CHECK constraints, DEFAULT clauses, expression indexes, +** partial indexes, or generated columns +** unless those functions are tagged with [SQLITE_INNOCUOUS]. +**
    • Prohibit the use of virtual tables inside of triggers or views +** unless those virtual tables are tagged with [SQLITE_VTAB_INNOCUOUS]. +**
    +** This setting defaults to "on" for legacy compatibility, however +** all applications are advised to turn it off if possible. This setting +** can also be controlled using the [PRAGMA trusted_schema] statement. +**
    +** +** [[SQLITE_DBCONFIG_LEGACY_FILE_FORMAT]] +**
    SQLITE_DBCONFIG_LEGACY_FILE_FORMAT +**
    The SQLITE_DBCONFIG_LEGACY_FILE_FORMAT option activates or deactivates +** the legacy file format flag. When activated, this flag causes all newly +** created database file to have a schema format version number (the 4-byte +** integer found at offset 44 into the database header) of 1. This in turn +** means that the resulting database file will be readable and writable by +** any SQLite version back to 3.0.0 ([dateof:3.0.0]). Without this setting, +** newly created databases are generally not understandable by SQLite versions +** prior to 3.3.0 ([dateof:3.3.0]). As these words are written, there +** is now scarcely any need to generated database files that are compatible +** all the way back to version 3.0.0, and so this setting is of little +** practical use, but is provided so that SQLite can continue to claim the +** ability to generate new database files that are compatible with version +** 3.0.0. +**

    Note that when the SQLITE_DBCONFIG_LEGACY_FILE_FORMAT setting is on, +** the [VACUUM] command will fail with an obscure error when attempting to +** process a table with generated columns and a descending index. This is +** not considered a bug since SQLite versions 3.3.0 and earlier do not support +** either generated columns or decending indexes. +**

    +**
    +*/ +#define SQLITE_DBCONFIG_MAINDBNAME 1000 /* const char* */ +#define SQLITE_DBCONFIG_LOOKASIDE 1001 /* void* int int */ +#define SQLITE_DBCONFIG_ENABLE_FKEY 1002 /* int int* */ +#define SQLITE_DBCONFIG_ENABLE_TRIGGER 1003 /* int int* */ +#define SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER 1004 /* int int* */ +#define SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION 1005 /* int int* */ +#define SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE 1006 /* int int* */ +#define SQLITE_DBCONFIG_ENABLE_QPSG 1007 /* int int* */ +#define SQLITE_DBCONFIG_TRIGGER_EQP 1008 /* int int* */ +#define SQLITE_DBCONFIG_RESET_DATABASE 1009 /* int int* */ +#define SQLITE_DBCONFIG_DEFENSIVE 1010 /* int int* */ +#define SQLITE_DBCONFIG_WRITABLE_SCHEMA 1011 /* int int* */ +#define SQLITE_DBCONFIG_LEGACY_ALTER_TABLE 1012 /* int int* */ +#define SQLITE_DBCONFIG_DQS_DML 1013 /* int int* */ +#define SQLITE_DBCONFIG_DQS_DDL 1014 /* int int* */ +#define SQLITE_DBCONFIG_ENABLE_VIEW 1015 /* int int* */ +#define SQLITE_DBCONFIG_LEGACY_FILE_FORMAT 1016 /* int int* */ +#define SQLITE_DBCONFIG_TRUSTED_SCHEMA 1017 /* int int* */ +#define SQLITE_DBCONFIG_MAX 1017 /* Largest DBCONFIG */ + +/* +** CAPI3REF: Enable Or Disable Extended Result Codes +** METHOD: sqlite3 +** +** ^The sqlite3_extended_result_codes() routine enables or disables the +** [extended result codes] feature of SQLite. ^The extended result +** codes are disabled by default for historical compatibility. +*/ +SQLITE_API int sqlite3_extended_result_codes(sqlite3*, int onoff); + +/* +** CAPI3REF: Last Insert Rowid +** METHOD: sqlite3 +** +** ^Each entry in most SQLite tables (except for [WITHOUT ROWID] tables) +** has a unique 64-bit signed +** integer key called the [ROWID | "rowid"]. ^The rowid is always available +** as an undeclared column named ROWID, OID, or _ROWID_ as long as those +** names are not also used by explicitly declared columns. ^If +** the table has a column of type [INTEGER PRIMARY KEY] then that column +** is another alias for the rowid. +** +** ^The sqlite3_last_insert_rowid(D) interface usually returns the [rowid] of +** the most recent successful [INSERT] into a rowid table or [virtual table] +** on database connection D. ^Inserts into [WITHOUT ROWID] tables are not +** recorded. ^If no successful [INSERT]s into rowid tables have ever occurred +** on the database connection D, then sqlite3_last_insert_rowid(D) returns +** zero. +** +** As well as being set automatically as rows are inserted into database +** tables, the value returned by this function may be set explicitly by +** [sqlite3_set_last_insert_rowid()] +** +** Some virtual table implementations may INSERT rows into rowid tables as +** part of committing a transaction (e.g. to flush data accumulated in memory +** to disk). In this case subsequent calls to this function return the rowid +** associated with these internal INSERT operations, which leads to +** unintuitive results. Virtual table implementations that do write to rowid +** tables in this way can avoid this problem by restoring the original +** rowid value using [sqlite3_set_last_insert_rowid()] before returning +** control to the user. +** +** ^(If an [INSERT] occurs within a trigger then this routine will +** return the [rowid] of the inserted row as long as the trigger is +** running. Once the trigger program ends, the value returned +** by this routine reverts to what it was before the trigger was fired.)^ +** +** ^An [INSERT] that fails due to a constraint violation is not a +** successful [INSERT] and does not change the value returned by this +** routine. ^Thus INSERT OR FAIL, INSERT OR IGNORE, INSERT OR ROLLBACK, +** and INSERT OR ABORT make no changes to the return value of this +** routine when their insertion fails. ^(When INSERT OR REPLACE +** encounters a constraint violation, it does not fail. The +** INSERT continues to completion after deleting rows that caused +** the constraint problem so INSERT OR REPLACE will always change +** the return value of this interface.)^ +** +** ^For the purposes of this routine, an [INSERT] is considered to +** be successful even if it is subsequently rolled back. +** +** This function is accessible to SQL statements via the +** [last_insert_rowid() SQL function]. +** +** If a separate thread performs a new [INSERT] on the same +** database connection while the [sqlite3_last_insert_rowid()] +** function is running and thus changes the last insert [rowid], +** then the value returned by [sqlite3_last_insert_rowid()] is +** unpredictable and might not equal either the old or the new +** last insert [rowid]. +*/ +SQLITE_API sqlite3_int64 sqlite3_last_insert_rowid(sqlite3*); + +/* +** CAPI3REF: Set the Last Insert Rowid value. +** METHOD: sqlite3 +** +** The sqlite3_set_last_insert_rowid(D, R) method allows the application to +** set the value returned by calling sqlite3_last_insert_rowid(D) to R +** without inserting a row into the database. +*/ +SQLITE_API void sqlite3_set_last_insert_rowid(sqlite3*,sqlite3_int64); + +/* +** CAPI3REF: Count The Number Of Rows Modified +** METHOD: sqlite3 +** +** ^These functions return the number of rows modified, inserted or +** deleted by the most recently completed INSERT, UPDATE or DELETE +** statement on the database connection specified by the only parameter. +** The two functions are identical except for the type of the return value +** and that if the number of rows modified by the most recent INSERT, UPDATE +** or DELETE is greater than the maximum value supported by type "int", then +** the return value of sqlite3_changes() is undefined. ^Executing any other +** type of SQL statement does not modify the value returned by these functions. +** +** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are +** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], +** [foreign key actions] or [REPLACE] constraint resolution are not counted. +** +** Changes to a view that are intercepted by +** [INSTEAD OF trigger | INSTEAD OF triggers] are not counted. ^The value +** returned by sqlite3_changes() immediately after an INSERT, UPDATE or +** DELETE statement run on a view is always zero. Only changes made to real +** tables are counted. +** +** Things are more complicated if the sqlite3_changes() function is +** executed while a trigger program is running. This may happen if the +** program uses the [changes() SQL function], or if some other callback +** function invokes sqlite3_changes() directly. Essentially: +** +**
      +**
    • ^(Before entering a trigger program the value returned by +** sqlite3_changes() function is saved. After the trigger program +** has finished, the original value is restored.)^ +** +**
    • ^(Within a trigger program each INSERT, UPDATE and DELETE +** statement sets the value returned by sqlite3_changes() +** upon completion as normal. Of course, this value will not include +** any changes performed by sub-triggers, as the sqlite3_changes() +** value will be saved and restored after each sub-trigger has run.)^ +**
    +** +** ^This means that if the changes() SQL function (or similar) is used +** by the first INSERT, UPDATE or DELETE statement within a trigger, it +** returns the value as set when the calling statement began executing. +** ^If it is used by the second or subsequent such statement within a trigger +** program, the value returned reflects the number of rows modified by the +** previous INSERT, UPDATE or DELETE statement within the same trigger. +** +** If a separate thread makes changes on the same database connection +** while [sqlite3_changes()] is running then the value returned +** is unpredictable and not meaningful. +** +** See also: +**
      +**
    • the [sqlite3_total_changes()] interface +**
    • the [count_changes pragma] +**
    • the [changes() SQL function] +**
    • the [data_version pragma] +**
    +*/ +SQLITE_API int sqlite3_changes(sqlite3*); +SQLITE_API sqlite3_int64 sqlite3_changes64(sqlite3*); + +/* +** CAPI3REF: Total Number Of Rows Modified +** METHOD: sqlite3 +** +** ^These functions return the total number of rows inserted, modified or +** deleted by all [INSERT], [UPDATE] or [DELETE] statements completed +** since the database connection was opened, including those executed as +** part of trigger programs. The two functions are identical except for the +** type of the return value and that if the number of rows modified by the +** connection exceeds the maximum value supported by type "int", then +** the return value of sqlite3_total_changes() is undefined. ^Executing +** any other type of SQL statement does not affect the value returned by +** sqlite3_total_changes(). +** +** ^Changes made as part of [foreign key actions] are included in the +** count, but those made as part of REPLACE constraint resolution are +** not. ^Changes to a view that are intercepted by INSTEAD OF triggers +** are not counted. +** +** The [sqlite3_total_changes(D)] interface only reports the number +** of rows that changed due to SQL statement run against database +** connection D. Any changes by other database connections are ignored. +** To detect changes against a database file from other database +** connections use the [PRAGMA data_version] command or the +** [SQLITE_FCNTL_DATA_VERSION] [file control]. +** +** If a separate thread makes changes on the same database connection +** while [sqlite3_total_changes()] is running then the value +** returned is unpredictable and not meaningful. +** +** See also: +**
      +**
    • the [sqlite3_changes()] interface +**
    • the [count_changes pragma] +**
    • the [changes() SQL function] +**
    • the [data_version pragma] +**
    • the [SQLITE_FCNTL_DATA_VERSION] [file control] +**
    +*/ +SQLITE_API int sqlite3_total_changes(sqlite3*); +SQLITE_API sqlite3_int64 sqlite3_total_changes64(sqlite3*); + +/* +** CAPI3REF: Interrupt A Long-Running Query +** METHOD: sqlite3 +** +** ^This function causes any pending database operation to abort and +** return at its earliest opportunity. This routine is typically +** called in response to a user action such as pressing "Cancel" +** or Ctrl-C where the user wants a long query operation to halt +** immediately. +** +** ^It is safe to call this routine from a thread different from the +** thread that is currently running the database operation. But it +** is not safe to call this routine with a [database connection] that +** is closed or might close before sqlite3_interrupt() returns. +** +** ^If an SQL operation is very nearly finished at the time when +** sqlite3_interrupt() is called, then it might not have an opportunity +** to be interrupted and might continue to completion. +** +** ^An SQL operation that is interrupted will return [SQLITE_INTERRUPT]. +** ^If the interrupted SQL operation is an INSERT, UPDATE, or DELETE +** that is inside an explicit transaction, then the entire transaction +** will be rolled back automatically. +** +** ^The sqlite3_interrupt(D) call is in effect until all currently running +** SQL statements on [database connection] D complete. ^Any new SQL statements +** that are started after the sqlite3_interrupt() call and before the +** running statement count reaches zero are interrupted as if they had been +** running prior to the sqlite3_interrupt() call. ^New SQL statements +** that are started after the running statement count reaches zero are +** not effected by the sqlite3_interrupt(). +** ^A call to sqlite3_interrupt(D) that occurs when there are no running +** SQL statements is a no-op and has no effect on SQL statements +** that are started after the sqlite3_interrupt() call returns. +*/ +SQLITE_API void sqlite3_interrupt(sqlite3*); + +/* +** CAPI3REF: Determine If An SQL Statement Is Complete +** +** These routines are useful during command-line input to determine if the +** currently entered text seems to form a complete SQL statement or +** if additional input is needed before sending the text into +** SQLite for parsing. ^These routines return 1 if the input string +** appears to be a complete SQL statement. ^A statement is judged to be +** complete if it ends with a semicolon token and is not a prefix of a +** well-formed CREATE TRIGGER statement. ^Semicolons that are embedded within +** string literals or quoted identifier names or comments are not +** independent tokens (they are part of the token in which they are +** embedded) and thus do not count as a statement terminator. ^Whitespace +** and comments that follow the final semicolon are ignored. +** +** ^These routines return 0 if the statement is incomplete. ^If a +** memory allocation fails, then SQLITE_NOMEM is returned. +** +** ^These routines do not parse the SQL statements thus +** will not detect syntactically incorrect SQL. +** +** ^(If SQLite has not been initialized using [sqlite3_initialize()] prior +** to invoking sqlite3_complete16() then sqlite3_initialize() is invoked +** automatically by sqlite3_complete16(). If that initialization fails, +** then the return value from sqlite3_complete16() will be non-zero +** regardless of whether or not the input SQL is complete.)^ +** +** The input to [sqlite3_complete()] must be a zero-terminated +** UTF-8 string. +** +** The input to [sqlite3_complete16()] must be a zero-terminated +** UTF-16 string in native byte order. +*/ +SQLITE_API int sqlite3_complete(const char *sql); +SQLITE_API int sqlite3_complete16(const void *sql); + +/* +** CAPI3REF: Register A Callback To Handle SQLITE_BUSY Errors +** KEYWORDS: {busy-handler callback} {busy handler} +** METHOD: sqlite3 +** +** ^The sqlite3_busy_handler(D,X,P) routine sets a callback function X +** that might be invoked with argument P whenever +** an attempt is made to access a database table associated with +** [database connection] D when another thread +** or process has the table locked. +** The sqlite3_busy_handler() interface is used to implement +** [sqlite3_busy_timeout()] and [PRAGMA busy_timeout]. +** +** ^If the busy callback is NULL, then [SQLITE_BUSY] +** is returned immediately upon encountering the lock. ^If the busy callback +** is not NULL, then the callback might be invoked with two arguments. +** +** ^The first argument to the busy handler is a copy of the void* pointer which +** is the third argument to sqlite3_busy_handler(). ^The second argument to +** the busy handler callback is the number of times that the busy handler has +** been invoked previously for the same locking event. ^If the +** busy callback returns 0, then no additional attempts are made to +** access the database and [SQLITE_BUSY] is returned +** to the application. +** ^If the callback returns non-zero, then another attempt +** is made to access the database and the cycle repeats. +** +** The presence of a busy handler does not guarantee that it will be invoked +** when there is lock contention. ^If SQLite determines that invoking the busy +** handler could result in a deadlock, it will go ahead and return [SQLITE_BUSY] +** to the application instead of invoking the +** busy handler. +** Consider a scenario where one process is holding a read lock that +** it is trying to promote to a reserved lock and +** a second process is holding a reserved lock that it is trying +** to promote to an exclusive lock. The first process cannot proceed +** because it is blocked by the second and the second process cannot +** proceed because it is blocked by the first. If both processes +** invoke the busy handlers, neither will make any progress. Therefore, +** SQLite returns [SQLITE_BUSY] for the first process, hoping that this +** will induce the first process to release its read lock and allow +** the second process to proceed. +** +** ^The default busy callback is NULL. +** +** ^(There can only be a single busy handler defined for each +** [database connection]. Setting a new busy handler clears any +** previously set handler.)^ ^Note that calling [sqlite3_busy_timeout()] +** or evaluating [PRAGMA busy_timeout=N] will change the +** busy handler and thus clear any previously set busy handler. +** +** The busy callback should not take any actions which modify the +** database connection that invoked the busy handler. In other words, +** the busy handler is not reentrant. Any such actions +** result in undefined behavior. +** +** A busy handler must not close the database connection +** or [prepared statement] that invoked the busy handler. +*/ +SQLITE_API int sqlite3_busy_handler(sqlite3*,int(*)(void*,int),void*); + +/* +** CAPI3REF: Set A Busy Timeout +** METHOD: sqlite3 +** +** ^This routine sets a [sqlite3_busy_handler | busy handler] that sleeps +** for a specified amount of time when a table is locked. ^The handler +** will sleep multiple times until at least "ms" milliseconds of sleeping +** have accumulated. ^After at least "ms" milliseconds of sleeping, +** the handler returns 0 which causes [sqlite3_step()] to return +** [SQLITE_BUSY]. +** +** ^Calling this routine with an argument less than or equal to zero +** turns off all busy handlers. +** +** ^(There can only be a single busy handler for a particular +** [database connection] at any given moment. If another busy handler +** was defined (using [sqlite3_busy_handler()]) prior to calling +** this routine, that other busy handler is cleared.)^ +** +** See also: [PRAGMA busy_timeout] +*/ +SQLITE_API int sqlite3_busy_timeout(sqlite3*, int ms); + +/* +** CAPI3REF: Convenience Routines For Running Queries +** METHOD: sqlite3 +** +** This is a legacy interface that is preserved for backwards compatibility. +** Use of this interface is not recommended. +** +** Definition: A result table is memory data structure created by the +** [sqlite3_get_table()] interface. A result table records the +** complete query results from one or more queries. +** +** The table conceptually has a number of rows and columns. But +** these numbers are not part of the result table itself. These +** numbers are obtained separately. Let N be the number of rows +** and M be the number of columns. +** +** A result table is an array of pointers to zero-terminated UTF-8 strings. +** There are (N+1)*M elements in the array. The first M pointers point +** to zero-terminated strings that contain the names of the columns. +** The remaining entries all point to query results. NULL values result +** in NULL pointers. All other values are in their UTF-8 zero-terminated +** string representation as returned by [sqlite3_column_text()]. +** +** A result table might consist of one or more memory allocations. +** It is not safe to pass a result table directly to [sqlite3_free()]. +** A result table should be deallocated using [sqlite3_free_table()]. +** +** ^(As an example of the result table format, suppose a query result +** is as follows: +** +**
    +**        Name        | Age
    +**        -----------------------
    +**        Alice       | 43
    +**        Bob         | 28
    +**        Cindy       | 21
    +** 
    +** +** There are two columns (M==2) and three rows (N==3). Thus the +** result table has 8 entries. Suppose the result table is stored +** in an array named azResult. Then azResult holds this content: +** +**
    +**        azResult[0] = "Name";
    +**        azResult[1] = "Age";
    +**        azResult[2] = "Alice";
    +**        azResult[3] = "43";
    +**        azResult[4] = "Bob";
    +**        azResult[5] = "28";
    +**        azResult[6] = "Cindy";
    +**        azResult[7] = "21";
    +** 
    )^ +** +** ^The sqlite3_get_table() function evaluates one or more +** semicolon-separated SQL statements in the zero-terminated UTF-8 +** string of its 2nd parameter and returns a result table to the +** pointer given in its 3rd parameter. +** +** After the application has finished with the result from sqlite3_get_table(), +** it must pass the result table pointer to sqlite3_free_table() in order to +** release the memory that was malloced. Because of the way the +** [sqlite3_malloc()] happens within sqlite3_get_table(), the calling +** function must not try to call [sqlite3_free()] directly. Only +** [sqlite3_free_table()] is able to release the memory properly and safely. +** +** The sqlite3_get_table() interface is implemented as a wrapper around +** [sqlite3_exec()]. The sqlite3_get_table() routine does not have access +** to any internal data structures of SQLite. It uses only the public +** interface defined here. As a consequence, errors that occur in the +** wrapper layer outside of the internal [sqlite3_exec()] call are not +** reflected in subsequent calls to [sqlite3_errcode()] or +** [sqlite3_errmsg()]. +*/ +SQLITE_API int sqlite3_get_table( + sqlite3 *db, /* An open database */ + const char *zSql, /* SQL to be evaluated */ + char ***pazResult, /* Results of the query */ + int *pnRow, /* Number of result rows written here */ + int *pnColumn, /* Number of result columns written here */ + char **pzErrmsg /* Error msg written here */ +); +SQLITE_API void sqlite3_free_table(char **result); + +/* +** CAPI3REF: Formatted String Printing Functions +** +** These routines are work-alikes of the "printf()" family of functions +** from the standard C library. +** These routines understand most of the common formatting options from +** the standard library printf() +** plus some additional non-standard formats ([%q], [%Q], [%w], and [%z]). +** See the [built-in printf()] documentation for details. +** +** ^The sqlite3_mprintf() and sqlite3_vmprintf() routines write their +** results into memory obtained from [sqlite3_malloc64()]. +** The strings returned by these two routines should be +** released by [sqlite3_free()]. ^Both routines return a +** NULL pointer if [sqlite3_malloc64()] is unable to allocate enough +** memory to hold the resulting string. +** +** ^(The sqlite3_snprintf() routine is similar to "snprintf()" from +** the standard C library. The result is written into the +** buffer supplied as the second parameter whose size is given by +** the first parameter. Note that the order of the +** first two parameters is reversed from snprintf().)^ This is an +** historical accident that cannot be fixed without breaking +** backwards compatibility. ^(Note also that sqlite3_snprintf() +** returns a pointer to its buffer instead of the number of +** characters actually written into the buffer.)^ We admit that +** the number of characters written would be a more useful return +** value but we cannot change the implementation of sqlite3_snprintf() +** now without breaking compatibility. +** +** ^As long as the buffer size is greater than zero, sqlite3_snprintf() +** guarantees that the buffer is always zero-terminated. ^The first +** parameter "n" is the total size of the buffer, including space for +** the zero terminator. So the longest string that can be completely +** written will be n-1 characters. +** +** ^The sqlite3_vsnprintf() routine is a varargs version of sqlite3_snprintf(). +** +** See also: [built-in printf()], [printf() SQL function] +*/ +SQLITE_API char *sqlite3_mprintf(const char*,...); +SQLITE_API char *sqlite3_vmprintf(const char*, va_list); +SQLITE_API char *sqlite3_snprintf(int,char*,const char*, ...); +SQLITE_API char *sqlite3_vsnprintf(int,char*,const char*, va_list); + +/* +** CAPI3REF: Memory Allocation Subsystem +** +** The SQLite core uses these three routines for all of its own +** internal memory allocation needs. "Core" in the previous sentence +** does not include operating-system specific [VFS] implementation. The +** Windows VFS uses native malloc() and free() for some operations. +** +** ^The sqlite3_malloc() routine returns a pointer to a block +** of memory at least N bytes in length, where N is the parameter. +** ^If sqlite3_malloc() is unable to obtain sufficient free +** memory, it returns a NULL pointer. ^If the parameter N to +** sqlite3_malloc() is zero or negative then sqlite3_malloc() returns +** a NULL pointer. +** +** ^The sqlite3_malloc64(N) routine works just like +** sqlite3_malloc(N) except that N is an unsigned 64-bit integer instead +** of a signed 32-bit integer. +** +** ^Calling sqlite3_free() with a pointer previously returned +** by sqlite3_malloc() or sqlite3_realloc() releases that memory so +** that it might be reused. ^The sqlite3_free() routine is +** a no-op if is called with a NULL pointer. Passing a NULL pointer +** to sqlite3_free() is harmless. After being freed, memory +** should neither be read nor written. Even reading previously freed +** memory might result in a segmentation fault or other severe error. +** Memory corruption, a segmentation fault, or other severe error +** might result if sqlite3_free() is called with a non-NULL pointer that +** was not obtained from sqlite3_malloc() or sqlite3_realloc(). +** +** ^The sqlite3_realloc(X,N) interface attempts to resize a +** prior memory allocation X to be at least N bytes. +** ^If the X parameter to sqlite3_realloc(X,N) +** is a NULL pointer then its behavior is identical to calling +** sqlite3_malloc(N). +** ^If the N parameter to sqlite3_realloc(X,N) is zero or +** negative then the behavior is exactly the same as calling +** sqlite3_free(X). +** ^sqlite3_realloc(X,N) returns a pointer to a memory allocation +** of at least N bytes in size or NULL if insufficient memory is available. +** ^If M is the size of the prior allocation, then min(N,M) bytes +** of the prior allocation are copied into the beginning of buffer returned +** by sqlite3_realloc(X,N) and the prior allocation is freed. +** ^If sqlite3_realloc(X,N) returns NULL and N is positive, then the +** prior allocation is not freed. +** +** ^The sqlite3_realloc64(X,N) interfaces works the same as +** sqlite3_realloc(X,N) except that N is a 64-bit unsigned integer instead +** of a 32-bit signed integer. +** +** ^If X is a memory allocation previously obtained from sqlite3_malloc(), +** sqlite3_malloc64(), sqlite3_realloc(), or sqlite3_realloc64(), then +** sqlite3_msize(X) returns the size of that memory allocation in bytes. +** ^The value returned by sqlite3_msize(X) might be larger than the number +** of bytes requested when X was allocated. ^If X is a NULL pointer then +** sqlite3_msize(X) returns zero. If X points to something that is not +** the beginning of memory allocation, or if it points to a formerly +** valid memory allocation that has now been freed, then the behavior +** of sqlite3_msize(X) is undefined and possibly harmful. +** +** ^The memory returned by sqlite3_malloc(), sqlite3_realloc(), +** sqlite3_malloc64(), and sqlite3_realloc64() +** is always aligned to at least an 8 byte boundary, or to a +** 4 byte boundary if the [SQLITE_4_BYTE_ALIGNED_MALLOC] compile-time +** option is used. +** +** The pointer arguments to [sqlite3_free()] and [sqlite3_realloc()] +** must be either NULL or else pointers obtained from a prior +** invocation of [sqlite3_malloc()] or [sqlite3_realloc()] that have +** not yet been released. +** +** The application must not read or write any part of +** a block of memory after it has been released using +** [sqlite3_free()] or [sqlite3_realloc()]. +*/ +SQLITE_API void *sqlite3_malloc(int); +SQLITE_API void *sqlite3_malloc64(sqlite3_uint64); +SQLITE_API void *sqlite3_realloc(void*, int); +SQLITE_API void *sqlite3_realloc64(void*, sqlite3_uint64); +SQLITE_API void sqlite3_free(void*); +SQLITE_API sqlite3_uint64 sqlite3_msize(void*); + +/* +** CAPI3REF: Memory Allocator Statistics +** +** SQLite provides these two interfaces for reporting on the status +** of the [sqlite3_malloc()], [sqlite3_free()], and [sqlite3_realloc()] +** routines, which form the built-in memory allocation subsystem. +** +** ^The [sqlite3_memory_used()] routine returns the number of bytes +** of memory currently outstanding (malloced but not freed). +** ^The [sqlite3_memory_highwater()] routine returns the maximum +** value of [sqlite3_memory_used()] since the high-water mark +** was last reset. ^The values returned by [sqlite3_memory_used()] and +** [sqlite3_memory_highwater()] include any overhead +** added by SQLite in its implementation of [sqlite3_malloc()], +** but not overhead added by the any underlying system library +** routines that [sqlite3_malloc()] may call. +** +** ^The memory high-water mark is reset to the current value of +** [sqlite3_memory_used()] if and only if the parameter to +** [sqlite3_memory_highwater()] is true. ^The value returned +** by [sqlite3_memory_highwater(1)] is the high-water mark +** prior to the reset. +*/ +SQLITE_API sqlite3_int64 sqlite3_memory_used(void); +SQLITE_API sqlite3_int64 sqlite3_memory_highwater(int resetFlag); + +/* +** CAPI3REF: Pseudo-Random Number Generator +** +** SQLite contains a high-quality pseudo-random number generator (PRNG) used to +** select random [ROWID | ROWIDs] when inserting new records into a table that +** already uses the largest possible [ROWID]. The PRNG is also used for +** the built-in random() and randomblob() SQL functions. This interface allows +** applications to access the same PRNG for other purposes. +** +** ^A call to this routine stores N bytes of randomness into buffer P. +** ^The P parameter can be a NULL pointer. +** +** ^If this routine has not been previously called or if the previous +** call had N less than one or a NULL pointer for P, then the PRNG is +** seeded using randomness obtained from the xRandomness method of +** the default [sqlite3_vfs] object. +** ^If the previous call to this routine had an N of 1 or more and a +** non-NULL P then the pseudo-randomness is generated +** internally and without recourse to the [sqlite3_vfs] xRandomness +** method. +*/ +SQLITE_API void sqlite3_randomness(int N, void *P); + +/* +** CAPI3REF: Compile-Time Authorization Callbacks +** METHOD: sqlite3 +** KEYWORDS: {authorizer callback} +** +** ^This routine registers an authorizer callback with a particular +** [database connection], supplied in the first argument. +** ^The authorizer callback is invoked as SQL statements are being compiled +** by [sqlite3_prepare()] or its variants [sqlite3_prepare_v2()], +** [sqlite3_prepare_v3()], [sqlite3_prepare16()], [sqlite3_prepare16_v2()], +** and [sqlite3_prepare16_v3()]. ^At various +** points during the compilation process, as logic is being created +** to perform various actions, the authorizer callback is invoked to +** see if those actions are allowed. ^The authorizer callback should +** return [SQLITE_OK] to allow the action, [SQLITE_IGNORE] to disallow the +** specific action but allow the SQL statement to continue to be +** compiled, or [SQLITE_DENY] to cause the entire SQL statement to be +** rejected with an error. ^If the authorizer callback returns +** any value other than [SQLITE_IGNORE], [SQLITE_OK], or [SQLITE_DENY] +** then the [sqlite3_prepare_v2()] or equivalent call that triggered +** the authorizer will fail with an error message. +** +** When the callback returns [SQLITE_OK], that means the operation +** requested is ok. ^When the callback returns [SQLITE_DENY], the +** [sqlite3_prepare_v2()] or equivalent call that triggered the +** authorizer will fail with an error message explaining that +** access is denied. +** +** ^The first parameter to the authorizer callback is a copy of the third +** parameter to the sqlite3_set_authorizer() interface. ^The second parameter +** to the callback is an integer [SQLITE_COPY | action code] that specifies +** the particular action to be authorized. ^The third through sixth parameters +** to the callback are either NULL pointers or zero-terminated strings +** that contain additional details about the action to be authorized. +** Applications must always be prepared to encounter a NULL pointer in any +** of the third through the sixth parameters of the authorization callback. +** +** ^If the action code is [SQLITE_READ] +** and the callback returns [SQLITE_IGNORE] then the +** [prepared statement] statement is constructed to substitute +** a NULL value in place of the table column that would have +** been read if [SQLITE_OK] had been returned. The [SQLITE_IGNORE] +** return can be used to deny an untrusted user access to individual +** columns of a table. +** ^When a table is referenced by a [SELECT] but no column values are +** extracted from that table (for example in a query like +** "SELECT count(*) FROM tab") then the [SQLITE_READ] authorizer callback +** is invoked once for that table with a column name that is an empty string. +** ^If the action code is [SQLITE_DELETE] and the callback returns +** [SQLITE_IGNORE] then the [DELETE] operation proceeds but the +** [truncate optimization] is disabled and all rows are deleted individually. +** +** An authorizer is used when [sqlite3_prepare | preparing] +** SQL statements from an untrusted source, to ensure that the SQL statements +** do not try to access data they are not allowed to see, or that they do not +** try to execute malicious statements that damage the database. For +** example, an application may allow a user to enter arbitrary +** SQL queries for evaluation by a database. But the application does +** not want the user to be able to make arbitrary changes to the +** database. An authorizer could then be put in place while the +** user-entered SQL is being [sqlite3_prepare | prepared] that +** disallows everything except [SELECT] statements. +** +** Applications that need to process SQL from untrusted sources +** might also consider lowering resource limits using [sqlite3_limit()] +** and limiting database size using the [max_page_count] [PRAGMA] +** in addition to using an authorizer. +** +** ^(Only a single authorizer can be in place on a database connection +** at a time. Each call to sqlite3_set_authorizer overrides the +** previous call.)^ ^Disable the authorizer by installing a NULL callback. +** The authorizer is disabled by default. +** +** The authorizer callback must not do anything that will modify +** the database connection that invoked the authorizer callback. +** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their +** database connections for the meaning of "modify" in this paragraph. +** +** ^When [sqlite3_prepare_v2()] is used to prepare a statement, the +** statement might be re-prepared during [sqlite3_step()] due to a +** schema change. Hence, the application should ensure that the +** correct authorizer callback remains in place during the [sqlite3_step()]. +** +** ^Note that the authorizer callback is invoked only during +** [sqlite3_prepare()] or its variants. Authorization is not +** performed during statement evaluation in [sqlite3_step()], unless +** as stated in the previous paragraph, sqlite3_step() invokes +** sqlite3_prepare_v2() to reprepare a statement after a schema change. +*/ +SQLITE_API int sqlite3_set_authorizer( + sqlite3*, + int (*xAuth)(void*,int,const char*,const char*,const char*,const char*), + void *pUserData +); + +/* +** CAPI3REF: Authorizer Return Codes +** +** The [sqlite3_set_authorizer | authorizer callback function] must +** return either [SQLITE_OK] or one of these two constants in order +** to signal SQLite whether or not the action is permitted. See the +** [sqlite3_set_authorizer | authorizer documentation] for additional +** information. +** +** Note that SQLITE_IGNORE is also used as a [conflict resolution mode] +** returned from the [sqlite3_vtab_on_conflict()] interface. +*/ +#define SQLITE_DENY 1 /* Abort the SQL statement with an error */ +#define SQLITE_IGNORE 2 /* Don't allow access, but don't generate an error */ + +/* +** CAPI3REF: Authorizer Action Codes +** +** The [sqlite3_set_authorizer()] interface registers a callback function +** that is invoked to authorize certain SQL statement actions. The +** second parameter to the callback is an integer code that specifies +** what action is being authorized. These are the integer action codes that +** the authorizer callback may be passed. +** +** These action code values signify what kind of operation is to be +** authorized. The 3rd and 4th parameters to the authorization +** callback function will be parameters or NULL depending on which of these +** codes is used as the second parameter. ^(The 5th parameter to the +** authorizer callback is the name of the database ("main", "temp", +** etc.) if applicable.)^ ^The 6th parameter to the authorizer callback +** is the name of the inner-most trigger or view that is responsible for +** the access attempt or NULL if this access attempt is directly from +** top-level SQL code. +*/ +/******************************************* 3rd ************ 4th ***********/ +#define SQLITE_CREATE_INDEX 1 /* Index Name Table Name */ +#define SQLITE_CREATE_TABLE 2 /* Table Name NULL */ +#define SQLITE_CREATE_TEMP_INDEX 3 /* Index Name Table Name */ +#define SQLITE_CREATE_TEMP_TABLE 4 /* Table Name NULL */ +#define SQLITE_CREATE_TEMP_TRIGGER 5 /* Trigger Name Table Name */ +#define SQLITE_CREATE_TEMP_VIEW 6 /* View Name NULL */ +#define SQLITE_CREATE_TRIGGER 7 /* Trigger Name Table Name */ +#define SQLITE_CREATE_VIEW 8 /* View Name NULL */ +#define SQLITE_DELETE 9 /* Table Name NULL */ +#define SQLITE_DROP_INDEX 10 /* Index Name Table Name */ +#define SQLITE_DROP_TABLE 11 /* Table Name NULL */ +#define SQLITE_DROP_TEMP_INDEX 12 /* Index Name Table Name */ +#define SQLITE_DROP_TEMP_TABLE 13 /* Table Name NULL */ +#define SQLITE_DROP_TEMP_TRIGGER 14 /* Trigger Name Table Name */ +#define SQLITE_DROP_TEMP_VIEW 15 /* View Name NULL */ +#define SQLITE_DROP_TRIGGER 16 /* Trigger Name Table Name */ +#define SQLITE_DROP_VIEW 17 /* View Name NULL */ +#define SQLITE_INSERT 18 /* Table Name NULL */ +#define SQLITE_PRAGMA 19 /* Pragma Name 1st arg or NULL */ +#define SQLITE_READ 20 /* Table Name Column Name */ +#define SQLITE_SELECT 21 /* NULL NULL */ +#define SQLITE_TRANSACTION 22 /* Operation NULL */ +#define SQLITE_UPDATE 23 /* Table Name Column Name */ +#define SQLITE_ATTACH 24 /* Filename NULL */ +#define SQLITE_DETACH 25 /* Database Name NULL */ +#define SQLITE_ALTER_TABLE 26 /* Database Name Table Name */ +#define SQLITE_REINDEX 27 /* Index Name NULL */ +#define SQLITE_ANALYZE 28 /* Table Name NULL */ +#define SQLITE_CREATE_VTABLE 29 /* Table Name Module Name */ +#define SQLITE_DROP_VTABLE 30 /* Table Name Module Name */ +#define SQLITE_FUNCTION 31 /* NULL Function Name */ +#define SQLITE_SAVEPOINT 32 /* Operation Savepoint Name */ +#define SQLITE_COPY 0 /* No longer used */ +#define SQLITE_RECURSIVE 33 /* NULL NULL */ + +/* +** CAPI3REF: Tracing And Profiling Functions +** METHOD: sqlite3 +** +** These routines are deprecated. Use the [sqlite3_trace_v2()] interface +** instead of the routines described here. +** +** These routines register callback functions that can be used for +** tracing and profiling the execution of SQL statements. +** +** ^The callback function registered by sqlite3_trace() is invoked at +** various times when an SQL statement is being run by [sqlite3_step()]. +** ^The sqlite3_trace() callback is invoked with a UTF-8 rendering of the +** SQL statement text as the statement first begins executing. +** ^(Additional sqlite3_trace() callbacks might occur +** as each triggered subprogram is entered. The callbacks for triggers +** contain a UTF-8 SQL comment that identifies the trigger.)^ +** +** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit +** the length of [bound parameter] expansion in the output of sqlite3_trace(). +** +** ^The callback function registered by sqlite3_profile() is invoked +** as each SQL statement finishes. ^The profile callback contains +** the original statement text and an estimate of wall-clock time +** of how long that statement took to run. ^The profile callback +** time is in units of nanoseconds, however the current implementation +** is only capable of millisecond resolution so the six least significant +** digits in the time are meaningless. Future versions of SQLite +** might provide greater resolution on the profiler callback. Invoking +** either [sqlite3_trace()] or [sqlite3_trace_v2()] will cancel the +** profile callback. +*/ +SQLITE_API SQLITE_DEPRECATED void *sqlite3_trace(sqlite3*, + void(*xTrace)(void*,const char*), void*); +SQLITE_API SQLITE_DEPRECATED void *sqlite3_profile(sqlite3*, + void(*xProfile)(void*,const char*,sqlite3_uint64), void*); + +/* +** CAPI3REF: SQL Trace Event Codes +** KEYWORDS: SQLITE_TRACE +** +** These constants identify classes of events that can be monitored +** using the [sqlite3_trace_v2()] tracing logic. The M argument +** to [sqlite3_trace_v2(D,M,X,P)] is an OR-ed combination of one or more of +** the following constants. ^The first argument to the trace callback +** is one of the following constants. +** +** New tracing constants may be added in future releases. +** +** ^A trace callback has four arguments: xCallback(T,C,P,X). +** ^The T argument is one of the integer type codes above. +** ^The C argument is a copy of the context pointer passed in as the +** fourth argument to [sqlite3_trace_v2()]. +** The P and X arguments are pointers whose meanings depend on T. +** +**
    +** [[SQLITE_TRACE_STMT]]
    SQLITE_TRACE_STMT
    +**
    ^An SQLITE_TRACE_STMT callback is invoked when a prepared statement +** first begins running and possibly at other times during the +** execution of the prepared statement, such as at the start of each +** trigger subprogram. ^The P argument is a pointer to the +** [prepared statement]. ^The X argument is a pointer to a string which +** is the unexpanded SQL text of the prepared statement or an SQL comment +** that indicates the invocation of a trigger. ^The callback can compute +** the same text that would have been returned by the legacy [sqlite3_trace()] +** interface by using the X argument when X begins with "--" and invoking +** [sqlite3_expanded_sql(P)] otherwise. +** +** [[SQLITE_TRACE_PROFILE]]
    SQLITE_TRACE_PROFILE
    +**
    ^An SQLITE_TRACE_PROFILE callback provides approximately the same +** information as is provided by the [sqlite3_profile()] callback. +** ^The P argument is a pointer to the [prepared statement] and the +** X argument points to a 64-bit integer which is the estimated of +** the number of nanosecond that the prepared statement took to run. +** ^The SQLITE_TRACE_PROFILE callback is invoked when the statement finishes. +** +** [[SQLITE_TRACE_ROW]]
    SQLITE_TRACE_ROW
    +**
    ^An SQLITE_TRACE_ROW callback is invoked whenever a prepared +** statement generates a single row of result. +** ^The P argument is a pointer to the [prepared statement] and the +** X argument is unused. +** +** [[SQLITE_TRACE_CLOSE]]
    SQLITE_TRACE_CLOSE
    +**
    ^An SQLITE_TRACE_CLOSE callback is invoked when a database +** connection closes. +** ^The P argument is a pointer to the [database connection] object +** and the X argument is unused. +**
    +*/ +#define SQLITE_TRACE_STMT 0x01 +#define SQLITE_TRACE_PROFILE 0x02 +#define SQLITE_TRACE_ROW 0x04 +#define SQLITE_TRACE_CLOSE 0x08 + +/* +** CAPI3REF: SQL Trace Hook +** METHOD: sqlite3 +** +** ^The sqlite3_trace_v2(D,M,X,P) interface registers a trace callback +** function X against [database connection] D, using property mask M +** and context pointer P. ^If the X callback is +** NULL or if the M mask is zero, then tracing is disabled. The +** M argument should be the bitwise OR-ed combination of +** zero or more [SQLITE_TRACE] constants. +** +** ^Each call to either sqlite3_trace() or sqlite3_trace_v2() overrides +** (cancels) any prior calls to sqlite3_trace() or sqlite3_trace_v2(). +** +** ^The X callback is invoked whenever any of the events identified by +** mask M occur. ^The integer return value from the callback is currently +** ignored, though this may change in future releases. Callback +** implementations should return zero to ensure future compatibility. +** +** ^A trace callback is invoked with four arguments: callback(T,C,P,X). +** ^The T argument is one of the [SQLITE_TRACE] +** constants to indicate why the callback was invoked. +** ^The C argument is a copy of the context pointer. +** The P and X arguments are pointers whose meanings depend on T. +** +** The sqlite3_trace_v2() interface is intended to replace the legacy +** interfaces [sqlite3_trace()] and [sqlite3_profile()], both of which +** are deprecated. +*/ +SQLITE_API int sqlite3_trace_v2( + sqlite3*, + unsigned uMask, + int(*xCallback)(unsigned,void*,void*,void*), + void *pCtx +); + +/* +** CAPI3REF: Query Progress Callbacks +** METHOD: sqlite3 +** +** ^The sqlite3_progress_handler(D,N,X,P) interface causes the callback +** function X to be invoked periodically during long running calls to +** [sqlite3_exec()], [sqlite3_step()] and [sqlite3_get_table()] for +** database connection D. An example use for this +** interface is to keep a GUI updated during a large query. +** +** ^The parameter P is passed through as the only parameter to the +** callback function X. ^The parameter N is the approximate number of +** [virtual machine instructions] that are evaluated between successive +** invocations of the callback X. ^If N is less than one then the progress +** handler is disabled. +** +** ^Only a single progress handler may be defined at one time per +** [database connection]; setting a new progress handler cancels the +** old one. ^Setting parameter X to NULL disables the progress handler. +** ^The progress handler is also disabled by setting N to a value less +** than 1. +** +** ^If the progress callback returns non-zero, the operation is +** interrupted. This feature can be used to implement a +** "Cancel" button on a GUI progress dialog box. +** +** The progress handler callback must not do anything that will modify +** the database connection that invoked the progress handler. +** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their +** database connections for the meaning of "modify" in this paragraph. +** +*/ +SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); + +/* +** CAPI3REF: Opening A New Database Connection +** CONSTRUCTOR: sqlite3 +** +** ^These routines open an SQLite database file as specified by the +** filename argument. ^The filename argument is interpreted as UTF-8 for +** sqlite3_open() and sqlite3_open_v2() and as UTF-16 in the native byte +** order for sqlite3_open16(). ^(A [database connection] handle is usually +** returned in *ppDb, even if an error occurs. The only exception is that +** if SQLite is unable to allocate memory to hold the [sqlite3] object, +** a NULL will be written into *ppDb instead of a pointer to the [sqlite3] +** object.)^ ^(If the database is opened (and/or created) successfully, then +** [SQLITE_OK] is returned. Otherwise an [error code] is returned.)^ ^The +** [sqlite3_errmsg()] or [sqlite3_errmsg16()] routines can be used to obtain +** an English language description of the error following a failure of any +** of the sqlite3_open() routines. +** +** ^The default encoding will be UTF-8 for databases created using +** sqlite3_open() or sqlite3_open_v2(). ^The default encoding for databases +** created using sqlite3_open16() will be UTF-16 in the native byte order. +** +** Whether or not an error occurs when it is opened, resources +** associated with the [database connection] handle should be released by +** passing it to [sqlite3_close()] when it is no longer required. +** +** The sqlite3_open_v2() interface works like sqlite3_open() +** except that it accepts two additional parameters for additional control +** over the new database connection. ^(The flags parameter to +** sqlite3_open_v2() must include, at a minimum, one of the following +** three flag combinations:)^ +** +**
    +** ^(
    [SQLITE_OPEN_READONLY]
    +**
    The database is opened in read-only mode. If the database does not +** already exist, an error is returned.
    )^ +** +** ^(
    [SQLITE_OPEN_READWRITE]
    +**
    The database is opened for reading and writing if possible, or reading +** only if the file is write protected by the operating system. In either +** case the database must already exist, otherwise an error is returned.
    )^ +** +** ^(
    [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]
    +**
    The database is opened for reading and writing, and is created if +** it does not already exist. This is the behavior that is always used for +** sqlite3_open() and sqlite3_open16().
    )^ +**
    +** +** In addition to the required flags, the following optional flags are +** also supported: +** +**
    +** ^(
    [SQLITE_OPEN_URI]
    +**
    The filename can be interpreted as a URI if this flag is set.
    )^ +** +** ^(
    [SQLITE_OPEN_MEMORY]
    +**
    The database will be opened as an in-memory database. The database +** is named by the "filename" argument for the purposes of cache-sharing, +** if shared cache mode is enabled, but the "filename" is otherwise ignored. +**
    )^ +** +** ^(
    [SQLITE_OPEN_NOMUTEX]
    +**
    The new database connection will use the "multi-thread" +** [threading mode].)^ This means that separate threads are allowed +** to use SQLite at the same time, as long as each thread is using +** a different [database connection]. +** +** ^(
    [SQLITE_OPEN_FULLMUTEX]
    +**
    The new database connection will use the "serialized" +** [threading mode].)^ This means the multiple threads can safely +** attempt to use the same database connection at the same time. +** (Mutexes will block any actual concurrency, but in this mode +** there is no harm in trying.) +** +** ^(
    [SQLITE_OPEN_SHAREDCACHE]
    +**
    The database is opened [shared cache] enabled, overriding +** the default shared cache setting provided by +** [sqlite3_enable_shared_cache()].)^ +** +** ^(
    [SQLITE_OPEN_PRIVATECACHE]
    +**
    The database is opened [shared cache] disabled, overriding +** the default shared cache setting provided by +** [sqlite3_enable_shared_cache()].)^ +** +** [[OPEN_EXRESCODE]] ^(
    [SQLITE_OPEN_EXRESCODE]
    +**
    The database connection comes up in "extended result code mode". +** In other words, the database behaves has if +** [sqlite3_extended_result_codes(db,1)] where called on the database +** connection as soon as the connection is created. In addition to setting +** the extended result code mode, this flag also causes [sqlite3_open_v2()] +** to return an extended result code.
    +** +** [[OPEN_NOFOLLOW]] ^(
    [SQLITE_OPEN_NOFOLLOW]
    +**
    The database filename is not allowed to be a symbolic link
    +**
    )^ +** +** If the 3rd parameter to sqlite3_open_v2() is not one of the +** required combinations shown above optionally combined with other +** [SQLITE_OPEN_READONLY | SQLITE_OPEN_* bits] +** then the behavior is undefined. Historic versions of SQLite +** have silently ignored surplus bits in the flags parameter to +** sqlite3_open_v2(), however that behavior might not be carried through +** into future versions of SQLite and so applications should not rely +** upon it. Note in particular that the SQLITE_OPEN_EXCLUSIVE flag is a no-op +** for sqlite3_open_v2(). The SQLITE_OPEN_EXCLUSIVE does *not* cause +** the open to fail if the database already exists. The SQLITE_OPEN_EXCLUSIVE +** flag is intended for use by the [sqlite3_vfs|VFS interface] only, and not +** by sqlite3_open_v2(). +** +** ^The fourth parameter to sqlite3_open_v2() is the name of the +** [sqlite3_vfs] object that defines the operating system interface that +** the new database connection should use. ^If the fourth parameter is +** a NULL pointer then the default [sqlite3_vfs] object is used. +** +** ^If the filename is ":memory:", then a private, temporary in-memory database +** is created for the connection. ^This in-memory database will vanish when +** the database connection is closed. Future versions of SQLite might +** make use of additional special filenames that begin with the ":" character. +** It is recommended that when a database filename actually does begin with +** a ":" character you should prefix the filename with a pathname such as +** "./" to avoid ambiguity. +** +** ^If the filename is an empty string, then a private, temporary +** on-disk database will be created. ^This private database will be +** automatically deleted as soon as the database connection is closed. +** +** [[URI filenames in sqlite3_open()]]

    URI Filenames

    +** +** ^If [URI filename] interpretation is enabled, and the filename argument +** begins with "file:", then the filename is interpreted as a URI. ^URI +** filename interpretation is enabled if the [SQLITE_OPEN_URI] flag is +** set in the third argument to sqlite3_open_v2(), or if it has +** been enabled globally using the [SQLITE_CONFIG_URI] option with the +** [sqlite3_config()] method or by the [SQLITE_USE_URI] compile-time option. +** URI filename interpretation is turned off +** by default, but future releases of SQLite might enable URI filename +** interpretation by default. See "[URI filenames]" for additional +** information. +** +** URI filenames are parsed according to RFC 3986. ^If the URI contains an +** authority, then it must be either an empty string or the string +** "localhost". ^If the authority is not an empty string or "localhost", an +** error is returned to the caller. ^The fragment component of a URI, if +** present, is ignored. +** +** ^SQLite uses the path component of the URI as the name of the disk file +** which contains the database. ^If the path begins with a '/' character, +** then it is interpreted as an absolute path. ^If the path does not begin +** with a '/' (meaning that the authority section is omitted from the URI) +** then the path is interpreted as a relative path. +** ^(On windows, the first component of an absolute path +** is a drive specification (e.g. "C:").)^ +** +** [[core URI query parameters]] +** The query component of a URI may contain parameters that are interpreted +** either by SQLite itself, or by a [VFS | custom VFS implementation]. +** SQLite and its built-in [VFSes] interpret the +** following query parameters: +** +**
      +**
    • vfs: ^The "vfs" parameter may be used to specify the name of +** a VFS object that provides the operating system interface that should +** be used to access the database file on disk. ^If this option is set to +** an empty string the default VFS object is used. ^Specifying an unknown +** VFS is an error. ^If sqlite3_open_v2() is used and the vfs option is +** present, then the VFS specified by the option takes precedence over +** the value passed as the fourth parameter to sqlite3_open_v2(). +** +**
    • mode: ^(The mode parameter may be set to either "ro", "rw", +** "rwc", or "memory". Attempting to set it to any other value is +** an error)^. +** ^If "ro" is specified, then the database is opened for read-only +** access, just as if the [SQLITE_OPEN_READONLY] flag had been set in the +** third argument to sqlite3_open_v2(). ^If the mode option is set to +** "rw", then the database is opened for read-write (but not create) +** access, as if SQLITE_OPEN_READWRITE (but not SQLITE_OPEN_CREATE) had +** been set. ^Value "rwc" is equivalent to setting both +** SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE. ^If the mode option is +** set to "memory" then a pure [in-memory database] that never reads +** or writes from disk is used. ^It is an error to specify a value for +** the mode parameter that is less restrictive than that specified by +** the flags passed in the third parameter to sqlite3_open_v2(). +** +**
    • cache: ^The cache parameter may be set to either "shared" or +** "private". ^Setting it to "shared" is equivalent to setting the +** SQLITE_OPEN_SHAREDCACHE bit in the flags argument passed to +** sqlite3_open_v2(). ^Setting the cache parameter to "private" is +** equivalent to setting the SQLITE_OPEN_PRIVATECACHE bit. +** ^If sqlite3_open_v2() is used and the "cache" parameter is present in +** a URI filename, its value overrides any behavior requested by setting +** SQLITE_OPEN_PRIVATECACHE or SQLITE_OPEN_SHAREDCACHE flag. +** +**
    • psow: ^The psow parameter indicates whether or not the +** [powersafe overwrite] property does or does not apply to the +** storage media on which the database file resides. +** +**
    • nolock: ^The nolock parameter is a boolean query parameter +** which if set disables file locking in rollback journal modes. This +** is useful for accessing a database on a filesystem that does not +** support locking. Caution: Database corruption might result if two +** or more processes write to the same database and any one of those +** processes uses nolock=1. +** +**
    • immutable: ^The immutable parameter is a boolean query +** parameter that indicates that the database file is stored on +** read-only media. ^When immutable is set, SQLite assumes that the +** database file cannot be changed, even by a process with higher +** privilege, and so the database is opened read-only and all locking +** and change detection is disabled. Caution: Setting the immutable +** property on a database file that does in fact change can result +** in incorrect query results and/or [SQLITE_CORRUPT] errors. +** See also: [SQLITE_IOCAP_IMMUTABLE]. +** +**
    +** +** ^Specifying an unknown parameter in the query component of a URI is not an +** error. Future versions of SQLite might understand additional query +** parameters. See "[query parameters with special meaning to SQLite]" for +** additional information. +** +** [[URI filename examples]]

    URI filename examples

    +** +** +**
    URI filenames Results +**
    file:data.db +** Open the file "data.db" in the current directory. +**
    file:/home/fred/data.db
    +** file:///home/fred/data.db
    +** file://localhost/home/fred/data.db
    +** Open the database file "/home/fred/data.db". +**
    file://darkstar/home/fred/data.db +** An error. "darkstar" is not a recognized authority. +**
    +** file:///C:/Documents%20and%20Settings/fred/Desktop/data.db +** Windows only: Open the file "data.db" on fred's desktop on drive +** C:. Note that the %20 escaping in this example is not strictly +** necessary - space characters can be used literally +** in URI filenames. +**
    file:data.db?mode=ro&cache=private +** Open file "data.db" in the current directory for read-only access. +** Regardless of whether or not shared-cache mode is enabled by +** default, use a private cache. +**
    file:/home/fred/data.db?vfs=unix-dotfile +** Open file "/home/fred/data.db". Use the special VFS "unix-dotfile" +** that uses dot-files in place of posix advisory locking. +**
    file:data.db?mode=readonly +** An error. "readonly" is not a valid option for the "mode" parameter. +** Use "ro" instead: "file:data.db?mode=ro". +**
    +** +** ^URI hexadecimal escape sequences (%HH) are supported within the path and +** query components of a URI. A hexadecimal escape sequence consists of a +** percent sign - "%" - followed by exactly two hexadecimal digits +** specifying an octet value. ^Before the path or query components of a +** URI filename are interpreted, they are encoded using UTF-8 and all +** hexadecimal escape sequences replaced by a single byte containing the +** corresponding octet. If this process generates an invalid UTF-8 encoding, +** the results are undefined. +** +** Note to Windows users: The encoding used for the filename argument +** of sqlite3_open() and sqlite3_open_v2() must be UTF-8, not whatever +** codepage is currently defined. Filenames containing international +** characters must be converted to UTF-8 prior to passing them into +** sqlite3_open() or sqlite3_open_v2(). +** +** Note to Windows Runtime users: The temporary directory must be set +** prior to calling sqlite3_open() or sqlite3_open_v2(). Otherwise, various +** features that require the use of temporary files may fail. +** +** See also: [sqlite3_temp_directory] +*/ +SQLITE_API int sqlite3_open( + const char *filename, /* Database filename (UTF-8) */ + sqlite3 **ppDb /* OUT: SQLite db handle */ +); +SQLITE_API int sqlite3_open16( + const void *filename, /* Database filename (UTF-16) */ + sqlite3 **ppDb /* OUT: SQLite db handle */ +); +SQLITE_API int sqlite3_open_v2( + const char *filename, /* Database filename (UTF-8) */ + sqlite3 **ppDb, /* OUT: SQLite db handle */ + int flags, /* Flags */ + const char *zVfs /* Name of VFS module to use */ +); + +/* +** CAPI3REF: Obtain Values For URI Parameters +** +** These are utility routines, useful to [VFS|custom VFS implementations], +** that check if a database file was a URI that contained a specific query +** parameter, and if so obtains the value of that query parameter. +** +** The first parameter to these interfaces (hereafter referred to +** as F) must be one of: +**
      +**
    • A database filename pointer created by the SQLite core and +** passed into the xOpen() method of a VFS implemention, or +**
    • A filename obtained from [sqlite3_db_filename()], or +**
    • A new filename constructed using [sqlite3_create_filename()]. +**
    +** If the F parameter is not one of the above, then the behavior is +** undefined and probably undesirable. Older versions of SQLite were +** more tolerant of invalid F parameters than newer versions. +** +** If F is a suitable filename (as described in the previous paragraph) +** and if P is the name of the query parameter, then +** sqlite3_uri_parameter(F,P) returns the value of the P +** parameter if it exists or a NULL pointer if P does not appear as a +** query parameter on F. If P is a query parameter of F and it +** has no explicit value, then sqlite3_uri_parameter(F,P) returns +** a pointer to an empty string. +** +** The sqlite3_uri_boolean(F,P,B) routine assumes that P is a boolean +** parameter and returns true (1) or false (0) according to the value +** of P. The sqlite3_uri_boolean(F,P,B) routine returns true (1) if the +** value of query parameter P is one of "yes", "true", or "on" in any +** case or if the value begins with a non-zero number. The +** sqlite3_uri_boolean(F,P,B) routines returns false (0) if the value of +** query parameter P is one of "no", "false", or "off" in any case or +** if the value begins with a numeric zero. If P is not a query +** parameter on F or if the value of P does not match any of the +** above, then sqlite3_uri_boolean(F,P,B) returns (B!=0). +** +** The sqlite3_uri_int64(F,P,D) routine converts the value of P into a +** 64-bit signed integer and returns that integer, or D if P does not +** exist. If the value of P is something other than an integer, then +** zero is returned. +** +** The sqlite3_uri_key(F,N) returns a pointer to the name (not +** the value) of the N-th query parameter for filename F, or a NULL +** pointer if N is less than zero or greater than the number of query +** parameters minus 1. The N value is zero-based so N should be 0 to obtain +** the name of the first query parameter, 1 for the second parameter, and +** so forth. +** +** If F is a NULL pointer, then sqlite3_uri_parameter(F,P) returns NULL and +** sqlite3_uri_boolean(F,P,B) returns B. If F is not a NULL pointer and +** is not a database file pathname pointer that the SQLite core passed +** into the xOpen VFS method, then the behavior of this routine is undefined +** and probably undesirable. +** +** Beginning with SQLite [version 3.31.0] ([dateof:3.31.0]) the input F +** parameter can also be the name of a rollback journal file or WAL file +** in addition to the main database file. Prior to version 3.31.0, these +** routines would only work if F was the name of the main database file. +** When the F parameter is the name of the rollback journal or WAL file, +** it has access to all the same query parameters as were found on the +** main database file. +** +** See the [URI filename] documentation for additional information. +*/ +SQLITE_API const char *sqlite3_uri_parameter(const char *zFilename, const char *zParam); +SQLITE_API int sqlite3_uri_boolean(const char *zFile, const char *zParam, int bDefault); +SQLITE_API sqlite3_int64 sqlite3_uri_int64(const char*, const char*, sqlite3_int64); +SQLITE_API const char *sqlite3_uri_key(const char *zFilename, int N); + +/* +** CAPI3REF: Translate filenames +** +** These routines are available to [VFS|custom VFS implementations] for +** translating filenames between the main database file, the journal file, +** and the WAL file. +** +** If F is the name of an sqlite database file, journal file, or WAL file +** passed by the SQLite core into the VFS, then sqlite3_filename_database(F) +** returns the name of the corresponding database file. +** +** If F is the name of an sqlite database file, journal file, or WAL file +** passed by the SQLite core into the VFS, or if F is a database filename +** obtained from [sqlite3_db_filename()], then sqlite3_filename_journal(F) +** returns the name of the corresponding rollback journal file. +** +** If F is the name of an sqlite database file, journal file, or WAL file +** that was passed by the SQLite core into the VFS, or if F is a database +** filename obtained from [sqlite3_db_filename()], then +** sqlite3_filename_wal(F) returns the name of the corresponding +** WAL file. +** +** In all of the above, if F is not the name of a database, journal or WAL +** filename passed into the VFS from the SQLite core and F is not the +** return value from [sqlite3_db_filename()], then the result is +** undefined and is likely a memory access violation. +*/ +SQLITE_API const char *sqlite3_filename_database(const char*); +SQLITE_API const char *sqlite3_filename_journal(const char*); +SQLITE_API const char *sqlite3_filename_wal(const char*); + +/* +** CAPI3REF: Database File Corresponding To A Journal +** +** ^If X is the name of a rollback or WAL-mode journal file that is +** passed into the xOpen method of [sqlite3_vfs], then +** sqlite3_database_file_object(X) returns a pointer to the [sqlite3_file] +** object that represents the main database file. +** +** This routine is intended for use in custom [VFS] implementations +** only. It is not a general-purpose interface. +** The argument sqlite3_file_object(X) must be a filename pointer that +** has been passed into [sqlite3_vfs].xOpen method where the +** flags parameter to xOpen contains one of the bits +** [SQLITE_OPEN_MAIN_JOURNAL] or [SQLITE_OPEN_WAL]. Any other use +** of this routine results in undefined and probably undesirable +** behavior. +*/ +SQLITE_API sqlite3_file *sqlite3_database_file_object(const char*); + +/* +** CAPI3REF: Create and Destroy VFS Filenames +** +** These interfces are provided for use by [VFS shim] implementations and +** are not useful outside of that context. +** +** The sqlite3_create_filename(D,J,W,N,P) allocates memory to hold a version of +** database filename D with corresponding journal file J and WAL file W and +** with N URI parameters key/values pairs in the array P. The result from +** sqlite3_create_filename(D,J,W,N,P) is a pointer to a database filename that +** is safe to pass to routines like: +**
      +**
    • [sqlite3_uri_parameter()], +**
    • [sqlite3_uri_boolean()], +**
    • [sqlite3_uri_int64()], +**
    • [sqlite3_uri_key()], +**
    • [sqlite3_filename_database()], +**
    • [sqlite3_filename_journal()], or +**
    • [sqlite3_filename_wal()]. +**
    +** If a memory allocation error occurs, sqlite3_create_filename() might +** return a NULL pointer. The memory obtained from sqlite3_create_filename(X) +** must be released by a corresponding call to sqlite3_free_filename(Y). +** +** The P parameter in sqlite3_create_filename(D,J,W,N,P) should be an array +** of 2*N pointers to strings. Each pair of pointers in this array corresponds +** to a key and value for a query parameter. The P parameter may be a NULL +** pointer if N is zero. None of the 2*N pointers in the P array may be +** NULL pointers and key pointers should not be empty strings. +** None of the D, J, or W parameters to sqlite3_create_filename(D,J,W,N,P) may +** be NULL pointers, though they can be empty strings. +** +** The sqlite3_free_filename(Y) routine releases a memory allocation +** previously obtained from sqlite3_create_filename(). Invoking +** sqlite3_free_filename(Y) where Y is a NULL pointer is a harmless no-op. +** +** If the Y parameter to sqlite3_free_filename(Y) is anything other +** than a NULL pointer or a pointer previously acquired from +** sqlite3_create_filename(), then bad things such as heap +** corruption or segfaults may occur. The value Y should not be +** used again after sqlite3_free_filename(Y) has been called. This means +** that if the [sqlite3_vfs.xOpen()] method of a VFS has been called using Y, +** then the corresponding [sqlite3_module.xClose() method should also be +** invoked prior to calling sqlite3_free_filename(Y). +*/ +SQLITE_API char *sqlite3_create_filename( + const char *zDatabase, + const char *zJournal, + const char *zWal, + int nParam, + const char **azParam +); +SQLITE_API void sqlite3_free_filename(char*); + +/* +** CAPI3REF: Error Codes And Messages +** METHOD: sqlite3 +** +** ^If the most recent sqlite3_* API call associated with +** [database connection] D failed, then the sqlite3_errcode(D) interface +** returns the numeric [result code] or [extended result code] for that +** API call. +** ^The sqlite3_extended_errcode() +** interface is the same except that it always returns the +** [extended result code] even when extended result codes are +** disabled. +** +** The values returned by sqlite3_errcode() and/or +** sqlite3_extended_errcode() might change with each API call. +** Except, there are some interfaces that are guaranteed to never +** change the value of the error code. The error-code preserving +** interfaces include the following: +** +**
      +**
    • sqlite3_errcode() +**
    • sqlite3_extended_errcode() +**
    • sqlite3_errmsg() +**
    • sqlite3_errmsg16() +**
    • sqlite3_error_offset() +**
    +** +** ^The sqlite3_errmsg() and sqlite3_errmsg16() return English-language +** text that describes the error, as either UTF-8 or UTF-16 respectively. +** ^(Memory to hold the error message string is managed internally. +** The application does not need to worry about freeing the result. +** However, the error string might be overwritten or deallocated by +** subsequent calls to other SQLite interface functions.)^ +** +** ^The sqlite3_errstr() interface returns the English-language text +** that describes the [result code], as UTF-8. +** ^(Memory to hold the error message string is managed internally +** and must not be freed by the application)^. +** +** ^If the most recent error references a specific token in the input +** SQL, the sqlite3_error_offset() interface returns the byte offset +** of the start of that token. ^The byte offset returned by +** sqlite3_error_offset() assumes that the input SQL is UTF8. +** ^If the most recent error does not reference a specific token in the input +** SQL, then the sqlite3_error_offset() function returns -1. +** +** When the serialized [threading mode] is in use, it might be the +** case that a second error occurs on a separate thread in between +** the time of the first error and the call to these interfaces. +** When that happens, the second error will be reported since these +** interfaces always report the most recent result. To avoid +** this, each thread can obtain exclusive use of the [database connection] D +** by invoking [sqlite3_mutex_enter]([sqlite3_db_mutex](D)) before beginning +** to use D and invoking [sqlite3_mutex_leave]([sqlite3_db_mutex](D)) after +** all calls to the interfaces listed here are completed. +** +** If an interface fails with SQLITE_MISUSE, that means the interface +** was invoked incorrectly by the application. In that case, the +** error code and message may or may not be set. +*/ +SQLITE_API int sqlite3_errcode(sqlite3 *db); +SQLITE_API int sqlite3_extended_errcode(sqlite3 *db); +SQLITE_API const char *sqlite3_errmsg(sqlite3*); +SQLITE_API const void *sqlite3_errmsg16(sqlite3*); +SQLITE_API const char *sqlite3_errstr(int); +SQLITE_API int sqlite3_error_offset(sqlite3 *db); + +/* +** CAPI3REF: Prepared Statement Object +** KEYWORDS: {prepared statement} {prepared statements} +** +** An instance of this object represents a single SQL statement that +** has been compiled into binary form and is ready to be evaluated. +** +** Think of each SQL statement as a separate computer program. The +** original SQL text is source code. A prepared statement object +** is the compiled object code. All SQL must be converted into a +** prepared statement before it can be run. +** +** The life-cycle of a prepared statement object usually goes like this: +** +**
      +**
    1. Create the prepared statement object using [sqlite3_prepare_v2()]. +**
    2. Bind values to [parameters] using the sqlite3_bind_*() +** interfaces. +**
    3. Run the SQL by calling [sqlite3_step()] one or more times. +**
    4. Reset the prepared statement using [sqlite3_reset()] then go back +** to step 2. Do this zero or more times. +**
    5. Destroy the object using [sqlite3_finalize()]. +**
    +*/ +typedef struct sqlite3_stmt sqlite3_stmt; + +/* +** CAPI3REF: Run-time Limits +** METHOD: sqlite3 +** +** ^(This interface allows the size of various constructs to be limited +** on a connection by connection basis. The first parameter is the +** [database connection] whose limit is to be set or queried. The +** second parameter is one of the [limit categories] that define a +** class of constructs to be size limited. The third parameter is the +** new limit for that construct.)^ +** +** ^If the new limit is a negative number, the limit is unchanged. +** ^(For each limit category SQLITE_LIMIT_NAME there is a +** [limits | hard upper bound] +** set at compile-time by a C preprocessor macro called +** [limits | SQLITE_MAX_NAME]. +** (The "_LIMIT_" in the name is changed to "_MAX_".))^ +** ^Attempts to increase a limit above its hard upper bound are +** silently truncated to the hard upper bound. +** +** ^Regardless of whether or not the limit was changed, the +** [sqlite3_limit()] interface returns the prior value of the limit. +** ^Hence, to find the current value of a limit without changing it, +** simply invoke this interface with the third parameter set to -1. +** +** Run-time limits are intended for use in applications that manage +** both their own internal database and also databases that are controlled +** by untrusted external sources. An example application might be a +** web browser that has its own databases for storing history and +** separate databases controlled by JavaScript applications downloaded +** off the Internet. The internal databases can be given the +** large, default limits. Databases managed by external sources can +** be given much smaller limits designed to prevent a denial of service +** attack. Developers might also want to use the [sqlite3_set_authorizer()] +** interface to further control untrusted SQL. The size of the database +** created by an untrusted script can be contained using the +** [max_page_count] [PRAGMA]. +** +** New run-time limit categories may be added in future releases. +*/ +SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); + +/* +** CAPI3REF: Run-Time Limit Categories +** KEYWORDS: {limit category} {*limit categories} +** +** These constants define various performance limits +** that can be lowered at run-time using [sqlite3_limit()]. +** The synopsis of the meanings of the various limits is shown below. +** Additional information is available at [limits | Limits in SQLite]. +** +**
    +** [[SQLITE_LIMIT_LENGTH]] ^(
    SQLITE_LIMIT_LENGTH
    +**
    The maximum size of any string or BLOB or table row, in bytes.
    )^ +** +** [[SQLITE_LIMIT_SQL_LENGTH]] ^(
    SQLITE_LIMIT_SQL_LENGTH
    +**
    The maximum length of an SQL statement, in bytes.
    )^ +** +** [[SQLITE_LIMIT_COLUMN]] ^(
    SQLITE_LIMIT_COLUMN
    +**
    The maximum number of columns in a table definition or in the +** result set of a [SELECT] or the maximum number of columns in an index +** or in an ORDER BY or GROUP BY clause.
    )^ +** +** [[SQLITE_LIMIT_EXPR_DEPTH]] ^(
    SQLITE_LIMIT_EXPR_DEPTH
    +**
    The maximum depth of the parse tree on any expression.
    )^ +** +** [[SQLITE_LIMIT_COMPOUND_SELECT]] ^(
    SQLITE_LIMIT_COMPOUND_SELECT
    +**
    The maximum number of terms in a compound SELECT statement.
    )^ +** +** [[SQLITE_LIMIT_VDBE_OP]] ^(
    SQLITE_LIMIT_VDBE_OP
    +**
    The maximum number of instructions in a virtual machine program +** used to implement an SQL statement. If [sqlite3_prepare_v2()] or +** the equivalent tries to allocate space for more than this many opcodes +** in a single prepared statement, an SQLITE_NOMEM error is returned.
    )^ +** +** [[SQLITE_LIMIT_FUNCTION_ARG]] ^(
    SQLITE_LIMIT_FUNCTION_ARG
    +**
    The maximum number of arguments on a function.
    )^ +** +** [[SQLITE_LIMIT_ATTACHED]] ^(
    SQLITE_LIMIT_ATTACHED
    +**
    The maximum number of [ATTACH | attached databases].)^
    +** +** [[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]] +** ^(
    SQLITE_LIMIT_LIKE_PATTERN_LENGTH
    +**
    The maximum length of the pattern argument to the [LIKE] or +** [GLOB] operators.
    )^ +** +** [[SQLITE_LIMIT_VARIABLE_NUMBER]] +** ^(
    SQLITE_LIMIT_VARIABLE_NUMBER
    +**
    The maximum index number of any [parameter] in an SQL statement.)^ +** +** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(
    SQLITE_LIMIT_TRIGGER_DEPTH
    +**
    The maximum depth of recursion for triggers.
    )^ +** +** [[SQLITE_LIMIT_WORKER_THREADS]] ^(
    SQLITE_LIMIT_WORKER_THREADS
    +**
    The maximum number of auxiliary worker threads that a single +** [prepared statement] may start.
    )^ +**
    +*/ +#define SQLITE_LIMIT_LENGTH 0 +#define SQLITE_LIMIT_SQL_LENGTH 1 +#define SQLITE_LIMIT_COLUMN 2 +#define SQLITE_LIMIT_EXPR_DEPTH 3 +#define SQLITE_LIMIT_COMPOUND_SELECT 4 +#define SQLITE_LIMIT_VDBE_OP 5 +#define SQLITE_LIMIT_FUNCTION_ARG 6 +#define SQLITE_LIMIT_ATTACHED 7 +#define SQLITE_LIMIT_LIKE_PATTERN_LENGTH 8 +#define SQLITE_LIMIT_VARIABLE_NUMBER 9 +#define SQLITE_LIMIT_TRIGGER_DEPTH 10 +#define SQLITE_LIMIT_WORKER_THREADS 11 + +/* +** CAPI3REF: Prepare Flags +** +** These constants define various flags that can be passed into +** "prepFlags" parameter of the [sqlite3_prepare_v3()] and +** [sqlite3_prepare16_v3()] interfaces. +** +** New flags may be added in future releases of SQLite. +** +**
    +** [[SQLITE_PREPARE_PERSISTENT]] ^(
    SQLITE_PREPARE_PERSISTENT
    +**
    The SQLITE_PREPARE_PERSISTENT flag is a hint to the query planner +** that the prepared statement will be retained for a long time and +** probably reused many times.)^ ^Without this flag, [sqlite3_prepare_v3()] +** and [sqlite3_prepare16_v3()] assume that the prepared statement will +** be used just once or at most a few times and then destroyed using +** [sqlite3_finalize()] relatively soon. The current implementation acts +** on this hint by avoiding the use of [lookaside memory] so as not to +** deplete the limited store of lookaside memory. Future versions of +** SQLite may act on this hint differently. +** +** [[SQLITE_PREPARE_NORMALIZE]]
    SQLITE_PREPARE_NORMALIZE
    +**
    The SQLITE_PREPARE_NORMALIZE flag is a no-op. This flag used +** to be required for any prepared statement that wanted to use the +** [sqlite3_normalized_sql()] interface. However, the +** [sqlite3_normalized_sql()] interface is now available to all +** prepared statements, regardless of whether or not they use this +** flag. +** +** [[SQLITE_PREPARE_NO_VTAB]]
    SQLITE_PREPARE_NO_VTAB
    +**
    The SQLITE_PREPARE_NO_VTAB flag causes the SQL compiler +** to return an error (error code SQLITE_ERROR) if the statement uses +** any virtual tables. +**
    +*/ +#define SQLITE_PREPARE_PERSISTENT 0x01 +#define SQLITE_PREPARE_NORMALIZE 0x02 +#define SQLITE_PREPARE_NO_VTAB 0x04 + +/* +** CAPI3REF: Compiling An SQL Statement +** KEYWORDS: {SQL statement compiler} +** METHOD: sqlite3 +** CONSTRUCTOR: sqlite3_stmt +** +** To execute an SQL statement, it must first be compiled into a byte-code +** program using one of these routines. Or, in other words, these routines +** are constructors for the [prepared statement] object. +** +** The preferred routine to use is [sqlite3_prepare_v2()]. The +** [sqlite3_prepare()] interface is legacy and should be avoided. +** [sqlite3_prepare_v3()] has an extra "prepFlags" option that is used +** for special purposes. +** +** The use of the UTF-8 interfaces is preferred, as SQLite currently +** does all parsing using UTF-8. The UTF-16 interfaces are provided +** as a convenience. The UTF-16 interfaces work by converting the +** input text into UTF-8, then invoking the corresponding UTF-8 interface. +** +** The first argument, "db", is a [database connection] obtained from a +** prior successful call to [sqlite3_open()], [sqlite3_open_v2()] or +** [sqlite3_open16()]. The database connection must not have been closed. +** +** The second argument, "zSql", is the statement to be compiled, encoded +** as either UTF-8 or UTF-16. The sqlite3_prepare(), sqlite3_prepare_v2(), +** and sqlite3_prepare_v3() +** interfaces use UTF-8, and sqlite3_prepare16(), sqlite3_prepare16_v2(), +** and sqlite3_prepare16_v3() use UTF-16. +** +** ^If the nByte argument is negative, then zSql is read up to the +** first zero terminator. ^If nByte is positive, then it is the +** number of bytes read from zSql. ^If nByte is zero, then no prepared +** statement is generated. +** If the caller knows that the supplied string is nul-terminated, then +** there is a small performance advantage to passing an nByte parameter that +** is the number of bytes in the input string including +** the nul-terminator. +** +** ^If pzTail is not NULL then *pzTail is made to point to the first byte +** past the end of the first SQL statement in zSql. These routines only +** compile the first statement in zSql, so *pzTail is left pointing to +** what remains uncompiled. +** +** ^*ppStmt is left pointing to a compiled [prepared statement] that can be +** executed using [sqlite3_step()]. ^If there is an error, *ppStmt is set +** to NULL. ^If the input text contains no SQL (if the input is an empty +** string or a comment) then *ppStmt is set to NULL. +** The calling procedure is responsible for deleting the compiled +** SQL statement using [sqlite3_finalize()] after it has finished with it. +** ppStmt may not be NULL. +** +** ^On success, the sqlite3_prepare() family of routines return [SQLITE_OK]; +** otherwise an [error code] is returned. +** +** The sqlite3_prepare_v2(), sqlite3_prepare_v3(), sqlite3_prepare16_v2(), +** and sqlite3_prepare16_v3() interfaces are recommended for all new programs. +** The older interfaces (sqlite3_prepare() and sqlite3_prepare16()) +** are retained for backwards compatibility, but their use is discouraged. +** ^In the "vX" interfaces, the prepared statement +** that is returned (the [sqlite3_stmt] object) contains a copy of the +** original SQL text. This causes the [sqlite3_step()] interface to +** behave differently in three ways: +** +**
      +**
    1. +** ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it +** always used to do, [sqlite3_step()] will automatically recompile the SQL +** statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY] +** retries will occur before sqlite3_step() gives up and returns an error. +**
    2. +** +**
    3. +** ^When an error occurs, [sqlite3_step()] will return one of the detailed +** [error codes] or [extended error codes]. ^The legacy behavior was that +** [sqlite3_step()] would only return a generic [SQLITE_ERROR] result code +** and the application would have to make a second call to [sqlite3_reset()] +** in order to find the underlying cause of the problem. With the "v2" prepare +** interfaces, the underlying reason for the error is returned immediately. +**
    4. +** +**
    5. +** ^If the specific value bound to a [parameter | host parameter] in the +** WHERE clause might influence the choice of query plan for a statement, +** then the statement will be automatically recompiled, as if there had been +** a schema change, on the first [sqlite3_step()] call following any change +** to the [sqlite3_bind_text | bindings] of that [parameter]. +** ^The specific value of a WHERE-clause [parameter] might influence the +** choice of query plan if the parameter is the left-hand side of a [LIKE] +** or [GLOB] operator or if the parameter is compared to an indexed column +** and the [SQLITE_ENABLE_STAT4] compile-time option is enabled. +**
    6. +**
    +** +**

    ^sqlite3_prepare_v3() differs from sqlite3_prepare_v2() only in having +** the extra prepFlags parameter, which is a bit array consisting of zero or +** more of the [SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_*] flags. ^The +** sqlite3_prepare_v2() interface works exactly the same as +** sqlite3_prepare_v3() with a zero prepFlags parameter. +*/ +SQLITE_API int sqlite3_prepare( + sqlite3 *db, /* Database handle */ + const char *zSql, /* SQL statement, UTF-8 encoded */ + int nByte, /* Maximum length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: Statement handle */ + const char **pzTail /* OUT: Pointer to unused portion of zSql */ +); +SQLITE_API int sqlite3_prepare_v2( + sqlite3 *db, /* Database handle */ + const char *zSql, /* SQL statement, UTF-8 encoded */ + int nByte, /* Maximum length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: Statement handle */ + const char **pzTail /* OUT: Pointer to unused portion of zSql */ +); +SQLITE_API int sqlite3_prepare_v3( + sqlite3 *db, /* Database handle */ + const char *zSql, /* SQL statement, UTF-8 encoded */ + int nByte, /* Maximum length of zSql in bytes. */ + unsigned int prepFlags, /* Zero or more SQLITE_PREPARE_ flags */ + sqlite3_stmt **ppStmt, /* OUT: Statement handle */ + const char **pzTail /* OUT: Pointer to unused portion of zSql */ +); +SQLITE_API int sqlite3_prepare16( + sqlite3 *db, /* Database handle */ + const void *zSql, /* SQL statement, UTF-16 encoded */ + int nByte, /* Maximum length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: Statement handle */ + const void **pzTail /* OUT: Pointer to unused portion of zSql */ +); +SQLITE_API int sqlite3_prepare16_v2( + sqlite3 *db, /* Database handle */ + const void *zSql, /* SQL statement, UTF-16 encoded */ + int nByte, /* Maximum length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: Statement handle */ + const void **pzTail /* OUT: Pointer to unused portion of zSql */ +); +SQLITE_API int sqlite3_prepare16_v3( + sqlite3 *db, /* Database handle */ + const void *zSql, /* SQL statement, UTF-16 encoded */ + int nByte, /* Maximum length of zSql in bytes. */ + unsigned int prepFlags, /* Zero or more SQLITE_PREPARE_ flags */ + sqlite3_stmt **ppStmt, /* OUT: Statement handle */ + const void **pzTail /* OUT: Pointer to unused portion of zSql */ +); + +/* +** CAPI3REF: Retrieving Statement SQL +** METHOD: sqlite3_stmt +** +** ^The sqlite3_sql(P) interface returns a pointer to a copy of the UTF-8 +** SQL text used to create [prepared statement] P if P was +** created by [sqlite3_prepare_v2()], [sqlite3_prepare_v3()], +** [sqlite3_prepare16_v2()], or [sqlite3_prepare16_v3()]. +** ^The sqlite3_expanded_sql(P) interface returns a pointer to a UTF-8 +** string containing the SQL text of prepared statement P with +** [bound parameters] expanded. +** ^The sqlite3_normalized_sql(P) interface returns a pointer to a UTF-8 +** string containing the normalized SQL text of prepared statement P. The +** semantics used to normalize a SQL statement are unspecified and subject +** to change. At a minimum, literal values will be replaced with suitable +** placeholders. +** +** ^(For example, if a prepared statement is created using the SQL +** text "SELECT $abc,:xyz" and if parameter $abc is bound to integer 2345 +** and parameter :xyz is unbound, then sqlite3_sql() will return +** the original string, "SELECT $abc,:xyz" but sqlite3_expanded_sql() +** will return "SELECT 2345,NULL".)^ +** +** ^The sqlite3_expanded_sql() interface returns NULL if insufficient memory +** is available to hold the result, or if the result would exceed the +** the maximum string length determined by the [SQLITE_LIMIT_LENGTH]. +** +** ^The [SQLITE_TRACE_SIZE_LIMIT] compile-time option limits the size of +** bound parameter expansions. ^The [SQLITE_OMIT_TRACE] compile-time +** option causes sqlite3_expanded_sql() to always return NULL. +** +** ^The strings returned by sqlite3_sql(P) and sqlite3_normalized_sql(P) +** are managed by SQLite and are automatically freed when the prepared +** statement is finalized. +** ^The string returned by sqlite3_expanded_sql(P), on the other hand, +** is obtained from [sqlite3_malloc()] and must be freed by the application +** by passing it to [sqlite3_free()]. +** +** ^The sqlite3_normalized_sql() interface is only available if +** the [SQLITE_ENABLE_NORMALIZE] compile-time option is defined. +*/ +SQLITE_API const char *sqlite3_sql(sqlite3_stmt *pStmt); +SQLITE_API char *sqlite3_expanded_sql(sqlite3_stmt *pStmt); +#ifdef SQLITE_ENABLE_NORMALIZE +SQLITE_API const char *sqlite3_normalized_sql(sqlite3_stmt *pStmt); +#endif + +/* +** CAPI3REF: Determine If An SQL Statement Writes The Database +** METHOD: sqlite3_stmt +** +** ^The sqlite3_stmt_readonly(X) interface returns true (non-zero) if +** and only if the [prepared statement] X makes no direct changes to +** the content of the database file. +** +** Note that [application-defined SQL functions] or +** [virtual tables] might change the database indirectly as a side effect. +** ^(For example, if an application defines a function "eval()" that +** calls [sqlite3_exec()], then the following SQL statement would +** change the database file through side-effects: +** +**

    +**    SELECT eval('DELETE FROM t1') FROM t2;
    +** 
    +** +** But because the [SELECT] statement does not change the database file +** directly, sqlite3_stmt_readonly() would still return true.)^ +** +** ^Transaction control statements such as [BEGIN], [COMMIT], [ROLLBACK], +** [SAVEPOINT], and [RELEASE] cause sqlite3_stmt_readonly() to return true, +** since the statements themselves do not actually modify the database but +** rather they control the timing of when other statements modify the +** database. ^The [ATTACH] and [DETACH] statements also cause +** sqlite3_stmt_readonly() to return true since, while those statements +** change the configuration of a database connection, they do not make +** changes to the content of the database files on disk. +** ^The sqlite3_stmt_readonly() interface returns true for [BEGIN] since +** [BEGIN] merely sets internal flags, but the [BEGIN|BEGIN IMMEDIATE] and +** [BEGIN|BEGIN EXCLUSIVE] commands do touch the database and so +** sqlite3_stmt_readonly() returns false for those commands. +** +** ^This routine returns false if there is any possibility that the +** statement might change the database file. ^A false return does +** not guarantee that the statement will change the database file. +** ^For example, an UPDATE statement might have a WHERE clause that +** makes it a no-op, but the sqlite3_stmt_readonly() result would still +** be false. ^Similarly, a CREATE TABLE IF NOT EXISTS statement is a +** read-only no-op if the table already exists, but +** sqlite3_stmt_readonly() still returns false for such a statement. +** +** ^If prepared statement X is an [EXPLAIN] or [EXPLAIN QUERY PLAN] +** statement, then sqlite3_stmt_readonly(X) returns the same value as +** if the EXPLAIN or EXPLAIN QUERY PLAN prefix were omitted. +*/ +SQLITE_API int sqlite3_stmt_readonly(sqlite3_stmt *pStmt); + +/* +** CAPI3REF: Query The EXPLAIN Setting For A Prepared Statement +** METHOD: sqlite3_stmt +** +** ^The sqlite3_stmt_isexplain(S) interface returns 1 if the +** prepared statement S is an EXPLAIN statement, or 2 if the +** statement S is an EXPLAIN QUERY PLAN. +** ^The sqlite3_stmt_isexplain(S) interface returns 0 if S is +** an ordinary statement or a NULL pointer. +*/ +SQLITE_API int sqlite3_stmt_isexplain(sqlite3_stmt *pStmt); + +/* +** CAPI3REF: Determine If A Prepared Statement Has Been Reset +** METHOD: sqlite3_stmt +** +** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the +** [prepared statement] S has been stepped at least once using +** [sqlite3_step(S)] but has neither run to completion (returned +** [SQLITE_DONE] from [sqlite3_step(S)]) nor +** been reset using [sqlite3_reset(S)]. ^The sqlite3_stmt_busy(S) +** interface returns false if S is a NULL pointer. If S is not a +** NULL pointer and is not a pointer to a valid [prepared statement] +** object, then the behavior is undefined and probably undesirable. +** +** This interface can be used in combination [sqlite3_next_stmt()] +** to locate all prepared statements associated with a database +** connection that are in need of being reset. This can be used, +** for example, in diagnostic routines to search for prepared +** statements that are holding a transaction open. +*/ +SQLITE_API int sqlite3_stmt_busy(sqlite3_stmt*); + +/* +** CAPI3REF: Dynamically Typed Value Object +** KEYWORDS: {protected sqlite3_value} {unprotected sqlite3_value} +** +** SQLite uses the sqlite3_value object to represent all values +** that can be stored in a database table. SQLite uses dynamic typing +** for the values it stores. ^Values stored in sqlite3_value objects +** can be integers, floating point values, strings, BLOBs, or NULL. +** +** An sqlite3_value object may be either "protected" or "unprotected". +** Some interfaces require a protected sqlite3_value. Other interfaces +** will accept either a protected or an unprotected sqlite3_value. +** Every interface that accepts sqlite3_value arguments specifies +** whether or not it requires a protected sqlite3_value. The +** [sqlite3_value_dup()] interface can be used to construct a new +** protected sqlite3_value from an unprotected sqlite3_value. +** +** The terms "protected" and "unprotected" refer to whether or not +** a mutex is held. An internal mutex is held for a protected +** sqlite3_value object but no mutex is held for an unprotected +** sqlite3_value object. If SQLite is compiled to be single-threaded +** (with [SQLITE_THREADSAFE=0] and with [sqlite3_threadsafe()] returning 0) +** or if SQLite is run in one of reduced mutex modes +** [SQLITE_CONFIG_SINGLETHREAD] or [SQLITE_CONFIG_MULTITHREAD] +** then there is no distinction between protected and unprotected +** sqlite3_value objects and they can be used interchangeably. However, +** for maximum code portability it is recommended that applications +** still make the distinction between protected and unprotected +** sqlite3_value objects even when not strictly required. +** +** ^The sqlite3_value objects that are passed as parameters into the +** implementation of [application-defined SQL functions] are protected. +** ^The sqlite3_value objects returned by [sqlite3_vtab_rhs_value()] +** are protected. +** ^The sqlite3_value object returned by +** [sqlite3_column_value()] is unprotected. +** Unprotected sqlite3_value objects may only be used as arguments +** to [sqlite3_result_value()], [sqlite3_bind_value()], and +** [sqlite3_value_dup()]. +** The [sqlite3_value_blob | sqlite3_value_type()] family of +** interfaces require protected sqlite3_value objects. +*/ +typedef struct sqlite3_value sqlite3_value; + +/* +** CAPI3REF: SQL Function Context Object +** +** The context in which an SQL function executes is stored in an +** sqlite3_context object. ^A pointer to an sqlite3_context object +** is always first parameter to [application-defined SQL functions]. +** The application-defined SQL function implementation will pass this +** pointer through into calls to [sqlite3_result_int | sqlite3_result()], +** [sqlite3_aggregate_context()], [sqlite3_user_data()], +** [sqlite3_context_db_handle()], [sqlite3_get_auxdata()], +** and/or [sqlite3_set_auxdata()]. +*/ +typedef struct sqlite3_context sqlite3_context; + +/* +** CAPI3REF: Binding Values To Prepared Statements +** KEYWORDS: {host parameter} {host parameters} {host parameter name} +** KEYWORDS: {SQL parameter} {SQL parameters} {parameter binding} +** METHOD: sqlite3_stmt +** +** ^(In the SQL statement text input to [sqlite3_prepare_v2()] and its variants, +** literals may be replaced by a [parameter] that matches one of following +** templates: +** +**
      +**
    • ? +**
    • ?NNN +**
    • :VVV +**
    • @VVV +**
    • $VVV +**
    +** +** In the templates above, NNN represents an integer literal, +** and VVV represents an alphanumeric identifier.)^ ^The values of these +** parameters (also called "host parameter names" or "SQL parameters") +** can be set using the sqlite3_bind_*() routines defined here. +** +** ^The first argument to the sqlite3_bind_*() routines is always +** a pointer to the [sqlite3_stmt] object returned from +** [sqlite3_prepare_v2()] or its variants. +** +** ^The second argument is the index of the SQL parameter to be set. +** ^The leftmost SQL parameter has an index of 1. ^When the same named +** SQL parameter is used more than once, second and subsequent +** occurrences have the same index as the first occurrence. +** ^The index for named parameters can be looked up using the +** [sqlite3_bind_parameter_index()] API if desired. ^The index +** for "?NNN" parameters is the value of NNN. +** ^The NNN value must be between 1 and the [sqlite3_limit()] +** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 32766). +** +** ^The third argument is the value to bind to the parameter. +** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16() +** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter +** is ignored and the end result is the same as sqlite3_bind_null(). +** ^If the third parameter to sqlite3_bind_text() is not NULL, then +** it should be a pointer to well-formed UTF8 text. +** ^If the third parameter to sqlite3_bind_text16() is not NULL, then +** it should be a pointer to well-formed UTF16 text. +** ^If the third parameter to sqlite3_bind_text64() is not NULL, then +** it should be a pointer to a well-formed unicode string that is +** either UTF8 if the sixth parameter is SQLITE_UTF8, or UTF16 +** otherwise. +** +** [[byte-order determination rules]] ^The byte-order of +** UTF16 input text is determined by the byte-order mark (BOM, U+FEFF) +** found in first character, which is removed, or in the absence of a BOM +** the byte order is the native byte order of the host +** machine for sqlite3_bind_text16() or the byte order specified in +** the 6th parameter for sqlite3_bind_text64().)^ +** ^If UTF16 input text contains invalid unicode +** characters, then SQLite might change those invalid characters +** into the unicode replacement character: U+FFFD. +** +** ^(In those routines that have a fourth argument, its value is the +** number of bytes in the parameter. To be clear: the value is the +** number of bytes in the value, not the number of characters.)^ +** ^If the fourth parameter to sqlite3_bind_text() or sqlite3_bind_text16() +** is negative, then the length of the string is +** the number of bytes up to the first zero terminator. +** If the fourth parameter to sqlite3_bind_blob() is negative, then +** the behavior is undefined. +** If a non-negative fourth parameter is provided to sqlite3_bind_text() +** or sqlite3_bind_text16() or sqlite3_bind_text64() then +** that parameter must be the byte offset +** where the NUL terminator would occur assuming the string were NUL +** terminated. If any NUL characters occurs at byte offsets less than +** the value of the fourth parameter then the resulting string value will +** contain embedded NULs. The result of expressions involving strings +** with embedded NULs is undefined. +** +** ^The fifth argument to the BLOB and string binding interfaces controls +** or indicates the lifetime of the object referenced by the third parameter. +** These three options exist: +** ^ (1) A destructor to dispose of the BLOB or string after SQLite has finished +** with it may be passed. ^It is called to dispose of the BLOB or string even +** if the call to the bind API fails, except the destructor is not called if +** the third parameter is a NULL pointer or the fourth parameter is negative. +** ^ (2) The special constant, [SQLITE_STATIC], may be passsed to indicate that +** the application remains responsible for disposing of the object. ^In this +** case, the object and the provided pointer to it must remain valid until +** either the prepared statement is finalized or the same SQL parameter is +** bound to something else, whichever occurs sooner. +** ^ (3) The constant, [SQLITE_TRANSIENT], may be passed to indicate that the +** object is to be copied prior to the return from sqlite3_bind_*(). ^The +** object and pointer to it must remain valid until then. ^SQLite will then +** manage the lifetime of its private copy. +** +** ^The sixth argument to sqlite3_bind_text64() must be one of +** [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE] +** to specify the encoding of the text in the third parameter. If +** the sixth argument to sqlite3_bind_text64() is not one of the +** allowed values shown above, or if the text encoding is different +** from the encoding specified by the sixth parameter, then the behavior +** is undefined. +** +** ^The sqlite3_bind_zeroblob() routine binds a BLOB of length N that +** is filled with zeroes. ^A zeroblob uses a fixed amount of memory +** (just an integer to hold its size) while it is being processed. +** Zeroblobs are intended to serve as placeholders for BLOBs whose +** content is later written using +** [sqlite3_blob_open | incremental BLOB I/O] routines. +** ^A negative value for the zeroblob results in a zero-length BLOB. +** +** ^The sqlite3_bind_pointer(S,I,P,T,D) routine causes the I-th parameter in +** [prepared statement] S to have an SQL value of NULL, but to also be +** associated with the pointer P of type T. ^D is either a NULL pointer or +** a pointer to a destructor function for P. ^SQLite will invoke the +** destructor D with a single argument of P when it is finished using +** P. The T parameter should be a static string, preferably a string +** literal. The sqlite3_bind_pointer() routine is part of the +** [pointer passing interface] added for SQLite 3.20.0. +** +** ^If any of the sqlite3_bind_*() routines are called with a NULL pointer +** for the [prepared statement] or with a prepared statement for which +** [sqlite3_step()] has been called more recently than [sqlite3_reset()], +** then the call will return [SQLITE_MISUSE]. If any sqlite3_bind_() +** routine is passed a [prepared statement] that has been finalized, the +** result is undefined and probably harmful. +** +** ^Bindings are not cleared by the [sqlite3_reset()] routine. +** ^Unbound parameters are interpreted as NULL. +** +** ^The sqlite3_bind_* routines return [SQLITE_OK] on success or an +** [error code] if anything goes wrong. +** ^[SQLITE_TOOBIG] might be returned if the size of a string or BLOB +** exceeds limits imposed by [sqlite3_limit]([SQLITE_LIMIT_LENGTH]) or +** [SQLITE_MAX_LENGTH]. +** ^[SQLITE_RANGE] is returned if the parameter +** index is out of range. ^[SQLITE_NOMEM] is returned if malloc() fails. +** +** See also: [sqlite3_bind_parameter_count()], +** [sqlite3_bind_parameter_name()], and [sqlite3_bind_parameter_index()]. +*/ +SQLITE_API int sqlite3_bind_blob(sqlite3_stmt*, int, const void*, int n, void(*)(void*)); +SQLITE_API int sqlite3_bind_blob64(sqlite3_stmt*, int, const void*, sqlite3_uint64, + void(*)(void*)); +SQLITE_API int sqlite3_bind_double(sqlite3_stmt*, int, double); +SQLITE_API int sqlite3_bind_int(sqlite3_stmt*, int, int); +SQLITE_API int sqlite3_bind_int64(sqlite3_stmt*, int, sqlite3_int64); +SQLITE_API int sqlite3_bind_null(sqlite3_stmt*, int); +SQLITE_API int sqlite3_bind_text(sqlite3_stmt*,int,const char*,int,void(*)(void*)); +SQLITE_API int sqlite3_bind_text16(sqlite3_stmt*, int, const void*, int, void(*)(void*)); +SQLITE_API int sqlite3_bind_text64(sqlite3_stmt*, int, const char*, sqlite3_uint64, + void(*)(void*), unsigned char encoding); +SQLITE_API int sqlite3_bind_value(sqlite3_stmt*, int, const sqlite3_value*); +SQLITE_API int sqlite3_bind_pointer(sqlite3_stmt*, int, void*, const char*,void(*)(void*)); +SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt*, int, int n); +SQLITE_API int sqlite3_bind_zeroblob64(sqlite3_stmt*, int, sqlite3_uint64); + +/* +** CAPI3REF: Number Of SQL Parameters +** METHOD: sqlite3_stmt +** +** ^This routine can be used to find the number of [SQL parameters] +** in a [prepared statement]. SQL parameters are tokens of the +** form "?", "?NNN", ":AAA", "$AAA", or "@AAA" that serve as +** placeholders for values that are [sqlite3_bind_blob | bound] +** to the parameters at a later time. +** +** ^(This routine actually returns the index of the largest (rightmost) +** parameter. For all forms except ?NNN, this will correspond to the +** number of unique parameters. If parameters of the ?NNN form are used, +** there may be gaps in the list.)^ +** +** See also: [sqlite3_bind_blob|sqlite3_bind()], +** [sqlite3_bind_parameter_name()], and +** [sqlite3_bind_parameter_index()]. +*/ +SQLITE_API int sqlite3_bind_parameter_count(sqlite3_stmt*); + +/* +** CAPI3REF: Name Of A Host Parameter +** METHOD: sqlite3_stmt +** +** ^The sqlite3_bind_parameter_name(P,N) interface returns +** the name of the N-th [SQL parameter] in the [prepared statement] P. +** ^(SQL parameters of the form "?NNN" or ":AAA" or "@AAA" or "$AAA" +** have a name which is the string "?NNN" or ":AAA" or "@AAA" or "$AAA" +** respectively. +** In other words, the initial ":" or "$" or "@" or "?" +** is included as part of the name.)^ +** ^Parameters of the form "?" without a following integer have no name +** and are referred to as "nameless" or "anonymous parameters". +** +** ^The first host parameter has an index of 1, not 0. +** +** ^If the value N is out of range or if the N-th parameter is +** nameless, then NULL is returned. ^The returned string is +** always in UTF-8 encoding even if the named parameter was +** originally specified as UTF-16 in [sqlite3_prepare16()], +** [sqlite3_prepare16_v2()], or [sqlite3_prepare16_v3()]. +** +** See also: [sqlite3_bind_blob|sqlite3_bind()], +** [sqlite3_bind_parameter_count()], and +** [sqlite3_bind_parameter_index()]. +*/ +SQLITE_API const char *sqlite3_bind_parameter_name(sqlite3_stmt*, int); + +/* +** CAPI3REF: Index Of A Parameter With A Given Name +** METHOD: sqlite3_stmt +** +** ^Return the index of an SQL parameter given its name. ^The +** index value returned is suitable for use as the second +** parameter to [sqlite3_bind_blob|sqlite3_bind()]. ^A zero +** is returned if no matching parameter is found. ^The parameter +** name must be given in UTF-8 even if the original statement +** was prepared from UTF-16 text using [sqlite3_prepare16_v2()] or +** [sqlite3_prepare16_v3()]. +** +** See also: [sqlite3_bind_blob|sqlite3_bind()], +** [sqlite3_bind_parameter_count()], and +** [sqlite3_bind_parameter_name()]. +*/ +SQLITE_API int sqlite3_bind_parameter_index(sqlite3_stmt*, const char *zName); + +/* +** CAPI3REF: Reset All Bindings On A Prepared Statement +** METHOD: sqlite3_stmt +** +** ^Contrary to the intuition of many, [sqlite3_reset()] does not reset +** the [sqlite3_bind_blob | bindings] on a [prepared statement]. +** ^Use this routine to reset all host parameters to NULL. +*/ +SQLITE_API int sqlite3_clear_bindings(sqlite3_stmt*); + +/* +** CAPI3REF: Number Of Columns In A Result Set +** METHOD: sqlite3_stmt +** +** ^Return the number of columns in the result set returned by the +** [prepared statement]. ^If this routine returns 0, that means the +** [prepared statement] returns no data (for example an [UPDATE]). +** ^However, just because this routine returns a positive number does not +** mean that one or more rows of data will be returned. ^A SELECT statement +** will always have a positive sqlite3_column_count() but depending on the +** WHERE clause constraints and the table content, it might return no rows. +** +** See also: [sqlite3_data_count()] +*/ +SQLITE_API int sqlite3_column_count(sqlite3_stmt *pStmt); + +/* +** CAPI3REF: Column Names In A Result Set +** METHOD: sqlite3_stmt +** +** ^These routines return the name assigned to a particular column +** in the result set of a [SELECT] statement. ^The sqlite3_column_name() +** interface returns a pointer to a zero-terminated UTF-8 string +** and sqlite3_column_name16() returns a pointer to a zero-terminated +** UTF-16 string. ^The first parameter is the [prepared statement] +** that implements the [SELECT] statement. ^The second parameter is the +** column number. ^The leftmost column is number 0. +** +** ^The returned string pointer is valid until either the [prepared statement] +** is destroyed by [sqlite3_finalize()] or until the statement is automatically +** reprepared by the first call to [sqlite3_step()] for a particular run +** or until the next call to +** sqlite3_column_name() or sqlite3_column_name16() on the same column. +** +** ^If sqlite3_malloc() fails during the processing of either routine +** (for example during a conversion from UTF-8 to UTF-16) then a +** NULL pointer is returned. +** +** ^The name of a result column is the value of the "AS" clause for +** that column, if there is an AS clause. If there is no AS clause +** then the name of the column is unspecified and may change from +** one release of SQLite to the next. +*/ +SQLITE_API const char *sqlite3_column_name(sqlite3_stmt*, int N); +SQLITE_API const void *sqlite3_column_name16(sqlite3_stmt*, int N); + +/* +** CAPI3REF: Source Of Data In A Query Result +** METHOD: sqlite3_stmt +** +** ^These routines provide a means to determine the database, table, and +** table column that is the origin of a particular result column in +** [SELECT] statement. +** ^The name of the database or table or column can be returned as +** either a UTF-8 or UTF-16 string. ^The _database_ routines return +** the database name, the _table_ routines return the table name, and +** the origin_ routines return the column name. +** ^The returned string is valid until the [prepared statement] is destroyed +** using [sqlite3_finalize()] or until the statement is automatically +** reprepared by the first call to [sqlite3_step()] for a particular run +** or until the same information is requested +** again in a different encoding. +** +** ^The names returned are the original un-aliased names of the +** database, table, and column. +** +** ^The first argument to these interfaces is a [prepared statement]. +** ^These functions return information about the Nth result column returned by +** the statement, where N is the second function argument. +** ^The left-most column is column 0 for these routines. +** +** ^If the Nth column returned by the statement is an expression or +** subquery and is not a column value, then all of these functions return +** NULL. ^These routines might also return NULL if a memory allocation error +** occurs. ^Otherwise, they return the name of the attached database, table, +** or column that query result column was extracted from. +** +** ^As with all other SQLite APIs, those whose names end with "16" return +** UTF-16 encoded strings and the other functions return UTF-8. +** +** ^These APIs are only available if the library was compiled with the +** [SQLITE_ENABLE_COLUMN_METADATA] C-preprocessor symbol. +** +** If two or more threads call one or more +** [sqlite3_column_database_name | column metadata interfaces] +** for the same [prepared statement] and result column +** at the same time then the results are undefined. +*/ +SQLITE_API const char *sqlite3_column_database_name(sqlite3_stmt*,int); +SQLITE_API const void *sqlite3_column_database_name16(sqlite3_stmt*,int); +SQLITE_API const char *sqlite3_column_table_name(sqlite3_stmt*,int); +SQLITE_API const void *sqlite3_column_table_name16(sqlite3_stmt*,int); +SQLITE_API const char *sqlite3_column_origin_name(sqlite3_stmt*,int); +SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt*,int); + +/* +** CAPI3REF: Declared Datatype Of A Query Result +** METHOD: sqlite3_stmt +** +** ^(The first parameter is a [prepared statement]. +** If this statement is a [SELECT] statement and the Nth column of the +** returned result set of that [SELECT] is a table column (not an +** expression or subquery) then the declared type of the table +** column is returned.)^ ^If the Nth column of the result set is an +** expression or subquery, then a NULL pointer is returned. +** ^The returned string is always UTF-8 encoded. +** +** ^(For example, given the database schema: +** +** CREATE TABLE t1(c1 VARIANT); +** +** and the following statement to be compiled: +** +** SELECT c1 + 1, c1 FROM t1; +** +** this routine would return the string "VARIANT" for the second result +** column (i==1), and a NULL pointer for the first result column (i==0).)^ +** +** ^SQLite uses dynamic run-time typing. ^So just because a column +** is declared to contain a particular type does not mean that the +** data stored in that column is of the declared type. SQLite is +** strongly typed, but the typing is dynamic not static. ^Type +** is associated with individual values, not with the containers +** used to hold those values. +*/ +SQLITE_API const char *sqlite3_column_decltype(sqlite3_stmt*,int); +SQLITE_API const void *sqlite3_column_decltype16(sqlite3_stmt*,int); + +/* +** CAPI3REF: Evaluate An SQL Statement +** METHOD: sqlite3_stmt +** +** After a [prepared statement] has been prepared using any of +** [sqlite3_prepare_v2()], [sqlite3_prepare_v3()], [sqlite3_prepare16_v2()], +** or [sqlite3_prepare16_v3()] or one of the legacy +** interfaces [sqlite3_prepare()] or [sqlite3_prepare16()], this function +** must be called one or more times to evaluate the statement. +** +** The details of the behavior of the sqlite3_step() interface depend +** on whether the statement was prepared using the newer "vX" interfaces +** [sqlite3_prepare_v3()], [sqlite3_prepare_v2()], [sqlite3_prepare16_v3()], +** [sqlite3_prepare16_v2()] or the older legacy +** interfaces [sqlite3_prepare()] and [sqlite3_prepare16()]. The use of the +** new "vX" interface is recommended for new applications but the legacy +** interface will continue to be supported. +** +** ^In the legacy interface, the return value will be either [SQLITE_BUSY], +** [SQLITE_DONE], [SQLITE_ROW], [SQLITE_ERROR], or [SQLITE_MISUSE]. +** ^With the "v2" interface, any of the other [result codes] or +** [extended result codes] might be returned as well. +** +** ^[SQLITE_BUSY] means that the database engine was unable to acquire the +** database locks it needs to do its job. ^If the statement is a [COMMIT] +** or occurs outside of an explicit transaction, then you can retry the +** statement. If the statement is not a [COMMIT] and occurs within an +** explicit transaction then you should rollback the transaction before +** continuing. +** +** ^[SQLITE_DONE] means that the statement has finished executing +** successfully. sqlite3_step() should not be called again on this virtual +** machine without first calling [sqlite3_reset()] to reset the virtual +** machine back to its initial state. +** +** ^If the SQL statement being executed returns any data, then [SQLITE_ROW] +** is returned each time a new row of data is ready for processing by the +** caller. The values may be accessed using the [column access functions]. +** sqlite3_step() is called again to retrieve the next row of data. +** +** ^[SQLITE_ERROR] means that a run-time error (such as a constraint +** violation) has occurred. sqlite3_step() should not be called again on +** the VM. More information may be found by calling [sqlite3_errmsg()]. +** ^With the legacy interface, a more specific error code (for example, +** [SQLITE_INTERRUPT], [SQLITE_SCHEMA], [SQLITE_CORRUPT], and so forth) +** can be obtained by calling [sqlite3_reset()] on the +** [prepared statement]. ^In the "v2" interface, +** the more specific error code is returned directly by sqlite3_step(). +** +** [SQLITE_MISUSE] means that the this routine was called inappropriately. +** Perhaps it was called on a [prepared statement] that has +** already been [sqlite3_finalize | finalized] or on one that had +** previously returned [SQLITE_ERROR] or [SQLITE_DONE]. Or it could +** be the case that the same database connection is being used by two or +** more threads at the same moment in time. +** +** For all versions of SQLite up to and including 3.6.23.1, a call to +** [sqlite3_reset()] was required after sqlite3_step() returned anything +** other than [SQLITE_ROW] before any subsequent invocation of +** sqlite3_step(). Failure to reset the prepared statement using +** [sqlite3_reset()] would result in an [SQLITE_MISUSE] return from +** sqlite3_step(). But after [version 3.6.23.1] ([dateof:3.6.23.1], +** sqlite3_step() began +** calling [sqlite3_reset()] automatically in this circumstance rather +** than returning [SQLITE_MISUSE]. This is not considered a compatibility +** break because any application that ever receives an SQLITE_MISUSE error +** is broken by definition. The [SQLITE_OMIT_AUTORESET] compile-time option +** can be used to restore the legacy behavior. +** +** Goofy Interface Alert: In the legacy interface, the sqlite3_step() +** API always returns a generic error code, [SQLITE_ERROR], following any +** error other than [SQLITE_BUSY] and [SQLITE_MISUSE]. You must call +** [sqlite3_reset()] or [sqlite3_finalize()] in order to find one of the +** specific [error codes] that better describes the error. +** We admit that this is a goofy design. The problem has been fixed +** with the "v2" interface. If you prepare all of your SQL statements +** using [sqlite3_prepare_v3()] or [sqlite3_prepare_v2()] +** or [sqlite3_prepare16_v2()] or [sqlite3_prepare16_v3()] instead +** of the legacy [sqlite3_prepare()] and [sqlite3_prepare16()] interfaces, +** then the more specific [error codes] are returned directly +** by sqlite3_step(). The use of the "vX" interfaces is recommended. +*/ +SQLITE_API int sqlite3_step(sqlite3_stmt*); + +/* +** CAPI3REF: Number of columns in a result set +** METHOD: sqlite3_stmt +** +** ^The sqlite3_data_count(P) interface returns the number of columns in the +** current row of the result set of [prepared statement] P. +** ^If prepared statement P does not have results ready to return +** (via calls to the [sqlite3_column_int | sqlite3_column()] family of +** interfaces) then sqlite3_data_count(P) returns 0. +** ^The sqlite3_data_count(P) routine also returns 0 if P is a NULL pointer. +** ^The sqlite3_data_count(P) routine returns 0 if the previous call to +** [sqlite3_step](P) returned [SQLITE_DONE]. ^The sqlite3_data_count(P) +** will return non-zero if previous call to [sqlite3_step](P) returned +** [SQLITE_ROW], except in the case of the [PRAGMA incremental_vacuum] +** where it always returns zero since each step of that multi-step +** pragma returns 0 columns of data. +** +** See also: [sqlite3_column_count()] +*/ +SQLITE_API int sqlite3_data_count(sqlite3_stmt *pStmt); + +/* +** CAPI3REF: Fundamental Datatypes +** KEYWORDS: SQLITE_TEXT +** +** ^(Every value in SQLite has one of five fundamental datatypes: +** +**
      +**
    • 64-bit signed integer +**
    • 64-bit IEEE floating point number +**
    • string +**
    • BLOB +**
    • NULL +**
    )^ +** +** These constants are codes for each of those types. +** +** Note that the SQLITE_TEXT constant was also used in SQLite version 2 +** for a completely different meaning. Software that links against both +** SQLite version 2 and SQLite version 3 should use SQLITE3_TEXT, not +** SQLITE_TEXT. +*/ +#define SQLITE_INTEGER 1 +#define SQLITE_FLOAT 2 +#define SQLITE_BLOB 4 +#define SQLITE_NULL 5 +#ifdef SQLITE_TEXT +# undef SQLITE_TEXT +#else +# define SQLITE_TEXT 3 +#endif +#define SQLITE3_TEXT 3 + +/* +** CAPI3REF: Result Values From A Query +** KEYWORDS: {column access functions} +** METHOD: sqlite3_stmt +** +** Summary: +**
    +**
    sqlite3_column_blobBLOB result +**
    sqlite3_column_doubleREAL result +**
    sqlite3_column_int32-bit INTEGER result +**
    sqlite3_column_int6464-bit INTEGER result +**
    sqlite3_column_textUTF-8 TEXT result +**
    sqlite3_column_text16UTF-16 TEXT result +**
    sqlite3_column_valueThe result as an +** [sqlite3_value|unprotected sqlite3_value] object. +**
        +**
    sqlite3_column_bytesSize of a BLOB +** or a UTF-8 TEXT result in bytes +**
    sqlite3_column_bytes16   +** →  Size of UTF-16 +** TEXT in bytes +**
    sqlite3_column_typeDefault +** datatype of the result +**
    +** +** Details: +** +** ^These routines return information about a single column of the current +** result row of a query. ^In every case the first argument is a pointer +** to the [prepared statement] that is being evaluated (the [sqlite3_stmt*] +** that was returned from [sqlite3_prepare_v2()] or one of its variants) +** and the second argument is the index of the column for which information +** should be returned. ^The leftmost column of the result set has the index 0. +** ^The number of columns in the result can be determined using +** [sqlite3_column_count()]. +** +** If the SQL statement does not currently point to a valid row, or if the +** column index is out of range, the result is undefined. +** These routines may only be called when the most recent call to +** [sqlite3_step()] has returned [SQLITE_ROW] and neither +** [sqlite3_reset()] nor [sqlite3_finalize()] have been called subsequently. +** If any of these routines are called after [sqlite3_reset()] or +** [sqlite3_finalize()] or after [sqlite3_step()] has returned +** something other than [SQLITE_ROW], the results are undefined. +** If [sqlite3_step()] or [sqlite3_reset()] or [sqlite3_finalize()] +** are called from a different thread while any of these routines +** are pending, then the results are undefined. +** +** The first six interfaces (_blob, _double, _int, _int64, _text, and _text16) +** each return the value of a result column in a specific data format. If +** the result column is not initially in the requested format (for example, +** if the query returns an integer but the sqlite3_column_text() interface +** is used to extract the value) then an automatic type conversion is performed. +** +** ^The sqlite3_column_type() routine returns the +** [SQLITE_INTEGER | datatype code] for the initial data type +** of the result column. ^The returned value is one of [SQLITE_INTEGER], +** [SQLITE_FLOAT], [SQLITE_TEXT], [SQLITE_BLOB], or [SQLITE_NULL]. +** The return value of sqlite3_column_type() can be used to decide which +** of the first six interface should be used to extract the column value. +** The value returned by sqlite3_column_type() is only meaningful if no +** automatic type conversions have occurred for the value in question. +** After a type conversion, the result of calling sqlite3_column_type() +** is undefined, though harmless. Future +** versions of SQLite may change the behavior of sqlite3_column_type() +** following a type conversion. +** +** If the result is a BLOB or a TEXT string, then the sqlite3_column_bytes() +** or sqlite3_column_bytes16() interfaces can be used to determine the size +** of that BLOB or string. +** +** ^If the result is a BLOB or UTF-8 string then the sqlite3_column_bytes() +** routine returns the number of bytes in that BLOB or string. +** ^If the result is a UTF-16 string, then sqlite3_column_bytes() converts +** the string to UTF-8 and then returns the number of bytes. +** ^If the result is a numeric value then sqlite3_column_bytes() uses +** [sqlite3_snprintf()] to convert that value to a UTF-8 string and returns +** the number of bytes in that string. +** ^If the result is NULL, then sqlite3_column_bytes() returns zero. +** +** ^If the result is a BLOB or UTF-16 string then the sqlite3_column_bytes16() +** routine returns the number of bytes in that BLOB or string. +** ^If the result is a UTF-8 string, then sqlite3_column_bytes16() converts +** the string to UTF-16 and then returns the number of bytes. +** ^If the result is a numeric value then sqlite3_column_bytes16() uses +** [sqlite3_snprintf()] to convert that value to a UTF-16 string and returns +** the number of bytes in that string. +** ^If the result is NULL, then sqlite3_column_bytes16() returns zero. +** +** ^The values returned by [sqlite3_column_bytes()] and +** [sqlite3_column_bytes16()] do not include the zero terminators at the end +** of the string. ^For clarity: the values returned by +** [sqlite3_column_bytes()] and [sqlite3_column_bytes16()] are the number of +** bytes in the string, not the number of characters. +** +** ^Strings returned by sqlite3_column_text() and sqlite3_column_text16(), +** even empty strings, are always zero-terminated. ^The return +** value from sqlite3_column_blob() for a zero-length BLOB is a NULL pointer. +** +** ^Strings returned by sqlite3_column_text16() always have the endianness +** which is native to the platform, regardless of the text encoding set +** for the database. +** +** Warning: ^The object returned by [sqlite3_column_value()] is an +** [unprotected sqlite3_value] object. In a multithreaded environment, +** an unprotected sqlite3_value object may only be used safely with +** [sqlite3_bind_value()] and [sqlite3_result_value()]. +** If the [unprotected sqlite3_value] object returned by +** [sqlite3_column_value()] is used in any other way, including calls +** to routines like [sqlite3_value_int()], [sqlite3_value_text()], +** or [sqlite3_value_bytes()], the behavior is not threadsafe. +** Hence, the sqlite3_column_value() interface +** is normally only useful within the implementation of +** [application-defined SQL functions] or [virtual tables], not within +** top-level application code. +** +** These routines may attempt to convert the datatype of the result. +** ^For example, if the internal representation is FLOAT and a text result +** is requested, [sqlite3_snprintf()] is used internally to perform the +** conversion automatically. ^(The following table details the conversions +** that are applied: +** +**
    +** +**
    Internal
    Type
    Requested
    Type
    Conversion +** +**
    NULL INTEGER Result is 0 +**
    NULL FLOAT Result is 0.0 +**
    NULL TEXT Result is a NULL pointer +**
    NULL BLOB Result is a NULL pointer +**
    INTEGER FLOAT Convert from integer to float +**
    INTEGER TEXT ASCII rendering of the integer +**
    INTEGER BLOB Same as INTEGER->TEXT +**
    FLOAT INTEGER [CAST] to INTEGER +**
    FLOAT TEXT ASCII rendering of the float +**
    FLOAT BLOB [CAST] to BLOB +**
    TEXT INTEGER [CAST] to INTEGER +**
    TEXT FLOAT [CAST] to REAL +**
    TEXT BLOB No change +**
    BLOB INTEGER [CAST] to INTEGER +**
    BLOB FLOAT [CAST] to REAL +**
    BLOB TEXT [CAST] to TEXT, ensure zero terminator +**
    +**
    )^ +** +** Note that when type conversions occur, pointers returned by prior +** calls to sqlite3_column_blob(), sqlite3_column_text(), and/or +** sqlite3_column_text16() may be invalidated. +** Type conversions and pointer invalidations might occur +** in the following cases: +** +**
      +**
    • The initial content is a BLOB and sqlite3_column_text() or +** sqlite3_column_text16() is called. A zero-terminator might +** need to be added to the string.
    • +**
    • The initial content is UTF-8 text and sqlite3_column_bytes16() or +** sqlite3_column_text16() is called. The content must be converted +** to UTF-16.
    • +**
    • The initial content is UTF-16 text and sqlite3_column_bytes() or +** sqlite3_column_text() is called. The content must be converted +** to UTF-8.
    • +**
    +** +** ^Conversions between UTF-16be and UTF-16le are always done in place and do +** not invalidate a prior pointer, though of course the content of the buffer +** that the prior pointer references will have been modified. Other kinds +** of conversion are done in place when it is possible, but sometimes they +** are not possible and in those cases prior pointers are invalidated. +** +** The safest policy is to invoke these routines +** in one of the following ways: +** +**
      +**
    • sqlite3_column_text() followed by sqlite3_column_bytes()
    • +**
    • sqlite3_column_blob() followed by sqlite3_column_bytes()
    • +**
    • sqlite3_column_text16() followed by sqlite3_column_bytes16()
    • +**
    +** +** In other words, you should call sqlite3_column_text(), +** sqlite3_column_blob(), or sqlite3_column_text16() first to force the result +** into the desired format, then invoke sqlite3_column_bytes() or +** sqlite3_column_bytes16() to find the size of the result. Do not mix calls +** to sqlite3_column_text() or sqlite3_column_blob() with calls to +** sqlite3_column_bytes16(), and do not mix calls to sqlite3_column_text16() +** with calls to sqlite3_column_bytes(). +** +** ^The pointers returned are valid until a type conversion occurs as +** described above, or until [sqlite3_step()] or [sqlite3_reset()] or +** [sqlite3_finalize()] is called. ^The memory space used to hold strings +** and BLOBs is freed automatically. Do not pass the pointers returned +** from [sqlite3_column_blob()], [sqlite3_column_text()], etc. into +** [sqlite3_free()]. +** +** As long as the input parameters are correct, these routines will only +** fail if an out-of-memory error occurs during a format conversion. +** Only the following subset of interfaces are subject to out-of-memory +** errors: +** +**
      +**
    • sqlite3_column_blob() +**
    • sqlite3_column_text() +**
    • sqlite3_column_text16() +**
    • sqlite3_column_bytes() +**
    • sqlite3_column_bytes16() +**
    +** +** If an out-of-memory error occurs, then the return value from these +** routines is the same as if the column had contained an SQL NULL value. +** Valid SQL NULL returns can be distinguished from out-of-memory errors +** by invoking the [sqlite3_errcode()] immediately after the suspect +** return value is obtained and before any +** other SQLite interface is called on the same [database connection]. +*/ +SQLITE_API const void *sqlite3_column_blob(sqlite3_stmt*, int iCol); +SQLITE_API double sqlite3_column_double(sqlite3_stmt*, int iCol); +SQLITE_API int sqlite3_column_int(sqlite3_stmt*, int iCol); +SQLITE_API sqlite3_int64 sqlite3_column_int64(sqlite3_stmt*, int iCol); +SQLITE_API const unsigned char *sqlite3_column_text(sqlite3_stmt*, int iCol); +SQLITE_API const void *sqlite3_column_text16(sqlite3_stmt*, int iCol); +SQLITE_API sqlite3_value *sqlite3_column_value(sqlite3_stmt*, int iCol); +SQLITE_API int sqlite3_column_bytes(sqlite3_stmt*, int iCol); +SQLITE_API int sqlite3_column_bytes16(sqlite3_stmt*, int iCol); +SQLITE_API int sqlite3_column_type(sqlite3_stmt*, int iCol); + +/* +** CAPI3REF: Destroy A Prepared Statement Object +** DESTRUCTOR: sqlite3_stmt +** +** ^The sqlite3_finalize() function is called to delete a [prepared statement]. +** ^If the most recent evaluation of the statement encountered no errors +** or if the statement is never been evaluated, then sqlite3_finalize() returns +** SQLITE_OK. ^If the most recent evaluation of statement S failed, then +** sqlite3_finalize(S) returns the appropriate [error code] or +** [extended error code]. +** +** ^The sqlite3_finalize(S) routine can be called at any point during +** the life cycle of [prepared statement] S: +** before statement S is ever evaluated, after +** one or more calls to [sqlite3_reset()], or after any call +** to [sqlite3_step()] regardless of whether or not the statement has +** completed execution. +** +** ^Invoking sqlite3_finalize() on a NULL pointer is a harmless no-op. +** +** The application must finalize every [prepared statement] in order to avoid +** resource leaks. It is a grievous error for the application to try to use +** a prepared statement after it has been finalized. Any use of a prepared +** statement after it has been finalized can result in undefined and +** undesirable behavior such as segfaults and heap corruption. +*/ +SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt); + +/* +** CAPI3REF: Reset A Prepared Statement Object +** METHOD: sqlite3_stmt +** +** The sqlite3_reset() function is called to reset a [prepared statement] +** object back to its initial state, ready to be re-executed. +** ^Any SQL statement variables that had values bound to them using +** the [sqlite3_bind_blob | sqlite3_bind_*() API] retain their values. +** Use [sqlite3_clear_bindings()] to reset the bindings. +** +** ^The [sqlite3_reset(S)] interface resets the [prepared statement] S +** back to the beginning of its program. +** +** ^If the most recent call to [sqlite3_step(S)] for the +** [prepared statement] S returned [SQLITE_ROW] or [SQLITE_DONE], +** or if [sqlite3_step(S)] has never before been called on S, +** then [sqlite3_reset(S)] returns [SQLITE_OK]. +** +** ^If the most recent call to [sqlite3_step(S)] for the +** [prepared statement] S indicated an error, then +** [sqlite3_reset(S)] returns an appropriate [error code]. +** +** ^The [sqlite3_reset(S)] interface does not change the values +** of any [sqlite3_bind_blob|bindings] on the [prepared statement] S. +*/ +SQLITE_API int sqlite3_reset(sqlite3_stmt *pStmt); + +/* +** CAPI3REF: Create Or Redefine SQL Functions +** KEYWORDS: {function creation routines} +** METHOD: sqlite3 +** +** ^These functions (collectively known as "function creation routines") +** are used to add SQL functions or aggregates or to redefine the behavior +** of existing SQL functions or aggregates. The only differences between +** the three "sqlite3_create_function*" routines are the text encoding +** expected for the second parameter (the name of the function being +** created) and the presence or absence of a destructor callback for +** the application data pointer. Function sqlite3_create_window_function() +** is similar, but allows the user to supply the extra callback functions +** needed by [aggregate window functions]. +** +** ^The first parameter is the [database connection] to which the SQL +** function is to be added. ^If an application uses more than one database +** connection then application-defined SQL functions must be added +** to each database connection separately. +** +** ^The second parameter is the name of the SQL function to be created or +** redefined. ^The length of the name is limited to 255 bytes in a UTF-8 +** representation, exclusive of the zero-terminator. ^Note that the name +** length limit is in UTF-8 bytes, not characters nor UTF-16 bytes. +** ^Any attempt to create a function with a longer name +** will result in [SQLITE_MISUSE] being returned. +** +** ^The third parameter (nArg) +** is the number of arguments that the SQL function or +** aggregate takes. ^If this parameter is -1, then the SQL function or +** aggregate may take any number of arguments between 0 and the limit +** set by [sqlite3_limit]([SQLITE_LIMIT_FUNCTION_ARG]). If the third +** parameter is less than -1 or greater than 127 then the behavior is +** undefined. +** +** ^The fourth parameter, eTextRep, specifies what +** [SQLITE_UTF8 | text encoding] this SQL function prefers for +** its parameters. The application should set this parameter to +** [SQLITE_UTF16LE] if the function implementation invokes +** [sqlite3_value_text16le()] on an input, or [SQLITE_UTF16BE] if the +** implementation invokes [sqlite3_value_text16be()] on an input, or +** [SQLITE_UTF16] if [sqlite3_value_text16()] is used, or [SQLITE_UTF8] +** otherwise. ^The same SQL function may be registered multiple times using +** different preferred text encodings, with different implementations for +** each encoding. +** ^When multiple implementations of the same function are available, SQLite +** will pick the one that involves the least amount of data conversion. +** +** ^The fourth parameter may optionally be ORed with [SQLITE_DETERMINISTIC] +** to signal that the function will always return the same result given +** the same inputs within a single SQL statement. Most SQL functions are +** deterministic. The built-in [random()] SQL function is an example of a +** function that is not deterministic. The SQLite query planner is able to +** perform additional optimizations on deterministic functions, so use +** of the [SQLITE_DETERMINISTIC] flag is recommended where possible. +** +** ^The fourth parameter may also optionally include the [SQLITE_DIRECTONLY] +** flag, which if present prevents the function from being invoked from +** within VIEWs, TRIGGERs, CHECK constraints, generated column expressions, +** index expressions, or the WHERE clause of partial indexes. +** +** For best security, the [SQLITE_DIRECTONLY] flag is recommended for +** all application-defined SQL functions that do not need to be +** used inside of triggers, view, CHECK constraints, or other elements of +** the database schema. This flags is especially recommended for SQL +** functions that have side effects or reveal internal application state. +** Without this flag, an attacker might be able to modify the schema of +** a database file to include invocations of the function with parameters +** chosen by the attacker, which the application will then execute when +** the database file is opened and read. +** +** ^(The fifth parameter is an arbitrary pointer. The implementation of the +** function can gain access to this pointer using [sqlite3_user_data()].)^ +** +** ^The sixth, seventh and eighth parameters passed to the three +** "sqlite3_create_function*" functions, xFunc, xStep and xFinal, are +** pointers to C-language functions that implement the SQL function or +** aggregate. ^A scalar SQL function requires an implementation of the xFunc +** callback only; NULL pointers must be passed as the xStep and xFinal +** parameters. ^An aggregate SQL function requires an implementation of xStep +** and xFinal and NULL pointer must be passed for xFunc. ^To delete an existing +** SQL function or aggregate, pass NULL pointers for all three function +** callbacks. +** +** ^The sixth, seventh, eighth and ninth parameters (xStep, xFinal, xValue +** and xInverse) passed to sqlite3_create_window_function are pointers to +** C-language callbacks that implement the new function. xStep and xFinal +** must both be non-NULL. xValue and xInverse may either both be NULL, in +** which case a regular aggregate function is created, or must both be +** non-NULL, in which case the new function may be used as either an aggregate +** or aggregate window function. More details regarding the implementation +** of aggregate window functions are +** [user-defined window functions|available here]. +** +** ^(If the final parameter to sqlite3_create_function_v2() or +** sqlite3_create_window_function() is not NULL, then it is destructor for +** the application data pointer. The destructor is invoked when the function +** is deleted, either by being overloaded or when the database connection +** closes.)^ ^The destructor is also invoked if the call to +** sqlite3_create_function_v2() fails. ^When the destructor callback is +** invoked, it is passed a single argument which is a copy of the application +** data pointer which was the fifth parameter to sqlite3_create_function_v2(). +** +** ^It is permitted to register multiple implementations of the same +** functions with the same name but with either differing numbers of +** arguments or differing preferred text encodings. ^SQLite will use +** the implementation that most closely matches the way in which the +** SQL function is used. ^A function implementation with a non-negative +** nArg parameter is a better match than a function implementation with +** a negative nArg. ^A function where the preferred text encoding +** matches the database encoding is a better +** match than a function where the encoding is different. +** ^A function where the encoding difference is between UTF16le and UTF16be +** is a closer match than a function where the encoding difference is +** between UTF8 and UTF16. +** +** ^Built-in functions may be overloaded by new application-defined functions. +** +** ^An application-defined function is permitted to call other +** SQLite interfaces. However, such calls must not +** close the database connection nor finalize or reset the prepared +** statement in which the function is running. +*/ +SQLITE_API int sqlite3_create_function( + sqlite3 *db, + const char *zFunctionName, + int nArg, + int eTextRep, + void *pApp, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*) +); +SQLITE_API int sqlite3_create_function16( + sqlite3 *db, + const void *zFunctionName, + int nArg, + int eTextRep, + void *pApp, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*) +); +SQLITE_API int sqlite3_create_function_v2( + sqlite3 *db, + const char *zFunctionName, + int nArg, + int eTextRep, + void *pApp, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*), + void(*xDestroy)(void*) +); +SQLITE_API int sqlite3_create_window_function( + sqlite3 *db, + const char *zFunctionName, + int nArg, + int eTextRep, + void *pApp, + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*), + void (*xValue)(sqlite3_context*), + void (*xInverse)(sqlite3_context*,int,sqlite3_value**), + void(*xDestroy)(void*) +); + +/* +** CAPI3REF: Text Encodings +** +** These constant define integer codes that represent the various +** text encodings supported by SQLite. +*/ +#define SQLITE_UTF8 1 /* IMP: R-37514-35566 */ +#define SQLITE_UTF16LE 2 /* IMP: R-03371-37637 */ +#define SQLITE_UTF16BE 3 /* IMP: R-51971-34154 */ +#define SQLITE_UTF16 4 /* Use native byte order */ +#define SQLITE_ANY 5 /* Deprecated */ +#define SQLITE_UTF16_ALIGNED 8 /* sqlite3_create_collation only */ + +/* +** CAPI3REF: Function Flags +** +** These constants may be ORed together with the +** [SQLITE_UTF8 | preferred text encoding] as the fourth argument +** to [sqlite3_create_function()], [sqlite3_create_function16()], or +** [sqlite3_create_function_v2()]. +** +**
    +** [[SQLITE_DETERMINISTIC]]
    SQLITE_DETERMINISTIC
    +** The SQLITE_DETERMINISTIC flag means that the new function always gives +** the same output when the input parameters are the same. +** The [abs|abs() function] is deterministic, for example, but +** [randomblob|randomblob()] is not. Functions must +** be deterministic in order to be used in certain contexts such as +** with the WHERE clause of [partial indexes] or in [generated columns]. +** SQLite might also optimize deterministic functions by factoring them +** out of inner loops. +**
    +** +** [[SQLITE_DIRECTONLY]]
    SQLITE_DIRECTONLY
    +** The SQLITE_DIRECTONLY flag means that the function may only be invoked +** from top-level SQL, and cannot be used in VIEWs or TRIGGERs nor in +** schema structures such as [CHECK constraints], [DEFAULT clauses], +** [expression indexes], [partial indexes], or [generated columns]. +** The SQLITE_DIRECTONLY flags is a security feature which is recommended +** for all [application-defined SQL functions], and especially for functions +** that have side-effects or that could potentially leak sensitive +** information. +**
    +** +** [[SQLITE_INNOCUOUS]]
    SQLITE_INNOCUOUS
    +** The SQLITE_INNOCUOUS flag means that the function is unlikely +** to cause problems even if misused. An innocuous function should have +** no side effects and should not depend on any values other than its +** input parameters. The [abs|abs() function] is an example of an +** innocuous function. +** The [load_extension() SQL function] is not innocuous because of its +** side effects. +**

    SQLITE_INNOCUOUS is similar to SQLITE_DETERMINISTIC, but is not +** exactly the same. The [random|random() function] is an example of a +** function that is innocuous but not deterministic. +**

    Some heightened security settings +** ([SQLITE_DBCONFIG_TRUSTED_SCHEMA] and [PRAGMA trusted_schema=OFF]) +** disable the use of SQL functions inside views and triggers and in +** schema structures such as [CHECK constraints], [DEFAULT clauses], +** [expression indexes], [partial indexes], and [generated columns] unless +** the function is tagged with SQLITE_INNOCUOUS. Most built-in functions +** are innocuous. Developers are advised to avoid using the +** SQLITE_INNOCUOUS flag for application-defined functions unless the +** function has been carefully audited and found to be free of potentially +** security-adverse side-effects and information-leaks. +**

    +** +** [[SQLITE_SUBTYPE]]
    SQLITE_SUBTYPE
    +** The SQLITE_SUBTYPE flag indicates to SQLite that a function may call +** [sqlite3_value_subtype()] to inspect the sub-types of its arguments. +** Specifying this flag makes no difference for scalar or aggregate user +** functions. However, if it is not specified for a user-defined window +** function, then any sub-types belonging to arguments passed to the window +** function may be discarded before the window function is called (i.e. +** sqlite3_value_subtype() will always return 0). +**
    +**
    +*/ +#define SQLITE_DETERMINISTIC 0x000000800 +#define SQLITE_DIRECTONLY 0x000080000 +#define SQLITE_SUBTYPE 0x000100000 +#define SQLITE_INNOCUOUS 0x000200000 + +/* +** CAPI3REF: Deprecated Functions +** DEPRECATED +** +** These functions are [deprecated]. In order to maintain +** backwards compatibility with older code, these functions continue +** to be supported. However, new applications should avoid +** the use of these functions. To encourage programmers to avoid +** these functions, we will not explain what they do. +*/ +#ifndef SQLITE_OMIT_DEPRECATED +SQLITE_API SQLITE_DEPRECATED int sqlite3_aggregate_count(sqlite3_context*); +SQLITE_API SQLITE_DEPRECATED int sqlite3_expired(sqlite3_stmt*); +SQLITE_API SQLITE_DEPRECATED int sqlite3_transfer_bindings(sqlite3_stmt*, sqlite3_stmt*); +SQLITE_API SQLITE_DEPRECATED int sqlite3_global_recover(void); +SQLITE_API SQLITE_DEPRECATED void sqlite3_thread_cleanup(void); +SQLITE_API SQLITE_DEPRECATED int sqlite3_memory_alarm(void(*)(void*,sqlite3_int64,int), + void*,sqlite3_int64); +#endif + +/* +** CAPI3REF: Obtaining SQL Values +** METHOD: sqlite3_value +** +** Summary: +**
    +**
    sqlite3_value_blobBLOB value +**
    sqlite3_value_doubleREAL value +**
    sqlite3_value_int32-bit INTEGER value +**
    sqlite3_value_int6464-bit INTEGER value +**
    sqlite3_value_pointerPointer value +**
    sqlite3_value_textUTF-8 TEXT value +**
    sqlite3_value_text16UTF-16 TEXT value in +** the native byteorder +**
    sqlite3_value_text16beUTF-16be TEXT value +**
    sqlite3_value_text16leUTF-16le TEXT value +**
        +**
    sqlite3_value_bytesSize of a BLOB +** or a UTF-8 TEXT in bytes +**
    sqlite3_value_bytes16   +** →  Size of UTF-16 +** TEXT in bytes +**
    sqlite3_value_typeDefault +** datatype of the value +**
    sqlite3_value_numeric_type   +** →  Best numeric datatype of the value +**
    sqlite3_value_nochange   +** →  True if the column is unchanged in an UPDATE +** against a virtual table. +**
    sqlite3_value_frombind   +** →  True if value originated from a [bound parameter] +**
    +** +** Details: +** +** These routines extract type, size, and content information from +** [protected sqlite3_value] objects. Protected sqlite3_value objects +** are used to pass parameter information into the functions that +** implement [application-defined SQL functions] and [virtual tables]. +** +** These routines work only with [protected sqlite3_value] objects. +** Any attempt to use these routines on an [unprotected sqlite3_value] +** is not threadsafe. +** +** ^These routines work just like the corresponding [column access functions] +** except that these routines take a single [protected sqlite3_value] object +** pointer instead of a [sqlite3_stmt*] pointer and an integer column number. +** +** ^The sqlite3_value_text16() interface extracts a UTF-16 string +** in the native byte-order of the host machine. ^The +** sqlite3_value_text16be() and sqlite3_value_text16le() interfaces +** extract UTF-16 strings as big-endian and little-endian respectively. +** +** ^If [sqlite3_value] object V was initialized +** using [sqlite3_bind_pointer(S,I,P,X,D)] or [sqlite3_result_pointer(C,P,X,D)] +** and if X and Y are strings that compare equal according to strcmp(X,Y), +** then sqlite3_value_pointer(V,Y) will return the pointer P. ^Otherwise, +** sqlite3_value_pointer(V,Y) returns a NULL. The sqlite3_bind_pointer() +** routine is part of the [pointer passing interface] added for SQLite 3.20.0. +** +** ^(The sqlite3_value_type(V) interface returns the +** [SQLITE_INTEGER | datatype code] for the initial datatype of the +** [sqlite3_value] object V. The returned value is one of [SQLITE_INTEGER], +** [SQLITE_FLOAT], [SQLITE_TEXT], [SQLITE_BLOB], or [SQLITE_NULL].)^ +** Other interfaces might change the datatype for an sqlite3_value object. +** For example, if the datatype is initially SQLITE_INTEGER and +** sqlite3_value_text(V) is called to extract a text value for that +** integer, then subsequent calls to sqlite3_value_type(V) might return +** SQLITE_TEXT. Whether or not a persistent internal datatype conversion +** occurs is undefined and may change from one release of SQLite to the next. +** +** ^(The sqlite3_value_numeric_type() interface attempts to apply +** numeric affinity to the value. This means that an attempt is +** made to convert the value to an integer or floating point. If +** such a conversion is possible without loss of information (in other +** words, if the value is a string that looks like a number) +** then the conversion is performed. Otherwise no conversion occurs. +** The [SQLITE_INTEGER | datatype] after conversion is returned.)^ +** +** ^Within the [xUpdate] method of a [virtual table], the +** sqlite3_value_nochange(X) interface returns true if and only if +** the column corresponding to X is unchanged by the UPDATE operation +** that the xUpdate method call was invoked to implement and if +** and the prior [xColumn] method call that was invoked to extracted +** the value for that column returned without setting a result (probably +** because it queried [sqlite3_vtab_nochange()] and found that the column +** was unchanging). ^Within an [xUpdate] method, any value for which +** sqlite3_value_nochange(X) is true will in all other respects appear +** to be a NULL value. If sqlite3_value_nochange(X) is invoked anywhere other +** than within an [xUpdate] method call for an UPDATE statement, then +** the return value is arbitrary and meaningless. +** +** ^The sqlite3_value_frombind(X) interface returns non-zero if the +** value X originated from one of the [sqlite3_bind_int|sqlite3_bind()] +** interfaces. ^If X comes from an SQL literal value, or a table column, +** or an expression, then sqlite3_value_frombind(X) returns zero. +** +** Please pay particular attention to the fact that the pointer returned +** from [sqlite3_value_blob()], [sqlite3_value_text()], or +** [sqlite3_value_text16()] can be invalidated by a subsequent call to +** [sqlite3_value_bytes()], [sqlite3_value_bytes16()], [sqlite3_value_text()], +** or [sqlite3_value_text16()]. +** +** These routines must be called from the same thread as +** the SQL function that supplied the [sqlite3_value*] parameters. +** +** As long as the input parameter is correct, these routines can only +** fail if an out-of-memory error occurs during a format conversion. +** Only the following subset of interfaces are subject to out-of-memory +** errors: +** +**
      +**
    • sqlite3_value_blob() +**
    • sqlite3_value_text() +**
    • sqlite3_value_text16() +**
    • sqlite3_value_text16le() +**
    • sqlite3_value_text16be() +**
    • sqlite3_value_bytes() +**
    • sqlite3_value_bytes16() +**
    +** +** If an out-of-memory error occurs, then the return value from these +** routines is the same as if the column had contained an SQL NULL value. +** Valid SQL NULL returns can be distinguished from out-of-memory errors +** by invoking the [sqlite3_errcode()] immediately after the suspect +** return value is obtained and before any +** other SQLite interface is called on the same [database connection]. +*/ +SQLITE_API const void *sqlite3_value_blob(sqlite3_value*); +SQLITE_API double sqlite3_value_double(sqlite3_value*); +SQLITE_API int sqlite3_value_int(sqlite3_value*); +SQLITE_API sqlite3_int64 sqlite3_value_int64(sqlite3_value*); +SQLITE_API void *sqlite3_value_pointer(sqlite3_value*, const char*); +SQLITE_API const unsigned char *sqlite3_value_text(sqlite3_value*); +SQLITE_API const void *sqlite3_value_text16(sqlite3_value*); +SQLITE_API const void *sqlite3_value_text16le(sqlite3_value*); +SQLITE_API const void *sqlite3_value_text16be(sqlite3_value*); +SQLITE_API int sqlite3_value_bytes(sqlite3_value*); +SQLITE_API int sqlite3_value_bytes16(sqlite3_value*); +SQLITE_API int sqlite3_value_type(sqlite3_value*); +SQLITE_API int sqlite3_value_numeric_type(sqlite3_value*); +SQLITE_API int sqlite3_value_nochange(sqlite3_value*); +SQLITE_API int sqlite3_value_frombind(sqlite3_value*); + +/* +** CAPI3REF: Finding The Subtype Of SQL Values +** METHOD: sqlite3_value +** +** The sqlite3_value_subtype(V) function returns the subtype for +** an [application-defined SQL function] argument V. The subtype +** information can be used to pass a limited amount of context from +** one SQL function to another. Use the [sqlite3_result_subtype()] +** routine to set the subtype for the return value of an SQL function. +*/ +SQLITE_API unsigned int sqlite3_value_subtype(sqlite3_value*); + +/* +** CAPI3REF: Copy And Free SQL Values +** METHOD: sqlite3_value +** +** ^The sqlite3_value_dup(V) interface makes a copy of the [sqlite3_value] +** object D and returns a pointer to that copy. ^The [sqlite3_value] returned +** is a [protected sqlite3_value] object even if the input is not. +** ^The sqlite3_value_dup(V) interface returns NULL if V is NULL or if a +** memory allocation fails. +** +** ^The sqlite3_value_free(V) interface frees an [sqlite3_value] object +** previously obtained from [sqlite3_value_dup()]. ^If V is a NULL pointer +** then sqlite3_value_free(V) is a harmless no-op. +*/ +SQLITE_API sqlite3_value *sqlite3_value_dup(const sqlite3_value*); +SQLITE_API void sqlite3_value_free(sqlite3_value*); + +/* +** CAPI3REF: Obtain Aggregate Function Context +** METHOD: sqlite3_context +** +** Implementations of aggregate SQL functions use this +** routine to allocate memory for storing their state. +** +** ^The first time the sqlite3_aggregate_context(C,N) routine is called +** for a particular aggregate function, SQLite allocates +** N bytes of memory, zeroes out that memory, and returns a pointer +** to the new memory. ^On second and subsequent calls to +** sqlite3_aggregate_context() for the same aggregate function instance, +** the same buffer is returned. Sqlite3_aggregate_context() is normally +** called once for each invocation of the xStep callback and then one +** last time when the xFinal callback is invoked. ^(When no rows match +** an aggregate query, the xStep() callback of the aggregate function +** implementation is never called and xFinal() is called exactly once. +** In those cases, sqlite3_aggregate_context() might be called for the +** first time from within xFinal().)^ +** +** ^The sqlite3_aggregate_context(C,N) routine returns a NULL pointer +** when first called if N is less than or equal to zero or if a memory +** allocate error occurs. +** +** ^(The amount of space allocated by sqlite3_aggregate_context(C,N) is +** determined by the N parameter on first successful call. Changing the +** value of N in any subsequent call to sqlite3_aggregate_context() within +** the same aggregate function instance will not resize the memory +** allocation.)^ Within the xFinal callback, it is customary to set +** N=0 in calls to sqlite3_aggregate_context(C,N) so that no +** pointless memory allocations occur. +** +** ^SQLite automatically frees the memory allocated by +** sqlite3_aggregate_context() when the aggregate query concludes. +** +** The first parameter must be a copy of the +** [sqlite3_context | SQL function context] that is the first parameter +** to the xStep or xFinal callback routine that implements the aggregate +** function. +** +** This routine must be called from the same thread in which +** the aggregate SQL function is running. +*/ +SQLITE_API void *sqlite3_aggregate_context(sqlite3_context*, int nBytes); + +/* +** CAPI3REF: User Data For Functions +** METHOD: sqlite3_context +** +** ^The sqlite3_user_data() interface returns a copy of +** the pointer that was the pUserData parameter (the 5th parameter) +** of the [sqlite3_create_function()] +** and [sqlite3_create_function16()] routines that originally +** registered the application defined function. +** +** This routine must be called from the same thread in which +** the application-defined function is running. +*/ +SQLITE_API void *sqlite3_user_data(sqlite3_context*); + +/* +** CAPI3REF: Database Connection For Functions +** METHOD: sqlite3_context +** +** ^The sqlite3_context_db_handle() interface returns a copy of +** the pointer to the [database connection] (the 1st parameter) +** of the [sqlite3_create_function()] +** and [sqlite3_create_function16()] routines that originally +** registered the application defined function. +*/ +SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context*); + +/* +** CAPI3REF: Function Auxiliary Data +** METHOD: sqlite3_context +** +** These functions may be used by (non-aggregate) SQL functions to +** associate metadata with argument values. If the same value is passed to +** multiple invocations of the same SQL function during query execution, under +** some circumstances the associated metadata may be preserved. An example +** of where this might be useful is in a regular-expression matching +** function. The compiled version of the regular expression can be stored as +** metadata associated with the pattern string. +** Then as long as the pattern string remains the same, +** the compiled regular expression can be reused on multiple +** invocations of the same function. +** +** ^The sqlite3_get_auxdata(C,N) interface returns a pointer to the metadata +** associated by the sqlite3_set_auxdata(C,N,P,X) function with the Nth argument +** value to the application-defined function. ^N is zero for the left-most +** function argument. ^If there is no metadata +** associated with the function argument, the sqlite3_get_auxdata(C,N) interface +** returns a NULL pointer. +** +** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as metadata for the N-th +** argument of the application-defined function. ^Subsequent +** calls to sqlite3_get_auxdata(C,N) return P from the most recent +** sqlite3_set_auxdata(C,N,P,X) call if the metadata is still valid or +** NULL if the metadata has been discarded. +** ^After each call to sqlite3_set_auxdata(C,N,P,X) where X is not NULL, +** SQLite will invoke the destructor function X with parameter P exactly +** once, when the metadata is discarded. +** SQLite is free to discard the metadata at any time, including:
      +**
    • ^(when the corresponding function parameter changes)^, or +**
    • ^(when [sqlite3_reset()] or [sqlite3_finalize()] is called for the +** SQL statement)^, or +**
    • ^(when sqlite3_set_auxdata() is invoked again on the same +** parameter)^, or +**
    • ^(during the original sqlite3_set_auxdata() call when a memory +** allocation error occurs.)^
    +** +** Note the last bullet in particular. The destructor X in +** sqlite3_set_auxdata(C,N,P,X) might be called immediately, before the +** sqlite3_set_auxdata() interface even returns. Hence sqlite3_set_auxdata() +** should be called near the end of the function implementation and the +** function implementation should not make any use of P after +** sqlite3_set_auxdata() has been called. +** +** ^(In practice, metadata is preserved between function calls for +** function parameters that are compile-time constants, including literal +** values and [parameters] and expressions composed from the same.)^ +** +** The value of the N parameter to these interfaces should be non-negative. +** Future enhancements may make use of negative N values to define new +** kinds of function caching behavior. +** +** These routines must be called from the same thread in which +** the SQL function is running. +*/ +SQLITE_API void *sqlite3_get_auxdata(sqlite3_context*, int N); +SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(void*)); + + +/* +** CAPI3REF: Constants Defining Special Destructor Behavior +** +** These are special values for the destructor that is passed in as the +** final argument to routines like [sqlite3_result_blob()]. ^If the destructor +** argument is SQLITE_STATIC, it means that the content pointer is constant +** and will never change. It does not need to be destroyed. ^The +** SQLITE_TRANSIENT value means that the content will likely change in +** the near future and that SQLite should make its own private copy of +** the content before returning. +** +** The typedef is necessary to work around problems in certain +** C++ compilers. +*/ +typedef void (*sqlite3_destructor_type)(void*); +#define SQLITE_STATIC ((sqlite3_destructor_type)0) +#define SQLITE_TRANSIENT ((sqlite3_destructor_type)-1) + +/* +** CAPI3REF: Setting The Result Of An SQL Function +** METHOD: sqlite3_context +** +** These routines are used by the xFunc or xFinal callbacks that +** implement SQL functions and aggregates. See +** [sqlite3_create_function()] and [sqlite3_create_function16()] +** for additional information. +** +** These functions work very much like the [parameter binding] family of +** functions used to bind values to host parameters in prepared statements. +** Refer to the [SQL parameter] documentation for additional information. +** +** ^The sqlite3_result_blob() interface sets the result from +** an application-defined function to be the BLOB whose content is pointed +** to by the second parameter and which is N bytes long where N is the +** third parameter. +** +** ^The sqlite3_result_zeroblob(C,N) and sqlite3_result_zeroblob64(C,N) +** interfaces set the result of the application-defined function to be +** a BLOB containing all zero bytes and N bytes in size. +** +** ^The sqlite3_result_double() interface sets the result from +** an application-defined function to be a floating point value specified +** by its 2nd argument. +** +** ^The sqlite3_result_error() and sqlite3_result_error16() functions +** cause the implemented SQL function to throw an exception. +** ^SQLite uses the string pointed to by the +** 2nd parameter of sqlite3_result_error() or sqlite3_result_error16() +** as the text of an error message. ^SQLite interprets the error +** message string from sqlite3_result_error() as UTF-8. ^SQLite +** interprets the string from sqlite3_result_error16() as UTF-16 using +** the same [byte-order determination rules] as [sqlite3_bind_text16()]. +** ^If the third parameter to sqlite3_result_error() +** or sqlite3_result_error16() is negative then SQLite takes as the error +** message all text up through the first zero character. +** ^If the third parameter to sqlite3_result_error() or +** sqlite3_result_error16() is non-negative then SQLite takes that many +** bytes (not characters) from the 2nd parameter as the error message. +** ^The sqlite3_result_error() and sqlite3_result_error16() +** routines make a private copy of the error message text before +** they return. Hence, the calling function can deallocate or +** modify the text after they return without harm. +** ^The sqlite3_result_error_code() function changes the error code +** returned by SQLite as a result of an error in a function. ^By default, +** the error code is SQLITE_ERROR. ^A subsequent call to sqlite3_result_error() +** or sqlite3_result_error16() resets the error code to SQLITE_ERROR. +** +** ^The sqlite3_result_error_toobig() interface causes SQLite to throw an +** error indicating that a string or BLOB is too long to represent. +** +** ^The sqlite3_result_error_nomem() interface causes SQLite to throw an +** error indicating that a memory allocation failed. +** +** ^The sqlite3_result_int() interface sets the return value +** of the application-defined function to be the 32-bit signed integer +** value given in the 2nd argument. +** ^The sqlite3_result_int64() interface sets the return value +** of the application-defined function to be the 64-bit signed integer +** value given in the 2nd argument. +** +** ^The sqlite3_result_null() interface sets the return value +** of the application-defined function to be NULL. +** +** ^The sqlite3_result_text(), sqlite3_result_text16(), +** sqlite3_result_text16le(), and sqlite3_result_text16be() interfaces +** set the return value of the application-defined function to be +** a text string which is represented as UTF-8, UTF-16 native byte order, +** UTF-16 little endian, or UTF-16 big endian, respectively. +** ^The sqlite3_result_text64() interface sets the return value of an +** application-defined function to be a text string in an encoding +** specified by the fifth (and last) parameter, which must be one +** of [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE]. +** ^SQLite takes the text result from the application from +** the 2nd parameter of the sqlite3_result_text* interfaces. +** ^If the 3rd parameter to the sqlite3_result_text* interfaces +** is negative, then SQLite takes result text from the 2nd parameter +** through the first zero character. +** ^If the 3rd parameter to the sqlite3_result_text* interfaces +** is non-negative, then as many bytes (not characters) of the text +** pointed to by the 2nd parameter are taken as the application-defined +** function result. If the 3rd parameter is non-negative, then it +** must be the byte offset into the string where the NUL terminator would +** appear if the string where NUL terminated. If any NUL characters occur +** in the string at a byte offset that is less than the value of the 3rd +** parameter, then the resulting string will contain embedded NULs and the +** result of expressions operating on strings with embedded NULs is undefined. +** ^If the 4th parameter to the sqlite3_result_text* interfaces +** or sqlite3_result_blob is a non-NULL pointer, then SQLite calls that +** function as the destructor on the text or BLOB result when it has +** finished using that result. +** ^If the 4th parameter to the sqlite3_result_text* interfaces or to +** sqlite3_result_blob is the special constant SQLITE_STATIC, then SQLite +** assumes that the text or BLOB result is in constant space and does not +** copy the content of the parameter nor call a destructor on the content +** when it has finished using that result. +** ^If the 4th parameter to the sqlite3_result_text* interfaces +** or sqlite3_result_blob is the special constant SQLITE_TRANSIENT +** then SQLite makes a copy of the result into space obtained +** from [sqlite3_malloc()] before it returns. +** +** ^For the sqlite3_result_text16(), sqlite3_result_text16le(), and +** sqlite3_result_text16be() routines, and for sqlite3_result_text64() +** when the encoding is not UTF8, if the input UTF16 begins with a +** byte-order mark (BOM, U+FEFF) then the BOM is removed from the +** string and the rest of the string is interpreted according to the +** byte-order specified by the BOM. ^The byte-order specified by +** the BOM at the beginning of the text overrides the byte-order +** specified by the interface procedure. ^So, for example, if +** sqlite3_result_text16le() is invoked with text that begins +** with bytes 0xfe, 0xff (a big-endian byte-order mark) then the +** first two bytes of input are skipped and the remaining input +** is interpreted as UTF16BE text. +** +** ^For UTF16 input text to the sqlite3_result_text16(), +** sqlite3_result_text16be(), sqlite3_result_text16le(), and +** sqlite3_result_text64() routines, if the text contains invalid +** UTF16 characters, the invalid characters might be converted +** into the unicode replacement character, U+FFFD. +** +** ^The sqlite3_result_value() interface sets the result of +** the application-defined function to be a copy of the +** [unprotected sqlite3_value] object specified by the 2nd parameter. ^The +** sqlite3_result_value() interface makes a copy of the [sqlite3_value] +** so that the [sqlite3_value] specified in the parameter may change or +** be deallocated after sqlite3_result_value() returns without harm. +** ^A [protected sqlite3_value] object may always be used where an +** [unprotected sqlite3_value] object is required, so either +** kind of [sqlite3_value] object can be used with this interface. +** +** ^The sqlite3_result_pointer(C,P,T,D) interface sets the result to an +** SQL NULL value, just like [sqlite3_result_null(C)], except that it +** also associates the host-language pointer P or type T with that +** NULL value such that the pointer can be retrieved within an +** [application-defined SQL function] using [sqlite3_value_pointer()]. +** ^If the D parameter is not NULL, then it is a pointer to a destructor +** for the P parameter. ^SQLite invokes D with P as its only argument +** when SQLite is finished with P. The T parameter should be a static +** string and preferably a string literal. The sqlite3_result_pointer() +** routine is part of the [pointer passing interface] added for SQLite 3.20.0. +** +** If these routines are called from within the different thread +** than the one containing the application-defined function that received +** the [sqlite3_context] pointer, the results are undefined. +*/ +SQLITE_API void sqlite3_result_blob(sqlite3_context*, const void*, int, void(*)(void*)); +SQLITE_API void sqlite3_result_blob64(sqlite3_context*,const void*, + sqlite3_uint64,void(*)(void*)); +SQLITE_API void sqlite3_result_double(sqlite3_context*, double); +SQLITE_API void sqlite3_result_error(sqlite3_context*, const char*, int); +SQLITE_API void sqlite3_result_error16(sqlite3_context*, const void*, int); +SQLITE_API void sqlite3_result_error_toobig(sqlite3_context*); +SQLITE_API void sqlite3_result_error_nomem(sqlite3_context*); +SQLITE_API void sqlite3_result_error_code(sqlite3_context*, int); +SQLITE_API void sqlite3_result_int(sqlite3_context*, int); +SQLITE_API void sqlite3_result_int64(sqlite3_context*, sqlite3_int64); +SQLITE_API void sqlite3_result_null(sqlite3_context*); +SQLITE_API void sqlite3_result_text(sqlite3_context*, const char*, int, void(*)(void*)); +SQLITE_API void sqlite3_result_text64(sqlite3_context*, const char*,sqlite3_uint64, + void(*)(void*), unsigned char encoding); +SQLITE_API void sqlite3_result_text16(sqlite3_context*, const void*, int, void(*)(void*)); +SQLITE_API void sqlite3_result_text16le(sqlite3_context*, const void*, int,void(*)(void*)); +SQLITE_API void sqlite3_result_text16be(sqlite3_context*, const void*, int,void(*)(void*)); +SQLITE_API void sqlite3_result_value(sqlite3_context*, sqlite3_value*); +SQLITE_API void sqlite3_result_pointer(sqlite3_context*, void*,const char*,void(*)(void*)); +SQLITE_API void sqlite3_result_zeroblob(sqlite3_context*, int n); +SQLITE_API int sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n); + + +/* +** CAPI3REF: Setting The Subtype Of An SQL Function +** METHOD: sqlite3_context +** +** The sqlite3_result_subtype(C,T) function causes the subtype of +** the result from the [application-defined SQL function] with +** [sqlite3_context] C to be the value T. Only the lower 8 bits +** of the subtype T are preserved in current versions of SQLite; +** higher order bits are discarded. +** The number of subtype bytes preserved by SQLite might increase +** in future releases of SQLite. +*/ +SQLITE_API void sqlite3_result_subtype(sqlite3_context*,unsigned int); + +/* +** CAPI3REF: Define New Collating Sequences +** METHOD: sqlite3 +** +** ^These functions add, remove, or modify a [collation] associated +** with the [database connection] specified as the first argument. +** +** ^The name of the collation is a UTF-8 string +** for sqlite3_create_collation() and sqlite3_create_collation_v2() +** and a UTF-16 string in native byte order for sqlite3_create_collation16(). +** ^Collation names that compare equal according to [sqlite3_strnicmp()] are +** considered to be the same name. +** +** ^(The third argument (eTextRep) must be one of the constants: +**
      +**
    • [SQLITE_UTF8], +**
    • [SQLITE_UTF16LE], +**
    • [SQLITE_UTF16BE], +**
    • [SQLITE_UTF16], or +**
    • [SQLITE_UTF16_ALIGNED]. +**
    )^ +** ^The eTextRep argument determines the encoding of strings passed +** to the collating function callback, xCompare. +** ^The [SQLITE_UTF16] and [SQLITE_UTF16_ALIGNED] values for eTextRep +** force strings to be UTF16 with native byte order. +** ^The [SQLITE_UTF16_ALIGNED] value for eTextRep forces strings to begin +** on an even byte address. +** +** ^The fourth argument, pArg, is an application data pointer that is passed +** through as the first argument to the collating function callback. +** +** ^The fifth argument, xCompare, is a pointer to the collating function. +** ^Multiple collating functions can be registered using the same name but +** with different eTextRep parameters and SQLite will use whichever +** function requires the least amount of data transformation. +** ^If the xCompare argument is NULL then the collating function is +** deleted. ^When all collating functions having the same name are deleted, +** that collation is no longer usable. +** +** ^The collating function callback is invoked with a copy of the pArg +** application data pointer and with two strings in the encoding specified +** by the eTextRep argument. The two integer parameters to the collating +** function callback are the length of the two strings, in bytes. The collating +** function must return an integer that is negative, zero, or positive +** if the first string is less than, equal to, or greater than the second, +** respectively. A collating function must always return the same answer +** given the same inputs. If two or more collating functions are registered +** to the same collation name (using different eTextRep values) then all +** must give an equivalent answer when invoked with equivalent strings. +** The collating function must obey the following properties for all +** strings A, B, and C: +** +**
      +**
    1. If A==B then B==A. +**
    2. If A==B and B==C then A==C. +**
    3. If A<B THEN B>A. +**
    4. If A<B and B<C then A<C. +**
    +** +** If a collating function fails any of the above constraints and that +** collating function is registered and used, then the behavior of SQLite +** is undefined. +** +** ^The sqlite3_create_collation_v2() works like sqlite3_create_collation() +** with the addition that the xDestroy callback is invoked on pArg when +** the collating function is deleted. +** ^Collating functions are deleted when they are overridden by later +** calls to the collation creation functions or when the +** [database connection] is closed using [sqlite3_close()]. +** +** ^The xDestroy callback is not called if the +** sqlite3_create_collation_v2() function fails. Applications that invoke +** sqlite3_create_collation_v2() with a non-NULL xDestroy argument should +** check the return code and dispose of the application data pointer +** themselves rather than expecting SQLite to deal with it for them. +** This is different from every other SQLite interface. The inconsistency +** is unfortunate but cannot be changed without breaking backwards +** compatibility. +** +** See also: [sqlite3_collation_needed()] and [sqlite3_collation_needed16()]. +*/ +SQLITE_API int sqlite3_create_collation( + sqlite3*, + const char *zName, + int eTextRep, + void *pArg, + int(*xCompare)(void*,int,const void*,int,const void*) +); +SQLITE_API int sqlite3_create_collation_v2( + sqlite3*, + const char *zName, + int eTextRep, + void *pArg, + int(*xCompare)(void*,int,const void*,int,const void*), + void(*xDestroy)(void*) +); +SQLITE_API int sqlite3_create_collation16( + sqlite3*, + const void *zName, + int eTextRep, + void *pArg, + int(*xCompare)(void*,int,const void*,int,const void*) +); + +/* +** CAPI3REF: Collation Needed Callbacks +** METHOD: sqlite3 +** +** ^To avoid having to register all collation sequences before a database +** can be used, a single callback function may be registered with the +** [database connection] to be invoked whenever an undefined collation +** sequence is required. +** +** ^If the function is registered using the sqlite3_collation_needed() API, +** then it is passed the names of undefined collation sequences as strings +** encoded in UTF-8. ^If sqlite3_collation_needed16() is used, +** the names are passed as UTF-16 in machine native byte order. +** ^A call to either function replaces the existing collation-needed callback. +** +** ^(When the callback is invoked, the first argument passed is a copy +** of the second argument to sqlite3_collation_needed() or +** sqlite3_collation_needed16(). The second argument is the database +** connection. The third argument is one of [SQLITE_UTF8], [SQLITE_UTF16BE], +** or [SQLITE_UTF16LE], indicating the most desirable form of the collation +** sequence function required. The fourth parameter is the name of the +** required collation sequence.)^ +** +** The callback function should register the desired collation using +** [sqlite3_create_collation()], [sqlite3_create_collation16()], or +** [sqlite3_create_collation_v2()]. +*/ +SQLITE_API int sqlite3_collation_needed( + sqlite3*, + void*, + void(*)(void*,sqlite3*,int eTextRep,const char*) +); +SQLITE_API int sqlite3_collation_needed16( + sqlite3*, + void*, + void(*)(void*,sqlite3*,int eTextRep,const void*) +); + +#ifdef SQLITE_ENABLE_CEROD +/* +** Specify the activation key for a CEROD database. Unless +** activated, none of the CEROD routines will work. +*/ +SQLITE_API void sqlite3_activate_cerod( + const char *zPassPhrase /* Activation phrase */ +); +#endif + +/* +** CAPI3REF: Suspend Execution For A Short Time +** +** The sqlite3_sleep() function causes the current thread to suspend execution +** for at least a number of milliseconds specified in its parameter. +** +** If the operating system does not support sleep requests with +** millisecond time resolution, then the time will be rounded up to +** the nearest second. The number of milliseconds of sleep actually +** requested from the operating system is returned. +** +** ^SQLite implements this interface by calling the xSleep() +** method of the default [sqlite3_vfs] object. If the xSleep() method +** of the default VFS is not implemented correctly, or not implemented at +** all, then the behavior of sqlite3_sleep() may deviate from the description +** in the previous paragraphs. +*/ +SQLITE_API int sqlite3_sleep(int); + +/* +** CAPI3REF: Name Of The Folder Holding Temporary Files +** +** ^(If this global variable is made to point to a string which is +** the name of a folder (a.k.a. directory), then all temporary files +** created by SQLite when using a built-in [sqlite3_vfs | VFS] +** will be placed in that directory.)^ ^If this variable +** is a NULL pointer, then SQLite performs a search for an appropriate +** temporary file directory. +** +** Applications are strongly discouraged from using this global variable. +** It is required to set a temporary folder on Windows Runtime (WinRT). +** But for all other platforms, it is highly recommended that applications +** neither read nor write this variable. This global variable is a relic +** that exists for backwards compatibility of legacy applications and should +** be avoided in new projects. +** +** It is not safe to read or modify this variable in more than one +** thread at a time. It is not safe to read or modify this variable +** if a [database connection] is being used at the same time in a separate +** thread. +** It is intended that this variable be set once +** as part of process initialization and before any SQLite interface +** routines have been called and that this variable remain unchanged +** thereafter. +** +** ^The [temp_store_directory pragma] may modify this variable and cause +** it to point to memory obtained from [sqlite3_malloc]. ^Furthermore, +** the [temp_store_directory pragma] always assumes that any string +** that this variable points to is held in memory obtained from +** [sqlite3_malloc] and the pragma may attempt to free that memory +** using [sqlite3_free]. +** Hence, if this variable is modified directly, either it should be +** made NULL or made to point to memory obtained from [sqlite3_malloc] +** or else the use of the [temp_store_directory pragma] should be avoided. +** Except when requested by the [temp_store_directory pragma], SQLite +** does not free the memory that sqlite3_temp_directory points to. If +** the application wants that memory to be freed, it must do +** so itself, taking care to only do so after all [database connection] +** objects have been destroyed. +** +** Note to Windows Runtime users: The temporary directory must be set +** prior to calling [sqlite3_open] or [sqlite3_open_v2]. Otherwise, various +** features that require the use of temporary files may fail. Here is an +** example of how to do this using C++ with the Windows Runtime: +** +**
    +** LPCWSTR zPath = Windows::Storage::ApplicationData::Current->
    +**       TemporaryFolder->Path->Data();
    +** char zPathBuf[MAX_PATH + 1];
    +** memset(zPathBuf, 0, sizeof(zPathBuf));
    +** WideCharToMultiByte(CP_UTF8, 0, zPath, -1, zPathBuf, sizeof(zPathBuf),
    +**       NULL, NULL);
    +** sqlite3_temp_directory = sqlite3_mprintf("%s", zPathBuf);
    +** 
    +*/ +SQLITE_API char *sqlite3_temp_directory; + +/* +** CAPI3REF: Name Of The Folder Holding Database Files +** +** ^(If this global variable is made to point to a string which is +** the name of a folder (a.k.a. directory), then all database files +** specified with a relative pathname and created or accessed by +** SQLite when using a built-in windows [sqlite3_vfs | VFS] will be assumed +** to be relative to that directory.)^ ^If this variable is a NULL +** pointer, then SQLite assumes that all database files specified +** with a relative pathname are relative to the current directory +** for the process. Only the windows VFS makes use of this global +** variable; it is ignored by the unix VFS. +** +** Changing the value of this variable while a database connection is +** open can result in a corrupt database. +** +** It is not safe to read or modify this variable in more than one +** thread at a time. It is not safe to read or modify this variable +** if a [database connection] is being used at the same time in a separate +** thread. +** It is intended that this variable be set once +** as part of process initialization and before any SQLite interface +** routines have been called and that this variable remain unchanged +** thereafter. +** +** ^The [data_store_directory pragma] may modify this variable and cause +** it to point to memory obtained from [sqlite3_malloc]. ^Furthermore, +** the [data_store_directory pragma] always assumes that any string +** that this variable points to is held in memory obtained from +** [sqlite3_malloc] and the pragma may attempt to free that memory +** using [sqlite3_free]. +** Hence, if this variable is modified directly, either it should be +** made NULL or made to point to memory obtained from [sqlite3_malloc] +** or else the use of the [data_store_directory pragma] should be avoided. +*/ +SQLITE_API char *sqlite3_data_directory; + +/* +** CAPI3REF: Win32 Specific Interface +** +** These interfaces are available only on Windows. The +** [sqlite3_win32_set_directory] interface is used to set the value associated +** with the [sqlite3_temp_directory] or [sqlite3_data_directory] variable, to +** zValue, depending on the value of the type parameter. The zValue parameter +** should be NULL to cause the previous value to be freed via [sqlite3_free]; +** a non-NULL value will be copied into memory obtained from [sqlite3_malloc] +** prior to being used. The [sqlite3_win32_set_directory] interface returns +** [SQLITE_OK] to indicate success, [SQLITE_ERROR] if the type is unsupported, +** or [SQLITE_NOMEM] if memory could not be allocated. The value of the +** [sqlite3_data_directory] variable is intended to act as a replacement for +** the current directory on the sub-platforms of Win32 where that concept is +** not present, e.g. WinRT and UWP. The [sqlite3_win32_set_directory8] and +** [sqlite3_win32_set_directory16] interfaces behave exactly the same as the +** sqlite3_win32_set_directory interface except the string parameter must be +** UTF-8 or UTF-16, respectively. +*/ +SQLITE_API int sqlite3_win32_set_directory( + unsigned long type, /* Identifier for directory being set or reset */ + void *zValue /* New value for directory being set or reset */ +); +SQLITE_API int sqlite3_win32_set_directory8(unsigned long type, const char *zValue); +SQLITE_API int sqlite3_win32_set_directory16(unsigned long type, const void *zValue); + +/* +** CAPI3REF: Win32 Directory Types +** +** These macros are only available on Windows. They define the allowed values +** for the type argument to the [sqlite3_win32_set_directory] interface. +*/ +#define SQLITE_WIN32_DATA_DIRECTORY_TYPE 1 +#define SQLITE_WIN32_TEMP_DIRECTORY_TYPE 2 + +/* +** CAPI3REF: Test For Auto-Commit Mode +** KEYWORDS: {autocommit mode} +** METHOD: sqlite3 +** +** ^The sqlite3_get_autocommit() interface returns non-zero or +** zero if the given database connection is or is not in autocommit mode, +** respectively. ^Autocommit mode is on by default. +** ^Autocommit mode is disabled by a [BEGIN] statement. +** ^Autocommit mode is re-enabled by a [COMMIT] or [ROLLBACK]. +** +** If certain kinds of errors occur on a statement within a multi-statement +** transaction (errors including [SQLITE_FULL], [SQLITE_IOERR], +** [SQLITE_NOMEM], [SQLITE_BUSY], and [SQLITE_INTERRUPT]) then the +** transaction might be rolled back automatically. The only way to +** find out whether SQLite automatically rolled back the transaction after +** an error is to use this function. +** +** If another thread changes the autocommit status of the database +** connection while this routine is running, then the return value +** is undefined. +*/ +SQLITE_API int sqlite3_get_autocommit(sqlite3*); + +/* +** CAPI3REF: Find The Database Handle Of A Prepared Statement +** METHOD: sqlite3_stmt +** +** ^The sqlite3_db_handle interface returns the [database connection] handle +** to which a [prepared statement] belongs. ^The [database connection] +** returned by sqlite3_db_handle is the same [database connection] +** that was the first argument +** to the [sqlite3_prepare_v2()] call (or its variants) that was used to +** create the statement in the first place. +*/ +SQLITE_API sqlite3 *sqlite3_db_handle(sqlite3_stmt*); + +/* +** CAPI3REF: Return The Filename For A Database Connection +** METHOD: sqlite3 +** +** ^The sqlite3_db_filename(D,N) interface returns a pointer to the filename +** associated with database N of connection D. +** ^If there is no attached database N on the database +** connection D, or if database N is a temporary or in-memory database, then +** this function will return either a NULL pointer or an empty string. +** +** ^The string value returned by this routine is owned and managed by +** the database connection. ^The value will be valid until the database N +** is [DETACH]-ed or until the database connection closes. +** +** ^The filename returned by this function is the output of the +** xFullPathname method of the [VFS]. ^In other words, the filename +** will be an absolute pathname, even if the filename used +** to open the database originally was a URI or relative pathname. +** +** If the filename pointer returned by this routine is not NULL, then it +** can be used as the filename input parameter to these routines: +**
      +**
    • [sqlite3_uri_parameter()] +**
    • [sqlite3_uri_boolean()] +**
    • [sqlite3_uri_int64()] +**
    • [sqlite3_filename_database()] +**
    • [sqlite3_filename_journal()] +**
    • [sqlite3_filename_wal()] +**
    +*/ +SQLITE_API const char *sqlite3_db_filename(sqlite3 *db, const char *zDbName); + +/* +** CAPI3REF: Determine if a database is read-only +** METHOD: sqlite3 +** +** ^The sqlite3_db_readonly(D,N) interface returns 1 if the database N +** of connection D is read-only, 0 if it is read/write, or -1 if N is not +** the name of a database on connection D. +*/ +SQLITE_API int sqlite3_db_readonly(sqlite3 *db, const char *zDbName); + +/* +** CAPI3REF: Determine the transaction state of a database +** METHOD: sqlite3 +** +** ^The sqlite3_txn_state(D,S) interface returns the current +** [transaction state] of schema S in database connection D. ^If S is NULL, +** then the highest transaction state of any schema on database connection D +** is returned. Transaction states are (in order of lowest to highest): +**
      +**
    1. SQLITE_TXN_NONE +**
    2. SQLITE_TXN_READ +**
    3. SQLITE_TXN_WRITE +**
    +** ^If the S argument to sqlite3_txn_state(D,S) is not the name of +** a valid schema, then -1 is returned. +*/ +SQLITE_API int sqlite3_txn_state(sqlite3*,const char *zSchema); + +/* +** CAPI3REF: Allowed return values from [sqlite3_txn_state()] +** KEYWORDS: {transaction state} +** +** These constants define the current transaction state of a database file. +** ^The [sqlite3_txn_state(D,S)] interface returns one of these +** constants in order to describe the transaction state of schema S +** in [database connection] D. +** +**
    +** [[SQLITE_TXN_NONE]]
    SQLITE_TXN_NONE
    +**
    The SQLITE_TXN_NONE state means that no transaction is currently +** pending.
    +** +** [[SQLITE_TXN_READ]]
    SQLITE_TXN_READ
    +**
    The SQLITE_TXN_READ state means that the database is currently +** in a read transaction. Content has been read from the database file +** but nothing in the database file has changed. The transaction state +** will advanced to SQLITE_TXN_WRITE if any changes occur and there are +** no other conflicting concurrent write transactions. The transaction +** state will revert to SQLITE_TXN_NONE following a [ROLLBACK] or +** [COMMIT].
    +** +** [[SQLITE_TXN_WRITE]]
    SQLITE_TXN_WRITE
    +**
    The SQLITE_TXN_WRITE state means that the database is currently +** in a write transaction. Content has been written to the database file +** but has not yet committed. The transaction state will change to +** to SQLITE_TXN_NONE at the next [ROLLBACK] or [COMMIT].
    +*/ +#define SQLITE_TXN_NONE 0 +#define SQLITE_TXN_READ 1 +#define SQLITE_TXN_WRITE 2 + +/* +** CAPI3REF: Find the next prepared statement +** METHOD: sqlite3 +** +** ^This interface returns a pointer to the next [prepared statement] after +** pStmt associated with the [database connection] pDb. ^If pStmt is NULL +** then this interface returns a pointer to the first prepared statement +** associated with the database connection pDb. ^If no prepared statement +** satisfies the conditions of this routine, it returns NULL. +** +** The [database connection] pointer D in a call to +** [sqlite3_next_stmt(D,S)] must refer to an open database +** connection and in particular must not be a NULL pointer. +*/ +SQLITE_API sqlite3_stmt *sqlite3_next_stmt(sqlite3 *pDb, sqlite3_stmt *pStmt); + +/* +** CAPI3REF: Commit And Rollback Notification Callbacks +** METHOD: sqlite3 +** +** ^The sqlite3_commit_hook() interface registers a callback +** function to be invoked whenever a transaction is [COMMIT | committed]. +** ^Any callback set by a previous call to sqlite3_commit_hook() +** for the same database connection is overridden. +** ^The sqlite3_rollback_hook() interface registers a callback +** function to be invoked whenever a transaction is [ROLLBACK | rolled back]. +** ^Any callback set by a previous call to sqlite3_rollback_hook() +** for the same database connection is overridden. +** ^The pArg argument is passed through to the callback. +** ^If the callback on a commit hook function returns non-zero, +** then the commit is converted into a rollback. +** +** ^The sqlite3_commit_hook(D,C,P) and sqlite3_rollback_hook(D,C,P) functions +** return the P argument from the previous call of the same function +** on the same [database connection] D, or NULL for +** the first call for each function on D. +** +** The commit and rollback hook callbacks are not reentrant. +** The callback implementation must not do anything that will modify +** the database connection that invoked the callback. Any actions +** to modify the database connection must be deferred until after the +** completion of the [sqlite3_step()] call that triggered the commit +** or rollback hook in the first place. +** Note that running any other SQL statements, including SELECT statements, +** or merely calling [sqlite3_prepare_v2()] and [sqlite3_step()] will modify +** the database connections for the meaning of "modify" in this paragraph. +** +** ^Registering a NULL function disables the callback. +** +** ^When the commit hook callback routine returns zero, the [COMMIT] +** operation is allowed to continue normally. ^If the commit hook +** returns non-zero, then the [COMMIT] is converted into a [ROLLBACK]. +** ^The rollback hook is invoked on a rollback that results from a commit +** hook returning non-zero, just as it would be with any other rollback. +** +** ^For the purposes of this API, a transaction is said to have been +** rolled back if an explicit "ROLLBACK" statement is executed, or +** an error or constraint causes an implicit rollback to occur. +** ^The rollback callback is not invoked if a transaction is +** automatically rolled back because the database connection is closed. +** +** See also the [sqlite3_update_hook()] interface. +*/ +SQLITE_API void *sqlite3_commit_hook(sqlite3*, int(*)(void*), void*); +SQLITE_API void *sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*); + +/* +** CAPI3REF: Autovacuum Compaction Amount Callback +** METHOD: sqlite3 +** +** ^The sqlite3_autovacuum_pages(D,C,P,X) interface registers a callback +** function C that is invoked prior to each autovacuum of the database +** file. ^The callback is passed a copy of the generic data pointer (P), +** the schema-name of the attached database that is being autovacuumed, +** the the size of the database file in pages, the number of free pages, +** and the number of bytes per page, respectively. The callback should +** return the number of free pages that should be removed by the +** autovacuum. ^If the callback returns zero, then no autovacuum happens. +** ^If the value returned is greater than or equal to the number of +** free pages, then a complete autovacuum happens. +** +**

    ^If there are multiple ATTACH-ed database files that are being +** modified as part of a transaction commit, then the autovacuum pages +** callback is invoked separately for each file. +** +**

    The callback is not reentrant. The callback function should +** not attempt to invoke any other SQLite interface. If it does, bad +** things may happen, including segmentation faults and corrupt database +** files. The callback function should be a simple function that +** does some arithmetic on its input parameters and returns a result. +** +** ^The X parameter to sqlite3_autovacuum_pages(D,C,P,X) is an optional +** destructor for the P parameter. ^If X is not NULL, then X(P) is +** invoked whenever the database connection closes or when the callback +** is overwritten by another invocation of sqlite3_autovacuum_pages(). +** +**

    ^There is only one autovacuum pages callback per database connection. +** ^Each call to the sqlite3_autovacuum_pages() interface overrides all +** previous invocations for that database connection. ^If the callback +** argument (C) to sqlite3_autovacuum_pages(D,C,P,X) is a NULL pointer, +** then the autovacuum steps callback is cancelled. The return value +** from sqlite3_autovacuum_pages() is normally SQLITE_OK, but might +** be some other error code if something goes wrong. The current +** implementation will only return SQLITE_OK or SQLITE_MISUSE, but other +** return codes might be added in future releases. +** +**

    If no autovacuum pages callback is specified (the usual case) or +** a NULL pointer is provided for the callback, +** then the default behavior is to vacuum all free pages. So, in other +** words, the default behavior is the same as if the callback function +** were something like this: +** +**

    +**     unsigned int demonstration_autovac_pages_callback(
    +**       void *pClientData,
    +**       const char *zSchema,
    +**       unsigned int nDbPage,
    +**       unsigned int nFreePage,
    +**       unsigned int nBytePerPage
    +**     ){
    +**       return nFreePage;
    +**     }
    +** 
    +*/ +SQLITE_API int sqlite3_autovacuum_pages( + sqlite3 *db, + unsigned int(*)(void*,const char*,unsigned int,unsigned int,unsigned int), + void*, + void(*)(void*) +); + + +/* +** CAPI3REF: Data Change Notification Callbacks +** METHOD: sqlite3 +** +** ^The sqlite3_update_hook() interface registers a callback function +** with the [database connection] identified by the first argument +** to be invoked whenever a row is updated, inserted or deleted in +** a [rowid table]. +** ^Any callback set by a previous call to this function +** for the same database connection is overridden. +** +** ^The second argument is a pointer to the function to invoke when a +** row is updated, inserted or deleted in a rowid table. +** ^The first argument to the callback is a copy of the third argument +** to sqlite3_update_hook(). +** ^The second callback argument is one of [SQLITE_INSERT], [SQLITE_DELETE], +** or [SQLITE_UPDATE], depending on the operation that caused the callback +** to be invoked. +** ^The third and fourth arguments to the callback contain pointers to the +** database and table name containing the affected row. +** ^The final callback parameter is the [rowid] of the row. +** ^In the case of an update, this is the [rowid] after the update takes place. +** +** ^(The update hook is not invoked when internal system tables are +** modified (i.e. sqlite_sequence).)^ +** ^The update hook is not invoked when [WITHOUT ROWID] tables are modified. +** +** ^In the current implementation, the update hook +** is not invoked when conflicting rows are deleted because of an +** [ON CONFLICT | ON CONFLICT REPLACE] clause. ^Nor is the update hook +** invoked when rows are deleted using the [truncate optimization]. +** The exceptions defined in this paragraph might change in a future +** release of SQLite. +** +** The update hook implementation must not do anything that will modify +** the database connection that invoked the update hook. Any actions +** to modify the database connection must be deferred until after the +** completion of the [sqlite3_step()] call that triggered the update hook. +** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their +** database connections for the meaning of "modify" in this paragraph. +** +** ^The sqlite3_update_hook(D,C,P) function +** returns the P argument from the previous call +** on the same [database connection] D, or NULL for +** the first call on D. +** +** See also the [sqlite3_commit_hook()], [sqlite3_rollback_hook()], +** and [sqlite3_preupdate_hook()] interfaces. +*/ +SQLITE_API void *sqlite3_update_hook( + sqlite3*, + void(*)(void *,int ,char const *,char const *,sqlite3_int64), + void* +); + +/* +** CAPI3REF: Enable Or Disable Shared Pager Cache +** +** ^(This routine enables or disables the sharing of the database cache +** and schema data structures between [database connection | connections] +** to the same database. Sharing is enabled if the argument is true +** and disabled if the argument is false.)^ +** +** ^Cache sharing is enabled and disabled for an entire process. +** This is a change as of SQLite [version 3.5.0] ([dateof:3.5.0]). +** In prior versions of SQLite, +** sharing was enabled or disabled for each thread separately. +** +** ^(The cache sharing mode set by this interface effects all subsequent +** calls to [sqlite3_open()], [sqlite3_open_v2()], and [sqlite3_open16()]. +** Existing database connections continue to use the sharing mode +** that was in effect at the time they were opened.)^ +** +** ^(This routine returns [SQLITE_OK] if shared cache was enabled or disabled +** successfully. An [error code] is returned otherwise.)^ +** +** ^Shared cache is disabled by default. It is recommended that it stay +** that way. In other words, do not use this routine. This interface +** continues to be provided for historical compatibility, but its use is +** discouraged. Any use of shared cache is discouraged. If shared cache +** must be used, it is recommended that shared cache only be enabled for +** individual database connections using the [sqlite3_open_v2()] interface +** with the [SQLITE_OPEN_SHAREDCACHE] flag. +** +** Note: This method is disabled on MacOS X 10.7 and iOS version 5.0 +** and will always return SQLITE_MISUSE. On those systems, +** shared cache mode should be enabled per-database connection via +** [sqlite3_open_v2()] with [SQLITE_OPEN_SHAREDCACHE]. +** +** This interface is threadsafe on processors where writing a +** 32-bit integer is atomic. +** +** See Also: [SQLite Shared-Cache Mode] +*/ +SQLITE_API int sqlite3_enable_shared_cache(int); + +/* +** CAPI3REF: Attempt To Free Heap Memory +** +** ^The sqlite3_release_memory() interface attempts to free N bytes +** of heap memory by deallocating non-essential memory allocations +** held by the database library. Memory used to cache database +** pages to improve performance is an example of non-essential memory. +** ^sqlite3_release_memory() returns the number of bytes actually freed, +** which might be more or less than the amount requested. +** ^The sqlite3_release_memory() routine is a no-op returning zero +** if SQLite is not compiled with [SQLITE_ENABLE_MEMORY_MANAGEMENT]. +** +** See also: [sqlite3_db_release_memory()] +*/ +SQLITE_API int sqlite3_release_memory(int); + +/* +** CAPI3REF: Free Memory Used By A Database Connection +** METHOD: sqlite3 +** +** ^The sqlite3_db_release_memory(D) interface attempts to free as much heap +** memory as possible from database connection D. Unlike the +** [sqlite3_release_memory()] interface, this interface is in effect even +** when the [SQLITE_ENABLE_MEMORY_MANAGEMENT] compile-time option is +** omitted. +** +** See also: [sqlite3_release_memory()] +*/ +SQLITE_API int sqlite3_db_release_memory(sqlite3*); + +/* +** CAPI3REF: Impose A Limit On Heap Size +** +** These interfaces impose limits on the amount of heap memory that will be +** by all database connections within a single process. +** +** ^The sqlite3_soft_heap_limit64() interface sets and/or queries the +** soft limit on the amount of heap memory that may be allocated by SQLite. +** ^SQLite strives to keep heap memory utilization below the soft heap +** limit by reducing the number of pages held in the page cache +** as heap memory usages approaches the limit. +** ^The soft heap limit is "soft" because even though SQLite strives to stay +** below the limit, it will exceed the limit rather than generate +** an [SQLITE_NOMEM] error. In other words, the soft heap limit +** is advisory only. +** +** ^The sqlite3_hard_heap_limit64(N) interface sets a hard upper bound of +** N bytes on the amount of memory that will be allocated. ^The +** sqlite3_hard_heap_limit64(N) interface is similar to +** sqlite3_soft_heap_limit64(N) except that memory allocations will fail +** when the hard heap limit is reached. +** +** ^The return value from both sqlite3_soft_heap_limit64() and +** sqlite3_hard_heap_limit64() is the size of +** the heap limit prior to the call, or negative in the case of an +** error. ^If the argument N is negative +** then no change is made to the heap limit. Hence, the current +** size of heap limits can be determined by invoking +** sqlite3_soft_heap_limit64(-1) or sqlite3_hard_heap_limit(-1). +** +** ^Setting the heap limits to zero disables the heap limiter mechanism. +** +** ^The soft heap limit may not be greater than the hard heap limit. +** ^If the hard heap limit is enabled and if sqlite3_soft_heap_limit(N) +** is invoked with a value of N that is greater than the hard heap limit, +** the the soft heap limit is set to the value of the hard heap limit. +** ^The soft heap limit is automatically enabled whenever the hard heap +** limit is enabled. ^When sqlite3_hard_heap_limit64(N) is invoked and +** the soft heap limit is outside the range of 1..N, then the soft heap +** limit is set to N. ^Invoking sqlite3_soft_heap_limit64(0) when the +** hard heap limit is enabled makes the soft heap limit equal to the +** hard heap limit. +** +** The memory allocation limits can also be adjusted using +** [PRAGMA soft_heap_limit] and [PRAGMA hard_heap_limit]. +** +** ^(The heap limits are not enforced in the current implementation +** if one or more of following conditions are true: +** +**
      +**
    • The limit value is set to zero. +**
    • Memory accounting is disabled using a combination of the +** [sqlite3_config]([SQLITE_CONFIG_MEMSTATUS],...) start-time option and +** the [SQLITE_DEFAULT_MEMSTATUS] compile-time option. +**
    • An alternative page cache implementation is specified using +** [sqlite3_config]([SQLITE_CONFIG_PCACHE2],...). +**
    • The page cache allocates from its own memory pool supplied +** by [sqlite3_config]([SQLITE_CONFIG_PAGECACHE],...) rather than +** from the heap. +**
    )^ +** +** The circumstances under which SQLite will enforce the heap limits may +** changes in future releases of SQLite. +*/ +SQLITE_API sqlite3_int64 sqlite3_soft_heap_limit64(sqlite3_int64 N); +SQLITE_API sqlite3_int64 sqlite3_hard_heap_limit64(sqlite3_int64 N); + +/* +** CAPI3REF: Deprecated Soft Heap Limit Interface +** DEPRECATED +** +** This is a deprecated version of the [sqlite3_soft_heap_limit64()] +** interface. This routine is provided for historical compatibility +** only. All new applications should use the +** [sqlite3_soft_heap_limit64()] interface rather than this one. +*/ +SQLITE_API SQLITE_DEPRECATED void sqlite3_soft_heap_limit(int N); + + +/* +** CAPI3REF: Extract Metadata About A Column Of A Table +** METHOD: sqlite3 +** +** ^(The sqlite3_table_column_metadata(X,D,T,C,....) routine returns +** information about column C of table T in database D +** on [database connection] X.)^ ^The sqlite3_table_column_metadata() +** interface returns SQLITE_OK and fills in the non-NULL pointers in +** the final five arguments with appropriate values if the specified +** column exists. ^The sqlite3_table_column_metadata() interface returns +** SQLITE_ERROR if the specified column does not exist. +** ^If the column-name parameter to sqlite3_table_column_metadata() is a +** NULL pointer, then this routine simply checks for the existence of the +** table and returns SQLITE_OK if the table exists and SQLITE_ERROR if it +** does not. If the table name parameter T in a call to +** sqlite3_table_column_metadata(X,D,T,C,...) is NULL then the result is +** undefined behavior. +** +** ^The column is identified by the second, third and fourth parameters to +** this function. ^(The second parameter is either the name of the database +** (i.e. "main", "temp", or an attached database) containing the specified +** table or NULL.)^ ^If it is NULL, then all attached databases are searched +** for the table using the same algorithm used by the database engine to +** resolve unqualified table references. +** +** ^The third and fourth parameters to this function are the table and column +** name of the desired column, respectively. +** +** ^Metadata is returned by writing to the memory locations passed as the 5th +** and subsequent parameters to this function. ^Any of these arguments may be +** NULL, in which case the corresponding element of metadata is omitted. +** +** ^(
    +** +**
    Parameter Output
    Type
    Description +** +**
    5th const char* Data type +**
    6th const char* Name of default collation sequence +**
    7th int True if column has a NOT NULL constraint +**
    8th int True if column is part of the PRIMARY KEY +**
    9th int True if column is [AUTOINCREMENT] +**
    +**
    )^ +** +** ^The memory pointed to by the character pointers returned for the +** declaration type and collation sequence is valid until the next +** call to any SQLite API function. +** +** ^If the specified table is actually a view, an [error code] is returned. +** +** ^If the specified column is "rowid", "oid" or "_rowid_" and the table +** is not a [WITHOUT ROWID] table and an +** [INTEGER PRIMARY KEY] column has been explicitly declared, then the output +** parameters are set for the explicitly declared column. ^(If there is no +** [INTEGER PRIMARY KEY] column, then the outputs +** for the [rowid] are set as follows: +** +**
    +**     data type: "INTEGER"
    +**     collation sequence: "BINARY"
    +**     not null: 0
    +**     primary key: 1
    +**     auto increment: 0
    +** 
    )^ +** +** ^This function causes all database schemas to be read from disk and +** parsed, if that has not already been done, and returns an error if +** any errors are encountered while loading the schema. +*/ +SQLITE_API int sqlite3_table_column_metadata( + sqlite3 *db, /* Connection handle */ + const char *zDbName, /* Database name or NULL */ + const char *zTableName, /* Table name */ + const char *zColumnName, /* Column name */ + char const **pzDataType, /* OUTPUT: Declared data type */ + char const **pzCollSeq, /* OUTPUT: Collation sequence name */ + int *pNotNull, /* OUTPUT: True if NOT NULL constraint exists */ + int *pPrimaryKey, /* OUTPUT: True if column part of PK */ + int *pAutoinc /* OUTPUT: True if column is auto-increment */ +); + +/* +** CAPI3REF: Load An Extension +** METHOD: sqlite3 +** +** ^This interface loads an SQLite extension library from the named file. +** +** ^The sqlite3_load_extension() interface attempts to load an +** [SQLite extension] library contained in the file zFile. If +** the file cannot be loaded directly, attempts are made to load +** with various operating-system specific extensions added. +** So for example, if "samplelib" cannot be loaded, then names like +** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might +** be tried also. +** +** ^The entry point is zProc. +** ^(zProc may be 0, in which case SQLite will try to come up with an +** entry point name on its own. It first tries "sqlite3_extension_init". +** If that does not work, it constructs a name "sqlite3_X_init" where the +** X is consists of the lower-case equivalent of all ASCII alphabetic +** characters in the filename from the last "/" to the first following +** "." and omitting any initial "lib".)^ +** ^The sqlite3_load_extension() interface returns +** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong. +** ^If an error occurs and pzErrMsg is not 0, then the +** [sqlite3_load_extension()] interface shall attempt to +** fill *pzErrMsg with error message text stored in memory +** obtained from [sqlite3_malloc()]. The calling function +** should free this memory by calling [sqlite3_free()]. +** +** ^Extension loading must be enabled using +** [sqlite3_enable_load_extension()] or +** [sqlite3_db_config](db,[SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION],1,NULL) +** prior to calling this API, +** otherwise an error will be returned. +** +** Security warning: It is recommended that the +** [SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION] method be used to enable only this +** interface. The use of the [sqlite3_enable_load_extension()] interface +** should be avoided. This will keep the SQL function [load_extension()] +** disabled and prevent SQL injections from giving attackers +** access to extension loading capabilities. +** +** See also the [load_extension() SQL function]. +*/ +SQLITE_API int sqlite3_load_extension( + sqlite3 *db, /* Load the extension into this database connection */ + const char *zFile, /* Name of the shared library containing extension */ + const char *zProc, /* Entry point. Derived from zFile if 0 */ + char **pzErrMsg /* Put error message here if not 0 */ +); + +/* +** CAPI3REF: Enable Or Disable Extension Loading +** METHOD: sqlite3 +** +** ^So as not to open security holes in older applications that are +** unprepared to deal with [extension loading], and as a means of disabling +** [extension loading] while evaluating user-entered SQL, the following API +** is provided to turn the [sqlite3_load_extension()] mechanism on and off. +** +** ^Extension loading is off by default. +** ^Call the sqlite3_enable_load_extension() routine with onoff==1 +** to turn extension loading on and call it with onoff==0 to turn +** it back off again. +** +** ^This interface enables or disables both the C-API +** [sqlite3_load_extension()] and the SQL function [load_extension()]. +** ^(Use [sqlite3_db_config](db,[SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION],..) +** to enable or disable only the C-API.)^ +** +** Security warning: It is recommended that extension loading +** be enabled using the [SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION] method +** rather than this interface, so the [load_extension()] SQL function +** remains disabled. This will prevent SQL injections from giving attackers +** access to extension loading capabilities. +*/ +SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff); + +/* +** CAPI3REF: Automatically Load Statically Linked Extensions +** +** ^This interface causes the xEntryPoint() function to be invoked for +** each new [database connection] that is created. The idea here is that +** xEntryPoint() is the entry point for a statically linked [SQLite extension] +** that is to be automatically loaded into all new database connections. +** +** ^(Even though the function prototype shows that xEntryPoint() takes +** no arguments and returns void, SQLite invokes xEntryPoint() with three +** arguments and expects an integer result as if the signature of the +** entry point where as follows: +** +**
    +**    int xEntryPoint(
    +**      sqlite3 *db,
    +**      const char **pzErrMsg,
    +**      const struct sqlite3_api_routines *pThunk
    +**    );
    +** 
    )^ +** +** If the xEntryPoint routine encounters an error, it should make *pzErrMsg +** point to an appropriate error message (obtained from [sqlite3_mprintf()]) +** and return an appropriate [error code]. ^SQLite ensures that *pzErrMsg +** is NULL before calling the xEntryPoint(). ^SQLite will invoke +** [sqlite3_free()] on *pzErrMsg after xEntryPoint() returns. ^If any +** xEntryPoint() returns an error, the [sqlite3_open()], [sqlite3_open16()], +** or [sqlite3_open_v2()] call that provoked the xEntryPoint() will fail. +** +** ^Calling sqlite3_auto_extension(X) with an entry point X that is already +** on the list of automatic extensions is a harmless no-op. ^No entry point +** will be called more than once for each database connection that is opened. +** +** See also: [sqlite3_reset_auto_extension()] +** and [sqlite3_cancel_auto_extension()] +*/ +SQLITE_API int sqlite3_auto_extension(void(*xEntryPoint)(void)); + +/* +** CAPI3REF: Cancel Automatic Extension Loading +** +** ^The [sqlite3_cancel_auto_extension(X)] interface unregisters the +** initialization routine X that was registered using a prior call to +** [sqlite3_auto_extension(X)]. ^The [sqlite3_cancel_auto_extension(X)] +** routine returns 1 if initialization routine X was successfully +** unregistered and it returns 0 if X was not on the list of initialization +** routines. +*/ +SQLITE_API int sqlite3_cancel_auto_extension(void(*xEntryPoint)(void)); + +/* +** CAPI3REF: Reset Automatic Extension Loading +** +** ^This interface disables all automatic extensions previously +** registered using [sqlite3_auto_extension()]. +*/ +SQLITE_API void sqlite3_reset_auto_extension(void); + +/* +** The interface to the virtual-table mechanism is currently considered +** to be experimental. The interface might change in incompatible ways. +** If this is a problem for you, do not use the interface at this time. +** +** When the virtual-table mechanism stabilizes, we will declare the +** interface fixed, support it indefinitely, and remove this comment. +*/ + +/* +** Structures used by the virtual table interface +*/ +typedef struct sqlite3_vtab sqlite3_vtab; +typedef struct sqlite3_index_info sqlite3_index_info; +typedef struct sqlite3_vtab_cursor sqlite3_vtab_cursor; +typedef struct sqlite3_module sqlite3_module; + +/* +** CAPI3REF: Virtual Table Object +** KEYWORDS: sqlite3_module {virtual table module} +** +** This structure, sometimes called a "virtual table module", +** defines the implementation of a [virtual table]. +** This structure consists mostly of methods for the module. +** +** ^A virtual table module is created by filling in a persistent +** instance of this structure and passing a pointer to that instance +** to [sqlite3_create_module()] or [sqlite3_create_module_v2()]. +** ^The registration remains valid until it is replaced by a different +** module or until the [database connection] closes. The content +** of this structure must not change while it is registered with +** any database connection. +*/ +struct sqlite3_module { + int iVersion; + int (*xCreate)(sqlite3*, void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVTab, char**); + int (*xConnect)(sqlite3*, void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVTab, char**); + int (*xBestIndex)(sqlite3_vtab *pVTab, sqlite3_index_info*); + int (*xDisconnect)(sqlite3_vtab *pVTab); + int (*xDestroy)(sqlite3_vtab *pVTab); + int (*xOpen)(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor); + int (*xClose)(sqlite3_vtab_cursor*); + int (*xFilter)(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, + int argc, sqlite3_value **argv); + int (*xNext)(sqlite3_vtab_cursor*); + int (*xEof)(sqlite3_vtab_cursor*); + int (*xColumn)(sqlite3_vtab_cursor*, sqlite3_context*, int); + int (*xRowid)(sqlite3_vtab_cursor*, sqlite3_int64 *pRowid); + int (*xUpdate)(sqlite3_vtab *, int, sqlite3_value **, sqlite3_int64 *); + int (*xBegin)(sqlite3_vtab *pVTab); + int (*xSync)(sqlite3_vtab *pVTab); + int (*xCommit)(sqlite3_vtab *pVTab); + int (*xRollback)(sqlite3_vtab *pVTab); + int (*xFindFunction)(sqlite3_vtab *pVtab, int nArg, const char *zName, + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), + void **ppArg); + int (*xRename)(sqlite3_vtab *pVtab, const char *zNew); + /* The methods above are in version 1 of the sqlite_module object. Those + ** below are for version 2 and greater. */ + int (*xSavepoint)(sqlite3_vtab *pVTab, int); + int (*xRelease)(sqlite3_vtab *pVTab, int); + int (*xRollbackTo)(sqlite3_vtab *pVTab, int); + /* The methods above are in versions 1 and 2 of the sqlite_module object. + ** Those below are for version 3 and greater. */ + int (*xShadowName)(const char*); +}; + +/* +** CAPI3REF: Virtual Table Indexing Information +** KEYWORDS: sqlite3_index_info +** +** The sqlite3_index_info structure and its substructures is used as part +** of the [virtual table] interface to +** pass information into and receive the reply from the [xBestIndex] +** method of a [virtual table module]. The fields under **Inputs** are the +** inputs to xBestIndex and are read-only. xBestIndex inserts its +** results into the **Outputs** fields. +** +** ^(The aConstraint[] array records WHERE clause constraints of the form: +** +**
    column OP expr
    +** +** where OP is =, <, <=, >, or >=.)^ ^(The particular operator is +** stored in aConstraint[].op using one of the +** [SQLITE_INDEX_CONSTRAINT_EQ | SQLITE_INDEX_CONSTRAINT_ values].)^ +** ^(The index of the column is stored in +** aConstraint[].iColumn.)^ ^(aConstraint[].usable is TRUE if the +** expr on the right-hand side can be evaluated (and thus the constraint +** is usable) and false if it cannot.)^ +** +** ^The optimizer automatically inverts terms of the form "expr OP column" +** and makes other simplifications to the WHERE clause in an attempt to +** get as many WHERE clause terms into the form shown above as possible. +** ^The aConstraint[] array only reports WHERE clause terms that are +** relevant to the particular virtual table being queried. +** +** ^Information about the ORDER BY clause is stored in aOrderBy[]. +** ^Each term of aOrderBy records a column of the ORDER BY clause. +** +** The colUsed field indicates which columns of the virtual table may be +** required by the current scan. Virtual table columns are numbered from +** zero in the order in which they appear within the CREATE TABLE statement +** passed to sqlite3_declare_vtab(). For the first 63 columns (columns 0-62), +** the corresponding bit is set within the colUsed mask if the column may be +** required by SQLite. If the table has at least 64 columns and any column +** to the right of the first 63 is required, then bit 63 of colUsed is also +** set. In other words, column iCol may be required if the expression +** (colUsed & ((sqlite3_uint64)1 << (iCol>=63 ? 63 : iCol))) evaluates to +** non-zero. +** +** The [xBestIndex] method must fill aConstraintUsage[] with information +** about what parameters to pass to xFilter. ^If argvIndex>0 then +** the right-hand side of the corresponding aConstraint[] is evaluated +** and becomes the argvIndex-th entry in argv. ^(If aConstraintUsage[].omit +** is true, then the constraint is assumed to be fully handled by the +** virtual table and might not be checked again by the byte code.)^ ^(The +** aConstraintUsage[].omit flag is an optimization hint. When the omit flag +** is left in its default setting of false, the constraint will always be +** checked separately in byte code. If the omit flag is change to true, then +** the constraint may or may not be checked in byte code. In other words, +** when the omit flag is true there is no guarantee that the constraint will +** not be checked again using byte code.)^ +** +** ^The idxNum and idxPtr values are recorded and passed into the +** [xFilter] method. +** ^[sqlite3_free()] is used to free idxPtr if and only if +** needToFreeIdxPtr is true. +** +** ^The orderByConsumed means that output from [xFilter]/[xNext] will occur in +** the correct order to satisfy the ORDER BY clause so that no separate +** sorting step is required. +** +** ^The estimatedCost value is an estimate of the cost of a particular +** strategy. A cost of N indicates that the cost of the strategy is similar +** to a linear scan of an SQLite table with N rows. A cost of log(N) +** indicates that the expense of the operation is similar to that of a +** binary search on a unique indexed field of an SQLite table with N rows. +** +** ^The estimatedRows value is an estimate of the number of rows that +** will be returned by the strategy. +** +** The xBestIndex method may optionally populate the idxFlags field with a +** mask of SQLITE_INDEX_SCAN_* flags. Currently there is only one such flag - +** SQLITE_INDEX_SCAN_UNIQUE. If the xBestIndex method sets this flag, SQLite +** assumes that the strategy may visit at most one row. +** +** Additionally, if xBestIndex sets the SQLITE_INDEX_SCAN_UNIQUE flag, then +** SQLite also assumes that if a call to the xUpdate() method is made as +** part of the same statement to delete or update a virtual table row and the +** implementation returns SQLITE_CONSTRAINT, then there is no need to rollback +** any database changes. In other words, if the xUpdate() returns +** SQLITE_CONSTRAINT, the database contents must be exactly as they were +** before xUpdate was called. By contrast, if SQLITE_INDEX_SCAN_UNIQUE is not +** set and xUpdate returns SQLITE_CONSTRAINT, any database changes made by +** the xUpdate method are automatically rolled back by SQLite. +** +** IMPORTANT: The estimatedRows field was added to the sqlite3_index_info +** structure for SQLite [version 3.8.2] ([dateof:3.8.2]). +** If a virtual table extension is +** used with an SQLite version earlier than 3.8.2, the results of attempting +** to read or write the estimatedRows field are undefined (but are likely +** to include crashing the application). The estimatedRows field should +** therefore only be used if [sqlite3_libversion_number()] returns a +** value greater than or equal to 3008002. Similarly, the idxFlags field +** was added for [version 3.9.0] ([dateof:3.9.0]). +** It may therefore only be used if +** sqlite3_libversion_number() returns a value greater than or equal to +** 3009000. +*/ +struct sqlite3_index_info { + /* Inputs */ + int nConstraint; /* Number of entries in aConstraint */ + struct sqlite3_index_constraint { + int iColumn; /* Column constrained. -1 for ROWID */ + unsigned char op; /* Constraint operator */ + unsigned char usable; /* True if this constraint is usable */ + int iTermOffset; /* Used internally - xBestIndex should ignore */ + } *aConstraint; /* Table of WHERE clause constraints */ + int nOrderBy; /* Number of terms in the ORDER BY clause */ + struct sqlite3_index_orderby { + int iColumn; /* Column number */ + unsigned char desc; /* True for DESC. False for ASC. */ + } *aOrderBy; /* The ORDER BY clause */ + /* Outputs */ + struct sqlite3_index_constraint_usage { + int argvIndex; /* if >0, constraint is part of argv to xFilter */ + unsigned char omit; /* Do not code a test for this constraint */ + } *aConstraintUsage; + int idxNum; /* Number used to identify the index */ + char *idxStr; /* String, possibly obtained from sqlite3_malloc */ + int needToFreeIdxStr; /* Free idxStr using sqlite3_free() if true */ + int orderByConsumed; /* True if output is already ordered */ + double estimatedCost; /* Estimated cost of using this index */ + /* Fields below are only available in SQLite 3.8.2 and later */ + sqlite3_int64 estimatedRows; /* Estimated number of rows returned */ + /* Fields below are only available in SQLite 3.9.0 and later */ + int idxFlags; /* Mask of SQLITE_INDEX_SCAN_* flags */ + /* Fields below are only available in SQLite 3.10.0 and later */ + sqlite3_uint64 colUsed; /* Input: Mask of columns used by statement */ +}; + +/* +** CAPI3REF: Virtual Table Scan Flags +** +** Virtual table implementations are allowed to set the +** [sqlite3_index_info].idxFlags field to some combination of +** these bits. +*/ +#define SQLITE_INDEX_SCAN_UNIQUE 1 /* Scan visits at most 1 row */ + +/* +** CAPI3REF: Virtual Table Constraint Operator Codes +** +** These macros define the allowed values for the +** [sqlite3_index_info].aConstraint[].op field. Each value represents +** an operator that is part of a constraint term in the WHERE clause of +** a query that uses a [virtual table]. +** +** ^The left-hand operand of the operator is given by the corresponding +** aConstraint[].iColumn field. ^An iColumn of -1 indicates the left-hand +** operand is the rowid. +** The SQLITE_INDEX_CONSTRAINT_LIMIT and SQLITE_INDEX_CONSTRAINT_OFFSET +** operators have no left-hand operand, and so for those operators the +** corresponding aConstraint[].iColumn is meaningless and should not be +** used. +** +** All operator values from SQLITE_INDEX_CONSTRAINT_FUNCTION through +** value 255 are reserved to represent functions that are overloaded +** by the [xFindFunction|xFindFunction method] of the virtual table +** implementation. +** +** The right-hand operands for each constraint might be accessible using +** the [sqlite3_vtab_rhs_value()] interface. Usually the right-hand +** operand is only available if it appears as a single constant literal +** in the input SQL. If the right-hand operand is another column or an +** expression (even a constant expression) or a parameter, then the +** sqlite3_vtab_rhs_value() probably will not be able to extract it. +** ^The SQLITE_INDEX_CONSTRAINT_ISNULL and +** SQLITE_INDEX_CONSTRAINT_ISNOTNULL operators have no right-hand operand +** and hence calls to sqlite3_vtab_rhs_value() for those operators will +** always return SQLITE_NOTFOUND. +** +** The collating sequence to be used for comparison can be found using +** the [sqlite3_vtab_collation()] interface. For most real-world virtual +** tables, the collating sequence of constraints does not matter (for example +** because the constraints are numeric) and so the sqlite3_vtab_collation() +** interface is no commonly needed. +*/ +#define SQLITE_INDEX_CONSTRAINT_EQ 2 +#define SQLITE_INDEX_CONSTRAINT_GT 4 +#define SQLITE_INDEX_CONSTRAINT_LE 8 +#define SQLITE_INDEX_CONSTRAINT_LT 16 +#define SQLITE_INDEX_CONSTRAINT_GE 32 +#define SQLITE_INDEX_CONSTRAINT_MATCH 64 +#define SQLITE_INDEX_CONSTRAINT_LIKE 65 +#define SQLITE_INDEX_CONSTRAINT_GLOB 66 +#define SQLITE_INDEX_CONSTRAINT_REGEXP 67 +#define SQLITE_INDEX_CONSTRAINT_NE 68 +#define SQLITE_INDEX_CONSTRAINT_ISNOT 69 +#define SQLITE_INDEX_CONSTRAINT_ISNOTNULL 70 +#define SQLITE_INDEX_CONSTRAINT_ISNULL 71 +#define SQLITE_INDEX_CONSTRAINT_IS 72 +#define SQLITE_INDEX_CONSTRAINT_LIMIT 73 +#define SQLITE_INDEX_CONSTRAINT_OFFSET 74 +#define SQLITE_INDEX_CONSTRAINT_FUNCTION 150 + +/* +** CAPI3REF: Register A Virtual Table Implementation +** METHOD: sqlite3 +** +** ^These routines are used to register a new [virtual table module] name. +** ^Module names must be registered before +** creating a new [virtual table] using the module and before using a +** preexisting [virtual table] for the module. +** +** ^The module name is registered on the [database connection] specified +** by the first parameter. ^The name of the module is given by the +** second parameter. ^The third parameter is a pointer to +** the implementation of the [virtual table module]. ^The fourth +** parameter is an arbitrary client data pointer that is passed through +** into the [xCreate] and [xConnect] methods of the virtual table module +** when a new virtual table is be being created or reinitialized. +** +** ^The sqlite3_create_module_v2() interface has a fifth parameter which +** is a pointer to a destructor for the pClientData. ^SQLite will +** invoke the destructor function (if it is not NULL) when SQLite +** no longer needs the pClientData pointer. ^The destructor will also +** be invoked if the call to sqlite3_create_module_v2() fails. +** ^The sqlite3_create_module() +** interface is equivalent to sqlite3_create_module_v2() with a NULL +** destructor. +** +** ^If the third parameter (the pointer to the sqlite3_module object) is +** NULL then no new module is created and any existing modules with the +** same name are dropped. +** +** See also: [sqlite3_drop_modules()] +*/ +SQLITE_API int sqlite3_create_module( + sqlite3 *db, /* SQLite connection to register module with */ + const char *zName, /* Name of the module */ + const sqlite3_module *p, /* Methods for the module */ + void *pClientData /* Client data for xCreate/xConnect */ +); +SQLITE_API int sqlite3_create_module_v2( + sqlite3 *db, /* SQLite connection to register module with */ + const char *zName, /* Name of the module */ + const sqlite3_module *p, /* Methods for the module */ + void *pClientData, /* Client data for xCreate/xConnect */ + void(*xDestroy)(void*) /* Module destructor function */ +); + +/* +** CAPI3REF: Remove Unnecessary Virtual Table Implementations +** METHOD: sqlite3 +** +** ^The sqlite3_drop_modules(D,L) interface removes all virtual +** table modules from database connection D except those named on list L. +** The L parameter must be either NULL or a pointer to an array of pointers +** to strings where the array is terminated by a single NULL pointer. +** ^If the L parameter is NULL, then all virtual table modules are removed. +** +** See also: [sqlite3_create_module()] +*/ +SQLITE_API int sqlite3_drop_modules( + sqlite3 *db, /* Remove modules from this connection */ + const char **azKeep /* Except, do not remove the ones named here */ +); + +/* +** CAPI3REF: Virtual Table Instance Object +** KEYWORDS: sqlite3_vtab +** +** Every [virtual table module] implementation uses a subclass +** of this object to describe a particular instance +** of the [virtual table]. Each subclass will +** be tailored to the specific needs of the module implementation. +** The purpose of this superclass is to define certain fields that are +** common to all module implementations. +** +** ^Virtual tables methods can set an error message by assigning a +** string obtained from [sqlite3_mprintf()] to zErrMsg. The method should +** take care that any prior string is freed by a call to [sqlite3_free()] +** prior to assigning a new string to zErrMsg. ^After the error message +** is delivered up to the client application, the string will be automatically +** freed by sqlite3_free() and the zErrMsg field will be zeroed. +*/ +struct sqlite3_vtab { + const sqlite3_module *pModule; /* The module for this virtual table */ + int nRef; /* Number of open cursors */ + char *zErrMsg; /* Error message from sqlite3_mprintf() */ + /* Virtual table implementations will typically add additional fields */ +}; + +/* +** CAPI3REF: Virtual Table Cursor Object +** KEYWORDS: sqlite3_vtab_cursor {virtual table cursor} +** +** Every [virtual table module] implementation uses a subclass of the +** following structure to describe cursors that point into the +** [virtual table] and are used +** to loop through the virtual table. Cursors are created using the +** [sqlite3_module.xOpen | xOpen] method of the module and are destroyed +** by the [sqlite3_module.xClose | xClose] method. Cursors are used +** by the [xFilter], [xNext], [xEof], [xColumn], and [xRowid] methods +** of the module. Each module implementation will define +** the content of a cursor structure to suit its own needs. +** +** This superclass exists in order to define fields of the cursor that +** are common to all implementations. +*/ +struct sqlite3_vtab_cursor { + sqlite3_vtab *pVtab; /* Virtual table of this cursor */ + /* Virtual table implementations will typically add additional fields */ +}; + +/* +** CAPI3REF: Declare The Schema Of A Virtual Table +** +** ^The [xCreate] and [xConnect] methods of a +** [virtual table module] call this interface +** to declare the format (the names and datatypes of the columns) of +** the virtual tables they implement. +*/ +SQLITE_API int sqlite3_declare_vtab(sqlite3*, const char *zSQL); + +/* +** CAPI3REF: Overload A Function For A Virtual Table +** METHOD: sqlite3 +** +** ^(Virtual tables can provide alternative implementations of functions +** using the [xFindFunction] method of the [virtual table module]. +** But global versions of those functions +** must exist in order to be overloaded.)^ +** +** ^(This API makes sure a global version of a function with a particular +** name and number of parameters exists. If no such function exists +** before this API is called, a new function is created.)^ ^The implementation +** of the new function always causes an exception to be thrown. So +** the new function is not good for anything by itself. Its only +** purpose is to be a placeholder function that can be overloaded +** by a [virtual table]. +*/ +SQLITE_API int sqlite3_overload_function(sqlite3*, const char *zFuncName, int nArg); + +/* +** The interface to the virtual-table mechanism defined above (back up +** to a comment remarkably similar to this one) is currently considered +** to be experimental. The interface might change in incompatible ways. +** If this is a problem for you, do not use the interface at this time. +** +** When the virtual-table mechanism stabilizes, we will declare the +** interface fixed, support it indefinitely, and remove this comment. +*/ + +/* +** CAPI3REF: A Handle To An Open BLOB +** KEYWORDS: {BLOB handle} {BLOB handles} +** +** An instance of this object represents an open BLOB on which +** [sqlite3_blob_open | incremental BLOB I/O] can be performed. +** ^Objects of this type are created by [sqlite3_blob_open()] +** and destroyed by [sqlite3_blob_close()]. +** ^The [sqlite3_blob_read()] and [sqlite3_blob_write()] interfaces +** can be used to read or write small subsections of the BLOB. +** ^The [sqlite3_blob_bytes()] interface returns the size of the BLOB in bytes. +*/ +typedef struct sqlite3_blob sqlite3_blob; + +/* +** CAPI3REF: Open A BLOB For Incremental I/O +** METHOD: sqlite3 +** CONSTRUCTOR: sqlite3_blob +** +** ^(This interfaces opens a [BLOB handle | handle] to the BLOB located +** in row iRow, column zColumn, table zTable in database zDb; +** in other words, the same BLOB that would be selected by: +** +**
    +**     SELECT zColumn FROM zDb.zTable WHERE [rowid] = iRow;
    +** 
    )^ +** +** ^(Parameter zDb is not the filename that contains the database, but +** rather the symbolic name of the database. For attached databases, this is +** the name that appears after the AS keyword in the [ATTACH] statement. +** For the main database file, the database name is "main". For TEMP +** tables, the database name is "temp".)^ +** +** ^If the flags parameter is non-zero, then the BLOB is opened for read +** and write access. ^If the flags parameter is zero, the BLOB is opened for +** read-only access. +** +** ^(On success, [SQLITE_OK] is returned and the new [BLOB handle] is stored +** in *ppBlob. Otherwise an [error code] is returned and, unless the error +** code is SQLITE_MISUSE, *ppBlob is set to NULL.)^ ^This means that, provided +** the API is not misused, it is always safe to call [sqlite3_blob_close()] +** on *ppBlob after this function it returns. +** +** This function fails with SQLITE_ERROR if any of the following are true: +**
      +**
    • ^(Database zDb does not exist)^, +**
    • ^(Table zTable does not exist within database zDb)^, +**
    • ^(Table zTable is a WITHOUT ROWID table)^, +**
    • ^(Column zColumn does not exist)^, +**
    • ^(Row iRow is not present in the table)^, +**
    • ^(The specified column of row iRow contains a value that is not +** a TEXT or BLOB value)^, +**
    • ^(Column zColumn is part of an index, PRIMARY KEY or UNIQUE +** constraint and the blob is being opened for read/write access)^, +**
    • ^([foreign key constraints | Foreign key constraints] are enabled, +** column zColumn is part of a [child key] definition and the blob is +** being opened for read/write access)^. +**
    +** +** ^Unless it returns SQLITE_MISUSE, this function sets the +** [database connection] error code and message accessible via +** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions. +** +** A BLOB referenced by sqlite3_blob_open() may be read using the +** [sqlite3_blob_read()] interface and modified by using +** [sqlite3_blob_write()]. The [BLOB handle] can be moved to a +** different row of the same table using the [sqlite3_blob_reopen()] +** interface. However, the column, table, or database of a [BLOB handle] +** cannot be changed after the [BLOB handle] is opened. +** +** ^(If the row that a BLOB handle points to is modified by an +** [UPDATE], [DELETE], or by [ON CONFLICT] side-effects +** then the BLOB handle is marked as "expired". +** This is true if any column of the row is changed, even a column +** other than the one the BLOB handle is open on.)^ +** ^Calls to [sqlite3_blob_read()] and [sqlite3_blob_write()] for +** an expired BLOB handle fail with a return code of [SQLITE_ABORT]. +** ^(Changes written into a BLOB prior to the BLOB expiring are not +** rolled back by the expiration of the BLOB. Such changes will eventually +** commit if the transaction continues to completion.)^ +** +** ^Use the [sqlite3_blob_bytes()] interface to determine the size of +** the opened blob. ^The size of a blob may not be changed by this +** interface. Use the [UPDATE] SQL command to change the size of a +** blob. +** +** ^The [sqlite3_bind_zeroblob()] and [sqlite3_result_zeroblob()] interfaces +** and the built-in [zeroblob] SQL function may be used to create a +** zero-filled blob to read or write using the incremental-blob interface. +** +** To avoid a resource leak, every open [BLOB handle] should eventually +** be released by a call to [sqlite3_blob_close()]. +** +** See also: [sqlite3_blob_close()], +** [sqlite3_blob_reopen()], [sqlite3_blob_read()], +** [sqlite3_blob_bytes()], [sqlite3_blob_write()]. +*/ +SQLITE_API int sqlite3_blob_open( + sqlite3*, + const char *zDb, + const char *zTable, + const char *zColumn, + sqlite3_int64 iRow, + int flags, + sqlite3_blob **ppBlob +); + +/* +** CAPI3REF: Move a BLOB Handle to a New Row +** METHOD: sqlite3_blob +** +** ^This function is used to move an existing [BLOB handle] so that it points +** to a different row of the same database table. ^The new row is identified +** by the rowid value passed as the second argument. Only the row can be +** changed. ^The database, table and column on which the blob handle is open +** remain the same. Moving an existing [BLOB handle] to a new row is +** faster than closing the existing handle and opening a new one. +** +** ^(The new row must meet the same criteria as for [sqlite3_blob_open()] - +** it must exist and there must be either a blob or text value stored in +** the nominated column.)^ ^If the new row is not present in the table, or if +** it does not contain a blob or text value, or if another error occurs, an +** SQLite error code is returned and the blob handle is considered aborted. +** ^All subsequent calls to [sqlite3_blob_read()], [sqlite3_blob_write()] or +** [sqlite3_blob_reopen()] on an aborted blob handle immediately return +** SQLITE_ABORT. ^Calling [sqlite3_blob_bytes()] on an aborted blob handle +** always returns zero. +** +** ^This function sets the database handle error code and message. +*/ +SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); + +/* +** CAPI3REF: Close A BLOB Handle +** DESTRUCTOR: sqlite3_blob +** +** ^This function closes an open [BLOB handle]. ^(The BLOB handle is closed +** unconditionally. Even if this routine returns an error code, the +** handle is still closed.)^ +** +** ^If the blob handle being closed was opened for read-write access, and if +** the database is in auto-commit mode and there are no other open read-write +** blob handles or active write statements, the current transaction is +** committed. ^If an error occurs while committing the transaction, an error +** code is returned and the transaction rolled back. +** +** Calling this function with an argument that is not a NULL pointer or an +** open blob handle results in undefined behaviour. ^Calling this routine +** with a null pointer (such as would be returned by a failed call to +** [sqlite3_blob_open()]) is a harmless no-op. ^Otherwise, if this function +** is passed a valid open blob handle, the values returned by the +** sqlite3_errcode() and sqlite3_errmsg() functions are set before returning. +*/ +SQLITE_API int sqlite3_blob_close(sqlite3_blob *); + +/* +** CAPI3REF: Return The Size Of An Open BLOB +** METHOD: sqlite3_blob +** +** ^Returns the size in bytes of the BLOB accessible via the +** successfully opened [BLOB handle] in its only argument. ^The +** incremental blob I/O routines can only read or overwriting existing +** blob content; they cannot change the size of a blob. +** +** This routine only works on a [BLOB handle] which has been created +** by a prior successful call to [sqlite3_blob_open()] and which has not +** been closed by [sqlite3_blob_close()]. Passing any other pointer in +** to this routine results in undefined and probably undesirable behavior. +*/ +SQLITE_API int sqlite3_blob_bytes(sqlite3_blob *); + +/* +** CAPI3REF: Read Data From A BLOB Incrementally +** METHOD: sqlite3_blob +** +** ^(This function is used to read data from an open [BLOB handle] into a +** caller-supplied buffer. N bytes of data are copied into buffer Z +** from the open BLOB, starting at offset iOffset.)^ +** +** ^If offset iOffset is less than N bytes from the end of the BLOB, +** [SQLITE_ERROR] is returned and no data is read. ^If N or iOffset is +** less than zero, [SQLITE_ERROR] is returned and no data is read. +** ^The size of the blob (and hence the maximum value of N+iOffset) +** can be determined using the [sqlite3_blob_bytes()] interface. +** +** ^An attempt to read from an expired [BLOB handle] fails with an +** error code of [SQLITE_ABORT]. +** +** ^(On success, sqlite3_blob_read() returns SQLITE_OK. +** Otherwise, an [error code] or an [extended error code] is returned.)^ +** +** This routine only works on a [BLOB handle] which has been created +** by a prior successful call to [sqlite3_blob_open()] and which has not +** been closed by [sqlite3_blob_close()]. Passing any other pointer in +** to this routine results in undefined and probably undesirable behavior. +** +** See also: [sqlite3_blob_write()]. +*/ +SQLITE_API int sqlite3_blob_read(sqlite3_blob *, void *Z, int N, int iOffset); + +/* +** CAPI3REF: Write Data Into A BLOB Incrementally +** METHOD: sqlite3_blob +** +** ^(This function is used to write data into an open [BLOB handle] from a +** caller-supplied buffer. N bytes of data are copied from the buffer Z +** into the open BLOB, starting at offset iOffset.)^ +** +** ^(On success, sqlite3_blob_write() returns SQLITE_OK. +** Otherwise, an [error code] or an [extended error code] is returned.)^ +** ^Unless SQLITE_MISUSE is returned, this function sets the +** [database connection] error code and message accessible via +** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions. +** +** ^If the [BLOB handle] passed as the first argument was not opened for +** writing (the flags parameter to [sqlite3_blob_open()] was zero), +** this function returns [SQLITE_READONLY]. +** +** This function may only modify the contents of the BLOB; it is +** not possible to increase the size of a BLOB using this API. +** ^If offset iOffset is less than N bytes from the end of the BLOB, +** [SQLITE_ERROR] is returned and no data is written. The size of the +** BLOB (and hence the maximum value of N+iOffset) can be determined +** using the [sqlite3_blob_bytes()] interface. ^If N or iOffset are less +** than zero [SQLITE_ERROR] is returned and no data is written. +** +** ^An attempt to write to an expired [BLOB handle] fails with an +** error code of [SQLITE_ABORT]. ^Writes to the BLOB that occurred +** before the [BLOB handle] expired are not rolled back by the +** expiration of the handle, though of course those changes might +** have been overwritten by the statement that expired the BLOB handle +** or by other independent statements. +** +** This routine only works on a [BLOB handle] which has been created +** by a prior successful call to [sqlite3_blob_open()] and which has not +** been closed by [sqlite3_blob_close()]. Passing any other pointer in +** to this routine results in undefined and probably undesirable behavior. +** +** See also: [sqlite3_blob_read()]. +*/ +SQLITE_API int sqlite3_blob_write(sqlite3_blob *, const void *z, int n, int iOffset); + +/* +** CAPI3REF: Virtual File System Objects +** +** A virtual filesystem (VFS) is an [sqlite3_vfs] object +** that SQLite uses to interact +** with the underlying operating system. Most SQLite builds come with a +** single default VFS that is appropriate for the host computer. +** New VFSes can be registered and existing VFSes can be unregistered. +** The following interfaces are provided. +** +** ^The sqlite3_vfs_find() interface returns a pointer to a VFS given its name. +** ^Names are case sensitive. +** ^Names are zero-terminated UTF-8 strings. +** ^If there is no match, a NULL pointer is returned. +** ^If zVfsName is NULL then the default VFS is returned. +** +** ^New VFSes are registered with sqlite3_vfs_register(). +** ^Each new VFS becomes the default VFS if the makeDflt flag is set. +** ^The same VFS can be registered multiple times without injury. +** ^To make an existing VFS into the default VFS, register it again +** with the makeDflt flag set. If two different VFSes with the +** same name are registered, the behavior is undefined. If a +** VFS is registered with a name that is NULL or an empty string, +** then the behavior is undefined. +** +** ^Unregister a VFS with the sqlite3_vfs_unregister() interface. +** ^(If the default VFS is unregistered, another VFS is chosen as +** the default. The choice for the new VFS is arbitrary.)^ +*/ +SQLITE_API sqlite3_vfs *sqlite3_vfs_find(const char *zVfsName); +SQLITE_API int sqlite3_vfs_register(sqlite3_vfs*, int makeDflt); +SQLITE_API int sqlite3_vfs_unregister(sqlite3_vfs*); + +/* +** CAPI3REF: Mutexes +** +** The SQLite core uses these routines for thread +** synchronization. Though they are intended for internal +** use by SQLite, code that links against SQLite is +** permitted to use any of these routines. +** +** The SQLite source code contains multiple implementations +** of these mutex routines. An appropriate implementation +** is selected automatically at compile-time. The following +** implementations are available in the SQLite core: +** +**
      +**
    • SQLITE_MUTEX_PTHREADS +**
    • SQLITE_MUTEX_W32 +**
    • SQLITE_MUTEX_NOOP +**
    +** +** The SQLITE_MUTEX_NOOP implementation is a set of routines +** that does no real locking and is appropriate for use in +** a single-threaded application. The SQLITE_MUTEX_PTHREADS and +** SQLITE_MUTEX_W32 implementations are appropriate for use on Unix +** and Windows. +** +** If SQLite is compiled with the SQLITE_MUTEX_APPDEF preprocessor +** macro defined (with "-DSQLITE_MUTEX_APPDEF=1"), then no mutex +** implementation is included with the library. In this case the +** application must supply a custom mutex implementation using the +** [SQLITE_CONFIG_MUTEX] option of the sqlite3_config() function +** before calling sqlite3_initialize() or any other public sqlite3_ +** function that calls sqlite3_initialize(). +** +** ^The sqlite3_mutex_alloc() routine allocates a new +** mutex and returns a pointer to it. ^The sqlite3_mutex_alloc() +** routine returns NULL if it is unable to allocate the requested +** mutex. The argument to sqlite3_mutex_alloc() must one of these +** integer constants: +** +**
      +**
    • SQLITE_MUTEX_FAST +**
    • SQLITE_MUTEX_RECURSIVE +**
    • SQLITE_MUTEX_STATIC_MAIN +**
    • SQLITE_MUTEX_STATIC_MEM +**
    • SQLITE_MUTEX_STATIC_OPEN +**
    • SQLITE_MUTEX_STATIC_PRNG +**
    • SQLITE_MUTEX_STATIC_LRU +**
    • SQLITE_MUTEX_STATIC_PMEM +**
    • SQLITE_MUTEX_STATIC_APP1 +**
    • SQLITE_MUTEX_STATIC_APP2 +**
    • SQLITE_MUTEX_STATIC_APP3 +**
    • SQLITE_MUTEX_STATIC_VFS1 +**
    • SQLITE_MUTEX_STATIC_VFS2 +**
    • SQLITE_MUTEX_STATIC_VFS3 +**
    +** +** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) +** cause sqlite3_mutex_alloc() to create +** a new mutex. ^The new mutex is recursive when SQLITE_MUTEX_RECURSIVE +** is used but not necessarily so when SQLITE_MUTEX_FAST is used. +** The mutex implementation does not need to make a distinction +** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does +** not want to. SQLite will only request a recursive mutex in +** cases where it really needs one. If a faster non-recursive mutex +** implementation is available on the host platform, the mutex subsystem +** might return such a mutex in response to SQLITE_MUTEX_FAST. +** +** ^The other allowed parameters to sqlite3_mutex_alloc() (anything other +** than SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) each return +** a pointer to a static preexisting mutex. ^Nine static mutexes are +** used by the current version of SQLite. Future versions of SQLite +** may add additional static mutexes. Static mutexes are for internal +** use by SQLite only. Applications that use SQLite mutexes should +** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or +** SQLITE_MUTEX_RECURSIVE. +** +** ^Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST +** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() +** returns a different mutex on every call. ^For the static +** mutex types, the same mutex is returned on every call that has +** the same type number. +** +** ^The sqlite3_mutex_free() routine deallocates a previously +** allocated dynamic mutex. Attempting to deallocate a static +** mutex results in undefined behavior. +** +** ^The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. ^If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. ^The sqlite3_mutex_try() interface returns [SQLITE_OK] +** upon successful entry. ^(Mutexes created using +** SQLITE_MUTEX_RECURSIVE can be entered multiple times by the same thread. +** In such cases, the +** mutex must be exited an equal number of times before another thread +** can enter.)^ If the same thread tries to enter any mutex other +** than an SQLITE_MUTEX_RECURSIVE more than once, the behavior is undefined. +** +** ^(Some systems (for example, Windows 95) do not support the operation +** implemented by sqlite3_mutex_try(). On those systems, sqlite3_mutex_try() +** will always return SQLITE_BUSY. The SQLite core only ever uses +** sqlite3_mutex_try() as an optimization so this is acceptable +** behavior.)^ +** +** ^The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered by the +** calling thread or is not currently allocated. +** +** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or +** sqlite3_mutex_leave() is a NULL pointer, then all three routines +** behave as no-ops. +** +** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()]. +*/ +SQLITE_API sqlite3_mutex *sqlite3_mutex_alloc(int); +SQLITE_API void sqlite3_mutex_free(sqlite3_mutex*); +SQLITE_API void sqlite3_mutex_enter(sqlite3_mutex*); +SQLITE_API int sqlite3_mutex_try(sqlite3_mutex*); +SQLITE_API void sqlite3_mutex_leave(sqlite3_mutex*); + +/* +** CAPI3REF: Mutex Methods Object +** +** An instance of this structure defines the low-level routines +** used to allocate and use mutexes. +** +** Usually, the default mutex implementations provided by SQLite are +** sufficient, however the application has the option of substituting a custom +** implementation for specialized deployments or systems for which SQLite +** does not provide a suitable implementation. In this case, the application +** creates and populates an instance of this structure to pass +** to sqlite3_config() along with the [SQLITE_CONFIG_MUTEX] option. +** Additionally, an instance of this structure can be used as an +** output variable when querying the system for the current mutex +** implementation, using the [SQLITE_CONFIG_GETMUTEX] option. +** +** ^The xMutexInit method defined by this structure is invoked as +** part of system initialization by the sqlite3_initialize() function. +** ^The xMutexInit routine is called by SQLite exactly once for each +** effective call to [sqlite3_initialize()]. +** +** ^The xMutexEnd method defined by this structure is invoked as +** part of system shutdown by the sqlite3_shutdown() function. The +** implementation of this method is expected to release all outstanding +** resources obtained by the mutex methods implementation, especially +** those obtained by the xMutexInit method. ^The xMutexEnd() +** interface is invoked exactly once for each call to [sqlite3_shutdown()]. +** +** ^(The remaining seven methods defined by this structure (xMutexAlloc, +** xMutexFree, xMutexEnter, xMutexTry, xMutexLeave, xMutexHeld and +** xMutexNotheld) implement the following interfaces (respectively): +** +**
      +**
    • [sqlite3_mutex_alloc()]
    • +**
    • [sqlite3_mutex_free()]
    • +**
    • [sqlite3_mutex_enter()]
    • +**
    • [sqlite3_mutex_try()]
    • +**
    • [sqlite3_mutex_leave()]
    • +**
    • [sqlite3_mutex_held()]
    • +**
    • [sqlite3_mutex_notheld()]
    • +**
    )^ +** +** The only difference is that the public sqlite3_XXX functions enumerated +** above silently ignore any invocations that pass a NULL pointer instead +** of a valid mutex handle. The implementations of the methods defined +** by this structure are not required to handle this case. The results +** of passing a NULL pointer instead of a valid mutex handle are undefined +** (i.e. it is acceptable to provide an implementation that segfaults if +** it is passed a NULL pointer). +** +** The xMutexInit() method must be threadsafe. It must be harmless to +** invoke xMutexInit() multiple times within the same process and without +** intervening calls to xMutexEnd(). Second and subsequent calls to +** xMutexInit() must be no-ops. +** +** xMutexInit() must not use SQLite memory allocation ([sqlite3_malloc()] +** and its associates). Similarly, xMutexAlloc() must not use SQLite memory +** allocation for a static mutex. ^However xMutexAlloc() may use SQLite +** memory allocation for a fast or recursive mutex. +** +** ^SQLite will invoke the xMutexEnd() method when [sqlite3_shutdown()] is +** called, but only if the prior call to xMutexInit returned SQLITE_OK. +** If xMutexInit fails in any way, it is expected to clean up after itself +** prior to returning. +*/ +typedef struct sqlite3_mutex_methods sqlite3_mutex_methods; +struct sqlite3_mutex_methods { + int (*xMutexInit)(void); + int (*xMutexEnd)(void); + sqlite3_mutex *(*xMutexAlloc)(int); + void (*xMutexFree)(sqlite3_mutex *); + void (*xMutexEnter)(sqlite3_mutex *); + int (*xMutexTry)(sqlite3_mutex *); + void (*xMutexLeave)(sqlite3_mutex *); + int (*xMutexHeld)(sqlite3_mutex *); + int (*xMutexNotheld)(sqlite3_mutex *); +}; + +/* +** CAPI3REF: Mutex Verification Routines +** +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routines +** are intended for use inside assert() statements. The SQLite core +** never uses these routines except inside an assert() and applications +** are advised to follow the lead of the core. The SQLite core only +** provides implementations for these routines when it is compiled +** with the SQLITE_DEBUG flag. External mutex implementations +** are only required to provide these routines if SQLITE_DEBUG is +** defined and if NDEBUG is not defined. +** +** These routines should return true if the mutex in their argument +** is held or not held, respectively, by the calling thread. +** +** The implementation is not required to provide versions of these +** routines that actually work. If the implementation does not provide working +** versions of these routines, it should at least provide stubs that always +** return true so that one does not get spurious assertion failures. +** +** If the argument to sqlite3_mutex_held() is a NULL pointer then +** the routine should return 1. This seems counter-intuitive since +** clearly the mutex cannot be held if it does not exist. But +** the reason the mutex does not exist is because the build is not +** using mutexes. And we do not want the assert() containing the +** call to sqlite3_mutex_held() to fail, so a non-zero return is +** the appropriate thing to do. The sqlite3_mutex_notheld() +** interface should also return 1 when given a NULL pointer. +*/ +#ifndef NDEBUG +SQLITE_API int sqlite3_mutex_held(sqlite3_mutex*); +SQLITE_API int sqlite3_mutex_notheld(sqlite3_mutex*); +#endif + +/* +** CAPI3REF: Mutex Types +** +** The [sqlite3_mutex_alloc()] interface takes a single argument +** which is one of these integer constants. +** +** The set of static mutexes may change from one SQLite release to the +** next. Applications that override the built-in mutex logic must be +** prepared to accommodate additional static mutexes. +*/ +#define SQLITE_MUTEX_FAST 0 +#define SQLITE_MUTEX_RECURSIVE 1 +#define SQLITE_MUTEX_STATIC_MAIN 2 +#define SQLITE_MUTEX_STATIC_MEM 3 /* sqlite3_malloc() */ +#define SQLITE_MUTEX_STATIC_MEM2 4 /* NOT USED */ +#define SQLITE_MUTEX_STATIC_OPEN 4 /* sqlite3BtreeOpen() */ +#define SQLITE_MUTEX_STATIC_PRNG 5 /* sqlite3_randomness() */ +#define SQLITE_MUTEX_STATIC_LRU 6 /* lru page list */ +#define SQLITE_MUTEX_STATIC_LRU2 7 /* NOT USED */ +#define SQLITE_MUTEX_STATIC_PMEM 7 /* sqlite3PageMalloc() */ +#define SQLITE_MUTEX_STATIC_APP1 8 /* For use by application */ +#define SQLITE_MUTEX_STATIC_APP2 9 /* For use by application */ +#define SQLITE_MUTEX_STATIC_APP3 10 /* For use by application */ +#define SQLITE_MUTEX_STATIC_VFS1 11 /* For use by built-in VFS */ +#define SQLITE_MUTEX_STATIC_VFS2 12 /* For use by extension VFS */ +#define SQLITE_MUTEX_STATIC_VFS3 13 /* For use by application VFS */ + +/* Legacy compatibility: */ +#define SQLITE_MUTEX_STATIC_MASTER 2 + + +/* +** CAPI3REF: Retrieve the mutex for a database connection +** METHOD: sqlite3 +** +** ^This interface returns a pointer the [sqlite3_mutex] object that +** serializes access to the [database connection] given in the argument +** when the [threading mode] is Serialized. +** ^If the [threading mode] is Single-thread or Multi-thread then this +** routine returns a NULL pointer. +*/ +SQLITE_API sqlite3_mutex *sqlite3_db_mutex(sqlite3*); + +/* +** CAPI3REF: Low-Level Control Of Database Files +** METHOD: sqlite3 +** KEYWORDS: {file control} +** +** ^The [sqlite3_file_control()] interface makes a direct call to the +** xFileControl method for the [sqlite3_io_methods] object associated +** with a particular database identified by the second argument. ^The +** name of the database is "main" for the main database or "temp" for the +** TEMP database, or the name that appears after the AS keyword for +** databases that are added using the [ATTACH] SQL command. +** ^A NULL pointer can be used in place of "main" to refer to the +** main database file. +** ^The third and fourth parameters to this routine +** are passed directly through to the second and third parameters of +** the xFileControl method. ^The return value of the xFileControl +** method becomes the return value of this routine. +** +** A few opcodes for [sqlite3_file_control()] are handled directly +** by the SQLite core and never invoke the +** sqlite3_io_methods.xFileControl method. +** ^The [SQLITE_FCNTL_FILE_POINTER] value for the op parameter causes +** a pointer to the underlying [sqlite3_file] object to be written into +** the space pointed to by the 4th parameter. The +** [SQLITE_FCNTL_JOURNAL_POINTER] works similarly except that it returns +** the [sqlite3_file] object associated with the journal file instead of +** the main database. The [SQLITE_FCNTL_VFS_POINTER] opcode returns +** a pointer to the underlying [sqlite3_vfs] object for the file. +** The [SQLITE_FCNTL_DATA_VERSION] returns the data version counter +** from the pager. +** +** ^If the second parameter (zDbName) does not match the name of any +** open database file, then SQLITE_ERROR is returned. ^This error +** code is not remembered and will not be recalled by [sqlite3_errcode()] +** or [sqlite3_errmsg()]. The underlying xFileControl method might +** also return SQLITE_ERROR. There is no way to distinguish between +** an incorrect zDbName and an SQLITE_ERROR return from the underlying +** xFileControl method. +** +** See also: [file control opcodes] +*/ +SQLITE_API int sqlite3_file_control(sqlite3*, const char *zDbName, int op, void*); + +/* +** CAPI3REF: Testing Interface +** +** ^The sqlite3_test_control() interface is used to read out internal +** state of SQLite and to inject faults into SQLite for testing +** purposes. ^The first parameter is an operation code that determines +** the number, meaning, and operation of all subsequent parameters. +** +** This interface is not for use by applications. It exists solely +** for verifying the correct operation of the SQLite library. Depending +** on how the SQLite library is compiled, this interface might not exist. +** +** The details of the operation codes, their meanings, the parameters +** they take, and what they do are all subject to change without notice. +** Unlike most of the SQLite API, this function is not guaranteed to +** operate consistently from one release to the next. +*/ +SQLITE_API int sqlite3_test_control(int op, ...); + +/* +** CAPI3REF: Testing Interface Operation Codes +** +** These constants are the valid operation code parameters used +** as the first argument to [sqlite3_test_control()]. +** +** These parameters and their meanings are subject to change +** without notice. These values are for testing purposes only. +** Applications should not use any of these parameters or the +** [sqlite3_test_control()] interface. +*/ +#define SQLITE_TESTCTRL_FIRST 5 +#define SQLITE_TESTCTRL_PRNG_SAVE 5 +#define SQLITE_TESTCTRL_PRNG_RESTORE 6 +#define SQLITE_TESTCTRL_PRNG_RESET 7 /* NOT USED */ +#define SQLITE_TESTCTRL_BITVEC_TEST 8 +#define SQLITE_TESTCTRL_FAULT_INSTALL 9 +#define SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS 10 +#define SQLITE_TESTCTRL_PENDING_BYTE 11 +#define SQLITE_TESTCTRL_ASSERT 12 +#define SQLITE_TESTCTRL_ALWAYS 13 +#define SQLITE_TESTCTRL_RESERVE 14 /* NOT USED */ +#define SQLITE_TESTCTRL_OPTIMIZATIONS 15 +#define SQLITE_TESTCTRL_ISKEYWORD 16 /* NOT USED */ +#define SQLITE_TESTCTRL_SCRATCHMALLOC 17 /* NOT USED */ +#define SQLITE_TESTCTRL_INTERNAL_FUNCTIONS 17 +#define SQLITE_TESTCTRL_LOCALTIME_FAULT 18 +#define SQLITE_TESTCTRL_EXPLAIN_STMT 19 /* NOT USED */ +#define SQLITE_TESTCTRL_ONCE_RESET_THRESHOLD 19 +#define SQLITE_TESTCTRL_NEVER_CORRUPT 20 +#define SQLITE_TESTCTRL_VDBE_COVERAGE 21 +#define SQLITE_TESTCTRL_BYTEORDER 22 +#define SQLITE_TESTCTRL_ISINIT 23 +#define SQLITE_TESTCTRL_SORTER_MMAP 24 +#define SQLITE_TESTCTRL_IMPOSTER 25 +#define SQLITE_TESTCTRL_PARSER_COVERAGE 26 +#define SQLITE_TESTCTRL_RESULT_INTREAL 27 +#define SQLITE_TESTCTRL_PRNG_SEED 28 +#define SQLITE_TESTCTRL_EXTRA_SCHEMA_CHECKS 29 +#define SQLITE_TESTCTRL_SEEK_COUNT 30 +#define SQLITE_TESTCTRL_TRACEFLAGS 31 +#define SQLITE_TESTCTRL_TUNE 32 +#define SQLITE_TESTCTRL_LOGEST 33 +#define SQLITE_TESTCTRL_LAST 33 /* Largest TESTCTRL */ + +/* +** CAPI3REF: SQL Keyword Checking +** +** These routines provide access to the set of SQL language keywords +** recognized by SQLite. Applications can uses these routines to determine +** whether or not a specific identifier needs to be escaped (for example, +** by enclosing in double-quotes) so as not to confuse the parser. +** +** The sqlite3_keyword_count() interface returns the number of distinct +** keywords understood by SQLite. +** +** The sqlite3_keyword_name(N,Z,L) interface finds the N-th keyword and +** makes *Z point to that keyword expressed as UTF8 and writes the number +** of bytes in the keyword into *L. The string that *Z points to is not +** zero-terminated. The sqlite3_keyword_name(N,Z,L) routine returns +** SQLITE_OK if N is within bounds and SQLITE_ERROR if not. If either Z +** or L are NULL or invalid pointers then calls to +** sqlite3_keyword_name(N,Z,L) result in undefined behavior. +** +** The sqlite3_keyword_check(Z,L) interface checks to see whether or not +** the L-byte UTF8 identifier that Z points to is a keyword, returning non-zero +** if it is and zero if not. +** +** The parser used by SQLite is forgiving. It is often possible to use +** a keyword as an identifier as long as such use does not result in a +** parsing ambiguity. For example, the statement +** "CREATE TABLE BEGIN(REPLACE,PRAGMA,END);" is accepted by SQLite, and +** creates a new table named "BEGIN" with three columns named +** "REPLACE", "PRAGMA", and "END". Nevertheless, best practice is to avoid +** using keywords as identifiers. Common techniques used to avoid keyword +** name collisions include: +**
      +**
    • Put all identifier names inside double-quotes. This is the official +** SQL way to escape identifier names. +**
    • Put identifier names inside [...]. This is not standard SQL, +** but it is what SQL Server does and so lots of programmers use this +** technique. +**
    • Begin every identifier with the letter "Z" as no SQL keywords start +** with "Z". +**
    • Include a digit somewhere in every identifier name. +**
    +** +** Note that the number of keywords understood by SQLite can depend on +** compile-time options. For example, "VACUUM" is not a keyword if +** SQLite is compiled with the [-DSQLITE_OMIT_VACUUM] option. Also, +** new keywords may be added to future releases of SQLite. +*/ +SQLITE_API int sqlite3_keyword_count(void); +SQLITE_API int sqlite3_keyword_name(int,const char**,int*); +SQLITE_API int sqlite3_keyword_check(const char*,int); + +/* +** CAPI3REF: Dynamic String Object +** KEYWORDS: {dynamic string} +** +** An instance of the sqlite3_str object contains a dynamically-sized +** string under construction. +** +** The lifecycle of an sqlite3_str object is as follows: +**
      +**
    1. ^The sqlite3_str object is created using [sqlite3_str_new()]. +**
    2. ^Text is appended to the sqlite3_str object using various +** methods, such as [sqlite3_str_appendf()]. +**
    3. ^The sqlite3_str object is destroyed and the string it created +** is returned using the [sqlite3_str_finish()] interface. +**
    +*/ +typedef struct sqlite3_str sqlite3_str; + +/* +** CAPI3REF: Create A New Dynamic String Object +** CONSTRUCTOR: sqlite3_str +** +** ^The [sqlite3_str_new(D)] interface allocates and initializes +** a new [sqlite3_str] object. To avoid memory leaks, the object returned by +** [sqlite3_str_new()] must be freed by a subsequent call to +** [sqlite3_str_finish(X)]. +** +** ^The [sqlite3_str_new(D)] interface always returns a pointer to a +** valid [sqlite3_str] object, though in the event of an out-of-memory +** error the returned object might be a special singleton that will +** silently reject new text, always return SQLITE_NOMEM from +** [sqlite3_str_errcode()], always return 0 for +** [sqlite3_str_length()], and always return NULL from +** [sqlite3_str_finish(X)]. It is always safe to use the value +** returned by [sqlite3_str_new(D)] as the sqlite3_str parameter +** to any of the other [sqlite3_str] methods. +** +** The D parameter to [sqlite3_str_new(D)] may be NULL. If the +** D parameter in [sqlite3_str_new(D)] is not NULL, then the maximum +** length of the string contained in the [sqlite3_str] object will be +** the value set for [sqlite3_limit](D,[SQLITE_LIMIT_LENGTH]) instead +** of [SQLITE_MAX_LENGTH]. +*/ +SQLITE_API sqlite3_str *sqlite3_str_new(sqlite3*); + +/* +** CAPI3REF: Finalize A Dynamic String +** DESTRUCTOR: sqlite3_str +** +** ^The [sqlite3_str_finish(X)] interface destroys the sqlite3_str object X +** and returns a pointer to a memory buffer obtained from [sqlite3_malloc64()] +** that contains the constructed string. The calling application should +** pass the returned value to [sqlite3_free()] to avoid a memory leak. +** ^The [sqlite3_str_finish(X)] interface may return a NULL pointer if any +** errors were encountered during construction of the string. ^The +** [sqlite3_str_finish(X)] interface will also return a NULL pointer if the +** string in [sqlite3_str] object X is zero bytes long. +*/ +SQLITE_API char *sqlite3_str_finish(sqlite3_str*); + +/* +** CAPI3REF: Add Content To A Dynamic String +** METHOD: sqlite3_str +** +** These interfaces add content to an sqlite3_str object previously obtained +** from [sqlite3_str_new()]. +** +** ^The [sqlite3_str_appendf(X,F,...)] and +** [sqlite3_str_vappendf(X,F,V)] interfaces uses the [built-in printf] +** functionality of SQLite to append formatted text onto the end of +** [sqlite3_str] object X. +** +** ^The [sqlite3_str_append(X,S,N)] method appends exactly N bytes from string S +** onto the end of the [sqlite3_str] object X. N must be non-negative. +** S must contain at least N non-zero bytes of content. To append a +** zero-terminated string in its entirety, use the [sqlite3_str_appendall()] +** method instead. +** +** ^The [sqlite3_str_appendall(X,S)] method appends the complete content of +** zero-terminated string S onto the end of [sqlite3_str] object X. +** +** ^The [sqlite3_str_appendchar(X,N,C)] method appends N copies of the +** single-byte character C onto the end of [sqlite3_str] object X. +** ^This method can be used, for example, to add whitespace indentation. +** +** ^The [sqlite3_str_reset(X)] method resets the string under construction +** inside [sqlite3_str] object X back to zero bytes in length. +** +** These methods do not return a result code. ^If an error occurs, that fact +** is recorded in the [sqlite3_str] object and can be recovered by a +** subsequent call to [sqlite3_str_errcode(X)]. +*/ +SQLITE_API void sqlite3_str_appendf(sqlite3_str*, const char *zFormat, ...); +SQLITE_API void sqlite3_str_vappendf(sqlite3_str*, const char *zFormat, va_list); +SQLITE_API void sqlite3_str_append(sqlite3_str*, const char *zIn, int N); +SQLITE_API void sqlite3_str_appendall(sqlite3_str*, const char *zIn); +SQLITE_API void sqlite3_str_appendchar(sqlite3_str*, int N, char C); +SQLITE_API void sqlite3_str_reset(sqlite3_str*); + +/* +** CAPI3REF: Status Of A Dynamic String +** METHOD: sqlite3_str +** +** These interfaces return the current status of an [sqlite3_str] object. +** +** ^If any prior errors have occurred while constructing the dynamic string +** in sqlite3_str X, then the [sqlite3_str_errcode(X)] method will return +** an appropriate error code. ^The [sqlite3_str_errcode(X)] method returns +** [SQLITE_NOMEM] following any out-of-memory error, or +** [SQLITE_TOOBIG] if the size of the dynamic string exceeds +** [SQLITE_MAX_LENGTH], or [SQLITE_OK] if there have been no errors. +** +** ^The [sqlite3_str_length(X)] method returns the current length, in bytes, +** of the dynamic string under construction in [sqlite3_str] object X. +** ^The length returned by [sqlite3_str_length(X)] does not include the +** zero-termination byte. +** +** ^The [sqlite3_str_value(X)] method returns a pointer to the current +** content of the dynamic string under construction in X. The value +** returned by [sqlite3_str_value(X)] is managed by the sqlite3_str object X +** and might be freed or altered by any subsequent method on the same +** [sqlite3_str] object. Applications must not used the pointer returned +** [sqlite3_str_value(X)] after any subsequent method call on the same +** object. ^Applications may change the content of the string returned +** by [sqlite3_str_value(X)] as long as they do not write into any bytes +** outside the range of 0 to [sqlite3_str_length(X)] and do not read or +** write any byte after any subsequent sqlite3_str method call. +*/ +SQLITE_API int sqlite3_str_errcode(sqlite3_str*); +SQLITE_API int sqlite3_str_length(sqlite3_str*); +SQLITE_API char *sqlite3_str_value(sqlite3_str*); + +/* +** CAPI3REF: SQLite Runtime Status +** +** ^These interfaces are used to retrieve runtime status information +** about the performance of SQLite, and optionally to reset various +** highwater marks. ^The first argument is an integer code for +** the specific parameter to measure. ^(Recognized integer codes +** are of the form [status parameters | SQLITE_STATUS_...].)^ +** ^The current value of the parameter is returned into *pCurrent. +** ^The highest recorded value is returned in *pHighwater. ^If the +** resetFlag is true, then the highest record value is reset after +** *pHighwater is written. ^(Some parameters do not record the highest +** value. For those parameters +** nothing is written into *pHighwater and the resetFlag is ignored.)^ +** ^(Other parameters record only the highwater mark and not the current +** value. For these latter parameters nothing is written into *pCurrent.)^ +** +** ^The sqlite3_status() and sqlite3_status64() routines return +** SQLITE_OK on success and a non-zero [error code] on failure. +** +** If either the current value or the highwater mark is too large to +** be represented by a 32-bit integer, then the values returned by +** sqlite3_status() are undefined. +** +** See also: [sqlite3_db_status()] +*/ +SQLITE_API int sqlite3_status(int op, int *pCurrent, int *pHighwater, int resetFlag); +SQLITE_API int sqlite3_status64( + int op, + sqlite3_int64 *pCurrent, + sqlite3_int64 *pHighwater, + int resetFlag +); + + +/* +** CAPI3REF: Status Parameters +** KEYWORDS: {status parameters} +** +** These integer constants designate various run-time status parameters +** that can be returned by [sqlite3_status()]. +** +**
    +** [[SQLITE_STATUS_MEMORY_USED]] ^(
    SQLITE_STATUS_MEMORY_USED
    +**
    This parameter is the current amount of memory checked out +** using [sqlite3_malloc()], either directly or indirectly. The +** figure includes calls made to [sqlite3_malloc()] by the application +** and internal memory usage by the SQLite library. Auxiliary page-cache +** memory controlled by [SQLITE_CONFIG_PAGECACHE] is not included in +** this parameter. The amount returned is the sum of the allocation +** sizes as reported by the xSize method in [sqlite3_mem_methods].
    )^ +** +** [[SQLITE_STATUS_MALLOC_SIZE]] ^(
    SQLITE_STATUS_MALLOC_SIZE
    +**
    This parameter records the largest memory allocation request +** handed to [sqlite3_malloc()] or [sqlite3_realloc()] (or their +** internal equivalents). Only the value returned in the +** *pHighwater parameter to [sqlite3_status()] is of interest. +** The value written into the *pCurrent parameter is undefined.
    )^ +** +** [[SQLITE_STATUS_MALLOC_COUNT]] ^(
    SQLITE_STATUS_MALLOC_COUNT
    +**
    This parameter records the number of separate memory allocations +** currently checked out.
    )^ +** +** [[SQLITE_STATUS_PAGECACHE_USED]] ^(
    SQLITE_STATUS_PAGECACHE_USED
    +**
    This parameter returns the number of pages used out of the +** [pagecache memory allocator] that was configured using +** [SQLITE_CONFIG_PAGECACHE]. The +** value returned is in pages, not in bytes.
    )^ +** +** [[SQLITE_STATUS_PAGECACHE_OVERFLOW]] +** ^(
    SQLITE_STATUS_PAGECACHE_OVERFLOW
    +**
    This parameter returns the number of bytes of page cache +** allocation which could not be satisfied by the [SQLITE_CONFIG_PAGECACHE] +** buffer and where forced to overflow to [sqlite3_malloc()]. The +** returned value includes allocations that overflowed because they +** where too large (they were larger than the "sz" parameter to +** [SQLITE_CONFIG_PAGECACHE]) and allocations that overflowed because +** no space was left in the page cache.
    )^ +** +** [[SQLITE_STATUS_PAGECACHE_SIZE]] ^(
    SQLITE_STATUS_PAGECACHE_SIZE
    +**
    This parameter records the largest memory allocation request +** handed to the [pagecache memory allocator]. Only the value returned in the +** *pHighwater parameter to [sqlite3_status()] is of interest. +** The value written into the *pCurrent parameter is undefined.
    )^ +** +** [[SQLITE_STATUS_SCRATCH_USED]]
    SQLITE_STATUS_SCRATCH_USED
    +**
    No longer used.
    +** +** [[SQLITE_STATUS_SCRATCH_OVERFLOW]] ^(
    SQLITE_STATUS_SCRATCH_OVERFLOW
    +**
    No longer used.
    +** +** [[SQLITE_STATUS_SCRATCH_SIZE]]
    SQLITE_STATUS_SCRATCH_SIZE
    +**
    No longer used.
    +** +** [[SQLITE_STATUS_PARSER_STACK]] ^(
    SQLITE_STATUS_PARSER_STACK
    +**
    The *pHighwater parameter records the deepest parser stack. +** The *pCurrent value is undefined. The *pHighwater value is only +** meaningful if SQLite is compiled with [YYTRACKMAXSTACKDEPTH].
    )^ +**
    +** +** New status parameters may be added from time to time. +*/ +#define SQLITE_STATUS_MEMORY_USED 0 +#define SQLITE_STATUS_PAGECACHE_USED 1 +#define SQLITE_STATUS_PAGECACHE_OVERFLOW 2 +#define SQLITE_STATUS_SCRATCH_USED 3 /* NOT USED */ +#define SQLITE_STATUS_SCRATCH_OVERFLOW 4 /* NOT USED */ +#define SQLITE_STATUS_MALLOC_SIZE 5 +#define SQLITE_STATUS_PARSER_STACK 6 +#define SQLITE_STATUS_PAGECACHE_SIZE 7 +#define SQLITE_STATUS_SCRATCH_SIZE 8 /* NOT USED */ +#define SQLITE_STATUS_MALLOC_COUNT 9 + +/* +** CAPI3REF: Database Connection Status +** METHOD: sqlite3 +** +** ^This interface is used to retrieve runtime status information +** about a single [database connection]. ^The first argument is the +** database connection object to be interrogated. ^The second argument +** is an integer constant, taken from the set of +** [SQLITE_DBSTATUS options], that +** determines the parameter to interrogate. The set of +** [SQLITE_DBSTATUS options] is likely +** to grow in future releases of SQLite. +** +** ^The current value of the requested parameter is written into *pCur +** and the highest instantaneous value is written into *pHiwtr. ^If +** the resetFlg is true, then the highest instantaneous value is +** reset back down to the current value. +** +** ^The sqlite3_db_status() routine returns SQLITE_OK on success and a +** non-zero [error code] on failure. +** +** See also: [sqlite3_status()] and [sqlite3_stmt_status()]. +*/ +SQLITE_API int sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int resetFlg); + +/* +** CAPI3REF: Status Parameters for database connections +** KEYWORDS: {SQLITE_DBSTATUS options} +** +** These constants are the available integer "verbs" that can be passed as +** the second argument to the [sqlite3_db_status()] interface. +** +** New verbs may be added in future releases of SQLite. Existing verbs +** might be discontinued. Applications should check the return code from +** [sqlite3_db_status()] to make sure that the call worked. +** The [sqlite3_db_status()] interface will return a non-zero error code +** if a discontinued or unsupported verb is invoked. +** +**
    +** [[SQLITE_DBSTATUS_LOOKASIDE_USED]] ^(
    SQLITE_DBSTATUS_LOOKASIDE_USED
    +**
    This parameter returns the number of lookaside memory slots currently +** checked out.
    )^ +** +** [[SQLITE_DBSTATUS_LOOKASIDE_HIT]] ^(
    SQLITE_DBSTATUS_LOOKASIDE_HIT
    +**
    This parameter returns the number of malloc attempts that were +** satisfied using lookaside memory. Only the high-water value is meaningful; +** the current value is always zero.)^ +** +** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE]] +** ^(
    SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE
    +**
    This parameter returns the number malloc attempts that might have +** been satisfied using lookaside memory but failed due to the amount of +** memory requested being larger than the lookaside slot size. +** Only the high-water value is meaningful; +** the current value is always zero.)^ +** +** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL]] +** ^(
    SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL
    +**
    This parameter returns the number malloc attempts that might have +** been satisfied using lookaside memory but failed due to all lookaside +** memory already being in use. +** Only the high-water value is meaningful; +** the current value is always zero.)^ +** +** [[SQLITE_DBSTATUS_CACHE_USED]] ^(
    SQLITE_DBSTATUS_CACHE_USED
    +**
    This parameter returns the approximate number of bytes of heap +** memory used by all pager caches associated with the database connection.)^ +** ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_USED is always 0. +** +** [[SQLITE_DBSTATUS_CACHE_USED_SHARED]] +** ^(
    SQLITE_DBSTATUS_CACHE_USED_SHARED
    +**
    This parameter is similar to DBSTATUS_CACHE_USED, except that if a +** pager cache is shared between two or more connections the bytes of heap +** memory used by that pager cache is divided evenly between the attached +** connections.)^ In other words, if none of the pager caches associated +** with the database connection are shared, this request returns the same +** value as DBSTATUS_CACHE_USED. Or, if one or more or the pager caches are +** shared, the value returned by this call will be smaller than that returned +** by DBSTATUS_CACHE_USED. ^The highwater mark associated with +** SQLITE_DBSTATUS_CACHE_USED_SHARED is always 0. +** +** [[SQLITE_DBSTATUS_SCHEMA_USED]] ^(
    SQLITE_DBSTATUS_SCHEMA_USED
    +**
    This parameter returns the approximate number of bytes of heap +** memory used to store the schema for all databases associated +** with the connection - main, temp, and any [ATTACH]-ed databases.)^ +** ^The full amount of memory used by the schemas is reported, even if the +** schema memory is shared with other database connections due to +** [shared cache mode] being enabled. +** ^The highwater mark associated with SQLITE_DBSTATUS_SCHEMA_USED is always 0. +** +** [[SQLITE_DBSTATUS_STMT_USED]] ^(
    SQLITE_DBSTATUS_STMT_USED
    +**
    This parameter returns the approximate number of bytes of heap +** and lookaside memory used by all prepared statements associated with +** the database connection.)^ +** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0. +**
    +** +** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(
    SQLITE_DBSTATUS_CACHE_HIT
    +**
    This parameter returns the number of pager cache hits that have +** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT +** is always 0. +**
    +** +** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(
    SQLITE_DBSTATUS_CACHE_MISS
    +**
    This parameter returns the number of pager cache misses that have +** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS +** is always 0. +**
    +** +** [[SQLITE_DBSTATUS_CACHE_WRITE]] ^(
    SQLITE_DBSTATUS_CACHE_WRITE
    +**
    This parameter returns the number of dirty cache entries that have +** been written to disk. Specifically, the number of pages written to the +** wal file in wal mode databases, or the number of pages written to the +** database file in rollback mode databases. Any pages written as part of +** transaction rollback or database recovery operations are not included. +** If an IO or other error occurs while writing a page to disk, the effect +** on subsequent SQLITE_DBSTATUS_CACHE_WRITE requests is undefined.)^ ^The +** highwater mark associated with SQLITE_DBSTATUS_CACHE_WRITE is always 0. +**
    +** +** [[SQLITE_DBSTATUS_CACHE_SPILL]] ^(
    SQLITE_DBSTATUS_CACHE_SPILL
    +**
    This parameter returns the number of dirty cache entries that have +** been written to disk in the middle of a transaction due to the page +** cache overflowing. Transactions are more efficient if they are written +** to disk all at once. When pages spill mid-transaction, that introduces +** additional overhead. This parameter can be used help identify +** inefficiencies that can be resolved by increasing the cache size. +**
    +** +** [[SQLITE_DBSTATUS_DEFERRED_FKS]] ^(
    SQLITE_DBSTATUS_DEFERRED_FKS
    +**
    This parameter returns zero for the current value if and only if +** all foreign key constraints (deferred or immediate) have been +** resolved.)^ ^The highwater mark is always 0. +**
    +**
    +*/ +#define SQLITE_DBSTATUS_LOOKASIDE_USED 0 +#define SQLITE_DBSTATUS_CACHE_USED 1 +#define SQLITE_DBSTATUS_SCHEMA_USED 2 +#define SQLITE_DBSTATUS_STMT_USED 3 +#define SQLITE_DBSTATUS_LOOKASIDE_HIT 4 +#define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE 5 +#define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL 6 +#define SQLITE_DBSTATUS_CACHE_HIT 7 +#define SQLITE_DBSTATUS_CACHE_MISS 8 +#define SQLITE_DBSTATUS_CACHE_WRITE 9 +#define SQLITE_DBSTATUS_DEFERRED_FKS 10 +#define SQLITE_DBSTATUS_CACHE_USED_SHARED 11 +#define SQLITE_DBSTATUS_CACHE_SPILL 12 +#define SQLITE_DBSTATUS_MAX 12 /* Largest defined DBSTATUS */ + + +/* +** CAPI3REF: Prepared Statement Status +** METHOD: sqlite3_stmt +** +** ^(Each prepared statement maintains various +** [SQLITE_STMTSTATUS counters] that measure the number +** of times it has performed specific operations.)^ These counters can +** be used to monitor the performance characteristics of the prepared +** statements. For example, if the number of table steps greatly exceeds +** the number of table searches or result rows, that would tend to indicate +** that the prepared statement is using a full table scan rather than +** an index. +** +** ^(This interface is used to retrieve and reset counter values from +** a [prepared statement]. The first argument is the prepared statement +** object to be interrogated. The second argument +** is an integer code for a specific [SQLITE_STMTSTATUS counter] +** to be interrogated.)^ +** ^The current value of the requested counter is returned. +** ^If the resetFlg is true, then the counter is reset to zero after this +** interface call returns. +** +** See also: [sqlite3_status()] and [sqlite3_db_status()]. +*/ +SQLITE_API int sqlite3_stmt_status(sqlite3_stmt*, int op,int resetFlg); + +/* +** CAPI3REF: Status Parameters for prepared statements +** KEYWORDS: {SQLITE_STMTSTATUS counter} {SQLITE_STMTSTATUS counters} +** +** These preprocessor macros define integer codes that name counter +** values associated with the [sqlite3_stmt_status()] interface. +** The meanings of the various counters are as follows: +** +**
    +** [[SQLITE_STMTSTATUS_FULLSCAN_STEP]]
    SQLITE_STMTSTATUS_FULLSCAN_STEP
    +**
    ^This is the number of times that SQLite has stepped forward in +** a table as part of a full table scan. Large numbers for this counter +** may indicate opportunities for performance improvement through +** careful use of indices.
    +** +** [[SQLITE_STMTSTATUS_SORT]]
    SQLITE_STMTSTATUS_SORT
    +**
    ^This is the number of sort operations that have occurred. +** A non-zero value in this counter may indicate an opportunity to +** improvement performance through careful use of indices.
    +** +** [[SQLITE_STMTSTATUS_AUTOINDEX]]
    SQLITE_STMTSTATUS_AUTOINDEX
    +**
    ^This is the number of rows inserted into transient indices that +** were created automatically in order to help joins run faster. +** A non-zero value in this counter may indicate an opportunity to +** improvement performance by adding permanent indices that do not +** need to be reinitialized each time the statement is run.
    +** +** [[SQLITE_STMTSTATUS_VM_STEP]]
    SQLITE_STMTSTATUS_VM_STEP
    +**
    ^This is the number of virtual machine operations executed +** by the prepared statement if that number is less than or equal +** to 2147483647. The number of virtual machine operations can be +** used as a proxy for the total work done by the prepared statement. +** If the number of virtual machine operations exceeds 2147483647 +** then the value returned by this statement status code is undefined. +** +** [[SQLITE_STMTSTATUS_REPREPARE]]
    SQLITE_STMTSTATUS_REPREPARE
    +**
    ^This is the number of times that the prepare statement has been +** automatically regenerated due to schema changes or changes to +** [bound parameters] that might affect the query plan. +** +** [[SQLITE_STMTSTATUS_RUN]]
    SQLITE_STMTSTATUS_RUN
    +**
    ^This is the number of times that the prepared statement has +** been run. A single "run" for the purposes of this counter is one +** or more calls to [sqlite3_step()] followed by a call to [sqlite3_reset()]. +** The counter is incremented on the first [sqlite3_step()] call of each +** cycle. +** +** [[SQLITE_STMTSTATUS_FILTER_MISS]] +** [[SQLITE_STMTSTATUS_FILTER HIT]] +**
    SQLITE_STMTSTATUS_FILTER_HIT
    +** SQLITE_STMTSTATUS_FILTER_MISS
    +**
    ^SQLITE_STMTSTATUS_FILTER_HIT is the number of times that a join +** step was bypassed because a Bloom filter returned not-found. The +** corresponding SQLITE_STMTSTATUS_FILTER_MISS value is the number of +** times that the Bloom filter returned a find, and thus the join step +** had to be processed as normal. +** +** [[SQLITE_STMTSTATUS_MEMUSED]]
    SQLITE_STMTSTATUS_MEMUSED
    +**
    ^This is the approximate number of bytes of heap memory +** used to store the prepared statement. ^This value is not actually +** a counter, and so the resetFlg parameter to sqlite3_stmt_status() +** is ignored when the opcode is SQLITE_STMTSTATUS_MEMUSED. +**
    +**
    +*/ +#define SQLITE_STMTSTATUS_FULLSCAN_STEP 1 +#define SQLITE_STMTSTATUS_SORT 2 +#define SQLITE_STMTSTATUS_AUTOINDEX 3 +#define SQLITE_STMTSTATUS_VM_STEP 4 +#define SQLITE_STMTSTATUS_REPREPARE 5 +#define SQLITE_STMTSTATUS_RUN 6 +#define SQLITE_STMTSTATUS_FILTER_MISS 7 +#define SQLITE_STMTSTATUS_FILTER_HIT 8 +#define SQLITE_STMTSTATUS_MEMUSED 99 + +/* +** CAPI3REF: Custom Page Cache Object +** +** The sqlite3_pcache type is opaque. It is implemented by +** the pluggable module. The SQLite core has no knowledge of +** its size or internal structure and never deals with the +** sqlite3_pcache object except by holding and passing pointers +** to the object. +** +** See [sqlite3_pcache_methods2] for additional information. +*/ +typedef struct sqlite3_pcache sqlite3_pcache; + +/* +** CAPI3REF: Custom Page Cache Object +** +** The sqlite3_pcache_page object represents a single page in the +** page cache. The page cache will allocate instances of this +** object. Various methods of the page cache use pointers to instances +** of this object as parameters or as their return value. +** +** See [sqlite3_pcache_methods2] for additional information. +*/ +typedef struct sqlite3_pcache_page sqlite3_pcache_page; +struct sqlite3_pcache_page { + void *pBuf; /* The content of the page */ + void *pExtra; /* Extra information associated with the page */ +}; + +/* +** CAPI3REF: Application Defined Page Cache. +** KEYWORDS: {page cache} +** +** ^(The [sqlite3_config]([SQLITE_CONFIG_PCACHE2], ...) interface can +** register an alternative page cache implementation by passing in an +** instance of the sqlite3_pcache_methods2 structure.)^ +** In many applications, most of the heap memory allocated by +** SQLite is used for the page cache. +** By implementing a +** custom page cache using this API, an application can better control +** the amount of memory consumed by SQLite, the way in which +** that memory is allocated and released, and the policies used to +** determine exactly which parts of a database file are cached and for +** how long. +** +** The alternative page cache mechanism is an +** extreme measure that is only needed by the most demanding applications. +** The built-in page cache is recommended for most uses. +** +** ^(The contents of the sqlite3_pcache_methods2 structure are copied to an +** internal buffer by SQLite within the call to [sqlite3_config]. Hence +** the application may discard the parameter after the call to +** [sqlite3_config()] returns.)^ +** +** [[the xInit() page cache method]] +** ^(The xInit() method is called once for each effective +** call to [sqlite3_initialize()])^ +** (usually only once during the lifetime of the process). ^(The xInit() +** method is passed a copy of the sqlite3_pcache_methods2.pArg value.)^ +** The intent of the xInit() method is to set up global data structures +** required by the custom page cache implementation. +** ^(If the xInit() method is NULL, then the +** built-in default page cache is used instead of the application defined +** page cache.)^ +** +** [[the xShutdown() page cache method]] +** ^The xShutdown() method is called by [sqlite3_shutdown()]. +** It can be used to clean up +** any outstanding resources before process shutdown, if required. +** ^The xShutdown() method may be NULL. +** +** ^SQLite automatically serializes calls to the xInit method, +** so the xInit method need not be threadsafe. ^The +** xShutdown method is only called from [sqlite3_shutdown()] so it does +** not need to be threadsafe either. All other methods must be threadsafe +** in multithreaded applications. +** +** ^SQLite will never invoke xInit() more than once without an intervening +** call to xShutdown(). +** +** [[the xCreate() page cache methods]] +** ^SQLite invokes the xCreate() method to construct a new cache instance. +** SQLite will typically create one cache instance for each open database file, +** though this is not guaranteed. ^The +** first parameter, szPage, is the size in bytes of the pages that must +** be allocated by the cache. ^szPage will always a power of two. ^The +** second parameter szExtra is a number of bytes of extra storage +** associated with each page cache entry. ^The szExtra parameter will +** a number less than 250. SQLite will use the +** extra szExtra bytes on each page to store metadata about the underlying +** database page on disk. The value passed into szExtra depends +** on the SQLite version, the target platform, and how SQLite was compiled. +** ^The third argument to xCreate(), bPurgeable, is true if the cache being +** created will be used to cache database pages of a file stored on disk, or +** false if it is used for an in-memory database. The cache implementation +** does not have to do anything special based with the value of bPurgeable; +** it is purely advisory. ^On a cache where bPurgeable is false, SQLite will +** never invoke xUnpin() except to deliberately delete a page. +** ^In other words, calls to xUnpin() on a cache with bPurgeable set to +** false will always have the "discard" flag set to true. +** ^Hence, a cache created with bPurgeable false will +** never contain any unpinned pages. +** +** [[the xCachesize() page cache method]] +** ^(The xCachesize() method may be called at any time by SQLite to set the +** suggested maximum cache-size (number of pages stored by) the cache +** instance passed as the first argument. This is the value configured using +** the SQLite "[PRAGMA cache_size]" command.)^ As with the bPurgeable +** parameter, the implementation is not required to do anything with this +** value; it is advisory only. +** +** [[the xPagecount() page cache methods]] +** The xPagecount() method must return the number of pages currently +** stored in the cache, both pinned and unpinned. +** +** [[the xFetch() page cache methods]] +** The xFetch() method locates a page in the cache and returns a pointer to +** an sqlite3_pcache_page object associated with that page, or a NULL pointer. +** The pBuf element of the returned sqlite3_pcache_page object will be a +** pointer to a buffer of szPage bytes used to store the content of a +** single database page. The pExtra element of sqlite3_pcache_page will be +** a pointer to the szExtra bytes of extra storage that SQLite has requested +** for each entry in the page cache. +** +** The page to be fetched is determined by the key. ^The minimum key value +** is 1. After it has been retrieved using xFetch, the page is considered +** to be "pinned". +** +** If the requested page is already in the page cache, then the page cache +** implementation must return a pointer to the page buffer with its content +** intact. If the requested page is not already in the cache, then the +** cache implementation should use the value of the createFlag +** parameter to help it determined what action to take: +** +** +**
    createFlag Behavior when page is not already in cache +**
    0 Do not allocate a new page. Return NULL. +**
    1 Allocate a new page if it easy and convenient to do so. +** Otherwise return NULL. +**
    2 Make every effort to allocate a new page. Only return +** NULL if allocating a new page is effectively impossible. +**
    +** +** ^(SQLite will normally invoke xFetch() with a createFlag of 0 or 1. SQLite +** will only use a createFlag of 2 after a prior call with a createFlag of 1 +** failed.)^ In between the xFetch() calls, SQLite may +** attempt to unpin one or more cache pages by spilling the content of +** pinned pages to disk and synching the operating system disk cache. +** +** [[the xUnpin() page cache method]] +** ^xUnpin() is called by SQLite with a pointer to a currently pinned page +** as its second argument. If the third parameter, discard, is non-zero, +** then the page must be evicted from the cache. +** ^If the discard parameter is +** zero, then the page may be discarded or retained at the discretion of +** page cache implementation. ^The page cache implementation +** may choose to evict unpinned pages at any time. +** +** The cache must not perform any reference counting. A single +** call to xUnpin() unpins the page regardless of the number of prior calls +** to xFetch(). +** +** [[the xRekey() page cache methods]] +** The xRekey() method is used to change the key value associated with the +** page passed as the second argument. If the cache +** previously contains an entry associated with newKey, it must be +** discarded. ^Any prior cache entry associated with newKey is guaranteed not +** to be pinned. +** +** When SQLite calls the xTruncate() method, the cache must discard all +** existing cache entries with page numbers (keys) greater than or equal +** to the value of the iLimit parameter passed to xTruncate(). If any +** of these pages are pinned, they are implicitly unpinned, meaning that +** they can be safely discarded. +** +** [[the xDestroy() page cache method]] +** ^The xDestroy() method is used to delete a cache allocated by xCreate(). +** All resources associated with the specified cache should be freed. ^After +** calling the xDestroy() method, SQLite considers the [sqlite3_pcache*] +** handle invalid, and will not use it with any other sqlite3_pcache_methods2 +** functions. +** +** [[the xShrink() page cache method]] +** ^SQLite invokes the xShrink() method when it wants the page cache to +** free up as much of heap memory as possible. The page cache implementation +** is not obligated to free any memory, but well-behaved implementations should +** do their best. +*/ +typedef struct sqlite3_pcache_methods2 sqlite3_pcache_methods2; +struct sqlite3_pcache_methods2 { + int iVersion; + void *pArg; + int (*xInit)(void*); + void (*xShutdown)(void*); + sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable); + void (*xCachesize)(sqlite3_pcache*, int nCachesize); + int (*xPagecount)(sqlite3_pcache*); + sqlite3_pcache_page *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag); + void (*xUnpin)(sqlite3_pcache*, sqlite3_pcache_page*, int discard); + void (*xRekey)(sqlite3_pcache*, sqlite3_pcache_page*, + unsigned oldKey, unsigned newKey); + void (*xTruncate)(sqlite3_pcache*, unsigned iLimit); + void (*xDestroy)(sqlite3_pcache*); + void (*xShrink)(sqlite3_pcache*); +}; + +/* +** This is the obsolete pcache_methods object that has now been replaced +** by sqlite3_pcache_methods2. This object is not used by SQLite. It is +** retained in the header file for backwards compatibility only. +*/ +typedef struct sqlite3_pcache_methods sqlite3_pcache_methods; +struct sqlite3_pcache_methods { + void *pArg; + int (*xInit)(void*); + void (*xShutdown)(void*); + sqlite3_pcache *(*xCreate)(int szPage, int bPurgeable); + void (*xCachesize)(sqlite3_pcache*, int nCachesize); + int (*xPagecount)(sqlite3_pcache*); + void *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag); + void (*xUnpin)(sqlite3_pcache*, void*, int discard); + void (*xRekey)(sqlite3_pcache*, void*, unsigned oldKey, unsigned newKey); + void (*xTruncate)(sqlite3_pcache*, unsigned iLimit); + void (*xDestroy)(sqlite3_pcache*); +}; + + +/* +** CAPI3REF: Online Backup Object +** +** The sqlite3_backup object records state information about an ongoing +** online backup operation. ^The sqlite3_backup object is created by +** a call to [sqlite3_backup_init()] and is destroyed by a call to +** [sqlite3_backup_finish()]. +** +** See Also: [Using the SQLite Online Backup API] +*/ +typedef struct sqlite3_backup sqlite3_backup; + +/* +** CAPI3REF: Online Backup API. +** +** The backup API copies the content of one database into another. +** It is useful either for creating backups of databases or +** for copying in-memory databases to or from persistent files. +** +** See Also: [Using the SQLite Online Backup API] +** +** ^SQLite holds a write transaction open on the destination database file +** for the duration of the backup operation. +** ^The source database is read-locked only while it is being read; +** it is not locked continuously for the entire backup operation. +** ^Thus, the backup may be performed on a live source database without +** preventing other database connections from +** reading or writing to the source database while the backup is underway. +** +** ^(To perform a backup operation: +**
      +**
    1. sqlite3_backup_init() is called once to initialize the +** backup, +**
    2. sqlite3_backup_step() is called one or more times to transfer +** the data between the two databases, and finally +**
    3. sqlite3_backup_finish() is called to release all resources +** associated with the backup operation. +**
    )^ +** There should be exactly one call to sqlite3_backup_finish() for each +** successful call to sqlite3_backup_init(). +** +** [[sqlite3_backup_init()]] sqlite3_backup_init() +** +** ^The D and N arguments to sqlite3_backup_init(D,N,S,M) are the +** [database connection] associated with the destination database +** and the database name, respectively. +** ^The database name is "main" for the main database, "temp" for the +** temporary database, or the name specified after the AS keyword in +** an [ATTACH] statement for an attached database. +** ^The S and M arguments passed to +** sqlite3_backup_init(D,N,S,M) identify the [database connection] +** and database name of the source database, respectively. +** ^The source and destination [database connections] (parameters S and D) +** must be different or else sqlite3_backup_init(D,N,S,M) will fail with +** an error. +** +** ^A call to sqlite3_backup_init() will fail, returning NULL, if +** there is already a read or read-write transaction open on the +** destination database. +** +** ^If an error occurs within sqlite3_backup_init(D,N,S,M), then NULL is +** returned and an error code and error message are stored in the +** destination [database connection] D. +** ^The error code and message for the failed call to sqlite3_backup_init() +** can be retrieved using the [sqlite3_errcode()], [sqlite3_errmsg()], and/or +** [sqlite3_errmsg16()] functions. +** ^A successful call to sqlite3_backup_init() returns a pointer to an +** [sqlite3_backup] object. +** ^The [sqlite3_backup] object may be used with the sqlite3_backup_step() and +** sqlite3_backup_finish() functions to perform the specified backup +** operation. +** +** [[sqlite3_backup_step()]] sqlite3_backup_step() +** +** ^Function sqlite3_backup_step(B,N) will copy up to N pages between +** the source and destination databases specified by [sqlite3_backup] object B. +** ^If N is negative, all remaining source pages are copied. +** ^If sqlite3_backup_step(B,N) successfully copies N pages and there +** are still more pages to be copied, then the function returns [SQLITE_OK]. +** ^If sqlite3_backup_step(B,N) successfully finishes copying all pages +** from source to destination, then it returns [SQLITE_DONE]. +** ^If an error occurs while running sqlite3_backup_step(B,N), +** then an [error code] is returned. ^As well as [SQLITE_OK] and +** [SQLITE_DONE], a call to sqlite3_backup_step() may return [SQLITE_READONLY], +** [SQLITE_NOMEM], [SQLITE_BUSY], [SQLITE_LOCKED], or an +** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX] extended error code. +** +** ^(The sqlite3_backup_step() might return [SQLITE_READONLY] if +**
      +**
    1. the destination database was opened read-only, or +**
    2. the destination database is using write-ahead-log journaling +** and the destination and source page sizes differ, or +**
    3. the destination database is an in-memory database and the +** destination and source page sizes differ. +**
    )^ +** +** ^If sqlite3_backup_step() cannot obtain a required file-system lock, then +** the [sqlite3_busy_handler | busy-handler function] +** is invoked (if one is specified). ^If the +** busy-handler returns non-zero before the lock is available, then +** [SQLITE_BUSY] is returned to the caller. ^In this case the call to +** sqlite3_backup_step() can be retried later. ^If the source +** [database connection] +** is being used to write to the source database when sqlite3_backup_step() +** is called, then [SQLITE_LOCKED] is returned immediately. ^Again, in this +** case the call to sqlite3_backup_step() can be retried later on. ^(If +** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX], [SQLITE_NOMEM], or +** [SQLITE_READONLY] is returned, then +** there is no point in retrying the call to sqlite3_backup_step(). These +** errors are considered fatal.)^ The application must accept +** that the backup operation has failed and pass the backup operation handle +** to the sqlite3_backup_finish() to release associated resources. +** +** ^The first call to sqlite3_backup_step() obtains an exclusive lock +** on the destination file. ^The exclusive lock is not released until either +** sqlite3_backup_finish() is called or the backup operation is complete +** and sqlite3_backup_step() returns [SQLITE_DONE]. ^Every call to +** sqlite3_backup_step() obtains a [shared lock] on the source database that +** lasts for the duration of the sqlite3_backup_step() call. +** ^Because the source database is not locked between calls to +** sqlite3_backup_step(), the source database may be modified mid-way +** through the backup process. ^If the source database is modified by an +** external process or via a database connection other than the one being +** used by the backup operation, then the backup will be automatically +** restarted by the next call to sqlite3_backup_step(). ^If the source +** database is modified by the using the same database connection as is used +** by the backup operation, then the backup database is automatically +** updated at the same time. +** +** [[sqlite3_backup_finish()]] sqlite3_backup_finish() +** +** When sqlite3_backup_step() has returned [SQLITE_DONE], or when the +** application wishes to abandon the backup operation, the application +** should destroy the [sqlite3_backup] by passing it to sqlite3_backup_finish(). +** ^The sqlite3_backup_finish() interfaces releases all +** resources associated with the [sqlite3_backup] object. +** ^If sqlite3_backup_step() has not yet returned [SQLITE_DONE], then any +** active write-transaction on the destination database is rolled back. +** The [sqlite3_backup] object is invalid +** and may not be used following a call to sqlite3_backup_finish(). +** +** ^The value returned by sqlite3_backup_finish is [SQLITE_OK] if no +** sqlite3_backup_step() errors occurred, regardless or whether or not +** sqlite3_backup_step() completed. +** ^If an out-of-memory condition or IO error occurred during any prior +** sqlite3_backup_step() call on the same [sqlite3_backup] object, then +** sqlite3_backup_finish() returns the corresponding [error code]. +** +** ^A return of [SQLITE_BUSY] or [SQLITE_LOCKED] from sqlite3_backup_step() +** is not a permanent error and does not affect the return value of +** sqlite3_backup_finish(). +** +** [[sqlite3_backup_remaining()]] [[sqlite3_backup_pagecount()]] +** sqlite3_backup_remaining() and sqlite3_backup_pagecount() +** +** ^The sqlite3_backup_remaining() routine returns the number of pages still +** to be backed up at the conclusion of the most recent sqlite3_backup_step(). +** ^The sqlite3_backup_pagecount() routine returns the total number of pages +** in the source database at the conclusion of the most recent +** sqlite3_backup_step(). +** ^(The values returned by these functions are only updated by +** sqlite3_backup_step(). If the source database is modified in a way that +** changes the size of the source database or the number of pages remaining, +** those changes are not reflected in the output of sqlite3_backup_pagecount() +** and sqlite3_backup_remaining() until after the next +** sqlite3_backup_step().)^ +** +** Concurrent Usage of Database Handles +** +** ^The source [database connection] may be used by the application for other +** purposes while a backup operation is underway or being initialized. +** ^If SQLite is compiled and configured to support threadsafe database +** connections, then the source database connection may be used concurrently +** from within other threads. +** +** However, the application must guarantee that the destination +** [database connection] is not passed to any other API (by any thread) after +** sqlite3_backup_init() is called and before the corresponding call to +** sqlite3_backup_finish(). SQLite does not currently check to see +** if the application incorrectly accesses the destination [database connection] +** and so no error code is reported, but the operations may malfunction +** nevertheless. Use of the destination database connection while a +** backup is in progress might also also cause a mutex deadlock. +** +** If running in [shared cache mode], the application must +** guarantee that the shared cache used by the destination database +** is not accessed while the backup is running. In practice this means +** that the application must guarantee that the disk file being +** backed up to is not accessed by any connection within the process, +** not just the specific connection that was passed to sqlite3_backup_init(). +** +** The [sqlite3_backup] object itself is partially threadsafe. Multiple +** threads may safely make multiple concurrent calls to sqlite3_backup_step(). +** However, the sqlite3_backup_remaining() and sqlite3_backup_pagecount() +** APIs are not strictly speaking threadsafe. If they are invoked at the +** same time as another thread is invoking sqlite3_backup_step() it is +** possible that they return invalid values. +*/ +SQLITE_API sqlite3_backup *sqlite3_backup_init( + sqlite3 *pDest, /* Destination database handle */ + const char *zDestName, /* Destination database name */ + sqlite3 *pSource, /* Source database handle */ + const char *zSourceName /* Source database name */ +); +SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage); +SQLITE_API int sqlite3_backup_finish(sqlite3_backup *p); +SQLITE_API int sqlite3_backup_remaining(sqlite3_backup *p); +SQLITE_API int sqlite3_backup_pagecount(sqlite3_backup *p); + +/* +** CAPI3REF: Unlock Notification +** METHOD: sqlite3 +** +** ^When running in shared-cache mode, a database operation may fail with +** an [SQLITE_LOCKED] error if the required locks on the shared-cache or +** individual tables within the shared-cache cannot be obtained. See +** [SQLite Shared-Cache Mode] for a description of shared-cache locking. +** ^This API may be used to register a callback that SQLite will invoke +** when the connection currently holding the required lock relinquishes it. +** ^This API is only available if the library was compiled with the +** [SQLITE_ENABLE_UNLOCK_NOTIFY] C-preprocessor symbol defined. +** +** See Also: [Using the SQLite Unlock Notification Feature]. +** +** ^Shared-cache locks are released when a database connection concludes +** its current transaction, either by committing it or rolling it back. +** +** ^When a connection (known as the blocked connection) fails to obtain a +** shared-cache lock and SQLITE_LOCKED is returned to the caller, the +** identity of the database connection (the blocking connection) that +** has locked the required resource is stored internally. ^After an +** application receives an SQLITE_LOCKED error, it may call the +** sqlite3_unlock_notify() method with the blocked connection handle as +** the first argument to register for a callback that will be invoked +** when the blocking connections current transaction is concluded. ^The +** callback is invoked from within the [sqlite3_step] or [sqlite3_close] +** call that concludes the blocking connection's transaction. +** +** ^(If sqlite3_unlock_notify() is called in a multi-threaded application, +** there is a chance that the blocking connection will have already +** concluded its transaction by the time sqlite3_unlock_notify() is invoked. +** If this happens, then the specified callback is invoked immediately, +** from within the call to sqlite3_unlock_notify().)^ +** +** ^If the blocked connection is attempting to obtain a write-lock on a +** shared-cache table, and more than one other connection currently holds +** a read-lock on the same table, then SQLite arbitrarily selects one of +** the other connections to use as the blocking connection. +** +** ^(There may be at most one unlock-notify callback registered by a +** blocked connection. If sqlite3_unlock_notify() is called when the +** blocked connection already has a registered unlock-notify callback, +** then the new callback replaces the old.)^ ^If sqlite3_unlock_notify() is +** called with a NULL pointer as its second argument, then any existing +** unlock-notify callback is canceled. ^The blocked connections +** unlock-notify callback may also be canceled by closing the blocked +** connection using [sqlite3_close()]. +** +** The unlock-notify callback is not reentrant. If an application invokes +** any sqlite3_xxx API functions from within an unlock-notify callback, a +** crash or deadlock may be the result. +** +** ^Unless deadlock is detected (see below), sqlite3_unlock_notify() always +** returns SQLITE_OK. +** +** Callback Invocation Details +** +** When an unlock-notify callback is registered, the application provides a +** single void* pointer that is passed to the callback when it is invoked. +** However, the signature of the callback function allows SQLite to pass +** it an array of void* context pointers. The first argument passed to +** an unlock-notify callback is a pointer to an array of void* pointers, +** and the second is the number of entries in the array. +** +** When a blocking connection's transaction is concluded, there may be +** more than one blocked connection that has registered for an unlock-notify +** callback. ^If two or more such blocked connections have specified the +** same callback function, then instead of invoking the callback function +** multiple times, it is invoked once with the set of void* context pointers +** specified by the blocked connections bundled together into an array. +** This gives the application an opportunity to prioritize any actions +** related to the set of unblocked database connections. +** +** Deadlock Detection +** +** Assuming that after registering for an unlock-notify callback a +** database waits for the callback to be issued before taking any further +** action (a reasonable assumption), then using this API may cause the +** application to deadlock. For example, if connection X is waiting for +** connection Y's transaction to be concluded, and similarly connection +** Y is waiting on connection X's transaction, then neither connection +** will proceed and the system may remain deadlocked indefinitely. +** +** To avoid this scenario, the sqlite3_unlock_notify() performs deadlock +** detection. ^If a given call to sqlite3_unlock_notify() would put the +** system in a deadlocked state, then SQLITE_LOCKED is returned and no +** unlock-notify callback is registered. The system is said to be in +** a deadlocked state if connection A has registered for an unlock-notify +** callback on the conclusion of connection B's transaction, and connection +** B has itself registered for an unlock-notify callback when connection +** A's transaction is concluded. ^Indirect deadlock is also detected, so +** the system is also considered to be deadlocked if connection B has +** registered for an unlock-notify callback on the conclusion of connection +** C's transaction, where connection C is waiting on connection A. ^Any +** number of levels of indirection are allowed. +** +** The "DROP TABLE" Exception +** +** When a call to [sqlite3_step()] returns SQLITE_LOCKED, it is almost +** always appropriate to call sqlite3_unlock_notify(). There is however, +** one exception. When executing a "DROP TABLE" or "DROP INDEX" statement, +** SQLite checks if there are any currently executing SELECT statements +** that belong to the same connection. If there are, SQLITE_LOCKED is +** returned. In this case there is no "blocking connection", so invoking +** sqlite3_unlock_notify() results in the unlock-notify callback being +** invoked immediately. If the application then re-attempts the "DROP TABLE" +** or "DROP INDEX" query, an infinite loop might be the result. +** +** One way around this problem is to check the extended error code returned +** by an sqlite3_step() call. ^(If there is a blocking connection, then the +** extended error code is set to SQLITE_LOCKED_SHAREDCACHE. Otherwise, in +** the special "DROP TABLE/INDEX" case, the extended error code is just +** SQLITE_LOCKED.)^ +*/ +SQLITE_API int sqlite3_unlock_notify( + sqlite3 *pBlocked, /* Waiting connection */ + void (*xNotify)(void **apArg, int nArg), /* Callback function to invoke */ + void *pNotifyArg /* Argument to pass to xNotify */ +); + + +/* +** CAPI3REF: String Comparison +** +** ^The [sqlite3_stricmp()] and [sqlite3_strnicmp()] APIs allow applications +** and extensions to compare the contents of two buffers containing UTF-8 +** strings in a case-independent fashion, using the same definition of "case +** independence" that SQLite uses internally when comparing identifiers. +*/ +SQLITE_API int sqlite3_stricmp(const char *, const char *); +SQLITE_API int sqlite3_strnicmp(const char *, const char *, int); + +/* +** CAPI3REF: String Globbing +* +** ^The [sqlite3_strglob(P,X)] interface returns zero if and only if +** string X matches the [GLOB] pattern P. +** ^The definition of [GLOB] pattern matching used in +** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the +** SQL dialect understood by SQLite. ^The [sqlite3_strglob(P,X)] function +** is case sensitive. +** +** Note that this routine returns zero on a match and non-zero if the strings +** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. +** +** See also: [sqlite3_strlike()]. +*/ +SQLITE_API int sqlite3_strglob(const char *zGlob, const char *zStr); + +/* +** CAPI3REF: String LIKE Matching +* +** ^The [sqlite3_strlike(P,X,E)] interface returns zero if and only if +** string X matches the [LIKE] pattern P with escape character E. +** ^The definition of [LIKE] pattern matching used in +** [sqlite3_strlike(P,X,E)] is the same as for the "X LIKE P ESCAPE E" +** operator in the SQL dialect understood by SQLite. ^For "X LIKE P" without +** the ESCAPE clause, set the E parameter of [sqlite3_strlike(P,X,E)] to 0. +** ^As with the LIKE operator, the [sqlite3_strlike(P,X,E)] function is case +** insensitive - equivalent upper and lower case ASCII characters match +** one another. +** +** ^The [sqlite3_strlike(P,X,E)] function matches Unicode characters, though +** only ASCII characters are case folded. +** +** Note that this routine returns zero on a match and non-zero if the strings +** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. +** +** See also: [sqlite3_strglob()]. +*/ +SQLITE_API int sqlite3_strlike(const char *zGlob, const char *zStr, unsigned int cEsc); + +/* +** CAPI3REF: Error Logging Interface +** +** ^The [sqlite3_log()] interface writes a message into the [error log] +** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. +** ^If logging is enabled, the zFormat string and subsequent arguments are +** used with [sqlite3_snprintf()] to generate the final output string. +** +** The sqlite3_log() interface is intended for use by extensions such as +** virtual tables, collating functions, and SQL functions. While there is +** nothing to prevent an application from calling sqlite3_log(), doing so +** is considered bad form. +** +** The zFormat string must not be NULL. +** +** To avoid deadlocks and other threading problems, the sqlite3_log() routine +** will not use dynamically allocated memory. The log message is stored in +** a fixed-length buffer on the stack. If the log message is longer than +** a few hundred characters, it will be truncated to the length of the +** buffer. +*/ +SQLITE_API void sqlite3_log(int iErrCode, const char *zFormat, ...); + +/* +** CAPI3REF: Write-Ahead Log Commit Hook +** METHOD: sqlite3 +** +** ^The [sqlite3_wal_hook()] function is used to register a callback that +** is invoked each time data is committed to a database in wal mode. +** +** ^(The callback is invoked by SQLite after the commit has taken place and +** the associated write-lock on the database released)^, so the implementation +** may read, write or [checkpoint] the database as required. +** +** ^The first parameter passed to the callback function when it is invoked +** is a copy of the third parameter passed to sqlite3_wal_hook() when +** registering the callback. ^The second is a copy of the database handle. +** ^The third parameter is the name of the database that was written to - +** either "main" or the name of an [ATTACH]-ed database. ^The fourth parameter +** is the number of pages currently in the write-ahead log file, +** including those that were just committed. +** +** The callback function should normally return [SQLITE_OK]. ^If an error +** code is returned, that error will propagate back up through the +** SQLite code base to cause the statement that provoked the callback +** to report an error, though the commit will have still occurred. If the +** callback returns [SQLITE_ROW] or [SQLITE_DONE], or if it returns a value +** that does not correspond to any valid SQLite error code, the results +** are undefined. +** +** A single database handle may have at most a single write-ahead log callback +** registered at one time. ^Calling [sqlite3_wal_hook()] replaces any +** previously registered write-ahead log callback. ^The return value is +** a copy of the third parameter from the previous call, if any, or 0. +** ^Note that the [sqlite3_wal_autocheckpoint()] interface and the +** [wal_autocheckpoint pragma] both invoke [sqlite3_wal_hook()] and will +** overwrite any prior [sqlite3_wal_hook()] settings. +*/ +SQLITE_API void *sqlite3_wal_hook( + sqlite3*, + int(*)(void *,sqlite3*,const char*,int), + void* +); + +/* +** CAPI3REF: Configure an auto-checkpoint +** METHOD: sqlite3 +** +** ^The [sqlite3_wal_autocheckpoint(D,N)] is a wrapper around +** [sqlite3_wal_hook()] that causes any database on [database connection] D +** to automatically [checkpoint] +** after committing a transaction if there are N or +** more frames in the [write-ahead log] file. ^Passing zero or +** a negative value as the nFrame parameter disables automatic +** checkpoints entirely. +** +** ^The callback registered by this function replaces any existing callback +** registered using [sqlite3_wal_hook()]. ^Likewise, registering a callback +** using [sqlite3_wal_hook()] disables the automatic checkpoint mechanism +** configured by this function. +** +** ^The [wal_autocheckpoint pragma] can be used to invoke this interface +** from SQL. +** +** ^Checkpoints initiated by this mechanism are +** [sqlite3_wal_checkpoint_v2|PASSIVE]. +** +** ^Every new [database connection] defaults to having the auto-checkpoint +** enabled with a threshold of 1000 or [SQLITE_DEFAULT_WAL_AUTOCHECKPOINT] +** pages. The use of this interface +** is only necessary if the default setting is found to be suboptimal +** for a particular application. +*/ +SQLITE_API int sqlite3_wal_autocheckpoint(sqlite3 *db, int N); + +/* +** CAPI3REF: Checkpoint a database +** METHOD: sqlite3 +** +** ^(The sqlite3_wal_checkpoint(D,X) is equivalent to +** [sqlite3_wal_checkpoint_v2](D,X,[SQLITE_CHECKPOINT_PASSIVE],0,0).)^ +** +** In brief, sqlite3_wal_checkpoint(D,X) causes the content in the +** [write-ahead log] for database X on [database connection] D to be +** transferred into the database file and for the write-ahead log to +** be reset. See the [checkpointing] documentation for addition +** information. +** +** This interface used to be the only way to cause a checkpoint to +** occur. But then the newer and more powerful [sqlite3_wal_checkpoint_v2()] +** interface was added. This interface is retained for backwards +** compatibility and as a convenience for applications that need to manually +** start a callback but which do not need the full power (and corresponding +** complication) of [sqlite3_wal_checkpoint_v2()]. +*/ +SQLITE_API int sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb); + +/* +** CAPI3REF: Checkpoint a database +** METHOD: sqlite3 +** +** ^(The sqlite3_wal_checkpoint_v2(D,X,M,L,C) interface runs a checkpoint +** operation on database X of [database connection] D in mode M. Status +** information is written back into integers pointed to by L and C.)^ +** ^(The M parameter must be a valid [checkpoint mode]:)^ +** +**
    +**
    SQLITE_CHECKPOINT_PASSIVE
    +** ^Checkpoint as many frames as possible without waiting for any database +** readers or writers to finish, then sync the database file if all frames +** in the log were checkpointed. ^The [busy-handler callback] +** is never invoked in the SQLITE_CHECKPOINT_PASSIVE mode. +** ^On the other hand, passive mode might leave the checkpoint unfinished +** if there are concurrent readers or writers. +** +**
    SQLITE_CHECKPOINT_FULL
    +** ^This mode blocks (it invokes the +** [sqlite3_busy_handler|busy-handler callback]) until there is no +** database writer and all readers are reading from the most recent database +** snapshot. ^It then checkpoints all frames in the log file and syncs the +** database file. ^This mode blocks new database writers while it is pending, +** but new database readers are allowed to continue unimpeded. +** +**
    SQLITE_CHECKPOINT_RESTART
    +** ^This mode works the same way as SQLITE_CHECKPOINT_FULL with the addition +** that after checkpointing the log file it blocks (calls the +** [busy-handler callback]) +** until all readers are reading from the database file only. ^This ensures +** that the next writer will restart the log file from the beginning. +** ^Like SQLITE_CHECKPOINT_FULL, this mode blocks new +** database writer attempts while it is pending, but does not impede readers. +** +**
    SQLITE_CHECKPOINT_TRUNCATE
    +** ^This mode works the same way as SQLITE_CHECKPOINT_RESTART with the +** addition that it also truncates the log file to zero bytes just prior +** to a successful return. +**
    +** +** ^If pnLog is not NULL, then *pnLog is set to the total number of frames in +** the log file or to -1 if the checkpoint could not run because +** of an error or because the database is not in [WAL mode]. ^If pnCkpt is not +** NULL,then *pnCkpt is set to the total number of checkpointed frames in the +** log file (including any that were already checkpointed before the function +** was called) or to -1 if the checkpoint could not run due to an error or +** because the database is not in WAL mode. ^Note that upon successful +** completion of an SQLITE_CHECKPOINT_TRUNCATE, the log file will have been +** truncated to zero bytes and so both *pnLog and *pnCkpt will be set to zero. +** +** ^All calls obtain an exclusive "checkpoint" lock on the database file. ^If +** any other process is running a checkpoint operation at the same time, the +** lock cannot be obtained and SQLITE_BUSY is returned. ^Even if there is a +** busy-handler configured, it will not be invoked in this case. +** +** ^The SQLITE_CHECKPOINT_FULL, RESTART and TRUNCATE modes also obtain the +** exclusive "writer" lock on the database file. ^If the writer lock cannot be +** obtained immediately, and a busy-handler is configured, it is invoked and +** the writer lock retried until either the busy-handler returns 0 or the lock +** is successfully obtained. ^The busy-handler is also invoked while waiting for +** database readers as described above. ^If the busy-handler returns 0 before +** the writer lock is obtained or while waiting for database readers, the +** checkpoint operation proceeds from that point in the same way as +** SQLITE_CHECKPOINT_PASSIVE - checkpointing as many frames as possible +** without blocking any further. ^SQLITE_BUSY is returned in this case. +** +** ^If parameter zDb is NULL or points to a zero length string, then the +** specified operation is attempted on all WAL databases [attached] to +** [database connection] db. In this case the +** values written to output parameters *pnLog and *pnCkpt are undefined. ^If +** an SQLITE_BUSY error is encountered when processing one or more of the +** attached WAL databases, the operation is still attempted on any remaining +** attached databases and SQLITE_BUSY is returned at the end. ^If any other +** error occurs while processing an attached database, processing is abandoned +** and the error code is returned to the caller immediately. ^If no error +** (SQLITE_BUSY or otherwise) is encountered while processing the attached +** databases, SQLITE_OK is returned. +** +** ^If database zDb is the name of an attached database that is not in WAL +** mode, SQLITE_OK is returned and both *pnLog and *pnCkpt set to -1. ^If +** zDb is not NULL (or a zero length string) and is not the name of any +** attached database, SQLITE_ERROR is returned to the caller. +** +** ^Unless it returns SQLITE_MISUSE, +** the sqlite3_wal_checkpoint_v2() interface +** sets the error information that is queried by +** [sqlite3_errcode()] and [sqlite3_errmsg()]. +** +** ^The [PRAGMA wal_checkpoint] command can be used to invoke this interface +** from SQL. +*/ +SQLITE_API int sqlite3_wal_checkpoint_v2( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of attached database (or NULL) */ + int eMode, /* SQLITE_CHECKPOINT_* value */ + int *pnLog, /* OUT: Size of WAL log in frames */ + int *pnCkpt /* OUT: Total number of frames checkpointed */ +); + +/* +** CAPI3REF: Checkpoint Mode Values +** KEYWORDS: {checkpoint mode} +** +** These constants define all valid values for the "checkpoint mode" passed +** as the third parameter to the [sqlite3_wal_checkpoint_v2()] interface. +** See the [sqlite3_wal_checkpoint_v2()] documentation for details on the +** meaning of each of these checkpoint modes. +*/ +#define SQLITE_CHECKPOINT_PASSIVE 0 /* Do as much as possible w/o blocking */ +#define SQLITE_CHECKPOINT_FULL 1 /* Wait for writers, then checkpoint */ +#define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for for readers */ +#define SQLITE_CHECKPOINT_TRUNCATE 3 /* Like RESTART but also truncate WAL */ + +/* +** CAPI3REF: Virtual Table Interface Configuration +** +** This function may be called by either the [xConnect] or [xCreate] method +** of a [virtual table] implementation to configure +** various facets of the virtual table interface. +** +** If this interface is invoked outside the context of an xConnect or +** xCreate virtual table method then the behavior is undefined. +** +** In the call sqlite3_vtab_config(D,C,...) the D parameter is the +** [database connection] in which the virtual table is being created and +** which is passed in as the first argument to the [xConnect] or [xCreate] +** method that is invoking sqlite3_vtab_config(). The C parameter is one +** of the [virtual table configuration options]. The presence and meaning +** of parameters after C depend on which [virtual table configuration option] +** is used. +*/ +SQLITE_API int sqlite3_vtab_config(sqlite3*, int op, ...); + +/* +** CAPI3REF: Virtual Table Configuration Options +** KEYWORDS: {virtual table configuration options} +** KEYWORDS: {virtual table configuration option} +** +** These macros define the various options to the +** [sqlite3_vtab_config()] interface that [virtual table] implementations +** can use to customize and optimize their behavior. +** +**
    +** [[SQLITE_VTAB_CONSTRAINT_SUPPORT]] +**
    SQLITE_VTAB_CONSTRAINT_SUPPORT
    +**
    Calls of the form +** [sqlite3_vtab_config](db,SQLITE_VTAB_CONSTRAINT_SUPPORT,X) are supported, +** where X is an integer. If X is zero, then the [virtual table] whose +** [xCreate] or [xConnect] method invoked [sqlite3_vtab_config()] does not +** support constraints. In this configuration (which is the default) if +** a call to the [xUpdate] method returns [SQLITE_CONSTRAINT], then the entire +** statement is rolled back as if [ON CONFLICT | OR ABORT] had been +** specified as part of the users SQL statement, regardless of the actual +** ON CONFLICT mode specified. +** +** If X is non-zero, then the virtual table implementation guarantees +** that if [xUpdate] returns [SQLITE_CONSTRAINT], it will do so before +** any modifications to internal or persistent data structures have been made. +** If the [ON CONFLICT] mode is ABORT, FAIL, IGNORE or ROLLBACK, SQLite +** is able to roll back a statement or database transaction, and abandon +** or continue processing the current SQL statement as appropriate. +** If the ON CONFLICT mode is REPLACE and the [xUpdate] method returns +** [SQLITE_CONSTRAINT], SQLite handles this as if the ON CONFLICT mode +** had been ABORT. +** +** Virtual table implementations that are required to handle OR REPLACE +** must do so within the [xUpdate] method. If a call to the +** [sqlite3_vtab_on_conflict()] function indicates that the current ON +** CONFLICT policy is REPLACE, the virtual table implementation should +** silently replace the appropriate rows within the xUpdate callback and +** return SQLITE_OK. Or, if this is not possible, it may return +** SQLITE_CONSTRAINT, in which case SQLite falls back to OR ABORT +** constraint handling. +**
    +** +** [[SQLITE_VTAB_DIRECTONLY]]
    SQLITE_VTAB_DIRECTONLY
    +**
    Calls of the form +** [sqlite3_vtab_config](db,SQLITE_VTAB_DIRECTONLY) from within the +** the [xConnect] or [xCreate] methods of a [virtual table] implmentation +** prohibits that virtual table from being used from within triggers and +** views. +**
    +** +** [[SQLITE_VTAB_INNOCUOUS]]
    SQLITE_VTAB_INNOCUOUS
    +**
    Calls of the form +** [sqlite3_vtab_config](db,SQLITE_VTAB_INNOCUOUS) from within the +** the [xConnect] or [xCreate] methods of a [virtual table] implmentation +** identify that virtual table as being safe to use from within triggers +** and views. Conceptually, the SQLITE_VTAB_INNOCUOUS tag means that the +** virtual table can do no serious harm even if it is controlled by a +** malicious hacker. Developers should avoid setting the SQLITE_VTAB_INNOCUOUS +** flag unless absolutely necessary. +**
    +**
    +*/ +#define SQLITE_VTAB_CONSTRAINT_SUPPORT 1 +#define SQLITE_VTAB_INNOCUOUS 2 +#define SQLITE_VTAB_DIRECTONLY 3 + +/* +** CAPI3REF: Determine The Virtual Table Conflict Policy +** +** This function may only be called from within a call to the [xUpdate] method +** of a [virtual table] implementation for an INSERT or UPDATE operation. ^The +** value returned is one of [SQLITE_ROLLBACK], [SQLITE_IGNORE], [SQLITE_FAIL], +** [SQLITE_ABORT], or [SQLITE_REPLACE], according to the [ON CONFLICT] mode +** of the SQL statement that triggered the call to the [xUpdate] method of the +** [virtual table]. +*/ +SQLITE_API int sqlite3_vtab_on_conflict(sqlite3 *); + +/* +** CAPI3REF: Determine If Virtual Table Column Access Is For UPDATE +** +** If the sqlite3_vtab_nochange(X) routine is called within the [xColumn] +** method of a [virtual table], then it might return true if the +** column is being fetched as part of an UPDATE operation during which the +** column value will not change. The virtual table implementation can use +** this hint as permission to substitute a return value that is less +** expensive to compute and that the corresponding +** [xUpdate] method understands as a "no-change" value. +** +** If the [xColumn] method calls sqlite3_vtab_nochange() and finds that +** the column is not changed by the UPDATE statement, then the xColumn +** method can optionally return without setting a result, without calling +** any of the [sqlite3_result_int|sqlite3_result_xxxxx() interfaces]. +** In that case, [sqlite3_value_nochange(X)] will return true for the +** same column in the [xUpdate] method. +** +** The sqlite3_vtab_nochange() routine is an optimization. Virtual table +** implementations should continue to give a correct answer even if the +** sqlite3_vtab_nochange() interface were to always return false. In the +** current implementation, the sqlite3_vtab_nochange() interface does always +** returns false for the enhanced [UPDATE FROM] statement. +*/ +SQLITE_API int sqlite3_vtab_nochange(sqlite3_context*); + +/* +** CAPI3REF: Determine The Collation For a Virtual Table Constraint +** METHOD: sqlite3_index_info +** +** This function may only be called from within a call to the [xBestIndex] +** method of a [virtual table]. This function returns a pointer to a string +** that is the name of the appropriate collation sequence to use for text +** comparisons on the constraint identified by its arguments. +** +** The first argument must be the pointer to the [sqlite3_index_info] object +** that is the first parameter to the xBestIndex() method. The second argument +** must be an index into the aConstraint[] array belonging to the +** sqlite3_index_info structure passed to xBestIndex. +** +** Important: +** The first parameter must be the same pointer that is passed into the +** xBestMethod() method. The first parameter may not be a pointer to a +** different [sqlite3_index_info] object, even an exact copy. +** +** The return value is computed as follows: +** +**
      +**
    1. If the constraint comes from a WHERE clause expression that contains +** a [COLLATE operator], then the name of the collation specified by +** that COLLATE operator is returned. +**

    2. If there is no COLLATE operator, but the column that is the subject +** of the constraint specifies an alternative collating sequence via +** a [COLLATE clause] on the column definition within the CREATE TABLE +** statement that was passed into [sqlite3_declare_vtab()], then the +** name of that alternative collating sequence is returned. +**

    3. Otherwise, "BINARY" is returned. +**

    +*/ +SQLITE_API SQLITE_EXPERIMENTAL const char *sqlite3_vtab_collation(sqlite3_index_info*,int); + +/* +** CAPI3REF: Determine if a virtual table query is DISTINCT +** METHOD: sqlite3_index_info +** +** This API may only be used from within an [xBestIndex|xBestIndex method] +** of a [virtual table] implementation. The result of calling this +** interface from outside of xBestIndex() is undefined and probably harmful. +** +** ^The sqlite3_vtab_distinct() interface returns an integer that is +** either 0, 1, or 2. The integer returned by sqlite3_vtab_distinct() +** gives the virtual table additional information about how the query +** planner wants the output to be ordered. As long as the virtual table +** can meet the ordering requirements of the query planner, it may set +** the "orderByConsumed" flag. +** +**
    1. +** ^If the sqlite3_vtab_distinct() interface returns 0, that means +** that the query planner needs the virtual table to return all rows in the +** sort order defined by the "nOrderBy" and "aOrderBy" fields of the +** [sqlite3_index_info] object. This is the default expectation. If the +** virtual table outputs all rows in sorted order, then it is always safe for +** the xBestIndex method to set the "orderByConsumed" flag, regardless of +** the return value from sqlite3_vtab_distinct(). +**

    2. +** ^(If the sqlite3_vtab_distinct() interface returns 1, that means +** that the query planner does not need the rows to be returned in sorted order +** as long as all rows with the same values in all columns identified by the +** "aOrderBy" field are adjacent.)^ This mode is used when the query planner +** is doing a GROUP BY. +**

    3. +** ^(If the sqlite3_vtab_distinct() interface returns 2, that means +** that the query planner does not need the rows returned in any particular +** order, as long as rows with the same values in all "aOrderBy" columns +** are adjacent.)^ ^(Furthermore, only a single row for each particular +** combination of values in the columns identified by the "aOrderBy" field +** needs to be returned.)^ ^It is always ok for two or more rows with the same +** values in all "aOrderBy" columns to be returned, as long as all such rows +** are adjacent. ^The virtual table may, if it chooses, omit extra rows +** that have the same value for all columns identified by "aOrderBy". +** ^However omitting the extra rows is optional. +** This mode is used for a DISTINCT query. +**

    +** +** ^For the purposes of comparing virtual table output values to see if the +** values are same value for sorting purposes, two NULL values are considered +** to be the same. In other words, the comparison operator is "IS" +** (or "IS NOT DISTINCT FROM") and not "==". +** +** If a virtual table implementation is unable to meet the requirements +** specified above, then it must not set the "orderByConsumed" flag in the +** [sqlite3_index_info] object or an incorrect answer may result. +** +** ^A virtual table implementation is always free to return rows in any order +** it wants, as long as the "orderByConsumed" flag is not set. ^When the +** the "orderByConsumed" flag is unset, the query planner will add extra +** [bytecode] to ensure that the final results returned by the SQL query are +** ordered correctly. The use of the "orderByConsumed" flag and the +** sqlite3_vtab_distinct() interface is merely an optimization. ^Careful +** use of the sqlite3_vtab_distinct() interface and the "orderByConsumed" +** flag might help queries against a virtual table to run faster. Being +** overly aggressive and setting the "orderByConsumed" flag when it is not +** valid to do so, on the other hand, might cause SQLite to return incorrect +** results. +*/ +SQLITE_API int sqlite3_vtab_distinct(sqlite3_index_info*); + +/* +** CAPI3REF: Identify and handle IN constraints in xBestIndex +** +** This interface may only be used from within an +** [xBestIndex|xBestIndex() method] of a [virtual table] implementation. +** The result of invoking this interface from any other context is +** undefined and probably harmful. +** +** ^(A constraint on a virtual table of the form +** "[IN operator|column IN (...)]" is +** communicated to the xBestIndex method as a +** [SQLITE_INDEX_CONSTRAINT_EQ] constraint.)^ If xBestIndex wants to use +** this constraint, it must set the corresponding +** aConstraintUsage[].argvIndex to a postive integer. ^(Then, under +** the usual mode of handling IN operators, SQLite generates [bytecode] +** that invokes the [xFilter|xFilter() method] once for each value +** on the right-hand side of the IN operator.)^ Thus the virtual table +** only sees a single value from the right-hand side of the IN operator +** at a time. +** +** In some cases, however, it would be advantageous for the virtual +** table to see all values on the right-hand of the IN operator all at +** once. The sqlite3_vtab_in() interfaces facilitates this in two ways: +** +**
      +**
    1. +** ^A call to sqlite3_vtab_in(P,N,-1) will return true (non-zero) +** if and only if the [sqlite3_index_info|P->aConstraint][N] constraint +** is an [IN operator] that can be processed all at once. ^In other words, +** sqlite3_vtab_in() with -1 in the third argument is a mechanism +** by which the virtual table can ask SQLite if all-at-once processing +** of the IN operator is even possible. +** +**

    2. +** ^A call to sqlite3_vtab_in(P,N,F) with F==1 or F==0 indicates +** to SQLite that the virtual table does or does not want to process +** the IN operator all-at-once, respectively. ^Thus when the third +** parameter (F) is non-negative, this interface is the mechanism by +** which the virtual table tells SQLite how it wants to process the +** IN operator. +**

    +** +** ^The sqlite3_vtab_in(P,N,F) interface can be invoked multiple times +** within the same xBestIndex method call. ^For any given P,N pair, +** the return value from sqlite3_vtab_in(P,N,F) will always be the same +** within the same xBestIndex call. ^If the interface returns true +** (non-zero), that means that the constraint is an IN operator +** that can be processed all-at-once. ^If the constraint is not an IN +** operator or cannot be processed all-at-once, then the interface returns +** false. +** +** ^(All-at-once processing of the IN operator is selected if both of the +** following conditions are met: +** +**
      +**
    1. The P->aConstraintUsage[N].argvIndex value is set to a positive +** integer. This is how the virtual table tells SQLite that it wants to +** use the N-th constraint. +** +**

    2. The last call to sqlite3_vtab_in(P,N,F) for which F was +** non-negative had F>=1. +**

    )^ +** +** ^If either or both of the conditions above are false, then SQLite uses +** the traditional one-at-a-time processing strategy for the IN constraint. +** ^If both conditions are true, then the argvIndex-th parameter to the +** xFilter method will be an [sqlite3_value] that appears to be NULL, +** but which can be passed to [sqlite3_vtab_in_first()] and +** [sqlite3_vtab_in_next()] to find all values on the right-hand side +** of the IN constraint. +*/ +SQLITE_API int sqlite3_vtab_in(sqlite3_index_info*, int iCons, int bHandle); + +/* +** CAPI3REF: Find all elements on the right-hand side of an IN constraint. +** +** These interfaces are only useful from within the +** [xFilter|xFilter() method] of a [virtual table] implementation. +** The result of invoking these interfaces from any other context +** is undefined and probably harmful. +** +** The X parameter in a call to sqlite3_vtab_in_first(X,P) or +** sqlite3_vtab_in_next(X,P) must be one of the parameters to the +** xFilter method which invokes these routines, and specifically +** a parameter that was previously selected for all-at-once IN constraint +** processing use the [sqlite3_vtab_in()] interface in the +** [xBestIndex|xBestIndex method]. ^(If the X parameter is not +** an xFilter argument that was selected for all-at-once IN constraint +** processing, then these routines return [SQLITE_MISUSE])^ or perhaps +** exhibit some other undefined or harmful behavior. +** +** ^(Use these routines to access all values on the right-hand side +** of the IN constraint using code like the following: +** +**
    +**    for(rc=sqlite3_vtab_in_first(pList, &pVal);
    +**        rc==SQLITE_OK && pVal
    +**        rc=sqlite3_vtab_in_next(pList, &pVal)
    +**    ){
    +**      // do something with pVal
    +**    }
    +**    if( rc!=SQLITE_OK ){
    +**      // an error has occurred
    +**    }
    +** 
    )^ +** +** ^On success, the sqlite3_vtab_in_first(X,P) and sqlite3_vtab_in_next(X,P) +** routines return SQLITE_OK and set *P to point to the first or next value +** on the RHS of the IN constraint. ^If there are no more values on the +** right hand side of the IN constraint, then *P is set to NULL and these +** routines return [SQLITE_DONE]. ^The return value might be +** some other value, such as SQLITE_NOMEM, in the event of a malfunction. +** +** The *ppOut values returned by these routines are only valid until the +** next call to either of these routines or until the end of the xFilter +** method from which these routines were called. If the virtual table +** implementation needs to retain the *ppOut values for longer, it must make +** copies. The *ppOut values are [protected sqlite3_value|protected]. +*/ +SQLITE_API int sqlite3_vtab_in_first(sqlite3_value *pVal, sqlite3_value **ppOut); +SQLITE_API int sqlite3_vtab_in_next(sqlite3_value *pVal, sqlite3_value **ppOut); + +/* +** CAPI3REF: Constraint values in xBestIndex() +** METHOD: sqlite3_index_info +** +** This API may only be used from within the [xBestIndex|xBestIndex method] +** of a [virtual table] implementation. The result of calling this interface +** from outside of an xBestIndex method are undefined and probably harmful. +** +** ^When the sqlite3_vtab_rhs_value(P,J,V) interface is invoked from within +** the [xBestIndex] method of a [virtual table] implementation, with P being +** a copy of the [sqlite3_index_info] object pointer passed into xBestIndex and +** J being a 0-based index into P->aConstraint[], then this routine +** attempts to set *V to the value of the right-hand operand of +** that constraint if the right-hand operand is known. ^If the +** right-hand operand is not known, then *V is set to a NULL pointer. +** ^The sqlite3_vtab_rhs_value(P,J,V) interface returns SQLITE_OK if +** and only if *V is set to a value. ^The sqlite3_vtab_rhs_value(P,J,V) +** inteface returns SQLITE_NOTFOUND if the right-hand side of the J-th +** constraint is not available. ^The sqlite3_vtab_rhs_value() interface +** can return an result code other than SQLITE_OK or SQLITE_NOTFOUND if +** something goes wrong. +** +** The sqlite3_vtab_rhs_value() interface is usually only successful if +** the right-hand operand of a constraint is a literal value in the original +** SQL statement. If the right-hand operand is an expression or a reference +** to some other column or a [host parameter], then sqlite3_vtab_rhs_value() +** will probably return [SQLITE_NOTFOUND]. +** +** ^(Some constraints, such as [SQLITE_INDEX_CONSTRAINT_ISNULL] and +** [SQLITE_INDEX_CONSTRAINT_ISNOTNULL], have no right-hand operand. For such +** constraints, sqlite3_vtab_rhs_value() always returns SQLITE_NOTFOUND.)^ +** +** ^The [sqlite3_value] object returned in *V is a protected sqlite3_value +** and remains valid for the duration of the xBestIndex method call. +** ^When xBestIndex returns, the sqlite3_value object returned by +** sqlite3_vtab_rhs_value() is automatically deallocated. +** +** The "_rhs_" in the name of this routine is an abbreviation for +** "Right-Hand Side". +*/ +SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value **ppVal); + +/* +** CAPI3REF: Conflict resolution modes +** KEYWORDS: {conflict resolution mode} +** +** These constants are returned by [sqlite3_vtab_on_conflict()] to +** inform a [virtual table] implementation what the [ON CONFLICT] mode +** is for the SQL statement being evaluated. +** +** Note that the [SQLITE_IGNORE] constant is also used as a potential +** return value from the [sqlite3_set_authorizer()] callback and that +** [SQLITE_ABORT] is also a [result code]. +*/ +#define SQLITE_ROLLBACK 1 +/* #define SQLITE_IGNORE 2 // Also used by sqlite3_authorizer() callback */ +#define SQLITE_FAIL 3 +/* #define SQLITE_ABORT 4 // Also an error code */ +#define SQLITE_REPLACE 5 + +/* +** CAPI3REF: Prepared Statement Scan Status Opcodes +** KEYWORDS: {scanstatus options} +** +** The following constants can be used for the T parameter to the +** [sqlite3_stmt_scanstatus(S,X,T,V)] interface. Each constant designates a +** different metric for sqlite3_stmt_scanstatus() to return. +** +** When the value returned to V is a string, space to hold that string is +** managed by the prepared statement S and will be automatically freed when +** S is finalized. +** +**
    +** [[SQLITE_SCANSTAT_NLOOP]]
    SQLITE_SCANSTAT_NLOOP
    +**
    ^The [sqlite3_int64] variable pointed to by the V parameter will be +** set to the total number of times that the X-th loop has run.
    +** +** [[SQLITE_SCANSTAT_NVISIT]]
    SQLITE_SCANSTAT_NVISIT
    +**
    ^The [sqlite3_int64] variable pointed to by the V parameter will be set +** to the total number of rows examined by all iterations of the X-th loop.
    +** +** [[SQLITE_SCANSTAT_EST]]
    SQLITE_SCANSTAT_EST
    +**
    ^The "double" variable pointed to by the V parameter will be set to the +** query planner's estimate for the average number of rows output from each +** iteration of the X-th loop. If the query planner's estimates was accurate, +** then this value will approximate the quotient NVISIT/NLOOP and the +** product of this value for all prior loops with the same SELECTID will +** be the NLOOP value for the current loop. +** +** [[SQLITE_SCANSTAT_NAME]]
    SQLITE_SCANSTAT_NAME
    +**
    ^The "const char *" variable pointed to by the V parameter will be set +** to a zero-terminated UTF-8 string containing the name of the index or table +** used for the X-th loop. +** +** [[SQLITE_SCANSTAT_EXPLAIN]]
    SQLITE_SCANSTAT_EXPLAIN
    +**
    ^The "const char *" variable pointed to by the V parameter will be set +** to a zero-terminated UTF-8 string containing the [EXPLAIN QUERY PLAN] +** description for the X-th loop. +** +** [[SQLITE_SCANSTAT_SELECTID]]
    SQLITE_SCANSTAT_SELECT
    +**
    ^The "int" variable pointed to by the V parameter will be set to the +** "select-id" for the X-th loop. The select-id identifies which query or +** subquery the loop is part of. The main query has a select-id of zero. +** The select-id is the same value as is output in the first column +** of an [EXPLAIN QUERY PLAN] query. +**
    +*/ +#define SQLITE_SCANSTAT_NLOOP 0 +#define SQLITE_SCANSTAT_NVISIT 1 +#define SQLITE_SCANSTAT_EST 2 +#define SQLITE_SCANSTAT_NAME 3 +#define SQLITE_SCANSTAT_EXPLAIN 4 +#define SQLITE_SCANSTAT_SELECTID 5 + +/* +** CAPI3REF: Prepared Statement Scan Status +** METHOD: sqlite3_stmt +** +** This interface returns information about the predicted and measured +** performance for pStmt. Advanced applications can use this +** interface to compare the predicted and the measured performance and +** issue warnings and/or rerun [ANALYZE] if discrepancies are found. +** +** Since this interface is expected to be rarely used, it is only +** available if SQLite is compiled using the [SQLITE_ENABLE_STMT_SCANSTATUS] +** compile-time option. +** +** The "iScanStatusOp" parameter determines which status information to return. +** The "iScanStatusOp" must be one of the [scanstatus options] or the behavior +** of this interface is undefined. +** ^The requested measurement is written into a variable pointed to by +** the "pOut" parameter. +** Parameter "idx" identifies the specific loop to retrieve statistics for. +** Loops are numbered starting from zero. ^If idx is out of range - less than +** zero or greater than or equal to the total number of loops used to implement +** the statement - a non-zero value is returned and the variable that pOut +** points to is unchanged. +** +** ^Statistics might not be available for all loops in all statements. ^In cases +** where there exist loops with no available statistics, this function behaves +** as if the loop did not exist - it returns non-zero and leave the variable +** that pOut points to unchanged. +** +** See also: [sqlite3_stmt_scanstatus_reset()] +*/ +SQLITE_API int sqlite3_stmt_scanstatus( + sqlite3_stmt *pStmt, /* Prepared statement for which info desired */ + int idx, /* Index of loop to report on */ + int iScanStatusOp, /* Information desired. SQLITE_SCANSTAT_* */ + void *pOut /* Result written here */ +); + +/* +** CAPI3REF: Zero Scan-Status Counters +** METHOD: sqlite3_stmt +** +** ^Zero all [sqlite3_stmt_scanstatus()] related event counters. +** +** This API is only available if the library is built with pre-processor +** symbol [SQLITE_ENABLE_STMT_SCANSTATUS] defined. +*/ +SQLITE_API void sqlite3_stmt_scanstatus_reset(sqlite3_stmt*); + +/* +** CAPI3REF: Flush caches to disk mid-transaction +** METHOD: sqlite3 +** +** ^If a write-transaction is open on [database connection] D when the +** [sqlite3_db_cacheflush(D)] interface invoked, any dirty +** pages in the pager-cache that are not currently in use are written out +** to disk. A dirty page may be in use if a database cursor created by an +** active SQL statement is reading from it, or if it is page 1 of a database +** file (page 1 is always "in use"). ^The [sqlite3_db_cacheflush(D)] +** interface flushes caches for all schemas - "main", "temp", and +** any [attached] databases. +** +** ^If this function needs to obtain extra database locks before dirty pages +** can be flushed to disk, it does so. ^If those locks cannot be obtained +** immediately and there is a busy-handler callback configured, it is invoked +** in the usual manner. ^If the required lock still cannot be obtained, then +** the database is skipped and an attempt made to flush any dirty pages +** belonging to the next (if any) database. ^If any databases are skipped +** because locks cannot be obtained, but no other error occurs, this +** function returns SQLITE_BUSY. +** +** ^If any other error occurs while flushing dirty pages to disk (for +** example an IO error or out-of-memory condition), then processing is +** abandoned and an SQLite [error code] is returned to the caller immediately. +** +** ^Otherwise, if no error occurs, [sqlite3_db_cacheflush()] returns SQLITE_OK. +** +** ^This function does not set the database handle error code or message +** returned by the [sqlite3_errcode()] and [sqlite3_errmsg()] functions. +*/ +SQLITE_API int sqlite3_db_cacheflush(sqlite3*); + +/* +** CAPI3REF: The pre-update hook. +** METHOD: sqlite3 +** +** ^These interfaces are only available if SQLite is compiled using the +** [SQLITE_ENABLE_PREUPDATE_HOOK] compile-time option. +** +** ^The [sqlite3_preupdate_hook()] interface registers a callback function +** that is invoked prior to each [INSERT], [UPDATE], and [DELETE] operation +** on a database table. +** ^At most one preupdate hook may be registered at a time on a single +** [database connection]; each call to [sqlite3_preupdate_hook()] overrides +** the previous setting. +** ^The preupdate hook is disabled by invoking [sqlite3_preupdate_hook()] +** with a NULL pointer as the second parameter. +** ^The third parameter to [sqlite3_preupdate_hook()] is passed through as +** the first parameter to callbacks. +** +** ^The preupdate hook only fires for changes to real database tables; the +** preupdate hook is not invoked for changes to [virtual tables] or to +** system tables like sqlite_sequence or sqlite_stat1. +** +** ^The second parameter to the preupdate callback is a pointer to +** the [database connection] that registered the preupdate hook. +** ^The third parameter to the preupdate callback is one of the constants +** [SQLITE_INSERT], [SQLITE_DELETE], or [SQLITE_UPDATE] to identify the +** kind of update operation that is about to occur. +** ^(The fourth parameter to the preupdate callback is the name of the +** database within the database connection that is being modified. This +** will be "main" for the main database or "temp" for TEMP tables or +** the name given after the AS keyword in the [ATTACH] statement for attached +** databases.)^ +** ^The fifth parameter to the preupdate callback is the name of the +** table that is being modified. +** +** For an UPDATE or DELETE operation on a [rowid table], the sixth +** parameter passed to the preupdate callback is the initial [rowid] of the +** row being modified or deleted. For an INSERT operation on a rowid table, +** or any operation on a WITHOUT ROWID table, the value of the sixth +** parameter is undefined. For an INSERT or UPDATE on a rowid table the +** seventh parameter is the final rowid value of the row being inserted +** or updated. The value of the seventh parameter passed to the callback +** function is not defined for operations on WITHOUT ROWID tables, or for +** DELETE operations on rowid tables. +** +** The [sqlite3_preupdate_old()], [sqlite3_preupdate_new()], +** [sqlite3_preupdate_count()], and [sqlite3_preupdate_depth()] interfaces +** provide additional information about a preupdate event. These routines +** may only be called from within a preupdate callback. Invoking any of +** these routines from outside of a preupdate callback or with a +** [database connection] pointer that is different from the one supplied +** to the preupdate callback results in undefined and probably undesirable +** behavior. +** +** ^The [sqlite3_preupdate_count(D)] interface returns the number of columns +** in the row that is being inserted, updated, or deleted. +** +** ^The [sqlite3_preupdate_old(D,N,P)] interface writes into P a pointer to +** a [protected sqlite3_value] that contains the value of the Nth column of +** the table row before it is updated. The N parameter must be between 0 +** and one less than the number of columns or the behavior will be +** undefined. This must only be used within SQLITE_UPDATE and SQLITE_DELETE +** preupdate callbacks; if it is used by an SQLITE_INSERT callback then the +** behavior is undefined. The [sqlite3_value] that P points to +** will be destroyed when the preupdate callback returns. +** +** ^The [sqlite3_preupdate_new(D,N,P)] interface writes into P a pointer to +** a [protected sqlite3_value] that contains the value of the Nth column of +** the table row after it is updated. The N parameter must be between 0 +** and one less than the number of columns or the behavior will be +** undefined. This must only be used within SQLITE_INSERT and SQLITE_UPDATE +** preupdate callbacks; if it is used by an SQLITE_DELETE callback then the +** behavior is undefined. The [sqlite3_value] that P points to +** will be destroyed when the preupdate callback returns. +** +** ^The [sqlite3_preupdate_depth(D)] interface returns 0 if the preupdate +** callback was invoked as a result of a direct insert, update, or delete +** operation; or 1 for inserts, updates, or deletes invoked by top-level +** triggers; or 2 for changes resulting from triggers called by top-level +** triggers; and so forth. +** +** When the [sqlite3_blob_write()] API is used to update a blob column, +** the pre-update hook is invoked with SQLITE_DELETE. This is because the +** in this case the new values are not available. In this case, when a +** callback made with op==SQLITE_DELETE is actuall a write using the +** sqlite3_blob_write() API, the [sqlite3_preupdate_blobwrite()] returns +** the index of the column being written. In other cases, where the +** pre-update hook is being invoked for some other reason, including a +** regular DELETE, sqlite3_preupdate_blobwrite() returns -1. +** +** See also: [sqlite3_update_hook()] +*/ +#if defined(SQLITE_ENABLE_PREUPDATE_HOOK) +SQLITE_API void *sqlite3_preupdate_hook( + sqlite3 *db, + void(*xPreUpdate)( + void *pCtx, /* Copy of third arg to preupdate_hook() */ + sqlite3 *db, /* Database handle */ + int op, /* SQLITE_UPDATE, DELETE or INSERT */ + char const *zDb, /* Database name */ + char const *zName, /* Table name */ + sqlite3_int64 iKey1, /* Rowid of row about to be deleted/updated */ + sqlite3_int64 iKey2 /* New rowid value (for a rowid UPDATE) */ + ), + void* +); +SQLITE_API int sqlite3_preupdate_old(sqlite3 *, int, sqlite3_value **); +SQLITE_API int sqlite3_preupdate_count(sqlite3 *); +SQLITE_API int sqlite3_preupdate_depth(sqlite3 *); +SQLITE_API int sqlite3_preupdate_new(sqlite3 *, int, sqlite3_value **); +SQLITE_API int sqlite3_preupdate_blobwrite(sqlite3 *); +#endif + +/* +** CAPI3REF: Low-level system error code +** METHOD: sqlite3 +** +** ^Attempt to return the underlying operating system error code or error +** number that caused the most recent I/O error or failure to open a file. +** The return value is OS-dependent. For example, on unix systems, after +** [sqlite3_open_v2()] returns [SQLITE_CANTOPEN], this interface could be +** called to get back the underlying "errno" that caused the problem, such +** as ENOSPC, EAUTH, EISDIR, and so forth. +*/ +SQLITE_API int sqlite3_system_errno(sqlite3*); + +/* +** CAPI3REF: Database Snapshot +** KEYWORDS: {snapshot} {sqlite3_snapshot} +** +** An instance of the snapshot object records the state of a [WAL mode] +** database for some specific point in history. +** +** In [WAL mode], multiple [database connections] that are open on the +** same database file can each be reading a different historical version +** of the database file. When a [database connection] begins a read +** transaction, that connection sees an unchanging copy of the database +** as it existed for the point in time when the transaction first started. +** Subsequent changes to the database from other connections are not seen +** by the reader until a new read transaction is started. +** +** The sqlite3_snapshot object records state information about an historical +** version of the database file so that it is possible to later open a new read +** transaction that sees that historical version of the database rather than +** the most recent version. +*/ +typedef struct sqlite3_snapshot { + unsigned char hidden[48]; +} sqlite3_snapshot; + +/* +** CAPI3REF: Record A Database Snapshot +** CONSTRUCTOR: sqlite3_snapshot +** +** ^The [sqlite3_snapshot_get(D,S,P)] interface attempts to make a +** new [sqlite3_snapshot] object that records the current state of +** schema S in database connection D. ^On success, the +** [sqlite3_snapshot_get(D,S,P)] interface writes a pointer to the newly +** created [sqlite3_snapshot] object into *P and returns SQLITE_OK. +** If there is not already a read-transaction open on schema S when +** this function is called, one is opened automatically. +** +** The following must be true for this function to succeed. If any of +** the following statements are false when sqlite3_snapshot_get() is +** called, SQLITE_ERROR is returned. The final value of *P is undefined +** in this case. +** +**
      +**
    • The database handle must not be in [autocommit mode]. +** +**
    • Schema S of [database connection] D must be a [WAL mode] database. +** +**
    • There must not be a write transaction open on schema S of database +** connection D. +** +**
    • One or more transactions must have been written to the current wal +** file since it was created on disk (by any connection). This means +** that a snapshot cannot be taken on a wal mode database with no wal +** file immediately after it is first opened. At least one transaction +** must be written to it first. +**
    +** +** This function may also return SQLITE_NOMEM. If it is called with the +** database handle in autocommit mode but fails for some other reason, +** whether or not a read transaction is opened on schema S is undefined. +** +** The [sqlite3_snapshot] object returned from a successful call to +** [sqlite3_snapshot_get()] must be freed using [sqlite3_snapshot_free()] +** to avoid a memory leak. +** +** The [sqlite3_snapshot_get()] interface is only available when the +** [SQLITE_ENABLE_SNAPSHOT] compile-time option is used. +*/ +SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_snapshot_get( + sqlite3 *db, + const char *zSchema, + sqlite3_snapshot **ppSnapshot +); + +/* +** CAPI3REF: Start a read transaction on an historical snapshot +** METHOD: sqlite3_snapshot +** +** ^The [sqlite3_snapshot_open(D,S,P)] interface either starts a new read +** transaction or upgrades an existing one for schema S of +** [database connection] D such that the read transaction refers to +** historical [snapshot] P, rather than the most recent change to the +** database. ^The [sqlite3_snapshot_open()] interface returns SQLITE_OK +** on success or an appropriate [error code] if it fails. +** +** ^In order to succeed, the database connection must not be in +** [autocommit mode] when [sqlite3_snapshot_open(D,S,P)] is called. If there +** is already a read transaction open on schema S, then the database handle +** must have no active statements (SELECT statements that have been passed +** to sqlite3_step() but not sqlite3_reset() or sqlite3_finalize()). +** SQLITE_ERROR is returned if either of these conditions is violated, or +** if schema S does not exist, or if the snapshot object is invalid. +** +** ^A call to sqlite3_snapshot_open() will fail to open if the specified +** snapshot has been overwritten by a [checkpoint]. In this case +** SQLITE_ERROR_SNAPSHOT is returned. +** +** If there is already a read transaction open when this function is +** invoked, then the same read transaction remains open (on the same +** database snapshot) if SQLITE_ERROR, SQLITE_BUSY or SQLITE_ERROR_SNAPSHOT +** is returned. If another error code - for example SQLITE_PROTOCOL or an +** SQLITE_IOERR error code - is returned, then the final state of the +** read transaction is undefined. If SQLITE_OK is returned, then the +** read transaction is now open on database snapshot P. +** +** ^(A call to [sqlite3_snapshot_open(D,S,P)] will fail if the +** database connection D does not know that the database file for +** schema S is in [WAL mode]. A database connection might not know +** that the database file is in [WAL mode] if there has been no prior +** I/O on that database connection, or if the database entered [WAL mode] +** after the most recent I/O on the database connection.)^ +** (Hint: Run "[PRAGMA application_id]" against a newly opened +** database connection in order to make it ready to use snapshots.) +** +** The [sqlite3_snapshot_open()] interface is only available when the +** [SQLITE_ENABLE_SNAPSHOT] compile-time option is used. +*/ +SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_snapshot_open( + sqlite3 *db, + const char *zSchema, + sqlite3_snapshot *pSnapshot +); + +/* +** CAPI3REF: Destroy a snapshot +** DESTRUCTOR: sqlite3_snapshot +** +** ^The [sqlite3_snapshot_free(P)] interface destroys [sqlite3_snapshot] P. +** The application must eventually free every [sqlite3_snapshot] object +** using this routine to avoid a memory leak. +** +** The [sqlite3_snapshot_free()] interface is only available when the +** [SQLITE_ENABLE_SNAPSHOT] compile-time option is used. +*/ +SQLITE_API SQLITE_EXPERIMENTAL void sqlite3_snapshot_free(sqlite3_snapshot*); + +/* +** CAPI3REF: Compare the ages of two snapshot handles. +** METHOD: sqlite3_snapshot +** +** The sqlite3_snapshot_cmp(P1, P2) interface is used to compare the ages +** of two valid snapshot handles. +** +** If the two snapshot handles are not associated with the same database +** file, the result of the comparison is undefined. +** +** Additionally, the result of the comparison is only valid if both of the +** snapshot handles were obtained by calling sqlite3_snapshot_get() since the +** last time the wal file was deleted. The wal file is deleted when the +** database is changed back to rollback mode or when the number of database +** clients drops to zero. If either snapshot handle was obtained before the +** wal file was last deleted, the value returned by this function +** is undefined. +** +** Otherwise, this API returns a negative value if P1 refers to an older +** snapshot than P2, zero if the two handles refer to the same database +** snapshot, and a positive value if P1 is a newer snapshot than P2. +** +** This interface is only available if SQLite is compiled with the +** [SQLITE_ENABLE_SNAPSHOT] option. +*/ +SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_snapshot_cmp( + sqlite3_snapshot *p1, + sqlite3_snapshot *p2 +); + +/* +** CAPI3REF: Recover snapshots from a wal file +** METHOD: sqlite3_snapshot +** +** If a [WAL file] remains on disk after all database connections close +** (either through the use of the [SQLITE_FCNTL_PERSIST_WAL] [file control] +** or because the last process to have the database opened exited without +** calling [sqlite3_close()]) and a new connection is subsequently opened +** on that database and [WAL file], the [sqlite3_snapshot_open()] interface +** will only be able to open the last transaction added to the WAL file +** even though the WAL file contains other valid transactions. +** +** This function attempts to scan the WAL file associated with database zDb +** of database handle db and make all valid snapshots available to +** sqlite3_snapshot_open(). It is an error if there is already a read +** transaction open on the database, or if the database is not a WAL mode +** database. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +** +** This interface is only available if SQLite is compiled with the +** [SQLITE_ENABLE_SNAPSHOT] option. +*/ +SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_snapshot_recover(sqlite3 *db, const char *zDb); + +/* +** CAPI3REF: Serialize a database +** +** The sqlite3_serialize(D,S,P,F) interface returns a pointer to memory +** that is a serialization of the S database on [database connection] D. +** If P is not a NULL pointer, then the size of the database in bytes +** is written into *P. +** +** For an ordinary on-disk database file, the serialization is just a +** copy of the disk file. For an in-memory database or a "TEMP" database, +** the serialization is the same sequence of bytes which would be written +** to disk if that database where backed up to disk. +** +** The usual case is that sqlite3_serialize() copies the serialization of +** the database into memory obtained from [sqlite3_malloc64()] and returns +** a pointer to that memory. The caller is responsible for freeing the +** returned value to avoid a memory leak. However, if the F argument +** contains the SQLITE_SERIALIZE_NOCOPY bit, then no memory allocations +** are made, and the sqlite3_serialize() function will return a pointer +** to the contiguous memory representation of the database that SQLite +** is currently using for that database, or NULL if the no such contiguous +** memory representation of the database exists. A contiguous memory +** representation of the database will usually only exist if there has +** been a prior call to [sqlite3_deserialize(D,S,...)] with the same +** values of D and S. +** The size of the database is written into *P even if the +** SQLITE_SERIALIZE_NOCOPY bit is set but no contiguous copy +** of the database exists. +** +** A call to sqlite3_serialize(D,S,P,F) might return NULL even if the +** SQLITE_SERIALIZE_NOCOPY bit is omitted from argument F if a memory +** allocation error occurs. +** +** This interface is omitted if SQLite is compiled with the +** [SQLITE_OMIT_DESERIALIZE] option. +*/ +SQLITE_API unsigned char *sqlite3_serialize( + sqlite3 *db, /* The database connection */ + const char *zSchema, /* Which DB to serialize. ex: "main", "temp", ... */ + sqlite3_int64 *piSize, /* Write size of the DB here, if not NULL */ + unsigned int mFlags /* Zero or more SQLITE_SERIALIZE_* flags */ +); + +/* +** CAPI3REF: Flags for sqlite3_serialize +** +** Zero or more of the following constants can be OR-ed together for +** the F argument to [sqlite3_serialize(D,S,P,F)]. +** +** SQLITE_SERIALIZE_NOCOPY means that [sqlite3_serialize()] will return +** a pointer to contiguous in-memory database that it is currently using, +** without making a copy of the database. If SQLite is not currently using +** a contiguous in-memory database, then this option causes +** [sqlite3_serialize()] to return a NULL pointer. SQLite will only be +** using a contiguous in-memory database if it has been initialized by a +** prior call to [sqlite3_deserialize()]. +*/ +#define SQLITE_SERIALIZE_NOCOPY 0x001 /* Do no memory allocations */ + +/* +** CAPI3REF: Deserialize a database +** +** The sqlite3_deserialize(D,S,P,N,M,F) interface causes the +** [database connection] D to disconnect from database S and then +** reopen S as an in-memory database based on the serialization contained +** in P. The serialized database P is N bytes in size. M is the size of +** the buffer P, which might be larger than N. If M is larger than N, and +** the SQLITE_DESERIALIZE_READONLY bit is not set in F, then SQLite is +** permitted to add content to the in-memory database as long as the total +** size does not exceed M bytes. +** +** If the SQLITE_DESERIALIZE_FREEONCLOSE bit is set in F, then SQLite will +** invoke sqlite3_free() on the serialization buffer when the database +** connection closes. If the SQLITE_DESERIALIZE_RESIZEABLE bit is set, then +** SQLite will try to increase the buffer size using sqlite3_realloc64() +** if writes on the database cause it to grow larger than M bytes. +** +** The sqlite3_deserialize() interface will fail with SQLITE_BUSY if the +** database is currently in a read transaction or is involved in a backup +** operation. +** +** It is not possible to deserialized into the TEMP database. If the +** S argument to sqlite3_deserialize(D,S,P,N,M,F) is "temp" then the +** function returns SQLITE_ERROR. +** +** If sqlite3_deserialize(D,S,P,N,M,F) fails for any reason and if the +** SQLITE_DESERIALIZE_FREEONCLOSE bit is set in argument F, then +** [sqlite3_free()] is invoked on argument P prior to returning. +** +** This interface is omitted if SQLite is compiled with the +** [SQLITE_OMIT_DESERIALIZE] option. +*/ +SQLITE_API int sqlite3_deserialize( + sqlite3 *db, /* The database connection */ + const char *zSchema, /* Which DB to reopen with the deserialization */ + unsigned char *pData, /* The serialized database content */ + sqlite3_int64 szDb, /* Number bytes in the deserialization */ + sqlite3_int64 szBuf, /* Total size of buffer pData[] */ + unsigned mFlags /* Zero or more SQLITE_DESERIALIZE_* flags */ +); + +/* +** CAPI3REF: Flags for sqlite3_deserialize() +** +** The following are allowed values for 6th argument (the F argument) to +** the [sqlite3_deserialize(D,S,P,N,M,F)] interface. +** +** The SQLITE_DESERIALIZE_FREEONCLOSE means that the database serialization +** in the P argument is held in memory obtained from [sqlite3_malloc64()] +** and that SQLite should take ownership of this memory and automatically +** free it when it has finished using it. Without this flag, the caller +** is responsible for freeing any dynamically allocated memory. +** +** The SQLITE_DESERIALIZE_RESIZEABLE flag means that SQLite is allowed to +** grow the size of the database using calls to [sqlite3_realloc64()]. This +** flag should only be used if SQLITE_DESERIALIZE_FREEONCLOSE is also used. +** Without this flag, the deserialized database cannot increase in size beyond +** the number of bytes specified by the M parameter. +** +** The SQLITE_DESERIALIZE_READONLY flag means that the deserialized database +** should be treated as read-only. +*/ +#define SQLITE_DESERIALIZE_FREEONCLOSE 1 /* Call sqlite3_free() on close */ +#define SQLITE_DESERIALIZE_RESIZEABLE 2 /* Resize using sqlite3_realloc64() */ +#define SQLITE_DESERIALIZE_READONLY 4 /* Database is read-only */ + +/* +** Undo the hack that converts floating point types to integer for +** builds on processors without floating point support. +*/ +#ifdef SQLITE_OMIT_FLOATING_POINT +# undef double +#endif + +#if 0 +} /* End of the 'extern "C"' block */ +#endif +#endif /* SQLITE3_H */ + +/******** Begin file sqlite3rtree.h *********/ +/* +** 2010 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +#ifndef _SQLITE3RTREE_H_ +#define _SQLITE3RTREE_H_ + + +#if 0 +extern "C" { +#endif + +typedef struct sqlite3_rtree_geometry sqlite3_rtree_geometry; +typedef struct sqlite3_rtree_query_info sqlite3_rtree_query_info; + +/* The double-precision datatype used by RTree depends on the +** SQLITE_RTREE_INT_ONLY compile-time option. +*/ +#ifdef SQLITE_RTREE_INT_ONLY + typedef sqlite3_int64 sqlite3_rtree_dbl; +#else + typedef double sqlite3_rtree_dbl; +#endif + +/* +** Register a geometry callback named zGeom that can be used as part of an +** R-Tree geometry query as follows: +** +** SELECT ... FROM WHERE MATCH $zGeom(... params ...) +*/ +SQLITE_API int sqlite3_rtree_geometry_callback( + sqlite3 *db, + const char *zGeom, + int (*xGeom)(sqlite3_rtree_geometry*, int, sqlite3_rtree_dbl*,int*), + void *pContext +); + + +/* +** A pointer to a structure of the following type is passed as the first +** argument to callbacks registered using rtree_geometry_callback(). +*/ +struct sqlite3_rtree_geometry { + void *pContext; /* Copy of pContext passed to s_r_g_c() */ + int nParam; /* Size of array aParam[] */ + sqlite3_rtree_dbl *aParam; /* Parameters passed to SQL geom function */ + void *pUser; /* Callback implementation user data */ + void (*xDelUser)(void *); /* Called by SQLite to clean up pUser */ +}; + +/* +** Register a 2nd-generation geometry callback named zScore that can be +** used as part of an R-Tree geometry query as follows: +** +** SELECT ... FROM WHERE MATCH $zQueryFunc(... params ...) +*/ +SQLITE_API int sqlite3_rtree_query_callback( + sqlite3 *db, + const char *zQueryFunc, + int (*xQueryFunc)(sqlite3_rtree_query_info*), + void *pContext, + void (*xDestructor)(void*) +); + + +/* +** A pointer to a structure of the following type is passed as the +** argument to scored geometry callback registered using +** sqlite3_rtree_query_callback(). +** +** Note that the first 5 fields of this structure are identical to +** sqlite3_rtree_geometry. This structure is a subclass of +** sqlite3_rtree_geometry. +*/ +struct sqlite3_rtree_query_info { + void *pContext; /* pContext from when function registered */ + int nParam; /* Number of function parameters */ + sqlite3_rtree_dbl *aParam; /* value of function parameters */ + void *pUser; /* callback can use this, if desired */ + void (*xDelUser)(void*); /* function to free pUser */ + sqlite3_rtree_dbl *aCoord; /* Coordinates of node or entry to check */ + unsigned int *anQueue; /* Number of pending entries in the queue */ + int nCoord; /* Number of coordinates */ + int iLevel; /* Level of current node or entry */ + int mxLevel; /* The largest iLevel value in the tree */ + sqlite3_int64 iRowid; /* Rowid for current entry */ + sqlite3_rtree_dbl rParentScore; /* Score of parent node */ + int eParentWithin; /* Visibility of parent node */ + int eWithin; /* OUT: Visibility */ + sqlite3_rtree_dbl rScore; /* OUT: Write the score here */ + /* The following fields are only available in 3.8.11 and later */ + sqlite3_value **apSqlParam; /* Original SQL values of parameters */ +}; + +/* +** Allowed values for sqlite3_rtree_query.eWithin and .eParentWithin. +*/ +#define NOT_WITHIN 0 /* Object completely outside of query region */ +#define PARTLY_WITHIN 1 /* Object partially overlaps query region */ +#define FULLY_WITHIN 2 /* Object fully contained within query region */ + + +#if 0 +} /* end of the 'extern "C"' block */ +#endif + +#endif /* ifndef _SQLITE3RTREE_H_ */ + +/******** End of sqlite3rtree.h *********/ +/******** Begin file sqlite3session.h *********/ + +#if !defined(__SQLITESESSION_H_) && defined(SQLITE_ENABLE_SESSION) +#define __SQLITESESSION_H_ 1 + +/* +** Make sure we can call this stuff from C++. +*/ +#if 0 +extern "C" { +#endif + + +/* +** CAPI3REF: Session Object Handle +** +** An instance of this object is a [session] that can be used to +** record changes to a database. +*/ +typedef struct sqlite3_session sqlite3_session; + +/* +** CAPI3REF: Changeset Iterator Handle +** +** An instance of this object acts as a cursor for iterating +** over the elements of a [changeset] or [patchset]. +*/ +typedef struct sqlite3_changeset_iter sqlite3_changeset_iter; + +/* +** CAPI3REF: Create A New Session Object +** CONSTRUCTOR: sqlite3_session +** +** Create a new session object attached to database handle db. If successful, +** a pointer to the new object is written to *ppSession and SQLITE_OK is +** returned. If an error occurs, *ppSession is set to NULL and an SQLite +** error code (e.g. SQLITE_NOMEM) is returned. +** +** It is possible to create multiple session objects attached to a single +** database handle. +** +** Session objects created using this function should be deleted using the +** [sqlite3session_delete()] function before the database handle that they +** are attached to is itself closed. If the database handle is closed before +** the session object is deleted, then the results of calling any session +** module function, including [sqlite3session_delete()] on the session object +** are undefined. +** +** Because the session module uses the [sqlite3_preupdate_hook()] API, it +** is not possible for an application to register a pre-update hook on a +** database handle that has one or more session objects attached. Nor is +** it possible to create a session object attached to a database handle for +** which a pre-update hook is already defined. The results of attempting +** either of these things are undefined. +** +** The session object will be used to create changesets for tables in +** database zDb, where zDb is either "main", or "temp", or the name of an +** attached database. It is not an error if database zDb is not attached +** to the database when the session object is created. +*/ +SQLITE_API int sqlite3session_create( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of db (e.g. "main") */ + sqlite3_session **ppSession /* OUT: New session object */ +); + +/* +** CAPI3REF: Delete A Session Object +** DESTRUCTOR: sqlite3_session +** +** Delete a session object previously allocated using +** [sqlite3session_create()]. Once a session object has been deleted, the +** results of attempting to use pSession with any other session module +** function are undefined. +** +** Session objects must be deleted before the database handle to which they +** are attached is closed. Refer to the documentation for +** [sqlite3session_create()] for details. +*/ +SQLITE_API void sqlite3session_delete(sqlite3_session *pSession); + +/* +** CAPIREF: Conigure a Session Object +** METHOD: sqlite3_session +** +** This method is used to configure a session object after it has been +** created. At present the only valid value for the second parameter is +** [SQLITE_SESSION_OBJCONFIG_SIZE]. +** +** Arguments for sqlite3session_object_config() +** +** The following values may passed as the the 4th parameter to +** sqlite3session_object_config(). +** +**
    SQLITE_SESSION_OBJCONFIG_SIZE
    +** This option is used to set, clear or query the flag that enables +** the [sqlite3session_changeset_size()] API. Because it imposes some +** computational overhead, this API is disabled by default. Argument +** pArg must point to a value of type (int). If the value is initially +** 0, then the sqlite3session_changeset_size() API is disabled. If it +** is greater than 0, then the same API is enabled. Or, if the initial +** value is less than zero, no change is made. In all cases the (int) +** variable is set to 1 if the sqlite3session_changeset_size() API is +** enabled following the current call, or 0 otherwise. +** +** It is an error (SQLITE_MISUSE) to attempt to modify this setting after +** the first table has been attached to the session object. +*/ +SQLITE_API int sqlite3session_object_config(sqlite3_session*, int op, void *pArg); + +/* +*/ +#define SQLITE_SESSION_OBJCONFIG_SIZE 1 + +/* +** CAPI3REF: Enable Or Disable A Session Object +** METHOD: sqlite3_session +** +** Enable or disable the recording of changes by a session object. When +** enabled, a session object records changes made to the database. When +** disabled - it does not. A newly created session object is enabled. +** Refer to the documentation for [sqlite3session_changeset()] for further +** details regarding how enabling and disabling a session object affects +** the eventual changesets. +** +** Passing zero to this function disables the session. Passing a value +** greater than zero enables it. Passing a value less than zero is a +** no-op, and may be used to query the current state of the session. +** +** The return value indicates the final state of the session object: 0 if +** the session is disabled, or 1 if it is enabled. +*/ +SQLITE_API int sqlite3session_enable(sqlite3_session *pSession, int bEnable); + +/* +** CAPI3REF: Set Or Clear the Indirect Change Flag +** METHOD: sqlite3_session +** +** Each change recorded by a session object is marked as either direct or +** indirect. A change is marked as indirect if either: +** +**
      +**
    • The session object "indirect" flag is set when the change is +** made, or +**
    • The change is made by an SQL trigger or foreign key action +** instead of directly as a result of a users SQL statement. +**
    +** +** If a single row is affected by more than one operation within a session, +** then the change is considered indirect if all operations meet the criteria +** for an indirect change above, or direct otherwise. +** +** This function is used to set, clear or query the session object indirect +** flag. If the second argument passed to this function is zero, then the +** indirect flag is cleared. If it is greater than zero, the indirect flag +** is set. Passing a value less than zero does not modify the current value +** of the indirect flag, and may be used to query the current state of the +** indirect flag for the specified session object. +** +** The return value indicates the final state of the indirect flag: 0 if +** it is clear, or 1 if it is set. +*/ +SQLITE_API int sqlite3session_indirect(sqlite3_session *pSession, int bIndirect); + +/* +** CAPI3REF: Attach A Table To A Session Object +** METHOD: sqlite3_session +** +** If argument zTab is not NULL, then it is the name of a table to attach +** to the session object passed as the first argument. All subsequent changes +** made to the table while the session object is enabled will be recorded. See +** documentation for [sqlite3session_changeset()] for further details. +** +** Or, if argument zTab is NULL, then changes are recorded for all tables +** in the database. If additional tables are added to the database (by +** executing "CREATE TABLE" statements) after this call is made, changes for +** the new tables are also recorded. +** +** Changes can only be recorded for tables that have a PRIMARY KEY explicitly +** defined as part of their CREATE TABLE statement. It does not matter if the +** PRIMARY KEY is an "INTEGER PRIMARY KEY" (rowid alias) or not. The PRIMARY +** KEY may consist of a single column, or may be a composite key. +** +** It is not an error if the named table does not exist in the database. Nor +** is it an error if the named table does not have a PRIMARY KEY. However, +** no changes will be recorded in either of these scenarios. +** +** Changes are not recorded for individual rows that have NULL values stored +** in one or more of their PRIMARY KEY columns. +** +** SQLITE_OK is returned if the call completes without error. Or, if an error +** occurs, an SQLite error code (e.g. SQLITE_NOMEM) is returned. +** +**

    Special sqlite_stat1 Handling

    +** +** As of SQLite version 3.22.0, the "sqlite_stat1" table is an exception to +** some of the rules above. In SQLite, the schema of sqlite_stat1 is: +**
    +**        CREATE TABLE sqlite_stat1(tbl,idx,stat)
    +**  
    +** +** Even though sqlite_stat1 does not have a PRIMARY KEY, changes are +** recorded for it as if the PRIMARY KEY is (tbl,idx). Additionally, changes +** are recorded for rows for which (idx IS NULL) is true. However, for such +** rows a zero-length blob (SQL value X'') is stored in the changeset or +** patchset instead of a NULL value. This allows such changesets to be +** manipulated by legacy implementations of sqlite3changeset_invert(), +** concat() and similar. +** +** The sqlite3changeset_apply() function automatically converts the +** zero-length blob back to a NULL value when updating the sqlite_stat1 +** table. However, if the application calls sqlite3changeset_new(), +** sqlite3changeset_old() or sqlite3changeset_conflict on a changeset +** iterator directly (including on a changeset iterator passed to a +** conflict-handler callback) then the X'' value is returned. The application +** must translate X'' to NULL itself if required. +** +** Legacy (older than 3.22.0) versions of the sessions module cannot capture +** changes made to the sqlite_stat1 table. Legacy versions of the +** sqlite3changeset_apply() function silently ignore any modifications to the +** sqlite_stat1 table that are part of a changeset or patchset. +*/ +SQLITE_API int sqlite3session_attach( + sqlite3_session *pSession, /* Session object */ + const char *zTab /* Table name */ +); + +/* +** CAPI3REF: Set a table filter on a Session Object. +** METHOD: sqlite3_session +** +** The second argument (xFilter) is the "filter callback". For changes to rows +** in tables that are not attached to the Session object, the filter is called +** to determine whether changes to the table's rows should be tracked or not. +** If xFilter returns 0, changes are not tracked. Note that once a table is +** attached, xFilter will not be called again. +*/ +SQLITE_API void sqlite3session_table_filter( + sqlite3_session *pSession, /* Session object */ + int(*xFilter)( + void *pCtx, /* Copy of third arg to _filter_table() */ + const char *zTab /* Table name */ + ), + void *pCtx /* First argument passed to xFilter */ +); + +/* +** CAPI3REF: Generate A Changeset From A Session Object +** METHOD: sqlite3_session +** +** Obtain a changeset containing changes to the tables attached to the +** session object passed as the first argument. If successful, +** set *ppChangeset to point to a buffer containing the changeset +** and *pnChangeset to the size of the changeset in bytes before returning +** SQLITE_OK. If an error occurs, set both *ppChangeset and *pnChangeset to +** zero and return an SQLite error code. +** +** A changeset consists of zero or more INSERT, UPDATE and/or DELETE changes, +** each representing a change to a single row of an attached table. An INSERT +** change contains the values of each field of a new database row. A DELETE +** contains the original values of each field of a deleted database row. An +** UPDATE change contains the original values of each field of an updated +** database row along with the updated values for each updated non-primary-key +** column. It is not possible for an UPDATE change to represent a change that +** modifies the values of primary key columns. If such a change is made, it +** is represented in a changeset as a DELETE followed by an INSERT. +** +** Changes are not recorded for rows that have NULL values stored in one or +** more of their PRIMARY KEY columns. If such a row is inserted or deleted, +** no corresponding change is present in the changesets returned by this +** function. If an existing row with one or more NULL values stored in +** PRIMARY KEY columns is updated so that all PRIMARY KEY columns are non-NULL, +** only an INSERT is appears in the changeset. Similarly, if an existing row +** with non-NULL PRIMARY KEY values is updated so that one or more of its +** PRIMARY KEY columns are set to NULL, the resulting changeset contains a +** DELETE change only. +** +** The contents of a changeset may be traversed using an iterator created +** using the [sqlite3changeset_start()] API. A changeset may be applied to +** a database with a compatible schema using the [sqlite3changeset_apply()] +** API. +** +** Within a changeset generated by this function, all changes related to a +** single table are grouped together. In other words, when iterating through +** a changeset or when applying a changeset to a database, all changes related +** to a single table are processed before moving on to the next table. Tables +** are sorted in the same order in which they were attached (or auto-attached) +** to the sqlite3_session object. The order in which the changes related to +** a single table are stored is undefined. +** +** Following a successful call to this function, it is the responsibility of +** the caller to eventually free the buffer that *ppChangeset points to using +** [sqlite3_free()]. +** +**

    Changeset Generation

    +** +** Once a table has been attached to a session object, the session object +** records the primary key values of all new rows inserted into the table. +** It also records the original primary key and other column values of any +** deleted or updated rows. For each unique primary key value, data is only +** recorded once - the first time a row with said primary key is inserted, +** updated or deleted in the lifetime of the session. +** +** There is one exception to the previous paragraph: when a row is inserted, +** updated or deleted, if one or more of its primary key columns contain a +** NULL value, no record of the change is made. +** +** The session object therefore accumulates two types of records - those +** that consist of primary key values only (created when the user inserts +** a new record) and those that consist of the primary key values and the +** original values of other table columns (created when the users deletes +** or updates a record). +** +** When this function is called, the requested changeset is created using +** both the accumulated records and the current contents of the database +** file. Specifically: +** +**
      +**
    • For each record generated by an insert, the database is queried +** for a row with a matching primary key. If one is found, an INSERT +** change is added to the changeset. If no such row is found, no change +** is added to the changeset. +** +**
    • For each record generated by an update or delete, the database is +** queried for a row with a matching primary key. If such a row is +** found and one or more of the non-primary key fields have been +** modified from their original values, an UPDATE change is added to +** the changeset. Or, if no such row is found in the table, a DELETE +** change is added to the changeset. If there is a row with a matching +** primary key in the database, but all fields contain their original +** values, no change is added to the changeset. +**
    +** +** This means, amongst other things, that if a row is inserted and then later +** deleted while a session object is active, neither the insert nor the delete +** will be present in the changeset. Or if a row is deleted and then later a +** row with the same primary key values inserted while a session object is +** active, the resulting changeset will contain an UPDATE change instead of +** a DELETE and an INSERT. +** +** When a session object is disabled (see the [sqlite3session_enable()] API), +** it does not accumulate records when rows are inserted, updated or deleted. +** This may appear to have some counter-intuitive effects if a single row +** is written to more than once during a session. For example, if a row +** is inserted while a session object is enabled, then later deleted while +** the same session object is disabled, no INSERT record will appear in the +** changeset, even though the delete took place while the session was disabled. +** Or, if one field of a row is updated while a session is disabled, and +** another field of the same row is updated while the session is enabled, the +** resulting changeset will contain an UPDATE change that updates both fields. +*/ +SQLITE_API int sqlite3session_changeset( + sqlite3_session *pSession, /* Session object */ + int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ + void **ppChangeset /* OUT: Buffer containing changeset */ +); + +/* +** CAPI3REF: Return An Upper-limit For The Size Of The Changeset +** METHOD: sqlite3_session +** +** By default, this function always returns 0. For it to return +** a useful result, the sqlite3_session object must have been configured +** to enable this API using sqlite3session_object_config() with the +** SQLITE_SESSION_OBJCONFIG_SIZE verb. +** +** When enabled, this function returns an upper limit, in bytes, for the size +** of the changeset that might be produced if sqlite3session_changeset() were +** called. The final changeset size might be equal to or smaller than the +** size in bytes returned by this function. +*/ +SQLITE_API sqlite3_int64 sqlite3session_changeset_size(sqlite3_session *pSession); + +/* +** CAPI3REF: Load The Difference Between Tables Into A Session +** METHOD: sqlite3_session +** +** If it is not already attached to the session object passed as the first +** argument, this function attaches table zTbl in the same manner as the +** [sqlite3session_attach()] function. If zTbl does not exist, or if it +** does not have a primary key, this function is a no-op (but does not return +** an error). +** +** Argument zFromDb must be the name of a database ("main", "temp" etc.) +** attached to the same database handle as the session object that contains +** a table compatible with the table attached to the session by this function. +** A table is considered compatible if it: +** +**
      +**
    • Has the same name, +**
    • Has the same set of columns declared in the same order, and +**
    • Has the same PRIMARY KEY definition. +**
    +** +** If the tables are not compatible, SQLITE_SCHEMA is returned. If the tables +** are compatible but do not have any PRIMARY KEY columns, it is not an error +** but no changes are added to the session object. As with other session +** APIs, tables without PRIMARY KEYs are simply ignored. +** +** This function adds a set of changes to the session object that could be +** used to update the table in database zFrom (call this the "from-table") +** so that its content is the same as the table attached to the session +** object (call this the "to-table"). Specifically: +** +**
      +**
    • For each row (primary key) that exists in the to-table but not in +** the from-table, an INSERT record is added to the session object. +** +**
    • For each row (primary key) that exists in the to-table but not in +** the from-table, a DELETE record is added to the session object. +** +**
    • For each row (primary key) that exists in both tables, but features +** different non-PK values in each, an UPDATE record is added to the +** session. +**
    +** +** To clarify, if this function is called and then a changeset constructed +** using [sqlite3session_changeset()], then after applying that changeset to +** database zFrom the contents of the two compatible tables would be +** identical. +** +** It an error if database zFrom does not exist or does not contain the +** required compatible table. +** +** If the operation is successful, SQLITE_OK is returned. Otherwise, an SQLite +** error code. In this case, if argument pzErrMsg is not NULL, *pzErrMsg +** may be set to point to a buffer containing an English language error +** message. It is the responsibility of the caller to free this buffer using +** sqlite3_free(). +*/ +SQLITE_API int sqlite3session_diff( + sqlite3_session *pSession, + const char *zFromDb, + const char *zTbl, + char **pzErrMsg +); + + +/* +** CAPI3REF: Generate A Patchset From A Session Object +** METHOD: sqlite3_session +** +** The differences between a patchset and a changeset are that: +** +**
      +**
    • DELETE records consist of the primary key fields only. The +** original values of other fields are omitted. +**
    • The original values of any modified fields are omitted from +** UPDATE records. +**
    +** +** A patchset blob may be used with up to date versions of all +** sqlite3changeset_xxx API functions except for sqlite3changeset_invert(), +** which returns SQLITE_CORRUPT if it is passed a patchset. Similarly, +** attempting to use a patchset blob with old versions of the +** sqlite3changeset_xxx APIs also provokes an SQLITE_CORRUPT error. +** +** Because the non-primary key "old.*" fields are omitted, no +** SQLITE_CHANGESET_DATA conflicts can be detected or reported if a patchset +** is passed to the sqlite3changeset_apply() API. Other conflict types work +** in the same way as for changesets. +** +** Changes within a patchset are ordered in the same way as for changesets +** generated by the sqlite3session_changeset() function (i.e. all changes for +** a single table are grouped together, tables appear in the order in which +** they were attached to the session object). +*/ +SQLITE_API int sqlite3session_patchset( + sqlite3_session *pSession, /* Session object */ + int *pnPatchset, /* OUT: Size of buffer at *ppPatchset */ + void **ppPatchset /* OUT: Buffer containing patchset */ +); + +/* +** CAPI3REF: Test if a changeset has recorded any changes. +** +** Return non-zero if no changes to attached tables have been recorded by +** the session object passed as the first argument. Otherwise, if one or +** more changes have been recorded, return zero. +** +** Even if this function returns zero, it is possible that calling +** [sqlite3session_changeset()] on the session handle may still return a +** changeset that contains no changes. This can happen when a row in +** an attached table is modified and then later on the original values +** are restored. However, if this function returns non-zero, then it is +** guaranteed that a call to sqlite3session_changeset() will return a +** changeset containing zero changes. +*/ +SQLITE_API int sqlite3session_isempty(sqlite3_session *pSession); + +/* +** CAPI3REF: Query for the amount of heap memory used by a session object. +** +** This API returns the total amount of heap memory in bytes currently +** used by the session object passed as the only argument. +*/ +SQLITE_API sqlite3_int64 sqlite3session_memory_used(sqlite3_session *pSession); + +/* +** CAPI3REF: Create An Iterator To Traverse A Changeset +** CONSTRUCTOR: sqlite3_changeset_iter +** +** Create an iterator used to iterate through the contents of a changeset. +** If successful, *pp is set to point to the iterator handle and SQLITE_OK +** is returned. Otherwise, if an error occurs, *pp is set to zero and an +** SQLite error code is returned. +** +** The following functions can be used to advance and query a changeset +** iterator created by this function: +** +**
      +**
    • [sqlite3changeset_next()] +**
    • [sqlite3changeset_op()] +**
    • [sqlite3changeset_new()] +**
    • [sqlite3changeset_old()] +**
    +** +** It is the responsibility of the caller to eventually destroy the iterator +** by passing it to [sqlite3changeset_finalize()]. The buffer containing the +** changeset (pChangeset) must remain valid until after the iterator is +** destroyed. +** +** Assuming the changeset blob was created by one of the +** [sqlite3session_changeset()], [sqlite3changeset_concat()] or +** [sqlite3changeset_invert()] functions, all changes within the changeset +** that apply to a single table are grouped together. This means that when +** an application iterates through a changeset using an iterator created by +** this function, all changes that relate to a single table are visited +** consecutively. There is no chance that the iterator will visit a change +** the applies to table X, then one for table Y, and then later on visit +** another change for table X. +** +** The behavior of sqlite3changeset_start_v2() and its streaming equivalent +** may be modified by passing a combination of +** [SQLITE_CHANGESETSTART_INVERT | supported flags] as the 4th parameter. +** +** Note that the sqlite3changeset_start_v2() API is still experimental +** and therefore subject to change. +*/ +SQLITE_API int sqlite3changeset_start( + sqlite3_changeset_iter **pp, /* OUT: New changeset iterator handle */ + int nChangeset, /* Size of changeset blob in bytes */ + void *pChangeset /* Pointer to blob containing changeset */ +); +SQLITE_API int sqlite3changeset_start_v2( + sqlite3_changeset_iter **pp, /* OUT: New changeset iterator handle */ + int nChangeset, /* Size of changeset blob in bytes */ + void *pChangeset, /* Pointer to blob containing changeset */ + int flags /* SESSION_CHANGESETSTART_* flags */ +); + +/* +** CAPI3REF: Flags for sqlite3changeset_start_v2 +** +** The following flags may passed via the 4th parameter to +** [sqlite3changeset_start_v2] and [sqlite3changeset_start_v2_strm]: +** +**
    SQLITE_CHANGESETAPPLY_INVERT
    +** Invert the changeset while iterating through it. This is equivalent to +** inverting a changeset using sqlite3changeset_invert() before applying it. +** It is an error to specify this flag with a patchset. +*/ +#define SQLITE_CHANGESETSTART_INVERT 0x0002 + + +/* +** CAPI3REF: Advance A Changeset Iterator +** METHOD: sqlite3_changeset_iter +** +** This function may only be used with iterators created by the function +** [sqlite3changeset_start()]. If it is called on an iterator passed to +** a conflict-handler callback by [sqlite3changeset_apply()], SQLITE_MISUSE +** is returned and the call has no effect. +** +** Immediately after an iterator is created by sqlite3changeset_start(), it +** does not point to any change in the changeset. Assuming the changeset +** is not empty, the first call to this function advances the iterator to +** point to the first change in the changeset. Each subsequent call advances +** the iterator to point to the next change in the changeset (if any). If +** no error occurs and the iterator points to a valid change after a call +** to sqlite3changeset_next() has advanced it, SQLITE_ROW is returned. +** Otherwise, if all changes in the changeset have already been visited, +** SQLITE_DONE is returned. +** +** If an error occurs, an SQLite error code is returned. Possible error +** codes include SQLITE_CORRUPT (if the changeset buffer is corrupt) or +** SQLITE_NOMEM. +*/ +SQLITE_API int sqlite3changeset_next(sqlite3_changeset_iter *pIter); + +/* +** CAPI3REF: Obtain The Current Operation From A Changeset Iterator +** METHOD: sqlite3_changeset_iter +** +** The pIter argument passed to this function may either be an iterator +** passed to a conflict-handler by [sqlite3changeset_apply()], or an iterator +** created by [sqlite3changeset_start()]. In the latter case, the most recent +** call to [sqlite3changeset_next()] must have returned [SQLITE_ROW]. If this +** is not the case, this function returns [SQLITE_MISUSE]. +** +** Arguments pOp, pnCol and pzTab may not be NULL. Upon return, three +** outputs are set through these pointers: +** +** *pOp is set to one of [SQLITE_INSERT], [SQLITE_DELETE] or [SQLITE_UPDATE], +** depending on the type of change that the iterator currently points to; +** +** *pnCol is set to the number of columns in the table affected by the change; and +** +** *pzTab is set to point to a nul-terminated utf-8 encoded string containing +** the name of the table affected by the current change. The buffer remains +** valid until either sqlite3changeset_next() is called on the iterator +** or until the conflict-handler function returns. +** +** If pbIndirect is not NULL, then *pbIndirect is set to true (1) if the change +** is an indirect change, or false (0) otherwise. See the documentation for +** [sqlite3session_indirect()] for a description of direct and indirect +** changes. +** +** If no error occurs, SQLITE_OK is returned. If an error does occur, an +** SQLite error code is returned. The values of the output variables may not +** be trusted in this case. +*/ +SQLITE_API int sqlite3changeset_op( + sqlite3_changeset_iter *pIter, /* Iterator object */ + const char **pzTab, /* OUT: Pointer to table name */ + int *pnCol, /* OUT: Number of columns in table */ + int *pOp, /* OUT: SQLITE_INSERT, DELETE or UPDATE */ + int *pbIndirect /* OUT: True for an 'indirect' change */ +); + +/* +** CAPI3REF: Obtain The Primary Key Definition Of A Table +** METHOD: sqlite3_changeset_iter +** +** For each modified table, a changeset includes the following: +** +**
      +**
    • The number of columns in the table, and +**
    • Which of those columns make up the tables PRIMARY KEY. +**
    +** +** This function is used to find which columns comprise the PRIMARY KEY of +** the table modified by the change that iterator pIter currently points to. +** If successful, *pabPK is set to point to an array of nCol entries, where +** nCol is the number of columns in the table. Elements of *pabPK are set to +** 0x01 if the corresponding column is part of the tables primary key, or +** 0x00 if it is not. +** +** If argument pnCol is not NULL, then *pnCol is set to the number of columns +** in the table. +** +** If this function is called when the iterator does not point to a valid +** entry, SQLITE_MISUSE is returned and the output variables zeroed. Otherwise, +** SQLITE_OK is returned and the output variables populated as described +** above. +*/ +SQLITE_API int sqlite3changeset_pk( + sqlite3_changeset_iter *pIter, /* Iterator object */ + unsigned char **pabPK, /* OUT: Array of boolean - true for PK cols */ + int *pnCol /* OUT: Number of entries in output array */ +); + +/* +** CAPI3REF: Obtain old.* Values From A Changeset Iterator +** METHOD: sqlite3_changeset_iter +** +** The pIter argument passed to this function may either be an iterator +** passed to a conflict-handler by [sqlite3changeset_apply()], or an iterator +** created by [sqlite3changeset_start()]. In the latter case, the most recent +** call to [sqlite3changeset_next()] must have returned SQLITE_ROW. +** Furthermore, it may only be called if the type of change that the iterator +** currently points to is either [SQLITE_DELETE] or [SQLITE_UPDATE]. Otherwise, +** this function returns [SQLITE_MISUSE] and sets *ppValue to NULL. +** +** Argument iVal must be greater than or equal to 0, and less than the number +** of columns in the table affected by the current change. Otherwise, +** [SQLITE_RANGE] is returned and *ppValue is set to NULL. +** +** If successful, this function sets *ppValue to point to a protected +** sqlite3_value object containing the iVal'th value from the vector of +** original row values stored as part of the UPDATE or DELETE change and +** returns SQLITE_OK. The name of the function comes from the fact that this +** is similar to the "old.*" columns available to update or delete triggers. +** +** If some other error occurs (e.g. an OOM condition), an SQLite error code +** is returned and *ppValue is set to NULL. +*/ +SQLITE_API int sqlite3changeset_old( + sqlite3_changeset_iter *pIter, /* Changeset iterator */ + int iVal, /* Column number */ + sqlite3_value **ppValue /* OUT: Old value (or NULL pointer) */ +); + +/* +** CAPI3REF: Obtain new.* Values From A Changeset Iterator +** METHOD: sqlite3_changeset_iter +** +** The pIter argument passed to this function may either be an iterator +** passed to a conflict-handler by [sqlite3changeset_apply()], or an iterator +** created by [sqlite3changeset_start()]. In the latter case, the most recent +** call to [sqlite3changeset_next()] must have returned SQLITE_ROW. +** Furthermore, it may only be called if the type of change that the iterator +** currently points to is either [SQLITE_UPDATE] or [SQLITE_INSERT]. Otherwise, +** this function returns [SQLITE_MISUSE] and sets *ppValue to NULL. +** +** Argument iVal must be greater than or equal to 0, and less than the number +** of columns in the table affected by the current change. Otherwise, +** [SQLITE_RANGE] is returned and *ppValue is set to NULL. +** +** If successful, this function sets *ppValue to point to a protected +** sqlite3_value object containing the iVal'th value from the vector of +** new row values stored as part of the UPDATE or INSERT change and +** returns SQLITE_OK. If the change is an UPDATE and does not include +** a new value for the requested column, *ppValue is set to NULL and +** SQLITE_OK returned. The name of the function comes from the fact that +** this is similar to the "new.*" columns available to update or delete +** triggers. +** +** If some other error occurs (e.g. an OOM condition), an SQLite error code +** is returned and *ppValue is set to NULL. +*/ +SQLITE_API int sqlite3changeset_new( + sqlite3_changeset_iter *pIter, /* Changeset iterator */ + int iVal, /* Column number */ + sqlite3_value **ppValue /* OUT: New value (or NULL pointer) */ +); + +/* +** CAPI3REF: Obtain Conflicting Row Values From A Changeset Iterator +** METHOD: sqlite3_changeset_iter +** +** This function should only be used with iterator objects passed to a +** conflict-handler callback by [sqlite3changeset_apply()] with either +** [SQLITE_CHANGESET_DATA] or [SQLITE_CHANGESET_CONFLICT]. If this function +** is called on any other iterator, [SQLITE_MISUSE] is returned and *ppValue +** is set to NULL. +** +** Argument iVal must be greater than or equal to 0, and less than the number +** of columns in the table affected by the current change. Otherwise, +** [SQLITE_RANGE] is returned and *ppValue is set to NULL. +** +** If successful, this function sets *ppValue to point to a protected +** sqlite3_value object containing the iVal'th value from the +** "conflicting row" associated with the current conflict-handler callback +** and returns SQLITE_OK. +** +** If some other error occurs (e.g. an OOM condition), an SQLite error code +** is returned and *ppValue is set to NULL. +*/ +SQLITE_API int sqlite3changeset_conflict( + sqlite3_changeset_iter *pIter, /* Changeset iterator */ + int iVal, /* Column number */ + sqlite3_value **ppValue /* OUT: Value from conflicting row */ +); + +/* +** CAPI3REF: Determine The Number Of Foreign Key Constraint Violations +** METHOD: sqlite3_changeset_iter +** +** This function may only be called with an iterator passed to an +** SQLITE_CHANGESET_FOREIGN_KEY conflict handler callback. In this case +** it sets the output variable to the total number of known foreign key +** violations in the destination database and returns SQLITE_OK. +** +** In all other cases this function returns SQLITE_MISUSE. +*/ +SQLITE_API int sqlite3changeset_fk_conflicts( + sqlite3_changeset_iter *pIter, /* Changeset iterator */ + int *pnOut /* OUT: Number of FK violations */ +); + + +/* +** CAPI3REF: Finalize A Changeset Iterator +** METHOD: sqlite3_changeset_iter +** +** This function is used to finalize an iterator allocated with +** [sqlite3changeset_start()]. +** +** This function should only be called on iterators created using the +** [sqlite3changeset_start()] function. If an application calls this +** function with an iterator passed to a conflict-handler by +** [sqlite3changeset_apply()], [SQLITE_MISUSE] is immediately returned and the +** call has no effect. +** +** If an error was encountered within a call to an sqlite3changeset_xxx() +** function (for example an [SQLITE_CORRUPT] in [sqlite3changeset_next()] or an +** [SQLITE_NOMEM] in [sqlite3changeset_new()]) then an error code corresponding +** to that error is returned by this function. Otherwise, SQLITE_OK is +** returned. This is to allow the following pattern (pseudo-code): +** +**
    +**   sqlite3changeset_start();
    +**   while( SQLITE_ROW==sqlite3changeset_next() ){
    +**     // Do something with change.
    +**   }
    +**   rc = sqlite3changeset_finalize();
    +**   if( rc!=SQLITE_OK ){
    +**     // An error has occurred
    +**   }
    +** 
    +*/ +SQLITE_API int sqlite3changeset_finalize(sqlite3_changeset_iter *pIter); + +/* +** CAPI3REF: Invert A Changeset +** +** This function is used to "invert" a changeset object. Applying an inverted +** changeset to a database reverses the effects of applying the uninverted +** changeset. Specifically: +** +**
      +**
    • Each DELETE change is changed to an INSERT, and +**
    • Each INSERT change is changed to a DELETE, and +**
    • For each UPDATE change, the old.* and new.* values are exchanged. +**
    +** +** This function does not change the order in which changes appear within +** the changeset. It merely reverses the sense of each individual change. +** +** If successful, a pointer to a buffer containing the inverted changeset +** is stored in *ppOut, the size of the same buffer is stored in *pnOut, and +** SQLITE_OK is returned. If an error occurs, both *pnOut and *ppOut are +** zeroed and an SQLite error code returned. +** +** It is the responsibility of the caller to eventually call sqlite3_free() +** on the *ppOut pointer to free the buffer allocation following a successful +** call to this function. +** +** WARNING/TODO: This function currently assumes that the input is a valid +** changeset. If it is not, the results are undefined. +*/ +SQLITE_API int sqlite3changeset_invert( + int nIn, const void *pIn, /* Input changeset */ + int *pnOut, void **ppOut /* OUT: Inverse of input */ +); + +/* +** CAPI3REF: Concatenate Two Changeset Objects +** +** This function is used to concatenate two changesets, A and B, into a +** single changeset. The result is a changeset equivalent to applying +** changeset A followed by changeset B. +** +** This function combines the two input changesets using an +** sqlite3_changegroup object. Calling it produces similar results as the +** following code fragment: +** +**
    +**   sqlite3_changegroup *pGrp;
    +**   rc = sqlite3_changegroup_new(&pGrp);
    +**   if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nA, pA);
    +**   if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nB, pB);
    +**   if( rc==SQLITE_OK ){
    +**     rc = sqlite3changegroup_output(pGrp, pnOut, ppOut);
    +**   }else{
    +**     *ppOut = 0;
    +**     *pnOut = 0;
    +**   }
    +** 
    +** +** Refer to the sqlite3_changegroup documentation below for details. +*/ +SQLITE_API int sqlite3changeset_concat( + int nA, /* Number of bytes in buffer pA */ + void *pA, /* Pointer to buffer containing changeset A */ + int nB, /* Number of bytes in buffer pB */ + void *pB, /* Pointer to buffer containing changeset B */ + int *pnOut, /* OUT: Number of bytes in output changeset */ + void **ppOut /* OUT: Buffer containing output changeset */ +); + + +/* +** CAPI3REF: Changegroup Handle +** +** A changegroup is an object used to combine two or more +** [changesets] or [patchsets] +*/ +typedef struct sqlite3_changegroup sqlite3_changegroup; + +/* +** CAPI3REF: Create A New Changegroup Object +** CONSTRUCTOR: sqlite3_changegroup +** +** An sqlite3_changegroup object is used to combine two or more changesets +** (or patchsets) into a single changeset (or patchset). A single changegroup +** object may combine changesets or patchsets, but not both. The output is +** always in the same format as the input. +** +** If successful, this function returns SQLITE_OK and populates (*pp) with +** a pointer to a new sqlite3_changegroup object before returning. The caller +** should eventually free the returned object using a call to +** sqlite3changegroup_delete(). If an error occurs, an SQLite error code +** (i.e. SQLITE_NOMEM) is returned and *pp is set to NULL. +** +** The usual usage pattern for an sqlite3_changegroup object is as follows: +** +**
      +**
    • It is created using a call to sqlite3changegroup_new(). +** +**
    • Zero or more changesets (or patchsets) are added to the object +** by calling sqlite3changegroup_add(). +** +**
    • The result of combining all input changesets together is obtained +** by the application via a call to sqlite3changegroup_output(). +** +**
    • The object is deleted using a call to sqlite3changegroup_delete(). +**
    +** +** Any number of calls to add() and output() may be made between the calls to +** new() and delete(), and in any order. +** +** As well as the regular sqlite3changegroup_add() and +** sqlite3changegroup_output() functions, also available are the streaming +** versions sqlite3changegroup_add_strm() and sqlite3changegroup_output_strm(). +*/ +SQLITE_API int sqlite3changegroup_new(sqlite3_changegroup **pp); + +/* +** CAPI3REF: Add A Changeset To A Changegroup +** METHOD: sqlite3_changegroup +** +** Add all changes within the changeset (or patchset) in buffer pData (size +** nData bytes) to the changegroup. +** +** If the buffer contains a patchset, then all prior calls to this function +** on the same changegroup object must also have specified patchsets. Or, if +** the buffer contains a changeset, so must have the earlier calls to this +** function. Otherwise, SQLITE_ERROR is returned and no changes are added +** to the changegroup. +** +** Rows within the changeset and changegroup are identified by the values in +** their PRIMARY KEY columns. A change in the changeset is considered to +** apply to the same row as a change already present in the changegroup if +** the two rows have the same primary key. +** +** Changes to rows that do not already appear in the changegroup are +** simply copied into it. Or, if both the new changeset and the changegroup +** contain changes that apply to a single row, the final contents of the +** changegroup depends on the type of each change, as follows: +** +** +** +** +**
    Existing Change New Change Output Change +**
    INSERT INSERT +** The new change is ignored. This case does not occur if the new +** changeset was recorded immediately after the changesets already +** added to the changegroup. +**
    INSERT UPDATE +** The INSERT change remains in the changegroup. The values in the +** INSERT change are modified as if the row was inserted by the +** existing change and then updated according to the new change. +**
    INSERT DELETE +** The existing INSERT is removed from the changegroup. The DELETE is +** not added. +**
    UPDATE INSERT +** The new change is ignored. This case does not occur if the new +** changeset was recorded immediately after the changesets already +** added to the changegroup. +**
    UPDATE UPDATE +** The existing UPDATE remains within the changegroup. It is amended +** so that the accompanying values are as if the row was updated once +** by the existing change and then again by the new change. +**
    UPDATE DELETE +** The existing UPDATE is replaced by the new DELETE within the +** changegroup. +**
    DELETE INSERT +** If one or more of the column values in the row inserted by the +** new change differ from those in the row deleted by the existing +** change, the existing DELETE is replaced by an UPDATE within the +** changegroup. Otherwise, if the inserted row is exactly the same +** as the deleted row, the existing DELETE is simply discarded. +**
    DELETE UPDATE +** The new change is ignored. This case does not occur if the new +** changeset was recorded immediately after the changesets already +** added to the changegroup. +**
    DELETE DELETE +** The new change is ignored. This case does not occur if the new +** changeset was recorded immediately after the changesets already +** added to the changegroup. +**
    +** +** If the new changeset contains changes to a table that is already present +** in the changegroup, then the number of columns and the position of the +** primary key columns for the table must be consistent. If this is not the +** case, this function fails with SQLITE_SCHEMA. If the input changeset +** appears to be corrupt and the corruption is detected, SQLITE_CORRUPT is +** returned. Or, if an out-of-memory condition occurs during processing, this +** function returns SQLITE_NOMEM. In all cases, if an error occurs the state +** of the final contents of the changegroup is undefined. +** +** If no error occurs, SQLITE_OK is returned. +*/ +SQLITE_API int sqlite3changegroup_add(sqlite3_changegroup*, int nData, void *pData); + +/* +** CAPI3REF: Obtain A Composite Changeset From A Changegroup +** METHOD: sqlite3_changegroup +** +** Obtain a buffer containing a changeset (or patchset) representing the +** current contents of the changegroup. If the inputs to the changegroup +** were themselves changesets, the output is a changeset. Or, if the +** inputs were patchsets, the output is also a patchset. +** +** As with the output of the sqlite3session_changeset() and +** sqlite3session_patchset() functions, all changes related to a single +** table are grouped together in the output of this function. Tables appear +** in the same order as for the very first changeset added to the changegroup. +** If the second or subsequent changesets added to the changegroup contain +** changes for tables that do not appear in the first changeset, they are +** appended onto the end of the output changeset, again in the order in +** which they are first encountered. +** +** If an error occurs, an SQLite error code is returned and the output +** variables (*pnData) and (*ppData) are set to 0. Otherwise, SQLITE_OK +** is returned and the output variables are set to the size of and a +** pointer to the output buffer, respectively. In this case it is the +** responsibility of the caller to eventually free the buffer using a +** call to sqlite3_free(). +*/ +SQLITE_API int sqlite3changegroup_output( + sqlite3_changegroup*, + int *pnData, /* OUT: Size of output buffer in bytes */ + void **ppData /* OUT: Pointer to output buffer */ +); + +/* +** CAPI3REF: Delete A Changegroup Object +** DESTRUCTOR: sqlite3_changegroup +*/ +SQLITE_API void sqlite3changegroup_delete(sqlite3_changegroup*); + +/* +** CAPI3REF: Apply A Changeset To A Database +** +** Apply a changeset or patchset to a database. These functions attempt to +** update the "main" database attached to handle db with the changes found in +** the changeset passed via the second and third arguments. +** +** The fourth argument (xFilter) passed to these functions is the "filter +** callback". If it is not NULL, then for each table affected by at least one +** change in the changeset, the filter callback is invoked with +** the table name as the second argument, and a copy of the context pointer +** passed as the sixth argument as the first. If the "filter callback" +** returns zero, then no attempt is made to apply any changes to the table. +** Otherwise, if the return value is non-zero or the xFilter argument to +** is NULL, all changes related to the table are attempted. +** +** For each table that is not excluded by the filter callback, this function +** tests that the target database contains a compatible table. A table is +** considered compatible if all of the following are true: +** +**
      +**
    • The table has the same name as the name recorded in the +** changeset, and +**
    • The table has at least as many columns as recorded in the +** changeset, and +**
    • The table has primary key columns in the same position as +** recorded in the changeset. +**
    +** +** If there is no compatible table, it is not an error, but none of the +** changes associated with the table are applied. A warning message is issued +** via the sqlite3_log() mechanism with the error code SQLITE_SCHEMA. At most +** one such warning is issued for each table in the changeset. +** +** For each change for which there is a compatible table, an attempt is made +** to modify the table contents according to the UPDATE, INSERT or DELETE +** change. If a change cannot be applied cleanly, the conflict handler +** function passed as the fifth argument to sqlite3changeset_apply() may be +** invoked. A description of exactly when the conflict handler is invoked for +** each type of change is below. +** +** Unlike the xFilter argument, xConflict may not be passed NULL. The results +** of passing anything other than a valid function pointer as the xConflict +** argument are undefined. +** +** Each time the conflict handler function is invoked, it must return one +** of [SQLITE_CHANGESET_OMIT], [SQLITE_CHANGESET_ABORT] or +** [SQLITE_CHANGESET_REPLACE]. SQLITE_CHANGESET_REPLACE may only be returned +** if the second argument passed to the conflict handler is either +** SQLITE_CHANGESET_DATA or SQLITE_CHANGESET_CONFLICT. If the conflict-handler +** returns an illegal value, any changes already made are rolled back and +** the call to sqlite3changeset_apply() returns SQLITE_MISUSE. Different +** actions are taken by sqlite3changeset_apply() depending on the value +** returned by each invocation of the conflict-handler function. Refer to +** the documentation for the three +** [SQLITE_CHANGESET_OMIT|available return values] for details. +** +**
    +**
    DELETE Changes
    +** For each DELETE change, the function checks if the target database +** contains a row with the same primary key value (or values) as the +** original row values stored in the changeset. If it does, and the values +** stored in all non-primary key columns also match the values stored in +** the changeset the row is deleted from the target database. +** +** If a row with matching primary key values is found, but one or more of +** the non-primary key fields contains a value different from the original +** row value stored in the changeset, the conflict-handler function is +** invoked with [SQLITE_CHANGESET_DATA] as the second argument. If the +** database table has more columns than are recorded in the changeset, +** only the values of those non-primary key fields are compared against +** the current database contents - any trailing database table columns +** are ignored. +** +** If no row with matching primary key values is found in the database, +** the conflict-handler function is invoked with [SQLITE_CHANGESET_NOTFOUND] +** passed as the second argument. +** +** If the DELETE operation is attempted, but SQLite returns SQLITE_CONSTRAINT +** (which can only happen if a foreign key constraint is violated), the +** conflict-handler function is invoked with [SQLITE_CHANGESET_CONSTRAINT] +** passed as the second argument. This includes the case where the DELETE +** operation is attempted because an earlier call to the conflict handler +** function returned [SQLITE_CHANGESET_REPLACE]. +** +**
    INSERT Changes
    +** For each INSERT change, an attempt is made to insert the new row into +** the database. If the changeset row contains fewer fields than the +** database table, the trailing fields are populated with their default +** values. +** +** If the attempt to insert the row fails because the database already +** contains a row with the same primary key values, the conflict handler +** function is invoked with the second argument set to +** [SQLITE_CHANGESET_CONFLICT]. +** +** If the attempt to insert the row fails because of some other constraint +** violation (e.g. NOT NULL or UNIQUE), the conflict handler function is +** invoked with the second argument set to [SQLITE_CHANGESET_CONSTRAINT]. +** This includes the case where the INSERT operation is re-attempted because +** an earlier call to the conflict handler function returned +** [SQLITE_CHANGESET_REPLACE]. +** +**
    UPDATE Changes
    +** For each UPDATE change, the function checks if the target database +** contains a row with the same primary key value (or values) as the +** original row values stored in the changeset. If it does, and the values +** stored in all modified non-primary key columns also match the values +** stored in the changeset the row is updated within the target database. +** +** If a row with matching primary key values is found, but one or more of +** the modified non-primary key fields contains a value different from an +** original row value stored in the changeset, the conflict-handler function +** is invoked with [SQLITE_CHANGESET_DATA] as the second argument. Since +** UPDATE changes only contain values for non-primary key fields that are +** to be modified, only those fields need to match the original values to +** avoid the SQLITE_CHANGESET_DATA conflict-handler callback. +** +** If no row with matching primary key values is found in the database, +** the conflict-handler function is invoked with [SQLITE_CHANGESET_NOTFOUND] +** passed as the second argument. +** +** If the UPDATE operation is attempted, but SQLite returns +** SQLITE_CONSTRAINT, the conflict-handler function is invoked with +** [SQLITE_CHANGESET_CONSTRAINT] passed as the second argument. +** This includes the case where the UPDATE operation is attempted after +** an earlier call to the conflict handler function returned +** [SQLITE_CHANGESET_REPLACE]. +**
    +** +** It is safe to execute SQL statements, including those that write to the +** table that the callback related to, from within the xConflict callback. +** This can be used to further customize the application's conflict +** resolution strategy. +** +** All changes made by these functions are enclosed in a savepoint transaction. +** If any other error (aside from a constraint failure when attempting to +** write to the target database) occurs, then the savepoint transaction is +** rolled back, restoring the target database to its original state, and an +** SQLite error code returned. +** +** If the output parameters (ppRebase) and (pnRebase) are non-NULL and +** the input is a changeset (not a patchset), then sqlite3changeset_apply_v2() +** may set (*ppRebase) to point to a "rebase" that may be used with the +** sqlite3_rebaser APIs buffer before returning. In this case (*pnRebase) +** is set to the size of the buffer in bytes. It is the responsibility of the +** caller to eventually free any such buffer using sqlite3_free(). The buffer +** is only allocated and populated if one or more conflicts were encountered +** while applying the patchset. See comments surrounding the sqlite3_rebaser +** APIs for further details. +** +** The behavior of sqlite3changeset_apply_v2() and its streaming equivalent +** may be modified by passing a combination of +** [SQLITE_CHANGESETAPPLY_NOSAVEPOINT | supported flags] as the 9th parameter. +** +** Note that the sqlite3changeset_apply_v2() API is still experimental +** and therefore subject to change. +*/ +SQLITE_API int sqlite3changeset_apply( + sqlite3 *db, /* Apply change to "main" db of this handle */ + int nChangeset, /* Size of changeset in bytes */ + void *pChangeset, /* Changeset blob */ + int(*xFilter)( + void *pCtx, /* Copy of sixth arg to _apply() */ + const char *zTab /* Table name */ + ), + int(*xConflict)( + void *pCtx, /* Copy of sixth arg to _apply() */ + int eConflict, /* DATA, MISSING, CONFLICT, CONSTRAINT */ + sqlite3_changeset_iter *p /* Handle describing change and conflict */ + ), + void *pCtx /* First argument passed to xConflict */ +); +SQLITE_API int sqlite3changeset_apply_v2( + sqlite3 *db, /* Apply change to "main" db of this handle */ + int nChangeset, /* Size of changeset in bytes */ + void *pChangeset, /* Changeset blob */ + int(*xFilter)( + void *pCtx, /* Copy of sixth arg to _apply() */ + const char *zTab /* Table name */ + ), + int(*xConflict)( + void *pCtx, /* Copy of sixth arg to _apply() */ + int eConflict, /* DATA, MISSING, CONFLICT, CONSTRAINT */ + sqlite3_changeset_iter *p /* Handle describing change and conflict */ + ), + void *pCtx, /* First argument passed to xConflict */ + void **ppRebase, int *pnRebase, /* OUT: Rebase data */ + int flags /* SESSION_CHANGESETAPPLY_* flags */ +); + +/* +** CAPI3REF: Flags for sqlite3changeset_apply_v2 +** +** The following flags may passed via the 9th parameter to +** [sqlite3changeset_apply_v2] and [sqlite3changeset_apply_v2_strm]: +** +**
    +**
    SQLITE_CHANGESETAPPLY_NOSAVEPOINT
    +** Usually, the sessions module encloses all operations performed by +** a single call to apply_v2() or apply_v2_strm() in a [SAVEPOINT]. The +** SAVEPOINT is committed if the changeset or patchset is successfully +** applied, or rolled back if an error occurs. Specifying this flag +** causes the sessions module to omit this savepoint. In this case, if the +** caller has an open transaction or savepoint when apply_v2() is called, +** it may revert the partially applied changeset by rolling it back. +** +**
    SQLITE_CHANGESETAPPLY_INVERT
    +** Invert the changeset before applying it. This is equivalent to inverting +** a changeset using sqlite3changeset_invert() before applying it. It is +** an error to specify this flag with a patchset. +*/ +#define SQLITE_CHANGESETAPPLY_NOSAVEPOINT 0x0001 +#define SQLITE_CHANGESETAPPLY_INVERT 0x0002 + +/* +** CAPI3REF: Constants Passed To The Conflict Handler +** +** Values that may be passed as the second argument to a conflict-handler. +** +**
    +**
    SQLITE_CHANGESET_DATA
    +** The conflict handler is invoked with CHANGESET_DATA as the second argument +** when processing a DELETE or UPDATE change if a row with the required +** PRIMARY KEY fields is present in the database, but one or more other +** (non primary-key) fields modified by the update do not contain the +** expected "before" values. +** +** The conflicting row, in this case, is the database row with the matching +** primary key. +** +**
    SQLITE_CHANGESET_NOTFOUND
    +** The conflict handler is invoked with CHANGESET_NOTFOUND as the second +** argument when processing a DELETE or UPDATE change if a row with the +** required PRIMARY KEY fields is not present in the database. +** +** There is no conflicting row in this case. The results of invoking the +** sqlite3changeset_conflict() API are undefined. +** +**
    SQLITE_CHANGESET_CONFLICT
    +** CHANGESET_CONFLICT is passed as the second argument to the conflict +** handler while processing an INSERT change if the operation would result +** in duplicate primary key values. +** +** The conflicting row in this case is the database row with the matching +** primary key. +** +**
    SQLITE_CHANGESET_FOREIGN_KEY
    +** If foreign key handling is enabled, and applying a changeset leaves the +** database in a state containing foreign key violations, the conflict +** handler is invoked with CHANGESET_FOREIGN_KEY as the second argument +** exactly once before the changeset is committed. If the conflict handler +** returns CHANGESET_OMIT, the changes, including those that caused the +** foreign key constraint violation, are committed. Or, if it returns +** CHANGESET_ABORT, the changeset is rolled back. +** +** No current or conflicting row information is provided. The only function +** it is possible to call on the supplied sqlite3_changeset_iter handle +** is sqlite3changeset_fk_conflicts(). +** +**
    SQLITE_CHANGESET_CONSTRAINT
    +** If any other constraint violation occurs while applying a change (i.e. +** a UNIQUE, CHECK or NOT NULL constraint), the conflict handler is +** invoked with CHANGESET_CONSTRAINT as the second argument. +** +** There is no conflicting row in this case. The results of invoking the +** sqlite3changeset_conflict() API are undefined. +** +**
    +*/ +#define SQLITE_CHANGESET_DATA 1 +#define SQLITE_CHANGESET_NOTFOUND 2 +#define SQLITE_CHANGESET_CONFLICT 3 +#define SQLITE_CHANGESET_CONSTRAINT 4 +#define SQLITE_CHANGESET_FOREIGN_KEY 5 + +/* +** CAPI3REF: Constants Returned By The Conflict Handler +** +** A conflict handler callback must return one of the following three values. +** +**
    +**
    SQLITE_CHANGESET_OMIT
    +** If a conflict handler returns this value no special action is taken. The +** change that caused the conflict is not applied. The session module +** continues to the next change in the changeset. +** +**
    SQLITE_CHANGESET_REPLACE
    +** This value may only be returned if the second argument to the conflict +** handler was SQLITE_CHANGESET_DATA or SQLITE_CHANGESET_CONFLICT. If this +** is not the case, any changes applied so far are rolled back and the +** call to sqlite3changeset_apply() returns SQLITE_MISUSE. +** +** If CHANGESET_REPLACE is returned by an SQLITE_CHANGESET_DATA conflict +** handler, then the conflicting row is either updated or deleted, depending +** on the type of change. +** +** If CHANGESET_REPLACE is returned by an SQLITE_CHANGESET_CONFLICT conflict +** handler, then the conflicting row is removed from the database and a +** second attempt to apply the change is made. If this second attempt fails, +** the original row is restored to the database before continuing. +** +**
    SQLITE_CHANGESET_ABORT
    +** If this value is returned, any changes applied so far are rolled back +** and the call to sqlite3changeset_apply() returns SQLITE_ABORT. +**
    +*/ +#define SQLITE_CHANGESET_OMIT 0 +#define SQLITE_CHANGESET_REPLACE 1 +#define SQLITE_CHANGESET_ABORT 2 + +/* +** CAPI3REF: Rebasing changesets +** EXPERIMENTAL +** +** Suppose there is a site hosting a database in state S0. And that +** modifications are made that move that database to state S1 and a +** changeset recorded (the "local" changeset). Then, a changeset based +** on S0 is received from another site (the "remote" changeset) and +** applied to the database. The database is then in state +** (S1+"remote"), where the exact state depends on any conflict +** resolution decisions (OMIT or REPLACE) made while applying "remote". +** Rebasing a changeset is to update it to take those conflict +** resolution decisions into account, so that the same conflicts +** do not have to be resolved elsewhere in the network. +** +** For example, if both the local and remote changesets contain an +** INSERT of the same key on "CREATE TABLE t1(a PRIMARY KEY, b)": +** +** local: INSERT INTO t1 VALUES(1, 'v1'); +** remote: INSERT INTO t1 VALUES(1, 'v2'); +** +** and the conflict resolution is REPLACE, then the INSERT change is +** removed from the local changeset (it was overridden). Or, if the +** conflict resolution was "OMIT", then the local changeset is modified +** to instead contain: +** +** UPDATE t1 SET b = 'v2' WHERE a=1; +** +** Changes within the local changeset are rebased as follows: +** +**
    +**
    Local INSERT
    +** This may only conflict with a remote INSERT. If the conflict +** resolution was OMIT, then add an UPDATE change to the rebased +** changeset. Or, if the conflict resolution was REPLACE, add +** nothing to the rebased changeset. +** +**
    Local DELETE
    +** This may conflict with a remote UPDATE or DELETE. In both cases the +** only possible resolution is OMIT. If the remote operation was a +** DELETE, then add no change to the rebased changeset. If the remote +** operation was an UPDATE, then the old.* fields of change are updated +** to reflect the new.* values in the UPDATE. +** +**
    Local UPDATE
    +** This may conflict with a remote UPDATE or DELETE. If it conflicts +** with a DELETE, and the conflict resolution was OMIT, then the update +** is changed into an INSERT. Any undefined values in the new.* record +** from the update change are filled in using the old.* values from +** the conflicting DELETE. Or, if the conflict resolution was REPLACE, +** the UPDATE change is simply omitted from the rebased changeset. +** +** If conflict is with a remote UPDATE and the resolution is OMIT, then +** the old.* values are rebased using the new.* values in the remote +** change. Or, if the resolution is REPLACE, then the change is copied +** into the rebased changeset with updates to columns also updated by +** the conflicting remote UPDATE removed. If this means no columns would +** be updated, the change is omitted. +**
    +** +** A local change may be rebased against multiple remote changes +** simultaneously. If a single key is modified by multiple remote +** changesets, they are combined as follows before the local changeset +** is rebased: +** +**
      +**
    • If there has been one or more REPLACE resolutions on a +** key, it is rebased according to a REPLACE. +** +**
    • If there have been no REPLACE resolutions on a key, then +** the local changeset is rebased according to the most recent +** of the OMIT resolutions. +**
    +** +** Note that conflict resolutions from multiple remote changesets are +** combined on a per-field basis, not per-row. This means that in the +** case of multiple remote UPDATE operations, some fields of a single +** local change may be rebased for REPLACE while others are rebased for +** OMIT. +** +** In order to rebase a local changeset, the remote changeset must first +** be applied to the local database using sqlite3changeset_apply_v2() and +** the buffer of rebase information captured. Then: +** +**
      +**
    1. An sqlite3_rebaser object is created by calling +** sqlite3rebaser_create(). +**
    2. The new object is configured with the rebase buffer obtained from +** sqlite3changeset_apply_v2() by calling sqlite3rebaser_configure(). +** If the local changeset is to be rebased against multiple remote +** changesets, then sqlite3rebaser_configure() should be called +** multiple times, in the same order that the multiple +** sqlite3changeset_apply_v2() calls were made. +**
    3. Each local changeset is rebased by calling sqlite3rebaser_rebase(). +**
    4. The sqlite3_rebaser object is deleted by calling +** sqlite3rebaser_delete(). +**
    +*/ +typedef struct sqlite3_rebaser sqlite3_rebaser; + +/* +** CAPI3REF: Create a changeset rebaser object. +** EXPERIMENTAL +** +** Allocate a new changeset rebaser object. If successful, set (*ppNew) to +** point to the new object and return SQLITE_OK. Otherwise, if an error +** occurs, return an SQLite error code (e.g. SQLITE_NOMEM) and set (*ppNew) +** to NULL. +*/ +SQLITE_API int sqlite3rebaser_create(sqlite3_rebaser **ppNew); + +/* +** CAPI3REF: Configure a changeset rebaser object. +** EXPERIMENTAL +** +** Configure the changeset rebaser object to rebase changesets according +** to the conflict resolutions described by buffer pRebase (size nRebase +** bytes), which must have been obtained from a previous call to +** sqlite3changeset_apply_v2(). +*/ +SQLITE_API int sqlite3rebaser_configure( + sqlite3_rebaser*, + int nRebase, const void *pRebase +); + +/* +** CAPI3REF: Rebase a changeset +** EXPERIMENTAL +** +** Argument pIn must point to a buffer containing a changeset nIn bytes +** in size. This function allocates and populates a buffer with a copy +** of the changeset rebased according to the configuration of the +** rebaser object passed as the first argument. If successful, (*ppOut) +** is set to point to the new buffer containing the rebased changeset and +** (*pnOut) to its size in bytes and SQLITE_OK returned. It is the +** responsibility of the caller to eventually free the new buffer using +** sqlite3_free(). Otherwise, if an error occurs, (*ppOut) and (*pnOut) +** are set to zero and an SQLite error code returned. +*/ +SQLITE_API int sqlite3rebaser_rebase( + sqlite3_rebaser*, + int nIn, const void *pIn, + int *pnOut, void **ppOut +); + +/* +** CAPI3REF: Delete a changeset rebaser object. +** EXPERIMENTAL +** +** Delete the changeset rebaser object and all associated resources. There +** should be one call to this function for each successful invocation +** of sqlite3rebaser_create(). +*/ +SQLITE_API void sqlite3rebaser_delete(sqlite3_rebaser *p); + +/* +** CAPI3REF: Streaming Versions of API functions. +** +** The six streaming API xxx_strm() functions serve similar purposes to the +** corresponding non-streaming API functions: +** +** +** +**
    Streaming functionNon-streaming equivalent
    sqlite3changeset_apply_strm[sqlite3changeset_apply] +**
    sqlite3changeset_apply_strm_v2[sqlite3changeset_apply_v2] +**
    sqlite3changeset_concat_strm[sqlite3changeset_concat] +**
    sqlite3changeset_invert_strm[sqlite3changeset_invert] +**
    sqlite3changeset_start_strm[sqlite3changeset_start] +**
    sqlite3session_changeset_strm[sqlite3session_changeset] +**
    sqlite3session_patchset_strm[sqlite3session_patchset] +**
    +** +** Non-streaming functions that accept changesets (or patchsets) as input +** require that the entire changeset be stored in a single buffer in memory. +** Similarly, those that return a changeset or patchset do so by returning +** a pointer to a single large buffer allocated using sqlite3_malloc(). +** Normally this is convenient. However, if an application running in a +** low-memory environment is required to handle very large changesets, the +** large contiguous memory allocations required can become onerous. +** +** In order to avoid this problem, instead of a single large buffer, input +** is passed to a streaming API functions by way of a callback function that +** the sessions module invokes to incrementally request input data as it is +** required. In all cases, a pair of API function parameters such as +** +**
    +**        int nChangeset,
    +**        void *pChangeset,
    +**  
    +** +** Is replaced by: +** +**
    +**        int (*xInput)(void *pIn, void *pData, int *pnData),
    +**        void *pIn,
    +**  
    +** +** Each time the xInput callback is invoked by the sessions module, the first +** argument passed is a copy of the supplied pIn context pointer. The second +** argument, pData, points to a buffer (*pnData) bytes in size. Assuming no +** error occurs the xInput method should copy up to (*pnData) bytes of data +** into the buffer and set (*pnData) to the actual number of bytes copied +** before returning SQLITE_OK. If the input is completely exhausted, (*pnData) +** should be set to zero to indicate this. Or, if an error occurs, an SQLite +** error code should be returned. In all cases, if an xInput callback returns +** an error, all processing is abandoned and the streaming API function +** returns a copy of the error code to the caller. +** +** In the case of sqlite3changeset_start_strm(), the xInput callback may be +** invoked by the sessions module at any point during the lifetime of the +** iterator. If such an xInput callback returns an error, the iterator enters +** an error state, whereby all subsequent calls to iterator functions +** immediately fail with the same error code as returned by xInput. +** +** Similarly, streaming API functions that return changesets (or patchsets) +** return them in chunks by way of a callback function instead of via a +** pointer to a single large buffer. In this case, a pair of parameters such +** as: +** +**
    +**        int *pnChangeset,
    +**        void **ppChangeset,
    +**  
    +** +** Is replaced by: +** +**
    +**        int (*xOutput)(void *pOut, const void *pData, int nData),
    +**        void *pOut
    +**  
    +** +** The xOutput callback is invoked zero or more times to return data to +** the application. The first parameter passed to each call is a copy of the +** pOut pointer supplied by the application. The second parameter, pData, +** points to a buffer nData bytes in size containing the chunk of output +** data being returned. If the xOutput callback successfully processes the +** supplied data, it should return SQLITE_OK to indicate success. Otherwise, +** it should return some other SQLite error code. In this case processing +** is immediately abandoned and the streaming API function returns a copy +** of the xOutput error code to the application. +** +** The sessions module never invokes an xOutput callback with the third +** parameter set to a value less than or equal to zero. Other than this, +** no guarantees are made as to the size of the chunks of data returned. +*/ +SQLITE_API int sqlite3changeset_apply_strm( + sqlite3 *db, /* Apply change to "main" db of this handle */ + int (*xInput)(void *pIn, void *pData, int *pnData), /* Input function */ + void *pIn, /* First arg for xInput */ + int(*xFilter)( + void *pCtx, /* Copy of sixth arg to _apply() */ + const char *zTab /* Table name */ + ), + int(*xConflict)( + void *pCtx, /* Copy of sixth arg to _apply() */ + int eConflict, /* DATA, MISSING, CONFLICT, CONSTRAINT */ + sqlite3_changeset_iter *p /* Handle describing change and conflict */ + ), + void *pCtx /* First argument passed to xConflict */ +); +SQLITE_API int sqlite3changeset_apply_v2_strm( + sqlite3 *db, /* Apply change to "main" db of this handle */ + int (*xInput)(void *pIn, void *pData, int *pnData), /* Input function */ + void *pIn, /* First arg for xInput */ + int(*xFilter)( + void *pCtx, /* Copy of sixth arg to _apply() */ + const char *zTab /* Table name */ + ), + int(*xConflict)( + void *pCtx, /* Copy of sixth arg to _apply() */ + int eConflict, /* DATA, MISSING, CONFLICT, CONSTRAINT */ + sqlite3_changeset_iter *p /* Handle describing change and conflict */ + ), + void *pCtx, /* First argument passed to xConflict */ + void **ppRebase, int *pnRebase, + int flags +); +SQLITE_API int sqlite3changeset_concat_strm( + int (*xInputA)(void *pIn, void *pData, int *pnData), + void *pInA, + int (*xInputB)(void *pIn, void *pData, int *pnData), + void *pInB, + int (*xOutput)(void *pOut, const void *pData, int nData), + void *pOut +); +SQLITE_API int sqlite3changeset_invert_strm( + int (*xInput)(void *pIn, void *pData, int *pnData), + void *pIn, + int (*xOutput)(void *pOut, const void *pData, int nData), + void *pOut +); +SQLITE_API int sqlite3changeset_start_strm( + sqlite3_changeset_iter **pp, + int (*xInput)(void *pIn, void *pData, int *pnData), + void *pIn +); +SQLITE_API int sqlite3changeset_start_v2_strm( + sqlite3_changeset_iter **pp, + int (*xInput)(void *pIn, void *pData, int *pnData), + void *pIn, + int flags +); +SQLITE_API int sqlite3session_changeset_strm( + sqlite3_session *pSession, + int (*xOutput)(void *pOut, const void *pData, int nData), + void *pOut +); +SQLITE_API int sqlite3session_patchset_strm( + sqlite3_session *pSession, + int (*xOutput)(void *pOut, const void *pData, int nData), + void *pOut +); +SQLITE_API int sqlite3changegroup_add_strm(sqlite3_changegroup*, + int (*xInput)(void *pIn, void *pData, int *pnData), + void *pIn +); +SQLITE_API int sqlite3changegroup_output_strm(sqlite3_changegroup*, + int (*xOutput)(void *pOut, const void *pData, int nData), + void *pOut +); +SQLITE_API int sqlite3rebaser_rebase_strm( + sqlite3_rebaser *pRebaser, + int (*xInput)(void *pIn, void *pData, int *pnData), + void *pIn, + int (*xOutput)(void *pOut, const void *pData, int nData), + void *pOut +); + +/* +** CAPI3REF: Configure global parameters +** +** The sqlite3session_config() interface is used to make global configuration +** changes to the sessions module in order to tune it to the specific needs +** of the application. +** +** The sqlite3session_config() interface is not threadsafe. If it is invoked +** while any other thread is inside any other sessions method then the +** results are undefined. Furthermore, if it is invoked after any sessions +** related objects have been created, the results are also undefined. +** +** The first argument to the sqlite3session_config() function must be one +** of the SQLITE_SESSION_CONFIG_XXX constants defined below. The +** interpretation of the (void*) value passed as the second parameter and +** the effect of calling this function depends on the value of the first +** parameter. +** +**
    +**
    SQLITE_SESSION_CONFIG_STRMSIZE
    +** By default, the sessions module streaming interfaces attempt to input +** and output data in approximately 1 KiB chunks. This operand may be used +** to set and query the value of this configuration setting. The pointer +** passed as the second argument must point to a value of type (int). +** If this value is greater than 0, it is used as the new streaming data +** chunk size for both input and output. Before returning, the (int) value +** pointed to by pArg is set to the final value of the streaming interface +** chunk size. +**
    +** +** This function returns SQLITE_OK if successful, or an SQLite error code +** otherwise. +*/ +SQLITE_API int sqlite3session_config(int op, void *pArg); + +/* +** CAPI3REF: Values for sqlite3session_config(). +*/ +#define SQLITE_SESSION_CONFIG_STRMSIZE 1 + +/* +** Make sure we can call this stuff from C++. +*/ +#if 0 +} +#endif + +#endif /* !defined(__SQLITESESSION_H_) && defined(SQLITE_ENABLE_SESSION) */ + +/******** End of sqlite3session.h *********/ +/******** Begin file fts5.h *********/ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Interfaces to extend FTS5. Using the interfaces defined in this file, +** FTS5 may be extended with: +** +** * custom tokenizers, and +** * custom auxiliary functions. +*/ + + +#ifndef _FTS5_H +#define _FTS5_H + + +#if 0 +extern "C" { +#endif + +/************************************************************************* +** CUSTOM AUXILIARY FUNCTIONS +** +** Virtual table implementations may overload SQL functions by implementing +** the sqlite3_module.xFindFunction() method. +*/ + +typedef struct Fts5ExtensionApi Fts5ExtensionApi; +typedef struct Fts5Context Fts5Context; +typedef struct Fts5PhraseIter Fts5PhraseIter; + +typedef void (*fts5_extension_function)( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +); + +struct Fts5PhraseIter { + const unsigned char *a; + const unsigned char *b; +}; + +/* +** EXTENSION API FUNCTIONS +** +** xUserData(pFts): +** Return a copy of the context pointer the extension function was +** registered with. +** +** xColumnTotalSize(pFts, iCol, pnToken): +** If parameter iCol is less than zero, set output variable *pnToken +** to the total number of tokens in the FTS5 table. Or, if iCol is +** non-negative but less than the number of columns in the table, return +** the total number of tokens in column iCol, considering all rows in +** the FTS5 table. +** +** If parameter iCol is greater than or equal to the number of columns +** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. +** an OOM condition or IO error), an appropriate SQLite error code is +** returned. +** +** xColumnCount(pFts): +** Return the number of columns in the table. +** +** xColumnSize(pFts, iCol, pnToken): +** If parameter iCol is less than zero, set output variable *pnToken +** to the total number of tokens in the current row. Or, if iCol is +** non-negative but less than the number of columns in the table, set +** *pnToken to the number of tokens in column iCol of the current row. +** +** If parameter iCol is greater than or equal to the number of columns +** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. +** an OOM condition or IO error), an appropriate SQLite error code is +** returned. +** +** This function may be quite inefficient if used with an FTS5 table +** created with the "columnsize=0" option. +** +** xColumnText: +** This function attempts to retrieve the text of column iCol of the +** current document. If successful, (*pz) is set to point to a buffer +** containing the text in utf-8 encoding, (*pn) is set to the size in bytes +** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, +** if an error occurs, an SQLite error code is returned and the final values +** of (*pz) and (*pn) are undefined. +** +** xPhraseCount: +** Returns the number of phrases in the current query expression. +** +** xPhraseSize: +** Returns the number of tokens in phrase iPhrase of the query. Phrases +** are numbered starting from zero. +** +** xInstCount: +** Set *pnInst to the total number of occurrences of all phrases within +** the query within the current row. Return SQLITE_OK if successful, or +** an error code (i.e. SQLITE_NOMEM) if an error occurs. +** +** This API can be quite slow if used with an FTS5 table created with the +** "detail=none" or "detail=column" option. If the FTS5 table is created +** with either "detail=none" or "detail=column" and "content=" option +** (i.e. if it is a contentless table), then this API always returns 0. +** +** xInst: +** Query for the details of phrase match iIdx within the current row. +** Phrase matches are numbered starting from zero, so the iIdx argument +** should be greater than or equal to zero and smaller than the value +** output by xInstCount(). +** +** Usually, output parameter *piPhrase is set to the phrase number, *piCol +** to the column in which it occurs and *piOff the token offset of the +** first token of the phrase. Returns SQLITE_OK if successful, or an error +** code (i.e. SQLITE_NOMEM) if an error occurs. +** +** This API can be quite slow if used with an FTS5 table created with the +** "detail=none" or "detail=column" option. +** +** xRowid: +** Returns the rowid of the current row. +** +** xTokenize: +** Tokenize text using the tokenizer belonging to the FTS5 table. +** +** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): +** This API function is used to query the FTS table for phrase iPhrase +** of the current query. Specifically, a query equivalent to: +** +** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid +** +** with $p set to a phrase equivalent to the phrase iPhrase of the +** current query is executed. Any column filter that applies to +** phrase iPhrase of the current query is included in $p. For each +** row visited, the callback function passed as the fourth argument +** is invoked. The context and API objects passed to the callback +** function may be used to access the properties of each matched row. +** Invoking Api.xUserData() returns a copy of the pointer passed as +** the third argument to pUserData. +** +** If the callback function returns any value other than SQLITE_OK, the +** query is abandoned and the xQueryPhrase function returns immediately. +** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. +** Otherwise, the error code is propagated upwards. +** +** If the query runs to completion without incident, SQLITE_OK is returned. +** Or, if some error occurs before the query completes or is aborted by +** the callback, an SQLite error code is returned. +** +** +** xSetAuxdata(pFts5, pAux, xDelete) +** +** Save the pointer passed as the second argument as the extension function's +** "auxiliary data". The pointer may then be retrieved by the current or any +** future invocation of the same fts5 extension function made as part of +** the same MATCH query using the xGetAuxdata() API. +** +** Each extension function is allocated a single auxiliary data slot for +** each FTS query (MATCH expression). If the extension function is invoked +** more than once for a single FTS query, then all invocations share a +** single auxiliary data context. +** +** If there is already an auxiliary data pointer when this function is +** invoked, then it is replaced by the new pointer. If an xDelete callback +** was specified along with the original pointer, it is invoked at this +** point. +** +** The xDelete callback, if one is specified, is also invoked on the +** auxiliary data pointer after the FTS5 query has finished. +** +** If an error (e.g. an OOM condition) occurs within this function, +** the auxiliary data is set to NULL and an error code returned. If the +** xDelete parameter was not NULL, it is invoked on the auxiliary data +** pointer before returning. +** +** +** xGetAuxdata(pFts5, bClear) +** +** Returns the current auxiliary data pointer for the fts5 extension +** function. See the xSetAuxdata() method for details. +** +** If the bClear argument is non-zero, then the auxiliary data is cleared +** (set to NULL) before this function returns. In this case the xDelete, +** if any, is not invoked. +** +** +** xRowCount(pFts5, pnRow) +** +** This function is used to retrieve the total number of rows in the table. +** In other words, the same value that would be returned by: +** +** SELECT count(*) FROM ftstable; +** +** xPhraseFirst() +** This function is used, along with type Fts5PhraseIter and the xPhraseNext +** method, to iterate through all instances of a single query phrase within +** the current row. This is the same information as is accessible via the +** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient +** to use, this API may be faster under some circumstances. To iterate +** through instances of phrase iPhrase, use the following code: +** +** Fts5PhraseIter iter; +** int iCol, iOff; +** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); +** iCol>=0; +** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) +** ){ +** // An instance of phrase iPhrase at offset iOff of column iCol +** } +** +** The Fts5PhraseIter structure is defined above. Applications should not +** modify this structure directly - it should only be used as shown above +** with the xPhraseFirst() and xPhraseNext() API methods (and by +** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). +** +** This API can be quite slow if used with an FTS5 table created with the +** "detail=none" or "detail=column" option. If the FTS5 table is created +** with either "detail=none" or "detail=column" and "content=" option +** (i.e. if it is a contentless table), then this API always iterates +** through an empty set (all calls to xPhraseFirst() set iCol to -1). +** +** xPhraseNext() +** See xPhraseFirst above. +** +** xPhraseFirstColumn() +** This function and xPhraseNextColumn() are similar to the xPhraseFirst() +** and xPhraseNext() APIs described above. The difference is that instead +** of iterating through all instances of a phrase in the current row, these +** APIs are used to iterate through the set of columns in the current row +** that contain one or more instances of a specified phrase. For example: +** +** Fts5PhraseIter iter; +** int iCol; +** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); +** iCol>=0; +** pApi->xPhraseNextColumn(pFts, &iter, &iCol) +** ){ +** // Column iCol contains at least one instance of phrase iPhrase +** } +** +** This API can be quite slow if used with an FTS5 table created with the +** "detail=none" option. If the FTS5 table is created with either +** "detail=none" "content=" option (i.e. if it is a contentless table), +** then this API always iterates through an empty set (all calls to +** xPhraseFirstColumn() set iCol to -1). +** +** The information accessed using this API and its companion +** xPhraseFirstColumn() may also be obtained using xPhraseFirst/xPhraseNext +** (or xInst/xInstCount). The chief advantage of this API is that it is +** significantly more efficient than those alternatives when used with +** "detail=column" tables. +** +** xPhraseNextColumn() +** See xPhraseFirstColumn above. +*/ +struct Fts5ExtensionApi { + int iVersion; /* Currently always set to 3 */ + + void *(*xUserData)(Fts5Context*); + + int (*xColumnCount)(Fts5Context*); + int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); + int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); + + int (*xTokenize)(Fts5Context*, + const char *pText, int nText, /* Text to tokenize */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ + ); + + int (*xPhraseCount)(Fts5Context*); + int (*xPhraseSize)(Fts5Context*, int iPhrase); + + int (*xInstCount)(Fts5Context*, int *pnInst); + int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); + + sqlite3_int64 (*xRowid)(Fts5Context*); + int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); + int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); + + int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, + int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) + ); + int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); + void *(*xGetAuxdata)(Fts5Context*, int bClear); + + int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); + void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); + + int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); + void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); +}; + +/* +** CUSTOM AUXILIARY FUNCTIONS +*************************************************************************/ + +/************************************************************************* +** CUSTOM TOKENIZERS +** +** Applications may also register custom tokenizer types. A tokenizer +** is registered by providing fts5 with a populated instance of the +** following structure. All structure methods must be defined, setting +** any member of the fts5_tokenizer struct to NULL leads to undefined +** behaviour. The structure methods are expected to function as follows: +** +** xCreate: +** This function is used to allocate and initialize a tokenizer instance. +** A tokenizer instance is required to actually tokenize text. +** +** The first argument passed to this function is a copy of the (void*) +** pointer provided by the application when the fts5_tokenizer object +** was registered with FTS5 (the third argument to xCreateTokenizer()). +** The second and third arguments are an array of nul-terminated strings +** containing the tokenizer arguments, if any, specified following the +** tokenizer name as part of the CREATE VIRTUAL TABLE statement used +** to create the FTS5 table. +** +** The final argument is an output variable. If successful, (*ppOut) +** should be set to point to the new tokenizer handle and SQLITE_OK +** returned. If an error occurs, some value other than SQLITE_OK should +** be returned. In this case, fts5 assumes that the final value of *ppOut +** is undefined. +** +** xDelete: +** This function is invoked to delete a tokenizer handle previously +** allocated using xCreate(). Fts5 guarantees that this function will +** be invoked exactly once for each successful call to xCreate(). +** +** xTokenize: +** This function is expected to tokenize the nText byte string indicated +** by argument pText. pText may or may not be nul-terminated. The first +** argument passed to this function is a pointer to an Fts5Tokenizer object +** returned by an earlier call to xCreate(). +** +** The second argument indicates the reason that FTS5 is requesting +** tokenization of the supplied text. This is always one of the following +** four values: +** +**
    • FTS5_TOKENIZE_DOCUMENT - A document is being inserted into +** or removed from the FTS table. The tokenizer is being invoked to +** determine the set of tokens to add to (or delete from) the +** FTS index. +** +**
    • FTS5_TOKENIZE_QUERY - A MATCH query is being executed +** against the FTS index. The tokenizer is being called to tokenize +** a bareword or quoted string specified as part of the query. +** +**
    • (FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX) - Same as +** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is +** followed by a "*" character, indicating that the last token +** returned by the tokenizer will be treated as a token prefix. +** +**
    • FTS5_TOKENIZE_AUX - The tokenizer is being invoked to +** satisfy an fts5_api.xTokenize() request made by an auxiliary +** function. Or an fts5_api.xColumnSize() request made by the same +** on a columnsize=0 database. +**
    +** +** For each token in the input string, the supplied callback xToken() must +** be invoked. The first argument to it should be a copy of the pointer +** passed as the second argument to xTokenize(). The third and fourth +** arguments are a pointer to a buffer containing the token text, and the +** size of the token in bytes. The 4th and 5th arguments are the byte offsets +** of the first byte of and first byte immediately following the text from +** which the token is derived within the input. +** +** The second argument passed to the xToken() callback ("tflags") should +** normally be set to 0. The exception is if the tokenizer supports +** synonyms. In this case see the discussion below for details. +** +** FTS5 assumes the xToken() callback is invoked for each token in the +** order that they occur within the input text. +** +** If an xToken() callback returns any value other than SQLITE_OK, then +** the tokenization should be abandoned and the xTokenize() method should +** immediately return a copy of the xToken() return value. Or, if the +** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, +** if an error occurs with the xTokenize() implementation itself, it +** may abandon the tokenization and return any error code other than +** SQLITE_OK or SQLITE_DONE. +** +** SYNONYM SUPPORT +** +** Custom tokenizers may also support synonyms. Consider a case in which a +** user wishes to query for a phrase such as "first place". Using the +** built-in tokenizers, the FTS5 query 'first + place' will match instances +** of "first place" within the document set, but not alternative forms +** such as "1st place". In some applications, it would be better to match +** all instances of "first place" or "1st place" regardless of which form +** the user specified in the MATCH query text. +** +** There are several ways to approach this in FTS5: +** +**
    1. By mapping all synonyms to a single token. In this case, using +** the above example, this means that the tokenizer returns the +** same token for inputs "first" and "1st". Say that token is in +** fact "first", so that when the user inserts the document "I won +** 1st place" entries are added to the index for tokens "i", "won", +** "first" and "place". If the user then queries for '1st + place', +** the tokenizer substitutes "first" for "1st" and the query works +** as expected. +** +**
    2. By querying the index for all synonyms of each query term +** separately. In this case, when tokenizing query text, the +** tokenizer may provide multiple synonyms for a single term +** within the document. FTS5 then queries the index for each +** synonym individually. For example, faced with the query: +** +** +** ... MATCH 'first place' +** +** the tokenizer offers both "1st" and "first" as synonyms for the +** first token in the MATCH query and FTS5 effectively runs a query +** similar to: +** +** +** ... MATCH '(first OR 1st) place' +** +** except that, for the purposes of auxiliary functions, the query +** still appears to contain just two phrases - "(first OR 1st)" +** being treated as a single phrase. +** +**
    3. By adding multiple synonyms for a single term to the FTS index. +** Using this method, when tokenizing document text, the tokenizer +** provides multiple synonyms for each token. So that when a +** document such as "I won first place" is tokenized, entries are +** added to the FTS index for "i", "won", "first", "1st" and +** "place". +** +** This way, even if the tokenizer does not provide synonyms +** when tokenizing query text (it should not - to do so would be +** inefficient), it doesn't matter if the user queries for +** 'first + place' or '1st + place', as there are entries in the +** FTS index corresponding to both forms of the first token. +**
    +** +** Whether it is parsing document or query text, any call to xToken that +** specifies a tflags argument with the FTS5_TOKEN_COLOCATED bit +** is considered to supply a synonym for the previous token. For example, +** when parsing the document "I won first place", a tokenizer that supports +** synonyms would call xToken() 5 times, as follows: +** +** +** xToken(pCtx, 0, "i", 1, 0, 1); +** xToken(pCtx, 0, "won", 3, 2, 5); +** xToken(pCtx, 0, "first", 5, 6, 11); +** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); +** xToken(pCtx, 0, "place", 5, 12, 17); +** +** +** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time +** xToken() is called. Multiple synonyms may be specified for a single token +** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. +** There is no limit to the number of synonyms that may be provided for a +** single token. +** +** In many cases, method (1) above is the best approach. It does not add +** extra data to the FTS index or require FTS5 to query for multiple terms, +** so it is efficient in terms of disk space and query speed. However, it +** does not support prefix queries very well. If, as suggested above, the +** token "first" is substituted for "1st" by the tokenizer, then the query: +** +** +** ... MATCH '1s*' +** +** will not match documents that contain the token "1st" (as the tokenizer +** will probably not map "1s" to any prefix of "first"). +** +** For full prefix support, method (3) may be preferred. In this case, +** because the index contains entries for both "first" and "1st", prefix +** queries such as 'fi*' or '1s*' will match correctly. However, because +** extra entries are added to the FTS index, this method uses more space +** within the database. +** +** Method (2) offers a midpoint between (1) and (3). Using this method, +** a query such as '1s*' will match documents that contain the literal +** token "1st", but not "first" (assuming the tokenizer is not able to +** provide synonyms for prefixes). However, a non-prefix query like '1st' +** will match against "1st" and "first". This method does not require +** extra disk space, as no extra entries are added to the FTS index. +** On the other hand, it may require more CPU cycles to run MATCH queries, +** as separate queries of the FTS index are required for each synonym. +** +** When using methods (2) or (3), it is important that the tokenizer only +** provide synonyms when tokenizing document text (method (2)) or query +** text (method (3)), not both. Doing so will not cause any errors, but is +** inefficient. +*/ +typedef struct Fts5Tokenizer Fts5Tokenizer; +typedef struct fts5_tokenizer fts5_tokenizer; +struct fts5_tokenizer { + int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); + void (*xDelete)(Fts5Tokenizer*); + int (*xTokenize)(Fts5Tokenizer*, + void *pCtx, + int flags, /* Mask of FTS5_TOKENIZE_* flags */ + const char *pText, int nText, + int (*xToken)( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd /* Byte offset of end of token within input text */ + ) + ); +}; + +/* Flags that may be passed as the third argument to xTokenize() */ +#define FTS5_TOKENIZE_QUERY 0x0001 +#define FTS5_TOKENIZE_PREFIX 0x0002 +#define FTS5_TOKENIZE_DOCUMENT 0x0004 +#define FTS5_TOKENIZE_AUX 0x0008 + +/* Flags that may be passed by the tokenizer implementation back to FTS5 +** as the third argument to the supplied xToken callback. */ +#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */ + +/* +** END OF CUSTOM TOKENIZERS +*************************************************************************/ + +/************************************************************************* +** FTS5 EXTENSION REGISTRATION API +*/ +typedef struct fts5_api fts5_api; +struct fts5_api { + int iVersion; /* Currently always set to 2 */ + + /* Create a new tokenizer */ + int (*xCreateTokenizer)( + fts5_api *pApi, + const char *zName, + void *pContext, + fts5_tokenizer *pTokenizer, + void (*xDestroy)(void*) + ); + + /* Find an existing tokenizer */ + int (*xFindTokenizer)( + fts5_api *pApi, + const char *zName, + void **ppContext, + fts5_tokenizer *pTokenizer + ); + + /* Create a new auxiliary function */ + int (*xCreateFunction)( + fts5_api *pApi, + const char *zName, + void *pContext, + fts5_extension_function xFunction, + void (*xDestroy)(void*) + ); +}; + +/* +** END OF REGISTRATION API +*************************************************************************/ + +#if 0 +} /* end of the 'extern "C"' block */ +#endif + +#endif /* _FTS5_H */ + +/******** End of fts5.h *********/ + +/************** End of sqlite3.h *********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* +** Include the configuration header output by 'configure' if we're using the +** autoconf-based build +*/ +#if defined(_HAVE_SQLITE_CONFIG_H) && !defined(SQLITECONFIG_H) +#include "config.h" +#define SQLITECONFIG_H 1 +#endif + +/************** Include sqliteLimit.h in the middle of sqliteInt.h ***********/ +/************** Begin file sqliteLimit.h *************************************/ +/* +** 2007 May 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file defines various limits of what SQLite can process. +*/ + +/* +** The maximum length of a TEXT or BLOB in bytes. This also +** limits the size of a row in a table or index. +** +** The hard limit is the ability of a 32-bit signed integer +** to count the size: 2^31-1 or 2147483647. +*/ +#ifndef SQLITE_MAX_LENGTH +# define SQLITE_MAX_LENGTH 1000000000 +#endif + +/* +** This is the maximum number of +** +** * Columns in a table +** * Columns in an index +** * Columns in a view +** * Terms in the SET clause of an UPDATE statement +** * Terms in the result set of a SELECT statement +** * Terms in the GROUP BY or ORDER BY clauses of a SELECT statement. +** * Terms in the VALUES clause of an INSERT statement +** +** The hard upper limit here is 32676. Most database people will +** tell you that in a well-normalized database, you usually should +** not have more than a dozen or so columns in any table. And if +** that is the case, there is no point in having more than a few +** dozen values in any of the other situations described above. +*/ +#ifndef SQLITE_MAX_COLUMN +# define SQLITE_MAX_COLUMN 2000 +#endif + +/* +** The maximum length of a single SQL statement in bytes. +** +** It used to be the case that setting this value to zero would +** turn the limit off. That is no longer true. It is not possible +** to turn this limit off. +*/ +#ifndef SQLITE_MAX_SQL_LENGTH +# define SQLITE_MAX_SQL_LENGTH 1000000000 +#endif + +/* +** The maximum depth of an expression tree. This is limited to +** some extent by SQLITE_MAX_SQL_LENGTH. But sometime you might +** want to place more severe limits on the complexity of an +** expression. A value of 0 means that there is no limit. +*/ +#ifndef SQLITE_MAX_EXPR_DEPTH +# define SQLITE_MAX_EXPR_DEPTH 1000 +#endif + +/* +** The maximum number of terms in a compound SELECT statement. +** The code generator for compound SELECT statements does one +** level of recursion for each term. A stack overflow can result +** if the number of terms is too large. In practice, most SQL +** never has more than 3 or 4 terms. Use a value of 0 to disable +** any limit on the number of terms in a compount SELECT. +*/ +#ifndef SQLITE_MAX_COMPOUND_SELECT +# define SQLITE_MAX_COMPOUND_SELECT 500 +#endif + +/* +** The maximum number of opcodes in a VDBE program. +** Not currently enforced. +*/ +#ifndef SQLITE_MAX_VDBE_OP +# define SQLITE_MAX_VDBE_OP 250000000 +#endif + +/* +** The maximum number of arguments to an SQL function. +*/ +#ifndef SQLITE_MAX_FUNCTION_ARG +# define SQLITE_MAX_FUNCTION_ARG 127 +#endif + +/* +** The suggested maximum number of in-memory pages to use for +** the main database table and for temporary tables. +** +** IMPLEMENTATION-OF: R-30185-15359 The default suggested cache size is -2000, +** which means the cache size is limited to 2048000 bytes of memory. +** IMPLEMENTATION-OF: R-48205-43578 The default suggested cache size can be +** altered using the SQLITE_DEFAULT_CACHE_SIZE compile-time options. +*/ +#ifndef SQLITE_DEFAULT_CACHE_SIZE +# define SQLITE_DEFAULT_CACHE_SIZE -2000 +#endif + +/* +** The default number of frames to accumulate in the log file before +** checkpointing the database in WAL mode. +*/ +#ifndef SQLITE_DEFAULT_WAL_AUTOCHECKPOINT +# define SQLITE_DEFAULT_WAL_AUTOCHECKPOINT 1000 +#endif + +/* +** The maximum number of attached databases. This must be between 0 +** and 125. The upper bound of 125 is because the attached databases are +** counted using a signed 8-bit integer which has a maximum value of 127 +** and we have to allow 2 extra counts for the "main" and "temp" databases. +*/ +#ifndef SQLITE_MAX_ATTACHED +# define SQLITE_MAX_ATTACHED 10 +#endif + + +/* +** The maximum value of a ?nnn wildcard that the parser will accept. +** If the value exceeds 32767 then extra space is required for the Expr +** structure. But otherwise, we believe that the number can be as large +** as a signed 32-bit integer can hold. +*/ +#ifndef SQLITE_MAX_VARIABLE_NUMBER +# define SQLITE_MAX_VARIABLE_NUMBER 32766 +#endif + +/* Maximum page size. The upper bound on this value is 65536. This a limit +** imposed by the use of 16-bit offsets within each page. +** +** Earlier versions of SQLite allowed the user to change this value at +** compile time. This is no longer permitted, on the grounds that it creates +** a library that is technically incompatible with an SQLite library +** compiled with a different limit. If a process operating on a database +** with a page-size of 65536 bytes crashes, then an instance of SQLite +** compiled with the default page-size limit will not be able to rollback +** the aborted transaction. This could lead to database corruption. +*/ +#ifdef SQLITE_MAX_PAGE_SIZE +# undef SQLITE_MAX_PAGE_SIZE +#endif +#define SQLITE_MAX_PAGE_SIZE 65536 + + +/* +** The default size of a database page. +*/ +#ifndef SQLITE_DEFAULT_PAGE_SIZE +# define SQLITE_DEFAULT_PAGE_SIZE 4096 +#endif +#if SQLITE_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE +# undef SQLITE_DEFAULT_PAGE_SIZE +# define SQLITE_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE +#endif + +/* +** Ordinarily, if no value is explicitly provided, SQLite creates databases +** with page size SQLITE_DEFAULT_PAGE_SIZE. However, based on certain +** device characteristics (sector-size and atomic write() support), +** SQLite may choose a larger value. This constant is the maximum value +** SQLite will choose on its own. +*/ +#ifndef SQLITE_MAX_DEFAULT_PAGE_SIZE +# define SQLITE_MAX_DEFAULT_PAGE_SIZE 8192 +#endif +#if SQLITE_MAX_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE +# undef SQLITE_MAX_DEFAULT_PAGE_SIZE +# define SQLITE_MAX_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE +#endif + + +/* +** Maximum number of pages in one database file. +** +** This is really just the default value for the max_page_count pragma. +** This value can be lowered (or raised) at run-time using that the +** max_page_count macro. +*/ +#ifndef SQLITE_MAX_PAGE_COUNT +# define SQLITE_MAX_PAGE_COUNT 1073741823 +#endif + +/* +** Maximum length (in bytes) of the pattern in a LIKE or GLOB +** operator. +*/ +#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH +# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 +#endif + +/* +** Maximum depth of recursion for triggers. +** +** A value of 1 means that a trigger program will not be able to itself +** fire any triggers. A value of 0 means that no trigger programs at all +** may be executed. +*/ +#ifndef SQLITE_MAX_TRIGGER_DEPTH +# define SQLITE_MAX_TRIGGER_DEPTH 1000 +#endif + +/************** End of sqliteLimit.h *****************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* Disable nuisance warnings on Borland compilers */ +#if defined(__BORLANDC__) +#pragma warn -rch /* unreachable code */ +#pragma warn -ccc /* Condition is always true or false */ +#pragma warn -aus /* Assigned value is never used */ +#pragma warn -csu /* Comparing signed and unsigned */ +#pragma warn -spa /* Suspicious pointer arithmetic */ +#endif + +/* +** WAL mode depends on atomic aligned 32-bit loads and stores in a few +** places. The following macros try to make this explicit. +*/ +#ifndef __has_extension +# define __has_extension(x) 0 /* compatibility with non-clang compilers */ +#endif +#if GCC_VERSION>=4007000 || __has_extension(c_atomic) +# define SQLITE_ATOMIC_INTRINSICS 1 +# define AtomicLoad(PTR) __atomic_load_n((PTR),__ATOMIC_RELAXED) +# define AtomicStore(PTR,VAL) __atomic_store_n((PTR),(VAL),__ATOMIC_RELAXED) +#else +# define SQLITE_ATOMIC_INTRINSICS 0 +# define AtomicLoad(PTR) (*(PTR)) +# define AtomicStore(PTR,VAL) (*(PTR) = (VAL)) +#endif + +/* +** Include standard header files as necessary +*/ +#ifdef HAVE_STDINT_H +#include +#endif +#ifdef HAVE_INTTYPES_H +#include +#endif + +/* +** The following macros are used to cast pointers to integers and +** integers to pointers. The way you do this varies from one compiler +** to the next, so we have developed the following set of #if statements +** to generate appropriate macros for a wide range of compilers. +** +** The correct "ANSI" way to do this is to use the intptr_t type. +** Unfortunately, that typedef is not available on all compilers, or +** if it is available, it requires an #include of specific headers +** that vary from one machine to the next. +** +** Ticket #3860: The llvm-gcc-4.2 compiler from Apple chokes on +** the ((void*)&((char*)0)[X]) construct. But MSVC chokes on ((void*)(X)). +** So we have to define the macros in different ways depending on the +** compiler. +*/ +#if defined(HAVE_STDINT_H) /* Use this case if we have ANSI headers */ +# define SQLITE_INT_TO_PTR(X) ((void*)(intptr_t)(X)) +# define SQLITE_PTR_TO_INT(X) ((int)(intptr_t)(X)) +#elif defined(__PTRDIFF_TYPE__) /* This case should work for GCC */ +# define SQLITE_INT_TO_PTR(X) ((void*)(__PTRDIFF_TYPE__)(X)) +# define SQLITE_PTR_TO_INT(X) ((int)(__PTRDIFF_TYPE__)(X)) +#elif !defined(__GNUC__) /* Works for compilers other than LLVM */ +# define SQLITE_INT_TO_PTR(X) ((void*)&((char*)0)[X]) +# define SQLITE_PTR_TO_INT(X) ((int)(((char*)X)-(char*)0)) +#else /* Generates a warning - but it always works */ +# define SQLITE_INT_TO_PTR(X) ((void*)(X)) +# define SQLITE_PTR_TO_INT(X) ((int)(X)) +#endif + +/* +** A macro to hint to the compiler that a function should not be +** inlined. +*/ +#if defined(__GNUC__) +# define SQLITE_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) && _MSC_VER>=1310 +# define SQLITE_NOINLINE __declspec(noinline) +#else +# define SQLITE_NOINLINE +#endif + +/* +** Make sure that the compiler intrinsics we desire are enabled when +** compiling with an appropriate version of MSVC unless prevented by +** the SQLITE_DISABLE_INTRINSIC define. +*/ +#if !defined(SQLITE_DISABLE_INTRINSIC) +# if defined(_MSC_VER) && _MSC_VER>=1400 +# if !defined(_WIN32_WCE) +# include +# pragma intrinsic(_byteswap_ushort) +# pragma intrinsic(_byteswap_ulong) +# pragma intrinsic(_byteswap_uint64) +# pragma intrinsic(_ReadWriteBarrier) +# else +# include +# endif +# endif +#endif + +/* +** The SQLITE_THREADSAFE macro must be defined as 0, 1, or 2. +** 0 means mutexes are permanently disable and the library is never +** threadsafe. 1 means the library is serialized which is the highest +** level of threadsafety. 2 means the library is multithreaded - multiple +** threads can use SQLite as long as no two threads try to use the same +** database connection at the same time. +** +** Older versions of SQLite used an optional THREADSAFE macro. +** We support that for legacy. +** +** To ensure that the correct value of "THREADSAFE" is reported when querying +** for compile-time options at runtime (e.g. "PRAGMA compile_options"), this +** logic is partially replicated in ctime.c. If it is updated here, it should +** also be updated there. +*/ +#if !defined(SQLITE_THREADSAFE) +# if defined(THREADSAFE) +# define SQLITE_THREADSAFE THREADSAFE +# else +# define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */ +# endif +#endif + +/* +** Powersafe overwrite is on by default. But can be turned off using +** the -DSQLITE_POWERSAFE_OVERWRITE=0 command-line option. +*/ +#ifndef SQLITE_POWERSAFE_OVERWRITE +# define SQLITE_POWERSAFE_OVERWRITE 1 +#endif + +/* +** EVIDENCE-OF: R-25715-37072 Memory allocation statistics are enabled by +** default unless SQLite is compiled with SQLITE_DEFAULT_MEMSTATUS=0 in +** which case memory allocation statistics are disabled by default. +*/ +#if !defined(SQLITE_DEFAULT_MEMSTATUS) +# define SQLITE_DEFAULT_MEMSTATUS 1 +#endif + +/* +** Exactly one of the following macros must be defined in order to +** specify which memory allocation subsystem to use. +** +** SQLITE_SYSTEM_MALLOC // Use normal system malloc() +** SQLITE_WIN32_MALLOC // Use Win32 native heap API +** SQLITE_ZERO_MALLOC // Use a stub allocator that always fails +** SQLITE_MEMDEBUG // Debugging version of system malloc() +** +** On Windows, if the SQLITE_WIN32_MALLOC_VALIDATE macro is defined and the +** assert() macro is enabled, each call into the Win32 native heap subsystem +** will cause HeapValidate to be called. If heap validation should fail, an +** assertion will be triggered. +** +** If none of the above are defined, then set SQLITE_SYSTEM_MALLOC as +** the default. +*/ +#if defined(SQLITE_SYSTEM_MALLOC) \ + + defined(SQLITE_WIN32_MALLOC) \ + + defined(SQLITE_ZERO_MALLOC) \ + + defined(SQLITE_MEMDEBUG)>1 +# error "Two or more of the following compile-time configuration options\ + are defined but at most one is allowed:\ + SQLITE_SYSTEM_MALLOC, SQLITE_WIN32_MALLOC, SQLITE_MEMDEBUG,\ + SQLITE_ZERO_MALLOC" +#endif +#if defined(SQLITE_SYSTEM_MALLOC) \ + + defined(SQLITE_WIN32_MALLOC) \ + + defined(SQLITE_ZERO_MALLOC) \ + + defined(SQLITE_MEMDEBUG)==0 +# define SQLITE_SYSTEM_MALLOC 1 +#endif + +/* +** If SQLITE_MALLOC_SOFT_LIMIT is not zero, then try to keep the +** sizes of memory allocations below this value where possible. +*/ +#if !defined(SQLITE_MALLOC_SOFT_LIMIT) +# define SQLITE_MALLOC_SOFT_LIMIT 1024 +#endif + +/* +** We need to define _XOPEN_SOURCE as follows in order to enable +** recursive mutexes on most Unix systems and fchmod() on OpenBSD. +** But _XOPEN_SOURCE define causes problems for Mac OS X, so omit +** it. +*/ +#if !defined(_XOPEN_SOURCE) && !defined(__DARWIN__) && !defined(__APPLE__) +# define _XOPEN_SOURCE 600 +#endif + +/* +** NDEBUG and SQLITE_DEBUG are opposites. It should always be true that +** defined(NDEBUG)==!defined(SQLITE_DEBUG). If this is not currently true, +** make it true by defining or undefining NDEBUG. +** +** Setting NDEBUG makes the code smaller and faster by disabling the +** assert() statements in the code. So we want the default action +** to be for NDEBUG to be set and NDEBUG to be undefined only if SQLITE_DEBUG +** is set. Thus NDEBUG becomes an opt-in rather than an opt-out +** feature. +*/ +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG 1 +#endif +#if defined(NDEBUG) && defined(SQLITE_DEBUG) +# undef NDEBUG +#endif + +/* +** Enable SQLITE_ENABLE_EXPLAIN_COMMENTS if SQLITE_DEBUG is turned on. +*/ +#if !defined(SQLITE_ENABLE_EXPLAIN_COMMENTS) && defined(SQLITE_DEBUG) +# define SQLITE_ENABLE_EXPLAIN_COMMENTS 1 +#endif + +/* +** The testcase() macro is used to aid in coverage testing. When +** doing coverage testing, the condition inside the argument to +** testcase() must be evaluated both true and false in order to +** get full branch coverage. The testcase() macro is inserted +** to help ensure adequate test coverage in places where simple +** condition/decision coverage is inadequate. For example, testcase() +** can be used to make sure boundary values are tested. For +** bitmask tests, testcase() can be used to make sure each bit +** is significant and used at least once. On switch statements +** where multiple cases go to the same block of code, testcase() +** can insure that all cases are evaluated. +*/ +#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) +# ifndef SQLITE_AMALGAMATION + extern unsigned int sqlite3CoverageCounter; +# endif +# define testcase(X) if( X ){ sqlite3CoverageCounter += (unsigned)__LINE__; } +#else +# define testcase(X) +#endif + +/* +** The TESTONLY macro is used to enclose variable declarations or +** other bits of code that are needed to support the arguments +** within testcase() and assert() macros. +*/ +#if !defined(NDEBUG) || defined(SQLITE_COVERAGE_TEST) +# define TESTONLY(X) X +#else +# define TESTONLY(X) +#endif + +/* +** Sometimes we need a small amount of code such as a variable initialization +** to setup for a later assert() statement. We do not want this code to +** appear when assert() is disabled. The following macro is therefore +** used to contain that setup code. The "VVA" acronym stands for +** "Verification, Validation, and Accreditation". In other words, the +** code within VVA_ONLY() will only run during verification processes. +*/ +#ifndef NDEBUG +# define VVA_ONLY(X) X +#else +# define VVA_ONLY(X) +#endif + +/* +** Disable ALWAYS() and NEVER() (make them pass-throughs) for coverage +** and mutation testing +*/ +#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST) +# define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1 +#endif + +/* +** The ALWAYS and NEVER macros surround boolean expressions which +** are intended to always be true or false, respectively. Such +** expressions could be omitted from the code completely. But they +** are included in a few cases in order to enhance the resilience +** of SQLite to unexpected behavior - to make the code "self-healing" +** or "ductile" rather than being "brittle" and crashing at the first +** hint of unplanned behavior. +** +** In other words, ALWAYS and NEVER are added for defensive code. +** +** When doing coverage testing ALWAYS and NEVER are hard-coded to +** be true and false so that the unreachable code they specify will +** not be counted as untested code. +*/ +#if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS) +# define ALWAYS(X) (1) +# define NEVER(X) (0) +#elif !defined(NDEBUG) +# define ALWAYS(X) ((X)?1:(assert(0),0)) +# define NEVER(X) ((X)?(assert(0),1):0) +#else +# define ALWAYS(X) (X) +# define NEVER(X) (X) +#endif + +/* +** Some conditionals are optimizations only. In other words, if the +** conditionals are replaced with a constant 1 (true) or 0 (false) then +** the correct answer is still obtained, though perhaps not as quickly. +** +** The following macros mark these optimizations conditionals. +*/ +#if defined(SQLITE_MUTATION_TEST) +# define OK_IF_ALWAYS_TRUE(X) (1) +# define OK_IF_ALWAYS_FALSE(X) (0) +#else +# define OK_IF_ALWAYS_TRUE(X) (X) +# define OK_IF_ALWAYS_FALSE(X) (X) +#endif + +/* +** Some malloc failures are only possible if SQLITE_TEST_REALLOC_STRESS is +** defined. We need to defend against those failures when testing with +** SQLITE_TEST_REALLOC_STRESS, but we don't want the unreachable branches +** during a normal build. The following macro can be used to disable tests +** that are always false except when SQLITE_TEST_REALLOC_STRESS is set. +*/ +#if defined(SQLITE_TEST_REALLOC_STRESS) +# define ONLY_IF_REALLOC_STRESS(X) (X) +#elif !defined(NDEBUG) +# define ONLY_IF_REALLOC_STRESS(X) ((X)?(assert(0),1):0) +#else +# define ONLY_IF_REALLOC_STRESS(X) (0) +#endif + +/* +** Declarations used for tracing the operating system interfaces. +*/ +#if defined(SQLITE_FORCE_OS_TRACE) || defined(SQLITE_TEST) || \ + (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) + extern int sqlite3OSTrace; +# define OSTRACE(X) if( sqlite3OSTrace ) sqlite3DebugPrintf X +# define SQLITE_HAVE_OS_TRACE +#else +# define OSTRACE(X) +# undef SQLITE_HAVE_OS_TRACE +#endif + +/* +** Is the sqlite3ErrName() function needed in the build? Currently, +** it is needed by "mutex_w32.c" (when debugging), "os_win.c" (when +** OSTRACE is enabled), and by several "test*.c" files (which are +** compiled using SQLITE_TEST). +*/ +#if defined(SQLITE_HAVE_OS_TRACE) || defined(SQLITE_TEST) || \ + (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) +# define SQLITE_NEED_ERR_NAME +#else +# undef SQLITE_NEED_ERR_NAME +#endif + +/* +** SQLITE_ENABLE_EXPLAIN_COMMENTS is incompatible with SQLITE_OMIT_EXPLAIN +*/ +#ifdef SQLITE_OMIT_EXPLAIN +# undef SQLITE_ENABLE_EXPLAIN_COMMENTS +#endif + +/* +** SQLITE_OMIT_VIRTUALTABLE implies SQLITE_OMIT_ALTERTABLE +*/ +#if defined(SQLITE_OMIT_VIRTUALTABLE) && !defined(SQLITE_OMIT_ALTERTABLE) +# define SQLITE_OMIT_ALTERTABLE +#endif + +/* +** Return true (non-zero) if the input is an integer that is too large +** to fit in 32-bits. This macro is used inside of various testcase() +** macros to verify that we have tested SQLite for large-file support. +*/ +#define IS_BIG_INT(X) (((X)&~(i64)0xffffffff)!=0) + +/* +** The macro unlikely() is a hint that surrounds a boolean +** expression that is usually false. Macro likely() surrounds +** a boolean expression that is usually true. These hints could, +** in theory, be used by the compiler to generate better code, but +** currently they are just comments for human readers. +*/ +#define likely(X) (X) +#define unlikely(X) (X) + +/************** Include hash.h in the middle of sqliteInt.h ******************/ +/************** Begin file hash.h ********************************************/ +/* +** 2001 September 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the header file for the generic hash-table implementation +** used in SQLite. +*/ +#ifndef SQLITE_HASH_H +#define SQLITE_HASH_H + +/* Forward declarations of structures. */ +typedef struct Hash Hash; +typedef struct HashElem HashElem; + +/* A complete hash table is an instance of the following structure. +** The internals of this structure are intended to be opaque -- client +** code should not attempt to access or modify the fields of this structure +** directly. Change this structure only by using the routines below. +** However, some of the "procedures" and "functions" for modifying and +** accessing this structure are really macros, so we can't really make +** this structure opaque. +** +** All elements of the hash table are on a single doubly-linked list. +** Hash.first points to the head of this list. +** +** There are Hash.htsize buckets. Each bucket points to a spot in +** the global doubly-linked list. The contents of the bucket are the +** element pointed to plus the next _ht.count-1 elements in the list. +** +** Hash.htsize and Hash.ht may be zero. In that case lookup is done +** by a linear search of the global list. For small tables, the +** Hash.ht table is never allocated because if there are few elements +** in the table, it is faster to do a linear search than to manage +** the hash table. +*/ +struct Hash { + unsigned int htsize; /* Number of buckets in the hash table */ + unsigned int count; /* Number of entries in this table */ + HashElem *first; /* The first element of the array */ + struct _ht { /* the hash table */ + unsigned int count; /* Number of entries with this hash */ + HashElem *chain; /* Pointer to first entry with this hash */ + } *ht; +}; + +/* Each element in the hash table is an instance of the following +** structure. All elements are stored on a single doubly-linked list. +** +** Again, this structure is intended to be opaque, but it can't really +** be opaque because it is used by macros. +*/ +struct HashElem { + HashElem *next, *prev; /* Next and previous elements in the table */ + void *data; /* Data associated with this element */ + const char *pKey; /* Key associated with this element */ +}; + +/* +** Access routines. To delete, insert a NULL pointer. +*/ +SQLITE_PRIVATE void sqlite3HashInit(Hash*); +SQLITE_PRIVATE void *sqlite3HashInsert(Hash*, const char *pKey, void *pData); +SQLITE_PRIVATE void *sqlite3HashFind(const Hash*, const char *pKey); +SQLITE_PRIVATE void sqlite3HashClear(Hash*); + +/* +** Macros for looping over all elements of a hash table. The idiom is +** like this: +** +** Hash h; +** HashElem *p; +** ... +** for(p=sqliteHashFirst(&h); p; p=sqliteHashNext(p)){ +** SomeStructure *pData = sqliteHashData(p); +** // do something with pData +** } +*/ +#define sqliteHashFirst(H) ((H)->first) +#define sqliteHashNext(E) ((E)->next) +#define sqliteHashData(E) ((E)->data) +/* #define sqliteHashKey(E) ((E)->pKey) // NOT USED */ +/* #define sqliteHashKeysize(E) ((E)->nKey) // NOT USED */ + +/* +** Number of entries in a hash table +*/ +#define sqliteHashCount(H) ((H)->count) + +#endif /* SQLITE_HASH_H */ + +/************** End of hash.h ************************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include parse.h in the middle of sqliteInt.h *****************/ +/************** Begin file parse.h *******************************************/ +#define TK_SEMI 1 +#define TK_EXPLAIN 2 +#define TK_QUERY 3 +#define TK_PLAN 4 +#define TK_BEGIN 5 +#define TK_TRANSACTION 6 +#define TK_DEFERRED 7 +#define TK_IMMEDIATE 8 +#define TK_EXCLUSIVE 9 +#define TK_COMMIT 10 +#define TK_END 11 +#define TK_ROLLBACK 12 +#define TK_SAVEPOINT 13 +#define TK_RELEASE 14 +#define TK_TO 15 +#define TK_TABLE 16 +#define TK_CREATE 17 +#define TK_IF 18 +#define TK_NOT 19 +#define TK_EXISTS 20 +#define TK_TEMP 21 +#define TK_LP 22 +#define TK_RP 23 +#define TK_AS 24 +#define TK_COMMA 25 +#define TK_WITHOUT 26 +#define TK_ABORT 27 +#define TK_ACTION 28 +#define TK_AFTER 29 +#define TK_ANALYZE 30 +#define TK_ASC 31 +#define TK_ATTACH 32 +#define TK_BEFORE 33 +#define TK_BY 34 +#define TK_CASCADE 35 +#define TK_CAST 36 +#define TK_CONFLICT 37 +#define TK_DATABASE 38 +#define TK_DESC 39 +#define TK_DETACH 40 +#define TK_EACH 41 +#define TK_FAIL 42 +#define TK_OR 43 +#define TK_AND 44 +#define TK_IS 45 +#define TK_MATCH 46 +#define TK_LIKE_KW 47 +#define TK_BETWEEN 48 +#define TK_IN 49 +#define TK_ISNULL 50 +#define TK_NOTNULL 51 +#define TK_NE 52 +#define TK_EQ 53 +#define TK_GT 54 +#define TK_LE 55 +#define TK_LT 56 +#define TK_GE 57 +#define TK_ESCAPE 58 +#define TK_ID 59 +#define TK_COLUMNKW 60 +#define TK_DO 61 +#define TK_FOR 62 +#define TK_IGNORE 63 +#define TK_INITIALLY 64 +#define TK_INSTEAD 65 +#define TK_NO 66 +#define TK_KEY 67 +#define TK_OF 68 +#define TK_OFFSET 69 +#define TK_PRAGMA 70 +#define TK_RAISE 71 +#define TK_RECURSIVE 72 +#define TK_REPLACE 73 +#define TK_RESTRICT 74 +#define TK_ROW 75 +#define TK_ROWS 76 +#define TK_TRIGGER 77 +#define TK_VACUUM 78 +#define TK_VIEW 79 +#define TK_VIRTUAL 80 +#define TK_WITH 81 +#define TK_NULLS 82 +#define TK_FIRST 83 +#define TK_LAST 84 +#define TK_CURRENT 85 +#define TK_FOLLOWING 86 +#define TK_PARTITION 87 +#define TK_PRECEDING 88 +#define TK_RANGE 89 +#define TK_UNBOUNDED 90 +#define TK_EXCLUDE 91 +#define TK_GROUPS 92 +#define TK_OTHERS 93 +#define TK_TIES 94 +#define TK_GENERATED 95 +#define TK_ALWAYS 96 +#define TK_MATERIALIZED 97 +#define TK_REINDEX 98 +#define TK_RENAME 99 +#define TK_CTIME_KW 100 +#define TK_ANY 101 +#define TK_BITAND 102 +#define TK_BITOR 103 +#define TK_LSHIFT 104 +#define TK_RSHIFT 105 +#define TK_PLUS 106 +#define TK_MINUS 107 +#define TK_STAR 108 +#define TK_SLASH 109 +#define TK_REM 110 +#define TK_CONCAT 111 +#define TK_PTR 112 +#define TK_COLLATE 113 +#define TK_BITNOT 114 +#define TK_ON 115 +#define TK_INDEXED 116 +#define TK_STRING 117 +#define TK_JOIN_KW 118 +#define TK_CONSTRAINT 119 +#define TK_DEFAULT 120 +#define TK_NULL 121 +#define TK_PRIMARY 122 +#define TK_UNIQUE 123 +#define TK_CHECK 124 +#define TK_REFERENCES 125 +#define TK_AUTOINCR 126 +#define TK_INSERT 127 +#define TK_DELETE 128 +#define TK_UPDATE 129 +#define TK_SET 130 +#define TK_DEFERRABLE 131 +#define TK_FOREIGN 132 +#define TK_DROP 133 +#define TK_UNION 134 +#define TK_ALL 135 +#define TK_EXCEPT 136 +#define TK_INTERSECT 137 +#define TK_SELECT 138 +#define TK_VALUES 139 +#define TK_DISTINCT 140 +#define TK_DOT 141 +#define TK_FROM 142 +#define TK_JOIN 143 +#define TK_USING 144 +#define TK_ORDER 145 +#define TK_GROUP 146 +#define TK_HAVING 147 +#define TK_LIMIT 148 +#define TK_WHERE 149 +#define TK_RETURNING 150 +#define TK_INTO 151 +#define TK_NOTHING 152 +#define TK_FLOAT 153 +#define TK_BLOB 154 +#define TK_INTEGER 155 +#define TK_VARIABLE 156 +#define TK_CASE 157 +#define TK_WHEN 158 +#define TK_THEN 159 +#define TK_ELSE 160 +#define TK_INDEX 161 +#define TK_ALTER 162 +#define TK_ADD 163 +#define TK_WINDOW 164 +#define TK_OVER 165 +#define TK_FILTER 166 +#define TK_COLUMN 167 +#define TK_AGG_FUNCTION 168 +#define TK_AGG_COLUMN 169 +#define TK_TRUEFALSE 170 +#define TK_ISNOT 171 +#define TK_FUNCTION 172 +#define TK_UMINUS 173 +#define TK_UPLUS 174 +#define TK_TRUTH 175 +#define TK_REGISTER 176 +#define TK_VECTOR 177 +#define TK_SELECT_COLUMN 178 +#define TK_IF_NULL_ROW 179 +#define TK_ASTERISK 180 +#define TK_SPAN 181 +#define TK_ERROR 182 +#define TK_SPACE 183 +#define TK_ILLEGAL 184 + +/************** End of parse.h ***********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +#include +#include +#include +#include +#include + +/* +** Use a macro to replace memcpy() if compiled with SQLITE_INLINE_MEMCPY. +** This allows better measurements of where memcpy() is used when running +** cachegrind. But this macro version of memcpy() is very slow so it +** should not be used in production. This is a performance measurement +** hack only. +*/ +#ifdef SQLITE_INLINE_MEMCPY +# define memcpy(D,S,N) {char*xxd=(char*)(D);const char*xxs=(const char*)(S);\ + int xxn=(N);while(xxn-->0)*(xxd++)=*(xxs++);} +#endif + +/* +** If compiling for a processor that lacks floating point support, +** substitute integer for floating-point +*/ +#ifdef SQLITE_OMIT_FLOATING_POINT +# define double sqlite_int64 +# define float sqlite_int64 +# define LONGDOUBLE_TYPE sqlite_int64 +# ifndef SQLITE_BIG_DBL +# define SQLITE_BIG_DBL (((sqlite3_int64)1)<<50) +# endif +# define SQLITE_OMIT_DATETIME_FUNCS 1 +# define SQLITE_OMIT_TRACE 1 +# undef SQLITE_MIXED_ENDIAN_64BIT_FLOAT +# undef SQLITE_HAVE_ISNAN +#endif +#ifndef SQLITE_BIG_DBL +# define SQLITE_BIG_DBL (1e99) +#endif + +/* +** OMIT_TEMPDB is set to 1 if SQLITE_OMIT_TEMPDB is defined, or 0 +** afterward. Having this macro allows us to cause the C compiler +** to omit code used by TEMP tables without messy #ifndef statements. +*/ +#ifdef SQLITE_OMIT_TEMPDB +#define OMIT_TEMPDB 1 +#else +#define OMIT_TEMPDB 0 +#endif + +/* +** The "file format" number is an integer that is incremented whenever +** the VDBE-level file format changes. The following macros define the +** the default file format for new databases and the maximum file format +** that the library can read. +*/ +#define SQLITE_MAX_FILE_FORMAT 4 +#ifndef SQLITE_DEFAULT_FILE_FORMAT +# define SQLITE_DEFAULT_FILE_FORMAT 4 +#endif + +/* +** Determine whether triggers are recursive by default. This can be +** changed at run-time using a pragma. +*/ +#ifndef SQLITE_DEFAULT_RECURSIVE_TRIGGERS +# define SQLITE_DEFAULT_RECURSIVE_TRIGGERS 0 +#endif + +/* +** Provide a default value for SQLITE_TEMP_STORE in case it is not specified +** on the command-line +*/ +#ifndef SQLITE_TEMP_STORE +# define SQLITE_TEMP_STORE 1 +#endif + +/* +** If no value has been provided for SQLITE_MAX_WORKER_THREADS, or if +** SQLITE_TEMP_STORE is set to 3 (never use temporary files), set it +** to zero. +*/ +#if SQLITE_TEMP_STORE==3 || SQLITE_THREADSAFE==0 +# undef SQLITE_MAX_WORKER_THREADS +# define SQLITE_MAX_WORKER_THREADS 0 +#endif +#ifndef SQLITE_MAX_WORKER_THREADS +# define SQLITE_MAX_WORKER_THREADS 8 +#endif +#ifndef SQLITE_DEFAULT_WORKER_THREADS +# define SQLITE_DEFAULT_WORKER_THREADS 0 +#endif +#if SQLITE_DEFAULT_WORKER_THREADS>SQLITE_MAX_WORKER_THREADS +# undef SQLITE_MAX_WORKER_THREADS +# define SQLITE_MAX_WORKER_THREADS SQLITE_DEFAULT_WORKER_THREADS +#endif + +/* +** The default initial allocation for the pagecache when using separate +** pagecaches for each database connection. A positive number is the +** number of pages. A negative number N translations means that a buffer +** of -1024*N bytes is allocated and used for as many pages as it will hold. +** +** The default value of "20" was chosen to minimize the run-time of the +** speedtest1 test program with options: --shrink-memory --reprepare +*/ +#ifndef SQLITE_DEFAULT_PCACHE_INITSZ +# define SQLITE_DEFAULT_PCACHE_INITSZ 20 +#endif + +/* +** Default value for the SQLITE_CONFIG_SORTERREF_SIZE option. +*/ +#ifndef SQLITE_DEFAULT_SORTERREF_SIZE +# define SQLITE_DEFAULT_SORTERREF_SIZE 0x7fffffff +#endif + +/* +** The compile-time options SQLITE_MMAP_READWRITE and +** SQLITE_ENABLE_BATCH_ATOMIC_WRITE are not compatible with one another. +** You must choose one or the other (or neither) but not both. +*/ +#if defined(SQLITE_MMAP_READWRITE) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) +#error Cannot use both SQLITE_MMAP_READWRITE and SQLITE_ENABLE_BATCH_ATOMIC_WRITE +#endif + +/* +** GCC does not define the offsetof() macro so we'll have to do it +** ourselves. +*/ +#ifndef offsetof +#define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD)) +#endif + +/* +** Macros to compute minimum and maximum of two numbers. +*/ +#ifndef MIN +# define MIN(A,B) ((A)<(B)?(A):(B)) +#endif +#ifndef MAX +# define MAX(A,B) ((A)>(B)?(A):(B)) +#endif + +/* +** Swap two objects of type TYPE. +*/ +#define SWAP(TYPE,A,B) {TYPE t=A; A=B; B=t;} + +/* +** Check to see if this machine uses EBCDIC. (Yes, believe it or +** not, there are still machines out there that use EBCDIC.) +*/ +#if 'A' == '\301' +# define SQLITE_EBCDIC 1 +#else +# define SQLITE_ASCII 1 +#endif + +/* +** Integers of known sizes. These typedefs might change for architectures +** where the sizes very. Preprocessor macros are available so that the +** types can be conveniently redefined at compile-type. Like this: +** +** cc '-DUINTPTR_TYPE=long long int' ... +*/ +#ifndef UINT32_TYPE +# ifdef HAVE_UINT32_T +# define UINT32_TYPE uint32_t +# else +# define UINT32_TYPE unsigned int +# endif +#endif +#ifndef UINT16_TYPE +# ifdef HAVE_UINT16_T +# define UINT16_TYPE uint16_t +# else +# define UINT16_TYPE unsigned short int +# endif +#endif +#ifndef INT16_TYPE +# ifdef HAVE_INT16_T +# define INT16_TYPE int16_t +# else +# define INT16_TYPE short int +# endif +#endif +#ifndef UINT8_TYPE +# ifdef HAVE_UINT8_T +# define UINT8_TYPE uint8_t +# else +# define UINT8_TYPE unsigned char +# endif +#endif +#ifndef INT8_TYPE +# ifdef HAVE_INT8_T +# define INT8_TYPE int8_t +# else +# define INT8_TYPE signed char +# endif +#endif +#ifndef LONGDOUBLE_TYPE +# define LONGDOUBLE_TYPE long double +#endif +typedef sqlite_int64 i64; /* 8-byte signed integer */ +typedef sqlite_uint64 u64; /* 8-byte unsigned integer */ +typedef UINT32_TYPE u32; /* 4-byte unsigned integer */ +typedef UINT16_TYPE u16; /* 2-byte unsigned integer */ +typedef INT16_TYPE i16; /* 2-byte signed integer */ +typedef UINT8_TYPE u8; /* 1-byte unsigned integer */ +typedef INT8_TYPE i8; /* 1-byte signed integer */ + +/* +** SQLITE_MAX_U32 is a u64 constant that is the maximum u64 value +** that can be stored in a u32 without loss of data. The value +** is 0x00000000ffffffff. But because of quirks of some compilers, we +** have to specify the value in the less intuitive manner shown: +*/ +#define SQLITE_MAX_U32 ((((u64)1)<<32)-1) + +/* +** The datatype used to store estimates of the number of rows in a +** table or index. This is an unsigned integer type. For 99.9% of +** the world, a 32-bit integer is sufficient. But a 64-bit integer +** can be used at compile-time if desired. +*/ +#ifdef SQLITE_64BIT_STATS + typedef u64 tRowcnt; /* 64-bit only if requested at compile-time */ +#else + typedef u32 tRowcnt; /* 32-bit is the default */ +#endif + +/* +** Estimated quantities used for query planning are stored as 16-bit +** logarithms. For quantity X, the value stored is 10*log2(X). This +** gives a possible range of values of approximately 1.0e986 to 1e-986. +** But the allowed values are "grainy". Not every value is representable. +** For example, quantities 16 and 17 are both represented by a LogEst +** of 40. However, since LogEst quantities are suppose to be estimates, +** not exact values, this imprecision is not a problem. +** +** "LogEst" is short for "Logarithmic Estimate". +** +** Examples: +** 1 -> 0 20 -> 43 10000 -> 132 +** 2 -> 10 25 -> 46 25000 -> 146 +** 3 -> 16 100 -> 66 1000000 -> 199 +** 4 -> 20 1000 -> 99 1048576 -> 200 +** 10 -> 33 1024 -> 100 4294967296 -> 320 +** +** The LogEst can be negative to indicate fractional values. +** Examples: +** +** 0.5 -> -10 0.1 -> -33 0.0625 -> -40 +*/ +typedef INT16_TYPE LogEst; + +/* +** Set the SQLITE_PTRSIZE macro to the number of bytes in a pointer +*/ +#ifndef SQLITE_PTRSIZE +# if defined(__SIZEOF_POINTER__) +# define SQLITE_PTRSIZE __SIZEOF_POINTER__ +# elif defined(i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(_M_ARM) || defined(__arm__) || defined(__x86) || \ + (defined(__APPLE__) && defined(__POWERPC__)) || \ + (defined(__TOS_AIX__) && !defined(__64BIT__)) +# define SQLITE_PTRSIZE 4 +# else +# define SQLITE_PTRSIZE 8 +# endif +#endif + +/* The uptr type is an unsigned integer large enough to hold a pointer +*/ +#if defined(HAVE_STDINT_H) + typedef uintptr_t uptr; +#elif SQLITE_PTRSIZE==4 + typedef u32 uptr; +#else + typedef u64 uptr; +#endif + +/* +** The SQLITE_WITHIN(P,S,E) macro checks to see if pointer P points to +** something between S (inclusive) and E (exclusive). +** +** In other words, S is a buffer and E is a pointer to the first byte after +** the end of buffer S. This macro returns true if P points to something +** contained within the buffer S. +*/ +#define SQLITE_WITHIN(P,S,E) (((uptr)(P)>=(uptr)(S))&&((uptr)(P)<(uptr)(E))) + + +/* +** Macros to determine whether the machine is big or little endian, +** and whether or not that determination is run-time or compile-time. +** +** For best performance, an attempt is made to guess at the byte-order +** using C-preprocessor macros. If that is unsuccessful, or if +** -DSQLITE_BYTEORDER=0 is set, then byte-order is determined +** at run-time. +*/ +#ifndef SQLITE_BYTEORDER +# if defined(i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ + defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ + defined(__ARMEL__) || defined(__AARCH64EL__) || defined(_M_ARM64) +# define SQLITE_BYTEORDER 1234 +# elif defined(sparc) || defined(__ppc__) || \ + defined(__ARMEB__) || defined(__AARCH64EB__) +# define SQLITE_BYTEORDER 4321 +# else +# define SQLITE_BYTEORDER 0 +# endif +#endif +#if SQLITE_BYTEORDER==4321 +# define SQLITE_BIGENDIAN 1 +# define SQLITE_LITTLEENDIAN 0 +# define SQLITE_UTF16NATIVE SQLITE_UTF16BE +#elif SQLITE_BYTEORDER==1234 +# define SQLITE_BIGENDIAN 0 +# define SQLITE_LITTLEENDIAN 1 +# define SQLITE_UTF16NATIVE SQLITE_UTF16LE +#else +# ifdef SQLITE_AMALGAMATION + const int sqlite3one = 1; +# else + extern const int sqlite3one; +# endif +# define SQLITE_BIGENDIAN (*(char *)(&sqlite3one)==0) +# define SQLITE_LITTLEENDIAN (*(char *)(&sqlite3one)==1) +# define SQLITE_UTF16NATIVE (SQLITE_BIGENDIAN?SQLITE_UTF16BE:SQLITE_UTF16LE) +#endif + +/* +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. +*/ +#define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) +#define LARGEST_UINT64 (0xffffffff|(((u64)0xffffffff)<<32)) +#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) + +/* +** Round up a number to the next larger multiple of 8. This is used +** to force 8-byte alignment on 64-bit architectures. +*/ +#define ROUND8(x) (((x)+7)&~7) + +/* +** Round down to the nearest multiple of 8 +*/ +#define ROUNDDOWN8(x) ((x)&~7) + +/* +** Assert that the pointer X is aligned to an 8-byte boundary. This +** macro is used only within assert() to verify that the code gets +** all alignment restrictions correct. +** +** Except, if SQLITE_4_BYTE_ALIGNED_MALLOC is defined, then the +** underlying malloc() implementation might return us 4-byte aligned +** pointers. In that case, only verify 4-byte alignment. +*/ +#ifdef SQLITE_4_BYTE_ALIGNED_MALLOC +# define EIGHT_BYTE_ALIGNMENT(X) ((((char*)(X) - (char*)0)&3)==0) +#else +# define EIGHT_BYTE_ALIGNMENT(X) ((((char*)(X) - (char*)0)&7)==0) +#endif + +/* +** Disable MMAP on platforms where it is known to not work +*/ +#if defined(__OpenBSD__) || defined(__QNXNTO__) +# undef SQLITE_MAX_MMAP_SIZE +# define SQLITE_MAX_MMAP_SIZE 0 +#endif + +/* +** Default maximum size of memory used by memory-mapped I/O in the VFS +*/ +#ifdef __APPLE__ +# include +#endif +#ifndef SQLITE_MAX_MMAP_SIZE +# if defined(__linux__) \ + || defined(_WIN32) \ + || (defined(__APPLE__) && defined(__MACH__)) \ + || defined(__sun) \ + || defined(__FreeBSD__) \ + || defined(__DragonFly__) +# define SQLITE_MAX_MMAP_SIZE 0x7fff0000 /* 2147418112 */ +# else +# define SQLITE_MAX_MMAP_SIZE 0 +# endif +#endif + +/* +** The default MMAP_SIZE is zero on all platforms. Or, even if a larger +** default MMAP_SIZE is specified at compile-time, make sure that it does +** not exceed the maximum mmap size. +*/ +#ifndef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE 0 +#endif +#if SQLITE_DEFAULT_MMAP_SIZE>SQLITE_MAX_MMAP_SIZE +# undef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE SQLITE_MAX_MMAP_SIZE +#endif + +/* +** SELECTTRACE_ENABLED will be either 1 or 0 depending on whether or not +** the Select query generator tracing logic is turned on. +*/ +#if !defined(SQLITE_AMALGAMATION) +SQLITE_PRIVATE u32 sqlite3SelectTrace; +#endif +#if defined(SQLITE_DEBUG) \ + && (defined(SQLITE_TEST) || defined(SQLITE_ENABLE_SELECTTRACE)) +# define SELECTTRACE_ENABLED 1 +# define SELECTTRACE(K,P,S,X) \ + if(sqlite3SelectTrace&(K)) \ + sqlite3DebugPrintf("%u/%d/%p: ",(S)->selId,(P)->addrExplain,(S)),\ + sqlite3DebugPrintf X +#else +# define SELECTTRACE(K,P,S,X) +# define SELECTTRACE_ENABLED 0 +#endif + +/* +** Macros for "wheretrace" +*/ +SQLITE_PRIVATE u32 sqlite3WhereTrace; +#if defined(SQLITE_DEBUG) \ + && (defined(SQLITE_TEST) || defined(SQLITE_ENABLE_WHERETRACE)) +# define WHERETRACE(K,X) if(sqlite3WhereTrace&(K)) sqlite3DebugPrintf X +# define WHERETRACE_ENABLED 1 +#else +# define WHERETRACE(K,X) +#endif + + +/* +** An instance of the following structure is used to store the busy-handler +** callback for a given sqlite handle. +** +** The sqlite.busyHandler member of the sqlite struct contains the busy +** callback for the database handle. Each pager opened via the sqlite +** handle is passed a pointer to sqlite.busyHandler. The busy-handler +** callback is currently invoked only from within pager.c. +*/ +typedef struct BusyHandler BusyHandler; +struct BusyHandler { + int (*xBusyHandler)(void *,int); /* The busy callback */ + void *pBusyArg; /* First arg to busy callback */ + int nBusy; /* Incremented with each busy call */ +}; + +/* +** Name of table that holds the database schema. +** +** The PREFERRED names are used whereever possible. But LEGACY is also +** used for backwards compatibility. +** +** 1. Queries can use either the PREFERRED or the LEGACY names +** 2. The sqlite3_set_authorizer() callback uses the LEGACY name +** 3. The PRAGMA table_list statement uses the PREFERRED name +** +** The LEGACY names are stored in the internal symbol hash table +** in support of (2). Names are translated using sqlite3PreferredTableName() +** for (3). The sqlite3FindTable() function takes care of translating +** names for (1). +** +** Note that "sqlite_temp_schema" can also be called "temp.sqlite_schema". +*/ +#define LEGACY_SCHEMA_TABLE "sqlite_master" +#define LEGACY_TEMP_SCHEMA_TABLE "sqlite_temp_master" +#define PREFERRED_SCHEMA_TABLE "sqlite_schema" +#define PREFERRED_TEMP_SCHEMA_TABLE "sqlite_temp_schema" + + +/* +** The root-page of the schema table. +*/ +#define SCHEMA_ROOT 1 + +/* +** The name of the schema table. The name is different for TEMP. +*/ +#define SCHEMA_TABLE(x) \ + ((!OMIT_TEMPDB)&&(x==1)?LEGACY_TEMP_SCHEMA_TABLE:LEGACY_SCHEMA_TABLE) + +/* +** A convenience macro that returns the number of elements in +** an array. +*/ +#define ArraySize(X) ((int)(sizeof(X)/sizeof(X[0]))) + +/* +** Determine if the argument is a power of two +*/ +#define IsPowerOfTwo(X) (((X)&((X)-1))==0) + +/* +** The following value as a destructor means to use sqlite3DbFree(). +** The sqlite3DbFree() routine requires two parameters instead of the +** one parameter that destructors normally want. So we have to introduce +** this magic value that the code knows to handle differently. Any +** pointer will work here as long as it is distinct from SQLITE_STATIC +** and SQLITE_TRANSIENT. +*/ +#define SQLITE_DYNAMIC ((sqlite3_destructor_type)sqlite3OomFault) + +/* +** When SQLITE_OMIT_WSD is defined, it means that the target platform does +** not support Writable Static Data (WSD) such as global and static variables. +** All variables must either be on the stack or dynamically allocated from +** the heap. When WSD is unsupported, the variable declarations scattered +** throughout the SQLite code must become constants instead. The SQLITE_WSD +** macro is used for this purpose. And instead of referencing the variable +** directly, we use its constant as a key to lookup the run-time allocated +** buffer that holds real variable. The constant is also the initializer +** for the run-time allocated buffer. +** +** In the usual case where WSD is supported, the SQLITE_WSD and GLOBAL +** macros become no-ops and have zero performance impact. +*/ +#ifdef SQLITE_OMIT_WSD + #define SQLITE_WSD const + #define GLOBAL(t,v) (*(t*)sqlite3_wsd_find((void*)&(v), sizeof(v))) + #define sqlite3GlobalConfig GLOBAL(struct Sqlite3Config, sqlite3Config) +SQLITE_API int sqlite3_wsd_init(int N, int J); +SQLITE_API void *sqlite3_wsd_find(void *K, int L); +#else + #define SQLITE_WSD + #define GLOBAL(t,v) v + #define sqlite3GlobalConfig sqlite3Config +#endif + +/* +** The following macros are used to suppress compiler warnings and to +** make it clear to human readers when a function parameter is deliberately +** left unused within the body of a function. This usually happens when +** a function is called via a function pointer. For example the +** implementation of an SQL aggregate step callback may not use the +** parameter indicating the number of arguments passed to the aggregate, +** if it knows that this is enforced elsewhere. +** +** When a function parameter is not used at all within the body of a function, +** it is generally named "NotUsed" or "NotUsed2" to make things even clearer. +** However, these macros may also be used to suppress warnings related to +** parameters that may or may not be used depending on compilation options. +** For example those parameters only used in assert() statements. In these +** cases the parameters are named as per the usual conventions. +*/ +#define UNUSED_PARAMETER(x) (void)(x) +#define UNUSED_PARAMETER2(x,y) UNUSED_PARAMETER(x),UNUSED_PARAMETER(y) + +/* +** Forward references to structures +*/ +typedef struct AggInfo AggInfo; +typedef struct AuthContext AuthContext; +typedef struct AutoincInfo AutoincInfo; +typedef struct Bitvec Bitvec; +typedef struct CollSeq CollSeq; +typedef struct Column Column; +typedef struct Cte Cte; +typedef struct CteUse CteUse; +typedef struct Db Db; +typedef struct DbFixer DbFixer; +typedef struct Schema Schema; +typedef struct Expr Expr; +typedef struct ExprList ExprList; +typedef struct FKey FKey; +typedef struct FuncDestructor FuncDestructor; +typedef struct FuncDef FuncDef; +typedef struct FuncDefHash FuncDefHash; +typedef struct IdList IdList; +typedef struct Index Index; +typedef struct IndexSample IndexSample; +typedef struct KeyClass KeyClass; +typedef struct KeyInfo KeyInfo; +typedef struct Lookaside Lookaside; +typedef struct LookasideSlot LookasideSlot; +typedef struct Module Module; +typedef struct NameContext NameContext; +typedef struct Parse Parse; +typedef struct ParseCleanup ParseCleanup; +typedef struct PreUpdate PreUpdate; +typedef struct PrintfArguments PrintfArguments; +typedef struct RenameToken RenameToken; +typedef struct Returning Returning; +typedef struct RowSet RowSet; +typedef struct Savepoint Savepoint; +typedef struct Select Select; +typedef struct SQLiteThread SQLiteThread; +typedef struct SelectDest SelectDest; +typedef struct SrcItem SrcItem; +typedef struct SrcList SrcList; +typedef struct sqlite3_str StrAccum; /* Internal alias for sqlite3_str */ +typedef struct Table Table; +typedef struct TableLock TableLock; +typedef struct Token Token; +typedef struct TreeView TreeView; +typedef struct Trigger Trigger; +typedef struct TriggerPrg TriggerPrg; +typedef struct TriggerStep TriggerStep; +typedef struct UnpackedRecord UnpackedRecord; +typedef struct Upsert Upsert; +typedef struct VTable VTable; +typedef struct VtabCtx VtabCtx; +typedef struct Walker Walker; +typedef struct WhereInfo WhereInfo; +typedef struct Window Window; +typedef struct With With; + + +/* +** The bitmask datatype defined below is used for various optimizations. +** +** Changing this from a 64-bit to a 32-bit type limits the number of +** tables in a join to 32 instead of 64. But it also reduces the size +** of the library by 738 bytes on ix86. +*/ +#ifdef SQLITE_BITMASK_TYPE + typedef SQLITE_BITMASK_TYPE Bitmask; +#else + typedef u64 Bitmask; +#endif + +/* +** The number of bits in a Bitmask. "BMS" means "BitMask Size". +*/ +#define BMS ((int)(sizeof(Bitmask)*8)) + +/* +** A bit in a Bitmask +*/ +#define MASKBIT(n) (((Bitmask)1)<<(n)) +#define MASKBIT64(n) (((u64)1)<<(n)) +#define MASKBIT32(n) (((unsigned int)1)<<(n)) +#define SMASKBIT32(n) ((n)<=31?((unsigned int)1)<<(n):0) +#define ALLBITS ((Bitmask)-1) + +/* A VList object records a mapping between parameters/variables/wildcards +** in the SQL statement (such as $abc, @pqr, or :xyz) and the integer +** variable number associated with that parameter. See the format description +** on the sqlite3VListAdd() routine for more information. A VList is really +** just an array of integers. +*/ +typedef int VList; + +/* +** Defer sourcing vdbe.h and btree.h until after the "u8" and +** "BusyHandler" typedefs. vdbe.h also requires a few of the opaque +** pointer types (i.e. FuncDef) defined above. +*/ +/************** Include pager.h in the middle of sqliteInt.h *****************/ +/************** Begin file pager.h *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface that the sqlite page cache +** subsystem. The page cache subsystem reads and writes a file a page +** at a time and provides a journal for rollback. +*/ + +#ifndef SQLITE_PAGER_H +#define SQLITE_PAGER_H + +/* +** Default maximum size for persistent journal files. A negative +** value means no limit. This value may be overridden using the +** sqlite3PagerJournalSizeLimit() API. See also "PRAGMA journal_size_limit". +*/ +#ifndef SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT + #define SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT -1 +#endif + +/* +** The type used to represent a page number. The first page in a file +** is called page 1. 0 is used to represent "not a page". +*/ +typedef u32 Pgno; + +/* +** Each open file is managed by a separate instance of the "Pager" structure. +*/ +typedef struct Pager Pager; + +/* +** Handle type for pages. +*/ +typedef struct PgHdr DbPage; + +/* +** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is +** reserved for working around a windows/posix incompatibility). It is +** used in the journal to signify that the remainder of the journal file +** is devoted to storing a super-journal name - there are no more pages to +** roll back. See comments for function writeSuperJournal() in pager.c +** for details. +*/ +#define PAGER_MJ_PGNO(x) ((Pgno)((PENDING_BYTE/((x)->pageSize))+1)) + +/* +** Allowed values for the flags parameter to sqlite3PagerOpen(). +** +** NOTE: These values must match the corresponding BTREE_ values in btree.h. +*/ +#define PAGER_OMIT_JOURNAL 0x0001 /* Do not use a rollback journal */ +#define PAGER_MEMORY 0x0002 /* In-memory database */ + +/* +** Valid values for the second argument to sqlite3PagerLockingMode(). +*/ +#define PAGER_LOCKINGMODE_QUERY -1 +#define PAGER_LOCKINGMODE_NORMAL 0 +#define PAGER_LOCKINGMODE_EXCLUSIVE 1 + +/* +** Numeric constants that encode the journalmode. +** +** The numeric values encoded here (other than PAGER_JOURNALMODE_QUERY) +** are exposed in the API via the "PRAGMA journal_mode" command and +** therefore cannot be changed without a compatibility break. +*/ +#define PAGER_JOURNALMODE_QUERY (-1) /* Query the value of journalmode */ +#define PAGER_JOURNALMODE_DELETE 0 /* Commit by deleting journal file */ +#define PAGER_JOURNALMODE_PERSIST 1 /* Commit by zeroing journal header */ +#define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. */ +#define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ +#define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ +#define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ + +/* +** Flags that make up the mask passed to sqlite3PagerGet(). +*/ +#define PAGER_GET_NOCONTENT 0x01 /* Do not load data from disk */ +#define PAGER_GET_READONLY 0x02 /* Read-only page is acceptable */ + +/* +** Flags for sqlite3PagerSetFlags() +** +** Value constraints (enforced via assert()): +** PAGER_FULLFSYNC == SQLITE_FullFSync +** PAGER_CKPT_FULLFSYNC == SQLITE_CkptFullFSync +** PAGER_CACHE_SPILL == SQLITE_CacheSpill +*/ +#define PAGER_SYNCHRONOUS_OFF 0x01 /* PRAGMA synchronous=OFF */ +#define PAGER_SYNCHRONOUS_NORMAL 0x02 /* PRAGMA synchronous=NORMAL */ +#define PAGER_SYNCHRONOUS_FULL 0x03 /* PRAGMA synchronous=FULL */ +#define PAGER_SYNCHRONOUS_EXTRA 0x04 /* PRAGMA synchronous=EXTRA */ +#define PAGER_SYNCHRONOUS_MASK 0x07 /* Mask for four values above */ +#define PAGER_FULLFSYNC 0x08 /* PRAGMA fullfsync=ON */ +#define PAGER_CKPT_FULLFSYNC 0x10 /* PRAGMA checkpoint_fullfsync=ON */ +#define PAGER_CACHESPILL 0x20 /* PRAGMA cache_spill=ON */ +#define PAGER_FLAGS_MASK 0x38 /* All above except SYNCHRONOUS */ + +/* +** The remainder of this file contains the declarations of the functions +** that make up the Pager sub-system API. See source code comments for +** a detailed description of each routine. +*/ + +/* Open and close a Pager connection. */ +SQLITE_PRIVATE int sqlite3PagerOpen( + sqlite3_vfs*, + Pager **ppPager, + const char*, + int, + int, + int, + void(*)(DbPage*) +); +SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager, sqlite3*); +SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager*, int, unsigned char*); + +/* Functions used to configure a Pager object. */ +SQLITE_PRIVATE void sqlite3PagerSetBusyHandler(Pager*, int(*)(void *), void *); +SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager*, u32*, int); +SQLITE_PRIVATE Pgno sqlite3PagerMaxPageCount(Pager*, Pgno); +SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager*, int); +SQLITE_PRIVATE int sqlite3PagerSetSpillsize(Pager*, int); +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64); +SQLITE_PRIVATE void sqlite3PagerShrink(Pager*); +SQLITE_PRIVATE void sqlite3PagerSetFlags(Pager*,unsigned); +SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *, int); +SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *, int); +SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager*); +SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager*); +SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *, i64); +SQLITE_PRIVATE sqlite3_backup **sqlite3PagerBackupPtr(Pager*); +SQLITE_PRIVATE int sqlite3PagerFlush(Pager*); + +/* Functions used to obtain and release page references. */ +SQLITE_PRIVATE int sqlite3PagerGet(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag); +SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno); +SQLITE_PRIVATE void sqlite3PagerRef(DbPage*); +SQLITE_PRIVATE void sqlite3PagerUnref(DbPage*); +SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage*); +SQLITE_PRIVATE void sqlite3PagerUnrefPageOne(DbPage*); + +/* Operations on page references. */ +SQLITE_PRIVATE int sqlite3PagerWrite(DbPage*); +SQLITE_PRIVATE void sqlite3PagerDontWrite(DbPage*); +SQLITE_PRIVATE int sqlite3PagerMovepage(Pager*,DbPage*,Pgno,int); +SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage*); +SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *); +SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *); + +/* Functions used to manage pager transactions and savepoints. */ +SQLITE_PRIVATE void sqlite3PagerPagecount(Pager*, int*); +SQLITE_PRIVATE int sqlite3PagerBegin(Pager*, int exFlag, int); +SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne(Pager*,const char *zSuper, int); +SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager*); +SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zSuper); +SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager*); +SQLITE_PRIVATE int sqlite3PagerRollback(Pager*); +SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int n); +SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); +SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager); + +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, sqlite3*, int, int*, int*); +SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager); +SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager); +SQLITE_PRIVATE int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); +SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager, sqlite3*); +# ifdef SQLITE_ENABLE_SNAPSHOT +SQLITE_PRIVATE int sqlite3PagerSnapshotGet(Pager*, sqlite3_snapshot **ppSnapshot); +SQLITE_PRIVATE int sqlite3PagerSnapshotOpen(Pager*, sqlite3_snapshot *pSnapshot); +SQLITE_PRIVATE int sqlite3PagerSnapshotRecover(Pager *pPager); +SQLITE_PRIVATE int sqlite3PagerSnapshotCheck(Pager *pPager, sqlite3_snapshot *pSnapshot); +SQLITE_PRIVATE void sqlite3PagerSnapshotUnlock(Pager *pPager); +# endif +#endif + +#if !defined(SQLITE_OMIT_WAL) && defined(SQLITE_ENABLE_SETLK_TIMEOUT) +SQLITE_PRIVATE int sqlite3PagerWalWriteLock(Pager*, int); +SQLITE_PRIVATE void sqlite3PagerWalDb(Pager*, sqlite3*); +#else +# define sqlite3PagerWalWriteLock(y,z) SQLITE_OK +# define sqlite3PagerWalDb(x,y) +#endif + +#ifdef SQLITE_DIRECT_OVERFLOW_READ +SQLITE_PRIVATE int sqlite3PagerDirectReadOk(Pager *pPager, Pgno pgno); +#endif + +#ifdef SQLITE_ENABLE_ZIPVFS +SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager); +#endif + +/* Functions used to query pager state and configuration. */ +SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager*); +SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager*); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3PagerRefcount(Pager*); +#endif +SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager*); +SQLITE_PRIVATE const char *sqlite3PagerFilename(const Pager*, int); +SQLITE_PRIVATE sqlite3_vfs *sqlite3PagerVfs(Pager*); +SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager*); +SQLITE_PRIVATE sqlite3_file *sqlite3PagerJrnlFile(Pager*); +SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager*); +SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager*); +SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager*); +SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *, int, int, int *); +SQLITE_PRIVATE void sqlite3PagerClearCache(Pager*); +SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *); + +/* Functions used to truncate the database file. */ +SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager*,Pgno); + +SQLITE_PRIVATE void sqlite3PagerRekey(DbPage*, Pgno, u16); + +/* Functions to support testing and debugging. */ +#if !defined(NDEBUG) || defined(SQLITE_TEST) +SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage*); +SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage*); +#endif +#ifdef SQLITE_TEST +SQLITE_PRIVATE int *sqlite3PagerStats(Pager*); +SQLITE_PRIVATE void sqlite3PagerRefdump(Pager*); + void disable_simulated_io_errors(void); + void enable_simulated_io_errors(void); +#else +# define disable_simulated_io_errors() +# define enable_simulated_io_errors() +#endif + +#endif /* SQLITE_PAGER_H */ + +/************** End of pager.h ***********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include btree.h in the middle of sqliteInt.h *****************/ +/************** Begin file btree.h *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface that the sqlite B-Tree file +** subsystem. See comments in the source code for a detailed description +** of what each interface routine does. +*/ +#ifndef SQLITE_BTREE_H +#define SQLITE_BTREE_H + +/* TODO: This definition is just included so other modules compile. It +** needs to be revisited. +*/ +#define SQLITE_N_BTREE_META 16 + +/* +** If defined as non-zero, auto-vacuum is enabled by default. Otherwise +** it must be turned on for each database using "PRAGMA auto_vacuum = 1". +*/ +#ifndef SQLITE_DEFAULT_AUTOVACUUM + #define SQLITE_DEFAULT_AUTOVACUUM 0 +#endif + +#define BTREE_AUTOVACUUM_NONE 0 /* Do not do auto-vacuum */ +#define BTREE_AUTOVACUUM_FULL 1 /* Do full auto-vacuum */ +#define BTREE_AUTOVACUUM_INCR 2 /* Incremental vacuum */ + +/* +** Forward declarations of structure +*/ +typedef struct Btree Btree; +typedef struct BtCursor BtCursor; +typedef struct BtShared BtShared; +typedef struct BtreePayload BtreePayload; + + +SQLITE_PRIVATE int sqlite3BtreeOpen( + sqlite3_vfs *pVfs, /* VFS to use with this b-tree */ + const char *zFilename, /* Name of database file to open */ + sqlite3 *db, /* Associated database connection */ + Btree **ppBtree, /* Return open Btree* here */ + int flags, /* Flags */ + int vfsFlags /* Flags passed through to VFS open */ +); + +/* The flags parameter to sqlite3BtreeOpen can be the bitwise or of the +** following values. +** +** NOTE: These values must match the corresponding PAGER_ values in +** pager.h. +*/ +#define BTREE_OMIT_JOURNAL 1 /* Do not create or use a rollback journal */ +#define BTREE_MEMORY 2 /* This is an in-memory DB */ +#define BTREE_SINGLE 4 /* The file contains at most 1 b-tree */ +#define BTREE_UNORDERED 8 /* Use of a hash implementation is OK */ + +SQLITE_PRIVATE int sqlite3BtreeClose(Btree*); +SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree*,int); +SQLITE_PRIVATE int sqlite3BtreeSetSpillSize(Btree*,int); +#if SQLITE_MAX_MMAP_SIZE>0 +SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64); +#endif +SQLITE_PRIVATE int sqlite3BtreeSetPagerFlags(Btree*,unsigned); +SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix); +SQLITE_PRIVATE int sqlite3BtreeGetPageSize(Btree*); +SQLITE_PRIVATE Pgno sqlite3BtreeMaxPageCount(Btree*,Pgno); +SQLITE_PRIVATE Pgno sqlite3BtreeLastPage(Btree*); +SQLITE_PRIVATE int sqlite3BtreeSecureDelete(Btree*,int); +SQLITE_PRIVATE int sqlite3BtreeGetRequestedReserve(Btree*); +SQLITE_PRIVATE int sqlite3BtreeGetReserveNoMutex(Btree *p); +SQLITE_PRIVATE int sqlite3BtreeSetAutoVacuum(Btree *, int); +SQLITE_PRIVATE int sqlite3BtreeGetAutoVacuum(Btree *); +SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree*,int,int*); +SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree*, const char*); +SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree*, int); +SQLITE_PRIVATE int sqlite3BtreeCommit(Btree*); +SQLITE_PRIVATE int sqlite3BtreeRollback(Btree*,int,int); +SQLITE_PRIVATE int sqlite3BtreeBeginStmt(Btree*,int); +SQLITE_PRIVATE int sqlite3BtreeCreateTable(Btree*, Pgno*, int flags); +SQLITE_PRIVATE int sqlite3BtreeTxnState(Btree*); +SQLITE_PRIVATE int sqlite3BtreeIsInBackup(Btree*); + +SQLITE_PRIVATE void *sqlite3BtreeSchema(Btree *, int, void(*)(void *)); +SQLITE_PRIVATE int sqlite3BtreeSchemaLocked(Btree *pBtree); +#ifndef SQLITE_OMIT_SHARED_CACHE +SQLITE_PRIVATE int sqlite3BtreeLockTable(Btree *pBtree, int iTab, u8 isWriteLock); +#endif + +/* Savepoints are named, nestable SQL transactions mostly implemented */ +/* in vdbe.c and pager.c See https://sqlite.org/lang_savepoint.html */ +SQLITE_PRIVATE int sqlite3BtreeSavepoint(Btree *, int, int); + +/* "Checkpoint" only refers to WAL. See https://sqlite.org/wal.html#ckpt */ +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3BtreeCheckpoint(Btree*, int, int *, int *); +#endif + +SQLITE_PRIVATE const char *sqlite3BtreeGetFilename(Btree *); +SQLITE_PRIVATE const char *sqlite3BtreeGetJournalname(Btree *); +SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *, Btree *); + +SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *); + +/* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR +** of the flags shown below. +** +** Every SQLite table must have either BTREE_INTKEY or BTREE_BLOBKEY set. +** With BTREE_INTKEY, the table key is a 64-bit integer and arbitrary data +** is stored in the leaves. (BTREE_INTKEY is used for SQL tables.) With +** BTREE_BLOBKEY, the key is an arbitrary BLOB and no content is stored +** anywhere - the key is the content. (BTREE_BLOBKEY is used for SQL +** indices.) +*/ +#define BTREE_INTKEY 1 /* Table has only 64-bit signed integer keys */ +#define BTREE_BLOBKEY 2 /* Table has keys only - no data */ + +SQLITE_PRIVATE int sqlite3BtreeDropTable(Btree*, int, int*); +SQLITE_PRIVATE int sqlite3BtreeClearTable(Btree*, int, i64*); +SQLITE_PRIVATE int sqlite3BtreeClearTableOfCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3BtreeTripAllCursors(Btree*, int, int); + +SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *pBtree, int idx, u32 *pValue); +SQLITE_PRIVATE int sqlite3BtreeUpdateMeta(Btree*, int idx, u32 value); + +SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p); + +/* +** The second parameter to sqlite3BtreeGetMeta or sqlite3BtreeUpdateMeta +** should be one of the following values. The integer values are assigned +** to constants so that the offset of the corresponding field in an +** SQLite database header may be found using the following formula: +** +** offset = 36 + (idx * 4) +** +** For example, the free-page-count field is located at byte offset 36 of +** the database file header. The incr-vacuum-flag field is located at +** byte offset 64 (== 36+4*7). +** +** The BTREE_DATA_VERSION value is not really a value stored in the header. +** It is a read-only number computed by the pager. But we merge it with +** the header value access routines since its access pattern is the same. +** Call it a "virtual meta value". +*/ +#define BTREE_FREE_PAGE_COUNT 0 +#define BTREE_SCHEMA_VERSION 1 +#define BTREE_FILE_FORMAT 2 +#define BTREE_DEFAULT_CACHE_SIZE 3 +#define BTREE_LARGEST_ROOT_PAGE 4 +#define BTREE_TEXT_ENCODING 5 +#define BTREE_USER_VERSION 6 +#define BTREE_INCR_VACUUM 7 +#define BTREE_APPLICATION_ID 8 +#define BTREE_DATA_VERSION 15 /* A virtual meta-value */ + +/* +** Kinds of hints that can be passed into the sqlite3BtreeCursorHint() +** interface. +** +** BTREE_HINT_RANGE (arguments: Expr*, Mem*) +** +** The first argument is an Expr* (which is guaranteed to be constant for +** the lifetime of the cursor) that defines constraints on which rows +** might be fetched with this cursor. The Expr* tree may contain +** TK_REGISTER nodes that refer to values stored in the array of registers +** passed as the second parameter. In other words, if Expr.op==TK_REGISTER +** then the value of the node is the value in Mem[pExpr.iTable]. Any +** TK_COLUMN node in the expression tree refers to the Expr.iColumn-th +** column of the b-tree of the cursor. The Expr tree will not contain +** any function calls nor subqueries nor references to b-trees other than +** the cursor being hinted. +** +** The design of the _RANGE hint is aid b-tree implementations that try +** to prefetch content from remote machines - to provide those +** implementations with limits on what needs to be prefetched and thereby +** reduce network bandwidth. +** +** Note that BTREE_HINT_FLAGS with BTREE_BULKLOAD is the only hint used by +** standard SQLite. The other hints are provided for extentions that use +** the SQLite parser and code generator but substitute their own storage +** engine. +*/ +#define BTREE_HINT_RANGE 0 /* Range constraints on queries */ + +/* +** Values that may be OR'd together to form the argument to the +** BTREE_HINT_FLAGS hint for sqlite3BtreeCursorHint(): +** +** The BTREE_BULKLOAD flag is set on index cursors when the index is going +** to be filled with content that is already in sorted order. +** +** The BTREE_SEEK_EQ flag is set on cursors that will get OP_SeekGE or +** OP_SeekLE opcodes for a range search, but where the range of entries +** selected will all have the same key. In other words, the cursor will +** be used only for equality key searches. +** +*/ +#define BTREE_BULKLOAD 0x00000001 /* Used to full index in sorted order */ +#define BTREE_SEEK_EQ 0x00000002 /* EQ seeks only - no range seeks */ + +/* +** Flags passed as the third argument to sqlite3BtreeCursor(). +** +** For read-only cursors the wrFlag argument is always zero. For read-write +** cursors it may be set to either (BTREE_WRCSR|BTREE_FORDELETE) or just +** (BTREE_WRCSR). If the BTREE_FORDELETE bit is set, then the cursor will +** only be used by SQLite for the following: +** +** * to seek to and then delete specific entries, and/or +** +** * to read values that will be used to create keys that other +** BTREE_FORDELETE cursors will seek to and delete. +** +** The BTREE_FORDELETE flag is an optimization hint. It is not used by +** by this, the native b-tree engine of SQLite, but it is available to +** alternative storage engines that might be substituted in place of this +** b-tree system. For alternative storage engines in which a delete of +** the main table row automatically deletes corresponding index rows, +** the FORDELETE flag hint allows those alternative storage engines to +** skip a lot of work. Namely: FORDELETE cursors may treat all SEEK +** and DELETE operations as no-ops, and any READ operation against a +** FORDELETE cursor may return a null row: 0x01 0x00. +*/ +#define BTREE_WRCSR 0x00000004 /* read-write cursor */ +#define BTREE_FORDELETE 0x00000008 /* Cursor is for seek/delete only */ + +SQLITE_PRIVATE int sqlite3BtreeCursor( + Btree*, /* BTree containing table to open */ + Pgno iTable, /* Index of root page */ + int wrFlag, /* 1 for writing. 0 for read-only */ + struct KeyInfo*, /* First argument to compare function */ + BtCursor *pCursor /* Space to write cursor structure */ +); +SQLITE_PRIVATE BtCursor *sqlite3BtreeFakeValidCursor(void); +SQLITE_PRIVATE int sqlite3BtreeCursorSize(void); +SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor*); +SQLITE_PRIVATE void sqlite3BtreeCursorHintFlags(BtCursor*, unsigned); +#ifdef SQLITE_ENABLE_CURSOR_HINTS +SQLITE_PRIVATE void sqlite3BtreeCursorHint(BtCursor*, int, ...); +#endif + +SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3BtreeTableMoveto( + BtCursor*, + i64 intKey, + int bias, + int *pRes +); +SQLITE_PRIVATE int sqlite3BtreeIndexMoveto( + BtCursor*, + UnpackedRecord *pUnKey, + int *pRes +); +SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor*); +SQLITE_PRIVATE int sqlite3BtreeCursorRestore(BtCursor*, int*); +SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor*, u8 flags); + +/* Allowed flags for sqlite3BtreeDelete() and sqlite3BtreeInsert() */ +#define BTREE_SAVEPOSITION 0x02 /* Leave cursor pointing at NEXT or PREV */ +#define BTREE_AUXDELETE 0x04 /* not the primary delete operation */ +#define BTREE_APPEND 0x08 /* Insert is likely an append */ +#define BTREE_PREFORMAT 0x80 /* Inserted data is a preformated cell */ + +/* An instance of the BtreePayload object describes the content of a single +** entry in either an index or table btree. +** +** Index btrees (used for indexes and also WITHOUT ROWID tables) contain +** an arbitrary key and no data. These btrees have pKey,nKey set to the +** key and the pData,nData,nZero fields are uninitialized. The aMem,nMem +** fields give an array of Mem objects that are a decomposition of the key. +** The nMem field might be zero, indicating that no decomposition is available. +** +** Table btrees (used for rowid tables) contain an integer rowid used as +** the key and passed in the nKey field. The pKey field is zero. +** pData,nData hold the content of the new entry. nZero extra zero bytes +** are appended to the end of the content when constructing the entry. +** The aMem,nMem fields are uninitialized for table btrees. +** +** Field usage summary: +** +** Table BTrees Index Btrees +** +** pKey always NULL encoded key +** nKey the ROWID length of pKey +** pData data not used +** aMem not used decomposed key value +** nMem not used entries in aMem +** nData length of pData not used +** nZero extra zeros after pData not used +** +** This object is used to pass information into sqlite3BtreeInsert(). The +** same information used to be passed as five separate parameters. But placing +** the information into this object helps to keep the interface more +** organized and understandable, and it also helps the resulting code to +** run a little faster by using fewer registers for parameter passing. +*/ +struct BtreePayload { + const void *pKey; /* Key content for indexes. NULL for tables */ + sqlite3_int64 nKey; /* Size of pKey for indexes. PRIMARY KEY for tabs */ + const void *pData; /* Data for tables. */ + sqlite3_value *aMem; /* First of nMem value in the unpacked pKey */ + u16 nMem; /* Number of aMem[] value. Might be zero */ + int nData; /* Size of pData. 0 if none. */ + int nZero; /* Extra zero data appended after pData,nData */ +}; + +SQLITE_PRIVATE int sqlite3BtreeInsert(BtCursor*, const BtreePayload *pPayload, + int flags, int seekResult); +SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor*, int *pRes); +SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor*, int *pRes); +SQLITE_PRIVATE int sqlite3BtreeNext(BtCursor*, int flags); +SQLITE_PRIVATE int sqlite3BtreeEof(BtCursor*); +SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor*, int flags); +SQLITE_PRIVATE i64 sqlite3BtreeIntegerKey(BtCursor*); +SQLITE_PRIVATE void sqlite3BtreeCursorPin(BtCursor*); +SQLITE_PRIVATE void sqlite3BtreeCursorUnpin(BtCursor*); +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC +SQLITE_PRIVATE i64 sqlite3BtreeOffset(BtCursor*); +#endif +SQLITE_PRIVATE int sqlite3BtreePayload(BtCursor*, u32 offset, u32 amt, void*); +SQLITE_PRIVATE const void *sqlite3BtreePayloadFetch(BtCursor*, u32 *pAmt); +SQLITE_PRIVATE u32 sqlite3BtreePayloadSize(BtCursor*); +SQLITE_PRIVATE sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor*); + +SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck(sqlite3*,Btree*,Pgno*aRoot,int nRoot,int,int*); +SQLITE_PRIVATE struct Pager *sqlite3BtreePager(Btree*); +SQLITE_PRIVATE i64 sqlite3BtreeRowCountEst(BtCursor*); + +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE int sqlite3BtreePayloadChecked(BtCursor*, u32 offset, u32 amt, void*); +SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*); +SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *); +#endif +SQLITE_PRIVATE void sqlite3BtreeClearCursor(BtCursor *); +SQLITE_PRIVATE int sqlite3BtreeSetVersion(Btree *pBt, int iVersion); +SQLITE_PRIVATE int sqlite3BtreeCursorHasHint(BtCursor*, unsigned int mask); +SQLITE_PRIVATE int sqlite3BtreeIsReadonly(Btree *pBt); +SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void); + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE sqlite3_uint64 sqlite3BtreeSeekCount(Btree*); +#else +# define sqlite3BtreeSeekCount(X) 0 +#endif + +#ifndef NDEBUG +SQLITE_PRIVATE int sqlite3BtreeCursorIsValid(BtCursor*); +#endif +SQLITE_PRIVATE int sqlite3BtreeCursorIsValidNN(BtCursor*); + +SQLITE_PRIVATE int sqlite3BtreeCount(sqlite3*, BtCursor*, i64*); + +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3BtreeCursorInfo(BtCursor*, int*, int); +SQLITE_PRIVATE void sqlite3BtreeCursorList(Btree*); +#endif + +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3BtreeCheckpoint(Btree*, int, int *, int *); +#endif + +SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor*, BtCursor*, i64); + +/* +** If we are not using shared cache, then there is no need to +** use mutexes to access the BtShared structures. So make the +** Enter and Leave procedures no-ops. +*/ +#ifndef SQLITE_OMIT_SHARED_CACHE +SQLITE_PRIVATE void sqlite3BtreeEnter(Btree*); +SQLITE_PRIVATE void sqlite3BtreeEnterAll(sqlite3*); +SQLITE_PRIVATE int sqlite3BtreeSharable(Btree*); +SQLITE_PRIVATE void sqlite3BtreeEnterCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3BtreeConnectionCount(Btree*); +#else +# define sqlite3BtreeEnter(X) +# define sqlite3BtreeEnterAll(X) +# define sqlite3BtreeSharable(X) 0 +# define sqlite3BtreeEnterCursor(X) +# define sqlite3BtreeConnectionCount(X) 1 +#endif + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE +SQLITE_PRIVATE void sqlite3BtreeLeave(Btree*); +SQLITE_PRIVATE void sqlite3BtreeLeaveCursor(BtCursor*); +SQLITE_PRIVATE void sqlite3BtreeLeaveAll(sqlite3*); +#ifndef NDEBUG + /* These routines are used inside assert() statements only. */ +SQLITE_PRIVATE int sqlite3BtreeHoldsMutex(Btree*); +SQLITE_PRIVATE int sqlite3BtreeHoldsAllMutexes(sqlite3*); +SQLITE_PRIVATE int sqlite3SchemaMutexHeld(sqlite3*,int,Schema*); +#endif +#else + +# define sqlite3BtreeLeave(X) +# define sqlite3BtreeLeaveCursor(X) +# define sqlite3BtreeLeaveAll(X) + +# define sqlite3BtreeHoldsMutex(X) 1 +# define sqlite3BtreeHoldsAllMutexes(X) 1 +# define sqlite3SchemaMutexHeld(X,Y,Z) 1 +#endif + + +#endif /* SQLITE_BTREE_H */ + +/************** End of btree.h ***********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include vdbe.h in the middle of sqliteInt.h ******************/ +/************** Begin file vdbe.h ********************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Header file for the Virtual DataBase Engine (VDBE) +** +** This header defines the interface to the virtual database engine +** or VDBE. The VDBE implements an abstract machine that runs a +** simple program to access and modify the underlying database. +*/ +#ifndef SQLITE_VDBE_H +#define SQLITE_VDBE_H +/* #include */ + +/* +** A single VDBE is an opaque structure named "Vdbe". Only routines +** in the source file sqliteVdbe.c are allowed to see the insides +** of this structure. +*/ +typedef struct Vdbe Vdbe; + +/* +** The names of the following types declared in vdbeInt.h are required +** for the VdbeOp definition. +*/ +typedef struct sqlite3_value Mem; +typedef struct SubProgram SubProgram; + +/* +** A single instruction of the virtual machine has an opcode +** and as many as three operands. The instruction is recorded +** as an instance of the following structure: +*/ +struct VdbeOp { + u8 opcode; /* What operation to perform */ + signed char p4type; /* One of the P4_xxx constants for p4 */ + u16 p5; /* Fifth parameter is an unsigned 16-bit integer */ + int p1; /* First operand */ + int p2; /* Second parameter (often the jump destination) */ + int p3; /* The third parameter */ + union p4union { /* fourth parameter */ + int i; /* Integer value if p4type==P4_INT32 */ + void *p; /* Generic pointer */ + char *z; /* Pointer to data for string (char array) types */ + i64 *pI64; /* Used when p4type is P4_INT64 */ + double *pReal; /* Used when p4type is P4_REAL */ + FuncDef *pFunc; /* Used when p4type is P4_FUNCDEF */ + sqlite3_context *pCtx; /* Used when p4type is P4_FUNCCTX */ + CollSeq *pColl; /* Used when p4type is P4_COLLSEQ */ + Mem *pMem; /* Used when p4type is P4_MEM */ + VTable *pVtab; /* Used when p4type is P4_VTAB */ + KeyInfo *pKeyInfo; /* Used when p4type is P4_KEYINFO */ + u32 *ai; /* Used when p4type is P4_INTARRAY */ + SubProgram *pProgram; /* Used when p4type is P4_SUBPROGRAM */ + Table *pTab; /* Used when p4type is P4_TABLE */ +#ifdef SQLITE_ENABLE_CURSOR_HINTS + Expr *pExpr; /* Used when p4type is P4_EXPR */ +#endif + int (*xAdvance)(BtCursor *, int); + } p4; +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + char *zComment; /* Comment to improve readability */ +#endif +#ifdef VDBE_PROFILE + u32 cnt; /* Number of times this instruction was executed */ + u64 cycles; /* Total time spent executing this instruction */ +#endif +#ifdef SQLITE_VDBE_COVERAGE + u32 iSrcLine; /* Source-code line that generated this opcode + ** with flags in the upper 8 bits */ +#endif +}; +typedef struct VdbeOp VdbeOp; + + +/* +** A sub-routine used to implement a trigger program. +*/ +struct SubProgram { + VdbeOp *aOp; /* Array of opcodes for sub-program */ + int nOp; /* Elements in aOp[] */ + int nMem; /* Number of memory cells required */ + int nCsr; /* Number of cursors required */ + u8 *aOnce; /* Array of OP_Once flags */ + void *token; /* id that may be used to recursive triggers */ + SubProgram *pNext; /* Next sub-program already visited */ +}; + +/* +** A smaller version of VdbeOp used for the VdbeAddOpList() function because +** it takes up less space. +*/ +struct VdbeOpList { + u8 opcode; /* What operation to perform */ + signed char p1; /* First operand */ + signed char p2; /* Second parameter (often the jump destination) */ + signed char p3; /* Third parameter */ +}; +typedef struct VdbeOpList VdbeOpList; + +/* +** Allowed values of VdbeOp.p4type +*/ +#define P4_NOTUSED 0 /* The P4 parameter is not used */ +#define P4_TRANSIENT 0 /* P4 is a pointer to a transient string */ +#define P4_STATIC (-1) /* Pointer to a static string */ +#define P4_COLLSEQ (-2) /* P4 is a pointer to a CollSeq structure */ +#define P4_INT32 (-3) /* P4 is a 32-bit signed integer */ +#define P4_SUBPROGRAM (-4) /* P4 is a pointer to a SubProgram structure */ +#define P4_ADVANCE (-5) /* P4 is a pointer to BtreeNext() or BtreePrev() */ +#define P4_TABLE (-6) /* P4 is a pointer to a Table structure */ +/* Above do not own any resources. Must free those below */ +#define P4_FREE_IF_LE (-7) +#define P4_DYNAMIC (-7) /* Pointer to memory from sqliteMalloc() */ +#define P4_FUNCDEF (-8) /* P4 is a pointer to a FuncDef structure */ +#define P4_KEYINFO (-9) /* P4 is a pointer to a KeyInfo structure */ +#define P4_EXPR (-10) /* P4 is a pointer to an Expr tree */ +#define P4_MEM (-11) /* P4 is a pointer to a Mem* structure */ +#define P4_VTAB (-12) /* P4 is a pointer to an sqlite3_vtab structure */ +#define P4_REAL (-13) /* P4 is a 64-bit floating point value */ +#define P4_INT64 (-14) /* P4 is a 64-bit signed integer */ +#define P4_INTARRAY (-15) /* P4 is a vector of 32-bit integers */ +#define P4_FUNCCTX (-16) /* P4 is a pointer to an sqlite3_context object */ +#define P4_DYNBLOB (-17) /* Pointer to memory from sqliteMalloc() */ + +/* Error message codes for OP_Halt */ +#define P5_ConstraintNotNull 1 +#define P5_ConstraintUnique 2 +#define P5_ConstraintCheck 3 +#define P5_ConstraintFK 4 + +/* +** The Vdbe.aColName array contains 5n Mem structures, where n is the +** number of columns of data returned by the statement. +*/ +#define COLNAME_NAME 0 +#define COLNAME_DECLTYPE 1 +#define COLNAME_DATABASE 2 +#define COLNAME_TABLE 3 +#define COLNAME_COLUMN 4 +#ifdef SQLITE_ENABLE_COLUMN_METADATA +# define COLNAME_N 5 /* Number of COLNAME_xxx symbols */ +#else +# ifdef SQLITE_OMIT_DECLTYPE +# define COLNAME_N 1 /* Store only the name */ +# else +# define COLNAME_N 2 /* Store the name and decltype */ +# endif +#endif + +/* +** The following macro converts a label returned by sqlite3VdbeMakeLabel() +** into an index into the Parse.aLabel[] array that contains the resolved +** address of that label. +*/ +#define ADDR(X) (~(X)) + +/* +** The makefile scans the vdbe.c source file and creates the "opcodes.h" +** header file that defines a number for each opcode used by the VDBE. +*/ +/************** Include opcodes.h in the middle of vdbe.h ********************/ +/************** Begin file opcodes.h *****************************************/ +/* Automatically generated. Do not edit */ +/* See the tool/mkopcodeh.tcl script for details */ +#define OP_Savepoint 0 +#define OP_AutoCommit 1 +#define OP_Transaction 2 +#define OP_SorterNext 3 /* jump */ +#define OP_Prev 4 /* jump */ +#define OP_Next 5 /* jump */ +#define OP_Checkpoint 6 +#define OP_JournalMode 7 +#define OP_Vacuum 8 +#define OP_VFilter 9 /* jump, synopsis: iplan=r[P3] zplan='P4' */ +#define OP_VUpdate 10 /* synopsis: data=r[P3@P2] */ +#define OP_Goto 11 /* jump */ +#define OP_Gosub 12 /* jump */ +#define OP_InitCoroutine 13 /* jump */ +#define OP_Yield 14 /* jump */ +#define OP_MustBeInt 15 /* jump */ +#define OP_Jump 16 /* jump */ +#define OP_Once 17 /* jump */ +#define OP_If 18 /* jump */ +#define OP_Not 19 /* same as TK_NOT, synopsis: r[P2]= !r[P1] */ +#define OP_IfNot 20 /* jump */ +#define OP_IsNullOrType 21 /* jump, synopsis: if typeof(r[P1]) IN (P3,5) goto P2 */ +#define OP_IfNullRow 22 /* jump, synopsis: if P1.nullRow then r[P3]=NULL, goto P2 */ +#define OP_SeekLT 23 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekLE 24 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekGE 25 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekGT 26 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IfNotOpen 27 /* jump, synopsis: if( !csr[P1] ) goto P2 */ +#define OP_IfNoHope 28 /* jump, synopsis: key=r[P3@P4] */ +#define OP_NoConflict 29 /* jump, synopsis: key=r[P3@P4] */ +#define OP_NotFound 30 /* jump, synopsis: key=r[P3@P4] */ +#define OP_Found 31 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekRowid 32 /* jump, synopsis: intkey=r[P3] */ +#define OP_NotExists 33 /* jump, synopsis: intkey=r[P3] */ +#define OP_Last 34 /* jump */ +#define OP_IfSmaller 35 /* jump */ +#define OP_SorterSort 36 /* jump */ +#define OP_Sort 37 /* jump */ +#define OP_Rewind 38 /* jump */ +#define OP_IdxLE 39 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IdxGT 40 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IdxLT 41 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IdxGE 42 /* jump, synopsis: key=r[P3@P4] */ +#define OP_Or 43 /* same as TK_OR, synopsis: r[P3]=(r[P1] || r[P2]) */ +#define OP_And 44 /* same as TK_AND, synopsis: r[P3]=(r[P1] && r[P2]) */ +#define OP_RowSetRead 45 /* jump, synopsis: r[P3]=rowset(P1) */ +#define OP_RowSetTest 46 /* jump, synopsis: if r[P3] in rowset(P1) goto P2 */ +#define OP_Program 47 /* jump */ +#define OP_FkIfZero 48 /* jump, synopsis: if fkctr[P1]==0 goto P2 */ +#define OP_IfPos 49 /* jump, synopsis: if r[P1]>0 then r[P1]-=P3, goto P2 */ +#define OP_IsNull 50 /* jump, same as TK_ISNULL, synopsis: if r[P1]==NULL goto P2 */ +#define OP_NotNull 51 /* jump, same as TK_NOTNULL, synopsis: if r[P1]!=NULL goto P2 */ +#define OP_Ne 52 /* jump, same as TK_NE, synopsis: IF r[P3]!=r[P1] */ +#define OP_Eq 53 /* jump, same as TK_EQ, synopsis: IF r[P3]==r[P1] */ +#define OP_Gt 54 /* jump, same as TK_GT, synopsis: IF r[P3]>r[P1] */ +#define OP_Le 55 /* jump, same as TK_LE, synopsis: IF r[P3]<=r[P1] */ +#define OP_Lt 56 /* jump, same as TK_LT, synopsis: IF r[P3]=r[P1] */ +#define OP_ElseEq 58 /* jump, same as TK_ESCAPE */ +#define OP_IfNotZero 59 /* jump, synopsis: if r[P1]!=0 then r[P1]--, goto P2 */ +#define OP_DecrJumpZero 60 /* jump, synopsis: if (--r[P1])==0 goto P2 */ +#define OP_IncrVacuum 61 /* jump */ +#define OP_VNext 62 /* jump */ +#define OP_Filter 63 /* jump, synopsis: if key(P3@P4) not in filter(P1) goto P2 */ +#define OP_Init 64 /* jump, synopsis: Start at P2 */ +#define OP_PureFunc 65 /* synopsis: r[P3]=func(r[P2@NP]) */ +#define OP_Function 66 /* synopsis: r[P3]=func(r[P2@NP]) */ +#define OP_Return 67 +#define OP_EndCoroutine 68 +#define OP_HaltIfNull 69 /* synopsis: if r[P3]=null halt */ +#define OP_Halt 70 +#define OP_Integer 71 /* synopsis: r[P2]=P1 */ +#define OP_Int64 72 /* synopsis: r[P2]=P4 */ +#define OP_String 73 /* synopsis: r[P2]='P4' (len=P1) */ +#define OP_Null 74 /* synopsis: r[P2..P3]=NULL */ +#define OP_SoftNull 75 /* synopsis: r[P1]=NULL */ +#define OP_Blob 76 /* synopsis: r[P2]=P4 (len=P1) */ +#define OP_Variable 77 /* synopsis: r[P2]=parameter(P1,P4) */ +#define OP_Move 78 /* synopsis: r[P2@P3]=r[P1@P3] */ +#define OP_Copy 79 /* synopsis: r[P2@P3+1]=r[P1@P3+1] */ +#define OP_SCopy 80 /* synopsis: r[P2]=r[P1] */ +#define OP_IntCopy 81 /* synopsis: r[P2]=r[P1] */ +#define OP_FkCheck 82 +#define OP_ResultRow 83 /* synopsis: output=r[P1@P2] */ +#define OP_CollSeq 84 +#define OP_AddImm 85 /* synopsis: r[P1]=r[P1]+P2 */ +#define OP_RealAffinity 86 +#define OP_Cast 87 /* synopsis: affinity(r[P1]) */ +#define OP_Permutation 88 +#define OP_Compare 89 /* synopsis: r[P1@P3] <-> r[P2@P3] */ +#define OP_IsTrue 90 /* synopsis: r[P2] = coalesce(r[P1]==TRUE,P3) ^ P4 */ +#define OP_ZeroOrNull 91 /* synopsis: r[P2] = 0 OR NULL */ +#define OP_Offset 92 /* synopsis: r[P3] = sqlite_offset(P1) */ +#define OP_Column 93 /* synopsis: r[P3]=PX */ +#define OP_TypeCheck 94 /* synopsis: typecheck(r[P1@P2]) */ +#define OP_Affinity 95 /* synopsis: affinity(r[P1@P2]) */ +#define OP_MakeRecord 96 /* synopsis: r[P3]=mkrec(r[P1@P2]) */ +#define OP_Count 97 /* synopsis: r[P2]=count() */ +#define OP_ReadCookie 98 +#define OP_SetCookie 99 +#define OP_ReopenIdx 100 /* synopsis: root=P2 iDb=P3 */ +#define OP_OpenRead 101 /* synopsis: root=P2 iDb=P3 */ +#define OP_BitAnd 102 /* same as TK_BITAND, synopsis: r[P3]=r[P1]&r[P2] */ +#define OP_BitOr 103 /* same as TK_BITOR, synopsis: r[P3]=r[P1]|r[P2] */ +#define OP_ShiftLeft 104 /* same as TK_LSHIFT, synopsis: r[P3]=r[P2]<>r[P1] */ +#define OP_Add 106 /* same as TK_PLUS, synopsis: r[P3]=r[P1]+r[P2] */ +#define OP_Subtract 107 /* same as TK_MINUS, synopsis: r[P3]=r[P2]-r[P1] */ +#define OP_Multiply 108 /* same as TK_STAR, synopsis: r[P3]=r[P1]*r[P2] */ +#define OP_Divide 109 /* same as TK_SLASH, synopsis: r[P3]=r[P2]/r[P1] */ +#define OP_Remainder 110 /* same as TK_REM, synopsis: r[P3]=r[P2]%r[P1] */ +#define OP_Concat 111 /* same as TK_CONCAT, synopsis: r[P3]=r[P2]+r[P1] */ +#define OP_OpenWrite 112 /* synopsis: root=P2 iDb=P3 */ +#define OP_OpenDup 113 +#define OP_BitNot 114 /* same as TK_BITNOT, synopsis: r[P2]= ~r[P1] */ +#define OP_OpenAutoindex 115 /* synopsis: nColumn=P2 */ +#define OP_OpenEphemeral 116 /* synopsis: nColumn=P2 */ +#define OP_String8 117 /* same as TK_STRING, synopsis: r[P2]='P4' */ +#define OP_SorterOpen 118 +#define OP_SequenceTest 119 /* synopsis: if( cursor[P1].ctr++ ) pc = P2 */ +#define OP_OpenPseudo 120 /* synopsis: P3 columns in r[P2] */ +#define OP_Close 121 +#define OP_ColumnsUsed 122 +#define OP_SeekScan 123 /* synopsis: Scan-ahead up to P1 rows */ +#define OP_SeekHit 124 /* synopsis: set P2<=seekHit<=P3 */ +#define OP_Sequence 125 /* synopsis: r[P2]=cursor[P1].ctr++ */ +#define OP_NewRowid 126 /* synopsis: r[P2]=rowid */ +#define OP_Insert 127 /* synopsis: intkey=r[P3] data=r[P2] */ +#define OP_RowCell 128 +#define OP_Delete 129 +#define OP_ResetCount 130 +#define OP_SorterCompare 131 /* synopsis: if key(P1)!=trim(r[P3],P4) goto P2 */ +#define OP_SorterData 132 /* synopsis: r[P2]=data */ +#define OP_RowData 133 /* synopsis: r[P2]=data */ +#define OP_Rowid 134 /* synopsis: r[P2]=rowid */ +#define OP_NullRow 135 +#define OP_SeekEnd 136 +#define OP_IdxInsert 137 /* synopsis: key=r[P2] */ +#define OP_SorterInsert 138 /* synopsis: key=r[P2] */ +#define OP_IdxDelete 139 /* synopsis: key=r[P2@P3] */ +#define OP_DeferredSeek 140 /* synopsis: Move P3 to P1.rowid if needed */ +#define OP_IdxRowid 141 /* synopsis: r[P2]=rowid */ +#define OP_FinishSeek 142 +#define OP_Destroy 143 +#define OP_Clear 144 +#define OP_ResetSorter 145 +#define OP_CreateBtree 146 /* synopsis: r[P2]=root iDb=P1 flags=P3 */ +#define OP_SqlExec 147 +#define OP_ParseSchema 148 +#define OP_LoadAnalysis 149 +#define OP_DropTable 150 +#define OP_DropIndex 151 +#define OP_DropTrigger 152 +#define OP_Real 153 /* same as TK_FLOAT, synopsis: r[P2]=P4 */ +#define OP_IntegrityCk 154 +#define OP_RowSetAdd 155 /* synopsis: rowset(P1)=r[P2] */ +#define OP_Param 156 +#define OP_FkCounter 157 /* synopsis: fkctr[P1]+=P2 */ +#define OP_MemMax 158 /* synopsis: r[P1]=max(r[P1],r[P2]) */ +#define OP_OffsetLimit 159 /* synopsis: if r[P1]>0 then r[P2]=r[P1]+max(0,r[P3]) else r[P2]=(-1) */ +#define OP_AggInverse 160 /* synopsis: accum=r[P3] inverse(r[P2@P5]) */ +#define OP_AggStep 161 /* synopsis: accum=r[P3] step(r[P2@P5]) */ +#define OP_AggStep1 162 /* synopsis: accum=r[P3] step(r[P2@P5]) */ +#define OP_AggValue 163 /* synopsis: r[P3]=value N=P2 */ +#define OP_AggFinal 164 /* synopsis: accum=r[P1] N=P2 */ +#define OP_Expire 165 +#define OP_CursorLock 166 +#define OP_CursorUnlock 167 +#define OP_TableLock 168 /* synopsis: iDb=P1 root=P2 write=P3 */ +#define OP_VBegin 169 +#define OP_VCreate 170 +#define OP_VDestroy 171 +#define OP_VOpen 172 +#define OP_VInitIn 173 /* synopsis: r[P2]=ValueList(P1,P3) */ +#define OP_VColumn 174 /* synopsis: r[P3]=vcolumn(P2) */ +#define OP_VRename 175 +#define OP_Pagecount 176 +#define OP_MaxPgcnt 177 +#define OP_FilterAdd 178 /* synopsis: filter(P1) += key(P3@P4) */ +#define OP_Trace 179 +#define OP_CursorHint 180 +#define OP_ReleaseReg 181 /* synopsis: release r[P1@P2] mask P3 */ +#define OP_Noop 182 +#define OP_Explain 183 +#define OP_Abortable 184 + +/* Properties such as "out2" or "jump" that are specified in +** comments following the "case" for each opcode in the vdbe.c +** are encoded into bitvectors as follows: +*/ +#define OPFLG_JUMP 0x01 /* jump: P2 holds jmp target */ +#define OPFLG_IN1 0x02 /* in1: P1 is an input */ +#define OPFLG_IN2 0x04 /* in2: P2 is an input */ +#define OPFLG_IN3 0x08 /* in3: P3 is an input */ +#define OPFLG_OUT2 0x10 /* out2: P2 is an output */ +#define OPFLG_OUT3 0x20 /* out3: P3 is an output */ +#define OPFLG_INITIALIZER {\ +/* 0 */ 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x10,\ +/* 8 */ 0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x03, 0x03,\ +/* 16 */ 0x01, 0x01, 0x03, 0x12, 0x03, 0x03, 0x01, 0x09,\ +/* 24 */ 0x09, 0x09, 0x09, 0x01, 0x09, 0x09, 0x09, 0x09,\ +/* 32 */ 0x09, 0x09, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\ +/* 40 */ 0x01, 0x01, 0x01, 0x26, 0x26, 0x23, 0x0b, 0x01,\ +/* 48 */ 0x01, 0x03, 0x03, 0x03, 0x0b, 0x0b, 0x0b, 0x0b,\ +/* 56 */ 0x0b, 0x0b, 0x01, 0x03, 0x03, 0x01, 0x01, 0x01,\ +/* 64 */ 0x01, 0x00, 0x00, 0x02, 0x02, 0x08, 0x00, 0x10,\ +/* 72 */ 0x10, 0x10, 0x10, 0x00, 0x10, 0x10, 0x00, 0x00,\ +/* 80 */ 0x10, 0x10, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02,\ +/* 88 */ 0x00, 0x00, 0x12, 0x1e, 0x20, 0x00, 0x00, 0x00,\ +/* 96 */ 0x00, 0x10, 0x10, 0x00, 0x00, 0x00, 0x26, 0x26,\ +/* 104 */ 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26,\ +/* 112 */ 0x00, 0x00, 0x12, 0x00, 0x00, 0x10, 0x00, 0x00,\ +/* 120 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00,\ +/* 128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,\ +/* 136 */ 0x00, 0x04, 0x04, 0x00, 0x00, 0x10, 0x00, 0x10,\ +/* 144 */ 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,\ +/* 152 */ 0x00, 0x10, 0x00, 0x06, 0x10, 0x00, 0x04, 0x1a,\ +/* 160 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\ +/* 168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00,\ +/* 176 */ 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\ +/* 184 */ 0x00,} + +/* The resolve3P2Values() routine is able to run faster if it knows +** the value of the largest JUMP opcode. The smaller the maximum +** JUMP opcode the better, so the mkopcodeh.tcl script that +** generated this include file strives to group all JUMP opcodes +** together near the beginning of the list. +*/ +#define SQLITE_MX_JUMP_OPCODE 64 /* Maximum JUMP opcode */ + +/************** End of opcodes.h *********************************************/ +/************** Continuing where we left off in vdbe.h ***********************/ + +/* +** Additional non-public SQLITE_PREPARE_* flags +*/ +#define SQLITE_PREPARE_SAVESQL 0x80 /* Preserve SQL text */ +#define SQLITE_PREPARE_MASK 0x0f /* Mask of public flags */ + +/* +** Prototypes for the VDBE interface. See comments on the implementation +** for a description of what each of these routines does. +*/ +SQLITE_PRIVATE Vdbe *sqlite3VdbeCreate(Parse*); +SQLITE_PRIVATE Parse *sqlite3VdbeParser(Vdbe*); +SQLITE_PRIVATE int sqlite3VdbeAddOp0(Vdbe*,int); +SQLITE_PRIVATE int sqlite3VdbeAddOp1(Vdbe*,int,int); +SQLITE_PRIVATE int sqlite3VdbeAddOp2(Vdbe*,int,int,int); +SQLITE_PRIVATE int sqlite3VdbeGoto(Vdbe*,int); +SQLITE_PRIVATE int sqlite3VdbeLoadString(Vdbe*,int,const char*); +SQLITE_PRIVATE void sqlite3VdbeMultiLoad(Vdbe*,int,const char*,...); +SQLITE_PRIVATE int sqlite3VdbeAddOp3(Vdbe*,int,int,int,int); +SQLITE_PRIVATE int sqlite3VdbeAddOp4(Vdbe*,int,int,int,int,const char *zP4,int); +SQLITE_PRIVATE int sqlite3VdbeAddOp4Dup8(Vdbe*,int,int,int,int,const u8*,int); +SQLITE_PRIVATE int sqlite3VdbeAddOp4Int(Vdbe*,int,int,int,int,int); +SQLITE_PRIVATE int sqlite3VdbeAddFunctionCall(Parse*,int,int,int,int,const FuncDef*,int); +SQLITE_PRIVATE void sqlite3VdbeEndCoroutine(Vdbe*,int); +#if defined(SQLITE_DEBUG) && !defined(SQLITE_TEST_REALLOC_STRESS) +SQLITE_PRIVATE void sqlite3VdbeVerifyNoMallocRequired(Vdbe *p, int N); +SQLITE_PRIVATE void sqlite3VdbeVerifyNoResultRow(Vdbe *p); +#else +# define sqlite3VdbeVerifyNoMallocRequired(A,B) +# define sqlite3VdbeVerifyNoResultRow(A) +#endif +#if defined(SQLITE_DEBUG) +SQLITE_PRIVATE void sqlite3VdbeVerifyAbortable(Vdbe *p, int); +#else +# define sqlite3VdbeVerifyAbortable(A,B) +#endif +SQLITE_PRIVATE VdbeOp *sqlite3VdbeAddOpList(Vdbe*, int nOp, VdbeOpList const *aOp,int iLineno); +#ifndef SQLITE_OMIT_EXPLAIN +SQLITE_PRIVATE void sqlite3VdbeExplain(Parse*,u8,const char*,...); +SQLITE_PRIVATE void sqlite3VdbeExplainPop(Parse*); +SQLITE_PRIVATE int sqlite3VdbeExplainParent(Parse*); +# define ExplainQueryPlan(P) sqlite3VdbeExplain P +# define ExplainQueryPlanPop(P) sqlite3VdbeExplainPop(P) +# define ExplainQueryPlanParent(P) sqlite3VdbeExplainParent(P) +#else +# define ExplainQueryPlan(P) +# define ExplainQueryPlanPop(P) +# define ExplainQueryPlanParent(P) 0 +# define sqlite3ExplainBreakpoint(A,B) /*no-op*/ +#endif +#if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_EXPLAIN) +SQLITE_PRIVATE void sqlite3ExplainBreakpoint(const char*,const char*); +#else +# define sqlite3ExplainBreakpoint(A,B) /*no-op*/ +#endif +SQLITE_PRIVATE void sqlite3VdbeAddParseSchemaOp(Vdbe*, int, char*, u16); +SQLITE_PRIVATE void sqlite3VdbeChangeOpcode(Vdbe*, int addr, u8); +SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe*, int addr, int P1); +SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe*, int addr, int P2); +SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe*, int addr, int P3); +SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe*, u16 P5); +SQLITE_PRIVATE void sqlite3VdbeJumpHere(Vdbe*, int addr); +SQLITE_PRIVATE void sqlite3VdbeJumpHereOrPopInst(Vdbe*, int addr); +SQLITE_PRIVATE int sqlite3VdbeChangeToNoop(Vdbe*, int addr); +SQLITE_PRIVATE int sqlite3VdbeDeletePriorOpcode(Vdbe*, u8 op); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE void sqlite3VdbeReleaseRegisters(Parse*,int addr, int n, u32 mask, int); +#else +# define sqlite3VdbeReleaseRegisters(P,A,N,M,F) +#endif +SQLITE_PRIVATE void sqlite3VdbeChangeP4(Vdbe*, int addr, const char *zP4, int N); +SQLITE_PRIVATE void sqlite3VdbeAppendP4(Vdbe*, void *pP4, int p4type); +SQLITE_PRIVATE void sqlite3VdbeSetP4KeyInfo(Parse*, Index*); +SQLITE_PRIVATE void sqlite3VdbeUsesBtree(Vdbe*, int); +SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe*, int); +SQLITE_PRIVATE int sqlite3VdbeMakeLabel(Parse*); +SQLITE_PRIVATE void sqlite3VdbeRunOnlyOnce(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeReusable(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeDelete(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeClearObject(sqlite3*,Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeMakeReady(Vdbe*,Parse*); +SQLITE_PRIVATE int sqlite3VdbeFinalize(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeResolveLabel(Vdbe*, int); +SQLITE_PRIVATE int sqlite3VdbeCurrentAddr(Vdbe*); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *, int); +#endif +SQLITE_PRIVATE void sqlite3VdbeResetStepResult(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeRewind(Vdbe*); +SQLITE_PRIVATE int sqlite3VdbeReset(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeSetNumCols(Vdbe*,int); +SQLITE_PRIVATE int sqlite3VdbeSetColName(Vdbe*, int, int, const char *, void(*)(void*)); +SQLITE_PRIVATE void sqlite3VdbeCountChanges(Vdbe*); +SQLITE_PRIVATE sqlite3 *sqlite3VdbeDb(Vdbe*); +SQLITE_PRIVATE u8 sqlite3VdbePrepareFlags(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeSetSql(Vdbe*, const char *z, int n, u8); +#ifdef SQLITE_ENABLE_NORMALIZE +SQLITE_PRIVATE void sqlite3VdbeAddDblquoteStr(sqlite3*,Vdbe*,const char*); +SQLITE_PRIVATE int sqlite3VdbeUsesDoubleQuotedString(Vdbe*,const char*); +#endif +SQLITE_PRIVATE void sqlite3VdbeSwap(Vdbe*,Vdbe*); +SQLITE_PRIVATE VdbeOp *sqlite3VdbeTakeOpArray(Vdbe*, int*, int*); +SQLITE_PRIVATE sqlite3_value *sqlite3VdbeGetBoundValue(Vdbe*, int, u8); +SQLITE_PRIVATE void sqlite3VdbeSetVarmask(Vdbe*, int); +#ifndef SQLITE_OMIT_TRACE +SQLITE_PRIVATE char *sqlite3VdbeExpandSql(Vdbe*, const char*); +#endif +SQLITE_PRIVATE int sqlite3MemCompare(const Mem*, const Mem*, const CollSeq*); +SQLITE_PRIVATE int sqlite3BlobCompare(const Mem*, const Mem*); + +SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*); +SQLITE_PRIVATE int sqlite3VdbeRecordCompare(int,const void*,UnpackedRecord*); +SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip(int, const void *, UnpackedRecord *, int); +SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo*); + +typedef int (*RecordCompare)(int,const void*,UnpackedRecord*); +SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*); + +SQLITE_PRIVATE void sqlite3VdbeLinkSubProgram(Vdbe *, SubProgram *); +SQLITE_PRIVATE int sqlite3VdbeHasSubProgram(Vdbe*); + +SQLITE_PRIVATE int sqlite3NotPureFunc(sqlite3_context*); +#ifdef SQLITE_ENABLE_BYTECODE_VTAB +SQLITE_PRIVATE int sqlite3VdbeBytecodeVtabInit(sqlite3*); +#endif + +/* Use SQLITE_ENABLE_COMMENTS to enable generation of extra comments on +** each VDBE opcode. +** +** Use the SQLITE_ENABLE_MODULE_COMMENTS macro to see some extra no-op +** comments in VDBE programs that show key decision points in the code +** generator. +*/ +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS +SQLITE_PRIVATE void sqlite3VdbeComment(Vdbe*, const char*, ...); +# define VdbeComment(X) sqlite3VdbeComment X +SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe*, const char*, ...); +# define VdbeNoopComment(X) sqlite3VdbeNoopComment X +# ifdef SQLITE_ENABLE_MODULE_COMMENTS +# define VdbeModuleComment(X) sqlite3VdbeNoopComment X +# else +# define VdbeModuleComment(X) +# endif +#else +# define VdbeComment(X) +# define VdbeNoopComment(X) +# define VdbeModuleComment(X) +#endif + +/* +** The VdbeCoverage macros are used to set a coverage testing point +** for VDBE branch instructions. The coverage testing points are line +** numbers in the sqlite3.c source file. VDBE branch coverage testing +** only works with an amalagmation build. That's ok since a VDBE branch +** coverage build designed for testing the test suite only. No application +** should ever ship with VDBE branch coverage measuring turned on. +** +** VdbeCoverage(v) // Mark the previously coded instruction +** // as a branch +** +** VdbeCoverageIf(v, conditional) // Mark previous if conditional true +** +** VdbeCoverageAlwaysTaken(v) // Previous branch is always taken +** +** VdbeCoverageNeverTaken(v) // Previous branch is never taken +** +** VdbeCoverageNeverNull(v) // Previous three-way branch is only +** // taken on the first two ways. The +** // NULL option is not possible +** +** VdbeCoverageEqNe(v) // Previous OP_Jump is only interested +** // in distingishing equal and not-equal. +** +** Every VDBE branch operation must be tagged with one of the macros above. +** If not, then when "make test" is run with -DSQLITE_VDBE_COVERAGE and +** -DSQLITE_DEBUG then an ALWAYS() will fail in the vdbeTakeBranch() +** routine in vdbe.c, alerting the developer to the missed tag. +** +** During testing, the test application will invoke +** sqlite3_test_control(SQLITE_TESTCTRL_VDBE_COVERAGE,...) to set a callback +** routine that is invoked as each bytecode branch is taken. The callback +** contains the sqlite3.c source line number ov the VdbeCoverage macro and +** flags to indicate whether or not the branch was taken. The test application +** is responsible for keeping track of this and reporting byte-code branches +** that are never taken. +** +** See the VdbeBranchTaken() macro and vdbeTakeBranch() function in the +** vdbe.c source file for additional information. +*/ +#ifdef SQLITE_VDBE_COVERAGE +SQLITE_PRIVATE void sqlite3VdbeSetLineNumber(Vdbe*,int); +# define VdbeCoverage(v) sqlite3VdbeSetLineNumber(v,__LINE__) +# define VdbeCoverageIf(v,x) if(x)sqlite3VdbeSetLineNumber(v,__LINE__) +# define VdbeCoverageAlwaysTaken(v) \ + sqlite3VdbeSetLineNumber(v,__LINE__|0x5000000); +# define VdbeCoverageNeverTaken(v) \ + sqlite3VdbeSetLineNumber(v,__LINE__|0x6000000); +# define VdbeCoverageNeverNull(v) \ + sqlite3VdbeSetLineNumber(v,__LINE__|0x4000000); +# define VdbeCoverageNeverNullIf(v,x) \ + if(x)sqlite3VdbeSetLineNumber(v,__LINE__|0x4000000); +# define VdbeCoverageEqNe(v) \ + sqlite3VdbeSetLineNumber(v,__LINE__|0x8000000); +# define VDBE_OFFSET_LINENO(x) (__LINE__+x) +#else +# define VdbeCoverage(v) +# define VdbeCoverageIf(v,x) +# define VdbeCoverageAlwaysTaken(v) +# define VdbeCoverageNeverTaken(v) +# define VdbeCoverageNeverNull(v) +# define VdbeCoverageNeverNullIf(v,x) +# define VdbeCoverageEqNe(v) +# define VDBE_OFFSET_LINENO(x) 0 +#endif + +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +SQLITE_PRIVATE void sqlite3VdbeScanStatus(Vdbe*, int, int, int, LogEst, const char*); +#else +# define sqlite3VdbeScanStatus(a,b,c,d,e) +#endif + +#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) +SQLITE_PRIVATE void sqlite3VdbePrintOp(FILE*, int, VdbeOp*); +#endif + +#endif /* SQLITE_VDBE_H */ + +/************** End of vdbe.h ************************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include pcache.h in the middle of sqliteInt.h ****************/ +/************** Begin file pcache.h ******************************************/ +/* +** 2008 August 05 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface that the sqlite page cache +** subsystem. +*/ + +#ifndef _PCACHE_H_ + +typedef struct PgHdr PgHdr; +typedef struct PCache PCache; + +/* +** Every page in the cache is controlled by an instance of the following +** structure. +*/ +struct PgHdr { + sqlite3_pcache_page *pPage; /* Pcache object page handle */ + void *pData; /* Page data */ + void *pExtra; /* Extra content */ + PCache *pCache; /* PRIVATE: Cache that owns this page */ + PgHdr *pDirty; /* Transient list of dirty sorted by pgno */ + Pager *pPager; /* The pager this page is part of */ + Pgno pgno; /* Page number for this page */ +#ifdef SQLITE_CHECK_PAGES + u32 pageHash; /* Hash of page content */ +#endif + u16 flags; /* PGHDR flags defined below */ + + /********************************************************************** + ** Elements above, except pCache, are public. All that follow are + ** private to pcache.c and should not be accessed by other modules. + ** pCache is grouped with the public elements for efficiency. + */ + i16 nRef; /* Number of users of this page */ + PgHdr *pDirtyNext; /* Next element in list of dirty pages */ + PgHdr *pDirtyPrev; /* Previous element in list of dirty pages */ + /* NB: pDirtyNext and pDirtyPrev are undefined if the + ** PgHdr object is not dirty */ +}; + +/* Bit values for PgHdr.flags */ +#define PGHDR_CLEAN 0x001 /* Page not on the PCache.pDirty list */ +#define PGHDR_DIRTY 0x002 /* Page is on the PCache.pDirty list */ +#define PGHDR_WRITEABLE 0x004 /* Journaled and ready to modify */ +#define PGHDR_NEED_SYNC 0x008 /* Fsync the rollback journal before + ** writing this page to the database */ +#define PGHDR_DONT_WRITE 0x010 /* Do not write content to disk */ +#define PGHDR_MMAP 0x020 /* This is an mmap page object */ + +#define PGHDR_WAL_APPEND 0x040 /* Appended to wal file */ + +/* Initialize and shutdown the page cache subsystem */ +SQLITE_PRIVATE int sqlite3PcacheInitialize(void); +SQLITE_PRIVATE void sqlite3PcacheShutdown(void); + +/* Page cache buffer management: +** These routines implement SQLITE_CONFIG_PAGECACHE. +*/ +SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *, int sz, int n); + +/* Create a new pager cache. +** Under memory stress, invoke xStress to try to make pages clean. +** Only clean and unpinned pages can be reclaimed. +*/ +SQLITE_PRIVATE int sqlite3PcacheOpen( + int szPage, /* Size of every page */ + int szExtra, /* Extra space associated with each page */ + int bPurgeable, /* True if pages are on backing store */ + int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */ + void *pStress, /* Argument to xStress */ + PCache *pToInit /* Preallocated space for the PCache */ +); + +/* Modify the page-size after the cache has been created. */ +SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *, int); + +/* Return the size in bytes of a PCache object. Used to preallocate +** storage space. +*/ +SQLITE_PRIVATE int sqlite3PcacheSize(void); + +/* One release per successful fetch. Page is pinned until released. +** Reference counted. +*/ +SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch(PCache*, Pgno, int createFlag); +SQLITE_PRIVATE int sqlite3PcacheFetchStress(PCache*, Pgno, sqlite3_pcache_page**); +SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish(PCache*, Pgno, sqlite3_pcache_page *pPage); +SQLITE_PRIVATE void sqlite3PcacheRelease(PgHdr*); + +SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr*); /* Remove page from cache */ +SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr*); /* Make sure page is marked dirty */ +SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr*); /* Mark a single page as clean */ +SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache*); /* Mark all dirty list pages as clean */ +SQLITE_PRIVATE void sqlite3PcacheClearWritable(PCache*); + +/* Change a page number. Used by incr-vacuum. */ +SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr*, Pgno); + +/* Remove all pages with pgno>x. Reset the cache if x==0 */ +SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache*, Pgno x); + +/* Get a list of all dirty pages in the cache, sorted by page number */ +SQLITE_PRIVATE PgHdr *sqlite3PcacheDirtyList(PCache*); + +/* Reset and close the cache object */ +SQLITE_PRIVATE void sqlite3PcacheClose(PCache*); + +/* Clear flags from pages of the page cache */ +SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *); + +/* Discard the contents of the cache */ +SQLITE_PRIVATE void sqlite3PcacheClear(PCache*); + +/* Return the total number of outstanding page references */ +SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache*); + +/* Increment the reference count of an existing page */ +SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr*); + +SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr*); + +/* Return the total number of pages stored in the cache */ +SQLITE_PRIVATE int sqlite3PcachePagecount(PCache*); + +#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) +/* Iterate through all dirty pages currently stored in the cache. This +** interface is only available if SQLITE_CHECK_PAGES is defined when the +** library is built. +*/ +SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)); +#endif + +#if defined(SQLITE_DEBUG) +/* Check invariants on a PgHdr object */ +SQLITE_PRIVATE int sqlite3PcachePageSanity(PgHdr*); +#endif + +/* Set and get the suggested cache-size for the specified pager-cache. +** +** If no global maximum is configured, then the system attempts to limit +** the total number of pages cached by purgeable pager-caches to the sum +** of the suggested cache-sizes. +*/ +SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *, int); +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *); +#endif + +/* Set or get the suggested spill-size for the specified pager-cache. +** +** The spill-size is the minimum number of pages in cache before the cache +** will attempt to spill dirty pages by calling xStress. +*/ +SQLITE_PRIVATE int sqlite3PcacheSetSpillsize(PCache *, int); + +/* Free up as much memory as possible from the page cache */ +SQLITE_PRIVATE void sqlite3PcacheShrink(PCache*); + +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT +/* Try to return memory used by the pcache module to the main memory heap */ +SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int); +#endif + +#ifdef SQLITE_TEST +SQLITE_PRIVATE void sqlite3PcacheStats(int*,int*,int*,int*); +#endif + +SQLITE_PRIVATE void sqlite3PCacheSetDefault(void); + +/* Return the header size */ +SQLITE_PRIVATE int sqlite3HeaderSizePcache(void); +SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void); + +/* Number of dirty pages as a percentage of the configured cache size */ +SQLITE_PRIVATE int sqlite3PCachePercentDirty(PCache*); + +#ifdef SQLITE_DIRECT_OVERFLOW_READ +SQLITE_PRIVATE int sqlite3PCacheIsDirty(PCache *pCache); +#endif + +#endif /* _PCACHE_H_ */ + +/************** End of pcache.h **********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include os.h in the middle of sqliteInt.h ********************/ +/************** Begin file os.h **********************************************/ +/* +** 2001 September 16 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file (together with is companion C source-code file +** "os.c") attempt to abstract the underlying operating system so that +** the SQLite library will work on both POSIX and windows systems. +** +** This header file is #include-ed by sqliteInt.h and thus ends up +** being included by every source file. +*/ +#ifndef _SQLITE_OS_H_ +#define _SQLITE_OS_H_ + +/* +** Attempt to automatically detect the operating system and setup the +** necessary pre-processor macros for it. +*/ +/************** Include os_setup.h in the middle of os.h *********************/ +/************** Begin file os_setup.h ****************************************/ +/* +** 2013 November 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains pre-processor directives related to operating system +** detection and/or setup. +*/ +#ifndef SQLITE_OS_SETUP_H +#define SQLITE_OS_SETUP_H + +/* +** Figure out if we are dealing with Unix, Windows, or some other operating +** system. +** +** After the following block of preprocess macros, all of SQLITE_OS_UNIX, +** SQLITE_OS_WIN, and SQLITE_OS_OTHER will defined to either 1 or 0. One of +** the three will be 1. The other two will be 0. +*/ +#if defined(SQLITE_OS_OTHER) +# if SQLITE_OS_OTHER==1 +# undef SQLITE_OS_UNIX +# define SQLITE_OS_UNIX 0 +# undef SQLITE_OS_WIN +# define SQLITE_OS_WIN 0 +# else +# undef SQLITE_OS_OTHER +# endif +#endif +#if !defined(SQLITE_OS_UNIX) && !defined(SQLITE_OS_OTHER) +# define SQLITE_OS_OTHER 0 +# ifndef SQLITE_OS_WIN +# if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ + defined(__MINGW32__) || defined(__BORLANDC__) +# define SQLITE_OS_WIN 1 +# define SQLITE_OS_UNIX 0 +# else +# define SQLITE_OS_WIN 0 +# define SQLITE_OS_UNIX 1 +# endif +# else +# define SQLITE_OS_UNIX 0 +# endif +#else +# ifndef SQLITE_OS_WIN +# define SQLITE_OS_WIN 0 +# endif +#endif + +#endif /* SQLITE_OS_SETUP_H */ + +/************** End of os_setup.h ********************************************/ +/************** Continuing where we left off in os.h *************************/ + +/* If the SET_FULLSYNC macro is not defined above, then make it +** a no-op +*/ +#ifndef SET_FULLSYNC +# define SET_FULLSYNC(x,y) +#endif + +/* Maximum pathname length. Note: FILENAME_MAX defined by stdio.h +*/ +#ifndef SQLITE_MAX_PATHLEN +# define SQLITE_MAX_PATHLEN FILENAME_MAX +#endif + +/* +** The default size of a disk sector +*/ +#ifndef SQLITE_DEFAULT_SECTOR_SIZE +# define SQLITE_DEFAULT_SECTOR_SIZE 4096 +#endif + +/* +** Temporary files are named starting with this prefix followed by 16 random +** alphanumeric characters, and no file extension. They are stored in the +** OS's standard temporary file directory, and are deleted prior to exit. +** If sqlite is being embedded in another program, you may wish to change the +** prefix to reflect your program's name, so that if your program exits +** prematurely, old temporary files can be easily identified. This can be done +** using -DSQLITE_TEMP_FILE_PREFIX=myprefix_ on the compiler command line. +** +** 2006-10-31: The default prefix used to be "sqlite_". But then +** Mcafee started using SQLite in their anti-virus product and it +** started putting files with the "sqlite" name in the c:/temp folder. +** This annoyed many windows users. Those users would then do a +** Google search for "sqlite", find the telephone numbers of the +** developers and call to wake them up at night and complain. +** For this reason, the default name prefix is changed to be "sqlite" +** spelled backwards. So the temp files are still identified, but +** anybody smart enough to figure out the code is also likely smart +** enough to know that calling the developer will not help get rid +** of the file. +*/ +#ifndef SQLITE_TEMP_FILE_PREFIX +# define SQLITE_TEMP_FILE_PREFIX "etilqs_" +#endif + +/* +** The following values may be passed as the second argument to +** sqlite3OsLock(). The various locks exhibit the following semantics: +** +** SHARED: Any number of processes may hold a SHARED lock simultaneously. +** RESERVED: A single process may hold a RESERVED lock on a file at +** any time. Other processes may hold and obtain new SHARED locks. +** PENDING: A single process may hold a PENDING lock on a file at +** any one time. Existing SHARED locks may persist, but no new +** SHARED locks may be obtained by other processes. +** EXCLUSIVE: An EXCLUSIVE lock precludes all other locks. +** +** PENDING_LOCK may not be passed directly to sqlite3OsLock(). Instead, a +** process that requests an EXCLUSIVE lock may actually obtain a PENDING +** lock. This can be upgraded to an EXCLUSIVE lock by a subsequent call to +** sqlite3OsLock(). +*/ +#define NO_LOCK 0 +#define SHARED_LOCK 1 +#define RESERVED_LOCK 2 +#define PENDING_LOCK 3 +#define EXCLUSIVE_LOCK 4 + +/* +** File Locking Notes: (Mostly about windows but also some info for Unix) +** +** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because +** those functions are not available. So we use only LockFile() and +** UnlockFile(). +** +** LockFile() prevents not just writing but also reading by other processes. +** A SHARED_LOCK is obtained by locking a single randomly-chosen +** byte out of a specific range of bytes. The lock byte is obtained at +** random so two separate readers can probably access the file at the +** same time, unless they are unlucky and choose the same lock byte. +** An EXCLUSIVE_LOCK is obtained by locking all bytes in the range. +** There can only be one writer. A RESERVED_LOCK is obtained by locking +** a single byte of the file that is designated as the reserved lock byte. +** A PENDING_LOCK is obtained by locking a designated byte different from +** the RESERVED_LOCK byte. +** +** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available, +** which means we can use reader/writer locks. When reader/writer locks +** are used, the lock is placed on the same range of bytes that is used +** for probabilistic locking in Win95/98/ME. Hence, the locking scheme +** will support two or more Win95 readers or two or more WinNT readers. +** But a single Win95 reader will lock out all WinNT readers and a single +** WinNT reader will lock out all other Win95 readers. +** +** The following #defines specify the range of bytes used for locking. +** SHARED_SIZE is the number of bytes available in the pool from which +** a random byte is selected for a shared lock. The pool of bytes for +** shared locks begins at SHARED_FIRST. +** +** The same locking strategy and +** byte ranges are used for Unix. This leaves open the possibility of having +** clients on win95, winNT, and unix all talking to the same shared file +** and all locking correctly. To do so would require that samba (or whatever +** tool is being used for file sharing) implements locks correctly between +** windows and unix. I'm guessing that isn't likely to happen, but by +** using the same locking range we are at least open to the possibility. +** +** Locking in windows is manditory. For this reason, we cannot store +** actual data in the bytes used for locking. The pager never allocates +** the pages involved in locking therefore. SHARED_SIZE is selected so +** that all locks will fit on a single page even at the minimum page size. +** PENDING_BYTE defines the beginning of the locks. By default PENDING_BYTE +** is set high so that we don't have to allocate an unused page except +** for very large databases. But one should test the page skipping logic +** by setting PENDING_BYTE low and running the entire regression suite. +** +** Changing the value of PENDING_BYTE results in a subtly incompatible +** file format. Depending on how it is changed, you might not notice +** the incompatibility right away, even running a full regression test. +** The default location of PENDING_BYTE is the first byte past the +** 1GB boundary. +** +*/ +#ifdef SQLITE_OMIT_WSD +# define PENDING_BYTE (0x40000000) +#else +# define PENDING_BYTE sqlite3PendingByte +#endif +#define RESERVED_BYTE (PENDING_BYTE+1) +#define SHARED_FIRST (PENDING_BYTE+2) +#define SHARED_SIZE 510 + +/* +** Wrapper around OS specific sqlite3_os_init() function. +*/ +SQLITE_PRIVATE int sqlite3OsInit(void); + +/* +** Functions for accessing sqlite3_file methods +*/ +SQLITE_PRIVATE void sqlite3OsClose(sqlite3_file*); +SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file*, void*, int amt, i64 offset); +SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file*, const void*, int amt, i64 offset); +SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file*, i64 size); +SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file*, i64 *pSize); +SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut); +SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file*,int,void*); +SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file*,int,void*); +#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 +SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id); +SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id); +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); +SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int); +SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id); +SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int); +#endif /* SQLITE_OMIT_WAL */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *); + + +/* +** Functions for accessing sqlite3_vfs methods +*/ +SQLITE_PRIVATE int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *); +SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *, const char *, int); +SQLITE_PRIVATE int sqlite3OsAccess(sqlite3_vfs *, const char *, int, int *pResOut); +SQLITE_PRIVATE int sqlite3OsFullPathname(sqlite3_vfs *, const char *, int, char *); +#ifndef SQLITE_OMIT_LOAD_EXTENSION +SQLITE_PRIVATE void *sqlite3OsDlOpen(sqlite3_vfs *, const char *); +SQLITE_PRIVATE void sqlite3OsDlError(sqlite3_vfs *, int, char *); +SQLITE_PRIVATE void (*sqlite3OsDlSym(sqlite3_vfs *, void *, const char *))(void); +SQLITE_PRIVATE void sqlite3OsDlClose(sqlite3_vfs *, void *); +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ +SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *, int, char *); +SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *, int); +SQLITE_PRIVATE int sqlite3OsGetLastError(sqlite3_vfs*); +SQLITE_PRIVATE int sqlite3OsCurrentTimeInt64(sqlite3_vfs *, sqlite3_int64*); + +/* +** Convenience functions for opening and closing files using +** sqlite3_malloc() to obtain space for the file-handle structure. +*/ +SQLITE_PRIVATE int sqlite3OsOpenMalloc(sqlite3_vfs *, const char *, sqlite3_file **, int,int*); +SQLITE_PRIVATE void sqlite3OsCloseFree(sqlite3_file *); + +#endif /* _SQLITE_OS_H_ */ + +/************** End of os.h **************************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include mutex.h in the middle of sqliteInt.h *****************/ +/************** Begin file mutex.h *******************************************/ +/* +** 2007 August 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the common header for all mutex implementations. +** The sqliteInt.h header #includes this file so that it is available +** to all source files. We break it out in an effort to keep the code +** better organized. +** +** NOTE: source files should *not* #include this header file directly. +** Source files should #include the sqliteInt.h file and let that file +** include this one indirectly. +*/ + + +/* +** Figure out what version of the code to use. The choices are +** +** SQLITE_MUTEX_OMIT No mutex logic. Not even stubs. The +** mutexes implementation cannot be overridden +** at start-time. +** +** SQLITE_MUTEX_NOOP For single-threaded applications. No +** mutual exclusion is provided. But this +** implementation can be overridden at +** start-time. +** +** SQLITE_MUTEX_PTHREADS For multi-threaded applications on Unix. +** +** SQLITE_MUTEX_W32 For multi-threaded applications on Win32. +*/ +#if !SQLITE_THREADSAFE +# define SQLITE_MUTEX_OMIT +#endif +#if SQLITE_THREADSAFE && !defined(SQLITE_MUTEX_NOOP) +# if SQLITE_OS_UNIX +# define SQLITE_MUTEX_PTHREADS +# elif SQLITE_OS_WIN +# define SQLITE_MUTEX_W32 +# else +# define SQLITE_MUTEX_NOOP +# endif +#endif + +#ifdef SQLITE_MUTEX_OMIT +/* +** If this is a no-op implementation, implement everything as macros. +*/ +#define sqlite3_mutex_alloc(X) ((sqlite3_mutex*)8) +#define sqlite3_mutex_free(X) +#define sqlite3_mutex_enter(X) +#define sqlite3_mutex_try(X) SQLITE_OK +#define sqlite3_mutex_leave(X) +#define sqlite3_mutex_held(X) ((void)(X),1) +#define sqlite3_mutex_notheld(X) ((void)(X),1) +#define sqlite3MutexAlloc(X) ((sqlite3_mutex*)8) +#define sqlite3MutexInit() SQLITE_OK +#define sqlite3MutexEnd() +#define MUTEX_LOGIC(X) +#else +#define MUTEX_LOGIC(X) X +SQLITE_API int sqlite3_mutex_held(sqlite3_mutex*); +#endif /* defined(SQLITE_MUTEX_OMIT) */ + +/************** End of mutex.h ***********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* The SQLITE_EXTRA_DURABLE compile-time option used to set the default +** synchronous setting to EXTRA. It is no longer supported. +*/ +#ifdef SQLITE_EXTRA_DURABLE +# warning Use SQLITE_DEFAULT_SYNCHRONOUS=3 instead of SQLITE_EXTRA_DURABLE +# define SQLITE_DEFAULT_SYNCHRONOUS 3 +#endif + +/* +** Default synchronous levels. +** +** Note that (for historcal reasons) the PAGER_SYNCHRONOUS_* macros differ +** from the SQLITE_DEFAULT_SYNCHRONOUS value by 1. +** +** PAGER_SYNCHRONOUS DEFAULT_SYNCHRONOUS +** OFF 1 0 +** NORMAL 2 1 +** FULL 3 2 +** EXTRA 4 3 +** +** The "PRAGMA synchronous" statement also uses the zero-based numbers. +** In other words, the zero-based numbers are used for all external interfaces +** and the one-based values are used internally. +*/ +#ifndef SQLITE_DEFAULT_SYNCHRONOUS +# define SQLITE_DEFAULT_SYNCHRONOUS 2 +#endif +#ifndef SQLITE_DEFAULT_WAL_SYNCHRONOUS +# define SQLITE_DEFAULT_WAL_SYNCHRONOUS SQLITE_DEFAULT_SYNCHRONOUS +#endif + +/* +** Each database file to be accessed by the system is an instance +** of the following structure. There are normally two of these structures +** in the sqlite.aDb[] array. aDb[0] is the main database file and +** aDb[1] is the database file used to hold temporary tables. Additional +** databases may be attached. +*/ +struct Db { + char *zDbSName; /* Name of this database. (schema name, not filename) */ + Btree *pBt; /* The B*Tree structure for this database file */ + u8 safety_level; /* How aggressive at syncing data to disk */ + u8 bSyncSet; /* True if "PRAGMA synchronous=N" has been run */ + Schema *pSchema; /* Pointer to database schema (possibly shared) */ +}; + +/* +** An instance of the following structure stores a database schema. +** +** Most Schema objects are associated with a Btree. The exception is +** the Schema for the TEMP databaes (sqlite3.aDb[1]) which is free-standing. +** In shared cache mode, a single Schema object can be shared by multiple +** Btrees that refer to the same underlying BtShared object. +** +** Schema objects are automatically deallocated when the last Btree that +** references them is destroyed. The TEMP Schema is manually freed by +** sqlite3_close(). +* +** A thread must be holding a mutex on the corresponding Btree in order +** to access Schema content. This implies that the thread must also be +** holding a mutex on the sqlite3 connection pointer that owns the Btree. +** For a TEMP Schema, only the connection mutex is required. +*/ +struct Schema { + int schema_cookie; /* Database schema version number for this file */ + int iGeneration; /* Generation counter. Incremented with each change */ + Hash tblHash; /* All tables indexed by name */ + Hash idxHash; /* All (named) indices indexed by name */ + Hash trigHash; /* All triggers indexed by name */ + Hash fkeyHash; /* All foreign keys by referenced table name */ + Table *pSeqTab; /* The sqlite_sequence table used by AUTOINCREMENT */ + u8 file_format; /* Schema format version for this file */ + u8 enc; /* Text encoding used by this database */ + u16 schemaFlags; /* Flags associated with this schema */ + int cache_size; /* Number of pages to use in the cache */ +}; + +/* +** These macros can be used to test, set, or clear bits in the +** Db.pSchema->flags field. +*/ +#define DbHasProperty(D,I,P) (((D)->aDb[I].pSchema->schemaFlags&(P))==(P)) +#define DbHasAnyProperty(D,I,P) (((D)->aDb[I].pSchema->schemaFlags&(P))!=0) +#define DbSetProperty(D,I,P) (D)->aDb[I].pSchema->schemaFlags|=(P) +#define DbClearProperty(D,I,P) (D)->aDb[I].pSchema->schemaFlags&=~(P) + +/* +** Allowed values for the DB.pSchema->flags field. +** +** The DB_SchemaLoaded flag is set after the database schema has been +** read into internal hash tables. +** +** DB_UnresetViews means that one or more views have column names that +** have been filled out. If the schema changes, these column names might +** changes and so the view will need to be reset. +*/ +#define DB_SchemaLoaded 0x0001 /* The schema has been loaded */ +#define DB_UnresetViews 0x0002 /* Some views have defined column names */ +#define DB_ResetWanted 0x0008 /* Reset the schema when nSchemaLock==0 */ + +/* +** The number of different kinds of things that can be limited +** using the sqlite3_limit() interface. +*/ +#define SQLITE_N_LIMIT (SQLITE_LIMIT_WORKER_THREADS+1) + +/* +** Lookaside malloc is a set of fixed-size buffers that can be used +** to satisfy small transient memory allocation requests for objects +** associated with a particular database connection. The use of +** lookaside malloc provides a significant performance enhancement +** (approx 10%) by avoiding numerous malloc/free requests while parsing +** SQL statements. +** +** The Lookaside structure holds configuration information about the +** lookaside malloc subsystem. Each available memory allocation in +** the lookaside subsystem is stored on a linked list of LookasideSlot +** objects. +** +** Lookaside allocations are only allowed for objects that are associated +** with a particular database connection. Hence, schema information cannot +** be stored in lookaside because in shared cache mode the schema information +** is shared by multiple database connections. Therefore, while parsing +** schema information, the Lookaside.bEnabled flag is cleared so that +** lookaside allocations are not used to construct the schema objects. +** +** New lookaside allocations are only allowed if bDisable==0. When +** bDisable is greater than zero, sz is set to zero which effectively +** disables lookaside without adding a new test for the bDisable flag +** in a performance-critical path. sz should be set by to szTrue whenever +** bDisable changes back to zero. +** +** Lookaside buffers are initially held on the pInit list. As they are +** used and freed, they are added back to the pFree list. New allocations +** come off of pFree first, then pInit as a fallback. This dual-list +** allows use to compute a high-water mark - the maximum number of allocations +** outstanding at any point in the past - by subtracting the number of +** allocations on the pInit list from the total number of allocations. +** +** Enhancement on 2019-12-12: Two-size-lookaside +** The default lookaside configuration is 100 slots of 1200 bytes each. +** The larger slot sizes are important for performance, but they waste +** a lot of space, as most lookaside allocations are less than 128 bytes. +** The two-size-lookaside enhancement breaks up the lookaside allocation +** into two pools: One of 128-byte slots and the other of the default size +** (1200-byte) slots. Allocations are filled from the small-pool first, +** failing over to the full-size pool if that does not work. Thus more +** lookaside slots are available while also using less memory. +** This enhancement can be omitted by compiling with +** SQLITE_OMIT_TWOSIZE_LOOKASIDE. +*/ +struct Lookaside { + u32 bDisable; /* Only operate the lookaside when zero */ + u16 sz; /* Size of each buffer in bytes */ + u16 szTrue; /* True value of sz, even if disabled */ + u8 bMalloced; /* True if pStart obtained from sqlite3_malloc() */ + u32 nSlot; /* Number of lookaside slots allocated */ + u32 anStat[3]; /* 0: hits. 1: size misses. 2: full misses */ + LookasideSlot *pInit; /* List of buffers not previously used */ + LookasideSlot *pFree; /* List of available buffers */ +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + LookasideSlot *pSmallInit; /* List of small buffers not prediously used */ + LookasideSlot *pSmallFree; /* List of available small buffers */ + void *pMiddle; /* First byte past end of full-size buffers and + ** the first byte of LOOKASIDE_SMALL buffers */ +#endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ + void *pStart; /* First byte of available memory space */ + void *pEnd; /* First byte past end of available space */ +}; +struct LookasideSlot { + LookasideSlot *pNext; /* Next buffer in the list of free buffers */ +}; + +#define DisableLookaside db->lookaside.bDisable++;db->lookaside.sz=0 +#define EnableLookaside db->lookaside.bDisable--;\ + db->lookaside.sz=db->lookaside.bDisable?0:db->lookaside.szTrue + +/* Size of the smaller allocations in two-size lookside */ +#ifdef SQLITE_OMIT_TWOSIZE_LOOKASIDE +# define LOOKASIDE_SMALL 0 +#else +# define LOOKASIDE_SMALL 128 +#endif + +/* +** A hash table for built-in function definitions. (Application-defined +** functions use a regular table table from hash.h.) +** +** Hash each FuncDef structure into one of the FuncDefHash.a[] slots. +** Collisions are on the FuncDef.u.pHash chain. Use the SQLITE_FUNC_HASH() +** macro to compute a hash on the function name. +*/ +#define SQLITE_FUNC_HASH_SZ 23 +struct FuncDefHash { + FuncDef *a[SQLITE_FUNC_HASH_SZ]; /* Hash table for functions */ +}; +#define SQLITE_FUNC_HASH(C,L) (((C)+(L))%SQLITE_FUNC_HASH_SZ) + +#ifdef SQLITE_USER_AUTHENTICATION +/* +** Information held in the "sqlite3" database connection object and used +** to manage user authentication. +*/ +typedef struct sqlite3_userauth sqlite3_userauth; +struct sqlite3_userauth { + u8 authLevel; /* Current authentication level */ + int nAuthPW; /* Size of the zAuthPW in bytes */ + char *zAuthPW; /* Password used to authenticate */ + char *zAuthUser; /* User name used to authenticate */ +}; + +/* Allowed values for sqlite3_userauth.authLevel */ +#define UAUTH_Unknown 0 /* Authentication not yet checked */ +#define UAUTH_Fail 1 /* User authentication failed */ +#define UAUTH_User 2 /* Authenticated as a normal user */ +#define UAUTH_Admin 3 /* Authenticated as an administrator */ + +/* Functions used only by user authorization logic */ +SQLITE_PRIVATE int sqlite3UserAuthTable(const char*); +SQLITE_PRIVATE int sqlite3UserAuthCheckLogin(sqlite3*,const char*,u8*); +SQLITE_PRIVATE void sqlite3UserAuthInit(sqlite3*); +SQLITE_PRIVATE void sqlite3CryptFunc(sqlite3_context*,int,sqlite3_value**); + +#endif /* SQLITE_USER_AUTHENTICATION */ + +/* +** typedef for the authorization callback function. +*/ +#ifdef SQLITE_USER_AUTHENTICATION + typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, + const char*, const char*); +#else + typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, + const char*); +#endif + +#ifndef SQLITE_OMIT_DEPRECATED +/* This is an extra SQLITE_TRACE macro that indicates "legacy" tracing +** in the style of sqlite3_trace() +*/ +#define SQLITE_TRACE_LEGACY 0x40 /* Use the legacy xTrace */ +#define SQLITE_TRACE_XPROFILE 0x80 /* Use the legacy xProfile */ +#else +#define SQLITE_TRACE_LEGACY 0 +#define SQLITE_TRACE_XPROFILE 0 +#endif /* SQLITE_OMIT_DEPRECATED */ +#define SQLITE_TRACE_NONLEGACY_MASK 0x0f /* Normal flags */ + +/* +** Maximum number of sqlite3.aDb[] entries. This is the number of attached +** databases plus 2 for "main" and "temp". +*/ +#define SQLITE_MAX_DB (SQLITE_MAX_ATTACHED+2) + +/* +** Each database connection is an instance of the following structure. +*/ +struct sqlite3 { + sqlite3_vfs *pVfs; /* OS Interface */ + struct Vdbe *pVdbe; /* List of active virtual machines */ + CollSeq *pDfltColl; /* BINARY collseq for the database encoding */ + sqlite3_mutex *mutex; /* Connection mutex */ + Db *aDb; /* All backends */ + int nDb; /* Number of backends currently in use */ + u32 mDbFlags; /* flags recording internal state */ + u64 flags; /* flags settable by pragmas. See below */ + i64 lastRowid; /* ROWID of most recent insert (see above) */ + i64 szMmap; /* Default mmap_size setting */ + u32 nSchemaLock; /* Do not reset the schema when non-zero */ + unsigned int openFlags; /* Flags passed to sqlite3_vfs.xOpen() */ + int errCode; /* Most recent error code (SQLITE_*) */ + int errByteOffset; /* Byte offset of error in SQL statement */ + int errMask; /* & result codes with this before returning */ + int iSysErrno; /* Errno value from last system error */ + u32 dbOptFlags; /* Flags to enable/disable optimizations */ + u8 enc; /* Text encoding */ + u8 autoCommit; /* The auto-commit flag. */ + u8 temp_store; /* 1: file 2: memory 0: default */ + u8 mallocFailed; /* True if we have seen a malloc failure */ + u8 bBenignMalloc; /* Do not require OOMs if true */ + u8 dfltLockMode; /* Default locking-mode for attached dbs */ + signed char nextAutovac; /* Autovac setting after VACUUM if >=0 */ + u8 suppressErr; /* Do not issue error messages if true */ + u8 vtabOnConflict; /* Value to return for s3_vtab_on_conflict() */ + u8 isTransactionSavepoint; /* True if the outermost savepoint is a TS */ + u8 mTrace; /* zero or more SQLITE_TRACE flags */ + u8 noSharedCache; /* True if no shared-cache backends */ + u8 nSqlExec; /* Number of pending OP_SqlExec opcodes */ + u8 eOpenState; /* Current condition of the connection */ + int nextPagesize; /* Pagesize after VACUUM if >0 */ + i64 nChange; /* Value returned by sqlite3_changes() */ + i64 nTotalChange; /* Value returned by sqlite3_total_changes() */ + int aLimit[SQLITE_N_LIMIT]; /* Limits */ + int nMaxSorterMmap; /* Maximum size of regions mapped by sorter */ + struct sqlite3InitInfo { /* Information used during initialization */ + Pgno newTnum; /* Rootpage of table being initialized */ + u8 iDb; /* Which db file is being initialized */ + u8 busy; /* TRUE if currently initializing */ + unsigned orphanTrigger : 1; /* Last statement is orphaned TEMP trigger */ + unsigned imposterTable : 1; /* Building an imposter table */ + unsigned reopenMemdb : 1; /* ATTACH is really a reopen using MemDB */ + const char **azInit; /* "type", "name", and "tbl_name" columns */ + } init; + int nVdbeActive; /* Number of VDBEs currently running */ + int nVdbeRead; /* Number of active VDBEs that read or write */ + int nVdbeWrite; /* Number of active VDBEs that read and write */ + int nVdbeExec; /* Number of nested calls to VdbeExec() */ + int nVDestroy; /* Number of active OP_VDestroy operations */ + int nExtension; /* Number of loaded extensions */ + void **aExtension; /* Array of shared library handles */ + union { + void (*xLegacy)(void*,const char*); /* mTrace==SQLITE_TRACE_LEGACY */ + int (*xV2)(u32,void*,void*,void*); /* All other mTrace values */ + } trace; + void *pTraceArg; /* Argument to the trace function */ +#ifndef SQLITE_OMIT_DEPRECATED + void (*xProfile)(void*,const char*,u64); /* Profiling function */ + void *pProfileArg; /* Argument to profile function */ +#endif + void *pCommitArg; /* Argument to xCommitCallback() */ + int (*xCommitCallback)(void*); /* Invoked at every commit. */ + void *pRollbackArg; /* Argument to xRollbackCallback() */ + void (*xRollbackCallback)(void*); /* Invoked at every commit. */ + void *pUpdateArg; + void (*xUpdateCallback)(void*,int, const char*,const char*,sqlite_int64); + void *pAutovacPagesArg; /* Client argument to autovac_pages */ + void (*xAutovacDestr)(void*); /* Destructor for pAutovacPAgesArg */ + unsigned int (*xAutovacPages)(void*,const char*,u32,u32,u32); + Parse *pParse; /* Current parse */ +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + void *pPreUpdateArg; /* First argument to xPreUpdateCallback */ + void (*xPreUpdateCallback)( /* Registered using sqlite3_preupdate_hook() */ + void*,sqlite3*,int,char const*,char const*,sqlite3_int64,sqlite3_int64 + ); + PreUpdate *pPreUpdate; /* Context for active pre-update callback */ +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ +#ifndef SQLITE_OMIT_WAL + int (*xWalCallback)(void *, sqlite3 *, const char *, int); + void *pWalArg; +#endif + void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*); + void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*); + void *pCollNeededArg; + sqlite3_value *pErr; /* Most recent error message */ + union { + volatile int isInterrupted; /* True if sqlite3_interrupt has been called */ + double notUsed1; /* Spacer */ + } u1; + Lookaside lookaside; /* Lookaside malloc configuration */ +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth; /* Access authorization function */ + void *pAuthArg; /* 1st argument to the access auth function */ +#endif +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + int (*xProgress)(void *); /* The progress callback */ + void *pProgressArg; /* Argument to the progress callback */ + unsigned nProgressOps; /* Number of opcodes for progress callback */ +#endif +#ifndef SQLITE_OMIT_VIRTUALTABLE + int nVTrans; /* Allocated size of aVTrans */ + Hash aModule; /* populated by sqlite3_create_module() */ + VtabCtx *pVtabCtx; /* Context for active vtab connect/create */ + VTable **aVTrans; /* Virtual tables with open transactions */ + VTable *pDisconnect; /* Disconnect these in next sqlite3_prepare() */ +#endif + Hash aFunc; /* Hash table of connection functions */ + Hash aCollSeq; /* All collating sequences */ + BusyHandler busyHandler; /* Busy callback */ + Db aDbStatic[2]; /* Static space for the 2 default backends */ + Savepoint *pSavepoint; /* List of active savepoints */ + int nAnalysisLimit; /* Number of index rows to ANALYZE */ + int busyTimeout; /* Busy handler timeout, in msec */ + int nSavepoint; /* Number of non-transaction savepoints */ + int nStatement; /* Number of nested statement-transactions */ + i64 nDeferredCons; /* Net deferred constraints this transaction. */ + i64 nDeferredImmCons; /* Net deferred immediate constraints */ + int *pnBytesFreed; /* If not NULL, increment this in DbFree() */ +#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY + /* The following variables are all protected by the STATIC_MAIN + ** mutex, not by sqlite3.mutex. They are used by code in notify.c. + ** + ** When X.pUnlockConnection==Y, that means that X is waiting for Y to + ** unlock so that it can proceed. + ** + ** When X.pBlockingConnection==Y, that means that something that X tried + ** tried to do recently failed with an SQLITE_LOCKED error due to locks + ** held by Y. + */ + sqlite3 *pBlockingConnection; /* Connection that caused SQLITE_LOCKED */ + sqlite3 *pUnlockConnection; /* Connection to watch for unlock */ + void *pUnlockArg; /* Argument to xUnlockNotify */ + void (*xUnlockNotify)(void **, int); /* Unlock notify callback */ + sqlite3 *pNextBlocked; /* Next in list of all blocked connections */ +#endif +#ifdef SQLITE_USER_AUTHENTICATION + sqlite3_userauth auth; /* User authentication information */ +#endif +}; + +/* +** A macro to discover the encoding of a database. +*/ +#define SCHEMA_ENC(db) ((db)->aDb[0].pSchema->enc) +#define ENC(db) ((db)->enc) + +/* +** A u64 constant where the lower 32 bits are all zeros. Only the +** upper 32 bits are included in the argument. Necessary because some +** C-compilers still do not accept LL integer literals. +*/ +#define HI(X) ((u64)(X)<<32) + +/* +** Possible values for the sqlite3.flags. +** +** Value constraints (enforced via assert()): +** SQLITE_FullFSync == PAGER_FULLFSYNC +** SQLITE_CkptFullFSync == PAGER_CKPT_FULLFSYNC +** SQLITE_CacheSpill == PAGER_CACHE_SPILL +*/ +#define SQLITE_WriteSchema 0x00000001 /* OK to update SQLITE_SCHEMA */ +#define SQLITE_LegacyFileFmt 0x00000002 /* Create new databases in format 1 */ +#define SQLITE_FullColNames 0x00000004 /* Show full column names on SELECT */ +#define SQLITE_FullFSync 0x00000008 /* Use full fsync on the backend */ +#define SQLITE_CkptFullFSync 0x00000010 /* Use full fsync for checkpoint */ +#define SQLITE_CacheSpill 0x00000020 /* OK to spill pager cache */ +#define SQLITE_ShortColNames 0x00000040 /* Show short columns names */ +#define SQLITE_TrustedSchema 0x00000080 /* Allow unsafe functions and + ** vtabs in the schema definition */ +#define SQLITE_NullCallback 0x00000100 /* Invoke the callback once if the */ + /* result set is empty */ +#define SQLITE_IgnoreChecks 0x00000200 /* Do not enforce check constraints */ +#define SQLITE_ReadUncommit 0x00000400 /* READ UNCOMMITTED in shared-cache */ +#define SQLITE_NoCkptOnClose 0x00000800 /* No checkpoint on close()/DETACH */ +#define SQLITE_ReverseOrder 0x00001000 /* Reverse unordered SELECTs */ +#define SQLITE_RecTriggers 0x00002000 /* Enable recursive triggers */ +#define SQLITE_ForeignKeys 0x00004000 /* Enforce foreign key constraints */ +#define SQLITE_AutoIndex 0x00008000 /* Enable automatic indexes */ +#define SQLITE_LoadExtension 0x00010000 /* Enable load_extension */ +#define SQLITE_LoadExtFunc 0x00020000 /* Enable load_extension() SQL func */ +#define SQLITE_EnableTrigger 0x00040000 /* True to enable triggers */ +#define SQLITE_DeferFKs 0x00080000 /* Defer all FK constraints */ +#define SQLITE_QueryOnly 0x00100000 /* Disable database changes */ +#define SQLITE_CellSizeCk 0x00200000 /* Check btree cell sizes on load */ +#define SQLITE_Fts3Tokenizer 0x00400000 /* Enable fts3_tokenizer(2) */ +#define SQLITE_EnableQPSG 0x00800000 /* Query Planner Stability Guarantee*/ +#define SQLITE_TriggerEQP 0x01000000 /* Show trigger EXPLAIN QUERY PLAN */ +#define SQLITE_ResetDatabase 0x02000000 /* Reset the database */ +#define SQLITE_LegacyAlter 0x04000000 /* Legacy ALTER TABLE behaviour */ +#define SQLITE_NoSchemaError 0x08000000 /* Do not report schema parse errors*/ +#define SQLITE_Defensive 0x10000000 /* Input SQL is likely hostile */ +#define SQLITE_DqsDDL 0x20000000 /* dbl-quoted strings allowed in DDL*/ +#define SQLITE_DqsDML 0x40000000 /* dbl-quoted strings allowed in DML*/ +#define SQLITE_EnableView 0x80000000 /* Enable the use of views */ +#define SQLITE_CountRows HI(0x00001) /* Count rows changed by INSERT, */ + /* DELETE, or UPDATE and return */ + /* the count using a callback. */ +#define SQLITE_CorruptRdOnly HI(0x00002) /* Prohibit writes due to error */ + +/* Flags used only if debugging */ +#ifdef SQLITE_DEBUG +#define SQLITE_SqlTrace HI(0x0100000) /* Debug print SQL as it executes */ +#define SQLITE_VdbeListing HI(0x0200000) /* Debug listings of VDBE progs */ +#define SQLITE_VdbeTrace HI(0x0400000) /* True to trace VDBE execution */ +#define SQLITE_VdbeAddopTrace HI(0x0800000) /* Trace sqlite3VdbeAddOp() calls */ +#define SQLITE_VdbeEQP HI(0x1000000) /* Debug EXPLAIN QUERY PLAN */ +#define SQLITE_ParserTrace HI(0x2000000) /* PRAGMA parser_trace=ON */ +#endif + +/* +** Allowed values for sqlite3.mDbFlags +*/ +#define DBFLAG_SchemaChange 0x0001 /* Uncommitted Hash table changes */ +#define DBFLAG_PreferBuiltin 0x0002 /* Preference to built-in funcs */ +#define DBFLAG_Vacuum 0x0004 /* Currently in a VACUUM */ +#define DBFLAG_VacuumInto 0x0008 /* Currently running VACUUM INTO */ +#define DBFLAG_SchemaKnownOk 0x0010 /* Schema is known to be valid */ +#define DBFLAG_InternalFunc 0x0020 /* Allow use of internal functions */ +#define DBFLAG_EncodingFixed 0x0040 /* No longer possible to change enc. */ + +/* +** Bits of the sqlite3.dbOptFlags field that are used by the +** sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS,...) interface to +** selectively disable various optimizations. +*/ +#define SQLITE_QueryFlattener 0x00000001 /* Query flattening */ +#define SQLITE_WindowFunc 0x00000002 /* Use xInverse for window functions */ +#define SQLITE_GroupByOrder 0x00000004 /* GROUPBY cover of ORDERBY */ +#define SQLITE_FactorOutConst 0x00000008 /* Constant factoring */ +#define SQLITE_DistinctOpt 0x00000010 /* DISTINCT using indexes */ +#define SQLITE_CoverIdxScan 0x00000020 /* Covering index scans */ +#define SQLITE_OrderByIdxJoin 0x00000040 /* ORDER BY of joins via index */ +#define SQLITE_Transitive 0x00000080 /* Transitive constraints */ +#define SQLITE_OmitNoopJoin 0x00000100 /* Omit unused tables in joins */ +#define SQLITE_CountOfView 0x00000200 /* The count-of-view optimization */ +#define SQLITE_CursorHints 0x00000400 /* Add OP_CursorHint opcodes */ +#define SQLITE_Stat4 0x00000800 /* Use STAT4 data */ + /* TH3 expects this value ^^^^^^^^^^ to be 0x0000800. Don't change it */ +#define SQLITE_PushDown 0x00001000 /* The push-down optimization */ +#define SQLITE_SimplifyJoin 0x00002000 /* Convert LEFT JOIN to JOIN */ +#define SQLITE_SkipScan 0x00004000 /* Skip-scans */ +#define SQLITE_PropagateConst 0x00008000 /* The constant propagation opt */ +#define SQLITE_MinMaxOpt 0x00010000 /* The min/max optimization */ +#define SQLITE_SeekScan 0x00020000 /* The OP_SeekScan optimization */ +#define SQLITE_OmitOrderBy 0x00040000 /* Omit pointless ORDER BY */ + /* TH3 expects this value ^^^^^^^^^^ to be 0x40000. Coordinate any change */ +#define SQLITE_BloomFilter 0x00080000 /* Use a Bloom filter on searches */ +#define SQLITE_BloomPulldown 0x00100000 /* Run Bloom filters early */ +#define SQLITE_BalancedMerge 0x00200000 /* Balance multi-way merges */ +#define SQLITE_AllOpts 0xffffffff /* All optimizations */ + +/* +** Macros for testing whether or not optimizations are enabled or disabled. +*/ +#define OptimizationDisabled(db, mask) (((db)->dbOptFlags&(mask))!=0) +#define OptimizationEnabled(db, mask) (((db)->dbOptFlags&(mask))==0) + +/* +** Return true if it OK to factor constant expressions into the initialization +** code. The argument is a Parse object for the code generator. +*/ +#define ConstFactorOk(P) ((P)->okConstFactor) + +/* Possible values for the sqlite3.eOpenState field. +** The numbers are randomly selected such that a minimum of three bits must +** change to convert any number to another or to zero +*/ +#define SQLITE_STATE_OPEN 0x76 /* Database is open */ +#define SQLITE_STATE_CLOSED 0xce /* Database is closed */ +#define SQLITE_STATE_SICK 0xba /* Error and awaiting close */ +#define SQLITE_STATE_BUSY 0x6d /* Database currently in use */ +#define SQLITE_STATE_ERROR 0xd5 /* An SQLITE_MISUSE error occurred */ +#define SQLITE_STATE_ZOMBIE 0xa7 /* Close with last statement close */ + +/* +** Each SQL function is defined by an instance of the following +** structure. For global built-in functions (ex: substr(), max(), count()) +** a pointer to this structure is held in the sqlite3BuiltinFunctions object. +** For per-connection application-defined functions, a pointer to this +** structure is held in the db->aHash hash table. +** +** The u.pHash field is used by the global built-ins. The u.pDestructor +** field is used by per-connection app-def functions. +*/ +struct FuncDef { + i8 nArg; /* Number of arguments. -1 means unlimited */ + u32 funcFlags; /* Some combination of SQLITE_FUNC_* */ + void *pUserData; /* User data parameter */ + FuncDef *pNext; /* Next function with same name */ + void (*xSFunc)(sqlite3_context*,int,sqlite3_value**); /* func or agg-step */ + void (*xFinalize)(sqlite3_context*); /* Agg finalizer */ + void (*xValue)(sqlite3_context*); /* Current agg value */ + void (*xInverse)(sqlite3_context*,int,sqlite3_value**); /* inverse agg-step */ + const char *zName; /* SQL name of the function. */ + union { + FuncDef *pHash; /* Next with a different name but the same hash */ + FuncDestructor *pDestructor; /* Reference counted destructor function */ + } u; /* pHash if SQLITE_FUNC_BUILTIN, pDestructor otherwise */ +}; + +/* +** This structure encapsulates a user-function destructor callback (as +** configured using create_function_v2()) and a reference counter. When +** create_function_v2() is called to create a function with a destructor, +** a single object of this type is allocated. FuncDestructor.nRef is set to +** the number of FuncDef objects created (either 1 or 3, depending on whether +** or not the specified encoding is SQLITE_ANY). The FuncDef.pDestructor +** member of each of the new FuncDef objects is set to point to the allocated +** FuncDestructor. +** +** Thereafter, when one of the FuncDef objects is deleted, the reference +** count on this object is decremented. When it reaches 0, the destructor +** is invoked and the FuncDestructor structure freed. +*/ +struct FuncDestructor { + int nRef; + void (*xDestroy)(void *); + void *pUserData; +}; + +/* +** Possible values for FuncDef.flags. Note that the _LENGTH and _TYPEOF +** values must correspond to OPFLAG_LENGTHARG and OPFLAG_TYPEOFARG. And +** SQLITE_FUNC_CONSTANT must be the same as SQLITE_DETERMINISTIC. There +** are assert() statements in the code to verify this. +** +** Value constraints (enforced via assert()): +** SQLITE_FUNC_MINMAX == NC_MinMaxAgg == SF_MinMaxAgg +** SQLITE_FUNC_ANYORDER == NC_OrderAgg == SF_OrderByReqd +** SQLITE_FUNC_LENGTH == OPFLAG_LENGTHARG +** SQLITE_FUNC_TYPEOF == OPFLAG_TYPEOFARG +** SQLITE_FUNC_CONSTANT == SQLITE_DETERMINISTIC from the API +** SQLITE_FUNC_DIRECT == SQLITE_DIRECTONLY from the API +** SQLITE_FUNC_UNSAFE == SQLITE_INNOCUOUS +** SQLITE_FUNC_ENCMASK depends on SQLITE_UTF* macros in the API +*/ +#define SQLITE_FUNC_ENCMASK 0x0003 /* SQLITE_UTF8, SQLITE_UTF16BE or UTF16LE */ +#define SQLITE_FUNC_LIKE 0x0004 /* Candidate for the LIKE optimization */ +#define SQLITE_FUNC_CASE 0x0008 /* Case-sensitive LIKE-type function */ +#define SQLITE_FUNC_EPHEM 0x0010 /* Ephemeral. Delete with VDBE */ +#define SQLITE_FUNC_NEEDCOLL 0x0020 /* sqlite3GetFuncCollSeq() might be called*/ +#define SQLITE_FUNC_LENGTH 0x0040 /* Built-in length() function */ +#define SQLITE_FUNC_TYPEOF 0x0080 /* Built-in typeof() function */ +#define SQLITE_FUNC_COUNT 0x0100 /* Built-in count(*) aggregate */ +/* 0x0200 -- available for reuse */ +#define SQLITE_FUNC_UNLIKELY 0x0400 /* Built-in unlikely() function */ +#define SQLITE_FUNC_CONSTANT 0x0800 /* Constant inputs give a constant output */ +#define SQLITE_FUNC_MINMAX 0x1000 /* True for min() and max() aggregates */ +#define SQLITE_FUNC_SLOCHNG 0x2000 /* "Slow Change". Value constant during a + ** single query - might change over time */ +#define SQLITE_FUNC_TEST 0x4000 /* Built-in testing functions */ +#define SQLITE_FUNC_OFFSET 0x8000 /* Built-in sqlite_offset() function */ +#define SQLITE_FUNC_WINDOW 0x00010000 /* Built-in window-only function */ +#define SQLITE_FUNC_INTERNAL 0x00040000 /* For use by NestedParse() only */ +#define SQLITE_FUNC_DIRECT 0x00080000 /* Not for use in TRIGGERs or VIEWs */ +#define SQLITE_FUNC_SUBTYPE 0x00100000 /* Result likely to have sub-type */ +#define SQLITE_FUNC_UNSAFE 0x00200000 /* Function has side effects */ +#define SQLITE_FUNC_INLINE 0x00400000 /* Functions implemented in-line */ +#define SQLITE_FUNC_BUILTIN 0x00800000 /* This is a built-in function */ +#define SQLITE_FUNC_ANYORDER 0x08000000 /* count/min/max aggregate */ + +/* Identifier numbers for each in-line function */ +#define INLINEFUNC_coalesce 0 +#define INLINEFUNC_implies_nonnull_row 1 +#define INLINEFUNC_expr_implies_expr 2 +#define INLINEFUNC_expr_compare 3 +#define INLINEFUNC_affinity 4 +#define INLINEFUNC_iif 5 +#define INLINEFUNC_unlikely 99 /* Default case */ + +/* +** The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are +** used to create the initializers for the FuncDef structures. +** +** FUNCTION(zName, nArg, iArg, bNC, xFunc) +** Used to create a scalar function definition of a function zName +** implemented by C function xFunc that accepts nArg arguments. The +** value passed as iArg is cast to a (void*) and made available +** as the user-data (sqlite3_user_data()) for the function. If +** argument bNC is true, then the SQLITE_FUNC_NEEDCOLL flag is set. +** +** VFUNCTION(zName, nArg, iArg, bNC, xFunc) +** Like FUNCTION except it omits the SQLITE_FUNC_CONSTANT flag. +** +** SFUNCTION(zName, nArg, iArg, bNC, xFunc) +** Like FUNCTION except it omits the SQLITE_FUNC_CONSTANT flag and +** adds the SQLITE_DIRECTONLY flag. +** +** INLINE_FUNC(zName, nArg, iFuncId, mFlags) +** zName is the name of a function that is implemented by in-line +** byte code rather than by the usual callbacks. The iFuncId +** parameter determines the function id. The mFlags parameter is +** optional SQLITE_FUNC_ flags for this function. +** +** TEST_FUNC(zName, nArg, iFuncId, mFlags) +** zName is the name of a test-only function implemented by in-line +** byte code rather than by the usual callbacks. The iFuncId +** parameter determines the function id. The mFlags parameter is +** optional SQLITE_FUNC_ flags for this function. +** +** DFUNCTION(zName, nArg, iArg, bNC, xFunc) +** Like FUNCTION except it omits the SQLITE_FUNC_CONSTANT flag and +** adds the SQLITE_FUNC_SLOCHNG flag. Used for date & time functions +** and functions like sqlite_version() that can change, but not during +** a single query. The iArg is ignored. The user-data is always set +** to a NULL pointer. The bNC parameter is not used. +** +** MFUNCTION(zName, nArg, xPtr, xFunc) +** For math-library functions. xPtr is an arbitrary pointer. +** +** PURE_DATE(zName, nArg, iArg, bNC, xFunc) +** Used for "pure" date/time functions, this macro is like DFUNCTION +** except that it does set the SQLITE_FUNC_CONSTANT flags. iArg is +** ignored and the user-data for these functions is set to an +** arbitrary non-NULL pointer. The bNC parameter is not used. +** +** AGGREGATE(zName, nArg, iArg, bNC, xStep, xFinal) +** Used to create an aggregate function definition implemented by +** the C functions xStep and xFinal. The first four parameters +** are interpreted in the same way as the first 4 parameters to +** FUNCTION(). +** +** WAGGREGATE(zName, nArg, iArg, xStep, xFinal, xValue, xInverse) +** Used to create an aggregate function definition implemented by +** the C functions xStep and xFinal. The first four parameters +** are interpreted in the same way as the first 4 parameters to +** FUNCTION(). +** +** LIKEFUNC(zName, nArg, pArg, flags) +** Used to create a scalar function definition of a function zName +** that accepts nArg arguments and is implemented by a call to C +** function likeFunc. Argument pArg is cast to a (void *) and made +** available as the function user-data (sqlite3_user_data()). The +** FuncDef.flags variable is set to the value passed as the flags +** parameter. +*/ +#define FUNCTION(zName, nArg, iArg, bNC, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|\ + SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ + SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, 0, #zName, {0} } +#define VFUNCTION(zName, nArg, iArg, bNC, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ + SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, 0, #zName, {0} } +#define SFUNCTION(zName, nArg, iArg, bNC, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_UTF8|SQLITE_DIRECTONLY|SQLITE_FUNC_UNSAFE, \ + SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, 0, #zName, {0} } +#define MFUNCTION(zName, nArg, xPtr, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_FUNC_CONSTANT|SQLITE_UTF8, \ + xPtr, 0, xFunc, 0, 0, 0, #zName, {0} } +#define JFUNCTION(zName, nArg, iArg, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS|\ + SQLITE_FUNC_CONSTANT|SQLITE_UTF8, \ + SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, 0, #zName, {0} } +#define INLINE_FUNC(zName, nArg, iArg, mFlags) \ + {nArg, SQLITE_FUNC_BUILTIN|\ + SQLITE_UTF8|SQLITE_FUNC_INLINE|SQLITE_FUNC_CONSTANT|(mFlags), \ + SQLITE_INT_TO_PTR(iArg), 0, noopFunc, 0, 0, 0, #zName, {0} } +#define TEST_FUNC(zName, nArg, iArg, mFlags) \ + {nArg, SQLITE_FUNC_BUILTIN|\ + SQLITE_UTF8|SQLITE_FUNC_INTERNAL|SQLITE_FUNC_TEST| \ + SQLITE_FUNC_INLINE|SQLITE_FUNC_CONSTANT|(mFlags), \ + SQLITE_INT_TO_PTR(iArg), 0, noopFunc, 0, 0, 0, #zName, {0} } +#define DFUNCTION(zName, nArg, iArg, bNC, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_FUNC_SLOCHNG|SQLITE_UTF8, \ + 0, 0, xFunc, 0, 0, 0, #zName, {0} } +#define PURE_DATE(zName, nArg, iArg, bNC, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|\ + SQLITE_FUNC_SLOCHNG|SQLITE_UTF8|SQLITE_FUNC_CONSTANT, \ + (void*)&sqlite3Config, 0, xFunc, 0, 0, 0, #zName, {0} } +#define FUNCTION2(zName, nArg, iArg, bNC, xFunc, extraFlags) \ + {nArg, SQLITE_FUNC_BUILTIN|\ + SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL)|extraFlags,\ + SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, 0, #zName, {0} } +#define STR_FUNCTION(zName, nArg, pArg, bNC, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|\ + SQLITE_FUNC_SLOCHNG|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ + pArg, 0, xFunc, 0, 0, 0, #zName, } +#define LIKEFUNC(zName, nArg, arg, flags) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_FUNC_CONSTANT|SQLITE_UTF8|flags, \ + (void *)arg, 0, likeFunc, 0, 0, 0, #zName, {0} } +#define WAGGREGATE(zName, nArg, arg, nc, xStep, xFinal, xValue, xInverse, f) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_UTF8|(nc*SQLITE_FUNC_NEEDCOLL)|f, \ + SQLITE_INT_TO_PTR(arg), 0, xStep,xFinal,xValue,xInverse,#zName, {0}} +#define INTERNAL_FUNCTION(zName, nArg, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|\ + SQLITE_FUNC_INTERNAL|SQLITE_UTF8|SQLITE_FUNC_CONSTANT, \ + 0, 0, xFunc, 0, 0, 0, #zName, {0} } + + +/* +** All current savepoints are stored in a linked list starting at +** sqlite3.pSavepoint. The first element in the list is the most recently +** opened savepoint. Savepoints are added to the list by the vdbe +** OP_Savepoint instruction. +*/ +struct Savepoint { + char *zName; /* Savepoint name (nul-terminated) */ + i64 nDeferredCons; /* Number of deferred fk violations */ + i64 nDeferredImmCons; /* Number of deferred imm fk. */ + Savepoint *pNext; /* Parent savepoint (if any) */ +}; + +/* +** The following are used as the second parameter to sqlite3Savepoint(), +** and as the P1 argument to the OP_Savepoint instruction. +*/ +#define SAVEPOINT_BEGIN 0 +#define SAVEPOINT_RELEASE 1 +#define SAVEPOINT_ROLLBACK 2 + + +/* +** Each SQLite module (virtual table definition) is defined by an +** instance of the following structure, stored in the sqlite3.aModule +** hash table. +*/ +struct Module { + const sqlite3_module *pModule; /* Callback pointers */ + const char *zName; /* Name passed to create_module() */ + int nRefModule; /* Number of pointers to this object */ + void *pAux; /* pAux passed to create_module() */ + void (*xDestroy)(void *); /* Module destructor function */ + Table *pEpoTab; /* Eponymous table for this module */ +}; + +/* +** Information about each column of an SQL table is held in an instance +** of the Column structure, in the Table.aCol[] array. +** +** Definitions: +** +** "table column index" This is the index of the column in the +** Table.aCol[] array, and also the index of +** the column in the original CREATE TABLE stmt. +** +** "storage column index" This is the index of the column in the +** record BLOB generated by the OP_MakeRecord +** opcode. The storage column index is less than +** or equal to the table column index. It is +** equal if and only if there are no VIRTUAL +** columns to the left. +** +** Notes on zCnName: +** The zCnName field stores the name of the column, the datatype of the +** column, and the collating sequence for the column, in that order, all in +** a single allocation. Each string is 0x00 terminated. The datatype +** is only included if the COLFLAG_HASTYPE bit of colFlags is set and the +** collating sequence name is only included if the COLFLAG_HASCOLL bit is +** set. +*/ +struct Column { + char *zCnName; /* Name of this column */ + unsigned notNull :4; /* An OE_ code for handling a NOT NULL constraint */ + unsigned eCType :4; /* One of the standard types */ + char affinity; /* One of the SQLITE_AFF_... values */ + u8 szEst; /* Est size of value in this column. sizeof(INT)==1 */ + u8 hName; /* Column name hash for faster lookup */ + u16 iDflt; /* 1-based index of DEFAULT. 0 means "none" */ + u16 colFlags; /* Boolean properties. See COLFLAG_ defines below */ +}; + +/* Allowed values for Column.eCType. +** +** Values must match entries in the global constant arrays +** sqlite3StdTypeLen[] and sqlite3StdType[]. Each value is one more +** than the offset into these arrays for the corresponding name. +** Adjust the SQLITE_N_STDTYPE value if adding or removing entries. +*/ +#define COLTYPE_CUSTOM 0 /* Type appended to zName */ +#define COLTYPE_ANY 1 +#define COLTYPE_BLOB 2 +#define COLTYPE_INT 3 +#define COLTYPE_INTEGER 4 +#define COLTYPE_REAL 5 +#define COLTYPE_TEXT 6 +#define SQLITE_N_STDTYPE 6 /* Number of standard types */ + +/* Allowed values for Column.colFlags. +** +** Constraints: +** TF_HasVirtual == COLFLAG_VIRTUAL +** TF_HasStored == COLFLAG_STORED +** TF_HasHidden == COLFLAG_HIDDEN +*/ +#define COLFLAG_PRIMKEY 0x0001 /* Column is part of the primary key */ +#define COLFLAG_HIDDEN 0x0002 /* A hidden column in a virtual table */ +#define COLFLAG_HASTYPE 0x0004 /* Type name follows column name */ +#define COLFLAG_UNIQUE 0x0008 /* Column def contains "UNIQUE" or "PK" */ +#define COLFLAG_SORTERREF 0x0010 /* Use sorter-refs with this column */ +#define COLFLAG_VIRTUAL 0x0020 /* GENERATED ALWAYS AS ... VIRTUAL */ +#define COLFLAG_STORED 0x0040 /* GENERATED ALWAYS AS ... STORED */ +#define COLFLAG_NOTAVAIL 0x0080 /* STORED column not yet calculated */ +#define COLFLAG_BUSY 0x0100 /* Blocks recursion on GENERATED columns */ +#define COLFLAG_HASCOLL 0x0200 /* Has collating sequence name in zCnName */ +#define COLFLAG_GENERATED 0x0060 /* Combo: _STORED, _VIRTUAL */ +#define COLFLAG_NOINSERT 0x0062 /* Combo: _HIDDEN, _STORED, _VIRTUAL */ + +/* +** A "Collating Sequence" is defined by an instance of the following +** structure. Conceptually, a collating sequence consists of a name and +** a comparison routine that defines the order of that sequence. +** +** If CollSeq.xCmp is NULL, it means that the +** collating sequence is undefined. Indices built on an undefined +** collating sequence may not be read or written. +*/ +struct CollSeq { + char *zName; /* Name of the collating sequence, UTF-8 encoded */ + u8 enc; /* Text encoding handled by xCmp() */ + void *pUser; /* First argument to xCmp() */ + int (*xCmp)(void*,int, const void*, int, const void*); + void (*xDel)(void*); /* Destructor for pUser */ +}; + +/* +** A sort order can be either ASC or DESC. +*/ +#define SQLITE_SO_ASC 0 /* Sort in ascending order */ +#define SQLITE_SO_DESC 1 /* Sort in ascending order */ +#define SQLITE_SO_UNDEFINED -1 /* No sort order specified */ + +/* +** Column affinity types. +** +** These used to have mnemonic name like 'i' for SQLITE_AFF_INTEGER and +** 't' for SQLITE_AFF_TEXT. But we can save a little space and improve +** the speed a little by numbering the values consecutively. +** +** But rather than start with 0 or 1, we begin with 'A'. That way, +** when multiple affinity types are concatenated into a string and +** used as the P4 operand, they will be more readable. +** +** Note also that the numeric types are grouped together so that testing +** for a numeric type is a single comparison. And the BLOB type is first. +*/ +#define SQLITE_AFF_NONE 0x40 /* '@' */ +#define SQLITE_AFF_BLOB 0x41 /* 'A' */ +#define SQLITE_AFF_TEXT 0x42 /* 'B' */ +#define SQLITE_AFF_NUMERIC 0x43 /* 'C' */ +#define SQLITE_AFF_INTEGER 0x44 /* 'D' */ +#define SQLITE_AFF_REAL 0x45 /* 'E' */ + +#define sqlite3IsNumericAffinity(X) ((X)>=SQLITE_AFF_NUMERIC) + +/* +** The SQLITE_AFF_MASK values masks off the significant bits of an +** affinity value. +*/ +#define SQLITE_AFF_MASK 0x47 + +/* +** Additional bit values that can be ORed with an affinity without +** changing the affinity. +** +** The SQLITE_NOTNULL flag is a combination of NULLEQ and JUMPIFNULL. +** It causes an assert() to fire if either operand to a comparison +** operator is NULL. It is added to certain comparison operators to +** prove that the operands are always NOT NULL. +*/ +#define SQLITE_JUMPIFNULL 0x10 /* jumps if either operand is NULL */ +#define SQLITE_NULLEQ 0x80 /* NULL=NULL */ +#define SQLITE_NOTNULL 0x90 /* Assert that operands are never NULL */ + +/* +** An object of this type is created for each virtual table present in +** the database schema. +** +** If the database schema is shared, then there is one instance of this +** structure for each database connection (sqlite3*) that uses the shared +** schema. This is because each database connection requires its own unique +** instance of the sqlite3_vtab* handle used to access the virtual table +** implementation. sqlite3_vtab* handles can not be shared between +** database connections, even when the rest of the in-memory database +** schema is shared, as the implementation often stores the database +** connection handle passed to it via the xConnect() or xCreate() method +** during initialization internally. This database connection handle may +** then be used by the virtual table implementation to access real tables +** within the database. So that they appear as part of the callers +** transaction, these accesses need to be made via the same database +** connection as that used to execute SQL operations on the virtual table. +** +** All VTable objects that correspond to a single table in a shared +** database schema are initially stored in a linked-list pointed to by +** the Table.pVTable member variable of the corresponding Table object. +** When an sqlite3_prepare() operation is required to access the virtual +** table, it searches the list for the VTable that corresponds to the +** database connection doing the preparing so as to use the correct +** sqlite3_vtab* handle in the compiled query. +** +** When an in-memory Table object is deleted (for example when the +** schema is being reloaded for some reason), the VTable objects are not +** deleted and the sqlite3_vtab* handles are not xDisconnect()ed +** immediately. Instead, they are moved from the Table.pVTable list to +** another linked list headed by the sqlite3.pDisconnect member of the +** corresponding sqlite3 structure. They are then deleted/xDisconnected +** next time a statement is prepared using said sqlite3*. This is done +** to avoid deadlock issues involving multiple sqlite3.mutex mutexes. +** Refer to comments above function sqlite3VtabUnlockList() for an +** explanation as to why it is safe to add an entry to an sqlite3.pDisconnect +** list without holding the corresponding sqlite3.mutex mutex. +** +** The memory for objects of this type is always allocated by +** sqlite3DbMalloc(), using the connection handle stored in VTable.db as +** the first argument. +*/ +struct VTable { + sqlite3 *db; /* Database connection associated with this table */ + Module *pMod; /* Pointer to module implementation */ + sqlite3_vtab *pVtab; /* Pointer to vtab instance */ + int nRef; /* Number of pointers to this structure */ + u8 bConstraint; /* True if constraints are supported */ + u8 eVtabRisk; /* Riskiness of allowing hacker access */ + int iSavepoint; /* Depth of the SAVEPOINT stack */ + VTable *pNext; /* Next in linked list (see above) */ +}; + +/* Allowed values for VTable.eVtabRisk +*/ +#define SQLITE_VTABRISK_Low 0 +#define SQLITE_VTABRISK_Normal 1 +#define SQLITE_VTABRISK_High 2 + +/* +** The schema for each SQL table, virtual table, and view is represented +** in memory by an instance of the following structure. +*/ +struct Table { + char *zName; /* Name of the table or view */ + Column *aCol; /* Information about each column */ + Index *pIndex; /* List of SQL indexes on this table. */ + char *zColAff; /* String defining the affinity of each column */ + ExprList *pCheck; /* All CHECK constraints */ + /* ... also used as column name list in a VIEW */ + Pgno tnum; /* Root BTree page for this table */ + u32 nTabRef; /* Number of pointers to this Table */ + u32 tabFlags; /* Mask of TF_* values */ + i16 iPKey; /* If not negative, use aCol[iPKey] as the rowid */ + i16 nCol; /* Number of columns in this table */ + i16 nNVCol; /* Number of columns that are not VIRTUAL */ + LogEst nRowLogEst; /* Estimated rows in table - from sqlite_stat1 table */ + LogEst szTabRow; /* Estimated size of each table row in bytes */ +#ifdef SQLITE_ENABLE_COSTMULT + LogEst costMult; /* Cost multiplier for using this table */ +#endif + u8 keyConf; /* What to do in case of uniqueness conflict on iPKey */ + u8 eTabType; /* 0: normal, 1: virtual, 2: view */ + union { + struct { /* Used by ordinary tables: */ + int addColOffset; /* Offset in CREATE TABLE stmt to add a new column */ + FKey *pFKey; /* Linked list of all foreign keys in this table */ + ExprList *pDfltList; /* DEFAULT clauses on various columns. + ** Or the AS clause for generated columns. */ + } tab; + struct { /* Used by views: */ + Select *pSelect; /* View definition */ + } view; + struct { /* Used by virtual tables only: */ + int nArg; /* Number of arguments to the module */ + char **azArg; /* 0: module 1: schema 2: vtab name 3...: args */ + VTable *p; /* List of VTable objects. */ + } vtab; + } u; + Trigger *pTrigger; /* List of triggers on this object */ + Schema *pSchema; /* Schema that contains this table */ +}; + +/* +** Allowed values for Table.tabFlags. +** +** TF_OOOHidden applies to tables or view that have hidden columns that are +** followed by non-hidden columns. Example: "CREATE VIRTUAL TABLE x USING +** vtab1(a HIDDEN, b);". Since "b" is a non-hidden column but "a" is hidden, +** the TF_OOOHidden attribute would apply in this case. Such tables require +** special handling during INSERT processing. The "OOO" means "Out Of Order". +** +** Constraints: +** +** TF_HasVirtual == COLFLAG_VIRTUAL +** TF_HasStored == COLFLAG_STORED +** TF_HasHidden == COLFLAG_HIDDEN +*/ +#define TF_Readonly 0x00000001 /* Read-only system table */ +#define TF_HasHidden 0x00000002 /* Has one or more hidden columns */ +#define TF_HasPrimaryKey 0x00000004 /* Table has a primary key */ +#define TF_Autoincrement 0x00000008 /* Integer primary key is autoincrement */ +#define TF_HasStat1 0x00000010 /* nRowLogEst set from sqlite_stat1 */ +#define TF_HasVirtual 0x00000020 /* Has one or more VIRTUAL columns */ +#define TF_HasStored 0x00000040 /* Has one or more STORED columns */ +#define TF_HasGenerated 0x00000060 /* Combo: HasVirtual + HasStored */ +#define TF_WithoutRowid 0x00000080 /* No rowid. PRIMARY KEY is the key */ +#define TF_StatsUsed 0x00000100 /* Query planner decisions affected by + ** Index.aiRowLogEst[] values */ +#define TF_NoVisibleRowid 0x00000200 /* No user-visible "rowid" column */ +#define TF_OOOHidden 0x00000400 /* Out-of-Order hidden columns */ +#define TF_HasNotNull 0x00000800 /* Contains NOT NULL constraints */ +#define TF_Shadow 0x00001000 /* True for a shadow table */ +#define TF_HasStat4 0x00002000 /* STAT4 info available for this table */ +#define TF_Ephemeral 0x00004000 /* An ephemeral table */ +#define TF_Eponymous 0x00008000 /* An eponymous virtual table */ +#define TF_Strict 0x00010000 /* STRICT mode */ + +/* +** Allowed values for Table.eTabType +*/ +#define TABTYP_NORM 0 /* Ordinary table */ +#define TABTYP_VTAB 1 /* Virtual table */ +#define TABTYP_VIEW 2 /* A view */ + +#define IsView(X) ((X)->eTabType==TABTYP_VIEW) +#define IsOrdinaryTable(X) ((X)->eTabType==TABTYP_NORM) + +/* +** Test to see whether or not a table is a virtual table. This is +** done as a macro so that it will be optimized out when virtual +** table support is omitted from the build. +*/ +#ifndef SQLITE_OMIT_VIRTUALTABLE +# define IsVirtual(X) ((X)->eTabType==TABTYP_VTAB) +# define ExprIsVtab(X) \ + ((X)->op==TK_COLUMN && (X)->y.pTab!=0 && (X)->y.pTab->eTabType==TABTYP_VTAB) +#else +# define IsVirtual(X) 0 +# define ExprIsVtab(X) 0 +#endif + +/* +** Macros to determine if a column is hidden. IsOrdinaryHiddenColumn() +** only works for non-virtual tables (ordinary tables and views) and is +** always false unless SQLITE_ENABLE_HIDDEN_COLUMNS is defined. The +** IsHiddenColumn() macro is general purpose. +*/ +#if defined(SQLITE_ENABLE_HIDDEN_COLUMNS) +# define IsHiddenColumn(X) (((X)->colFlags & COLFLAG_HIDDEN)!=0) +# define IsOrdinaryHiddenColumn(X) (((X)->colFlags & COLFLAG_HIDDEN)!=0) +#elif !defined(SQLITE_OMIT_VIRTUALTABLE) +# define IsHiddenColumn(X) (((X)->colFlags & COLFLAG_HIDDEN)!=0) +# define IsOrdinaryHiddenColumn(X) 0 +#else +# define IsHiddenColumn(X) 0 +# define IsOrdinaryHiddenColumn(X) 0 +#endif + + +/* Does the table have a rowid */ +#define HasRowid(X) (((X)->tabFlags & TF_WithoutRowid)==0) +#define VisibleRowid(X) (((X)->tabFlags & TF_NoVisibleRowid)==0) + +/* +** Each foreign key constraint is an instance of the following structure. +** +** A foreign key is associated with two tables. The "from" table is +** the table that contains the REFERENCES clause that creates the foreign +** key. The "to" table is the table that is named in the REFERENCES clause. +** Consider this example: +** +** CREATE TABLE ex1( +** a INTEGER PRIMARY KEY, +** b INTEGER CONSTRAINT fk1 REFERENCES ex2(x) +** ); +** +** For foreign key "fk1", the from-table is "ex1" and the to-table is "ex2". +** Equivalent names: +** +** from-table == child-table +** to-table == parent-table +** +** Each REFERENCES clause generates an instance of the following structure +** which is attached to the from-table. The to-table need not exist when +** the from-table is created. The existence of the to-table is not checked. +** +** The list of all parents for child Table X is held at X.pFKey. +** +** A list of all children for a table named Z (which might not even exist) +** is held in Schema.fkeyHash with a hash key of Z. +*/ +struct FKey { + Table *pFrom; /* Table containing the REFERENCES clause (aka: Child) */ + FKey *pNextFrom; /* Next FKey with the same in pFrom. Next parent of pFrom */ + char *zTo; /* Name of table that the key points to (aka: Parent) */ + FKey *pNextTo; /* Next with the same zTo. Next child of zTo. */ + FKey *pPrevTo; /* Previous with the same zTo */ + int nCol; /* Number of columns in this key */ + /* EV: R-30323-21917 */ + u8 isDeferred; /* True if constraint checking is deferred till COMMIT */ + u8 aAction[2]; /* ON DELETE and ON UPDATE actions, respectively */ + Trigger *apTrigger[2];/* Triggers for aAction[] actions */ + struct sColMap { /* Mapping of columns in pFrom to columns in zTo */ + int iFrom; /* Index of column in pFrom */ + char *zCol; /* Name of column in zTo. If NULL use PRIMARY KEY */ + } aCol[1]; /* One entry for each of nCol columns */ +}; + +/* +** SQLite supports many different ways to resolve a constraint +** error. ROLLBACK processing means that a constraint violation +** causes the operation in process to fail and for the current transaction +** to be rolled back. ABORT processing means the operation in process +** fails and any prior changes from that one operation are backed out, +** but the transaction is not rolled back. FAIL processing means that +** the operation in progress stops and returns an error code. But prior +** changes due to the same operation are not backed out and no rollback +** occurs. IGNORE means that the particular row that caused the constraint +** error is not inserted or updated. Processing continues and no error +** is returned. REPLACE means that preexisting database rows that caused +** a UNIQUE constraint violation are removed so that the new insert or +** update can proceed. Processing continues and no error is reported. +** UPDATE applies to insert operations only and means that the insert +** is omitted and the DO UPDATE clause of an upsert is run instead. +** +** RESTRICT, SETNULL, SETDFLT, and CASCADE actions apply only to foreign keys. +** RESTRICT is the same as ABORT for IMMEDIATE foreign keys and the +** same as ROLLBACK for DEFERRED keys. SETNULL means that the foreign +** key is set to NULL. SETDFLT means that the foreign key is set +** to its default value. CASCADE means that a DELETE or UPDATE of the +** referenced table row is propagated into the row that holds the +** foreign key. +** +** The OE_Default value is a place holder that means to use whatever +** conflict resolution algorthm is required from context. +** +** The following symbolic values are used to record which type +** of conflict resolution action to take. +*/ +#define OE_None 0 /* There is no constraint to check */ +#define OE_Rollback 1 /* Fail the operation and rollback the transaction */ +#define OE_Abort 2 /* Back out changes but do no rollback transaction */ +#define OE_Fail 3 /* Stop the operation but leave all prior changes */ +#define OE_Ignore 4 /* Ignore the error. Do not do the INSERT or UPDATE */ +#define OE_Replace 5 /* Delete existing record, then do INSERT or UPDATE */ +#define OE_Update 6 /* Process as a DO UPDATE in an upsert */ +#define OE_Restrict 7 /* OE_Abort for IMMEDIATE, OE_Rollback for DEFERRED */ +#define OE_SetNull 8 /* Set the foreign key value to NULL */ +#define OE_SetDflt 9 /* Set the foreign key value to its default */ +#define OE_Cascade 10 /* Cascade the changes */ +#define OE_Default 11 /* Do whatever the default action is */ + + +/* +** An instance of the following structure is passed as the first +** argument to sqlite3VdbeKeyCompare and is used to control the +** comparison of the two index keys. +** +** Note that aSortOrder[] and aColl[] have nField+1 slots. There +** are nField slots for the columns of an index then one extra slot +** for the rowid at the end. +*/ +struct KeyInfo { + u32 nRef; /* Number of references to this KeyInfo object */ + u8 enc; /* Text encoding - one of the SQLITE_UTF* values */ + u16 nKeyField; /* Number of key columns in the index */ + u16 nAllField; /* Total columns, including key plus others */ + sqlite3 *db; /* The database connection */ + u8 *aSortFlags; /* Sort order for each column. */ + CollSeq *aColl[1]; /* Collating sequence for each term of the key */ +}; + +/* +** Allowed bit values for entries in the KeyInfo.aSortFlags[] array. +*/ +#define KEYINFO_ORDER_DESC 0x01 /* DESC sort order */ +#define KEYINFO_ORDER_BIGNULL 0x02 /* NULL is larger than any other value */ + +/* +** This object holds a record which has been parsed out into individual +** fields, for the purposes of doing a comparison. +** +** A record is an object that contains one or more fields of data. +** Records are used to store the content of a table row and to store +** the key of an index. A blob encoding of a record is created by +** the OP_MakeRecord opcode of the VDBE and is disassembled by the +** OP_Column opcode. +** +** An instance of this object serves as a "key" for doing a search on +** an index b+tree. The goal of the search is to find the entry that +** is closed to the key described by this object. This object might hold +** just a prefix of the key. The number of fields is given by +** pKeyInfo->nField. +** +** The r1 and r2 fields are the values to return if this key is less than +** or greater than a key in the btree, respectively. These are normally +** -1 and +1 respectively, but might be inverted to +1 and -1 if the b-tree +** is in DESC order. +** +** The key comparison functions actually return default_rc when they find +** an equals comparison. default_rc can be -1, 0, or +1. If there are +** multiple entries in the b-tree with the same key (when only looking +** at the first pKeyInfo->nFields,) then default_rc can be set to -1 to +** cause the search to find the last match, or +1 to cause the search to +** find the first match. +** +** The key comparison functions will set eqSeen to true if they ever +** get and equal results when comparing this structure to a b-tree record. +** When default_rc!=0, the search might end up on the record immediately +** before the first match or immediately after the last match. The +** eqSeen field will indicate whether or not an exact match exists in the +** b-tree. +*/ +struct UnpackedRecord { + KeyInfo *pKeyInfo; /* Collation and sort-order information */ + Mem *aMem; /* Values */ + u16 nField; /* Number of entries in apMem[] */ + i8 default_rc; /* Comparison result if keys are equal */ + u8 errCode; /* Error detected by xRecordCompare (CORRUPT or NOMEM) */ + i8 r1; /* Value to return if (lhs < rhs) */ + i8 r2; /* Value to return if (lhs > rhs) */ + u8 eqSeen; /* True if an equality comparison has been seen */ +}; + + +/* +** Each SQL index is represented in memory by an +** instance of the following structure. +** +** The columns of the table that are to be indexed are described +** by the aiColumn[] field of this structure. For example, suppose +** we have the following table and index: +** +** CREATE TABLE Ex1(c1 int, c2 int, c3 text); +** CREATE INDEX Ex2 ON Ex1(c3,c1); +** +** In the Table structure describing Ex1, nCol==3 because there are +** three columns in the table. In the Index structure describing +** Ex2, nColumn==2 since 2 of the 3 columns of Ex1 are indexed. +** The value of aiColumn is {2, 0}. aiColumn[0]==2 because the +** first column to be indexed (c3) has an index of 2 in Ex1.aCol[]. +** The second column to be indexed (c1) has an index of 0 in +** Ex1.aCol[], hence Ex2.aiColumn[1]==0. +** +** The Index.onError field determines whether or not the indexed columns +** must be unique and what to do if they are not. When Index.onError=OE_None, +** it means this is not a unique index. Otherwise it is a unique index +** and the value of Index.onError indicate the which conflict resolution +** algorithm to employ whenever an attempt is made to insert a non-unique +** element. +** +** While parsing a CREATE TABLE or CREATE INDEX statement in order to +** generate VDBE code (as opposed to parsing one read from an sqlite_schema +** table as part of parsing an existing database schema), transient instances +** of this structure may be created. In this case the Index.tnum variable is +** used to store the address of a VDBE instruction, not a database page +** number (it cannot - the database page is not allocated until the VDBE +** program is executed). See convertToWithoutRowidTable() for details. +*/ +struct Index { + char *zName; /* Name of this index */ + i16 *aiColumn; /* Which columns are used by this index. 1st is 0 */ + LogEst *aiRowLogEst; /* From ANALYZE: Est. rows selected by each column */ + Table *pTable; /* The SQL table being indexed */ + char *zColAff; /* String defining the affinity of each column */ + Index *pNext; /* The next index associated with the same table */ + Schema *pSchema; /* Schema containing this index */ + u8 *aSortOrder; /* for each column: True==DESC, False==ASC */ + const char **azColl; /* Array of collation sequence names for index */ + Expr *pPartIdxWhere; /* WHERE clause for partial indices */ + ExprList *aColExpr; /* Column expressions */ + Pgno tnum; /* DB Page containing root of this index */ + LogEst szIdxRow; /* Estimated average row size in bytes */ + u16 nKeyCol; /* Number of columns forming the key */ + u16 nColumn; /* Number of columns stored in the index */ + u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ + unsigned idxType:2; /* 0:Normal 1:UNIQUE, 2:PRIMARY KEY, 3:IPK */ + unsigned bUnordered:1; /* Use this index for == or IN queries only */ + unsigned uniqNotNull:1; /* True if UNIQUE and NOT NULL for all columns */ + unsigned isResized:1; /* True if resizeIndexObject() has been called */ + unsigned isCovering:1; /* True if this is a covering index */ + unsigned noSkipScan:1; /* Do not try to use skip-scan if true */ + unsigned hasStat1:1; /* aiRowLogEst values come from sqlite_stat1 */ + unsigned bNoQuery:1; /* Do not use this index to optimize queries */ + unsigned bAscKeyBug:1; /* True if the bba7b69f9849b5bf bug applies */ + unsigned bHasVCol:1; /* Index references one or more VIRTUAL columns */ +#ifdef SQLITE_ENABLE_STAT4 + int nSample; /* Number of elements in aSample[] */ + int nSampleCol; /* Size of IndexSample.anEq[] and so on */ + tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ + IndexSample *aSample; /* Samples of the left-most key */ + tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this index */ + tRowcnt nRowEst0; /* Non-logarithmic number of rows in the index */ +#endif + Bitmask colNotIdxed; /* 0 for unindexed columns in pTab */ +}; + +/* +** Allowed values for Index.idxType +*/ +#define SQLITE_IDXTYPE_APPDEF 0 /* Created using CREATE INDEX */ +#define SQLITE_IDXTYPE_UNIQUE 1 /* Implements a UNIQUE constraint */ +#define SQLITE_IDXTYPE_PRIMARYKEY 2 /* Is the PRIMARY KEY for the table */ +#define SQLITE_IDXTYPE_IPK 3 /* INTEGER PRIMARY KEY index */ + +/* Return true if index X is a PRIMARY KEY index */ +#define IsPrimaryKeyIndex(X) ((X)->idxType==SQLITE_IDXTYPE_PRIMARYKEY) + +/* Return true if index X is a UNIQUE index */ +#define IsUniqueIndex(X) ((X)->onError!=OE_None) + +/* The Index.aiColumn[] values are normally positive integer. But +** there are some negative values that have special meaning: +*/ +#define XN_ROWID (-1) /* Indexed column is the rowid */ +#define XN_EXPR (-2) /* Indexed column is an expression */ + +/* +** Each sample stored in the sqlite_stat4 table is represented in memory +** using a structure of this type. See documentation at the top of the +** analyze.c source file for additional information. +*/ +struct IndexSample { + void *p; /* Pointer to sampled record */ + int n; /* Size of record in bytes */ + tRowcnt *anEq; /* Est. number of rows where the key equals this sample */ + tRowcnt *anLt; /* Est. number of rows where key is less than this sample */ + tRowcnt *anDLt; /* Est. number of distinct keys less than this sample */ +}; + +/* +** Possible values to use within the flags argument to sqlite3GetToken(). +*/ +#define SQLITE_TOKEN_QUOTED 0x1 /* Token is a quoted identifier. */ +#define SQLITE_TOKEN_KEYWORD 0x2 /* Token is a keyword. */ + +/* +** Each token coming out of the lexer is an instance of +** this structure. Tokens are also used as part of an expression. +** +** The memory that "z" points to is owned by other objects. Take care +** that the owner of the "z" string does not deallocate the string before +** the Token goes out of scope! Very often, the "z" points to some place +** in the middle of the Parse.zSql text. But it might also point to a +** static string. +*/ +struct Token { + const char *z; /* Text of the token. Not NULL-terminated! */ + unsigned int n; /* Number of characters in this token */ +}; + +/* +** An instance of this structure contains information needed to generate +** code for a SELECT that contains aggregate functions. +** +** If Expr.op==TK_AGG_COLUMN or TK_AGG_FUNCTION then Expr.pAggInfo is a +** pointer to this structure. The Expr.iAgg field is the index in +** AggInfo.aCol[] or AggInfo.aFunc[] of information needed to generate +** code for that node. +** +** AggInfo.pGroupBy and AggInfo.aFunc.pExpr point to fields within the +** original Select structure that describes the SELECT statement. These +** fields do not need to be freed when deallocating the AggInfo structure. +*/ +struct AggInfo { + u8 directMode; /* Direct rendering mode means take data directly + ** from source tables rather than from accumulators */ + u8 useSortingIdx; /* In direct mode, reference the sorting index rather + ** than the source table */ + int sortingIdx; /* Cursor number of the sorting index */ + int sortingIdxPTab; /* Cursor number of pseudo-table */ + int nSortingColumn; /* Number of columns in the sorting index */ + int mnReg, mxReg; /* Range of registers allocated for aCol and aFunc */ + ExprList *pGroupBy; /* The group by clause */ + struct AggInfo_col { /* For each column used in source tables */ + Table *pTab; /* Source table */ + Expr *pCExpr; /* The original expression */ + int iTable; /* Cursor number of the source table */ + int iMem; /* Memory location that acts as accumulator */ + i16 iColumn; /* Column number within the source table */ + i16 iSorterColumn; /* Column number in the sorting index */ + } *aCol; + int nColumn; /* Number of used entries in aCol[] */ + int nAccumulator; /* Number of columns that show through to the output. + ** Additional columns are used only as parameters to + ** aggregate functions */ + struct AggInfo_func { /* For each aggregate function */ + Expr *pFExpr; /* Expression encoding the function */ + FuncDef *pFunc; /* The aggregate function implementation */ + int iMem; /* Memory location that acts as accumulator */ + int iDistinct; /* Ephemeral table used to enforce DISTINCT */ + int iDistAddr; /* Address of OP_OpenEphemeral */ + } *aFunc; + int nFunc; /* Number of entries in aFunc[] */ + u32 selId; /* Select to which this AggInfo belongs */ +}; + +/* +** The datatype ynVar is a signed integer, either 16-bit or 32-bit. +** Usually it is 16-bits. But if SQLITE_MAX_VARIABLE_NUMBER is greater +** than 32767 we have to make it 32-bit. 16-bit is preferred because +** it uses less memory in the Expr object, which is a big memory user +** in systems with lots of prepared statements. And few applications +** need more than about 10 or 20 variables. But some extreme users want +** to have prepared statements with over 32766 variables, and for them +** the option is available (at compile-time). +*/ +#if SQLITE_MAX_VARIABLE_NUMBER<32767 +typedef i16 ynVar; +#else +typedef int ynVar; +#endif + +/* +** Each node of an expression in the parse tree is an instance +** of this structure. +** +** Expr.op is the opcode. The integer parser token codes are reused +** as opcodes here. For example, the parser defines TK_GE to be an integer +** code representing the ">=" operator. This same integer code is reused +** to represent the greater-than-or-equal-to operator in the expression +** tree. +** +** If the expression is an SQL literal (TK_INTEGER, TK_FLOAT, TK_BLOB, +** or TK_STRING), then Expr.u.zToken contains the text of the SQL literal. If +** the expression is a variable (TK_VARIABLE), then Expr.u.zToken contains the +** variable name. Finally, if the expression is an SQL function (TK_FUNCTION), +** then Expr.u.zToken contains the name of the function. +** +** Expr.pRight and Expr.pLeft are the left and right subexpressions of a +** binary operator. Either or both may be NULL. +** +** Expr.x.pList is a list of arguments if the expression is an SQL function, +** a CASE expression or an IN expression of the form " IN (, ...)". +** Expr.x.pSelect is used if the expression is a sub-select or an expression of +** the form " IN (SELECT ...)". If the EP_xIsSelect bit is set in the +** Expr.flags mask, then Expr.x.pSelect is valid. Otherwise, Expr.x.pList is +** valid. +** +** An expression of the form ID or ID.ID refers to a column in a table. +** For such expressions, Expr.op is set to TK_COLUMN and Expr.iTable is +** the integer cursor number of a VDBE cursor pointing to that table and +** Expr.iColumn is the column number for the specific column. If the +** expression is used as a result in an aggregate SELECT, then the +** value is also stored in the Expr.iAgg column in the aggregate so that +** it can be accessed after all aggregates are computed. +** +** If the expression is an unbound variable marker (a question mark +** character '?' in the original SQL) then the Expr.iTable holds the index +** number for that variable. +** +** If the expression is a subquery then Expr.iColumn holds an integer +** register number containing the result of the subquery. If the +** subquery gives a constant result, then iTable is -1. If the subquery +** gives a different answer at different times during statement processing +** then iTable is the address of a subroutine that computes the subquery. +** +** If the Expr is of type OP_Column, and the table it is selecting from +** is a disk table or the "old.*" pseudo-table, then pTab points to the +** corresponding table definition. +** +** ALLOCATION NOTES: +** +** Expr objects can use a lot of memory space in database schema. To +** help reduce memory requirements, sometimes an Expr object will be +** truncated. And to reduce the number of memory allocations, sometimes +** two or more Expr objects will be stored in a single memory allocation, +** together with Expr.u.zToken strings. +** +** If the EP_Reduced and EP_TokenOnly flags are set when +** an Expr object is truncated. When EP_Reduced is set, then all +** the child Expr objects in the Expr.pLeft and Expr.pRight subtrees +** are contained within the same memory allocation. Note, however, that +** the subtrees in Expr.x.pList or Expr.x.pSelect are always separately +** allocated, regardless of whether or not EP_Reduced is set. +*/ +struct Expr { + u8 op; /* Operation performed by this node */ + char affExpr; /* affinity, or RAISE type */ + u8 op2; /* TK_REGISTER/TK_TRUTH: original value of Expr.op + ** TK_COLUMN: the value of p5 for OP_Column + ** TK_AGG_FUNCTION: nesting depth + ** TK_FUNCTION: NC_SelfRef flag if needs OP_PureFunc */ +#ifdef SQLITE_DEBUG + u8 vvaFlags; /* Verification flags. */ +#endif + u32 flags; /* Various flags. EP_* See below */ + union { + char *zToken; /* Token value. Zero terminated and dequoted */ + int iValue; /* Non-negative integer value if EP_IntValue */ + } u; + + /* If the EP_TokenOnly flag is set in the Expr.flags mask, then no + ** space is allocated for the fields below this point. An attempt to + ** access them will result in a segfault or malfunction. + *********************************************************************/ + + Expr *pLeft; /* Left subnode */ + Expr *pRight; /* Right subnode */ + union { + ExprList *pList; /* op = IN, EXISTS, SELECT, CASE, FUNCTION, BETWEEN */ + Select *pSelect; /* EP_xIsSelect and op = IN, EXISTS, SELECT */ + } x; + + /* If the EP_Reduced flag is set in the Expr.flags mask, then no + ** space is allocated for the fields below this point. An attempt to + ** access them will result in a segfault or malfunction. + *********************************************************************/ + +#if SQLITE_MAX_EXPR_DEPTH>0 + int nHeight; /* Height of the tree headed by this node */ +#endif + int iTable; /* TK_COLUMN: cursor number of table holding column + ** TK_REGISTER: register number + ** TK_TRIGGER: 1 -> new, 0 -> old + ** EP_Unlikely: 134217728 times likelihood + ** TK_IN: ephemerial table holding RHS + ** TK_SELECT_COLUMN: Number of columns on the LHS + ** TK_SELECT: 1st register of result vector */ + ynVar iColumn; /* TK_COLUMN: column index. -1 for rowid. + ** TK_VARIABLE: variable number (always >= 1). + ** TK_SELECT_COLUMN: column of the result vector */ + i16 iAgg; /* Which entry in pAggInfo->aCol[] or ->aFunc[] */ + union { + int iRightJoinTable; /* If EP_FromJoin, the right table of the join */ + int iOfst; /* else: start of token from start of statement */ + } w; + AggInfo *pAggInfo; /* Used by TK_AGG_COLUMN and TK_AGG_FUNCTION */ + union { + Table *pTab; /* TK_COLUMN: Table containing column. Can be NULL + ** for a column of an index on an expression */ + Window *pWin; /* EP_WinFunc: Window/Filter defn for a function */ + struct { /* TK_IN, TK_SELECT, and TK_EXISTS */ + int iAddr; /* Subroutine entry address */ + int regReturn; /* Register used to hold return address */ + } sub; + } y; +}; + +/* The following are the meanings of bits in the Expr.flags field. +** Value restrictions: +** +** EP_Agg == NC_HasAgg == SF_HasAgg +** EP_Win == NC_HasWin +*/ +#define EP_FromJoin 0x000001 /* Originates in ON/USING clause of outer join */ +#define EP_Distinct 0x000002 /* Aggregate function with DISTINCT keyword */ +#define EP_HasFunc 0x000004 /* Contains one or more functions of any kind */ +#define EP_FixedCol 0x000008 /* TK_Column with a known fixed value */ +#define EP_Agg 0x000010 /* Contains one or more aggregate functions */ +#define EP_VarSelect 0x000020 /* pSelect is correlated, not constant */ +#define EP_DblQuoted 0x000040 /* token.z was originally in "..." */ +#define EP_InfixFunc 0x000080 /* True for an infix function: LIKE, GLOB, etc */ +#define EP_Collate 0x000100 /* Tree contains a TK_COLLATE operator */ +#define EP_Commuted 0x000200 /* Comparison operator has been commuted */ +#define EP_IntValue 0x000400 /* Integer value contained in u.iValue */ +#define EP_xIsSelect 0x000800 /* x.pSelect is valid (otherwise x.pList is) */ +#define EP_Skip 0x001000 /* Operator does not contribute to affinity */ +#define EP_Reduced 0x002000 /* Expr struct EXPR_REDUCEDSIZE bytes only */ +#define EP_TokenOnly 0x004000 /* Expr struct EXPR_TOKENONLYSIZE bytes only */ +#define EP_Win 0x008000 /* Contains window functions */ +#define EP_MemToken 0x010000 /* Need to sqlite3DbFree() Expr.zToken */ +#define EP_IfNullRow 0x020000 /* The TK_IF_NULL_ROW opcode */ +#define EP_Unlikely 0x040000 /* unlikely() or likelihood() function */ +#define EP_ConstFunc 0x080000 /* A SQLITE_FUNC_CONSTANT or _SLOCHNG function */ +#define EP_CanBeNull 0x100000 /* Can be null despite NOT NULL constraint */ +#define EP_Subquery 0x200000 /* Tree contains a TK_SELECT operator */ + /* 0x400000 // Available */ +#define EP_Leaf 0x800000 /* Expr.pLeft, .pRight, .u.pSelect all NULL */ +#define EP_WinFunc 0x1000000 /* TK_FUNCTION with Expr.y.pWin set */ +#define EP_Subrtn 0x2000000 /* Uses Expr.y.sub. TK_IN, _SELECT, or _EXISTS */ +#define EP_Quoted 0x4000000 /* TK_ID was originally quoted */ +#define EP_Static 0x8000000 /* Held in memory not obtained from malloc() */ +#define EP_IsTrue 0x10000000 /* Always has boolean value of TRUE */ +#define EP_IsFalse 0x20000000 /* Always has boolean value of FALSE */ +#define EP_FromDDL 0x40000000 /* Originates from sqlite_schema */ + /* 0x80000000 // Available */ + +/* The EP_Propagate mask is a set of properties that automatically propagate +** upwards into parent nodes. +*/ +#define EP_Propagate (EP_Collate|EP_Subquery|EP_HasFunc) + +/* Macros can be used to test, set, or clear bits in the +** Expr.flags field. +*/ +#define ExprHasProperty(E,P) (((E)->flags&(P))!=0) +#define ExprHasAllProperty(E,P) (((E)->flags&(P))==(P)) +#define ExprSetProperty(E,P) (E)->flags|=(P) +#define ExprClearProperty(E,P) (E)->flags&=~(P) +#define ExprAlwaysTrue(E) (((E)->flags&(EP_FromJoin|EP_IsTrue))==EP_IsTrue) +#define ExprAlwaysFalse(E) (((E)->flags&(EP_FromJoin|EP_IsFalse))==EP_IsFalse) + +/* Macros used to ensure that the correct members of unions are accessed +** in Expr. +*/ +#define ExprUseUToken(E) (((E)->flags&EP_IntValue)==0) +#define ExprUseUValue(E) (((E)->flags&EP_IntValue)!=0) +#define ExprUseXList(E) (((E)->flags&EP_xIsSelect)==0) +#define ExprUseXSelect(E) (((E)->flags&EP_xIsSelect)!=0) +#define ExprUseYTab(E) (((E)->flags&(EP_WinFunc|EP_Subrtn))==0) +#define ExprUseYWin(E) (((E)->flags&EP_WinFunc)!=0) +#define ExprUseYSub(E) (((E)->flags&EP_Subrtn)!=0) + +/* Flags for use with Expr.vvaFlags +*/ +#define EP_NoReduce 0x01 /* Cannot EXPRDUP_REDUCE this Expr */ +#define EP_Immutable 0x02 /* Do not change this Expr node */ + +/* The ExprSetVVAProperty() macro is used for Verification, Validation, +** and Accreditation only. It works like ExprSetProperty() during VVA +** processes but is a no-op for delivery. +*/ +#ifdef SQLITE_DEBUG +# define ExprSetVVAProperty(E,P) (E)->vvaFlags|=(P) +# define ExprHasVVAProperty(E,P) (((E)->vvaFlags&(P))!=0) +# define ExprClearVVAProperties(E) (E)->vvaFlags = 0 +#else +# define ExprSetVVAProperty(E,P) +# define ExprHasVVAProperty(E,P) 0 +# define ExprClearVVAProperties(E) +#endif + +/* +** Macros to determine the number of bytes required by a normal Expr +** struct, an Expr struct with the EP_Reduced flag set in Expr.flags +** and an Expr struct with the EP_TokenOnly flag set. +*/ +#define EXPR_FULLSIZE sizeof(Expr) /* Full size */ +#define EXPR_REDUCEDSIZE offsetof(Expr,iTable) /* Common features */ +#define EXPR_TOKENONLYSIZE offsetof(Expr,pLeft) /* Fewer features */ + +/* +** Flags passed to the sqlite3ExprDup() function. See the header comment +** above sqlite3ExprDup() for details. +*/ +#define EXPRDUP_REDUCE 0x0001 /* Used reduced-size Expr nodes */ + +/* +** True if the expression passed as an argument was a function with +** an OVER() clause (a window function). +*/ +#ifdef SQLITE_OMIT_WINDOWFUNC +# define IsWindowFunc(p) 0 +#else +# define IsWindowFunc(p) ( \ + ExprHasProperty((p), EP_WinFunc) && p->y.pWin->eFrmType!=TK_FILTER \ + ) +#endif + +/* +** A list of expressions. Each expression may optionally have a +** name. An expr/name combination can be used in several ways, such +** as the list of "expr AS ID" fields following a "SELECT" or in the +** list of "ID = expr" items in an UPDATE. A list of expressions can +** also be used as the argument to a function, in which case the a.zName +** field is not used. +** +** In order to try to keep memory usage down, the Expr.a.zEName field +** is used for multiple purposes: +** +** eEName Usage +** ---------- ------------------------- +** ENAME_NAME (1) the AS of result set column +** (2) COLUMN= of an UPDATE +** +** ENAME_TAB DB.TABLE.NAME used to resolve names +** of subqueries +** +** ENAME_SPAN Text of the original result set +** expression. +*/ +struct ExprList { + int nExpr; /* Number of expressions on the list */ + int nAlloc; /* Number of a[] slots allocated */ + struct ExprList_item { /* For each expression in the list */ + Expr *pExpr; /* The parse tree for this expression */ + char *zEName; /* Token associated with this expression */ + u8 sortFlags; /* Mask of KEYINFO_ORDER_* flags */ + unsigned eEName :2; /* Meaning of zEName */ + unsigned done :1; /* A flag to indicate when processing is finished */ + unsigned reusable :1; /* Constant expression is reusable */ + unsigned bSorterRef :1; /* Defer evaluation until after sorting */ + unsigned bNulls: 1; /* True if explicit "NULLS FIRST/LAST" */ + union { + struct { /* Used by any ExprList other than Parse.pConsExpr */ + u16 iOrderByCol; /* For ORDER BY, column number in result set */ + u16 iAlias; /* Index into Parse.aAlias[] for zName */ + } x; + int iConstExprReg; /* Register in which Expr value is cached. Used only + ** by Parse.pConstExpr */ + } u; + } a[1]; /* One slot for each expression in the list */ +}; + +/* +** Allowed values for Expr.a.eEName +*/ +#define ENAME_NAME 0 /* The AS clause of a result set */ +#define ENAME_SPAN 1 /* Complete text of the result set expression */ +#define ENAME_TAB 2 /* "DB.TABLE.NAME" for the result set */ + +/* +** An instance of this structure can hold a simple list of identifiers, +** such as the list "a,b,c" in the following statements: +** +** INSERT INTO t(a,b,c) VALUES ...; +** CREATE INDEX idx ON t(a,b,c); +** CREATE TRIGGER trig BEFORE UPDATE ON t(a,b,c) ...; +** +** The IdList.a.idx field is used when the IdList represents the list of +** column names after a table name in an INSERT statement. In the statement +** +** INSERT INTO t(a,b,c) ... +** +** If "a" is the k-th column of table "t", then IdList.a[0].idx==k. +*/ +struct IdList { + struct IdList_item { + char *zName; /* Name of the identifier */ + int idx; /* Index in some Table.aCol[] of a column named zName */ + } *a; + int nId; /* Number of identifiers on the list */ +}; + +/* +** The SrcItem object represents a single term in the FROM clause of a query. +** The SrcList object is mostly an array of SrcItems. +** +** Union member validity: +** +** u1.zIndexedBy fg.isIndexedBy && !fg.isTabFunc +** u1.pFuncArg fg.isTabFunc && !fg.isIndexedBy +** u2.pIBIndex fg.isIndexedBy && !fg.isCte +** u2.pCteUse fg.isCte && !fg.isIndexedBy +*/ +struct SrcItem { + Schema *pSchema; /* Schema to which this item is fixed */ + char *zDatabase; /* Name of database holding this table */ + char *zName; /* Name of the table */ + char *zAlias; /* The "B" part of a "A AS B" phrase. zName is the "A" */ + Table *pTab; /* An SQL table corresponding to zName */ + Select *pSelect; /* A SELECT statement used in place of a table name */ + int addrFillSub; /* Address of subroutine to manifest a subquery */ + int regReturn; /* Register holding return address of addrFillSub */ + int regResult; /* Registers holding results of a co-routine */ + struct { + u8 jointype; /* Type of join between this table and the previous */ + unsigned notIndexed :1; /* True if there is a NOT INDEXED clause */ + unsigned isIndexedBy :1; /* True if there is an INDEXED BY clause */ + unsigned isTabFunc :1; /* True if table-valued-function syntax */ + unsigned isCorrelated :1; /* True if sub-query is correlated */ + unsigned viaCoroutine :1; /* Implemented as a co-routine */ + unsigned isRecursive :1; /* True for recursive reference in WITH */ + unsigned fromDDL :1; /* Comes from sqlite_schema */ + unsigned isCte :1; /* This is a CTE */ + unsigned notCte :1; /* This item may not match a CTE */ + } fg; + int iCursor; /* The VDBE cursor number used to access this table */ + Expr *pOn; /* The ON clause of a join */ + IdList *pUsing; /* The USING clause of a join */ + Bitmask colUsed; /* Bit N (1<" clause */ + ExprList *pFuncArg; /* Arguments to table-valued-function */ + } u1; + union { + Index *pIBIndex; /* Index structure corresponding to u1.zIndexedBy */ + CteUse *pCteUse; /* CTE Usage info info fg.isCte is true */ + } u2; +}; + +/* +** The following structure describes the FROM clause of a SELECT statement. +** Each table or subquery in the FROM clause is a separate element of +** the SrcList.a[] array. +** +** With the addition of multiple database support, the following structure +** can also be used to describe a particular table such as the table that +** is modified by an INSERT, DELETE, or UPDATE statement. In standard SQL, +** such a table must be a simple name: ID. But in SQLite, the table can +** now be identified by a database name, a dot, then the table name: ID.ID. +** +** The jointype starts out showing the join type between the current table +** and the next table on the list. The parser builds the list this way. +** But sqlite3SrcListShiftJoinType() later shifts the jointypes so that each +** jointype expresses the join between the table and the previous table. +** +** In the colUsed field, the high-order bit (bit 63) is set if the table +** contains more than 63 columns and the 64-th or later column is used. +*/ +struct SrcList { + int nSrc; /* Number of tables or subqueries in the FROM clause */ + u32 nAlloc; /* Number of entries allocated in a[] below */ + SrcItem a[1]; /* One entry for each identifier on the list */ +}; + +/* +** Permitted values of the SrcList.a.jointype field +*/ +#define JT_INNER 0x0001 /* Any kind of inner or cross join */ +#define JT_CROSS 0x0002 /* Explicit use of the CROSS keyword */ +#define JT_NATURAL 0x0004 /* True for a "natural" join */ +#define JT_LEFT 0x0008 /* Left outer join */ +#define JT_RIGHT 0x0010 /* Right outer join */ +#define JT_OUTER 0x0020 /* The "OUTER" keyword is present */ +#define JT_ERROR 0x0040 /* unknown or unsupported join type */ + + +/* +** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin() +** and the WhereInfo.wctrlFlags member. +** +** Value constraints (enforced via assert()): +** WHERE_USE_LIMIT == SF_FixedLimit +*/ +#define WHERE_ORDERBY_NORMAL 0x0000 /* No-op */ +#define WHERE_ORDERBY_MIN 0x0001 /* ORDER BY processing for min() func */ +#define WHERE_ORDERBY_MAX 0x0002 /* ORDER BY processing for max() func */ +#define WHERE_ONEPASS_DESIRED 0x0004 /* Want to do one-pass UPDATE/DELETE */ +#define WHERE_ONEPASS_MULTIROW 0x0008 /* ONEPASS is ok with multiple rows */ +#define WHERE_DUPLICATES_OK 0x0010 /* Ok to return a row more than once */ +#define WHERE_OR_SUBCLAUSE 0x0020 /* Processing a sub-WHERE as part of + ** the OR optimization */ +#define WHERE_GROUPBY 0x0040 /* pOrderBy is really a GROUP BY */ +#define WHERE_DISTINCTBY 0x0080 /* pOrderby is really a DISTINCT clause */ +#define WHERE_WANT_DISTINCT 0x0100 /* All output needs to be distinct */ +#define WHERE_SORTBYGROUP 0x0200 /* Support sqlite3WhereIsSorted() */ +#define WHERE_AGG_DISTINCT 0x0400 /* Query is "SELECT agg(DISTINCT ...)" */ +#define WHERE_ORDERBY_LIMIT 0x0800 /* ORDERBY+LIMIT on the inner loop */ + /* 0x1000 not currently used */ + /* 0x2000 not currently used */ +#define WHERE_USE_LIMIT 0x4000 /* Use the LIMIT in cost estimates */ + /* 0x8000 not currently used */ + +/* Allowed return values from sqlite3WhereIsDistinct() +*/ +#define WHERE_DISTINCT_NOOP 0 /* DISTINCT keyword not used */ +#define WHERE_DISTINCT_UNIQUE 1 /* No duplicates */ +#define WHERE_DISTINCT_ORDERED 2 /* All duplicates are adjacent */ +#define WHERE_DISTINCT_UNORDERED 3 /* Duplicates are scattered */ + +/* +** A NameContext defines a context in which to resolve table and column +** names. The context consists of a list of tables (the pSrcList) field and +** a list of named expression (pEList). The named expression list may +** be NULL. The pSrc corresponds to the FROM clause of a SELECT or +** to the table being operated on by INSERT, UPDATE, or DELETE. The +** pEList corresponds to the result set of a SELECT and is NULL for +** other statements. +** +** NameContexts can be nested. When resolving names, the inner-most +** context is searched first. If no match is found, the next outer +** context is checked. If there is still no match, the next context +** is checked. This process continues until either a match is found +** or all contexts are check. When a match is found, the nRef member of +** the context containing the match is incremented. +** +** Each subquery gets a new NameContext. The pNext field points to the +** NameContext in the parent query. Thus the process of scanning the +** NameContext list corresponds to searching through successively outer +** subqueries looking for a match. +*/ +struct NameContext { + Parse *pParse; /* The parser */ + SrcList *pSrcList; /* One or more tables used to resolve names */ + union { + ExprList *pEList; /* Optional list of result-set columns */ + AggInfo *pAggInfo; /* Information about aggregates at this level */ + Upsert *pUpsert; /* ON CONFLICT clause information from an upsert */ + int iBaseReg; /* For TK_REGISTER when parsing RETURNING */ + } uNC; + NameContext *pNext; /* Next outer name context. NULL for outermost */ + int nRef; /* Number of names resolved by this context */ + int nNcErr; /* Number of errors encountered while resolving names */ + int ncFlags; /* Zero or more NC_* flags defined below */ + Select *pWinSelect; /* SELECT statement for any window functions */ +}; + +/* +** Allowed values for the NameContext, ncFlags field. +** +** Value constraints (all checked via assert()): +** NC_HasAgg == SF_HasAgg == EP_Agg +** NC_MinMaxAgg == SF_MinMaxAgg == SQLITE_FUNC_MINMAX +** NC_OrderAgg == SF_OrderByReqd == SQLITE_FUNC_ANYORDER +** NC_HasWin == EP_Win +** +*/ +#define NC_AllowAgg 0x000001 /* Aggregate functions are allowed here */ +#define NC_PartIdx 0x000002 /* True if resolving a partial index WHERE */ +#define NC_IsCheck 0x000004 /* True if resolving a CHECK constraint */ +#define NC_GenCol 0x000008 /* True for a GENERATED ALWAYS AS clause */ +#define NC_HasAgg 0x000010 /* One or more aggregate functions seen */ +#define NC_IdxExpr 0x000020 /* True if resolving columns of CREATE INDEX */ +#define NC_SelfRef 0x00002e /* Combo: PartIdx, isCheck, GenCol, and IdxExpr */ +#define NC_VarSelect 0x000040 /* A correlated subquery has been seen */ +#define NC_UEList 0x000080 /* True if uNC.pEList is used */ +#define NC_UAggInfo 0x000100 /* True if uNC.pAggInfo is used */ +#define NC_UUpsert 0x000200 /* True if uNC.pUpsert is used */ +#define NC_UBaseReg 0x000400 /* True if uNC.iBaseReg is used */ +#define NC_MinMaxAgg 0x001000 /* min/max aggregates seen. See note above */ +#define NC_Complex 0x002000 /* True if a function or subquery seen */ +#define NC_AllowWin 0x004000 /* Window functions are allowed here */ +#define NC_HasWin 0x008000 /* One or more window functions seen */ +#define NC_IsDDL 0x010000 /* Resolving names in a CREATE statement */ +#define NC_InAggFunc 0x020000 /* True if analyzing arguments to an agg func */ +#define NC_FromDDL 0x040000 /* SQL text comes from sqlite_schema */ +#define NC_NoSelect 0x080000 /* Do not descend into sub-selects */ +#define NC_OrderAgg 0x8000000 /* Has an aggregate other than count/min/max */ + +/* +** An instance of the following object describes a single ON CONFLICT +** clause in an upsert. +** +** The pUpsertTarget field is only set if the ON CONFLICT clause includes +** conflict-target clause. (In "ON CONFLICT(a,b)" the "(a,b)" is the +** conflict-target clause.) The pUpsertTargetWhere is the optional +** WHERE clause used to identify partial unique indexes. +** +** pUpsertSet is the list of column=expr terms of the UPDATE statement. +** The pUpsertSet field is NULL for a ON CONFLICT DO NOTHING. The +** pUpsertWhere is the WHERE clause for the UPDATE and is NULL if the +** WHERE clause is omitted. +*/ +struct Upsert { + ExprList *pUpsertTarget; /* Optional description of conflict target */ + Expr *pUpsertTargetWhere; /* WHERE clause for partial index targets */ + ExprList *pUpsertSet; /* The SET clause from an ON CONFLICT UPDATE */ + Expr *pUpsertWhere; /* WHERE clause for the ON CONFLICT UPDATE */ + Upsert *pNextUpsert; /* Next ON CONFLICT clause in the list */ + u8 isDoUpdate; /* True for DO UPDATE. False for DO NOTHING */ + /* Above this point is the parse tree for the ON CONFLICT clauses. + ** The next group of fields stores intermediate data. */ + void *pToFree; /* Free memory when deleting the Upsert object */ + /* All fields above are owned by the Upsert object and must be freed + ** when the Upsert is destroyed. The fields below are used to transfer + ** information from the INSERT processing down into the UPDATE processing + ** while generating code. The fields below are owned by the INSERT + ** statement and will be freed by INSERT processing. */ + Index *pUpsertIdx; /* UNIQUE constraint specified by pUpsertTarget */ + SrcList *pUpsertSrc; /* Table to be updated */ + int regData; /* First register holding array of VALUES */ + int iDataCur; /* Index of the data cursor */ + int iIdxCur; /* Index of the first index cursor */ +}; + +/* +** An instance of the following structure contains all information +** needed to generate code for a single SELECT statement. +** +** See the header comment on the computeLimitRegisters() routine for a +** detailed description of the meaning of the iLimit and iOffset fields. +** +** addrOpenEphm[] entries contain the address of OP_OpenEphemeral opcodes. +** These addresses must be stored so that we can go back and fill in +** the P4_KEYINFO and P2 parameters later. Neither the KeyInfo nor +** the number of columns in P2 can be computed at the same time +** as the OP_OpenEphm instruction is coded because not +** enough information about the compound query is known at that point. +** The KeyInfo for addrOpenTran[0] and [1] contains collating sequences +** for the result set. The KeyInfo for addrOpenEphm[2] contains collating +** sequences for the ORDER BY clause. +*/ +struct Select { + u8 op; /* One of: TK_UNION TK_ALL TK_INTERSECT TK_EXCEPT */ + LogEst nSelectRow; /* Estimated number of result rows */ + u32 selFlags; /* Various SF_* values */ + int iLimit, iOffset; /* Memory registers holding LIMIT & OFFSET counters */ + u32 selId; /* Unique identifier number for this SELECT */ + int addrOpenEphm[2]; /* OP_OpenEphem opcodes related to this select */ + ExprList *pEList; /* The fields of the result */ + SrcList *pSrc; /* The FROM clause */ + Expr *pWhere; /* The WHERE clause */ + ExprList *pGroupBy; /* The GROUP BY clause */ + Expr *pHaving; /* The HAVING clause */ + ExprList *pOrderBy; /* The ORDER BY clause */ + Select *pPrior; /* Prior select in a compound select statement */ + Select *pNext; /* Next select to the left in a compound */ + Expr *pLimit; /* LIMIT expression. NULL means not used. */ + With *pWith; /* WITH clause attached to this select. Or NULL. */ +#ifndef SQLITE_OMIT_WINDOWFUNC + Window *pWin; /* List of window functions */ + Window *pWinDefn; /* List of named window definitions */ +#endif +}; + +/* +** Allowed values for Select.selFlags. The "SF" prefix stands for +** "Select Flag". +** +** Value constraints (all checked via assert()) +** SF_HasAgg == NC_HasAgg +** SF_MinMaxAgg == NC_MinMaxAgg == SQLITE_FUNC_MINMAX +** SF_OrderByReqd == NC_OrderAgg == SQLITE_FUNC_ANYORDER +** SF_FixedLimit == WHERE_USE_LIMIT +*/ +#define SF_Distinct 0x0000001 /* Output should be DISTINCT */ +#define SF_All 0x0000002 /* Includes the ALL keyword */ +#define SF_Resolved 0x0000004 /* Identifiers have been resolved */ +#define SF_Aggregate 0x0000008 /* Contains agg functions or a GROUP BY */ +#define SF_HasAgg 0x0000010 /* Contains aggregate functions */ +#define SF_UsesEphemeral 0x0000020 /* Uses the OpenEphemeral opcode */ +#define SF_Expanded 0x0000040 /* sqlite3SelectExpand() called on this */ +#define SF_HasTypeInfo 0x0000080 /* FROM subqueries have Table metadata */ +#define SF_Compound 0x0000100 /* Part of a compound query */ +#define SF_Values 0x0000200 /* Synthesized from VALUES clause */ +#define SF_MultiValue 0x0000400 /* Single VALUES term with multiple rows */ +#define SF_NestedFrom 0x0000800 /* Part of a parenthesized FROM clause */ +#define SF_MinMaxAgg 0x0001000 /* Aggregate containing min() or max() */ +#define SF_Recursive 0x0002000 /* The recursive part of a recursive CTE */ +#define SF_FixedLimit 0x0004000 /* nSelectRow set by a constant LIMIT */ +#define SF_MaybeConvert 0x0008000 /* Need convertCompoundSelectToSubquery() */ +#define SF_Converted 0x0010000 /* By convertCompoundSelectToSubquery() */ +#define SF_IncludeHidden 0x0020000 /* Include hidden columns in output */ +#define SF_ComplexResult 0x0040000 /* Result contains subquery or function */ +#define SF_WhereBegin 0x0080000 /* Really a WhereBegin() call. Debug Only */ +#define SF_WinRewrite 0x0100000 /* Window function rewrite accomplished */ +#define SF_View 0x0200000 /* SELECT statement is a view */ +#define SF_NoopOrderBy 0x0400000 /* ORDER BY is ignored for this query */ +#define SF_UFSrcCheck 0x0800000 /* Check pSrc as required by UPDATE...FROM */ +#define SF_PushDown 0x1000000 /* SELECT has be modified by push-down opt */ +#define SF_MultiPart 0x2000000 /* Has multiple incompatible PARTITIONs */ +#define SF_CopyCte 0x4000000 /* SELECT statement is a copy of a CTE */ +#define SF_OrderByReqd 0x8000000 /* The ORDER BY clause may not be omitted */ + +/* +** The results of a SELECT can be distributed in several ways, as defined +** by one of the following macros. The "SRT" prefix means "SELECT Result +** Type". +** +** SRT_Union Store results as a key in a temporary index +** identified by pDest->iSDParm. +** +** SRT_Except Remove results from the temporary index pDest->iSDParm. +** +** SRT_Exists Store a 1 in memory cell pDest->iSDParm if the result +** set is not empty. +** +** SRT_Discard Throw the results away. This is used by SELECT +** statements within triggers whose only purpose is +** the side-effects of functions. +** +** SRT_Output Generate a row of output (using the OP_ResultRow +** opcode) for each row in the result set. +** +** SRT_Mem Only valid if the result is a single column. +** Store the first column of the first result row +** in register pDest->iSDParm then abandon the rest +** of the query. This destination implies "LIMIT 1". +** +** SRT_Set The result must be a single column. Store each +** row of result as the key in table pDest->iSDParm. +** Apply the affinity pDest->affSdst before storing +** results. Used to implement "IN (SELECT ...)". +** +** SRT_EphemTab Create an temporary table pDest->iSDParm and store +** the result there. The cursor is left open after +** returning. This is like SRT_Table except that +** this destination uses OP_OpenEphemeral to create +** the table first. +** +** SRT_Coroutine Generate a co-routine that returns a new row of +** results each time it is invoked. The entry point +** of the co-routine is stored in register pDest->iSDParm +** and the result row is stored in pDest->nDest registers +** starting with pDest->iSdst. +** +** SRT_Table Store results in temporary table pDest->iSDParm. +** SRT_Fifo This is like SRT_EphemTab except that the table +** is assumed to already be open. SRT_Fifo has +** the additional property of being able to ignore +** the ORDER BY clause. +** +** SRT_DistFifo Store results in a temporary table pDest->iSDParm. +** But also use temporary table pDest->iSDParm+1 as +** a record of all prior results and ignore any duplicate +** rows. Name means: "Distinct Fifo". +** +** SRT_Queue Store results in priority queue pDest->iSDParm (really +** an index). Append a sequence number so that all entries +** are distinct. +** +** SRT_DistQueue Store results in priority queue pDest->iSDParm only if +** the same record has never been stored before. The +** index at pDest->iSDParm+1 hold all prior stores. +** +** SRT_Upfrom Store results in the temporary table already opened by +** pDest->iSDParm. If (pDest->iSDParm<0), then the temp +** table is an intkey table - in this case the first +** column returned by the SELECT is used as the integer +** key. If (pDest->iSDParm>0), then the table is an index +** table. (pDest->iSDParm) is the number of key columns in +** each index record in this case. +*/ +#define SRT_Union 1 /* Store result as keys in an index */ +#define SRT_Except 2 /* Remove result from a UNION index */ +#define SRT_Exists 3 /* Store 1 if the result is not empty */ +#define SRT_Discard 4 /* Do not save the results anywhere */ +#define SRT_DistFifo 5 /* Like SRT_Fifo, but unique results only */ +#define SRT_DistQueue 6 /* Like SRT_Queue, but unique results only */ + +/* The DISTINCT clause is ignored for all of the above. Not that +** IgnorableDistinct() implies IgnorableOrderby() */ +#define IgnorableDistinct(X) ((X->eDest)<=SRT_DistQueue) + +#define SRT_Queue 7 /* Store result in an queue */ +#define SRT_Fifo 8 /* Store result as data with an automatic rowid */ + +/* The ORDER BY clause is ignored for all of the above */ +#define IgnorableOrderby(X) ((X->eDest)<=SRT_Fifo) + +#define SRT_Output 9 /* Output each row of result */ +#define SRT_Mem 10 /* Store result in a memory cell */ +#define SRT_Set 11 /* Store results as keys in an index */ +#define SRT_EphemTab 12 /* Create transient tab and store like SRT_Table */ +#define SRT_Coroutine 13 /* Generate a single row of result */ +#define SRT_Table 14 /* Store result as data with an automatic rowid */ +#define SRT_Upfrom 15 /* Store result as data with rowid */ + +/* +** An instance of this object describes where to put of the results of +** a SELECT statement. +*/ +struct SelectDest { + u8 eDest; /* How to dispose of the results. One of SRT_* above. */ + int iSDParm; /* A parameter used by the eDest disposal method */ + int iSDParm2; /* A second parameter for the eDest disposal method */ + int iSdst; /* Base register where results are written */ + int nSdst; /* Number of registers allocated */ + char *zAffSdst; /* Affinity used when eDest==SRT_Set */ + ExprList *pOrderBy; /* Key columns for SRT_Queue and SRT_DistQueue */ +}; + +/* +** During code generation of statements that do inserts into AUTOINCREMENT +** tables, the following information is attached to the Table.u.autoInc.p +** pointer of each autoincrement table to record some side information that +** the code generator needs. We have to keep per-table autoincrement +** information in case inserts are done within triggers. Triggers do not +** normally coordinate their activities, but we do need to coordinate the +** loading and saving of autoincrement information. +*/ +struct AutoincInfo { + AutoincInfo *pNext; /* Next info block in a list of them all */ + Table *pTab; /* Table this info block refers to */ + int iDb; /* Index in sqlite3.aDb[] of database holding pTab */ + int regCtr; /* Memory register holding the rowid counter */ +}; + +/* +** At least one instance of the following structure is created for each +** trigger that may be fired while parsing an INSERT, UPDATE or DELETE +** statement. All such objects are stored in the linked list headed at +** Parse.pTriggerPrg and deleted once statement compilation has been +** completed. +** +** A Vdbe sub-program that implements the body and WHEN clause of trigger +** TriggerPrg.pTrigger, assuming a default ON CONFLICT clause of +** TriggerPrg.orconf, is stored in the TriggerPrg.pProgram variable. +** The Parse.pTriggerPrg list never contains two entries with the same +** values for both pTrigger and orconf. +** +** The TriggerPrg.aColmask[0] variable is set to a mask of old.* columns +** accessed (or set to 0 for triggers fired as a result of INSERT +** statements). Similarly, the TriggerPrg.aColmask[1] variable is set to +** a mask of new.* columns used by the program. +*/ +struct TriggerPrg { + Trigger *pTrigger; /* Trigger this program was coded from */ + TriggerPrg *pNext; /* Next entry in Parse.pTriggerPrg list */ + SubProgram *pProgram; /* Program implementing pTrigger/orconf */ + int orconf; /* Default ON CONFLICT policy */ + u32 aColmask[2]; /* Masks of old.*, new.* columns accessed */ +}; + +/* +** The yDbMask datatype for the bitmask of all attached databases. +*/ +#if SQLITE_MAX_ATTACHED>30 + typedef unsigned char yDbMask[(SQLITE_MAX_ATTACHED+9)/8]; +# define DbMaskTest(M,I) (((M)[(I)/8]&(1<<((I)&7)))!=0) +# define DbMaskZero(M) memset((M),0,sizeof(M)) +# define DbMaskSet(M,I) (M)[(I)/8]|=(1<<((I)&7)) +# define DbMaskAllZero(M) sqlite3DbMaskAllZero(M) +# define DbMaskNonZero(M) (sqlite3DbMaskAllZero(M)==0) +#else + typedef unsigned int yDbMask; +# define DbMaskTest(M,I) (((M)&(((yDbMask)1)<<(I)))!=0) +# define DbMaskZero(M) (M)=0 +# define DbMaskSet(M,I) (M)|=(((yDbMask)1)<<(I)) +# define DbMaskAllZero(M) (M)==0 +# define DbMaskNonZero(M) (M)!=0 +#endif + +/* +** An instance of the ParseCleanup object specifies an operation that +** should be performed after parsing to deallocation resources obtained +** during the parse and which are no longer needed. +*/ +struct ParseCleanup { + ParseCleanup *pNext; /* Next cleanup task */ + void *pPtr; /* Pointer to object to deallocate */ + void (*xCleanup)(sqlite3*,void*); /* Deallocation routine */ +}; + +/* +** An SQL parser context. A copy of this structure is passed through +** the parser and down into all the parser action routine in order to +** carry around information that is global to the entire parse. +** +** The structure is divided into two parts. When the parser and code +** generate call themselves recursively, the first part of the structure +** is constant but the second part is reset at the beginning and end of +** each recursion. +** +** The nTableLock and aTableLock variables are only used if the shared-cache +** feature is enabled (if sqlite3Tsd()->useSharedData is true). They are +** used to store the set of table-locks required by the statement being +** compiled. Function sqlite3TableLock() is used to add entries to the +** list. +*/ +struct Parse { + sqlite3 *db; /* The main database structure */ + char *zErrMsg; /* An error message */ + Vdbe *pVdbe; /* An engine for executing database bytecode */ + int rc; /* Return code from execution */ + u8 colNamesSet; /* TRUE after OP_ColumnName has been issued to pVdbe */ + u8 checkSchema; /* Causes schema cookie check after an error */ + u8 nested; /* Number of nested calls to the parser/code generator */ + u8 nTempReg; /* Number of temporary registers in aTempReg[] */ + u8 isMultiWrite; /* True if statement may modify/insert multiple rows */ + u8 mayAbort; /* True if statement may throw an ABORT exception */ + u8 hasCompound; /* Need to invoke convertCompoundSelectToSubquery() */ + u8 okConstFactor; /* OK to factor out constants */ + u8 disableLookaside; /* Number of times lookaside has been disabled */ + u8 disableVtab; /* Disable all virtual tables for this parse */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) + u8 earlyCleanup; /* OOM inside sqlite3ParserAddCleanup() */ +#endif + int nRangeReg; /* Size of the temporary register block */ + int iRangeReg; /* First register in temporary register block */ + int nErr; /* Number of errors seen */ + int nTab; /* Number of previously allocated VDBE cursors */ + int nMem; /* Number of memory cells used so far */ + int szOpAlloc; /* Bytes of memory space allocated for Vdbe.aOp[] */ + int iSelfTab; /* Table associated with an index on expr, or negative + ** of the base register during check-constraint eval */ + int nLabel; /* The *negative* of the number of labels used */ + int nLabelAlloc; /* Number of slots in aLabel */ + int *aLabel; /* Space to hold the labels */ + ExprList *pConstExpr;/* Constant expressions */ + Token constraintName;/* Name of the constraint currently being parsed */ + yDbMask writeMask; /* Start a write transaction on these databases */ + yDbMask cookieMask; /* Bitmask of schema verified databases */ + int regRowid; /* Register holding rowid of CREATE TABLE entry */ + int regRoot; /* Register holding root page number for new objects */ + int nMaxArg; /* Max args passed to user function by sub-program */ + int nSelect; /* Number of SELECT stmts. Counter for Select.selId */ +#ifndef SQLITE_OMIT_SHARED_CACHE + int nTableLock; /* Number of locks in aTableLock */ + TableLock *aTableLock; /* Required table locks for shared-cache mode */ +#endif + AutoincInfo *pAinc; /* Information about AUTOINCREMENT counters */ + Parse *pToplevel; /* Parse structure for main program (or NULL) */ + Table *pTriggerTab; /* Table triggers are being coded for */ + TriggerPrg *pTriggerPrg; /* Linked list of coded triggers */ + ParseCleanup *pCleanup; /* List of cleanup operations to run after parse */ + union { + int addrCrTab; /* Address of OP_CreateBtree on CREATE TABLE */ + Returning *pReturning; /* The RETURNING clause */ + } u1; + u32 nQueryLoop; /* Est number of iterations of a query (10*log2(N)) */ + u32 oldmask; /* Mask of old.* columns referenced */ + u32 newmask; /* Mask of new.* columns referenced */ + u8 eTriggerOp; /* TK_UPDATE, TK_INSERT or TK_DELETE */ + u8 bReturning; /* Coding a RETURNING trigger */ + u8 eOrconf; /* Default ON CONFLICT policy for trigger steps */ + u8 disableTriggers; /* True to disable triggers */ + + /************************************************************************** + ** Fields above must be initialized to zero. The fields that follow, + ** down to the beginning of the recursive section, do not need to be + ** initialized as they will be set before being used. The boundary is + ** determined by offsetof(Parse,aTempReg). + **************************************************************************/ + + int aTempReg[8]; /* Holding area for temporary registers */ + Parse *pOuterParse; /* Outer Parse object when nested */ + Token sNameToken; /* Token with unqualified schema object name */ + + /************************************************************************ + ** Above is constant between recursions. Below is reset before and after + ** each recursion. The boundary between these two regions is determined + ** using offsetof(Parse,sLastToken) so the sLastToken field must be the + ** first field in the recursive region. + ************************************************************************/ + + Token sLastToken; /* The last token parsed */ + ynVar nVar; /* Number of '?' variables seen in the SQL so far */ + u8 iPkSortOrder; /* ASC or DESC for INTEGER PRIMARY KEY */ + u8 explain; /* True if the EXPLAIN flag is found on the query */ + u8 eParseMode; /* PARSE_MODE_XXX constant */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + int nVtabLock; /* Number of virtual tables to lock */ +#endif + int nHeight; /* Expression tree height of current sub-select */ +#ifndef SQLITE_OMIT_EXPLAIN + int addrExplain; /* Address of current OP_Explain opcode */ +#endif + VList *pVList; /* Mapping between variable names and numbers */ + Vdbe *pReprepare; /* VM being reprepared (sqlite3Reprepare()) */ + const char *zTail; /* All SQL text past the last semicolon parsed */ + Table *pNewTable; /* A table being constructed by CREATE TABLE */ + Index *pNewIndex; /* An index being constructed by CREATE INDEX. + ** Also used to hold redundant UNIQUE constraints + ** during a RENAME COLUMN */ + Trigger *pNewTrigger; /* Trigger under construct by a CREATE TRIGGER */ + const char *zAuthContext; /* The 6th parameter to db->xAuth callbacks */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + Token sArg; /* Complete text of a module argument */ + Table **apVtabLock; /* Pointer to virtual tables needing locking */ +#endif + With *pWith; /* Current WITH clause, or NULL */ +#ifndef SQLITE_OMIT_ALTERTABLE + RenameToken *pRename; /* Tokens subject to renaming by ALTER TABLE */ +#endif +}; + +/* Allowed values for Parse.eParseMode +*/ +#define PARSE_MODE_NORMAL 0 +#define PARSE_MODE_DECLARE_VTAB 1 +#define PARSE_MODE_RENAME 2 +#define PARSE_MODE_UNMAP 3 + +/* +** Sizes and pointers of various parts of the Parse object. +*/ +#define PARSE_HDR(X) (((char*)(X))+offsetof(Parse,zErrMsg)) +#define PARSE_HDR_SZ (offsetof(Parse,aTempReg)-offsetof(Parse,zErrMsg)) /* Recursive part w/o aColCache*/ +#define PARSE_RECURSE_SZ offsetof(Parse,sLastToken) /* Recursive part */ +#define PARSE_TAIL_SZ (sizeof(Parse)-PARSE_RECURSE_SZ) /* Non-recursive part */ +#define PARSE_TAIL(X) (((char*)(X))+PARSE_RECURSE_SZ) /* Pointer to tail */ + +/* +** Return true if currently inside an sqlite3_declare_vtab() call. +*/ +#ifdef SQLITE_OMIT_VIRTUALTABLE + #define IN_DECLARE_VTAB 0 +#else + #define IN_DECLARE_VTAB (pParse->eParseMode==PARSE_MODE_DECLARE_VTAB) +#endif + +#if defined(SQLITE_OMIT_ALTERTABLE) + #define IN_RENAME_OBJECT 0 +#else + #define IN_RENAME_OBJECT (pParse->eParseMode>=PARSE_MODE_RENAME) +#endif + +#if defined(SQLITE_OMIT_VIRTUALTABLE) && defined(SQLITE_OMIT_ALTERTABLE) + #define IN_SPECIAL_PARSE 0 +#else + #define IN_SPECIAL_PARSE (pParse->eParseMode!=PARSE_MODE_NORMAL) +#endif + +/* +** An instance of the following structure can be declared on a stack and used +** to save the Parse.zAuthContext value so that it can be restored later. +*/ +struct AuthContext { + const char *zAuthContext; /* Put saved Parse.zAuthContext here */ + Parse *pParse; /* The Parse structure */ +}; + +/* +** Bitfield flags for P5 value in various opcodes. +** +** Value constraints (enforced via assert()): +** OPFLAG_LENGTHARG == SQLITE_FUNC_LENGTH +** OPFLAG_TYPEOFARG == SQLITE_FUNC_TYPEOF +** OPFLAG_BULKCSR == BTREE_BULKLOAD +** OPFLAG_SEEKEQ == BTREE_SEEK_EQ +** OPFLAG_FORDELETE == BTREE_FORDELETE +** OPFLAG_SAVEPOSITION == BTREE_SAVEPOSITION +** OPFLAG_AUXDELETE == BTREE_AUXDELETE +*/ +#define OPFLAG_NCHANGE 0x01 /* OP_Insert: Set to update db->nChange */ + /* Also used in P2 (not P5) of OP_Delete */ +#define OPFLAG_NOCHNG 0x01 /* OP_VColumn nochange for UPDATE */ +#define OPFLAG_EPHEM 0x01 /* OP_Column: Ephemeral output is ok */ +#define OPFLAG_LASTROWID 0x20 /* Set to update db->lastRowid */ +#define OPFLAG_ISUPDATE 0x04 /* This OP_Insert is an sql UPDATE */ +#define OPFLAG_APPEND 0x08 /* This is likely to be an append */ +#define OPFLAG_USESEEKRESULT 0x10 /* Try to avoid a seek in BtreeInsert() */ +#define OPFLAG_ISNOOP 0x40 /* OP_Delete does pre-update-hook only */ +#define OPFLAG_LENGTHARG 0x40 /* OP_Column only used for length() */ +#define OPFLAG_TYPEOFARG 0x80 /* OP_Column only used for typeof() */ +#define OPFLAG_BULKCSR 0x01 /* OP_Open** used to open bulk cursor */ +#define OPFLAG_SEEKEQ 0x02 /* OP_Open** cursor uses EQ seek only */ +#define OPFLAG_FORDELETE 0x08 /* OP_Open should use BTREE_FORDELETE */ +#define OPFLAG_P2ISREG 0x10 /* P2 to OP_Open** is a register number */ +#define OPFLAG_PERMUTE 0x01 /* OP_Compare: use the permutation */ +#define OPFLAG_SAVEPOSITION 0x02 /* OP_Delete/Insert: save cursor pos */ +#define OPFLAG_AUXDELETE 0x04 /* OP_Delete: index in a DELETE op */ +#define OPFLAG_NOCHNG_MAGIC 0x6d /* OP_MakeRecord: serialtype 10 is ok */ +#define OPFLAG_PREFORMAT 0x80 /* OP_Insert uses preformatted cell */ + +/* + * Each trigger present in the database schema is stored as an instance of + * struct Trigger. + * + * Pointers to instances of struct Trigger are stored in two ways. + * 1. In the "trigHash" hash table (part of the sqlite3* that represents the + * database). This allows Trigger structures to be retrieved by name. + * 2. All triggers associated with a single table form a linked list, using the + * pNext member of struct Trigger. A pointer to the first element of the + * linked list is stored as the "pTrigger" member of the associated + * struct Table. + * + * The "step_list" member points to the first element of a linked list + * containing the SQL statements specified as the trigger program. + */ +struct Trigger { + char *zName; /* The name of the trigger */ + char *table; /* The table or view to which the trigger applies */ + u8 op; /* One of TK_DELETE, TK_UPDATE, TK_INSERT */ + u8 tr_tm; /* One of TRIGGER_BEFORE, TRIGGER_AFTER */ + u8 bReturning; /* This trigger implements a RETURNING clause */ + Expr *pWhen; /* The WHEN clause of the expression (may be NULL) */ + IdList *pColumns; /* If this is an UPDATE OF trigger, + the is stored here */ + Schema *pSchema; /* Schema containing the trigger */ + Schema *pTabSchema; /* Schema containing the table */ + TriggerStep *step_list; /* Link list of trigger program steps */ + Trigger *pNext; /* Next trigger associated with the table */ +}; + +/* +** A trigger is either a BEFORE or an AFTER trigger. The following constants +** determine which. +** +** If there are multiple triggers, you might of some BEFORE and some AFTER. +** In that cases, the constants below can be ORed together. +*/ +#define TRIGGER_BEFORE 1 +#define TRIGGER_AFTER 2 + +/* + * An instance of struct TriggerStep is used to store a single SQL statement + * that is a part of a trigger-program. + * + * Instances of struct TriggerStep are stored in a singly linked list (linked + * using the "pNext" member) referenced by the "step_list" member of the + * associated struct Trigger instance. The first element of the linked list is + * the first step of the trigger-program. + * + * The "op" member indicates whether this is a "DELETE", "INSERT", "UPDATE" or + * "SELECT" statement. The meanings of the other members is determined by the + * value of "op" as follows: + * + * (op == TK_INSERT) + * orconf -> stores the ON CONFLICT algorithm + * pSelect -> If this is an INSERT INTO ... SELECT ... statement, then + * this stores a pointer to the SELECT statement. Otherwise NULL. + * zTarget -> Dequoted name of the table to insert into. + * pExprList -> If this is an INSERT INTO ... VALUES ... statement, then + * this stores values to be inserted. Otherwise NULL. + * pIdList -> If this is an INSERT INTO ... () VALUES ... + * statement, then this stores the column-names to be + * inserted into. + * + * (op == TK_DELETE) + * zTarget -> Dequoted name of the table to delete from. + * pWhere -> The WHERE clause of the DELETE statement if one is specified. + * Otherwise NULL. + * + * (op == TK_UPDATE) + * zTarget -> Dequoted name of the table to update. + * pWhere -> The WHERE clause of the UPDATE statement if one is specified. + * Otherwise NULL. + * pExprList -> A list of the columns to update and the expressions to update + * them to. See sqlite3Update() documentation of "pChanges" + * argument. + * + */ +struct TriggerStep { + u8 op; /* One of TK_DELETE, TK_UPDATE, TK_INSERT, TK_SELECT, + ** or TK_RETURNING */ + u8 orconf; /* OE_Rollback etc. */ + Trigger *pTrig; /* The trigger that this step is a part of */ + Select *pSelect; /* SELECT statement or RHS of INSERT INTO SELECT ... */ + char *zTarget; /* Target table for DELETE, UPDATE, INSERT */ + SrcList *pFrom; /* FROM clause for UPDATE statement (if any) */ + Expr *pWhere; /* The WHERE clause for DELETE or UPDATE steps */ + ExprList *pExprList; /* SET clause for UPDATE, or RETURNING clause */ + IdList *pIdList; /* Column names for INSERT */ + Upsert *pUpsert; /* Upsert clauses on an INSERT */ + char *zSpan; /* Original SQL text of this command */ + TriggerStep *pNext; /* Next in the link-list */ + TriggerStep *pLast; /* Last element in link-list. Valid for 1st elem only */ +}; + +/* +** Information about a RETURNING clause +*/ +struct Returning { + Parse *pParse; /* The parse that includes the RETURNING clause */ + ExprList *pReturnEL; /* List of expressions to return */ + Trigger retTrig; /* The transient trigger that implements RETURNING */ + TriggerStep retTStep; /* The trigger step */ + int iRetCur; /* Transient table holding RETURNING results */ + int nRetCol; /* Number of in pReturnEL after expansion */ + int iRetReg; /* Register array for holding a row of RETURNING */ +}; + +/* +** An objected used to accumulate the text of a string where we +** do not necessarily know how big the string will be in the end. +*/ +struct sqlite3_str { + sqlite3 *db; /* Optional database for lookaside. Can be NULL */ + char *zText; /* The string collected so far */ + u32 nAlloc; /* Amount of space allocated in zText */ + u32 mxAlloc; /* Maximum allowed allocation. 0 for no malloc usage */ + u32 nChar; /* Length of the string so far */ + u8 accError; /* SQLITE_NOMEM or SQLITE_TOOBIG */ + u8 printfFlags; /* SQLITE_PRINTF flags below */ +}; +#define SQLITE_PRINTF_INTERNAL 0x01 /* Internal-use-only converters allowed */ +#define SQLITE_PRINTF_SQLFUNC 0x02 /* SQL function arguments to VXPrintf */ +#define SQLITE_PRINTF_MALLOCED 0x04 /* True if xText is allocated space */ + +#define isMalloced(X) (((X)->printfFlags & SQLITE_PRINTF_MALLOCED)!=0) + + +/* +** A pointer to this structure is used to communicate information +** from sqlite3Init and OP_ParseSchema into the sqlite3InitCallback. +*/ +typedef struct { + sqlite3 *db; /* The database being initialized */ + char **pzErrMsg; /* Error message stored here */ + int iDb; /* 0 for main database. 1 for TEMP, 2.. for ATTACHed */ + int rc; /* Result code stored here */ + u32 mInitFlags; /* Flags controlling error messages */ + u32 nInitRow; /* Number of rows processed */ + Pgno mxPage; /* Maximum page number. 0 for no limit. */ +} InitData; + +/* +** Allowed values for mInitFlags +*/ +#define INITFLAG_AlterMask 0x0003 /* Types of ALTER */ +#define INITFLAG_AlterRename 0x0001 /* Reparse after a RENAME */ +#define INITFLAG_AlterDrop 0x0002 /* Reparse after a DROP COLUMN */ +#define INITFLAG_AlterAdd 0x0003 /* Reparse after an ADD COLUMN */ + +/* Tuning parameters are set using SQLITE_TESTCTRL_TUNE and are controlled +** on debug-builds of the CLI using ".testctrl tune ID VALUE". Tuning +** parameters are for temporary use during development, to help find +** optimial values for parameters in the query planner. The should not +** be used on trunk check-ins. They are a temporary mechanism available +** for transient development builds only. +** +** Tuning parameters are numbered starting with 1. +*/ +#define SQLITE_NTUNE 6 /* Should be zero for all trunk check-ins */ +#ifdef SQLITE_DEBUG +# define Tuning(X) (sqlite3Config.aTune[(X)-1]) +#else +# define Tuning(X) 0 +#endif + +/* +** Structure containing global configuration data for the SQLite library. +** +** This structure also contains some state information. +*/ +struct Sqlite3Config { + int bMemstat; /* True to enable memory status */ + u8 bCoreMutex; /* True to enable core mutexing */ + u8 bFullMutex; /* True to enable full mutexing */ + u8 bOpenUri; /* True to interpret filenames as URIs */ + u8 bUseCis; /* Use covering indices for full-scans */ + u8 bSmallMalloc; /* Avoid large memory allocations if true */ + u8 bExtraSchemaChecks; /* Verify type,name,tbl_name in schema */ + int mxStrlen; /* Maximum string length */ + int neverCorrupt; /* Database is always well-formed */ + int szLookaside; /* Default lookaside buffer size */ + int nLookaside; /* Default lookaside buffer count */ + int nStmtSpill; /* Stmt-journal spill-to-disk threshold */ + sqlite3_mem_methods m; /* Low-level memory allocation interface */ + sqlite3_mutex_methods mutex; /* Low-level mutex interface */ + sqlite3_pcache_methods2 pcache2; /* Low-level page-cache interface */ + void *pHeap; /* Heap storage space */ + int nHeap; /* Size of pHeap[] */ + int mnReq, mxReq; /* Min and max heap requests sizes */ + sqlite3_int64 szMmap; /* mmap() space per open file */ + sqlite3_int64 mxMmap; /* Maximum value for szMmap */ + void *pPage; /* Page cache memory */ + int szPage; /* Size of each page in pPage[] */ + int nPage; /* Number of pages in pPage[] */ + int mxParserStack; /* maximum depth of the parser stack */ + int sharedCacheEnabled; /* true if shared-cache mode enabled */ + u32 szPma; /* Maximum Sorter PMA size */ + /* The above might be initialized to non-zero. The following need to always + ** initially be zero, however. */ + int isInit; /* True after initialization has finished */ + int inProgress; /* True while initialization in progress */ + int isMutexInit; /* True after mutexes are initialized */ + int isMallocInit; /* True after malloc is initialized */ + int isPCacheInit; /* True after malloc is initialized */ + int nRefInitMutex; /* Number of users of pInitMutex */ + sqlite3_mutex *pInitMutex; /* Mutex used by sqlite3_initialize() */ + void (*xLog)(void*,int,const char*); /* Function for logging */ + void *pLogArg; /* First argument to xLog() */ +#ifdef SQLITE_ENABLE_SQLLOG + void(*xSqllog)(void*,sqlite3*,const char*, int); + void *pSqllogArg; +#endif +#ifdef SQLITE_VDBE_COVERAGE + /* The following callback (if not NULL) is invoked on every VDBE branch + ** operation. Set the callback using SQLITE_TESTCTRL_VDBE_COVERAGE. + */ + void (*xVdbeBranch)(void*,unsigned iSrcLine,u8 eThis,u8 eMx); /* Callback */ + void *pVdbeBranchArg; /* 1st argument */ +#endif +#ifndef SQLITE_OMIT_DESERIALIZE + sqlite3_int64 mxMemdbSize; /* Default max memdb size */ +#endif +#ifndef SQLITE_UNTESTABLE + int (*xTestCallback)(int); /* Invoked by sqlite3FaultSim() */ +#endif + int bLocaltimeFault; /* True to fail localtime() calls */ + int (*xAltLocaltime)(const void*,void*); /* Alternative localtime() routine */ + int iOnceResetThreshold; /* When to reset OP_Once counters */ + u32 szSorterRef; /* Min size in bytes to use sorter-refs */ + unsigned int iPrngSeed; /* Alternative fixed seed for the PRNG */ + /* vvvv--- must be last ---vvv */ +#ifdef SQLITE_DEBUG + sqlite3_int64 aTune[SQLITE_NTUNE]; /* Tuning parameters */ +#endif +}; + +/* +** This macro is used inside of assert() statements to indicate that +** the assert is only valid on a well-formed database. Instead of: +** +** assert( X ); +** +** One writes: +** +** assert( X || CORRUPT_DB ); +** +** CORRUPT_DB is true during normal operation. CORRUPT_DB does not indicate +** that the database is definitely corrupt, only that it might be corrupt. +** For most test cases, CORRUPT_DB is set to false using a special +** sqlite3_test_control(). This enables assert() statements to prove +** things that are always true for well-formed databases. +*/ +#define CORRUPT_DB (sqlite3Config.neverCorrupt==0) + +/* +** Context pointer passed down through the tree-walk. +*/ +struct Walker { + Parse *pParse; /* Parser context. */ + int (*xExprCallback)(Walker*, Expr*); /* Callback for expressions */ + int (*xSelectCallback)(Walker*,Select*); /* Callback for SELECTs */ + void (*xSelectCallback2)(Walker*,Select*);/* Second callback for SELECTs */ + int walkerDepth; /* Number of subqueries */ + u16 eCode; /* A small processing code */ + union { /* Extra data for callback */ + NameContext *pNC; /* Naming context */ + int n; /* A counter */ + int iCur; /* A cursor number */ + SrcList *pSrcList; /* FROM clause */ + struct CCurHint *pCCurHint; /* Used by codeCursorHint() */ + struct RefSrcList *pRefSrcList; /* sqlite3ReferencesSrcList() */ + int *aiCol; /* array of column indexes */ + struct IdxCover *pIdxCover; /* Check for index coverage */ + struct IdxExprTrans *pIdxTrans; /* Convert idxed expr to column */ + ExprList *pGroupBy; /* GROUP BY clause */ + Select *pSelect; /* HAVING to WHERE clause ctx */ + struct WindowRewrite *pRewrite; /* Window rewrite context */ + struct WhereConst *pConst; /* WHERE clause constants */ + struct RenameCtx *pRename; /* RENAME COLUMN context */ + struct Table *pTab; /* Table of generated column */ + SrcItem *pSrcItem; /* A single FROM clause item */ + DbFixer *pFix; + } u; +}; + +/* +** The following structure contains information used by the sqliteFix... +** routines as they walk the parse tree to make database references +** explicit. +*/ +struct DbFixer { + Parse *pParse; /* The parsing context. Error messages written here */ + Walker w; /* Walker object */ + Schema *pSchema; /* Fix items to this schema */ + u8 bTemp; /* True for TEMP schema entries */ + const char *zDb; /* Make sure all objects are contained in this database */ + const char *zType; /* Type of the container - used for error messages */ + const Token *pName; /* Name of the container - used for error messages */ +}; + +/* Forward declarations */ +SQLITE_PRIVATE int sqlite3WalkExpr(Walker*, Expr*); +SQLITE_PRIVATE int sqlite3WalkExprList(Walker*, ExprList*); +SQLITE_PRIVATE int sqlite3WalkSelect(Walker*, Select*); +SQLITE_PRIVATE int sqlite3WalkSelectExpr(Walker*, Select*); +SQLITE_PRIVATE int sqlite3WalkSelectFrom(Walker*, Select*); +SQLITE_PRIVATE int sqlite3ExprWalkNoop(Walker*, Expr*); +SQLITE_PRIVATE int sqlite3SelectWalkNoop(Walker*, Select*); +SQLITE_PRIVATE int sqlite3SelectWalkFail(Walker*, Select*); +SQLITE_PRIVATE int sqlite3WalkerDepthIncrease(Walker*,Select*); +SQLITE_PRIVATE void sqlite3WalkerDepthDecrease(Walker*,Select*); +SQLITE_PRIVATE void sqlite3WalkWinDefnDummyCallback(Walker*,Select*); + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE void sqlite3SelectWalkAssert2(Walker*, Select*); +#endif + +#ifndef SQLITE_OMIT_CTE +SQLITE_PRIVATE void sqlite3SelectPopWith(Walker*, Select*); +#else +# define sqlite3SelectPopWith 0 +#endif + +/* +** Return code from the parse-tree walking primitives and their +** callbacks. +*/ +#define WRC_Continue 0 /* Continue down into children */ +#define WRC_Prune 1 /* Omit children but continue walking siblings */ +#define WRC_Abort 2 /* Abandon the tree walk */ + +/* +** A single common table expression +*/ +struct Cte { + char *zName; /* Name of this CTE */ + ExprList *pCols; /* List of explicit column names, or NULL */ + Select *pSelect; /* The definition of this CTE */ + const char *zCteErr; /* Error message for circular references */ + CteUse *pUse; /* Usage information for this CTE */ + u8 eM10d; /* The MATERIALIZED flag */ +}; + +/* +** Allowed values for the materialized flag (eM10d): +*/ +#define M10d_Yes 0 /* AS MATERIALIZED */ +#define M10d_Any 1 /* Not specified. Query planner's choice */ +#define M10d_No 2 /* AS NOT MATERIALIZED */ + +/* +** An instance of the With object represents a WITH clause containing +** one or more CTEs (common table expressions). +*/ +struct With { + int nCte; /* Number of CTEs in the WITH clause */ + int bView; /* Belongs to the outermost Select of a view */ + With *pOuter; /* Containing WITH clause, or NULL */ + Cte a[1]; /* For each CTE in the WITH clause.... */ +}; + +/* +** The Cte object is not guaranteed to persist for the entire duration +** of code generation. (The query flattener or other parser tree +** edits might delete it.) The following object records information +** about each Common Table Expression that must be preserved for the +** duration of the parse. +** +** The CteUse objects are freed using sqlite3ParserAddCleanup() rather +** than sqlite3SelectDelete(), which is what enables them to persist +** until the end of code generation. +*/ +struct CteUse { + int nUse; /* Number of users of this CTE */ + int addrM9e; /* Start of subroutine to compute materialization */ + int regRtn; /* Return address register for addrM9e subroutine */ + int iCur; /* Ephemeral table holding the materialization */ + LogEst nRowEst; /* Estimated number of rows in the table */ + u8 eM10d; /* The MATERIALIZED flag */ +}; + + +#ifdef SQLITE_DEBUG +/* +** An instance of the TreeView object is used for printing the content of +** data structures on sqlite3DebugPrintf() using a tree-like view. +*/ +struct TreeView { + int iLevel; /* Which level of the tree we are on */ + u8 bLine[100]; /* Draw vertical in column i if bLine[i] is true */ +}; +#endif /* SQLITE_DEBUG */ + +/* +** This object is used in various ways, most (but not all) related to window +** functions. +** +** (1) A single instance of this structure is attached to the +** the Expr.y.pWin field for each window function in an expression tree. +** This object holds the information contained in the OVER clause, +** plus additional fields used during code generation. +** +** (2) All window functions in a single SELECT form a linked-list +** attached to Select.pWin. The Window.pFunc and Window.pExpr +** fields point back to the expression that is the window function. +** +** (3) The terms of the WINDOW clause of a SELECT are instances of this +** object on a linked list attached to Select.pWinDefn. +** +** (4) For an aggregate function with a FILTER clause, an instance +** of this object is stored in Expr.y.pWin with eFrmType set to +** TK_FILTER. In this case the only field used is Window.pFilter. +** +** The uses (1) and (2) are really the same Window object that just happens +** to be accessible in two different ways. Use case (3) are separate objects. +*/ +struct Window { + char *zName; /* Name of window (may be NULL) */ + char *zBase; /* Name of base window for chaining (may be NULL) */ + ExprList *pPartition; /* PARTITION BY clause */ + ExprList *pOrderBy; /* ORDER BY clause */ + u8 eFrmType; /* TK_RANGE, TK_GROUPS, TK_ROWS, or 0 */ + u8 eStart; /* UNBOUNDED, CURRENT, PRECEDING or FOLLOWING */ + u8 eEnd; /* UNBOUNDED, CURRENT, PRECEDING or FOLLOWING */ + u8 bImplicitFrame; /* True if frame was implicitly specified */ + u8 eExclude; /* TK_NO, TK_CURRENT, TK_TIES, TK_GROUP, or 0 */ + Expr *pStart; /* Expression for " PRECEDING" */ + Expr *pEnd; /* Expression for " FOLLOWING" */ + Window **ppThis; /* Pointer to this object in Select.pWin list */ + Window *pNextWin; /* Next window function belonging to this SELECT */ + Expr *pFilter; /* The FILTER expression */ + FuncDef *pFunc; /* The function */ + int iEphCsr; /* Partition buffer or Peer buffer */ + int regAccum; /* Accumulator */ + int regResult; /* Interim result */ + int csrApp; /* Function cursor (used by min/max) */ + int regApp; /* Function register (also used by min/max) */ + int regPart; /* Array of registers for PARTITION BY values */ + Expr *pOwner; /* Expression object this window is attached to */ + int nBufferCol; /* Number of columns in buffer table */ + int iArgCol; /* Offset of first argument for this function */ + int regOne; /* Register containing constant value 1 */ + int regStartRowid; + int regEndRowid; + u8 bExprArgs; /* Defer evaluation of window function arguments + ** due to the SQLITE_SUBTYPE flag */ +}; + +#ifndef SQLITE_OMIT_WINDOWFUNC +SQLITE_PRIVATE void sqlite3WindowDelete(sqlite3*, Window*); +SQLITE_PRIVATE void sqlite3WindowUnlinkFromSelect(Window*); +SQLITE_PRIVATE void sqlite3WindowListDelete(sqlite3 *db, Window *p); +SQLITE_PRIVATE Window *sqlite3WindowAlloc(Parse*, int, int, Expr*, int , Expr*, u8); +SQLITE_PRIVATE void sqlite3WindowAttach(Parse*, Expr*, Window*); +SQLITE_PRIVATE void sqlite3WindowLink(Select *pSel, Window *pWin); +SQLITE_PRIVATE int sqlite3WindowCompare(const Parse*, const Window*, const Window*, int); +SQLITE_PRIVATE void sqlite3WindowCodeInit(Parse*, Select*); +SQLITE_PRIVATE void sqlite3WindowCodeStep(Parse*, Select*, WhereInfo*, int, int); +SQLITE_PRIVATE int sqlite3WindowRewrite(Parse*, Select*); +SQLITE_PRIVATE void sqlite3WindowUpdate(Parse*, Window*, Window*, FuncDef*); +SQLITE_PRIVATE Window *sqlite3WindowDup(sqlite3 *db, Expr *pOwner, Window *p); +SQLITE_PRIVATE Window *sqlite3WindowListDup(sqlite3 *db, Window *p); +SQLITE_PRIVATE void sqlite3WindowFunctions(void); +SQLITE_PRIVATE void sqlite3WindowChain(Parse*, Window*, Window*); +SQLITE_PRIVATE Window *sqlite3WindowAssemble(Parse*, Window*, ExprList*, ExprList*, Token*); +#else +# define sqlite3WindowDelete(a,b) +# define sqlite3WindowFunctions() +# define sqlite3WindowAttach(a,b,c) +#endif + +/* +** Assuming zIn points to the first byte of a UTF-8 character, +** advance zIn to point to the first byte of the next UTF-8 character. +*/ +#define SQLITE_SKIP_UTF8(zIn) { \ + if( (*(zIn++))>=0xc0 ){ \ + while( (*zIn & 0xc0)==0x80 ){ zIn++; } \ + } \ +} + +/* +** The SQLITE_*_BKPT macros are substitutes for the error codes with +** the same name but without the _BKPT suffix. These macros invoke +** routines that report the line-number on which the error originated +** using sqlite3_log(). The routines also provide a convenient place +** to set a debugger breakpoint. +*/ +SQLITE_PRIVATE int sqlite3ReportError(int iErr, int lineno, const char *zType); +SQLITE_PRIVATE int sqlite3CorruptError(int); +SQLITE_PRIVATE int sqlite3MisuseError(int); +SQLITE_PRIVATE int sqlite3CantopenError(int); +#define SQLITE_CORRUPT_BKPT sqlite3CorruptError(__LINE__) +#define SQLITE_MISUSE_BKPT sqlite3MisuseError(__LINE__) +#define SQLITE_CANTOPEN_BKPT sqlite3CantopenError(__LINE__) +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3NomemError(int); +SQLITE_PRIVATE int sqlite3IoerrnomemError(int); +# define SQLITE_NOMEM_BKPT sqlite3NomemError(__LINE__) +# define SQLITE_IOERR_NOMEM_BKPT sqlite3IoerrnomemError(__LINE__) +#else +# define SQLITE_NOMEM_BKPT SQLITE_NOMEM +# define SQLITE_IOERR_NOMEM_BKPT SQLITE_IOERR_NOMEM +#endif +#if defined(SQLITE_DEBUG) || defined(SQLITE_ENABLE_CORRUPT_PGNO) +SQLITE_PRIVATE int sqlite3CorruptPgnoError(int,Pgno); +# define SQLITE_CORRUPT_PGNO(P) sqlite3CorruptPgnoError(__LINE__,(P)) +#else +# define SQLITE_CORRUPT_PGNO(P) sqlite3CorruptError(__LINE__) +#endif + +/* +** FTS3 and FTS4 both require virtual table support +*/ +#if defined(SQLITE_OMIT_VIRTUALTABLE) +# undef SQLITE_ENABLE_FTS3 +# undef SQLITE_ENABLE_FTS4 +#endif + +/* +** FTS4 is really an extension for FTS3. It is enabled using the +** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also call +** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3. +*/ +#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3) +# define SQLITE_ENABLE_FTS3 1 +#endif + +/* +** The ctype.h header is needed for non-ASCII systems. It is also +** needed by FTS3 when FTS3 is included in the amalgamation. +*/ +#if !defined(SQLITE_ASCII) || \ + (defined(SQLITE_ENABLE_FTS3) && defined(SQLITE_AMALGAMATION)) +# include +#endif + +/* +** The following macros mimic the standard library functions toupper(), +** isspace(), isalnum(), isdigit() and isxdigit(), respectively. The +** sqlite versions only work for ASCII characters, regardless of locale. +*/ +#ifdef SQLITE_ASCII +# define sqlite3Toupper(x) ((x)&~(sqlite3CtypeMap[(unsigned char)(x)]&0x20)) +# define sqlite3Isspace(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x01) +# define sqlite3Isalnum(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x06) +# define sqlite3Isalpha(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x02) +# define sqlite3Isdigit(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x04) +# define sqlite3Isxdigit(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x08) +# define sqlite3Tolower(x) (sqlite3UpperToLower[(unsigned char)(x)]) +# define sqlite3Isquote(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x80) +#else +# define sqlite3Toupper(x) toupper((unsigned char)(x)) +# define sqlite3Isspace(x) isspace((unsigned char)(x)) +# define sqlite3Isalnum(x) isalnum((unsigned char)(x)) +# define sqlite3Isalpha(x) isalpha((unsigned char)(x)) +# define sqlite3Isdigit(x) isdigit((unsigned char)(x)) +# define sqlite3Isxdigit(x) isxdigit((unsigned char)(x)) +# define sqlite3Tolower(x) tolower((unsigned char)(x)) +# define sqlite3Isquote(x) ((x)=='"'||(x)=='\''||(x)=='['||(x)=='`') +#endif +SQLITE_PRIVATE int sqlite3IsIdChar(u8); + +/* +** Internal function prototypes +*/ +SQLITE_PRIVATE int sqlite3StrICmp(const char*,const char*); +SQLITE_PRIVATE int sqlite3Strlen30(const char*); +#define sqlite3Strlen30NN(C) (strlen(C)&0x3fffffff) +SQLITE_PRIVATE char *sqlite3ColumnType(Column*,char*); +#define sqlite3StrNICmp sqlite3_strnicmp + +SQLITE_PRIVATE int sqlite3MallocInit(void); +SQLITE_PRIVATE void sqlite3MallocEnd(void); +SQLITE_PRIVATE void *sqlite3Malloc(u64); +SQLITE_PRIVATE void *sqlite3MallocZero(u64); +SQLITE_PRIVATE void *sqlite3DbMallocZero(sqlite3*, u64); +SQLITE_PRIVATE void *sqlite3DbMallocRaw(sqlite3*, u64); +SQLITE_PRIVATE void *sqlite3DbMallocRawNN(sqlite3*, u64); +SQLITE_PRIVATE char *sqlite3DbStrDup(sqlite3*,const char*); +SQLITE_PRIVATE char *sqlite3DbStrNDup(sqlite3*,const char*, u64); +SQLITE_PRIVATE char *sqlite3DbSpanDup(sqlite3*,const char*,const char*); +SQLITE_PRIVATE void *sqlite3Realloc(void*, u64); +SQLITE_PRIVATE void *sqlite3DbReallocOrFree(sqlite3 *, void *, u64); +SQLITE_PRIVATE void *sqlite3DbRealloc(sqlite3 *, void *, u64); +SQLITE_PRIVATE void sqlite3DbFree(sqlite3*, void*); +SQLITE_PRIVATE void sqlite3DbFreeNN(sqlite3*, void*); +SQLITE_PRIVATE int sqlite3MallocSize(const void*); +SQLITE_PRIVATE int sqlite3DbMallocSize(sqlite3*, const void*); +SQLITE_PRIVATE void *sqlite3PageMalloc(int); +SQLITE_PRIVATE void sqlite3PageFree(void*); +SQLITE_PRIVATE void sqlite3MemSetDefault(void); +#ifndef SQLITE_UNTESTABLE +SQLITE_PRIVATE void sqlite3BenignMallocHooks(void (*)(void), void (*)(void)); +#endif +SQLITE_PRIVATE int sqlite3HeapNearlyFull(void); + +/* +** On systems with ample stack space and that support alloca(), make +** use of alloca() to obtain space for large automatic objects. By default, +** obtain space from malloc(). +** +** The alloca() routine never returns NULL. This will cause code paths +** that deal with sqlite3StackAlloc() failures to be unreachable. +*/ +#ifdef SQLITE_USE_ALLOCA +# define sqlite3StackAllocRaw(D,N) alloca(N) +# define sqlite3StackAllocZero(D,N) memset(alloca(N), 0, N) +# define sqlite3StackFree(D,P) +#else +# define sqlite3StackAllocRaw(D,N) sqlite3DbMallocRaw(D,N) +# define sqlite3StackAllocZero(D,N) sqlite3DbMallocZero(D,N) +# define sqlite3StackFree(D,P) sqlite3DbFree(D,P) +#endif + +/* Do not allow both MEMSYS5 and MEMSYS3 to be defined together. If they +** are, disable MEMSYS3 +*/ +#ifdef SQLITE_ENABLE_MEMSYS5 +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys5(void); +#undef SQLITE_ENABLE_MEMSYS3 +#endif +#ifdef SQLITE_ENABLE_MEMSYS3 +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys3(void); +#endif + + +#ifndef SQLITE_MUTEX_OMIT +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void); +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void); +SQLITE_PRIVATE sqlite3_mutex *sqlite3MutexAlloc(int); +SQLITE_PRIVATE int sqlite3MutexInit(void); +SQLITE_PRIVATE int sqlite3MutexEnd(void); +#endif +#if !defined(SQLITE_MUTEX_OMIT) && !defined(SQLITE_MUTEX_NOOP) +SQLITE_PRIVATE void sqlite3MemoryBarrier(void); +#else +# define sqlite3MemoryBarrier() +#endif + +SQLITE_PRIVATE sqlite3_int64 sqlite3StatusValue(int); +SQLITE_PRIVATE void sqlite3StatusUp(int, int); +SQLITE_PRIVATE void sqlite3StatusDown(int, int); +SQLITE_PRIVATE void sqlite3StatusHighwater(int, int); +SQLITE_PRIVATE int sqlite3LookasideUsed(sqlite3*,int*); + +/* Access to mutexes used by sqlite3_status() */ +SQLITE_PRIVATE sqlite3_mutex *sqlite3Pcache1Mutex(void); +SQLITE_PRIVATE sqlite3_mutex *sqlite3MallocMutex(void); + +#if defined(SQLITE_ENABLE_MULTITHREADED_CHECKS) && !defined(SQLITE_MUTEX_OMIT) +SQLITE_PRIVATE void sqlite3MutexWarnOnContention(sqlite3_mutex*); +#else +# define sqlite3MutexWarnOnContention(x) +#endif + +#ifndef SQLITE_OMIT_FLOATING_POINT +# define EXP754 (((u64)0x7ff)<<52) +# define MAN754 ((((u64)1)<<52)-1) +# define IsNaN(X) (((X)&EXP754)==EXP754 && ((X)&MAN754)!=0) +SQLITE_PRIVATE int sqlite3IsNaN(double); +#else +# define IsNaN(X) 0 +# define sqlite3IsNaN(X) 0 +#endif + +/* +** An instance of the following structure holds information about SQL +** functions arguments that are the parameters to the printf() function. +*/ +struct PrintfArguments { + int nArg; /* Total number of arguments */ + int nUsed; /* Number of arguments used so far */ + sqlite3_value **apArg; /* The argument values */ +}; + +SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3*,const char*, ...); +SQLITE_PRIVATE char *sqlite3VMPrintf(sqlite3*,const char*, va_list); +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) +SQLITE_PRIVATE void sqlite3DebugPrintf(const char*, ...); +#endif +#if defined(SQLITE_TEST) +SQLITE_PRIVATE void *sqlite3TestTextToPtr(const char*); +#endif + +#if defined(SQLITE_DEBUG) +SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView*, const Expr*, u8); +SQLITE_PRIVATE void sqlite3TreeViewBareExprList(TreeView*, const ExprList*, const char*); +SQLITE_PRIVATE void sqlite3TreeViewExprList(TreeView*, const ExprList*, u8, const char*); +SQLITE_PRIVATE void sqlite3TreeViewSrcList(TreeView*, const SrcList*); +SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView*, const Select*, u8); +SQLITE_PRIVATE void sqlite3TreeViewWith(TreeView*, const With*, u8); +#ifndef SQLITE_OMIT_WINDOWFUNC +SQLITE_PRIVATE void sqlite3TreeViewWindow(TreeView*, const Window*, u8); +SQLITE_PRIVATE void sqlite3TreeViewWinFunc(TreeView*, const Window*, u8); +#endif +#endif + + +SQLITE_PRIVATE void sqlite3SetString(char **, sqlite3*, const char*); +SQLITE_PRIVATE void sqlite3ErrorMsg(Parse*, const char*, ...); +SQLITE_PRIVATE int sqlite3ErrorToParser(sqlite3*,int); +SQLITE_PRIVATE void sqlite3Dequote(char*); +SQLITE_PRIVATE void sqlite3DequoteExpr(Expr*); +SQLITE_PRIVATE void sqlite3DequoteToken(Token*); +SQLITE_PRIVATE void sqlite3TokenInit(Token*,char*); +SQLITE_PRIVATE int sqlite3KeywordCode(const unsigned char*, int); +SQLITE_PRIVATE int sqlite3RunParser(Parse*, const char*); +SQLITE_PRIVATE void sqlite3FinishCoding(Parse*); +SQLITE_PRIVATE int sqlite3GetTempReg(Parse*); +SQLITE_PRIVATE void sqlite3ReleaseTempReg(Parse*,int); +SQLITE_PRIVATE int sqlite3GetTempRange(Parse*,int); +SQLITE_PRIVATE void sqlite3ReleaseTempRange(Parse*,int,int); +SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse*); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3NoTempsInRange(Parse*,int,int); +#endif +SQLITE_PRIVATE Expr *sqlite3ExprAlloc(sqlite3*,int,const Token*,int); +SQLITE_PRIVATE Expr *sqlite3Expr(sqlite3*,int,const char*); +SQLITE_PRIVATE void sqlite3ExprAttachSubtrees(sqlite3*,Expr*,Expr*,Expr*); +SQLITE_PRIVATE Expr *sqlite3PExpr(Parse*, int, Expr*, Expr*); +SQLITE_PRIVATE void sqlite3PExprAddSelect(Parse*, Expr*, Select*); +SQLITE_PRIVATE Expr *sqlite3ExprAnd(Parse*,Expr*, Expr*); +SQLITE_PRIVATE Expr *sqlite3ExprSimplifiedAndOr(Expr*); +SQLITE_PRIVATE Expr *sqlite3ExprFunction(Parse*,ExprList*, const Token*, int); +SQLITE_PRIVATE void sqlite3ExprFunctionUsable(Parse*,const Expr*,const FuncDef*); +SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse*, Expr*, u32); +SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3*, Expr*); +SQLITE_PRIVATE void sqlite3ExprDeferredDelete(Parse*, Expr*); +SQLITE_PRIVATE void sqlite3ExprUnmapAndDelete(Parse*, Expr*); +SQLITE_PRIVATE ExprList *sqlite3ExprListAppend(Parse*,ExprList*,Expr*); +SQLITE_PRIVATE ExprList *sqlite3ExprListAppendVector(Parse*,ExprList*,IdList*,Expr*); +SQLITE_PRIVATE Select *sqlite3ExprListToValues(Parse*, int, ExprList*); +SQLITE_PRIVATE void sqlite3ExprListSetSortOrder(ExprList*,int,int); +SQLITE_PRIVATE void sqlite3ExprListSetName(Parse*,ExprList*,const Token*,int); +SQLITE_PRIVATE void sqlite3ExprListSetSpan(Parse*,ExprList*,const char*,const char*); +SQLITE_PRIVATE void sqlite3ExprListDelete(sqlite3*, ExprList*); +SQLITE_PRIVATE u32 sqlite3ExprListFlags(const ExprList*); +SQLITE_PRIVATE int sqlite3IndexHasDuplicateRootPage(Index*); +SQLITE_PRIVATE int sqlite3Init(sqlite3*, char**); +SQLITE_PRIVATE int sqlite3InitCallback(void*, int, char**, char**); +SQLITE_PRIVATE int sqlite3InitOne(sqlite3*, int, char**, u32); +SQLITE_PRIVATE void sqlite3Pragma(Parse*,Token*,Token*,Token*,int); +#ifndef SQLITE_OMIT_VIRTUALTABLE +SQLITE_PRIVATE Module *sqlite3PragmaVtabRegister(sqlite3*,const char *zName); +#endif +SQLITE_PRIVATE void sqlite3ResetAllSchemasOfConnection(sqlite3*); +SQLITE_PRIVATE void sqlite3ResetOneSchema(sqlite3*,int); +SQLITE_PRIVATE void sqlite3CollapseDatabaseArray(sqlite3*); +SQLITE_PRIVATE void sqlite3CommitInternalChanges(sqlite3*); +SQLITE_PRIVATE void sqlite3ColumnSetExpr(Parse*,Table*,Column*,Expr*); +SQLITE_PRIVATE Expr *sqlite3ColumnExpr(Table*,Column*); +SQLITE_PRIVATE void sqlite3ColumnSetColl(sqlite3*,Column*,const char*zColl); +SQLITE_PRIVATE const char *sqlite3ColumnColl(Column*); +SQLITE_PRIVATE void sqlite3DeleteColumnNames(sqlite3*,Table*); +SQLITE_PRIVATE void sqlite3GenerateColumnNames(Parse *pParse, Select *pSelect); +SQLITE_PRIVATE int sqlite3ColumnsFromExprList(Parse*,ExprList*,i16*,Column**); +SQLITE_PRIVATE void sqlite3SelectAddColumnTypeAndCollation(Parse*,Table*,Select*,char); +SQLITE_PRIVATE Table *sqlite3ResultSetOfSelect(Parse*,Select*,char); +SQLITE_PRIVATE void sqlite3OpenSchemaTable(Parse *, int); +SQLITE_PRIVATE Index *sqlite3PrimaryKeyIndex(Table*); +SQLITE_PRIVATE i16 sqlite3TableColumnToIndex(Index*, i16); +#ifdef SQLITE_OMIT_GENERATED_COLUMNS +# define sqlite3TableColumnToStorage(T,X) (X) /* No-op pass-through */ +# define sqlite3StorageColumnToTable(T,X) (X) /* No-op pass-through */ +#else +SQLITE_PRIVATE i16 sqlite3TableColumnToStorage(Table*, i16); +SQLITE_PRIVATE i16 sqlite3StorageColumnToTable(Table*, i16); +#endif +SQLITE_PRIVATE void sqlite3StartTable(Parse*,Token*,Token*,int,int,int,int); +#if SQLITE_ENABLE_HIDDEN_COLUMNS +SQLITE_PRIVATE void sqlite3ColumnPropertiesFromName(Table*, Column*); +#else +# define sqlite3ColumnPropertiesFromName(T,C) /* no-op */ +#endif +SQLITE_PRIVATE void sqlite3AddColumn(Parse*,Token,Token); +SQLITE_PRIVATE void sqlite3AddNotNull(Parse*, int); +SQLITE_PRIVATE void sqlite3AddPrimaryKey(Parse*, ExprList*, int, int, int); +SQLITE_PRIVATE void sqlite3AddCheckConstraint(Parse*, Expr*, const char*, const char*); +SQLITE_PRIVATE void sqlite3AddDefaultValue(Parse*,Expr*,const char*,const char*); +SQLITE_PRIVATE void sqlite3AddCollateType(Parse*, Token*); +SQLITE_PRIVATE void sqlite3AddGenerated(Parse*,Expr*,Token*); +SQLITE_PRIVATE void sqlite3EndTable(Parse*,Token*,Token*,u32,Select*); +SQLITE_PRIVATE void sqlite3AddReturning(Parse*,ExprList*); +SQLITE_PRIVATE int sqlite3ParseUri(const char*,const char*,unsigned int*, + sqlite3_vfs**,char**,char **); +#define sqlite3CodecQueryParameters(A,B,C) 0 +SQLITE_PRIVATE Btree *sqlite3DbNameToBtree(sqlite3*,const char*); + +#ifdef SQLITE_UNTESTABLE +# define sqlite3FaultSim(X) SQLITE_OK +#else +SQLITE_PRIVATE int sqlite3FaultSim(int); +#endif + +SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32); +SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec*, u32); +SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec*, u32); +SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec*, u32); +SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec*, u32, void*); +SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec*); +SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec*); +#ifndef SQLITE_UNTESTABLE +SQLITE_PRIVATE int sqlite3BitvecBuiltinTest(int,int*); +#endif + +SQLITE_PRIVATE RowSet *sqlite3RowSetInit(sqlite3*); +SQLITE_PRIVATE void sqlite3RowSetDelete(void*); +SQLITE_PRIVATE void sqlite3RowSetClear(void*); +SQLITE_PRIVATE void sqlite3RowSetInsert(RowSet*, i64); +SQLITE_PRIVATE int sqlite3RowSetTest(RowSet*, int iBatch, i64); +SQLITE_PRIVATE int sqlite3RowSetNext(RowSet*, i64*); + +SQLITE_PRIVATE void sqlite3CreateView(Parse*,Token*,Token*,Token*,ExprList*,Select*,int,int); + +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) +SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse*,Table*); +#else +# define sqlite3ViewGetColumnNames(A,B) 0 +#endif + +#if SQLITE_MAX_ATTACHED>30 +SQLITE_PRIVATE int sqlite3DbMaskAllZero(yDbMask); +#endif +SQLITE_PRIVATE void sqlite3DropTable(Parse*, SrcList*, int, int); +SQLITE_PRIVATE void sqlite3CodeDropTable(Parse*, Table*, int, int); +SQLITE_PRIVATE void sqlite3DeleteTable(sqlite3*, Table*); +SQLITE_PRIVATE void sqlite3FreeIndex(sqlite3*, Index*); +#ifndef SQLITE_OMIT_AUTOINCREMENT +SQLITE_PRIVATE void sqlite3AutoincrementBegin(Parse *pParse); +SQLITE_PRIVATE void sqlite3AutoincrementEnd(Parse *pParse); +#else +# define sqlite3AutoincrementBegin(X) +# define sqlite3AutoincrementEnd(X) +#endif +SQLITE_PRIVATE void sqlite3Insert(Parse*, SrcList*, Select*, IdList*, int, Upsert*); +#ifndef SQLITE_OMIT_GENERATED_COLUMNS +SQLITE_PRIVATE void sqlite3ComputeGeneratedColumns(Parse*, int, Table*); +#endif +SQLITE_PRIVATE void *sqlite3ArrayAllocate(sqlite3*,void*,int,int*,int*); +SQLITE_PRIVATE IdList *sqlite3IdListAppend(Parse*, IdList*, Token*); +SQLITE_PRIVATE int sqlite3IdListIndex(IdList*,const char*); +SQLITE_PRIVATE SrcList *sqlite3SrcListEnlarge(Parse*, SrcList*, int, int); +SQLITE_PRIVATE SrcList *sqlite3SrcListAppendList(Parse *pParse, SrcList *p1, SrcList *p2); +SQLITE_PRIVATE SrcList *sqlite3SrcListAppend(Parse*, SrcList*, Token*, Token*); +SQLITE_PRIVATE SrcList *sqlite3SrcListAppendFromTerm(Parse*, SrcList*, Token*, Token*, + Token*, Select*, Expr*, IdList*); +SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *, SrcList *, Token *); +SQLITE_PRIVATE void sqlite3SrcListFuncArgs(Parse*, SrcList*, ExprList*); +SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *, SrcItem *); +SQLITE_PRIVATE void sqlite3SrcListShiftJoinType(SrcList*); +SQLITE_PRIVATE void sqlite3SrcListAssignCursors(Parse*, SrcList*); +SQLITE_PRIVATE void sqlite3IdListDelete(sqlite3*, IdList*); +SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3*, SrcList*); +SQLITE_PRIVATE Index *sqlite3AllocateIndexObject(sqlite3*,i16,int,char**); +SQLITE_PRIVATE void sqlite3CreateIndex(Parse*,Token*,Token*,SrcList*,ExprList*,int,Token*, + Expr*, int, int, u8); +SQLITE_PRIVATE void sqlite3DropIndex(Parse*, SrcList*, int); +SQLITE_PRIVATE int sqlite3Select(Parse*, Select*, SelectDest*); +SQLITE_PRIVATE Select *sqlite3SelectNew(Parse*,ExprList*,SrcList*,Expr*,ExprList*, + Expr*,ExprList*,u32,Expr*); +SQLITE_PRIVATE void sqlite3SelectDelete(sqlite3*, Select*); +SQLITE_PRIVATE Table *sqlite3SrcListLookup(Parse*, SrcList*); +SQLITE_PRIVATE int sqlite3IsReadOnly(Parse*, Table*, int); +SQLITE_PRIVATE void sqlite3OpenTable(Parse*, int iCur, int iDb, Table*, int); +#if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY) +SQLITE_PRIVATE Expr *sqlite3LimitWhere(Parse*,SrcList*,Expr*,ExprList*,Expr*,char*); +#endif +SQLITE_PRIVATE void sqlite3CodeChangeCount(Vdbe*,int,const char*); +SQLITE_PRIVATE void sqlite3DeleteFrom(Parse*, SrcList*, Expr*, ExprList*, Expr*); +SQLITE_PRIVATE void sqlite3Update(Parse*, SrcList*, ExprList*,Expr*,int,ExprList*,Expr*, + Upsert*); +SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(Parse*,SrcList*,Expr*,ExprList*, + ExprList*,Select*,u16,int); +SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo*); +SQLITE_PRIVATE LogEst sqlite3WhereOutputRowCount(WhereInfo*); +SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo*); +SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo*); +SQLITE_PRIVATE int sqlite3WhereOrderByLimitOptLabel(WhereInfo*); +SQLITE_PRIVATE void sqlite3WhereMinMaxOptEarlyOut(Vdbe*,WhereInfo*); +SQLITE_PRIVATE int sqlite3WhereIsSorted(WhereInfo*); +SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo*); +SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo*); +SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo*, int*); +#define ONEPASS_OFF 0 /* Use of ONEPASS not allowed */ +#define ONEPASS_SINGLE 1 /* ONEPASS valid for a single row update */ +#define ONEPASS_MULTI 2 /* ONEPASS is valid for multiple rows */ +SQLITE_PRIVATE int sqlite3WhereUsesDeferredSeek(WhereInfo*); +SQLITE_PRIVATE void sqlite3ExprCodeLoadIndexColumn(Parse*, Index*, int, int, int); +SQLITE_PRIVATE int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int, u8); +SQLITE_PRIVATE void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int); +SQLITE_PRIVATE void sqlite3ExprCodeMove(Parse*, int, int, int); +SQLITE_PRIVATE void sqlite3ExprCode(Parse*, Expr*, int); +#ifndef SQLITE_OMIT_GENERATED_COLUMNS +SQLITE_PRIVATE void sqlite3ExprCodeGeneratedColumn(Parse*, Table*, Column*, int); +#endif +SQLITE_PRIVATE void sqlite3ExprCodeCopy(Parse*, Expr*, int); +SQLITE_PRIVATE void sqlite3ExprCodeFactorable(Parse*, Expr*, int); +SQLITE_PRIVATE int sqlite3ExprCodeRunJustOnce(Parse*, Expr*, int); +SQLITE_PRIVATE int sqlite3ExprCodeTemp(Parse*, Expr*, int*); +SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse*, Expr*, int); +SQLITE_PRIVATE int sqlite3ExprCodeExprList(Parse*, ExprList*, int, int, u8); +#define SQLITE_ECEL_DUP 0x01 /* Deep, not shallow copies */ +#define SQLITE_ECEL_FACTOR 0x02 /* Factor out constant terms */ +#define SQLITE_ECEL_REF 0x04 /* Use ExprList.u.x.iOrderByCol */ +#define SQLITE_ECEL_OMITREF 0x08 /* Omit if ExprList.u.x.iOrderByCol */ +SQLITE_PRIVATE void sqlite3ExprIfTrue(Parse*, Expr*, int, int); +SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse*, Expr*, int, int); +SQLITE_PRIVATE void sqlite3ExprIfFalseDup(Parse*, Expr*, int, int); +SQLITE_PRIVATE Table *sqlite3FindTable(sqlite3*,const char*, const char*); +#define LOCATE_VIEW 0x01 +#define LOCATE_NOERR 0x02 +SQLITE_PRIVATE Table *sqlite3LocateTable(Parse*,u32 flags,const char*, const char*); +SQLITE_PRIVATE const char *sqlite3PreferredTableName(const char*); +SQLITE_PRIVATE Table *sqlite3LocateTableItem(Parse*,u32 flags,SrcItem *); +SQLITE_PRIVATE Index *sqlite3FindIndex(sqlite3*,const char*, const char*); +SQLITE_PRIVATE void sqlite3UnlinkAndDeleteTable(sqlite3*,int,const char*); +SQLITE_PRIVATE void sqlite3UnlinkAndDeleteIndex(sqlite3*,int,const char*); +SQLITE_PRIVATE void sqlite3Vacuum(Parse*,Token*,Expr*); +SQLITE_PRIVATE int sqlite3RunVacuum(char**, sqlite3*, int, sqlite3_value*); +SQLITE_PRIVATE char *sqlite3NameFromToken(sqlite3*, const Token*); +SQLITE_PRIVATE int sqlite3ExprCompare(const Parse*,const Expr*,const Expr*, int); +SQLITE_PRIVATE int sqlite3ExprCompareSkip(Expr*,Expr*,int); +SQLITE_PRIVATE int sqlite3ExprListCompare(const ExprList*,const ExprList*, int); +SQLITE_PRIVATE int sqlite3ExprImpliesExpr(const Parse*,const Expr*,const Expr*, int); +SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr*,int); +SQLITE_PRIVATE void sqlite3AggInfoPersistWalkerInit(Walker*,Parse*); +SQLITE_PRIVATE void sqlite3ExprAnalyzeAggregates(NameContext*, Expr*); +SQLITE_PRIVATE void sqlite3ExprAnalyzeAggList(NameContext*,ExprList*); +SQLITE_PRIVATE int sqlite3ExprCoveredByIndex(Expr*, int iCur, Index *pIdx); +SQLITE_PRIVATE int sqlite3ReferencesSrcList(Parse*, Expr*, SrcList*); +SQLITE_PRIVATE Vdbe *sqlite3GetVdbe(Parse*); +#ifndef SQLITE_UNTESTABLE +SQLITE_PRIVATE void sqlite3PrngSaveState(void); +SQLITE_PRIVATE void sqlite3PrngRestoreState(void); +#endif +SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3*,int); +SQLITE_PRIVATE void sqlite3CodeVerifySchema(Parse*, int); +SQLITE_PRIVATE void sqlite3CodeVerifyNamedSchema(Parse*, const char *zDb); +SQLITE_PRIVATE void sqlite3BeginTransaction(Parse*, int); +SQLITE_PRIVATE void sqlite3EndTransaction(Parse*,int); +SQLITE_PRIVATE void sqlite3Savepoint(Parse*, int, Token*); +SQLITE_PRIVATE void sqlite3CloseSavepoints(sqlite3 *); +SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3*); +SQLITE_PRIVATE u32 sqlite3IsTrueOrFalse(const char*); +SQLITE_PRIVATE int sqlite3ExprIdToTrueFalse(Expr*); +SQLITE_PRIVATE int sqlite3ExprTruthValue(const Expr*); +SQLITE_PRIVATE int sqlite3ExprIsConstant(Expr*); +SQLITE_PRIVATE int sqlite3ExprIsConstantNotJoin(Expr*); +SQLITE_PRIVATE int sqlite3ExprIsConstantOrFunction(Expr*, u8); +SQLITE_PRIVATE int sqlite3ExprIsConstantOrGroupBy(Parse*, Expr*, ExprList*); +SQLITE_PRIVATE int sqlite3ExprIsTableConstant(Expr*,int); +SQLITE_PRIVATE int sqlite3ExprIsTableConstraint(Expr*,const SrcItem*); +#ifdef SQLITE_ENABLE_CURSOR_HINTS +SQLITE_PRIVATE int sqlite3ExprContainsSubquery(Expr*); +#endif +SQLITE_PRIVATE int sqlite3ExprIsInteger(const Expr*, int*); +SQLITE_PRIVATE int sqlite3ExprCanBeNull(const Expr*); +SQLITE_PRIVATE int sqlite3ExprNeedsNoAffinityChange(const Expr*, char); +SQLITE_PRIVATE int sqlite3IsRowid(const char*); +SQLITE_PRIVATE void sqlite3GenerateRowDelete( + Parse*,Table*,Trigger*,int,int,int,i16,u8,u8,u8,int); +SQLITE_PRIVATE void sqlite3GenerateRowIndexDelete(Parse*, Table*, int, int, int*, int); +SQLITE_PRIVATE int sqlite3GenerateIndexKey(Parse*, Index*, int, int, int, int*,Index*,int); +SQLITE_PRIVATE void sqlite3ResolvePartIdxLabel(Parse*,int); +SQLITE_PRIVATE int sqlite3ExprReferencesUpdatedColumn(Expr*,int*,int); +SQLITE_PRIVATE void sqlite3GenerateConstraintChecks(Parse*,Table*,int*,int,int,int,int, + u8,u8,int,int*,int*,Upsert*); +#ifdef SQLITE_ENABLE_NULL_TRIM +SQLITE_PRIVATE void sqlite3SetMakeRecordP5(Vdbe*,Table*); +#else +# define sqlite3SetMakeRecordP5(A,B) +#endif +SQLITE_PRIVATE void sqlite3CompleteInsertion(Parse*,Table*,int,int,int,int*,int,int,int); +SQLITE_PRIVATE int sqlite3OpenTableAndIndices(Parse*, Table*, int, u8, int, u8*, int*, int*); +SQLITE_PRIVATE void sqlite3BeginWriteOperation(Parse*, int, int); +SQLITE_PRIVATE void sqlite3MultiWrite(Parse*); +SQLITE_PRIVATE void sqlite3MayAbort(Parse*); +SQLITE_PRIVATE void sqlite3HaltConstraint(Parse*, int, int, char*, i8, u8); +SQLITE_PRIVATE void sqlite3UniqueConstraint(Parse*, int, Index*); +SQLITE_PRIVATE void sqlite3RowidConstraint(Parse*, int, Table*); +SQLITE_PRIVATE Expr *sqlite3ExprDup(sqlite3*,const Expr*,int); +SQLITE_PRIVATE ExprList *sqlite3ExprListDup(sqlite3*,const ExprList*,int); +SQLITE_PRIVATE SrcList *sqlite3SrcListDup(sqlite3*,const SrcList*,int); +SQLITE_PRIVATE IdList *sqlite3IdListDup(sqlite3*,const IdList*); +SQLITE_PRIVATE Select *sqlite3SelectDup(sqlite3*,const Select*,int); +SQLITE_PRIVATE FuncDef *sqlite3FunctionSearch(int,const char*); +SQLITE_PRIVATE void sqlite3InsertBuiltinFuncs(FuncDef*,int); +SQLITE_PRIVATE FuncDef *sqlite3FindFunction(sqlite3*,const char*,int,u8,u8); +SQLITE_PRIVATE void sqlite3QuoteValue(StrAccum*,sqlite3_value*); +SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(void); +SQLITE_PRIVATE void sqlite3RegisterDateTimeFunctions(void); +SQLITE_PRIVATE void sqlite3RegisterJsonFunctions(void); +SQLITE_PRIVATE void sqlite3RegisterPerConnectionBuiltinFunctions(sqlite3*); +#if !defined(SQLITE_OMIT_VIRTUALTABLE) && !defined(SQLITE_OMIT_JSON) +SQLITE_PRIVATE int sqlite3JsonTableFunctions(sqlite3*); +#endif +SQLITE_PRIVATE int sqlite3SafetyCheckOk(sqlite3*); +SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(sqlite3*); +SQLITE_PRIVATE void sqlite3ChangeCookie(Parse*, int); +SQLITE_PRIVATE With *sqlite3WithDup(sqlite3 *db, With *p); + +#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER) +SQLITE_PRIVATE void sqlite3MaterializeView(Parse*, Table*, Expr*, ExprList*,Expr*,int); +#endif + +#ifndef SQLITE_OMIT_TRIGGER +SQLITE_PRIVATE void sqlite3BeginTrigger(Parse*, Token*,Token*,int,int,IdList*,SrcList*, + Expr*,int, int); +SQLITE_PRIVATE void sqlite3FinishTrigger(Parse*, TriggerStep*, Token*); +SQLITE_PRIVATE void sqlite3DropTrigger(Parse*, SrcList*, int); +SQLITE_PRIVATE void sqlite3DropTriggerPtr(Parse*, Trigger*); +SQLITE_PRIVATE Trigger *sqlite3TriggersExist(Parse *, Table*, int, ExprList*, int *pMask); +SQLITE_PRIVATE Trigger *sqlite3TriggerList(Parse *, Table *); +SQLITE_PRIVATE void sqlite3CodeRowTrigger(Parse*, Trigger *, int, ExprList*, int, Table *, + int, int, int); +SQLITE_PRIVATE void sqlite3CodeRowTriggerDirect(Parse *, Trigger *, Table *, int, int, int); + void sqliteViewTriggers(Parse*, Table*, Expr*, int, ExprList*); +SQLITE_PRIVATE void sqlite3DeleteTriggerStep(sqlite3*, TriggerStep*); +SQLITE_PRIVATE TriggerStep *sqlite3TriggerSelectStep(sqlite3*,Select*, + const char*,const char*); +SQLITE_PRIVATE TriggerStep *sqlite3TriggerInsertStep(Parse*,Token*, IdList*, + Select*,u8,Upsert*, + const char*,const char*); +SQLITE_PRIVATE TriggerStep *sqlite3TriggerUpdateStep(Parse*,Token*,SrcList*,ExprList*, + Expr*, u8, const char*,const char*); +SQLITE_PRIVATE TriggerStep *sqlite3TriggerDeleteStep(Parse*,Token*, Expr*, + const char*,const char*); +SQLITE_PRIVATE void sqlite3DeleteTrigger(sqlite3*, Trigger*); +SQLITE_PRIVATE void sqlite3UnlinkAndDeleteTrigger(sqlite3*,int,const char*); +SQLITE_PRIVATE u32 sqlite3TriggerColmask(Parse*,Trigger*,ExprList*,int,int,Table*,int); +SQLITE_PRIVATE SrcList *sqlite3TriggerStepSrc(Parse*, TriggerStep*); +# define sqlite3ParseToplevel(p) ((p)->pToplevel ? (p)->pToplevel : (p)) +# define sqlite3IsToplevel(p) ((p)->pToplevel==0) +#else +# define sqlite3TriggersExist(B,C,D,E,F) 0 +# define sqlite3DeleteTrigger(A,B) +# define sqlite3DropTriggerPtr(A,B) +# define sqlite3UnlinkAndDeleteTrigger(A,B,C) +# define sqlite3CodeRowTrigger(A,B,C,D,E,F,G,H,I) +# define sqlite3CodeRowTriggerDirect(A,B,C,D,E,F) +# define sqlite3TriggerList(X, Y) 0 +# define sqlite3ParseToplevel(p) p +# define sqlite3IsToplevel(p) 1 +# define sqlite3TriggerColmask(A,B,C,D,E,F,G) 0 +# define sqlite3TriggerStepSrc(A,B) 0 +#endif + +SQLITE_PRIVATE int sqlite3JoinType(Parse*, Token*, Token*, Token*); +SQLITE_PRIVATE int sqlite3ColumnIndex(Table *pTab, const char *zCol); +SQLITE_PRIVATE void sqlite3SetJoinExpr(Expr*,int); +SQLITE_PRIVATE void sqlite3CreateForeignKey(Parse*, ExprList*, Token*, ExprList*, int); +SQLITE_PRIVATE void sqlite3DeferForeignKey(Parse*, int); +#ifndef SQLITE_OMIT_AUTHORIZATION +SQLITE_PRIVATE void sqlite3AuthRead(Parse*,Expr*,Schema*,SrcList*); +SQLITE_PRIVATE int sqlite3AuthCheck(Parse*,int, const char*, const char*, const char*); +SQLITE_PRIVATE void sqlite3AuthContextPush(Parse*, AuthContext*, const char*); +SQLITE_PRIVATE void sqlite3AuthContextPop(AuthContext*); +SQLITE_PRIVATE int sqlite3AuthReadCol(Parse*, const char *, const char *, int); +#else +# define sqlite3AuthRead(a,b,c,d) +# define sqlite3AuthCheck(a,b,c,d,e) SQLITE_OK +# define sqlite3AuthContextPush(a,b,c) +# define sqlite3AuthContextPop(a) ((void)(a)) +#endif +SQLITE_PRIVATE int sqlite3DbIsNamed(sqlite3 *db, int iDb, const char *zName); +SQLITE_PRIVATE void sqlite3Attach(Parse*, Expr*, Expr*, Expr*); +SQLITE_PRIVATE void sqlite3Detach(Parse*, Expr*); +SQLITE_PRIVATE void sqlite3FixInit(DbFixer*, Parse*, int, const char*, const Token*); +SQLITE_PRIVATE int sqlite3FixSrcList(DbFixer*, SrcList*); +SQLITE_PRIVATE int sqlite3FixSelect(DbFixer*, Select*); +SQLITE_PRIVATE int sqlite3FixExpr(DbFixer*, Expr*); +SQLITE_PRIVATE int sqlite3FixTriggerStep(DbFixer*, TriggerStep*); +SQLITE_PRIVATE int sqlite3RealSameAsInt(double,sqlite3_int64); +SQLITE_PRIVATE void sqlite3Int64ToText(i64,char*); +SQLITE_PRIVATE int sqlite3AtoF(const char *z, double*, int, u8); +SQLITE_PRIVATE int sqlite3GetInt32(const char *, int*); +SQLITE_PRIVATE int sqlite3GetUInt32(const char*, u32*); +SQLITE_PRIVATE int sqlite3Atoi(const char*); +#ifndef SQLITE_OMIT_UTF16 +SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *pData, int nChar); +#endif +SQLITE_PRIVATE int sqlite3Utf8CharLen(const char *pData, int nByte); +SQLITE_PRIVATE u32 sqlite3Utf8Read(const u8**); +SQLITE_PRIVATE LogEst sqlite3LogEst(u64); +SQLITE_PRIVATE LogEst sqlite3LogEstAdd(LogEst,LogEst); +SQLITE_PRIVATE LogEst sqlite3LogEstFromDouble(double); +SQLITE_PRIVATE u64 sqlite3LogEstToInt(LogEst); +SQLITE_PRIVATE VList *sqlite3VListAdd(sqlite3*,VList*,const char*,int,int); +SQLITE_PRIVATE const char *sqlite3VListNumToName(VList*,int); +SQLITE_PRIVATE int sqlite3VListNameToNum(VList*,const char*,int); + +/* +** Routines to read and write variable-length integers. These used to +** be defined locally, but now we use the varint routines in the util.c +** file. +*/ +SQLITE_PRIVATE int sqlite3PutVarint(unsigned char*, u64); +SQLITE_PRIVATE u8 sqlite3GetVarint(const unsigned char *, u64 *); +SQLITE_PRIVATE u8 sqlite3GetVarint32(const unsigned char *, u32 *); +SQLITE_PRIVATE int sqlite3VarintLen(u64 v); + +/* +** The common case is for a varint to be a single byte. They following +** macros handle the common case without a procedure call, but then call +** the procedure for larger varints. +*/ +#define getVarint32(A,B) \ + (u8)((*(A)<(u8)0x80)?((B)=(u32)*(A)),1:sqlite3GetVarint32((A),(u32 *)&(B))) +#define getVarint32NR(A,B) \ + B=(u32)*(A);if(B>=0x80)sqlite3GetVarint32((A),(u32*)&(B)) +#define putVarint32(A,B) \ + (u8)(((u32)(B)<(u32)0x80)?(*(A)=(unsigned char)(B)),1:\ + sqlite3PutVarint((A),(B))) +#define getVarint sqlite3GetVarint +#define putVarint sqlite3PutVarint + + +SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(sqlite3*, Index*); +SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe*, Table*, int); +SQLITE_PRIVATE char sqlite3CompareAffinity(const Expr *pExpr, char aff2); +SQLITE_PRIVATE int sqlite3IndexAffinityOk(const Expr *pExpr, char idx_affinity); +SQLITE_PRIVATE char sqlite3TableColumnAffinity(const Table*,int); +SQLITE_PRIVATE char sqlite3ExprAffinity(const Expr *pExpr); +SQLITE_PRIVATE int sqlite3Atoi64(const char*, i64*, int, u8); +SQLITE_PRIVATE int sqlite3DecOrHexToI64(const char*, i64*); +SQLITE_PRIVATE void sqlite3ErrorWithMsg(sqlite3*, int, const char*,...); +SQLITE_PRIVATE void sqlite3Error(sqlite3*,int); +SQLITE_PRIVATE void sqlite3ErrorClear(sqlite3*); +SQLITE_PRIVATE void sqlite3SystemError(sqlite3*,int); +SQLITE_PRIVATE void *sqlite3HexToBlob(sqlite3*, const char *z, int n); +SQLITE_PRIVATE u8 sqlite3HexToInt(int h); +SQLITE_PRIVATE int sqlite3TwoPartName(Parse *, Token *, Token *, Token **); + +#if defined(SQLITE_NEED_ERR_NAME) +SQLITE_PRIVATE const char *sqlite3ErrName(int); +#endif + +#ifndef SQLITE_OMIT_DESERIALIZE +SQLITE_PRIVATE int sqlite3MemdbInit(void); +#endif + +SQLITE_PRIVATE const char *sqlite3ErrStr(int); +SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse); +SQLITE_PRIVATE CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int); +SQLITE_PRIVATE int sqlite3IsBinary(const CollSeq*); +SQLITE_PRIVATE CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName); +SQLITE_PRIVATE void sqlite3SetTextEncoding(sqlite3 *db, u8); +SQLITE_PRIVATE CollSeq *sqlite3ExprCollSeq(Parse *pParse, const Expr *pExpr); +SQLITE_PRIVATE CollSeq *sqlite3ExprNNCollSeq(Parse *pParse, const Expr *pExpr); +SQLITE_PRIVATE int sqlite3ExprCollSeqMatch(Parse*,const Expr*,const Expr*); +SQLITE_PRIVATE Expr *sqlite3ExprAddCollateToken(const Parse *pParse, Expr*, const Token*, int); +SQLITE_PRIVATE Expr *sqlite3ExprAddCollateString(const Parse*,Expr*,const char*); +SQLITE_PRIVATE Expr *sqlite3ExprSkipCollate(Expr*); +SQLITE_PRIVATE Expr *sqlite3ExprSkipCollateAndLikely(Expr*); +SQLITE_PRIVATE int sqlite3CheckCollSeq(Parse *, CollSeq *); +SQLITE_PRIVATE int sqlite3WritableSchema(sqlite3*); +SQLITE_PRIVATE int sqlite3CheckObjectName(Parse*, const char*,const char*,const char*); +SQLITE_PRIVATE void sqlite3VdbeSetChanges(sqlite3 *, i64); +SQLITE_PRIVATE int sqlite3AddInt64(i64*,i64); +SQLITE_PRIVATE int sqlite3SubInt64(i64*,i64); +SQLITE_PRIVATE int sqlite3MulInt64(i64*,i64); +SQLITE_PRIVATE int sqlite3AbsInt32(int); +#ifdef SQLITE_ENABLE_8_3_NAMES +SQLITE_PRIVATE void sqlite3FileSuffix3(const char*, char*); +#else +# define sqlite3FileSuffix3(X,Y) +#endif +SQLITE_PRIVATE u8 sqlite3GetBoolean(const char *z,u8); + +SQLITE_PRIVATE const void *sqlite3ValueText(sqlite3_value*, u8); +SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value*, u8); +SQLITE_PRIVATE void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, + void(*)(void*)); +SQLITE_PRIVATE void sqlite3ValueSetNull(sqlite3_value*); +SQLITE_PRIVATE void sqlite3ValueFree(sqlite3_value*); +#ifndef SQLITE_UNTESTABLE +SQLITE_PRIVATE void sqlite3ResultIntReal(sqlite3_context*); +#endif +SQLITE_PRIVATE sqlite3_value *sqlite3ValueNew(sqlite3 *); +#ifndef SQLITE_OMIT_UTF16 +SQLITE_PRIVATE char *sqlite3Utf16to8(sqlite3 *, const void*, int, u8); +#endif +SQLITE_PRIVATE int sqlite3ValueFromExpr(sqlite3 *, const Expr *, u8, u8, sqlite3_value **); +SQLITE_PRIVATE void sqlite3ValueApplyAffinity(sqlite3_value *, u8, u8); +#ifndef SQLITE_AMALGAMATION +SQLITE_PRIVATE const unsigned char sqlite3OpcodeProperty[]; +SQLITE_PRIVATE const char sqlite3StrBINARY[]; +SQLITE_PRIVATE const unsigned char sqlite3StdTypeLen[]; +SQLITE_PRIVATE const char sqlite3StdTypeAffinity[]; +SQLITE_PRIVATE const char sqlite3StdTypeMap[]; +SQLITE_PRIVATE const char *sqlite3StdType[]; +SQLITE_PRIVATE const unsigned char sqlite3UpperToLower[]; +SQLITE_PRIVATE const unsigned char *sqlite3aLTb; +SQLITE_PRIVATE const unsigned char *sqlite3aEQb; +SQLITE_PRIVATE const unsigned char *sqlite3aGTb; +SQLITE_PRIVATE const unsigned char sqlite3CtypeMap[]; +SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config; +SQLITE_PRIVATE FuncDefHash sqlite3BuiltinFunctions; +#ifndef SQLITE_OMIT_WSD +SQLITE_PRIVATE int sqlite3PendingByte; +#endif +#endif /* SQLITE_AMALGAMATION */ +#ifdef VDBE_PROFILE +SQLITE_PRIVATE sqlite3_uint64 sqlite3NProfileCnt; +#endif +SQLITE_PRIVATE void sqlite3RootPageMoved(sqlite3*, int, Pgno, Pgno); +SQLITE_PRIVATE void sqlite3Reindex(Parse*, Token*, Token*); +SQLITE_PRIVATE void sqlite3AlterFunctions(void); +SQLITE_PRIVATE void sqlite3AlterRenameTable(Parse*, SrcList*, Token*); +SQLITE_PRIVATE void sqlite3AlterRenameColumn(Parse*, SrcList*, Token*, Token*); +SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *, int *); +SQLITE_PRIVATE void sqlite3NestedParse(Parse*, const char*, ...); +SQLITE_PRIVATE void sqlite3ExpirePreparedStatements(sqlite3*, int); +SQLITE_PRIVATE void sqlite3CodeRhsOfIN(Parse*, Expr*, int); +SQLITE_PRIVATE int sqlite3CodeSubselect(Parse*, Expr*); +SQLITE_PRIVATE void sqlite3SelectPrep(Parse*, Select*, NameContext*); +SQLITE_PRIVATE int sqlite3ExpandSubquery(Parse*, SrcItem*); +SQLITE_PRIVATE void sqlite3SelectWrongNumTermsError(Parse *pParse, Select *p); +SQLITE_PRIVATE int sqlite3MatchEName( + const struct ExprList_item*, + const char*, + const char*, + const char* +); +SQLITE_PRIVATE Bitmask sqlite3ExprColUsed(Expr*); +SQLITE_PRIVATE u8 sqlite3StrIHash(const char*); +SQLITE_PRIVATE int sqlite3ResolveExprNames(NameContext*, Expr*); +SQLITE_PRIVATE int sqlite3ResolveExprListNames(NameContext*, ExprList*); +SQLITE_PRIVATE void sqlite3ResolveSelectNames(Parse*, Select*, NameContext*); +SQLITE_PRIVATE int sqlite3ResolveSelfReference(Parse*,Table*,int,Expr*,ExprList*); +SQLITE_PRIVATE int sqlite3ResolveOrderGroupBy(Parse*, Select*, ExprList*, const char*); +SQLITE_PRIVATE void sqlite3ColumnDefault(Vdbe *, Table *, int, int); +SQLITE_PRIVATE void sqlite3AlterFinishAddColumn(Parse *, Token *); +SQLITE_PRIVATE void sqlite3AlterBeginAddColumn(Parse *, SrcList *); +SQLITE_PRIVATE void sqlite3AlterDropColumn(Parse*, SrcList*, const Token*); +SQLITE_PRIVATE const void *sqlite3RenameTokenMap(Parse*, const void*, const Token*); +SQLITE_PRIVATE void sqlite3RenameTokenRemap(Parse*, const void *pTo, const void *pFrom); +SQLITE_PRIVATE void sqlite3RenameExprUnmap(Parse*, Expr*); +SQLITE_PRIVATE void sqlite3RenameExprlistUnmap(Parse*, ExprList*); +SQLITE_PRIVATE CollSeq *sqlite3GetCollSeq(Parse*, u8, CollSeq *, const char*); +SQLITE_PRIVATE char sqlite3AffinityType(const char*, Column*); +SQLITE_PRIVATE void sqlite3Analyze(Parse*, Token*, Token*); +SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler*); +SQLITE_PRIVATE int sqlite3FindDb(sqlite3*, Token*); +SQLITE_PRIVATE int sqlite3FindDbName(sqlite3 *, const char *); +SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3*,int iDB); +SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3*,Index*); +SQLITE_PRIVATE void sqlite3DefaultRowEst(Index*); +SQLITE_PRIVATE void sqlite3RegisterLikeFunctions(sqlite3*, int); +SQLITE_PRIVATE int sqlite3IsLikeFunction(sqlite3*,Expr*,int*,char*); +SQLITE_PRIVATE void sqlite3SchemaClear(void *); +SQLITE_PRIVATE Schema *sqlite3SchemaGet(sqlite3 *, Btree *); +SQLITE_PRIVATE int sqlite3SchemaToIndex(sqlite3 *db, Schema *); +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoAlloc(sqlite3*,int,int); +SQLITE_PRIVATE void sqlite3KeyInfoUnref(KeyInfo*); +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoRef(KeyInfo*); +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoOfIndex(Parse*, Index*); +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoFromExprList(Parse*, ExprList*, int, int); +SQLITE_PRIVATE const char *sqlite3SelectOpName(int); +SQLITE_PRIVATE int sqlite3HasExplicitNulls(Parse*, ExprList*); + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3KeyInfoIsWriteable(KeyInfo*); +#endif +SQLITE_PRIVATE int sqlite3CreateFunc(sqlite3 *, const char *, int, int, void *, + void (*)(sqlite3_context*,int,sqlite3_value **), + void (*)(sqlite3_context*,int,sqlite3_value **), + void (*)(sqlite3_context*), + void (*)(sqlite3_context*), + void (*)(sqlite3_context*,int,sqlite3_value **), + FuncDestructor *pDestructor +); +SQLITE_PRIVATE void sqlite3NoopDestructor(void*); +SQLITE_PRIVATE void *sqlite3OomFault(sqlite3*); +SQLITE_PRIVATE void sqlite3OomClear(sqlite3*); +SQLITE_PRIVATE int sqlite3ApiExit(sqlite3 *db, int); +SQLITE_PRIVATE int sqlite3OpenTempDatabase(Parse *); + +SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum*, sqlite3*, char*, int, int); +SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum*, int); +SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum*); +SQLITE_PRIVATE void sqlite3StrAccumSetError(StrAccum*, u8); +SQLITE_PRIVATE void sqlite3ResultStrAccum(sqlite3_context*,StrAccum*); +SQLITE_PRIVATE void sqlite3SelectDestInit(SelectDest*,int,int); +SQLITE_PRIVATE Expr *sqlite3CreateColumnExpr(sqlite3 *, SrcList *, int, int); +SQLITE_PRIVATE void sqlite3RecordErrorByteOffset(sqlite3*,const char*); +SQLITE_PRIVATE void sqlite3RecordErrorOffsetOfExpr(sqlite3*,const Expr*); + +SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *); +SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *, Pgno, const u8 *); + +#ifndef SQLITE_OMIT_SUBQUERY +SQLITE_PRIVATE int sqlite3ExprCheckIN(Parse*, Expr*); +#else +# define sqlite3ExprCheckIN(x,y) SQLITE_OK +#endif + +#ifdef SQLITE_ENABLE_STAT4 +SQLITE_PRIVATE int sqlite3Stat4ProbeSetValue( + Parse*,Index*,UnpackedRecord**,Expr*,int,int,int*); +SQLITE_PRIVATE int sqlite3Stat4ValueFromExpr(Parse*, Expr*, u8, sqlite3_value**); +SQLITE_PRIVATE void sqlite3Stat4ProbeFree(UnpackedRecord*); +SQLITE_PRIVATE int sqlite3Stat4Column(sqlite3*, const void*, int, int, sqlite3_value**); +SQLITE_PRIVATE char sqlite3IndexColumnAffinity(sqlite3*, Index*, int); +#endif + +/* +** The interface to the LEMON-generated parser +*/ +#ifndef SQLITE_AMALGAMATION +SQLITE_PRIVATE void *sqlite3ParserAlloc(void*(*)(u64), Parse*); +SQLITE_PRIVATE void sqlite3ParserFree(void*, void(*)(void*)); +#endif +SQLITE_PRIVATE void sqlite3Parser(void*, int, Token); +SQLITE_PRIVATE int sqlite3ParserFallback(int); +#ifdef YYTRACKMAXSTACKDEPTH +SQLITE_PRIVATE int sqlite3ParserStackPeak(void*); +#endif + +SQLITE_PRIVATE void sqlite3AutoLoadExtensions(sqlite3*); +#ifndef SQLITE_OMIT_LOAD_EXTENSION +SQLITE_PRIVATE void sqlite3CloseExtensions(sqlite3*); +#else +# define sqlite3CloseExtensions(X) +#endif + +#ifndef SQLITE_OMIT_SHARED_CACHE +SQLITE_PRIVATE void sqlite3TableLock(Parse *, int, Pgno, u8, const char *); +#else + #define sqlite3TableLock(v,w,x,y,z) +#endif + +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3Utf8To8(unsigned char*); +#endif + +#ifdef SQLITE_OMIT_VIRTUALTABLE +# define sqlite3VtabClear(D,T) +# define sqlite3VtabSync(X,Y) SQLITE_OK +# define sqlite3VtabRollback(X) +# define sqlite3VtabCommit(X) +# define sqlite3VtabInSync(db) 0 +# define sqlite3VtabLock(X) +# define sqlite3VtabUnlock(X) +# define sqlite3VtabModuleUnref(D,X) +# define sqlite3VtabUnlockList(X) +# define sqlite3VtabSavepoint(X, Y, Z) SQLITE_OK +# define sqlite3GetVTable(X,Y) ((VTable*)0) +#else +SQLITE_PRIVATE void sqlite3VtabClear(sqlite3 *db, Table*); +SQLITE_PRIVATE void sqlite3VtabDisconnect(sqlite3 *db, Table *p); +SQLITE_PRIVATE int sqlite3VtabSync(sqlite3 *db, Vdbe*); +SQLITE_PRIVATE int sqlite3VtabRollback(sqlite3 *db); +SQLITE_PRIVATE int sqlite3VtabCommit(sqlite3 *db); +SQLITE_PRIVATE void sqlite3VtabLock(VTable *); +SQLITE_PRIVATE void sqlite3VtabUnlock(VTable *); +SQLITE_PRIVATE void sqlite3VtabModuleUnref(sqlite3*,Module*); +SQLITE_PRIVATE void sqlite3VtabUnlockList(sqlite3*); +SQLITE_PRIVATE int sqlite3VtabSavepoint(sqlite3 *, int, int); +SQLITE_PRIVATE void sqlite3VtabImportErrmsg(Vdbe*, sqlite3_vtab*); +SQLITE_PRIVATE VTable *sqlite3GetVTable(sqlite3*, Table*); +SQLITE_PRIVATE Module *sqlite3VtabCreateModule( + sqlite3*, + const char*, + const sqlite3_module*, + void*, + void(*)(void*) + ); +# define sqlite3VtabInSync(db) ((db)->nVTrans>0 && (db)->aVTrans==0) +#endif +SQLITE_PRIVATE int sqlite3ReadOnlyShadowTables(sqlite3 *db); +#ifndef SQLITE_OMIT_VIRTUALTABLE +SQLITE_PRIVATE int sqlite3ShadowTableName(sqlite3 *db, const char *zName); +SQLITE_PRIVATE int sqlite3IsShadowTableOf(sqlite3*,Table*,const char*); +SQLITE_PRIVATE void sqlite3MarkAllShadowTablesOf(sqlite3*, Table*); +#else +# define sqlite3ShadowTableName(A,B) 0 +# define sqlite3IsShadowTableOf(A,B,C) 0 +# define sqlite3MarkAllShadowTablesOf(A,B) +#endif +SQLITE_PRIVATE int sqlite3VtabEponymousTableInit(Parse*,Module*); +SQLITE_PRIVATE void sqlite3VtabEponymousTableClear(sqlite3*,Module*); +SQLITE_PRIVATE void sqlite3VtabMakeWritable(Parse*,Table*); +SQLITE_PRIVATE void sqlite3VtabBeginParse(Parse*, Token*, Token*, Token*, int); +SQLITE_PRIVATE void sqlite3VtabFinishParse(Parse*, Token*); +SQLITE_PRIVATE void sqlite3VtabArgInit(Parse*); +SQLITE_PRIVATE void sqlite3VtabArgExtend(Parse*, Token*); +SQLITE_PRIVATE int sqlite3VtabCallCreate(sqlite3*, int, const char *, char **); +SQLITE_PRIVATE int sqlite3VtabCallConnect(Parse*, Table*); +SQLITE_PRIVATE int sqlite3VtabCallDestroy(sqlite3*, int, const char *); +SQLITE_PRIVATE int sqlite3VtabBegin(sqlite3 *, VTable *); + +SQLITE_PRIVATE FuncDef *sqlite3VtabOverloadFunction(sqlite3 *,FuncDef*, int nArg, Expr*); +#if (defined(SQLITE_ENABLE_DBPAGE_VTAB) || defined(SQLITE_TEST)) \ + && !defined(SQLITE_OMIT_VIRTUALTABLE) +SQLITE_PRIVATE void sqlite3VtabWriteAll(sqlite3_index_info*); +#endif +SQLITE_PRIVATE sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context*); +SQLITE_PRIVATE int sqlite3VdbeParameterIndex(Vdbe*, const char*, int); +SQLITE_PRIVATE int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *); +SQLITE_PRIVATE void sqlite3ParseObjectInit(Parse*,sqlite3*); +SQLITE_PRIVATE void sqlite3ParseObjectReset(Parse*); +SQLITE_PRIVATE void *sqlite3ParserAddCleanup(Parse*,void(*)(sqlite3*,void*),void*); +#ifdef SQLITE_ENABLE_NORMALIZE +SQLITE_PRIVATE char *sqlite3Normalize(Vdbe*, const char*); +#endif +SQLITE_PRIVATE int sqlite3Reprepare(Vdbe*); +SQLITE_PRIVATE void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*); +SQLITE_PRIVATE CollSeq *sqlite3ExprCompareCollSeq(Parse*,const Expr*); +SQLITE_PRIVATE CollSeq *sqlite3BinaryCompareCollSeq(Parse *, const Expr*, const Expr*); +SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3*); +SQLITE_PRIVATE const char *sqlite3JournalModename(int); +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3Checkpoint(sqlite3*, int, int, int*, int*); +SQLITE_PRIVATE int sqlite3WalDefaultHook(void*,sqlite3*,const char*,int); +#endif +#ifndef SQLITE_OMIT_CTE +SQLITE_PRIVATE Cte *sqlite3CteNew(Parse*,Token*,ExprList*,Select*,u8); +SQLITE_PRIVATE void sqlite3CteDelete(sqlite3*,Cte*); +SQLITE_PRIVATE With *sqlite3WithAdd(Parse*,With*,Cte*); +SQLITE_PRIVATE void sqlite3WithDelete(sqlite3*,With*); +SQLITE_PRIVATE With *sqlite3WithPush(Parse*, With*, u8); +#else +# define sqlite3CteNew(P,T,E,S) ((void*)0) +# define sqlite3CteDelete(D,C) +# define sqlite3CteWithAdd(P,W,C) ((void*)0) +# define sqlite3WithDelete(x,y) +# define sqlite3WithPush(x,y,z) ((void*)0) +#endif +#ifndef SQLITE_OMIT_UPSERT +SQLITE_PRIVATE Upsert *sqlite3UpsertNew(sqlite3*,ExprList*,Expr*,ExprList*,Expr*,Upsert*); +SQLITE_PRIVATE void sqlite3UpsertDelete(sqlite3*,Upsert*); +SQLITE_PRIVATE Upsert *sqlite3UpsertDup(sqlite3*,Upsert*); +SQLITE_PRIVATE int sqlite3UpsertAnalyzeTarget(Parse*,SrcList*,Upsert*); +SQLITE_PRIVATE void sqlite3UpsertDoUpdate(Parse*,Upsert*,Table*,Index*,int); +SQLITE_PRIVATE Upsert *sqlite3UpsertOfIndex(Upsert*,Index*); +SQLITE_PRIVATE int sqlite3UpsertNextIsIPK(Upsert*); +#else +#define sqlite3UpsertNew(u,v,w,x,y,z) ((Upsert*)0) +#define sqlite3UpsertDelete(x,y) +#define sqlite3UpsertDup(x,y) ((Upsert*)0) +#define sqlite3UpsertOfIndex(x,y) ((Upsert*)0) +#define sqlite3UpsertNextIsIPK(x) 0 +#endif + + +/* Declarations for functions in fkey.c. All of these are replaced by +** no-op macros if OMIT_FOREIGN_KEY is defined. In this case no foreign +** key functionality is available. If OMIT_TRIGGER is defined but +** OMIT_FOREIGN_KEY is not, only some of the functions are no-oped. In +** this case foreign keys are parsed, but no other functionality is +** provided (enforcement of FK constraints requires the triggers sub-system). +*/ +#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER) +SQLITE_PRIVATE void sqlite3FkCheck(Parse*, Table*, int, int, int*, int); +SQLITE_PRIVATE void sqlite3FkDropTable(Parse*, SrcList *, Table*); +SQLITE_PRIVATE void sqlite3FkActions(Parse*, Table*, ExprList*, int, int*, int); +SQLITE_PRIVATE int sqlite3FkRequired(Parse*, Table*, int*, int); +SQLITE_PRIVATE u32 sqlite3FkOldmask(Parse*, Table*); +SQLITE_PRIVATE FKey *sqlite3FkReferences(Table *); +SQLITE_PRIVATE void sqlite3FkClearTriggerCache(sqlite3*,int); +#else + #define sqlite3FkActions(a,b,c,d,e,f) + #define sqlite3FkCheck(a,b,c,d,e,f) + #define sqlite3FkDropTable(a,b,c) + #define sqlite3FkOldmask(a,b) 0 + #define sqlite3FkRequired(a,b,c,d) 0 + #define sqlite3FkReferences(a) 0 + #define sqlite3FkClearTriggerCache(a,b) +#endif +#ifndef SQLITE_OMIT_FOREIGN_KEY +SQLITE_PRIVATE void sqlite3FkDelete(sqlite3 *, Table*); +SQLITE_PRIVATE int sqlite3FkLocateIndex(Parse*,Table*,FKey*,Index**,int**); +#else + #define sqlite3FkDelete(a,b) + #define sqlite3FkLocateIndex(a,b,c,d,e) +#endif + + +/* +** Available fault injectors. Should be numbered beginning with 0. +*/ +#define SQLITE_FAULTINJECTOR_MALLOC 0 +#define SQLITE_FAULTINJECTOR_COUNT 1 + +/* +** The interface to the code in fault.c used for identifying "benign" +** malloc failures. This is only present if SQLITE_UNTESTABLE +** is not defined. +*/ +#ifndef SQLITE_UNTESTABLE +SQLITE_PRIVATE void sqlite3BeginBenignMalloc(void); +SQLITE_PRIVATE void sqlite3EndBenignMalloc(void); +#else + #define sqlite3BeginBenignMalloc() + #define sqlite3EndBenignMalloc() +#endif + +/* +** Allowed return values from sqlite3FindInIndex() +*/ +#define IN_INDEX_ROWID 1 /* Search the rowid of the table */ +#define IN_INDEX_EPH 2 /* Search an ephemeral b-tree */ +#define IN_INDEX_INDEX_ASC 3 /* Existing index ASCENDING */ +#define IN_INDEX_INDEX_DESC 4 /* Existing index DESCENDING */ +#define IN_INDEX_NOOP 5 /* No table available. Use comparisons */ +/* +** Allowed flags for the 3rd parameter to sqlite3FindInIndex(). +*/ +#define IN_INDEX_NOOP_OK 0x0001 /* OK to return IN_INDEX_NOOP */ +#define IN_INDEX_MEMBERSHIP 0x0002 /* IN operator used for membership test */ +#define IN_INDEX_LOOP 0x0004 /* IN operator used as a loop */ +SQLITE_PRIVATE int sqlite3FindInIndex(Parse *, Expr *, u32, int*, int*, int*); + +SQLITE_PRIVATE int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int); +SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *); +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) +SQLITE_PRIVATE int sqlite3JournalCreate(sqlite3_file *); +#endif + +SQLITE_PRIVATE int sqlite3JournalIsInMemory(sqlite3_file *p); +SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *); + +SQLITE_PRIVATE void sqlite3ExprSetHeightAndFlags(Parse *pParse, Expr *p); +#if SQLITE_MAX_EXPR_DEPTH>0 +SQLITE_PRIVATE int sqlite3SelectExprHeight(const Select *); +SQLITE_PRIVATE int sqlite3ExprCheckHeight(Parse*, int); +#else + #define sqlite3SelectExprHeight(x) 0 + #define sqlite3ExprCheckHeight(x,y) +#endif + +SQLITE_PRIVATE u32 sqlite3Get4byte(const u8*); +SQLITE_PRIVATE void sqlite3Put4byte(u8*, u32); + +#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY +SQLITE_PRIVATE void sqlite3ConnectionBlocked(sqlite3 *, sqlite3 *); +SQLITE_PRIVATE void sqlite3ConnectionUnlocked(sqlite3 *db); +SQLITE_PRIVATE void sqlite3ConnectionClosed(sqlite3 *db); +#else + #define sqlite3ConnectionBlocked(x,y) + #define sqlite3ConnectionUnlocked(x) + #define sqlite3ConnectionClosed(x) +#endif + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE void sqlite3ParserTrace(FILE*, char *); +#endif +#if defined(YYCOVERAGE) +SQLITE_PRIVATE int sqlite3ParserCoverage(FILE*); +#endif + +/* +** If the SQLITE_ENABLE IOTRACE exists then the global variable +** sqlite3IoTrace is a pointer to a printf-like routine used to +** print I/O tracing messages. +*/ +#ifdef SQLITE_ENABLE_IOTRACE +# define IOTRACE(A) if( sqlite3IoTrace ){ sqlite3IoTrace A; } +SQLITE_PRIVATE void sqlite3VdbeIOTraceSql(Vdbe*); +SQLITE_API SQLITE_EXTERN void (SQLITE_CDECL *sqlite3IoTrace)(const char*,...); +#else +# define IOTRACE(A) +# define sqlite3VdbeIOTraceSql(X) +#endif + +/* +** These routines are available for the mem2.c debugging memory allocator +** only. They are used to verify that different "types" of memory +** allocations are properly tracked by the system. +** +** sqlite3MemdebugSetType() sets the "type" of an allocation to one of +** the MEMTYPE_* macros defined below. The type must be a bitmask with +** a single bit set. +** +** sqlite3MemdebugHasType() returns true if any of the bits in its second +** argument match the type set by the previous sqlite3MemdebugSetType(). +** sqlite3MemdebugHasType() is intended for use inside assert() statements. +** +** sqlite3MemdebugNoType() returns true if none of the bits in its second +** argument match the type set by the previous sqlite3MemdebugSetType(). +** +** Perhaps the most important point is the difference between MEMTYPE_HEAP +** and MEMTYPE_LOOKASIDE. If an allocation is MEMTYPE_LOOKASIDE, that means +** it might have been allocated by lookaside, except the allocation was +** too large or lookaside was already full. It is important to verify +** that allocations that might have been satisfied by lookaside are not +** passed back to non-lookaside free() routines. Asserts such as the +** example above are placed on the non-lookaside free() routines to verify +** this constraint. +** +** All of this is no-op for a production build. It only comes into +** play when the SQLITE_MEMDEBUG compile-time option is used. +*/ +#ifdef SQLITE_MEMDEBUG +SQLITE_PRIVATE void sqlite3MemdebugSetType(void*,u8); +SQLITE_PRIVATE int sqlite3MemdebugHasType(const void*,u8); +SQLITE_PRIVATE int sqlite3MemdebugNoType(const void*,u8); +#else +# define sqlite3MemdebugSetType(X,Y) /* no-op */ +# define sqlite3MemdebugHasType(X,Y) 1 +# define sqlite3MemdebugNoType(X,Y) 1 +#endif +#define MEMTYPE_HEAP 0x01 /* General heap allocations */ +#define MEMTYPE_LOOKASIDE 0x02 /* Heap that might have been lookaside */ +#define MEMTYPE_PCACHE 0x04 /* Page cache allocations */ + +/* +** Threading interface +*/ +#if SQLITE_MAX_WORKER_THREADS>0 +SQLITE_PRIVATE int sqlite3ThreadCreate(SQLiteThread**,void*(*)(void*),void*); +SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread*, void**); +#endif + +#if defined(SQLITE_ENABLE_DBPAGE_VTAB) || defined(SQLITE_TEST) +SQLITE_PRIVATE int sqlite3DbpageRegister(sqlite3*); +#endif +#if defined(SQLITE_ENABLE_DBSTAT_VTAB) || defined(SQLITE_TEST) +SQLITE_PRIVATE int sqlite3DbstatRegister(sqlite3*); +#endif + +SQLITE_PRIVATE int sqlite3ExprVectorSize(const Expr *pExpr); +SQLITE_PRIVATE int sqlite3ExprIsVector(const Expr *pExpr); +SQLITE_PRIVATE Expr *sqlite3VectorFieldSubexpr(Expr*, int); +SQLITE_PRIVATE Expr *sqlite3ExprForVectorField(Parse*,Expr*,int,int); +SQLITE_PRIVATE void sqlite3VectorErrorMsg(Parse*, Expr*); + +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS +SQLITE_PRIVATE const char **sqlite3CompileOptions(int *pnOpt); +#endif + +#endif /* SQLITEINT_H */ + +/************** End of sqliteInt.h *******************************************/ +/************** Begin file os_common.h ***************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains macros and a little bit of code that is common to +** all of the platform-specific files (os_*.c) and is #included into those +** files. +** +** This file should be #included by the os_*.c files only. It is not a +** general purpose header file. +*/ +#ifndef _OS_COMMON_H_ +#define _OS_COMMON_H_ + +/* +** At least two bugs have slipped in because we changed the MEMORY_DEBUG +** macro to SQLITE_DEBUG and some older makefiles have not yet made the +** switch. The following code should catch this problem at compile-time. +*/ +#ifdef MEMORY_DEBUG +# error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." +#endif + +/* +** Macros for performance tracing. Normally turned off. Only works +** on i486 hardware. +*/ +#ifdef SQLITE_PERFORMANCE_TRACE + +/* +** hwtime.h contains inline assembler code for implementing +** high-performance timing routines. +*/ +/************** Include hwtime.h in the middle of os_common.h ****************/ +/************** Begin file hwtime.h ******************************************/ +/* +** 2008 May 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains inline asm code for retrieving "high-performance" +** counters for x86 and x86_64 class CPUs. +*/ +#ifndef SQLITE_HWTIME_H +#define SQLITE_HWTIME_H + +/* +** The following routine only works on pentium-class (or newer) processors. +** It uses the RDTSC opcode to read the cycle count value out of the +** processor and returns that value. This can be used for high-res +** profiling. +*/ +#if !defined(__STRICT_ANSI__) && \ + (defined(__GNUC__) || defined(_MSC_VER)) && \ + (defined(i386) || defined(__i386__) || defined(_M_IX86)) + + #if defined(__GNUC__) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned int lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (sqlite_uint64)hi << 32 | lo; + } + + #elif defined(_MSC_VER) + + __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){ + __asm { + rdtsc + ret ; return value at EDX:EAX + } + } + + #endif + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__x86_64__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned long val; + __asm__ __volatile__ ("rdtsc" : "=A" (val)); + return val; + } + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__ppc__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned long long retval; + unsigned long junk; + __asm__ __volatile__ ("\n\ + 1: mftbu %1\n\ + mftb %L0\n\ + mftbu %0\n\ + cmpw %0,%1\n\ + bne 1b" + : "=r" (retval), "=r" (junk)); + return retval; + } + +#else + + /* + ** asm() is needed for hardware timing support. Without asm(), + ** disable the sqlite3Hwtime() routine. + ** + ** sqlite3Hwtime() is only used for some obscure debugging + ** and analysis configurations, not in any deliverable, so this + ** should not be a great loss. + */ +SQLITE_PRIVATE sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); } + +#endif + +#endif /* !defined(SQLITE_HWTIME_H) */ + +/************** End of hwtime.h **********************************************/ +/************** Continuing where we left off in os_common.h ******************/ + +static sqlite_uint64 g_start; +static sqlite_uint64 g_elapsed; +#define TIMER_START g_start=sqlite3Hwtime() +#define TIMER_END g_elapsed=sqlite3Hwtime()-g_start +#define TIMER_ELAPSED g_elapsed +#else +#define TIMER_START +#define TIMER_END +#define TIMER_ELAPSED ((sqlite_uint64)0) +#endif + +/* +** If we compile with the SQLITE_TEST macro set, then the following block +** of code will give us the ability to simulate a disk I/O error. This +** is used for testing the I/O recovery logic. +*/ +#if defined(SQLITE_TEST) +SQLITE_API extern int sqlite3_io_error_hit; +SQLITE_API extern int sqlite3_io_error_hardhit; +SQLITE_API extern int sqlite3_io_error_pending; +SQLITE_API extern int sqlite3_io_error_persist; +SQLITE_API extern int sqlite3_io_error_benign; +SQLITE_API extern int sqlite3_diskfull_pending; +SQLITE_API extern int sqlite3_diskfull; +#define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X) +#define SimulateIOError(CODE) \ + if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \ + || sqlite3_io_error_pending-- == 1 ) \ + { local_ioerr(); CODE; } +static void local_ioerr(){ + IOTRACE(("IOERR\n")); + sqlite3_io_error_hit++; + if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++; +} +#define SimulateDiskfullError(CODE) \ + if( sqlite3_diskfull_pending ){ \ + if( sqlite3_diskfull_pending == 1 ){ \ + local_ioerr(); \ + sqlite3_diskfull = 1; \ + sqlite3_io_error_hit = 1; \ + CODE; \ + }else{ \ + sqlite3_diskfull_pending--; \ + } \ + } +#else +#define SimulateIOErrorBenign(X) +#define SimulateIOError(A) +#define SimulateDiskfullError(A) +#endif /* defined(SQLITE_TEST) */ + +/* +** When testing, keep a count of the number of open files. +*/ +#if defined(SQLITE_TEST) +SQLITE_API extern int sqlite3_open_file_count; +#define OpenCounter(X) sqlite3_open_file_count+=(X) +#else +#define OpenCounter(X) +#endif /* defined(SQLITE_TEST) */ + +#endif /* !defined(_OS_COMMON_H_) */ + +/************** End of os_common.h *******************************************/ +/************** Begin file ctime.c *******************************************/ +/* DO NOT EDIT! +** This file is automatically generated by the script in the canonical +** SQLite source tree at tool/mkctimec.tcl. +** +** To modify this header, edit any of the various lists in that script +** which specify categories of generated conditionals in this file. +*/ + +/* +** 2010 February 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file implements routines used to report what compile-time options +** SQLite was built with. +*/ +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS /* IMP: R-16824-07538 */ + +/* +** Include the configuration header output by 'configure' if we're using the +** autoconf-based build +*/ +#if defined(_HAVE_SQLITE_CONFIG_H) && !defined(SQLITECONFIG_H) +/* #include "config.h" */ +#define SQLITECONFIG_H 1 +#endif + +/* These macros are provided to "stringify" the value of the define +** for those options in which the value is meaningful. */ +#define CTIMEOPT_VAL_(opt) #opt +#define CTIMEOPT_VAL(opt) CTIMEOPT_VAL_(opt) + +/* Like CTIMEOPT_VAL, but especially for SQLITE_DEFAULT_LOOKASIDE. This +** option requires a separate macro because legal values contain a single +** comma. e.g. (-DSQLITE_DEFAULT_LOOKASIDE="100,100") */ +#define CTIMEOPT_VAL2_(opt1,opt2) #opt1 "," #opt2 +#define CTIMEOPT_VAL2(opt) CTIMEOPT_VAL2_(opt) +/* #include "sqliteInt.h" */ + +/* +** An array of names of all compile-time options. This array should +** be sorted A-Z. +** +** This array looks large, but in a typical installation actually uses +** only a handful of compile-time options, so most times this array is usually +** rather short and uses little memory space. +*/ +static const char * const sqlite3azCompileOpt[] = { + +#ifdef SQLITE_32BIT_ROWID + "32BIT_ROWID", +#endif +#ifdef SQLITE_4_BYTE_ALIGNED_MALLOC + "4_BYTE_ALIGNED_MALLOC", +#endif +#ifdef SQLITE_64BIT_STATS + "64BIT_STATS", +#endif +#ifdef SQLITE_ALLOW_COVERING_INDEX_SCAN +# if SQLITE_ALLOW_COVERING_INDEX_SCAN != 1 + "ALLOW_COVERING_INDEX_SCAN=" CTIMEOPT_VAL(SQLITE_ALLOW_COVERING_INDEX_SCAN), +# endif +#endif +#ifdef SQLITE_ALLOW_URI_AUTHORITY + "ALLOW_URI_AUTHORITY", +#endif +#ifdef SQLITE_ATOMIC_INTRINSICS + "ATOMIC_INTRINSICS=" CTIMEOPT_VAL(SQLITE_ATOMIC_INTRINSICS), +#endif +#ifdef SQLITE_BITMASK_TYPE + "BITMASK_TYPE=" CTIMEOPT_VAL(SQLITE_BITMASK_TYPE), +#endif +#ifdef SQLITE_BUG_COMPATIBLE_20160819 + "BUG_COMPATIBLE_20160819", +#endif +#ifdef SQLITE_CASE_SENSITIVE_LIKE + "CASE_SENSITIVE_LIKE", +#endif +#ifdef SQLITE_CHECK_PAGES + "CHECK_PAGES", +#endif +#if defined(__clang__) && defined(__clang_major__) + "COMPILER=clang-" CTIMEOPT_VAL(__clang_major__) "." + CTIMEOPT_VAL(__clang_minor__) "." + CTIMEOPT_VAL(__clang_patchlevel__), +#elif defined(_MSC_VER) + "COMPILER=msvc-" CTIMEOPT_VAL(_MSC_VER), +#elif defined(__GNUC__) && defined(__VERSION__) + "COMPILER=gcc-" __VERSION__, +#endif +#ifdef SQLITE_COVERAGE_TEST + "COVERAGE_TEST", +#endif +#ifdef SQLITE_DEBUG + "DEBUG", +#endif +#ifdef SQLITE_DEFAULT_AUTOMATIC_INDEX + "DEFAULT_AUTOMATIC_INDEX", +#endif +#ifdef SQLITE_DEFAULT_AUTOVACUUM + "DEFAULT_AUTOVACUUM", +#endif +#ifdef SQLITE_DEFAULT_CACHE_SIZE + "DEFAULT_CACHE_SIZE=" CTIMEOPT_VAL(SQLITE_DEFAULT_CACHE_SIZE), +#endif +#ifdef SQLITE_DEFAULT_CKPTFULLFSYNC + "DEFAULT_CKPTFULLFSYNC", +#endif +#ifdef SQLITE_DEFAULT_FILE_FORMAT + "DEFAULT_FILE_FORMAT=" CTIMEOPT_VAL(SQLITE_DEFAULT_FILE_FORMAT), +#endif +#ifdef SQLITE_DEFAULT_FILE_PERMISSIONS + "DEFAULT_FILE_PERMISSIONS=" CTIMEOPT_VAL(SQLITE_DEFAULT_FILE_PERMISSIONS), +#endif +#ifdef SQLITE_DEFAULT_FOREIGN_KEYS + "DEFAULT_FOREIGN_KEYS", +#endif +#ifdef SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT + "DEFAULT_JOURNAL_SIZE_LIMIT=" CTIMEOPT_VAL(SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT), +#endif +#ifdef SQLITE_DEFAULT_LOCKING_MODE + "DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE), +#endif +#ifdef SQLITE_DEFAULT_LOOKASIDE + "DEFAULT_LOOKASIDE=" CTIMEOPT_VAL2(SQLITE_DEFAULT_LOOKASIDE), +#endif +#ifdef SQLITE_DEFAULT_MEMSTATUS +# if SQLITE_DEFAULT_MEMSTATUS != 1 + "DEFAULT_MEMSTATUS=" CTIMEOPT_VAL(SQLITE_DEFAULT_MEMSTATUS), +# endif +#endif +#ifdef SQLITE_DEFAULT_MMAP_SIZE + "DEFAULT_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_DEFAULT_MMAP_SIZE), +#endif +#ifdef SQLITE_DEFAULT_PAGE_SIZE + "DEFAULT_PAGE_SIZE=" CTIMEOPT_VAL(SQLITE_DEFAULT_PAGE_SIZE), +#endif +#ifdef SQLITE_DEFAULT_PCACHE_INITSZ + "DEFAULT_PCACHE_INITSZ=" CTIMEOPT_VAL(SQLITE_DEFAULT_PCACHE_INITSZ), +#endif +#ifdef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS + "DEFAULT_PROXYDIR_PERMISSIONS=" CTIMEOPT_VAL(SQLITE_DEFAULT_PROXYDIR_PERMISSIONS), +#endif +#ifdef SQLITE_DEFAULT_RECURSIVE_TRIGGERS + "DEFAULT_RECURSIVE_TRIGGERS", +#endif +#ifdef SQLITE_DEFAULT_ROWEST + "DEFAULT_ROWEST=" CTIMEOPT_VAL(SQLITE_DEFAULT_ROWEST), +#endif +#ifdef SQLITE_DEFAULT_SECTOR_SIZE + "DEFAULT_SECTOR_SIZE=" CTIMEOPT_VAL(SQLITE_DEFAULT_SECTOR_SIZE), +#endif +#ifdef SQLITE_DEFAULT_SYNCHRONOUS + "DEFAULT_SYNCHRONOUS=" CTIMEOPT_VAL(SQLITE_DEFAULT_SYNCHRONOUS), +#endif +#ifdef SQLITE_DEFAULT_WAL_AUTOCHECKPOINT + "DEFAULT_WAL_AUTOCHECKPOINT=" CTIMEOPT_VAL(SQLITE_DEFAULT_WAL_AUTOCHECKPOINT), +#endif +#ifdef SQLITE_DEFAULT_WAL_SYNCHRONOUS + "DEFAULT_WAL_SYNCHRONOUS=" CTIMEOPT_VAL(SQLITE_DEFAULT_WAL_SYNCHRONOUS), +#endif +#ifdef SQLITE_DEFAULT_WORKER_THREADS + "DEFAULT_WORKER_THREADS=" CTIMEOPT_VAL(SQLITE_DEFAULT_WORKER_THREADS), +#endif +#ifdef SQLITE_DIRECT_OVERFLOW_READ + "DIRECT_OVERFLOW_READ", +#endif +#ifdef SQLITE_DISABLE_DIRSYNC + "DISABLE_DIRSYNC", +#endif +#ifdef SQLITE_DISABLE_FTS3_UNICODE + "DISABLE_FTS3_UNICODE", +#endif +#ifdef SQLITE_DISABLE_FTS4_DEFERRED + "DISABLE_FTS4_DEFERRED", +#endif +#ifdef SQLITE_DISABLE_INTRINSIC + "DISABLE_INTRINSIC", +#endif +#ifdef SQLITE_DISABLE_LFS + "DISABLE_LFS", +#endif +#ifdef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS + "DISABLE_PAGECACHE_OVERFLOW_STATS", +#endif +#ifdef SQLITE_DISABLE_SKIPAHEAD_DISTINCT + "DISABLE_SKIPAHEAD_DISTINCT", +#endif +#ifdef SQLITE_ENABLE_8_3_NAMES + "ENABLE_8_3_NAMES=" CTIMEOPT_VAL(SQLITE_ENABLE_8_3_NAMES), +#endif +#ifdef SQLITE_ENABLE_API_ARMOR + "ENABLE_API_ARMOR", +#endif +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + "ENABLE_ATOMIC_WRITE", +#endif +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + "ENABLE_BATCH_ATOMIC_WRITE", +#endif +#ifdef SQLITE_ENABLE_BYTECODE_VTAB + "ENABLE_BYTECODE_VTAB", +#endif +#ifdef SQLITE_ENABLE_CEROD + "ENABLE_CEROD=" CTIMEOPT_VAL(SQLITE_ENABLE_CEROD), +#endif +#ifdef SQLITE_ENABLE_COLUMN_METADATA + "ENABLE_COLUMN_METADATA", +#endif +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + "ENABLE_COLUMN_USED_MASK", +#endif +#ifdef SQLITE_ENABLE_COSTMULT + "ENABLE_COSTMULT", +#endif +#ifdef SQLITE_ENABLE_CURSOR_HINTS + "ENABLE_CURSOR_HINTS", +#endif +#ifdef SQLITE_ENABLE_DBPAGE_VTAB + "ENABLE_DBPAGE_VTAB", +#endif +#ifdef SQLITE_ENABLE_DBSTAT_VTAB + "ENABLE_DBSTAT_VTAB", +#endif +#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + "ENABLE_EXPENSIVE_ASSERT", +#endif +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + "ENABLE_EXPLAIN_COMMENTS", +#endif +#ifdef SQLITE_ENABLE_FTS3 + "ENABLE_FTS3", +#endif +#ifdef SQLITE_ENABLE_FTS3_PARENTHESIS + "ENABLE_FTS3_PARENTHESIS", +#endif +#ifdef SQLITE_ENABLE_FTS3_TOKENIZER + "ENABLE_FTS3_TOKENIZER", +#endif +#ifdef SQLITE_ENABLE_FTS4 + "ENABLE_FTS4", +#endif +#ifdef SQLITE_ENABLE_FTS5 + "ENABLE_FTS5", +#endif +#ifdef SQLITE_ENABLE_GEOPOLY + "ENABLE_GEOPOLY", +#endif +#ifdef SQLITE_ENABLE_HIDDEN_COLUMNS + "ENABLE_HIDDEN_COLUMNS", +#endif +#ifdef SQLITE_ENABLE_ICU + "ENABLE_ICU", +#endif +#ifdef SQLITE_ENABLE_IOTRACE + "ENABLE_IOTRACE", +#endif +#ifdef SQLITE_ENABLE_LOAD_EXTENSION + "ENABLE_LOAD_EXTENSION", +#endif +#ifdef SQLITE_ENABLE_LOCKING_STYLE + "ENABLE_LOCKING_STYLE=" CTIMEOPT_VAL(SQLITE_ENABLE_LOCKING_STYLE), +#endif +#ifdef SQLITE_ENABLE_MATH_FUNCTIONS + "ENABLE_MATH_FUNCTIONS", +#endif +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + "ENABLE_MEMORY_MANAGEMENT", +#endif +#ifdef SQLITE_ENABLE_MEMSYS3 + "ENABLE_MEMSYS3", +#endif +#ifdef SQLITE_ENABLE_MEMSYS5 + "ENABLE_MEMSYS5", +#endif +#ifdef SQLITE_ENABLE_MULTIPLEX + "ENABLE_MULTIPLEX", +#endif +#ifdef SQLITE_ENABLE_NORMALIZE + "ENABLE_NORMALIZE", +#endif +#ifdef SQLITE_ENABLE_NULL_TRIM + "ENABLE_NULL_TRIM", +#endif +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC + "ENABLE_OFFSET_SQL_FUNC", +#endif +#ifdef SQLITE_ENABLE_OVERSIZE_CELL_CHECK + "ENABLE_OVERSIZE_CELL_CHECK", +#endif +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + "ENABLE_PREUPDATE_HOOK", +#endif +#ifdef SQLITE_ENABLE_QPSG + "ENABLE_QPSG", +#endif +#ifdef SQLITE_ENABLE_RBU + "ENABLE_RBU", +#endif +#ifdef SQLITE_ENABLE_RTREE + "ENABLE_RTREE", +#endif +#ifdef SQLITE_ENABLE_SELECTTRACE + "ENABLE_SELECTTRACE", +#endif +#ifdef SQLITE_ENABLE_SESSION + "ENABLE_SESSION", +#endif +#ifdef SQLITE_ENABLE_SNAPSHOT + "ENABLE_SNAPSHOT", +#endif +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + "ENABLE_SORTER_REFERENCES", +#endif +#ifdef SQLITE_ENABLE_SQLLOG + "ENABLE_SQLLOG", +#endif +#ifdef SQLITE_ENABLE_STAT4 + "ENABLE_STAT4", +#endif +#ifdef SQLITE_ENABLE_STMTVTAB + "ENABLE_STMTVTAB", +#endif +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + "ENABLE_STMT_SCANSTATUS", +#endif +#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION + "ENABLE_UNKNOWN_SQL_FUNCTION", +#endif +#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY + "ENABLE_UNLOCK_NOTIFY", +#endif +#ifdef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + "ENABLE_UPDATE_DELETE_LIMIT", +#endif +#ifdef SQLITE_ENABLE_URI_00_ERROR + "ENABLE_URI_00_ERROR", +#endif +#ifdef SQLITE_ENABLE_VFSTRACE + "ENABLE_VFSTRACE", +#endif +#ifdef SQLITE_ENABLE_WHERETRACE + "ENABLE_WHERETRACE", +#endif +#ifdef SQLITE_ENABLE_ZIPVFS + "ENABLE_ZIPVFS", +#endif +#ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS + "EXPLAIN_ESTIMATED_ROWS", +#endif +#ifdef SQLITE_EXTRA_IFNULLROW + "EXTRA_IFNULLROW", +#endif +#ifdef SQLITE_EXTRA_INIT + "EXTRA_INIT=" CTIMEOPT_VAL(SQLITE_EXTRA_INIT), +#endif +#ifdef SQLITE_EXTRA_SHUTDOWN + "EXTRA_SHUTDOWN=" CTIMEOPT_VAL(SQLITE_EXTRA_SHUTDOWN), +#endif +#ifdef SQLITE_FTS3_MAX_EXPR_DEPTH + "FTS3_MAX_EXPR_DEPTH=" CTIMEOPT_VAL(SQLITE_FTS3_MAX_EXPR_DEPTH), +#endif +#ifdef SQLITE_FTS5_ENABLE_TEST_MI + "FTS5_ENABLE_TEST_MI", +#endif +#ifdef SQLITE_FTS5_NO_WITHOUT_ROWID + "FTS5_NO_WITHOUT_ROWID", +#endif +#if HAVE_ISNAN || SQLITE_HAVE_ISNAN + "HAVE_ISNAN", +#endif +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX +# if SQLITE_HOMEGROWN_RECURSIVE_MUTEX != 1 + "HOMEGROWN_RECURSIVE_MUTEX=" CTIMEOPT_VAL(SQLITE_HOMEGROWN_RECURSIVE_MUTEX), +# endif +#endif +#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS + "IGNORE_AFP_LOCK_ERRORS", +#endif +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + "IGNORE_FLOCK_LOCK_ERRORS", +#endif +#ifdef SQLITE_INLINE_MEMCPY + "INLINE_MEMCPY", +#endif +#ifdef SQLITE_INT64_TYPE + "INT64_TYPE", +#endif +#ifdef SQLITE_INTEGRITY_CHECK_ERROR_MAX + "INTEGRITY_CHECK_ERROR_MAX=" CTIMEOPT_VAL(SQLITE_INTEGRITY_CHECK_ERROR_MAX), +#endif +#ifdef SQLITE_LIKE_DOESNT_MATCH_BLOBS + "LIKE_DOESNT_MATCH_BLOBS", +#endif +#ifdef SQLITE_LOCK_TRACE + "LOCK_TRACE", +#endif +#ifdef SQLITE_LOG_CACHE_SPILL + "LOG_CACHE_SPILL", +#endif +#ifdef SQLITE_MALLOC_SOFT_LIMIT + "MALLOC_SOFT_LIMIT=" CTIMEOPT_VAL(SQLITE_MALLOC_SOFT_LIMIT), +#endif +#ifdef SQLITE_MAX_ATTACHED + "MAX_ATTACHED=" CTIMEOPT_VAL(SQLITE_MAX_ATTACHED), +#endif +#ifdef SQLITE_MAX_COLUMN + "MAX_COLUMN=" CTIMEOPT_VAL(SQLITE_MAX_COLUMN), +#endif +#ifdef SQLITE_MAX_COMPOUND_SELECT + "MAX_COMPOUND_SELECT=" CTIMEOPT_VAL(SQLITE_MAX_COMPOUND_SELECT), +#endif +#ifdef SQLITE_MAX_DEFAULT_PAGE_SIZE + "MAX_DEFAULT_PAGE_SIZE=" CTIMEOPT_VAL(SQLITE_MAX_DEFAULT_PAGE_SIZE), +#endif +#ifdef SQLITE_MAX_EXPR_DEPTH + "MAX_EXPR_DEPTH=" CTIMEOPT_VAL(SQLITE_MAX_EXPR_DEPTH), +#endif +#ifdef SQLITE_MAX_FUNCTION_ARG + "MAX_FUNCTION_ARG=" CTIMEOPT_VAL(SQLITE_MAX_FUNCTION_ARG), +#endif +#ifdef SQLITE_MAX_LENGTH + "MAX_LENGTH=" CTIMEOPT_VAL(SQLITE_MAX_LENGTH), +#endif +#ifdef SQLITE_MAX_LIKE_PATTERN_LENGTH + "MAX_LIKE_PATTERN_LENGTH=" CTIMEOPT_VAL(SQLITE_MAX_LIKE_PATTERN_LENGTH), +#endif +#ifdef SQLITE_MAX_MEMORY + "MAX_MEMORY=" CTIMEOPT_VAL(SQLITE_MAX_MEMORY), +#endif +#ifdef SQLITE_MAX_MMAP_SIZE + "MAX_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_MAX_MMAP_SIZE), +#endif +#ifdef SQLITE_MAX_MMAP_SIZE_ + "MAX_MMAP_SIZE_=" CTIMEOPT_VAL(SQLITE_MAX_MMAP_SIZE_), +#endif +#ifdef SQLITE_MAX_PAGE_COUNT + "MAX_PAGE_COUNT=" CTIMEOPT_VAL(SQLITE_MAX_PAGE_COUNT), +#endif +#ifdef SQLITE_MAX_PAGE_SIZE + "MAX_PAGE_SIZE=" CTIMEOPT_VAL(SQLITE_MAX_PAGE_SIZE), +#endif +#ifdef SQLITE_MAX_SCHEMA_RETRY + "MAX_SCHEMA_RETRY=" CTIMEOPT_VAL(SQLITE_MAX_SCHEMA_RETRY), +#endif +#ifdef SQLITE_MAX_SQL_LENGTH + "MAX_SQL_LENGTH=" CTIMEOPT_VAL(SQLITE_MAX_SQL_LENGTH), +#endif +#ifdef SQLITE_MAX_TRIGGER_DEPTH + "MAX_TRIGGER_DEPTH=" CTIMEOPT_VAL(SQLITE_MAX_TRIGGER_DEPTH), +#endif +#ifdef SQLITE_MAX_VARIABLE_NUMBER + "MAX_VARIABLE_NUMBER=" CTIMEOPT_VAL(SQLITE_MAX_VARIABLE_NUMBER), +#endif +#ifdef SQLITE_MAX_VDBE_OP + "MAX_VDBE_OP=" CTIMEOPT_VAL(SQLITE_MAX_VDBE_OP), +#endif +#ifdef SQLITE_MAX_WORKER_THREADS + "MAX_WORKER_THREADS=" CTIMEOPT_VAL(SQLITE_MAX_WORKER_THREADS), +#endif +#ifdef SQLITE_MEMDEBUG + "MEMDEBUG", +#endif +#ifdef SQLITE_MIXED_ENDIAN_64BIT_FLOAT + "MIXED_ENDIAN_64BIT_FLOAT", +#endif +#ifdef SQLITE_MMAP_READWRITE + "MMAP_READWRITE", +#endif +#ifdef SQLITE_MUTEX_NOOP + "MUTEX_NOOP", +#endif +#ifdef SQLITE_MUTEX_OMIT + "MUTEX_OMIT", +#endif +#ifdef SQLITE_MUTEX_PTHREADS + "MUTEX_PTHREADS", +#endif +#ifdef SQLITE_MUTEX_W32 + "MUTEX_W32", +#endif +#ifdef SQLITE_NEED_ERR_NAME + "NEED_ERR_NAME", +#endif +#ifdef SQLITE_NO_SYNC + "NO_SYNC", +#endif +#ifdef SQLITE_OMIT_ALTERTABLE + "OMIT_ALTERTABLE", +#endif +#ifdef SQLITE_OMIT_ANALYZE + "OMIT_ANALYZE", +#endif +#ifdef SQLITE_OMIT_ATTACH + "OMIT_ATTACH", +#endif +#ifdef SQLITE_OMIT_AUTHORIZATION + "OMIT_AUTHORIZATION", +#endif +#ifdef SQLITE_OMIT_AUTOINCREMENT + "OMIT_AUTOINCREMENT", +#endif +#ifdef SQLITE_OMIT_AUTOINIT + "OMIT_AUTOINIT", +#endif +#ifdef SQLITE_OMIT_AUTOMATIC_INDEX + "OMIT_AUTOMATIC_INDEX", +#endif +#ifdef SQLITE_OMIT_AUTORESET + "OMIT_AUTORESET", +#endif +#ifdef SQLITE_OMIT_AUTOVACUUM + "OMIT_AUTOVACUUM", +#endif +#ifdef SQLITE_OMIT_BETWEEN_OPTIMIZATION + "OMIT_BETWEEN_OPTIMIZATION", +#endif +#ifdef SQLITE_OMIT_BLOB_LITERAL + "OMIT_BLOB_LITERAL", +#endif +#ifdef SQLITE_OMIT_CAST + "OMIT_CAST", +#endif +#ifdef SQLITE_OMIT_CHECK + "OMIT_CHECK", +#endif +#ifdef SQLITE_OMIT_COMPLETE + "OMIT_COMPLETE", +#endif +#ifdef SQLITE_OMIT_COMPOUND_SELECT + "OMIT_COMPOUND_SELECT", +#endif +#ifdef SQLITE_OMIT_CONFLICT_CLAUSE + "OMIT_CONFLICT_CLAUSE", +#endif +#ifdef SQLITE_OMIT_CTE + "OMIT_CTE", +#endif +#if defined(SQLITE_OMIT_DATETIME_FUNCS) || defined(SQLITE_OMIT_FLOATING_POINT) + "OMIT_DATETIME_FUNCS", +#endif +#ifdef SQLITE_OMIT_DECLTYPE + "OMIT_DECLTYPE", +#endif +#ifdef SQLITE_OMIT_DEPRECATED + "OMIT_DEPRECATED", +#endif +#ifdef SQLITE_OMIT_DESERIALIZE + "OMIT_DESERIALIZE", +#endif +#ifdef SQLITE_OMIT_DISKIO + "OMIT_DISKIO", +#endif +#ifdef SQLITE_OMIT_EXPLAIN + "OMIT_EXPLAIN", +#endif +#ifdef SQLITE_OMIT_FLAG_PRAGMAS + "OMIT_FLAG_PRAGMAS", +#endif +#ifdef SQLITE_OMIT_FLOATING_POINT + "OMIT_FLOATING_POINT", +#endif +#ifdef SQLITE_OMIT_FOREIGN_KEY + "OMIT_FOREIGN_KEY", +#endif +#ifdef SQLITE_OMIT_GET_TABLE + "OMIT_GET_TABLE", +#endif +#ifdef SQLITE_OMIT_HEX_INTEGER + "OMIT_HEX_INTEGER", +#endif +#ifdef SQLITE_OMIT_INCRBLOB + "OMIT_INCRBLOB", +#endif +#ifdef SQLITE_OMIT_INTEGRITY_CHECK + "OMIT_INTEGRITY_CHECK", +#endif +#ifdef SQLITE_OMIT_INTROSPECTION_PRAGMAS + "OMIT_INTROSPECTION_PRAGMAS", +#endif +#ifdef SQLITE_OMIT_JSON + "OMIT_JSON", +#endif +#ifdef SQLITE_OMIT_LIKE_OPTIMIZATION + "OMIT_LIKE_OPTIMIZATION", +#endif +#ifdef SQLITE_OMIT_LOAD_EXTENSION + "OMIT_LOAD_EXTENSION", +#endif +#ifdef SQLITE_OMIT_LOCALTIME + "OMIT_LOCALTIME", +#endif +#ifdef SQLITE_OMIT_LOOKASIDE + "OMIT_LOOKASIDE", +#endif +#ifdef SQLITE_OMIT_MEMORYDB + "OMIT_MEMORYDB", +#endif +#ifdef SQLITE_OMIT_OR_OPTIMIZATION + "OMIT_OR_OPTIMIZATION", +#endif +#ifdef SQLITE_OMIT_PAGER_PRAGMAS + "OMIT_PAGER_PRAGMAS", +#endif +#ifdef SQLITE_OMIT_PARSER_TRACE + "OMIT_PARSER_TRACE", +#endif +#ifdef SQLITE_OMIT_POPEN + "OMIT_POPEN", +#endif +#ifdef SQLITE_OMIT_PRAGMA + "OMIT_PRAGMA", +#endif +#ifdef SQLITE_OMIT_PROGRESS_CALLBACK + "OMIT_PROGRESS_CALLBACK", +#endif +#ifdef SQLITE_OMIT_QUICKBALANCE + "OMIT_QUICKBALANCE", +#endif +#ifdef SQLITE_OMIT_REINDEX + "OMIT_REINDEX", +#endif +#ifdef SQLITE_OMIT_SCHEMA_PRAGMAS + "OMIT_SCHEMA_PRAGMAS", +#endif +#ifdef SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS + "OMIT_SCHEMA_VERSION_PRAGMAS", +#endif +#ifdef SQLITE_OMIT_SHARED_CACHE + "OMIT_SHARED_CACHE", +#endif +#ifdef SQLITE_OMIT_SHUTDOWN_DIRECTORIES + "OMIT_SHUTDOWN_DIRECTORIES", +#endif +#ifdef SQLITE_OMIT_SUBQUERY + "OMIT_SUBQUERY", +#endif +#ifdef SQLITE_OMIT_TCL_VARIABLE + "OMIT_TCL_VARIABLE", +#endif +#ifdef SQLITE_OMIT_TEMPDB + "OMIT_TEMPDB", +#endif +#ifdef SQLITE_OMIT_TEST_CONTROL + "OMIT_TEST_CONTROL", +#endif +#ifdef SQLITE_OMIT_TRACE +# if SQLITE_OMIT_TRACE != 1 + "OMIT_TRACE=" CTIMEOPT_VAL(SQLITE_OMIT_TRACE), +# endif +#endif +#ifdef SQLITE_OMIT_TRIGGER + "OMIT_TRIGGER", +#endif +#ifdef SQLITE_OMIT_TRUNCATE_OPTIMIZATION + "OMIT_TRUNCATE_OPTIMIZATION", +#endif +#ifdef SQLITE_OMIT_UTF16 + "OMIT_UTF16", +#endif +#ifdef SQLITE_OMIT_VACUUM + "OMIT_VACUUM", +#endif +#ifdef SQLITE_OMIT_VIEW + "OMIT_VIEW", +#endif +#ifdef SQLITE_OMIT_VIRTUALTABLE + "OMIT_VIRTUALTABLE", +#endif +#ifdef SQLITE_OMIT_WAL + "OMIT_WAL", +#endif +#ifdef SQLITE_OMIT_WSD + "OMIT_WSD", +#endif +#ifdef SQLITE_OMIT_XFER_OPT + "OMIT_XFER_OPT", +#endif +#ifdef SQLITE_PCACHE_SEPARATE_HEADER + "PCACHE_SEPARATE_HEADER", +#endif +#ifdef SQLITE_PERFORMANCE_TRACE + "PERFORMANCE_TRACE", +#endif +#ifdef SQLITE_POWERSAFE_OVERWRITE +# if SQLITE_POWERSAFE_OVERWRITE != 1 + "POWERSAFE_OVERWRITE=" CTIMEOPT_VAL(SQLITE_POWERSAFE_OVERWRITE), +# endif +#endif +#ifdef SQLITE_PREFER_PROXY_LOCKING + "PREFER_PROXY_LOCKING", +#endif +#ifdef SQLITE_PROXY_DEBUG + "PROXY_DEBUG", +#endif +#ifdef SQLITE_REVERSE_UNORDERED_SELECTS + "REVERSE_UNORDERED_SELECTS", +#endif +#ifdef SQLITE_RTREE_INT_ONLY + "RTREE_INT_ONLY", +#endif +#ifdef SQLITE_SECURE_DELETE + "SECURE_DELETE", +#endif +#ifdef SQLITE_SMALL_STACK + "SMALL_STACK", +#endif +#ifdef SQLITE_SORTER_PMASZ + "SORTER_PMASZ=" CTIMEOPT_VAL(SQLITE_SORTER_PMASZ), +#endif +#ifdef SQLITE_SOUNDEX + "SOUNDEX", +#endif +#ifdef SQLITE_STAT4_SAMPLES + "STAT4_SAMPLES=" CTIMEOPT_VAL(SQLITE_STAT4_SAMPLES), +#endif +#ifdef SQLITE_STMTJRNL_SPILL + "STMTJRNL_SPILL=" CTIMEOPT_VAL(SQLITE_STMTJRNL_SPILL), +#endif +#ifdef SQLITE_SUBSTR_COMPATIBILITY + "SUBSTR_COMPATIBILITY", +#endif +#if (!defined(SQLITE_WIN32_MALLOC) \ + && !defined(SQLITE_ZERO_MALLOC) \ + && !defined(SQLITE_MEMDEBUG) \ + ) || defined(SQLITE_SYSTEM_MALLOC) + "SYSTEM_MALLOC", +#endif +#ifdef SQLITE_TCL + "TCL", +#endif +#ifdef SQLITE_TEMP_STORE + "TEMP_STORE=" CTIMEOPT_VAL(SQLITE_TEMP_STORE), +#endif +#ifdef SQLITE_TEST + "TEST", +#endif +#if defined(SQLITE_THREADSAFE) + "THREADSAFE=" CTIMEOPT_VAL(SQLITE_THREADSAFE), +#elif defined(THREADSAFE) + "THREADSAFE=" CTIMEOPT_VAL(THREADSAFE), +#else + "THREADSAFE=1", +#endif +#ifdef SQLITE_UNLINK_AFTER_CLOSE + "UNLINK_AFTER_CLOSE", +#endif +#ifdef SQLITE_UNTESTABLE + "UNTESTABLE", +#endif +#ifdef SQLITE_USER_AUTHENTICATION + "USER_AUTHENTICATION", +#endif +#ifdef SQLITE_USE_ALLOCA + "USE_ALLOCA", +#endif +#ifdef SQLITE_USE_FCNTL_TRACE + "USE_FCNTL_TRACE", +#endif +#ifdef SQLITE_USE_URI + "USE_URI", +#endif +#ifdef SQLITE_VDBE_COVERAGE + "VDBE_COVERAGE", +#endif +#ifdef SQLITE_WIN32_MALLOC + "WIN32_MALLOC", +#endif +#ifdef SQLITE_ZERO_MALLOC + "ZERO_MALLOC", +#endif + +} ; + +SQLITE_PRIVATE const char **sqlite3CompileOptions(int *pnOpt){ + *pnOpt = sizeof(sqlite3azCompileOpt) / sizeof(sqlite3azCompileOpt[0]); + return (const char**)sqlite3azCompileOpt; +} + +#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + +/************** End of ctime.c ***********************************************/ +/************** Begin file global.c ******************************************/ +/* +** 2008 June 13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains definitions of global variables and constants. +*/ +/* #include "sqliteInt.h" */ + +/* An array to map all upper-case characters into their corresponding +** lower-case character. +** +** SQLite only considers US-ASCII (or EBCDIC) characters. We do not +** handle case conversions for the UTF character set since the tables +** involved are nearly as big or bigger than SQLite itself. +*/ +SQLITE_PRIVATE const unsigned char sqlite3UpperToLower[] = { +#ifdef SQLITE_ASCII + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, + 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107, + 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, + 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161, + 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179, + 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197, + 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, + 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, + 252,253,254,255, +#endif +#ifdef SQLITE_EBCDIC + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 0x */ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 1x */ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 2x */ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 3x */ + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 4x */ + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 5x */ + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, /* 6x */ + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, /* 7x */ + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, /* 8x */ + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, /* 9x */ + 160,161,162,163,164,165,166,167,168,169,170,171,140,141,142,175, /* Ax */ + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, /* Bx */ + 192,129,130,131,132,133,134,135,136,137,202,203,204,205,206,207, /* Cx */ + 208,145,146,147,148,149,150,151,152,153,218,219,220,221,222,223, /* Dx */ + 224,225,162,163,164,165,166,167,168,169,234,235,236,237,238,239, /* Ex */ + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, /* Fx */ +#endif +/* All of the upper-to-lower conversion data is above. The following +** 18 integers are completely unrelated. They are appended to the +** sqlite3UpperToLower[] array to avoid UBSAN warnings. Here's what is +** going on: +** +** The SQL comparison operators (<>, =, >, <=, <, and >=) are implemented +** by invoking sqlite3MemCompare(A,B) which compares values A and B and +** returns negative, zero, or positive if A is less then, equal to, or +** greater than B, respectively. Then the true false results is found by +** consulting sqlite3aLTb[opcode], sqlite3aEQb[opcode], or +** sqlite3aGTb[opcode] depending on whether the result of compare(A,B) +** is negative, zero, or positive, where opcode is the specific opcode. +** The only works because the comparison opcodes are consecutive and in +** this order: NE EQ GT LE LT GE. Various assert()s throughout the code +** ensure that is the case. +** +** These elements must be appended to another array. Otherwise the +** index (here shown as [256-OP_Ne]) would be out-of-bounds and thus +** be undefined behavior. That's goofy, but the C-standards people thought +** it was a good idea, so here we are. +*/ +/* NE EQ GT LE LT GE */ + 1, 0, 0, 1, 1, 0, /* aLTb[]: Use when compare(A,B) less than zero */ + 0, 1, 0, 1, 0, 1, /* aEQb[]: Use when compare(A,B) equals zero */ + 1, 0, 1, 0, 0, 1 /* aGTb[]: Use when compare(A,B) greater than zero*/ +}; +SQLITE_PRIVATE const unsigned char *sqlite3aLTb = &sqlite3UpperToLower[256-OP_Ne]; +SQLITE_PRIVATE const unsigned char *sqlite3aEQb = &sqlite3UpperToLower[256+6-OP_Ne]; +SQLITE_PRIVATE const unsigned char *sqlite3aGTb = &sqlite3UpperToLower[256+12-OP_Ne]; + +/* +** The following 256 byte lookup table is used to support SQLites built-in +** equivalents to the following standard library functions: +** +** isspace() 0x01 +** isalpha() 0x02 +** isdigit() 0x04 +** isalnum() 0x06 +** isxdigit() 0x08 +** toupper() 0x20 +** SQLite identifier character 0x40 +** Quote character 0x80 +** +** Bit 0x20 is set if the mapped character requires translation to upper +** case. i.e. if the character is a lower-case ASCII character. +** If x is a lower-case ASCII character, then its upper-case equivalent +** is (x - 0x20). Therefore toupper() can be implemented as: +** +** (x & ~(map[x]&0x20)) +** +** The equivalent of tolower() is implemented using the sqlite3UpperToLower[] +** array. tolower() is used more often than toupper() by SQLite. +** +** Bit 0x40 is set if the character is non-alphanumeric and can be used in an +** SQLite identifier. Identifiers are alphanumerics, "_", "$", and any +** non-ASCII UTF character. Hence the test for whether or not a character is +** part of an identifier is 0x46. +*/ +SQLITE_PRIVATE const unsigned char sqlite3CtypeMap[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 00..07 ........ */ + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, /* 08..0f ........ */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 10..17 ........ */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 18..1f ........ */ + 0x01, 0x00, 0x80, 0x00, 0x40, 0x00, 0x00, 0x80, /* 20..27 !"#$%&' */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 28..2f ()*+,-./ */ + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, /* 30..37 01234567 */ + 0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 38..3f 89:;<=>? */ + + 0x00, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x02, /* 40..47 @ABCDEFG */ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 48..4f HIJKLMNO */ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 50..57 PQRSTUVW */ + 0x02, 0x02, 0x02, 0x80, 0x00, 0x00, 0x00, 0x40, /* 58..5f XYZ[\]^_ */ + 0x80, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x22, /* 60..67 `abcdefg */ + 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 68..6f hijklmno */ + 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 70..77 pqrstuvw */ + 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, /* 78..7f xyz{|}~. */ + + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 80..87 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 88..8f ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 90..97 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 98..9f ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a0..a7 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a8..af ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b0..b7 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b8..bf ........ */ + + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c0..c7 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c8..cf ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d0..d7 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d8..df ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e0..e7 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e8..ef ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* f0..f7 ........ */ + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 /* f8..ff ........ */ +}; + +/* EVIDENCE-OF: R-02982-34736 In order to maintain full backwards +** compatibility for legacy applications, the URI filename capability is +** disabled by default. +** +** EVIDENCE-OF: R-38799-08373 URI filenames can be enabled or disabled +** using the SQLITE_USE_URI=1 or SQLITE_USE_URI=0 compile-time options. +** +** EVIDENCE-OF: R-43642-56306 By default, URI handling is globally +** disabled. The default value may be changed by compiling with the +** SQLITE_USE_URI symbol defined. +*/ +#ifndef SQLITE_USE_URI +# define SQLITE_USE_URI 0 +#endif + +/* EVIDENCE-OF: R-38720-18127 The default setting is determined by the +** SQLITE_ALLOW_COVERING_INDEX_SCAN compile-time option, or is "on" if +** that compile-time option is omitted. +*/ +#if !defined(SQLITE_ALLOW_COVERING_INDEX_SCAN) +# define SQLITE_ALLOW_COVERING_INDEX_SCAN 1 +#else +# if !SQLITE_ALLOW_COVERING_INDEX_SCAN +# error "Compile-time disabling of covering index scan using the\ + -DSQLITE_ALLOW_COVERING_INDEX_SCAN=0 option is deprecated.\ + Contact SQLite developers if this is a problem for you, and\ + delete this #error macro to continue with your build." +# endif +#endif + +/* The minimum PMA size is set to this value multiplied by the database +** page size in bytes. +*/ +#ifndef SQLITE_SORTER_PMASZ +# define SQLITE_SORTER_PMASZ 250 +#endif + +/* Statement journals spill to disk when their size exceeds the following +** threshold (in bytes). 0 means that statement journals are created and +** written to disk immediately (the default behavior for SQLite versions +** before 3.12.0). -1 means always keep the entire statement journal in +** memory. (The statement journal is also always held entirely in memory +** if journal_mode=MEMORY or if temp_store=MEMORY, regardless of this +** setting.) +*/ +#ifndef SQLITE_STMTJRNL_SPILL +# define SQLITE_STMTJRNL_SPILL (64*1024) +#endif + +/* +** The default lookaside-configuration, the format "SZ,N". SZ is the +** number of bytes in each lookaside slot (should be a multiple of 8) +** and N is the number of slots. The lookaside-configuration can be +** changed as start-time using sqlite3_config(SQLITE_CONFIG_LOOKASIDE) +** or at run-time for an individual database connection using +** sqlite3_db_config(db, SQLITE_DBCONFIG_LOOKASIDE); +** +** With the two-size-lookaside enhancement, less lookaside is required. +** The default configuration of 1200,40 actually provides 30 1200-byte slots +** and 93 128-byte slots, which is more lookaside than is available +** using the older 1200,100 configuration without two-size-lookaside. +*/ +#ifndef SQLITE_DEFAULT_LOOKASIDE +# ifdef SQLITE_OMIT_TWOSIZE_LOOKASIDE +# define SQLITE_DEFAULT_LOOKASIDE 1200,100 /* 120KB of memory */ +# else +# define SQLITE_DEFAULT_LOOKASIDE 1200,40 /* 48KB of memory */ +# endif +#endif + + +/* The default maximum size of an in-memory database created using +** sqlite3_deserialize() +*/ +#ifndef SQLITE_MEMDB_DEFAULT_MAXSIZE +# define SQLITE_MEMDB_DEFAULT_MAXSIZE 1073741824 +#endif + +/* +** The following singleton contains the global configuration for +** the SQLite library. +*/ +SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { + SQLITE_DEFAULT_MEMSTATUS, /* bMemstat */ + 1, /* bCoreMutex */ + SQLITE_THREADSAFE==1, /* bFullMutex */ + SQLITE_USE_URI, /* bOpenUri */ + SQLITE_ALLOW_COVERING_INDEX_SCAN, /* bUseCis */ + 0, /* bSmallMalloc */ + 1, /* bExtraSchemaChecks */ + 0x7ffffffe, /* mxStrlen */ + 0, /* neverCorrupt */ + SQLITE_DEFAULT_LOOKASIDE, /* szLookaside, nLookaside */ + SQLITE_STMTJRNL_SPILL, /* nStmtSpill */ + {0,0,0,0,0,0,0,0}, /* m */ + {0,0,0,0,0,0,0,0,0}, /* mutex */ + {0,0,0,0,0,0,0,0,0,0,0,0,0},/* pcache2 */ + (void*)0, /* pHeap */ + 0, /* nHeap */ + 0, 0, /* mnHeap, mxHeap */ + SQLITE_DEFAULT_MMAP_SIZE, /* szMmap */ + SQLITE_MAX_MMAP_SIZE, /* mxMmap */ + (void*)0, /* pPage */ + 0, /* szPage */ + SQLITE_DEFAULT_PCACHE_INITSZ, /* nPage */ + 0, /* mxParserStack */ + 0, /* sharedCacheEnabled */ + SQLITE_SORTER_PMASZ, /* szPma */ + /* All the rest should always be initialized to zero */ + 0, /* isInit */ + 0, /* inProgress */ + 0, /* isMutexInit */ + 0, /* isMallocInit */ + 0, /* isPCacheInit */ + 0, /* nRefInitMutex */ + 0, /* pInitMutex */ + 0, /* xLog */ + 0, /* pLogArg */ +#ifdef SQLITE_ENABLE_SQLLOG + 0, /* xSqllog */ + 0, /* pSqllogArg */ +#endif +#ifdef SQLITE_VDBE_COVERAGE + 0, /* xVdbeBranch */ + 0, /* pVbeBranchArg */ +#endif +#ifndef SQLITE_OMIT_DESERIALIZE + SQLITE_MEMDB_DEFAULT_MAXSIZE, /* mxMemdbSize */ +#endif +#ifndef SQLITE_UNTESTABLE + 0, /* xTestCallback */ +#endif + 0, /* bLocaltimeFault */ + 0, /* xAltLocaltime */ + 0x7ffffffe, /* iOnceResetThreshold */ + SQLITE_DEFAULT_SORTERREF_SIZE, /* szSorterRef */ + 0, /* iPrngSeed */ +}; + +/* +** Hash table for global functions - functions common to all +** database connections. After initialization, this table is +** read-only. +*/ +SQLITE_PRIVATE FuncDefHash sqlite3BuiltinFunctions; + +#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) +/* +** Counter used for coverage testing. Does not come into play for +** release builds. +** +** Access to this global variable is not mutex protected. This might +** result in TSAN warnings. But as the variable does not exist in +** release builds, that should not be a concern. +*/ +SQLITE_PRIVATE unsigned int sqlite3CoverageCounter; +#endif /* SQLITE_COVERAGE_TEST || SQLITE_DEBUG */ + +#ifdef VDBE_PROFILE +/* +** The following performance counter can be used in place of +** sqlite3Hwtime() for profiling. This is a no-op on standard builds. +*/ +SQLITE_PRIVATE sqlite3_uint64 sqlite3NProfileCnt = 0; +#endif + +/* +** The value of the "pending" byte must be 0x40000000 (1 byte past the +** 1-gibabyte boundary) in a compatible database. SQLite never uses +** the database page that contains the pending byte. It never attempts +** to read or write that page. The pending byte page is set aside +** for use by the VFS layers as space for managing file locks. +** +** During testing, it is often desirable to move the pending byte to +** a different position in the file. This allows code that has to +** deal with the pending byte to run on files that are much smaller +** than 1 GiB. The sqlite3_test_control() interface can be used to +** move the pending byte. +** +** IMPORTANT: Changing the pending byte to any value other than +** 0x40000000 results in an incompatible database file format! +** Changing the pending byte during operation will result in undefined +** and incorrect behavior. +*/ +#ifndef SQLITE_OMIT_WSD +SQLITE_PRIVATE int sqlite3PendingByte = 0x40000000; +#endif + +/* +** Tracing flags set by SQLITE_TESTCTRL_TRACEFLAGS. +*/ +SQLITE_PRIVATE u32 sqlite3SelectTrace = 0; +SQLITE_PRIVATE u32 sqlite3WhereTrace = 0; + +/* #include "opcodes.h" */ +/* +** Properties of opcodes. The OPFLG_INITIALIZER macro is +** created by mkopcodeh.awk during compilation. Data is obtained +** from the comments following the "case OP_xxxx:" statements in +** the vdbe.c file. +*/ +SQLITE_PRIVATE const unsigned char sqlite3OpcodeProperty[] = OPFLG_INITIALIZER; + +/* +** Name of the default collating sequence +*/ +SQLITE_PRIVATE const char sqlite3StrBINARY[] = "BINARY"; + +/* +** Standard typenames. These names must match the COLTYPE_* definitions. +** Adjust the SQLITE_N_STDTYPE value if adding or removing entries. +** +** sqlite3StdType[] The actual names of the datatypes. +** +** sqlite3StdTypeLen[] The length (in bytes) of each entry +** in sqlite3StdType[]. +** +** sqlite3StdTypeAffinity[] The affinity associated with each entry +** in sqlite3StdType[]. +** +** sqlite3StdTypeMap[] The type value (as returned from +** sqlite3_column_type() or sqlite3_value_type()) +** for each entry in sqlite3StdType[]. +*/ +SQLITE_PRIVATE const unsigned char sqlite3StdTypeLen[] = { 3, 4, 3, 7, 4, 4 }; +SQLITE_PRIVATE const char sqlite3StdTypeAffinity[] = { + SQLITE_AFF_NUMERIC, + SQLITE_AFF_BLOB, + SQLITE_AFF_INTEGER, + SQLITE_AFF_INTEGER, + SQLITE_AFF_REAL, + SQLITE_AFF_TEXT +}; +SQLITE_PRIVATE const char sqlite3StdTypeMap[] = { + 0, + SQLITE_BLOB, + SQLITE_INTEGER, + SQLITE_INTEGER, + SQLITE_FLOAT, + SQLITE_TEXT +}; +SQLITE_PRIVATE const char *sqlite3StdType[] = { + "ANY", + "BLOB", + "INT", + "INTEGER", + "REAL", + "TEXT" +}; + +/************** End of global.c **********************************************/ +/************** Begin file status.c ******************************************/ +/* +** 2008 June 18 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This module implements the sqlite3_status() interface and related +** functionality. +*/ +/* #include "sqliteInt.h" */ +/************** Include vdbeInt.h in the middle of status.c ******************/ +/************** Begin file vdbeInt.h *****************************************/ +/* +** 2003 September 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the header file for information that is private to the +** VDBE. This information used to all be at the top of the single +** source code file "vdbe.c". When that file became too big (over +** 6000 lines long) it was split up into several smaller files and +** this header information was factored out. +*/ +#ifndef SQLITE_VDBEINT_H +#define SQLITE_VDBEINT_H + +/* +** The maximum number of times that a statement will try to reparse +** itself before giving up and returning SQLITE_SCHEMA. +*/ +#ifndef SQLITE_MAX_SCHEMA_RETRY +# define SQLITE_MAX_SCHEMA_RETRY 50 +#endif + +/* +** VDBE_DISPLAY_P4 is true or false depending on whether or not the +** "explain" P4 display logic is enabled. +*/ +#if !defined(SQLITE_OMIT_EXPLAIN) || !defined(NDEBUG) \ + || defined(VDBE_PROFILE) || defined(SQLITE_DEBUG) \ + || defined(SQLITE_ENABLE_BYTECODE_VTAB) +# define VDBE_DISPLAY_P4 1 +#else +# define VDBE_DISPLAY_P4 0 +#endif + +/* +** SQL is translated into a sequence of instructions to be +** executed by a virtual machine. Each instruction is an instance +** of the following structure. +*/ +typedef struct VdbeOp Op; + +/* +** Boolean values +*/ +typedef unsigned Bool; + +/* Opaque type used by code in vdbesort.c */ +typedef struct VdbeSorter VdbeSorter; + +/* Elements of the linked list at Vdbe.pAuxData */ +typedef struct AuxData AuxData; + +/* Types of VDBE cursors */ +#define CURTYPE_BTREE 0 +#define CURTYPE_SORTER 1 +#define CURTYPE_VTAB 2 +#define CURTYPE_PSEUDO 3 + +/* +** A VdbeCursor is an superclass (a wrapper) for various cursor objects: +** +** * A b-tree cursor +** - In the main database or in an ephemeral database +** - On either an index or a table +** * A sorter +** * A virtual table +** * A one-row "pseudotable" stored in a single register +*/ +typedef struct VdbeCursor VdbeCursor; +struct VdbeCursor { + u8 eCurType; /* One of the CURTYPE_* values above */ + i8 iDb; /* Index of cursor database in db->aDb[] */ + u8 nullRow; /* True if pointing to a row with no data */ + u8 deferredMoveto; /* A call to sqlite3BtreeMoveto() is needed */ + u8 isTable; /* True for rowid tables. False for indexes */ +#ifdef SQLITE_DEBUG + u8 seekOp; /* Most recent seek operation on this cursor */ + u8 wrFlag; /* The wrFlag argument to sqlite3BtreeCursor() */ +#endif + Bool isEphemeral:1; /* True for an ephemeral table */ + Bool useRandomRowid:1; /* Generate new record numbers semi-randomly */ + Bool isOrdered:1; /* True if the table is not BTREE_UNORDERED */ + Bool hasBeenDuped:1; /* This cursor was source or target of OP_OpenDup */ + u16 seekHit; /* See the OP_SeekHit and OP_IfNoHope opcodes */ + union { /* pBtx for isEphermeral. pAltMap otherwise */ + Btree *pBtx; /* Separate file holding temporary table */ + u32 *aAltMap; /* Mapping from table to index column numbers */ + } ub; + i64 seqCount; /* Sequence counter */ + + /* Cached OP_Column parse information is only valid if cacheStatus matches + ** Vdbe.cacheCtr. Vdbe.cacheCtr will never take on the value of + ** CACHE_STALE (0) and so setting cacheStatus=CACHE_STALE guarantees that + ** the cache is out of date. */ + u32 cacheStatus; /* Cache is valid if this matches Vdbe.cacheCtr */ + int seekResult; /* Result of previous sqlite3BtreeMoveto() or 0 + ** if there have been no prior seeks on the cursor. */ + /* seekResult does not distinguish between "no seeks have ever occurred + ** on this cursor" and "the most recent seek was an exact match". + ** For CURTYPE_PSEUDO, seekResult is the register holding the record */ + + /* When a new VdbeCursor is allocated, only the fields above are zeroed. + ** The fields that follow are uninitialized, and must be individually + ** initialized prior to first use. */ + VdbeCursor *pAltCursor; /* Associated index cursor from which to read */ + union { + BtCursor *pCursor; /* CURTYPE_BTREE or _PSEUDO. Btree cursor */ + sqlite3_vtab_cursor *pVCur; /* CURTYPE_VTAB. Vtab cursor */ + VdbeSorter *pSorter; /* CURTYPE_SORTER. Sorter object */ + } uc; + KeyInfo *pKeyInfo; /* Info about index keys needed by index cursors */ + u32 iHdrOffset; /* Offset to next unparsed byte of the header */ + Pgno pgnoRoot; /* Root page of the open btree cursor */ + i16 nField; /* Number of fields in the header */ + u16 nHdrParsed; /* Number of header fields parsed so far */ + i64 movetoTarget; /* Argument to the deferred sqlite3BtreeMoveto() */ + u32 *aOffset; /* Pointer to aType[nField] */ + const u8 *aRow; /* Data for the current row, if all on one page */ + u32 payloadSize; /* Total number of bytes in the record */ + u32 szRow; /* Byte available in aRow */ +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + u64 maskUsed; /* Mask of columns used by this cursor */ +#endif + + /* 2*nField extra array elements allocated for aType[], beyond the one + ** static element declared in the structure. nField total array slots for + ** aType[] and nField+1 array slots for aOffset[] */ + u32 aType[1]; /* Type values record decode. MUST BE LAST */ +}; + + +/* +** A value for VdbeCursor.cacheStatus that means the cache is always invalid. +*/ +#define CACHE_STALE 0 + +/* +** When a sub-program is executed (OP_Program), a structure of this type +** is allocated to store the current value of the program counter, as +** well as the current memory cell array and various other frame specific +** values stored in the Vdbe struct. When the sub-program is finished, +** these values are copied back to the Vdbe from the VdbeFrame structure, +** restoring the state of the VM to as it was before the sub-program +** began executing. +** +** The memory for a VdbeFrame object is allocated and managed by a memory +** cell in the parent (calling) frame. When the memory cell is deleted or +** overwritten, the VdbeFrame object is not freed immediately. Instead, it +** is linked into the Vdbe.pDelFrame list. The contents of the Vdbe.pDelFrame +** list is deleted when the VM is reset in VdbeHalt(). The reason for doing +** this instead of deleting the VdbeFrame immediately is to avoid recursive +** calls to sqlite3VdbeMemRelease() when the memory cells belonging to the +** child frame are released. +** +** The currently executing frame is stored in Vdbe.pFrame. Vdbe.pFrame is +** set to NULL if the currently executing frame is the main program. +*/ +typedef struct VdbeFrame VdbeFrame; +struct VdbeFrame { + Vdbe *v; /* VM this frame belongs to */ + VdbeFrame *pParent; /* Parent of this frame, or NULL if parent is main */ + Op *aOp; /* Program instructions for parent frame */ + i64 *anExec; /* Event counters from parent frame */ + Mem *aMem; /* Array of memory cells for parent frame */ + VdbeCursor **apCsr; /* Array of Vdbe cursors for parent frame */ + u8 *aOnce; /* Bitmask used by OP_Once */ + void *token; /* Copy of SubProgram.token */ + i64 lastRowid; /* Last insert rowid (sqlite3.lastRowid) */ + AuxData *pAuxData; /* Linked list of auxdata allocations */ +#if SQLITE_DEBUG + u32 iFrameMagic; /* magic number for sanity checking */ +#endif + int nCursor; /* Number of entries in apCsr */ + int pc; /* Program Counter in parent (calling) frame */ + int nOp; /* Size of aOp array */ + int nMem; /* Number of entries in aMem */ + int nChildMem; /* Number of memory cells for child frame */ + int nChildCsr; /* Number of cursors for child frame */ + i64 nChange; /* Statement changes (Vdbe.nChange) */ + i64 nDbChange; /* Value of db->nChange */ +}; + +/* Magic number for sanity checking on VdbeFrame objects */ +#define SQLITE_FRAME_MAGIC 0x879fb71e + +/* +** Return a pointer to the array of registers allocated for use +** by a VdbeFrame. +*/ +#define VdbeFrameMem(p) ((Mem *)&((u8 *)p)[ROUND8(sizeof(VdbeFrame))]) + +/* +** Internally, the vdbe manipulates nearly all SQL values as Mem +** structures. Each Mem struct may cache multiple representations (string, +** integer etc.) of the same value. +*/ +struct sqlite3_value { + union MemValue { + double r; /* Real value used when MEM_Real is set in flags */ + i64 i; /* Integer value used when MEM_Int is set in flags */ + int nZero; /* Extra zero bytes when MEM_Zero and MEM_Blob set */ + const char *zPType; /* Pointer type when MEM_Term|MEM_Subtype|MEM_Null */ + FuncDef *pDef; /* Used only when flags==MEM_Agg */ + } u; + u16 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */ + u8 enc; /* SQLITE_UTF8, SQLITE_UTF16BE, SQLITE_UTF16LE */ + u8 eSubtype; /* Subtype for this value */ + int n; /* Number of characters in string value, excluding '\0' */ + char *z; /* String or BLOB value */ + /* ShallowCopy only needs to copy the information above */ + char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */ + int szMalloc; /* Size of the zMalloc allocation */ + u32 uTemp; /* Transient storage for serial_type in OP_MakeRecord */ + sqlite3 *db; /* The associated database connection */ + void (*xDel)(void*);/* Destructor for Mem.z - only valid if MEM_Dyn */ +#ifdef SQLITE_DEBUG + Mem *pScopyFrom; /* This Mem is a shallow copy of pScopyFrom */ + u16 mScopyFlags; /* flags value immediately after the shallow copy */ +#endif +}; + +/* +** Size of struct Mem not including the Mem.zMalloc member or anything that +** follows. +*/ +#define MEMCELLSIZE offsetof(Mem,zMalloc) + +/* One or more of the following flags are set to indicate the validOK +** representations of the value stored in the Mem struct. +** +** If the MEM_Null flag is set, then the value is an SQL NULL value. +** For a pointer type created using sqlite3_bind_pointer() or +** sqlite3_result_pointer() the MEM_Term and MEM_Subtype flags are also set. +** +** If the MEM_Str flag is set then Mem.z points at a string representation. +** Usually this is encoded in the same unicode encoding as the main +** database (see below for exceptions). If the MEM_Term flag is also +** set, then the string is nul terminated. The MEM_Int and MEM_Real +** flags may coexist with the MEM_Str flag. +*/ +#define MEM_Null 0x0001 /* Value is NULL (or a pointer) */ +#define MEM_Str 0x0002 /* Value is a string */ +#define MEM_Int 0x0004 /* Value is an integer */ +#define MEM_Real 0x0008 /* Value is a real number */ +#define MEM_Blob 0x0010 /* Value is a BLOB */ +#define MEM_IntReal 0x0020 /* MEM_Int that stringifies like MEM_Real */ +#define MEM_AffMask 0x003f /* Mask of affinity bits */ +#define MEM_FromBind 0x0040 /* Value originates from sqlite3_bind() */ +#define MEM_Undefined 0x0080 /* Value is undefined */ +#define MEM_Cleared 0x0100 /* NULL set by OP_Null, not from data */ +#define MEM_TypeMask 0xc1bf /* Mask of type bits */ + + +/* Whenever Mem contains a valid string or blob representation, one of +** the following flags must be set to determine the memory management +** policy for Mem.z. The MEM_Term flag tells us whether or not the +** string is \000 or \u0000 terminated +*/ +#define MEM_Term 0x0200 /* String in Mem.z is zero terminated */ +#define MEM_Dyn 0x0400 /* Need to call Mem.xDel() on Mem.z */ +#define MEM_Static 0x0800 /* Mem.z points to a static string */ +#define MEM_Ephem 0x1000 /* Mem.z points to an ephemeral string */ +#define MEM_Agg 0x2000 /* Mem.z points to an agg function context */ +#define MEM_Zero 0x4000 /* Mem.i contains count of 0s appended to blob */ +#define MEM_Subtype 0x8000 /* Mem.eSubtype is valid */ +#ifdef SQLITE_OMIT_INCRBLOB + #undef MEM_Zero + #define MEM_Zero 0x0000 +#endif + +/* Return TRUE if Mem X contains dynamically allocated content - anything +** that needs to be deallocated to avoid a leak. +*/ +#define VdbeMemDynamic(X) \ + (((X)->flags&(MEM_Agg|MEM_Dyn))!=0) + +/* +** Clear any existing type flags from a Mem and replace them with f +*/ +#define MemSetTypeFlag(p, f) \ + ((p)->flags = ((p)->flags&~(MEM_TypeMask|MEM_Zero))|f) + +/* +** True if Mem X is a NULL-nochng type. +*/ +#define MemNullNochng(X) \ + (((X)->flags&MEM_TypeMask)==(MEM_Null|MEM_Zero) \ + && (X)->n==0 && (X)->u.nZero==0) + +/* +** Return true if a memory cell is not marked as invalid. This macro +** is for use inside assert() statements only. +*/ +#ifdef SQLITE_DEBUG +#define memIsValid(M) ((M)->flags & MEM_Undefined)==0 +#endif + +/* +** Each auxiliary data pointer stored by a user defined function +** implementation calling sqlite3_set_auxdata() is stored in an instance +** of this structure. All such structures associated with a single VM +** are stored in a linked list headed at Vdbe.pAuxData. All are destroyed +** when the VM is halted (if not before). +*/ +struct AuxData { + int iAuxOp; /* Instruction number of OP_Function opcode */ + int iAuxArg; /* Index of function argument. */ + void *pAux; /* Aux data pointer */ + void (*xDeleteAux)(void*); /* Destructor for the aux data */ + AuxData *pNextAux; /* Next element in list */ +}; + +/* +** The "context" argument for an installable function. A pointer to an +** instance of this structure is the first argument to the routines used +** implement the SQL functions. +** +** There is a typedef for this structure in sqlite.h. So all routines, +** even the public interface to SQLite, can use a pointer to this structure. +** But this file is the only place where the internal details of this +** structure are known. +** +** This structure is defined inside of vdbeInt.h because it uses substructures +** (Mem) which are only defined there. +*/ +struct sqlite3_context { + Mem *pOut; /* The return value is stored here */ + FuncDef *pFunc; /* Pointer to function information */ + Mem *pMem; /* Memory cell used to store aggregate context */ + Vdbe *pVdbe; /* The VM that owns this context */ + int iOp; /* Instruction number of OP_Function */ + int isError; /* Error code returned by the function. */ + u8 skipFlag; /* Skip accumulator loading if true */ + u8 argc; /* Number of arguments */ + sqlite3_value *argv[1]; /* Argument set */ +}; + +/* A bitfield type for use inside of structures. Always follow with :N where +** N is the number of bits. +*/ +typedef unsigned bft; /* Bit Field Type */ + +/* The ScanStatus object holds a single value for the +** sqlite3_stmt_scanstatus() interface. +*/ +typedef struct ScanStatus ScanStatus; +struct ScanStatus { + int addrExplain; /* OP_Explain for loop */ + int addrLoop; /* Address of "loops" counter */ + int addrVisit; /* Address of "rows visited" counter */ + int iSelectID; /* The "Select-ID" for this loop */ + LogEst nEst; /* Estimated output rows per loop */ + char *zName; /* Name of table or index */ +}; + +/* The DblquoteStr object holds the text of a double-quoted +** string for a prepared statement. A linked list of these objects +** is constructed during statement parsing and is held on Vdbe.pDblStr. +** When computing a normalized SQL statement for an SQL statement, that +** list is consulted for each double-quoted identifier to see if the +** identifier should really be a string literal. +*/ +typedef struct DblquoteStr DblquoteStr; +struct DblquoteStr { + DblquoteStr *pNextStr; /* Next string literal in the list */ + char z[8]; /* Dequoted value for the string */ +}; + +/* +** An instance of the virtual machine. This structure contains the complete +** state of the virtual machine. +** +** The "sqlite3_stmt" structure pointer that is returned by sqlite3_prepare() +** is really a pointer to an instance of this structure. +*/ +struct Vdbe { + sqlite3 *db; /* The database connection that owns this statement */ + Vdbe *pPrev,*pNext; /* Linked list of VDBEs with the same Vdbe.db */ + Parse *pParse; /* Parsing context used to create this Vdbe */ + ynVar nVar; /* Number of entries in aVar[] */ + u32 iVdbeMagic; /* Magic number defining state of the SQL statement */ + int nMem; /* Number of memory locations currently allocated */ + int nCursor; /* Number of slots in apCsr[] */ + u32 cacheCtr; /* VdbeCursor row cache generation counter */ + int pc; /* The program counter */ + int rc; /* Value to return */ + i64 nChange; /* Number of db changes made since last reset */ + int iStatement; /* Statement number (or 0 if has no opened stmt) */ + i64 iCurrentTime; /* Value of julianday('now') for this statement */ + i64 nFkConstraint; /* Number of imm. FK constraints this VM */ + i64 nStmtDefCons; /* Number of def. constraints when stmt started */ + i64 nStmtDefImmCons; /* Number of def. imm constraints when stmt started */ + Mem *aMem; /* The memory locations */ + Mem **apArg; /* Arguments to currently executing user function */ + VdbeCursor **apCsr; /* One element of this array for each open cursor */ + Mem *aVar; /* Values for the OP_Variable opcode. */ + + /* When allocating a new Vdbe object, all of the fields below should be + ** initialized to zero or NULL */ + + Op *aOp; /* Space to hold the virtual machine's program */ + int nOp; /* Number of instructions in the program */ + int nOpAlloc; /* Slots allocated for aOp[] */ + Mem *aColName; /* Column names to return */ + Mem *pResultSet; /* Pointer to an array of results */ + char *zErrMsg; /* Error message written here */ + VList *pVList; /* Name of variables */ +#ifndef SQLITE_OMIT_TRACE + i64 startTime; /* Time when query started - used for profiling */ +#endif +#ifdef SQLITE_DEBUG + int rcApp; /* errcode set by sqlite3_result_error_code() */ + u32 nWrite; /* Number of write operations that have occurred */ +#endif + u16 nResColumn; /* Number of columns in one row of the result set */ + u8 errorAction; /* Recovery action to do in case of an error */ + u8 minWriteFileFormat; /* Minimum file format for writable database files */ + u8 prepFlags; /* SQLITE_PREPARE_* flags */ + u8 doingRerun; /* True if rerunning after an auto-reprepare */ + bft expired:2; /* 1: recompile VM immediately 2: when convenient */ + bft explain:2; /* True if EXPLAIN present on SQL command */ + bft changeCntOn:1; /* True to update the change-counter */ + bft runOnlyOnce:1; /* Automatically expire on reset */ + bft usesStmtJournal:1; /* True if uses a statement journal */ + bft readOnly:1; /* True for statements that do not write */ + bft bIsReader:1; /* True for statements that read */ + yDbMask btreeMask; /* Bitmask of db->aDb[] entries referenced */ + yDbMask lockMask; /* Subset of btreeMask that requires a lock */ + u32 aCounter[9]; /* Counters used by sqlite3_stmt_status() */ + char *zSql; /* Text of the SQL statement that generated this */ +#ifdef SQLITE_ENABLE_NORMALIZE + char *zNormSql; /* Normalization of the associated SQL statement */ + DblquoteStr *pDblStr; /* List of double-quoted string literals */ +#endif + void *pFree; /* Free this when deleting the vdbe */ + VdbeFrame *pFrame; /* Parent frame */ + VdbeFrame *pDelFrame; /* List of frame objects to free on VM reset */ + int nFrame; /* Number of frames in pFrame list */ + u32 expmask; /* Binding to these vars invalidates VM */ + SubProgram *pProgram; /* Linked list of all sub-programs used by VM */ + AuxData *pAuxData; /* Linked list of auxdata allocations */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + i64 *anExec; /* Number of times each op has been executed */ + int nScan; /* Entries in aScan[] */ + ScanStatus *aScan; /* Scan definitions for sqlite3_stmt_scanstatus() */ +#endif +}; + +/* +** The following are allowed values for Vdbe.magic +*/ +#define VDBE_MAGIC_INIT 0x16bceaa5 /* Building a VDBE program */ +#define VDBE_MAGIC_RUN 0x2df20da3 /* VDBE is ready to execute */ +#define VDBE_MAGIC_HALT 0x319c2973 /* VDBE has completed execution */ +#define VDBE_MAGIC_RESET 0x48fa9f76 /* Reset and ready to run again */ +#define VDBE_MAGIC_DEAD 0x5606c3c8 /* The VDBE has been deallocated */ + +/* +** Structure used to store the context required by the +** sqlite3_preupdate_*() API functions. +*/ +struct PreUpdate { + Vdbe *v; + VdbeCursor *pCsr; /* Cursor to read old values from */ + int op; /* One of SQLITE_INSERT, UPDATE, DELETE */ + u8 *aRecord; /* old.* database record */ + KeyInfo keyinfo; + UnpackedRecord *pUnpacked; /* Unpacked version of aRecord[] */ + UnpackedRecord *pNewUnpacked; /* Unpacked version of new.* record */ + int iNewReg; /* Register for new.* values */ + int iBlobWrite; /* Value returned by preupdate_blobwrite() */ + i64 iKey1; /* First key value passed to hook */ + i64 iKey2; /* Second key value passed to hook */ + Mem *aNew; /* Array of new.* values */ + Table *pTab; /* Schema object being upated */ + Index *pPk; /* PK index if pTab is WITHOUT ROWID */ +}; + +/* +** An instance of this object is used to pass an vector of values into +** OP_VFilter, the xFilter method of a virtual table. The vector is the +** set of values on the right-hand side of an IN constraint. +** +** The value as passed into xFilter is an sqlite3_value with a "pointer" +** type, such as is generated by sqlite3_result_pointer() and read by +** sqlite3_value_pointer. Such values have MEM_Term|MEM_Subtype|MEM_Null +** and a subtype of 'p'. The sqlite3_vtab_in_first() and _next() interfaces +** know how to use this object to step through all the values in the +** right operand of the IN constraint. +*/ +typedef struct ValueList ValueList; +struct ValueList { + BtCursor *pCsr; /* An ephemeral table holding all values */ + sqlite3_value *pOut; /* Register to hold each decoded output value */ +}; + +/* +** Function prototypes +*/ +SQLITE_PRIVATE void sqlite3VdbeError(Vdbe*, const char *, ...); +SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *, VdbeCursor*); +void sqliteVdbePopStack(Vdbe*,int); +SQLITE_PRIVATE int SQLITE_NOINLINE sqlite3VdbeFinishMoveto(VdbeCursor*); +SQLITE_PRIVATE int sqlite3VdbeCursorMoveto(VdbeCursor**, u32*); +SQLITE_PRIVATE int sqlite3VdbeCursorRestore(VdbeCursor*); +SQLITE_PRIVATE u32 sqlite3VdbeSerialTypeLen(u32); +SQLITE_PRIVATE u8 sqlite3VdbeOneByteSerialTypeLen(u8); +SQLITE_PRIVATE u32 sqlite3VdbeSerialPut(unsigned char*, Mem*, u32); +SQLITE_PRIVATE void sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*); +SQLITE_PRIVATE void sqlite3VdbeDeleteAuxData(sqlite3*, AuxData**, int, int); + +int sqlite2BtreeKeyCompare(BtCursor *, const void *, int, int, int *); +SQLITE_PRIVATE int sqlite3VdbeIdxKeyCompare(sqlite3*,VdbeCursor*,UnpackedRecord*,int*); +SQLITE_PRIVATE int sqlite3VdbeIdxRowid(sqlite3*, BtCursor*, i64*); +SQLITE_PRIVATE int sqlite3VdbeExec(Vdbe*); +#if !defined(SQLITE_OMIT_EXPLAIN) || defined(SQLITE_ENABLE_BYTECODE_VTAB) +SQLITE_PRIVATE int sqlite3VdbeNextOpcode(Vdbe*,Mem*,int,int*,int*,Op**); +SQLITE_PRIVATE char *sqlite3VdbeDisplayP4(sqlite3*,Op*); +#endif +#if defined(SQLITE_ENABLE_EXPLAIN_COMMENTS) +SQLITE_PRIVATE char *sqlite3VdbeDisplayComment(sqlite3*,const Op*,const char*); +#endif +#if !defined(SQLITE_OMIT_EXPLAIN) +SQLITE_PRIVATE int sqlite3VdbeList(Vdbe*); +#endif +SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe*); +SQLITE_PRIVATE int sqlite3VdbeChangeEncoding(Mem *, int); +SQLITE_PRIVATE int sqlite3VdbeMemTooBig(Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemCopy(Mem*, const Mem*); +SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem*, const Mem*, int); +SQLITE_PRIVATE void sqlite3VdbeMemMove(Mem*, Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemNulTerminate(Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemSetStr(Mem*, const char*, i64, u8, void(*)(void*)); +SQLITE_PRIVATE void sqlite3VdbeMemSetInt64(Mem*, i64); +#ifdef SQLITE_OMIT_FLOATING_POINT +# define sqlite3VdbeMemSetDouble sqlite3VdbeMemSetInt64 +#else +SQLITE_PRIVATE void sqlite3VdbeMemSetDouble(Mem*, double); +#endif +SQLITE_PRIVATE void sqlite3VdbeMemSetPointer(Mem*, void*, const char*, void(*)(void*)); +SQLITE_PRIVATE void sqlite3VdbeMemInit(Mem*,sqlite3*,u16); +SQLITE_PRIVATE void sqlite3VdbeMemSetNull(Mem*); +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE void sqlite3VdbeMemSetZeroBlob(Mem*,int); +#else +SQLITE_PRIVATE int sqlite3VdbeMemSetZeroBlob(Mem*,int); +#endif +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3VdbeMemIsRowSet(const Mem*); +#endif +SQLITE_PRIVATE int sqlite3VdbeMemSetRowSet(Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem*, u8, u8); +SQLITE_PRIVATE int sqlite3IntFloatCompare(i64,double); +SQLITE_PRIVATE i64 sqlite3VdbeIntValue(const Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemIntegerify(Mem*); +SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem*); +SQLITE_PRIVATE int sqlite3VdbeBooleanValue(Mem*, int ifNull); +SQLITE_PRIVATE void sqlite3VdbeIntegerAffinity(Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemRealify(Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemCast(Mem*,u8,u8); +SQLITE_PRIVATE int sqlite3VdbeMemFromBtree(BtCursor*,u32,u32,Mem*); +SQLITE_PRIVATE int sqlite3VdbeMemFromBtreeZeroOffset(BtCursor*,u32,Mem*); +SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p); +SQLITE_PRIVATE int sqlite3VdbeMemFinalize(Mem*, FuncDef*); +#ifndef SQLITE_OMIT_WINDOWFUNC +SQLITE_PRIVATE int sqlite3VdbeMemAggValue(Mem*, Mem*, FuncDef*); +#endif +#if !defined(SQLITE_OMIT_EXPLAIN) || defined(SQLITE_ENABLE_BYTECODE_VTAB) +SQLITE_PRIVATE const char *sqlite3OpcodeName(int); +#endif +SQLITE_PRIVATE int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve); +SQLITE_PRIVATE int sqlite3VdbeMemClearAndResize(Mem *pMem, int n); +SQLITE_PRIVATE int sqlite3VdbeCloseStatement(Vdbe *, int); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3VdbeFrameIsValid(VdbeFrame*); +#endif +SQLITE_PRIVATE void sqlite3VdbeFrameMemDel(void*); /* Destructor on Mem */ +SQLITE_PRIVATE void sqlite3VdbeFrameDelete(VdbeFrame*); /* Actually deletes the Frame */ +SQLITE_PRIVATE int sqlite3VdbeFrameRestore(VdbeFrame *); +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( + Vdbe*,VdbeCursor*,int,const char*,Table*,i64,int,int); +#endif +SQLITE_PRIVATE int sqlite3VdbeTransferError(Vdbe *p); + +SQLITE_PRIVATE int sqlite3VdbeSorterInit(sqlite3 *, int, VdbeCursor *); +SQLITE_PRIVATE void sqlite3VdbeSorterReset(sqlite3 *, VdbeSorter *); +SQLITE_PRIVATE void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *); +SQLITE_PRIVATE int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *); +SQLITE_PRIVATE int sqlite3VdbeSorterNext(sqlite3 *, const VdbeCursor *); +SQLITE_PRIVATE int sqlite3VdbeSorterRewind(const VdbeCursor *, int *); +SQLITE_PRIVATE int sqlite3VdbeSorterWrite(const VdbeCursor *, Mem *); +SQLITE_PRIVATE int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int, int *); + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE void sqlite3VdbeIncrWriteCounter(Vdbe*, VdbeCursor*); +SQLITE_PRIVATE void sqlite3VdbeAssertAbortable(Vdbe*); +#else +# define sqlite3VdbeIncrWriteCounter(V,C) +# define sqlite3VdbeAssertAbortable(V) +#endif + +#if !defined(SQLITE_OMIT_SHARED_CACHE) +SQLITE_PRIVATE void sqlite3VdbeEnter(Vdbe*); +#else +# define sqlite3VdbeEnter(X) +#endif + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0 +SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe*); +#else +# define sqlite3VdbeLeave(X) +#endif + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe*,Mem*); +SQLITE_PRIVATE int sqlite3VdbeCheckMemInvariants(Mem*); +#endif + +#ifndef SQLITE_OMIT_FOREIGN_KEY +SQLITE_PRIVATE int sqlite3VdbeCheckFk(Vdbe *, int); +#else +# define sqlite3VdbeCheckFk(p,i) 0 +#endif + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE void sqlite3VdbePrintSql(Vdbe*); +SQLITE_PRIVATE void sqlite3VdbeMemPrettyPrint(Mem *pMem, StrAccum *pStr); +#endif +#ifndef SQLITE_OMIT_UTF16 +SQLITE_PRIVATE int sqlite3VdbeMemTranslate(Mem*, u8); +SQLITE_PRIVATE int sqlite3VdbeMemHandleBom(Mem *pMem); +#endif + +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE int sqlite3VdbeMemExpandBlob(Mem *); + #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0) +#else + #define sqlite3VdbeMemExpandBlob(x) SQLITE_OK + #define ExpandBlob(P) SQLITE_OK +#endif + +#endif /* !defined(SQLITE_VDBEINT_H) */ + +/************** End of vdbeInt.h *********************************************/ +/************** Continuing where we left off in status.c *********************/ + +/* +** Variables in which to record status information. +*/ +#if SQLITE_PTRSIZE>4 +typedef sqlite3_int64 sqlite3StatValueType; +#else +typedef u32 sqlite3StatValueType; +#endif +typedef struct sqlite3StatType sqlite3StatType; +static SQLITE_WSD struct sqlite3StatType { + sqlite3StatValueType nowValue[10]; /* Current value */ + sqlite3StatValueType mxValue[10]; /* Maximum value */ +} sqlite3Stat = { {0,}, {0,} }; + +/* +** Elements of sqlite3Stat[] are protected by either the memory allocator +** mutex, or by the pcache1 mutex. The following array determines which. +*/ +static const char statMutex[] = { + 0, /* SQLITE_STATUS_MEMORY_USED */ + 1, /* SQLITE_STATUS_PAGECACHE_USED */ + 1, /* SQLITE_STATUS_PAGECACHE_OVERFLOW */ + 0, /* SQLITE_STATUS_SCRATCH_USED */ + 0, /* SQLITE_STATUS_SCRATCH_OVERFLOW */ + 0, /* SQLITE_STATUS_MALLOC_SIZE */ + 0, /* SQLITE_STATUS_PARSER_STACK */ + 1, /* SQLITE_STATUS_PAGECACHE_SIZE */ + 0, /* SQLITE_STATUS_SCRATCH_SIZE */ + 0, /* SQLITE_STATUS_MALLOC_COUNT */ +}; + + +/* The "wsdStat" macro will resolve to the status information +** state vector. If writable static data is unsupported on the target, +** we have to locate the state vector at run-time. In the more common +** case where writable static data is supported, wsdStat can refer directly +** to the "sqlite3Stat" state vector declared above. +*/ +#ifdef SQLITE_OMIT_WSD +# define wsdStatInit sqlite3StatType *x = &GLOBAL(sqlite3StatType,sqlite3Stat) +# define wsdStat x[0] +#else +# define wsdStatInit +# define wsdStat sqlite3Stat +#endif + +/* +** Return the current value of a status parameter. The caller must +** be holding the appropriate mutex. +*/ +SQLITE_PRIVATE sqlite3_int64 sqlite3StatusValue(int op){ + wsdStatInit; + assert( op>=0 && op=0 && op=0 && op=0 && opwsdStat.mxValue[op] ){ + wsdStat.mxValue[op] = wsdStat.nowValue[op]; + } +} +SQLITE_PRIVATE void sqlite3StatusDown(int op, int N){ + wsdStatInit; + assert( N>=0 ); + assert( op>=0 && op=0 && op=0 ); + newValue = (sqlite3StatValueType)X; + assert( op>=0 && op=0 && opwsdStat.mxValue[op] ){ + wsdStat.mxValue[op] = newValue; + } +} + +/* +** Query status information. +*/ +SQLITE_API int sqlite3_status64( + int op, + sqlite3_int64 *pCurrent, + sqlite3_int64 *pHighwater, + int resetFlag +){ + sqlite3_mutex *pMutex; + wsdStatInit; + if( op<0 || op>=ArraySize(wsdStat.nowValue) ){ + return SQLITE_MISUSE_BKPT; + } +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCurrent==0 || pHighwater==0 ) return SQLITE_MISUSE_BKPT; +#endif + pMutex = statMutex[op] ? sqlite3Pcache1Mutex() : sqlite3MallocMutex(); + sqlite3_mutex_enter(pMutex); + *pCurrent = wsdStat.nowValue[op]; + *pHighwater = wsdStat.mxValue[op]; + if( resetFlag ){ + wsdStat.mxValue[op] = wsdStat.nowValue[op]; + } + sqlite3_mutex_leave(pMutex); + (void)pMutex; /* Prevent warning when SQLITE_THREADSAFE=0 */ + return SQLITE_OK; +} +SQLITE_API int sqlite3_status(int op, int *pCurrent, int *pHighwater, int resetFlag){ + sqlite3_int64 iCur = 0, iHwtr = 0; + int rc; +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCurrent==0 || pHighwater==0 ) return SQLITE_MISUSE_BKPT; +#endif + rc = sqlite3_status64(op, &iCur, &iHwtr, resetFlag); + if( rc==0 ){ + *pCurrent = (int)iCur; + *pHighwater = (int)iHwtr; + } + return rc; +} + +/* +** Return the number of LookasideSlot elements on the linked list +*/ +static u32 countLookasideSlots(LookasideSlot *p){ + u32 cnt = 0; + while( p ){ + p = p->pNext; + cnt++; + } + return cnt; +} + +/* +** Count the number of slots of lookaside memory that are outstanding +*/ +SQLITE_PRIVATE int sqlite3LookasideUsed(sqlite3 *db, int *pHighwater){ + u32 nInit = countLookasideSlots(db->lookaside.pInit); + u32 nFree = countLookasideSlots(db->lookaside.pFree); +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + nInit += countLookasideSlots(db->lookaside.pSmallInit); + nFree += countLookasideSlots(db->lookaside.pSmallFree); +#endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ + if( pHighwater ) *pHighwater = db->lookaside.nSlot - nInit; + return db->lookaside.nSlot - (nInit+nFree); +} + +/* +** Query status information for a single database connection +*/ +SQLITE_API int sqlite3_db_status( + sqlite3 *db, /* The database connection whose status is desired */ + int op, /* Status verb */ + int *pCurrent, /* Write current value here */ + int *pHighwater, /* Write high-water mark here */ + int resetFlag /* Reset high-water mark if true */ +){ + int rc = SQLITE_OK; /* Return code */ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || pCurrent==0|| pHighwater==0 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + switch( op ){ + case SQLITE_DBSTATUS_LOOKASIDE_USED: { + *pCurrent = sqlite3LookasideUsed(db, pHighwater); + if( resetFlag ){ + LookasideSlot *p = db->lookaside.pFree; + if( p ){ + while( p->pNext ) p = p->pNext; + p->pNext = db->lookaside.pInit; + db->lookaside.pInit = db->lookaside.pFree; + db->lookaside.pFree = 0; + } +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + p = db->lookaside.pSmallFree; + if( p ){ + while( p->pNext ) p = p->pNext; + p->pNext = db->lookaside.pSmallInit; + db->lookaside.pSmallInit = db->lookaside.pSmallFree; + db->lookaside.pSmallFree = 0; + } +#endif + } + break; + } + + case SQLITE_DBSTATUS_LOOKASIDE_HIT: + case SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE: + case SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL: { + testcase( op==SQLITE_DBSTATUS_LOOKASIDE_HIT ); + testcase( op==SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE ); + testcase( op==SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL ); + assert( (op-SQLITE_DBSTATUS_LOOKASIDE_HIT)>=0 ); + assert( (op-SQLITE_DBSTATUS_LOOKASIDE_HIT)<3 ); + *pCurrent = 0; + *pHighwater = db->lookaside.anStat[op - SQLITE_DBSTATUS_LOOKASIDE_HIT]; + if( resetFlag ){ + db->lookaside.anStat[op - SQLITE_DBSTATUS_LOOKASIDE_HIT] = 0; + } + break; + } + + /* + ** Return an approximation for the amount of memory currently used + ** by all pagers associated with the given database connection. The + ** highwater mark is meaningless and is returned as zero. + */ + case SQLITE_DBSTATUS_CACHE_USED_SHARED: + case SQLITE_DBSTATUS_CACHE_USED: { + int totalUsed = 0; + int i; + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + Pager *pPager = sqlite3BtreePager(pBt); + int nByte = sqlite3PagerMemUsed(pPager); + if( op==SQLITE_DBSTATUS_CACHE_USED_SHARED ){ + nByte = nByte / sqlite3BtreeConnectionCount(pBt); + } + totalUsed += nByte; + } + } + sqlite3BtreeLeaveAll(db); + *pCurrent = totalUsed; + *pHighwater = 0; + break; + } + + /* + ** *pCurrent gets an accurate estimate of the amount of memory used + ** to store the schema for all databases (main, temp, and any ATTACHed + ** databases. *pHighwater is set to zero. + */ + case SQLITE_DBSTATUS_SCHEMA_USED: { + int i; /* Used to iterate through schemas */ + int nByte = 0; /* Used to accumulate return value */ + + sqlite3BtreeEnterAll(db); + db->pnBytesFreed = &nByte; + for(i=0; inDb; i++){ + Schema *pSchema = db->aDb[i].pSchema; + if( ALWAYS(pSchema!=0) ){ + HashElem *p; + + nByte += sqlite3GlobalConfig.m.xRoundup(sizeof(HashElem)) * ( + pSchema->tblHash.count + + pSchema->trigHash.count + + pSchema->idxHash.count + + pSchema->fkeyHash.count + ); + nByte += sqlite3_msize(pSchema->tblHash.ht); + nByte += sqlite3_msize(pSchema->trigHash.ht); + nByte += sqlite3_msize(pSchema->idxHash.ht); + nByte += sqlite3_msize(pSchema->fkeyHash.ht); + + for(p=sqliteHashFirst(&pSchema->trigHash); p; p=sqliteHashNext(p)){ + sqlite3DeleteTrigger(db, (Trigger*)sqliteHashData(p)); + } + for(p=sqliteHashFirst(&pSchema->tblHash); p; p=sqliteHashNext(p)){ + sqlite3DeleteTable(db, (Table *)sqliteHashData(p)); + } + } + } + db->pnBytesFreed = 0; + sqlite3BtreeLeaveAll(db); + + *pHighwater = 0; + *pCurrent = nByte; + break; + } + + /* + ** *pCurrent gets an accurate estimate of the amount of memory used + ** to store all prepared statements. + ** *pHighwater is set to zero. + */ + case SQLITE_DBSTATUS_STMT_USED: { + struct Vdbe *pVdbe; /* Used to iterate through VMs */ + int nByte = 0; /* Used to accumulate return value */ + + db->pnBytesFreed = &nByte; + for(pVdbe=db->pVdbe; pVdbe; pVdbe=pVdbe->pNext){ + sqlite3VdbeClearObject(db, pVdbe); + sqlite3DbFree(db, pVdbe); + } + db->pnBytesFreed = 0; + + *pHighwater = 0; /* IMP: R-64479-57858 */ + *pCurrent = nByte; + + break; + } + + /* + ** Set *pCurrent to the total cache hits or misses encountered by all + ** pagers the database handle is connected to. *pHighwater is always set + ** to zero. + */ + case SQLITE_DBSTATUS_CACHE_SPILL: + op = SQLITE_DBSTATUS_CACHE_WRITE+1; + /* no break */ deliberate_fall_through + case SQLITE_DBSTATUS_CACHE_HIT: + case SQLITE_DBSTATUS_CACHE_MISS: + case SQLITE_DBSTATUS_CACHE_WRITE:{ + int i; + int nRet = 0; + assert( SQLITE_DBSTATUS_CACHE_MISS==SQLITE_DBSTATUS_CACHE_HIT+1 ); + assert( SQLITE_DBSTATUS_CACHE_WRITE==SQLITE_DBSTATUS_CACHE_HIT+2 ); + + for(i=0; inDb; i++){ + if( db->aDb[i].pBt ){ + Pager *pPager = sqlite3BtreePager(db->aDb[i].pBt); + sqlite3PagerCacheStat(pPager, op, resetFlag, &nRet); + } + } + *pHighwater = 0; /* IMP: R-42420-56072 */ + /* IMP: R-54100-20147 */ + /* IMP: R-29431-39229 */ + *pCurrent = nRet; + break; + } + + /* Set *pCurrent to non-zero if there are unresolved deferred foreign + ** key constraints. Set *pCurrent to zero if all foreign key constraints + ** have been satisfied. The *pHighwater is always set to zero. + */ + case SQLITE_DBSTATUS_DEFERRED_FKS: { + *pHighwater = 0; /* IMP: R-11967-56545 */ + *pCurrent = db->nDeferredImmCons>0 || db->nDeferredCons>0; + break; + } + + default: { + rc = SQLITE_ERROR; + } + } + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/************** End of status.c **********************************************/ +/************** Begin file date.c ********************************************/ +/* +** 2003 October 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement date and time +** functions for SQLite. +** +** There is only one exported symbol in this file - the function +** sqlite3RegisterDateTimeFunctions() found at the bottom of the file. +** All other code has file scope. +** +** SQLite processes all times and dates as julian day numbers. The +** dates and times are stored as the number of days since noon +** in Greenwich on November 24, 4714 B.C. according to the Gregorian +** calendar system. +** +** 1970-01-01 00:00:00 is JD 2440587.5 +** 2000-01-01 00:00:00 is JD 2451544.5 +** +** This implementation requires years to be expressed as a 4-digit number +** which means that only dates between 0000-01-01 and 9999-12-31 can +** be represented, even though julian day numbers allow a much wider +** range of dates. +** +** The Gregorian calendar system is used for all dates and times, +** even those that predate the Gregorian calendar. Historians usually +** use the julian calendar for dates prior to 1582-10-15 and for some +** dates afterwards, depending on locale. Beware of this difference. +** +** The conversion algorithms are implemented based on descriptions +** in the following text: +** +** Jean Meeus +** Astronomical Algorithms, 2nd Edition, 1998 +** ISBN 0-943396-61-1 +** Willmann-Bell, Inc +** Richmond, Virginia (USA) +*/ +/* #include "sqliteInt.h" */ +/* #include */ +/* #include */ +#include + +#ifndef SQLITE_OMIT_DATETIME_FUNCS + +/* +** The MSVC CRT on Windows CE may not have a localtime() function. +** So declare a substitute. The substitute function itself is +** defined in "os_win.c". +*/ +#if !defined(SQLITE_OMIT_LOCALTIME) && defined(_WIN32_WCE) && \ + (!defined(SQLITE_MSVC_LOCALTIME_API) || !SQLITE_MSVC_LOCALTIME_API) +struct tm *__cdecl localtime(const time_t *); +#endif + +/* +** A structure for holding a single date and time. +*/ +typedef struct DateTime DateTime; +struct DateTime { + sqlite3_int64 iJD; /* The julian day number times 86400000 */ + int Y, M, D; /* Year, month, and day */ + int h, m; /* Hour and minutes */ + int tz; /* Timezone offset in minutes */ + double s; /* Seconds */ + char validJD; /* True (1) if iJD is valid */ + char rawS; /* Raw numeric value stored in s */ + char validYMD; /* True (1) if Y,M,D are valid */ + char validHMS; /* True (1) if h,m,s are valid */ + char validTZ; /* True (1) if tz is valid */ + char tzSet; /* Timezone was set explicitly */ + char isError; /* An overflow has occurred */ +}; + + +/* +** Convert zDate into one or more integers according to the conversion +** specifier zFormat. +** +** zFormat[] contains 4 characters for each integer converted, except for +** the last integer which is specified by three characters. The meaning +** of a four-character format specifiers ABCD is: +** +** A: number of digits to convert. Always "2" or "4". +** B: minimum value. Always "0" or "1". +** C: maximum value, decoded as: +** a: 12 +** b: 14 +** c: 24 +** d: 31 +** e: 59 +** f: 9999 +** D: the separator character, or \000 to indicate this is the +** last number to convert. +** +** Example: To translate an ISO-8601 date YYYY-MM-DD, the format would +** be "40f-21a-20c". The "40f-" indicates the 4-digit year followed by "-". +** The "21a-" indicates the 2-digit month followed by "-". The "20c" indicates +** the 2-digit day which is the last integer in the set. +** +** The function returns the number of successful conversions. +*/ +static int getDigits(const char *zDate, const char *zFormat, ...){ + /* The aMx[] array translates the 3rd character of each format + ** spec into a max size: a b c d e f */ + static const u16 aMx[] = { 12, 14, 24, 31, 59, 9999 }; + va_list ap; + int cnt = 0; + char nextC; + va_start(ap, zFormat); + do{ + char N = zFormat[0] - '0'; + char min = zFormat[1] - '0'; + int val = 0; + u16 max; + + assert( zFormat[2]>='a' && zFormat[2]<='f' ); + max = aMx[zFormat[2] - 'a']; + nextC = zFormat[3]; + val = 0; + while( N-- ){ + if( !sqlite3Isdigit(*zDate) ){ + goto end_getDigits; + } + val = val*10 + *zDate - '0'; + zDate++; + } + if( val<(int)min || val>(int)max || (nextC!=0 && nextC!=*zDate) ){ + goto end_getDigits; + } + *va_arg(ap,int*) = val; + zDate++; + cnt++; + zFormat += 4; + }while( nextC ); +end_getDigits: + va_end(ap); + return cnt; +} + +/* +** Parse a timezone extension on the end of a date-time. +** The extension is of the form: +** +** (+/-)HH:MM +** +** Or the "zulu" notation: +** +** Z +** +** If the parse is successful, write the number of minutes +** of change in p->tz and return 0. If a parser error occurs, +** return non-zero. +** +** A missing specifier is not considered an error. +*/ +static int parseTimezone(const char *zDate, DateTime *p){ + int sgn = 0; + int nHr, nMn; + int c; + while( sqlite3Isspace(*zDate) ){ zDate++; } + p->tz = 0; + c = *zDate; + if( c=='-' ){ + sgn = -1; + }else if( c=='+' ){ + sgn = +1; + }else if( c=='Z' || c=='z' ){ + zDate++; + goto zulu_time; + }else{ + return c!=0; + } + zDate++; + if( getDigits(zDate, "20b:20e", &nHr, &nMn)!=2 ){ + return 1; + } + zDate += 5; + p->tz = sgn*(nMn + nHr*60); +zulu_time: + while( sqlite3Isspace(*zDate) ){ zDate++; } + p->tzSet = 1; + return *zDate!=0; +} + +/* +** Parse times of the form HH:MM or HH:MM:SS or HH:MM:SS.FFFF. +** The HH, MM, and SS must each be exactly 2 digits. The +** fractional seconds FFFF can be one or more digits. +** +** Return 1 if there is a parsing error and 0 on success. +*/ +static int parseHhMmSs(const char *zDate, DateTime *p){ + int h, m, s; + double ms = 0.0; + if( getDigits(zDate, "20c:20e", &h, &m)!=2 ){ + return 1; + } + zDate += 5; + if( *zDate==':' ){ + zDate++; + if( getDigits(zDate, "20e", &s)!=1 ){ + return 1; + } + zDate += 2; + if( *zDate=='.' && sqlite3Isdigit(zDate[1]) ){ + double rScale = 1.0; + zDate++; + while( sqlite3Isdigit(*zDate) ){ + ms = ms*10.0 + *zDate - '0'; + rScale *= 10.0; + zDate++; + } + ms /= rScale; + } + }else{ + s = 0; + } + p->validJD = 0; + p->rawS = 0; + p->validHMS = 1; + p->h = h; + p->m = m; + p->s = s + ms; + if( parseTimezone(zDate, p) ) return 1; + p->validTZ = (p->tz!=0)?1:0; + return 0; +} + +/* +** Put the DateTime object into its error state. +*/ +static void datetimeError(DateTime *p){ + memset(p, 0, sizeof(*p)); + p->isError = 1; +} + +/* +** Convert from YYYY-MM-DD HH:MM:SS to julian day. We always assume +** that the YYYY-MM-DD is according to the Gregorian calendar. +** +** Reference: Meeus page 61 +*/ +static void computeJD(DateTime *p){ + int Y, M, D, A, B, X1, X2; + + if( p->validJD ) return; + if( p->validYMD ){ + Y = p->Y; + M = p->M; + D = p->D; + }else{ + Y = 2000; /* If no YMD specified, assume 2000-Jan-01 */ + M = 1; + D = 1; + } + if( Y<-4713 || Y>9999 || p->rawS ){ + datetimeError(p); + return; + } + if( M<=2 ){ + Y--; + M += 12; + } + A = Y/100; + B = 2 - A + (A/4); + X1 = 36525*(Y+4716)/100; + X2 = 306001*(M+1)/10000; + p->iJD = (sqlite3_int64)((X1 + X2 + D + B - 1524.5 ) * 86400000); + p->validJD = 1; + if( p->validHMS ){ + p->iJD += p->h*3600000 + p->m*60000 + (sqlite3_int64)(p->s*1000); + if( p->validTZ ){ + p->iJD -= p->tz*60000; + p->validYMD = 0; + p->validHMS = 0; + p->validTZ = 0; + } + } +} + +/* +** Parse dates of the form +** +** YYYY-MM-DD HH:MM:SS.FFF +** YYYY-MM-DD HH:MM:SS +** YYYY-MM-DD HH:MM +** YYYY-MM-DD +** +** Write the result into the DateTime structure and return 0 +** on success and 1 if the input string is not a well-formed +** date. +*/ +static int parseYyyyMmDd(const char *zDate, DateTime *p){ + int Y, M, D, neg; + + if( zDate[0]=='-' ){ + zDate++; + neg = 1; + }else{ + neg = 0; + } + if( getDigits(zDate, "40f-21a-21d", &Y, &M, &D)!=3 ){ + return 1; + } + zDate += 10; + while( sqlite3Isspace(*zDate) || 'T'==*(u8*)zDate ){ zDate++; } + if( parseHhMmSs(zDate, p)==0 ){ + /* We got the time */ + }else if( *zDate==0 ){ + p->validHMS = 0; + }else{ + return 1; + } + p->validJD = 0; + p->validYMD = 1; + p->Y = neg ? -Y : Y; + p->M = M; + p->D = D; + if( p->validTZ ){ + computeJD(p); + } + return 0; +} + +/* +** Set the time to the current time reported by the VFS. +** +** Return the number of errors. +*/ +static int setDateTimeToCurrent(sqlite3_context *context, DateTime *p){ + p->iJD = sqlite3StmtCurrentTime(context); + if( p->iJD>0 ){ + p->validJD = 1; + return 0; + }else{ + return 1; + } +} + +/* +** Input "r" is a numeric quantity which might be a julian day number, +** or the number of seconds since 1970. If the value if r is within +** range of a julian day number, install it as such and set validJD. +** If the value is a valid unix timestamp, put it in p->s and set p->rawS. +*/ +static void setRawDateNumber(DateTime *p, double r){ + p->s = r; + p->rawS = 1; + if( r>=0.0 && r<5373484.5 ){ + p->iJD = (sqlite3_int64)(r*86400000.0 + 0.5); + p->validJD = 1; + } +} + +/* +** Attempt to parse the given string into a julian day number. Return +** the number of errors. +** +** The following are acceptable forms for the input string: +** +** YYYY-MM-DD HH:MM:SS.FFF +/-HH:MM +** DDDD.DD +** now +** +** In the first form, the +/-HH:MM is always optional. The fractional +** seconds extension (the ".FFF") is optional. The seconds portion +** (":SS.FFF") is option. The year and date can be omitted as long +** as there is a time string. The time string can be omitted as long +** as there is a year and date. +*/ +static int parseDateOrTime( + sqlite3_context *context, + const char *zDate, + DateTime *p +){ + double r; + if( parseYyyyMmDd(zDate,p)==0 ){ + return 0; + }else if( parseHhMmSs(zDate, p)==0 ){ + return 0; + }else if( sqlite3StrICmp(zDate,"now")==0 && sqlite3NotPureFunc(context) ){ + return setDateTimeToCurrent(context, p); + }else if( sqlite3AtoF(zDate, &r, sqlite3Strlen30(zDate), SQLITE_UTF8)>0 ){ + setRawDateNumber(p, r); + return 0; + } + return 1; +} + +/* The julian day number for 9999-12-31 23:59:59.999 is 5373484.4999999. +** Multiplying this by 86400000 gives 464269060799999 as the maximum value +** for DateTime.iJD. +** +** But some older compilers (ex: gcc 4.2.1 on older Macs) cannot deal with +** such a large integer literal, so we have to encode it. +*/ +#define INT_464269060799999 ((((i64)0x1a640)<<32)|0x1072fdff) + +/* +** Return TRUE if the given julian day number is within range. +** +** The input is the JulianDay times 86400000. +*/ +static int validJulianDay(sqlite3_int64 iJD){ + return iJD>=0 && iJD<=INT_464269060799999; +} + +/* +** Compute the Year, Month, and Day from the julian day number. +*/ +static void computeYMD(DateTime *p){ + int Z, A, B, C, D, E, X1; + if( p->validYMD ) return; + if( !p->validJD ){ + p->Y = 2000; + p->M = 1; + p->D = 1; + }else if( !validJulianDay(p->iJD) ){ + datetimeError(p); + return; + }else{ + Z = (int)((p->iJD + 43200000)/86400000); + A = (int)((Z - 1867216.25)/36524.25); + A = Z + 1 + A - (A/4); + B = A + 1524; + C = (int)((B - 122.1)/365.25); + D = (36525*(C&32767))/100; + E = (int)((B-D)/30.6001); + X1 = (int)(30.6001*E); + p->D = B - D - X1; + p->M = E<14 ? E-1 : E-13; + p->Y = p->M>2 ? C - 4716 : C - 4715; + } + p->validYMD = 1; +} + +/* +** Compute the Hour, Minute, and Seconds from the julian day number. +*/ +static void computeHMS(DateTime *p){ + int s; + if( p->validHMS ) return; + computeJD(p); + s = (int)((p->iJD + 43200000) % 86400000); + p->s = s/1000.0; + s = (int)p->s; + p->s -= s; + p->h = s/3600; + s -= p->h*3600; + p->m = s/60; + p->s += s - p->m*60; + p->rawS = 0; + p->validHMS = 1; +} + +/* +** Compute both YMD and HMS +*/ +static void computeYMD_HMS(DateTime *p){ + computeYMD(p); + computeHMS(p); +} + +/* +** Clear the YMD and HMS and the TZ +*/ +static void clearYMD_HMS_TZ(DateTime *p){ + p->validYMD = 0; + p->validHMS = 0; + p->validTZ = 0; +} + +#ifndef SQLITE_OMIT_LOCALTIME +/* +** On recent Windows platforms, the localtime_s() function is available +** as part of the "Secure CRT". It is essentially equivalent to +** localtime_r() available under most POSIX platforms, except that the +** order of the parameters is reversed. +** +** See http://msdn.microsoft.com/en-us/library/a442x3ye(VS.80).aspx. +** +** If the user has not indicated to use localtime_r() or localtime_s() +** already, check for an MSVC build environment that provides +** localtime_s(). +*/ +#if !HAVE_LOCALTIME_R && !HAVE_LOCALTIME_S \ + && defined(_MSC_VER) && defined(_CRT_INSECURE_DEPRECATE) +#undef HAVE_LOCALTIME_S +#define HAVE_LOCALTIME_S 1 +#endif + +/* +** The following routine implements the rough equivalent of localtime_r() +** using whatever operating-system specific localtime facility that +** is available. This routine returns 0 on success and +** non-zero on any kind of error. +** +** If the sqlite3GlobalConfig.bLocaltimeFault variable is non-zero then this +** routine will always fail. If bLocaltimeFault is nonzero and +** sqlite3GlobalConfig.xAltLocaltime is not NULL, then xAltLocaltime() is +** invoked in place of the OS-defined localtime() function. +** +** EVIDENCE-OF: R-62172-00036 In this implementation, the standard C +** library function localtime_r() is used to assist in the calculation of +** local time. +*/ +static int osLocaltime(time_t *t, struct tm *pTm){ + int rc; +#if !HAVE_LOCALTIME_R && !HAVE_LOCALTIME_S + struct tm *pX; +#if SQLITE_THREADSAFE>0 + sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); +#endif + sqlite3_mutex_enter(mutex); + pX = localtime(t); +#ifndef SQLITE_UNTESTABLE + if( sqlite3GlobalConfig.bLocaltimeFault ){ + if( sqlite3GlobalConfig.xAltLocaltime!=0 + && 0==sqlite3GlobalConfig.xAltLocaltime((const void*)t,(void*)pTm) + ){ + pX = pTm; + }else{ + pX = 0; + } + } +#endif + if( pX ) *pTm = *pX; +#if SQLITE_THREADSAFE>0 + sqlite3_mutex_leave(mutex); +#endif + rc = pX==0; +#else +#ifndef SQLITE_UNTESTABLE + if( sqlite3GlobalConfig.bLocaltimeFault ){ + if( sqlite3GlobalConfig.xAltLocaltime!=0 ){ + return sqlite3GlobalConfig.xAltLocaltime((const void*)t,(void*)pTm); + }else{ + return 1; + } + } +#endif +#if HAVE_LOCALTIME_R + rc = localtime_r(t, pTm)==0; +#else + rc = localtime_s(pTm, t); +#endif /* HAVE_LOCALTIME_R */ +#endif /* HAVE_LOCALTIME_R || HAVE_LOCALTIME_S */ + return rc; +} +#endif /* SQLITE_OMIT_LOCALTIME */ + + +#ifndef SQLITE_OMIT_LOCALTIME +/* +** Assuming the input DateTime is UTC, move it to its localtime equivalent. +*/ +static int toLocaltime( + DateTime *p, /* Date at which to calculate offset */ + sqlite3_context *pCtx /* Write error here if one occurs */ +){ + time_t t; + struct tm sLocal; + int iYearDiff; + + /* Initialize the contents of sLocal to avoid a compiler warning. */ + memset(&sLocal, 0, sizeof(sLocal)); + + computeJD(p); + if( p->iJD<2108667600*(i64)100000 /* 1970-01-01 */ + || p->iJD>2130141456*(i64)100000 /* 2038-01-18 */ + ){ + /* EVIDENCE-OF: R-55269-29598 The localtime_r() C function normally only + ** works for years between 1970 and 2037. For dates outside this range, + ** SQLite attempts to map the year into an equivalent year within this + ** range, do the calculation, then map the year back. + */ + DateTime x = *p; + computeYMD_HMS(&x); + iYearDiff = (2000 + x.Y%4) - x.Y; + x.Y += iYearDiff; + x.validJD = 0; + computeJD(&x); + t = (time_t)(x.iJD/1000 - 21086676*(i64)10000); + }else{ + iYearDiff = 0; + t = (time_t)(p->iJD/1000 - 21086676*(i64)10000); + } + if( osLocaltime(&t, &sLocal) ){ + sqlite3_result_error(pCtx, "local time unavailable", -1); + return SQLITE_ERROR; + } + p->Y = sLocal.tm_year + 1900 - iYearDiff; + p->M = sLocal.tm_mon + 1; + p->D = sLocal.tm_mday; + p->h = sLocal.tm_hour; + p->m = sLocal.tm_min; + p->s = sLocal.tm_sec + (p->iJD%1000)*0.001; + p->validYMD = 1; + p->validHMS = 1; + p->validJD = 0; + p->rawS = 0; + p->validTZ = 0; + p->isError = 0; + return SQLITE_OK; +} +#endif /* SQLITE_OMIT_LOCALTIME */ + +/* +** The following table defines various date transformations of the form +** +** 'NNN days' +** +** Where NNN is an arbitrary floating-point number and "days" can be one +** of several units of time. +*/ +static const struct { + u8 nName; /* Length of the name */ + char zName[7]; /* Name of the transformation */ + float rLimit; /* Maximum NNN value for this transform */ + float rXform; /* Constant used for this transform */ +} aXformType[] = { + { 6, "second", 4.6427e+14, 1.0 }, + { 6, "minute", 7.7379e+12, 60.0 }, + { 4, "hour", 1.2897e+11, 3600.0 }, + { 3, "day", 5373485.0, 86400.0 }, + { 5, "month", 176546.0, 2592000.0 }, + { 4, "year", 14713.0, 31536000.0 }, +}; + +/* +** Process a modifier to a date-time stamp. The modifiers are +** as follows: +** +** NNN days +** NNN hours +** NNN minutes +** NNN.NNNN seconds +** NNN months +** NNN years +** start of month +** start of year +** start of week +** start of day +** weekday N +** unixepoch +** localtime +** utc +** +** Return 0 on success and 1 if there is any kind of error. If the error +** is in a system call (i.e. localtime()), then an error message is written +** to context pCtx. If the error is an unrecognized modifier, no error is +** written to pCtx. +*/ +static int parseModifier( + sqlite3_context *pCtx, /* Function context */ + const char *z, /* The text of the modifier */ + int n, /* Length of zMod in bytes */ + DateTime *p, /* The date/time value to be modified */ + int idx /* Parameter index of the modifier */ +){ + int rc = 1; + double r; + switch(sqlite3UpperToLower[(u8)z[0]] ){ + case 'a': { + /* + ** auto + ** + ** If rawS is available, then interpret as a julian day number, or + ** a unix timestamp, depending on its magnitude. + */ + if( sqlite3_stricmp(z, "auto")==0 ){ + if( idx>1 ) return 1; /* IMP: R-33611-57934 */ + if( !p->rawS || p->validJD ){ + rc = 0; + p->rawS = 0; + }else if( p->s>=-21086676*(i64)10000 /* -4713-11-24 12:00:00 */ + && p->s<=(25340230*(i64)10000)+799 /* 9999-12-31 23:59:59 */ + ){ + r = p->s*1000.0 + 210866760000000.0; + clearYMD_HMS_TZ(p); + p->iJD = (sqlite3_int64)(r + 0.5); + p->validJD = 1; + p->rawS = 0; + rc = 0; + } + } + break; + } + case 'j': { + /* + ** julianday + ** + ** Always interpret the prior number as a julian-day value. If this + ** is not the first modifier, or if the prior argument is not a numeric + ** value in the allowed range of julian day numbers understood by + ** SQLite (0..5373484.5) then the result will be NULL. + */ + if( sqlite3_stricmp(z, "julianday")==0 ){ + if( idx>1 ) return 1; /* IMP: R-31176-64601 */ + if( p->validJD && p->rawS ){ + rc = 0; + p->rawS = 0; + } + } + break; + } +#ifndef SQLITE_OMIT_LOCALTIME + case 'l': { + /* localtime + ** + ** Assuming the current time value is UTC (a.k.a. GMT), shift it to + ** show local time. + */ + if( sqlite3_stricmp(z, "localtime")==0 && sqlite3NotPureFunc(pCtx) ){ + rc = toLocaltime(p, pCtx); + } + break; + } +#endif + case 'u': { + /* + ** unixepoch + ** + ** Treat the current value of p->s as the number of + ** seconds since 1970. Convert to a real julian day number. + */ + if( sqlite3_stricmp(z, "unixepoch")==0 && p->rawS ){ + if( idx>1 ) return 1; /* IMP: R-49255-55373 */ + r = p->s*1000.0 + 210866760000000.0; + if( r>=0.0 && r<464269060800000.0 ){ + clearYMD_HMS_TZ(p); + p->iJD = (sqlite3_int64)(r + 0.5); + p->validJD = 1; + p->rawS = 0; + rc = 0; + } + } +#ifndef SQLITE_OMIT_LOCALTIME + else if( sqlite3_stricmp(z, "utc")==0 && sqlite3NotPureFunc(pCtx) ){ + if( p->tzSet==0 ){ + i64 iOrigJD; /* Original localtime */ + i64 iGuess; /* Guess at the corresponding utc time */ + int cnt = 0; /* Safety to prevent infinite loop */ + int iErr; /* Guess is off by this much */ + + computeJD(p); + iGuess = iOrigJD = p->iJD; + iErr = 0; + do{ + DateTime new; + memset(&new, 0, sizeof(new)); + iGuess -= iErr; + new.iJD = iGuess; + new.validJD = 1; + rc = toLocaltime(&new, pCtx); + if( rc ) return rc; + computeJD(&new); + iErr = new.iJD - iOrigJD; + }while( iErr && cnt++<3 ); + memset(p, 0, sizeof(*p)); + p->iJD = iGuess; + p->validJD = 1; + p->tzSet = 1; + } + rc = SQLITE_OK; + } +#endif + break; + } + case 'w': { + /* + ** weekday N + ** + ** Move the date to the same time on the next occurrence of + ** weekday N where 0==Sunday, 1==Monday, and so forth. If the + ** date is already on the appropriate weekday, this is a no-op. + */ + if( sqlite3_strnicmp(z, "weekday ", 8)==0 + && sqlite3AtoF(&z[8], &r, sqlite3Strlen30(&z[8]), SQLITE_UTF8)>0 + && (n=(int)r)==r && n>=0 && r<7 ){ + sqlite3_int64 Z; + computeYMD_HMS(p); + p->validTZ = 0; + p->validJD = 0; + computeJD(p); + Z = ((p->iJD + 129600000)/86400000) % 7; + if( Z>n ) Z -= 7; + p->iJD += (n - Z)*86400000; + clearYMD_HMS_TZ(p); + rc = 0; + } + break; + } + case 's': { + /* + ** start of TTTTT + ** + ** Move the date backwards to the beginning of the current day, + ** or month or year. + */ + if( sqlite3_strnicmp(z, "start of ", 9)!=0 ) break; + if( !p->validJD && !p->validYMD && !p->validHMS ) break; + z += 9; + computeYMD(p); + p->validHMS = 1; + p->h = p->m = 0; + p->s = 0.0; + p->rawS = 0; + p->validTZ = 0; + p->validJD = 0; + if( sqlite3_stricmp(z,"month")==0 ){ + p->D = 1; + rc = 0; + }else if( sqlite3_stricmp(z,"year")==0 ){ + p->M = 1; + p->D = 1; + rc = 0; + }else if( sqlite3_stricmp(z,"day")==0 ){ + rc = 0; + } + break; + } + case '+': + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + double rRounder; + int i; + for(n=1; z[n] && z[n]!=':' && !sqlite3Isspace(z[n]); n++){} + if( sqlite3AtoF(z, &r, n, SQLITE_UTF8)<=0 ){ + rc = 1; + break; + } + if( z[n]==':' ){ + /* A modifier of the form (+|-)HH:MM:SS.FFF adds (or subtracts) the + ** specified number of hours, minutes, seconds, and fractional seconds + ** to the time. The ".FFF" may be omitted. The ":SS.FFF" may be + ** omitted. + */ + const char *z2 = z; + DateTime tx; + sqlite3_int64 day; + if( !sqlite3Isdigit(*z2) ) z2++; + memset(&tx, 0, sizeof(tx)); + if( parseHhMmSs(z2, &tx) ) break; + computeJD(&tx); + tx.iJD -= 43200000; + day = tx.iJD/86400000; + tx.iJD -= day*86400000; + if( z[0]=='-' ) tx.iJD = -tx.iJD; + computeJD(p); + clearYMD_HMS_TZ(p); + p->iJD += tx.iJD; + rc = 0; + break; + } + + /* If control reaches this point, it means the transformation is + ** one of the forms like "+NNN days". */ + z += n; + while( sqlite3Isspace(*z) ) z++; + n = sqlite3Strlen30(z); + if( n>10 || n<3 ) break; + if( sqlite3UpperToLower[(u8)z[n-1]]=='s' ) n--; + computeJD(p); + rc = 1; + rRounder = r<0 ? -0.5 : +0.5; + for(i=0; i-aXformType[i].rLimit && rM += (int)r; + x = p->M>0 ? (p->M-1)/12 : (p->M-12)/12; + p->Y += x; + p->M -= x*12; + p->validJD = 0; + r -= (int)r; + break; + } + case 5: { /* Special processing to add years */ + int y = (int)r; + assert( strcmp(aXformType[i].zName,"year")==0 ); + computeYMD_HMS(p); + p->Y += y; + p->validJD = 0; + r -= (int)r; + break; + } + } + computeJD(p); + p->iJD += (sqlite3_int64)(r*1000.0*aXformType[i].rXform + rRounder); + rc = 0; + break; + } + } + clearYMD_HMS_TZ(p); + break; + } + default: { + break; + } + } + return rc; +} + +/* +** Process time function arguments. argv[0] is a date-time stamp. +** argv[1] and following are modifiers. Parse them all and write +** the resulting time into the DateTime structure p. Return 0 +** on success and 1 if there are any errors. +** +** If there are zero parameters (if even argv[0] is undefined) +** then assume a default value of "now" for argv[0]. +*/ +static int isDate( + sqlite3_context *context, + int argc, + sqlite3_value **argv, + DateTime *p +){ + int i, n; + const unsigned char *z; + int eType; + memset(p, 0, sizeof(*p)); + if( argc==0 ){ + if( !sqlite3NotPureFunc(context) ) return 1; + return setDateTimeToCurrent(context, p); + } + if( (eType = sqlite3_value_type(argv[0]))==SQLITE_FLOAT + || eType==SQLITE_INTEGER ){ + setRawDateNumber(p, sqlite3_value_double(argv[0])); + }else{ + z = sqlite3_value_text(argv[0]); + if( !z || parseDateOrTime(context, (char*)z, p) ){ + return 1; + } + } + for(i=1; iisError || !validJulianDay(p->iJD) ) return 1; + return 0; +} + + +/* +** The following routines implement the various date and time functions +** of SQLite. +*/ + +/* +** julianday( TIMESTRING, MOD, MOD, ...) +** +** Return the julian day number of the date specified in the arguments +*/ +static void juliandayFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + DateTime x; + if( isDate(context, argc, argv, &x)==0 ){ + computeJD(&x); + sqlite3_result_double(context, x.iJD/86400000.0); + } +} + +/* +** unixepoch( TIMESTRING, MOD, MOD, ...) +** +** Return the number of seconds (including fractional seconds) since +** the unix epoch of 1970-01-01 00:00:00 GMT. +*/ +static void unixepochFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + DateTime x; + if( isDate(context, argc, argv, &x)==0 ){ + computeJD(&x); + sqlite3_result_int64(context, x.iJD/1000 - 21086676*(i64)10000); + } +} + +/* +** datetime( TIMESTRING, MOD, MOD, ...) +** +** Return YYYY-MM-DD HH:MM:SS +*/ +static void datetimeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + DateTime x; + if( isDate(context, argc, argv, &x)==0 ){ + int Y, s; + char zBuf[24]; + computeYMD_HMS(&x); + Y = x.Y; + if( Y<0 ) Y = -Y; + zBuf[1] = '0' + (Y/1000)%10; + zBuf[2] = '0' + (Y/100)%10; + zBuf[3] = '0' + (Y/10)%10; + zBuf[4] = '0' + (Y)%10; + zBuf[5] = '-'; + zBuf[6] = '0' + (x.M/10)%10; + zBuf[7] = '0' + (x.M)%10; + zBuf[8] = '-'; + zBuf[9] = '0' + (x.D/10)%10; + zBuf[10] = '0' + (x.D)%10; + zBuf[11] = ' '; + zBuf[12] = '0' + (x.h/10)%10; + zBuf[13] = '0' + (x.h)%10; + zBuf[14] = ':'; + zBuf[15] = '0' + (x.m/10)%10; + zBuf[16] = '0' + (x.m)%10; + zBuf[17] = ':'; + s = (int)x.s; + zBuf[18] = '0' + (s/10)%10; + zBuf[19] = '0' + (s)%10; + zBuf[20] = 0; + if( x.Y<0 ){ + zBuf[0] = '-'; + sqlite3_result_text(context, zBuf, 20, SQLITE_TRANSIENT); + }else{ + sqlite3_result_text(context, &zBuf[1], 19, SQLITE_TRANSIENT); + } + } +} + +/* +** time( TIMESTRING, MOD, MOD, ...) +** +** Return HH:MM:SS +*/ +static void timeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + DateTime x; + if( isDate(context, argc, argv, &x)==0 ){ + int s; + char zBuf[16]; + computeHMS(&x); + zBuf[0] = '0' + (x.h/10)%10; + zBuf[1] = '0' + (x.h)%10; + zBuf[2] = ':'; + zBuf[3] = '0' + (x.m/10)%10; + zBuf[4] = '0' + (x.m)%10; + zBuf[5] = ':'; + s = (int)x.s; + zBuf[6] = '0' + (s/10)%10; + zBuf[7] = '0' + (s)%10; + zBuf[8] = 0; + sqlite3_result_text(context, zBuf, 8, SQLITE_TRANSIENT); + } +} + +/* +** date( TIMESTRING, MOD, MOD, ...) +** +** Return YYYY-MM-DD +*/ +static void dateFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + DateTime x; + if( isDate(context, argc, argv, &x)==0 ){ + int Y; + char zBuf[16]; + computeYMD(&x); + Y = x.Y; + if( Y<0 ) Y = -Y; + zBuf[1] = '0' + (Y/1000)%10; + zBuf[2] = '0' + (Y/100)%10; + zBuf[3] = '0' + (Y/10)%10; + zBuf[4] = '0' + (Y)%10; + zBuf[5] = '-'; + zBuf[6] = '0' + (x.M/10)%10; + zBuf[7] = '0' + (x.M)%10; + zBuf[8] = '-'; + zBuf[9] = '0' + (x.D/10)%10; + zBuf[10] = '0' + (x.D)%10; + zBuf[11] = 0; + if( x.Y<0 ){ + zBuf[0] = '-'; + sqlite3_result_text(context, zBuf, 11, SQLITE_TRANSIENT); + }else{ + sqlite3_result_text(context, &zBuf[1], 10, SQLITE_TRANSIENT); + } + } +} + +/* +** strftime( FORMAT, TIMESTRING, MOD, MOD, ...) +** +** Return a string described by FORMAT. Conversions as follows: +** +** %d day of month +** %f ** fractional seconds SS.SSS +** %H hour 00-24 +** %j day of year 000-366 +** %J ** julian day number +** %m month 01-12 +** %M minute 00-59 +** %s seconds since 1970-01-01 +** %S seconds 00-59 +** %w day of week 0-6 sunday==0 +** %W week of year 00-53 +** %Y year 0000-9999 +** %% % +*/ +static void strftimeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + DateTime x; + size_t i,j; + sqlite3 *db; + const char *zFmt; + sqlite3_str sRes; + + + if( argc==0 ) return; + zFmt = (const char*)sqlite3_value_text(argv[0]); + if( zFmt==0 || isDate(context, argc-1, argv+1, &x) ) return; + db = sqlite3_context_db_handle(context); + sqlite3StrAccumInit(&sRes, 0, 0, 0, db->aLimit[SQLITE_LIMIT_LENGTH]); + + computeJD(&x); + computeYMD_HMS(&x); + for(i=j=0; zFmt[i]; i++){ + if( zFmt[i]!='%' ) continue; + if( j59.999 ) s = 59.999; + sqlite3_str_appendf(&sRes, "%06.3f", s); + break; + } + case 'H': { + sqlite3_str_appendf(&sRes, "%02d", x.h); + break; + } + case 'W': /* Fall thru */ + case 'j': { + int nDay; /* Number of days since 1st day of year */ + DateTime y = x; + y.validJD = 0; + y.M = 1; + y.D = 1; + computeJD(&y); + nDay = (int)((x.iJD-y.iJD+43200000)/86400000); + if( zFmt[i]=='W' ){ + int wd; /* 0=Monday, 1=Tuesday, ... 6=Sunday */ + wd = (int)(((x.iJD+43200000)/86400000)%7); + sqlite3_str_appendf(&sRes,"%02d",(nDay+7-wd)/7); + }else{ + sqlite3_str_appendf(&sRes,"%03d",nDay+1); + } + break; + } + case 'J': { + sqlite3_str_appendf(&sRes,"%.16g",x.iJD/86400000.0); + break; + } + case 'm': { + sqlite3_str_appendf(&sRes,"%02d",x.M); + break; + } + case 'M': { + sqlite3_str_appendf(&sRes,"%02d",x.m); + break; + } + case 's': { + i64 iS = (i64)(x.iJD/1000 - 21086676*(i64)10000); + sqlite3_str_appendf(&sRes,"%lld",iS); + break; + } + case 'S': { + sqlite3_str_appendf(&sRes,"%02d",(int)x.s); + break; + } + case 'w': { + sqlite3_str_appendchar(&sRes, 1, + (char)(((x.iJD+129600000)/86400000) % 7) + '0'); + break; + } + case 'Y': { + sqlite3_str_appendf(&sRes,"%04d",x.Y); + break; + } + case '%': { + sqlite3_str_appendchar(&sRes, 1, '%'); + break; + } + default: { + sqlite3_str_reset(&sRes); + return; + } + } + } + if( jpMethods ){ + pId->pMethods->xClose(pId); + pId->pMethods = 0; + } +} +SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file *id, void *pBuf, int amt, i64 offset){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xRead(id, pBuf, amt, offset); +} +SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file *id, const void *pBuf, int amt, i64 offset){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xWrite(id, pBuf, amt, offset); +} +SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file *id, i64 size){ + return id->pMethods->xTruncate(id, size); +} +SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file *id, int flags){ + DO_OS_MALLOC_TEST(id); + return flags ? id->pMethods->xSync(id, flags) : SQLITE_OK; +} +SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file *id, i64 *pSize){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xFileSize(id, pSize); +} +SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file *id, int lockType){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xLock(id, lockType); +} +SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file *id, int lockType){ + return id->pMethods->xUnlock(id, lockType); +} +SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xCheckReservedLock(id, pResOut); +} + +/* +** Use sqlite3OsFileControl() when we are doing something that might fail +** and we need to know about the failures. Use sqlite3OsFileControlHint() +** when simply tossing information over the wall to the VFS and we do not +** really care if the VFS receives and understands the information since it +** is only a hint and can be safely ignored. The sqlite3OsFileControlHint() +** routine has no return value since the return value would be meaningless. +*/ +SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file *id, int op, void *pArg){ + if( id->pMethods==0 ) return SQLITE_NOTFOUND; +#ifdef SQLITE_TEST + if( op!=SQLITE_FCNTL_COMMIT_PHASETWO + && op!=SQLITE_FCNTL_LOCK_TIMEOUT + && op!=SQLITE_FCNTL_CKPT_DONE + && op!=SQLITE_FCNTL_CKPT_START + ){ + /* Faults are not injected into COMMIT_PHASETWO because, assuming SQLite + ** is using a regular VFS, it is called after the corresponding + ** transaction has been committed. Injecting a fault at this point + ** confuses the test scripts - the COMMIT comand returns SQLITE_NOMEM + ** but the transaction is committed anyway. + ** + ** The core must call OsFileControl() though, not OsFileControlHint(), + ** as if a custom VFS (e.g. zipvfs) returns an error here, it probably + ** means the commit really has failed and an error should be returned + ** to the user. + ** + ** The CKPT_DONE and CKPT_START file-controls are write-only signals + ** to the cksumvfs. Their return code is meaningless and is ignored + ** by the SQLite core, so there is no point in simulating OOMs for them. + */ + DO_OS_MALLOC_TEST(id); + } +#endif + return id->pMethods->xFileControl(id, op, pArg); +} +SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file *id, int op, void *pArg){ + if( id->pMethods ) (void)id->pMethods->xFileControl(id, op, pArg); +} + +SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id){ + int (*xSectorSize)(sqlite3_file*) = id->pMethods->xSectorSize; + return (xSectorSize ? xSectorSize(id) : SQLITE_DEFAULT_SECTOR_SIZE); +} +SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id){ + if( NEVER(id->pMethods==0) ) return 0; + return id->pMethods->xDeviceCharacteristics(id); +} +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){ + return id->pMethods->xShmLock(id, offset, n, flags); +} +SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id){ + id->pMethods->xShmBarrier(id); +} +SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int deleteFlag){ + return id->pMethods->xShmUnmap(id, deleteFlag); +} +SQLITE_PRIVATE int sqlite3OsShmMap( + sqlite3_file *id, /* Database file handle */ + int iPage, + int pgsz, + int bExtend, /* True to extend file if necessary */ + void volatile **pp /* OUT: Pointer to mapping */ +){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp); +} +#endif /* SQLITE_OMIT_WAL */ + +#if SQLITE_MAX_MMAP_SIZE>0 +/* The real implementation of xFetch and xUnfetch */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xFetch(id, iOff, iAmt, pp); +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return id->pMethods->xUnfetch(id, iOff, p); +} +#else +/* No-op stubs to use when memory-mapped I/O is disabled */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + *pp = 0; + return SQLITE_OK; +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return SQLITE_OK; +} +#endif + +/* +** The next group of routines are convenience wrappers around the +** VFS methods. +*/ +SQLITE_PRIVATE int sqlite3OsOpen( + sqlite3_vfs *pVfs, + const char *zPath, + sqlite3_file *pFile, + int flags, + int *pFlagsOut +){ + int rc; + DO_OS_MALLOC_TEST(0); + /* 0x87f7f is a mask of SQLITE_OPEN_ flags that are valid to be passed + ** down into the VFS layer. Some SQLITE_OPEN_ flags (for example, + ** SQLITE_OPEN_FULLMUTEX or SQLITE_OPEN_SHAREDCACHE) are blocked before + ** reaching the VFS. */ + rc = pVfs->xOpen(pVfs, zPath, pFile, flags & 0x1087f7f, pFlagsOut); + assert( rc==SQLITE_OK || pFile->pMethods==0 ); + return rc; +} +SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ + DO_OS_MALLOC_TEST(0); + assert( dirSync==0 || dirSync==1 ); + return pVfs->xDelete!=0 ? pVfs->xDelete(pVfs, zPath, dirSync) : SQLITE_OK; +} +SQLITE_PRIVATE int sqlite3OsAccess( + sqlite3_vfs *pVfs, + const char *zPath, + int flags, + int *pResOut +){ + DO_OS_MALLOC_TEST(0); + return pVfs->xAccess(pVfs, zPath, flags, pResOut); +} +SQLITE_PRIVATE int sqlite3OsFullPathname( + sqlite3_vfs *pVfs, + const char *zPath, + int nPathOut, + char *zPathOut +){ + DO_OS_MALLOC_TEST(0); + zPathOut[0] = 0; + return pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); +} +#ifndef SQLITE_OMIT_LOAD_EXTENSION +SQLITE_PRIVATE void *sqlite3OsDlOpen(sqlite3_vfs *pVfs, const char *zPath){ + assert( zPath!=0 ); + assert( strlen(zPath)<=SQLITE_MAX_PATHLEN ); /* tag-20210611-1 */ + return pVfs->xDlOpen(pVfs, zPath); +} +SQLITE_PRIVATE void sqlite3OsDlError(sqlite3_vfs *pVfs, int nByte, char *zBufOut){ + pVfs->xDlError(pVfs, nByte, zBufOut); +} +SQLITE_PRIVATE void (*sqlite3OsDlSym(sqlite3_vfs *pVfs, void *pHdle, const char *zSym))(void){ + return pVfs->xDlSym(pVfs, pHdle, zSym); +} +SQLITE_PRIVATE void sqlite3OsDlClose(sqlite3_vfs *pVfs, void *pHandle){ + pVfs->xDlClose(pVfs, pHandle); +} +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ +SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *pVfs, int nByte, char *zBufOut){ + if( sqlite3Config.iPrngSeed ){ + memset(zBufOut, 0, nByte); + if( ALWAYS(nByte>(signed)sizeof(unsigned)) ) nByte = sizeof(unsigned int); + memcpy(zBufOut, &sqlite3Config.iPrngSeed, nByte); + return SQLITE_OK; + }else{ + return pVfs->xRandomness(pVfs, nByte, zBufOut); + } + +} +SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *pVfs, int nMicro){ + return pVfs->xSleep(pVfs, nMicro); +} +SQLITE_PRIVATE int sqlite3OsGetLastError(sqlite3_vfs *pVfs){ + return pVfs->xGetLastError ? pVfs->xGetLastError(pVfs, 0, 0) : 0; +} +SQLITE_PRIVATE int sqlite3OsCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *pTimeOut){ + int rc; + /* IMPLEMENTATION-OF: R-49045-42493 SQLite will use the xCurrentTimeInt64() + ** method to get the current date and time if that method is available + ** (if iVersion is 2 or greater and the function pointer is not NULL) and + ** will fall back to xCurrentTime() if xCurrentTimeInt64() is + ** unavailable. + */ + if( pVfs->iVersion>=2 && pVfs->xCurrentTimeInt64 ){ + rc = pVfs->xCurrentTimeInt64(pVfs, pTimeOut); + }else{ + double r; + rc = pVfs->xCurrentTime(pVfs, &r); + *pTimeOut = (sqlite3_int64)(r*86400000.0); + } + return rc; +} + +SQLITE_PRIVATE int sqlite3OsOpenMalloc( + sqlite3_vfs *pVfs, + const char *zFile, + sqlite3_file **ppFile, + int flags, + int *pOutFlags +){ + int rc; + sqlite3_file *pFile; + pFile = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile); + if( pFile ){ + rc = sqlite3OsOpen(pVfs, zFile, pFile, flags, pOutFlags); + if( rc!=SQLITE_OK ){ + sqlite3_free(pFile); + *ppFile = 0; + }else{ + *ppFile = pFile; + } + }else{ + *ppFile = 0; + rc = SQLITE_NOMEM_BKPT; + } + assert( *ppFile!=0 || rc!=SQLITE_OK ); + return rc; +} +SQLITE_PRIVATE void sqlite3OsCloseFree(sqlite3_file *pFile){ + assert( pFile ); + sqlite3OsClose(pFile); + sqlite3_free(pFile); +} + +/* +** This function is a wrapper around the OS specific implementation of +** sqlite3_os_init(). The purpose of the wrapper is to provide the +** ability to simulate a malloc failure, so that the handling of an +** error in sqlite3_os_init() by the upper layers can be tested. +*/ +SQLITE_PRIVATE int sqlite3OsInit(void){ + void *p = sqlite3_malloc(10); + if( p==0 ) return SQLITE_NOMEM_BKPT; + sqlite3_free(p); + return sqlite3_os_init(); +} + +/* +** The list of all registered VFS implementations. +*/ +static sqlite3_vfs * SQLITE_WSD vfsList = 0; +#define vfsList GLOBAL(sqlite3_vfs *, vfsList) + +/* +** Locate a VFS by name. If no name is given, simply return the +** first VFS on the list. +*/ +SQLITE_API sqlite3_vfs *sqlite3_vfs_find(const char *zVfs){ + sqlite3_vfs *pVfs = 0; +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex; +#endif +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3_initialize(); + if( rc ) return 0; +#endif +#if SQLITE_THREADSAFE + mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); +#endif + sqlite3_mutex_enter(mutex); + for(pVfs = vfsList; pVfs; pVfs=pVfs->pNext){ + if( zVfs==0 ) break; + if( strcmp(zVfs, pVfs->zName)==0 ) break; + } + sqlite3_mutex_leave(mutex); + return pVfs; +} + +/* +** Unlink a VFS from the linked list +*/ +static void vfsUnlink(sqlite3_vfs *pVfs){ + assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN)) ); + if( pVfs==0 ){ + /* No-op */ + }else if( vfsList==pVfs ){ + vfsList = pVfs->pNext; + }else if( vfsList ){ + sqlite3_vfs *p = vfsList; + while( p->pNext && p->pNext!=pVfs ){ + p = p->pNext; + } + if( p->pNext==pVfs ){ + p->pNext = pVfs->pNext; + } + } +} + +/* +** Register a VFS with the system. It is harmless to register the same +** VFS multiple times. The new VFS becomes the default if makeDflt is +** true. +*/ +SQLITE_API int sqlite3_vfs_register(sqlite3_vfs *pVfs, int makeDflt){ + MUTEX_LOGIC(sqlite3_mutex *mutex;) +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3_initialize(); + if( rc ) return rc; +#endif +#ifdef SQLITE_ENABLE_API_ARMOR + if( pVfs==0 ) return SQLITE_MISUSE_BKPT; +#endif + + MUTEX_LOGIC( mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); ) + sqlite3_mutex_enter(mutex); + vfsUnlink(pVfs); + if( makeDflt || vfsList==0 ){ + pVfs->pNext = vfsList; + vfsList = pVfs; + }else{ + pVfs->pNext = vfsList->pNext; + vfsList->pNext = pVfs; + } + assert(vfsList); + sqlite3_mutex_leave(mutex); + return SQLITE_OK; +} + +/* +** Unregister a VFS so that it is no longer accessible. +*/ +SQLITE_API int sqlite3_vfs_unregister(sqlite3_vfs *pVfs){ + MUTEX_LOGIC(sqlite3_mutex *mutex;) +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + MUTEX_LOGIC( mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); ) + sqlite3_mutex_enter(mutex); + vfsUnlink(pVfs); + sqlite3_mutex_leave(mutex); + return SQLITE_OK; +} + +/************** End of os.c **************************************************/ +/************** Begin file fault.c *******************************************/ +/* +** 2008 Jan 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code to support the concept of "benign" +** malloc failures (when the xMalloc() or xRealloc() method of the +** sqlite3_mem_methods structure fails to allocate a block of memory +** and returns 0). +** +** Most malloc failures are non-benign. After they occur, SQLite +** abandons the current operation and returns an error code (usually +** SQLITE_NOMEM) to the user. However, sometimes a fault is not necessarily +** fatal. For example, if a malloc fails while resizing a hash table, this +** is completely recoverable simply by not carrying out the resize. The +** hash table will continue to function normally. So a malloc failure +** during a hash table resize is a benign fault. +*/ + +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_UNTESTABLE + +/* +** Global variables. +*/ +typedef struct BenignMallocHooks BenignMallocHooks; +static SQLITE_WSD struct BenignMallocHooks { + void (*xBenignBegin)(void); + void (*xBenignEnd)(void); +} sqlite3Hooks = { 0, 0 }; + +/* The "wsdHooks" macro will resolve to the appropriate BenignMallocHooks +** structure. If writable static data is unsupported on the target, +** we have to locate the state vector at run-time. In the more common +** case where writable static data is supported, wsdHooks can refer directly +** to the "sqlite3Hooks" state vector declared above. +*/ +#ifdef SQLITE_OMIT_WSD +# define wsdHooksInit \ + BenignMallocHooks *x = &GLOBAL(BenignMallocHooks,sqlite3Hooks) +# define wsdHooks x[0] +#else +# define wsdHooksInit +# define wsdHooks sqlite3Hooks +#endif + + +/* +** Register hooks to call when sqlite3BeginBenignMalloc() and +** sqlite3EndBenignMalloc() are called, respectively. +*/ +SQLITE_PRIVATE void sqlite3BenignMallocHooks( + void (*xBenignBegin)(void), + void (*xBenignEnd)(void) +){ + wsdHooksInit; + wsdHooks.xBenignBegin = xBenignBegin; + wsdHooks.xBenignEnd = xBenignEnd; +} + +/* +** This (sqlite3EndBenignMalloc()) is called by SQLite code to indicate that +** subsequent malloc failures are benign. A call to sqlite3EndBenignMalloc() +** indicates that subsequent malloc failures are non-benign. +*/ +SQLITE_PRIVATE void sqlite3BeginBenignMalloc(void){ + wsdHooksInit; + if( wsdHooks.xBenignBegin ){ + wsdHooks.xBenignBegin(); + } +} +SQLITE_PRIVATE void sqlite3EndBenignMalloc(void){ + wsdHooksInit; + if( wsdHooks.xBenignEnd ){ + wsdHooks.xBenignEnd(); + } +} + +#endif /* #ifndef SQLITE_UNTESTABLE */ + +/************** End of fault.c ***********************************************/ +/************** Begin file mem0.c ********************************************/ +/* +** 2008 October 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains a no-op memory allocation drivers for use when +** SQLITE_ZERO_MALLOC is defined. The allocation drivers implemented +** here always fail. SQLite will not operate with these drivers. These +** are merely placeholders. Real drivers must be substituted using +** sqlite3_config() before SQLite will operate. +*/ +/* #include "sqliteInt.h" */ + +/* +** This version of the memory allocator is the default. It is +** used when no other memory allocator is specified using compile-time +** macros. +*/ +#ifdef SQLITE_ZERO_MALLOC + +/* +** No-op versions of all memory allocation routines +*/ +static void *sqlite3MemMalloc(int nByte){ return 0; } +static void sqlite3MemFree(void *pPrior){ return; } +static void *sqlite3MemRealloc(void *pPrior, int nByte){ return 0; } +static int sqlite3MemSize(void *pPrior){ return 0; } +static int sqlite3MemRoundup(int n){ return n; } +static int sqlite3MemInit(void *NotUsed){ return SQLITE_OK; } +static void sqlite3MemShutdown(void *NotUsed){ return; } + +/* +** This routine is the only routine in this file with external linkage. +** +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. +*/ +SQLITE_PRIVATE void sqlite3MemSetDefault(void){ + static const sqlite3_mem_methods defaultMethods = { + sqlite3MemMalloc, + sqlite3MemFree, + sqlite3MemRealloc, + sqlite3MemSize, + sqlite3MemRoundup, + sqlite3MemInit, + sqlite3MemShutdown, + 0 + }; + sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods); +} + +#endif /* SQLITE_ZERO_MALLOC */ + +/************** End of mem0.c ************************************************/ +/************** Begin file mem1.c ********************************************/ +/* +** 2007 August 14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains low-level memory allocation drivers for when +** SQLite will use the standard C-library malloc/realloc/free interface +** to obtain the memory it needs. +** +** This file contains implementations of the low-level memory allocation +** routines specified in the sqlite3_mem_methods object. The content of +** this file is only used if SQLITE_SYSTEM_MALLOC is defined. The +** SQLITE_SYSTEM_MALLOC macro is defined automatically if neither the +** SQLITE_MEMDEBUG nor the SQLITE_WIN32_MALLOC macros are defined. The +** default configuration is to use memory allocation routines in this +** file. +** +** C-preprocessor macro summary: +** +** HAVE_MALLOC_USABLE_SIZE The configure script sets this symbol if +** the malloc_usable_size() interface exists +** on the target platform. Or, this symbol +** can be set manually, if desired. +** If an equivalent interface exists by +** a different name, using a separate -D +** option to rename it. +** +** SQLITE_WITHOUT_ZONEMALLOC Some older macs lack support for the zone +** memory allocator. Set this symbol to enable +** building on older macs. +** +** SQLITE_WITHOUT_MSIZE Set this symbol to disable the use of +** _msize() on windows systems. This might +** be necessary when compiling for Delphi, +** for example. +*/ +/* #include "sqliteInt.h" */ + +/* +** This version of the memory allocator is the default. It is +** used when no other memory allocator is specified using compile-time +** macros. +*/ +#ifdef SQLITE_SYSTEM_MALLOC +#if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC) + +/* +** Use the zone allocator available on apple products unless the +** SQLITE_WITHOUT_ZONEMALLOC symbol is defined. +*/ +#include +#include +#ifdef SQLITE_MIGHT_BE_SINGLE_CORE +#include +#endif /* SQLITE_MIGHT_BE_SINGLE_CORE */ +static malloc_zone_t* _sqliteZone_; +#define SQLITE_MALLOC(x) malloc_zone_malloc(_sqliteZone_, (x)) +#define SQLITE_FREE(x) malloc_zone_free(_sqliteZone_, (x)); +#define SQLITE_REALLOC(x,y) malloc_zone_realloc(_sqliteZone_, (x), (y)) +#define SQLITE_MALLOCSIZE(x) \ + (_sqliteZone_ ? _sqliteZone_->size(_sqliteZone_,x) : malloc_size(x)) + +#else /* if not __APPLE__ */ + +/* +** Use standard C library malloc and free on non-Apple systems. +** Also used by Apple systems if SQLITE_WITHOUT_ZONEMALLOC is defined. +*/ +#define SQLITE_MALLOC(x) malloc(x) +#define SQLITE_FREE(x) free(x) +#define SQLITE_REALLOC(x,y) realloc((x),(y)) + +/* +** The malloc.h header file is needed for malloc_usable_size() function +** on some systems (e.g. Linux). +*/ +#if HAVE_MALLOC_H && HAVE_MALLOC_USABLE_SIZE +# define SQLITE_USE_MALLOC_H 1 +# define SQLITE_USE_MALLOC_USABLE_SIZE 1 +/* +** The MSVCRT has malloc_usable_size(), but it is called _msize(). The +** use of _msize() is automatic, but can be disabled by compiling with +** -DSQLITE_WITHOUT_MSIZE. Using the _msize() function also requires +** the malloc.h header file. +*/ +#elif defined(_MSC_VER) && !defined(SQLITE_WITHOUT_MSIZE) +# define SQLITE_USE_MALLOC_H +# define SQLITE_USE_MSIZE +#endif + +/* +** Include the malloc.h header file, if necessary. Also set define macro +** SQLITE_MALLOCSIZE to the appropriate function name, which is _msize() +** for MSVC and malloc_usable_size() for most other systems (e.g. Linux). +** The memory size function can always be overridden manually by defining +** the macro SQLITE_MALLOCSIZE to the desired function name. +*/ +#if defined(SQLITE_USE_MALLOC_H) +# include +# if defined(SQLITE_USE_MALLOC_USABLE_SIZE) +# if !defined(SQLITE_MALLOCSIZE) +# define SQLITE_MALLOCSIZE(x) malloc_usable_size(x) +# endif +# elif defined(SQLITE_USE_MSIZE) +# if !defined(SQLITE_MALLOCSIZE) +# define SQLITE_MALLOCSIZE _msize +# endif +# endif +#endif /* defined(SQLITE_USE_MALLOC_H) */ + +#endif /* __APPLE__ or not __APPLE__ */ + +/* +** Like malloc(), but remember the size of the allocation +** so that we can find it later using sqlite3MemSize(). +** +** For this low-level routine, we are guaranteed that nByte>0 because +** cases of nByte<=0 will be intercepted and dealt with by higher level +** routines. +*/ +static void *sqlite3MemMalloc(int nByte){ +#ifdef SQLITE_MALLOCSIZE + void *p; + testcase( ROUND8(nByte)==nByte ); + p = SQLITE_MALLOC( nByte ); + if( p==0 ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte); + } + return p; +#else + sqlite3_int64 *p; + assert( nByte>0 ); + testcase( ROUND8(nByte)!=nByte ); + p = SQLITE_MALLOC( nByte+8 ); + if( p ){ + p[0] = nByte; + p++; + }else{ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte); + } + return (void *)p; +#endif +} + +/* +** Like free() but works for allocations obtained from sqlite3MemMalloc() +** or sqlite3MemRealloc(). +** +** For this low-level routine, we already know that pPrior!=0 since +** cases where pPrior==0 will have been intecepted and dealt with +** by higher-level routines. +*/ +static void sqlite3MemFree(void *pPrior){ +#ifdef SQLITE_MALLOCSIZE + SQLITE_FREE(pPrior); +#else + sqlite3_int64 *p = (sqlite3_int64*)pPrior; + assert( pPrior!=0 ); + p--; + SQLITE_FREE(p); +#endif +} + +/* +** Report the allocated size of a prior return from xMalloc() +** or xRealloc(). +*/ +static int sqlite3MemSize(void *pPrior){ +#ifdef SQLITE_MALLOCSIZE + assert( pPrior!=0 ); + return (int)SQLITE_MALLOCSIZE(pPrior); +#else + sqlite3_int64 *p; + assert( pPrior!=0 ); + p = (sqlite3_int64*)pPrior; + p--; + return (int)p[0]; +#endif +} + +/* +** Like realloc(). Resize an allocation previously obtained from +** sqlite3MemMalloc(). +** +** For this low-level interface, we know that pPrior!=0. Cases where +** pPrior==0 while have been intercepted by higher-level routine and +** redirected to xMalloc. Similarly, we know that nByte>0 because +** cases where nByte<=0 will have been intercepted by higher-level +** routines and redirected to xFree. +*/ +static void *sqlite3MemRealloc(void *pPrior, int nByte){ +#ifdef SQLITE_MALLOCSIZE + void *p = SQLITE_REALLOC(pPrior, nByte); + if( p==0 ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, + "failed memory resize %u to %u bytes", + SQLITE_MALLOCSIZE(pPrior), nByte); + } + return p; +#else + sqlite3_int64 *p = (sqlite3_int64*)pPrior; + assert( pPrior!=0 && nByte>0 ); + assert( nByte==ROUND8(nByte) ); /* EV: R-46199-30249 */ + p--; + p = SQLITE_REALLOC(p, nByte+8 ); + if( p ){ + p[0] = nByte; + p++; + }else{ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, + "failed memory resize %u to %u bytes", + sqlite3MemSize(pPrior), nByte); + } + return (void*)p; +#endif +} + +/* +** Round up a request size to the next valid allocation size. +*/ +static int sqlite3MemRoundup(int n){ + return ROUND8(n); +} + +/* +** Initialize this module. +*/ +static int sqlite3MemInit(void *NotUsed){ +#if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC) + int cpuCount; + size_t len; + if( _sqliteZone_ ){ + return SQLITE_OK; + } + len = sizeof(cpuCount); + /* One usually wants to use hw.acctivecpu for MT decisions, but not here */ + sysctlbyname("hw.ncpu", &cpuCount, &len, NULL, 0); + if( cpuCount>1 ){ + /* defer MT decisions to system malloc */ + _sqliteZone_ = malloc_default_zone(); + }else{ + /* only 1 core, use our own zone to contention over global locks, + ** e.g. we have our own dedicated locks */ + _sqliteZone_ = malloc_create_zone(4096, 0); + malloc_set_zone_name(_sqliteZone_, "Sqlite_Heap"); + } +#endif /* defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC) */ + UNUSED_PARAMETER(NotUsed); + return SQLITE_OK; +} + +/* +** Deinitialize this module. +*/ +static void sqlite3MemShutdown(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + return; +} + +/* +** This routine is the only routine in this file with external linkage. +** +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. +*/ +SQLITE_PRIVATE void sqlite3MemSetDefault(void){ + static const sqlite3_mem_methods defaultMethods = { + sqlite3MemMalloc, + sqlite3MemFree, + sqlite3MemRealloc, + sqlite3MemSize, + sqlite3MemRoundup, + sqlite3MemInit, + sqlite3MemShutdown, + 0 + }; + sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods); +} + +#endif /* SQLITE_SYSTEM_MALLOC */ + +/************** End of mem1.c ************************************************/ +/************** Begin file mem2.c ********************************************/ +/* +** 2007 August 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains low-level memory allocation drivers for when +** SQLite will use the standard C-library malloc/realloc/free interface +** to obtain the memory it needs while adding lots of additional debugging +** information to each allocation in order to help detect and fix memory +** leaks and memory usage errors. +** +** This file contains implementations of the low-level memory allocation +** routines specified in the sqlite3_mem_methods object. +*/ +/* #include "sqliteInt.h" */ + +/* +** This version of the memory allocator is used only if the +** SQLITE_MEMDEBUG macro is defined +*/ +#ifdef SQLITE_MEMDEBUG + +/* +** The backtrace functionality is only available with GLIBC +*/ +#ifdef __GLIBC__ + extern int backtrace(void**,int); + extern void backtrace_symbols_fd(void*const*,int,int); +#else +# define backtrace(A,B) 1 +# define backtrace_symbols_fd(A,B,C) +#endif +/* #include */ + +/* +** Each memory allocation looks like this: +** +** ------------------------------------------------------------------------ +** | Title | backtrace pointers | MemBlockHdr | allocation | EndGuard | +** ------------------------------------------------------------------------ +** +** The application code sees only a pointer to the allocation. We have +** to back up from the allocation pointer to find the MemBlockHdr. The +** MemBlockHdr tells us the size of the allocation and the number of +** backtrace pointers. There is also a guard word at the end of the +** MemBlockHdr. +*/ +struct MemBlockHdr { + i64 iSize; /* Size of this allocation */ + struct MemBlockHdr *pNext, *pPrev; /* Linked list of all unfreed memory */ + char nBacktrace; /* Number of backtraces on this alloc */ + char nBacktraceSlots; /* Available backtrace slots */ + u8 nTitle; /* Bytes of title; includes '\0' */ + u8 eType; /* Allocation type code */ + int iForeGuard; /* Guard word for sanity */ +}; + +/* +** Guard words +*/ +#define FOREGUARD 0x80F5E153 +#define REARGUARD 0xE4676B53 + +/* +** Number of malloc size increments to track. +*/ +#define NCSIZE 1000 + +/* +** All of the static variables used by this module are collected +** into a single structure named "mem". This is to keep the +** static variables organized and to reduce namespace pollution +** when this module is combined with other in the amalgamation. +*/ +static struct { + + /* + ** Mutex to control access to the memory allocation subsystem. + */ + sqlite3_mutex *mutex; + + /* + ** Head and tail of a linked list of all outstanding allocations + */ + struct MemBlockHdr *pFirst; + struct MemBlockHdr *pLast; + + /* + ** The number of levels of backtrace to save in new allocations. + */ + int nBacktrace; + void (*xBacktrace)(int, int, void **); + + /* + ** Title text to insert in front of each block + */ + int nTitle; /* Bytes of zTitle to save. Includes '\0' and padding */ + char zTitle[100]; /* The title text */ + + /* + ** sqlite3MallocDisallow() increments the following counter. + ** sqlite3MallocAllow() decrements it. + */ + int disallow; /* Do not allow memory allocation */ + + /* + ** Gather statistics on the sizes of memory allocations. + ** nAlloc[i] is the number of allocation attempts of i*8 + ** bytes. i==NCSIZE is the number of allocation attempts for + ** sizes more than NCSIZE*8 bytes. + */ + int nAlloc[NCSIZE]; /* Total number of allocations */ + int nCurrent[NCSIZE]; /* Current number of allocations */ + int mxCurrent[NCSIZE]; /* Highwater mark for nCurrent */ + +} mem; + + +/* +** Adjust memory usage statistics +*/ +static void adjustStats(int iSize, int increment){ + int i = ROUND8(iSize)/8; + if( i>NCSIZE-1 ){ + i = NCSIZE - 1; + } + if( increment>0 ){ + mem.nAlloc[i]++; + mem.nCurrent[i]++; + if( mem.nCurrent[i]>mem.mxCurrent[i] ){ + mem.mxCurrent[i] = mem.nCurrent[i]; + } + }else{ + mem.nCurrent[i]--; + assert( mem.nCurrent[i]>=0 ); + } +} + +/* +** Given an allocation, find the MemBlockHdr for that allocation. +** +** This routine checks the guards at either end of the allocation and +** if they are incorrect it asserts. +*/ +static struct MemBlockHdr *sqlite3MemsysGetHeader(const void *pAllocation){ + struct MemBlockHdr *p; + int *pInt; + u8 *pU8; + int nReserve; + + p = (struct MemBlockHdr*)pAllocation; + p--; + assert( p->iForeGuard==(int)FOREGUARD ); + nReserve = ROUND8(p->iSize); + pInt = (int*)pAllocation; + pU8 = (u8*)pAllocation; + assert( pInt[nReserve/sizeof(int)]==(int)REARGUARD ); + /* This checks any of the "extra" bytes allocated due + ** to rounding up to an 8 byte boundary to ensure + ** they haven't been overwritten. + */ + while( nReserve-- > p->iSize ) assert( pU8[nReserve]==0x65 ); + return p; +} + +/* +** Return the number of bytes currently allocated at address p. +*/ +static int sqlite3MemSize(void *p){ + struct MemBlockHdr *pHdr; + if( !p ){ + return 0; + } + pHdr = sqlite3MemsysGetHeader(p); + return (int)pHdr->iSize; +} + +/* +** Initialize the memory allocation subsystem. +*/ +static int sqlite3MemInit(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + assert( (sizeof(struct MemBlockHdr)&7) == 0 ); + if( !sqlite3GlobalConfig.bMemstat ){ + /* If memory status is enabled, then the malloc.c wrapper will already + ** hold the STATIC_MEM mutex when the routines here are invoked. */ + mem.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM); + } + return SQLITE_OK; +} + +/* +** Deinitialize the memory allocation subsystem. +*/ +static void sqlite3MemShutdown(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + mem.mutex = 0; +} + +/* +** Round up a request size to the next valid allocation size. +*/ +static int sqlite3MemRoundup(int n){ + return ROUND8(n); +} + +/* +** Fill a buffer with pseudo-random bytes. This is used to preset +** the content of a new memory allocation to unpredictable values and +** to clear the content of a freed allocation to unpredictable values. +*/ +static void randomFill(char *pBuf, int nByte){ + unsigned int x, y, r; + x = SQLITE_PTR_TO_INT(pBuf); + y = nByte | 1; + while( nByte >= 4 ){ + x = (x>>1) ^ (-(int)(x&1) & 0xd0000001); + y = y*1103515245 + 12345; + r = x ^ y; + *(int*)pBuf = r; + pBuf += 4; + nByte -= 4; + } + while( nByte-- > 0 ){ + x = (x>>1) ^ (-(int)(x&1) & 0xd0000001); + y = y*1103515245 + 12345; + r = x ^ y; + *(pBuf++) = r & 0xff; + } +} + +/* +** Allocate nByte bytes of memory. +*/ +static void *sqlite3MemMalloc(int nByte){ + struct MemBlockHdr *pHdr; + void **pBt; + char *z; + int *pInt; + void *p = 0; + int totalSize; + int nReserve; + sqlite3_mutex_enter(mem.mutex); + assert( mem.disallow==0 ); + nReserve = ROUND8(nByte); + totalSize = nReserve + sizeof(*pHdr) + sizeof(int) + + mem.nBacktrace*sizeof(void*) + mem.nTitle; + p = malloc(totalSize); + if( p ){ + z = p; + pBt = (void**)&z[mem.nTitle]; + pHdr = (struct MemBlockHdr*)&pBt[mem.nBacktrace]; + pHdr->pNext = 0; + pHdr->pPrev = mem.pLast; + if( mem.pLast ){ + mem.pLast->pNext = pHdr; + }else{ + mem.pFirst = pHdr; + } + mem.pLast = pHdr; + pHdr->iForeGuard = FOREGUARD; + pHdr->eType = MEMTYPE_HEAP; + pHdr->nBacktraceSlots = mem.nBacktrace; + pHdr->nTitle = mem.nTitle; + if( mem.nBacktrace ){ + void *aAddr[40]; + pHdr->nBacktrace = backtrace(aAddr, mem.nBacktrace+1)-1; + memcpy(pBt, &aAddr[1], pHdr->nBacktrace*sizeof(void*)); + assert(pBt[0]); + if( mem.xBacktrace ){ + mem.xBacktrace(nByte, pHdr->nBacktrace-1, &aAddr[1]); + } + }else{ + pHdr->nBacktrace = 0; + } + if( mem.nTitle ){ + memcpy(z, mem.zTitle, mem.nTitle); + } + pHdr->iSize = nByte; + adjustStats(nByte, +1); + pInt = (int*)&pHdr[1]; + pInt[nReserve/sizeof(int)] = REARGUARD; + randomFill((char*)pInt, nByte); + memset(((char*)pInt)+nByte, 0x65, nReserve-nByte); + p = (void*)pInt; + } + sqlite3_mutex_leave(mem.mutex); + return p; +} + +/* +** Free memory. +*/ +static void sqlite3MemFree(void *pPrior){ + struct MemBlockHdr *pHdr; + void **pBt; + char *z; + assert( sqlite3GlobalConfig.bMemstat || sqlite3GlobalConfig.bCoreMutex==0 + || mem.mutex!=0 ); + pHdr = sqlite3MemsysGetHeader(pPrior); + pBt = (void**)pHdr; + pBt -= pHdr->nBacktraceSlots; + sqlite3_mutex_enter(mem.mutex); + if( pHdr->pPrev ){ + assert( pHdr->pPrev->pNext==pHdr ); + pHdr->pPrev->pNext = pHdr->pNext; + }else{ + assert( mem.pFirst==pHdr ); + mem.pFirst = pHdr->pNext; + } + if( pHdr->pNext ){ + assert( pHdr->pNext->pPrev==pHdr ); + pHdr->pNext->pPrev = pHdr->pPrev; + }else{ + assert( mem.pLast==pHdr ); + mem.pLast = pHdr->pPrev; + } + z = (char*)pBt; + z -= pHdr->nTitle; + adjustStats((int)pHdr->iSize, -1); + randomFill(z, sizeof(void*)*pHdr->nBacktraceSlots + sizeof(*pHdr) + + (int)pHdr->iSize + sizeof(int) + pHdr->nTitle); + free(z); + sqlite3_mutex_leave(mem.mutex); +} + +/* +** Change the size of an existing memory allocation. +** +** For this debugging implementation, we *always* make a copy of the +** allocation into a new place in memory. In this way, if the +** higher level code is using pointer to the old allocation, it is +** much more likely to break and we are much more liking to find +** the error. +*/ +static void *sqlite3MemRealloc(void *pPrior, int nByte){ + struct MemBlockHdr *pOldHdr; + void *pNew; + assert( mem.disallow==0 ); + assert( (nByte & 7)==0 ); /* EV: R-46199-30249 */ + pOldHdr = sqlite3MemsysGetHeader(pPrior); + pNew = sqlite3MemMalloc(nByte); + if( pNew ){ + memcpy(pNew, pPrior, (int)(nByteiSize ? nByte : pOldHdr->iSize)); + if( nByte>pOldHdr->iSize ){ + randomFill(&((char*)pNew)[pOldHdr->iSize], nByte - (int)pOldHdr->iSize); + } + sqlite3MemFree(pPrior); + } + return pNew; +} + +/* +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. +*/ +SQLITE_PRIVATE void sqlite3MemSetDefault(void){ + static const sqlite3_mem_methods defaultMethods = { + sqlite3MemMalloc, + sqlite3MemFree, + sqlite3MemRealloc, + sqlite3MemSize, + sqlite3MemRoundup, + sqlite3MemInit, + sqlite3MemShutdown, + 0 + }; + sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods); +} + +/* +** Set the "type" of an allocation. +*/ +SQLITE_PRIVATE void sqlite3MemdebugSetType(void *p, u8 eType){ + if( p && sqlite3GlobalConfig.m.xFree==sqlite3MemFree ){ + struct MemBlockHdr *pHdr; + pHdr = sqlite3MemsysGetHeader(p); + assert( pHdr->iForeGuard==FOREGUARD ); + pHdr->eType = eType; + } +} + +/* +** Return TRUE if the mask of type in eType matches the type of the +** allocation p. Also return true if p==NULL. +** +** This routine is designed for use within an assert() statement, to +** verify the type of an allocation. For example: +** +** assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); +*/ +SQLITE_PRIVATE int sqlite3MemdebugHasType(const void *p, u8 eType){ + int rc = 1; + if( p && sqlite3GlobalConfig.m.xFree==sqlite3MemFree ){ + struct MemBlockHdr *pHdr; + pHdr = sqlite3MemsysGetHeader(p); + assert( pHdr->iForeGuard==FOREGUARD ); /* Allocation is valid */ + if( (pHdr->eType&eType)==0 ){ + rc = 0; + } + } + return rc; +} + +/* +** Return TRUE if the mask of type in eType matches no bits of the type of the +** allocation p. Also return true if p==NULL. +** +** This routine is designed for use within an assert() statement, to +** verify the type of an allocation. For example: +** +** assert( sqlite3MemdebugNoType(p, MEMTYPE_LOOKASIDE) ); +*/ +SQLITE_PRIVATE int sqlite3MemdebugNoType(const void *p, u8 eType){ + int rc = 1; + if( p && sqlite3GlobalConfig.m.xFree==sqlite3MemFree ){ + struct MemBlockHdr *pHdr; + pHdr = sqlite3MemsysGetHeader(p); + assert( pHdr->iForeGuard==FOREGUARD ); /* Allocation is valid */ + if( (pHdr->eType&eType)!=0 ){ + rc = 0; + } + } + return rc; +} + +/* +** Set the number of backtrace levels kept for each allocation. +** A value of zero turns off backtracing. The number is always rounded +** up to a multiple of 2. +*/ +SQLITE_PRIVATE void sqlite3MemdebugBacktrace(int depth){ + if( depth<0 ){ depth = 0; } + if( depth>20 ){ depth = 20; } + depth = (depth+1)&0xfe; + mem.nBacktrace = depth; +} + +SQLITE_PRIVATE void sqlite3MemdebugBacktraceCallback(void (*xBacktrace)(int, int, void **)){ + mem.xBacktrace = xBacktrace; +} + +/* +** Set the title string for subsequent allocations. +*/ +SQLITE_PRIVATE void sqlite3MemdebugSettitle(const char *zTitle){ + unsigned int n = sqlite3Strlen30(zTitle) + 1; + sqlite3_mutex_enter(mem.mutex); + if( n>=sizeof(mem.zTitle) ) n = sizeof(mem.zTitle)-1; + memcpy(mem.zTitle, zTitle, n); + mem.zTitle[n] = 0; + mem.nTitle = ROUND8(n); + sqlite3_mutex_leave(mem.mutex); +} + +SQLITE_PRIVATE void sqlite3MemdebugSync(){ + struct MemBlockHdr *pHdr; + for(pHdr=mem.pFirst; pHdr; pHdr=pHdr->pNext){ + void **pBt = (void**)pHdr; + pBt -= pHdr->nBacktraceSlots; + mem.xBacktrace((int)pHdr->iSize, pHdr->nBacktrace-1, &pBt[1]); + } +} + +/* +** Open the file indicated and write a log of all unfreed memory +** allocations into that log. +*/ +SQLITE_PRIVATE void sqlite3MemdebugDump(const char *zFilename){ + FILE *out; + struct MemBlockHdr *pHdr; + void **pBt; + int i; + out = fopen(zFilename, "w"); + if( out==0 ){ + fprintf(stderr, "** Unable to output memory debug output log: %s **\n", + zFilename); + return; + } + for(pHdr=mem.pFirst; pHdr; pHdr=pHdr->pNext){ + char *z = (char*)pHdr; + z -= pHdr->nBacktraceSlots*sizeof(void*) + pHdr->nTitle; + fprintf(out, "**** %lld bytes at %p from %s ****\n", + pHdr->iSize, &pHdr[1], pHdr->nTitle ? z : "???"); + if( pHdr->nBacktrace ){ + fflush(out); + pBt = (void**)pHdr; + pBt -= pHdr->nBacktraceSlots; + backtrace_symbols_fd(pBt, pHdr->nBacktrace, fileno(out)); + fprintf(out, "\n"); + } + } + fprintf(out, "COUNTS:\n"); + for(i=0; i=1 ); + size = mem3.aPool[i-1].u.hdr.size4x/4; + assert( size==mem3.aPool[i+size-1].u.hdr.prevSize ); + assert( size>=2 ); + if( size <= MX_SMALL ){ + memsys3UnlinkFromList(i, &mem3.aiSmall[size-2]); + }else{ + hash = size % N_HASH; + memsys3UnlinkFromList(i, &mem3.aiHash[hash]); + } +} + +/* +** Link the chunk at mem3.aPool[i] so that is on the list rooted +** at *pRoot. +*/ +static void memsys3LinkIntoList(u32 i, u32 *pRoot){ + assert( sqlite3_mutex_held(mem3.mutex) ); + mem3.aPool[i].u.list.next = *pRoot; + mem3.aPool[i].u.list.prev = 0; + if( *pRoot ){ + mem3.aPool[*pRoot].u.list.prev = i; + } + *pRoot = i; +} + +/* +** Link the chunk at index i into either the appropriate +** small chunk list, or into the large chunk hash table. +*/ +static void memsys3Link(u32 i){ + u32 size, hash; + assert( sqlite3_mutex_held(mem3.mutex) ); + assert( i>=1 ); + assert( (mem3.aPool[i-1].u.hdr.size4x & 1)==0 ); + size = mem3.aPool[i-1].u.hdr.size4x/4; + assert( size==mem3.aPool[i+size-1].u.hdr.prevSize ); + assert( size>=2 ); + if( size <= MX_SMALL ){ + memsys3LinkIntoList(i, &mem3.aiSmall[size-2]); + }else{ + hash = size % N_HASH; + memsys3LinkIntoList(i, &mem3.aiHash[hash]); + } +} + +/* +** If the STATIC_MEM mutex is not already held, obtain it now. The mutex +** will already be held (obtained by code in malloc.c) if +** sqlite3GlobalConfig.bMemStat is true. +*/ +static void memsys3Enter(void){ + if( sqlite3GlobalConfig.bMemstat==0 && mem3.mutex==0 ){ + mem3.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM); + } + sqlite3_mutex_enter(mem3.mutex); +} +static void memsys3Leave(void){ + sqlite3_mutex_leave(mem3.mutex); +} + +/* +** Called when we are unable to satisfy an allocation of nBytes. +*/ +static void memsys3OutOfMemory(int nByte){ + if( !mem3.alarmBusy ){ + mem3.alarmBusy = 1; + assert( sqlite3_mutex_held(mem3.mutex) ); + sqlite3_mutex_leave(mem3.mutex); + sqlite3_release_memory(nByte); + sqlite3_mutex_enter(mem3.mutex); + mem3.alarmBusy = 0; + } +} + + +/* +** Chunk i is a free chunk that has been unlinked. Adjust its +** size parameters for check-out and return a pointer to the +** user portion of the chunk. +*/ +static void *memsys3Checkout(u32 i, u32 nBlock){ + u32 x; + assert( sqlite3_mutex_held(mem3.mutex) ); + assert( i>=1 ); + assert( mem3.aPool[i-1].u.hdr.size4x/4==nBlock ); + assert( mem3.aPool[i+nBlock-1].u.hdr.prevSize==nBlock ); + x = mem3.aPool[i-1].u.hdr.size4x; + mem3.aPool[i-1].u.hdr.size4x = nBlock*4 | 1 | (x&2); + mem3.aPool[i+nBlock-1].u.hdr.prevSize = nBlock; + mem3.aPool[i+nBlock-1].u.hdr.size4x |= 2; + return &mem3.aPool[i]; +} + +/* +** Carve a piece off of the end of the mem3.iKeyBlk free chunk. +** Return a pointer to the new allocation. Or, if the key chunk +** is not large enough, return 0. +*/ +static void *memsys3FromKeyBlk(u32 nBlock){ + assert( sqlite3_mutex_held(mem3.mutex) ); + assert( mem3.szKeyBlk>=nBlock ); + if( nBlock>=mem3.szKeyBlk-1 ){ + /* Use the entire key chunk */ + void *p = memsys3Checkout(mem3.iKeyBlk, mem3.szKeyBlk); + mem3.iKeyBlk = 0; + mem3.szKeyBlk = 0; + mem3.mnKeyBlk = 0; + return p; + }else{ + /* Split the key block. Return the tail. */ + u32 newi, x; + newi = mem3.iKeyBlk + mem3.szKeyBlk - nBlock; + assert( newi > mem3.iKeyBlk+1 ); + mem3.aPool[mem3.iKeyBlk+mem3.szKeyBlk-1].u.hdr.prevSize = nBlock; + mem3.aPool[mem3.iKeyBlk+mem3.szKeyBlk-1].u.hdr.size4x |= 2; + mem3.aPool[newi-1].u.hdr.size4x = nBlock*4 + 1; + mem3.szKeyBlk -= nBlock; + mem3.aPool[newi-1].u.hdr.prevSize = mem3.szKeyBlk; + x = mem3.aPool[mem3.iKeyBlk-1].u.hdr.size4x & 2; + mem3.aPool[mem3.iKeyBlk-1].u.hdr.size4x = mem3.szKeyBlk*4 | x; + if( mem3.szKeyBlk < mem3.mnKeyBlk ){ + mem3.mnKeyBlk = mem3.szKeyBlk; + } + return (void*)&mem3.aPool[newi]; + } +} + +/* +** *pRoot is the head of a list of free chunks of the same size +** or same size hash. In other words, *pRoot is an entry in either +** mem3.aiSmall[] or mem3.aiHash[]. +** +** This routine examines all entries on the given list and tries +** to coalesce each entries with adjacent free chunks. +** +** If it sees a chunk that is larger than mem3.iKeyBlk, it replaces +** the current mem3.iKeyBlk with the new larger chunk. In order for +** this mem3.iKeyBlk replacement to work, the key chunk must be +** linked into the hash tables. That is not the normal state of +** affairs, of course. The calling routine must link the key +** chunk before invoking this routine, then must unlink the (possibly +** changed) key chunk once this routine has finished. +*/ +static void memsys3Merge(u32 *pRoot){ + u32 iNext, prev, size, i, x; + + assert( sqlite3_mutex_held(mem3.mutex) ); + for(i=*pRoot; i>0; i=iNext){ + iNext = mem3.aPool[i].u.list.next; + size = mem3.aPool[i-1].u.hdr.size4x; + assert( (size&1)==0 ); + if( (size&2)==0 ){ + memsys3UnlinkFromList(i, pRoot); + assert( i > mem3.aPool[i-1].u.hdr.prevSize ); + prev = i - mem3.aPool[i-1].u.hdr.prevSize; + if( prev==iNext ){ + iNext = mem3.aPool[prev].u.list.next; + } + memsys3Unlink(prev); + size = i + size/4 - prev; + x = mem3.aPool[prev-1].u.hdr.size4x & 2; + mem3.aPool[prev-1].u.hdr.size4x = size*4 | x; + mem3.aPool[prev+size-1].u.hdr.prevSize = size; + memsys3Link(prev); + i = prev; + }else{ + size /= 4; + } + if( size>mem3.szKeyBlk ){ + mem3.iKeyBlk = i; + mem3.szKeyBlk = size; + } + } +} + +/* +** Return a block of memory of at least nBytes in size. +** Return NULL if unable. +** +** This function assumes that the necessary mutexes, if any, are +** already held by the caller. Hence "Unsafe". +*/ +static void *memsys3MallocUnsafe(int nByte){ + u32 i; + u32 nBlock; + u32 toFree; + + assert( sqlite3_mutex_held(mem3.mutex) ); + assert( sizeof(Mem3Block)==8 ); + if( nByte<=12 ){ + nBlock = 2; + }else{ + nBlock = (nByte + 11)/8; + } + assert( nBlock>=2 ); + + /* STEP 1: + ** Look for an entry of the correct size in either the small + ** chunk table or in the large chunk hash table. This is + ** successful most of the time (about 9 times out of 10). + */ + if( nBlock <= MX_SMALL ){ + i = mem3.aiSmall[nBlock-2]; + if( i>0 ){ + memsys3UnlinkFromList(i, &mem3.aiSmall[nBlock-2]); + return memsys3Checkout(i, nBlock); + } + }else{ + int hash = nBlock % N_HASH; + for(i=mem3.aiHash[hash]; i>0; i=mem3.aPool[i].u.list.next){ + if( mem3.aPool[i-1].u.hdr.size4x/4==nBlock ){ + memsys3UnlinkFromList(i, &mem3.aiHash[hash]); + return memsys3Checkout(i, nBlock); + } + } + } + + /* STEP 2: + ** Try to satisfy the allocation by carving a piece off of the end + ** of the key chunk. This step usually works if step 1 fails. + */ + if( mem3.szKeyBlk>=nBlock ){ + return memsys3FromKeyBlk(nBlock); + } + + + /* STEP 3: + ** Loop through the entire memory pool. Coalesce adjacent free + ** chunks. Recompute the key chunk as the largest free chunk. + ** Then try again to satisfy the allocation by carving a piece off + ** of the end of the key chunk. This step happens very + ** rarely (we hope!) + */ + for(toFree=nBlock*16; toFree<(mem3.nPool*16); toFree *= 2){ + memsys3OutOfMemory(toFree); + if( mem3.iKeyBlk ){ + memsys3Link(mem3.iKeyBlk); + mem3.iKeyBlk = 0; + mem3.szKeyBlk = 0; + } + for(i=0; i=nBlock ){ + return memsys3FromKeyBlk(nBlock); + } + } + } + + /* If none of the above worked, then we fail. */ + return 0; +} + +/* +** Free an outstanding memory allocation. +** +** This function assumes that the necessary mutexes, if any, are +** already held by the caller. Hence "Unsafe". +*/ +static void memsys3FreeUnsafe(void *pOld){ + Mem3Block *p = (Mem3Block*)pOld; + int i; + u32 size, x; + assert( sqlite3_mutex_held(mem3.mutex) ); + assert( p>mem3.aPool && p<&mem3.aPool[mem3.nPool] ); + i = p - mem3.aPool; + assert( (mem3.aPool[i-1].u.hdr.size4x&1)==1 ); + size = mem3.aPool[i-1].u.hdr.size4x/4; + assert( i+size<=mem3.nPool+1 ); + mem3.aPool[i-1].u.hdr.size4x &= ~1; + mem3.aPool[i+size-1].u.hdr.prevSize = size; + mem3.aPool[i+size-1].u.hdr.size4x &= ~2; + memsys3Link(i); + + /* Try to expand the key using the newly freed chunk */ + if( mem3.iKeyBlk ){ + while( (mem3.aPool[mem3.iKeyBlk-1].u.hdr.size4x&2)==0 ){ + size = mem3.aPool[mem3.iKeyBlk-1].u.hdr.prevSize; + mem3.iKeyBlk -= size; + mem3.szKeyBlk += size; + memsys3Unlink(mem3.iKeyBlk); + x = mem3.aPool[mem3.iKeyBlk-1].u.hdr.size4x & 2; + mem3.aPool[mem3.iKeyBlk-1].u.hdr.size4x = mem3.szKeyBlk*4 | x; + mem3.aPool[mem3.iKeyBlk+mem3.szKeyBlk-1].u.hdr.prevSize = mem3.szKeyBlk; + } + x = mem3.aPool[mem3.iKeyBlk-1].u.hdr.size4x & 2; + while( (mem3.aPool[mem3.iKeyBlk+mem3.szKeyBlk-1].u.hdr.size4x&1)==0 ){ + memsys3Unlink(mem3.iKeyBlk+mem3.szKeyBlk); + mem3.szKeyBlk += mem3.aPool[mem3.iKeyBlk+mem3.szKeyBlk-1].u.hdr.size4x/4; + mem3.aPool[mem3.iKeyBlk-1].u.hdr.size4x = mem3.szKeyBlk*4 | x; + mem3.aPool[mem3.iKeyBlk+mem3.szKeyBlk-1].u.hdr.prevSize = mem3.szKeyBlk; + } + } +} + +/* +** Return the size of an outstanding allocation, in bytes. The +** size returned omits the 8-byte header overhead. This only +** works for chunks that are currently checked out. +*/ +static int memsys3Size(void *p){ + Mem3Block *pBlock; + assert( p!=0 ); + pBlock = (Mem3Block*)p; + assert( (pBlock[-1].u.hdr.size4x&1)!=0 ); + return (pBlock[-1].u.hdr.size4x&~3)*2 - 4; +} + +/* +** Round up a request size to the next valid allocation size. +*/ +static int memsys3Roundup(int n){ + if( n<=12 ){ + return 12; + }else{ + return ((n+11)&~7) - 4; + } +} + +/* +** Allocate nBytes of memory. +*/ +static void *memsys3Malloc(int nBytes){ + sqlite3_int64 *p; + assert( nBytes>0 ); /* malloc.c filters out 0 byte requests */ + memsys3Enter(); + p = memsys3MallocUnsafe(nBytes); + memsys3Leave(); + return (void*)p; +} + +/* +** Free memory. +*/ +static void memsys3Free(void *pPrior){ + assert( pPrior ); + memsys3Enter(); + memsys3FreeUnsafe(pPrior); + memsys3Leave(); +} + +/* +** Change the size of an existing memory allocation +*/ +static void *memsys3Realloc(void *pPrior, int nBytes){ + int nOld; + void *p; + if( pPrior==0 ){ + return sqlite3_malloc(nBytes); + } + if( nBytes<=0 ){ + sqlite3_free(pPrior); + return 0; + } + nOld = memsys3Size(pPrior); + if( nBytes<=nOld && nBytes>=nOld-128 ){ + return pPrior; + } + memsys3Enter(); + p = memsys3MallocUnsafe(nBytes); + if( p ){ + if( nOld>1)!=(size&1) ){ + fprintf(out, "%p tail checkout bit is incorrect\n", &mem3.aPool[i]); + assert( 0 ); + break; + } + if( size&1 ){ + fprintf(out, "%p %6d bytes checked out\n", &mem3.aPool[i], (size/4)*8-8); + }else{ + fprintf(out, "%p %6d bytes free%s\n", &mem3.aPool[i], (size/4)*8-8, + i==mem3.iKeyBlk ? " **key**" : ""); + } + } + for(i=0; i0; j=mem3.aPool[j].u.list.next){ + fprintf(out, " %p(%d)", &mem3.aPool[j], + (mem3.aPool[j-1].u.hdr.size4x/4)*8-8); + } + fprintf(out, "\n"); + } + for(i=0; i0; j=mem3.aPool[j].u.list.next){ + fprintf(out, " %p(%d)", &mem3.aPool[j], + (mem3.aPool[j-1].u.hdr.size4x/4)*8-8); + } + fprintf(out, "\n"); + } + fprintf(out, "key=%d\n", mem3.iKeyBlk); + fprintf(out, "nowUsed=%d\n", mem3.nPool*8 - mem3.szKeyBlk*8); + fprintf(out, "mxUsed=%d\n", mem3.nPool*8 - mem3.mnKeyBlk*8); + sqlite3_mutex_leave(mem3.mutex); + if( out==stdout ){ + fflush(stdout); + }else{ + fclose(out); + } +#else + UNUSED_PARAMETER(zFilename); +#endif +} + +/* +** This routine is the only routine in this file with external +** linkage. +** +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. The +** arguments specify the block of memory to manage. +** +** This routine is only called by sqlite3_config(), and therefore +** is not required to be threadsafe (it is not). +*/ +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys3(void){ + static const sqlite3_mem_methods mempoolMethods = { + memsys3Malloc, + memsys3Free, + memsys3Realloc, + memsys3Size, + memsys3Roundup, + memsys3Init, + memsys3Shutdown, + 0 + }; + return &mempoolMethods; +} + +#endif /* SQLITE_ENABLE_MEMSYS3 */ + +/************** End of mem3.c ************************************************/ +/************** Begin file mem5.c ********************************************/ +/* +** 2007 October 14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement a memory +** allocation subsystem for use by SQLite. +** +** This version of the memory allocation subsystem omits all +** use of malloc(). The application gives SQLite a block of memory +** before calling sqlite3_initialize() from which allocations +** are made and returned by the xMalloc() and xRealloc() +** implementations. Once sqlite3_initialize() has been called, +** the amount of memory available to SQLite is fixed and cannot +** be changed. +** +** This version of the memory allocation subsystem is included +** in the build only if SQLITE_ENABLE_MEMSYS5 is defined. +** +** This memory allocator uses the following algorithm: +** +** 1. All memory allocation sizes are rounded up to a power of 2. +** +** 2. If two adjacent free blocks are the halves of a larger block, +** then the two blocks are coalesced into the single larger block. +** +** 3. New memory is allocated from the first available free block. +** +** This algorithm is described in: J. M. Robson. "Bounds for Some Functions +** Concerning Dynamic Storage Allocation". Journal of the Association for +** Computing Machinery, Volume 21, Number 8, July 1974, pages 491-499. +** +** Let n be the size of the largest allocation divided by the minimum +** allocation size (after rounding all sizes up to a power of 2.) Let M +** be the maximum amount of memory ever outstanding at one time. Let +** N be the total amount of memory available for allocation. Robson +** proved that this memory allocator will never breakdown due to +** fragmentation as long as the following constraint holds: +** +** N >= M*(1 + log2(n)/2) - n + 1 +** +** The sqlite3_status() logic tracks the maximum values of n and M so +** that an application can, at any time, verify this constraint. +*/ +/* #include "sqliteInt.h" */ + +/* +** This version of the memory allocator is used only when +** SQLITE_ENABLE_MEMSYS5 is defined. +*/ +#ifdef SQLITE_ENABLE_MEMSYS5 + +/* +** A minimum allocation is an instance of the following structure. +** Larger allocations are an array of these structures where the +** size of the array is a power of 2. +** +** The size of this object must be a power of two. That fact is +** verified in memsys5Init(). +*/ +typedef struct Mem5Link Mem5Link; +struct Mem5Link { + int next; /* Index of next free chunk */ + int prev; /* Index of previous free chunk */ +}; + +/* +** Maximum size of any allocation is ((1<=0 && i=0 && iLogsize<=LOGMAX ); + assert( (mem5.aCtrl[i] & CTRL_LOGSIZE)==iLogsize ); + + next = MEM5LINK(i)->next; + prev = MEM5LINK(i)->prev; + if( prev<0 ){ + mem5.aiFreelist[iLogsize] = next; + }else{ + MEM5LINK(prev)->next = next; + } + if( next>=0 ){ + MEM5LINK(next)->prev = prev; + } +} + +/* +** Link the chunk at mem5.aPool[i] so that is on the iLogsize +** free list. +*/ +static void memsys5Link(int i, int iLogsize){ + int x; + assert( sqlite3_mutex_held(mem5.mutex) ); + assert( i>=0 && i=0 && iLogsize<=LOGMAX ); + assert( (mem5.aCtrl[i] & CTRL_LOGSIZE)==iLogsize ); + + x = MEM5LINK(i)->next = mem5.aiFreelist[iLogsize]; + MEM5LINK(i)->prev = -1; + if( x>=0 ){ + assert( xprev = i; + } + mem5.aiFreelist[iLogsize] = i; +} + +/* +** Obtain or release the mutex needed to access global data structures. +*/ +static void memsys5Enter(void){ + sqlite3_mutex_enter(mem5.mutex); +} +static void memsys5Leave(void){ + sqlite3_mutex_leave(mem5.mutex); +} + +/* +** Return the size of an outstanding allocation, in bytes. +** This only works for chunks that are currently checked out. +*/ +static int memsys5Size(void *p){ + int iSize, i; + assert( p!=0 ); + i = (int)(((u8 *)p-mem5.zPool)/mem5.szAtom); + assert( i>=0 && i0 ); + + /* No more than 1GiB per allocation */ + if( nByte > 0x40000000 ) return 0; + +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + /* Keep track of the maximum allocation request. Even unfulfilled + ** requests are counted */ + if( (u32)nByte>mem5.maxRequest ){ + mem5.maxRequest = nByte; + } +#endif + + + /* Round nByte up to the next valid power of two */ + for(iFullSz=mem5.szAtom,iLogsize=0; iFullSzLOGMAX ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes", nByte); + return 0; + } + i = mem5.aiFreelist[iBin]; + memsys5Unlink(i, iBin); + while( iBin>iLogsize ){ + int newSize; + + iBin--; + newSize = 1 << iBin; + mem5.aCtrl[i+newSize] = CTRL_FREE | iBin; + memsys5Link(i+newSize, iBin); + } + mem5.aCtrl[i] = iLogsize; + +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + /* Update allocator performance statistics. */ + mem5.nAlloc++; + mem5.totalAlloc += iFullSz; + mem5.totalExcess += iFullSz - nByte; + mem5.currentCount++; + mem5.currentOut += iFullSz; + if( mem5.maxCount=0 && iBlock0 ); + assert( mem5.currentOut>=(size*mem5.szAtom) ); + mem5.currentCount--; + mem5.currentOut -= size*mem5.szAtom; + assert( mem5.currentOut>0 || mem5.currentCount==0 ); + assert( mem5.currentCount>0 || mem5.currentOut==0 ); +#endif + + mem5.aCtrl[iBlock] = CTRL_FREE | iLogsize; + while( ALWAYS(iLogsize>iLogsize) & 1 ){ + iBuddy = iBlock - size; + assert( iBuddy>=0 ); + }else{ + iBuddy = iBlock + size; + if( iBuddy>=mem5.nBlock ) break; + } + if( mem5.aCtrl[iBuddy]!=(CTRL_FREE | iLogsize) ) break; + memsys5Unlink(iBuddy, iLogsize); + iLogsize++; + if( iBuddy0 ){ + memsys5Enter(); + p = memsys5MallocUnsafe(nBytes); + memsys5Leave(); + } + return (void*)p; +} + +/* +** Free memory. +** +** The outer layer memory allocator prevents this routine from +** being called with pPrior==0. +*/ +static void memsys5Free(void *pPrior){ + assert( pPrior!=0 ); + memsys5Enter(); + memsys5FreeUnsafe(pPrior); + memsys5Leave(); +} + +/* +** Change the size of an existing memory allocation. +** +** The outer layer memory allocator prevents this routine from +** being called with pPrior==0. +** +** nBytes is always a value obtained from a prior call to +** memsys5Round(). Hence nBytes is always a non-negative power +** of two. If nBytes==0 that means that an oversize allocation +** (an allocation larger than 0x40000000) was requested and this +** routine should return 0 without freeing pPrior. +*/ +static void *memsys5Realloc(void *pPrior, int nBytes){ + int nOld; + void *p; + assert( pPrior!=0 ); + assert( (nBytes&(nBytes-1))==0 ); /* EV: R-46199-30249 */ + assert( nBytes>=0 ); + if( nBytes==0 ){ + return 0; + } + nOld = memsys5Size(pPrior); + if( nBytes<=nOld ){ + return pPrior; + } + p = memsys5Malloc(nBytes); + if( p ){ + memcpy(p, pPrior, nOld); + memsys5Free(pPrior); + } + return p; +} + +/* +** Round up a request size to the next valid allocation size. If +** the allocation is too large to be handled by this allocation system, +** return 0. +** +** All allocations must be a power of two and must be expressed by a +** 32-bit signed integer. Hence the largest allocation is 0x40000000 +** or 1073741824 bytes. +*/ +static int memsys5Roundup(int n){ + int iFullSz; + if( n > 0x40000000 ) return 0; + for(iFullSz=mem5.szAtom; iFullSz 0 +** memsys5Log(2) -> 1 +** memsys5Log(4) -> 2 +** memsys5Log(5) -> 3 +** memsys5Log(8) -> 3 +** memsys5Log(9) -> 4 +*/ +static int memsys5Log(int iValue){ + int iLog; + for(iLog=0; (iLog<(int)((sizeof(int)*8)-1)) && (1<mem5.szAtom ){ + mem5.szAtom = mem5.szAtom << 1; + } + + mem5.nBlock = (nByte / (mem5.szAtom+sizeof(u8))); + mem5.zPool = zByte; + mem5.aCtrl = (u8 *)&mem5.zPool[mem5.nBlock*mem5.szAtom]; + + for(ii=0; ii<=LOGMAX; ii++){ + mem5.aiFreelist[ii] = -1; + } + + iOffset = 0; + for(ii=LOGMAX; ii>=0; ii--){ + int nAlloc = (1<mem5.nBlock); + } + + /* If a mutex is required for normal operation, allocate one */ + if( sqlite3GlobalConfig.bMemstat==0 ){ + mem5.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM); + } + + return SQLITE_OK; +} + +/* +** Deinitialize this module. +*/ +static void memsys5Shutdown(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + mem5.mutex = 0; + return; +} + +#ifdef SQLITE_TEST +/* +** Open the file indicated and write a log of all unfreed memory +** allocations into that log. +*/ +SQLITE_PRIVATE void sqlite3Memsys5Dump(const char *zFilename){ + FILE *out; + int i, j, n; + int nMinLog; + + if( zFilename==0 || zFilename[0]==0 ){ + out = stdout; + }else{ + out = fopen(zFilename, "w"); + if( out==0 ){ + fprintf(stderr, "** Unable to output memory debug output log: %s **\n", + zFilename); + return; + } + } + memsys5Enter(); + nMinLog = memsys5Log(mem5.szAtom); + for(i=0; i<=LOGMAX && i+nMinLog<32; i++){ + for(n=0, j=mem5.aiFreelist[i]; j>=0; j = MEM5LINK(j)->next, n++){} + fprintf(out, "freelist items of size %d: %d\n", mem5.szAtom << i, n); + } + fprintf(out, "mem5.nAlloc = %llu\n", mem5.nAlloc); + fprintf(out, "mem5.totalAlloc = %llu\n", mem5.totalAlloc); + fprintf(out, "mem5.totalExcess = %llu\n", mem5.totalExcess); + fprintf(out, "mem5.currentOut = %u\n", mem5.currentOut); + fprintf(out, "mem5.currentCount = %u\n", mem5.currentCount); + fprintf(out, "mem5.maxOut = %u\n", mem5.maxOut); + fprintf(out, "mem5.maxCount = %u\n", mem5.maxCount); + fprintf(out, "mem5.maxRequest = %u\n", mem5.maxRequest); + memsys5Leave(); + if( out==stdout ){ + fflush(stdout); + }else{ + fclose(out); + } +} +#endif + +/* +** This routine is the only routine in this file with external +** linkage. It returns a pointer to a static sqlite3_mem_methods +** struct populated with the memsys5 methods. +*/ +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys5(void){ + static const sqlite3_mem_methods memsys5Methods = { + memsys5Malloc, + memsys5Free, + memsys5Realloc, + memsys5Size, + memsys5Roundup, + memsys5Init, + memsys5Shutdown, + 0 + }; + return &memsys5Methods; +} + +#endif /* SQLITE_ENABLE_MEMSYS5 */ + +/************** End of mem5.c ************************************************/ +/************** Begin file mutex.c *******************************************/ +/* +** 2007 August 14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes. +** +** This file contains code that is common across all mutex implementations. +*/ +/* #include "sqliteInt.h" */ + +#if defined(SQLITE_DEBUG) && !defined(SQLITE_MUTEX_OMIT) +/* +** For debugging purposes, record when the mutex subsystem is initialized +** and uninitialized so that we can assert() if there is an attempt to +** allocate a mutex while the system is uninitialized. +*/ +static SQLITE_WSD int mutexIsInit = 0; +#endif /* SQLITE_DEBUG && !defined(SQLITE_MUTEX_OMIT) */ + + +#ifndef SQLITE_MUTEX_OMIT + +#ifdef SQLITE_ENABLE_MULTITHREADED_CHECKS +/* +** This block (enclosed by SQLITE_ENABLE_MULTITHREADED_CHECKS) contains +** the implementation of a wrapper around the system default mutex +** implementation (sqlite3DefaultMutex()). +** +** Most calls are passed directly through to the underlying default +** mutex implementation. Except, if a mutex is configured by calling +** sqlite3MutexWarnOnContention() on it, then if contention is ever +** encountered within xMutexEnter() a warning is emitted via sqlite3_log(). +** +** This type of mutex is used as the database handle mutex when testing +** apps that usually use SQLITE_CONFIG_MULTITHREAD mode. +*/ + +/* +** Type for all mutexes used when SQLITE_ENABLE_MULTITHREADED_CHECKS +** is defined. Variable CheckMutex.mutex is a pointer to the real mutex +** allocated by the system mutex implementation. Variable iType is usually set +** to the type of mutex requested - SQLITE_MUTEX_RECURSIVE, SQLITE_MUTEX_FAST +** or one of the static mutex identifiers. Or, if this is a recursive mutex +** that has been configured using sqlite3MutexWarnOnContention(), it is +** set to SQLITE_MUTEX_WARNONCONTENTION. +*/ +typedef struct CheckMutex CheckMutex; +struct CheckMutex { + int iType; + sqlite3_mutex *mutex; +}; + +#define SQLITE_MUTEX_WARNONCONTENTION (-1) + +/* +** Pointer to real mutex methods object used by the CheckMutex +** implementation. Set by checkMutexInit(). +*/ +static SQLITE_WSD const sqlite3_mutex_methods *pGlobalMutexMethods; + +#ifdef SQLITE_DEBUG +static int checkMutexHeld(sqlite3_mutex *p){ + return pGlobalMutexMethods->xMutexHeld(((CheckMutex*)p)->mutex); +} +static int checkMutexNotheld(sqlite3_mutex *p){ + return pGlobalMutexMethods->xMutexNotheld(((CheckMutex*)p)->mutex); +} +#endif + +/* +** Initialize and deinitialize the mutex subsystem. +*/ +static int checkMutexInit(void){ + pGlobalMutexMethods = sqlite3DefaultMutex(); + return SQLITE_OK; +} +static int checkMutexEnd(void){ + pGlobalMutexMethods = 0; + return SQLITE_OK; +} + +/* +** Allocate a mutex. +*/ +static sqlite3_mutex *checkMutexAlloc(int iType){ + static CheckMutex staticMutexes[] = { + {2, 0}, {3, 0}, {4, 0}, {5, 0}, + {6, 0}, {7, 0}, {8, 0}, {9, 0}, + {10, 0}, {11, 0}, {12, 0}, {13, 0} + }; + CheckMutex *p = 0; + + assert( SQLITE_MUTEX_RECURSIVE==1 && SQLITE_MUTEX_FAST==0 ); + if( iType<2 ){ + p = sqlite3MallocZero(sizeof(CheckMutex)); + if( p==0 ) return 0; + p->iType = iType; + }else{ +#ifdef SQLITE_ENABLE_API_ARMOR + if( iType-2>=ArraySize(staticMutexes) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + p = &staticMutexes[iType-2]; + } + + if( p->mutex==0 ){ + p->mutex = pGlobalMutexMethods->xMutexAlloc(iType); + if( p->mutex==0 ){ + if( iType<2 ){ + sqlite3_free(p); + } + p = 0; + } + } + + return (sqlite3_mutex*)p; +} + +/* +** Free a mutex. +*/ +static void checkMutexFree(sqlite3_mutex *p){ + assert( SQLITE_MUTEX_RECURSIVE<2 ); + assert( SQLITE_MUTEX_FAST<2 ); + assert( SQLITE_MUTEX_WARNONCONTENTION<2 ); + +#if SQLITE_ENABLE_API_ARMOR + if( ((CheckMutex*)p)->iType<2 ) +#endif + { + CheckMutex *pCheck = (CheckMutex*)p; + pGlobalMutexMethods->xMutexFree(pCheck->mutex); + sqlite3_free(pCheck); + } +#ifdef SQLITE_ENABLE_API_ARMOR + else{ + (void)SQLITE_MISUSE_BKPT; + } +#endif +} + +/* +** Enter the mutex. +*/ +static void checkMutexEnter(sqlite3_mutex *p){ + CheckMutex *pCheck = (CheckMutex*)p; + if( pCheck->iType==SQLITE_MUTEX_WARNONCONTENTION ){ + if( SQLITE_OK==pGlobalMutexMethods->xMutexTry(pCheck->mutex) ){ + return; + } + sqlite3_log(SQLITE_MISUSE, + "illegal multi-threaded access to database connection" + ); + } + pGlobalMutexMethods->xMutexEnter(pCheck->mutex); +} + +/* +** Enter the mutex (do not block). +*/ +static int checkMutexTry(sqlite3_mutex *p){ + CheckMutex *pCheck = (CheckMutex*)p; + return pGlobalMutexMethods->xMutexTry(pCheck->mutex); +} + +/* +** Leave the mutex. +*/ +static void checkMutexLeave(sqlite3_mutex *p){ + CheckMutex *pCheck = (CheckMutex*)p; + pGlobalMutexMethods->xMutexLeave(pCheck->mutex); +} + +sqlite3_mutex_methods const *multiThreadedCheckMutex(void){ + static const sqlite3_mutex_methods sMutex = { + checkMutexInit, + checkMutexEnd, + checkMutexAlloc, + checkMutexFree, + checkMutexEnter, + checkMutexTry, + checkMutexLeave, +#ifdef SQLITE_DEBUG + checkMutexHeld, + checkMutexNotheld +#else + 0, + 0 +#endif + }; + return &sMutex; +} + +/* +** Mark the SQLITE_MUTEX_RECURSIVE mutex passed as the only argument as +** one on which there should be no contention. +*/ +SQLITE_PRIVATE void sqlite3MutexWarnOnContention(sqlite3_mutex *p){ + if( sqlite3GlobalConfig.mutex.xMutexAlloc==checkMutexAlloc ){ + CheckMutex *pCheck = (CheckMutex*)p; + assert( pCheck->iType==SQLITE_MUTEX_RECURSIVE ); + pCheck->iType = SQLITE_MUTEX_WARNONCONTENTION; + } +} +#endif /* ifdef SQLITE_ENABLE_MULTITHREADED_CHECKS */ + +/* +** Initialize the mutex system. +*/ +SQLITE_PRIVATE int sqlite3MutexInit(void){ + int rc = SQLITE_OK; + if( !sqlite3GlobalConfig.mutex.xMutexAlloc ){ + /* If the xMutexAlloc method has not been set, then the user did not + ** install a mutex implementation via sqlite3_config() prior to + ** sqlite3_initialize() being called. This block copies pointers to + ** the default implementation into the sqlite3GlobalConfig structure. + */ + sqlite3_mutex_methods const *pFrom; + sqlite3_mutex_methods *pTo = &sqlite3GlobalConfig.mutex; + + if( sqlite3GlobalConfig.bCoreMutex ){ +#ifdef SQLITE_ENABLE_MULTITHREADED_CHECKS + pFrom = multiThreadedCheckMutex(); +#else + pFrom = sqlite3DefaultMutex(); +#endif + }else{ + pFrom = sqlite3NoopMutex(); + } + pTo->xMutexInit = pFrom->xMutexInit; + pTo->xMutexEnd = pFrom->xMutexEnd; + pTo->xMutexFree = pFrom->xMutexFree; + pTo->xMutexEnter = pFrom->xMutexEnter; + pTo->xMutexTry = pFrom->xMutexTry; + pTo->xMutexLeave = pFrom->xMutexLeave; + pTo->xMutexHeld = pFrom->xMutexHeld; + pTo->xMutexNotheld = pFrom->xMutexNotheld; + sqlite3MemoryBarrier(); + pTo->xMutexAlloc = pFrom->xMutexAlloc; + } + assert( sqlite3GlobalConfig.mutex.xMutexInit ); + rc = sqlite3GlobalConfig.mutex.xMutexInit(); + +#ifdef SQLITE_DEBUG + GLOBAL(int, mutexIsInit) = 1; +#endif + + sqlite3MemoryBarrier(); + return rc; +} + +/* +** Shutdown the mutex system. This call frees resources allocated by +** sqlite3MutexInit(). +*/ +SQLITE_PRIVATE int sqlite3MutexEnd(void){ + int rc = SQLITE_OK; + if( sqlite3GlobalConfig.mutex.xMutexEnd ){ + rc = sqlite3GlobalConfig.mutex.xMutexEnd(); + } + +#ifdef SQLITE_DEBUG + GLOBAL(int, mutexIsInit) = 0; +#endif + + return rc; +} + +/* +** Retrieve a pointer to a static mutex or allocate a new dynamic one. +*/ +SQLITE_API sqlite3_mutex *sqlite3_mutex_alloc(int id){ +#ifndef SQLITE_OMIT_AUTOINIT + if( id<=SQLITE_MUTEX_RECURSIVE && sqlite3_initialize() ) return 0; + if( id>SQLITE_MUTEX_RECURSIVE && sqlite3MutexInit() ) return 0; +#endif + assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); + return sqlite3GlobalConfig.mutex.xMutexAlloc(id); +} + +SQLITE_PRIVATE sqlite3_mutex *sqlite3MutexAlloc(int id){ + if( !sqlite3GlobalConfig.bCoreMutex ){ + return 0; + } + assert( GLOBAL(int, mutexIsInit) ); + assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); + return sqlite3GlobalConfig.mutex.xMutexAlloc(id); +} + +/* +** Free a dynamic mutex. +*/ +SQLITE_API void sqlite3_mutex_free(sqlite3_mutex *p){ + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexFree ); + sqlite3GlobalConfig.mutex.xMutexFree(p); + } +} + +/* +** Obtain the mutex p. If some other thread already has the mutex, block +** until it can be obtained. +*/ +SQLITE_API void sqlite3_mutex_enter(sqlite3_mutex *p){ + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexEnter ); + sqlite3GlobalConfig.mutex.xMutexEnter(p); + } +} + +/* +** Obtain the mutex p. If successful, return SQLITE_OK. Otherwise, if another +** thread holds the mutex and it cannot be obtained, return SQLITE_BUSY. +*/ +SQLITE_API int sqlite3_mutex_try(sqlite3_mutex *p){ + int rc = SQLITE_OK; + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexTry ); + return sqlite3GlobalConfig.mutex.xMutexTry(p); + } + return rc; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was previously +** entered by the same thread. The behavior is undefined if the mutex +** is not currently entered. If a NULL pointer is passed as an argument +** this function is a no-op. +*/ +SQLITE_API void sqlite3_mutex_leave(sqlite3_mutex *p){ + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexLeave ); + sqlite3GlobalConfig.mutex.xMutexLeave(p); + } +} + +#ifndef NDEBUG +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use inside assert() statements. +*/ +SQLITE_API int sqlite3_mutex_held(sqlite3_mutex *p){ + assert( p==0 || sqlite3GlobalConfig.mutex.xMutexHeld ); + return p==0 || sqlite3GlobalConfig.mutex.xMutexHeld(p); +} +SQLITE_API int sqlite3_mutex_notheld(sqlite3_mutex *p){ + assert( p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld ); + return p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld(p); +} +#endif + +#endif /* !defined(SQLITE_MUTEX_OMIT) */ + +/************** End of mutex.c ***********************************************/ +/************** Begin file mutex_noop.c **************************************/ +/* +** 2008 October 07 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes. +** +** This implementation in this file does not provide any mutual +** exclusion and is thus suitable for use only in applications +** that use SQLite in a single thread. The routines defined +** here are place-holders. Applications can substitute working +** mutex routines at start-time using the +** +** sqlite3_config(SQLITE_CONFIG_MUTEX,...) +** +** interface. +** +** If compiled with SQLITE_DEBUG, then additional logic is inserted +** that does error checking on mutexes to make sure they are being +** called correctly. +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_MUTEX_OMIT + +#ifndef SQLITE_DEBUG +/* +** Stub routines for all mutex methods. +** +** This routines provide no mutual exclusion or error checking. +*/ +static int noopMutexInit(void){ return SQLITE_OK; } +static int noopMutexEnd(void){ return SQLITE_OK; } +static sqlite3_mutex *noopMutexAlloc(int id){ + UNUSED_PARAMETER(id); + return (sqlite3_mutex*)8; +} +static void noopMutexFree(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } +static void noopMutexEnter(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } +static int noopMutexTry(sqlite3_mutex *p){ + UNUSED_PARAMETER(p); + return SQLITE_OK; +} +static void noopMutexLeave(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){ + static const sqlite3_mutex_methods sMutex = { + noopMutexInit, + noopMutexEnd, + noopMutexAlloc, + noopMutexFree, + noopMutexEnter, + noopMutexTry, + noopMutexLeave, + + 0, + 0, + }; + + return &sMutex; +} +#endif /* !SQLITE_DEBUG */ + +#ifdef SQLITE_DEBUG +/* +** In this implementation, error checking is provided for testing +** and debugging purposes. The mutexes still do not provide any +** mutual exclusion. +*/ + +/* +** The mutex object +*/ +typedef struct sqlite3_debug_mutex { + int id; /* The mutex type */ + int cnt; /* Number of entries without a matching leave */ +} sqlite3_debug_mutex; + +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use inside assert() statements. +*/ +static int debugMutexHeld(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + return p==0 || p->cnt>0; +} +static int debugMutexNotheld(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + return p==0 || p->cnt==0; +} + +/* +** Initialize and deinitialize the mutex subsystem. +*/ +static int debugMutexInit(void){ return SQLITE_OK; } +static int debugMutexEnd(void){ return SQLITE_OK; } + +/* +** The sqlite3_mutex_alloc() routine allocates a new +** mutex and returns a pointer to it. If it returns NULL +** that means that a mutex could not be allocated. +*/ +static sqlite3_mutex *debugMutexAlloc(int id){ + static sqlite3_debug_mutex aStatic[SQLITE_MUTEX_STATIC_VFS3 - 1]; + sqlite3_debug_mutex *pNew = 0; + switch( id ){ + case SQLITE_MUTEX_FAST: + case SQLITE_MUTEX_RECURSIVE: { + pNew = sqlite3Malloc(sizeof(*pNew)); + if( pNew ){ + pNew->id = id; + pNew->cnt = 0; + } + break; + } + default: { +#ifdef SQLITE_ENABLE_API_ARMOR + if( id-2<0 || id-2>=ArraySize(aStatic) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + pNew = &aStatic[id-2]; + pNew->id = id; + break; + } + } + return (sqlite3_mutex*)pNew; +} + +/* +** This routine deallocates a previously allocated mutex. +*/ +static void debugMutexFree(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( p->cnt==0 ); + if( p->id==SQLITE_MUTEX_RECURSIVE || p->id==SQLITE_MUTEX_FAST ){ + sqlite3_free(p); + }else{ +#ifdef SQLITE_ENABLE_API_ARMOR + (void)SQLITE_MISUSE_BKPT; +#endif + } +} + +/* +** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK +** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can +** be entered multiple times by the same thread. In such cases the, +** mutex must be exited an equal number of times before another thread +** can enter. If the same thread tries to enter any other kind of mutex +** more than once, the behavior is undefined. +*/ +static void debugMutexEnter(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); + p->cnt++; +} +static int debugMutexTry(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); + p->cnt++; + return SQLITE_OK; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered or +** is not currently allocated. SQLite will never do either. +*/ +static void debugMutexLeave(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( debugMutexHeld(pX) ); + p->cnt--; + assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); +} + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){ + static const sqlite3_mutex_methods sMutex = { + debugMutexInit, + debugMutexEnd, + debugMutexAlloc, + debugMutexFree, + debugMutexEnter, + debugMutexTry, + debugMutexLeave, + + debugMutexHeld, + debugMutexNotheld + }; + + return &sMutex; +} +#endif /* SQLITE_DEBUG */ + +/* +** If compiled with SQLITE_MUTEX_NOOP, then the no-op mutex implementation +** is used regardless of the run-time threadsafety setting. +*/ +#ifdef SQLITE_MUTEX_NOOP +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ + return sqlite3NoopMutex(); +} +#endif /* defined(SQLITE_MUTEX_NOOP) */ +#endif /* !defined(SQLITE_MUTEX_OMIT) */ + +/************** End of mutex_noop.c ******************************************/ +/************** Begin file mutex_unix.c **************************************/ +/* +** 2007 August 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes for pthreads +*/ +/* #include "sqliteInt.h" */ + +/* +** The code in this file is only used if we are compiling threadsafe +** under unix with pthreads. +** +** Note that this implementation requires a version of pthreads that +** supports recursive mutexes. +*/ +#ifdef SQLITE_MUTEX_PTHREADS + +#include + +/* +** The sqlite3_mutex.id, sqlite3_mutex.nRef, and sqlite3_mutex.owner fields +** are necessary under two condidtions: (1) Debug builds and (2) using +** home-grown mutexes. Encapsulate these conditions into a single #define. +*/ +#if defined(SQLITE_DEBUG) || defined(SQLITE_HOMEGROWN_RECURSIVE_MUTEX) +# define SQLITE_MUTEX_NREF 1 +#else +# define SQLITE_MUTEX_NREF 0 +#endif + +/* +** Each recursive mutex is an instance of the following structure. +*/ +struct sqlite3_mutex { + pthread_mutex_t mutex; /* Mutex controlling the lock */ +#if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) + int id; /* Mutex type */ +#endif +#if SQLITE_MUTEX_NREF + volatile int nRef; /* Number of entrances */ + volatile pthread_t owner; /* Thread that is within this mutex */ + int trace; /* True to trace changes */ +#endif +}; +#if SQLITE_MUTEX_NREF +# define SQLITE3_MUTEX_INITIALIZER(id) \ + {PTHREAD_MUTEX_INITIALIZER,id,0,(pthread_t)0,0} +#elif defined(SQLITE_ENABLE_API_ARMOR) +# define SQLITE3_MUTEX_INITIALIZER(id) { PTHREAD_MUTEX_INITIALIZER, id } +#else +#define SQLITE3_MUTEX_INITIALIZER(id) { PTHREAD_MUTEX_INITIALIZER } +#endif + +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use only inside assert() statements. On some platforms, +** there might be race conditions that can cause these routines to +** deliver incorrect results. In particular, if pthread_equal() is +** not an atomic operation, then these routines might delivery +** incorrect results. On most platforms, pthread_equal() is a +** comparison of two integers and is therefore atomic. But we are +** told that HPUX is not such a platform. If so, then these routines +** will not always work correctly on HPUX. +** +** On those platforms where pthread_equal() is not atomic, SQLite +** should be compiled without -DSQLITE_DEBUG and with -DNDEBUG to +** make sure no assert() statements are evaluated and hence these +** routines are never called. +*/ +#if !defined(NDEBUG) || defined(SQLITE_DEBUG) +static int pthreadMutexHeld(sqlite3_mutex *p){ + return (p->nRef!=0 && pthread_equal(p->owner, pthread_self())); +} +static int pthreadMutexNotheld(sqlite3_mutex *p){ + return p->nRef==0 || pthread_equal(p->owner, pthread_self())==0; +} +#endif + +/* +** Try to provide a memory barrier operation, needed for initialization +** and also for the implementation of xShmBarrier in the VFS in cases +** where SQLite is compiled without mutexes. +*/ +SQLITE_PRIVATE void sqlite3MemoryBarrier(void){ +#if defined(SQLITE_MEMORY_BARRIER) + SQLITE_MEMORY_BARRIER; +#elif defined(__GNUC__) && GCC_VERSION>=4001000 + __sync_synchronize(); +#endif +} + +/* +** Initialize and deinitialize the mutex subsystem. +*/ +static int pthreadMutexInit(void){ return SQLITE_OK; } +static int pthreadMutexEnd(void){ return SQLITE_OK; } + +/* +** The sqlite3_mutex_alloc() routine allocates a new +** mutex and returns a pointer to it. If it returns NULL +** that means that a mutex could not be allocated. SQLite +** will unwind its stack and return an error. The argument +** to sqlite3_mutex_alloc() is one of these integer constants: +** +**
      +**
    • SQLITE_MUTEX_FAST +**
    • SQLITE_MUTEX_RECURSIVE +**
    • SQLITE_MUTEX_STATIC_MAIN +**
    • SQLITE_MUTEX_STATIC_MEM +**
    • SQLITE_MUTEX_STATIC_OPEN +**
    • SQLITE_MUTEX_STATIC_PRNG +**
    • SQLITE_MUTEX_STATIC_LRU +**
    • SQLITE_MUTEX_STATIC_PMEM +**
    • SQLITE_MUTEX_STATIC_APP1 +**
    • SQLITE_MUTEX_STATIC_APP2 +**
    • SQLITE_MUTEX_STATIC_APP3 +**
    • SQLITE_MUTEX_STATIC_VFS1 +**
    • SQLITE_MUTEX_STATIC_VFS2 +**
    • SQLITE_MUTEX_STATIC_VFS3 +**
    +** +** The first two constants cause sqlite3_mutex_alloc() to create +** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE +** is used but not necessarily so when SQLITE_MUTEX_FAST is used. +** The mutex implementation does not need to make a distinction +** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does +** not want to. But SQLite will only request a recursive mutex in +** cases where it really needs one. If a faster non-recursive mutex +** implementation is available on the host platform, the mutex subsystem +** might return such a mutex in response to SQLITE_MUTEX_FAST. +** +** The other allowed parameters to sqlite3_mutex_alloc() each return +** a pointer to a static preexisting mutex. Six static mutexes are +** used by the current version of SQLite. Future versions of SQLite +** may add additional static mutexes. Static mutexes are for internal +** use by SQLite only. Applications that use SQLite mutexes should +** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or +** SQLITE_MUTEX_RECURSIVE. +** +** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST +** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() +** returns a different mutex on every call. But for the static +** mutex types, the same mutex is returned on every call that has +** the same type number. +*/ +static sqlite3_mutex *pthreadMutexAlloc(int iType){ + static sqlite3_mutex staticMutexes[] = { + SQLITE3_MUTEX_INITIALIZER(2), + SQLITE3_MUTEX_INITIALIZER(3), + SQLITE3_MUTEX_INITIALIZER(4), + SQLITE3_MUTEX_INITIALIZER(5), + SQLITE3_MUTEX_INITIALIZER(6), + SQLITE3_MUTEX_INITIALIZER(7), + SQLITE3_MUTEX_INITIALIZER(8), + SQLITE3_MUTEX_INITIALIZER(9), + SQLITE3_MUTEX_INITIALIZER(10), + SQLITE3_MUTEX_INITIALIZER(11), + SQLITE3_MUTEX_INITIALIZER(12), + SQLITE3_MUTEX_INITIALIZER(13) + }; + sqlite3_mutex *p; + switch( iType ){ + case SQLITE_MUTEX_RECURSIVE: { + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + /* If recursive mutexes are not available, we will have to + ** build our own. See below. */ + pthread_mutex_init(&p->mutex, 0); +#else + /* Use a recursive mutex if it is available */ + pthread_mutexattr_t recursiveAttr; + pthread_mutexattr_init(&recursiveAttr); + pthread_mutexattr_settype(&recursiveAttr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&p->mutex, &recursiveAttr); + pthread_mutexattr_destroy(&recursiveAttr); +#endif +#if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) + p->id = SQLITE_MUTEX_RECURSIVE; +#endif + } + break; + } + case SQLITE_MUTEX_FAST: { + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ + pthread_mutex_init(&p->mutex, 0); +#if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) + p->id = SQLITE_MUTEX_FAST; +#endif + } + break; + } + default: { +#ifdef SQLITE_ENABLE_API_ARMOR + if( iType-2<0 || iType-2>=ArraySize(staticMutexes) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + p = &staticMutexes[iType-2]; + break; + } + } +#if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) + assert( p==0 || p->id==iType ); +#endif + return p; +} + + +/* +** This routine deallocates a previously +** allocated mutex. SQLite is careful to deallocate every +** mutex that it allocates. +*/ +static void pthreadMutexFree(sqlite3_mutex *p){ + assert( p->nRef==0 ); +#if SQLITE_ENABLE_API_ARMOR + if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ) +#endif + { + pthread_mutex_destroy(&p->mutex); + sqlite3_free(p); + } +#ifdef SQLITE_ENABLE_API_ARMOR + else{ + (void)SQLITE_MISUSE_BKPT; + } +#endif +} + +/* +** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK +** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can +** be entered multiple times by the same thread. In such cases the, +** mutex must be exited an equal number of times before another thread +** can enter. If the same thread tries to enter any other kind of mutex +** more than once, the behavior is undefined. +*/ +static void pthreadMutexEnter(sqlite3_mutex *p){ + assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) ); + +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + /* If recursive mutexes are not available, then we have to grow + ** our own. This implementation assumes that pthread_equal() + ** is atomic - that it cannot be deceived into thinking self + ** and p->owner are equal if p->owner changes between two values + ** that are not equal to self while the comparison is taking place. + ** This implementation also assumes a coherent cache - that + ** separate processes cannot read different values from the same + ** address at the same time. If either of these two conditions + ** are not met, then the mutexes will fail and problems will result. + */ + { + pthread_t self = pthread_self(); + if( p->nRef>0 && pthread_equal(p->owner, self) ){ + p->nRef++; + }else{ + pthread_mutex_lock(&p->mutex); + assert( p->nRef==0 ); + p->owner = self; + p->nRef = 1; + } + } +#else + /* Use the built-in recursive mutexes if they are available. + */ + pthread_mutex_lock(&p->mutex); +#if SQLITE_MUTEX_NREF + assert( p->nRef>0 || p->owner==0 ); + p->owner = pthread_self(); + p->nRef++; +#endif +#endif + +#ifdef SQLITE_DEBUG + if( p->trace ){ + printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); + } +#endif +} +static int pthreadMutexTry(sqlite3_mutex *p){ + int rc; + assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) ); + +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + /* If recursive mutexes are not available, then we have to grow + ** our own. This implementation assumes that pthread_equal() + ** is atomic - that it cannot be deceived into thinking self + ** and p->owner are equal if p->owner changes between two values + ** that are not equal to self while the comparison is taking place. + ** This implementation also assumes a coherent cache - that + ** separate processes cannot read different values from the same + ** address at the same time. If either of these two conditions + ** are not met, then the mutexes will fail and problems will result. + */ + { + pthread_t self = pthread_self(); + if( p->nRef>0 && pthread_equal(p->owner, self) ){ + p->nRef++; + rc = SQLITE_OK; + }else if( pthread_mutex_trylock(&p->mutex)==0 ){ + assert( p->nRef==0 ); + p->owner = self; + p->nRef = 1; + rc = SQLITE_OK; + }else{ + rc = SQLITE_BUSY; + } + } +#else + /* Use the built-in recursive mutexes if they are available. + */ + if( pthread_mutex_trylock(&p->mutex)==0 ){ +#if SQLITE_MUTEX_NREF + p->owner = pthread_self(); + p->nRef++; +#endif + rc = SQLITE_OK; + }else{ + rc = SQLITE_BUSY; + } +#endif + +#ifdef SQLITE_DEBUG + if( rc==SQLITE_OK && p->trace ){ + printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); + } +#endif + return rc; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered or +** is not currently allocated. SQLite will never do either. +*/ +static void pthreadMutexLeave(sqlite3_mutex *p){ + assert( pthreadMutexHeld(p) ); +#if SQLITE_MUTEX_NREF + p->nRef--; + if( p->nRef==0 ) p->owner = 0; +#endif + assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE ); + +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + if( p->nRef==0 ){ + pthread_mutex_unlock(&p->mutex); + } +#else + pthread_mutex_unlock(&p->mutex); +#endif + +#ifdef SQLITE_DEBUG + if( p->trace ){ + printf("leave mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); + } +#endif +} + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ + static const sqlite3_mutex_methods sMutex = { + pthreadMutexInit, + pthreadMutexEnd, + pthreadMutexAlloc, + pthreadMutexFree, + pthreadMutexEnter, + pthreadMutexTry, + pthreadMutexLeave, +#ifdef SQLITE_DEBUG + pthreadMutexHeld, + pthreadMutexNotheld +#else + 0, + 0 +#endif + }; + + return &sMutex; +} + +#endif /* SQLITE_MUTEX_PTHREADS */ + +/************** End of mutex_unix.c ******************************************/ +/************** Begin file mutex_w32.c ***************************************/ +/* +** 2007 August 14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes for Win32. +*/ +/* #include "sqliteInt.h" */ + +#if SQLITE_OS_WIN +/* +** Include code that is common to all os_*.c files +*/ +/* #include "os_common.h" */ + +/* +** Include the header file for the Windows VFS. +*/ +/************** Include os_win.h in the middle of mutex_w32.c ****************/ +/************** Begin file os_win.h ******************************************/ +/* +** 2013 November 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to Windows. +*/ +#ifndef SQLITE_OS_WIN_H +#define SQLITE_OS_WIN_H + +/* +** Include the primary Windows SDK header file. +*/ +#include "windows.h" + +#ifdef __CYGWIN__ +# include +# include /* amalgamator: dontcache */ +#endif + +/* +** Determine if we are dealing with Windows NT. +** +** We ought to be able to determine if we are compiling for Windows 9x or +** Windows NT using the _WIN32_WINNT macro as follows: +** +** #if defined(_WIN32_WINNT) +** # define SQLITE_OS_WINNT 1 +** #else +** # define SQLITE_OS_WINNT 0 +** #endif +** +** However, Visual Studio 2005 does not set _WIN32_WINNT by default, as +** it ought to, so the above test does not work. We'll just assume that +** everything is Windows NT unless the programmer explicitly says otherwise +** by setting SQLITE_OS_WINNT to 0. +*/ +#if SQLITE_OS_WIN && !defined(SQLITE_OS_WINNT) +# define SQLITE_OS_WINNT 1 +#endif + +/* +** Determine if we are dealing with Windows CE - which has a much reduced +** API. +*/ +#if defined(_WIN32_WCE) +# define SQLITE_OS_WINCE 1 +#else +# define SQLITE_OS_WINCE 0 +#endif + +/* +** Determine if we are dealing with WinRT, which provides only a subset of +** the full Win32 API. +*/ +#if !defined(SQLITE_OS_WINRT) +# define SQLITE_OS_WINRT 0 +#endif + +/* +** For WinCE, some API function parameters do not appear to be declared as +** volatile. +*/ +#if SQLITE_OS_WINCE +# define SQLITE_WIN32_VOLATILE +#else +# define SQLITE_WIN32_VOLATILE volatile +#endif + +/* +** For some Windows sub-platforms, the _beginthreadex() / _endthreadex() +** functions are not available (e.g. those not using MSVC, Cygwin, etc). +*/ +#if SQLITE_OS_WIN && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \ + SQLITE_THREADSAFE>0 && !defined(__CYGWIN__) +# define SQLITE_OS_WIN_THREADS 1 +#else +# define SQLITE_OS_WIN_THREADS 0 +#endif + +#endif /* SQLITE_OS_WIN_H */ + +/************** End of os_win.h **********************************************/ +/************** Continuing where we left off in mutex_w32.c ******************/ +#endif + +/* +** The code in this file is only used if we are compiling multithreaded +** on a Win32 system. +*/ +#ifdef SQLITE_MUTEX_W32 + +/* +** Each recursive mutex is an instance of the following structure. +*/ +struct sqlite3_mutex { + CRITICAL_SECTION mutex; /* Mutex controlling the lock */ + int id; /* Mutex type */ +#ifdef SQLITE_DEBUG + volatile int nRef; /* Number of enterances */ + volatile DWORD owner; /* Thread holding this mutex */ + volatile LONG trace; /* True to trace changes */ +#endif +}; + +/* +** These are the initializer values used when declaring a "static" mutex +** on Win32. It should be noted that all mutexes require initialization +** on the Win32 platform. +*/ +#define SQLITE_W32_MUTEX_INITIALIZER { 0 } + +#ifdef SQLITE_DEBUG +#define SQLITE3_MUTEX_INITIALIZER(id) { SQLITE_W32_MUTEX_INITIALIZER, id, \ + 0L, (DWORD)0, 0 } +#else +#define SQLITE3_MUTEX_INITIALIZER(id) { SQLITE_W32_MUTEX_INITIALIZER, id } +#endif + +#ifdef SQLITE_DEBUG +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use only inside assert() statements. +*/ +static int winMutexHeld(sqlite3_mutex *p){ + return p->nRef!=0 && p->owner==GetCurrentThreadId(); +} + +static int winMutexNotheld2(sqlite3_mutex *p, DWORD tid){ + return p->nRef==0 || p->owner!=tid; +} + +static int winMutexNotheld(sqlite3_mutex *p){ + DWORD tid = GetCurrentThreadId(); + return winMutexNotheld2(p, tid); +} +#endif + +/* +** Try to provide a memory barrier operation, needed for initialization +** and also for the xShmBarrier method of the VFS in cases when SQLite is +** compiled without mutexes (SQLITE_THREADSAFE=0). +*/ +SQLITE_PRIVATE void sqlite3MemoryBarrier(void){ +#if defined(SQLITE_MEMORY_BARRIER) + SQLITE_MEMORY_BARRIER; +#elif defined(__GNUC__) + __sync_synchronize(); +#elif MSVC_VERSION>=1300 + _ReadWriteBarrier(); +#elif defined(MemoryBarrier) + MemoryBarrier(); +#endif +} + +/* +** Initialize and deinitialize the mutex subsystem. +*/ +static sqlite3_mutex winMutex_staticMutexes[] = { + SQLITE3_MUTEX_INITIALIZER(2), + SQLITE3_MUTEX_INITIALIZER(3), + SQLITE3_MUTEX_INITIALIZER(4), + SQLITE3_MUTEX_INITIALIZER(5), + SQLITE3_MUTEX_INITIALIZER(6), + SQLITE3_MUTEX_INITIALIZER(7), + SQLITE3_MUTEX_INITIALIZER(8), + SQLITE3_MUTEX_INITIALIZER(9), + SQLITE3_MUTEX_INITIALIZER(10), + SQLITE3_MUTEX_INITIALIZER(11), + SQLITE3_MUTEX_INITIALIZER(12), + SQLITE3_MUTEX_INITIALIZER(13) +}; + +static int winMutex_isInit = 0; +static int winMutex_isNt = -1; /* <0 means "need to query" */ + +/* As the winMutexInit() and winMutexEnd() functions are called as part +** of the sqlite3_initialize() and sqlite3_shutdown() processing, the +** "interlocked" magic used here is probably not strictly necessary. +*/ +static LONG SQLITE_WIN32_VOLATILE winMutex_lock = 0; + +SQLITE_API int sqlite3_win32_is_nt(void); /* os_win.c */ +SQLITE_API void sqlite3_win32_sleep(DWORD milliseconds); /* os_win.c */ + +static int winMutexInit(void){ + /* The first to increment to 1 does actual initialization */ + if( InterlockedCompareExchange(&winMutex_lock, 1, 0)==0 ){ + int i; + for(i=0; i +**
  • SQLITE_MUTEX_FAST +**
  • SQLITE_MUTEX_RECURSIVE +**
  • SQLITE_MUTEX_STATIC_MAIN +**
  • SQLITE_MUTEX_STATIC_MEM +**
  • SQLITE_MUTEX_STATIC_OPEN +**
  • SQLITE_MUTEX_STATIC_PRNG +**
  • SQLITE_MUTEX_STATIC_LRU +**
  • SQLITE_MUTEX_STATIC_PMEM +**
  • SQLITE_MUTEX_STATIC_APP1 +**
  • SQLITE_MUTEX_STATIC_APP2 +**
  • SQLITE_MUTEX_STATIC_APP3 +**
  • SQLITE_MUTEX_STATIC_VFS1 +**
  • SQLITE_MUTEX_STATIC_VFS2 +**
  • SQLITE_MUTEX_STATIC_VFS3 +** +** +** The first two constants cause sqlite3_mutex_alloc() to create +** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE +** is used but not necessarily so when SQLITE_MUTEX_FAST is used. +** The mutex implementation does not need to make a distinction +** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does +** not want to. But SQLite will only request a recursive mutex in +** cases where it really needs one. If a faster non-recursive mutex +** implementation is available on the host platform, the mutex subsystem +** might return such a mutex in response to SQLITE_MUTEX_FAST. +** +** The other allowed parameters to sqlite3_mutex_alloc() each return +** a pointer to a static preexisting mutex. Six static mutexes are +** used by the current version of SQLite. Future versions of SQLite +** may add additional static mutexes. Static mutexes are for internal +** use by SQLite only. Applications that use SQLite mutexes should +** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or +** SQLITE_MUTEX_RECURSIVE. +** +** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST +** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() +** returns a different mutex on every call. But for the static +** mutex types, the same mutex is returned on every call that has +** the same type number. +*/ +static sqlite3_mutex *winMutexAlloc(int iType){ + sqlite3_mutex *p; + + switch( iType ){ + case SQLITE_MUTEX_FAST: + case SQLITE_MUTEX_RECURSIVE: { + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ + p->id = iType; +#ifdef SQLITE_DEBUG +#ifdef SQLITE_WIN32_MUTEX_TRACE_DYNAMIC + p->trace = 1; +#endif +#endif +#if SQLITE_OS_WINRT + InitializeCriticalSectionEx(&p->mutex, 0, 0); +#else + InitializeCriticalSection(&p->mutex); +#endif + } + break; + } + default: { +#ifdef SQLITE_ENABLE_API_ARMOR + if( iType-2<0 || iType-2>=ArraySize(winMutex_staticMutexes) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + p = &winMutex_staticMutexes[iType-2]; +#ifdef SQLITE_DEBUG +#ifdef SQLITE_WIN32_MUTEX_TRACE_STATIC + InterlockedCompareExchange(&p->trace, 1, 0); +#endif +#endif + break; + } + } + assert( p==0 || p->id==iType ); + return p; +} + + +/* +** This routine deallocates a previously +** allocated mutex. SQLite is careful to deallocate every +** mutex that it allocates. +*/ +static void winMutexFree(sqlite3_mutex *p){ + assert( p ); + assert( p->nRef==0 && p->owner==0 ); + if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ){ + DeleteCriticalSection(&p->mutex); + sqlite3_free(p); + }else{ +#ifdef SQLITE_ENABLE_API_ARMOR + (void)SQLITE_MISUSE_BKPT; +#endif + } +} + +/* +** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK +** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can +** be entered multiple times by the same thread. In such cases the, +** mutex must be exited an equal number of times before another thread +** can enter. If the same thread tries to enter any other kind of mutex +** more than once, the behavior is undefined. +*/ +static void winMutexEnter(sqlite3_mutex *p){ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + DWORD tid = GetCurrentThreadId(); +#endif +#ifdef SQLITE_DEBUG + assert( p ); + assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); +#else + assert( p ); +#endif + assert( winMutex_isInit==1 ); + EnterCriticalSection(&p->mutex); +#ifdef SQLITE_DEBUG + assert( p->nRef>0 || p->owner==0 ); + p->owner = tid; + p->nRef++; + if( p->trace ){ + OSTRACE(("ENTER-MUTEX tid=%lu, mutex(%d)=%p (%d), nRef=%d\n", + tid, p->id, p, p->trace, p->nRef)); + } +#endif +} + +static int winMutexTry(sqlite3_mutex *p){ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + DWORD tid = GetCurrentThreadId(); +#endif + int rc = SQLITE_BUSY; + assert( p ); + assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); + /* + ** The sqlite3_mutex_try() routine is very rarely used, and when it + ** is used it is merely an optimization. So it is OK for it to always + ** fail. + ** + ** The TryEnterCriticalSection() interface is only available on WinNT. + ** And some windows compilers complain if you try to use it without + ** first doing some #defines that prevent SQLite from building on Win98. + ** For that reason, we will omit this optimization for now. See + ** ticket #2685. + */ +#if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0400 + assert( winMutex_isInit==1 ); + assert( winMutex_isNt>=-1 && winMutex_isNt<=1 ); + if( winMutex_isNt<0 ){ + winMutex_isNt = sqlite3_win32_is_nt(); + } + assert( winMutex_isNt==0 || winMutex_isNt==1 ); + if( winMutex_isNt && TryEnterCriticalSection(&p->mutex) ){ +#ifdef SQLITE_DEBUG + p->owner = tid; + p->nRef++; +#endif + rc = SQLITE_OK; + } +#else + UNUSED_PARAMETER(p); +#endif +#ifdef SQLITE_DEBUG + if( p->trace ){ + OSTRACE(("TRY-MUTEX tid=%lu, mutex(%d)=%p (%d), owner=%lu, nRef=%d, rc=%s\n", + tid, p->id, p, p->trace, p->owner, p->nRef, sqlite3ErrName(rc))); + } +#endif + return rc; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered or +** is not currently allocated. SQLite will never do either. +*/ +static void winMutexLeave(sqlite3_mutex *p){ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + DWORD tid = GetCurrentThreadId(); +#endif + assert( p ); +#ifdef SQLITE_DEBUG + assert( p->nRef>0 ); + assert( p->owner==tid ); + p->nRef--; + if( p->nRef==0 ) p->owner = 0; + assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE ); +#endif + assert( winMutex_isInit==1 ); + LeaveCriticalSection(&p->mutex); +#ifdef SQLITE_DEBUG + if( p->trace ){ + OSTRACE(("LEAVE-MUTEX tid=%lu, mutex(%d)=%p (%d), nRef=%d\n", + tid, p->id, p, p->trace, p->nRef)); + } +#endif +} + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ + static const sqlite3_mutex_methods sMutex = { + winMutexInit, + winMutexEnd, + winMutexAlloc, + winMutexFree, + winMutexEnter, + winMutexTry, + winMutexLeave, +#ifdef SQLITE_DEBUG + winMutexHeld, + winMutexNotheld +#else + 0, + 0 +#endif + }; + return &sMutex; +} + +#endif /* SQLITE_MUTEX_W32 */ + +/************** End of mutex_w32.c *******************************************/ +/************** Begin file malloc.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** Memory allocation functions used throughout sqlite. +*/ +/* #include "sqliteInt.h" */ +/* #include */ + +/* +** Attempt to release up to n bytes of non-essential memory currently +** held by SQLite. An example of non-essential memory is memory used to +** cache database pages that are not currently in use. +*/ +SQLITE_API int sqlite3_release_memory(int n){ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + return sqlite3PcacheReleaseMemory(n); +#else + /* IMPLEMENTATION-OF: R-34391-24921 The sqlite3_release_memory() routine + ** is a no-op returning zero if SQLite is not compiled with + ** SQLITE_ENABLE_MEMORY_MANAGEMENT. */ + UNUSED_PARAMETER(n); + return 0; +#endif +} + +/* +** Default value of the hard heap limit. 0 means "no limit". +*/ +#ifndef SQLITE_MAX_MEMORY +# define SQLITE_MAX_MEMORY 0 +#endif + +/* +** State information local to the memory allocation subsystem. +*/ +static SQLITE_WSD struct Mem0Global { + sqlite3_mutex *mutex; /* Mutex to serialize access */ + sqlite3_int64 alarmThreshold; /* The soft heap limit */ + sqlite3_int64 hardLimit; /* The hard upper bound on memory */ + + /* + ** True if heap is nearly "full" where "full" is defined by the + ** sqlite3_soft_heap_limit() setting. + */ + int nearlyFull; +} mem0 = { 0, SQLITE_MAX_MEMORY, SQLITE_MAX_MEMORY, 0 }; + +#define mem0 GLOBAL(struct Mem0Global, mem0) + +/* +** Return the memory allocator mutex. sqlite3_status() needs it. +*/ +SQLITE_PRIVATE sqlite3_mutex *sqlite3MallocMutex(void){ + return mem0.mutex; +} + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** Deprecated external interface. It used to set an alarm callback +** that was invoked when memory usage grew too large. Now it is a +** no-op. +*/ +SQLITE_API int sqlite3_memory_alarm( + void(*xCallback)(void *pArg, sqlite3_int64 used,int N), + void *pArg, + sqlite3_int64 iThreshold +){ + (void)xCallback; + (void)pArg; + (void)iThreshold; + return SQLITE_OK; +} +#endif + +/* +** Set the soft heap-size limit for the library. An argument of +** zero disables the limit. A negative argument is a no-op used to +** obtain the return value. +** +** The return value is the value of the heap limit just before this +** interface was called. +** +** If the hard heap limit is enabled, then the soft heap limit cannot +** be disabled nor raised above the hard heap limit. +*/ +SQLITE_API sqlite3_int64 sqlite3_soft_heap_limit64(sqlite3_int64 n){ + sqlite3_int64 priorLimit; + sqlite3_int64 excess; + sqlite3_int64 nUsed; +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3_initialize(); + if( rc ) return -1; +#endif + sqlite3_mutex_enter(mem0.mutex); + priorLimit = mem0.alarmThreshold; + if( n<0 ){ + sqlite3_mutex_leave(mem0.mutex); + return priorLimit; + } + if( mem0.hardLimit>0 && (n>mem0.hardLimit || n==0) ){ + n = mem0.hardLimit; + } + mem0.alarmThreshold = n; + nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); + AtomicStore(&mem0.nearlyFull, n>0 && n<=nUsed); + sqlite3_mutex_leave(mem0.mutex); + excess = sqlite3_memory_used() - n; + if( excess>0 ) sqlite3_release_memory((int)(excess & 0x7fffffff)); + return priorLimit; +} +SQLITE_API void sqlite3_soft_heap_limit(int n){ + if( n<0 ) n = 0; + sqlite3_soft_heap_limit64(n); +} + +/* +** Set the hard heap-size limit for the library. An argument of zero +** disables the hard heap limit. A negative argument is a no-op used +** to obtain the return value without affecting the hard heap limit. +** +** The return value is the value of the hard heap limit just prior to +** calling this interface. +** +** Setting the hard heap limit will also activate the soft heap limit +** and constrain the soft heap limit to be no more than the hard heap +** limit. +*/ +SQLITE_API sqlite3_int64 sqlite3_hard_heap_limit64(sqlite3_int64 n){ + sqlite3_int64 priorLimit; +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3_initialize(); + if( rc ) return -1; +#endif + sqlite3_mutex_enter(mem0.mutex); + priorLimit = mem0.hardLimit; + if( n>=0 ){ + mem0.hardLimit = n; + if( n0 ); + + /* In Firefox (circa 2017-02-08), xRoundup() is remapped to an internal + ** implementation of malloc_good_size(), which must be called in debug + ** mode and specifically when the DMD "Dark Matter Detector" is enabled + ** or else a crash results. Hence, do not attempt to optimize out the + ** following xRoundup() call. */ + nFull = sqlite3GlobalConfig.m.xRoundup(n); + + sqlite3StatusHighwater(SQLITE_STATUS_MALLOC_SIZE, n); + if( mem0.alarmThreshold>0 ){ + sqlite3_int64 nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); + if( nUsed >= mem0.alarmThreshold - nFull ){ + AtomicStore(&mem0.nearlyFull, 1); + sqlite3MallocAlarm(nFull); + if( mem0.hardLimit ){ + nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); + if( nUsed >= mem0.hardLimit - nFull ){ + *pp = 0; + return; + } + } + }else{ + AtomicStore(&mem0.nearlyFull, 0); + } + } + p = sqlite3GlobalConfig.m.xMalloc(nFull); +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + if( p==0 && mem0.alarmThreshold>0 ){ + sqlite3MallocAlarm(nFull); + p = sqlite3GlobalConfig.m.xMalloc(nFull); + } +#endif + if( p ){ + nFull = sqlite3MallocSize(p); + sqlite3StatusUp(SQLITE_STATUS_MEMORY_USED, nFull); + sqlite3StatusUp(SQLITE_STATUS_MALLOC_COUNT, 1); + } + *pp = p; +} + +/* +** Allocate memory. This routine is like sqlite3_malloc() except that it +** assumes the memory subsystem has already been initialized. +*/ +SQLITE_PRIVATE void *sqlite3Malloc(u64 n){ + void *p; + if( n==0 || n>=0x7fffff00 ){ + /* A memory allocation of a number of bytes which is near the maximum + ** signed integer value might cause an integer overflow inside of the + ** xMalloc(). Hence we limit the maximum size to 0x7fffff00, giving + ** 255 bytes of overhead. SQLite itself will never use anything near + ** this amount. The only way to reach the limit is with sqlite3_malloc() */ + p = 0; + }else if( sqlite3GlobalConfig.bMemstat ){ + sqlite3_mutex_enter(mem0.mutex); + mallocWithAlarm((int)n, &p); + sqlite3_mutex_leave(mem0.mutex); + }else{ + p = sqlite3GlobalConfig.m.xMalloc((int)n); + } + assert( EIGHT_BYTE_ALIGNMENT(p) ); /* IMP: R-11148-40995 */ + return p; +} + +/* +** This version of the memory allocation is for use by the application. +** First make sure the memory subsystem is initialized, then do the +** allocation. +*/ +SQLITE_API void *sqlite3_malloc(int n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return n<=0 ? 0 : sqlite3Malloc(n); +} +SQLITE_API void *sqlite3_malloc64(sqlite3_uint64 n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return sqlite3Malloc(n); +} + +/* +** TRUE if p is a lookaside memory allocation from db +*/ +#ifndef SQLITE_OMIT_LOOKASIDE +static int isLookaside(sqlite3 *db, const void *p){ + return SQLITE_WITHIN(p, db->lookaside.pStart, db->lookaside.pEnd); +} +#else +#define isLookaside(A,B) 0 +#endif + +/* +** Return the size of a memory allocation previously obtained from +** sqlite3Malloc() or sqlite3_malloc(). +*/ +SQLITE_PRIVATE int sqlite3MallocSize(const void *p){ + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); + return sqlite3GlobalConfig.m.xSize((void*)p); +} +static int lookasideMallocSize(sqlite3 *db, const void *p){ +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + return plookaside.pMiddle ? db->lookaside.szTrue : LOOKASIDE_SMALL; +#else + return db->lookaside.szTrue; +#endif +} +SQLITE_PRIVATE int sqlite3DbMallocSize(sqlite3 *db, const void *p){ + assert( p!=0 ); +#ifdef SQLITE_DEBUG + if( db==0 || !isLookaside(db,p) ){ + if( db==0 ){ + assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); + }else{ + assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + } + } +#endif + if( db ){ + if( ((uptr)p)<(uptr)(db->lookaside.pEnd) ){ +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + if( ((uptr)p)>=(uptr)(db->lookaside.pMiddle) ){ + assert( sqlite3_mutex_held(db->mutex) ); + return LOOKASIDE_SMALL; + } +#endif + if( ((uptr)p)>=(uptr)(db->lookaside.pStart) ){ + assert( sqlite3_mutex_held(db->mutex) ); + return db->lookaside.szTrue; + } + } + } + return sqlite3GlobalConfig.m.xSize((void*)p); +} +SQLITE_API sqlite3_uint64 sqlite3_msize(void *p){ + assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); + return p ? sqlite3GlobalConfig.m.xSize(p) : 0; +} + +/* +** Free memory previously obtained from sqlite3Malloc(). +*/ +SQLITE_API void sqlite3_free(void *p){ + if( p==0 ) return; /* IMP: R-49053-54554 */ + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); + assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); + if( sqlite3GlobalConfig.bMemstat ){ + sqlite3_mutex_enter(mem0.mutex); + sqlite3StatusDown(SQLITE_STATUS_MEMORY_USED, sqlite3MallocSize(p)); + sqlite3StatusDown(SQLITE_STATUS_MALLOC_COUNT, 1); + sqlite3GlobalConfig.m.xFree(p); + sqlite3_mutex_leave(mem0.mutex); + }else{ + sqlite3GlobalConfig.m.xFree(p); + } +} + +/* +** Add the size of memory allocation "p" to the count in +** *db->pnBytesFreed. +*/ +static SQLITE_NOINLINE void measureAllocationSize(sqlite3 *db, void *p){ + *db->pnBytesFreed += sqlite3DbMallocSize(db,p); +} + +/* +** Free memory that might be associated with a particular database +** connection. Calling sqlite3DbFree(D,X) for X==0 is a harmless no-op. +** The sqlite3DbFreeNN(D,X) version requires that X be non-NULL. +*/ +SQLITE_PRIVATE void sqlite3DbFreeNN(sqlite3 *db, void *p){ + assert( db==0 || sqlite3_mutex_held(db->mutex) ); + assert( p!=0 ); + if( db ){ + if( db->pnBytesFreed ){ + measureAllocationSize(db, p); + return; + } + if( ((uptr)p)<(uptr)(db->lookaside.pEnd) ){ +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + if( ((uptr)p)>=(uptr)(db->lookaside.pMiddle) ){ + LookasideSlot *pBuf = (LookasideSlot*)p; +#ifdef SQLITE_DEBUG + memset(p, 0xaa, LOOKASIDE_SMALL); /* Trash freed content */ +#endif + pBuf->pNext = db->lookaside.pSmallFree; + db->lookaside.pSmallFree = pBuf; + return; + } +#endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ + if( ((uptr)p)>=(uptr)(db->lookaside.pStart) ){ + LookasideSlot *pBuf = (LookasideSlot*)p; +#ifdef SQLITE_DEBUG + memset(p, 0xaa, db->lookaside.szTrue); /* Trash freed content */ +#endif + pBuf->pNext = db->lookaside.pFree; + db->lookaside.pFree = pBuf; + return; + } + } + } + assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( db!=0 || sqlite3MemdebugNoType(p, MEMTYPE_LOOKASIDE) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + sqlite3_free(p); +} +SQLITE_PRIVATE void sqlite3DbFree(sqlite3 *db, void *p){ + assert( db==0 || sqlite3_mutex_held(db->mutex) ); + if( p ) sqlite3DbFreeNN(db, p); +} + +/* +** Change the size of an existing memory allocation +*/ +SQLITE_PRIVATE void *sqlite3Realloc(void *pOld, u64 nBytes){ + int nOld, nNew, nDiff; + void *pNew; + assert( sqlite3MemdebugHasType(pOld, MEMTYPE_HEAP) ); + assert( sqlite3MemdebugNoType(pOld, (u8)~MEMTYPE_HEAP) ); + if( pOld==0 ){ + return sqlite3Malloc(nBytes); /* IMP: R-04300-56712 */ + } + if( nBytes==0 ){ + sqlite3_free(pOld); /* IMP: R-26507-47431 */ + return 0; + } + if( nBytes>=0x7fffff00 ){ + /* The 0x7ffff00 limit term is explained in comments on sqlite3Malloc() */ + return 0; + } + nOld = sqlite3MallocSize(pOld); + /* IMPLEMENTATION-OF: R-46199-30249 SQLite guarantees that the second + ** argument to xRealloc is always a value returned by a prior call to + ** xRoundup. */ + nNew = sqlite3GlobalConfig.m.xRoundup((int)nBytes); + if( nOld==nNew ){ + pNew = pOld; + }else if( sqlite3GlobalConfig.bMemstat ){ + sqlite3_int64 nUsed; + sqlite3_mutex_enter(mem0.mutex); + sqlite3StatusHighwater(SQLITE_STATUS_MALLOC_SIZE, (int)nBytes); + nDiff = nNew - nOld; + if( nDiff>0 && (nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED)) >= + mem0.alarmThreshold-nDiff ){ + sqlite3MallocAlarm(nDiff); + if( mem0.hardLimit>0 && nUsed >= mem0.hardLimit - nDiff ){ + sqlite3_mutex_leave(mem0.mutex); + return 0; + } + } + pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + if( pNew==0 && mem0.alarmThreshold>0 ){ + sqlite3MallocAlarm((int)nBytes); + pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); + } +#endif + if( pNew ){ + nNew = sqlite3MallocSize(pNew); + sqlite3StatusUp(SQLITE_STATUS_MEMORY_USED, nNew-nOld); + } + sqlite3_mutex_leave(mem0.mutex); + }else{ + pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); + } + assert( EIGHT_BYTE_ALIGNMENT(pNew) ); /* IMP: R-11148-40995 */ + return pNew; +} + +/* +** The public interface to sqlite3Realloc. Make sure that the memory +** subsystem is initialized prior to invoking sqliteRealloc. +*/ +SQLITE_API void *sqlite3_realloc(void *pOld, int n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + if( n<0 ) n = 0; /* IMP: R-26507-47431 */ + return sqlite3Realloc(pOld, n); +} +SQLITE_API void *sqlite3_realloc64(void *pOld, sqlite3_uint64 n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return sqlite3Realloc(pOld, n); +} + + +/* +** Allocate and zero memory. +*/ +SQLITE_PRIVATE void *sqlite3MallocZero(u64 n){ + void *p = sqlite3Malloc(n); + if( p ){ + memset(p, 0, (size_t)n); + } + return p; +} + +/* +** Allocate and zero memory. If the allocation fails, make +** the mallocFailed flag in the connection pointer. +*/ +SQLITE_PRIVATE void *sqlite3DbMallocZero(sqlite3 *db, u64 n){ + void *p; + testcase( db==0 ); + p = sqlite3DbMallocRaw(db, n); + if( p ) memset(p, 0, (size_t)n); + return p; +} + + +/* Finish the work of sqlite3DbMallocRawNN for the unusual and +** slower case when the allocation cannot be fulfilled using lookaside. +*/ +static SQLITE_NOINLINE void *dbMallocRawFinish(sqlite3 *db, u64 n){ + void *p; + assert( db!=0 ); + p = sqlite3Malloc(n); + if( !p ) sqlite3OomFault(db); + sqlite3MemdebugSetType(p, + (db->lookaside.bDisable==0) ? MEMTYPE_LOOKASIDE : MEMTYPE_HEAP); + return p; +} + +/* +** Allocate memory, either lookaside (if possible) or heap. +** If the allocation fails, set the mallocFailed flag in +** the connection pointer. +** +** If db!=0 and db->mallocFailed is true (indicating a prior malloc +** failure on the same database connection) then always return 0. +** Hence for a particular database connection, once malloc starts +** failing, it fails consistently until mallocFailed is reset. +** This is an important assumption. There are many places in the +** code that do things like this: +** +** int *a = (int*)sqlite3DbMallocRaw(db, 100); +** int *b = (int*)sqlite3DbMallocRaw(db, 200); +** if( b ) a[10] = 9; +** +** In other words, if a subsequent malloc (ex: "b") worked, it is assumed +** that all prior mallocs (ex: "a") worked too. +** +** The sqlite3MallocRawNN() variant guarantees that the "db" parameter is +** not a NULL pointer. +*/ +SQLITE_PRIVATE void *sqlite3DbMallocRaw(sqlite3 *db, u64 n){ + void *p; + if( db ) return sqlite3DbMallocRawNN(db, n); + p = sqlite3Malloc(n); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + return p; +} +SQLITE_PRIVATE void *sqlite3DbMallocRawNN(sqlite3 *db, u64 n){ +#ifndef SQLITE_OMIT_LOOKASIDE + LookasideSlot *pBuf; + assert( db!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); + assert( db->pnBytesFreed==0 ); + if( n>db->lookaside.sz ){ + if( !db->lookaside.bDisable ){ + db->lookaside.anStat[1]++; + }else if( db->mallocFailed ){ + return 0; + } + return dbMallocRawFinish(db, n); + } +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + if( n<=LOOKASIDE_SMALL ){ + if( (pBuf = db->lookaside.pSmallFree)!=0 ){ + db->lookaside.pSmallFree = pBuf->pNext; + db->lookaside.anStat[0]++; + return (void*)pBuf; + }else if( (pBuf = db->lookaside.pSmallInit)!=0 ){ + db->lookaside.pSmallInit = pBuf->pNext; + db->lookaside.anStat[0]++; + return (void*)pBuf; + } + } +#endif + if( (pBuf = db->lookaside.pFree)!=0 ){ + db->lookaside.pFree = pBuf->pNext; + db->lookaside.anStat[0]++; + return (void*)pBuf; + }else if( (pBuf = db->lookaside.pInit)!=0 ){ + db->lookaside.pInit = pBuf->pNext; + db->lookaside.anStat[0]++; + return (void*)pBuf; + }else{ + db->lookaside.anStat[2]++; + } +#else + assert( db!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); + assert( db->pnBytesFreed==0 ); + if( db->mallocFailed ){ + return 0; + } +#endif + return dbMallocRawFinish(db, n); +} + +/* Forward declaration */ +static SQLITE_NOINLINE void *dbReallocFinish(sqlite3 *db, void *p, u64 n); + +/* +** Resize the block of memory pointed to by p to n bytes. If the +** resize fails, set the mallocFailed flag in the connection object. +*/ +SQLITE_PRIVATE void *sqlite3DbRealloc(sqlite3 *db, void *p, u64 n){ + assert( db!=0 ); + if( p==0 ) return sqlite3DbMallocRawNN(db, n); + assert( sqlite3_mutex_held(db->mutex) ); + if( ((uptr)p)<(uptr)db->lookaside.pEnd ){ +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + if( ((uptr)p)>=(uptr)db->lookaside.pMiddle ){ + if( n<=LOOKASIDE_SMALL ) return p; + }else +#endif + if( ((uptr)p)>=(uptr)db->lookaside.pStart ){ + if( n<=db->lookaside.szTrue ) return p; + } + } + return dbReallocFinish(db, p, n); +} +static SQLITE_NOINLINE void *dbReallocFinish(sqlite3 *db, void *p, u64 n){ + void *pNew = 0; + assert( db!=0 ); + assert( p!=0 ); + if( db->mallocFailed==0 ){ + if( isLookaside(db, p) ){ + pNew = sqlite3DbMallocRawNN(db, n); + if( pNew ){ + memcpy(pNew, p, lookasideMallocSize(db, p)); + sqlite3DbFree(db, p); + } + }else{ + assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + pNew = sqlite3Realloc(p, n); + if( !pNew ){ + sqlite3OomFault(db); + } + sqlite3MemdebugSetType(pNew, + (db->lookaside.bDisable==0 ? MEMTYPE_LOOKASIDE : MEMTYPE_HEAP)); + } + } + return pNew; +} + +/* +** Attempt to reallocate p. If the reallocation fails, then free p +** and set the mallocFailed flag in the database connection. +*/ +SQLITE_PRIVATE void *sqlite3DbReallocOrFree(sqlite3 *db, void *p, u64 n){ + void *pNew; + pNew = sqlite3DbRealloc(db, p, n); + if( !pNew ){ + sqlite3DbFree(db, p); + } + return pNew; +} + +/* +** Make a copy of a string in memory obtained from sqliteMalloc(). These +** functions call sqlite3MallocRaw() directly instead of sqliteMalloc(). This +** is because when memory debugging is turned on, these two functions are +** called via macros that record the current file and line number in the +** ThreadData structure. +*/ +SQLITE_PRIVATE char *sqlite3DbStrDup(sqlite3 *db, const char *z){ + char *zNew; + size_t n; + if( z==0 ){ + return 0; + } + n = strlen(z) + 1; + zNew = sqlite3DbMallocRaw(db, n); + if( zNew ){ + memcpy(zNew, z, n); + } + return zNew; +} +SQLITE_PRIVATE char *sqlite3DbStrNDup(sqlite3 *db, const char *z, u64 n){ + char *zNew; + assert( db!=0 ); + assert( z!=0 || n==0 ); + assert( (n&0x7fffffff)==n ); + zNew = z ? sqlite3DbMallocRawNN(db, n+1) : 0; + if( zNew ){ + memcpy(zNew, z, (size_t)n); + zNew[n] = 0; + } + return zNew; +} + +/* +** The text between zStart and zEnd represents a phrase within a larger +** SQL statement. Make a copy of this phrase in space obtained form +** sqlite3DbMalloc(). Omit leading and trailing whitespace. +*/ +SQLITE_PRIVATE char *sqlite3DbSpanDup(sqlite3 *db, const char *zStart, const char *zEnd){ + int n; + while( sqlite3Isspace(zStart[0]) ) zStart++; + n = (int)(zEnd - zStart); + while( ALWAYS(n>0) && sqlite3Isspace(zStart[n-1]) ) n--; + return sqlite3DbStrNDup(db, zStart, n); +} + +/* +** Free any prior content in *pz and replace it with a copy of zNew. +*/ +SQLITE_PRIVATE void sqlite3SetString(char **pz, sqlite3 *db, const char *zNew){ + char *z = sqlite3DbStrDup(db, zNew); + sqlite3DbFree(db, *pz); + *pz = z; +} + +/* +** Call this routine to record the fact that an OOM (out-of-memory) error +** has happened. This routine will set db->mallocFailed, and also +** temporarily disable the lookaside memory allocator and interrupt +** any running VDBEs. +** +** Always return a NULL pointer so that this routine can be invoked using +** +** return sqlite3OomFault(db); +** +** and thereby avoid unnecessary stack frame allocations for the overwhelmingly +** common case where no OOM occurs. +*/ +SQLITE_PRIVATE void *sqlite3OomFault(sqlite3 *db){ + if( db->mallocFailed==0 && db->bBenignMalloc==0 ){ + db->mallocFailed = 1; + if( db->nVdbeExec>0 ){ + AtomicStore(&db->u1.isInterrupted, 1); + } + DisableLookaside; + if( db->pParse ){ + sqlite3ErrorMsg(db->pParse, "out of memory"); + db->pParse->rc = SQLITE_NOMEM_BKPT; + } + } + return 0; +} + +/* +** This routine reactivates the memory allocator and clears the +** db->mallocFailed flag as necessary. +** +** The memory allocator is not restarted if there are running +** VDBEs. +*/ +SQLITE_PRIVATE void sqlite3OomClear(sqlite3 *db){ + if( db->mallocFailed && db->nVdbeExec==0 ){ + db->mallocFailed = 0; + AtomicStore(&db->u1.isInterrupted, 0); + assert( db->lookaside.bDisable>0 ); + EnableLookaside; + } +} + +/* +** Take actions at the end of an API call to deal with error codes. +*/ +static SQLITE_NOINLINE int apiHandleError(sqlite3 *db, int rc){ + if( db->mallocFailed || rc==SQLITE_IOERR_NOMEM ){ + sqlite3OomClear(db); + sqlite3Error(db, SQLITE_NOMEM); + return SQLITE_NOMEM_BKPT; + } + return rc & db->errMask; +} + +/* +** This function must be called before exiting any API function (i.e. +** returning control to the user) that has called sqlite3_malloc or +** sqlite3_realloc. +** +** The returned value is normally a copy of the second argument to this +** function. However, if a malloc() failure has occurred since the previous +** invocation SQLITE_NOMEM is returned instead. +** +** If an OOM as occurred, then the connection error-code (the value +** returned by sqlite3_errcode()) is set to SQLITE_NOMEM. +*/ +SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){ + /* If the db handle must hold the connection handle mutex here. + ** Otherwise the read (and possible write) of db->mallocFailed + ** is unsafe, as is the call to sqlite3Error(). + */ + assert( db!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); + if( db->mallocFailed || rc ){ + return apiHandleError(db, rc); + } + return rc & db->errMask; +} + +/************** End of malloc.c **********************************************/ +/************** Begin file printf.c ******************************************/ +/* +** The "printf" code that follows dates from the 1980's. It is in +** the public domain. +** +************************************************************************** +** +** This file contains code for a set of "printf"-like routines. These +** routines format strings much like the printf() from the standard C +** library, though the implementation here has enhancements to support +** SQLite. +*/ +/* #include "sqliteInt.h" */ + +/* +** Conversion types fall into various categories as defined by the +** following enumeration. +*/ +#define etRADIX 0 /* non-decimal integer types. %x %o */ +#define etFLOAT 1 /* Floating point. %f */ +#define etEXP 2 /* Exponentional notation. %e and %E */ +#define etGENERIC 3 /* Floating or exponential, depending on exponent. %g */ +#define etSIZE 4 /* Return number of characters processed so far. %n */ +#define etSTRING 5 /* Strings. %s */ +#define etDYNSTRING 6 /* Dynamically allocated strings. %z */ +#define etPERCENT 7 /* Percent symbol. %% */ +#define etCHARX 8 /* Characters. %c */ +/* The rest are extensions, not normally found in printf() */ +#define etSQLESCAPE 9 /* Strings with '\'' doubled. %q */ +#define etSQLESCAPE2 10 /* Strings with '\'' doubled and enclosed in '', + NULL pointers replaced by SQL NULL. %Q */ +#define etTOKEN 11 /* a pointer to a Token structure */ +#define etSRCITEM 12 /* a pointer to a SrcItem */ +#define etPOINTER 13 /* The %p conversion */ +#define etSQLESCAPE3 14 /* %w -> Strings with '\"' doubled */ +#define etORDINAL 15 /* %r -> 1st, 2nd, 3rd, 4th, etc. English only */ +#define etDECIMAL 16 /* %d or %u, but not %x, %o */ + +#define etINVALID 17 /* Any unrecognized conversion type */ + + +/* +** An "etByte" is an 8-bit unsigned value. +*/ +typedef unsigned char etByte; + +/* +** Each builtin conversion character (ex: the 'd' in "%d") is described +** by an instance of the following structure +*/ +typedef struct et_info { /* Information about each format field */ + char fmttype; /* The format field code letter */ + etByte base; /* The base for radix conversion */ + etByte flags; /* One or more of FLAG_ constants below */ + etByte type; /* Conversion paradigm */ + etByte charset; /* Offset into aDigits[] of the digits string */ + etByte prefix; /* Offset into aPrefix[] of the prefix string */ +} et_info; + +/* +** Allowed values for et_info.flags +*/ +#define FLAG_SIGNED 1 /* True if the value to convert is signed */ +#define FLAG_STRING 4 /* Allow infinite precision */ + + +/* +** The following table is searched linearly, so it is good to put the +** most frequently used conversion types first. +*/ +static const char aDigits[] = "0123456789ABCDEF0123456789abcdef"; +static const char aPrefix[] = "-x0\000X0"; +static const et_info fmtinfo[] = { + { 'd', 10, 1, etDECIMAL, 0, 0 }, + { 's', 0, 4, etSTRING, 0, 0 }, + { 'g', 0, 1, etGENERIC, 30, 0 }, + { 'z', 0, 4, etDYNSTRING, 0, 0 }, + { 'q', 0, 4, etSQLESCAPE, 0, 0 }, + { 'Q', 0, 4, etSQLESCAPE2, 0, 0 }, + { 'w', 0, 4, etSQLESCAPE3, 0, 0 }, + { 'c', 0, 0, etCHARX, 0, 0 }, + { 'o', 8, 0, etRADIX, 0, 2 }, + { 'u', 10, 0, etDECIMAL, 0, 0 }, + { 'x', 16, 0, etRADIX, 16, 1 }, + { 'X', 16, 0, etRADIX, 0, 4 }, +#ifndef SQLITE_OMIT_FLOATING_POINT + { 'f', 0, 1, etFLOAT, 0, 0 }, + { 'e', 0, 1, etEXP, 30, 0 }, + { 'E', 0, 1, etEXP, 14, 0 }, + { 'G', 0, 1, etGENERIC, 14, 0 }, +#endif + { 'i', 10, 1, etDECIMAL, 0, 0 }, + { 'n', 0, 0, etSIZE, 0, 0 }, + { '%', 0, 0, etPERCENT, 0, 0 }, + { 'p', 16, 0, etPOINTER, 0, 1 }, + + /* All the rest are undocumented and are for internal use only */ + { 'T', 0, 0, etTOKEN, 0, 0 }, + { 'S', 0, 0, etSRCITEM, 0, 0 }, + { 'r', 10, 1, etORDINAL, 0, 0 }, +}; + +/* Notes: +** +** %S Takes a pointer to SrcItem. Shows name or database.name +** %!S Like %S but prefer the zName over the zAlias +*/ + +/* Floating point constants used for rounding */ +static const double arRound[] = { + 5.0e-01, 5.0e-02, 5.0e-03, 5.0e-04, 5.0e-05, + 5.0e-06, 5.0e-07, 5.0e-08, 5.0e-09, 5.0e-10, +}; + +/* +** If SQLITE_OMIT_FLOATING_POINT is defined, then none of the floating point +** conversions will work. +*/ +#ifndef SQLITE_OMIT_FLOATING_POINT +/* +** "*val" is a double such that 0.1 <= *val < 10.0 +** Return the ascii code for the leading digit of *val, then +** multiply "*val" by 10.0 to renormalize. +** +** Example: +** input: *val = 3.14159 +** output: *val = 1.4159 function return = '3' +** +** The counter *cnt is incremented each time. After counter exceeds +** 16 (the number of significant digits in a 64-bit float) '0' is +** always returned. +*/ +static char et_getdigit(LONGDOUBLE_TYPE *val, int *cnt){ + int digit; + LONGDOUBLE_TYPE d; + if( (*cnt)<=0 ) return '0'; + (*cnt)--; + digit = (int)*val; + d = digit; + digit += '0'; + *val = (*val - d)*10.0; + return (char)digit; +} +#endif /* SQLITE_OMIT_FLOATING_POINT */ + +/* +** Set the StrAccum object to an error mode. +*/ +SQLITE_PRIVATE void sqlite3StrAccumSetError(StrAccum *p, u8 eError){ + assert( eError==SQLITE_NOMEM || eError==SQLITE_TOOBIG ); + p->accError = eError; + if( p->mxAlloc ) sqlite3_str_reset(p); + if( eError==SQLITE_TOOBIG ) sqlite3ErrorToParser(p->db, eError); +} + +/* +** Extra argument values from a PrintfArguments object +*/ +static sqlite3_int64 getIntArg(PrintfArguments *p){ + if( p->nArg<=p->nUsed ) return 0; + return sqlite3_value_int64(p->apArg[p->nUsed++]); +} +static double getDoubleArg(PrintfArguments *p){ + if( p->nArg<=p->nUsed ) return 0.0; + return sqlite3_value_double(p->apArg[p->nUsed++]); +} +static char *getTextArg(PrintfArguments *p){ + if( p->nArg<=p->nUsed ) return 0; + return (char*)sqlite3_value_text(p->apArg[p->nUsed++]); +} + +/* +** Allocate memory for a temporary buffer needed for printf rendering. +** +** If the requested size of the temp buffer is larger than the size +** of the output buffer in pAccum, then cause an SQLITE_TOOBIG error. +** Do the size check before the memory allocation to prevent rogue +** SQL from requesting large allocations using the precision or width +** field of the printf() function. +*/ +static char *printfTempBuf(sqlite3_str *pAccum, sqlite3_int64 n){ + char *z; + if( pAccum->accError ) return 0; + if( n>pAccum->nAlloc && n>pAccum->mxAlloc ){ + sqlite3StrAccumSetError(pAccum, SQLITE_TOOBIG); + return 0; + } + z = sqlite3DbMallocRaw(pAccum->db, n); + if( z==0 ){ + sqlite3StrAccumSetError(pAccum, SQLITE_NOMEM); + } + return z; +} + +/* +** On machines with a small stack size, you can redefine the +** SQLITE_PRINT_BUF_SIZE to be something smaller, if desired. +*/ +#ifndef SQLITE_PRINT_BUF_SIZE +# define SQLITE_PRINT_BUF_SIZE 70 +#endif +#define etBUFSIZE SQLITE_PRINT_BUF_SIZE /* Size of the output buffer */ + +/* +** Hard limit on the precision of floating-point conversions. +*/ +#ifndef SQLITE_PRINTF_PRECISION_LIMIT +# define SQLITE_FP_PRECISION_LIMIT 100000000 +#endif + +/* +** Render a string given by "fmt" into the StrAccum object. +*/ +SQLITE_API void sqlite3_str_vappendf( + sqlite3_str *pAccum, /* Accumulate results here */ + const char *fmt, /* Format string */ + va_list ap /* arguments */ +){ + int c; /* Next character in the format string */ + char *bufpt; /* Pointer to the conversion buffer */ + int precision; /* Precision of the current field */ + int length; /* Length of the field */ + int idx; /* A general purpose loop counter */ + int width; /* Width of the current field */ + etByte flag_leftjustify; /* True if "-" flag is present */ + etByte flag_prefix; /* '+' or ' ' or 0 for prefix */ + etByte flag_alternateform; /* True if "#" flag is present */ + etByte flag_altform2; /* True if "!" flag is present */ + etByte flag_zeropad; /* True if field width constant starts with zero */ + etByte flag_long; /* 1 for the "l" flag, 2 for "ll", 0 by default */ + etByte done; /* Loop termination flag */ + etByte cThousand; /* Thousands separator for %d and %u */ + etByte xtype = etINVALID; /* Conversion paradigm */ + u8 bArgList; /* True for SQLITE_PRINTF_SQLFUNC */ + char prefix; /* Prefix character. "+" or "-" or " " or '\0'. */ + sqlite_uint64 longvalue; /* Value for integer types */ + LONGDOUBLE_TYPE realvalue; /* Value for real types */ + const et_info *infop; /* Pointer to the appropriate info structure */ + char *zOut; /* Rendering buffer */ + int nOut; /* Size of the rendering buffer */ + char *zExtra = 0; /* Malloced memory used by some conversion */ +#ifndef SQLITE_OMIT_FLOATING_POINT + int exp, e2; /* exponent of real numbers */ + int nsd; /* Number of significant digits returned */ + double rounder; /* Used for rounding floating point values */ + etByte flag_dp; /* True if decimal point should be shown */ + etByte flag_rtz; /* True if trailing zeros should be removed */ +#endif + PrintfArguments *pArgList = 0; /* Arguments for SQLITE_PRINTF_SQLFUNC */ + char buf[etBUFSIZE]; /* Conversion buffer */ + + /* pAccum never starts out with an empty buffer that was obtained from + ** malloc(). This precondition is required by the mprintf("%z...") + ** optimization. */ + assert( pAccum->nChar>0 || (pAccum->printfFlags&SQLITE_PRINTF_MALLOCED)==0 ); + + bufpt = 0; + if( (pAccum->printfFlags & SQLITE_PRINTF_SQLFUNC)!=0 ){ + pArgList = va_arg(ap, PrintfArguments*); + bArgList = 1; + }else{ + bArgList = 0; + } + for(; (c=(*fmt))!=0; ++fmt){ + if( c!='%' ){ + bufpt = (char *)fmt; +#if HAVE_STRCHRNUL + fmt = strchrnul(fmt, '%'); +#else + do{ fmt++; }while( *fmt && *fmt != '%' ); +#endif + sqlite3_str_append(pAccum, bufpt, (int)(fmt - bufpt)); + if( *fmt==0 ) break; + } + if( (c=(*++fmt))==0 ){ + sqlite3_str_append(pAccum, "%", 1); + break; + } + /* Find out what flags are present */ + flag_leftjustify = flag_prefix = cThousand = + flag_alternateform = flag_altform2 = flag_zeropad = 0; + done = 0; + width = 0; + flag_long = 0; + precision = -1; + do{ + switch( c ){ + case '-': flag_leftjustify = 1; break; + case '+': flag_prefix = '+'; break; + case ' ': flag_prefix = ' '; break; + case '#': flag_alternateform = 1; break; + case '!': flag_altform2 = 1; break; + case '0': flag_zeropad = 1; break; + case ',': cThousand = ','; break; + default: done = 1; break; + case 'l': { + flag_long = 1; + c = *++fmt; + if( c=='l' ){ + c = *++fmt; + flag_long = 2; + } + done = 1; + break; + } + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': { + unsigned wx = c - '0'; + while( (c = *++fmt)>='0' && c<='9' ){ + wx = wx*10 + c - '0'; + } + testcase( wx>0x7fffffff ); + width = wx & 0x7fffffff; +#ifdef SQLITE_PRINTF_PRECISION_LIMIT + if( width>SQLITE_PRINTF_PRECISION_LIMIT ){ + width = SQLITE_PRINTF_PRECISION_LIMIT; + } +#endif + if( c!='.' && c!='l' ){ + done = 1; + }else{ + fmt--; + } + break; + } + case '*': { + if( bArgList ){ + width = (int)getIntArg(pArgList); + }else{ + width = va_arg(ap,int); + } + if( width<0 ){ + flag_leftjustify = 1; + width = width >= -2147483647 ? -width : 0; + } +#ifdef SQLITE_PRINTF_PRECISION_LIMIT + if( width>SQLITE_PRINTF_PRECISION_LIMIT ){ + width = SQLITE_PRINTF_PRECISION_LIMIT; + } +#endif + if( (c = fmt[1])!='.' && c!='l' ){ + c = *++fmt; + done = 1; + } + break; + } + case '.': { + c = *++fmt; + if( c=='*' ){ + if( bArgList ){ + precision = (int)getIntArg(pArgList); + }else{ + precision = va_arg(ap,int); + } + if( precision<0 ){ + precision = precision >= -2147483647 ? -precision : -1; + } + c = *++fmt; + }else{ + unsigned px = 0; + while( c>='0' && c<='9' ){ + px = px*10 + c - '0'; + c = *++fmt; + } + testcase( px>0x7fffffff ); + precision = px & 0x7fffffff; + } +#ifdef SQLITE_PRINTF_PRECISION_LIMIT + if( precision>SQLITE_PRINTF_PRECISION_LIMIT ){ + precision = SQLITE_PRINTF_PRECISION_LIMIT; + } +#endif + if( c=='l' ){ + --fmt; + }else{ + done = 1; + } + break; + } + } + }while( !done && (c=(*++fmt))!=0 ); + + /* Fetch the info entry for the field */ + infop = &fmtinfo[0]; + xtype = etINVALID; + for(idx=0; idxtype; + break; + } + } + + /* + ** At this point, variables are initialized as follows: + ** + ** flag_alternateform TRUE if a '#' is present. + ** flag_altform2 TRUE if a '!' is present. + ** flag_prefix '+' or ' ' or zero + ** flag_leftjustify TRUE if a '-' is present or if the + ** field width was negative. + ** flag_zeropad TRUE if the width began with 0. + ** flag_long 1 for "l", 2 for "ll" + ** width The specified field width. This is + ** always non-negative. Zero is the default. + ** precision The specified precision. The default + ** is -1. + ** xtype The class of the conversion. + ** infop Pointer to the appropriate info struct. + */ + assert( width>=0 ); + assert( precision>=(-1) ); + switch( xtype ){ + case etPOINTER: + flag_long = sizeof(char*)==sizeof(i64) ? 2 : + sizeof(char*)==sizeof(long int) ? 1 : 0; + /* no break */ deliberate_fall_through + case etORDINAL: + case etRADIX: + cThousand = 0; + /* no break */ deliberate_fall_through + case etDECIMAL: + if( infop->flags & FLAG_SIGNED ){ + i64 v; + if( bArgList ){ + v = getIntArg(pArgList); + }else if( flag_long ){ + if( flag_long==2 ){ + v = va_arg(ap,i64) ; + }else{ + v = va_arg(ap,long int); + } + }else{ + v = va_arg(ap,int); + } + if( v<0 ){ + testcase( v==SMALLEST_INT64 ); + testcase( v==(-1) ); + longvalue = ~v; + longvalue++; + prefix = '-'; + }else{ + longvalue = v; + prefix = flag_prefix; + } + }else{ + if( bArgList ){ + longvalue = (u64)getIntArg(pArgList); + }else if( flag_long ){ + if( flag_long==2 ){ + longvalue = va_arg(ap,u64); + }else{ + longvalue = va_arg(ap,unsigned long int); + } + }else{ + longvalue = va_arg(ap,unsigned int); + } + prefix = 0; + } + if( longvalue==0 ) flag_alternateform = 0; + if( flag_zeropad && precision=4 || (longvalue/10)%10==1 ){ + x = 0; + } + *(--bufpt) = zOrd[x*2+1]; + *(--bufpt) = zOrd[x*2]; + } + { + const char *cset = &aDigits[infop->charset]; + u8 base = infop->base; + do{ /* Convert to ascii */ + *(--bufpt) = cset[longvalue%base]; + longvalue = longvalue/base; + }while( longvalue>0 ); + } + length = (int)(&zOut[nOut-1]-bufpt); + while( precision>length ){ + *(--bufpt) = '0'; /* Zero pad */ + length++; + } + if( cThousand ){ + int nn = (length - 1)/3; /* Number of "," to insert */ + int ix = (length - 1)%3 + 1; + bufpt -= nn; + for(idx=0; nn>0; idx++){ + bufpt[idx] = bufpt[idx+nn]; + ix--; + if( ix==0 ){ + bufpt[++idx] = cThousand; + nn--; + ix = 3; + } + } + } + if( prefix ) *(--bufpt) = prefix; /* Add sign */ + if( flag_alternateform && infop->prefix ){ /* Add "0" or "0x" */ + const char *pre; + char x; + pre = &aPrefix[infop->prefix]; + for(; (x=(*pre))!=0; pre++) *(--bufpt) = x; + } + length = (int)(&zOut[nOut-1]-bufpt); + break; + case etFLOAT: + case etEXP: + case etGENERIC: + if( bArgList ){ + realvalue = getDoubleArg(pArgList); + }else{ + realvalue = va_arg(ap,double); + } +#ifdef SQLITE_OMIT_FLOATING_POINT + length = 0; +#else + if( precision<0 ) precision = 6; /* Set default precision */ +#ifdef SQLITE_FP_PRECISION_LIMIT + if( precision>SQLITE_FP_PRECISION_LIMIT ){ + precision = SQLITE_FP_PRECISION_LIMIT; + } +#endif + if( realvalue<0.0 ){ + realvalue = -realvalue; + prefix = '-'; + }else{ + prefix = flag_prefix; + } + if( xtype==etGENERIC && precision>0 ) precision--; + testcase( precision>0xfff ); + idx = precision & 0xfff; + rounder = arRound[idx%10]; + while( idx>=10 ){ rounder *= 1.0e-10; idx -= 10; } + if( xtype==etFLOAT ){ + double rx = (double)realvalue; + sqlite3_uint64 u; + int ex; + memcpy(&u, &rx, sizeof(u)); + ex = -1023 + (int)((u>>52)&0x7ff); + if( precision+(ex/3) < 15 ) rounder += realvalue*3e-16; + realvalue += rounder; + } + /* Normalize realvalue to within 10.0 > realvalue >= 1.0 */ + exp = 0; + if( sqlite3IsNaN((double)realvalue) ){ + bufpt = "NaN"; + length = 3; + break; + } + if( realvalue>0.0 ){ + LONGDOUBLE_TYPE scale = 1.0; + while( realvalue>=1e100*scale && exp<=350 ){ scale *= 1e100;exp+=100;} + while( realvalue>=1e10*scale && exp<=350 ){ scale *= 1e10; exp+=10; } + while( realvalue>=10.0*scale && exp<=350 ){ scale *= 10.0; exp++; } + realvalue /= scale; + while( realvalue<1e-8 ){ realvalue *= 1e8; exp-=8; } + while( realvalue<1.0 ){ realvalue *= 10.0; exp--; } + if( exp>350 ){ + bufpt = buf; + buf[0] = prefix; + memcpy(buf+(prefix!=0),"Inf",4); + length = 3+(prefix!=0); + break; + } + } + bufpt = buf; + /* + ** If the field type is etGENERIC, then convert to either etEXP + ** or etFLOAT, as appropriate. + */ + if( xtype!=etFLOAT ){ + realvalue += rounder; + if( realvalue>=10.0 ){ realvalue *= 0.1; exp++; } + } + if( xtype==etGENERIC ){ + flag_rtz = !flag_alternateform; + if( exp<-4 || exp>precision ){ + xtype = etEXP; + }else{ + precision = precision - exp; + xtype = etFLOAT; + } + }else{ + flag_rtz = flag_altform2; + } + if( xtype==etEXP ){ + e2 = 0; + }else{ + e2 = exp; + } + { + i64 szBufNeeded; /* Size of a temporary buffer needed */ + szBufNeeded = MAX(e2,0)+(i64)precision+(i64)width+15; + if( szBufNeeded > etBUFSIZE ){ + bufpt = zExtra = printfTempBuf(pAccum, szBufNeeded); + if( bufpt==0 ) return; + } + } + zOut = bufpt; + nsd = 16 + flag_altform2*10; + flag_dp = (precision>0 ?1:0) | flag_alternateform | flag_altform2; + /* The sign in front of the number */ + if( prefix ){ + *(bufpt++) = prefix; + } + /* Digits prior to the decimal point */ + if( e2<0 ){ + *(bufpt++) = '0'; + }else{ + for(; e2>=0; e2--){ + *(bufpt++) = et_getdigit(&realvalue,&nsd); + } + } + /* The decimal point */ + if( flag_dp ){ + *(bufpt++) = '.'; + } + /* "0" digits after the decimal point but before the first + ** significant digit of the number */ + for(e2++; e2<0; precision--, e2++){ + assert( precision>0 ); + *(bufpt++) = '0'; + } + /* Significant digits after the decimal point */ + while( (precision--)>0 ){ + *(bufpt++) = et_getdigit(&realvalue,&nsd); + } + /* Remove trailing zeros and the "." if no digits follow the "." */ + if( flag_rtz && flag_dp ){ + while( bufpt[-1]=='0' ) *(--bufpt) = 0; + assert( bufpt>zOut ); + if( bufpt[-1]=='.' ){ + if( flag_altform2 ){ + *(bufpt++) = '0'; + }else{ + *(--bufpt) = 0; + } + } + } + /* Add the "eNNN" suffix */ + if( xtype==etEXP ){ + *(bufpt++) = aDigits[infop->charset]; + if( exp<0 ){ + *(bufpt++) = '-'; exp = -exp; + }else{ + *(bufpt++) = '+'; + } + if( exp>=100 ){ + *(bufpt++) = (char)((exp/100)+'0'); /* 100's digit */ + exp %= 100; + } + *(bufpt++) = (char)(exp/10+'0'); /* 10's digit */ + *(bufpt++) = (char)(exp%10+'0'); /* 1's digit */ + } + *bufpt = 0; + + /* The converted number is in buf[] and zero terminated. Output it. + ** Note that the number is in the usual order, not reversed as with + ** integer conversions. */ + length = (int)(bufpt-zOut); + bufpt = zOut; + + /* Special case: Add leading zeros if the flag_zeropad flag is + ** set and we are not left justified */ + if( flag_zeropad && !flag_leftjustify && length < width){ + int i; + int nPad = width - length; + for(i=width; i>=nPad; i--){ + bufpt[i] = bufpt[i-nPad]; + } + i = prefix!=0; + while( nPad-- ) bufpt[i++] = '0'; + length = width; + } +#endif /* !defined(SQLITE_OMIT_FLOATING_POINT) */ + break; + case etSIZE: + if( !bArgList ){ + *(va_arg(ap,int*)) = pAccum->nChar; + } + length = width = 0; + break; + case etPERCENT: + buf[0] = '%'; + bufpt = buf; + length = 1; + break; + case etCHARX: + if( bArgList ){ + bufpt = getTextArg(pArgList); + length = 1; + if( bufpt ){ + buf[0] = c = *(bufpt++); + if( (c&0xc0)==0xc0 ){ + while( length<4 && (bufpt[0]&0xc0)==0x80 ){ + buf[length++] = *(bufpt++); + } + } + }else{ + buf[0] = 0; + } + }else{ + unsigned int ch = va_arg(ap,unsigned int); + if( ch<0x00080 ){ + buf[0] = ch & 0xff; + length = 1; + }else if( ch<0x00800 ){ + buf[0] = 0xc0 + (u8)((ch>>6)&0x1f); + buf[1] = 0x80 + (u8)(ch & 0x3f); + length = 2; + }else if( ch<0x10000 ){ + buf[0] = 0xe0 + (u8)((ch>>12)&0x0f); + buf[1] = 0x80 + (u8)((ch>>6) & 0x3f); + buf[2] = 0x80 + (u8)(ch & 0x3f); + length = 3; + }else{ + buf[0] = 0xf0 + (u8)((ch>>18) & 0x07); + buf[1] = 0x80 + (u8)((ch>>12) & 0x3f); + buf[2] = 0x80 + (u8)((ch>>6) & 0x3f); + buf[3] = 0x80 + (u8)(ch & 0x3f); + length = 4; + } + } + if( precision>1 ){ + width -= precision-1; + if( width>1 && !flag_leftjustify ){ + sqlite3_str_appendchar(pAccum, width-1, ' '); + width = 0; + } + while( precision-- > 1 ){ + sqlite3_str_append(pAccum, buf, length); + } + } + bufpt = buf; + flag_altform2 = 1; + goto adjust_width_for_utf8; + case etSTRING: + case etDYNSTRING: + if( bArgList ){ + bufpt = getTextArg(pArgList); + xtype = etSTRING; + }else{ + bufpt = va_arg(ap,char*); + } + if( bufpt==0 ){ + bufpt = ""; + }else if( xtype==etDYNSTRING ){ + if( pAccum->nChar==0 + && pAccum->mxAlloc + && width==0 + && precision<0 + && pAccum->accError==0 + ){ + /* Special optimization for sqlite3_mprintf("%z..."): + ** Extend an existing memory allocation rather than creating + ** a new one. */ + assert( (pAccum->printfFlags&SQLITE_PRINTF_MALLOCED)==0 ); + pAccum->zText = bufpt; + pAccum->nAlloc = sqlite3DbMallocSize(pAccum->db, bufpt); + pAccum->nChar = 0x7fffffff & (int)strlen(bufpt); + pAccum->printfFlags |= SQLITE_PRINTF_MALLOCED; + length = 0; + break; + } + zExtra = bufpt; + } + if( precision>=0 ){ + if( flag_altform2 ){ + /* Set length to the number of bytes needed in order to display + ** precision characters */ + unsigned char *z = (unsigned char*)bufpt; + while( precision-- > 0 && z[0] ){ + SQLITE_SKIP_UTF8(z); + } + length = (int)(z - (unsigned char*)bufpt); + }else{ + for(length=0; length0 ){ + /* Adjust width to account for extra bytes in UTF-8 characters */ + int ii = length - 1; + while( ii>=0 ) if( (bufpt[ii--] & 0xc0)==0x80 ) width++; + } + break; + case etSQLESCAPE: /* %q: Escape ' characters */ + case etSQLESCAPE2: /* %Q: Escape ' and enclose in '...' */ + case etSQLESCAPE3: { /* %w: Escape " characters */ + int i, j, k, n, isnull; + int needQuote; + char ch; + char q = ((xtype==etSQLESCAPE3)?'"':'\''); /* Quote character */ + char *escarg; + + if( bArgList ){ + escarg = getTextArg(pArgList); + }else{ + escarg = va_arg(ap,char*); + } + isnull = escarg==0; + if( isnull ) escarg = (xtype==etSQLESCAPE2 ? "NULL" : "(NULL)"); + /* For %q, %Q, and %w, the precision is the number of bytes (or + ** characters if the ! flags is present) to use from the input. + ** Because of the extra quoting characters inserted, the number + ** of output characters may be larger than the precision. + */ + k = precision; + for(i=n=0; k!=0 && (ch=escarg[i])!=0; i++, k--){ + if( ch==q ) n++; + if( flag_altform2 && (ch&0xc0)==0xc0 ){ + while( (escarg[i+1]&0xc0)==0x80 ){ i++; } + } + } + needQuote = !isnull && xtype==etSQLESCAPE2; + n += i + 3; + if( n>etBUFSIZE ){ + bufpt = zExtra = printfTempBuf(pAccum, n); + if( bufpt==0 ) return; + }else{ + bufpt = buf; + } + j = 0; + if( needQuote ) bufpt[j++] = q; + k = i; + for(i=0; iprintfFlags & SQLITE_PRINTF_INTERNAL)==0 ) return; + if( flag_alternateform ){ + /* %#T means an Expr pointer that uses Expr.u.zToken */ + Expr *pExpr = va_arg(ap,Expr*); + if( ALWAYS(pExpr) && ALWAYS(!ExprHasProperty(pExpr,EP_IntValue)) ){ + sqlite3_str_appendall(pAccum, (const char*)pExpr->u.zToken); + sqlite3RecordErrorOffsetOfExpr(pAccum->db, pExpr); + } + }else{ + /* %T means a Token pointer */ + Token *pToken = va_arg(ap, Token*); + assert( bArgList==0 ); + if( pToken && pToken->n ){ + sqlite3_str_append(pAccum, (const char*)pToken->z, pToken->n); + sqlite3RecordErrorByteOffset(pAccum->db, pToken->z); + } + } + length = width = 0; + break; + } + case etSRCITEM: { + SrcItem *pItem; + if( (pAccum->printfFlags & SQLITE_PRINTF_INTERNAL)==0 ) return; + pItem = va_arg(ap, SrcItem*); + assert( bArgList==0 ); + if( pItem->zAlias && !flag_altform2 ){ + sqlite3_str_appendall(pAccum, pItem->zAlias); + }else if( pItem->zName ){ + if( pItem->zDatabase ){ + sqlite3_str_appendall(pAccum, pItem->zDatabase); + sqlite3_str_append(pAccum, ".", 1); + } + sqlite3_str_appendall(pAccum, pItem->zName); + }else if( pItem->zAlias ){ + sqlite3_str_appendall(pAccum, pItem->zAlias); + }else if( ALWAYS(pItem->pSelect) ){ + sqlite3_str_appendf(pAccum, "SUBQUERY %u", pItem->pSelect->selId); + } + length = width = 0; + break; + } + default: { + assert( xtype==etINVALID ); + return; + } + }/* End switch over the format type */ + /* + ** The text of the conversion is pointed to by "bufpt" and is + ** "length" characters long. The field width is "width". Do + ** the output. Both length and width are in bytes, not characters, + ** at this point. If the "!" flag was present on string conversions + ** indicating that width and precision should be expressed in characters, + ** then the values have been translated prior to reaching this point. + */ + width -= length; + if( width>0 ){ + if( !flag_leftjustify ) sqlite3_str_appendchar(pAccum, width, ' '); + sqlite3_str_append(pAccum, bufpt, length); + if( flag_leftjustify ) sqlite3_str_appendchar(pAccum, width, ' '); + }else{ + sqlite3_str_append(pAccum, bufpt, length); + } + + if( zExtra ){ + sqlite3DbFree(pAccum->db, zExtra); + zExtra = 0; + } + }/* End for loop over the format string */ +} /* End of function */ + + +/* +** The z string points to the first character of a token that is +** associated with an error. If db does not already have an error +** byte offset recorded, try to compute the error byte offset for +** z and set the error byte offset in db. +*/ +SQLITE_PRIVATE void sqlite3RecordErrorByteOffset(sqlite3 *db, const char *z){ + const Parse *pParse; + const char *zText; + const char *zEnd; + assert( z!=0 ); + if( NEVER(db==0) ) return; + if( db->errByteOffset!=(-2) ) return; + pParse = db->pParse; + if( NEVER(pParse==0) ) return; + zText =pParse->zTail; + if( NEVER(zText==0) ) return; + zEnd = &zText[strlen(zText)]; + if( SQLITE_WITHIN(z,zText,zEnd) ){ + db->errByteOffset = (int)(z-zText); + } +} + +/* +** If pExpr has a byte offset for the start of a token, record that as +** as the error offset. +*/ +SQLITE_PRIVATE void sqlite3RecordErrorOffsetOfExpr(sqlite3 *db, const Expr *pExpr){ + while( pExpr && (ExprHasProperty(pExpr,EP_FromJoin) || pExpr->w.iOfst<=0) ){ + pExpr = pExpr->pLeft; + } + if( pExpr==0 ) return; + db->errByteOffset = pExpr->w.iOfst; +} + +/* +** Enlarge the memory allocation on a StrAccum object so that it is +** able to accept at least N more bytes of text. +** +** Return the number of bytes of text that StrAccum is able to accept +** after the attempted enlargement. The value returned might be zero. +*/ +SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum *p, int N){ + char *zNew; + assert( p->nChar+(i64)N >= p->nAlloc ); /* Only called if really needed */ + if( p->accError ){ + testcase(p->accError==SQLITE_TOOBIG); + testcase(p->accError==SQLITE_NOMEM); + return 0; + } + if( p->mxAlloc==0 ){ + sqlite3StrAccumSetError(p, SQLITE_TOOBIG); + return p->nAlloc - p->nChar - 1; + }else{ + char *zOld = isMalloced(p) ? p->zText : 0; + i64 szNew = p->nChar; + szNew += (sqlite3_int64)N + 1; + if( szNew+p->nChar<=p->mxAlloc ){ + /* Force exponential buffer size growth as long as it does not overflow, + ** to avoid having to call this routine too often */ + szNew += p->nChar; + } + if( szNew > p->mxAlloc ){ + sqlite3_str_reset(p); + sqlite3StrAccumSetError(p, SQLITE_TOOBIG); + return 0; + }else{ + p->nAlloc = (int)szNew; + } + if( p->db ){ + zNew = sqlite3DbRealloc(p->db, zOld, p->nAlloc); + }else{ + zNew = sqlite3Realloc(zOld, p->nAlloc); + } + if( zNew ){ + assert( p->zText!=0 || p->nChar==0 ); + if( !isMalloced(p) && p->nChar>0 ) memcpy(zNew, p->zText, p->nChar); + p->zText = zNew; + p->nAlloc = sqlite3DbMallocSize(p->db, zNew); + p->printfFlags |= SQLITE_PRINTF_MALLOCED; + }else{ + sqlite3_str_reset(p); + sqlite3StrAccumSetError(p, SQLITE_NOMEM); + return 0; + } + } + return N; +} + +/* +** Append N copies of character c to the given string buffer. +*/ +SQLITE_API void sqlite3_str_appendchar(sqlite3_str *p, int N, char c){ + testcase( p->nChar + (i64)N > 0x7fffffff ); + if( p->nChar+(i64)N >= p->nAlloc && (N = sqlite3StrAccumEnlarge(p, N))<=0 ){ + return; + } + while( (N--)>0 ) p->zText[p->nChar++] = c; +} + +/* +** The StrAccum "p" is not large enough to accept N new bytes of z[]. +** So enlarge if first, then do the append. +** +** This is a helper routine to sqlite3_str_append() that does special-case +** work (enlarging the buffer) using tail recursion, so that the +** sqlite3_str_append() routine can use fast calling semantics. +*/ +static void SQLITE_NOINLINE enlargeAndAppend(StrAccum *p, const char *z, int N){ + N = sqlite3StrAccumEnlarge(p, N); + if( N>0 ){ + memcpy(&p->zText[p->nChar], z, N); + p->nChar += N; + } +} + +/* +** Append N bytes of text from z to the StrAccum object. Increase the +** size of the memory allocation for StrAccum if necessary. +*/ +SQLITE_API void sqlite3_str_append(sqlite3_str *p, const char *z, int N){ + assert( z!=0 || N==0 ); + assert( p->zText!=0 || p->nChar==0 || p->accError ); + assert( N>=0 ); + assert( p->accError==0 || p->nAlloc==0 || p->mxAlloc==0 ); + if( p->nChar+N >= p->nAlloc ){ + enlargeAndAppend(p,z,N); + }else if( N ){ + assert( p->zText ); + p->nChar += N; + memcpy(&p->zText[p->nChar-N], z, N); + } +} + +/* +** Append the complete text of zero-terminated string z[] to the p string. +*/ +SQLITE_API void sqlite3_str_appendall(sqlite3_str *p, const char *z){ + sqlite3_str_append(p, z, sqlite3Strlen30(z)); +} + + +/* +** Finish off a string by making sure it is zero-terminated. +** Return a pointer to the resulting string. Return a NULL +** pointer if any kind of error was encountered. +*/ +static SQLITE_NOINLINE char *strAccumFinishRealloc(StrAccum *p){ + char *zText; + assert( p->mxAlloc>0 && !isMalloced(p) ); + zText = sqlite3DbMallocRaw(p->db, p->nChar+1 ); + if( zText ){ + memcpy(zText, p->zText, p->nChar+1); + p->printfFlags |= SQLITE_PRINTF_MALLOCED; + }else{ + sqlite3StrAccumSetError(p, SQLITE_NOMEM); + } + p->zText = zText; + return zText; +} +SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum *p){ + if( p->zText ){ + p->zText[p->nChar] = 0; + if( p->mxAlloc>0 && !isMalloced(p) ){ + return strAccumFinishRealloc(p); + } + } + return p->zText; +} + +/* +** Use the content of the StrAccum passed as the second argument +** as the result of an SQL function. +*/ +SQLITE_PRIVATE void sqlite3ResultStrAccum(sqlite3_context *pCtx, StrAccum *p){ + if( p->accError ){ + sqlite3_result_error_code(pCtx, p->accError); + sqlite3_str_reset(p); + }else if( isMalloced(p) ){ + sqlite3_result_text(pCtx, p->zText, p->nChar, SQLITE_DYNAMIC); + }else{ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + sqlite3_str_reset(p); + } +} + +/* +** This singleton is an sqlite3_str object that is returned if +** sqlite3_malloc() fails to provide space for a real one. This +** sqlite3_str object accepts no new text and always returns +** an SQLITE_NOMEM error. +*/ +static sqlite3_str sqlite3OomStr = { + 0, 0, 0, 0, 0, SQLITE_NOMEM, 0 +}; + +/* Finalize a string created using sqlite3_str_new(). +*/ +SQLITE_API char *sqlite3_str_finish(sqlite3_str *p){ + char *z; + if( p!=0 && p!=&sqlite3OomStr ){ + z = sqlite3StrAccumFinish(p); + sqlite3_free(p); + }else{ + z = 0; + } + return z; +} + +/* Return any error code associated with p */ +SQLITE_API int sqlite3_str_errcode(sqlite3_str *p){ + return p ? p->accError : SQLITE_NOMEM; +} + +/* Return the current length of p in bytes */ +SQLITE_API int sqlite3_str_length(sqlite3_str *p){ + return p ? p->nChar : 0; +} + +/* Return the current value for p */ +SQLITE_API char *sqlite3_str_value(sqlite3_str *p){ + if( p==0 || p->nChar==0 ) return 0; + p->zText[p->nChar] = 0; + return p->zText; +} + +/* +** Reset an StrAccum string. Reclaim all malloced memory. +*/ +SQLITE_API void sqlite3_str_reset(StrAccum *p){ + if( isMalloced(p) ){ + sqlite3DbFree(p->db, p->zText); + p->printfFlags &= ~SQLITE_PRINTF_MALLOCED; + } + p->nAlloc = 0; + p->nChar = 0; + p->zText = 0; +} + +/* +** Initialize a string accumulator. +** +** p: The accumulator to be initialized. +** db: Pointer to a database connection. May be NULL. Lookaside +** memory is used if not NULL. db->mallocFailed is set appropriately +** when not NULL. +** zBase: An initial buffer. May be NULL in which case the initial buffer +** is malloced. +** n: Size of zBase in bytes. If total space requirements never exceed +** n then no memory allocations ever occur. +** mx: Maximum number of bytes to accumulate. If mx==0 then no memory +** allocations will ever occur. +*/ +SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum *p, sqlite3 *db, char *zBase, int n, int mx){ + p->zText = zBase; + p->db = db; + p->nAlloc = n; + p->mxAlloc = mx; + p->nChar = 0; + p->accError = 0; + p->printfFlags = 0; +} + +/* Allocate and initialize a new dynamic string object */ +SQLITE_API sqlite3_str *sqlite3_str_new(sqlite3 *db){ + sqlite3_str *p = sqlite3_malloc64(sizeof(*p)); + if( p ){ + sqlite3StrAccumInit(p, 0, 0, 0, + db ? db->aLimit[SQLITE_LIMIT_LENGTH] : SQLITE_MAX_LENGTH); + }else{ + p = &sqlite3OomStr; + } + return p; +} + +/* +** Print into memory obtained from sqliteMalloc(). Use the internal +** %-conversion extensions. +*/ +SQLITE_PRIVATE char *sqlite3VMPrintf(sqlite3 *db, const char *zFormat, va_list ap){ + char *z; + char zBase[SQLITE_PRINT_BUF_SIZE]; + StrAccum acc; + assert( db!=0 ); + sqlite3StrAccumInit(&acc, db, zBase, sizeof(zBase), + db->aLimit[SQLITE_LIMIT_LENGTH]); + acc.printfFlags = SQLITE_PRINTF_INTERNAL; + sqlite3_str_vappendf(&acc, zFormat, ap); + z = sqlite3StrAccumFinish(&acc); + if( acc.accError==SQLITE_NOMEM ){ + sqlite3OomFault(db); + } + return z; +} + +/* +** Print into memory obtained from sqliteMalloc(). Use the internal +** %-conversion extensions. +*/ +SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3 *db, const char *zFormat, ...){ + va_list ap; + char *z; + va_start(ap, zFormat); + z = sqlite3VMPrintf(db, zFormat, ap); + va_end(ap); + return z; +} + +/* +** Print into memory obtained from sqlite3_malloc(). Omit the internal +** %-conversion extensions. +*/ +SQLITE_API char *sqlite3_vmprintf(const char *zFormat, va_list ap){ + char *z; + char zBase[SQLITE_PRINT_BUF_SIZE]; + StrAccum acc; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( zFormat==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + sqlite3StrAccumInit(&acc, 0, zBase, sizeof(zBase), SQLITE_MAX_LENGTH); + sqlite3_str_vappendf(&acc, zFormat, ap); + z = sqlite3StrAccumFinish(&acc); + return z; +} + +/* +** Print into memory obtained from sqlite3_malloc()(). Omit the internal +** %-conversion extensions. +*/ +SQLITE_API char *sqlite3_mprintf(const char *zFormat, ...){ + va_list ap; + char *z; +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + va_start(ap, zFormat); + z = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + return z; +} + +/* +** sqlite3_snprintf() works like snprintf() except that it ignores the +** current locale settings. This is important for SQLite because we +** are not able to use a "," as the decimal point in place of "." as +** specified by some locales. +** +** Oops: The first two arguments of sqlite3_snprintf() are backwards +** from the snprintf() standard. Unfortunately, it is too late to change +** this without breaking compatibility, so we just have to live with the +** mistake. +** +** sqlite3_vsnprintf() is the varargs version. +*/ +SQLITE_API char *sqlite3_vsnprintf(int n, char *zBuf, const char *zFormat, va_list ap){ + StrAccum acc; + if( n<=0 ) return zBuf; +#ifdef SQLITE_ENABLE_API_ARMOR + if( zBuf==0 || zFormat==0 ) { + (void)SQLITE_MISUSE_BKPT; + if( zBuf ) zBuf[0] = 0; + return zBuf; + } +#endif + sqlite3StrAccumInit(&acc, 0, zBuf, n, 0); + sqlite3_str_vappendf(&acc, zFormat, ap); + zBuf[acc.nChar] = 0; + return zBuf; +} +SQLITE_API char *sqlite3_snprintf(int n, char *zBuf, const char *zFormat, ...){ + char *z; + va_list ap; + va_start(ap,zFormat); + z = sqlite3_vsnprintf(n, zBuf, zFormat, ap); + va_end(ap); + return z; +} + +/* +** This is the routine that actually formats the sqlite3_log() message. +** We house it in a separate routine from sqlite3_log() to avoid using +** stack space on small-stack systems when logging is disabled. +** +** sqlite3_log() must render into a static buffer. It cannot dynamically +** allocate memory because it might be called while the memory allocator +** mutex is held. +** +** sqlite3_str_vappendf() might ask for *temporary* memory allocations for +** certain format characters (%q) or for very large precisions or widths. +** Care must be taken that any sqlite3_log() calls that occur while the +** memory mutex is held do not use these mechanisms. +*/ +static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){ + StrAccum acc; /* String accumulator */ + char zMsg[SQLITE_PRINT_BUF_SIZE*3]; /* Complete log message */ + + sqlite3StrAccumInit(&acc, 0, zMsg, sizeof(zMsg), 0); + sqlite3_str_vappendf(&acc, zFormat, ap); + sqlite3GlobalConfig.xLog(sqlite3GlobalConfig.pLogArg, iErrCode, + sqlite3StrAccumFinish(&acc)); +} + +/* +** Format and write a message to the log if logging is enabled. +*/ +SQLITE_API void sqlite3_log(int iErrCode, const char *zFormat, ...){ + va_list ap; /* Vararg list */ + if( sqlite3GlobalConfig.xLog ){ + va_start(ap, zFormat); + renderLogMsg(iErrCode, zFormat, ap); + va_end(ap); + } +} + +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) +/* +** A version of printf() that understands %lld. Used for debugging. +** The printf() built into some versions of windows does not understand %lld +** and segfaults if you give it a long long int. +*/ +SQLITE_PRIVATE void sqlite3DebugPrintf(const char *zFormat, ...){ + va_list ap; + StrAccum acc; + char zBuf[SQLITE_PRINT_BUF_SIZE*10]; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); + va_start(ap,zFormat); + sqlite3_str_vappendf(&acc, zFormat, ap); + va_end(ap); + sqlite3StrAccumFinish(&acc); +#ifdef SQLITE_OS_TRACE_PROC + { + extern void SQLITE_OS_TRACE_PROC(const char *zBuf, int nBuf); + SQLITE_OS_TRACE_PROC(zBuf, sizeof(zBuf)); + } +#else + fprintf(stdout,"%s", zBuf); + fflush(stdout); +#endif +} +#endif + + +/* +** variable-argument wrapper around sqlite3_str_vappendf(). The bFlags argument +** can contain the bit SQLITE_PRINTF_INTERNAL enable internal formats. +*/ +SQLITE_API void sqlite3_str_appendf(StrAccum *p, const char *zFormat, ...){ + va_list ap; + va_start(ap,zFormat); + sqlite3_str_vappendf(p, zFormat, ap); + va_end(ap); +} + +/************** End of printf.c **********************************************/ +/************** Begin file treeview.c ****************************************/ +/* +** 2015-06-08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains C code to implement the TreeView debugging routines. +** These routines print a parse tree to standard output for debugging and +** analysis. +** +** The interfaces in this file is only available when compiling +** with SQLITE_DEBUG. +*/ +/* #include "sqliteInt.h" */ +#ifdef SQLITE_DEBUG + +/* +** Add a new subitem to the tree. The moreToFollow flag indicates that this +** is not the last item in the tree. +*/ +static TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){ + if( p==0 ){ + p = sqlite3_malloc64( sizeof(*p) ); + if( p==0 ) return 0; + memset(p, 0, sizeof(*p)); + }else{ + p->iLevel++; + } + assert( moreToFollow==0 || moreToFollow==1 ); + if( p->iLevelbLine) ) p->bLine[p->iLevel] = moreToFollow; + return p; +} + +/* +** Finished with one layer of the tree +*/ +static void sqlite3TreeViewPop(TreeView *p){ + if( p==0 ) return; + p->iLevel--; + if( p->iLevel<0 ) sqlite3_free(p); +} + +/* +** Generate a single line of output for the tree, with a prefix that contains +** all the appropriate tree lines +*/ +static void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){ + va_list ap; + int i; + StrAccum acc; + char zBuf[500]; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); + if( p ){ + for(i=0; iiLevel && ibLine)-1; i++){ + sqlite3_str_append(&acc, p->bLine[i] ? "| " : " ", 4); + } + sqlite3_str_append(&acc, p->bLine[i] ? "|-- " : "'-- ", 4); + } + if( zFormat!=0 ){ + va_start(ap, zFormat); + sqlite3_str_vappendf(&acc, zFormat, ap); + va_end(ap); + assert( acc.nChar>0 || acc.accError ); + sqlite3_str_append(&acc, "\n", 1); + } + sqlite3StrAccumFinish(&acc); + fprintf(stdout,"%s", zBuf); + fflush(stdout); +} + +/* +** Shorthand for starting a new tree item that consists of a single label +*/ +static void sqlite3TreeViewItem(TreeView *p, const char *zLabel,u8 moreFollows){ + p = sqlite3TreeViewPush(p, moreFollows); + sqlite3TreeViewLine(p, "%s", zLabel); +} + +/* +** Generate a human-readable description of a WITH clause. +*/ +SQLITE_PRIVATE void sqlite3TreeViewWith(TreeView *pView, const With *pWith, u8 moreToFollow){ + int i; + if( pWith==0 ) return; + if( pWith->nCte==0 ) return; + if( pWith->pOuter ){ + sqlite3TreeViewLine(pView, "WITH (0x%p, pOuter=0x%p)",pWith,pWith->pOuter); + }else{ + sqlite3TreeViewLine(pView, "WITH (0x%p)", pWith); + } + if( pWith->nCte>0 ){ + pView = sqlite3TreeViewPush(pView, 1); + for(i=0; inCte; i++){ + StrAccum x; + char zLine[1000]; + const struct Cte *pCte = &pWith->a[i]; + sqlite3StrAccumInit(&x, 0, zLine, sizeof(zLine), 0); + sqlite3_str_appendf(&x, "%s", pCte->zName); + if( pCte->pCols && pCte->pCols->nExpr>0 ){ + char cSep = '('; + int j; + for(j=0; jpCols->nExpr; j++){ + sqlite3_str_appendf(&x, "%c%s", cSep, pCte->pCols->a[j].zEName); + cSep = ','; + } + sqlite3_str_appendf(&x, ")"); + } + if( pCte->pUse ){ + sqlite3_str_appendf(&x, " (pUse=0x%p, nUse=%d)", pCte->pUse, + pCte->pUse->nUse); + } + sqlite3StrAccumFinish(&x); + sqlite3TreeViewItem(pView, zLine, inCte-1); + sqlite3TreeViewSelect(pView, pCte->pSelect, 0); + sqlite3TreeViewPop(pView); + } + sqlite3TreeViewPop(pView); + } +} + +/* +** Generate a human-readable description of a SrcList object. +*/ +SQLITE_PRIVATE void sqlite3TreeViewSrcList(TreeView *pView, const SrcList *pSrc){ + int i; + for(i=0; inSrc; i++){ + const SrcItem *pItem = &pSrc->a[i]; + StrAccum x; + char zLine[100]; + sqlite3StrAccumInit(&x, 0, zLine, sizeof(zLine), 0); + x.printfFlags |= SQLITE_PRINTF_INTERNAL; + sqlite3_str_appendf(&x, "{%d:*} %!S", pItem->iCursor, pItem); + if( pItem->pTab ){ + sqlite3_str_appendf(&x, " tab=%Q nCol=%d ptr=%p used=%llx", + pItem->pTab->zName, pItem->pTab->nCol, pItem->pTab, pItem->colUsed); + } + if( pItem->fg.jointype & JT_LEFT ){ + sqlite3_str_appendf(&x, " LEFT-JOIN"); + }else if( pItem->fg.jointype & JT_CROSS ){ + sqlite3_str_appendf(&x, " CROSS-JOIN"); + } + if( pItem->fg.fromDDL ){ + sqlite3_str_appendf(&x, " DDL"); + } + if( pItem->fg.isCte ){ + sqlite3_str_appendf(&x, " CteUse=0x%p", pItem->u2.pCteUse); + } + sqlite3StrAccumFinish(&x); + sqlite3TreeViewItem(pView, zLine, inSrc-1); + if( pItem->pSelect ){ + sqlite3TreeViewSelect(pView, pItem->pSelect, 0); + } + if( pItem->fg.isTabFunc ){ + sqlite3TreeViewExprList(pView, pItem->u1.pFuncArg, 0, "func-args:"); + } + sqlite3TreeViewPop(pView); + } +} + +/* +** Generate a human-readable description of a Select object. +*/ +SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 moreToFollow){ + int n = 0; + int cnt = 0; + if( p==0 ){ + sqlite3TreeViewLine(pView, "nil-SELECT"); + return; + } + pView = sqlite3TreeViewPush(pView, moreToFollow); + if( p->pWith ){ + sqlite3TreeViewWith(pView, p->pWith, 1); + cnt = 1; + sqlite3TreeViewPush(pView, 1); + } + do{ + if( p->selFlags & SF_WhereBegin ){ + sqlite3TreeViewLine(pView, "sqlite3WhereBegin()"); + }else{ + sqlite3TreeViewLine(pView, + "SELECT%s%s (%u/%p) selFlags=0x%x nSelectRow=%d", + ((p->selFlags & SF_Distinct) ? " DISTINCT" : ""), + ((p->selFlags & SF_Aggregate) ? " agg_flag" : ""), + p->selId, p, p->selFlags, + (int)p->nSelectRow + ); + } + if( cnt++ ) sqlite3TreeViewPop(pView); + if( p->pPrior ){ + n = 1000; + }else{ + n = 0; + if( p->pSrc && p->pSrc->nSrc ) n++; + if( p->pWhere ) n++; + if( p->pGroupBy ) n++; + if( p->pHaving ) n++; + if( p->pOrderBy ) n++; + if( p->pLimit ) n++; +#ifndef SQLITE_OMIT_WINDOWFUNC + if( p->pWin ) n++; + if( p->pWinDefn ) n++; +#endif + } + if( p->pEList ){ + sqlite3TreeViewExprList(pView, p->pEList, n>0, "result-set"); + } + n--; +#ifndef SQLITE_OMIT_WINDOWFUNC + if( p->pWin ){ + Window *pX; + pView = sqlite3TreeViewPush(pView, (n--)>0); + sqlite3TreeViewLine(pView, "window-functions"); + for(pX=p->pWin; pX; pX=pX->pNextWin){ + sqlite3TreeViewWinFunc(pView, pX, pX->pNextWin!=0); + } + sqlite3TreeViewPop(pView); + } +#endif + if( p->pSrc && p->pSrc->nSrc ){ + pView = sqlite3TreeViewPush(pView, (n--)>0); + sqlite3TreeViewLine(pView, "FROM"); + sqlite3TreeViewSrcList(pView, p->pSrc); + sqlite3TreeViewPop(pView); + } + if( p->pWhere ){ + sqlite3TreeViewItem(pView, "WHERE", (n--)>0); + sqlite3TreeViewExpr(pView, p->pWhere, 0); + sqlite3TreeViewPop(pView); + } + if( p->pGroupBy ){ + sqlite3TreeViewExprList(pView, p->pGroupBy, (n--)>0, "GROUPBY"); + } + if( p->pHaving ){ + sqlite3TreeViewItem(pView, "HAVING", (n--)>0); + sqlite3TreeViewExpr(pView, p->pHaving, 0); + sqlite3TreeViewPop(pView); + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( p->pWinDefn ){ + Window *pX; + sqlite3TreeViewItem(pView, "WINDOW", (n--)>0); + for(pX=p->pWinDefn; pX; pX=pX->pNextWin){ + sqlite3TreeViewWindow(pView, pX, pX->pNextWin!=0); + } + sqlite3TreeViewPop(pView); + } +#endif + if( p->pOrderBy ){ + sqlite3TreeViewExprList(pView, p->pOrderBy, (n--)>0, "ORDERBY"); + } + if( p->pLimit ){ + sqlite3TreeViewItem(pView, "LIMIT", (n--)>0); + sqlite3TreeViewExpr(pView, p->pLimit->pLeft, p->pLimit->pRight!=0); + if( p->pLimit->pRight ){ + sqlite3TreeViewItem(pView, "OFFSET", (n--)>0); + sqlite3TreeViewExpr(pView, p->pLimit->pRight, 0); + sqlite3TreeViewPop(pView); + } + sqlite3TreeViewPop(pView); + } + if( p->pPrior ){ + const char *zOp = "UNION"; + switch( p->op ){ + case TK_ALL: zOp = "UNION ALL"; break; + case TK_INTERSECT: zOp = "INTERSECT"; break; + case TK_EXCEPT: zOp = "EXCEPT"; break; + } + sqlite3TreeViewItem(pView, zOp, 1); + } + p = p->pPrior; + }while( p!=0 ); + sqlite3TreeViewPop(pView); +} + +#ifndef SQLITE_OMIT_WINDOWFUNC +/* +** Generate a description of starting or stopping bounds +*/ +SQLITE_PRIVATE void sqlite3TreeViewBound( + TreeView *pView, /* View context */ + u8 eBound, /* UNBOUNDED, CURRENT, PRECEDING, FOLLOWING */ + Expr *pExpr, /* Value for PRECEDING or FOLLOWING */ + u8 moreToFollow /* True if more to follow */ +){ + switch( eBound ){ + case TK_UNBOUNDED: { + sqlite3TreeViewItem(pView, "UNBOUNDED", moreToFollow); + sqlite3TreeViewPop(pView); + break; + } + case TK_CURRENT: { + sqlite3TreeViewItem(pView, "CURRENT", moreToFollow); + sqlite3TreeViewPop(pView); + break; + } + case TK_PRECEDING: { + sqlite3TreeViewItem(pView, "PRECEDING", moreToFollow); + sqlite3TreeViewExpr(pView, pExpr, 0); + sqlite3TreeViewPop(pView); + break; + } + case TK_FOLLOWING: { + sqlite3TreeViewItem(pView, "FOLLOWING", moreToFollow); + sqlite3TreeViewExpr(pView, pExpr, 0); + sqlite3TreeViewPop(pView); + break; + } + } +} +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +#ifndef SQLITE_OMIT_WINDOWFUNC +/* +** Generate a human-readable explanation for a Window object +*/ +SQLITE_PRIVATE void sqlite3TreeViewWindow(TreeView *pView, const Window *pWin, u8 more){ + int nElement = 0; + if( pWin->pFilter ){ + sqlite3TreeViewItem(pView, "FILTER", 1); + sqlite3TreeViewExpr(pView, pWin->pFilter, 0); + sqlite3TreeViewPop(pView); + } + pView = sqlite3TreeViewPush(pView, more); + if( pWin->zName ){ + sqlite3TreeViewLine(pView, "OVER %s (%p)", pWin->zName, pWin); + }else{ + sqlite3TreeViewLine(pView, "OVER (%p)", pWin); + } + if( pWin->zBase ) nElement++; + if( pWin->pOrderBy ) nElement++; + if( pWin->eFrmType ) nElement++; + if( pWin->eExclude ) nElement++; + if( pWin->zBase ){ + sqlite3TreeViewPush(pView, (--nElement)>0); + sqlite3TreeViewLine(pView, "window: %s", pWin->zBase); + sqlite3TreeViewPop(pView); + } + if( pWin->pPartition ){ + sqlite3TreeViewExprList(pView, pWin->pPartition, nElement>0,"PARTITION-BY"); + } + if( pWin->pOrderBy ){ + sqlite3TreeViewExprList(pView, pWin->pOrderBy, (--nElement)>0, "ORDER-BY"); + } + if( pWin->eFrmType ){ + char zBuf[30]; + const char *zFrmType = "ROWS"; + if( pWin->eFrmType==TK_RANGE ) zFrmType = "RANGE"; + if( pWin->eFrmType==TK_GROUPS ) zFrmType = "GROUPS"; + sqlite3_snprintf(sizeof(zBuf),zBuf,"%s%s",zFrmType, + pWin->bImplicitFrame ? " (implied)" : ""); + sqlite3TreeViewItem(pView, zBuf, (--nElement)>0); + sqlite3TreeViewBound(pView, pWin->eStart, pWin->pStart, 1); + sqlite3TreeViewBound(pView, pWin->eEnd, pWin->pEnd, 0); + sqlite3TreeViewPop(pView); + } + if( pWin->eExclude ){ + char zBuf[30]; + const char *zExclude; + switch( pWin->eExclude ){ + case TK_NO: zExclude = "NO OTHERS"; break; + case TK_CURRENT: zExclude = "CURRENT ROW"; break; + case TK_GROUP: zExclude = "GROUP"; break; + case TK_TIES: zExclude = "TIES"; break; + default: + sqlite3_snprintf(sizeof(zBuf),zBuf,"invalid(%d)", pWin->eExclude); + zExclude = zBuf; + break; + } + sqlite3TreeViewPush(pView, 0); + sqlite3TreeViewLine(pView, "EXCLUDE %s", zExclude); + sqlite3TreeViewPop(pView); + } + sqlite3TreeViewPop(pView); +} +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +#ifndef SQLITE_OMIT_WINDOWFUNC +/* +** Generate a human-readable explanation for a Window Function object +*/ +SQLITE_PRIVATE void sqlite3TreeViewWinFunc(TreeView *pView, const Window *pWin, u8 more){ + pView = sqlite3TreeViewPush(pView, more); + sqlite3TreeViewLine(pView, "WINFUNC %s(%d)", + pWin->pFunc->zName, pWin->pFunc->nArg); + sqlite3TreeViewWindow(pView, pWin, 0); + sqlite3TreeViewPop(pView); +} +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +/* +** Generate a human-readable explanation of an expression tree. +*/ +SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 moreToFollow){ + const char *zBinOp = 0; /* Binary operator */ + const char *zUniOp = 0; /* Unary operator */ + char zFlgs[200]; + pView = sqlite3TreeViewPush(pView, moreToFollow); + if( pExpr==0 ){ + sqlite3TreeViewLine(pView, "nil"); + sqlite3TreeViewPop(pView); + return; + } + if( pExpr->flags || pExpr->affExpr || pExpr->vvaFlags ){ + StrAccum x; + sqlite3StrAccumInit(&x, 0, zFlgs, sizeof(zFlgs), 0); + sqlite3_str_appendf(&x, " fg.af=%x.%c", + pExpr->flags, pExpr->affExpr ? pExpr->affExpr : 'n'); + if( ExprHasProperty(pExpr, EP_FromJoin) ){ + sqlite3_str_appendf(&x, " iRJT=%d", pExpr->w.iRightJoinTable); + } + if( ExprHasProperty(pExpr, EP_FromDDL) ){ + sqlite3_str_appendf(&x, " DDL"); + } + if( ExprHasVVAProperty(pExpr, EP_Immutable) ){ + sqlite3_str_appendf(&x, " IMMUTABLE"); + } + sqlite3StrAccumFinish(&x); + }else{ + zFlgs[0] = 0; + } + switch( pExpr->op ){ + case TK_AGG_COLUMN: { + sqlite3TreeViewLine(pView, "AGG{%d:%d}%s", + pExpr->iTable, pExpr->iColumn, zFlgs); + break; + } + case TK_COLUMN: { + if( pExpr->iTable<0 ){ + /* This only happens when coding check constraints */ + char zOp2[16]; + if( pExpr->op2 ){ + sqlite3_snprintf(sizeof(zOp2),zOp2," op2=0x%02x",pExpr->op2); + }else{ + zOp2[0] = 0; + } + sqlite3TreeViewLine(pView, "COLUMN(%d)%s%s", + pExpr->iColumn, zFlgs, zOp2); + }else{ + assert( ExprUseYTab(pExpr) ); + sqlite3TreeViewLine(pView, "{%d:%d} pTab=%p%s", + pExpr->iTable, pExpr->iColumn, + pExpr->y.pTab, zFlgs); + } + if( ExprHasProperty(pExpr, EP_FixedCol) ){ + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + } + break; + } + case TK_INTEGER: { + if( pExpr->flags & EP_IntValue ){ + sqlite3TreeViewLine(pView, "%d", pExpr->u.iValue); + }else{ + sqlite3TreeViewLine(pView, "%s", pExpr->u.zToken); + } + break; + } +#ifndef SQLITE_OMIT_FLOATING_POINT + case TK_FLOAT: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); + break; + } +#endif + case TK_STRING: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView,"%Q", pExpr->u.zToken); + break; + } + case TK_NULL: { + sqlite3TreeViewLine(pView,"NULL"); + break; + } + case TK_TRUEFALSE: { + sqlite3TreeViewLine(pView,"%s%s", + sqlite3ExprTruthValue(pExpr) ? "TRUE" : "FALSE", zFlgs); + break; + } +#ifndef SQLITE_OMIT_BLOB_LITERAL + case TK_BLOB: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); + break; + } +#endif + case TK_VARIABLE: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView,"VARIABLE(%s,%d)", + pExpr->u.zToken, pExpr->iColumn); + break; + } + case TK_REGISTER: { + sqlite3TreeViewLine(pView,"REGISTER(%d)", pExpr->iTable); + break; + } + case TK_ID: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView,"ID \"%w\"", pExpr->u.zToken); + break; + } +#ifndef SQLITE_OMIT_CAST + case TK_CAST: { + /* Expressions of the form: CAST(pLeft AS token) */ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView,"CAST %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } +#endif /* SQLITE_OMIT_CAST */ + case TK_LT: zBinOp = "LT"; break; + case TK_LE: zBinOp = "LE"; break; + case TK_GT: zBinOp = "GT"; break; + case TK_GE: zBinOp = "GE"; break; + case TK_NE: zBinOp = "NE"; break; + case TK_EQ: zBinOp = "EQ"; break; + case TK_IS: zBinOp = "IS"; break; + case TK_ISNOT: zBinOp = "ISNOT"; break; + case TK_AND: zBinOp = "AND"; break; + case TK_OR: zBinOp = "OR"; break; + case TK_PLUS: zBinOp = "ADD"; break; + case TK_STAR: zBinOp = "MUL"; break; + case TK_MINUS: zBinOp = "SUB"; break; + case TK_REM: zBinOp = "REM"; break; + case TK_BITAND: zBinOp = "BITAND"; break; + case TK_BITOR: zBinOp = "BITOR"; break; + case TK_SLASH: zBinOp = "DIV"; break; + case TK_LSHIFT: zBinOp = "LSHIFT"; break; + case TK_RSHIFT: zBinOp = "RSHIFT"; break; + case TK_CONCAT: zBinOp = "CONCAT"; break; + case TK_DOT: zBinOp = "DOT"; break; + case TK_LIMIT: zBinOp = "LIMIT"; break; + + case TK_UMINUS: zUniOp = "UMINUS"; break; + case TK_UPLUS: zUniOp = "UPLUS"; break; + case TK_BITNOT: zUniOp = "BITNOT"; break; + case TK_NOT: zUniOp = "NOT"; break; + case TK_ISNULL: zUniOp = "ISNULL"; break; + case TK_NOTNULL: zUniOp = "NOTNULL"; break; + + case TK_TRUTH: { + int x; + const char *azOp[] = { + "IS-FALSE", "IS-TRUE", "IS-NOT-FALSE", "IS-NOT-TRUE" + }; + assert( pExpr->op2==TK_IS || pExpr->op2==TK_ISNOT ); + assert( pExpr->pRight ); + assert( sqlite3ExprSkipCollate(pExpr->pRight)->op==TK_TRUEFALSE ); + x = (pExpr->op2==TK_ISNOT)*2 + sqlite3ExprTruthValue(pExpr->pRight); + zUniOp = azOp[x]; + break; + } + + case TK_SPAN: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView, "SPAN %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } + + case TK_COLLATE: { + /* COLLATE operators without the EP_Collate flag are intended to + ** emulate collation associated with a table column. These show + ** up in the treeview output as "SOFT-COLLATE". Explicit COLLATE + ** operators that appear in the original SQL always have the + ** EP_Collate bit set and appear in treeview output as just "COLLATE" */ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView, "%sCOLLATE %Q%s", + !ExprHasProperty(pExpr, EP_Collate) ? "SOFT-" : "", + pExpr->u.zToken, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } + + case TK_AGG_FUNCTION: + case TK_FUNCTION: { + ExprList *pFarg; /* List of function arguments */ + Window *pWin; + if( ExprHasProperty(pExpr, EP_TokenOnly) ){ + pFarg = 0; + pWin = 0; + }else{ + assert( ExprUseXList(pExpr) ); + pFarg = pExpr->x.pList; +#ifndef SQLITE_OMIT_WINDOWFUNC + pWin = ExprHasProperty(pExpr, EP_WinFunc) ? pExpr->y.pWin : 0; +#else + pWin = 0; +#endif + } + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + if( pExpr->op==TK_AGG_FUNCTION ){ + sqlite3TreeViewLine(pView, "AGG_FUNCTION%d %Q%s agg=%d[%d]/%p", + pExpr->op2, pExpr->u.zToken, zFlgs, + pExpr->pAggInfo ? pExpr->pAggInfo->selId : 0, + pExpr->iAgg, pExpr->pAggInfo); + }else if( pExpr->op2!=0 ){ + const char *zOp2; + char zBuf[8]; + sqlite3_snprintf(sizeof(zBuf),zBuf,"0x%02x",pExpr->op2); + zOp2 = zBuf; + if( pExpr->op2==NC_IsCheck ) zOp2 = "NC_IsCheck"; + if( pExpr->op2==NC_IdxExpr ) zOp2 = "NC_IdxExpr"; + if( pExpr->op2==NC_PartIdx ) zOp2 = "NC_PartIdx"; + if( pExpr->op2==NC_GenCol ) zOp2 = "NC_GenCol"; + sqlite3TreeViewLine(pView, "FUNCTION %Q%s op2=%s", + pExpr->u.zToken, zFlgs, zOp2); + }else{ + sqlite3TreeViewLine(pView, "FUNCTION %Q%s", pExpr->u.zToken, zFlgs); + } + if( pFarg ){ + sqlite3TreeViewExprList(pView, pFarg, pWin!=0, 0); + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pWin ){ + sqlite3TreeViewWindow(pView, pWin, 0); + } +#endif + break; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_EXISTS: { + assert( ExprUseXSelect(pExpr) ); + sqlite3TreeViewLine(pView, "EXISTS-expr flags=0x%x", pExpr->flags); + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + break; + } + case TK_SELECT: { + assert( ExprUseXSelect(pExpr) ); + sqlite3TreeViewLine(pView, "subquery-expr flags=0x%x", pExpr->flags); + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + break; + } + case TK_IN: { + sqlite3TreeViewLine(pView, "IN flags=0x%x", pExpr->flags); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + if( ExprUseXSelect(pExpr) ){ + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + }else{ + sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); + } + break; + } +#endif /* SQLITE_OMIT_SUBQUERY */ + + /* + ** x BETWEEN y AND z + ** + ** This is equivalent to + ** + ** x>=y AND x<=z + ** + ** X is stored in pExpr->pLeft. + ** Y is stored in pExpr->pList->a[0].pExpr. + ** Z is stored in pExpr->pList->a[1].pExpr. + */ + case TK_BETWEEN: { + const Expr *pX, *pY, *pZ; + pX = pExpr->pLeft; + assert( ExprUseXList(pExpr) ); + assert( pExpr->x.pList->nExpr==2 ); + pY = pExpr->x.pList->a[0].pExpr; + pZ = pExpr->x.pList->a[1].pExpr; + sqlite3TreeViewLine(pView, "BETWEEN"); + sqlite3TreeViewExpr(pView, pX, 1); + sqlite3TreeViewExpr(pView, pY, 1); + sqlite3TreeViewExpr(pView, pZ, 0); + break; + } + case TK_TRIGGER: { + /* If the opcode is TK_TRIGGER, then the expression is a reference + ** to a column in the new.* or old.* pseudo-tables available to + ** trigger programs. In this case Expr.iTable is set to 1 for the + ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn + ** is set to the column of the pseudo-table to read, or to -1 to + ** read the rowid field. + */ + sqlite3TreeViewLine(pView, "%s(%d)", + pExpr->iTable ? "NEW" : "OLD", pExpr->iColumn); + break; + } + case TK_CASE: { + sqlite3TreeViewLine(pView, "CASE"); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + assert( ExprUseXList(pExpr) ); + sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); + break; + } +#ifndef SQLITE_OMIT_TRIGGER + case TK_RAISE: { + const char *zType = "unk"; + switch( pExpr->affExpr ){ + case OE_Rollback: zType = "rollback"; break; + case OE_Abort: zType = "abort"; break; + case OE_Fail: zType = "fail"; break; + case OE_Ignore: zType = "ignore"; break; + } + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3TreeViewLine(pView, "RAISE %s(%Q)", zType, pExpr->u.zToken); + break; + } +#endif + case TK_MATCH: { + sqlite3TreeViewLine(pView, "MATCH {%d:%d}%s", + pExpr->iTable, pExpr->iColumn, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pRight, 0); + break; + } + case TK_VECTOR: { + char *z = sqlite3_mprintf("VECTOR%s",zFlgs); + assert( ExprUseXList(pExpr) ); + sqlite3TreeViewBareExprList(pView, pExpr->x.pList, z); + sqlite3_free(z); + break; + } + case TK_SELECT_COLUMN: { + sqlite3TreeViewLine(pView, "SELECT-COLUMN %d of [0..%d]%s", + pExpr->iColumn, pExpr->iTable-1, + pExpr->pRight==pExpr->pLeft ? " (SELECT-owner)" : ""); + assert( ExprUseXSelect(pExpr->pLeft) ); + sqlite3TreeViewSelect(pView, pExpr->pLeft->x.pSelect, 0); + break; + } + case TK_IF_NULL_ROW: { + sqlite3TreeViewLine(pView, "IF-NULL-ROW %d", pExpr->iTable); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } + case TK_ERROR: { + Expr tmp; + sqlite3TreeViewLine(pView, "ERROR"); + tmp = *pExpr; + tmp.op = pExpr->op2; + sqlite3TreeViewExpr(pView, &tmp, 0); + break; + } + case TK_ROW: { + if( pExpr->iColumn<=0 ){ + sqlite3TreeViewLine(pView, "First FROM table rowid"); + }else{ + sqlite3TreeViewLine(pView, "First FROM table column %d", + pExpr->iColumn-1); + } + break; + } + default: { + sqlite3TreeViewLine(pView, "op=%d", pExpr->op); + break; + } + } + if( zBinOp ){ + sqlite3TreeViewLine(pView, "%s%s", zBinOp, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + sqlite3TreeViewExpr(pView, pExpr->pRight, 0); + }else if( zUniOp ){ + sqlite3TreeViewLine(pView, "%s%s", zUniOp, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + } + sqlite3TreeViewPop(pView); +} + + +/* +** Generate a human-readable explanation of an expression list. +*/ +SQLITE_PRIVATE void sqlite3TreeViewBareExprList( + TreeView *pView, + const ExprList *pList, + const char *zLabel +){ + if( zLabel==0 || zLabel[0]==0 ) zLabel = "LIST"; + if( pList==0 ){ + sqlite3TreeViewLine(pView, "%s (empty)", zLabel); + }else{ + int i; + sqlite3TreeViewLine(pView, "%s", zLabel); + for(i=0; inExpr; i++){ + int j = pList->a[i].u.x.iOrderByCol; + char *zName = pList->a[i].zEName; + int moreToFollow = inExpr - 1; + if( pList->a[i].eEName!=ENAME_NAME ) zName = 0; + if( j || zName ){ + sqlite3TreeViewPush(pView, moreToFollow); + moreToFollow = 0; + sqlite3TreeViewLine(pView, 0); + if( zName ){ + fprintf(stdout, "AS %s ", zName); + } + if( j ){ + fprintf(stdout, "iOrderByCol=%d", j); + } + fprintf(stdout, "\n"); + fflush(stdout); + } + sqlite3TreeViewExpr(pView, pList->a[i].pExpr, moreToFollow); + if( j || zName ){ + sqlite3TreeViewPop(pView); + } + } + } +} +SQLITE_PRIVATE void sqlite3TreeViewExprList( + TreeView *pView, + const ExprList *pList, + u8 moreToFollow, + const char *zLabel +){ + pView = sqlite3TreeViewPush(pView, moreToFollow); + sqlite3TreeViewBareExprList(pView, pList, zLabel); + sqlite3TreeViewPop(pView); +} + +#endif /* SQLITE_DEBUG */ + +/************** End of treeview.c ********************************************/ +/************** Begin file random.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code to implement a pseudo-random number +** generator (PRNG) for SQLite. +** +** Random numbers are used by some of the database backends in order +** to generate random integer keys for tables or random filenames. +*/ +/* #include "sqliteInt.h" */ + + +/* All threads share a single random number generator. +** This structure is the current state of the generator. +*/ +static SQLITE_WSD struct sqlite3PrngType { + unsigned char isInit; /* True if initialized */ + unsigned char i, j; /* State variables */ + unsigned char s[256]; /* State variables */ +} sqlite3Prng; + +/* +** Return N random bytes. +*/ +SQLITE_API void sqlite3_randomness(int N, void *pBuf){ + unsigned char t; + unsigned char *zBuf = pBuf; + + /* The "wsdPrng" macro will resolve to the pseudo-random number generator + ** state vector. If writable static data is unsupported on the target, + ** we have to locate the state vector at run-time. In the more common + ** case where writable static data is supported, wsdPrng can refer directly + ** to the "sqlite3Prng" state vector declared above. + */ +#ifdef SQLITE_OMIT_WSD + struct sqlite3PrngType *p = &GLOBAL(struct sqlite3PrngType, sqlite3Prng); +# define wsdPrng p[0] +#else +# define wsdPrng sqlite3Prng +#endif + +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex; +#endif + +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return; +#endif + +#if SQLITE_THREADSAFE + mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_PRNG); +#endif + + sqlite3_mutex_enter(mutex); + if( N<=0 || pBuf==0 ){ + wsdPrng.isInit = 0; + sqlite3_mutex_leave(mutex); + return; + } + + /* Initialize the state of the random number generator once, + ** the first time this routine is called. The seed value does + ** not need to contain a lot of randomness since we are not + ** trying to do secure encryption or anything like that... + ** + ** Nothing in this file or anywhere else in SQLite does any kind of + ** encryption. The RC4 algorithm is being used as a PRNG (pseudo-random + ** number generator) not as an encryption device. + */ + if( !wsdPrng.isInit ){ + sqlite3_vfs *pVfs = sqlite3_vfs_find(0); + int i; + char k[256]; + wsdPrng.j = 0; + wsdPrng.i = 0; + if( NEVER(pVfs==0) ){ + memset(k, 0, sizeof(k)); + }else{ + sqlite3OsRandomness(pVfs, 256, k); + } + for(i=0; i<256; i++){ + wsdPrng.s[i] = (u8)i; + } + for(i=0; i<256; i++){ + wsdPrng.j += wsdPrng.s[i] + k[i]; + t = wsdPrng.s[wsdPrng.j]; + wsdPrng.s[wsdPrng.j] = wsdPrng.s[i]; + wsdPrng.s[i] = t; + } + wsdPrng.isInit = 1; + } + + assert( N>0 ); + do{ + wsdPrng.i++; + t = wsdPrng.s[wsdPrng.i]; + wsdPrng.j += t; + wsdPrng.s[wsdPrng.i] = wsdPrng.s[wsdPrng.j]; + wsdPrng.s[wsdPrng.j] = t; + t += wsdPrng.s[wsdPrng.i]; + *(zBuf++) = wsdPrng.s[t]; + }while( --N ); + sqlite3_mutex_leave(mutex); +} + +#ifndef SQLITE_UNTESTABLE +/* +** For testing purposes, we sometimes want to preserve the state of +** PRNG and restore the PRNG to its saved state at a later time, or +** to reset the PRNG to its initial state. These routines accomplish +** those tasks. +** +** The sqlite3_test_control() interface calls these routines to +** control the PRNG. +*/ +static SQLITE_WSD struct sqlite3PrngType sqlite3SavedPrng; +SQLITE_PRIVATE void sqlite3PrngSaveState(void){ + memcpy( + &GLOBAL(struct sqlite3PrngType, sqlite3SavedPrng), + &GLOBAL(struct sqlite3PrngType, sqlite3Prng), + sizeof(sqlite3Prng) + ); +} +SQLITE_PRIVATE void sqlite3PrngRestoreState(void){ + memcpy( + &GLOBAL(struct sqlite3PrngType, sqlite3Prng), + &GLOBAL(struct sqlite3PrngType, sqlite3SavedPrng), + sizeof(sqlite3Prng) + ); +} +#endif /* SQLITE_UNTESTABLE */ + +/************** End of random.c **********************************************/ +/************** Begin file threads.c *****************************************/ +/* +** 2012 July 21 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file presents a simple cross-platform threading interface for +** use internally by SQLite. +** +** A "thread" can be created using sqlite3ThreadCreate(). This thread +** runs independently of its creator until it is joined using +** sqlite3ThreadJoin(), at which point it terminates. +** +** Threads do not have to be real. It could be that the work of the +** "thread" is done by the main thread at either the sqlite3ThreadCreate() +** or sqlite3ThreadJoin() call. This is, in fact, what happens in +** single threaded systems. Nothing in SQLite requires multiple threads. +** This interface exists so that applications that want to take advantage +** of multiple cores can do so, while also allowing applications to stay +** single-threaded if desired. +*/ +/* #include "sqliteInt.h" */ +#if SQLITE_OS_WIN +/* # include "os_win.h" */ +#endif + +#if SQLITE_MAX_WORKER_THREADS>0 + +/********************************* Unix Pthreads ****************************/ +#if SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) && SQLITE_THREADSAFE>0 + +#define SQLITE_THREADS_IMPLEMENTED 1 /* Prevent the single-thread code below */ +/* #include */ + +/* A running thread */ +struct SQLiteThread { + pthread_t tid; /* Thread ID */ + int done; /* Set to true when thread finishes */ + void *pOut; /* Result returned by the thread */ + void *(*xTask)(void*); /* The thread routine */ + void *pIn; /* Argument to the thread */ +}; + +/* Create a new thread */ +SQLITE_PRIVATE int sqlite3ThreadCreate( + SQLiteThread **ppThread, /* OUT: Write the thread object here */ + void *(*xTask)(void*), /* Routine to run in a separate thread */ + void *pIn /* Argument passed into xTask() */ +){ + SQLiteThread *p; + int rc; + + assert( ppThread!=0 ); + assert( xTask!=0 ); + /* This routine is never used in single-threaded mode */ + assert( sqlite3GlobalConfig.bCoreMutex!=0 ); + + *ppThread = 0; + p = sqlite3Malloc(sizeof(*p)); + if( p==0 ) return SQLITE_NOMEM_BKPT; + memset(p, 0, sizeof(*p)); + p->xTask = xTask; + p->pIn = pIn; + /* If the SQLITE_TESTCTRL_FAULT_INSTALL callback is registered to a + ** function that returns SQLITE_ERROR when passed the argument 200, that + ** forces worker threads to run sequentially and deterministically + ** for testing purposes. */ + if( sqlite3FaultSim(200) ){ + rc = 1; + }else{ + rc = pthread_create(&p->tid, 0, xTask, pIn); + } + if( rc ){ + p->done = 1; + p->pOut = xTask(pIn); + } + *ppThread = p; + return SQLITE_OK; +} + +/* Get the results of the thread */ +SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){ + int rc; + + assert( ppOut!=0 ); + if( NEVER(p==0) ) return SQLITE_NOMEM_BKPT; + if( p->done ){ + *ppOut = p->pOut; + rc = SQLITE_OK; + }else{ + rc = pthread_join(p->tid, ppOut) ? SQLITE_ERROR : SQLITE_OK; + } + sqlite3_free(p); + return rc; +} + +#endif /* SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) */ +/******************************** End Unix Pthreads *************************/ + + +/********************************* Win32 Threads ****************************/ +#if SQLITE_OS_WIN_THREADS + +#define SQLITE_THREADS_IMPLEMENTED 1 /* Prevent the single-thread code below */ +#include + +/* A running thread */ +struct SQLiteThread { + void *tid; /* The thread handle */ + unsigned id; /* The thread identifier */ + void *(*xTask)(void*); /* The routine to run as a thread */ + void *pIn; /* Argument to xTask */ + void *pResult; /* Result of xTask */ +}; + +/* Thread procedure Win32 compatibility shim */ +static unsigned __stdcall sqlite3ThreadProc( + void *pArg /* IN: Pointer to the SQLiteThread structure */ +){ + SQLiteThread *p = (SQLiteThread *)pArg; + + assert( p!=0 ); +#if 0 + /* + ** This assert appears to trigger spuriously on certain + ** versions of Windows, possibly due to _beginthreadex() + ** and/or CreateThread() not fully setting their thread + ** ID parameter before starting the thread. + */ + assert( p->id==GetCurrentThreadId() ); +#endif + assert( p->xTask!=0 ); + p->pResult = p->xTask(p->pIn); + + _endthreadex(0); + return 0; /* NOT REACHED */ +} + +/* Create a new thread */ +SQLITE_PRIVATE int sqlite3ThreadCreate( + SQLiteThread **ppThread, /* OUT: Write the thread object here */ + void *(*xTask)(void*), /* Routine to run in a separate thread */ + void *pIn /* Argument passed into xTask() */ +){ + SQLiteThread *p; + + assert( ppThread!=0 ); + assert( xTask!=0 ); + *ppThread = 0; + p = sqlite3Malloc(sizeof(*p)); + if( p==0 ) return SQLITE_NOMEM_BKPT; + /* If the SQLITE_TESTCTRL_FAULT_INSTALL callback is registered to a + ** function that returns SQLITE_ERROR when passed the argument 200, that + ** forces worker threads to run sequentially and deterministically + ** (via the sqlite3FaultSim() term of the conditional) for testing + ** purposes. */ + if( sqlite3GlobalConfig.bCoreMutex==0 || sqlite3FaultSim(200) ){ + memset(p, 0, sizeof(*p)); + }else{ + p->xTask = xTask; + p->pIn = pIn; + p->tid = (void*)_beginthreadex(0, 0, sqlite3ThreadProc, p, 0, &p->id); + if( p->tid==0 ){ + memset(p, 0, sizeof(*p)); + } + } + if( p->xTask==0 ){ + p->id = GetCurrentThreadId(); + p->pResult = xTask(pIn); + } + *ppThread = p; + return SQLITE_OK; +} + +SQLITE_PRIVATE DWORD sqlite3Win32Wait(HANDLE hObject); /* os_win.c */ + +/* Get the results of the thread */ +SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){ + DWORD rc; + BOOL bRc; + + assert( ppOut!=0 ); + if( NEVER(p==0) ) return SQLITE_NOMEM_BKPT; + if( p->xTask==0 ){ + /* assert( p->id==GetCurrentThreadId() ); */ + rc = WAIT_OBJECT_0; + assert( p->tid==0 ); + }else{ + assert( p->id!=0 && p->id!=GetCurrentThreadId() ); + rc = sqlite3Win32Wait((HANDLE)p->tid); + assert( rc!=WAIT_IO_COMPLETION ); + bRc = CloseHandle((HANDLE)p->tid); + assert( bRc ); + } + if( rc==WAIT_OBJECT_0 ) *ppOut = p->pResult; + sqlite3_free(p); + return (rc==WAIT_OBJECT_0) ? SQLITE_OK : SQLITE_ERROR; +} + +#endif /* SQLITE_OS_WIN_THREADS */ +/******************************** End Win32 Threads *************************/ + + +/********************************* Single-Threaded **************************/ +#ifndef SQLITE_THREADS_IMPLEMENTED +/* +** This implementation does not actually create a new thread. It does the +** work of the thread in the main thread, when either the thread is created +** or when it is joined +*/ + +/* A running thread */ +struct SQLiteThread { + void *(*xTask)(void*); /* The routine to run as a thread */ + void *pIn; /* Argument to xTask */ + void *pResult; /* Result of xTask */ +}; + +/* Create a new thread */ +SQLITE_PRIVATE int sqlite3ThreadCreate( + SQLiteThread **ppThread, /* OUT: Write the thread object here */ + void *(*xTask)(void*), /* Routine to run in a separate thread */ + void *pIn /* Argument passed into xTask() */ +){ + SQLiteThread *p; + + assert( ppThread!=0 ); + assert( xTask!=0 ); + *ppThread = 0; + p = sqlite3Malloc(sizeof(*p)); + if( p==0 ) return SQLITE_NOMEM_BKPT; + if( (SQLITE_PTR_TO_INT(p)/17)&1 ){ + p->xTask = xTask; + p->pIn = pIn; + }else{ + p->xTask = 0; + p->pResult = xTask(pIn); + } + *ppThread = p; + return SQLITE_OK; +} + +/* Get the results of the thread */ +SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){ + + assert( ppOut!=0 ); + if( NEVER(p==0) ) return SQLITE_NOMEM_BKPT; + if( p->xTask ){ + *ppOut = p->xTask(p->pIn); + }else{ + *ppOut = p->pResult; + } + sqlite3_free(p); + +#if defined(SQLITE_TEST) + { + void *pTstAlloc = sqlite3Malloc(10); + if (!pTstAlloc) return SQLITE_NOMEM_BKPT; + sqlite3_free(pTstAlloc); + } +#endif + + return SQLITE_OK; +} + +#endif /* !defined(SQLITE_THREADS_IMPLEMENTED) */ +/****************************** End Single-Threaded *************************/ +#endif /* SQLITE_MAX_WORKER_THREADS>0 */ + +/************** End of threads.c *********************************************/ +/************** Begin file utf.c *********************************************/ +/* +** 2004 April 13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains routines used to translate between UTF-8, +** UTF-16, UTF-16BE, and UTF-16LE. +** +** Notes on UTF-8: +** +** Byte-0 Byte-1 Byte-2 Byte-3 Value +** 0xxxxxxx 00000000 00000000 0xxxxxxx +** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx +** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx +** 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx +** +** +** Notes on UTF-16: (with wwww+1==uuuuu) +** +** Word-0 Word-1 Value +** 110110ww wwzzzzyy 110111yy yyxxxxxx 000uuuuu zzzzyyyy yyxxxxxx +** zzzzyyyy yyxxxxxx 00000000 zzzzyyyy yyxxxxxx +** +** +** BOM or Byte Order Mark: +** 0xff 0xfe little-endian utf-16 follows +** 0xfe 0xff big-endian utf-16 follows +** +*/ +/* #include "sqliteInt.h" */ +/* #include */ +/* #include "vdbeInt.h" */ + +#if !defined(SQLITE_AMALGAMATION) && SQLITE_BYTEORDER==0 +/* +** The following constant value is used by the SQLITE_BIGENDIAN and +** SQLITE_LITTLEENDIAN macros. +*/ +SQLITE_PRIVATE const int sqlite3one = 1; +#endif /* SQLITE_AMALGAMATION && SQLITE_BYTEORDER==0 */ + +/* +** This lookup table is used to help decode the first byte of +** a multi-byte UTF8 character. +*/ +static const unsigned char sqlite3Utf8Trans1[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, +}; + + +#define WRITE_UTF8(zOut, c) { \ + if( c<0x00080 ){ \ + *zOut++ = (u8)(c&0xFF); \ + } \ + else if( c<0x00800 ){ \ + *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ + else if( c<0x10000 ){ \ + *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + }else{ \ + *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ + *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ +} + +#define WRITE_UTF16LE(zOut, c) { \ + if( c<=0xFFFF ){ \ + *zOut++ = (u8)(c&0x00FF); \ + *zOut++ = (u8)((c>>8)&0x00FF); \ + }else{ \ + *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \ + *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \ + *zOut++ = (u8)(c&0x00FF); \ + *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \ + } \ +} + +#define WRITE_UTF16BE(zOut, c) { \ + if( c<=0xFFFF ){ \ + *zOut++ = (u8)((c>>8)&0x00FF); \ + *zOut++ = (u8)(c&0x00FF); \ + }else{ \ + *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \ + *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \ + *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \ + *zOut++ = (u8)(c&0x00FF); \ + } \ +} + +/* +** Translate a single UTF-8 character. Return the unicode value. +** +** During translation, assume that the byte that zTerm points +** is a 0x00. +** +** Write a pointer to the next unread byte back into *pzNext. +** +** Notes On Invalid UTF-8: +** +** * This routine never allows a 7-bit character (0x00 through 0x7f) to +** be encoded as a multi-byte character. Any multi-byte character that +** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd. +** +** * This routine never allows a UTF16 surrogate value to be encoded. +** If a multi-byte character attempts to encode a value between +** 0xd800 and 0xe000 then it is rendered as 0xfffd. +** +** * Bytes in the range of 0x80 through 0xbf which occur as the first +** byte of a character are interpreted as single-byte characters +** and rendered as themselves even though they are technically +** invalid characters. +** +** * This routine accepts over-length UTF8 encodings +** for unicode values 0x80 and greater. It does not change over-length +** encodings to 0xfffd as some systems recommend. +*/ +#define READ_UTF8(zIn, zTerm, c) \ + c = *(zIn++); \ + if( c>=0xc0 ){ \ + c = sqlite3Utf8Trans1[c-0xc0]; \ + while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + if( c<0x80 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + } +SQLITE_PRIVATE u32 sqlite3Utf8Read( + const unsigned char **pz /* Pointer to string from which to read char */ +){ + unsigned int c; + + /* Same as READ_UTF8() above but without the zTerm parameter. + ** For this routine, we assume the UTF8 string is always zero-terminated. + */ + c = *((*pz)++); + if( c>=0xc0 ){ + c = sqlite3Utf8Trans1[c-0xc0]; + while( (*(*pz) & 0xc0)==0x80 ){ + c = (c<<6) + (0x3f & *((*pz)++)); + } + if( c<0x80 + || (c&0xFFFFF800)==0xD800 + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } + } + return c; +} + + + + +/* +** If the TRANSLATE_TRACE macro is defined, the value of each Mem is +** printed on stderr on the way into and out of sqlite3VdbeMemTranslate(). +*/ +/* #define TRANSLATE_TRACE 1 */ + +#ifndef SQLITE_OMIT_UTF16 +/* +** This routine transforms the internal text encoding used by pMem to +** desiredEnc. It is an error if the string is already of the desired +** encoding, or if *pMem does not contain a string value. +*/ +SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){ + sqlite3_int64 len; /* Maximum length of output string in bytes */ + unsigned char *zOut; /* Output buffer */ + unsigned char *zIn; /* Input iterator */ + unsigned char *zTerm; /* End of input */ + unsigned char *z; /* Output iterator */ + unsigned int c; + + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( pMem->flags&MEM_Str ); + assert( pMem->enc!=desiredEnc ); + assert( pMem->enc!=0 ); + assert( pMem->n>=0 ); + +#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG) + { + StrAccum acc; + char zBuf[1000]; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); + sqlite3VdbeMemPrettyPrint(pMem, &acc); + fprintf(stderr, "INPUT: %s\n", sqlite3StrAccumFinish(&acc)); + } +#endif + + /* If the translation is between UTF-16 little and big endian, then + ** all that is required is to swap the byte order. This case is handled + ** differently from the others. + */ + if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){ + u8 temp; + int rc; + rc = sqlite3VdbeMemMakeWriteable(pMem); + if( rc!=SQLITE_OK ){ + assert( rc==SQLITE_NOMEM ); + return SQLITE_NOMEM_BKPT; + } + zIn = (u8*)pMem->z; + zTerm = &zIn[pMem->n&~1]; + while( zInenc = desiredEnc; + goto translate_out; + } + + /* Set len to the maximum number of bytes required in the output buffer. */ + if( desiredEnc==SQLITE_UTF8 ){ + /* When converting from UTF-16, the maximum growth results from + ** translating a 2-byte character to a 4-byte UTF-8 character. + ** A single byte is required for the output string + ** nul-terminator. + */ + pMem->n &= ~1; + len = 2 * (sqlite3_int64)pMem->n + 1; + }else{ + /* When converting from UTF-8 to UTF-16 the maximum growth is caused + ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16 + ** character. Two bytes are required in the output buffer for the + ** nul-terminator. + */ + len = 2 * (sqlite3_int64)pMem->n + 2; + } + + /* Set zIn to point at the start of the input buffer and zTerm to point 1 + ** byte past the end. + ** + ** Variable zOut is set to point at the output buffer, space obtained + ** from sqlite3_malloc(). + */ + zIn = (u8*)pMem->z; + zTerm = &zIn[pMem->n]; + zOut = sqlite3DbMallocRaw(pMem->db, len); + if( !zOut ){ + return SQLITE_NOMEM_BKPT; + } + z = zOut; + + if( pMem->enc==SQLITE_UTF8 ){ + if( desiredEnc==SQLITE_UTF16LE ){ + /* UTF-8 -> UTF-16 Little-endian */ + while( zIn UTF-16 Big-endian */ + while( zInn = (int)(z - zOut); + *z++ = 0; + }else{ + assert( desiredEnc==SQLITE_UTF8 ); + if( pMem->enc==SQLITE_UTF16LE ){ + /* UTF-16 Little-endian -> UTF-8 */ + while( zIn=0xd800 && c<0xe000 ){ +#ifdef SQLITE_REPLACE_INVALID_UTF + if( c>=0xdc00 || zIn>=zTerm ){ + c = 0xfffd; + }else{ + int c2 = *(zIn++); + c2 += (*(zIn++))<<8; + if( c2<0xdc00 || c2>=0xe000 ){ + zIn -= 2; + c = 0xfffd; + }else{ + c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000; + } + } +#else + if( zIn UTF-8 */ + while( zIn=0xd800 && c<0xe000 ){ +#ifdef SQLITE_REPLACE_INVALID_UTF + if( c>=0xdc00 || zIn>=zTerm ){ + c = 0xfffd; + }else{ + int c2 = (*(zIn++))<<8; + c2 += *(zIn++); + if( c2<0xdc00 || c2>=0xe000 ){ + zIn -= 2; + c = 0xfffd; + }else{ + c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000; + } + } +#else + if( zInn = (int)(z - zOut); + } + *z = 0; + assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); + + c = MEM_Str|MEM_Term|(pMem->flags&(MEM_AffMask|MEM_Subtype)); + sqlite3VdbeMemRelease(pMem); + pMem->flags = c; + pMem->enc = desiredEnc; + pMem->z = (char*)zOut; + pMem->zMalloc = pMem->z; + pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->z); + +translate_out: +#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG) + { + StrAccum acc; + char zBuf[1000]; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); + sqlite3VdbeMemPrettyPrint(pMem, &acc); + fprintf(stderr, "OUTPUT: %s\n", sqlite3StrAccumFinish(&acc)); + } +#endif + return SQLITE_OK; +} +#endif /* SQLITE_OMIT_UTF16 */ + +#ifndef SQLITE_OMIT_UTF16 +/* +** This routine checks for a byte-order mark at the beginning of the +** UTF-16 string stored in *pMem. If one is present, it is removed and +** the encoding of the Mem adjusted. This routine does not do any +** byte-swapping, it just sets Mem.enc appropriately. +** +** The allocation (static, dynamic etc.) and encoding of the Mem may be +** changed by this function. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemHandleBom(Mem *pMem){ + int rc = SQLITE_OK; + u8 bom = 0; + + assert( pMem->n>=0 ); + if( pMem->n>1 ){ + u8 b1 = *(u8 *)pMem->z; + u8 b2 = *(((u8 *)pMem->z) + 1); + if( b1==0xFE && b2==0xFF ){ + bom = SQLITE_UTF16BE; + } + if( b1==0xFF && b2==0xFE ){ + bom = SQLITE_UTF16LE; + } + } + + if( bom ){ + rc = sqlite3VdbeMemMakeWriteable(pMem); + if( rc==SQLITE_OK ){ + pMem->n -= 2; + memmove(pMem->z, &pMem->z[2], pMem->n); + pMem->z[pMem->n] = '\0'; + pMem->z[pMem->n+1] = '\0'; + pMem->flags |= MEM_Term; + pMem->enc = bom; + } + } + return rc; +} +#endif /* SQLITE_OMIT_UTF16 */ + +/* +** pZ is a UTF-8 encoded unicode string. If nByte is less than zero, +** return the number of unicode characters in pZ up to (but not including) +** the first 0x00 byte. If nByte is not less than zero, return the +** number of unicode characters in the first nByte of pZ (or up to +** the first 0x00, whichever comes first). +*/ +SQLITE_PRIVATE int sqlite3Utf8CharLen(const char *zIn, int nByte){ + int r = 0; + const u8 *z = (const u8*)zIn; + const u8 *zTerm; + if( nByte>=0 ){ + zTerm = &z[nByte]; + }else{ + zTerm = (const u8*)(-1); + } + assert( z<=zTerm ); + while( *z!=0 && zmallocFailed ){ + sqlite3VdbeMemRelease(&m); + m.z = 0; + } + assert( (m.flags & MEM_Term)!=0 || db->mallocFailed ); + assert( (m.flags & MEM_Str)!=0 || db->mallocFailed ); + assert( m.z || db->mallocFailed ); + return m.z; +} + +/* +** zIn is a UTF-16 encoded unicode string at least nChar characters long. +** Return the number of bytes in the first nChar unicode characters +** in pZ. nChar must be non-negative. +*/ +SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *zIn, int nChar){ + int c; + unsigned char const *z = zIn; + int n = 0; + + if( SQLITE_UTF16NATIVE==SQLITE_UTF16LE ) z++; + while( n=0xd8 && c<0xdc && z[0]>=0xdc && z[0]<0xe0 ) z += 2; + n++; + } + return (int)(z-(unsigned char const *)zIn) + - (SQLITE_UTF16NATIVE==SQLITE_UTF16LE); +} + +#if defined(SQLITE_TEST) +/* +** This routine is called from the TCL test function "translate_selftest". +** It checks that the primitives for serializing and deserializing +** characters in each encoding are inverses of each other. +*/ +SQLITE_PRIVATE void sqlite3UtfSelfTest(void){ + unsigned int i, t; + unsigned char zBuf[20]; + unsigned char *z; + int n; + unsigned int c; + + for(i=0; i<0x00110000; i++){ + z = zBuf; + WRITE_UTF8(z, i); + n = (int)(z-zBuf); + assert( n>0 && n<=4 ); + z[0] = 0; + z = zBuf; + c = sqlite3Utf8Read((const u8**)&z); + t = i; + if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD; + if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD; + assert( c==t ); + assert( (z-zBuf)==n ); + } +} +#endif /* SQLITE_TEST */ +#endif /* SQLITE_OMIT_UTF16 */ + +/************** End of utf.c *************************************************/ +/************** Begin file util.c ********************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Utility functions used throughout sqlite. +** +** This file contains functions for allocating memory, comparing +** strings, and stuff like that. +** +*/ +/* #include "sqliteInt.h" */ +/* #include */ +#ifndef SQLITE_OMIT_FLOATING_POINT +#include +#endif + +/* +** Calls to sqlite3FaultSim() are used to simulate a failure during testing, +** or to bypass normal error detection during testing in order to let +** execute proceed futher downstream. +** +** In deployment, sqlite3FaultSim() *always* return SQLITE_OK (0). The +** sqlite3FaultSim() function only returns non-zero during testing. +** +** During testing, if the test harness has set a fault-sim callback using +** a call to sqlite3_test_control(SQLITE_TESTCTRL_FAULT_INSTALL), then +** each call to sqlite3FaultSim() is relayed to that application-supplied +** callback and the integer return value form the application-supplied +** callback is returned by sqlite3FaultSim(). +** +** The integer argument to sqlite3FaultSim() is a code to identify which +** sqlite3FaultSim() instance is being invoked. Each call to sqlite3FaultSim() +** should have a unique code. To prevent legacy testing applications from +** breaking, the codes should not be changed or reused. +*/ +#ifndef SQLITE_UNTESTABLE +SQLITE_PRIVATE int sqlite3FaultSim(int iTest){ + int (*xCallback)(int) = sqlite3GlobalConfig.xTestCallback; + return xCallback ? xCallback(iTest) : SQLITE_OK; +} +#endif + +#ifndef SQLITE_OMIT_FLOATING_POINT +/* +** Return true if the floating point value is Not a Number (NaN). +** +** Use the math library isnan() function if compiled with SQLITE_HAVE_ISNAN. +** Otherwise, we have our own implementation that works on most systems. +*/ +SQLITE_PRIVATE int sqlite3IsNaN(double x){ + int rc; /* The value return */ +#if !SQLITE_HAVE_ISNAN && !HAVE_ISNAN + u64 y; + memcpy(&y,&x,sizeof(y)); + rc = IsNaN(y); +#else + rc = isnan(x); +#endif /* HAVE_ISNAN */ + testcase( rc ); + return rc; +} +#endif /* SQLITE_OMIT_FLOATING_POINT */ + +/* +** Compute a string length that is limited to what can be stored in +** lower 30 bits of a 32-bit signed integer. +** +** The value returned will never be negative. Nor will it ever be greater +** than the actual length of the string. For very long strings (greater +** than 1GiB) the value returned might be less than the true string length. +*/ +SQLITE_PRIVATE int sqlite3Strlen30(const char *z){ + if( z==0 ) return 0; + return 0x3fffffff & (int)strlen(z); +} + +/* +** Return the declared type of a column. Or return zDflt if the column +** has no declared type. +** +** The column type is an extra string stored after the zero-terminator on +** the column name if and only if the COLFLAG_HASTYPE flag is set. +*/ +SQLITE_PRIVATE char *sqlite3ColumnType(Column *pCol, char *zDflt){ + if( pCol->colFlags & COLFLAG_HASTYPE ){ + return pCol->zCnName + strlen(pCol->zCnName) + 1; + }else if( pCol->eCType ){ + assert( pCol->eCType<=SQLITE_N_STDTYPE ); + return (char*)sqlite3StdType[pCol->eCType-1]; + }else{ + return zDflt; + } +} + +/* +** Helper function for sqlite3Error() - called rarely. Broken out into +** a separate routine to avoid unnecessary register saves on entry to +** sqlite3Error(). +*/ +static SQLITE_NOINLINE void sqlite3ErrorFinish(sqlite3 *db, int err_code){ + if( db->pErr ) sqlite3ValueSetNull(db->pErr); + sqlite3SystemError(db, err_code); +} + +/* +** Set the current error code to err_code and clear any prior error message. +** Also set iSysErrno (by calling sqlite3System) if the err_code indicates +** that would be appropriate. +*/ +SQLITE_PRIVATE void sqlite3Error(sqlite3 *db, int err_code){ + assert( db!=0 ); + db->errCode = err_code; + if( err_code || db->pErr ){ + sqlite3ErrorFinish(db, err_code); + }else{ + db->errByteOffset = -1; + } +} + +/* +** The equivalent of sqlite3Error(db, SQLITE_OK). Clear the error state +** and error message. +*/ +SQLITE_PRIVATE void sqlite3ErrorClear(sqlite3 *db){ + assert( db!=0 ); + db->errCode = SQLITE_OK; + db->errByteOffset = -1; + if( db->pErr ) sqlite3ValueSetNull(db->pErr); +} + +/* +** Load the sqlite3.iSysErrno field if that is an appropriate thing +** to do based on the SQLite error code in rc. +*/ +SQLITE_PRIVATE void sqlite3SystemError(sqlite3 *db, int rc){ + if( rc==SQLITE_IOERR_NOMEM ) return; + rc &= 0xff; + if( rc==SQLITE_CANTOPEN || rc==SQLITE_IOERR ){ + db->iSysErrno = sqlite3OsGetLastError(db->pVfs); + } +} + +/* +** Set the most recent error code and error string for the sqlite +** handle "db". The error code is set to "err_code". +** +** If it is not NULL, string zFormat specifies the format of the +** error string. zFormat and any string tokens that follow it are +** assumed to be encoded in UTF-8. +** +** To clear the most recent error for sqlite handle "db", sqlite3Error +** should be called with err_code set to SQLITE_OK and zFormat set +** to NULL. +*/ +SQLITE_PRIVATE void sqlite3ErrorWithMsg(sqlite3 *db, int err_code, const char *zFormat, ...){ + assert( db!=0 ); + db->errCode = err_code; + sqlite3SystemError(db, err_code); + if( zFormat==0 ){ + sqlite3Error(db, err_code); + }else if( db->pErr || (db->pErr = sqlite3ValueNew(db))!=0 ){ + char *z; + va_list ap; + va_start(ap, zFormat); + z = sqlite3VMPrintf(db, zFormat, ap); + va_end(ap); + sqlite3ValueSetStr(db->pErr, -1, z, SQLITE_UTF8, SQLITE_DYNAMIC); + } +} + +/* +** Add an error message to pParse->zErrMsg and increment pParse->nErr. +** +** This function should be used to report any error that occurs while +** compiling an SQL statement (i.e. within sqlite3_prepare()). The +** last thing the sqlite3_prepare() function does is copy the error +** stored by this function into the database handle using sqlite3Error(). +** Functions sqlite3Error() or sqlite3ErrorWithMsg() should be used +** during statement execution (sqlite3_step() etc.). +*/ +SQLITE_PRIVATE void sqlite3ErrorMsg(Parse *pParse, const char *zFormat, ...){ + char *zMsg; + va_list ap; + sqlite3 *db = pParse->db; + assert( db!=0 ); + assert( db->pParse==pParse ); + db->errByteOffset = -2; + va_start(ap, zFormat); + zMsg = sqlite3VMPrintf(db, zFormat, ap); + va_end(ap); + if( db->errByteOffset<-1 ) db->errByteOffset = -1; + if( db->suppressErr ){ + sqlite3DbFree(db, zMsg); + if( db->mallocFailed ){ + pParse->nErr++; + pParse->rc = SQLITE_NOMEM; + } + }else{ + pParse->nErr++; + sqlite3DbFree(db, pParse->zErrMsg); + pParse->zErrMsg = zMsg; + pParse->rc = SQLITE_ERROR; + pParse->pWith = 0; + } +} + +/* +** If database connection db is currently parsing SQL, then transfer +** error code errCode to that parser if the parser has not already +** encountered some other kind of error. +*/ +SQLITE_PRIVATE int sqlite3ErrorToParser(sqlite3 *db, int errCode){ + Parse *pParse; + if( db==0 || (pParse = db->pParse)==0 ) return errCode; + pParse->rc = errCode; + pParse->nErr++; + return errCode; +} + +/* +** Convert an SQL-style quoted string into a normal string by removing +** the quote characters. The conversion is done in-place. If the +** input does not begin with a quote character, then this routine +** is a no-op. +** +** The input string must be zero-terminated. A new zero-terminator +** is added to the dequoted string. +** +** The return value is -1 if no dequoting occurs or the length of the +** dequoted string, exclusive of the zero terminator, if dequoting does +** occur. +** +** 2002-02-14: This routine is extended to remove MS-Access style +** brackets from around identifiers. For example: "[a-b-c]" becomes +** "a-b-c". +*/ +SQLITE_PRIVATE void sqlite3Dequote(char *z){ + char quote; + int i, j; + if( z==0 ) return; + quote = z[0]; + if( !sqlite3Isquote(quote) ) return; + if( quote=='[' ) quote = ']'; + for(i=1, j=0;; i++){ + assert( z[i] ); + if( z[i]==quote ){ + if( z[i+1]==quote ){ + z[j++] = quote; + i++; + }else{ + break; + } + }else{ + z[j++] = z[i]; + } + } + z[j] = 0; +} +SQLITE_PRIVATE void sqlite3DequoteExpr(Expr *p){ + assert( !ExprHasProperty(p, EP_IntValue) ); + assert( sqlite3Isquote(p->u.zToken[0]) ); + p->flags |= p->u.zToken[0]=='"' ? EP_Quoted|EP_DblQuoted : EP_Quoted; + sqlite3Dequote(p->u.zToken); +} + +/* +** If the input token p is quoted, try to adjust the token to remove +** the quotes. This is not always possible: +** +** "abc" -> abc +** "ab""cd" -> (not possible because of the interior "") +** +** Remove the quotes if possible. This is a optimization. The overall +** system should still return the correct answer even if this routine +** is always a no-op. +*/ +SQLITE_PRIVATE void sqlite3DequoteToken(Token *p){ + unsigned int i; + if( p->n<2 ) return; + if( !sqlite3Isquote(p->z[0]) ) return; + for(i=1; in-1; i++){ + if( sqlite3Isquote(p->z[i]) ) return; + } + p->n -= 2; + p->z++; +} + +/* +** Generate a Token object from a string +*/ +SQLITE_PRIVATE void sqlite3TokenInit(Token *p, char *z){ + p->z = z; + p->n = sqlite3Strlen30(z); +} + +/* Convenient short-hand */ +#define UpperToLower sqlite3UpperToLower + +/* +** Some systems have stricmp(). Others have strcasecmp(). Because +** there is no consistency, we will define our own. +** +** IMPLEMENTATION-OF: R-30243-02494 The sqlite3_stricmp() and +** sqlite3_strnicmp() APIs allow applications and extensions to compare +** the contents of two buffers containing UTF-8 strings in a +** case-independent fashion, using the same definition of "case +** independence" that SQLite uses internally when comparing identifiers. +*/ +SQLITE_API int sqlite3_stricmp(const char *zLeft, const char *zRight){ + if( zLeft==0 ){ + return zRight ? -1 : 0; + }else if( zRight==0 ){ + return 1; + } + return sqlite3StrICmp(zLeft, zRight); +} +SQLITE_PRIVATE int sqlite3StrICmp(const char *zLeft, const char *zRight){ + unsigned char *a, *b; + int c, x; + a = (unsigned char *)zLeft; + b = (unsigned char *)zRight; + for(;;){ + c = *a; + x = *b; + if( c==x ){ + if( c==0 ) break; + }else{ + c = (int)UpperToLower[c] - (int)UpperToLower[x]; + if( c ) break; + } + a++; + b++; + } + return c; +} +SQLITE_API int sqlite3_strnicmp(const char *zLeft, const char *zRight, int N){ + register unsigned char *a, *b; + if( zLeft==0 ){ + return zRight ? -1 : 0; + }else if( zRight==0 ){ + return 1; + } + a = (unsigned char *)zLeft; + b = (unsigned char *)zRight; + while( N-- > 0 && *a!=0 && UpperToLower[*a]==UpperToLower[*b]){ a++; b++; } + return N<0 ? 0 : UpperToLower[*a] - UpperToLower[*b]; +} + +/* +** Compute an 8-bit hash on a string that is insensitive to case differences +*/ +SQLITE_PRIVATE u8 sqlite3StrIHash(const char *z){ + u8 h = 0; + if( z==0 ) return 0; + while( z[0] ){ + h += UpperToLower[(unsigned char)z[0]]; + z++; + } + return h; +} + +/* +** Compute 10 to the E-th power. Examples: E==1 results in 10. +** E==2 results in 100. E==50 results in 1.0e50. +** +** This routine only works for values of E between 1 and 341. +*/ +static LONGDOUBLE_TYPE sqlite3Pow10(int E){ +#if defined(_MSC_VER) + static const LONGDOUBLE_TYPE x[] = { + 1.0e+001L, + 1.0e+002L, + 1.0e+004L, + 1.0e+008L, + 1.0e+016L, + 1.0e+032L, + 1.0e+064L, + 1.0e+128L, + 1.0e+256L + }; + LONGDOUBLE_TYPE r = 1.0; + int i; + assert( E>=0 && E<=307 ); + for(i=0; E!=0; i++, E >>=1){ + if( E & 1 ) r *= x[i]; + } + return r; +#else + LONGDOUBLE_TYPE x = 10.0; + LONGDOUBLE_TYPE r = 1.0; + while(1){ + if( E & 1 ) r *= x; + E >>= 1; + if( E==0 ) break; + x *= x; + } + return r; +#endif +} + +/* +** The string z[] is an text representation of a real number. +** Convert this string to a double and write it into *pResult. +** +** The string z[] is length bytes in length (bytes, not characters) and +** uses the encoding enc. The string is not necessarily zero-terminated. +** +** Return TRUE if the result is a valid real number (or integer) and FALSE +** if the string is empty or contains extraneous text. More specifically +** return +** 1 => The input string is a pure integer +** 2 or more => The input has a decimal point or eNNN clause +** 0 or less => The input string is not a valid number +** -1 => Not a valid number, but has a valid prefix which +** includes a decimal point and/or an eNNN clause +** +** Valid numbers are in one of these formats: +** +** [+-]digits[E[+-]digits] +** [+-]digits.[digits][E[+-]digits] +** [+-].digits[E[+-]digits] +** +** Leading and trailing whitespace is ignored for the purpose of determining +** validity. +** +** If some prefix of the input string is a valid number, this routine +** returns FALSE but it still converts the prefix and writes the result +** into *pResult. +*/ +#if defined(_MSC_VER) +#pragma warning(disable : 4756) +#endif +SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 enc){ +#ifndef SQLITE_OMIT_FLOATING_POINT + int incr; + const char *zEnd; + /* sign * significand * (10 ^ (esign * exponent)) */ + int sign = 1; /* sign of significand */ + i64 s = 0; /* significand */ + int d = 0; /* adjust exponent for shifting decimal point */ + int esign = 1; /* sign of exponent */ + int e = 0; /* exponent */ + int eValid = 1; /* True exponent is either not used or is well-formed */ + double result; + int nDigit = 0; /* Number of digits processed */ + int eType = 1; /* 1: pure integer, 2+: fractional -1 or less: bad UTF16 */ + + assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); + *pResult = 0.0; /* Default return value, in case of an error */ + if( length==0 ) return 0; + + if( enc==SQLITE_UTF8 ){ + incr = 1; + zEnd = z + length; + }else{ + int i; + incr = 2; + length &= ~1; + assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 ); + testcase( enc==SQLITE_UTF16LE ); + testcase( enc==SQLITE_UTF16BE ); + for(i=3-enc; i=zEnd ) return 0; + + /* get sign of significand */ + if( *z=='-' ){ + sign = -1; + z+=incr; + }else if( *z=='+' ){ + z+=incr; + } + + /* copy max significant digits to significand */ + while( z=((LARGEST_INT64-9)/10) ){ + /* skip non-significant significand digits + ** (increase exponent by d to shift decimal left) */ + while( z=zEnd ) goto do_atof_calc; + + /* if decimal point is present */ + if( *z=='.' ){ + z+=incr; + eType++; + /* copy digits from after decimal to significand + ** (decrease exponent by d to shift decimal right) */ + while( z=zEnd ) goto do_atof_calc; + + /* if exponent is present */ + if( *z=='e' || *z=='E' ){ + z+=incr; + eValid = 0; + eType++; + + /* This branch is needed to avoid a (harmless) buffer overread. The + ** special comment alerts the mutation tester that the correct answer + ** is obtained even if the branch is omitted */ + if( z>=zEnd ) goto do_atof_calc; /*PREVENTS-HARMLESS-OVERREAD*/ + + /* get sign of exponent */ + if( *z=='-' ){ + esign = -1; + z+=incr; + }else if( *z=='+' ){ + z+=incr; + } + /* copy digits to exponent */ + while( z0 ){ /*OPTIMIZATION-IF-TRUE*/ + if( esign>0 ){ + if( s>=(LARGEST_INT64/10) ) break; /*OPTIMIZATION-IF-FALSE*/ + s *= 10; + }else{ + if( s%10!=0 ) break; /*OPTIMIZATION-IF-FALSE*/ + s /= 10; + } + e--; + } + + /* adjust the sign of significand */ + s = sign<0 ? -s : s; + + if( e==0 ){ /*OPTIMIZATION-IF-TRUE*/ + result = (double)s; + }else{ + /* attempt to handle extremely small/large numbers better */ + if( e>307 ){ /*OPTIMIZATION-IF-TRUE*/ + if( e<342 ){ /*OPTIMIZATION-IF-TRUE*/ + LONGDOUBLE_TYPE scale = sqlite3Pow10(e-308); + if( esign<0 ){ + result = s / scale; + result /= 1.0e+308; + }else{ + result = s * scale; + result *= 1.0e+308; + } + }else{ assert( e>=342 ); + if( esign<0 ){ + result = 0.0*s; + }else{ +#ifdef INFINITY + result = INFINITY*s; +#else + result = 1e308*1e308*s; /* Infinity */ +#endif + } + } + }else{ + LONGDOUBLE_TYPE scale = sqlite3Pow10(e); + if( esign<0 ){ + result = s / scale; + }else{ + result = s * scale; + } + } + } + } + + /* store the result */ + *pResult = result; + + /* return true if number and no extra non-whitespace chracters after */ + if( z==zEnd && nDigit>0 && eValid && eType>0 ){ + return eType; + }else if( eType>=2 && (eType==3 || eValid) && nDigit>0 ){ + return -1; + }else{ + return 0; + } +#else + return !sqlite3Atoi64(z, pResult, length, enc); +#endif /* SQLITE_OMIT_FLOATING_POINT */ +} +#if defined(_MSC_VER) +#pragma warning(default : 4756) +#endif + +/* +** Render an signed 64-bit integer as text. Store the result in zOut[]. +** +** The caller must ensure that zOut[] is at least 21 bytes in size. +*/ +SQLITE_PRIVATE void sqlite3Int64ToText(i64 v, char *zOut){ + int i; + u64 x; + char zTemp[22]; + if( v<0 ){ + x = (v==SMALLEST_INT64) ? ((u64)1)<<63 : (u64)-v; + }else{ + x = v; + } + i = sizeof(zTemp)-2; + zTemp[sizeof(zTemp)-1] = 0; + do{ + zTemp[i--] = (x%10) + '0'; + x = x/10; + }while( x ); + if( v<0 ) zTemp[i--] = '-'; + memcpy(zOut, &zTemp[i+1], sizeof(zTemp)-1-i); +} + +/* +** Compare the 19-character string zNum against the text representation +** value 2^63: 9223372036854775808. Return negative, zero, or positive +** if zNum is less than, equal to, or greater than the string. +** Note that zNum must contain exactly 19 characters. +** +** Unlike memcmp() this routine is guaranteed to return the difference +** in the values of the last digit if the only difference is in the +** last digit. So, for example, +** +** compare2pow63("9223372036854775800", 1) +** +** will return -8. +*/ +static int compare2pow63(const char *zNum, int incr){ + int c = 0; + int i; + /* 012345678901234567 */ + const char *pow63 = "922337203685477580"; + for(i=0; c==0 && i<18; i++){ + c = (zNum[i*incr]-pow63[i])*10; + } + if( c==0 ){ + c = zNum[18*incr] - '8'; + testcase( c==(-1) ); + testcase( c==0 ); + testcase( c==(+1) ); + } + return c; +} + +/* +** Convert zNum to a 64-bit signed integer. zNum must be decimal. This +** routine does *not* accept hexadecimal notation. +** +** Returns: +** +** -1 Not even a prefix of the input text looks like an integer +** 0 Successful transformation. Fits in a 64-bit signed integer. +** 1 Excess non-space text after the integer value +** 2 Integer too large for a 64-bit signed integer or is malformed +** 3 Special case of 9223372036854775808 +** +** length is the number of bytes in the string (bytes, not characters). +** The string is not necessarily zero-terminated. The encoding is +** given by enc. +*/ +SQLITE_PRIVATE int sqlite3Atoi64(const char *zNum, i64 *pNum, int length, u8 enc){ + int incr; + u64 u = 0; + int neg = 0; /* assume positive */ + int i; + int c = 0; + int nonNum = 0; /* True if input contains UTF16 with high byte non-zero */ + int rc; /* Baseline return code */ + const char *zStart; + const char *zEnd = zNum + length; + assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); + if( enc==SQLITE_UTF8 ){ + incr = 1; + }else{ + incr = 2; + length &= ~1; + assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 ); + for(i=3-enc; i='0' && c<='9'; i+=incr){ + u = u*10 + c - '0'; + } + testcase( i==18*incr ); + testcase( i==19*incr ); + testcase( i==20*incr ); + if( u>LARGEST_INT64 ){ + /* This test and assignment is needed only to suppress UB warnings + ** from clang and -fsanitize=undefined. This test and assignment make + ** the code a little larger and slower, and no harm comes from omitting + ** them, but we must appaise the undefined-behavior pharisees. */ + *pNum = neg ? SMALLEST_INT64 : LARGEST_INT64; + }else if( neg ){ + *pNum = -(i64)u; + }else{ + *pNum = (i64)u; + } + rc = 0; + if( i==0 && zStart==zNum ){ /* No digits */ + rc = -1; + }else if( nonNum ){ /* UTF16 with high-order bytes non-zero */ + rc = 1; + }else if( &zNum[i]19*incr ? 1 : compare2pow63(zNum, incr); + if( c<0 ){ + /* zNum is less than 9223372036854775808 so it fits */ + assert( u<=LARGEST_INT64 ); + return rc; + }else{ + *pNum = neg ? SMALLEST_INT64 : LARGEST_INT64; + if( c>0 ){ + /* zNum is greater than 9223372036854775808 so it overflows */ + return 2; + }else{ + /* zNum is exactly 9223372036854775808. Fits if negative. The + ** special case 2 overflow if positive */ + assert( u-1==LARGEST_INT64 ); + return neg ? rc : 3; + } + } + } +} + +/* +** Transform a UTF-8 integer literal, in either decimal or hexadecimal, +** into a 64-bit signed integer. This routine accepts hexadecimal literals, +** whereas sqlite3Atoi64() does not. +** +** Returns: +** +** 0 Successful transformation. Fits in a 64-bit signed integer. +** 1 Excess text after the integer value +** 2 Integer too large for a 64-bit signed integer or is malformed +** 3 Special case of 9223372036854775808 +*/ +SQLITE_PRIVATE int sqlite3DecOrHexToI64(const char *z, i64 *pOut){ +#ifndef SQLITE_OMIT_HEX_INTEGER + if( z[0]=='0' + && (z[1]=='x' || z[1]=='X') + ){ + u64 u = 0; + int i, k; + for(i=2; z[i]=='0'; i++){} + for(k=i; sqlite3Isxdigit(z[k]); k++){ + u = u*16 + sqlite3HexToInt(z[k]); + } + memcpy(pOut, &u, 8); + return (z[k]==0 && k-i<=16) ? 0 : 2; + }else +#endif /* SQLITE_OMIT_HEX_INTEGER */ + { + return sqlite3Atoi64(z, pOut, sqlite3Strlen30(z), SQLITE_UTF8); + } +} + +/* +** If zNum represents an integer that will fit in 32-bits, then set +** *pValue to that integer and return true. Otherwise return false. +** +** This routine accepts both decimal and hexadecimal notation for integers. +** +** Any non-numeric characters that following zNum are ignored. +** This is different from sqlite3Atoi64() which requires the +** input number to be zero-terminated. +*/ +SQLITE_PRIVATE int sqlite3GetInt32(const char *zNum, int *pValue){ + sqlite_int64 v = 0; + int i, c; + int neg = 0; + if( zNum[0]=='-' ){ + neg = 1; + zNum++; + }else if( zNum[0]=='+' ){ + zNum++; + } +#ifndef SQLITE_OMIT_HEX_INTEGER + else if( zNum[0]=='0' + && (zNum[1]=='x' || zNum[1]=='X') + && sqlite3Isxdigit(zNum[2]) + ){ + u32 u = 0; + zNum += 2; + while( zNum[0]=='0' ) zNum++; + for(i=0; sqlite3Isxdigit(zNum[i]) && i<8; i++){ + u = u*16 + sqlite3HexToInt(zNum[i]); + } + if( (u&0x80000000)==0 && sqlite3Isxdigit(zNum[i])==0 ){ + memcpy(pValue, &u, 4); + return 1; + }else{ + return 0; + } + } +#endif + if( !sqlite3Isdigit(zNum[0]) ) return 0; + while( zNum[0]=='0' ) zNum++; + for(i=0; i<11 && (c = zNum[i] - '0')>=0 && c<=9; i++){ + v = v*10 + c; + } + + /* The longest decimal representation of a 32 bit integer is 10 digits: + ** + ** 1234567890 + ** 2^31 -> 2147483648 + */ + testcase( i==10 ); + if( i>10 ){ + return 0; + } + testcase( v-neg==2147483647 ); + if( v-neg>2147483647 ){ + return 0; + } + if( neg ){ + v = -v; + } + *pValue = (int)v; + return 1; +} + +/* +** Return a 32-bit integer value extracted from a string. If the +** string is not an integer, just return 0. +*/ +SQLITE_PRIVATE int sqlite3Atoi(const char *z){ + int x = 0; + sqlite3GetInt32(z, &x); + return x; +} + +/* +** Try to convert z into an unsigned 32-bit integer. Return true on +** success and false if there is an error. +** +** Only decimal notation is accepted. +*/ +SQLITE_PRIVATE int sqlite3GetUInt32(const char *z, u32 *pI){ + u64 v = 0; + int i; + for(i=0; sqlite3Isdigit(z[i]); i++){ + v = v*10 + z[i] - '0'; + if( v>4294967296LL ){ *pI = 0; return 0; } + } + if( i==0 || z[i]!=0 ){ *pI = 0; return 0; } + *pI = (u32)v; + return 1; +} + +/* +** The variable-length integer encoding is as follows: +** +** KEY: +** A = 0xxxxxxx 7 bits of data and one flag bit +** B = 1xxxxxxx 7 bits of data and one flag bit +** C = xxxxxxxx 8 bits of data +** +** 7 bits - A +** 14 bits - BA +** 21 bits - BBA +** 28 bits - BBBA +** 35 bits - BBBBA +** 42 bits - BBBBBA +** 49 bits - BBBBBBA +** 56 bits - BBBBBBBA +** 64 bits - BBBBBBBBC +*/ + +/* +** Write a 64-bit variable-length integer to memory starting at p[0]. +** The length of data write will be between 1 and 9 bytes. The number +** of bytes written is returned. +** +** A variable-length integer consists of the lower 7 bits of each byte +** for all bytes that have the 8th bit set and one byte with the 8th +** bit clear. Except, if we get to the 9th byte, it stores the full +** 8 bits and is the last byte. +*/ +static int SQLITE_NOINLINE putVarint64(unsigned char *p, u64 v){ + int i, j, n; + u8 buf[10]; + if( v & (((u64)0xff000000)<<32) ){ + p[8] = (u8)v; + v >>= 8; + for(i=7; i>=0; i--){ + p[i] = (u8)((v & 0x7f) | 0x80); + v >>= 7; + } + return 9; + } + n = 0; + do{ + buf[n++] = (u8)((v & 0x7f) | 0x80); + v >>= 7; + }while( v!=0 ); + buf[0] &= 0x7f; + assert( n<=9 ); + for(i=0, j=n-1; j>=0; j--, i++){ + p[i] = buf[j]; + } + return n; +} +SQLITE_PRIVATE int sqlite3PutVarint(unsigned char *p, u64 v){ + if( v<=0x7f ){ + p[0] = v&0x7f; + return 1; + } + if( v<=0x3fff ){ + p[0] = ((v>>7)&0x7f)|0x80; + p[1] = v&0x7f; + return 2; + } + return putVarint64(p,v); +} + +/* +** Bitmasks used by sqlite3GetVarint(). These precomputed constants +** are defined here rather than simply putting the constant expressions +** inline in order to work around bugs in the RVT compiler. +** +** SLOT_2_0 A mask for (0x7f<<14) | 0x7f +** +** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0 +*/ +#define SLOT_2_0 0x001fc07f +#define SLOT_4_2_0 0xf01fc07f + + +/* +** Read a 64-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read. The value is stored in *v. +*/ +SQLITE_PRIVATE u8 sqlite3GetVarint(const unsigned char *p, u64 *v){ + u32 a,b,s; + + if( ((signed char*)p)[0]>=0 ){ + *v = *p; + return 1; + } + if( ((signed char*)p)[1]>=0 ){ + *v = ((u32)(p[0]&0x7f)<<7) | p[1]; + return 2; + } + + /* Verify that constants are precomputed correctly */ + assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) ); + assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) ); + + a = ((u32)p[0])<<14; + b = p[1]; + p += 2; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + a &= SLOT_2_0; + b &= 0x7f; + b = b<<7; + a |= b; + *v = a; + return 3; + } + + /* CSE1 from below */ + a &= SLOT_2_0; + p++; + b = b<<14; + b |= *p; + /* b: p1<<14 | p3 (unmasked) */ + if (!(b&0x80)) + { + b &= SLOT_2_0; + /* moved CSE1 up */ + /* a &= (0x7f<<14)|(0x7f); */ + a = a<<7; + a |= b; + *v = a; + return 4; + } + + /* a: p0<<14 | p2 (masked) */ + /* b: p1<<14 | p3 (unmasked) */ + /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + /* moved CSE1 up */ + /* a &= (0x7f<<14)|(0x7f); */ + b &= SLOT_2_0; + s = a; + /* s: p0<<14 | p2 (masked) */ + + p++; + a = a<<14; + a |= *p; + /* a: p0<<28 | p2<<14 | p4 (unmasked) */ + if (!(a&0x80)) + { + /* we can skip these cause they were (effectively) done above + ** while calculating s */ + /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ + /* b &= (0x7f<<14)|(0x7f); */ + b = b<<7; + a |= b; + s = s>>18; + *v = ((u64)s)<<32 | a; + return 5; + } + + /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + s = s<<7; + s |= b; + /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + + p++; + b = b<<14; + b |= *p; + /* b: p1<<28 | p3<<14 | p5 (unmasked) */ + if (!(b&0x80)) + { + /* we can skip this cause it was (effectively) done above in calc'ing s */ + /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ + a &= SLOT_2_0; + a = a<<7; + a |= b; + s = s>>18; + *v = ((u64)s)<<32 | a; + return 6; + } + + p++; + a = a<<14; + a |= *p; + /* a: p2<<28 | p4<<14 | p6 (unmasked) */ + if (!(a&0x80)) + { + a &= SLOT_4_2_0; + b &= SLOT_2_0; + b = b<<7; + a |= b; + s = s>>11; + *v = ((u64)s)<<32 | a; + return 7; + } + + /* CSE2 from below */ + a &= SLOT_2_0; + p++; + b = b<<14; + b |= *p; + /* b: p3<<28 | p5<<14 | p7 (unmasked) */ + if (!(b&0x80)) + { + b &= SLOT_4_2_0; + /* moved CSE2 up */ + /* a &= (0x7f<<14)|(0x7f); */ + a = a<<7; + a |= b; + s = s>>4; + *v = ((u64)s)<<32 | a; + return 8; + } + + p++; + a = a<<15; + a |= *p; + /* a: p4<<29 | p6<<15 | p8 (unmasked) */ + + /* moved CSE2 up */ + /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */ + b &= SLOT_2_0; + b = b<<8; + a |= b; + + s = s<<4; + b = p[-4]; + b &= 0x7f; + b = b>>3; + s |= b; + + *v = ((u64)s)<<32 | a; + + return 9; +} + +/* +** Read a 32-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read. The value is stored in *v. +** +** If the varint stored in p[0] is larger than can fit in a 32-bit unsigned +** integer, then set *v to 0xffffffff. +** +** A MACRO version, getVarint32, is provided which inlines the +** single-byte case. All code should use the MACRO version as +** this function assumes the single-byte case has already been handled. +*/ +SQLITE_PRIVATE u8 sqlite3GetVarint32(const unsigned char *p, u32 *v){ + u32 a,b; + + /* The 1-byte case. Overwhelmingly the most common. Handled inline + ** by the getVarin32() macro */ + a = *p; + /* a: p0 (unmasked) */ +#ifndef getVarint32 + if (!(a&0x80)) + { + /* Values between 0 and 127 */ + *v = a; + return 1; + } +#endif + + /* The 2-byte case */ + p++; + b = *p; + /* b: p1 (unmasked) */ + if (!(b&0x80)) + { + /* Values between 128 and 16383 */ + a &= 0x7f; + a = a<<7; + *v = a | b; + return 2; + } + + /* The 3-byte case */ + p++; + a = a<<14; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 16384 and 2097151 */ + a &= (0x7f<<14)|(0x7f); + b &= 0x7f; + b = b<<7; + *v = a | b; + return 3; + } + + /* A 32-bit varint is used to store size information in btrees. + ** Objects are rarely larger than 2MiB limit of a 3-byte varint. + ** A 3-byte varint is sufficient, for example, to record the size + ** of a 1048569-byte BLOB or string. + ** + ** We only unroll the first 1-, 2-, and 3- byte cases. The very + ** rare larger cases can be handled by the slower 64-bit varint + ** routine. + */ +#if 1 + { + u64 v64; + u8 n; + + n = sqlite3GetVarint(p-2, &v64); + assert( n>3 && n<=9 ); + if( (v64 & SQLITE_MAX_U32)!=v64 ){ + *v = 0xffffffff; + }else{ + *v = (u32)v64; + } + return n; + } + +#else + /* For following code (kept for historical record only) shows an + ** unrolling for the 3- and 4-byte varint cases. This code is + ** slightly faster, but it is also larger and much harder to test. + */ + p++; + b = b<<14; + b |= *p; + /* b: p1<<14 | p3 (unmasked) */ + if (!(b&0x80)) + { + /* Values between 2097152 and 268435455 */ + b &= (0x7f<<14)|(0x7f); + a &= (0x7f<<14)|(0x7f); + a = a<<7; + *v = a | b; + return 4; + } + + p++; + a = a<<14; + a |= *p; + /* a: p0<<28 | p2<<14 | p4 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 268435456 and 34359738367 */ + a &= SLOT_4_2_0; + b &= SLOT_4_2_0; + b = b<<7; + *v = a | b; + return 5; + } + + /* We can only reach this point when reading a corrupt database + ** file. In that case we are not in any hurry. Use the (relatively + ** slow) general-purpose sqlite3GetVarint() routine to extract the + ** value. */ + { + u64 v64; + u8 n; + + p -= 4; + n = sqlite3GetVarint(p, &v64); + assert( n>5 && n<=9 ); + *v = (u32)v64; + return n; + } +#endif +} + +/* +** Return the number of bytes that will be needed to store the given +** 64-bit integer. +*/ +SQLITE_PRIVATE int sqlite3VarintLen(u64 v){ + int i; + for(i=1; (v >>= 7)!=0; i++){ assert( i<10 ); } + return i; +} + + +/* +** Read or write a four-byte big-endian integer value. +*/ +SQLITE_PRIVATE u32 sqlite3Get4byte(const u8 *p){ +#if SQLITE_BYTEORDER==4321 + u32 x; + memcpy(&x,p,4); + return x; +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 + u32 x; + memcpy(&x,p,4); + return __builtin_bswap32(x); +#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + u32 x; + memcpy(&x,p,4); + return _byteswap_ulong(x); +#else + testcase( p[0]&0x80 ); + return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; +#endif +} +SQLITE_PRIVATE void sqlite3Put4byte(unsigned char *p, u32 v){ +#if SQLITE_BYTEORDER==4321 + memcpy(p,&v,4); +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 + u32 x = __builtin_bswap32(v); + memcpy(p,&x,4); +#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + u32 x = _byteswap_ulong(v); + memcpy(p,&x,4); +#else + p[0] = (u8)(v>>24); + p[1] = (u8)(v>>16); + p[2] = (u8)(v>>8); + p[3] = (u8)v; +#endif +} + + + +/* +** Translate a single byte of Hex into an integer. +** This routine only works if h really is a valid hexadecimal +** character: 0..9a..fA..F +*/ +SQLITE_PRIVATE u8 sqlite3HexToInt(int h){ + assert( (h>='0' && h<='9') || (h>='a' && h<='f') || (h>='A' && h<='F') ); +#ifdef SQLITE_ASCII + h += 9*(1&(h>>6)); +#endif +#ifdef SQLITE_EBCDIC + h += 9*(1&~(h>>4)); +#endif + return (u8)(h & 0xf); +} + +#if !defined(SQLITE_OMIT_BLOB_LITERAL) +/* +** Convert a BLOB literal of the form "x'hhhhhh'" into its binary +** value. Return a pointer to its binary value. Space to hold the +** binary value has been obtained from malloc and must be freed by +** the calling routine. +*/ +SQLITE_PRIVATE void *sqlite3HexToBlob(sqlite3 *db, const char *z, int n){ + char *zBlob; + int i; + + zBlob = (char *)sqlite3DbMallocRawNN(db, n/2 + 1); + n--; + if( zBlob ){ + for(i=0; ieOpenState; + if( eOpenState!=SQLITE_STATE_OPEN ){ + if( sqlite3SafetyCheckSickOrOk(db) ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + logBadConnection("unopened"); + } + return 0; + }else{ + return 1; + } +} +SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(sqlite3 *db){ + u8 eOpenState; + eOpenState = db->eOpenState; + if( eOpenState!=SQLITE_STATE_SICK && + eOpenState!=SQLITE_STATE_OPEN && + eOpenState!=SQLITE_STATE_BUSY ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + logBadConnection("invalid"); + return 0; + }else{ + return 1; + } +} + +/* +** Attempt to add, substract, or multiply the 64-bit signed value iB against +** the other 64-bit signed integer at *pA and store the result in *pA. +** Return 0 on success. Or if the operation would have resulted in an +** overflow, leave *pA unchanged and return 1. +*/ +SQLITE_PRIVATE int sqlite3AddInt64(i64 *pA, i64 iB){ +#if GCC_VERSION>=5004000 && !defined(__INTEL_COMPILER) + return __builtin_add_overflow(*pA, iB, pA); +#else + i64 iA = *pA; + testcase( iA==0 ); testcase( iA==1 ); + testcase( iB==-1 ); testcase( iB==0 ); + if( iB>=0 ){ + testcase( iA>0 && LARGEST_INT64 - iA == iB ); + testcase( iA>0 && LARGEST_INT64 - iA == iB - 1 ); + if( iA>0 && LARGEST_INT64 - iA < iB ) return 1; + }else{ + testcase( iA<0 && -(iA + LARGEST_INT64) == iB + 1 ); + testcase( iA<0 && -(iA + LARGEST_INT64) == iB + 2 ); + if( iA<0 && -(iA + LARGEST_INT64) > iB + 1 ) return 1; + } + *pA += iB; + return 0; +#endif +} +SQLITE_PRIVATE int sqlite3SubInt64(i64 *pA, i64 iB){ +#if GCC_VERSION>=5004000 && !defined(__INTEL_COMPILER) + return __builtin_sub_overflow(*pA, iB, pA); +#else + testcase( iB==SMALLEST_INT64+1 ); + if( iB==SMALLEST_INT64 ){ + testcase( (*pA)==(-1) ); testcase( (*pA)==0 ); + if( (*pA)>=0 ) return 1; + *pA -= iB; + return 0; + }else{ + return sqlite3AddInt64(pA, -iB); + } +#endif +} +SQLITE_PRIVATE int sqlite3MulInt64(i64 *pA, i64 iB){ +#if GCC_VERSION>=5004000 && !defined(__INTEL_COMPILER) + return __builtin_mul_overflow(*pA, iB, pA); +#else + i64 iA = *pA; + if( iB>0 ){ + if( iA>LARGEST_INT64/iB ) return 1; + if( iA0 ){ + if( iBLARGEST_INT64/-iB ) return 1; + } + } + *pA = iA*iB; + return 0; +#endif +} + +/* +** Compute the absolute value of a 32-bit signed integer, of possible. Or +** if the integer has a value of -2147483648, return +2147483647 +*/ +SQLITE_PRIVATE int sqlite3AbsInt32(int x){ + if( x>=0 ) return x; + if( x==(int)0x80000000 ) return 0x7fffffff; + return -x; +} + +#ifdef SQLITE_ENABLE_8_3_NAMES +/* +** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database +** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and +** if filename in z[] has a suffix (a.k.a. "extension") that is longer than +** three characters, then shorten the suffix on z[] to be the last three +** characters of the original suffix. +** +** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always +** do the suffix shortening regardless of URI parameter. +** +** Examples: +** +** test.db-journal => test.nal +** test.db-wal => test.wal +** test.db-shm => test.shm +** test.db-mj7f3319fa => test.9fa +*/ +SQLITE_PRIVATE void sqlite3FileSuffix3(const char *zBaseFilename, char *z){ +#if SQLITE_ENABLE_8_3_NAMES<2 + if( sqlite3_uri_boolean(zBaseFilename, "8_3_names", 0) ) +#endif + { + int i, sz; + sz = sqlite3Strlen30(z); + for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} + if( z[i]=='.' && ALWAYS(sz>i+4) ) memmove(&z[i+1], &z[sz-3], 4); + } +} +#endif + +/* +** Find (an approximate) sum of two LogEst values. This computation is +** not a simple "+" operator because LogEst is stored as a logarithmic +** value. +** +*/ +SQLITE_PRIVATE LogEst sqlite3LogEstAdd(LogEst a, LogEst b){ + static const unsigned char x[] = { + 10, 10, /* 0,1 */ + 9, 9, /* 2,3 */ + 8, 8, /* 4,5 */ + 7, 7, 7, /* 6,7,8 */ + 6, 6, 6, /* 9,10,11 */ + 5, 5, 5, /* 12-14 */ + 4, 4, 4, 4, /* 15-18 */ + 3, 3, 3, 3, 3, 3, /* 19-24 */ + 2, 2, 2, 2, 2, 2, 2, /* 25-31 */ + }; + if( a>=b ){ + if( a>b+49 ) return a; + if( a>b+31 ) return a+1; + return a+x[a-b]; + }else{ + if( b>a+49 ) return b; + if( b>a+31 ) return b+1; + return b+x[b-a]; + } +} + +/* +** Convert an integer into a LogEst. In other words, compute an +** approximation for 10*log2(x). +*/ +SQLITE_PRIVATE LogEst sqlite3LogEst(u64 x){ + static LogEst a[] = { 0, 2, 3, 5, 6, 7, 8, 9 }; + LogEst y = 40; + if( x<8 ){ + if( x<2 ) return 0; + while( x<8 ){ y -= 10; x <<= 1; } + }else{ +#if GCC_VERSION>=5004000 + int i = 60 - __builtin_clzll(x); + y += i*10; + x >>= i; +#else + while( x>255 ){ y += 40; x >>= 4; } /*OPTIMIZATION-IF-TRUE*/ + while( x>15 ){ y += 10; x >>= 1; } +#endif + } + return a[x&7] + y - 10; +} + +/* +** Convert a double into a LogEst +** In other words, compute an approximation for 10*log2(x). +*/ +SQLITE_PRIVATE LogEst sqlite3LogEstFromDouble(double x){ + u64 a; + LogEst e; + assert( sizeof(x)==8 && sizeof(a)==8 ); + if( x<=1 ) return 0; + if( x<=2000000000 ) return sqlite3LogEst((u64)x); + memcpy(&a, &x, 8); + e = (a>>52) - 1022; + return e*10; +} + +/* +** Convert a LogEst into an integer. +*/ +SQLITE_PRIVATE u64 sqlite3LogEstToInt(LogEst x){ + u64 n; + n = x%10; + x /= 10; + if( n>=5 ) n -= 2; + else if( n>=1 ) n -= 1; + if( x>60 ) return (u64)LARGEST_INT64; + return x>=3 ? (n+8)<<(x-3) : (n+8)>>(3-x); +} + +/* +** Add a new name/number pair to a VList. This might require that the +** VList object be reallocated, so return the new VList. If an OOM +** error occurs, the original VList returned and the +** db->mallocFailed flag is set. +** +** A VList is really just an array of integers. To destroy a VList, +** simply pass it to sqlite3DbFree(). +** +** The first integer is the number of integers allocated for the whole +** VList. The second integer is the number of integers actually used. +** Each name/number pair is encoded by subsequent groups of 3 or more +** integers. +** +** Each name/number pair starts with two integers which are the numeric +** value for the pair and the size of the name/number pair, respectively. +** The text name overlays one or more following integers. The text name +** is always zero-terminated. +** +** Conceptually: +** +** struct VList { +** int nAlloc; // Number of allocated slots +** int nUsed; // Number of used slots +** struct VListEntry { +** int iValue; // Value for this entry +** int nSlot; // Slots used by this entry +** // ... variable name goes here +** } a[0]; +** } +** +** During code generation, pointers to the variable names within the +** VList are taken. When that happens, nAlloc is set to zero as an +** indication that the VList may never again be enlarged, since the +** accompanying realloc() would invalidate the pointers. +*/ +SQLITE_PRIVATE VList *sqlite3VListAdd( + sqlite3 *db, /* The database connection used for malloc() */ + VList *pIn, /* The input VList. Might be NULL */ + const char *zName, /* Name of symbol to add */ + int nName, /* Bytes of text in zName */ + int iVal /* Value to associate with zName */ +){ + int nInt; /* number of sizeof(int) objects needed for zName */ + char *z; /* Pointer to where zName will be stored */ + int i; /* Index in pIn[] where zName is stored */ + + nInt = nName/4 + 3; + assert( pIn==0 || pIn[0]>=3 ); /* Verify ok to add new elements */ + if( pIn==0 || pIn[1]+nInt > pIn[0] ){ + /* Enlarge the allocation */ + sqlite3_int64 nAlloc = (pIn ? 2*(sqlite3_int64)pIn[0] : 10) + nInt; + VList *pOut = sqlite3DbRealloc(db, pIn, nAlloc*sizeof(int)); + if( pOut==0 ) return pIn; + if( pIn==0 ) pOut[1] = 2; + pIn = pOut; + pIn[0] = nAlloc; + } + i = pIn[1]; + pIn[i] = iVal; + pIn[i+1] = nInt; + z = (char*)&pIn[i+2]; + pIn[1] = i+nInt; + assert( pIn[1]<=pIn[0] ); + memcpy(z, zName, nName); + z[nName] = 0; + return pIn; +} + +/* +** Return a pointer to the name of a variable in the given VList that +** has the value iVal. Or return a NULL if there is no such variable in +** the list +*/ +SQLITE_PRIVATE const char *sqlite3VListNumToName(VList *pIn, int iVal){ + int i, mx; + if( pIn==0 ) return 0; + mx = pIn[1]; + i = 2; + do{ + if( pIn[i]==iVal ) return (char*)&pIn[i+2]; + i += pIn[i+1]; + }while( i */ + +/* Turn bulk memory into a hash table object by initializing the +** fields of the Hash structure. +** +** "pNew" is a pointer to the hash table that is to be initialized. +*/ +SQLITE_PRIVATE void sqlite3HashInit(Hash *pNew){ + assert( pNew!=0 ); + pNew->first = 0; + pNew->count = 0; + pNew->htsize = 0; + pNew->ht = 0; +} + +/* Remove all entries from a hash table. Reclaim all memory. +** Call this routine to delete a hash table or to reset a hash table +** to the empty state. +*/ +SQLITE_PRIVATE void sqlite3HashClear(Hash *pH){ + HashElem *elem; /* For looping over all elements of the table */ + + assert( pH!=0 ); + elem = pH->first; + pH->first = 0; + sqlite3_free(pH->ht); + pH->ht = 0; + pH->htsize = 0; + while( elem ){ + HashElem *next_elem = elem->next; + sqlite3_free(elem); + elem = next_elem; + } + pH->count = 0; +} + +/* +** The hashing function. +*/ +static unsigned int strHash(const char *z){ + unsigned int h = 0; + unsigned char c; + while( (c = (unsigned char)*z++)!=0 ){ /*OPTIMIZATION-IF-TRUE*/ + /* Knuth multiplicative hashing. (Sorting & Searching, p. 510). + ** 0x9e3779b1 is 2654435761 which is the closest prime number to + ** (2**32)*golden_ratio, where golden_ratio = (sqrt(5) - 1)/2. */ + h += sqlite3UpperToLower[c]; + h *= 0x9e3779b1; + } + return h; +} + + +/* Link pNew element into the hash table pH. If pEntry!=0 then also +** insert pNew into the pEntry hash bucket. +*/ +static void insertElement( + Hash *pH, /* The complete hash table */ + struct _ht *pEntry, /* The entry into which pNew is inserted */ + HashElem *pNew /* The element to be inserted */ +){ + HashElem *pHead; /* First element already in pEntry */ + if( pEntry ){ + pHead = pEntry->count ? pEntry->chain : 0; + pEntry->count++; + pEntry->chain = pNew; + }else{ + pHead = 0; + } + if( pHead ){ + pNew->next = pHead; + pNew->prev = pHead->prev; + if( pHead->prev ){ pHead->prev->next = pNew; } + else { pH->first = pNew; } + pHead->prev = pNew; + }else{ + pNew->next = pH->first; + if( pH->first ){ pH->first->prev = pNew; } + pNew->prev = 0; + pH->first = pNew; + } +} + + +/* Resize the hash table so that it cantains "new_size" buckets. +** +** The hash table might fail to resize if sqlite3_malloc() fails or +** if the new size is the same as the prior size. +** Return TRUE if the resize occurs and false if not. +*/ +static int rehash(Hash *pH, unsigned int new_size){ + struct _ht *new_ht; /* The new hash table */ + HashElem *elem, *next_elem; /* For looping over existing elements */ + +#if SQLITE_MALLOC_SOFT_LIMIT>0 + if( new_size*sizeof(struct _ht)>SQLITE_MALLOC_SOFT_LIMIT ){ + new_size = SQLITE_MALLOC_SOFT_LIMIT/sizeof(struct _ht); + } + if( new_size==pH->htsize ) return 0; +#endif + + /* The inability to allocates space for a larger hash table is + ** a performance hit but it is not a fatal error. So mark the + ** allocation as a benign. Use sqlite3Malloc()/memset(0) instead of + ** sqlite3MallocZero() to make the allocation, as sqlite3MallocZero() + ** only zeroes the requested number of bytes whereas this module will + ** use the actual amount of space allocated for the hash table (which + ** may be larger than the requested amount). + */ + sqlite3BeginBenignMalloc(); + new_ht = (struct _ht *)sqlite3Malloc( new_size*sizeof(struct _ht) ); + sqlite3EndBenignMalloc(); + + if( new_ht==0 ) return 0; + sqlite3_free(pH->ht); + pH->ht = new_ht; + pH->htsize = new_size = sqlite3MallocSize(new_ht)/sizeof(struct _ht); + memset(new_ht, 0, new_size*sizeof(struct _ht)); + for(elem=pH->first, pH->first=0; elem; elem = next_elem){ + unsigned int h = strHash(elem->pKey) % new_size; + next_elem = elem->next; + insertElement(pH, &new_ht[h], elem); + } + return 1; +} + +/* This function (for internal use only) locates an element in an +** hash table that matches the given key. If no element is found, +** a pointer to a static null element with HashElem.data==0 is returned. +** If pH is not NULL, then the hash for this key is written to *pH. +*/ +static HashElem *findElementWithHash( + const Hash *pH, /* The pH to be searched */ + const char *pKey, /* The key we are searching for */ + unsigned int *pHash /* Write the hash value here */ +){ + HashElem *elem; /* Used to loop thru the element list */ + unsigned int count; /* Number of elements left to test */ + unsigned int h; /* The computed hash */ + static HashElem nullElement = { 0, 0, 0, 0 }; + + if( pH->ht ){ /*OPTIMIZATION-IF-TRUE*/ + struct _ht *pEntry; + h = strHash(pKey) % pH->htsize; + pEntry = &pH->ht[h]; + elem = pEntry->chain; + count = pEntry->count; + }else{ + h = 0; + elem = pH->first; + count = pH->count; + } + if( pHash ) *pHash = h; + while( count-- ){ + assert( elem!=0 ); + if( sqlite3StrICmp(elem->pKey,pKey)==0 ){ + return elem; + } + elem = elem->next; + } + return &nullElement; +} + +/* Remove a single entry from the hash table given a pointer to that +** element and a hash on the element's key. +*/ +static void removeElementGivenHash( + Hash *pH, /* The pH containing "elem" */ + HashElem* elem, /* The element to be removed from the pH */ + unsigned int h /* Hash value for the element */ +){ + struct _ht *pEntry; + if( elem->prev ){ + elem->prev->next = elem->next; + }else{ + pH->first = elem->next; + } + if( elem->next ){ + elem->next->prev = elem->prev; + } + if( pH->ht ){ + pEntry = &pH->ht[h]; + if( pEntry->chain==elem ){ + pEntry->chain = elem->next; + } + assert( pEntry->count>0 ); + pEntry->count--; + } + sqlite3_free( elem ); + pH->count--; + if( pH->count==0 ){ + assert( pH->first==0 ); + assert( pH->count==0 ); + sqlite3HashClear(pH); + } +} + +/* Attempt to locate an element of the hash table pH with a key +** that matches pKey. Return the data for this element if it is +** found, or NULL if there is no match. +*/ +SQLITE_PRIVATE void *sqlite3HashFind(const Hash *pH, const char *pKey){ + assert( pH!=0 ); + assert( pKey!=0 ); + return findElementWithHash(pH, pKey, 0)->data; +} + +/* Insert an element into the hash table pH. The key is pKey +** and the data is "data". +** +** If no element exists with a matching key, then a new +** element is created and NULL is returned. +** +** If another element already exists with the same key, then the +** new data replaces the old data and the old data is returned. +** The key is not copied in this instance. If a malloc fails, then +** the new data is returned and the hash table is unchanged. +** +** If the "data" parameter to this function is NULL, then the +** element corresponding to "key" is removed from the hash table. +*/ +SQLITE_PRIVATE void *sqlite3HashInsert(Hash *pH, const char *pKey, void *data){ + unsigned int h; /* the hash of the key modulo hash table size */ + HashElem *elem; /* Used to loop thru the element list */ + HashElem *new_elem; /* New element added to the pH */ + + assert( pH!=0 ); + assert( pKey!=0 ); + elem = findElementWithHash(pH,pKey,&h); + if( elem->data ){ + void *old_data = elem->data; + if( data==0 ){ + removeElementGivenHash(pH,elem,h); + }else{ + elem->data = data; + elem->pKey = pKey; + } + return old_data; + } + if( data==0 ) return 0; + new_elem = (HashElem*)sqlite3Malloc( sizeof(HashElem) ); + if( new_elem==0 ) return data; + new_elem->pKey = pKey; + new_elem->data = data; + pH->count++; + if( pH->count>=10 && pH->count > 2*pH->htsize ){ + if( rehash(pH, pH->count*2) ){ + assert( pH->htsize>0 ); + h = strHash(pKey) % pH->htsize; + } + } + insertElement(pH, pH->ht ? &pH->ht[h] : 0, new_elem); + return 0; +} + +/************** End of hash.c ************************************************/ +/************** Begin file opcodes.c *****************************************/ +/* Automatically generated. Do not edit */ +/* See the tool/mkopcodec.tcl script for details. */ +#if !defined(SQLITE_OMIT_EXPLAIN) \ + || defined(VDBE_PROFILE) \ + || defined(SQLITE_DEBUG) +#if defined(SQLITE_ENABLE_EXPLAIN_COMMENTS) || defined(SQLITE_DEBUG) +# define OpHelp(X) "\0" X +#else +# define OpHelp(X) +#endif +SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ + static const char *const azName[] = { + /* 0 */ "Savepoint" OpHelp(""), + /* 1 */ "AutoCommit" OpHelp(""), + /* 2 */ "Transaction" OpHelp(""), + /* 3 */ "SorterNext" OpHelp(""), + /* 4 */ "Prev" OpHelp(""), + /* 5 */ "Next" OpHelp(""), + /* 6 */ "Checkpoint" OpHelp(""), + /* 7 */ "JournalMode" OpHelp(""), + /* 8 */ "Vacuum" OpHelp(""), + /* 9 */ "VFilter" OpHelp("iplan=r[P3] zplan='P4'"), + /* 10 */ "VUpdate" OpHelp("data=r[P3@P2]"), + /* 11 */ "Goto" OpHelp(""), + /* 12 */ "Gosub" OpHelp(""), + /* 13 */ "InitCoroutine" OpHelp(""), + /* 14 */ "Yield" OpHelp(""), + /* 15 */ "MustBeInt" OpHelp(""), + /* 16 */ "Jump" OpHelp(""), + /* 17 */ "Once" OpHelp(""), + /* 18 */ "If" OpHelp(""), + /* 19 */ "Not" OpHelp("r[P2]= !r[P1]"), + /* 20 */ "IfNot" OpHelp(""), + /* 21 */ "IsNullOrType" OpHelp("if typeof(r[P1]) IN (P3,5) goto P2"), + /* 22 */ "IfNullRow" OpHelp("if P1.nullRow then r[P3]=NULL, goto P2"), + /* 23 */ "SeekLT" OpHelp("key=r[P3@P4]"), + /* 24 */ "SeekLE" OpHelp("key=r[P3@P4]"), + /* 25 */ "SeekGE" OpHelp("key=r[P3@P4]"), + /* 26 */ "SeekGT" OpHelp("key=r[P3@P4]"), + /* 27 */ "IfNotOpen" OpHelp("if( !csr[P1] ) goto P2"), + /* 28 */ "IfNoHope" OpHelp("key=r[P3@P4]"), + /* 29 */ "NoConflict" OpHelp("key=r[P3@P4]"), + /* 30 */ "NotFound" OpHelp("key=r[P3@P4]"), + /* 31 */ "Found" OpHelp("key=r[P3@P4]"), + /* 32 */ "SeekRowid" OpHelp("intkey=r[P3]"), + /* 33 */ "NotExists" OpHelp("intkey=r[P3]"), + /* 34 */ "Last" OpHelp(""), + /* 35 */ "IfSmaller" OpHelp(""), + /* 36 */ "SorterSort" OpHelp(""), + /* 37 */ "Sort" OpHelp(""), + /* 38 */ "Rewind" OpHelp(""), + /* 39 */ "IdxLE" OpHelp("key=r[P3@P4]"), + /* 40 */ "IdxGT" OpHelp("key=r[P3@P4]"), + /* 41 */ "IdxLT" OpHelp("key=r[P3@P4]"), + /* 42 */ "IdxGE" OpHelp("key=r[P3@P4]"), + /* 43 */ "Or" OpHelp("r[P3]=(r[P1] || r[P2])"), + /* 44 */ "And" OpHelp("r[P3]=(r[P1] && r[P2])"), + /* 45 */ "RowSetRead" OpHelp("r[P3]=rowset(P1)"), + /* 46 */ "RowSetTest" OpHelp("if r[P3] in rowset(P1) goto P2"), + /* 47 */ "Program" OpHelp(""), + /* 48 */ "FkIfZero" OpHelp("if fkctr[P1]==0 goto P2"), + /* 49 */ "IfPos" OpHelp("if r[P1]>0 then r[P1]-=P3, goto P2"), + /* 50 */ "IsNull" OpHelp("if r[P1]==NULL goto P2"), + /* 51 */ "NotNull" OpHelp("if r[P1]!=NULL goto P2"), + /* 52 */ "Ne" OpHelp("IF r[P3]!=r[P1]"), + /* 53 */ "Eq" OpHelp("IF r[P3]==r[P1]"), + /* 54 */ "Gt" OpHelp("IF r[P3]>r[P1]"), + /* 55 */ "Le" OpHelp("IF r[P3]<=r[P1]"), + /* 56 */ "Lt" OpHelp("IF r[P3]=r[P1]"), + /* 58 */ "ElseEq" OpHelp(""), + /* 59 */ "IfNotZero" OpHelp("if r[P1]!=0 then r[P1]--, goto P2"), + /* 60 */ "DecrJumpZero" OpHelp("if (--r[P1])==0 goto P2"), + /* 61 */ "IncrVacuum" OpHelp(""), + /* 62 */ "VNext" OpHelp(""), + /* 63 */ "Filter" OpHelp("if key(P3@P4) not in filter(P1) goto P2"), + /* 64 */ "Init" OpHelp("Start at P2"), + /* 65 */ "PureFunc" OpHelp("r[P3]=func(r[P2@NP])"), + /* 66 */ "Function" OpHelp("r[P3]=func(r[P2@NP])"), + /* 67 */ "Return" OpHelp(""), + /* 68 */ "EndCoroutine" OpHelp(""), + /* 69 */ "HaltIfNull" OpHelp("if r[P3]=null halt"), + /* 70 */ "Halt" OpHelp(""), + /* 71 */ "Integer" OpHelp("r[P2]=P1"), + /* 72 */ "Int64" OpHelp("r[P2]=P4"), + /* 73 */ "String" OpHelp("r[P2]='P4' (len=P1)"), + /* 74 */ "Null" OpHelp("r[P2..P3]=NULL"), + /* 75 */ "SoftNull" OpHelp("r[P1]=NULL"), + /* 76 */ "Blob" OpHelp("r[P2]=P4 (len=P1)"), + /* 77 */ "Variable" OpHelp("r[P2]=parameter(P1,P4)"), + /* 78 */ "Move" OpHelp("r[P2@P3]=r[P1@P3]"), + /* 79 */ "Copy" OpHelp("r[P2@P3+1]=r[P1@P3+1]"), + /* 80 */ "SCopy" OpHelp("r[P2]=r[P1]"), + /* 81 */ "IntCopy" OpHelp("r[P2]=r[P1]"), + /* 82 */ "FkCheck" OpHelp(""), + /* 83 */ "ResultRow" OpHelp("output=r[P1@P2]"), + /* 84 */ "CollSeq" OpHelp(""), + /* 85 */ "AddImm" OpHelp("r[P1]=r[P1]+P2"), + /* 86 */ "RealAffinity" OpHelp(""), + /* 87 */ "Cast" OpHelp("affinity(r[P1])"), + /* 88 */ "Permutation" OpHelp(""), + /* 89 */ "Compare" OpHelp("r[P1@P3] <-> r[P2@P3]"), + /* 90 */ "IsTrue" OpHelp("r[P2] = coalesce(r[P1]==TRUE,P3) ^ P4"), + /* 91 */ "ZeroOrNull" OpHelp("r[P2] = 0 OR NULL"), + /* 92 */ "Offset" OpHelp("r[P3] = sqlite_offset(P1)"), + /* 93 */ "Column" OpHelp("r[P3]=PX"), + /* 94 */ "TypeCheck" OpHelp("typecheck(r[P1@P2])"), + /* 95 */ "Affinity" OpHelp("affinity(r[P1@P2])"), + /* 96 */ "MakeRecord" OpHelp("r[P3]=mkrec(r[P1@P2])"), + /* 97 */ "Count" OpHelp("r[P2]=count()"), + /* 98 */ "ReadCookie" OpHelp(""), + /* 99 */ "SetCookie" OpHelp(""), + /* 100 */ "ReopenIdx" OpHelp("root=P2 iDb=P3"), + /* 101 */ "OpenRead" OpHelp("root=P2 iDb=P3"), + /* 102 */ "BitAnd" OpHelp("r[P3]=r[P1]&r[P2]"), + /* 103 */ "BitOr" OpHelp("r[P3]=r[P1]|r[P2]"), + /* 104 */ "ShiftLeft" OpHelp("r[P3]=r[P2]<>r[P1]"), + /* 106 */ "Add" OpHelp("r[P3]=r[P1]+r[P2]"), + /* 107 */ "Subtract" OpHelp("r[P3]=r[P2]-r[P1]"), + /* 108 */ "Multiply" OpHelp("r[P3]=r[P1]*r[P2]"), + /* 109 */ "Divide" OpHelp("r[P3]=r[P2]/r[P1]"), + /* 110 */ "Remainder" OpHelp("r[P3]=r[P2]%r[P1]"), + /* 111 */ "Concat" OpHelp("r[P3]=r[P2]+r[P1]"), + /* 112 */ "OpenWrite" OpHelp("root=P2 iDb=P3"), + /* 113 */ "OpenDup" OpHelp(""), + /* 114 */ "BitNot" OpHelp("r[P2]= ~r[P1]"), + /* 115 */ "OpenAutoindex" OpHelp("nColumn=P2"), + /* 116 */ "OpenEphemeral" OpHelp("nColumn=P2"), + /* 117 */ "String8" OpHelp("r[P2]='P4'"), + /* 118 */ "SorterOpen" OpHelp(""), + /* 119 */ "SequenceTest" OpHelp("if( cursor[P1].ctr++ ) pc = P2"), + /* 120 */ "OpenPseudo" OpHelp("P3 columns in r[P2]"), + /* 121 */ "Close" OpHelp(""), + /* 122 */ "ColumnsUsed" OpHelp(""), + /* 123 */ "SeekScan" OpHelp("Scan-ahead up to P1 rows"), + /* 124 */ "SeekHit" OpHelp("set P2<=seekHit<=P3"), + /* 125 */ "Sequence" OpHelp("r[P2]=cursor[P1].ctr++"), + /* 126 */ "NewRowid" OpHelp("r[P2]=rowid"), + /* 127 */ "Insert" OpHelp("intkey=r[P3] data=r[P2]"), + /* 128 */ "RowCell" OpHelp(""), + /* 129 */ "Delete" OpHelp(""), + /* 130 */ "ResetCount" OpHelp(""), + /* 131 */ "SorterCompare" OpHelp("if key(P1)!=trim(r[P3],P4) goto P2"), + /* 132 */ "SorterData" OpHelp("r[P2]=data"), + /* 133 */ "RowData" OpHelp("r[P2]=data"), + /* 134 */ "Rowid" OpHelp("r[P2]=rowid"), + /* 135 */ "NullRow" OpHelp(""), + /* 136 */ "SeekEnd" OpHelp(""), + /* 137 */ "IdxInsert" OpHelp("key=r[P2]"), + /* 138 */ "SorterInsert" OpHelp("key=r[P2]"), + /* 139 */ "IdxDelete" OpHelp("key=r[P2@P3]"), + /* 140 */ "DeferredSeek" OpHelp("Move P3 to P1.rowid if needed"), + /* 141 */ "IdxRowid" OpHelp("r[P2]=rowid"), + /* 142 */ "FinishSeek" OpHelp(""), + /* 143 */ "Destroy" OpHelp(""), + /* 144 */ "Clear" OpHelp(""), + /* 145 */ "ResetSorter" OpHelp(""), + /* 146 */ "CreateBtree" OpHelp("r[P2]=root iDb=P1 flags=P3"), + /* 147 */ "SqlExec" OpHelp(""), + /* 148 */ "ParseSchema" OpHelp(""), + /* 149 */ "LoadAnalysis" OpHelp(""), + /* 150 */ "DropTable" OpHelp(""), + /* 151 */ "DropIndex" OpHelp(""), + /* 152 */ "DropTrigger" OpHelp(""), + /* 153 */ "Real" OpHelp("r[P2]=P4"), + /* 154 */ "IntegrityCk" OpHelp(""), + /* 155 */ "RowSetAdd" OpHelp("rowset(P1)=r[P2]"), + /* 156 */ "Param" OpHelp(""), + /* 157 */ "FkCounter" OpHelp("fkctr[P1]+=P2"), + /* 158 */ "MemMax" OpHelp("r[P1]=max(r[P1],r[P2])"), + /* 159 */ "OffsetLimit" OpHelp("if r[P1]>0 then r[P2]=r[P1]+max(0,r[P3]) else r[P2]=(-1)"), + /* 160 */ "AggInverse" OpHelp("accum=r[P3] inverse(r[P2@P5])"), + /* 161 */ "AggStep" OpHelp("accum=r[P3] step(r[P2@P5])"), + /* 162 */ "AggStep1" OpHelp("accum=r[P3] step(r[P2@P5])"), + /* 163 */ "AggValue" OpHelp("r[P3]=value N=P2"), + /* 164 */ "AggFinal" OpHelp("accum=r[P1] N=P2"), + /* 165 */ "Expire" OpHelp(""), + /* 166 */ "CursorLock" OpHelp(""), + /* 167 */ "CursorUnlock" OpHelp(""), + /* 168 */ "TableLock" OpHelp("iDb=P1 root=P2 write=P3"), + /* 169 */ "VBegin" OpHelp(""), + /* 170 */ "VCreate" OpHelp(""), + /* 171 */ "VDestroy" OpHelp(""), + /* 172 */ "VOpen" OpHelp(""), + /* 173 */ "VInitIn" OpHelp("r[P2]=ValueList(P1,P3)"), + /* 174 */ "VColumn" OpHelp("r[P3]=vcolumn(P2)"), + /* 175 */ "VRename" OpHelp(""), + /* 176 */ "Pagecount" OpHelp(""), + /* 177 */ "MaxPgcnt" OpHelp(""), + /* 178 */ "FilterAdd" OpHelp("filter(P1) += key(P3@P4)"), + /* 179 */ "Trace" OpHelp(""), + /* 180 */ "CursorHint" OpHelp(""), + /* 181 */ "ReleaseReg" OpHelp("release r[P1@P2] mask P3"), + /* 182 */ "Noop" OpHelp(""), + /* 183 */ "Explain" OpHelp(""), + /* 184 */ "Abortable" OpHelp(""), + }; + return azName[i]; +} +#endif + +/************** End of opcodes.c *********************************************/ +/************** Begin file os_unix.c *****************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains the VFS implementation for unix-like operating systems +** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. +** +** There are actually several different VFS implementations in this file. +** The differences are in the way that file locking is done. The default +** implementation uses Posix Advisory Locks. Alternative implementations +** use flock(), dot-files, various proprietary locking schemas, or simply +** skip locking all together. +** +** This source file is organized into divisions where the logic for various +** subfunctions is contained within the appropriate division. PLEASE +** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed +** in the correct division and should be clearly labeled. +** +** The layout of divisions is as follows: +** +** * General-purpose declarations and utility functions. +** * Unique file ID logic used by VxWorks. +** * Various locking primitive implementations (all except proxy locking): +** + for Posix Advisory Locks +** + for no-op locks +** + for dot-file locks +** + for flock() locking +** + for named semaphore locks (VxWorks only) +** + for AFP filesystem locks (MacOSX only) +** * sqlite3_file methods not associated with locking. +** * Definitions of sqlite3_io_methods objects for all locking +** methods plus "finder" functions for each locking method. +** * sqlite3_vfs method implementations. +** * Locking primitives for the proxy uber-locking-method. (MacOSX only) +** * Definitions of sqlite3_vfs objects for all locking methods +** plus implementations of sqlite3_os_init() and sqlite3_os_end(). +*/ +/* #include "sqliteInt.h" */ +#if SQLITE_OS_UNIX /* This file is used on unix only */ + +/* +** There are various methods for file locking used for concurrency +** control: +** +** 1. POSIX locking (the default), +** 2. No locking, +** 3. Dot-file locking, +** 4. flock() locking, +** 5. AFP locking (OSX only), +** 6. Named POSIX semaphores (VXWorks only), +** 7. proxy locking. (OSX only) +** +** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE +** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic +** selection of the appropriate locking style based on the filesystem +** where the database is located. +*/ +#if !defined(SQLITE_ENABLE_LOCKING_STYLE) +# if defined(__APPLE__) +# define SQLITE_ENABLE_LOCKING_STYLE 1 +# else +# define SQLITE_ENABLE_LOCKING_STYLE 0 +# endif +#endif + +/* Use pread() and pwrite() if they are available */ +#if defined(__APPLE__) +# define HAVE_PREAD 1 +# define HAVE_PWRITE 1 +#endif +#if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64) +# undef USE_PREAD +# define USE_PREAD64 1 +#elif defined(HAVE_PREAD) && defined(HAVE_PWRITE) +# undef USE_PREAD64 +# define USE_PREAD 1 +#endif + +/* +** standard include files. +*/ +#include +#include +#include +#include +#include +/* #include */ +#include +#include +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 +# include +#endif + +#if SQLITE_ENABLE_LOCKING_STYLE +/* # include */ +# include +# include +#endif /* SQLITE_ENABLE_LOCKING_STYLE */ + +/* +** Try to determine if gethostuuid() is available based on standard +** macros. This might sometimes compute the wrong value for some +** obscure platforms. For those cases, simply compile with one of +** the following: +** +** -DHAVE_GETHOSTUUID=0 +** -DHAVE_GETHOSTUUID=1 +** +** None if this matters except when building on Apple products with +** -DSQLITE_ENABLE_LOCKING_STYLE. +*/ +#ifndef HAVE_GETHOSTUUID +# define HAVE_GETHOSTUUID 0 +# if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ + (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) +# if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ + && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))\ + && (!defined(TARGET_OS_MACCATALYST) || (TARGET_OS_MACCATALYST==0)) +# undef HAVE_GETHOSTUUID +# define HAVE_GETHOSTUUID 1 +# else +# warning "gethostuuid() is disabled." +# endif +# endif +#endif + + +#if OS_VXWORKS +/* # include */ +# include +# include +#endif /* OS_VXWORKS */ + +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE +# include +#endif + +#ifdef HAVE_UTIME +# include +#endif + +/* +** Allowed values of unixFile.fsFlags +*/ +#define SQLITE_FSFLAGS_IS_MSDOS 0x1 + +/* +** If we are to be thread-safe, include the pthreads header. +*/ +#if SQLITE_THREADSAFE +/* # include */ +#endif + +/* +** Default permissions when creating a new file +*/ +#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS +# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 +#endif + +/* +** Default permissions when creating auto proxy dir +*/ +#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS +# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 +#endif + +/* +** Maximum supported path-length. +*/ +#define MAX_PATHNAME 512 + +/* +** Maximum supported symbolic links +*/ +#define SQLITE_MAX_SYMLINKS 100 + +/* Always cast the getpid() return type for compatibility with +** kernel modules in VxWorks. */ +#define osGetpid(X) (pid_t)getpid() + +/* +** Only set the lastErrno if the error code is a real error and not +** a normal expected return code of SQLITE_BUSY or SQLITE_OK +*/ +#define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) + +/* Forward references */ +typedef struct unixShm unixShm; /* Connection shared memory */ +typedef struct unixShmNode unixShmNode; /* Shared memory instance */ +typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ +typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ + +/* +** Sometimes, after a file handle is closed by SQLite, the file descriptor +** cannot be closed immediately. In these cases, instances of the following +** structure are used to store the file descriptor while waiting for an +** opportunity to either close or reuse it. +*/ +struct UnixUnusedFd { + int fd; /* File descriptor to close */ + int flags; /* Flags this file descriptor was opened with */ + UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ +}; + +/* +** The unixFile structure is subclass of sqlite3_file specific to the unix +** VFS implementations. +*/ +typedef struct unixFile unixFile; +struct unixFile { + sqlite3_io_methods const *pMethod; /* Always the first entry */ + sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ + unixInodeInfo *pInode; /* Info about locks on this inode */ + int h; /* The file descriptor */ + unsigned char eFileLock; /* The type of lock held on this fd */ + unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ + int lastErrno; /* The unix errno from last I/O error */ + void *lockingContext; /* Locking style specific state */ + UnixUnusedFd *pPreallocatedUnused; /* Pre-allocated UnixUnusedFd */ + const char *zPath; /* Name of the file */ + unixShm *pShm; /* Shared memory segment information */ + int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ +#if SQLITE_MAX_MMAP_SIZE>0 + int nFetchOut; /* Number of outstanding xFetch refs */ + sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ + void *pMapRegion; /* Memory mapped region */ +#endif + int sectorSize; /* Device sector size */ + int deviceCharacteristics; /* Precomputed device characteristics */ +#if SQLITE_ENABLE_LOCKING_STYLE + int openFlags; /* The flags specified at open() */ +#endif +#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) + unsigned fsFlags; /* cached details from statfs() */ +#endif +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + unsigned iBusyTimeout; /* Wait this many millisec on locks */ +#endif +#if OS_VXWORKS + struct vxworksFileId *pId; /* Unique file ID */ +#endif +#ifdef SQLITE_DEBUG + /* The next group of variables are used to track whether or not the + ** transaction counter in bytes 24-27 of database files are updated + ** whenever any part of the database changes. An assertion fault will + ** occur if a file is updated without also updating the transaction + ** counter. This test is made to avoid new problems similar to the + ** one described by ticket #3584. + */ + unsigned char transCntrChng; /* True if the transaction counter changed */ + unsigned char dbUpdate; /* True if any part of database file changed */ + unsigned char inNormalWrite; /* True if in a normal write operation */ + +#endif + +#ifdef SQLITE_TEST + /* In test mode, increase the size of this structure a bit so that + ** it is larger than the struct CrashFile defined in test6.c. + */ + char aPadding[32]; +#endif +}; + +/* This variable holds the process id (pid) from when the xRandomness() +** method was called. If xOpen() is called from a different process id, +** indicating that a fork() has occurred, the PRNG will be reset. +*/ +static pid_t randomnessPid = 0; + +/* +** Allowed values for the unixFile.ctrlFlags bitmask: +*/ +#define UNIXFILE_EXCL 0x01 /* Connections from one process only */ +#define UNIXFILE_RDONLY 0x02 /* Connection is read only */ +#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#ifndef SQLITE_DISABLE_DIRSYNC +# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ +#else +# define UNIXFILE_DIRSYNC 0x00 +#endif +#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ +#define UNIXFILE_DELETE 0x20 /* Delete on close */ +#define UNIXFILE_URI 0x40 /* Filename might have query parameters */ +#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ + +/* +** Include code that is common to all os_*.c files +*/ +/* #include "os_common.h" */ + +/* +** Define various macros that are missing from some systems. +*/ +#ifndef O_LARGEFILE +# define O_LARGEFILE 0 +#endif +#ifdef SQLITE_DISABLE_LFS +# undef O_LARGEFILE +# define O_LARGEFILE 0 +#endif +#ifndef O_NOFOLLOW +# define O_NOFOLLOW 0 +#endif +#ifndef O_BINARY +# define O_BINARY 0 +#endif + +/* +** The threadid macro resolves to the thread-id or to 0. Used for +** testing and debugging only. +*/ +#if SQLITE_THREADSAFE +#define threadid pthread_self() +#else +#define threadid 0 +#endif + +/* +** HAVE_MREMAP defaults to true on Linux and false everywhere else. +*/ +#if !defined(HAVE_MREMAP) +# if defined(__linux__) && defined(_GNU_SOURCE) +# define HAVE_MREMAP 1 +# else +# define HAVE_MREMAP 0 +# endif +#endif + +/* +** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() +** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. +*/ +#ifdef __ANDROID__ +# define lseek lseek64 +#endif + +#ifdef __linux__ +/* +** Linux-specific IOCTL magic numbers used for controlling F2FS +*/ +#define F2FS_IOCTL_MAGIC 0xf5 +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) +#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) +#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 +#endif /* __linux__ */ + + +/* +** Different Unix systems declare open() in different ways. Same use +** open(const char*,int,mode_t). Others use open(const char*,int,...). +** The difference is important when using a pointer to the function. +** +** The safest way to deal with the problem is to always use this wrapper +** which always has the same well-defined interface. +*/ +static int posixOpen(const char *zFile, int flags, int mode){ + return open(zFile, flags, mode); +} + +/* Forward reference */ +static int openDirectory(const char*, int*); +static int unixGetpagesize(void); + +/* +** Many system calls are accessed through pointer-to-functions so that +** they may be overridden at runtime to facilitate fault injection during +** testing and sandboxing. The following array holds the names and pointers +** to all overrideable system calls. +*/ +static struct unix_syscall { + const char *zName; /* Name of the system call */ + sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ + sqlite3_syscall_ptr pDefault; /* Default value */ +} aSyscall[] = { + { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, +#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) + + { "close", (sqlite3_syscall_ptr)close, 0 }, +#define osClose ((int(*)(int))aSyscall[1].pCurrent) + + { "access", (sqlite3_syscall_ptr)access, 0 }, +#define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) + + { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 }, +#define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) + + { "stat", (sqlite3_syscall_ptr)stat, 0 }, +#define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) + +/* +** The DJGPP compiler environment looks mostly like Unix, but it +** lacks the fcntl() system call. So redefine fcntl() to be something +** that always succeeds. This means that locking does not occur under +** DJGPP. But it is DOS - what did you expect? +*/ +#ifdef __DJGPP__ + { "fstat", 0, 0 }, +#define osFstat(a,b,c) 0 +#else + { "fstat", (sqlite3_syscall_ptr)fstat, 0 }, +#define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) +#endif + + { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 }, +#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) + + { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 }, +#define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) + + { "read", (sqlite3_syscall_ptr)read, 0 }, +#define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) + +#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE + { "pread", (sqlite3_syscall_ptr)pread, 0 }, +#else + { "pread", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) + +#if defined(USE_PREAD64) + { "pread64", (sqlite3_syscall_ptr)pread64, 0 }, +#else + { "pread64", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent) + + { "write", (sqlite3_syscall_ptr)write, 0 }, +#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) + +#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE + { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, +#else + { "pwrite", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ + aSyscall[12].pCurrent) + +#if defined(USE_PREAD64) + { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 }, +#else + { "pwrite64", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\ + aSyscall[13].pCurrent) + + { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, +#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) + +#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE + { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 }, +#else + { "fallocate", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) + + { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, +#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) + + { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, +#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) + + { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, +#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) + + { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, +#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) + +#if defined(HAVE_FCHOWN) + { "fchown", (sqlite3_syscall_ptr)fchown, 0 }, +#else + { "fchown", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) + +#if defined(HAVE_FCHOWN) + { "geteuid", (sqlite3_syscall_ptr)geteuid, 0 }, +#else + { "geteuid", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent) + +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, +#else + { "mmap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent) + +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, +#else + { "munmap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMunmap ((int(*)(void*,size_t))aSyscall[23].pCurrent) + +#if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) + { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, +#else + { "mremap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent) + +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, +#else + { "getpagesize", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent) + +#if defined(HAVE_READLINK) + { "readlink", (sqlite3_syscall_ptr)readlink, 0 }, +#else + { "readlink", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent) + +#if defined(HAVE_LSTAT) + { "lstat", (sqlite3_syscall_ptr)lstat, 0 }, +#else + { "lstat", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) + +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) +# ifdef __ANDROID__ + { "ioctl", (sqlite3_syscall_ptr)(int(*)(int, int, ...))ioctl, 0 }, +#define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent) +# else + { "ioctl", (sqlite3_syscall_ptr)ioctl, 0 }, +#define osIoctl ((int(*)(int,unsigned long,...))aSyscall[28].pCurrent) +# endif +#else + { "ioctl", (sqlite3_syscall_ptr)0, 0 }, +#endif + +}; /* End of the overrideable system calls */ + + +/* +** On some systems, calls to fchown() will trigger a message in a security +** log if they come from non-root processes. So avoid calling fchown() if +** we are not running as root. +*/ +static int robustFchown(int fd, uid_t uid, gid_t gid){ +#if defined(HAVE_FCHOWN) + return osGeteuid() ? 0 : osFchown(fd,uid,gid); +#else + return 0; +#endif +} + +/* +** This is the xSetSystemCall() method of sqlite3_vfs for all of the +** "unix" VFSes. Return SQLITE_OK opon successfully updating the +** system call pointer, or SQLITE_NOTFOUND if there is no configurable +** system call named zName. +*/ +static int unixSetSystemCall( + sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ + const char *zName, /* Name of system call to override */ + sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ +){ + unsigned int i; + int rc = SQLITE_NOTFOUND; + + UNUSED_PARAMETER(pNotUsed); + if( zName==0 ){ + /* If no zName is given, restore all system calls to their default + ** settings and return NULL + */ + rc = SQLITE_OK; + for(i=0; i=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; + osClose(fd); + sqlite3_log(SQLITE_WARNING, + "attempt to open \"%s\" as file descriptor %d", z, fd); + fd = -1; + if( osOpen("/dev/null", O_RDONLY, m)<0 ) break; + } + if( fd>=0 ){ + if( m!=0 ){ + struct stat statbuf; + if( osFstat(fd, &statbuf)==0 + && statbuf.st_size==0 + && (statbuf.st_mode&0777)!=m + ){ + osFchmod(fd, m); + } + } +#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) + osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); +#endif + } + return fd; +} + +/* +** Helper functions to obtain and relinquish the global mutex. The +** global mutex is used to protect the unixInodeInfo and +** vxworksFileId objects used by this file, all of which may be +** shared by multiple threads. +** +** Function unixMutexHeld() is used to assert() that the global mutex +** is held when required. This function is only used as part of assert() +** statements. e.g. +** +** unixEnterMutex() +** assert( unixMutexHeld() ); +** unixEnterLeave() +** +** To prevent deadlock, the global unixBigLock must must be acquired +** before the unixInodeInfo.pLockMutex mutex, if both are held. It is +** OK to get the pLockMutex without holding unixBigLock first, but if +** that happens, the unixBigLock mutex must not be acquired until after +** pLockMutex is released. +** +** OK: enter(unixBigLock), enter(pLockInfo) +** OK: enter(unixBigLock) +** OK: enter(pLockInfo) +** ERROR: enter(pLockInfo), enter(unixBigLock) +*/ +static sqlite3_mutex *unixBigLock = 0; +static void unixEnterMutex(void){ + assert( sqlite3_mutex_notheld(unixBigLock) ); /* Not a recursive mutex */ + sqlite3_mutex_enter(unixBigLock); +} +static void unixLeaveMutex(void){ + assert( sqlite3_mutex_held(unixBigLock) ); + sqlite3_mutex_leave(unixBigLock); +} +#ifdef SQLITE_DEBUG +static int unixMutexHeld(void) { + return sqlite3_mutex_held(unixBigLock); +} +#endif + + +#ifdef SQLITE_HAVE_OS_TRACE +/* +** Helper function for printing out trace information from debugging +** binaries. This returns the string representation of the supplied +** integer lock-type. +*/ +static const char *azFileLock(int eFileLock){ + switch( eFileLock ){ + case NO_LOCK: return "NONE"; + case SHARED_LOCK: return "SHARED"; + case RESERVED_LOCK: return "RESERVED"; + case PENDING_LOCK: return "PENDING"; + case EXCLUSIVE_LOCK: return "EXCLUSIVE"; + } + return "ERROR"; +} +#endif + +#ifdef SQLITE_LOCK_TRACE +/* +** Print out information about all locking operations. +** +** This routine is used for troubleshooting locks on multithreaded +** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE +** command-line option on the compiler. This code is normally +** turned off. +*/ +static int lockTrace(int fd, int op, struct flock *p){ + char *zOpName, *zType; + int s; + int savedErrno; + if( op==F_GETLK ){ + zOpName = "GETLK"; + }else if( op==F_SETLK ){ + zOpName = "SETLK"; + }else{ + s = osFcntl(fd, op, p); + sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); + return s; + } + if( p->l_type==F_RDLCK ){ + zType = "RDLCK"; + }else if( p->l_type==F_WRLCK ){ + zType = "WRLCK"; + }else if( p->l_type==F_UNLCK ){ + zType = "UNLCK"; + }else{ + assert( 0 ); + } + assert( p->l_whence==SEEK_SET ); + s = osFcntl(fd, op, p); + savedErrno = errno; + sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", + threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, + (int)p->l_pid, s); + if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ + struct flock l2; + l2 = *p; + osFcntl(fd, F_GETLK, &l2); + if( l2.l_type==F_RDLCK ){ + zType = "RDLCK"; + }else if( l2.l_type==F_WRLCK ){ + zType = "WRLCK"; + }else if( l2.l_type==F_UNLCK ){ + zType = "UNLCK"; + }else{ + assert( 0 ); + } + sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", + zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); + } + errno = savedErrno; + return s; +} +#undef osFcntl +#define osFcntl lockTrace +#endif /* SQLITE_LOCK_TRACE */ + +/* +** Retry ftruncate() calls that fail due to EINTR +** +** All calls to ftruncate() within this file should be made through +** this wrapper. On the Android platform, bypassing the logic below +** could lead to a corrupt database. +*/ +static int robust_ftruncate(int h, sqlite3_int64 sz){ + int rc; +#ifdef __ANDROID__ + /* On Android, ftruncate() always uses 32-bit offsets, even if + ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to + ** truncate a file to any size larger than 2GiB. Silently ignore any + ** such attempts. */ + if( sz>(sqlite3_int64)0x7FFFFFFF ){ + rc = SQLITE_OK; + }else +#endif + do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); + return rc; +} + +/* +** This routine translates a standard POSIX errno code into something +** useful to the clients of the sqlite3 functions. Specifically, it is +** intended to translate a variety of "try again" errors into SQLITE_BUSY +** and a variety of "please close the file descriptor NOW" errors into +** SQLITE_IOERR +** +** Errors during initialization of locks, or file system support for locks, +** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. +*/ +static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { + assert( (sqliteIOErr == SQLITE_IOERR_LOCK) || + (sqliteIOErr == SQLITE_IOERR_UNLOCK) || + (sqliteIOErr == SQLITE_IOERR_RDLOCK) || + (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ); + switch (posixError) { + case EACCES: + case EAGAIN: + case ETIMEDOUT: + case EBUSY: + case EINTR: + case ENOLCK: + /* random NFS retry error, unless during file system support + * introspection, in which it actually means what it says */ + return SQLITE_BUSY; + + case EPERM: + return SQLITE_PERM; + + default: + return sqliteIOErr; + } +} + + +/****************************************************************************** +****************** Begin Unique File ID Utility Used By VxWorks *************** +** +** On most versions of unix, we can get a unique ID for a file by concatenating +** the device number and the inode number. But this does not work on VxWorks. +** On VxWorks, a unique file id must be based on the canonical filename. +** +** A pointer to an instance of the following structure can be used as a +** unique file ID in VxWorks. Each instance of this structure contains +** a copy of the canonical filename. There is also a reference count. +** The structure is reclaimed when the number of pointers to it drops to +** zero. +** +** There are never very many files open at one time and lookups are not +** a performance-critical path, so it is sufficient to put these +** structures on a linked list. +*/ +struct vxworksFileId { + struct vxworksFileId *pNext; /* Next in a list of them all */ + int nRef; /* Number of references to this one */ + int nName; /* Length of the zCanonicalName[] string */ + char *zCanonicalName; /* Canonical filename */ +}; + +#if OS_VXWORKS +/* +** All unique filenames are held on a linked list headed by this +** variable: +*/ +static struct vxworksFileId *vxworksFileList = 0; + +/* +** Simplify a filename into its canonical form +** by making the following changes: +** +** * removing any trailing and duplicate / +** * convert /./ into just / +** * convert /A/../ where A is any simple name into just / +** +** Changes are made in-place. Return the new name length. +** +** The original filename is in z[0..n-1]. Return the number of +** characters in the simplified name. +*/ +static int vxworksSimplifyName(char *z, int n){ + int i, j; + while( n>1 && z[n-1]=='/' ){ n--; } + for(i=j=0; i0 && z[j-1]!='/' ){ j--; } + if( j>0 ){ j--; } + i += 2; + continue; + } + } + z[j++] = z[i]; + } + z[j] = 0; + return j; +} + +/* +** Find a unique file ID for the given absolute pathname. Return +** a pointer to the vxworksFileId object. This pointer is the unique +** file ID. +** +** The nRef field of the vxworksFileId object is incremented before +** the object is returned. A new vxworksFileId object is created +** and added to the global list if necessary. +** +** If a memory allocation error occurs, return NULL. +*/ +static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ + struct vxworksFileId *pNew; /* search key and new file ID */ + struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ + int n; /* Length of zAbsoluteName string */ + + assert( zAbsoluteName[0]=='/' ); + n = (int)strlen(zAbsoluteName); + pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); + if( pNew==0 ) return 0; + pNew->zCanonicalName = (char*)&pNew[1]; + memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); + n = vxworksSimplifyName(pNew->zCanonicalName, n); + + /* Search for an existing entry that matching the canonical name. + ** If found, increment the reference count and return a pointer to + ** the existing file ID. + */ + unixEnterMutex(); + for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ + if( pCandidate->nName==n + && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 + ){ + sqlite3_free(pNew); + pCandidate->nRef++; + unixLeaveMutex(); + return pCandidate; + } + } + + /* No match was found. We will make a new file ID */ + pNew->nRef = 1; + pNew->nName = n; + pNew->pNext = vxworksFileList; + vxworksFileList = pNew; + unixLeaveMutex(); + return pNew; +} + +/* +** Decrement the reference count on a vxworksFileId object. Free +** the object when the reference count reaches zero. +*/ +static void vxworksReleaseFileId(struct vxworksFileId *pId){ + unixEnterMutex(); + assert( pId->nRef>0 ); + pId->nRef--; + if( pId->nRef==0 ){ + struct vxworksFileId **pp; + for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} + assert( *pp==pId ); + *pp = pId->pNext; + sqlite3_free(pId); + } + unixLeaveMutex(); +} +#endif /* OS_VXWORKS */ +/*************** End of Unique File ID Utility Used By VxWorks **************** +******************************************************************************/ + + +/****************************************************************************** +*************************** Posix Advisory Locking **************************** +** +** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) +** section 6.5.2.2 lines 483 through 490 specify that when a process +** sets or clears a lock, that operation overrides any prior locks set +** by the same process. It does not explicitly say so, but this implies +** that it overrides locks set by the same process using a different +** file descriptor. Consider this test case: +** +** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); +** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); +** +** Suppose ./file1 and ./file2 are really the same file (because +** one is a hard or symbolic link to the other) then if you set +** an exclusive lock on fd1, then try to get an exclusive lock +** on fd2, it works. I would have expected the second lock to +** fail since there was already a lock on the file due to fd1. +** But not so. Since both locks came from the same process, the +** second overrides the first, even though they were on different +** file descriptors opened on different file names. +** +** This means that we cannot use POSIX locks to synchronize file access +** among competing threads of the same process. POSIX locks will work fine +** to synchronize access for threads in separate processes, but not +** threads within the same process. +** +** To work around the problem, SQLite has to manage file locks internally +** on its own. Whenever a new database is opened, we have to find the +** specific inode of the database file (the inode is determined by the +** st_dev and st_ino fields of the stat structure that fstat() fills in) +** and check for locks already existing on that inode. When locks are +** created or removed, we have to look at our own internal record of the +** locks to see if another thread has previously set a lock on that same +** inode. +** +** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. +** For VxWorks, we have to use the alternative unique ID system based on +** canonical filename and implemented in the previous division.) +** +** The sqlite3_file structure for POSIX is no longer just an integer file +** descriptor. It is now a structure that holds the integer file +** descriptor and a pointer to a structure that describes the internal +** locks on the corresponding inode. There is one locking structure +** per inode, so if the same inode is opened twice, both unixFile structures +** point to the same locking structure. The locking structure keeps +** a reference count (so we will know when to delete it) and a "cnt" +** field that tells us its internal lock status. cnt==0 means the +** file is unlocked. cnt==-1 means the file has an exclusive lock. +** cnt>0 means there are cnt shared locks on the file. +** +** Any attempt to lock or unlock a file first checks the locking +** structure. The fcntl() system call is only invoked to set a +** POSIX lock if the internal lock structure transitions between +** a locked and an unlocked state. +** +** But wait: there are yet more problems with POSIX advisory locks. +** +** If you close a file descriptor that points to a file that has locks, +** all locks on that file that are owned by the current process are +** released. To work around this problem, each unixInodeInfo object +** maintains a count of the number of pending locks on tha inode. +** When an attempt is made to close an unixFile, if there are +** other unixFile open on the same inode that are holding locks, the call +** to close() the file descriptor is deferred until all of the locks clear. +** The unixInodeInfo structure keeps a list of file descriptors that need to +** be closed and that list is walked (and cleared) when the last lock +** clears. +** +** Yet another problem: LinuxThreads do not play well with posix locks. +** +** Many older versions of linux use the LinuxThreads library which is +** not posix compliant. Under LinuxThreads, a lock created by thread +** A cannot be modified or overridden by a different thread B. +** Only thread A can modify the lock. Locking behavior is correct +** if the appliation uses the newer Native Posix Thread Library (NPTL) +** on linux - with NPTL a lock created by thread A can override locks +** in thread B. But there is no way to know at compile-time which +** threading library is being used. So there is no way to know at +** compile-time whether or not thread A can override locks on thread B. +** One has to do a run-time check to discover the behavior of the +** current process. +** +** SQLite used to support LinuxThreads. But support for LinuxThreads +** was dropped beginning with version 3.7.0. SQLite will still work with +** LinuxThreads provided that (1) there is no more than one connection +** per database file in the same process and (2) database connections +** do not move across threads. +*/ + +/* +** An instance of the following structure serves as the key used +** to locate a particular unixInodeInfo object. +*/ +struct unixFileId { + dev_t dev; /* Device number */ +#if OS_VXWORKS + struct vxworksFileId *pId; /* Unique file ID for vxworks. */ +#else + /* We are told that some versions of Android contain a bug that + ** sizes ino_t at only 32-bits instead of 64-bits. (See + ** https://android-review.googlesource.com/#/c/115351/3/dist/sqlite3.c) + ** To work around this, always allocate 64-bits for the inode number. + ** On small machines that only have 32-bit inodes, this wastes 4 bytes, + ** but that should not be a big deal. */ + /* WAS: ino_t ino; */ + u64 ino; /* Inode number */ +#endif +}; + +/* +** An instance of the following structure is allocated for each open +** inode. +** +** A single inode can have multiple file descriptors, so each unixFile +** structure contains a pointer to an instance of this object and this +** object keeps a count of the number of unixFile pointing to it. +** +** Mutex rules: +** +** (1) Only the pLockMutex mutex must be held in order to read or write +** any of the locking fields: +** nShared, nLock, eFileLock, bProcessLock, pUnused +** +** (2) When nRef>0, then the following fields are unchanging and can +** be read (but not written) without holding any mutex: +** fileId, pLockMutex +** +** (3) With the exceptions above, all the fields may only be read +** or written while holding the global unixBigLock mutex. +** +** Deadlock prevention: The global unixBigLock mutex may not +** be acquired while holding the pLockMutex mutex. If both unixBigLock +** and pLockMutex are needed, then unixBigLock must be acquired first. +*/ +struct unixInodeInfo { + struct unixFileId fileId; /* The lookup key */ + sqlite3_mutex *pLockMutex; /* Hold this mutex for... */ + int nShared; /* Number of SHARED locks held */ + int nLock; /* Number of outstanding file locks */ + unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ + unsigned char bProcessLock; /* An exclusive process lock is held */ + UnixUnusedFd *pUnused; /* Unused file descriptors to close */ + int nRef; /* Number of pointers to this structure */ + unixShmNode *pShmNode; /* Shared memory associated with this inode */ + unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ + unixInodeInfo *pPrev; /* .... doubly linked */ +#if SQLITE_ENABLE_LOCKING_STYLE + unsigned long long sharedByte; /* for AFP simulated shared lock */ +#endif +#if OS_VXWORKS + sem_t *pSem; /* Named POSIX semaphore */ + char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ +#endif +}; + +/* +** A lists of all unixInodeInfo objects. +** +** Must hold unixBigLock in order to read or write this variable. +*/ +static unixInodeInfo *inodeList = 0; /* All unixInodeInfo objects */ + +#ifdef SQLITE_DEBUG +/* +** True if the inode mutex (on the unixFile.pFileMutex field) is held, or not. +** This routine is used only within assert() to help verify correct mutex +** usage. +*/ +int unixFileMutexHeld(unixFile *pFile){ + assert( pFile->pInode ); + return sqlite3_mutex_held(pFile->pInode->pLockMutex); +} +int unixFileMutexNotheld(unixFile *pFile){ + assert( pFile->pInode ); + return sqlite3_mutex_notheld(pFile->pInode->pLockMutex); +} +#endif + +/* +** +** This function - unixLogErrorAtLine(), is only ever called via the macro +** unixLogError(). +** +** It is invoked after an error occurs in an OS function and errno has been +** set. It logs a message using sqlite3_log() containing the current value of +** errno and, if possible, the human-readable equivalent from strerror() or +** strerror_r(). +** +** The first argument passed to the macro should be the error code that +** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). +** The two subsequent arguments should be the name of the OS function that +** failed (e.g. "unlink", "open") and the associated file-system path, +** if any. +*/ +#define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) +static int unixLogErrorAtLine( + int errcode, /* SQLite error code */ + const char *zFunc, /* Name of OS function that failed */ + const char *zPath, /* File path associated with error */ + int iLine /* Source line number where error occurred */ +){ + char *zErr; /* Message from strerror() or equivalent */ + int iErrno = errno; /* Saved syscall error number */ + + /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use + ** the strerror() function to obtain the human-readable error message + ** equivalent to errno. Otherwise, use strerror_r(). + */ +#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) + char aErr[80]; + memset(aErr, 0, sizeof(aErr)); + zErr = aErr; + + /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, + ** assume that the system provides the GNU version of strerror_r() that + ** returns a pointer to a buffer containing the error message. That pointer + ** may point to aErr[], or it may point to some static storage somewhere. + ** Otherwise, assume that the system provides the POSIX version of + ** strerror_r(), which always writes an error message into aErr[]. + ** + ** If the code incorrectly assumes that it is the POSIX version that is + ** available, the error message will often be an empty string. Not a + ** huge problem. Incorrectly concluding that the GNU version is available + ** could lead to a segfault though. + */ +#if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) + zErr = +# endif + strerror_r(iErrno, aErr, sizeof(aErr)-1); + +#elif SQLITE_THREADSAFE + /* This is a threadsafe build, but strerror_r() is not available. */ + zErr = ""; +#else + /* Non-threadsafe build, use strerror(). */ + zErr = strerror(iErrno); +#endif + + if( zPath==0 ) zPath = ""; + sqlite3_log(errcode, + "os_unix.c:%d: (%d) %s(%s) - %s", + iLine, iErrno, zFunc, zPath, zErr + ); + + return errcode; +} + +/* +** Close a file descriptor. +** +** We assume that close() almost always works, since it is only in a +** very sick application or on a very sick platform that it might fail. +** If it does fail, simply leak the file descriptor, but do log the +** error. +** +** Note that it is not safe to retry close() after EINTR since the +** file descriptor might have already been reused by another thread. +** So we don't even try to recover from an EINTR. Just log the error +** and move on. +*/ +static void robust_close(unixFile *pFile, int h, int lineno){ + if( osClose(h) ){ + unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", + pFile ? pFile->zPath : 0, lineno); + } +} + +/* +** Set the pFile->lastErrno. Do this in a subroutine as that provides +** a convenient place to set a breakpoint. +*/ +static void storeLastErrno(unixFile *pFile, int error){ + pFile->lastErrno = error; +} + +/* +** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. +*/ +static void closePendingFds(unixFile *pFile){ + unixInodeInfo *pInode = pFile->pInode; + UnixUnusedFd *p; + UnixUnusedFd *pNext; + assert( unixFileMutexHeld(pFile) ); + for(p=pInode->pUnused; p; p=pNext){ + pNext = p->pNext; + robust_close(pFile, p->fd, __LINE__); + sqlite3_free(p); + } + pInode->pUnused = 0; +} + +/* +** Release a unixInodeInfo structure previously allocated by findInodeInfo(). +** +** The global mutex must be held when this routine is called, but the mutex +** on the inode being deleted must NOT be held. +*/ +static void releaseInodeInfo(unixFile *pFile){ + unixInodeInfo *pInode = pFile->pInode; + assert( unixMutexHeld() ); + assert( unixFileMutexNotheld(pFile) ); + if( ALWAYS(pInode) ){ + pInode->nRef--; + if( pInode->nRef==0 ){ + assert( pInode->pShmNode==0 ); + sqlite3_mutex_enter(pInode->pLockMutex); + closePendingFds(pFile); + sqlite3_mutex_leave(pInode->pLockMutex); + if( pInode->pPrev ){ + assert( pInode->pPrev->pNext==pInode ); + pInode->pPrev->pNext = pInode->pNext; + }else{ + assert( inodeList==pInode ); + inodeList = pInode->pNext; + } + if( pInode->pNext ){ + assert( pInode->pNext->pPrev==pInode ); + pInode->pNext->pPrev = pInode->pPrev; + } + sqlite3_mutex_free(pInode->pLockMutex); + sqlite3_free(pInode); + } + } +} + +/* +** Given a file descriptor, locate the unixInodeInfo object that +** describes that file descriptor. Create a new one if necessary. The +** return value might be uninitialized if an error occurs. +** +** The global mutex must held when calling this routine. +** +** Return an appropriate error code. +*/ +static int findInodeInfo( + unixFile *pFile, /* Unix file with file desc used in the key */ + unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ +){ + int rc; /* System call return code */ + int fd; /* The file descriptor for pFile */ + struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ + struct stat statbuf; /* Low-level file information */ + unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ + + assert( unixMutexHeld() ); + + /* Get low-level information about the file that we can used to + ** create a unique name for the file. + */ + fd = pFile->h; + rc = osFstat(fd, &statbuf); + if( rc!=0 ){ + storeLastErrno(pFile, errno); +#if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS) + if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; +#endif + return SQLITE_IOERR; + } + +#ifdef __APPLE__ + /* On OS X on an msdos filesystem, the inode number is reported + ** incorrectly for zero-size files. See ticket #3260. To work + ** around this problem (we consider it a bug in OS X, not SQLite) + ** we always increase the file size to 1 by writing a single byte + ** prior to accessing the inode number. The one byte written is + ** an ASCII 'S' character which also happens to be the first byte + ** in the header of every SQLite database. In this way, if there + ** is a race condition such that another thread has already populated + ** the first page of the database, no damage is done. + */ + if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ + do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); + if( rc!=1 ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR; + } + rc = osFstat(fd, &statbuf); + if( rc!=0 ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR; + } + } +#endif + + memset(&fileId, 0, sizeof(fileId)); + fileId.dev = statbuf.st_dev; +#if OS_VXWORKS + fileId.pId = pFile->pId; +#else + fileId.ino = (u64)statbuf.st_ino; +#endif + assert( unixMutexHeld() ); + pInode = inodeList; + while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ + pInode = pInode->pNext; + } + if( pInode==0 ){ + pInode = sqlite3_malloc64( sizeof(*pInode) ); + if( pInode==0 ){ + return SQLITE_NOMEM_BKPT; + } + memset(pInode, 0, sizeof(*pInode)); + memcpy(&pInode->fileId, &fileId, sizeof(fileId)); + if( sqlite3GlobalConfig.bCoreMutex ){ + pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pInode->pLockMutex==0 ){ + sqlite3_free(pInode); + return SQLITE_NOMEM_BKPT; + } + } + pInode->nRef = 1; + assert( unixMutexHeld() ); + pInode->pNext = inodeList; + pInode->pPrev = 0; + if( inodeList ) inodeList->pPrev = pInode; + inodeList = pInode; + }else{ + pInode->nRef++; + } + *ppInode = pInode; + return SQLITE_OK; +} + +/* +** Return TRUE if pFile has been renamed or unlinked since it was first opened. +*/ +static int fileHasMoved(unixFile *pFile){ +#if OS_VXWORKS + return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; +#else + struct stat buf; + return pFile->pInode!=0 && + (osStat(pFile->zPath, &buf)!=0 + || (u64)buf.st_ino!=pFile->pInode->fileId.ino); +#endif +} + + +/* +** Check a unixFile that is a database. Verify the following: +** +** (1) There is exactly one hard link on the file +** (2) The file is not a symbolic link +** (3) The file has not been renamed or unlinked +** +** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. +*/ +static void verifyDbFile(unixFile *pFile){ + struct stat buf; + int rc; + + /* These verifications occurs for the main database only */ + if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return; + + rc = osFstat(pFile->h, &buf); + if( rc!=0 ){ + sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); + return; + } + if( buf.st_nlink==0 ){ + sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); + return; + } + if( buf.st_nlink>1 ){ + sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); + return; + } + if( fileHasMoved(pFile) ){ + sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); + return; + } +} + + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + assert( pFile->eFileLock<=SHARED_LOCK ); + sqlite3_mutex_enter(pFile->pInode->pLockMutex); + + /* Check if a thread in this process holds such a lock */ + if( pFile->pInode->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. + */ +#ifndef __DJGPP__ + if( !reserved && !pFile->pInode->bProcessLock ){ + struct flock lock; + lock.l_whence = SEEK_SET; + lock.l_start = RESERVED_BYTE; + lock.l_len = 1; + lock.l_type = F_WRLCK; + if( osFcntl(pFile->h, F_GETLK, &lock) ){ + rc = SQLITE_IOERR_CHECKRESERVEDLOCK; + storeLastErrno(pFile, errno); + } else if( lock.l_type!=F_UNLCK ){ + reserved = 1; + } + } +#endif + + sqlite3_mutex_leave(pFile->pInode->pLockMutex); + OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); + + *pResOut = reserved; + return rc; +} + +/* Forward declaration*/ +static int unixSleep(sqlite3_vfs*,int); + +/* +** Set a posix-advisory-lock. +** +** There are two versions of this routine. If compiled with +** SQLITE_ENABLE_SETLK_TIMEOUT then the routine has an extra parameter +** which is a pointer to a unixFile. If the unixFile->iBusyTimeout +** value is set, then it is the number of milliseconds to wait before +** failing the lock. The iBusyTimeout value is always reset back to +** zero on each call. +** +** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking +** attempt to set the lock. +*/ +#ifndef SQLITE_ENABLE_SETLK_TIMEOUT +# define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x) +#else +static int osSetPosixAdvisoryLock( + int h, /* The file descriptor on which to take the lock */ + struct flock *pLock, /* The description of the lock */ + unixFile *pFile /* Structure holding timeout value */ +){ + int tm = pFile->iBusyTimeout; + int rc = osFcntl(h,F_SETLK,pLock); + while( rc<0 && tm>0 ){ + /* On systems that support some kind of blocking file lock with a timeout, + ** make appropriate changes here to invoke that blocking file lock. On + ** generic posix, however, there is no such API. So we simply try the + ** lock once every millisecond until either the timeout expires, or until + ** the lock is obtained. */ + unixSleep(0,1000); + rc = osFcntl(h,F_SETLK,pLock); + tm--; + } + return rc; +} +#endif /* SQLITE_ENABLE_SETLK_TIMEOUT */ + + +/* +** Attempt to set a system-lock on the file pFile. The lock is +** described by pLock. +** +** If the pFile was opened read/write from unix-excl, then the only lock +** ever obtained is an exclusive lock, and it is obtained exactly once +** the first time any lock is attempted. All subsequent system locking +** operations become no-ops. Locking operations still happen internally, +** in order to coordinate access between separate database connections +** within this process, but all of that is handled in memory and the +** operating system does not participate. +** +** This function is a pass-through to fcntl(F_SETLK) if pFile is using +** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" +** and is read-only. +** +** Zero is returned if the call completes successfully, or -1 if a call +** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). +*/ +static int unixFileLock(unixFile *pFile, struct flock *pLock){ + int rc; + unixInodeInfo *pInode = pFile->pInode; + assert( pInode!=0 ); + assert( sqlite3_mutex_held(pInode->pLockMutex) ); + if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ + if( pInode->bProcessLock==0 ){ + struct flock lock; + assert( pInode->nLock==0 ); + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + lock.l_type = F_WRLCK; + rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile); + if( rc<0 ) return rc; + pInode->bProcessLock = 1; + pInode->nLock++; + }else{ + rc = 0; + } + }else{ + rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile); + } + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int unixLock(sqlite3_file *id, int eFileLock){ + /* The following describes the implementation of the various locks and + ** lock transitions in terms of the POSIX advisory shared and exclusive + ** lock primitives (called read-locks and write-locks below, to avoid + ** confusion with SQLite lock names). The algorithms are complicated + ** slightly in order to be compatible with Windows95 systems simultaneously + ** accessing the same database file, in case that is ever required. + ** + ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved + ** byte', each single bytes at well known offsets, and the 'shared byte + ** range', a range of 510 bytes at a well known offset. + ** + ** To obtain a SHARED lock, a read-lock is obtained on the 'pending + ** byte'. If this is successful, 'shared byte range' is read-locked + ** and the lock on the 'pending byte' released. (Legacy note: When + ** SQLite was first developed, Windows95 systems were still very common, + ** and Widnows95 lacks a shared-lock capability. So on Windows95, a + ** single randomly selected by from the 'shared byte range' is locked. + ** Windows95 is now pretty much extinct, but this work-around for the + ** lack of shared-locks on Windows95 lives on, for backwards + ** compatibility.) + ** + ** A process may only obtain a RESERVED lock after it has a SHARED lock. + ** A RESERVED lock is implemented by grabbing a write-lock on the + ** 'reserved byte'. + ** + ** A process may only obtain a PENDING lock after it has obtained a + ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock + ** on the 'pending byte'. This ensures that no new SHARED locks can be + ** obtained, but existing SHARED locks are allowed to persist. A process + ** does not have to obtain a RESERVED lock on the way to a PENDING lock. + ** This property is used by the algorithm for rolling back a journal file + ** after a crash. + ** + ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is + ** implemented by obtaining a write-lock on the entire 'shared byte + ** range'. Since all other locks require a read-lock on one of the bytes + ** within this range, this ensures that no other locks are held on the + ** database. + */ + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode; + struct flock lock; + int tErrno = 0; + + assert( pFile ); + OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h, + azFileLock(eFileLock), azFileLock(pFile->eFileLock), + azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared, + osGetpid(0))); + + /* If there is already a lock of this type or more restrictive on the + ** unixFile, do nothing. Don't use the end_lock: exit path, as + ** unixEnterMutex() hasn't been called yet. + */ + if( pFile->eFileLock>=eFileLock ){ + OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h, + azFileLock(eFileLock))); + return SQLITE_OK; + } + + /* Make sure the locking sequence is correct. + ** (1) We never move from unlocked to anything higher than shared lock. + ** (2) SQLite never explicitly requests a pendig lock. + ** (3) A shared lock is always held when a reserve lock is requested. + */ + assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); + assert( eFileLock!=PENDING_LOCK ); + assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); + + /* This mutex is needed because pFile->pInode is shared across threads + */ + pInode = pFile->pInode; + sqlite3_mutex_enter(pInode->pLockMutex); + + /* If some thread using this PID has a lock via a different unixFile* + ** handle that precludes the requested lock, return BUSY. + */ + if( (pFile->eFileLock!=pInode->eFileLock && + (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) + ){ + rc = SQLITE_BUSY; + goto end_lock; + } + + /* If a SHARED lock is requested, and some thread using this PID already + ** has a SHARED or RESERVED lock, then increment reference counts and + ** return SQLITE_OK. + */ + if( eFileLock==SHARED_LOCK && + (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ + assert( eFileLock==SHARED_LOCK ); + assert( pFile->eFileLock==0 ); + assert( pInode->nShared>0 ); + pFile->eFileLock = SHARED_LOCK; + pInode->nShared++; + pInode->nLock++; + goto end_lock; + } + + + /* A PENDING lock is needed before acquiring a SHARED lock and before + ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will + ** be released. + */ + lock.l_len = 1L; + lock.l_whence = SEEK_SET; + if( eFileLock==SHARED_LOCK + || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLocknShared==0 ); + assert( pInode->eFileLock==0 ); + assert( rc==SQLITE_OK ); + + /* Now get the read-lock */ + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + if( unixFileLock(pFile, &lock) ){ + tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + } + + /* Drop the temporary PENDING lock */ + lock.l_start = PENDING_BYTE; + lock.l_len = 1L; + lock.l_type = F_UNLCK; + if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){ + /* This could happen with a network mount */ + tErrno = errno; + rc = SQLITE_IOERR_UNLOCK; + } + + if( rc ){ + if( rc!=SQLITE_BUSY ){ + storeLastErrno(pFile, tErrno); + } + goto end_lock; + }else{ + pFile->eFileLock = SHARED_LOCK; + pInode->nLock++; + pInode->nShared = 1; + } + }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ + /* We are trying for an exclusive lock but another thread in this + ** same process is still holding a shared lock. */ + rc = SQLITE_BUSY; + }else{ + /* The request was for a RESERVED or EXCLUSIVE lock. It is + ** assumed that there is a SHARED or greater lock on the file + ** already. + */ + assert( 0!=pFile->eFileLock ); + lock.l_type = F_WRLCK; + + assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK ); + if( eFileLock==RESERVED_LOCK ){ + lock.l_start = RESERVED_BYTE; + lock.l_len = 1L; + }else{ + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + } + + if( unixFileLock(pFile, &lock) ){ + tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( rc!=SQLITE_BUSY ){ + storeLastErrno(pFile, tErrno); + } + } + } + + +#ifdef SQLITE_DEBUG + /* Set up the transaction-counter change checking flags when + ** transitioning from a SHARED to a RESERVED lock. The change + ** from SHARED to RESERVED marks the beginning of a normal + ** write operation (not a hot journal rollback). + */ + if( rc==SQLITE_OK + && pFile->eFileLock<=SHARED_LOCK + && eFileLock==RESERVED_LOCK + ){ + pFile->transCntrChng = 0; + pFile->dbUpdate = 0; + pFile->inNormalWrite = 1; + } +#endif + + + if( rc==SQLITE_OK ){ + pFile->eFileLock = eFileLock; + pInode->eFileLock = eFileLock; + }else if( eFileLock==EXCLUSIVE_LOCK ){ + pFile->eFileLock = PENDING_LOCK; + pInode->eFileLock = PENDING_LOCK; + } + +end_lock: + sqlite3_mutex_leave(pInode->pLockMutex); + OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), + rc==SQLITE_OK ? "ok" : "failed")); + return rc; +} + +/* +** Add the file descriptor used by file handle pFile to the corresponding +** pUnused list. +*/ +static void setPendingFd(unixFile *pFile){ + unixInodeInfo *pInode = pFile->pInode; + UnixUnusedFd *p = pFile->pPreallocatedUnused; + assert( unixFileMutexHeld(pFile) ); + p->pNext = pInode->pUnused; + pInode->pUnused = p; + pFile->h = -1; + pFile->pPreallocatedUnused = 0; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED +** the byte range is divided into 2 parts and the first part is unlocked then +** set to a read lock, then the other part is simply unlocked. This works +** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to +** remove the write lock on a region when a read lock is set. +*/ +static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode; + struct flock lock; + int rc = SQLITE_OK; + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock, + pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, + osGetpid(0))); + + assert( eFileLock<=SHARED_LOCK ); + if( pFile->eFileLock<=eFileLock ){ + return SQLITE_OK; + } + pInode = pFile->pInode; + sqlite3_mutex_enter(pInode->pLockMutex); + assert( pInode->nShared!=0 ); + if( pFile->eFileLock>SHARED_LOCK ){ + assert( pInode->eFileLock==pFile->eFileLock ); + +#ifdef SQLITE_DEBUG + /* When reducing a lock such that other processes can start + ** reading the database file again, make sure that the + ** transaction counter was updated if any part of the database + ** file changed. If the transaction counter is not updated, + ** other connections to the same file might not realize that + ** the file has changed and hence might not know to flush their + ** cache. The use of a stale cache can lead to database corruption. + */ + pFile->inNormalWrite = 0; +#endif + + /* downgrading to a shared lock on NFS involves clearing the write lock + ** before establishing the readlock - to avoid a race condition we downgrade + ** the lock in 2 blocks, so that part of the range will be covered by a + ** write lock until the rest is covered by a read lock: + ** 1: [WWWWW] + ** 2: [....W] + ** 3: [RRRRW] + ** 4: [RRRR.] + */ + if( eFileLock==SHARED_LOCK ){ +#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE + (void)handleNFSUnlock; + assert( handleNFSUnlock==0 ); +#endif +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE + if( handleNFSUnlock ){ + int tErrno; /* Error code from system call errors */ + off_t divSize = SHARED_SIZE - 1; + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = divSize; + if( unixFileLock(pFile, &lock)==(-1) ){ + tErrno = errno; + rc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, tErrno); + goto end_unlock; + } + lock.l_type = F_RDLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = divSize; + if( unixFileLock(pFile, &lock)==(-1) ){ + tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + goto end_unlock; + } + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST+divSize; + lock.l_len = SHARED_SIZE-divSize; + if( unixFileLock(pFile, &lock)==(-1) ){ + tErrno = errno; + rc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, tErrno); + goto end_unlock; + } + }else +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ + { + lock.l_type = F_RDLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + if( unixFileLock(pFile, &lock) ){ + /* In theory, the call to unixFileLock() cannot fail because another + ** process is holding an incompatible lock. If it does, this + ** indicates that the other process is not following the locking + ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning + ** SQLITE_BUSY would confuse the upper layer (in practice it causes + ** an assert to fail). */ + rc = SQLITE_IOERR_RDLOCK; + storeLastErrno(pFile, errno); + goto end_unlock; + } + } + } + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = PENDING_BYTE; + lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); + if( unixFileLock(pFile, &lock)==0 ){ + pInode->eFileLock = SHARED_LOCK; + }else{ + rc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, errno); + goto end_unlock; + } + } + if( eFileLock==NO_LOCK ){ + /* Decrement the shared lock counter. Release the lock using an + ** OS call only when all threads in this same process have released + ** the lock. + */ + pInode->nShared--; + if( pInode->nShared==0 ){ + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = lock.l_len = 0L; + if( unixFileLock(pFile, &lock)==0 ){ + pInode->eFileLock = NO_LOCK; + }else{ + rc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, errno); + pInode->eFileLock = NO_LOCK; + pFile->eFileLock = NO_LOCK; + } + } + + /* Decrement the count of locks against this same file. When the + ** count reaches zero, close any other file descriptors whose close + ** was deferred because of outstanding locks. + */ + pInode->nLock--; + assert( pInode->nLock>=0 ); + if( pInode->nLock==0 ) closePendingFds(pFile); + } + +end_unlock: + sqlite3_mutex_leave(pInode->pLockMutex); + if( rc==SQLITE_OK ){ + pFile->eFileLock = eFileLock; + } + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int unixUnlock(sqlite3_file *id, int eFileLock){ +#if SQLITE_MAX_MMAP_SIZE>0 + assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); +#endif + return posixUnlock(id, eFileLock, 0); +} + +#if SQLITE_MAX_MMAP_SIZE>0 +static int unixMapfile(unixFile *pFd, i64 nByte); +static void unixUnmapfile(unixFile *pFd); +#endif + +/* +** This function performs the parts of the "close file" operation +** common to all locking schemes. It closes the directory and file +** handles, if they are valid, and sets all fields of the unixFile +** structure to 0. +** +** It is *not* necessary to hold the mutex when this routine is called, +** even on VxWorks. A mutex will be acquired on VxWorks by the +** vxworksReleaseFileId() routine. +*/ +static int closeUnixFile(sqlite3_file *id){ + unixFile *pFile = (unixFile*)id; +#if SQLITE_MAX_MMAP_SIZE>0 + unixUnmapfile(pFile); +#endif + if( pFile->h>=0 ){ + robust_close(pFile, pFile->h, __LINE__); + pFile->h = -1; + } +#if OS_VXWORKS + if( pFile->pId ){ + if( pFile->ctrlFlags & UNIXFILE_DELETE ){ + osUnlink(pFile->pId->zCanonicalName); + } + vxworksReleaseFileId(pFile->pId); + pFile->pId = 0; + } +#endif +#ifdef SQLITE_UNLINK_AFTER_CLOSE + if( pFile->ctrlFlags & UNIXFILE_DELETE ){ + osUnlink(pFile->zPath); + sqlite3_free(*(char**)&pFile->zPath); + pFile->zPath = 0; + } +#endif + OSTRACE(("CLOSE %-3d\n", pFile->h)); + OpenCounter(-1); + sqlite3_free(pFile->pPreallocatedUnused); + memset(pFile, 0, sizeof(unixFile)); + return SQLITE_OK; +} + +/* +** Close a file. +*/ +static int unixClose(sqlite3_file *id){ + int rc = SQLITE_OK; + unixFile *pFile = (unixFile *)id; + unixInodeInfo *pInode = pFile->pInode; + + assert( pInode!=0 ); + verifyDbFile(pFile); + unixUnlock(id, NO_LOCK); + assert( unixFileMutexNotheld(pFile) ); + unixEnterMutex(); + + /* unixFile.pInode is always valid here. Otherwise, a different close + ** routine (e.g. nolockClose()) would be called instead. + */ + assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); + sqlite3_mutex_enter(pInode->pLockMutex); + if( pInode->nLock ){ + /* If there are outstanding locks, do not actually close the file just + ** yet because that would clear those locks. Instead, add the file + ** descriptor to pInode->pUnused list. It will be automatically closed + ** when the last lock is cleared. + */ + setPendingFd(pFile); + } + sqlite3_mutex_leave(pInode->pLockMutex); + releaseInodeInfo(pFile); + assert( pFile->pShm==0 ); + rc = closeUnixFile(id); + unixLeaveMutex(); + return rc; +} + +/************** End of the posix advisory lock implementation ***************** +******************************************************************************/ + +/****************************************************************************** +****************************** No-op Locking ********************************** +** +** Of the various locking implementations available, this is by far the +** simplest: locking is ignored. No attempt is made to lock the database +** file for reading or writing. +** +** This locking mode is appropriate for use on read-only databases +** (ex: databases that are burned into CD-ROM, for example.) It can +** also be used if the application employs some external mechanism to +** prevent simultaneous access of the same database by two or more +** database connections. But there is a serious risk of database +** corruption if this locking mode is used in situations where multiple +** database connections are accessing the same database file at the same +** time and one or more of those connections are writing. +*/ + +static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ + UNUSED_PARAMETER(NotUsed); + *pResOut = 0; + return SQLITE_OK; +} +static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + return SQLITE_OK; +} +static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + return SQLITE_OK; +} + +/* +** Close the file. +*/ +static int nolockClose(sqlite3_file *id) { + return closeUnixFile(id); +} + +/******************* End of the no-op lock implementation ********************* +******************************************************************************/ + +/****************************************************************************** +************************* Begin dot-file Locking ****************************** +** +** The dotfile locking implementation uses the existence of separate lock +** files (really a directory) to control access to the database. This works +** on just about every filesystem imaginable. But there are serious downsides: +** +** (1) There is zero concurrency. A single reader blocks all other +** connections from reading or writing the database. +** +** (2) An application crash or power loss can leave stale lock files +** sitting around that need to be cleared manually. +** +** Nevertheless, a dotlock is an appropriate locking mode for use if no +** other locking strategy is available. +** +** Dotfile locking works by creating a subdirectory in the same directory as +** the database and with the same name but with a ".lock" extension added. +** The existence of a lock directory implies an EXCLUSIVE lock. All other +** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. +*/ + +/* +** The file suffix added to the data base filename in order to create the +** lock directory. +*/ +#define DOTLOCK_SUFFIX ".lock" + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +** +** In dotfile locking, either a lock exists or it does not. So in this +** variation of CheckReservedLock(), *pResOut is set to true if any lock +** is held on the file and false if the file is unlocked. +*/ +static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + reserved = osAccess((const char*)pFile->lockingContext, 0)==0; + OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +** +** With dotfile locking, we really only support state (4): EXCLUSIVE. +** But we track the other locking levels internally. +*/ +static int dotlockLock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + char *zLockFile = (char *)pFile->lockingContext; + int rc = SQLITE_OK; + + + /* If we have any lock, then the lock file already exists. All we have + ** to do is adjust our internal record of the lock level. + */ + if( pFile->eFileLock > NO_LOCK ){ + pFile->eFileLock = eFileLock; + /* Always update the timestamp on the old file */ +#ifdef HAVE_UTIME + utime(zLockFile, NULL); +#else + utimes(zLockFile, NULL); +#endif + return SQLITE_OK; + } + + /* grab an exclusive lock */ + rc = osMkdir(zLockFile, 0777); + if( rc<0 ){ + /* failed to open/create the lock directory */ + int tErrno = errno; + if( EEXIST == tErrno ){ + rc = SQLITE_BUSY; + } else { + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( rc!=SQLITE_BUSY ){ + storeLastErrno(pFile, tErrno); + } + } + return rc; + } + + /* got it, set the type and return ok */ + pFile->eFileLock = eFileLock; + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** When the locking level reaches NO_LOCK, delete the lock file. +*/ +static int dotlockUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + char *zLockFile = (char *)pFile->lockingContext; + int rc; + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, + pFile->eFileLock, osGetpid(0))); + assert( eFileLock<=SHARED_LOCK ); + + /* no-op if possible */ + if( pFile->eFileLock==eFileLock ){ + return SQLITE_OK; + } + + /* To downgrade to shared, simply update our internal notion of the + ** lock state. No need to mess with the file on disk. + */ + if( eFileLock==SHARED_LOCK ){ + pFile->eFileLock = SHARED_LOCK; + return SQLITE_OK; + } + + /* To fully unlock the database, delete the lock file */ + assert( eFileLock==NO_LOCK ); + rc = osRmdir(zLockFile); + if( rc<0 ){ + int tErrno = errno; + if( tErrno==ENOENT ){ + rc = SQLITE_OK; + }else{ + rc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, tErrno); + } + return rc; + } + pFile->eFileLock = NO_LOCK; + return SQLITE_OK; +} + +/* +** Close a file. Make sure the lock has been released before closing. +*/ +static int dotlockClose(sqlite3_file *id) { + unixFile *pFile = (unixFile*)id; + assert( id!=0 ); + dotlockUnlock(id, NO_LOCK); + sqlite3_free(pFile->lockingContext); + return closeUnixFile(id); +} +/****************** End of the dot-file lock implementation ******************* +******************************************************************************/ + +/****************************************************************************** +************************** Begin flock Locking ******************************** +** +** Use the flock() system call to do file locking. +** +** flock() locking is like dot-file locking in that the various +** fine-grain locking levels supported by SQLite are collapsed into +** a single exclusive lock. In other words, SHARED, RESERVED, and +** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite +** still works when you do this, but concurrency is reduced since +** only a single process can be reading the database at a time. +** +** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off +*/ +#if SQLITE_ENABLE_LOCKING_STYLE + +/* +** Retry flock() calls that fail with EINTR +*/ +#ifdef EINTR +static int robust_flock(int fd, int op){ + int rc; + do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); + return rc; +} +#else +# define robust_flock(a,b) flock(a,b) +#endif + + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + + /* Check if a thread in this process holds such a lock */ + if( pFile->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. */ + if( !reserved ){ + /* attempt to get the lock */ + int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); + if( !lrc ){ + /* got the lock, unlock it */ + lrc = robust_flock(pFile->h, LOCK_UN); + if ( lrc ) { + int tErrno = errno; + /* unlock failed with an error */ + lrc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, tErrno); + rc = lrc; + } + } else { + int tErrno = errno; + reserved = 1; + /* someone else might have it reserved */ + lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( IS_LOCK_ERROR(lrc) ){ + storeLastErrno(pFile, tErrno); + rc = lrc; + } + } + } + OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); + +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + if( (rc & 0xff) == SQLITE_IOERR ){ + rc = SQLITE_OK; + reserved=1; + } +#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** flock() only really support EXCLUSIVE locks. We track intermediate +** lock states in the sqlite3_file structure, but all locks SHARED or +** above are really EXCLUSIVE locks and exclude all other processes from +** access the file. +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int flockLock(sqlite3_file *id, int eFileLock) { + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + + assert( pFile ); + + /* if we already have a lock, it is exclusive. + ** Just adjust level and punt on outta here. */ + if (pFile->eFileLock > NO_LOCK) { + pFile->eFileLock = eFileLock; + return SQLITE_OK; + } + + /* grab an exclusive lock */ + + if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { + int tErrno = errno; + /* didn't get, must be busy */ + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + } else { + /* got it, set the type and return ok */ + pFile->eFileLock = eFileLock; + } + OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), + rc==SQLITE_OK ? "ok" : "failed")); +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + if( (rc & 0xff) == SQLITE_IOERR ){ + rc = SQLITE_BUSY; + } +#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ + return rc; +} + + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int flockUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, + pFile->eFileLock, osGetpid(0))); + assert( eFileLock<=SHARED_LOCK ); + + /* no-op if possible */ + if( pFile->eFileLock==eFileLock ){ + return SQLITE_OK; + } + + /* shared can just be set because we always have an exclusive */ + if (eFileLock==SHARED_LOCK) { + pFile->eFileLock = eFileLock; + return SQLITE_OK; + } + + /* no, really, unlock. */ + if( robust_flock(pFile->h, LOCK_UN) ){ +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + return SQLITE_OK; +#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ + return SQLITE_IOERR_UNLOCK; + }else{ + pFile->eFileLock = NO_LOCK; + return SQLITE_OK; + } +} + +/* +** Close a file. +*/ +static int flockClose(sqlite3_file *id) { + assert( id!=0 ); + flockUnlock(id, NO_LOCK); + return closeUnixFile(id); +} + +#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ + +/******************* End of the flock lock implementation ********************* +******************************************************************************/ + +/****************************************************************************** +************************ Begin Named Semaphore Locking ************************ +** +** Named semaphore locking is only supported on VxWorks. +** +** Semaphore locking is like dot-lock and flock in that it really only +** supports EXCLUSIVE locking. Only a single process can read or write +** the database file at a time. This reduces potential concurrency, but +** makes the lock implementation much easier. +*/ +#if OS_VXWORKS + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) { + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + + /* Check if a thread in this process holds such a lock */ + if( pFile->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. */ + if( !reserved ){ + sem_t *pSem = pFile->pInode->pSem; + + if( sem_trywait(pSem)==-1 ){ + int tErrno = errno; + if( EAGAIN != tErrno ){ + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); + storeLastErrno(pFile, tErrno); + } else { + /* someone else has the lock when we are in NO_LOCK */ + reserved = (pFile->eFileLock < SHARED_LOCK); + } + }else{ + /* we could have it if we want it */ + sem_post(pSem); + } + } + OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved)); + + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** Semaphore locks only really support EXCLUSIVE locks. We track intermediate +** lock states in the sqlite3_file structure, but all locks SHARED or +** above are really EXCLUSIVE locks and exclude all other processes from +** access the file. +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int semXLock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + sem_t *pSem = pFile->pInode->pSem; + int rc = SQLITE_OK; + + /* if we already have a lock, it is exclusive. + ** Just adjust level and punt on outta here. */ + if (pFile->eFileLock > NO_LOCK) { + pFile->eFileLock = eFileLock; + rc = SQLITE_OK; + goto sem_end_lock; + } + + /* lock semaphore now but bail out when already locked. */ + if( sem_trywait(pSem)==-1 ){ + rc = SQLITE_BUSY; + goto sem_end_lock; + } + + /* got it, set the type and return ok */ + pFile->eFileLock = eFileLock; + + sem_end_lock: + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int semXUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + sem_t *pSem = pFile->pInode->pSem; + + assert( pFile ); + assert( pSem ); + OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, + pFile->eFileLock, osGetpid(0))); + assert( eFileLock<=SHARED_LOCK ); + + /* no-op if possible */ + if( pFile->eFileLock==eFileLock ){ + return SQLITE_OK; + } + + /* shared can just be set because we always have an exclusive */ + if (eFileLock==SHARED_LOCK) { + pFile->eFileLock = eFileLock; + return SQLITE_OK; + } + + /* no, really unlock. */ + if ( sem_post(pSem)==-1 ) { + int rc, tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + return rc; + } + pFile->eFileLock = NO_LOCK; + return SQLITE_OK; +} + +/* + ** Close a file. + */ +static int semXClose(sqlite3_file *id) { + if( id ){ + unixFile *pFile = (unixFile*)id; + semXUnlock(id, NO_LOCK); + assert( pFile ); + assert( unixFileMutexNotheld(pFile) ); + unixEnterMutex(); + releaseInodeInfo(pFile); + unixLeaveMutex(); + closeUnixFile(id); + } + return SQLITE_OK; +} + +#endif /* OS_VXWORKS */ +/* +** Named semaphore locking is only available on VxWorks. +** +*************** End of the named semaphore lock implementation **************** +******************************************************************************/ + + +/****************************************************************************** +*************************** Begin AFP Locking ********************************* +** +** AFP is the Apple Filing Protocol. AFP is a network filesystem found +** on Apple Macintosh computers - both OS9 and OSX. +** +** Third-party implementations of AFP are available. But this code here +** only works on OSX. +*/ + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +/* +** The afpLockingContext structure contains all afp lock specific state +*/ +typedef struct afpLockingContext afpLockingContext; +struct afpLockingContext { + int reserved; + const char *dbPath; /* Name of the open file */ +}; + +struct ByteRangeLockPB2 +{ + unsigned long long offset; /* offset to first byte to lock */ + unsigned long long length; /* nbr of bytes to lock */ + unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ + unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ + unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ + int fd; /* file desc to assoc this lock with */ +}; + +#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) + +/* +** This is a utility for setting or clearing a bit-range lock on an +** AFP filesystem. +** +** Return SQLITE_OK on success, SQLITE_BUSY on failure. +*/ +static int afpSetLock( + const char *path, /* Name of the file to be locked or unlocked */ + unixFile *pFile, /* Open file descriptor on path */ + unsigned long long offset, /* First byte to be locked */ + unsigned long long length, /* Number of bytes to lock */ + int setLockFlag /* True to set lock. False to clear lock */ +){ + struct ByteRangeLockPB2 pb; + int err; + + pb.unLockFlag = setLockFlag ? 0 : 1; + pb.startEndFlag = 0; + pb.offset = offset; + pb.length = length; + pb.fd = pFile->h; + + OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", + (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), + offset, length)); + err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); + if ( err==-1 ) { + int rc; + int tErrno = errno; + OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", + path, tErrno, strerror(tErrno))); +#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS + rc = SQLITE_BUSY; +#else + rc = sqliteErrorFromPosixError(tErrno, + setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); +#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + return rc; + } else { + return SQLITE_OK; + } +} + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + afpLockingContext *context; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + context = (afpLockingContext *) pFile->lockingContext; + if( context->reserved ){ + *pResOut = 1; + return SQLITE_OK; + } + sqlite3_mutex_enter(pFile->pInode->pLockMutex); + /* Check if a thread in this process holds such a lock */ + if( pFile->pInode->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. + */ + if( !reserved ){ + /* lock the RESERVED byte */ + int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); + if( SQLITE_OK==lrc ){ + /* if we succeeded in taking the reserved lock, unlock it to restore + ** the original state */ + lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); + } else { + /* if we failed to get the lock then someone else must have it */ + reserved = 1; + } + if( IS_LOCK_ERROR(lrc) ){ + rc=lrc; + } + } + + sqlite3_mutex_leave(pFile->pInode->pLockMutex); + OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); + + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int afpLock(sqlite3_file *id, int eFileLock){ + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode = pFile->pInode; + afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; + + assert( pFile ); + OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, + azFileLock(eFileLock), azFileLock(pFile->eFileLock), + azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); + + /* If there is already a lock of this type or more restrictive on the + ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as + ** unixEnterMutex() hasn't been called yet. + */ + if( pFile->eFileLock>=eFileLock ){ + OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, + azFileLock(eFileLock))); + return SQLITE_OK; + } + + /* Make sure the locking sequence is correct + ** (1) We never move from unlocked to anything higher than shared lock. + ** (2) SQLite never explicitly requests a pendig lock. + ** (3) A shared lock is always held when a reserve lock is requested. + */ + assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); + assert( eFileLock!=PENDING_LOCK ); + assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); + + /* This mutex is needed because pFile->pInode is shared across threads + */ + pInode = pFile->pInode; + sqlite3_mutex_enter(pInode->pLockMutex); + + /* If some thread using this PID has a lock via a different unixFile* + ** handle that precludes the requested lock, return BUSY. + */ + if( (pFile->eFileLock!=pInode->eFileLock && + (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) + ){ + rc = SQLITE_BUSY; + goto afp_end_lock; + } + + /* If a SHARED lock is requested, and some thread using this PID already + ** has a SHARED or RESERVED lock, then increment reference counts and + ** return SQLITE_OK. + */ + if( eFileLock==SHARED_LOCK && + (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ + assert( eFileLock==SHARED_LOCK ); + assert( pFile->eFileLock==0 ); + assert( pInode->nShared>0 ); + pFile->eFileLock = SHARED_LOCK; + pInode->nShared++; + pInode->nLock++; + goto afp_end_lock; + } + + /* A PENDING lock is needed before acquiring a SHARED lock and before + ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will + ** be released. + */ + if( eFileLock==SHARED_LOCK + || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLockdbPath, pFile, PENDING_BYTE, 1, 1); + if (failed) { + rc = failed; + goto afp_end_lock; + } + } + + /* If control gets to this point, then actually go ahead and make + ** operating system calls for the specified lock. + */ + if( eFileLock==SHARED_LOCK ){ + int lrc1, lrc2, lrc1Errno = 0; + long lk, mask; + + assert( pInode->nShared==0 ); + assert( pInode->eFileLock==0 ); + + mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff; + /* Now get the read-lock SHARED_LOCK */ + /* note that the quality of the randomness doesn't matter that much */ + lk = random(); + pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); + lrc1 = afpSetLock(context->dbPath, pFile, + SHARED_FIRST+pInode->sharedByte, 1, 1); + if( IS_LOCK_ERROR(lrc1) ){ + lrc1Errno = pFile->lastErrno; + } + /* Drop the temporary PENDING lock */ + lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); + + if( IS_LOCK_ERROR(lrc1) ) { + storeLastErrno(pFile, lrc1Errno); + rc = lrc1; + goto afp_end_lock; + } else if( IS_LOCK_ERROR(lrc2) ){ + rc = lrc2; + goto afp_end_lock; + } else if( lrc1 != SQLITE_OK ) { + rc = lrc1; + } else { + pFile->eFileLock = SHARED_LOCK; + pInode->nLock++; + pInode->nShared = 1; + } + }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ + /* We are trying for an exclusive lock but another thread in this + ** same process is still holding a shared lock. */ + rc = SQLITE_BUSY; + }else{ + /* The request was for a RESERVED or EXCLUSIVE lock. It is + ** assumed that there is a SHARED or greater lock on the file + ** already. + */ + int failed = 0; + assert( 0!=pFile->eFileLock ); + if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { + /* Acquire a RESERVED lock */ + failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); + if( !failed ){ + context->reserved = 1; + } + } + if (!failed && eFileLock == EXCLUSIVE_LOCK) { + /* Acquire an EXCLUSIVE lock */ + + /* Remove the shared lock before trying the range. we'll need to + ** reestablish the shared lock if we can't get the afpUnlock + */ + if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + + pInode->sharedByte, 1, 0)) ){ + int failed2 = SQLITE_OK; + /* now attemmpt to get the exclusive lock range */ + failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, + SHARED_SIZE, 1); + if( failed && (failed2 = afpSetLock(context->dbPath, pFile, + SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ + /* Can't reestablish the shared lock. Sqlite can't deal, this is + ** a critical I/O error + */ + rc = ((failed & 0xff) == SQLITE_IOERR) ? failed2 : + SQLITE_IOERR_LOCK; + goto afp_end_lock; + } + }else{ + rc = failed; + } + } + if( failed ){ + rc = failed; + } + } + + if( rc==SQLITE_OK ){ + pFile->eFileLock = eFileLock; + pInode->eFileLock = eFileLock; + }else if( eFileLock==EXCLUSIVE_LOCK ){ + pFile->eFileLock = PENDING_LOCK; + pInode->eFileLock = PENDING_LOCK; + } + +afp_end_lock: + sqlite3_mutex_leave(pInode->pLockMutex); + OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), + rc==SQLITE_OK ? "ok" : "failed")); + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int afpUnlock(sqlite3_file *id, int eFileLock) { + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode; + afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; + int skipShared = 0; +#ifdef SQLITE_TEST + int h = pFile->h; +#endif + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, + pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, + osGetpid(0))); + + assert( eFileLock<=SHARED_LOCK ); + if( pFile->eFileLock<=eFileLock ){ + return SQLITE_OK; + } + pInode = pFile->pInode; + sqlite3_mutex_enter(pInode->pLockMutex); + assert( pInode->nShared!=0 ); + if( pFile->eFileLock>SHARED_LOCK ){ + assert( pInode->eFileLock==pFile->eFileLock ); + SimulateIOErrorBenign(1); + SimulateIOError( h=(-1) ) + SimulateIOErrorBenign(0); + +#ifdef SQLITE_DEBUG + /* When reducing a lock such that other processes can start + ** reading the database file again, make sure that the + ** transaction counter was updated if any part of the database + ** file changed. If the transaction counter is not updated, + ** other connections to the same file might not realize that + ** the file has changed and hence might not know to flush their + ** cache. The use of a stale cache can lead to database corruption. + */ + assert( pFile->inNormalWrite==0 + || pFile->dbUpdate==0 + || pFile->transCntrChng==1 ); + pFile->inNormalWrite = 0; +#endif + + if( pFile->eFileLock==EXCLUSIVE_LOCK ){ + rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); + if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ + /* only re-establish the shared lock if necessary */ + int sharedLockByte = SHARED_FIRST+pInode->sharedByte; + rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); + } else { + skipShared = 1; + } + } + if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ + rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); + } + if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ + rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); + if( !rc ){ + context->reserved = 0; + } + } + if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ + pInode->eFileLock = SHARED_LOCK; + } + } + if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ + + /* Decrement the shared lock counter. Release the lock using an + ** OS call only when all threads in this same process have released + ** the lock. + */ + unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; + pInode->nShared--; + if( pInode->nShared==0 ){ + SimulateIOErrorBenign(1); + SimulateIOError( h=(-1) ) + SimulateIOErrorBenign(0); + if( !skipShared ){ + rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); + } + if( !rc ){ + pInode->eFileLock = NO_LOCK; + pFile->eFileLock = NO_LOCK; + } + } + if( rc==SQLITE_OK ){ + pInode->nLock--; + assert( pInode->nLock>=0 ); + if( pInode->nLock==0 ) closePendingFds(pFile); + } + } + + sqlite3_mutex_leave(pInode->pLockMutex); + if( rc==SQLITE_OK ){ + pFile->eFileLock = eFileLock; + } + return rc; +} + +/* +** Close a file & cleanup AFP specific locking context +*/ +static int afpClose(sqlite3_file *id) { + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + assert( id!=0 ); + afpUnlock(id, NO_LOCK); + assert( unixFileMutexNotheld(pFile) ); + unixEnterMutex(); + if( pFile->pInode ){ + unixInodeInfo *pInode = pFile->pInode; + sqlite3_mutex_enter(pInode->pLockMutex); + if( pInode->nLock ){ + /* If there are outstanding locks, do not actually close the file just + ** yet because that would clear those locks. Instead, add the file + ** descriptor to pInode->aPending. It will be automatically closed when + ** the last lock is cleared. + */ + setPendingFd(pFile); + } + sqlite3_mutex_leave(pInode->pLockMutex); + } + releaseInodeInfo(pFile); + sqlite3_free(pFile->lockingContext); + rc = closeUnixFile(id); + unixLeaveMutex(); + return rc; +} + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ +/* +** The code above is the AFP lock implementation. The code is specific +** to MacOSX and does not work on other unix platforms. No alternative +** is available. If you don't compile for a mac, then the "unix-afp" +** VFS is not available. +** +********************* End of the AFP lock implementation ********************** +******************************************************************************/ + +/****************************************************************************** +*************************** Begin NFS Locking ********************************/ + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +/* + ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock + ** must be either NO_LOCK or SHARED_LOCK. + ** + ** If the locking level of the file descriptor is already at or below + ** the requested locking level, this routine is a no-op. + */ +static int nfsUnlock(sqlite3_file *id, int eFileLock){ + return posixUnlock(id, eFileLock, 1); +} + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ +/* +** The code above is the NFS lock implementation. The code is specific +** to MacOSX and does not work on other unix platforms. No alternative +** is available. +** +********************* End of the NFS lock implementation ********************** +******************************************************************************/ + +/****************************************************************************** +**************** Non-locking sqlite3_file methods ***************************** +** +** The next division contains implementations for all methods of the +** sqlite3_file object other than the locking methods. The locking +** methods were defined in divisions above (one locking method per +** division). Those methods that are common to all locking modes +** are gather together into this division. +*/ + +/* +** Seek to the offset passed as the second argument, then read cnt +** bytes into pBuf. Return the number of bytes actually read. +** +** NB: If you define USE_PREAD or USE_PREAD64, then it might also +** be necessary to define _XOPEN_SOURCE to be 500. This varies from +** one system to another. Since SQLite does not define USE_PREAD +** in any form by default, we will not attempt to define _XOPEN_SOURCE. +** See tickets #2741 and #2681. +** +** To avoid stomping the errno value on a failed read the lastErrno value +** is set before returning. +*/ +static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ + int got; + int prior = 0; +#if (!defined(USE_PREAD) && !defined(USE_PREAD64)) + i64 newOffset; +#endif + TIMER_START; + assert( cnt==(cnt&0x1ffff) ); + assert( id->h>2 ); + do{ +#if defined(USE_PREAD) + got = osPread(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); +#elif defined(USE_PREAD64) + got = osPread64(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); +#else + newOffset = lseek(id->h, offset, SEEK_SET); + SimulateIOError( newOffset = -1 ); + if( newOffset<0 ){ + storeLastErrno((unixFile*)id, errno); + return -1; + } + got = osRead(id->h, pBuf, cnt); +#endif + if( got==cnt ) break; + if( got<0 ){ + if( errno==EINTR ){ got = 1; continue; } + prior = 0; + storeLastErrno((unixFile*)id, errno); + break; + }else if( got>0 ){ + cnt -= got; + offset += got; + prior += got; + pBuf = (void*)(got + (char*)pBuf); + } + }while( got>0 ); + TIMER_END; + OSTRACE(("READ %-3d %5d %7lld %llu\n", + id->h, got+prior, offset-prior, TIMER_ELAPSED)); + return got+prior; +} + +/* +** Read data from a file into a buffer. Return SQLITE_OK if all +** bytes were read successfully and SQLITE_IOERR if anything goes +** wrong. +*/ +static int unixRead( + sqlite3_file *id, + void *pBuf, + int amt, + sqlite3_int64 offset +){ + unixFile *pFile = (unixFile *)id; + int got; + assert( id ); + assert( offset>=0 ); + assert( amt>0 ); + + /* If this is a database file (not a journal, super-journal or temp + ** file), the bytes in the locking range should never be read or written. */ +#if 0 + assert( pFile->pPreallocatedUnused==0 + || offset>=PENDING_BYTE+512 + || offset+amt<=PENDING_BYTE + ); +#endif + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + + got = seekAndRead(pFile, offset, pBuf, amt); + if( got==amt ){ + return SQLITE_OK; + }else if( got<0 ){ + /* pFile->lastErrno has been set by seekAndRead(). + ** Usually we return SQLITE_IOERR_READ here, though for some + ** kinds of errors we return SQLITE_IOERR_CORRUPTFS. The + ** SQLITE_IOERR_CORRUPTFS will be converted into SQLITE_CORRUPT + ** prior to returning to the application by the sqlite3ApiExit() + ** routine. + */ + switch( pFile->lastErrno ){ + case ERANGE: + case EIO: +#ifdef ENXIO + case ENXIO: +#endif +#ifdef EDEVERR + case EDEVERR: +#endif + return SQLITE_IOERR_CORRUPTFS; + } + return SQLITE_IOERR_READ; + }else{ + storeLastErrno(pFile, 0); /* not a system error */ + /* Unread parts of the buffer must be zero-filled */ + memset(&((char*)pBuf)[got], 0, amt-got); + return SQLITE_IOERR_SHORT_READ; + } +} + +/* +** Attempt to seek the file-descriptor passed as the first argument to +** absolute offset iOff, then attempt to write nBuf bytes of data from +** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, +** return the actual number of bytes written (which may be less than +** nBuf). +*/ +static int seekAndWriteFd( + int fd, /* File descriptor to write to */ + i64 iOff, /* File offset to begin writing at */ + const void *pBuf, /* Copy data from this buffer to the file */ + int nBuf, /* Size of buffer pBuf in bytes */ + int *piErrno /* OUT: Error number if error occurs */ +){ + int rc = 0; /* Value returned by system call */ + + assert( nBuf==(nBuf&0x1ffff) ); + assert( fd>2 ); + assert( piErrno!=0 ); + nBuf &= 0x1ffff; + TIMER_START; + +#if defined(USE_PREAD) + do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); +#elif defined(USE_PREAD64) + do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); +#else + do{ + i64 iSeek = lseek(fd, iOff, SEEK_SET); + SimulateIOError( iSeek = -1 ); + if( iSeek<0 ){ + rc = -1; + break; + } + rc = osWrite(fd, pBuf, nBuf); + }while( rc<0 && errno==EINTR ); +#endif + + TIMER_END; + OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); + + if( rc<0 ) *piErrno = errno; + return rc; +} + + +/* +** Seek to the offset in id->offset then read cnt bytes into pBuf. +** Return the number of bytes actually read. Update the offset. +** +** To avoid stomping the errno value on a failed write the lastErrno value +** is set before returning. +*/ +static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ + return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); +} + + +/* +** Write data from a buffer into a file. Return SQLITE_OK on success +** or some other error code on failure. +*/ +static int unixWrite( + sqlite3_file *id, + const void *pBuf, + int amt, + sqlite3_int64 offset +){ + unixFile *pFile = (unixFile*)id; + int wrote = 0; + assert( id ); + assert( amt>0 ); + + /* If this is a database file (not a journal, super-journal or temp + ** file), the bytes in the locking range should never be read or written. */ +#if 0 + assert( pFile->pPreallocatedUnused==0 + || offset>=PENDING_BYTE+512 + || offset+amt<=PENDING_BYTE + ); +#endif + +#ifdef SQLITE_DEBUG + /* If we are doing a normal write to a database file (as opposed to + ** doing a hot-journal rollback or a write to some file other than a + ** normal database file) then record the fact that the database + ** has changed. If the transaction counter is modified, record that + ** fact too. + */ + if( pFile->inNormalWrite ){ + pFile->dbUpdate = 1; /* The database has been modified */ + if( offset<=24 && offset+amt>=27 ){ + int rc; + char oldCntr[4]; + SimulateIOErrorBenign(1); + rc = seekAndRead(pFile, 24, oldCntr, 4); + SimulateIOErrorBenign(0); + if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ + pFile->transCntrChng = 1; /* The transaction counter has changed */ + } + } + } +#endif + +#if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + + while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))0 ){ + amt -= wrote; + offset += wrote; + pBuf = &((char*)pBuf)[wrote]; + } + SimulateIOError(( wrote=(-1), amt=1 )); + SimulateDiskfullError(( wrote=0, amt=1 )); + + if( amt>wrote ){ + if( wrote<0 && pFile->lastErrno!=ENOSPC ){ + /* lastErrno set by seekAndWrite */ + return SQLITE_IOERR_WRITE; + }else{ + storeLastErrno(pFile, 0); /* not a system error */ + return SQLITE_FULL; + } + } + + return SQLITE_OK; +} + +#ifdef SQLITE_TEST +/* +** Count the number of fullsyncs and normal syncs. This is used to test +** that syncs and fullsyncs are occurring at the right times. +*/ +SQLITE_API int sqlite3_sync_count = 0; +SQLITE_API int sqlite3_fullsync_count = 0; +#endif + +/* +** We do not trust systems to provide a working fdatasync(). Some do. +** Others do no. To be safe, we will stick with the (slightly slower) +** fsync(). If you know that your system does support fdatasync() correctly, +** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC +*/ +#if !defined(fdatasync) && !HAVE_FDATASYNC +# define fdatasync fsync +#endif + +/* +** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not +** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently +** only available on Mac OS X. But that could change. +*/ +#ifdef F_FULLFSYNC +# define HAVE_FULLFSYNC 1 +#else +# define HAVE_FULLFSYNC 0 +#endif + + +/* +** The fsync() system call does not work as advertised on many +** unix systems. The following procedure is an attempt to make +** it work better. +** +** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful +** for testing when we want to run through the test suite quickly. +** You are strongly advised *not* to deploy with SQLITE_NO_SYNC +** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash +** or power failure will likely corrupt the database file. +** +** SQLite sets the dataOnly flag if the size of the file is unchanged. +** The idea behind dataOnly is that it should only write the file content +** to disk, not the inode. We only set dataOnly if the file size is +** unchanged since the file size is part of the inode. However, +** Ted Ts'o tells us that fdatasync() will also write the inode if the +** file size has changed. The only real difference between fdatasync() +** and fsync(), Ted tells us, is that fdatasync() will not flush the +** inode if the mtime or owner or other inode attributes have changed. +** We only care about the file size, not the other file attributes, so +** as far as SQLite is concerned, an fdatasync() is always adequate. +** So, we always use fdatasync() if it is available, regardless of +** the value of the dataOnly flag. +*/ +static int full_fsync(int fd, int fullSync, int dataOnly){ + int rc; + + /* The following "ifdef/elif/else/" block has the same structure as + ** the one below. It is replicated here solely to avoid cluttering + ** up the real code with the UNUSED_PARAMETER() macros. + */ +#ifdef SQLITE_NO_SYNC + UNUSED_PARAMETER(fd); + UNUSED_PARAMETER(fullSync); + UNUSED_PARAMETER(dataOnly); +#elif HAVE_FULLFSYNC + UNUSED_PARAMETER(dataOnly); +#else + UNUSED_PARAMETER(fullSync); + UNUSED_PARAMETER(dataOnly); +#endif + + /* Record the number of times that we do a normal fsync() and + ** FULLSYNC. This is used during testing to verify that this procedure + ** gets called with the correct arguments. + */ +#ifdef SQLITE_TEST + if( fullSync ) sqlite3_fullsync_count++; + sqlite3_sync_count++; +#endif + + /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a + ** no-op. But go ahead and call fstat() to validate the file + ** descriptor as we need a method to provoke a failure during + ** coverate testing. + */ +#ifdef SQLITE_NO_SYNC + { + struct stat buf; + rc = osFstat(fd, &buf); + } +#elif HAVE_FULLFSYNC + if( fullSync ){ + rc = osFcntl(fd, F_FULLFSYNC, 0); + }else{ + rc = 1; + } + /* If the FULLFSYNC failed, fall back to attempting an fsync(). + ** It shouldn't be possible for fullfsync to fail on the local + ** file system (on OSX), so failure indicates that FULLFSYNC + ** isn't supported for this file system. So, attempt an fsync + ** and (for now) ignore the overhead of a superfluous fcntl call. + ** It'd be better to detect fullfsync support once and avoid + ** the fcntl call every time sync is called. + */ + if( rc ) rc = fsync(fd); + +#elif defined(__APPLE__) + /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly + ** so currently we default to the macro that redefines fdatasync to fsync + */ + rc = fsync(fd); +#else + rc = fdatasync(fd); +#if OS_VXWORKS + if( rc==-1 && errno==ENOTSUP ){ + rc = fsync(fd); + } +#endif /* OS_VXWORKS */ +#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ + + if( OS_VXWORKS && rc!= -1 ){ + rc = 0; + } + return rc; +} + +/* +** Open a file descriptor to the directory containing file zFilename. +** If successful, *pFd is set to the opened file descriptor and +** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM +** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined +** value. +** +** The directory file descriptor is used for only one thing - to +** fsync() a directory to make sure file creation and deletion events +** are flushed to disk. Such fsyncs are not needed on newer +** journaling filesystems, but are required on older filesystems. +** +** This routine can be overridden using the xSetSysCall interface. +** The ability to override this routine was added in support of the +** chromium sandbox. Opening a directory is a security risk (we are +** told) so making it overrideable allows the chromium sandbox to +** replace this routine with a harmless no-op. To make this routine +** a no-op, replace it with a stub that returns SQLITE_OK but leaves +** *pFd set to a negative number. +** +** If SQLITE_OK is returned, the caller is responsible for closing +** the file descriptor *pFd using close(). +*/ +static int openDirectory(const char *zFilename, int *pFd){ + int ii; + int fd = -1; + char zDirname[MAX_PATHNAME+1]; + + sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); + for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--); + if( ii>0 ){ + zDirname[ii] = '\0'; + }else{ + if( zDirname[0]!='/' ) zDirname[0] = '.'; + zDirname[1] = 0; + } + fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); + if( fd>=0 ){ + OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); + } + *pFd = fd; + if( fd>=0 ) return SQLITE_OK; + return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); +} + +/* +** Make sure all writes to a particular file are committed to disk. +** +** If dataOnly==0 then both the file itself and its metadata (file +** size, access time, etc) are synced. If dataOnly!=0 then only the +** file data is synced. +** +** Under Unix, also make sure that the directory entry for the file +** has been created by fsync-ing the directory that contains the file. +** If we do not do this and we encounter a power failure, the directory +** entry for the journal might not exist after we reboot. The next +** SQLite to access the file will not know that the journal exists (because +** the directory entry for the journal was never created) and the transaction +** will not roll back - possibly leading to database corruption. +*/ +static int unixSync(sqlite3_file *id, int flags){ + int rc; + unixFile *pFile = (unixFile*)id; + + int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); + int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; + + /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ + assert((flags&0x0F)==SQLITE_SYNC_NORMAL + || (flags&0x0F)==SQLITE_SYNC_FULL + ); + + /* Unix cannot, but some systems may return SQLITE_FULL from here. This + ** line is to test that doing so does not cause any problems. + */ + SimulateDiskfullError( return SQLITE_FULL ); + + assert( pFile ); + OSTRACE(("SYNC %-3d\n", pFile->h)); + rc = full_fsync(pFile->h, isFullsync, isDataOnly); + SimulateIOError( rc=1 ); + if( rc ){ + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); + } + + /* Also fsync the directory containing the file if the DIRSYNC flag + ** is set. This is a one-time occurrence. Many systems (examples: AIX) + ** are unable to fsync a directory, so ignore errors on the fsync. + */ + if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ + int dirfd; + OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, + HAVE_FULLFSYNC, isFullsync)); + rc = osOpenDirectory(pFile->zPath, &dirfd); + if( rc==SQLITE_OK ){ + full_fsync(dirfd, 0, 0); + robust_close(pFile, dirfd, __LINE__); + }else{ + assert( rc==SQLITE_CANTOPEN ); + rc = SQLITE_OK; + } + pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; + } + return rc; +} + +/* +** Truncate an open file to a specified size +*/ +static int unixTruncate(sqlite3_file *id, i64 nByte){ + unixFile *pFile = (unixFile *)id; + int rc; + assert( pFile ); + SimulateIOError( return SQLITE_IOERR_TRUNCATE ); + + /* If the user has configured a chunk-size for this file, truncate the + ** file so that it consists of an integer number of chunks (i.e. the + ** actual file size after the operation may be larger than the requested + ** size). + */ + if( pFile->szChunk>0 ){ + nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; + } + + rc = robust_ftruncate(pFile->h, nByte); + if( rc ){ + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + }else{ +#ifdef SQLITE_DEBUG + /* If we are doing a normal write to a database file (as opposed to + ** doing a hot-journal rollback or a write to some file other than a + ** normal database file) and we truncate the file to zero length, + ** that effectively updates the change counter. This might happen + ** when restoring a database using the backup API from a zero-length + ** source. + */ + if( pFile->inNormalWrite && nByte==0 ){ + pFile->transCntrChng = 1; + } +#endif + +#if SQLITE_MAX_MMAP_SIZE>0 + /* If the file was just truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. + */ + if( nBytemmapSize ){ + pFile->mmapSize = nByte; + } +#endif + + return SQLITE_OK; + } +} + +/* +** Determine the current size of a file in bytes +*/ +static int unixFileSize(sqlite3_file *id, i64 *pSize){ + int rc; + struct stat buf; + assert( id ); + rc = osFstat(((unixFile*)id)->h, &buf); + SimulateIOError( rc=1 ); + if( rc!=0 ){ + storeLastErrno((unixFile*)id, errno); + return SQLITE_IOERR_FSTAT; + } + *pSize = buf.st_size; + + /* When opening a zero-size database, the findInodeInfo() procedure + ** writes a single byte into that file in order to work around a bug + ** in the OS-X msdos filesystem. In order to avoid problems with upper + ** layers, we need to report this file size as zero even though it is + ** really 1. Ticket #3260. + */ + if( *pSize==1 ) *pSize = 0; + + + return SQLITE_OK; +} + +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) +/* +** Handler for proxy-locking file-control verbs. Defined below in the +** proxying locking division. +*/ +static int proxyFileControl(sqlite3_file*,int,void*); +#endif + +/* +** This function is called to handle the SQLITE_FCNTL_SIZE_HINT +** file-control operation. Enlarge the database to nBytes in size +** (rounded up to the next chunk-size). If the database is already +** nBytes or larger, this routine is a no-op. +*/ +static int fcntlSizeHint(unixFile *pFile, i64 nByte){ + if( pFile->szChunk>0 ){ + i64 nSize; /* Required file size */ + struct stat buf; /* Used to hold return values of fstat() */ + + if( osFstat(pFile->h, &buf) ){ + return SQLITE_IOERR_FSTAT; + } + + nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; + if( nSize>(i64)buf.st_size ){ + +#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE + /* The code below is handling the return value of osFallocate() + ** correctly. posix_fallocate() is defined to "returns zero on success, + ** or an error number on failure". See the manpage for details. */ + int err; + do{ + err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); + }while( err==EINTR ); + if( err && err!=EINVAL ) return SQLITE_IOERR_WRITE; +#else + /* If the OS does not have posix_fallocate(), fake it. Write a + ** single byte to the last byte in each block that falls entirely + ** within the extended region. Then, if required, a single byte + ** at offset (nSize-1), to set the size of the file correctly. + ** This is a similar technique to that used by glibc on systems + ** that do not have a real fallocate() call. + */ + int nBlk = buf.st_blksize; /* File-system block size */ + int nWrite = 0; /* Number of bytes written by seekAndWrite */ + i64 iWrite; /* Next offset to write to */ + + iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1; + assert( iWrite>=buf.st_size ); + assert( ((iWrite+1)%nBlk)==0 ); + for(/*no-op*/; iWrite=nSize ) iWrite = nSize - 1; + nWrite = seekAndWrite(pFile, iWrite, "", 1); + if( nWrite!=1 ) return SQLITE_IOERR_WRITE; + } +#endif + } + } + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ + int rc; + if( pFile->szChunk<=0 ){ + if( robust_ftruncate(pFile->h, nByte) ){ + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + } + } + + rc = unixMapfile(pFile, nByte); + return rc; + } +#endif + + return SQLITE_OK; +} + +/* +** If *pArg is initially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + +/* Forward declaration */ +static int unixGetTempname(int nBuf, char *zBuf); +#ifndef SQLITE_OMIT_WAL + static int unixFcntlExternalReader(unixFile*, int*); +#endif + +/* +** Information and control of an open file handle. +*/ +static int unixFileControl(sqlite3_file *id, int op, void *pArg){ + unixFile *pFile = (unixFile*)id; + switch( op ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); + return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + + case SQLITE_FCNTL_LOCKSTATE: { + *(int*)pArg = pFile->eFileLock; + return SQLITE_OK; + } + case SQLITE_FCNTL_LAST_ERRNO: { + *(int*)pArg = pFile->lastErrno; + return SQLITE_OK; + } + case SQLITE_FCNTL_CHUNK_SIZE: { + pFile->szChunk = *(int *)pArg; + return SQLITE_OK; + } + case SQLITE_FCNTL_SIZE_HINT: { + int rc; + SimulateIOErrorBenign(1); + rc = fcntlSizeHint(pFile, *(i64 *)pArg); + SimulateIOErrorBenign(0); + return rc; + } + case SQLITE_FCNTL_PERSIST_WAL: { + unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_VFSNAME: { + *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); + return SQLITE_OK; + } + case SQLITE_FCNTL_TEMPFILENAME: { + char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); + if( zTFile ){ + unixGetTempname(pFile->pVfs->mxPathname, zTFile); + *(char**)pArg = zTFile; + } + return SQLITE_OK; + } + case SQLITE_FCNTL_HAS_MOVED: { + *(int*)pArg = fileHasMoved(pFile); + return SQLITE_OK; + } +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + case SQLITE_FCNTL_LOCK_TIMEOUT: { + int iOld = pFile->iBusyTimeout; + pFile->iBusyTimeout = *(int*)pArg; + *(int*)pArg = iOld; + return SQLITE_OK; + } +#endif +#if SQLITE_MAX_MMAP_SIZE>0 + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + int rc = SQLITE_OK; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + + /* The value of newLimit may be eventually cast to (size_t) and passed + ** to mmap(). Restrict its value to 2GB if (size_t) is not at least a + ** 64-bit type. */ + if( newLimit>0 && sizeof(size_t)<8 ){ + newLimit = (newLimit & 0x7FFFFFFF); + } + + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ + pFile->mmapSizeMax = newLimit; + if( pFile->mmapSize>0 ){ + unixUnmapfile(pFile); + rc = unixMapfile(pFile, -1); + } + } + return rc; + } +#endif +#ifdef SQLITE_DEBUG + /* The pager calls this method to signal that it has done + ** a rollback and that the database is therefore unchanged and + ** it hence it is OK for the transaction change counter to be + ** unchanged. + */ + case SQLITE_FCNTL_DB_UNCHANGED: { + ((unixFile*)id)->dbUpdate = 0; + return SQLITE_OK; + } +#endif +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + case SQLITE_FCNTL_SET_LOCKPROXYFILE: + case SQLITE_FCNTL_GET_LOCKPROXYFILE: { + return proxyFileControl(id,op,pArg); + } +#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ + + case SQLITE_FCNTL_EXTERNAL_READER: { +#ifndef SQLITE_OMIT_WAL + return unixFcntlExternalReader((unixFile*)id, (int*)pArg); +#else + *(int*)pArg = 0; + return SQLITE_OK; +#endif + } + } + return SQLITE_NOTFOUND; +} + +/* +** If pFd->sectorSize is non-zero when this function is called, it is a +** no-op. Otherwise, the values of pFd->sectorSize and +** pFd->deviceCharacteristics are set according to the file-system +** characteristics. +** +** There are two versions of this function. One for QNX and one for all +** other systems. +*/ +#ifndef __QNXNTO__ +static void setDeviceCharacteristics(unixFile *pFd){ + assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); + if( pFd->sectorSize==0 ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + int res; + u32 f = 0; + + /* Check for support for F2FS atomic batch writes. */ + res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); + if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ + pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + + /* Set the POWERSAFE_OVERWRITE flag if requested. */ + if( pFd->ctrlFlags & UNIXFILE_PSOW ){ + pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; + } + + pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + } +} +#else +#include +#include +static void setDeviceCharacteristics(unixFile *pFile){ + if( pFile->sectorSize == 0 ){ + struct statvfs fsInfo; + + /* Set defaults for non-supported filesystems */ + pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + pFile->deviceCharacteristics = 0; + if( fstatvfs(pFile->h, &fsInfo) == -1 ) { + return; + } + + if( !strcmp(fsInfo.f_basetype, "tmp") ) { + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( strstr(fsInfo.f_basetype, "etfs") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* etfs cluster size writes are atomic */ + (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* full bitset of atomics from max sector size and smaller */ + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( strstr(fsInfo.f_basetype, "dos") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* full bitset of atomics from max sector size and smaller */ + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else{ + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + 0; + } + } + /* Last chance verification. If the sector size isn't a multiple of 512 + ** then it isn't valid.*/ + if( pFile->sectorSize % 512 != 0 ){ + pFile->deviceCharacteristics = 0; + pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + } +} +#endif + +/* +** Return the sector size in bytes of the underlying block device for +** the specified file. This is almost always 512 bytes, but may be +** larger for some devices. +** +** SQLite code assumes this function cannot fail. It also assumes that +** if two files are created in the same file-system directory (i.e. +** a database and its journal file) that the sector size will be the +** same for both. +*/ +static int unixSectorSize(sqlite3_file *id){ + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->sectorSize; +} + +/* +** Return the device characteristics for the file. +** +** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. +** However, that choice is controversial since technically the underlying +** file system does not always provide powersafe overwrites. (In other +** words, after a power-loss event, parts of the file that were never +** written might end up being altered.) However, non-PSOW behavior is very, +** very rare. And asserting PSOW makes a large reduction in the amount +** of required I/O for journaling, since a lot of padding is eliminated. +** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control +** available to turn it off and URI query parameter available to turn it off. +*/ +static int unixDeviceCharacteristics(sqlite3_file *id){ + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->deviceCharacteristics; +} + +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + +/* +** Return the system page size. +** +** This function should not be called directly by other code in this file. +** Instead, it should be called via macro osGetpagesize(). +*/ +static int unixGetpagesize(void){ +#if OS_VXWORKS + return 1024; +#elif defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} + +#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ + +#ifndef SQLITE_OMIT_WAL + +/* +** Object used to represent an shared memory buffer. +** +** When multiple threads all reference the same wal-index, each thread +** has its own unixShm object, but they all point to a single instance +** of this unixShmNode object. In other words, each wal-index is opened +** only once per process. +** +** Each unixShmNode object is connected to a single unixInodeInfo object. +** We could coalesce this object into unixInodeInfo, but that would mean +** every open file that does not use shared memory (in other words, most +** open files) would have to carry around this extra information. So +** the unixInodeInfo object contains a pointer to this unixShmNode object +** and the unixShmNode object is created only when needed. +** +** unixMutexHeld() must be true when creating or destroying +** this object or while reading or writing the following fields: +** +** nRef +** +** The following fields are read-only after the object is created: +** +** hShm +** zFilename +** +** Either unixShmNode.pShmMutex must be held or unixShmNode.nRef==0 and +** unixMutexHeld() is true when reading or writing any other field +** in this structure. +*/ +struct unixShmNode { + unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ + sqlite3_mutex *pShmMutex; /* Mutex to access this object */ + char *zFilename; /* Name of the mmapped file */ + int hShm; /* Open file descriptor */ + int szRegion; /* Size of shared-memory regions */ + u16 nRegion; /* Size of array apRegion */ + u8 isReadonly; /* True if read-only */ + u8 isUnlocked; /* True if no DMS lock held */ + char **apRegion; /* Array of mapped shared-memory regions */ + int nRef; /* Number of unixShm objects pointing to this */ + unixShm *pFirst; /* All unixShm objects pointing to this */ + int aLock[SQLITE_SHM_NLOCK]; /* # shared locks on slot, -1==excl lock */ +#ifdef SQLITE_DEBUG + u8 exclMask; /* Mask of exclusive locks held */ + u8 sharedMask; /* Mask of shared locks held */ + u8 nextShmId; /* Next available unixShm.id value */ +#endif +}; + +/* +** Structure used internally by this VFS to record the state of an +** open shared memory connection. +** +** The following fields are initialized when this object is created and +** are read-only thereafter: +** +** unixShm.pShmNode +** unixShm.id +** +** All other fields are read/write. The unixShm.pShmNode->pShmMutex must +** be held while accessing any read/write fields. +*/ +struct unixShm { + unixShmNode *pShmNode; /* The underlying unixShmNode object */ + unixShm *pNext; /* Next unixShm with the same unixShmNode */ + u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ + u8 id; /* Id of this connection within its unixShmNode */ + u16 sharedMask; /* Mask of shared locks held */ + u16 exclMask; /* Mask of exclusive locks held */ +}; + +/* +** Constants used for locking +*/ +#define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ +#define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ + +/* +** Use F_GETLK to check whether or not there are any readers with open +** wal-mode transactions in other processes on database file pFile. If +** no error occurs, return SQLITE_OK and set (*piOut) to 1 if there are +** such transactions, or 0 otherwise. If an error occurs, return an +** SQLite error code. The final value of *piOut is undefined in this +** case. +*/ +static int unixFcntlExternalReader(unixFile *pFile, int *piOut){ + int rc = SQLITE_OK; + *piOut = 0; + if( pFile->pShm){ + unixShmNode *pShmNode = pFile->pShm->pShmNode; + struct flock f; + + memset(&f, 0, sizeof(f)); + f.l_type = F_WRLCK; + f.l_whence = SEEK_SET; + f.l_start = UNIX_SHM_BASE + 3; + f.l_len = SQLITE_SHM_NLOCK - 3; + + sqlite3_mutex_enter(pShmNode->pShmMutex); + if( osFcntl(pShmNode->hShm, F_GETLK, &f)<0 ){ + rc = SQLITE_IOERR_LOCK; + }else{ + *piOut = (f.l_type!=F_UNLCK); + } + sqlite3_mutex_leave(pShmNode->pShmMutex); + } + + return rc; +} + + +/* +** Apply posix advisory locks for all bytes from ofst through ofst+n-1. +** +** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking +** otherwise. +*/ +static int unixShmSystemLock( + unixFile *pFile, /* Open connection to the WAL file */ + int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ + int ofst, /* First byte of the locking range */ + int n /* Number of bytes to lock */ +){ + unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ + struct flock f; /* The posix advisory locking structure */ + int rc = SQLITE_OK; /* Result code form fcntl() */ + + /* Access to the unixShmNode object is serialized by the caller */ + pShmNode = pFile->pInode->pShmNode; + assert( pShmNode->nRef==0 || sqlite3_mutex_held(pShmNode->pShmMutex) ); + assert( pShmNode->nRef>0 || unixMutexHeld() ); + + /* Shared locks never span more than one byte */ + assert( n==1 || lockType!=F_RDLCK ); + + /* Locks are within range */ + assert( n>=1 && n<=SQLITE_SHM_NLOCK ); + + if( pShmNode->hShm>=0 ){ + int res; + /* Initialize the locking parameters */ + f.l_type = lockType; + f.l_whence = SEEK_SET; + f.l_start = ofst; + f.l_len = n; + res = osSetPosixAdvisoryLock(pShmNode->hShm, &f, pFile); + if( res==-1 ){ +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + rc = (pFile->iBusyTimeout ? SQLITE_BUSY_TIMEOUT : SQLITE_BUSY); +#else + rc = SQLITE_BUSY; +#endif + } + } + + /* Update the global lock state and do debug tracing */ +#ifdef SQLITE_DEBUG + { u16 mask; + OSTRACE(("SHM-LOCK ")); + mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<exclMask &= ~mask; + pShmNode->sharedMask &= ~mask; + }else if( lockType==F_RDLCK ){ + OSTRACE(("read-lock %d ok", ofst)); + pShmNode->exclMask &= ~mask; + pShmNode->sharedMask |= mask; + }else{ + assert( lockType==F_WRLCK ); + OSTRACE(("write-lock %d ok", ofst)); + pShmNode->exclMask |= mask; + pShmNode->sharedMask &= ~mask; + } + }else{ + if( lockType==F_UNLCK ){ + OSTRACE(("unlock %d failed", ofst)); + }else if( lockType==F_RDLCK ){ + OSTRACE(("read-lock failed")); + }else{ + assert( lockType==F_WRLCK ); + OSTRACE(("write-lock %d failed", ofst)); + } + } + OSTRACE((" - afterwards %03x,%03x\n", + pShmNode->sharedMask, pShmNode->exclMask)); + } +#endif + + return rc; +} + +/* +** Return the minimum number of 32KB shm regions that should be mapped at +** a time, assuming that each mapping must be an integer multiple of the +** current system page-size. +** +** Usually, this is 1. The exception seems to be systems that are configured +** to use 64KB pages - in this case each mapping must cover at least two +** shm regions. +*/ +static int unixShmRegionPerMap(void){ + int shmsz = 32*1024; /* SHM region size */ + int pgsz = osGetpagesize(); /* System page size */ + assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ + if( pgszpInode->pShmNode; + assert( unixMutexHeld() ); + if( p && ALWAYS(p->nRef==0) ){ + int nShmPerMap = unixShmRegionPerMap(); + int i; + assert( p->pInode==pFd->pInode ); + sqlite3_mutex_free(p->pShmMutex); + for(i=0; inRegion; i+=nShmPerMap){ + if( p->hShm>=0 ){ + osMunmap(p->apRegion[i], p->szRegion); + }else{ + sqlite3_free(p->apRegion[i]); + } + } + sqlite3_free(p->apRegion); + if( p->hShm>=0 ){ + robust_close(pFd, p->hShm, __LINE__); + p->hShm = -1; + } + p->pInode->pShmNode = 0; + sqlite3_free(p); + } +} + +/* +** The DMS lock has not yet been taken on shm file pShmNode. Attempt to +** take it now. Return SQLITE_OK if successful, or an SQLite error +** code otherwise. +** +** If the DMS cannot be locked because this is a readonly_shm=1 +** connection and no other process already holds a lock, return +** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1. +*/ +static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){ + struct flock lock; + int rc = SQLITE_OK; + + /* Use F_GETLK to determine the locks other processes are holding + ** on the DMS byte. If it indicates that another process is holding + ** a SHARED lock, then this process may also take a SHARED lock + ** and proceed with opening the *-shm file. + ** + ** Or, if no other process is holding any lock, then this process + ** is the first to open it. In this case take an EXCLUSIVE lock on the + ** DMS byte and truncate the *-shm file to zero bytes in size. Then + ** downgrade to a SHARED lock on the DMS byte. + ** + ** If another process is holding an EXCLUSIVE lock on the DMS byte, + ** return SQLITE_BUSY to the caller (it will try again). An earlier + ** version of this code attempted the SHARED lock at this point. But + ** this introduced a subtle race condition: if the process holding + ** EXCLUSIVE failed just before truncating the *-shm file, then this + ** process might open and use the *-shm file without truncating it. + ** And if the *-shm file has been corrupted by a power failure or + ** system crash, the database itself may also become corrupt. */ + lock.l_whence = SEEK_SET; + lock.l_start = UNIX_SHM_DMS; + lock.l_len = 1; + lock.l_type = F_WRLCK; + if( osFcntl(pShmNode->hShm, F_GETLK, &lock)!=0 ) { + rc = SQLITE_IOERR_LOCK; + }else if( lock.l_type==F_UNLCK ){ + if( pShmNode->isReadonly ){ + pShmNode->isUnlocked = 1; + rc = SQLITE_READONLY_CANTINIT; + }else{ + rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1); + /* The first connection to attach must truncate the -shm file. We + ** truncate to 3 bytes (an arbitrary small number, less than the + ** -shm header size) rather than 0 as a system debugging aid, to + ** help detect if a -shm file truncation is legitimate or is the work + ** or a rogue process. */ + if( rc==SQLITE_OK && robust_ftruncate(pShmNode->hShm, 3) ){ + rc = unixLogError(SQLITE_IOERR_SHMOPEN,"ftruncate",pShmNode->zFilename); + } + } + }else if( lock.l_type==F_WRLCK ){ + rc = SQLITE_BUSY; + } + + if( rc==SQLITE_OK ){ + assert( lock.l_type==F_UNLCK || lock.l_type==F_RDLCK ); + rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); + } + return rc; +} + +/* +** Open a shared-memory area associated with open database file pDbFd. +** This particular implementation uses mmapped files. +** +** The file used to implement shared-memory is in the same directory +** as the open database file and has the same name as the open database +** file with the "-shm" suffix added. For example, if the database file +** is "/home/user1/config.db" then the file that is created and mmapped +** for shared memory will be called "/home/user1/config.db-shm". +** +** Another approach to is to use files in /dev/shm or /dev/tmp or an +** some other tmpfs mount. But if a file in a different directory +** from the database file is used, then differing access permissions +** or a chroot() might cause two different processes on the same +** database to end up using different files for shared memory - +** meaning that their memory would not really be shared - resulting +** in database corruption. Nevertheless, this tmpfs file usage +** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" +** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time +** option results in an incompatible build of SQLite; builds of SQLite +** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the +** same database file at the same time, database corruption will likely +** result. The SQLITE_SHM_DIRECTORY compile-time option is considered +** "unsupported" and may go away in a future SQLite release. +** +** When opening a new shared-memory file, if no other instances of that +** file are currently open, in this process or in other processes, then +** the file must be truncated to zero length or have its header cleared. +** +** If the original database file (pDbFd) is using the "unix-excl" VFS +** that means that an exclusive lock is held on the database file and +** that no other processes are able to read or write the database. In +** that case, we do not really need shared memory. No shared memory +** file is created. The shared memory will be simulated with heap memory. +*/ +static int unixOpenSharedMemory(unixFile *pDbFd){ + struct unixShm *p = 0; /* The connection to be opened */ + struct unixShmNode *pShmNode; /* The underlying mmapped file */ + int rc = SQLITE_OK; /* Result code */ + unixInodeInfo *pInode; /* The inode of fd */ + char *zShm; /* Name of the file used for SHM */ + int nShmFilename; /* Size of the SHM filename in bytes */ + + /* Allocate space for the new unixShm object. */ + p = sqlite3_malloc64( sizeof(*p) ); + if( p==0 ) return SQLITE_NOMEM_BKPT; + memset(p, 0, sizeof(*p)); + assert( pDbFd->pShm==0 ); + + /* Check to see if a unixShmNode object already exists. Reuse an existing + ** one if present. Create a new one if necessary. + */ + assert( unixFileMutexNotheld(pDbFd) ); + unixEnterMutex(); + pInode = pDbFd->pInode; + pShmNode = pInode->pShmNode; + if( pShmNode==0 ){ + struct stat sStat; /* fstat() info for database file */ +#ifndef SQLITE_SHM_DIRECTORY + const char *zBasePath = pDbFd->zPath; +#endif + + /* Call fstat() to figure out the permissions on the database file. If + ** a new *-shm file is created, an attempt will be made to create it + ** with the same permissions. + */ + if( osFstat(pDbFd->h, &sStat) ){ + rc = SQLITE_IOERR_FSTAT; + goto shm_open_err; + } + +#ifdef SQLITE_SHM_DIRECTORY + nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; +#else + nShmFilename = 6 + (int)strlen(zBasePath); +#endif + pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); + if( pShmNode==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto shm_open_err; + } + memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); + zShm = pShmNode->zFilename = (char*)&pShmNode[1]; +#ifdef SQLITE_SHM_DIRECTORY + sqlite3_snprintf(nShmFilename, zShm, + SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", + (u32)sStat.st_ino, (u32)sStat.st_dev); +#else + sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath); + sqlite3FileSuffix3(pDbFd->zPath, zShm); +#endif + pShmNode->hShm = -1; + pDbFd->pInode->pShmNode = pShmNode; + pShmNode->pInode = pDbFd->pInode; + if( sqlite3GlobalConfig.bCoreMutex ){ + pShmNode->pShmMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pShmNode->pShmMutex==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto shm_open_err; + } + } + + if( pInode->bProcessLock==0 ){ + if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ + pShmNode->hShm = robust_open(zShm, O_RDWR|O_CREAT|O_NOFOLLOW, + (sStat.st_mode&0777)); + } + if( pShmNode->hShm<0 ){ + pShmNode->hShm = robust_open(zShm, O_RDONLY|O_NOFOLLOW, + (sStat.st_mode&0777)); + if( pShmNode->hShm<0 ){ + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm); + goto shm_open_err; + } + pShmNode->isReadonly = 1; + } + + /* If this process is running as root, make sure that the SHM file + ** is owned by the same user that owns the original database. Otherwise, + ** the original owner will not be able to connect. + */ + robustFchown(pShmNode->hShm, sStat.st_uid, sStat.st_gid); + + rc = unixLockSharedMemory(pDbFd, pShmNode); + if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err; + } + } + + /* Make the new connection a child of the unixShmNode */ + p->pShmNode = pShmNode; +#ifdef SQLITE_DEBUG + p->id = pShmNode->nextShmId++; +#endif + pShmNode->nRef++; + pDbFd->pShm = p; + unixLeaveMutex(); + + /* The reference count on pShmNode has already been incremented under + ** the cover of the unixEnterMutex() mutex and the pointer from the + ** new (struct unixShm) object to the pShmNode has been set. All that is + ** left to do is to link the new object into the linked list starting + ** at pShmNode->pFirst. This must be done while holding the + ** pShmNode->pShmMutex. + */ + sqlite3_mutex_enter(pShmNode->pShmMutex); + p->pNext = pShmNode->pFirst; + pShmNode->pFirst = p; + sqlite3_mutex_leave(pShmNode->pShmMutex); + return rc; + + /* Jump here on any error */ +shm_open_err: + unixShmPurge(pDbFd); /* This call frees pShmNode if required */ + sqlite3_free(p); + unixLeaveMutex(); + return rc; +} + +/* +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. Each shared-memory region is szRegion +** bytes in size. +** +** If an error occurs, an error code is returned and *pp is set to NULL. +** +** Otherwise, if the bExtend parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** bExtend is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. +** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. +*/ +static int unixShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int bExtend, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + unixFile *pDbFd = (unixFile*)fd; + unixShm *p; + unixShmNode *pShmNode; + int rc = SQLITE_OK; + int nShmPerMap = unixShmRegionPerMap(); + int nReqRegion; + + /* If the shared-memory file has not yet been opened, open it now. */ + if( pDbFd->pShm==0 ){ + rc = unixOpenSharedMemory(pDbFd); + if( rc!=SQLITE_OK ) return rc; + } + + p = pDbFd->pShm; + pShmNode = p->pShmNode; + sqlite3_mutex_enter(pShmNode->pShmMutex); + if( pShmNode->isUnlocked ){ + rc = unixLockSharedMemory(pDbFd, pShmNode); + if( rc!=SQLITE_OK ) goto shmpage_out; + pShmNode->isUnlocked = 0; + } + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + assert( pShmNode->pInode==pDbFd->pInode ); + assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); + assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); + + /* Minimum number of regions required to be mapped. */ + nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; + + if( pShmNode->nRegionszRegion = szRegion; + + if( pShmNode->hShm>=0 ){ + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + if( osFstat(pShmNode->hShm, &sStat) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + + if( sStat.st_sizehShm, iPg*pgsz + pgsz-1,"",1,&x)!=1 ){ + const char *zFile = pShmNode->zFilename; + rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); + goto shmpage_out; + } + } + } + } + } + + /* Map the requested memory region into this processes address space. */ + apNew = (char **)sqlite3_realloc( + pShmNode->apRegion, nReqRegion*sizeof(char *) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM_BKPT; + goto shmpage_out; + } + pShmNode->apRegion = apNew; + while( pShmNode->nRegionhShm>=0 ){ + pMem = osMmap(0, nMap, + pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, + MAP_SHARED, pShmNode->hShm, szRegion*(i64)pShmNode->nRegion + ); + if( pMem==MAP_FAILED ){ + rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); + goto shmpage_out; + } + }else{ + pMem = sqlite3_malloc64(nMap); + if( pMem==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto shmpage_out; + } + memset(pMem, 0, nMap); + } + + for(i=0; iapRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; + } + pShmNode->nRegion += nShmPerMap; + } + } + +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + *pp = pShmNode->apRegion[iRegion]; + }else{ + *pp = 0; + } + if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; + sqlite3_mutex_leave(pShmNode->pShmMutex); + return rc; +} + +/* +** Check that the pShmNode->aLock[] array comports with the locking bitmasks +** held by each client. Return true if it does, or false otherwise. This +** is to be used in an assert(). e.g. +** +** assert( assertLockingArrayOk(pShmNode) ); +*/ +#ifdef SQLITE_DEBUG +static int assertLockingArrayOk(unixShmNode *pShmNode){ + unixShm *pX; + int aLock[SQLITE_SHM_NLOCK]; + assert( sqlite3_mutex_held(pShmNode->pShmMutex) ); + + memset(aLock, 0, sizeof(aLock)); + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + int i; + for(i=0; iexclMask & (1<sharedMask & (1<=0 ); + aLock[i]++; + } + } + } + + assert( 0==memcmp(pShmNode->aLock, aLock, sizeof(aLock)) ); + return (memcmp(pShmNode->aLock, aLock, sizeof(aLock))==0); +} +#endif + +/* +** Change the lock state for a shared-memory segment. +** +** Note that the relationship between SHAREd and EXCLUSIVE locks is a little +** different here than in posix. In xShmLock(), one can go from unlocked +** to shared and back or from unlocked to exclusive and back. But one may +** not go from shared to exclusive or from exclusive to shared. +*/ +static int unixShmLock( + sqlite3_file *fd, /* Database file holding the shared memory */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ +){ + unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ + unixShm *p; /* The shared memory being locked */ + unixShmNode *pShmNode; /* The underlying file iNode */ + int rc = SQLITE_OK; /* Result code */ + u16 mask; /* Mask of locks to take or release */ + int *aLock; + + p = pDbFd->pShm; + if( p==0 ) return SQLITE_IOERR_SHMLOCK; + pShmNode = p->pShmNode; + if( NEVER(pShmNode==0) ) return SQLITE_IOERR_SHMLOCK; + aLock = pShmNode->aLock; + + assert( pShmNode==pDbFd->pInode->pShmNode ); + assert( pShmNode->pInode==pDbFd->pInode ); + assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); + assert( n>=1 ); + assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); + assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); + assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); + assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); + + /* Check that, if this to be a blocking lock, no locks that occur later + ** in the following list than the lock being obtained are already held: + ** + ** 1. Checkpointer lock (ofst==1). + ** 2. Write lock (ofst==0). + ** 3. Read locks (ofst>=3 && ofstiBusyTimeout==0 || ( + (ofst!=2) /* not RECOVER */ + && (ofst!=1 || (p->exclMask|p->sharedMask)==0) + && (ofst!=0 || (p->exclMask|p->sharedMask)<3) + && (ofst<3 || (p->exclMask|p->sharedMask)<(1<1 || mask==(1<pShmMutex); + assert( assertLockingArrayOk(pShmNode) ); + if( flags & SQLITE_SHM_UNLOCK ){ + if( (p->exclMask|p->sharedMask) & mask ){ + int ii; + int bUnlock = 1; + + for(ii=ofst; ii((p->sharedMask & (1<sharedMask & (1<1 ); + aLock[ofst]--; + } + + /* Undo the local locks */ + if( rc==SQLITE_OK ){ + p->exclMask &= ~mask; + p->sharedMask &= ~mask; + } + } + }else if( flags & SQLITE_SHM_SHARED ){ + assert( n==1 ); + assert( (p->exclMask & (1<sharedMask & mask)==0 ){ + if( aLock[ofst]<0 ){ + rc = SQLITE_BUSY; + }else if( aLock[ofst]==0 ){ + rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); + } + + /* Get the local shared locks */ + if( rc==SQLITE_OK ){ + p->sharedMask |= mask; + aLock[ofst]++; + } + } + }else{ + /* Make sure no sibling connections hold locks that will block this + ** lock. If any do, return SQLITE_BUSY right away. */ + int ii; + for(ii=ofst; iisharedMask & mask)==0 ); + if( ALWAYS((p->exclMask & (1<sharedMask & mask)==0 ); + p->exclMask |= mask; + for(ii=ofst; iipShmMutex); + OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", + p->id, osGetpid(0), p->sharedMask, p->exclMask)); + return rc; +} + +/* +** Implement a memory barrier or memory fence on shared memory. +** +** All loads and stores begun before the barrier must complete before +** any load or store begun after the barrier. +*/ +static void unixShmBarrier( + sqlite3_file *fd /* Database file holding the shared memory */ +){ + UNUSED_PARAMETER(fd); + sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ + assert( fd->pMethods->xLock==nolockLock + || unixFileMutexNotheld((unixFile*)fd) + ); + unixEnterMutex(); /* Also mutex, for redundancy */ + unixLeaveMutex(); +} + +/* +** Close a connection to shared-memory. Delete the underlying +** storage if deleteFlag is true. +** +** If there is no shared memory associated with the connection then this +** routine is a harmless no-op. +*/ +static int unixShmUnmap( + sqlite3_file *fd, /* The underlying database file */ + int deleteFlag /* Delete shared-memory if true */ +){ + unixShm *p; /* The connection to be closed */ + unixShmNode *pShmNode; /* The underlying shared-memory file */ + unixShm **pp; /* For looping over sibling connections */ + unixFile *pDbFd; /* The underlying database file */ + + pDbFd = (unixFile*)fd; + p = pDbFd->pShm; + if( p==0 ) return SQLITE_OK; + pShmNode = p->pShmNode; + + assert( pShmNode==pDbFd->pInode->pShmNode ); + assert( pShmNode->pInode==pDbFd->pInode ); + + /* Remove connection p from the set of connections associated + ** with pShmNode */ + sqlite3_mutex_enter(pShmNode->pShmMutex); + for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} + *pp = p->pNext; + + /* Free the connection p */ + sqlite3_free(p); + pDbFd->pShm = 0; + sqlite3_mutex_leave(pShmNode->pShmMutex); + + /* If pShmNode->nRef has reached 0, then close the underlying + ** shared-memory file, too */ + assert( unixFileMutexNotheld(pDbFd) ); + unixEnterMutex(); + assert( pShmNode->nRef>0 ); + pShmNode->nRef--; + if( pShmNode->nRef==0 ){ + if( deleteFlag && pShmNode->hShm>=0 ){ + osUnlink(pShmNode->zFilename); + } + unixShmPurge(pDbFd); + } + unixLeaveMutex(); + + return SQLITE_OK; +} + + +#else +# define unixShmMap 0 +# define unixShmLock 0 +# define unixShmBarrier 0 +# define unixShmUnmap 0 +#endif /* #ifndef SQLITE_OMIT_WAL */ + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** If it is currently memory mapped, unmap file pFd. +*/ +static void unixUnmapfile(unixFile *pFd){ + assert( pFd->nFetchOut==0 ); + if( pFd->pMapRegion ){ + osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); + pFd->pMapRegion = 0; + pFd->mmapSize = 0; + pFd->mmapSizeActual = 0; + } +} + +/* +** Attempt to set the size of the memory mapping maintained by file +** descriptor pFd to nNew bytes. Any existing mapping is discarded. +** +** If successful, this function sets the following variables: +** +** unixFile.pMapRegion +** unixFile.mmapSize +** unixFile.mmapSizeActual +** +** If unsuccessful, an error message is logged via sqlite3_log() and +** the three variables above are zeroed. In this case SQLite should +** continue accessing the database using the xRead() and xWrite() +** methods. +*/ +static void unixRemapfile( + unixFile *pFd, /* File descriptor object */ + i64 nNew /* Required mapping size */ +){ + const char *zErr = "mmap"; + int h = pFd->h; /* File descriptor open on db file */ + u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ + i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ + u8 *pNew = 0; /* Location of new mapping */ + int flags = PROT_READ; /* Flags to pass to mmap() */ + + assert( pFd->nFetchOut==0 ); + assert( nNew>pFd->mmapSize ); + assert( nNew<=pFd->mmapSizeMax ); + assert( nNew>0 ); + assert( pFd->mmapSizeActual>=pFd->mmapSize ); + assert( MAP_FAILED!=0 ); + +#ifdef SQLITE_MMAP_READWRITE + if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; +#endif + + if( pOrig ){ +#if HAVE_MREMAP + i64 nReuse = pFd->mmapSize; +#else + const int szSyspage = osGetpagesize(); + i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); +#endif + u8 *pReq = &pOrig[nReuse]; + + /* Unmap any pages of the existing mapping that cannot be reused. */ + if( nReuse!=nOrig ){ + osMunmap(pReq, nOrig-nReuse); + } + +#if HAVE_MREMAP + pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); + zErr = "mremap"; +#else + pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); + if( pNew!=MAP_FAILED ){ + if( pNew!=pReq ){ + osMunmap(pNew, nNew - nReuse); + pNew = 0; + }else{ + pNew = pOrig; + } + } +#endif + + /* The attempt to extend the existing mapping failed. Free it. */ + if( pNew==MAP_FAILED || pNew==0 ){ + osMunmap(pOrig, nReuse); + } + } + + /* If pNew is still NULL, try to create an entirely new mapping. */ + if( pNew==0 ){ + pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); + } + + if( pNew==MAP_FAILED ){ + pNew = 0; + nNew = 0; + unixLogError(SQLITE_OK, zErr, pFd->zPath); + + /* If the mmap() above failed, assume that all subsequent mmap() calls + ** will probably fail too. Fall back to using xRead/xWrite exclusively + ** in this case. */ + pFd->mmapSizeMax = 0; + } + pFd->pMapRegion = (void *)pNew; + pFd->mmapSize = pFd->mmapSizeActual = nNew; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. +*/ +static int unixMapfile(unixFile *pFd, i64 nMap){ + assert( nMap>=0 || pFd->nFetchOut==0 ); + assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + struct stat statbuf; /* Low-level file information */ + if( osFstat(pFd->h, &statbuf) ){ + return SQLITE_IOERR_FSTAT; + } + nMap = statbuf.st_size; + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + + assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); + if( nMap!=pFd->mmapSize ){ + unixRemapfile(pFd, nMap); + } + + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling unixUnfetch(). +*/ +static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ +#endif + *pp = 0; + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = unixMapfile(pFd, -1); + if( rc!=SQLITE_OK ) return rc; + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to unixFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the unixFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ + UNUSED_PARAMETER(iOff); + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + if( p ){ + pFd->nFetchOut--; + }else{ + unixUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); +#else + UNUSED_PARAMETER(fd); + UNUSED_PARAMETER(p); + UNUSED_PARAMETER(iOff); +#endif + return SQLITE_OK; +} + +/* +** Here ends the implementation of all sqlite3_file methods. +** +********************** End sqlite3_file Methods ******************************* +******************************************************************************/ + +/* +** This division contains definitions of sqlite3_io_methods objects that +** implement various file locking strategies. It also contains definitions +** of "finder" functions. A finder-function is used to locate the appropriate +** sqlite3_io_methods object for a particular database file. The pAppData +** field of the sqlite3_vfs VFS objects are initialized to be pointers to +** the correct finder-function for that VFS. +** +** Most finder functions return a pointer to a fixed sqlite3_io_methods +** object. The only interesting finder-function is autolockIoFinder, which +** looks at the filesystem type and tries to guess the best locking +** strategy from that. +** +** For finder-function F, two objects are created: +** +** (1) The real finder-function named "FImpt()". +** +** (2) A constant pointer to this function named just "F". +** +** +** A pointer to the F pointer is used as the pAppData value for VFS +** objects. We have to do this instead of letting pAppData point +** directly at the finder-function since C90 rules prevent a void* +** from be cast into a function pointer. +** +** +** Each instance of this macro generates two objects: +** +** * A constant sqlite3_io_methods object call METHOD that has locking +** methods CLOSE, LOCK, UNLOCK, CKRESLOCK. +** +** * An I/O method finder function called FINDER that returns a pointer +** to the METHOD object in the previous bullet. +*/ +#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \ +static const sqlite3_io_methods METHOD = { \ + VERSION, /* iVersion */ \ + CLOSE, /* xClose */ \ + unixRead, /* xRead */ \ + unixWrite, /* xWrite */ \ + unixTruncate, /* xTruncate */ \ + unixSync, /* xSync */ \ + unixFileSize, /* xFileSize */ \ + LOCK, /* xLock */ \ + UNLOCK, /* xUnlock */ \ + CKLOCK, /* xCheckReservedLock */ \ + unixFileControl, /* xFileControl */ \ + unixSectorSize, /* xSectorSize */ \ + unixDeviceCharacteristics, /* xDeviceCapabilities */ \ + SHMMAP, /* xShmMap */ \ + unixShmLock, /* xShmLock */ \ + unixShmBarrier, /* xShmBarrier */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixFetch, /* xFetch */ \ + unixUnfetch, /* xUnfetch */ \ +}; \ +static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ + UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ + return &METHOD; \ +} \ +static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ + = FINDER##Impl; + +/* +** Here are all of the sqlite3_io_methods objects for each of the +** locking strategies. Functions that return pointers to these methods +** are also created. +*/ +IOMETHODS( + posixIoFinder, /* Finder function name */ + posixIoMethods, /* sqlite3_io_methods object name */ + 3, /* shared memory and mmap are enabled */ + unixClose, /* xClose method */ + unixLock, /* xLock method */ + unixUnlock, /* xUnlock method */ + unixCheckReservedLock, /* xCheckReservedLock method */ + unixShmMap /* xShmMap method */ +) +IOMETHODS( + nolockIoFinder, /* Finder function name */ + nolockIoMethods, /* sqlite3_io_methods object name */ + 3, /* shared memory and mmap are enabled */ + nolockClose, /* xClose method */ + nolockLock, /* xLock method */ + nolockUnlock, /* xUnlock method */ + nolockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +IOMETHODS( + dotlockIoFinder, /* Finder function name */ + dotlockIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + dotlockClose, /* xClose method */ + dotlockLock, /* xLock method */ + dotlockUnlock, /* xUnlock method */ + dotlockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) + +#if SQLITE_ENABLE_LOCKING_STYLE +IOMETHODS( + flockIoFinder, /* Finder function name */ + flockIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + flockClose, /* xClose method */ + flockLock, /* xLock method */ + flockUnlock, /* xUnlock method */ + flockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +#if OS_VXWORKS +IOMETHODS( + semIoFinder, /* Finder function name */ + semIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + semXClose, /* xClose method */ + semXLock, /* xLock method */ + semXUnlock, /* xUnlock method */ + semXCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +IOMETHODS( + afpIoFinder, /* Finder function name */ + afpIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + afpClose, /* xClose method */ + afpLock, /* xLock method */ + afpUnlock, /* xUnlock method */ + afpCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +/* +** The proxy locking method is a "super-method" in the sense that it +** opens secondary file descriptors for the conch and lock files and +** it uses proxy, dot-file, AFP, and flock() locking methods on those +** secondary files. For this reason, the division that implements +** proxy locking is located much further down in the file. But we need +** to go ahead and define the sqlite3_io_methods and finder function +** for proxy locking here. So we forward declare the I/O methods. +*/ +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +static int proxyClose(sqlite3_file*); +static int proxyLock(sqlite3_file*, int); +static int proxyUnlock(sqlite3_file*, int); +static int proxyCheckReservedLock(sqlite3_file*, int*); +IOMETHODS( + proxyIoFinder, /* Finder function name */ + proxyIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + proxyClose, /* xClose method */ + proxyLock, /* xLock method */ + proxyUnlock, /* xUnlock method */ + proxyCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +IOMETHODS( + nfsIoFinder, /* Finder function name */ + nfsIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + unixClose, /* xClose method */ + unixLock, /* xLock method */ + nfsUnlock, /* xUnlock method */ + unixCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +/* +** This "finder" function attempts to determine the best locking strategy +** for the database file "filePath". It then returns the sqlite3_io_methods +** object that implements that strategy. +** +** This is for MacOSX only. +*/ +static const sqlite3_io_methods *autolockIoFinderImpl( + const char *filePath, /* name of the database file */ + unixFile *pNew /* open file object for the database file */ +){ + static const struct Mapping { + const char *zFilesystem; /* Filesystem type name */ + const sqlite3_io_methods *pMethods; /* Appropriate locking method */ + } aMap[] = { + { "hfs", &posixIoMethods }, + { "ufs", &posixIoMethods }, + { "afpfs", &afpIoMethods }, + { "smbfs", &afpIoMethods }, + { "webdav", &nolockIoMethods }, + { 0, 0 } + }; + int i; + struct statfs fsInfo; + struct flock lockInfo; + + if( !filePath ){ + /* If filePath==NULL that means we are dealing with a transient file + ** that does not need to be locked. */ + return &nolockIoMethods; + } + if( statfs(filePath, &fsInfo) != -1 ){ + if( fsInfo.f_flags & MNT_RDONLY ){ + return &nolockIoMethods; + } + for(i=0; aMap[i].zFilesystem; i++){ + if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ + return aMap[i].pMethods; + } + } + } + + /* Default case. Handles, amongst others, "nfs". + ** Test byte-range lock using fcntl(). If the call succeeds, + ** assume that the file-system supports POSIX style locks. + */ + lockInfo.l_len = 1; + lockInfo.l_start = 0; + lockInfo.l_whence = SEEK_SET; + lockInfo.l_type = F_RDLCK; + if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { + if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ + return &nfsIoMethods; + } else { + return &posixIoMethods; + } + }else{ + return &dotlockIoMethods; + } +} +static const sqlite3_io_methods + *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ + +#if OS_VXWORKS +/* +** This "finder" function for VxWorks checks to see if posix advisory +** locking works. If it does, then that is what is used. If it does not +** work, then fallback to named semaphore locking. +*/ +static const sqlite3_io_methods *vxworksIoFinderImpl( + const char *filePath, /* name of the database file */ + unixFile *pNew /* the open file object */ +){ + struct flock lockInfo; + + if( !filePath ){ + /* If filePath==NULL that means we are dealing with a transient file + ** that does not need to be locked. */ + return &nolockIoMethods; + } + + /* Test if fcntl() is supported and use POSIX style locks. + ** Otherwise fall back to the named semaphore method. + */ + lockInfo.l_len = 1; + lockInfo.l_start = 0; + lockInfo.l_whence = SEEK_SET; + lockInfo.l_type = F_RDLCK; + if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { + return &posixIoMethods; + }else{ + return &semIoMethods; + } +} +static const sqlite3_io_methods + *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; + +#endif /* OS_VXWORKS */ + +/* +** An abstract type for a pointer to an IO method finder function: +*/ +typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); + + +/**************************************************************************** +**************************** sqlite3_vfs methods **************************** +** +** This division contains the implementation of methods on the +** sqlite3_vfs object. +*/ + +/* +** Initialize the contents of the unixFile structure pointed to by pId. +*/ +static int fillInUnixFile( + sqlite3_vfs *pVfs, /* Pointer to vfs object */ + int h, /* Open file descriptor of file being opened */ + sqlite3_file *pId, /* Write to the unixFile structure here */ + const char *zFilename, /* Name of the file being opened */ + int ctrlFlags /* Zero or more UNIXFILE_* values */ +){ + const sqlite3_io_methods *pLockingStyle; + unixFile *pNew = (unixFile *)pId; + int rc = SQLITE_OK; + + assert( pNew->pInode==NULL ); + + /* No locking occurs in temporary files */ + assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); + + OSTRACE(("OPEN %-3d %s\n", h, zFilename)); + pNew->h = h; + pNew->pVfs = pVfs; + pNew->zPath = zFilename; + pNew->ctrlFlags = (u8)ctrlFlags; +#if SQLITE_MAX_MMAP_SIZE>0 + pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif + if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), + "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pNew->ctrlFlags |= UNIXFILE_PSOW; + } + if( strcmp(pVfs->zName,"unix-excl")==0 ){ + pNew->ctrlFlags |= UNIXFILE_EXCL; + } + +#if OS_VXWORKS + pNew->pId = vxworksFindFileId(zFilename); + if( pNew->pId==0 ){ + ctrlFlags |= UNIXFILE_NOLOCK; + rc = SQLITE_NOMEM_BKPT; + } +#endif + + if( ctrlFlags & UNIXFILE_NOLOCK ){ + pLockingStyle = &nolockIoMethods; + }else{ + pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); +#if SQLITE_ENABLE_LOCKING_STYLE + /* Cache zFilename in the locking context (AFP and dotlock override) for + ** proxyLock activation is possible (remote proxy is based on db name) + ** zFilename remains valid until file is closed, to support */ + pNew->lockingContext = (void*)zFilename; +#endif + } + + if( pLockingStyle == &posixIoMethods +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE + || pLockingStyle == &nfsIoMethods +#endif + ){ + unixEnterMutex(); + rc = findInodeInfo(pNew, &pNew->pInode); + if( rc!=SQLITE_OK ){ + /* If an error occurred in findInodeInfo(), close the file descriptor + ** immediately, before releasing the mutex. findInodeInfo() may fail + ** in two scenarios: + ** + ** (a) A call to fstat() failed. + ** (b) A malloc failed. + ** + ** Scenario (b) may only occur if the process is holding no other + ** file descriptors open on the same file. If there were other file + ** descriptors on this file, then no malloc would be required by + ** findInodeInfo(). If this is the case, it is quite safe to close + ** handle h - as it is guaranteed that no posix locks will be released + ** by doing so. + ** + ** If scenario (a) caused the error then things are not so safe. The + ** implicit assumption here is that if fstat() fails, things are in + ** such bad shape that dropping a lock or two doesn't matter much. + */ + robust_close(pNew, h, __LINE__); + h = -1; + } + unixLeaveMutex(); + } + +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + else if( pLockingStyle == &afpIoMethods ){ + /* AFP locking uses the file path so it needs to be included in + ** the afpLockingContext. + */ + afpLockingContext *pCtx; + pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); + if( pCtx==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + /* NB: zFilename exists and remains valid until the file is closed + ** according to requirement F11141. So we do not need to make a + ** copy of the filename. */ + pCtx->dbPath = zFilename; + pCtx->reserved = 0; + srandomdev(); + unixEnterMutex(); + rc = findInodeInfo(pNew, &pNew->pInode); + if( rc!=SQLITE_OK ){ + sqlite3_free(pNew->lockingContext); + robust_close(pNew, h, __LINE__); + h = -1; + } + unixLeaveMutex(); + } + } +#endif + + else if( pLockingStyle == &dotlockIoMethods ){ + /* Dotfile locking uses the file path so it needs to be included in + ** the dotlockLockingContext + */ + char *zLockFile; + int nFilename; + assert( zFilename!=0 ); + nFilename = (int)strlen(zFilename) + 6; + zLockFile = (char *)sqlite3_malloc64(nFilename); + if( zLockFile==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); + } + pNew->lockingContext = zLockFile; + } + +#if OS_VXWORKS + else if( pLockingStyle == &semIoMethods ){ + /* Named semaphore locking uses the file path so it needs to be + ** included in the semLockingContext + */ + unixEnterMutex(); + rc = findInodeInfo(pNew, &pNew->pInode); + if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ + char *zSemName = pNew->pInode->aSemName; + int n; + sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", + pNew->pId->zCanonicalName); + for( n=1; zSemName[n]; n++ ) + if( zSemName[n]=='/' ) zSemName[n] = '_'; + pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); + if( pNew->pInode->pSem == SEM_FAILED ){ + rc = SQLITE_NOMEM_BKPT; + pNew->pInode->aSemName[0] = '\0'; + } + } + unixLeaveMutex(); + } +#endif + + storeLastErrno(pNew, 0); +#if OS_VXWORKS + if( rc!=SQLITE_OK ){ + if( h>=0 ) robust_close(pNew, h, __LINE__); + h = -1; + osUnlink(zFilename); + pNew->ctrlFlags |= UNIXFILE_DELETE; + } +#endif + if( rc!=SQLITE_OK ){ + if( h>=0 ) robust_close(pNew, h, __LINE__); + }else{ + pId->pMethods = pLockingStyle; + OpenCounter(+1); + verifyDbFile(pNew); + } + return rc; +} + +/* +** Directories to consider for temp files. +*/ +static const char *azTempDirs[] = { + 0, + 0, + "/var/tmp", + "/usr/tmp", + "/tmp", + "." +}; + +/* +** Initialize first two members of azTempDirs[] array. +*/ +static void unixTempFileInit(void){ + azTempDirs[0] = getenv("SQLITE_TMPDIR"); + azTempDirs[1] = getenv("TMPDIR"); +} + +/* +** Return the name of a directory in which to put temporary files. +** If no suitable temporary file directory can be found, return NULL. +*/ +static const char *unixTempFileDir(void){ + unsigned int i = 0; + struct stat buf; + const char *zDir = sqlite3_temp_directory; + + while(1){ + if( zDir!=0 + && osStat(zDir, &buf)==0 + && S_ISDIR(buf.st_mode) + && osAccess(zDir, 03)==0 + ){ + return zDir; + } + if( i>=sizeof(azTempDirs)/sizeof(azTempDirs[0]) ) break; + zDir = azTempDirs[i++]; + } + return 0; +} + +/* +** Create a temporary file name in zBuf. zBuf must be allocated +** by the calling process and must be big enough to hold at least +** pVfs->mxPathname bytes. +*/ +static int unixGetTempname(int nBuf, char *zBuf){ + const char *zDir; + int iLimit = 0; + + /* It's odd to simulate an io-error here, but really this is just + ** using the io-error infrastructure to test that SQLite handles this + ** function failing. + */ + zBuf[0] = 0; + SimulateIOError( return SQLITE_IOERR ); + + zDir = unixTempFileDir(); + if( zDir==0 ) return SQLITE_IOERR_GETTEMPPATH; + do{ + u64 r; + sqlite3_randomness(sizeof(r), &r); + assert( nBuf>2 ); + zBuf[nBuf-2] = 0; + sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c", + zDir, r, 0); + if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ) return SQLITE_ERROR; + }while( osAccess(zBuf,0)==0 ); + return SQLITE_OK; +} + +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) +/* +** Routine to transform a unixFile into a proxy-locking unixFile. +** Implementation in the proxy-lock division, but used by unixOpen() +** if SQLITE_PREFER_PROXY_LOCKING is defined. +*/ +static int proxyTransformUnixFile(unixFile*, const char*); +#endif + +/* +** Search for an unused file descriptor that was opened on the database +** file (not a journal or super-journal file) identified by pathname +** zPath with SQLITE_OPEN_XXX flags matching those passed as the second +** argument to this function. +** +** Such a file descriptor may exist if a database connection was closed +** but the associated file descriptor could not be closed because some +** other file descriptor open on the same file is holding a file-lock. +** Refer to comments in the unixClose() function and the lengthy comment +** describing "Posix Advisory Locking" at the start of this file for +** further details. Also, ticket #4018. +** +** If a suitable file descriptor is found, then it is returned. If no +** such file descriptor is located, -1 is returned. +*/ +static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ + UnixUnusedFd *pUnused = 0; + + /* Do not search for an unused file descriptor on vxworks. Not because + ** vxworks would not benefit from the change (it might, we're not sure), + ** but because no way to test it is currently available. It is better + ** not to risk breaking vxworks support for the sake of such an obscure + ** feature. */ +#if !OS_VXWORKS + struct stat sStat; /* Results of stat() call */ + + unixEnterMutex(); + + /* A stat() call may fail for various reasons. If this happens, it is + ** almost certain that an open() call on the same path will also fail. + ** For this reason, if an error occurs in the stat() call here, it is + ** ignored and -1 is returned. The caller will try to open a new file + ** descriptor on the same path, fail, and return an error to SQLite. + ** + ** Even if a subsequent open() call does succeed, the consequences of + ** not searching for a reusable file descriptor are not dire. */ + if( inodeList!=0 && 0==osStat(zPath, &sStat) ){ + unixInodeInfo *pInode; + + pInode = inodeList; + while( pInode && (pInode->fileId.dev!=sStat.st_dev + || pInode->fileId.ino!=(u64)sStat.st_ino) ){ + pInode = pInode->pNext; + } + if( pInode ){ + UnixUnusedFd **pp; + assert( sqlite3_mutex_notheld(pInode->pLockMutex) ); + sqlite3_mutex_enter(pInode->pLockMutex); + flags &= (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); + for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); + pUnused = *pp; + if( pUnused ){ + *pp = pUnused->pNext; + } + sqlite3_mutex_leave(pInode->pLockMutex); + } + } + unixLeaveMutex(); +#endif /* if !OS_VXWORKS */ + return pUnused; +} + +/* +** Find the mode, uid and gid of file zFile. +*/ +static int getFileMode( + const char *zFile, /* File name */ + mode_t *pMode, /* OUT: Permissions of zFile */ + uid_t *pUid, /* OUT: uid of zFile. */ + gid_t *pGid /* OUT: gid of zFile. */ +){ + struct stat sStat; /* Output of stat() on database file */ + int rc = SQLITE_OK; + if( 0==osStat(zFile, &sStat) ){ + *pMode = sStat.st_mode & 0777; + *pUid = sStat.st_uid; + *pGid = sStat.st_gid; + }else{ + rc = SQLITE_IOERR_FSTAT; + } + return rc; +} + +/* +** This function is called by unixOpen() to determine the unix permissions +** to create new files with. If no error occurs, then SQLITE_OK is returned +** and a value suitable for passing as the third argument to open(2) is +** written to *pMode. If an IO error occurs, an SQLite error code is +** returned and the value of *pMode is not modified. +** +** In most cases, this routine sets *pMode to 0, which will become +** an indication to robust_open() to create the file using +** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. +** But if the file being opened is a WAL or regular journal file, then +** this function queries the file-system for the permissions on the +** corresponding database file and sets *pMode to this value. Whenever +** possible, WAL and journal files are created using the same permissions +** as the associated database file. +** +** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the +** original filename is unavailable. But 8_3_NAMES is only used for +** FAT filesystems and permissions do not matter there, so just use +** the default permissions. In 8_3_NAMES mode, leave *pMode set to zero. +*/ +static int findCreateFileMode( + const char *zPath, /* Path of file (possibly) being created */ + int flags, /* Flags passed as 4th argument to xOpen() */ + mode_t *pMode, /* OUT: Permissions to open file with */ + uid_t *pUid, /* OUT: uid to set on the file */ + gid_t *pGid /* OUT: gid to set on the file */ +){ + int rc = SQLITE_OK; /* Return Code */ + *pMode = 0; + *pUid = 0; + *pGid = 0; + if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ + char zDb[MAX_PATHNAME+1]; /* Database file path */ + int nDb; /* Number of valid bytes in zDb */ + + /* zPath is a path to a WAL or journal file. The following block derives + ** the path to the associated database file from zPath. This block handles + ** the following naming conventions: + ** + ** "-journal" + ** "-wal" + ** "-journalNN" + ** "-walNN" + ** + ** where NN is a decimal number. The NN naming schemes are + ** used by the test_multiplex.c module. + */ + nDb = sqlite3Strlen30(zPath) - 1; + while( zPath[nDb]!='-' ){ + /* In normal operation, the journal file name will always contain + ** a '-' character. However in 8+3 filename mode, or if a corrupt + ** rollback journal specifies a super-journal with a goofy name, then + ** the '-' might be missing. */ + if( nDb==0 || zPath[nDb]=='.' ) return SQLITE_OK; + nDb--; + } + memcpy(zDb, zPath, nDb); + zDb[nDb] = '\0'; + + rc = getFileMode(zDb, pMode, pUid, pGid); + }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ + *pMode = 0600; + }else if( flags & SQLITE_OPEN_URI ){ + /* If this is a main database file and the file was opened using a URI + ** filename, check for the "modeof" parameter. If present, interpret + ** its value as a filename and try to copy the mode, uid and gid from + ** that file. */ + const char *z = sqlite3_uri_parameter(zPath, "modeof"); + if( z ){ + rc = getFileMode(z, pMode, pUid, pGid); + } + } + return rc; +} + +/* +** Open the file zPath. +** +** Previously, the SQLite OS layer used three functions in place of this +** one: +** +** sqlite3OsOpenReadWrite(); +** sqlite3OsOpenReadOnly(); +** sqlite3OsOpenExclusive(); +** +** These calls correspond to the following combinations of flags: +** +** ReadWrite() -> (READWRITE | CREATE) +** ReadOnly() -> (READONLY) +** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) +** +** The old OpenExclusive() accepted a boolean argument - "delFlag". If +** true, the file was configured to be automatically deleted when the +** file handle closed. To achieve the same effect using this new +** interface, add the DELETEONCLOSE flag to those specified above for +** OpenExclusive(). +*/ +static int unixOpen( + sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ + const char *zPath, /* Pathname of file to be opened */ + sqlite3_file *pFile, /* The file descriptor to be filled in */ + int flags, /* Input flags to control the opening */ + int *pOutFlags /* Output flags returned to SQLite core */ +){ + unixFile *p = (unixFile *)pFile; + int fd = -1; /* File descriptor returned by open() */ + int openFlags = 0; /* Flags to pass to open() */ + int eType = flags&0x0FFF00; /* Type of file to open */ + int noLock; /* True to omit locking primitives */ + int rc = SQLITE_OK; /* Function Return Code */ + int ctrlFlags = 0; /* UNIXFILE_* flags */ + + int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); + int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); + int isCreate = (flags & SQLITE_OPEN_CREATE); + int isReadonly = (flags & SQLITE_OPEN_READONLY); + int isReadWrite = (flags & SQLITE_OPEN_READWRITE); +#if SQLITE_ENABLE_LOCKING_STYLE + int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); +#endif +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE + struct statfs fsInfo; +#endif + + /* If creating a super- or main-file journal, this function will open + ** a file-descriptor on the directory too. The first time unixSync() + ** is called the directory file descriptor will be fsync()ed and close()d. + */ + int isNewJrnl = (isCreate && ( + eType==SQLITE_OPEN_SUPER_JOURNAL + || eType==SQLITE_OPEN_MAIN_JOURNAL + || eType==SQLITE_OPEN_WAL + )); + + /* If argument zPath is a NULL pointer, this function is required to open + ** a temporary file. Use this buffer to store the file name in. + */ + char zTmpname[MAX_PATHNAME+2]; + const char *zName = zPath; + + /* Check the following statements are true: + ** + ** (a) Exactly one of the READWRITE and READONLY flags must be set, and + ** (b) if CREATE is set, then READWRITE must also be set, and + ** (c) if EXCLUSIVE is set, then CREATE must also be set. + ** (d) if DELETEONCLOSE is set, then CREATE must also be set. + */ + assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); + assert(isCreate==0 || isReadWrite); + assert(isExclusive==0 || isCreate); + assert(isDelete==0 || isCreate); + + /* The main DB, main journal, WAL file and super-journal are never + ** automatically deleted. Nor are they ever temporary files. */ + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_SUPER_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); + + /* Assert that the upper layer has set one of the "file-type" flags. */ + assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB + || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL + || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_SUPER_JOURNAL + || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL + ); + + /* Detect a pid change and reset the PRNG. There is a race condition + ** here such that two or more threads all trying to open databases at + ** the same instant might all reset the PRNG. But multiple resets + ** are harmless. + */ + if( randomnessPid!=osGetpid(0) ){ + randomnessPid = osGetpid(0); + sqlite3_randomness(0,0); + } + memset(p, 0, sizeof(unixFile)); + +#ifdef SQLITE_ASSERT_NO_FILES + /* Applications that never read or write a persistent disk files */ + assert( zName==0 ); +#endif + + if( eType==SQLITE_OPEN_MAIN_DB ){ + UnixUnusedFd *pUnused; + pUnused = findReusableFd(zName, flags); + if( pUnused ){ + fd = pUnused->fd; + }else{ + pUnused = sqlite3_malloc64(sizeof(*pUnused)); + if( !pUnused ){ + return SQLITE_NOMEM_BKPT; + } + } + p->pPreallocatedUnused = pUnused; + + /* Database filenames are double-zero terminated if they are not + ** URIs with parameters. Hence, they can always be passed into + ** sqlite3_uri_parameter(). */ + assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); + + }else if( !zName ){ + /* If zName is NULL, the upper layer is requesting a temp file. */ + assert(isDelete && !isNewJrnl); + rc = unixGetTempname(pVfs->mxPathname, zTmpname); + if( rc!=SQLITE_OK ){ + return rc; + } + zName = zTmpname; + + /* Generated temporary filenames are always double-zero terminated + ** for use by sqlite3_uri_parameter(). */ + assert( zName[strlen(zName)+1]==0 ); + } + + /* Determine the value of the flags parameter passed to POSIX function + ** open(). These must be calculated even if open() is not called, as + ** they may be stored as part of the file handle and used by the + ** 'conch file' locking functions later on. */ + if( isReadonly ) openFlags |= O_RDONLY; + if( isReadWrite ) openFlags |= O_RDWR; + if( isCreate ) openFlags |= O_CREAT; + if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); + openFlags |= (O_LARGEFILE|O_BINARY|O_NOFOLLOW); + + if( fd<0 ){ + mode_t openMode; /* Permissions to create file with */ + uid_t uid; /* Userid for the file */ + gid_t gid; /* Groupid for the file */ + rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); + if( rc!=SQLITE_OK ){ + assert( !p->pPreallocatedUnused ); + assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); + return rc; + } + fd = robust_open(zName, openFlags, openMode); + OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); + assert( !isExclusive || (openFlags & O_CREAT)!=0 ); + if( fd<0 ){ + if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){ + /* If unable to create a journal because the directory is not + ** writable, change the error code to indicate that. */ + rc = SQLITE_READONLY_DIRECTORY; + }else if( errno!=EISDIR && isReadWrite ){ + /* Failed to open the file for read/write access. Try read-only. */ + flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); + openFlags &= ~(O_RDWR|O_CREAT); + flags |= SQLITE_OPEN_READONLY; + openFlags |= O_RDONLY; + isReadonly = 1; + fd = robust_open(zName, openFlags, openMode); + } + } + if( fd<0 ){ + int rc2 = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); + if( rc==SQLITE_OK ) rc = rc2; + goto open_finished; + } + + /* The owner of the rollback journal or WAL file should always be the + ** same as the owner of the database file. Try to ensure that this is + ** the case. The chown() system call will be a no-op if the current + ** process lacks root privileges, be we should at least try. Without + ** this step, if a root process opens a database file, it can leave + ** behinds a journal/WAL that is owned by root and hence make the + ** database inaccessible to unprivileged processes. + ** + ** If openMode==0, then that means uid and gid are not set correctly + ** (probably because SQLite is configured to use 8+3 filename mode) and + ** in that case we do not want to attempt the chown(). + */ + if( openMode && (flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL))!=0 ){ + robustFchown(fd, uid, gid); + } + } + assert( fd>=0 ); + if( pOutFlags ){ + *pOutFlags = flags; + } + + if( p->pPreallocatedUnused ){ + p->pPreallocatedUnused->fd = fd; + p->pPreallocatedUnused->flags = + flags & (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); + } + + if( isDelete ){ +#if OS_VXWORKS + zPath = zName; +#elif defined(SQLITE_UNLINK_AFTER_CLOSE) + zPath = sqlite3_mprintf("%s", zName); + if( zPath==0 ){ + robust_close(p, fd, __LINE__); + return SQLITE_NOMEM_BKPT; + } +#else + osUnlink(zName); +#endif + } +#if SQLITE_ENABLE_LOCKING_STYLE + else{ + p->openFlags = openFlags; + } +#endif + +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE + if( fstatfs(fd, &fsInfo) == -1 ){ + storeLastErrno(p, errno); + robust_close(p, fd, __LINE__); + return SQLITE_IOERR_ACCESS; + } + if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { + ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; + } + if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { + ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; + } +#endif + + /* Set up appropriate ctrlFlags */ + if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; + if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; + noLock = eType!=SQLITE_OPEN_MAIN_DB; + if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; + if( isNewJrnl ) ctrlFlags |= UNIXFILE_DIRSYNC; + if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; + +#if SQLITE_ENABLE_LOCKING_STYLE +#if SQLITE_PREFER_PROXY_LOCKING + isAutoProxy = 1; +#endif + if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ + char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); + int useProxy = 0; + + /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means + ** never use proxy, NULL means use proxy for non-local files only. */ + if( envforce!=NULL ){ + useProxy = atoi(envforce)>0; + }else{ + useProxy = !(fsInfo.f_flags&MNT_LOCAL); + } + if( useProxy ){ + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); + if( rc==SQLITE_OK ){ + rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); + if( rc!=SQLITE_OK ){ + /* Use unixClose to clean up the resources added in fillInUnixFile + ** and clear all the structure's references. Specifically, + ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op + */ + unixClose(pFile); + return rc; + } + } + goto open_finished; + } + } +#endif + + assert( zPath==0 || zPath[0]=='/' + || eType==SQLITE_OPEN_SUPER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL + ); + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); + +open_finished: + if( rc!=SQLITE_OK ){ + sqlite3_free(p->pPreallocatedUnused); + } + return rc; +} + + +/* +** Delete the file at zPath. If the dirSync argument is true, fsync() +** the directory after deleting the file. +*/ +static int unixDelete( + sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ + const char *zPath, /* Name of file to be deleted */ + int dirSync /* If true, fsync() directory after deleting file */ +){ + int rc = SQLITE_OK; + UNUSED_PARAMETER(NotUsed); + SimulateIOError(return SQLITE_IOERR_DELETE); + if( osUnlink(zPath)==(-1) ){ + if( errno==ENOENT +#if OS_VXWORKS + || osAccess(zPath,0)!=0 +#endif + ){ + rc = SQLITE_IOERR_DELETE_NOENT; + }else{ + rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); + } + return rc; + } +#ifndef SQLITE_DISABLE_DIRSYNC + if( (dirSync & 1)!=0 ){ + int fd; + rc = osOpenDirectory(zPath, &fd); + if( rc==SQLITE_OK ){ + if( full_fsync(fd,0,0) ){ + rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); + } + robust_close(0, fd, __LINE__); + }else{ + assert( rc==SQLITE_CANTOPEN ); + rc = SQLITE_OK; + } + } +#endif + return rc; +} + +/* +** Test the existence of or access permissions of file zPath. The +** test performed depends on the value of flags: +** +** SQLITE_ACCESS_EXISTS: Return 1 if the file exists +** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. +** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. +** +** Otherwise return 0. +*/ +static int unixAccess( + sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ + const char *zPath, /* Path of the file to examine */ + int flags, /* What do we want to learn about the zPath file? */ + int *pResOut /* Write result boolean here */ +){ + UNUSED_PARAMETER(NotUsed); + SimulateIOError( return SQLITE_IOERR_ACCESS; ); + assert( pResOut!=0 ); + + /* The spec says there are three possible values for flags. But only + ** two of them are actually used */ + assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE ); + + if( flags==SQLITE_ACCESS_EXISTS ){ + struct stat buf; + *pResOut = 0==osStat(zPath, &buf) && + (!S_ISREG(buf.st_mode) || buf.st_size>0); + }else{ + *pResOut = osAccess(zPath, W_OK|R_OK)==0; + } + return SQLITE_OK; +} + +/* +** If the last component of the pathname in z[0]..z[j-1] is something +** other than ".." then back it out and return true. If the last +** component is empty or if it is ".." then return false. +*/ +static int unixBackupDir(const char *z, int *pJ){ + int j = *pJ; + int i; + if( j<=0 ) return 0; + for(i=j-1; i>0 && z[i-1]!='/'; i--){} + if( i==0 ) return 0; + if( z[i]=='.' && i==j-2 && z[i+1]=='.' ) return 0; + *pJ = i-1; + return 1; +} + +/* +** Convert a relative pathname into a full pathname. Also +** simplify the pathname as follows: +** +** Remove all instances of /./ +** Remove all isntances of /X/../ for any X +*/ +static int mkFullPathname( + const char *zPath, /* Input path */ + char *zOut, /* Output buffer */ + int nOut /* Allocated size of buffer zOut */ +){ + int nPath = sqlite3Strlen30(zPath); + int iOff = 0; + int i, j; + if( zPath[0]!='/' ){ + if( osGetcwd(zOut, nOut-2)==0 ){ + return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); + } + iOff = sqlite3Strlen30(zOut); + zOut[iOff++] = '/'; + } + if( (iOff+nPath+1)>nOut ){ + /* SQLite assumes that xFullPathname() nul-terminates the output buffer + ** even if it returns an error. */ + zOut[iOff] = '\0'; + return SQLITE_CANTOPEN_BKPT; + } + sqlite3_snprintf(nOut-iOff, &zOut[iOff], "%s", zPath); + + /* Remove duplicate '/' characters. Except, two // at the beginning + ** of a pathname is allowed since this is important on windows. */ + for(i=j=1; zOut[i]; i++){ + zOut[j++] = zOut[i]; + while( zOut[i]=='/' && zOut[i+1]=='/' ) i++; + } + zOut[j] = 0; + + assert( zOut[0]=='/' ); + for(i=j=0; zOut[i]; i++){ + if( zOut[i]=='/' ){ + /* Skip over internal "/." directory components */ + if( zOut[i+1]=='.' && zOut[i+2]=='/' ){ + i += 1; + continue; + } + + /* If this is a "/.." directory component then back out the + ** previous term of the directory if it is something other than "..". + */ + if( zOut[i+1]=='.' + && zOut[i+2]=='.' + && zOut[i+3]=='/' + && unixBackupDir(zOut, &j) + ){ + i += 2; + continue; + } + } + if( ALWAYS(j>=0) ) zOut[j] = zOut[i]; + j++; + } + if( NEVER(j==0) ) zOut[j++] = '/'; + zOut[j] = 0; + return SQLITE_OK; +} + +/* +** Turn a relative pathname into a full pathname. The relative path +** is stored as a nul-terminated string in the buffer pointed to by +** zPath. +** +** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes +** (in this case, MAX_PATHNAME bytes). The full-path is written to +** this buffer before returning. +*/ +static int unixFullPathname( + sqlite3_vfs *pVfs, /* Pointer to vfs object */ + const char *zPath, /* Possibly relative input path */ + int nOut, /* Size of output buffer in bytes */ + char *zOut /* Output buffer */ +){ +#if !defined(HAVE_READLINK) || !defined(HAVE_LSTAT) + return mkFullPathname(zPath, zOut, nOut); +#else + int rc = SQLITE_OK; + int nByte; + int nLink = 0; /* Number of symbolic links followed so far */ + const char *zIn = zPath; /* Input path for each iteration of loop */ + char *zDel = 0; + + assert( pVfs->mxPathname==MAX_PATHNAME ); + UNUSED_PARAMETER(pVfs); + + /* It's odd to simulate an io-error here, but really this is just + ** using the io-error infrastructure to test that SQLite handles this + ** function failing. This function could fail if, for example, the + ** current working directory has been unlinked. + */ + SimulateIOError( return SQLITE_ERROR ); + + do { + + /* Call stat() on path zIn. Set bLink to true if the path is a symbolic + ** link, or false otherwise. */ + int bLink = 0; + struct stat buf; + if( osLstat(zIn, &buf)!=0 ){ + if( errno!=ENOENT ){ + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn); + } + }else{ + bLink = S_ISLNK(buf.st_mode); + } + + if( bLink ){ + nLink++; + if( zDel==0 ){ + zDel = sqlite3_malloc(nOut); + if( zDel==0 ) rc = SQLITE_NOMEM_BKPT; + }else if( nLink>=SQLITE_MAX_SYMLINKS ){ + rc = SQLITE_CANTOPEN_BKPT; + } + + if( rc==SQLITE_OK ){ + nByte = osReadlink(zIn, zDel, nOut-1); + if( nByte<0 ){ + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn); + }else{ + if( zDel[0]!='/' ){ + int n; + for(n = sqlite3Strlen30(zIn); n>0 && zIn[n-1]!='/'; n--); + if( nByte+n+1>nOut ){ + rc = SQLITE_CANTOPEN_BKPT; + }else{ + memmove(&zDel[n], zDel, nByte+1); + memcpy(zDel, zIn, n); + nByte += n; + } + } + zDel[nByte] = '\0'; + } + } + + zIn = zDel; + } + + assert( rc!=SQLITE_OK || zIn!=zOut || zIn[0]=='/' ); + if( rc==SQLITE_OK && zIn!=zOut ){ + rc = mkFullPathname(zIn, zOut, nOut); + } + if( bLink==0 ) break; + zIn = zOut; + }while( rc==SQLITE_OK ); + + sqlite3_free(zDel); + if( rc==SQLITE_OK && nLink ) rc = SQLITE_OK_SYMLINK; + return rc; +#endif /* HAVE_READLINK && HAVE_LSTAT */ +} + + +#ifndef SQLITE_OMIT_LOAD_EXTENSION +/* +** Interfaces for opening a shared library, finding entry points +** within the shared library, and closing the shared library. +*/ +#include +static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ + UNUSED_PARAMETER(NotUsed); + return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); +} + +/* +** SQLite calls this function immediately after a call to unixDlSym() or +** unixDlOpen() fails (returns a null pointer). If a more detailed error +** message is available, it is written to zBufOut. If no error message +** is available, zBufOut is left unmodified and SQLite uses a default +** error message. +*/ +static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ + const char *zErr; + UNUSED_PARAMETER(NotUsed); + unixEnterMutex(); + zErr = dlerror(); + if( zErr ){ + sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); + } + unixLeaveMutex(); +} +static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ + /* + ** GCC with -pedantic-errors says that C90 does not allow a void* to be + ** cast into a pointer to a function. And yet the library dlsym() routine + ** returns a void* which is really a pointer to a function. So how do we + ** use dlsym() with -pedantic-errors? + ** + ** Variable x below is defined to be a pointer to a function taking + ** parameters void* and const char* and returning a pointer to a function. + ** We initialize x by assigning it a pointer to the dlsym() function. + ** (That assignment requires a cast.) Then we call the function that + ** x points to. + ** + ** This work-around is unlikely to work correctly on any system where + ** you really cannot cast a function pointer into void*. But then, on the + ** other hand, dlsym() will not work on such a system either, so we have + ** not really lost anything. + */ + void (*(*x)(void*,const char*))(void); + UNUSED_PARAMETER(NotUsed); + x = (void(*(*)(void*,const char*))(void))dlsym; + return (*x)(p, zSym); +} +static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ + UNUSED_PARAMETER(NotUsed); + dlclose(pHandle); +} +#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ + #define unixDlOpen 0 + #define unixDlError 0 + #define unixDlSym 0 + #define unixDlClose 0 +#endif + +/* +** Write nBuf bytes of random data to the supplied buffer zBuf. +*/ +static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ + UNUSED_PARAMETER(NotUsed); + assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); + + /* We have to initialize zBuf to prevent valgrind from reporting + ** errors. The reports issued by valgrind are incorrect - we would + ** prefer that the randomness be increased by making use of the + ** uninitialized space in zBuf - but valgrind errors tend to worry + ** some users. Rather than argue, it seems easier just to initialize + ** the whole array and silence valgrind, even if that means less randomness + ** in the random seed. + ** + ** When testing, initializing zBuf[] to zero is all we do. That means + ** that we always use the same random number sequence. This makes the + ** tests repeatable. + */ + memset(zBuf, 0, nBuf); + randomnessPid = osGetpid(0); +#if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) + { + int fd, got; + fd = robust_open("/dev/urandom", O_RDONLY, 0); + if( fd<0 ){ + time_t t; + time(&t); + memcpy(zBuf, &t, sizeof(t)); + memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); + assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); + nBuf = sizeof(t) + sizeof(randomnessPid); + }else{ + do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); + robust_close(0, fd, __LINE__); + } + } +#endif + return nBuf; +} + + +/* +** Sleep for a little while. Return the amount of time slept. +** The argument is the number of microseconds we want to sleep. +** The return value is the number of microseconds of sleep actually +** requested from the underlying operating system, a number which +** might be greater than or equal to the argument, but not less +** than the argument. +*/ +static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ +#if OS_VXWORKS + struct timespec sp; + + sp.tv_sec = microseconds / 1000000; + sp.tv_nsec = (microseconds % 1000000) * 1000; + nanosleep(&sp, NULL); + UNUSED_PARAMETER(NotUsed); + return microseconds; +#elif defined(HAVE_USLEEP) && HAVE_USLEEP + if( microseconds>=1000000 ) sleep(microseconds/1000000); + if( microseconds%1000000 ) usleep(microseconds%1000000); + UNUSED_PARAMETER(NotUsed); + return microseconds; +#else + int seconds = (microseconds+999999)/1000000; + sleep(seconds); + UNUSED_PARAMETER(NotUsed); + return seconds*1000000; +#endif +} + +/* +** The following variable, if set to a non-zero value, is interpreted as +** the number of seconds since 1970 and is used to set the result of +** sqlite3OsCurrentTime() during testing. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ +#endif + +/* +** Find the current time (in Universal Coordinated Time). Write into *piNow +** the current time and date as a Julian Day number times 86_400_000. In +** other words, write into *piNow the number of milliseconds since the Julian +** epoch of noon in Greenwich on November 24, 4714 B.C according to the +** proleptic Gregorian calendar. +** +** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date +** cannot be found. +*/ +static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ + static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; + int rc = SQLITE_OK; +#if defined(NO_GETTOD) + time_t t; + time(&t); + *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; +#elif OS_VXWORKS + struct timespec sNow; + clock_gettime(CLOCK_REALTIME, &sNow); + *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; +#else + struct timeval sNow; + (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ + *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; +#endif + +#ifdef SQLITE_TEST + if( sqlite3_current_time ){ + *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; + } +#endif + UNUSED_PARAMETER(NotUsed); + return rc; +} + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** Find the current time (in Universal Coordinated Time). Write the +** current time and date as a Julian Day number into *prNow and +** return 0. Return 1 if the time and date cannot be found. +*/ +static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ + sqlite3_int64 i = 0; + int rc; + UNUSED_PARAMETER(NotUsed); + rc = unixCurrentTimeInt64(0, &i); + *prNow = i/86400000.0; + return rc; +} +#else +# define unixCurrentTime 0 +#endif + +/* +** The xGetLastError() method is designed to return a better +** low-level error message when operating-system problems come up +** during SQLite operation. Only the integer return code is currently +** used. +*/ +static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ + UNUSED_PARAMETER(NotUsed); + UNUSED_PARAMETER(NotUsed2); + UNUSED_PARAMETER(NotUsed3); + return errno; +} + + +/* +************************ End of sqlite3_vfs methods *************************** +******************************************************************************/ + +/****************************************************************************** +************************** Begin Proxy Locking ******************************** +** +** Proxy locking is a "uber-locking-method" in this sense: It uses the +** other locking methods on secondary lock files. Proxy locking is a +** meta-layer over top of the primitive locking implemented above. For +** this reason, the division that implements of proxy locking is deferred +** until late in the file (here) after all of the other I/O methods have +** been defined - so that the primitive locking methods are available +** as services to help with the implementation of proxy locking. +** +**** +** +** The default locking schemes in SQLite use byte-range locks on the +** database file to coordinate safe, concurrent access by multiple readers +** and writers [http://sqlite.org/lockingv3.html]. The five file locking +** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented +** as POSIX read & write locks over fixed set of locations (via fsctl), +** on AFP and SMB only exclusive byte-range locks are available via fsctl +** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. +** To simulate a F_RDLCK on the shared range, on AFP a randomly selected +** address in the shared range is taken for a SHARED lock, the entire +** shared range is taken for an EXCLUSIVE lock): +** +** PENDING_BYTE 0x40000000 +** RESERVED_BYTE 0x40000001 +** SHARED_RANGE 0x40000002 -> 0x40000200 +** +** This works well on the local file system, but shows a nearly 100x +** slowdown in read performance on AFP because the AFP client disables +** the read cache when byte-range locks are present. Enabling the read +** cache exposes a cache coherency problem that is present on all OS X +** supported network file systems. NFS and AFP both observe the +** close-to-open semantics for ensuring cache coherency +** [http://nfs.sourceforge.net/#faq_a8], which does not effectively +** address the requirements for concurrent database access by multiple +** readers and writers +** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. +** +** To address the performance and cache coherency issues, proxy file locking +** changes the way database access is controlled by limiting access to a +** single host at a time and moving file locks off of the database file +** and onto a proxy file on the local file system. +** +** +** Using proxy locks +** ----------------- +** +** C APIs +** +** sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE, +** | ":auto:"); +** sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE, +** &); +** +** +** SQL pragmas +** +** PRAGMA [database.]lock_proxy_file= | :auto: +** PRAGMA [database.]lock_proxy_file +** +** Specifying ":auto:" means that if there is a conch file with a matching +** host ID in it, the proxy path in the conch file will be used, otherwise +** a proxy path based on the user's temp dir +** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the +** actual proxy file name is generated from the name and path of the +** database file. For example: +** +** For database path "/Users/me/foo.db" +** The lock path will be "/sqliteplocks/_Users_me_foo.db:auto:") +** +** Once a lock proxy is configured for a database connection, it can not +** be removed, however it may be switched to a different proxy path via +** the above APIs (assuming the conch file is not being held by another +** connection or process). +** +** +** How proxy locking works +** ----------------------- +** +** Proxy file locking relies primarily on two new supporting files: +** +** * conch file to limit access to the database file to a single host +** at a time +** +** * proxy file to act as a proxy for the advisory locks normally +** taken on the database +** +** The conch file - to use a proxy file, sqlite must first "hold the conch" +** by taking an sqlite-style shared lock on the conch file, reading the +** contents and comparing the host's unique host ID (see below) and lock +** proxy path against the values stored in the conch. The conch file is +** stored in the same directory as the database file and the file name +** is patterned after the database file name as ".-conch". +** If the conch file does not exist, or its contents do not match the +** host ID and/or proxy path, then the lock is escalated to an exclusive +** lock and the conch file contents is updated with the host ID and proxy +** path and the lock is downgraded to a shared lock again. If the conch +** is held by another process (with a shared lock), the exclusive lock +** will fail and SQLITE_BUSY is returned. +** +** The proxy file - a single-byte file used for all advisory file locks +** normally taken on the database file. This allows for safe sharing +** of the database file for multiple readers and writers on the same +** host (the conch ensures that they all use the same local lock file). +** +** Requesting the lock proxy does not immediately take the conch, it is +** only taken when the first request to lock database file is made. +** This matches the semantics of the traditional locking behavior, where +** opening a connection to a database file does not take a lock on it. +** The shared lock and an open file descriptor are maintained until +** the connection to the database is closed. +** +** The proxy file and the lock file are never deleted so they only need +** to be created the first time they are used. +** +** Configuration options +** --------------------- +** +** SQLITE_PREFER_PROXY_LOCKING +** +** Database files accessed on non-local file systems are +** automatically configured for proxy locking, lock files are +** named automatically using the same logic as +** PRAGMA lock_proxy_file=":auto:" +** +** SQLITE_PROXY_DEBUG +** +** Enables the logging of error messages during host id file +** retrieval and creation +** +** LOCKPROXYDIR +** +** Overrides the default directory used for lock proxy files that +** are named automatically via the ":auto:" setting +** +** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS +** +** Permissions to use when creating a directory for storing the +** lock proxy files, only used when LOCKPROXYDIR is not set. +** +** +** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING, +** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will +** force proxy locking to be used for every database file opened, and 0 +** will force automatic proxy locking to be disabled for all database +** files (explicitly calling the SQLITE_FCNTL_SET_LOCKPROXYFILE pragma or +** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING). +*/ + +/* +** Proxy locking is only available on MacOSX +*/ +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE + +/* +** The proxyLockingContext has the path and file structures for the remote +** and local proxy files in it +*/ +typedef struct proxyLockingContext proxyLockingContext; +struct proxyLockingContext { + unixFile *conchFile; /* Open conch file */ + char *conchFilePath; /* Name of the conch file */ + unixFile *lockProxy; /* Open proxy lock file */ + char *lockProxyPath; /* Name of the proxy lock file */ + char *dbPath; /* Name of the open file */ + int conchHeld; /* 1 if the conch is held, -1 if lockless */ + int nFails; /* Number of conch taking failures */ + void *oldLockingContext; /* Original lockingcontext to restore on close */ + sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ +}; + +/* +** The proxy lock file path for the database at dbPath is written into lPath, +** which must point to valid, writable memory large enough for a maxLen length +** file path. +*/ +static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ + int len; + int dbLen; + int i; + +#ifdef LOCKPROXYDIR + len = strlcpy(lPath, LOCKPROXYDIR, maxLen); +#else +# ifdef _CS_DARWIN_USER_TEMP_DIR + { + if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ + OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", + lPath, errno, osGetpid(0))); + return SQLITE_IOERR_LOCK; + } + len = strlcat(lPath, "sqliteplocks", maxLen); + } +# else + len = strlcpy(lPath, "/tmp/", maxLen); +# endif +#endif + + if( lPath[len-1]!='/' ){ + len = strlcat(lPath, "/", maxLen); + } + + /* transform the db path to a unique cache name */ + dbLen = (int)strlen(dbPath); + for( i=0; i 0) ){ + /* only mkdir if leaf dir != "." or "/" or ".." */ + if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') + || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ + buf[i]='\0'; + if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ + int err=errno; + if( err!=EEXIST ) { + OSTRACE(("CREATELOCKPATH FAILED creating %s, " + "'%s' proxy lock path=%s pid=%d\n", + buf, strerror(err), lockPath, osGetpid(0))); + return err; + } + } + } + start=i+1; + } + buf[i] = lockPath[i]; + } + OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0))); + return 0; +} + +/* +** Create a new VFS file descriptor (stored in memory obtained from +** sqlite3_malloc) and open the file named "path" in the file descriptor. +** +** The caller is responsible not only for closing the file descriptor +** but also for freeing the memory associated with the file descriptor. +*/ +static int proxyCreateUnixFile( + const char *path, /* path for the new unixFile */ + unixFile **ppFile, /* unixFile created and returned by ref */ + int islockfile /* if non zero missing dirs will be created */ +) { + int fd = -1; + unixFile *pNew; + int rc = SQLITE_OK; + int openFlags = O_RDWR | O_CREAT | O_NOFOLLOW; + sqlite3_vfs dummyVfs; + int terrno = 0; + UnixUnusedFd *pUnused = NULL; + + /* 1. first try to open/create the file + ** 2. if that fails, and this is a lock file (not-conch), try creating + ** the parent directories and then try again. + ** 3. if that fails, try to open the file read-only + ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file + */ + pUnused = findReusableFd(path, openFlags); + if( pUnused ){ + fd = pUnused->fd; + }else{ + pUnused = sqlite3_malloc64(sizeof(*pUnused)); + if( !pUnused ){ + return SQLITE_NOMEM_BKPT; + } + } + if( fd<0 ){ + fd = robust_open(path, openFlags, 0); + terrno = errno; + if( fd<0 && errno==ENOENT && islockfile ){ + if( proxyCreateLockPath(path) == SQLITE_OK ){ + fd = robust_open(path, openFlags, 0); + } + } + } + if( fd<0 ){ + openFlags = O_RDONLY | O_NOFOLLOW; + fd = robust_open(path, openFlags, 0); + terrno = errno; + } + if( fd<0 ){ + if( islockfile ){ + return SQLITE_BUSY; + } + switch (terrno) { + case EACCES: + return SQLITE_PERM; + case EIO: + return SQLITE_IOERR_LOCK; /* even though it is the conch */ + default: + return SQLITE_CANTOPEN_BKPT; + } + } + + pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); + if( pNew==NULL ){ + rc = SQLITE_NOMEM_BKPT; + goto end_create_proxy; + } + memset(pNew, 0, sizeof(unixFile)); + pNew->openFlags = openFlags; + memset(&dummyVfs, 0, sizeof(dummyVfs)); + dummyVfs.pAppData = (void*)&autolockIoFinder; + dummyVfs.zName = "dummy"; + pUnused->fd = fd; + pUnused->flags = openFlags; + pNew->pPreallocatedUnused = pUnused; + + rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); + if( rc==SQLITE_OK ){ + *ppFile = pNew; + return SQLITE_OK; + } +end_create_proxy: + robust_close(pNew, fd, __LINE__); + sqlite3_free(pNew); + sqlite3_free(pUnused); + return rc; +} + +#ifdef SQLITE_TEST +/* simulate multiple hosts by creating unique hostid file paths */ +SQLITE_API int sqlite3_hostid_num = 0; +#endif + +#define PROXY_HOSTIDLEN 16 /* conch file host id length */ + +#if HAVE_GETHOSTUUID +/* Not always defined in the headers as it ought to be */ +extern int gethostuuid(uuid_t id, const struct timespec *wait); +#endif + +/* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN +** bytes of writable memory. +*/ +static int proxyGetHostID(unsigned char *pHostID, int *pError){ + assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); + memset(pHostID, 0, PROXY_HOSTIDLEN); +#if HAVE_GETHOSTUUID + { + struct timespec timeout = {1, 0}; /* 1 sec timeout */ + if( gethostuuid(pHostID, &timeout) ){ + int err = errno; + if( pError ){ + *pError = err; + } + return SQLITE_IOERR; + } + } +#else + UNUSED_PARAMETER(pError); +#endif +#ifdef SQLITE_TEST + /* simulate multiple hosts by creating unique hostid file paths */ + if( sqlite3_hostid_num != 0){ + pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); + } +#endif + + return SQLITE_OK; +} + +/* The conch file contains the header, host id and lock file path + */ +#define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ +#define PROXY_HEADERLEN 1 /* conch file header length */ +#define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) +#define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) + +/* +** Takes an open conch file, copies the contents to a new path and then moves +** it back. The newly created file's file descriptor is assigned to the +** conch file structure and finally the original conch file descriptor is +** closed. Returns zero if successful. +*/ +static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + unixFile *conchFile = pCtx->conchFile; + char tPath[MAXPATHLEN]; + char buf[PROXY_MAXCONCHLEN]; + char *cPath = pCtx->conchFilePath; + size_t readLen = 0; + size_t pathLen = 0; + char errmsg[64] = ""; + int fd = -1; + int rc = -1; + UNUSED_PARAMETER(myHostID); + + /* create a new path by replace the trailing '-conch' with '-break' */ + pathLen = strlcpy(tPath, cPath, MAXPATHLEN); + if( pathLen>MAXPATHLEN || pathLen<6 || + (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ + sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); + goto end_breaklock; + } + /* read the conch content */ + readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); + if( readLenh, __LINE__); + conchFile->h = fd; + conchFile->openFlags = O_RDWR | O_CREAT; + +end_breaklock: + if( rc ){ + if( fd>=0 ){ + osUnlink(tPath); + robust_close(pFile, fd, __LINE__); + } + fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); + } + return rc; +} + +/* Take the requested lock on the conch file and break a stale lock if the +** host id matches. +*/ +static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + unixFile *conchFile = pCtx->conchFile; + int rc = SQLITE_OK; + int nTries = 0; + struct timespec conchModTime; + + memset(&conchModTime, 0, sizeof(conchModTime)); + do { + rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); + nTries ++; + if( rc==SQLITE_BUSY ){ + /* If the lock failed (busy): + * 1st try: get the mod time of the conch, wait 0.5s and try again. + * 2nd try: fail if the mod time changed or host id is different, wait + * 10 sec and try again + * 3rd try: break the lock unless the mod time has changed. + */ + struct stat buf; + if( osFstat(conchFile->h, &buf) ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR_LOCK; + } + + if( nTries==1 ){ + conchModTime = buf.st_mtimespec; + unixSleep(0,500000); /* wait 0.5 sec and try the lock again*/ + continue; + } + + assert( nTries>1 ); + if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || + conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ + return SQLITE_BUSY; + } + + if( nTries==2 ){ + char tBuf[PROXY_MAXCONCHLEN]; + int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); + if( len<0 ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR_LOCK; + } + if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ + /* don't break the lock if the host id doesn't match */ + if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ + return SQLITE_BUSY; + } + }else{ + /* don't break the lock on short read or a version mismatch */ + return SQLITE_BUSY; + } + unixSleep(0,10000000); /* wait 10 sec and try the lock again */ + continue; + } + + assert( nTries==3 ); + if( 0==proxyBreakConchLock(pFile, myHostID) ){ + rc = SQLITE_OK; + if( lockType==EXCLUSIVE_LOCK ){ + rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); + } + if( !rc ){ + rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); + } + } + } + } while( rc==SQLITE_BUSY && nTries<3 ); + + return rc; +} + +/* Takes the conch by taking a shared lock and read the contents conch, if +** lockPath is non-NULL, the host ID and lock file path must match. A NULL +** lockPath means that the lockPath in the conch file will be used if the +** host IDs match, or a new lock path will be generated automatically +** and written to the conch file. +*/ +static int proxyTakeConch(unixFile *pFile){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + + if( pCtx->conchHeld!=0 ){ + return SQLITE_OK; + }else{ + unixFile *conchFile = pCtx->conchFile; + uuid_t myHostID; + int pError = 0; + char readBuf[PROXY_MAXCONCHLEN]; + char lockPath[MAXPATHLEN]; + char *tempLockPath = NULL; + int rc = SQLITE_OK; + int createConch = 0; + int hostIdMatch = 0; + int readLen = 0; + int tryOldLockPath = 0; + int forceNewLockPath = 0; + + OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h, + (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), + osGetpid(0))); + + rc = proxyGetHostID(myHostID, &pError); + if( (rc&0xff)==SQLITE_IOERR ){ + storeLastErrno(pFile, pError); + goto end_takeconch; + } + rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); + if( rc!=SQLITE_OK ){ + goto end_takeconch; + } + /* read the existing conch file */ + readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN); + if( readLen<0 ){ + /* I/O error: lastErrno set by seekAndRead */ + storeLastErrno(pFile, conchFile->lastErrno); + rc = SQLITE_IOERR_READ; + goto end_takeconch; + }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || + readBuf[0]!=(char)PROXY_CONCHVERSION ){ + /* a short read or version format mismatch means we need to create a new + ** conch file. + */ + createConch = 1; + } + /* if the host id matches and the lock path already exists in the conch + ** we'll try to use the path there, if we can't open that path, we'll + ** retry with a new auto-generated path + */ + do { /* in case we need to try again for an :auto: named lock file */ + + if( !createConch && !forceNewLockPath ){ + hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, + PROXY_HOSTIDLEN); + /* if the conch has data compare the contents */ + if( !pCtx->lockProxyPath ){ + /* for auto-named local lock file, just check the host ID and we'll + ** use the local lock file path that's already in there + */ + if( hostIdMatch ){ + size_t pathLen = (readLen - PROXY_PATHINDEX); + + if( pathLen>=MAXPATHLEN ){ + pathLen=MAXPATHLEN-1; + } + memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen); + lockPath[pathLen] = 0; + tempLockPath = lockPath; + tryOldLockPath = 1; + /* create a copy of the lock path if the conch is taken */ + goto end_takeconch; + } + }else if( hostIdMatch + && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX], + readLen-PROXY_PATHINDEX) + ){ + /* conch host and lock path match */ + goto end_takeconch; + } + } + + /* if the conch isn't writable and doesn't match, we can't take it */ + if( (conchFile->openFlags&O_RDWR) == 0 ){ + rc = SQLITE_BUSY; + goto end_takeconch; + } + + /* either the conch didn't match or we need to create a new one */ + if( !pCtx->lockProxyPath ){ + proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN); + tempLockPath = lockPath; + /* create a copy of the lock path _only_ if the conch is taken */ + } + + /* update conch with host and path (this will fail if other process + ** has a shared lock already), if the host id matches, use the big + ** stick. + */ + futimes(conchFile->h, NULL); + if( hostIdMatch && !createConch ){ + if( conchFile->pInode && conchFile->pInode->nShared>1 ){ + /* We are trying for an exclusive lock but another thread in this + ** same process is still holding a shared lock. */ + rc = SQLITE_BUSY; + } else { + rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); + } + }else{ + rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); + } + if( rc==SQLITE_OK ){ + char writeBuffer[PROXY_MAXCONCHLEN]; + int writeSize = 0; + + writeBuffer[0] = (char)PROXY_CONCHVERSION; + memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); + if( pCtx->lockProxyPath!=NULL ){ + strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, + MAXPATHLEN); + }else{ + strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); + } + writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); + robust_ftruncate(conchFile->h, writeSize); + rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); + full_fsync(conchFile->h,0,0); + /* If we created a new conch file (not just updated the contents of a + ** valid conch file), try to match the permissions of the database + */ + if( rc==SQLITE_OK && createConch ){ + struct stat buf; + int err = osFstat(pFile->h, &buf); + if( err==0 ){ + mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | + S_IROTH|S_IWOTH); + /* try to match the database file R/W permissions, ignore failure */ +#ifndef SQLITE_PROXY_DEBUG + osFchmod(conchFile->h, cmode); +#else + do{ + rc = osFchmod(conchFile->h, cmode); + }while( rc==(-1) && errno==EINTR ); + if( rc!=0 ){ + int code = errno; + fprintf(stderr, "fchmod %o FAILED with %d %s\n", + cmode, code, strerror(code)); + } else { + fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); + } + }else{ + int code = errno; + fprintf(stderr, "STAT FAILED[%d] with %d %s\n", + err, code, strerror(code)); +#endif + } + } + } + conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK); + + end_takeconch: + OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); + if( rc==SQLITE_OK && pFile->openFlags ){ + int fd; + if( pFile->h>=0 ){ + robust_close(pFile, pFile->h, __LINE__); + } + pFile->h = -1; + fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); + OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); + if( fd>=0 ){ + pFile->h = fd; + }else{ + rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called + during locking */ + } + } + if( rc==SQLITE_OK && !pCtx->lockProxy ){ + char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath; + rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1); + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){ + /* we couldn't create the proxy lock file with the old lock file path + ** so try again via auto-naming + */ + forceNewLockPath = 1; + tryOldLockPath = 0; + continue; /* go back to the do {} while start point, try again */ + } + } + if( rc==SQLITE_OK ){ + /* Need to make a copy of path if we extracted the value + ** from the conch file or the path was allocated on the stack + */ + if( tempLockPath ){ + pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath); + if( !pCtx->lockProxyPath ){ + rc = SQLITE_NOMEM_BKPT; + } + } + } + if( rc==SQLITE_OK ){ + pCtx->conchHeld = 1; + + if( pCtx->lockProxy->pMethod == &afpIoMethods ){ + afpLockingContext *afpCtx; + afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext; + afpCtx->dbPath = pCtx->lockProxyPath; + } + } else { + conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); + } + OSTRACE(("TAKECONCH %d %s\n", conchFile->h, + rc==SQLITE_OK?"ok":"failed")); + return rc; + } while (1); /* in case we need to retry the :auto: lock file - + ** we should never get here except via the 'continue' call. */ + } +} + +/* +** If pFile holds a lock on a conch file, then release that lock. +*/ +static int proxyReleaseConch(unixFile *pFile){ + int rc = SQLITE_OK; /* Subroutine return code */ + proxyLockingContext *pCtx; /* The locking context for the proxy lock */ + unixFile *conchFile; /* Name of the conch file */ + + pCtx = (proxyLockingContext *)pFile->lockingContext; + conchFile = pCtx->conchFile; + OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, + (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), + osGetpid(0))); + if( pCtx->conchHeld>0 ){ + rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); + } + pCtx->conchHeld = 0; + OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, + (rc==SQLITE_OK ? "ok" : "failed"))); + return rc; +} + +/* +** Given the name of a database file, compute the name of its conch file. +** Store the conch filename in memory obtained from sqlite3_malloc64(). +** Make *pConchPath point to the new name. Return SQLITE_OK on success +** or SQLITE_NOMEM if unable to obtain memory. +** +** The caller is responsible for ensuring that the allocated memory +** space is eventually freed. +** +** *pConchPath is set to NULL if a memory allocation error occurs. +*/ +static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ + int i; /* Loop counter */ + int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ + char *conchPath; /* buffer in which to construct conch name */ + + /* Allocate space for the conch filename and initialize the name to + ** the name of the original database file. */ + *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); + if( conchPath==0 ){ + return SQLITE_NOMEM_BKPT; + } + memcpy(conchPath, dbPath, len+1); + + /* now insert a "." before the last / character */ + for( i=(len-1); i>=0; i-- ){ + if( conchPath[i]=='/' ){ + i++; + break; + } + } + conchPath[i]='.'; + while ( ilockingContext; + char *oldPath = pCtx->lockProxyPath; + int rc = SQLITE_OK; + + if( pFile->eFileLock!=NO_LOCK ){ + return SQLITE_BUSY; + } + + /* nothing to do if the path is NULL, :auto: or matches the existing path */ + if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || + (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ + return SQLITE_OK; + }else{ + unixFile *lockProxy = pCtx->lockProxy; + pCtx->lockProxy=NULL; + pCtx->conchHeld = 0; + if( lockProxy!=NULL ){ + rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); + if( rc ) return rc; + sqlite3_free(lockProxy); + } + sqlite3_free(oldPath); + pCtx->lockProxyPath = sqlite3DbStrDup(0, path); + } + + return rc; +} + +/* +** pFile is a file that has been opened by a prior xOpen call. dbPath +** is a string buffer at least MAXPATHLEN+1 characters in size. +** +** This routine find the filename associated with pFile and writes it +** int dbPath. +*/ +static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ +#if defined(__APPLE__) + if( pFile->pMethod == &afpIoMethods ){ + /* afp style keeps a reference to the db path in the filePath field + ** of the struct */ + assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); + strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, + MAXPATHLEN); + } else +#endif + if( pFile->pMethod == &dotlockIoMethods ){ + /* dot lock style uses the locking context to store the dot lock + ** file path */ + int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); + memcpy(dbPath, (char *)pFile->lockingContext, len + 1); + }else{ + /* all other styles use the locking context to store the db file path */ + assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); + strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); + } + return SQLITE_OK; +} + +/* +** Takes an already filled in unix file and alters it so all file locking +** will be performed on the local proxy lock file. The following fields +** are preserved in the locking context so that they can be restored and +** the unix structure properly cleaned up at close time: +** ->lockingContext +** ->pMethod +*/ +static int proxyTransformUnixFile(unixFile *pFile, const char *path) { + proxyLockingContext *pCtx; + char dbPath[MAXPATHLEN+1]; /* Name of the database file */ + char *lockPath=NULL; + int rc = SQLITE_OK; + + if( pFile->eFileLock!=NO_LOCK ){ + return SQLITE_BUSY; + } + proxyGetDbPathForUnixFile(pFile, dbPath); + if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ + lockPath=NULL; + }else{ + lockPath=(char *)path; + } + + OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, + (lockPath ? lockPath : ":auto:"), osGetpid(0))); + + pCtx = sqlite3_malloc64( sizeof(*pCtx) ); + if( pCtx==0 ){ + return SQLITE_NOMEM_BKPT; + } + memset(pCtx, 0, sizeof(*pCtx)); + + rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); + if( rc==SQLITE_OK ){ + rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); + if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ + /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and + ** (c) the file system is read-only, then enable no-locking access. + ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts + ** that openFlags will have only one of O_RDONLY or O_RDWR. + */ + struct statfs fsInfo; + struct stat conchInfo; + int goLockless = 0; + + if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { + int err = errno; + if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ + goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; + } + } + if( goLockless ){ + pCtx->conchHeld = -1; /* read only FS/ lockless */ + rc = SQLITE_OK; + } + } + } + if( rc==SQLITE_OK && lockPath ){ + pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); + } + + if( rc==SQLITE_OK ){ + pCtx->dbPath = sqlite3DbStrDup(0, dbPath); + if( pCtx->dbPath==NULL ){ + rc = SQLITE_NOMEM_BKPT; + } + } + if( rc==SQLITE_OK ){ + /* all memory is allocated, proxys are created and assigned, + ** switch the locking context and pMethod then return. + */ + pCtx->oldLockingContext = pFile->lockingContext; + pFile->lockingContext = pCtx; + pCtx->pOldMethod = pFile->pMethod; + pFile->pMethod = &proxyIoMethods; + }else{ + if( pCtx->conchFile ){ + pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); + sqlite3_free(pCtx->conchFile); + } + sqlite3DbFree(0, pCtx->lockProxyPath); + sqlite3_free(pCtx->conchFilePath); + sqlite3_free(pCtx); + } + OSTRACE(("TRANSPROXY %d %s\n", pFile->h, + (rc==SQLITE_OK ? "ok" : "failed"))); + return rc; +} + + +/* +** This routine handles sqlite3_file_control() calls that are specific +** to proxy locking. +*/ +static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ + switch( op ){ + case SQLITE_FCNTL_GET_LOCKPROXYFILE: { + unixFile *pFile = (unixFile*)id; + if( pFile->pMethod == &proxyIoMethods ){ + proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; + proxyTakeConch(pFile); + if( pCtx->lockProxyPath ){ + *(const char **)pArg = pCtx->lockProxyPath; + }else{ + *(const char **)pArg = ":auto: (not held)"; + } + } else { + *(const char **)pArg = NULL; + } + return SQLITE_OK; + } + case SQLITE_FCNTL_SET_LOCKPROXYFILE: { + unixFile *pFile = (unixFile*)id; + int rc = SQLITE_OK; + int isProxyStyle = (pFile->pMethod == &proxyIoMethods); + if( pArg==NULL || (const char *)pArg==0 ){ + if( isProxyStyle ){ + /* turn off proxy locking - not supported. If support is added for + ** switching proxy locking mode off then it will need to fail if + ** the journal mode is WAL mode. + */ + rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/; + }else{ + /* turn off proxy locking - already off - NOOP */ + rc = SQLITE_OK; + } + }else{ + const char *proxyPath = (const char *)pArg; + if( isProxyStyle ){ + proxyLockingContext *pCtx = + (proxyLockingContext*)pFile->lockingContext; + if( !strcmp(pArg, ":auto:") + || (pCtx->lockProxyPath && + !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) + ){ + rc = SQLITE_OK; + }else{ + rc = switchLockProxyPath(pFile, proxyPath); + } + }else{ + /* turn on proxy file locking */ + rc = proxyTransformUnixFile(pFile, proxyPath); + } + } + return rc; + } + default: { + assert( 0 ); /* The call assures that only valid opcodes are sent */ + } + } + /*NOTREACHED*/ assert(0); + return SQLITE_ERROR; +} + +/* +** Within this division (the proxying locking implementation) the procedures +** above this point are all utilities. The lock-related methods of the +** proxy-locking sqlite3_io_method object follow. +*/ + + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { + unixFile *pFile = (unixFile*)id; + int rc = proxyTakeConch(pFile); + if( rc==SQLITE_OK ){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + if( pCtx->conchHeld>0 ){ + unixFile *proxy = pCtx->lockProxy; + return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); + }else{ /* conchHeld < 0 is lockless */ + pResOut=0; + } + } + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int proxyLock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + int rc = proxyTakeConch(pFile); + if( rc==SQLITE_OK ){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + if( pCtx->conchHeld>0 ){ + unixFile *proxy = pCtx->lockProxy; + rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); + pFile->eFileLock = proxy->eFileLock; + }else{ + /* conchHeld < 0 is lockless */ + } + } + return rc; +} + + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int proxyUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + int rc = proxyTakeConch(pFile); + if( rc==SQLITE_OK ){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + if( pCtx->conchHeld>0 ){ + unixFile *proxy = pCtx->lockProxy; + rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); + pFile->eFileLock = proxy->eFileLock; + }else{ + /* conchHeld < 0 is lockless */ + } + } + return rc; +} + +/* +** Close a file that uses proxy locks. +*/ +static int proxyClose(sqlite3_file *id) { + if( ALWAYS(id) ){ + unixFile *pFile = (unixFile*)id; + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + unixFile *lockProxy = pCtx->lockProxy; + unixFile *conchFile = pCtx->conchFile; + int rc = SQLITE_OK; + + if( lockProxy ){ + rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); + if( rc ) return rc; + rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); + if( rc ) return rc; + sqlite3_free(lockProxy); + pCtx->lockProxy = 0; + } + if( conchFile ){ + if( pCtx->conchHeld ){ + rc = proxyReleaseConch(pFile); + if( rc ) return rc; + } + rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); + if( rc ) return rc; + sqlite3_free(conchFile); + } + sqlite3DbFree(0, pCtx->lockProxyPath); + sqlite3_free(pCtx->conchFilePath); + sqlite3DbFree(0, pCtx->dbPath); + /* restore the original locking context and pMethod then close it */ + pFile->lockingContext = pCtx->oldLockingContext; + pFile->pMethod = pCtx->pOldMethod; + sqlite3_free(pCtx); + return pFile->pMethod->xClose(id); + } + return SQLITE_OK; +} + + + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ +/* +** The proxy locking style is intended for use with AFP filesystems. +** And since AFP is only supported on MacOSX, the proxy locking is also +** restricted to MacOSX. +** +** +******************* End of the proxy lock implementation ********************** +******************************************************************************/ + +/* +** Initialize the operating system interface. +** +** This routine registers all VFS implementations for unix-like operating +** systems. This routine, and the sqlite3_os_end() routine that follows, +** should be the only routines in this file that are visible from other +** files. +** +** This routine is called once during SQLite initialization and by a +** single thread. The memory allocation and mutex subsystems have not +** necessarily been initialized when this routine is called, and so they +** should not be used. +*/ +SQLITE_API int sqlite3_os_init(void){ + /* + ** The following macro defines an initializer for an sqlite3_vfs object. + ** The name of the VFS is NAME. The pAppData is a pointer to a pointer + ** to the "finder" function. (pAppData is a pointer to a pointer because + ** silly C90 rules prohibit a void* from being cast to a function pointer + ** and so we have to go through the intermediate pointer to avoid problems + ** when compiling with -pedantic-errors on GCC.) + ** + ** The FINDER parameter to this macro is the name of the pointer to the + ** finder-function. The finder-function returns a pointer to the + ** sqlite_io_methods object that implements the desired locking + ** behaviors. See the division above that contains the IOMETHODS + ** macro for addition information on finder-functions. + ** + ** Most finders simply return a pointer to a fixed sqlite3_io_methods + ** object. But the "autolockIoFinder" available on MacOSX does a little + ** more than that; it looks at the filesystem type that hosts the + ** database file and tries to choose an locking method appropriate for + ** that filesystem time. + */ + #define UNIXVFS(VFSNAME, FINDER) { \ + 3, /* iVersion */ \ + sizeof(unixFile), /* szOsFile */ \ + MAX_PATHNAME, /* mxPathname */ \ + 0, /* pNext */ \ + VFSNAME, /* zName */ \ + (void*)&FINDER, /* pAppData */ \ + unixOpen, /* xOpen */ \ + unixDelete, /* xDelete */ \ + unixAccess, /* xAccess */ \ + unixFullPathname, /* xFullPathname */ \ + unixDlOpen, /* xDlOpen */ \ + unixDlError, /* xDlError */ \ + unixDlSym, /* xDlSym */ \ + unixDlClose, /* xDlClose */ \ + unixRandomness, /* xRandomness */ \ + unixSleep, /* xSleep */ \ + unixCurrentTime, /* xCurrentTime */ \ + unixGetLastError, /* xGetLastError */ \ + unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ + unixSetSystemCall, /* xSetSystemCall */ \ + unixGetSystemCall, /* xGetSystemCall */ \ + unixNextSystemCall, /* xNextSystemCall */ \ + } + + /* + ** All default VFSes for unix are contained in the following array. + ** + ** Note that the sqlite3_vfs.pNext field of the VFS object is modified + ** by the SQLite core when the VFS is registered. So the following + ** array cannot be const. + */ + static sqlite3_vfs aVfs[] = { +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + UNIXVFS("unix", autolockIoFinder ), +#elif OS_VXWORKS + UNIXVFS("unix", vxworksIoFinder ), +#else + UNIXVFS("unix", posixIoFinder ), +#endif + UNIXVFS("unix-none", nolockIoFinder ), + UNIXVFS("unix-dotfile", dotlockIoFinder ), + UNIXVFS("unix-excl", posixIoFinder ), +#if OS_VXWORKS + UNIXVFS("unix-namedsem", semIoFinder ), +#endif +#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS + UNIXVFS("unix-posix", posixIoFinder ), +#endif +#if SQLITE_ENABLE_LOCKING_STYLE + UNIXVFS("unix-flock", flockIoFinder ), +#endif +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + UNIXVFS("unix-afp", afpIoFinder ), + UNIXVFS("unix-nfs", nfsIoFinder ), + UNIXVFS("unix-proxy", proxyIoFinder ), +#endif + }; + unsigned int i; /* Loop counter */ + + /* Double-check that the aSyscall[] array has been constructed + ** correctly. See ticket [bb3a86e890c8e96ab] */ + assert( ArraySize(aSyscall)==29 ); + + /* Register all VFSes defined in the aVfs[] array */ + for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ + sqlite3_vfs_register(&aVfs[i], i==0); + } + unixBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); + +#ifndef SQLITE_OMIT_WAL + /* Validate lock assumptions */ + assert( SQLITE_SHM_NLOCK==8 ); /* Number of available locks */ + assert( UNIX_SHM_BASE==120 ); /* Start of locking area */ + /* Locks: + ** WRITE UNIX_SHM_BASE 120 + ** CKPT UNIX_SHM_BASE+1 121 + ** RECOVER UNIX_SHM_BASE+2 122 + ** READ-0 UNIX_SHM_BASE+3 123 + ** READ-1 UNIX_SHM_BASE+4 124 + ** READ-2 UNIX_SHM_BASE+5 125 + ** READ-3 UNIX_SHM_BASE+6 126 + ** READ-4 UNIX_SHM_BASE+7 127 + ** DMS UNIX_SHM_BASE+8 128 + */ + assert( UNIX_SHM_DMS==128 ); /* Byte offset of the deadman-switch */ +#endif + + /* Initialize temp file dir array. */ + unixTempFileInit(); + + return SQLITE_OK; +} + +/* +** Shutdown the operating system interface. +** +** Some operating systems might need to do some cleanup in this routine, +** to release dynamically allocated objects. But not on unix. +** This routine is a no-op for unix. +*/ +SQLITE_API int sqlite3_os_end(void){ + unixBigLock = 0; + return SQLITE_OK; +} + +#endif /* SQLITE_OS_UNIX */ + +/************** End of os_unix.c *********************************************/ +/************** Begin file os_win.c ******************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to Windows. +*/ +/* #include "sqliteInt.h" */ +#if SQLITE_OS_WIN /* This file is used for Windows only */ + +/* +** Include code that is common to all os_*.c files +*/ +/* #include "os_common.h" */ + +/* +** Include the header file for the Windows VFS. +*/ +/* #include "os_win.h" */ + +/* +** Compiling and using WAL mode requires several APIs that are only +** available in Windows platforms based on the NT kernel. +*/ +#if !SQLITE_OS_WINNT && !defined(SQLITE_OMIT_WAL) +# error "WAL mode requires support from the Windows NT kernel, compile\ + with SQLITE_OMIT_WAL." +#endif + +#if !SQLITE_OS_WINNT && SQLITE_MAX_MMAP_SIZE>0 +# error "Memory mapped files require support from the Windows NT kernel,\ + compile with SQLITE_MAX_MMAP_SIZE=0." +#endif + +/* +** Are most of the Win32 ANSI APIs available (i.e. with certain exceptions +** based on the sub-platform)? +*/ +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(SQLITE_WIN32_NO_ANSI) +# define SQLITE_WIN32_HAS_ANSI +#endif + +/* +** Are most of the Win32 Unicode APIs available (i.e. with certain exceptions +** based on the sub-platform)? +*/ +#if (SQLITE_OS_WINCE || SQLITE_OS_WINNT || SQLITE_OS_WINRT) && \ + !defined(SQLITE_WIN32_NO_WIDE) +# define SQLITE_WIN32_HAS_WIDE +#endif + +/* +** Make sure at least one set of Win32 APIs is available. +*/ +#if !defined(SQLITE_WIN32_HAS_ANSI) && !defined(SQLITE_WIN32_HAS_WIDE) +# error "At least one of SQLITE_WIN32_HAS_ANSI and SQLITE_WIN32_HAS_WIDE\ + must be defined." +#endif + +/* +** Define the required Windows SDK version constants if they are not +** already available. +*/ +#ifndef NTDDI_WIN8 +# define NTDDI_WIN8 0x06020000 +#endif + +#ifndef NTDDI_WINBLUE +# define NTDDI_WINBLUE 0x06030000 +#endif + +#ifndef NTDDI_WINTHRESHOLD +# define NTDDI_WINTHRESHOLD 0x06040000 +#endif + +/* +** Check to see if the GetVersionEx[AW] functions are deprecated on the +** target system. GetVersionEx was first deprecated in Win8.1. +*/ +#ifndef SQLITE_WIN32_GETVERSIONEX +# if defined(NTDDI_VERSION) && NTDDI_VERSION >= NTDDI_WINBLUE +# define SQLITE_WIN32_GETVERSIONEX 0 /* GetVersionEx() is deprecated */ +# else +# define SQLITE_WIN32_GETVERSIONEX 1 /* GetVersionEx() is current */ +# endif +#endif + +/* +** Check to see if the CreateFileMappingA function is supported on the +** target system. It is unavailable when using "mincore.lib" on Win10. +** When compiling for Windows 10, always assume "mincore.lib" is in use. +*/ +#ifndef SQLITE_WIN32_CREATEFILEMAPPINGA +# if defined(NTDDI_VERSION) && NTDDI_VERSION >= NTDDI_WINTHRESHOLD +# define SQLITE_WIN32_CREATEFILEMAPPINGA 0 +# else +# define SQLITE_WIN32_CREATEFILEMAPPINGA 1 +# endif +#endif + +/* +** This constant should already be defined (in the "WinDef.h" SDK file). +*/ +#ifndef MAX_PATH +# define MAX_PATH (260) +#endif + +/* +** Maximum pathname length (in chars) for Win32. This should normally be +** MAX_PATH. +*/ +#ifndef SQLITE_WIN32_MAX_PATH_CHARS +# define SQLITE_WIN32_MAX_PATH_CHARS (MAX_PATH) +#endif + +/* +** This constant should already be defined (in the "WinNT.h" SDK file). +*/ +#ifndef UNICODE_STRING_MAX_CHARS +# define UNICODE_STRING_MAX_CHARS (32767) +#endif + +/* +** Maximum pathname length (in chars) for WinNT. This should normally be +** UNICODE_STRING_MAX_CHARS. +*/ +#ifndef SQLITE_WINNT_MAX_PATH_CHARS +# define SQLITE_WINNT_MAX_PATH_CHARS (UNICODE_STRING_MAX_CHARS) +#endif + +/* +** Maximum pathname length (in bytes) for Win32. The MAX_PATH macro is in +** characters, so we allocate 4 bytes per character assuming worst-case of +** 4-bytes-per-character for UTF8. +*/ +#ifndef SQLITE_WIN32_MAX_PATH_BYTES +# define SQLITE_WIN32_MAX_PATH_BYTES (SQLITE_WIN32_MAX_PATH_CHARS*4) +#endif + +/* +** Maximum pathname length (in bytes) for WinNT. This should normally be +** UNICODE_STRING_MAX_CHARS * sizeof(WCHAR). +*/ +#ifndef SQLITE_WINNT_MAX_PATH_BYTES +# define SQLITE_WINNT_MAX_PATH_BYTES \ + (sizeof(WCHAR) * SQLITE_WINNT_MAX_PATH_CHARS) +#endif + +/* +** Maximum error message length (in chars) for WinRT. +*/ +#ifndef SQLITE_WIN32_MAX_ERRMSG_CHARS +# define SQLITE_WIN32_MAX_ERRMSG_CHARS (1024) +#endif + +/* +** Returns non-zero if the character should be treated as a directory +** separator. +*/ +#ifndef winIsDirSep +# define winIsDirSep(a) (((a) == '/') || ((a) == '\\')) +#endif + +/* +** This macro is used when a local variable is set to a value that is +** [sometimes] not used by the code (e.g. via conditional compilation). +*/ +#ifndef UNUSED_VARIABLE_VALUE +# define UNUSED_VARIABLE_VALUE(x) (void)(x) +#endif + +/* +** Returns the character that should be used as the directory separator. +*/ +#ifndef winGetDirSep +# define winGetDirSep() '\\' +#endif + +/* +** Do we need to manually define the Win32 file mapping APIs for use with WAL +** mode or memory mapped files (e.g. these APIs are available in the Windows +** CE SDK; however, they are not present in the header file)? +*/ +#if SQLITE_WIN32_FILEMAPPING_API && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) +/* +** Two of the file mapping APIs are different under WinRT. Figure out which +** set we need. +*/ +#if SQLITE_OS_WINRT +WINBASEAPI HANDLE WINAPI CreateFileMappingFromApp(HANDLE, \ + LPSECURITY_ATTRIBUTES, ULONG, ULONG64, LPCWSTR); + +WINBASEAPI LPVOID WINAPI MapViewOfFileFromApp(HANDLE, ULONG, ULONG64, SIZE_T); +#else +#if defined(SQLITE_WIN32_HAS_ANSI) +WINBASEAPI HANDLE WINAPI CreateFileMappingA(HANDLE, LPSECURITY_ATTRIBUTES, \ + DWORD, DWORD, DWORD, LPCSTR); +#endif /* defined(SQLITE_WIN32_HAS_ANSI) */ + +#if defined(SQLITE_WIN32_HAS_WIDE) +WINBASEAPI HANDLE WINAPI CreateFileMappingW(HANDLE, LPSECURITY_ATTRIBUTES, \ + DWORD, DWORD, DWORD, LPCWSTR); +#endif /* defined(SQLITE_WIN32_HAS_WIDE) */ + +WINBASEAPI LPVOID WINAPI MapViewOfFile(HANDLE, DWORD, DWORD, DWORD, SIZE_T); +#endif /* SQLITE_OS_WINRT */ + +/* +** These file mapping APIs are common to both Win32 and WinRT. +*/ + +WINBASEAPI BOOL WINAPI FlushViewOfFile(LPCVOID, SIZE_T); +WINBASEAPI BOOL WINAPI UnmapViewOfFile(LPCVOID); +#endif /* SQLITE_WIN32_FILEMAPPING_API */ + +/* +** Some Microsoft compilers lack this definition. +*/ +#ifndef INVALID_FILE_ATTRIBUTES +# define INVALID_FILE_ATTRIBUTES ((DWORD)-1) +#endif + +#ifndef FILE_FLAG_MASK +# define FILE_FLAG_MASK (0xFF3C0000) +#endif + +#ifndef FILE_ATTRIBUTE_MASK +# define FILE_ATTRIBUTE_MASK (0x0003FFF7) +#endif + +#ifndef SQLITE_OMIT_WAL +/* Forward references to structures used for WAL */ +typedef struct winShm winShm; /* A connection to shared-memory */ +typedef struct winShmNode winShmNode; /* A region of shared-memory */ +#endif + +/* +** WinCE lacks native support for file locking so we have to fake it +** with some code of our own. +*/ +#if SQLITE_OS_WINCE +typedef struct winceLock { + int nReaders; /* Number of reader locks obtained */ + BOOL bPending; /* Indicates a pending lock has been obtained */ + BOOL bReserved; /* Indicates a reserved lock has been obtained */ + BOOL bExclusive; /* Indicates an exclusive lock has been obtained */ +} winceLock; +#endif + +/* +** The winFile structure is a subclass of sqlite3_file* specific to the win32 +** portability layer. +*/ +typedef struct winFile winFile; +struct winFile { + const sqlite3_io_methods *pMethod; /*** Must be first ***/ + sqlite3_vfs *pVfs; /* The VFS used to open this file */ + HANDLE h; /* Handle for accessing the file */ + u8 locktype; /* Type of lock currently held on this file */ + short sharedLockByte; /* Randomly chosen byte used as a shared lock */ + u8 ctrlFlags; /* Flags. See WINFILE_* below */ + DWORD lastErrno; /* The Windows errno from the last I/O error */ +#ifndef SQLITE_OMIT_WAL + winShm *pShm; /* Instance of shared memory on this file */ +#endif + const char *zPath; /* Full pathname of this file */ + int szChunk; /* Chunk size configured by FCNTL_CHUNK_SIZE */ +#if SQLITE_OS_WINCE + LPWSTR zDeleteOnClose; /* Name of file to delete when closing */ + HANDLE hMutex; /* Mutex used to control access to shared lock */ + HANDLE hShared; /* Shared memory segment used for locking */ + winceLock local; /* Locks obtained by this instance of winFile */ + winceLock *shared; /* Global shared lock memory for the file */ +#endif +#if SQLITE_MAX_MMAP_SIZE>0 + int nFetchOut; /* Number of outstanding xFetch references */ + HANDLE hMap; /* Handle for accessing memory mapping */ + void *pMapRegion; /* Area memory mapped */ + sqlite3_int64 mmapSize; /* Size of mapped region */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ +#endif +}; + +/* +** The winVfsAppData structure is used for the pAppData member for all of the +** Win32 VFS variants. +*/ +typedef struct winVfsAppData winVfsAppData; +struct winVfsAppData { + const sqlite3_io_methods *pMethod; /* The file I/O methods to use. */ + void *pAppData; /* The extra pAppData, if any. */ + BOOL bNoLock; /* Non-zero if locking is disabled. */ +}; + +/* +** Allowed values for winFile.ctrlFlags +*/ +#define WINFILE_RDONLY 0x02 /* Connection is read only */ +#define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ + +/* + * The size of the buffer used by sqlite3_win32_write_debug(). + */ +#ifndef SQLITE_WIN32_DBG_BUF_SIZE +# define SQLITE_WIN32_DBG_BUF_SIZE ((int)(4096-sizeof(DWORD))) +#endif + +/* + * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the + * various Win32 API heap functions instead of our own. + */ +#ifdef SQLITE_WIN32_MALLOC + +/* + * If this is non-zero, an isolated heap will be created by the native Win32 + * allocator subsystem; otherwise, the default process heap will be used. This + * setting has no effect when compiling for WinRT. By default, this is enabled + * and an isolated heap will be created to store all allocated data. + * + ****************************************************************************** + * WARNING: It is important to note that when this setting is non-zero and the + * winMemShutdown function is called (e.g. by the sqlite3_shutdown + * function), all data that was allocated using the isolated heap will + * be freed immediately and any attempt to access any of that freed + * data will almost certainly result in an immediate access violation. + ****************************************************************************** + */ +#ifndef SQLITE_WIN32_HEAP_CREATE +# define SQLITE_WIN32_HEAP_CREATE (TRUE) +#endif + +/* + * This is the maximum possible initial size of the Win32-specific heap, in + * bytes. + */ +#ifndef SQLITE_WIN32_HEAP_MAX_INIT_SIZE +# define SQLITE_WIN32_HEAP_MAX_INIT_SIZE (4294967295U) +#endif + +/* + * This is the extra space for the initial size of the Win32-specific heap, + * in bytes. This value may be zero. + */ +#ifndef SQLITE_WIN32_HEAP_INIT_EXTRA +# define SQLITE_WIN32_HEAP_INIT_EXTRA (4194304) +#endif + +/* + * Calculate the maximum legal cache size, in pages, based on the maximum + * possible initial heap size and the default page size, setting aside the + * needed extra space. + */ +#ifndef SQLITE_WIN32_MAX_CACHE_SIZE +# define SQLITE_WIN32_MAX_CACHE_SIZE (((SQLITE_WIN32_HEAP_MAX_INIT_SIZE) - \ + (SQLITE_WIN32_HEAP_INIT_EXTRA)) / \ + (SQLITE_DEFAULT_PAGE_SIZE)) +#endif + +/* + * This is cache size used in the calculation of the initial size of the + * Win32-specific heap. It cannot be negative. + */ +#ifndef SQLITE_WIN32_CACHE_SIZE +# if SQLITE_DEFAULT_CACHE_SIZE>=0 +# define SQLITE_WIN32_CACHE_SIZE (SQLITE_DEFAULT_CACHE_SIZE) +# else +# define SQLITE_WIN32_CACHE_SIZE (-(SQLITE_DEFAULT_CACHE_SIZE)) +# endif +#endif + +/* + * Make sure that the calculated cache size, in pages, cannot cause the + * initial size of the Win32-specific heap to exceed the maximum amount + * of memory that can be specified in the call to HeapCreate. + */ +#if SQLITE_WIN32_CACHE_SIZE>SQLITE_WIN32_MAX_CACHE_SIZE +# undef SQLITE_WIN32_CACHE_SIZE +# define SQLITE_WIN32_CACHE_SIZE (2000) +#endif + +/* + * The initial size of the Win32-specific heap. This value may be zero. + */ +#ifndef SQLITE_WIN32_HEAP_INIT_SIZE +# define SQLITE_WIN32_HEAP_INIT_SIZE ((SQLITE_WIN32_CACHE_SIZE) * \ + (SQLITE_DEFAULT_PAGE_SIZE) + \ + (SQLITE_WIN32_HEAP_INIT_EXTRA)) +#endif + +/* + * The maximum size of the Win32-specific heap. This value may be zero. + */ +#ifndef SQLITE_WIN32_HEAP_MAX_SIZE +# define SQLITE_WIN32_HEAP_MAX_SIZE (0) +#endif + +/* + * The extra flags to use in calls to the Win32 heap APIs. This value may be + * zero for the default behavior. + */ +#ifndef SQLITE_WIN32_HEAP_FLAGS +# define SQLITE_WIN32_HEAP_FLAGS (0) +#endif + + +/* +** The winMemData structure stores information required by the Win32-specific +** sqlite3_mem_methods implementation. +*/ +typedef struct winMemData winMemData; +struct winMemData { +#ifndef NDEBUG + u32 magic1; /* Magic number to detect structure corruption. */ +#endif + HANDLE hHeap; /* The handle to our heap. */ + BOOL bOwned; /* Do we own the heap (i.e. destroy it on shutdown)? */ +#ifndef NDEBUG + u32 magic2; /* Magic number to detect structure corruption. */ +#endif +}; + +#ifndef NDEBUG +#define WINMEM_MAGIC1 0x42b2830b +#define WINMEM_MAGIC2 0xbd4d7cf4 +#endif + +static struct winMemData win_mem_data = { +#ifndef NDEBUG + WINMEM_MAGIC1, +#endif + NULL, FALSE +#ifndef NDEBUG + ,WINMEM_MAGIC2 +#endif +}; + +#ifndef NDEBUG +#define winMemAssertMagic1() assert( win_mem_data.magic1==WINMEM_MAGIC1 ) +#define winMemAssertMagic2() assert( win_mem_data.magic2==WINMEM_MAGIC2 ) +#define winMemAssertMagic() winMemAssertMagic1(); winMemAssertMagic2(); +#else +#define winMemAssertMagic() +#endif + +#define winMemGetDataPtr() &win_mem_data +#define winMemGetHeap() win_mem_data.hHeap +#define winMemGetOwned() win_mem_data.bOwned + +static void *winMemMalloc(int nBytes); +static void winMemFree(void *pPrior); +static void *winMemRealloc(void *pPrior, int nBytes); +static int winMemSize(void *p); +static int winMemRoundup(int n); +static int winMemInit(void *pAppData); +static void winMemShutdown(void *pAppData); + +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void); +#endif /* SQLITE_WIN32_MALLOC */ + +/* +** The following variable is (normally) set once and never changes +** thereafter. It records whether the operating system is Win9x +** or WinNT. +** +** 0: Operating system unknown. +** 1: Operating system is Win9x. +** 2: Operating system is WinNT. +** +** In order to facilitate testing on a WinNT system, the test fixture +** can manually set this value to 1 to emulate Win98 behavior. +*/ +#ifdef SQLITE_TEST +SQLITE_API LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0; +#else +static LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0; +#endif + +#ifndef SYSCALL +# define SYSCALL sqlite3_syscall_ptr +#endif + +/* +** This function is not available on Windows CE or WinRT. + */ + +#if SQLITE_OS_WINCE || SQLITE_OS_WINRT +# define osAreFileApisANSI() 1 +#endif + +/* +** Many system calls are accessed through pointer-to-functions so that +** they may be overridden at runtime to facilitate fault injection during +** testing and sandboxing. The following array holds the names and pointers +** to all overrideable system calls. +*/ +static struct win_syscall { + const char *zName; /* Name of the system call */ + sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ + sqlite3_syscall_ptr pDefault; /* Default value */ +} aSyscall[] = { +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "AreFileApisANSI", (SYSCALL)AreFileApisANSI, 0 }, +#else + { "AreFileApisANSI", (SYSCALL)0, 0 }, +#endif + +#ifndef osAreFileApisANSI +#define osAreFileApisANSI ((BOOL(WINAPI*)(VOID))aSyscall[0].pCurrent) +#endif + +#if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE) + { "CharLowerW", (SYSCALL)CharLowerW, 0 }, +#else + { "CharLowerW", (SYSCALL)0, 0 }, +#endif + +#define osCharLowerW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[1].pCurrent) + +#if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE) + { "CharUpperW", (SYSCALL)CharUpperW, 0 }, +#else + { "CharUpperW", (SYSCALL)0, 0 }, +#endif + +#define osCharUpperW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[2].pCurrent) + + { "CloseHandle", (SYSCALL)CloseHandle, 0 }, + +#define osCloseHandle ((BOOL(WINAPI*)(HANDLE))aSyscall[3].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "CreateFileA", (SYSCALL)CreateFileA, 0 }, +#else + { "CreateFileA", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileA ((HANDLE(WINAPI*)(LPCSTR,DWORD,DWORD, \ + LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[4].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "CreateFileW", (SYSCALL)CreateFileW, 0 }, +#else + { "CreateFileW", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileW ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD, \ + LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[5].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_ANSI) && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) && \ + SQLITE_WIN32_CREATEFILEMAPPINGA + { "CreateFileMappingA", (SYSCALL)CreateFileMappingA, 0 }, +#else + { "CreateFileMappingA", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileMappingA ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \ + DWORD,DWORD,DWORD,LPCSTR))aSyscall[6].pCurrent) + +#if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) + { "CreateFileMappingW", (SYSCALL)CreateFileMappingW, 0 }, +#else + { "CreateFileMappingW", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileMappingW ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \ + DWORD,DWORD,DWORD,LPCWSTR))aSyscall[7].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "CreateMutexW", (SYSCALL)CreateMutexW, 0 }, +#else + { "CreateMutexW", (SYSCALL)0, 0 }, +#endif + +#define osCreateMutexW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,BOOL, \ + LPCWSTR))aSyscall[8].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "DeleteFileA", (SYSCALL)DeleteFileA, 0 }, +#else + { "DeleteFileA", (SYSCALL)0, 0 }, +#endif + +#define osDeleteFileA ((BOOL(WINAPI*)(LPCSTR))aSyscall[9].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "DeleteFileW", (SYSCALL)DeleteFileW, 0 }, +#else + { "DeleteFileW", (SYSCALL)0, 0 }, +#endif + +#define osDeleteFileW ((BOOL(WINAPI*)(LPCWSTR))aSyscall[10].pCurrent) + +#if SQLITE_OS_WINCE + { "FileTimeToLocalFileTime", (SYSCALL)FileTimeToLocalFileTime, 0 }, +#else + { "FileTimeToLocalFileTime", (SYSCALL)0, 0 }, +#endif + +#define osFileTimeToLocalFileTime ((BOOL(WINAPI*)(CONST FILETIME*, \ + LPFILETIME))aSyscall[11].pCurrent) + +#if SQLITE_OS_WINCE + { "FileTimeToSystemTime", (SYSCALL)FileTimeToSystemTime, 0 }, +#else + { "FileTimeToSystemTime", (SYSCALL)0, 0 }, +#endif + +#define osFileTimeToSystemTime ((BOOL(WINAPI*)(CONST FILETIME*, \ + LPSYSTEMTIME))aSyscall[12].pCurrent) + + { "FlushFileBuffers", (SYSCALL)FlushFileBuffers, 0 }, + +#define osFlushFileBuffers ((BOOL(WINAPI*)(HANDLE))aSyscall[13].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "FormatMessageA", (SYSCALL)FormatMessageA, 0 }, +#else + { "FormatMessageA", (SYSCALL)0, 0 }, +#endif + +#define osFormatMessageA ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPSTR, \ + DWORD,va_list*))aSyscall[14].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "FormatMessageW", (SYSCALL)FormatMessageW, 0 }, +#else + { "FormatMessageW", (SYSCALL)0, 0 }, +#endif + +#define osFormatMessageW ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPWSTR, \ + DWORD,va_list*))aSyscall[15].pCurrent) + +#if !defined(SQLITE_OMIT_LOAD_EXTENSION) + { "FreeLibrary", (SYSCALL)FreeLibrary, 0 }, +#else + { "FreeLibrary", (SYSCALL)0, 0 }, +#endif + +#define osFreeLibrary ((BOOL(WINAPI*)(HMODULE))aSyscall[16].pCurrent) + + { "GetCurrentProcessId", (SYSCALL)GetCurrentProcessId, 0 }, + +#define osGetCurrentProcessId ((DWORD(WINAPI*)(VOID))aSyscall[17].pCurrent) + +#if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI) + { "GetDiskFreeSpaceA", (SYSCALL)GetDiskFreeSpaceA, 0 }, +#else + { "GetDiskFreeSpaceA", (SYSCALL)0, 0 }, +#endif + +#define osGetDiskFreeSpaceA ((BOOL(WINAPI*)(LPCSTR,LPDWORD,LPDWORD,LPDWORD, \ + LPDWORD))aSyscall[18].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetDiskFreeSpaceW", (SYSCALL)GetDiskFreeSpaceW, 0 }, +#else + { "GetDiskFreeSpaceW", (SYSCALL)0, 0 }, +#endif + +#define osGetDiskFreeSpaceW ((BOOL(WINAPI*)(LPCWSTR,LPDWORD,LPDWORD,LPDWORD, \ + LPDWORD))aSyscall[19].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "GetFileAttributesA", (SYSCALL)GetFileAttributesA, 0 }, +#else + { "GetFileAttributesA", (SYSCALL)0, 0 }, +#endif + +#define osGetFileAttributesA ((DWORD(WINAPI*)(LPCSTR))aSyscall[20].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetFileAttributesW", (SYSCALL)GetFileAttributesW, 0 }, +#else + { "GetFileAttributesW", (SYSCALL)0, 0 }, +#endif + +#define osGetFileAttributesW ((DWORD(WINAPI*)(LPCWSTR))aSyscall[21].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "GetFileAttributesExW", (SYSCALL)GetFileAttributesExW, 0 }, +#else + { "GetFileAttributesExW", (SYSCALL)0, 0 }, +#endif + +#define osGetFileAttributesExW ((BOOL(WINAPI*)(LPCWSTR,GET_FILEEX_INFO_LEVELS, \ + LPVOID))aSyscall[22].pCurrent) + +#if !SQLITE_OS_WINRT + { "GetFileSize", (SYSCALL)GetFileSize, 0 }, +#else + { "GetFileSize", (SYSCALL)0, 0 }, +#endif + +#define osGetFileSize ((DWORD(WINAPI*)(HANDLE,LPDWORD))aSyscall[23].pCurrent) + +#if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI) + { "GetFullPathNameA", (SYSCALL)GetFullPathNameA, 0 }, +#else + { "GetFullPathNameA", (SYSCALL)0, 0 }, +#endif + +#define osGetFullPathNameA ((DWORD(WINAPI*)(LPCSTR,DWORD,LPSTR, \ + LPSTR*))aSyscall[24].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetFullPathNameW", (SYSCALL)GetFullPathNameW, 0 }, +#else + { "GetFullPathNameW", (SYSCALL)0, 0 }, +#endif + +#define osGetFullPathNameW ((DWORD(WINAPI*)(LPCWSTR,DWORD,LPWSTR, \ + LPWSTR*))aSyscall[25].pCurrent) + + { "GetLastError", (SYSCALL)GetLastError, 0 }, + +#define osGetLastError ((DWORD(WINAPI*)(VOID))aSyscall[26].pCurrent) + +#if !defined(SQLITE_OMIT_LOAD_EXTENSION) +#if SQLITE_OS_WINCE + /* The GetProcAddressA() routine is only available on Windows CE. */ + { "GetProcAddressA", (SYSCALL)GetProcAddressA, 0 }, +#else + /* All other Windows platforms expect GetProcAddress() to take + ** an ANSI string regardless of the _UNICODE setting */ + { "GetProcAddressA", (SYSCALL)GetProcAddress, 0 }, +#endif +#else + { "GetProcAddressA", (SYSCALL)0, 0 }, +#endif + +#define osGetProcAddressA ((FARPROC(WINAPI*)(HMODULE, \ + LPCSTR))aSyscall[27].pCurrent) + +#if !SQLITE_OS_WINRT + { "GetSystemInfo", (SYSCALL)GetSystemInfo, 0 }, +#else + { "GetSystemInfo", (SYSCALL)0, 0 }, +#endif + +#define osGetSystemInfo ((VOID(WINAPI*)(LPSYSTEM_INFO))aSyscall[28].pCurrent) + + { "GetSystemTime", (SYSCALL)GetSystemTime, 0 }, + +#define osGetSystemTime ((VOID(WINAPI*)(LPSYSTEMTIME))aSyscall[29].pCurrent) + +#if !SQLITE_OS_WINCE + { "GetSystemTimeAsFileTime", (SYSCALL)GetSystemTimeAsFileTime, 0 }, +#else + { "GetSystemTimeAsFileTime", (SYSCALL)0, 0 }, +#endif + +#define osGetSystemTimeAsFileTime ((VOID(WINAPI*)( \ + LPFILETIME))aSyscall[30].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "GetTempPathA", (SYSCALL)GetTempPathA, 0 }, +#else + { "GetTempPathA", (SYSCALL)0, 0 }, +#endif + +#define osGetTempPathA ((DWORD(WINAPI*)(DWORD,LPSTR))aSyscall[31].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetTempPathW", (SYSCALL)GetTempPathW, 0 }, +#else + { "GetTempPathW", (SYSCALL)0, 0 }, +#endif + +#define osGetTempPathW ((DWORD(WINAPI*)(DWORD,LPWSTR))aSyscall[32].pCurrent) + +#if !SQLITE_OS_WINRT + { "GetTickCount", (SYSCALL)GetTickCount, 0 }, +#else + { "GetTickCount", (SYSCALL)0, 0 }, +#endif + +#define osGetTickCount ((DWORD(WINAPI*)(VOID))aSyscall[33].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) && SQLITE_WIN32_GETVERSIONEX + { "GetVersionExA", (SYSCALL)GetVersionExA, 0 }, +#else + { "GetVersionExA", (SYSCALL)0, 0 }, +#endif + +#define osGetVersionExA ((BOOL(WINAPI*)( \ + LPOSVERSIONINFOA))aSyscall[34].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \ + SQLITE_WIN32_GETVERSIONEX + { "GetVersionExW", (SYSCALL)GetVersionExW, 0 }, +#else + { "GetVersionExW", (SYSCALL)0, 0 }, +#endif + +#define osGetVersionExW ((BOOL(WINAPI*)( \ + LPOSVERSIONINFOW))aSyscall[35].pCurrent) + + { "HeapAlloc", (SYSCALL)HeapAlloc, 0 }, + +#define osHeapAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD, \ + SIZE_T))aSyscall[36].pCurrent) + +#if !SQLITE_OS_WINRT + { "HeapCreate", (SYSCALL)HeapCreate, 0 }, +#else + { "HeapCreate", (SYSCALL)0, 0 }, +#endif + +#define osHeapCreate ((HANDLE(WINAPI*)(DWORD,SIZE_T, \ + SIZE_T))aSyscall[37].pCurrent) + +#if !SQLITE_OS_WINRT + { "HeapDestroy", (SYSCALL)HeapDestroy, 0 }, +#else + { "HeapDestroy", (SYSCALL)0, 0 }, +#endif + +#define osHeapDestroy ((BOOL(WINAPI*)(HANDLE))aSyscall[38].pCurrent) + + { "HeapFree", (SYSCALL)HeapFree, 0 }, + +#define osHeapFree ((BOOL(WINAPI*)(HANDLE,DWORD,LPVOID))aSyscall[39].pCurrent) + + { "HeapReAlloc", (SYSCALL)HeapReAlloc, 0 }, + +#define osHeapReAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD,LPVOID, \ + SIZE_T))aSyscall[40].pCurrent) + + { "HeapSize", (SYSCALL)HeapSize, 0 }, + +#define osHeapSize ((SIZE_T(WINAPI*)(HANDLE,DWORD, \ + LPCVOID))aSyscall[41].pCurrent) + +#if !SQLITE_OS_WINRT + { "HeapValidate", (SYSCALL)HeapValidate, 0 }, +#else + { "HeapValidate", (SYSCALL)0, 0 }, +#endif + +#define osHeapValidate ((BOOL(WINAPI*)(HANDLE,DWORD, \ + LPCVOID))aSyscall[42].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "HeapCompact", (SYSCALL)HeapCompact, 0 }, +#else + { "HeapCompact", (SYSCALL)0, 0 }, +#endif + +#define osHeapCompact ((UINT(WINAPI*)(HANDLE,DWORD))aSyscall[43].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) && !defined(SQLITE_OMIT_LOAD_EXTENSION) + { "LoadLibraryA", (SYSCALL)LoadLibraryA, 0 }, +#else + { "LoadLibraryA", (SYSCALL)0, 0 }, +#endif + +#define osLoadLibraryA ((HMODULE(WINAPI*)(LPCSTR))aSyscall[44].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \ + !defined(SQLITE_OMIT_LOAD_EXTENSION) + { "LoadLibraryW", (SYSCALL)LoadLibraryW, 0 }, +#else + { "LoadLibraryW", (SYSCALL)0, 0 }, +#endif + +#define osLoadLibraryW ((HMODULE(WINAPI*)(LPCWSTR))aSyscall[45].pCurrent) + +#if !SQLITE_OS_WINRT + { "LocalFree", (SYSCALL)LocalFree, 0 }, +#else + { "LocalFree", (SYSCALL)0, 0 }, +#endif + +#define osLocalFree ((HLOCAL(WINAPI*)(HLOCAL))aSyscall[46].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "LockFile", (SYSCALL)LockFile, 0 }, +#else + { "LockFile", (SYSCALL)0, 0 }, +#endif + +#ifndef osLockFile +#define osLockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + DWORD))aSyscall[47].pCurrent) +#endif + +#if !SQLITE_OS_WINCE + { "LockFileEx", (SYSCALL)LockFileEx, 0 }, +#else + { "LockFileEx", (SYSCALL)0, 0 }, +#endif + +#ifndef osLockFileEx +#define osLockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD,DWORD, \ + LPOVERLAPPED))aSyscall[48].pCurrent) +#endif + +#if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) + { "MapViewOfFile", (SYSCALL)MapViewOfFile, 0 }, +#else + { "MapViewOfFile", (SYSCALL)0, 0 }, +#endif + +#define osMapViewOfFile ((LPVOID(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + SIZE_T))aSyscall[49].pCurrent) + + { "MultiByteToWideChar", (SYSCALL)MultiByteToWideChar, 0 }, + +#define osMultiByteToWideChar ((int(WINAPI*)(UINT,DWORD,LPCSTR,int,LPWSTR, \ + int))aSyscall[50].pCurrent) + + { "QueryPerformanceCounter", (SYSCALL)QueryPerformanceCounter, 0 }, + +#define osQueryPerformanceCounter ((BOOL(WINAPI*)( \ + LARGE_INTEGER*))aSyscall[51].pCurrent) + + { "ReadFile", (SYSCALL)ReadFile, 0 }, + +#define osReadFile ((BOOL(WINAPI*)(HANDLE,LPVOID,DWORD,LPDWORD, \ + LPOVERLAPPED))aSyscall[52].pCurrent) + + { "SetEndOfFile", (SYSCALL)SetEndOfFile, 0 }, + +#define osSetEndOfFile ((BOOL(WINAPI*)(HANDLE))aSyscall[53].pCurrent) + +#if !SQLITE_OS_WINRT + { "SetFilePointer", (SYSCALL)SetFilePointer, 0 }, +#else + { "SetFilePointer", (SYSCALL)0, 0 }, +#endif + +#define osSetFilePointer ((DWORD(WINAPI*)(HANDLE,LONG,PLONG, \ + DWORD))aSyscall[54].pCurrent) + +#if !SQLITE_OS_WINRT + { "Sleep", (SYSCALL)Sleep, 0 }, +#else + { "Sleep", (SYSCALL)0, 0 }, +#endif + +#define osSleep ((VOID(WINAPI*)(DWORD))aSyscall[55].pCurrent) + + { "SystemTimeToFileTime", (SYSCALL)SystemTimeToFileTime, 0 }, + +#define osSystemTimeToFileTime ((BOOL(WINAPI*)(CONST SYSTEMTIME*, \ + LPFILETIME))aSyscall[56].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "UnlockFile", (SYSCALL)UnlockFile, 0 }, +#else + { "UnlockFile", (SYSCALL)0, 0 }, +#endif + +#ifndef osUnlockFile +#define osUnlockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + DWORD))aSyscall[57].pCurrent) +#endif + +#if !SQLITE_OS_WINCE + { "UnlockFileEx", (SYSCALL)UnlockFileEx, 0 }, +#else + { "UnlockFileEx", (SYSCALL)0, 0 }, +#endif + +#define osUnlockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + LPOVERLAPPED))aSyscall[58].pCurrent) + +#if SQLITE_OS_WINCE || !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + { "UnmapViewOfFile", (SYSCALL)UnmapViewOfFile, 0 }, +#else + { "UnmapViewOfFile", (SYSCALL)0, 0 }, +#endif + +#define osUnmapViewOfFile ((BOOL(WINAPI*)(LPCVOID))aSyscall[59].pCurrent) + + { "WideCharToMultiByte", (SYSCALL)WideCharToMultiByte, 0 }, + +#define osWideCharToMultiByte ((int(WINAPI*)(UINT,DWORD,LPCWSTR,int,LPSTR,int, \ + LPCSTR,LPBOOL))aSyscall[60].pCurrent) + + { "WriteFile", (SYSCALL)WriteFile, 0 }, + +#define osWriteFile ((BOOL(WINAPI*)(HANDLE,LPCVOID,DWORD,LPDWORD, \ + LPOVERLAPPED))aSyscall[61].pCurrent) + +#if SQLITE_OS_WINRT + { "CreateEventExW", (SYSCALL)CreateEventExW, 0 }, +#else + { "CreateEventExW", (SYSCALL)0, 0 }, +#endif + +#define osCreateEventExW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,LPCWSTR, \ + DWORD,DWORD))aSyscall[62].pCurrent) + +#if !SQLITE_OS_WINRT + { "WaitForSingleObject", (SYSCALL)WaitForSingleObject, 0 }, +#else + { "WaitForSingleObject", (SYSCALL)0, 0 }, +#endif + +#define osWaitForSingleObject ((DWORD(WINAPI*)(HANDLE, \ + DWORD))aSyscall[63].pCurrent) + +#if !SQLITE_OS_WINCE + { "WaitForSingleObjectEx", (SYSCALL)WaitForSingleObjectEx, 0 }, +#else + { "WaitForSingleObjectEx", (SYSCALL)0, 0 }, +#endif + +#define osWaitForSingleObjectEx ((DWORD(WINAPI*)(HANDLE,DWORD, \ + BOOL))aSyscall[64].pCurrent) + +#if SQLITE_OS_WINRT + { "SetFilePointerEx", (SYSCALL)SetFilePointerEx, 0 }, +#else + { "SetFilePointerEx", (SYSCALL)0, 0 }, +#endif + +#define osSetFilePointerEx ((BOOL(WINAPI*)(HANDLE,LARGE_INTEGER, \ + PLARGE_INTEGER,DWORD))aSyscall[65].pCurrent) + +#if SQLITE_OS_WINRT + { "GetFileInformationByHandleEx", (SYSCALL)GetFileInformationByHandleEx, 0 }, +#else + { "GetFileInformationByHandleEx", (SYSCALL)0, 0 }, +#endif + +#define osGetFileInformationByHandleEx ((BOOL(WINAPI*)(HANDLE, \ + FILE_INFO_BY_HANDLE_CLASS,LPVOID,DWORD))aSyscall[66].pCurrent) + +#if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) + { "MapViewOfFileFromApp", (SYSCALL)MapViewOfFileFromApp, 0 }, +#else + { "MapViewOfFileFromApp", (SYSCALL)0, 0 }, +#endif + +#define osMapViewOfFileFromApp ((LPVOID(WINAPI*)(HANDLE,ULONG,ULONG64, \ + SIZE_T))aSyscall[67].pCurrent) + +#if SQLITE_OS_WINRT + { "CreateFile2", (SYSCALL)CreateFile2, 0 }, +#else + { "CreateFile2", (SYSCALL)0, 0 }, +#endif + +#define osCreateFile2 ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD,DWORD, \ + LPCREATEFILE2_EXTENDED_PARAMETERS))aSyscall[68].pCurrent) + +#if SQLITE_OS_WINRT && !defined(SQLITE_OMIT_LOAD_EXTENSION) + { "LoadPackagedLibrary", (SYSCALL)LoadPackagedLibrary, 0 }, +#else + { "LoadPackagedLibrary", (SYSCALL)0, 0 }, +#endif + +#define osLoadPackagedLibrary ((HMODULE(WINAPI*)(LPCWSTR, \ + DWORD))aSyscall[69].pCurrent) + +#if SQLITE_OS_WINRT + { "GetTickCount64", (SYSCALL)GetTickCount64, 0 }, +#else + { "GetTickCount64", (SYSCALL)0, 0 }, +#endif + +#define osGetTickCount64 ((ULONGLONG(WINAPI*)(VOID))aSyscall[70].pCurrent) + +#if SQLITE_OS_WINRT + { "GetNativeSystemInfo", (SYSCALL)GetNativeSystemInfo, 0 }, +#else + { "GetNativeSystemInfo", (SYSCALL)0, 0 }, +#endif + +#define osGetNativeSystemInfo ((VOID(WINAPI*)( \ + LPSYSTEM_INFO))aSyscall[71].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "OutputDebugStringA", (SYSCALL)OutputDebugStringA, 0 }, +#else + { "OutputDebugStringA", (SYSCALL)0, 0 }, +#endif + +#define osOutputDebugStringA ((VOID(WINAPI*)(LPCSTR))aSyscall[72].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "OutputDebugStringW", (SYSCALL)OutputDebugStringW, 0 }, +#else + { "OutputDebugStringW", (SYSCALL)0, 0 }, +#endif + +#define osOutputDebugStringW ((VOID(WINAPI*)(LPCWSTR))aSyscall[73].pCurrent) + + { "GetProcessHeap", (SYSCALL)GetProcessHeap, 0 }, + +#define osGetProcessHeap ((HANDLE(WINAPI*)(VOID))aSyscall[74].pCurrent) + +#if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) + { "CreateFileMappingFromApp", (SYSCALL)CreateFileMappingFromApp, 0 }, +#else + { "CreateFileMappingFromApp", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileMappingFromApp ((HANDLE(WINAPI*)(HANDLE, \ + LPSECURITY_ATTRIBUTES,ULONG,ULONG64,LPCWSTR))aSyscall[75].pCurrent) + +/* +** NOTE: On some sub-platforms, the InterlockedCompareExchange "function" +** is really just a macro that uses a compiler intrinsic (e.g. x64). +** So do not try to make this is into a redefinable interface. +*/ +#if defined(InterlockedCompareExchange) + { "InterlockedCompareExchange", (SYSCALL)0, 0 }, + +#define osInterlockedCompareExchange InterlockedCompareExchange +#else + { "InterlockedCompareExchange", (SYSCALL)InterlockedCompareExchange, 0 }, + +#define osInterlockedCompareExchange ((LONG(WINAPI*)(LONG \ + SQLITE_WIN32_VOLATILE*, LONG,LONG))aSyscall[76].pCurrent) +#endif /* defined(InterlockedCompareExchange) */ + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID + { "UuidCreate", (SYSCALL)UuidCreate, 0 }, +#else + { "UuidCreate", (SYSCALL)0, 0 }, +#endif + +#define osUuidCreate ((RPC_STATUS(RPC_ENTRY*)(UUID*))aSyscall[77].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID + { "UuidCreateSequential", (SYSCALL)UuidCreateSequential, 0 }, +#else + { "UuidCreateSequential", (SYSCALL)0, 0 }, +#endif + +#define osUuidCreateSequential \ + ((RPC_STATUS(RPC_ENTRY*)(UUID*))aSyscall[78].pCurrent) + +#if !defined(SQLITE_NO_SYNC) && SQLITE_MAX_MMAP_SIZE>0 + { "FlushViewOfFile", (SYSCALL)FlushViewOfFile, 0 }, +#else + { "FlushViewOfFile", (SYSCALL)0, 0 }, +#endif + +#define osFlushViewOfFile \ + ((BOOL(WINAPI*)(LPCVOID,SIZE_T))aSyscall[79].pCurrent) + +}; /* End of the overrideable system calls */ + +/* +** This is the xSetSystemCall() method of sqlite3_vfs for all of the +** "win32" VFSes. Return SQLITE_OK opon successfully updating the +** system call pointer, or SQLITE_NOTFOUND if there is no configurable +** system call named zName. +*/ +static int winSetSystemCall( + sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ + const char *zName, /* Name of system call to override */ + sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ +){ + unsigned int i; + int rc = SQLITE_NOTFOUND; + + UNUSED_PARAMETER(pNotUsed); + if( zName==0 ){ + /* If no zName is given, restore all system calls to their default + ** settings and return NULL + */ + rc = SQLITE_OK; + for(i=0; i0 ){ + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + memcpy(zDbgBuf, zBuf, nMin); + osOutputDebugStringA(zDbgBuf); + }else{ + osOutputDebugStringA(zBuf); + } +#elif defined(SQLITE_WIN32_HAS_WIDE) + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + if ( osMultiByteToWideChar( + osAreFileApisANSI() ? CP_ACP : CP_OEMCP, 0, zBuf, + nMin, (LPWSTR)zDbgBuf, SQLITE_WIN32_DBG_BUF_SIZE/sizeof(WCHAR))<=0 ){ + return; + } + osOutputDebugStringW((LPCWSTR)zDbgBuf); +#else + if( nMin>0 ){ + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + memcpy(zDbgBuf, zBuf, nMin); + fprintf(stderr, "%s", zDbgBuf); + }else{ + fprintf(stderr, "%s", zBuf); + } +#endif +} + +/* +** The following routine suspends the current thread for at least ms +** milliseconds. This is equivalent to the Win32 Sleep() interface. +*/ +#if SQLITE_OS_WINRT +static HANDLE sleepObj = NULL; +#endif + +SQLITE_API void sqlite3_win32_sleep(DWORD milliseconds){ +#if SQLITE_OS_WINRT + if ( sleepObj==NULL ){ + sleepObj = osCreateEventExW(NULL, NULL, CREATE_EVENT_MANUAL_RESET, + SYNCHRONIZE); + } + assert( sleepObj!=NULL ); + osWaitForSingleObjectEx(sleepObj, milliseconds, FALSE); +#else + osSleep(milliseconds); +#endif +} + +#if SQLITE_MAX_WORKER_THREADS>0 && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \ + SQLITE_THREADSAFE>0 +SQLITE_PRIVATE DWORD sqlite3Win32Wait(HANDLE hObject){ + DWORD rc; + while( (rc = osWaitForSingleObjectEx(hObject, INFINITE, + TRUE))==WAIT_IO_COMPLETION ){} + return rc; +} +#endif + +/* +** Return true (non-zero) if we are running under WinNT, Win2K, WinXP, +** or WinCE. Return false (zero) for Win95, Win98, or WinME. +** +** Here is an interesting observation: Win95, Win98, and WinME lack +** the LockFileEx() API. But we can still statically link against that +** API as long as we don't call it when running Win95/98/ME. A call to +** this routine is used to determine if the host is Win95/98/ME or +** WinNT/2K/XP so that we will know whether or not we can safely call +** the LockFileEx() API. +*/ + +#if !SQLITE_WIN32_GETVERSIONEX +# define osIsNT() (1) +#elif SQLITE_OS_WINCE || SQLITE_OS_WINRT || !defined(SQLITE_WIN32_HAS_ANSI) +# define osIsNT() (1) +#elif !defined(SQLITE_WIN32_HAS_WIDE) +# define osIsNT() (0) +#else +# define osIsNT() ((sqlite3_os_type==2) || sqlite3_win32_is_nt()) +#endif + +/* +** This function determines if the machine is running a version of Windows +** based on the NT kernel. +*/ +SQLITE_API int sqlite3_win32_is_nt(void){ +#if SQLITE_OS_WINRT + /* + ** NOTE: The WinRT sub-platform is always assumed to be based on the NT + ** kernel. + */ + return 1; +#elif SQLITE_WIN32_GETVERSIONEX + if( osInterlockedCompareExchange(&sqlite3_os_type, 0, 0)==0 ){ +#if defined(SQLITE_WIN32_HAS_ANSI) + OSVERSIONINFOA sInfo; + sInfo.dwOSVersionInfoSize = sizeof(sInfo); + osGetVersionExA(&sInfo); + osInterlockedCompareExchange(&sqlite3_os_type, + (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0); +#elif defined(SQLITE_WIN32_HAS_WIDE) + OSVERSIONINFOW sInfo; + sInfo.dwOSVersionInfoSize = sizeof(sInfo); + osGetVersionExW(&sInfo); + osInterlockedCompareExchange(&sqlite3_os_type, + (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0); +#endif + } + return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2; +#elif SQLITE_TEST + return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2; +#else + /* + ** NOTE: All sub-platforms where the GetVersionEx[AW] functions are + ** deprecated are always assumed to be based on the NT kernel. + */ + return 1; +#endif +} + +#ifdef SQLITE_WIN32_MALLOC +/* +** Allocate nBytes of memory. +*/ +static void *winMemMalloc(int nBytes){ + HANDLE hHeap; + void *p; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + assert( nBytes>=0 ); + p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); + if( !p ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapAlloc %u bytes (%lu), heap=%p", + nBytes, osGetLastError(), (void*)hHeap); + } + return p; +} + +/* +** Free memory. +*/ +static void winMemFree(void *pPrior){ + HANDLE hHeap; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); +#endif + if( !pPrior ) return; /* Passing NULL to HeapFree is undefined. */ + if( !osHeapFree(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapFree block %p (%lu), heap=%p", + pPrior, osGetLastError(), (void*)hHeap); + } +} + +/* +** Change the size of an existing memory allocation +*/ +static void *winMemRealloc(void *pPrior, int nBytes){ + HANDLE hHeap; + void *p; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); +#endif + assert( nBytes>=0 ); + if( !pPrior ){ + p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); + }else{ + p = osHeapReAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior, (SIZE_T)nBytes); + } + if( !p ){ + sqlite3_log(SQLITE_NOMEM, "failed to %s %u bytes (%lu), heap=%p", + pPrior ? "HeapReAlloc" : "HeapAlloc", nBytes, osGetLastError(), + (void*)hHeap); + } + return p; +} + +/* +** Return the size of an outstanding allocation, in bytes. +*/ +static int winMemSize(void *p){ + HANDLE hHeap; + SIZE_T n; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, p) ); +#endif + if( !p ) return 0; + n = osHeapSize(hHeap, SQLITE_WIN32_HEAP_FLAGS, p); + if( n==(SIZE_T)-1 ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapSize block %p (%lu), heap=%p", + p, osGetLastError(), (void*)hHeap); + return 0; + } + return (int)n; +} + +/* +** Round up a request size to the next valid allocation size. +*/ +static int winMemRoundup(int n){ + return n; +} + +/* +** Initialize this module. +*/ +static int winMemInit(void *pAppData){ + winMemData *pWinMemData = (winMemData *)pAppData; + + if( !pWinMemData ) return SQLITE_ERROR; + assert( pWinMemData->magic1==WINMEM_MAGIC1 ); + assert( pWinMemData->magic2==WINMEM_MAGIC2 ); + +#if !SQLITE_OS_WINRT && SQLITE_WIN32_HEAP_CREATE + if( !pWinMemData->hHeap ){ + DWORD dwInitialSize = SQLITE_WIN32_HEAP_INIT_SIZE; + DWORD dwMaximumSize = (DWORD)sqlite3GlobalConfig.nHeap; + if( dwMaximumSize==0 ){ + dwMaximumSize = SQLITE_WIN32_HEAP_MAX_SIZE; + }else if( dwInitialSize>dwMaximumSize ){ + dwInitialSize = dwMaximumSize; + } + pWinMemData->hHeap = osHeapCreate(SQLITE_WIN32_HEAP_FLAGS, + dwInitialSize, dwMaximumSize); + if( !pWinMemData->hHeap ){ + sqlite3_log(SQLITE_NOMEM, + "failed to HeapCreate (%lu), flags=%u, initSize=%lu, maxSize=%lu", + osGetLastError(), SQLITE_WIN32_HEAP_FLAGS, dwInitialSize, + dwMaximumSize); + return SQLITE_NOMEM_BKPT; + } + pWinMemData->bOwned = TRUE; + assert( pWinMemData->bOwned ); + } +#else + pWinMemData->hHeap = osGetProcessHeap(); + if( !pWinMemData->hHeap ){ + sqlite3_log(SQLITE_NOMEM, + "failed to GetProcessHeap (%lu)", osGetLastError()); + return SQLITE_NOMEM_BKPT; + } + pWinMemData->bOwned = FALSE; + assert( !pWinMemData->bOwned ); +#endif + assert( pWinMemData->hHeap!=0 ); + assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + return SQLITE_OK; +} + +/* +** Deinitialize this module. +*/ +static void winMemShutdown(void *pAppData){ + winMemData *pWinMemData = (winMemData *)pAppData; + + if( !pWinMemData ) return; + assert( pWinMemData->magic1==WINMEM_MAGIC1 ); + assert( pWinMemData->magic2==WINMEM_MAGIC2 ); + + if( pWinMemData->hHeap ){ + assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + if( pWinMemData->bOwned ){ + if( !osHeapDestroy(pWinMemData->hHeap) ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapDestroy (%lu), heap=%p", + osGetLastError(), (void*)pWinMemData->hHeap); + } + pWinMemData->bOwned = FALSE; + } + pWinMemData->hHeap = NULL; + } +} + +/* +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. The +** arguments specify the block of memory to manage. +** +** This routine is only called by sqlite3_config(), and therefore +** is not required to be threadsafe (it is not). +*/ +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void){ + static const sqlite3_mem_methods winMemMethods = { + winMemMalloc, + winMemFree, + winMemRealloc, + winMemSize, + winMemRoundup, + winMemInit, + winMemShutdown, + &win_mem_data + }; + return &winMemMethods; +} + +SQLITE_PRIVATE void sqlite3MemSetDefault(void){ + sqlite3_config(SQLITE_CONFIG_MALLOC, sqlite3MemGetWin32()); +} +#endif /* SQLITE_WIN32_MALLOC */ + +/* +** Convert a UTF-8 string to Microsoft Unicode. +** +** Space to hold the returned string is obtained from sqlite3_malloc(). +*/ +static LPWSTR winUtf8ToUnicode(const char *zText){ + int nChar; + LPWSTR zWideText; + + nChar = osMultiByteToWideChar(CP_UTF8, 0, zText, -1, NULL, 0); + if( nChar==0 ){ + return 0; + } + zWideText = sqlite3MallocZero( nChar*sizeof(WCHAR) ); + if( zWideText==0 ){ + return 0; + } + nChar = osMultiByteToWideChar(CP_UTF8, 0, zText, -1, zWideText, + nChar); + if( nChar==0 ){ + sqlite3_free(zWideText); + zWideText = 0; + } + return zWideText; +} + +/* +** Convert a Microsoft Unicode string to UTF-8. +** +** Space to hold the returned string is obtained from sqlite3_malloc(). +*/ +static char *winUnicodeToUtf8(LPCWSTR zWideText){ + int nByte; + char *zText; + + nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideText, -1, 0, 0, 0, 0); + if( nByte == 0 ){ + return 0; + } + zText = sqlite3MallocZero( nByte ); + if( zText==0 ){ + return 0; + } + nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideText, -1, zText, nByte, + 0, 0); + if( nByte == 0 ){ + sqlite3_free(zText); + zText = 0; + } + return zText; +} + +/* +** Convert an ANSI string to Microsoft Unicode, using the ANSI or OEM +** code page. +** +** Space to hold the returned string is obtained from sqlite3_malloc(). +*/ +static LPWSTR winMbcsToUnicode(const char *zText, int useAnsi){ + int nByte; + LPWSTR zMbcsText; + int codepage = useAnsi ? CP_ACP : CP_OEMCP; + + nByte = osMultiByteToWideChar(codepage, 0, zText, -1, NULL, + 0)*sizeof(WCHAR); + if( nByte==0 ){ + return 0; + } + zMbcsText = sqlite3MallocZero( nByte*sizeof(WCHAR) ); + if( zMbcsText==0 ){ + return 0; + } + nByte = osMultiByteToWideChar(codepage, 0, zText, -1, zMbcsText, + nByte); + if( nByte==0 ){ + sqlite3_free(zMbcsText); + zMbcsText = 0; + } + return zMbcsText; +} + +/* +** Convert a Microsoft Unicode string to a multi-byte character string, +** using the ANSI or OEM code page. +** +** Space to hold the returned string is obtained from sqlite3_malloc(). +*/ +static char *winUnicodeToMbcs(LPCWSTR zWideText, int useAnsi){ + int nByte; + char *zText; + int codepage = useAnsi ? CP_ACP : CP_OEMCP; + + nByte = osWideCharToMultiByte(codepage, 0, zWideText, -1, 0, 0, 0, 0); + if( nByte == 0 ){ + return 0; + } + zText = sqlite3MallocZero( nByte ); + if( zText==0 ){ + return 0; + } + nByte = osWideCharToMultiByte(codepage, 0, zWideText, -1, zText, + nByte, 0, 0); + if( nByte == 0 ){ + sqlite3_free(zText); + zText = 0; + } + return zText; +} + +/* +** Convert a multi-byte character string to UTF-8. +** +** Space to hold the returned string is obtained from sqlite3_malloc(). +*/ +static char *winMbcsToUtf8(const char *zText, int useAnsi){ + char *zTextUtf8; + LPWSTR zTmpWide; + + zTmpWide = winMbcsToUnicode(zText, useAnsi); + if( zTmpWide==0 ){ + return 0; + } + zTextUtf8 = winUnicodeToUtf8(zTmpWide); + sqlite3_free(zTmpWide); + return zTextUtf8; +} + +/* +** Convert a UTF-8 string to a multi-byte character string. +** +** Space to hold the returned string is obtained from sqlite3_malloc(). +*/ +static char *winUtf8ToMbcs(const char *zText, int useAnsi){ + char *zTextMbcs; + LPWSTR zTmpWide; + + zTmpWide = winUtf8ToUnicode(zText); + if( zTmpWide==0 ){ + return 0; + } + zTextMbcs = winUnicodeToMbcs(zTmpWide, useAnsi); + sqlite3_free(zTmpWide); + return zTextMbcs; +} + +/* +** This is a public wrapper for the winUtf8ToUnicode() function. +*/ +SQLITE_API LPWSTR sqlite3_win32_utf8_to_unicode(const char *zText){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !zText ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return winUtf8ToUnicode(zText); +} + +/* +** This is a public wrapper for the winUnicodeToUtf8() function. +*/ +SQLITE_API char *sqlite3_win32_unicode_to_utf8(LPCWSTR zWideText){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !zWideText ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return winUnicodeToUtf8(zWideText); +} + +/* +** This is a public wrapper for the winMbcsToUtf8() function. +*/ +SQLITE_API char *sqlite3_win32_mbcs_to_utf8(const char *zText){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !zText ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return winMbcsToUtf8(zText, osAreFileApisANSI()); +} + +/* +** This is a public wrapper for the winMbcsToUtf8() function. +*/ +SQLITE_API char *sqlite3_win32_mbcs_to_utf8_v2(const char *zText, int useAnsi){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !zText ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return winMbcsToUtf8(zText, useAnsi); +} + +/* +** This is a public wrapper for the winUtf8ToMbcs() function. +*/ +SQLITE_API char *sqlite3_win32_utf8_to_mbcs(const char *zText){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !zText ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return winUtf8ToMbcs(zText, osAreFileApisANSI()); +} + +/* +** This is a public wrapper for the winUtf8ToMbcs() function. +*/ +SQLITE_API char *sqlite3_win32_utf8_to_mbcs_v2(const char *zText, int useAnsi){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !zText ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize() ) return 0; +#endif + return winUtf8ToMbcs(zText, useAnsi); +} + +/* +** This function is the same as sqlite3_win32_set_directory (below); however, +** it accepts a UTF-8 string. +*/ +SQLITE_API int sqlite3_win32_set_directory8( + unsigned long type, /* Identifier for directory being set or reset */ + const char *zValue /* New value for directory being set or reset */ +){ + char **ppDirectory = 0; +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + if( type==SQLITE_WIN32_DATA_DIRECTORY_TYPE ){ + ppDirectory = &sqlite3_data_directory; + }else if( type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE ){ + ppDirectory = &sqlite3_temp_directory; + } + assert( !ppDirectory || type==SQLITE_WIN32_DATA_DIRECTORY_TYPE + || type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE + ); + assert( !ppDirectory || sqlite3MemdebugHasType(*ppDirectory, MEMTYPE_HEAP) ); + if( ppDirectory ){ + char *zCopy = 0; + if( zValue && zValue[0] ){ + zCopy = sqlite3_mprintf("%s", zValue); + if ( zCopy==0 ){ + return SQLITE_NOMEM_BKPT; + } + } + sqlite3_free(*ppDirectory); + *ppDirectory = zCopy; + return SQLITE_OK; + } + return SQLITE_ERROR; +} + +/* +** This function is the same as sqlite3_win32_set_directory (below); however, +** it accepts a UTF-16 string. +*/ +SQLITE_API int sqlite3_win32_set_directory16( + unsigned long type, /* Identifier for directory being set or reset */ + const void *zValue /* New value for directory being set or reset */ +){ + int rc; + char *zUtf8 = 0; + if( zValue ){ + zUtf8 = sqlite3_win32_unicode_to_utf8(zValue); + if( zUtf8==0 ) return SQLITE_NOMEM_BKPT; + } + rc = sqlite3_win32_set_directory8(type, zUtf8); + if( zUtf8 ) sqlite3_free(zUtf8); + return rc; +} + +/* +** This function sets the data directory or the temporary directory based on +** the provided arguments. The type argument must be 1 in order to set the +** data directory or 2 in order to set the temporary directory. The zValue +** argument is the name of the directory to use. The return value will be +** SQLITE_OK if successful. +*/ +SQLITE_API int sqlite3_win32_set_directory( + unsigned long type, /* Identifier for directory being set or reset */ + void *zValue /* New value for directory being set or reset */ +){ + return sqlite3_win32_set_directory16(type, zValue); +} + +/* +** The return value of winGetLastErrorMsg +** is zero if the error message fits in the buffer, or non-zero +** otherwise (if the message was truncated). +*/ +static int winGetLastErrorMsg(DWORD lastErrno, int nBuf, char *zBuf){ + /* FormatMessage returns 0 on failure. Otherwise it + ** returns the number of TCHARs written to the output + ** buffer, excluding the terminating null char. + */ + DWORD dwLen = 0; + char *zOut = 0; + + if( osIsNT() ){ +#if SQLITE_OS_WINRT + WCHAR zTempWide[SQLITE_WIN32_MAX_ERRMSG_CHARS+1]; + dwLen = osFormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + lastErrno, + 0, + zTempWide, + SQLITE_WIN32_MAX_ERRMSG_CHARS, + 0); +#else + LPWSTR zTempWide = NULL; + dwLen = osFormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + lastErrno, + 0, + (LPWSTR) &zTempWide, + 0, + 0); +#endif + if( dwLen > 0 ){ + /* allocate a buffer and convert to UTF8 */ + sqlite3BeginBenignMalloc(); + zOut = winUnicodeToUtf8(zTempWide); + sqlite3EndBenignMalloc(); +#if !SQLITE_OS_WINRT + /* free the system buffer allocated by FormatMessage */ + osLocalFree(zTempWide); +#endif + } + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + char *zTemp = NULL; + dwLen = osFormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + lastErrno, + 0, + (LPSTR) &zTemp, + 0, + 0); + if( dwLen > 0 ){ + /* allocate a buffer and convert to UTF8 */ + sqlite3BeginBenignMalloc(); + zOut = winMbcsToUtf8(zTemp, osAreFileApisANSI()); + sqlite3EndBenignMalloc(); + /* free the system buffer allocated by FormatMessage */ + osLocalFree(zTemp); + } + } +#endif + if( 0 == dwLen ){ + sqlite3_snprintf(nBuf, zBuf, "OsError 0x%lx (%lu)", lastErrno, lastErrno); + }else{ + /* copy a maximum of nBuf chars to output buffer */ + sqlite3_snprintf(nBuf, zBuf, "%s", zOut); + /* free the UTF8 buffer */ + sqlite3_free(zOut); + } + return 0; +} + +/* +** +** This function - winLogErrorAtLine() - is only ever called via the macro +** winLogError(). +** +** This routine is invoked after an error occurs in an OS function. +** It logs a message using sqlite3_log() containing the current value of +** error code and, if possible, the human-readable equivalent from +** FormatMessage. +** +** The first argument passed to the macro should be the error code that +** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). +** The two subsequent arguments should be the name of the OS function that +** failed and the associated file-system path, if any. +*/ +#define winLogError(a,b,c,d) winLogErrorAtLine(a,b,c,d,__LINE__) +static int winLogErrorAtLine( + int errcode, /* SQLite error code */ + DWORD lastErrno, /* Win32 last error */ + const char *zFunc, /* Name of OS function that failed */ + const char *zPath, /* File path associated with error */ + int iLine /* Source line number where error occurred */ +){ + char zMsg[500]; /* Human readable error text */ + int i; /* Loop counter */ + + zMsg[0] = 0; + winGetLastErrorMsg(lastErrno, sizeof(zMsg), zMsg); + assert( errcode!=SQLITE_OK ); + if( zPath==0 ) zPath = ""; + for(i=0; zMsg[i] && zMsg[i]!='\r' && zMsg[i]!='\n'; i++){} + zMsg[i] = 0; + sqlite3_log(errcode, + "os_win.c:%d: (%lu) %s(%s) - %s", + iLine, lastErrno, zFunc, zPath, zMsg + ); + + return errcode; +} + +/* +** The number of times that a ReadFile(), WriteFile(), and DeleteFile() +** will be retried following a locking error - probably caused by +** antivirus software. Also the initial delay before the first retry. +** The delay increases linearly with each retry. +*/ +#ifndef SQLITE_WIN32_IOERR_RETRY +# define SQLITE_WIN32_IOERR_RETRY 10 +#endif +#ifndef SQLITE_WIN32_IOERR_RETRY_DELAY +# define SQLITE_WIN32_IOERR_RETRY_DELAY 25 +#endif +static int winIoerrRetry = SQLITE_WIN32_IOERR_RETRY; +static int winIoerrRetryDelay = SQLITE_WIN32_IOERR_RETRY_DELAY; + +/* +** The "winIoerrCanRetry1" macro is used to determine if a particular I/O +** error code obtained via GetLastError() is eligible to be retried. It +** must accept the error code DWORD as its only argument and should return +** non-zero if the error code is transient in nature and the operation +** responsible for generating the original error might succeed upon being +** retried. The argument to this macro should be a variable. +** +** Additionally, a macro named "winIoerrCanRetry2" may be defined. If it +** is defined, it will be consulted only when the macro "winIoerrCanRetry1" +** returns zero. The "winIoerrCanRetry2" macro is completely optional and +** may be used to include additional error codes in the set that should +** result in the failing I/O operation being retried by the caller. If +** defined, the "winIoerrCanRetry2" macro must exhibit external semantics +** identical to those of the "winIoerrCanRetry1" macro. +*/ +#if !defined(winIoerrCanRetry1) +#define winIoerrCanRetry1(a) (((a)==ERROR_ACCESS_DENIED) || \ + ((a)==ERROR_SHARING_VIOLATION) || \ + ((a)==ERROR_LOCK_VIOLATION) || \ + ((a)==ERROR_DEV_NOT_EXIST) || \ + ((a)==ERROR_NETNAME_DELETED) || \ + ((a)==ERROR_SEM_TIMEOUT) || \ + ((a)==ERROR_NETWORK_UNREACHABLE)) +#endif + +/* +** If a ReadFile() or WriteFile() error occurs, invoke this routine +** to see if it should be retried. Return TRUE to retry. Return FALSE +** to give up with an error. +*/ +static int winRetryIoerr(int *pnRetry, DWORD *pError){ + DWORD e = osGetLastError(); + if( *pnRetry>=winIoerrRetry ){ + if( pError ){ + *pError = e; + } + return 0; + } + if( winIoerrCanRetry1(e) ){ + sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry)); + ++*pnRetry; + return 1; + } +#if defined(winIoerrCanRetry2) + else if( winIoerrCanRetry2(e) ){ + sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry)); + ++*pnRetry; + return 1; + } +#endif + if( pError ){ + *pError = e; + } + return 0; +} + +/* +** Log a I/O error retry episode. +*/ +static void winLogIoerr(int nRetry, int lineno){ + if( nRetry ){ + sqlite3_log(SQLITE_NOTICE, + "delayed %dms for lock/sharing conflict at line %d", + winIoerrRetryDelay*nRetry*(nRetry+1)/2, lineno + ); + } +} + +/* +** This #if does not rely on the SQLITE_OS_WINCE define because the +** corresponding section in "date.c" cannot use it. +*/ +#if !defined(SQLITE_OMIT_LOCALTIME) && defined(_WIN32_WCE) && \ + (!defined(SQLITE_MSVC_LOCALTIME_API) || !SQLITE_MSVC_LOCALTIME_API) +/* +** The MSVC CRT on Windows CE may not have a localtime() function. +** So define a substitute. +*/ +/* # include */ +struct tm *__cdecl localtime(const time_t *t) +{ + static struct tm y; + FILETIME uTm, lTm; + SYSTEMTIME pTm; + sqlite3_int64 t64; + t64 = *t; + t64 = (t64 + 11644473600)*10000000; + uTm.dwLowDateTime = (DWORD)(t64 & 0xFFFFFFFF); + uTm.dwHighDateTime= (DWORD)(t64 >> 32); + osFileTimeToLocalFileTime(&uTm,&lTm); + osFileTimeToSystemTime(&lTm,&pTm); + y.tm_year = pTm.wYear - 1900; + y.tm_mon = pTm.wMonth - 1; + y.tm_wday = pTm.wDayOfWeek; + y.tm_mday = pTm.wDay; + y.tm_hour = pTm.wHour; + y.tm_min = pTm.wMinute; + y.tm_sec = pTm.wSecond; + return &y; +} +#endif + +#if SQLITE_OS_WINCE +/************************************************************************* +** This section contains code for WinCE only. +*/ +#define HANDLE_TO_WINFILE(a) (winFile*)&((char*)a)[-(int)offsetof(winFile,h)] + +/* +** Acquire a lock on the handle h +*/ +static void winceMutexAcquire(HANDLE h){ + DWORD dwErr; + do { + dwErr = osWaitForSingleObject(h, INFINITE); + } while (dwErr != WAIT_OBJECT_0 && dwErr != WAIT_ABANDONED); +} +/* +** Release a lock acquired by winceMutexAcquire() +*/ +#define winceMutexRelease(h) ReleaseMutex(h) + +/* +** Create the mutex and shared memory used for locking in the file +** descriptor pFile +*/ +static int winceCreateLock(const char *zFilename, winFile *pFile){ + LPWSTR zTok; + LPWSTR zName; + DWORD lastErrno; + BOOL bLogged = FALSE; + BOOL bInit = TRUE; + + zName = winUtf8ToUnicode(zFilename); + if( zName==0 ){ + /* out of memory */ + return SQLITE_IOERR_NOMEM_BKPT; + } + + /* Initialize the local lockdata */ + memset(&pFile->local, 0, sizeof(pFile->local)); + + /* Replace the backslashes from the filename and lowercase it + ** to derive a mutex name. */ + zTok = osCharLowerW(zName); + for (;*zTok;zTok++){ + if (*zTok == '\\') *zTok = '_'; + } + + /* Create/open the named mutex */ + pFile->hMutex = osCreateMutexW(NULL, FALSE, zName); + if (!pFile->hMutex){ + pFile->lastErrno = osGetLastError(); + sqlite3_free(zName); + return winLogError(SQLITE_IOERR, pFile->lastErrno, + "winceCreateLock1", zFilename); + } + + /* Acquire the mutex before continuing */ + winceMutexAcquire(pFile->hMutex); + + /* Since the names of named mutexes, semaphores, file mappings etc are + ** case-sensitive, take advantage of that by uppercasing the mutex name + ** and using that as the shared filemapping name. + */ + osCharUpperW(zName); + pFile->hShared = osCreateFileMappingW(INVALID_HANDLE_VALUE, NULL, + PAGE_READWRITE, 0, sizeof(winceLock), + zName); + + /* Set a flag that indicates we're the first to create the memory so it + ** must be zero-initialized */ + lastErrno = osGetLastError(); + if (lastErrno == ERROR_ALREADY_EXISTS){ + bInit = FALSE; + } + + sqlite3_free(zName); + + /* If we succeeded in making the shared memory handle, map it. */ + if( pFile->hShared ){ + pFile->shared = (winceLock*)osMapViewOfFile(pFile->hShared, + FILE_MAP_READ|FILE_MAP_WRITE, 0, 0, sizeof(winceLock)); + /* If mapping failed, close the shared memory handle and erase it */ + if( !pFile->shared ){ + pFile->lastErrno = osGetLastError(); + winLogError(SQLITE_IOERR, pFile->lastErrno, + "winceCreateLock2", zFilename); + bLogged = TRUE; + osCloseHandle(pFile->hShared); + pFile->hShared = NULL; + } + } + + /* If shared memory could not be created, then close the mutex and fail */ + if( pFile->hShared==NULL ){ + if( !bLogged ){ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_IOERR, pFile->lastErrno, + "winceCreateLock3", zFilename); + bLogged = TRUE; + } + winceMutexRelease(pFile->hMutex); + osCloseHandle(pFile->hMutex); + pFile->hMutex = NULL; + return SQLITE_IOERR; + } + + /* Initialize the shared memory if we're supposed to */ + if( bInit ){ + memset(pFile->shared, 0, sizeof(winceLock)); + } + + winceMutexRelease(pFile->hMutex); + return SQLITE_OK; +} + +/* +** Destroy the part of winFile that deals with wince locks +*/ +static void winceDestroyLock(winFile *pFile){ + if (pFile->hMutex){ + /* Acquire the mutex */ + winceMutexAcquire(pFile->hMutex); + + /* The following blocks should probably assert in debug mode, but they + are to cleanup in case any locks remained open */ + if (pFile->local.nReaders){ + pFile->shared->nReaders --; + } + if (pFile->local.bReserved){ + pFile->shared->bReserved = FALSE; + } + if (pFile->local.bPending){ + pFile->shared->bPending = FALSE; + } + if (pFile->local.bExclusive){ + pFile->shared->bExclusive = FALSE; + } + + /* De-reference and close our copy of the shared memory handle */ + osUnmapViewOfFile(pFile->shared); + osCloseHandle(pFile->hShared); + + /* Done with the mutex */ + winceMutexRelease(pFile->hMutex); + osCloseHandle(pFile->hMutex); + pFile->hMutex = NULL; + } +} + +/* +** An implementation of the LockFile() API of Windows for CE +*/ +static BOOL winceLockFile( + LPHANDLE phFile, + DWORD dwFileOffsetLow, + DWORD dwFileOffsetHigh, + DWORD nNumberOfBytesToLockLow, + DWORD nNumberOfBytesToLockHigh +){ + winFile *pFile = HANDLE_TO_WINFILE(phFile); + BOOL bReturn = FALSE; + + UNUSED_PARAMETER(dwFileOffsetHigh); + UNUSED_PARAMETER(nNumberOfBytesToLockHigh); + + if (!pFile->hMutex) return TRUE; + winceMutexAcquire(pFile->hMutex); + + /* Wanting an exclusive lock? */ + if (dwFileOffsetLow == (DWORD)SHARED_FIRST + && nNumberOfBytesToLockLow == (DWORD)SHARED_SIZE){ + if (pFile->shared->nReaders == 0 && pFile->shared->bExclusive == 0){ + pFile->shared->bExclusive = TRUE; + pFile->local.bExclusive = TRUE; + bReturn = TRUE; + } + } + + /* Want a read-only lock? */ + else if (dwFileOffsetLow == (DWORD)SHARED_FIRST && + nNumberOfBytesToLockLow == 1){ + if (pFile->shared->bExclusive == 0){ + pFile->local.nReaders ++; + if (pFile->local.nReaders == 1){ + pFile->shared->nReaders ++; + } + bReturn = TRUE; + } + } + + /* Want a pending lock? */ + else if (dwFileOffsetLow == (DWORD)PENDING_BYTE + && nNumberOfBytesToLockLow == 1){ + /* If no pending lock has been acquired, then acquire it */ + if (pFile->shared->bPending == 0) { + pFile->shared->bPending = TRUE; + pFile->local.bPending = TRUE; + bReturn = TRUE; + } + } + + /* Want a reserved lock? */ + else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE + && nNumberOfBytesToLockLow == 1){ + if (pFile->shared->bReserved == 0) { + pFile->shared->bReserved = TRUE; + pFile->local.bReserved = TRUE; + bReturn = TRUE; + } + } + + winceMutexRelease(pFile->hMutex); + return bReturn; +} + +/* +** An implementation of the UnlockFile API of Windows for CE +*/ +static BOOL winceUnlockFile( + LPHANDLE phFile, + DWORD dwFileOffsetLow, + DWORD dwFileOffsetHigh, + DWORD nNumberOfBytesToUnlockLow, + DWORD nNumberOfBytesToUnlockHigh +){ + winFile *pFile = HANDLE_TO_WINFILE(phFile); + BOOL bReturn = FALSE; + + UNUSED_PARAMETER(dwFileOffsetHigh); + UNUSED_PARAMETER(nNumberOfBytesToUnlockHigh); + + if (!pFile->hMutex) return TRUE; + winceMutexAcquire(pFile->hMutex); + + /* Releasing a reader lock or an exclusive lock */ + if (dwFileOffsetLow == (DWORD)SHARED_FIRST){ + /* Did we have an exclusive lock? */ + if (pFile->local.bExclusive){ + assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE); + pFile->local.bExclusive = FALSE; + pFile->shared->bExclusive = FALSE; + bReturn = TRUE; + } + + /* Did we just have a reader lock? */ + else if (pFile->local.nReaders){ + assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE + || nNumberOfBytesToUnlockLow == 1); + pFile->local.nReaders --; + if (pFile->local.nReaders == 0) + { + pFile->shared->nReaders --; + } + bReturn = TRUE; + } + } + + /* Releasing a pending lock */ + else if (dwFileOffsetLow == (DWORD)PENDING_BYTE + && nNumberOfBytesToUnlockLow == 1){ + if (pFile->local.bPending){ + pFile->local.bPending = FALSE; + pFile->shared->bPending = FALSE; + bReturn = TRUE; + } + } + /* Releasing a reserved lock */ + else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE + && nNumberOfBytesToUnlockLow == 1){ + if (pFile->local.bReserved) { + pFile->local.bReserved = FALSE; + pFile->shared->bReserved = FALSE; + bReturn = TRUE; + } + } + + winceMutexRelease(pFile->hMutex); + return bReturn; +} +/* +** End of the special code for wince +*****************************************************************************/ +#endif /* SQLITE_OS_WINCE */ + +/* +** Lock a file region. +*/ +static BOOL winLockFile( + LPHANDLE phFile, + DWORD flags, + DWORD offsetLow, + DWORD offsetHigh, + DWORD numBytesLow, + DWORD numBytesHigh +){ +#if SQLITE_OS_WINCE + /* + ** NOTE: Windows CE is handled differently here due its lack of the Win32 + ** API LockFile. + */ + return winceLockFile(phFile, offsetLow, offsetHigh, + numBytesLow, numBytesHigh); +#else + if( osIsNT() ){ + OVERLAPPED ovlp; + memset(&ovlp, 0, sizeof(OVERLAPPED)); + ovlp.Offset = offsetLow; + ovlp.OffsetHigh = offsetHigh; + return osLockFileEx(*phFile, flags, 0, numBytesLow, numBytesHigh, &ovlp); + }else{ + return osLockFile(*phFile, offsetLow, offsetHigh, numBytesLow, + numBytesHigh); + } +#endif +} + +/* +** Unlock a file region. + */ +static BOOL winUnlockFile( + LPHANDLE phFile, + DWORD offsetLow, + DWORD offsetHigh, + DWORD numBytesLow, + DWORD numBytesHigh +){ +#if SQLITE_OS_WINCE + /* + ** NOTE: Windows CE is handled differently here due its lack of the Win32 + ** API UnlockFile. + */ + return winceUnlockFile(phFile, offsetLow, offsetHigh, + numBytesLow, numBytesHigh); +#else + if( osIsNT() ){ + OVERLAPPED ovlp; + memset(&ovlp, 0, sizeof(OVERLAPPED)); + ovlp.Offset = offsetLow; + ovlp.OffsetHigh = offsetHigh; + return osUnlockFileEx(*phFile, 0, numBytesLow, numBytesHigh, &ovlp); + }else{ + return osUnlockFile(*phFile, offsetLow, offsetHigh, numBytesLow, + numBytesHigh); + } +#endif +} + +/***************************************************************************** +** The next group of routines implement the I/O methods specified +** by the sqlite3_io_methods object. +******************************************************************************/ + +/* +** Some Microsoft compilers lack this definition. +*/ +#ifndef INVALID_SET_FILE_POINTER +# define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif + +/* +** Move the current position of the file handle passed as the first +** argument to offset iOffset within the file. If successful, return 0. +** Otherwise, set pFile->lastErrno and return non-zero. +*/ +static int winSeekFile(winFile *pFile, sqlite3_int64 iOffset){ +#if !SQLITE_OS_WINRT + LONG upperBits; /* Most sig. 32 bits of new offset */ + LONG lowerBits; /* Least sig. 32 bits of new offset */ + DWORD dwRet; /* Value returned by SetFilePointer() */ + DWORD lastErrno; /* Value returned by GetLastError() */ + + OSTRACE(("SEEK file=%p, offset=%lld\n", pFile->h, iOffset)); + + upperBits = (LONG)((iOffset>>32) & 0x7fffffff); + lowerBits = (LONG)(iOffset & 0xffffffff); + + /* API oddity: If successful, SetFilePointer() returns a dword + ** containing the lower 32-bits of the new file-offset. Or, if it fails, + ** it returns INVALID_SET_FILE_POINTER. However according to MSDN, + ** INVALID_SET_FILE_POINTER may also be a valid new offset. So to determine + ** whether an error has actually occurred, it is also necessary to call + ** GetLastError(). + */ + dwRet = osSetFilePointer(pFile->h, lowerBits, &upperBits, FILE_BEGIN); + + if( (dwRet==INVALID_SET_FILE_POINTER + && ((lastErrno = osGetLastError())!=NO_ERROR)) ){ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, + "winSeekFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); + return 1; + } + + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); + return 0; +#else + /* + ** Same as above, except that this implementation works for WinRT. + */ + + LARGE_INTEGER x; /* The new offset */ + BOOL bRet; /* Value returned by SetFilePointerEx() */ + + x.QuadPart = iOffset; + bRet = osSetFilePointerEx(pFile->h, x, 0, FILE_BEGIN); + + if(!bRet){ + pFile->lastErrno = osGetLastError(); + winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, + "winSeekFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); + return 1; + } + + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); + return 0; +#endif +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* Forward references to VFS helper methods used for memory mapped files */ +static int winMapfile(winFile*, sqlite3_int64); +static int winUnmapfile(winFile*); +#endif + +/* +** Close a file. +** +** It is reported that an attempt to close a handle might sometimes +** fail. This is a very unreasonable result, but Windows is notorious +** for being unreasonable so I do not doubt that it might happen. If +** the close fails, we pause for 100 milliseconds and try again. As +** many as MX_CLOSE_ATTEMPT attempts to close the handle are made before +** giving up and returning an error. +*/ +#define MX_CLOSE_ATTEMPT 3 +static int winClose(sqlite3_file *id){ + int rc, cnt = 0; + winFile *pFile = (winFile*)id; + + assert( id!=0 ); +#ifndef SQLITE_OMIT_WAL + assert( pFile->pShm==0 ); +#endif + assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE ); + OSTRACE(("CLOSE pid=%lu, pFile=%p, file=%p\n", + osGetCurrentProcessId(), pFile, pFile->h)); + +#if SQLITE_MAX_MMAP_SIZE>0 + winUnmapfile(pFile); +#endif + + do{ + rc = osCloseHandle(pFile->h); + /* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */ + }while( rc==0 && ++cnt < MX_CLOSE_ATTEMPT && (sqlite3_win32_sleep(100), 1) ); +#if SQLITE_OS_WINCE +#define WINCE_DELETION_ATTEMPTS 3 + { + winVfsAppData *pAppData = (winVfsAppData*)pFile->pVfs->pAppData; + if( pAppData==NULL || !pAppData->bNoLock ){ + winceDestroyLock(pFile); + } + } + if( pFile->zDeleteOnClose ){ + int cnt = 0; + while( + osDeleteFileW(pFile->zDeleteOnClose)==0 + && osGetFileAttributesW(pFile->zDeleteOnClose)!=0xffffffff + && cnt++ < WINCE_DELETION_ATTEMPTS + ){ + sqlite3_win32_sleep(100); /* Wait a little before trying again */ + } + sqlite3_free(pFile->zDeleteOnClose); + } +#endif + if( rc ){ + pFile->h = NULL; + } + OpenCounter(-1); + OSTRACE(("CLOSE pid=%lu, pFile=%p, file=%p, rc=%s\n", + osGetCurrentProcessId(), pFile, pFile->h, rc ? "ok" : "failed")); + return rc ? SQLITE_OK + : winLogError(SQLITE_IOERR_CLOSE, osGetLastError(), + "winClose", pFile->zPath); +} + +/* +** Read data from a file into a buffer. Return SQLITE_OK if all +** bytes were read successfully and SQLITE_IOERR if anything goes +** wrong. +*/ +static int winRead( + sqlite3_file *id, /* File to read from */ + void *pBuf, /* Write content into this buffer */ + int amt, /* Number of bytes to read */ + sqlite3_int64 offset /* Begin reading at this offset */ +){ +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + OVERLAPPED overlapped; /* The offset for ReadFile. */ +#endif + winFile *pFile = (winFile*)id; /* file handle */ + DWORD nRead; /* Number of bytes actually read from file */ + int nRetry = 0; /* Number of retrys */ + + assert( id!=0 ); + assert( amt>0 ); + assert( offset>=0 ); + SimulateIOError(return SQLITE_IOERR_READ); + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, buffer=%p, amount=%d, " + "offset=%lld, lock=%d\n", osGetCurrentProcessId(), pFile, + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + OSTRACE(("READ-MMAP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + +#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) + if( winSeekFile(pFile, offset) ){ + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_FULL\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_FULL; + } + while( !osReadFile(pFile->h, pBuf, amt, &nRead, 0) ){ +#else + memset(&overlapped, 0, sizeof(OVERLAPPED)); + overlapped.Offset = (LONG)(offset & 0xffffffff); + overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); + while( !osReadFile(pFile->h, pBuf, amt, &nRead, &overlapped) && + osGetLastError()!=ERROR_HANDLE_EOF ){ +#endif + DWORD lastErrno; + if( winRetryIoerr(&nRetry, &lastErrno) ) continue; + pFile->lastErrno = lastErrno; + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_READ\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_IOERR_READ, pFile->lastErrno, + "winRead", pFile->zPath); + } + winLogIoerr(nRetry, __LINE__); + if( nRead<(DWORD)amt ){ + /* Unread parts of the buffer must be zero-filled */ + memset(&((char*)pBuf)[nRead], 0, amt-nRead); + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_SHORT_READ\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_IOERR_SHORT_READ; + } + + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; +} + +/* +** Write data from a buffer into a file. Return SQLITE_OK on success +** or some other error code on failure. +*/ +static int winWrite( + sqlite3_file *id, /* File to write into */ + const void *pBuf, /* The bytes to be written */ + int amt, /* Number of bytes to write */ + sqlite3_int64 offset /* Offset into the file to begin writing at */ +){ + int rc = 0; /* True if error has occurred, else false */ + winFile *pFile = (winFile*)id; /* File handle */ + int nRetry = 0; /* Number of retries */ + + assert( amt>0 ); + assert( pFile ); + SimulateIOError(return SQLITE_IOERR_WRITE); + SimulateDiskfullError(return SQLITE_FULL); + + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, buffer=%p, amount=%d, " + "offset=%lld, lock=%d\n", osGetCurrentProcessId(), pFile, + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + OSTRACE(("WRITE-MMAP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + +#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) + rc = winSeekFile(pFile, offset); + if( rc==0 ){ +#else + { +#endif +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + OVERLAPPED overlapped; /* The offset for WriteFile. */ +#endif + u8 *aRem = (u8 *)pBuf; /* Data yet to be written */ + int nRem = amt; /* Number of bytes yet to be written */ + DWORD nWrite; /* Bytes written by each WriteFile() call */ + DWORD lastErrno = NO_ERROR; /* Value returned by GetLastError() */ + +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + memset(&overlapped, 0, sizeof(OVERLAPPED)); + overlapped.Offset = (LONG)(offset & 0xffffffff); + overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); +#endif + + while( nRem>0 ){ +#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) + if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, 0) ){ +#else + if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, &overlapped) ){ +#endif + if( winRetryIoerr(&nRetry, &lastErrno) ) continue; + break; + } + assert( nWrite==0 || nWrite<=(DWORD)nRem ); + if( nWrite==0 || nWrite>(DWORD)nRem ){ + lastErrno = osGetLastError(); + break; + } +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + offset += nWrite; + overlapped.Offset = (LONG)(offset & 0xffffffff); + overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); +#endif + aRem += nWrite; + nRem -= nWrite; + } + if( nRem>0 ){ + pFile->lastErrno = lastErrno; + rc = 1; + } + } + + if( rc ){ + if( ( pFile->lastErrno==ERROR_HANDLE_DISK_FULL ) + || ( pFile->lastErrno==ERROR_DISK_FULL )){ + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_FULL\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_FULL, pFile->lastErrno, + "winWrite1", pFile->zPath); + } + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_WRITE\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_IOERR_WRITE, pFile->lastErrno, + "winWrite2", pFile->zPath); + }else{ + winLogIoerr(nRetry, __LINE__); + } + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; +} + +/* +** Truncate an open file to a specified size +*/ +static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ + winFile *pFile = (winFile*)id; /* File handle object */ + int rc = SQLITE_OK; /* Return code for this function */ + DWORD lastErrno; +#if SQLITE_MAX_MMAP_SIZE>0 + sqlite3_int64 oldMmapSize; + if( pFile->nFetchOut>0 ){ + /* File truncation is a no-op if there are outstanding memory mapped + ** pages. This is because truncating the file means temporarily unmapping + ** the file, and that might delete memory out from under existing cursors. + ** + ** This can result in incremental vacuum not truncating the file, + ** if there is an active read cursor when the incremental vacuum occurs. + ** No real harm comes of this - the database file is not corrupted, + ** though some folks might complain that the file is bigger than it + ** needs to be. + ** + ** The only feasible work-around is to defer the truncation until after + ** all references to memory-mapped content are closed. That is doable, + ** but involves adding a few branches in the common write code path which + ** could slow down normal operations slightly. Hence, we have decided for + ** now to simply make trancations a no-op if there are pending reads. We + ** can maybe revisit this decision in the future. + */ + return SQLITE_OK; + } +#endif + + assert( pFile ); + SimulateIOError(return SQLITE_IOERR_TRUNCATE); + OSTRACE(("TRUNCATE pid=%lu, pFile=%p, file=%p, size=%lld, lock=%d\n", + osGetCurrentProcessId(), pFile, pFile->h, nByte, pFile->locktype)); + + /* If the user has configured a chunk-size for this file, truncate the + ** file so that it consists of an integer number of chunks (i.e. the + ** actual file size after the operation may be larger than the requested + ** size). + */ + if( pFile->szChunk>0 ){ + nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; + } + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFile->pMapRegion ){ + oldMmapSize = pFile->mmapSize; + }else{ + oldMmapSize = 0; + } + winUnmapfile(pFile); +#endif + + /* SetEndOfFile() returns non-zero when successful, or zero when it fails. */ + if( winSeekFile(pFile, nByte) ){ + rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, + "winTruncate1", pFile->zPath); + }else if( 0==osSetEndOfFile(pFile->h) && + ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){ + pFile->lastErrno = lastErrno; + rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, + "winTruncate2", pFile->zPath); + } + +#if SQLITE_MAX_MMAP_SIZE>0 + if( rc==SQLITE_OK && oldMmapSize>0 ){ + if( oldMmapSize>nByte ){ + winMapfile(pFile, -1); + }else{ + winMapfile(pFile, oldMmapSize); + } + } +#endif + + OSTRACE(("TRUNCATE pid=%lu, pFile=%p, file=%p, rc=%s\n", + osGetCurrentProcessId(), pFile, pFile->h, sqlite3ErrName(rc))); + return rc; +} + +#ifdef SQLITE_TEST +/* +** Count the number of fullsyncs and normal syncs. This is used to test +** that syncs and fullsyncs are occuring at the right times. +*/ +SQLITE_API int sqlite3_sync_count = 0; +SQLITE_API int sqlite3_fullsync_count = 0; +#endif + +/* +** Make sure all writes to a particular file are committed to disk. +*/ +static int winSync(sqlite3_file *id, int flags){ +#ifndef SQLITE_NO_SYNC + /* + ** Used only when SQLITE_NO_SYNC is not defined. + */ + BOOL rc; +#endif +#if !defined(NDEBUG) || !defined(SQLITE_NO_SYNC) || \ + defined(SQLITE_HAVE_OS_TRACE) + /* + ** Used when SQLITE_NO_SYNC is not defined and by the assert() and/or + ** OSTRACE() macros. + */ + winFile *pFile = (winFile*)id; +#else + UNUSED_PARAMETER(id); +#endif + + assert( pFile ); + /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ + assert((flags&0x0F)==SQLITE_SYNC_NORMAL + || (flags&0x0F)==SQLITE_SYNC_FULL + ); + + /* Unix cannot, but some systems may return SQLITE_FULL from here. This + ** line is to test that doing so does not cause any problems. + */ + SimulateDiskfullError( return SQLITE_FULL ); + + OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, flags=%x, lock=%d\n", + osGetCurrentProcessId(), pFile, pFile->h, flags, + pFile->locktype)); + +#ifndef SQLITE_TEST + UNUSED_PARAMETER(flags); +#else + if( (flags&0x0F)==SQLITE_SYNC_FULL ){ + sqlite3_fullsync_count++; + } + sqlite3_sync_count++; +#endif + + /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a + ** no-op + */ +#ifdef SQLITE_NO_SYNC + OSTRACE(("SYNC-NOP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; +#else +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFile->pMapRegion ){ + if( osFlushViewOfFile(pFile->pMapRegion, 0) ){ + OSTRACE(("SYNC-MMAP pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_OK\n", osGetCurrentProcessId(), + pFile, pFile->pMapRegion)); + }else{ + pFile->lastErrno = osGetLastError(); + OSTRACE(("SYNC-MMAP pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), + pFile, pFile->pMapRegion)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winSync1", pFile->zPath); + } + } +#endif + rc = osFlushFileBuffers(pFile->h); + SimulateIOError( rc=FALSE ); + if( rc ){ + OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; + }else{ + pFile->lastErrno = osGetLastError(); + OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_FSYNC\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_IOERR_FSYNC, pFile->lastErrno, + "winSync2", pFile->zPath); + } +#endif +} + +/* +** Determine the current size of a file in bytes +*/ +static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){ + winFile *pFile = (winFile*)id; + int rc = SQLITE_OK; + + assert( id!=0 ); + assert( pSize!=0 ); + SimulateIOError(return SQLITE_IOERR_FSTAT); + OSTRACE(("SIZE file=%p, pSize=%p\n", pFile->h, pSize)); + +#if SQLITE_OS_WINRT + { + FILE_STANDARD_INFO info; + if( osGetFileInformationByHandleEx(pFile->h, FileStandardInfo, + &info, sizeof(info)) ){ + *pSize = info.EndOfFile.QuadPart; + }else{ + pFile->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno, + "winFileSize", pFile->zPath); + } + } +#else + { + DWORD upperBits; + DWORD lowerBits; + DWORD lastErrno; + + lowerBits = osGetFileSize(pFile->h, &upperBits); + *pSize = (((sqlite3_int64)upperBits)<<32) + lowerBits; + if( (lowerBits == INVALID_FILE_SIZE) + && ((lastErrno = osGetLastError())!=NO_ERROR) ){ + pFile->lastErrno = lastErrno; + rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno, + "winFileSize", pFile->zPath); + } + } +#endif + OSTRACE(("SIZE file=%p, pSize=%p, *pSize=%lld, rc=%s\n", + pFile->h, pSize, *pSize, sqlite3ErrName(rc))); + return rc; +} + +/* +** LOCKFILE_FAIL_IMMEDIATELY is undefined on some Windows systems. +*/ +#ifndef LOCKFILE_FAIL_IMMEDIATELY +# define LOCKFILE_FAIL_IMMEDIATELY 1 +#endif + +#ifndef LOCKFILE_EXCLUSIVE_LOCK +# define LOCKFILE_EXCLUSIVE_LOCK 2 +#endif + +/* +** Historically, SQLite has used both the LockFile and LockFileEx functions. +** When the LockFile function was used, it was always expected to fail +** immediately if the lock could not be obtained. Also, it always expected to +** obtain an exclusive lock. These flags are used with the LockFileEx function +** and reflect those expectations; therefore, they should not be changed. +*/ +#ifndef SQLITE_LOCKFILE_FLAGS +# define SQLITE_LOCKFILE_FLAGS (LOCKFILE_FAIL_IMMEDIATELY | \ + LOCKFILE_EXCLUSIVE_LOCK) +#endif + +/* +** Currently, SQLite never calls the LockFileEx function without wanting the +** call to fail immediately if the lock cannot be obtained. +*/ +#ifndef SQLITE_LOCKFILEEX_FLAGS +# define SQLITE_LOCKFILEEX_FLAGS (LOCKFILE_FAIL_IMMEDIATELY) +#endif + +/* +** Acquire a reader lock. +** Different API routines are called depending on whether or not this +** is Win9x or WinNT. +*/ +static int winGetReadLock(winFile *pFile){ + int res; + OSTRACE(("READ-LOCK file=%p, lock=%d\n", pFile->h, pFile->locktype)); + if( osIsNT() ){ +#if SQLITE_OS_WINCE + /* + ** NOTE: Windows CE is handled differently here due its lack of the Win32 + ** API LockFileEx. + */ + res = winceLockFile(&pFile->h, SHARED_FIRST, 0, 1, 0); +#else + res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS, SHARED_FIRST, 0, + SHARED_SIZE, 0); +#endif + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + int lk; + sqlite3_randomness(sizeof(lk), &lk); + pFile->sharedLockByte = (short)((lk & 0x7fffffff)%(SHARED_SIZE - 1)); + res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, + SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0); + } +#endif + if( res == 0 ){ + pFile->lastErrno = osGetLastError(); + /* No need to log a failure to lock */ + } + OSTRACE(("READ-LOCK file=%p, result=%d\n", pFile->h, res)); + return res; +} + +/* +** Undo a readlock +*/ +static int winUnlockReadLock(winFile *pFile){ + int res; + DWORD lastErrno; + OSTRACE(("READ-UNLOCK file=%p, lock=%d\n", pFile->h, pFile->locktype)); + if( osIsNT() ){ + res = winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + res = winUnlockFile(&pFile->h, SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0); + } +#endif + if( res==0 && ((lastErrno = osGetLastError())!=ERROR_NOT_LOCKED) ){ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_IOERR_UNLOCK, pFile->lastErrno, + "winUnlockReadLock", pFile->zPath); + } + OSTRACE(("READ-UNLOCK file=%p, result=%d\n", pFile->h, res)); + return res; +} + +/* +** Lock the file with the lock specified by parameter locktype - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. The winUnlock() routine +** erases all locks at once and returns us immediately to locking level 0. +** It is not possible to lower the locking level one step at a time. You +** must go straight to locking level 0. +*/ +static int winLock(sqlite3_file *id, int locktype){ + int rc = SQLITE_OK; /* Return code from subroutines */ + int res = 1; /* Result of a Windows lock call */ + int newLocktype; /* Set pFile->locktype to this value before exiting */ + int gotPendingLock = 0;/* True if we acquired a PENDING lock this time */ + winFile *pFile = (winFile*)id; + DWORD lastErrno = NO_ERROR; + + assert( id!=0 ); + OSTRACE(("LOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); + + /* If there is already a lock of this type or more restrictive on the + ** OsFile, do nothing. Don't use the end_lock: exit path, as + ** sqlite3OsEnterMutex() hasn't been called yet. + */ + if( pFile->locktype>=locktype ){ + OSTRACE(("LOCK-HELD file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + + /* Do not allow any kind of write-lock on a read-only database + */ + if( (pFile->ctrlFlags & WINFILE_RDONLY)!=0 && locktype>=RESERVED_LOCK ){ + return SQLITE_IOERR_LOCK; + } + + /* Make sure the locking sequence is correct + */ + assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); + assert( locktype!=PENDING_LOCK ); + assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK ); + + /* Lock the PENDING_LOCK byte if we need to acquire a PENDING lock or + ** a SHARED lock. If we are acquiring a SHARED lock, the acquisition of + ** the PENDING_LOCK byte is temporary. + */ + newLocktype = pFile->locktype; + if( pFile->locktype==NO_LOCK + || (locktype==EXCLUSIVE_LOCK && pFile->locktype<=RESERVED_LOCK) + ){ + int cnt = 3; + while( cnt-->0 && (res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, + PENDING_BYTE, 0, 1, 0))==0 ){ + /* Try 3 times to get the pending lock. This is needed to work + ** around problems caused by indexing and/or anti-virus software on + ** Windows systems. + ** If you are using this code as a model for alternative VFSes, do not + ** copy this retry logic. It is a hack intended for Windows only. + */ + lastErrno = osGetLastError(); + OSTRACE(("LOCK-PENDING-FAIL file=%p, count=%d, result=%d\n", + pFile->h, cnt, res)); + if( lastErrno==ERROR_INVALID_HANDLE ){ + pFile->lastErrno = lastErrno; + rc = SQLITE_IOERR_LOCK; + OSTRACE(("LOCK-FAIL file=%p, count=%d, rc=%s\n", + pFile->h, cnt, sqlite3ErrName(rc))); + return rc; + } + if( cnt ) sqlite3_win32_sleep(1); + } + gotPendingLock = res; + if( !res ){ + lastErrno = osGetLastError(); + } + } + + /* Acquire a shared lock + */ + if( locktype==SHARED_LOCK && res ){ + assert( pFile->locktype==NO_LOCK ); + res = winGetReadLock(pFile); + if( res ){ + newLocktype = SHARED_LOCK; + }else{ + lastErrno = osGetLastError(); + } + } + + /* Acquire a RESERVED lock + */ + if( locktype==RESERVED_LOCK && res ){ + assert( pFile->locktype==SHARED_LOCK ); + res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, RESERVED_BYTE, 0, 1, 0); + if( res ){ + newLocktype = RESERVED_LOCK; + }else{ + lastErrno = osGetLastError(); + } + } + + /* Acquire a PENDING lock + */ + if( locktype==EXCLUSIVE_LOCK && res ){ + newLocktype = PENDING_LOCK; + gotPendingLock = 0; + } + + /* Acquire an EXCLUSIVE lock + */ + if( locktype==EXCLUSIVE_LOCK && res ){ + assert( pFile->locktype>=SHARED_LOCK ); + res = winUnlockReadLock(pFile); + res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST, 0, + SHARED_SIZE, 0); + if( res ){ + newLocktype = EXCLUSIVE_LOCK; + }else{ + lastErrno = osGetLastError(); + winGetReadLock(pFile); + } + } + + /* If we are holding a PENDING lock that ought to be released, then + ** release it now. + */ + if( gotPendingLock && locktype==SHARED_LOCK ){ + winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); + } + + /* Update the state of the lock has held in the file descriptor then + ** return the appropriate result code. + */ + if( res ){ + rc = SQLITE_OK; + }else{ + pFile->lastErrno = lastErrno; + rc = SQLITE_BUSY; + OSTRACE(("LOCK-FAIL file=%p, wanted=%d, got=%d\n", + pFile->h, locktype, newLocktype)); + } + pFile->locktype = (u8)newLocktype; + OSTRACE(("LOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); + return rc; +} + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, return +** non-zero, otherwise zero. +*/ +static int winCheckReservedLock(sqlite3_file *id, int *pResOut){ + int res; + winFile *pFile = (winFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p\n", pFile->h, pResOut)); + + assert( id!=0 ); + if( pFile->locktype>=RESERVED_LOCK ){ + res = 1; + OSTRACE(("TEST-WR-LOCK file=%p, result=%d (local)\n", pFile->h, res)); + }else{ + res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS,RESERVED_BYTE,0,1,0); + if( res ){ + winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); + } + res = !res; + OSTRACE(("TEST-WR-LOCK file=%p, result=%d (remote)\n", pFile->h, res)); + } + *pResOut = res; + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + pFile->h, pResOut, *pResOut)); + return SQLITE_OK; +} + +/* +** Lower the locking level on file descriptor id to locktype. locktype +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** It is not possible for this routine to fail if the second argument +** is NO_LOCK. If the second argument is SHARED_LOCK then this routine +** might return SQLITE_IOERR; +*/ +static int winUnlock(sqlite3_file *id, int locktype){ + int type; + winFile *pFile = (winFile*)id; + int rc = SQLITE_OK; + assert( pFile!=0 ); + assert( locktype<=SHARED_LOCK ); + OSTRACE(("UNLOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); + type = pFile->locktype; + if( type>=EXCLUSIVE_LOCK ){ + winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); + if( locktype==SHARED_LOCK && !winGetReadLock(pFile) ){ + /* This should never happen. We should always be able to + ** reacquire the read lock */ + rc = winLogError(SQLITE_IOERR_UNLOCK, osGetLastError(), + "winUnlock", pFile->zPath); + } + } + if( type>=RESERVED_LOCK ){ + winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); + } + if( locktype==NO_LOCK && type>=SHARED_LOCK ){ + winUnlockReadLock(pFile); + } + if( type>=PENDING_LOCK ){ + winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); + } + pFile->locktype = (u8)locktype; + OSTRACE(("UNLOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); + return rc; +} + +/****************************************************************************** +****************************** No-op Locking ********************************** +** +** Of the various locking implementations available, this is by far the +** simplest: locking is ignored. No attempt is made to lock the database +** file for reading or writing. +** +** This locking mode is appropriate for use on read-only databases +** (ex: databases that are burned into CD-ROM, for example.) It can +** also be used if the application employs some external mechanism to +** prevent simultaneous access of the same database by two or more +** database connections. But there is a serious risk of database +** corruption if this locking mode is used in situations where multiple +** database connections are accessing the same database file at the same +** time and one or more of those connections are writing. +*/ + +static int winNolockLock(sqlite3_file *id, int locktype){ + UNUSED_PARAMETER(id); + UNUSED_PARAMETER(locktype); + return SQLITE_OK; +} + +static int winNolockCheckReservedLock(sqlite3_file *id, int *pResOut){ + UNUSED_PARAMETER(id); + UNUSED_PARAMETER(pResOut); + return SQLITE_OK; +} + +static int winNolockUnlock(sqlite3_file *id, int locktype){ + UNUSED_PARAMETER(id); + UNUSED_PARAMETER(locktype); + return SQLITE_OK; +} + +/******************* End of the no-op lock implementation ********************* +******************************************************************************/ + +/* +** If *pArg is initially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void winModeBit(winFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + +/* Forward references to VFS helper methods used for temporary files */ +static int winGetTempname(sqlite3_vfs *, char **); +static int winIsDir(const void *); +static BOOL winIsLongPathPrefix(const char *); +static BOOL winIsDriveLetterAndColon(const char *); + +/* +** Control and query of the open file handle. +*/ +static int winFileControl(sqlite3_file *id, int op, void *pArg){ + winFile *pFile = (winFile*)id; + OSTRACE(("FCNTL file=%p, op=%d, pArg=%p\n", pFile->h, op, pArg)); + switch( op ){ + case SQLITE_FCNTL_LOCKSTATE: { + *(int*)pArg = pFile->locktype; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_LAST_ERRNO: { + *(int*)pArg = (int)pFile->lastErrno; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_CHUNK_SIZE: { + pFile->szChunk = *(int *)pArg; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_SIZE_HINT: { + if( pFile->szChunk>0 ){ + sqlite3_int64 oldSz; + int rc = winFileSize(id, &oldSz); + if( rc==SQLITE_OK ){ + sqlite3_int64 newSz = *(sqlite3_int64*)pArg; + if( newSz>oldSz ){ + SimulateIOErrorBenign(1); + rc = winTruncate(id, newSz); + SimulateIOErrorBenign(0); + } + } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); + return rc; + } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_PERSIST_WAL: { + winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + winModeBit(pFile, WINFILE_PSOW, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_VFSNAME: { + *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_WIN32_AV_RETRY: { + int *a = (int*)pArg; + if( a[0]>0 ){ + winIoerrRetry = a[0]; + }else{ + a[0] = winIoerrRetry; + } + if( a[1]>0 ){ + winIoerrRetryDelay = a[1]; + }else{ + a[1] = winIoerrRetryDelay; + } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_WIN32_GET_HANDLE: { + LPHANDLE phFile = (LPHANDLE)pArg; + *phFile = pFile->h; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } +#ifdef SQLITE_TEST + case SQLITE_FCNTL_WIN32_SET_HANDLE: { + LPHANDLE phFile = (LPHANDLE)pArg; + HANDLE hOldFile = pFile->h; + pFile->h = *phFile; + *phFile = hOldFile; + OSTRACE(("FCNTL oldFile=%p, newFile=%p, rc=SQLITE_OK\n", + hOldFile, pFile->h)); + return SQLITE_OK; + } +#endif + case SQLITE_FCNTL_TEMPFILENAME: { + char *zTFile = 0; + int rc = winGetTempname(pFile->pVfs, &zTFile); + if( rc==SQLITE_OK ){ + *(char**)pArg = zTFile; + } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); + return rc; + } +#if SQLITE_MAX_MMAP_SIZE>0 + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + int rc = SQLITE_OK; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + + /* The value of newLimit may be eventually cast to (SIZE_T) and passed + ** to MapViewOfFile(). Restrict its value to 2GB if (SIZE_T) is not at + ** least a 64-bit type. */ + if( newLimit>0 && sizeof(SIZE_T)<8 ){ + newLimit = (newLimit & 0x7FFFFFFF); + } + + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ + pFile->mmapSizeMax = newLimit; + if( pFile->mmapSize>0 ){ + winUnmapfile(pFile); + rc = winMapfile(pFile, -1); + } + } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); + return rc; + } +#endif + } + OSTRACE(("FCNTL file=%p, rc=SQLITE_NOTFOUND\n", pFile->h)); + return SQLITE_NOTFOUND; +} + +/* +** Return the sector size in bytes of the underlying block device for +** the specified file. This is almost always 512 bytes, but may be +** larger for some devices. +** +** SQLite code assumes this function cannot fail. It also assumes that +** if two files are created in the same file-system directory (i.e. +** a database and its journal file) that the sector size will be the +** same for both. +*/ +static int winSectorSize(sqlite3_file *id){ + (void)id; + return SQLITE_DEFAULT_SECTOR_SIZE; +} + +/* +** Return a vector of device characteristics. +*/ +static int winDeviceCharacteristics(sqlite3_file *id){ + winFile *p = (winFile*)id; + return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | + ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); +} + +/* +** Windows will only let you create file view mappings +** on allocation size granularity boundaries. +** During sqlite3_os_init() we do a GetSystemInfo() +** to get the granularity size. +*/ +static SYSTEM_INFO winSysInfo; + +#ifndef SQLITE_OMIT_WAL + +/* +** Helper functions to obtain and relinquish the global mutex. The +** global mutex is used to protect the winLockInfo objects used by +** this file, all of which may be shared by multiple threads. +** +** Function winShmMutexHeld() is used to assert() that the global mutex +** is held when required. This function is only used as part of assert() +** statements. e.g. +** +** winShmEnterMutex() +** assert( winShmMutexHeld() ); +** winShmLeaveMutex() +*/ +static sqlite3_mutex *winBigLock = 0; +static void winShmEnterMutex(void){ + sqlite3_mutex_enter(winBigLock); +} +static void winShmLeaveMutex(void){ + sqlite3_mutex_leave(winBigLock); +} +#ifndef NDEBUG +static int winShmMutexHeld(void) { + return sqlite3_mutex_held(winBigLock); +} +#endif + +/* +** Object used to represent a single file opened and mmapped to provide +** shared memory. When multiple threads all reference the same +** log-summary, each thread has its own winFile object, but they all +** point to a single instance of this object. In other words, each +** log-summary is opened only once per process. +** +** winShmMutexHeld() must be true when creating or destroying +** this object or while reading or writing the following fields: +** +** nRef +** pNext +** +** The following fields are read-only after the object is created: +** +** fid +** zFilename +** +** Either winShmNode.mutex must be held or winShmNode.nRef==0 and +** winShmMutexHeld() is true when reading or writing any other field +** in this structure. +** +*/ +struct winShmNode { + sqlite3_mutex *mutex; /* Mutex to access this object */ + char *zFilename; /* Name of the file */ + winFile hFile; /* File handle from winOpen */ + + int szRegion; /* Size of shared-memory regions */ + int nRegion; /* Size of array apRegion */ + u8 isReadonly; /* True if read-only */ + u8 isUnlocked; /* True if no DMS lock held */ + + struct ShmRegion { + HANDLE hMap; /* File handle from CreateFileMapping */ + void *pMap; + } *aRegion; + DWORD lastErrno; /* The Windows errno from the last I/O error */ + + int nRef; /* Number of winShm objects pointing to this */ + winShm *pFirst; /* All winShm objects pointing to this */ + winShmNode *pNext; /* Next in list of all winShmNode objects */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) + u8 nextShmId; /* Next available winShm.id value */ +#endif +}; + +/* +** A global array of all winShmNode objects. +** +** The winShmMutexHeld() must be true while reading or writing this list. +*/ +static winShmNode *winShmNodeList = 0; + +/* +** Structure used internally by this VFS to record the state of an +** open shared memory connection. +** +** The following fields are initialized when this object is created and +** are read-only thereafter: +** +** winShm.pShmNode +** winShm.id +** +** All other fields are read/write. The winShm.pShmNode->mutex must be held +** while accessing any read/write fields. +*/ +struct winShm { + winShmNode *pShmNode; /* The underlying winShmNode object */ + winShm *pNext; /* Next winShm with the same winShmNode */ + u8 hasMutex; /* True if holding the winShmNode mutex */ + u16 sharedMask; /* Mask of shared locks held */ + u16 exclMask; /* Mask of exclusive locks held */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) + u8 id; /* Id of this connection with its winShmNode */ +#endif +}; + +/* +** Constants used for locking +*/ +#define WIN_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ +#define WIN_SHM_DMS (WIN_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ + +/* +** Apply advisory locks for all n bytes beginning at ofst. +*/ +#define WINSHM_UNLCK 1 +#define WINSHM_RDLCK 2 +#define WINSHM_WRLCK 3 +static int winShmSystemLock( + winShmNode *pFile, /* Apply locks to this open shared-memory segment */ + int lockType, /* WINSHM_UNLCK, WINSHM_RDLCK, or WINSHM_WRLCK */ + int ofst, /* Offset to first byte to be locked/unlocked */ + int nByte /* Number of bytes to lock or unlock */ +){ + int rc = 0; /* Result code form Lock/UnlockFileEx() */ + + /* Access to the winShmNode object is serialized by the caller */ + assert( pFile->nRef==0 || sqlite3_mutex_held(pFile->mutex) ); + + OSTRACE(("SHM-LOCK file=%p, lock=%d, offset=%d, size=%d\n", + pFile->hFile.h, lockType, ofst, nByte)); + + /* Release/Acquire the system-level lock */ + if( lockType==WINSHM_UNLCK ){ + rc = winUnlockFile(&pFile->hFile.h, ofst, 0, nByte, 0); + }else{ + /* Initialize the locking parameters */ + DWORD dwFlags = LOCKFILE_FAIL_IMMEDIATELY; + if( lockType == WINSHM_WRLCK ) dwFlags |= LOCKFILE_EXCLUSIVE_LOCK; + rc = winLockFile(&pFile->hFile.h, dwFlags, ofst, 0, nByte, 0); + } + + if( rc!= 0 ){ + rc = SQLITE_OK; + }else{ + pFile->lastErrno = osGetLastError(); + rc = SQLITE_BUSY; + } + + OSTRACE(("SHM-LOCK file=%p, func=%s, errno=%lu, rc=%s\n", + pFile->hFile.h, (lockType == WINSHM_UNLCK) ? "winUnlockFile" : + "winLockFile", pFile->lastErrno, sqlite3ErrName(rc))); + + return rc; +} + +/* Forward references to VFS methods */ +static int winOpen(sqlite3_vfs*,const char*,sqlite3_file*,int,int*); +static int winDelete(sqlite3_vfs *,const char*,int); + +/* +** Purge the winShmNodeList list of all entries with winShmNode.nRef==0. +** +** This is not a VFS shared-memory method; it is a utility function called +** by VFS shared-memory methods. +*/ +static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ + winShmNode **pp; + winShmNode *p; + assert( winShmMutexHeld() ); + OSTRACE(("SHM-PURGE pid=%lu, deleteFlag=%d\n", + osGetCurrentProcessId(), deleteFlag)); + pp = &winShmNodeList; + while( (p = *pp)!=0 ){ + if( p->nRef==0 ){ + int i; + if( p->mutex ){ sqlite3_mutex_free(p->mutex); } + for(i=0; inRegion; i++){ + BOOL bRc = osUnmapViewOfFile(p->aRegion[i].pMap); + OSTRACE(("SHM-PURGE-UNMAP pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); + UNUSED_VARIABLE_VALUE(bRc); + bRc = osCloseHandle(p->aRegion[i].hMap); + OSTRACE(("SHM-PURGE-CLOSE pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); + UNUSED_VARIABLE_VALUE(bRc); + } + if( p->hFile.h!=NULL && p->hFile.h!=INVALID_HANDLE_VALUE ){ + SimulateIOErrorBenign(1); + winClose((sqlite3_file *)&p->hFile); + SimulateIOErrorBenign(0); + } + if( deleteFlag ){ + SimulateIOErrorBenign(1); + sqlite3BeginBenignMalloc(); + winDelete(pVfs, p->zFilename, 0); + sqlite3EndBenignMalloc(); + SimulateIOErrorBenign(0); + } + *pp = p->pNext; + sqlite3_free(p->aRegion); + sqlite3_free(p); + }else{ + pp = &p->pNext; + } + } +} + +/* +** The DMS lock has not yet been taken on shm file pShmNode. Attempt to +** take it now. Return SQLITE_OK if successful, or an SQLite error +** code otherwise. +** +** If the DMS cannot be locked because this is a readonly_shm=1 +** connection and no other process already holds a lock, return +** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1. +*/ +static int winLockSharedMemory(winShmNode *pShmNode){ + int rc = winShmSystemLock(pShmNode, WINSHM_WRLCK, WIN_SHM_DMS, 1); + + if( rc==SQLITE_OK ){ + if( pShmNode->isReadonly ){ + pShmNode->isUnlocked = 1; + winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1); + return SQLITE_READONLY_CANTINIT; + }else if( winTruncate((sqlite3_file*)&pShmNode->hFile, 0) ){ + winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1); + return winLogError(SQLITE_IOERR_SHMOPEN, osGetLastError(), + "winLockSharedMemory", pShmNode->zFilename); + } + } + + if( rc==SQLITE_OK ){ + winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1); + } + + return winShmSystemLock(pShmNode, WINSHM_RDLCK, WIN_SHM_DMS, 1); +} + +/* +** Open the shared-memory area associated with database file pDbFd. +** +** When opening a new shared-memory file, if no other instances of that +** file are currently open, in this process or in other processes, then +** the file must be truncated to zero length or have its header cleared. +*/ +static int winOpenSharedMemory(winFile *pDbFd){ + struct winShm *p; /* The connection to be opened */ + winShmNode *pShmNode = 0; /* The underlying mmapped file */ + int rc = SQLITE_OK; /* Result code */ + winShmNode *pNew; /* Newly allocated winShmNode */ + int nName; /* Size of zName in bytes */ + + assert( pDbFd->pShm==0 ); /* Not previously opened */ + + /* Allocate space for the new sqlite3_shm object. Also speculatively + ** allocate space for a new winShmNode and filename. + */ + p = sqlite3MallocZero( sizeof(*p) ); + if( p==0 ) return SQLITE_IOERR_NOMEM_BKPT; + nName = sqlite3Strlen30(pDbFd->zPath); + pNew = sqlite3MallocZero( sizeof(*pShmNode) + nName + 17 ); + if( pNew==0 ){ + sqlite3_free(p); + return SQLITE_IOERR_NOMEM_BKPT; + } + pNew->zFilename = (char*)&pNew[1]; + sqlite3_snprintf(nName+15, pNew->zFilename, "%s-shm", pDbFd->zPath); + sqlite3FileSuffix3(pDbFd->zPath, pNew->zFilename); + + /* Look to see if there is an existing winShmNode that can be used. + ** If no matching winShmNode currently exists, create a new one. + */ + winShmEnterMutex(); + for(pShmNode = winShmNodeList; pShmNode; pShmNode=pShmNode->pNext){ + /* TBD need to come up with better match here. Perhaps + ** use FILE_ID_BOTH_DIR_INFO Structure. + */ + if( sqlite3StrICmp(pShmNode->zFilename, pNew->zFilename)==0 ) break; + } + if( pShmNode ){ + sqlite3_free(pNew); + }else{ + int inFlags = SQLITE_OPEN_WAL; + int outFlags = 0; + + pShmNode = pNew; + pNew = 0; + ((winFile*)(&pShmNode->hFile))->h = INVALID_HANDLE_VALUE; + pShmNode->pNext = winShmNodeList; + winShmNodeList = pShmNode; + + if( sqlite3GlobalConfig.bCoreMutex ){ + pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pShmNode->mutex==0 ){ + rc = SQLITE_IOERR_NOMEM_BKPT; + goto shm_open_err; + } + } + + if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ + inFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; + }else{ + inFlags |= SQLITE_OPEN_READONLY; + } + rc = winOpen(pDbFd->pVfs, pShmNode->zFilename, + (sqlite3_file*)&pShmNode->hFile, + inFlags, &outFlags); + if( rc!=SQLITE_OK ){ + rc = winLogError(rc, osGetLastError(), "winOpenShm", + pShmNode->zFilename); + goto shm_open_err; + } + if( outFlags==SQLITE_OPEN_READONLY ) pShmNode->isReadonly = 1; + + rc = winLockSharedMemory(pShmNode); + if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err; + } + + /* Make the new connection a child of the winShmNode */ + p->pShmNode = pShmNode; +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) + p->id = pShmNode->nextShmId++; +#endif + pShmNode->nRef++; + pDbFd->pShm = p; + winShmLeaveMutex(); + + /* The reference count on pShmNode has already been incremented under + ** the cover of the winShmEnterMutex() mutex and the pointer from the + ** new (struct winShm) object to the pShmNode has been set. All that is + ** left to do is to link the new object into the linked list starting + ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex + ** mutex. + */ + sqlite3_mutex_enter(pShmNode->mutex); + p->pNext = pShmNode->pFirst; + pShmNode->pFirst = p; + sqlite3_mutex_leave(pShmNode->mutex); + return rc; + + /* Jump here on any error */ +shm_open_err: + winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1); + winShmPurge(pDbFd->pVfs, 0); /* This call frees pShmNode if required */ + sqlite3_free(p); + sqlite3_free(pNew); + winShmLeaveMutex(); + return rc; +} + +/* +** Close a connection to shared-memory. Delete the underlying +** storage if deleteFlag is true. +*/ +static int winShmUnmap( + sqlite3_file *fd, /* Database holding shared memory */ + int deleteFlag /* Delete after closing if true */ +){ + winFile *pDbFd; /* Database holding shared-memory */ + winShm *p; /* The connection to be closed */ + winShmNode *pShmNode; /* The underlying shared-memory file */ + winShm **pp; /* For looping over sibling connections */ + + pDbFd = (winFile*)fd; + p = pDbFd->pShm; + if( p==0 ) return SQLITE_OK; + pShmNode = p->pShmNode; + + /* Remove connection p from the set of connections associated + ** with pShmNode */ + sqlite3_mutex_enter(pShmNode->mutex); + for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} + *pp = p->pNext; + + /* Free the connection p */ + sqlite3_free(p); + pDbFd->pShm = 0; + sqlite3_mutex_leave(pShmNode->mutex); + + /* If pShmNode->nRef has reached 0, then close the underlying + ** shared-memory file, too */ + winShmEnterMutex(); + assert( pShmNode->nRef>0 ); + pShmNode->nRef--; + if( pShmNode->nRef==0 ){ + winShmPurge(pDbFd->pVfs, deleteFlag); + } + winShmLeaveMutex(); + + return SQLITE_OK; +} + +/* +** Change the lock state for a shared-memory segment. +*/ +static int winShmLock( + sqlite3_file *fd, /* Database file holding the shared memory */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ +){ + winFile *pDbFd = (winFile*)fd; /* Connection holding shared memory */ + winShm *p = pDbFd->pShm; /* The shared memory being locked */ + winShm *pX; /* For looping over all siblings */ + winShmNode *pShmNode; + int rc = SQLITE_OK; /* Result code */ + u16 mask; /* Mask of locks to take or release */ + + if( p==0 ) return SQLITE_IOERR_SHMLOCK; + pShmNode = p->pShmNode; + if( NEVER(pShmNode==0) ) return SQLITE_IOERR_SHMLOCK; + + assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); + assert( n>=1 ); + assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); + assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); + + mask = (u16)((1U<<(ofst+n)) - (1U<1 || mask==(1<mutex); + if( flags & SQLITE_SHM_UNLOCK ){ + u16 allMask = 0; /* Mask of locks held by siblings */ + + /* See if any siblings hold this same lock */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( pX==p ) continue; + assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); + allMask |= pX->sharedMask; + } + + /* Unlock the system-level locks */ + if( (mask & allMask)==0 ){ + rc = winShmSystemLock(pShmNode, WINSHM_UNLCK, ofst+WIN_SHM_BASE, n); + }else{ + rc = SQLITE_OK; + } + + /* Undo the local locks */ + if( rc==SQLITE_OK ){ + p->exclMask &= ~mask; + p->sharedMask &= ~mask; + } + }else if( flags & SQLITE_SHM_SHARED ){ + u16 allShared = 0; /* Union of locks held by connections other than "p" */ + + /* Find out which shared locks are already held by sibling connections. + ** If any sibling already holds an exclusive lock, go ahead and return + ** SQLITE_BUSY. + */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 ){ + rc = SQLITE_BUSY; + break; + } + allShared |= pX->sharedMask; + } + + /* Get shared locks at the system level, if necessary */ + if( rc==SQLITE_OK ){ + if( (allShared & mask)==0 ){ + rc = winShmSystemLock(pShmNode, WINSHM_RDLCK, ofst+WIN_SHM_BASE, n); + }else{ + rc = SQLITE_OK; + } + } + + /* Get the local shared locks */ + if( rc==SQLITE_OK ){ + p->sharedMask |= mask; + } + }else{ + /* Make sure no sibling connections hold locks that will block this + ** lock. If any do, return SQLITE_BUSY right away. + */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ + rc = SQLITE_BUSY; + break; + } + } + + /* Get the exclusive locks at the system level. Then if successful + ** also mark the local connection as being locked. + */ + if( rc==SQLITE_OK ){ + rc = winShmSystemLock(pShmNode, WINSHM_WRLCK, ofst+WIN_SHM_BASE, n); + if( rc==SQLITE_OK ){ + assert( (p->sharedMask & mask)==0 ); + p->exclMask |= mask; + } + } + } + sqlite3_mutex_leave(pShmNode->mutex); + OSTRACE(("SHM-LOCK pid=%lu, id=%d, sharedMask=%03x, exclMask=%03x, rc=%s\n", + osGetCurrentProcessId(), p->id, p->sharedMask, p->exclMask, + sqlite3ErrName(rc))); + return rc; +} + +/* +** Implement a memory barrier or memory fence on shared memory. +** +** All loads and stores begun before the barrier must complete before +** any load or store begun after the barrier. +*/ +static void winShmBarrier( + sqlite3_file *fd /* Database holding the shared memory */ +){ + UNUSED_PARAMETER(fd); + sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ + winShmEnterMutex(); /* Also mutex, for redundancy */ + winShmLeaveMutex(); +} + +/* +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. Each shared-memory region is szRegion +** bytes in size. +** +** If an error occurs, an error code is returned and *pp is set to NULL. +** +** Otherwise, if the isWrite parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** isWrite is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. +** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. +*/ +static int winShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + winFile *pDbFd = (winFile*)fd; + winShm *pShm = pDbFd->pShm; + winShmNode *pShmNode; + DWORD protect = PAGE_READWRITE; + DWORD flags = FILE_MAP_WRITE | FILE_MAP_READ; + int rc = SQLITE_OK; + + if( !pShm ){ + rc = winOpenSharedMemory(pDbFd); + if( rc!=SQLITE_OK ) return rc; + pShm = pDbFd->pShm; + assert( pShm!=0 ); + } + pShmNode = pShm->pShmNode; + + sqlite3_mutex_enter(pShmNode->mutex); + if( pShmNode->isUnlocked ){ + rc = winLockSharedMemory(pShmNode); + if( rc!=SQLITE_OK ) goto shmpage_out; + pShmNode->isUnlocked = 0; + } + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + + if( pShmNode->nRegion<=iRegion ){ + struct ShmRegion *apNew; /* New aRegion[] array */ + int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ + sqlite3_int64 sz; /* Current size of wal-index file */ + + pShmNode->szRegion = szRegion; + + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); + if( rc!=SQLITE_OK ){ + rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(), + "winShmMap1", pDbFd->zPath); + goto shmpage_out; + } + + if( szhFile, nByte); + if( rc!=SQLITE_OK ){ + rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(), + "winShmMap2", pDbFd->zPath); + goto shmpage_out; + } + } + + /* Map the requested memory region into this processes address space. */ + apNew = (struct ShmRegion *)sqlite3_realloc64( + pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0]) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM_BKPT; + goto shmpage_out; + } + pShmNode->aRegion = apNew; + + if( pShmNode->isReadonly ){ + protect = PAGE_READONLY; + flags = FILE_MAP_READ; + } + + while( pShmNode->nRegion<=iRegion ){ + HANDLE hMap = NULL; /* file-mapping handle */ + void *pMap = 0; /* Mapped memory region */ + +#if SQLITE_OS_WINRT + hMap = osCreateFileMappingFromApp(pShmNode->hFile.h, + NULL, protect, nByte, NULL + ); +#elif defined(SQLITE_WIN32_HAS_WIDE) + hMap = osCreateFileMappingW(pShmNode->hFile.h, + NULL, protect, 0, nByte, NULL + ); +#elif defined(SQLITE_WIN32_HAS_ANSI) && SQLITE_WIN32_CREATEFILEMAPPINGA + hMap = osCreateFileMappingA(pShmNode->hFile.h, + NULL, protect, 0, nByte, NULL + ); +#endif + OSTRACE(("SHM-MAP-CREATE pid=%lu, region=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, nByte, + hMap ? "ok" : "failed")); + if( hMap ){ + int iOffset = pShmNode->nRegion*szRegion; + int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity; +#if SQLITE_OS_WINRT + pMap = osMapViewOfFileFromApp(hMap, flags, + iOffset - iOffsetShift, szRegion + iOffsetShift + ); +#else + pMap = osMapViewOfFile(hMap, flags, + 0, iOffset - iOffsetShift, szRegion + iOffsetShift + ); +#endif + OSTRACE(("SHM-MAP-MAP pid=%lu, region=%d, offset=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, iOffset, + szRegion, pMap ? "ok" : "failed")); + } + if( !pMap ){ + pShmNode->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_SHMMAP, pShmNode->lastErrno, + "winShmMap3", pDbFd->zPath); + if( hMap ) osCloseHandle(hMap); + goto shmpage_out; + } + + pShmNode->aRegion[pShmNode->nRegion].pMap = pMap; + pShmNode->aRegion[pShmNode->nRegion].hMap = hMap; + pShmNode->nRegion++; + } + } + +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + int iOffset = iRegion*szRegion; + int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity; + char *p = (char *)pShmNode->aRegion[iRegion].pMap; + *pp = (void *)&p[iOffsetShift]; + }else{ + *pp = 0; + } + if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; + sqlite3_mutex_leave(pShmNode->mutex); + return rc; +} + +#else +# define winShmMap 0 +# define winShmLock 0 +# define winShmBarrier 0 +# define winShmUnmap 0 +#endif /* #ifndef SQLITE_OMIT_WAL */ + +/* +** Cleans up the mapped region of the specified file, if any. +*/ +#if SQLITE_MAX_MMAP_SIZE>0 +static int winUnmapfile(winFile *pFile){ + assert( pFile!=0 ); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, pMapRegion=%p, " + "mmapSize=%lld, mmapSizeMax=%lld\n", + osGetCurrentProcessId(), pFile, pFile->hMap, pFile->pMapRegion, + pFile->mmapSize, pFile->mmapSizeMax)); + if( pFile->pMapRegion ){ + if( !osUnmapViewOfFile(pFile->pMapRegion) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile, + pFile->pMapRegion)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmapfile1", pFile->zPath); + } + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + } + if( pFile->hMap!=NULL ){ + if( !osCloseHandle(pFile->hMap) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFile, pFile->hMap)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmapfile2", pFile->zPath); + } + pFile->hMap = NULL; + } + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile)); + return SQLITE_OK; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_SIZE, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. +*/ +static int winMapfile(winFile *pFd, sqlite3_int64 nByte){ + sqlite3_int64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, size=%lld\n", + osGetCurrentProcessId(), pFd, nByte)); + + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + rc = winFileSize((sqlite3_file*)pFd, &nMap); + if( rc ){ + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_IOERR_FSTAT\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_IOERR_FSTAT; + } + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1); + + if( nMap==0 && pFd->mmapSize>0 ){ + winUnmapfile(pFd); + } + if( nMap!=pFd->mmapSize ){ + void *pNew = 0; + DWORD protect = PAGE_READONLY; + DWORD flags = FILE_MAP_READ; + + winUnmapfile(pFd); +#ifdef SQLITE_MMAP_READWRITE + if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){ + protect = PAGE_READWRITE; + flags |= FILE_MAP_WRITE; + } +#endif +#if SQLITE_OS_WINRT + pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL); +#elif defined(SQLITE_WIN32_HAS_WIDE) + pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#elif defined(SQLITE_WIN32_HAS_ANSI) && SQLITE_WIN32_CREATEFILEMAPPINGA + pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#endif + if( pFd->hMap==NULL ){ + pFd->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile1", pFd->zPath); + /* Log the error, but continue normal operation using xRead/xWrite */ + OSTRACE(("MAP-FILE-CREATE pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return SQLITE_OK; + } + assert( (nMap % winSysInfo.dwPageSize)==0 ); + assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff ); +#if SQLITE_OS_WINRT + pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, (SIZE_T)nMap); +#else + pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap); +#endif + if( pNew==NULL ){ + osCloseHandle(pFd->hMap); + pFd->hMap = NULL; + pFd->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile2", pFd->zPath); + /* Log the error, but continue normal operation using xRead/xWrite */ + OSTRACE(("MAP-FILE-MAP pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return SQLITE_OK; + } + pFd->pMapRegion = pNew; + pFd->mmapSize = nMap; + } + + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling winUnfetch(). +*/ +static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ +#endif + *pp = 0; + + OSTRACE(("FETCH pid=%lu, pFile=%p, offset=%lld, amount=%d, pp=%p\n", + osGetCurrentProcessId(), fd, iOff, nAmt, pp)); + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = winMapfile(pFd, -1); + if( rc!=SQLITE_OK ){ + OSTRACE(("FETCH pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return rc; + } + } + if( pFd->mmapSize >= iOff+nAmt ){ + assert( pFd->pMapRegion!=0 ); + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + + OSTRACE(("FETCH pid=%lu, pFile=%p, pp=%p, *pp=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd, pp, *pp)); + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to winFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the winFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, offset=%lld, p=%p\n", + osGetCurrentProcessId(), pFd, iOff, p)); + + if( p ){ + pFd->nFetchOut--; + }else{ + /* FIXME: If Windows truly always prevents truncating or deleting a + ** file while a mapping is held, then the following winUnmapfile() call + ** is unnecessary can be omitted - potentially improving + ** performance. */ + winUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); +#endif + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd)); + return SQLITE_OK; +} + +/* +** Here ends the implementation of all sqlite3_file methods. +** +********************** End sqlite3_file Methods ******************************* +******************************************************************************/ + +/* +** This vector defines all the methods that can operate on an +** sqlite3_file for win32. +*/ +static const sqlite3_io_methods winIoMethod = { + 3, /* iVersion */ + winClose, /* xClose */ + winRead, /* xRead */ + winWrite, /* xWrite */ + winTruncate, /* xTruncate */ + winSync, /* xSync */ + winFileSize, /* xFileSize */ + winLock, /* xLock */ + winUnlock, /* xUnlock */ + winCheckReservedLock, /* xCheckReservedLock */ + winFileControl, /* xFileControl */ + winSectorSize, /* xSectorSize */ + winDeviceCharacteristics, /* xDeviceCharacteristics */ + winShmMap, /* xShmMap */ + winShmLock, /* xShmLock */ + winShmBarrier, /* xShmBarrier */ + winShmUnmap, /* xShmUnmap */ + winFetch, /* xFetch */ + winUnfetch /* xUnfetch */ +}; + +/* +** This vector defines all the methods that can operate on an +** sqlite3_file for win32 without performing any locking. +*/ +static const sqlite3_io_methods winIoNolockMethod = { + 3, /* iVersion */ + winClose, /* xClose */ + winRead, /* xRead */ + winWrite, /* xWrite */ + winTruncate, /* xTruncate */ + winSync, /* xSync */ + winFileSize, /* xFileSize */ + winNolockLock, /* xLock */ + winNolockUnlock, /* xUnlock */ + winNolockCheckReservedLock, /* xCheckReservedLock */ + winFileControl, /* xFileControl */ + winSectorSize, /* xSectorSize */ + winDeviceCharacteristics, /* xDeviceCharacteristics */ + winShmMap, /* xShmMap */ + winShmLock, /* xShmLock */ + winShmBarrier, /* xShmBarrier */ + winShmUnmap, /* xShmUnmap */ + winFetch, /* xFetch */ + winUnfetch /* xUnfetch */ +}; + +static winVfsAppData winAppData = { + &winIoMethod, /* pMethod */ + 0, /* pAppData */ + 0 /* bNoLock */ +}; + +static winVfsAppData winNolockAppData = { + &winIoNolockMethod, /* pMethod */ + 0, /* pAppData */ + 1 /* bNoLock */ +}; + +/**************************************************************************** +**************************** sqlite3_vfs methods **************************** +** +** This division contains the implementation of methods on the +** sqlite3_vfs object. +*/ + +#if defined(__CYGWIN__) +/* +** Convert a filename from whatever the underlying operating system +** supports for filenames into UTF-8. Space to hold the result is +** obtained from malloc and must be freed by the calling function. +*/ +static char *winConvertToUtf8Filename(const void *zFilename){ + char *zConverted = 0; + if( osIsNT() ){ + zConverted = winUnicodeToUtf8(zFilename); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + zConverted = winMbcsToUtf8(zFilename, osAreFileApisANSI()); + } +#endif + /* caller will handle out of memory */ + return zConverted; +} +#endif + +/* +** Convert a UTF-8 filename into whatever form the underlying +** operating system wants filenames in. Space to hold the result +** is obtained from malloc and must be freed by the calling +** function. +*/ +static void *winConvertFromUtf8Filename(const char *zFilename){ + void *zConverted = 0; + if( osIsNT() ){ + zConverted = winUtf8ToUnicode(zFilename); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + zConverted = winUtf8ToMbcs(zFilename, osAreFileApisANSI()); + } +#endif + /* caller will handle out of memory */ + return zConverted; +} + +/* +** This function returns non-zero if the specified UTF-8 string buffer +** ends with a directory separator character or one was successfully +** added to it. +*/ +static int winMakeEndInDirSep(int nBuf, char *zBuf){ + if( zBuf ){ + int nLen = sqlite3Strlen30(zBuf); + if( nLen>0 ){ + if( winIsDirSep(zBuf[nLen-1]) ){ + return 1; + }else if( nLen+1mxPathname; nBuf = nMax + 2; + zBuf = sqlite3MallocZero( nBuf ); + if( !zBuf ){ + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM_BKPT; + } + + /* Figure out the effective temporary directory. First, check if one + ** has been explicitly set by the application; otherwise, use the one + ** configured by the operating system. + */ + nDir = nMax - (nPre + 15); + assert( nDir>0 ); + if( sqlite3_temp_directory ){ + int nDirLen = sqlite3Strlen30(sqlite3_temp_directory); + if( nDirLen>0 ){ + if( !winIsDirSep(sqlite3_temp_directory[nDirLen-1]) ){ + nDirLen++; + } + if( nDirLen>nDir ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); + return winLogError(SQLITE_ERROR, 0, "winGetTempname1", 0); + } + sqlite3_snprintf(nMax, zBuf, "%s", sqlite3_temp_directory); + } + } +#if defined(__CYGWIN__) + else{ + static const char *azDirs[] = { + 0, /* getenv("SQLITE_TMPDIR") */ + 0, /* getenv("TMPDIR") */ + 0, /* getenv("TMP") */ + 0, /* getenv("TEMP") */ + 0, /* getenv("USERPROFILE") */ + "/var/tmp", + "/usr/tmp", + "/tmp", + ".", + 0 /* List terminator */ + }; + unsigned int i; + const char *zDir = 0; + + if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR"); + if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); + if( !azDirs[2] ) azDirs[2] = getenv("TMP"); + if( !azDirs[3] ) azDirs[3] = getenv("TEMP"); + if( !azDirs[4] ) azDirs[4] = getenv("USERPROFILE"); + for(i=0; i/etilqs_XXXXXXXXXXXXXXX\0\0" + ** + ** If not, return SQLITE_ERROR. The number 17 is used here in order to + ** account for the space used by the 15 character random suffix and the + ** two trailing NUL characters. The final directory separator character + ** has already added if it was not already present. + */ + nLen = sqlite3Strlen30(zBuf); + if( (nLen + nPre + 17) > nBuf ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); + return winLogError(SQLITE_ERROR, 0, "winGetTempname5", 0); + } + + sqlite3_snprintf(nBuf-16-nLen, zBuf+nLen, SQLITE_TEMP_FILE_PREFIX); + + j = sqlite3Strlen30(zBuf); + sqlite3_randomness(15, &zBuf[j]); + for(i=0; i<15; i++, j++){ + zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; + } + zBuf[j] = 0; + zBuf[j+1] = 0; + *pzBuf = zBuf; + + OSTRACE(("TEMP-FILENAME name=%s, rc=SQLITE_OK\n", zBuf)); + return SQLITE_OK; +} + +/* +** Return TRUE if the named file is really a directory. Return false if +** it is something other than a directory, or if there is any kind of memory +** allocation failure. +*/ +static int winIsDir(const void *zConverted){ + DWORD attr; + int rc = 0; + DWORD lastErrno; + + if( osIsNT() ){ + int cnt = 0; + WIN32_FILE_ATTRIBUTE_DATA sAttrData; + memset(&sAttrData, 0, sizeof(sAttrData)); + while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted, + GetFileExInfoStandard, + &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){} + if( !rc ){ + return 0; /* Invalid name? */ + } + attr = sAttrData.dwFileAttributes; +#if SQLITE_OS_WINCE==0 + }else{ + attr = osGetFileAttributesA((char*)zConverted); +#endif + } + return (attr!=INVALID_FILE_ATTRIBUTES) && (attr&FILE_ATTRIBUTE_DIRECTORY); +} + +/* forward reference */ +static int winAccess( + sqlite3_vfs *pVfs, /* Not used on win32 */ + const char *zFilename, /* Name of file to check */ + int flags, /* Type of test to make on this file */ + int *pResOut /* OUT: Result */ +); + +/* +** Open a file. +*/ +static int winOpen( + sqlite3_vfs *pVfs, /* Used to get maximum path length and AppData */ + const char *zName, /* Name of the file (UTF-8) */ + sqlite3_file *id, /* Write the SQLite file handle here */ + int flags, /* Open mode flags */ + int *pOutFlags /* Status return flags */ +){ + HANDLE h; + DWORD lastErrno = 0; + DWORD dwDesiredAccess; + DWORD dwShareMode; + DWORD dwCreationDisposition; + DWORD dwFlagsAndAttributes = 0; +#if SQLITE_OS_WINCE + int isTemp = 0; +#endif + winVfsAppData *pAppData; + winFile *pFile = (winFile*)id; + void *zConverted; /* Filename in OS encoding */ + const char *zUtf8Name = zName; /* Filename in UTF-8 encoding */ + int cnt = 0; + + /* If argument zPath is a NULL pointer, this function is required to open + ** a temporary file. Use this buffer to store the file name in. + */ + char *zTmpname = 0; /* For temporary filename, if necessary. */ + + int rc = SQLITE_OK; /* Function Return Code */ +#if !defined(NDEBUG) || SQLITE_OS_WINCE + int eType = flags&0xFFFFFF00; /* Type of file to open */ +#endif + + int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); + int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); + int isCreate = (flags & SQLITE_OPEN_CREATE); + int isReadonly = (flags & SQLITE_OPEN_READONLY); + int isReadWrite = (flags & SQLITE_OPEN_READWRITE); + +#ifndef NDEBUG + int isOpenJournal = (isCreate && ( + eType==SQLITE_OPEN_SUPER_JOURNAL + || eType==SQLITE_OPEN_MAIN_JOURNAL + || eType==SQLITE_OPEN_WAL + )); +#endif + + OSTRACE(("OPEN name=%s, pFile=%p, flags=%x, pOutFlags=%p\n", + zUtf8Name, id, flags, pOutFlags)); + + /* Check the following statements are true: + ** + ** (a) Exactly one of the READWRITE and READONLY flags must be set, and + ** (b) if CREATE is set, then READWRITE must also be set, and + ** (c) if EXCLUSIVE is set, then CREATE must also be set. + ** (d) if DELETEONCLOSE is set, then CREATE must also be set. + */ + assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); + assert(isCreate==0 || isReadWrite); + assert(isExclusive==0 || isCreate); + assert(isDelete==0 || isCreate); + + /* The main DB, main journal, WAL file and super-journal are never + ** automatically deleted. Nor are they ever temporary files. */ + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_SUPER_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); + + /* Assert that the upper layer has set one of the "file-type" flags. */ + assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB + || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL + || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_SUPER_JOURNAL + || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL + ); + + assert( pFile!=0 ); + memset(pFile, 0, sizeof(winFile)); + pFile->h = INVALID_HANDLE_VALUE; + +#if SQLITE_OS_WINRT + if( !zUtf8Name && !sqlite3_temp_directory ){ + sqlite3_log(SQLITE_ERROR, + "sqlite3_temp_directory variable should be set for WinRT"); + } +#endif + + /* If the second argument to this function is NULL, generate a + ** temporary file name to use + */ + if( !zUtf8Name ){ + assert( isDelete && !isOpenJournal ); + rc = winGetTempname(pVfs, &zTmpname); + if( rc!=SQLITE_OK ){ + OSTRACE(("OPEN name=%s, rc=%s", zUtf8Name, sqlite3ErrName(rc))); + return rc; + } + zUtf8Name = zTmpname; + } + + /* Database filenames are double-zero terminated if they are not + ** URIs with parameters. Hence, they can always be passed into + ** sqlite3_uri_parameter(). + */ + assert( (eType!=SQLITE_OPEN_MAIN_DB) || (flags & SQLITE_OPEN_URI) || + zUtf8Name[sqlite3Strlen30(zUtf8Name)+1]==0 ); + + /* Convert the filename to the system encoding. */ + zConverted = winConvertFromUtf8Filename(zUtf8Name); + if( zConverted==0 ){ + sqlite3_free(zTmpname); + OSTRACE(("OPEN name=%s, rc=SQLITE_IOERR_NOMEM", zUtf8Name)); + return SQLITE_IOERR_NOMEM_BKPT; + } + + if( winIsDir(zConverted) ){ + sqlite3_free(zConverted); + sqlite3_free(zTmpname); + OSTRACE(("OPEN name=%s, rc=SQLITE_CANTOPEN_ISDIR", zUtf8Name)); + return SQLITE_CANTOPEN_ISDIR; + } + + if( isReadWrite ){ + dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; + }else{ + dwDesiredAccess = GENERIC_READ; + } + + /* SQLITE_OPEN_EXCLUSIVE is used to make sure that a new file is + ** created. SQLite doesn't use it to indicate "exclusive access" + ** as it is usually understood. + */ + if( isExclusive ){ + /* Creates a new file, only if it does not already exist. */ + /* If the file exists, it fails. */ + dwCreationDisposition = CREATE_NEW; + }else if( isCreate ){ + /* Open existing file, or create if it doesn't exist */ + dwCreationDisposition = OPEN_ALWAYS; + }else{ + /* Opens a file, only if it exists. */ + dwCreationDisposition = OPEN_EXISTING; + } + + if( 0==sqlite3_uri_boolean(zName, "exclusive", 0) ){ + dwShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE; + }else{ + dwShareMode = 0; + } + + if( isDelete ){ +#if SQLITE_OS_WINCE + dwFlagsAndAttributes = FILE_ATTRIBUTE_HIDDEN; + isTemp = 1; +#else + dwFlagsAndAttributes = FILE_ATTRIBUTE_TEMPORARY + | FILE_ATTRIBUTE_HIDDEN + | FILE_FLAG_DELETE_ON_CLOSE; +#endif + }else{ + dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; + } + /* Reports from the internet are that performance is always + ** better if FILE_FLAG_RANDOM_ACCESS is used. Ticket #2699. */ +#if SQLITE_OS_WINCE + dwFlagsAndAttributes |= FILE_FLAG_RANDOM_ACCESS; +#endif + + if( osIsNT() ){ +#if SQLITE_OS_WINRT + CREATEFILE2_EXTENDED_PARAMETERS extendedParameters; + extendedParameters.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS); + extendedParameters.dwFileAttributes = + dwFlagsAndAttributes & FILE_ATTRIBUTE_MASK; + extendedParameters.dwFileFlags = dwFlagsAndAttributes & FILE_FLAG_MASK; + extendedParameters.dwSecurityQosFlags = SECURITY_ANONYMOUS; + extendedParameters.lpSecurityAttributes = NULL; + extendedParameters.hTemplateFile = NULL; + do{ + h = osCreateFile2((LPCWSTR)zConverted, + dwDesiredAccess, + dwShareMode, + dwCreationDisposition, + &extendedParameters); + if( h!=INVALID_HANDLE_VALUE ) break; + if( isReadWrite ){ + int rc2, isRO = 0; + sqlite3BeginBenignMalloc(); + rc2 = winAccess(pVfs, zName, SQLITE_ACCESS_READ, &isRO); + sqlite3EndBenignMalloc(); + if( rc2==SQLITE_OK && isRO ) break; + } + }while( winRetryIoerr(&cnt, &lastErrno) ); +#else + do{ + h = osCreateFileW((LPCWSTR)zConverted, + dwDesiredAccess, + dwShareMode, NULL, + dwCreationDisposition, + dwFlagsAndAttributes, + NULL); + if( h!=INVALID_HANDLE_VALUE ) break; + if( isReadWrite ){ + int rc2, isRO = 0; + sqlite3BeginBenignMalloc(); + rc2 = winAccess(pVfs, zName, SQLITE_ACCESS_READ, &isRO); + sqlite3EndBenignMalloc(); + if( rc2==SQLITE_OK && isRO ) break; + } + }while( winRetryIoerr(&cnt, &lastErrno) ); +#endif + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + do{ + h = osCreateFileA((LPCSTR)zConverted, + dwDesiredAccess, + dwShareMode, NULL, + dwCreationDisposition, + dwFlagsAndAttributes, + NULL); + if( h!=INVALID_HANDLE_VALUE ) break; + if( isReadWrite ){ + int rc2, isRO = 0; + sqlite3BeginBenignMalloc(); + rc2 = winAccess(pVfs, zName, SQLITE_ACCESS_READ, &isRO); + sqlite3EndBenignMalloc(); + if( rc2==SQLITE_OK && isRO ) break; + } + }while( winRetryIoerr(&cnt, &lastErrno) ); + } +#endif + winLogIoerr(cnt, __LINE__); + + OSTRACE(("OPEN file=%p, name=%s, access=%lx, rc=%s\n", h, zUtf8Name, + dwDesiredAccess, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); + + if( h==INVALID_HANDLE_VALUE ){ + sqlite3_free(zConverted); + sqlite3_free(zTmpname); + if( isReadWrite && !isExclusive ){ + return winOpen(pVfs, zName, id, + ((flags|SQLITE_OPEN_READONLY) & + ~(SQLITE_OPEN_CREATE|SQLITE_OPEN_READWRITE)), + pOutFlags); + }else{ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_CANTOPEN, pFile->lastErrno, "winOpen", zUtf8Name); + return SQLITE_CANTOPEN_BKPT; + } + } + + if( pOutFlags ){ + if( isReadWrite ){ + *pOutFlags = SQLITE_OPEN_READWRITE; + }else{ + *pOutFlags = SQLITE_OPEN_READONLY; + } + } + + OSTRACE(("OPEN file=%p, name=%s, access=%lx, pOutFlags=%p, *pOutFlags=%d, " + "rc=%s\n", h, zUtf8Name, dwDesiredAccess, pOutFlags, pOutFlags ? + *pOutFlags : 0, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); + + pAppData = (winVfsAppData*)pVfs->pAppData; + +#if SQLITE_OS_WINCE + { + if( isReadWrite && eType==SQLITE_OPEN_MAIN_DB + && ((pAppData==NULL) || !pAppData->bNoLock) + && (rc = winceCreateLock(zName, pFile))!=SQLITE_OK + ){ + osCloseHandle(h); + sqlite3_free(zConverted); + sqlite3_free(zTmpname); + OSTRACE(("OPEN-CE-LOCK name=%s, rc=%s\n", zName, sqlite3ErrName(rc))); + return rc; + } + } + if( isTemp ){ + pFile->zDeleteOnClose = zConverted; + }else +#endif + { + sqlite3_free(zConverted); + } + + sqlite3_free(zTmpname); + id->pMethods = pAppData ? pAppData->pMethod : &winIoMethod; + pFile->pVfs = pVfs; + pFile->h = h; + if( isReadonly ){ + pFile->ctrlFlags |= WINFILE_RDONLY; + } + if( (flags & SQLITE_OPEN_MAIN_DB) + && sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) + ){ + pFile->ctrlFlags |= WINFILE_PSOW; + } + pFile->lastErrno = NO_ERROR; + pFile->zPath = zName; +#if SQLITE_MAX_MMAP_SIZE>0 + pFile->hMap = NULL; + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif + + OpenCounter(+1); + return rc; +} + +/* +** Delete the named file. +** +** Note that Windows does not allow a file to be deleted if some other +** process has it open. Sometimes a virus scanner or indexing program +** will open a journal file shortly after it is created in order to do +** whatever it does. While this other process is holding the +** file open, we will be unable to delete it. To work around this +** problem, we delay 100 milliseconds and try to delete again. Up +** to MX_DELETION_ATTEMPTs deletion attempts are run before giving +** up and returning an error. +*/ +static int winDelete( + sqlite3_vfs *pVfs, /* Not used on win32 */ + const char *zFilename, /* Name of file to delete */ + int syncDir /* Not used on win32 */ +){ + int cnt = 0; + int rc; + DWORD attr; + DWORD lastErrno = 0; + void *zConverted; + UNUSED_PARAMETER(pVfs); + UNUSED_PARAMETER(syncDir); + + SimulateIOError(return SQLITE_IOERR_DELETE); + OSTRACE(("DELETE name=%s, syncDir=%d\n", zFilename, syncDir)); + + zConverted = winConvertFromUtf8Filename(zFilename); + if( zConverted==0 ){ + OSTRACE(("DELETE name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); + return SQLITE_IOERR_NOMEM_BKPT; + } + if( osIsNT() ){ + do { +#if SQLITE_OS_WINRT + WIN32_FILE_ATTRIBUTE_DATA sAttrData; + memset(&sAttrData, 0, sizeof(sAttrData)); + if ( osGetFileAttributesExW(zConverted, GetFileExInfoStandard, + &sAttrData) ){ + attr = sAttrData.dwFileAttributes; + }else{ + lastErrno = osGetLastError(); + if( lastErrno==ERROR_FILE_NOT_FOUND + || lastErrno==ERROR_PATH_NOT_FOUND ){ + rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ + }else{ + rc = SQLITE_ERROR; + } + break; + } +#else + attr = osGetFileAttributesW(zConverted); +#endif + if ( attr==INVALID_FILE_ATTRIBUTES ){ + lastErrno = osGetLastError(); + if( lastErrno==ERROR_FILE_NOT_FOUND + || lastErrno==ERROR_PATH_NOT_FOUND ){ + rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ + }else{ + rc = SQLITE_ERROR; + } + break; + } + if ( attr&FILE_ATTRIBUTE_DIRECTORY ){ + rc = SQLITE_ERROR; /* Files only. */ + break; + } + if ( osDeleteFileW(zConverted) ){ + rc = SQLITE_OK; /* Deleted OK. */ + break; + } + if ( !winRetryIoerr(&cnt, &lastErrno) ){ + rc = SQLITE_ERROR; /* No more retries. */ + break; + } + } while(1); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + do { + attr = osGetFileAttributesA(zConverted); + if ( attr==INVALID_FILE_ATTRIBUTES ){ + lastErrno = osGetLastError(); + if( lastErrno==ERROR_FILE_NOT_FOUND + || lastErrno==ERROR_PATH_NOT_FOUND ){ + rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ + }else{ + rc = SQLITE_ERROR; + } + break; + } + if ( attr&FILE_ATTRIBUTE_DIRECTORY ){ + rc = SQLITE_ERROR; /* Files only. */ + break; + } + if ( osDeleteFileA(zConverted) ){ + rc = SQLITE_OK; /* Deleted OK. */ + break; + } + if ( !winRetryIoerr(&cnt, &lastErrno) ){ + rc = SQLITE_ERROR; /* No more retries. */ + break; + } + } while(1); + } +#endif + if( rc && rc!=SQLITE_IOERR_DELETE_NOENT ){ + rc = winLogError(SQLITE_IOERR_DELETE, lastErrno, "winDelete", zFilename); + }else{ + winLogIoerr(cnt, __LINE__); + } + sqlite3_free(zConverted); + OSTRACE(("DELETE name=%s, rc=%s\n", zFilename, sqlite3ErrName(rc))); + return rc; +} + +/* +** Check the existence and status of a file. +*/ +static int winAccess( + sqlite3_vfs *pVfs, /* Not used on win32 */ + const char *zFilename, /* Name of file to check */ + int flags, /* Type of test to make on this file */ + int *pResOut /* OUT: Result */ +){ + DWORD attr; + int rc = 0; + DWORD lastErrno = 0; + void *zConverted; + UNUSED_PARAMETER(pVfs); + + SimulateIOError( return SQLITE_IOERR_ACCESS; ); + OSTRACE(("ACCESS name=%s, flags=%x, pResOut=%p\n", + zFilename, flags, pResOut)); + + zConverted = winConvertFromUtf8Filename(zFilename); + if( zConverted==0 ){ + OSTRACE(("ACCESS name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); + return SQLITE_IOERR_NOMEM_BKPT; + } + if( osIsNT() ){ + int cnt = 0; + WIN32_FILE_ATTRIBUTE_DATA sAttrData; + memset(&sAttrData, 0, sizeof(sAttrData)); + while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted, + GetFileExInfoStandard, + &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){} + if( rc ){ + /* For an SQLITE_ACCESS_EXISTS query, treat a zero-length file + ** as if it does not exist. + */ + if( flags==SQLITE_ACCESS_EXISTS + && sAttrData.nFileSizeHigh==0 + && sAttrData.nFileSizeLow==0 ){ + attr = INVALID_FILE_ATTRIBUTES; + }else{ + attr = sAttrData.dwFileAttributes; + } + }else{ + winLogIoerr(cnt, __LINE__); + if( lastErrno!=ERROR_FILE_NOT_FOUND && lastErrno!=ERROR_PATH_NOT_FOUND ){ + sqlite3_free(zConverted); + return winLogError(SQLITE_IOERR_ACCESS, lastErrno, "winAccess", + zFilename); + }else{ + attr = INVALID_FILE_ATTRIBUTES; + } + } + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + attr = osGetFileAttributesA((char*)zConverted); + } +#endif + sqlite3_free(zConverted); + switch( flags ){ + case SQLITE_ACCESS_READ: + case SQLITE_ACCESS_EXISTS: + rc = attr!=INVALID_FILE_ATTRIBUTES; + break; + case SQLITE_ACCESS_READWRITE: + rc = attr!=INVALID_FILE_ATTRIBUTES && + (attr & FILE_ATTRIBUTE_READONLY)==0; + break; + default: + assert(!"Invalid flags argument"); + } + *pResOut = rc; + OSTRACE(("ACCESS name=%s, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + zFilename, pResOut, *pResOut)); + return SQLITE_OK; +} + +/* +** Returns non-zero if the specified path name starts with the "long path" +** prefix. +*/ +static BOOL winIsLongPathPrefix( + const char *zPathname +){ + return ( zPathname[0]=='\\' && zPathname[1]=='\\' + && zPathname[2]=='?' && zPathname[3]=='\\' ); +} + +/* +** Returns non-zero if the specified path name starts with a drive letter +** followed by a colon character. +*/ +static BOOL winIsDriveLetterAndColon( + const char *zPathname +){ + return ( sqlite3Isalpha(zPathname[0]) && zPathname[1]==':' ); +} + +/* +** Returns non-zero if the specified path name should be used verbatim. If +** non-zero is returned from this function, the calling function must simply +** use the provided path name verbatim -OR- resolve it into a full path name +** using the GetFullPathName Win32 API function (if available). +*/ +static BOOL winIsVerbatimPathname( + const char *zPathname +){ + /* + ** If the path name starts with a forward slash or a backslash, it is either + ** a legal UNC name, a volume relative path, or an absolute path name in the + ** "Unix" format on Windows. There is no easy way to differentiate between + ** the final two cases; therefore, we return the safer return value of TRUE + ** so that callers of this function will simply use it verbatim. + */ + if ( winIsDirSep(zPathname[0]) ){ + return TRUE; + } + + /* + ** If the path name starts with a letter and a colon it is either a volume + ** relative path or an absolute path. Callers of this function must not + ** attempt to treat it as a relative path name (i.e. they should simply use + ** it verbatim). + */ + if ( winIsDriveLetterAndColon(zPathname) ){ + return TRUE; + } + + /* + ** If we get to this point, the path name should almost certainly be a purely + ** relative one (i.e. not a UNC name, not absolute, and not volume relative). + */ + return FALSE; +} + +/* +** Turn a relative pathname into a full pathname. Write the full +** pathname into zOut[]. zOut[] will be at least pVfs->mxPathname +** bytes in size. +*/ +static int winFullPathname( + sqlite3_vfs *pVfs, /* Pointer to vfs object */ + const char *zRelative, /* Possibly relative input path */ + int nFull, /* Size of output buffer in bytes */ + char *zFull /* Output buffer */ +){ +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(__CYGWIN__) + DWORD nByte; + void *zConverted; + char *zOut; +#endif + + /* If this path name begins with "/X:" or "\\?\", where "X" is any + ** alphabetic character, discard the initial "/" from the pathname. + */ + if( zRelative[0]=='/' && (winIsDriveLetterAndColon(zRelative+1) + || winIsLongPathPrefix(zRelative+1)) ){ + zRelative++; + } + +#if defined(__CYGWIN__) + SimulateIOError( return SQLITE_ERROR ); + UNUSED_PARAMETER(nFull); + assert( nFull>=pVfs->mxPathname ); + if ( sqlite3_data_directory && !winIsVerbatimPathname(zRelative) ){ + /* + ** NOTE: We are dealing with a relative path name and the data + ** directory has been set. Therefore, use it as the basis + ** for converting the relative path name to an absolute + ** one by prepending the data directory and a slash. + */ + char *zOut = sqlite3MallocZero( pVfs->mxPathname+1 ); + if( !zOut ){ + return SQLITE_IOERR_NOMEM_BKPT; + } + if( cygwin_conv_path( + (osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A) | + CCP_RELATIVE, zRelative, zOut, pVfs->mxPathname+1)<0 ){ + sqlite3_free(zOut); + return winLogError(SQLITE_CANTOPEN_CONVPATH, (DWORD)errno, + "winFullPathname1", zRelative); + }else{ + char *zUtf8 = winConvertToUtf8Filename(zOut); + if( !zUtf8 ){ + sqlite3_free(zOut); + return SQLITE_IOERR_NOMEM_BKPT; + } + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s%c%s", + sqlite3_data_directory, winGetDirSep(), zUtf8); + sqlite3_free(zUtf8); + sqlite3_free(zOut); + } + }else{ + char *zOut = sqlite3MallocZero( pVfs->mxPathname+1 ); + if( !zOut ){ + return SQLITE_IOERR_NOMEM_BKPT; + } + if( cygwin_conv_path( + (osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A), + zRelative, zOut, pVfs->mxPathname+1)<0 ){ + sqlite3_free(zOut); + return winLogError(SQLITE_CANTOPEN_CONVPATH, (DWORD)errno, + "winFullPathname2", zRelative); + }else{ + char *zUtf8 = winConvertToUtf8Filename(zOut); + if( !zUtf8 ){ + sqlite3_free(zOut); + return SQLITE_IOERR_NOMEM_BKPT; + } + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zUtf8); + sqlite3_free(zUtf8); + sqlite3_free(zOut); + } + } + return SQLITE_OK; +#endif + +#if (SQLITE_OS_WINCE || SQLITE_OS_WINRT) && !defined(__CYGWIN__) + SimulateIOError( return SQLITE_ERROR ); + /* WinCE has no concept of a relative pathname, or so I am told. */ + /* WinRT has no way to convert a relative path to an absolute one. */ + if ( sqlite3_data_directory && !winIsVerbatimPathname(zRelative) ){ + /* + ** NOTE: We are dealing with a relative path name and the data + ** directory has been set. Therefore, use it as the basis + ** for converting the relative path name to an absolute + ** one by prepending the data directory and a backslash. + */ + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s%c%s", + sqlite3_data_directory, winGetDirSep(), zRelative); + }else{ + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zRelative); + } + return SQLITE_OK; +#endif + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(__CYGWIN__) + /* It's odd to simulate an io-error here, but really this is just + ** using the io-error infrastructure to test that SQLite handles this + ** function failing. This function could fail if, for example, the + ** current working directory has been unlinked. + */ + SimulateIOError( return SQLITE_ERROR ); + if ( sqlite3_data_directory && !winIsVerbatimPathname(zRelative) ){ + /* + ** NOTE: We are dealing with a relative path name and the data + ** directory has been set. Therefore, use it as the basis + ** for converting the relative path name to an absolute + ** one by prepending the data directory and a backslash. + */ + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s%c%s", + sqlite3_data_directory, winGetDirSep(), zRelative); + return SQLITE_OK; + } + zConverted = winConvertFromUtf8Filename(zRelative); + if( zConverted==0 ){ + return SQLITE_IOERR_NOMEM_BKPT; + } + if( osIsNT() ){ + LPWSTR zTemp; + nByte = osGetFullPathNameW((LPCWSTR)zConverted, 0, 0, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname1", zRelative); + } + nByte += 3; + zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) ); + if( zTemp==0 ){ + sqlite3_free(zConverted); + return SQLITE_IOERR_NOMEM_BKPT; + } + nByte = osGetFullPathNameW((LPCWSTR)zConverted, nByte, zTemp, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + sqlite3_free(zTemp); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname2", zRelative); + } + sqlite3_free(zConverted); + zOut = winUnicodeToUtf8(zTemp); + sqlite3_free(zTemp); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + char *zTemp; + nByte = osGetFullPathNameA((char*)zConverted, 0, 0, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname3", zRelative); + } + nByte += 3; + zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) ); + if( zTemp==0 ){ + sqlite3_free(zConverted); + return SQLITE_IOERR_NOMEM_BKPT; + } + nByte = osGetFullPathNameA((char*)zConverted, nByte, zTemp, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + sqlite3_free(zTemp); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname4", zRelative); + } + sqlite3_free(zConverted); + zOut = winMbcsToUtf8(zTemp, osAreFileApisANSI()); + sqlite3_free(zTemp); + } +#endif + if( zOut ){ + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zOut); + sqlite3_free(zOut); + return SQLITE_OK; + }else{ + return SQLITE_IOERR_NOMEM_BKPT; + } +#endif +} + +#ifndef SQLITE_OMIT_LOAD_EXTENSION +/* +** Interfaces for opening a shared library, finding entry points +** within the shared library, and closing the shared library. +*/ +static void *winDlOpen(sqlite3_vfs *pVfs, const char *zFilename){ + HANDLE h; +#if defined(__CYGWIN__) + int nFull = pVfs->mxPathname+1; + char *zFull = sqlite3MallocZero( nFull ); + void *zConverted = 0; + if( zFull==0 ){ + OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)0)); + return 0; + } + if( winFullPathname(pVfs, zFilename, nFull, zFull)!=SQLITE_OK ){ + sqlite3_free(zFull); + OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)0)); + return 0; + } + zConverted = winConvertFromUtf8Filename(zFull); + sqlite3_free(zFull); +#else + void *zConverted = winConvertFromUtf8Filename(zFilename); + UNUSED_PARAMETER(pVfs); +#endif + if( zConverted==0 ){ + OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)0)); + return 0; + } + if( osIsNT() ){ +#if SQLITE_OS_WINRT + h = osLoadPackagedLibrary((LPCWSTR)zConverted, 0); +#else + h = osLoadLibraryW((LPCWSTR)zConverted); +#endif + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + h = osLoadLibraryA((char*)zConverted); + } +#endif + OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)h)); + sqlite3_free(zConverted); + return (void*)h; +} +static void winDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){ + UNUSED_PARAMETER(pVfs); + winGetLastErrorMsg(osGetLastError(), nBuf, zBufOut); +} +static void (*winDlSym(sqlite3_vfs *pVfs,void *pH,const char *zSym))(void){ + FARPROC proc; + UNUSED_PARAMETER(pVfs); + proc = osGetProcAddressA((HANDLE)pH, zSym); + OSTRACE(("DLSYM handle=%p, symbol=%s, address=%p\n", + (void*)pH, zSym, (void*)proc)); + return (void(*)(void))proc; +} +static void winDlClose(sqlite3_vfs *pVfs, void *pHandle){ + UNUSED_PARAMETER(pVfs); + osFreeLibrary((HANDLE)pHandle); + OSTRACE(("DLCLOSE handle=%p\n", (void*)pHandle)); +} +#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ + #define winDlOpen 0 + #define winDlError 0 + #define winDlSym 0 + #define winDlClose 0 +#endif + +/* State information for the randomness gatherer. */ +typedef struct EntropyGatherer EntropyGatherer; +struct EntropyGatherer { + unsigned char *a; /* Gather entropy into this buffer */ + int na; /* Size of a[] in bytes */ + int i; /* XOR next input into a[i] */ + int nXor; /* Number of XOR operations done */ +}; + +#if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) +/* Mix sz bytes of entropy into p. */ +static void xorMemory(EntropyGatherer *p, unsigned char *x, int sz){ + int j, k; + for(j=0, k=p->i; ja[k++] ^= x[j]; + if( k>=p->na ) k = 0; + } + p->i = k; + p->nXor += sz; +} +#endif /* !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) */ + +/* +** Write up to nBuf bytes of randomness into zBuf. +*/ +static int winRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ +#if defined(SQLITE_TEST) || defined(SQLITE_OMIT_RANDOMNESS) + UNUSED_PARAMETER(pVfs); + memset(zBuf, 0, nBuf); + return nBuf; +#else + EntropyGatherer e; + UNUSED_PARAMETER(pVfs); + memset(zBuf, 0, nBuf); + e.a = (unsigned char*)zBuf; + e.na = nBuf; + e.nXor = 0; + e.i = 0; + { + SYSTEMTIME x; + osGetSystemTime(&x); + xorMemory(&e, (unsigned char*)&x, sizeof(SYSTEMTIME)); + } + { + DWORD pid = osGetCurrentProcessId(); + xorMemory(&e, (unsigned char*)&pid, sizeof(DWORD)); + } +#if SQLITE_OS_WINRT + { + ULONGLONG cnt = osGetTickCount64(); + xorMemory(&e, (unsigned char*)&cnt, sizeof(ULONGLONG)); + } +#else + { + DWORD cnt = osGetTickCount(); + xorMemory(&e, (unsigned char*)&cnt, sizeof(DWORD)); + } +#endif /* SQLITE_OS_WINRT */ + { + LARGE_INTEGER i; + osQueryPerformanceCounter(&i); + xorMemory(&e, (unsigned char*)&i, sizeof(LARGE_INTEGER)); + } +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID + { + UUID id; + memset(&id, 0, sizeof(UUID)); + osUuidCreate(&id); + xorMemory(&e, (unsigned char*)&id, sizeof(UUID)); + memset(&id, 0, sizeof(UUID)); + osUuidCreateSequential(&id); + xorMemory(&e, (unsigned char*)&id, sizeof(UUID)); + } +#endif /* !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID */ + return e.nXor>nBuf ? nBuf : e.nXor; +#endif /* defined(SQLITE_TEST) || defined(SQLITE_OMIT_RANDOMNESS) */ +} + + +/* +** Sleep for a little while. Return the amount of time slept. +*/ +static int winSleep(sqlite3_vfs *pVfs, int microsec){ + sqlite3_win32_sleep((microsec+999)/1000); + UNUSED_PARAMETER(pVfs); + return ((microsec+999)/1000)*1000; +} + +/* +** The following variable, if set to a non-zero value, is interpreted as +** the number of seconds since 1970 and is used to set the result of +** sqlite3OsCurrentTime() during testing. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ +#endif + +/* +** Find the current time (in Universal Coordinated Time). Write into *piNow +** the current time and date as a Julian Day number times 86_400_000. In +** other words, write into *piNow the number of milliseconds since the Julian +** epoch of noon in Greenwich on November 24, 4714 B.C according to the +** proleptic Gregorian calendar. +** +** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date +** cannot be found. +*/ +static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){ + /* FILETIME structure is a 64-bit value representing the number of + 100-nanosecond intervals since January 1, 1601 (= JD 2305813.5). + */ + FILETIME ft; + static const sqlite3_int64 winFiletimeEpoch = 23058135*(sqlite3_int64)8640000; +#ifdef SQLITE_TEST + static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; +#endif + /* 2^32 - to avoid use of LL and warnings in gcc */ + static const sqlite3_int64 max32BitValue = + (sqlite3_int64)2000000000 + (sqlite3_int64)2000000000 + + (sqlite3_int64)294967296; + +#if SQLITE_OS_WINCE + SYSTEMTIME time; + osGetSystemTime(&time); + /* if SystemTimeToFileTime() fails, it returns zero. */ + if (!osSystemTimeToFileTime(&time,&ft)){ + return SQLITE_ERROR; + } +#else + osGetSystemTimeAsFileTime( &ft ); +#endif + + *piNow = winFiletimeEpoch + + ((((sqlite3_int64)ft.dwHighDateTime)*max32BitValue) + + (sqlite3_int64)ft.dwLowDateTime)/(sqlite3_int64)10000; + +#ifdef SQLITE_TEST + if( sqlite3_current_time ){ + *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; + } +#endif + UNUSED_PARAMETER(pVfs); + return SQLITE_OK; +} + +/* +** Find the current time (in Universal Coordinated Time). Write the +** current time and date as a Julian Day number into *prNow and +** return 0. Return 1 if the time and date cannot be found. +*/ +static int winCurrentTime(sqlite3_vfs *pVfs, double *prNow){ + int rc; + sqlite3_int64 i; + rc = winCurrentTimeInt64(pVfs, &i); + if( !rc ){ + *prNow = i/86400000.0; + } + return rc; +} + +/* +** The idea is that this function works like a combination of +** GetLastError() and FormatMessage() on Windows (or errno and +** strerror_r() on Unix). After an error is returned by an OS +** function, SQLite calls this function with zBuf pointing to +** a buffer of nBuf bytes. The OS layer should populate the +** buffer with a nul-terminated UTF-8 encoded error message +** describing the last IO error to have occurred within the calling +** thread. +** +** If the error message is too large for the supplied buffer, +** it should be truncated. The return value of xGetLastError +** is zero if the error message fits in the buffer, or non-zero +** otherwise (if the message was truncated). If non-zero is returned, +** then it is not necessary to include the nul-terminator character +** in the output buffer. +** +** Not supplying an error message will have no adverse effect +** on SQLite. It is fine to have an implementation that never +** returns an error message: +** +** int xGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ +** assert(zBuf[0]=='\0'); +** return 0; +** } +** +** However if an error message is supplied, it will be incorporated +** by sqlite into the error message available to the user using +** sqlite3_errmsg(), possibly making IO errors easier to debug. +*/ +static int winGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ + DWORD e = osGetLastError(); + UNUSED_PARAMETER(pVfs); + if( nBuf>0 ) winGetLastErrorMsg(e, nBuf, zBuf); + return e; +} + +/* +** Initialize and deinitialize the operating system interface. +*/ +SQLITE_API int sqlite3_os_init(void){ + static sqlite3_vfs winVfs = { + 3, /* iVersion */ + sizeof(winFile), /* szOsFile */ + SQLITE_WIN32_MAX_PATH_BYTES, /* mxPathname */ + 0, /* pNext */ + "win32", /* zName */ + &winAppData, /* pAppData */ + winOpen, /* xOpen */ + winDelete, /* xDelete */ + winAccess, /* xAccess */ + winFullPathname, /* xFullPathname */ + winDlOpen, /* xDlOpen */ + winDlError, /* xDlError */ + winDlSym, /* xDlSym */ + winDlClose, /* xDlClose */ + winRandomness, /* xRandomness */ + winSleep, /* xSleep */ + winCurrentTime, /* xCurrentTime */ + winGetLastError, /* xGetLastError */ + winCurrentTimeInt64, /* xCurrentTimeInt64 */ + winSetSystemCall, /* xSetSystemCall */ + winGetSystemCall, /* xGetSystemCall */ + winNextSystemCall, /* xNextSystemCall */ + }; +#if defined(SQLITE_WIN32_HAS_WIDE) + static sqlite3_vfs winLongPathVfs = { + 3, /* iVersion */ + sizeof(winFile), /* szOsFile */ + SQLITE_WINNT_MAX_PATH_BYTES, /* mxPathname */ + 0, /* pNext */ + "win32-longpath", /* zName */ + &winAppData, /* pAppData */ + winOpen, /* xOpen */ + winDelete, /* xDelete */ + winAccess, /* xAccess */ + winFullPathname, /* xFullPathname */ + winDlOpen, /* xDlOpen */ + winDlError, /* xDlError */ + winDlSym, /* xDlSym */ + winDlClose, /* xDlClose */ + winRandomness, /* xRandomness */ + winSleep, /* xSleep */ + winCurrentTime, /* xCurrentTime */ + winGetLastError, /* xGetLastError */ + winCurrentTimeInt64, /* xCurrentTimeInt64 */ + winSetSystemCall, /* xSetSystemCall */ + winGetSystemCall, /* xGetSystemCall */ + winNextSystemCall, /* xNextSystemCall */ + }; +#endif + static sqlite3_vfs winNolockVfs = { + 3, /* iVersion */ + sizeof(winFile), /* szOsFile */ + SQLITE_WIN32_MAX_PATH_BYTES, /* mxPathname */ + 0, /* pNext */ + "win32-none", /* zName */ + &winNolockAppData, /* pAppData */ + winOpen, /* xOpen */ + winDelete, /* xDelete */ + winAccess, /* xAccess */ + winFullPathname, /* xFullPathname */ + winDlOpen, /* xDlOpen */ + winDlError, /* xDlError */ + winDlSym, /* xDlSym */ + winDlClose, /* xDlClose */ + winRandomness, /* xRandomness */ + winSleep, /* xSleep */ + winCurrentTime, /* xCurrentTime */ + winGetLastError, /* xGetLastError */ + winCurrentTimeInt64, /* xCurrentTimeInt64 */ + winSetSystemCall, /* xSetSystemCall */ + winGetSystemCall, /* xGetSystemCall */ + winNextSystemCall, /* xNextSystemCall */ + }; +#if defined(SQLITE_WIN32_HAS_WIDE) + static sqlite3_vfs winLongPathNolockVfs = { + 3, /* iVersion */ + sizeof(winFile), /* szOsFile */ + SQLITE_WINNT_MAX_PATH_BYTES, /* mxPathname */ + 0, /* pNext */ + "win32-longpath-none", /* zName */ + &winNolockAppData, /* pAppData */ + winOpen, /* xOpen */ + winDelete, /* xDelete */ + winAccess, /* xAccess */ + winFullPathname, /* xFullPathname */ + winDlOpen, /* xDlOpen */ + winDlError, /* xDlError */ + winDlSym, /* xDlSym */ + winDlClose, /* xDlClose */ + winRandomness, /* xRandomness */ + winSleep, /* xSleep */ + winCurrentTime, /* xCurrentTime */ + winGetLastError, /* xGetLastError */ + winCurrentTimeInt64, /* xCurrentTimeInt64 */ + winSetSystemCall, /* xSetSystemCall */ + winGetSystemCall, /* xGetSystemCall */ + winNextSystemCall, /* xNextSystemCall */ + }; +#endif + + /* Double-check that the aSyscall[] array has been constructed + ** correctly. See ticket [bb3a86e890c8e96ab] */ + assert( ArraySize(aSyscall)==80 ); + + /* get memory map allocation granularity */ + memset(&winSysInfo, 0, sizeof(SYSTEM_INFO)); +#if SQLITE_OS_WINRT + osGetNativeSystemInfo(&winSysInfo); +#else + osGetSystemInfo(&winSysInfo); +#endif + assert( winSysInfo.dwAllocationGranularity>0 ); + assert( winSysInfo.dwPageSize>0 ); + + sqlite3_vfs_register(&winVfs, 1); + +#if defined(SQLITE_WIN32_HAS_WIDE) + sqlite3_vfs_register(&winLongPathVfs, 0); +#endif + + sqlite3_vfs_register(&winNolockVfs, 0); + +#if defined(SQLITE_WIN32_HAS_WIDE) + sqlite3_vfs_register(&winLongPathNolockVfs, 0); +#endif + +#ifndef SQLITE_OMIT_WAL + winBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); +#endif + + return SQLITE_OK; +} + +SQLITE_API int sqlite3_os_end(void){ +#if SQLITE_OS_WINRT + if( sleepObj!=NULL ){ + osCloseHandle(sleepObj); + sleepObj = NULL; + } +#endif + +#ifndef SQLITE_OMIT_WAL + winBigLock = 0; +#endif + + return SQLITE_OK; +} + +#endif /* SQLITE_OS_WIN */ + +/************** End of os_win.c **********************************************/ +/************** Begin file memdb.c *******************************************/ +/* +** 2016-09-07 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file implements an in-memory VFS. A database is held as a contiguous +** block of memory. +** +** This file also implements interface sqlite3_serialize() and +** sqlite3_deserialize(). +*/ +/* #include "sqliteInt.h" */ +#ifndef SQLITE_OMIT_DESERIALIZE + +/* +** Forward declaration of objects used by this utility +*/ +typedef struct sqlite3_vfs MemVfs; +typedef struct MemFile MemFile; +typedef struct MemStore MemStore; + +/* Access to a lower-level VFS that (might) implement dynamic loading, +** access to randomness, etc. +*/ +#define ORIGVFS(p) ((sqlite3_vfs*)((p)->pAppData)) + +/* Storage for a memdb file. +** +** An memdb object can be shared or separate. Shared memdb objects can be +** used by more than one database connection. Mutexes are used by shared +** memdb objects to coordinate access. Separate memdb objects are only +** connected to a single database connection and do not require additional +** mutexes. +** +** Shared memdb objects have .zFName!=0 and .pMutex!=0. They are created +** using "file:/name?vfs=memdb". The first character of the name must be +** "/" or else the object will be a separate memdb object. All shared +** memdb objects are stored in memdb_g.apMemStore[] in an arbitrary order. +** +** Separate memdb objects are created using a name that does not begin +** with "/" or using sqlite3_deserialize(). +** +** Access rules for shared MemStore objects: +** +** * .zFName is initialized when the object is created and afterwards +** is unchanged until the object is destroyed. So it can be accessed +** at any time as long as we know the object is not being destroyed, +** which means while either the SQLITE_MUTEX_STATIC_VFS1 or +** .pMutex is held or the object is not part of memdb_g.apMemStore[]. +** +** * Can .pMutex can only be changed while holding the +** SQLITE_MUTEX_STATIC_VFS1 mutex or while the object is not part +** of memdb_g.apMemStore[]. +** +** * Other fields can only be changed while holding the .pMutex mutex +** or when the .nRef is less than zero and the object is not part of +** memdb_g.apMemStore[]. +** +** * The .aData pointer has the added requirement that it can can only +** be changed (for resizing) when nMmap is zero. +** +*/ +struct MemStore { + sqlite3_int64 sz; /* Size of the file */ + sqlite3_int64 szAlloc; /* Space allocated to aData */ + sqlite3_int64 szMax; /* Maximum allowed size of the file */ + unsigned char *aData; /* content of the file */ + sqlite3_mutex *pMutex; /* Used by shared stores only */ + int nMmap; /* Number of memory mapped pages */ + unsigned mFlags; /* Flags */ + int nRdLock; /* Number of readers */ + int nWrLock; /* Number of writers. (Always 0 or 1) */ + int nRef; /* Number of users of this MemStore */ + char *zFName; /* The filename for shared stores */ +}; + +/* An open file */ +struct MemFile { + sqlite3_file base; /* IO methods */ + MemStore *pStore; /* The storage */ + int eLock; /* Most recent lock against this file */ +}; + +/* +** File-scope variables for holding the memdb files that are accessible +** to multiple database connections in separate threads. +** +** Must hold SQLITE_MUTEX_STATIC_VFS1 to access any part of this object. +*/ +static struct MemFS { + int nMemStore; /* Number of shared MemStore objects */ + MemStore **apMemStore; /* Array of all shared MemStore objects */ +} memdb_g; + +/* +** Methods for MemFile +*/ +static int memdbClose(sqlite3_file*); +static int memdbRead(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); +static int memdbWrite(sqlite3_file*,const void*,int iAmt, sqlite3_int64 iOfst); +static int memdbTruncate(sqlite3_file*, sqlite3_int64 size); +static int memdbSync(sqlite3_file*, int flags); +static int memdbFileSize(sqlite3_file*, sqlite3_int64 *pSize); +static int memdbLock(sqlite3_file*, int); +/* static int memdbCheckReservedLock(sqlite3_file*, int *pResOut);// not used */ +static int memdbFileControl(sqlite3_file*, int op, void *pArg); +/* static int memdbSectorSize(sqlite3_file*); // not used */ +static int memdbDeviceCharacteristics(sqlite3_file*); +static int memdbFetch(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); +static int memdbUnfetch(sqlite3_file*, sqlite3_int64 iOfst, void *p); + +/* +** Methods for MemVfs +*/ +static int memdbOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *); +/* static int memdbDelete(sqlite3_vfs*, const char *zName, int syncDir); */ +static int memdbAccess(sqlite3_vfs*, const char *zName, int flags, int *); +static int memdbFullPathname(sqlite3_vfs*, const char *zName, int, char *zOut); +static void *memdbDlOpen(sqlite3_vfs*, const char *zFilename); +static void memdbDlError(sqlite3_vfs*, int nByte, char *zErrMsg); +static void (*memdbDlSym(sqlite3_vfs *pVfs, void *p, const char*zSym))(void); +static void memdbDlClose(sqlite3_vfs*, void*); +static int memdbRandomness(sqlite3_vfs*, int nByte, char *zOut); +static int memdbSleep(sqlite3_vfs*, int microseconds); +/* static int memdbCurrentTime(sqlite3_vfs*, double*); */ +static int memdbGetLastError(sqlite3_vfs*, int, char *); +static int memdbCurrentTimeInt64(sqlite3_vfs*, sqlite3_int64*); + +static sqlite3_vfs memdb_vfs = { + 2, /* iVersion */ + 0, /* szOsFile (set when registered) */ + 1024, /* mxPathname */ + 0, /* pNext */ + "memdb", /* zName */ + 0, /* pAppData (set when registered) */ + memdbOpen, /* xOpen */ + 0, /* memdbDelete, */ /* xDelete */ + memdbAccess, /* xAccess */ + memdbFullPathname, /* xFullPathname */ + memdbDlOpen, /* xDlOpen */ + memdbDlError, /* xDlError */ + memdbDlSym, /* xDlSym */ + memdbDlClose, /* xDlClose */ + memdbRandomness, /* xRandomness */ + memdbSleep, /* xSleep */ + 0, /* memdbCurrentTime, */ /* xCurrentTime */ + memdbGetLastError, /* xGetLastError */ + memdbCurrentTimeInt64, /* xCurrentTimeInt64 */ + 0, /* xSetSystemCall */ + 0, /* xGetSystemCall */ + 0, /* xNextSystemCall */ +}; + +static const sqlite3_io_methods memdb_io_methods = { + 3, /* iVersion */ + memdbClose, /* xClose */ + memdbRead, /* xRead */ + memdbWrite, /* xWrite */ + memdbTruncate, /* xTruncate */ + memdbSync, /* xSync */ + memdbFileSize, /* xFileSize */ + memdbLock, /* xLock */ + memdbLock, /* xUnlock - same as xLock in this case */ + 0, /* memdbCheckReservedLock, */ /* xCheckReservedLock */ + memdbFileControl, /* xFileControl */ + 0, /* memdbSectorSize,*/ /* xSectorSize */ + memdbDeviceCharacteristics, /* xDeviceCharacteristics */ + 0, /* xShmMap */ + 0, /* xShmLock */ + 0, /* xShmBarrier */ + 0, /* xShmUnmap */ + memdbFetch, /* xFetch */ + memdbUnfetch /* xUnfetch */ +}; + +/* +** Enter/leave the mutex on a MemStore +*/ +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE==0 +static void memdbEnter(MemStore *p){ + UNUSED_PARAMETER(p); +} +static void memdbLeave(MemStore *p){ + UNUSED_PARAMETER(p); +} +#else +static void memdbEnter(MemStore *p){ + sqlite3_mutex_enter(p->pMutex); +} +static void memdbLeave(MemStore *p){ + sqlite3_mutex_leave(p->pMutex); +} +#endif + + + +/* +** Close an memdb-file. +** Free the underlying MemStore object when its refcount drops to zero +** or less. +*/ +static int memdbClose(sqlite3_file *pFile){ + MemStore *p = ((MemFile*)pFile)->pStore; + if( p->zFName ){ + int i; +#ifndef SQLITE_MUTEX_OMIT + sqlite3_mutex *pVfsMutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); +#endif + sqlite3_mutex_enter(pVfsMutex); + for(i=0; ALWAYS(inRef==1 ){ + memdb_g.apMemStore[i] = memdb_g.apMemStore[--memdb_g.nMemStore]; + if( memdb_g.nMemStore==0 ){ + sqlite3_free(memdb_g.apMemStore); + memdb_g.apMemStore = 0; + } + } + break; + } + } + sqlite3_mutex_leave(pVfsMutex); + }else{ + memdbEnter(p); + } + p->nRef--; + if( p->nRef<=0 ){ + if( p->mFlags & SQLITE_DESERIALIZE_FREEONCLOSE ){ + sqlite3_free(p->aData); + } + memdbLeave(p); + sqlite3_mutex_free(p->pMutex); + sqlite3_free(p); + }else{ + memdbLeave(p); + } + return SQLITE_OK; +} + +/* +** Read data from an memdb-file. +*/ +static int memdbRead( + sqlite3_file *pFile, + void *zBuf, + int iAmt, + sqlite_int64 iOfst +){ + MemStore *p = ((MemFile*)pFile)->pStore; + memdbEnter(p); + if( iOfst+iAmt>p->sz ){ + memset(zBuf, 0, iAmt); + if( iOfstsz ) memcpy(zBuf, p->aData+iOfst, p->sz - iOfst); + memdbLeave(p); + return SQLITE_IOERR_SHORT_READ; + } + memcpy(zBuf, p->aData+iOfst, iAmt); + memdbLeave(p); + return SQLITE_OK; +} + +/* +** Try to enlarge the memory allocation to hold at least sz bytes +*/ +static int memdbEnlarge(MemStore *p, sqlite3_int64 newSz){ + unsigned char *pNew; + if( (p->mFlags & SQLITE_DESERIALIZE_RESIZEABLE)==0 || NEVER(p->nMmap>0) ){ + return SQLITE_FULL; + } + if( newSz>p->szMax ){ + return SQLITE_FULL; + } + newSz *= 2; + if( newSz>p->szMax ) newSz = p->szMax; + pNew = sqlite3Realloc(p->aData, newSz); + if( pNew==0 ) return SQLITE_IOERR_NOMEM; + p->aData = pNew; + p->szAlloc = newSz; + return SQLITE_OK; +} + +/* +** Write data to an memdb-file. +*/ +static int memdbWrite( + sqlite3_file *pFile, + const void *z, + int iAmt, + sqlite_int64 iOfst +){ + MemStore *p = ((MemFile*)pFile)->pStore; + memdbEnter(p); + if( NEVER(p->mFlags & SQLITE_DESERIALIZE_READONLY) ){ + /* Can't happen: memdbLock() will return SQLITE_READONLY before + ** reaching this point */ + memdbLeave(p); + return SQLITE_IOERR_WRITE; + } + if( iOfst+iAmt>p->sz ){ + int rc; + if( iOfst+iAmt>p->szAlloc + && (rc = memdbEnlarge(p, iOfst+iAmt))!=SQLITE_OK + ){ + memdbLeave(p); + return rc; + } + if( iOfst>p->sz ) memset(p->aData+p->sz, 0, iOfst-p->sz); + p->sz = iOfst+iAmt; + } + memcpy(p->aData+iOfst, z, iAmt); + memdbLeave(p); + return SQLITE_OK; +} + +/* +** Truncate an memdb-file. +** +** In rollback mode (which is always the case for memdb, as it does not +** support WAL mode) the truncate() method is only used to reduce +** the size of a file, never to increase the size. +*/ +static int memdbTruncate(sqlite3_file *pFile, sqlite_int64 size){ + MemStore *p = ((MemFile*)pFile)->pStore; + int rc = SQLITE_OK; + memdbEnter(p); + if( size>p->sz ){ + /* This can only happen with a corrupt wal mode db */ + rc = SQLITE_CORRUPT; + }else{ + p->sz = size; + } + memdbLeave(p); + return rc; +} + +/* +** Sync an memdb-file. +*/ +static int memdbSync(sqlite3_file *pFile, int flags){ + UNUSED_PARAMETER(pFile); + UNUSED_PARAMETER(flags); + return SQLITE_OK; +} + +/* +** Return the current file-size of an memdb-file. +*/ +static int memdbFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){ + MemStore *p = ((MemFile*)pFile)->pStore; + memdbEnter(p); + *pSize = p->sz; + memdbLeave(p); + return SQLITE_OK; +} + +/* +** Lock an memdb-file. +*/ +static int memdbLock(sqlite3_file *pFile, int eLock){ + MemFile *pThis = (MemFile*)pFile; + MemStore *p = pThis->pStore; + int rc = SQLITE_OK; + if( eLock==pThis->eLock ) return SQLITE_OK; + memdbEnter(p); + if( eLock>SQLITE_LOCK_SHARED ){ + if( p->mFlags & SQLITE_DESERIALIZE_READONLY ){ + rc = SQLITE_READONLY; + }else if( pThis->eLock<=SQLITE_LOCK_SHARED ){ + if( p->nWrLock ){ + rc = SQLITE_BUSY; + }else{ + p->nWrLock = 1; + } + } + }else if( eLock==SQLITE_LOCK_SHARED ){ + if( pThis->eLock > SQLITE_LOCK_SHARED ){ + assert( p->nWrLock==1 ); + p->nWrLock = 0; + }else if( p->nWrLock ){ + rc = SQLITE_BUSY; + }else{ + p->nRdLock++; + } + }else{ + assert( eLock==SQLITE_LOCK_NONE ); + if( pThis->eLock>SQLITE_LOCK_SHARED ){ + assert( p->nWrLock==1 ); + p->nWrLock = 0; + } + assert( p->nRdLock>0 ); + p->nRdLock--; + } + if( rc==SQLITE_OK ) pThis->eLock = eLock; + memdbLeave(p); + return rc; +} + +#if 0 +/* +** This interface is only used for crash recovery, which does not +** occur on an in-memory database. +*/ +static int memdbCheckReservedLock(sqlite3_file *pFile, int *pResOut){ + *pResOut = 0; + return SQLITE_OK; +} +#endif + + +/* +** File control method. For custom operations on an memdb-file. +*/ +static int memdbFileControl(sqlite3_file *pFile, int op, void *pArg){ + MemStore *p = ((MemFile*)pFile)->pStore; + int rc = SQLITE_NOTFOUND; + memdbEnter(p); + if( op==SQLITE_FCNTL_VFSNAME ){ + *(char**)pArg = sqlite3_mprintf("memdb(%p,%lld)", p->aData, p->sz); + rc = SQLITE_OK; + } + if( op==SQLITE_FCNTL_SIZE_LIMIT ){ + sqlite3_int64 iLimit = *(sqlite3_int64*)pArg; + if( iLimitsz ){ + if( iLimit<0 ){ + iLimit = p->szMax; + }else{ + iLimit = p->sz; + } + } + p->szMax = iLimit; + *(sqlite3_int64*)pArg = iLimit; + rc = SQLITE_OK; + } + memdbLeave(p); + return rc; +} + +#if 0 /* Not used because of SQLITE_IOCAP_POWERSAFE_OVERWRITE */ +/* +** Return the sector-size in bytes for an memdb-file. +*/ +static int memdbSectorSize(sqlite3_file *pFile){ + return 1024; +} +#endif + +/* +** Return the device characteristic flags supported by an memdb-file. +*/ +static int memdbDeviceCharacteristics(sqlite3_file *pFile){ + UNUSED_PARAMETER(pFile); + return SQLITE_IOCAP_ATOMIC | + SQLITE_IOCAP_POWERSAFE_OVERWRITE | + SQLITE_IOCAP_SAFE_APPEND | + SQLITE_IOCAP_SEQUENTIAL; +} + +/* Fetch a page of a memory-mapped file */ +static int memdbFetch( + sqlite3_file *pFile, + sqlite3_int64 iOfst, + int iAmt, + void **pp +){ + MemStore *p = ((MemFile*)pFile)->pStore; + memdbEnter(p); + if( iOfst+iAmt>p->sz || (p->mFlags & SQLITE_DESERIALIZE_RESIZEABLE)!=0 ){ + *pp = 0; + }else{ + p->nMmap++; + *pp = (void*)(p->aData + iOfst); + } + memdbLeave(p); + return SQLITE_OK; +} + +/* Release a memory-mapped page */ +static int memdbUnfetch(sqlite3_file *pFile, sqlite3_int64 iOfst, void *pPage){ + MemStore *p = ((MemFile*)pFile)->pStore; + UNUSED_PARAMETER(iOfst); + UNUSED_PARAMETER(pPage); + memdbEnter(p); + p->nMmap--; + memdbLeave(p); + return SQLITE_OK; +} + +/* +** Open an mem file handle. +*/ +static int memdbOpen( + sqlite3_vfs *pVfs, + const char *zName, + sqlite3_file *pFd, + int flags, + int *pOutFlags +){ + MemFile *pFile = (MemFile*)pFd; + MemStore *p = 0; + int szName; + UNUSED_PARAMETER(pVfs); + + memset(pFile, 0, sizeof(*pFile)); + szName = sqlite3Strlen30(zName); + if( szName>1 && zName[0]=='/' ){ + int i; +#ifndef SQLITE_MUTEX_OMIT + sqlite3_mutex *pVfsMutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); +#endif + sqlite3_mutex_enter(pVfsMutex); + for(i=0; izFName,zName)==0 ){ + p = memdb_g.apMemStore[i]; + break; + } + } + if( p==0 ){ + MemStore **apNew; + p = sqlite3Malloc( sizeof(*p) + szName + 3 ); + if( p==0 ){ + sqlite3_mutex_leave(pVfsMutex); + return SQLITE_NOMEM; + } + apNew = sqlite3Realloc(memdb_g.apMemStore, + sizeof(apNew[0])*(memdb_g.nMemStore+1) ); + if( apNew==0 ){ + sqlite3_free(p); + sqlite3_mutex_leave(pVfsMutex); + return SQLITE_NOMEM; + } + apNew[memdb_g.nMemStore++] = p; + memdb_g.apMemStore = apNew; + memset(p, 0, sizeof(*p)); + p->mFlags = SQLITE_DESERIALIZE_RESIZEABLE|SQLITE_DESERIALIZE_FREEONCLOSE; + p->szMax = sqlite3GlobalConfig.mxMemdbSize; + p->zFName = (char*)&p[1]; + memcpy(p->zFName, zName, szName+1); + p->pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( p->pMutex==0 ){ + memdb_g.nMemStore--; + sqlite3_free(p); + sqlite3_mutex_leave(pVfsMutex); + return SQLITE_NOMEM; + } + p->nRef = 1; + memdbEnter(p); + }else{ + memdbEnter(p); + p->nRef++; + } + sqlite3_mutex_leave(pVfsMutex); + }else{ + p = sqlite3Malloc( sizeof(*p) ); + if( p==0 ){ + return SQLITE_NOMEM; + } + memset(p, 0, sizeof(*p)); + p->mFlags = SQLITE_DESERIALIZE_RESIZEABLE | SQLITE_DESERIALIZE_FREEONCLOSE; + p->szMax = sqlite3GlobalConfig.mxMemdbSize; + } + pFile->pStore = p; + if( pOutFlags!=0 ){ + *pOutFlags = flags | SQLITE_OPEN_MEMORY; + } + pFd->pMethods = &memdb_io_methods; + memdbLeave(p); + return SQLITE_OK; +} + +#if 0 /* Only used to delete rollback journals, super-journals, and WAL + ** files, none of which exist in memdb. So this routine is never used */ +/* +** Delete the file located at zPath. If the dirSync argument is true, +** ensure the file-system modifications are synced to disk before +** returning. +*/ +static int memdbDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ + return SQLITE_IOERR_DELETE; +} +#endif + +/* +** Test for access permissions. Return true if the requested permission +** is available, or false otherwise. +** +** With memdb, no files ever exist on disk. So always return false. +*/ +static int memdbAccess( + sqlite3_vfs *pVfs, + const char *zPath, + int flags, + int *pResOut +){ + UNUSED_PARAMETER(pVfs); + UNUSED_PARAMETER(zPath); + UNUSED_PARAMETER(flags); + *pResOut = 0; + return SQLITE_OK; +} + +/* +** Populate buffer zOut with the full canonical pathname corresponding +** to the pathname in zPath. zOut is guaranteed to point to a buffer +** of at least (INST_MAX_PATHNAME+1) bytes. +*/ +static int memdbFullPathname( + sqlite3_vfs *pVfs, + const char *zPath, + int nOut, + char *zOut +){ + UNUSED_PARAMETER(pVfs); + sqlite3_snprintf(nOut, zOut, "%s", zPath); + return SQLITE_OK; +} + +/* +** Open the dynamic library located at zPath and return a handle. +*/ +static void *memdbDlOpen(sqlite3_vfs *pVfs, const char *zPath){ + return ORIGVFS(pVfs)->xDlOpen(ORIGVFS(pVfs), zPath); +} + +/* +** Populate the buffer zErrMsg (size nByte bytes) with a human readable +** utf-8 string describing the most recent error encountered associated +** with dynamic libraries. +*/ +static void memdbDlError(sqlite3_vfs *pVfs, int nByte, char *zErrMsg){ + ORIGVFS(pVfs)->xDlError(ORIGVFS(pVfs), nByte, zErrMsg); +} + +/* +** Return a pointer to the symbol zSymbol in the dynamic library pHandle. +*/ +static void (*memdbDlSym(sqlite3_vfs *pVfs, void *p, const char *zSym))(void){ + return ORIGVFS(pVfs)->xDlSym(ORIGVFS(pVfs), p, zSym); +} + +/* +** Close the dynamic library handle pHandle. +*/ +static void memdbDlClose(sqlite3_vfs *pVfs, void *pHandle){ + ORIGVFS(pVfs)->xDlClose(ORIGVFS(pVfs), pHandle); +} + +/* +** Populate the buffer pointed to by zBufOut with nByte bytes of +** random data. +*/ +static int memdbRandomness(sqlite3_vfs *pVfs, int nByte, char *zBufOut){ + return ORIGVFS(pVfs)->xRandomness(ORIGVFS(pVfs), nByte, zBufOut); +} + +/* +** Sleep for nMicro microseconds. Return the number of microseconds +** actually slept. +*/ +static int memdbSleep(sqlite3_vfs *pVfs, int nMicro){ + return ORIGVFS(pVfs)->xSleep(ORIGVFS(pVfs), nMicro); +} + +#if 0 /* Never used. Modern cores only call xCurrentTimeInt64() */ +/* +** Return the current time as a Julian Day number in *pTimeOut. +*/ +static int memdbCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ + return ORIGVFS(pVfs)->xCurrentTime(ORIGVFS(pVfs), pTimeOut); +} +#endif + +static int memdbGetLastError(sqlite3_vfs *pVfs, int a, char *b){ + return ORIGVFS(pVfs)->xGetLastError(ORIGVFS(pVfs), a, b); +} +static int memdbCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *p){ + return ORIGVFS(pVfs)->xCurrentTimeInt64(ORIGVFS(pVfs), p); +} + +/* +** Translate a database connection pointer and schema name into a +** MemFile pointer. +*/ +static MemFile *memdbFromDbSchema(sqlite3 *db, const char *zSchema){ + MemFile *p = 0; + MemStore *pStore; + int rc = sqlite3_file_control(db, zSchema, SQLITE_FCNTL_FILE_POINTER, &p); + if( rc ) return 0; + if( p->base.pMethods!=&memdb_io_methods ) return 0; + pStore = p->pStore; + memdbEnter(pStore); + if( pStore->zFName!=0 ) p = 0; + memdbLeave(pStore); + return p; +} + +/* +** Return the serialization of a database +*/ +SQLITE_API unsigned char *sqlite3_serialize( + sqlite3 *db, /* The database connection */ + const char *zSchema, /* Which database within the connection */ + sqlite3_int64 *piSize, /* Write size here, if not NULL */ + unsigned int mFlags /* Maybe SQLITE_SERIALIZE_NOCOPY */ +){ + MemFile *p; + int iDb; + Btree *pBt; + sqlite3_int64 sz; + int szPage = 0; + sqlite3_stmt *pStmt = 0; + unsigned char *pOut; + char *zSql; + int rc; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + + if( zSchema==0 ) zSchema = db->aDb[0].zDbSName; + p = memdbFromDbSchema(db, zSchema); + iDb = sqlite3FindDbName(db, zSchema); + if( piSize ) *piSize = -1; + if( iDb<0 ) return 0; + if( p ){ + MemStore *pStore = p->pStore; + assert( pStore->pMutex==0 ); + if( piSize ) *piSize = pStore->sz; + if( mFlags & SQLITE_SERIALIZE_NOCOPY ){ + pOut = pStore->aData; + }else{ + pOut = sqlite3_malloc64( pStore->sz ); + if( pOut ) memcpy(pOut, pStore->aData, pStore->sz); + } + return pOut; + } + pBt = db->aDb[iDb].pBt; + if( pBt==0 ) return 0; + szPage = sqlite3BtreeGetPageSize(pBt); + zSql = sqlite3_mprintf("PRAGMA \"%w\".page_count", zSchema); + rc = zSql ? sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0) : SQLITE_NOMEM; + sqlite3_free(zSql); + if( rc ) return 0; + rc = sqlite3_step(pStmt); + if( rc!=SQLITE_ROW ){ + pOut = 0; + }else{ + sz = sqlite3_column_int64(pStmt, 0)*szPage; + if( piSize ) *piSize = sz; + if( mFlags & SQLITE_SERIALIZE_NOCOPY ){ + pOut = 0; + }else{ + pOut = sqlite3_malloc64( sz ); + if( pOut ){ + int nPage = sqlite3_column_int(pStmt, 0); + Pager *pPager = sqlite3BtreePager(pBt); + int pgno; + for(pgno=1; pgno<=nPage; pgno++){ + DbPage *pPage = 0; + unsigned char *pTo = pOut + szPage*(sqlite3_int64)(pgno-1); + rc = sqlite3PagerGet(pPager, pgno, (DbPage**)&pPage, 0); + if( rc==SQLITE_OK ){ + memcpy(pTo, sqlite3PagerGetData(pPage), szPage); + }else{ + memset(pTo, 0, szPage); + } + sqlite3PagerUnref(pPage); + } + } + } + } + sqlite3_finalize(pStmt); + return pOut; +} + +/* Convert zSchema to a MemDB and initialize its content. +*/ +SQLITE_API int sqlite3_deserialize( + sqlite3 *db, /* The database connection */ + const char *zSchema, /* Which DB to reopen with the deserialization */ + unsigned char *pData, /* The serialized database content */ + sqlite3_int64 szDb, /* Number bytes in the deserialization */ + sqlite3_int64 szBuf, /* Total size of buffer pData[] */ + unsigned mFlags /* Zero or more SQLITE_DESERIALIZE_* flags */ +){ + MemFile *p; + char *zSql; + sqlite3_stmt *pStmt = 0; + int rc; + int iDb; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } + if( szDb<0 ) return SQLITE_MISUSE_BKPT; + if( szBuf<0 ) return SQLITE_MISUSE_BKPT; +#endif + + sqlite3_mutex_enter(db->mutex); + if( zSchema==0 ) zSchema = db->aDb[0].zDbSName; + iDb = sqlite3FindDbName(db, zSchema); + testcase( iDb==1 ); + if( iDb<2 && iDb!=0 ){ + rc = SQLITE_ERROR; + goto end_deserialize; + } + zSql = sqlite3_mprintf("ATTACH x AS %Q", zSchema); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + } + if( rc ) goto end_deserialize; + db->init.iDb = (u8)iDb; + db->init.reopenMemdb = 1; + rc = sqlite3_step(pStmt); + db->init.reopenMemdb = 0; + if( rc!=SQLITE_DONE ){ + rc = SQLITE_ERROR; + goto end_deserialize; + } + p = memdbFromDbSchema(db, zSchema); + if( p==0 ){ + rc = SQLITE_ERROR; + }else{ + MemStore *pStore = p->pStore; + pStore->aData = pData; + pData = 0; + pStore->sz = szDb; + pStore->szAlloc = szBuf; + pStore->szMax = szBuf; + if( pStore->szMaxszMax = sqlite3GlobalConfig.mxMemdbSize; + } + pStore->mFlags = mFlags; + rc = SQLITE_OK; + } + +end_deserialize: + sqlite3_finalize(pStmt); + if( pData && (mFlags & SQLITE_DESERIALIZE_FREEONCLOSE)!=0 ){ + sqlite3_free(pData); + } + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** This routine is called when the extension is loaded. +** Register the new VFS. +*/ +SQLITE_PRIVATE int sqlite3MemdbInit(void){ + sqlite3_vfs *pLower = sqlite3_vfs_find(0); + unsigned int sz; + if( NEVER(pLower==0) ) return SQLITE_ERROR; + sz = pLower->szOsFile; + memdb_vfs.pAppData = pLower; + /* The following conditional can only be true when compiled for + ** Windows x86 and SQLITE_MAX_MMAP_SIZE=0. We always leave + ** it in, to be safe, but it is marked as NO_TEST since there + ** is no way to reach it under most builds. */ + if( szBITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is +** a hash table that will hold up to BITVEC_MXHASH distinct values. +** +** Otherwise, the value i is redirected into one of BITVEC_NPTR +** sub-bitmaps pointed to by Bitvec.u.apSub[]. Each subbitmap +** handles up to iDivisor separate values of i. apSub[0] holds +** values between 1 and iDivisor. apSub[1] holds values between +** iDivisor+1 and 2*iDivisor. apSub[N] holds values between +** N*iDivisor+1 and (N+1)*iDivisor. Each subbitmap is normalized +** to hold deal with values between 1 and iDivisor. +*/ +struct Bitvec { + u32 iSize; /* Maximum bit index. Max iSize is 4,294,967,296. */ + u32 nSet; /* Number of bits that are set - only valid for aHash + ** element. Max is BITVEC_NINT. For BITVEC_SZ of 512, + ** this would be 125. */ + u32 iDivisor; /* Number of bits handled by each apSub[] entry. */ + /* Should >=0 for apSub element. */ + /* Max iDivisor is max(u32) / BITVEC_NPTR + 1. */ + /* For a BITVEC_SZ of 512, this would be 34,359,739. */ + union { + BITVEC_TELEM aBitmap[BITVEC_NELEM]; /* Bitmap representation */ + u32 aHash[BITVEC_NINT]; /* Hash table representation */ + Bitvec *apSub[BITVEC_NPTR]; /* Recursive representation */ + } u; +}; + +/* +** Create a new bitmap object able to handle bits between 0 and iSize, +** inclusive. Return a pointer to the new object. Return NULL if +** malloc fails. +*/ +SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32 iSize){ + Bitvec *p; + assert( sizeof(*p)==BITVEC_SZ ); + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ + p->iSize = iSize; + } + return p; +} + +/* +** Check to see if the i-th bit is set. Return true or false. +** If p is NULL (if the bitmap has not been created) or if +** i is out of range, then return false. +*/ +SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){ + assert( p!=0 ); + i--; + if( i>=p->iSize ) return 0; + while( p->iDivisor ){ + u32 bin = i/p->iDivisor; + i = i%p->iDivisor; + p = p->u.apSub[bin]; + if (!p) { + return 0; + } + } + if( p->iSize<=BITVEC_NBIT ){ + return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0; + } else{ + u32 h = BITVEC_HASH(i++); + while( p->u.aHash[h] ){ + if( p->u.aHash[h]==i ) return 1; + h = (h+1) % BITVEC_NINT; + } + return 0; + } +} +SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ + return p!=0 && sqlite3BitvecTestNotNull(p,i); +} + +/* +** Set the i-th bit. Return 0 on success and an error code if +** anything goes wrong. +** +** This routine might cause sub-bitmaps to be allocated. Failing +** to get the memory needed to hold the sub-bitmap is the only +** that can go wrong with an insert, assuming p and i are valid. +** +** The calling function must ensure that p is a valid Bitvec object +** and that the value for "i" is within range of the Bitvec object. +** Otherwise the behavior is undefined. +*/ +SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec *p, u32 i){ + u32 h; + if( p==0 ) return SQLITE_OK; + assert( i>0 ); + assert( i<=p->iSize ); + i--; + while((p->iSize > BITVEC_NBIT) && p->iDivisor) { + u32 bin = i/p->iDivisor; + i = i%p->iDivisor; + if( p->u.apSub[bin]==0 ){ + p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor ); + if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM_BKPT; + } + p = p->u.apSub[bin]; + } + if( p->iSize<=BITVEC_NBIT ){ + p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1)); + return SQLITE_OK; + } + h = BITVEC_HASH(i++); + /* if there wasn't a hash collision, and this doesn't */ + /* completely fill the hash, then just add it without */ + /* worring about sub-dividing and re-hashing. */ + if( !p->u.aHash[h] ){ + if (p->nSet<(BITVEC_NINT-1)) { + goto bitvec_set_end; + } else { + goto bitvec_set_rehash; + } + } + /* there was a collision, check to see if it's already */ + /* in hash, if not, try to find a spot for it */ + do { + if( p->u.aHash[h]==i ) return SQLITE_OK; + h++; + if( h>=BITVEC_NINT ) h = 0; + } while( p->u.aHash[h] ); + /* we didn't find it in the hash. h points to the first */ + /* available free spot. check to see if this is going to */ + /* make our hash too "full". */ +bitvec_set_rehash: + if( p->nSet>=BITVEC_MXHASH ){ + unsigned int j; + int rc; + u32 *aiValues = sqlite3StackAllocRaw(0, sizeof(p->u.aHash)); + if( aiValues==0 ){ + return SQLITE_NOMEM_BKPT; + }else{ + memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); + memset(p->u.apSub, 0, sizeof(p->u.apSub)); + p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR; + rc = sqlite3BitvecSet(p, i); + for(j=0; jnSet++; + p->u.aHash[h] = i; + return SQLITE_OK; +} + +/* +** Clear the i-th bit. +** +** pBuf must be a pointer to at least BITVEC_SZ bytes of temporary storage +** that BitvecClear can use to rebuilt its hash table. +*/ +SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec *p, u32 i, void *pBuf){ + if( p==0 ) return; + assert( i>0 ); + i--; + while( p->iDivisor ){ + u32 bin = i/p->iDivisor; + i = i%p->iDivisor; + p = p->u.apSub[bin]; + if (!p) { + return; + } + } + if( p->iSize<=BITVEC_NBIT ){ + p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1))); + }else{ + unsigned int j; + u32 *aiValues = pBuf; + memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); + memset(p->u.aHash, 0, sizeof(p->u.aHash)); + p->nSet = 0; + for(j=0; jnSet++; + while( p->u.aHash[h] ){ + h++; + if( h>=BITVEC_NINT ) h = 0; + } + p->u.aHash[h] = aiValues[j]; + } + } + } +} + +/* +** Destroy a bitmap object. Reclaim all memory used. +*/ +SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec *p){ + if( p==0 ) return; + if( p->iDivisor ){ + unsigned int i; + for(i=0; iu.apSub[i]); + } + } + sqlite3_free(p); +} + +/* +** Return the value of the iSize parameter specified when Bitvec *p +** was created. +*/ +SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec *p){ + return p->iSize; +} + +#ifndef SQLITE_UNTESTABLE +/* +** Let V[] be an array of unsigned characters sufficient to hold +** up to N bits. Let I be an integer between 0 and N. 0<=I>3] |= (1<<(I&7)) +#define CLEARBIT(V,I) V[I>>3] &= ~(1<<(I&7)) +#define TESTBIT(V,I) (V[I>>3]&(1<<(I&7)))!=0 + +/* +** This routine runs an extensive test of the Bitvec code. +** +** The input is an array of integers that acts as a program +** to test the Bitvec. The integers are opcodes followed +** by 0, 1, or 3 operands, depending on the opcode. Another +** opcode follows immediately after the last operand. +** +** There are 6 opcodes numbered from 0 through 5. 0 is the +** "halt" opcode and causes the test to end. +** +** 0 Halt and return the number of errors +** 1 N S X Set N bits beginning with S and incrementing by X +** 2 N S X Clear N bits beginning with S and incrementing by X +** 3 N Set N randomly chosen bits +** 4 N Clear N randomly chosen bits +** 5 N S X Set N bits from S increment X in array only, not in bitvec +** +** The opcodes 1 through 4 perform set and clear operations are performed +** on both a Bitvec object and on a linear array of bits obtained from malloc. +** Opcode 5 works on the linear array only, not on the Bitvec. +** Opcode 5 is used to deliberately induce a fault in order to +** confirm that error detection works. +** +** At the conclusion of the test the linear array is compared +** against the Bitvec object. If there are any differences, +** an error is returned. If they are the same, zero is returned. +** +** If a memory allocation error occurs, return -1. +*/ +SQLITE_PRIVATE int sqlite3BitvecBuiltinTest(int sz, int *aOp){ + Bitvec *pBitvec = 0; + unsigned char *pV = 0; + int rc = -1; + int i, nx, pc, op; + void *pTmpSpace; + + /* Allocate the Bitvec to be tested and a linear array of + ** bits to act as the reference */ + pBitvec = sqlite3BitvecCreate( sz ); + pV = sqlite3MallocZero( (sz+7)/8 + 1 ); + pTmpSpace = sqlite3_malloc64(BITVEC_SZ); + if( pBitvec==0 || pV==0 || pTmpSpace==0 ) goto bitvec_end; + + /* NULL pBitvec tests */ + sqlite3BitvecSet(0, 1); + sqlite3BitvecClear(0, 1, pTmpSpace); + + /* Run the program */ + pc = i = 0; + while( (op = aOp[pc])!=0 ){ + switch( op ){ + case 1: + case 2: + case 5: { + nx = 4; + i = aOp[pc+2] - 1; + aOp[pc+2] += aOp[pc+3]; + break; + } + case 3: + case 4: + default: { + nx = 2; + sqlite3_randomness(sizeof(i), &i); + break; + } + } + if( (--aOp[pc+1]) > 0 ) nx = 0; + pc += nx; + i = (i & 0x7fffffff)%sz; + if( (op & 1)!=0 ){ + SETBIT(pV, (i+1)); + if( op!=5 ){ + if( sqlite3BitvecSet(pBitvec, i+1) ) goto bitvec_end; + } + }else{ + CLEARBIT(pV, (i+1)); + sqlite3BitvecClear(pBitvec, i+1, pTmpSpace); + } + } + + /* Test to make sure the linear array exactly matches the + ** Bitvec object. Start with the assumption that they do + ** match (rc==0). Change rc to non-zero if a discrepancy + ** is found. + */ + rc = sqlite3BitvecTest(0,0) + sqlite3BitvecTest(pBitvec, sz+1) + + sqlite3BitvecTest(pBitvec, 0) + + (sqlite3BitvecSize(pBitvec) - sz); + for(i=1; i<=sz; i++){ + if( (TESTBIT(pV,i))!=sqlite3BitvecTest(pBitvec,i) ){ + rc = i; + break; + } + } + + /* Free allocated structure */ +bitvec_end: + sqlite3_free(pTmpSpace); + sqlite3_free(pV); + sqlite3BitvecDestroy(pBitvec); + return rc; +} +#endif /* SQLITE_UNTESTABLE */ + +/************** End of bitvec.c **********************************************/ +/************** Begin file pcache.c ******************************************/ +/* +** 2008 August 05 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements that page cache. +*/ +/* #include "sqliteInt.h" */ + +/* +** A complete page cache is an instance of this structure. Every +** entry in the cache holds a single page of the database file. The +** btree layer only operates on the cached copy of the database pages. +** +** A page cache entry is "clean" if it exactly matches what is currently +** on disk. A page is "dirty" if it has been modified and needs to be +** persisted to disk. +** +** pDirty, pDirtyTail, pSynced: +** All dirty pages are linked into the doubly linked list using +** PgHdr.pDirtyNext and pDirtyPrev. The list is maintained in LRU order +** such that p was added to the list more recently than p->pDirtyNext. +** PCache.pDirty points to the first (newest) element in the list and +** pDirtyTail to the last (oldest). +** +** The PCache.pSynced variable is used to optimize searching for a dirty +** page to eject from the cache mid-transaction. It is better to eject +** a page that does not require a journal sync than one that does. +** Therefore, pSynced is maintained so that it *almost* always points +** to either the oldest page in the pDirty/pDirtyTail list that has a +** clear PGHDR_NEED_SYNC flag or to a page that is older than this one +** (so that the right page to eject can be found by following pDirtyPrev +** pointers). +*/ +struct PCache { + PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */ + PgHdr *pSynced; /* Last synced page in dirty page list */ + int nRefSum; /* Sum of ref counts over all pages */ + int szCache; /* Configured cache size */ + int szSpill; /* Size before spilling occurs */ + int szPage; /* Size of every page in this cache */ + int szExtra; /* Size of extra space for each page */ + u8 bPurgeable; /* True if pages are on backing store */ + u8 eCreate; /* eCreate value for for xFetch() */ + int (*xStress)(void*,PgHdr*); /* Call to try make a page clean */ + void *pStress; /* Argument to xStress */ + sqlite3_pcache *pCache; /* Pluggable cache module */ +}; + +/********************************** Test and Debug Logic **********************/ +/* +** Debug tracing macros. Enable by by changing the "0" to "1" and +** recompiling. +** +** When sqlite3PcacheTrace is 1, single line trace messages are issued. +** When sqlite3PcacheTrace is 2, a dump of the pcache showing all cache entries +** is displayed for many operations, resulting in a lot of output. +*/ +#if defined(SQLITE_DEBUG) && 0 + int sqlite3PcacheTrace = 2; /* 0: off 1: simple 2: cache dumps */ + int sqlite3PcacheMxDump = 9999; /* Max cache entries for pcacheDump() */ +# define pcacheTrace(X) if(sqlite3PcacheTrace){sqlite3DebugPrintf X;} + void pcacheDump(PCache *pCache){ + int N; + int i, j; + sqlite3_pcache_page *pLower; + PgHdr *pPg; + unsigned char *a; + + if( sqlite3PcacheTrace<2 ) return; + if( pCache->pCache==0 ) return; + N = sqlite3PcachePagecount(pCache); + if( N>sqlite3PcacheMxDump ) N = sqlite3PcacheMxDump; + for(i=1; i<=N; i++){ + pLower = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, i, 0); + if( pLower==0 ) continue; + pPg = (PgHdr*)pLower->pExtra; + printf("%3d: nRef %2d flgs %02x data ", i, pPg->nRef, pPg->flags); + a = (unsigned char *)pLower->pBuf; + for(j=0; j<12; j++) printf("%02x", a[j]); + printf("\n"); + if( pPg->pPage==0 ){ + sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, pLower, 0); + } + } + } + #else +# define pcacheTrace(X) +# define pcacheDump(X) +#endif + +/* +** Check invariants on a PgHdr entry. Return true if everything is OK. +** Return false if any invariant is violated. +** +** This routine is for use inside of assert() statements only. For +** example: +** +** assert( sqlite3PcachePageSanity(pPg) ); +*/ +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3PcachePageSanity(PgHdr *pPg){ + PCache *pCache; + assert( pPg!=0 ); + assert( pPg->pgno>0 || pPg->pPager==0 ); /* Page number is 1 or more */ + pCache = pPg->pCache; + assert( pCache!=0 ); /* Every page has an associated PCache */ + if( pPg->flags & PGHDR_CLEAN ){ + assert( (pPg->flags & PGHDR_DIRTY)==0 );/* Cannot be both CLEAN and DIRTY */ + assert( pCache->pDirty!=pPg ); /* CLEAN pages not on dirty list */ + assert( pCache->pDirtyTail!=pPg ); + } + /* WRITEABLE pages must also be DIRTY */ + if( pPg->flags & PGHDR_WRITEABLE ){ + assert( pPg->flags & PGHDR_DIRTY ); /* WRITEABLE implies DIRTY */ + } + /* NEED_SYNC can be set independently of WRITEABLE. This can happen, + ** for example, when using the sqlite3PagerDontWrite() optimization: + ** (1) Page X is journalled, and gets WRITEABLE and NEED_SEEK. + ** (2) Page X moved to freelist, WRITEABLE is cleared + ** (3) Page X reused, WRITEABLE is set again + ** If NEED_SYNC had been cleared in step 2, then it would not be reset + ** in step 3, and page might be written into the database without first + ** syncing the rollback journal, which might cause corruption on a power + ** loss. + ** + ** Another example is when the database page size is smaller than the + ** disk sector size. When any page of a sector is journalled, all pages + ** in that sector are marked NEED_SYNC even if they are still CLEAN, just + ** in case they are later modified, since all pages in the same sector + ** must be journalled and synced before any of those pages can be safely + ** written. + */ + return 1; +} +#endif /* SQLITE_DEBUG */ + + +/********************************** Linked List Management ********************/ + +/* Allowed values for second argument to pcacheManageDirtyList() */ +#define PCACHE_DIRTYLIST_REMOVE 1 /* Remove pPage from dirty list */ +#define PCACHE_DIRTYLIST_ADD 2 /* Add pPage to the dirty list */ +#define PCACHE_DIRTYLIST_FRONT 3 /* Move pPage to the front of the list */ + +/* +** Manage pPage's participation on the dirty list. Bits of the addRemove +** argument determines what operation to do. The 0x01 bit means first +** remove pPage from the dirty list. The 0x02 means add pPage back to +** the dirty list. Doing both moves pPage to the front of the dirty list. +*/ +static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){ + PCache *p = pPage->pCache; + + pcacheTrace(("%p.DIRTYLIST.%s %d\n", p, + addRemove==1 ? "REMOVE" : addRemove==2 ? "ADD" : "FRONT", + pPage->pgno)); + if( addRemove & PCACHE_DIRTYLIST_REMOVE ){ + assert( pPage->pDirtyNext || pPage==p->pDirtyTail ); + assert( pPage->pDirtyPrev || pPage==p->pDirty ); + + /* Update the PCache1.pSynced variable if necessary. */ + if( p->pSynced==pPage ){ + p->pSynced = pPage->pDirtyPrev; + } + + if( pPage->pDirtyNext ){ + pPage->pDirtyNext->pDirtyPrev = pPage->pDirtyPrev; + }else{ + assert( pPage==p->pDirtyTail ); + p->pDirtyTail = pPage->pDirtyPrev; + } + if( pPage->pDirtyPrev ){ + pPage->pDirtyPrev->pDirtyNext = pPage->pDirtyNext; + }else{ + /* If there are now no dirty pages in the cache, set eCreate to 2. + ** This is an optimization that allows sqlite3PcacheFetch() to skip + ** searching for a dirty page to eject from the cache when it might + ** otherwise have to. */ + assert( pPage==p->pDirty ); + p->pDirty = pPage->pDirtyNext; + assert( p->bPurgeable || p->eCreate==2 ); + if( p->pDirty==0 ){ /*OPTIMIZATION-IF-TRUE*/ + assert( p->bPurgeable==0 || p->eCreate==1 ); + p->eCreate = 2; + } + } + } + if( addRemove & PCACHE_DIRTYLIST_ADD ){ + pPage->pDirtyPrev = 0; + pPage->pDirtyNext = p->pDirty; + if( pPage->pDirtyNext ){ + assert( pPage->pDirtyNext->pDirtyPrev==0 ); + pPage->pDirtyNext->pDirtyPrev = pPage; + }else{ + p->pDirtyTail = pPage; + if( p->bPurgeable ){ + assert( p->eCreate==2 ); + p->eCreate = 1; + } + } + p->pDirty = pPage; + + /* If pSynced is NULL and this page has a clear NEED_SYNC flag, set + ** pSynced to point to it. Checking the NEED_SYNC flag is an + ** optimization, as if pSynced points to a page with the NEED_SYNC + ** flag set sqlite3PcacheFetchStress() searches through all newer + ** entries of the dirty-list for a page with NEED_SYNC clear anyway. */ + if( !p->pSynced + && 0==(pPage->flags&PGHDR_NEED_SYNC) /*OPTIMIZATION-IF-FALSE*/ + ){ + p->pSynced = pPage; + } + } + pcacheDump(p); +} + +/* +** Wrapper around the pluggable caches xUnpin method. If the cache is +** being used for an in-memory database, this function is a no-op. +*/ +static void pcacheUnpin(PgHdr *p){ + if( p->pCache->bPurgeable ){ + pcacheTrace(("%p.UNPIN %d\n", p->pCache, p->pgno)); + sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0); + pcacheDump(p->pCache); + } +} + +/* +** Compute the number of pages of cache requested. p->szCache is the +** cache size requested by the "PRAGMA cache_size" statement. +*/ +static int numberOfCachePages(PCache *p){ + if( p->szCache>=0 ){ + /* IMPLEMENTATION-OF: R-42059-47211 If the argument N is positive then the + ** suggested cache size is set to N. */ + return p->szCache; + }else{ + i64 n; + /* IMPLEMANTATION-OF: R-59858-46238 If the argument N is negative, then the + ** number of cache pages is adjusted to be a number of pages that would + ** use approximately abs(N*1024) bytes of memory based on the current + ** page size. */ + n = ((-1024*(i64)p->szCache)/(p->szPage+p->szExtra)); + if( n>1000000000 ) n = 1000000000; + return (int)n; + } +} + +/*************************************************** General Interfaces ****** +** +** Initialize and shutdown the page cache subsystem. Neither of these +** functions are threadsafe. +*/ +SQLITE_PRIVATE int sqlite3PcacheInitialize(void){ + if( sqlite3GlobalConfig.pcache2.xInit==0 ){ + /* IMPLEMENTATION-OF: R-26801-64137 If the xInit() method is NULL, then the + ** built-in default page cache is used instead of the application defined + ** page cache. */ + sqlite3PCacheSetDefault(); + assert( sqlite3GlobalConfig.pcache2.xInit!=0 ); + } + return sqlite3GlobalConfig.pcache2.xInit(sqlite3GlobalConfig.pcache2.pArg); +} +SQLITE_PRIVATE void sqlite3PcacheShutdown(void){ + if( sqlite3GlobalConfig.pcache2.xShutdown ){ + /* IMPLEMENTATION-OF: R-26000-56589 The xShutdown() method may be NULL. */ + sqlite3GlobalConfig.pcache2.xShutdown(sqlite3GlobalConfig.pcache2.pArg); + } +} + +/* +** Return the size in bytes of a PCache object. +*/ +SQLITE_PRIVATE int sqlite3PcacheSize(void){ return sizeof(PCache); } + +/* +** Create a new PCache object. Storage space to hold the object +** has already been allocated and is passed in as the p pointer. +** The caller discovers how much space needs to be allocated by +** calling sqlite3PcacheSize(). +** +** szExtra is some extra space allocated for each page. The first +** 8 bytes of the extra space will be zeroed as the page is allocated, +** but remaining content will be uninitialized. Though it is opaque +** to this module, the extra space really ends up being the MemPage +** structure in the pager. +*/ +SQLITE_PRIVATE int sqlite3PcacheOpen( + int szPage, /* Size of every page */ + int szExtra, /* Extra space associated with each page */ + int bPurgeable, /* True if pages are on backing store */ + int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */ + void *pStress, /* Argument to xStress */ + PCache *p /* Preallocated space for the PCache */ +){ + memset(p, 0, sizeof(PCache)); + p->szPage = 1; + p->szExtra = szExtra; + assert( szExtra>=8 ); /* First 8 bytes will be zeroed */ + p->bPurgeable = bPurgeable; + p->eCreate = 2; + p->xStress = xStress; + p->pStress = pStress; + p->szCache = 100; + p->szSpill = 1; + pcacheTrace(("%p.OPEN szPage %d bPurgeable %d\n",p,szPage,bPurgeable)); + return sqlite3PcacheSetPageSize(p, szPage); +} + +/* +** Change the page size for PCache object. The caller must ensure that there +** are no outstanding page references when this function is called. +*/ +SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){ + assert( pCache->nRefSum==0 && pCache->pDirty==0 ); + if( pCache->szPage ){ + sqlite3_pcache *pNew; + pNew = sqlite3GlobalConfig.pcache2.xCreate( + szPage, pCache->szExtra + ROUND8(sizeof(PgHdr)), + pCache->bPurgeable + ); + if( pNew==0 ) return SQLITE_NOMEM_BKPT; + sqlite3GlobalConfig.pcache2.xCachesize(pNew, numberOfCachePages(pCache)); + if( pCache->pCache ){ + sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); + } + pCache->pCache = pNew; + pCache->szPage = szPage; + pcacheTrace(("%p.PAGESIZE %d\n",pCache,szPage)); + } + return SQLITE_OK; +} + +/* +** Try to obtain a page from the cache. +** +** This routine returns a pointer to an sqlite3_pcache_page object if +** such an object is already in cache, or if a new one is created. +** This routine returns a NULL pointer if the object was not in cache +** and could not be created. +** +** The createFlags should be 0 to check for existing pages and should +** be 3 (not 1, but 3) to try to create a new page. +** +** If the createFlag is 0, then NULL is always returned if the page +** is not already in the cache. If createFlag is 1, then a new page +** is created only if that can be done without spilling dirty pages +** and without exceeding the cache size limit. +** +** The caller needs to invoke sqlite3PcacheFetchFinish() to properly +** initialize the sqlite3_pcache_page object and convert it into a +** PgHdr object. The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish() +** routines are split this way for performance reasons. When separated +** they can both (usually) operate without having to push values to +** the stack on entry and pop them back off on exit, which saves a +** lot of pushing and popping. +*/ +SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number to obtain */ + int createFlag /* If true, create page if it does not exist already */ +){ + int eCreate; + sqlite3_pcache_page *pRes; + + assert( pCache!=0 ); + assert( pCache->pCache!=0 ); + assert( createFlag==3 || createFlag==0 ); + assert( pCache->eCreate==((pCache->bPurgeable && pCache->pDirty) ? 1 : 2) ); + + /* eCreate defines what to do if the page does not exist. + ** 0 Do not allocate a new page. (createFlag==0) + ** 1 Allocate a new page if doing so is inexpensive. + ** (createFlag==1 AND bPurgeable AND pDirty) + ** 2 Allocate a new page even it doing so is difficult. + ** (createFlag==1 AND !(bPurgeable AND pDirty) + */ + eCreate = createFlag & pCache->eCreate; + assert( eCreate==0 || eCreate==1 || eCreate==2 ); + assert( createFlag==0 || pCache->eCreate==eCreate ); + assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) ); + pRes = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate); + pcacheTrace(("%p.FETCH %d%s (result: %p)\n",pCache,pgno, + createFlag?" create":"",pRes)); + return pRes; +} + +/* +** If the sqlite3PcacheFetch() routine is unable to allocate a new +** page because no clean pages are available for reuse and the cache +** size limit has been reached, then this routine can be invoked to +** try harder to allocate a page. This routine might invoke the stress +** callback to spill dirty pages to the journal. It will then try to +** allocate the new page and will only fail to allocate a new page on +** an OOM error. +** +** This routine should be invoked only after sqlite3PcacheFetch() fails. +*/ +SQLITE_PRIVATE int sqlite3PcacheFetchStress( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number to obtain */ + sqlite3_pcache_page **ppPage /* Write result here */ +){ + PgHdr *pPg; + if( pCache->eCreate==2 ) return 0; + + if( sqlite3PcachePagecount(pCache)>pCache->szSpill ){ + /* Find a dirty page to write-out and recycle. First try to find a + ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC + ** cleared), but if that is not possible settle for any other + ** unreferenced dirty page. + ** + ** If the LRU page in the dirty list that has a clear PGHDR_NEED_SYNC + ** flag is currently referenced, then the following may leave pSynced + ** set incorrectly (pointing to other than the LRU page with NEED_SYNC + ** cleared). This is Ok, as pSynced is just an optimization. */ + for(pPg=pCache->pSynced; + pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); + pPg=pPg->pDirtyPrev + ); + pCache->pSynced = pPg; + if( !pPg ){ + for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev); + } + if( pPg ){ + int rc; +#ifdef SQLITE_LOG_CACHE_SPILL + sqlite3_log(SQLITE_FULL, + "spill page %d making room for %d - cache used: %d/%d", + pPg->pgno, pgno, + sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache), + numberOfCachePages(pCache)); +#endif + pcacheTrace(("%p.SPILL %d\n",pCache,pPg->pgno)); + rc = pCache->xStress(pCache->pStress, pPg); + pcacheDump(pCache); + if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){ + return rc; + } + } + } + *ppPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2); + return *ppPage==0 ? SQLITE_NOMEM_BKPT : SQLITE_OK; +} + +/* +** This is a helper routine for sqlite3PcacheFetchFinish() +** +** In the uncommon case where the page being fetched has not been +** initialized, this routine is invoked to do the initialization. +** This routine is broken out into a separate function since it +** requires extra stack manipulation that can be avoided in the common +** case. +*/ +static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number obtained */ + sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ +){ + PgHdr *pPgHdr; + assert( pPage!=0 ); + pPgHdr = (PgHdr*)pPage->pExtra; + assert( pPgHdr->pPage==0 ); + memset(&pPgHdr->pDirty, 0, sizeof(PgHdr) - offsetof(PgHdr,pDirty)); + pPgHdr->pPage = pPage; + pPgHdr->pData = pPage->pBuf; + pPgHdr->pExtra = (void *)&pPgHdr[1]; + memset(pPgHdr->pExtra, 0, 8); + pPgHdr->pCache = pCache; + pPgHdr->pgno = pgno; + pPgHdr->flags = PGHDR_CLEAN; + return sqlite3PcacheFetchFinish(pCache,pgno,pPage); +} + +/* +** This routine converts the sqlite3_pcache_page object returned by +** sqlite3PcacheFetch() into an initialized PgHdr object. This routine +** must be called after sqlite3PcacheFetch() in order to get a usable +** result. +*/ +SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number obtained */ + sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ +){ + PgHdr *pPgHdr; + + assert( pPage!=0 ); + pPgHdr = (PgHdr *)pPage->pExtra; + + if( !pPgHdr->pPage ){ + return pcacheFetchFinishWithInit(pCache, pgno, pPage); + } + pCache->nRefSum++; + pPgHdr->nRef++; + assert( sqlite3PcachePageSanity(pPgHdr) ); + return pPgHdr; +} + +/* +** Decrement the reference count on a page. If the page is clean and the +** reference count drops to 0, then it is made eligible for recycling. +*/ +SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ + assert( p->nRef>0 ); + p->pCache->nRefSum--; + if( (--p->nRef)==0 ){ + if( p->flags&PGHDR_CLEAN ){ + pcacheUnpin(p); + }else{ + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); + } + } +} + +/* +** Increase the reference count of a supplied page by 1. +*/ +SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr *p){ + assert(p->nRef>0); + assert( sqlite3PcachePageSanity(p) ); + p->nRef++; + p->pCache->nRefSum++; +} + +/* +** Drop a page from the cache. There must be exactly one reference to the +** page. This function deletes that reference, so after it returns the +** page pointed to by p is invalid. +*/ +SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){ + assert( p->nRef==1 ); + assert( sqlite3PcachePageSanity(p) ); + if( p->flags&PGHDR_DIRTY ){ + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); + } + p->pCache->nRefSum--; + sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); +} + +/* +** Make sure the page is marked as dirty. If it isn't dirty already, +** make it so. +*/ +SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ + assert( p->nRef>0 || p->pCache->bPurgeable==0 ); + testcase( p->nRef==0 ); + assert( sqlite3PcachePageSanity(p) ); + if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){ /*OPTIMIZATION-IF-FALSE*/ + p->flags &= ~PGHDR_DONT_WRITE; + if( p->flags & PGHDR_CLEAN ){ + p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN); + pcacheTrace(("%p.DIRTY %d\n",p->pCache,p->pgno)); + assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY ); + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + } + assert( sqlite3PcachePageSanity(p) ); + } +} + +/* +** Make sure the page is marked as clean. If it isn't clean already, +** make it so. +*/ +SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr *p){ + assert( sqlite3PcachePageSanity(p) ); + assert( (p->flags & PGHDR_DIRTY)!=0 ); + assert( (p->flags & PGHDR_CLEAN)==0 ); + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); + p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE); + p->flags |= PGHDR_CLEAN; + pcacheTrace(("%p.CLEAN %d\n",p->pCache,p->pgno)); + assert( sqlite3PcachePageSanity(p) ); + if( p->nRef==0 ){ + pcacheUnpin(p); + } +} + +/* +** Make every page in the cache clean. +*/ +SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache *pCache){ + PgHdr *p; + pcacheTrace(("%p.CLEAN-ALL\n",pCache)); + while( (p = pCache->pDirty)!=0 ){ + sqlite3PcacheMakeClean(p); + } +} + +/* +** Clear the PGHDR_NEED_SYNC and PGHDR_WRITEABLE flag from all dirty pages. +*/ +SQLITE_PRIVATE void sqlite3PcacheClearWritable(PCache *pCache){ + PgHdr *p; + pcacheTrace(("%p.CLEAR-WRITEABLE\n",pCache)); + for(p=pCache->pDirty; p; p=p->pDirtyNext){ + p->flags &= ~(PGHDR_NEED_SYNC|PGHDR_WRITEABLE); + } + pCache->pSynced = pCache->pDirtyTail; +} + +/* +** Clear the PGHDR_NEED_SYNC flag from all dirty pages. +*/ +SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *pCache){ + PgHdr *p; + for(p=pCache->pDirty; p; p=p->pDirtyNext){ + p->flags &= ~PGHDR_NEED_SYNC; + } + pCache->pSynced = pCache->pDirtyTail; +} + +/* +** Change the page number of page p to newPgno. +*/ +SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr *p, Pgno newPgno){ + PCache *pCache = p->pCache; + assert( p->nRef>0 ); + assert( newPgno>0 ); + assert( sqlite3PcachePageSanity(p) ); + pcacheTrace(("%p.MOVE %d -> %d\n",pCache,p->pgno,newPgno)); + sqlite3GlobalConfig.pcache2.xRekey(pCache->pCache, p->pPage, p->pgno,newPgno); + p->pgno = newPgno; + if( (p->flags&PGHDR_DIRTY) && (p->flags&PGHDR_NEED_SYNC) ){ + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); + } +} + +/* +** Drop every cache entry whose page number is greater than "pgno". The +** caller must ensure that there are no outstanding references to any pages +** other than page 1 with a page number greater than pgno. +** +** If there is a reference to page 1 and the pgno parameter passed to this +** function is 0, then the data area associated with page 1 is zeroed, but +** the page object is not dropped. +*/ +SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){ + if( pCache->pCache ){ + PgHdr *p; + PgHdr *pNext; + pcacheTrace(("%p.TRUNCATE %d\n",pCache,pgno)); + for(p=pCache->pDirty; p; p=pNext){ + pNext = p->pDirtyNext; + /* This routine never gets call with a positive pgno except right + ** after sqlite3PcacheCleanAll(). So if there are dirty pages, + ** it must be that pgno==0. + */ + assert( p->pgno>0 ); + if( p->pgno>pgno ){ + assert( p->flags&PGHDR_DIRTY ); + sqlite3PcacheMakeClean(p); + } + } + if( pgno==0 && pCache->nRefSum ){ + sqlite3_pcache_page *pPage1; + pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0); + if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because + ** pCache->nRefSum>0 */ + memset(pPage1->pBuf, 0, pCache->szPage); + pgno = 1; + } + } + sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1); + } +} + +/* +** Close a cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheClose(PCache *pCache){ + assert( pCache->pCache!=0 ); + pcacheTrace(("%p.CLOSE\n",pCache)); + sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); +} + +/* +** Discard the contents of the cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheClear(PCache *pCache){ + sqlite3PcacheTruncate(pCache, 0); +} + +/* +** Merge two lists of pages connected by pDirty and in pgno order. +** Do not bother fixing the pDirtyPrev pointers. +*/ +static PgHdr *pcacheMergeDirtyList(PgHdr *pA, PgHdr *pB){ + PgHdr result, *pTail; + pTail = &result; + assert( pA!=0 && pB!=0 ); + for(;;){ + if( pA->pgnopgno ){ + pTail->pDirty = pA; + pTail = pA; + pA = pA->pDirty; + if( pA==0 ){ + pTail->pDirty = pB; + break; + } + }else{ + pTail->pDirty = pB; + pTail = pB; + pB = pB->pDirty; + if( pB==0 ){ + pTail->pDirty = pA; + break; + } + } + } + return result.pDirty; +} + +/* +** Sort the list of pages in accending order by pgno. Pages are +** connected by pDirty pointers. The pDirtyPrev pointers are +** corrupted by this sort. +** +** Since there cannot be more than 2^31 distinct pages in a database, +** there cannot be more than 31 buckets required by the merge sorter. +** One extra bucket is added to catch overflow in case something +** ever changes to make the previous sentence incorrect. +*/ +#define N_SORT_BUCKET 32 +static PgHdr *pcacheSortDirtyList(PgHdr *pIn){ + PgHdr *a[N_SORT_BUCKET], *p; + int i; + memset(a, 0, sizeof(a)); + while( pIn ){ + p = pIn; + pIn = p->pDirty; + p->pDirty = 0; + for(i=0; ALWAYS(ipDirty; p; p=p->pDirtyNext){ + p->pDirty = p->pDirtyNext; + } + return pcacheSortDirtyList(pCache->pDirty); +} + +/* +** Return the total number of references to all pages held by the cache. +** +** This is not the total number of pages referenced, but the sum of the +** reference count for all pages. +*/ +SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache *pCache){ + return pCache->nRefSum; +} + +/* +** Return the number of references to the page supplied as an argument. +*/ +SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr *p){ + return p->nRef; +} + +/* +** Return the total number of pages in the cache. +*/ +SQLITE_PRIVATE int sqlite3PcachePagecount(PCache *pCache){ + assert( pCache->pCache!=0 ); + return sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache); +} + +#ifdef SQLITE_TEST +/* +** Get the suggested cache-size value. +*/ +SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *pCache){ + return numberOfCachePages(pCache); +} +#endif + +/* +** Set the suggested cache-size value. +*/ +SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage){ + assert( pCache->pCache!=0 ); + pCache->szCache = mxPage; + sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache, + numberOfCachePages(pCache)); +} + +/* +** Set the suggested cache-spill value. Make no changes if if the +** argument is zero. Return the effective cache-spill size, which will +** be the larger of the szSpill and szCache. +*/ +SQLITE_PRIVATE int sqlite3PcacheSetSpillsize(PCache *p, int mxPage){ + int res; + assert( p->pCache!=0 ); + if( mxPage ){ + if( mxPage<0 ){ + mxPage = (int)((-1024*(i64)mxPage)/(p->szPage+p->szExtra)); + } + p->szSpill = mxPage; + } + res = numberOfCachePages(p); + if( resszSpill ) res = p->szSpill; + return res; +} + +/* +** Free up as much memory as possible from the page cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheShrink(PCache *pCache){ + assert( pCache->pCache!=0 ); + sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache); +} + +/* +** Return the size of the header added by this middleware layer +** in the page-cache hierarchy. +*/ +SQLITE_PRIVATE int sqlite3HeaderSizePcache(void){ return ROUND8(sizeof(PgHdr)); } + +/* +** Return the number of dirty pages currently in the cache, as a percentage +** of the configured cache size. +*/ +SQLITE_PRIVATE int sqlite3PCachePercentDirty(PCache *pCache){ + PgHdr *pDirty; + int nDirty = 0; + int nCache = numberOfCachePages(pCache); + for(pDirty=pCache->pDirty; pDirty; pDirty=pDirty->pDirtyNext) nDirty++; + return nCache ? (int)(((i64)nDirty * 100) / nCache) : 0; +} + +#ifdef SQLITE_DIRECT_OVERFLOW_READ +/* +** Return true if there are one or more dirty pages in the cache. Else false. +*/ +SQLITE_PRIVATE int sqlite3PCacheIsDirty(PCache *pCache){ + return (pCache->pDirty!=0); +} +#endif + +#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) +/* +** For all dirty pages currently in the cache, invoke the specified +** callback. This is only used if the SQLITE_CHECK_PAGES macro is +** defined. +*/ +SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)){ + PgHdr *pDirty; + for(pDirty=pCache->pDirty; pDirty; pDirty=pDirty->pDirtyNext){ + xIter(pDirty); + } +} +#endif + +/************** End of pcache.c **********************************************/ +/************** Begin file pcache1.c *****************************************/ +/* +** 2008 November 05 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file implements the default page cache implementation (the +** sqlite3_pcache interface). It also contains part of the implementation +** of the SQLITE_CONFIG_PAGECACHE and sqlite3_release_memory() features. +** If the default page cache implementation is overridden, then neither of +** these two features are available. +** +** A Page cache line looks like this: +** +** ------------------------------------------------------------- +** | database page content | PgHdr1 | MemPage | PgHdr | +** ------------------------------------------------------------- +** +** The database page content is up front (so that buffer overreads tend to +** flow harmlessly into the PgHdr1, MemPage, and PgHdr extensions). MemPage +** is the extension added by the btree.c module containing information such +** as the database page number and how that database page is used. PgHdr +** is added by the pcache.c layer and contains information used to keep track +** of which pages are "dirty". PgHdr1 is an extension added by this +** module (pcache1.c). The PgHdr1 header is a subclass of sqlite3_pcache_page. +** PgHdr1 contains information needed to look up a page by its page number. +** The superclass sqlite3_pcache_page.pBuf points to the start of the +** database page content and sqlite3_pcache_page.pExtra points to PgHdr. +** +** The size of the extension (MemPage+PgHdr+PgHdr1) can be determined at +** runtime using sqlite3_config(SQLITE_CONFIG_PCACHE_HDRSZ, &size). The +** sizes of the extensions sum to 272 bytes on x64 for 3.8.10, but this +** size can vary according to architecture, compile-time options, and +** SQLite library version number. +** +** If SQLITE_PCACHE_SEPARATE_HEADER is defined, then the extension is obtained +** using a separate memory allocation from the database page content. This +** seeks to overcome the "clownshoe" problem (also called "internal +** fragmentation" in academic literature) of allocating a few bytes more +** than a power of two with the memory allocator rounding up to the next +** power of two, and leaving the rounded-up space unused. +** +** This module tracks pointers to PgHdr1 objects. Only pcache.c communicates +** with this module. Information is passed back and forth as PgHdr1 pointers. +** +** The pcache.c and pager.c modules deal pointers to PgHdr objects. +** The btree.c module deals with pointers to MemPage objects. +** +** SOURCE OF PAGE CACHE MEMORY: +** +** Memory for a page might come from any of three sources: +** +** (1) The general-purpose memory allocator - sqlite3Malloc() +** (2) Global page-cache memory provided using sqlite3_config() with +** SQLITE_CONFIG_PAGECACHE. +** (3) PCache-local bulk allocation. +** +** The third case is a chunk of heap memory (defaulting to 100 pages worth) +** that is allocated when the page cache is created. The size of the local +** bulk allocation can be adjusted using +** +** sqlite3_config(SQLITE_CONFIG_PAGECACHE, (void*)0, 0, N). +** +** If N is positive, then N pages worth of memory are allocated using a single +** sqlite3Malloc() call and that memory is used for the first N pages allocated. +** Or if N is negative, then -1024*N bytes of memory are allocated and used +** for as many pages as can be accomodated. +** +** Only one of (2) or (3) can be used. Once the memory available to (2) or +** (3) is exhausted, subsequent allocations fail over to the general-purpose +** memory allocator (1). +** +** Earlier versions of SQLite used only methods (1) and (2). But experiments +** show that method (3) with N==100 provides about a 5% performance boost for +** common workloads. +*/ +/* #include "sqliteInt.h" */ + +typedef struct PCache1 PCache1; +typedef struct PgHdr1 PgHdr1; +typedef struct PgFreeslot PgFreeslot; +typedef struct PGroup PGroup; + +/* +** Each cache entry is represented by an instance of the following +** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of +** PgHdr1.pCache->szPage bytes is allocated directly before this structure +** in memory. +** +** Note: Variables isBulkLocal and isAnchor were once type "u8". That works, +** but causes a 2-byte gap in the structure for most architectures (since +** pointers must be either 4 or 8-byte aligned). As this structure is located +** in memory directly after the associated page data, if the database is +** corrupt, code at the b-tree layer may overread the page buffer and +** read part of this structure before the corruption is detected. This +** can cause a valgrind error if the unitialized gap is accessed. Using u16 +** ensures there is no such gap, and therefore no bytes of unitialized memory +** in the structure. +*/ +struct PgHdr1 { + sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */ + unsigned int iKey; /* Key value (page number) */ + u16 isBulkLocal; /* This page from bulk local storage */ + u16 isAnchor; /* This is the PGroup.lru element */ + PgHdr1 *pNext; /* Next in hash table chain */ + PCache1 *pCache; /* Cache that currently owns this page */ + PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ + PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ + /* NB: pLruPrev is only valid if pLruNext!=0 */ +}; + +/* +** A page is pinned if it is not on the LRU list. To be "pinned" means +** that the page is in active use and must not be deallocated. +*/ +#define PAGE_IS_PINNED(p) ((p)->pLruNext==0) +#define PAGE_IS_UNPINNED(p) ((p)->pLruNext!=0) + +/* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set +** of one or more PCaches that are able to recycle each other's unpinned +** pages when they are under memory pressure. A PGroup is an instance of +** the following object. +** +** This page cache implementation works in one of two modes: +** +** (1) Every PCache is the sole member of its own PGroup. There is +** one PGroup per PCache. +** +** (2) There is a single global PGroup that all PCaches are a member +** of. +** +** Mode 1 uses more memory (since PCache instances are not able to rob +** unused pages from other PCaches) but it also operates without a mutex, +** and is therefore often faster. Mode 2 requires a mutex in order to be +** threadsafe, but recycles pages more efficiently. +** +** For mode (1), PGroup.mutex is NULL. For mode (2) there is only a single +** PGroup which is the pcache1.grp global variable and its mutex is +** SQLITE_MUTEX_STATIC_LRU. +*/ +struct PGroup { + sqlite3_mutex *mutex; /* MUTEX_STATIC_LRU or NULL */ + unsigned int nMaxPage; /* Sum of nMax for purgeable caches */ + unsigned int nMinPage; /* Sum of nMin for purgeable caches */ + unsigned int mxPinned; /* nMaxpage + 10 - nMinPage */ + unsigned int nPurgeable; /* Number of purgeable pages allocated */ + PgHdr1 lru; /* The beginning and end of the LRU list */ +}; + +/* Each page cache is an instance of the following object. Every +** open database file (including each in-memory database and each +** temporary or transient database) has a single page cache which +** is an instance of this object. +** +** Pointers to structures of this type are cast and returned as +** opaque sqlite3_pcache* handles. +*/ +struct PCache1 { + /* Cache configuration parameters. Page size (szPage) and the purgeable + ** flag (bPurgeable) and the pnPurgeable pointer are all set when the + ** cache is created and are never changed thereafter. nMax may be + ** modified at any time by a call to the pcache1Cachesize() method. + ** The PGroup mutex must be held when accessing nMax. + */ + PGroup *pGroup; /* PGroup this cache belongs to */ + unsigned int *pnPurgeable; /* Pointer to pGroup->nPurgeable */ + int szPage; /* Size of database content section */ + int szExtra; /* sizeof(MemPage)+sizeof(PgHdr) */ + int szAlloc; /* Total size of one pcache line */ + int bPurgeable; /* True if cache is purgeable */ + unsigned int nMin; /* Minimum number of pages reserved */ + unsigned int nMax; /* Configured "cache_size" value */ + unsigned int n90pct; /* nMax*9/10 */ + unsigned int iMaxKey; /* Largest key seen since xTruncate() */ + unsigned int nPurgeableDummy; /* pnPurgeable points here when not used*/ + + /* Hash table of all pages. The following variables may only be accessed + ** when the accessor is holding the PGroup mutex. + */ + unsigned int nRecyclable; /* Number of pages in the LRU list */ + unsigned int nPage; /* Total number of pages in apHash */ + unsigned int nHash; /* Number of slots in apHash[] */ + PgHdr1 **apHash; /* Hash table for fast lookup by key */ + PgHdr1 *pFree; /* List of unused pcache-local pages */ + void *pBulk; /* Bulk memory used by pcache-local */ +}; + +/* +** Free slots in the allocator used to divide up the global page cache +** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism. +*/ +struct PgFreeslot { + PgFreeslot *pNext; /* Next free slot */ +}; + +/* +** Global data used by this cache. +*/ +static SQLITE_WSD struct PCacheGlobal { + PGroup grp; /* The global PGroup for mode (2) */ + + /* Variables related to SQLITE_CONFIG_PAGECACHE settings. The + ** szSlot, nSlot, pStart, pEnd, nReserve, and isInit values are all + ** fixed at sqlite3_initialize() time and do not require mutex protection. + ** The nFreeSlot and pFree values do require mutex protection. + */ + int isInit; /* True if initialized */ + int separateCache; /* Use a new PGroup for each PCache */ + int nInitPage; /* Initial bulk allocation size */ + int szSlot; /* Size of each free slot */ + int nSlot; /* The number of pcache slots */ + int nReserve; /* Try to keep nFreeSlot above this */ + void *pStart, *pEnd; /* Bounds of global page cache memory */ + /* Above requires no mutex. Use mutex below for variable that follow. */ + sqlite3_mutex *mutex; /* Mutex for accessing the following: */ + PgFreeslot *pFree; /* Free page blocks */ + int nFreeSlot; /* Number of unused pcache slots */ + /* The following value requires a mutex to change. We skip the mutex on + ** reading because (1) most platforms read a 32-bit integer atomically and + ** (2) even if an incorrect value is read, no great harm is done since this + ** is really just an optimization. */ + int bUnderPressure; /* True if low on PAGECACHE memory */ +} pcache1_g; + +/* +** All code in this file should access the global structure above via the +** alias "pcache1". This ensures that the WSD emulation is used when +** compiling for systems that do not support real WSD. +*/ +#define pcache1 (GLOBAL(struct PCacheGlobal, pcache1_g)) + +/* +** Macros to enter and leave the PCache LRU mutex. +*/ +#if !defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0 +# define pcache1EnterMutex(X) assert((X)->mutex==0) +# define pcache1LeaveMutex(X) assert((X)->mutex==0) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 0 +#else +# define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) +# define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 1 +#endif + +/******************************************************************************/ +/******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/ + + +/* +** This function is called during initialization if a static buffer is +** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE +** verb to sqlite3_config(). Parameter pBuf points to an allocation large +** enough to contain 'n' buffers of 'sz' bytes each. +** +** This routine is called from sqlite3_initialize() and so it is guaranteed +** to be serialized already. There is no need for further mutexing. +*/ +SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){ + if( pcache1.isInit ){ + PgFreeslot *p; + if( pBuf==0 ) sz = n = 0; + if( n==0 ) sz = 0; + sz = ROUNDDOWN8(sz); + pcache1.szSlot = sz; + pcache1.nSlot = pcache1.nFreeSlot = n; + pcache1.nReserve = n>90 ? 10 : (n/10 + 1); + pcache1.pStart = pBuf; + pcache1.pFree = 0; + pcache1.bUnderPressure = 0; + while( n-- ){ + p = (PgFreeslot*)pBuf; + p->pNext = pcache1.pFree; + pcache1.pFree = p; + pBuf = (void*)&((char*)pBuf)[sz]; + } + pcache1.pEnd = pBuf; + } +} + +/* +** Try to initialize the pCache->pFree and pCache->pBulk fields. Return +** true if pCache->pFree ends up containing one or more free pages. +*/ +static int pcache1InitBulk(PCache1 *pCache){ + i64 szBulk; + char *zBulk; + if( pcache1.nInitPage==0 ) return 0; + /* Do not bother with a bulk allocation if the cache size very small */ + if( pCache->nMax<3 ) return 0; + sqlite3BeginBenignMalloc(); + if( pcache1.nInitPage>0 ){ + szBulk = pCache->szAlloc * (i64)pcache1.nInitPage; + }else{ + szBulk = -1024 * (i64)pcache1.nInitPage; + } + if( szBulk > pCache->szAlloc*(i64)pCache->nMax ){ + szBulk = pCache->szAlloc*(i64)pCache->nMax; + } + zBulk = pCache->pBulk = sqlite3Malloc( szBulk ); + sqlite3EndBenignMalloc(); + if( zBulk ){ + int nBulk = sqlite3MallocSize(zBulk)/pCache->szAlloc; + do{ + PgHdr1 *pX = (PgHdr1*)&zBulk[pCache->szPage]; + pX->page.pBuf = zBulk; + pX->page.pExtra = &pX[1]; + pX->isBulkLocal = 1; + pX->isAnchor = 0; + pX->pNext = pCache->pFree; + pX->pLruPrev = 0; /* Initializing this saves a valgrind error */ + pCache->pFree = pX; + zBulk += pCache->szAlloc; + }while( --nBulk ); + } + return pCache->pFree!=0; +} + +/* +** Malloc function used within this file to allocate space from the buffer +** configured using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no +** such buffer exists or there is no space left in it, this function falls +** back to sqlite3Malloc(). +** +** Multiple threads can run this routine at the same time. Global variables +** in pcache1 need to be protected via mutex. +*/ +static void *pcache1Alloc(int nByte){ + void *p = 0; + assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); + if( nByte<=pcache1.szSlot ){ + sqlite3_mutex_enter(pcache1.mutex); + p = (PgHdr1 *)pcache1.pFree; + if( p ){ + pcache1.pFree = pcache1.pFree->pNext; + pcache1.nFreeSlot--; + pcache1.bUnderPressure = pcache1.nFreeSlot=0 ); + sqlite3StatusHighwater(SQLITE_STATUS_PAGECACHE_SIZE, nByte); + sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_USED, 1); + } + sqlite3_mutex_leave(pcache1.mutex); + } + if( p==0 ){ + /* Memory is not available in the SQLITE_CONFIG_PAGECACHE pool. Get + ** it from sqlite3Malloc instead. + */ + p = sqlite3Malloc(nByte); +#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS + if( p ){ + int sz = sqlite3MallocSize(p); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusHighwater(SQLITE_STATUS_PAGECACHE_SIZE, nByte); + sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz); + sqlite3_mutex_leave(pcache1.mutex); + } +#endif + sqlite3MemdebugSetType(p, MEMTYPE_PCACHE); + } + return p; +} + +/* +** Free an allocated buffer obtained from pcache1Alloc(). +*/ +static void pcache1Free(void *p){ + if( p==0 ) return; + if( SQLITE_WITHIN(p, pcache1.pStart, pcache1.pEnd) ){ + PgFreeslot *pSlot; + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusDown(SQLITE_STATUS_PAGECACHE_USED, 1); + pSlot = (PgFreeslot*)p; + pSlot->pNext = pcache1.pFree; + pcache1.pFree = pSlot; + pcache1.nFreeSlot++; + pcache1.bUnderPressure = pcache1.nFreeSlot=pcache1.pStart && ppGroup->mutex) ); + if( pCache->pFree || (pCache->nPage==0 && pcache1InitBulk(pCache)) ){ + assert( pCache->pFree!=0 ); + p = pCache->pFree; + pCache->pFree = p->pNext; + p->pNext = 0; + }else{ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + /* The group mutex must be released before pcache1Alloc() is called. This + ** is because it might call sqlite3_release_memory(), which assumes that + ** this mutex is not held. */ + assert( pcache1.separateCache==0 ); + assert( pCache->pGroup==&pcache1.grp ); + pcache1LeaveMutex(pCache->pGroup); +#endif + if( benignMalloc ){ sqlite3BeginBenignMalloc(); } +#ifdef SQLITE_PCACHE_SEPARATE_HEADER + pPg = pcache1Alloc(pCache->szPage); + p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); + if( !pPg || !p ){ + pcache1Free(pPg); + sqlite3_free(p); + pPg = 0; + } +#else + pPg = pcache1Alloc(pCache->szAlloc); +#endif + if( benignMalloc ){ sqlite3EndBenignMalloc(); } +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + pcache1EnterMutex(pCache->pGroup); +#endif + if( pPg==0 ) return 0; +#ifndef SQLITE_PCACHE_SEPARATE_HEADER + p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; +#endif + p->page.pBuf = pPg; + p->page.pExtra = &p[1]; + p->isBulkLocal = 0; + p->isAnchor = 0; + p->pLruPrev = 0; /* Initializing this saves a valgrind error */ + } + (*pCache->pnPurgeable)++; + return p; +} + +/* +** Free a page object allocated by pcache1AllocPage(). +*/ +static void pcache1FreePage(PgHdr1 *p){ + PCache1 *pCache; + assert( p!=0 ); + pCache = p->pCache; + assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + if( p->isBulkLocal ){ + p->pNext = pCache->pFree; + pCache->pFree = p; + }else{ + pcache1Free(p->page.pBuf); +#ifdef SQLITE_PCACHE_SEPARATE_HEADER + sqlite3_free(p); +#endif + } + (*pCache->pnPurgeable)--; +} + +/* +** Malloc function used by SQLite to obtain space from the buffer configured +** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer +** exists, this function falls back to sqlite3Malloc(). +*/ +SQLITE_PRIVATE void *sqlite3PageMalloc(int sz){ + assert( sz<=65536+8 ); /* These allocations are never very large */ + return pcache1Alloc(sz); +} + +/* +** Free an allocated buffer obtained from sqlite3PageMalloc(). +*/ +SQLITE_PRIVATE void sqlite3PageFree(void *p){ + pcache1Free(p); +} + + +/* +** Return true if it desirable to avoid allocating a new page cache +** entry. +** +** If memory was allocated specifically to the page cache using +** SQLITE_CONFIG_PAGECACHE but that memory has all been used, then +** it is desirable to avoid allocating a new page cache entry because +** presumably SQLITE_CONFIG_PAGECACHE was suppose to be sufficient +** for all page cache needs and we should not need to spill the +** allocation onto the heap. +** +** Or, the heap is used for all page cache memory but the heap is +** under memory pressure, then again it is desirable to avoid +** allocating a new page cache entry in order to avoid stressing +** the heap even further. +*/ +static int pcache1UnderMemoryPressure(PCache1 *pCache){ + if( pcache1.nSlot && (pCache->szPage+pCache->szExtra)<=pcache1.szSlot ){ + return pcache1.bUnderPressure; + }else{ + return sqlite3HeapNearlyFull(); + } +} + +/******************************************************************************/ +/******** General Implementation Functions ************************************/ + +/* +** This function is used to resize the hash table used by the cache passed +** as the first argument. +** +** The PCache mutex must be held when this function is called. +*/ +static void pcache1ResizeHash(PCache1 *p){ + PgHdr1 **apNew; + unsigned int nNew; + unsigned int i; + + assert( sqlite3_mutex_held(p->pGroup->mutex) ); + + nNew = p->nHash*2; + if( nNew<256 ){ + nNew = 256; + } + + pcache1LeaveMutex(p->pGroup); + if( p->nHash ){ sqlite3BeginBenignMalloc(); } + apNew = (PgHdr1 **)sqlite3MallocZero(sizeof(PgHdr1 *)*nNew); + if( p->nHash ){ sqlite3EndBenignMalloc(); } + pcache1EnterMutex(p->pGroup); + if( apNew ){ + for(i=0; inHash; i++){ + PgHdr1 *pPage; + PgHdr1 *pNext = p->apHash[i]; + while( (pPage = pNext)!=0 ){ + unsigned int h = pPage->iKey % nNew; + pNext = pPage->pNext; + pPage->pNext = apNew[h]; + apNew[h] = pPage; + } + } + sqlite3_free(p->apHash); + p->apHash = apNew; + p->nHash = nNew; + } +} + +/* +** This function is used internally to remove the page pPage from the +** PGroup LRU list, if is part of it. If pPage is not part of the PGroup +** LRU list, then this function is a no-op. +** +** The PGroup mutex must be held when this function is called. +*/ +static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){ + assert( pPage!=0 ); + assert( PAGE_IS_UNPINNED(pPage) ); + assert( pPage->pLruNext ); + assert( pPage->pLruPrev ); + assert( sqlite3_mutex_held(pPage->pCache->pGroup->mutex) ); + pPage->pLruPrev->pLruNext = pPage->pLruNext; + pPage->pLruNext->pLruPrev = pPage->pLruPrev; + pPage->pLruNext = 0; + /* pPage->pLruPrev = 0; + ** No need to clear pLruPrev as it is never accessed if pLruNext is 0 */ + assert( pPage->isAnchor==0 ); + assert( pPage->pCache->pGroup->lru.isAnchor==1 ); + pPage->pCache->nRecyclable--; + return pPage; +} + + +/* +** Remove the page supplied as an argument from the hash table +** (PCache1.apHash structure) that it is currently stored in. +** Also free the page if freePage is true. +** +** The PGroup mutex must be held when this function is called. +*/ +static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){ + unsigned int h; + PCache1 *pCache = pPage->pCache; + PgHdr1 **pp; + + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + h = pPage->iKey % pCache->nHash; + for(pp=&pCache->apHash[h]; (*pp)!=pPage; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + + pCache->nPage--; + if( freeFlag ) pcache1FreePage(pPage); +} + +/* +** If there are currently more than nMaxPage pages allocated, try +** to recycle pages to reduce the number allocated to nMaxPage. +*/ +static void pcache1EnforceMaxPage(PCache1 *pCache){ + PGroup *pGroup = pCache->pGroup; + PgHdr1 *p; + assert( sqlite3_mutex_held(pGroup->mutex) ); + while( pGroup->nPurgeable>pGroup->nMaxPage + && (p=pGroup->lru.pLruPrev)->isAnchor==0 + ){ + assert( p->pCache->pGroup==pGroup ); + assert( PAGE_IS_UNPINNED(p) ); + pcache1PinPage(p); + pcache1RemoveFromHash(p, 1); + } + if( pCache->nPage==0 && pCache->pBulk ){ + sqlite3_free(pCache->pBulk); + pCache->pBulk = pCache->pFree = 0; + } +} + +/* +** Discard all pages from cache pCache with a page number (key value) +** greater than or equal to iLimit. Any pinned pages that meet this +** criteria are unpinned before they are discarded. +** +** The PCache mutex must be held when this function is called. +*/ +static void pcache1TruncateUnsafe( + PCache1 *pCache, /* The cache to truncate */ + unsigned int iLimit /* Drop pages with this pgno or larger */ +){ + TESTONLY( int nPage = 0; ) /* To assert pCache->nPage is correct */ + unsigned int h, iStop; + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + assert( pCache->iMaxKey >= iLimit ); + assert( pCache->nHash > 0 ); + if( pCache->iMaxKey - iLimit < pCache->nHash ){ + /* If we are just shaving the last few pages off the end of the + ** cache, then there is no point in scanning the entire hash table. + ** Only scan those hash slots that might contain pages that need to + ** be removed. */ + h = iLimit % pCache->nHash; + iStop = pCache->iMaxKey % pCache->nHash; + TESTONLY( nPage = -10; ) /* Disable the pCache->nPage validity check */ + }else{ + /* This is the general case where many pages are being removed. + ** It is necessary to scan the entire hash table */ + h = pCache->nHash/2; + iStop = h - 1; + } + for(;;){ + PgHdr1 **pp; + PgHdr1 *pPage; + assert( hnHash ); + pp = &pCache->apHash[h]; + while( (pPage = *pp)!=0 ){ + if( pPage->iKey>=iLimit ){ + pCache->nPage--; + *pp = pPage->pNext; + if( PAGE_IS_UNPINNED(pPage) ) pcache1PinPage(pPage); + pcache1FreePage(pPage); + }else{ + pp = &pPage->pNext; + TESTONLY( if( nPage>=0 ) nPage++; ) + } + } + if( h==iStop ) break; + h = (h+1) % pCache->nHash; + } + assert( nPage<0 || pCache->nPage==(unsigned)nPage ); +} + +/******************************************************************************/ +/******** sqlite3_pcache Methods **********************************************/ + +/* +** Implementation of the sqlite3_pcache.xInit method. +*/ +static int pcache1Init(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + assert( pcache1.isInit==0 ); + memset(&pcache1, 0, sizeof(pcache1)); + + + /* + ** The pcache1.separateCache variable is true if each PCache has its own + ** private PGroup (mode-1). pcache1.separateCache is false if the single + ** PGroup in pcache1.grp is used for all page caches (mode-2). + ** + ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT + ** + ** * Use a unified cache in single-threaded applications that have + ** configured a start-time buffer for use as page-cache memory using + ** sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, sz, N) with non-NULL + ** pBuf argument. + ** + ** * Otherwise use separate caches (mode-1) + */ +#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) + pcache1.separateCache = 0; +#elif SQLITE_THREADSAFE + pcache1.separateCache = sqlite3GlobalConfig.pPage==0 + || sqlite3GlobalConfig.bCoreMutex>0; +#else + pcache1.separateCache = sqlite3GlobalConfig.pPage==0; +#endif + +#if SQLITE_THREADSAFE + if( sqlite3GlobalConfig.bCoreMutex ){ + pcache1.grp.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU); + pcache1.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_PMEM); + } +#endif + if( pcache1.separateCache + && sqlite3GlobalConfig.nPage!=0 + && sqlite3GlobalConfig.pPage==0 + ){ + pcache1.nInitPage = sqlite3GlobalConfig.nPage; + }else{ + pcache1.nInitPage = 0; + } + pcache1.grp.mxPinned = 10; + pcache1.isInit = 1; + return SQLITE_OK; +} + +/* +** Implementation of the sqlite3_pcache.xShutdown method. +** Note that the static mutex allocated in xInit does +** not need to be freed. +*/ +static void pcache1Shutdown(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + assert( pcache1.isInit!=0 ); + memset(&pcache1, 0, sizeof(pcache1)); +} + +/* forward declaration */ +static void pcache1Destroy(sqlite3_pcache *p); + +/* +** Implementation of the sqlite3_pcache.xCreate method. +** +** Allocate a new cache. +*/ +static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){ + PCache1 *pCache; /* The newly created page cache */ + PGroup *pGroup; /* The group the new page cache will belong to */ + int sz; /* Bytes of memory required to allocate the new cache */ + + assert( (szPage & (szPage-1))==0 && szPage>=512 && szPage<=65536 ); + assert( szExtra < 300 ); + + sz = sizeof(PCache1) + sizeof(PGroup)*pcache1.separateCache; + pCache = (PCache1 *)sqlite3MallocZero(sz); + if( pCache ){ + if( pcache1.separateCache ){ + pGroup = (PGroup*)&pCache[1]; + pGroup->mxPinned = 10; + }else{ + pGroup = &pcache1.grp; + } + pcache1EnterMutex(pGroup); + if( pGroup->lru.isAnchor==0 ){ + pGroup->lru.isAnchor = 1; + pGroup->lru.pLruPrev = pGroup->lru.pLruNext = &pGroup->lru; + } + pCache->pGroup = pGroup; + pCache->szPage = szPage; + pCache->szExtra = szExtra; + pCache->szAlloc = szPage + szExtra + ROUND8(sizeof(PgHdr1)); + pCache->bPurgeable = (bPurgeable ? 1 : 0); + pcache1ResizeHash(pCache); + if( bPurgeable ){ + pCache->nMin = 10; + pGroup->nMinPage += pCache->nMin; + pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; + pCache->pnPurgeable = &pGroup->nPurgeable; + }else{ + pCache->pnPurgeable = &pCache->nPurgeableDummy; + } + pcache1LeaveMutex(pGroup); + if( pCache->nHash==0 ){ + pcache1Destroy((sqlite3_pcache*)pCache); + pCache = 0; + } + } + return (sqlite3_pcache *)pCache; +} + +/* +** Implementation of the sqlite3_pcache.xCachesize method. +** +** Configure the cache_size limit for a cache. +*/ +static void pcache1Cachesize(sqlite3_pcache *p, int nMax){ + PCache1 *pCache = (PCache1 *)p; + u32 n; + assert( nMax>=0 ); + if( pCache->bPurgeable ){ + PGroup *pGroup = pCache->pGroup; + pcache1EnterMutex(pGroup); + n = (u32)nMax; + if( n > 0x7fff0000 - pGroup->nMaxPage + pCache->nMax ){ + n = 0x7fff0000 - pGroup->nMaxPage + pCache->nMax; + } + pGroup->nMaxPage += (n - pCache->nMax); + pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; + pCache->nMax = n; + pCache->n90pct = pCache->nMax*9/10; + pcache1EnforceMaxPage(pCache); + pcache1LeaveMutex(pGroup); + } +} + +/* +** Implementation of the sqlite3_pcache.xShrink method. +** +** Free up as much memory as possible. +*/ +static void pcache1Shrink(sqlite3_pcache *p){ + PCache1 *pCache = (PCache1*)p; + if( pCache->bPurgeable ){ + PGroup *pGroup = pCache->pGroup; + unsigned int savedMaxPage; + pcache1EnterMutex(pGroup); + savedMaxPage = pGroup->nMaxPage; + pGroup->nMaxPage = 0; + pcache1EnforceMaxPage(pCache); + pGroup->nMaxPage = savedMaxPage; + pcache1LeaveMutex(pGroup); + } +} + +/* +** Implementation of the sqlite3_pcache.xPagecount method. +*/ +static int pcache1Pagecount(sqlite3_pcache *p){ + int n; + PCache1 *pCache = (PCache1*)p; + pcache1EnterMutex(pCache->pGroup); + n = pCache->nPage; + pcache1LeaveMutex(pCache->pGroup); + return n; +} + + +/* +** Implement steps 3, 4, and 5 of the pcache1Fetch() algorithm described +** in the header of the pcache1Fetch() procedure. +** +** This steps are broken out into a separate procedure because they are +** usually not needed, and by avoiding the stack initialization required +** for these steps, the main pcache1Fetch() procedure can run faster. +*/ +static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( + PCache1 *pCache, + unsigned int iKey, + int createFlag +){ + unsigned int nPinned; + PGroup *pGroup = pCache->pGroup; + PgHdr1 *pPage = 0; + + /* Step 3: Abort if createFlag is 1 but the cache is nearly full */ + assert( pCache->nPage >= pCache->nRecyclable ); + nPinned = pCache->nPage - pCache->nRecyclable; + assert( pGroup->mxPinned == pGroup->nMaxPage + 10 - pGroup->nMinPage ); + assert( pCache->n90pct == pCache->nMax*9/10 ); + if( createFlag==1 && ( + nPinned>=pGroup->mxPinned + || nPinned>=pCache->n90pct + || (pcache1UnderMemoryPressure(pCache) && pCache->nRecyclablenPage>=pCache->nHash ) pcache1ResizeHash(pCache); + assert( pCache->nHash>0 && pCache->apHash ); + + /* Step 4. Try to recycle a page. */ + if( pCache->bPurgeable + && !pGroup->lru.pLruPrev->isAnchor + && ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache)) + ){ + PCache1 *pOther; + pPage = pGroup->lru.pLruPrev; + assert( PAGE_IS_UNPINNED(pPage) ); + pcache1RemoveFromHash(pPage, 0); + pcache1PinPage(pPage); + pOther = pPage->pCache; + if( pOther->szAlloc != pCache->szAlloc ){ + pcache1FreePage(pPage); + pPage = 0; + }else{ + pGroup->nPurgeable -= (pOther->bPurgeable - pCache->bPurgeable); + } + } + + /* Step 5. If a usable page buffer has still not been found, + ** attempt to allocate a new one. + */ + if( !pPage ){ + pPage = pcache1AllocPage(pCache, createFlag==1); + } + + if( pPage ){ + unsigned int h = iKey % pCache->nHash; + pCache->nPage++; + pPage->iKey = iKey; + pPage->pNext = pCache->apHash[h]; + pPage->pCache = pCache; + pPage->pLruNext = 0; + /* pPage->pLruPrev = 0; + ** No need to clear pLruPrev since it is not accessed when pLruNext==0 */ + *(void **)pPage->page.pExtra = 0; + pCache->apHash[h] = pPage; + if( iKey>pCache->iMaxKey ){ + pCache->iMaxKey = iKey; + } + } + return pPage; +} + +/* +** Implementation of the sqlite3_pcache.xFetch method. +** +** Fetch a page by key value. +** +** Whether or not a new page may be allocated by this function depends on +** the value of the createFlag argument. 0 means do not allocate a new +** page. 1 means allocate a new page if space is easily available. 2 +** means to try really hard to allocate a new page. +** +** For a non-purgeable cache (a cache used as the storage for an in-memory +** database) there is really no difference between createFlag 1 and 2. So +** the calling function (pcache.c) will never have a createFlag of 1 on +** a non-purgeable cache. +** +** There are three different approaches to obtaining space for a page, +** depending on the value of parameter createFlag (which may be 0, 1 or 2). +** +** 1. Regardless of the value of createFlag, the cache is searched for a +** copy of the requested page. If one is found, it is returned. +** +** 2. If createFlag==0 and the page is not already in the cache, NULL is +** returned. +** +** 3. If createFlag is 1, and the page is not already in the cache, then +** return NULL (do not allocate a new page) if any of the following +** conditions are true: +** +** (a) the number of pages pinned by the cache is greater than +** PCache1.nMax, or +** +** (b) the number of pages pinned by the cache is greater than +** the sum of nMax for all purgeable caches, less the sum of +** nMin for all other purgeable caches, or +** +** 4. If none of the first three conditions apply and the cache is marked +** as purgeable, and if one of the following is true: +** +** (a) The number of pages allocated for the cache is already +** PCache1.nMax, or +** +** (b) The number of pages allocated for all purgeable caches is +** already equal to or greater than the sum of nMax for all +** purgeable caches, +** +** (c) The system is under memory pressure and wants to avoid +** unnecessary pages cache entry allocations +** +** then attempt to recycle a page from the LRU list. If it is the right +** size, return the recycled buffer. Otherwise, free the buffer and +** proceed to step 5. +** +** 5. Otherwise, allocate and return a new page buffer. +** +** There are two versions of this routine. pcache1FetchWithMutex() is +** the general case. pcache1FetchNoMutex() is a faster implementation for +** the common case where pGroup->mutex is NULL. The pcache1Fetch() wrapper +** invokes the appropriate routine. +*/ +static PgHdr1 *pcache1FetchNoMutex( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage = 0; + + /* Step 1: Search the hash table for an existing entry. */ + pPage = pCache->apHash[iKey % pCache->nHash]; + while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; } + + /* Step 2: If the page was found in the hash table, then return it. + ** If the page was not in the hash table and createFlag is 0, abort. + ** Otherwise (page not in hash and createFlag!=0) continue with + ** subsequent steps to try to create the page. */ + if( pPage ){ + if( PAGE_IS_UNPINNED(pPage) ){ + return pcache1PinPage(pPage); + }else{ + return pPage; + } + }else if( createFlag ){ + /* Steps 3, 4, and 5 implemented by this subroutine */ + return pcache1FetchStage2(pCache, iKey, createFlag); + }else{ + return 0; + } +} +#if PCACHE1_MIGHT_USE_GROUP_MUTEX +static PgHdr1 *pcache1FetchWithMutex( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage; + + pcache1EnterMutex(pCache->pGroup); + pPage = pcache1FetchNoMutex(p, iKey, createFlag); + assert( pPage==0 || pCache->iMaxKey>=iKey ); + pcache1LeaveMutex(pCache->pGroup); + return pPage; +} +#endif +static sqlite3_pcache_page *pcache1Fetch( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ +#if PCACHE1_MIGHT_USE_GROUP_MUTEX || defined(SQLITE_DEBUG) + PCache1 *pCache = (PCache1 *)p; +#endif + + assert( offsetof(PgHdr1,page)==0 ); + assert( pCache->bPurgeable || createFlag!=1 ); + assert( pCache->bPurgeable || pCache->nMin==0 ); + assert( pCache->bPurgeable==0 || pCache->nMin==10 ); + assert( pCache->nMin==0 || pCache->bPurgeable ); + assert( pCache->nHash>0 ); +#if PCACHE1_MIGHT_USE_GROUP_MUTEX + if( pCache->pGroup->mutex ){ + return (sqlite3_pcache_page*)pcache1FetchWithMutex(p, iKey, createFlag); + }else +#endif + { + return (sqlite3_pcache_page*)pcache1FetchNoMutex(p, iKey, createFlag); + } +} + + +/* +** Implementation of the sqlite3_pcache.xUnpin method. +** +** Mark a page as unpinned (eligible for asynchronous recycling). +*/ +static void pcache1Unpin( + sqlite3_pcache *p, + sqlite3_pcache_page *pPg, + int reuseUnlikely +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage = (PgHdr1 *)pPg; + PGroup *pGroup = pCache->pGroup; + + assert( pPage->pCache==pCache ); + pcache1EnterMutex(pGroup); + + /* It is an error to call this function if the page is already + ** part of the PGroup LRU list. + */ + assert( pPage->pLruNext==0 ); + assert( PAGE_IS_PINNED(pPage) ); + + if( reuseUnlikely || pGroup->nPurgeable>pGroup->nMaxPage ){ + pcache1RemoveFromHash(pPage, 1); + }else{ + /* Add the page to the PGroup LRU list. */ + PgHdr1 **ppFirst = &pGroup->lru.pLruNext; + pPage->pLruPrev = &pGroup->lru; + (pPage->pLruNext = *ppFirst)->pLruPrev = pPage; + *ppFirst = pPage; + pCache->nRecyclable++; + } + + pcache1LeaveMutex(pCache->pGroup); +} + +/* +** Implementation of the sqlite3_pcache.xRekey method. +*/ +static void pcache1Rekey( + sqlite3_pcache *p, + sqlite3_pcache_page *pPg, + unsigned int iOld, + unsigned int iNew +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage = (PgHdr1 *)pPg; + PgHdr1 **pp; + unsigned int h; + assert( pPage->iKey==iOld ); + assert( pPage->pCache==pCache ); + + pcache1EnterMutex(pCache->pGroup); + + h = iOld%pCache->nHash; + pp = &pCache->apHash[h]; + while( (*pp)!=pPage ){ + pp = &(*pp)->pNext; + } + *pp = pPage->pNext; + + h = iNew%pCache->nHash; + pPage->iKey = iNew; + pPage->pNext = pCache->apHash[h]; + pCache->apHash[h] = pPage; + if( iNew>pCache->iMaxKey ){ + pCache->iMaxKey = iNew; + } + + pcache1LeaveMutex(pCache->pGroup); +} + +/* +** Implementation of the sqlite3_pcache.xTruncate method. +** +** Discard all unpinned pages in the cache with a page number equal to +** or greater than parameter iLimit. Any pinned pages with a page number +** equal to or greater than iLimit are implicitly unpinned. +*/ +static void pcache1Truncate(sqlite3_pcache *p, unsigned int iLimit){ + PCache1 *pCache = (PCache1 *)p; + pcache1EnterMutex(pCache->pGroup); + if( iLimit<=pCache->iMaxKey ){ + pcache1TruncateUnsafe(pCache, iLimit); + pCache->iMaxKey = iLimit-1; + } + pcache1LeaveMutex(pCache->pGroup); +} + +/* +** Implementation of the sqlite3_pcache.xDestroy method. +** +** Destroy a cache allocated using pcache1Create(). +*/ +static void pcache1Destroy(sqlite3_pcache *p){ + PCache1 *pCache = (PCache1 *)p; + PGroup *pGroup = pCache->pGroup; + assert( pCache->bPurgeable || (pCache->nMax==0 && pCache->nMin==0) ); + pcache1EnterMutex(pGroup); + if( pCache->nPage ) pcache1TruncateUnsafe(pCache, 0); + assert( pGroup->nMaxPage >= pCache->nMax ); + pGroup->nMaxPage -= pCache->nMax; + assert( pGroup->nMinPage >= pCache->nMin ); + pGroup->nMinPage -= pCache->nMin; + pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; + pcache1EnforceMaxPage(pCache); + pcache1LeaveMutex(pGroup); + sqlite3_free(pCache->pBulk); + sqlite3_free(pCache->apHash); + sqlite3_free(pCache); +} + +/* +** This function is called during initialization (sqlite3_initialize()) to +** install the default pluggable cache module, assuming the user has not +** already provided an alternative. +*/ +SQLITE_PRIVATE void sqlite3PCacheSetDefault(void){ + static const sqlite3_pcache_methods2 defaultMethods = { + 1, /* iVersion */ + 0, /* pArg */ + pcache1Init, /* xInit */ + pcache1Shutdown, /* xShutdown */ + pcache1Create, /* xCreate */ + pcache1Cachesize, /* xCachesize */ + pcache1Pagecount, /* xPagecount */ + pcache1Fetch, /* xFetch */ + pcache1Unpin, /* xUnpin */ + pcache1Rekey, /* xRekey */ + pcache1Truncate, /* xTruncate */ + pcache1Destroy, /* xDestroy */ + pcache1Shrink /* xShrink */ + }; + sqlite3_config(SQLITE_CONFIG_PCACHE2, &defaultMethods); +} + +/* +** Return the size of the header on each page of this PCACHE implementation. +*/ +SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void){ return ROUND8(sizeof(PgHdr1)); } + +/* +** Return the global mutex used by this PCACHE implementation. The +** sqlite3_status() routine needs access to this mutex. +*/ +SQLITE_PRIVATE sqlite3_mutex *sqlite3Pcache1Mutex(void){ + return pcache1.mutex; +} + +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT +/* +** This function is called to free superfluous dynamically allocated memory +** held by the pager system. Memory in use by any SQLite pager allocated +** by the current thread may be sqlite3_free()ed. +** +** nReq is the number of bytes of memory required. Once this much has +** been released, the function returns. The return value is the total number +** of bytes of memory released. +*/ +SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){ + int nFree = 0; + assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); + assert( sqlite3_mutex_notheld(pcache1.mutex) ); + if( sqlite3GlobalConfig.pPage==0 ){ + PgHdr1 *p; + pcache1EnterMutex(&pcache1.grp); + while( (nReq<0 || nFreeisAnchor==0 + ){ + nFree += pcache1MemSize(p->page.pBuf); +#ifdef SQLITE_PCACHE_SEPARATE_HEADER + nFree += sqlite3MemSize(p); +#endif + assert( PAGE_IS_UNPINNED(p) ); + pcache1PinPage(p); + pcache1RemoveFromHash(p, 1); + } + pcache1LeaveMutex(&pcache1.grp); + } + return nFree; +} +#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ + +#ifdef SQLITE_TEST +/* +** This function is used by test procedures to inspect the internal state +** of the global cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheStats( + int *pnCurrent, /* OUT: Total number of pages cached */ + int *pnMax, /* OUT: Global maximum cache size */ + int *pnMin, /* OUT: Sum of PCache1.nMin for purgeable caches */ + int *pnRecyclable /* OUT: Total number of pages available for recycling */ +){ + PgHdr1 *p; + int nRecyclable = 0; + for(p=pcache1.grp.lru.pLruNext; p && !p->isAnchor; p=p->pLruNext){ + assert( PAGE_IS_UNPINNED(p) ); + nRecyclable++; + } + *pnCurrent = pcache1.grp.nPurgeable; + *pnMax = (int)pcache1.grp.nMaxPage; + *pnMin = (int)pcache1.grp.nMinPage; + *pnRecyclable = nRecyclable; +} +#endif + +/************** End of pcache1.c *********************************************/ +/************** Begin file rowset.c ******************************************/ +/* +** 2008 December 3 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This module implements an object we call a "RowSet". +** +** The RowSet object is a collection of rowids. Rowids +** are inserted into the RowSet in an arbitrary order. Inserts +** can be intermixed with tests to see if a given rowid has been +** previously inserted into the RowSet. +** +** After all inserts are finished, it is possible to extract the +** elements of the RowSet in sorted order. Once this extraction +** process has started, no new elements may be inserted. +** +** Hence, the primitive operations for a RowSet are: +** +** CREATE +** INSERT +** TEST +** SMALLEST +** DESTROY +** +** The CREATE and DESTROY primitives are the constructor and destructor, +** obviously. The INSERT primitive adds a new element to the RowSet. +** TEST checks to see if an element is already in the RowSet. SMALLEST +** extracts the least value from the RowSet. +** +** The INSERT primitive might allocate additional memory. Memory is +** allocated in chunks so most INSERTs do no allocation. There is an +** upper bound on the size of allocated memory. No memory is freed +** until DESTROY. +** +** The TEST primitive includes a "batch" number. The TEST primitive +** will only see elements that were inserted before the last change +** in the batch number. In other words, if an INSERT occurs between +** two TESTs where the TESTs have the same batch nubmer, then the +** value added by the INSERT will not be visible to the second TEST. +** The initial batch number is zero, so if the very first TEST contains +** a non-zero batch number, it will see all prior INSERTs. +** +** No INSERTs may occurs after a SMALLEST. An assertion will fail if +** that is attempted. +** +** The cost of an INSERT is roughly constant. (Sometimes new memory +** has to be allocated on an INSERT.) The cost of a TEST with a new +** batch number is O(NlogN) where N is the number of elements in the RowSet. +** The cost of a TEST using the same batch number is O(logN). The cost +** of the first SMALLEST is O(NlogN). Second and subsequent SMALLEST +** primitives are constant time. The cost of DESTROY is O(N). +** +** TEST and SMALLEST may not be used by the same RowSet. This used to +** be possible, but the feature was not used, so it was removed in order +** to simplify the code. +*/ +/* #include "sqliteInt.h" */ + + +/* +** Target size for allocation chunks. +*/ +#define ROWSET_ALLOCATION_SIZE 1024 + +/* +** The number of rowset entries per allocation chunk. +*/ +#define ROWSET_ENTRY_PER_CHUNK \ + ((ROWSET_ALLOCATION_SIZE-8)/sizeof(struct RowSetEntry)) + +/* +** Each entry in a RowSet is an instance of the following object. +** +** This same object is reused to store a linked list of trees of RowSetEntry +** objects. In that alternative use, pRight points to the next entry +** in the list, pLeft points to the tree, and v is unused. The +** RowSet.pForest value points to the head of this forest list. +*/ +struct RowSetEntry { + i64 v; /* ROWID value for this entry */ + struct RowSetEntry *pRight; /* Right subtree (larger entries) or list */ + struct RowSetEntry *pLeft; /* Left subtree (smaller entries) */ +}; + +/* +** RowSetEntry objects are allocated in large chunks (instances of the +** following structure) to reduce memory allocation overhead. The +** chunks are kept on a linked list so that they can be deallocated +** when the RowSet is destroyed. +*/ +struct RowSetChunk { + struct RowSetChunk *pNextChunk; /* Next chunk on list of them all */ + struct RowSetEntry aEntry[ROWSET_ENTRY_PER_CHUNK]; /* Allocated entries */ +}; + +/* +** A RowSet in an instance of the following structure. +** +** A typedef of this structure if found in sqliteInt.h. +*/ +struct RowSet { + struct RowSetChunk *pChunk; /* List of all chunk allocations */ + sqlite3 *db; /* The database connection */ + struct RowSetEntry *pEntry; /* List of entries using pRight */ + struct RowSetEntry *pLast; /* Last entry on the pEntry list */ + struct RowSetEntry *pFresh; /* Source of new entry objects */ + struct RowSetEntry *pForest; /* List of binary trees of entries */ + u16 nFresh; /* Number of objects on pFresh */ + u16 rsFlags; /* Various flags */ + int iBatch; /* Current insert batch */ +}; + +/* +** Allowed values for RowSet.rsFlags +*/ +#define ROWSET_SORTED 0x01 /* True if RowSet.pEntry is sorted */ +#define ROWSET_NEXT 0x02 /* True if sqlite3RowSetNext() has been called */ + +/* +** Allocate a RowSet object. Return NULL if a memory allocation +** error occurs. +*/ +SQLITE_PRIVATE RowSet *sqlite3RowSetInit(sqlite3 *db){ + RowSet *p = sqlite3DbMallocRawNN(db, sizeof(*p)); + if( p ){ + int N = sqlite3DbMallocSize(db, p); + p->pChunk = 0; + p->db = db; + p->pEntry = 0; + p->pLast = 0; + p->pForest = 0; + p->pFresh = (struct RowSetEntry*)(ROUND8(sizeof(*p)) + (char*)p); + p->nFresh = (u16)((N - ROUND8(sizeof(*p)))/sizeof(struct RowSetEntry)); + p->rsFlags = ROWSET_SORTED; + p->iBatch = 0; + } + return p; +} + +/* +** Deallocate all chunks from a RowSet. This frees all memory that +** the RowSet has allocated over its lifetime. This routine is +** the destructor for the RowSet. +*/ +SQLITE_PRIVATE void sqlite3RowSetClear(void *pArg){ + RowSet *p = (RowSet*)pArg; + struct RowSetChunk *pChunk, *pNextChunk; + for(pChunk=p->pChunk; pChunk; pChunk = pNextChunk){ + pNextChunk = pChunk->pNextChunk; + sqlite3DbFree(p->db, pChunk); + } + p->pChunk = 0; + p->nFresh = 0; + p->pEntry = 0; + p->pLast = 0; + p->pForest = 0; + p->rsFlags = ROWSET_SORTED; +} + +/* +** Deallocate all chunks from a RowSet. This frees all memory that +** the RowSet has allocated over its lifetime. This routine is +** the destructor for the RowSet. +*/ +SQLITE_PRIVATE void sqlite3RowSetDelete(void *pArg){ + sqlite3RowSetClear(pArg); + sqlite3DbFree(((RowSet*)pArg)->db, pArg); +} + +/* +** Allocate a new RowSetEntry object that is associated with the +** given RowSet. Return a pointer to the new and completely uninitialized +** object. +** +** In an OOM situation, the RowSet.db->mallocFailed flag is set and this +** routine returns NULL. +*/ +static struct RowSetEntry *rowSetEntryAlloc(RowSet *p){ + assert( p!=0 ); + if( p->nFresh==0 ){ /*OPTIMIZATION-IF-FALSE*/ + /* We could allocate a fresh RowSetEntry each time one is needed, but it + ** is more efficient to pull a preallocated entry from the pool */ + struct RowSetChunk *pNew; + pNew = sqlite3DbMallocRawNN(p->db, sizeof(*pNew)); + if( pNew==0 ){ + return 0; + } + pNew->pNextChunk = p->pChunk; + p->pChunk = pNew; + p->pFresh = pNew->aEntry; + p->nFresh = ROWSET_ENTRY_PER_CHUNK; + } + p->nFresh--; + return p->pFresh++; +} + +/* +** Insert a new value into a RowSet. +** +** The mallocFailed flag of the database connection is set if a +** memory allocation fails. +*/ +SQLITE_PRIVATE void sqlite3RowSetInsert(RowSet *p, i64 rowid){ + struct RowSetEntry *pEntry; /* The new entry */ + struct RowSetEntry *pLast; /* The last prior entry */ + + /* This routine is never called after sqlite3RowSetNext() */ + assert( p!=0 && (p->rsFlags & ROWSET_NEXT)==0 ); + + pEntry = rowSetEntryAlloc(p); + if( pEntry==0 ) return; + pEntry->v = rowid; + pEntry->pRight = 0; + pLast = p->pLast; + if( pLast ){ + if( rowid<=pLast->v ){ /*OPTIMIZATION-IF-FALSE*/ + /* Avoid unnecessary sorts by preserving the ROWSET_SORTED flags + ** where possible */ + p->rsFlags &= ~ROWSET_SORTED; + } + pLast->pRight = pEntry; + }else{ + p->pEntry = pEntry; + } + p->pLast = pEntry; +} + +/* +** Merge two lists of RowSetEntry objects. Remove duplicates. +** +** The input lists are connected via pRight pointers and are +** assumed to each already be in sorted order. +*/ +static struct RowSetEntry *rowSetEntryMerge( + struct RowSetEntry *pA, /* First sorted list to be merged */ + struct RowSetEntry *pB /* Second sorted list to be merged */ +){ + struct RowSetEntry head; + struct RowSetEntry *pTail; + + pTail = &head; + assert( pA!=0 && pB!=0 ); + for(;;){ + assert( pA->pRight==0 || pA->v<=pA->pRight->v ); + assert( pB->pRight==0 || pB->v<=pB->pRight->v ); + if( pA->v<=pB->v ){ + if( pA->vv ) pTail = pTail->pRight = pA; + pA = pA->pRight; + if( pA==0 ){ + pTail->pRight = pB; + break; + } + }else{ + pTail = pTail->pRight = pB; + pB = pB->pRight; + if( pB==0 ){ + pTail->pRight = pA; + break; + } + } + } + return head.pRight; +} + +/* +** Sort all elements on the list of RowSetEntry objects into order of +** increasing v. +*/ +static struct RowSetEntry *rowSetEntrySort(struct RowSetEntry *pIn){ + unsigned int i; + struct RowSetEntry *pNext, *aBucket[40]; + + memset(aBucket, 0, sizeof(aBucket)); + while( pIn ){ + pNext = pIn->pRight; + pIn->pRight = 0; + for(i=0; aBucket[i]; i++){ + pIn = rowSetEntryMerge(aBucket[i], pIn); + aBucket[i] = 0; + } + aBucket[i] = pIn; + pIn = pNext; + } + pIn = aBucket[0]; + for(i=1; ipLeft ){ + struct RowSetEntry *p; + rowSetTreeToList(pIn->pLeft, ppFirst, &p); + p->pRight = pIn; + }else{ + *ppFirst = pIn; + } + if( pIn->pRight ){ + rowSetTreeToList(pIn->pRight, &pIn->pRight, ppLast); + }else{ + *ppLast = pIn; + } + assert( (*ppLast)->pRight==0 ); +} + + +/* +** Convert a sorted list of elements (connected by pRight) into a binary +** tree with depth of iDepth. A depth of 1 means the tree contains a single +** node taken from the head of *ppList. A depth of 2 means a tree with +** three nodes. And so forth. +** +** Use as many entries from the input list as required and update the +** *ppList to point to the unused elements of the list. If the input +** list contains too few elements, then construct an incomplete tree +** and leave *ppList set to NULL. +** +** Return a pointer to the root of the constructed binary tree. +*/ +static struct RowSetEntry *rowSetNDeepTree( + struct RowSetEntry **ppList, + int iDepth +){ + struct RowSetEntry *p; /* Root of the new tree */ + struct RowSetEntry *pLeft; /* Left subtree */ + if( *ppList==0 ){ /*OPTIMIZATION-IF-TRUE*/ + /* Prevent unnecessary deep recursion when we run out of entries */ + return 0; + } + if( iDepth>1 ){ /*OPTIMIZATION-IF-TRUE*/ + /* This branch causes a *balanced* tree to be generated. A valid tree + ** is still generated without this branch, but the tree is wildly + ** unbalanced and inefficient. */ + pLeft = rowSetNDeepTree(ppList, iDepth-1); + p = *ppList; + if( p==0 ){ /*OPTIMIZATION-IF-FALSE*/ + /* It is safe to always return here, but the resulting tree + ** would be unbalanced */ + return pLeft; + } + p->pLeft = pLeft; + *ppList = p->pRight; + p->pRight = rowSetNDeepTree(ppList, iDepth-1); + }else{ + p = *ppList; + *ppList = p->pRight; + p->pLeft = p->pRight = 0; + } + return p; +} + +/* +** Convert a sorted list of elements into a binary tree. Make the tree +** as deep as it needs to be in order to contain the entire list. +*/ +static struct RowSetEntry *rowSetListToTree(struct RowSetEntry *pList){ + int iDepth; /* Depth of the tree so far */ + struct RowSetEntry *p; /* Current tree root */ + struct RowSetEntry *pLeft; /* Left subtree */ + + assert( pList!=0 ); + p = pList; + pList = p->pRight; + p->pLeft = p->pRight = 0; + for(iDepth=1; pList; iDepth++){ + pLeft = p; + p = pList; + pList = p->pRight; + p->pLeft = pLeft; + p->pRight = rowSetNDeepTree(&pList, iDepth); + } + return p; +} + +/* +** Extract the smallest element from the RowSet. +** Write the element into *pRowid. Return 1 on success. Return +** 0 if the RowSet is already empty. +** +** After this routine has been called, the sqlite3RowSetInsert() +** routine may not be called again. +** +** This routine may not be called after sqlite3RowSetTest() has +** been used. Older versions of RowSet allowed that, but as the +** capability was not used by the code generator, it was removed +** for code economy. +*/ +SQLITE_PRIVATE int sqlite3RowSetNext(RowSet *p, i64 *pRowid){ + assert( p!=0 ); + assert( p->pForest==0 ); /* Cannot be used with sqlite3RowSetText() */ + + /* Merge the forest into a single sorted list on first call */ + if( (p->rsFlags & ROWSET_NEXT)==0 ){ /*OPTIMIZATION-IF-FALSE*/ + if( (p->rsFlags & ROWSET_SORTED)==0 ){ /*OPTIMIZATION-IF-FALSE*/ + p->pEntry = rowSetEntrySort(p->pEntry); + } + p->rsFlags |= ROWSET_SORTED|ROWSET_NEXT; + } + + /* Return the next entry on the list */ + if( p->pEntry ){ + *pRowid = p->pEntry->v; + p->pEntry = p->pEntry->pRight; + if( p->pEntry==0 ){ /*OPTIMIZATION-IF-TRUE*/ + /* Free memory immediately, rather than waiting on sqlite3_finalize() */ + sqlite3RowSetClear(p); + } + return 1; + }else{ + return 0; + } +} + +/* +** Check to see if element iRowid was inserted into the rowset as +** part of any insert batch prior to iBatch. Return 1 or 0. +** +** If this is the first test of a new batch and if there exist entries +** on pRowSet->pEntry, then sort those entries into the forest at +** pRowSet->pForest so that they can be tested. +*/ +SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 iRowid){ + struct RowSetEntry *p, *pTree; + + /* This routine is never called after sqlite3RowSetNext() */ + assert( pRowSet!=0 && (pRowSet->rsFlags & ROWSET_NEXT)==0 ); + + /* Sort entries into the forest on the first test of a new batch. + ** To save unnecessary work, only do this when the batch number changes. + */ + if( iBatch!=pRowSet->iBatch ){ /*OPTIMIZATION-IF-FALSE*/ + p = pRowSet->pEntry; + if( p ){ + struct RowSetEntry **ppPrevTree = &pRowSet->pForest; + if( (pRowSet->rsFlags & ROWSET_SORTED)==0 ){ /*OPTIMIZATION-IF-FALSE*/ + /* Only sort the current set of entries if they need it */ + p = rowSetEntrySort(p); + } + for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){ + ppPrevTree = &pTree->pRight; + if( pTree->pLeft==0 ){ + pTree->pLeft = rowSetListToTree(p); + break; + }else{ + struct RowSetEntry *pAux, *pTail; + rowSetTreeToList(pTree->pLeft, &pAux, &pTail); + pTree->pLeft = 0; + p = rowSetEntryMerge(pAux, p); + } + } + if( pTree==0 ){ + *ppPrevTree = pTree = rowSetEntryAlloc(pRowSet); + if( pTree ){ + pTree->v = 0; + pTree->pRight = 0; + pTree->pLeft = rowSetListToTree(p); + } + } + pRowSet->pEntry = 0; + pRowSet->pLast = 0; + pRowSet->rsFlags |= ROWSET_SORTED; + } + pRowSet->iBatch = iBatch; + } + + /* Test to see if the iRowid value appears anywhere in the forest. + ** Return 1 if it does and 0 if not. + */ + for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){ + p = pTree->pLeft; + while( p ){ + if( p->vpRight; + }else if( p->v>iRowid ){ + p = p->pLeft; + }else{ + return 1; + } + } + } + return 0; +} + +/************** End of rowset.c **********************************************/ +/************** Begin file pager.c *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the implementation of the page cache subsystem or "pager". +** +** The pager is used to access a database disk file. It implements +** atomic commit and rollback through the use of a journal file that +** is separate from the database file. The pager also implements file +** locking to prevent two processes from writing the same database +** file simultaneously, or one process from reading the database while +** another is writing. +*/ +#ifndef SQLITE_OMIT_DISKIO +/* #include "sqliteInt.h" */ +/************** Include wal.h in the middle of pager.c ***********************/ +/************** Begin file wal.h *********************************************/ +/* +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface to the write-ahead logging +** system. Refer to the comments below and the header comment attached to +** the implementation of each function in log.c for further details. +*/ + +#ifndef SQLITE_WAL_H +#define SQLITE_WAL_H + +/* #include "sqliteInt.h" */ + +/* Macros for extracting appropriate sync flags for either transaction +** commits (WAL_SYNC_FLAGS(X)) or for checkpoint ops (CKPT_SYNC_FLAGS(X)): +*/ +#define WAL_SYNC_FLAGS(X) ((X)&0x03) +#define CKPT_SYNC_FLAGS(X) (((X)>>2)&0x03) + +#ifdef SQLITE_OMIT_WAL +# define sqlite3WalOpen(x,y,z) 0 +# define sqlite3WalLimit(x,y) +# define sqlite3WalClose(v,w,x,y,z) 0 +# define sqlite3WalBeginReadTransaction(y,z) 0 +# define sqlite3WalEndReadTransaction(z) +# define sqlite3WalDbsize(y) 0 +# define sqlite3WalBeginWriteTransaction(y) 0 +# define sqlite3WalEndWriteTransaction(x) 0 +# define sqlite3WalUndo(x,y,z) 0 +# define sqlite3WalSavepoint(y,z) +# define sqlite3WalSavepointUndo(y,z) 0 +# define sqlite3WalFrames(u,v,w,x,y,z) 0 +# define sqlite3WalCheckpoint(q,r,s,t,u,v,w,x,y,z) 0 +# define sqlite3WalCallback(z) 0 +# define sqlite3WalExclusiveMode(y,z) 0 +# define sqlite3WalHeapMemory(z) 0 +# define sqlite3WalFramesize(z) 0 +# define sqlite3WalFindFrame(x,y,z) 0 +# define sqlite3WalFile(x) 0 +#else + +#define WAL_SAVEPOINT_NDATA 4 + +/* Connection to a write-ahead log (WAL) file. +** There is one object of this type for each pager. +*/ +typedef struct Wal Wal; + +/* Open and close a connection to a write-ahead log. */ +SQLITE_PRIVATE int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**); +SQLITE_PRIVATE int sqlite3WalClose(Wal *pWal, sqlite3*, int sync_flags, int, u8 *); + +/* Set the limiting size of a WAL file. */ +SQLITE_PRIVATE void sqlite3WalLimit(Wal*, i64); + +/* Used by readers to open (lock) and close (unlock) a snapshot. A +** snapshot is like a read-transaction. It is the state of the database +** at an instant in time. sqlite3WalOpenSnapshot gets a read lock and +** preserves the current state even if the other threads or processes +** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the +** transaction and releases the lock. +*/ +SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *); +SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal); + +/* Read a page from the write-ahead log, if it is present. */ +SQLITE_PRIVATE int sqlite3WalFindFrame(Wal *, Pgno, u32 *); +SQLITE_PRIVATE int sqlite3WalReadFrame(Wal *, u32, int, u8 *); + +/* If the WAL is not empty, return the size of the database. */ +SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal); + +/* Obtain or release the WRITER lock. */ +SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal); +SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal); + +/* Undo any frames written (but not committed) to the log */ +SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); + +/* Return an integer that records the current (uncommitted) write +** position in the WAL */ +SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData); + +/* Move the write position of the WAL back to iFrame. Called in +** response to a ROLLBACK TO command. */ +SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData); + +/* Write a frame or frames to the log. */ +SQLITE_PRIVATE int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); + +/* Copy pages from the log to the database file */ +SQLITE_PRIVATE int sqlite3WalCheckpoint( + Wal *pWal, /* Write-ahead log connection */ + sqlite3 *db, /* Check this handle's interrupt flag */ + int eMode, /* One of PASSIVE, FULL and RESTART */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags to sync db file with (or 0) */ + int nBuf, /* Size of buffer nBuf */ + u8 *zBuf, /* Temporary buffer to use */ + int *pnLog, /* OUT: Number of frames in WAL */ + int *pnCkpt /* OUT: Number of backfilled frames in WAL */ +); + +/* Return the value to pass to a sqlite3_wal_hook callback, the +** number of frames in the WAL at the point of the last commit since +** sqlite3WalCallback() was called. If no commits have occurred since +** the last call, then return 0. +*/ +SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal); + +/* Tell the wal layer that an EXCLUSIVE lock has been obtained (or released) +** by the pager layer on the database file. +*/ +SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op); + +/* Return true if the argument is non-NULL and the WAL module is using +** heap-memory for the wal-index. Otherwise, if the argument is NULL or the +** WAL module is using shared-memory, return false. +*/ +SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal); + +#ifdef SQLITE_ENABLE_SNAPSHOT +SQLITE_PRIVATE int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot); +SQLITE_PRIVATE void sqlite3WalSnapshotOpen(Wal *pWal, sqlite3_snapshot *pSnapshot); +SQLITE_PRIVATE int sqlite3WalSnapshotRecover(Wal *pWal); +SQLITE_PRIVATE int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot); +SQLITE_PRIVATE void sqlite3WalSnapshotUnlock(Wal *pWal); +#endif + +#ifdef SQLITE_ENABLE_ZIPVFS +/* If the WAL file is not empty, return the number of bytes of content +** stored in each frame (i.e. the db page-size when the WAL was created). +*/ +SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal); +#endif + +/* Return the sqlite3_file object for the WAL file */ +SQLITE_PRIVATE sqlite3_file *sqlite3WalFile(Wal *pWal); + +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT +SQLITE_PRIVATE int sqlite3WalWriteLock(Wal *pWal, int bLock); +SQLITE_PRIVATE void sqlite3WalDb(Wal *pWal, sqlite3 *db); +#endif + +#endif /* ifndef SQLITE_OMIT_WAL */ +#endif /* SQLITE_WAL_H */ + +/************** End of wal.h *************************************************/ +/************** Continuing where we left off in pager.c **********************/ + + +/******************* NOTES ON THE DESIGN OF THE PAGER ************************ +** +** This comment block describes invariants that hold when using a rollback +** journal. These invariants do not apply for journal_mode=WAL, +** journal_mode=MEMORY, or journal_mode=OFF. +** +** Within this comment block, a page is deemed to have been synced +** automatically as soon as it is written when PRAGMA synchronous=OFF. +** Otherwise, the page is not synced until the xSync method of the VFS +** is called successfully on the file containing the page. +** +** Definition: A page of the database file is said to be "overwriteable" if +** one or more of the following are true about the page: +** +** (a) The original content of the page as it was at the beginning of +** the transaction has been written into the rollback journal and +** synced. +** +** (b) The page was a freelist leaf page at the start of the transaction. +** +** (c) The page number is greater than the largest page that existed in +** the database file at the start of the transaction. +** +** (1) A page of the database file is never overwritten unless one of the +** following are true: +** +** (a) The page and all other pages on the same sector are overwriteable. +** +** (b) The atomic page write optimization is enabled, and the entire +** transaction other than the update of the transaction sequence +** number consists of a single page change. +** +** (2) The content of a page written into the rollback journal exactly matches +** both the content in the database when the rollback journal was written +** and the content in the database at the beginning of the current +** transaction. +** +** (3) Writes to the database file are an integer multiple of the page size +** in length and are aligned on a page boundary. +** +** (4) Reads from the database file are either aligned on a page boundary and +** an integer multiple of the page size in length or are taken from the +** first 100 bytes of the database file. +** +** (5) All writes to the database file are synced prior to the rollback journal +** being deleted, truncated, or zeroed. +** +** (6) If a super-journal file is used, then all writes to the database file +** are synced prior to the super-journal being deleted. +** +** Definition: Two databases (or the same database at two points it time) +** are said to be "logically equivalent" if they give the same answer to +** all queries. Note in particular the content of freelist leaf +** pages can be changed arbitrarily without affecting the logical equivalence +** of the database. +** +** (7) At any time, if any subset, including the empty set and the total set, +** of the unsynced changes to a rollback journal are removed and the +** journal is rolled back, the resulting database file will be logically +** equivalent to the database file at the beginning of the transaction. +** +** (8) When a transaction is rolled back, the xTruncate method of the VFS +** is called to restore the database file to the same size it was at +** the beginning of the transaction. (In some VFSes, the xTruncate +** method is a no-op, but that does not change the fact the SQLite will +** invoke it.) +** +** (9) Whenever the database file is modified, at least one bit in the range +** of bytes from 24 through 39 inclusive will be changed prior to releasing +** the EXCLUSIVE lock, thus signaling other connections on the same +** database to flush their caches. +** +** (10) The pattern of bits in bytes 24 through 39 shall not repeat in less +** than one billion transactions. +** +** (11) A database file is well-formed at the beginning and at the conclusion +** of every transaction. +** +** (12) An EXCLUSIVE lock is held on the database file when writing to +** the database file. +** +** (13) A SHARED lock is held on the database file while reading any +** content out of the database file. +** +******************************************************************************/ + +/* +** Macros for troubleshooting. Normally turned off +*/ +#if 0 +int sqlite3PagerTrace=1; /* True to enable tracing */ +#define sqlite3DebugPrintf printf +#define PAGERTRACE(X) if( sqlite3PagerTrace ){ sqlite3DebugPrintf X; } +#else +#define PAGERTRACE(X) +#endif + +/* +** The following two macros are used within the PAGERTRACE() macros above +** to print out file-descriptors. +** +** PAGERID() takes a pointer to a Pager struct as its argument. The +** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file +** struct as its argument. +*/ +#define PAGERID(p) (SQLITE_PTR_TO_INT(p->fd)) +#define FILEHANDLEID(fd) (SQLITE_PTR_TO_INT(fd)) + +/* +** The Pager.eState variable stores the current 'state' of a pager. A +** pager may be in any one of the seven states shown in the following +** state diagram. +** +** OPEN <------+------+ +** | | | +** V | | +** +---------> READER-------+ | +** | | | +** | V | +** |<-------WRITER_LOCKED------> ERROR +** | | ^ +** | V | +** |<------WRITER_CACHEMOD-------->| +** | | | +** | V | +** |<-------WRITER_DBMOD---------->| +** | | | +** | V | +** +<------WRITER_FINISHED-------->+ +** +** +** List of state transitions and the C [function] that performs each: +** +** OPEN -> READER [sqlite3PagerSharedLock] +** READER -> OPEN [pager_unlock] +** +** READER -> WRITER_LOCKED [sqlite3PagerBegin] +** WRITER_LOCKED -> WRITER_CACHEMOD [pager_open_journal] +** WRITER_CACHEMOD -> WRITER_DBMOD [syncJournal] +** WRITER_DBMOD -> WRITER_FINISHED [sqlite3PagerCommitPhaseOne] +** WRITER_*** -> READER [pager_end_transaction] +** +** WRITER_*** -> ERROR [pager_error] +** ERROR -> OPEN [pager_unlock] +** +** +** OPEN: +** +** The pager starts up in this state. Nothing is guaranteed in this +** state - the file may or may not be locked and the database size is +** unknown. The database may not be read or written. +** +** * No read or write transaction is active. +** * Any lock, or no lock at all, may be held on the database file. +** * The dbSize, dbOrigSize and dbFileSize variables may not be trusted. +** +** READER: +** +** In this state all the requirements for reading the database in +** rollback (non-WAL) mode are met. Unless the pager is (or recently +** was) in exclusive-locking mode, a user-level read transaction is +** open. The database size is known in this state. +** +** A connection running with locking_mode=normal enters this state when +** it opens a read-transaction on the database and returns to state +** OPEN after the read-transaction is completed. However a connection +** running in locking_mode=exclusive (including temp databases) remains in +** this state even after the read-transaction is closed. The only way +** a locking_mode=exclusive connection can transition from READER to OPEN +** is via the ERROR state (see below). +** +** * A read transaction may be active (but a write-transaction cannot). +** * A SHARED or greater lock is held on the database file. +** * The dbSize variable may be trusted (even if a user-level read +** transaction is not active). The dbOrigSize and dbFileSize variables +** may not be trusted at this point. +** * If the database is a WAL database, then the WAL connection is open. +** * Even if a read-transaction is not open, it is guaranteed that +** there is no hot-journal in the file-system. +** +** WRITER_LOCKED: +** +** The pager moves to this state from READER when a write-transaction +** is first opened on the database. In WRITER_LOCKED state, all locks +** required to start a write-transaction are held, but no actual +** modifications to the cache or database have taken place. +** +** In rollback mode, a RESERVED or (if the transaction was opened with +** BEGIN EXCLUSIVE) EXCLUSIVE lock is obtained on the database file when +** moving to this state, but the journal file is not written to or opened +** to in this state. If the transaction is committed or rolled back while +** in WRITER_LOCKED state, all that is required is to unlock the database +** file. +** +** IN WAL mode, WalBeginWriteTransaction() is called to lock the log file. +** If the connection is running with locking_mode=exclusive, an attempt +** is made to obtain an EXCLUSIVE lock on the database file. +** +** * A write transaction is active. +** * If the connection is open in rollback-mode, a RESERVED or greater +** lock is held on the database file. +** * If the connection is open in WAL-mode, a WAL write transaction +** is open (i.e. sqlite3WalBeginWriteTransaction() has been successfully +** called). +** * The dbSize, dbOrigSize and dbFileSize variables are all valid. +** * The contents of the pager cache have not been modified. +** * The journal file may or may not be open. +** * Nothing (not even the first header) has been written to the journal. +** +** WRITER_CACHEMOD: +** +** A pager moves from WRITER_LOCKED state to this state when a page is +** first modified by the upper layer. In rollback mode the journal file +** is opened (if it is not already open) and a header written to the +** start of it. The database file on disk has not been modified. +** +** * A write transaction is active. +** * A RESERVED or greater lock is held on the database file. +** * The journal file is open and the first header has been written +** to it, but the header has not been synced to disk. +** * The contents of the page cache have been modified. +** +** WRITER_DBMOD: +** +** The pager transitions from WRITER_CACHEMOD into WRITER_DBMOD state +** when it modifies the contents of the database file. WAL connections +** never enter this state (since they do not modify the database file, +** just the log file). +** +** * A write transaction is active. +** * An EXCLUSIVE or greater lock is held on the database file. +** * The journal file is open and the first header has been written +** and synced to disk. +** * The contents of the page cache have been modified (and possibly +** written to disk). +** +** WRITER_FINISHED: +** +** It is not possible for a WAL connection to enter this state. +** +** A rollback-mode pager changes to WRITER_FINISHED state from WRITER_DBMOD +** state after the entire transaction has been successfully written into the +** database file. In this state the transaction may be committed simply +** by finalizing the journal file. Once in WRITER_FINISHED state, it is +** not possible to modify the database further. At this point, the upper +** layer must either commit or rollback the transaction. +** +** * A write transaction is active. +** * An EXCLUSIVE or greater lock is held on the database file. +** * All writing and syncing of journal and database data has finished. +** If no error occurred, all that remains is to finalize the journal to +** commit the transaction. If an error did occur, the caller will need +** to rollback the transaction. +** +** ERROR: +** +** The ERROR state is entered when an IO or disk-full error (including +** SQLITE_IOERR_NOMEM) occurs at a point in the code that makes it +** difficult to be sure that the in-memory pager state (cache contents, +** db size etc.) are consistent with the contents of the file-system. +** +** Temporary pager files may enter the ERROR state, but in-memory pagers +** cannot. +** +** For example, if an IO error occurs while performing a rollback, +** the contents of the page-cache may be left in an inconsistent state. +** At this point it would be dangerous to change back to READER state +** (as usually happens after a rollback). Any subsequent readers might +** report database corruption (due to the inconsistent cache), and if +** they upgrade to writers, they may inadvertently corrupt the database +** file. To avoid this hazard, the pager switches into the ERROR state +** instead of READER following such an error. +** +** Once it has entered the ERROR state, any attempt to use the pager +** to read or write data returns an error. Eventually, once all +** outstanding transactions have been abandoned, the pager is able to +** transition back to OPEN state, discarding the contents of the +** page-cache and any other in-memory state at the same time. Everything +** is reloaded from disk (and, if necessary, hot-journal rollback peformed) +** when a read-transaction is next opened on the pager (transitioning +** the pager into READER state). At that point the system has recovered +** from the error. +** +** Specifically, the pager jumps into the ERROR state if: +** +** 1. An error occurs while attempting a rollback. This happens in +** function sqlite3PagerRollback(). +** +** 2. An error occurs while attempting to finalize a journal file +** following a commit in function sqlite3PagerCommitPhaseTwo(). +** +** 3. An error occurs while attempting to write to the journal or +** database file in function pagerStress() in order to free up +** memory. +** +** In other cases, the error is returned to the b-tree layer. The b-tree +** layer then attempts a rollback operation. If the error condition +** persists, the pager enters the ERROR state via condition (1) above. +** +** Condition (3) is necessary because it can be triggered by a read-only +** statement executed within a transaction. In this case, if the error +** code were simply returned to the user, the b-tree layer would not +** automatically attempt a rollback, as it assumes that an error in a +** read-only statement cannot leave the pager in an internally inconsistent +** state. +** +** * The Pager.errCode variable is set to something other than SQLITE_OK. +** * There are one or more outstanding references to pages (after the +** last reference is dropped the pager should move back to OPEN state). +** * The pager is not an in-memory pager. +** +** +** Notes: +** +** * A pager is never in WRITER_DBMOD or WRITER_FINISHED state if the +** connection is open in WAL mode. A WAL connection is always in one +** of the first four states. +** +** * Normally, a connection open in exclusive mode is never in PAGER_OPEN +** state. There are two exceptions: immediately after exclusive-mode has +** been turned on (and before any read or write transactions are +** executed), and when the pager is leaving the "error state". +** +** * See also: assert_pager_state(). +*/ +#define PAGER_OPEN 0 +#define PAGER_READER 1 +#define PAGER_WRITER_LOCKED 2 +#define PAGER_WRITER_CACHEMOD 3 +#define PAGER_WRITER_DBMOD 4 +#define PAGER_WRITER_FINISHED 5 +#define PAGER_ERROR 6 + +/* +** The Pager.eLock variable is almost always set to one of the +** following locking-states, according to the lock currently held on +** the database file: NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK. +** This variable is kept up to date as locks are taken and released by +** the pagerLockDb() and pagerUnlockDb() wrappers. +** +** If the VFS xLock() or xUnlock() returns an error other than SQLITE_BUSY +** (i.e. one of the SQLITE_IOERR subtypes), it is not clear whether or not +** the operation was successful. In these circumstances pagerLockDb() and +** pagerUnlockDb() take a conservative approach - eLock is always updated +** when unlocking the file, and only updated when locking the file if the +** VFS call is successful. This way, the Pager.eLock variable may be set +** to a less exclusive (lower) value than the lock that is actually held +** at the system level, but it is never set to a more exclusive value. +** +** This is usually safe. If an xUnlock fails or appears to fail, there may +** be a few redundant xLock() calls or a lock may be held for longer than +** required, but nothing really goes wrong. +** +** The exception is when the database file is unlocked as the pager moves +** from ERROR to OPEN state. At this point there may be a hot-journal file +** in the file-system that needs to be rolled back (as part of an OPEN->SHARED +** transition, by the same pager or any other). If the call to xUnlock() +** fails at this point and the pager is left holding an EXCLUSIVE lock, this +** can confuse the call to xCheckReservedLock() call made later as part +** of hot-journal detection. +** +** xCheckReservedLock() is defined as returning true "if there is a RESERVED +** lock held by this process or any others". So xCheckReservedLock may +** return true because the caller itself is holding an EXCLUSIVE lock (but +** doesn't know it because of a previous error in xUnlock). If this happens +** a hot-journal may be mistaken for a journal being created by an active +** transaction in another process, causing SQLite to read from the database +** without rolling it back. +** +** To work around this, if a call to xUnlock() fails when unlocking the +** database in the ERROR state, Pager.eLock is set to UNKNOWN_LOCK. It +** is only changed back to a real locking state after a successful call +** to xLock(EXCLUSIVE). Also, the code to do the OPEN->SHARED state transition +** omits the check for a hot-journal if Pager.eLock is set to UNKNOWN_LOCK +** lock. Instead, it assumes a hot-journal exists and obtains an EXCLUSIVE +** lock on the database file before attempting to roll it back. See function +** PagerSharedLock() for more detail. +** +** Pager.eLock may only be set to UNKNOWN_LOCK when the pager is in +** PAGER_OPEN state. +*/ +#define UNKNOWN_LOCK (EXCLUSIVE_LOCK+1) + +/* +** The maximum allowed sector size. 64KiB. If the xSectorsize() method +** returns a value larger than this, then MAX_SECTOR_SIZE is used instead. +** This could conceivably cause corruption following a power failure on +** such a system. This is currently an undocumented limit. +*/ +#define MAX_SECTOR_SIZE 0x10000 + + +/* +** An instance of the following structure is allocated for each active +** savepoint and statement transaction in the system. All such structures +** are stored in the Pager.aSavepoint[] array, which is allocated and +** resized using sqlite3Realloc(). +** +** When a savepoint is created, the PagerSavepoint.iHdrOffset field is +** set to 0. If a journal-header is written into the main journal while +** the savepoint is active, then iHdrOffset is set to the byte offset +** immediately following the last journal record written into the main +** journal before the journal-header. This is required during savepoint +** rollback (see pagerPlaybackSavepoint()). +*/ +typedef struct PagerSavepoint PagerSavepoint; +struct PagerSavepoint { + i64 iOffset; /* Starting offset in main journal */ + i64 iHdrOffset; /* See above */ + Bitvec *pInSavepoint; /* Set of pages in this savepoint */ + Pgno nOrig; /* Original number of pages in file */ + Pgno iSubRec; /* Index of first record in sub-journal */ + int bTruncateOnRelease; /* If stmt journal may be truncated on RELEASE */ +#ifndef SQLITE_OMIT_WAL + u32 aWalData[WAL_SAVEPOINT_NDATA]; /* WAL savepoint context */ +#endif +}; + +/* +** Bits of the Pager.doNotSpill flag. See further description below. +*/ +#define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ +#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ +#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ + +/* +** An open page cache is an instance of struct Pager. A description of +** some of the more important member variables follows: +** +** eState +** +** The current 'state' of the pager object. See the comment and state +** diagram above for a description of the pager state. +** +** eLock +** +** For a real on-disk database, the current lock held on the database file - +** NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK. +** +** For a temporary or in-memory database (neither of which require any +** locks), this variable is always set to EXCLUSIVE_LOCK. Since such +** databases always have Pager.exclusiveMode==1, this tricks the pager +** logic into thinking that it already has all the locks it will ever +** need (and no reason to release them). +** +** In some (obscure) circumstances, this variable may also be set to +** UNKNOWN_LOCK. See the comment above the #define of UNKNOWN_LOCK for +** details. +** +** changeCountDone +** +** This boolean variable is used to make sure that the change-counter +** (the 4-byte header field at byte offset 24 of the database file) is +** not updated more often than necessary. +** +** It is set to true when the change-counter field is updated, which +** can only happen if an exclusive lock is held on the database file. +** It is cleared (set to false) whenever an exclusive lock is +** relinquished on the database file. Each time a transaction is committed, +** The changeCountDone flag is inspected. If it is true, the work of +** updating the change-counter is omitted for the current transaction. +** +** This mechanism means that when running in exclusive mode, a connection +** need only update the change-counter once, for the first transaction +** committed. +** +** setSuper +** +** When PagerCommitPhaseOne() is called to commit a transaction, it may +** (or may not) specify a super-journal name to be written into the +** journal file before it is synced to disk. +** +** Whether or not a journal file contains a super-journal pointer affects +** the way in which the journal file is finalized after the transaction is +** committed or rolled back when running in "journal_mode=PERSIST" mode. +** If a journal file does not contain a super-journal pointer, it is +** finalized by overwriting the first journal header with zeroes. If +** it does contain a super-journal pointer the journal file is finalized +** by truncating it to zero bytes, just as if the connection were +** running in "journal_mode=truncate" mode. +** +** Journal files that contain super-journal pointers cannot be finalized +** simply by overwriting the first journal-header with zeroes, as the +** super-journal pointer could interfere with hot-journal rollback of any +** subsequently interrupted transaction that reuses the journal file. +** +** The flag is cleared as soon as the journal file is finalized (either +** by PagerCommitPhaseTwo or PagerRollback). If an IO error prevents the +** journal file from being successfully finalized, the setSuper flag +** is cleared anyway (and the pager will move to ERROR state). +** +** doNotSpill +** +** This variables control the behavior of cache-spills (calls made by +** the pcache module to the pagerStress() routine to write cached data +** to the file-system in order to free up memory). +** +** When bits SPILLFLAG_OFF or SPILLFLAG_ROLLBACK of doNotSpill are set, +** writing to the database from pagerStress() is disabled altogether. +** The SPILLFLAG_ROLLBACK case is done in a very obscure case that +** comes up during savepoint rollback that requires the pcache module +** to allocate a new page to prevent the journal file from being written +** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF +** case is a user preference. +** +** If the SPILLFLAG_NOSYNC bit is set, writing to the database from +** pagerStress() is permitted, but syncing the journal file is not. +** This flag is set by sqlite3PagerWrite() when the file-system sector-size +** is larger than the database page-size in order to prevent a journal sync +** from happening in between the journalling of two pages on the same sector. +** +** subjInMemory +** +** This is a boolean variable. If true, then any required sub-journal +** is opened as an in-memory journal file. If false, then in-memory +** sub-journals are only used for in-memory pager files. +** +** This variable is updated by the upper layer each time a new +** write-transaction is opened. +** +** dbSize, dbOrigSize, dbFileSize +** +** Variable dbSize is set to the number of pages in the database file. +** It is valid in PAGER_READER and higher states (all states except for +** OPEN and ERROR). +** +** dbSize is set based on the size of the database file, which may be +** larger than the size of the database (the value stored at offset +** 28 of the database header by the btree). If the size of the file +** is not an integer multiple of the page-size, the value stored in +** dbSize is rounded down (i.e. a 5KB file with 2K page-size has dbSize==2). +** Except, any file that is greater than 0 bytes in size is considered +** to have at least one page. (i.e. a 1KB file with 2K page-size leads +** to dbSize==1). +** +** During a write-transaction, if pages with page-numbers greater than +** dbSize are modified in the cache, dbSize is updated accordingly. +** Similarly, if the database is truncated using PagerTruncateImage(), +** dbSize is updated. +** +** Variables dbOrigSize and dbFileSize are valid in states +** PAGER_WRITER_LOCKED and higher. dbOrigSize is a copy of the dbSize +** variable at the start of the transaction. It is used during rollback, +** and to determine whether or not pages need to be journalled before +** being modified. +** +** Throughout a write-transaction, dbFileSize contains the size of +** the file on disk in pages. It is set to a copy of dbSize when the +** write-transaction is first opened, and updated when VFS calls are made +** to write or truncate the database file on disk. +** +** The only reason the dbFileSize variable is required is to suppress +** unnecessary calls to xTruncate() after committing a transaction. If, +** when a transaction is committed, the dbFileSize variable indicates +** that the database file is larger than the database image (Pager.dbSize), +** pager_truncate() is called. The pager_truncate() call uses xFilesize() +** to measure the database file on disk, and then truncates it if required. +** dbFileSize is not used when rolling back a transaction. In this case +** pager_truncate() is called unconditionally (which means there may be +** a call to xFilesize() that is not strictly required). In either case, +** pager_truncate() may cause the file to become smaller or larger. +** +** dbHintSize +** +** The dbHintSize variable is used to limit the number of calls made to +** the VFS xFileControl(FCNTL_SIZE_HINT) method. +** +** dbHintSize is set to a copy of the dbSize variable when a +** write-transaction is opened (at the same time as dbFileSize and +** dbOrigSize). If the xFileControl(FCNTL_SIZE_HINT) method is called, +** dbHintSize is increased to the number of pages that correspond to the +** size-hint passed to the method call. See pager_write_pagelist() for +** details. +** +** errCode +** +** The Pager.errCode variable is only ever used in PAGER_ERROR state. It +** is set to zero in all other states. In PAGER_ERROR state, Pager.errCode +** is always set to SQLITE_FULL, SQLITE_IOERR or one of the SQLITE_IOERR_XXX +** sub-codes. +** +** syncFlags, walSyncFlags +** +** syncFlags is either SQLITE_SYNC_NORMAL (0x02) or SQLITE_SYNC_FULL (0x03). +** syncFlags is used for rollback mode. walSyncFlags is used for WAL mode +** and contains the flags used to sync the checkpoint operations in the +** lower two bits, and sync flags used for transaction commits in the WAL +** file in bits 0x04 and 0x08. In other words, to get the correct sync flags +** for checkpoint operations, use (walSyncFlags&0x03) and to get the correct +** sync flags for transaction commit, use ((walSyncFlags>>2)&0x03). Note +** that with synchronous=NORMAL in WAL mode, transaction commit is not synced +** meaning that the 0x04 and 0x08 bits are both zero. +*/ +struct Pager { + sqlite3_vfs *pVfs; /* OS functions to use for IO */ + u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */ + u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */ + u8 useJournal; /* Use a rollback journal on this file */ + u8 noSync; /* Do not sync the journal if true */ + u8 fullSync; /* Do extra syncs of the journal for robustness */ + u8 extraSync; /* sync directory after journal delete */ + u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */ + u8 walSyncFlags; /* See description above */ + u8 tempFile; /* zFilename is a temporary or immutable file */ + u8 noLock; /* Do not lock (except in WAL mode) */ + u8 readOnly; /* True for a read-only database */ + u8 memDb; /* True to inhibit all file I/O */ + u8 memVfs; /* VFS-implemented memory database */ + + /************************************************************************** + ** The following block contains those class members that change during + ** routine operation. Class members not in this block are either fixed + ** when the pager is first created or else only change when there is a + ** significant mode change (such as changing the page_size, locking_mode, + ** or the journal_mode). From another view, these class members describe + ** the "state" of the pager, while other class members describe the + ** "configuration" of the pager. + */ + u8 eState; /* Pager state (OPEN, READER, WRITER_LOCKED..) */ + u8 eLock; /* Current lock held on database file */ + u8 changeCountDone; /* Set after incrementing the change-counter */ + u8 setSuper; /* Super-jrnl name is written into jrnl */ + u8 doNotSpill; /* Do not spill the cache when non-zero */ + u8 subjInMemory; /* True to use in-memory sub-journals */ + u8 bUseFetch; /* True to use xFetch() */ + u8 hasHeldSharedLock; /* True if a shared lock has ever been held */ + Pgno dbSize; /* Number of pages in the database */ + Pgno dbOrigSize; /* dbSize before the current transaction */ + Pgno dbFileSize; /* Number of pages in the database file */ + Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */ + int errCode; /* One of several kinds of errors */ + int nRec; /* Pages journalled since last j-header written */ + u32 cksumInit; /* Quasi-random value added to every checksum */ + u32 nSubRec; /* Number of records written to sub-journal */ + Bitvec *pInJournal; /* One bit for each page in the database file */ + sqlite3_file *fd; /* File descriptor for database */ + sqlite3_file *jfd; /* File descriptor for main journal */ + sqlite3_file *sjfd; /* File descriptor for sub-journal */ + i64 journalOff; /* Current write offset in the journal file */ + i64 journalHdr; /* Byte offset to previous journal header */ + sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ + PagerSavepoint *aSavepoint; /* Array of active savepoints */ + int nSavepoint; /* Number of elements in aSavepoint[] */ + u32 iDataVersion; /* Changes whenever database content changes */ + char dbFileVers[16]; /* Changes whenever database file changes */ + + int nMmapOut; /* Number of mmap pages currently outstanding */ + sqlite3_int64 szMmap; /* Desired maximum mmap size */ + PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */ + /* + ** End of the routinely-changing class members + ***************************************************************************/ + + u16 nExtra; /* Add this many bytes to each in-memory page */ + i16 nReserve; /* Number of unused bytes at end of each page */ + u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */ + u32 sectorSize; /* Assumed sector size during rollback */ + Pgno mxPgno; /* Maximum allowed size of the database */ + i64 pageSize; /* Number of bytes in a page */ + i64 journalSizeLimit; /* Size limit for persistent journal files */ + char *zFilename; /* Name of the database file */ + char *zJournal; /* Name of the journal file */ + int (*xBusyHandler)(void*); /* Function to call when busy */ + void *pBusyHandlerArg; /* Context argument for xBusyHandler */ + int aStat[4]; /* Total cache hits, misses, writes, spills */ +#ifdef SQLITE_TEST + int nRead; /* Database pages read */ +#endif + void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */ + int (*xGet)(Pager*,Pgno,DbPage**,int); /* Routine to fetch a patch */ + char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ + PCache *pPCache; /* Pointer to page cache object */ +#ifndef SQLITE_OMIT_WAL + Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ + char *zWal; /* File name for write-ahead log */ +#endif +}; + +/* +** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains +** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS +** or CACHE_WRITE to sqlite3_db_status(). +*/ +#define PAGER_STAT_HIT 0 +#define PAGER_STAT_MISS 1 +#define PAGER_STAT_WRITE 2 +#define PAGER_STAT_SPILL 3 + +/* +** The following global variables hold counters used for +** testing purposes only. These variables do not exist in +** a non-testing build. These variables are not thread-safe. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */ +SQLITE_API int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */ +SQLITE_API int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */ +# define PAGER_INCR(v) v++ +#else +# define PAGER_INCR(v) +#endif + + + +/* +** Journal files begin with the following magic string. The data +** was obtained from /dev/random. It is used only as a sanity check. +** +** Since version 2.8.0, the journal format contains additional sanity +** checking information. If the power fails while the journal is being +** written, semi-random garbage data might appear in the journal +** file after power is restored. If an attempt is then made +** to roll the journal back, the database could be corrupted. The additional +** sanity checking data is an attempt to discover the garbage in the +** journal and ignore it. +** +** The sanity checking information for the new journal format consists +** of a 32-bit checksum on each page of data. The checksum covers both +** the page number and the pPager->pageSize bytes of data for the page. +** This cksum is initialized to a 32-bit random value that appears in the +** journal file right after the header. The random initializer is important, +** because garbage data that appears at the end of a journal is likely +** data that was once in other files that have now been deleted. If the +** garbage data came from an obsolete journal file, the checksums might +** be correct. But by initializing the checksum to random value which +** is different for every journal, we minimize that risk. +*/ +static const unsigned char aJournalMagic[] = { + 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7, +}; + +/* +** The size of the of each page record in the journal is given by +** the following macro. +*/ +#define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) + +/* +** The journal header size for this pager. This is usually the same +** size as a single disk sector. See also setSectorSize(). +*/ +#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize) + +/* +** The macro MEMDB is true if we are dealing with an in-memory database. +** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set, +** the value of MEMDB will be a constant and the compiler will optimize +** out code that would never execute. +*/ +#ifdef SQLITE_OMIT_MEMORYDB +# define MEMDB 0 +#else +# define MEMDB pPager->memDb +#endif + +/* +** The macro USEFETCH is true if we are allowed to use the xFetch and xUnfetch +** interfaces to access the database using memory-mapped I/O. +*/ +#if SQLITE_MAX_MMAP_SIZE>0 +# define USEFETCH(x) ((x)->bUseFetch) +#else +# define USEFETCH(x) 0 +#endif + +/* +** The argument to this macro is a file descriptor (type sqlite3_file*). +** Return 0 if it is not open, or non-zero (but not 1) if it is. +** +** This is so that expressions can be written as: +** +** if( isOpen(pPager->jfd) ){ ... +** +** instead of +** +** if( pPager->jfd->pMethods ){ ... +*/ +#define isOpen(pFd) ((pFd)->pMethods!=0) + +#ifdef SQLITE_DIRECT_OVERFLOW_READ +/* +** Return true if page pgno can be read directly from the database file +** by the b-tree layer. This is the case if: +** +** * the database file is open, +** * there are no dirty pages in the cache, and +** * the desired page is not currently in the wal file. +*/ +SQLITE_PRIVATE int sqlite3PagerDirectReadOk(Pager *pPager, Pgno pgno){ + if( pPager->fd->pMethods==0 ) return 0; + if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; +#ifndef SQLITE_OMIT_WAL + if( pPager->pWal ){ + u32 iRead = 0; + int rc; + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iRead); + return (rc==SQLITE_OK && iRead==0); + } +#endif + return 1; +} +#endif + +#ifndef SQLITE_OMIT_WAL +# define pagerUseWal(x) ((x)->pWal!=0) +#else +# define pagerUseWal(x) 0 +# define pagerRollbackWal(x) 0 +# define pagerWalFrames(v,w,x,y) 0 +# define pagerOpenWalIfPresent(z) SQLITE_OK +# define pagerBeginReadTransaction(z) SQLITE_OK +#endif + +#ifndef NDEBUG +/* +** Usage: +** +** assert( assert_pager_state(pPager) ); +** +** This function runs many asserts to try to find inconsistencies in +** the internal state of the Pager object. +*/ +static int assert_pager_state(Pager *p){ + Pager *pPager = p; + + /* State must be valid. */ + assert( p->eState==PAGER_OPEN + || p->eState==PAGER_READER + || p->eState==PAGER_WRITER_LOCKED + || p->eState==PAGER_WRITER_CACHEMOD + || p->eState==PAGER_WRITER_DBMOD + || p->eState==PAGER_WRITER_FINISHED + || p->eState==PAGER_ERROR + ); + + /* Regardless of the current state, a temp-file connection always behaves + ** as if it has an exclusive lock on the database file. It never updates + ** the change-counter field, so the changeCountDone flag is always set. + */ + assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK ); + assert( p->tempFile==0 || pPager->changeCountDone ); + + /* If the useJournal flag is clear, the journal-mode must be "OFF". + ** And if the journal-mode is "OFF", the journal file must not be open. + */ + assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal ); + assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) ); + + /* Check that MEMDB implies noSync. And an in-memory journal. Since + ** this means an in-memory pager performs no IO at all, it cannot encounter + ** either SQLITE_IOERR or SQLITE_FULL during rollback or while finalizing + ** a journal file. (although the in-memory journal implementation may + ** return SQLITE_IOERR_NOMEM while the journal file is being written). It + ** is therefore not possible for an in-memory pager to enter the ERROR + ** state. + */ + if( MEMDB ){ + assert( !isOpen(p->fd) ); + assert( p->noSync ); + assert( p->journalMode==PAGER_JOURNALMODE_OFF + || p->journalMode==PAGER_JOURNALMODE_MEMORY + ); + assert( p->eState!=PAGER_ERROR && p->eState!=PAGER_OPEN ); + assert( pagerUseWal(p)==0 ); + } + + /* If changeCountDone is set, a RESERVED lock or greater must be held + ** on the file. + */ + assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK ); + assert( p->eLock!=PENDING_LOCK ); + + switch( p->eState ){ + case PAGER_OPEN: + assert( !MEMDB ); + assert( pPager->errCode==SQLITE_OK ); + assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile ); + break; + + case PAGER_READER: + assert( pPager->errCode==SQLITE_OK ); + assert( p->eLock!=UNKNOWN_LOCK ); + assert( p->eLock>=SHARED_LOCK ); + break; + + case PAGER_WRITER_LOCKED: + assert( p->eLock!=UNKNOWN_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + if( !pagerUseWal(pPager) ){ + assert( p->eLock>=RESERVED_LOCK ); + } + assert( pPager->dbSize==pPager->dbOrigSize ); + assert( pPager->dbOrigSize==pPager->dbFileSize ); + assert( pPager->dbOrigSize==pPager->dbHintSize ); + assert( pPager->setSuper==0 ); + break; + + case PAGER_WRITER_CACHEMOD: + assert( p->eLock!=UNKNOWN_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + if( !pagerUseWal(pPager) ){ + /* It is possible that if journal_mode=wal here that neither the + ** journal file nor the WAL file are open. This happens during + ** a rollback transaction that switches from journal_mode=off + ** to journal_mode=wal. + */ + assert( p->eLock>=RESERVED_LOCK ); + assert( isOpen(p->jfd) + || p->journalMode==PAGER_JOURNALMODE_OFF + || p->journalMode==PAGER_JOURNALMODE_WAL + ); + } + assert( pPager->dbOrigSize==pPager->dbFileSize ); + assert( pPager->dbOrigSize==pPager->dbHintSize ); + break; + + case PAGER_WRITER_DBMOD: + assert( p->eLock==EXCLUSIVE_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + assert( !pagerUseWal(pPager) ); + assert( p->eLock>=EXCLUSIVE_LOCK ); + assert( isOpen(p->jfd) + || p->journalMode==PAGER_JOURNALMODE_OFF + || p->journalMode==PAGER_JOURNALMODE_WAL + || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) + ); + assert( pPager->dbOrigSize<=pPager->dbHintSize ); + break; + + case PAGER_WRITER_FINISHED: + assert( p->eLock==EXCLUSIVE_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + assert( !pagerUseWal(pPager) ); + assert( isOpen(p->jfd) + || p->journalMode==PAGER_JOURNALMODE_OFF + || p->journalMode==PAGER_JOURNALMODE_WAL + || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) + ); + break; + + case PAGER_ERROR: + /* There must be at least one outstanding reference to the pager if + ** in ERROR state. Otherwise the pager should have already dropped + ** back to OPEN state. + */ + assert( pPager->errCode!=SQLITE_OK ); + assert( sqlite3PcacheRefCount(pPager->pPCache)>0 || pPager->tempFile ); + break; + } + + return 1; +} +#endif /* ifndef NDEBUG */ + +#ifdef SQLITE_DEBUG +/* +** Return a pointer to a human readable string in a static buffer +** containing the state of the Pager object passed as an argument. This +** is intended to be used within debuggers. For example, as an alternative +** to "print *pPager" in gdb: +** +** (gdb) printf "%s", print_pager_state(pPager) +** +** This routine has external linkage in order to suppress compiler warnings +** about an unused function. It is enclosed within SQLITE_DEBUG and so does +** not appear in normal builds. +*/ +char *print_pager_state(Pager *p){ + static char zRet[1024]; + + sqlite3_snprintf(1024, zRet, + "Filename: %s\n" + "State: %s errCode=%d\n" + "Lock: %s\n" + "Locking mode: locking_mode=%s\n" + "Journal mode: journal_mode=%s\n" + "Backing store: tempFile=%d memDb=%d useJournal=%d\n" + "Journal: journalOff=%lld journalHdr=%lld\n" + "Size: dbsize=%d dbOrigSize=%d dbFileSize=%d\n" + , p->zFilename + , p->eState==PAGER_OPEN ? "OPEN" : + p->eState==PAGER_READER ? "READER" : + p->eState==PAGER_WRITER_LOCKED ? "WRITER_LOCKED" : + p->eState==PAGER_WRITER_CACHEMOD ? "WRITER_CACHEMOD" : + p->eState==PAGER_WRITER_DBMOD ? "WRITER_DBMOD" : + p->eState==PAGER_WRITER_FINISHED ? "WRITER_FINISHED" : + p->eState==PAGER_ERROR ? "ERROR" : "?error?" + , (int)p->errCode + , p->eLock==NO_LOCK ? "NO_LOCK" : + p->eLock==RESERVED_LOCK ? "RESERVED" : + p->eLock==EXCLUSIVE_LOCK ? "EXCLUSIVE" : + p->eLock==SHARED_LOCK ? "SHARED" : + p->eLock==UNKNOWN_LOCK ? "UNKNOWN" : "?error?" + , p->exclusiveMode ? "exclusive" : "normal" + , p->journalMode==PAGER_JOURNALMODE_MEMORY ? "memory" : + p->journalMode==PAGER_JOURNALMODE_OFF ? "off" : + p->journalMode==PAGER_JOURNALMODE_DELETE ? "delete" : + p->journalMode==PAGER_JOURNALMODE_PERSIST ? "persist" : + p->journalMode==PAGER_JOURNALMODE_TRUNCATE ? "truncate" : + p->journalMode==PAGER_JOURNALMODE_WAL ? "wal" : "?error?" + , (int)p->tempFile, (int)p->memDb, (int)p->useJournal + , p->journalOff, p->journalHdr + , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize + ); + + return zRet; +} +#endif + +/* Forward references to the various page getters */ +static int getPageNormal(Pager*,Pgno,DbPage**,int); +static int getPageError(Pager*,Pgno,DbPage**,int); +#if SQLITE_MAX_MMAP_SIZE>0 +static int getPageMMap(Pager*,Pgno,DbPage**,int); +#endif + +/* +** Set the Pager.xGet method for the appropriate routine used to fetch +** content from the pager. +*/ +static void setGetterMethod(Pager *pPager){ + if( pPager->errCode ){ + pPager->xGet = getPageError; +#if SQLITE_MAX_MMAP_SIZE>0 + }else if( USEFETCH(pPager) ){ + pPager->xGet = getPageMMap; +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + }else{ + pPager->xGet = getPageNormal; + } +} + +/* +** Return true if it is necessary to write page *pPg into the sub-journal. +** A page needs to be written into the sub-journal if there exists one +** or more open savepoints for which: +** +** * The page-number is less than or equal to PagerSavepoint.nOrig, and +** * The bit corresponding to the page-number is not set in +** PagerSavepoint.pInSavepoint. +*/ +static int subjRequiresPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + PagerSavepoint *p; + Pgno pgno = pPg->pgno; + int i; + for(i=0; inSavepoint; i++){ + p = &pPager->aSavepoint[i]; + if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){ + for(i=i+1; inSavepoint; i++){ + pPager->aSavepoint[i].bTruncateOnRelease = 0; + } + return 1; + } + } + return 0; +} + +#ifdef SQLITE_DEBUG +/* +** Return true if the page is already in the journal file. +*/ +static int pageInJournal(Pager *pPager, PgHdr *pPg){ + return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno); +} +#endif + +/* +** Read a 32-bit integer from the given file descriptor. Store the integer +** that is read in *pRes. Return SQLITE_OK if everything worked, or an +** error code is something goes wrong. +** +** All values are stored on disk as big-endian. +*/ +static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ + unsigned char ac[4]; + int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); + if( rc==SQLITE_OK ){ + *pRes = sqlite3Get4byte(ac); + } + return rc; +} + +/* +** Write a 32-bit integer into a string buffer in big-endian byte order. +*/ +#define put32bits(A,B) sqlite3Put4byte((u8*)A,B) + + +/* +** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK +** on success or an error code is something goes wrong. +*/ +static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ + char ac[4]; + put32bits(ac, val); + return sqlite3OsWrite(fd, ac, 4, offset); +} + +/* +** Unlock the database file to level eLock, which must be either NO_LOCK +** or SHARED_LOCK. Regardless of whether or not the call to xUnlock() +** succeeds, set the Pager.eLock variable to match the (attempted) new lock. +** +** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is +** called, do not modify it. See the comment above the #define of +** UNKNOWN_LOCK for an explanation of this. +*/ +static int pagerUnlockDb(Pager *pPager, int eLock){ + int rc = SQLITE_OK; + + assert( !pPager->exclusiveMode || pPager->eLock==eLock ); + assert( eLock==NO_LOCK || eLock==SHARED_LOCK ); + assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 ); + if( isOpen(pPager->fd) ){ + assert( pPager->eLock>=eLock ); + rc = pPager->noLock ? SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock); + if( pPager->eLock!=UNKNOWN_LOCK ){ + pPager->eLock = (u8)eLock; + } + IOTRACE(("UNLOCK %p %d\n", pPager, eLock)) + } + pPager->changeCountDone = pPager->tempFile; /* ticket fb3b3024ea238d5c */ + return rc; +} + +/* +** Lock the database file to level eLock, which must be either SHARED_LOCK, +** RESERVED_LOCK or EXCLUSIVE_LOCK. If the caller is successful, set the +** Pager.eLock variable to the new locking state. +** +** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is +** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK. +** See the comment above the #define of UNKNOWN_LOCK for an explanation +** of this. +*/ +static int pagerLockDb(Pager *pPager, int eLock){ + int rc = SQLITE_OK; + + assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK ); + if( pPager->eLockeLock==UNKNOWN_LOCK ){ + rc = pPager->noLock ? SQLITE_OK : sqlite3OsLock(pPager->fd, eLock); + if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){ + pPager->eLock = (u8)eLock; + IOTRACE(("LOCK %p %d\n", pPager, eLock)) + } + } + return rc; +} + +/* +** This function determines whether or not the atomic-write or +** atomic-batch-write optimizations can be used with this pager. The +** atomic-write optimization can be used if: +** +** (a) the value returned by OsDeviceCharacteristics() indicates that +** a database page may be written atomically, and +** (b) the value returned by OsSectorSize() is less than or equal +** to the page size. +** +** If it can be used, then the value returned is the size of the journal +** file when it contains rollback data for exactly one page. +** +** The atomic-batch-write optimization can be used if OsDeviceCharacteristics() +** returns a value with the SQLITE_IOCAP_BATCH_ATOMIC bit set. -1 is +** returned in this case. +** +** If neither optimization can be used, 0 is returned. +*/ +static int jrnlBufferSize(Pager *pPager){ + assert( !MEMDB ); + +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + int dc; /* Device characteristics */ + + assert( isOpen(pPager->fd) ); + dc = sqlite3OsDeviceCharacteristics(pPager->fd); +#else + UNUSED_PARAMETER(pPager); +#endif + +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( pPager->dbSize>0 && (dc&SQLITE_IOCAP_BATCH_ATOMIC) ){ + return -1; + } +#endif + +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + { + int nSector = pPager->sectorSize; + int szPage = pPager->pageSize; + + assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); + if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){ + return 0; + } + } + + return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); +#endif + + return 0; +} + +/* +** If SQLITE_CHECK_PAGES is defined then we do some sanity checking +** on the cache using a hash function. This is used for testing +** and debugging only. +*/ +#ifdef SQLITE_CHECK_PAGES +/* +** Return a 32-bit hash of the page data for pPage. +*/ +static u32 pager_datahash(int nByte, unsigned char *pData){ + u32 hash = 0; + int i; + for(i=0; ipPager->pageSize, (unsigned char *)pPage->pData); +} +static void pager_set_pagehash(PgHdr *pPage){ + pPage->pageHash = pager_pagehash(pPage); +} + +/* +** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES +** is defined, and NDEBUG is not defined, an assert() statement checks +** that the page is either dirty or still matches the calculated page-hash. +*/ +#define CHECK_PAGE(x) checkPage(x) +static void checkPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + assert( pPager->eState!=PAGER_ERROR ); + assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) ); +} + +#else +#define pager_datahash(X,Y) 0 +#define pager_pagehash(X) 0 +#define pager_set_pagehash(X) +#define CHECK_PAGE(x) +#endif /* SQLITE_CHECK_PAGES */ + +/* +** When this is called the journal file for pager pPager must be open. +** This function attempts to read a super-journal file name from the +** end of the file and, if successful, copies it into memory supplied +** by the caller. See comments above writeSuperJournal() for the format +** used to store a super-journal file name at the end of a journal file. +** +** zSuper must point to a buffer of at least nSuper bytes allocated by +** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is +** enough space to write the super-journal name). If the super-journal +** name in the journal is longer than nSuper bytes (including a +** nul-terminator), then this is handled as if no super-journal name +** were present in the journal. +** +** If a super-journal file name is present at the end of the journal +** file, then it is copied into the buffer pointed to by zSuper. A +** nul-terminator byte is appended to the buffer following the +** super-journal file name. +** +** If it is determined that no super-journal file name is present +** zSuper[0] is set to 0 and SQLITE_OK returned. +** +** If an error occurs while reading from the journal file, an SQLite +** error code is returned. +*/ +static int readSuperJournal(sqlite3_file *pJrnl, char *zSuper, u32 nSuper){ + int rc; /* Return code */ + u32 len; /* Length in bytes of super-journal name */ + i64 szJ; /* Total size in bytes of journal file pJrnl */ + u32 cksum; /* MJ checksum value read from journal */ + u32 u; /* Unsigned loop counter */ + unsigned char aMagic[8]; /* A buffer to hold the magic header */ + zSuper[0] = '\0'; + + if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ)) + || szJ<16 + || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len)) + || len>=nSuper + || len>szJ-16 + || len==0 + || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum)) + || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8)) + || memcmp(aMagic, aJournalMagic, 8) + || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zSuper, len, szJ-16-len)) + ){ + return rc; + } + + /* See if the checksum matches the super-journal name */ + for(u=0; ujournalOff, assuming a sector +** size of pPager->sectorSize bytes. +** +** i.e for a sector size of 512: +** +** Pager.journalOff Return value +** --------------------------------------- +** 0 0 +** 512 512 +** 100 512 +** 2000 2048 +** +*/ +static i64 journalHdrOffset(Pager *pPager){ + i64 offset = 0; + i64 c = pPager->journalOff; + if( c ){ + offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); + } + assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); + assert( offset>=c ); + assert( (offset-c)jfd) ); + assert( !sqlite3JournalIsInMemory(pPager->jfd) ); + if( pPager->journalOff ){ + const i64 iLimit = pPager->journalSizeLimit; /* Local cache of jsl */ + + IOTRACE(("JZEROHDR %p\n", pPager)) + if( doTruncate || iLimit==0 ){ + rc = sqlite3OsTruncate(pPager->jfd, 0); + }else{ + static const char zeroHdr[28] = {0}; + rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0); + } + if( rc==SQLITE_OK && !pPager->noSync ){ + rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags); + } + + /* At this point the transaction is committed but the write lock + ** is still held on the file. If there is a size limit configured for + ** the persistent journal and the journal file currently consumes more + ** space than that limit allows for, truncate it now. There is no need + ** to sync the file following this operation. + */ + if( rc==SQLITE_OK && iLimit>0 ){ + i64 sz; + rc = sqlite3OsFileSize(pPager->jfd, &sz); + if( rc==SQLITE_OK && sz>iLimit ){ + rc = sqlite3OsTruncate(pPager->jfd, iLimit); + } + } + } + return rc; +} + +/* +** The journal file must be open when this routine is called. A journal +** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the +** current location. +** +** The format for the journal header is as follows: +** - 8 bytes: Magic identifying journal format. +** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. +** - 4 bytes: Random number used for page hash. +** - 4 bytes: Initial database page count. +** - 4 bytes: Sector size used by the process that wrote this journal. +** - 4 bytes: Database page size. +** +** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space. +*/ +static int writeJournalHdr(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + char *zHeader = pPager->pTmpSpace; /* Temporary space used to build header */ + u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */ + u32 nWrite; /* Bytes of header sector written */ + int ii; /* Loop counter */ + + assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ + + if( nHeader>JOURNAL_HDR_SZ(pPager) ){ + nHeader = JOURNAL_HDR_SZ(pPager); + } + + /* If there are active savepoints and any of them were created + ** since the most recent journal header was written, update the + ** PagerSavepoint.iHdrOffset fields now. + */ + for(ii=0; iinSavepoint; ii++){ + if( pPager->aSavepoint[ii].iHdrOffset==0 ){ + pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff; + } + } + + pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager); + + /* + ** Write the nRec Field - the number of page records that follow this + ** journal header. Normally, zero is written to this value at this time. + ** After the records are added to the journal (and the journal synced, + ** if in full-sync mode), the zero is overwritten with the true number + ** of records (see syncJournal()). + ** + ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When + ** reading the journal this value tells SQLite to assume that the + ** rest of the journal file contains valid page records. This assumption + ** is dangerous, as if a failure occurred whilst writing to the journal + ** file it may contain some garbage data. There are two scenarios + ** where this risk can be ignored: + ** + ** * When the pager is in no-sync mode. Corruption can follow a + ** power failure in this case anyway. + ** + ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees + ** that garbage data is never appended to the journal file. + */ + assert( isOpen(pPager->fd) || pPager->noSync ); + if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY) + || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) + ){ + memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); + put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff); + }else{ + memset(zHeader, 0, sizeof(aJournalMagic)+4); + } + + /* The random check-hash initializer */ + sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); + put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); + /* The initial database size */ + put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize); + /* The assumed sector size for this process */ + put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize); + + /* The page size */ + put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize); + + /* Initializing the tail of the buffer is not necessary. Everything + ** works find if the following memset() is omitted. But initializing + ** the memory prevents valgrind from complaining, so we are willing to + ** take the performance hit. + */ + memset(&zHeader[sizeof(aJournalMagic)+20], 0, + nHeader-(sizeof(aJournalMagic)+20)); + + /* In theory, it is only necessary to write the 28 bytes that the + ** journal header consumes to the journal file here. Then increment the + ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next + ** record is written to the following sector (leaving a gap in the file + ** that will be implicitly filled in by the OS). + ** + ** However it has been discovered that on some systems this pattern can + ** be significantly slower than contiguously writing data to the file, + ** even if that means explicitly writing data to the block of + ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what + ** is done. + ** + ** The loop is required here in case the sector-size is larger than the + ** database page size. Since the zHeader buffer is only Pager.pageSize + ** bytes in size, more than one call to sqlite3OsWrite() may be required + ** to populate the entire journal header sector. + */ + for(nWrite=0; rc==SQLITE_OK&&nWritejournalHdr, nHeader)) + rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff); + assert( pPager->journalHdr <= pPager->journalOff ); + pPager->journalOff += nHeader; + } + + return rc; +} + +/* +** The journal file must be open when this is called. A journal header file +** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal +** file. The current location in the journal file is given by +** pPager->journalOff. See comments above function writeJournalHdr() for +** a description of the journal header format. +** +** If the header is read successfully, *pNRec is set to the number of +** page records following this header and *pDbSize is set to the size of the +** database before the transaction began, in pages. Also, pPager->cksumInit +** is set to the value read from the journal header. SQLITE_OK is returned +** in this case. +** +** If the journal header file appears to be corrupted, SQLITE_DONE is +** returned and *pNRec and *PDbSize are undefined. If JOURNAL_HDR_SZ bytes +** cannot be read from the journal file an error code is returned. +*/ +static int readJournalHdr( + Pager *pPager, /* Pager object */ + int isHot, + i64 journalSize, /* Size of the open journal file in bytes */ + u32 *pNRec, /* OUT: Value read from the nRec field */ + u32 *pDbSize /* OUT: Value of original database size field */ +){ + int rc; /* Return code */ + unsigned char aMagic[8]; /* A buffer to hold the magic header */ + i64 iHdrOff; /* Offset of journal header being read */ + + assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ + + /* Advance Pager.journalOff to the start of the next sector. If the + ** journal file is too small for there to be a header stored at this + ** point, return SQLITE_DONE. + */ + pPager->journalOff = journalHdrOffset(pPager); + if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ + return SQLITE_DONE; + } + iHdrOff = pPager->journalOff; + + /* Read in the first 8 bytes of the journal header. If they do not match + ** the magic string found at the start of each journal header, return + ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise, + ** proceed. + */ + if( isHot || iHdrOff!=pPager->journalHdr ){ + rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff); + if( rc ){ + return rc; + } + if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ + return SQLITE_DONE; + } + } + + /* Read the first three 32-bit fields of the journal header: The nRec + ** field, the checksum-initializer and the database size at the start + ** of the transaction. Return an error code if anything goes wrong. + */ + if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec)) + || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit)) + || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize)) + ){ + return rc; + } + + if( pPager->journalOff==0 ){ + u32 iPageSize; /* Page-size field of journal header */ + u32 iSectorSize; /* Sector-size field of journal header */ + + /* Read the page-size and sector-size journal header fields. */ + if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize)) + || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize)) + ){ + return rc; + } + + /* Versions of SQLite prior to 3.5.8 set the page-size field of the + ** journal header to zero. In this case, assume that the Pager.pageSize + ** variable is already set to the correct page size. + */ + if( iPageSize==0 ){ + iPageSize = pPager->pageSize; + } + + /* Check that the values read from the page-size and sector-size fields + ** are within range. To be 'in range', both values need to be a power + ** of two greater than or equal to 512 or 32, and not greater than their + ** respective compile time maximum limits. + */ + if( iPageSize<512 || iSectorSize<32 + || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE + || ((iPageSize-1)&iPageSize)!=0 || ((iSectorSize-1)&iSectorSize)!=0 + ){ + /* If the either the page-size or sector-size in the journal-header is + ** invalid, then the process that wrote the journal-header must have + ** crashed before the header was synced. In this case stop reading + ** the journal file here. + */ + return SQLITE_DONE; + } + + /* Update the page-size to match the value read from the journal. + ** Use a testcase() macro to make sure that malloc failure within + ** PagerSetPagesize() is tested. + */ + rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1); + testcase( rc!=SQLITE_OK ); + + /* Update the assumed sector-size to match the value used by + ** the process that created this journal. If this journal was + ** created by a process other than this one, then this routine + ** is being called from within pager_playback(). The local value + ** of Pager.sectorSize is restored at the end of that routine. + */ + pPager->sectorSize = iSectorSize; + } + + pPager->journalOff += JOURNAL_HDR_SZ(pPager); + return rc; +} + + +/* +** Write the supplied super-journal name into the journal file for pager +** pPager at the current location. The super-journal name must be the last +** thing written to a journal file. If the pager is in full-sync mode, the +** journal file descriptor is advanced to the next sector boundary before +** anything is written. The format is: +** +** + 4 bytes: PAGER_MJ_PGNO. +** + N bytes: super-journal filename in utf-8. +** + 4 bytes: N (length of super-journal name in bytes, no nul-terminator). +** + 4 bytes: super-journal name checksum. +** + 8 bytes: aJournalMagic[]. +** +** The super-journal page checksum is the sum of the bytes in thesuper-journal +** name, where each byte is interpreted as a signed 8-bit integer. +** +** If zSuper is a NULL pointer (occurs for a single database transaction), +** this call is a no-op. +*/ +static int writeSuperJournal(Pager *pPager, const char *zSuper){ + int rc; /* Return code */ + int nSuper; /* Length of string zSuper */ + i64 iHdrOff; /* Offset of header in journal file */ + i64 jrnlSize; /* Size of journal file on disk */ + u32 cksum = 0; /* Checksum of string zSuper */ + + assert( pPager->setSuper==0 ); + assert( !pagerUseWal(pPager) ); + + if( !zSuper + || pPager->journalMode==PAGER_JOURNALMODE_MEMORY + || !isOpen(pPager->jfd) + ){ + return SQLITE_OK; + } + pPager->setSuper = 1; + assert( pPager->journalHdr <= pPager->journalOff ); + + /* Calculate the length in bytes and the checksum of zSuper */ + for(nSuper=0; zSuper[nSuper]; nSuper++){ + cksum += zSuper[nSuper]; + } + + /* If in full-sync mode, advance to the next disk sector before writing + ** the super-journal name. This is in case the previous page written to + ** the journal has already been synced. + */ + if( pPager->fullSync ){ + pPager->journalOff = journalHdrOffset(pPager); + } + iHdrOff = pPager->journalOff; + + /* Write the super-journal data to the end of the journal file. If + ** an error occurs, return the error code to the caller. + */ + if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager)))) + || (0 != (rc = sqlite3OsWrite(pPager->jfd, zSuper, nSuper, iHdrOff+4))) + || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nSuper, nSuper))) + || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nSuper+4, cksum))) + || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, + iHdrOff+4+nSuper+8))) + ){ + return rc; + } + pPager->journalOff += (nSuper+20); + + /* If the pager is in peristent-journal mode, then the physical + ** journal-file may extend past the end of the super-journal name + ** and 8 bytes of magic data just written to the file. This is + ** dangerous because the code to rollback a hot-journal file + ** will not be able to find the super-journal name to determine + ** whether or not the journal is hot. + ** + ** Easiest thing to do in this scenario is to truncate the journal + ** file to the required size. + */ + if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize)) + && jrnlSize>pPager->journalOff + ){ + rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff); + } + return rc; +} + +/* +** Discard the entire contents of the in-memory page-cache. +*/ +static void pager_reset(Pager *pPager){ + pPager->iDataVersion++; + sqlite3BackupRestart(pPager->pBackup); + sqlite3PcacheClear(pPager->pPCache); +} + +/* +** Return the pPager->iDataVersion value +*/ +SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager *pPager){ + return pPager->iDataVersion; +} + +/* +** Free all structures in the Pager.aSavepoint[] array and set both +** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal +** if it is open and the pager is not in exclusive mode. +*/ +static void releaseAllSavepoints(Pager *pPager){ + int ii; /* Iterator for looping through Pager.aSavepoint */ + for(ii=0; iinSavepoint; ii++){ + sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); + } + if( !pPager->exclusiveMode || sqlite3JournalIsInMemory(pPager->sjfd) ){ + sqlite3OsClose(pPager->sjfd); + } + sqlite3_free(pPager->aSavepoint); + pPager->aSavepoint = 0; + pPager->nSavepoint = 0; + pPager->nSubRec = 0; +} + +/* +** Set the bit number pgno in the PagerSavepoint.pInSavepoint +** bitvecs of all open savepoints. Return SQLITE_OK if successful +** or SQLITE_NOMEM if a malloc failure occurs. +*/ +static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ + int ii; /* Loop counter */ + int rc = SQLITE_OK; /* Result code */ + + for(ii=0; iinSavepoint; ii++){ + PagerSavepoint *p = &pPager->aSavepoint[ii]; + if( pgno<=p->nOrig ){ + rc |= sqlite3BitvecSet(p->pInSavepoint, pgno); + testcase( rc==SQLITE_NOMEM ); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + } + } + return rc; +} + +/* +** This function is a no-op if the pager is in exclusive mode and not +** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN +** state. +** +** If the pager is not in exclusive-access mode, the database file is +** completely unlocked. If the file is unlocked and the file-system does +** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is +** closed (if it is open). +** +** If the pager is in ERROR state when this function is called, the +** contents of the pager cache are discarded before switching back to +** the OPEN state. Regardless of whether the pager is in exclusive-mode +** or not, any journal file left in the file-system will be treated +** as a hot-journal and rolled back the next time a read-transaction +** is opened (by this or by any other connection). +*/ +static void pager_unlock(Pager *pPager){ + + assert( pPager->eState==PAGER_READER + || pPager->eState==PAGER_OPEN + || pPager->eState==PAGER_ERROR + ); + + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + releaseAllSavepoints(pPager); + + if( pagerUseWal(pPager) ){ + assert( !isOpen(pPager->jfd) ); + sqlite3WalEndReadTransaction(pPager->pWal); + pPager->eState = PAGER_OPEN; + }else if( !pPager->exclusiveMode ){ + int rc; /* Error code returned by pagerUnlockDb() */ + int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0; + + /* If the operating system support deletion of open files, then + ** close the journal file when dropping the database lock. Otherwise + ** another connection with journal_mode=delete might delete the file + ** out from under us. + */ + assert( (PAGER_JOURNALMODE_MEMORY & 5)!=1 ); + assert( (PAGER_JOURNALMODE_OFF & 5)!=1 ); + assert( (PAGER_JOURNALMODE_WAL & 5)!=1 ); + assert( (PAGER_JOURNALMODE_DELETE & 5)!=1 ); + assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); + assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); + if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN) + || 1!=(pPager->journalMode & 5) + ){ + sqlite3OsClose(pPager->jfd); + } + + /* If the pager is in the ERROR state and the call to unlock the database + ** file fails, set the current lock to UNKNOWN_LOCK. See the comment + ** above the #define for UNKNOWN_LOCK for an explanation of why this + ** is necessary. + */ + rc = pagerUnlockDb(pPager, NO_LOCK); + if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){ + pPager->eLock = UNKNOWN_LOCK; + } + + /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here + ** without clearing the error code. This is intentional - the error + ** code is cleared and the cache reset in the block below. + */ + assert( pPager->errCode || pPager->eState!=PAGER_ERROR ); + pPager->eState = PAGER_OPEN; + } + + /* If Pager.errCode is set, the contents of the pager cache cannot be + ** trusted. Now that there are no outstanding references to the pager, + ** it can safely move back to PAGER_OPEN state. This happens in both + ** normal and exclusive-locking mode. + */ + assert( pPager->errCode==SQLITE_OK || !MEMDB ); + if( pPager->errCode ){ + if( pPager->tempFile==0 ){ + pager_reset(pPager); + pPager->changeCountDone = 0; + pPager->eState = PAGER_OPEN; + }else{ + pPager->eState = (isOpen(pPager->jfd) ? PAGER_OPEN : PAGER_READER); + } + if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); + pPager->errCode = SQLITE_OK; + setGetterMethod(pPager); + } + + pPager->journalOff = 0; + pPager->journalHdr = 0; + pPager->setSuper = 0; +} + +/* +** This function is called whenever an IOERR or FULL error that requires +** the pager to transition into the ERROR state may ahve occurred. +** The first argument is a pointer to the pager structure, the second +** the error-code about to be returned by a pager API function. The +** value returned is a copy of the second argument to this function. +** +** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the +** IOERR sub-codes, the pager enters the ERROR state and the error code +** is stored in Pager.errCode. While the pager remains in the ERROR state, +** all major API calls on the Pager will immediately return Pager.errCode. +** +** The ERROR state indicates that the contents of the pager-cache +** cannot be trusted. This state can be cleared by completely discarding +** the contents of the pager-cache. If a transaction was active when +** the persistent error occurred, then the rollback journal may need +** to be replayed to restore the contents of the database file (as if +** it were a hot-journal). +*/ +static int pager_error(Pager *pPager, int rc){ + int rc2 = rc & 0xff; + assert( rc==SQLITE_OK || !MEMDB ); + assert( + pPager->errCode==SQLITE_FULL || + pPager->errCode==SQLITE_OK || + (pPager->errCode & 0xff)==SQLITE_IOERR + ); + if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){ + pPager->errCode = rc; + pPager->eState = PAGER_ERROR; + setGetterMethod(pPager); + } + return rc; +} + +static int pager_truncate(Pager *pPager, Pgno nPage); + +/* +** The write transaction open on pPager is being committed (bCommit==1) +** or rolled back (bCommit==0). +** +** Return TRUE if and only if all dirty pages should be flushed to disk. +** +** Rules: +** +** * For non-TEMP databases, always sync to disk. This is necessary +** for transactions to be durable. +** +** * Sync TEMP database only on a COMMIT (not a ROLLBACK) when the backing +** file has been created already (via a spill on pagerStress()) and +** when the number of dirty pages in memory exceeds 25% of the total +** cache size. +*/ +static int pagerFlushOnCommit(Pager *pPager, int bCommit){ + if( pPager->tempFile==0 ) return 1; + if( !bCommit ) return 0; + if( !isOpen(pPager->fd) ) return 0; + return (sqlite3PCachePercentDirty(pPager->pPCache)>=25); +} + +/* +** This routine ends a transaction. A transaction is usually ended by +** either a COMMIT or a ROLLBACK operation. This routine may be called +** after rollback of a hot-journal, or if an error occurs while opening +** the journal file or writing the very first journal-header of a +** database transaction. +** +** This routine is never called in PAGER_ERROR state. If it is called +** in PAGER_NONE or PAGER_SHARED state and the lock held is less +** exclusive than a RESERVED lock, it is a no-op. +** +** Otherwise, any active savepoints are released. +** +** If the journal file is open, then it is "finalized". Once a journal +** file has been finalized it is not possible to use it to roll back a +** transaction. Nor will it be considered to be a hot-journal by this +** or any other database connection. Exactly how a journal is finalized +** depends on whether or not the pager is running in exclusive mode and +** the current journal-mode (Pager.journalMode value), as follows: +** +** journalMode==MEMORY +** Journal file descriptor is simply closed. This destroys an +** in-memory journal. +** +** journalMode==TRUNCATE +** Journal file is truncated to zero bytes in size. +** +** journalMode==PERSIST +** The first 28 bytes of the journal file are zeroed. This invalidates +** the first journal header in the file, and hence the entire journal +** file. An invalid journal file cannot be rolled back. +** +** journalMode==DELETE +** The journal file is closed and deleted using sqlite3OsDelete(). +** +** If the pager is running in exclusive mode, this method of finalizing +** the journal file is never used. Instead, if the journalMode is +** DELETE and the pager is in exclusive mode, the method described under +** journalMode==PERSIST is used instead. +** +** After the journal is finalized, the pager moves to PAGER_READER state. +** If running in non-exclusive rollback mode, the lock on the file is +** downgraded to a SHARED_LOCK. +** +** SQLITE_OK is returned if no error occurs. If an error occurs during +** any of the IO operations to finalize the journal file or unlock the +** database then the IO error code is returned to the user. If the +** operation to finalize the journal file fails, then the code still +** tries to unlock the database file if not in exclusive mode. If the +** unlock operation fails as well, then the first error code related +** to the first error encountered (the journal finalization one) is +** returned. +*/ +static int pager_end_transaction(Pager *pPager, int hasSuper, int bCommit){ + int rc = SQLITE_OK; /* Error code from journal finalization operation */ + int rc2 = SQLITE_OK; /* Error code from db file unlock operation */ + + /* Do nothing if the pager does not have an open write transaction + ** or at least a RESERVED lock. This function may be called when there + ** is no write-transaction active but a RESERVED or greater lock is + ** held under two circumstances: + ** + ** 1. After a successful hot-journal rollback, it is called with + ** eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK. + ** + ** 2. If a connection with locking_mode=exclusive holding an EXCLUSIVE + ** lock switches back to locking_mode=normal and then executes a + ** read-transaction, this function is called with eState==PAGER_READER + ** and eLock==EXCLUSIVE_LOCK when the read-transaction is closed. + */ + assert( assert_pager_state(pPager) ); + assert( pPager->eState!=PAGER_ERROR ); + if( pPager->eStateeLockjfd) || pPager->pInJournal==0 + || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_BATCH_ATOMIC) + ); + if( isOpen(pPager->jfd) ){ + assert( !pagerUseWal(pPager) ); + + /* Finalize the journal file. */ + if( sqlite3JournalIsInMemory(pPager->jfd) ){ + /* assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); */ + sqlite3OsClose(pPager->jfd); + }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){ + if( pPager->journalOff==0 ){ + rc = SQLITE_OK; + }else{ + rc = sqlite3OsTruncate(pPager->jfd, 0); + if( rc==SQLITE_OK && pPager->fullSync ){ + /* Make sure the new file size is written into the inode right away. + ** Otherwise the journal might resurrect following a power loss and + ** cause the last transaction to roll back. See + ** https://bugzilla.mozilla.org/show_bug.cgi?id=1072773 + */ + rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); + } + } + pPager->journalOff = 0; + }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST + || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL) + ){ + rc = zeroJournalHdr(pPager, hasSuper||pPager->tempFile); + pPager->journalOff = 0; + }else{ + /* This branch may be executed with Pager.journalMode==MEMORY if + ** a hot-journal was just rolled back. In this case the journal + ** file should be closed and deleted. If this connection writes to + ** the database file, it will do so using an in-memory journal. + */ + int bDelete = !pPager->tempFile; + assert( sqlite3JournalIsInMemory(pPager->jfd)==0 ); + assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE + || pPager->journalMode==PAGER_JOURNALMODE_MEMORY + || pPager->journalMode==PAGER_JOURNALMODE_WAL + ); + sqlite3OsClose(pPager->jfd); + if( bDelete ){ + rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, pPager->extraSync); + } + } + } + +#ifdef SQLITE_CHECK_PAGES + sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash); + if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){ + PgHdr *p = sqlite3PagerLookup(pPager, 1); + if( p ){ + p->pageHash = 0; + sqlite3PagerUnrefNotNull(p); + } + } +#endif + + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + pPager->nRec = 0; + if( rc==SQLITE_OK ){ + if( MEMDB || pagerFlushOnCommit(pPager, bCommit) ){ + sqlite3PcacheCleanAll(pPager->pPCache); + }else{ + sqlite3PcacheClearWritable(pPager->pPCache); + } + sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize); + } + + if( pagerUseWal(pPager) ){ + /* Drop the WAL write-lock, if any. Also, if the connection was in + ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE + ** lock held on the database file. + */ + rc2 = sqlite3WalEndWriteTransaction(pPager->pWal); + assert( rc2==SQLITE_OK ); + }else if( rc==SQLITE_OK && bCommit && pPager->dbFileSize>pPager->dbSize ){ + /* This branch is taken when committing a transaction in rollback-journal + ** mode if the database file on disk is larger than the database image. + ** At this point the journal has been finalized and the transaction + ** successfully committed, but the EXCLUSIVE lock is still held on the + ** file. So it is safe to truncate the database file to its minimum + ** required size. */ + assert( pPager->eLock==EXCLUSIVE_LOCK ); + rc = pager_truncate(pPager, pPager->dbSize); + } + + if( rc==SQLITE_OK && bCommit ){ + rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0); + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + } + + if( !pPager->exclusiveMode + && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0)) + ){ + rc2 = pagerUnlockDb(pPager, SHARED_LOCK); + } + pPager->eState = PAGER_READER; + pPager->setSuper = 0; + + return (rc==SQLITE_OK?rc2:rc); +} + +/* +** Execute a rollback if a transaction is active and unlock the +** database file. +** +** If the pager has already entered the ERROR state, do not attempt +** the rollback at this time. Instead, pager_unlock() is called. The +** call to pager_unlock() will discard all in-memory pages, unlock +** the database file and move the pager back to OPEN state. If this +** means that there is a hot-journal left in the file-system, the next +** connection to obtain a shared lock on the pager (which may be this one) +** will roll it back. +** +** If the pager has not already entered the ERROR state, but an IO or +** malloc error occurs during a rollback, then this will itself cause +** the pager to enter the ERROR state. Which will be cleared by the +** call to pager_unlock(), as described above. +*/ +static void pagerUnlockAndRollback(Pager *pPager){ + if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){ + assert( assert_pager_state(pPager) ); + if( pPager->eState>=PAGER_WRITER_LOCKED ){ + sqlite3BeginBenignMalloc(); + sqlite3PagerRollback(pPager); + sqlite3EndBenignMalloc(); + }else if( !pPager->exclusiveMode ){ + assert( pPager->eState==PAGER_READER ); + pager_end_transaction(pPager, 0, 0); + } + } + pager_unlock(pPager); +} + +/* +** Parameter aData must point to a buffer of pPager->pageSize bytes +** of data. Compute and return a checksum based ont the contents of the +** page of data and the current value of pPager->cksumInit. +** +** This is not a real checksum. It is really just the sum of the +** random initial value (pPager->cksumInit) and every 200th byte +** of the page data, starting with byte offset (pPager->pageSize%200). +** Each byte is interpreted as an 8-bit unsigned integer. +** +** Changing the formula used to compute this checksum results in an +** incompatible journal file format. +** +** If journal corruption occurs due to a power failure, the most likely +** scenario is that one end or the other of the record will be changed. +** It is much less likely that the two ends of the journal record will be +** correct and the middle be corrupt. Thus, this "checksum" scheme, +** though fast and simple, catches the mostly likely kind of corruption. +*/ +static u32 pager_cksum(Pager *pPager, const u8 *aData){ + u32 cksum = pPager->cksumInit; /* Checksum value to return */ + int i = pPager->pageSize-200; /* Loop counter */ + while( i>0 ){ + cksum += aData[i]; + i -= 200; + } + return cksum; +} + +/* +** Read a single page from either the journal file (if isMainJrnl==1) or +** from the sub-journal (if isMainJrnl==0) and playback that page. +** The page begins at offset *pOffset into the file. The *pOffset +** value is increased to the start of the next page in the journal. +** +** The main rollback journal uses checksums - the statement journal does +** not. +** +** If the page number of the page record read from the (sub-)journal file +** is greater than the current value of Pager.dbSize, then playback is +** skipped and SQLITE_OK is returned. +** +** If pDone is not NULL, then it is a record of pages that have already +** been played back. If the page at *pOffset has already been played back +** (if the corresponding pDone bit is set) then skip the playback. +** Make sure the pDone bit corresponding to the *pOffset page is set +** prior to returning. +** +** If the page record is successfully read from the (sub-)journal file +** and played back, then SQLITE_OK is returned. If an IO error occurs +** while reading the record from the (sub-)journal file or while writing +** to the database file, then the IO error code is returned. If data +** is successfully read from the (sub-)journal file but appears to be +** corrupted, SQLITE_DONE is returned. Data is considered corrupted in +** two circumstances: +** +** * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or +** * If the record is being rolled back from the main journal file +** and the checksum field does not match the record content. +** +** Neither of these two scenarios are possible during a savepoint rollback. +** +** If this is a savepoint rollback, then memory may have to be dynamically +** allocated by this function. If this is the case and an allocation fails, +** SQLITE_NOMEM is returned. +*/ +static int pager_playback_one_page( + Pager *pPager, /* The pager being played back */ + i64 *pOffset, /* Offset of record to playback */ + Bitvec *pDone, /* Bitvec of pages already played back */ + int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. */ + int isSavepnt /* True for a savepoint rollback */ +){ + int rc; + PgHdr *pPg; /* An existing page in the cache */ + Pgno pgno; /* The page number of a page in journal */ + u32 cksum; /* Checksum used for sanity checking */ + char *aData; /* Temporary storage for the page */ + sqlite3_file *jfd; /* The file descriptor for the journal file */ + int isSynced; /* True if journal page is synced */ + + assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 */ + assert( (isSavepnt&~1)==0 ); /* isSavepnt is 0 or 1 */ + assert( isMainJrnl || pDone ); /* pDone always used on sub-journals */ + assert( isSavepnt || pDone==0 ); /* pDone never used on non-savepoint */ + + aData = pPager->pTmpSpace; + assert( aData ); /* Temp storage must have already been allocated */ + assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) ); + + /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction + ** or savepoint rollback done at the request of the caller) or this is + ** a hot-journal rollback. If it is a hot-journal rollback, the pager + ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback + ** only reads from the main journal, not the sub-journal. + */ + assert( pPager->eState>=PAGER_WRITER_CACHEMOD + || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK) + ); + assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl ); + + /* Read the page number and page data from the journal or sub-journal + ** file. Return an error code to the caller if an IO error occurs. + */ + jfd = isMainJrnl ? pPager->jfd : pPager->sjfd; + rc = read32bits(jfd, *pOffset, &pgno); + if( rc!=SQLITE_OK ) return rc; + rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4); + if( rc!=SQLITE_OK ) return rc; + *pOffset += pPager->pageSize + 4 + isMainJrnl*4; + + /* Sanity checking on the page. This is more important that I originally + ** thought. If a power failure occurs while the journal is being written, + ** it could cause invalid data to be written into the journal. We need to + ** detect this invalid data (with high probability) and ignore it. + */ + if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ + assert( !isSavepnt ); + return SQLITE_DONE; + } + if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){ + return SQLITE_OK; + } + if( isMainJrnl ){ + rc = read32bits(jfd, (*pOffset)-4, &cksum); + if( rc ) return rc; + if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){ + return SQLITE_DONE; + } + } + + /* If this page has already been played back before during the current + ** rollback, then don't bother to play it back again. + */ + if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){ + return rc; + } + + /* When playing back page 1, restore the nReserve setting + */ + if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){ + pPager->nReserve = ((u8*)aData)[20]; + } + + /* If the pager is in CACHEMOD state, then there must be a copy of this + ** page in the pager cache. In this case just update the pager cache, + ** not the database file. The page is left marked dirty in this case. + ** + ** An exception to the above rule: If the database is in no-sync mode + ** and a page is moved during an incremental vacuum then the page may + ** not be in the pager cache. Later: if a malloc() or IO error occurs + ** during a Movepage() call, then the page may not be in the cache + ** either. So the condition described in the above paragraph is not + ** assert()able. + ** + ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the + ** pager cache if it exists and the main file. The page is then marked + ** not dirty. Since this code is only executed in PAGER_OPEN state for + ** a hot-journal rollback, it is guaranteed that the page-cache is empty + ** if the pager is in OPEN state. + ** + ** Ticket #1171: The statement journal might contain page content that is + ** different from the page content at the start of the transaction. + ** This occurs when a page is changed prior to the start of a statement + ** then changed again within the statement. When rolling back such a + ** statement we must not write to the original database unless we know + ** for certain that original page contents are synced into the main rollback + ** journal. Otherwise, a power loss might leave modified data in the + ** database file without an entry in the rollback journal that can + ** restore the database to its original form. Two conditions must be + ** met before writing to the database files. (1) the database must be + ** locked. (2) we know that the original page content is fully synced + ** in the main journal either because the page is not in cache or else + ** the page is marked as needSync==0. + ** + ** 2008-04-14: When attempting to vacuum a corrupt database file, it + ** is possible to fail a statement on a database that does not yet exist. + ** Do not attempt to write if database file has never been opened. + */ + if( pagerUseWal(pPager) ){ + pPg = 0; + }else{ + pPg = sqlite3PagerLookup(pPager, pgno); + } + assert( pPg || !MEMDB ); + assert( pPager->eState!=PAGER_OPEN || pPg==0 || pPager->tempFile ); + PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n", + PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData), + (isMainJrnl?"main-journal":"sub-journal") + )); + if( isMainJrnl ){ + isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr); + }else{ + isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC)); + } + if( isOpen(pPager->fd) + && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) + && isSynced + ){ + i64 ofst = (pgno-1)*(i64)pPager->pageSize; + testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); + assert( !pagerUseWal(pPager) ); + + /* Write the data read from the journal back into the database file. + ** This is usually safe even for an encrypted database - as the data + ** was encrypted before it was written to the journal file. The exception + ** is if the data was just read from an in-memory sub-journal. In that + ** case it must be encrypted here before it is copied into the database + ** file. */ + rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst); + + if( pgno>pPager->dbFileSize ){ + pPager->dbFileSize = pgno; + } + if( pPager->pBackup ){ + sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData); + } + }else if( !isMainJrnl && pPg==0 ){ + /* If this is a rollback of a savepoint and data was not written to + ** the database and the page is not in-memory, there is a potential + ** problem. When the page is next fetched by the b-tree layer, it + ** will be read from the database file, which may or may not be + ** current. + ** + ** There are a couple of different ways this can happen. All are quite + ** obscure. When running in synchronous mode, this can only happen + ** if the page is on the free-list at the start of the transaction, then + ** populated, then moved using sqlite3PagerMovepage(). + ** + ** The solution is to add an in-memory page to the cache containing + ** the data just read from the sub-journal. Mark the page as dirty + ** and if the pager requires a journal-sync, then mark the page as + ** requiring a journal-sync before it is written. + */ + assert( isSavepnt ); + assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)==0 ); + pPager->doNotSpill |= SPILLFLAG_ROLLBACK; + rc = sqlite3PagerGet(pPager, pgno, &pPg, 1); + assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)!=0 ); + pPager->doNotSpill &= ~SPILLFLAG_ROLLBACK; + if( rc!=SQLITE_OK ) return rc; + sqlite3PcacheMakeDirty(pPg); + } + if( pPg ){ + /* No page should ever be explicitly rolled back that is in use, except + ** for page 1 which is held in use in order to keep the lock on the + ** database active. However such a page may be rolled back as a result + ** of an internal error resulting in an automatic call to + ** sqlite3PagerRollback(). + */ + void *pData; + pData = pPg->pData; + memcpy(pData, (u8*)aData, pPager->pageSize); + pPager->xReiniter(pPg); + /* It used to be that sqlite3PcacheMakeClean(pPg) was called here. But + ** that call was dangerous and had no detectable benefit since the cache + ** is normally cleaned by sqlite3PcacheCleanAll() after rollback and so + ** has been removed. */ + pager_set_pagehash(pPg); + + /* If this was page 1, then restore the value of Pager.dbFileVers. + ** Do this before any decoding. */ + if( pgno==1 ){ + memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers)); + } + sqlite3PcacheRelease(pPg); + } + return rc; +} + +/* +** Parameter zSuper is the name of a super-journal file. A single journal +** file that referred to the super-journal file has just been rolled back. +** This routine checks if it is possible to delete the super-journal file, +** and does so if it is. +** +** Argument zSuper may point to Pager.pTmpSpace. So that buffer is not +** available for use within this function. +** +** When a super-journal file is created, it is populated with the names +** of all of its child journals, one after another, formatted as utf-8 +** encoded text. The end of each child journal file is marked with a +** nul-terminator byte (0x00). i.e. the entire contents of a super-journal +** file for a transaction involving two databases might be: +** +** "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00" +** +** A super-journal file may only be deleted once all of its child +** journals have been rolled back. +** +** This function reads the contents of the super-journal file into +** memory and loops through each of the child journal names. For +** each child journal, it checks if: +** +** * if the child journal exists, and if so +** * if the child journal contains a reference to super-journal +** file zSuper +** +** If a child journal can be found that matches both of the criteria +** above, this function returns without doing anything. Otherwise, if +** no such child journal can be found, file zSuper is deleted from +** the file-system using sqlite3OsDelete(). +** +** If an IO error within this function, an error code is returned. This +** function allocates memory by calling sqlite3Malloc(). If an allocation +** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors +** occur, SQLITE_OK is returned. +** +** TODO: This function allocates a single block of memory to load +** the entire contents of the super-journal file. This could be +** a couple of kilobytes or so - potentially larger than the page +** size. +*/ +static int pager_delsuper(Pager *pPager, const char *zSuper){ + sqlite3_vfs *pVfs = pPager->pVfs; + int rc; /* Return code */ + sqlite3_file *pSuper; /* Malloc'd super-journal file descriptor */ + sqlite3_file *pJournal; /* Malloc'd child-journal file descriptor */ + char *zSuperJournal = 0; /* Contents of super-journal file */ + i64 nSuperJournal; /* Size of super-journal file */ + char *zJournal; /* Pointer to one journal within MJ file */ + char *zSuperPtr; /* Space to hold super-journal filename */ + char *zFree = 0; /* Free this buffer */ + int nSuperPtr; /* Amount of space allocated to zSuperPtr[] */ + + /* Allocate space for both the pJournal and pSuper file descriptors. + ** If successful, open the super-journal file for reading. + */ + pSuper = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2); + if( !pSuper ){ + rc = SQLITE_NOMEM_BKPT; + pJournal = 0; + }else{ + const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_SUPER_JOURNAL); + rc = sqlite3OsOpen(pVfs, zSuper, pSuper, flags, 0); + pJournal = (sqlite3_file *)(((u8 *)pSuper) + pVfs->szOsFile); + } + if( rc!=SQLITE_OK ) goto delsuper_out; + + /* Load the entire super-journal file into space obtained from + ** sqlite3_malloc() and pointed to by zSuperJournal. Also obtain + ** sufficient space (in zSuperPtr) to hold the names of super-journal + ** files extracted from regular rollback-journals. + */ + rc = sqlite3OsFileSize(pSuper, &nSuperJournal); + if( rc!=SQLITE_OK ) goto delsuper_out; + nSuperPtr = pVfs->mxPathname+1; + zFree = sqlite3Malloc(4 + nSuperJournal + nSuperPtr + 2); + if( !zFree ){ + rc = SQLITE_NOMEM_BKPT; + goto delsuper_out; + } + zFree[0] = zFree[1] = zFree[2] = zFree[3] = 0; + zSuperJournal = &zFree[4]; + zSuperPtr = &zSuperJournal[nSuperJournal+2]; + rc = sqlite3OsRead(pSuper, zSuperJournal, (int)nSuperJournal, 0); + if( rc!=SQLITE_OK ) goto delsuper_out; + zSuperJournal[nSuperJournal] = 0; + zSuperJournal[nSuperJournal+1] = 0; + + zJournal = zSuperJournal; + while( (zJournal-zSuperJournal)pageSize bytes). +** If the file on disk is currently larger than nPage pages, then use the VFS +** xTruncate() method to truncate it. +** +** Or, it might be the case that the file on disk is smaller than +** nPage pages. Some operating system implementations can get confused if +** you try to truncate a file to some size that is larger than it +** currently is, so detect this case and write a single zero byte to +** the end of the new file instead. +** +** If successful, return SQLITE_OK. If an IO error occurs while modifying +** the database file, return the error code to the caller. +*/ +static int pager_truncate(Pager *pPager, Pgno nPage){ + int rc = SQLITE_OK; + assert( pPager->eState!=PAGER_ERROR ); + assert( pPager->eState!=PAGER_READER ); + + if( isOpen(pPager->fd) + && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) + ){ + i64 currentSize, newSize; + int szPage = pPager->pageSize; + assert( pPager->eLock==EXCLUSIVE_LOCK ); + /* TODO: Is it safe to use Pager.dbFileSize here? */ + rc = sqlite3OsFileSize(pPager->fd, ¤tSize); + newSize = szPage*(i64)nPage; + if( rc==SQLITE_OK && currentSize!=newSize ){ + if( currentSize>newSize ){ + rc = sqlite3OsTruncate(pPager->fd, newSize); + }else if( (currentSize+szPage)<=newSize ){ + char *pTmp = pPager->pTmpSpace; + memset(pTmp, 0, szPage); + testcase( (newSize-szPage) == currentSize ); + testcase( (newSize-szPage) > currentSize ); + rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage); + } + if( rc==SQLITE_OK ){ + pPager->dbFileSize = nPage; + } + } + } + return rc; +} + +/* +** Return a sanitized version of the sector-size of OS file pFile. The +** return value is guaranteed to lie between 32 and MAX_SECTOR_SIZE. +*/ +SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *pFile){ + int iRet = sqlite3OsSectorSize(pFile); + if( iRet<32 ){ + iRet = 512; + }else if( iRet>MAX_SECTOR_SIZE ){ + assert( MAX_SECTOR_SIZE>=512 ); + iRet = MAX_SECTOR_SIZE; + } + return iRet; +} + +/* +** Set the value of the Pager.sectorSize variable for the given +** pager based on the value returned by the xSectorSize method +** of the open database file. The sector size will be used +** to determine the size and alignment of journal header and +** super-journal pointers within created journal files. +** +** For temporary files the effective sector size is always 512 bytes. +** +** Otherwise, for non-temporary files, the effective sector size is +** the value returned by the xSectorSize() method rounded up to 32 if +** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it +** is greater than MAX_SECTOR_SIZE. +** +** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set +** the effective sector size to its minimum value (512). The purpose of +** pPager->sectorSize is to define the "blast radius" of bytes that +** might change if a crash occurs while writing to a single byte in +** that range. But with POWERSAFE_OVERWRITE, the blast radius is zero +** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector +** size. For backwards compatibility of the rollback journal file format, +** we cannot reduce the effective sector size below 512. +*/ +static void setSectorSize(Pager *pPager){ + assert( isOpen(pPager->fd) || pPager->tempFile ); + + if( pPager->tempFile + || (sqlite3OsDeviceCharacteristics(pPager->fd) & + SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0 + ){ + /* Sector size doesn't matter for temporary files. Also, the file + ** may not have been opened yet, in which case the OsSectorSize() + ** call will segfault. */ + pPager->sectorSize = 512; + }else{ + pPager->sectorSize = sqlite3SectorSize(pPager->fd); + } +} + +/* +** Playback the journal and thus restore the database file to +** the state it was in before we started making changes. +** +** The journal file format is as follows: +** +** (1) 8 byte prefix. A copy of aJournalMagic[]. +** (2) 4 byte big-endian integer which is the number of valid page records +** in the journal. If this value is 0xffffffff, then compute the +** number of page records from the journal size. +** (3) 4 byte big-endian integer which is the initial value for the +** sanity checksum. +** (4) 4 byte integer which is the number of pages to truncate the +** database to during a rollback. +** (5) 4 byte big-endian integer which is the sector size. The header +** is this many bytes in size. +** (6) 4 byte big-endian integer which is the page size. +** (7) zero padding out to the next sector size. +** (8) Zero or more pages instances, each as follows: +** + 4 byte page number. +** + pPager->pageSize bytes of data. +** + 4 byte checksum +** +** When we speak of the journal header, we mean the first 7 items above. +** Each entry in the journal is an instance of the 8th item. +** +** Call the value from the second bullet "nRec". nRec is the number of +** valid page entries in the journal. In most cases, you can compute the +** value of nRec from the size of the journal file. But if a power +** failure occurred while the journal was being written, it could be the +** case that the size of the journal file had already been increased but +** the extra entries had not yet made it safely to disk. In such a case, +** the value of nRec computed from the file size would be too large. For +** that reason, we always use the nRec value in the header. +** +** If the nRec value is 0xffffffff it means that nRec should be computed +** from the file size. This value is used when the user selects the +** no-sync option for the journal. A power failure could lead to corruption +** in this case. But for things like temporary table (which will be +** deleted when the power is restored) we don't care. +** +** If the file opened as the journal file is not a well-formed +** journal file then all pages up to the first corrupted page are rolled +** back (or no pages if the journal header is corrupted). The journal file +** is then deleted and SQLITE_OK returned, just as if no corruption had +** been encountered. +** +** If an I/O or malloc() error occurs, the journal-file is not deleted +** and an error code is returned. +** +** The isHot parameter indicates that we are trying to rollback a journal +** that might be a hot journal. Or, it could be that the journal is +** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE. +** If the journal really is hot, reset the pager cache prior rolling +** back any content. If the journal is merely persistent, no reset is +** needed. +*/ +static int pager_playback(Pager *pPager, int isHot){ + sqlite3_vfs *pVfs = pPager->pVfs; + i64 szJ; /* Size of the journal file in bytes */ + u32 nRec; /* Number of Records in the journal */ + u32 u; /* Unsigned loop counter */ + Pgno mxPg = 0; /* Size of the original file in pages */ + int rc; /* Result code of a subroutine */ + int res = 1; /* Value returned by sqlite3OsAccess() */ + char *zSuper = 0; /* Name of super-journal file if any */ + int needPagerReset; /* True to reset page prior to first page rollback */ + int nPlayback = 0; /* Total number of pages restored from journal */ + u32 savedPageSize = pPager->pageSize; + + /* Figure out how many records are in the journal. Abort early if + ** the journal is empty. + */ + assert( isOpen(pPager->jfd) ); + rc = sqlite3OsFileSize(pPager->jfd, &szJ); + if( rc!=SQLITE_OK ){ + goto end_playback; + } + + /* Read the super-journal name from the journal, if it is present. + ** If a super-journal file name is specified, but the file is not + ** present on disk, then the journal is not hot and does not need to be + ** played back. + ** + ** TODO: Technically the following is an error because it assumes that + ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that + ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c, + ** mxPathname is 512, which is the same as the minimum allowable value + ** for pageSize. + */ + zSuper = pPager->pTmpSpace; + rc = readSuperJournal(pPager->jfd, zSuper, pPager->pVfs->mxPathname+1); + if( rc==SQLITE_OK && zSuper[0] ){ + rc = sqlite3OsAccess(pVfs, zSuper, SQLITE_ACCESS_EXISTS, &res); + } + zSuper = 0; + if( rc!=SQLITE_OK || !res ){ + goto end_playback; + } + pPager->journalOff = 0; + needPagerReset = isHot; + + /* This loop terminates either when a readJournalHdr() or + ** pager_playback_one_page() call returns SQLITE_DONE or an IO error + ** occurs. + */ + while( 1 ){ + /* Read the next journal header from the journal file. If there are + ** not enough bytes left in the journal file for a complete header, or + ** it is corrupted, then a process must have failed while writing it. + ** This indicates nothing more needs to be rolled back. + */ + rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + } + goto end_playback; + } + + /* If nRec is 0xffffffff, then this journal was created by a process + ** working in no-sync mode. This means that the rest of the journal + ** file consists of pages, there are no more journal headers. Compute + ** the value of nRec based on this assumption. + */ + if( nRec==0xffffffff ){ + assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ); + nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager)); + } + + /* If nRec is 0 and this rollback is of a transaction created by this + ** process and if this is the final header in the journal, then it means + ** that this part of the journal was being filled but has not yet been + ** synced to disk. Compute the number of pages based on the remaining + ** size of the file. + ** + ** The third term of the test was added to fix ticket #2565. + ** When rolling back a hot journal, nRec==0 always means that the next + ** chunk of the journal contains zero pages to be rolled back. But + ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in + ** the journal, it means that the journal might contain additional + ** pages that need to be rolled back and that the number of pages + ** should be computed based on the journal file size. + */ + if( nRec==0 && !isHot && + pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){ + nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager)); + } + + /* If this is the first header read from the journal, truncate the + ** database file back to its original size. + */ + if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){ + rc = pager_truncate(pPager, mxPg); + if( rc!=SQLITE_OK ){ + goto end_playback; + } + pPager->dbSize = mxPg; + if( pPager->mxPgnomxPgno = mxPg; + } + } + + /* Copy original pages out of the journal and back into the + ** database file and/or page cache. + */ + for(u=0; ujournalOff,0,1,0); + if( rc==SQLITE_OK ){ + nPlayback++; + }else{ + if( rc==SQLITE_DONE ){ + pPager->journalOff = szJ; + break; + }else if( rc==SQLITE_IOERR_SHORT_READ ){ + /* If the journal has been truncated, simply stop reading and + ** processing the journal. This might happen if the journal was + ** not completely written and synced prior to a crash. In that + ** case, the database should have never been written in the + ** first place so it is OK to simply abandon the rollback. */ + rc = SQLITE_OK; + goto end_playback; + }else{ + /* If we are unable to rollback, quit and return the error + ** code. This will cause the pager to enter the error state + ** so that no further harm will be done. Perhaps the next + ** process to come along will be able to rollback the database. + */ + goto end_playback; + } + } + } + } + /*NOTREACHED*/ + assert( 0 ); + +end_playback: + if( rc==SQLITE_OK ){ + rc = sqlite3PagerSetPagesize(pPager, &savedPageSize, -1); + } + /* Following a rollback, the database file should be back in its original + ** state prior to the start of the transaction, so invoke the + ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the + ** assertion that the transaction counter was modified. + */ +#ifdef SQLITE_DEBUG + sqlite3OsFileControlHint(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0); +#endif + + /* If this playback is happening automatically as a result of an IO or + ** malloc error that occurred after the change-counter was updated but + ** before the transaction was committed, then the change-counter + ** modification may just have been reverted. If this happens in exclusive + ** mode, then subsequent transactions performed by the connection will not + ** update the change-counter at all. This may lead to cache inconsistency + ** problems for other processes at some point in the future. So, just + ** in case this has happened, clear the changeCountDone flag now. + */ + pPager->changeCountDone = pPager->tempFile; + + if( rc==SQLITE_OK ){ + /* Leave 4 bytes of space before the super-journal filename in memory. + ** This is because it may end up being passed to sqlite3OsOpen(), in + ** which case it requires 4 0x00 bytes in memory immediately before + ** the filename. */ + zSuper = &pPager->pTmpSpace[4]; + rc = readSuperJournal(pPager->jfd, zSuper, pPager->pVfs->mxPathname+1); + testcase( rc!=SQLITE_OK ); + } + if( rc==SQLITE_OK + && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) + ){ + rc = sqlite3PagerSync(pPager, 0); + } + if( rc==SQLITE_OK ){ + rc = pager_end_transaction(pPager, zSuper[0]!='\0', 0); + testcase( rc!=SQLITE_OK ); + } + if( rc==SQLITE_OK && zSuper[0] && res ){ + /* If there was a super-journal and this routine will return success, + ** see if it is possible to delete the super-journal. + */ + assert( zSuper==&pPager->pTmpSpace[4] ); + memset(&zSuper[-4], 0, 4); + rc = pager_delsuper(pPager, zSuper); + testcase( rc!=SQLITE_OK ); + } + if( isHot && nPlayback ){ + sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s", + nPlayback, pPager->zJournal); + } + + /* The Pager.sectorSize variable may have been updated while rolling + ** back a journal created by a process with a different sector size + ** value. Reset it to the correct value for this process. + */ + setSectorSize(pPager); + return rc; +} + + +/* +** Read the content for page pPg out of the database file (or out of +** the WAL if that is where the most recent copy if found) into +** pPg->pData. A shared lock or greater must be held on the database +** file before this function is called. +** +** If page 1 is read, then the value of Pager.dbFileVers[] is set to +** the value read from the database file. +** +** If an IO error occurs, then the IO error is returned to the caller. +** Otherwise, SQLITE_OK is returned. +*/ +static int readDbPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ + int rc = SQLITE_OK; /* Return code */ + +#ifndef SQLITE_OMIT_WAL + u32 iFrame = 0; /* Frame of WAL containing pgno */ + + assert( pPager->eState>=PAGER_READER && !MEMDB ); + assert( isOpen(pPager->fd) ); + + if( pagerUseWal(pPager) ){ + rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); + if( rc ) return rc; + } + if( iFrame ){ + rc = sqlite3WalReadFrame(pPager->pWal, iFrame,pPager->pageSize,pPg->pData); + }else +#endif + { + i64 iOffset = (pPg->pgno-1)*(i64)pPager->pageSize; + rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + } + + if( pPg->pgno==1 ){ + if( rc ){ + /* If the read is unsuccessful, set the dbFileVers[] to something + ** that will never be a valid file version. dbFileVers[] is a copy + ** of bytes 24..39 of the database. Bytes 28..31 should always be + ** zero or the size of the database in page. Bytes 32..35 and 35..39 + ** should be page numbers which are never 0xffffffff. So filling + ** pPager->dbFileVers[] with all 0xff bytes should suffice. + ** + ** For an encrypted database, the situation is more complex: bytes + ** 24..39 of the database are white noise. But the probability of + ** white noise equaling 16 bytes of 0xff is vanishingly small so + ** we should still be ok. + */ + memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); + }else{ + u8 *dbFileVers = &((u8*)pPg->pData)[24]; + memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); + } + } + PAGER_INCR(sqlite3_pager_readdb_count); + PAGER_INCR(pPager->nRead); + IOTRACE(("PGIN %p %d\n", pPager, pPg->pgno)); + PAGERTRACE(("FETCH %d page %d hash(%08x)\n", + PAGERID(pPager), pPg->pgno, pager_pagehash(pPg))); + + return rc; +} + +/* +** Update the value of the change-counter at offsets 24 and 92 in +** the header and the sqlite version number at offset 96. +** +** This is an unconditional update. See also the pager_incr_changecounter() +** routine which only updates the change-counter if the update is actually +** needed, as determined by the pPager->changeCountDone state variable. +*/ +static void pager_write_changecounter(PgHdr *pPg){ + u32 change_counter; + if( NEVER(pPg==0) ) return; + + /* Increment the value just read and write it back to byte 24. */ + change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1; + put32bits(((char*)pPg->pData)+24, change_counter); + + /* Also store the SQLite version number in bytes 96..99 and in + ** bytes 92..95 store the change counter for which the version number + ** is valid. */ + put32bits(((char*)pPg->pData)+92, change_counter); + put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER); +} + +#ifndef SQLITE_OMIT_WAL +/* +** This function is invoked once for each page that has already been +** written into the log file when a WAL transaction is rolled back. +** Parameter iPg is the page number of said page. The pCtx argument +** is actually a pointer to the Pager structure. +** +** If page iPg is present in the cache, and has no outstanding references, +** it is discarded. Otherwise, if there are one or more outstanding +** references, the page content is reloaded from the database. If the +** attempt to reload content from the database is required and fails, +** return an SQLite error code. Otherwise, SQLITE_OK. +*/ +static int pagerUndoCallback(void *pCtx, Pgno iPg){ + int rc = SQLITE_OK; + Pager *pPager = (Pager *)pCtx; + PgHdr *pPg; + + assert( pagerUseWal(pPager) ); + pPg = sqlite3PagerLookup(pPager, iPg); + if( pPg ){ + if( sqlite3PcachePageRefcount(pPg)==1 ){ + sqlite3PcacheDrop(pPg); + }else{ + rc = readDbPage(pPg); + if( rc==SQLITE_OK ){ + pPager->xReiniter(pPg); + } + sqlite3PagerUnrefNotNull(pPg); + } + } + + /* Normally, if a transaction is rolled back, any backup processes are + ** updated as data is copied out of the rollback journal and into the + ** database. This is not generally possible with a WAL database, as + ** rollback involves simply truncating the log file. Therefore, if one + ** or more frames have already been written to the log (and therefore + ** also copied into the backup databases) as part of this transaction, + ** the backups must be restarted. + */ + sqlite3BackupRestart(pPager->pBackup); + + return rc; +} + +/* +** This function is called to rollback a transaction on a WAL database. +*/ +static int pagerRollbackWal(Pager *pPager){ + int rc; /* Return Code */ + PgHdr *pList; /* List of dirty pages to revert */ + + /* For all pages in the cache that are currently dirty or have already + ** been written (but not committed) to the log file, do one of the + ** following: + ** + ** + Discard the cached page (if refcount==0), or + ** + Reload page content from the database (if refcount>0). + */ + pPager->dbSize = pPager->dbOrigSize; + rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); + pList = sqlite3PcacheDirtyList(pPager->pPCache); + while( pList && rc==SQLITE_OK ){ + PgHdr *pNext = pList->pDirty; + rc = pagerUndoCallback((void *)pPager, pList->pgno); + pList = pNext; + } + + return rc; +} + +/* +** This function is a wrapper around sqlite3WalFrames(). As well as logging +** the contents of the list of pages headed by pList (connected by pDirty), +** this function notifies any active backup processes that the pages have +** changed. +** +** The list of pages passed into this routine is always sorted by page number. +** Hence, if page 1 appears anywhere on the list, it will be the first page. +*/ +static int pagerWalFrames( + Pager *pPager, /* Pager object */ + PgHdr *pList, /* List of frames to log */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit /* True if this is a commit */ +){ + int rc; /* Return code */ + int nList; /* Number of pages in pList */ + PgHdr *p; /* For looping over pages */ + + assert( pPager->pWal ); + assert( pList ); +#ifdef SQLITE_DEBUG + /* Verify that the page list is in accending order */ + for(p=pList; p && p->pDirty; p=p->pDirty){ + assert( p->pgno < p->pDirty->pgno ); + } +#endif + + assert( pList->pDirty==0 || isCommit ); + if( isCommit ){ + /* If a WAL transaction is being committed, there is no point in writing + ** any pages with page numbers greater than nTruncate into the WAL file. + ** They will never be read by any client. So remove them from the pDirty + ** list here. */ + PgHdr **ppNext = &pList; + nList = 0; + for(p=pList; (*ppNext = p)!=0; p=p->pDirty){ + if( p->pgno<=nTruncate ){ + ppNext = &p->pDirty; + nList++; + } + } + assert( pList ); + }else{ + nList = 1; + } + pPager->aStat[PAGER_STAT_WRITE] += nList; + + if( pList->pgno==1 ) pager_write_changecounter(pList); + rc = sqlite3WalFrames(pPager->pWal, + pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags + ); + if( rc==SQLITE_OK && pPager->pBackup ){ + for(p=pList; p; p=p->pDirty){ + sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); + } + } + +#ifdef SQLITE_CHECK_PAGES + pList = sqlite3PcacheDirtyList(pPager->pPCache); + for(p=pList; p; p=p->pDirty){ + pager_set_pagehash(p); + } +#endif + + return rc; +} + +/* +** Begin a read transaction on the WAL. +** +** This routine used to be called "pagerOpenSnapshot()" because it essentially +** makes a snapshot of the database at the current point in time and preserves +** that snapshot for use by the reader in spite of concurrently changes by +** other writers or checkpointers. +*/ +static int pagerBeginReadTransaction(Pager *pPager){ + int rc; /* Return code */ + int changed = 0; /* True if cache must be reset */ + + assert( pagerUseWal(pPager) ); + assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); + + /* sqlite3WalEndReadTransaction() was not called for the previous + ** transaction in locking_mode=EXCLUSIVE. So call it now. If we + ** are in locking_mode=NORMAL and EndRead() was previously called, + ** the duplicate call is harmless. + */ + sqlite3WalEndReadTransaction(pPager->pWal); + + rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); + if( rc!=SQLITE_OK || changed ){ + pager_reset(pPager); + if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); + } + + return rc; +} +#endif + +/* +** This function is called as part of the transition from PAGER_OPEN +** to PAGER_READER state to determine the size of the database file +** in pages (assuming the page size currently stored in Pager.pageSize). +** +** If no error occurs, SQLITE_OK is returned and the size of the database +** in pages is stored in *pnPage. Otherwise, an error code (perhaps +** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified. +*/ +static int pagerPagecount(Pager *pPager, Pgno *pnPage){ + Pgno nPage; /* Value to return via *pnPage */ + + /* Query the WAL sub-system for the database size. The WalDbsize() + ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or + ** if the database size is not available. The database size is not + ** available from the WAL sub-system if the log file is empty or + ** contains no valid committed transactions. + */ + assert( pPager->eState==PAGER_OPEN ); + assert( pPager->eLock>=SHARED_LOCK ); + assert( isOpen(pPager->fd) ); + assert( pPager->tempFile==0 ); + nPage = sqlite3WalDbsize(pPager->pWal); + + /* If the number of pages in the database is not available from the + ** WAL sub-system, determine the page count based on the size of + ** the database file. If the size of the database file is not an + ** integer multiple of the page-size, round up the result. + */ + if( nPage==0 && ALWAYS(isOpen(pPager->fd)) ){ + i64 n = 0; /* Size of db file in bytes */ + int rc = sqlite3OsFileSize(pPager->fd, &n); + if( rc!=SQLITE_OK ){ + return rc; + } + nPage = (Pgno)((n+pPager->pageSize-1) / pPager->pageSize); + } + + /* If the current number of pages in the file is greater than the + ** configured maximum pager number, increase the allowed limit so + ** that the file can be read. + */ + if( nPage>pPager->mxPgno ){ + pPager->mxPgno = (Pgno)nPage; + } + + *pnPage = nPage; + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_WAL +/* +** Check if the *-wal file that corresponds to the database opened by pPager +** exists if the database is not empy, or verify that the *-wal file does +** not exist (by deleting it) if the database file is empty. +** +** If the database is not empty and the *-wal file exists, open the pager +** in WAL mode. If the database is empty or if no *-wal file exists and +** if no error occurs, make sure Pager.journalMode is not set to +** PAGER_JOURNALMODE_WAL. +** +** Return SQLITE_OK or an error code. +** +** The caller must hold a SHARED lock on the database file to call this +** function. Because an EXCLUSIVE lock on the db file is required to delete +** a WAL on a none-empty database, this ensures there is no race condition +** between the xAccess() below and an xDelete() being executed by some +** other connection. +*/ +static int pagerOpenWalIfPresent(Pager *pPager){ + int rc = SQLITE_OK; + assert( pPager->eState==PAGER_OPEN ); + assert( pPager->eLock>=SHARED_LOCK ); + + if( !pPager->tempFile ){ + int isWal; /* True if WAL file exists */ + rc = sqlite3OsAccess( + pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal + ); + if( rc==SQLITE_OK ){ + if( isWal ){ + Pgno nPage; /* Size of the database file */ + + rc = pagerPagecount(pPager, &nPage); + if( rc ) return rc; + if( nPage==0 ){ + rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0); + }else{ + testcase( sqlite3PcachePagecount(pPager->pPCache)==0 ); + rc = sqlite3PagerOpenWal(pPager, 0); + } + }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ + pPager->journalMode = PAGER_JOURNALMODE_DELETE; + } + } + } + return rc; +} +#endif + +/* +** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback +** the entire super-journal file. The case pSavepoint==NULL occurs when +** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction +** savepoint. +** +** When pSavepoint is not NULL (meaning a non-transaction savepoint is +** being rolled back), then the rollback consists of up to three stages, +** performed in the order specified: +** +** * Pages are played back from the main journal starting at byte +** offset PagerSavepoint.iOffset and continuing to +** PagerSavepoint.iHdrOffset, or to the end of the main journal +** file if PagerSavepoint.iHdrOffset is zero. +** +** * If PagerSavepoint.iHdrOffset is not zero, then pages are played +** back starting from the journal header immediately following +** PagerSavepoint.iHdrOffset to the end of the main journal file. +** +** * Pages are then played back from the sub-journal file, starting +** with the PagerSavepoint.iSubRec and continuing to the end of +** the journal file. +** +** Throughout the rollback process, each time a page is rolled back, the +** corresponding bit is set in a bitvec structure (variable pDone in the +** implementation below). This is used to ensure that a page is only +** rolled back the first time it is encountered in either journal. +** +** If pSavepoint is NULL, then pages are only played back from the main +** journal file. There is no need for a bitvec in this case. +** +** In either case, before playback commences the Pager.dbSize variable +** is reset to the value that it held at the start of the savepoint +** (or transaction). No page with a page-number greater than this value +** is played back. If one is encountered it is simply skipped. +*/ +static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ + i64 szJ; /* Effective size of the main journal */ + i64 iHdrOff; /* End of first segment of main-journal records */ + int rc = SQLITE_OK; /* Return code */ + Bitvec *pDone = 0; /* Bitvec to ensure pages played back only once */ + + assert( pPager->eState!=PAGER_ERROR ); + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + + /* Allocate a bitvec to use to store the set of pages rolled back */ + if( pSavepoint ){ + pDone = sqlite3BitvecCreate(pSavepoint->nOrig); + if( !pDone ){ + return SQLITE_NOMEM_BKPT; + } + } + + /* Set the database size back to the value it was before the savepoint + ** being reverted was opened. + */ + pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; + pPager->changeCountDone = pPager->tempFile; + + if( !pSavepoint && pagerUseWal(pPager) ){ + return pagerRollbackWal(pPager); + } + + /* Use pPager->journalOff as the effective size of the main rollback + ** journal. The actual file might be larger than this in + ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything + ** past pPager->journalOff is off-limits to us. + */ + szJ = pPager->journalOff; + assert( pagerUseWal(pPager)==0 || szJ==0 ); + + /* Begin by rolling back records from the main journal starting at + ** PagerSavepoint.iOffset and continuing to the next journal header. + ** There might be records in the main journal that have a page number + ** greater than the current database size (pPager->dbSize) but those + ** will be skipped automatically. Pages are added to pDone as they + ** are played back. + */ + if( pSavepoint && !pagerUseWal(pPager) ){ + iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; + pPager->journalOff = pSavepoint->iOffset; + while( rc==SQLITE_OK && pPager->journalOffjournalOff, pDone, 1, 1); + } + assert( rc!=SQLITE_DONE ); + }else{ + pPager->journalOff = 0; + } + + /* Continue rolling back records out of the main journal starting at + ** the first journal header seen and continuing until the effective end + ** of the main journal file. Continue to skip out-of-range pages and + ** continue adding pages rolled back to pDone. + */ + while( rc==SQLITE_OK && pPager->journalOffjournalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff" + ** test is related to ticket #2565. See the discussion in the + ** pager_playback() function for additional information. + */ + if( nJRec==0 + && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff + ){ + nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager)); + } + for(ii=0; rc==SQLITE_OK && iijournalOffjournalOff, pDone, 1, 1); + } + assert( rc!=SQLITE_DONE ); + } + assert( rc!=SQLITE_OK || pPager->journalOff>=szJ ); + + /* Finally, rollback pages from the sub-journal. Page that were + ** previously rolled back out of the main journal (and are hence in pDone) + ** will be skipped. Out-of-range pages are also skipped. + */ + if( pSavepoint ){ + u32 ii; /* Loop counter */ + i64 offset = (i64)pSavepoint->iSubRec*(4+pPager->pageSize); + + if( pagerUseWal(pPager) ){ + rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData); + } + for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && iinSubRec; ii++){ + assert( offset==(i64)ii*(4+pPager->pageSize) ); + rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1); + } + assert( rc!=SQLITE_DONE ); + } + + sqlite3BitvecDestroy(pDone); + if( rc==SQLITE_OK ){ + pPager->journalOff = szJ; + } + + return rc; +} + +/* +** Change the maximum number of in-memory pages that are allowed +** before attempting to recycle clean and unused pages. +*/ +SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ + sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); +} + +/* +** Change the maximum number of in-memory pages that are allowed +** before attempting to spill pages to journal. +*/ +SQLITE_PRIVATE int sqlite3PagerSetSpillsize(Pager *pPager, int mxPage){ + return sqlite3PcacheSetSpillsize(pPager->pPCache, mxPage); +} + +/* +** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap. +*/ +static void pagerFixMaplimit(Pager *pPager){ +#if SQLITE_MAX_MMAP_SIZE>0 + sqlite3_file *fd = pPager->fd; + if( isOpen(fd) && fd->pMethods->iVersion>=3 ){ + sqlite3_int64 sz; + sz = pPager->szMmap; + pPager->bUseFetch = (sz>0); + setGetterMethod(pPager); + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz); + } +#endif +} + +/* +** Change the maximum size of any memory mapping made of the database file. +*/ +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){ + pPager->szMmap = szMmap; + pagerFixMaplimit(pPager); +} + +/* +** Free as much memory as possible from the pager. +*/ +SQLITE_PRIVATE void sqlite3PagerShrink(Pager *pPager){ + sqlite3PcacheShrink(pPager->pPCache); +} + +/* +** Adjust settings of the pager to those specified in the pgFlags parameter. +** +** The "level" in pgFlags & PAGER_SYNCHRONOUS_MASK sets the robustness +** of the database to damage due to OS crashes or power failures by +** changing the number of syncs()s when writing the journals. +** There are four levels: +** +** OFF sqlite3OsSync() is never called. This is the default +** for temporary and transient files. +** +** NORMAL The journal is synced once before writes begin on the +** database. This is normally adequate protection, but +** it is theoretically possible, though very unlikely, +** that an inopertune power failure could leave the journal +** in a state which would cause damage to the database +** when it is rolled back. +** +** FULL The journal is synced twice before writes begin on the +** database (with some additional information - the nRec field +** of the journal header - being written in between the two +** syncs). If we assume that writing a +** single disk sector is atomic, then this mode provides +** assurance that the journal will not be corrupted to the +** point of causing damage to the database during rollback. +** +** EXTRA This is like FULL except that is also syncs the directory +** that contains the rollback journal after the rollback +** journal is unlinked. +** +** The above is for a rollback-journal mode. For WAL mode, OFF continues +** to mean that no syncs ever occur. NORMAL means that the WAL is synced +** prior to the start of checkpoint and that the database file is synced +** at the conclusion of the checkpoint if the entire content of the WAL +** was written back into the database. But no sync operations occur for +** an ordinary commit in NORMAL mode with WAL. FULL means that the WAL +** file is synced following each commit operation, in addition to the +** syncs associated with NORMAL. There is no difference between FULL +** and EXTRA for WAL mode. +** +** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL. The +** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync +** using fcntl(F_FULLFSYNC). SQLITE_SYNC_NORMAL means to do an +** ordinary fsync() call. There is no difference between SQLITE_SYNC_FULL +** and SQLITE_SYNC_NORMAL on platforms other than MacOSX. But the +** synchronous=FULL versus synchronous=NORMAL setting determines when +** the xSync primitive is called and is relevant to all platforms. +** +** Numeric values associated with these states are OFF==1, NORMAL=2, +** and FULL=3. +*/ +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +SQLITE_PRIVATE void sqlite3PagerSetFlags( + Pager *pPager, /* The pager to set safety level for */ + unsigned pgFlags /* Various flags */ +){ + unsigned level = pgFlags & PAGER_SYNCHRONOUS_MASK; + if( pPager->tempFile ){ + pPager->noSync = 1; + pPager->fullSync = 0; + pPager->extraSync = 0; + }else{ + pPager->noSync = level==PAGER_SYNCHRONOUS_OFF ?1:0; + pPager->fullSync = level>=PAGER_SYNCHRONOUS_FULL ?1:0; + pPager->extraSync = level==PAGER_SYNCHRONOUS_EXTRA ?1:0; + } + if( pPager->noSync ){ + pPager->syncFlags = 0; + }else if( pgFlags & PAGER_FULLFSYNC ){ + pPager->syncFlags = SQLITE_SYNC_FULL; + }else{ + pPager->syncFlags = SQLITE_SYNC_NORMAL; + } + pPager->walSyncFlags = (pPager->syncFlags<<2); + if( pPager->fullSync ){ + pPager->walSyncFlags |= pPager->syncFlags; + } + if( (pgFlags & PAGER_CKPT_FULLFSYNC) && !pPager->noSync ){ + pPager->walSyncFlags |= (SQLITE_SYNC_FULL<<2); + } + if( pgFlags & PAGER_CACHESPILL ){ + pPager->doNotSpill &= ~SPILLFLAG_OFF; + }else{ + pPager->doNotSpill |= SPILLFLAG_OFF; + } +} +#endif + +/* +** The following global variable is incremented whenever the library +** attempts to open a temporary file. This information is used for +** testing and analysis only. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_opentemp_count = 0; +#endif + +/* +** Open a temporary file. +** +** Write the file descriptor into *pFile. Return SQLITE_OK on success +** or some other error code if we fail. The OS will automatically +** delete the temporary file when it is closed. +** +** The flags passed to the VFS layer xOpen() call are those specified +** by parameter vfsFlags ORed with the following: +** +** SQLITE_OPEN_READWRITE +** SQLITE_OPEN_CREATE +** SQLITE_OPEN_EXCLUSIVE +** SQLITE_OPEN_DELETEONCLOSE +*/ +static int pagerOpentemp( + Pager *pPager, /* The pager object */ + sqlite3_file *pFile, /* Write the file descriptor here */ + int vfsFlags /* Flags passed through to the VFS */ +){ + int rc; /* Return code */ + +#ifdef SQLITE_TEST + sqlite3_opentemp_count++; /* Used for testing and analysis only */ +#endif + + vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | + SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; + rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0); + assert( rc!=SQLITE_OK || isOpen(pFile) ); + return rc; +} + +/* +** Set the busy handler function. +** +** The pager invokes the busy-handler if sqlite3OsLock() returns +** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock, +** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE +** lock. It does *not* invoke the busy handler when upgrading from +** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE +** (which occurs during hot-journal rollback). Summary: +** +** Transition | Invokes xBusyHandler +** -------------------------------------------------------- +** NO_LOCK -> SHARED_LOCK | Yes +** SHARED_LOCK -> RESERVED_LOCK | No +** SHARED_LOCK -> EXCLUSIVE_LOCK | No +** RESERVED_LOCK -> EXCLUSIVE_LOCK | Yes +** +** If the busy-handler callback returns non-zero, the lock is +** retried. If it returns zero, then the SQLITE_BUSY error is +** returned to the caller of the pager API function. +*/ +SQLITE_PRIVATE void sqlite3PagerSetBusyHandler( + Pager *pPager, /* Pager object */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ +){ + void **ap; + pPager->xBusyHandler = xBusyHandler; + pPager->pBusyHandlerArg = pBusyHandlerArg; + ap = (void **)&pPager->xBusyHandler; + assert( ((int(*)(void *))(ap[0]))==xBusyHandler ); + assert( ap[1]==pBusyHandlerArg ); + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_BUSYHANDLER, (void *)ap); +} + +/* +** Change the page size used by the Pager object. The new page size +** is passed in *pPageSize. +** +** If the pager is in the error state when this function is called, it +** is a no-op. The value returned is the error state error code (i.e. +** one of SQLITE_IOERR, an SQLITE_IOERR_xxx sub-code or SQLITE_FULL). +** +** Otherwise, if all of the following are true: +** +** * the new page size (value of *pPageSize) is valid (a power +** of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and +** +** * there are no outstanding page references, and +** +** * the database is either not an in-memory database or it is +** an in-memory database that currently consists of zero pages. +** +** then the pager object page size is set to *pPageSize. +** +** If the page size is changed, then this function uses sqlite3PagerMalloc() +** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt +** fails, SQLITE_NOMEM is returned and the page size remains unchanged. +** In all other cases, SQLITE_OK is returned. +** +** If the page size is not changed, either because one of the enumerated +** conditions above is not true, the pager was in error state when this +** function was called, or because the memory allocation attempt failed, +** then *pPageSize is set to the old, retained page size before returning. +*/ +SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){ + int rc = SQLITE_OK; + + /* It is not possible to do a full assert_pager_state() here, as this + ** function may be called from within PagerOpen(), before the state + ** of the Pager object is internally consistent. + ** + ** At one point this function returned an error if the pager was in + ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that + ** there is at least one outstanding page reference, this function + ** is a no-op for that case anyhow. + */ + + u32 pageSize = *pPageSize; + assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) ); + if( (pPager->memDb==0 || pPager->dbSize==0) + && sqlite3PcacheRefCount(pPager->pPCache)==0 + && pageSize && pageSize!=(u32)pPager->pageSize + ){ + char *pNew = NULL; /* New temp space */ + i64 nByte = 0; + + if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){ + rc = sqlite3OsFileSize(pPager->fd, &nByte); + } + if( rc==SQLITE_OK ){ + /* 8 bytes of zeroed overrun space is sufficient so that the b-tree + * cell header parser will never run off the end of the allocation */ + pNew = (char *)sqlite3PageMalloc(pageSize+8); + if( !pNew ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + memset(pNew+pageSize, 0, 8); + } + } + + if( rc==SQLITE_OK ){ + pager_reset(pPager); + rc = sqlite3PcacheSetPageSize(pPager->pPCache, pageSize); + } + if( rc==SQLITE_OK ){ + sqlite3PageFree(pPager->pTmpSpace); + pPager->pTmpSpace = pNew; + pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize); + pPager->pageSize = pageSize; + }else{ + sqlite3PageFree(pNew); + } + } + + *pPageSize = pPager->pageSize; + if( rc==SQLITE_OK ){ + if( nReserve<0 ) nReserve = pPager->nReserve; + assert( nReserve>=0 && nReserve<1000 ); + pPager->nReserve = (i16)nReserve; + pagerFixMaplimit(pPager); + } + return rc; +} + +/* +** Return a pointer to the "temporary page" buffer held internally +** by the pager. This is a buffer that is big enough to hold the +** entire content of a database page. This buffer is used internally +** during rollback and will be overwritten whenever a rollback +** occurs. But other modules are free to use it too, as long as +** no rollbacks are happening. +*/ +SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager *pPager){ + return pPager->pTmpSpace; +} + +/* +** Attempt to set the maximum database page count if mxPage is positive. +** Make no changes if mxPage is zero or negative. And never reduce the +** maximum page count below the current size of the database. +** +** Regardless of mxPage, return the current maximum page count. +*/ +SQLITE_PRIVATE Pgno sqlite3PagerMaxPageCount(Pager *pPager, Pgno mxPage){ + if( mxPage>0 ){ + pPager->mxPgno = mxPage; + } + assert( pPager->eState!=PAGER_OPEN ); /* Called only by OP_MaxPgcnt */ + /* assert( pPager->mxPgno>=pPager->dbSize ); */ + /* OP_MaxPgcnt ensures that the parameter passed to this function is not + ** less than the total number of valid pages in the database. But this + ** may be less than Pager.dbSize, and so the assert() above is not valid */ + return pPager->mxPgno; +} + +/* +** The following set of routines are used to disable the simulated +** I/O error mechanism. These routines are used to avoid simulated +** errors in places where we do not care about errors. +** +** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops +** and generate no code. +*/ +#ifdef SQLITE_TEST +SQLITE_API extern int sqlite3_io_error_pending; +SQLITE_API extern int sqlite3_io_error_hit; +static int saved_cnt; +void disable_simulated_io_errors(void){ + saved_cnt = sqlite3_io_error_pending; + sqlite3_io_error_pending = -1; +} +void enable_simulated_io_errors(void){ + sqlite3_io_error_pending = saved_cnt; +} +#else +# define disable_simulated_io_errors() +# define enable_simulated_io_errors() +#endif + +/* +** Read the first N bytes from the beginning of the file into memory +** that pDest points to. +** +** If the pager was opened on a transient file (zFilename==""), or +** opened on a file less than N bytes in size, the output buffer is +** zeroed and SQLITE_OK returned. The rationale for this is that this +** function is used to read database headers, and a new transient or +** zero sized database has a header than consists entirely of zeroes. +** +** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered, +** the error code is returned to the caller and the contents of the +** output buffer undefined. +*/ +SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ + int rc = SQLITE_OK; + memset(pDest, 0, N); + assert( isOpen(pPager->fd) || pPager->tempFile ); + + /* This routine is only called by btree immediately after creating + ** the Pager object. There has not been an opportunity to transition + ** to WAL mode yet. + */ + assert( !pagerUseWal(pPager) ); + + if( isOpen(pPager->fd) ){ + IOTRACE(("DBHDR %p 0 %d\n", pPager, N)) + rc = sqlite3OsRead(pPager->fd, pDest, N, 0); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + } + return rc; +} + +/* +** This function may only be called when a read-transaction is open on +** the pager. It returns the total number of pages in the database. +** +** However, if the file is between 1 and bytes in size, then +** this is considered a 1 page file. +*/ +SQLITE_PRIVATE void sqlite3PagerPagecount(Pager *pPager, int *pnPage){ + assert( pPager->eState>=PAGER_READER ); + assert( pPager->eState!=PAGER_WRITER_FINISHED ); + *pnPage = (int)pPager->dbSize; +} + + +/* +** Try to obtain a lock of type locktype on the database file. If +** a similar or greater lock is already held, this function is a no-op +** (returning SQLITE_OK immediately). +** +** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke +** the busy callback if the lock is currently not available. Repeat +** until the busy callback returns false or until the attempt to +** obtain the lock succeeds. +** +** Return SQLITE_OK on success and an error code if we cannot obtain +** the lock. If the lock is obtained successfully, set the Pager.state +** variable to locktype before returning. +*/ +static int pager_wait_on_lock(Pager *pPager, int locktype){ + int rc; /* Return code */ + + /* Check that this is either a no-op (because the requested lock is + ** already held), or one of the transitions that the busy-handler + ** may be invoked during, according to the comment above + ** sqlite3PagerSetBusyhandler(). + */ + assert( (pPager->eLock>=locktype) + || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK) + || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK) + ); + + do { + rc = pagerLockDb(pPager, locktype); + }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) ); + return rc; +} + +/* +** Function assertTruncateConstraint(pPager) checks that one of the +** following is true for all dirty pages currently in the page-cache: +** +** a) The page number is less than or equal to the size of the +** current database image, in pages, OR +** +** b) if the page content were written at this time, it would not +** be necessary to write the current content out to the sub-journal. +** +** If the condition asserted by this function were not true, and the +** dirty page were to be discarded from the cache via the pagerStress() +** routine, pagerStress() would not write the current page content to +** the database file. If a savepoint transaction were rolled back after +** this happened, the correct behavior would be to restore the current +** content of the page. However, since this content is not present in either +** the database file or the portion of the rollback journal and +** sub-journal rolled back the content could not be restored and the +** database image would become corrupt. It is therefore fortunate that +** this circumstance cannot arise. +*/ +#if defined(SQLITE_DEBUG) +static void assertTruncateConstraintCb(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + assert( pPg->flags&PGHDR_DIRTY ); + if( pPg->pgno>pPager->dbSize ){ /* if (a) is false */ + Pgno pgno = pPg->pgno; + int i; + for(i=0; ipPager->nSavepoint; i++){ + PagerSavepoint *p = &pPager->aSavepoint[i]; + assert( p->nOrigpInSavepoint,pgno) ); + } + } +} +static void assertTruncateConstraint(Pager *pPager){ + sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb); +} +#else +# define assertTruncateConstraint(pPager) +#endif + +/* +** Truncate the in-memory database file image to nPage pages. This +** function does not actually modify the database file on disk. It +** just sets the internal state of the pager object so that the +** truncation will be done when the current transaction is committed. +** +** This function is only called right before committing a transaction. +** Once this function has been called, the transaction must either be +** rolled back or committed. It is not safe to call this function and +** then continue writing to the database. +*/ +SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){ + assert( pPager->dbSize>=nPage || CORRUPT_DB ); + assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); + pPager->dbSize = nPage; + + /* At one point the code here called assertTruncateConstraint() to + ** ensure that all pages being truncated away by this operation are, + ** if one or more savepoints are open, present in the savepoint + ** journal so that they can be restored if the savepoint is rolled + ** back. This is no longer necessary as this function is now only + ** called right before committing a transaction. So although the + ** Pager object may still have open savepoints (Pager.nSavepoint!=0), + ** they cannot be rolled back. So the assertTruncateConstraint() call + ** is no longer correct. */ +} + + +/* +** This function is called before attempting a hot-journal rollback. It +** syncs the journal file to disk, then sets pPager->journalHdr to the +** size of the journal file so that the pager_playback() routine knows +** that the entire journal file has been synced. +** +** Syncing a hot-journal to disk before attempting to roll it back ensures +** that if a power-failure occurs during the rollback, the process that +** attempts rollback following system recovery sees the same journal +** content as this process. +** +** If everything goes as planned, SQLITE_OK is returned. Otherwise, +** an SQLite error code. +*/ +static int pagerSyncHotJournal(Pager *pPager){ + int rc = SQLITE_OK; + if( !pPager->noSync ){ + rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL); + } + if( rc==SQLITE_OK ){ + rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr); + } + return rc; +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Obtain a reference to a memory mapped page object for page number pgno. +** The new object will use the pointer pData, obtained from xFetch(). +** If successful, set *ppPage to point to the new page reference +** and return SQLITE_OK. Otherwise, return an SQLite error code and set +** *ppPage to zero. +** +** Page references obtained by calling this function should be released +** by calling pagerReleaseMapPage(). +*/ +static int pagerAcquireMapPage( + Pager *pPager, /* Pager object */ + Pgno pgno, /* Page number */ + void *pData, /* xFetch()'d data for this page */ + PgHdr **ppPage /* OUT: Acquired page object */ +){ + PgHdr *p; /* Memory mapped page to return */ + + if( pPager->pMmapFreelist ){ + *ppPage = p = pPager->pMmapFreelist; + pPager->pMmapFreelist = p->pDirty; + p->pDirty = 0; + assert( pPager->nExtra>=8 ); + memset(p->pExtra, 0, 8); + }else{ + *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra); + if( p==0 ){ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData); + return SQLITE_NOMEM_BKPT; + } + p->pExtra = (void *)&p[1]; + p->flags = PGHDR_MMAP; + p->nRef = 1; + p->pPager = pPager; + } + + assert( p->pExtra==(void *)&p[1] ); + assert( p->pPage==0 ); + assert( p->flags==PGHDR_MMAP ); + assert( p->pPager==pPager ); + assert( p->nRef==1 ); + + p->pgno = pgno; + p->pData = pData; + pPager->nMmapOut++; + + return SQLITE_OK; +} +#endif + +/* +** Release a reference to page pPg. pPg must have been returned by an +** earlier call to pagerAcquireMapPage(). +*/ +static void pagerReleaseMapPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + pPager->nMmapOut--; + pPg->pDirty = pPager->pMmapFreelist; + pPager->pMmapFreelist = pPg; + + assert( pPager->fd->pMethods->iVersion>=3 ); + sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData); +} + +/* +** Free all PgHdr objects stored in the Pager.pMmapFreelist list. +*/ +static void pagerFreeMapHdrs(Pager *pPager){ + PgHdr *p; + PgHdr *pNext; + for(p=pPager->pMmapFreelist; p; p=pNext){ + pNext = p->pDirty; + sqlite3_free(p); + } +} + +/* Verify that the database file has not be deleted or renamed out from +** under the pager. Return SQLITE_OK if the database is still where it ought +** to be on disk. Return non-zero (SQLITE_READONLY_DBMOVED or some other error +** code from sqlite3OsAccess()) if the database has gone missing. +*/ +static int databaseIsUnmoved(Pager *pPager){ + int bHasMoved = 0; + int rc; + + if( pPager->tempFile ) return SQLITE_OK; + if( pPager->dbSize==0 ) return SQLITE_OK; + assert( pPager->zFilename && pPager->zFilename[0] ); + rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_HAS_MOVED, &bHasMoved); + if( rc==SQLITE_NOTFOUND ){ + /* If the HAS_MOVED file-control is unimplemented, assume that the file + ** has not been moved. That is the historical behavior of SQLite: prior to + ** version 3.8.3, it never checked */ + rc = SQLITE_OK; + }else if( rc==SQLITE_OK && bHasMoved ){ + rc = SQLITE_READONLY_DBMOVED; + } + return rc; +} + + +/* +** Shutdown the page cache. Free all memory and close all files. +** +** If a transaction was in progress when this routine is called, that +** transaction is rolled back. All outstanding pages are invalidated +** and their memory is freed. Any attempt to use a page associated +** with this page cache after this function returns will likely +** result in a coredump. +** +** This function always succeeds. If a transaction is active an attempt +** is made to roll it back. If an error occurs during the rollback +** a hot journal may be left in the filesystem but no error is returned +** to the caller. +*/ +SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager, sqlite3 *db){ + u8 *pTmp = (u8*)pPager->pTmpSpace; + assert( db || pagerUseWal(pPager)==0 ); + assert( assert_pager_state(pPager) ); + disable_simulated_io_errors(); + sqlite3BeginBenignMalloc(); + pagerFreeMapHdrs(pPager); + /* pPager->errCode = 0; */ + pPager->exclusiveMode = 0; +#ifndef SQLITE_OMIT_WAL + { + u8 *a = 0; + assert( db || pPager->pWal==0 ); + if( db && 0==(db->flags & SQLITE_NoCkptOnClose) + && SQLITE_OK==databaseIsUnmoved(pPager) + ){ + a = pTmp; + } + sqlite3WalClose(pPager->pWal, db, pPager->walSyncFlags, pPager->pageSize,a); + pPager->pWal = 0; + } +#endif + pager_reset(pPager); + if( MEMDB ){ + pager_unlock(pPager); + }else{ + /* If it is open, sync the journal file before calling UnlockAndRollback. + ** If this is not done, then an unsynced portion of the open journal + ** file may be played back into the database. If a power failure occurs + ** while this is happening, the database could become corrupt. + ** + ** If an error occurs while trying to sync the journal, shift the pager + ** into the ERROR state. This causes UnlockAndRollback to unlock the + ** database and close the journal file without attempting to roll it + ** back or finalize it. The next database user will have to do hot-journal + ** rollback before accessing the database file. + */ + if( isOpen(pPager->jfd) ){ + pager_error(pPager, pagerSyncHotJournal(pPager)); + } + pagerUnlockAndRollback(pPager); + } + sqlite3EndBenignMalloc(); + enable_simulated_io_errors(); + PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); + IOTRACE(("CLOSE %p\n", pPager)) + sqlite3OsClose(pPager->jfd); + sqlite3OsClose(pPager->fd); + sqlite3PageFree(pTmp); + sqlite3PcacheClose(pPager->pPCache); + assert( !pPager->aSavepoint && !pPager->pInJournal ); + assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ); + + sqlite3_free(pPager); + return SQLITE_OK; +} + +#if !defined(NDEBUG) || defined(SQLITE_TEST) +/* +** Return the page number for page pPg. +*/ +SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage *pPg){ + return pPg->pgno; +} +#endif + +/* +** Increment the reference count for page pPg. +*/ +SQLITE_PRIVATE void sqlite3PagerRef(DbPage *pPg){ + sqlite3PcacheRef(pPg); +} + +/* +** Sync the journal. In other words, make sure all the pages that have +** been written to the journal have actually reached the surface of the +** disk and can be restored in the event of a hot-journal rollback. +** +** If the Pager.noSync flag is set, then this function is a no-op. +** Otherwise, the actions required depend on the journal-mode and the +** device characteristics of the file-system, as follows: +** +** * If the journal file is an in-memory journal file, no action need +** be taken. +** +** * Otherwise, if the device does not support the SAFE_APPEND property, +** then the nRec field of the most recently written journal header +** is updated to contain the number of journal records that have +** been written following it. If the pager is operating in full-sync +** mode, then the journal file is synced before this field is updated. +** +** * If the device does not support the SEQUENTIAL property, then +** journal file is synced. +** +** Or, in pseudo-code: +** +** if( NOT ){ +** if( NOT SAFE_APPEND ){ +** if( ) xSync(); +** +** } +** if( NOT SEQUENTIAL ) xSync(); +** } +** +** If successful, this routine clears the PGHDR_NEED_SYNC flag of every +** page currently held in memory before returning SQLITE_OK. If an IO +** error is encountered, then the IO error code is returned to the caller. +*/ +static int syncJournal(Pager *pPager, int newHdr){ + int rc; /* Return code */ + + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + assert( !pagerUseWal(pPager) ); + + rc = sqlite3PagerExclusiveLock(pPager); + if( rc!=SQLITE_OK ) return rc; + + if( !pPager->noSync ){ + assert( !pPager->tempFile ); + if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ + const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); + assert( isOpen(pPager->jfd) ); + + if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ + /* This block deals with an obscure problem. If the last connection + ** that wrote to this database was operating in persistent-journal + ** mode, then the journal file may at this point actually be larger + ** than Pager.journalOff bytes. If the next thing in the journal + ** file happens to be a journal-header (written as part of the + ** previous connection's transaction), and a crash or power-failure + ** occurs after nRec is updated but before this connection writes + ** anything else to the journal file (or commits/rolls back its + ** transaction), then SQLite may become confused when doing the + ** hot-journal rollback following recovery. It may roll back all + ** of this connections data, then proceed to rolling back the old, + ** out-of-date data that follows it. Database corruption. + ** + ** To work around this, if the journal file does appear to contain + ** a valid header following Pager.journalOff, then write a 0x00 + ** byte to the start of it to prevent it from being recognized. + ** + ** Variable iNextHdrOffset is set to the offset at which this + ** problematic header will occur, if it exists. aMagic is used + ** as a temporary buffer to inspect the first couple of bytes of + ** the potential journal header. + */ + i64 iNextHdrOffset; + u8 aMagic[8]; + u8 zHeader[sizeof(aJournalMagic)+4]; + + memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); + put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec); + + iNextHdrOffset = journalHdrOffset(pPager); + rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset); + if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){ + static const u8 zerobyte = 0; + rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset); + } + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ + return rc; + } + + /* Write the nRec value into the journal file header. If in + ** full-synchronous mode, sync the journal first. This ensures that + ** all data has really hit the disk before nRec is updated to mark + ** it as a candidate for rollback. + ** + ** This is not required if the persistent media supports the + ** SAFE_APPEND property. Because in this case it is not possible + ** for garbage data to be appended to the file, the nRec field + ** is populated with 0xFFFFFFFF when the journal header is written + ** and never needs to be updated. + */ + if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ + PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); + IOTRACE(("JSYNC %p\n", pPager)) + rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); + if( rc!=SQLITE_OK ) return rc; + } + IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr)); + rc = sqlite3OsWrite( + pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr + ); + if( rc!=SQLITE_OK ) return rc; + } + if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ + PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); + IOTRACE(("JSYNC %p\n", pPager)) + rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags| + (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0) + ); + if( rc!=SQLITE_OK ) return rc; + } + + pPager->journalHdr = pPager->journalOff; + if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ + pPager->nRec = 0; + rc = writeJournalHdr(pPager); + if( rc!=SQLITE_OK ) return rc; + } + }else{ + pPager->journalHdr = pPager->journalOff; + } + } + + /* Unless the pager is in noSync mode, the journal file was just + ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on + ** all pages. + */ + sqlite3PcacheClearSyncFlags(pPager->pPCache); + pPager->eState = PAGER_WRITER_DBMOD; + assert( assert_pager_state(pPager) ); + return SQLITE_OK; +} + +/* +** The argument is the first in a linked list of dirty pages connected +** by the PgHdr.pDirty pointer. This function writes each one of the +** in-memory pages in the list to the database file. The argument may +** be NULL, representing an empty list. In this case this function is +** a no-op. +** +** The pager must hold at least a RESERVED lock when this function +** is called. Before writing anything to the database file, this lock +** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained, +** SQLITE_BUSY is returned and no data is written to the database file. +** +** If the pager is a temp-file pager and the actual file-system file +** is not yet open, it is created and opened before any data is +** written out. +** +** Once the lock has been upgraded and, if necessary, the file opened, +** the pages are written out to the database file in list order. Writing +** a page is skipped if it meets either of the following criteria: +** +** * The page number is greater than Pager.dbSize, or +** * The PGHDR_DONT_WRITE flag is set on the page. +** +** If writing out a page causes the database file to grow, Pager.dbFileSize +** is updated accordingly. If page 1 is written out, then the value cached +** in Pager.dbFileVers[] is updated to match the new value stored in +** the database file. +** +** If everything is successful, SQLITE_OK is returned. If an IO error +** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot +** be obtained, SQLITE_BUSY is returned. +*/ +static int pager_write_pagelist(Pager *pPager, PgHdr *pList){ + int rc = SQLITE_OK; /* Return code */ + + /* This function is only called for rollback pagers in WRITER_DBMOD state. */ + assert( !pagerUseWal(pPager) ); + assert( pPager->tempFile || pPager->eState==PAGER_WRITER_DBMOD ); + assert( pPager->eLock==EXCLUSIVE_LOCK ); + assert( isOpen(pPager->fd) || pList->pDirty==0 ); + + /* If the file is a temp-file has not yet been opened, open it now. It + ** is not possible for rc to be other than SQLITE_OK if this branch + ** is taken, as pager_wait_on_lock() is a no-op for temp-files. + */ + if( !isOpen(pPager->fd) ){ + assert( pPager->tempFile && rc==SQLITE_OK ); + rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags); + } + + /* Before the first write, give the VFS a hint of what the final + ** file size will be. + */ + assert( rc!=SQLITE_OK || isOpen(pPager->fd) ); + if( rc==SQLITE_OK + && pPager->dbHintSizedbSize + && (pList->pDirty || pList->pgno>pPager->dbHintSize) + ){ + sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize; + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile); + pPager->dbHintSize = pPager->dbSize; + } + + while( rc==SQLITE_OK && pList ){ + Pgno pgno = pList->pgno; + + /* If there are dirty pages in the page cache with page numbers greater + ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to + ** make the file smaller (presumably by auto-vacuum code). Do not write + ** any such pages to the file. + ** + ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag + ** set (set by sqlite3PagerDontWrite()). + */ + if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){ + i64 offset = (pgno-1)*(i64)pPager->pageSize; /* Offset to write */ + char *pData; /* Data to write */ + + assert( (pList->flags&PGHDR_NEED_SYNC)==0 ); + if( pList->pgno==1 ) pager_write_changecounter(pList); + + pData = pList->pData; + + /* Write out the page data. */ + rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset); + + /* If page 1 was just written, update Pager.dbFileVers to match + ** the value now stored in the database file. If writing this + ** page caused the database file to grow, update dbFileSize. + */ + if( pgno==1 ){ + memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers)); + } + if( pgno>pPager->dbFileSize ){ + pPager->dbFileSize = pgno; + } + pPager->aStat[PAGER_STAT_WRITE]++; + + /* Update any backup objects copying the contents of this pager. */ + sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData); + + PAGERTRACE(("STORE %d page %d hash(%08x)\n", + PAGERID(pPager), pgno, pager_pagehash(pList))); + IOTRACE(("PGOUT %p %d\n", pPager, pgno)); + PAGER_INCR(sqlite3_pager_writedb_count); + }else{ + PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno)); + } + pager_set_pagehash(pList); + pList = pList->pDirty; + } + + return rc; +} + +/* +** Ensure that the sub-journal file is open. If it is already open, this +** function is a no-op. +** +** SQLITE_OK is returned if everything goes according to plan. An +** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen() +** fails. +*/ +static int openSubJournal(Pager *pPager){ + int rc = SQLITE_OK; + if( !isOpen(pPager->sjfd) ){ + const int flags = SQLITE_OPEN_SUBJOURNAL | SQLITE_OPEN_READWRITE + | SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE + | SQLITE_OPEN_DELETEONCLOSE; + int nStmtSpill = sqlite3Config.nStmtSpill; + if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ + nStmtSpill = -1; + } + rc = sqlite3JournalOpen(pPager->pVfs, 0, pPager->sjfd, flags, nStmtSpill); + } + return rc; +} + +/* +** Append a record of the current state of page pPg to the sub-journal. +** +** If successful, set the bit corresponding to pPg->pgno in the bitvecs +** for all open savepoints before returning. +** +** This function returns SQLITE_OK if everything is successful, an IO +** error code if the attempt to write to the sub-journal fails, or +** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint +** bitvec. +*/ +static int subjournalPage(PgHdr *pPg){ + int rc = SQLITE_OK; + Pager *pPager = pPg->pPager; + if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + + /* Open the sub-journal, if it has not already been opened */ + assert( pPager->useJournal ); + assert( isOpen(pPager->jfd) || pagerUseWal(pPager) ); + assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 ); + assert( pagerUseWal(pPager) + || pageInJournal(pPager, pPg) + || pPg->pgno>pPager->dbOrigSize + ); + rc = openSubJournal(pPager); + + /* If the sub-journal was opened successfully (or was already open), + ** write the journal record into the file. */ + if( rc==SQLITE_OK ){ + void *pData = pPg->pData; + i64 offset = (i64)pPager->nSubRec*(4+pPager->pageSize); + char *pData2; + pData2 = pData; + PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); + rc = write32bits(pPager->sjfd, offset, pPg->pgno); + if( rc==SQLITE_OK ){ + rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); + } + } + } + if( rc==SQLITE_OK ){ + pPager->nSubRec++; + assert( pPager->nSavepoint>0 ); + rc = addToSavepointBitvecs(pPager, pPg->pgno); + } + return rc; +} +static int subjournalPageIfRequired(PgHdr *pPg){ + if( subjRequiresPage(pPg) ){ + return subjournalPage(pPg); + }else{ + return SQLITE_OK; + } +} + +/* +** This function is called by the pcache layer when it has reached some +** soft memory limit. The first argument is a pointer to a Pager object +** (cast as a void*). The pager is always 'purgeable' (not an in-memory +** database). The second argument is a reference to a page that is +** currently dirty but has no outstanding references. The page +** is always associated with the Pager object passed as the first +** argument. +** +** The job of this function is to make pPg clean by writing its contents +** out to the database file, if possible. This may involve syncing the +** journal file. +** +** If successful, sqlite3PcacheMakeClean() is called on the page and +** SQLITE_OK returned. If an IO error occurs while trying to make the +** page clean, the IO error code is returned. If the page cannot be +** made clean for some other reason, but no error occurs, then SQLITE_OK +** is returned by sqlite3PcacheMakeClean() is not called. +*/ +static int pagerStress(void *p, PgHdr *pPg){ + Pager *pPager = (Pager *)p; + int rc = SQLITE_OK; + + assert( pPg->pPager==pPager ); + assert( pPg->flags&PGHDR_DIRTY ); + + /* The doNotSpill NOSYNC bit is set during times when doing a sync of + ** journal (and adding a new header) is not allowed. This occurs + ** during calls to sqlite3PagerWrite() while trying to journal multiple + ** pages belonging to the same sector. + ** + ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling + ** regardless of whether or not a sync is required. This is set during + ** a rollback or by user request, respectively. + ** + ** Spilling is also prohibited when in an error state since that could + ** lead to database corruption. In the current implementation it + ** is impossible for sqlite3PcacheFetch() to be called with createFlag==3 + ** while in the error state, hence it is impossible for this routine to + ** be called in the error state. Nevertheless, we include a NEVER() + ** test for the error state as a safeguard against future changes. + */ + if( NEVER(pPager->errCode) ) return SQLITE_OK; + testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK ); + testcase( pPager->doNotSpill & SPILLFLAG_OFF ); + testcase( pPager->doNotSpill & SPILLFLAG_NOSYNC ); + if( pPager->doNotSpill + && ((pPager->doNotSpill & (SPILLFLAG_ROLLBACK|SPILLFLAG_OFF))!=0 + || (pPg->flags & PGHDR_NEED_SYNC)!=0) + ){ + return SQLITE_OK; + } + + pPager->aStat[PAGER_STAT_SPILL]++; + pPg->pDirty = 0; + if( pagerUseWal(pPager) ){ + /* Write a single frame for this page to the log. */ + rc = subjournalPageIfRequired(pPg); + if( rc==SQLITE_OK ){ + rc = pagerWalFrames(pPager, pPg, 0, 0); + } + }else{ + +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( pPager->tempFile==0 ){ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc!=SQLITE_OK ) return pager_error(pPager, rc); + } +#endif + + /* Sync the journal file if required. */ + if( pPg->flags&PGHDR_NEED_SYNC + || pPager->eState==PAGER_WRITER_CACHEMOD + ){ + rc = syncJournal(pPager, 1); + } + + /* Write the contents of the page out to the database file. */ + if( rc==SQLITE_OK ){ + assert( (pPg->flags&PGHDR_NEED_SYNC)==0 ); + rc = pager_write_pagelist(pPager, pPg); + } + } + + /* Mark the page as clean. */ + if( rc==SQLITE_OK ){ + PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno)); + sqlite3PcacheMakeClean(pPg); + } + + return pager_error(pPager, rc); +} + +/* +** Flush all unreferenced dirty pages to disk. +*/ +SQLITE_PRIVATE int sqlite3PagerFlush(Pager *pPager){ + int rc = pPager->errCode; + if( !MEMDB ){ + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + assert( assert_pager_state(pPager) ); + while( rc==SQLITE_OK && pList ){ + PgHdr *pNext = pList->pDirty; + if( pList->nRef==0 ){ + rc = pagerStress((void*)pPager, pList); + } + pList = pNext; + } + } + + return rc; +} + +/* +** Allocate and initialize a new Pager object and put a pointer to it +** in *ppPager. The pager should eventually be freed by passing it +** to sqlite3PagerClose(). +** +** The zFilename argument is the path to the database file to open. +** If zFilename is NULL then a randomly-named temporary file is created +** and used as the file to be cached. Temporary files are be deleted +** automatically when they are closed. If zFilename is ":memory:" then +** all information is held in cache. It is never written to disk. +** This can be used to implement an in-memory database. +** +** The nExtra parameter specifies the number of bytes of space allocated +** along with each page reference. This space is available to the user +** via the sqlite3PagerGetExtra() API. When a new page is allocated, the +** first 8 bytes of this space are zeroed but the remainder is uninitialized. +** (The extra space is used by btree as the MemPage object.) +** +** The flags argument is used to specify properties that affect the +** operation of the pager. It should be passed some bitwise combination +** of the PAGER_* flags. +** +** The vfsFlags parameter is a bitmask to pass to the flags parameter +** of the xOpen() method of the supplied VFS when opening files. +** +** If the pager object is allocated and the specified file opened +** successfully, SQLITE_OK is returned and *ppPager set to point to +** the new pager object. If an error occurs, *ppPager is set to NULL +** and error code returned. This function may return SQLITE_NOMEM +** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or +** various SQLITE_IO_XXX errors. +*/ +SQLITE_PRIVATE int sqlite3PagerOpen( + sqlite3_vfs *pVfs, /* The virtual file system to use */ + Pager **ppPager, /* OUT: Return the Pager structure here */ + const char *zFilename, /* Name of the database file to open */ + int nExtra, /* Extra bytes append to each in-memory page */ + int flags, /* flags controlling this file */ + int vfsFlags, /* flags passed through to sqlite3_vfs.xOpen() */ + void (*xReinit)(DbPage*) /* Function to reinitialize pages */ +){ + u8 *pPtr; + Pager *pPager = 0; /* Pager object to allocate and return */ + int rc = SQLITE_OK; /* Return code */ + int tempFile = 0; /* True for temp files (incl. in-memory files) */ + int memDb = 0; /* True if this is an in-memory file */ +#ifndef SQLITE_OMIT_DESERIALIZE + int memJM = 0; /* Memory journal mode */ +#else +# define memJM 0 +#endif + int readOnly = 0; /* True if this is a read-only file */ + int journalFileSize; /* Bytes to allocate for each journal fd */ + char *zPathname = 0; /* Full path to database file */ + int nPathname = 0; /* Number of bytes in zPathname */ + int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */ + int pcacheSize = sqlite3PcacheSize(); /* Bytes to allocate for PCache */ + u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; /* Default page size */ + const char *zUri = 0; /* URI args to copy */ + int nUriByte = 1; /* Number of bytes of URI args at *zUri */ + int nUri = 0; /* Number of URI parameters */ + + /* Figure out how much space is required for each journal file-handle + ** (there are two of them, the main journal and the sub-journal). */ + journalFileSize = ROUND8(sqlite3JournalSize(pVfs)); + + /* Set the output variable to NULL in case an error occurs. */ + *ppPager = 0; + +#ifndef SQLITE_OMIT_MEMORYDB + if( flags & PAGER_MEMORY ){ + memDb = 1; + if( zFilename && zFilename[0] ){ + zPathname = sqlite3DbStrDup(0, zFilename); + if( zPathname==0 ) return SQLITE_NOMEM_BKPT; + nPathname = sqlite3Strlen30(zPathname); + zFilename = 0; + } + } +#endif + + /* Compute and store the full pathname in an allocated buffer pointed + ** to by zPathname, length nPathname. Or, if this is a temporary file, + ** leave both nPathname and zPathname set to 0. + */ + if( zFilename && zFilename[0] ){ + const char *z; + nPathname = pVfs->mxPathname+1; + zPathname = sqlite3DbMallocRaw(0, nPathname*2); + if( zPathname==0 ){ + return SQLITE_NOMEM_BKPT; + } + zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */ + rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_OK_SYMLINK ){ + if( vfsFlags & SQLITE_OPEN_NOFOLLOW ){ + rc = SQLITE_CANTOPEN_SYMLINK; + }else{ + rc = SQLITE_OK; + } + } + } + nPathname = sqlite3Strlen30(zPathname); + z = zUri = &zFilename[sqlite3Strlen30(zFilename)+1]; + while( *z ){ + z += strlen(z)+1; + z += strlen(z)+1; + nUri++; + } + nUriByte = (int)(&z[1] - zUri); + assert( nUriByte>=1 ); + if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){ + /* This branch is taken when the journal path required by + ** the database being opened will be more than pVfs->mxPathname + ** bytes in length. This means the database cannot be opened, + ** as it will not be possible to open the journal file or even + ** check for a hot-journal before reading. + */ + rc = SQLITE_CANTOPEN_BKPT; + } + if( rc!=SQLITE_OK ){ + sqlite3DbFree(0, zPathname); + return rc; + } + } + + /* Allocate memory for the Pager structure, PCache object, the + ** three file descriptors, the database file name and the journal + ** file name. The layout in memory is as follows: + ** + ** Pager object (sizeof(Pager) bytes) + ** PCache object (sqlite3PcacheSize() bytes) + ** Database file handle (pVfs->szOsFile bytes) + ** Sub-journal file handle (journalFileSize bytes) + ** Main journal file handle (journalFileSize bytes) + ** Ptr back to the Pager (sizeof(Pager*) bytes) + ** \0\0\0\0 database prefix (4 bytes) + ** Database file name (nPathname+1 bytes) + ** URI query parameters (nUriByte bytes) + ** Journal filename (nPathname+8+1 bytes) + ** WAL filename (nPathname+4+1 bytes) + ** \0\0\0 terminator (3 bytes) + ** + ** Some 3rd-party software, over which we have no control, depends on + ** the specific order of the filenames and the \0 separators between them + ** so that it can (for example) find the database filename given the WAL + ** filename without using the sqlite3_filename_database() API. This is a + ** misuse of SQLite and a bug in the 3rd-party software, but the 3rd-party + ** software is in widespread use, so we try to avoid changing the filename + ** order and formatting if possible. In particular, the details of the + ** filename format expected by 3rd-party software should be as follows: + ** + ** - Main Database Path + ** - \0 + ** - Multiple URI components consisting of: + ** - Key + ** - \0 + ** - Value + ** - \0 + ** - \0 + ** - Journal Path + ** - \0 + ** - WAL Path (zWALName) + ** - \0 + ** + ** The sqlite3_create_filename() interface and the databaseFilename() utility + ** that is used by sqlite3_filename_database() and kin also depend on the + ** specific formatting and order of the various filenames, so if the format + ** changes here, be sure to change it there as well. + */ + pPtr = (u8 *)sqlite3MallocZero( + ROUND8(sizeof(*pPager)) + /* Pager structure */ + ROUND8(pcacheSize) + /* PCache object */ + ROUND8(pVfs->szOsFile) + /* The main db file */ + journalFileSize * 2 + /* The two journal files */ + sizeof(pPager) + /* Space to hold a pointer */ + 4 + /* Database prefix */ + nPathname + 1 + /* database filename */ + nUriByte + /* query parameters */ + nPathname + 8 + 1 + /* Journal filename */ +#ifndef SQLITE_OMIT_WAL + nPathname + 4 + 1 + /* WAL filename */ +#endif + 3 /* Terminator */ + ); + assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) ); + if( !pPtr ){ + sqlite3DbFree(0, zPathname); + return SQLITE_NOMEM_BKPT; + } + pPager = (Pager*)pPtr; pPtr += ROUND8(sizeof(*pPager)); + pPager->pPCache = (PCache*)pPtr; pPtr += ROUND8(pcacheSize); + pPager->fd = (sqlite3_file*)pPtr; pPtr += ROUND8(pVfs->szOsFile); + pPager->sjfd = (sqlite3_file*)pPtr; pPtr += journalFileSize; + pPager->jfd = (sqlite3_file*)pPtr; pPtr += journalFileSize; + assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) ); + memcpy(pPtr, &pPager, sizeof(pPager)); pPtr += sizeof(pPager); + + /* Fill in the Pager.zFilename and pPager.zQueryParam fields */ + pPtr += 4; /* Skip zero prefix */ + pPager->zFilename = (char*)pPtr; + if( nPathname>0 ){ + memcpy(pPtr, zPathname, nPathname); pPtr += nPathname + 1; + if( zUri ){ + memcpy(pPtr, zUri, nUriByte); pPtr += nUriByte; + }else{ + pPtr++; + } + } + + + /* Fill in Pager.zJournal */ + if( nPathname>0 ){ + pPager->zJournal = (char*)pPtr; + memcpy(pPtr, zPathname, nPathname); pPtr += nPathname; + memcpy(pPtr, "-journal",8); pPtr += 8 + 1; +#ifdef SQLITE_ENABLE_8_3_NAMES + sqlite3FileSuffix3(zFilename,pPager->zJournal); + pPtr = (u8*)(pPager->zJournal + sqlite3Strlen30(pPager->zJournal)+1); +#endif + }else{ + pPager->zJournal = 0; + } + +#ifndef SQLITE_OMIT_WAL + /* Fill in Pager.zWal */ + if( nPathname>0 ){ + pPager->zWal = (char*)pPtr; + memcpy(pPtr, zPathname, nPathname); pPtr += nPathname; + memcpy(pPtr, "-wal", 4); pPtr += 4 + 1; +#ifdef SQLITE_ENABLE_8_3_NAMES + sqlite3FileSuffix3(zFilename, pPager->zWal); + pPtr = (u8*)(pPager->zWal + sqlite3Strlen30(pPager->zWal)+1); +#endif + }else{ + pPager->zWal = 0; + } +#endif + (void)pPtr; /* Suppress warning about unused pPtr value */ + + if( nPathname ) sqlite3DbFree(0, zPathname); + pPager->pVfs = pVfs; + pPager->vfsFlags = vfsFlags; + + /* Open the pager file. + */ + if( zFilename && zFilename[0] ){ + int fout = 0; /* VFS flags returned by xOpen() */ + rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout); + assert( !memDb ); +#ifndef SQLITE_OMIT_DESERIALIZE + pPager->memVfs = memJM = (fout&SQLITE_OPEN_MEMORY)!=0; +#endif + readOnly = (fout&SQLITE_OPEN_READONLY)!=0; + + /* If the file was successfully opened for read/write access, + ** choose a default page size in case we have to create the + ** database file. The default page size is the maximum of: + ** + ** + SQLITE_DEFAULT_PAGE_SIZE, + ** + The value returned by sqlite3OsSectorSize() + ** + The largest page size that can be written atomically. + */ + if( rc==SQLITE_OK ){ + int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); + if( !readOnly ){ + setSectorSize(pPager); + assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE); + if( szPageDfltsectorSize ){ + if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){ + szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE; + }else{ + szPageDflt = (u32)pPager->sectorSize; + } + } +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + { + int ii; + assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); + assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536); + for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){ + if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){ + szPageDflt = ii; + } + } + } +#endif + } + pPager->noLock = sqlite3_uri_boolean(pPager->zFilename, "nolock", 0); + if( (iDc & SQLITE_IOCAP_IMMUTABLE)!=0 + || sqlite3_uri_boolean(pPager->zFilename, "immutable", 0) ){ + vfsFlags |= SQLITE_OPEN_READONLY; + goto act_like_temp_file; + } + } + }else{ + /* If a temporary file is requested, it is not opened immediately. + ** In this case we accept the default page size and delay actually + ** opening the file until the first call to OsWrite(). + ** + ** This branch is also run for an in-memory database. An in-memory + ** database is the same as a temp-file that is never written out to + ** disk and uses an in-memory rollback journal. + ** + ** This branch also runs for files marked as immutable. + */ +act_like_temp_file: + tempFile = 1; + pPager->eState = PAGER_READER; /* Pretend we already have a lock */ + pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE mode */ + pPager->noLock = 1; /* Do no locking */ + readOnly = (vfsFlags&SQLITE_OPEN_READONLY); + } + + /* The following call to PagerSetPagesize() serves to set the value of + ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer. + */ + if( rc==SQLITE_OK ){ + assert( pPager->memDb==0 ); + rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1); + testcase( rc!=SQLITE_OK ); + } + + /* Initialize the PCache object. */ + if( rc==SQLITE_OK ){ + nExtra = ROUND8(nExtra); + assert( nExtra>=8 && nExtra<1000 ); + rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, + !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); + } + + /* If an error occurred above, free the Pager structure and close the file. + */ + if( rc!=SQLITE_OK ){ + sqlite3OsClose(pPager->fd); + sqlite3PageFree(pPager->pTmpSpace); + sqlite3_free(pPager); + return rc; + } + + PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename)); + IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename)) + + pPager->useJournal = (u8)useJournal; + /* pPager->stmtOpen = 0; */ + /* pPager->stmtInUse = 0; */ + /* pPager->nRef = 0; */ + /* pPager->stmtSize = 0; */ + /* pPager->stmtJSize = 0; */ + /* pPager->nPage = 0; */ + pPager->mxPgno = SQLITE_MAX_PAGE_COUNT; + /* pPager->state = PAGER_UNLOCK; */ + /* pPager->errMask = 0; */ + pPager->tempFile = (u8)tempFile; + assert( tempFile==PAGER_LOCKINGMODE_NORMAL + || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE ); + assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 ); + pPager->exclusiveMode = (u8)tempFile; + pPager->changeCountDone = pPager->tempFile; + pPager->memDb = (u8)memDb; + pPager->readOnly = (u8)readOnly; + assert( useJournal || pPager->tempFile ); + pPager->noSync = pPager->tempFile; + if( pPager->noSync ){ + assert( pPager->fullSync==0 ); + assert( pPager->extraSync==0 ); + assert( pPager->syncFlags==0 ); + assert( pPager->walSyncFlags==0 ); + }else{ + pPager->fullSync = 1; + pPager->extraSync = 0; + pPager->syncFlags = SQLITE_SYNC_NORMAL; + pPager->walSyncFlags = SQLITE_SYNC_NORMAL | (SQLITE_SYNC_NORMAL<<2); + } + /* pPager->pFirst = 0; */ + /* pPager->pFirstSynced = 0; */ + /* pPager->pLast = 0; */ + pPager->nExtra = (u16)nExtra; + pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT; + assert( isOpen(pPager->fd) || tempFile ); + setSectorSize(pPager); + if( !useJournal ){ + pPager->journalMode = PAGER_JOURNALMODE_OFF; + }else if( memDb || memJM ){ + pPager->journalMode = PAGER_JOURNALMODE_MEMORY; + } + /* pPager->xBusyHandler = 0; */ + /* pPager->pBusyHandlerArg = 0; */ + pPager->xReiniter = xReinit; + setGetterMethod(pPager); + /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ + /* pPager->szMmap = SQLITE_DEFAULT_MMAP_SIZE // will be set by btree.c */ + + *ppPager = pPager; + return SQLITE_OK; +} + +/* +** Return the sqlite3_file for the main database given the name +** of the corresonding WAL or Journal name as passed into +** xOpen. +*/ +SQLITE_API sqlite3_file *sqlite3_database_file_object(const char *zName){ + Pager *pPager; + while( zName[-1]!=0 || zName[-2]!=0 || zName[-3]!=0 || zName[-4]!=0 ){ + zName--; + } + pPager = *(Pager**)(zName - 4 - sizeof(Pager*)); + return pPager->fd; +} + + +/* +** This function is called after transitioning from PAGER_UNLOCK to +** PAGER_SHARED state. It tests if there is a hot journal present in +** the file-system for the given pager. A hot journal is one that +** needs to be played back. According to this function, a hot-journal +** file exists if the following criteria are met: +** +** * The journal file exists in the file system, and +** * No process holds a RESERVED or greater lock on the database file, and +** * The database file itself is greater than 0 bytes in size, and +** * The first byte of the journal file exists and is not 0x00. +** +** If the current size of the database file is 0 but a journal file +** exists, that is probably an old journal left over from a prior +** database with the same name. In this case the journal file is +** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK +** is returned. +** +** This routine does not check if there is a super-journal filename +** at the end of the file. If there is, and that super-journal file +** does not exist, then the journal file is not really hot. In this +** case this routine will return a false-positive. The pager_playback() +** routine will discover that the journal file is not really hot and +** will not roll it back. +** +** If a hot-journal file is found to exist, *pExists is set to 1 and +** SQLITE_OK returned. If no hot-journal file is present, *pExists is +** set to 0 and SQLITE_OK returned. If an IO error occurs while trying +** to determine whether or not a hot-journal file exists, the IO error +** code is returned and the value of *pExists is undefined. +*/ +static int hasHotJournal(Pager *pPager, int *pExists){ + sqlite3_vfs * const pVfs = pPager->pVfs; + int rc = SQLITE_OK; /* Return code */ + int exists = 1; /* True if a journal file is present */ + int jrnlOpen = !!isOpen(pPager->jfd); + + assert( pPager->useJournal ); + assert( isOpen(pPager->fd) ); + assert( pPager->eState==PAGER_OPEN ); + + assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) & + SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN + )); + + *pExists = 0; + if( !jrnlOpen ){ + rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists); + } + if( rc==SQLITE_OK && exists ){ + int locked = 0; /* True if some process holds a RESERVED lock */ + + /* Race condition here: Another process might have been holding the + ** the RESERVED lock and have a journal open at the sqlite3OsAccess() + ** call above, but then delete the journal and drop the lock before + ** we get to the following sqlite3OsCheckReservedLock() call. If that + ** is the case, this routine might think there is a hot journal when + ** in fact there is none. This results in a false-positive which will + ** be dealt with by the playback routine. Ticket #3883. + */ + rc = sqlite3OsCheckReservedLock(pPager->fd, &locked); + if( rc==SQLITE_OK && !locked ){ + Pgno nPage; /* Number of pages in database file */ + + assert( pPager->tempFile==0 ); + rc = pagerPagecount(pPager, &nPage); + if( rc==SQLITE_OK ){ + /* If the database is zero pages in size, that means that either (1) the + ** journal is a remnant from a prior database with the same name where + ** the database file but not the journal was deleted, or (2) the initial + ** transaction that populates a new database is being rolled back. + ** In either case, the journal file can be deleted. However, take care + ** not to delete the journal file if it is already open due to + ** journal_mode=PERSIST. + */ + if( nPage==0 && !jrnlOpen ){ + sqlite3BeginBenignMalloc(); + if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){ + sqlite3OsDelete(pVfs, pPager->zJournal, 0); + if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK); + } + sqlite3EndBenignMalloc(); + }else{ + /* The journal file exists and no other connection has a reserved + ** or greater lock on the database file. Now check that there is + ** at least one non-zero bytes at the start of the journal file. + ** If there is, then we consider this journal to be hot. If not, + ** it can be ignored. + */ + if( !jrnlOpen ){ + int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL; + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f); + } + if( rc==SQLITE_OK ){ + u8 first = 0; + rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + if( !jrnlOpen ){ + sqlite3OsClose(pPager->jfd); + } + *pExists = (first!=0); + }else if( rc==SQLITE_CANTOPEN ){ + /* If we cannot open the rollback journal file in order to see if + ** it has a zero header, that might be due to an I/O error, or + ** it might be due to the race condition described above and in + ** ticket #3883. Either way, assume that the journal is hot. + ** This might be a false positive. But if it is, then the + ** automatic journal playback and recovery mechanism will deal + ** with it under an EXCLUSIVE lock where we do not need to + ** worry so much with race conditions. + */ + *pExists = 1; + rc = SQLITE_OK; + } + } + } + } + } + + return rc; +} + +/* +** This function is called to obtain a shared lock on the database file. +** It is illegal to call sqlite3PagerGet() until after this function +** has been successfully called. If a shared-lock is already held when +** this function is called, it is a no-op. +** +** The following operations are also performed by this function. +** +** 1) If the pager is currently in PAGER_OPEN state (no lock held +** on the database file), then an attempt is made to obtain a +** SHARED lock on the database file. Immediately after obtaining +** the SHARED lock, the file-system is checked for a hot-journal, +** which is played back if present. Following any hot-journal +** rollback, the contents of the cache are validated by checking +** the 'change-counter' field of the database file header and +** discarded if they are found to be invalid. +** +** 2) If the pager is running in exclusive-mode, and there are currently +** no outstanding references to any pages, and is in the error state, +** then an attempt is made to clear the error state by discarding +** the contents of the page cache and rolling back any open journal +** file. +** +** If everything is successful, SQLITE_OK is returned. If an IO error +** occurs while locking the database, checking for a hot-journal file or +** rolling back a journal file, the IO error code is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + + /* This routine is only called from b-tree and only when there are no + ** outstanding pages. This implies that the pager state should either + ** be OPEN or READER. READER is only possible if the pager is or was in + ** exclusive access mode. */ + assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); + assert( assert_pager_state(pPager) ); + assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); + assert( pPager->errCode==SQLITE_OK ); + + if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){ + int bHotJournal = 1; /* True if there exists a hot journal-file */ + + assert( !MEMDB ); + assert( pPager->tempFile==0 || pPager->eLock==EXCLUSIVE_LOCK ); + + rc = pager_wait_on_lock(pPager, SHARED_LOCK); + if( rc!=SQLITE_OK ){ + assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK ); + goto failed; + } + + /* If a journal file exists, and there is no RESERVED lock on the + ** database file, then it either needs to be played back or deleted. + */ + if( pPager->eLock<=SHARED_LOCK ){ + rc = hasHotJournal(pPager, &bHotJournal); + } + if( rc!=SQLITE_OK ){ + goto failed; + } + if( bHotJournal ){ + if( pPager->readOnly ){ + rc = SQLITE_READONLY_ROLLBACK; + goto failed; + } + + /* Get an EXCLUSIVE lock on the database file. At this point it is + ** important that a RESERVED lock is not obtained on the way to the + ** EXCLUSIVE lock. If it were, another process might open the + ** database file, detect the RESERVED lock, and conclude that the + ** database is safe to read while this process is still rolling the + ** hot-journal back. + ** + ** Because the intermediate RESERVED lock is not requested, any + ** other process attempting to access the database file will get to + ** this point in the code and fail to obtain its own EXCLUSIVE lock + ** on the database file. + ** + ** Unless the pager is in locking_mode=exclusive mode, the lock is + ** downgraded to SHARED_LOCK before this function returns. + */ + rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); + if( rc!=SQLITE_OK ){ + goto failed; + } + + /* If it is not already open and the file exists on disk, open the + ** journal for read/write access. Write access is required because + ** in exclusive-access mode the file descriptor will be kept open + ** and possibly used for a transaction later on. Also, write-access + ** is usually required to finalize the journal in journal_mode=persist + ** mode (and also for journal_mode=truncate on some systems). + ** + ** If the journal does not exist, it usually means that some + ** other connection managed to get in and roll it back before + ** this connection obtained the exclusive lock above. Or, it + ** may mean that the pager was in the error-state when this + ** function was called and the journal file does not exist. + */ + if( !isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + sqlite3_vfs * const pVfs = pPager->pVfs; + int bExists; /* True if journal file exists */ + rc = sqlite3OsAccess( + pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists); + if( rc==SQLITE_OK && bExists ){ + int fout = 0; + int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; + assert( !pPager->tempFile ); + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout); + assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); + if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){ + rc = SQLITE_CANTOPEN_BKPT; + sqlite3OsClose(pPager->jfd); + } + } + } + + /* Playback and delete the journal. Drop the database write + ** lock and reacquire the read lock. Purge the cache before + ** playing back the hot-journal so that we don't end up with + ** an inconsistent cache. Sync the hot journal before playing + ** it back since the process that crashed and left the hot journal + ** probably did not sync it and we are required to always sync + ** the journal before playing it back. + */ + if( isOpen(pPager->jfd) ){ + assert( rc==SQLITE_OK ); + rc = pagerSyncHotJournal(pPager); + if( rc==SQLITE_OK ){ + rc = pager_playback(pPager, !pPager->tempFile); + pPager->eState = PAGER_OPEN; + } + }else if( !pPager->exclusiveMode ){ + pagerUnlockDb(pPager, SHARED_LOCK); + } + + if( rc!=SQLITE_OK ){ + /* This branch is taken if an error occurs while trying to open + ** or roll back a hot-journal while holding an EXCLUSIVE lock. The + ** pager_unlock() routine will be called before returning to unlock + ** the file. If the unlock attempt fails, then Pager.eLock must be + ** set to UNKNOWN_LOCK (see the comment above the #define for + ** UNKNOWN_LOCK above for an explanation). + ** + ** In order to get pager_unlock() to do this, set Pager.eState to + ** PAGER_ERROR now. This is not actually counted as a transition + ** to ERROR state in the state diagram at the top of this file, + ** since we know that the same call to pager_unlock() will very + ** shortly transition the pager object to the OPEN state. Calling + ** assert_pager_state() would fail now, as it should not be possible + ** to be in ERROR state when there are zero outstanding page + ** references. + */ + pager_error(pPager, rc); + goto failed; + } + + assert( pPager->eState==PAGER_OPEN ); + assert( (pPager->eLock==SHARED_LOCK) + || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK) + ); + } + + if( !pPager->tempFile && pPager->hasHeldSharedLock ){ + /* The shared-lock has just been acquired then check to + ** see if the database has been modified. If the database has changed, + ** flush the cache. The hasHeldSharedLock flag prevents this from + ** occurring on the very first access to a file, in order to save a + ** single unnecessary sqlite3OsRead() call at the start-up. + ** + ** Database changes are detected by looking at 15 bytes beginning + ** at offset 24 into the file. The first 4 of these 16 bytes are + ** a 32-bit counter that is incremented with each change. The + ** other bytes change randomly with each file change when + ** a codec is in use. + ** + ** There is a vanishingly small chance that a change will not be + ** detected. The chance of an undetected change is so small that + ** it can be neglected. + */ + char dbFileVers[sizeof(pPager->dbFileVers)]; + + IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); + rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); + if( rc!=SQLITE_OK ){ + if( rc!=SQLITE_IOERR_SHORT_READ ){ + goto failed; + } + memset(dbFileVers, 0, sizeof(dbFileVers)); + } + + if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ + pager_reset(pPager); + + /* Unmap the database file. It is possible that external processes + ** may have truncated the database file and then extended it back + ** to its original size while this process was not holding a lock. + ** In this case there may exist a Pager.pMap mapping that appears + ** to be the right size but is not actually valid. Avoid this + ** possibility by unmapping the db here. */ + if( USEFETCH(pPager) ){ + sqlite3OsUnfetch(pPager->fd, 0, 0); + } + } + } + + /* If there is a WAL file in the file-system, open this database in WAL + ** mode. Otherwise, the following function call is a no-op. + */ + rc = pagerOpenWalIfPresent(pPager); +#ifndef SQLITE_OMIT_WAL + assert( pPager->pWal==0 || rc==SQLITE_OK ); +#endif + } + + if( pagerUseWal(pPager) ){ + assert( rc==SQLITE_OK ); + rc = pagerBeginReadTransaction(pPager); + } + + if( pPager->tempFile==0 && pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){ + rc = pagerPagecount(pPager, &pPager->dbSize); + } + + failed: + if( rc!=SQLITE_OK ){ + assert( !MEMDB ); + pager_unlock(pPager); + assert( pPager->eState==PAGER_OPEN ); + }else{ + pPager->eState = PAGER_READER; + pPager->hasHeldSharedLock = 1; + } + return rc; +} + +/* +** If the reference count has reached zero, rollback any active +** transaction and unlock the pager. +** +** Except, in locking_mode=EXCLUSIVE when there is nothing to in +** the rollback journal, the unlock is not performed and there is +** nothing to rollback, so this routine is a no-op. +*/ +static void pagerUnlockIfUnused(Pager *pPager){ + if( sqlite3PcacheRefCount(pPager->pPCache)==0 ){ + assert( pPager->nMmapOut==0 ); /* because page1 is never memory mapped */ + pagerUnlockAndRollback(pPager); + } +} + +/* +** The page getter methods each try to acquire a reference to a +** page with page number pgno. If the requested reference is +** successfully obtained, it is copied to *ppPage and SQLITE_OK returned. +** +** There are different implementations of the getter method depending +** on the current state of the pager. +** +** getPageNormal() -- The normal getter +** getPageError() -- Used if the pager is in an error state +** getPageMmap() -- Used if memory-mapped I/O is enabled +** +** If the requested page is already in the cache, it is returned. +** Otherwise, a new page object is allocated and populated with data +** read from the database file. In some cases, the pcache module may +** choose not to allocate a new page object and may reuse an existing +** object with no outstanding references. +** +** The extra data appended to a page is always initialized to zeros the +** first time a page is loaded into memory. If the page requested is +** already in the cache when this function is called, then the extra +** data is left as it was when the page object was last used. +** +** If the database image is smaller than the requested page or if +** the flags parameter contains the PAGER_GET_NOCONTENT bit and the +** requested page is not already stored in the cache, then no +** actual disk read occurs. In this case the memory image of the +** page is initialized to all zeros. +** +** If PAGER_GET_NOCONTENT is true, it means that we do not care about +** the contents of the page. This occurs in two scenarios: +** +** a) When reading a free-list leaf page from the database, and +** +** b) When a savepoint is being rolled back and we need to load +** a new page into the cache to be filled with the data read +** from the savepoint journal. +** +** If PAGER_GET_NOCONTENT is true, then the data returned is zeroed instead +** of being read from the database. Additionally, the bits corresponding +** to pgno in Pager.pInJournal (bitvec of pages already written to the +** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open +** savepoints are set. This means if the page is made writable at any +** point in the future, using a call to sqlite3PagerWrite(), its contents +** will not be journaled. This saves IO. +** +** The acquisition might fail for several reasons. In all cases, +** an appropriate error code is returned and *ppPage is set to NULL. +** +** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt +** to find a page in the in-memory cache first. If the page is not already +** in memory, this routine goes to disk to read it in whereas Lookup() +** just returns 0. This routine acquires a read-lock the first time it +** has to go to disk, and could also playback an old journal if necessary. +** Since Lookup() never goes to disk, it never has to deal with locks +** or journal files. +*/ +static int getPageNormal( + Pager *pPager, /* The pager open on the database file */ + Pgno pgno, /* Page number to fetch */ + DbPage **ppPage, /* Write a pointer to the page here */ + int flags /* PAGER_GET_XXX flags */ +){ + int rc = SQLITE_OK; + PgHdr *pPg; + u8 noContent; /* True if PAGER_GET_NOCONTENT is set */ + sqlite3_pcache_page *pBase; + + assert( pPager->errCode==SQLITE_OK ); + assert( pPager->eState>=PAGER_READER ); + assert( assert_pager_state(pPager) ); + assert( pPager->hasHeldSharedLock==1 ); + + if( pgno==0 ) return SQLITE_CORRUPT_BKPT; + pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3); + if( pBase==0 ){ + pPg = 0; + rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + if( pBase==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto pager_acquire_err; + } + } + pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); + assert( pPg==(*ppPage) ); + assert( pPg->pgno==pgno ); + assert( pPg->pPager==pPager || pPg->pPager==0 ); + + noContent = (flags & PAGER_GET_NOCONTENT)!=0; + if( pPg->pPager && !noContent ){ + /* In this case the pcache already contains an initialized copy of + ** the page. Return without further ado. */ + assert( pgno!=PAGER_MJ_PGNO(pPager) ); + pPager->aStat[PAGER_STAT_HIT]++; + return SQLITE_OK; + + }else{ + /* The pager cache has created a new page. Its content needs to + ** be initialized. But first some error checks: + ** + ** (*) obsolete. Was: maximum page number is 2^31 + ** (2) Never try to fetch the locking page + */ + if( pgno==PAGER_MJ_PGNO(pPager) ){ + rc = SQLITE_CORRUPT_BKPT; + goto pager_acquire_err; + } + + pPg->pPager = pPager; + + assert( !isOpen(pPager->fd) || !MEMDB ); + if( !isOpen(pPager->fd) || pPager->dbSizepPager->mxPgno ){ + rc = SQLITE_FULL; + goto pager_acquire_err; + } + if( noContent ){ + /* Failure to set the bits in the InJournal bit-vectors is benign. + ** It merely means that we might do some extra work to journal a + ** page that does not need to be journaled. Nevertheless, be sure + ** to test the case where a malloc error occurs while trying to set + ** a bit in a bit vector. + */ + sqlite3BeginBenignMalloc(); + if( pgno<=pPager->dbOrigSize ){ + TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno); + testcase( rc==SQLITE_NOMEM ); + } + TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno); + testcase( rc==SQLITE_NOMEM ); + sqlite3EndBenignMalloc(); + } + memset(pPg->pData, 0, pPager->pageSize); + IOTRACE(("ZERO %p %d\n", pPager, pgno)); + }else{ + assert( pPg->pPager==pPager ); + pPager->aStat[PAGER_STAT_MISS]++; + rc = readDbPage(pPg); + if( rc!=SQLITE_OK ){ + goto pager_acquire_err; + } + } + pager_set_pagehash(pPg); + } + return SQLITE_OK; + +pager_acquire_err: + assert( rc!=SQLITE_OK ); + if( pPg ){ + sqlite3PcacheDrop(pPg); + } + pagerUnlockIfUnused(pPager); + *ppPage = 0; + return rc; +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* The page getter for when memory-mapped I/O is enabled */ +static int getPageMMap( + Pager *pPager, /* The pager open on the database file */ + Pgno pgno, /* Page number to fetch */ + DbPage **ppPage, /* Write a pointer to the page here */ + int flags /* PAGER_GET_XXX flags */ +){ + int rc = SQLITE_OK; + PgHdr *pPg = 0; + u32 iFrame = 0; /* Frame to read from WAL file */ + + /* It is acceptable to use a read-only (mmap) page for any page except + ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY + ** flag was specified by the caller. And so long as the db is not a + ** temporary or in-memory database. */ + const int bMmapOk = (pgno>1 + && (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY)) + ); + + assert( USEFETCH(pPager) ); + + /* Optimization note: Adding the "pgno<=1" term before "pgno==0" here + ** allows the compiler optimizer to reuse the results of the "pgno>1" + ** test in the previous statement, and avoid testing pgno==0 in the + ** common case where pgno is large. */ + if( pgno<=1 && pgno==0 ){ + return SQLITE_CORRUPT_BKPT; + } + assert( pPager->eState>=PAGER_READER ); + assert( assert_pager_state(pPager) ); + assert( pPager->hasHeldSharedLock==1 ); + assert( pPager->errCode==SQLITE_OK ); + + if( bMmapOk && pagerUseWal(pPager) ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ){ + *ppPage = 0; + return rc; + } + } + if( bMmapOk && iFrame==0 ){ + void *pData = 0; + rc = sqlite3OsFetch(pPager->fd, + (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData + ); + if( rc==SQLITE_OK && pData ){ + if( pPager->eState>PAGER_READER || pPager->tempFile ){ + pPg = sqlite3PagerLookup(pPager, pgno); + } + if( pPg==0 ){ + rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg); + }else{ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData); + } + if( pPg ){ + assert( rc==SQLITE_OK ); + *ppPage = pPg; + return SQLITE_OK; + } + } + if( rc!=SQLITE_OK ){ + *ppPage = 0; + return rc; + } + } + return getPageNormal(pPager, pgno, ppPage, flags); +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* The page getter method for when the pager is an error state */ +static int getPageError( + Pager *pPager, /* The pager open on the database file */ + Pgno pgno, /* Page number to fetch */ + DbPage **ppPage, /* Write a pointer to the page here */ + int flags /* PAGER_GET_XXX flags */ +){ + UNUSED_PARAMETER(pgno); + UNUSED_PARAMETER(flags); + assert( pPager->errCode!=SQLITE_OK ); + *ppPage = 0; + return pPager->errCode; +} + + +/* Dispatch all page fetch requests to the appropriate getter method. +*/ +SQLITE_PRIVATE int sqlite3PagerGet( + Pager *pPager, /* The pager open on the database file */ + Pgno pgno, /* Page number to fetch */ + DbPage **ppPage, /* Write a pointer to the page here */ + int flags /* PAGER_GET_XXX flags */ +){ + /* printf("PAGE %u\n", pgno); fflush(stdout); */ + return pPager->xGet(pPager, pgno, ppPage, flags); +} + +/* +** Acquire a page if it is already in the in-memory cache. Do +** not read the page from disk. Return a pointer to the page, +** or 0 if the page is not in cache. +** +** See also sqlite3PagerGet(). The difference between this routine +** and sqlite3PagerGet() is that _get() will go to the disk and read +** in the page if the page is not already in cache. This routine +** returns NULL if the page is not in cache or if a disk I/O error +** has ever happened. +*/ +SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ + sqlite3_pcache_page *pPage; + assert( pPager!=0 ); + assert( pgno!=0 ); + assert( pPager->pPCache!=0 ); + pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); + assert( pPage==0 || pPager->hasHeldSharedLock ); + if( pPage==0 ) return 0; + return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); +} + +/* +** Release a page reference. +** +** The sqlite3PagerUnref() and sqlite3PagerUnrefNotNull() may only be +** used if we know that the page being released is not the last page. +** The btree layer always holds page1 open until the end, so these first +** to routines can be used to release any page other than BtShared.pPage1. +** +** Use sqlite3PagerUnrefPageOne() to release page1. This latter routine +** checks the total number of outstanding pages and if the number of +** pages reaches zero it drops the database lock. +*/ +SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage *pPg){ + TESTONLY( Pager *pPager = pPg->pPager; ) + assert( pPg!=0 ); + if( pPg->flags & PGHDR_MMAP ){ + assert( pPg->pgno!=1 ); /* Page1 is never memory mapped */ + pagerReleaseMapPage(pPg); + }else{ + sqlite3PcacheRelease(pPg); + } + /* Do not use this routine to release the last reference to page1 */ + assert( sqlite3PcacheRefCount(pPager->pPCache)>0 ); +} +SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){ + if( pPg ) sqlite3PagerUnrefNotNull(pPg); +} +SQLITE_PRIVATE void sqlite3PagerUnrefPageOne(DbPage *pPg){ + Pager *pPager; + assert( pPg!=0 ); + assert( pPg->pgno==1 ); + assert( (pPg->flags & PGHDR_MMAP)==0 ); /* Page1 is never memory mapped */ + pPager = pPg->pPager; + sqlite3PcacheRelease(pPg); + pagerUnlockIfUnused(pPager); +} + +/* +** This function is called at the start of every write transaction. +** There must already be a RESERVED or EXCLUSIVE lock on the database +** file when this routine is called. +** +** Open the journal file for pager pPager and write a journal header +** to the start of it. If there are active savepoints, open the sub-journal +** as well. This function is only used when the journal file is being +** opened to write a rollback log for a transaction. It is not used +** when opening a hot journal file to roll it back. +** +** If the journal file is already open (as it may be in exclusive mode), +** then this function just writes a journal header to the start of the +** already open file. +** +** Whether or not the journal file is opened by this function, the +** Pager.pInJournal bitvec structure is allocated. +** +** Return SQLITE_OK if everything is successful. Otherwise, return +** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or +** an IO error code if opening or writing the journal file fails. +*/ +static int pager_open_journal(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + sqlite3_vfs * const pVfs = pPager->pVfs; /* Local cache of vfs pointer */ + + assert( pPager->eState==PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + assert( pPager->pInJournal==0 ); + + /* If already in the error state, this function is a no-op. But on + ** the other hand, this routine is never called if we are already in + ** an error state. */ + if( NEVER(pPager->errCode) ) return pPager->errCode; + + if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize); + if( pPager->pInJournal==0 ){ + return SQLITE_NOMEM_BKPT; + } + + /* Open the journal file if it is not already open. */ + if( !isOpen(pPager->jfd) ){ + if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ + sqlite3MemJournalOpen(pPager->jfd); + }else{ + int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE; + int nSpill; + + if( pPager->tempFile ){ + flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL); + nSpill = sqlite3Config.nStmtSpill; + }else{ + flags |= SQLITE_OPEN_MAIN_JOURNAL; + nSpill = jrnlBufferSize(pPager); + } + + /* Verify that the database still has the same name as it did when + ** it was originally opened. */ + rc = databaseIsUnmoved(pPager); + if( rc==SQLITE_OK ){ + rc = sqlite3JournalOpen ( + pVfs, pPager->zJournal, pPager->jfd, flags, nSpill + ); + } + } + assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); + } + + + /* Write the first journal header to the journal file and open + ** the sub-journal if necessary. + */ + if( rc==SQLITE_OK ){ + /* TODO: Check if all of these are really required. */ + pPager->nRec = 0; + pPager->journalOff = 0; + pPager->setSuper = 0; + pPager->journalHdr = 0; + rc = writeJournalHdr(pPager); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + }else{ + assert( pPager->eState==PAGER_WRITER_LOCKED ); + pPager->eState = PAGER_WRITER_CACHEMOD; + } + + return rc; +} + +/* +** Begin a write-transaction on the specified pager object. If a +** write-transaction has already been opened, this function is a no-op. +** +** If the exFlag argument is false, then acquire at least a RESERVED +** lock on the database file. If exFlag is true, then acquire at least +** an EXCLUSIVE lock. If such a lock is already held, no locking +** functions need be called. +** +** If the subjInMemory argument is non-zero, then any sub-journal opened +** within this transaction will be opened as an in-memory file. This +** has no effect if the sub-journal is already opened (as it may be when +** running in exclusive mode) or if the transaction does not require a +** sub-journal. If the subjInMemory argument is zero, then any required +** sub-journal is implemented in-memory if pPager is an in-memory database, +** or using a temporary file otherwise. +*/ +SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ + int rc = SQLITE_OK; + + if( pPager->errCode ) return pPager->errCode; + assert( pPager->eState>=PAGER_READER && pPager->eStatesubjInMemory = (u8)subjInMemory; + + if( pPager->eState==PAGER_READER ){ + assert( pPager->pInJournal==0 ); + + if( pagerUseWal(pPager) ){ + /* If the pager is configured to use locking_mode=exclusive, and an + ** exclusive lock on the database is not already held, obtain it now. + */ + if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){ + rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); + if( rc!=SQLITE_OK ){ + return rc; + } + (void)sqlite3WalExclusiveMode(pPager->pWal, 1); + } + + /* Grab the write lock on the log file. If successful, upgrade to + ** PAGER_RESERVED state. Otherwise, return an error code to the caller. + ** The busy-handler is not invoked if another connection already + ** holds the write-lock. If possible, the upper layer will call it. + */ + rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + }else{ + /* Obtain a RESERVED lock on the database file. If the exFlag parameter + ** is true, then immediately upgrade this to an EXCLUSIVE lock. The + ** busy-handler callback can be used when upgrading to the EXCLUSIVE + ** lock, but not when obtaining the RESERVED lock. + */ + rc = pagerLockDb(pPager, RESERVED_LOCK); + if( rc==SQLITE_OK && exFlag ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + } + } + + if( rc==SQLITE_OK ){ + /* Change to WRITER_LOCKED state. + ** + ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD + ** when it has an open transaction, but never to DBMOD or FINISHED. + ** This is because in those states the code to roll back savepoint + ** transactions may copy data from the sub-journal into the database + ** file as well as into the page cache. Which would be incorrect in + ** WAL mode. + */ + pPager->eState = PAGER_WRITER_LOCKED; + pPager->dbHintSize = pPager->dbSize; + pPager->dbFileSize = pPager->dbSize; + pPager->dbOrigSize = pPager->dbSize; + pPager->journalOff = 0; + } + + assert( rc==SQLITE_OK || pPager->eState==PAGER_READER ); + assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + } + + PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager))); + return rc; +} + +/* +** Write page pPg onto the end of the rollback journal. +*/ +static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + int rc; + u32 cksum; + char *pData2; + i64 iOff = pPager->journalOff; + + /* We should never write to the journal file the page that + ** contains the database locks. The following assert verifies + ** that we do not. */ + assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); + + assert( pPager->journalHdr<=pPager->journalOff ); + pData2 = pPg->pData; + cksum = pager_cksum(pPager, (u8*)pData2); + + /* Even if an IO or diskfull error occurs while journalling the + ** page in the block above, set the need-sync flag for the page. + ** Otherwise, when the transaction is rolled back, the logic in + ** playback_one_page() will think that the page needs to be restored + ** in the database file. And if an IO error occurs while doing so, + ** then corruption may follow. + */ + pPg->flags |= PGHDR_NEED_SYNC; + + rc = write32bits(pPager->jfd, iOff, pPg->pgno); + if( rc!=SQLITE_OK ) return rc; + rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); + if( rc!=SQLITE_OK ) return rc; + rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); + if( rc!=SQLITE_OK ) return rc; + + IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, + pPager->journalOff, pPager->pageSize)); + PAGER_INCR(sqlite3_pager_writej_count); + PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); + + pPager->journalOff += 8 + pPager->pageSize; + pPager->nRec++; + assert( pPager->pInJournal!=0 ); + rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); + testcase( rc==SQLITE_NOMEM ); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + rc |= addToSavepointBitvecs(pPager, pPg->pgno); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + return rc; +} + +/* +** Mark a single data page as writeable. The page is written into the +** main journal or sub-journal as required. If the page is written into +** one of the journals, the corresponding bit is set in the +** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs +** of any open savepoints as appropriate. +*/ +static int pager_write(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + int rc = SQLITE_OK; + + /* This routine is not called unless a write-transaction has already + ** been started. The journal file may or may not be open at this point. + ** It is never called in the ERROR state. + */ + assert( pPager->eState==PAGER_WRITER_LOCKED + || pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + assert( pPager->errCode==0 ); + assert( pPager->readOnly==0 ); + CHECK_PAGE(pPg); + + /* The journal file needs to be opened. Higher level routines have already + ** obtained the necessary locks to begin the write-transaction, but the + ** rollback journal might not yet be open. Open it now if this is the case. + ** + ** This is done before calling sqlite3PcacheMakeDirty() on the page. + ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then + ** an error might occur and the pager would end up in WRITER_LOCKED state + ** with pages marked as dirty in the cache. + */ + if( pPager->eState==PAGER_WRITER_LOCKED ){ + rc = pager_open_journal(pPager); + if( rc!=SQLITE_OK ) return rc; + } + assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); + assert( assert_pager_state(pPager) ); + + /* Mark the page that is about to be modified as dirty. */ + sqlite3PcacheMakeDirty(pPg); + + /* If a rollback journal is in use, them make sure the page that is about + ** to change is in the rollback journal, or if the page is a new page off + ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. + */ + assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); + if( pPager->pInJournal!=0 + && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 + ){ + assert( pagerUseWal(pPager)==0 ); + if( pPg->pgno<=pPager->dbOrigSize ){ + rc = pagerAddPageToRollbackJournal(pPg); + if( rc!=SQLITE_OK ){ + return rc; + } + }else{ + if( pPager->eState!=PAGER_WRITER_DBMOD ){ + pPg->flags |= PGHDR_NEED_SYNC; + } + PAGERTRACE(("APPEND %d page %d needSync=%d\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); + } + } + + /* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list + ** and before writing the page into the rollback journal. Wait until now, + ** after the page has been successfully journalled, before setting the + ** PGHDR_WRITEABLE bit that indicates that the page can be safely modified. + */ + pPg->flags |= PGHDR_WRITEABLE; + + /* If the statement journal is open and the page is not in it, + ** then write the page into the statement journal. + */ + if( pPager->nSavepoint>0 ){ + rc = subjournalPageIfRequired(pPg); + } + + /* Update the database size and return. */ + if( pPager->dbSizepgno ){ + pPager->dbSize = pPg->pgno; + } + return rc; +} + +/* +** This is a variant of sqlite3PagerWrite() that runs when the sector size +** is larger than the page size. SQLite makes the (reasonable) assumption that +** all bytes of a sector are written together by hardware. Hence, all bytes of +** a sector need to be journalled in case of a power loss in the middle of +** a write. +** +** Usually, the sector size is less than or equal to the page size, in which +** case pages can be individually written. This routine only runs in the +** exceptional case where the page size is smaller than the sector size. +*/ +static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ + int rc = SQLITE_OK; /* Return code */ + Pgno nPageCount; /* Total number of pages in database file */ + Pgno pg1; /* First page of the sector pPg is located on. */ + int nPage = 0; /* Number of pages starting at pg1 to journal */ + int ii; /* Loop counter */ + int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ + Pager *pPager = pPg->pPager; /* The pager that owns pPg */ + Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); + + /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow + ** a journal header to be written between the pages journaled by + ** this function. + */ + assert( !MEMDB ); + assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)==0 ); + pPager->doNotSpill |= SPILLFLAG_NOSYNC; + + /* This trick assumes that both the page-size and sector-size are + ** an integer power of 2. It sets variable pg1 to the identifier + ** of the first page of the sector pPg is located on. + */ + pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1; + + nPageCount = pPager->dbSize; + if( pPg->pgno>nPageCount ){ + nPage = (pPg->pgno - pg1)+1; + }else if( (pg1+nPagePerSector-1)>nPageCount ){ + nPage = nPageCount+1-pg1; + }else{ + nPage = nPagePerSector; + } + assert(nPage>0); + assert(pg1<=pPg->pgno); + assert((pg1+nPage)>pPg->pgno); + + for(ii=0; iipgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){ + if( pg!=PAGER_MJ_PGNO(pPager) ){ + rc = sqlite3PagerGet(pPager, pg, &pPage, 0); + if( rc==SQLITE_OK ){ + rc = pager_write(pPage); + if( pPage->flags&PGHDR_NEED_SYNC ){ + needSync = 1; + } + sqlite3PagerUnrefNotNull(pPage); + } + } + }else if( (pPage = sqlite3PagerLookup(pPager, pg))!=0 ){ + if( pPage->flags&PGHDR_NEED_SYNC ){ + needSync = 1; + } + sqlite3PagerUnrefNotNull(pPage); + } + } + + /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages + ** starting at pg1, then it needs to be set for all of them. Because + ** writing to any of these nPage pages may damage the others, the + ** journal file must contain sync()ed copies of all of them + ** before any of them can be written out to the database file. + */ + if( rc==SQLITE_OK && needSync ){ + assert( !MEMDB ); + for(ii=0; iiflags |= PGHDR_NEED_SYNC; + sqlite3PagerUnrefNotNull(pPage); + } + } + } + + assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)!=0 ); + pPager->doNotSpill &= ~SPILLFLAG_NOSYNC; + return rc; +} + +/* +** Mark a data page as writeable. This routine must be called before +** making changes to a page. The caller must check the return value +** of this function and be careful not to change any page data unless +** this routine returns SQLITE_OK. +** +** The difference between this function and pager_write() is that this +** function also deals with the special case where 2 or more pages +** fit on a single disk sector. In this case all co-resident pages +** must have been written to the journal file before returning. +** +** If an error occurs, SQLITE_NOMEM or an IO error code is returned +** as appropriate. Otherwise, SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + assert( (pPg->flags & PGHDR_MMAP)==0 ); + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ + if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); + return SQLITE_OK; + }else if( pPager->errCode ){ + return pPager->errCode; + }else if( pPager->sectorSize > (u32)pPager->pageSize ){ + assert( pPager->tempFile==0 ); + return pagerWriteLargeSector(pPg); + }else{ + return pager_write(pPg); + } +} + +/* +** Return TRUE if the page given in the argument was previously passed +** to sqlite3PagerWrite(). In other words, return TRUE if it is ok +** to change the content of the page. +*/ +#ifndef NDEBUG +SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){ + return pPg->flags & PGHDR_WRITEABLE; +} +#endif + +/* +** A call to this routine tells the pager that it is not necessary to +** write the information on page pPg back to the disk, even though +** that page might be marked as dirty. This happens, for example, when +** the page has been added as a leaf of the freelist and so its +** content no longer matters. +** +** The overlying software layer calls this routine when all of the data +** on the given page is unused. The pager marks the page as clean so +** that it does not get written to disk. +** +** Tests show that this optimization can quadruple the speed of large +** DELETE operations. +** +** This optimization cannot be used with a temp-file, as the page may +** have been dirty at the start of the transaction. In that case, if +** memory pressure forces page pPg out of the cache, the data does need +** to be written out to disk so that it may be read back in if the +** current transaction is rolled back. +*/ +SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + if( !pPager->tempFile && (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){ + PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); + IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) + pPg->flags |= PGHDR_DONT_WRITE; + pPg->flags &= ~PGHDR_WRITEABLE; + testcase( pPg->flags & PGHDR_NEED_SYNC ); + pager_set_pagehash(pPg); + } +} + +/* +** This routine is called to increment the value of the database file +** change-counter, stored as a 4-byte big-endian integer starting at +** byte offset 24 of the pager file. The secondary change counter at +** 92 is also updated, as is the SQLite version number at offset 96. +** +** But this only happens if the pPager->changeCountDone flag is false. +** To avoid excess churning of page 1, the update only happens once. +** See also the pager_write_changecounter() routine that does an +** unconditional update of the change counters. +** +** If the isDirectMode flag is zero, then this is done by calling +** sqlite3PagerWrite() on page 1, then modifying the contents of the +** page data. In this case the file will be updated when the current +** transaction is committed. +** +** The isDirectMode flag may only be non-zero if the library was compiled +** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case, +** if isDirect is non-zero, then the database file is updated directly +** by writing an updated version of page 1 using a call to the +** sqlite3OsWrite() function. +*/ +static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ + int rc = SQLITE_OK; + + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + + /* Declare and initialize constant integer 'isDirect'. If the + ** atomic-write optimization is enabled in this build, then isDirect + ** is initialized to the value passed as the isDirectMode parameter + ** to this function. Otherwise, it is always set to zero. + ** + ** The idea is that if the atomic-write optimization is not + ** enabled at compile time, the compiler can omit the tests of + ** 'isDirect' below, as well as the block enclosed in the + ** "if( isDirect )" condition. + */ +#ifndef SQLITE_ENABLE_ATOMIC_WRITE +# define DIRECT_MODE 0 + assert( isDirectMode==0 ); + UNUSED_PARAMETER(isDirectMode); +#else +# define DIRECT_MODE isDirectMode +#endif + + if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){ + PgHdr *pPgHdr; /* Reference to page 1 */ + + assert( !pPager->tempFile && isOpen(pPager->fd) ); + + /* Open page 1 of the file for writing. */ + rc = sqlite3PagerGet(pPager, 1, &pPgHdr, 0); + assert( pPgHdr==0 || rc==SQLITE_OK ); + + /* If page one was fetched successfully, and this function is not + ** operating in direct-mode, make page 1 writable. When not in + ** direct mode, page 1 is always held in cache and hence the PagerGet() + ** above is always successful - hence the ALWAYS on rc==SQLITE_OK. + */ + if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){ + rc = sqlite3PagerWrite(pPgHdr); + } + + if( rc==SQLITE_OK ){ + /* Actually do the update of the change counter */ + pager_write_changecounter(pPgHdr); + + /* If running in direct mode, write the contents of page 1 to the file. */ + if( DIRECT_MODE ){ + const void *zBuf; + assert( pPager->dbFileSize>0 ); + zBuf = pPgHdr->pData; + if( rc==SQLITE_OK ){ + rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); + pPager->aStat[PAGER_STAT_WRITE]++; + } + if( rc==SQLITE_OK ){ + /* Update the pager's copy of the change-counter. Otherwise, the + ** next time a read transaction is opened the cache will be + ** flushed (as the change-counter values will not match). */ + const void *pCopy = (const void *)&((const char *)zBuf)[24]; + memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers)); + pPager->changeCountDone = 1; + } + }else{ + pPager->changeCountDone = 1; + } + } + + /* Release the page reference. */ + sqlite3PagerUnref(pPgHdr); + } + return rc; +} + +/* +** Sync the database file to disk. This is a no-op for in-memory databases +** or pages with the Pager.noSync flag set. +** +** If successful, or if called on a pager for which it is a no-op, this +** function returns SQLITE_OK. Otherwise, an IO error code is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zSuper){ + int rc = SQLITE_OK; + void *pArg = (void*)zSuper; + rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC, pArg); + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + if( rc==SQLITE_OK && !pPager->noSync ){ + assert( !MEMDB ); + rc = sqlite3OsSync(pPager->fd, pPager->syncFlags); + } + return rc; +} + +/* +** This function may only be called while a write-transaction is active in +** rollback. If the connection is in WAL mode, this call is a no-op. +** Otherwise, if the connection does not already have an EXCLUSIVE lock on +** the database file, an attempt is made to obtain one. +** +** If the EXCLUSIVE lock is already held or the attempt to obtain it is +** successful, or the connection is in WAL mode, SQLITE_OK is returned. +** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is +** returned. +*/ +SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager){ + int rc = pPager->errCode; + assert( assert_pager_state(pPager) ); + if( rc==SQLITE_OK ){ + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + || pPager->eState==PAGER_WRITER_LOCKED + ); + assert( assert_pager_state(pPager) ); + if( 0==pagerUseWal(pPager) ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + } + } + return rc; +} + +/* +** Sync the database file for the pager pPager. zSuper points to the name +** of a super-journal file that should be written into the individual +** journal file. zSuper may be NULL, which is interpreted as no +** super-journal (a single database transaction). +** +** This routine ensures that: +** +** * The database file change-counter is updated, +** * the journal is synced (unless the atomic-write optimization is used), +** * all dirty pages are written to the database file, +** * the database file is truncated (if required), and +** * the database file synced. +** +** The only thing that remains to commit the transaction is to finalize +** (delete, truncate or zero the first part of) the journal file (or +** delete the super-journal file if specified). +** +** Note that if zSuper==NULL, this does not overwrite a previous value +** passed to an sqlite3PagerCommitPhaseOne() call. +** +** If the final parameter - noSync - is true, then the database file itself +** is not synced. The caller must call sqlite3PagerSync() directly to +** sync the database file before calling CommitPhaseTwo() to delete the +** journal file in this case. +*/ +SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( + Pager *pPager, /* Pager object */ + const char *zSuper, /* If not NULL, the super-journal name */ + int noSync /* True to omit the xSync on the db file */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( pPager->eState==PAGER_WRITER_LOCKED + || pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + || pPager->eState==PAGER_ERROR + ); + assert( assert_pager_state(pPager) ); + + /* If a prior error occurred, report that error again. */ + if( NEVER(pPager->errCode) ) return pPager->errCode; + + /* Provide the ability to easily simulate an I/O error during testing */ + if( sqlite3FaultSim(400) ) return SQLITE_IOERR; + + PAGERTRACE(("DATABASE SYNC: File=%s zSuper=%s nSize=%d\n", + pPager->zFilename, zSuper, pPager->dbSize)); + + /* If no database changes have been made, return early. */ + if( pPager->eStatetempFile ); + assert( isOpen(pPager->fd) || pPager->tempFile ); + if( 0==pagerFlushOnCommit(pPager, 1) ){ + /* If this is an in-memory db, or no pages have been written to, or this + ** function has already been called, it is mostly a no-op. However, any + ** backup in progress needs to be restarted. */ + sqlite3BackupRestart(pPager->pBackup); + }else{ + PgHdr *pList; + if( pagerUseWal(pPager) ){ + PgHdr *pPageOne = 0; + pList = sqlite3PcacheDirtyList(pPager->pPCache); + if( pList==0 ){ + /* Must have at least one page for the WAL commit flag. + ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */ + rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0); + pList = pPageOne; + pList->pDirty = 0; + } + assert( rc==SQLITE_OK ); + if( ALWAYS(pList) ){ + rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1); + } + sqlite3PagerUnref(pPageOne); + if( rc==SQLITE_OK ){ + sqlite3PcacheCleanAll(pPager->pPCache); + } + }else{ + /* The bBatch boolean is true if the batch-atomic-write commit method + ** should be used. No rollback journal is created if batch-atomic-write + ** is enabled. + */ +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + sqlite3_file *fd = pPager->fd; + int bBatch = zSuper==0 /* An SQLITE_IOCAP_BATCH_ATOMIC commit */ + && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC) + && !pPager->noSync + && sqlite3JournalIsInMemory(pPager->jfd); +#else +# define bBatch 0 +#endif + +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + /* The following block updates the change-counter. Exactly how it + ** does this depends on whether or not the atomic-update optimization + ** was enabled at compile time, and if this transaction meets the + ** runtime criteria to use the operation: + ** + ** * The file-system supports the atomic-write property for + ** blocks of size page-size, and + ** * This commit is not part of a multi-file transaction, and + ** * Exactly one page has been modified and store in the journal file. + ** + ** If the optimization was not enabled at compile time, then the + ** pager_incr_changecounter() function is called to update the change + ** counter in 'indirect-mode'. If the optimization is compiled in but + ** is not applicable to this transaction, call sqlite3JournalCreate() + ** to make sure the journal file has actually been created, then call + ** pager_incr_changecounter() to update the change-counter in indirect + ** mode. + ** + ** Otherwise, if the optimization is both enabled and applicable, + ** then call pager_incr_changecounter() to update the change-counter + ** in 'direct' mode. In this case the journal file will never be + ** created for this transaction. + */ + if( bBatch==0 ){ + PgHdr *pPg; + assert( isOpen(pPager->jfd) + || pPager->journalMode==PAGER_JOURNALMODE_OFF + || pPager->journalMode==PAGER_JOURNALMODE_WAL + ); + if( !zSuper && isOpen(pPager->jfd) + && pPager->journalOff==jrnlBufferSize(pPager) + && pPager->dbSize>=pPager->dbOrigSize + && (!(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) + ){ + /* Update the db file change counter via the direct-write method. The + ** following call will modify the in-memory representation of page 1 + ** to include the updated change counter and then write page 1 + ** directly to the database file. Because of the atomic-write + ** property of the host file-system, this is safe. + */ + rc = pager_incr_changecounter(pPager, 1); + }else{ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc==SQLITE_OK ){ + rc = pager_incr_changecounter(pPager, 0); + } + } + } +#else /* SQLITE_ENABLE_ATOMIC_WRITE */ +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( zSuper ){ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + assert( bBatch==0 ); + } +#endif + rc = pager_incr_changecounter(pPager, 0); +#endif /* !SQLITE_ENABLE_ATOMIC_WRITE */ + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Write the super-journal name into the journal file. If a + ** super-journal file name has already been written to the journal file, + ** or if zSuper is NULL (no super-journal), then this call is a no-op. + */ + rc = writeSuperJournal(pPager, zSuper); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Sync the journal file and write all dirty pages to the database. + ** If the atomic-update optimization is being used, this sync will not + ** create the journal file or perform any real IO. + ** + ** Because the change-counter page was just modified, unless the + ** atomic-update optimization is used it is almost certain that the + ** journal requires a sync here. However, in locking_mode=exclusive + ** on a system under memory pressure it is just possible that this is + ** not the case. In this case it is likely enough that the redundant + ** xSync() call will be changed to a no-op by the OS anyhow. + */ + rc = syncJournal(pPager, 0); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + pList = sqlite3PcacheDirtyList(pPager->pPCache); +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( bBatch ){ + rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0); + if( rc==SQLITE_OK ){ + rc = pager_write_pagelist(pPager, pList); + if( rc==SQLITE_OK ){ + rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0); + } + if( rc!=SQLITE_OK ){ + sqlite3OsFileControlHint(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0); + } + } + + if( (rc&0xFF)==SQLITE_IOERR && rc!=SQLITE_IOERR_NOMEM ){ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc!=SQLITE_OK ){ + sqlite3OsClose(pPager->jfd); + goto commit_phase_one_exit; + } + bBatch = 0; + }else{ + sqlite3OsClose(pPager->jfd); + } + } +#endif /* SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + + if( bBatch==0 ){ + rc = pager_write_pagelist(pPager, pList); + } + if( rc!=SQLITE_OK ){ + assert( rc!=SQLITE_IOERR_BLOCKED ); + goto commit_phase_one_exit; + } + sqlite3PcacheCleanAll(pPager->pPCache); + + /* If the file on disk is smaller than the database image, use + ** pager_truncate to grow the file here. This can happen if the database + ** image was extended as part of the current transaction and then the + ** last page in the db image moved to the free-list. In this case the + ** last page is never written out to disk, leaving the database file + ** undersized. Fix this now if it is the case. */ + if( pPager->dbSize>pPager->dbFileSize ){ + Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); + assert( pPager->eState==PAGER_WRITER_DBMOD ); + rc = pager_truncate(pPager, nNew); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } + + /* Finally, sync the database file. */ + if( !noSync ){ + rc = sqlite3PagerSync(pPager, zSuper); + } + IOTRACE(("DBSYNC %p\n", pPager)) + } + } + +commit_phase_one_exit: + if( rc==SQLITE_OK && !pagerUseWal(pPager) ){ + pPager->eState = PAGER_WRITER_FINISHED; + } + return rc; +} + + +/* +** When this function is called, the database file has been completely +** updated to reflect the changes made by the current transaction and +** synced to disk. The journal file still exists in the file-system +** though, and if a failure occurs at this point it will eventually +** be used as a hot-journal and the current transaction rolled back. +** +** This function finalizes the journal file, either by deleting, +** truncating or partially zeroing it, so that it cannot be used +** for hot-journal rollback. Once this is done the transaction is +** irrevocably committed. +** +** If an error occurs, an IO error code is returned and the pager +** moves into the error state. Otherwise, SQLITE_OK is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + + /* This routine should not be called if a prior error has occurred. + ** But if (due to a coding error elsewhere in the system) it does get + ** called, just return the same error code without doing anything. */ + if( NEVER(pPager->errCode) ) return pPager->errCode; + pPager->iDataVersion++; + + assert( pPager->eState==PAGER_WRITER_LOCKED + || pPager->eState==PAGER_WRITER_FINISHED + || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD) + ); + assert( assert_pager_state(pPager) ); + + /* An optimization. If the database was not actually modified during + ** this transaction, the pager is running in exclusive-mode and is + ** using persistent journals, then this function is a no-op. + ** + ** The start of the journal file currently contains a single journal + ** header with the nRec field set to 0. If such a journal is used as + ** a hot-journal during hot-journal rollback, 0 changes will be made + ** to the database file. So there is no need to zero the journal + ** header. Since the pager is in exclusive mode, there is no need + ** to drop any locks either. + */ + if( pPager->eState==PAGER_WRITER_LOCKED + && pPager->exclusiveMode + && pPager->journalMode==PAGER_JOURNALMODE_PERSIST + ){ + assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff ); + pPager->eState = PAGER_READER; + return SQLITE_OK; + } + + PAGERTRACE(("COMMIT %d\n", PAGERID(pPager))); + rc = pager_end_transaction(pPager, pPager->setSuper, 1); + return pager_error(pPager, rc); +} + +/* +** If a write transaction is open, then all changes made within the +** transaction are reverted and the current write-transaction is closed. +** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR +** state if an error occurs. +** +** If the pager is already in PAGER_ERROR state when this function is called, +** it returns Pager.errCode immediately. No work is performed in this case. +** +** Otherwise, in rollback mode, this function performs two functions: +** +** 1) It rolls back the journal file, restoring all database file and +** in-memory cache pages to the state they were in when the transaction +** was opened, and +** +** 2) It finalizes the journal file, so that it is not used for hot +** rollback at any point in the future. +** +** Finalization of the journal file (task 2) is only performed if the +** rollback is successful. +** +** In WAL mode, all cache-entries containing data modified within the +** current transaction are either expelled from the cache or reverted to +** their pre-transaction state by re-reading data from the database or +** WAL files. The WAL transaction is then closed. +*/ +SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); + + /* PagerRollback() is a no-op if called in READER or OPEN state. If + ** the pager is already in the ERROR state, the rollback is not + ** attempted here. Instead, the error code is returned to the caller. + */ + assert( assert_pager_state(pPager) ); + if( pPager->eState==PAGER_ERROR ) return pPager->errCode; + if( pPager->eState<=PAGER_READER ) return SQLITE_OK; + + if( pagerUseWal(pPager) ){ + int rc2; + rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); + rc2 = pager_end_transaction(pPager, pPager->setSuper, 0); + if( rc==SQLITE_OK ) rc = rc2; + }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){ + int eState = pPager->eState; + rc = pager_end_transaction(pPager, 0, 0); + if( !MEMDB && eState>PAGER_WRITER_LOCKED ){ + /* This can happen using journal_mode=off. Move the pager to the error + ** state to indicate that the contents of the cache may not be trusted. + ** Any active readers will get SQLITE_ABORT. + */ + pPager->errCode = SQLITE_ABORT; + pPager->eState = PAGER_ERROR; + setGetterMethod(pPager); + return rc; + } + }else{ + rc = pager_playback(pPager, 0); + } + + assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK ); + assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT + || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR + || rc==SQLITE_CANTOPEN + ); + + /* If an error occurs during a ROLLBACK, we can no longer trust the pager + ** cache. So call pager_error() on the way out to make any error persistent. + */ + return pager_error(pPager, rc); +} + +/* +** Return TRUE if the database file is opened read-only. Return FALSE +** if the database is (in theory) writable. +*/ +SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){ + return pPager->readOnly; +} + +#ifdef SQLITE_DEBUG +/* +** Return the sum of the reference counts for all pages held by pPager. +*/ +SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){ + return sqlite3PcacheRefCount(pPager->pPCache); +} +#endif + +/* +** Return the approximate number of bytes of memory currently +** used by the pager and its associated cache. +*/ +SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager *pPager){ + int perPageSize = pPager->pageSize + pPager->nExtra + + (int)(sizeof(PgHdr) + 5*sizeof(void*)); + return perPageSize*sqlite3PcachePagecount(pPager->pPCache) + + sqlite3MallocSize(pPager) + + pPager->pageSize; +} + +/* +** Return the number of references to the specified page. +*/ +SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage *pPage){ + return sqlite3PcachePageRefcount(pPage); +} + +#ifdef SQLITE_TEST +/* +** This routine is used for testing and analysis only. +*/ +SQLITE_PRIVATE int *sqlite3PagerStats(Pager *pPager){ + static int a[11]; + a[0] = sqlite3PcacheRefCount(pPager->pPCache); + a[1] = sqlite3PcachePagecount(pPager->pPCache); + a[2] = sqlite3PcacheGetCachesize(pPager->pPCache); + a[3] = pPager->eState==PAGER_OPEN ? -1 : (int) pPager->dbSize; + a[4] = pPager->eState; + a[5] = pPager->errCode; + a[6] = pPager->aStat[PAGER_STAT_HIT]; + a[7] = pPager->aStat[PAGER_STAT_MISS]; + a[8] = 0; /* Used to be pPager->nOvfl */ + a[9] = pPager->nRead; + a[10] = pPager->aStat[PAGER_STAT_WRITE]; + return a; +} +#endif + +/* +** Parameter eStat must be one of SQLITE_DBSTATUS_CACHE_HIT, _MISS, _WRITE, +** or _WRITE+1. The SQLITE_DBSTATUS_CACHE_WRITE+1 case is a translation +** of SQLITE_DBSTATUS_CACHE_SPILL. The _SPILL case is not contiguous because +** it was added later. +** +** Before returning, *pnVal is incremented by the +** current cache hit or miss count, according to the value of eStat. If the +** reset parameter is non-zero, the cache hit or miss count is zeroed before +** returning. +*/ +SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *pPager, int eStat, int reset, int *pnVal){ + + assert( eStat==SQLITE_DBSTATUS_CACHE_HIT + || eStat==SQLITE_DBSTATUS_CACHE_MISS + || eStat==SQLITE_DBSTATUS_CACHE_WRITE + || eStat==SQLITE_DBSTATUS_CACHE_WRITE+1 + ); + + assert( SQLITE_DBSTATUS_CACHE_HIT+1==SQLITE_DBSTATUS_CACHE_MISS ); + assert( SQLITE_DBSTATUS_CACHE_HIT+2==SQLITE_DBSTATUS_CACHE_WRITE ); + assert( PAGER_STAT_HIT==0 && PAGER_STAT_MISS==1 + && PAGER_STAT_WRITE==2 && PAGER_STAT_SPILL==3 ); + + eStat -= SQLITE_DBSTATUS_CACHE_HIT; + *pnVal += pPager->aStat[eStat]; + if( reset ){ + pPager->aStat[eStat] = 0; + } +} + +/* +** Return true if this is an in-memory or temp-file backed pager. +*/ +SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager *pPager){ + return pPager->tempFile || pPager->memVfs; +} + +/* +** Check that there are at least nSavepoint savepoints open. If there are +** currently less than nSavepoints open, then open one or more savepoints +** to make up the difference. If the number of savepoints is already +** equal to nSavepoint, then this function is a no-op. +** +** If a memory allocation fails, SQLITE_NOMEM is returned. If an error +** occurs while opening the sub-journal file, then an IO error code is +** returned. Otherwise, SQLITE_OK. +*/ +static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){ + int rc = SQLITE_OK; /* Return code */ + int nCurrent = pPager->nSavepoint; /* Current number of savepoints */ + int ii; /* Iterator variable */ + PagerSavepoint *aNew; /* New Pager.aSavepoint array */ + + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + assert( nSavepoint>nCurrent && pPager->useJournal ); + + /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM + ** if the allocation fails. Otherwise, zero the new portion in case a + ** malloc failure occurs while populating it in the for(...) loop below. + */ + aNew = (PagerSavepoint *)sqlite3Realloc( + pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint + ); + if( !aNew ){ + return SQLITE_NOMEM_BKPT; + } + memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); + pPager->aSavepoint = aNew; + + /* Populate the PagerSavepoint structures just allocated. */ + for(ii=nCurrent; iidbSize; + if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ + aNew[ii].iOffset = pPager->journalOff; + }else{ + aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); + } + aNew[ii].iSubRec = pPager->nSubRec; + aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); + aNew[ii].bTruncateOnRelease = 1; + if( !aNew[ii].pInSavepoint ){ + return SQLITE_NOMEM_BKPT; + } + if( pagerUseWal(pPager) ){ + sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); + } + pPager->nSavepoint = ii+1; + } + assert( pPager->nSavepoint==nSavepoint ); + assertTruncateConstraint(pPager); + return rc; +} +SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + + if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ + return pagerOpenSavepoint(pPager, nSavepoint); + }else{ + return SQLITE_OK; + } +} + + +/* +** This function is called to rollback or release (commit) a savepoint. +** The savepoint to release or rollback need not be the most recently +** created savepoint. +** +** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE. +** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with +** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes +** that have occurred since the specified savepoint was created. +** +** The savepoint to rollback or release is identified by parameter +** iSavepoint. A value of 0 means to operate on the outermost savepoint +** (the first created). A value of (Pager.nSavepoint-1) means operate +** on the most recently created savepoint. If iSavepoint is greater than +** (Pager.nSavepoint-1), then this function is a no-op. +** +** If a negative value is passed to this function, then the current +** transaction is rolled back. This is different to calling +** sqlite3PagerRollback() because this function does not terminate +** the transaction or unlock the database, it just restores the +** contents of the database to its original state. +** +** In any case, all savepoints with an index greater than iSavepoint +** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE), +** then savepoint iSavepoint is also destroyed. +** +** This function may return SQLITE_NOMEM if a memory allocation fails, +** or an IO error code if an IO error occurs while rolling back a +** savepoint. If no errors occur, SQLITE_OK is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ + int rc = pPager->errCode; + +#ifdef SQLITE_ENABLE_ZIPVFS + if( op==SAVEPOINT_RELEASE ) rc = SQLITE_OK; +#endif + + assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); + assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK ); + + if( rc==SQLITE_OK && iSavepointnSavepoint ){ + int ii; /* Iterator variable */ + int nNew; /* Number of remaining savepoints after this op. */ + + /* Figure out how many savepoints will still be active after this + ** operation. Store this value in nNew. Then free resources associated + ** with any savepoints that are destroyed by this operation. + */ + nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1); + for(ii=nNew; iinSavepoint; ii++){ + sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); + } + pPager->nSavepoint = nNew; + + /* Truncate the sub-journal so that it only includes the parts + ** that are still in use. */ + if( op==SAVEPOINT_RELEASE ){ + PagerSavepoint *pRel = &pPager->aSavepoint[nNew]; + if( pRel->bTruncateOnRelease && isOpen(pPager->sjfd) ){ + /* Only truncate if it is an in-memory sub-journal. */ + if( sqlite3JournalIsInMemory(pPager->sjfd) ){ + i64 sz = (pPager->pageSize+4)*(i64)pRel->iSubRec; + rc = sqlite3OsTruncate(pPager->sjfd, sz); + assert( rc==SQLITE_OK ); + } + pPager->nSubRec = pRel->iSubRec; + } + } + /* Else this is a rollback operation, playback the specified savepoint. + ** If this is a temp-file, it is possible that the journal file has + ** not yet been opened. In this case there have been no changes to + ** the database file, so the playback operation can be skipped. + */ + else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){ + PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; + rc = pagerPlaybackSavepoint(pPager, pSavepoint); + assert(rc!=SQLITE_DONE); + } + +#ifdef SQLITE_ENABLE_ZIPVFS + /* If the cache has been modified but the savepoint cannot be rolled + ** back journal_mode=off, put the pager in the error state. This way, + ** if the VFS used by this pager includes ZipVFS, the entire transaction + ** can be rolled back at the ZipVFS level. */ + else if( + pPager->journalMode==PAGER_JOURNALMODE_OFF + && pPager->eState>=PAGER_WRITER_CACHEMOD + ){ + pPager->errCode = SQLITE_ABORT; + pPager->eState = PAGER_ERROR; + setGetterMethod(pPager); + } +#endif + } + + return rc; +} + +/* +** Return the full pathname of the database file. +** +** Except, if the pager is in-memory only, then return an empty string if +** nullIfMemDb is true. This routine is called with nullIfMemDb==1 when +** used to report the filename to the user, for compatibility with legacy +** behavior. But when the Btree needs to know the filename for matching to +** shared cache, it uses nullIfMemDb==0 so that in-memory databases can +** participate in shared-cache. +** +** The return value to this routine is always safe to use with +** sqlite3_uri_parameter() and sqlite3_filename_database() and friends. +*/ +SQLITE_PRIVATE const char *sqlite3PagerFilename(const Pager *pPager, int nullIfMemDb){ + static const char zFake[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + return (nullIfMemDb && pPager->memDb) ? &zFake[4] : pPager->zFilename; +} + +/* +** Return the VFS structure for the pager. +*/ +SQLITE_PRIVATE sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ + return pPager->pVfs; +} + +/* +** Return the file handle for the database file associated +** with the pager. This might return NULL if the file has +** not yet been opened. +*/ +SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){ + return pPager->fd; +} + +/* +** Return the file handle for the journal file (if it exists). +** This will be either the rollback journal or the WAL file. +*/ +SQLITE_PRIVATE sqlite3_file *sqlite3PagerJrnlFile(Pager *pPager){ +#if SQLITE_OMIT_WAL + return pPager->jfd; +#else + return pPager->pWal ? sqlite3WalFile(pPager->pWal) : pPager->jfd; +#endif +} + +/* +** Return the full pathname of the journal file. +*/ +SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager *pPager){ + return pPager->zJournal; +} + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** Move the page pPg to location pgno in the file. +** +** There must be no references to the page previously located at +** pgno (which we call pPgOld) though that page is allowed to be +** in cache. If the page previously located at pgno is not already +** in the rollback journal, it is not put there by by this routine. +** +** References to the page pPg remain valid. Updating any +** meta-data associated with pPg (i.e. data stored in the nExtra bytes +** allocated along with the page) is the responsibility of the caller. +** +** A transaction must be active when this routine is called. It used to be +** required that a statement transaction was not active, but this restriction +** has been removed (CREATE INDEX needs to move a page when a statement +** transaction is active). +** +** If the fourth argument, isCommit, is non-zero, then this page is being +** moved as part of a database reorganization just before the transaction +** is being committed. In this case, it is guaranteed that the database page +** pPg refers to will not be written to again within this transaction. +** +** This function may return SQLITE_NOMEM or an IO error code if an error +** occurs. Otherwise, it returns SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){ + PgHdr *pPgOld; /* The page being overwritten. */ + Pgno needSyncPgno = 0; /* Old value of pPg->pgno, if sync is required */ + int rc; /* Return code */ + Pgno origPgno; /* The original page number */ + + assert( pPg->nRef>0 ); + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + + /* In order to be able to rollback, an in-memory database must journal + ** the page we are moving from. + */ + assert( pPager->tempFile || !MEMDB ); + if( pPager->tempFile ){ + rc = sqlite3PagerWrite(pPg); + if( rc ) return rc; + } + + /* If the page being moved is dirty and has not been saved by the latest + ** savepoint, then save the current contents of the page into the + ** sub-journal now. This is required to handle the following scenario: + ** + ** BEGIN; + ** + ** SAVEPOINT one; + ** + ** ROLLBACK TO one; + ** + ** If page X were not written to the sub-journal here, it would not + ** be possible to restore its contents when the "ROLLBACK TO one" + ** statement were is processed. + ** + ** subjournalPage() may need to allocate space to store pPg->pgno into + ** one or more savepoint bitvecs. This is the reason this function + ** may return SQLITE_NOMEM. + */ + if( (pPg->flags & PGHDR_DIRTY)!=0 + && SQLITE_OK!=(rc = subjournalPageIfRequired(pPg)) + ){ + return rc; + } + + PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", + PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno)); + IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno)) + + /* If the journal needs to be sync()ed before page pPg->pgno can + ** be written to, store pPg->pgno in local variable needSyncPgno. + ** + ** If the isCommit flag is set, there is no need to remember that + ** the journal needs to be sync()ed before database page pPg->pgno + ** can be written to. The caller has already promised not to write to it. + */ + if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){ + needSyncPgno = pPg->pgno; + assert( pPager->journalMode==PAGER_JOURNALMODE_OFF || + pageInJournal(pPager, pPg) || pPg->pgno>pPager->dbOrigSize ); + assert( pPg->flags&PGHDR_DIRTY ); + } + + /* If the cache contains a page with page-number pgno, remove it + ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for + ** page pgno before the 'move' operation, it needs to be retained + ** for the page moved there. + */ + pPg->flags &= ~PGHDR_NEED_SYNC; + pPgOld = sqlite3PagerLookup(pPager, pgno); + assert( !pPgOld || pPgOld->nRef==1 || CORRUPT_DB ); + if( pPgOld ){ + if( NEVER(pPgOld->nRef>1) ){ + sqlite3PagerUnrefNotNull(pPgOld); + return SQLITE_CORRUPT_BKPT; + } + pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC); + if( pPager->tempFile ){ + /* Do not discard pages from an in-memory database since we might + ** need to rollback later. Just move the page out of the way. */ + sqlite3PcacheMove(pPgOld, pPager->dbSize+1); + }else{ + sqlite3PcacheDrop(pPgOld); + } + } + + origPgno = pPg->pgno; + sqlite3PcacheMove(pPg, pgno); + sqlite3PcacheMakeDirty(pPg); + + /* For an in-memory database, make sure the original page continues + ** to exist, in case the transaction needs to roll back. Use pPgOld + ** as the original page since it has already been allocated. + */ + if( pPager->tempFile && pPgOld ){ + sqlite3PcacheMove(pPgOld, origPgno); + sqlite3PagerUnrefNotNull(pPgOld); + } + + if( needSyncPgno ){ + /* If needSyncPgno is non-zero, then the journal file needs to be + ** sync()ed before any data is written to database file page needSyncPgno. + ** Currently, no such page exists in the page-cache and the + ** "is journaled" bitvec flag has been set. This needs to be remedied by + ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC + ** flag. + ** + ** If the attempt to load the page into the page-cache fails, (due + ** to a malloc() or IO failure), clear the bit in the pInJournal[] + ** array. Otherwise, if the page is loaded and written again in + ** this transaction, it may be written to the database file before + ** it is synced into the journal file. This way, it may end up in + ** the journal file twice, but that is not a problem. + */ + PgHdr *pPgHdr; + rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr, 0); + if( rc!=SQLITE_OK ){ + if( needSyncPgno<=pPager->dbOrigSize ){ + assert( pPager->pTmpSpace!=0 ); + sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace); + } + return rc; + } + pPgHdr->flags |= PGHDR_NEED_SYNC; + sqlite3PcacheMakeDirty(pPgHdr); + sqlite3PagerUnrefNotNull(pPgHdr); + } + + return SQLITE_OK; +} +#endif + +/* +** The page handle passed as the first argument refers to a dirty page +** with a page number other than iNew. This function changes the page's +** page number to iNew and sets the value of the PgHdr.flags field to +** the value passed as the third parameter. +*/ +SQLITE_PRIVATE void sqlite3PagerRekey(DbPage *pPg, Pgno iNew, u16 flags){ + assert( pPg->pgno!=iNew ); + pPg->flags = flags; + sqlite3PcacheMove(pPg, iNew); +} + +/* +** Return a pointer to the data for the specified page. +*/ +SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *pPg){ + assert( pPg->nRef>0 || pPg->pPager->memDb ); + return pPg->pData; +} + +/* +** Return a pointer to the Pager.nExtra bytes of "extra" space +** allocated along with the specified page. +*/ +SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *pPg){ + return pPg->pExtra; +} + +/* +** Get/set the locking-mode for this pager. Parameter eMode must be one +** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or +** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then +** the locking-mode is set to the value specified. +** +** The returned value is either PAGER_LOCKINGMODE_NORMAL or +** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated) +** locking-mode. +*/ +SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *pPager, int eMode){ + assert( eMode==PAGER_LOCKINGMODE_QUERY + || eMode==PAGER_LOCKINGMODE_NORMAL + || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); + assert( PAGER_LOCKINGMODE_QUERY<0 ); + assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 ); + assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) ); + if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){ + pPager->exclusiveMode = (u8)eMode; + } + return (int)pPager->exclusiveMode; +} + +/* +** Set the journal-mode for this pager. Parameter eMode must be one of: +** +** PAGER_JOURNALMODE_DELETE +** PAGER_JOURNALMODE_TRUNCATE +** PAGER_JOURNALMODE_PERSIST +** PAGER_JOURNALMODE_OFF +** PAGER_JOURNALMODE_MEMORY +** PAGER_JOURNALMODE_WAL +** +** The journalmode is set to the value specified if the change is allowed. +** The change may be disallowed for the following reasons: +** +** * An in-memory database can only have its journal_mode set to _OFF +** or _MEMORY. +** +** * Temporary databases cannot have _WAL journalmode. +** +** The returned indicate the current (possibly updated) journal-mode. +*/ +SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ + u8 eOld = pPager->journalMode; /* Prior journalmode */ + + /* The eMode parameter is always valid */ + assert( eMode==PAGER_JOURNALMODE_DELETE /* 0 */ + || eMode==PAGER_JOURNALMODE_PERSIST /* 1 */ + || eMode==PAGER_JOURNALMODE_OFF /* 2 */ + || eMode==PAGER_JOURNALMODE_TRUNCATE /* 3 */ + || eMode==PAGER_JOURNALMODE_MEMORY /* 4 */ + || eMode==PAGER_JOURNALMODE_WAL /* 5 */ ); + + /* This routine is only called from the OP_JournalMode opcode, and + ** the logic there will never allow a temporary file to be changed + ** to WAL mode. + */ + assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL ); + + /* Do allow the journalmode of an in-memory database to be set to + ** anything other than MEMORY or OFF + */ + if( MEMDB ){ + assert( eOld==PAGER_JOURNALMODE_MEMORY || eOld==PAGER_JOURNALMODE_OFF ); + if( eMode!=PAGER_JOURNALMODE_MEMORY && eMode!=PAGER_JOURNALMODE_OFF ){ + eMode = eOld; + } + } + + if( eMode!=eOld ){ + + /* Change the journal mode. */ + assert( pPager->eState!=PAGER_ERROR ); + pPager->journalMode = (u8)eMode; + + /* When transistioning from TRUNCATE or PERSIST to any other journal + ** mode except WAL, unless the pager is in locking_mode=exclusive mode, + ** delete the journal file. + */ + assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); + assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); + assert( (PAGER_JOURNALMODE_DELETE & 5)==0 ); + assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 ); + assert( (PAGER_JOURNALMODE_OFF & 5)==0 ); + assert( (PAGER_JOURNALMODE_WAL & 5)==5 ); + + assert( isOpen(pPager->fd) || pPager->exclusiveMode ); + if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){ + /* In this case we would like to delete the journal file. If it is + ** not possible, then that is not a problem. Deleting the journal file + ** here is an optimization only. + ** + ** Before deleting the journal file, obtain a RESERVED lock on the + ** database file. This ensures that the journal file is not deleted + ** while it is in use by some other client. + */ + sqlite3OsClose(pPager->jfd); + if( pPager->eLock>=RESERVED_LOCK ){ + sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); + }else{ + int rc = SQLITE_OK; + int state = pPager->eState; + assert( state==PAGER_OPEN || state==PAGER_READER ); + if( state==PAGER_OPEN ){ + rc = sqlite3PagerSharedLock(pPager); + } + if( pPager->eState==PAGER_READER ){ + assert( rc==SQLITE_OK ); + rc = pagerLockDb(pPager, RESERVED_LOCK); + } + if( rc==SQLITE_OK ){ + sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); + } + if( rc==SQLITE_OK && state==PAGER_READER ){ + pagerUnlockDb(pPager, SHARED_LOCK); + }else if( state==PAGER_OPEN ){ + pager_unlock(pPager); + } + assert( state==pPager->eState ); + } + }else if( eMode==PAGER_JOURNALMODE_OFF ){ + sqlite3OsClose(pPager->jfd); + } + } + + /* Return the new journal mode */ + return (int)pPager->journalMode; +} + +/* +** Return the current journal mode. +*/ +SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){ + return (int)pPager->journalMode; +} + +/* +** Return TRUE if the pager is in a state where it is OK to change the +** journalmode. Journalmode changes can only happen when the database +** is unmodified. +*/ +SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){ + assert( assert_pager_state(pPager) ); + if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0; + if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0; + return 1; +} + +/* +** Get/set the size-limit used for persistent journal files. +** +** Setting the size limit to -1 means no limit is enforced. +** An attempt to set a limit smaller than -1 is a no-op. +*/ +SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){ + if( iLimit>=-1 ){ + pPager->journalSizeLimit = iLimit; + sqlite3WalLimit(pPager->pWal, iLimit); + } + return pPager->journalSizeLimit; +} + +/* +** Return a pointer to the pPager->pBackup variable. The backup module +** in backup.c maintains the content of this variable. This module +** uses it opaquely as an argument to sqlite3BackupRestart() and +** sqlite3BackupUpdate() only. +*/ +SQLITE_PRIVATE sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){ + return &pPager->pBackup; +} + +#ifndef SQLITE_OMIT_VACUUM +/* +** Unless this is an in-memory or temporary database, clear the pager cache. +*/ +SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *pPager){ + assert( MEMDB==0 || pPager->tempFile ); + if( pPager->tempFile==0 ) pager_reset(pPager); +} +#endif + + +#ifndef SQLITE_OMIT_WAL +/* +** This function is called when the user invokes "PRAGMA wal_checkpoint", +** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint() +** or wal_blocking_checkpoint() API functions. +** +** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. +*/ +SQLITE_PRIVATE int sqlite3PagerCheckpoint( + Pager *pPager, /* Checkpoint on this pager */ + sqlite3 *db, /* Db handle used to check for interrupts */ + int eMode, /* Type of checkpoint */ + int *pnLog, /* OUT: Final number of frames in log */ + int *pnCkpt /* OUT: Final number of checkpointed frames */ +){ + int rc = SQLITE_OK; + if( pPager->pWal==0 && pPager->journalMode==PAGER_JOURNALMODE_WAL ){ + /* This only happens when a database file is zero bytes in size opened and + ** then "PRAGMA journal_mode=WAL" is run and then sqlite3_wal_checkpoint() + ** is invoked without any intervening transactions. We need to start + ** a transaction to initialize pWal. The PRAGMA table_list statement is + ** used for this since it starts transactions on every database file, + ** including all ATTACHed databases. This seems expensive for a single + ** sqlite3_wal_checkpoint() call, but it happens very rarely. + ** https://sqlite.org/forum/forumpost/fd0f19d229156939 + */ + sqlite3_exec(db, "PRAGMA table_list",0,0,0); + } + if( pPager->pWal ){ + rc = sqlite3WalCheckpoint(pPager->pWal, db, eMode, + (eMode==SQLITE_CHECKPOINT_PASSIVE ? 0 : pPager->xBusyHandler), + pPager->pBusyHandlerArg, + pPager->walSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace, + pnLog, pnCkpt + ); + } + return rc; +} + +SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager){ + return sqlite3WalCallback(pPager->pWal); +} + +/* +** Return true if the underlying VFS for the given pager supports the +** primitives necessary for write-ahead logging. +*/ +SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager){ + const sqlite3_io_methods *pMethods = pPager->fd->pMethods; + if( pPager->noLock ) return 0; + return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap); +} + +/* +** Attempt to take an exclusive lock on the database file. If a PENDING lock +** is obtained instead, immediately release it. +*/ +static int pagerExclusiveLock(Pager *pPager){ + int rc; /* Return code */ + + assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); + rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); + if( rc!=SQLITE_OK ){ + /* If the attempt to grab the exclusive lock failed, release the + ** pending lock that may have been obtained instead. */ + pagerUnlockDb(pPager, SHARED_LOCK); + } + + return rc; +} + +/* +** Call sqlite3WalOpen() to open the WAL handle. If the pager is in +** exclusive-locking mode when this function is called, take an EXCLUSIVE +** lock on the database file and use heap-memory to store the wal-index +** in. Otherwise, use the normal shared-memory. +*/ +static int pagerOpenWal(Pager *pPager){ + int rc = SQLITE_OK; + + assert( pPager->pWal==0 && pPager->tempFile==0 ); + assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); + + /* If the pager is already in exclusive-mode, the WAL module will use + ** heap-memory for the wal-index instead of the VFS shared-memory + ** implementation. Take the exclusive lock now, before opening the WAL + ** file, to make sure this is safe. + */ + if( pPager->exclusiveMode ){ + rc = pagerExclusiveLock(pPager); + } + + /* Open the connection to the log file. If this operation fails, + ** (e.g. due to malloc() failure), return an error code. + */ + if( rc==SQLITE_OK ){ + rc = sqlite3WalOpen(pPager->pVfs, + pPager->fd, pPager->zWal, pPager->exclusiveMode, + pPager->journalSizeLimit, &pPager->pWal + ); + } + pagerFixMaplimit(pPager); + + return rc; +} + + +/* +** The caller must be holding a SHARED lock on the database file to call +** this function. +** +** If the pager passed as the first argument is open on a real database +** file (not a temp file or an in-memory database), and the WAL file +** is not already open, make an attempt to open it now. If successful, +** return SQLITE_OK. If an error occurs or the VFS used by the pager does +** not support the xShmXXX() methods, return an error code. *pbOpen is +** not modified in either case. +** +** If the pager is open on a temp-file (or in-memory database), or if +** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK +** without doing anything. +*/ +SQLITE_PRIVATE int sqlite3PagerOpenWal( + Pager *pPager, /* Pager object */ + int *pbOpen /* OUT: Set to true if call is a no-op */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( assert_pager_state(pPager) ); + assert( pPager->eState==PAGER_OPEN || pbOpen ); + assert( pPager->eState==PAGER_READER || !pbOpen ); + assert( pbOpen==0 || *pbOpen==0 ); + assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) ); + + if( !pPager->tempFile && !pPager->pWal ){ + if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN; + + /* Close any rollback journal previously open */ + sqlite3OsClose(pPager->jfd); + + rc = pagerOpenWal(pPager); + if( rc==SQLITE_OK ){ + pPager->journalMode = PAGER_JOURNALMODE_WAL; + pPager->eState = PAGER_OPEN; + } + }else{ + *pbOpen = 1; + } + + return rc; +} + +/* +** This function is called to close the connection to the log file prior +** to switching from WAL to rollback mode. +** +** Before closing the log file, this function attempts to take an +** EXCLUSIVE lock on the database file. If this cannot be obtained, an +** error (SQLITE_BUSY) is returned and the log connection is not closed. +** If successful, the EXCLUSIVE lock is not released before returning. +*/ +SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager, sqlite3 *db){ + int rc = SQLITE_OK; + + assert( pPager->journalMode==PAGER_JOURNALMODE_WAL ); + + /* If the log file is not already open, but does exist in the file-system, + ** it may need to be checkpointed before the connection can switch to + ** rollback mode. Open it now so this can happen. + */ + if( !pPager->pWal ){ + int logexists = 0; + rc = pagerLockDb(pPager, SHARED_LOCK); + if( rc==SQLITE_OK ){ + rc = sqlite3OsAccess( + pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists + ); + } + if( rc==SQLITE_OK && logexists ){ + rc = pagerOpenWal(pPager); + } + } + + /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on + ** the database file, the log and log-summary files will be deleted. + */ + if( rc==SQLITE_OK && pPager->pWal ){ + rc = pagerExclusiveLock(pPager); + if( rc==SQLITE_OK ){ + rc = sqlite3WalClose(pPager->pWal, db, pPager->walSyncFlags, + pPager->pageSize, (u8*)pPager->pTmpSpace); + pPager->pWal = 0; + pagerFixMaplimit(pPager); + if( rc && !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK); + } + } + return rc; +} + +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT +/* +** If pager pPager is a wal-mode database not in exclusive locking mode, +** invoke the sqlite3WalWriteLock() function on the associated Wal object +** with the same db and bLock parameters as were passed to this function. +** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. +*/ +SQLITE_PRIVATE int sqlite3PagerWalWriteLock(Pager *pPager, int bLock){ + int rc = SQLITE_OK; + if( pagerUseWal(pPager) && pPager->exclusiveMode==0 ){ + rc = sqlite3WalWriteLock(pPager->pWal, bLock); + } + return rc; +} + +/* +** Set the database handle used by the wal layer to determine if +** blocking locks are required. +*/ +SQLITE_PRIVATE void sqlite3PagerWalDb(Pager *pPager, sqlite3 *db){ + if( pagerUseWal(pPager) ){ + sqlite3WalDb(pPager->pWal, db); + } +} +#endif + +#ifdef SQLITE_ENABLE_SNAPSHOT +/* +** If this is a WAL database, obtain a snapshot handle for the snapshot +** currently open. Otherwise, return an error. +*/ +SQLITE_PRIVATE int sqlite3PagerSnapshotGet(Pager *pPager, sqlite3_snapshot **ppSnapshot){ + int rc = SQLITE_ERROR; + if( pPager->pWal ){ + rc = sqlite3WalSnapshotGet(pPager->pWal, ppSnapshot); + } + return rc; +} + +/* +** If this is a WAL database, store a pointer to pSnapshot. Next time a +** read transaction is opened, attempt to read from the snapshot it +** identifies. If this is not a WAL database, return an error. +*/ +SQLITE_PRIVATE int sqlite3PagerSnapshotOpen( + Pager *pPager, + sqlite3_snapshot *pSnapshot +){ + int rc = SQLITE_OK; + if( pPager->pWal ){ + sqlite3WalSnapshotOpen(pPager->pWal, pSnapshot); + }else{ + rc = SQLITE_ERROR; + } + return rc; +} + +/* +** If this is a WAL database, call sqlite3WalSnapshotRecover(). If this +** is not a WAL database, return an error. +*/ +SQLITE_PRIVATE int sqlite3PagerSnapshotRecover(Pager *pPager){ + int rc; + if( pPager->pWal ){ + rc = sqlite3WalSnapshotRecover(pPager->pWal); + }else{ + rc = SQLITE_ERROR; + } + return rc; +} + +/* +** The caller currently has a read transaction open on the database. +** If this is not a WAL database, SQLITE_ERROR is returned. Otherwise, +** this function takes a SHARED lock on the CHECKPOINTER slot and then +** checks if the snapshot passed as the second argument is still +** available. If so, SQLITE_OK is returned. +** +** If the snapshot is not available, SQLITE_ERROR is returned. Or, if +** the CHECKPOINTER lock cannot be obtained, SQLITE_BUSY. If any error +** occurs (any value other than SQLITE_OK is returned), the CHECKPOINTER +** lock is released before returning. +*/ +SQLITE_PRIVATE int sqlite3PagerSnapshotCheck(Pager *pPager, sqlite3_snapshot *pSnapshot){ + int rc; + if( pPager->pWal ){ + rc = sqlite3WalSnapshotCheck(pPager->pWal, pSnapshot); + }else{ + rc = SQLITE_ERROR; + } + return rc; +} + +/* +** Release a lock obtained by an earlier successful call to +** sqlite3PagerSnapshotCheck(). +*/ +SQLITE_PRIVATE void sqlite3PagerSnapshotUnlock(Pager *pPager){ + assert( pPager->pWal ); + sqlite3WalSnapshotUnlock(pPager->pWal); +} + +#endif /* SQLITE_ENABLE_SNAPSHOT */ +#endif /* !SQLITE_OMIT_WAL */ + +#ifdef SQLITE_ENABLE_ZIPVFS +/* +** A read-lock must be held on the pager when this function is called. If +** the pager is in WAL mode and the WAL file currently contains one or more +** frames, return the size in bytes of the page images stored within the +** WAL frames. Otherwise, if this is not a WAL database or the WAL file +** is empty, return 0. +*/ +SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager){ + assert( pPager->eState>=PAGER_READER ); + return sqlite3WalFramesize(pPager->pWal); +} +#endif + +#endif /* SQLITE_OMIT_DISKIO */ + +/************** End of pager.c ***********************************************/ +/************** Begin file wal.c *********************************************/ +/* +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the implementation of a write-ahead log (WAL) used in +** "journal_mode=WAL" mode. +** +** WRITE-AHEAD LOG (WAL) FILE FORMAT +** +** A WAL file consists of a header followed by zero or more "frames". +** Each frame records the revised content of a single page from the +** database file. All changes to the database are recorded by writing +** frames into the WAL. Transactions commit when a frame is written that +** contains a commit marker. A single WAL can and usually does record +** multiple transactions. Periodically, the content of the WAL is +** transferred back into the database file in an operation called a +** "checkpoint". +** +** A single WAL file can be used multiple times. In other words, the +** WAL can fill up with frames and then be checkpointed and then new +** frames can overwrite the old ones. A WAL always grows from beginning +** toward the end. Checksums and counters attached to each frame are +** used to determine which frames within the WAL are valid and which +** are leftovers from prior checkpoints. +** +** The WAL header is 32 bytes in size and consists of the following eight +** big-endian 32-bit unsigned integer values: +** +** 0: Magic number. 0x377f0682 or 0x377f0683 +** 4: File format version. Currently 3007000 +** 8: Database page size. Example: 1024 +** 12: Checkpoint sequence number +** 16: Salt-1, random integer incremented with each checkpoint +** 20: Salt-2, a different random integer changing with each ckpt +** 24: Checksum-1 (first part of checksum for first 24 bytes of header). +** 28: Checksum-2 (second part of checksum for first 24 bytes of header). +** +** Immediately following the wal-header are zero or more frames. Each +** frame consists of a 24-byte frame-header followed by a bytes +** of page data. The frame-header is six big-endian 32-bit unsigned +** integer values, as follows: +** +** 0: Page number. +** 4: For commit records, the size of the database image in pages +** after the commit. For all other records, zero. +** 8: Salt-1 (copied from the header) +** 12: Salt-2 (copied from the header) +** 16: Checksum-1. +** 20: Checksum-2. +** +** A frame is considered valid if and only if the following conditions are +** true: +** +** (1) The salt-1 and salt-2 values in the frame-header match +** salt values in the wal-header +** +** (2) The checksum values in the final 8 bytes of the frame-header +** exactly match the checksum computed consecutively on the +** WAL header and the first 8 bytes and the content of all frames +** up to and including the current frame. +** +** The checksum is computed using 32-bit big-endian integers if the +** magic number in the first 4 bytes of the WAL is 0x377f0683 and it +** is computed using little-endian if the magic number is 0x377f0682. +** The checksum values are always stored in the frame header in a +** big-endian format regardless of which byte order is used to compute +** the checksum. The checksum is computed by interpreting the input as +** an even number of unsigned 32-bit integers: x[0] through x[N]. The +** algorithm used for the checksum is as follows: +** +** for i from 0 to n-1 step 2: +** s0 += x[i] + s1; +** s1 += x[i+1] + s0; +** endfor +** +** Note that s0 and s1 are both weighted checksums using fibonacci weights +** in reverse order (the largest fibonacci weight occurs on the first element +** of the sequence being summed.) The s1 value spans all 32-bit +** terms of the sequence whereas s0 omits the final term. +** +** On a checkpoint, the WAL is first VFS.xSync-ed, then valid content of the +** WAL is transferred into the database, then the database is VFS.xSync-ed. +** The VFS.xSync operations serve as write barriers - all writes launched +** before the xSync must complete before any write that launches after the +** xSync begins. +** +** After each checkpoint, the salt-1 value is incremented and the salt-2 +** value is randomized. This prevents old and new frames in the WAL from +** being considered valid at the same time and being checkpointing together +** following a crash. +** +** READER ALGORITHM +** +** To read a page from the database (call it page number P), a reader +** first checks the WAL to see if it contains page P. If so, then the +** last valid instance of page P that is a followed by a commit frame +** or is a commit frame itself becomes the value read. If the WAL +** contains no copies of page P that are valid and which are a commit +** frame or are followed by a commit frame, then page P is read from +** the database file. +** +** To start a read transaction, the reader records the index of the last +** valid frame in the WAL. The reader uses this recorded "mxFrame" value +** for all subsequent read operations. New transactions can be appended +** to the WAL, but as long as the reader uses its original mxFrame value +** and ignores the newly appended content, it will see a consistent snapshot +** of the database from a single point in time. This technique allows +** multiple concurrent readers to view different versions of the database +** content simultaneously. +** +** The reader algorithm in the previous paragraphs works correctly, but +** because frames for page P can appear anywhere within the WAL, the +** reader has to scan the entire WAL looking for page P frames. If the +** WAL is large (multiple megabytes is typical) that scan can be slow, +** and read performance suffers. To overcome this problem, a separate +** data structure called the wal-index is maintained to expedite the +** search for frames of a particular page. +** +** WAL-INDEX FORMAT +** +** Conceptually, the wal-index is shared memory, though VFS implementations +** might choose to implement the wal-index using a mmapped file. Because +** the wal-index is shared memory, SQLite does not support journal_mode=WAL +** on a network filesystem. All users of the database must be able to +** share memory. +** +** In the default unix and windows implementation, the wal-index is a mmapped +** file whose name is the database name with a "-shm" suffix added. For that +** reason, the wal-index is sometimes called the "shm" file. +** +** The wal-index is transient. After a crash, the wal-index can (and should +** be) reconstructed from the original WAL file. In fact, the VFS is required +** to either truncate or zero the header of the wal-index when the last +** connection to it closes. Because the wal-index is transient, it can +** use an architecture-specific format; it does not have to be cross-platform. +** Hence, unlike the database and WAL file formats which store all values +** as big endian, the wal-index can store multi-byte values in the native +** byte order of the host computer. +** +** The purpose of the wal-index is to answer this question quickly: Given +** a page number P and a maximum frame index M, return the index of the +** last frame in the wal before frame M for page P in the WAL, or return +** NULL if there are no frames for page P in the WAL prior to M. +** +** The wal-index consists of a header region, followed by an one or +** more index blocks. +** +** The wal-index header contains the total number of frames within the WAL +** in the mxFrame field. +** +** Each index block except for the first contains information on +** HASHTABLE_NPAGE frames. The first index block contains information on +** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and +** HASHTABLE_NPAGE are selected so that together the wal-index header and +** first index block are the same size as all other index blocks in the +** wal-index. The values are: +** +** HASHTABLE_NPAGE 4096 +** HASHTABLE_NPAGE_ONE 4062 +** +** Each index block contains two sections, a page-mapping that contains the +** database page number associated with each wal frame, and a hash-table +** that allows readers to query an index block for a specific page number. +** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE +** for the first index block) 32-bit page numbers. The first entry in the +** first index-block contains the database page number corresponding to the +** first frame in the WAL file. The first entry in the second index block +** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in +** the log, and so on. +** +** The last index block in a wal-index usually contains less than the full +** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers, +** depending on the contents of the WAL file. This does not change the +** allocated size of the page-mapping array - the page-mapping array merely +** contains unused entries. +** +** Even without using the hash table, the last frame for page P +** can be found by scanning the page-mapping sections of each index block +** starting with the last index block and moving toward the first, and +** within each index block, starting at the end and moving toward the +** beginning. The first entry that equals P corresponds to the frame +** holding the content for that page. +** +** The hash table consists of HASHTABLE_NSLOT 16-bit unsigned integers. +** HASHTABLE_NSLOT = 2*HASHTABLE_NPAGE, and there is one entry in the +** hash table for each page number in the mapping section, so the hash +** table is never more than half full. The expected number of collisions +** prior to finding a match is 1. Each entry of the hash table is an +** 1-based index of an entry in the mapping section of the same +** index block. Let K be the 1-based index of the largest entry in +** the mapping section. (For index blocks other than the last, K will +** always be exactly HASHTABLE_NPAGE (4096) and for the last index block +** K will be (mxFrame%HASHTABLE_NPAGE).) Unused slots of the hash table +** contain a value of 0. +** +** To look for page P in the hash table, first compute a hash iKey on +** P as follows: +** +** iKey = (P * 383) % HASHTABLE_NSLOT +** +** Then start scanning entries of the hash table, starting with iKey +** (wrapping around to the beginning when the end of the hash table is +** reached) until an unused hash slot is found. Let the first unused slot +** be at index iUnused. (iUnused might be less than iKey if there was +** wrap-around.) Because the hash table is never more than half full, +** the search is guaranteed to eventually hit an unused entry. Let +** iMax be the value between iKey and iUnused, closest to iUnused, +** where aHash[iMax]==P. If there is no iMax entry (if there exists +** no hash slot such that aHash[i]==p) then page P is not in the +** current index block. Otherwise the iMax-th mapping entry of the +** current index block corresponds to the last entry that references +** page P. +** +** A hash search begins with the last index block and moves toward the +** first index block, looking for entries corresponding to page P. On +** average, only two or three slots in each index block need to be +** examined in order to either find the last entry for page P, or to +** establish that no such entry exists in the block. Each index block +** holds over 4000 entries. So two or three index blocks are sufficient +** to cover a typical 10 megabyte WAL file, assuming 1K pages. 8 or 10 +** comparisons (on average) suffice to either locate a frame in the +** WAL or to establish that the frame does not exist in the WAL. This +** is much faster than scanning the entire 10MB WAL. +** +** Note that entries are added in order of increasing K. Hence, one +** reader might be using some value K0 and a second reader that started +** at a later time (after additional transactions were added to the WAL +** and to the wal-index) might be using a different value K1, where K1>K0. +** Both readers can use the same hash table and mapping section to get +** the correct result. There may be entries in the hash table with +** K>K0 but to the first reader, those entries will appear to be unused +** slots in the hash table and so the first reader will get an answer as +** if no values greater than K0 had ever been inserted into the hash table +** in the first place - which is what reader one wants. Meanwhile, the +** second reader using K1 will see additional values that were inserted +** later, which is exactly what reader two wants. +** +** When a rollback occurs, the value of K is decreased. Hash table entries +** that correspond to frames greater than the new K value are removed +** from the hash table at this point. +*/ +#ifndef SQLITE_OMIT_WAL + +/* #include "wal.h" */ + +/* +** Trace output macros +*/ +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) +SQLITE_PRIVATE int sqlite3WalTrace = 0; +# define WALTRACE(X) if(sqlite3WalTrace) sqlite3DebugPrintf X +#else +# define WALTRACE(X) +#endif + +/* +** The maximum (and only) versions of the wal and wal-index formats +** that may be interpreted by this version of SQLite. +** +** If a client begins recovering a WAL file and finds that (a) the checksum +** values in the wal-header are correct and (b) the version field is not +** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN. +** +** Similarly, if a client successfully reads a wal-index header (i.e. the +** checksum test is successful) and finds that the version field is not +** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite +** returns SQLITE_CANTOPEN. +*/ +#define WAL_MAX_VERSION 3007000 +#define WALINDEX_MAX_VERSION 3007000 + +/* +** Index numbers for various locking bytes. WAL_NREADER is the number +** of available reader locks and should be at least 3. The default +** is SQLITE_SHM_NLOCK==8 and WAL_NREADER==5. +** +** Technically, the various VFSes are free to implement these locks however +** they see fit. However, compatibility is encouraged so that VFSes can +** interoperate. The standard implemention used on both unix and windows +** is for the index number to indicate a byte offset into the +** WalCkptInfo.aLock[] array in the wal-index header. In other words, all +** locks are on the shm file. The WALINDEX_LOCK_OFFSET constant (which +** should be 120) is the location in the shm file for the first locking +** byte. +*/ +#define WAL_WRITE_LOCK 0 +#define WAL_ALL_BUT_WRITE 1 +#define WAL_CKPT_LOCK 1 +#define WAL_RECOVER_LOCK 2 +#define WAL_READ_LOCK(I) (3+(I)) +#define WAL_NREADER (SQLITE_SHM_NLOCK-3) + + +/* Object declarations */ +typedef struct WalIndexHdr WalIndexHdr; +typedef struct WalIterator WalIterator; +typedef struct WalCkptInfo WalCkptInfo; + + +/* +** The following object holds a copy of the wal-index header content. +** +** The actual header in the wal-index consists of two copies of this +** object followed by one instance of the WalCkptInfo object. +** For all versions of SQLite through 3.10.0 and probably beyond, +** the locking bytes (WalCkptInfo.aLock) start at offset 120 and +** the total header size is 136 bytes. +** +** The szPage value can be any power of 2 between 512 and 32768, inclusive. +** Or it can be 1 to represent a 65536-byte page. The latter case was +** added in 3.7.1 when support for 64K pages was added. +*/ +struct WalIndexHdr { + u32 iVersion; /* Wal-index version */ + u32 unused; /* Unused (padding) field */ + u32 iChange; /* Counter incremented each transaction */ + u8 isInit; /* 1 when initialized */ + u8 bigEndCksum; /* True if checksums in WAL are big-endian */ + u16 szPage; /* Database page size in bytes. 1==64K */ + u32 mxFrame; /* Index of last valid frame in the WAL */ + u32 nPage; /* Size of database in pages */ + u32 aFrameCksum[2]; /* Checksum of last frame in log */ + u32 aSalt[2]; /* Two salt values copied from WAL header */ + u32 aCksum[2]; /* Checksum over all prior fields */ +}; + +/* +** A copy of the following object occurs in the wal-index immediately +** following the second copy of the WalIndexHdr. This object stores +** information used by checkpoint. +** +** nBackfill is the number of frames in the WAL that have been written +** back into the database. (We call the act of moving content from WAL to +** database "backfilling".) The nBackfill number is never greater than +** WalIndexHdr.mxFrame. nBackfill can only be increased by threads +** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). +** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from +** mxFrame back to zero when the WAL is reset. +** +** nBackfillAttempted is the largest value of nBackfill that a checkpoint +** has attempted to achieve. Normally nBackfill==nBackfillAtempted, however +** the nBackfillAttempted is set before any backfilling is done and the +** nBackfill is only set after all backfilling completes. So if a checkpoint +** crashes, nBackfillAttempted might be larger than nBackfill. The +** WalIndexHdr.mxFrame must never be less than nBackfillAttempted. +** +** The aLock[] field is a set of bytes used for locking. These bytes should +** never be read or written. +** +** There is one entry in aReadMark[] for each reader lock. If a reader +** holds read-lock K, then the value in aReadMark[K] is no greater than +** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) +** for any aReadMark[] means that entry is unused. aReadMark[0] is +** a special case; its value is never used and it exists as a place-holder +** to avoid having to offset aReadMark[] indexs by one. Readers holding +** WAL_READ_LOCK(0) always ignore the entire WAL and read all content +** directly from the database. +** +** The value of aReadMark[K] may only be changed by a thread that +** is holding an exclusive lock on WAL_READ_LOCK(K). Thus, the value of +** aReadMark[K] cannot changed while there is a reader is using that mark +** since the reader will be holding a shared lock on WAL_READ_LOCK(K). +** +** The checkpointer may only transfer frames from WAL to database where +** the frame numbers are less than or equal to every aReadMark[] that is +** in use (that is, every aReadMark[j] for which there is a corresponding +** WAL_READ_LOCK(j)). New readers (usually) pick the aReadMark[] with the +** largest value and will increase an unused aReadMark[] to mxFrame if there +** is not already an aReadMark[] equal to mxFrame. The exception to the +** previous sentence is when nBackfill equals mxFrame (meaning that everything +** in the WAL has been backfilled into the database) then new readers +** will choose aReadMark[0] which has value 0 and hence such reader will +** get all their all content directly from the database file and ignore +** the WAL. +** +** Writers normally append new frames to the end of the WAL. However, +** if nBackfill equals mxFrame (meaning that all WAL content has been +** written back into the database) and if no readers are using the WAL +** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then +** the writer will first "reset" the WAL back to the beginning and start +** writing new content beginning at frame 1. +** +** We assume that 32-bit loads are atomic and so no locks are needed in +** order to read from any aReadMark[] entries. +*/ +struct WalCkptInfo { + u32 nBackfill; /* Number of WAL frames backfilled into DB */ + u32 aReadMark[WAL_NREADER]; /* Reader marks */ + u8 aLock[SQLITE_SHM_NLOCK]; /* Reserved space for locks */ + u32 nBackfillAttempted; /* WAL frames perhaps written, or maybe not */ + u32 notUsed0; /* Available for future enhancements */ +}; +#define READMARK_NOT_USED 0xffffffff + +/* +** This is a schematic view of the complete 136-byte header of the +** wal-index file (also known as the -shm file): +** +** +-----------------------------+ +** 0: | iVersion | \ +** +-----------------------------+ | +** 4: | (unused padding) | | +** +-----------------------------+ | +** 8: | iChange | | +** +-------+-------+-------------+ | +** 12: | bInit | bBig | szPage | | +** +-------+-------+-------------+ | +** 16: | mxFrame | | First copy of the +** +-----------------------------+ | WalIndexHdr object +** 20: | nPage | | +** +-----------------------------+ | +** 24: | aFrameCksum | | +** | | | +** +-----------------------------+ | +** 32: | aSalt | | +** | | | +** +-----------------------------+ | +** 40: | aCksum | | +** | | / +** +-----------------------------+ +** 48: | iVersion | \ +** +-----------------------------+ | +** 52: | (unused padding) | | +** +-----------------------------+ | +** 56: | iChange | | +** +-------+-------+-------------+ | +** 60: | bInit | bBig | szPage | | +** +-------+-------+-------------+ | Second copy of the +** 64: | mxFrame | | WalIndexHdr +** +-----------------------------+ | +** 68: | nPage | | +** +-----------------------------+ | +** 72: | aFrameCksum | | +** | | | +** +-----------------------------+ | +** 80: | aSalt | | +** | | | +** +-----------------------------+ | +** 88: | aCksum | | +** | | / +** +-----------------------------+ +** 96: | nBackfill | +** +-----------------------------+ +** 100: | 5 read marks | +** | | +** | | +** | | +** | | +** +-------+-------+------+------+ +** 120: | Write | Ckpt | Rcvr | Rd0 | \ +** +-------+-------+------+------+ ) 8 lock bytes +** | Read1 | Read2 | Rd3 | Rd4 | / +** +-------+-------+------+------+ +** 128: | nBackfillAttempted | +** +-----------------------------+ +** 132: | (unused padding) | +** +-----------------------------+ +*/ + +/* A block of WALINDEX_LOCK_RESERVED bytes beginning at +** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems +** only support mandatory file-locks, we do not read or write data +** from the region of the file on which locks are applied. +*/ +#define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2+offsetof(WalCkptInfo,aLock)) +#define WALINDEX_HDR_SIZE (sizeof(WalIndexHdr)*2+sizeof(WalCkptInfo)) + +/* Size of header before each frame in wal */ +#define WAL_FRAME_HDRSIZE 24 + +/* Size of write ahead log header, including checksum. */ +#define WAL_HDRSIZE 32 + +/* WAL magic value. Either this value, or the same value with the least +** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit +** big-endian format in the first 4 bytes of a WAL file. +** +** If the LSB is set, then the checksums for each frame within the WAL +** file are calculated by treating all data as an array of 32-bit +** big-endian words. Otherwise, they are calculated by interpreting +** all data as 32-bit little-endian words. +*/ +#define WAL_MAGIC 0x377f0682 + +/* +** Return the offset of frame iFrame in the write-ahead log file, +** assuming a database page size of szPage bytes. The offset returned +** is to the start of the write-ahead log frame-header. +*/ +#define walFrameOffset(iFrame, szPage) ( \ + WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE) \ +) + +/* +** An open write-ahead log file is represented by an instance of the +** following object. +*/ +struct Wal { + sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ + sqlite3_file *pDbFd; /* File handle for the database file */ + sqlite3_file *pWalFd; /* File handle for WAL file */ + u32 iCallback; /* Value to pass to log callback (or 0) */ + i64 mxWalSize; /* Truncate WAL to this size upon reset */ + int nWiData; /* Size of array apWiData */ + int szFirstBlock; /* Size of first block written to WAL file */ + volatile u32 **apWiData; /* Pointer to wal-index content in memory */ + u32 szPage; /* Database page size */ + i16 readLock; /* Which read lock is being held. -1 for none */ + u8 syncFlags; /* Flags to use to sync header writes */ + u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ + u8 writeLock; /* True if in a write transaction */ + u8 ckptLock; /* True if holding a checkpoint lock */ + u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ + u8 truncateOnCommit; /* True to truncate WAL file on commit */ + u8 syncHeader; /* Fsync the WAL header if true */ + u8 padToSectorBoundary; /* Pad transactions out to the next sector */ + u8 bShmUnreliable; /* SHM content is read-only and unreliable */ + WalIndexHdr hdr; /* Wal-index header for current transaction */ + u32 minFrame; /* Ignore wal frames before this one */ + u32 iReCksum; /* On commit, recalculate checksums from here */ + const char *zWalName; /* Name of WAL file */ + u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ +#ifdef SQLITE_DEBUG + u8 lockError; /* True if a locking error has occurred */ +#endif +#ifdef SQLITE_ENABLE_SNAPSHOT + WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ +#endif +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + sqlite3 *db; +#endif +}; + +/* +** Candidate values for Wal.exclusiveMode. +*/ +#define WAL_NORMAL_MODE 0 +#define WAL_EXCLUSIVE_MODE 1 +#define WAL_HEAPMEMORY_MODE 2 + +/* +** Possible values for WAL.readOnly +*/ +#define WAL_RDWR 0 /* Normal read/write connection */ +#define WAL_RDONLY 1 /* The WAL file is readonly */ +#define WAL_SHM_RDONLY 2 /* The SHM file is readonly */ + +/* +** Each page of the wal-index mapping contains a hash-table made up of +** an array of HASHTABLE_NSLOT elements of the following type. +*/ +typedef u16 ht_slot; + +/* +** This structure is used to implement an iterator that loops through +** all frames in the WAL in database page order. Where two or more frames +** correspond to the same database page, the iterator visits only the +** frame most recently written to the WAL (in other words, the frame with +** the largest index). +** +** The internals of this structure are only accessed by: +** +** walIteratorInit() - Create a new iterator, +** walIteratorNext() - Step an iterator, +** walIteratorFree() - Free an iterator. +** +** This functionality is used by the checkpoint code (see walCheckpoint()). +*/ +struct WalIterator { + u32 iPrior; /* Last result returned from the iterator */ + int nSegment; /* Number of entries in aSegment[] */ + struct WalSegment { + int iNext; /* Next slot in aIndex[] not yet returned */ + ht_slot *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */ + u32 *aPgno; /* Array of page numbers. */ + int nEntry; /* Nr. of entries in aPgno[] and aIndex[] */ + int iZero; /* Frame number associated with aPgno[0] */ + } aSegment[1]; /* One for every 32KB page in the wal-index */ +}; + +/* +** Define the parameters of the hash tables in the wal-index file. There +** is a hash-table following every HASHTABLE_NPAGE page numbers in the +** wal-index. +** +** Changing any of these constants will alter the wal-index format and +** create incompatibilities. +*/ +#define HASHTABLE_NPAGE 4096 /* Must be power of 2 */ +#define HASHTABLE_HASH_1 383 /* Should be prime */ +#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ + +/* +** The block of page numbers associated with the first hash-table in a +** wal-index is smaller than usual. This is so that there is a complete +** hash-table on each aligned 32KB page of the wal-index. +*/ +#define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32))) + +/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */ +#define WALINDEX_PGSZ ( \ + sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ +) + +/* +** Obtain a pointer to the iPage'th page of the wal-index. The wal-index +** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are +** numbered from zero. +** +** If the wal-index is currently smaller the iPage pages then the size +** of the wal-index might be increased, but only if it is safe to do +** so. It is safe to enlarge the wal-index if pWal->writeLock is true +** or pWal->exclusiveMode==WAL_HEAPMEMORY_MODE. +** +** Three possible result scenarios: +** +** (1) rc==SQLITE_OK and *ppPage==Requested-Wal-Index-Page +** (2) rc>=SQLITE_ERROR and *ppPage==NULL +** (3) rc==SQLITE_OK and *ppPage==NULL // only if iPage==0 +** +** Scenario (3) can only occur when pWal->writeLock is false and iPage==0 +*/ +static SQLITE_NOINLINE int walIndexPageRealloc( + Wal *pWal, /* The WAL context */ + int iPage, /* The page we seek */ + volatile u32 **ppPage /* Write the page pointer here */ +){ + int rc = SQLITE_OK; + + /* Enlarge the pWal->apWiData[] array if required */ + if( pWal->nWiData<=iPage ){ + sqlite3_int64 nByte = sizeof(u32*)*(iPage+1); + volatile u32 **apNew; + apNew = (volatile u32 **)sqlite3Realloc((void *)pWal->apWiData, nByte); + if( !apNew ){ + *ppPage = 0; + return SQLITE_NOMEM_BKPT; + } + memset((void*)&apNew[pWal->nWiData], 0, + sizeof(u32*)*(iPage+1-pWal->nWiData)); + pWal->apWiData = apNew; + pWal->nWiData = iPage+1; + } + + /* Request a pointer to the required page from the VFS */ + assert( pWal->apWiData[iPage]==0 ); + if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ + pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); + if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM_BKPT; + }else{ + rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, + pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] + ); + assert( pWal->apWiData[iPage]!=0 + || rc!=SQLITE_OK + || (pWal->writeLock==0 && iPage==0) ); + testcase( pWal->apWiData[iPage]==0 && rc==SQLITE_OK ); + if( rc==SQLITE_OK ){ + if( iPage>0 && sqlite3FaultSim(600) ) rc = SQLITE_NOMEM; + }else if( (rc&0xff)==SQLITE_READONLY ){ + pWal->readOnly |= WAL_SHM_RDONLY; + if( rc==SQLITE_READONLY ){ + rc = SQLITE_OK; + } + } + } + + *ppPage = pWal->apWiData[iPage]; + assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); + return rc; +} +static int walIndexPage( + Wal *pWal, /* The WAL context */ + int iPage, /* The page we seek */ + volatile u32 **ppPage /* Write the page pointer here */ +){ + if( pWal->nWiData<=iPage || (*ppPage = pWal->apWiData[iPage])==0 ){ + return walIndexPageRealloc(pWal, iPage, ppPage); + } + return SQLITE_OK; +} + +/* +** Return a pointer to the WalCkptInfo structure in the wal-index. +*/ +static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); +} + +/* +** Return a pointer to the WalIndexHdr structure in the wal-index. +*/ +static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalIndexHdr*)pWal->apWiData[0]; +} + +/* +** The argument to this macro must be of type u32. On a little-endian +** architecture, it returns the u32 value that results from interpreting +** the 4 bytes as a big-endian value. On a big-endian architecture, it +** returns the value that would be produced by interpreting the 4 bytes +** of the input value as a little-endian integer. +*/ +#define BYTESWAP32(x) ( \ + (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \ + + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \ +) + +/* +** Generate or extend an 8 byte checksum based on the data in +** array aByte[] and the initial values of aIn[0] and aIn[1] (or +** initial values of 0 and 0 if aIn==NULL). +** +** The checksum is written back into aOut[] before returning. +** +** nByte must be a positive multiple of 8. +*/ +static void walChecksumBytes( + int nativeCksum, /* True for native byte-order, false for non-native */ + u8 *a, /* Content to be checksummed */ + int nByte, /* Bytes of content in a[]. Must be a multiple of 8. */ + const u32 *aIn, /* Initial checksum value input */ + u32 *aOut /* OUT: Final checksum value output */ +){ + u32 s1, s2; + u32 *aData = (u32 *)a; + u32 *aEnd = (u32 *)&a[nByte]; + + if( aIn ){ + s1 = aIn[0]; + s2 = aIn[1]; + }else{ + s1 = s2 = 0; + } + + assert( nByte>=8 ); + assert( (nByte&0x00000007)==0 ); + assert( nByte<=65536 ); + + if( nativeCksum ){ + do { + s1 += *aData++ + s2; + s2 += *aData++ + s1; + }while( aDataexclusiveMode!=WAL_HEAPMEMORY_MODE ){ + sqlite3OsShmBarrier(pWal->pDbFd); + } +} + +/* +** Add the SQLITE_NO_TSAN as part of the return-type of a function +** definition as a hint that the function contains constructs that +** might give false-positive TSAN warnings. +** +** See tag-20200519-1. +*/ +#if defined(__clang__) && !defined(SQLITE_NO_TSAN) +# define SQLITE_NO_TSAN __attribute__((no_sanitize_thread)) +#else +# define SQLITE_NO_TSAN +#endif + +/* +** Write the header information in pWal->hdr into the wal-index. +** +** The checksum on pWal->hdr is updated before it is written. +*/ +static SQLITE_NO_TSAN void walIndexWriteHdr(Wal *pWal){ + volatile WalIndexHdr *aHdr = walIndexHdr(pWal); + const int nCksum = offsetof(WalIndexHdr, aCksum); + + assert( pWal->writeLock ); + pWal->hdr.isInit = 1; + pWal->hdr.iVersion = WALINDEX_MAX_VERSION; + walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); + /* Possible TSAN false-positive. See tag-20200519-1 */ + memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); + walShmBarrier(pWal); + memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); +} + +/* +** This function encodes a single frame header and writes it to a buffer +** supplied by the caller. A frame-header is made up of a series of +** 4-byte big-endian integers, as follows: +** +** 0: Page number. +** 4: For commit records, the size of the database image in pages +** after the commit. For all other records, zero. +** 8: Salt-1 (copied from the wal-header) +** 12: Salt-2 (copied from the wal-header) +** 16: Checksum-1. +** 20: Checksum-2. +*/ +static void walEncodeFrame( + Wal *pWal, /* The write-ahead log */ + u32 iPage, /* Database page number for frame */ + u32 nTruncate, /* New db size (or 0 for non-commit frames) */ + u8 *aData, /* Pointer to page data */ + u8 *aFrame /* OUT: Write encoded frame here */ +){ + int nativeCksum; /* True for native byte-order checksums */ + u32 *aCksum = pWal->hdr.aFrameCksum; + assert( WAL_FRAME_HDRSIZE==24 ); + sqlite3Put4byte(&aFrame[0], iPage); + sqlite3Put4byte(&aFrame[4], nTruncate); + if( pWal->iReCksum==0 ){ + memcpy(&aFrame[8], pWal->hdr.aSalt, 8); + + nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + + sqlite3Put4byte(&aFrame[16], aCksum[0]); + sqlite3Put4byte(&aFrame[20], aCksum[1]); + }else{ + memset(&aFrame[8], 0, 16); + } +} + +/* +** Check to see if the frame with header in aFrame[] and content +** in aData[] is valid. If it is a valid frame, fill *piPage and +** *pnTruncate and return true. Return if the frame is not valid. +*/ +static int walDecodeFrame( + Wal *pWal, /* The write-ahead log */ + u32 *piPage, /* OUT: Database page number for frame */ + u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* Frame data */ +){ + int nativeCksum; /* True for native byte-order checksums */ + u32 *aCksum = pWal->hdr.aFrameCksum; + u32 pgno; /* Page number of the frame */ + assert( WAL_FRAME_HDRSIZE==24 ); + + /* A frame is only valid if the salt values in the frame-header + ** match the salt values in the wal-header. + */ + if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ){ + return 0; + } + + /* A frame is only valid if the page number is creater than zero. + */ + pgno = sqlite3Get4byte(&aFrame[0]); + if( pgno==0 ){ + return 0; + } + + /* A frame is only valid if a checksum of the WAL header, + ** all prior frams, the first 16 bytes of this frame-header, + ** and the frame-data matches the checksum in the last 8 + ** bytes of this frame-header. + */ + nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) + ){ + /* Checksum failed. */ + return 0; + } + + /* If we reach this point, the frame is valid. Return the page number + ** and the new database size. + */ + *piPage = pgno; + *pnTruncate = sqlite3Get4byte(&aFrame[4]); + return 1; +} + + +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) +/* +** Names of locks. This routine is used to provide debugging output and is not +** a part of an ordinary build. +*/ +static const char *walLockName(int lockIdx){ + if( lockIdx==WAL_WRITE_LOCK ){ + return "WRITE-LOCK"; + }else if( lockIdx==WAL_CKPT_LOCK ){ + return "CKPT-LOCK"; + }else if( lockIdx==WAL_RECOVER_LOCK ){ + return "RECOVER-LOCK"; + }else{ + static char zName[15]; + sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]", + lockIdx-WAL_READ_LOCK(0)); + return zName; + } +} +#endif /*defined(SQLITE_TEST) || defined(SQLITE_DEBUG) */ + + +/* +** Set or release locks on the WAL. Locks are either shared or exclusive. +** A lock cannot be moved directly between shared and exclusive - it must go +** through the unlocked state first. +** +** In locking_mode=EXCLUSIVE, all of these routines become no-ops. +*/ +static int walLockShared(Wal *pWal, int lockIdx){ + int rc; + if( pWal->exclusiveMode ) return SQLITE_OK; + rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, + SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); + WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal, + walLockName(lockIdx), rc ? "failed" : "ok")); + VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); ) + return rc; +} +static void walUnlockShared(Wal *pWal, int lockIdx){ + if( pWal->exclusiveMode ) return; + (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, + SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); + WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); +} +static int walLockExclusive(Wal *pWal, int lockIdx, int n){ + int rc; + if( pWal->exclusiveMode ) return SQLITE_OK; + rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, + SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); + WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, + walLockName(lockIdx), n, rc ? "failed" : "ok")); + VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); ) + return rc; +} +static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ + if( pWal->exclusiveMode ) return; + (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, + SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); + WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, + walLockName(lockIdx), n)); +} + +/* +** Compute a hash on a page number. The resulting hash value must land +** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances +** the hash to the next value in the event of a collision. +*/ +static int walHash(u32 iPage){ + assert( iPage>0 ); + assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 ); + return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1); +} +static int walNextHash(int iPriorHash){ + return (iPriorHash+1)&(HASHTABLE_NSLOT-1); +} + +/* +** An instance of the WalHashLoc object is used to describe the location +** of a page hash table in the wal-index. This becomes the return value +** from walHashGet(). +*/ +typedef struct WalHashLoc WalHashLoc; +struct WalHashLoc { + volatile ht_slot *aHash; /* Start of the wal-index hash table */ + volatile u32 *aPgno; /* aPgno[1] is the page of first frame indexed */ + u32 iZero; /* One less than the frame number of first indexed*/ +}; + +/* +** Return pointers to the hash table and page number array stored on +** page iHash of the wal-index. The wal-index is broken into 32KB pages +** numbered starting from 0. +** +** Set output variable pLoc->aHash to point to the start of the hash table +** in the wal-index file. Set pLoc->iZero to one less than the frame +** number of the first frame indexed by this hash table. If a +** slot in the hash table is set to N, it refers to frame number +** (pLoc->iZero+N) in the log. +** +** Finally, set pLoc->aPgno so that pLoc->aPgno[0] is the page number of the +** first frame indexed by the hash table, frame (pLoc->iZero). +*/ +static int walHashGet( + Wal *pWal, /* WAL handle */ + int iHash, /* Find the iHash'th table */ + WalHashLoc *pLoc /* OUT: Hash table location */ +){ + int rc; /* Return code */ + + rc = walIndexPage(pWal, iHash, &pLoc->aPgno); + assert( rc==SQLITE_OK || iHash>0 ); + + if( pLoc->aPgno ){ + pLoc->aHash = (volatile ht_slot *)&pLoc->aPgno[HASHTABLE_NPAGE]; + if( iHash==0 ){ + pLoc->aPgno = &pLoc->aPgno[WALINDEX_HDR_SIZE/sizeof(u32)]; + pLoc->iZero = 0; + }else{ + pLoc->iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; + } + }else if( NEVER(rc==SQLITE_OK) ){ + rc = SQLITE_ERROR; + } + return rc; +} + +/* +** Return the number of the wal-index page that contains the hash-table +** and page-number array that contain entries corresponding to WAL frame +** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages +** are numbered starting from 0. +*/ +static int walFramePage(u32 iFrame){ + int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE; + assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE) + && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE) + && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)) + && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) + && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE)) + ); + assert( iHash>=0 ); + return iHash; +} + +/* +** Return the page number associated with frame iFrame in this WAL. +*/ +static u32 walFramePgno(Wal *pWal, u32 iFrame){ + int iHash = walFramePage(iFrame); + if( iHash==0 ){ + return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; + } + return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; +} + +/* +** Remove entries from the hash table that point to WAL slots greater +** than pWal->hdr.mxFrame. +** +** This function is called whenever pWal->hdr.mxFrame is decreased due +** to a rollback or savepoint. +** +** At most only the hash table containing pWal->hdr.mxFrame needs to be +** updated. Any later hash tables will be automatically cleared when +** pWal->hdr.mxFrame advances to the point where those hash tables are +** actually needed. +*/ +static void walCleanupHash(Wal *pWal){ + WalHashLoc sLoc; /* Hash table location */ + int iLimit = 0; /* Zero values greater than this */ + int nByte; /* Number of bytes to zero in aPgno[] */ + int i; /* Used to iterate through aHash[] */ + + assert( pWal->writeLock ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 ); + + if( pWal->hdr.mxFrame==0 ) return; + + /* Obtain pointers to the hash-table and page-number array containing + ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed + ** that the page said hash-table and array reside on is already mapped.(1) + */ + assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) ); + assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] ); + i = walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &sLoc); + if( NEVER(i) ) return; /* Defense-in-depth, in case (1) above is wrong */ + + /* Zero all hash-table entries that correspond to frame numbers greater + ** than pWal->hdr.mxFrame. + */ + iLimit = pWal->hdr.mxFrame - sLoc.iZero; + assert( iLimit>0 ); + for(i=0; iiLimit ){ + sLoc.aHash[i] = 0; + } + } + + /* Zero the entries in the aPgno array that correspond to frames with + ** frame numbers greater than pWal->hdr.mxFrame. + */ + nByte = (int)((char *)sLoc.aHash - (char *)&sLoc.aPgno[iLimit]); + assert( nByte>=0 ); + memset((void *)&sLoc.aPgno[iLimit], 0, nByte); + +#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + /* Verify that the every entry in the mapping region is still reachable + ** via the hash table even after the cleanup. + */ + if( iLimit ){ + int j; /* Loop counter */ + int iKey; /* Hash key */ + for(j=0; j=0 ); + memset((void*)sLoc.aPgno, 0, nByte); + } + + /* If the entry in aPgno[] is already set, then the previous writer + ** must have exited unexpectedly in the middle of a transaction (after + ** writing one or more dirty pages to the WAL to free up memory). + ** Remove the remnants of that writers uncommitted transaction from + ** the hash-table before writing any new entries. + */ + if( sLoc.aPgno[idx-1] ){ + walCleanupHash(pWal); + assert( !sLoc.aPgno[idx-1] ); + } + + /* Write the aPgno[] array entry and the hash-table slot. */ + nCollide = idx; + for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ + if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; + } + sLoc.aPgno[idx-1] = iPage; + AtomicStore(&sLoc.aHash[iKey], (ht_slot)idx); + +#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + /* Verify that the number of entries in the hash table exactly equals + ** the number of entries in the mapping region. + */ + { + int i; /* Loop counter */ + int nEntry = 0; /* Number of entries in the hash table */ + for(i=0; ickptLock==1 || pWal->ckptLock==0 ); + assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); + assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); + assert( pWal->writeLock ); + iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; + rc = walLockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); + if( rc ){ + return rc; + } + + WALTRACE(("WAL%p: recovery begin...\n", pWal)); + + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + + rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); + if( rc!=SQLITE_OK ){ + goto recovery_error; + } + + if( nSize>WAL_HDRSIZE ){ + u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ + u32 *aPrivate = 0; /* Heap copy of *-shm hash being populated */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int szFrame; /* Number of bytes in buffer aFrame[] */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + int szPage; /* Page size according to the log */ + u32 magic; /* Magic value read from WAL header */ + u32 version; /* Magic value read from WAL header */ + int isValid; /* True if this frame is valid */ + u32 iPg; /* Current 32KB wal-index page */ + u32 iLastFrame; /* Last frame in wal, based on nSize alone */ + + /* Read in the WAL header. */ + rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + goto recovery_error; + } + + /* If the database page size is not a power of two, or is greater than + ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid + ** data. Similarly, if the 'magic' value is invalid, ignore the whole + ** WAL file. + */ + magic = sqlite3Get4byte(&aBuf[0]); + szPage = sqlite3Get4byte(&aBuf[8]); + if( (magic&0xFFFFFFFE)!=WAL_MAGIC + || szPage&(szPage-1) + || szPage>SQLITE_MAX_PAGE_SIZE + || szPage<512 + ){ + goto finished; + } + pWal->hdr.bigEndCksum = (u8)(magic&0x00000001); + pWal->szPage = szPage; + pWal->nCkpt = sqlite3Get4byte(&aBuf[12]); + memcpy(&pWal->hdr.aSalt, &aBuf[16], 8); + + /* Verify that the WAL header checksum is correct */ + walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, + aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum + ); + if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24]) + || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28]) + ){ + goto finished; + } + + /* Verify that the version number on the WAL format is one that + ** are able to understand */ + version = sqlite3Get4byte(&aBuf[4]); + if( version!=WAL_MAX_VERSION ){ + rc = SQLITE_CANTOPEN_BKPT; + goto finished; + } + + /* Malloc a buffer to read frames into. */ + szFrame = szPage + WAL_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ); + if( !aFrame ){ + rc = SQLITE_NOMEM_BKPT; + goto recovery_error; + } + aData = &aFrame[WAL_FRAME_HDRSIZE]; + aPrivate = (u32*)&aData[szPage]; + + /* Read all frames from the log file. */ + iLastFrame = (nSize - WAL_HDRSIZE) / szFrame; + for(iPg=0; iPg<=(u32)walFramePage(iLastFrame); iPg++){ + u32 *aShare; + u32 iFrame; /* Index of last frame read */ + u32 iLast = MIN(iLastFrame, HASHTABLE_NPAGE_ONE+iPg*HASHTABLE_NPAGE); + u32 iFirst = 1 + (iPg==0?0:HASHTABLE_NPAGE_ONE+(iPg-1)*HASHTABLE_NPAGE); + u32 nHdr, nHdr32; + rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare); + assert( aShare!=0 || rc!=SQLITE_OK ); + if( aShare==0 ) break; + pWal->apWiData[iPg] = aPrivate; + + for(iFrame=iFirst; iFrame<=iLast; iFrame++){ + i64 iOffset = walFrameOffset(iFrame, szPage); + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); + if( rc!=SQLITE_OK ) break; + isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); + if( !isValid ) break; + rc = walIndexAppend(pWal, iFrame, pgno); + if( NEVER(rc!=SQLITE_OK) ) break; + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + pWal->hdr.mxFrame = iFrame; + pWal->hdr.nPage = nTruncate; + pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; + aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; + } + } + pWal->apWiData[iPg] = aShare; + nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); + nHdr32 = nHdr / sizeof(u32); +#ifndef SQLITE_SAFER_WALINDEX_RECOVERY + /* Memcpy() should work fine here, on all reasonable implementations. + ** Technically, memcpy() might change the destination to some + ** intermediate value before setting to the final value, and that might + ** cause a concurrent reader to malfunction. Memcpy() is allowed to + ** do that, according to the spec, but no memcpy() implementation that + ** we know of actually does that, which is why we say that memcpy() + ** is safe for this. Memcpy() is certainly a lot faster. + */ + memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr); +#else + /* In the event that some platform is found for which memcpy() + ** changes the destination to some intermediate value before + ** setting the final value, this alternative copy routine is + ** provided. + */ + { + int i; + for(i=nHdr32; ihdr.aFrameCksum[0] = aFrameCksum[0]; + pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; + walIndexWriteHdr(pWal); + + /* Reset the checkpoint-header. This is safe because this thread is + ** currently holding locks that exclude all other writers and + ** checkpointers. Then set the values of read-mark slots 1 through N. + */ + pInfo = walCkptInfo(pWal); + pInfo->nBackfill = 0; + pInfo->nBackfillAttempted = pWal->hdr.mxFrame; + pInfo->aReadMark[0] = 0; + for(i=1; ihdr.mxFrame ){ + pInfo->aReadMark[i] = pWal->hdr.mxFrame; + }else{ + pInfo->aReadMark[i] = READMARK_NOT_USED; + } + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc!=SQLITE_BUSY ){ + goto recovery_error; + } + } + + /* If more than one frame was recovered from the log file, report an + ** event via sqlite3_log(). This is to help with identifying performance + ** problems caused by applications routinely shutting down without + ** checkpointing the log file. + */ + if( pWal->hdr.nPage ){ + sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, + "recovered %d frames from WAL file %s", + pWal->hdr.mxFrame, pWal->zWalName + ); + } + } + +recovery_error: + WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); + walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); + return rc; +} + +/* +** Close an open wal-index. +*/ +static void walIndexClose(Wal *pWal, int isDelete){ + if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bShmUnreliable ){ + int i; + for(i=0; inWiData; i++){ + sqlite3_free((void *)pWal->apWiData[i]); + pWal->apWiData[i] = 0; + } + } + if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){ + sqlite3OsShmUnmap(pWal->pDbFd, isDelete); + } +} + +/* +** Open a connection to the WAL file zWalName. The database file must +** already be opened on connection pDbFd. The buffer that zWalName points +** to must remain valid for the lifetime of the returned Wal* handle. +** +** A SHARED lock should be held on the database file when this function +** is called. The purpose of this SHARED lock is to prevent any other +** client from unlinking the WAL or wal-index file. If another process +** were to do this just after this client opened one of these files, the +** system would be badly broken. +** +** If the log file is successfully opened, SQLITE_OK is returned and +** *ppWal is set to point to a new WAL handle. If an error occurs, +** an SQLite error code is returned and *ppWal is left unmodified. +*/ +SQLITE_PRIVATE int sqlite3WalOpen( + sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ + sqlite3_file *pDbFd, /* The open database file */ + const char *zWalName, /* Name of the WAL file */ + int bNoShm, /* True to run in heap-memory mode */ + i64 mxWalSize, /* Truncate WAL to this size on reset */ + Wal **ppWal /* OUT: Allocated Wal handle */ +){ + int rc; /* Return Code */ + Wal *pRet; /* Object to allocate and return */ + int flags; /* Flags passed to OsOpen() */ + + assert( zWalName && zWalName[0] ); + assert( pDbFd ); + + /* Verify the values of various constants. Any changes to the values + ** of these constants would result in an incompatible on-disk format + ** for the -shm file. Any change that causes one of these asserts to + ** fail is a backward compatibility problem, even if the change otherwise + ** works. + ** + ** This table also serves as a helpful cross-reference when trying to + ** interpret hex dumps of the -shm file. + */ + assert( 48 == sizeof(WalIndexHdr) ); + assert( 40 == sizeof(WalCkptInfo) ); + assert( 120 == WALINDEX_LOCK_OFFSET ); + assert( 136 == WALINDEX_HDR_SIZE ); + assert( 4096 == HASHTABLE_NPAGE ); + assert( 4062 == HASHTABLE_NPAGE_ONE ); + assert( 8192 == HASHTABLE_NSLOT ); + assert( 383 == HASHTABLE_HASH_1 ); + assert( 32768 == WALINDEX_PGSZ ); + assert( 8 == SQLITE_SHM_NLOCK ); + assert( 5 == WAL_NREADER ); + assert( 24 == WAL_FRAME_HDRSIZE ); + assert( 32 == WAL_HDRSIZE ); + assert( 120 == WALINDEX_LOCK_OFFSET + WAL_WRITE_LOCK ); + assert( 121 == WALINDEX_LOCK_OFFSET + WAL_CKPT_LOCK ); + assert( 122 == WALINDEX_LOCK_OFFSET + WAL_RECOVER_LOCK ); + assert( 123 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(0) ); + assert( 124 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(1) ); + assert( 125 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(2) ); + assert( 126 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(3) ); + assert( 127 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(4) ); + + /* In the amalgamation, the os_unix.c and os_win.c source files come before + ** this source file. Verify that the #defines of the locking byte offsets + ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value. + ** For that matter, if the lock offset ever changes from its initial design + ** value of 120, we need to know that so there is an assert() to check it. + */ +#ifdef WIN_SHM_BASE + assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET ); +#endif +#ifdef UNIX_SHM_BASE + assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); +#endif + + + /* Allocate an instance of struct Wal to return. */ + *ppWal = 0; + pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); + if( !pRet ){ + return SQLITE_NOMEM_BKPT; + } + + pRet->pVfs = pVfs; + pRet->pWalFd = (sqlite3_file *)&pRet[1]; + pRet->pDbFd = pDbFd; + pRet->readLock = -1; + pRet->mxWalSize = mxWalSize; + pRet->zWalName = zWalName; + pRet->syncHeader = 1; + pRet->padToSectorBoundary = 1; + pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); + + /* Open file handle on the write-ahead log file. */ + flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); + rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); + if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ + pRet->readOnly = WAL_RDONLY; + } + + if( rc!=SQLITE_OK ){ + walIndexClose(pRet, 0); + sqlite3OsClose(pRet->pWalFd); + sqlite3_free(pRet); + }else{ + int iDC = sqlite3OsDeviceCharacteristics(pDbFd); + if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } + if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ + pRet->padToSectorBoundary = 0; + } + *ppWal = pRet; + WALTRACE(("WAL%d: opened\n", pRet)); + } + return rc; +} + +/* +** Change the size to which the WAL file is trucated on each reset. +*/ +SQLITE_PRIVATE void sqlite3WalLimit(Wal *pWal, i64 iLimit){ + if( pWal ) pWal->mxWalSize = iLimit; +} + +/* +** Find the smallest page number out of all pages held in the WAL that +** has not been returned by any prior invocation of this method on the +** same WalIterator object. Write into *piFrame the frame index where +** that page was last written into the WAL. Write into *piPage the page +** number. +** +** Return 0 on success. If there are no pages in the WAL with a page +** number larger than *piPage, then return 1. +*/ +static int walIteratorNext( + WalIterator *p, /* Iterator */ + u32 *piPage, /* OUT: The page number of the next page */ + u32 *piFrame /* OUT: Wal frame index of next page */ +){ + u32 iMin; /* Result pgno must be greater than iMin */ + u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ + int i; /* For looping through segments */ + + iMin = p->iPrior; + assert( iMin<0xffffffff ); + for(i=p->nSegment-1; i>=0; i--){ + struct WalSegment *pSegment = &p->aSegment[i]; + while( pSegment->iNextnEntry ){ + u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; + if( iPg>iMin ){ + if( iPgiZero + pSegment->aIndex[pSegment->iNext]; + } + break; + } + pSegment->iNext++; + } + } + + *piPage = p->iPrior = iRet; + return (iRet==0xFFFFFFFF); +} + +/* +** This function merges two sorted lists into a single sorted list. +** +** aLeft[] and aRight[] are arrays of indices. The sort key is +** aContent[aLeft[]] and aContent[aRight[]]. Upon entry, the following +** is guaranteed for all J0 && nRight>0 ); + while( iRight=nRight || aContent[aLeft[iLeft]]=nLeft || aContent[aLeft[iLeft]]>dbpage ); + assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage ); + } + + *paRight = aLeft; + *pnRight = iOut; + memcpy(aLeft, aTmp, sizeof(aTmp[0])*iOut); +} + +/* +** Sort the elements in list aList using aContent[] as the sort key. +** Remove elements with duplicate keys, preferring to keep the +** larger aList[] values. +** +** The aList[] entries are indices into aContent[]. The values in +** aList[] are to be sorted so that for all J0 ); + assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) ); + + for(iList=0; iListaList && p->nList<=(1<aList==&aList[iList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); + } + aSub[iSub].aList = aMerge; + aSub[iSub].nList = nMerge; + } + + for(iSub++; iSubnList<=(1<aList==&aList[nList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); + } + } + assert( aMerge==aList ); + *pnList = nMerge; + +#ifdef SQLITE_DEBUG + { + int i; + for(i=1; i<*pnList; i++){ + assert( aContent[aList[i]] > aContent[aList[i-1]] ); + } + } +#endif +} + +/* +** Free an iterator allocated by walIteratorInit(). +*/ +static void walIteratorFree(WalIterator *p){ + sqlite3_free(p); +} + +/* +** Construct a WalInterator object that can be used to loop over all +** pages in the WAL following frame nBackfill in ascending order. Frames +** nBackfill or earlier may be included - excluding them is an optimization +** only. The caller must hold the checkpoint lock. +** +** On success, make *pp point to the newly allocated WalInterator object +** return SQLITE_OK. Otherwise, return an error code. If this routine +** returns an error, the value of *pp is undefined. +** +** The calling routine should invoke walIteratorFree() to destroy the +** WalIterator object when it has finished with it. +*/ +static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){ + WalIterator *p; /* Return value */ + int nSegment; /* Number of segments to merge */ + u32 iLast; /* Last frame in log */ + sqlite3_int64 nByte; /* Number of bytes to allocate */ + int i; /* Iterator variable */ + ht_slot *aTmp; /* Temp space used by merge-sort */ + int rc = SQLITE_OK; /* Return Code */ + + /* This routine only runs while holding the checkpoint lock. And + ** it only runs if there is actually content in the log (mxFrame>0). + */ + assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); + iLast = pWal->hdr.mxFrame; + + /* Allocate space for the WalIterator object. */ + nSegment = walFramePage(iLast) + 1; + nByte = sizeof(WalIterator) + + (nSegment-1)*sizeof(struct WalSegment) + + iLast*sizeof(ht_slot); + p = (WalIterator *)sqlite3_malloc64(nByte); + if( !p ){ + return SQLITE_NOMEM_BKPT; + } + memset(p, 0, nByte); + p->nSegment = nSegment; + + /* Allocate temporary space used by the merge-sort routine. This block + ** of memory will be freed before this function returns. + */ + aTmp = (ht_slot *)sqlite3_malloc64( + sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) + ); + if( !aTmp ){ + rc = SQLITE_NOMEM_BKPT; + } + + for(i=walFramePage(nBackfill+1); rc==SQLITE_OK && iaSegment[p->nSegment])[sLoc.iZero]; + sLoc.iZero++; + + for(j=0; jaSegment[i].iZero = sLoc.iZero; + p->aSegment[i].nEntry = nEntry; + p->aSegment[i].aIndex = aIndex; + p->aSegment[i].aPgno = (u32 *)sLoc.aPgno; + } + } + sqlite3_free(aTmp); + + if( rc!=SQLITE_OK ){ + walIteratorFree(p); + p = 0; + } + *pp = p; + return rc; +} + +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT +/* +** Attempt to enable blocking locks. Blocking locks are enabled only if (a) +** they are supported by the VFS, and (b) the database handle is configured +** with a busy-timeout. Return 1 if blocking locks are successfully enabled, +** or 0 otherwise. +*/ +static int walEnableBlocking(Wal *pWal){ + int res = 0; + if( pWal->db ){ + int tmout = pWal->db->busyTimeout; + if( tmout ){ + int rc; + rc = sqlite3OsFileControl( + pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout + ); + res = (rc==SQLITE_OK); + } + } + return res; +} + +/* +** Disable blocking locks. +*/ +static void walDisableBlocking(Wal *pWal){ + int tmout = 0; + sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout); +} + +/* +** If parameter bLock is true, attempt to enable blocking locks, take +** the WRITER lock, and then disable blocking locks. If blocking locks +** cannot be enabled, no attempt to obtain the WRITER lock is made. Return +** an SQLite error code if an error occurs, or SQLITE_OK otherwise. It is not +** an error if blocking locks can not be enabled. +** +** If the bLock parameter is false and the WRITER lock is held, release it. +*/ +SQLITE_PRIVATE int sqlite3WalWriteLock(Wal *pWal, int bLock){ + int rc = SQLITE_OK; + assert( pWal->readLock<0 || bLock==0 ); + if( bLock ){ + assert( pWal->db ); + if( walEnableBlocking(pWal) ){ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + if( rc==SQLITE_OK ){ + pWal->writeLock = 1; + } + walDisableBlocking(pWal); + } + }else if( pWal->writeLock ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + } + return rc; +} + +/* +** Set the database handle used to determine if blocking locks are required. +*/ +SQLITE_PRIVATE void sqlite3WalDb(Wal *pWal, sqlite3 *db){ + pWal->db = db; +} + +/* +** Take an exclusive WRITE lock. Blocking if so configured. +*/ +static int walLockWriter(Wal *pWal){ + int rc; + walEnableBlocking(pWal); + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + walDisableBlocking(pWal); + return rc; +} +#else +# define walEnableBlocking(x) 0 +# define walDisableBlocking(x) +# define walLockWriter(pWal) walLockExclusive((pWal), WAL_WRITE_LOCK, 1) +# define sqlite3WalDb(pWal, db) +#endif /* ifdef SQLITE_ENABLE_SETLK_TIMEOUT */ + + +/* +** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and +** n. If the attempt fails and parameter xBusy is not NULL, then it is a +** busy-handler function. Invoke it and retry the lock until either the +** lock is successfully obtained or the busy-handler returns 0. +*/ +static int walBusyLock( + Wal *pWal, /* WAL connection */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int lockIdx, /* Offset of first byte to lock */ + int n /* Number of bytes to lock */ +){ + int rc; + do { + rc = walLockExclusive(pWal, lockIdx, n); + }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) ); +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + if( rc==SQLITE_BUSY_TIMEOUT ){ + walDisableBlocking(pWal); + rc = SQLITE_BUSY; + } +#endif + return rc; +} + +/* +** The cache of the wal-index header must be valid to call this function. +** Return the page-size in bytes used by the database. +*/ +static int walPagesize(Wal *pWal){ + return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); +} + +/* +** The following is guaranteed when this function is called: +** +** a) the WRITER lock is held, +** b) the entire log file has been checkpointed, and +** c) any existing readers are reading exclusively from the database +** file - there are no readers that may attempt to read a frame from +** the log file. +** +** This function updates the shared-memory structures so that the next +** client to write to the database (which may be this one) does so by +** writing frames into the start of the log file. +** +** The value of parameter salt1 is used as the aSalt[1] value in the +** new wal-index header. It should be passed a pseudo-random value (i.e. +** one obtained from sqlite3_randomness()). +*/ +static void walRestartHdr(Wal *pWal, u32 salt1){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + int i; /* Loop counter */ + u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ + pWal->nCkpt++; + pWal->hdr.mxFrame = 0; + sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); + memcpy(&pWal->hdr.aSalt[1], &salt1, 4); + walIndexWriteHdr(pWal); + AtomicStore(&pInfo->nBackfill, 0); + pInfo->nBackfillAttempted = 0; + pInfo->aReadMark[1] = 0; + for(i=2; iaReadMark[i] = READMARK_NOT_USED; + assert( pInfo->aReadMark[0]==0 ); +} + +/* +** Copy as much content as we can from the WAL back into the database file +** in response to an sqlite3_wal_checkpoint() request or the equivalent. +** +** The amount of information copies from WAL to database might be limited +** by active readers. This routine will never overwrite a database page +** that a concurrent reader might be using. +** +** All I/O barrier operations (a.k.a fsyncs) occur in this routine when +** SQLite is in WAL-mode in synchronous=NORMAL. That means that if +** checkpoints are always run by a background thread or background +** process, foreground threads will never block on a lengthy fsync call. +** +** Fsync is called on the WAL before writing content out of the WAL and +** into the database. This ensures that if the new content is persistent +** in the WAL and can be recovered following a power-loss or hard reset. +** +** Fsync is also called on the database file if (and only if) the entire +** WAL content is copied into the database file. This second fsync makes +** it safe to delete the WAL since the new content will persist in the +** database file. +** +** This routine uses and updates the nBackfill field of the wal-index header. +** This is the only routine that will increase the value of nBackfill. +** (A WAL reset or recovery will revert nBackfill to zero, but not increase +** its value.) +** +** The caller must be holding sufficient locks to ensure that no other +** checkpoint is running (in any other thread or process) at the same +** time. +*/ +static int walCheckpoint( + Wal *pWal, /* Wal connection */ + sqlite3 *db, /* Check for interrupts on this handle */ + int eMode, /* One of PASSIVE, FULL or RESTART */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags for OsSync() (or 0) */ + u8 *zBuf /* Temporary buffer to use */ +){ + int rc = SQLITE_OK; /* Return code */ + int szPage; /* Database page-size */ + WalIterator *pIter = 0; /* Wal iterator context */ + u32 iDbpage = 0; /* Next database page to write */ + u32 iFrame = 0; /* Wal frame containing data for iDbpage */ + u32 mxSafeFrame; /* Max frame that can be backfilled */ + u32 mxPage; /* Max database page to write */ + int i; /* Loop counter */ + volatile WalCkptInfo *pInfo; /* The checkpoint status information */ + + szPage = walPagesize(pWal); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + pInfo = walCkptInfo(pWal); + if( pInfo->nBackfillhdr.mxFrame ){ + + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); + + /* Compute in mxSafeFrame the index of the last frame of the WAL that is + ** safe to write into the database. Frames beyond mxSafeFrame might + ** overwrite database pages that are in use by active readers and thus + ** cannot be backfilled from the WAL. + */ + mxSafeFrame = pWal->hdr.mxFrame; + mxPage = pWal->hdr.nPage; + for(i=1; iaReadMark+i); + if( mxSafeFrame>y ){ + assert( y<=pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); + if( rc==SQLITE_OK ){ + u32 iMark = (i==1 ? mxSafeFrame : READMARK_NOT_USED); + AtomicStore(pInfo->aReadMark+i, iMark); + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc==SQLITE_BUSY ){ + mxSafeFrame = y; + xBusy = 0; + }else{ + goto walcheckpoint_out; + } + } + } + + /* Allocate the iterator */ + if( pInfo->nBackfillnBackfill, &pIter); + assert( rc==SQLITE_OK || pIter==0 ); + } + + if( pIter + && (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK + ){ + u32 nBackfill = pInfo->nBackfill; + + pInfo->nBackfillAttempted = mxSafeFrame; + + /* Sync the WAL to disk */ + rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags)); + + /* If the database may grow as a result of this checkpoint, hint + ** about the eventual size of the db file to the VFS layer. + */ + if( rc==SQLITE_OK ){ + i64 nReq = ((i64)mxPage * szPage); + i64 nSize; /* Current size of database file */ + sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_START, 0); + rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); + if( rc==SQLITE_OK && nSizehdr.mxFrame*szPage)pDbFd, SQLITE_FCNTL_SIZE_HINT,&nReq); + } + } + + } + + /* Iterate through the contents of the WAL, copying data to the db file */ + while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + i64 iOffset; + assert( walFramePgno(pWal, iFrame)==iDbpage ); + if( AtomicLoad(&db->u1.isInterrupted) ){ + rc = db->mallocFailed ? SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT; + break; + } + if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ + continue; + } + iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ + rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); + if( rc!=SQLITE_OK ) break; + iOffset = (iDbpage-1)*(i64)szPage; + testcase( IS_BIG_INT(iOffset) ); + rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); + if( rc!=SQLITE_OK ) break; + } + sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_DONE, 0); + + /* If work was actually accomplished... */ + if( rc==SQLITE_OK ){ + if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ + i64 szDb = pWal->hdr.nPage*(i64)szPage; + testcase( IS_BIG_INT(szDb) ); + rc = sqlite3OsTruncate(pWal->pDbFd, szDb); + if( rc==SQLITE_OK ){ + rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags)); + } + } + if( rc==SQLITE_OK ){ + AtomicStore(&pInfo->nBackfill, mxSafeFrame); + } + } + + /* Release the reader lock held while backfilling */ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + } + + if( rc==SQLITE_BUSY ){ + /* Reset the return code so as not to report a checkpoint failure + ** just because there are active readers. */ + rc = SQLITE_OK; + } + } + + /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the + ** entire wal file has been copied into the database file, then block + ** until all readers have finished using the wal file. This ensures that + ** the next process to write to the database restarts the wal file. + */ + if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + assert( pWal->writeLock ); + if( pInfo->nBackfillhdr.mxFrame ){ + rc = SQLITE_BUSY; + }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ + u32 salt1; + sqlite3_randomness(4, &salt1); + assert( pInfo->nBackfill==pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); + if( rc==SQLITE_OK ){ + if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ + /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as + ** SQLITE_CHECKPOINT_RESTART with the addition that it also + ** truncates the log file to zero bytes just prior to a + ** successful return. + ** + ** In theory, it might be safe to do this without updating the + ** wal-index header in shared memory, as all subsequent reader or + ** writer clients should see that the entire log file has been + ** checkpointed and behave accordingly. This seems unsafe though, + ** as it would leave the system in a state where the contents of + ** the wal-index header do not match the contents of the + ** file-system. To avoid this, update the wal-index header to + ** indicate that the log file contains zero valid frames. */ + walRestartHdr(pWal, salt1); + rc = sqlite3OsTruncate(pWal->pWalFd, 0); + } + walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + } + } + } + + walcheckpoint_out: + walIteratorFree(pIter); + return rc; +} + +/* +** If the WAL file is currently larger than nMax bytes in size, truncate +** it to exactly nMax bytes. If an error occurs while doing so, ignore it. +*/ +static void walLimitSize(Wal *pWal, i64 nMax){ + i64 sz; + int rx; + sqlite3BeginBenignMalloc(); + rx = sqlite3OsFileSize(pWal->pWalFd, &sz); + if( rx==SQLITE_OK && (sz > nMax ) ){ + rx = sqlite3OsTruncate(pWal->pWalFd, nMax); + } + sqlite3EndBenignMalloc(); + if( rx ){ + sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); + } +} + +/* +** Close a connection to a log file. +*/ +SQLITE_PRIVATE int sqlite3WalClose( + Wal *pWal, /* Wal to close */ + sqlite3 *db, /* For interrupt flag */ + int sync_flags, /* Flags to pass to OsSync() (or 0) */ + int nBuf, + u8 *zBuf /* Buffer of at least nBuf bytes */ +){ + int rc = SQLITE_OK; + if( pWal ){ + int isDelete = 0; /* True to unlink wal and wal-index files */ + + /* If an EXCLUSIVE lock can be obtained on the database file (using the + ** ordinary, rollback-mode locking methods, this guarantees that the + ** connection associated with this log file is the only connection to + ** the database. In this case checkpoint the database and unlink both + ** the wal and wal-index files. + ** + ** The EXCLUSIVE lock is not released before returning. + */ + if( zBuf!=0 + && SQLITE_OK==(rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE)) + ){ + if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ + pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; + } + rc = sqlite3WalCheckpoint(pWal, db, + SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 + ); + if( rc==SQLITE_OK ){ + int bPersist = -1; + sqlite3OsFileControlHint( + pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist + ); + if( bPersist!=1 ){ + /* Try to delete the WAL file if the checkpoint completed and + ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal + ** mode (!bPersist) */ + isDelete = 1; + }else if( pWal->mxWalSize>=0 ){ + /* Try to truncate the WAL file to zero bytes if the checkpoint + ** completed and fsynced (rc==SQLITE_OK) and we are in persistent + ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a + ** non-negative value (pWal->mxWalSize>=0). Note that we truncate + ** to zero bytes as truncating to the journal_size_limit might + ** leave a corrupt WAL file on disk. */ + walLimitSize(pWal, 0); + } + } + } + + walIndexClose(pWal, isDelete); + sqlite3OsClose(pWal->pWalFd); + if( isDelete ){ + sqlite3BeginBenignMalloc(); + sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); + sqlite3EndBenignMalloc(); + } + WALTRACE(("WAL%p: closed\n", pWal)); + sqlite3_free((void *)pWal->apWiData); + sqlite3_free(pWal); + } + return rc; +} + +/* +** Try to read the wal-index header. Return 0 on success and 1 if +** there is a problem. +** +** The wal-index is in shared memory. Another thread or process might +** be writing the header at the same time this procedure is trying to +** read it, which might result in inconsistency. A dirty read is detected +** by verifying that both copies of the header are the same and also by +** a checksum on the header. +** +** If and only if the read is consistent and the header is different from +** pWal->hdr, then pWal->hdr is updated to the content of the new header +** and *pChanged is set to 1. +** +** If the checksum cannot be verified return non-zero. If the header +** is read successfully and the checksum verified, return zero. +*/ +static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){ + u32 aCksum[2]; /* Checksum on the header content */ + WalIndexHdr h1, h2; /* Two copies of the header content */ + WalIndexHdr volatile *aHdr; /* Header in shared memory */ + + /* The first page of the wal-index must be mapped at this point. */ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + + /* Read the header. This might happen concurrently with a write to the + ** same area of shared memory on a different CPU in a SMP, + ** meaning it is possible that an inconsistent snapshot is read + ** from the file. If this happens, return non-zero. + ** + ** tag-20200519-1: + ** There are two copies of the header at the beginning of the wal-index. + ** When reading, read [0] first then [1]. Writes are in the reverse order. + ** Memory barriers are used to prevent the compiler or the hardware from + ** reordering the reads and writes. TSAN and similar tools can sometimes + ** give false-positive warnings about these accesses because the tools do not + ** account for the double-read and the memory barrier. The use of mutexes + ** here would be problematic as the memory being accessed is potentially + ** shared among multiple processes and not all mutex implementions work + ** reliably in that environment. + */ + aHdr = walIndexHdr(pWal); + memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); /* Possible TSAN false-positive */ + walShmBarrier(pWal); + memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); + + if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ + return 1; /* Dirty read */ + } + if( h1.isInit==0 ){ + return 1; /* Malformed header - probably all zeros */ + } + walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum); + if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){ + return 1; /* Checksum does not match */ + } + + if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){ + *pChanged = 1; + memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr)); + pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); + testcase( pWal->szPage<=32768 ); + testcase( pWal->szPage>=65536 ); + } + + /* The header was successfully read. Return zero. */ + return 0; +} + +/* +** This is the value that walTryBeginRead returns when it needs to +** be retried. +*/ +#define WAL_RETRY (-1) + +/* +** Read the wal-index header from the wal-index and into pWal->hdr. +** If the wal-header appears to be corrupt, try to reconstruct the +** wal-index from the WAL before returning. +** +** Set *pChanged to 1 if the wal-index header value in pWal->hdr is +** changed by this operation. If pWal->hdr is unchanged, set *pChanged +** to 0. +** +** If the wal-index header is successfully read, return SQLITE_OK. +** Otherwise an SQLite error code. +*/ +static int walIndexReadHdr(Wal *pWal, int *pChanged){ + int rc; /* Return code */ + int badHdr; /* True if a header read failed */ + volatile u32 *page0; /* Chunk of wal-index containing header */ + + /* Ensure that page 0 of the wal-index (the page that contains the + ** wal-index header) is mapped. Return early if an error occurs here. + */ + assert( pChanged ); + rc = walIndexPage(pWal, 0, &page0); + if( rc!=SQLITE_OK ){ + assert( rc!=SQLITE_READONLY ); /* READONLY changed to OK in walIndexPage */ + if( rc==SQLITE_READONLY_CANTINIT ){ + /* The SQLITE_READONLY_CANTINIT return means that the shared-memory + ** was openable but is not writable, and this thread is unable to + ** confirm that another write-capable connection has the shared-memory + ** open, and hence the content of the shared-memory is unreliable, + ** since the shared-memory might be inconsistent with the WAL file + ** and there is no writer on hand to fix it. */ + assert( page0==0 ); + assert( pWal->writeLock==0 ); + assert( pWal->readOnly & WAL_SHM_RDONLY ); + pWal->bShmUnreliable = 1; + pWal->exclusiveMode = WAL_HEAPMEMORY_MODE; + *pChanged = 1; + }else{ + return rc; /* Any other non-OK return is just an error */ + } + }else{ + /* page0 can be NULL if the SHM is zero bytes in size and pWal->writeLock + ** is zero, which prevents the SHM from growing */ + testcase( page0!=0 ); + } + assert( page0!=0 || pWal->writeLock==0 ); + + /* If the first page of the wal-index has been mapped, try to read the + ** wal-index header immediately, without holding any lock. This usually + ** works, but may fail if the wal-index header is corrupt or currently + ** being modified by another thread or process. + */ + badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); + + /* If the first attempt failed, it might have been due to a race + ** with a writer. So get a WRITE lock and try again. + */ + if( badHdr ){ + if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){ + if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ + walUnlockShared(pWal, WAL_WRITE_LOCK); + rc = SQLITE_READONLY_RECOVERY; + } + }else{ + int bWriteLock = pWal->writeLock; + if( bWriteLock || SQLITE_OK==(rc = walLockWriter(pWal)) ){ + pWal->writeLock = 1; + if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ + badHdr = walIndexTryHdr(pWal, pChanged); + if( badHdr ){ + /* If the wal-index header is still malformed even while holding + ** a WRITE lock, it can only mean that the header is corrupted and + ** needs to be reconstructed. So run recovery to do exactly that. + */ + rc = walIndexRecover(pWal); + *pChanged = 1; + } + } + if( bWriteLock==0 ){ + pWal->writeLock = 0; + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + } + } + } + } + + /* If the header is read successfully, check the version number to make + ** sure the wal-index was not constructed with some future format that + ** this version of SQLite cannot understand. + */ + if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ + rc = SQLITE_CANTOPEN_BKPT; + } + if( pWal->bShmUnreliable ){ + if( rc!=SQLITE_OK ){ + walIndexClose(pWal, 0); + pWal->bShmUnreliable = 0; + assert( pWal->nWiData>0 && pWal->apWiData[0]==0 ); + /* walIndexRecover() might have returned SHORT_READ if a concurrent + ** writer truncated the WAL out from under it. If that happens, it + ** indicates that a writer has fixed the SHM file for us, so retry */ + if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY; + } + pWal->exclusiveMode = WAL_NORMAL_MODE; + } + + return rc; +} + +/* +** Open a transaction in a connection where the shared-memory is read-only +** and where we cannot verify that there is a separate write-capable connection +** on hand to keep the shared-memory up-to-date with the WAL file. +** +** This can happen, for example, when the shared-memory is implemented by +** memory-mapping a *-shm file, where a prior writer has shut down and +** left the *-shm file on disk, and now the present connection is trying +** to use that database but lacks write permission on the *-shm file. +** Other scenarios are also possible, depending on the VFS implementation. +** +** Precondition: +** +** The *-wal file has been read and an appropriate wal-index has been +** constructed in pWal->apWiData[] using heap memory instead of shared +** memory. +** +** If this function returns SQLITE_OK, then the read transaction has +** been successfully opened. In this case output variable (*pChanged) +** is set to true before returning if the caller should discard the +** contents of the page cache before proceeding. Or, if it returns +** WAL_RETRY, then the heap memory wal-index has been discarded and +** the caller should retry opening the read transaction from the +** beginning (including attempting to map the *-shm file). +** +** If an error occurs, an SQLite error code is returned. +*/ +static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ + i64 szWal; /* Size of wal file on disk in bytes */ + i64 iOffset; /* Current offset when reading wal file */ + u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int szFrame; /* Number of bytes in buffer aFrame[] */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + volatile void *pDummy; /* Dummy argument for xShmMap */ + int rc; /* Return code */ + u32 aSaveCksum[2]; /* Saved copy of pWal->hdr.aFrameCksum */ + + assert( pWal->bShmUnreliable ); + assert( pWal->readOnly & WAL_SHM_RDONLY ); + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + + /* Take WAL_READ_LOCK(0). This has the effect of preventing any + ** writers from running a checkpoint, but does not stop them + ** from running recovery. */ + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_BUSY ) rc = WAL_RETRY; + goto begin_unreliable_shm_out; + } + pWal->readLock = 0; + + /* Check to see if a separate writer has attached to the shared-memory area, + ** thus making the shared-memory "reliable" again. Do this by invoking + ** the xShmMap() routine of the VFS and looking to see if the return + ** is SQLITE_READONLY instead of SQLITE_READONLY_CANTINIT. + ** + ** If the shared-memory is now "reliable" return WAL_RETRY, which will + ** cause the heap-memory WAL-index to be discarded and the actual + ** shared memory to be used in its place. + ** + ** This step is important because, even though this connection is holding + ** the WAL_READ_LOCK(0) which prevents a checkpoint, a writer might + ** have already checkpointed the WAL file and, while the current + ** is active, wrap the WAL and start overwriting frames that this + ** process wants to use. + ** + ** Once sqlite3OsShmMap() has been called for an sqlite3_file and has + ** returned any SQLITE_READONLY value, it must return only SQLITE_READONLY + ** or SQLITE_READONLY_CANTINIT or some error for all subsequent invocations, + ** even if some external agent does a "chmod" to make the shared-memory + ** writable by us, until sqlite3OsShmUnmap() has been called. + ** This is a requirement on the VFS implementation. + */ + rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy); + assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */ + if( rc!=SQLITE_READONLY_CANTINIT ){ + rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc); + goto begin_unreliable_shm_out; + } + + /* We reach this point only if the real shared-memory is still unreliable. + ** Assume the in-memory WAL-index substitute is correct and load it + ** into pWal->hdr. + */ + memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr)); + + /* Make sure some writer hasn't come in and changed the WAL file out + ** from under us, then disconnected, while we were not looking. + */ + rc = sqlite3OsFileSize(pWal->pWalFd, &szWal); + if( rc!=SQLITE_OK ){ + goto begin_unreliable_shm_out; + } + if( szWalhdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY); + goto begin_unreliable_shm_out; + } + + /* Check the salt keys at the start of the wal file still match. */ + rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + goto begin_unreliable_shm_out; + } + if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){ + /* Some writer has wrapped the WAL file while we were not looking. + ** Return WAL_RETRY which will cause the in-memory WAL-index to be + ** rebuilt. */ + rc = WAL_RETRY; + goto begin_unreliable_shm_out; + } + + /* Allocate a buffer to read frames into */ + assert( (pWal->szPage & (pWal->szPage-1))==0 ); + assert( pWal->szPage>=512 && pWal->szPage<=65536 ); + szFrame = pWal->szPage + WAL_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc64(szFrame); + if( aFrame==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto begin_unreliable_shm_out; + } + aData = &aFrame[WAL_FRAME_HDRSIZE]; + + /* Check to see if a complete transaction has been appended to the + ** wal file since the heap-memory wal-index was created. If so, the + ** heap-memory wal-index is discarded and WAL_RETRY returned to + ** the caller. */ + aSaveCksum[0] = pWal->hdr.aFrameCksum[0]; + aSaveCksum[1] = pWal->hdr.aFrameCksum[1]; + for(iOffset=walFrameOffset(pWal->hdr.mxFrame+1, pWal->szPage); + iOffset+szFrame<=szWal; + iOffset+=szFrame + ){ + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); + if( rc!=SQLITE_OK ) break; + if( !walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame) ) break; + + /* If nTruncate is non-zero, then a complete transaction has been + ** appended to this wal file. Set rc to WAL_RETRY and break out of + ** the loop. */ + if( nTruncate ){ + rc = WAL_RETRY; + break; + } + } + pWal->hdr.aFrameCksum[0] = aSaveCksum[0]; + pWal->hdr.aFrameCksum[1] = aSaveCksum[1]; + + begin_unreliable_shm_out: + sqlite3_free(aFrame); + if( rc!=SQLITE_OK ){ + int i; + for(i=0; inWiData; i++){ + sqlite3_free((void*)pWal->apWiData[i]); + pWal->apWiData[i] = 0; + } + pWal->bShmUnreliable = 0; + sqlite3WalEndReadTransaction(pWal); + *pChanged = 1; + } + return rc; +} + +/* +** Attempt to start a read transaction. This might fail due to a race or +** other transient condition. When that happens, it returns WAL_RETRY to +** indicate to the caller that it is safe to retry immediately. +** +** On success return SQLITE_OK. On a permanent failure (such an +** I/O error or an SQLITE_BUSY because another process is running +** recovery) return a positive error code. +** +** The useWal parameter is true to force the use of the WAL and disable +** the case where the WAL is bypassed because it has been completely +** checkpointed. If useWal==0 then this routine calls walIndexReadHdr() +** to make a copy of the wal-index header into pWal->hdr. If the +** wal-index header has changed, *pChanged is set to 1 (as an indication +** to the caller that the local page cache is obsolete and needs to be +** flushed.) When useWal==1, the wal-index header is assumed to already +** be loaded and the pChanged parameter is unused. +** +** The caller must set the cnt parameter to the number of prior calls to +** this routine during the current read attempt that returned WAL_RETRY. +** This routine will start taking more aggressive measures to clear the +** race conditions after multiple WAL_RETRY returns, and after an excessive +** number of errors will ultimately return SQLITE_PROTOCOL. The +** SQLITE_PROTOCOL return indicates that some other process has gone rogue +** and is not honoring the locking protocol. There is a vanishingly small +** chance that SQLITE_PROTOCOL could be returned because of a run of really +** bad luck when there is lots of contention for the wal-index, but that +** possibility is so small that it can be safely neglected, we believe. +** +** On success, this routine obtains a read lock on +** WAL_READ_LOCK(pWal->readLock). The pWal->readLock integer is +** in the range 0 <= pWal->readLock < WAL_NREADER. If pWal->readLock==(-1) +** that means the Wal does not hold any read lock. The reader must not +** access any database page that is modified by a WAL frame up to and +** including frame number aReadMark[pWal->readLock]. The reader will +** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0 +** Or if pWal->readLock==0, then the reader will ignore the WAL +** completely and get all content directly from the database file. +** If the useWal parameter is 1 then the WAL will never be ignored and +** this routine will always set pWal->readLock>0 on success. +** When the read transaction is completed, the caller must release the +** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1. +** +** This routine uses the nBackfill and aReadMark[] fields of the header +** to select a particular WAL_READ_LOCK() that strives to let the +** checkpoint process do as much work as possible. This routine might +** update values of the aReadMark[] array in the header, but if it does +** so it takes care to hold an exclusive lock on the corresponding +** WAL_READ_LOCK() while changing values. +*/ +static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ + volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ + u32 mxReadMark; /* Largest aReadMark[] value */ + int mxI; /* Index of largest aReadMark[] value */ + int i; /* Loop counter */ + int rc = SQLITE_OK; /* Return code */ + u32 mxFrame; /* Wal frame to lock to */ + + assert( pWal->readLock<0 ); /* Not currently locked */ + + /* useWal may only be set for read/write connections */ + assert( (pWal->readOnly & WAL_SHM_RDONLY)==0 || useWal==0 ); + + /* Take steps to avoid spinning forever if there is a protocol error. + ** + ** Circumstances that cause a RETRY should only last for the briefest + ** instances of time. No I/O or other system calls are done while the + ** locks are held, so the locks should not be held for very long. But + ** if we are unlucky, another process that is holding a lock might get + ** paged out or take a page-fault that is time-consuming to resolve, + ** during the few nanoseconds that it is holding the lock. In that case, + ** it might take longer than normal for the lock to free. + ** + ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few + ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this + ** is more of a scheduler yield than an actual delay. But on the 10th + ** an subsequent retries, the delays start becoming longer and longer, + ** so that on the 100th (and last) RETRY we delay for 323 milliseconds. + ** The total delay time before giving up is less than 10 seconds. + */ + if( cnt>5 ){ + int nDelay = 1; /* Pause time in microseconds */ + if( cnt>100 ){ + VVA_ONLY( pWal->lockError = 1; ) + return SQLITE_PROTOCOL; + } + if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; + sqlite3OsSleep(pWal->pVfs, nDelay); + } + + if( !useWal ){ + assert( rc==SQLITE_OK ); + if( pWal->bShmUnreliable==0 ){ + rc = walIndexReadHdr(pWal, pChanged); + } + if( rc==SQLITE_BUSY ){ + /* If there is not a recovery running in another thread or process + ** then convert BUSY errors to WAL_RETRY. If recovery is known to + ** be running, convert BUSY to BUSY_RECOVERY. There is a race here + ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY + ** would be technically correct. But the race is benign since with + ** WAL_RETRY this routine will be called again and will probably be + ** right on the second iteration. + */ + if( pWal->apWiData[0]==0 ){ + /* This branch is taken when the xShmMap() method returns SQLITE_BUSY. + ** We assume this is a transient condition, so return WAL_RETRY. The + ** xShmMap() implementation used by the default unix and win32 VFS + ** modules may return SQLITE_BUSY due to a race condition in the + ** code that determines whether or not the shared-memory region + ** must be zeroed before the requested page is returned. + */ + rc = WAL_RETRY; + }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){ + walUnlockShared(pWal, WAL_RECOVER_LOCK); + rc = WAL_RETRY; + }else if( rc==SQLITE_BUSY ){ + rc = SQLITE_BUSY_RECOVERY; + } + } + if( rc!=SQLITE_OK ){ + return rc; + } + else if( pWal->bShmUnreliable ){ + return walBeginShmUnreliable(pWal, pChanged); + } + } + + assert( pWal->nWiData>0 ); + assert( pWal->apWiData[0]!=0 ); + pInfo = walCkptInfo(pWal); + if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame +#ifdef SQLITE_ENABLE_SNAPSHOT + && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0) +#endif + ){ + /* The WAL has been completely backfilled (or it is empty). + ** and can be safely ignored. + */ + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + walShmBarrier(pWal); + if( rc==SQLITE_OK ){ + if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ + /* It is not safe to allow the reader to continue here if frames + ** may have been appended to the log before READ_LOCK(0) was obtained. + ** When holding READ_LOCK(0), the reader ignores the entire log file, + ** which implies that the database file contains a trustworthy + ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from + ** happening, this is usually correct. + ** + ** However, if frames have been appended to the log (or if the log + ** is wrapped and written for that matter) before the READ_LOCK(0) + ** is obtained, that is not necessarily true. A checkpointer may + ** have started to backfill the appended frames but crashed before + ** it finished. Leaving a corrupt image in the database file. + */ + walUnlockShared(pWal, WAL_READ_LOCK(0)); + return WAL_RETRY; + } + pWal->readLock = 0; + return SQLITE_OK; + }else if( rc!=SQLITE_BUSY ){ + return rc; + } + } + + /* If we get this far, it means that the reader will want to use + ** the WAL to get at content from recent commits. The job now is + ** to select one of the aReadMark[] entries that is closest to + ** but not exceeding pWal->hdr.mxFrame and lock that entry. + */ + mxReadMark = 0; + mxI = 0; + mxFrame = pWal->hdr.mxFrame; +#ifdef SQLITE_ENABLE_SNAPSHOT + if( pWal->pSnapshot && pWal->pSnapshot->mxFramepSnapshot->mxFrame; + } +#endif + for(i=1; iaReadMark+i); + if( mxReadMark<=thisMark && thisMark<=mxFrame ){ + assert( thisMark!=READMARK_NOT_USED ); + mxReadMark = thisMark; + mxI = i; + } + } + if( (pWal->readOnly & WAL_SHM_RDONLY)==0 + && (mxReadMarkaReadMark+i,mxFrame); + mxReadMark = mxFrame; + mxI = i; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + break; + }else if( rc!=SQLITE_BUSY ){ + return rc; + } + } + } + if( mxI==0 ){ + assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); + return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT; + } + + rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); + if( rc ){ + return rc==SQLITE_BUSY ? WAL_RETRY : rc; + } + /* Now that the read-lock has been obtained, check that neither the + ** value in the aReadMark[] array or the contents of the wal-index + ** header have changed. + ** + ** It is necessary to check that the wal-index header did not change + ** between the time it was read and when the shared-lock was obtained + ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility + ** that the log file may have been wrapped by a writer, or that frames + ** that occur later in the log than pWal->hdr.mxFrame may have been + ** copied into the database by a checkpointer. If either of these things + ** happened, then reading the database with the current value of + ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry + ** instead. + ** + ** Before checking that the live wal-index header has not changed + ** since it was read, set Wal.minFrame to the first frame in the wal + ** file that has not yet been checkpointed. This client will not need + ** to read any frames earlier than minFrame from the wal file - they + ** can be safely read directly from the database file. + ** + ** Because a ShmBarrier() call is made between taking the copy of + ** nBackfill and checking that the wal-header in shared-memory still + ** matches the one cached in pWal->hdr, it is guaranteed that the + ** checkpointer that set nBackfill was not working with a wal-index + ** header newer than that cached in pWal->hdr. If it were, that could + ** cause a problem. The checkpointer could omit to checkpoint + ** a version of page X that lies before pWal->minFrame (call that version + ** A) on the basis that there is a newer version (version B) of the same + ** page later in the wal file. But if version B happens to like past + ** frame pWal->hdr.mxFrame - then the client would incorrectly assume + ** that it can read version A from the database file. However, since + ** we can guarantee that the checkpointer that set nBackfill could not + ** see any pages past pWal->hdr.mxFrame, this problem does not come up. + */ + pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; + walShmBarrier(pWal); + if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark + || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) + ){ + walUnlockShared(pWal, WAL_READ_LOCK(mxI)); + return WAL_RETRY; + }else{ + assert( mxReadMark<=pWal->hdr.mxFrame ); + pWal->readLock = (i16)mxI; + } + return rc; +} + +#ifdef SQLITE_ENABLE_SNAPSHOT +/* +** Attempt to reduce the value of the WalCkptInfo.nBackfillAttempted +** variable so that older snapshots can be accessed. To do this, loop +** through all wal frames from nBackfillAttempted to (nBackfill+1), +** comparing their content to the corresponding page with the database +** file, if any. Set nBackfillAttempted to the frame number of the +** first frame for which the wal file content matches the db file. +** +** This is only really safe if the file-system is such that any page +** writes made by earlier checkpointers were atomic operations, which +** is not always true. It is also possible that nBackfillAttempted +** may be left set to a value larger than expected, if a wal frame +** contains content that duplicate of an earlier version of the same +** page. +** +** SQLITE_OK is returned if successful, or an SQLite error code if an +** error occurs. It is not an error if nBackfillAttempted cannot be +** decreased at all. +*/ +SQLITE_PRIVATE int sqlite3WalSnapshotRecover(Wal *pWal){ + int rc; + + assert( pWal->readLock>=0 ); + rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); + if( rc==SQLITE_OK ){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + int szPage = (int)pWal->szPage; + i64 szDb; /* Size of db file in bytes */ + + rc = sqlite3OsFileSize(pWal->pDbFd, &szDb); + if( rc==SQLITE_OK ){ + void *pBuf1 = sqlite3_malloc(szPage); + void *pBuf2 = sqlite3_malloc(szPage); + if( pBuf1==0 || pBuf2==0 ){ + rc = SQLITE_NOMEM; + }else{ + u32 i = pInfo->nBackfillAttempted; + for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){ + WalHashLoc sLoc; /* Hash table location */ + u32 pgno; /* Page number in db file */ + i64 iDbOff; /* Offset of db file entry */ + i64 iWalOff; /* Offset of wal file entry */ + + rc = walHashGet(pWal, walFramePage(i), &sLoc); + if( rc!=SQLITE_OK ) break; + assert( i - sLoc.iZero - 1 >=0 ); + pgno = sLoc.aPgno[i-sLoc.iZero-1]; + iDbOff = (i64)(pgno-1) * szPage; + + if( iDbOff+szPage<=szDb ){ + iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE; + rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff); + + if( rc==SQLITE_OK ){ + rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff); + } + + if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){ + break; + } + } + + pInfo->nBackfillAttempted = i-1; + } + } + + sqlite3_free(pBuf1); + sqlite3_free(pBuf2); + } + walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); + } + + return rc; +} +#endif /* SQLITE_ENABLE_SNAPSHOT */ + +/* +** Begin a read transaction on the database. +** +** This routine used to be called sqlite3OpenSnapshot() and with good reason: +** it takes a snapshot of the state of the WAL and wal-index for the current +** instant in time. The current thread will continue to use this snapshot. +** Other threads might append new content to the WAL and wal-index but +** that extra content is ignored by the current thread. +** +** If the database contents have changes since the previous read +** transaction, then *pChanged is set to 1 before returning. The +** Pager layer will use this to know that its cache is stale and +** needs to be flushed. +*/ +SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ + int rc; /* Return code */ + int cnt = 0; /* Number of TryBeginRead attempts */ +#ifdef SQLITE_ENABLE_SNAPSHOT + int bChanged = 0; + WalIndexHdr *pSnapshot = pWal->pSnapshot; +#endif + + assert( pWal->ckptLock==0 ); + +#ifdef SQLITE_ENABLE_SNAPSHOT + if( pSnapshot ){ + if( memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ + bChanged = 1; + } + + /* It is possible that there is a checkpointer thread running + ** concurrent with this code. If this is the case, it may be that the + ** checkpointer has already determined that it will checkpoint + ** snapshot X, where X is later in the wal file than pSnapshot, but + ** has not yet set the pInfo->nBackfillAttempted variable to indicate + ** its intent. To avoid the race condition this leads to, ensure that + ** there is no checkpointer process by taking a shared CKPT lock + ** before checking pInfo->nBackfillAttempted. */ + (void)walEnableBlocking(pWal); + rc = walLockShared(pWal, WAL_CKPT_LOCK); + walDisableBlocking(pWal); + + if( rc!=SQLITE_OK ){ + return rc; + } + pWal->ckptLock = 1; + } +#endif + + do{ + rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); + }while( rc==WAL_RETRY ); + testcase( (rc&0xff)==SQLITE_BUSY ); + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); + +#ifdef SQLITE_ENABLE_SNAPSHOT + if( rc==SQLITE_OK ){ + if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ + /* At this point the client has a lock on an aReadMark[] slot holding + ** a value equal to or smaller than pSnapshot->mxFrame, but pWal->hdr + ** is populated with the wal-index header corresponding to the head + ** of the wal file. Verify that pSnapshot is still valid before + ** continuing. Reasons why pSnapshot might no longer be valid: + ** + ** (1) The WAL file has been reset since the snapshot was taken. + ** In this case, the salt will have changed. + ** + ** (2) A checkpoint as been attempted that wrote frames past + ** pSnapshot->mxFrame into the database file. Note that the + ** checkpoint need not have completed for this to cause problems. + */ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + + assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); + assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); + + /* Check that the wal file has not been wrapped. Assuming that it has + ** not, also check that no checkpointer has attempted to checkpoint any + ** frames beyond pSnapshot->mxFrame. If either of these conditions are + ** true, return SQLITE_ERROR_SNAPSHOT. Otherwise, overwrite pWal->hdr + ** with *pSnapshot and set *pChanged as appropriate for opening the + ** snapshot. */ + if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) + && pSnapshot->mxFrame>=pInfo->nBackfillAttempted + ){ + assert( pWal->readLock>0 ); + memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr)); + *pChanged = bChanged; + }else{ + rc = SQLITE_ERROR_SNAPSHOT; + } + + /* A client using a non-current snapshot may not ignore any frames + ** from the start of the wal file. This is because, for a system + ** where (minFrame < iSnapshot < maxFrame), a checkpointer may + ** have omitted to checkpoint a frame earlier than minFrame in + ** the file because there exists a frame after iSnapshot that + ** is the same database page. */ + pWal->minFrame = 1; + + if( rc!=SQLITE_OK ){ + sqlite3WalEndReadTransaction(pWal); + } + } + } + + /* Release the shared CKPT lock obtained above. */ + if( pWal->ckptLock ){ + assert( pSnapshot ); + walUnlockShared(pWal, WAL_CKPT_LOCK); + pWal->ckptLock = 0; + } +#endif + return rc; +} + +/* +** Finish with a read transaction. All this does is release the +** read-lock. +*/ +SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){ + sqlite3WalEndWriteTransaction(pWal); + if( pWal->readLock>=0 ){ + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->readLock = -1; + } +} + +/* +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. +** +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. +*/ +SQLITE_PRIVATE int sqlite3WalFindFrame( + Wal *pWal, /* WAL handle */ + Pgno pgno, /* Database page number to read data for */ + u32 *piRead /* OUT: Frame number (or zero) */ +){ + u32 iRead = 0; /* If !=0, WAL frame to return data from */ + u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ + int iHash; /* Used to loop through N hash tables */ + int iMinHash; + + /* This routine is only be called from within a read transaction. */ + assert( pWal->readLock>=0 || pWal->lockError ); + + /* If the "last page" field of the wal-index header snapshot is 0, then + ** no data will be read from the wal under any circumstances. Return early + ** in this case as an optimization. Likewise, if pWal->readLock==0, + ** then the WAL is ignored by the reader so return early, as if the + ** WAL were empty. + */ + if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ + *piRead = 0; + return SQLITE_OK; + } + + /* Search the hash table or tables for an entry matching page number + ** pgno. Each iteration of the following for() loop searches one + ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). + ** + ** This code might run concurrently to the code in walIndexAppend() + ** that adds entries to the wal-index (and possibly to this hash + ** table). This means the value just read from the hash + ** slot (aHash[iKey]) may have been added before or after the + ** current read transaction was opened. Values added after the + ** read transaction was opened may have been written incorrectly - + ** i.e. these slots may contain garbage data. However, we assume + ** that any slots written before the current read transaction was + ** opened remain unmodified. + ** + ** For the reasons above, the if(...) condition featured in the inner + ** loop of the following block is more stringent that would be required + ** if we had exclusive access to the hash-table: + ** + ** (aPgno[iFrame]==pgno): + ** This condition filters out normal hash-table collisions. + ** + ** (iFrame<=iLast): + ** This condition filters out entries that were added to the hash + ** table after the current read-transaction had started. + */ + iMinHash = walFramePage(pWal->minFrame); + for(iHash=walFramePage(iLast); iHash>=iMinHash; iHash--){ + WalHashLoc sLoc; /* Hash table location */ + int iKey; /* Hash slot index */ + int nCollide; /* Number of hash collisions remaining */ + int rc; /* Error code */ + u32 iH; + + rc = walHashGet(pWal, iHash, &sLoc); + if( rc!=SQLITE_OK ){ + return rc; + } + nCollide = HASHTABLE_NSLOT; + iKey = walHash(pgno); + while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){ + u32 iFrame = iH + sLoc.iZero; + if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){ + assert( iFrame>iRead || CORRUPT_DB ); + iRead = iFrame; + } + if( (nCollide--)==0 ){ + return SQLITE_CORRUPT_BKPT; + } + iKey = walNextHash(iKey); + } + if( iRead ) break; + } + +#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + /* If expensive assert() statements are available, do a linear search + ** of the wal-index file content. Make sure the results agree with the + ** result obtained using the hash indexes above. */ + { + u32 iRead2 = 0; + u32 iTest; + assert( pWal->bShmUnreliable || pWal->minFrame>0 ); + for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){ + if( walFramePgno(pWal, iTest)==pgno ){ + iRead2 = iTest; + break; + } + } + assert( iRead==iRead2 ); + } +#endif + + *piRead = iRead; + return SQLITE_OK; +} + +/* +** Read the contents of frame iRead from the wal file into buffer pOut +** (which is nOut bytes in size). Return SQLITE_OK if successful, or an +** error code otherwise. +*/ +SQLITE_PRIVATE int sqlite3WalReadFrame( + Wal *pWal, /* WAL handle */ + u32 iRead, /* Frame to read */ + int nOut, /* Size of buffer pOut in bytes */ + u8 *pOut /* Buffer to write page data to */ +){ + int sz; + i64 iOffset; + sz = pWal->hdr.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + testcase( sz<=32768 ); + testcase( sz>=65536 ); + iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ + return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); +} + +/* +** Return the size of the database in pages (or zero, if unknown). +*/ +SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal){ + if( pWal && ALWAYS(pWal->readLock>=0) ){ + return pWal->hdr.nPage; + } + return 0; +} + + +/* +** This function starts a write transaction on the WAL. +** +** A read transaction must have already been started by a prior call +** to sqlite3WalBeginReadTransaction(). +** +** If another thread or process has written into the database since +** the read transaction was started, then it is not possible for this +** thread to write as doing so would cause a fork. So this routine +** returns SQLITE_BUSY in that case and no write transaction is started. +** +** There can only be a single writer active at a time. +*/ +SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal){ + int rc; + +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + /* If the write-lock is already held, then it was obtained before the + ** read-transaction was even opened, making this call a no-op. + ** Return early. */ + if( pWal->writeLock ){ + assert( !memcmp(&pWal->hdr,(void *)walIndexHdr(pWal),sizeof(WalIndexHdr)) ); + return SQLITE_OK; + } +#endif + + /* Cannot start a write transaction without first holding a read + ** transaction. */ + assert( pWal->readLock>=0 ); + assert( pWal->writeLock==0 && pWal->iReCksum==0 ); + + if( pWal->readOnly ){ + return SQLITE_READONLY; + } + + /* Only one writer allowed at a time. Get the write lock. Return + ** SQLITE_BUSY if unable. + */ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + if( rc ){ + return rc; + } + pWal->writeLock = 1; + + /* If another connection has written to the database file since the + ** time the read transaction on this connection was started, then + ** the write is disallowed. + */ + if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + rc = SQLITE_BUSY_SNAPSHOT; + } + + return rc; +} + +/* +** End a write transaction. The commit has already been done. This +** routine merely releases the lock. +*/ +SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal){ + if( pWal->writeLock ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + pWal->iReCksum = 0; + pWal->truncateOnCommit = 0; + } + return SQLITE_OK; +} + +/* +** If any data has been written (but not committed) to the log file, this +** function moves the write-pointer back to the start of the transaction. +** +** Additionally, the callback function is invoked for each frame written +** to the WAL since the start of the transaction. If the callback returns +** other than SQLITE_OK, it is not invoked again and the error code is +** returned to the caller. +** +** Otherwise, if the callback function does not return an error, this +** function returns SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ + int rc = SQLITE_OK; + if( ALWAYS(pWal->writeLock) ){ + Pgno iMax = pWal->hdr.mxFrame; + Pgno iFrame; + + /* Restore the clients cache of the wal-index header to the state it + ** was in before the client began writing to the database. + */ + memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); + + for(iFrame=pWal->hdr.mxFrame+1; + ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; + iFrame++ + ){ + /* This call cannot fail. Unless the page for which the page number + ** is passed as the second argument is (a) in the cache and + ** (b) has an outstanding reference, then xUndo is either a no-op + ** (if (a) is false) or simply expels the page from the cache (if (b) + ** is false). + ** + ** If the upper layer is doing a rollback, it is guaranteed that there + ** are no outstanding references to any page other than page 1. And + ** page 1 is never written to the log until the transaction is + ** committed. As a result, the call to xUndo may not fail. + */ + assert( walFramePgno(pWal, iFrame)!=1 ); + rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); + } + if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); + } + return rc; +} + +/* +** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 +** values. This function populates the array with values required to +** "rollback" the write position of the WAL handle back to the current +** point in the event of a savepoint rollback (via WalSavepointUndo()). +*/ +SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ + assert( pWal->writeLock ); + aWalData[0] = pWal->hdr.mxFrame; + aWalData[1] = pWal->hdr.aFrameCksum[0]; + aWalData[2] = pWal->hdr.aFrameCksum[1]; + aWalData[3] = pWal->nCkpt; +} + +/* +** Move the write position of the WAL back to the point identified by +** the values in the aWalData[] array. aWalData must point to an array +** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated +** by a call to WalSavepoint(). +*/ +SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ + int rc = SQLITE_OK; + + assert( pWal->writeLock ); + assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame ); + + if( aWalData[3]!=pWal->nCkpt ){ + /* This savepoint was opened immediately after the write-transaction + ** was started. Right after that, the writer decided to wrap around + ** to the start of the log. Update the savepoint values to match. + */ + aWalData[0] = 0; + aWalData[3] = pWal->nCkpt; + } + + if( aWalData[0]hdr.mxFrame ){ + pWal->hdr.mxFrame = aWalData[0]; + pWal->hdr.aFrameCksum[0] = aWalData[1]; + pWal->hdr.aFrameCksum[1] = aWalData[2]; + walCleanupHash(pWal); + } + + return rc; +} + +/* +** This function is called just before writing a set of frames to the log +** file (see sqlite3WalFrames()). It checks to see if, instead of appending +** to the current log file, it is possible to overwrite the start of the +** existing log file with the new frames (i.e. "reset" the log). If so, +** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left +** unchanged. +** +** SQLITE_OK is returned if no error is encountered (regardless of whether +** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned +** if an error occurs. +*/ +static int walRestartLog(Wal *pWal){ + int rc = SQLITE_OK; + int cnt; + + if( pWal->readLock==0 ){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + assert( pInfo->nBackfill==pWal->hdr.mxFrame ); + if( pInfo->nBackfill>0 ){ + u32 salt1; + sqlite3_randomness(4, &salt1); + rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + if( rc==SQLITE_OK ){ + /* If all readers are using WAL_READ_LOCK(0) (in other words if no + ** readers are currently using the WAL), then the transactions + ** frames will overwrite the start of the existing log. Update the + ** wal-index header to reflect this. + ** + ** In theory it would be Ok to update the cache of the header only + ** at this point. But updating the actual wal-index header is also + ** safe and means there is no special case for sqlite3WalUndo() + ** to handle if this transaction is rolled back. */ + walRestartHdr(pWal, salt1); + walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + }else if( rc!=SQLITE_BUSY ){ + return rc; + } + } + walUnlockShared(pWal, WAL_READ_LOCK(0)); + pWal->readLock = -1; + cnt = 0; + do{ + int notUsed; + rc = walTryBeginRead(pWal, ¬Used, 1, ++cnt); + }while( rc==WAL_RETRY ); + assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); + } + return rc; +} + +/* +** Information about the current state of the WAL file and where +** the next fsync should occur - passed from sqlite3WalFrames() into +** walWriteToLog(). +*/ +typedef struct WalWriter { + Wal *pWal; /* The complete WAL information */ + sqlite3_file *pFd; /* The WAL file to which we write */ + sqlite3_int64 iSyncPoint; /* Fsync at this offset */ + int syncFlags; /* Flags for the fsync */ + int szPage; /* Size of one page */ +} WalWriter; + +/* +** Write iAmt bytes of content into the WAL file beginning at iOffset. +** Do a sync when crossing the p->iSyncPoint boundary. +** +** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, +** first write the part before iSyncPoint, then sync, then write the +** rest. +*/ +static int walWriteToLog( + WalWriter *p, /* WAL to write to */ + void *pContent, /* Content to be written */ + int iAmt, /* Number of bytes to write */ + sqlite3_int64 iOffset /* Start writing at this offset */ +){ + int rc; + if( iOffsetiSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ + int iFirstAmt = (int)(p->iSyncPoint - iOffset); + rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); + if( rc ) return rc; + iOffset += iFirstAmt; + iAmt -= iFirstAmt; + pContent = (void*)(iFirstAmt + (char*)pContent); + assert( WAL_SYNC_FLAGS(p->syncFlags)!=0 ); + rc = sqlite3OsSync(p->pFd, WAL_SYNC_FLAGS(p->syncFlags)); + if( iAmt==0 || rc ) return rc; + } + rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); + return rc; +} + +/* +** Write out a single frame of the WAL +*/ +static int walWriteOneFrame( + WalWriter *p, /* Where to write the frame */ + PgHdr *pPage, /* The page of the frame to be written */ + int nTruncate, /* The commit flag. Usually 0. >0 for commit */ + sqlite3_int64 iOffset /* Byte offset at which to write */ +){ + int rc; /* Result code from subfunctions */ + void *pData; /* Data actually written */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ + pData = pPage->pData; + walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + if( rc ) return rc; + /* Write the page data */ + rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); + return rc; +} + +/* +** This function is called as part of committing a transaction within which +** one or more frames have been overwritten. It updates the checksums for +** all frames written to the wal file by the current transaction starting +** with the earliest to have been overwritten. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int walRewriteChecksums(Wal *pWal, u32 iLast){ + const int szPage = pWal->szPage;/* Database page size */ + int rc = SQLITE_OK; /* Return code */ + u8 *aBuf; /* Buffer to load data from wal file into */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-headers in */ + u32 iRead; /* Next frame to read from wal file */ + i64 iCksumOff; + + aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE); + if( aBuf==0 ) return SQLITE_NOMEM_BKPT; + + /* Find the checksum values to use as input for the recalculating the + ** first checksum. If the first frame is frame 1 (implying that the current + ** transaction restarted the wal file), these values must be read from the + ** wal-file header. Otherwise, read them from the frame header of the + ** previous frame. */ + assert( pWal->iReCksum>0 ); + if( pWal->iReCksum==1 ){ + iCksumOff = 24; + }else{ + iCksumOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16; + } + rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, iCksumOff); + pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf); + pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]); + + iRead = pWal->iReCksum; + pWal->iReCksum = 0; + for(; rc==SQLITE_OK && iRead<=iLast; iRead++){ + i64 iOff = walFrameOffset(iRead, szPage); + rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); + if( rc==SQLITE_OK ){ + u32 iPgno, nDbSize; + iPgno = sqlite3Get4byte(aBuf); + nDbSize = sqlite3Get4byte(&aBuf[4]); + + walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame); + rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff); + } + } + + sqlite3_free(aBuf); + return rc; +} + +/* +** Write a set of frames to the log. The caller must hold the write-lock +** on the log file (obtained using sqlite3WalBeginWriteTransaction()). +*/ +SQLITE_PRIVATE int sqlite3WalFrames( + Wal *pWal, /* Wal handle to write to */ + int szPage, /* Database page-size in bytes */ + PgHdr *pList, /* List of dirty pages to write */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int sync_flags /* Flags to pass to OsSync() (or 0) */ +){ + int rc; /* Used to catch return codes */ + u32 iFrame; /* Next frame address */ + PgHdr *p; /* Iterator to run through pList with. */ + PgHdr *pLast = 0; /* Last frame in list */ + int nExtra = 0; /* Number of extra copies of last page */ + int szFrame; /* The size of a single frame */ + i64 iOffset; /* Next byte to write in WAL file */ + WalWriter w; /* The writer */ + u32 iFirst = 0; /* First frame that may be overwritten */ + WalIndexHdr *pLive; /* Pointer to shared header */ + + assert( pList ); + assert( pWal->writeLock ); + + /* If this frame set completes a transaction, then nTruncate>0. If + ** nTruncate==0 then this frame set does not complete the transaction. */ + assert( (isCommit!=0)==(nTruncate!=0) ); + +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) + { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} + WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", + pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); + } +#endif + + pLive = (WalIndexHdr*)walIndexHdr(pWal); + if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){ + iFirst = pLive->mxFrame+1; + } + + /* See if it is possible to write these frames into the start of the + ** log file, instead of appending to it at pWal->hdr.mxFrame. + */ + if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ + return rc; + } + + /* If this is the first frame written into the log, write the WAL + ** header to the start of the WAL file. See comments at the top of + ** this source file for a description of the WAL header format. + */ + iFrame = pWal->hdr.mxFrame; + if( iFrame==0 ){ + u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ + u32 aCksum[2]; /* Checksum for wal-header */ + + sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); + sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); + sqlite3Put4byte(&aWalHdr[8], szPage); + sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); + if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); + memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); + walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); + sqlite3Put4byte(&aWalHdr[24], aCksum[0]); + sqlite3Put4byte(&aWalHdr[28], aCksum[1]); + + pWal->szPage = szPage; + pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN; + pWal->hdr.aFrameCksum[0] = aCksum[0]; + pWal->hdr.aFrameCksum[1] = aCksum[1]; + pWal->truncateOnCommit = 1; + + rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); + WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless + ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise + ** an out-of-order write following a WAL restart could result in + ** database corruption. See the ticket: + ** + ** https://sqlite.org/src/info/ff5be73dee + */ + if( pWal->syncHeader ){ + rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags)); + if( rc ) return rc; + } + } + assert( (int)pWal->szPage==szPage ); + + /* Setup information needed to write frames into the WAL */ + w.pWal = pWal; + w.pFd = pWal->pWalFd; + w.iSyncPoint = 0; + w.syncFlags = sync_flags; + w.szPage = szPage; + iOffset = walFrameOffset(iFrame+1, szPage); + szFrame = szPage + WAL_FRAME_HDRSIZE; + + /* Write all frames into the log file exactly once */ + for(p=pList; p; p=p->pDirty){ + int nDbSize; /* 0 normally. Positive == commit flag */ + + /* Check if this page has already been written into the wal file by + ** the current transaction. If so, overwrite the existing frame and + ** set Wal.writeLock to WAL_WRITELOCK_RECKSUM - indicating that + ** checksums must be recomputed when the transaction is committed. */ + if( iFirst && (p->pDirty || isCommit==0) ){ + u32 iWrite = 0; + VVA_ONLY(rc =) sqlite3WalFindFrame(pWal, p->pgno, &iWrite); + assert( rc==SQLITE_OK || iWrite==0 ); + if( iWrite>=iFirst ){ + i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE; + void *pData; + if( pWal->iReCksum==0 || iWriteiReCksum ){ + pWal->iReCksum = iWrite; + } + pData = p->pData; + rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOff); + if( rc ) return rc; + p->flags &= ~PGHDR_WAL_APPEND; + continue; + } + } + + iFrame++; + assert( iOffset==walFrameOffset(iFrame, szPage) ); + nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; + rc = walWriteOneFrame(&w, p, nDbSize, iOffset); + if( rc ) return rc; + pLast = p; + iOffset += szFrame; + p->flags |= PGHDR_WAL_APPEND; + } + + /* Recalculate checksums within the wal file if required. */ + if( isCommit && pWal->iReCksum ){ + rc = walRewriteChecksums(pWal, iFrame); + if( rc ) return rc; + } + + /* If this is the end of a transaction, then we might need to pad + ** the transaction and/or sync the WAL file. + ** + ** Padding and syncing only occur if this set of frames complete a + ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL + ** or synchronous==OFF, then no padding or syncing are needed. + ** + ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not + ** needed and only the sync is done. If padding is needed, then the + ** final frame is repeated (with its commit mark) until the next sector + ** boundary is crossed. Only the part of the WAL prior to the last + ** sector boundary is synced; the part of the last frame that extends + ** past the sector boundary is written after the sync. + */ + if( isCommit && WAL_SYNC_FLAGS(sync_flags)!=0 ){ + int bSync = 1; + if( pWal->padToSectorBoundary ){ + int sectorSize = sqlite3SectorSize(pWal->pWalFd); + w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; + bSync = (w.iSyncPoint==iOffset); + testcase( bSync ); + while( iOffsettruncateOnCommit && pWal->mxWalSize>=0 ){ + i64 sz = pWal->mxWalSize; + if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ + sz = walFrameOffset(iFrame+nExtra+1, szPage); + } + walLimitSize(pWal, sz); + pWal->truncateOnCommit = 0; + } + + /* Append data to the wal-index. It is not necessary to lock the + ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index + ** guarantees that there are no other writers, and no data that may + ** be in use by existing readers is being overwritten. + */ + iFrame = pWal->hdr.mxFrame; + for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ + if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; + iFrame++; + rc = walIndexAppend(pWal, iFrame, p->pgno); + } + assert( pLast!=0 || nExtra==0 ); + while( rc==SQLITE_OK && nExtra>0 ){ + iFrame++; + nExtra--; + rc = walIndexAppend(pWal, iFrame, pLast->pgno); + } + + if( rc==SQLITE_OK ){ + /* Update the private copy of the header. */ + pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + pWal->hdr.mxFrame = iFrame; + if( isCommit ){ + pWal->hdr.iChange++; + pWal->hdr.nPage = nTruncate; + } + /* If this is a commit, update the wal-index header too. */ + if( isCommit ){ + walIndexWriteHdr(pWal); + pWal->iCallback = iFrame; + } + } + + WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); + return rc; +} + +/* +** This routine is called to implement sqlite3_wal_checkpoint() and +** related interfaces. +** +** Obtain a CHECKPOINT lock and then backfill as much information as +** we can from WAL into the database. +** +** If parameter xBusy is not NULL, it is a pointer to a busy-handler +** callback. In this case this function runs a blocking checkpoint. +*/ +SQLITE_PRIVATE int sqlite3WalCheckpoint( + Wal *pWal, /* Wal connection */ + sqlite3 *db, /* Check this handle's interrupt flag */ + int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags to sync db file with (or 0) */ + int nBuf, /* Size of temporary buffer */ + u8 *zBuf, /* Temporary buffer to use */ + int *pnLog, /* OUT: Number of frames in WAL */ + int *pnCkpt /* OUT: Number of backfilled frames in WAL */ +){ + int rc; /* Return code */ + int isChanged = 0; /* True if a new wal-index header is loaded */ + int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ + int (*xBusy2)(void*) = xBusy; /* Busy handler for eMode2 */ + + assert( pWal->ckptLock==0 ); + assert( pWal->writeLock==0 ); + + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); + + if( pWal->readOnly ) return SQLITE_READONLY; + WALTRACE(("WAL%p: checkpoint begins\n", pWal)); + + /* Enable blocking locks, if possible. If blocking locks are successfully + ** enabled, set xBusy2=0 so that the busy-handler is never invoked. */ + sqlite3WalDb(pWal, db); + (void)walEnableBlocking(pWal); + + /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive + ** "checkpoint" lock on the database file. + ** EVIDENCE-OF: R-10421-19736 If any other process is running a + ** checkpoint operation at the same time, the lock cannot be obtained and + ** SQLITE_BUSY is returned. + ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, + ** it will not be invoked in this case. + */ + rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); + testcase( rc==SQLITE_BUSY ); + testcase( rc!=SQLITE_OK && xBusy2!=0 ); + if( rc==SQLITE_OK ){ + pWal->ckptLock = 1; + + /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and + ** TRUNCATE modes also obtain the exclusive "writer" lock on the database + ** file. + ** + ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained + ** immediately, and a busy-handler is configured, it is invoked and the + ** writer lock retried until either the busy-handler returns 0 or the + ** lock is successfully obtained. + */ + if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1); + if( rc==SQLITE_OK ){ + pWal->writeLock = 1; + }else if( rc==SQLITE_BUSY ){ + eMode2 = SQLITE_CHECKPOINT_PASSIVE; + xBusy2 = 0; + rc = SQLITE_OK; + } + } + } + + + /* Read the wal-index header. */ + if( rc==SQLITE_OK ){ + walDisableBlocking(pWal); + rc = walIndexReadHdr(pWal, &isChanged); + (void)walEnableBlocking(pWal); + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } + } + + /* Copy data from the log to the database file. */ + if( rc==SQLITE_OK ){ + + if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); + } + + /* If no error occurred, set the output variables. */ + if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ + if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; + if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); + } + } + + if( isChanged ){ + /* If a new wal-index header was loaded before the checkpoint was + ** performed, then the pager-cache associated with pWal is now + ** out of date. So zero the cached wal-index header to ensure that + ** next time the pager opens a snapshot on this database it knows that + ** the cache needs to be reset. + */ + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + } + + walDisableBlocking(pWal); + sqlite3WalDb(pWal, 0); + + /* Release the locks. */ + sqlite3WalEndWriteTransaction(pWal); + if( pWal->ckptLock ){ + walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); + pWal->ckptLock = 0; + } + WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + if( rc==SQLITE_BUSY_TIMEOUT ) rc = SQLITE_BUSY; +#endif + return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc); +} + +/* Return the value to pass to a sqlite3_wal_hook callback, the +** number of frames in the WAL at the point of the last commit since +** sqlite3WalCallback() was called. If no commits have occurred since +** the last call, then return 0. +*/ +SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal){ + u32 ret = 0; + if( pWal ){ + ret = pWal->iCallback; + pWal->iCallback = 0; + } + return (int)ret; +} + +/* +** This function is called to change the WAL subsystem into or out +** of locking_mode=EXCLUSIVE. +** +** If op is zero, then attempt to change from locking_mode=EXCLUSIVE +** into locking_mode=NORMAL. This means that we must acquire a lock +** on the pWal->readLock byte. If the WAL is already in locking_mode=NORMAL +** or if the acquisition of the lock fails, then return 0. If the +** transition out of exclusive-mode is successful, return 1. This +** operation must occur while the pager is still holding the exclusive +** lock on the main database file. +** +** If op is one, then change from locking_mode=NORMAL into +** locking_mode=EXCLUSIVE. This means that the pWal->readLock must +** be released. Return 1 if the transition is made and 0 if the +** WAL is already in exclusive-locking mode - meaning that this +** routine is a no-op. The pager must already hold the exclusive lock +** on the main database file before invoking this operation. +** +** If op is negative, then do a dry-run of the op==1 case but do +** not actually change anything. The pager uses this to see if it +** should acquire the database exclusive lock prior to invoking +** the op==1 case. +*/ +SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op){ + int rc; + assert( pWal->writeLock==0 ); + assert( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE || op==-1 ); + + /* pWal->readLock is usually set, but might be -1 if there was a + ** prior error while attempting to acquire are read-lock. This cannot + ** happen if the connection is actually in exclusive mode (as no xShmLock + ** locks are taken in this case). Nor should the pager attempt to + ** upgrade to exclusive-mode following such an error. + */ + assert( pWal->readLock>=0 || pWal->lockError ); + assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) ); + + if( op==0 ){ + if( pWal->exclusiveMode!=WAL_NORMAL_MODE ){ + pWal->exclusiveMode = WAL_NORMAL_MODE; + if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ + pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; + } + rc = pWal->exclusiveMode==WAL_NORMAL_MODE; + }else{ + /* Already in locking_mode=NORMAL */ + rc = 0; + } + }else if( op>0 ){ + assert( pWal->exclusiveMode==WAL_NORMAL_MODE ); + assert( pWal->readLock>=0 ); + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; + rc = 1; + }else{ + rc = pWal->exclusiveMode==WAL_NORMAL_MODE; + } + return rc; +} + +/* +** Return true if the argument is non-NULL and the WAL module is using +** heap-memory for the wal-index. Otherwise, if the argument is NULL or the +** WAL module is using shared-memory, return false. +*/ +SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal){ + return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); +} + +#ifdef SQLITE_ENABLE_SNAPSHOT +/* Create a snapshot object. The content of a snapshot is opaque to +** every other subsystem, so the WAL module can put whatever it needs +** in the object. +*/ +SQLITE_PRIVATE int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){ + int rc = SQLITE_OK; + WalIndexHdr *pRet; + static const u32 aZero[4] = { 0, 0, 0, 0 }; + + assert( pWal->readLock>=0 && pWal->writeLock==0 ); + + if( memcmp(&pWal->hdr.aFrameCksum[0],aZero,16)==0 ){ + *ppSnapshot = 0; + return SQLITE_ERROR; + } + pRet = (WalIndexHdr*)sqlite3_malloc(sizeof(WalIndexHdr)); + if( pRet==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + memcpy(pRet, &pWal->hdr, sizeof(WalIndexHdr)); + *ppSnapshot = (sqlite3_snapshot*)pRet; + } + + return rc; +} + +/* Try to open on pSnapshot when the next read-transaction starts +*/ +SQLITE_PRIVATE void sqlite3WalSnapshotOpen( + Wal *pWal, + sqlite3_snapshot *pSnapshot +){ + pWal->pSnapshot = (WalIndexHdr*)pSnapshot; +} + +/* +** Return a +ve value if snapshot p1 is newer than p2. A -ve value if +** p1 is older than p2 and zero if p1 and p2 are the same snapshot. +*/ +SQLITE_API int sqlite3_snapshot_cmp(sqlite3_snapshot *p1, sqlite3_snapshot *p2){ + WalIndexHdr *pHdr1 = (WalIndexHdr*)p1; + WalIndexHdr *pHdr2 = (WalIndexHdr*)p2; + + /* aSalt[0] is a copy of the value stored in the wal file header. It + ** is incremented each time the wal file is restarted. */ + if( pHdr1->aSalt[0]aSalt[0] ) return -1; + if( pHdr1->aSalt[0]>pHdr2->aSalt[0] ) return +1; + if( pHdr1->mxFramemxFrame ) return -1; + if( pHdr1->mxFrame>pHdr2->mxFrame ) return +1; + return 0; +} + +/* +** The caller currently has a read transaction open on the database. +** This function takes a SHARED lock on the CHECKPOINTER slot and then +** checks if the snapshot passed as the second argument is still +** available. If so, SQLITE_OK is returned. +** +** If the snapshot is not available, SQLITE_ERROR is returned. Or, if +** the CHECKPOINTER lock cannot be obtained, SQLITE_BUSY. If any error +** occurs (any value other than SQLITE_OK is returned), the CHECKPOINTER +** lock is released before returning. +*/ +SQLITE_PRIVATE int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot){ + int rc; + rc = walLockShared(pWal, WAL_CKPT_LOCK); + if( rc==SQLITE_OK ){ + WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot; + if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) + || pNew->mxFramenBackfillAttempted + ){ + rc = SQLITE_ERROR_SNAPSHOT; + walUnlockShared(pWal, WAL_CKPT_LOCK); + } + } + return rc; +} + +/* +** Release a lock obtained by an earlier successful call to +** sqlite3WalSnapshotCheck(). +*/ +SQLITE_PRIVATE void sqlite3WalSnapshotUnlock(Wal *pWal){ + assert( pWal ); + walUnlockShared(pWal, WAL_CKPT_LOCK); +} + + +#endif /* SQLITE_ENABLE_SNAPSHOT */ + +#ifdef SQLITE_ENABLE_ZIPVFS +/* +** If the argument is not NULL, it points to a Wal object that holds a +** read-lock. This function returns the database page-size if it is known, +** or zero if it is not (or if pWal is NULL). +*/ +SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){ + assert( pWal==0 || pWal->readLock>=0 ); + return (pWal ? pWal->szPage : 0); +} +#endif + +/* Return the sqlite3_file object for the WAL file +*/ +SQLITE_PRIVATE sqlite3_file *sqlite3WalFile(Wal *pWal){ + return pWal->pWalFd; +} + +#endif /* #ifndef SQLITE_OMIT_WAL */ + +/************** End of wal.c *************************************************/ +/************** Begin file btmutex.c *****************************************/ +/* +** 2007 August 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code used to implement mutexes on Btree objects. +** This code really belongs in btree.c. But btree.c is getting too +** big and we want to break it down some. This packaged seemed like +** a good breakout. +*/ +/************** Include btreeInt.h in the middle of btmutex.c ****************/ +/************** Begin file btreeInt.h ****************************************/ +/* +** 2004 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements an external (disk-based) database using BTrees. +** For a detailed discussion of BTrees, refer to +** +** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: +** "Sorting And Searching", pages 473-480. Addison-Wesley +** Publishing Company, Reading, Massachusetts. +** +** The basic idea is that each page of the file contains N database +** entries and N+1 pointers to subpages. +** +** ---------------------------------------------------------------- +** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N-1) | Ptr(N) | +** ---------------------------------------------------------------- +** +** All of the keys on the page that Ptr(0) points to have values less +** than Key(0). All of the keys on page Ptr(1) and its subpages have +** values greater than Key(0) and less than Key(1). All of the keys +** on Ptr(N) and its subpages have values greater than Key(N-1). And +** so forth. +** +** Finding a particular key requires reading O(log(M)) pages from the +** disk where M is the number of entries in the tree. +** +** In this implementation, a single file can hold one or more separate +** BTrees. Each BTree is identified by the index of its root page. The +** key and data for any entry are combined to form the "payload". A +** fixed amount of payload can be carried directly on the database +** page. If the payload is larger than the preset amount then surplus +** bytes are stored on overflow pages. The payload for an entry +** and the preceding pointer are combined to form a "Cell". Each +** page has a small header which contains the Ptr(N) pointer and other +** information such as the size of key and data. +** +** FORMAT DETAILS +** +** The file is divided into pages. The first page is called page 1, +** the second is page 2, and so forth. A page number of zero indicates +** "no such page". The page size can be any power of 2 between 512 and 65536. +** Each page can be either a btree page, a freelist page, an overflow +** page, or a pointer-map page. +** +** The first page is always a btree page. The first 100 bytes of the first +** page contain a special header (the "file header") that describes the file. +** The format of the file header is as follows: +** +** OFFSET SIZE DESCRIPTION +** 0 16 Header string: "SQLite format 3\000" +** 16 2 Page size in bytes. (1 means 65536) +** 18 1 File format write version +** 19 1 File format read version +** 20 1 Bytes of unused space at the end of each page +** 21 1 Max embedded payload fraction (must be 64) +** 22 1 Min embedded payload fraction (must be 32) +** 23 1 Min leaf payload fraction (must be 32) +** 24 4 File change counter +** 28 4 Reserved for future use +** 32 4 First freelist page +** 36 4 Number of freelist pages in the file +** 40 60 15 4-byte meta values passed to higher layers +** +** 40 4 Schema cookie +** 44 4 File format of schema layer +** 48 4 Size of page cache +** 52 4 Largest root-page (auto/incr_vacuum) +** 56 4 1=UTF-8 2=UTF16le 3=UTF16be +** 60 4 User version +** 64 4 Incremental vacuum mode +** 68 4 Application-ID +** 72 20 unused +** 92 4 The version-valid-for number +** 96 4 SQLITE_VERSION_NUMBER +** +** All of the integer values are big-endian (most significant byte first). +** +** The file change counter is incremented when the database is changed +** This counter allows other processes to know when the file has changed +** and thus when they need to flush their cache. +** +** The max embedded payload fraction is the amount of the total usable +** space in a page that can be consumed by a single cell for standard +** B-tree (non-LEAFDATA) tables. A value of 255 means 100%. The default +** is to limit the maximum cell size so that at least 4 cells will fit +** on one page. Thus the default max embedded payload fraction is 64. +** +** If the payload for a cell is larger than the max payload, then extra +** payload is spilled to overflow pages. Once an overflow page is allocated, +** as many bytes as possible are moved into the overflow pages without letting +** the cell size drop below the min embedded payload fraction. +** +** The min leaf payload fraction is like the min embedded payload fraction +** except that it applies to leaf nodes in a LEAFDATA tree. The maximum +** payload fraction for a LEAFDATA tree is always 100% (or 255) and it +** not specified in the header. +** +** Each btree pages is divided into three sections: The header, the +** cell pointer array, and the cell content area. Page 1 also has a 100-byte +** file header that occurs before the page header. +** +** |----------------| +** | file header | 100 bytes. Page 1 only. +** |----------------| +** | page header | 8 bytes for leaves. 12 bytes for interior nodes +** |----------------| +** | cell pointer | | 2 bytes per cell. Sorted order. +** | array | | Grows downward +** | | v +** |----------------| +** | unallocated | +** | space | +** |----------------| ^ Grows upwards +** | cell content | | Arbitrary order interspersed with freeblocks. +** | area | | and free space fragments. +** |----------------| +** +** The page headers looks like this: +** +** OFFSET SIZE DESCRIPTION +** 0 1 Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf +** 1 2 byte offset to the first freeblock +** 3 2 number of cells on this page +** 5 2 first byte of the cell content area +** 7 1 number of fragmented free bytes +** 8 4 Right child (the Ptr(N) value). Omitted on leaves. +** +** The flags define the format of this btree page. The leaf flag means that +** this page has no children. The zerodata flag means that this page carries +** only keys and no data. The intkey flag means that the key is an integer +** which is stored in the key size entry of the cell header rather than in +** the payload area. +** +** The cell pointer array begins on the first byte after the page header. +** The cell pointer array contains zero or more 2-byte numbers which are +** offsets from the beginning of the page to the cell content in the cell +** content area. The cell pointers occur in sorted order. The system strives +** to keep free space after the last cell pointer so that new cells can +** be easily added without having to defragment the page. +** +** Cell content is stored at the very end of the page and grows toward the +** beginning of the page. +** +** Unused space within the cell content area is collected into a linked list of +** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset +** to the first freeblock is given in the header. Freeblocks occur in +** increasing order. Because a freeblock must be at least 4 bytes in size, +** any group of 3 or fewer unused bytes in the cell content area cannot +** exist on the freeblock chain. A group of 3 or fewer free bytes is called +** a fragment. The total number of bytes in all fragments is recorded. +** in the page header at offset 7. +** +** SIZE DESCRIPTION +** 2 Byte offset of the next freeblock +** 2 Bytes in this freeblock +** +** Cells are of variable length. Cells are stored in the cell content area at +** the end of the page. Pointers to the cells are in the cell pointer array +** that immediately follows the page header. Cells is not necessarily +** contiguous or in order, but cell pointers are contiguous and in order. +** +** Cell content makes use of variable length integers. A variable +** length integer is 1 to 9 bytes where the lower 7 bits of each +** byte are used. The integer consists of all bytes that have bit 8 set and +** the first byte with bit 8 clear. The most significant byte of the integer +** appears first. A variable-length integer may not be more than 9 bytes long. +** As a special case, all 8 bytes of the 9th byte are used as data. This +** allows a 64-bit integer to be encoded in 9 bytes. +** +** 0x00 becomes 0x00000000 +** 0x7f becomes 0x0000007f +** 0x81 0x00 becomes 0x00000080 +** 0x82 0x00 becomes 0x00000100 +** 0x80 0x7f becomes 0x0000007f +** 0x8a 0x91 0xd1 0xac 0x78 becomes 0x12345678 +** 0x81 0x81 0x81 0x81 0x01 becomes 0x10204081 +** +** Variable length integers are used for rowids and to hold the number of +** bytes of key and data in a btree cell. +** +** The content of a cell looks like this: +** +** SIZE DESCRIPTION +** 4 Page number of the left child. Omitted if leaf flag is set. +** var Number of bytes of data. Omitted if the zerodata flag is set. +** var Number of bytes of key. Or the key itself if intkey flag is set. +** * Payload +** 4 First page of the overflow chain. Omitted if no overflow +** +** Overflow pages form a linked list. Each page except the last is completely +** filled with data (pagesize - 4 bytes). The last page can have as little +** as 1 byte of data. +** +** SIZE DESCRIPTION +** 4 Page number of next overflow page +** * Data +** +** Freelist pages come in two subtypes: trunk pages and leaf pages. The +** file header points to the first in a linked list of trunk page. Each trunk +** page points to multiple leaf pages. The content of a leaf page is +** unspecified. A trunk page looks like this: +** +** SIZE DESCRIPTION +** 4 Page number of next trunk page +** 4 Number of leaf pointers on this page +** * zero or more pages numbers of leaves +*/ +/* #include "sqliteInt.h" */ + + +/* The following value is the maximum cell size assuming a maximum page +** size give above. +*/ +#define MX_CELL_SIZE(pBt) ((int)(pBt->pageSize-8)) + +/* The maximum number of cells on a single page of the database. This +** assumes a minimum cell size of 6 bytes (4 bytes for the cell itself +** plus 2 bytes for the index to the cell in the page header). Such +** small cells will be rare, but they are possible. +*/ +#define MX_CELL(pBt) ((pBt->pageSize-8)/6) + +/* Forward declarations */ +typedef struct MemPage MemPage; +typedef struct BtLock BtLock; +typedef struct CellInfo CellInfo; + +/* +** This is a magic string that appears at the beginning of every +** SQLite database in order to identify the file as a real database. +** +** You can change this value at compile-time by specifying a +** -DSQLITE_FILE_HEADER="..." on the compiler command-line. The +** header must be exactly 16 bytes including the zero-terminator so +** the string itself should be 15 characters long. If you change +** the header, then your custom library will not be able to read +** databases generated by the standard tools and the standard tools +** will not be able to read databases created by your custom library. +*/ +#ifndef SQLITE_FILE_HEADER /* 123456789 123456 */ +# define SQLITE_FILE_HEADER "SQLite format 3" +#endif + +/* +** Page type flags. An ORed combination of these flags appear as the +** first byte of on-disk image of every BTree page. +*/ +#define PTF_INTKEY 0x01 +#define PTF_ZERODATA 0x02 +#define PTF_LEAFDATA 0x04 +#define PTF_LEAF 0x08 + +/* +** An instance of this object stores information about each a single database +** page that has been loaded into memory. The information in this object +** is derived from the raw on-disk page content. +** +** As each database page is loaded into memory, the pager allocats an +** instance of this object and zeros the first 8 bytes. (This is the +** "extra" information associated with each page of the pager.) +** +** Access to all fields of this structure is controlled by the mutex +** stored in MemPage.pBt->mutex. +*/ +struct MemPage { + u8 isInit; /* True if previously initialized. MUST BE FIRST! */ + u8 intKey; /* True if table b-trees. False for index b-trees */ + u8 intKeyLeaf; /* True if the leaf of an intKey table */ + Pgno pgno; /* Page number for this page */ + /* Only the first 8 bytes (above) are zeroed by pager.c when a new page + ** is allocated. All fields that follow must be initialized before use */ + u8 leaf; /* True if a leaf page */ + u8 hdrOffset; /* 100 for page 1. 0 otherwise */ + u8 childPtrSize; /* 0 if leaf==1. 4 if leaf==0 */ + u8 max1bytePayload; /* min(maxLocal,127) */ + u8 nOverflow; /* Number of overflow cell bodies in aCell[] */ + u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */ + u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */ + u16 cellOffset; /* Index in aData of first cell pointer */ + int nFree; /* Number of free bytes on the page. -1 for unknown */ + u16 nCell; /* Number of cells on this page, local and ovfl */ + u16 maskPage; /* Mask for page offset */ + u16 aiOvfl[4]; /* Insert the i-th overflow cell before the aiOvfl-th + ** non-overflow cell */ + u8 *apOvfl[4]; /* Pointers to the body of overflow cells */ + BtShared *pBt; /* Pointer to BtShared that this page is part of */ + u8 *aData; /* Pointer to disk image of the page data */ + u8 *aDataEnd; /* One byte past the end of the entire page - not just + ** the usable space, the entire page. Used to prevent + ** corruption-induced of buffer overflow. */ + u8 *aCellIdx; /* The cell index area */ + u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */ + DbPage *pDbPage; /* Pager page handle */ + u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */ + void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */ +}; + +/* +** A linked list of the following structures is stored at BtShared.pLock. +** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor +** is opened on the table with root page BtShared.iTable. Locks are removed +** from this list when a transaction is committed or rolled back, or when +** a btree handle is closed. +*/ +struct BtLock { + Btree *pBtree; /* Btree handle holding this lock */ + Pgno iTable; /* Root page of table */ + u8 eLock; /* READ_LOCK or WRITE_LOCK */ + BtLock *pNext; /* Next in BtShared.pLock list */ +}; + +/* Candidate values for BtLock.eLock */ +#define READ_LOCK 1 +#define WRITE_LOCK 2 + +/* A Btree handle +** +** A database connection contains a pointer to an instance of +** this object for every database file that it has open. This structure +** is opaque to the database connection. The database connection cannot +** see the internals of this structure and only deals with pointers to +** this structure. +** +** For some database files, the same underlying database cache might be +** shared between multiple connections. In that case, each connection +** has it own instance of this object. But each instance of this object +** points to the same BtShared object. The database cache and the +** schema associated with the database file are all contained within +** the BtShared object. +** +** All fields in this structure are accessed under sqlite3.mutex. +** The pBt pointer itself may not be changed while there exists cursors +** in the referenced BtShared that point back to this Btree since those +** cursors have to go through this Btree to find their BtShared and +** they often do so without holding sqlite3.mutex. +*/ +struct Btree { + sqlite3 *db; /* The database connection holding this btree */ + BtShared *pBt; /* Sharable content of this btree */ + u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ + u8 sharable; /* True if we can share pBt with another db */ + u8 locked; /* True if db currently has pBt locked */ + u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */ + int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ + int nBackup; /* Number of backup operations reading this btree */ + u32 iBDataVersion; /* Combines with pBt->pPager->iDataVersion */ + Btree *pNext; /* List of other sharable Btrees from the same db */ + Btree *pPrev; /* Back pointer of the same list */ +#ifdef SQLITE_DEBUG + u64 nSeek; /* Calls to sqlite3BtreeMovetoUnpacked() */ +#endif +#ifndef SQLITE_OMIT_SHARED_CACHE + BtLock lock; /* Object used to lock page 1 */ +#endif +}; + +/* +** Btree.inTrans may take one of the following values. +** +** If the shared-data extension is enabled, there may be multiple users +** of the Btree structure. At most one of these may open a write transaction, +** but any number may have active read transactions. +** +** These values must match SQLITE_TXN_NONE, SQLITE_TXN_READ, and +** SQLITE_TXN_WRITE +*/ +#define TRANS_NONE 0 +#define TRANS_READ 1 +#define TRANS_WRITE 2 + +#if TRANS_NONE!=SQLITE_TXN_NONE +# error wrong numeric code for no-transaction +#endif +#if TRANS_READ!=SQLITE_TXN_READ +# error wrong numeric code for read-transaction +#endif +#if TRANS_WRITE!=SQLITE_TXN_WRITE +# error wrong numeric code for write-transaction +#endif + + +/* +** An instance of this object represents a single database file. +** +** A single database file can be in use at the same time by two +** or more database connections. When two or more connections are +** sharing the same database file, each connection has it own +** private Btree object for the file and each of those Btrees points +** to this one BtShared object. BtShared.nRef is the number of +** connections currently sharing this database file. +** +** Fields in this structure are accessed under the BtShared.mutex +** mutex, except for nRef and pNext which are accessed under the +** global SQLITE_MUTEX_STATIC_MAIN mutex. The pPager field +** may not be modified once it is initially set as long as nRef>0. +** The pSchema field may be set once under BtShared.mutex and +** thereafter is unchanged as long as nRef>0. +** +** isPending: +** +** If a BtShared client fails to obtain a write-lock on a database +** table (because there exists one or more read-locks on the table), +** the shared-cache enters 'pending-lock' state and isPending is +** set to true. +** +** The shared-cache leaves the 'pending lock' state when either of +** the following occur: +** +** 1) The current writer (BtShared.pWriter) concludes its transaction, OR +** 2) The number of locks held by other connections drops to zero. +** +** while in the 'pending-lock' state, no connection may start a new +** transaction. +** +** This feature is included to help prevent writer-starvation. +*/ +struct BtShared { + Pager *pPager; /* The page cache */ + sqlite3 *db; /* Database connection currently using this Btree */ + BtCursor *pCursor; /* A list of all open cursors */ + MemPage *pPage1; /* First page of the database */ + u8 openFlags; /* Flags to sqlite3BtreeOpen() */ +#ifndef SQLITE_OMIT_AUTOVACUUM + u8 autoVacuum; /* True if auto-vacuum is enabled */ + u8 incrVacuum; /* True if incr-vacuum is enabled */ + u8 bDoTruncate; /* True to truncate db on commit */ +#endif + u8 inTransaction; /* Transaction state */ + u8 max1bytePayload; /* Maximum first byte of cell for a 1-byte payload */ + u8 nReserveWanted; /* Desired number of extra bytes per page */ + u16 btsFlags; /* Boolean parameters. See BTS_* macros below */ + u16 maxLocal; /* Maximum local payload in non-LEAFDATA tables */ + u16 minLocal; /* Minimum local payload in non-LEAFDATA tables */ + u16 maxLeaf; /* Maximum local payload in a LEAFDATA table */ + u16 minLeaf; /* Minimum local payload in a LEAFDATA table */ + u32 pageSize; /* Total number of bytes on a page */ + u32 usableSize; /* Number of usable bytes on each page */ + int nTransaction; /* Number of open transactions (read + write) */ + u32 nPage; /* Number of pages in the database */ + void *pSchema; /* Pointer to space allocated by sqlite3BtreeSchema() */ + void (*xFreeSchema)(void*); /* Destructor for BtShared.pSchema */ + sqlite3_mutex *mutex; /* Non-recursive mutex required to access this object */ + Bitvec *pHasContent; /* Set of pages moved to free-list this transaction */ +#ifndef SQLITE_OMIT_SHARED_CACHE + int nRef; /* Number of references to this structure */ + BtShared *pNext; /* Next on a list of sharable BtShared structs */ + BtLock *pLock; /* List of locks held on this shared-btree struct */ + Btree *pWriter; /* Btree with currently open write transaction */ +#endif + u8 *pTmpSpace; /* Temp space sufficient to hold a single cell */ + int nPreformatSize; /* Size of last cell written by TransferRow() */ +}; + +/* +** Allowed values for BtShared.btsFlags +*/ +#define BTS_READ_ONLY 0x0001 /* Underlying file is readonly */ +#define BTS_PAGESIZE_FIXED 0x0002 /* Page size can no longer be changed */ +#define BTS_SECURE_DELETE 0x0004 /* PRAGMA secure_delete is enabled */ +#define BTS_OVERWRITE 0x0008 /* Overwrite deleted content with zeros */ +#define BTS_FAST_SECURE 0x000c /* Combination of the previous two */ +#define BTS_INITIALLY_EMPTY 0x0010 /* Database was empty at trans start */ +#define BTS_NO_WAL 0x0020 /* Do not open write-ahead-log files */ +#define BTS_EXCLUSIVE 0x0040 /* pWriter has an exclusive lock */ +#define BTS_PENDING 0x0080 /* Waiting for read-locks to clear */ + +/* +** An instance of the following structure is used to hold information +** about a cell. The parseCellPtr() function fills in this structure +** based on information extract from the raw disk page. +*/ +struct CellInfo { + i64 nKey; /* The key for INTKEY tables, or nPayload otherwise */ + u8 *pPayload; /* Pointer to the start of payload */ + u32 nPayload; /* Bytes of payload */ + u16 nLocal; /* Amount of payload held locally, not on overflow */ + u16 nSize; /* Size of the cell content on the main b-tree page */ +}; + +/* +** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than +** this will be declared corrupt. This value is calculated based on a +** maximum database size of 2^31 pages a minimum fanout of 2 for a +** root-node and 3 for all other internal nodes. +** +** If a tree that appears to be taller than this is encountered, it is +** assumed that the database is corrupt. +*/ +#define BTCURSOR_MAX_DEPTH 20 + +/* +** A cursor is a pointer to a particular entry within a particular +** b-tree within a database file. +** +** The entry is identified by its MemPage and the index in +** MemPage.aCell[] of the entry. +** +** A single database file can be shared by two more database connections, +** but cursors cannot be shared. Each cursor is associated with a +** particular database connection identified BtCursor.pBtree.db. +** +** Fields in this structure are accessed under the BtShared.mutex +** found at self->pBt->mutex. +** +** skipNext meaning: +** The meaning of skipNext depends on the value of eState: +** +** eState Meaning of skipNext +** VALID skipNext is meaningless and is ignored +** INVALID skipNext is meaningless and is ignored +** SKIPNEXT sqlite3BtreeNext() is a no-op if skipNext>0 and +** sqlite3BtreePrevious() is no-op if skipNext<0. +** REQUIRESEEK restoreCursorPosition() restores the cursor to +** eState=SKIPNEXT if skipNext!=0 +** FAULT skipNext holds the cursor fault error code. +*/ +struct BtCursor { + u8 eState; /* One of the CURSOR_XXX constants (see below) */ + u8 curFlags; /* zero or more BTCF_* flags defined below */ + u8 curPagerFlags; /* Flags to send to sqlite3PagerGet() */ + u8 hints; /* As configured by CursorSetHints() */ + int skipNext; /* Prev() is noop if negative. Next() is noop if positive. + ** Error code if eState==CURSOR_FAULT */ + Btree *pBtree; /* The Btree to which this cursor belongs */ + Pgno *aOverflow; /* Cache of overflow page locations */ + void *pKey; /* Saved key that was cursor last known position */ + /* All fields above are zeroed when the cursor is allocated. See + ** sqlite3BtreeCursorZero(). Fields that follow must be manually + ** initialized. */ +#define BTCURSOR_FIRST_UNINIT pBt /* Name of first uninitialized field */ + BtShared *pBt; /* The BtShared this cursor points to */ + BtCursor *pNext; /* Forms a linked list of all cursors */ + CellInfo info; /* A parse of the cell we are pointing at */ + i64 nKey; /* Size of pKey, or last integer key */ + Pgno pgnoRoot; /* The root page of this tree */ + i8 iPage; /* Index of current page in apPage */ + u8 curIntKey; /* Value of apPage[0]->intKey */ + u16 ix; /* Current index for apPage[iPage] */ + u16 aiIdx[BTCURSOR_MAX_DEPTH-1]; /* Current index in apPage[i] */ + struct KeyInfo *pKeyInfo; /* Arg passed to comparison function */ + MemPage *pPage; /* Current page */ + MemPage *apPage[BTCURSOR_MAX_DEPTH-1]; /* Stack of parents of current page */ +}; + +/* +** Legal values for BtCursor.curFlags +*/ +#define BTCF_WriteFlag 0x01 /* True if a write cursor */ +#define BTCF_ValidNKey 0x02 /* True if info.nKey is valid */ +#define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ +#define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ +#define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ +#define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ +#define BTCF_Pinned 0x40 /* Cursor is busy and cannot be moved */ + +/* +** Potential values for BtCursor.eState. +** +** CURSOR_INVALID: +** Cursor does not point to a valid entry. This can happen (for example) +** because the table is empty or because BtreeCursorFirst() has not been +** called. +** +** CURSOR_VALID: +** Cursor points to a valid entry. getPayload() etc. may be called. +** +** CURSOR_SKIPNEXT: +** Cursor is valid except that the Cursor.skipNext field is non-zero +** indicating that the next sqlite3BtreeNext() or sqlite3BtreePrevious() +** operation should be a no-op. +** +** CURSOR_REQUIRESEEK: +** The table that this cursor was opened on still exists, but has been +** modified since the cursor was last used. The cursor position is saved +** in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in +** this state, restoreCursorPosition() can be called to attempt to +** seek the cursor to the saved position. +** +** CURSOR_FAULT: +** An unrecoverable error (an I/O error or a malloc failure) has occurred +** on a different connection that shares the BtShared cache with this +** cursor. The error has left the cache in an inconsistent state. +** Do nothing else with this cursor. Any attempt to use the cursor +** should return the error code stored in BtCursor.skipNext +*/ +#define CURSOR_VALID 0 +#define CURSOR_INVALID 1 +#define CURSOR_SKIPNEXT 2 +#define CURSOR_REQUIRESEEK 3 +#define CURSOR_FAULT 4 + +/* +** The database page the PENDING_BYTE occupies. This page is never used. +*/ +# define PENDING_BYTE_PAGE(pBt) PAGER_MJ_PGNO(pBt) + +/* +** These macros define the location of the pointer-map entry for a +** database page. The first argument to each is the number of usable +** bytes on each page of the database (often 1024). The second is the +** page number to look up in the pointer map. +** +** PTRMAP_PAGENO returns the database page number of the pointer-map +** page that stores the required pointer. PTRMAP_PTROFFSET returns +** the offset of the requested map entry. +** +** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page, +** then pgno is returned. So (pgno==PTRMAP_PAGENO(pgsz, pgno)) can be +** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements +** this test. +*/ +#define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno(pBt, pgno) +#define PTRMAP_PTROFFSET(pgptrmap, pgno) (5*(pgno-pgptrmap-1)) +#define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno)) + +/* +** The pointer map is a lookup table that identifies the parent page for +** each child page in the database file. The parent page is the page that +** contains a pointer to the child. Every page in the database contains +** 0 or 1 parent pages. (In this context 'database page' refers +** to any page that is not part of the pointer map itself.) Each pointer map +** entry consists of a single byte 'type' and a 4 byte parent page number. +** The PTRMAP_XXX identifiers below are the valid types. +** +** The purpose of the pointer map is to facility moving pages from one +** position in the file to another as part of autovacuum. When a page +** is moved, the pointer in its parent must be updated to point to the +** new location. The pointer map is used to locate the parent page quickly. +** +** PTRMAP_ROOTPAGE: The database page is a root-page. The page-number is not +** used in this case. +** +** PTRMAP_FREEPAGE: The database page is an unused (free) page. The page-number +** is not used in this case. +** +** PTRMAP_OVERFLOW1: The database page is the first page in a list of +** overflow pages. The page number identifies the page that +** contains the cell with a pointer to this overflow page. +** +** PTRMAP_OVERFLOW2: The database page is the second or later page in a list of +** overflow pages. The page-number identifies the previous +** page in the overflow page list. +** +** PTRMAP_BTREE: The database page is a non-root btree page. The page number +** identifies the parent page in the btree. +*/ +#define PTRMAP_ROOTPAGE 1 +#define PTRMAP_FREEPAGE 2 +#define PTRMAP_OVERFLOW1 3 +#define PTRMAP_OVERFLOW2 4 +#define PTRMAP_BTREE 5 + +/* A bunch of assert() statements to check the transaction state variables +** of handle p (type Btree*) are internally consistent. +*/ +#define btreeIntegrity(p) \ + assert( p->pBt->inTransaction!=TRANS_NONE || p->pBt->nTransaction==0 ); \ + assert( p->pBt->inTransaction>=p->inTrans ); + + +/* +** The ISAUTOVACUUM macro is used within balance_nonroot() to determine +** if the database supports auto-vacuum or not. Because it is used +** within an expression that is an argument to another macro +** (sqliteMallocRaw), it is not possible to use conditional compilation. +** So, this macro is defined instead. +*/ +#ifndef SQLITE_OMIT_AUTOVACUUM +#define ISAUTOVACUUM (pBt->autoVacuum) +#else +#define ISAUTOVACUUM 0 +#endif + + +/* +** This structure is passed around through all the sanity checking routines +** in order to keep track of some global state information. +** +** The aRef[] array is allocated so that there is 1 bit for each page in +** the database. As the integrity-check proceeds, for each page used in +** the database the corresponding bit is set. This allows integrity-check to +** detect pages that are used twice and orphaned pages (both of which +** indicate corruption). +*/ +typedef struct IntegrityCk IntegrityCk; +struct IntegrityCk { + BtShared *pBt; /* The tree being checked out */ + Pager *pPager; /* The associated pager. Also accessible by pBt->pPager */ + u8 *aPgRef; /* 1 bit per page in the db (see above) */ + Pgno nPage; /* Number of pages in the database */ + int mxErr; /* Stop accumulating errors when this reaches zero */ + int nErr; /* Number of messages written to zErrMsg so far */ + int bOomFault; /* A memory allocation error has occurred */ + const char *zPfx; /* Error message prefix */ + Pgno v1; /* Value for first %u substitution in zPfx */ + int v2; /* Value for second %d substitution in zPfx */ + StrAccum errMsg; /* Accumulate the error message text here */ + u32 *heap; /* Min-heap used for analyzing cell coverage */ + sqlite3 *db; /* Database connection running the check */ +}; + +/* +** Routines to read or write a two- and four-byte big-endian integer values. +*/ +#define get2byte(x) ((x)[0]<<8 | (x)[1]) +#define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v)) +#define get4byte sqlite3Get4byte +#define put4byte sqlite3Put4byte + +/* +** get2byteAligned(), unlike get2byte(), requires that its argument point to a +** two-byte aligned address. get2bytea() is only used for accessing the +** cell addresses in a btree header. +*/ +#if SQLITE_BYTEORDER==4321 +# define get2byteAligned(x) (*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000 +# define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 +# define get2byteAligned(x) _byteswap_ushort(*(u16*)(x)) +#else +# define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) +#endif + +/************** End of btreeInt.h ********************************************/ +/************** Continuing where we left off in btmutex.c ********************/ +#ifndef SQLITE_OMIT_SHARED_CACHE +#if SQLITE_THREADSAFE + +/* +** Obtain the BtShared mutex associated with B-Tree handle p. Also, +** set BtShared.db to the database handle associated with p and the +** p->locked boolean to true. +*/ +static void lockBtreeMutex(Btree *p){ + assert( p->locked==0 ); + assert( sqlite3_mutex_notheld(p->pBt->mutex) ); + assert( sqlite3_mutex_held(p->db->mutex) ); + + sqlite3_mutex_enter(p->pBt->mutex); + p->pBt->db = p->db; + p->locked = 1; +} + +/* +** Release the BtShared mutex associated with B-Tree handle p and +** clear the p->locked boolean. +*/ +static void SQLITE_NOINLINE unlockBtreeMutex(Btree *p){ + BtShared *pBt = p->pBt; + assert( p->locked==1 ); + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( sqlite3_mutex_held(p->db->mutex) ); + assert( p->db==pBt->db ); + + sqlite3_mutex_leave(pBt->mutex); + p->locked = 0; +} + +/* Forward reference */ +static void SQLITE_NOINLINE btreeLockCarefully(Btree *p); + +/* +** Enter a mutex on the given BTree object. +** +** If the object is not sharable, then no mutex is ever required +** and this routine is a no-op. The underlying mutex is non-recursive. +** But we keep a reference count in Btree.wantToLock so the behavior +** of this interface is recursive. +** +** To avoid deadlocks, multiple Btrees are locked in the same order +** by all database connections. The p->pNext is a list of other +** Btrees belonging to the same database connection as the p Btree +** which need to be locked after p. If we cannot get a lock on +** p, then first unlock all of the others on p->pNext, then wait +** for the lock to become available on p, then relock all of the +** subsequent Btrees that desire a lock. +*/ +SQLITE_PRIVATE void sqlite3BtreeEnter(Btree *p){ + /* Some basic sanity checking on the Btree. The list of Btrees + ** connected by pNext and pPrev should be in sorted order by + ** Btree.pBt value. All elements of the list should belong to + ** the same connection. Only shared Btrees are on the list. */ + assert( p->pNext==0 || p->pNext->pBt>p->pBt ); + assert( p->pPrev==0 || p->pPrev->pBtpBt ); + assert( p->pNext==0 || p->pNext->db==p->db ); + assert( p->pPrev==0 || p->pPrev->db==p->db ); + assert( p->sharable || (p->pNext==0 && p->pPrev==0) ); + + /* Check for locking consistency */ + assert( !p->locked || p->wantToLock>0 ); + assert( p->sharable || p->wantToLock==0 ); + + /* We should already hold a lock on the database connection */ + assert( sqlite3_mutex_held(p->db->mutex) ); + + /* Unless the database is sharable and unlocked, then BtShared.db + ** should already be set correctly. */ + assert( (p->locked==0 && p->sharable) || p->pBt->db==p->db ); + + if( !p->sharable ) return; + p->wantToLock++; + if( p->locked ) return; + btreeLockCarefully(p); +} + +/* This is a helper function for sqlite3BtreeLock(). By moving +** complex, but seldom used logic, out of sqlite3BtreeLock() and +** into this routine, we avoid unnecessary stack pointer changes +** and thus help the sqlite3BtreeLock() routine to run much faster +** in the common case. +*/ +static void SQLITE_NOINLINE btreeLockCarefully(Btree *p){ + Btree *pLater; + + /* In most cases, we should be able to acquire the lock we + ** want without having to go through the ascending lock + ** procedure that follows. Just be sure not to block. + */ + if( sqlite3_mutex_try(p->pBt->mutex)==SQLITE_OK ){ + p->pBt->db = p->db; + p->locked = 1; + return; + } + + /* To avoid deadlock, first release all locks with a larger + ** BtShared address. Then acquire our lock. Then reacquire + ** the other BtShared locks that we used to hold in ascending + ** order. + */ + for(pLater=p->pNext; pLater; pLater=pLater->pNext){ + assert( pLater->sharable ); + assert( pLater->pNext==0 || pLater->pNext->pBt>pLater->pBt ); + assert( !pLater->locked || pLater->wantToLock>0 ); + if( pLater->locked ){ + unlockBtreeMutex(pLater); + } + } + lockBtreeMutex(p); + for(pLater=p->pNext; pLater; pLater=pLater->pNext){ + if( pLater->wantToLock ){ + lockBtreeMutex(pLater); + } + } +} + + +/* +** Exit the recursive mutex on a Btree. +*/ +SQLITE_PRIVATE void sqlite3BtreeLeave(Btree *p){ + assert( sqlite3_mutex_held(p->db->mutex) ); + if( p->sharable ){ + assert( p->wantToLock>0 ); + p->wantToLock--; + if( p->wantToLock==0 ){ + unlockBtreeMutex(p); + } + } +} + +#ifndef NDEBUG +/* +** Return true if the BtShared mutex is held on the btree, or if the +** B-Tree is not marked as sharable. +** +** This routine is used only from within assert() statements. +*/ +SQLITE_PRIVATE int sqlite3BtreeHoldsMutex(Btree *p){ + assert( p->sharable==0 || p->locked==0 || p->wantToLock>0 ); + assert( p->sharable==0 || p->locked==0 || p->db==p->pBt->db ); + assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->pBt->mutex) ); + assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->db->mutex) ); + + return (p->sharable==0 || p->locked); +} +#endif + + +/* +** Enter the mutex on every Btree associated with a database +** connection. This is needed (for example) prior to parsing +** a statement since we will be comparing table and column names +** against all schemas and we do not want those schemas being +** reset out from under us. +** +** There is a corresponding leave-all procedures. +** +** Enter the mutexes in accending order by BtShared pointer address +** to avoid the possibility of deadlock when two threads with +** two or more btrees in common both try to lock all their btrees +** at the same instant. +*/ +static void SQLITE_NOINLINE btreeEnterAll(sqlite3 *db){ + int i; + int skipOk = 1; + Btree *p; + assert( sqlite3_mutex_held(db->mutex) ); + for(i=0; inDb; i++){ + p = db->aDb[i].pBt; + if( p && p->sharable ){ + sqlite3BtreeEnter(p); + skipOk = 0; + } + } + db->noSharedCache = skipOk; +} +SQLITE_PRIVATE void sqlite3BtreeEnterAll(sqlite3 *db){ + if( db->noSharedCache==0 ) btreeEnterAll(db); +} +static void SQLITE_NOINLINE btreeLeaveAll(sqlite3 *db){ + int i; + Btree *p; + assert( sqlite3_mutex_held(db->mutex) ); + for(i=0; inDb; i++){ + p = db->aDb[i].pBt; + if( p ) sqlite3BtreeLeave(p); + } +} +SQLITE_PRIVATE void sqlite3BtreeLeaveAll(sqlite3 *db){ + if( db->noSharedCache==0 ) btreeLeaveAll(db); +} + +#ifndef NDEBUG +/* +** Return true if the current thread holds the database connection +** mutex and all required BtShared mutexes. +** +** This routine is used inside assert() statements only. +*/ +SQLITE_PRIVATE int sqlite3BtreeHoldsAllMutexes(sqlite3 *db){ + int i; + if( !sqlite3_mutex_held(db->mutex) ){ + return 0; + } + for(i=0; inDb; i++){ + Btree *p; + p = db->aDb[i].pBt; + if( p && p->sharable && + (p->wantToLock==0 || !sqlite3_mutex_held(p->pBt->mutex)) ){ + return 0; + } + } + return 1; +} +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* +** Return true if the correct mutexes are held for accessing the +** db->aDb[iDb].pSchema structure. The mutexes required for schema +** access are: +** +** (1) The mutex on db +** (2) if iDb!=1, then the mutex on db->aDb[iDb].pBt. +** +** If pSchema is not NULL, then iDb is computed from pSchema and +** db using sqlite3SchemaToIndex(). +*/ +SQLITE_PRIVATE int sqlite3SchemaMutexHeld(sqlite3 *db, int iDb, Schema *pSchema){ + Btree *p; + assert( db!=0 ); + if( pSchema ) iDb = sqlite3SchemaToIndex(db, pSchema); + assert( iDb>=0 && iDbnDb ); + if( !sqlite3_mutex_held(db->mutex) ) return 0; + if( iDb==1 ) return 1; + p = db->aDb[iDb].pBt; + assert( p!=0 ); + return p->sharable==0 || p->locked==1; +} +#endif /* NDEBUG */ + +#else /* SQLITE_THREADSAFE>0 above. SQLITE_THREADSAFE==0 below */ +/* +** The following are special cases for mutex enter routines for use +** in single threaded applications that use shared cache. Except for +** these two routines, all mutex operations are no-ops in that case and +** are null #defines in btree.h. +** +** If shared cache is disabled, then all btree mutex routines, including +** the ones below, are no-ops and are null #defines in btree.h. +*/ + +SQLITE_PRIVATE void sqlite3BtreeEnter(Btree *p){ + p->pBt->db = p->db; +} +SQLITE_PRIVATE void sqlite3BtreeEnterAll(sqlite3 *db){ + int i; + for(i=0; inDb; i++){ + Btree *p = db->aDb[i].pBt; + if( p ){ + p->pBt->db = p->db; + } + } +} +#endif /* if SQLITE_THREADSAFE */ + +#ifndef SQLITE_OMIT_INCRBLOB +/* +** Enter a mutex on a Btree given a cursor owned by that Btree. +** +** These entry points are used by incremental I/O only. Enter() is required +** any time OMIT_SHARED_CACHE is not defined, regardless of whether or not +** the build is threadsafe. Leave() is only required by threadsafe builds. +*/ +SQLITE_PRIVATE void sqlite3BtreeEnterCursor(BtCursor *pCur){ + sqlite3BtreeEnter(pCur->pBtree); +} +# if SQLITE_THREADSAFE +SQLITE_PRIVATE void sqlite3BtreeLeaveCursor(BtCursor *pCur){ + sqlite3BtreeLeave(pCur->pBtree); +} +# endif +#endif /* ifndef SQLITE_OMIT_INCRBLOB */ + +#endif /* ifndef SQLITE_OMIT_SHARED_CACHE */ + +/************** End of btmutex.c *********************************************/ +/************** Begin file btree.c *******************************************/ +/* +** 2004 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements an external (disk-based) database using BTrees. +** See the header comment on "btreeInt.h" for additional information. +** Including a description of file format and an overview of operation. +*/ +/* #include "btreeInt.h" */ + +/* +** The header string that appears at the beginning of every +** SQLite database. +*/ +static const char zMagicHeader[] = SQLITE_FILE_HEADER; + +/* +** Set this global variable to 1 to enable tracing using the TRACE +** macro. +*/ +#if 0 +int sqlite3BtreeTrace=1; /* True to enable tracing */ +# define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);} +#else +# define TRACE(X) +#endif + +/* +** Extract a 2-byte big-endian integer from an array of unsigned bytes. +** But if the value is zero, make it 65536. +** +** This routine is used to extract the "offset to cell content area" value +** from the header of a btree page. If the page size is 65536 and the page +** is empty, the offset should be 65536, but the 2-byte value stores zero. +** This routine makes the necessary adjustment to 65536. +*/ +#define get2byteNotZero(X) (((((int)get2byte(X))-1)&0xffff)+1) + +/* +** Values passed as the 5th argument to allocateBtreePage() +*/ +#define BTALLOC_ANY 0 /* Allocate any page */ +#define BTALLOC_EXACT 1 /* Allocate exact page if possible */ +#define BTALLOC_LE 2 /* Allocate any page <= the parameter */ + +/* +** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not +** defined, or 0 if it is. For example: +** +** bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum); +*/ +#ifndef SQLITE_OMIT_AUTOVACUUM +#define IfNotOmitAV(expr) (expr) +#else +#define IfNotOmitAV(expr) 0 +#endif + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** A list of BtShared objects that are eligible for participation +** in shared cache. This variable has file scope during normal builds, +** but the test harness needs to access it so we make it global for +** test builds. +** +** Access to this variable is protected by SQLITE_MUTEX_STATIC_MAIN. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; +#else +static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; +#endif +#endif /* SQLITE_OMIT_SHARED_CACHE */ + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Enable or disable the shared pager and schema features. +** +** This routine has no effect on existing database connections. +** The shared cache setting effects only future calls to +** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2(). +*/ +SQLITE_API int sqlite3_enable_shared_cache(int enable){ + sqlite3GlobalConfig.sharedCacheEnabled = enable; + return SQLITE_OK; +} +#endif + + + +#ifdef SQLITE_OMIT_SHARED_CACHE + /* + ** The functions querySharedCacheTableLock(), setSharedCacheTableLock(), + ** and clearAllSharedCacheTableLocks() + ** manipulate entries in the BtShared.pLock linked list used to store + ** shared-cache table level locks. If the library is compiled with the + ** shared-cache feature disabled, then there is only ever one user + ** of each BtShared structure and so this locking is not necessary. + ** So define the lock related functions as no-ops. + */ + #define querySharedCacheTableLock(a,b,c) SQLITE_OK + #define setSharedCacheTableLock(a,b,c) SQLITE_OK + #define clearAllSharedCacheTableLocks(a) + #define downgradeAllSharedCacheTableLocks(a) + #define hasSharedCacheTableLock(a,b,c,d) 1 + #define hasReadConflicts(a, b) 0 +#endif + +#ifdef SQLITE_DEBUG +/* +** Return and reset the seek counter for a Btree object. +*/ +SQLITE_PRIVATE sqlite3_uint64 sqlite3BtreeSeekCount(Btree *pBt){ + u64 n = pBt->nSeek; + pBt->nSeek = 0; + return n; +} +#endif + +/* +** Implementation of the SQLITE_CORRUPT_PAGE() macro. Takes a single +** (MemPage*) as an argument. The (MemPage*) must not be NULL. +** +** If SQLITE_DEBUG is not defined, then this macro is equivalent to +** SQLITE_CORRUPT_BKPT. Or, if SQLITE_DEBUG is set, then the log message +** normally produced as a side-effect of SQLITE_CORRUPT_BKPT is augmented +** with the page number and filename associated with the (MemPage*). +*/ +#ifdef SQLITE_DEBUG +int corruptPageError(int lineno, MemPage *p){ + char *zMsg; + sqlite3BeginBenignMalloc(); + zMsg = sqlite3_mprintf("database corruption page %d of %s", + (int)p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0) + ); + sqlite3EndBenignMalloc(); + if( zMsg ){ + sqlite3ReportError(SQLITE_CORRUPT, lineno, zMsg); + } + sqlite3_free(zMsg); + return SQLITE_CORRUPT_BKPT; +} +# define SQLITE_CORRUPT_PAGE(pMemPage) corruptPageError(__LINE__, pMemPage) +#else +# define SQLITE_CORRUPT_PAGE(pMemPage) SQLITE_CORRUPT_PGNO(pMemPage->pgno) +#endif + +#ifndef SQLITE_OMIT_SHARED_CACHE + +#ifdef SQLITE_DEBUG +/* +**** This function is only used as part of an assert() statement. *** +** +** Check to see if pBtree holds the required locks to read or write to the +** table with root page iRoot. Return 1 if it does and 0 if not. +** +** For example, when writing to a table with root-page iRoot via +** Btree connection pBtree: +** +** assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) ); +** +** When writing to an index that resides in a sharable database, the +** caller should have first obtained a lock specifying the root page of +** the corresponding table. This makes things a bit more complicated, +** as this module treats each table as a separate structure. To determine +** the table corresponding to the index being written, this +** function has to search through the database schema. +** +** Instead of a lock on the table/index rooted at page iRoot, the caller may +** hold a write-lock on the schema table (root page 1). This is also +** acceptable. +*/ +static int hasSharedCacheTableLock( + Btree *pBtree, /* Handle that must hold lock */ + Pgno iRoot, /* Root page of b-tree */ + int isIndex, /* True if iRoot is the root of an index b-tree */ + int eLockType /* Required lock type (READ_LOCK or WRITE_LOCK) */ +){ + Schema *pSchema = (Schema *)pBtree->pBt->pSchema; + Pgno iTab = 0; + BtLock *pLock; + + /* If this database is not shareable, or if the client is reading + ** and has the read-uncommitted flag set, then no lock is required. + ** Return true immediately. + */ + if( (pBtree->sharable==0) + || (eLockType==READ_LOCK && (pBtree->db->flags & SQLITE_ReadUncommit)) + ){ + return 1; + } + + /* If the client is reading or writing an index and the schema is + ** not loaded, then it is too difficult to actually check to see if + ** the correct locks are held. So do not bother - just return true. + ** This case does not come up very often anyhow. + */ + if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){ + return 1; + } + + /* Figure out the root-page that the lock should be held on. For table + ** b-trees, this is just the root page of the b-tree being read or + ** written. For index b-trees, it is the root page of the associated + ** table. */ + if( isIndex ){ + HashElem *p; + int bSeen = 0; + for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){ + Index *pIdx = (Index *)sqliteHashData(p); + if( pIdx->tnum==(int)iRoot ){ + if( bSeen ){ + /* Two or more indexes share the same root page. There must + ** be imposter tables. So just return true. The assert is not + ** useful in that case. */ + return 1; + } + iTab = pIdx->pTable->tnum; + bSeen = 1; + } + } + }else{ + iTab = iRoot; + } + + /* Search for the required lock. Either a write-lock on root-page iTab, a + ** write-lock on the schema table, or (if the client is reading) a + ** read-lock on iTab will suffice. Return 1 if any of these are found. */ + for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){ + if( pLock->pBtree==pBtree + && (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1)) + && pLock->eLock>=eLockType + ){ + return 1; + } + } + + /* Failed to find the required lock. */ + return 0; +} +#endif /* SQLITE_DEBUG */ + +#ifdef SQLITE_DEBUG +/* +**** This function may be used as part of assert() statements only. **** +** +** Return true if it would be illegal for pBtree to write into the +** table or index rooted at iRoot because other shared connections are +** simultaneously reading that same table or index. +** +** It is illegal for pBtree to write if some other Btree object that +** shares the same BtShared object is currently reading or writing +** the iRoot table. Except, if the other Btree object has the +** read-uncommitted flag set, then it is OK for the other object to +** have a read cursor. +** +** For example, before writing to any part of the table or index +** rooted at page iRoot, one should call: +** +** assert( !hasReadConflicts(pBtree, iRoot) ); +*/ +static int hasReadConflicts(Btree *pBtree, Pgno iRoot){ + BtCursor *p; + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + if( p->pgnoRoot==iRoot + && p->pBtree!=pBtree + && 0==(p->pBtree->db->flags & SQLITE_ReadUncommit) + ){ + return 1; + } + } + return 0; +} +#endif /* #ifdef SQLITE_DEBUG */ + +/* +** Query to see if Btree handle p may obtain a lock of type eLock +** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return +** SQLITE_OK if the lock may be obtained (by calling +** setSharedCacheTableLock()), or SQLITE_LOCKED if not. +*/ +static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){ + BtShared *pBt = p->pBt; + BtLock *pIter; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); + assert( p->db!=0 ); + assert( !(p->db->flags&SQLITE_ReadUncommit)||eLock==WRITE_LOCK||iTab==1 ); + + /* If requesting a write-lock, then the Btree must have an open write + ** transaction on this file. And, obviously, for this to be so there + ** must be an open write transaction on the file itself. + */ + assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) ); + assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE ); + + /* This routine is a no-op if the shared-cache is not enabled */ + if( !p->sharable ){ + return SQLITE_OK; + } + + /* If some other connection is holding an exclusive lock, the + ** requested lock may not be obtained. + */ + if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){ + sqlite3ConnectionBlocked(p->db, pBt->pWriter->db); + return SQLITE_LOCKED_SHAREDCACHE; + } + + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ + /* The condition (pIter->eLock!=eLock) in the following if(...) + ** statement is a simplification of: + ** + ** (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK) + ** + ** since we know that if eLock==WRITE_LOCK, then no other connection + ** may hold a WRITE_LOCK on any table in this file (since there can + ** only be a single writer). + */ + assert( pIter->eLock==READ_LOCK || pIter->eLock==WRITE_LOCK ); + assert( eLock==READ_LOCK || pIter->pBtree==p || pIter->eLock==READ_LOCK); + if( pIter->pBtree!=p && pIter->iTable==iTab && pIter->eLock!=eLock ){ + sqlite3ConnectionBlocked(p->db, pIter->pBtree->db); + if( eLock==WRITE_LOCK ){ + assert( p==pBt->pWriter ); + pBt->btsFlags |= BTS_PENDING; + } + return SQLITE_LOCKED_SHAREDCACHE; + } + } + return SQLITE_OK; +} +#endif /* !SQLITE_OMIT_SHARED_CACHE */ + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Add a lock on the table with root-page iTable to the shared-btree used +** by Btree handle p. Parameter eLock must be either READ_LOCK or +** WRITE_LOCK. +** +** This function assumes the following: +** +** (a) The specified Btree object p is connected to a sharable +** database (one with the BtShared.sharable flag set), and +** +** (b) No other Btree objects hold a lock that conflicts +** with the requested lock (i.e. querySharedCacheTableLock() has +** already been called and returned SQLITE_OK). +** +** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM +** is returned if a malloc attempt fails. +*/ +static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){ + BtShared *pBt = p->pBt; + BtLock *pLock = 0; + BtLock *pIter; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); + assert( p->db!=0 ); + + /* A connection with the read-uncommitted flag set will never try to + ** obtain a read-lock using this function. The only read-lock obtained + ** by a connection in read-uncommitted mode is on the sqlite_schema + ** table, and that lock is obtained in BtreeBeginTrans(). */ + assert( 0==(p->db->flags&SQLITE_ReadUncommit) || eLock==WRITE_LOCK ); + + /* This function should only be called on a sharable b-tree after it + ** has been determined that no other b-tree holds a conflicting lock. */ + assert( p->sharable ); + assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) ); + + /* First search the list for an existing lock on this table. */ + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ + if( pIter->iTable==iTable && pIter->pBtree==p ){ + pLock = pIter; + break; + } + } + + /* If the above search did not find a BtLock struct associating Btree p + ** with table iTable, allocate one and link it into the list. + */ + if( !pLock ){ + pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock)); + if( !pLock ){ + return SQLITE_NOMEM_BKPT; + } + pLock->iTable = iTable; + pLock->pBtree = p; + pLock->pNext = pBt->pLock; + pBt->pLock = pLock; + } + + /* Set the BtLock.eLock variable to the maximum of the current lock + ** and the requested lock. This means if a write-lock was already held + ** and a read-lock requested, we don't incorrectly downgrade the lock. + */ + assert( WRITE_LOCK>READ_LOCK ); + if( eLock>pLock->eLock ){ + pLock->eLock = eLock; + } + + return SQLITE_OK; +} +#endif /* !SQLITE_OMIT_SHARED_CACHE */ + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Release all the table locks (locks obtained via calls to +** the setSharedCacheTableLock() procedure) held by Btree object p. +** +** This function assumes that Btree p has an open read or write +** transaction. If it does not, then the BTS_PENDING flag +** may be incorrectly cleared. +*/ +static void clearAllSharedCacheTableLocks(Btree *p){ + BtShared *pBt = p->pBt; + BtLock **ppIter = &pBt->pLock; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( p->sharable || 0==*ppIter ); + assert( p->inTrans>0 ); + + while( *ppIter ){ + BtLock *pLock = *ppIter; + assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree ); + assert( pLock->pBtree->inTrans>=pLock->eLock ); + if( pLock->pBtree==p ){ + *ppIter = pLock->pNext; + assert( pLock->iTable!=1 || pLock==&p->lock ); + if( pLock->iTable!=1 ){ + sqlite3_free(pLock); + } + }else{ + ppIter = &pLock->pNext; + } + } + + assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter ); + if( pBt->pWriter==p ){ + pBt->pWriter = 0; + pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); + }else if( pBt->nTransaction==2 ){ + /* This function is called when Btree p is concluding its + ** transaction. If there currently exists a writer, and p is not + ** that writer, then the number of locks held by connections other + ** than the writer must be about to drop to zero. In this case + ** set the BTS_PENDING flag to 0. + ** + ** If there is not currently a writer, then BTS_PENDING must + ** be zero already. So this next line is harmless in that case. + */ + pBt->btsFlags &= ~BTS_PENDING; + } +} + +/* +** This function changes all write-locks held by Btree p into read-locks. +*/ +static void downgradeAllSharedCacheTableLocks(Btree *p){ + BtShared *pBt = p->pBt; + if( pBt->pWriter==p ){ + BtLock *pLock; + pBt->pWriter = 0; + pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); + for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){ + assert( pLock->eLock==READ_LOCK || pLock->pBtree==p ); + pLock->eLock = READ_LOCK; + } + } +} + +#endif /* SQLITE_OMIT_SHARED_CACHE */ + +static void releasePage(MemPage *pPage); /* Forward reference */ +static void releasePageOne(MemPage *pPage); /* Forward reference */ +static void releasePageNotNull(MemPage *pPage); /* Forward reference */ + +/* +***** This routine is used inside of assert() only **** +** +** Verify that the cursor holds the mutex on its BtShared +*/ +#ifdef SQLITE_DEBUG +static int cursorHoldsMutex(BtCursor *p){ + return sqlite3_mutex_held(p->pBt->mutex); +} + +/* Verify that the cursor and the BtShared agree about what is the current +** database connetion. This is important in shared-cache mode. If the database +** connection pointers get out-of-sync, it is possible for routines like +** btreeInitPage() to reference an stale connection pointer that references a +** a connection that has already closed. This routine is used inside assert() +** statements only and for the purpose of double-checking that the btree code +** does keep the database connection pointers up-to-date. +*/ +static int cursorOwnsBtShared(BtCursor *p){ + assert( cursorHoldsMutex(p) ); + return (p->pBtree->db==p->pBt->db); +} +#endif + +/* +** Invalidate the overflow cache of the cursor passed as the first argument. +** on the shared btree structure pBt. +*/ +#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl) + +/* +** Invalidate the overflow page-list cache for all cursors opened +** on the shared btree structure pBt. +*/ +static void invalidateAllOverflowCache(BtShared *pBt){ + BtCursor *p; + assert( sqlite3_mutex_held(pBt->mutex) ); + for(p=pBt->pCursor; p; p=p->pNext){ + invalidateOverflowCache(p); + } +} + +#ifndef SQLITE_OMIT_INCRBLOB +/* +** This function is called before modifying the contents of a table +** to invalidate any incrblob cursors that are open on the +** row or one of the rows being modified. +** +** If argument isClearTable is true, then the entire contents of the +** table is about to be deleted. In this case invalidate all incrblob +** cursors open on any row within the table with root-page pgnoRoot. +** +** Otherwise, if argument isClearTable is false, then the row with +** rowid iRow is being replaced or deleted. In this case invalidate +** only those incrblob cursors open on that specific row. +*/ +static void invalidateIncrblobCursors( + Btree *pBtree, /* The database file to check */ + Pgno pgnoRoot, /* The table that might be changing */ + i64 iRow, /* The rowid that might be changing */ + int isClearTable /* True if all rows are being deleted */ +){ + BtCursor *p; + assert( pBtree->hasIncrblobCur ); + assert( sqlite3BtreeHoldsMutex(pBtree) ); + pBtree->hasIncrblobCur = 0; + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + if( (p->curFlags & BTCF_Incrblob)!=0 ){ + pBtree->hasIncrblobCur = 1; + if( p->pgnoRoot==pgnoRoot && (isClearTable || p->info.nKey==iRow) ){ + p->eState = CURSOR_INVALID; + } + } + } +} + +#else + /* Stub function when INCRBLOB is omitted */ + #define invalidateIncrblobCursors(w,x,y,z) +#endif /* SQLITE_OMIT_INCRBLOB */ + +/* +** Set bit pgno of the BtShared.pHasContent bitvec. This is called +** when a page that previously contained data becomes a free-list leaf +** page. +** +** The BtShared.pHasContent bitvec exists to work around an obscure +** bug caused by the interaction of two useful IO optimizations surrounding +** free-list leaf pages: +** +** 1) When all data is deleted from a page and the page becomes +** a free-list leaf page, the page is not written to the database +** (as free-list leaf pages contain no meaningful data). Sometimes +** such a page is not even journalled (as it will not be modified, +** why bother journalling it?). +** +** 2) When a free-list leaf page is reused, its content is not read +** from the database or written to the journal file (why should it +** be, if it is not at all meaningful?). +** +** By themselves, these optimizations work fine and provide a handy +** performance boost to bulk delete or insert operations. However, if +** a page is moved to the free-list and then reused within the same +** transaction, a problem comes up. If the page is not journalled when +** it is moved to the free-list and it is also not journalled when it +** is extracted from the free-list and reused, then the original data +** may be lost. In the event of a rollback, it may not be possible +** to restore the database to its original configuration. +** +** The solution is the BtShared.pHasContent bitvec. Whenever a page is +** moved to become a free-list leaf page, the corresponding bit is +** set in the bitvec. Whenever a leaf page is extracted from the free-list, +** optimization 2 above is omitted if the corresponding bit is already +** set in BtShared.pHasContent. The contents of the bitvec are cleared +** at the end of every transaction. +*/ +static int btreeSetHasContent(BtShared *pBt, Pgno pgno){ + int rc = SQLITE_OK; + if( !pBt->pHasContent ){ + assert( pgno<=pBt->nPage ); + pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage); + if( !pBt->pHasContent ){ + rc = SQLITE_NOMEM_BKPT; + } + } + if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){ + rc = sqlite3BitvecSet(pBt->pHasContent, pgno); + } + return rc; +} + +/* +** Query the BtShared.pHasContent vector. +** +** This function is called when a free-list leaf page is removed from the +** free-list for reuse. It returns false if it is safe to retrieve the +** page from the pager layer with the 'no-content' flag set. True otherwise. +*/ +static int btreeGetHasContent(BtShared *pBt, Pgno pgno){ + Bitvec *p = pBt->pHasContent; + return p && (pgno>sqlite3BitvecSize(p) || sqlite3BitvecTestNotNull(p, pgno)); +} + +/* +** Clear (destroy) the BtShared.pHasContent bitvec. This should be +** invoked at the conclusion of each write-transaction. +*/ +static void btreeClearHasContent(BtShared *pBt){ + sqlite3BitvecDestroy(pBt->pHasContent); + pBt->pHasContent = 0; +} + +/* +** Release all of the apPage[] pages for a cursor. +*/ +static void btreeReleaseAllCursorPages(BtCursor *pCur){ + int i; + if( pCur->iPage>=0 ){ + for(i=0; iiPage; i++){ + releasePageNotNull(pCur->apPage[i]); + } + releasePageNotNull(pCur->pPage); + pCur->iPage = -1; + } +} + +/* +** The cursor passed as the only argument must point to a valid entry +** when this function is called (i.e. have eState==CURSOR_VALID). This +** function saves the current cursor key in variables pCur->nKey and +** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error +** code otherwise. +** +** If the cursor is open on an intkey table, then the integer key +** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to +** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is +** set to point to a malloced buffer pCur->nKey bytes in size containing +** the key. +*/ +static int saveCursorKey(BtCursor *pCur){ + int rc = SQLITE_OK; + assert( CURSOR_VALID==pCur->eState ); + assert( 0==pCur->pKey ); + assert( cursorHoldsMutex(pCur) ); + + if( pCur->curIntKey ){ + /* Only the rowid is required for a table btree */ + pCur->nKey = sqlite3BtreeIntegerKey(pCur); + }else{ + /* For an index btree, save the complete key content. It is possible + ** that the current key is corrupt. In that case, it is possible that + ** the sqlite3VdbeRecordUnpack() function may overread the buffer by + ** up to the size of 1 varint plus 1 8-byte value when the cursor + ** position is restored. Hence the 17 bytes of padding allocated + ** below. */ + void *pKey; + pCur->nKey = sqlite3BtreePayloadSize(pCur); + pKey = sqlite3Malloc( pCur->nKey + 9 + 8 ); + if( pKey ){ + rc = sqlite3BtreePayload(pCur, 0, (int)pCur->nKey, pKey); + if( rc==SQLITE_OK ){ + memset(((u8*)pKey)+pCur->nKey, 0, 9+8); + pCur->pKey = pKey; + }else{ + sqlite3_free(pKey); + } + }else{ + rc = SQLITE_NOMEM_BKPT; + } + } + assert( !pCur->curIntKey || !pCur->pKey ); + return rc; +} + +/* +** Save the current cursor position in the variables BtCursor.nKey +** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. +** +** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID) +** prior to calling this routine. +*/ +static int saveCursorPosition(BtCursor *pCur){ + int rc; + + assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState ); + assert( 0==pCur->pKey ); + assert( cursorHoldsMutex(pCur) ); + + if( pCur->curFlags & BTCF_Pinned ){ + return SQLITE_CONSTRAINT_PINNED; + } + if( pCur->eState==CURSOR_SKIPNEXT ){ + pCur->eState = CURSOR_VALID; + }else{ + pCur->skipNext = 0; + } + + rc = saveCursorKey(pCur); + if( rc==SQLITE_OK ){ + btreeReleaseAllCursorPages(pCur); + pCur->eState = CURSOR_REQUIRESEEK; + } + + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl|BTCF_AtLast); + return rc; +} + +/* Forward reference */ +static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); + +/* +** Save the positions of all cursors (except pExcept) that are open on +** the table with root-page iRoot. "Saving the cursor position" means that +** the location in the btree is remembered in such a way that it can be +** moved back to the same spot after the btree has been modified. This +** routine is called just before cursor pExcept is used to modify the +** table, for example in BtreeDelete() or BtreeInsert(). +** +** If there are two or more cursors on the same btree, then all such +** cursors should have their BTCF_Multiple flag set. The btreeCursor() +** routine enforces that rule. This routine only needs to be called in +** the uncommon case when pExpect has the BTCF_Multiple flag set. +** +** If pExpect!=NULL and if no other cursors are found on the same root-page, +** then the BTCF_Multiple flag on pExpect is cleared, to avoid another +** pointless call to this routine. +** +** Implementation note: This routine merely checks to see if any cursors +** need to be saved. It calls out to saveCursorsOnList() in the (unusual) +** event that cursors are in need to being saved. +*/ +static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ + BtCursor *p; + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pExcept==0 || pExcept->pBt==pBt ); + for(p=pBt->pCursor; p; p=p->pNext){ + if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; + } + if( p ) return saveCursorsOnList(p, iRoot, pExcept); + if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple; + return SQLITE_OK; +} + +/* This helper routine to saveAllCursors does the actual work of saving +** the cursors if and when a cursor is found that actually requires saving. +** The common case is that no cursors need to be saved, so this routine is +** broken out from its caller to avoid unnecessary stack pointer movement. +*/ +static int SQLITE_NOINLINE saveCursorsOnList( + BtCursor *p, /* The first cursor that needs saving */ + Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */ + BtCursor *pExcept /* Do not save this cursor */ +){ + do{ + if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ + if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ + int rc = saveCursorPosition(p); + if( SQLITE_OK!=rc ){ + return rc; + } + }else{ + testcase( p->iPage>=0 ); + btreeReleaseAllCursorPages(p); + } + } + p = p->pNext; + }while( p ); + return SQLITE_OK; +} + +/* +** Clear the current cursor position. +*/ +SQLITE_PRIVATE void sqlite3BtreeClearCursor(BtCursor *pCur){ + assert( cursorHoldsMutex(pCur) ); + sqlite3_free(pCur->pKey); + pCur->pKey = 0; + pCur->eState = CURSOR_INVALID; +} + +/* +** In this version of BtreeMoveto, pKey is a packed index record +** such as is generated by the OP_MakeRecord opcode. Unpack the +** record and then call BtreeMovetoUnpacked() to do the work. +*/ +static int btreeMoveto( + BtCursor *pCur, /* Cursor open on the btree to be searched */ + const void *pKey, /* Packed key if the btree is an index */ + i64 nKey, /* Integer key for tables. Size of pKey for indices */ + int bias, /* Bias search to the high end */ + int *pRes /* Write search results here */ +){ + int rc; /* Status code */ + UnpackedRecord *pIdxKey; /* Unpacked index key */ + + if( pKey ){ + KeyInfo *pKeyInfo = pCur->pKeyInfo; + assert( nKey==(i64)(int)nKey ); + pIdxKey = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); + if( pIdxKey==0 ) return SQLITE_NOMEM_BKPT; + sqlite3VdbeRecordUnpack(pKeyInfo, (int)nKey, pKey, pIdxKey); + if( pIdxKey->nField==0 || pIdxKey->nField>pKeyInfo->nAllField ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = sqlite3BtreeIndexMoveto(pCur, pIdxKey, pRes); + } + sqlite3DbFree(pCur->pKeyInfo->db, pIdxKey); + }else{ + pIdxKey = 0; + rc = sqlite3BtreeTableMoveto(pCur, nKey, bias, pRes); + } + return rc; +} + +/* +** Restore the cursor to the position it was in (or as close to as possible) +** when saveCursorPosition() was called. Note that this call deletes the +** saved position info stored by saveCursorPosition(), so there can be +** at most one effective restoreCursorPosition() call after each +** saveCursorPosition(). +*/ +static int btreeRestoreCursorPosition(BtCursor *pCur){ + int rc; + int skipNext = 0; + assert( cursorOwnsBtShared(pCur) ); + assert( pCur->eState>=CURSOR_REQUIRESEEK ); + if( pCur->eState==CURSOR_FAULT ){ + return pCur->skipNext; + } + pCur->eState = CURSOR_INVALID; + if( sqlite3FaultSim(410) ){ + rc = SQLITE_IOERR; + }else{ + rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext); + } + if( rc==SQLITE_OK ){ + sqlite3_free(pCur->pKey); + pCur->pKey = 0; + assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); + if( skipNext ) pCur->skipNext = skipNext; + if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ + pCur->eState = CURSOR_SKIPNEXT; + } + } + return rc; +} + +#define restoreCursorPosition(p) \ + (p->eState>=CURSOR_REQUIRESEEK ? \ + btreeRestoreCursorPosition(p) : \ + SQLITE_OK) + +/* +** Determine whether or not a cursor has moved from the position where +** it was last placed, or has been invalidated for any other reason. +** Cursors can move when the row they are pointing at is deleted out +** from under them, for example. Cursor might also move if a btree +** is rebalanced. +** +** Calling this routine with a NULL cursor pointer returns false. +** +** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor +** back to where it ought to be if this routine returns true. +*/ +SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ + assert( EIGHT_BYTE_ALIGNMENT(pCur) + || pCur==sqlite3BtreeFakeValidCursor() ); + assert( offsetof(BtCursor, eState)==0 ); + assert( sizeof(pCur->eState)==1 ); + return CURSOR_VALID != *(u8*)pCur; +} + +/* +** Return a pointer to a fake BtCursor object that will always answer +** false to the sqlite3BtreeCursorHasMoved() routine above. The fake +** cursor returned must not be used with any other Btree interface. +*/ +SQLITE_PRIVATE BtCursor *sqlite3BtreeFakeValidCursor(void){ + static u8 fakeCursor = CURSOR_VALID; + assert( offsetof(BtCursor, eState)==0 ); + return (BtCursor*)&fakeCursor; +} + +/* +** This routine restores a cursor back to its original position after it +** has been moved by some outside activity (such as a btree rebalance or +** a row having been deleted out from under the cursor). +** +** On success, the *pDifferentRow parameter is false if the cursor is left +** pointing at exactly the same row. *pDifferntRow is the row the cursor +** was pointing to has been deleted, forcing the cursor to point to some +** nearby row. +** +** This routine should only be called for a cursor that just returned +** TRUE from sqlite3BtreeCursorHasMoved(). +*/ +SQLITE_PRIVATE int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ + int rc; + + assert( pCur!=0 ); + assert( pCur->eState!=CURSOR_VALID ); + rc = restoreCursorPosition(pCur); + if( rc ){ + *pDifferentRow = 1; + return rc; + } + if( pCur->eState!=CURSOR_VALID ){ + *pDifferentRow = 1; + }else{ + *pDifferentRow = 0; + } + return SQLITE_OK; +} + +#ifdef SQLITE_ENABLE_CURSOR_HINTS +/* +** Provide hints to the cursor. The particular hint given (and the type +** and number of the varargs parameters) is determined by the eHintType +** parameter. See the definitions of the BTREE_HINT_* macros for details. +*/ +SQLITE_PRIVATE void sqlite3BtreeCursorHint(BtCursor *pCur, int eHintType, ...){ + /* Used only by system that substitute their own storage engine */ +} +#endif + +/* +** Provide flag hints to the cursor. +*/ +SQLITE_PRIVATE void sqlite3BtreeCursorHintFlags(BtCursor *pCur, unsigned x){ + assert( x==BTREE_SEEK_EQ || x==BTREE_BULKLOAD || x==0 ); + pCur->hints = x; +} + + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** Given a page number of a regular database page, return the page +** number for the pointer-map page that contains the entry for the +** input page number. +** +** Return 0 (not a valid page) for pgno==1 since there is +** no pointer map associated with page 1. The integrity_check logic +** requires that ptrmapPageno(*,1)!=1. +*/ +static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){ + int nPagesPerMapPage; + Pgno iPtrMap, ret; + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pgno<2 ) return 0; + nPagesPerMapPage = (pBt->usableSize/5)+1; + iPtrMap = (pgno-2)/nPagesPerMapPage; + ret = (iPtrMap*nPagesPerMapPage) + 2; + if( ret==PENDING_BYTE_PAGE(pBt) ){ + ret++; + } + return ret; +} + +/* +** Write an entry into the pointer map. +** +** This routine updates the pointer map entry for page number 'key' +** so that it maps to type 'eType' and parent page number 'pgno'. +** +** If *pRC is initially non-zero (non-SQLITE_OK) then this routine is +** a no-op. If an error occurs, the appropriate error code is written +** into *pRC. +*/ +static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){ + DbPage *pDbPage; /* The pointer map page */ + u8 *pPtrmap; /* The pointer map data */ + Pgno iPtrmap; /* The pointer map page number */ + int offset; /* Offset in pointer map page */ + int rc; /* Return code from subfunctions */ + + if( *pRC ) return; + + assert( sqlite3_mutex_held(pBt->mutex) ); + /* The super-journal page number must never be used as a pointer map page */ + assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); + + assert( pBt->autoVacuum ); + if( key==0 ){ + *pRC = SQLITE_CORRUPT_BKPT; + return; + } + iPtrmap = PTRMAP_PAGENO(pBt, key); + rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0); + if( rc!=SQLITE_OK ){ + *pRC = rc; + return; + } + if( ((char*)sqlite3PagerGetExtra(pDbPage))[0]!=0 ){ + /* The first byte of the extra data is the MemPage.isInit byte. + ** If that byte is set, it means this page is also being used + ** as a btree page. */ + *pRC = SQLITE_CORRUPT_BKPT; + goto ptrmap_exit; + } + offset = PTRMAP_PTROFFSET(iPtrmap, key); + if( offset<0 ){ + *pRC = SQLITE_CORRUPT_BKPT; + goto ptrmap_exit; + } + assert( offset <= (int)pBt->usableSize-5 ); + pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); + + if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ + TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent)); + *pRC= rc = sqlite3PagerWrite(pDbPage); + if( rc==SQLITE_OK ){ + pPtrmap[offset] = eType; + put4byte(&pPtrmap[offset+1], parent); + } + } + +ptrmap_exit: + sqlite3PagerUnref(pDbPage); +} + +/* +** Read an entry from the pointer map. +** +** This routine retrieves the pointer map entry for page 'key', writing +** the type and parent page number to *pEType and *pPgno respectively. +** An error code is returned if something goes wrong, otherwise SQLITE_OK. +*/ +static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ + DbPage *pDbPage; /* The pointer map page */ + int iPtrmap; /* Pointer map page index */ + u8 *pPtrmap; /* Pointer map page data */ + int offset; /* Offset of entry in pointer map */ + int rc; + + assert( sqlite3_mutex_held(pBt->mutex) ); + + iPtrmap = PTRMAP_PAGENO(pBt, key); + rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0); + if( rc!=0 ){ + return rc; + } + pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); + + offset = PTRMAP_PTROFFSET(iPtrmap, key); + if( offset<0 ){ + sqlite3PagerUnref(pDbPage); + return SQLITE_CORRUPT_BKPT; + } + assert( offset <= (int)pBt->usableSize-5 ); + assert( pEType!=0 ); + *pEType = pPtrmap[offset]; + if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]); + + sqlite3PagerUnref(pDbPage); + if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_PGNO(iPtrmap); + return SQLITE_OK; +} + +#else /* if defined SQLITE_OMIT_AUTOVACUUM */ + #define ptrmapPut(w,x,y,z,rc) + #define ptrmapGet(w,x,y,z) SQLITE_OK + #define ptrmapPutOvflPtr(x, y, z, rc) +#endif + +/* +** Given a btree page and a cell index (0 means the first cell on +** the page, 1 means the second cell, and so forth) return a pointer +** to the cell content. +** +** findCellPastPtr() does the same except it skips past the initial +** 4-byte child pointer found on interior pages, if there is one. +** +** This routine works only for pages that do not contain overflow cells. +*/ +#define findCell(P,I) \ + ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) +#define findCellPastPtr(P,I) \ + ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) + + +/* +** This is common tail processing for btreeParseCellPtr() and +** btreeParseCellPtrIndex() for the case when the cell does not fit entirely +** on a single B-tree page. Make necessary adjustments to the CellInfo +** structure. +*/ +static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + /* If the payload will not fit completely on the local page, we have + ** to decide how much to store locally and how much to spill onto + ** overflow pages. The strategy is to minimize the amount of unused + ** space on overflow pages while keeping the amount of local storage + ** in between minLocal and maxLocal. + ** + ** Warning: changing the way overflow payload is distributed in any + ** way will result in an incompatible file format. + */ + int minLocal; /* Minimum amount of payload held locally */ + int maxLocal; /* Maximum amount of payload held locally */ + int surplus; /* Overflow payload available for local storage */ + + minLocal = pPage->minLocal; + maxLocal = pPage->maxLocal; + surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4); + testcase( surplus==maxLocal ); + testcase( surplus==maxLocal+1 ); + if( surplus <= maxLocal ){ + pInfo->nLocal = (u16)surplus; + }else{ + pInfo->nLocal = (u16)minLocal; + } + pInfo->nSize = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell) + 4; +} + +/* +** Given a record with nPayload bytes of payload stored within btree +** page pPage, return the number of bytes of payload stored locally. +*/ +static int btreePayloadToLocal(MemPage *pPage, i64 nPayload){ + int maxLocal; /* Maximum amount of payload held locally */ + maxLocal = pPage->maxLocal; + if( nPayload<=maxLocal ){ + return nPayload; + }else{ + int minLocal; /* Minimum amount of payload held locally */ + int surplus; /* Overflow payload available for local storage */ + minLocal = pPage->minLocal; + surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize-4); + return ( surplus <= maxLocal ) ? surplus : minLocal; + } +} + +/* +** The following routines are implementations of the MemPage.xParseCell() +** method. +** +** Parse a cell content block and fill in the CellInfo structure. +** +** btreeParseCellPtr() => table btree leaf nodes +** btreeParseCellNoPayload() => table btree internal nodes +** btreeParseCellPtrIndex() => index btree nodes +** +** There is also a wrapper function btreeParseCell() that works for +** all MemPage types and that references the cell by index rather than +** by pointer. +*/ +static void btreeParseCellPtrNoPayload( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 ); + assert( pPage->childPtrSize==4 ); +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER(pPage); +#endif + pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); + pInfo->nPayload = 0; + pInfo->nLocal = 0; + pInfo->pPayload = 0; + return; +} +static void btreeParseCellPtr( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + u8 *pIter; /* For scanning through pCell */ + u32 nPayload; /* Number of bytes of cell payload */ + u64 iKey; /* Extracted Key value */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 || pPage->leaf==1 ); + assert( pPage->intKeyLeaf ); + assert( pPage->childPtrSize==0 ); + pIter = pCell; + + /* The next block of code is equivalent to: + ** + ** pIter += getVarint32(pIter, nPayload); + ** + ** The code is inlined to avoid a function call. + */ + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( (*pIter)>=0x80 && pIternKey); + ** + ** The code is inlined and the loop is unrolled for performance. + ** This routine is a high-runner. + */ + iKey = *pIter; + if( iKey>=0x80 ){ + u8 x; + iKey = ((iKey&0x7f)<<7) | ((x = *++pIter) & 0x7f); + if( x>=0x80 ){ + iKey = (iKey<<7) | ((x =*++pIter) & 0x7f); + if( x>=0x80 ){ + iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + if( x>=0x80 ){ + iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + if( x>=0x80 ){ + iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + if( x>=0x80 ){ + iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + if( x>=0x80 ){ + iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + if( x>=0x80 ){ + iKey = (iKey<<8) | (*++pIter); + } + } + } + } + } + } + } + } + pIter++; + + pInfo->nKey = *(i64*)&iKey; + pInfo->nPayload = nPayload; + pInfo->pPayload = pIter; + testcase( nPayload==pPage->maxLocal ); + testcase( nPayload==(u32)pPage->maxLocal+1 ); + if( nPayload<=pPage->maxLocal ){ + /* This is the (easy) common case where the entire payload fits + ** on the local page. No overflow is required. + */ + pInfo->nSize = nPayload + (u16)(pIter - pCell); + if( pInfo->nSize<4 ) pInfo->nSize = 4; + pInfo->nLocal = (u16)nPayload; + }else{ + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); + } +} +static void btreeParseCellPtrIndex( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + u8 *pIter; /* For scanning through pCell */ + u32 nPayload; /* Number of bytes of cell payload */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 || pPage->leaf==1 ); + assert( pPage->intKeyLeaf==0 ); + pIter = pCell + pPage->childPtrSize; + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pIternKey = nPayload; + pInfo->nPayload = nPayload; + pInfo->pPayload = pIter; + testcase( nPayload==pPage->maxLocal ); + testcase( nPayload==(u32)pPage->maxLocal+1 ); + if( nPayload<=pPage->maxLocal ){ + /* This is the (easy) common case where the entire payload fits + ** on the local page. No overflow is required. + */ + pInfo->nSize = nPayload + (u16)(pIter - pCell); + if( pInfo->nSize<4 ) pInfo->nSize = 4; + pInfo->nLocal = (u16)nPayload; + }else{ + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); + } +} +static void btreeParseCell( + MemPage *pPage, /* Page containing the cell */ + int iCell, /* The cell index. First cell is 0 */ + CellInfo *pInfo /* Fill in this structure */ +){ + pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo); +} + +/* +** The following routines are implementations of the MemPage.xCellSize +** method. +** +** Compute the total number of bytes that a Cell needs in the cell +** data area of the btree-page. The return number includes the cell +** data header and the local payload, but not any overflow page or +** the space used by the cell pointer. +** +** cellSizePtrNoPayload() => table internal nodes +** cellSizePtr() => all index nodes & table leaf nodes +*/ +static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ + u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + u32 nSize; /* Size value to return */ + +#ifdef SQLITE_DEBUG + /* The value returned by this function should always be the same as + ** the (CellInfo.nSize) value found by doing a full parse of the + ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of + ** this function verifies that this invariant is not violated. */ + CellInfo debuginfo; + pPage->xParseCell(pPage, pCell, &debuginfo); +#endif + + nSize = *pIter; + if( nSize>=0x80 ){ + pEnd = &pIter[8]; + nSize &= 0x7f; + do{ + nSize = (nSize<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pIterintKey ){ + /* pIter now points at the 64-bit integer key value, a variable length + ** integer. The following block moves pIter to point at the first byte + ** past the end of the key value. */ + pEnd = &pIter[9]; + while( (*pIter++)&0x80 && pItermaxLocal ); + testcase( nSize==(u32)pPage->maxLocal+1 ); + if( nSize<=pPage->maxLocal ){ + nSize += (u32)(pIter - pCell); + if( nSize<4 ) nSize = 4; + }else{ + int minLocal = pPage->minLocal; + nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4); + testcase( nSize==pPage->maxLocal ); + testcase( nSize==(u32)pPage->maxLocal+1 ); + if( nSize>pPage->maxLocal ){ + nSize = minLocal; + } + nSize += 4 + (u16)(pIter - pCell); + } + assert( nSize==debuginfo.nSize || CORRUPT_DB ); + return (u16)nSize; +} +static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){ + u8 *pIter = pCell + 4; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + +#ifdef SQLITE_DEBUG + /* The value returned by this function should always be the same as + ** the (CellInfo.nSize) value found by doing a full parse of the + ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of + ** this function verifies that this invariant is not violated. */ + CellInfo debuginfo; + pPage->xParseCell(pPage, pCell, &debuginfo); +#else + UNUSED_PARAMETER(pPage); +#endif + + assert( pPage->childPtrSize==4 ); + pEnd = pIter + 9; + while( (*pIter++)&0x80 && pIterxCellSize(pPage, findCell(pPage, iCell)); +} +#endif + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** The cell pCell is currently part of page pSrc but will ultimately be part +** of pPage. (pSrc and pPager are often the same.) If pCell contains a +** pointer to an overflow page, insert an entry into the pointer-map for +** the overflow page that will be valid after pCell has been moved to pPage. +*/ +static void ptrmapPutOvflPtr(MemPage *pPage, MemPage *pSrc, u8 *pCell,int *pRC){ + CellInfo info; + if( *pRC ) return; + assert( pCell!=0 ); + pPage->xParseCell(pPage, pCell, &info); + if( info.nLocalaDataEnd, pCell, pCell+info.nLocal) ){ + testcase( pSrc!=pPage ); + *pRC = SQLITE_CORRUPT_BKPT; + return; + } + ovfl = get4byte(&pCell[info.nSize-4]); + ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); + } +} +#endif + + +/* +** Defragment the page given. This routine reorganizes cells within the +** page so that there are no free-blocks on the free-block list. +** +** Parameter nMaxFrag is the maximum amount of fragmented space that may be +** present in the page after this routine returns. +** +** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a +** b-tree page so that there are no freeblocks or fragment bytes, all +** unused bytes are contained in the unallocated space region, and all +** cells are packed tightly at the end of the page. +*/ +static int defragmentPage(MemPage *pPage, int nMaxFrag){ + int i; /* Loop counter */ + int pc; /* Address of the i-th cell */ + int hdr; /* Offset to the page header */ + int size; /* Size of a cell */ + int usableSize; /* Number of usable bytes on a page */ + int cellOffset; /* Offset to the cell pointer array */ + int cbrk; /* Offset to the cell content area */ + int nCell; /* Number of cells on the page */ + unsigned char *data; /* The page data */ + unsigned char *temp; /* Temp area for cell content */ + unsigned char *src; /* Source of content */ + int iCellFirst; /* First allowable cell index */ + int iCellLast; /* Last possible cell index */ + int iCellStart; /* First cell offset in input */ + + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( pPage->pBt!=0 ); + assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); + assert( pPage->nOverflow==0 ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + temp = 0; + src = data = pPage->aData; + hdr = pPage->hdrOffset; + cellOffset = pPage->cellOffset; + nCell = pPage->nCell; + assert( nCell==get2byte(&data[hdr+3]) || CORRUPT_DB ); + iCellFirst = cellOffset + 2*nCell; + usableSize = pPage->pBt->usableSize; + + /* This block handles pages with two or fewer free blocks and nMaxFrag + ** or fewer fragmented bytes. In this case it is faster to move the + ** two (or one) blocks of cells using memmove() and add the required + ** offsets to each pointer in the cell-pointer array than it is to + ** reconstruct the entire page. */ + if( (int)data[hdr+7]<=nMaxFrag ){ + int iFree = get2byte(&data[hdr+1]); + if( iFree>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage); + if( iFree ){ + int iFree2 = get2byte(&data[iFree]); + if( iFree2>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage); + if( 0==iFree2 || (data[iFree2]==0 && data[iFree2+1]==0) ){ + u8 *pEnd = &data[cellOffset + nCell*2]; + u8 *pAddr; + int sz2 = 0; + int sz = get2byte(&data[iFree+2]); + int top = get2byte(&data[hdr+5]); + if( top>=iFree ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + if( iFree2 ){ + if( iFree+sz>iFree2 ) return SQLITE_CORRUPT_PAGE(pPage); + sz2 = get2byte(&data[iFree2+2]); + if( iFree2+sz2 > usableSize ) return SQLITE_CORRUPT_PAGE(pPage); + memmove(&data[iFree+sz+sz2], &data[iFree+sz], iFree2-(iFree+sz)); + sz += sz2; + }else if( NEVER(iFree+sz>usableSize) ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + + cbrk = top+sz; + assert( cbrk+(iFree-top) <= usableSize ); + memmove(&data[cbrk], &data[top], iFree-top); + for(pAddr=&data[cellOffset]; pAddriCellLast ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( pc>=iCellStart && pc<=iCellLast ); + size = pPage->xCellSize(pPage, &src[pc]); + cbrk -= size; + if( cbrkusableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( cbrk+size<=usableSize && cbrk>=iCellStart ); + testcase( cbrk+size==usableSize ); + testcase( pc+size==usableSize ); + put2byte(pAddr, cbrk); + if( temp==0 ){ + if( cbrk==pc ) continue; + temp = sqlite3PagerTempSpace(pPage->pBt->pPager); + memcpy(&temp[iCellStart], &data[iCellStart], usableSize - iCellStart); + src = temp; + } + memcpy(&data[cbrk], &src[pc], size); + } + data[hdr+7] = 0; + + defragment_out: + assert( pPage->nFree>=0 ); + if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( cbrk>=iCellFirst ); + put2byte(&data[hdr+5], cbrk); + data[hdr+1] = 0; + data[hdr+2] = 0; + memset(&data[iCellFirst], 0, cbrk-iCellFirst); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + return SQLITE_OK; +} + +/* +** Search the free-list on page pPg for space to store a cell nByte bytes in +** size. If one can be found, return a pointer to the space and remove it +** from the free-list. +** +** If no suitable space can be found on the free-list, return NULL. +** +** This function may detect corruption within pPg. If corruption is +** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned. +** +** Slots on the free list that are between 1 and 3 bytes larger than nByte +** will be ignored if adding the extra space to the fragmentation count +** causes the fragmentation count to exceed 60. +*/ +static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ + const int hdr = pPg->hdrOffset; /* Offset to page header */ + u8 * const aData = pPg->aData; /* Page data */ + int iAddr = hdr + 1; /* Address of ptr to pc */ + int pc = get2byte(&aData[iAddr]); /* Address of a free slot */ + int x; /* Excess size of the slot */ + int maxPC = pPg->pBt->usableSize - nByte; /* Max address for a usable slot */ + int size; /* Size of the free slot */ + + assert( pc>0 ); + while( pc<=maxPC ){ + /* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each + ** freeblock form a big-endian integer which is the size of the freeblock + ** in bytes, including the 4-byte header. */ + size = get2byte(&aData[pc+2]); + if( (x = size - nByte)>=0 ){ + testcase( x==4 ); + testcase( x==3 ); + if( x<4 ){ + /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total + ** number of bytes in fragments may not exceed 60. */ + if( aData[hdr+7]>57 ) return 0; + + /* Remove the slot from the free-list. Update the number of + ** fragmented bytes within the page. */ + memcpy(&aData[iAddr], &aData[pc], 2); + aData[hdr+7] += (u8)x; + testcase( pc+x>maxPC ); + return &aData[pc]; + }else if( x+pc > maxPC ){ + /* This slot extends off the end of the usable part of the page */ + *pRc = SQLITE_CORRUPT_PAGE(pPg); + return 0; + }else{ + /* The slot remains on the free-list. Reduce its size to account + ** for the portion used by the new allocation. */ + put2byte(&aData[pc+2], x); + } + return &aData[pc + x]; + } + iAddr = pc; + pc = get2byte(&aData[pc]); + if( pc<=iAddr+size ){ + if( pc ){ + /* The next slot in the chain is not past the end of the current slot */ + *pRc = SQLITE_CORRUPT_PAGE(pPg); + } + return 0; + } + } + if( pc>maxPC+nByte-4 ){ + /* The free slot chain extends off the end of the page */ + *pRc = SQLITE_CORRUPT_PAGE(pPg); + } + return 0; +} + +/* +** Allocate nByte bytes of space from within the B-Tree page passed +** as the first argument. Write into *pIdx the index into pPage->aData[] +** of the first byte of allocated space. Return either SQLITE_OK or +** an error code (usually SQLITE_CORRUPT). +** +** The caller guarantees that there is sufficient space to make the +** allocation. This routine might need to defragment in order to bring +** all the space together, however. This routine will avoid using +** the first two bytes past the cell pointer area since presumably this +** allocation is being made in order to insert a new cell, so we will +** also end up needing a new cell pointer. +*/ +static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ + const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */ + u8 * const data = pPage->aData; /* Local cache of pPage->aData */ + int top; /* First byte of cell content area */ + int rc = SQLITE_OK; /* Integer return code */ + int gap; /* First byte of gap between cell pointers and cell content */ + + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( pPage->pBt ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( nByte>=0 ); /* Minimum cell size is 4 */ + assert( pPage->nFree>=nByte ); + assert( pPage->nOverflow==0 ); + assert( nByte < (int)(pPage->pBt->usableSize-8) ); + + assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf ); + gap = pPage->cellOffset + 2*pPage->nCell; + assert( gap<=65536 ); + /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size + ** and the reserved space is zero (the usual value for reserved space) + ** then the cell content offset of an empty page wants to be 65536. + ** However, that integer is too large to be stored in a 2-byte unsigned + ** integer, so a value of 0 is used in its place. */ + top = get2byte(&data[hdr+5]); + assert( top<=(int)pPage->pBt->usableSize ); /* by btreeComputeFreeSpace() */ + if( gap>top ){ + if( top==0 && pPage->pBt->usableSize==65536 ){ + top = 65536; + }else{ + return SQLITE_CORRUPT_PAGE(pPage); + } + } + + /* If there is enough space between gap and top for one more cell pointer, + ** and if the freelist is not empty, then search the + ** freelist looking for a slot big enough to satisfy the request. + */ + testcase( gap+2==top ); + testcase( gap+1==top ); + testcase( gap==top ); + if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ + u8 *pSpace = pageFindSlot(pPage, nByte, &rc); + if( pSpace ){ + int g2; + assert( pSpace+nByte<=data+pPage->pBt->usableSize ); + *pIdx = g2 = (int)(pSpace-data); + if( g2<=gap ){ + return SQLITE_CORRUPT_PAGE(pPage); + }else{ + return SQLITE_OK; + } + }else if( rc ){ + return rc; + } + } + + /* The request could not be fulfilled using a freelist slot. Check + ** to see if defragmentation is necessary. + */ + testcase( gap+2+nByte==top ); + if( gap+2+nByte>top ){ + assert( pPage->nCell>0 || CORRUPT_DB ); + assert( pPage->nFree>=0 ); + rc = defragmentPage(pPage, MIN(4, pPage->nFree - (2+nByte))); + if( rc ) return rc; + top = get2byteNotZero(&data[hdr+5]); + assert( gap+2+nByte<=top ); + } + + + /* Allocate memory from the gap in between the cell pointer array + ** and the cell content area. The btreeComputeFreeSpace() call has already + ** validated the freelist. Given that the freelist is valid, there + ** is no way that the allocation can extend off the end of the page. + ** The assert() below verifies the previous sentence. + */ + top -= nByte; + put2byte(&data[hdr+5], top); + assert( top+nByte <= (int)pPage->pBt->usableSize ); + *pIdx = top; + return SQLITE_OK; +} + +/* +** Return a section of the pPage->aData to the freelist. +** The first byte of the new free block is pPage->aData[iStart] +** and the size of the block is iSize bytes. +** +** Adjacent freeblocks are coalesced. +** +** Even though the freeblock list was checked by btreeComputeFreeSpace(), +** that routine will not detect overlap between cells or freeblocks. Nor +** does it detect cells or freeblocks that encrouch into the reserved bytes +** at the end of the page. So do additional corruption checks inside this +** routine and return SQLITE_CORRUPT if any problems are found. +*/ +static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ + u16 iPtr; /* Address of ptr to next freeblock */ + u16 iFreeBlk; /* Address of the next freeblock */ + u8 hdr; /* Page header size. 0 or 100 */ + u8 nFrag = 0; /* Reduction in fragmentation */ + u16 iOrigSize = iSize; /* Original value of iSize */ + u16 x; /* Offset to cell content area */ + u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */ + unsigned char *data = pPage->aData; /* Page content */ + + assert( pPage->pBt!=0 ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); + assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( iSize>=4 ); /* Minimum cell size is 4 */ + assert( iStart<=pPage->pBt->usableSize-4 ); + + /* The list of freeblocks must be in ascending order. Find the + ** spot on the list where iStart should be inserted. + */ + hdr = pPage->hdrOffset; + iPtr = hdr + 1; + if( data[iPtr+1]==0 && data[iPtr]==0 ){ + iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ + }else{ + while( (iFreeBlk = get2byte(&data[iPtr]))pPage->pBt->usableSize-4 ){ /* TH3: corrupt081.100 */ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( iFreeBlk>iPtr || iFreeBlk==0 || CORRUPT_DB ); + + /* At this point: + ** iFreeBlk: First freeblock after iStart, or zero if none + ** iPtr: The address of a pointer to iFreeBlk + ** + ** Check to see if iFreeBlk should be coalesced onto the end of iStart. + */ + if( iFreeBlk && iEnd+3>=iFreeBlk ){ + nFrag = iFreeBlk - iEnd; + if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_PAGE(pPage); + iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); + if( iEnd > pPage->pBt->usableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + iSize = iEnd - iStart; + iFreeBlk = get2byte(&data[iFreeBlk]); + } + + /* If iPtr is another freeblock (that is, if iPtr is not the freelist + ** pointer in the page header) then check to see if iStart should be + ** coalesced onto the end of iPtr. + */ + if( iPtr>hdr+1 ){ + int iPtrEnd = iPtr + get2byte(&data[iPtr+2]); + if( iPtrEnd+3>=iStart ){ + if( iPtrEnd>iStart ) return SQLITE_CORRUPT_PAGE(pPage); + nFrag += iStart - iPtrEnd; + iSize = iEnd - iPtr; + iStart = iPtr; + } + } + if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_PAGE(pPage); + data[hdr+7] -= nFrag; + } + x = get2byte(&data[hdr+5]); + if( iStart<=x ){ + /* The new freeblock is at the beginning of the cell content area, + ** so just extend the cell content area rather than create another + ** freelist entry */ + if( iStartpBt->btsFlags & BTS_FAST_SECURE ){ + /* Overwrite deleted information with zeros when the secure_delete + ** option is enabled */ + memset(&data[iStart], 0, iSize); + } + put2byte(&data[iStart], iFreeBlk); + put2byte(&data[iStart+2], iSize); + pPage->nFree += iOrigSize; + return SQLITE_OK; +} + +/* +** Decode the flags byte (the first byte of the header) for a page +** and initialize fields of the MemPage structure accordingly. +** +** Only the following combinations are supported. Anything different +** indicates a corrupt database files: +** +** PTF_ZERODATA +** PTF_ZERODATA | PTF_LEAF +** PTF_LEAFDATA | PTF_INTKEY +** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF +*/ +static int decodeFlags(MemPage *pPage, int flagByte){ + BtShared *pBt; /* A copy of pPage->pBt */ + + assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); + flagByte &= ~PTF_LEAF; + pPage->childPtrSize = 4-4*pPage->leaf; + pPage->xCellSize = cellSizePtr; + pBt = pPage->pBt; + if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ + /* EVIDENCE-OF: R-07291-35328 A value of 5 (0x05) means the page is an + ** interior table b-tree page. */ + assert( (PTF_LEAFDATA|PTF_INTKEY)==5 ); + /* EVIDENCE-OF: R-26900-09176 A value of 13 (0x0d) means the page is a + ** leaf table b-tree page. */ + assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); + pPage->intKey = 1; + if( pPage->leaf ){ + pPage->intKeyLeaf = 1; + pPage->xParseCell = btreeParseCellPtr; + }else{ + pPage->intKeyLeaf = 0; + pPage->xCellSize = cellSizePtrNoPayload; + pPage->xParseCell = btreeParseCellPtrNoPayload; + } + pPage->maxLocal = pBt->maxLeaf; + pPage->minLocal = pBt->minLeaf; + }else if( flagByte==PTF_ZERODATA ){ + /* EVIDENCE-OF: R-43316-37308 A value of 2 (0x02) means the page is an + ** interior index b-tree page. */ + assert( (PTF_ZERODATA)==2 ); + /* EVIDENCE-OF: R-59615-42828 A value of 10 (0x0a) means the page is a + ** leaf index b-tree page. */ + assert( (PTF_ZERODATA|PTF_LEAF)==10 ); + pPage->intKey = 0; + pPage->intKeyLeaf = 0; + pPage->xParseCell = btreeParseCellPtrIndex; + pPage->maxLocal = pBt->maxLocal; + pPage->minLocal = pBt->minLocal; + }else{ + /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is + ** an error. */ + return SQLITE_CORRUPT_PAGE(pPage); + } + pPage->max1bytePayload = pBt->max1bytePayload; + return SQLITE_OK; +} + +/* +** Compute the amount of freespace on the page. In other words, fill +** in the pPage->nFree field. +*/ +static int btreeComputeFreeSpace(MemPage *pPage){ + int pc; /* Address of a freeblock within pPage->aData[] */ + u8 hdr; /* Offset to beginning of page header */ + u8 *data; /* Equal to pPage->aData */ + int usableSize; /* Amount of usable space on each page */ + int nFree; /* Number of unused bytes on the page */ + int top; /* First byte of the cell content area */ + int iCellFirst; /* First allowable cell or freeblock offset */ + int iCellLast; /* Last possible cell or freeblock offset */ + + assert( pPage->pBt!=0 ); + assert( pPage->pBt->db!=0 ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); + assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); + assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); + assert( pPage->isInit==1 ); + assert( pPage->nFree<0 ); + + usableSize = pPage->pBt->usableSize; + hdr = pPage->hdrOffset; + data = pPage->aData; + /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates + ** the start of the cell content area. A zero value for this integer is + ** interpreted as 65536. */ + top = get2byteNotZero(&data[hdr+5]); + iCellFirst = hdr + 8 + pPage->childPtrSize + 2*pPage->nCell; + iCellLast = usableSize - 4; + + /* Compute the total free space on the page + ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the + ** start of the first freeblock on the page, or is zero if there are no + ** freeblocks. */ + pc = get2byte(&data[hdr+1]); + nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */ + if( pc>0 ){ + u32 next, size; + if( pciCellLast ){ + /* Freeblock off the end of the page */ + return SQLITE_CORRUPT_PAGE(pPage); + } + next = get2byte(&data[pc]); + size = get2byte(&data[pc+2]); + nFree = nFree + size; + if( next<=pc+size+3 ) break; + pc = next; + } + if( next>0 ){ + /* Freeblock not in ascending order */ + return SQLITE_CORRUPT_PAGE(pPage); + } + if( pc+size>(unsigned int)usableSize ){ + /* Last freeblock extends past page end */ + return SQLITE_CORRUPT_PAGE(pPage); + } + } + + /* At this point, nFree contains the sum of the offset to the start + ** of the cell-content area plus the number of free bytes within + ** the cell-content area. If this is greater than the usable-size + ** of the page, then the page must be corrupted. This check also + ** serves to verify that the offset to the start of the cell-content + ** area, according to the page header, lies within the page. + */ + if( nFree>usableSize || nFreenFree = (u16)(nFree - iCellFirst); + return SQLITE_OK; +} + +/* +** Do additional sanity check after btreeInitPage() if +** PRAGMA cell_size_check=ON +*/ +static SQLITE_NOINLINE int btreeCellSizeCheck(MemPage *pPage){ + int iCellFirst; /* First allowable cell or freeblock offset */ + int iCellLast; /* Last possible cell or freeblock offset */ + int i; /* Index into the cell pointer array */ + int sz; /* Size of a cell */ + int pc; /* Address of a freeblock within pPage->aData[] */ + u8 *data; /* Equal to pPage->aData */ + int usableSize; /* Maximum usable space on the page */ + int cellOffset; /* Start of cell content area */ + + iCellFirst = pPage->cellOffset + 2*pPage->nCell; + usableSize = pPage->pBt->usableSize; + iCellLast = usableSize - 4; + data = pPage->aData; + cellOffset = pPage->cellOffset; + if( !pPage->leaf ) iCellLast--; + for(i=0; inCell; i++){ + pc = get2byteAligned(&data[cellOffset+i*2]); + testcase( pc==iCellFirst ); + testcase( pc==iCellLast ); + if( pciCellLast ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + sz = pPage->xCellSize(pPage, &data[pc]); + testcase( pc+sz==usableSize ); + if( pc+sz>usableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + } + return SQLITE_OK; +} + +/* +** Initialize the auxiliary information for a disk block. +** +** Return SQLITE_OK on success. If we see that the page does +** not contain a well-formed database page, then return +** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not +** guarantee that the page is well-formed. It only shows that +** we failed to detect any corruption. +*/ +static int btreeInitPage(MemPage *pPage){ + u8 *data; /* Equal to pPage->aData */ + BtShared *pBt; /* The main btree structure */ + + assert( pPage->pBt!=0 ); + assert( pPage->pBt->db!=0 ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); + assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); + assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); + assert( pPage->isInit==0 ); + + pBt = pPage->pBt; + data = pPage->aData + pPage->hdrOffset; + /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating + ** the b-tree page type. */ + if( decodeFlags(pPage, data[0]) ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); + pPage->maskPage = (u16)(pBt->pageSize - 1); + pPage->nOverflow = 0; + pPage->cellOffset = pPage->hdrOffset + 8 + pPage->childPtrSize; + pPage->aCellIdx = data + pPage->childPtrSize + 8; + pPage->aDataEnd = pPage->aData + pBt->pageSize; + pPage->aDataOfst = pPage->aData + pPage->childPtrSize; + /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the + ** number of cells on the page. */ + pPage->nCell = get2byte(&data[3]); + if( pPage->nCell>MX_CELL(pBt) ){ + /* To many cells for a single page. The page must be corrupt */ + return SQLITE_CORRUPT_PAGE(pPage); + } + testcase( pPage->nCell==MX_CELL(pBt) ); + /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only + ** possible for a root page of a table that contains no rows) then the + ** offset to the cell content area will equal the page size minus the + ** bytes of reserved space. */ + assert( pPage->nCell>0 + || get2byteNotZero(&data[5])==(int)pBt->usableSize + || CORRUPT_DB ); + pPage->nFree = -1; /* Indicate that this value is yet uncomputed */ + pPage->isInit = 1; + if( pBt->db->flags & SQLITE_CellSizeCk ){ + return btreeCellSizeCheck(pPage); + } + return SQLITE_OK; +} + +/* +** Set up a raw page so that it looks like a database page holding +** no entries. +*/ +static void zeroPage(MemPage *pPage, int flags){ + unsigned char *data = pPage->aData; + BtShared *pBt = pPage->pBt; + u8 hdr = pPage->hdrOffset; + u16 first; + + assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno || CORRUPT_DB ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage) == data ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pBt->btsFlags & BTS_FAST_SECURE ){ + memset(&data[hdr], 0, pBt->usableSize - hdr); + } + data[hdr] = (char)flags; + first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8); + memset(&data[hdr+1], 0, 4); + data[hdr+7] = 0; + put2byte(&data[hdr+5], pBt->usableSize); + pPage->nFree = (u16)(pBt->usableSize - first); + decodeFlags(pPage, flags); + pPage->cellOffset = first; + pPage->aDataEnd = &data[pBt->pageSize]; + pPage->aCellIdx = &data[first]; + pPage->aDataOfst = &data[pPage->childPtrSize]; + pPage->nOverflow = 0; + assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); + pPage->maskPage = (u16)(pBt->pageSize - 1); + pPage->nCell = 0; + pPage->isInit = 1; +} + + +/* +** Convert a DbPage obtained from the pager into a MemPage used by +** the btree layer. +*/ +static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ + MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); + if( pgno!=pPage->pgno ){ + pPage->aData = sqlite3PagerGetData(pDbPage); + pPage->pDbPage = pDbPage; + pPage->pBt = pBt; + pPage->pgno = pgno; + pPage->hdrOffset = pgno==1 ? 100 : 0; + } + assert( pPage->aData==sqlite3PagerGetData(pDbPage) ); + return pPage; +} + +/* +** Get a page from the pager. Initialize the MemPage.pBt and +** MemPage.aData elements if needed. See also: btreeGetUnusedPage(). +** +** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care +** about the content of the page at this time. So do not go to the disk +** to fetch the content. Just fill in the content with zeros for now. +** If in the future we call sqlite3PagerWrite() on this page, that +** means we have started to be concerned about content and the disk +** read should occur at that point. +*/ +static int btreeGetPage( + BtShared *pBt, /* The btree */ + Pgno pgno, /* Number of the page to fetch */ + MemPage **ppPage, /* Return the page in this parameter */ + int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ +){ + int rc; + DbPage *pDbPage; + + assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY ); + assert( sqlite3_mutex_held(pBt->mutex) ); + rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); + if( rc ) return rc; + *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); + return SQLITE_OK; +} + +/* +** Retrieve a page from the pager cache. If the requested page is not +** already in the pager cache return NULL. Initialize the MemPage.pBt and +** MemPage.aData elements if needed. +*/ +static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){ + DbPage *pDbPage; + assert( sqlite3_mutex_held(pBt->mutex) ); + pDbPage = sqlite3PagerLookup(pBt->pPager, pgno); + if( pDbPage ){ + return btreePageFromDbPage(pDbPage, pgno, pBt); + } + return 0; +} + +/* +** Return the size of the database file in pages. If there is any kind of +** error, return ((unsigned int)-1). +*/ +static Pgno btreePagecount(BtShared *pBt){ + return pBt->nPage; +} +SQLITE_PRIVATE Pgno sqlite3BtreeLastPage(Btree *p){ + assert( sqlite3BtreeHoldsMutex(p) ); + return btreePagecount(p->pBt); +} + +/* +** Get a page from the pager and initialize it. +** +** If pCur!=0 then the page is being fetched as part of a moveToChild() +** call. Do additional sanity checking on the page in this case. +** And if the fetch fails, this routine must decrement pCur->iPage. +** +** The page is fetched as read-write unless pCur is not NULL and is +** a read-only cursor. +** +** If an error occurs, then *ppPage is undefined. It +** may remain unchanged, or it may be set to an invalid value. +*/ +static int getAndInitPage( + BtShared *pBt, /* The database file */ + Pgno pgno, /* Number of the page to get */ + MemPage **ppPage, /* Write the page pointer here */ + BtCursor *pCur, /* Cursor to receive the page, or NULL */ + int bReadOnly /* True for a read-only page */ +){ + int rc; + DbPage *pDbPage; + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pCur==0 || ppPage==&pCur->pPage ); + assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); + assert( pCur==0 || pCur->iPage>0 ); + + if( pgno>btreePagecount(pBt) ){ + rc = SQLITE_CORRUPT_BKPT; + goto getAndInitPage_error1; + } + rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); + if( rc ){ + goto getAndInitPage_error1; + } + *ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); + if( (*ppPage)->isInit==0 ){ + btreePageFromDbPage(pDbPage, pgno, pBt); + rc = btreeInitPage(*ppPage); + if( rc!=SQLITE_OK ){ + goto getAndInitPage_error2; + } + } + assert( (*ppPage)->pgno==pgno || CORRUPT_DB ); + assert( (*ppPage)->aData==sqlite3PagerGetData(pDbPage) ); + + /* If obtaining a child page for a cursor, we must verify that the page is + ** compatible with the root page. */ + if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){ + rc = SQLITE_CORRUPT_PGNO(pgno); + goto getAndInitPage_error2; + } + return SQLITE_OK; + +getAndInitPage_error2: + releasePage(*ppPage); +getAndInitPage_error1: + if( pCur ){ + pCur->iPage--; + pCur->pPage = pCur->apPage[pCur->iPage]; + } + testcase( pgno==0 ); + assert( pgno!=0 || rc==SQLITE_CORRUPT ); + return rc; +} + +/* +** Release a MemPage. This should be called once for each prior +** call to btreeGetPage. +** +** Page1 is a special case and must be released using releasePageOne(). +*/ +static void releasePageNotNull(MemPage *pPage){ + assert( pPage->aData ); + assert( pPage->pBt ); + assert( pPage->pDbPage!=0 ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + sqlite3PagerUnrefNotNull(pPage->pDbPage); +} +static void releasePage(MemPage *pPage){ + if( pPage ) releasePageNotNull(pPage); +} +static void releasePageOne(MemPage *pPage){ + assert( pPage!=0 ); + assert( pPage->aData ); + assert( pPage->pBt ); + assert( pPage->pDbPage!=0 ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + sqlite3PagerUnrefPageOne(pPage->pDbPage); +} + +/* +** Get an unused page. +** +** This works just like btreeGetPage() with the addition: +** +** * If the page is already in use for some other purpose, immediately +** release it and return an SQLITE_CURRUPT error. +** * Make sure the isInit flag is clear +*/ +static int btreeGetUnusedPage( + BtShared *pBt, /* The btree */ + Pgno pgno, /* Number of the page to fetch */ + MemPage **ppPage, /* Return the page in this parameter */ + int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ +){ + int rc = btreeGetPage(pBt, pgno, ppPage, flags); + if( rc==SQLITE_OK ){ + if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ + releasePage(*ppPage); + *ppPage = 0; + return SQLITE_CORRUPT_BKPT; + } + (*ppPage)->isInit = 0; + }else{ + *ppPage = 0; + } + return rc; +} + + +/* +** During a rollback, when the pager reloads information into the cache +** so that the cache is restored to its original state at the start of +** the transaction, for each page restored this routine is called. +** +** This routine needs to reset the extra data section at the end of the +** page to agree with the restored data. +*/ +static void pageReinit(DbPage *pData){ + MemPage *pPage; + pPage = (MemPage *)sqlite3PagerGetExtra(pData); + assert( sqlite3PagerPageRefcount(pData)>0 ); + if( pPage->isInit ){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + pPage->isInit = 0; + if( sqlite3PagerPageRefcount(pData)>1 ){ + /* pPage might not be a btree page; it might be an overflow page + ** or ptrmap page or a free page. In those cases, the following + ** call to btreeInitPage() will likely return SQLITE_CORRUPT. + ** But no harm is done by this. And it is very important that + ** btreeInitPage() be called on every btree page so we make + ** the call for every page that comes in for re-initing. */ + btreeInitPage(pPage); + } + } +} + +/* +** Invoke the busy handler for a btree. +*/ +static int btreeInvokeBusyHandler(void *pArg){ + BtShared *pBt = (BtShared*)pArg; + assert( pBt->db ); + assert( sqlite3_mutex_held(pBt->db->mutex) ); + return sqlite3InvokeBusyHandler(&pBt->db->busyHandler); +} + +/* +** Open a database file. +** +** zFilename is the name of the database file. If zFilename is NULL +** then an ephemeral database is created. The ephemeral database might +** be exclusively in memory, or it might use a disk-based memory cache. +** Either way, the ephemeral database will be automatically deleted +** when sqlite3BtreeClose() is called. +** +** If zFilename is ":memory:" then an in-memory database is created +** that is automatically destroyed when it is closed. +** +** The "flags" parameter is a bitmask that might contain bits like +** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY. +** +** If the database is already opened in the same database connection +** and we are in shared cache mode, then the open will fail with an +** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared +** objects in the same database connection since doing so will lead +** to problems with locking. +*/ +SQLITE_PRIVATE int sqlite3BtreeOpen( + sqlite3_vfs *pVfs, /* VFS to use for this b-tree */ + const char *zFilename, /* Name of the file containing the BTree database */ + sqlite3 *db, /* Associated database handle */ + Btree **ppBtree, /* Pointer to new Btree object written here */ + int flags, /* Options */ + int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ +){ + BtShared *pBt = 0; /* Shared part of btree structure */ + Btree *p; /* Handle to return */ + sqlite3_mutex *mutexOpen = 0; /* Prevents a race condition. Ticket #3537 */ + int rc = SQLITE_OK; /* Result code from this function */ + u8 nReserve; /* Byte of unused space on each page */ + unsigned char zDbHeader[100]; /* Database header content */ + + /* True if opening an ephemeral, temporary database */ + const int isTempDb = zFilename==0 || zFilename[0]==0; + + /* Set the variable isMemdb to true for an in-memory database, or + ** false for a file-based database. + */ +#ifdef SQLITE_OMIT_MEMORYDB + const int isMemdb = 0; +#else + const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0) + || (isTempDb && sqlite3TempInMemory(db)) + || (vfsFlags & SQLITE_OPEN_MEMORY)!=0; +#endif + + assert( db!=0 ); + assert( pVfs!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); + assert( (flags&0xff)==flags ); /* flags fit in 8 bits */ + + /* Only a BTREE_SINGLE database can be BTREE_UNORDERED */ + assert( (flags & BTREE_UNORDERED)==0 || (flags & BTREE_SINGLE)!=0 ); + + /* A BTREE_SINGLE database is always a temporary and/or ephemeral */ + assert( (flags & BTREE_SINGLE)==0 || isTempDb ); + + if( isMemdb ){ + flags |= BTREE_MEMORY; + } + if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){ + vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB; + } + p = sqlite3MallocZero(sizeof(Btree)); + if( !p ){ + return SQLITE_NOMEM_BKPT; + } + p->inTrans = TRANS_NONE; + p->db = db; +#ifndef SQLITE_OMIT_SHARED_CACHE + p->lock.pBtree = p; + p->lock.iTable = 1; +#endif + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) + /* + ** If this Btree is a candidate for shared cache, try to find an + ** existing BtShared object that we can share with + */ + if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){ + if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ + int nFilename = sqlite3Strlen30(zFilename)+1; + int nFullPathname = pVfs->mxPathname+1; + char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename)); + MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) + + p->sharable = 1; + if( !zFullPathname ){ + sqlite3_free(p); + return SQLITE_NOMEM_BKPT; + } + if( isMemdb ){ + memcpy(zFullPathname, zFilename, nFilename); + }else{ + rc = sqlite3OsFullPathname(pVfs, zFilename, + nFullPathname, zFullPathname); + if( rc ){ + if( rc==SQLITE_OK_SYMLINK ){ + rc = SQLITE_OK; + }else{ + sqlite3_free(zFullPathname); + sqlite3_free(p); + return rc; + } + } + } +#if SQLITE_THREADSAFE + mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN); + sqlite3_mutex_enter(mutexOpen); + mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); + sqlite3_mutex_enter(mutexShared); +#endif + for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){ + assert( pBt->nRef>0 ); + if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0)) + && sqlite3PagerVfs(pBt->pPager)==pVfs ){ + int iDb; + for(iDb=db->nDb-1; iDb>=0; iDb--){ + Btree *pExisting = db->aDb[iDb].pBt; + if( pExisting && pExisting->pBt==pBt ){ + sqlite3_mutex_leave(mutexShared); + sqlite3_mutex_leave(mutexOpen); + sqlite3_free(zFullPathname); + sqlite3_free(p); + return SQLITE_CONSTRAINT; + } + } + p->pBt = pBt; + pBt->nRef++; + break; + } + } + sqlite3_mutex_leave(mutexShared); + sqlite3_free(zFullPathname); + } +#ifdef SQLITE_DEBUG + else{ + /* In debug mode, we mark all persistent databases as sharable + ** even when they are not. This exercises the locking code and + ** gives more opportunity for asserts(sqlite3_mutex_held()) + ** statements to find locking problems. + */ + p->sharable = 1; + } +#endif + } +#endif + if( pBt==0 ){ + /* + ** The following asserts make sure that structures used by the btree are + ** the right size. This is to guard against size changes that result + ** when compiling on a different architecture. + */ + assert( sizeof(i64)==8 ); + assert( sizeof(u64)==8 ); + assert( sizeof(u32)==4 ); + assert( sizeof(u16)==2 ); + assert( sizeof(Pgno)==4 ); + + pBt = sqlite3MallocZero( sizeof(*pBt) ); + if( pBt==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto btree_open_out; + } + rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, + sizeof(MemPage), flags, vfsFlags, pageReinit); + if( rc==SQLITE_OK ){ + sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap); + rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); + } + if( rc!=SQLITE_OK ){ + goto btree_open_out; + } + pBt->openFlags = (u8)flags; + pBt->db = db; + sqlite3PagerSetBusyHandler(pBt->pPager, btreeInvokeBusyHandler, pBt); + p->pBt = pBt; + + pBt->pCursor = 0; + pBt->pPage1 = 0; + if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY; +#if defined(SQLITE_SECURE_DELETE) + pBt->btsFlags |= BTS_SECURE_DELETE; +#elif defined(SQLITE_FAST_SECURE_DELETE) + pBt->btsFlags |= BTS_OVERWRITE; +#endif + /* EVIDENCE-OF: R-51873-39618 The page size for a database file is + ** determined by the 2-byte integer located at an offset of 16 bytes from + ** the beginning of the database file. */ + pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16); + if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE + || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ + pBt->pageSize = 0; +#ifndef SQLITE_OMIT_AUTOVACUUM + /* If the magic name ":memory:" will create an in-memory database, then + ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if + ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if + ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a + ** regular file-name. In this case the auto-vacuum applies as per normal. + */ + if( zFilename && !isMemdb ){ + pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0); + pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0); + } +#endif + nReserve = 0; + }else{ + /* EVIDENCE-OF: R-37497-42412 The size of the reserved region is + ** determined by the one-byte unsigned integer found at an offset of 20 + ** into the database file header. */ + nReserve = zDbHeader[20]; + pBt->btsFlags |= BTS_PAGESIZE_FIXED; +#ifndef SQLITE_OMIT_AUTOVACUUM + pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); + pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); +#endif + } + rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); + if( rc ) goto btree_open_out; + pBt->usableSize = pBt->pageSize - nReserve; + assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) + /* Add the new BtShared object to the linked list sharable BtShareds. + */ + pBt->nRef = 1; + if( p->sharable ){ + MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) + MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN);) + if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){ + pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST); + if( pBt->mutex==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto btree_open_out; + } + } + sqlite3_mutex_enter(mutexShared); + pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList); + GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt; + sqlite3_mutex_leave(mutexShared); + } +#endif + } + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) + /* If the new Btree uses a sharable pBtShared, then link the new + ** Btree into the list of all sharable Btrees for the same connection. + ** The list is kept in ascending order by pBt address. + */ + if( p->sharable ){ + int i; + Btree *pSib; + for(i=0; inDb; i++){ + if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){ + while( pSib->pPrev ){ pSib = pSib->pPrev; } + if( (uptr)p->pBt<(uptr)pSib->pBt ){ + p->pNext = pSib; + p->pPrev = 0; + pSib->pPrev = p; + }else{ + while( pSib->pNext && (uptr)pSib->pNext->pBt<(uptr)p->pBt ){ + pSib = pSib->pNext; + } + p->pNext = pSib->pNext; + p->pPrev = pSib; + if( p->pNext ){ + p->pNext->pPrev = p; + } + pSib->pNext = p; + } + break; + } + } + } +#endif + *ppBtree = p; + +btree_open_out: + if( rc!=SQLITE_OK ){ + if( pBt && pBt->pPager ){ + sqlite3PagerClose(pBt->pPager, 0); + } + sqlite3_free(pBt); + sqlite3_free(p); + *ppBtree = 0; + }else{ + sqlite3_file *pFile; + + /* If the B-Tree was successfully opened, set the pager-cache size to the + ** default value. Except, when opening on an existing shared pager-cache, + ** do not change the pager-cache size. + */ + if( sqlite3BtreeSchema(p, 0, 0)==0 ){ + sqlite3BtreeSetCacheSize(p, SQLITE_DEFAULT_CACHE_SIZE); + } + + pFile = sqlite3PagerFile(pBt->pPager); + if( pFile->pMethods ){ + sqlite3OsFileControlHint(pFile, SQLITE_FCNTL_PDB, (void*)&pBt->db); + } + } + if( mutexOpen ){ + assert( sqlite3_mutex_held(mutexOpen) ); + sqlite3_mutex_leave(mutexOpen); + } + assert( rc!=SQLITE_OK || sqlite3BtreeConnectionCount(*ppBtree)>0 ); + return rc; +} + +/* +** Decrement the BtShared.nRef counter. When it reaches zero, +** remove the BtShared structure from the sharing list. Return +** true if the BtShared.nRef counter reaches zero and return +** false if it is still positive. +*/ +static int removeFromSharingList(BtShared *pBt){ +#ifndef SQLITE_OMIT_SHARED_CACHE + MUTEX_LOGIC( sqlite3_mutex *pMainMtx; ) + BtShared *pList; + int removed = 0; + + assert( sqlite3_mutex_notheld(pBt->mutex) ); + MUTEX_LOGIC( pMainMtx = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); ) + sqlite3_mutex_enter(pMainMtx); + pBt->nRef--; + if( pBt->nRef<=0 ){ + if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){ + GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext; + }else{ + pList = GLOBAL(BtShared*,sqlite3SharedCacheList); + while( ALWAYS(pList) && pList->pNext!=pBt ){ + pList=pList->pNext; + } + if( ALWAYS(pList) ){ + pList->pNext = pBt->pNext; + } + } + if( SQLITE_THREADSAFE ){ + sqlite3_mutex_free(pBt->mutex); + } + removed = 1; + } + sqlite3_mutex_leave(pMainMtx); + return removed; +#else + return 1; +#endif +} + +/* +** Make sure pBt->pTmpSpace points to an allocation of +** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child +** pointer. +*/ +static SQLITE_NOINLINE int allocateTempSpace(BtShared *pBt){ + assert( pBt!=0 ); + assert( pBt->pTmpSpace==0 ); + /* This routine is called only by btreeCursor() when allocating the + ** first write cursor for the BtShared object */ + assert( pBt->pCursor!=0 && (pBt->pCursor->curFlags & BTCF_WriteFlag)!=0 ); + pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize ); + if( pBt->pTmpSpace==0 ){ + BtCursor *pCur = pBt->pCursor; + pBt->pCursor = pCur->pNext; /* Unlink the cursor */ + memset(pCur, 0, sizeof(*pCur)); + return SQLITE_NOMEM_BKPT; + } + + /* One of the uses of pBt->pTmpSpace is to format cells before + ** inserting them into a leaf page (function fillInCell()). If + ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes + ** by the various routines that manipulate binary cells. Which + ** can mean that fillInCell() only initializes the first 2 or 3 + ** bytes of pTmpSpace, but that the first 4 bytes are copied from + ** it into a database page. This is not actually a problem, but it + ** does cause a valgrind error when the 1 or 2 bytes of unitialized + ** data is passed to system call write(). So to avoid this error, + ** zero the first 4 bytes of temp space here. + ** + ** Also: Provide four bytes of initialized space before the + ** beginning of pTmpSpace as an area available to prepend the + ** left-child pointer to the beginning of a cell. + */ + memset(pBt->pTmpSpace, 0, 8); + pBt->pTmpSpace += 4; + return SQLITE_OK; +} + +/* +** Free the pBt->pTmpSpace allocation +*/ +static void freeTempSpace(BtShared *pBt){ + if( pBt->pTmpSpace ){ + pBt->pTmpSpace -= 4; + sqlite3PageFree(pBt->pTmpSpace); + pBt->pTmpSpace = 0; + } +} + +/* +** Close an open database and invalidate all cursors. +*/ +SQLITE_PRIVATE int sqlite3BtreeClose(Btree *p){ + BtShared *pBt = p->pBt; + + /* Close all cursors opened via this handle. */ + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + + /* Verify that no other cursors have this Btree open */ +#ifdef SQLITE_DEBUG + { + BtCursor *pCur = pBt->pCursor; + while( pCur ){ + BtCursor *pTmp = pCur; + pCur = pCur->pNext; + assert( pTmp->pBtree!=p ); + + } + } +#endif + + /* Rollback any active transaction and free the handle structure. + ** The call to sqlite3BtreeRollback() drops any table-locks held by + ** this handle. + */ + sqlite3BtreeRollback(p, SQLITE_OK, 0); + sqlite3BtreeLeave(p); + + /* If there are still other outstanding references to the shared-btree + ** structure, return now. The remainder of this procedure cleans + ** up the shared-btree. + */ + assert( p->wantToLock==0 && p->locked==0 ); + if( !p->sharable || removeFromSharingList(pBt) ){ + /* The pBt is no longer on the sharing list, so we can access + ** it without having to hold the mutex. + ** + ** Clean out and delete the BtShared object. + */ + assert( !pBt->pCursor ); + sqlite3PagerClose(pBt->pPager, p->db); + if( pBt->xFreeSchema && pBt->pSchema ){ + pBt->xFreeSchema(pBt->pSchema); + } + sqlite3DbFree(0, pBt->pSchema); + freeTempSpace(pBt); + sqlite3_free(pBt); + } + +#ifndef SQLITE_OMIT_SHARED_CACHE + assert( p->wantToLock==0 ); + assert( p->locked==0 ); + if( p->pPrev ) p->pPrev->pNext = p->pNext; + if( p->pNext ) p->pNext->pPrev = p->pPrev; +#endif + + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Change the "soft" limit on the number of pages in the cache. +** Unused and unmodified pages will be recycled when the number of +** pages in the cache exceeds this soft limit. But the size of the +** cache is allowed to grow larger than this limit if it contains +** dirty pages or pages still in active use. +*/ +SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetCachesize(pBt->pPager, mxPage); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} + +/* +** Change the "spill" limit on the number of pages in the cache. +** If the number of pages exceeds this limit during a write transaction, +** the pager might attempt to "spill" pages to the journal early in +** order to free up memory. +** +** The value returned is the current spill size. If zero is passed +** as an argument, no changes are made to the spill size setting, so +** using mxPage of 0 is a way to query the current spill size. +*/ +SQLITE_PRIVATE int sqlite3BtreeSetSpillSize(Btree *p, int mxPage){ + BtShared *pBt = p->pBt; + int res; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + res = sqlite3PagerSetSpillsize(pBt->pPager, mxPage); + sqlite3BtreeLeave(p); + return res; +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Change the limit on the amount of the database file that may be +** memory mapped. +*/ +SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** Change the way data is synced to disk in order to increase or decrease +** how well the database resists damage due to OS crashes and power +** failures. Level 1 is the same as asynchronous (no syncs() occur and +** there is a high probability of damage) Level 2 is the default. There +** is a very low but non-zero probability of damage. Level 3 reduces the +** probability of damage to near zero but with a write performance reduction. +*/ +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +SQLITE_PRIVATE int sqlite3BtreeSetPagerFlags( + Btree *p, /* The btree to set the safety level on */ + unsigned pgFlags /* Various PAGER_* flags */ +){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetFlags(pBt->pPager, pgFlags); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} +#endif + +/* +** Change the default pages size and the number of reserved bytes per page. +** Or, if the page size has already been fixed, return SQLITE_READONLY +** without changing anything. +** +** The page size must be a power of 2 between 512 and 65536. If the page +** size supplied does not meet this constraint then the page size is not +** changed. +** +** Page sizes are constrained to be a power of two so that the region +** of the database file used for locking (beginning at PENDING_BYTE, +** the first byte past the 1GB boundary, 0x40000000) needs to occur +** at the beginning of a page. +** +** If parameter nReserve is less than zero, then the number of reserved +** bytes per page is left unchanged. +** +** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size +** and autovacuum mode can no longer be changed. +*/ +SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ + int rc = SQLITE_OK; + int x; + BtShared *pBt = p->pBt; + assert( nReserve>=0 && nReserve<=255 ); + sqlite3BtreeEnter(p); + pBt->nReserveWanted = nReserve; + x = pBt->pageSize - pBt->usableSize; + if( nReservebtsFlags & BTS_PAGESIZE_FIXED ){ + sqlite3BtreeLeave(p); + return SQLITE_READONLY; + } + assert( nReserve>=0 && nReserve<=255 ); + if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && + ((pageSize-1)&pageSize)==0 ){ + assert( (pageSize & 7)==0 ); + assert( !pBt->pCursor ); + if( nReserve>32 && pageSize==512 ) pageSize = 1024; + pBt->pageSize = (u32)pageSize; + freeTempSpace(pBt); + } + rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); + pBt->usableSize = pBt->pageSize - (u16)nReserve; + if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED; + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Return the currently defined page size +*/ +SQLITE_PRIVATE int sqlite3BtreeGetPageSize(Btree *p){ + return p->pBt->pageSize; +} + +/* +** This function is similar to sqlite3BtreeGetReserve(), except that it +** may only be called if it is guaranteed that the b-tree mutex is already +** held. +** +** This is useful in one special case in the backup API code where it is +** known that the shared b-tree mutex is held, but the mutex on the +** database handle that owns *p is not. In this case if sqlite3BtreeEnter() +** were to be called, it might collide with some other operation on the +** database handle that owns *p, causing undefined behavior. +*/ +SQLITE_PRIVATE int sqlite3BtreeGetReserveNoMutex(Btree *p){ + int n; + assert( sqlite3_mutex_held(p->pBt->mutex) ); + n = p->pBt->pageSize - p->pBt->usableSize; + return n; +} + +/* +** Return the number of bytes of space at the end of every page that +** are intentually left unused. This is the "reserved" space that is +** sometimes used by extensions. +** +** The value returned is the larger of the current reserve size and +** the latest reserve size requested by SQLITE_FILECTRL_RESERVE_BYTES. +** The amount of reserve can only grow - never shrink. +*/ +SQLITE_PRIVATE int sqlite3BtreeGetRequestedReserve(Btree *p){ + int n1, n2; + sqlite3BtreeEnter(p); + n1 = (int)p->pBt->nReserveWanted; + n2 = sqlite3BtreeGetReserveNoMutex(p); + sqlite3BtreeLeave(p); + return n1>n2 ? n1 : n2; +} + + +/* +** Set the maximum page count for a database if mxPage is positive. +** No changes are made if mxPage is 0 or negative. +** Regardless of the value of mxPage, return the maximum page count. +*/ +SQLITE_PRIVATE Pgno sqlite3BtreeMaxPageCount(Btree *p, Pgno mxPage){ + Pgno n; + sqlite3BtreeEnter(p); + n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage); + sqlite3BtreeLeave(p); + return n; +} + +/* +** Change the values for the BTS_SECURE_DELETE and BTS_OVERWRITE flags: +** +** newFlag==0 Both BTS_SECURE_DELETE and BTS_OVERWRITE are cleared +** newFlag==1 BTS_SECURE_DELETE set and BTS_OVERWRITE is cleared +** newFlag==2 BTS_SECURE_DELETE cleared and BTS_OVERWRITE is set +** newFlag==(-1) No changes +** +** This routine acts as a query if newFlag is less than zero +** +** With BTS_OVERWRITE set, deleted content is overwritten by zeros, but +** freelist leaf pages are not written back to the database. Thus in-page +** deleted content is cleared, but freelist deleted content is not. +** +** With BTS_SECURE_DELETE, operation is like BTS_OVERWRITE with the addition +** that freelist leaf pages are written back into the database, increasing +** the amount of disk I/O. +*/ +SQLITE_PRIVATE int sqlite3BtreeSecureDelete(Btree *p, int newFlag){ + int b; + if( p==0 ) return 0; + sqlite3BtreeEnter(p); + assert( BTS_OVERWRITE==BTS_SECURE_DELETE*2 ); + assert( BTS_FAST_SECURE==(BTS_OVERWRITE|BTS_SECURE_DELETE) ); + if( newFlag>=0 ){ + p->pBt->btsFlags &= ~BTS_FAST_SECURE; + p->pBt->btsFlags |= BTS_SECURE_DELETE*newFlag; + } + b = (p->pBt->btsFlags & BTS_FAST_SECURE)/BTS_SECURE_DELETE; + sqlite3BtreeLeave(p); + return b; +} + +/* +** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' +** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it +** is disabled. The default value for the auto-vacuum property is +** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. +*/ +SQLITE_PRIVATE int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ +#ifdef SQLITE_OMIT_AUTOVACUUM + return SQLITE_READONLY; +#else + BtShared *pBt = p->pBt; + int rc = SQLITE_OK; + u8 av = (u8)autoVacuum; + + sqlite3BtreeEnter(p); + if( (pBt->btsFlags & BTS_PAGESIZE_FIXED)!=0 && (av ?1:0)!=pBt->autoVacuum ){ + rc = SQLITE_READONLY; + }else{ + pBt->autoVacuum = av ?1:0; + pBt->incrVacuum = av==2 ?1:0; + } + sqlite3BtreeLeave(p); + return rc; +#endif +} + +/* +** Return the value of the 'auto-vacuum' property. If auto-vacuum is +** enabled 1 is returned. Otherwise 0. +*/ +SQLITE_PRIVATE int sqlite3BtreeGetAutoVacuum(Btree *p){ +#ifdef SQLITE_OMIT_AUTOVACUUM + return BTREE_AUTOVACUUM_NONE; +#else + int rc; + sqlite3BtreeEnter(p); + rc = ( + (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE: + (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL: + BTREE_AUTOVACUUM_INCR + ); + sqlite3BtreeLeave(p); + return rc; +#endif +} + +/* +** If the user has not set the safety-level for this database connection +** using "PRAGMA synchronous", and if the safety-level is not already +** set to the value passed to this function as the second parameter, +** set it so. +*/ +#if SQLITE_DEFAULT_SYNCHRONOUS!=SQLITE_DEFAULT_WAL_SYNCHRONOUS \ + && !defined(SQLITE_OMIT_WAL) +static void setDefaultSyncFlag(BtShared *pBt, u8 safety_level){ + sqlite3 *db; + Db *pDb; + if( (db=pBt->db)!=0 && (pDb=db->aDb)!=0 ){ + while( pDb->pBt==0 || pDb->pBt->pBt!=pBt ){ pDb++; } + if( pDb->bSyncSet==0 + && pDb->safety_level!=safety_level + && pDb!=&db->aDb[1] + ){ + pDb->safety_level = safety_level; + sqlite3PagerSetFlags(pBt->pPager, + pDb->safety_level | (db->flags & PAGER_FLAGS_MASK)); + } + } +} +#else +# define setDefaultSyncFlag(pBt,safety_level) +#endif + +/* Forward declaration */ +static int newDatabase(BtShared*); + + +/* +** Get a reference to pPage1 of the database file. This will +** also acquire a readlock on that file. +** +** SQLITE_OK is returned on success. If the file is not a +** well-formed database file, then SQLITE_CORRUPT is returned. +** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM +** is returned if we run out of memory. +*/ +static int lockBtree(BtShared *pBt){ + int rc; /* Result code from subfunctions */ + MemPage *pPage1; /* Page 1 of the database file */ + u32 nPage; /* Number of pages in the database */ + u32 nPageFile = 0; /* Number of pages in the database file */ + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pBt->pPage1==0 ); + rc = sqlite3PagerSharedLock(pBt->pPager); + if( rc!=SQLITE_OK ) return rc; + rc = btreeGetPage(pBt, 1, &pPage1, 0); + if( rc!=SQLITE_OK ) return rc; + + /* Do some checking to help insure the file we opened really is + ** a valid database file. + */ + nPage = get4byte(28+(u8*)pPage1->aData); + sqlite3PagerPagecount(pBt->pPager, (int*)&nPageFile); + if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){ + nPage = nPageFile; + } + if( (pBt->db->flags & SQLITE_ResetDatabase)!=0 ){ + nPage = 0; + } + if( nPage>0 ){ + u32 pageSize; + u32 usableSize; + u8 *page1 = pPage1->aData; + rc = SQLITE_NOTADB; + /* EVIDENCE-OF: R-43737-39999 Every valid SQLite database file begins + ** with the following 16 bytes (in hex): 53 51 4c 69 74 65 20 66 6f 72 6d + ** 61 74 20 33 00. */ + if( memcmp(page1, zMagicHeader, 16)!=0 ){ + goto page1_init_failed; + } + +#ifdef SQLITE_OMIT_WAL + if( page1[18]>1 ){ + pBt->btsFlags |= BTS_READ_ONLY; + } + if( page1[19]>1 ){ + goto page1_init_failed; + } +#else + if( page1[18]>2 ){ + pBt->btsFlags |= BTS_READ_ONLY; + } + if( page1[19]>2 ){ + goto page1_init_failed; + } + + /* If the read version is set to 2, this database should be accessed + ** in WAL mode. If the log is not already open, open it now. Then + ** return SQLITE_OK and return without populating BtShared.pPage1. + ** The caller detects this and calls this function again. This is + ** required as the version of page 1 currently in the page1 buffer + ** may not be the latest version - there may be a newer one in the log + ** file. + */ + if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ + int isOpen = 0; + rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); + if( rc!=SQLITE_OK ){ + goto page1_init_failed; + }else{ + setDefaultSyncFlag(pBt, SQLITE_DEFAULT_WAL_SYNCHRONOUS+1); + if( isOpen==0 ){ + releasePageOne(pPage1); + return SQLITE_OK; + } + } + rc = SQLITE_NOTADB; + }else{ + setDefaultSyncFlag(pBt, SQLITE_DEFAULT_SYNCHRONOUS+1); + } +#endif + + /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload + ** fractions and the leaf payload fraction values must be 64, 32, and 32. + ** + ** The original design allowed these amounts to vary, but as of + ** version 3.6.0, we require them to be fixed. + */ + if( memcmp(&page1[21], "\100\040\040",3)!=0 ){ + goto page1_init_failed; + } + /* EVIDENCE-OF: R-51873-39618 The page size for a database file is + ** determined by the 2-byte integer located at an offset of 16 bytes from + ** the beginning of the database file. */ + pageSize = (page1[16]<<8) | (page1[17]<<16); + /* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two + ** between 512 and 65536 inclusive. */ + if( ((pageSize-1)&pageSize)!=0 + || pageSize>SQLITE_MAX_PAGE_SIZE + || pageSize<=256 + ){ + goto page1_init_failed; + } + pBt->btsFlags |= BTS_PAGESIZE_FIXED; + assert( (pageSize & 7)==0 ); + /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte + ** integer at offset 20 is the number of bytes of space at the end of + ** each page to reserve for extensions. + ** + ** EVIDENCE-OF: R-37497-42412 The size of the reserved region is + ** determined by the one-byte unsigned integer found at an offset of 20 + ** into the database file header. */ + usableSize = pageSize - page1[20]; + if( (u32)pageSize!=pBt->pageSize ){ + /* After reading the first page of the database assuming a page size + ** of BtShared.pageSize, we have discovered that the page-size is + ** actually pageSize. Unlock the database, leave pBt->pPage1 at + ** zero and return SQLITE_OK. The caller will call this function + ** again with the correct page-size. + */ + releasePageOne(pPage1); + pBt->usableSize = usableSize; + pBt->pageSize = pageSize; + freeTempSpace(pBt); + rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, + pageSize-usableSize); + return rc; + } + if( nPage>nPageFile ){ + if( sqlite3WritableSchema(pBt->db)==0 ){ + rc = SQLITE_CORRUPT_BKPT; + goto page1_init_failed; + }else{ + nPage = nPageFile; + } + } + /* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to + ** be less than 480. In other words, if the page size is 512, then the + ** reserved space size cannot exceed 32. */ + if( usableSize<480 ){ + goto page1_init_failed; + } + pBt->pageSize = pageSize; + pBt->usableSize = usableSize; +#ifndef SQLITE_OMIT_AUTOVACUUM + pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); + pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0); +#endif + } + + /* maxLocal is the maximum amount of payload to store locally for + ** a cell. Make sure it is small enough so that at least minFanout + ** cells can will fit on one page. We assume a 10-byte page header. + ** Besides the payload, the cell must store: + ** 2-byte pointer to the cell + ** 4-byte child pointer + ** 9-byte nKey value + ** 4-byte nData value + ** 4-byte overflow page pointer + ** So a cell consists of a 2-byte pointer, a header which is as much as + ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow + ** page pointer. + */ + pBt->maxLocal = (u16)((pBt->usableSize-12)*64/255 - 23); + pBt->minLocal = (u16)((pBt->usableSize-12)*32/255 - 23); + pBt->maxLeaf = (u16)(pBt->usableSize - 35); + pBt->minLeaf = (u16)((pBt->usableSize-12)*32/255 - 23); + if( pBt->maxLocal>127 ){ + pBt->max1bytePayload = 127; + }else{ + pBt->max1bytePayload = (u8)pBt->maxLocal; + } + assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); + pBt->pPage1 = pPage1; + pBt->nPage = nPage; + return SQLITE_OK; + +page1_init_failed: + releasePageOne(pPage1); + pBt->pPage1 = 0; + return rc; +} + +#ifndef NDEBUG +/* +** Return the number of cursors open on pBt. This is for use +** in assert() expressions, so it is only compiled if NDEBUG is not +** defined. +** +** Only write cursors are counted if wrOnly is true. If wrOnly is +** false then all cursors are counted. +** +** For the purposes of this routine, a cursor is any cursor that +** is capable of reading or writing to the database. Cursors that +** have been tripped into the CURSOR_FAULT state are not counted. +*/ +static int countValidCursors(BtShared *pBt, int wrOnly){ + BtCursor *pCur; + int r = 0; + for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ + if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0) + && pCur->eState!=CURSOR_FAULT ) r++; + } + return r; +} +#endif + +/* +** If there are no outstanding cursors and we are not in the middle +** of a transaction but there is a read lock on the database, then +** this routine unrefs the first page of the database file which +** has the effect of releasing the read lock. +** +** If there is a transaction in progress, this routine is a no-op. +*/ +static void unlockBtreeIfUnused(BtShared *pBt){ + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); + if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ + MemPage *pPage1 = pBt->pPage1; + assert( pPage1->aData ); + assert( sqlite3PagerRefcount(pBt->pPager)==1 ); + pBt->pPage1 = 0; + releasePageOne(pPage1); + } +} + +/* +** If pBt points to an empty file then convert that empty file +** into a new empty database by initializing the first page of +** the database. +*/ +static int newDatabase(BtShared *pBt){ + MemPage *pP1; + unsigned char *data; + int rc; + + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pBt->nPage>0 ){ + return SQLITE_OK; + } + pP1 = pBt->pPage1; + assert( pP1!=0 ); + data = pP1->aData; + rc = sqlite3PagerWrite(pP1->pDbPage); + if( rc ) return rc; + memcpy(data, zMagicHeader, sizeof(zMagicHeader)); + assert( sizeof(zMagicHeader)==16 ); + data[16] = (u8)((pBt->pageSize>>8)&0xff); + data[17] = (u8)((pBt->pageSize>>16)&0xff); + data[18] = 1; + data[19] = 1; + assert( pBt->usableSize<=pBt->pageSize && pBt->usableSize+255>=pBt->pageSize); + data[20] = (u8)(pBt->pageSize - pBt->usableSize); + data[21] = 64; + data[22] = 32; + data[23] = 32; + memset(&data[24], 0, 100-24); + zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA ); + pBt->btsFlags |= BTS_PAGESIZE_FIXED; +#ifndef SQLITE_OMIT_AUTOVACUUM + assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 ); + assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 ); + put4byte(&data[36 + 4*4], pBt->autoVacuum); + put4byte(&data[36 + 7*4], pBt->incrVacuum); +#endif + pBt->nPage = 1; + data[31] = 1; + return SQLITE_OK; +} + +/* +** Initialize the first page of the database file (creating a database +** consisting of a single page and no schema objects). Return SQLITE_OK +** if successful, or an SQLite error code otherwise. +*/ +SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p){ + int rc; + sqlite3BtreeEnter(p); + p->pBt->nPage = 0; + rc = newDatabase(p->pBt); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Attempt to start a new transaction. A write-transaction +** is started if the second argument is nonzero, otherwise a read- +** transaction. If the second argument is 2 or more and exclusive +** transaction is started, meaning that no other process is allowed +** to access the database. A preexisting transaction may not be +** upgraded to exclusive by calling this routine a second time - the +** exclusivity flag only works for a new transaction. +** +** A write-transaction must be started before attempting any +** changes to the database. None of the following routines +** will work unless a transaction is started first: +** +** sqlite3BtreeCreateTable() +** sqlite3BtreeCreateIndex() +** sqlite3BtreeClearTable() +** sqlite3BtreeDropTable() +** sqlite3BtreeInsert() +** sqlite3BtreeDelete() +** sqlite3BtreeUpdateMeta() +** +** If an initial attempt to acquire the lock fails because of lock contention +** and the database was previously unlocked, then invoke the busy handler +** if there is one. But if there was previously a read-lock, do not +** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is +** returned when there is already a read-lock in order to avoid a deadlock. +** +** Suppose there are two processes A and B. A has a read lock and B has +** a reserved lock. B tries to promote to exclusive but is blocked because +** of A's read lock. A tries to promote to reserved but is blocked by B. +** One or the other of the two processes must give way or there can be +** no progress. By returning SQLITE_BUSY and not invoking the busy callback +** when A already has a read lock, we encourage A to give up and let B +** proceed. +*/ +SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){ + BtShared *pBt = p->pBt; + Pager *pPager = pBt->pPager; + int rc = SQLITE_OK; + + sqlite3BtreeEnter(p); + btreeIntegrity(p); + + /* If the btree is already in a write-transaction, or it + ** is already in a read-transaction and a read-transaction + ** is requested, this is a no-op. + */ + if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ + goto trans_begun; + } + assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 ); + + if( (p->db->flags & SQLITE_ResetDatabase) + && sqlite3PagerIsreadonly(pPager)==0 + ){ + pBt->btsFlags &= ~BTS_READ_ONLY; + } + + /* Write transactions are not possible on a read-only database */ + if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ + rc = SQLITE_READONLY; + goto trans_begun; + } + +#ifndef SQLITE_OMIT_SHARED_CACHE + { + sqlite3 *pBlock = 0; + /* If another database handle has already opened a write transaction + ** on this shared-btree structure and a second write transaction is + ** requested, return SQLITE_LOCKED. + */ + if( (wrflag && pBt->inTransaction==TRANS_WRITE) + || (pBt->btsFlags & BTS_PENDING)!=0 + ){ + pBlock = pBt->pWriter->db; + }else if( wrflag>1 ){ + BtLock *pIter; + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ + if( pIter->pBtree!=p ){ + pBlock = pIter->pBtree->db; + break; + } + } + } + if( pBlock ){ + sqlite3ConnectionBlocked(p->db, pBlock); + rc = SQLITE_LOCKED_SHAREDCACHE; + goto trans_begun; + } + } +#endif + + /* Any read-only or read-write transaction implies a read-lock on + ** page 1. So if some other shared-cache client already has a write-lock + ** on page 1, the transaction cannot be opened. */ + rc = querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK); + if( SQLITE_OK!=rc ) goto trans_begun; + + pBt->btsFlags &= ~BTS_INITIALLY_EMPTY; + if( pBt->nPage==0 ) pBt->btsFlags |= BTS_INITIALLY_EMPTY; + do { + sqlite3PagerWalDb(pPager, p->db); + +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + /* If transitioning from no transaction directly to a write transaction, + ** block for the WRITER lock first if possible. */ + if( pBt->pPage1==0 && wrflag ){ + assert( pBt->inTransaction==TRANS_NONE ); + rc = sqlite3PagerWalWriteLock(pPager, 1); + if( rc!=SQLITE_BUSY && rc!=SQLITE_OK ) break; + } +#endif + + /* Call lockBtree() until either pBt->pPage1 is populated or + ** lockBtree() returns something other than SQLITE_OK. lockBtree() + ** may return SQLITE_OK but leave pBt->pPage1 set to 0 if after + ** reading page 1 it discovers that the page-size of the database + ** file is not pBt->pageSize. In this case lockBtree() will update + ** pBt->pageSize to the page-size of the file on disk. + */ + while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) ); + + if( rc==SQLITE_OK && wrflag ){ + if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ + rc = SQLITE_READONLY; + }else{ + rc = sqlite3PagerBegin(pPager, wrflag>1, sqlite3TempInMemory(p->db)); + if( rc==SQLITE_OK ){ + rc = newDatabase(pBt); + }else if( rc==SQLITE_BUSY_SNAPSHOT && pBt->inTransaction==TRANS_NONE ){ + /* if there was no transaction opened when this function was + ** called and SQLITE_BUSY_SNAPSHOT is returned, change the error + ** code to SQLITE_BUSY. */ + rc = SQLITE_BUSY; + } + } + } + + if( rc!=SQLITE_OK ){ + (void)sqlite3PagerWalWriteLock(pPager, 0); + unlockBtreeIfUnused(pBt); + } + }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && + btreeInvokeBusyHandler(pBt) ); + sqlite3PagerWalDb(pPager, 0); +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + if( rc==SQLITE_BUSY_TIMEOUT ) rc = SQLITE_BUSY; +#endif + + if( rc==SQLITE_OK ){ + if( p->inTrans==TRANS_NONE ){ + pBt->nTransaction++; +#ifndef SQLITE_OMIT_SHARED_CACHE + if( p->sharable ){ + assert( p->lock.pBtree==p && p->lock.iTable==1 ); + p->lock.eLock = READ_LOCK; + p->lock.pNext = pBt->pLock; + pBt->pLock = &p->lock; + } +#endif + } + p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); + if( p->inTrans>pBt->inTransaction ){ + pBt->inTransaction = p->inTrans; + } + if( wrflag ){ + MemPage *pPage1 = pBt->pPage1; +#ifndef SQLITE_OMIT_SHARED_CACHE + assert( !pBt->pWriter ); + pBt->pWriter = p; + pBt->btsFlags &= ~BTS_EXCLUSIVE; + if( wrflag>1 ) pBt->btsFlags |= BTS_EXCLUSIVE; +#endif + + /* If the db-size header field is incorrect (as it may be if an old + ** client has been writing the database file), update it now. Doing + ** this sooner rather than later means the database size can safely + ** re-read the database size from page 1 if a savepoint or transaction + ** rollback occurs within the transaction. + */ + if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc==SQLITE_OK ){ + put4byte(&pPage1->aData[28], pBt->nPage); + } + } + } + } + +trans_begun: + if( rc==SQLITE_OK ){ + if( pSchemaVersion ){ + *pSchemaVersion = get4byte(&pBt->pPage1->aData[40]); + } + if( wrflag ){ + /* This call makes sure that the pager has the correct number of + ** open savepoints. If the second parameter is greater than 0 and + ** the sub-journal is not already open, then it will be opened here. + */ + rc = sqlite3PagerOpenSavepoint(pPager, p->db->nSavepoint); + } + } + + btreeIntegrity(p); + sqlite3BtreeLeave(p); + return rc; +} + +#ifndef SQLITE_OMIT_AUTOVACUUM + +/* +** Set the pointer-map entries for all children of page pPage. Also, if +** pPage contains cells that point to overflow pages, set the pointer +** map entries for the overflow pages as well. +*/ +static int setChildPtrmaps(MemPage *pPage){ + int i; /* Counter variable */ + int nCell; /* Number of cells in page pPage */ + int rc; /* Return code */ + BtShared *pBt = pPage->pBt; + Pgno pgno = pPage->pgno; + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + rc = pPage->isInit ? SQLITE_OK : btreeInitPage(pPage); + if( rc!=SQLITE_OK ) return rc; + nCell = pPage->nCell; + + for(i=0; ileaf ){ + Pgno childPgno = get4byte(pCell); + ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); + } + } + + if( !pPage->leaf ){ + Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); + ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); + } + + return rc; +} + +/* +** Somewhere on pPage is a pointer to page iFrom. Modify this pointer so +** that it points to iTo. Parameter eType describes the type of pointer to +** be modified, as follows: +** +** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child +** page of pPage. +** +** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow +** page pointed to by one of the cells on pPage. +** +** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next +** overflow page in the list. +*/ +static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + if( eType==PTRMAP_OVERFLOW2 ){ + /* The pointer is always the first 4 bytes of the page in this case. */ + if( get4byte(pPage->aData)!=iFrom ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + put4byte(pPage->aData, iTo); + }else{ + int i; + int nCell; + int rc; + + rc = pPage->isInit ? SQLITE_OK : btreeInitPage(pPage); + if( rc ) return rc; + nCell = pPage->nCell; + + for(i=0; ixParseCell(pPage, pCell, &info); + if( info.nLocal pPage->aData+pPage->pBt->usableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + if( iFrom==get4byte(pCell+info.nSize-4) ){ + put4byte(pCell+info.nSize-4, iTo); + break; + } + } + }else{ + if( get4byte(pCell)==iFrom ){ + put4byte(pCell, iTo); + break; + } + } + } + + if( i==nCell ){ + if( eType!=PTRMAP_BTREE || + get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + put4byte(&pPage->aData[pPage->hdrOffset+8], iTo); + } + } + return SQLITE_OK; +} + + +/* +** Move the open database page pDbPage to location iFreePage in the +** database. The pDbPage reference remains valid. +** +** The isCommit flag indicates that there is no need to remember that +** the journal needs to be sync()ed before database page pDbPage->pgno +** can be written to. The caller has already promised not to write to that +** page. +*/ +static int relocatePage( + BtShared *pBt, /* Btree */ + MemPage *pDbPage, /* Open page to move */ + u8 eType, /* Pointer map 'type' entry for pDbPage */ + Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */ + Pgno iFreePage, /* The location to move pDbPage to */ + int isCommit /* isCommit flag passed to sqlite3PagerMovepage */ +){ + MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */ + Pgno iDbPage = pDbPage->pgno; + Pager *pPager = pBt->pPager; + int rc; + + assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || + eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ); + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pDbPage->pBt==pBt ); + if( iDbPage<3 ) return SQLITE_CORRUPT_BKPT; + + /* Move page iDbPage from its current location to page number iFreePage */ + TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", + iDbPage, iFreePage, iPtrPage, eType)); + rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit); + if( rc!=SQLITE_OK ){ + return rc; + } + pDbPage->pgno = iFreePage; + + /* If pDbPage was a btree-page, then it may have child pages and/or cells + ** that point to overflow pages. The pointer map entries for all these + ** pages need to be changed. + ** + ** If pDbPage is an overflow page, then the first 4 bytes may store a + ** pointer to a subsequent overflow page. If this is the case, then + ** the pointer map needs to be updated for the subsequent overflow page. + */ + if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){ + rc = setChildPtrmaps(pDbPage); + if( rc!=SQLITE_OK ){ + return rc; + } + }else{ + Pgno nextOvfl = get4byte(pDbPage->aData); + if( nextOvfl!=0 ){ + ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage, &rc); + if( rc!=SQLITE_OK ){ + return rc; + } + } + } + + /* Fix the database pointer on page iPtrPage that pointed at iDbPage so + ** that it points at iFreePage. Also fix the pointer map entry for + ** iPtrPage. + */ + if( eType!=PTRMAP_ROOTPAGE ){ + rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = sqlite3PagerWrite(pPtrPage->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(pPtrPage); + return rc; + } + rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType); + releasePage(pPtrPage); + if( rc==SQLITE_OK ){ + ptrmapPut(pBt, iFreePage, eType, iPtrPage, &rc); + } + } + return rc; +} + +/* Forward declaration required by incrVacuumStep(). */ +static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); + +/* +** Perform a single step of an incremental-vacuum. If successful, return +** SQLITE_OK. If there is no work to do (and therefore no point in +** calling this function again), return SQLITE_DONE. Or, if an error +** occurs, return some other error code. +** +** More specifically, this function attempts to re-organize the database so +** that the last page of the file currently in use is no longer in use. +** +** Parameter nFin is the number of pages that this database would contain +** were this function called until it returns SQLITE_DONE. +** +** If the bCommit parameter is non-zero, this function assumes that the +** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE +** or an error. bCommit is passed true for an auto-vacuum-on-commit +** operation, or false for an incremental vacuum. +*/ +static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ + Pgno nFreeList; /* Number of pages still on the free-list */ + int rc; + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( iLastPg>nFin ); + + if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ + u8 eType; + Pgno iPtrPage; + + nFreeList = get4byte(&pBt->pPage1->aData[36]); + if( nFreeList==0 ){ + return SQLITE_DONE; + } + + rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage); + if( rc!=SQLITE_OK ){ + return rc; + } + if( eType==PTRMAP_ROOTPAGE ){ + return SQLITE_CORRUPT_BKPT; + } + + if( eType==PTRMAP_FREEPAGE ){ + if( bCommit==0 ){ + /* Remove the page from the files free-list. This is not required + ** if bCommit is non-zero. In that case, the free-list will be + ** truncated to zero after this function returns, so it doesn't + ** matter if it still contains some garbage entries. + */ + Pgno iFreePg; + MemPage *pFreePg; + rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT); + if( rc!=SQLITE_OK ){ + return rc; + } + assert( iFreePg==iLastPg ); + releasePage(pFreePg); + } + } else { + Pgno iFreePg; /* Index of free page to move pLastPg to */ + MemPage *pLastPg; + u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ + Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ + + rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If bCommit is zero, this loop runs exactly once and page pLastPg + ** is swapped with the first free page pulled off the free list. + ** + ** On the other hand, if bCommit is greater than zero, then keep + ** looping until a free-page located within the first nFin pages + ** of the file is found. + */ + if( bCommit==0 ){ + eMode = BTALLOC_LE; + iNear = nFin; + } + do { + MemPage *pFreePg; + rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode); + if( rc!=SQLITE_OK ){ + releasePage(pLastPg); + return rc; + } + releasePage(pFreePg); + }while( bCommit && iFreePg>nFin ); + assert( iFreePgbDoTruncate = 1; + pBt->nPage = iLastPg; + } + return SQLITE_OK; +} + +/* +** The database opened by the first argument is an auto-vacuum database +** nOrig pages in size containing nFree free pages. Return the expected +** size of the database in pages following an auto-vacuum operation. +*/ +static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){ + int nEntry; /* Number of entries on one ptrmap page */ + Pgno nPtrmap; /* Number of PtrMap pages to be freed */ + Pgno nFin; /* Return value */ + + nEntry = pBt->usableSize/5; + nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; + nFin = nOrig - nFree - nPtrmap; + if( nOrig>PENDING_BYTE_PAGE(pBt) && nFinpBt; + + sqlite3BtreeEnter(p); + assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); + if( !pBt->autoVacuum ){ + rc = SQLITE_DONE; + }else{ + Pgno nOrig = btreePagecount(pBt); + Pgno nFree = get4byte(&pBt->pPage1->aData[36]); + Pgno nFin = finalDbSize(pBt, nOrig, nFree); + + if( nOrig=nOrig ){ + rc = SQLITE_CORRUPT_BKPT; + }else if( nFree>0 ){ + rc = saveAllCursors(pBt, 0, 0); + if( rc==SQLITE_OK ){ + invalidateAllOverflowCache(pBt); + rc = incrVacuumStep(pBt, nFin, nOrig, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + put4byte(&pBt->pPage1->aData[28], pBt->nPage); + } + }else{ + rc = SQLITE_DONE; + } + } + sqlite3BtreeLeave(p); + return rc; +} + +/* +** This routine is called prior to sqlite3PagerCommit when a transaction +** is committed for an auto-vacuum database. +*/ +static int autoVacuumCommit(Btree *p){ + int rc = SQLITE_OK; + Pager *pPager; + BtShared *pBt; + sqlite3 *db; + VVA_ONLY( int nRef ); + + assert( p!=0 ); + pBt = p->pBt; + pPager = pBt->pPager; + VVA_ONLY( nRef = sqlite3PagerRefcount(pPager); ) + + assert( sqlite3_mutex_held(pBt->mutex) ); + invalidateAllOverflowCache(pBt); + assert(pBt->autoVacuum); + if( !pBt->incrVacuum ){ + Pgno nFin; /* Number of pages in database after autovacuuming */ + Pgno nFree; /* Number of pages on the freelist initially */ + Pgno nVac; /* Number of pages to vacuum */ + Pgno iFree; /* The next page to be freed */ + Pgno nOrig; /* Database size before freeing */ + + nOrig = btreePagecount(pBt); + if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){ + /* It is not possible to create a database for which the final page + ** is either a pointer-map page or the pending-byte page. If one + ** is encountered, this indicates corruption. + */ + return SQLITE_CORRUPT_BKPT; + } + + nFree = get4byte(&pBt->pPage1->aData[36]); + db = p->db; + if( db->xAutovacPages ){ + int iDb; + for(iDb=0; ALWAYS(iDbnDb); iDb++){ + if( db->aDb[iDb].pBt==p ) break; + } + nVac = db->xAutovacPages( + db->pAutovacPagesArg, + db->aDb[iDb].zDbSName, + nOrig, + nFree, + pBt->pageSize + ); + if( nVac>nFree ){ + nVac = nFree; + } + if( nVac==0 ){ + return SQLITE_OK; + } + }else{ + nVac = nFree; + } + nFin = finalDbSize(pBt, nOrig, nVac); + if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; + if( nFinnFin && rc==SQLITE_OK; iFree--){ + rc = incrVacuumStep(pBt, nFin, iFree, nVac==nFree); + } + if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( nVac==nFree ){ + put4byte(&pBt->pPage1->aData[32], 0); + put4byte(&pBt->pPage1->aData[36], 0); + } + put4byte(&pBt->pPage1->aData[28], nFin); + pBt->bDoTruncate = 1; + pBt->nPage = nFin; + } + if( rc!=SQLITE_OK ){ + sqlite3PagerRollback(pPager); + } + } + + assert( nRef>=sqlite3PagerRefcount(pPager) ); + return rc; +} + +#else /* ifndef SQLITE_OMIT_AUTOVACUUM */ +# define setChildPtrmaps(x) SQLITE_OK +#endif + +/* +** This routine does the first phase of a two-phase commit. This routine +** causes a rollback journal to be created (if it does not already exist) +** and populated with enough information so that if a power loss occurs +** the database can be restored to its original state by playing back +** the journal. Then the contents of the journal are flushed out to +** the disk. After the journal is safely on oxide, the changes to the +** database are written into the database file and flushed to oxide. +** At the end of this call, the rollback journal still exists on the +** disk and we are still holding all locks, so the transaction has not +** committed. See sqlite3BtreeCommitPhaseTwo() for the second phase of the +** commit process. +** +** This call is a no-op if no write-transaction is currently active on pBt. +** +** Otherwise, sync the database file for the btree pBt. zSuperJrnl points to +** the name of a super-journal file that should be written into the +** individual journal file, or is NULL, indicating no super-journal file +** (single database transaction). +** +** When this is called, the super-journal should already have been +** created, populated with this journal pointer and synced to disk. +** +** Once this is routine has returned, the only thing required to commit +** the write-transaction for this database file is to delete the journal. +*/ +SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ + int rc = SQLITE_OK; + if( p->inTrans==TRANS_WRITE ){ + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + rc = autoVacuumCommit(p); + if( rc!=SQLITE_OK ){ + sqlite3BtreeLeave(p); + return rc; + } + } + if( pBt->bDoTruncate ){ + sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); + } +#endif + rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); + sqlite3BtreeLeave(p); + } + return rc; +} + +/* +** This function is called from both BtreeCommitPhaseTwo() and BtreeRollback() +** at the conclusion of a transaction. +*/ +static void btreeEndTransaction(Btree *p){ + BtShared *pBt = p->pBt; + sqlite3 *db = p->db; + assert( sqlite3BtreeHoldsMutex(p) ); + +#ifndef SQLITE_OMIT_AUTOVACUUM + pBt->bDoTruncate = 0; +#endif + if( p->inTrans>TRANS_NONE && db->nVdbeRead>1 ){ + /* If there are other active statements that belong to this database + ** handle, downgrade to a read-only transaction. The other statements + ** may still be reading from the database. */ + downgradeAllSharedCacheTableLocks(p); + p->inTrans = TRANS_READ; + }else{ + /* If the handle had any kind of transaction open, decrement the + ** transaction count of the shared btree. If the transaction count + ** reaches 0, set the shared state to TRANS_NONE. The unlockBtreeIfUnused() + ** call below will unlock the pager. */ + if( p->inTrans!=TRANS_NONE ){ + clearAllSharedCacheTableLocks(p); + pBt->nTransaction--; + if( 0==pBt->nTransaction ){ + pBt->inTransaction = TRANS_NONE; + } + } + + /* Set the current transaction state to TRANS_NONE and unlock the + ** pager if this call closed the only read or write transaction. */ + p->inTrans = TRANS_NONE; + unlockBtreeIfUnused(pBt); + } + + btreeIntegrity(p); +} + +/* +** Commit the transaction currently in progress. +** +** This routine implements the second phase of a 2-phase commit. The +** sqlite3BtreeCommitPhaseOne() routine does the first phase and should +** be invoked prior to calling this routine. The sqlite3BtreeCommitPhaseOne() +** routine did all the work of writing information out to disk and flushing the +** contents so that they are written onto the disk platter. All this +** routine has to do is delete or truncate or zero the header in the +** the rollback journal (which causes the transaction to commit) and +** drop locks. +** +** Normally, if an error occurs while the pager layer is attempting to +** finalize the underlying journal file, this function returns an error and +** the upper layer will attempt a rollback. However, if the second argument +** is non-zero then this b-tree transaction is part of a multi-file +** transaction. In this case, the transaction has already been committed +** (by deleting a super-journal file) and the caller will ignore this +** functions return code. So, even if an error occurs in the pager layer, +** reset the b-tree objects internal state to indicate that the write +** transaction has been closed. This is quite safe, as the pager will have +** transitioned to the error state. +** +** This will release the write lock on the database file. If there +** are no active cursors, it also releases the read lock. +*/ +SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ + + if( p->inTrans==TRANS_NONE ) return SQLITE_OK; + sqlite3BtreeEnter(p); + btreeIntegrity(p); + + /* If the handle has a write-transaction open, commit the shared-btrees + ** transaction and set the shared state to TRANS_READ. + */ + if( p->inTrans==TRANS_WRITE ){ + int rc; + BtShared *pBt = p->pBt; + assert( pBt->inTransaction==TRANS_WRITE ); + assert( pBt->nTransaction>0 ); + rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); + if( rc!=SQLITE_OK && bCleanup==0 ){ + sqlite3BtreeLeave(p); + return rc; + } + p->iBDataVersion--; /* Compensate for pPager->iDataVersion++; */ + pBt->inTransaction = TRANS_READ; + btreeClearHasContent(pBt); + } + + btreeEndTransaction(p); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} + +/* +** Do both phases of a commit. +*/ +SQLITE_PRIVATE int sqlite3BtreeCommit(Btree *p){ + int rc; + sqlite3BtreeEnter(p); + rc = sqlite3BtreeCommitPhaseOne(p, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeCommitPhaseTwo(p, 0); + } + sqlite3BtreeLeave(p); + return rc; +} + +/* +** This routine sets the state to CURSOR_FAULT and the error +** code to errCode for every cursor on any BtShared that pBtree +** references. Or if the writeOnly flag is set to 1, then only +** trip write cursors and leave read cursors unchanged. +** +** Every cursor is a candidate to be tripped, including cursors +** that belong to other database connections that happen to be +** sharing the cache with pBtree. +** +** This routine gets called when a rollback occurs. If the writeOnly +** flag is true, then only write-cursors need be tripped - read-only +** cursors save their current positions so that they may continue +** following the rollback. Or, if writeOnly is false, all cursors are +** tripped. In general, writeOnly is false if the transaction being +** rolled back modified the database schema. In this case b-tree root +** pages may be moved or deleted from the database altogether, making +** it unsafe for read cursors to continue. +** +** If the writeOnly flag is true and an error is encountered while +** saving the current position of a read-only cursor, all cursors, +** including all read-cursors are tripped. +** +** SQLITE_OK is returned if successful, or if an error occurs while +** saving a cursor position, an SQLite error code. +*/ +SQLITE_PRIVATE int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ + BtCursor *p; + int rc = SQLITE_OK; + + assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 ); + if( pBtree ){ + sqlite3BtreeEnter(pBtree); + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ + if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ + rc = saveCursorPosition(p); + if( rc!=SQLITE_OK ){ + (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); + break; + } + } + }else{ + sqlite3BtreeClearCursor(p); + p->eState = CURSOR_FAULT; + p->skipNext = errCode; + } + btreeReleaseAllCursorPages(p); + } + sqlite3BtreeLeave(pBtree); + } + return rc; +} + +/* +** Set the pBt->nPage field correctly, according to the current +** state of the database. Assume pBt->pPage1 is valid. +*/ +static void btreeSetNPage(BtShared *pBt, MemPage *pPage1){ + int nPage = get4byte(&pPage1->aData[28]); + testcase( nPage==0 ); + if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); + testcase( pBt->nPage!=(u32)nPage ); + pBt->nPage = nPage; +} + +/* +** Rollback the transaction in progress. +** +** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). +** Only write cursors are tripped if writeOnly is true but all cursors are +** tripped if writeOnly is false. Any attempt to use +** a tripped cursor will result in an error. +** +** This will release the write lock on the database file. If there +** are no active cursors, it also releases the read lock. +*/ +SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){ + int rc; + BtShared *pBt = p->pBt; + MemPage *pPage1; + + assert( writeOnly==1 || writeOnly==0 ); + assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK ); + sqlite3BtreeEnter(p); + if( tripCode==SQLITE_OK ){ + rc = tripCode = saveAllCursors(pBt, 0, 0); + if( rc ) writeOnly = 0; + }else{ + rc = SQLITE_OK; + } + if( tripCode ){ + int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly); + assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) ); + if( rc2!=SQLITE_OK ) rc = rc2; + } + btreeIntegrity(p); + + if( p->inTrans==TRANS_WRITE ){ + int rc2; + + assert( TRANS_WRITE==pBt->inTransaction ); + rc2 = sqlite3PagerRollback(pBt->pPager); + if( rc2!=SQLITE_OK ){ + rc = rc2; + } + + /* The rollback may have destroyed the pPage1->aData value. So + ** call btreeGetPage() on page 1 again to make + ** sure pPage1->aData is set correctly. */ + if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ + btreeSetNPage(pBt, pPage1); + releasePageOne(pPage1); + } + assert( countValidCursors(pBt, 1)==0 ); + pBt->inTransaction = TRANS_READ; + btreeClearHasContent(pBt); + } + + btreeEndTransaction(p); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Start a statement subtransaction. The subtransaction can be rolled +** back independently of the main transaction. You must start a transaction +** before starting a subtransaction. The subtransaction is ended automatically +** if the main transaction commits or rolls back. +** +** Statement subtransactions are used around individual SQL statements +** that are contained within a BEGIN...COMMIT block. If a constraint +** error occurs within the statement, the effect of that one statement +** can be rolled back without having to rollback the entire transaction. +** +** A statement sub-transaction is implemented as an anonymous savepoint. The +** value passed as the second parameter is the total number of savepoints, +** including the new anonymous savepoint, open on the B-Tree. i.e. if there +** are no active savepoints and no other statement-transactions open, +** iStatement is 1. This anonymous savepoint can be released or rolled back +** using the sqlite3BtreeSavepoint() function. +*/ +SQLITE_PRIVATE int sqlite3BtreeBeginStmt(Btree *p, int iStatement){ + int rc; + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + assert( p->inTrans==TRANS_WRITE ); + assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); + assert( iStatement>0 ); + assert( iStatement>p->db->nSavepoint ); + assert( pBt->inTransaction==TRANS_WRITE ); + /* At the pager level, a statement transaction is a savepoint with + ** an index greater than all savepoints created explicitly using + ** SQL statements. It is illegal to open, release or rollback any + ** such savepoints while the statement transaction savepoint is active. + */ + rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** The second argument to this function, op, is always SAVEPOINT_ROLLBACK +** or SAVEPOINT_RELEASE. This function either releases or rolls back the +** savepoint identified by parameter iSavepoint, depending on the value +** of op. +** +** Normally, iSavepoint is greater than or equal to zero. However, if op is +** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the +** contents of the entire transaction are rolled back. This is different +** from a normal transaction rollback, as no locks are released and the +** transaction remains open. +*/ +SQLITE_PRIVATE int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){ + int rc = SQLITE_OK; + if( p && p->inTrans==TRANS_WRITE ){ + BtShared *pBt = p->pBt; + assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); + assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) ); + sqlite3BtreeEnter(p); + if( op==SAVEPOINT_ROLLBACK ){ + rc = saveAllCursors(pBt, 0, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); + } + if( rc==SQLITE_OK ){ + if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){ + pBt->nPage = 0; + } + rc = newDatabase(pBt); + btreeSetNPage(pBt, pBt->pPage1); + + /* pBt->nPage might be zero if the database was corrupt when + ** the transaction was started. Otherwise, it must be at least 1. */ + assert( CORRUPT_DB || pBt->nPage>0 ); + } + sqlite3BtreeLeave(p); + } + return rc; +} + +/* +** Create a new cursor for the BTree whose root is on the page +** iTable. If a read-only cursor is requested, it is assumed that +** the caller already has at least a read-only transaction open +** on the database already. If a write-cursor is requested, then +** the caller is assumed to have an open write transaction. +** +** If the BTREE_WRCSR bit of wrFlag is clear, then the cursor can only +** be used for reading. If the BTREE_WRCSR bit is set, then the cursor +** can be used for reading or for writing if other conditions for writing +** are also met. These are the conditions that must be met in order +** for writing to be allowed: +** +** 1: The cursor must have been opened with wrFlag containing BTREE_WRCSR +** +** 2: Other database connections that share the same pager cache +** but which are not in the READ_UNCOMMITTED state may not have +** cursors open with wrFlag==0 on the same table. Otherwise +** the changes made by this write cursor would be visible to +** the read cursors in the other database connection. +** +** 3: The database must be writable (not on read-only media) +** +** 4: There must be an active transaction. +** +** The BTREE_FORDELETE bit of wrFlag may optionally be set if BTREE_WRCSR +** is set. If FORDELETE is set, that is a hint to the implementation that +** this cursor will only be used to seek to and delete entries of an index +** as part of a larger DELETE statement. The FORDELETE hint is not used by +** this implementation. But in a hypothetical alternative storage engine +** in which index entries are automatically deleted when corresponding table +** rows are deleted, the FORDELETE flag is a hint that all SEEK and DELETE +** operations on this cursor can be no-ops and all READ operations can +** return a null row (2-bytes: 0x01 0x00). +** +** No checking is done to make sure that page iTable really is the +** root page of a b-tree. If it is not, then the cursor acquired +** will not work correctly. +** +** It is assumed that the sqlite3BtreeCursorZero() has been called +** on pCur to initialize the memory space prior to invoking this routine. +*/ +static int btreeCursor( + Btree *p, /* The btree */ + Pgno iTable, /* Root page of table to open */ + int wrFlag, /* 1 to write. 0 read-only */ + struct KeyInfo *pKeyInfo, /* First arg to comparison function */ + BtCursor *pCur /* Space for new cursor */ +){ + BtShared *pBt = p->pBt; /* Shared b-tree handle */ + BtCursor *pX; /* Looping over other all cursors */ + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( wrFlag==0 + || wrFlag==BTREE_WRCSR + || wrFlag==(BTREE_WRCSR|BTREE_FORDELETE) + ); + + /* The following assert statements verify that if this is a sharable + ** b-tree database, the connection is holding the required table locks, + ** and that no other connection has any open cursor that conflicts with + ** this lock. The iTable<1 term disables the check for corrupt schemas. */ + assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, (wrFlag?2:1)) + || iTable<1 ); + assert( wrFlag==0 || !hasReadConflicts(p, iTable) ); + + /* Assert that the caller has opened the required transaction. */ + assert( p->inTrans>TRANS_NONE ); + assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); + assert( pBt->pPage1 && pBt->pPage1->aData ); + assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); + + if( iTable<=1 ){ + if( iTable<1 ){ + return SQLITE_CORRUPT_BKPT; + }else if( btreePagecount(pBt)==0 ){ + assert( wrFlag==0 ); + iTable = 0; + } + } + + /* Now that no other errors can occur, finish filling in the BtCursor + ** variables and link the cursor into the BtShared list. */ + pCur->pgnoRoot = iTable; + pCur->iPage = -1; + pCur->pKeyInfo = pKeyInfo; + pCur->pBtree = p; + pCur->pBt = pBt; + pCur->curFlags = 0; + /* If there are two or more cursors on the same btree, then all such + ** cursors *must* have the BTCF_Multiple flag set. */ + for(pX=pBt->pCursor; pX; pX=pX->pNext){ + if( pX->pgnoRoot==iTable ){ + pX->curFlags |= BTCF_Multiple; + pCur->curFlags = BTCF_Multiple; + } + } + pCur->eState = CURSOR_INVALID; + pCur->pNext = pBt->pCursor; + pBt->pCursor = pCur; + if( wrFlag ){ + pCur->curFlags |= BTCF_WriteFlag; + pCur->curPagerFlags = 0; + if( pBt->pTmpSpace==0 ) return allocateTempSpace(pBt); + }else{ + pCur->curPagerFlags = PAGER_GET_READONLY; + } + return SQLITE_OK; +} +static int btreeCursorWithLock( + Btree *p, /* The btree */ + Pgno iTable, /* Root page of table to open */ + int wrFlag, /* 1 to write. 0 read-only */ + struct KeyInfo *pKeyInfo, /* First arg to comparison function */ + BtCursor *pCur /* Space for new cursor */ +){ + int rc; + sqlite3BtreeEnter(p); + rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); + sqlite3BtreeLeave(p); + return rc; +} +SQLITE_PRIVATE int sqlite3BtreeCursor( + Btree *p, /* The btree */ + Pgno iTable, /* Root page of table to open */ + int wrFlag, /* 1 to write. 0 read-only */ + struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ + BtCursor *pCur /* Write new cursor here */ +){ + if( p->sharable ){ + return btreeCursorWithLock(p, iTable, wrFlag, pKeyInfo, pCur); + }else{ + return btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); + } +} + +/* +** Return the size of a BtCursor object in bytes. +** +** This interfaces is needed so that users of cursors can preallocate +** sufficient storage to hold a cursor. The BtCursor object is opaque +** to users so they cannot do the sizeof() themselves - they must call +** this routine. +*/ +SQLITE_PRIVATE int sqlite3BtreeCursorSize(void){ + return ROUND8(sizeof(BtCursor)); +} + +/* +** Initialize memory that will be converted into a BtCursor object. +** +** The simple approach here would be to memset() the entire object +** to zero. But it turns out that the apPage[] and aiIdx[] arrays +** do not need to be zeroed and they are large, so we can save a lot +** of run-time by skipping the initialization of those elements. +*/ +SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor *p){ + memset(p, 0, offsetof(BtCursor, BTCURSOR_FIRST_UNINIT)); +} + +/* +** Close a cursor. The read lock on the database file is released +** when the last cursor is closed. +*/ +SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ + Btree *pBtree = pCur->pBtree; + if( pBtree ){ + BtShared *pBt = pCur->pBt; + sqlite3BtreeEnter(pBtree); + assert( pBt->pCursor!=0 ); + if( pBt->pCursor==pCur ){ + pBt->pCursor = pCur->pNext; + }else{ + BtCursor *pPrev = pBt->pCursor; + do{ + if( pPrev->pNext==pCur ){ + pPrev->pNext = pCur->pNext; + break; + } + pPrev = pPrev->pNext; + }while( ALWAYS(pPrev) ); + } + btreeReleaseAllCursorPages(pCur); + unlockBtreeIfUnused(pBt); + sqlite3_free(pCur->aOverflow); + sqlite3_free(pCur->pKey); + if( (pBt->openFlags & BTREE_SINGLE) && pBt->pCursor==0 ){ + /* Since the BtShared is not sharable, there is no need to + ** worry about the missing sqlite3BtreeLeave() call here. */ + assert( pBtree->sharable==0 ); + sqlite3BtreeClose(pBtree); + }else{ + sqlite3BtreeLeave(pBtree); + } + pCur->pBtree = 0; + } + return SQLITE_OK; +} + +/* +** Make sure the BtCursor* given in the argument has a valid +** BtCursor.info structure. If it is not already valid, call +** btreeParseCell() to fill it in. +** +** BtCursor.info is a cache of the information in the current cell. +** Using this cache reduces the number of calls to btreeParseCell(). +*/ +#ifndef NDEBUG + static int cellInfoEqual(CellInfo *a, CellInfo *b){ + if( a->nKey!=b->nKey ) return 0; + if( a->pPayload!=b->pPayload ) return 0; + if( a->nPayload!=b->nPayload ) return 0; + if( a->nLocal!=b->nLocal ) return 0; + if( a->nSize!=b->nSize ) return 0; + return 1; + } + static void assertCellInfo(BtCursor *pCur){ + CellInfo info; + memset(&info, 0, sizeof(info)); + btreeParseCell(pCur->pPage, pCur->ix, &info); + assert( CORRUPT_DB || cellInfoEqual(&info, &pCur->info) ); + } +#else + #define assertCellInfo(x) +#endif +static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){ + if( pCur->info.nSize==0 ){ + pCur->curFlags |= BTCF_ValidNKey; + btreeParseCell(pCur->pPage,pCur->ix,&pCur->info); + }else{ + assertCellInfo(pCur); + } +} + +#ifndef NDEBUG /* The next routine used only within assert() statements */ +/* +** Return true if the given BtCursor is valid. A valid cursor is one +** that is currently pointing to a row in a (non-empty) table. +** This is a verification routine is used only within assert() statements. +*/ +SQLITE_PRIVATE int sqlite3BtreeCursorIsValid(BtCursor *pCur){ + return pCur && pCur->eState==CURSOR_VALID; +} +#endif /* NDEBUG */ +SQLITE_PRIVATE int sqlite3BtreeCursorIsValidNN(BtCursor *pCur){ + assert( pCur!=0 ); + return pCur->eState==CURSOR_VALID; +} + +/* +** Return the value of the integer key or "rowid" for a table btree. +** This routine is only valid for a cursor that is pointing into a +** ordinary table btree. If the cursor points to an index btree or +** is invalid, the result of this routine is undefined. +*/ +SQLITE_PRIVATE i64 sqlite3BtreeIntegerKey(BtCursor *pCur){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->curIntKey ); + getCellInfo(pCur); + return pCur->info.nKey; +} + +/* +** Pin or unpin a cursor. +*/ +SQLITE_PRIVATE void sqlite3BtreeCursorPin(BtCursor *pCur){ + assert( (pCur->curFlags & BTCF_Pinned)==0 ); + pCur->curFlags |= BTCF_Pinned; +} +SQLITE_PRIVATE void sqlite3BtreeCursorUnpin(BtCursor *pCur){ + assert( (pCur->curFlags & BTCF_Pinned)!=0 ); + pCur->curFlags &= ~BTCF_Pinned; +} + +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC +/* +** Return the offset into the database file for the start of the +** payload to which the cursor is pointing. +*/ +SQLITE_PRIVATE i64 sqlite3BtreeOffset(BtCursor *pCur){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + getCellInfo(pCur); + return (i64)pCur->pBt->pageSize*((i64)pCur->pPage->pgno - 1) + + (i64)(pCur->info.pPayload - pCur->pPage->aData); +} +#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */ + +/* +** Return the number of bytes of payload for the entry that pCur is +** currently pointing to. For table btrees, this will be the amount +** of data. For index btrees, this will be the size of the key. +** +** The caller must guarantee that the cursor is pointing to a non-NULL +** valid entry. In other words, the calling procedure must guarantee +** that the cursor has Cursor.eState==CURSOR_VALID. +*/ +SQLITE_PRIVATE u32 sqlite3BtreePayloadSize(BtCursor *pCur){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + getCellInfo(pCur); + return pCur->info.nPayload; +} + +/* +** Return an upper bound on the size of any record for the table +** that the cursor is pointing into. +** +** This is an optimization. Everything will still work if this +** routine always returns 2147483647 (which is the largest record +** that SQLite can handle) or more. But returning a smaller value might +** prevent large memory allocations when trying to interpret a +** corrupt datrabase. +** +** The current implementation merely returns the size of the underlying +** database file. +*/ +SQLITE_PRIVATE sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor *pCur){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + return pCur->pBt->pageSize * (sqlite3_int64)pCur->pBt->nPage; +} + +/* +** Given the page number of an overflow page in the database (parameter +** ovfl), this function finds the page number of the next page in the +** linked list of overflow pages. If possible, it uses the auto-vacuum +** pointer-map data instead of reading the content of page ovfl to do so. +** +** If an error occurs an SQLite error code is returned. Otherwise: +** +** The page number of the next overflow page in the linked list is +** written to *pPgnoNext. If page ovfl is the last page in its linked +** list, *pPgnoNext is set to zero. +** +** If ppPage is not NULL, and a reference to the MemPage object corresponding +** to page number pOvfl was obtained, then *ppPage is set to point to that +** reference. It is the responsibility of the caller to call releasePage() +** on *ppPage to free the reference. In no reference was obtained (because +** the pointer-map was used to obtain the value for *pPgnoNext), then +** *ppPage is set to zero. +*/ +static int getOverflowPage( + BtShared *pBt, /* The database file */ + Pgno ovfl, /* Current overflow page number */ + MemPage **ppPage, /* OUT: MemPage handle (may be NULL) */ + Pgno *pPgnoNext /* OUT: Next overflow page number */ +){ + Pgno next = 0; + MemPage *pPage = 0; + int rc = SQLITE_OK; + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert(pPgnoNext); + +#ifndef SQLITE_OMIT_AUTOVACUUM + /* Try to find the next page in the overflow list using the + ** autovacuum pointer-map pages. Guess that the next page in + ** the overflow list is page number (ovfl+1). If that guess turns + ** out to be wrong, fall back to loading the data of page + ** number ovfl to determine the next page number. + */ + if( pBt->autoVacuum ){ + Pgno pgno; + Pgno iGuess = ovfl+1; + u8 eType; + + while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){ + iGuess++; + } + + if( iGuess<=btreePagecount(pBt) ){ + rc = ptrmapGet(pBt, iGuess, &eType, &pgno); + if( rc==SQLITE_OK && eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){ + next = iGuess; + rc = SQLITE_DONE; + } + } + } +#endif + + assert( next==0 || rc==SQLITE_DONE ); + if( rc==SQLITE_OK ){ + rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0); + assert( rc==SQLITE_OK || pPage==0 ); + if( rc==SQLITE_OK ){ + next = get4byte(pPage->aData); + } + } + + *pPgnoNext = next; + if( ppPage ){ + *ppPage = pPage; + }else{ + releasePage(pPage); + } + return (rc==SQLITE_DONE ? SQLITE_OK : rc); +} + +/* +** Copy data from a buffer to a page, or from a page to a buffer. +** +** pPayload is a pointer to data stored on database page pDbPage. +** If argument eOp is false, then nByte bytes of data are copied +** from pPayload to the buffer pointed at by pBuf. If eOp is true, +** then sqlite3PagerWrite() is called on pDbPage and nByte bytes +** of data are copied from the buffer pBuf to pPayload. +** +** SQLITE_OK is returned on success, otherwise an error code. +*/ +static int copyPayload( + void *pPayload, /* Pointer to page data */ + void *pBuf, /* Pointer to buffer */ + int nByte, /* Number of bytes to copy */ + int eOp, /* 0 -> copy from page, 1 -> copy to page */ + DbPage *pDbPage /* Page containing pPayload */ +){ + if( eOp ){ + /* Copy data from buffer to page (a write operation) */ + int rc = sqlite3PagerWrite(pDbPage); + if( rc!=SQLITE_OK ){ + return rc; + } + memcpy(pPayload, pBuf, nByte); + }else{ + /* Copy data from page to buffer (a read operation) */ + memcpy(pBuf, pPayload, nByte); + } + return SQLITE_OK; +} + +/* +** This function is used to read or overwrite payload information +** for the entry that the pCur cursor is pointing to. The eOp +** argument is interpreted as follows: +** +** 0: The operation is a read. Populate the overflow cache. +** 1: The operation is a write. Populate the overflow cache. +** +** A total of "amt" bytes are read or written beginning at "offset". +** Data is read to or from the buffer pBuf. +** +** The content being read or written might appear on the main page +** or be scattered out on multiple overflow pages. +** +** If the current cursor entry uses one or more overflow pages +** this function may allocate space for and lazily populate +** the overflow page-list cache array (BtCursor.aOverflow). +** Subsequent calls use this cache to make seeking to the supplied offset +** more efficient. +** +** Once an overflow page-list cache has been allocated, it must be +** invalidated if some other cursor writes to the same table, or if +** the cursor is moved to a different row. Additionally, in auto-vacuum +** mode, the following events may invalidate an overflow page-list cache. +** +** * An incremental vacuum, +** * A commit in auto_vacuum="full" mode, +** * Creating a table (may require moving an overflow page). +*/ +static int accessPayload( + BtCursor *pCur, /* Cursor pointing to entry to read from */ + u32 offset, /* Begin reading this far into payload */ + u32 amt, /* Read this many bytes */ + unsigned char *pBuf, /* Write the bytes into this buffer */ + int eOp /* zero to read. non-zero to write. */ +){ + unsigned char *aPayload; + int rc = SQLITE_OK; + int iIdx = 0; + MemPage *pPage = pCur->pPage; /* Btree page of current entry */ + BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ +#ifdef SQLITE_DIRECT_OVERFLOW_READ + unsigned char * const pBufStart = pBuf; /* Start of original out buffer */ +#endif + + assert( pPage ); + assert( eOp==0 || eOp==1 ); + assert( pCur->eState==CURSOR_VALID ); + if( pCur->ix>=pPage->nCell ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( cursorHoldsMutex(pCur) ); + + getCellInfo(pCur); + aPayload = pCur->info.pPayload; + assert( offset+amt <= pCur->info.nPayload ); + + assert( aPayload > pPage->aData ); + if( (uptr)(aPayload - pPage->aData) > (pBt->usableSize - pCur->info.nLocal) ){ + /* Trying to read or write past the end of the data is an error. The + ** conditional above is really: + ** &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] + ** but is recast into its current form to avoid integer overflow problems + */ + return SQLITE_CORRUPT_PAGE(pPage); + } + + /* Check if data must be read/written to/from the btree page itself. */ + if( offsetinfo.nLocal ){ + int a = amt; + if( a+offset>pCur->info.nLocal ){ + a = pCur->info.nLocal - offset; + } + rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage); + offset = 0; + pBuf += a; + amt -= a; + }else{ + offset -= pCur->info.nLocal; + } + + + if( rc==SQLITE_OK && amt>0 ){ + const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ + Pgno nextPage; + + nextPage = get4byte(&aPayload[pCur->info.nLocal]); + + /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. + ** + ** The aOverflow[] array is sized at one entry for each overflow page + ** in the overflow chain. The page number of the first overflow page is + ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array + ** means "not yet known" (the cache is lazily populated). + */ + if( (pCur->curFlags & BTCF_ValidOvfl)==0 ){ + int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; + if( pCur->aOverflow==0 + || nOvfl*(int)sizeof(Pgno) > sqlite3MallocSize(pCur->aOverflow) + ){ + Pgno *aNew = (Pgno*)sqlite3Realloc( + pCur->aOverflow, nOvfl*2*sizeof(Pgno) + ); + if( aNew==0 ){ + return SQLITE_NOMEM_BKPT; + }else{ + pCur->aOverflow = aNew; + } + } + memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); + pCur->curFlags |= BTCF_ValidOvfl; + }else{ + /* If the overflow page-list cache has been allocated and the + ** entry for the first required overflow page is valid, skip + ** directly to it. + */ + if( pCur->aOverflow[offset/ovflSize] ){ + iIdx = (offset/ovflSize); + nextPage = pCur->aOverflow[iIdx]; + offset = (offset%ovflSize); + } + } + + assert( rc==SQLITE_OK && amt>0 ); + while( nextPage ){ + /* If required, populate the overflow page-list cache. */ + if( nextPage > pBt->nPage ) return SQLITE_CORRUPT_BKPT; + assert( pCur->aOverflow[iIdx]==0 + || pCur->aOverflow[iIdx]==nextPage + || CORRUPT_DB ); + pCur->aOverflow[iIdx] = nextPage; + + if( offset>=ovflSize ){ + /* The only reason to read this page is to obtain the page + ** number for the next page in the overflow chain. The page + ** data is not required. So first try to lookup the overflow + ** page-list cache, if any, then fall back to the getOverflowPage() + ** function. + */ + assert( pCur->curFlags & BTCF_ValidOvfl ); + assert( pCur->pBtree->db==pBt->db ); + if( pCur->aOverflow[iIdx+1] ){ + nextPage = pCur->aOverflow[iIdx+1]; + }else{ + rc = getOverflowPage(pBt, nextPage, 0, &nextPage); + } + offset -= ovflSize; + }else{ + /* Need to read this page properly. It contains some of the + ** range of data that is being read (eOp==0) or written (eOp!=0). + */ + int a = amt; + if( a + offset > ovflSize ){ + a = ovflSize - offset; + } + +#ifdef SQLITE_DIRECT_OVERFLOW_READ + /* If all the following are true: + ** + ** 1) this is a read operation, and + ** 2) data is required from the start of this overflow page, and + ** 3) there are no dirty pages in the page-cache + ** 4) the database is file-backed, and + ** 5) the page is not in the WAL file + ** 6) at least 4 bytes have already been read into the output buffer + ** + ** then data can be read directly from the database file into the + ** output buffer, bypassing the page-cache altogether. This speeds + ** up loading large records that span many overflow pages. + */ + if( eOp==0 /* (1) */ + && offset==0 /* (2) */ + && sqlite3PagerDirectReadOk(pBt->pPager, nextPage) /* (3,4,5) */ + && &pBuf[-4]>=pBufStart /* (6) */ + ){ + sqlite3_file *fd = sqlite3PagerFile(pBt->pPager); + u8 aSave[4]; + u8 *aWrite = &pBuf[-4]; + assert( aWrite>=pBufStart ); /* due to (6) */ + memcpy(aSave, aWrite, 4); + rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); + if( rc && nextPage>pBt->nPage ) rc = SQLITE_CORRUPT_BKPT; + nextPage = get4byte(aWrite); + memcpy(aWrite, aSave, 4); + }else +#endif + + { + DbPage *pDbPage; + rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage, + (eOp==0 ? PAGER_GET_READONLY : 0) + ); + if( rc==SQLITE_OK ){ + aPayload = sqlite3PagerGetData(pDbPage); + nextPage = get4byte(aPayload); + rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); + sqlite3PagerUnref(pDbPage); + offset = 0; + } + } + amt -= a; + if( amt==0 ) return rc; + pBuf += a; + } + if( rc ) break; + iIdx++; + } + } + + if( rc==SQLITE_OK && amt>0 ){ + /* Overflow chain ends prematurely */ + return SQLITE_CORRUPT_PAGE(pPage); + } + return rc; +} + +/* +** Read part of the payload for the row at which that cursor pCur is currently +** pointing. "amt" bytes will be transferred into pBuf[]. The transfer +** begins at "offset". +** +** pCur can be pointing to either a table or an index b-tree. +** If pointing to a table btree, then the content section is read. If +** pCur is pointing to an index b-tree then the key section is read. +** +** For sqlite3BtreePayload(), the caller must ensure that pCur is pointing +** to a valid row in the table. For sqlite3BtreePayloadChecked(), the +** cursor might be invalid or might need to be restored before being read. +** +** Return SQLITE_OK on success or an error code if anything goes +** wrong. An error is returned if "offset+amt" is larger than +** the available payload. +*/ +SQLITE_PRIVATE int sqlite3BtreePayload(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage>=0 && pCur->pPage ); + return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0); +} + +/* +** This variant of sqlite3BtreePayload() works even if the cursor has not +** in the CURSOR_VALID state. It is only used by the sqlite3_blob_read() +** interface. +*/ +#ifndef SQLITE_OMIT_INCRBLOB +static SQLITE_NOINLINE int accessPayloadChecked( + BtCursor *pCur, + u32 offset, + u32 amt, + void *pBuf +){ + int rc; + if ( pCur->eState==CURSOR_INVALID ){ + return SQLITE_ABORT; + } + assert( cursorOwnsBtShared(pCur) ); + rc = btreeRestoreCursorPosition(pCur); + return rc ? rc : accessPayload(pCur, offset, amt, pBuf, 0); +} +SQLITE_PRIVATE int sqlite3BtreePayloadChecked(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ + if( pCur->eState==CURSOR_VALID ){ + assert( cursorOwnsBtShared(pCur) ); + return accessPayload(pCur, offset, amt, pBuf, 0); + }else{ + return accessPayloadChecked(pCur, offset, amt, pBuf); + } +} +#endif /* SQLITE_OMIT_INCRBLOB */ + +/* +** Return a pointer to payload information from the entry that the +** pCur cursor is pointing to. The pointer is to the beginning of +** the key if index btrees (pPage->intKey==0) and is the data for +** table btrees (pPage->intKey==1). The number of bytes of available +** key/data is written into *pAmt. If *pAmt==0, then the value +** returned will not be a valid pointer. +** +** This routine is an optimization. It is common for the entire key +** and data to fit on the local page and for there to be no overflow +** pages. When that is so, this routine can be used to access the +** key and data without making a copy. If the key and/or data spills +** onto overflow pages, then accessPayload() must be used to reassemble +** the key/data and copy it into a preallocated buffer. +** +** The pointer returned by this routine looks directly into the cached +** page of the database. The data might change or move the next time +** any btree routine is called. +*/ +static const void *fetchPayload( + BtCursor *pCur, /* Cursor pointing to entry to read from */ + u32 *pAmt /* Write the number of available bytes here */ +){ + int amt; + assert( pCur!=0 && pCur->iPage>=0 && pCur->pPage); + assert( pCur->eState==CURSOR_VALID ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + assert( cursorOwnsBtShared(pCur) ); + assert( pCur->ixpPage->nCell || CORRUPT_DB ); + assert( pCur->info.nSize>0 ); + assert( pCur->info.pPayload>pCur->pPage->aData || CORRUPT_DB ); + assert( pCur->info.pPayloadpPage->aDataEnd ||CORRUPT_DB); + amt = pCur->info.nLocal; + if( amt>(int)(pCur->pPage->aDataEnd - pCur->info.pPayload) ){ + /* There is too little space on the page for the expected amount + ** of local content. Database must be corrupt. */ + assert( CORRUPT_DB ); + amt = MAX(0, (int)(pCur->pPage->aDataEnd - pCur->info.pPayload)); + } + *pAmt = (u32)amt; + return (void*)pCur->info.pPayload; +} + + +/* +** For the entry that cursor pCur is point to, return as +** many bytes of the key or data as are available on the local +** b-tree page. Write the number of available bytes into *pAmt. +** +** The pointer returned is ephemeral. The key/data may move +** or be destroyed on the next call to any Btree routine, +** including calls from other threads against the same cache. +** Hence, a mutex on the BtShared should be held prior to calling +** this routine. +** +** These routines is used to get quick access to key and data +** in the common case where no overflow pages are used. +*/ +SQLITE_PRIVATE const void *sqlite3BtreePayloadFetch(BtCursor *pCur, u32 *pAmt){ + return fetchPayload(pCur, pAmt); +} + + +/* +** Move the cursor down to a new child page. The newPgno argument is the +** page number of the child page to move to. +** +** This function returns SQLITE_CORRUPT if the page-header flags field of +** the new child page does not match the flags field of the parent (i.e. +** if an intkey page appears to be the parent of a non-intkey page, or +** vice-versa). +*/ +static int moveToChild(BtCursor *pCur, u32 newPgno){ + BtShared *pBt = pCur->pBt; + + assert( cursorOwnsBtShared(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPageiPage>=0 ); + if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ + return SQLITE_CORRUPT_BKPT; + } + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + pCur->aiIdx[pCur->iPage] = pCur->ix; + pCur->apPage[pCur->iPage] = pCur->pPage; + pCur->ix = 0; + pCur->iPage++; + return getAndInitPage(pBt, newPgno, &pCur->pPage, pCur, pCur->curPagerFlags); +} + +#ifdef SQLITE_DEBUG +/* +** Page pParent is an internal (non-leaf) tree page. This function +** asserts that page number iChild is the left-child if the iIdx'th +** cell in page pParent. Or, if iIdx is equal to the total number of +** cells in pParent, that page number iChild is the right-child of +** the page. +*/ +static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){ + if( CORRUPT_DB ) return; /* The conditions tested below might not be true + ** in a corrupt database */ + assert( iIdx<=pParent->nCell ); + if( iIdx==pParent->nCell ){ + assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild ); + }else{ + assert( get4byte(findCell(pParent, iIdx))==iChild ); + } +} +#else +# define assertParentIndex(x,y,z) +#endif + +/* +** Move the cursor up to the parent page. +** +** pCur->idx is set to the cell index that contains the pointer +** to the page we are coming from. If we are coming from the +** right-most child page then pCur->idx is set to one more than +** the largest cell index. +*/ +static void moveToParent(BtCursor *pCur){ + MemPage *pLeaf; + assert( cursorOwnsBtShared(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage>0 ); + assert( pCur->pPage ); + assertParentIndex( + pCur->apPage[pCur->iPage-1], + pCur->aiIdx[pCur->iPage-1], + pCur->pPage->pgno + ); + testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + pCur->ix = pCur->aiIdx[pCur->iPage-1]; + pLeaf = pCur->pPage; + pCur->pPage = pCur->apPage[--pCur->iPage]; + releasePageNotNull(pLeaf); +} + +/* +** Move the cursor to point to the root page of its b-tree structure. +** +** If the table has a virtual root page, then the cursor is moved to point +** to the virtual root page instead of the actual root page. A table has a +** virtual root page when the actual root page contains no cells and a +** single child page. This can only happen with the table rooted at page 1. +** +** If the b-tree structure is empty, the cursor state is set to +** CURSOR_INVALID and this routine returns SQLITE_EMPTY. Otherwise, +** the cursor is set to point to the first cell located on the root +** (or virtual root) page and the cursor state is set to CURSOR_VALID. +** +** If this function returns successfully, it may be assumed that the +** page-header flags indicate that the [virtual] root-page is the expected +** kind of b-tree page (i.e. if when opening the cursor the caller did not +** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D, +** indicating a table b-tree, or if the caller did specify a KeyInfo +** structure the flags byte is set to 0x02 or 0x0A, indicating an index +** b-tree). +*/ +static int moveToRoot(BtCursor *pCur){ + MemPage *pRoot; + int rc = SQLITE_OK; + + assert( cursorOwnsBtShared(pCur) ); + assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); + assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); + assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); + assert( pCur->eState < CURSOR_REQUIRESEEK || pCur->iPage<0 ); + assert( pCur->pgnoRoot>0 || pCur->iPage<0 ); + + if( pCur->iPage>=0 ){ + if( pCur->iPage ){ + releasePageNotNull(pCur->pPage); + while( --pCur->iPage ){ + releasePageNotNull(pCur->apPage[pCur->iPage]); + } + pRoot = pCur->pPage = pCur->apPage[0]; + goto skip_init; + } + }else if( pCur->pgnoRoot==0 ){ + pCur->eState = CURSOR_INVALID; + return SQLITE_EMPTY; + }else{ + assert( pCur->iPage==(-1) ); + if( pCur->eState>=CURSOR_REQUIRESEEK ){ + if( pCur->eState==CURSOR_FAULT ){ + assert( pCur->skipNext!=SQLITE_OK ); + return pCur->skipNext; + } + sqlite3BtreeClearCursor(pCur); + } + rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->pPage, + 0, pCur->curPagerFlags); + if( rc!=SQLITE_OK ){ + pCur->eState = CURSOR_INVALID; + return rc; + } + pCur->iPage = 0; + pCur->curIntKey = pCur->pPage->intKey; + } + pRoot = pCur->pPage; + assert( pRoot->pgno==pCur->pgnoRoot || CORRUPT_DB ); + + /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor + ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is + ** NULL, the caller expects a table b-tree. If this is not the case, + ** return an SQLITE_CORRUPT error. + ** + ** Earlier versions of SQLite assumed that this test could not fail + ** if the root page was already loaded when this function was called (i.e. + ** if pCur->iPage>=0). But this is not so if the database is corrupted + ** in such a way that page pRoot is linked into a second b-tree table + ** (or the freelist). */ + assert( pRoot->intKey==1 || pRoot->intKey==0 ); + if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){ + return SQLITE_CORRUPT_PAGE(pCur->pPage); + } + +skip_init: + pCur->ix = 0; + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl); + + if( pRoot->nCell>0 ){ + pCur->eState = CURSOR_VALID; + }else if( !pRoot->leaf ){ + Pgno subpage; + if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT; + subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); + pCur->eState = CURSOR_VALID; + rc = moveToChild(pCur, subpage); + }else{ + pCur->eState = CURSOR_INVALID; + rc = SQLITE_EMPTY; + } + return rc; +} + +/* +** Move the cursor down to the left-most leaf entry beneath the +** entry to which it is currently pointing. +** +** The left-most leaf is the one with the smallest key - the first +** in ascending order. +*/ +static int moveToLeftmost(BtCursor *pCur){ + Pgno pgno; + int rc = SQLITE_OK; + MemPage *pPage; + + assert( cursorOwnsBtShared(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){ + assert( pCur->ixnCell ); + pgno = get4byte(findCell(pPage, pCur->ix)); + rc = moveToChild(pCur, pgno); + } + return rc; +} + +/* +** Move the cursor down to the right-most leaf entry beneath the +** page to which it is currently pointing. Notice the difference +** between moveToLeftmost() and moveToRightmost(). moveToLeftmost() +** finds the left-most entry beneath the *entry* whereas moveToRightmost() +** finds the right-most entry beneath the *page*. +** +** The right-most entry is the one with the largest key - the last +** key in ascending order. +*/ +static int moveToRightmost(BtCursor *pCur){ + Pgno pgno; + int rc = SQLITE_OK; + MemPage *pPage = 0; + + assert( cursorOwnsBtShared(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + while( !(pPage = pCur->pPage)->leaf ){ + pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); + pCur->ix = pPage->nCell; + rc = moveToChild(pCur, pgno); + if( rc ) return rc; + } + pCur->ix = pPage->nCell-1; + assert( pCur->info.nSize==0 ); + assert( (pCur->curFlags & BTCF_ValidNKey)==0 ); + return SQLITE_OK; +} + +/* Move the cursor to the first entry in the table. Return SQLITE_OK +** on success. Set *pRes to 0 if the cursor actually points to something +** or set *pRes to 1 if the table is empty. +*/ +SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ + int rc; + + assert( cursorOwnsBtShared(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + rc = moveToRoot(pCur); + if( rc==SQLITE_OK ){ + assert( pCur->pPage->nCell>0 ); + *pRes = 0; + rc = moveToLeftmost(pCur); + }else if( rc==SQLITE_EMPTY ){ + assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); + *pRes = 1; + rc = SQLITE_OK; + } + return rc; +} + +/* Move the cursor to the last entry in the table. Return SQLITE_OK +** on success. Set *pRes to 0 if the cursor actually points to something +** or set *pRes to 1 if the table is empty. +*/ +SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ + int rc; + + assert( cursorOwnsBtShared(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + + /* If the cursor already points to the last entry, this is a no-op. */ + if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){ +#ifdef SQLITE_DEBUG + /* This block serves to assert() that the cursor really does point + ** to the last entry in the b-tree. */ + int ii; + for(ii=0; iiiPage; ii++){ + assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell ); + } + assert( pCur->ix==pCur->pPage->nCell-1 || CORRUPT_DB ); + testcase( pCur->ix!=pCur->pPage->nCell-1 ); + /* ^-- dbsqlfuzz b92b72e4de80b5140c30ab71372ca719b8feb618 */ + assert( pCur->pPage->leaf ); +#endif + *pRes = 0; + return SQLITE_OK; + } + + rc = moveToRoot(pCur); + if( rc==SQLITE_OK ){ + assert( pCur->eState==CURSOR_VALID ); + *pRes = 0; + rc = moveToRightmost(pCur); + if( rc==SQLITE_OK ){ + pCur->curFlags |= BTCF_AtLast; + }else{ + pCur->curFlags &= ~BTCF_AtLast; + } + }else if( rc==SQLITE_EMPTY ){ + assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); + *pRes = 1; + rc = SQLITE_OK; + } + return rc; +} + +/* Move the cursor so that it points to an entry in a table (a.k.a INTKEY) +** table near the key intKey. Return a success code. +** +** If an exact match is not found, then the cursor is always +** left pointing at a leaf page which would hold the entry if it +** were present. The cursor might point to an entry that comes +** before or after the key. +** +** An integer is written into *pRes which is the result of +** comparing the key with the entry to which the cursor is +** pointing. The meaning of the integer written into +** *pRes is as follows: +** +** *pRes<0 The cursor is left pointing at an entry that +** is smaller than intKey or if the table is empty +** and the cursor is therefore left point to nothing. +** +** *pRes==0 The cursor is left pointing at an entry that +** exactly matches intKey. +** +** *pRes>0 The cursor is left pointing at an entry that +** is larger than intKey. +*/ +SQLITE_PRIVATE int sqlite3BtreeTableMoveto( + BtCursor *pCur, /* The cursor to be moved */ + i64 intKey, /* The table key */ + int biasRight, /* If true, bias the search to the high end */ + int *pRes /* Write search results here */ +){ + int rc; + + assert( cursorOwnsBtShared(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + assert( pRes ); + assert( pCur->pKeyInfo==0 ); + assert( pCur->eState!=CURSOR_VALID || pCur->curIntKey!=0 ); + + /* If the cursor is already positioned at the point we are trying + ** to move to, then just return without doing any work */ + if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 ){ + if( pCur->info.nKey==intKey ){ + *pRes = 0; + return SQLITE_OK; + } + if( pCur->info.nKeycurFlags & BTCF_AtLast)!=0 ){ + *pRes = -1; + return SQLITE_OK; + } + /* If the requested key is one more than the previous key, then + ** try to get there using sqlite3BtreeNext() rather than a full + ** binary search. This is an optimization only. The correct answer + ** is still obtained without this case, only a little more slowely */ + if( pCur->info.nKey+1==intKey ){ + *pRes = 0; + rc = sqlite3BtreeNext(pCur, 0); + if( rc==SQLITE_OK ){ + getCellInfo(pCur); + if( pCur->info.nKey==intKey ){ + return SQLITE_OK; + } + }else if( rc!=SQLITE_DONE ){ + return rc; + } + } + } + } + +#ifdef SQLITE_DEBUG + pCur->pBtree->nSeek++; /* Performance measurement during testing */ +#endif + + rc = moveToRoot(pCur); + if( rc ){ + if( rc==SQLITE_EMPTY ){ + assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); + *pRes = -1; + return SQLITE_OK; + } + return rc; + } + assert( pCur->pPage ); + assert( pCur->pPage->isInit ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->pPage->nCell > 0 ); + assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey ); + assert( pCur->curIntKey ); + + for(;;){ + int lwr, upr, idx, c; + Pgno chldPg; + MemPage *pPage = pCur->pPage; + u8 *pCell; /* Pointer to current cell in pPage */ + + /* pPage->nCell must be greater than zero. If this is the root-page + ** the cursor would have been INVALID above and this for(;;) loop + ** not run. If this is not the root-page, then the moveToChild() routine + ** would have already detected db corruption. Similarly, pPage must + ** be the right kind (index or table) of b-tree page. Otherwise + ** a moveToChild() or moveToRoot() call would have detected corruption. */ + assert( pPage->nCell>0 ); + assert( pPage->intKey ); + lwr = 0; + upr = pPage->nCell-1; + assert( biasRight==0 || biasRight==1 ); + idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */ + for(;;){ + i64 nCellKey; + pCell = findCellPastPtr(pPage, idx); + if( pPage->intKeyLeaf ){ + while( 0x80 <= *(pCell++) ){ + if( pCell>=pPage->aDataEnd ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + } + } + getVarint(pCell, (u64*)&nCellKey); + if( nCellKeyupr ){ c = -1; break; } + }else if( nCellKey>intKey ){ + upr = idx-1; + if( lwr>upr ){ c = +1; break; } + }else{ + assert( nCellKey==intKey ); + pCur->ix = (u16)idx; + if( !pPage->leaf ){ + lwr = idx; + goto moveto_table_next_layer; + }else{ + pCur->curFlags |= BTCF_ValidNKey; + pCur->info.nKey = nCellKey; + pCur->info.nSize = 0; + *pRes = 0; + return SQLITE_OK; + } + } + assert( lwr+upr>=0 ); + idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */ + } + assert( lwr==upr+1 || !pPage->leaf ); + assert( pPage->isInit ); + if( pPage->leaf ){ + assert( pCur->ixpPage->nCell ); + pCur->ix = (u16)idx; + *pRes = c; + rc = SQLITE_OK; + goto moveto_table_finish; + } +moveto_table_next_layer: + if( lwr>=pPage->nCell ){ + chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); + }else{ + chldPg = get4byte(findCell(pPage, lwr)); + } + pCur->ix = (u16)lwr; + rc = moveToChild(pCur, chldPg); + if( rc ) break; + } +moveto_table_finish: + pCur->info.nSize = 0; + assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); + return rc; +} + +/* Move the cursor so that it points to an entry in an index table +** near the key pIdxKey. Return a success code. +** +** If an exact match is not found, then the cursor is always +** left pointing at a leaf page which would hold the entry if it +** were present. The cursor might point to an entry that comes +** before or after the key. +** +** An integer is written into *pRes which is the result of +** comparing the key with the entry to which the cursor is +** pointing. The meaning of the integer written into +** *pRes is as follows: +** +** *pRes<0 The cursor is left pointing at an entry that +** is smaller than pIdxKey or if the table is empty +** and the cursor is therefore left point to nothing. +** +** *pRes==0 The cursor is left pointing at an entry that +** exactly matches pIdxKey. +** +** *pRes>0 The cursor is left pointing at an entry that +** is larger than pIdxKey. +** +** The pIdxKey->eqSeen field is set to 1 if there +** exists an entry in the table that exactly matches pIdxKey. +*/ +SQLITE_PRIVATE int sqlite3BtreeIndexMoveto( + BtCursor *pCur, /* The cursor to be moved */ + UnpackedRecord *pIdxKey, /* Unpacked index key */ + int *pRes /* Write search results here */ +){ + int rc; + RecordCompare xRecordCompare; + + assert( cursorOwnsBtShared(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + assert( pRes ); + assert( pCur->pKeyInfo!=0 ); + +#ifdef SQLITE_DEBUG + pCur->pBtree->nSeek++; /* Performance measurement during testing */ +#endif + + xRecordCompare = sqlite3VdbeFindCompare(pIdxKey); + pIdxKey->errCode = 0; + assert( pIdxKey->default_rc==1 + || pIdxKey->default_rc==0 + || pIdxKey->default_rc==-1 + ); + + rc = moveToRoot(pCur); + if( rc ){ + if( rc==SQLITE_EMPTY ){ + assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); + *pRes = -1; + return SQLITE_OK; + } + return rc; + } + assert( pCur->pPage ); + assert( pCur->pPage->isInit ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->pPage->nCell > 0 ); + assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey ); + assert( pCur->curIntKey || pIdxKey ); + for(;;){ + int lwr, upr, idx, c; + Pgno chldPg; + MemPage *pPage = pCur->pPage; + u8 *pCell; /* Pointer to current cell in pPage */ + + /* pPage->nCell must be greater than zero. If this is the root-page + ** the cursor would have been INVALID above and this for(;;) loop + ** not run. If this is not the root-page, then the moveToChild() routine + ** would have already detected db corruption. Similarly, pPage must + ** be the right kind (index or table) of b-tree page. Otherwise + ** a moveToChild() or moveToRoot() call would have detected corruption. */ + assert( pPage->nCell>0 ); + assert( pPage->intKey==(pIdxKey==0) ); + lwr = 0; + upr = pPage->nCell-1; + idx = upr>>1; /* idx = (lwr+upr)/2; */ + for(;;){ + int nCell; /* Size of the pCell cell in bytes */ + pCell = findCellPastPtr(pPage, idx); + + /* The maximum supported page-size is 65536 bytes. This means that + ** the maximum number of record bytes stored on an index B-Tree + ** page is less than 16384 bytes and may be stored as a 2-byte + ** varint. This information is used to attempt to avoid parsing + ** the entire cell by checking for the cases where the record is + ** stored entirely within the b-tree page by inspecting the first + ** 2 bytes of the cell. + */ + nCell = pCell[0]; + if( nCell<=pPage->max1bytePayload ){ + /* This branch runs if the record-size field of the cell is a + ** single byte varint and the record fits entirely on the main + ** b-tree page. */ + testcase( pCell+nCell+1==pPage->aDataEnd ); + c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey); + }else if( !(pCell[1] & 0x80) + && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal + ){ + /* The record-size field is a 2 byte varint and the record + ** fits entirely on the main b-tree page. */ + testcase( pCell+nCell+2==pPage->aDataEnd ); + c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey); + }else{ + /* The record flows over onto one or more overflow pages. In + ** this case the whole cell needs to be parsed, a buffer allocated + ** and accessPayload() used to retrieve the record into the + ** buffer before VdbeRecordCompare() can be called. + ** + ** If the record is corrupt, the xRecordCompare routine may read + ** up to two varints past the end of the buffer. An extra 18 + ** bytes of padding is allocated at the end of the buffer in + ** case this happens. */ + void *pCellKey; + u8 * const pCellBody = pCell - pPage->childPtrSize; + const int nOverrun = 18; /* Size of the overrun padding */ + pPage->xParseCell(pPage, pCellBody, &pCur->info); + nCell = (int)pCur->info.nKey; + testcase( nCell<0 ); /* True if key size is 2^32 or more */ + testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */ + testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */ + testcase( nCell==2 ); /* Minimum legal index key size */ + if( nCell<2 || nCell/pCur->pBt->usableSize>pCur->pBt->nPage ){ + rc = SQLITE_CORRUPT_PAGE(pPage); + goto moveto_index_finish; + } + pCellKey = sqlite3Malloc( nCell+nOverrun ); + if( pCellKey==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto moveto_index_finish; + } + pCur->ix = (u16)idx; + rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0); + memset(((u8*)pCellKey)+nCell,0,nOverrun); /* Fix uninit warnings */ + pCur->curFlags &= ~BTCF_ValidOvfl; + if( rc ){ + sqlite3_free(pCellKey); + goto moveto_index_finish; + } + c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey); + sqlite3_free(pCellKey); + } + assert( + (pIdxKey->errCode!=SQLITE_CORRUPT || c==0) + && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed) + ); + if( c<0 ){ + lwr = idx+1; + }else if( c>0 ){ + upr = idx-1; + }else{ + assert( c==0 ); + *pRes = 0; + rc = SQLITE_OK; + pCur->ix = (u16)idx; + if( pIdxKey->errCode ) rc = SQLITE_CORRUPT_BKPT; + goto moveto_index_finish; + } + if( lwr>upr ) break; + assert( lwr+upr>=0 ); + idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */ + } + assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) ); + assert( pPage->isInit ); + if( pPage->leaf ){ + assert( pCur->ixpPage->nCell || CORRUPT_DB ); + pCur->ix = (u16)idx; + *pRes = c; + rc = SQLITE_OK; + goto moveto_index_finish; + } + if( lwr>=pPage->nCell ){ + chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); + }else{ + chldPg = get4byte(findCell(pPage, lwr)); + } + pCur->ix = (u16)lwr; + rc = moveToChild(pCur, chldPg); + if( rc ) break; + } +moveto_index_finish: + pCur->info.nSize = 0; + assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); + return rc; +} + + +/* +** Return TRUE if the cursor is not pointing at an entry of the table. +** +** TRUE will be returned after a call to sqlite3BtreeNext() moves +** past the last entry in the table or sqlite3BtreePrev() moves past +** the first entry. TRUE is also returned if the table is empty. +*/ +SQLITE_PRIVATE int sqlite3BtreeEof(BtCursor *pCur){ + /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries + ** have been deleted? This API will need to change to return an error code + ** as well as the boolean result value. + */ + return (CURSOR_VALID!=pCur->eState); +} + +/* +** Return an estimate for the number of rows in the table that pCur is +** pointing to. Return a negative number if no estimate is currently +** available. +*/ +SQLITE_PRIVATE i64 sqlite3BtreeRowCountEst(BtCursor *pCur){ + i64 n; + u8 i; + + assert( cursorOwnsBtShared(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + + /* Currently this interface is only called by the OP_IfSmaller + ** opcode, and it that case the cursor will always be valid and + ** will always point to a leaf node. */ + if( NEVER(pCur->eState!=CURSOR_VALID) ) return -1; + if( NEVER(pCur->pPage->leaf==0) ) return -1; + + n = pCur->pPage->nCell; + for(i=0; iiPage; i++){ + n *= pCur->apPage[i]->nCell; + } + return n; +} + +/* +** Advance the cursor to the next entry in the database. +** Return value: +** +** SQLITE_OK success +** SQLITE_DONE cursor is already pointing at the last element +** otherwise some kind of error occurred +** +** The main entry point is sqlite3BtreeNext(). That routine is optimized +** for the common case of merely incrementing the cell counter BtCursor.aiIdx +** to the next cell on the current page. The (slower) btreeNext() helper +** routine is called when it is necessary to move to a different page or +** to restore the cursor. +** +** If bit 0x01 of the F argument in sqlite3BtreeNext(C,F) is 1, then the +** cursor corresponds to an SQL index and this routine could have been +** skipped if the SQL index had been a unique index. The F argument +** is a hint to the implement. SQLite btree implementation does not use +** this hint, but COMDB2 does. +*/ +static SQLITE_NOINLINE int btreeNext(BtCursor *pCur){ + int rc; + int idx; + MemPage *pPage; + + assert( cursorOwnsBtShared(pCur) ); + if( pCur->eState!=CURSOR_VALID ){ + assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); + rc = restoreCursorPosition(pCur); + if( rc!=SQLITE_OK ){ + return rc; + } + if( CURSOR_INVALID==pCur->eState ){ + return SQLITE_DONE; + } + if( pCur->eState==CURSOR_SKIPNEXT ){ + pCur->eState = CURSOR_VALID; + if( pCur->skipNext>0 ) return SQLITE_OK; + } + } + + pPage = pCur->pPage; + idx = ++pCur->ix; + if( !pPage->isInit || sqlite3FaultSim(412) ){ + /* The only known way for this to happen is for there to be a + ** recursive SQL function that does a DELETE operation as part of a + ** SELECT which deletes content out from under an active cursor + ** in a corrupt database file where the table being DELETE-ed from + ** has pages in common with the table being queried. See TH3 + ** module cov1/btree78.test testcase 220 (2018-06-08) for an + ** example. */ + return SQLITE_CORRUPT_BKPT; + } + + if( idx>=pPage->nCell ){ + if( !pPage->leaf ){ + rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); + if( rc ) return rc; + return moveToLeftmost(pCur); + } + do{ + if( pCur->iPage==0 ){ + pCur->eState = CURSOR_INVALID; + return SQLITE_DONE; + } + moveToParent(pCur); + pPage = pCur->pPage; + }while( pCur->ix>=pPage->nCell ); + if( pPage->intKey ){ + return sqlite3BtreeNext(pCur, 0); + }else{ + return SQLITE_OK; + } + } + if( pPage->leaf ){ + return SQLITE_OK; + }else{ + return moveToLeftmost(pCur); + } +} +SQLITE_PRIVATE int sqlite3BtreeNext(BtCursor *pCur, int flags){ + MemPage *pPage; + UNUSED_PARAMETER( flags ); /* Used in COMDB2 but not native SQLite */ + assert( cursorOwnsBtShared(pCur) ); + assert( flags==0 || flags==1 ); + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur); + pPage = pCur->pPage; + if( (++pCur->ix)>=pPage->nCell ){ + pCur->ix--; + return btreeNext(pCur); + } + if( pPage->leaf ){ + return SQLITE_OK; + }else{ + return moveToLeftmost(pCur); + } +} + +/* +** Step the cursor to the back to the previous entry in the database. +** Return values: +** +** SQLITE_OK success +** SQLITE_DONE the cursor is already on the first element of the table +** otherwise some kind of error occurred +** +** The main entry point is sqlite3BtreePrevious(). That routine is optimized +** for the common case of merely decrementing the cell counter BtCursor.aiIdx +** to the previous cell on the current page. The (slower) btreePrevious() +** helper routine is called when it is necessary to move to a different page +** or to restore the cursor. +** +** If bit 0x01 of the F argument to sqlite3BtreePrevious(C,F) is 1, then +** the cursor corresponds to an SQL index and this routine could have been +** skipped if the SQL index had been a unique index. The F argument is a +** hint to the implement. The native SQLite btree implementation does not +** use this hint, but COMDB2 does. +*/ +static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur){ + int rc; + MemPage *pPage; + + assert( cursorOwnsBtShared(pCur) ); + assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 ); + assert( pCur->info.nSize==0 ); + if( pCur->eState!=CURSOR_VALID ){ + rc = restoreCursorPosition(pCur); + if( rc!=SQLITE_OK ){ + return rc; + } + if( CURSOR_INVALID==pCur->eState ){ + return SQLITE_DONE; + } + if( CURSOR_SKIPNEXT==pCur->eState ){ + pCur->eState = CURSOR_VALID; + if( pCur->skipNext<0 ) return SQLITE_OK; + } + } + + pPage = pCur->pPage; + assert( pPage->isInit ); + if( !pPage->leaf ){ + int idx = pCur->ix; + rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); + if( rc ) return rc; + rc = moveToRightmost(pCur); + }else{ + while( pCur->ix==0 ){ + if( pCur->iPage==0 ){ + pCur->eState = CURSOR_INVALID; + return SQLITE_DONE; + } + moveToParent(pCur); + } + assert( pCur->info.nSize==0 ); + assert( (pCur->curFlags & (BTCF_ValidOvfl))==0 ); + + pCur->ix--; + pPage = pCur->pPage; + if( pPage->intKey && !pPage->leaf ){ + rc = sqlite3BtreePrevious(pCur, 0); + }else{ + rc = SQLITE_OK; + } + } + return rc; +} +SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor *pCur, int flags){ + assert( cursorOwnsBtShared(pCur) ); + assert( flags==0 || flags==1 ); + UNUSED_PARAMETER( flags ); /* Used in COMDB2 but not native SQLite */ + pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey); + pCur->info.nSize = 0; + if( pCur->eState!=CURSOR_VALID + || pCur->ix==0 + || pCur->pPage->leaf==0 + ){ + return btreePrevious(pCur); + } + pCur->ix--; + return SQLITE_OK; +} + +/* +** Allocate a new page from the database file. +** +** The new page is marked as dirty. (In other words, sqlite3PagerWrite() +** has already been called on the new page.) The new page has also +** been referenced and the calling routine is responsible for calling +** sqlite3PagerUnref() on the new page when it is done. +** +** SQLITE_OK is returned on success. Any other return value indicates +** an error. *ppPage is set to NULL in the event of an error. +** +** If the "nearby" parameter is not 0, then an effort is made to +** locate a page close to the page number "nearby". This can be used in an +** attempt to keep related pages close to each other in the database file, +** which in turn can make database access faster. +** +** If the eMode parameter is BTALLOC_EXACT and the nearby page exists +** anywhere on the free-list, then it is guaranteed to be returned. If +** eMode is BTALLOC_LT then the page returned will be less than or equal +** to nearby if any such page exists. If eMode is BTALLOC_ANY then there +** are no restrictions on which page is returned. +*/ +static int allocateBtreePage( + BtShared *pBt, /* The btree */ + MemPage **ppPage, /* Store pointer to the allocated page here */ + Pgno *pPgno, /* Store the page number here */ + Pgno nearby, /* Search for a page near this one */ + u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */ +){ + MemPage *pPage1; + int rc; + u32 n; /* Number of pages on the freelist */ + u32 k; /* Number of leaves on the trunk of the freelist */ + MemPage *pTrunk = 0; + MemPage *pPrevTrunk = 0; + Pgno mxPage; /* Total size of the database file */ + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); + pPage1 = pBt->pPage1; + mxPage = btreePagecount(pBt); + /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36 + ** stores stores the total number of pages on the freelist. */ + n = get4byte(&pPage1->aData[36]); + testcase( n==mxPage-1 ); + if( n>=mxPage ){ + return SQLITE_CORRUPT_BKPT; + } + if( n>0 ){ + /* There are pages on the freelist. Reuse one of those pages. */ + Pgno iTrunk; + u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ + u32 nSearch = 0; /* Count of the number of search attempts */ + + /* If eMode==BTALLOC_EXACT and a query of the pointer-map + ** shows that the page 'nearby' is somewhere on the free-list, then + ** the entire-list will be searched for that page. + */ +#ifndef SQLITE_OMIT_AUTOVACUUM + if( eMode==BTALLOC_EXACT ){ + if( nearby<=mxPage ){ + u8 eType; + assert( nearby>0 ); + assert( pBt->autoVacuum ); + rc = ptrmapGet(pBt, nearby, &eType, 0); + if( rc ) return rc; + if( eType==PTRMAP_FREEPAGE ){ + searchList = 1; + } + } + }else if( eMode==BTALLOC_LE ){ + searchList = 1; + } +#endif + + /* Decrement the free-list count by 1. Set iTrunk to the index of the + ** first free-list trunk page. iPrevTrunk is initially 1. + */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) return rc; + put4byte(&pPage1->aData[36], n-1); + + /* The code within this loop is run only once if the 'searchList' variable + ** is not true. Otherwise, it runs once for each trunk-page on the + ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT) + ** or until a page less than 'nearby' is located (eMode==BTALLOC_LT) + */ + do { + pPrevTrunk = pTrunk; + if( pPrevTrunk ){ + /* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page + ** is the page number of the next freelist trunk page in the list or + ** zero if this is the last freelist trunk page. */ + iTrunk = get4byte(&pPrevTrunk->aData[0]); + }else{ + /* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32 + ** stores the page number of the first page of the freelist, or zero if + ** the freelist is empty. */ + iTrunk = get4byte(&pPage1->aData[32]); + } + testcase( iTrunk==mxPage ); + if( iTrunk>mxPage || nSearch++ > n ){ + rc = SQLITE_CORRUPT_PGNO(pPrevTrunk ? pPrevTrunk->pgno : 1); + }else{ + rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); + } + if( rc ){ + pTrunk = 0; + goto end_allocate_page; + } + assert( pTrunk!=0 ); + assert( pTrunk->aData!=0 ); + /* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page + ** is the number of leaf page pointers to follow. */ + k = get4byte(&pTrunk->aData[4]); + if( k==0 && !searchList ){ + /* The trunk has no leaves and the list is not being searched. + ** So extract the trunk page itself and use it as the newly + ** allocated page */ + assert( pPrevTrunk==0 ); + rc = sqlite3PagerWrite(pTrunk->pDbPage); + if( rc ){ + goto end_allocate_page; + } + *pPgno = iTrunk; + memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); + *ppPage = pTrunk; + pTrunk = 0; + TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); + }else if( k>(u32)(pBt->usableSize/4 - 2) ){ + /* Value of k is out of range. Database corruption */ + rc = SQLITE_CORRUPT_PGNO(iTrunk); + goto end_allocate_page; +#ifndef SQLITE_OMIT_AUTOVACUUM + }else if( searchList + && (nearby==iTrunk || (iTrunkpDbPage); + if( rc ){ + goto end_allocate_page; + } + if( k==0 ){ + if( !pPrevTrunk ){ + memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); + }else{ + rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); + if( rc!=SQLITE_OK ){ + goto end_allocate_page; + } + memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); + } + }else{ + /* The trunk page is required by the caller but it contains + ** pointers to free-list leaves. The first leaf becomes a trunk + ** page in this case. + */ + MemPage *pNewTrunk; + Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); + if( iNewTrunk>mxPage ){ + rc = SQLITE_CORRUPT_PGNO(iTrunk); + goto end_allocate_page; + } + testcase( iNewTrunk==mxPage ); + rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0); + if( rc!=SQLITE_OK ){ + goto end_allocate_page; + } + rc = sqlite3PagerWrite(pNewTrunk->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(pNewTrunk); + goto end_allocate_page; + } + memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); + put4byte(&pNewTrunk->aData[4], k-1); + memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4); + releasePage(pNewTrunk); + if( !pPrevTrunk ){ + assert( sqlite3PagerIswriteable(pPage1->pDbPage) ); + put4byte(&pPage1->aData[32], iNewTrunk); + }else{ + rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); + if( rc ){ + goto end_allocate_page; + } + put4byte(&pPrevTrunk->aData[0], iNewTrunk); + } + } + pTrunk = 0; + TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); +#endif + }else if( k>0 ){ + /* Extract a leaf from the trunk */ + u32 closest; + Pgno iPage; + unsigned char *aData = pTrunk->aData; + if( nearby>0 ){ + u32 i; + closest = 0; + if( eMode==BTALLOC_LE ){ + for(i=0; imxPage || iPage<2 ){ + rc = SQLITE_CORRUPT_PGNO(iTrunk); + goto end_allocate_page; + } + testcase( iPage==mxPage ); + if( !searchList + || (iPage==nearby || (iPagepgno, n-1)); + rc = sqlite3PagerWrite(pTrunk->pDbPage); + if( rc ) goto end_allocate_page; + if( closestpDbPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + *ppPage = 0; + } + } + searchList = 0; + } + } + releasePage(pPrevTrunk); + pPrevTrunk = 0; + }while( searchList ); + }else{ + /* There are no pages on the freelist, so append a new page to the + ** database image. + ** + ** Normally, new pages allocated by this block can be requested from the + ** pager layer with the 'no-content' flag set. This prevents the pager + ** from trying to read the pages content from disk. However, if the + ** current transaction has already run one or more incremental-vacuum + ** steps, then the page we are about to allocate may contain content + ** that is required in the event of a rollback. In this case, do + ** not set the no-content flag. This causes the pager to load and journal + ** the current page content before overwriting it. + ** + ** Note that the pager will not actually attempt to load or journal + ** content for any page that really does lie past the end of the database + ** file on disk. So the effects of disabling the no-content optimization + ** here are confined to those pages that lie between the end of the + ** database image and the end of the database file. + */ + int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0; + + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( rc ) return rc; + pBt->nPage++; + if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; + +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){ + /* If *pPgno refers to a pointer-map page, allocate two new pages + ** at the end of the file instead of one. The first allocated page + ** becomes a new pointer-map page, the second is used by the caller. + */ + MemPage *pPg = 0; + TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); + assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); + rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pPg->pDbPage); + releasePage(pPg); + } + if( rc ) return rc; + pBt->nPage++; + if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; } + } +#endif + put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage); + *pPgno = pBt->nPage; + + assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); + rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent); + if( rc ) return rc; + rc = sqlite3PagerWrite((*ppPage)->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + *ppPage = 0; + } + TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); + } + + assert( CORRUPT_DB || *pPgno!=PENDING_BYTE_PAGE(pBt) ); + +end_allocate_page: + releasePage(pTrunk); + releasePage(pPrevTrunk); + assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 ); + assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 ); + return rc; +} + +/* +** This function is used to add page iPage to the database file free-list. +** It is assumed that the page is not already a part of the free-list. +** +** The value passed as the second argument to this function is optional. +** If the caller happens to have a pointer to the MemPage object +** corresponding to page iPage handy, it may pass it as the second value. +** Otherwise, it may pass NULL. +** +** If a pointer to a MemPage object is passed as the second argument, +** its reference count is not altered by this function. +*/ +static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ + MemPage *pTrunk = 0; /* Free-list trunk page */ + Pgno iTrunk = 0; /* Page number of free-list trunk page */ + MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */ + MemPage *pPage; /* Page being freed. May be NULL. */ + int rc; /* Return Code */ + u32 nFree; /* Initial number of pages on free-list */ + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( CORRUPT_DB || iPage>1 ); + assert( !pMemPage || pMemPage->pgno==iPage ); + + if( iPage<2 || iPage>pBt->nPage ){ + return SQLITE_CORRUPT_BKPT; + } + if( pMemPage ){ + pPage = pMemPage; + sqlite3PagerRef(pPage->pDbPage); + }else{ + pPage = btreePageLookup(pBt, iPage); + } + + /* Increment the free page count on pPage1 */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) goto freepage_out; + nFree = get4byte(&pPage1->aData[36]); + put4byte(&pPage1->aData[36], nFree+1); + + if( pBt->btsFlags & BTS_SECURE_DELETE ){ + /* If the secure_delete option is enabled, then + ** always fully overwrite deleted information with zeros. + */ + if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) + || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) + ){ + goto freepage_out; + } + memset(pPage->aData, 0, pPage->pBt->pageSize); + } + + /* If the database supports auto-vacuum, write an entry in the pointer-map + ** to indicate that the page is free. + */ + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); + if( rc ) goto freepage_out; + } + + /* Now manipulate the actual database free-list structure. There are two + ** possibilities. If the free-list is currently empty, or if the first + ** trunk page in the free-list is full, then this page will become a + ** new free-list trunk page. Otherwise, it will become a leaf of the + ** first trunk page in the current free-list. This block tests if it + ** is possible to add the page as a new free-list leaf. + */ + if( nFree!=0 ){ + u32 nLeaf; /* Initial number of leaf cells on trunk page */ + + iTrunk = get4byte(&pPage1->aData[32]); + if( iTrunk>btreePagecount(pBt) ){ + rc = SQLITE_CORRUPT_BKPT; + goto freepage_out; + } + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + if( rc!=SQLITE_OK ){ + goto freepage_out; + } + + nLeaf = get4byte(&pTrunk->aData[4]); + assert( pBt->usableSize>32 ); + if( nLeaf > (u32)pBt->usableSize/4 - 2 ){ + rc = SQLITE_CORRUPT_BKPT; + goto freepage_out; + } + if( nLeaf < (u32)pBt->usableSize/4 - 8 ){ + /* In this case there is room on the trunk page to insert the page + ** being freed as a new leaf. + ** + ** Note that the trunk page is not really full until it contains + ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have + ** coded. But due to a coding error in versions of SQLite prior to + ** 3.6.0, databases with freelist trunk pages holding more than + ** usableSize/4 - 8 entries will be reported as corrupt. In order + ** to maintain backwards compatibility with older versions of SQLite, + ** we will continue to restrict the number of entries to usableSize/4 - 8 + ** for now. At some point in the future (once everyone has upgraded + ** to 3.6.0 or later) we should consider fixing the conditional above + ** to read "usableSize/4-2" instead of "usableSize/4-8". + ** + ** EVIDENCE-OF: R-19920-11576 However, newer versions of SQLite still + ** avoid using the last six entries in the freelist trunk page array in + ** order that database files created by newer versions of SQLite can be + ** read by older versions of SQLite. + */ + rc = sqlite3PagerWrite(pTrunk->pDbPage); + if( rc==SQLITE_OK ){ + put4byte(&pTrunk->aData[4], nLeaf+1); + put4byte(&pTrunk->aData[8+nLeaf*4], iPage); + if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){ + sqlite3PagerDontWrite(pPage->pDbPage); + } + rc = btreeSetHasContent(pBt, iPage); + } + TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno)); + goto freepage_out; + } + } + + /* If control flows to this point, then it was not possible to add the + ** the page being freed as a leaf page of the first trunk in the free-list. + ** Possibly because the free-list is empty, or possibly because the + ** first trunk in the free-list is full. Either way, the page being freed + ** will become the new first trunk page in the free-list. + */ + if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ + goto freepage_out; + } + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc!=SQLITE_OK ){ + goto freepage_out; + } + put4byte(pPage->aData, iTrunk); + put4byte(&pPage->aData[4], 0); + put4byte(&pPage1->aData[32], iPage); + TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk)); + +freepage_out: + if( pPage ){ + pPage->isInit = 0; + } + releasePage(pPage); + releasePage(pTrunk); + return rc; +} +static void freePage(MemPage *pPage, int *pRC){ + if( (*pRC)==SQLITE_OK ){ + *pRC = freePage2(pPage->pBt, pPage, pPage->pgno); + } +} + +/* +** Free the overflow pages associated with the given Cell. +*/ +static SQLITE_NOINLINE int clearCellOverflow( + MemPage *pPage, /* The page that contains the Cell */ + unsigned char *pCell, /* First byte of the Cell */ + CellInfo *pInfo /* Size information about the cell */ +){ + BtShared *pBt; + Pgno ovflPgno; + int rc; + int nOvfl; + u32 ovflPageSize; + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pInfo->nLocal!=pInfo->nPayload ); + testcase( pCell + pInfo->nSize == pPage->aDataEnd ); + testcase( pCell + (pInfo->nSize-1) == pPage->aDataEnd ); + if( pCell + pInfo->nSize > pPage->aDataEnd ){ + /* Cell extends past end of page */ + return SQLITE_CORRUPT_PAGE(pPage); + } + ovflPgno = get4byte(pCell + pInfo->nSize - 4); + pBt = pPage->pBt; + assert( pBt->usableSize > 4 ); + ovflPageSize = pBt->usableSize - 4; + nOvfl = (pInfo->nPayload - pInfo->nLocal + ovflPageSize - 1)/ovflPageSize; + assert( nOvfl>0 || + (CORRUPT_DB && (pInfo->nPayload + ovflPageSize)btreePagecount(pBt) ){ + /* 0 is not a legal page number and page 1 cannot be an + ** overflow page. Therefore if ovflPgno<2 or past the end of the + ** file the database must be corrupt. */ + return SQLITE_CORRUPT_BKPT; + } + if( nOvfl ){ + rc = getOverflowPage(pBt, ovflPgno, &pOvfl, &iNext); + if( rc ) return rc; + } + + if( ( pOvfl || ((pOvfl = btreePageLookup(pBt, ovflPgno))!=0) ) + && sqlite3PagerPageRefcount(pOvfl->pDbPage)!=1 + ){ + /* There is no reason any cursor should have an outstanding reference + ** to an overflow page belonging to a cell that is being deleted/updated. + ** So if there exists more than one reference to this page, then it + ** must not really be an overflow page and the database must be corrupt. + ** It is helpful to detect this before calling freePage2(), as + ** freePage2() may zero the page contents if secure-delete mode is + ** enabled. If this 'overflow' page happens to be a page that the + ** caller is iterating through or using in some other way, this + ** can be problematic. + */ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = freePage2(pBt, pOvfl, ovflPgno); + } + + if( pOvfl ){ + sqlite3PagerUnref(pOvfl->pDbPage); + } + if( rc ) return rc; + ovflPgno = iNext; + } + return SQLITE_OK; +} + +/* Call xParseCell to compute the size of a cell. If the cell contains +** overflow, then invoke cellClearOverflow to clear out that overflow. +** STore the result code (SQLITE_OK or some error code) in rc. +** +** Implemented as macro to force inlining for performance. +*/ +#define BTREE_CLEAR_CELL(rc, pPage, pCell, sInfo) \ + pPage->xParseCell(pPage, pCell, &sInfo); \ + if( sInfo.nLocal!=sInfo.nPayload ){ \ + rc = clearCellOverflow(pPage, pCell, &sInfo); \ + }else{ \ + rc = SQLITE_OK; \ + } + + +/* +** Create the byte sequence used to represent a cell on page pPage +** and write that byte sequence into pCell[]. Overflow pages are +** allocated and filled in as necessary. The calling procedure +** is responsible for making sure sufficient space has been allocated +** for pCell[]. +** +** Note that pCell does not necessary need to point to the pPage->aData +** area. pCell might point to some temporary storage. The cell will +** be constructed in this temporary area then copied into pPage->aData +** later. +*/ +static int fillInCell( + MemPage *pPage, /* The page that contains the cell */ + unsigned char *pCell, /* Complete text of the cell */ + const BtreePayload *pX, /* Payload with which to construct the cell */ + int *pnSize /* Write cell size here */ +){ + int nPayload; + const u8 *pSrc; + int nSrc, n, rc, mn; + int spaceLeft; + MemPage *pToRelease; + unsigned char *pPrior; + unsigned char *pPayload; + BtShared *pBt; + Pgno pgnoOvfl; + int nHeader; + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + + /* pPage is not necessarily writeable since pCell might be auxiliary + ** buffer space that is separate from the pPage buffer area */ + assert( pCellaData || pCell>=&pPage->aData[pPage->pBt->pageSize] + || sqlite3PagerIswriteable(pPage->pDbPage) ); + + /* Fill in the header. */ + nHeader = pPage->childPtrSize; + if( pPage->intKey ){ + nPayload = pX->nData + pX->nZero; + pSrc = pX->pData; + nSrc = pX->nData; + assert( pPage->intKeyLeaf ); /* fillInCell() only called for leaves */ + nHeader += putVarint32(&pCell[nHeader], nPayload); + nHeader += putVarint(&pCell[nHeader], *(u64*)&pX->nKey); + }else{ + assert( pX->nKey<=0x7fffffff && pX->pKey!=0 ); + nSrc = nPayload = (int)pX->nKey; + pSrc = pX->pKey; + nHeader += putVarint32(&pCell[nHeader], nPayload); + } + + /* Fill in the payload */ + pPayload = &pCell[nHeader]; + if( nPayload<=pPage->maxLocal ){ + /* This is the common case where everything fits on the btree page + ** and no overflow pages are required. */ + n = nHeader + nPayload; + testcase( n==3 ); + testcase( n==4 ); + if( n<4 ) n = 4; + *pnSize = n; + assert( nSrc<=nPayload ); + testcase( nSrcminLocal; + n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4); + testcase( n==pPage->maxLocal ); + testcase( n==pPage->maxLocal+1 ); + if( n > pPage->maxLocal ) n = mn; + spaceLeft = n; + *pnSize = n + nHeader + 4; + pPrior = &pCell[nHeader+n]; + pToRelease = 0; + pgnoOvfl = 0; + pBt = pPage->pBt; + + /* At this point variables should be set as follows: + ** + ** nPayload Total payload size in bytes + ** pPayload Begin writing payload here + ** spaceLeft Space available at pPayload. If nPayload>spaceLeft, + ** that means content must spill into overflow pages. + ** *pnSize Size of the local cell (not counting overflow pages) + ** pPrior Where to write the pgno of the first overflow page + ** + ** Use a call to btreeParseCellPtr() to verify that the values above + ** were computed correctly. + */ +#ifdef SQLITE_DEBUG + { + CellInfo info; + pPage->xParseCell(pPage, pCell, &info); + assert( nHeader==(int)(info.pPayload - pCell) ); + assert( info.nKey==pX->nKey ); + assert( *pnSize == info.nSize ); + assert( spaceLeft == info.nLocal ); + } +#endif + + /* Write the payload into the local Cell and any extra into overflow pages */ + while( 1 ){ + n = nPayload; + if( n>spaceLeft ) n = spaceLeft; + + /* If pToRelease is not zero than pPayload points into the data area + ** of pToRelease. Make sure pToRelease is still writeable. */ + assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); + + /* If pPayload is part of the data area of pPage, then make sure pPage + ** is still writeable */ + assert( pPayloadaData || pPayload>=&pPage->aData[pBt->pageSize] + || sqlite3PagerIswriteable(pPage->pDbPage) ); + + if( nSrc>=n ){ + memcpy(pPayload, pSrc, n); + }else if( nSrc>0 ){ + n = nSrc; + memcpy(pPayload, pSrc, n); + }else{ + memset(pPayload, 0, n); + } + nPayload -= n; + if( nPayload<=0 ) break; + pPayload += n; + pSrc += n; + nSrc -= n; + spaceLeft -= n; + if( spaceLeft==0 ){ + MemPage *pOvfl = 0; +#ifndef SQLITE_OMIT_AUTOVACUUM + Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ + if( pBt->autoVacuum ){ + do{ + pgnoOvfl++; + } while( + PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) + ); + } +#endif + rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0); +#ifndef SQLITE_OMIT_AUTOVACUUM + /* If the database supports auto-vacuum, and the second or subsequent + ** overflow page is being allocated, add an entry to the pointer-map + ** for that page now. + ** + ** If this is the first overflow page, then write a partial entry + ** to the pointer-map. If we write nothing to this pointer-map slot, + ** then the optimistic overflow chain processing in clearCell() + ** may misinterpret the uninitialized values and delete the + ** wrong pages from the database. + */ + if( pBt->autoVacuum && rc==SQLITE_OK ){ + u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); + ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc); + if( rc ){ + releasePage(pOvfl); + } + } +#endif + if( rc ){ + releasePage(pToRelease); + return rc; + } + + /* If pToRelease is not zero than pPrior points into the data area + ** of pToRelease. Make sure pToRelease is still writeable. */ + assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); + + /* If pPrior is part of the data area of pPage, then make sure pPage + ** is still writeable */ + assert( pPrioraData || pPrior>=&pPage->aData[pBt->pageSize] + || sqlite3PagerIswriteable(pPage->pDbPage) ); + + put4byte(pPrior, pgnoOvfl); + releasePage(pToRelease); + pToRelease = pOvfl; + pPrior = pOvfl->aData; + put4byte(pPrior, 0); + pPayload = &pOvfl->aData[4]; + spaceLeft = pBt->usableSize - 4; + } + } + releasePage(pToRelease); + return SQLITE_OK; +} + +/* +** Remove the i-th cell from pPage. This routine effects pPage only. +** The cell content is not freed or deallocated. It is assumed that +** the cell content has been copied someplace else. This routine just +** removes the reference to the cell from pPage. +** +** "sz" must be the number of bytes in the cell. +*/ +static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ + u32 pc; /* Offset to cell content of cell being deleted */ + u8 *data; /* pPage->aData */ + u8 *ptr; /* Used to move bytes around within data[] */ + int rc; /* The return code */ + int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */ + + if( *pRC ) return; + assert( idx>=0 ); + assert( idxnCell ); + assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->nFree>=0 ); + data = pPage->aData; + ptr = &pPage->aCellIdx[2*idx]; + assert( pPage->pBt->usableSize > (u32)(ptr-data) ); + pc = get2byte(ptr); + hdr = pPage->hdrOffset; +#if 0 /* Not required. Omit for efficiency */ + if( pcnCell*2 ){ + *pRC = SQLITE_CORRUPT_BKPT; + return; + } +#endif + testcase( pc==(u32)get2byte(&data[hdr+5]) ); + testcase( pc+sz==pPage->pBt->usableSize ); + if( pc+sz > pPage->pBt->usableSize ){ + *pRC = SQLITE_CORRUPT_BKPT; + return; + } + rc = freeSpace(pPage, pc, sz); + if( rc ){ + *pRC = rc; + return; + } + pPage->nCell--; + if( pPage->nCell==0 ){ + memset(&data[hdr+1], 0, 4); + data[hdr+7] = 0; + put2byte(&data[hdr+5], pPage->pBt->usableSize); + pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset + - pPage->childPtrSize - 8; + }else{ + memmove(ptr, ptr+2, 2*(pPage->nCell - idx)); + put2byte(&data[hdr+3], pPage->nCell); + pPage->nFree += 2; + } +} + +/* +** Insert a new cell on pPage at cell index "i". pCell points to the +** content of the cell. +** +** If the cell content will fit on the page, then put it there. If it +** will not fit, then make a copy of the cell content into pTemp if +** pTemp is not null. Regardless of pTemp, allocate a new entry +** in pPage->apOvfl[] and make it point to the cell content (either +** in pTemp or the original pCell) and also record its index. +** Allocating a new entry in pPage->aCell[] implies that +** pPage->nOverflow is incremented. +** +** *pRC must be SQLITE_OK when this routine is called. +*/ +static void insertCell( + MemPage *pPage, /* Page into which we are copying */ + int i, /* New cell becomes the i-th cell of the page */ + u8 *pCell, /* Content of the new cell */ + int sz, /* Bytes of content in pCell */ + u8 *pTemp, /* Temp storage space for pCell, if needed */ + Pgno iChild, /* If non-zero, replace first 4 bytes with this value */ + int *pRC /* Read and write return code from here */ +){ + int idx = 0; /* Where to write new cell content in data[] */ + int j; /* Loop counter */ + u8 *data; /* The content of the whole page */ + u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ + + assert( *pRC==SQLITE_OK ); + assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); + assert( MX_CELL(pPage->pBt)<=10921 ); + assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); + assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); + assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB ); + assert( pPage->nFree>=0 ); + if( pPage->nOverflow || sz+2>pPage->nFree ){ + if( pTemp ){ + memcpy(pTemp, pCell, sz); + pCell = pTemp; + } + if( iChild ){ + put4byte(pCell, iChild); + } + j = pPage->nOverflow++; + /* Comparison against ArraySize-1 since we hold back one extra slot + ** as a contingency. In other words, never need more than 3 overflow + ** slots but 4 are allocated, just to be safe. */ + assert( j < ArraySize(pPage->apOvfl)-1 ); + pPage->apOvfl[j] = pCell; + pPage->aiOvfl[j] = (u16)i; + + /* When multiple overflows occur, they are always sequential and in + ** sorted order. This invariants arise because multiple overflows can + ** only occur when inserting divider cells into the parent page during + ** balancing, and the dividers are adjacent and sorted. + */ + assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ + assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ + }else{ + int rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc!=SQLITE_OK ){ + *pRC = rc; + return; + } + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + data = pPage->aData; + assert( &data[pPage->cellOffset]==pPage->aCellIdx ); + rc = allocateSpace(pPage, sz, &idx); + if( rc ){ *pRC = rc; return; } + /* The allocateSpace() routine guarantees the following properties + ** if it returns successfully */ + assert( idx >= 0 ); + assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); + assert( idx+sz <= (int)pPage->pBt->usableSize ); + pPage->nFree -= (u16)(2 + sz); + if( iChild ){ + /* In a corrupt database where an entry in the cell index section of + ** a btree page has a value of 3 or less, the pCell value might point + ** as many as 4 bytes in front of the start of the aData buffer for + ** the source page. Make sure this does not cause problems by not + ** reading the first 4 bytes */ + memcpy(&data[idx+4], pCell+4, sz-4); + put4byte(&data[idx], iChild); + }else{ + memcpy(&data[idx], pCell, sz); + } + pIns = pPage->aCellIdx + i*2; + memmove(pIns+2, pIns, 2*(pPage->nCell - i)); + put2byte(pIns, idx); + pPage->nCell++; + /* increment the cell count */ + if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; + assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pPage->pBt->autoVacuum ){ + /* The cell may contain a pointer to an overflow page. If so, write + ** the entry for the overflow page into the pointer map. + */ + ptrmapPutOvflPtr(pPage, pPage, pCell, pRC); + } +#endif + } +} + +/* +** The following parameters determine how many adjacent pages get involved +** in a balancing operation. NN is the number of neighbors on either side +** of the page that participate in the balancing operation. NB is the +** total number of pages that participate, including the target page and +** NN neighbors on either side. +** +** The minimum value of NN is 1 (of course). Increasing NN above 1 +** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance +** in exchange for a larger degradation in INSERT and UPDATE performance. +** The value of NN appears to give the best results overall. +** +** (Later:) The description above makes it seem as if these values are +** tunable - as if you could change them and recompile and it would all work. +** But that is unlikely. NB has been 3 since the inception of SQLite and +** we have never tested any other value. +*/ +#define NN 1 /* Number of neighbors on either side of pPage */ +#define NB 3 /* (NN*2+1): Total pages involved in the balance */ + +/* +** A CellArray object contains a cache of pointers and sizes for a +** consecutive sequence of cells that might be held on multiple pages. +** +** The cells in this array are the divider cell or cells from the pParent +** page plus up to three child pages. There are a total of nCell cells. +** +** pRef is a pointer to one of the pages that contributes cells. This is +** used to access information such as MemPage.intKey and MemPage.pBt->pageSize +** which should be common to all pages that contribute cells to this array. +** +** apCell[] and szCell[] hold, respectively, pointers to the start of each +** cell and the size of each cell. Some of the apCell[] pointers might refer +** to overflow cells. In other words, some apCel[] pointers might not point +** to content area of the pages. +** +** A szCell[] of zero means the size of that cell has not yet been computed. +** +** The cells come from as many as four different pages: +** +** ----------- +** | Parent | +** ----------- +** / | \ +** / | \ +** --------- --------- --------- +** |Child-1| |Child-2| |Child-3| +** --------- --------- --------- +** +** The order of cells is in the array is for an index btree is: +** +** 1. All cells from Child-1 in order +** 2. The first divider cell from Parent +** 3. All cells from Child-2 in order +** 4. The second divider cell from Parent +** 5. All cells from Child-3 in order +** +** For a table-btree (with rowids) the items 2 and 4 are empty because +** content exists only in leaves and there are no divider cells. +** +** For an index btree, the apEnd[] array holds pointer to the end of page +** for Child-1, the Parent, Child-2, the Parent (again), and Child-3, +** respectively. The ixNx[] array holds the number of cells contained in +** each of these 5 stages, and all stages to the left. Hence: +** +** ixNx[0] = Number of cells in Child-1. +** ixNx[1] = Number of cells in Child-1 plus 1 for first divider. +** ixNx[2] = Number of cells in Child-1 and Child-2 + 1 for 1st divider. +** ixNx[3] = Number of cells in Child-1 and Child-2 + both divider cells +** ixNx[4] = Total number of cells. +** +** For a table-btree, the concept is similar, except only apEnd[0]..apEnd[2] +** are used and they point to the leaf pages only, and the ixNx value are: +** +** ixNx[0] = Number of cells in Child-1. +** ixNx[1] = Number of cells in Child-1 and Child-2. +** ixNx[2] = Total number of cells. +** +** Sometimes when deleting, a child page can have zero cells. In those +** cases, ixNx[] entries with higher indexes, and the corresponding apEnd[] +** entries, shift down. The end result is that each ixNx[] entry should +** be larger than the previous +*/ +typedef struct CellArray CellArray; +struct CellArray { + int nCell; /* Number of cells in apCell[] */ + MemPage *pRef; /* Reference page */ + u8 **apCell; /* All cells begin balanced */ + u16 *szCell; /* Local size of all cells in apCell[] */ + u8 *apEnd[NB*2]; /* MemPage.aDataEnd values */ + int ixNx[NB*2]; /* Index of at which we move to the next apEnd[] */ +}; + +/* +** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been +** computed. +*/ +static void populateCellCache(CellArray *p, int idx, int N){ + assert( idx>=0 && idx+N<=p->nCell ); + while( N>0 ){ + assert( p->apCell[idx]!=0 ); + if( p->szCell[idx]==0 ){ + p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); + }else{ + assert( CORRUPT_DB || + p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); + } + idx++; + N--; + } +} + +/* +** Return the size of the Nth element of the cell array +*/ +static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){ + assert( N>=0 && NnCell ); + assert( p->szCell[N]==0 ); + p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]); + return p->szCell[N]; +} +static u16 cachedCellSize(CellArray *p, int N){ + assert( N>=0 && NnCell ); + if( p->szCell[N] ) return p->szCell[N]; + return computeCellSize(p, N); +} + +/* +** Array apCell[] contains pointers to nCell b-tree page cells. The +** szCell[] array contains the size in bytes of each cell. This function +** replaces the current contents of page pPg with the contents of the cell +** array. +** +** Some of the cells in apCell[] may currently be stored in pPg. This +** function works around problems caused by this by making a copy of any +** such cells before overwriting the page data. +** +** The MemPage.nFree field is invalidated by this function. It is the +** responsibility of the caller to set it correctly. +*/ +static int rebuildPage( + CellArray *pCArray, /* Content to be added to page pPg */ + int iFirst, /* First cell in pCArray to use */ + int nCell, /* Final number of cells on page */ + MemPage *pPg /* The page to be reconstructed */ +){ + const int hdr = pPg->hdrOffset; /* Offset of header on pPg */ + u8 * const aData = pPg->aData; /* Pointer to data for pPg */ + const int usableSize = pPg->pBt->usableSize; + u8 * const pEnd = &aData[usableSize]; + int i = iFirst; /* Which cell to copy from pCArray*/ + u32 j; /* Start of cell content area */ + int iEnd = i+nCell; /* Loop terminator */ + u8 *pCellptr = pPg->aCellIdx; + u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); + u8 *pData; + int k; /* Current slot in pCArray->apEnd[] */ + u8 *pSrcEnd; /* Current pCArray->apEnd[k] value */ + + assert( i(u32)usableSize ){ j = 0; } + memcpy(&pTmp[j], &aData[j], usableSize - j); + + for(k=0; pCArray->ixNx[k]<=i && ALWAYS(kapEnd[k]; + + pData = pEnd; + while( 1/*exit by break*/ ){ + u8 *pCell = pCArray->apCell[i]; + u16 sz = pCArray->szCell[i]; + assert( sz>0 ); + if( SQLITE_WITHIN(pCell,aData+j,pEnd) ){ + if( ((uptr)(pCell+sz))>(uptr)pEnd ) return SQLITE_CORRUPT_BKPT; + pCell = &pTmp[pCell - aData]; + }else if( (uptr)(pCell+sz)>(uptr)pSrcEnd + && (uptr)(pCell)<(uptr)pSrcEnd + ){ + return SQLITE_CORRUPT_BKPT; + } + + pData -= sz; + put2byte(pCellptr, (pData - aData)); + pCellptr += 2; + if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT; + memmove(pData, pCell, sz); + assert( sz==pPg->xCellSize(pPg, pCell) || CORRUPT_DB ); + i++; + if( i>=iEnd ) break; + if( pCArray->ixNx[k]<=i ){ + k++; + pSrcEnd = pCArray->apEnd[k]; + } + } + + /* The pPg->nFree field is now set incorrectly. The caller will fix it. */ + pPg->nCell = nCell; + pPg->nOverflow = 0; + + put2byte(&aData[hdr+1], 0); + put2byte(&aData[hdr+3], pPg->nCell); + put2byte(&aData[hdr+5], pData - aData); + aData[hdr+7] = 0x00; + return SQLITE_OK; +} + +/* +** The pCArray objects contains pointers to b-tree cells and the cell sizes. +** This function attempts to add the cells stored in the array to page pPg. +** If it cannot (because the page needs to be defragmented before the cells +** will fit), non-zero is returned. Otherwise, if the cells are added +** successfully, zero is returned. +** +** Argument pCellptr points to the first entry in the cell-pointer array +** (part of page pPg) to populate. After cell apCell[0] is written to the +** page body, a 16-bit offset is written to pCellptr. And so on, for each +** cell in the array. It is the responsibility of the caller to ensure +** that it is safe to overwrite this part of the cell-pointer array. +** +** When this function is called, *ppData points to the start of the +** content area on page pPg. If the size of the content area is extended, +** *ppData is updated to point to the new start of the content area +** before returning. +** +** Finally, argument pBegin points to the byte immediately following the +** end of the space required by this page for the cell-pointer area (for +** all cells - not just those inserted by the current call). If the content +** area must be extended to before this point in order to accomodate all +** cells in apCell[], then the cells do not fit and non-zero is returned. +*/ +static int pageInsertArray( + MemPage *pPg, /* Page to add cells to */ + u8 *pBegin, /* End of cell-pointer array */ + u8 **ppData, /* IN/OUT: Page content-area pointer */ + u8 *pCellptr, /* Pointer to cell-pointer area */ + int iFirst, /* Index of first cell to add */ + int nCell, /* Number of cells to add to pPg */ + CellArray *pCArray /* Array of cells */ +){ + int i = iFirst; /* Loop counter - cell index to insert */ + u8 *aData = pPg->aData; /* Complete page */ + u8 *pData = *ppData; /* Content area. A subset of aData[] */ + int iEnd = iFirst + nCell; /* End of loop. One past last cell to ins */ + int k; /* Current slot in pCArray->apEnd[] */ + u8 *pEnd; /* Maximum extent of cell data */ + assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ + if( iEnd<=iFirst ) return 0; + for(k=0; pCArray->ixNx[k]<=i && ALWAYS(kapEnd[k]; + while( 1 /*Exit by break*/ ){ + int sz, rc; + u8 *pSlot; + assert( pCArray->szCell[i]!=0 ); + sz = pCArray->szCell[i]; + if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){ + if( (pData - pBegin)apCell[i] will never overlap on a well-formed + ** database. But they might for a corrupt database. Hence use memmove() + ** since memcpy() sends SIGABORT with overlapping buffers on OpenBSD */ + assert( (pSlot+sz)<=pCArray->apCell[i] + || pSlot>=(pCArray->apCell[i]+sz) + || CORRUPT_DB ); + if( (uptr)(pCArray->apCell[i]+sz)>(uptr)pEnd + && (uptr)(pCArray->apCell[i])<(uptr)pEnd + ){ + assert( CORRUPT_DB ); + (void)SQLITE_CORRUPT_BKPT; + return 1; + } + memmove(pSlot, pCArray->apCell[i], sz); + put2byte(pCellptr, (pSlot - aData)); + pCellptr += 2; + i++; + if( i>=iEnd ) break; + if( pCArray->ixNx[k]<=i ){ + k++; + pEnd = pCArray->apEnd[k]; + } + } + *ppData = pData; + return 0; +} + +/* +** The pCArray object contains pointers to b-tree cells and their sizes. +** +** This function adds the space associated with each cell in the array +** that is currently stored within the body of pPg to the pPg free-list. +** The cell-pointers and other fields of the page are not updated. +** +** This function returns the total number of cells added to the free-list. +*/ +static int pageFreeArray( + MemPage *pPg, /* Page to edit */ + int iFirst, /* First cell to delete */ + int nCell, /* Cells to delete */ + CellArray *pCArray /* Array of cells */ +){ + u8 * const aData = pPg->aData; + u8 * const pEnd = &aData[pPg->pBt->usableSize]; + u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize]; + int nRet = 0; + int i; + int iEnd = iFirst + nCell; + u8 *pFree = 0; + int szFree = 0; + + for(i=iFirst; iapCell[i]; + if( SQLITE_WITHIN(pCell, pStart, pEnd) ){ + int sz; + /* No need to use cachedCellSize() here. The sizes of all cells that + ** are to be freed have already been computing while deciding which + ** cells need freeing */ + sz = pCArray->szCell[i]; assert( sz>0 ); + if( pFree!=(pCell + sz) ){ + if( pFree ){ + assert( pFree>aData && (pFree - aData)<65536 ); + freeSpace(pPg, (u16)(pFree - aData), szFree); + } + pFree = pCell; + szFree = sz; + if( pFree+sz>pEnd ){ + return 0; + } + }else{ + pFree = pCell; + szFree += sz; + } + nRet++; + } + } + if( pFree ){ + assert( pFree>aData && (pFree - aData)<65536 ); + freeSpace(pPg, (u16)(pFree - aData), szFree); + } + return nRet; +} + +/* +** pCArray contains pointers to and sizes of all cells in the page being +** balanced. The current page, pPg, has pPg->nCell cells starting with +** pCArray->apCell[iOld]. After balancing, this page should hold nNew cells +** starting at apCell[iNew]. +** +** This routine makes the necessary adjustments to pPg so that it contains +** the correct cells after being balanced. +** +** The pPg->nFree field is invalid when this function returns. It is the +** responsibility of the caller to set it correctly. +*/ +static int editPage( + MemPage *pPg, /* Edit this page */ + int iOld, /* Index of first cell currently on page */ + int iNew, /* Index of new first cell on page */ + int nNew, /* Final number of cells on page */ + CellArray *pCArray /* Array of cells and sizes */ +){ + u8 * const aData = pPg->aData; + const int hdr = pPg->hdrOffset; + u8 *pBegin = &pPg->aCellIdx[nNew * 2]; + int nCell = pPg->nCell; /* Cells stored on pPg */ + u8 *pData; + u8 *pCellptr; + int i; + int iOldEnd = iOld + pPg->nCell + pPg->nOverflow; + int iNewEnd = iNew + nNew; + +#ifdef SQLITE_DEBUG + u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); + memcpy(pTmp, aData, pPg->pBt->usableSize); +#endif + + /* Remove cells from the start and end of the page */ + assert( nCell>=0 ); + if( iOldnCell) ) return SQLITE_CORRUPT_BKPT; + memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2); + nCell -= nShift; + } + if( iNewEnd < iOldEnd ){ + int nTail = pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray); + assert( nCell>=nTail ); + nCell -= nTail; + } + + pData = &aData[get2byteNotZero(&aData[hdr+5])]; + if( pDatapPg->aDataEnd ) goto editpage_fail; + + /* Add cells to the start of the page */ + if( iNew=0 ); + pCellptr = pPg->aCellIdx; + memmove(&pCellptr[nAdd*2], pCellptr, nCell*2); + if( pageInsertArray( + pPg, pBegin, &pData, pCellptr, + iNew, nAdd, pCArray + ) ) goto editpage_fail; + nCell += nAdd; + } + + /* Add any overflow cells */ + for(i=0; inOverflow; i++){ + int iCell = (iOld + pPg->aiOvfl[i]) - iNew; + if( iCell>=0 && iCellaCellIdx[iCell * 2]; + if( nCell>iCell ){ + memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2); + } + nCell++; + cachedCellSize(pCArray, iCell+iNew); + if( pageInsertArray( + pPg, pBegin, &pData, pCellptr, + iCell+iNew, 1, pCArray + ) ) goto editpage_fail; + } + } + + /* Append cells to the end of the page */ + assert( nCell>=0 ); + pCellptr = &pPg->aCellIdx[nCell*2]; + if( pageInsertArray( + pPg, pBegin, &pData, pCellptr, + iNew+nCell, nNew-nCell, pCArray + ) ) goto editpage_fail; + + pPg->nCell = nNew; + pPg->nOverflow = 0; + + put2byte(&aData[hdr+3], pPg->nCell); + put2byte(&aData[hdr+5], pData - aData); + +#ifdef SQLITE_DEBUG + for(i=0; iapCell[i+iNew]; + int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); + if( SQLITE_WITHIN(pCell, aData, &aData[pPg->pBt->usableSize]) ){ + pCell = &pTmp[pCell - aData]; + } + assert( 0==memcmp(pCell, &aData[iOff], + pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); + } +#endif + + return SQLITE_OK; + editpage_fail: + /* Unable to edit this page. Rebuild it from scratch instead. */ + populateCellCache(pCArray, iNew, nNew); + return rebuildPage(pCArray, iNew, nNew, pPg); +} + + +#ifndef SQLITE_OMIT_QUICKBALANCE +/* +** This version of balance() handles the common special case where +** a new entry is being inserted on the extreme right-end of the +** tree, in other words, when the new entry will become the largest +** entry in the tree. +** +** Instead of trying to balance the 3 right-most leaf pages, just add +** a new page to the right-hand side and put the one new entry in +** that page. This leaves the right side of the tree somewhat +** unbalanced. But odds are that we will be inserting new entries +** at the end soon afterwards so the nearly empty page will quickly +** fill up. On average. +** +** pPage is the leaf page which is the right-most page in the tree. +** pParent is its parent. pPage must have a single overflow entry +** which is also the right-most entry on the page. +** +** The pSpace buffer is used to store a temporary copy of the divider +** cell that will be inserted into pParent. Such a cell consists of a 4 +** byte page number followed by a variable length integer. In other +** words, at most 13 bytes. Hence the pSpace buffer must be at +** least 13 bytes in size. +*/ +static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ + BtShared *const pBt = pPage->pBt; /* B-Tree Database */ + MemPage *pNew; /* Newly allocated page */ + int rc; /* Return Code */ + Pgno pgnoNew; /* Page number of pNew */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + assert( pPage->nOverflow==1 ); + + if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; /* dbfuzz001.test */ + assert( pPage->nFree>=0 ); + assert( pParent->nFree>=0 ); + + /* Allocate a new page. This page will become the right-sibling of + ** pPage. Make the parent page writable, so that the new divider cell + ** may be inserted. If both these operations are successful, proceed. + */ + rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); + + if( rc==SQLITE_OK ){ + + u8 *pOut = &pSpace[4]; + u8 *pCell = pPage->apOvfl[0]; + u16 szCell = pPage->xCellSize(pPage, pCell); + u8 *pStop; + CellArray b; + + assert( sqlite3PagerIswriteable(pNew->pDbPage) ); + assert( CORRUPT_DB || pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); + zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); + b.nCell = 1; + b.pRef = pPage; + b.apCell = &pCell; + b.szCell = &szCell; + b.apEnd[0] = pPage->aDataEnd; + b.ixNx[0] = 2; + rc = rebuildPage(&b, 0, 1, pNew); + if( NEVER(rc) ){ + releasePage(pNew); + return rc; + } + pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell; + + /* If this is an auto-vacuum database, update the pointer map + ** with entries for the new page, and any pointer from the + ** cell on the page to an overflow page. If either of these + ** operations fails, the return code is set, but the contents + ** of the parent page are still manipulated by thh code below. + ** That is Ok, at this point the parent page is guaranteed to + ** be marked as dirty. Returning an error code will cause a + ** rollback, undoing any changes made to the parent page. + */ + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc); + if( szCell>pNew->minLocal ){ + ptrmapPutOvflPtr(pNew, pNew, pCell, &rc); + } + } + + /* Create a divider cell to insert into pParent. The divider cell + ** consists of a 4-byte page number (the page number of pPage) and + ** a variable length key value (which must be the same value as the + ** largest key on pPage). + ** + ** To find the largest key value on pPage, first find the right-most + ** cell on pPage. The first two fields of this cell are the + ** record-length (a variable length integer at most 32-bits in size) + ** and the key value (a variable length integer, may have any value). + ** The first of the while(...) loops below skips over the record-length + ** field. The second while(...) loop copies the key value from the + ** cell on pPage into the pSpace buffer. + */ + pCell = findCell(pPage, pPage->nCell-1); + pStop = &pCell[9]; + while( (*(pCell++)&0x80) && pCellnCell, pSpace, (int)(pOut-pSpace), + 0, pPage->pgno, &rc); + } + + /* Set the right-child pointer of pParent to point to the new page. */ + put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); + + /* Release the reference to the new page. */ + releasePage(pNew); + } + + return rc; +} +#endif /* SQLITE_OMIT_QUICKBALANCE */ + +#if 0 +/* +** This function does not contribute anything to the operation of SQLite. +** it is sometimes activated temporarily while debugging code responsible +** for setting pointer-map entries. +*/ +static int ptrmapCheckPages(MemPage **apPage, int nPage){ + int i, j; + for(i=0; ipBt; + assert( pPage->isInit ); + + for(j=0; jnCell; j++){ + CellInfo info; + u8 *z; + + z = findCell(pPage, j); + pPage->xParseCell(pPage, z, &info); + if( info.nLocalpgno && e==PTRMAP_OVERFLOW1 ); + } + if( !pPage->leaf ){ + Pgno child = get4byte(z); + ptrmapGet(pBt, child, &e, &n); + assert( n==pPage->pgno && e==PTRMAP_BTREE ); + } + } + if( !pPage->leaf ){ + Pgno child = get4byte(&pPage->aData[pPage->hdrOffset+8]); + ptrmapGet(pBt, child, &e, &n); + assert( n==pPage->pgno && e==PTRMAP_BTREE ); + } + } + return 1; +} +#endif + +/* +** This function is used to copy the contents of the b-tree node stored +** on page pFrom to page pTo. If page pFrom was not a leaf page, then +** the pointer-map entries for each child page are updated so that the +** parent page stored in the pointer map is page pTo. If pFrom contained +** any cells with overflow page pointers, then the corresponding pointer +** map entries are also updated so that the parent page is page pTo. +** +** If pFrom is currently carrying any overflow cells (entries in the +** MemPage.apOvfl[] array), they are not copied to pTo. +** +** Before returning, page pTo is reinitialized using btreeInitPage(). +** +** The performance of this function is not critical. It is only used by +** the balance_shallower() and balance_deeper() procedures, neither of +** which are called often under normal circumstances. +*/ +static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){ + if( (*pRC)==SQLITE_OK ){ + BtShared * const pBt = pFrom->pBt; + u8 * const aFrom = pFrom->aData; + u8 * const aTo = pTo->aData; + int const iFromHdr = pFrom->hdrOffset; + int const iToHdr = ((pTo->pgno==1) ? 100 : 0); + int rc; + int iData; + + + assert( pFrom->isInit ); + assert( pFrom->nFree>=iToHdr ); + assert( get2byte(&aFrom[iFromHdr+5]) <= (int)pBt->usableSize ); + + /* Copy the b-tree node content from page pFrom to page pTo. */ + iData = get2byte(&aFrom[iFromHdr+5]); + memcpy(&aTo[iData], &aFrom[iData], pBt->usableSize-iData); + memcpy(&aTo[iToHdr], &aFrom[iFromHdr], pFrom->cellOffset + 2*pFrom->nCell); + + /* Reinitialize page pTo so that the contents of the MemPage structure + ** match the new data. The initialization of pTo can actually fail under + ** fairly obscure circumstances, even though it is a copy of initialized + ** page pFrom. + */ + pTo->isInit = 0; + rc = btreeInitPage(pTo); + if( rc==SQLITE_OK ) rc = btreeComputeFreeSpace(pTo); + if( rc!=SQLITE_OK ){ + *pRC = rc; + return; + } + + /* If this is an auto-vacuum database, update the pointer-map entries + ** for any b-tree or overflow pages that pTo now contains the pointers to. + */ + if( ISAUTOVACUUM ){ + *pRC = setChildPtrmaps(pTo); + } + } +} + +/* +** This routine redistributes cells on the iParentIdx'th child of pParent +** (hereafter "the page") and up to 2 siblings so that all pages have about the +** same amount of free space. Usually a single sibling on either side of the +** page are used in the balancing, though both siblings might come from one +** side if the page is the first or last child of its parent. If the page +** has fewer than 2 siblings (something which can only happen if the page +** is a root page or a child of a root page) then all available siblings +** participate in the balancing. +** +** The number of siblings of the page might be increased or decreased by +** one or two in an effort to keep pages nearly full but not over full. +** +** Note that when this routine is called, some of the cells on the page +** might not actually be stored in MemPage.aData[]. This can happen +** if the page is overfull. This routine ensures that all cells allocated +** to the page and its siblings fit into MemPage.aData[] before returning. +** +** In the course of balancing the page and its siblings, cells may be +** inserted into or removed from the parent page (pParent). Doing so +** may cause the parent page to become overfull or underfull. If this +** happens, it is the responsibility of the caller to invoke the correct +** balancing routine to fix this problem (see the balance() routine). +** +** If this routine fails for any reason, it might leave the database +** in a corrupted state. So if this routine fails, the database should +** be rolled back. +** +** The third argument to this function, aOvflSpace, is a pointer to a +** buffer big enough to hold one page. If while inserting cells into the parent +** page (pParent) the parent page becomes overfull, this buffer is +** used to store the parent's overflow cells. Because this function inserts +** a maximum of four divider cells into the parent page, and the maximum +** size of a cell stored within an internal node is always less than 1/4 +** of the page-size, the aOvflSpace[] buffer is guaranteed to be large +** enough for all overflow cells. +** +** If aOvflSpace is set to a null pointer, this function returns +** SQLITE_NOMEM. +*/ +static int balance_nonroot( + MemPage *pParent, /* Parent page of siblings being balanced */ + int iParentIdx, /* Index of "the page" in pParent */ + u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ + int isRoot, /* True if pParent is a root-page */ + int bBulk /* True if this call is part of a bulk load */ +){ + BtShared *pBt; /* The whole database */ + int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ + int nNew = 0; /* Number of pages in apNew[] */ + int nOld; /* Number of pages in apOld[] */ + int i, j, k; /* Loop counters */ + int nxDiv; /* Next divider slot in pParent->aCell[] */ + int rc = SQLITE_OK; /* The return code */ + u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */ + int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ + int usableSpace; /* Bytes in pPage beyond the header */ + int pageFlags; /* Value of pPage->aData[0] */ + int iSpace1 = 0; /* First unused byte of aSpace1[] */ + int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ + int szScratch; /* Size of scratch memory requested */ + MemPage *apOld[NB]; /* pPage and up to two siblings */ + MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ + u8 *pRight; /* Location in parent of right-sibling pointer */ + u8 *apDiv[NB-1]; /* Divider cells in pParent */ + int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */ + int cntOld[NB+2]; /* Old index in b.apCell[] */ + int szNew[NB+2]; /* Combined size of cells placed on i-th page */ + u8 *aSpace1; /* Space for copies of dividers cells */ + Pgno pgno; /* Temp var to store a page number in */ + u8 abDone[NB+2]; /* True after i'th new page is populated */ + Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ + Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ + u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ + CellArray b; /* Parsed information on cells being balanced */ + + memset(abDone, 0, sizeof(abDone)); + memset(&b, 0, sizeof(b)); + pBt = pParent->pBt; + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + + /* At this point pParent may have at most one overflow cell. And if + ** this overflow cell is present, it must be the cell with + ** index iParentIdx. This scenario comes about when this function + ** is called (indirectly) from sqlite3BtreeDelete(). + */ + assert( pParent->nOverflow==0 || pParent->nOverflow==1 ); + assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx ); + + if( !aOvflSpace ){ + return SQLITE_NOMEM_BKPT; + } + assert( pParent->nFree>=0 ); + + /* Find the sibling pages to balance. Also locate the cells in pParent + ** that divide the siblings. An attempt is made to find NN siblings on + ** either side of pPage. More siblings are taken from one side, however, + ** if there are fewer than NN siblings on the other side. If pParent + ** has NB or fewer children then all children of pParent are taken. + ** + ** This loop also drops the divider cells from the parent page. This + ** way, the remainder of the function does not have to deal with any + ** overflow cells in the parent page, since if any existed they will + ** have already been removed. + */ + i = pParent->nOverflow + pParent->nCell; + if( i<2 ){ + nxDiv = 0; + }else{ + assert( bBulk==0 || bBulk==1 ); + if( iParentIdx==0 ){ + nxDiv = 0; + }else if( iParentIdx==i ){ + nxDiv = i-2+bBulk; + }else{ + nxDiv = iParentIdx-1; + } + i = 2-bBulk; + } + nOld = i+1; + if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){ + pRight = &pParent->aData[pParent->hdrOffset+8]; + }else{ + pRight = findCell(pParent, i+nxDiv-pParent->nOverflow); + } + pgno = get4byte(pRight); + while( 1 ){ + if( rc==SQLITE_OK ){ + rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); + } + if( rc ){ + memset(apOld, 0, (i+1)*sizeof(MemPage*)); + goto balance_cleanup; + } + if( apOld[i]->nFree<0 ){ + rc = btreeComputeFreeSpace(apOld[i]); + if( rc ){ + memset(apOld, 0, (i)*sizeof(MemPage*)); + goto balance_cleanup; + } + } + nMaxCells += apOld[i]->nCell + ArraySize(pParent->apOvfl); + if( (i--)==0 ) break; + + if( pParent->nOverflow && i+nxDiv==pParent->aiOvfl[0] ){ + apDiv[i] = pParent->apOvfl[0]; + pgno = get4byte(apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); + pParent->nOverflow = 0; + }else{ + apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); + pgno = get4byte(apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); + + /* Drop the cell from the parent page. apDiv[i] still points to + ** the cell within the parent, even though it has been dropped. + ** This is safe because dropping a cell only overwrites the first + ** four bytes of it, and this function does not need the first + ** four bytes of the divider cell. So the pointer is safe to use + ** later on. + ** + ** But not if we are in secure-delete mode. In secure-delete mode, + ** the dropCell() routine will overwrite the entire cell with zeroes. + ** In this case, temporarily copy the cell into the aOvflSpace[] + ** buffer. It will be copied out again as soon as the aSpace[] buffer + ** is allocated. */ + if( pBt->btsFlags & BTS_FAST_SECURE ){ + int iOff; + + /* If the following if() condition is not true, the db is corrupted. + ** The call to dropCell() below will detect this. */ + iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData); + if( (iOff+szNew[i])<=(int)pBt->usableSize ){ + memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]); + apDiv[i] = &aOvflSpace[apDiv[i]-pParent->aData]; + } + } + dropCell(pParent, i+nxDiv-pParent->nOverflow, szNew[i], &rc); + } + } + + /* Make nMaxCells a multiple of 4 in order to preserve 8-byte + ** alignment */ + nMaxCells = (nMaxCells + 3)&~3; + + /* + ** Allocate space for memory structures + */ + szScratch = + nMaxCells*sizeof(u8*) /* b.apCell */ + + nMaxCells*sizeof(u16) /* b.szCell */ + + pBt->pageSize; /* aSpace1 */ + + assert( szScratch<=7*(int)pBt->pageSize ); + b.apCell = sqlite3StackAllocRaw(0, szScratch ); + if( b.apCell==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto balance_cleanup; + } + b.szCell = (u16*)&b.apCell[nMaxCells]; + aSpace1 = (u8*)&b.szCell[nMaxCells]; + assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); + + /* + ** Load pointers to all cells on sibling pages and the divider cells + ** into the local b.apCell[] array. Make copies of the divider cells + ** into space obtained from aSpace1[]. The divider cells have already + ** been removed from pParent. + ** + ** If the siblings are on leaf pages, then the child pointers of the + ** divider cells are stripped from the cells before they are copied + ** into aSpace1[]. In this way, all cells in b.apCell[] are without + ** child pointers. If siblings are not leaves, then all cell in + ** b.apCell[] include child pointers. Either way, all cells in b.apCell[] + ** are alike. + ** + ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. + ** leafData: 1 if pPage holds key+data and pParent holds only keys. + */ + b.pRef = apOld[0]; + leafCorrection = b.pRef->leaf*4; + leafData = b.pRef->intKeyLeaf; + for(i=0; inCell; + u8 *aData = pOld->aData; + u16 maskPage = pOld->maskPage; + u8 *piCell = aData + pOld->cellOffset; + u8 *piEnd; + VVA_ONLY( int nCellAtStart = b.nCell; ) + + /* Verify that all sibling pages are of the same "type" (table-leaf, + ** table-interior, index-leaf, or index-interior). + */ + if( pOld->aData[0]!=apOld[0]->aData[0] ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + + /* Load b.apCell[] with pointers to all cells in pOld. If pOld + ** contains overflow cells, include them in the b.apCell[] array + ** in the correct spot. + ** + ** Note that when there are multiple overflow cells, it is always the + ** case that they are sequential and adjacent. This invariant arises + ** because multiple overflows can only occurs when inserting divider + ** cells into a parent on a prior balance, and divider cells are always + ** adjacent and are inserted in order. There is an assert() tagged + ** with "NOTE 1" in the overflow cell insertion loop to prove this + ** invariant. + ** + ** This must be done in advance. Once the balance starts, the cell + ** offset section of the btree page will be overwritten and we will no + ** long be able to find the cells if a pointer to each cell is not saved + ** first. + */ + memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*(limit+pOld->nOverflow)); + if( pOld->nOverflow>0 ){ + if( NEVER(limitaiOvfl[0]) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + limit = pOld->aiOvfl[0]; + for(j=0; jnOverflow; k++){ + assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */ + b.apCell[b.nCell] = pOld->apOvfl[k]; + b.nCell++; + } + } + piEnd = aData + pOld->cellOffset + 2*pOld->nCell; + while( piCellnCell+pOld->nOverflow) ); + + cntOld[i] = b.nCell; + if( imaxLocal+23 ); + assert( iSpace1 <= (int)pBt->pageSize ); + memcpy(pTemp, apDiv[i], sz); + b.apCell[b.nCell] = pTemp+leafCorrection; + assert( leafCorrection==0 || leafCorrection==4 ); + b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection; + if( !pOld->leaf ){ + assert( leafCorrection==0 ); + assert( pOld->hdrOffset==0 || CORRUPT_DB ); + /* The right pointer of the child page pOld becomes the left + ** pointer of the divider cell */ + memcpy(b.apCell[b.nCell], &pOld->aData[8], 4); + }else{ + assert( leafCorrection==4 ); + while( b.szCell[b.nCell]<4 ){ + /* Do not allow any cells smaller than 4 bytes. If a smaller cell + ** does exist, pad it with 0x00 bytes. */ + assert( b.szCell[b.nCell]==3 || CORRUPT_DB ); + assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); + aSpace1[iSpace1++] = 0x00; + b.szCell[b.nCell]++; + } + } + b.nCell++; + } + } + + /* + ** Figure out the number of pages needed to hold all b.nCell cells. + ** Store this number in "k". Also compute szNew[] which is the total + ** size of all cells on the i-th page and cntNew[] which is the index + ** in b.apCell[] of the cell that divides page i from page i+1. + ** cntNew[k] should equal b.nCell. + ** + ** Values computed by this block: + ** + ** k: The total number of sibling pages + ** szNew[i]: Spaced used on the i-th sibling page. + ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to + ** the right of the i-th sibling page. + ** usableSpace: Number of bytes of space available on each sibling. + ** + */ + usableSpace = pBt->usableSize - 12 + leafCorrection; + for(i=k=0; iaDataEnd; + b.ixNx[k] = cntOld[i]; + if( k && b.ixNx[k]==b.ixNx[k-1] ){ + k--; /* Omit b.ixNx[] entry for child pages with no cells */ + } + if( !leafData ){ + k++; + b.apEnd[k] = pParent->aDataEnd; + b.ixNx[k] = cntOld[i]+1; + } + assert( p->nFree>=0 ); + szNew[i] = usableSpace - p->nFree; + for(j=0; jnOverflow; j++){ + szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); + } + cntNew[i] = cntOld[i]; + } + k = nOld; + for(i=0; iusableSpace ){ + if( i+1>=k ){ + k = i+2; + if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } + szNew[k-1] = 0; + cntNew[k-1] = b.nCell; + } + sz = 2 + cachedCellSize(&b, cntNew[i]-1); + szNew[i] -= sz; + if( !leafData ){ + if( cntNew[i]usableSpace ) break; + szNew[i] += sz; + cntNew[i]++; + if( !leafData ){ + if( cntNew[i]=b.nCell ){ + k = i+1; + }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + } + + /* + ** The packing computed by the previous block is biased toward the siblings + ** on the left side (siblings with smaller keys). The left siblings are + ** always nearly full, while the right-most sibling might be nearly empty. + ** The next block of code attempts to adjust the packing of siblings to + ** get a better balance. + ** + ** This adjustment is more than an optimization. The packing above might + ** be so out of balance as to be illegal. For example, the right-most + ** sibling might be completely empty. This adjustment is not optional. + */ + for(i=k-1; i>0; i--){ + int szRight = szNew[i]; /* Size of sibling on the right */ + int szLeft = szNew[i-1]; /* Size of sibling on the left */ + int r; /* Index of right-most cell in left sibling */ + int d; /* Index of first cell to the left of right sibling */ + + r = cntNew[i-1] - 1; + d = r + 1 - leafData; + (void)cachedCellSize(&b, d); + do{ + assert( d szLeft-(b.szCell[r]+(i==k-1?0:2)))){ + break; + } + szRight += b.szCell[d] + 2; + szLeft -= b.szCell[r] + 2; + cntNew[i-1] = r; + r--; + d--; + }while( r>=0 ); + szNew[i] = szRight; + szNew[i-1] = szLeft; + if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + } + + /* Sanity check: For a non-corrupt database file one of the follwing + ** must be true: + ** (1) We found one or more cells (cntNew[0])>0), or + ** (2) pPage is a virtual root page. A virtual root page is when + ** the real root page is page 1 and we are the only child of + ** that page. + */ + assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB); + TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n", + apOld[0]->pgno, apOld[0]->nCell, + nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0, + nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0 + )); + + /* + ** Allocate k new pages. Reuse old pages where possible. + */ + pageFlags = apOld[0]->aData[0]; + for(i=0; ipDbPage); + nNew++; + if( sqlite3PagerPageRefcount(pNew->pDbPage)!=1+(i==(iParentIdx-nxDiv)) + && rc==SQLITE_OK + ){ + rc = SQLITE_CORRUPT_BKPT; + } + if( rc ) goto balance_cleanup; + }else{ + assert( i>0 ); + rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0); + if( rc ) goto balance_cleanup; + zeroPage(pNew, pageFlags); + apNew[i] = pNew; + nNew++; + cntOld[i] = b.nCell; + + /* Set the pointer-map entry for the new sibling page. */ + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); + if( rc!=SQLITE_OK ){ + goto balance_cleanup; + } + } + } + } + + /* + ** Reassign page numbers so that the new pages are in ascending order. + ** This helps to keep entries in the disk file in order so that a scan + ** of the table is closer to a linear scan through the file. That in turn + ** helps the operating system to deliver pages from the disk more rapidly. + ** + ** An O(n^2) insertion sort algorithm is used, but since n is never more + ** than (NB+2) (a small constant), that should not be a problem. + ** + ** When NB==3, this one optimization makes the database about 25% faster + ** for large insertions and deletions. + */ + for(i=0; ipgno; + aPgFlags[i] = apNew[i]->pDbPage->flags; + for(j=0; ji ){ + sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0); + } + sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]); + apNew[i]->pgno = pgno; + } + } + + TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) " + "%d(%d nc=%d) %d(%d nc=%d)\n", + apNew[0]->pgno, szNew[0], cntNew[0], + nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0, + nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0, + nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0, + nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0, + nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0, + nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0, + nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0, + nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0 + )); + + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + assert( nNew>=1 && nNew<=ArraySize(apNew) ); + assert( apNew[nNew-1]!=0 ); + put4byte(pRight, apNew[nNew-1]->pgno); + + /* If the sibling pages are not leaves, ensure that the right-child pointer + ** of the right-most new sibling page is set to the value that was + ** originally in the same field of the right-most old sibling page. */ + if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){ + MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1]; + memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4); + } + + /* Make any required updates to pointer map entries associated with + ** cells stored on sibling pages following the balance operation. Pointer + ** map entries associated with divider cells are set by the insertCell() + ** routine. The associated pointer map entries are: + ** + ** a) if the cell contains a reference to an overflow chain, the + ** entry associated with the first page in the overflow chain, and + ** + ** b) if the sibling pages are not leaves, the child page associated + ** with the cell. + ** + ** If the sibling pages are not leaves, then the pointer map entry + ** associated with the right-child of each sibling may also need to be + ** updated. This happens below, after the sibling pages have been + ** populated, not here. + */ + if( ISAUTOVACUUM ){ + MemPage *pOld; + MemPage *pNew = pOld = apNew[0]; + int cntOldNext = pNew->nCell + pNew->nOverflow; + int iNew = 0; + int iOld = 0; + + for(i=0; i=0 && iOldnCell + pOld->nOverflow + !leafData; + } + if( i==cntNew[iNew] ){ + pNew = apNew[++iNew]; + if( !leafData ) continue; + } + + /* Cell pCell is destined for new sibling page pNew. Originally, it + ** was either part of sibling page iOld (possibly an overflow cell), + ** or else the divider cell to the left of sibling page iOld. So, + ** if sibling page iOld had the same page number as pNew, and if + ** pCell really was a part of sibling page iOld (not a divider or + ** overflow cell), we can skip updating the pointer map entries. */ + if( iOld>=nNew + || pNew->pgno!=aPgno[iOld] + || !SQLITE_WITHIN(pCell,pOld->aData,pOld->aDataEnd) + ){ + if( !leafCorrection ){ + ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); + } + if( cachedCellSize(&b,i)>pNew->minLocal ){ + ptrmapPutOvflPtr(pNew, pOld, pCell, &rc); + } + if( rc ) goto balance_cleanup; + } + } + } + + /* Insert new divider cells into pParent. */ + for(i=0; ileaf ){ + memcpy(&pNew->aData[8], pCell, 4); + }else if( leafData ){ + /* If the tree is a leaf-data tree, and the siblings are leaves, + ** then there is no divider cell in b.apCell[]. Instead, the divider + ** cell consists of the integer key for the right-most cell of + ** the sibling-page assembled above only. + */ + CellInfo info; + j--; + pNew->xParseCell(pNew, b.apCell[j], &info); + pCell = pTemp; + sz = 4 + putVarint(&pCell[4], info.nKey); + pTemp = 0; + }else{ + pCell -= 4; + /* Obscure case for non-leaf-data trees: If the cell at pCell was + ** previously stored on a leaf node, and its reported size was 4 + ** bytes, then it may actually be smaller than this + ** (see btreeParseCellPtr(), 4 bytes is the minimum size of + ** any cell). But it is important to pass the correct size to + ** insertCell(), so reparse the cell now. + ** + ** This can only happen for b-trees used to evaluate "IN (SELECT ...)" + ** and WITHOUT ROWID tables with exactly one column which is the + ** primary key. + */ + if( b.szCell[j]==4 ){ + assert(leafCorrection==4); + sz = pParent->xCellSize(pParent, pCell); + } + } + iOvflSpace += sz; + assert( sz<=pBt->maxLocal+23 ); + assert( iOvflSpace <= (int)pBt->pageSize ); + for(k=0; b.ixNx[k]<=j && ALWAYS(kpgno, &rc); + if( rc!=SQLITE_OK ) goto balance_cleanup; + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + } + + /* Now update the actual sibling pages. The order in which they are updated + ** is important, as this code needs to avoid disrupting any page from which + ** cells may still to be read. In practice, this means: + ** + ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1]) + ** then it is not safe to update page apNew[iPg] until after + ** the left-hand sibling apNew[iPg-1] has been updated. + ** + ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1]) + ** then it is not safe to update page apNew[iPg] until after + ** the right-hand sibling apNew[iPg+1] has been updated. + ** + ** If neither of the above apply, the page is safe to update. + ** + ** The iPg value in the following loop starts at nNew-1 goes down + ** to 0, then back up to nNew-1 again, thus making two passes over + ** the pages. On the initial downward pass, only condition (1) above + ** needs to be tested because (2) will always be true from the previous + ** step. On the upward pass, both conditions are always true, so the + ** upwards pass simply processes pages that were missed on the downward + ** pass. + */ + for(i=1-nNew; i=0 && iPg=0 /* On the upwards pass, or... */ + || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */ + ){ + int iNew; + int iOld; + int nNewCell; + + /* Verify condition (1): If cells are moving left, update iPg + ** only after iPg-1 has already been updated. */ + assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] ); + + /* Verify condition (2): If cells are moving right, update iPg + ** only after iPg+1 has already been updated. */ + assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] ); + + if( iPg==0 ){ + iNew = iOld = 0; + nNewCell = cntNew[0]; + }else{ + iOld = iPgnFree = usableSpace-szNew[iPg]; + assert( apNew[iPg]->nOverflow==0 ); + assert( apNew[iPg]->nCell==nNewCell ); + } + } + + /* All pages have been processed exactly once */ + assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 ); + + assert( nOld>0 ); + assert( nNew>0 ); + + if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){ + /* The root page of the b-tree now contains no cells. The only sibling + ** page is the right-child of the parent. Copy the contents of the + ** child page into the parent, decreasing the overall height of the + ** b-tree structure by one. This is described as the "balance-shallower" + ** sub-algorithm in some documentation. + ** + ** If this is an auto-vacuum database, the call to copyNodeContent() + ** sets all pointer-map entries corresponding to database image pages + ** for which the pointer is stored within the content being copied. + ** + ** It is critical that the child page be defragmented before being + ** copied into the parent, because if the parent is page 1 then it will + ** by smaller than the child due to the database header, and so all the + ** free space needs to be up front. + */ + assert( nNew==1 || CORRUPT_DB ); + rc = defragmentPage(apNew[0], -1); + testcase( rc!=SQLITE_OK ); + assert( apNew[0]->nFree == + (get2byteNotZero(&apNew[0]->aData[5]) - apNew[0]->cellOffset + - apNew[0]->nCell*2) + || rc!=SQLITE_OK + ); + copyNodeContent(apNew[0], pParent, &rc); + freePage(apNew[0], &rc); + }else if( ISAUTOVACUUM && !leafCorrection ){ + /* Fix the pointer map entries associated with the right-child of each + ** sibling page. All other pointer map entries have already been taken + ** care of. */ + for(i=0; iaData[8]); + ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc); + } + } + + assert( pParent->isInit ); + TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", + nOld, nNew, b.nCell)); + + /* Free any old pages that were not reused as new pages. + */ + for(i=nNew; iisInit ){ + /* The ptrmapCheckPages() contains assert() statements that verify that + ** all pointer map pages are set correctly. This is helpful while + ** debugging. This is usually disabled because a corrupt database may + ** cause an assert() statement to fail. */ + ptrmapCheckPages(apNew, nNew); + ptrmapCheckPages(&pParent, 1); + } +#endif + + /* + ** Cleanup before returning. + */ +balance_cleanup: + sqlite3StackFree(0, b.apCell); + for(i=0; ipBt; /* The BTree */ + + assert( pRoot->nOverflow>0 ); + assert( sqlite3_mutex_held(pBt->mutex) ); + + /* Make pRoot, the root page of the b-tree, writable. Allocate a new + ** page that will become the new right-child of pPage. Copy the contents + ** of the node stored on pRoot into the new child page. + */ + rc = sqlite3PagerWrite(pRoot->pDbPage); + if( rc==SQLITE_OK ){ + rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); + copyNodeContent(pRoot, pChild, &rc); + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); + } + } + if( rc ){ + *ppChild = 0; + releasePage(pChild); + return rc; + } + assert( sqlite3PagerIswriteable(pChild->pDbPage) ); + assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); + assert( pChild->nCell==pRoot->nCell || CORRUPT_DB ); + + TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno)); + + /* Copy the overflow cells from pRoot to pChild */ + memcpy(pChild->aiOvfl, pRoot->aiOvfl, + pRoot->nOverflow*sizeof(pRoot->aiOvfl[0])); + memcpy(pChild->apOvfl, pRoot->apOvfl, + pRoot->nOverflow*sizeof(pRoot->apOvfl[0])); + pChild->nOverflow = pRoot->nOverflow; + + /* Zero the contents of pRoot. Then install pChild as the right-child. */ + zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF); + put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild); + + *ppChild = pChild; + return SQLITE_OK; +} + +/* +** Return SQLITE_CORRUPT if any cursor other than pCur is currently valid +** on the same B-tree as pCur. +** +** This can occur if a database is corrupt with two or more SQL tables +** pointing to the same b-tree. If an insert occurs on one SQL table +** and causes a BEFORE TRIGGER to do a secondary insert on the other SQL +** table linked to the same b-tree. If the secondary insert causes a +** rebalance, that can change content out from under the cursor on the +** first SQL table, violating invariants on the first insert. +*/ +static int anotherValidCursor(BtCursor *pCur){ + BtCursor *pOther; + for(pOther=pCur->pBt->pCursor; pOther; pOther=pOther->pNext){ + if( pOther!=pCur + && pOther->eState==CURSOR_VALID + && pOther->pPage==pCur->pPage + ){ + return SQLITE_CORRUPT_BKPT; + } + } + return SQLITE_OK; +} + +/* +** The page that pCur currently points to has just been modified in +** some way. This function figures out if this modification means the +** tree needs to be balanced, and if so calls the appropriate balancing +** routine. Balancing routines are: +** +** balance_quick() +** balance_deeper() +** balance_nonroot() +*/ +static int balance(BtCursor *pCur){ + int rc = SQLITE_OK; + const int nMin = pCur->pBt->usableSize * 2 / 3; + u8 aBalanceQuickSpace[13]; + u8 *pFree = 0; + + VVA_ONLY( int balance_quick_called = 0 ); + VVA_ONLY( int balance_deeper_called = 0 ); + + do { + int iPage; + MemPage *pPage = pCur->pPage; + + if( NEVER(pPage->nFree<0) && btreeComputeFreeSpace(pPage) ) break; + if( pPage->nOverflow==0 && pPage->nFree<=nMin ){ + break; + }else if( (iPage = pCur->iPage)==0 ){ + if( pPage->nOverflow && (rc = anotherValidCursor(pCur))==SQLITE_OK ){ + /* The root page of the b-tree is overfull. In this case call the + ** balance_deeper() function to create a new child for the root-page + ** and copy the current contents of the root-page to it. The + ** next iteration of the do-loop will balance the child page. + */ + assert( balance_deeper_called==0 ); + VVA_ONLY( balance_deeper_called++ ); + rc = balance_deeper(pPage, &pCur->apPage[1]); + if( rc==SQLITE_OK ){ + pCur->iPage = 1; + pCur->ix = 0; + pCur->aiIdx[0] = 0; + pCur->apPage[0] = pPage; + pCur->pPage = pCur->apPage[1]; + assert( pCur->pPage->nOverflow ); + } + }else{ + break; + } + }else{ + MemPage * const pParent = pCur->apPage[iPage-1]; + int const iIdx = pCur->aiIdx[iPage-1]; + + rc = sqlite3PagerWrite(pParent->pDbPage); + if( rc==SQLITE_OK && pParent->nFree<0 ){ + rc = btreeComputeFreeSpace(pParent); + } + if( rc==SQLITE_OK ){ +#ifndef SQLITE_OMIT_QUICKBALANCE + if( pPage->intKeyLeaf + && pPage->nOverflow==1 + && pPage->aiOvfl[0]==pPage->nCell + && pParent->pgno!=1 + && pParent->nCell==iIdx + ){ + /* Call balance_quick() to create a new sibling of pPage on which + ** to store the overflow cell. balance_quick() inserts a new cell + ** into pParent, which may cause pParent overflow. If this + ** happens, the next iteration of the do-loop will balance pParent + ** use either balance_nonroot() or balance_deeper(). Until this + ** happens, the overflow cell is stored in the aBalanceQuickSpace[] + ** buffer. + ** + ** The purpose of the following assert() is to check that only a + ** single call to balance_quick() is made for each call to this + ** function. If this were not verified, a subtle bug involving reuse + ** of the aBalanceQuickSpace[] might sneak in. + */ + assert( balance_quick_called==0 ); + VVA_ONLY( balance_quick_called++ ); + rc = balance_quick(pParent, pPage, aBalanceQuickSpace); + }else +#endif + { + /* In this case, call balance_nonroot() to redistribute cells + ** between pPage and up to 2 of its sibling pages. This involves + ** modifying the contents of pParent, which may cause pParent to + ** become overfull or underfull. The next iteration of the do-loop + ** will balance the parent page to correct this. + ** + ** If the parent page becomes overfull, the overflow cell or cells + ** are stored in the pSpace buffer allocated immediately below. + ** A subsequent iteration of the do-loop will deal with this by + ** calling balance_nonroot() (balance_deeper() may be called first, + ** but it doesn't deal with overflow cells - just moves them to a + ** different page). Once this subsequent call to balance_nonroot() + ** has completed, it is safe to release the pSpace buffer used by + ** the previous call, as the overflow cell data will have been + ** copied either into the body of a database page or into the new + ** pSpace buffer passed to the latter call to balance_nonroot(). + */ + u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); + rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, + pCur->hints&BTREE_BULKLOAD); + if( pFree ){ + /* If pFree is not NULL, it points to the pSpace buffer used + ** by a previous call to balance_nonroot(). Its contents are + ** now stored either on real database pages or within the + ** new pSpace buffer, so it may be safely freed here. */ + sqlite3PageFree(pFree); + } + + /* The pSpace buffer will be freed after the next call to + ** balance_nonroot(), or just before this function returns, whichever + ** comes first. */ + pFree = pSpace; + } + } + + pPage->nOverflow = 0; + + /* The next iteration of the do-loop balances the parent page. */ + releasePage(pPage); + pCur->iPage--; + assert( pCur->iPage>=0 ); + pCur->pPage = pCur->apPage[pCur->iPage]; + } + }while( rc==SQLITE_OK ); + + if( pFree ){ + sqlite3PageFree(pFree); + } + return rc; +} + +/* Overwrite content from pX into pDest. Only do the write if the +** content is different from what is already there. +*/ +static int btreeOverwriteContent( + MemPage *pPage, /* MemPage on which writing will occur */ + u8 *pDest, /* Pointer to the place to start writing */ + const BtreePayload *pX, /* Source of data to write */ + int iOffset, /* Offset of first byte to write */ + int iAmt /* Number of bytes to be written */ +){ + int nData = pX->nData - iOffset; + if( nData<=0 ){ + /* Overwritting with zeros */ + int i; + for(i=0; ipDbPage); + if( rc ) return rc; + memset(pDest + i, 0, iAmt - i); + } + }else{ + if( nDatapData) + iOffset, iAmt)!=0 ){ + int rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc ) return rc; + /* In a corrupt database, it is possible for the source and destination + ** buffers to overlap. This is harmless since the database is already + ** corrupt but it does cause valgrind and ASAN warnings. So use + ** memmove(). */ + memmove(pDest, ((u8*)pX->pData) + iOffset, iAmt); + } + } + return SQLITE_OK; +} + +/* +** Overwrite the cell that cursor pCur is pointing to with fresh content +** contained in pX. +*/ +static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){ + int iOffset; /* Next byte of pX->pData to write */ + int nTotal = pX->nData + pX->nZero; /* Total bytes of to write */ + int rc; /* Return code */ + MemPage *pPage = pCur->pPage; /* Page being written */ + BtShared *pBt; /* Btree */ + Pgno ovflPgno; /* Next overflow page to write */ + u32 ovflPageSize; /* Size to write on overflow page */ + + if( pCur->info.pPayload + pCur->info.nLocal > pPage->aDataEnd + || pCur->info.pPayload < pPage->aData + pPage->cellOffset + ){ + return SQLITE_CORRUPT_BKPT; + } + /* Overwrite the local portion first */ + rc = btreeOverwriteContent(pPage, pCur->info.pPayload, pX, + 0, pCur->info.nLocal); + if( rc ) return rc; + if( pCur->info.nLocal==nTotal ) return SQLITE_OK; + + /* Now overwrite the overflow pages */ + iOffset = pCur->info.nLocal; + assert( nTotal>=0 ); + assert( iOffset>=0 ); + ovflPgno = get4byte(pCur->info.pPayload + iOffset); + pBt = pPage->pBt; + ovflPageSize = pBt->usableSize - 4; + do{ + rc = btreeGetPage(pBt, ovflPgno, &pPage, 0); + if( rc ) return rc; + if( sqlite3PagerPageRefcount(pPage->pDbPage)!=1 || pPage->isInit ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + if( iOffset+ovflPageSize<(u32)nTotal ){ + ovflPgno = get4byte(pPage->aData); + }else{ + ovflPageSize = nTotal - iOffset; + } + rc = btreeOverwriteContent(pPage, pPage->aData+4, pX, + iOffset, ovflPageSize); + } + sqlite3PagerUnref(pPage->pDbPage); + if( rc ) return rc; + iOffset += ovflPageSize; + }while( iOffset0 then pCur points to a cell +** that is larger than (pKey,nKey). +** +** If seekResult==0, that means pCur is pointing at some unknown location. +** In that case, this routine must seek the cursor to the correct insertion +** point for (pKey,nKey) before doing the insertion. For index btrees, +** if pX->nMem is non-zero, then pX->aMem contains pointers to the unpacked +** key values and pX->aMem can be used instead of pX->pKey to avoid having +** to decode the key. +*/ +SQLITE_PRIVATE int sqlite3BtreeInsert( + BtCursor *pCur, /* Insert data into the table of this cursor */ + const BtreePayload *pX, /* Content of the row to be inserted */ + int flags, /* True if this is likely an append */ + int seekResult /* Result of prior MovetoUnpacked() call */ +){ + int rc; + int loc = seekResult; /* -1: before desired location +1: after */ + int szNew = 0; + int idx; + MemPage *pPage; + Btree *p = pCur->pBtree; + BtShared *pBt = p->pBt; + unsigned char *oldCell; + unsigned char *newCell = 0; + + assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND|BTREE_PREFORMAT))==flags ); + assert( (flags & BTREE_PREFORMAT)==0 || seekResult || pCur->pKeyInfo==0 ); + + /* Save the positions of any other cursors open on this table. + ** + ** In some cases, the call to btreeMoveto() below is a no-op. For + ** example, when inserting data into a table with auto-generated integer + ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the + ** integer key to use. It then calls this function to actually insert the + ** data into the intkey B-Tree. In this case btreeMoveto() recognizes + ** that the cursor is already where it needs to be and returns without + ** doing any work. To avoid thwarting these optimizations, it is important + ** not to clear the cursor here. + */ + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + if( loc && pCur->iPage<0 ){ + /* This can only happen if the schema is corrupt such that there is more + ** than one table or index with the same root page as used by the cursor. + ** Which can only happen if the SQLITE_NoSchemaError flag was set when + ** the schema was loaded. This cannot be asserted though, as a user might + ** set the flag, load the schema, and then unset the flag. */ + return SQLITE_CORRUPT_BKPT; + } + } + + if( pCur->eState>=CURSOR_REQUIRESEEK ){ + rc = moveToRoot(pCur); + if( rc && rc!=SQLITE_EMPTY ) return rc; + } + + assert( cursorOwnsBtShared(pCur) ); + assert( (pCur->curFlags & BTCF_WriteFlag)!=0 + && pBt->inTransaction==TRANS_WRITE + && (pBt->btsFlags & BTS_READ_ONLY)==0 ); + assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); + + /* Assert that the caller has been consistent. If this cursor was opened + ** expecting an index b-tree, then the caller should be inserting blob + ** keys with no associated data. If the cursor was opened expecting an + ** intkey table, the caller should be inserting integer keys with a + ** blob of associated data. */ + assert( (flags & BTREE_PREFORMAT) || (pX->pKey==0)==(pCur->pKeyInfo==0) ); + + if( pCur->pKeyInfo==0 ){ + assert( pX->pKey==0 ); + /* If this is an insert into a table b-tree, invalidate any incrblob + ** cursors open on the row being replaced */ + if( p->hasIncrblobCur ){ + invalidateIncrblobCursors(p, pCur->pgnoRoot, pX->nKey, 0); + } + + /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing + ** to a row with the same key as the new entry being inserted. + */ +#ifdef SQLITE_DEBUG + if( flags & BTREE_SAVEPOSITION ){ + assert( pCur->curFlags & BTCF_ValidNKey ); + assert( pX->nKey==pCur->info.nKey ); + assert( loc==0 ); + } +#endif + + /* On the other hand, BTREE_SAVEPOSITION==0 does not imply + ** that the cursor is not pointing to a row to be overwritten. + ** So do a complete check. + */ + if( (pCur->curFlags&BTCF_ValidNKey)!=0 && pX->nKey==pCur->info.nKey ){ + /* The cursor is pointing to the entry that is to be + ** overwritten */ + assert( pX->nData>=0 && pX->nZero>=0 ); + if( pCur->info.nSize!=0 + && pCur->info.nPayload==(u32)pX->nData+pX->nZero + ){ + /* New entry is the same size as the old. Do an overwrite */ + return btreeOverwriteCell(pCur, pX); + } + assert( loc==0 ); + }else if( loc==0 ){ + /* The cursor is *not* pointing to the cell to be overwritten, nor + ** to an adjacent cell. Move the cursor so that it is pointing either + ** to the cell to be overwritten or an adjacent cell. + */ + rc = sqlite3BtreeTableMoveto(pCur, pX->nKey, + (flags & BTREE_APPEND)!=0, &loc); + if( rc ) return rc; + } + }else{ + /* This is an index or a WITHOUT ROWID table */ + + /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing + ** to a row with the same key as the new entry being inserted. + */ + assert( (flags & BTREE_SAVEPOSITION)==0 || loc==0 ); + + /* If the cursor is not already pointing either to the cell to be + ** overwritten, or if a new cell is being inserted, if the cursor is + ** not pointing to an immediately adjacent cell, then move the cursor + ** so that it does. + */ + if( loc==0 && (flags & BTREE_SAVEPOSITION)==0 ){ + if( pX->nMem ){ + UnpackedRecord r; + r.pKeyInfo = pCur->pKeyInfo; + r.aMem = pX->aMem; + r.nField = pX->nMem; + r.default_rc = 0; + r.eqSeen = 0; + rc = sqlite3BtreeIndexMoveto(pCur, &r, &loc); + }else{ + rc = btreeMoveto(pCur, pX->pKey, pX->nKey, + (flags & BTREE_APPEND)!=0, &loc); + } + if( rc ) return rc; + } + + /* If the cursor is currently pointing to an entry to be overwritten + ** and the new content is the same as as the old, then use the + ** overwrite optimization. + */ + if( loc==0 ){ + getCellInfo(pCur); + if( pCur->info.nKey==pX->nKey ){ + BtreePayload x2; + x2.pData = pX->pKey; + x2.nData = pX->nKey; + x2.nZero = 0; + return btreeOverwriteCell(pCur, &x2); + } + } + } + assert( pCur->eState==CURSOR_VALID + || (pCur->eState==CURSOR_INVALID && loc) ); + + pPage = pCur->pPage; + assert( pPage->intKey || pX->nKey>=0 || (flags & BTREE_PREFORMAT) ); + assert( pPage->leaf || !pPage->intKey ); + if( pPage->nFree<0 ){ + if( pCur->eState>CURSOR_INVALID ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = btreeComputeFreeSpace(pPage); + } + if( rc ) return rc; + } + + TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", + pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno, + loc==0 ? "overwrite" : "new entry")); + assert( pPage->isInit ); + newCell = pBt->pTmpSpace; + assert( newCell!=0 ); + if( flags & BTREE_PREFORMAT ){ + rc = SQLITE_OK; + szNew = pBt->nPreformatSize; + if( szNew<4 ) szNew = 4; + if( ISAUTOVACUUM && szNew>pPage->maxLocal ){ + CellInfo info; + pPage->xParseCell(pPage, newCell, &info); + if( info.nPayload!=info.nLocal ){ + Pgno ovfl = get4byte(&newCell[szNew-4]); + ptrmapPut(pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, &rc); + } + } + }else{ + rc = fillInCell(pPage, newCell, pX, &szNew); + } + if( rc ) goto end_insert; + assert( szNew==pPage->xCellSize(pPage, newCell) ); + assert( szNew <= MX_CELL_SIZE(pBt) ); + idx = pCur->ix; + if( loc==0 ){ + CellInfo info; + assert( idx>=0 ); + if( idx>=pPage->nCell ){ + return SQLITE_CORRUPT_BKPT; + } + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc ){ + goto end_insert; + } + oldCell = findCell(pPage, idx); + if( !pPage->leaf ){ + memcpy(newCell, oldCell, 4); + } + BTREE_CLEAR_CELL(rc, pPage, oldCell, info); + testcase( pCur->curFlags & BTCF_ValidOvfl ); + invalidateOverflowCache(pCur); + if( info.nSize==szNew && info.nLocal==info.nPayload + && (!ISAUTOVACUUM || szNewminLocal) + ){ + /* Overwrite the old cell with the new if they are the same size. + ** We could also try to do this if the old cell is smaller, then add + ** the leftover space to the free list. But experiments show that + ** doing that is no faster then skipping this optimization and just + ** calling dropCell() and insertCell(). + ** + ** This optimization cannot be used on an autovacuum database if the + ** new entry uses overflow pages, as the insertCell() call below is + ** necessary to add the PTRMAP_OVERFLOW1 pointer-map entry. */ + assert( rc==SQLITE_OK ); /* clearCell never fails when nLocal==nPayload */ + if( oldCell < pPage->aData+pPage->hdrOffset+10 ){ + return SQLITE_CORRUPT_BKPT; + } + if( oldCell+szNew > pPage->aDataEnd ){ + return SQLITE_CORRUPT_BKPT; + } + memcpy(oldCell, newCell, szNew); + return SQLITE_OK; + } + dropCell(pPage, idx, info.nSize, &rc); + if( rc ) goto end_insert; + }else if( loc<0 && pPage->nCell>0 ){ + assert( pPage->leaf ); + idx = ++pCur->ix; + pCur->curFlags &= ~BTCF_ValidNKey; + }else{ + assert( pPage->leaf ); + } + insertCell(pPage, idx, newCell, szNew, 0, 0, &rc); + assert( pPage->nOverflow==0 || rc==SQLITE_OK ); + assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 ); + + /* If no error has occurred and pPage has an overflow cell, call balance() + ** to redistribute the cells within the tree. Since balance() may move + ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey + ** variables. + ** + ** Previous versions of SQLite called moveToRoot() to move the cursor + ** back to the root page as balance() used to invalidate the contents + ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that, + ** set the cursor state to "invalid". This makes common insert operations + ** slightly faster. + ** + ** There is a subtle but important optimization here too. When inserting + ** multiple records into an intkey b-tree using a single cursor (as can + ** happen while processing an "INSERT INTO ... SELECT" statement), it + ** is advantageous to leave the cursor pointing to the last entry in + ** the b-tree if possible. If the cursor is left pointing to the last + ** entry in the table, and the next row inserted has an integer key + ** larger than the largest existing key, it is possible to insert the + ** row without seeking the cursor. This can be a big performance boost. + */ + pCur->info.nSize = 0; + if( pPage->nOverflow ){ + assert( rc==SQLITE_OK ); + pCur->curFlags &= ~(BTCF_ValidNKey); + rc = balance(pCur); + + /* Must make sure nOverflow is reset to zero even if the balance() + ** fails. Internal data structure corruption will result otherwise. + ** Also, set the cursor state to invalid. This stops saveCursorPosition() + ** from trying to save the current position of the cursor. */ + pCur->pPage->nOverflow = 0; + pCur->eState = CURSOR_INVALID; + if( (flags & BTREE_SAVEPOSITION) && rc==SQLITE_OK ){ + btreeReleaseAllCursorPages(pCur); + if( pCur->pKeyInfo ){ + assert( pCur->pKey==0 ); + pCur->pKey = sqlite3Malloc( pX->nKey ); + if( pCur->pKey==0 ){ + rc = SQLITE_NOMEM; + }else{ + memcpy(pCur->pKey, pX->pKey, pX->nKey); + } + } + pCur->eState = CURSOR_REQUIRESEEK; + pCur->nKey = pX->nKey; + } + } + assert( pCur->iPage<0 || pCur->pPage->nOverflow==0 ); + +end_insert: + return rc; +} + +/* +** This function is used as part of copying the current row from cursor +** pSrc into cursor pDest. If the cursors are open on intkey tables, then +** parameter iKey is used as the rowid value when the record is copied +** into pDest. Otherwise, the record is copied verbatim. +** +** This function does not actually write the new value to cursor pDest. +** Instead, it creates and populates any required overflow pages and +** writes the data for the new cell into the BtShared.pTmpSpace buffer +** for the destination database. The size of the cell, in bytes, is left +** in BtShared.nPreformatSize. The caller completes the insertion by +** calling sqlite3BtreeInsert() with the BTREE_PREFORMAT flag specified. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 iKey){ + int rc = SQLITE_OK; + BtShared *pBt = pDest->pBt; + u8 *aOut = pBt->pTmpSpace; /* Pointer to next output buffer */ + const u8 *aIn; /* Pointer to next input buffer */ + u32 nIn; /* Size of input buffer aIn[] */ + u32 nRem; /* Bytes of data still to copy */ + + getCellInfo(pSrc); + aOut += putVarint32(aOut, pSrc->info.nPayload); + if( pDest->pKeyInfo==0 ) aOut += putVarint(aOut, iKey); + nIn = pSrc->info.nLocal; + aIn = pSrc->info.pPayload; + if( aIn+nIn>pSrc->pPage->aDataEnd ){ + return SQLITE_CORRUPT_BKPT; + } + nRem = pSrc->info.nPayload; + if( nIn==nRem && nInpPage->maxLocal ){ + memcpy(aOut, aIn, nIn); + pBt->nPreformatSize = nIn + (aOut - pBt->pTmpSpace); + }else{ + Pager *pSrcPager = pSrc->pBt->pPager; + u8 *pPgnoOut = 0; + Pgno ovflIn = 0; + DbPage *pPageIn = 0; + MemPage *pPageOut = 0; + u32 nOut; /* Size of output buffer aOut[] */ + + nOut = btreePayloadToLocal(pDest->pPage, pSrc->info.nPayload); + pBt->nPreformatSize = nOut + (aOut - pBt->pTmpSpace); + if( nOutinfo.nPayload ){ + pPgnoOut = &aOut[nOut]; + pBt->nPreformatSize += 4; + } + + if( nRem>nIn ){ + if( aIn+nIn+4>pSrc->pPage->aDataEnd ){ + return SQLITE_CORRUPT_BKPT; + } + ovflIn = get4byte(&pSrc->info.pPayload[nIn]); + } + + do { + nRem -= nOut; + do{ + assert( nOut>0 ); + if( nIn>0 ){ + int nCopy = MIN(nOut, nIn); + memcpy(aOut, aIn, nCopy); + nOut -= nCopy; + nIn -= nCopy; + aOut += nCopy; + aIn += nCopy; + } + if( nOut>0 ){ + sqlite3PagerUnref(pPageIn); + pPageIn = 0; + rc = sqlite3PagerGet(pSrcPager, ovflIn, &pPageIn, PAGER_GET_READONLY); + if( rc==SQLITE_OK ){ + aIn = (const u8*)sqlite3PagerGetData(pPageIn); + ovflIn = get4byte(aIn); + aIn += 4; + nIn = pSrc->pBt->usableSize - 4; + } + } + }while( rc==SQLITE_OK && nOut>0 ); + + if( rc==SQLITE_OK && nRem>0 && ALWAYS(pPgnoOut) ){ + Pgno pgnoNew; + MemPage *pNew = 0; + rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); + put4byte(pPgnoOut, pgnoNew); + if( ISAUTOVACUUM && pPageOut ){ + ptrmapPut(pBt, pgnoNew, PTRMAP_OVERFLOW2, pPageOut->pgno, &rc); + } + releasePage(pPageOut); + pPageOut = pNew; + if( pPageOut ){ + pPgnoOut = pPageOut->aData; + put4byte(pPgnoOut, 0); + aOut = &pPgnoOut[4]; + nOut = MIN(pBt->usableSize - 4, nRem); + } + } + }while( nRem>0 && rc==SQLITE_OK ); + + releasePage(pPageOut); + sqlite3PagerUnref(pPageIn); + } + + return rc; +} + +/* +** Delete the entry that the cursor is pointing to. +** +** If the BTREE_SAVEPOSITION bit of the flags parameter is zero, then +** the cursor is left pointing at an arbitrary location after the delete. +** But if that bit is set, then the cursor is left in a state such that +** the next call to BtreeNext() or BtreePrev() moves it to the same row +** as it would have been on if the call to BtreeDelete() had been omitted. +** +** The BTREE_AUXDELETE bit of flags indicates that is one of several deletes +** associated with a single table entry and its indexes. Only one of those +** deletes is considered the "primary" delete. The primary delete occurs +** on a cursor that is not a BTREE_FORDELETE cursor. All but one delete +** operation on non-FORDELETE cursors is tagged with the AUXDELETE flag. +** The BTREE_AUXDELETE bit is a hint that is not used by this implementation, +** but which might be used by alternative storage engines. +*/ +SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){ + Btree *p = pCur->pBtree; + BtShared *pBt = p->pBt; + int rc; /* Return code */ + MemPage *pPage; /* Page to delete cell from */ + unsigned char *pCell; /* Pointer to cell to delete */ + int iCellIdx; /* Index of cell to delete */ + int iCellDepth; /* Depth of node containing pCell */ + CellInfo info; /* Size of the cell being deleted */ + u8 bPreserve; /* Keep cursor valid. 2 for CURSOR_SKIPNEXT */ + + assert( cursorOwnsBtShared(pCur) ); + assert( pBt->inTransaction==TRANS_WRITE ); + assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); + assert( pCur->curFlags & BTCF_WriteFlag ); + assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); + assert( !hasReadConflicts(p, pCur->pgnoRoot) ); + assert( (flags & ~(BTREE_SAVEPOSITION | BTREE_AUXDELETE))==0 ); + if( pCur->eState!=CURSOR_VALID ){ + if( pCur->eState>=CURSOR_REQUIRESEEK ){ + rc = btreeRestoreCursorPosition(pCur); + assert( rc!=SQLITE_OK || CORRUPT_DB || pCur->eState==CURSOR_VALID ); + if( rc || pCur->eState!=CURSOR_VALID ) return rc; + }else{ + return SQLITE_CORRUPT_BKPT; + } + } + assert( pCur->eState==CURSOR_VALID ); + + iCellDepth = pCur->iPage; + iCellIdx = pCur->ix; + pPage = pCur->pPage; + if( pPage->nCell<=iCellIdx ){ + return SQLITE_CORRUPT_BKPT; + } + pCell = findCell(pPage, iCellIdx); + if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ){ + return SQLITE_CORRUPT_BKPT; + } + + /* If the BTREE_SAVEPOSITION bit is on, then the cursor position must + ** be preserved following this delete operation. If the current delete + ** will cause a b-tree rebalance, then this is done by saving the cursor + ** key and leaving the cursor in CURSOR_REQUIRESEEK state before + ** returning. + ** + ** If the current delete will not cause a rebalance, then the cursor + ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately + ** before or after the deleted entry. + ** + ** The bPreserve value records which path is required: + ** + ** bPreserve==0 Not necessary to save the cursor position + ** bPreserve==1 Use CURSOR_REQUIRESEEK to save the cursor position + ** bPreserve==2 Cursor won't move. Set CURSOR_SKIPNEXT. + */ + bPreserve = (flags & BTREE_SAVEPOSITION)!=0; + if( bPreserve ){ + if( !pPage->leaf + || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3) + || pPage->nCell==1 /* See dbfuzz001.test for a test case */ + ){ + /* A b-tree rebalance will be required after deleting this entry. + ** Save the cursor key. */ + rc = saveCursorKey(pCur); + if( rc ) return rc; + }else{ + bPreserve = 2; + } + } + + /* If the page containing the entry to delete is not a leaf page, move + ** the cursor to the largest entry in the tree that is smaller than + ** the entry being deleted. This cell will replace the cell being deleted + ** from the internal node. The 'previous' entry is used for this instead + ** of the 'next' entry, as the previous entry is always a part of the + ** sub-tree headed by the child page of the cell being deleted. This makes + ** balancing the tree following the delete operation easier. */ + if( !pPage->leaf ){ + rc = sqlite3BtreePrevious(pCur, 0); + assert( rc!=SQLITE_DONE ); + if( rc ) return rc; + } + + /* Save the positions of any other cursors open on this table before + ** making any modifications. */ + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } + + /* If this is a delete operation to remove a row from a table b-tree, + ** invalidate any incrblob cursors open on the row being deleted. */ + if( pCur->pKeyInfo==0 && p->hasIncrblobCur ){ + invalidateIncrblobCursors(p, pCur->pgnoRoot, pCur->info.nKey, 0); + } + + /* Make the page containing the entry to be deleted writable. Then free any + ** overflow pages associated with the entry and finally remove the cell + ** itself from within the page. */ + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc ) return rc; + BTREE_CLEAR_CELL(rc, pPage, pCell, info); + dropCell(pPage, iCellIdx, info.nSize, &rc); + if( rc ) return rc; + + /* If the cell deleted was not located on a leaf page, then the cursor + ** is currently pointing to the largest entry in the sub-tree headed + ** by the child-page of the cell that was just deleted from an internal + ** node. The cell from the leaf node needs to be moved to the internal + ** node to replace the deleted cell. */ + if( !pPage->leaf ){ + MemPage *pLeaf = pCur->pPage; + int nCell; + Pgno n; + unsigned char *pTmp; + + if( pLeaf->nFree<0 ){ + rc = btreeComputeFreeSpace(pLeaf); + if( rc ) return rc; + } + if( iCellDepthiPage-1 ){ + n = pCur->apPage[iCellDepth+1]->pgno; + }else{ + n = pCur->pPage->pgno; + } + pCell = findCell(pLeaf, pLeaf->nCell-1); + if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT; + nCell = pLeaf->xCellSize(pLeaf, pCell); + assert( MX_CELL_SIZE(pBt) >= nCell ); + pTmp = pBt->pTmpSpace; + assert( pTmp!=0 ); + rc = sqlite3PagerWrite(pLeaf->pDbPage); + if( rc==SQLITE_OK ){ + insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); + } + dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); + if( rc ) return rc; + } + + /* Balance the tree. If the entry deleted was located on a leaf page, + ** then the cursor still points to that page. In this case the first + ** call to balance() repairs the tree, and the if(...) condition is + ** never true. + ** + ** Otherwise, if the entry deleted was on an internal node page, then + ** pCur is pointing to the leaf page from which a cell was removed to + ** replace the cell deleted from the internal node. This is slightly + ** tricky as the leaf node may be underfull, and the internal node may + ** be either under or overfull. In this case run the balancing algorithm + ** on the leaf node first. If the balance proceeds far enough up the + ** tree that we can be sure that any problem in the internal node has + ** been corrected, so be it. Otherwise, after balancing the leaf node, + ** walk the cursor up the tree to the internal node and balance it as + ** well. */ + rc = balance(pCur); + if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){ + releasePageNotNull(pCur->pPage); + pCur->iPage--; + while( pCur->iPage>iCellDepth ){ + releasePage(pCur->apPage[pCur->iPage--]); + } + pCur->pPage = pCur->apPage[pCur->iPage]; + rc = balance(pCur); + } + + if( rc==SQLITE_OK ){ + if( bPreserve>1 ){ + assert( (pCur->iPage==iCellDepth || CORRUPT_DB) ); + assert( pPage==pCur->pPage || CORRUPT_DB ); + assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell ); + pCur->eState = CURSOR_SKIPNEXT; + if( iCellIdx>=pPage->nCell ){ + pCur->skipNext = -1; + pCur->ix = pPage->nCell-1; + }else{ + pCur->skipNext = 1; + } + }else{ + rc = moveToRoot(pCur); + if( bPreserve ){ + btreeReleaseAllCursorPages(pCur); + pCur->eState = CURSOR_REQUIRESEEK; + } + if( rc==SQLITE_EMPTY ) rc = SQLITE_OK; + } + } + return rc; +} + +/* +** Create a new BTree table. Write into *piTable the page +** number for the root page of the new table. +** +** The type of type is determined by the flags parameter. Only the +** following values of flags are currently in use. Other values for +** flags might not work: +** +** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys +** BTREE_ZERODATA Used for SQL indices +*/ +static int btreeCreateTable(Btree *p, Pgno *piTable, int createTabFlags){ + BtShared *pBt = p->pBt; + MemPage *pRoot; + Pgno pgnoRoot; + int rc; + int ptfFlags; /* Page-type flage for the root page of new table */ + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( pBt->inTransaction==TRANS_WRITE ); + assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); + +#ifdef SQLITE_OMIT_AUTOVACUUM + rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); + if( rc ){ + return rc; + } +#else + if( pBt->autoVacuum ){ + Pgno pgnoMove; /* Move a page here to make room for the root-page */ + MemPage *pPageMove; /* The page to move to. */ + + /* Creating a new table may probably require moving an existing database + ** to make room for the new tables root page. In case this page turns + ** out to be an overflow page, delete all overflow page-map caches + ** held by open cursors. + */ + invalidateAllOverflowCache(pBt); + + /* Read the value of meta[3] from the database to determine where the + ** root page of the new table should go. meta[3] is the largest root-page + ** created so far, so the new root-page is (meta[3]+1). + */ + sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot); + if( pgnoRoot>btreePagecount(pBt) ){ + return SQLITE_CORRUPT_BKPT; + } + pgnoRoot++; + + /* The new root-page may not be allocated on a pointer-map page, or the + ** PENDING_BYTE page. + */ + while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) || + pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ + pgnoRoot++; + } + assert( pgnoRoot>=3 ); + + /* Allocate a page. The page that currently resides at pgnoRoot will + ** be moved to the allocated page (unless the allocated page happens + ** to reside at pgnoRoot). + */ + rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT); + if( rc!=SQLITE_OK ){ + return rc; + } + + if( pgnoMove!=pgnoRoot ){ + /* pgnoRoot is the page that will be used for the root-page of + ** the new table (assuming an error did not occur). But we were + ** allocated pgnoMove. If required (i.e. if it was not allocated + ** by extending the file), the current page at position pgnoMove + ** is already journaled. + */ + u8 eType = 0; + Pgno iPtrPage = 0; + + /* Save the positions of any open cursors. This is required in + ** case they are holding a reference to an xFetch reference + ** corresponding to page pgnoRoot. */ + rc = saveAllCursors(pBt, 0, 0); + releasePage(pPageMove); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Move the page currently at pgnoRoot to pgnoMove. */ + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage); + if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){ + rc = SQLITE_CORRUPT_BKPT; + } + if( rc!=SQLITE_OK ){ + releasePage(pRoot); + return rc; + } + assert( eType!=PTRMAP_ROOTPAGE ); + assert( eType!=PTRMAP_FREEPAGE ); + rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0); + releasePage(pRoot); + + /* Obtain the page at pgnoRoot */ + if( rc!=SQLITE_OK ){ + return rc; + } + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = sqlite3PagerWrite(pRoot->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(pRoot); + return rc; + } + }else{ + pRoot = pPageMove; + } + + /* Update the pointer-map and meta-data with the new root-page number. */ + ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc); + if( rc ){ + releasePage(pRoot); + return rc; + } + + /* When the new root page was allocated, page 1 was made writable in + ** order either to increase the database filesize, or to decrement the + ** freelist count. Hence, the sqlite3BtreeUpdateMeta() call cannot fail. + */ + assert( sqlite3PagerIswriteable(pBt->pPage1->pDbPage) ); + rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot); + if( NEVER(rc) ){ + releasePage(pRoot); + return rc; + } + + }else{ + rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); + if( rc ) return rc; + } +#endif + assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); + if( createTabFlags & BTREE_INTKEY ){ + ptfFlags = PTF_INTKEY | PTF_LEAFDATA | PTF_LEAF; + }else{ + ptfFlags = PTF_ZERODATA | PTF_LEAF; + } + zeroPage(pRoot, ptfFlags); + sqlite3PagerUnref(pRoot->pDbPage); + assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 ); + *piTable = pgnoRoot; + return SQLITE_OK; +} +SQLITE_PRIVATE int sqlite3BtreeCreateTable(Btree *p, Pgno *piTable, int flags){ + int rc; + sqlite3BtreeEnter(p); + rc = btreeCreateTable(p, piTable, flags); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Erase the given database page and all its children. Return +** the page to the freelist. +*/ +static int clearDatabasePage( + BtShared *pBt, /* The BTree that contains the table */ + Pgno pgno, /* Page number to clear */ + int freePageFlag, /* Deallocate page if true */ + i64 *pnChange /* Add number of Cells freed to this counter */ +){ + MemPage *pPage; + int rc; + unsigned char *pCell; + int i; + int hdr; + CellInfo info; + + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pgno>btreePagecount(pBt) ){ + return SQLITE_CORRUPT_BKPT; + } + rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); + if( rc ) return rc; + if( (pBt->openFlags & BTREE_SINGLE)==0 + && sqlite3PagerPageRefcount(pPage->pDbPage) != (1 + (pgno==1)) + ){ + rc = SQLITE_CORRUPT_BKPT; + goto cleardatabasepage_out; + } + hdr = pPage->hdrOffset; + for(i=0; inCell; i++){ + pCell = findCell(pPage, i); + if( !pPage->leaf ){ + rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); + if( rc ) goto cleardatabasepage_out; + } + BTREE_CLEAR_CELL(rc, pPage, pCell, info); + if( rc ) goto cleardatabasepage_out; + } + if( !pPage->leaf ){ + rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); + if( rc ) goto cleardatabasepage_out; + if( pPage->intKey ) pnChange = 0; + } + if( pnChange ){ + testcase( !pPage->intKey ); + *pnChange += pPage->nCell; + } + if( freePageFlag ){ + freePage(pPage, &rc); + }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ + zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF); + } + +cleardatabasepage_out: + releasePage(pPage); + return rc; +} + +/* +** Delete all information from a single table in the database. iTable is +** the page number of the root of the table. After this routine returns, +** the root page is empty, but still exists. +** +** This routine will fail with SQLITE_LOCKED if there are any open +** read cursors on the table. Open write cursors are moved to the +** root of the table. +** +** If pnChange is not NULL, then the integer value pointed to by pnChange +** is incremented by the number of entries in the table. +*/ +SQLITE_PRIVATE int sqlite3BtreeClearTable(Btree *p, int iTable, i64 *pnChange){ + int rc; + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + assert( p->inTrans==TRANS_WRITE ); + + rc = saveAllCursors(pBt, (Pgno)iTable, 0); + + if( SQLITE_OK==rc ){ + /* Invalidate all incrblob cursors open on table iTable (assuming iTable + ** is the root of a table b-tree - if it is not, the following call is + ** a no-op). */ + if( p->hasIncrblobCur ){ + invalidateIncrblobCursors(p, (Pgno)iTable, 0, 1); + } + rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange); + } + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Delete all information from the single table that pCur is open on. +** +** This routine only work for pCur on an ephemeral table. +*/ +SQLITE_PRIVATE int sqlite3BtreeClearTableOfCursor(BtCursor *pCur){ + return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0); +} + +/* +** Erase all information in a table and add the root of the table to +** the freelist. Except, the root of the principle table (the one on +** page 1) is never added to the freelist. +** +** This routine will fail with SQLITE_LOCKED if there are any open +** cursors on the table. +** +** If AUTOVACUUM is enabled and the page at iTable is not the last +** root page in the database file, then the last root page +** in the database file is moved into the slot formerly occupied by +** iTable and that last slot formerly occupied by the last root page +** is added to the freelist instead of iTable. In this say, all +** root pages are kept at the beginning of the database file, which +** is necessary for AUTOVACUUM to work right. *piMoved is set to the +** page number that used to be the last root page in the file before +** the move. If no page gets moved, *piMoved is set to 0. +** The last root page is recorded in meta[3] and the value of +** meta[3] is updated by this procedure. +*/ +static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ + int rc; + MemPage *pPage = 0; + BtShared *pBt = p->pBt; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( p->inTrans==TRANS_WRITE ); + assert( iTable>=2 ); + if( iTable>btreePagecount(pBt) ){ + return SQLITE_CORRUPT_BKPT; + } + + rc = sqlite3BtreeClearTable(p, iTable, 0); + if( rc ) return rc; + rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); + if( NEVER(rc) ){ + releasePage(pPage); + return rc; + } + + *piMoved = 0; + +#ifdef SQLITE_OMIT_AUTOVACUUM + freePage(pPage, &rc); + releasePage(pPage); +#else + if( pBt->autoVacuum ){ + Pgno maxRootPgno; + sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno); + + if( iTable==maxRootPgno ){ + /* If the table being dropped is the table with the largest root-page + ** number in the database, put the root page on the free list. + */ + freePage(pPage, &rc); + releasePage(pPage); + if( rc!=SQLITE_OK ){ + return rc; + } + }else{ + /* The table being dropped does not have the largest root-page + ** number in the database. So move the page that does into the + ** gap left by the deleted root-page. + */ + MemPage *pMove; + releasePage(pPage); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0); + releasePage(pMove); + if( rc!=SQLITE_OK ){ + return rc; + } + pMove = 0; + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + freePage(pMove, &rc); + releasePage(pMove); + if( rc!=SQLITE_OK ){ + return rc; + } + *piMoved = maxRootPgno; + } + + /* Set the new 'max-root-page' value in the database header. This + ** is the old value less one, less one more if that happens to + ** be a root-page number, less one again if that is the + ** PENDING_BYTE_PAGE. + */ + maxRootPgno--; + while( maxRootPgno==PENDING_BYTE_PAGE(pBt) + || PTRMAP_ISPAGE(pBt, maxRootPgno) ){ + maxRootPgno--; + } + assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) ); + + rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno); + }else{ + freePage(pPage, &rc); + releasePage(pPage); + } +#endif + return rc; +} +SQLITE_PRIVATE int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ + int rc; + sqlite3BtreeEnter(p); + rc = btreeDropTable(p, iTable, piMoved); + sqlite3BtreeLeave(p); + return rc; +} + + +/* +** This function may only be called if the b-tree connection already +** has a read or write transaction open on the database. +** +** Read the meta-information out of a database file. Meta[0] +** is the number of free pages currently in the database. Meta[1] +** through meta[15] are available for use by higher layers. Meta[0] +** is read-only, the others are read/write. +** +** The schema layer numbers meta values differently. At the schema +** layer (and the SetCookie and ReadCookie opcodes) the number of +** free pages is not visible. So Cookie[0] is the same as Meta[1]. +** +** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead +** of reading the value out of the header, it instead loads the "DataVersion" +** from the pager. The BTREE_DATA_VERSION value is not actually stored in the +** database file. It is a number computed by the pager. But its access +** pattern is the same as header meta values, and so it is convenient to +** read it from this routine. +*/ +SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ + BtShared *pBt = p->pBt; + + sqlite3BtreeEnter(p); + assert( p->inTrans>TRANS_NONE ); + assert( SQLITE_OK==querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK) ); + assert( pBt->pPage1 ); + assert( idx>=0 && idx<=15 ); + + if( idx==BTREE_DATA_VERSION ){ + *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iBDataVersion; + }else{ + *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]); + } + + /* If auto-vacuum is disabled in this build and this is an auto-vacuum + ** database, mark the database as read-only. */ +#ifdef SQLITE_OMIT_AUTOVACUUM + if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){ + pBt->btsFlags |= BTS_READ_ONLY; + } +#endif + + sqlite3BtreeLeave(p); +} + +/* +** Write meta-information back into the database. Meta[0] is +** read-only and may not be written. +*/ +SQLITE_PRIVATE int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ + BtShared *pBt = p->pBt; + unsigned char *pP1; + int rc; + assert( idx>=1 && idx<=15 ); + sqlite3BtreeEnter(p); + assert( p->inTrans==TRANS_WRITE ); + assert( pBt->pPage1!=0 ); + pP1 = pBt->pPage1->aData; + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( rc==SQLITE_OK ){ + put4byte(&pP1[36 + idx*4], iMeta); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( idx==BTREE_INCR_VACUUM ){ + assert( pBt->autoVacuum || iMeta==0 ); + assert( iMeta==0 || iMeta==1 ); + pBt->incrVacuum = (u8)iMeta; + } +#endif + } + sqlite3BtreeLeave(p); + return rc; +} + +/* +** The first argument, pCur, is a cursor opened on some b-tree. Count the +** number of entries in the b-tree and write the result to *pnEntry. +** +** SQLITE_OK is returned if the operation is successfully executed. +** Otherwise, if an error is encountered (i.e. an IO error or database +** corruption) an SQLite error code is returned. +*/ +SQLITE_PRIVATE int sqlite3BtreeCount(sqlite3 *db, BtCursor *pCur, i64 *pnEntry){ + i64 nEntry = 0; /* Value to return in *pnEntry */ + int rc; /* Return code */ + + rc = moveToRoot(pCur); + if( rc==SQLITE_EMPTY ){ + *pnEntry = 0; + return SQLITE_OK; + } + + /* Unless an error occurs, the following loop runs one iteration for each + ** page in the B-Tree structure (not including overflow pages). + */ + while( rc==SQLITE_OK && !AtomicLoad(&db->u1.isInterrupted) ){ + int iIdx; /* Index of child node in parent */ + MemPage *pPage; /* Current page of the b-tree */ + + /* If this is a leaf page or the tree is not an int-key tree, then + ** this page contains countable entries. Increment the entry counter + ** accordingly. + */ + pPage = pCur->pPage; + if( pPage->leaf || !pPage->intKey ){ + nEntry += pPage->nCell; + } + + /* pPage is a leaf node. This loop navigates the cursor so that it + ** points to the first interior cell that it points to the parent of + ** the next page in the tree that has not yet been visited. The + ** pCur->aiIdx[pCur->iPage] value is set to the index of the parent cell + ** of the page, or to the number of cells in the page if the next page + ** to visit is the right-child of its parent. + ** + ** If all pages in the tree have been visited, return SQLITE_OK to the + ** caller. + */ + if( pPage->leaf ){ + do { + if( pCur->iPage==0 ){ + /* All pages of the b-tree have been visited. Return successfully. */ + *pnEntry = nEntry; + return moveToRoot(pCur); + } + moveToParent(pCur); + }while ( pCur->ix>=pCur->pPage->nCell ); + + pCur->ix++; + pPage = pCur->pPage; + } + + /* Descend to the child node of the cell that the cursor currently + ** points at. This is the right-child if (iIdx==pPage->nCell). + */ + iIdx = pCur->ix; + if( iIdx==pPage->nCell ){ + rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); + }else{ + rc = moveToChild(pCur, get4byte(findCell(pPage, iIdx))); + } + } + + /* An error has occurred. Return an error code. */ + return rc; +} + +/* +** Return the pager associated with a BTree. This routine is used for +** testing and debugging only. +*/ +SQLITE_PRIVATE Pager *sqlite3BtreePager(Btree *p){ + return p->pBt->pPager; +} + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* +** Append a message to the error message string. +*/ +static void checkAppendMsg( + IntegrityCk *pCheck, + const char *zFormat, + ... +){ + va_list ap; + if( !pCheck->mxErr ) return; + pCheck->mxErr--; + pCheck->nErr++; + va_start(ap, zFormat); + if( pCheck->errMsg.nChar ){ + sqlite3_str_append(&pCheck->errMsg, "\n", 1); + } + if( pCheck->zPfx ){ + sqlite3_str_appendf(&pCheck->errMsg, pCheck->zPfx, pCheck->v1, pCheck->v2); + } + sqlite3_str_vappendf(&pCheck->errMsg, zFormat, ap); + va_end(ap); + if( pCheck->errMsg.accError==SQLITE_NOMEM ){ + pCheck->bOomFault = 1; + } +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK + +/* +** Return non-zero if the bit in the IntegrityCk.aPgRef[] array that +** corresponds to page iPg is already set. +*/ +static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){ + assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); + return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07))); +} + +/* +** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg. +*/ +static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ + assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); + pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07)); +} + + +/* +** Add 1 to the reference count for page iPage. If this is the second +** reference to the page, add an error message to pCheck->zErrMsg. +** Return 1 if there are 2 or more references to the page and 0 if +** if this is the first reference to the page. +** +** Also check that the page number is in bounds. +*/ +static int checkRef(IntegrityCk *pCheck, Pgno iPage){ + if( iPage>pCheck->nPage || iPage==0 ){ + checkAppendMsg(pCheck, "invalid page number %d", iPage); + return 1; + } + if( getPageReferenced(pCheck, iPage) ){ + checkAppendMsg(pCheck, "2nd reference to page %d", iPage); + return 1; + } + if( AtomicLoad(&pCheck->db->u1.isInterrupted) ) return 1; + setPageReferenced(pCheck, iPage); + return 0; +} + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** Check that the entry in the pointer-map for page iChild maps to +** page iParent, pointer type ptrType. If not, append an error message +** to pCheck. +*/ +static void checkPtrmap( + IntegrityCk *pCheck, /* Integrity check context */ + Pgno iChild, /* Child page number */ + u8 eType, /* Expected pointer map type */ + Pgno iParent /* Expected pointer map parent page number */ +){ + int rc; + u8 ePtrmapType; + Pgno iPtrmapParent; + + rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->bOomFault = 1; + checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild); + return; + } + + if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ + checkAppendMsg(pCheck, + "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", + iChild, eType, iParent, ePtrmapType, iPtrmapParent); + } +} +#endif + +/* +** Check the integrity of the freelist or of an overflow page list. +** Verify that the number of pages on the list is N. +*/ +static void checkList( + IntegrityCk *pCheck, /* Integrity checking context */ + int isFreeList, /* True for a freelist. False for overflow page list */ + Pgno iPage, /* Page number for first page in the list */ + u32 N /* Expected number of pages in the list */ +){ + int i; + u32 expected = N; + int nErrAtStart = pCheck->nErr; + while( iPage!=0 && pCheck->mxErr ){ + DbPage *pOvflPage; + unsigned char *pOvflData; + if( checkRef(pCheck, iPage) ) break; + N--; + if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage, 0) ){ + checkAppendMsg(pCheck, "failed to get page %d", iPage); + break; + } + pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); + if( isFreeList ){ + u32 n = (u32)get4byte(&pOvflData[4]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pCheck->pBt->autoVacuum ){ + checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0); + } +#endif + if( n>pCheck->pBt->usableSize/4-2 ){ + checkAppendMsg(pCheck, + "freelist leaf count too big on page %d", iPage); + N--; + }else{ + for(i=0; i<(int)n; i++){ + Pgno iFreePage = get4byte(&pOvflData[8+i*4]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pCheck->pBt->autoVacuum ){ + checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0); + } +#endif + checkRef(pCheck, iFreePage); + } + N -= n; + } + } +#ifndef SQLITE_OMIT_AUTOVACUUM + else{ + /* If this database supports auto-vacuum and iPage is not the last + ** page in this overflow list, check that the pointer-map entry for + ** the following page matches iPage. + */ + if( pCheck->pBt->autoVacuum && N>0 ){ + i = get4byte(pOvflData); + checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage); + } + } +#endif + iPage = get4byte(pOvflData); + sqlite3PagerUnref(pOvflPage); + } + if( N && nErrAtStart==pCheck->nErr ){ + checkAppendMsg(pCheck, + "%s is %d but should be %d", + isFreeList ? "size" : "overflow list length", + expected-N, expected); + } +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +/* +** An implementation of a min-heap. +** +** aHeap[0] is the number of elements on the heap. aHeap[1] is the +** root element. The daughter nodes of aHeap[N] are aHeap[N*2] +** and aHeap[N*2+1]. +** +** The heap property is this: Every node is less than or equal to both +** of its daughter nodes. A consequence of the heap property is that the +** root node aHeap[1] is always the minimum value currently in the heap. +** +** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto +** the heap, preserving the heap property. The btreeHeapPull() routine +** removes the root element from the heap (the minimum value in the heap) +** and then moves other nodes around as necessary to preserve the heap +** property. +** +** This heap is used for cell overlap and coverage testing. Each u32 +** entry represents the span of a cell or freeblock on a btree page. +** The upper 16 bits are the index of the first byte of a range and the +** lower 16 bits are the index of the last byte of that range. +*/ +static void btreeHeapInsert(u32 *aHeap, u32 x){ + u32 j, i = ++aHeap[0]; + aHeap[i] = x; + while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){ + x = aHeap[j]; + aHeap[j] = aHeap[i]; + aHeap[i] = x; + i = j; + } +} +static int btreeHeapPull(u32 *aHeap, u32 *pOut){ + u32 j, i, x; + if( (x = aHeap[0])==0 ) return 0; + *pOut = aHeap[1]; + aHeap[1] = aHeap[x]; + aHeap[x] = 0xffffffff; + aHeap[0]--; + i = 1; + while( (j = i*2)<=aHeap[0] ){ + if( aHeap[j]>aHeap[j+1] ) j++; + if( aHeap[i]zPfx; + int saved_v1 = pCheck->v1; + int saved_v2 = pCheck->v2; + u8 savedIsInit = 0; + + /* Check that the page exists + */ + pBt = pCheck->pBt; + usableSize = pBt->usableSize; + if( iPage==0 ) return 0; + if( checkRef(pCheck, iPage) ) return 0; + pCheck->zPfx = "Page %u: "; + pCheck->v1 = iPage; + if( (rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0 ){ + checkAppendMsg(pCheck, + "unable to get the page. error code=%d", rc); + goto end_of_check; + } + + /* Clear MemPage.isInit to make sure the corruption detection code in + ** btreeInitPage() is executed. */ + savedIsInit = pPage->isInit; + pPage->isInit = 0; + if( (rc = btreeInitPage(pPage))!=0 ){ + assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ + checkAppendMsg(pCheck, + "btreeInitPage() returns error code %d", rc); + goto end_of_check; + } + if( (rc = btreeComputeFreeSpace(pPage))!=0 ){ + assert( rc==SQLITE_CORRUPT ); + checkAppendMsg(pCheck, "free space corruption", rc); + goto end_of_check; + } + data = pPage->aData; + hdr = pPage->hdrOffset; + + /* Set up for cell analysis */ + pCheck->zPfx = "On tree page %u cell %d: "; + contentOffset = get2byteNotZero(&data[hdr+5]); + assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ + + /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the + ** number of cells on the page. */ + nCell = get2byte(&data[hdr+3]); + assert( pPage->nCell==nCell ); + + /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page + ** immediately follows the b-tree page header. */ + cellStart = hdr + 12 - 4*pPage->leaf; + assert( pPage->aCellIdx==&data[cellStart] ); + pCellIdx = &data[cellStart + 2*(nCell-1)]; + + if( !pPage->leaf ){ + /* Analyze the right-child page of internal pages */ + pgno = get4byte(&data[hdr+8]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + pCheck->zPfx = "On page %u at right child: "; + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); + } +#endif + depth = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + }else{ + /* For leaf pages, the coverage check will occur in the same loop + ** as the other cell checks, so initialize the heap. */ + heap = pCheck->heap; + heap[0] = 0; + } + + /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte + ** integer offsets to the cell contents. */ + for(i=nCell-1; i>=0 && pCheck->mxErr; i--){ + CellInfo info; + + /* Check cell size */ + pCheck->v2 = i; + assert( pCellIdx==&data[cellStart + i*2] ); + pc = get2byteAligned(pCellIdx); + pCellIdx -= 2; + if( pcusableSize-4 ){ + checkAppendMsg(pCheck, "Offset %d out of range %d..%d", + pc, contentOffset, usableSize-4); + doCoverageCheck = 0; + continue; + } + pCell = &data[pc]; + pPage->xParseCell(pPage, pCell, &info); + if( pc+info.nSize>usableSize ){ + checkAppendMsg(pCheck, "Extends off end of page"); + doCoverageCheck = 0; + continue; + } + + /* Check for integer primary key out of range */ + if( pPage->intKey ){ + if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){ + checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey); + } + maxKey = info.nKey; + keyCanBeEqual = 0; /* Only the first key on the page may ==maxKey */ + } + + /* Check the content overflow list */ + if( info.nPayload>info.nLocal ){ + u32 nPage; /* Number of pages on the overflow chain */ + Pgno pgnoOvfl; /* First page of the overflow chain */ + assert( pc + info.nSize - 4 <= usableSize ); + nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4); + pgnoOvfl = get4byte(&pCell[info.nSize - 4]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); + } +#endif + checkList(pCheck, 0, pgnoOvfl, nPage); + } + + if( !pPage->leaf ){ + /* Check sanity of left child page for internal pages */ + pgno = get4byte(pCell); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); + } +#endif + d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + if( d2!=depth ){ + checkAppendMsg(pCheck, "Child page depth differs"); + depth = d2; + } + }else{ + /* Populate the coverage-checking heap for leaf pages */ + btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1)); + } + } + *piMinKey = maxKey; + + /* Check for complete coverage of the page + */ + pCheck->zPfx = 0; + if( doCoverageCheck && pCheck->mxErr>0 ){ + /* For leaf pages, the min-heap has already been initialized and the + ** cells have already been inserted. But for internal pages, that has + ** not yet been done, so do it now */ + if( !pPage->leaf ){ + heap = pCheck->heap; + heap[0] = 0; + for(i=nCell-1; i>=0; i--){ + u32 size; + pc = get2byteAligned(&data[cellStart+i*2]); + size = pPage->xCellSize(pPage, &data[pc]); + btreeHeapInsert(heap, (pc<<16)|(pc+size-1)); + } + } + /* Add the freeblocks to the min-heap + ** + ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header + ** is the offset of the first freeblock, or zero if there are no + ** freeblocks on the page. + */ + i = get2byte(&data[hdr+1]); + while( i>0 ){ + int size, j; + assert( (u32)i<=usableSize-4 ); /* Enforced by btreeComputeFreeSpace() */ + size = get2byte(&data[i+2]); + assert( (u32)(i+size)<=usableSize ); /* due to btreeComputeFreeSpace() */ + btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1)); + /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a + ** big-endian integer which is the offset in the b-tree page of the next + ** freeblock in the chain, or zero if the freeblock is the last on the + ** chain. */ + j = get2byte(&data[i]); + /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of + ** increasing offset. */ + assert( j==0 || j>i+size ); /* Enforced by btreeComputeFreeSpace() */ + assert( (u32)j<=usableSize-4 ); /* Enforced by btreeComputeFreeSpace() */ + i = j; + } + /* Analyze the min-heap looking for overlap between cells and/or + ** freeblocks, and counting the number of untracked bytes in nFrag. + ** + ** Each min-heap entry is of the form: (start_address<<16)|end_address. + ** There is an implied first entry the covers the page header, the cell + ** pointer index, and the gap between the cell pointer index and the start + ** of cell content. + ** + ** The loop below pulls entries from the min-heap in order and compares + ** the start_address against the previous end_address. If there is an + ** overlap, that means bytes are used multiple times. If there is a gap, + ** that gap is added to the fragmentation count. + */ + nFrag = 0; + prev = contentOffset - 1; /* Implied first min-heap entry */ + while( btreeHeapPull(heap,&x) ){ + if( (prev&0xffff)>=(x>>16) ){ + checkAppendMsg(pCheck, + "Multiple uses for byte %u of page %u", x>>16, iPage); + break; + }else{ + nFrag += (x>>16) - (prev&0xffff) - 1; + prev = x; + } + } + nFrag += usableSize - (prev&0xffff) - 1; + /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments + ** is stored in the fifth field of the b-tree page header. + ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the + ** number of fragmented free bytes within the cell content area. + */ + if( heap[0]==0 && nFrag!=data[hdr+7] ){ + checkAppendMsg(pCheck, + "Fragmentation of %d bytes reported as %d on page %u", + nFrag, data[hdr+7], iPage); + } + } + +end_of_check: + if( !doCoverageCheck ) pPage->isInit = savedIsInit; + releasePage(pPage); + pCheck->zPfx = saved_zPfx; + pCheck->v1 = saved_v1; + pCheck->v2 = saved_v2; + return depth+1; +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* +** This routine does a complete check of the given BTree file. aRoot[] is +** an array of pages numbers were each page number is the root page of +** a table. nRoot is the number of entries in aRoot. +** +** A read-only or read-write transaction must be opened before calling +** this function. +** +** Write the number of error seen in *pnErr. Except for some memory +** allocation errors, an error message held in memory obtained from +** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is +** returned. If a memory allocation error occurs, NULL is returned. +** +** If the first entry in aRoot[] is 0, that indicates that the list of +** root pages is incomplete. This is a "partial integrity-check". This +** happens when performing an integrity check on a single table. The +** zero is skipped, of course. But in addition, the freelist checks +** and the checks to make sure every page is referenced are also skipped, +** since obviously it is not possible to know which pages are covered by +** the unverified btrees. Except, if aRoot[1] is 1, then the freelist +** checks are still performed. +*/ +SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( + sqlite3 *db, /* Database connection that is running the check */ + Btree *p, /* The btree to be checked */ + Pgno *aRoot, /* An array of root pages numbers for individual trees */ + int nRoot, /* Number of entries in aRoot[] */ + int mxErr, /* Stop reporting errors after this many */ + int *pnErr /* Write number of errors seen to this variable */ +){ + Pgno i; + IntegrityCk sCheck; + BtShared *pBt = p->pBt; + u64 savedDbFlags = pBt->db->flags; + char zErr[100]; + int bPartial = 0; /* True if not checking all btrees */ + int bCkFreelist = 1; /* True to scan the freelist */ + VVA_ONLY( int nRef ); + assert( nRoot>0 ); + + /* aRoot[0]==0 means this is a partial check */ + if( aRoot[0]==0 ){ + assert( nRoot>1 ); + bPartial = 1; + if( aRoot[1]!=1 ) bCkFreelist = 0; + } + + sqlite3BtreeEnter(p); + assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); + VVA_ONLY( nRef = sqlite3PagerRefcount(pBt->pPager) ); + assert( nRef>=0 ); + sCheck.db = db; + sCheck.pBt = pBt; + sCheck.pPager = pBt->pPager; + sCheck.nPage = btreePagecount(sCheck.pBt); + sCheck.mxErr = mxErr; + sCheck.nErr = 0; + sCheck.bOomFault = 0; + sCheck.zPfx = 0; + sCheck.v1 = 0; + sCheck.v2 = 0; + sCheck.aPgRef = 0; + sCheck.heap = 0; + sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); + sCheck.errMsg.printfFlags = SQLITE_PRINTF_INTERNAL; + if( sCheck.nPage==0 ){ + goto integrity_ck_cleanup; + } + + sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); + if( !sCheck.aPgRef ){ + sCheck.bOomFault = 1; + goto integrity_ck_cleanup; + } + sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); + if( sCheck.heap==0 ){ + sCheck.bOomFault = 1; + goto integrity_ck_cleanup; + } + + i = PENDING_BYTE_PAGE(pBt); + if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); + + /* Check the integrity of the freelist + */ + if( bCkFreelist ){ + sCheck.zPfx = "Main freelist: "; + checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), + get4byte(&pBt->pPage1->aData[36])); + sCheck.zPfx = 0; + } + + /* Check all the tables. + */ +#ifndef SQLITE_OMIT_AUTOVACUUM + if( !bPartial ){ + if( pBt->autoVacuum ){ + Pgno mx = 0; + Pgno mxInHdr; + for(i=0; (int)ipPage1->aData[52]); + if( mx!=mxInHdr ){ + checkAppendMsg(&sCheck, + "max rootpage (%d) disagrees with header (%d)", + mx, mxInHdr + ); + } + }else if( get4byte(&pBt->pPage1->aData[64])!=0 ){ + checkAppendMsg(&sCheck, + "incremental_vacuum enabled with a max rootpage of zero" + ); + } + } +#endif + testcase( pBt->db->flags & SQLITE_CellSizeCk ); + pBt->db->flags &= ~(u64)SQLITE_CellSizeCk; + for(i=0; (int)iautoVacuum && aRoot[i]>1 && !bPartial ){ + checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); + } +#endif + checkTreePage(&sCheck, aRoot[i], ¬Used, LARGEST_INT64); + } + pBt->db->flags = savedDbFlags; + + /* Make sure every page in the file is referenced + */ + if( !bPartial ){ + for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ +#ifdef SQLITE_OMIT_AUTOVACUUM + if( getPageReferenced(&sCheck, i)==0 ){ + checkAppendMsg(&sCheck, "Page %d is never used", i); + } +#else + /* If the database supports auto-vacuum, make sure no tables contain + ** references to pointer-map pages. + */ + if( getPageReferenced(&sCheck, i)==0 && + (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ + checkAppendMsg(&sCheck, "Page %d is never used", i); + } + if( getPageReferenced(&sCheck, i)!=0 && + (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ + checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); + } +#endif + } + } + + /* Clean up and report errors. + */ +integrity_ck_cleanup: + sqlite3PageFree(sCheck.heap); + sqlite3_free(sCheck.aPgRef); + if( sCheck.bOomFault ){ + sqlite3_str_reset(&sCheck.errMsg); + sCheck.nErr++; + } + *pnErr = sCheck.nErr; + if( sCheck.nErr==0 ) sqlite3_str_reset(&sCheck.errMsg); + /* Make sure this analysis did not leave any unref() pages. */ + assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); + sqlite3BtreeLeave(p); + return sqlite3StrAccumFinish(&sCheck.errMsg); +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +/* +** Return the full pathname of the underlying database file. Return +** an empty string if the database is in-memory or a TEMP database. +** +** The pager filename is invariant as long as the pager is +** open so it is safe to access without the BtShared mutex. +*/ +SQLITE_PRIVATE const char *sqlite3BtreeGetFilename(Btree *p){ + assert( p->pBt->pPager!=0 ); + return sqlite3PagerFilename(p->pBt->pPager, 1); +} + +/* +** Return the pathname of the journal file for this database. The return +** value of this routine is the same regardless of whether the journal file +** has been created or not. +** +** The pager journal filename is invariant as long as the pager is +** open so it is safe to access without the BtShared mutex. +*/ +SQLITE_PRIVATE const char *sqlite3BtreeGetJournalname(Btree *p){ + assert( p->pBt->pPager!=0 ); + return sqlite3PagerJournalname(p->pBt->pPager); +} + +/* +** Return one of SQLITE_TXN_NONE, SQLITE_TXN_READ, or SQLITE_TXN_WRITE +** to describe the current transaction state of Btree p. +*/ +SQLITE_PRIVATE int sqlite3BtreeTxnState(Btree *p){ + assert( p==0 || sqlite3_mutex_held(p->db->mutex) ); + return p ? p->inTrans : 0; +} + +#ifndef SQLITE_OMIT_WAL +/* +** Run a checkpoint on the Btree passed as the first argument. +** +** Return SQLITE_LOCKED if this or any other connection has an open +** transaction on the shared-cache the argument Btree is connected to. +** +** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. +*/ +SQLITE_PRIVATE int sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){ + int rc = SQLITE_OK; + if( p ){ + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + if( pBt->inTransaction!=TRANS_NONE ){ + rc = SQLITE_LOCKED; + }else{ + rc = sqlite3PagerCheckpoint(pBt->pPager, p->db, eMode, pnLog, pnCkpt); + } + sqlite3BtreeLeave(p); + } + return rc; +} +#endif + +/* +** Return true if there is currently a backup running on Btree p. +*/ +SQLITE_PRIVATE int sqlite3BtreeIsInBackup(Btree *p){ + assert( p ); + assert( sqlite3_mutex_held(p->db->mutex) ); + return p->nBackup!=0; +} + +/* +** This function returns a pointer to a blob of memory associated with +** a single shared-btree. The memory is used by client code for its own +** purposes (for example, to store a high-level schema associated with +** the shared-btree). The btree layer manages reference counting issues. +** +** The first time this is called on a shared-btree, nBytes bytes of memory +** are allocated, zeroed, and returned to the caller. For each subsequent +** call the nBytes parameter is ignored and a pointer to the same blob +** of memory returned. +** +** If the nBytes parameter is 0 and the blob of memory has not yet been +** allocated, a null pointer is returned. If the blob has already been +** allocated, it is returned as normal. +** +** Just before the shared-btree is closed, the function passed as the +** xFree argument when the memory allocation was made is invoked on the +** blob of allocated memory. The xFree function should not call sqlite3_free() +** on the memory, the btree layer does that. +*/ +SQLITE_PRIVATE void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){ + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + if( !pBt->pSchema && nBytes ){ + pBt->pSchema = sqlite3DbMallocZero(0, nBytes); + pBt->xFreeSchema = xFree; + } + sqlite3BtreeLeave(p); + return pBt->pSchema; +} + +/* +** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared +** btree as the argument handle holds an exclusive lock on the +** sqlite_schema table. Otherwise SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3BtreeSchemaLocked(Btree *p){ + int rc; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + rc = querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK); + assert( rc==SQLITE_OK || rc==SQLITE_LOCKED_SHAREDCACHE ); + sqlite3BtreeLeave(p); + return rc; +} + + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Obtain a lock on the table whose root page is iTab. The +** lock is a write lock if isWritelock is true or a read lock +** if it is false. +*/ +SQLITE_PRIVATE int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ + int rc = SQLITE_OK; + assert( p->inTrans!=TRANS_NONE ); + if( p->sharable ){ + u8 lockType = READ_LOCK + isWriteLock; + assert( READ_LOCK+1==WRITE_LOCK ); + assert( isWriteLock==0 || isWriteLock==1 ); + + sqlite3BtreeEnter(p); + rc = querySharedCacheTableLock(p, iTab, lockType); + if( rc==SQLITE_OK ){ + rc = setSharedCacheTableLock(p, iTab, lockType); + } + sqlite3BtreeLeave(p); + } + return rc; +} +#endif + +#ifndef SQLITE_OMIT_INCRBLOB +/* +** Argument pCsr must be a cursor opened for writing on an +** INTKEY table currently pointing at a valid table entry. +** This function modifies the data stored as part of that entry. +** +** Only the data content may only be modified, it is not possible to +** change the length of the data stored. If this function is called with +** parameters that attempt to write past the end of the existing data, +** no modifications are made and SQLITE_CORRUPT is returned. +*/ +SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ + int rc; + assert( cursorOwnsBtShared(pCsr) ); + assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); + assert( pCsr->curFlags & BTCF_Incrblob ); + + rc = restoreCursorPosition(pCsr); + if( rc!=SQLITE_OK ){ + return rc; + } + assert( pCsr->eState!=CURSOR_REQUIRESEEK ); + if( pCsr->eState!=CURSOR_VALID ){ + return SQLITE_ABORT; + } + + /* Save the positions of all other cursors open on this table. This is + ** required in case any of them are holding references to an xFetch + ** version of the b-tree page modified by the accessPayload call below. + ** + ** Note that pCsr must be open on a INTKEY table and saveCursorPosition() + ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence + ** saveAllCursors can only return SQLITE_OK. + */ + VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); + assert( rc==SQLITE_OK ); + + /* Check some assumptions: + ** (a) the cursor is open for writing, + ** (b) there is a read/write transaction open, + ** (c) the connection holds a write-lock on the table (if required), + ** (d) there are no conflicting read-locks, and + ** (e) the cursor points at a valid row of an intKey table. + */ + if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){ + return SQLITE_READONLY; + } + assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0 + && pCsr->pBt->inTransaction==TRANS_WRITE ); + assert( hasSharedCacheTableLock(pCsr->pBtree, pCsr->pgnoRoot, 0, 2) ); + assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) ); + assert( pCsr->pPage->intKey ); + + return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1); +} + +/* +** Mark this cursor as an incremental blob cursor. +*/ +SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ + pCur->curFlags |= BTCF_Incrblob; + pCur->pBtree->hasIncrblobCur = 1; +} +#endif + +/* +** Set both the "read version" (single byte at byte offset 18) and +** "write version" (single byte at byte offset 19) fields in the database +** header to iVersion. +*/ +SQLITE_PRIVATE int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ + BtShared *pBt = pBtree->pBt; + int rc; /* Return code */ + + assert( iVersion==1 || iVersion==2 ); + + /* If setting the version fields to 1, do not automatically open the + ** WAL connection, even if the version fields are currently set to 2. + */ + pBt->btsFlags &= ~BTS_NO_WAL; + if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL; + + rc = sqlite3BtreeBeginTrans(pBtree, 0, 0); + if( rc==SQLITE_OK ){ + u8 *aData = pBt->pPage1->aData; + if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){ + rc = sqlite3BtreeBeginTrans(pBtree, 2, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( rc==SQLITE_OK ){ + aData[18] = (u8)iVersion; + aData[19] = (u8)iVersion; + } + } + } + } + + pBt->btsFlags &= ~BTS_NO_WAL; + return rc; +} + +/* +** Return true if the cursor has a hint specified. This routine is +** only used from within assert() statements +*/ +SQLITE_PRIVATE int sqlite3BtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){ + return (pCsr->hints & mask)!=0; +} + +/* +** Return true if the given Btree is read-only. +*/ +SQLITE_PRIVATE int sqlite3BtreeIsReadonly(Btree *p){ + return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; +} + +/* +** Return the size of the header added to each page by this module. +*/ +SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } + +#if !defined(SQLITE_OMIT_SHARED_CACHE) +/* +** Return true if the Btree passed as the only argument is sharable. +*/ +SQLITE_PRIVATE int sqlite3BtreeSharable(Btree *p){ + return p->sharable; +} + +/* +** Return the number of connections to the BtShared object accessed by +** the Btree handle passed as the only argument. For private caches +** this is always 1. For shared caches it may be 1 or greater. +*/ +SQLITE_PRIVATE int sqlite3BtreeConnectionCount(Btree *p){ + testcase( p->sharable ); + return p->pBt->nRef; +} +#endif + +/************** End of btree.c ***********************************************/ +/************** Begin file backup.c ******************************************/ +/* +** 2009 January 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the implementation of the sqlite3_backup_XXX() +** API functions and the related features. +*/ +/* #include "sqliteInt.h" */ +/* #include "btreeInt.h" */ + +/* +** Structure allocated for each backup operation. +*/ +struct sqlite3_backup { + sqlite3* pDestDb; /* Destination database handle */ + Btree *pDest; /* Destination b-tree file */ + u32 iDestSchema; /* Original schema cookie in destination */ + int bDestLocked; /* True once a write-transaction is open on pDest */ + + Pgno iNext; /* Page number of the next source page to copy */ + sqlite3* pSrcDb; /* Source database handle */ + Btree *pSrc; /* Source b-tree file */ + + int rc; /* Backup process error code */ + + /* These two variables are set by every call to backup_step(). They are + ** read by calls to backup_remaining() and backup_pagecount(). + */ + Pgno nRemaining; /* Number of pages left to copy */ + Pgno nPagecount; /* Total number of pages to copy */ + + int isAttached; /* True once backup has been registered with pager */ + sqlite3_backup *pNext; /* Next backup associated with source pager */ +}; + +/* +** THREAD SAFETY NOTES: +** +** Once it has been created using backup_init(), a single sqlite3_backup +** structure may be accessed via two groups of thread-safe entry points: +** +** * Via the sqlite3_backup_XXX() API function backup_step() and +** backup_finish(). Both these functions obtain the source database +** handle mutex and the mutex associated with the source BtShared +** structure, in that order. +** +** * Via the BackupUpdate() and BackupRestart() functions, which are +** invoked by the pager layer to report various state changes in +** the page cache associated with the source database. The mutex +** associated with the source database BtShared structure will always +** be held when either of these functions are invoked. +** +** The other sqlite3_backup_XXX() API functions, backup_remaining() and +** backup_pagecount() are not thread-safe functions. If they are called +** while some other thread is calling backup_step() or backup_finish(), +** the values returned may be invalid. There is no way for a call to +** BackupUpdate() or BackupRestart() to interfere with backup_remaining() +** or backup_pagecount(). +** +** Depending on the SQLite configuration, the database handles and/or +** the Btree objects may have their own mutexes that require locking. +** Non-sharable Btrees (in-memory databases for example), do not have +** associated mutexes. +*/ + +/* +** Return a pointer corresponding to database zDb (i.e. "main", "temp") +** in connection handle pDb. If such a database cannot be found, return +** a NULL pointer and write an error message to pErrorDb. +** +** If the "temp" database is requested, it may need to be opened by this +** function. If an error occurs while doing so, return 0 and write an +** error message to pErrorDb. +*/ +static Btree *findBtree(sqlite3 *pErrorDb, sqlite3 *pDb, const char *zDb){ + int i = sqlite3FindDbName(pDb, zDb); + + if( i==1 ){ + Parse sParse; + int rc = 0; + sqlite3ParseObjectInit(&sParse,pDb); + if( sqlite3OpenTempDatabase(&sParse) ){ + sqlite3ErrorWithMsg(pErrorDb, sParse.rc, "%s", sParse.zErrMsg); + rc = SQLITE_ERROR; + } + sqlite3DbFree(pErrorDb, sParse.zErrMsg); + sqlite3ParseObjectReset(&sParse); + if( rc ){ + return 0; + } + } + + if( i<0 ){ + sqlite3ErrorWithMsg(pErrorDb, SQLITE_ERROR, "unknown database %s", zDb); + return 0; + } + + return pDb->aDb[i].pBt; +} + +/* +** Attempt to set the page size of the destination to match the page size +** of the source. +*/ +static int setDestPgsz(sqlite3_backup *p){ + int rc; + rc = sqlite3BtreeSetPageSize(p->pDest,sqlite3BtreeGetPageSize(p->pSrc),0,0); + return rc; +} + +/* +** Check that there is no open read-transaction on the b-tree passed as the +** second argument. If there is not, return SQLITE_OK. Otherwise, if there +** is an open read-transaction, return SQLITE_ERROR and leave an error +** message in database handle db. +*/ +static int checkReadTransaction(sqlite3 *db, Btree *p){ + if( sqlite3BtreeTxnState(p)!=SQLITE_TXN_NONE ){ + sqlite3ErrorWithMsg(db, SQLITE_ERROR, "destination database is in use"); + return SQLITE_ERROR; + } + return SQLITE_OK; +} + +/* +** Create an sqlite3_backup process to copy the contents of zSrcDb from +** connection handle pSrcDb to zDestDb in pDestDb. If successful, return +** a pointer to the new sqlite3_backup object. +** +** If an error occurs, NULL is returned and an error code and error message +** stored in database handle pDestDb. +*/ +SQLITE_API sqlite3_backup *sqlite3_backup_init( + sqlite3* pDestDb, /* Database to write to */ + const char *zDestDb, /* Name of database within pDestDb */ + sqlite3* pSrcDb, /* Database connection to read from */ + const char *zSrcDb /* Name of database within pSrcDb */ +){ + sqlite3_backup *p; /* Value to return */ + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(pSrcDb)||!sqlite3SafetyCheckOk(pDestDb) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + + /* Lock the source database handle. The destination database + ** handle is not locked in this routine, but it is locked in + ** sqlite3_backup_step(). The user is required to ensure that no + ** other thread accesses the destination handle for the duration + ** of the backup operation. Any attempt to use the destination + ** database connection while a backup is in progress may cause + ** a malfunction or a deadlock. + */ + sqlite3_mutex_enter(pSrcDb->mutex); + sqlite3_mutex_enter(pDestDb->mutex); + + if( pSrcDb==pDestDb ){ + sqlite3ErrorWithMsg( + pDestDb, SQLITE_ERROR, "source and destination must be distinct" + ); + p = 0; + }else { + /* Allocate space for a new sqlite3_backup object... + ** EVIDENCE-OF: R-64852-21591 The sqlite3_backup object is created by a + ** call to sqlite3_backup_init() and is destroyed by a call to + ** sqlite3_backup_finish(). */ + p = (sqlite3_backup *)sqlite3MallocZero(sizeof(sqlite3_backup)); + if( !p ){ + sqlite3Error(pDestDb, SQLITE_NOMEM_BKPT); + } + } + + /* If the allocation succeeded, populate the new object. */ + if( p ){ + p->pSrc = findBtree(pDestDb, pSrcDb, zSrcDb); + p->pDest = findBtree(pDestDb, pDestDb, zDestDb); + p->pDestDb = pDestDb; + p->pSrcDb = pSrcDb; + p->iNext = 1; + p->isAttached = 0; + + if( 0==p->pSrc || 0==p->pDest + || checkReadTransaction(pDestDb, p->pDest)!=SQLITE_OK + ){ + /* One (or both) of the named databases did not exist or an OOM + ** error was hit. Or there is a transaction open on the destination + ** database. The error has already been written into the pDestDb + ** handle. All that is left to do here is free the sqlite3_backup + ** structure. */ + sqlite3_free(p); + p = 0; + } + } + if( p ){ + p->pSrc->nBackup++; + } + + sqlite3_mutex_leave(pDestDb->mutex); + sqlite3_mutex_leave(pSrcDb->mutex); + return p; +} + +/* +** Argument rc is an SQLite error code. Return true if this error is +** considered fatal if encountered during a backup operation. All errors +** are considered fatal except for SQLITE_BUSY and SQLITE_LOCKED. +*/ +static int isFatalError(int rc){ + return (rc!=SQLITE_OK && rc!=SQLITE_BUSY && ALWAYS(rc!=SQLITE_LOCKED)); +} + +/* +** Parameter zSrcData points to a buffer containing the data for +** page iSrcPg from the source database. Copy this data into the +** destination database. +*/ +static int backupOnePage( + sqlite3_backup *p, /* Backup handle */ + Pgno iSrcPg, /* Source database page to backup */ + const u8 *zSrcData, /* Source database page data */ + int bUpdate /* True for an update, false otherwise */ +){ + Pager * const pDestPager = sqlite3BtreePager(p->pDest); + const int nSrcPgsz = sqlite3BtreeGetPageSize(p->pSrc); + int nDestPgsz = sqlite3BtreeGetPageSize(p->pDest); + const int nCopy = MIN(nSrcPgsz, nDestPgsz); + const i64 iEnd = (i64)iSrcPg*(i64)nSrcPgsz; + int rc = SQLITE_OK; + i64 iOff; + + assert( sqlite3BtreeGetReserveNoMutex(p->pSrc)>=0 ); + assert( p->bDestLocked ); + assert( !isFatalError(p->rc) ); + assert( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ); + assert( zSrcData ); + + /* Catch the case where the destination is an in-memory database and the + ** page sizes of the source and destination differ. + */ + if( nSrcPgsz!=nDestPgsz && sqlite3PagerIsMemdb(pDestPager) ){ + rc = SQLITE_READONLY; + } + + /* This loop runs once for each destination page spanned by the source + ** page. For each iteration, variable iOff is set to the byte offset + ** of the destination page. + */ + for(iOff=iEnd-(i64)nSrcPgsz; rc==SQLITE_OK && iOffpDest->pBt) ) continue; + if( SQLITE_OK==(rc = sqlite3PagerGet(pDestPager, iDest, &pDestPg, 0)) + && SQLITE_OK==(rc = sqlite3PagerWrite(pDestPg)) + ){ + const u8 *zIn = &zSrcData[iOff%nSrcPgsz]; + u8 *zDestData = sqlite3PagerGetData(pDestPg); + u8 *zOut = &zDestData[iOff%nDestPgsz]; + + /* Copy the data from the source page into the destination page. + ** Then clear the Btree layer MemPage.isInit flag. Both this module + ** and the pager code use this trick (clearing the first byte + ** of the page 'extra' space to invalidate the Btree layers + ** cached parse of the page). MemPage.isInit is marked + ** "MUST BE FIRST" for this purpose. + */ + memcpy(zOut, zIn, nCopy); + ((u8 *)sqlite3PagerGetExtra(pDestPg))[0] = 0; + if( iOff==0 && bUpdate==0 ){ + sqlite3Put4byte(&zOut[28], sqlite3BtreeLastPage(p->pSrc)); + } + } + sqlite3PagerUnref(pDestPg); + } + + return rc; +} + +/* +** If pFile is currently larger than iSize bytes, then truncate it to +** exactly iSize bytes. If pFile is not larger than iSize bytes, then +** this function is a no-op. +** +** Return SQLITE_OK if everything is successful, or an SQLite error +** code if an error occurs. +*/ +static int backupTruncateFile(sqlite3_file *pFile, i64 iSize){ + i64 iCurrent; + int rc = sqlite3OsFileSize(pFile, &iCurrent); + if( rc==SQLITE_OK && iCurrent>iSize ){ + rc = sqlite3OsTruncate(pFile, iSize); + } + return rc; +} + +/* +** Register this backup object with the associated source pager for +** callbacks when pages are changed or the cache invalidated. +*/ +static void attachBackupObject(sqlite3_backup *p){ + sqlite3_backup **pp; + assert( sqlite3BtreeHoldsMutex(p->pSrc) ); + pp = sqlite3PagerBackupPtr(sqlite3BtreePager(p->pSrc)); + p->pNext = *pp; + *pp = p; + p->isAttached = 1; +} + +/* +** Copy nPage pages from the source b-tree to the destination. +*/ +SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage){ + int rc; + int destMode; /* Destination journal mode */ + int pgszSrc = 0; /* Source page size */ + int pgszDest = 0; /* Destination page size */ + +#ifdef SQLITE_ENABLE_API_ARMOR + if( p==0 ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(p->pSrcDb->mutex); + sqlite3BtreeEnter(p->pSrc); + if( p->pDestDb ){ + sqlite3_mutex_enter(p->pDestDb->mutex); + } + + rc = p->rc; + if( !isFatalError(rc) ){ + Pager * const pSrcPager = sqlite3BtreePager(p->pSrc); /* Source pager */ + Pager * const pDestPager = sqlite3BtreePager(p->pDest); /* Dest pager */ + int ii; /* Iterator variable */ + int nSrcPage = -1; /* Size of source db in pages */ + int bCloseTrans = 0; /* True if src db requires unlocking */ + + /* If the source pager is currently in a write-transaction, return + ** SQLITE_BUSY immediately. + */ + if( p->pDestDb && p->pSrc->pBt->inTransaction==TRANS_WRITE ){ + rc = SQLITE_BUSY; + }else{ + rc = SQLITE_OK; + } + + /* If there is no open read-transaction on the source database, open + ** one now. If a transaction is opened here, then it will be closed + ** before this function exits. + */ + if( rc==SQLITE_OK && SQLITE_TXN_NONE==sqlite3BtreeTxnState(p->pSrc) ){ + rc = sqlite3BtreeBeginTrans(p->pSrc, 0, 0); + bCloseTrans = 1; + } + + /* If the destination database has not yet been locked (i.e. if this + ** is the first call to backup_step() for the current backup operation), + ** try to set its page size to the same as the source database. This + ** is especially important on ZipVFS systems, as in that case it is + ** not possible to create a database file that uses one page size by + ** writing to it with another. */ + if( p->bDestLocked==0 && rc==SQLITE_OK && setDestPgsz(p)==SQLITE_NOMEM ){ + rc = SQLITE_NOMEM; + } + + /* Lock the destination database, if it is not locked already. */ + if( SQLITE_OK==rc && p->bDestLocked==0 + && SQLITE_OK==(rc = sqlite3BtreeBeginTrans(p->pDest, 2, + (int*)&p->iDestSchema)) + ){ + p->bDestLocked = 1; + } + + /* Do not allow backup if the destination database is in WAL mode + ** and the page sizes are different between source and destination */ + pgszSrc = sqlite3BtreeGetPageSize(p->pSrc); + pgszDest = sqlite3BtreeGetPageSize(p->pDest); + destMode = sqlite3PagerGetJournalMode(sqlite3BtreePager(p->pDest)); + if( SQLITE_OK==rc && destMode==PAGER_JOURNALMODE_WAL && pgszSrc!=pgszDest ){ + rc = SQLITE_READONLY; + } + + /* Now that there is a read-lock on the source database, query the + ** source pager for the number of pages in the database. + */ + nSrcPage = (int)sqlite3BtreeLastPage(p->pSrc); + assert( nSrcPage>=0 ); + for(ii=0; (nPage<0 || iiiNext<=(Pgno)nSrcPage && !rc; ii++){ + const Pgno iSrcPg = p->iNext; /* Source page number */ + if( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ){ + DbPage *pSrcPg; /* Source page object */ + rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg,PAGER_GET_READONLY); + if( rc==SQLITE_OK ){ + rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg), 0); + sqlite3PagerUnref(pSrcPg); + } + } + p->iNext++; + } + if( rc==SQLITE_OK ){ + p->nPagecount = nSrcPage; + p->nRemaining = nSrcPage+1-p->iNext; + if( p->iNext>(Pgno)nSrcPage ){ + rc = SQLITE_DONE; + }else if( !p->isAttached ){ + attachBackupObject(p); + } + } + + /* Update the schema version field in the destination database. This + ** is to make sure that the schema-version really does change in + ** the case where the source and destination databases have the + ** same schema version. + */ + if( rc==SQLITE_DONE ){ + if( nSrcPage==0 ){ + rc = sqlite3BtreeNewDb(p->pDest); + nSrcPage = 1; + } + if( rc==SQLITE_OK || rc==SQLITE_DONE ){ + rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1); + } + if( rc==SQLITE_OK ){ + if( p->pDestDb ){ + sqlite3ResetAllSchemasOfConnection(p->pDestDb); + } + if( destMode==PAGER_JOURNALMODE_WAL ){ + rc = sqlite3BtreeSetVersion(p->pDest, 2); + } + } + if( rc==SQLITE_OK ){ + int nDestTruncate; + /* Set nDestTruncate to the final number of pages in the destination + ** database. The complication here is that the destination page + ** size may be different to the source page size. + ** + ** If the source page size is smaller than the destination page size, + ** round up. In this case the call to sqlite3OsTruncate() below will + ** fix the size of the file. However it is important to call + ** sqlite3PagerTruncateImage() here so that any pages in the + ** destination file that lie beyond the nDestTruncate page mark are + ** journalled by PagerCommitPhaseOne() before they are destroyed + ** by the file truncation. + */ + assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) ); + assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) ); + if( pgszSrcpDest->pBt) ){ + nDestTruncate--; + } + }else{ + nDestTruncate = nSrcPage * (pgszSrc/pgszDest); + } + assert( nDestTruncate>0 ); + + if( pgszSrc= iSize || ( + nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1) + && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest + )); + + /* This block ensures that all data required to recreate the original + ** database has been stored in the journal for pDestPager and the + ** journal synced to disk. So at this point we may safely modify + ** the database file in any way, knowing that if a power failure + ** occurs, the original database will be reconstructed from the + ** journal file. */ + sqlite3PagerPagecount(pDestPager, &nDstPage); + for(iPg=nDestTruncate; rc==SQLITE_OK && iPg<=(Pgno)nDstPage; iPg++){ + if( iPg!=PENDING_BYTE_PAGE(p->pDest->pBt) ){ + DbPage *pPg; + rc = sqlite3PagerGet(pDestPager, iPg, &pPg, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pPg); + sqlite3PagerUnref(pPg); + } + } + } + if( rc==SQLITE_OK ){ + rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1); + } + + /* Write the extra pages and truncate the database file as required */ + iEnd = MIN(PENDING_BYTE + pgszDest, iSize); + for( + iOff=PENDING_BYTE+pgszSrc; + rc==SQLITE_OK && iOffpDest, 0)) + ){ + rc = SQLITE_DONE; + } + } + } + + /* If bCloseTrans is true, then this function opened a read transaction + ** on the source database. Close the read transaction here. There is + ** no need to check the return values of the btree methods here, as + ** "committing" a read-only transaction cannot fail. + */ + if( bCloseTrans ){ + TESTONLY( int rc2 ); + TESTONLY( rc2 = ) sqlite3BtreeCommitPhaseOne(p->pSrc, 0); + TESTONLY( rc2 |= ) sqlite3BtreeCommitPhaseTwo(p->pSrc, 0); + assert( rc2==SQLITE_OK ); + } + + if( rc==SQLITE_IOERR_NOMEM ){ + rc = SQLITE_NOMEM_BKPT; + } + p->rc = rc; + } + if( p->pDestDb ){ + sqlite3_mutex_leave(p->pDestDb->mutex); + } + sqlite3BtreeLeave(p->pSrc); + sqlite3_mutex_leave(p->pSrcDb->mutex); + return rc; +} + +/* +** Release all resources associated with an sqlite3_backup* handle. +*/ +SQLITE_API int sqlite3_backup_finish(sqlite3_backup *p){ + sqlite3_backup **pp; /* Ptr to head of pagers backup list */ + sqlite3 *pSrcDb; /* Source database connection */ + int rc; /* Value to return */ + + /* Enter the mutexes */ + if( p==0 ) return SQLITE_OK; + pSrcDb = p->pSrcDb; + sqlite3_mutex_enter(pSrcDb->mutex); + sqlite3BtreeEnter(p->pSrc); + if( p->pDestDb ){ + sqlite3_mutex_enter(p->pDestDb->mutex); + } + + /* Detach this backup from the source pager. */ + if( p->pDestDb ){ + p->pSrc->nBackup--; + } + if( p->isAttached ){ + pp = sqlite3PagerBackupPtr(sqlite3BtreePager(p->pSrc)); + assert( pp!=0 ); + while( *pp!=p ){ + pp = &(*pp)->pNext; + assert( pp!=0 ); + } + *pp = p->pNext; + } + + /* If a transaction is still open on the Btree, roll it back. */ + sqlite3BtreeRollback(p->pDest, SQLITE_OK, 0); + + /* Set the error code of the destination database handle. */ + rc = (p->rc==SQLITE_DONE) ? SQLITE_OK : p->rc; + if( p->pDestDb ){ + sqlite3Error(p->pDestDb, rc); + + /* Exit the mutexes and free the backup context structure. */ + sqlite3LeaveMutexAndCloseZombie(p->pDestDb); + } + sqlite3BtreeLeave(p->pSrc); + if( p->pDestDb ){ + /* EVIDENCE-OF: R-64852-21591 The sqlite3_backup object is created by a + ** call to sqlite3_backup_init() and is destroyed by a call to + ** sqlite3_backup_finish(). */ + sqlite3_free(p); + } + sqlite3LeaveMutexAndCloseZombie(pSrcDb); + return rc; +} + +/* +** Return the number of pages still to be backed up as of the most recent +** call to sqlite3_backup_step(). +*/ +SQLITE_API int sqlite3_backup_remaining(sqlite3_backup *p){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( p==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return p->nRemaining; +} + +/* +** Return the total number of pages in the source database as of the most +** recent call to sqlite3_backup_step(). +*/ +SQLITE_API int sqlite3_backup_pagecount(sqlite3_backup *p){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( p==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return p->nPagecount; +} + +/* +** This function is called after the contents of page iPage of the +** source database have been modified. If page iPage has already been +** copied into the destination database, then the data written to the +** destination is now invalidated. The destination copy of iPage needs +** to be updated with the new data before the backup operation is +** complete. +** +** It is assumed that the mutex associated with the BtShared object +** corresponding to the source database is held when this function is +** called. +*/ +static SQLITE_NOINLINE void backupUpdate( + sqlite3_backup *p, + Pgno iPage, + const u8 *aData +){ + assert( p!=0 ); + do{ + assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); + if( !isFatalError(p->rc) && iPageiNext ){ + /* The backup process p has already copied page iPage. But now it + ** has been modified by a transaction on the source pager. Copy + ** the new data into the backup. + */ + int rc; + assert( p->pDestDb ); + sqlite3_mutex_enter(p->pDestDb->mutex); + rc = backupOnePage(p, iPage, aData, 1); + sqlite3_mutex_leave(p->pDestDb->mutex); + assert( rc!=SQLITE_BUSY && rc!=SQLITE_LOCKED ); + if( rc!=SQLITE_OK ){ + p->rc = rc; + } + } + }while( (p = p->pNext)!=0 ); +} +SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ + if( pBackup ) backupUpdate(pBackup, iPage, aData); +} + +/* +** Restart the backup process. This is called when the pager layer +** detects that the database has been modified by an external database +** connection. In this case there is no way of knowing which of the +** pages that have been copied into the destination database are still +** valid and which are not, so the entire process needs to be restarted. +** +** It is assumed that the mutex associated with the BtShared object +** corresponding to the source database is held when this function is +** called. +*/ +SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *pBackup){ + sqlite3_backup *p; /* Iterator variable */ + for(p=pBackup; p; p=p->pNext){ + assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); + p->iNext = 1; + } +} + +#ifndef SQLITE_OMIT_VACUUM +/* +** Copy the complete content of pBtFrom into pBtTo. A transaction +** must be active for both files. +** +** The size of file pTo may be reduced by this operation. If anything +** goes wrong, the transaction on pTo is rolled back. If successful, the +** transaction is committed before returning. +*/ +SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){ + int rc; + sqlite3_file *pFd; /* File descriptor for database pTo */ + sqlite3_backup b; + sqlite3BtreeEnter(pTo); + sqlite3BtreeEnter(pFrom); + + assert( sqlite3BtreeTxnState(pTo)==SQLITE_TXN_WRITE ); + pFd = sqlite3PagerFile(sqlite3BtreePager(pTo)); + if( pFd->pMethods ){ + i64 nByte = sqlite3BtreeGetPageSize(pFrom)*(i64)sqlite3BtreeLastPage(pFrom); + rc = sqlite3OsFileControl(pFd, SQLITE_FCNTL_OVERWRITE, &nByte); + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + if( rc ) goto copy_finished; + } + + /* Set up an sqlite3_backup object. sqlite3_backup.pDestDb must be set + ** to 0. This is used by the implementations of sqlite3_backup_step() + ** and sqlite3_backup_finish() to detect that they are being called + ** from this function, not directly by the user. + */ + memset(&b, 0, sizeof(b)); + b.pSrcDb = pFrom->db; + b.pSrc = pFrom; + b.pDest = pTo; + b.iNext = 1; + + /* 0x7FFFFFFF is the hard limit for the number of pages in a database + ** file. By passing this as the number of pages to copy to + ** sqlite3_backup_step(), we can guarantee that the copy finishes + ** within a single call (unless an error occurs). The assert() statement + ** checks this assumption - (p->rc) should be set to either SQLITE_DONE + ** or an error code. */ + sqlite3_backup_step(&b, 0x7FFFFFFF); + assert( b.rc!=SQLITE_OK ); + + rc = sqlite3_backup_finish(&b); + if( rc==SQLITE_OK ){ + pTo->pBt->btsFlags &= ~BTS_PAGESIZE_FIXED; + }else{ + sqlite3PagerClearCache(sqlite3BtreePager(b.pDest)); + } + + assert( sqlite3BtreeTxnState(pTo)!=SQLITE_TXN_WRITE ); +copy_finished: + sqlite3BtreeLeave(pFrom); + sqlite3BtreeLeave(pTo); + return rc; +} +#endif /* SQLITE_OMIT_VACUUM */ + +/************** End of backup.c **********************************************/ +/************** Begin file vdbemem.c *****************************************/ +/* +** 2004 May 26 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code use to manipulate "Mem" structure. A "Mem" +** stores a single value in the VDBE. Mem is an opaque structure visible +** only within the VDBE. Interface routines refer to a Mem using the +** name sqlite_value +*/ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +/* True if X is a power of two. 0 is considered a power of two here. +** In other words, return true if X has at most one bit set. +*/ +#define ISPOWEROF2(X) (((X)&((X)-1))==0) + +#ifdef SQLITE_DEBUG +/* +** Check invariants on a Mem object. +** +** This routine is intended for use inside of assert() statements, like +** this: assert( sqlite3VdbeCheckMemInvariants(pMem) ); +*/ +SQLITE_PRIVATE int sqlite3VdbeCheckMemInvariants(Mem *p){ + /* If MEM_Dyn is set then Mem.xDel!=0. + ** Mem.xDel might not be initialized if MEM_Dyn is clear. + */ + assert( (p->flags & MEM_Dyn)==0 || p->xDel!=0 ); + + /* MEM_Dyn may only be set if Mem.szMalloc==0. In this way we + ** ensure that if Mem.szMalloc>0 then it is safe to do + ** Mem.z = Mem.zMalloc without having to check Mem.flags&MEM_Dyn. + ** That saves a few cycles in inner loops. */ + assert( (p->flags & MEM_Dyn)==0 || p->szMalloc==0 ); + + /* Cannot have more than one of MEM_Int, MEM_Real, or MEM_IntReal */ + assert( ISPOWEROF2(p->flags & (MEM_Int|MEM_Real|MEM_IntReal)) ); + + if( p->flags & MEM_Null ){ + /* Cannot be both MEM_Null and some other type */ + assert( (p->flags & (MEM_Int|MEM_Real|MEM_Str|MEM_Blob|MEM_Agg))==0 ); + + /* If MEM_Null is set, then either the value is a pure NULL (the usual + ** case) or it is a pointer set using sqlite3_bind_pointer() or + ** sqlite3_result_pointer(). If a pointer, then MEM_Term must also be + ** set. + */ + if( (p->flags & (MEM_Term|MEM_Subtype))==(MEM_Term|MEM_Subtype) ){ + /* This is a pointer type. There may be a flag to indicate what to + ** do with the pointer. */ + assert( ((p->flags&MEM_Dyn)!=0 ? 1 : 0) + + ((p->flags&MEM_Ephem)!=0 ? 1 : 0) + + ((p->flags&MEM_Static)!=0 ? 1 : 0) <= 1 ); + + /* No other bits set */ + assert( (p->flags & ~(MEM_Null|MEM_Term|MEM_Subtype|MEM_FromBind + |MEM_Dyn|MEM_Ephem|MEM_Static))==0 ); + }else{ + /* A pure NULL might have other flags, such as MEM_Static, MEM_Dyn, + ** MEM_Ephem, MEM_Cleared, or MEM_Subtype */ + } + }else{ + /* The MEM_Cleared bit is only allowed on NULLs */ + assert( (p->flags & MEM_Cleared)==0 ); + } + + /* The szMalloc field holds the correct memory allocation size */ + assert( p->szMalloc==0 + || (p->flags==MEM_Undefined + && p->szMalloc<=sqlite3DbMallocSize(p->db,p->zMalloc)) + || p->szMalloc==sqlite3DbMallocSize(p->db,p->zMalloc)); + + /* If p holds a string or blob, the Mem.z must point to exactly + ** one of the following: + ** + ** (1) Memory in Mem.zMalloc and managed by the Mem object + ** (2) Memory to be freed using Mem.xDel + ** (3) An ephemeral string or blob + ** (4) A static string or blob + */ + if( (p->flags & (MEM_Str|MEM_Blob)) && p->n>0 ){ + assert( + ((p->szMalloc>0 && p->z==p->zMalloc)? 1 : 0) + + ((p->flags&MEM_Dyn)!=0 ? 1 : 0) + + ((p->flags&MEM_Ephem)!=0 ? 1 : 0) + + ((p->flags&MEM_Static)!=0 ? 1 : 0) == 1 + ); + } + return 1; +} +#endif + +/* +** Render a Mem object which is one of MEM_Int, MEM_Real, or MEM_IntReal +** into a buffer. +*/ +static void vdbeMemRenderNum(int sz, char *zBuf, Mem *p){ + StrAccum acc; + assert( p->flags & (MEM_Int|MEM_Real|MEM_IntReal) ); + assert( sz>22 ); + if( p->flags & MEM_Int ){ +#if GCC_VERSION>=7000000 + /* Work-around for GCC bug + ** https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96270 */ + i64 x; + assert( (p->flags&MEM_Int)*2==sizeof(x) ); + memcpy(&x, (char*)&p->u, (p->flags&MEM_Int)*2); + sqlite3Int64ToText(x, zBuf); +#else + sqlite3Int64ToText(p->u.i, zBuf); +#endif + }else{ + sqlite3StrAccumInit(&acc, 0, zBuf, sz, 0); + sqlite3_str_appendf(&acc, "%!.15g", + (p->flags & MEM_IntReal)!=0 ? (double)p->u.i : p->u.r); + assert( acc.zText==zBuf && acc.mxAlloc<=0 ); + zBuf[acc.nChar] = 0; /* Fast version of sqlite3StrAccumFinish(&acc) */ + } +} + +#ifdef SQLITE_DEBUG +/* +** Validity checks on pMem. pMem holds a string. +** +** (1) Check that string value of pMem agrees with its integer or real value. +** (2) Check that the string is correctly zero terminated +** +** A single int or real value always converts to the same strings. But +** many different strings can be converted into the same int or real. +** If a table contains a numeric value and an index is based on the +** corresponding string value, then it is important that the string be +** derived from the numeric value, not the other way around, to ensure +** that the index and table are consistent. See ticket +** https://www.sqlite.org/src/info/343634942dd54ab (2018-01-31) for +** an example. +** +** This routine looks at pMem to verify that if it has both a numeric +** representation and a string representation then the string rep has +** been derived from the numeric and not the other way around. It returns +** true if everything is ok and false if there is a problem. +** +** This routine is for use inside of assert() statements only. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemValidStrRep(Mem *p){ + char zBuf[100]; + char *z; + int i, j, incr; + if( (p->flags & MEM_Str)==0 ) return 1; + if( p->flags & MEM_Term ){ + /* Insure that the string is properly zero-terminated. Pay particular + ** attention to the case where p->n is odd */ + if( p->szMalloc>0 && p->z==p->zMalloc ){ + assert( p->enc==SQLITE_UTF8 || p->szMalloc >= ((p->n+1)&~1)+2 ); + assert( p->enc!=SQLITE_UTF8 || p->szMalloc >= p->n+1 ); + } + assert( p->z[p->n]==0 ); + assert( p->enc==SQLITE_UTF8 || p->z[(p->n+1)&~1]==0 ); + assert( p->enc==SQLITE_UTF8 || p->z[((p->n+1)&~1)+1]==0 ); + } + if( (p->flags & (MEM_Int|MEM_Real|MEM_IntReal))==0 ) return 1; + vdbeMemRenderNum(sizeof(zBuf), zBuf, p); + z = p->z; + i = j = 0; + incr = 1; + if( p->enc!=SQLITE_UTF8 ){ + incr = 2; + if( p->enc==SQLITE_UTF16BE ) z++; + } + while( zBuf[j] ){ + if( zBuf[j++]!=z[i] ) return 0; + i += incr; + } + return 1; +} +#endif /* SQLITE_DEBUG */ + +/* +** If pMem is an object with a valid string representation, this routine +** ensures the internal encoding for the string representation is +** 'desiredEnc', one of SQLITE_UTF8, SQLITE_UTF16LE or SQLITE_UTF16BE. +** +** If pMem is not a string object, or the encoding of the string +** representation is already stored using the requested encoding, then this +** routine is a no-op. +** +** SQLITE_OK is returned if the conversion is successful (or not required). +** SQLITE_NOMEM may be returned if a malloc() fails during conversion +** between formats. +*/ +SQLITE_PRIVATE int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){ +#ifndef SQLITE_OMIT_UTF16 + int rc; +#endif + assert( pMem!=0 ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + assert( desiredEnc==SQLITE_UTF8 || desiredEnc==SQLITE_UTF16LE + || desiredEnc==SQLITE_UTF16BE ); + if( !(pMem->flags&MEM_Str) ){ + pMem->enc = desiredEnc; + return SQLITE_OK; + } + if( pMem->enc==desiredEnc ){ + return SQLITE_OK; + } + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); +#ifdef SQLITE_OMIT_UTF16 + return SQLITE_ERROR; +#else + + /* MemTranslate() may return SQLITE_OK or SQLITE_NOMEM. If NOMEM is returned, + ** then the encoding of the value may not have changed. + */ + rc = sqlite3VdbeMemTranslate(pMem, (u8)desiredEnc); + assert(rc==SQLITE_OK || rc==SQLITE_NOMEM); + assert(rc==SQLITE_OK || pMem->enc!=desiredEnc); + assert(rc==SQLITE_NOMEM || pMem->enc==desiredEnc); + return rc; +#endif +} + +/* +** Make sure pMem->z points to a writable allocation of at least n bytes. +** +** If the bPreserve argument is true, then copy of the content of +** pMem->z into the new allocation. pMem must be either a string or +** blob if bPreserve is true. If bPreserve is false, any prior content +** in pMem->z is discarded. +*/ +SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3VdbeMemGrow(Mem *pMem, int n, int bPreserve){ + assert( sqlite3VdbeCheckMemInvariants(pMem) ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + testcase( pMem->db==0 ); + + /* If the bPreserve flag is set to true, then the memory cell must already + ** contain a valid string or blob value. */ + assert( bPreserve==0 || pMem->flags&(MEM_Blob|MEM_Str) ); + testcase( bPreserve && pMem->z==0 ); + + assert( pMem->szMalloc==0 + || (pMem->flags==MEM_Undefined + && pMem->szMalloc<=sqlite3DbMallocSize(pMem->db,pMem->zMalloc)) + || pMem->szMalloc==sqlite3DbMallocSize(pMem->db,pMem->zMalloc)); + if( pMem->szMalloc>0 && bPreserve && pMem->z==pMem->zMalloc ){ + if( pMem->db ){ + pMem->z = pMem->zMalloc = sqlite3DbReallocOrFree(pMem->db, pMem->z, n); + }else{ + pMem->zMalloc = sqlite3Realloc(pMem->z, n); + if( pMem->zMalloc==0 ) sqlite3_free(pMem->z); + pMem->z = pMem->zMalloc; + } + bPreserve = 0; + }else{ + if( pMem->szMalloc>0 ) sqlite3DbFreeNN(pMem->db, pMem->zMalloc); + pMem->zMalloc = sqlite3DbMallocRaw(pMem->db, n); + } + if( pMem->zMalloc==0 ){ + sqlite3VdbeMemSetNull(pMem); + pMem->z = 0; + pMem->szMalloc = 0; + return SQLITE_NOMEM_BKPT; + }else{ + pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->zMalloc); + } + + if( bPreserve && pMem->z ){ + assert( pMem->z!=pMem->zMalloc ); + memcpy(pMem->zMalloc, pMem->z, pMem->n); + } + if( (pMem->flags&MEM_Dyn)!=0 ){ + assert( pMem->xDel!=0 && pMem->xDel!=SQLITE_DYNAMIC ); + pMem->xDel((void *)(pMem->z)); + } + + pMem->z = pMem->zMalloc; + pMem->flags &= ~(MEM_Dyn|MEM_Ephem|MEM_Static); + return SQLITE_OK; +} + +/* +** Change the pMem->zMalloc allocation to be at least szNew bytes. +** If pMem->zMalloc already meets or exceeds the requested size, this +** routine is a no-op. +** +** Any prior string or blob content in the pMem object may be discarded. +** The pMem->xDel destructor is called, if it exists. Though MEM_Str +** and MEM_Blob values may be discarded, MEM_Int, MEM_Real, MEM_IntReal, +** and MEM_Null values are preserved. +** +** Return SQLITE_OK on success or an error code (probably SQLITE_NOMEM) +** if unable to complete the resizing. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemClearAndResize(Mem *pMem, int szNew){ + assert( CORRUPT_DB || szNew>0 ); + assert( (pMem->flags & MEM_Dyn)==0 || pMem->szMalloc==0 ); + if( pMem->szMallocflags & MEM_Dyn)==0 ); + pMem->z = pMem->zMalloc; + pMem->flags &= (MEM_Null|MEM_Int|MEM_Real|MEM_IntReal); + return SQLITE_OK; +} + +/* +** It is already known that pMem contains an unterminated string. +** Add the zero terminator. +** +** Three bytes of zero are added. In this way, there is guaranteed +** to be a double-zero byte at an even byte boundary in order to +** terminate a UTF16 string, even if the initial size of the buffer +** is an odd number of bytes. +*/ +static SQLITE_NOINLINE int vdbeMemAddTerminator(Mem *pMem){ + if( sqlite3VdbeMemGrow(pMem, pMem->n+3, 1) ){ + return SQLITE_NOMEM_BKPT; + } + pMem->z[pMem->n] = 0; + pMem->z[pMem->n+1] = 0; + pMem->z[pMem->n+2] = 0; + pMem->flags |= MEM_Term; + return SQLITE_OK; +} + +/* +** Change pMem so that its MEM_Str or MEM_Blob value is stored in +** MEM.zMalloc, where it can be safely written. +** +** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem *pMem){ + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + if( (pMem->flags & (MEM_Str|MEM_Blob))!=0 ){ + if( ExpandBlob(pMem) ) return SQLITE_NOMEM; + if( pMem->szMalloc==0 || pMem->z!=pMem->zMalloc ){ + int rc = vdbeMemAddTerminator(pMem); + if( rc ) return rc; + } + } + pMem->flags &= ~MEM_Ephem; +#ifdef SQLITE_DEBUG + pMem->pScopyFrom = 0; +#endif + + return SQLITE_OK; +} + +/* +** If the given Mem* has a zero-filled tail, turn it into an ordinary +** blob stored in dynamically allocated space. +*/ +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE int sqlite3VdbeMemExpandBlob(Mem *pMem){ + int nByte; + assert( pMem!=0 ); + assert( pMem->flags & MEM_Zero ); + assert( (pMem->flags&MEM_Blob)!=0 || MemNullNochng(pMem) ); + testcase( sqlite3_value_nochange(pMem) ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + + /* Set nByte to the number of bytes required to store the expanded blob. */ + nByte = pMem->n + pMem->u.nZero; + if( nByte<=0 ){ + if( (pMem->flags & MEM_Blob)==0 ) return SQLITE_OK; + nByte = 1; + } + if( sqlite3VdbeMemGrow(pMem, nByte, 1) ){ + return SQLITE_NOMEM_BKPT; + } + assert( pMem->z!=0 ); + assert( sqlite3DbMallocSize(pMem->db,pMem->z) >= nByte ); + + memset(&pMem->z[pMem->n], 0, pMem->u.nZero); + pMem->n += pMem->u.nZero; + pMem->flags &= ~(MEM_Zero|MEM_Term); + return SQLITE_OK; +} +#endif + +/* +** Make sure the given Mem is \u0000 terminated. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemNulTerminate(Mem *pMem){ + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + testcase( (pMem->flags & (MEM_Term|MEM_Str))==(MEM_Term|MEM_Str) ); + testcase( (pMem->flags & (MEM_Term|MEM_Str))==0 ); + if( (pMem->flags & (MEM_Term|MEM_Str))!=MEM_Str ){ + return SQLITE_OK; /* Nothing to do */ + }else{ + return vdbeMemAddTerminator(pMem); + } +} + +/* +** Add MEM_Str to the set of representations for the given Mem. This +** routine is only called if pMem is a number of some kind, not a NULL +** or a BLOB. +** +** Existing representations MEM_Int, MEM_Real, or MEM_IntReal are invalidated +** if bForce is true but are retained if bForce is false. +** +** A MEM_Null value will never be passed to this function. This function is +** used for converting values to text for returning to the user (i.e. via +** sqlite3_value_text()), or for ensuring that values to be used as btree +** keys are strings. In the former case a NULL pointer is returned the +** user and the latter is an internal programming error. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem *pMem, u8 enc, u8 bForce){ + const int nByte = 32; + + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( !(pMem->flags&MEM_Zero) ); + assert( !(pMem->flags&(MEM_Str|MEM_Blob)) ); + assert( pMem->flags&(MEM_Int|MEM_Real|MEM_IntReal) ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + + + if( sqlite3VdbeMemClearAndResize(pMem, nByte) ){ + pMem->enc = 0; + return SQLITE_NOMEM_BKPT; + } + + vdbeMemRenderNum(nByte, pMem->z, pMem); + assert( pMem->z!=0 ); + pMem->n = sqlite3Strlen30NN(pMem->z); + pMem->enc = SQLITE_UTF8; + pMem->flags |= MEM_Str|MEM_Term; + if( bForce ) pMem->flags &= ~(MEM_Int|MEM_Real|MEM_IntReal); + sqlite3VdbeChangeEncoding(pMem, enc); + return SQLITE_OK; +} + +/* +** Memory cell pMem contains the context of an aggregate function. +** This routine calls the finalize method for that function. The +** result of the aggregate is stored back into pMem. +** +** Return SQLITE_ERROR if the finalizer reports an error. SQLITE_OK +** otherwise. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemFinalize(Mem *pMem, FuncDef *pFunc){ + sqlite3_context ctx; + Mem t; + assert( pFunc!=0 ); + assert( pMem!=0 ); + assert( pFunc->xFinalize!=0 ); + assert( (pMem->flags & MEM_Null)!=0 || pFunc==pMem->u.pDef ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + memset(&ctx, 0, sizeof(ctx)); + memset(&t, 0, sizeof(t)); + t.flags = MEM_Null; + t.db = pMem->db; + ctx.pOut = &t; + ctx.pMem = pMem; + ctx.pFunc = pFunc; + pFunc->xFinalize(&ctx); /* IMP: R-24505-23230 */ + assert( (pMem->flags & MEM_Dyn)==0 ); + if( pMem->szMalloc>0 ) sqlite3DbFreeNN(pMem->db, pMem->zMalloc); + memcpy(pMem, &t, sizeof(t)); + return ctx.isError; +} + +/* +** Memory cell pAccum contains the context of an aggregate function. +** This routine calls the xValue method for that function and stores +** the results in memory cell pMem. +** +** SQLITE_ERROR is returned if xValue() reports an error. SQLITE_OK +** otherwise. +*/ +#ifndef SQLITE_OMIT_WINDOWFUNC +SQLITE_PRIVATE int sqlite3VdbeMemAggValue(Mem *pAccum, Mem *pOut, FuncDef *pFunc){ + sqlite3_context ctx; + assert( pFunc!=0 ); + assert( pFunc->xValue!=0 ); + assert( (pAccum->flags & MEM_Null)!=0 || pFunc==pAccum->u.pDef ); + assert( pAccum->db==0 || sqlite3_mutex_held(pAccum->db->mutex) ); + memset(&ctx, 0, sizeof(ctx)); + sqlite3VdbeMemSetNull(pOut); + ctx.pOut = pOut; + ctx.pMem = pAccum; + ctx.pFunc = pFunc; + pFunc->xValue(&ctx); + return ctx.isError; +} +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +/* +** If the memory cell contains a value that must be freed by +** invoking the external callback in Mem.xDel, then this routine +** will free that value. It also sets Mem.flags to MEM_Null. +** +** This is a helper routine for sqlite3VdbeMemSetNull() and +** for sqlite3VdbeMemRelease(). Use those other routines as the +** entry point for releasing Mem resources. +*/ +static SQLITE_NOINLINE void vdbeMemClearExternAndSetNull(Mem *p){ + assert( p->db==0 || sqlite3_mutex_held(p->db->mutex) ); + assert( VdbeMemDynamic(p) ); + if( p->flags&MEM_Agg ){ + sqlite3VdbeMemFinalize(p, p->u.pDef); + assert( (p->flags & MEM_Agg)==0 ); + testcase( p->flags & MEM_Dyn ); + } + if( p->flags&MEM_Dyn ){ + assert( p->xDel!=SQLITE_DYNAMIC && p->xDel!=0 ); + p->xDel((void *)p->z); + } + p->flags = MEM_Null; +} + +/* +** Release memory held by the Mem p, both external memory cleared +** by p->xDel and memory in p->zMalloc. +** +** This is a helper routine invoked by sqlite3VdbeMemRelease() in +** the unusual case where there really is memory in p that needs +** to be freed. +*/ +static SQLITE_NOINLINE void vdbeMemClear(Mem *p){ + if( VdbeMemDynamic(p) ){ + vdbeMemClearExternAndSetNull(p); + } + if( p->szMalloc ){ + sqlite3DbFreeNN(p->db, p->zMalloc); + p->szMalloc = 0; + } + p->z = 0; +} + +/* +** Release any memory resources held by the Mem. Both the memory that is +** free by Mem.xDel and the Mem.zMalloc allocation are freed. +** +** Use this routine prior to clean up prior to abandoning a Mem, or to +** reset a Mem back to its minimum memory utilization. +** +** Use sqlite3VdbeMemSetNull() to release just the Mem.xDel space +** prior to inserting new content into the Mem. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p){ + assert( sqlite3VdbeCheckMemInvariants(p) ); + if( VdbeMemDynamic(p) || p->szMalloc ){ + vdbeMemClear(p); + } +} + +/* +** Convert a 64-bit IEEE double into a 64-bit signed integer. +** If the double is out of range of a 64-bit signed integer then +** return the closest available 64-bit signed integer. +*/ +static SQLITE_NOINLINE i64 doubleToInt64(double r){ +#ifdef SQLITE_OMIT_FLOATING_POINT + /* When floating-point is omitted, double and int64 are the same thing */ + return r; +#else + /* + ** Many compilers we encounter do not define constants for the + ** minimum and maximum 64-bit integers, or they define them + ** inconsistently. And many do not understand the "LL" notation. + ** So we define our own static constants here using nothing + ** larger than a 32-bit integer constant. + */ + static const i64 maxInt = LARGEST_INT64; + static const i64 minInt = SMALLEST_INT64; + + if( r<=(double)minInt ){ + return minInt; + }else if( r>=(double)maxInt ){ + return maxInt; + }else{ + return (i64)r; + } +#endif +} + +/* +** Return some kind of integer value which is the best we can do +** at representing the value that *pMem describes as an integer. +** If pMem is an integer, then the value is exact. If pMem is +** a floating-point then the value returned is the integer part. +** If pMem is a string or blob, then we make an attempt to convert +** it into an integer and return that. If pMem represents an +** an SQL-NULL value, return 0. +** +** If pMem represents a string value, its encoding might be changed. +*/ +static SQLITE_NOINLINE i64 memIntValue(const Mem *pMem){ + i64 value = 0; + sqlite3Atoi64(pMem->z, &value, pMem->n, pMem->enc); + return value; +} +SQLITE_PRIVATE i64 sqlite3VdbeIntValue(const Mem *pMem){ + int flags; + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + flags = pMem->flags; + if( flags & (MEM_Int|MEM_IntReal) ){ + testcase( flags & MEM_IntReal ); + return pMem->u.i; + }else if( flags & MEM_Real ){ + return doubleToInt64(pMem->u.r); + }else if( (flags & (MEM_Str|MEM_Blob))!=0 && pMem->z!=0 ){ + return memIntValue(pMem); + }else{ + return 0; + } +} + +/* +** Return the best representation of pMem that we can get into a +** double. If pMem is already a double or an integer, return its +** value. If it is a string or blob, try to convert it to a double. +** If it is a NULL, return 0.0. +*/ +static SQLITE_NOINLINE double memRealValue(Mem *pMem){ + /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ + double val = (double)0; + sqlite3AtoF(pMem->z, &val, pMem->n, pMem->enc); + return val; +} +SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem *pMem){ + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + if( pMem->flags & MEM_Real ){ + return pMem->u.r; + }else if( pMem->flags & (MEM_Int|MEM_IntReal) ){ + testcase( pMem->flags & MEM_IntReal ); + return (double)pMem->u.i; + }else if( pMem->flags & (MEM_Str|MEM_Blob) ){ + return memRealValue(pMem); + }else{ + /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ + return (double)0; + } +} + +/* +** Return 1 if pMem represents true, and return 0 if pMem represents false. +** Return the value ifNull if pMem is NULL. +*/ +SQLITE_PRIVATE int sqlite3VdbeBooleanValue(Mem *pMem, int ifNull){ + testcase( pMem->flags & MEM_IntReal ); + if( pMem->flags & (MEM_Int|MEM_IntReal) ) return pMem->u.i!=0; + if( pMem->flags & MEM_Null ) return ifNull; + return sqlite3VdbeRealValue(pMem)!=0.0; +} + +/* +** The MEM structure is already a MEM_Real. Try to also make it a +** MEM_Int if we can. +*/ +SQLITE_PRIVATE void sqlite3VdbeIntegerAffinity(Mem *pMem){ + i64 ix; + assert( pMem!=0 ); + assert( pMem->flags & MEM_Real ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + + ix = doubleToInt64(pMem->u.r); + + /* Only mark the value as an integer if + ** + ** (1) the round-trip conversion real->int->real is a no-op, and + ** (2) The integer is neither the largest nor the smallest + ** possible integer (ticket #3922) + ** + ** The second and third terms in the following conditional enforces + ** the second condition under the assumption that addition overflow causes + ** values to wrap around. + */ + if( pMem->u.r==ix && ix>SMALLEST_INT64 && ixu.i = ix; + MemSetTypeFlag(pMem, MEM_Int); + } +} + +/* +** Convert pMem to type integer. Invalidate any prior representations. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemIntegerify(Mem *pMem){ + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + + pMem->u.i = sqlite3VdbeIntValue(pMem); + MemSetTypeFlag(pMem, MEM_Int); + return SQLITE_OK; +} + +/* +** Convert pMem so that it is of type MEM_Real. +** Invalidate any prior representations. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemRealify(Mem *pMem){ + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + + pMem->u.r = sqlite3VdbeRealValue(pMem); + MemSetTypeFlag(pMem, MEM_Real); + return SQLITE_OK; +} + +/* Compare a floating point value to an integer. Return true if the two +** values are the same within the precision of the floating point value. +** +** This function assumes that i was obtained by assignment from r1. +** +** For some versions of GCC on 32-bit machines, if you do the more obvious +** comparison of "r1==(double)i" you sometimes get an answer of false even +** though the r1 and (double)i values are bit-for-bit the same. +*/ +SQLITE_PRIVATE int sqlite3RealSameAsInt(double r1, sqlite3_int64 i){ + double r2 = (double)i; + return r1==0.0 + || (memcmp(&r1, &r2, sizeof(r1))==0 + && i >= -2251799813685248LL && i < 2251799813685248LL); +} + +/* +** Convert pMem so that it has type MEM_Real or MEM_Int. +** Invalidate any prior representations. +** +** Every effort is made to force the conversion, even if the input +** is a string that does not look completely like a number. Convert +** as much of the string as we can and ignore the rest. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem *pMem){ + assert( pMem!=0 ); + testcase( pMem->flags & MEM_Int ); + testcase( pMem->flags & MEM_Real ); + testcase( pMem->flags & MEM_IntReal ); + testcase( pMem->flags & MEM_Null ); + if( (pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal|MEM_Null))==0 ){ + int rc; + sqlite3_int64 ix; + assert( (pMem->flags & (MEM_Blob|MEM_Str))!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + rc = sqlite3AtoF(pMem->z, &pMem->u.r, pMem->n, pMem->enc); + if( ((rc==0 || rc==1) && sqlite3Atoi64(pMem->z, &ix, pMem->n, pMem->enc)<=1) + || sqlite3RealSameAsInt(pMem->u.r, (ix = (i64)pMem->u.r)) + ){ + pMem->u.i = ix; + MemSetTypeFlag(pMem, MEM_Int); + }else{ + MemSetTypeFlag(pMem, MEM_Real); + } + } + assert( (pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal|MEM_Null))!=0 ); + pMem->flags &= ~(MEM_Str|MEM_Blob|MEM_Zero); + return SQLITE_OK; +} + +/* +** Cast the datatype of the value in pMem according to the affinity +** "aff". Casting is different from applying affinity in that a cast +** is forced. In other words, the value is converted into the desired +** affinity even if that results in loss of data. This routine is +** used (for example) to implement the SQL "cast()" operator. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemCast(Mem *pMem, u8 aff, u8 encoding){ + if( pMem->flags & MEM_Null ) return SQLITE_OK; + switch( aff ){ + case SQLITE_AFF_BLOB: { /* Really a cast to BLOB */ + if( (pMem->flags & MEM_Blob)==0 ){ + sqlite3ValueApplyAffinity(pMem, SQLITE_AFF_TEXT, encoding); + assert( pMem->flags & MEM_Str || pMem->db->mallocFailed ); + if( pMem->flags & MEM_Str ) MemSetTypeFlag(pMem, MEM_Blob); + }else{ + pMem->flags &= ~(MEM_TypeMask&~MEM_Blob); + } + break; + } + case SQLITE_AFF_NUMERIC: { + sqlite3VdbeMemNumerify(pMem); + break; + } + case SQLITE_AFF_INTEGER: { + sqlite3VdbeMemIntegerify(pMem); + break; + } + case SQLITE_AFF_REAL: { + sqlite3VdbeMemRealify(pMem); + break; + } + default: { + assert( aff==SQLITE_AFF_TEXT ); + assert( MEM_Str==(MEM_Blob>>3) ); + pMem->flags |= (pMem->flags&MEM_Blob)>>3; + sqlite3ValueApplyAffinity(pMem, SQLITE_AFF_TEXT, encoding); + assert( pMem->flags & MEM_Str || pMem->db->mallocFailed ); + pMem->flags &= ~(MEM_Int|MEM_Real|MEM_IntReal|MEM_Blob|MEM_Zero); + return sqlite3VdbeChangeEncoding(pMem, encoding); + } + } + return SQLITE_OK; +} + +/* +** Initialize bulk memory to be a consistent Mem object. +** +** The minimum amount of initialization feasible is performed. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemInit(Mem *pMem, sqlite3 *db, u16 flags){ + assert( (flags & ~MEM_TypeMask)==0 ); + pMem->flags = flags; + pMem->db = db; + pMem->szMalloc = 0; +} + + +/* +** Delete any previous value and set the value stored in *pMem to NULL. +** +** This routine calls the Mem.xDel destructor to dispose of values that +** require the destructor. But it preserves the Mem.zMalloc memory allocation. +** To free all resources, use sqlite3VdbeMemRelease(), which both calls this +** routine to invoke the destructor and deallocates Mem.zMalloc. +** +** Use this routine to reset the Mem prior to insert a new value. +** +** Use sqlite3VdbeMemRelease() to complete erase the Mem prior to abandoning it. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemSetNull(Mem *pMem){ + if( VdbeMemDynamic(pMem) ){ + vdbeMemClearExternAndSetNull(pMem); + }else{ + pMem->flags = MEM_Null; + } +} +SQLITE_PRIVATE void sqlite3ValueSetNull(sqlite3_value *p){ + sqlite3VdbeMemSetNull((Mem*)p); +} + +/* +** Delete any previous value and set the value to be a BLOB of length +** n containing all zeros. +*/ +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE void sqlite3VdbeMemSetZeroBlob(Mem *pMem, int n){ + sqlite3VdbeMemRelease(pMem); + pMem->flags = MEM_Blob|MEM_Zero; + pMem->n = 0; + if( n<0 ) n = 0; + pMem->u.nZero = n; + pMem->enc = SQLITE_UTF8; + pMem->z = 0; +} +#else +SQLITE_PRIVATE int sqlite3VdbeMemSetZeroBlob(Mem *pMem, int n){ + int nByte = n>0?n:1; + if( sqlite3VdbeMemGrow(pMem, nByte, 0) ){ + return SQLITE_NOMEM_BKPT; + } + assert( pMem->z!=0 ); + assert( sqlite3DbMallocSize(pMem->db, pMem->z)>=nByte ); + memset(pMem->z, 0, nByte); + pMem->n = n>0?n:0; + pMem->flags = MEM_Blob; + pMem->enc = SQLITE_UTF8; + return SQLITE_OK; +} +#endif + +/* +** The pMem is known to contain content that needs to be destroyed prior +** to a value change. So invoke the destructor, then set the value to +** a 64-bit integer. +*/ +static SQLITE_NOINLINE void vdbeReleaseAndSetInt64(Mem *pMem, i64 val){ + sqlite3VdbeMemSetNull(pMem); + pMem->u.i = val; + pMem->flags = MEM_Int; +} + +/* +** Delete any previous value and set the value stored in *pMem to val, +** manifest type INTEGER. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemSetInt64(Mem *pMem, i64 val){ + if( VdbeMemDynamic(pMem) ){ + vdbeReleaseAndSetInt64(pMem, val); + }else{ + pMem->u.i = val; + pMem->flags = MEM_Int; + } +} + +/* A no-op destructor */ +SQLITE_PRIVATE void sqlite3NoopDestructor(void *p){ UNUSED_PARAMETER(p); } + +/* +** Set the value stored in *pMem should already be a NULL. +** Also store a pointer to go with it. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemSetPointer( + Mem *pMem, + void *pPtr, + const char *zPType, + void (*xDestructor)(void*) +){ + assert( pMem->flags==MEM_Null ); + vdbeMemClear(pMem); + pMem->u.zPType = zPType ? zPType : ""; + pMem->z = pPtr; + pMem->flags = MEM_Null|MEM_Dyn|MEM_Subtype|MEM_Term; + pMem->eSubtype = 'p'; + pMem->xDel = xDestructor ? xDestructor : sqlite3NoopDestructor; +} + +#ifndef SQLITE_OMIT_FLOATING_POINT +/* +** Delete any previous value and set the value stored in *pMem to val, +** manifest type REAL. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemSetDouble(Mem *pMem, double val){ + sqlite3VdbeMemSetNull(pMem); + if( !sqlite3IsNaN(val) ){ + pMem->u.r = val; + pMem->flags = MEM_Real; + } +} +#endif + +#ifdef SQLITE_DEBUG +/* +** Return true if the Mem holds a RowSet object. This routine is intended +** for use inside of assert() statements. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemIsRowSet(const Mem *pMem){ + return (pMem->flags&(MEM_Blob|MEM_Dyn))==(MEM_Blob|MEM_Dyn) + && pMem->xDel==sqlite3RowSetDelete; +} +#endif + +/* +** Delete any previous value and set the value of pMem to be an +** empty boolean index. +** +** Return SQLITE_OK on success and SQLITE_NOMEM if a memory allocation +** error occurs. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemSetRowSet(Mem *pMem){ + sqlite3 *db = pMem->db; + RowSet *p; + assert( db!=0 ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + sqlite3VdbeMemRelease(pMem); + p = sqlite3RowSetInit(db); + if( p==0 ) return SQLITE_NOMEM; + pMem->z = (char*)p; + pMem->flags = MEM_Blob|MEM_Dyn; + pMem->xDel = sqlite3RowSetDelete; + return SQLITE_OK; +} + +/* +** Return true if the Mem object contains a TEXT or BLOB that is +** too large - whose size exceeds SQLITE_MAX_LENGTH. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemTooBig(Mem *p){ + assert( p->db!=0 ); + if( p->flags & (MEM_Str|MEM_Blob) ){ + int n = p->n; + if( p->flags & MEM_Zero ){ + n += p->u.nZero; + } + return n>p->db->aLimit[SQLITE_LIMIT_LENGTH]; + } + return 0; +} + +#ifdef SQLITE_DEBUG +/* +** This routine prepares a memory cell for modification by breaking +** its link to a shallow copy and by marking any current shallow +** copies of this cell as invalid. +** +** This is used for testing and debugging only - to help ensure that shallow +** copies (created by OP_SCopy) are not misused. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe *pVdbe, Mem *pMem){ + int i; + Mem *pX; + for(i=1, pX=pVdbe->aMem+1; inMem; i++, pX++){ + if( pX->pScopyFrom==pMem ){ + u16 mFlags; + if( pVdbe->db->flags & SQLITE_VdbeTrace ){ + sqlite3DebugPrintf("Invalidate R[%d] due to change in R[%d]\n", + (int)(pX - pVdbe->aMem), (int)(pMem - pVdbe->aMem)); + } + /* If pX is marked as a shallow copy of pMem, then try to verify that + ** no significant changes have been made to pX since the OP_SCopy. + ** A significant change would indicated a missed call to this + ** function for pX. Minor changes, such as adding or removing a + ** dual type, are allowed, as long as the underlying value is the + ** same. */ + mFlags = pMem->flags & pX->flags & pX->mScopyFlags; + assert( (mFlags&(MEM_Int|MEM_IntReal))==0 || pMem->u.i==pX->u.i ); + + /* pMem is the register that is changing. But also mark pX as + ** undefined so that we can quickly detect the shallow-copy error */ + pX->flags = MEM_Undefined; + pX->pScopyFrom = 0; + } + } + pMem->pScopyFrom = 0; +} +#endif /* SQLITE_DEBUG */ + +/* +** Make an shallow copy of pFrom into pTo. Prior contents of +** pTo are freed. The pFrom->z field is not duplicated. If +** pFrom->z is used, then pTo->z points to the same thing as pFrom->z +** and flags gets srcType (either MEM_Ephem or MEM_Static). +*/ +static SQLITE_NOINLINE void vdbeClrCopy(Mem *pTo, const Mem *pFrom, int eType){ + vdbeMemClearExternAndSetNull(pTo); + assert( !VdbeMemDynamic(pTo) ); + sqlite3VdbeMemShallowCopy(pTo, pFrom, eType); +} +SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){ + assert( !sqlite3VdbeMemIsRowSet(pFrom) ); + assert( pTo->db==pFrom->db ); + if( VdbeMemDynamic(pTo) ){ vdbeClrCopy(pTo,pFrom,srcType); return; } + memcpy(pTo, pFrom, MEMCELLSIZE); + if( (pFrom->flags&MEM_Static)==0 ){ + pTo->flags &= ~(MEM_Dyn|MEM_Static|MEM_Ephem); + assert( srcType==MEM_Ephem || srcType==MEM_Static ); + pTo->flags |= srcType; + } +} + +/* +** Make a full copy of pFrom into pTo. Prior contents of pTo are +** freed before the copy is made. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemCopy(Mem *pTo, const Mem *pFrom){ + int rc = SQLITE_OK; + + assert( !sqlite3VdbeMemIsRowSet(pFrom) ); + if( VdbeMemDynamic(pTo) ) vdbeMemClearExternAndSetNull(pTo); + memcpy(pTo, pFrom, MEMCELLSIZE); + pTo->flags &= ~MEM_Dyn; + if( pTo->flags&(MEM_Str|MEM_Blob) ){ + if( 0==(pFrom->flags&MEM_Static) ){ + pTo->flags |= MEM_Ephem; + rc = sqlite3VdbeMemMakeWriteable(pTo); + } + } + + return rc; +} + +/* +** Transfer the contents of pFrom to pTo. Any existing value in pTo is +** freed. If pFrom contains ephemeral data, a copy is made. +** +** pFrom contains an SQL NULL when this routine returns. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemMove(Mem *pTo, Mem *pFrom){ + assert( pFrom->db==0 || sqlite3_mutex_held(pFrom->db->mutex) ); + assert( pTo->db==0 || sqlite3_mutex_held(pTo->db->mutex) ); + assert( pFrom->db==0 || pTo->db==0 || pFrom->db==pTo->db ); + + sqlite3VdbeMemRelease(pTo); + memcpy(pTo, pFrom, sizeof(Mem)); + pFrom->flags = MEM_Null; + pFrom->szMalloc = 0; +} + +/* +** Change the value of a Mem to be a string or a BLOB. +** +** The memory management strategy depends on the value of the xDel +** parameter. If the value passed is SQLITE_TRANSIENT, then the +** string is copied into a (possibly existing) buffer managed by the +** Mem structure. Otherwise, any existing buffer is freed and the +** pointer copied. +** +** If the string is too large (if it exceeds the SQLITE_LIMIT_LENGTH +** size limit) then no memory allocation occurs. If the string can be +** stored without allocating memory, then it is. If a memory allocation +** is required to store the string, then value of pMem is unchanged. In +** either case, SQLITE_TOOBIG is returned. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemSetStr( + Mem *pMem, /* Memory cell to set to string value */ + const char *z, /* String pointer */ + i64 n, /* Bytes in string, or negative */ + u8 enc, /* Encoding of z. 0 for BLOBs */ + void (*xDel)(void*) /* Destructor function */ +){ + i64 nByte = n; /* New value for pMem->n */ + int iLimit; /* Maximum allowed string or blob size */ + u16 flags = 0; /* New value for pMem->flags */ + + assert( pMem!=0 ); + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + + /* If z is a NULL pointer, set pMem to contain an SQL NULL. */ + if( !z ){ + sqlite3VdbeMemSetNull(pMem); + return SQLITE_OK; + } + + if( pMem->db ){ + iLimit = pMem->db->aLimit[SQLITE_LIMIT_LENGTH]; + }else{ + iLimit = SQLITE_MAX_LENGTH; + } + flags = (enc==0?MEM_Blob:MEM_Str); + if( nByte<0 ){ + assert( enc!=0 ); + if( enc==SQLITE_UTF8 ){ + nByte = strlen(z); + }else{ + for(nByte=0; nByte<=iLimit && (z[nByte] | z[nByte+1]); nByte+=2){} + } + flags |= MEM_Term; + } + + /* The following block sets the new values of Mem.z and Mem.xDel. It + ** also sets a flag in local variable "flags" to indicate the memory + ** management (one of MEM_Dyn or MEM_Static). + */ + if( xDel==SQLITE_TRANSIENT ){ + i64 nAlloc = nByte; + if( flags&MEM_Term ){ + nAlloc += (enc==SQLITE_UTF8?1:2); + } + if( nByte>iLimit ){ + return sqlite3ErrorToParser(pMem->db, SQLITE_TOOBIG); + } + testcase( nAlloc==0 ); + testcase( nAlloc==31 ); + testcase( nAlloc==32 ); + if( sqlite3VdbeMemClearAndResize(pMem, (int)MAX(nAlloc,32)) ){ + return SQLITE_NOMEM_BKPT; + } + memcpy(pMem->z, z, nAlloc); + }else{ + sqlite3VdbeMemRelease(pMem); + pMem->z = (char *)z; + if( xDel==SQLITE_DYNAMIC ){ + pMem->zMalloc = pMem->z; + pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->zMalloc); + }else{ + pMem->xDel = xDel; + flags |= ((xDel==SQLITE_STATIC)?MEM_Static:MEM_Dyn); + } + } + + pMem->n = (int)(nByte & 0x7fffffff); + pMem->flags = flags; + if( enc ){ + pMem->enc = enc; +#ifdef SQLITE_ENABLE_SESSION + }else if( pMem->db==0 ){ + pMem->enc = SQLITE_UTF8; +#endif + }else{ + assert( pMem->db!=0 ); + pMem->enc = ENC(pMem->db); + } + +#ifndef SQLITE_OMIT_UTF16 + if( enc>SQLITE_UTF8 && sqlite3VdbeMemHandleBom(pMem) ){ + return SQLITE_NOMEM_BKPT; + } +#endif + + if( nByte>iLimit ){ + return sqlite3ErrorToParser(pMem->db, SQLITE_TOOBIG); + } + + return SQLITE_OK; +} + +/* +** Move data out of a btree key or data field and into a Mem structure. +** The data is payload from the entry that pCur is currently pointing +** to. offset and amt determine what portion of the data or key to retrieve. +** The result is written into the pMem element. +** +** The pMem object must have been initialized. This routine will use +** pMem->zMalloc to hold the content from the btree, if possible. New +** pMem->zMalloc space will be allocated if necessary. The calling routine +** is responsible for making sure that the pMem object is eventually +** destroyed. +** +** If this routine fails for any reason (malloc returns NULL or unable +** to read from the disk) then the pMem is left in an inconsistent state. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemFromBtree( + BtCursor *pCur, /* Cursor pointing at record to retrieve. */ + u32 offset, /* Offset from the start of data to return bytes from. */ + u32 amt, /* Number of bytes to return. */ + Mem *pMem /* OUT: Return data in this Mem structure. */ +){ + int rc; + pMem->flags = MEM_Null; + if( sqlite3BtreeMaxRecordSize(pCur)z); + if( rc==SQLITE_OK ){ + pMem->z[amt] = 0; /* Overrun area used when reading malformed records */ + pMem->flags = MEM_Blob; + pMem->n = (int)amt; + }else{ + sqlite3VdbeMemRelease(pMem); + } + } + return rc; +} +SQLITE_PRIVATE int sqlite3VdbeMemFromBtreeZeroOffset( + BtCursor *pCur, /* Cursor pointing at record to retrieve. */ + u32 amt, /* Number of bytes to return. */ + Mem *pMem /* OUT: Return data in this Mem structure. */ +){ + u32 available = 0; /* Number of bytes available on the local btree page */ + int rc = SQLITE_OK; /* Return code */ + + assert( sqlite3BtreeCursorIsValid(pCur) ); + assert( !VdbeMemDynamic(pMem) ); + + /* Note: the calls to BtreeKeyFetch() and DataFetch() below assert() + ** that both the BtShared and database handle mutexes are held. */ + assert( !sqlite3VdbeMemIsRowSet(pMem) ); + pMem->z = (char *)sqlite3BtreePayloadFetch(pCur, &available); + assert( pMem->z!=0 ); + + if( amt<=available ){ + pMem->flags = MEM_Blob|MEM_Ephem; + pMem->n = (int)amt; + }else{ + rc = sqlite3VdbeMemFromBtree(pCur, 0, amt, pMem); + } + + return rc; +} + +/* +** The pVal argument is known to be a value other than NULL. +** Convert it into a string with encoding enc and return a pointer +** to a zero-terminated version of that string. +*/ +static SQLITE_NOINLINE const void *valueToText(sqlite3_value* pVal, u8 enc){ + assert( pVal!=0 ); + assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) ); + assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) ); + assert( !sqlite3VdbeMemIsRowSet(pVal) ); + assert( (pVal->flags & (MEM_Null))==0 ); + if( pVal->flags & (MEM_Blob|MEM_Str) ){ + if( ExpandBlob(pVal) ) return 0; + pVal->flags |= MEM_Str; + if( pVal->enc != (enc & ~SQLITE_UTF16_ALIGNED) ){ + sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED); + } + if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){ + assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 ); + if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){ + return 0; + } + } + sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */ + }else{ + sqlite3VdbeMemStringify(pVal, enc, 0); + assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) ); + } + assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->db==0 + || pVal->db->mallocFailed ); + if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){ + assert( sqlite3VdbeMemValidStrRep(pVal) ); + return pVal->z; + }else{ + return 0; + } +} + +/* This function is only available internally, it is not part of the +** external API. It works in a similar way to sqlite3_value_text(), +** except the data returned is in the encoding specified by the second +** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or +** SQLITE_UTF8. +** +** (2006-02-16:) The enc value can be or-ed with SQLITE_UTF16_ALIGNED. +** If that is the case, then the result must be aligned on an even byte +** boundary. +*/ +SQLITE_PRIVATE const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){ + if( !pVal ) return 0; + assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) ); + assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) ); + assert( !sqlite3VdbeMemIsRowSet(pVal) ); + if( (pVal->flags&(MEM_Str|MEM_Term))==(MEM_Str|MEM_Term) && pVal->enc==enc ){ + assert( sqlite3VdbeMemValidStrRep(pVal) ); + return pVal->z; + } + if( pVal->flags&MEM_Null ){ + return 0; + } + return valueToText(pVal, enc); +} + +/* +** Create a new sqlite3_value object. +*/ +SQLITE_PRIVATE sqlite3_value *sqlite3ValueNew(sqlite3 *db){ + Mem *p = sqlite3DbMallocZero(db, sizeof(*p)); + if( p ){ + p->flags = MEM_Null; + p->db = db; + } + return p; +} + +/* +** Context object passed by sqlite3Stat4ProbeSetValue() through to +** valueNew(). See comments above valueNew() for details. +*/ +struct ValueNewStat4Ctx { + Parse *pParse; + Index *pIdx; + UnpackedRecord **ppRec; + int iVal; +}; + +/* +** Allocate and return a pointer to a new sqlite3_value object. If +** the second argument to this function is NULL, the object is allocated +** by calling sqlite3ValueNew(). +** +** Otherwise, if the second argument is non-zero, then this function is +** being called indirectly by sqlite3Stat4ProbeSetValue(). If it has not +** already been allocated, allocate the UnpackedRecord structure that +** that function will return to its caller here. Then return a pointer to +** an sqlite3_value within the UnpackedRecord.a[] array. +*/ +static sqlite3_value *valueNew(sqlite3 *db, struct ValueNewStat4Ctx *p){ +#ifdef SQLITE_ENABLE_STAT4 + if( p ){ + UnpackedRecord *pRec = p->ppRec[0]; + + if( pRec==0 ){ + Index *pIdx = p->pIdx; /* Index being probed */ + int nByte; /* Bytes of space to allocate */ + int i; /* Counter variable */ + int nCol = pIdx->nColumn; /* Number of index columns including rowid */ + + nByte = sizeof(Mem) * nCol + ROUND8(sizeof(UnpackedRecord)); + pRec = (UnpackedRecord*)sqlite3DbMallocZero(db, nByte); + if( pRec ){ + pRec->pKeyInfo = sqlite3KeyInfoOfIndex(p->pParse, pIdx); + if( pRec->pKeyInfo ){ + assert( pRec->pKeyInfo->nAllField==nCol ); + assert( pRec->pKeyInfo->enc==ENC(db) ); + pRec->aMem = (Mem *)((u8*)pRec + ROUND8(sizeof(UnpackedRecord))); + for(i=0; iaMem[i].flags = MEM_Null; + pRec->aMem[i].db = db; + } + }else{ + sqlite3DbFreeNN(db, pRec); + pRec = 0; + } + } + if( pRec==0 ) return 0; + p->ppRec[0] = pRec; + } + + pRec->nField = p->iVal+1; + return &pRec->aMem[p->iVal]; + } +#else + UNUSED_PARAMETER(p); +#endif /* defined(SQLITE_ENABLE_STAT4) */ + return sqlite3ValueNew(db); +} + +/* +** The expression object indicated by the second argument is guaranteed +** to be a scalar SQL function. If +** +** * all function arguments are SQL literals, +** * one of the SQLITE_FUNC_CONSTANT or _SLOCHNG function flags is set, and +** * the SQLITE_FUNC_NEEDCOLL function flag is not set, +** +** then this routine attempts to invoke the SQL function. Assuming no +** error occurs, output parameter (*ppVal) is set to point to a value +** object containing the result before returning SQLITE_OK. +** +** Affinity aff is applied to the result of the function before returning. +** If the result is a text value, the sqlite3_value object uses encoding +** enc. +** +** If the conditions above are not met, this function returns SQLITE_OK +** and sets (*ppVal) to NULL. Or, if an error occurs, (*ppVal) is set to +** NULL and an SQLite error code returned. +*/ +#ifdef SQLITE_ENABLE_STAT4 +static int valueFromFunction( + sqlite3 *db, /* The database connection */ + const Expr *p, /* The expression to evaluate */ + u8 enc, /* Encoding to use */ + u8 aff, /* Affinity to use */ + sqlite3_value **ppVal, /* Write the new value here */ + struct ValueNewStat4Ctx *pCtx /* Second argument for valueNew() */ +){ + sqlite3_context ctx; /* Context object for function invocation */ + sqlite3_value **apVal = 0; /* Function arguments */ + int nVal = 0; /* Size of apVal[] array */ + FuncDef *pFunc = 0; /* Function definition */ + sqlite3_value *pVal = 0; /* New value */ + int rc = SQLITE_OK; /* Return code */ + ExprList *pList = 0; /* Function arguments */ + int i; /* Iterator variable */ + + assert( pCtx!=0 ); + assert( (p->flags & EP_TokenOnly)==0 ); + assert( ExprUseXList(p) ); + pList = p->x.pList; + if( pList ) nVal = pList->nExpr; + assert( !ExprHasProperty(p, EP_IntValue) ); + pFunc = sqlite3FindFunction(db, p->u.zToken, nVal, enc, 0); + assert( pFunc ); + if( (pFunc->funcFlags & (SQLITE_FUNC_CONSTANT|SQLITE_FUNC_SLOCHNG))==0 + || (pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL) + ){ + return SQLITE_OK; + } + + if( pList ){ + apVal = (sqlite3_value**)sqlite3DbMallocZero(db, sizeof(apVal[0]) * nVal); + if( apVal==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto value_from_function_out; + } + for(i=0; ia[i].pExpr, enc, aff, &apVal[i]); + if( apVal[i]==0 || rc!=SQLITE_OK ) goto value_from_function_out; + } + } + + pVal = valueNew(db, pCtx); + if( pVal==0 ){ + rc = SQLITE_NOMEM_BKPT; + goto value_from_function_out; + } + + assert( pCtx->pParse->rc==SQLITE_OK ); + memset(&ctx, 0, sizeof(ctx)); + ctx.pOut = pVal; + ctx.pFunc = pFunc; + pFunc->xSFunc(&ctx, nVal, apVal); + if( ctx.isError ){ + rc = ctx.isError; + sqlite3ErrorMsg(pCtx->pParse, "%s", sqlite3_value_text(pVal)); + }else{ + sqlite3ValueApplyAffinity(pVal, aff, SQLITE_UTF8); + assert( rc==SQLITE_OK ); + rc = sqlite3VdbeChangeEncoding(pVal, enc); + if( rc==SQLITE_OK && sqlite3VdbeMemTooBig(pVal) ){ + rc = SQLITE_TOOBIG; + pCtx->pParse->nErr++; + } + } + pCtx->pParse->rc = rc; + + value_from_function_out: + if( rc!=SQLITE_OK ){ + pVal = 0; + } + if( apVal ){ + for(i=0; iop)==TK_UPLUS || op==TK_SPAN ) pExpr = pExpr->pLeft; + if( op==TK_REGISTER ) op = pExpr->op2; + + /* Compressed expressions only appear when parsing the DEFAULT clause + ** on a table column definition, and hence only when pCtx==0. This + ** check ensures that an EP_TokenOnly expression is never passed down + ** into valueFromFunction(). */ + assert( (pExpr->flags & EP_TokenOnly)==0 || pCtx==0 ); + + if( op==TK_CAST ){ + u8 aff; + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + aff = sqlite3AffinityType(pExpr->u.zToken,0); + rc = valueFromExpr(db, pExpr->pLeft, enc, aff, ppVal, pCtx); + testcase( rc!=SQLITE_OK ); + if( *ppVal ){ + sqlite3VdbeMemCast(*ppVal, aff, SQLITE_UTF8); + sqlite3ValueApplyAffinity(*ppVal, affinity, SQLITE_UTF8); + } + return rc; + } + + /* Handle negative integers in a single step. This is needed in the + ** case when the value is -9223372036854775808. + */ + if( op==TK_UMINUS + && (pExpr->pLeft->op==TK_INTEGER || pExpr->pLeft->op==TK_FLOAT) ){ + pExpr = pExpr->pLeft; + op = pExpr->op; + negInt = -1; + zNeg = "-"; + } + + if( op==TK_STRING || op==TK_FLOAT || op==TK_INTEGER ){ + pVal = valueNew(db, pCtx); + if( pVal==0 ) goto no_mem; + if( ExprHasProperty(pExpr, EP_IntValue) ){ + sqlite3VdbeMemSetInt64(pVal, (i64)pExpr->u.iValue*negInt); + }else{ + zVal = sqlite3MPrintf(db, "%s%s", zNeg, pExpr->u.zToken); + if( zVal==0 ) goto no_mem; + sqlite3ValueSetStr(pVal, -1, zVal, SQLITE_UTF8, SQLITE_DYNAMIC); + } + if( (op==TK_INTEGER || op==TK_FLOAT ) && affinity==SQLITE_AFF_BLOB ){ + sqlite3ValueApplyAffinity(pVal, SQLITE_AFF_NUMERIC, SQLITE_UTF8); + }else{ + sqlite3ValueApplyAffinity(pVal, affinity, SQLITE_UTF8); + } + assert( (pVal->flags & MEM_IntReal)==0 ); + if( pVal->flags & (MEM_Int|MEM_IntReal|MEM_Real) ){ + testcase( pVal->flags & MEM_Int ); + testcase( pVal->flags & MEM_Real ); + pVal->flags &= ~MEM_Str; + } + if( enc!=SQLITE_UTF8 ){ + rc = sqlite3VdbeChangeEncoding(pVal, enc); + } + }else if( op==TK_UMINUS ) { + /* This branch happens for multiple negative signs. Ex: -(-5) */ + if( SQLITE_OK==valueFromExpr(db,pExpr->pLeft,enc,affinity,&pVal,pCtx) + && pVal!=0 + ){ + sqlite3VdbeMemNumerify(pVal); + if( pVal->flags & MEM_Real ){ + pVal->u.r = -pVal->u.r; + }else if( pVal->u.i==SMALLEST_INT64 ){ +#ifndef SQLITE_OMIT_FLOATING_POINT + pVal->u.r = -(double)SMALLEST_INT64; +#else + pVal->u.r = LARGEST_INT64; +#endif + MemSetTypeFlag(pVal, MEM_Real); + }else{ + pVal->u.i = -pVal->u.i; + } + sqlite3ValueApplyAffinity(pVal, affinity, enc); + } + }else if( op==TK_NULL ){ + pVal = valueNew(db, pCtx); + if( pVal==0 ) goto no_mem; + sqlite3VdbeMemSetNull(pVal); + } +#ifndef SQLITE_OMIT_BLOB_LITERAL + else if( op==TK_BLOB ){ + int nVal; + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + assert( pExpr->u.zToken[0]=='x' || pExpr->u.zToken[0]=='X' ); + assert( pExpr->u.zToken[1]=='\'' ); + pVal = valueNew(db, pCtx); + if( !pVal ) goto no_mem; + zVal = &pExpr->u.zToken[2]; + nVal = sqlite3Strlen30(zVal)-1; + assert( zVal[nVal]=='\'' ); + sqlite3VdbeMemSetStr(pVal, sqlite3HexToBlob(db, zVal, nVal), nVal/2, + 0, SQLITE_DYNAMIC); + } +#endif +#ifdef SQLITE_ENABLE_STAT4 + else if( op==TK_FUNCTION && pCtx!=0 ){ + rc = valueFromFunction(db, pExpr, enc, affinity, &pVal, pCtx); + } +#endif + else if( op==TK_TRUEFALSE ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + pVal = valueNew(db, pCtx); + if( pVal ){ + pVal->flags = MEM_Int; + pVal->u.i = pExpr->u.zToken[4]==0; + } + } + + *ppVal = pVal; + return rc; + +no_mem: +#ifdef SQLITE_ENABLE_STAT4 + if( pCtx==0 || NEVER(pCtx->pParse->nErr==0) ) +#endif + sqlite3OomFault(db); + sqlite3DbFree(db, zVal); + assert( *ppVal==0 ); +#ifdef SQLITE_ENABLE_STAT4 + if( pCtx==0 ) sqlite3ValueFree(pVal); +#else + assert( pCtx==0 ); sqlite3ValueFree(pVal); +#endif + return SQLITE_NOMEM_BKPT; +} + +/* +** Create a new sqlite3_value object, containing the value of pExpr. +** +** This only works for very simple expressions that consist of one constant +** token (i.e. "5", "5.1", "'a string'"). If the expression can +** be converted directly into a value, then the value is allocated and +** a pointer written to *ppVal. The caller is responsible for deallocating +** the value by passing it to sqlite3ValueFree() later on. If the expression +** cannot be converted to a value, then *ppVal is set to NULL. +*/ +SQLITE_PRIVATE int sqlite3ValueFromExpr( + sqlite3 *db, /* The database connection */ + const Expr *pExpr, /* The expression to evaluate */ + u8 enc, /* Encoding to use */ + u8 affinity, /* Affinity to use */ + sqlite3_value **ppVal /* Write the new value here */ +){ + return pExpr ? valueFromExpr(db, pExpr, enc, affinity, ppVal, 0) : 0; +} + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Attempt to extract a value from pExpr and use it to construct *ppVal. +** +** If pAlloc is not NULL, then an UnpackedRecord object is created for +** pAlloc if one does not exist and the new value is added to the +** UnpackedRecord object. +** +** A value is extracted in the following cases: +** +** * (pExpr==0). In this case the value is assumed to be an SQL NULL, +** +** * The expression is a bound variable, and this is a reprepare, or +** +** * The expression is a literal value. +** +** On success, *ppVal is made to point to the extracted value. The caller +** is responsible for ensuring that the value is eventually freed. +*/ +static int stat4ValueFromExpr( + Parse *pParse, /* Parse context */ + Expr *pExpr, /* The expression to extract a value from */ + u8 affinity, /* Affinity to use */ + struct ValueNewStat4Ctx *pAlloc,/* How to allocate space. Or NULL */ + sqlite3_value **ppVal /* OUT: New value object (or NULL) */ +){ + int rc = SQLITE_OK; + sqlite3_value *pVal = 0; + sqlite3 *db = pParse->db; + + /* Skip over any TK_COLLATE nodes */ + pExpr = sqlite3ExprSkipCollate(pExpr); + + assert( pExpr==0 || pExpr->op!=TK_REGISTER || pExpr->op2!=TK_VARIABLE ); + if( !pExpr ){ + pVal = valueNew(db, pAlloc); + if( pVal ){ + sqlite3VdbeMemSetNull((Mem*)pVal); + } + }else if( pExpr->op==TK_VARIABLE && (db->flags & SQLITE_EnableQPSG)==0 ){ + Vdbe *v; + int iBindVar = pExpr->iColumn; + sqlite3VdbeSetVarmask(pParse->pVdbe, iBindVar); + if( (v = pParse->pReprepare)!=0 ){ + pVal = valueNew(db, pAlloc); + if( pVal ){ + rc = sqlite3VdbeMemCopy((Mem*)pVal, &v->aVar[iBindVar-1]); + sqlite3ValueApplyAffinity(pVal, affinity, ENC(db)); + pVal->db = pParse->db; + } + } + }else{ + rc = valueFromExpr(db, pExpr, ENC(db), affinity, &pVal, pAlloc); + } + + assert( pVal==0 || pVal->db==db ); + *ppVal = pVal; + return rc; +} + +/* +** This function is used to allocate and populate UnpackedRecord +** structures intended to be compared against sample index keys stored +** in the sqlite_stat4 table. +** +** A single call to this function populates zero or more fields of the +** record starting with field iVal (fields are numbered from left to +** right starting with 0). A single field is populated if: +** +** * (pExpr==0). In this case the value is assumed to be an SQL NULL, +** +** * The expression is a bound variable, and this is a reprepare, or +** +** * The sqlite3ValueFromExpr() function is able to extract a value +** from the expression (i.e. the expression is a literal value). +** +** Or, if pExpr is a TK_VECTOR, one field is populated for each of the +** vector components that match either of the two latter criteria listed +** above. +** +** Before any value is appended to the record, the affinity of the +** corresponding column within index pIdx is applied to it. Before +** this function returns, output parameter *pnExtract is set to the +** number of values appended to the record. +** +** When this function is called, *ppRec must either point to an object +** allocated by an earlier call to this function, or must be NULL. If it +** is NULL and a value can be successfully extracted, a new UnpackedRecord +** is allocated (and *ppRec set to point to it) before returning. +** +** Unless an error is encountered, SQLITE_OK is returned. It is not an +** error if a value cannot be extracted from pExpr. If an error does +** occur, an SQLite error code is returned. +*/ +SQLITE_PRIVATE int sqlite3Stat4ProbeSetValue( + Parse *pParse, /* Parse context */ + Index *pIdx, /* Index being probed */ + UnpackedRecord **ppRec, /* IN/OUT: Probe record */ + Expr *pExpr, /* The expression to extract a value from */ + int nElem, /* Maximum number of values to append */ + int iVal, /* Array element to populate */ + int *pnExtract /* OUT: Values appended to the record */ +){ + int rc = SQLITE_OK; + int nExtract = 0; + + if( pExpr==0 || pExpr->op!=TK_SELECT ){ + int i; + struct ValueNewStat4Ctx alloc; + + alloc.pParse = pParse; + alloc.pIdx = pIdx; + alloc.ppRec = ppRec; + + for(i=0; idb, pIdx, iVal+i); + alloc.iVal = iVal+i; + rc = stat4ValueFromExpr(pParse, pElem, aff, &alloc, &pVal); + if( !pVal ) break; + nExtract++; + } + } + + *pnExtract = nExtract; + return rc; +} + +/* +** Attempt to extract a value from expression pExpr using the methods +** as described for sqlite3Stat4ProbeSetValue() above. +** +** If successful, set *ppVal to point to a new value object and return +** SQLITE_OK. If no value can be extracted, but no other error occurs +** (e.g. OOM), return SQLITE_OK and set *ppVal to NULL. Or, if an error +** does occur, return an SQLite error code. The final value of *ppVal +** is undefined in this case. +*/ +SQLITE_PRIVATE int sqlite3Stat4ValueFromExpr( + Parse *pParse, /* Parse context */ + Expr *pExpr, /* The expression to extract a value from */ + u8 affinity, /* Affinity to use */ + sqlite3_value **ppVal /* OUT: New value object (or NULL) */ +){ + return stat4ValueFromExpr(pParse, pExpr, affinity, 0, ppVal); +} + +/* +** Extract the iCol-th column from the nRec-byte record in pRec. Write +** the column value into *ppVal. If *ppVal is initially NULL then a new +** sqlite3_value object is allocated. +** +** If *ppVal is initially NULL then the caller is responsible for +** ensuring that the value written into *ppVal is eventually freed. +*/ +SQLITE_PRIVATE int sqlite3Stat4Column( + sqlite3 *db, /* Database handle */ + const void *pRec, /* Pointer to buffer containing record */ + int nRec, /* Size of buffer pRec in bytes */ + int iCol, /* Column to extract */ + sqlite3_value **ppVal /* OUT: Extracted value */ +){ + u32 t = 0; /* a column type code */ + int nHdr; /* Size of the header in the record */ + int iHdr; /* Next unread header byte */ + int iField; /* Next unread data byte */ + int szField = 0; /* Size of the current data field */ + int i; /* Column index */ + u8 *a = (u8*)pRec; /* Typecast byte array */ + Mem *pMem = *ppVal; /* Write result into this Mem object */ + + assert( iCol>0 ); + iHdr = getVarint32(a, nHdr); + if( nHdr>nRec || iHdr>=nHdr ) return SQLITE_CORRUPT_BKPT; + iField = nHdr; + for(i=0; i<=iCol; i++){ + iHdr += getVarint32(&a[iHdr], t); + testcase( iHdr==nHdr ); + testcase( iHdr==nHdr+1 ); + if( iHdr>nHdr ) return SQLITE_CORRUPT_BKPT; + szField = sqlite3VdbeSerialTypeLen(t); + iField += szField; + } + testcase( iField==nRec ); + testcase( iField==nRec+1 ); + if( iField>nRec ) return SQLITE_CORRUPT_BKPT; + if( pMem==0 ){ + pMem = *ppVal = sqlite3ValueNew(db); + if( pMem==0 ) return SQLITE_NOMEM_BKPT; + } + sqlite3VdbeSerialGet(&a[iField-szField], t, pMem); + pMem->enc = ENC(db); + return SQLITE_OK; +} + +/* +** Unless it is NULL, the argument must be an UnpackedRecord object returned +** by an earlier call to sqlite3Stat4ProbeSetValue(). This call deletes +** the object. +*/ +SQLITE_PRIVATE void sqlite3Stat4ProbeFree(UnpackedRecord *pRec){ + if( pRec ){ + int i; + int nCol = pRec->pKeyInfo->nAllField; + Mem *aMem = pRec->aMem; + sqlite3 *db = aMem[0].db; + for(i=0; ipKeyInfo); + sqlite3DbFreeNN(db, pRec); + } +} +#endif /* ifdef SQLITE_ENABLE_STAT4 */ + +/* +** Change the string value of an sqlite3_value object +*/ +SQLITE_PRIVATE void sqlite3ValueSetStr( + sqlite3_value *v, /* Value to be set */ + int n, /* Length of string z */ + const void *z, /* Text of the new string */ + u8 enc, /* Encoding to use */ + void (*xDel)(void*) /* Destructor for the string */ +){ + if( v ) sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, xDel); +} + +/* +** Free an sqlite3_value object +*/ +SQLITE_PRIVATE void sqlite3ValueFree(sqlite3_value *v){ + if( !v ) return; + sqlite3VdbeMemRelease((Mem *)v); + sqlite3DbFreeNN(((Mem*)v)->db, v); +} + +/* +** The sqlite3ValueBytes() routine returns the number of bytes in the +** sqlite3_value object assuming that it uses the encoding "enc". +** The valueBytes() routine is a helper function. +*/ +static SQLITE_NOINLINE int valueBytes(sqlite3_value *pVal, u8 enc){ + return valueToText(pVal, enc)!=0 ? pVal->n : 0; +} +SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ + Mem *p = (Mem*)pVal; + assert( (p->flags & MEM_Null)==0 || (p->flags & (MEM_Str|MEM_Blob))==0 ); + if( (p->flags & MEM_Str)!=0 && pVal->enc==enc ){ + return p->n; + } + if( (p->flags & MEM_Blob)!=0 ){ + if( p->flags & MEM_Zero ){ + return p->n + p->u.nZero; + }else{ + return p->n; + } + } + if( p->flags & MEM_Null ) return 0; + return valueBytes(pVal, enc); +} + +/************** End of vdbemem.c *********************************************/ +/************** Begin file vdbeaux.c *****************************************/ +/* +** 2003 September 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used for creating, destroying, and populating +** a VDBE (or an "sqlite3_stmt" as it is known to the outside world.) +*/ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +/* Forward references */ +static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef); +static void vdbeFreeOpArray(sqlite3 *, Op *, int); + +/* +** Create a new virtual database engine. +*/ +SQLITE_PRIVATE Vdbe *sqlite3VdbeCreate(Parse *pParse){ + sqlite3 *db = pParse->db; + Vdbe *p; + p = sqlite3DbMallocRawNN(db, sizeof(Vdbe) ); + if( p==0 ) return 0; + memset(&p->aOp, 0, sizeof(Vdbe)-offsetof(Vdbe,aOp)); + p->db = db; + if( db->pVdbe ){ + db->pVdbe->pPrev = p; + } + p->pNext = db->pVdbe; + p->pPrev = 0; + db->pVdbe = p; + p->iVdbeMagic = VDBE_MAGIC_INIT; + p->pParse = pParse; + pParse->pVdbe = p; + assert( pParse->aLabel==0 ); + assert( pParse->nLabel==0 ); + assert( p->nOpAlloc==0 ); + assert( pParse->szOpAlloc==0 ); + sqlite3VdbeAddOp2(p, OP_Init, 0, 1); + return p; +} + +/* +** Return the Parse object that owns a Vdbe object. +*/ +SQLITE_PRIVATE Parse *sqlite3VdbeParser(Vdbe *p){ + return p->pParse; +} + +/* +** Change the error string stored in Vdbe.zErrMsg +*/ +SQLITE_PRIVATE void sqlite3VdbeError(Vdbe *p, const char *zFormat, ...){ + va_list ap; + sqlite3DbFree(p->db, p->zErrMsg); + va_start(ap, zFormat); + p->zErrMsg = sqlite3VMPrintf(p->db, zFormat, ap); + va_end(ap); +} + +/* +** Remember the SQL string for a prepared statement. +*/ +SQLITE_PRIVATE void sqlite3VdbeSetSql(Vdbe *p, const char *z, int n, u8 prepFlags){ + if( p==0 ) return; + p->prepFlags = prepFlags; + if( (prepFlags & SQLITE_PREPARE_SAVESQL)==0 ){ + p->expmask = 0; + } + assert( p->zSql==0 ); + p->zSql = sqlite3DbStrNDup(p->db, z, n); +} + +#ifdef SQLITE_ENABLE_NORMALIZE +/* +** Add a new element to the Vdbe->pDblStr list. +*/ +SQLITE_PRIVATE void sqlite3VdbeAddDblquoteStr(sqlite3 *db, Vdbe *p, const char *z){ + if( p ){ + int n = sqlite3Strlen30(z); + DblquoteStr *pStr = sqlite3DbMallocRawNN(db, + sizeof(*pStr)+n+1-sizeof(pStr->z)); + if( pStr ){ + pStr->pNextStr = p->pDblStr; + p->pDblStr = pStr; + memcpy(pStr->z, z, n+1); + } + } +} +#endif + +#ifdef SQLITE_ENABLE_NORMALIZE +/* +** zId of length nId is a double-quoted identifier. Check to see if +** that identifier is really used as a string literal. +*/ +SQLITE_PRIVATE int sqlite3VdbeUsesDoubleQuotedString( + Vdbe *pVdbe, /* The prepared statement */ + const char *zId /* The double-quoted identifier, already dequoted */ +){ + DblquoteStr *pStr; + assert( zId!=0 ); + if( pVdbe->pDblStr==0 ) return 0; + for(pStr=pVdbe->pDblStr; pStr; pStr=pStr->pNextStr){ + if( strcmp(zId, pStr->z)==0 ) return 1; + } + return 0; +} +#endif + +/* +** Swap all content between two VDBE structures. +*/ +SQLITE_PRIVATE void sqlite3VdbeSwap(Vdbe *pA, Vdbe *pB){ + Vdbe tmp, *pTmp; + char *zTmp; + assert( pA->db==pB->db ); + tmp = *pA; + *pA = *pB; + *pB = tmp; + pTmp = pA->pNext; + pA->pNext = pB->pNext; + pB->pNext = pTmp; + pTmp = pA->pPrev; + pA->pPrev = pB->pPrev; + pB->pPrev = pTmp; + zTmp = pA->zSql; + pA->zSql = pB->zSql; + pB->zSql = zTmp; +#ifdef SQLITE_ENABLE_NORMALIZE + zTmp = pA->zNormSql; + pA->zNormSql = pB->zNormSql; + pB->zNormSql = zTmp; +#endif + pB->expmask = pA->expmask; + pB->prepFlags = pA->prepFlags; + memcpy(pB->aCounter, pA->aCounter, sizeof(pB->aCounter)); + pB->aCounter[SQLITE_STMTSTATUS_REPREPARE]++; +} + +/* +** Resize the Vdbe.aOp array so that it is at least nOp elements larger +** than its current size. nOp is guaranteed to be less than or equal +** to 1024/sizeof(Op). +** +** If an out-of-memory error occurs while resizing the array, return +** SQLITE_NOMEM. In this case Vdbe.aOp and Vdbe.nOpAlloc remain +** unchanged (this is so that any opcodes already allocated can be +** correctly deallocated along with the rest of the Vdbe). +*/ +static int growOpArray(Vdbe *v, int nOp){ + VdbeOp *pNew; + Parse *p = v->pParse; + + /* The SQLITE_TEST_REALLOC_STRESS compile-time option is designed to force + ** more frequent reallocs and hence provide more opportunities for + ** simulated OOM faults. SQLITE_TEST_REALLOC_STRESS is generally used + ** during testing only. With SQLITE_TEST_REALLOC_STRESS grow the op array + ** by the minimum* amount required until the size reaches 512. Normal + ** operation (without SQLITE_TEST_REALLOC_STRESS) is to double the current + ** size of the op array or add 1KB of space, whichever is smaller. */ +#ifdef SQLITE_TEST_REALLOC_STRESS + sqlite3_int64 nNew = (v->nOpAlloc>=512 ? 2*(sqlite3_int64)v->nOpAlloc + : (sqlite3_int64)v->nOpAlloc+nOp); +#else + sqlite3_int64 nNew = (v->nOpAlloc ? 2*(sqlite3_int64)v->nOpAlloc + : (sqlite3_int64)(1024/sizeof(Op))); + UNUSED_PARAMETER(nOp); +#endif + + /* Ensure that the size of a VDBE does not grow too large */ + if( nNew > p->db->aLimit[SQLITE_LIMIT_VDBE_OP] ){ + sqlite3OomFault(p->db); + return SQLITE_NOMEM; + } + + assert( nOp<=(1024/sizeof(Op)) ); + assert( nNew>=(v->nOpAlloc+nOp) ); + pNew = sqlite3DbRealloc(p->db, v->aOp, nNew*sizeof(Op)); + if( pNew ){ + p->szOpAlloc = sqlite3DbMallocSize(p->db, pNew); + v->nOpAlloc = p->szOpAlloc/sizeof(Op); + v->aOp = pNew; + } + return (pNew ? SQLITE_OK : SQLITE_NOMEM_BKPT); +} + +#ifdef SQLITE_DEBUG +/* This routine is just a convenient place to set a breakpoint that will +** fire after each opcode is inserted and displayed using +** "PRAGMA vdbe_addoptrace=on". Parameters "pc" (program counter) and +** pOp are available to make the breakpoint conditional. +** +** Other useful labels for breakpoints include: +** test_trace_breakpoint(pc,pOp) +** sqlite3CorruptError(lineno) +** sqlite3MisuseError(lineno) +** sqlite3CantopenError(lineno) +*/ +static void test_addop_breakpoint(int pc, Op *pOp){ + static int n = 0; + n++; +} +#endif + +/* +** Add a new instruction to the list of instructions current in the +** VDBE. Return the address of the new instruction. +** +** Parameters: +** +** p Pointer to the VDBE +** +** op The opcode for this instruction +** +** p1, p2, p3 Operands +** +** Use the sqlite3VdbeResolveLabel() function to fix an address and +** the sqlite3VdbeChangeP4() function to change the value of the P4 +** operand. +*/ +static SQLITE_NOINLINE int growOp3(Vdbe *p, int op, int p1, int p2, int p3){ + assert( p->nOpAlloc<=p->nOp ); + if( growOpArray(p, 1) ) return 1; + assert( p->nOpAlloc>p->nOp ); + return sqlite3VdbeAddOp3(p, op, p1, p2, p3); +} +SQLITE_PRIVATE int sqlite3VdbeAddOp3(Vdbe *p, int op, int p1, int p2, int p3){ + int i; + VdbeOp *pOp; + + i = p->nOp; + assert( p->iVdbeMagic==VDBE_MAGIC_INIT ); + assert( op>=0 && op<0xff ); + if( p->nOpAlloc<=i ){ + return growOp3(p, op, p1, p2, p3); + } + assert( p->aOp!=0 ); + p->nOp++; + pOp = &p->aOp[i]; + assert( pOp!=0 ); + pOp->opcode = (u8)op; + pOp->p5 = 0; + pOp->p1 = p1; + pOp->p2 = p2; + pOp->p3 = p3; + pOp->p4.p = 0; + pOp->p4type = P4_NOTUSED; +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + pOp->zComment = 0; +#endif +#ifdef SQLITE_DEBUG + if( p->db->flags & SQLITE_VdbeAddopTrace ){ + sqlite3VdbePrintOp(0, i, &p->aOp[i]); + test_addop_breakpoint(i, &p->aOp[i]); + } +#endif +#ifdef VDBE_PROFILE + pOp->cycles = 0; + pOp->cnt = 0; +#endif +#ifdef SQLITE_VDBE_COVERAGE + pOp->iSrcLine = 0; +#endif + return i; +} +SQLITE_PRIVATE int sqlite3VdbeAddOp0(Vdbe *p, int op){ + return sqlite3VdbeAddOp3(p, op, 0, 0, 0); +} +SQLITE_PRIVATE int sqlite3VdbeAddOp1(Vdbe *p, int op, int p1){ + return sqlite3VdbeAddOp3(p, op, p1, 0, 0); +} +SQLITE_PRIVATE int sqlite3VdbeAddOp2(Vdbe *p, int op, int p1, int p2){ + return sqlite3VdbeAddOp3(p, op, p1, p2, 0); +} + +/* Generate code for an unconditional jump to instruction iDest +*/ +SQLITE_PRIVATE int sqlite3VdbeGoto(Vdbe *p, int iDest){ + return sqlite3VdbeAddOp3(p, OP_Goto, 0, iDest, 0); +} + +/* Generate code to cause the string zStr to be loaded into +** register iDest +*/ +SQLITE_PRIVATE int sqlite3VdbeLoadString(Vdbe *p, int iDest, const char *zStr){ + return sqlite3VdbeAddOp4(p, OP_String8, 0, iDest, 0, zStr, 0); +} + +/* +** Generate code that initializes multiple registers to string or integer +** constants. The registers begin with iDest and increase consecutively. +** One register is initialized for each characgter in zTypes[]. For each +** "s" character in zTypes[], the register is a string if the argument is +** not NULL, or OP_Null if the value is a null pointer. For each "i" character +** in zTypes[], the register is initialized to an integer. +** +** If the input string does not end with "X" then an OP_ResultRow instruction +** is generated for the values inserted. +*/ +SQLITE_PRIVATE void sqlite3VdbeMultiLoad(Vdbe *p, int iDest, const char *zTypes, ...){ + va_list ap; + int i; + char c; + va_start(ap, zTypes); + for(i=0; (c = zTypes[i])!=0; i++){ + if( c=='s' ){ + const char *z = va_arg(ap, const char*); + sqlite3VdbeAddOp4(p, z==0 ? OP_Null : OP_String8, 0, iDest+i, 0, z, 0); + }else if( c=='i' ){ + sqlite3VdbeAddOp2(p, OP_Integer, va_arg(ap, int), iDest+i); + }else{ + goto skip_op_resultrow; + } + } + sqlite3VdbeAddOp2(p, OP_ResultRow, iDest, i); +skip_op_resultrow: + va_end(ap); +} + +/* +** Add an opcode that includes the p4 value as a pointer. +*/ +SQLITE_PRIVATE int sqlite3VdbeAddOp4( + Vdbe *p, /* Add the opcode to this VM */ + int op, /* The new opcode */ + int p1, /* The P1 operand */ + int p2, /* The P2 operand */ + int p3, /* The P3 operand */ + const char *zP4, /* The P4 operand */ + int p4type /* P4 operand type */ +){ + int addr = sqlite3VdbeAddOp3(p, op, p1, p2, p3); + sqlite3VdbeChangeP4(p, addr, zP4, p4type); + return addr; +} + +/* +** Add an OP_Function or OP_PureFunc opcode. +** +** The eCallCtx argument is information (typically taken from Expr.op2) +** that describes the calling context of the function. 0 means a general +** function call. NC_IsCheck means called by a check constraint, +** NC_IdxExpr means called as part of an index expression. NC_PartIdx +** means in the WHERE clause of a partial index. NC_GenCol means called +** while computing a generated column value. 0 is the usual case. +*/ +SQLITE_PRIVATE int sqlite3VdbeAddFunctionCall( + Parse *pParse, /* Parsing context */ + int p1, /* Constant argument mask */ + int p2, /* First argument register */ + int p3, /* Register into which results are written */ + int nArg, /* Number of argument */ + const FuncDef *pFunc, /* The function to be invoked */ + int eCallCtx /* Calling context */ +){ + Vdbe *v = pParse->pVdbe; + int nByte; + int addr; + sqlite3_context *pCtx; + assert( v ); + nByte = sizeof(*pCtx) + (nArg-1)*sizeof(sqlite3_value*); + pCtx = sqlite3DbMallocRawNN(pParse->db, nByte); + if( pCtx==0 ){ + assert( pParse->db->mallocFailed ); + freeEphemeralFunction(pParse->db, (FuncDef*)pFunc); + return 0; + } + pCtx->pOut = 0; + pCtx->pFunc = (FuncDef*)pFunc; + pCtx->pVdbe = 0; + pCtx->isError = 0; + pCtx->argc = nArg; + pCtx->iOp = sqlite3VdbeCurrentAddr(v); + addr = sqlite3VdbeAddOp4(v, eCallCtx ? OP_PureFunc : OP_Function, + p1, p2, p3, (char*)pCtx, P4_FUNCCTX); + sqlite3VdbeChangeP5(v, eCallCtx & NC_SelfRef); + return addr; +} + +/* +** Add an opcode that includes the p4 value with a P4_INT64 or +** P4_REAL type. +*/ +SQLITE_PRIVATE int sqlite3VdbeAddOp4Dup8( + Vdbe *p, /* Add the opcode to this VM */ + int op, /* The new opcode */ + int p1, /* The P1 operand */ + int p2, /* The P2 operand */ + int p3, /* The P3 operand */ + const u8 *zP4, /* The P4 operand */ + int p4type /* P4 operand type */ +){ + char *p4copy = sqlite3DbMallocRawNN(sqlite3VdbeDb(p), 8); + if( p4copy ) memcpy(p4copy, zP4, 8); + return sqlite3VdbeAddOp4(p, op, p1, p2, p3, p4copy, p4type); +} + +#ifndef SQLITE_OMIT_EXPLAIN +/* +** Return the address of the current EXPLAIN QUERY PLAN baseline. +** 0 means "none". +*/ +SQLITE_PRIVATE int sqlite3VdbeExplainParent(Parse *pParse){ + VdbeOp *pOp; + if( pParse->addrExplain==0 ) return 0; + pOp = sqlite3VdbeGetOp(pParse->pVdbe, pParse->addrExplain); + return pOp->p2; +} + +/* +** Set a debugger breakpoint on the following routine in order to +** monitor the EXPLAIN QUERY PLAN code generation. +*/ +#if defined(SQLITE_DEBUG) +SQLITE_PRIVATE void sqlite3ExplainBreakpoint(const char *z1, const char *z2){ + (void)z1; + (void)z2; +} +#endif + +/* +** Add a new OP_Explain opcode. +** +** If the bPush flag is true, then make this opcode the parent for +** subsequent Explains until sqlite3VdbeExplainPop() is called. +*/ +SQLITE_PRIVATE void sqlite3VdbeExplain(Parse *pParse, u8 bPush, const char *zFmt, ...){ +#ifndef SQLITE_DEBUG + /* Always include the OP_Explain opcodes if SQLITE_DEBUG is defined. + ** But omit them (for performance) during production builds */ + if( pParse->explain==2 ) +#endif + { + char *zMsg; + Vdbe *v; + va_list ap; + int iThis; + va_start(ap, zFmt); + zMsg = sqlite3VMPrintf(pParse->db, zFmt, ap); + va_end(ap); + v = pParse->pVdbe; + iThis = v->nOp; + sqlite3VdbeAddOp4(v, OP_Explain, iThis, pParse->addrExplain, 0, + zMsg, P4_DYNAMIC); + sqlite3ExplainBreakpoint(bPush?"PUSH":"", sqlite3VdbeGetOp(v,-1)->p4.z); + if( bPush){ + pParse->addrExplain = iThis; + } + } +} + +/* +** Pop the EXPLAIN QUERY PLAN stack one level. +*/ +SQLITE_PRIVATE void sqlite3VdbeExplainPop(Parse *pParse){ + sqlite3ExplainBreakpoint("POP", 0); + pParse->addrExplain = sqlite3VdbeExplainParent(pParse); +} +#endif /* SQLITE_OMIT_EXPLAIN */ + +/* +** Add an OP_ParseSchema opcode. This routine is broken out from +** sqlite3VdbeAddOp4() since it needs to also needs to mark all btrees +** as having been used. +** +** The zWhere string must have been obtained from sqlite3_malloc(). +** This routine will take ownership of the allocated memory. +*/ +SQLITE_PRIVATE void sqlite3VdbeAddParseSchemaOp(Vdbe *p, int iDb, char *zWhere, u16 p5){ + int j; + sqlite3VdbeAddOp4(p, OP_ParseSchema, iDb, 0, 0, zWhere, P4_DYNAMIC); + sqlite3VdbeChangeP5(p, p5); + for(j=0; jdb->nDb; j++) sqlite3VdbeUsesBtree(p, j); + sqlite3MayAbort(p->pParse); +} + +/* +** Add an opcode that includes the p4 value as an integer. +*/ +SQLITE_PRIVATE int sqlite3VdbeAddOp4Int( + Vdbe *p, /* Add the opcode to this VM */ + int op, /* The new opcode */ + int p1, /* The P1 operand */ + int p2, /* The P2 operand */ + int p3, /* The P3 operand */ + int p4 /* The P4 operand as an integer */ +){ + int addr = sqlite3VdbeAddOp3(p, op, p1, p2, p3); + if( p->db->mallocFailed==0 ){ + VdbeOp *pOp = &p->aOp[addr]; + pOp->p4type = P4_INT32; + pOp->p4.i = p4; + } + return addr; +} + +/* Insert the end of a co-routine +*/ +SQLITE_PRIVATE void sqlite3VdbeEndCoroutine(Vdbe *v, int regYield){ + sqlite3VdbeAddOp1(v, OP_EndCoroutine, regYield); + + /* Clear the temporary register cache, thereby ensuring that each + ** co-routine has its own independent set of registers, because co-routines + ** might expect their registers to be preserved across an OP_Yield, and + ** that could cause problems if two or more co-routines are using the same + ** temporary register. + */ + v->pParse->nTempReg = 0; + v->pParse->nRangeReg = 0; +} + +/* +** Create a new symbolic label for an instruction that has yet to be +** coded. The symbolic label is really just a negative number. The +** label can be used as the P2 value of an operation. Later, when +** the label is resolved to a specific address, the VDBE will scan +** through its operation list and change all values of P2 which match +** the label into the resolved address. +** +** The VDBE knows that a P2 value is a label because labels are +** always negative and P2 values are suppose to be non-negative. +** Hence, a negative P2 value is a label that has yet to be resolved. +** (Later:) This is only true for opcodes that have the OPFLG_JUMP +** property. +** +** Variable usage notes: +** +** Parse.aLabel[x] Stores the address that the x-th label resolves +** into. For testing (SQLITE_DEBUG), unresolved +** labels stores -1, but that is not required. +** Parse.nLabelAlloc Number of slots allocated to Parse.aLabel[] +** Parse.nLabel The *negative* of the number of labels that have +** been issued. The negative is stored because +** that gives a performance improvement over storing +** the equivalent positive value. +*/ +SQLITE_PRIVATE int sqlite3VdbeMakeLabel(Parse *pParse){ + return --pParse->nLabel; +} + +/* +** Resolve label "x" to be the address of the next instruction to +** be inserted. The parameter "x" must have been obtained from +** a prior call to sqlite3VdbeMakeLabel(). +*/ +static SQLITE_NOINLINE void resizeResolveLabel(Parse *p, Vdbe *v, int j){ + int nNewSize = 10 - p->nLabel; + p->aLabel = sqlite3DbReallocOrFree(p->db, p->aLabel, + nNewSize*sizeof(p->aLabel[0])); + if( p->aLabel==0 ){ + p->nLabelAlloc = 0; + }else{ +#ifdef SQLITE_DEBUG + int i; + for(i=p->nLabelAlloc; iaLabel[i] = -1; +#endif + p->nLabelAlloc = nNewSize; + p->aLabel[j] = v->nOp; + } +} +SQLITE_PRIVATE void sqlite3VdbeResolveLabel(Vdbe *v, int x){ + Parse *p = v->pParse; + int j = ADDR(x); + assert( v->iVdbeMagic==VDBE_MAGIC_INIT ); + assert( j<-p->nLabel ); + assert( j>=0 ); +#ifdef SQLITE_DEBUG + if( p->db->flags & SQLITE_VdbeAddopTrace ){ + printf("RESOLVE LABEL %d to %d\n", x, v->nOp); + } +#endif + if( p->nLabelAlloc + p->nLabel < 0 ){ + resizeResolveLabel(p,v,j); + }else{ + assert( p->aLabel[j]==(-1) ); /* Labels may only be resolved once */ + p->aLabel[j] = v->nOp; + } +} + +/* +** Mark the VDBE as one that can only be run one time. +*/ +SQLITE_PRIVATE void sqlite3VdbeRunOnlyOnce(Vdbe *p){ + p->runOnlyOnce = 1; +} + +/* +** Mark the VDBE as one that can only be run multiple times. +*/ +SQLITE_PRIVATE void sqlite3VdbeReusable(Vdbe *p){ + p->runOnlyOnce = 0; +} + +#ifdef SQLITE_DEBUG /* sqlite3AssertMayAbort() logic */ + +/* +** The following type and function are used to iterate through all opcodes +** in a Vdbe main program and each of the sub-programs (triggers) it may +** invoke directly or indirectly. It should be used as follows: +** +** Op *pOp; +** VdbeOpIter sIter; +** +** memset(&sIter, 0, sizeof(sIter)); +** sIter.v = v; // v is of type Vdbe* +** while( (pOp = opIterNext(&sIter)) ){ +** // Do something with pOp +** } +** sqlite3DbFree(v->db, sIter.apSub); +** +*/ +typedef struct VdbeOpIter VdbeOpIter; +struct VdbeOpIter { + Vdbe *v; /* Vdbe to iterate through the opcodes of */ + SubProgram **apSub; /* Array of subprograms */ + int nSub; /* Number of entries in apSub */ + int iAddr; /* Address of next instruction to return */ + int iSub; /* 0 = main program, 1 = first sub-program etc. */ +}; +static Op *opIterNext(VdbeOpIter *p){ + Vdbe *v = p->v; + Op *pRet = 0; + Op *aOp; + int nOp; + + if( p->iSub<=p->nSub ){ + + if( p->iSub==0 ){ + aOp = v->aOp; + nOp = v->nOp; + }else{ + aOp = p->apSub[p->iSub-1]->aOp; + nOp = p->apSub[p->iSub-1]->nOp; + } + assert( p->iAddriAddr]; + p->iAddr++; + if( p->iAddr==nOp ){ + p->iSub++; + p->iAddr = 0; + } + + if( pRet->p4type==P4_SUBPROGRAM ){ + int nByte = (p->nSub+1)*sizeof(SubProgram*); + int j; + for(j=0; jnSub; j++){ + if( p->apSub[j]==pRet->p4.pProgram ) break; + } + if( j==p->nSub ){ + p->apSub = sqlite3DbReallocOrFree(v->db, p->apSub, nByte); + if( !p->apSub ){ + pRet = 0; + }else{ + p->apSub[p->nSub++] = pRet->p4.pProgram; + } + } + } + } + + return pRet; +} + +/* +** Check if the program stored in the VM associated with pParse may +** throw an ABORT exception (causing the statement, but not entire transaction +** to be rolled back). This condition is true if the main program or any +** sub-programs contains any of the following: +** +** * OP_Halt with P1=SQLITE_CONSTRAINT and P2=OE_Abort. +** * OP_HaltIfNull with P1=SQLITE_CONSTRAINT and P2=OE_Abort. +** * OP_Destroy +** * OP_VUpdate +** * OP_VCreate +** * OP_VRename +** * OP_FkCounter with P2==0 (immediate foreign key constraint) +** * OP_CreateBtree/BTREE_INTKEY and OP_InitCoroutine +** (for CREATE TABLE AS SELECT ...) +** +** Then check that the value of Parse.mayAbort is true if an +** ABORT may be thrown, or false otherwise. Return true if it does +** match, or false otherwise. This function is intended to be used as +** part of an assert statement in the compiler. Similar to: +** +** assert( sqlite3VdbeAssertMayAbort(pParse->pVdbe, pParse->mayAbort) ); +*/ +SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){ + int hasAbort = 0; + int hasFkCounter = 0; + int hasCreateTable = 0; + int hasCreateIndex = 0; + int hasInitCoroutine = 0; + Op *pOp; + VdbeOpIter sIter; + memset(&sIter, 0, sizeof(sIter)); + sIter.v = v; + + while( (pOp = opIterNext(&sIter))!=0 ){ + int opcode = pOp->opcode; + if( opcode==OP_Destroy || opcode==OP_VUpdate || opcode==OP_VRename + || opcode==OP_VDestroy + || opcode==OP_VCreate + || opcode==OP_ParseSchema + || ((opcode==OP_Halt || opcode==OP_HaltIfNull) + && ((pOp->p1)!=SQLITE_OK && pOp->p2==OE_Abort)) + ){ + hasAbort = 1; + break; + } + if( opcode==OP_CreateBtree && pOp->p3==BTREE_INTKEY ) hasCreateTable = 1; + if( mayAbort ){ + /* hasCreateIndex may also be set for some DELETE statements that use + ** OP_Clear. So this routine may end up returning true in the case + ** where a "DELETE FROM tbl" has a statement-journal but does not + ** require one. This is not so bad - it is an inefficiency, not a bug. */ + if( opcode==OP_CreateBtree && pOp->p3==BTREE_BLOBKEY ) hasCreateIndex = 1; + if( opcode==OP_Clear ) hasCreateIndex = 1; + } + if( opcode==OP_InitCoroutine ) hasInitCoroutine = 1; +#ifndef SQLITE_OMIT_FOREIGN_KEY + if( opcode==OP_FkCounter && pOp->p1==0 && pOp->p2==1 ){ + hasFkCounter = 1; + } +#endif + } + sqlite3DbFree(v->db, sIter.apSub); + + /* Return true if hasAbort==mayAbort. Or if a malloc failure occurred. + ** If malloc failed, then the while() loop above may not have iterated + ** through all opcodes and hasAbort may be set incorrectly. Return + ** true for this case to prevent the assert() in the callers frame + ** from failing. */ + return ( v->db->mallocFailed || hasAbort==mayAbort || hasFkCounter + || (hasCreateTable && hasInitCoroutine) || hasCreateIndex + ); +} +#endif /* SQLITE_DEBUG - the sqlite3AssertMayAbort() function */ + +#ifdef SQLITE_DEBUG +/* +** Increment the nWrite counter in the VDBE if the cursor is not an +** ephemeral cursor, or if the cursor argument is NULL. +*/ +SQLITE_PRIVATE void sqlite3VdbeIncrWriteCounter(Vdbe *p, VdbeCursor *pC){ + if( pC==0 + || (pC->eCurType!=CURTYPE_SORTER + && pC->eCurType!=CURTYPE_PSEUDO + && !pC->isEphemeral) + ){ + p->nWrite++; + } +} +#endif + +#ifdef SQLITE_DEBUG +/* +** Assert if an Abort at this point in time might result in a corrupt +** database. +*/ +SQLITE_PRIVATE void sqlite3VdbeAssertAbortable(Vdbe *p){ + assert( p->nWrite==0 || p->usesStmtJournal ); +} +#endif + +/* +** This routine is called after all opcodes have been inserted. It loops +** through all the opcodes and fixes up some details. +** +** (1) For each jump instruction with a negative P2 value (a label) +** resolve the P2 value to an actual address. +** +** (2) Compute the maximum number of arguments used by any SQL function +** and store that value in *pMaxFuncArgs. +** +** (3) Update the Vdbe.readOnly and Vdbe.bIsReader flags to accurately +** indicate what the prepared statement actually does. +** +** (4) Initialize the p4.xAdvance pointer on opcodes that use it. +** +** (5) Reclaim the memory allocated for storing labels. +** +** This routine will only function correctly if the mkopcodeh.tcl generator +** script numbers the opcodes correctly. Changes to this routine must be +** coordinated with changes to mkopcodeh.tcl. +*/ +static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ + int nMaxArgs = *pMaxFuncArgs; + Op *pOp; + Parse *pParse = p->pParse; + int *aLabel = pParse->aLabel; + p->readOnly = 1; + p->bIsReader = 0; + pOp = &p->aOp[p->nOp-1]; + while(1){ + + /* Only JUMP opcodes and the short list of special opcodes in the switch + ** below need to be considered. The mkopcodeh.tcl generator script groups + ** all these opcodes together near the front of the opcode list. Skip + ** any opcode that does not need processing by virtual of the fact that + ** it is larger than SQLITE_MX_JUMP_OPCODE, as a performance optimization. + */ + if( pOp->opcode<=SQLITE_MX_JUMP_OPCODE ){ + /* NOTE: Be sure to update mkopcodeh.tcl when adding or removing + ** cases from this switch! */ + switch( pOp->opcode ){ + case OP_Transaction: { + if( pOp->p2!=0 ) p->readOnly = 0; + /* no break */ deliberate_fall_through + } + case OP_AutoCommit: + case OP_Savepoint: { + p->bIsReader = 1; + break; + } +#ifndef SQLITE_OMIT_WAL + case OP_Checkpoint: +#endif + case OP_Vacuum: + case OP_JournalMode: { + p->readOnly = 0; + p->bIsReader = 1; + break; + } + case OP_Next: + case OP_SorterNext: { + pOp->p4.xAdvance = sqlite3BtreeNext; + pOp->p4type = P4_ADVANCE; + /* The code generator never codes any of these opcodes as a jump + ** to a label. They are always coded as a jump backwards to a + ** known address */ + assert( pOp->p2>=0 ); + break; + } + case OP_Prev: { + pOp->p4.xAdvance = sqlite3BtreePrevious; + pOp->p4type = P4_ADVANCE; + /* The code generator never codes any of these opcodes as a jump + ** to a label. They are always coded as a jump backwards to a + ** known address */ + assert( pOp->p2>=0 ); + break; + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + case OP_VUpdate: { + if( pOp->p2>nMaxArgs ) nMaxArgs = pOp->p2; + break; + } + case OP_VFilter: { + int n; + assert( (pOp - p->aOp) >= 3 ); + assert( pOp[-1].opcode==OP_Integer ); + n = pOp[-1].p1; + if( n>nMaxArgs ) nMaxArgs = n; + /* Fall through into the default case */ + /* no break */ deliberate_fall_through + } +#endif + default: { + if( pOp->p2<0 ){ + /* The mkopcodeh.tcl script has so arranged things that the only + ** non-jump opcodes less than SQLITE_MX_JUMP_CODE are guaranteed to + ** have non-negative values for P2. */ + assert( (sqlite3OpcodeProperty[pOp->opcode] & OPFLG_JUMP)!=0 ); + assert( ADDR(pOp->p2)<-pParse->nLabel ); + pOp->p2 = aLabel[ADDR(pOp->p2)]; + } + break; + } + } + /* The mkopcodeh.tcl script has so arranged things that the only + ** non-jump opcodes less than SQLITE_MX_JUMP_CODE are guaranteed to + ** have non-negative values for P2. */ + assert( (sqlite3OpcodeProperty[pOp->opcode]&OPFLG_JUMP)==0 || pOp->p2>=0); + } + if( pOp==p->aOp ) break; + pOp--; + } + sqlite3DbFree(p->db, pParse->aLabel); + pParse->aLabel = 0; + pParse->nLabel = 0; + *pMaxFuncArgs = nMaxArgs; + assert( p->bIsReader!=0 || DbMaskAllZero(p->btreeMask) ); +} + +/* +** Return the address of the next instruction to be inserted. +*/ +SQLITE_PRIVATE int sqlite3VdbeCurrentAddr(Vdbe *p){ + assert( p->iVdbeMagic==VDBE_MAGIC_INIT ); + return p->nOp; +} + +/* +** Verify that at least N opcode slots are available in p without +** having to malloc for more space (except when compiled using +** SQLITE_TEST_REALLOC_STRESS). This interface is used during testing +** to verify that certain calls to sqlite3VdbeAddOpList() can never +** fail due to a OOM fault and hence that the return value from +** sqlite3VdbeAddOpList() will always be non-NULL. +*/ +#if defined(SQLITE_DEBUG) && !defined(SQLITE_TEST_REALLOC_STRESS) +SQLITE_PRIVATE void sqlite3VdbeVerifyNoMallocRequired(Vdbe *p, int N){ + assert( p->nOp + N <= p->nOpAlloc ); +} +#endif + +/* +** Verify that the VM passed as the only argument does not contain +** an OP_ResultRow opcode. Fail an assert() if it does. This is used +** by code in pragma.c to ensure that the implementation of certain +** pragmas comports with the flags specified in the mkpragmatab.tcl +** script. +*/ +#if defined(SQLITE_DEBUG) && !defined(SQLITE_TEST_REALLOC_STRESS) +SQLITE_PRIVATE void sqlite3VdbeVerifyNoResultRow(Vdbe *p){ + int i; + for(i=0; inOp; i++){ + assert( p->aOp[i].opcode!=OP_ResultRow ); + } +} +#endif + +/* +** Generate code (a single OP_Abortable opcode) that will +** verify that the VDBE program can safely call Abort in the current +** context. +*/ +#if defined(SQLITE_DEBUG) +SQLITE_PRIVATE void sqlite3VdbeVerifyAbortable(Vdbe *p, int onError){ + if( onError==OE_Abort ) sqlite3VdbeAddOp0(p, OP_Abortable); +} +#endif + +/* +** This function returns a pointer to the array of opcodes associated with +** the Vdbe passed as the first argument. It is the callers responsibility +** to arrange for the returned array to be eventually freed using the +** vdbeFreeOpArray() function. +** +** Before returning, *pnOp is set to the number of entries in the returned +** array. Also, *pnMaxArg is set to the larger of its current value and +** the number of entries in the Vdbe.apArg[] array required to execute the +** returned program. +*/ +SQLITE_PRIVATE VdbeOp *sqlite3VdbeTakeOpArray(Vdbe *p, int *pnOp, int *pnMaxArg){ + VdbeOp *aOp = p->aOp; + assert( aOp && !p->db->mallocFailed ); + + /* Check that sqlite3VdbeUsesBtree() was not called on this VM */ + assert( DbMaskAllZero(p->btreeMask) ); + + resolveP2Values(p, pnMaxArg); + *pnOp = p->nOp; + p->aOp = 0; + return aOp; +} + +/* +** Add a whole list of operations to the operation stack. Return a +** pointer to the first operation inserted. +** +** Non-zero P2 arguments to jump instructions are automatically adjusted +** so that the jump target is relative to the first operation inserted. +*/ +SQLITE_PRIVATE VdbeOp *sqlite3VdbeAddOpList( + Vdbe *p, /* Add opcodes to the prepared statement */ + int nOp, /* Number of opcodes to add */ + VdbeOpList const *aOp, /* The opcodes to be added */ + int iLineno /* Source-file line number of first opcode */ +){ + int i; + VdbeOp *pOut, *pFirst; + assert( nOp>0 ); + assert( p->iVdbeMagic==VDBE_MAGIC_INIT ); + if( p->nOp + nOp > p->nOpAlloc && growOpArray(p, nOp) ){ + return 0; + } + pFirst = pOut = &p->aOp[p->nOp]; + for(i=0; iopcode = aOp->opcode; + pOut->p1 = aOp->p1; + pOut->p2 = aOp->p2; + assert( aOp->p2>=0 ); + if( (sqlite3OpcodeProperty[aOp->opcode] & OPFLG_JUMP)!=0 && aOp->p2>0 ){ + pOut->p2 += p->nOp; + } + pOut->p3 = aOp->p3; + pOut->p4type = P4_NOTUSED; + pOut->p4.p = 0; + pOut->p5 = 0; +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + pOut->zComment = 0; +#endif +#ifdef SQLITE_VDBE_COVERAGE + pOut->iSrcLine = iLineno+i; +#else + (void)iLineno; +#endif +#ifdef SQLITE_DEBUG + if( p->db->flags & SQLITE_VdbeAddopTrace ){ + sqlite3VdbePrintOp(0, i+p->nOp, &p->aOp[i+p->nOp]); + } +#endif + } + p->nOp += nOp; + return pFirst; +} + +#if defined(SQLITE_ENABLE_STMT_SCANSTATUS) +/* +** Add an entry to the array of counters managed by sqlite3_stmt_scanstatus(). +*/ +SQLITE_PRIVATE void sqlite3VdbeScanStatus( + Vdbe *p, /* VM to add scanstatus() to */ + int addrExplain, /* Address of OP_Explain (or 0) */ + int addrLoop, /* Address of loop counter */ + int addrVisit, /* Address of rows visited counter */ + LogEst nEst, /* Estimated number of output rows */ + const char *zName /* Name of table or index being scanned */ +){ + sqlite3_int64 nByte = (p->nScan+1) * sizeof(ScanStatus); + ScanStatus *aNew; + aNew = (ScanStatus*)sqlite3DbRealloc(p->db, p->aScan, nByte); + if( aNew ){ + ScanStatus *pNew = &aNew[p->nScan++]; + pNew->addrExplain = addrExplain; + pNew->addrLoop = addrLoop; + pNew->addrVisit = addrVisit; + pNew->nEst = nEst; + pNew->zName = sqlite3DbStrDup(p->db, zName); + p->aScan = aNew; + } +} +#endif + + +/* +** Change the value of the opcode, or P1, P2, P3, or P5 operands +** for a specific instruction. +*/ +SQLITE_PRIVATE void sqlite3VdbeChangeOpcode(Vdbe *p, int addr, u8 iNewOpcode){ + sqlite3VdbeGetOp(p,addr)->opcode = iNewOpcode; +} +SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){ + sqlite3VdbeGetOp(p,addr)->p1 = val; +} +SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){ + sqlite3VdbeGetOp(p,addr)->p2 = val; +} +SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe *p, int addr, int val){ + sqlite3VdbeGetOp(p,addr)->p3 = val; +} +SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe *p, u16 p5){ + assert( p->nOp>0 || p->db->mallocFailed ); + if( p->nOp>0 ) p->aOp[p->nOp-1].p5 = p5; +} + +/* +** Change the P2 operand of instruction addr so that it points to +** the address of the next instruction to be coded. +*/ +SQLITE_PRIVATE void sqlite3VdbeJumpHere(Vdbe *p, int addr){ + sqlite3VdbeChangeP2(p, addr, p->nOp); +} + +/* +** Change the P2 operand of the jump instruction at addr so that +** the jump lands on the next opcode. Or if the jump instruction was +** the previous opcode (and is thus a no-op) then simply back up +** the next instruction counter by one slot so that the jump is +** overwritten by the next inserted opcode. +** +** This routine is an optimization of sqlite3VdbeJumpHere() that +** strives to omit useless byte-code like this: +** +** 7 Once 0 8 0 +** 8 ... +*/ +SQLITE_PRIVATE void sqlite3VdbeJumpHereOrPopInst(Vdbe *p, int addr){ + if( addr==p->nOp-1 ){ + assert( p->aOp[addr].opcode==OP_Once + || p->aOp[addr].opcode==OP_If + || p->aOp[addr].opcode==OP_FkIfZero ); + assert( p->aOp[addr].p4type==0 ); +#ifdef SQLITE_VDBE_COVERAGE + sqlite3VdbeGetOp(p,-1)->iSrcLine = 0; /* Erase VdbeCoverage() macros */ +#endif + p->nOp--; + }else{ + sqlite3VdbeChangeP2(p, addr, p->nOp); + } +} + + +/* +** If the input FuncDef structure is ephemeral, then free it. If +** the FuncDef is not ephermal, then do nothing. +*/ +static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef){ + if( (pDef->funcFlags & SQLITE_FUNC_EPHEM)!=0 ){ + sqlite3DbFreeNN(db, pDef); + } +} + +/* +** Delete a P4 value if necessary. +*/ +static SQLITE_NOINLINE void freeP4Mem(sqlite3 *db, Mem *p){ + if( p->szMalloc ) sqlite3DbFree(db, p->zMalloc); + sqlite3DbFreeNN(db, p); +} +static SQLITE_NOINLINE void freeP4FuncCtx(sqlite3 *db, sqlite3_context *p){ + freeEphemeralFunction(db, p->pFunc); + sqlite3DbFreeNN(db, p); +} +static void freeP4(sqlite3 *db, int p4type, void *p4){ + assert( db ); + switch( p4type ){ + case P4_FUNCCTX: { + freeP4FuncCtx(db, (sqlite3_context*)p4); + break; + } + case P4_REAL: + case P4_INT64: + case P4_DYNAMIC: + case P4_DYNBLOB: + case P4_INTARRAY: { + sqlite3DbFree(db, p4); + break; + } + case P4_KEYINFO: { + if( db->pnBytesFreed==0 ) sqlite3KeyInfoUnref((KeyInfo*)p4); + break; + } +#ifdef SQLITE_ENABLE_CURSOR_HINTS + case P4_EXPR: { + sqlite3ExprDelete(db, (Expr*)p4); + break; + } +#endif + case P4_FUNCDEF: { + freeEphemeralFunction(db, (FuncDef*)p4); + break; + } + case P4_MEM: { + if( db->pnBytesFreed==0 ){ + sqlite3ValueFree((sqlite3_value*)p4); + }else{ + freeP4Mem(db, (Mem*)p4); + } + break; + } + case P4_VTAB : { + if( db->pnBytesFreed==0 ) sqlite3VtabUnlock((VTable *)p4); + break; + } + } +} + +/* +** Free the space allocated for aOp and any p4 values allocated for the +** opcodes contained within. If aOp is not NULL it is assumed to contain +** nOp entries. +*/ +static void vdbeFreeOpArray(sqlite3 *db, Op *aOp, int nOp){ + if( aOp ){ + Op *pOp; + for(pOp=&aOp[nOp-1]; pOp>=aOp; pOp--){ + if( pOp->p4type <= P4_FREE_IF_LE ) freeP4(db, pOp->p4type, pOp->p4.p); +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + sqlite3DbFree(db, pOp->zComment); +#endif + } + sqlite3DbFreeNN(db, aOp); + } +} + +/* +** Link the SubProgram object passed as the second argument into the linked +** list at Vdbe.pSubProgram. This list is used to delete all sub-program +** objects when the VM is no longer required. +*/ +SQLITE_PRIVATE void sqlite3VdbeLinkSubProgram(Vdbe *pVdbe, SubProgram *p){ + p->pNext = pVdbe->pProgram; + pVdbe->pProgram = p; +} + +/* +** Return true if the given Vdbe has any SubPrograms. +*/ +SQLITE_PRIVATE int sqlite3VdbeHasSubProgram(Vdbe *pVdbe){ + return pVdbe->pProgram!=0; +} + +/* +** Change the opcode at addr into OP_Noop +*/ +SQLITE_PRIVATE int sqlite3VdbeChangeToNoop(Vdbe *p, int addr){ + VdbeOp *pOp; + if( p->db->mallocFailed ) return 0; + assert( addr>=0 && addrnOp ); + pOp = &p->aOp[addr]; + freeP4(p->db, pOp->p4type, pOp->p4.p); + pOp->p4type = P4_NOTUSED; + pOp->p4.z = 0; + pOp->opcode = OP_Noop; + return 1; +} + +/* +** If the last opcode is "op" and it is not a jump destination, +** then remove it. Return true if and only if an opcode was removed. +*/ +SQLITE_PRIVATE int sqlite3VdbeDeletePriorOpcode(Vdbe *p, u8 op){ + if( p->nOp>0 && p->aOp[p->nOp-1].opcode==op ){ + return sqlite3VdbeChangeToNoop(p, p->nOp-1); + }else{ + return 0; + } +} + +#ifdef SQLITE_DEBUG +/* +** Generate an OP_ReleaseReg opcode to indicate that a range of +** registers, except any identified by mask, are no longer in use. +*/ +SQLITE_PRIVATE void sqlite3VdbeReleaseRegisters( + Parse *pParse, /* Parsing context */ + int iFirst, /* Index of first register to be released */ + int N, /* Number of registers to release */ + u32 mask, /* Mask of registers to NOT release */ + int bUndefine /* If true, mark registers as undefined */ +){ + if( N==0 ) return; + assert( pParse->pVdbe ); + assert( iFirst>=1 ); + assert( iFirst+N-1<=pParse->nMem ); + if( N<=31 && mask!=0 ){ + while( N>0 && (mask&1)!=0 ){ + mask >>= 1; + iFirst++; + N--; + } + while( N>0 && N<=32 && (mask & MASKBIT32(N-1))!=0 ){ + mask &= ~MASKBIT32(N-1); + N--; + } + } + if( N>0 ){ + sqlite3VdbeAddOp3(pParse->pVdbe, OP_ReleaseReg, iFirst, N, *(int*)&mask); + if( bUndefine ) sqlite3VdbeChangeP5(pParse->pVdbe, 1); + } +} +#endif /* SQLITE_DEBUG */ + + +/* +** Change the value of the P4 operand for a specific instruction. +** This routine is useful when a large program is loaded from a +** static array using sqlite3VdbeAddOpList but we want to make a +** few minor changes to the program. +** +** If n>=0 then the P4 operand is dynamic, meaning that a copy of +** the string is made into memory obtained from sqlite3_malloc(). +** A value of n==0 means copy bytes of zP4 up to and including the +** first null byte. If n>0 then copy n+1 bytes of zP4. +** +** Other values of n (P4_STATIC, P4_COLLSEQ etc.) indicate that zP4 points +** to a string or structure that is guaranteed to exist for the lifetime of +** the Vdbe. In these cases we can just copy the pointer. +** +** If addr<0 then change P4 on the most recently inserted instruction. +*/ +static void SQLITE_NOINLINE vdbeChangeP4Full( + Vdbe *p, + Op *pOp, + const char *zP4, + int n +){ + if( pOp->p4type ){ + freeP4(p->db, pOp->p4type, pOp->p4.p); + pOp->p4type = 0; + pOp->p4.p = 0; + } + if( n<0 ){ + sqlite3VdbeChangeP4(p, (int)(pOp - p->aOp), zP4, n); + }else{ + if( n==0 ) n = sqlite3Strlen30(zP4); + pOp->p4.z = sqlite3DbStrNDup(p->db, zP4, n); + pOp->p4type = P4_DYNAMIC; + } +} +SQLITE_PRIVATE void sqlite3VdbeChangeP4(Vdbe *p, int addr, const char *zP4, int n){ + Op *pOp; + sqlite3 *db; + assert( p!=0 ); + db = p->db; + assert( p->iVdbeMagic==VDBE_MAGIC_INIT ); + assert( p->aOp!=0 || db->mallocFailed ); + if( db->mallocFailed ){ + if( n!=P4_VTAB ) freeP4(db, n, (void*)*(char**)&zP4); + return; + } + assert( p->nOp>0 ); + assert( addrnOp ); + if( addr<0 ){ + addr = p->nOp - 1; + } + pOp = &p->aOp[addr]; + if( n>=0 || pOp->p4type ){ + vdbeChangeP4Full(p, pOp, zP4, n); + return; + } + if( n==P4_INT32 ){ + /* Note: this cast is safe, because the origin data point was an int + ** that was cast to a (const char *). */ + pOp->p4.i = SQLITE_PTR_TO_INT(zP4); + pOp->p4type = P4_INT32; + }else if( zP4!=0 ){ + assert( n<0 ); + pOp->p4.p = (void*)zP4; + pOp->p4type = (signed char)n; + if( n==P4_VTAB ) sqlite3VtabLock((VTable*)zP4); + } +} + +/* +** Change the P4 operand of the most recently coded instruction +** to the value defined by the arguments. This is a high-speed +** version of sqlite3VdbeChangeP4(). +** +** The P4 operand must not have been previously defined. And the new +** P4 must not be P4_INT32. Use sqlite3VdbeChangeP4() in either of +** those cases. +*/ +SQLITE_PRIVATE void sqlite3VdbeAppendP4(Vdbe *p, void *pP4, int n){ + VdbeOp *pOp; + assert( n!=P4_INT32 && n!=P4_VTAB ); + assert( n<=0 ); + if( p->db->mallocFailed ){ + freeP4(p->db, n, pP4); + }else{ + assert( pP4!=0 ); + assert( p->nOp>0 ); + pOp = &p->aOp[p->nOp-1]; + assert( pOp->p4type==P4_NOTUSED ); + pOp->p4type = n; + pOp->p4.p = pP4; + } +} + +/* +** Set the P4 on the most recently added opcode to the KeyInfo for the +** index given. +*/ +SQLITE_PRIVATE void sqlite3VdbeSetP4KeyInfo(Parse *pParse, Index *pIdx){ + Vdbe *v = pParse->pVdbe; + KeyInfo *pKeyInfo; + assert( v!=0 ); + assert( pIdx!=0 ); + pKeyInfo = sqlite3KeyInfoOfIndex(pParse, pIdx); + if( pKeyInfo ) sqlite3VdbeAppendP4(v, pKeyInfo, P4_KEYINFO); +} + +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS +/* +** Change the comment on the most recently coded instruction. Or +** insert a No-op and add the comment to that new instruction. This +** makes the code easier to read during debugging. None of this happens +** in a production build. +*/ +static void vdbeVComment(Vdbe *p, const char *zFormat, va_list ap){ + assert( p->nOp>0 || p->aOp==0 ); + assert( p->aOp==0 || p->aOp[p->nOp-1].zComment==0 || p->pParse->nErr>0 ); + if( p->nOp ){ + assert( p->aOp ); + sqlite3DbFree(p->db, p->aOp[p->nOp-1].zComment); + p->aOp[p->nOp-1].zComment = sqlite3VMPrintf(p->db, zFormat, ap); + } +} +SQLITE_PRIVATE void sqlite3VdbeComment(Vdbe *p, const char *zFormat, ...){ + va_list ap; + if( p ){ + va_start(ap, zFormat); + vdbeVComment(p, zFormat, ap); + va_end(ap); + } +} +SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe *p, const char *zFormat, ...){ + va_list ap; + if( p ){ + sqlite3VdbeAddOp0(p, OP_Noop); + va_start(ap, zFormat); + vdbeVComment(p, zFormat, ap); + va_end(ap); + } +} +#endif /* NDEBUG */ + +#ifdef SQLITE_VDBE_COVERAGE +/* +** Set the value if the iSrcLine field for the previously coded instruction. +*/ +SQLITE_PRIVATE void sqlite3VdbeSetLineNumber(Vdbe *v, int iLine){ + sqlite3VdbeGetOp(v,-1)->iSrcLine = iLine; +} +#endif /* SQLITE_VDBE_COVERAGE */ + +/* +** Return the opcode for a given address. If the address is -1, then +** return the most recently inserted opcode. +** +** If a memory allocation error has occurred prior to the calling of this +** routine, then a pointer to a dummy VdbeOp will be returned. That opcode +** is readable but not writable, though it is cast to a writable value. +** The return of a dummy opcode allows the call to continue functioning +** after an OOM fault without having to check to see if the return from +** this routine is a valid pointer. But because the dummy.opcode is 0, +** dummy will never be written to. This is verified by code inspection and +** by running with Valgrind. +*/ +SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe *p, int addr){ + /* C89 specifies that the constant "dummy" will be initialized to all + ** zeros, which is correct. MSVC generates a warning, nevertheless. */ + static VdbeOp dummy; /* Ignore the MSVC warning about no initializer */ + assert( p->iVdbeMagic==VDBE_MAGIC_INIT ); + if( addr<0 ){ + addr = p->nOp - 1; + } + assert( (addr>=0 && addrnOp) || p->db->mallocFailed ); + if( p->db->mallocFailed ){ + return (VdbeOp*)&dummy; + }else{ + return &p->aOp[addr]; + } +} + +#if defined(SQLITE_ENABLE_EXPLAIN_COMMENTS) +/* +** Return an integer value for one of the parameters to the opcode pOp +** determined by character c. +*/ +static int translateP(char c, const Op *pOp){ + if( c=='1' ) return pOp->p1; + if( c=='2' ) return pOp->p2; + if( c=='3' ) return pOp->p3; + if( c=='4' ) return pOp->p4.i; + return pOp->p5; +} + +/* +** Compute a string for the "comment" field of a VDBE opcode listing. +** +** The Synopsis: field in comments in the vdbe.c source file gets converted +** to an extra string that is appended to the sqlite3OpcodeName(). In the +** absence of other comments, this synopsis becomes the comment on the opcode. +** Some translation occurs: +** +** "PX" -> "r[X]" +** "PX@PY" -> "r[X..X+Y-1]" or "r[x]" if y is 0 or 1 +** "PX@PY+1" -> "r[X..X+Y]" or "r[x]" if y is 0 +** "PY..PY" -> "r[X..Y]" or "r[x]" if y<=x +*/ +SQLITE_PRIVATE char *sqlite3VdbeDisplayComment( + sqlite3 *db, /* Optional - Oom error reporting only */ + const Op *pOp, /* The opcode to be commented */ + const char *zP4 /* Previously obtained value for P4 */ +){ + const char *zOpName; + const char *zSynopsis; + int nOpName; + int ii; + char zAlt[50]; + StrAccum x; + + sqlite3StrAccumInit(&x, 0, 0, 0, SQLITE_MAX_LENGTH); + zOpName = sqlite3OpcodeName(pOp->opcode); + nOpName = sqlite3Strlen30(zOpName); + if( zOpName[nOpName+1] ){ + int seenCom = 0; + char c; + zSynopsis = zOpName + nOpName + 1; + if( strncmp(zSynopsis,"IF ",3)==0 ){ + sqlite3_snprintf(sizeof(zAlt), zAlt, "if %s goto P2", zSynopsis+3); + zSynopsis = zAlt; + } + for(ii=0; (c = zSynopsis[ii])!=0; ii++){ + if( c=='P' ){ + c = zSynopsis[++ii]; + if( c=='4' ){ + sqlite3_str_appendall(&x, zP4); + }else if( c=='X' ){ + sqlite3_str_appendall(&x, pOp->zComment); + seenCom = 1; + }else{ + int v1 = translateP(c, pOp); + int v2; + if( strncmp(zSynopsis+ii+1, "@P", 2)==0 ){ + ii += 3; + v2 = translateP(zSynopsis[ii], pOp); + if( strncmp(zSynopsis+ii+1,"+1",2)==0 ){ + ii += 2; + v2++; + } + if( v2<2 ){ + sqlite3_str_appendf(&x, "%d", v1); + }else{ + sqlite3_str_appendf(&x, "%d..%d", v1, v1+v2-1); + } + }else if( strncmp(zSynopsis+ii+1, "@NP", 3)==0 ){ + sqlite3_context *pCtx = pOp->p4.pCtx; + if( pOp->p4type!=P4_FUNCCTX || pCtx->argc==1 ){ + sqlite3_str_appendf(&x, "%d", v1); + }else if( pCtx->argc>1 ){ + sqlite3_str_appendf(&x, "%d..%d", v1, v1+pCtx->argc-1); + }else if( x.accError==0 ){ + assert( x.nChar>2 ); + x.nChar -= 2; + ii++; + } + ii += 3; + }else{ + sqlite3_str_appendf(&x, "%d", v1); + if( strncmp(zSynopsis+ii+1, "..P3", 4)==0 && pOp->p3==0 ){ + ii += 4; + } + } + } + }else{ + sqlite3_str_appendchar(&x, 1, c); + } + } + if( !seenCom && pOp->zComment ){ + sqlite3_str_appendf(&x, "; %s", pOp->zComment); + } + }else if( pOp->zComment ){ + sqlite3_str_appendall(&x, pOp->zComment); + } + if( (x.accError & SQLITE_NOMEM)!=0 && db!=0 ){ + sqlite3OomFault(db); + } + return sqlite3StrAccumFinish(&x); +} +#endif /* SQLITE_ENABLE_EXPLAIN_COMMENTS */ + +#if VDBE_DISPLAY_P4 && defined(SQLITE_ENABLE_CURSOR_HINTS) +/* +** Translate the P4.pExpr value for an OP_CursorHint opcode into text +** that can be displayed in the P4 column of EXPLAIN output. +*/ +static void displayP4Expr(StrAccum *p, Expr *pExpr){ + const char *zOp = 0; + switch( pExpr->op ){ + case TK_STRING: + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3_str_appendf(p, "%Q", pExpr->u.zToken); + break; + case TK_INTEGER: + sqlite3_str_appendf(p, "%d", pExpr->u.iValue); + break; + case TK_NULL: + sqlite3_str_appendf(p, "NULL"); + break; + case TK_REGISTER: { + sqlite3_str_appendf(p, "r[%d]", pExpr->iTable); + break; + } + case TK_COLUMN: { + if( pExpr->iColumn<0 ){ + sqlite3_str_appendf(p, "rowid"); + }else{ + sqlite3_str_appendf(p, "c%d", (int)pExpr->iColumn); + } + break; + } + case TK_LT: zOp = "LT"; break; + case TK_LE: zOp = "LE"; break; + case TK_GT: zOp = "GT"; break; + case TK_GE: zOp = "GE"; break; + case TK_NE: zOp = "NE"; break; + case TK_EQ: zOp = "EQ"; break; + case TK_IS: zOp = "IS"; break; + case TK_ISNOT: zOp = "ISNOT"; break; + case TK_AND: zOp = "AND"; break; + case TK_OR: zOp = "OR"; break; + case TK_PLUS: zOp = "ADD"; break; + case TK_STAR: zOp = "MUL"; break; + case TK_MINUS: zOp = "SUB"; break; + case TK_REM: zOp = "REM"; break; + case TK_BITAND: zOp = "BITAND"; break; + case TK_BITOR: zOp = "BITOR"; break; + case TK_SLASH: zOp = "DIV"; break; + case TK_LSHIFT: zOp = "LSHIFT"; break; + case TK_RSHIFT: zOp = "RSHIFT"; break; + case TK_CONCAT: zOp = "CONCAT"; break; + case TK_UMINUS: zOp = "MINUS"; break; + case TK_UPLUS: zOp = "PLUS"; break; + case TK_BITNOT: zOp = "BITNOT"; break; + case TK_NOT: zOp = "NOT"; break; + case TK_ISNULL: zOp = "ISNULL"; break; + case TK_NOTNULL: zOp = "NOTNULL"; break; + + default: + sqlite3_str_appendf(p, "%s", "expr"); + break; + } + + if( zOp ){ + sqlite3_str_appendf(p, "%s(", zOp); + displayP4Expr(p, pExpr->pLeft); + if( pExpr->pRight ){ + sqlite3_str_append(p, ",", 1); + displayP4Expr(p, pExpr->pRight); + } + sqlite3_str_append(p, ")", 1); + } +} +#endif /* VDBE_DISPLAY_P4 && defined(SQLITE_ENABLE_CURSOR_HINTS) */ + + +#if VDBE_DISPLAY_P4 +/* +** Compute a string that describes the P4 parameter for an opcode. +** Use zTemp for any required temporary buffer space. +*/ +SQLITE_PRIVATE char *sqlite3VdbeDisplayP4(sqlite3 *db, Op *pOp){ + char *zP4 = 0; + StrAccum x; + + sqlite3StrAccumInit(&x, 0, 0, 0, SQLITE_MAX_LENGTH); + switch( pOp->p4type ){ + case P4_KEYINFO: { + int j; + KeyInfo *pKeyInfo = pOp->p4.pKeyInfo; + assert( pKeyInfo->aSortFlags!=0 ); + sqlite3_str_appendf(&x, "k(%d", pKeyInfo->nKeyField); + for(j=0; jnKeyField; j++){ + CollSeq *pColl = pKeyInfo->aColl[j]; + const char *zColl = pColl ? pColl->zName : ""; + if( strcmp(zColl, "BINARY")==0 ) zColl = "B"; + sqlite3_str_appendf(&x, ",%s%s%s", + (pKeyInfo->aSortFlags[j] & KEYINFO_ORDER_DESC) ? "-" : "", + (pKeyInfo->aSortFlags[j] & KEYINFO_ORDER_BIGNULL)? "N." : "", + zColl); + } + sqlite3_str_append(&x, ")", 1); + break; + } +#ifdef SQLITE_ENABLE_CURSOR_HINTS + case P4_EXPR: { + displayP4Expr(&x, pOp->p4.pExpr); + break; + } +#endif + case P4_COLLSEQ: { + static const char *const encnames[] = {"?", "8", "16LE", "16BE"}; + CollSeq *pColl = pOp->p4.pColl; + assert( pColl->enc<4 ); + sqlite3_str_appendf(&x, "%.18s-%s", pColl->zName, + encnames[pColl->enc]); + break; + } + case P4_FUNCDEF: { + FuncDef *pDef = pOp->p4.pFunc; + sqlite3_str_appendf(&x, "%s(%d)", pDef->zName, pDef->nArg); + break; + } + case P4_FUNCCTX: { + FuncDef *pDef = pOp->p4.pCtx->pFunc; + sqlite3_str_appendf(&x, "%s(%d)", pDef->zName, pDef->nArg); + break; + } + case P4_INT64: { + sqlite3_str_appendf(&x, "%lld", *pOp->p4.pI64); + break; + } + case P4_INT32: { + sqlite3_str_appendf(&x, "%d", pOp->p4.i); + break; + } + case P4_REAL: { + sqlite3_str_appendf(&x, "%.16g", *pOp->p4.pReal); + break; + } + case P4_MEM: { + Mem *pMem = pOp->p4.pMem; + if( pMem->flags & MEM_Str ){ + zP4 = pMem->z; + }else if( pMem->flags & (MEM_Int|MEM_IntReal) ){ + sqlite3_str_appendf(&x, "%lld", pMem->u.i); + }else if( pMem->flags & MEM_Real ){ + sqlite3_str_appendf(&x, "%.16g", pMem->u.r); + }else if( pMem->flags & MEM_Null ){ + zP4 = "NULL"; + }else{ + assert( pMem->flags & MEM_Blob ); + zP4 = "(blob)"; + } + break; + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + case P4_VTAB: { + sqlite3_vtab *pVtab = pOp->p4.pVtab->pVtab; + sqlite3_str_appendf(&x, "vtab:%p", pVtab); + break; + } +#endif + case P4_INTARRAY: { + u32 i; + u32 *ai = pOp->p4.ai; + u32 n = ai[0]; /* The first element of an INTARRAY is always the + ** count of the number of elements to follow */ + for(i=1; i<=n; i++){ + sqlite3_str_appendf(&x, "%c%u", (i==1 ? '[' : ','), ai[i]); + } + sqlite3_str_append(&x, "]", 1); + break; + } + case P4_SUBPROGRAM: { + zP4 = "program"; + break; + } + case P4_DYNBLOB: + case P4_ADVANCE: { + break; + } + case P4_TABLE: { + zP4 = pOp->p4.pTab->zName; + break; + } + default: { + zP4 = pOp->p4.z; + } + } + if( zP4 ) sqlite3_str_appendall(&x, zP4); + if( (x.accError & SQLITE_NOMEM)!=0 ){ + sqlite3OomFault(db); + } + return sqlite3StrAccumFinish(&x); +} +#endif /* VDBE_DISPLAY_P4 */ + +/* +** Declare to the Vdbe that the BTree object at db->aDb[i] is used. +** +** The prepared statements need to know in advance the complete set of +** attached databases that will be use. A mask of these databases +** is maintained in p->btreeMask. The p->lockMask value is the subset of +** p->btreeMask of databases that will require a lock. +*/ +SQLITE_PRIVATE void sqlite3VdbeUsesBtree(Vdbe *p, int i){ + assert( i>=0 && idb->nDb && i<(int)sizeof(yDbMask)*8 ); + assert( i<(int)sizeof(p->btreeMask)*8 ); + DbMaskSet(p->btreeMask, i); + if( i!=1 && sqlite3BtreeSharable(p->db->aDb[i].pBt) ){ + DbMaskSet(p->lockMask, i); + } +} + +#if !defined(SQLITE_OMIT_SHARED_CACHE) +/* +** If SQLite is compiled to support shared-cache mode and to be threadsafe, +** this routine obtains the mutex associated with each BtShared structure +** that may be accessed by the VM passed as an argument. In doing so it also +** sets the BtShared.db member of each of the BtShared structures, ensuring +** that the correct busy-handler callback is invoked if required. +** +** If SQLite is not threadsafe but does support shared-cache mode, then +** sqlite3BtreeEnter() is invoked to set the BtShared.db variables +** of all of BtShared structures accessible via the database handle +** associated with the VM. +** +** If SQLite is not threadsafe and does not support shared-cache mode, this +** function is a no-op. +** +** The p->btreeMask field is a bitmask of all btrees that the prepared +** statement p will ever use. Let N be the number of bits in p->btreeMask +** corresponding to btrees that use shared cache. Then the runtime of +** this routine is N*N. But as N is rarely more than 1, this should not +** be a problem. +*/ +SQLITE_PRIVATE void sqlite3VdbeEnter(Vdbe *p){ + int i; + sqlite3 *db; + Db *aDb; + int nDb; + if( DbMaskAllZero(p->lockMask) ) return; /* The common case */ + db = p->db; + aDb = db->aDb; + nDb = db->nDb; + for(i=0; ilockMask,i) && ALWAYS(aDb[i].pBt!=0) ){ + sqlite3BtreeEnter(aDb[i].pBt); + } + } +} +#endif + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0 +/* +** Unlock all of the btrees previously locked by a call to sqlite3VdbeEnter(). +*/ +static SQLITE_NOINLINE void vdbeLeave(Vdbe *p){ + int i; + sqlite3 *db; + Db *aDb; + int nDb; + db = p->db; + aDb = db->aDb; + nDb = db->nDb; + for(i=0; ilockMask,i) && ALWAYS(aDb[i].pBt!=0) ){ + sqlite3BtreeLeave(aDb[i].pBt); + } + } +} +SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){ + if( DbMaskAllZero(p->lockMask) ) return; /* The common case */ + vdbeLeave(p); +} +#endif + +#if defined(VDBE_PROFILE) || defined(SQLITE_DEBUG) +/* +** Print a single opcode. This routine is used for debugging only. +*/ +SQLITE_PRIVATE void sqlite3VdbePrintOp(FILE *pOut, int pc, VdbeOp *pOp){ + char *zP4; + char *zCom; + sqlite3 dummyDb; + static const char *zFormat1 = "%4d %-13s %4d %4d %4d %-13s %.2X %s\n"; + if( pOut==0 ) pOut = stdout; + sqlite3BeginBenignMalloc(); + dummyDb.mallocFailed = 1; + zP4 = sqlite3VdbeDisplayP4(&dummyDb, pOp); +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + zCom = sqlite3VdbeDisplayComment(0, pOp, zP4); +#else + zCom = 0; +#endif + /* NB: The sqlite3OpcodeName() function is implemented by code created + ** by the mkopcodeh.awk and mkopcodec.awk scripts which extract the + ** information from the vdbe.c source text */ + fprintf(pOut, zFormat1, pc, + sqlite3OpcodeName(pOp->opcode), pOp->p1, pOp->p2, pOp->p3, + zP4 ? zP4 : "", pOp->p5, + zCom ? zCom : "" + ); + fflush(pOut); + sqlite3_free(zP4); + sqlite3_free(zCom); + sqlite3EndBenignMalloc(); +} +#endif + +/* +** Initialize an array of N Mem element. +*/ +static void initMemArray(Mem *p, int N, sqlite3 *db, u16 flags){ + while( (N--)>0 ){ + p->db = db; + p->flags = flags; + p->szMalloc = 0; +#ifdef SQLITE_DEBUG + p->pScopyFrom = 0; +#endif + p++; + } +} + +/* +** Release an array of N Mem elements +*/ +static void releaseMemArray(Mem *p, int N){ + if( p && N ){ + Mem *pEnd = &p[N]; + sqlite3 *db = p->db; + if( db->pnBytesFreed ){ + do{ + if( p->szMalloc ) sqlite3DbFree(db, p->zMalloc); + }while( (++p)flags & MEM_Agg ); + testcase( p->flags & MEM_Dyn ); + if( p->flags&(MEM_Agg|MEM_Dyn) ){ + testcase( (p->flags & MEM_Dyn)!=0 && p->xDel==sqlite3VdbeFrameMemDel ); + sqlite3VdbeMemRelease(p); + }else if( p->szMalloc ){ + sqlite3DbFreeNN(db, p->zMalloc); + p->szMalloc = 0; + } + + p->flags = MEM_Undefined; + }while( (++p)iFrameMagic!=SQLITE_FRAME_MAGIC ) return 0; + return 1; +} +#endif + + +/* +** This is a destructor on a Mem object (which is really an sqlite3_value) +** that deletes the Frame object that is attached to it as a blob. +** +** This routine does not delete the Frame right away. It merely adds the +** frame to a list of frames to be deleted when the Vdbe halts. +*/ +SQLITE_PRIVATE void sqlite3VdbeFrameMemDel(void *pArg){ + VdbeFrame *pFrame = (VdbeFrame*)pArg; + assert( sqlite3VdbeFrameIsValid(pFrame) ); + pFrame->pParent = pFrame->v->pDelFrame; + pFrame->v->pDelFrame = pFrame; +} + +#if defined(SQLITE_ENABLE_BYTECODE_VTAB) || !defined(SQLITE_OMIT_EXPLAIN) +/* +** Locate the next opcode to be displayed in EXPLAIN or EXPLAIN +** QUERY PLAN output. +** +** Return SQLITE_ROW on success. Return SQLITE_DONE if there are no +** more opcodes to be displayed. +*/ +SQLITE_PRIVATE int sqlite3VdbeNextOpcode( + Vdbe *p, /* The statement being explained */ + Mem *pSub, /* Storage for keeping track of subprogram nesting */ + int eMode, /* 0: normal. 1: EQP. 2: TablesUsed */ + int *piPc, /* IN/OUT: Current rowid. Overwritten with next rowid */ + int *piAddr, /* OUT: Write index into (*paOp)[] here */ + Op **paOp /* OUT: Write the opcode array here */ +){ + int nRow; /* Stop when row count reaches this */ + int nSub = 0; /* Number of sub-vdbes seen so far */ + SubProgram **apSub = 0; /* Array of sub-vdbes */ + int i; /* Next instruction address */ + int rc = SQLITE_OK; /* Result code */ + Op *aOp = 0; /* Opcode array */ + int iPc; /* Rowid. Copy of value in *piPc */ + + /* When the number of output rows reaches nRow, that means the + ** listing has finished and sqlite3_step() should return SQLITE_DONE. + ** nRow is the sum of the number of rows in the main program, plus + ** the sum of the number of rows in all trigger subprograms encountered + ** so far. The nRow value will increase as new trigger subprograms are + ** encountered, but p->pc will eventually catch up to nRow. + */ + nRow = p->nOp; + if( pSub!=0 ){ + if( pSub->flags&MEM_Blob ){ + /* pSub is initiallly NULL. It is initialized to a BLOB by + ** the P4_SUBPROGRAM processing logic below */ + nSub = pSub->n/sizeof(Vdbe*); + apSub = (SubProgram **)pSub->z; + } + for(i=0; inOp; + } + } + iPc = *piPc; + while(1){ /* Loop exits via break */ + i = iPc++; + if( i>=nRow ){ + p->rc = SQLITE_OK; + rc = SQLITE_DONE; + break; + } + if( inOp ){ + /* The rowid is small enough that we are still in the + ** main program. */ + aOp = p->aOp; + }else{ + /* We are currently listing subprograms. Figure out which one and + ** pick up the appropriate opcode. */ + int j; + i -= p->nOp; + assert( apSub!=0 ); + assert( nSub>0 ); + for(j=0; i>=apSub[j]->nOp; j++){ + i -= apSub[j]->nOp; + assert( inOp || j+1aOp; + } + + /* When an OP_Program opcode is encounter (the only opcode that has + ** a P4_SUBPROGRAM argument), expand the size of the array of subprograms + ** kept in p->aMem[9].z to hold the new program - assuming this subprogram + ** has not already been seen. + */ + if( pSub!=0 && aOp[i].p4type==P4_SUBPROGRAM ){ + int nByte = (nSub+1)*sizeof(SubProgram*); + int j; + for(j=0; jrc = sqlite3VdbeMemGrow(pSub, nByte, nSub!=0); + if( p->rc!=SQLITE_OK ){ + rc = SQLITE_ERROR; + break; + } + apSub = (SubProgram **)pSub->z; + apSub[nSub++] = aOp[i].p4.pProgram; + MemSetTypeFlag(pSub, MEM_Blob); + pSub->n = nSub*sizeof(SubProgram*); + nRow += aOp[i].p4.pProgram->nOp; + } + } + if( eMode==0 ) break; +#ifdef SQLITE_ENABLE_BYTECODE_VTAB + if( eMode==2 ){ + Op *pOp = aOp + i; + if( pOp->opcode==OP_OpenRead ) break; + if( pOp->opcode==OP_OpenWrite && (pOp->p5 & OPFLAG_P2ISREG)==0 ) break; + if( pOp->opcode==OP_ReopenIdx ) break; + }else +#endif + { + assert( eMode==1 ); + if( aOp[i].opcode==OP_Explain ) break; + if( aOp[i].opcode==OP_Init && iPc>1 ) break; + } + } + *piPc = iPc; + *piAddr = i; + *paOp = aOp; + return rc; +} +#endif /* SQLITE_ENABLE_BYTECODE_VTAB || !SQLITE_OMIT_EXPLAIN */ + + +/* +** Delete a VdbeFrame object and its contents. VdbeFrame objects are +** allocated by the OP_Program opcode in sqlite3VdbeExec(). +*/ +SQLITE_PRIVATE void sqlite3VdbeFrameDelete(VdbeFrame *p){ + int i; + Mem *aMem = VdbeFrameMem(p); + VdbeCursor **apCsr = (VdbeCursor **)&aMem[p->nChildMem]; + assert( sqlite3VdbeFrameIsValid(p) ); + for(i=0; inChildCsr; i++){ + sqlite3VdbeFreeCursor(p->v, apCsr[i]); + } + releaseMemArray(aMem, p->nChildMem); + sqlite3VdbeDeleteAuxData(p->v->db, &p->pAuxData, -1, 0); + sqlite3DbFree(p->v->db, p); +} + +#ifndef SQLITE_OMIT_EXPLAIN +/* +** Give a listing of the program in the virtual machine. +** +** The interface is the same as sqlite3VdbeExec(). But instead of +** running the code, it invokes the callback once for each instruction. +** This feature is used to implement "EXPLAIN". +** +** When p->explain==1, each instruction is listed. When +** p->explain==2, only OP_Explain instructions are listed and these +** are shown in a different format. p->explain==2 is used to implement +** EXPLAIN QUERY PLAN. +** 2018-04-24: In p->explain==2 mode, the OP_Init opcodes of triggers +** are also shown, so that the boundaries between the main program and +** each trigger are clear. +** +** When p->explain==1, first the main program is listed, then each of +** the trigger subprograms are listed one by one. +*/ +SQLITE_PRIVATE int sqlite3VdbeList( + Vdbe *p /* The VDBE */ +){ + Mem *pSub = 0; /* Memory cell hold array of subprogs */ + sqlite3 *db = p->db; /* The database connection */ + int i; /* Loop counter */ + int rc = SQLITE_OK; /* Return code */ + Mem *pMem = &p->aMem[1]; /* First Mem of result set */ + int bListSubprogs = (p->explain==1 || (db->flags & SQLITE_TriggerEQP)!=0); + Op *aOp; /* Array of opcodes */ + Op *pOp; /* Current opcode */ + + assert( p->explain ); + assert( p->iVdbeMagic==VDBE_MAGIC_RUN ); + assert( p->rc==SQLITE_OK || p->rc==SQLITE_BUSY || p->rc==SQLITE_NOMEM ); + + /* Even though this opcode does not use dynamic strings for + ** the result, result columns may become dynamic if the user calls + ** sqlite3_column_text16(), causing a translation to UTF-16 encoding. + */ + releaseMemArray(pMem, 8); + p->pResultSet = 0; + + if( p->rc==SQLITE_NOMEM ){ + /* This happens if a malloc() inside a call to sqlite3_column_text() or + ** sqlite3_column_text16() failed. */ + sqlite3OomFault(db); + return SQLITE_ERROR; + } + + if( bListSubprogs ){ + /* The first 8 memory cells are used for the result set. So we will + ** commandeer the 9th cell to use as storage for an array of pointers + ** to trigger subprograms. The VDBE is guaranteed to have at least 9 + ** cells. */ + assert( p->nMem>9 ); + pSub = &p->aMem[9]; + }else{ + pSub = 0; + } + + /* Figure out which opcode is next to display */ + rc = sqlite3VdbeNextOpcode(p, pSub, p->explain==2, &p->pc, &i, &aOp); + + if( rc==SQLITE_OK ){ + pOp = aOp + i; + if( AtomicLoad(&db->u1.isInterrupted) ){ + p->rc = SQLITE_INTERRUPT; + rc = SQLITE_ERROR; + sqlite3VdbeError(p, sqlite3ErrStr(p->rc)); + }else{ + char *zP4 = sqlite3VdbeDisplayP4(db, pOp); + if( p->explain==2 ){ + sqlite3VdbeMemSetInt64(pMem, pOp->p1); + sqlite3VdbeMemSetInt64(pMem+1, pOp->p2); + sqlite3VdbeMemSetInt64(pMem+2, pOp->p3); + sqlite3VdbeMemSetStr(pMem+3, zP4, -1, SQLITE_UTF8, sqlite3_free); + p->nResColumn = 4; + }else{ + sqlite3VdbeMemSetInt64(pMem+0, i); + sqlite3VdbeMemSetStr(pMem+1, (char*)sqlite3OpcodeName(pOp->opcode), + -1, SQLITE_UTF8, SQLITE_STATIC); + sqlite3VdbeMemSetInt64(pMem+2, pOp->p1); + sqlite3VdbeMemSetInt64(pMem+3, pOp->p2); + sqlite3VdbeMemSetInt64(pMem+4, pOp->p3); + /* pMem+5 for p4 is done last */ + sqlite3VdbeMemSetInt64(pMem+6, pOp->p5); +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + { + char *zCom = sqlite3VdbeDisplayComment(db, pOp, zP4); + sqlite3VdbeMemSetStr(pMem+7, zCom, -1, SQLITE_UTF8, sqlite3_free); + } +#else + sqlite3VdbeMemSetNull(pMem+7); +#endif + sqlite3VdbeMemSetStr(pMem+5, zP4, -1, SQLITE_UTF8, sqlite3_free); + p->nResColumn = 8; + } + p->pResultSet = pMem; + if( db->mallocFailed ){ + p->rc = SQLITE_NOMEM; + rc = SQLITE_ERROR; + }else{ + p->rc = SQLITE_OK; + rc = SQLITE_ROW; + } + } + } + return rc; +} +#endif /* SQLITE_OMIT_EXPLAIN */ + +#ifdef SQLITE_DEBUG +/* +** Print the SQL that was used to generate a VDBE program. +*/ +SQLITE_PRIVATE void sqlite3VdbePrintSql(Vdbe *p){ + const char *z = 0; + if( p->zSql ){ + z = p->zSql; + }else if( p->nOp>=1 ){ + const VdbeOp *pOp = &p->aOp[0]; + if( pOp->opcode==OP_Init && pOp->p4.z!=0 ){ + z = pOp->p4.z; + while( sqlite3Isspace(*z) ) z++; + } + } + if( z ) printf("SQL: [%s]\n", z); +} +#endif + +#if !defined(SQLITE_OMIT_TRACE) && defined(SQLITE_ENABLE_IOTRACE) +/* +** Print an IOTRACE message showing SQL content. +*/ +SQLITE_PRIVATE void sqlite3VdbeIOTraceSql(Vdbe *p){ + int nOp = p->nOp; + VdbeOp *pOp; + if( sqlite3IoTrace==0 ) return; + if( nOp<1 ) return; + pOp = &p->aOp[0]; + if( pOp->opcode==OP_Init && pOp->p4.z!=0 ){ + int i, j; + char z[1000]; + sqlite3_snprintf(sizeof(z), z, "%s", pOp->p4.z); + for(i=0; sqlite3Isspace(z[i]); i++){} + for(j=0; z[i]; i++){ + if( sqlite3Isspace(z[i]) ){ + if( z[i-1]!=' ' ){ + z[j++] = ' '; + } + }else{ + z[j++] = z[i]; + } + } + z[j] = 0; + sqlite3IoTrace("SQL %s\n", z); + } +} +#endif /* !SQLITE_OMIT_TRACE && SQLITE_ENABLE_IOTRACE */ + +/* An instance of this object describes bulk memory available for use +** by subcomponents of a prepared statement. Space is allocated out +** of a ReusableSpace object by the allocSpace() routine below. +*/ +struct ReusableSpace { + u8 *pSpace; /* Available memory */ + sqlite3_int64 nFree; /* Bytes of available memory */ + sqlite3_int64 nNeeded; /* Total bytes that could not be allocated */ +}; + +/* Try to allocate nByte bytes of 8-byte aligned bulk memory for pBuf +** from the ReusableSpace object. Return a pointer to the allocated +** memory on success. If insufficient memory is available in the +** ReusableSpace object, increase the ReusableSpace.nNeeded +** value by the amount needed and return NULL. +** +** If pBuf is not initially NULL, that means that the memory has already +** been allocated by a prior call to this routine, so just return a copy +** of pBuf and leave ReusableSpace unchanged. +** +** This allocator is employed to repurpose unused slots at the end of the +** opcode array of prepared state for other memory needs of the prepared +** statement. +*/ +static void *allocSpace( + struct ReusableSpace *p, /* Bulk memory available for allocation */ + void *pBuf, /* Pointer to a prior allocation */ + sqlite3_int64 nByte /* Bytes of memory needed */ +){ + assert( EIGHT_BYTE_ALIGNMENT(p->pSpace) ); + if( pBuf==0 ){ + nByte = ROUND8(nByte); + if( nByte <= p->nFree ){ + p->nFree -= nByte; + pBuf = &p->pSpace[p->nFree]; + }else{ + p->nNeeded += nByte; + } + } + assert( EIGHT_BYTE_ALIGNMENT(pBuf) ); + return pBuf; +} + +/* +** Rewind the VDBE back to the beginning in preparation for +** running it. +*/ +SQLITE_PRIVATE void sqlite3VdbeRewind(Vdbe *p){ +#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) + int i; +#endif + assert( p!=0 ); + assert( p->iVdbeMagic==VDBE_MAGIC_INIT || p->iVdbeMagic==VDBE_MAGIC_RESET ); + + /* There should be at least one opcode. + */ + assert( p->nOp>0 ); + + /* Set the magic to VDBE_MAGIC_RUN sooner rather than later. */ + p->iVdbeMagic = VDBE_MAGIC_RUN; + +#ifdef SQLITE_DEBUG + for(i=0; inMem; i++){ + assert( p->aMem[i].db==p->db ); + } +#endif + p->pc = -1; + p->rc = SQLITE_OK; + p->errorAction = OE_Abort; + p->nChange = 0; + p->cacheCtr = 1; + p->minWriteFileFormat = 255; + p->iStatement = 0; + p->nFkConstraint = 0; +#ifdef VDBE_PROFILE + for(i=0; inOp; i++){ + p->aOp[i].cnt = 0; + p->aOp[i].cycles = 0; + } +#endif +} + +/* +** Prepare a virtual machine for execution for the first time after +** creating the virtual machine. This involves things such +** as allocating registers and initializing the program counter. +** After the VDBE has be prepped, it can be executed by one or more +** calls to sqlite3VdbeExec(). +** +** This function may be called exactly once on each virtual machine. +** After this routine is called the VM has been "packaged" and is ready +** to run. After this routine is called, further calls to +** sqlite3VdbeAddOp() functions are prohibited. This routine disconnects +** the Vdbe from the Parse object that helped generate it so that the +** the Vdbe becomes an independent entity and the Parse object can be +** destroyed. +** +** Use the sqlite3VdbeRewind() procedure to restore a virtual machine back +** to its initial state after it has been run. +*/ +SQLITE_PRIVATE void sqlite3VdbeMakeReady( + Vdbe *p, /* The VDBE */ + Parse *pParse /* Parsing context */ +){ + sqlite3 *db; /* The database connection */ + int nVar; /* Number of parameters */ + int nMem; /* Number of VM memory registers */ + int nCursor; /* Number of cursors required */ + int nArg; /* Number of arguments in subprograms */ + int n; /* Loop counter */ + struct ReusableSpace x; /* Reusable bulk memory */ + + assert( p!=0 ); + assert( p->nOp>0 ); + assert( pParse!=0 ); + assert( p->iVdbeMagic==VDBE_MAGIC_INIT ); + assert( pParse==p->pParse ); + p->pVList = pParse->pVList; + pParse->pVList = 0; + db = p->db; + assert( db->mallocFailed==0 ); + nVar = pParse->nVar; + nMem = pParse->nMem; + nCursor = pParse->nTab; + nArg = pParse->nMaxArg; + + /* Each cursor uses a memory cell. The first cursor (cursor 0) can + ** use aMem[0] which is not otherwise used by the VDBE program. Allocate + ** space at the end of aMem[] for cursors 1 and greater. + ** See also: allocateCursor(). + */ + nMem += nCursor; + if( nCursor==0 && nMem>0 ) nMem++; /* Space for aMem[0] even if not used */ + + /* Figure out how much reusable memory is available at the end of the + ** opcode array. This extra memory will be reallocated for other elements + ** of the prepared statement. + */ + n = ROUND8(sizeof(Op)*p->nOp); /* Bytes of opcode memory used */ + x.pSpace = &((u8*)p->aOp)[n]; /* Unused opcode memory */ + assert( EIGHT_BYTE_ALIGNMENT(x.pSpace) ); + x.nFree = ROUNDDOWN8(pParse->szOpAlloc - n); /* Bytes of unused memory */ + assert( x.nFree>=0 ); + assert( EIGHT_BYTE_ALIGNMENT(&x.pSpace[x.nFree]) ); + + resolveP2Values(p, &nArg); + p->usesStmtJournal = (u8)(pParse->isMultiWrite && pParse->mayAbort); + if( pParse->explain ){ + static const char * const azColName[] = { + "addr", "opcode", "p1", "p2", "p3", "p4", "p5", "comment", + "id", "parent", "notused", "detail" + }; + int iFirst, mx, i; + if( nMem<10 ) nMem = 10; + p->explain = pParse->explain; + if( pParse->explain==2 ){ + sqlite3VdbeSetNumCols(p, 4); + iFirst = 8; + mx = 12; + }else{ + sqlite3VdbeSetNumCols(p, 8); + iFirst = 0; + mx = 8; + } + for(i=iFirst; iexpired = 0; + + /* Memory for registers, parameters, cursor, etc, is allocated in one or two + ** passes. On the first pass, we try to reuse unused memory at the + ** end of the opcode array. If we are unable to satisfy all memory + ** requirements by reusing the opcode array tail, then the second + ** pass will fill in the remainder using a fresh memory allocation. + ** + ** This two-pass approach that reuses as much memory as possible from + ** the leftover memory at the end of the opcode array. This can significantly + ** reduce the amount of memory held by a prepared statement. + */ + x.nNeeded = 0; + p->aMem = allocSpace(&x, 0, nMem*sizeof(Mem)); + p->aVar = allocSpace(&x, 0, nVar*sizeof(Mem)); + p->apArg = allocSpace(&x, 0, nArg*sizeof(Mem*)); + p->apCsr = allocSpace(&x, 0, nCursor*sizeof(VdbeCursor*)); +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + p->anExec = allocSpace(&x, 0, p->nOp*sizeof(i64)); +#endif + if( x.nNeeded ){ + x.pSpace = p->pFree = sqlite3DbMallocRawNN(db, x.nNeeded); + x.nFree = x.nNeeded; + if( !db->mallocFailed ){ + p->aMem = allocSpace(&x, p->aMem, nMem*sizeof(Mem)); + p->aVar = allocSpace(&x, p->aVar, nVar*sizeof(Mem)); + p->apArg = allocSpace(&x, p->apArg, nArg*sizeof(Mem*)); + p->apCsr = allocSpace(&x, p->apCsr, nCursor*sizeof(VdbeCursor*)); +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + p->anExec = allocSpace(&x, p->anExec, p->nOp*sizeof(i64)); +#endif + } + } + + if( db->mallocFailed ){ + p->nVar = 0; + p->nCursor = 0; + p->nMem = 0; + }else{ + p->nCursor = nCursor; + p->nVar = (ynVar)nVar; + initMemArray(p->aVar, nVar, db, MEM_Null); + p->nMem = nMem; + initMemArray(p->aMem, nMem, db, MEM_Undefined); + memset(p->apCsr, 0, nCursor*sizeof(VdbeCursor*)); +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + memset(p->anExec, 0, p->nOp*sizeof(i64)); +#endif + } + sqlite3VdbeRewind(p); +} + +/* +** Close a VDBE cursor and release all the resources that cursor +** happens to hold. +*/ +SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ + if( pCx==0 ){ + return; + } + switch( pCx->eCurType ){ + case CURTYPE_SORTER: { + sqlite3VdbeSorterClose(p->db, pCx); + break; + } + case CURTYPE_BTREE: { + assert( pCx->uc.pCursor!=0 ); + sqlite3BtreeCloseCursor(pCx->uc.pCursor); + break; + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + case CURTYPE_VTAB: { + sqlite3_vtab_cursor *pVCur = pCx->uc.pVCur; + const sqlite3_module *pModule = pVCur->pVtab->pModule; + assert( pVCur->pVtab->nRef>0 ); + pVCur->pVtab->nRef--; + pModule->xClose(pVCur); + break; + } +#endif + } +} + +/* +** Close all cursors in the current frame. +*/ +static void closeCursorsInFrame(Vdbe *p){ + if( p->apCsr ){ + int i; + for(i=0; inCursor; i++){ + VdbeCursor *pC = p->apCsr[i]; + if( pC ){ + sqlite3VdbeFreeCursor(p, pC); + p->apCsr[i] = 0; + } + } + } +} + +/* +** Copy the values stored in the VdbeFrame structure to its Vdbe. This +** is used, for example, when a trigger sub-program is halted to restore +** control to the main program. +*/ +SQLITE_PRIVATE int sqlite3VdbeFrameRestore(VdbeFrame *pFrame){ + Vdbe *v = pFrame->v; + closeCursorsInFrame(v); +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + v->anExec = pFrame->anExec; +#endif + v->aOp = pFrame->aOp; + v->nOp = pFrame->nOp; + v->aMem = pFrame->aMem; + v->nMem = pFrame->nMem; + v->apCsr = pFrame->apCsr; + v->nCursor = pFrame->nCursor; + v->db->lastRowid = pFrame->lastRowid; + v->nChange = pFrame->nChange; + v->db->nChange = pFrame->nDbChange; + sqlite3VdbeDeleteAuxData(v->db, &v->pAuxData, -1, 0); + v->pAuxData = pFrame->pAuxData; + pFrame->pAuxData = 0; + return pFrame->pc; +} + +/* +** Close all cursors. +** +** Also release any dynamic memory held by the VM in the Vdbe.aMem memory +** cell array. This is necessary as the memory cell array may contain +** pointers to VdbeFrame objects, which may in turn contain pointers to +** open cursors. +*/ +static void closeAllCursors(Vdbe *p){ + if( p->pFrame ){ + VdbeFrame *pFrame; + for(pFrame=p->pFrame; pFrame->pParent; pFrame=pFrame->pParent); + sqlite3VdbeFrameRestore(pFrame); + p->pFrame = 0; + p->nFrame = 0; + } + assert( p->nFrame==0 ); + closeCursorsInFrame(p); + if( p->aMem ){ + releaseMemArray(p->aMem, p->nMem); + } + while( p->pDelFrame ){ + VdbeFrame *pDel = p->pDelFrame; + p->pDelFrame = pDel->pParent; + sqlite3VdbeFrameDelete(pDel); + } + + /* Delete any auxdata allocations made by the VM */ + if( p->pAuxData ) sqlite3VdbeDeleteAuxData(p->db, &p->pAuxData, -1, 0); + assert( p->pAuxData==0 ); +} + +/* +** Set the number of result columns that will be returned by this SQL +** statement. This is now set at compile time, rather than during +** execution of the vdbe program so that sqlite3_column_count() can +** be called on an SQL statement before sqlite3_step(). +*/ +SQLITE_PRIVATE void sqlite3VdbeSetNumCols(Vdbe *p, int nResColumn){ + int n; + sqlite3 *db = p->db; + + if( p->nResColumn ){ + releaseMemArray(p->aColName, p->nResColumn*COLNAME_N); + sqlite3DbFree(db, p->aColName); + } + n = nResColumn*COLNAME_N; + p->nResColumn = (u16)nResColumn; + p->aColName = (Mem*)sqlite3DbMallocRawNN(db, sizeof(Mem)*n ); + if( p->aColName==0 ) return; + initMemArray(p->aColName, n, db, MEM_Null); +} + +/* +** Set the name of the idx'th column to be returned by the SQL statement. +** zName must be a pointer to a nul terminated string. +** +** This call must be made after a call to sqlite3VdbeSetNumCols(). +** +** The final parameter, xDel, must be one of SQLITE_DYNAMIC, SQLITE_STATIC +** or SQLITE_TRANSIENT. If it is SQLITE_DYNAMIC, then the buffer pointed +** to by zName will be freed by sqlite3DbFree() when the vdbe is destroyed. +*/ +SQLITE_PRIVATE int sqlite3VdbeSetColName( + Vdbe *p, /* Vdbe being configured */ + int idx, /* Index of column zName applies to */ + int var, /* One of the COLNAME_* constants */ + const char *zName, /* Pointer to buffer containing name */ + void (*xDel)(void*) /* Memory management strategy for zName */ +){ + int rc; + Mem *pColName; + assert( idxnResColumn ); + assert( vardb->mallocFailed ){ + assert( !zName || xDel!=SQLITE_DYNAMIC ); + return SQLITE_NOMEM_BKPT; + } + assert( p->aColName!=0 ); + pColName = &(p->aColName[idx+var*p->nResColumn]); + rc = sqlite3VdbeMemSetStr(pColName, zName, -1, SQLITE_UTF8, xDel); + assert( rc!=0 || !zName || (pColName->flags&MEM_Term)!=0 ); + return rc; +} + +/* +** A read or write transaction may or may not be active on database handle +** db. If a transaction is active, commit it. If there is a +** write-transaction spanning more than one database file, this routine +** takes care of the super-journal trickery. +*/ +static int vdbeCommit(sqlite3 *db, Vdbe *p){ + int i; + int nTrans = 0; /* Number of databases with an active write-transaction + ** that are candidates for a two-phase commit using a + ** super-journal */ + int rc = SQLITE_OK; + int needXcommit = 0; + +#ifdef SQLITE_OMIT_VIRTUALTABLE + /* With this option, sqlite3VtabSync() is defined to be simply + ** SQLITE_OK so p is not used. + */ + UNUSED_PARAMETER(p); +#endif + + /* Before doing anything else, call the xSync() callback for any + ** virtual module tables written in this transaction. This has to + ** be done before determining whether a super-journal file is + ** required, as an xSync() callback may add an attached database + ** to the transaction. + */ + rc = sqlite3VtabSync(db, p); + + /* This loop determines (a) if the commit hook should be invoked and + ** (b) how many database files have open write transactions, not + ** including the temp database. (b) is important because if more than + ** one database file has an open write transaction, a super-journal + ** file is required for an atomic commit. + */ + for(i=0; rc==SQLITE_OK && inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( sqlite3BtreeTxnState(pBt)==SQLITE_TXN_WRITE ){ + /* Whether or not a database might need a super-journal depends upon + ** its journal mode (among other things). This matrix determines which + ** journal modes use a super-journal and which do not */ + static const u8 aMJNeeded[] = { + /* DELETE */ 1, + /* PERSIST */ 1, + /* OFF */ 0, + /* TRUNCATE */ 1, + /* MEMORY */ 0, + /* WAL */ 0 + }; + Pager *pPager; /* Pager associated with pBt */ + needXcommit = 1; + sqlite3BtreeEnter(pBt); + pPager = sqlite3BtreePager(pBt); + if( db->aDb[i].safety_level!=PAGER_SYNCHRONOUS_OFF + && aMJNeeded[sqlite3PagerGetJournalMode(pPager)] + && sqlite3PagerIsMemdb(pPager)==0 + ){ + assert( i!=1 ); + nTrans++; + } + rc = sqlite3PagerExclusiveLock(pPager); + sqlite3BtreeLeave(pBt); + } + } + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If there are any write-transactions at all, invoke the commit hook */ + if( needXcommit && db->xCommitCallback ){ + rc = db->xCommitCallback(db->pCommitArg); + if( rc ){ + return SQLITE_CONSTRAINT_COMMITHOOK; + } + } + + /* The simple case - no more than one database file (not counting the + ** TEMP database) has a transaction active. There is no need for the + ** super-journal. + ** + ** If the return value of sqlite3BtreeGetFilename() is a zero length + ** string, it means the main database is :memory: or a temp file. In + ** that case we do not support atomic multi-file commits, so use the + ** simple case then too. + */ + if( 0==sqlite3Strlen30(sqlite3BtreeGetFilename(db->aDb[0].pBt)) + || nTrans<=1 + ){ + for(i=0; rc==SQLITE_OK && inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + rc = sqlite3BtreeCommitPhaseOne(pBt, 0); + } + } + + /* Do the commit only if all databases successfully complete phase 1. + ** If one of the BtreeCommitPhaseOne() calls fails, this indicates an + ** IO error while deleting or truncating a journal file. It is unlikely, + ** but could happen. In this case abandon processing and return the error. + */ + for(i=0; rc==SQLITE_OK && inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + rc = sqlite3BtreeCommitPhaseTwo(pBt, 0); + } + } + if( rc==SQLITE_OK ){ + sqlite3VtabCommit(db); + } + } + + /* The complex case - There is a multi-file write-transaction active. + ** This requires a super-journal file to ensure the transaction is + ** committed atomically. + */ +#ifndef SQLITE_OMIT_DISKIO + else{ + sqlite3_vfs *pVfs = db->pVfs; + char *zSuper = 0; /* File-name for the super-journal */ + char const *zMainFile = sqlite3BtreeGetFilename(db->aDb[0].pBt); + sqlite3_file *pSuperJrnl = 0; + i64 offset = 0; + int res; + int retryCount = 0; + int nMainFile; + + /* Select a super-journal file name */ + nMainFile = sqlite3Strlen30(zMainFile); + zSuper = sqlite3MPrintf(db, "%.4c%s%.16c", 0,zMainFile,0); + if( zSuper==0 ) return SQLITE_NOMEM_BKPT; + zSuper += 4; + do { + u32 iRandom; + if( retryCount ){ + if( retryCount>100 ){ + sqlite3_log(SQLITE_FULL, "MJ delete: %s", zSuper); + sqlite3OsDelete(pVfs, zSuper, 0); + break; + }else if( retryCount==1 ){ + sqlite3_log(SQLITE_FULL, "MJ collide: %s", zSuper); + } + } + retryCount++; + sqlite3_randomness(sizeof(iRandom), &iRandom); + sqlite3_snprintf(13, &zSuper[nMainFile], "-mj%06X9%02X", + (iRandom>>8)&0xffffff, iRandom&0xff); + /* The antipenultimate character of the super-journal name must + ** be "9" to avoid name collisions when using 8+3 filenames. */ + assert( zSuper[sqlite3Strlen30(zSuper)-3]=='9' ); + sqlite3FileSuffix3(zMainFile, zSuper); + rc = sqlite3OsAccess(pVfs, zSuper, SQLITE_ACCESS_EXISTS, &res); + }while( rc==SQLITE_OK && res ); + if( rc==SQLITE_OK ){ + /* Open the super-journal. */ + rc = sqlite3OsOpenMalloc(pVfs, zSuper, &pSuperJrnl, + SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE| + SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_SUPER_JOURNAL, 0 + ); + } + if( rc!=SQLITE_OK ){ + sqlite3DbFree(db, zSuper-4); + return rc; + } + + /* Write the name of each database file in the transaction into the new + ** super-journal file. If an error occurs at this point close + ** and delete the super-journal file. All the individual journal files + ** still have 'null' as the super-journal pointer, so they will roll + ** back independently if a failure occurs. + */ + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( sqlite3BtreeTxnState(pBt)==SQLITE_TXN_WRITE ){ + char const *zFile = sqlite3BtreeGetJournalname(pBt); + if( zFile==0 ){ + continue; /* Ignore TEMP and :memory: databases */ + } + assert( zFile[0]!=0 ); + rc = sqlite3OsWrite(pSuperJrnl, zFile, sqlite3Strlen30(zFile)+1,offset); + offset += sqlite3Strlen30(zFile)+1; + if( rc!=SQLITE_OK ){ + sqlite3OsCloseFree(pSuperJrnl); + sqlite3OsDelete(pVfs, zSuper, 0); + sqlite3DbFree(db, zSuper-4); + return rc; + } + } + } + + /* Sync the super-journal file. If the IOCAP_SEQUENTIAL device + ** flag is set this is not required. + */ + if( 0==(sqlite3OsDeviceCharacteristics(pSuperJrnl)&SQLITE_IOCAP_SEQUENTIAL) + && SQLITE_OK!=(rc = sqlite3OsSync(pSuperJrnl, SQLITE_SYNC_NORMAL)) + ){ + sqlite3OsCloseFree(pSuperJrnl); + sqlite3OsDelete(pVfs, zSuper, 0); + sqlite3DbFree(db, zSuper-4); + return rc; + } + + /* Sync all the db files involved in the transaction. The same call + ** sets the super-journal pointer in each individual journal. If + ** an error occurs here, do not delete the super-journal file. + ** + ** If the error occurs during the first call to + ** sqlite3BtreeCommitPhaseOne(), then there is a chance that the + ** super-journal file will be orphaned. But we cannot delete it, + ** in case the super-journal file name was written into the journal + ** file before the failure occurred. + */ + for(i=0; rc==SQLITE_OK && inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + rc = sqlite3BtreeCommitPhaseOne(pBt, zSuper); + } + } + sqlite3OsCloseFree(pSuperJrnl); + assert( rc!=SQLITE_BUSY ); + if( rc!=SQLITE_OK ){ + sqlite3DbFree(db, zSuper-4); + return rc; + } + + /* Delete the super-journal file. This commits the transaction. After + ** doing this the directory is synced again before any individual + ** transaction files are deleted. + */ + rc = sqlite3OsDelete(pVfs, zSuper, 1); + sqlite3DbFree(db, zSuper-4); + zSuper = 0; + if( rc ){ + return rc; + } + + /* All files and directories have already been synced, so the following + ** calls to sqlite3BtreeCommitPhaseTwo() are only closing files and + ** deleting or truncating journals. If something goes wrong while + ** this is happening we don't really care. The integrity of the + ** transaction is already guaranteed, but some stray 'cold' journals + ** may be lying around. Returning an error code won't help matters. + */ + disable_simulated_io_errors(); + sqlite3BeginBenignMalloc(); + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + sqlite3BtreeCommitPhaseTwo(pBt, 1); + } + } + sqlite3EndBenignMalloc(); + enable_simulated_io_errors(); + + sqlite3VtabCommit(db); + } +#endif + + return rc; +} + +/* +** This routine checks that the sqlite3.nVdbeActive count variable +** matches the number of vdbe's in the list sqlite3.pVdbe that are +** currently active. An assertion fails if the two counts do not match. +** This is an internal self-check only - it is not an essential processing +** step. +** +** This is a no-op if NDEBUG is defined. +*/ +#ifndef NDEBUG +static void checkActiveVdbeCnt(sqlite3 *db){ + Vdbe *p; + int cnt = 0; + int nWrite = 0; + int nRead = 0; + p = db->pVdbe; + while( p ){ + if( sqlite3_stmt_busy((sqlite3_stmt*)p) ){ + cnt++; + if( p->readOnly==0 ) nWrite++; + if( p->bIsReader ) nRead++; + } + p = p->pNext; + } + assert( cnt==db->nVdbeActive ); + assert( nWrite==db->nVdbeWrite ); + assert( nRead==db->nVdbeRead ); +} +#else +#define checkActiveVdbeCnt(x) +#endif + +/* +** If the Vdbe passed as the first argument opened a statement-transaction, +** close it now. Argument eOp must be either SAVEPOINT_ROLLBACK or +** SAVEPOINT_RELEASE. If it is SAVEPOINT_ROLLBACK, then the statement +** transaction is rolled back. If eOp is SAVEPOINT_RELEASE, then the +** statement transaction is committed. +** +** If an IO error occurs, an SQLITE_IOERR_XXX error code is returned. +** Otherwise SQLITE_OK. +*/ +static SQLITE_NOINLINE int vdbeCloseStatement(Vdbe *p, int eOp){ + sqlite3 *const db = p->db; + int rc = SQLITE_OK; + int i; + const int iSavepoint = p->iStatement-1; + + assert( eOp==SAVEPOINT_ROLLBACK || eOp==SAVEPOINT_RELEASE); + assert( db->nStatement>0 ); + assert( p->iStatement==(db->nStatement+db->nSavepoint) ); + + for(i=0; inDb; i++){ + int rc2 = SQLITE_OK; + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + if( eOp==SAVEPOINT_ROLLBACK ){ + rc2 = sqlite3BtreeSavepoint(pBt, SAVEPOINT_ROLLBACK, iSavepoint); + } + if( rc2==SQLITE_OK ){ + rc2 = sqlite3BtreeSavepoint(pBt, SAVEPOINT_RELEASE, iSavepoint); + } + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + db->nStatement--; + p->iStatement = 0; + + if( rc==SQLITE_OK ){ + if( eOp==SAVEPOINT_ROLLBACK ){ + rc = sqlite3VtabSavepoint(db, SAVEPOINT_ROLLBACK, iSavepoint); + } + if( rc==SQLITE_OK ){ + rc = sqlite3VtabSavepoint(db, SAVEPOINT_RELEASE, iSavepoint); + } + } + + /* If the statement transaction is being rolled back, also restore the + ** database handles deferred constraint counter to the value it had when + ** the statement transaction was opened. */ + if( eOp==SAVEPOINT_ROLLBACK ){ + db->nDeferredCons = p->nStmtDefCons; + db->nDeferredImmCons = p->nStmtDefImmCons; + } + return rc; +} +SQLITE_PRIVATE int sqlite3VdbeCloseStatement(Vdbe *p, int eOp){ + if( p->db->nStatement && p->iStatement ){ + return vdbeCloseStatement(p, eOp); + } + return SQLITE_OK; +} + + +/* +** This function is called when a transaction opened by the database +** handle associated with the VM passed as an argument is about to be +** committed. If there are outstanding deferred foreign key constraint +** violations, return SQLITE_ERROR. Otherwise, SQLITE_OK. +** +** If there are outstanding FK violations and this function returns +** SQLITE_ERROR, set the result of the VM to SQLITE_CONSTRAINT_FOREIGNKEY +** and write an error message to it. Then return SQLITE_ERROR. +*/ +#ifndef SQLITE_OMIT_FOREIGN_KEY +SQLITE_PRIVATE int sqlite3VdbeCheckFk(Vdbe *p, int deferred){ + sqlite3 *db = p->db; + if( (deferred && (db->nDeferredCons+db->nDeferredImmCons)>0) + || (!deferred && p->nFkConstraint>0) + ){ + p->rc = SQLITE_CONSTRAINT_FOREIGNKEY; + p->errorAction = OE_Abort; + sqlite3VdbeError(p, "FOREIGN KEY constraint failed"); + return SQLITE_ERROR; + } + return SQLITE_OK; +} +#endif + +/* +** This routine is called the when a VDBE tries to halt. If the VDBE +** has made changes and is in autocommit mode, then commit those +** changes. If a rollback is needed, then do the rollback. +** +** This routine is the only way to move the sqlite3eOpenState of a VM from +** SQLITE_STATE_RUN to SQLITE_STATE_HALT. It is harmless to +** call this on a VM that is in the SQLITE_STATE_HALT state. +** +** Return an error code. If the commit could not complete because of +** lock contention, return SQLITE_BUSY. If SQLITE_BUSY is returned, it +** means the close did not happen and needs to be repeated. +*/ +SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe *p){ + int rc; /* Used to store transient return codes */ + sqlite3 *db = p->db; + + /* This function contains the logic that determines if a statement or + ** transaction will be committed or rolled back as a result of the + ** execution of this virtual machine. + ** + ** If any of the following errors occur: + ** + ** SQLITE_NOMEM + ** SQLITE_IOERR + ** SQLITE_FULL + ** SQLITE_INTERRUPT + ** + ** Then the internal cache might have been left in an inconsistent + ** state. We need to rollback the statement transaction, if there is + ** one, or the complete transaction if there is no statement transaction. + */ + + if( p->iVdbeMagic!=VDBE_MAGIC_RUN ){ + return SQLITE_OK; + } + if( db->mallocFailed ){ + p->rc = SQLITE_NOMEM_BKPT; + } + closeAllCursors(p); + checkActiveVdbeCnt(db); + + /* No commit or rollback needed if the program never started or if the + ** SQL statement does not read or write a database file. */ + if( p->pc>=0 && p->bIsReader ){ + int mrc; /* Primary error code from p->rc */ + int eStatementOp = 0; + int isSpecialError; /* Set to true if a 'special' error */ + + /* Lock all btrees used by the statement */ + sqlite3VdbeEnter(p); + + /* Check for one of the special errors */ + if( p->rc ){ + mrc = p->rc & 0xff; + isSpecialError = mrc==SQLITE_NOMEM + || mrc==SQLITE_IOERR + || mrc==SQLITE_INTERRUPT + || mrc==SQLITE_FULL; + }else{ + mrc = isSpecialError = 0; + } + if( isSpecialError ){ + /* If the query was read-only and the error code is SQLITE_INTERRUPT, + ** no rollback is necessary. Otherwise, at least a savepoint + ** transaction must be rolled back to restore the database to a + ** consistent state. + ** + ** Even if the statement is read-only, it is important to perform + ** a statement or transaction rollback operation. If the error + ** occurred while writing to the journal, sub-journal or database + ** file as part of an effort to free up cache space (see function + ** pagerStress() in pager.c), the rollback is required to restore + ** the pager to a consistent state. + */ + if( !p->readOnly || mrc!=SQLITE_INTERRUPT ){ + if( (mrc==SQLITE_NOMEM || mrc==SQLITE_FULL) && p->usesStmtJournal ){ + eStatementOp = SAVEPOINT_ROLLBACK; + }else{ + /* We are forced to roll back the active transaction. Before doing + ** so, abort any other statements this handle currently has active. + */ + sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); + sqlite3CloseSavepoints(db); + db->autoCommit = 1; + p->nChange = 0; + } + } + } + + /* Check for immediate foreign key violations. */ + if( p->rc==SQLITE_OK || (p->errorAction==OE_Fail && !isSpecialError) ){ + sqlite3VdbeCheckFk(p, 0); + } + + /* If the auto-commit flag is set and this is the only active writer + ** VM, then we do either a commit or rollback of the current transaction. + ** + ** Note: This block also runs if one of the special errors handled + ** above has occurred. + */ + if( !sqlite3VtabInSync(db) + && db->autoCommit + && db->nVdbeWrite==(p->readOnly==0) + ){ + if( p->rc==SQLITE_OK || (p->errorAction==OE_Fail && !isSpecialError) ){ + rc = sqlite3VdbeCheckFk(p, 1); + if( rc!=SQLITE_OK ){ + if( NEVER(p->readOnly) ){ + sqlite3VdbeLeave(p); + return SQLITE_ERROR; + } + rc = SQLITE_CONSTRAINT_FOREIGNKEY; + }else if( db->flags & SQLITE_CorruptRdOnly ){ + rc = SQLITE_CORRUPT; + db->flags &= ~SQLITE_CorruptRdOnly; + }else{ + /* The auto-commit flag is true, the vdbe program was successful + ** or hit an 'OR FAIL' constraint and there are no deferred foreign + ** key constraints to hold up the transaction. This means a commit + ** is required. */ + rc = vdbeCommit(db, p); + } + if( rc==SQLITE_BUSY && p->readOnly ){ + sqlite3VdbeLeave(p); + return SQLITE_BUSY; + }else if( rc!=SQLITE_OK ){ + p->rc = rc; + sqlite3RollbackAll(db, SQLITE_OK); + p->nChange = 0; + }else{ + db->nDeferredCons = 0; + db->nDeferredImmCons = 0; + db->flags &= ~(u64)SQLITE_DeferFKs; + sqlite3CommitInternalChanges(db); + } + }else{ + sqlite3RollbackAll(db, SQLITE_OK); + p->nChange = 0; + } + db->nStatement = 0; + }else if( eStatementOp==0 ){ + if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){ + eStatementOp = SAVEPOINT_RELEASE; + }else if( p->errorAction==OE_Abort ){ + eStatementOp = SAVEPOINT_ROLLBACK; + }else{ + sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); + sqlite3CloseSavepoints(db); + db->autoCommit = 1; + p->nChange = 0; + } + } + + /* If eStatementOp is non-zero, then a statement transaction needs to + ** be committed or rolled back. Call sqlite3VdbeCloseStatement() to + ** do so. If this operation returns an error, and the current statement + ** error code is SQLITE_OK or SQLITE_CONSTRAINT, then promote the + ** current statement error code. + */ + if( eStatementOp ){ + rc = sqlite3VdbeCloseStatement(p, eStatementOp); + if( rc ){ + if( p->rc==SQLITE_OK || (p->rc&0xff)==SQLITE_CONSTRAINT ){ + p->rc = rc; + sqlite3DbFree(db, p->zErrMsg); + p->zErrMsg = 0; + } + sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); + sqlite3CloseSavepoints(db); + db->autoCommit = 1; + p->nChange = 0; + } + } + + /* If this was an INSERT, UPDATE or DELETE and no statement transaction + ** has been rolled back, update the database connection change-counter. + */ + if( p->changeCntOn ){ + if( eStatementOp!=SAVEPOINT_ROLLBACK ){ + sqlite3VdbeSetChanges(db, p->nChange); + }else{ + sqlite3VdbeSetChanges(db, 0); + } + p->nChange = 0; + } + + /* Release the locks */ + sqlite3VdbeLeave(p); + } + + /* We have successfully halted and closed the VM. Record this fact. */ + if( p->pc>=0 ){ + db->nVdbeActive--; + if( !p->readOnly ) db->nVdbeWrite--; + if( p->bIsReader ) db->nVdbeRead--; + assert( db->nVdbeActive>=db->nVdbeRead ); + assert( db->nVdbeRead>=db->nVdbeWrite ); + assert( db->nVdbeWrite>=0 ); + } + p->iVdbeMagic = VDBE_MAGIC_HALT; + checkActiveVdbeCnt(db); + if( db->mallocFailed ){ + p->rc = SQLITE_NOMEM_BKPT; + } + + /* If the auto-commit flag is set to true, then any locks that were held + ** by connection db have now been released. Call sqlite3ConnectionUnlocked() + ** to invoke any required unlock-notify callbacks. + */ + if( db->autoCommit ){ + sqlite3ConnectionUnlocked(db); + } + + assert( db->nVdbeActive>0 || db->autoCommit==0 || db->nStatement==0 ); + return (p->rc==SQLITE_BUSY ? SQLITE_BUSY : SQLITE_OK); +} + + +/* +** Each VDBE holds the result of the most recent sqlite3_step() call +** in p->rc. This routine sets that result back to SQLITE_OK. +*/ +SQLITE_PRIVATE void sqlite3VdbeResetStepResult(Vdbe *p){ + p->rc = SQLITE_OK; +} + +/* +** Copy the error code and error message belonging to the VDBE passed +** as the first argument to its database handle (so that they will be +** returned by calls to sqlite3_errcode() and sqlite3_errmsg()). +** +** This function does not clear the VDBE error code or message, just +** copies them to the database handle. +*/ +SQLITE_PRIVATE int sqlite3VdbeTransferError(Vdbe *p){ + sqlite3 *db = p->db; + int rc = p->rc; + if( p->zErrMsg ){ + db->bBenignMalloc++; + sqlite3BeginBenignMalloc(); + if( db->pErr==0 ) db->pErr = sqlite3ValueNew(db); + sqlite3ValueSetStr(db->pErr, -1, p->zErrMsg, SQLITE_UTF8, SQLITE_TRANSIENT); + sqlite3EndBenignMalloc(); + db->bBenignMalloc--; + }else if( db->pErr ){ + sqlite3ValueSetNull(db->pErr); + } + db->errCode = rc; + db->errByteOffset = -1; + return rc; +} + +#ifdef SQLITE_ENABLE_SQLLOG +/* +** If an SQLITE_CONFIG_SQLLOG hook is registered and the VM has been run, +** invoke it. +*/ +static void vdbeInvokeSqllog(Vdbe *v){ + if( sqlite3GlobalConfig.xSqllog && v->rc==SQLITE_OK && v->zSql && v->pc>=0 ){ + char *zExpanded = sqlite3VdbeExpandSql(v, v->zSql); + assert( v->db->init.busy==0 ); + if( zExpanded ){ + sqlite3GlobalConfig.xSqllog( + sqlite3GlobalConfig.pSqllogArg, v->db, zExpanded, 1 + ); + sqlite3DbFree(v->db, zExpanded); + } + } +} +#else +# define vdbeInvokeSqllog(x) +#endif + +/* +** Clean up a VDBE after execution but do not delete the VDBE just yet. +** Write any error messages into *pzErrMsg. Return the result code. +** +** After this routine is run, the VDBE should be ready to be executed +** again. +** +** To look at it another way, this routine resets the state of the +** virtual machine from VDBE_MAGIC_RUN or VDBE_MAGIC_HALT back to +** VDBE_MAGIC_INIT. +*/ +SQLITE_PRIVATE int sqlite3VdbeReset(Vdbe *p){ +#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) + int i; +#endif + + sqlite3 *db; + db = p->db; + + /* If the VM did not run to completion or if it encountered an + ** error, then it might not have been halted properly. So halt + ** it now. + */ + sqlite3VdbeHalt(p); + + /* If the VDBE has been run even partially, then transfer the error code + ** and error message from the VDBE into the main database structure. But + ** if the VDBE has just been set to run but has not actually executed any + ** instructions yet, leave the main database error information unchanged. + */ + if( p->pc>=0 ){ + vdbeInvokeSqllog(p); + if( db->pErr || p->zErrMsg ){ + sqlite3VdbeTransferError(p); + }else{ + db->errCode = p->rc; + } + if( p->runOnlyOnce ) p->expired = 1; + }else if( p->rc && p->expired ){ + /* The expired flag was set on the VDBE before the first call + ** to sqlite3_step(). For consistency (since sqlite3_step() was + ** called), set the database error in this case as well. + */ + sqlite3ErrorWithMsg(db, p->rc, p->zErrMsg ? "%s" : 0, p->zErrMsg); + } + + /* Reset register contents and reclaim error message memory. + */ +#ifdef SQLITE_DEBUG + /* Execute assert() statements to ensure that the Vdbe.apCsr[] and + ** Vdbe.aMem[] arrays have already been cleaned up. */ + if( p->apCsr ) for(i=0; inCursor; i++) assert( p->apCsr[i]==0 ); + if( p->aMem ){ + for(i=0; inMem; i++) assert( p->aMem[i].flags==MEM_Undefined ); + } +#endif + if( p->zErrMsg ){ + sqlite3DbFree(db, p->zErrMsg); + p->zErrMsg = 0; + } + p->pResultSet = 0; +#ifdef SQLITE_DEBUG + p->nWrite = 0; +#endif + + /* Save profiling information from this VDBE run. + */ +#ifdef VDBE_PROFILE + { + FILE *out = fopen("vdbe_profile.out", "a"); + if( out ){ + fprintf(out, "---- "); + for(i=0; inOp; i++){ + fprintf(out, "%02x", p->aOp[i].opcode); + } + fprintf(out, "\n"); + if( p->zSql ){ + char c, pc = 0; + fprintf(out, "-- "); + for(i=0; (c = p->zSql[i])!=0; i++){ + if( pc=='\n' ) fprintf(out, "-- "); + putc(c, out); + pc = c; + } + if( pc!='\n' ) fprintf(out, "\n"); + } + for(i=0; inOp; i++){ + char zHdr[100]; + sqlite3_snprintf(sizeof(zHdr), zHdr, "%6u %12llu %8llu ", + p->aOp[i].cnt, + p->aOp[i].cycles, + p->aOp[i].cnt>0 ? p->aOp[i].cycles/p->aOp[i].cnt : 0 + ); + fprintf(out, "%s", zHdr); + sqlite3VdbePrintOp(out, i, &p->aOp[i]); + } + fclose(out); + } + } +#endif + p->iVdbeMagic = VDBE_MAGIC_RESET; + return p->rc & db->errMask; +} + +/* +** Clean up and delete a VDBE after execution. Return an integer which is +** the result code. Write any error message text into *pzErrMsg. +*/ +SQLITE_PRIVATE int sqlite3VdbeFinalize(Vdbe *p){ + int rc = SQLITE_OK; + if( p->iVdbeMagic==VDBE_MAGIC_RUN || p->iVdbeMagic==VDBE_MAGIC_HALT ){ + rc = sqlite3VdbeReset(p); + assert( (rc & p->db->errMask)==rc ); + } + sqlite3VdbeDelete(p); + return rc; +} + +/* +** If parameter iOp is less than zero, then invoke the destructor for +** all auxiliary data pointers currently cached by the VM passed as +** the first argument. +** +** Or, if iOp is greater than or equal to zero, then the destructor is +** only invoked for those auxiliary data pointers created by the user +** function invoked by the OP_Function opcode at instruction iOp of +** VM pVdbe, and only then if: +** +** * the associated function parameter is the 32nd or later (counting +** from left to right), or +** +** * the corresponding bit in argument mask is clear (where the first +** function parameter corresponds to bit 0 etc.). +*/ +SQLITE_PRIVATE void sqlite3VdbeDeleteAuxData(sqlite3 *db, AuxData **pp, int iOp, int mask){ + while( *pp ){ + AuxData *pAux = *pp; + if( (iOp<0) + || (pAux->iAuxOp==iOp + && pAux->iAuxArg>=0 + && (pAux->iAuxArg>31 || !(mask & MASKBIT32(pAux->iAuxArg)))) + ){ + testcase( pAux->iAuxArg==31 ); + if( pAux->xDeleteAux ){ + pAux->xDeleteAux(pAux->pAux); + } + *pp = pAux->pNextAux; + sqlite3DbFree(db, pAux); + }else{ + pp= &pAux->pNextAux; + } + } +} + +/* +** Free all memory associated with the Vdbe passed as the second argument, +** except for object itself, which is preserved. +** +** The difference between this function and sqlite3VdbeDelete() is that +** VdbeDelete() also unlinks the Vdbe from the list of VMs associated with +** the database connection and frees the object itself. +*/ +SQLITE_PRIVATE void sqlite3VdbeClearObject(sqlite3 *db, Vdbe *p){ + SubProgram *pSub, *pNext; + assert( p->db==0 || p->db==db ); + releaseMemArray(p->aColName, p->nResColumn*COLNAME_N); + for(pSub=p->pProgram; pSub; pSub=pNext){ + pNext = pSub->pNext; + vdbeFreeOpArray(db, pSub->aOp, pSub->nOp); + sqlite3DbFree(db, pSub); + } + if( p->iVdbeMagic!=VDBE_MAGIC_INIT ){ + releaseMemArray(p->aVar, p->nVar); + sqlite3DbFree(db, p->pVList); + sqlite3DbFree(db, p->pFree); + } + vdbeFreeOpArray(db, p->aOp, p->nOp); + sqlite3DbFree(db, p->aColName); + sqlite3DbFree(db, p->zSql); +#ifdef SQLITE_ENABLE_NORMALIZE + sqlite3DbFree(db, p->zNormSql); + { + DblquoteStr *pThis, *pNext; + for(pThis=p->pDblStr; pThis; pThis=pNext){ + pNext = pThis->pNextStr; + sqlite3DbFree(db, pThis); + } + } +#endif +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + { + int i; + for(i=0; inScan; i++){ + sqlite3DbFree(db, p->aScan[i].zName); + } + sqlite3DbFree(db, p->aScan); + } +#endif +} + +/* +** Delete an entire VDBE. +*/ +SQLITE_PRIVATE void sqlite3VdbeDelete(Vdbe *p){ + sqlite3 *db; + + assert( p!=0 ); + db = p->db; + assert( sqlite3_mutex_held(db->mutex) ); + sqlite3VdbeClearObject(db, p); + if( p->pPrev ){ + p->pPrev->pNext = p->pNext; + }else{ + assert( db->pVdbe==p ); + db->pVdbe = p->pNext; + } + if( p->pNext ){ + p->pNext->pPrev = p->pPrev; + } + p->iVdbeMagic = VDBE_MAGIC_DEAD; + p->db = 0; + sqlite3DbFreeNN(db, p); +} + +/* +** The cursor "p" has a pending seek operation that has not yet been +** carried out. Seek the cursor now. If an error occurs, return +** the appropriate error code. +*/ +SQLITE_PRIVATE int SQLITE_NOINLINE sqlite3VdbeFinishMoveto(VdbeCursor *p){ + int res, rc; +#ifdef SQLITE_TEST + extern int sqlite3_search_count; +#endif + assert( p->deferredMoveto ); + assert( p->isTable ); + assert( p->eCurType==CURTYPE_BTREE ); + rc = sqlite3BtreeTableMoveto(p->uc.pCursor, p->movetoTarget, 0, &res); + if( rc ) return rc; + if( res!=0 ) return SQLITE_CORRUPT_BKPT; +#ifdef SQLITE_TEST + sqlite3_search_count++; +#endif + p->deferredMoveto = 0; + p->cacheStatus = CACHE_STALE; + return SQLITE_OK; +} + +/* +** Something has moved cursor "p" out of place. Maybe the row it was +** pointed to was deleted out from under it. Or maybe the btree was +** rebalanced. Whatever the cause, try to restore "p" to the place it +** is supposed to be pointing. If the row was deleted out from under the +** cursor, set the cursor to point to a NULL row. +*/ +static int SQLITE_NOINLINE handleMovedCursor(VdbeCursor *p){ + int isDifferentRow, rc; + assert( p->eCurType==CURTYPE_BTREE ); + assert( p->uc.pCursor!=0 ); + assert( sqlite3BtreeCursorHasMoved(p->uc.pCursor) ); + rc = sqlite3BtreeCursorRestore(p->uc.pCursor, &isDifferentRow); + p->cacheStatus = CACHE_STALE; + if( isDifferentRow ) p->nullRow = 1; + return rc; +} + +/* +** Check to ensure that the cursor is valid. Restore the cursor +** if need be. Return any I/O error from the restore operation. +*/ +SQLITE_PRIVATE int sqlite3VdbeCursorRestore(VdbeCursor *p){ + assert( p->eCurType==CURTYPE_BTREE ); + if( sqlite3BtreeCursorHasMoved(p->uc.pCursor) ){ + return handleMovedCursor(p); + } + return SQLITE_OK; +} + +/* +** Make sure the cursor p is ready to read or write the row to which it +** was last positioned. Return an error code if an OOM fault or I/O error +** prevents us from positioning the cursor to its correct position. +** +** If a MoveTo operation is pending on the given cursor, then do that +** MoveTo now. If no move is pending, check to see if the row has been +** deleted out from under the cursor and if it has, mark the row as +** a NULL row. +** +** If the cursor is already pointing to the correct row and that row has +** not been deleted out from under the cursor, then this routine is a no-op. +*/ +SQLITE_PRIVATE int sqlite3VdbeCursorMoveto(VdbeCursor **pp, u32 *piCol){ + VdbeCursor *p = *pp; + assert( p->eCurType==CURTYPE_BTREE || p->eCurType==CURTYPE_PSEUDO ); + if( p->deferredMoveto ){ + u32 iMap; + assert( !p->isEphemeral ); + if( p->ub.aAltMap && (iMap = p->ub.aAltMap[1+*piCol])>0 && !p->nullRow ){ + *pp = p->pAltCursor; + *piCol = iMap - 1; + return SQLITE_OK; + } + return sqlite3VdbeFinishMoveto(p); + } + if( sqlite3BtreeCursorHasMoved(p->uc.pCursor) ){ + return handleMovedCursor(p); + } + return SQLITE_OK; +} + +/* +** The following functions: +** +** sqlite3VdbeSerialType() +** sqlite3VdbeSerialTypeLen() +** sqlite3VdbeSerialLen() +** sqlite3VdbeSerialPut() +** sqlite3VdbeSerialGet() +** +** encapsulate the code that serializes values for storage in SQLite +** data and index records. Each serialized value consists of a +** 'serial-type' and a blob of data. The serial type is an 8-byte unsigned +** integer, stored as a varint. +** +** In an SQLite index record, the serial type is stored directly before +** the blob of data that it corresponds to. In a table record, all serial +** types are stored at the start of the record, and the blobs of data at +** the end. Hence these functions allow the caller to handle the +** serial-type and data blob separately. +** +** The following table describes the various storage classes for data: +** +** serial type bytes of data type +** -------------- --------------- --------------- +** 0 0 NULL +** 1 1 signed integer +** 2 2 signed integer +** 3 3 signed integer +** 4 4 signed integer +** 5 6 signed integer +** 6 8 signed integer +** 7 8 IEEE float +** 8 0 Integer constant 0 +** 9 0 Integer constant 1 +** 10,11 reserved for expansion +** N>=12 and even (N-12)/2 BLOB +** N>=13 and odd (N-13)/2 text +** +** The 8 and 9 types were added in 3.3.0, file format 4. Prior versions +** of SQLite will not understand those serial types. +*/ + +#if 0 /* Inlined into the OP_MakeRecord opcode */ +/* +** Return the serial-type for the value stored in pMem. +** +** This routine might convert a large MEM_IntReal value into MEM_Real. +** +** 2019-07-11: The primary user of this subroutine was the OP_MakeRecord +** opcode in the byte-code engine. But by moving this routine in-line, we +** can omit some redundant tests and make that opcode a lot faster. So +** this routine is now only used by the STAT3 logic and STAT3 support has +** ended. The code is kept here for historical reference only. +*/ +SQLITE_PRIVATE u32 sqlite3VdbeSerialType(Mem *pMem, int file_format, u32 *pLen){ + int flags = pMem->flags; + u32 n; + + assert( pLen!=0 ); + if( flags&MEM_Null ){ + *pLen = 0; + return 0; + } + if( flags&(MEM_Int|MEM_IntReal) ){ + /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. */ +# define MAX_6BYTE ((((i64)0x00008000)<<32)-1) + i64 i = pMem->u.i; + u64 u; + testcase( flags & MEM_Int ); + testcase( flags & MEM_IntReal ); + if( i<0 ){ + u = ~i; + }else{ + u = i; + } + if( u<=127 ){ + if( (i&1)==i && file_format>=4 ){ + *pLen = 0; + return 8+(u32)u; + }else{ + *pLen = 1; + return 1; + } + } + if( u<=32767 ){ *pLen = 2; return 2; } + if( u<=8388607 ){ *pLen = 3; return 3; } + if( u<=2147483647 ){ *pLen = 4; return 4; } + if( u<=MAX_6BYTE ){ *pLen = 6; return 5; } + *pLen = 8; + if( flags&MEM_IntReal ){ + /* If the value is IntReal and is going to take up 8 bytes to store + ** as an integer, then we might as well make it an 8-byte floating + ** point value */ + pMem->u.r = (double)pMem->u.i; + pMem->flags &= ~MEM_IntReal; + pMem->flags |= MEM_Real; + return 7; + } + return 6; + } + if( flags&MEM_Real ){ + *pLen = 8; + return 7; + } + assert( pMem->db->mallocFailed || flags&(MEM_Str|MEM_Blob) ); + assert( pMem->n>=0 ); + n = (u32)pMem->n; + if( flags & MEM_Zero ){ + n += pMem->u.nZero; + } + *pLen = n; + return ((n*2) + 12 + ((flags&MEM_Str)!=0)); +} +#endif /* inlined into OP_MakeRecord */ + +/* +** The sizes for serial types less than 128 +*/ +static const u8 sqlite3SmallTypeSizes[] = { + /* 0 1 2 3 4 5 6 7 8 9 */ +/* 0 */ 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, +/* 10 */ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, +/* 20 */ 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, +/* 30 */ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, +/* 40 */ 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, +/* 50 */ 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, +/* 60 */ 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, +/* 70 */ 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, +/* 80 */ 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, +/* 90 */ 39, 39, 40, 40, 41, 41, 42, 42, 43, 43, +/* 100 */ 44, 44, 45, 45, 46, 46, 47, 47, 48, 48, +/* 110 */ 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, +/* 120 */ 54, 54, 55, 55, 56, 56, 57, 57 +}; + +/* +** Return the length of the data corresponding to the supplied serial-type. +*/ +SQLITE_PRIVATE u32 sqlite3VdbeSerialTypeLen(u32 serial_type){ + if( serial_type>=128 ){ + return (serial_type-12)/2; + }else{ + assert( serial_type<12 + || sqlite3SmallTypeSizes[serial_type]==(serial_type - 12)/2 ); + return sqlite3SmallTypeSizes[serial_type]; + } +} +SQLITE_PRIVATE u8 sqlite3VdbeOneByteSerialTypeLen(u8 serial_type){ + assert( serial_type<128 ); + return sqlite3SmallTypeSizes[serial_type]; +} + +/* +** If we are on an architecture with mixed-endian floating +** points (ex: ARM7) then swap the lower 4 bytes with the +** upper 4 bytes. Return the result. +** +** For most architectures, this is a no-op. +** +** (later): It is reported to me that the mixed-endian problem +** on ARM7 is an issue with GCC, not with the ARM7 chip. It seems +** that early versions of GCC stored the two words of a 64-bit +** float in the wrong order. And that error has been propagated +** ever since. The blame is not necessarily with GCC, though. +** GCC might have just copying the problem from a prior compiler. +** I am also told that newer versions of GCC that follow a different +** ABI get the byte order right. +** +** Developers using SQLite on an ARM7 should compile and run their +** application using -DSQLITE_DEBUG=1 at least once. With DEBUG +** enabled, some asserts below will ensure that the byte order of +** floating point values is correct. +** +** (2007-08-30) Frank van Vugt has studied this problem closely +** and has send his findings to the SQLite developers. Frank +** writes that some Linux kernels offer floating point hardware +** emulation that uses only 32-bit mantissas instead of a full +** 48-bits as required by the IEEE standard. (This is the +** CONFIG_FPE_FASTFPE option.) On such systems, floating point +** byte swapping becomes very complicated. To avoid problems, +** the necessary byte swapping is carried out using a 64-bit integer +** rather than a 64-bit float. Frank assures us that the code here +** works for him. We, the developers, have no way to independently +** verify this, but Frank seems to know what he is talking about +** so we trust him. +*/ +#ifdef SQLITE_MIXED_ENDIAN_64BIT_FLOAT +static u64 floatSwap(u64 in){ + union { + u64 r; + u32 i[2]; + } u; + u32 t; + + u.r = in; + t = u.i[0]; + u.i[0] = u.i[1]; + u.i[1] = t; + return u.r; +} +# define swapMixedEndianFloat(X) X = floatSwap(X) +#else +# define swapMixedEndianFloat(X) +#endif + +/* +** Write the serialized data blob for the value stored in pMem into +** buf. It is assumed that the caller has allocated sufficient space. +** Return the number of bytes written. +** +** nBuf is the amount of space left in buf[]. The caller is responsible +** for allocating enough space to buf[] to hold the entire field, exclusive +** of the pMem->u.nZero bytes for a MEM_Zero value. +** +** Return the number of bytes actually written into buf[]. The number +** of bytes in the zero-filled tail is included in the return value only +** if those bytes were zeroed in buf[]. +*/ +SQLITE_PRIVATE u32 sqlite3VdbeSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ + u32 len; + + /* Integer and Real */ + if( serial_type<=7 && serial_type>0 ){ + u64 v; + u32 i; + if( serial_type==7 ){ + assert( sizeof(v)==sizeof(pMem->u.r) ); + memcpy(&v, &pMem->u.r, sizeof(v)); + swapMixedEndianFloat(v); + }else{ + v = pMem->u.i; + } + len = i = sqlite3SmallTypeSizes[serial_type]; + assert( i>0 ); + do{ + buf[--i] = (u8)(v&0xFF); + v >>= 8; + }while( i ); + return len; + } + + /* String or blob */ + if( serial_type>=12 ){ + assert( pMem->n + ((pMem->flags & MEM_Zero)?pMem->u.nZero:0) + == (int)sqlite3VdbeSerialTypeLen(serial_type) ); + len = pMem->n; + if( len>0 ) memcpy(buf, pMem->z, len); + return len; + } + + /* NULL or constants 0 or 1 */ + return 0; +} + +/* Input "x" is a sequence of unsigned characters that represent a +** big-endian integer. Return the equivalent native integer +*/ +#define ONE_BYTE_INT(x) ((i8)(x)[0]) +#define TWO_BYTE_INT(x) (256*(i8)((x)[0])|(x)[1]) +#define THREE_BYTE_INT(x) (65536*(i8)((x)[0])|((x)[1]<<8)|(x)[2]) +#define FOUR_BYTE_UINT(x) (((u32)(x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3]) +#define FOUR_BYTE_INT(x) (16777216*(i8)((x)[0])|((x)[1]<<16)|((x)[2]<<8)|(x)[3]) + +/* +** Deserialize the data blob pointed to by buf as serial type serial_type +** and store the result in pMem. +** +** This function is implemented as two separate routines for performance. +** The few cases that require local variables are broken out into a separate +** routine so that in most cases the overhead of moving the stack pointer +** is avoided. +*/ +static void serialGet( + const unsigned char *buf, /* Buffer to deserialize from */ + u32 serial_type, /* Serial type to deserialize */ + Mem *pMem /* Memory cell to write value into */ +){ + u64 x = FOUR_BYTE_UINT(buf); + u32 y = FOUR_BYTE_UINT(buf+4); + x = (x<<32) + y; + if( serial_type==6 ){ + /* EVIDENCE-OF: R-29851-52272 Value is a big-endian 64-bit + ** twos-complement integer. */ + pMem->u.i = *(i64*)&x; + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + }else{ + /* EVIDENCE-OF: R-57343-49114 Value is a big-endian IEEE 754-2008 64-bit + ** floating point number. */ +#if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT) + /* Verify that integers and floating point values use the same + ** byte order. Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is + ** defined that 64-bit floating point values really are mixed + ** endian. + */ + static const u64 t1 = ((u64)0x3ff00000)<<32; + static const double r1 = 1.0; + u64 t2 = t1; + swapMixedEndianFloat(t2); + assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 ); +#endif + assert( sizeof(x)==8 && sizeof(pMem->u.r)==8 ); + swapMixedEndianFloat(x); + memcpy(&pMem->u.r, &x, sizeof(x)); + pMem->flags = IsNaN(x) ? MEM_Null : MEM_Real; + } +} +SQLITE_PRIVATE void sqlite3VdbeSerialGet( + const unsigned char *buf, /* Buffer to deserialize from */ + u32 serial_type, /* Serial type to deserialize */ + Mem *pMem /* Memory cell to write value into */ +){ + switch( serial_type ){ + case 10: { /* Internal use only: NULL with virtual table + ** UPDATE no-change flag set */ + pMem->flags = MEM_Null|MEM_Zero; + pMem->n = 0; + pMem->u.nZero = 0; + return; + } + case 11: /* Reserved for future use */ + case 0: { /* Null */ + /* EVIDENCE-OF: R-24078-09375 Value is a NULL. */ + pMem->flags = MEM_Null; + return; + } + case 1: { + /* EVIDENCE-OF: R-44885-25196 Value is an 8-bit twos-complement + ** integer. */ + pMem->u.i = ONE_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return; + } + case 2: { /* 2-byte signed integer */ + /* EVIDENCE-OF: R-49794-35026 Value is a big-endian 16-bit + ** twos-complement integer. */ + pMem->u.i = TWO_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return; + } + case 3: { /* 3-byte signed integer */ + /* EVIDENCE-OF: R-37839-54301 Value is a big-endian 24-bit + ** twos-complement integer. */ + pMem->u.i = THREE_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return; + } + case 4: { /* 4-byte signed integer */ + /* EVIDENCE-OF: R-01849-26079 Value is a big-endian 32-bit + ** twos-complement integer. */ + pMem->u.i = FOUR_BYTE_INT(buf); +#ifdef __HP_cc + /* Work around a sign-extension bug in the HP compiler for HP/UX */ + if( buf[0]&0x80 ) pMem->u.i |= 0xffffffff80000000LL; +#endif + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return; + } + case 5: { /* 6-byte signed integer */ + /* EVIDENCE-OF: R-50385-09674 Value is a big-endian 48-bit + ** twos-complement integer. */ + pMem->u.i = FOUR_BYTE_UINT(buf+2) + (((i64)1)<<32)*TWO_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return; + } + case 6: /* 8-byte signed integer */ + case 7: { /* IEEE floating point */ + /* These use local variables, so do them in a separate routine + ** to avoid having to move the frame pointer in the common case */ + serialGet(buf,serial_type,pMem); + return; + } + case 8: /* Integer 0 */ + case 9: { /* Integer 1 */ + /* EVIDENCE-OF: R-12976-22893 Value is the integer 0. */ + /* EVIDENCE-OF: R-18143-12121 Value is the integer 1. */ + pMem->u.i = serial_type-8; + pMem->flags = MEM_Int; + return; + } + default: { + /* EVIDENCE-OF: R-14606-31564 Value is a BLOB that is (N-12)/2 bytes in + ** length. + ** EVIDENCE-OF: R-28401-00140 Value is a string in the text encoding and + ** (N-13)/2 bytes in length. */ + static const u16 aFlag[] = { MEM_Blob|MEM_Ephem, MEM_Str|MEM_Ephem }; + pMem->z = (char *)buf; + pMem->n = (serial_type-12)/2; + pMem->flags = aFlag[serial_type&1]; + return; + } + } + return; +} +/* +** This routine is used to allocate sufficient space for an UnpackedRecord +** structure large enough to be used with sqlite3VdbeRecordUnpack() if +** the first argument is a pointer to KeyInfo structure pKeyInfo. +** +** The space is either allocated using sqlite3DbMallocRaw() or from within +** the unaligned buffer passed via the second and third arguments (presumably +** stack space). If the former, then *ppFree is set to a pointer that should +** be eventually freed by the caller using sqlite3DbFree(). Or, if the +** allocation comes from the pSpace/szSpace buffer, *ppFree is set to NULL +** before returning. +** +** If an OOM error occurs, NULL is returned. +*/ +SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord( + KeyInfo *pKeyInfo /* Description of the record */ +){ + UnpackedRecord *p; /* Unpacked record to return */ + int nByte; /* Number of bytes required for *p */ + nByte = ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*(pKeyInfo->nKeyField+1); + p = (UnpackedRecord *)sqlite3DbMallocRaw(pKeyInfo->db, nByte); + if( !p ) return 0; + p->aMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))]; + assert( pKeyInfo->aSortFlags!=0 ); + p->pKeyInfo = pKeyInfo; + p->nField = pKeyInfo->nKeyField + 1; + return p; +} + +/* +** Given the nKey-byte encoding of a record in pKey[], populate the +** UnpackedRecord structure indicated by the fourth argument with the +** contents of the decoded record. +*/ +SQLITE_PRIVATE void sqlite3VdbeRecordUnpack( + KeyInfo *pKeyInfo, /* Information about the record format */ + int nKey, /* Size of the binary record */ + const void *pKey, /* The binary record */ + UnpackedRecord *p /* Populate this structure before returning. */ +){ + const unsigned char *aKey = (const unsigned char *)pKey; + u32 d; + u32 idx; /* Offset in aKey[] to read from */ + u16 u; /* Unsigned loop counter */ + u32 szHdr; + Mem *pMem = p->aMem; + + p->default_rc = 0; + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + idx = getVarint32(aKey, szHdr); + d = szHdr; + u = 0; + while( idxenc = pKeyInfo->enc; + pMem->db = pKeyInfo->db; + /* pMem->flags = 0; // sqlite3VdbeSerialGet() will set this for us */ + pMem->szMalloc = 0; + pMem->z = 0; + sqlite3VdbeSerialGet(&aKey[d], serial_type, pMem); + d += sqlite3VdbeSerialTypeLen(serial_type); + pMem++; + if( (++u)>=p->nField ) break; + } + if( d>(u32)nKey && u ){ + assert( CORRUPT_DB ); + /* In a corrupt record entry, the last pMem might have been set up using + ** uninitialized memory. Overwrite its value with NULL, to prevent + ** warnings from MSAN. */ + sqlite3VdbeMemSetNull(pMem-1); + } + assert( u<=pKeyInfo->nKeyField + 1 ); + p->nField = u; +} + +#ifdef SQLITE_DEBUG +/* +** This function compares two index or table record keys in the same way +** as the sqlite3VdbeRecordCompare() routine. Unlike VdbeRecordCompare(), +** this function deserializes and compares values using the +** sqlite3VdbeSerialGet() and sqlite3MemCompare() functions. It is used +** in assert() statements to ensure that the optimized code in +** sqlite3VdbeRecordCompare() returns results with these two primitives. +** +** Return true if the result of comparison is equivalent to desiredResult. +** Return false if there is a disagreement. +*/ +static int vdbeRecordCompareDebug( + int nKey1, const void *pKey1, /* Left key */ + const UnpackedRecord *pPKey2, /* Right key */ + int desiredResult /* Correct answer */ +){ + u32 d1; /* Offset into aKey[] of next data element */ + u32 idx1; /* Offset into aKey[] of next header element */ + u32 szHdr1; /* Number of bytes in header */ + int i = 0; + int rc = 0; + const unsigned char *aKey1 = (const unsigned char *)pKey1; + KeyInfo *pKeyInfo; + Mem mem1; + + pKeyInfo = pPKey2->pKeyInfo; + if( pKeyInfo->db==0 ) return 1; + mem1.enc = pKeyInfo->enc; + mem1.db = pKeyInfo->db; + /* mem1.flags = 0; // Will be initialized by sqlite3VdbeSerialGet() */ + VVA_ONLY( mem1.szMalloc = 0; ) /* Only needed by assert() statements */ + + /* Compilers may complain that mem1.u.i is potentially uninitialized. + ** We could initialize it, as shown here, to silence those complaints. + ** But in fact, mem1.u.i will never actually be used uninitialized, and doing + ** the unnecessary initialization has a measurable negative performance + ** impact, since this routine is a very high runner. And so, we choose + ** to ignore the compiler warnings and leave this variable uninitialized. + */ + /* mem1.u.i = 0; // not needed, here to silence compiler warning */ + + idx1 = getVarint32(aKey1, szHdr1); + if( szHdr1>98307 ) return SQLITE_CORRUPT; + d1 = szHdr1; + assert( pKeyInfo->nAllField>=pPKey2->nField || CORRUPT_DB ); + assert( pKeyInfo->aSortFlags!=0 ); + assert( pKeyInfo->nKeyField>0 ); + assert( idx1<=szHdr1 || CORRUPT_DB ); + do{ + u32 serial_type1; + + /* Read the serial types for the next element in each key. */ + idx1 += getVarint32( aKey1+idx1, serial_type1 ); + + /* Verify that there is enough key space remaining to avoid + ** a buffer overread. The "d1+serial_type1+2" subexpression will + ** always be greater than or equal to the amount of required key space. + ** Use that approximation to avoid the more expensive call to + ** sqlite3VdbeSerialTypeLen() in the common case. + */ + if( d1+(u64)serial_type1+2>(u64)nKey1 + && d1+(u64)sqlite3VdbeSerialTypeLen(serial_type1)>(u64)nKey1 + ){ + break; + } + + /* Extract the values to be compared. + */ + sqlite3VdbeSerialGet(&aKey1[d1], serial_type1, &mem1); + d1 += sqlite3VdbeSerialTypeLen(serial_type1); + + /* Do the comparison + */ + rc = sqlite3MemCompare(&mem1, &pPKey2->aMem[i], + pKeyInfo->nAllField>i ? pKeyInfo->aColl[i] : 0); + if( rc!=0 ){ + assert( mem1.szMalloc==0 ); /* See comment below */ + if( (pKeyInfo->aSortFlags[i] & KEYINFO_ORDER_BIGNULL) + && ((mem1.flags & MEM_Null) || (pPKey2->aMem[i].flags & MEM_Null)) + ){ + rc = -rc; + } + if( pKeyInfo->aSortFlags[i] & KEYINFO_ORDER_DESC ){ + rc = -rc; /* Invert the result for DESC sort order. */ + } + goto debugCompareEnd; + } + i++; + }while( idx1nField ); + + /* No memory allocation is ever used on mem1. Prove this using + ** the following assert(). If the assert() fails, it indicates a + ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1). + */ + assert( mem1.szMalloc==0 ); + + /* rc==0 here means that one of the keys ran out of fields and + ** all the fields up to that point were equal. Return the default_rc + ** value. */ + rc = pPKey2->default_rc; + +debugCompareEnd: + if( desiredResult==0 && rc==0 ) return 1; + if( desiredResult<0 && rc<0 ) return 1; + if( desiredResult>0 && rc>0 ) return 1; + if( CORRUPT_DB ) return 1; + if( pKeyInfo->db->mallocFailed ) return 1; + return 0; +} +#endif + +#ifdef SQLITE_DEBUG +/* +** Count the number of fields (a.k.a. columns) in the record given by +** pKey,nKey. The verify that this count is less than or equal to the +** limit given by pKeyInfo->nAllField. +** +** If this constraint is not satisfied, it means that the high-speed +** vdbeRecordCompareInt() and vdbeRecordCompareString() routines will +** not work correctly. If this assert() ever fires, it probably means +** that the KeyInfo.nKeyField or KeyInfo.nAllField values were computed +** incorrectly. +*/ +static void vdbeAssertFieldCountWithinLimits( + int nKey, const void *pKey, /* The record to verify */ + const KeyInfo *pKeyInfo /* Compare size with this KeyInfo */ +){ + int nField = 0; + u32 szHdr; + u32 idx; + u32 notUsed; + const unsigned char *aKey = (const unsigned char*)pKey; + + if( CORRUPT_DB ) return; + idx = getVarint32(aKey, szHdr); + assert( nKey>=0 ); + assert( szHdr<=(u32)nKey ); + while( idxnAllField ); +} +#else +# define vdbeAssertFieldCountWithinLimits(A,B,C) +#endif + +/* +** Both *pMem1 and *pMem2 contain string values. Compare the two values +** using the collation sequence pColl. As usual, return a negative , zero +** or positive value if *pMem1 is less than, equal to or greater than +** *pMem2, respectively. Similar in spirit to "rc = (*pMem1) - (*pMem2);". +*/ +static int vdbeCompareMemString( + const Mem *pMem1, + const Mem *pMem2, + const CollSeq *pColl, + u8 *prcErr /* If an OOM occurs, set to SQLITE_NOMEM */ +){ + if( pMem1->enc==pColl->enc ){ + /* The strings are already in the correct encoding. Call the + ** comparison function directly */ + return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z); + }else{ + int rc; + const void *v1, *v2; + Mem c1; + Mem c2; + sqlite3VdbeMemInit(&c1, pMem1->db, MEM_Null); + sqlite3VdbeMemInit(&c2, pMem1->db, MEM_Null); + sqlite3VdbeMemShallowCopy(&c1, pMem1, MEM_Ephem); + sqlite3VdbeMemShallowCopy(&c2, pMem2, MEM_Ephem); + v1 = sqlite3ValueText((sqlite3_value*)&c1, pColl->enc); + v2 = sqlite3ValueText((sqlite3_value*)&c2, pColl->enc); + if( (v1==0 || v2==0) ){ + if( prcErr ) *prcErr = SQLITE_NOMEM_BKPT; + rc = 0; + }else{ + rc = pColl->xCmp(pColl->pUser, c1.n, v1, c2.n, v2); + } + sqlite3VdbeMemRelease(&c1); + sqlite3VdbeMemRelease(&c2); + return rc; + } +} + +/* +** The input pBlob is guaranteed to be a Blob that is not marked +** with MEM_Zero. Return true if it could be a zero-blob. +*/ +static int isAllZero(const char *z, int n){ + int i; + for(i=0; in; + int n2 = pB2->n; + + /* It is possible to have a Blob value that has some non-zero content + ** followed by zero content. But that only comes up for Blobs formed + ** by the OP_MakeRecord opcode, and such Blobs never get passed into + ** sqlite3MemCompare(). */ + assert( (pB1->flags & MEM_Zero)==0 || n1==0 ); + assert( (pB2->flags & MEM_Zero)==0 || n2==0 ); + + if( (pB1->flags|pB2->flags) & MEM_Zero ){ + if( pB1->flags & pB2->flags & MEM_Zero ){ + return pB1->u.nZero - pB2->u.nZero; + }else if( pB1->flags & MEM_Zero ){ + if( !isAllZero(pB2->z, pB2->n) ) return -1; + return pB1->u.nZero - n2; + }else{ + if( !isAllZero(pB1->z, pB1->n) ) return +1; + return n1 - pB2->u.nZero; + } + } + c = memcmp(pB1->z, pB2->z, n1>n2 ? n2 : n1); + if( c ) return c; + return n1 - n2; +} + +/* +** Do a comparison between a 64-bit signed integer and a 64-bit floating-point +** number. Return negative, zero, or positive if the first (i64) is less than, +** equal to, or greater than the second (double). +*/ +SQLITE_PRIVATE int sqlite3IntFloatCompare(i64 i, double r){ + if( sizeof(LONGDOUBLE_TYPE)>8 ){ + LONGDOUBLE_TYPE x = (LONGDOUBLE_TYPE)i; + testcase( xr ); + testcase( x==r ); + if( xr ) return +1; /*NO_TEST*/ /* work around bugs in gcov */ + return 0; /*NO_TEST*/ /* work around bugs in gcov */ + }else{ + i64 y; + double s; + if( r<-9223372036854775808.0 ) return +1; + if( r>=9223372036854775808.0 ) return -1; + y = (i64)r; + if( iy ) return +1; + s = (double)i; + if( sr ) return +1; + return 0; + } +} + +/* +** Compare the values contained by the two memory cells, returning +** negative, zero or positive if pMem1 is less than, equal to, or greater +** than pMem2. Sorting order is NULL's first, followed by numbers (integers +** and reals) sorted numerically, followed by text ordered by the collating +** sequence pColl and finally blob's ordered by memcmp(). +** +** Two NULL values are considered equal by this function. +*/ +SQLITE_PRIVATE int sqlite3MemCompare(const Mem *pMem1, const Mem *pMem2, const CollSeq *pColl){ + int f1, f2; + int combined_flags; + + f1 = pMem1->flags; + f2 = pMem2->flags; + combined_flags = f1|f2; + assert( !sqlite3VdbeMemIsRowSet(pMem1) && !sqlite3VdbeMemIsRowSet(pMem2) ); + + /* If one value is NULL, it is less than the other. If both values + ** are NULL, return 0. + */ + if( combined_flags&MEM_Null ){ + return (f2&MEM_Null) - (f1&MEM_Null); + } + + /* At least one of the two values is a number + */ + if( combined_flags&(MEM_Int|MEM_Real|MEM_IntReal) ){ + testcase( combined_flags & MEM_Int ); + testcase( combined_flags & MEM_Real ); + testcase( combined_flags & MEM_IntReal ); + if( (f1 & f2 & (MEM_Int|MEM_IntReal))!=0 ){ + testcase( f1 & f2 & MEM_Int ); + testcase( f1 & f2 & MEM_IntReal ); + if( pMem1->u.i < pMem2->u.i ) return -1; + if( pMem1->u.i > pMem2->u.i ) return +1; + return 0; + } + if( (f1 & f2 & MEM_Real)!=0 ){ + if( pMem1->u.r < pMem2->u.r ) return -1; + if( pMem1->u.r > pMem2->u.r ) return +1; + return 0; + } + if( (f1&(MEM_Int|MEM_IntReal))!=0 ){ + testcase( f1 & MEM_Int ); + testcase( f1 & MEM_IntReal ); + if( (f2&MEM_Real)!=0 ){ + return sqlite3IntFloatCompare(pMem1->u.i, pMem2->u.r); + }else if( (f2&(MEM_Int|MEM_IntReal))!=0 ){ + if( pMem1->u.i < pMem2->u.i ) return -1; + if( pMem1->u.i > pMem2->u.i ) return +1; + return 0; + }else{ + return -1; + } + } + if( (f1&MEM_Real)!=0 ){ + if( (f2&(MEM_Int|MEM_IntReal))!=0 ){ + testcase( f2 & MEM_Int ); + testcase( f2 & MEM_IntReal ); + return -sqlite3IntFloatCompare(pMem2->u.i, pMem1->u.r); + }else{ + return -1; + } + } + return +1; + } + + /* If one value is a string and the other is a blob, the string is less. + ** If both are strings, compare using the collating functions. + */ + if( combined_flags&MEM_Str ){ + if( (f1 & MEM_Str)==0 ){ + return 1; + } + if( (f2 & MEM_Str)==0 ){ + return -1; + } + + assert( pMem1->enc==pMem2->enc || pMem1->db->mallocFailed ); + assert( pMem1->enc==SQLITE_UTF8 || + pMem1->enc==SQLITE_UTF16LE || pMem1->enc==SQLITE_UTF16BE ); + + /* The collation sequence must be defined at this point, even if + ** the user deletes the collation sequence after the vdbe program is + ** compiled (this was not always the case). + */ + assert( !pColl || pColl->xCmp ); + + if( pColl ){ + return vdbeCompareMemString(pMem1, pMem2, pColl, 0); + } + /* If a NULL pointer was passed as the collate function, fall through + ** to the blob case and use memcmp(). */ + } + + /* Both values must be blobs. Compare using memcmp(). */ + return sqlite3BlobCompare(pMem1, pMem2); +} + + +/* +** The first argument passed to this function is a serial-type that +** corresponds to an integer - all values between 1 and 9 inclusive +** except 7. The second points to a buffer containing an integer value +** serialized according to serial_type. This function deserializes +** and returns the value. +*/ +static i64 vdbeRecordDecodeInt(u32 serial_type, const u8 *aKey){ + u32 y; + assert( CORRUPT_DB || (serial_type>=1 && serial_type<=9 && serial_type!=7) ); + switch( serial_type ){ + case 0: + case 1: + testcase( aKey[0]&0x80 ); + return ONE_BYTE_INT(aKey); + case 2: + testcase( aKey[0]&0x80 ); + return TWO_BYTE_INT(aKey); + case 3: + testcase( aKey[0]&0x80 ); + return THREE_BYTE_INT(aKey); + case 4: { + testcase( aKey[0]&0x80 ); + y = FOUR_BYTE_UINT(aKey); + return (i64)*(int*)&y; + } + case 5: { + testcase( aKey[0]&0x80 ); + return FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey); + } + case 6: { + u64 x = FOUR_BYTE_UINT(aKey); + testcase( aKey[0]&0x80 ); + x = (x<<32) | FOUR_BYTE_UINT(aKey+4); + return (i64)*(i64*)&x; + } + } + + return (serial_type - 8); +} + +/* +** This function compares the two table rows or index records +** specified by {nKey1, pKey1} and pPKey2. It returns a negative, zero +** or positive integer if key1 is less than, equal to or +** greater than key2. The {nKey1, pKey1} key must be a blob +** created by the OP_MakeRecord opcode of the VDBE. The pPKey2 +** key must be a parsed key such as obtained from +** sqlite3VdbeParseRecord. +** +** If argument bSkip is non-zero, it is assumed that the caller has already +** determined that the first fields of the keys are equal. +** +** Key1 and Key2 do not have to contain the same number of fields. If all +** fields that appear in both keys are equal, then pPKey2->default_rc is +** returned. +** +** If database corruption is discovered, set pPKey2->errCode to +** SQLITE_CORRUPT and return 0. If an OOM error is encountered, +** pPKey2->errCode is set to SQLITE_NOMEM and, if it is not NULL, the +** malloc-failed flag set on database handle (pPKey2->pKeyInfo->db). +*/ +SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2, /* Right key */ + int bSkip /* If true, skip the first field */ +){ + u32 d1; /* Offset into aKey[] of next data element */ + int i; /* Index of next field to compare */ + u32 szHdr1; /* Size of record header in bytes */ + u32 idx1; /* Offset of first type in header */ + int rc = 0; /* Return value */ + Mem *pRhs = pPKey2->aMem; /* Next field of pPKey2 to compare */ + KeyInfo *pKeyInfo; + const unsigned char *aKey1 = (const unsigned char *)pKey1; + Mem mem1; + + /* If bSkip is true, then the caller has already determined that the first + ** two elements in the keys are equal. Fix the various stack variables so + ** that this routine begins comparing at the second field. */ + if( bSkip ){ + u32 s1; + idx1 = 1 + getVarint32(&aKey1[1], s1); + szHdr1 = aKey1[0]; + d1 = szHdr1 + sqlite3VdbeSerialTypeLen(s1); + i = 1; + pRhs++; + }else{ + idx1 = getVarint32(aKey1, szHdr1); + d1 = szHdr1; + i = 0; + } + if( d1>(unsigned)nKey1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + } + + VVA_ONLY( mem1.szMalloc = 0; ) /* Only needed by assert() statements */ + assert( pPKey2->pKeyInfo->nAllField>=pPKey2->nField + || CORRUPT_DB ); + assert( pPKey2->pKeyInfo->aSortFlags!=0 ); + assert( pPKey2->pKeyInfo->nKeyField>0 ); + assert( idx1<=szHdr1 || CORRUPT_DB ); + do{ + u32 serial_type; + + /* RHS is an integer */ + if( pRhs->flags & (MEM_Int|MEM_IntReal) ){ + testcase( pRhs->flags & MEM_Int ); + testcase( pRhs->flags & MEM_IntReal ); + serial_type = aKey1[idx1]; + testcase( serial_type==12 ); + if( serial_type>=10 ){ + rc = +1; + }else if( serial_type==0 ){ + rc = -1; + }else if( serial_type==7 ){ + sqlite3VdbeSerialGet(&aKey1[d1], serial_type, &mem1); + rc = -sqlite3IntFloatCompare(pRhs->u.i, mem1.u.r); + }else{ + i64 lhs = vdbeRecordDecodeInt(serial_type, &aKey1[d1]); + i64 rhs = pRhs->u.i; + if( lhsrhs ){ + rc = +1; + } + } + } + + /* RHS is real */ + else if( pRhs->flags & MEM_Real ){ + serial_type = aKey1[idx1]; + if( serial_type>=10 ){ + /* Serial types 12 or greater are strings and blobs (greater than + ** numbers). Types 10 and 11 are currently "reserved for future + ** use", so it doesn't really matter what the results of comparing + ** them to numberic values are. */ + rc = +1; + }else if( serial_type==0 ){ + rc = -1; + }else{ + sqlite3VdbeSerialGet(&aKey1[d1], serial_type, &mem1); + if( serial_type==7 ){ + if( mem1.u.ru.r ){ + rc = -1; + }else if( mem1.u.r>pRhs->u.r ){ + rc = +1; + } + }else{ + rc = sqlite3IntFloatCompare(mem1.u.i, pRhs->u.r); + } + } + } + + /* RHS is a string */ + else if( pRhs->flags & MEM_Str ){ + getVarint32NR(&aKey1[idx1], serial_type); + testcase( serial_type==12 ); + if( serial_type<12 ){ + rc = -1; + }else if( !(serial_type & 0x01) ){ + rc = +1; + }else{ + mem1.n = (serial_type - 12) / 2; + testcase( (d1+mem1.n)==(unsigned)nKey1 ); + testcase( (d1+mem1.n+1)==(unsigned)nKey1 ); + if( (d1+mem1.n) > (unsigned)nKey1 + || (pKeyInfo = pPKey2->pKeyInfo)->nAllField<=i + ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + }else if( pKeyInfo->aColl[i] ){ + mem1.enc = pKeyInfo->enc; + mem1.db = pKeyInfo->db; + mem1.flags = MEM_Str; + mem1.z = (char*)&aKey1[d1]; + rc = vdbeCompareMemString( + &mem1, pRhs, pKeyInfo->aColl[i], &pPKey2->errCode + ); + }else{ + int nCmp = MIN(mem1.n, pRhs->n); + rc = memcmp(&aKey1[d1], pRhs->z, nCmp); + if( rc==0 ) rc = mem1.n - pRhs->n; + } + } + } + + /* RHS is a blob */ + else if( pRhs->flags & MEM_Blob ){ + assert( (pRhs->flags & MEM_Zero)==0 || pRhs->n==0 ); + getVarint32NR(&aKey1[idx1], serial_type); + testcase( serial_type==12 ); + if( serial_type<12 || (serial_type & 0x01) ){ + rc = -1; + }else{ + int nStr = (serial_type - 12) / 2; + testcase( (d1+nStr)==(unsigned)nKey1 ); + testcase( (d1+nStr+1)==(unsigned)nKey1 ); + if( (d1+nStr) > (unsigned)nKey1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + }else if( pRhs->flags & MEM_Zero ){ + if( !isAllZero((const char*)&aKey1[d1],nStr) ){ + rc = 1; + }else{ + rc = nStr - pRhs->u.nZero; + } + }else{ + int nCmp = MIN(nStr, pRhs->n); + rc = memcmp(&aKey1[d1], pRhs->z, nCmp); + if( rc==0 ) rc = nStr - pRhs->n; + } + } + } + + /* RHS is null */ + else{ + serial_type = aKey1[idx1]; + rc = (serial_type!=0); + } + + if( rc!=0 ){ + int sortFlags = pPKey2->pKeyInfo->aSortFlags[i]; + if( sortFlags ){ + if( (sortFlags & KEYINFO_ORDER_BIGNULL)==0 + || ((sortFlags & KEYINFO_ORDER_DESC) + !=(serial_type==0 || (pRhs->flags&MEM_Null))) + ){ + rc = -rc; + } + } + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, rc) ); + assert( mem1.szMalloc==0 ); /* See comment below */ + return rc; + } + + i++; + if( i==pPKey2->nField ) break; + pRhs++; + d1 += sqlite3VdbeSerialTypeLen(serial_type); + idx1 += sqlite3VarintLen(serial_type); + }while( idx1<(unsigned)szHdr1 && d1<=(unsigned)nKey1 ); + + /* No memory allocation is ever used on mem1. Prove this using + ** the following assert(). If the assert() fails, it indicates a + ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1). */ + assert( mem1.szMalloc==0 ); + + /* rc==0 here means that one or both of the keys ran out of fields and + ** all the fields up to that point were equal. Return the default_rc + ** value. */ + assert( CORRUPT_DB + || vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, pPKey2->default_rc) + || pPKey2->pKeyInfo->db->mallocFailed + ); + pPKey2->eqSeen = 1; + return pPKey2->default_rc; +} +SQLITE_PRIVATE int sqlite3VdbeRecordCompare( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2 /* Right key */ +){ + return sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 0); +} + + +/* +** This function is an optimized version of sqlite3VdbeRecordCompare() +** that (a) the first field of pPKey2 is an integer, and (b) the +** size-of-header varint at the start of (pKey1/nKey1) fits in a single +** byte (i.e. is less than 128). +** +** To avoid concerns about buffer overreads, this routine is only used +** on schemas where the maximum valid header size is 63 bytes or less. +*/ +static int vdbeRecordCompareInt( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2 /* Right key */ +){ + const u8 *aKey = &((const u8*)pKey1)[*(const u8*)pKey1 & 0x3F]; + int serial_type = ((const u8*)pKey1)[1]; + int res; + u32 y; + u64 x; + i64 v; + i64 lhs; + + vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo); + assert( (*(u8*)pKey1)<=0x3F || CORRUPT_DB ); + switch( serial_type ){ + case 1: { /* 1-byte signed integer */ + lhs = ONE_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 2: { /* 2-byte signed integer */ + lhs = TWO_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 3: { /* 3-byte signed integer */ + lhs = THREE_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 4: { /* 4-byte signed integer */ + y = FOUR_BYTE_UINT(aKey); + lhs = (i64)*(int*)&y; + testcase( lhs<0 ); + break; + } + case 5: { /* 6-byte signed integer */ + lhs = FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 6: { /* 8-byte signed integer */ + x = FOUR_BYTE_UINT(aKey); + x = (x<<32) | FOUR_BYTE_UINT(aKey+4); + lhs = *(i64*)&x; + testcase( lhs<0 ); + break; + } + case 8: + lhs = 0; + break; + case 9: + lhs = 1; + break; + + /* This case could be removed without changing the results of running + ** this code. Including it causes gcc to generate a faster switch + ** statement (since the range of switch targets now starts at zero and + ** is contiguous) but does not cause any duplicate code to be generated + ** (as gcc is clever enough to combine the two like cases). Other + ** compilers might be similar. */ + case 0: case 7: + return sqlite3VdbeRecordCompare(nKey1, pKey1, pPKey2); + + default: + return sqlite3VdbeRecordCompare(nKey1, pKey1, pPKey2); + } + + v = pPKey2->aMem[0].u.i; + if( v>lhs ){ + res = pPKey2->r1; + }else if( vr2; + }else if( pPKey2->nField>1 ){ + /* The first fields of the two keys are equal. Compare the trailing + ** fields. */ + res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); + }else{ + /* The first fields of the two keys are equal and there are no trailing + ** fields. Return pPKey2->default_rc in this case. */ + res = pPKey2->default_rc; + pPKey2->eqSeen = 1; + } + + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) ); + return res; +} + +/* +** This function is an optimized version of sqlite3VdbeRecordCompare() +** that (a) the first field of pPKey2 is a string, that (b) the first field +** uses the collation sequence BINARY and (c) that the size-of-header varint +** at the start of (pKey1/nKey1) fits in a single byte. +*/ +static int vdbeRecordCompareString( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2 /* Right key */ +){ + const u8 *aKey1 = (const u8*)pKey1; + int serial_type; + int res; + + assert( pPKey2->aMem[0].flags & MEM_Str ); + vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo); + serial_type = (u8)(aKey1[1]); + if( serial_type >= 0x80 ){ + sqlite3GetVarint32(&aKey1[1], (u32*)&serial_type); + } + if( serial_type<12 ){ + res = pPKey2->r1; /* (pKey1/nKey1) is a number or a null */ + }else if( !(serial_type & 0x01) ){ + res = pPKey2->r2; /* (pKey1/nKey1) is a blob */ + }else{ + int nCmp; + int nStr; + int szHdr = aKey1[0]; + + nStr = (serial_type-12) / 2; + if( (szHdr + nStr) > nKey1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + } + nCmp = MIN( pPKey2->aMem[0].n, nStr ); + res = memcmp(&aKey1[szHdr], pPKey2->aMem[0].z, nCmp); + + if( res>0 ){ + res = pPKey2->r2; + }else if( res<0 ){ + res = pPKey2->r1; + }else{ + res = nStr - pPKey2->aMem[0].n; + if( res==0 ){ + if( pPKey2->nField>1 ){ + res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); + }else{ + res = pPKey2->default_rc; + pPKey2->eqSeen = 1; + } + }else if( res>0 ){ + res = pPKey2->r2; + }else{ + res = pPKey2->r1; + } + } + } + + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) + || CORRUPT_DB + || pPKey2->pKeyInfo->db->mallocFailed + ); + return res; +} + +/* +** Return a pointer to an sqlite3VdbeRecordCompare() compatible function +** suitable for comparing serialized records to the unpacked record passed +** as the only argument. +*/ +SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord *p){ + /* varintRecordCompareInt() and varintRecordCompareString() both assume + ** that the size-of-header varint that occurs at the start of each record + ** fits in a single byte (i.e. is 127 or less). varintRecordCompareInt() + ** also assumes that it is safe to overread a buffer by at least the + ** maximum possible legal header size plus 8 bytes. Because there is + ** guaranteed to be at least 74 (but not 136) bytes of padding following each + ** buffer passed to varintRecordCompareInt() this makes it convenient to + ** limit the size of the header to 64 bytes in cases where the first field + ** is an integer. + ** + ** The easiest way to enforce this limit is to consider only records with + ** 13 fields or less. If the first field is an integer, the maximum legal + ** header size is (12*5 + 1 + 1) bytes. */ + if( p->pKeyInfo->nAllField<=13 ){ + int flags = p->aMem[0].flags; + if( p->pKeyInfo->aSortFlags[0] ){ + if( p->pKeyInfo->aSortFlags[0] & KEYINFO_ORDER_BIGNULL ){ + return sqlite3VdbeRecordCompare; + } + p->r1 = 1; + p->r2 = -1; + }else{ + p->r1 = -1; + p->r2 = 1; + } + if( (flags & MEM_Int) ){ + return vdbeRecordCompareInt; + } + testcase( flags & MEM_Real ); + testcase( flags & MEM_Null ); + testcase( flags & MEM_Blob ); + if( (flags & (MEM_Real|MEM_IntReal|MEM_Null|MEM_Blob))==0 + && p->pKeyInfo->aColl[0]==0 + ){ + assert( flags & MEM_Str ); + return vdbeRecordCompareString; + } + } + + return sqlite3VdbeRecordCompare; +} + +/* +** pCur points at an index entry created using the OP_MakeRecord opcode. +** Read the rowid (the last field in the record) and store it in *rowid. +** Return SQLITE_OK if everything works, or an error code otherwise. +** +** pCur might be pointing to text obtained from a corrupt database file. +** So the content cannot be trusted. Do appropriate checks on the content. +*/ +SQLITE_PRIVATE int sqlite3VdbeIdxRowid(sqlite3 *db, BtCursor *pCur, i64 *rowid){ + i64 nCellKey = 0; + int rc; + u32 szHdr; /* Size of the header */ + u32 typeRowid; /* Serial type of the rowid */ + u32 lenRowid; /* Size of the rowid */ + Mem m, v; + + /* Get the size of the index entry. Only indices entries of less + ** than 2GiB are support - anything large must be database corruption. + ** Any corruption is detected in sqlite3BtreeParseCellPtr(), though, so + ** this code can safely assume that nCellKey is 32-bits + */ + assert( sqlite3BtreeCursorIsValid(pCur) ); + nCellKey = sqlite3BtreePayloadSize(pCur); + assert( (nCellKey & SQLITE_MAX_U32)==(u64)nCellKey ); + + /* Read in the complete content of the index entry */ + sqlite3VdbeMemInit(&m, db, 0); + rc = sqlite3VdbeMemFromBtreeZeroOffset(pCur, (u32)nCellKey, &m); + if( rc ){ + return rc; + } + + /* The index entry must begin with a header size */ + getVarint32NR((u8*)m.z, szHdr); + testcase( szHdr==3 ); + testcase( szHdr==(u32)m.n ); + testcase( szHdr>0x7fffffff ); + assert( m.n>=0 ); + if( unlikely(szHdr<3 || szHdr>(unsigned)m.n) ){ + goto idx_rowid_corruption; + } + + /* The last field of the index should be an integer - the ROWID. + ** Verify that the last entry really is an integer. */ + getVarint32NR((u8*)&m.z[szHdr-1], typeRowid); + testcase( typeRowid==1 ); + testcase( typeRowid==2 ); + testcase( typeRowid==3 ); + testcase( typeRowid==4 ); + testcase( typeRowid==5 ); + testcase( typeRowid==6 ); + testcase( typeRowid==8 ); + testcase( typeRowid==9 ); + if( unlikely(typeRowid<1 || typeRowid>9 || typeRowid==7) ){ + goto idx_rowid_corruption; + } + lenRowid = sqlite3SmallTypeSizes[typeRowid]; + testcase( (u32)m.n==szHdr+lenRowid ); + if( unlikely((u32)m.neCurType==CURTYPE_BTREE ); + pCur = pC->uc.pCursor; + assert( sqlite3BtreeCursorIsValid(pCur) ); + nCellKey = sqlite3BtreePayloadSize(pCur); + /* nCellKey will always be between 0 and 0xffffffff because of the way + ** that btreeParseCellPtr() and sqlite3GetVarint32() are implemented */ + if( nCellKey<=0 || nCellKey>0x7fffffff ){ + *res = 0; + return SQLITE_CORRUPT_BKPT; + } + sqlite3VdbeMemInit(&m, db, 0); + rc = sqlite3VdbeMemFromBtreeZeroOffset(pCur, (u32)nCellKey, &m); + if( rc ){ + return rc; + } + *res = sqlite3VdbeRecordCompareWithSkip(m.n, m.z, pUnpacked, 0); + sqlite3VdbeMemRelease(&m); + return SQLITE_OK; +} + +/* +** This routine sets the value to be returned by subsequent calls to +** sqlite3_changes() on the database handle 'db'. +*/ +SQLITE_PRIVATE void sqlite3VdbeSetChanges(sqlite3 *db, i64 nChange){ + assert( sqlite3_mutex_held(db->mutex) ); + db->nChange = nChange; + db->nTotalChange += nChange; +} + +/* +** Set a flag in the vdbe to update the change counter when it is finalised +** or reset. +*/ +SQLITE_PRIVATE void sqlite3VdbeCountChanges(Vdbe *v){ + v->changeCntOn = 1; +} + +/* +** Mark every prepared statement associated with a database connection +** as expired. +** +** An expired statement means that recompilation of the statement is +** recommend. Statements expire when things happen that make their +** programs obsolete. Removing user-defined functions or collating +** sequences, or changing an authorization function are the types of +** things that make prepared statements obsolete. +** +** If iCode is 1, then expiration is advisory. The statement should +** be reprepared before being restarted, but if it is already running +** it is allowed to run to completion. +** +** Internally, this function just sets the Vdbe.expired flag on all +** prepared statements. The flag is set to 1 for an immediate expiration +** and set to 2 for an advisory expiration. +*/ +SQLITE_PRIVATE void sqlite3ExpirePreparedStatements(sqlite3 *db, int iCode){ + Vdbe *p; + for(p = db->pVdbe; p; p=p->pNext){ + p->expired = iCode+1; + } +} + +/* +** Return the database associated with the Vdbe. +*/ +SQLITE_PRIVATE sqlite3 *sqlite3VdbeDb(Vdbe *v){ + return v->db; +} + +/* +** Return the SQLITE_PREPARE flags for a Vdbe. +*/ +SQLITE_PRIVATE u8 sqlite3VdbePrepareFlags(Vdbe *v){ + return v->prepFlags; +} + +/* +** Return a pointer to an sqlite3_value structure containing the value bound +** parameter iVar of VM v. Except, if the value is an SQL NULL, return +** 0 instead. Unless it is NULL, apply affinity aff (one of the SQLITE_AFF_* +** constants) to the value before returning it. +** +** The returned value must be freed by the caller using sqlite3ValueFree(). +*/ +SQLITE_PRIVATE sqlite3_value *sqlite3VdbeGetBoundValue(Vdbe *v, int iVar, u8 aff){ + assert( iVar>0 ); + if( v ){ + Mem *pMem = &v->aVar[iVar-1]; + assert( (v->db->flags & SQLITE_EnableQPSG)==0 ); + if( 0==(pMem->flags & MEM_Null) ){ + sqlite3_value *pRet = sqlite3ValueNew(v->db); + if( pRet ){ + sqlite3VdbeMemCopy((Mem *)pRet, pMem); + sqlite3ValueApplyAffinity(pRet, aff, SQLITE_UTF8); + } + return pRet; + } + } + return 0; +} + +/* +** Configure SQL variable iVar so that binding a new value to it signals +** to sqlite3_reoptimize() that re-preparing the statement may result +** in a better query plan. +*/ +SQLITE_PRIVATE void sqlite3VdbeSetVarmask(Vdbe *v, int iVar){ + assert( iVar>0 ); + assert( (v->db->flags & SQLITE_EnableQPSG)==0 ); + if( iVar>=32 ){ + v->expmask |= 0x80000000; + }else{ + v->expmask |= ((u32)1 << (iVar-1)); + } +} + +/* +** Cause a function to throw an error if it was call from OP_PureFunc +** rather than OP_Function. +** +** OP_PureFunc means that the function must be deterministic, and should +** throw an error if it is given inputs that would make it non-deterministic. +** This routine is invoked by date/time functions that use non-deterministic +** features such as 'now'. +*/ +SQLITE_PRIVATE int sqlite3NotPureFunc(sqlite3_context *pCtx){ + const VdbeOp *pOp; +#ifdef SQLITE_ENABLE_STAT4 + if( pCtx->pVdbe==0 ) return 1; +#endif + pOp = pCtx->pVdbe->aOp + pCtx->iOp; + if( pOp->opcode==OP_PureFunc ){ + const char *zContext; + char *zMsg; + if( pOp->p5 & NC_IsCheck ){ + zContext = "a CHECK constraint"; + }else if( pOp->p5 & NC_GenCol ){ + zContext = "a generated column"; + }else{ + zContext = "an index"; + } + zMsg = sqlite3_mprintf("non-deterministic use of %s() in %s", + pCtx->pFunc->zName, zContext); + sqlite3_result_error(pCtx, zMsg, -1); + sqlite3_free(zMsg); + return 0; + } + return 1; +} + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Transfer error message text from an sqlite3_vtab.zErrMsg (text stored +** in memory obtained from sqlite3_malloc) into a Vdbe.zErrMsg (text stored +** in memory obtained from sqlite3DbMalloc). +*/ +SQLITE_PRIVATE void sqlite3VtabImportErrmsg(Vdbe *p, sqlite3_vtab *pVtab){ + if( pVtab->zErrMsg ){ + sqlite3 *db = p->db; + sqlite3DbFree(db, p->zErrMsg); + p->zErrMsg = sqlite3DbStrDup(db, pVtab->zErrMsg); + sqlite3_free(pVtab->zErrMsg); + pVtab->zErrMsg = 0; + } +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + +/* +** If the second argument is not NULL, release any allocations associated +** with the memory cells in the p->aMem[] array. Also free the UnpackedRecord +** structure itself, using sqlite3DbFree(). +** +** This function is used to free UnpackedRecord structures allocated by +** the vdbeUnpackRecord() function found in vdbeapi.c. +*/ +static void vdbeFreeUnpacked(sqlite3 *db, int nField, UnpackedRecord *p){ + if( p ){ + int i; + for(i=0; iaMem[i]; + if( pMem->zMalloc ) sqlite3VdbeMemRelease(pMem); + } + sqlite3DbFreeNN(db, p); + } +} +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +/* +** Invoke the pre-update hook. If this is an UPDATE or DELETE pre-update call, +** then cursor passed as the second argument should point to the row about +** to be update or deleted. If the application calls sqlite3_preupdate_old(), +** the required value will be read from the row the cursor points to. +*/ +SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( + Vdbe *v, /* Vdbe pre-update hook is invoked by */ + VdbeCursor *pCsr, /* Cursor to grab old.* values from */ + int op, /* SQLITE_INSERT, UPDATE or DELETE */ + const char *zDb, /* Database name */ + Table *pTab, /* Modified table */ + i64 iKey1, /* Initial key value */ + int iReg, /* Register for new.* record */ + int iBlobWrite +){ + sqlite3 *db = v->db; + i64 iKey2; + PreUpdate preupdate; + const char *zTbl = pTab->zName; + static const u8 fakeSortOrder = 0; + + assert( db->pPreUpdate==0 ); + memset(&preupdate, 0, sizeof(PreUpdate)); + if( HasRowid(pTab)==0 ){ + iKey1 = iKey2 = 0; + preupdate.pPk = sqlite3PrimaryKeyIndex(pTab); + }else{ + if( op==SQLITE_UPDATE ){ + iKey2 = v->aMem[iReg].u.i; + }else{ + iKey2 = iKey1; + } + } + + assert( pCsr!=0 ); + assert( pCsr->eCurType==CURTYPE_BTREE ); + assert( pCsr->nField==pTab->nCol + || (pCsr->nField==pTab->nCol+1 && op==SQLITE_DELETE && iReg==-1) + ); + + preupdate.v = v; + preupdate.pCsr = pCsr; + preupdate.op = op; + preupdate.iNewReg = iReg; + preupdate.keyinfo.db = db; + preupdate.keyinfo.enc = ENC(db); + preupdate.keyinfo.nKeyField = pTab->nCol; + preupdate.keyinfo.aSortFlags = (u8*)&fakeSortOrder; + preupdate.iKey1 = iKey1; + preupdate.iKey2 = iKey2; + preupdate.pTab = pTab; + preupdate.iBlobWrite = iBlobWrite; + + db->pPreUpdate = &preupdate; + db->xPreUpdateCallback(db->pPreUpdateArg, db, op, zDb, zTbl, iKey1, iKey2); + db->pPreUpdate = 0; + sqlite3DbFree(db, preupdate.aRecord); + vdbeFreeUnpacked(db, preupdate.keyinfo.nKeyField+1, preupdate.pUnpacked); + vdbeFreeUnpacked(db, preupdate.keyinfo.nKeyField+1, preupdate.pNewUnpacked); + if( preupdate.aNew ){ + int i; + for(i=0; inField; i++){ + sqlite3VdbeMemRelease(&preupdate.aNew[i]); + } + sqlite3DbFreeNN(db, preupdate.aNew); + } +} +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +/************** End of vdbeaux.c *********************************************/ +/************** Begin file vdbeapi.c *****************************************/ +/* +** 2004 May 26 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code use to implement APIs that are part of the +** VDBE. +*/ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** Return TRUE (non-zero) of the statement supplied as an argument needs +** to be recompiled. A statement needs to be recompiled whenever the +** execution environment changes in a way that would alter the program +** that sqlite3_prepare() generates. For example, if new functions or +** collating sequences are registered or if an authorizer function is +** added or changed. +*/ +SQLITE_API int sqlite3_expired(sqlite3_stmt *pStmt){ + Vdbe *p = (Vdbe*)pStmt; + return p==0 || p->expired; +} +#endif + +/* +** Check on a Vdbe to make sure it has not been finalized. Log +** an error and return true if it has been finalized (or is otherwise +** invalid). Return false if it is ok. +*/ +static int vdbeSafety(Vdbe *p){ + if( p->db==0 ){ + sqlite3_log(SQLITE_MISUSE, "API called with finalized prepared statement"); + return 1; + }else{ + return 0; + } +} +static int vdbeSafetyNotNull(Vdbe *p){ + if( p==0 ){ + sqlite3_log(SQLITE_MISUSE, "API called with NULL prepared statement"); + return 1; + }else{ + return vdbeSafety(p); + } +} + +#ifndef SQLITE_OMIT_TRACE +/* +** Invoke the profile callback. This routine is only called if we already +** know that the profile callback is defined and needs to be invoked. +*/ +static SQLITE_NOINLINE void invokeProfileCallback(sqlite3 *db, Vdbe *p){ + sqlite3_int64 iNow; + sqlite3_int64 iElapse; + assert( p->startTime>0 ); + assert( (db->mTrace & (SQLITE_TRACE_PROFILE|SQLITE_TRACE_XPROFILE))!=0 ); + assert( db->init.busy==0 ); + assert( p->zSql!=0 ); + sqlite3OsCurrentTimeInt64(db->pVfs, &iNow); + iElapse = (iNow - p->startTime)*1000000; +#ifndef SQLITE_OMIT_DEPRECATED + if( db->xProfile ){ + db->xProfile(db->pProfileArg, p->zSql, iElapse); + } +#endif + if( db->mTrace & SQLITE_TRACE_PROFILE ){ + db->trace.xV2(SQLITE_TRACE_PROFILE, db->pTraceArg, p, (void*)&iElapse); + } + p->startTime = 0; +} +/* +** The checkProfileCallback(DB,P) macro checks to see if a profile callback +** is needed, and it invokes the callback if it is needed. +*/ +# define checkProfileCallback(DB,P) \ + if( ((P)->startTime)>0 ){ invokeProfileCallback(DB,P); } +#else +# define checkProfileCallback(DB,P) /*no-op*/ +#endif + +/* +** The following routine destroys a virtual machine that is created by +** the sqlite3_compile() routine. The integer returned is an SQLITE_ +** success/failure code that describes the result of executing the virtual +** machine. +** +** This routine sets the error code and string returned by +** sqlite3_errcode(), sqlite3_errmsg() and sqlite3_errmsg16(). +*/ +SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt){ + int rc; + if( pStmt==0 ){ + /* IMPLEMENTATION-OF: R-57228-12904 Invoking sqlite3_finalize() on a NULL + ** pointer is a harmless no-op. */ + rc = SQLITE_OK; + }else{ + Vdbe *v = (Vdbe*)pStmt; + sqlite3 *db = v->db; + if( vdbeSafety(v) ) return SQLITE_MISUSE_BKPT; + sqlite3_mutex_enter(db->mutex); + checkProfileCallback(db, v); + rc = sqlite3VdbeFinalize(v); + rc = sqlite3ApiExit(db, rc); + sqlite3LeaveMutexAndCloseZombie(db); + } + return rc; +} + +/* +** Terminate the current execution of an SQL statement and reset it +** back to its starting state so that it can be reused. A success code from +** the prior execution is returned. +** +** This routine sets the error code and string returned by +** sqlite3_errcode(), sqlite3_errmsg() and sqlite3_errmsg16(). +*/ +SQLITE_API int sqlite3_reset(sqlite3_stmt *pStmt){ + int rc; + if( pStmt==0 ){ + rc = SQLITE_OK; + }else{ + Vdbe *v = (Vdbe*)pStmt; + sqlite3 *db = v->db; + sqlite3_mutex_enter(db->mutex); + checkProfileCallback(db, v); + rc = sqlite3VdbeReset(v); + sqlite3VdbeRewind(v); + assert( (rc & (db->errMask))==rc ); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + } + return rc; +} + +/* +** Set all the parameters in the compiled SQL statement to NULL. +*/ +SQLITE_API int sqlite3_clear_bindings(sqlite3_stmt *pStmt){ + int i; + int rc = SQLITE_OK; + Vdbe *p = (Vdbe*)pStmt; +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex = ((Vdbe*)pStmt)->db->mutex; +#endif + sqlite3_mutex_enter(mutex); + for(i=0; inVar; i++){ + sqlite3VdbeMemRelease(&p->aVar[i]); + p->aVar[i].flags = MEM_Null; + } + assert( (p->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 || p->expmask==0 ); + if( p->expmask ){ + p->expired = 1; + } + sqlite3_mutex_leave(mutex); + return rc; +} + + +/**************************** sqlite3_value_ ******************************* +** The following routines extract information from a Mem or sqlite3_value +** structure. +*/ +SQLITE_API const void *sqlite3_value_blob(sqlite3_value *pVal){ + Mem *p = (Mem*)pVal; + if( p->flags & (MEM_Blob|MEM_Str) ){ + if( ExpandBlob(p)!=SQLITE_OK ){ + assert( p->flags==MEM_Null && p->z==0 ); + return 0; + } + p->flags |= MEM_Blob; + return p->n ? p->z : 0; + }else{ + return sqlite3_value_text(pVal); + } +} +SQLITE_API int sqlite3_value_bytes(sqlite3_value *pVal){ + return sqlite3ValueBytes(pVal, SQLITE_UTF8); +} +SQLITE_API int sqlite3_value_bytes16(sqlite3_value *pVal){ + return sqlite3ValueBytes(pVal, SQLITE_UTF16NATIVE); +} +SQLITE_API double sqlite3_value_double(sqlite3_value *pVal){ + return sqlite3VdbeRealValue((Mem*)pVal); +} +SQLITE_API int sqlite3_value_int(sqlite3_value *pVal){ + return (int)sqlite3VdbeIntValue((Mem*)pVal); +} +SQLITE_API sqlite_int64 sqlite3_value_int64(sqlite3_value *pVal){ + return sqlite3VdbeIntValue((Mem*)pVal); +} +SQLITE_API unsigned int sqlite3_value_subtype(sqlite3_value *pVal){ + Mem *pMem = (Mem*)pVal; + return ((pMem->flags & MEM_Subtype) ? pMem->eSubtype : 0); +} +SQLITE_API void *sqlite3_value_pointer(sqlite3_value *pVal, const char *zPType){ + Mem *p = (Mem*)pVal; + if( (p->flags&(MEM_TypeMask|MEM_Term|MEM_Subtype)) == + (MEM_Null|MEM_Term|MEM_Subtype) + && zPType!=0 + && p->eSubtype=='p' + && strcmp(p->u.zPType, zPType)==0 + ){ + return (void*)p->z; + }else{ + return 0; + } +} +SQLITE_API const unsigned char *sqlite3_value_text(sqlite3_value *pVal){ + return (const unsigned char *)sqlite3ValueText(pVal, SQLITE_UTF8); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API const void *sqlite3_value_text16(sqlite3_value* pVal){ + return sqlite3ValueText(pVal, SQLITE_UTF16NATIVE); +} +SQLITE_API const void *sqlite3_value_text16be(sqlite3_value *pVal){ + return sqlite3ValueText(pVal, SQLITE_UTF16BE); +} +SQLITE_API const void *sqlite3_value_text16le(sqlite3_value *pVal){ + return sqlite3ValueText(pVal, SQLITE_UTF16LE); +} +#endif /* SQLITE_OMIT_UTF16 */ +/* EVIDENCE-OF: R-12793-43283 Every value in SQLite has one of five +** fundamental datatypes: 64-bit signed integer 64-bit IEEE floating +** point number string BLOB NULL +*/ +SQLITE_API int sqlite3_value_type(sqlite3_value* pVal){ + static const u8 aType[] = { + SQLITE_BLOB, /* 0x00 (not possible) */ + SQLITE_NULL, /* 0x01 NULL */ + SQLITE_TEXT, /* 0x02 TEXT */ + SQLITE_NULL, /* 0x03 (not possible) */ + SQLITE_INTEGER, /* 0x04 INTEGER */ + SQLITE_NULL, /* 0x05 (not possible) */ + SQLITE_INTEGER, /* 0x06 INTEGER + TEXT */ + SQLITE_NULL, /* 0x07 (not possible) */ + SQLITE_FLOAT, /* 0x08 FLOAT */ + SQLITE_NULL, /* 0x09 (not possible) */ + SQLITE_FLOAT, /* 0x0a FLOAT + TEXT */ + SQLITE_NULL, /* 0x0b (not possible) */ + SQLITE_INTEGER, /* 0x0c (not possible) */ + SQLITE_NULL, /* 0x0d (not possible) */ + SQLITE_INTEGER, /* 0x0e (not possible) */ + SQLITE_NULL, /* 0x0f (not possible) */ + SQLITE_BLOB, /* 0x10 BLOB */ + SQLITE_NULL, /* 0x11 (not possible) */ + SQLITE_TEXT, /* 0x12 (not possible) */ + SQLITE_NULL, /* 0x13 (not possible) */ + SQLITE_INTEGER, /* 0x14 INTEGER + BLOB */ + SQLITE_NULL, /* 0x15 (not possible) */ + SQLITE_INTEGER, /* 0x16 (not possible) */ + SQLITE_NULL, /* 0x17 (not possible) */ + SQLITE_FLOAT, /* 0x18 FLOAT + BLOB */ + SQLITE_NULL, /* 0x19 (not possible) */ + SQLITE_FLOAT, /* 0x1a (not possible) */ + SQLITE_NULL, /* 0x1b (not possible) */ + SQLITE_INTEGER, /* 0x1c (not possible) */ + SQLITE_NULL, /* 0x1d (not possible) */ + SQLITE_INTEGER, /* 0x1e (not possible) */ + SQLITE_NULL, /* 0x1f (not possible) */ + SQLITE_FLOAT, /* 0x20 INTREAL */ + SQLITE_NULL, /* 0x21 (not possible) */ + SQLITE_TEXT, /* 0x22 INTREAL + TEXT */ + SQLITE_NULL, /* 0x23 (not possible) */ + SQLITE_FLOAT, /* 0x24 (not possible) */ + SQLITE_NULL, /* 0x25 (not possible) */ + SQLITE_FLOAT, /* 0x26 (not possible) */ + SQLITE_NULL, /* 0x27 (not possible) */ + SQLITE_FLOAT, /* 0x28 (not possible) */ + SQLITE_NULL, /* 0x29 (not possible) */ + SQLITE_FLOAT, /* 0x2a (not possible) */ + SQLITE_NULL, /* 0x2b (not possible) */ + SQLITE_FLOAT, /* 0x2c (not possible) */ + SQLITE_NULL, /* 0x2d (not possible) */ + SQLITE_FLOAT, /* 0x2e (not possible) */ + SQLITE_NULL, /* 0x2f (not possible) */ + SQLITE_BLOB, /* 0x30 (not possible) */ + SQLITE_NULL, /* 0x31 (not possible) */ + SQLITE_TEXT, /* 0x32 (not possible) */ + SQLITE_NULL, /* 0x33 (not possible) */ + SQLITE_FLOAT, /* 0x34 (not possible) */ + SQLITE_NULL, /* 0x35 (not possible) */ + SQLITE_FLOAT, /* 0x36 (not possible) */ + SQLITE_NULL, /* 0x37 (not possible) */ + SQLITE_FLOAT, /* 0x38 (not possible) */ + SQLITE_NULL, /* 0x39 (not possible) */ + SQLITE_FLOAT, /* 0x3a (not possible) */ + SQLITE_NULL, /* 0x3b (not possible) */ + SQLITE_FLOAT, /* 0x3c (not possible) */ + SQLITE_NULL, /* 0x3d (not possible) */ + SQLITE_FLOAT, /* 0x3e (not possible) */ + SQLITE_NULL, /* 0x3f (not possible) */ + }; +#ifdef SQLITE_DEBUG + { + int eType = SQLITE_BLOB; + if( pVal->flags & MEM_Null ){ + eType = SQLITE_NULL; + }else if( pVal->flags & (MEM_Real|MEM_IntReal) ){ + eType = SQLITE_FLOAT; + }else if( pVal->flags & MEM_Int ){ + eType = SQLITE_INTEGER; + }else if( pVal->flags & MEM_Str ){ + eType = SQLITE_TEXT; + } + assert( eType == aType[pVal->flags&MEM_AffMask] ); + } +#endif + return aType[pVal->flags&MEM_AffMask]; +} + +/* Return true if a parameter to xUpdate represents an unchanged column */ +SQLITE_API int sqlite3_value_nochange(sqlite3_value *pVal){ + return (pVal->flags&(MEM_Null|MEM_Zero))==(MEM_Null|MEM_Zero); +} + +/* Return true if a parameter value originated from an sqlite3_bind() */ +SQLITE_API int sqlite3_value_frombind(sqlite3_value *pVal){ + return (pVal->flags&MEM_FromBind)!=0; +} + +/* Make a copy of an sqlite3_value object +*/ +SQLITE_API sqlite3_value *sqlite3_value_dup(const sqlite3_value *pOrig){ + sqlite3_value *pNew; + if( pOrig==0 ) return 0; + pNew = sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return 0; + memset(pNew, 0, sizeof(*pNew)); + memcpy(pNew, pOrig, MEMCELLSIZE); + pNew->flags &= ~MEM_Dyn; + pNew->db = 0; + if( pNew->flags&(MEM_Str|MEM_Blob) ){ + pNew->flags &= ~(MEM_Static|MEM_Dyn); + pNew->flags |= MEM_Ephem; + if( sqlite3VdbeMemMakeWriteable(pNew)!=SQLITE_OK ){ + sqlite3ValueFree(pNew); + pNew = 0; + } + } + return pNew; +} + +/* Destroy an sqlite3_value object previously obtained from +** sqlite3_value_dup(). +*/ +SQLITE_API void sqlite3_value_free(sqlite3_value *pOld){ + sqlite3ValueFree(pOld); +} + + +/**************************** sqlite3_result_ ******************************* +** The following routines are used by user-defined functions to specify +** the function result. +** +** The setStrOrError() function calls sqlite3VdbeMemSetStr() to store the +** result as a string or blob. Appropriate errors are set if the string/blob +** is too big or if an OOM occurs. +** +** The invokeValueDestructor(P,X) routine invokes destructor function X() +** on value P is not going to be used and need to be destroyed. +*/ +static void setResultStrOrError( + sqlite3_context *pCtx, /* Function context */ + const char *z, /* String pointer */ + int n, /* Bytes in string, or negative */ + u8 enc, /* Encoding of z. 0 for BLOBs */ + void (*xDel)(void*) /* Destructor function */ +){ + int rc = sqlite3VdbeMemSetStr(pCtx->pOut, z, n, enc, xDel); + if( rc ){ + if( rc==SQLITE_TOOBIG ){ + sqlite3_result_error_toobig(pCtx); + }else{ + /* The only errors possible from sqlite3VdbeMemSetStr are + ** SQLITE_TOOBIG and SQLITE_NOMEM */ + assert( rc==SQLITE_NOMEM ); + sqlite3_result_error_nomem(pCtx); + } + } +} +static int invokeValueDestructor( + const void *p, /* Value to destroy */ + void (*xDel)(void*), /* The destructor */ + sqlite3_context *pCtx /* Set a SQLITE_TOOBIG error if no NULL */ +){ + assert( xDel!=SQLITE_DYNAMIC ); + if( xDel==0 ){ + /* noop */ + }else if( xDel==SQLITE_TRANSIENT ){ + /* noop */ + }else{ + xDel((void*)p); + } + sqlite3_result_error_toobig(pCtx); + return SQLITE_TOOBIG; +} +SQLITE_API void sqlite3_result_blob( + sqlite3_context *pCtx, + const void *z, + int n, + void (*xDel)(void *) +){ + assert( n>=0 ); + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + setResultStrOrError(pCtx, z, n, 0, xDel); +} +SQLITE_API void sqlite3_result_blob64( + sqlite3_context *pCtx, + const void *z, + sqlite3_uint64 n, + void (*xDel)(void *) +){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + assert( xDel!=SQLITE_DYNAMIC ); + if( n>0x7fffffff ){ + (void)invokeValueDestructor(z, xDel, pCtx); + }else{ + setResultStrOrError(pCtx, z, (int)n, 0, xDel); + } +} +SQLITE_API void sqlite3_result_double(sqlite3_context *pCtx, double rVal){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + sqlite3VdbeMemSetDouble(pCtx->pOut, rVal); +} +SQLITE_API void sqlite3_result_error(sqlite3_context *pCtx, const char *z, int n){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + pCtx->isError = SQLITE_ERROR; + sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF8, SQLITE_TRANSIENT); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API void sqlite3_result_error16(sqlite3_context *pCtx, const void *z, int n){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + pCtx->isError = SQLITE_ERROR; + sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF16NATIVE, SQLITE_TRANSIENT); +} +#endif +SQLITE_API void sqlite3_result_int(sqlite3_context *pCtx, int iVal){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + sqlite3VdbeMemSetInt64(pCtx->pOut, (i64)iVal); +} +SQLITE_API void sqlite3_result_int64(sqlite3_context *pCtx, i64 iVal){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + sqlite3VdbeMemSetInt64(pCtx->pOut, iVal); +} +SQLITE_API void sqlite3_result_null(sqlite3_context *pCtx){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + sqlite3VdbeMemSetNull(pCtx->pOut); +} +SQLITE_API void sqlite3_result_pointer( + sqlite3_context *pCtx, + void *pPtr, + const char *zPType, + void (*xDestructor)(void*) +){ + Mem *pOut = pCtx->pOut; + assert( sqlite3_mutex_held(pOut->db->mutex) ); + sqlite3VdbeMemRelease(pOut); + pOut->flags = MEM_Null; + sqlite3VdbeMemSetPointer(pOut, pPtr, zPType, xDestructor); +} +SQLITE_API void sqlite3_result_subtype(sqlite3_context *pCtx, unsigned int eSubtype){ + Mem *pOut = pCtx->pOut; + assert( sqlite3_mutex_held(pOut->db->mutex) ); + pOut->eSubtype = eSubtype & 0xff; + pOut->flags |= MEM_Subtype; +} +SQLITE_API void sqlite3_result_text( + sqlite3_context *pCtx, + const char *z, + int n, + void (*xDel)(void *) +){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + setResultStrOrError(pCtx, z, n, SQLITE_UTF8, xDel); +} +SQLITE_API void sqlite3_result_text64( + sqlite3_context *pCtx, + const char *z, + sqlite3_uint64 n, + void (*xDel)(void *), + unsigned char enc +){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + assert( xDel!=SQLITE_DYNAMIC ); + if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE; + if( n>0x7fffffff ){ + (void)invokeValueDestructor(z, xDel, pCtx); + }else{ + setResultStrOrError(pCtx, z, (int)n, enc, xDel); + } +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API void sqlite3_result_text16( + sqlite3_context *pCtx, + const void *z, + int n, + void (*xDel)(void *) +){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + setResultStrOrError(pCtx, z, n, SQLITE_UTF16NATIVE, xDel); +} +SQLITE_API void sqlite3_result_text16be( + sqlite3_context *pCtx, + const void *z, + int n, + void (*xDel)(void *) +){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + setResultStrOrError(pCtx, z, n, SQLITE_UTF16BE, xDel); +} +SQLITE_API void sqlite3_result_text16le( + sqlite3_context *pCtx, + const void *z, + int n, + void (*xDel)(void *) +){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + setResultStrOrError(pCtx, z, n, SQLITE_UTF16LE, xDel); +} +#endif /* SQLITE_OMIT_UTF16 */ +SQLITE_API void sqlite3_result_value(sqlite3_context *pCtx, sqlite3_value *pValue){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + sqlite3VdbeMemCopy(pCtx->pOut, pValue); +} +SQLITE_API void sqlite3_result_zeroblob(sqlite3_context *pCtx, int n){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + sqlite3VdbeMemSetZeroBlob(pCtx->pOut, n); +} +SQLITE_API int sqlite3_result_zeroblob64(sqlite3_context *pCtx, u64 n){ + Mem *pOut = pCtx->pOut; + assert( sqlite3_mutex_held(pOut->db->mutex) ); + if( n>(u64)pOut->db->aLimit[SQLITE_LIMIT_LENGTH] ){ + return SQLITE_TOOBIG; + } +#ifndef SQLITE_OMIT_INCRBLOB + sqlite3VdbeMemSetZeroBlob(pCtx->pOut, (int)n); + return SQLITE_OK; +#else + return sqlite3VdbeMemSetZeroBlob(pCtx->pOut, (int)n); +#endif +} +SQLITE_API void sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){ + pCtx->isError = errCode ? errCode : -1; +#ifdef SQLITE_DEBUG + if( pCtx->pVdbe ) pCtx->pVdbe->rcApp = errCode; +#endif + if( pCtx->pOut->flags & MEM_Null ){ + sqlite3VdbeMemSetStr(pCtx->pOut, sqlite3ErrStr(errCode), -1, + SQLITE_UTF8, SQLITE_STATIC); + } +} + +/* Force an SQLITE_TOOBIG error. */ +SQLITE_API void sqlite3_result_error_toobig(sqlite3_context *pCtx){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + pCtx->isError = SQLITE_TOOBIG; + sqlite3VdbeMemSetStr(pCtx->pOut, "string or blob too big", -1, + SQLITE_UTF8, SQLITE_STATIC); +} + +/* An SQLITE_NOMEM error. */ +SQLITE_API void sqlite3_result_error_nomem(sqlite3_context *pCtx){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + sqlite3VdbeMemSetNull(pCtx->pOut); + pCtx->isError = SQLITE_NOMEM_BKPT; + sqlite3OomFault(pCtx->pOut->db); +} + +#ifndef SQLITE_UNTESTABLE +/* Force the INT64 value currently stored as the result to be +** a MEM_IntReal value. See the SQLITE_TESTCTRL_RESULT_INTREAL +** test-control. +*/ +SQLITE_PRIVATE void sqlite3ResultIntReal(sqlite3_context *pCtx){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + if( pCtx->pOut->flags & MEM_Int ){ + pCtx->pOut->flags &= ~MEM_Int; + pCtx->pOut->flags |= MEM_IntReal; + } +} +#endif + + +/* +** This function is called after a transaction has been committed. It +** invokes callbacks registered with sqlite3_wal_hook() as required. +*/ +static int doWalCallbacks(sqlite3 *db){ + int rc = SQLITE_OK; +#ifndef SQLITE_OMIT_WAL + int i; + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + int nEntry; + sqlite3BtreeEnter(pBt); + nEntry = sqlite3PagerWalCallback(sqlite3BtreePager(pBt)); + sqlite3BtreeLeave(pBt); + if( nEntry>0 && db->xWalCallback && rc==SQLITE_OK ){ + rc = db->xWalCallback(db->pWalArg, db, db->aDb[i].zDbSName, nEntry); + } + } + } +#endif + return rc; +} + + +/* +** Execute the statement pStmt, either until a row of data is ready, the +** statement is completely executed or an error occurs. +** +** This routine implements the bulk of the logic behind the sqlite_step() +** API. The only thing omitted is the automatic recompile if a +** schema change has occurred. That detail is handled by the +** outer sqlite3_step() wrapper procedure. +*/ +static int sqlite3Step(Vdbe *p){ + sqlite3 *db; + int rc; + + assert(p); + if( p->iVdbeMagic!=VDBE_MAGIC_RUN ){ + /* We used to require that sqlite3_reset() be called before retrying + ** sqlite3_step() after any error or after SQLITE_DONE. But beginning + ** with version 3.7.0, we changed this so that sqlite3_reset() would + ** be called automatically instead of throwing the SQLITE_MISUSE error. + ** This "automatic-reset" change is not technically an incompatibility, + ** since any application that receives an SQLITE_MISUSE is broken by + ** definition. + ** + ** Nevertheless, some published applications that were originally written + ** for version 3.6.23 or earlier do in fact depend on SQLITE_MISUSE + ** returns, and those were broken by the automatic-reset change. As a + ** a work-around, the SQLITE_OMIT_AUTORESET compile-time restores the + ** legacy behavior of returning SQLITE_MISUSE for cases where the + ** previous sqlite3_step() returned something other than a SQLITE_LOCKED + ** or SQLITE_BUSY error. + */ +#ifdef SQLITE_OMIT_AUTORESET + if( (rc = p->rc&0xff)==SQLITE_BUSY || rc==SQLITE_LOCKED ){ + sqlite3_reset((sqlite3_stmt*)p); + }else{ + return SQLITE_MISUSE_BKPT; + } +#else + sqlite3_reset((sqlite3_stmt*)p); +#endif + } + + /* Check that malloc() has not failed. If it has, return early. */ + db = p->db; + if( db->mallocFailed ){ + p->rc = SQLITE_NOMEM; + return SQLITE_NOMEM_BKPT; + } + + if( p->pc<0 && p->expired ){ + p->rc = SQLITE_SCHEMA; + rc = SQLITE_ERROR; + if( (p->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 ){ + /* If this statement was prepared using saved SQL and an + ** error has occurred, then return the error code in p->rc to the + ** caller. Set the error code in the database handle to the same value. + */ + rc = sqlite3VdbeTransferError(p); + } + goto end_of_step; + } + if( p->pc<0 ){ + /* If there are no other statements currently running, then + ** reset the interrupt flag. This prevents a call to sqlite3_interrupt + ** from interrupting a statement that has not yet started. + */ + if( db->nVdbeActive==0 ){ + AtomicStore(&db->u1.isInterrupted, 0); + } + + assert( db->nVdbeWrite>0 || db->autoCommit==0 + || (db->nDeferredCons==0 && db->nDeferredImmCons==0) + ); + +#ifndef SQLITE_OMIT_TRACE + if( (db->mTrace & (SQLITE_TRACE_PROFILE|SQLITE_TRACE_XPROFILE))!=0 + && !db->init.busy && p->zSql ){ + sqlite3OsCurrentTimeInt64(db->pVfs, &p->startTime); + }else{ + assert( p->startTime==0 ); + } +#endif + + db->nVdbeActive++; + if( p->readOnly==0 ) db->nVdbeWrite++; + if( p->bIsReader ) db->nVdbeRead++; + p->pc = 0; + } +#ifdef SQLITE_DEBUG + p->rcApp = SQLITE_OK; +#endif +#ifndef SQLITE_OMIT_EXPLAIN + if( p->explain ){ + rc = sqlite3VdbeList(p); + }else +#endif /* SQLITE_OMIT_EXPLAIN */ + { + db->nVdbeExec++; + rc = sqlite3VdbeExec(p); + db->nVdbeExec--; + } + + if( rc!=SQLITE_ROW ){ +#ifndef SQLITE_OMIT_TRACE + /* If the statement completed successfully, invoke the profile callback */ + checkProfileCallback(db, p); +#endif + + if( rc==SQLITE_DONE && db->autoCommit ){ + assert( p->rc==SQLITE_OK ); + p->rc = doWalCallbacks(db); + if( p->rc!=SQLITE_OK ){ + rc = SQLITE_ERROR; + } + }else if( rc!=SQLITE_DONE && (p->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 ){ + /* If this statement was prepared using saved SQL and an + ** error has occurred, then return the error code in p->rc to the + ** caller. Set the error code in the database handle to the same value. + */ + rc = sqlite3VdbeTransferError(p); + } + } + + db->errCode = rc; + if( SQLITE_NOMEM==sqlite3ApiExit(p->db, p->rc) ){ + p->rc = SQLITE_NOMEM_BKPT; + if( (p->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 ) rc = p->rc; + } +end_of_step: + /* There are only a limited number of result codes allowed from the + ** statements prepared using the legacy sqlite3_prepare() interface */ + assert( (p->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 + || rc==SQLITE_ROW || rc==SQLITE_DONE || rc==SQLITE_ERROR + || (rc&0xff)==SQLITE_BUSY || rc==SQLITE_MISUSE + ); + return (rc&db->errMask); +} + +/* +** This is the top-level implementation of sqlite3_step(). Call +** sqlite3Step() to do most of the work. If a schema error occurs, +** call sqlite3Reprepare() and try again. +*/ +SQLITE_API int sqlite3_step(sqlite3_stmt *pStmt){ + int rc = SQLITE_OK; /* Result from sqlite3Step() */ + Vdbe *v = (Vdbe*)pStmt; /* the prepared statement */ + int cnt = 0; /* Counter to prevent infinite loop of reprepares */ + sqlite3 *db; /* The database connection */ + + if( vdbeSafetyNotNull(v) ){ + return SQLITE_MISUSE_BKPT; + } + db = v->db; + sqlite3_mutex_enter(db->mutex); + v->doingRerun = 0; + while( (rc = sqlite3Step(v))==SQLITE_SCHEMA + && cnt++ < SQLITE_MAX_SCHEMA_RETRY ){ + int savedPc = v->pc; + rc = sqlite3Reprepare(v); + if( rc!=SQLITE_OK ){ + /* This case occurs after failing to recompile an sql statement. + ** The error message from the SQL compiler has already been loaded + ** into the database handle. This block copies the error message + ** from the database handle into the statement and sets the statement + ** program counter to 0 to ensure that when the statement is + ** finalized or reset the parser error message is available via + ** sqlite3_errmsg() and sqlite3_errcode(). + */ + const char *zErr = (const char *)sqlite3_value_text(db->pErr); + sqlite3DbFree(db, v->zErrMsg); + if( !db->mallocFailed ){ + v->zErrMsg = sqlite3DbStrDup(db, zErr); + v->rc = rc = sqlite3ApiExit(db, rc); + } else { + v->zErrMsg = 0; + v->rc = rc = SQLITE_NOMEM_BKPT; + } + break; + } + sqlite3_reset(pStmt); + if( savedPc>=0 ) v->doingRerun = 1; + assert( v->expired==0 ); + } + sqlite3_mutex_leave(db->mutex); + return rc; +} + + +/* +** Extract the user data from a sqlite3_context structure and return a +** pointer to it. +*/ +SQLITE_API void *sqlite3_user_data(sqlite3_context *p){ + assert( p && p->pFunc ); + return p->pFunc->pUserData; +} + +/* +** Extract the user data from a sqlite3_context structure and return a +** pointer to it. +** +** IMPLEMENTATION-OF: R-46798-50301 The sqlite3_context_db_handle() interface +** returns a copy of the pointer to the database connection (the 1st +** parameter) of the sqlite3_create_function() and +** sqlite3_create_function16() routines that originally registered the +** application defined function. +*/ +SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context *p){ + assert( p && p->pOut ); + return p->pOut->db; +} + +/* +** If this routine is invoked from within an xColumn method of a virtual +** table, then it returns true if and only if the the call is during an +** UPDATE operation and the value of the column will not be modified +** by the UPDATE. +** +** If this routine is called from any context other than within the +** xColumn method of a virtual table, then the return value is meaningless +** and arbitrary. +** +** Virtual table implements might use this routine to optimize their +** performance by substituting a NULL result, or some other light-weight +** value, as a signal to the xUpdate routine that the column is unchanged. +*/ +SQLITE_API int sqlite3_vtab_nochange(sqlite3_context *p){ + assert( p ); + return sqlite3_value_nochange(p->pOut); +} + +/* +** Implementation of sqlite3_vtab_in_first() (if bNext==0) and +** sqlite3_vtab_in_next() (if bNext!=0). +*/ +static int valueFromValueList( + sqlite3_value *pVal, /* Pointer to the ValueList object */ + sqlite3_value **ppOut, /* Store the next value from the list here */ + int bNext /* 1 for _next(). 0 for _first() */ +){ + int rc; + ValueList *pRhs; + + *ppOut = 0; + if( pVal==0 ) return SQLITE_MISUSE; + pRhs = (ValueList*)sqlite3_value_pointer(pVal, "ValueList"); + if( pRhs==0 ) return SQLITE_MISUSE; + if( bNext ){ + rc = sqlite3BtreeNext(pRhs->pCsr, 0); + }else{ + int dummy = 0; + rc = sqlite3BtreeFirst(pRhs->pCsr, &dummy); + assert( rc==SQLITE_OK || sqlite3BtreeEof(pRhs->pCsr) ); + if( sqlite3BtreeEof(pRhs->pCsr) ) rc = SQLITE_DONE; + } + if( rc==SQLITE_OK ){ + u32 sz; /* Size of current row in bytes */ + Mem sMem; /* Raw content of current row */ + memset(&sMem, 0, sizeof(sMem)); + sz = sqlite3BtreePayloadSize(pRhs->pCsr); + rc = sqlite3VdbeMemFromBtreeZeroOffset(pRhs->pCsr,(int)sz,&sMem); + if( rc==SQLITE_OK ){ + u8 *zBuf = (u8*)sMem.z; + u32 iSerial; + sqlite3_value *pOut = pRhs->pOut; + int iOff = 1 + getVarint32(&zBuf[1], iSerial); + sqlite3VdbeSerialGet(&zBuf[iOff], iSerial, pOut); + pOut->enc = ENC(pOut->db); + if( (pOut->flags & MEM_Ephem)!=0 && sqlite3VdbeMemMakeWriteable(pOut) ){ + rc = SQLITE_NOMEM; + }else{ + *ppOut = pOut; + } + } + sqlite3VdbeMemRelease(&sMem); + } + return rc; +} + +/* +** Set the iterator value pVal to point to the first value in the set. +** Set (*ppOut) to point to this value before returning. +*/ +SQLITE_API int sqlite3_vtab_in_first(sqlite3_value *pVal, sqlite3_value **ppOut){ + return valueFromValueList(pVal, ppOut, 0); +} + +/* +** Set the iterator value pVal to point to the next value in the set. +** Set (*ppOut) to point to this value before returning. +*/ +SQLITE_API int sqlite3_vtab_in_next(sqlite3_value *pVal, sqlite3_value **ppOut){ + return valueFromValueList(pVal, ppOut, 1); +} + +/* +** Return the current time for a statement. If the current time +** is requested more than once within the same run of a single prepared +** statement, the exact same time is returned for each invocation regardless +** of the amount of time that elapses between invocations. In other words, +** the time returned is always the time of the first call. +*/ +SQLITE_PRIVATE sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context *p){ + int rc; +#ifndef SQLITE_ENABLE_STAT4 + sqlite3_int64 *piTime = &p->pVdbe->iCurrentTime; + assert( p->pVdbe!=0 ); +#else + sqlite3_int64 iTime = 0; + sqlite3_int64 *piTime = p->pVdbe!=0 ? &p->pVdbe->iCurrentTime : &iTime; +#endif + if( *piTime==0 ){ + rc = sqlite3OsCurrentTimeInt64(p->pOut->db->pVfs, piTime); + if( rc ) *piTime = 0; + } + return *piTime; +} + +/* +** Create a new aggregate context for p and return a pointer to +** its pMem->z element. +*/ +static SQLITE_NOINLINE void *createAggContext(sqlite3_context *p, int nByte){ + Mem *pMem = p->pMem; + assert( (pMem->flags & MEM_Agg)==0 ); + if( nByte<=0 ){ + sqlite3VdbeMemSetNull(pMem); + pMem->z = 0; + }else{ + sqlite3VdbeMemClearAndResize(pMem, nByte); + pMem->flags = MEM_Agg; + pMem->u.pDef = p->pFunc; + if( pMem->z ){ + memset(pMem->z, 0, nByte); + } + } + return (void*)pMem->z; +} + +/* +** Allocate or return the aggregate context for a user function. A new +** context is allocated on the first call. Subsequent calls return the +** same context that was returned on prior calls. +*/ +SQLITE_API void *sqlite3_aggregate_context(sqlite3_context *p, int nByte){ + assert( p && p->pFunc && p->pFunc->xFinalize ); + assert( sqlite3_mutex_held(p->pOut->db->mutex) ); + testcase( nByte<0 ); + if( (p->pMem->flags & MEM_Agg)==0 ){ + return createAggContext(p, nByte); + }else{ + return (void*)p->pMem->z; + } +} + +/* +** Return the auxiliary data pointer, if any, for the iArg'th argument to +** the user-function defined by pCtx. +** +** The left-most argument is 0. +** +** Undocumented behavior: If iArg is negative then access a cache of +** auxiliary data pointers that is available to all functions within a +** single prepared statement. The iArg values must match. +*/ +SQLITE_API void *sqlite3_get_auxdata(sqlite3_context *pCtx, int iArg){ + AuxData *pAuxData; + + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); +#if SQLITE_ENABLE_STAT4 + if( pCtx->pVdbe==0 ) return 0; +#else + assert( pCtx->pVdbe!=0 ); +#endif + for(pAuxData=pCtx->pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNextAux){ + if( pAuxData->iAuxArg==iArg && (pAuxData->iAuxOp==pCtx->iOp || iArg<0) ){ + return pAuxData->pAux; + } + } + return 0; +} + +/* +** Set the auxiliary data pointer and delete function, for the iArg'th +** argument to the user-function defined by pCtx. Any previous value is +** deleted by calling the delete function specified when it was set. +** +** The left-most argument is 0. +** +** Undocumented behavior: If iArg is negative then make the data available +** to all functions within the current prepared statement using iArg as an +** access code. +*/ +SQLITE_API void sqlite3_set_auxdata( + sqlite3_context *pCtx, + int iArg, + void *pAux, + void (*xDelete)(void*) +){ + AuxData *pAuxData; + Vdbe *pVdbe = pCtx->pVdbe; + + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); +#ifdef SQLITE_ENABLE_STAT4 + if( pVdbe==0 ) goto failed; +#else + assert( pVdbe!=0 ); +#endif + + for(pAuxData=pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNextAux){ + if( pAuxData->iAuxArg==iArg && (pAuxData->iAuxOp==pCtx->iOp || iArg<0) ){ + break; + } + } + if( pAuxData==0 ){ + pAuxData = sqlite3DbMallocZero(pVdbe->db, sizeof(AuxData)); + if( !pAuxData ) goto failed; + pAuxData->iAuxOp = pCtx->iOp; + pAuxData->iAuxArg = iArg; + pAuxData->pNextAux = pVdbe->pAuxData; + pVdbe->pAuxData = pAuxData; + if( pCtx->isError==0 ) pCtx->isError = -1; + }else if( pAuxData->xDeleteAux ){ + pAuxData->xDeleteAux(pAuxData->pAux); + } + + pAuxData->pAux = pAux; + pAuxData->xDeleteAux = xDelete; + return; + +failed: + if( xDelete ){ + xDelete(pAux); + } +} + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** Return the number of times the Step function of an aggregate has been +** called. +** +** This function is deprecated. Do not use it for new code. It is +** provide only to avoid breaking legacy code. New aggregate function +** implementations should keep their own counts within their aggregate +** context. +*/ +SQLITE_API int sqlite3_aggregate_count(sqlite3_context *p){ + assert( p && p->pMem && p->pFunc && p->pFunc->xFinalize ); + return p->pMem->n; +} +#endif + +/* +** Return the number of columns in the result set for the statement pStmt. +*/ +SQLITE_API int sqlite3_column_count(sqlite3_stmt *pStmt){ + Vdbe *pVm = (Vdbe *)pStmt; + return pVm ? pVm->nResColumn : 0; +} + +/* +** Return the number of values available from the current row of the +** currently executing statement pStmt. +*/ +SQLITE_API int sqlite3_data_count(sqlite3_stmt *pStmt){ + Vdbe *pVm = (Vdbe *)pStmt; + if( pVm==0 || pVm->pResultSet==0 ) return 0; + return pVm->nResColumn; +} + +/* +** Return a pointer to static memory containing an SQL NULL value. +*/ +static const Mem *columnNullValue(void){ + /* Even though the Mem structure contains an element + ** of type i64, on certain architectures (x86) with certain compiler + ** switches (-Os), gcc may align this Mem object on a 4-byte boundary + ** instead of an 8-byte one. This all works fine, except that when + ** running with SQLITE_DEBUG defined the SQLite code sometimes assert()s + ** that a Mem structure is located on an 8-byte boundary. To prevent + ** these assert()s from failing, when building with SQLITE_DEBUG defined + ** using gcc, we force nullMem to be 8-byte aligned using the magical + ** __attribute__((aligned(8))) macro. */ + static const Mem nullMem +#if defined(SQLITE_DEBUG) && defined(__GNUC__) + __attribute__((aligned(8))) +#endif + = { + /* .u = */ {0}, + /* .flags = */ (u16)MEM_Null, + /* .enc = */ (u8)0, + /* .eSubtype = */ (u8)0, + /* .n = */ (int)0, + /* .z = */ (char*)0, + /* .zMalloc = */ (char*)0, + /* .szMalloc = */ (int)0, + /* .uTemp = */ (u32)0, + /* .db = */ (sqlite3*)0, + /* .xDel = */ (void(*)(void*))0, +#ifdef SQLITE_DEBUG + /* .pScopyFrom = */ (Mem*)0, + /* .mScopyFlags= */ 0, +#endif + }; + return &nullMem; +} + +/* +** Check to see if column iCol of the given statement is valid. If +** it is, return a pointer to the Mem for the value of that column. +** If iCol is not valid, return a pointer to a Mem which has a value +** of NULL. +*/ +static Mem *columnMem(sqlite3_stmt *pStmt, int i){ + Vdbe *pVm; + Mem *pOut; + + pVm = (Vdbe *)pStmt; + if( pVm==0 ) return (Mem*)columnNullValue(); + assert( pVm->db ); + sqlite3_mutex_enter(pVm->db->mutex); + if( pVm->pResultSet!=0 && inResColumn && i>=0 ){ + pOut = &pVm->pResultSet[i]; + }else{ + sqlite3Error(pVm->db, SQLITE_RANGE); + pOut = (Mem*)columnNullValue(); + } + return pOut; +} + +/* +** This function is called after invoking an sqlite3_value_XXX function on a +** column value (i.e. a value returned by evaluating an SQL expression in the +** select list of a SELECT statement) that may cause a malloc() failure. If +** malloc() has failed, the threads mallocFailed flag is cleared and the result +** code of statement pStmt set to SQLITE_NOMEM. +** +** Specifically, this is called from within: +** +** sqlite3_column_int() +** sqlite3_column_int64() +** sqlite3_column_text() +** sqlite3_column_text16() +** sqlite3_column_real() +** sqlite3_column_bytes() +** sqlite3_column_bytes16() +** sqiite3_column_blob() +*/ +static void columnMallocFailure(sqlite3_stmt *pStmt) +{ + /* If malloc() failed during an encoding conversion within an + ** sqlite3_column_XXX API, then set the return code of the statement to + ** SQLITE_NOMEM. The next call to _step() (if any) will return SQLITE_ERROR + ** and _finalize() will return NOMEM. + */ + Vdbe *p = (Vdbe *)pStmt; + if( p ){ + assert( p->db!=0 ); + assert( sqlite3_mutex_held(p->db->mutex) ); + p->rc = sqlite3ApiExit(p->db, p->rc); + sqlite3_mutex_leave(p->db->mutex); + } +} + +/**************************** sqlite3_column_ ******************************* +** The following routines are used to access elements of the current row +** in the result set. +*/ +SQLITE_API const void *sqlite3_column_blob(sqlite3_stmt *pStmt, int i){ + const void *val; + val = sqlite3_value_blob( columnMem(pStmt,i) ); + /* Even though there is no encoding conversion, value_blob() might + ** need to call malloc() to expand the result of a zeroblob() + ** expression. + */ + columnMallocFailure(pStmt); + return val; +} +SQLITE_API int sqlite3_column_bytes(sqlite3_stmt *pStmt, int i){ + int val = sqlite3_value_bytes( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return val; +} +SQLITE_API int sqlite3_column_bytes16(sqlite3_stmt *pStmt, int i){ + int val = sqlite3_value_bytes16( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return val; +} +SQLITE_API double sqlite3_column_double(sqlite3_stmt *pStmt, int i){ + double val = sqlite3_value_double( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return val; +} +SQLITE_API int sqlite3_column_int(sqlite3_stmt *pStmt, int i){ + int val = sqlite3_value_int( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return val; +} +SQLITE_API sqlite_int64 sqlite3_column_int64(sqlite3_stmt *pStmt, int i){ + sqlite_int64 val = sqlite3_value_int64( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return val; +} +SQLITE_API const unsigned char *sqlite3_column_text(sqlite3_stmt *pStmt, int i){ + const unsigned char *val = sqlite3_value_text( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return val; +} +SQLITE_API sqlite3_value *sqlite3_column_value(sqlite3_stmt *pStmt, int i){ + Mem *pOut = columnMem(pStmt, i); + if( pOut->flags&MEM_Static ){ + pOut->flags &= ~MEM_Static; + pOut->flags |= MEM_Ephem; + } + columnMallocFailure(pStmt); + return (sqlite3_value *)pOut; +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API const void *sqlite3_column_text16(sqlite3_stmt *pStmt, int i){ + const void *val = sqlite3_value_text16( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return val; +} +#endif /* SQLITE_OMIT_UTF16 */ +SQLITE_API int sqlite3_column_type(sqlite3_stmt *pStmt, int i){ + int iType = sqlite3_value_type( columnMem(pStmt,i) ); + columnMallocFailure(pStmt); + return iType; +} + +/* +** Convert the N-th element of pStmt->pColName[] into a string using +** xFunc() then return that string. If N is out of range, return 0. +** +** There are up to 5 names for each column. useType determines which +** name is returned. Here are the names: +** +** 0 The column name as it should be displayed for output +** 1 The datatype name for the column +** 2 The name of the database that the column derives from +** 3 The name of the table that the column derives from +** 4 The name of the table column that the result column derives from +** +** If the result is not a simple column reference (if it is an expression +** or a constant) then useTypes 2, 3, and 4 return NULL. +*/ +static const void *columnName( + sqlite3_stmt *pStmt, /* The statement */ + int N, /* Which column to get the name for */ + int useUtf16, /* True to return the name as UTF16 */ + int useType /* What type of name */ +){ + const void *ret; + Vdbe *p; + int n; + sqlite3 *db; +#ifdef SQLITE_ENABLE_API_ARMOR + if( pStmt==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + ret = 0; + p = (Vdbe *)pStmt; + db = p->db; + assert( db!=0 ); + n = sqlite3_column_count(pStmt); + if( N=0 ){ + N += useType*n; + sqlite3_mutex_enter(db->mutex); + assert( db->mallocFailed==0 ); +#ifndef SQLITE_OMIT_UTF16 + if( useUtf16 ){ + ret = sqlite3_value_text16((sqlite3_value*)&p->aColName[N]); + }else +#endif + { + ret = sqlite3_value_text((sqlite3_value*)&p->aColName[N]); + } + /* A malloc may have failed inside of the _text() call. If this + ** is the case, clear the mallocFailed flag and return NULL. + */ + if( db->mallocFailed ){ + sqlite3OomClear(db); + ret = 0; + } + sqlite3_mutex_leave(db->mutex); + } + return ret; +} + +/* +** Return the name of the Nth column of the result set returned by SQL +** statement pStmt. +*/ +SQLITE_API const char *sqlite3_column_name(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 0, COLNAME_NAME); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API const void *sqlite3_column_name16(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 1, COLNAME_NAME); +} +#endif + +/* +** Constraint: If you have ENABLE_COLUMN_METADATA then you must +** not define OMIT_DECLTYPE. +*/ +#if defined(SQLITE_OMIT_DECLTYPE) && defined(SQLITE_ENABLE_COLUMN_METADATA) +# error "Must not define both SQLITE_OMIT_DECLTYPE \ + and SQLITE_ENABLE_COLUMN_METADATA" +#endif + +#ifndef SQLITE_OMIT_DECLTYPE +/* +** Return the column declaration type (if applicable) of the 'i'th column +** of the result set of SQL statement pStmt. +*/ +SQLITE_API const char *sqlite3_column_decltype(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 0, COLNAME_DECLTYPE); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API const void *sqlite3_column_decltype16(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 1, COLNAME_DECLTYPE); +} +#endif /* SQLITE_OMIT_UTF16 */ +#endif /* SQLITE_OMIT_DECLTYPE */ + +#ifdef SQLITE_ENABLE_COLUMN_METADATA +/* +** Return the name of the database from which a result column derives. +** NULL is returned if the result column is an expression or constant or +** anything else which is not an unambiguous reference to a database column. +*/ +SQLITE_API const char *sqlite3_column_database_name(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 0, COLNAME_DATABASE); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API const void *sqlite3_column_database_name16(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 1, COLNAME_DATABASE); +} +#endif /* SQLITE_OMIT_UTF16 */ + +/* +** Return the name of the table from which a result column derives. +** NULL is returned if the result column is an expression or constant or +** anything else which is not an unambiguous reference to a database column. +*/ +SQLITE_API const char *sqlite3_column_table_name(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 0, COLNAME_TABLE); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API const void *sqlite3_column_table_name16(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 1, COLNAME_TABLE); +} +#endif /* SQLITE_OMIT_UTF16 */ + +/* +** Return the name of the table column from which a result column derives. +** NULL is returned if the result column is an expression or constant or +** anything else which is not an unambiguous reference to a database column. +*/ +SQLITE_API const char *sqlite3_column_origin_name(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 0, COLNAME_COLUMN); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt *pStmt, int N){ + return columnName(pStmt, N, 1, COLNAME_COLUMN); +} +#endif /* SQLITE_OMIT_UTF16 */ +#endif /* SQLITE_ENABLE_COLUMN_METADATA */ + + +/******************************* sqlite3_bind_ *************************** +** +** Routines used to attach values to wildcards in a compiled SQL statement. +*/ +/* +** Unbind the value bound to variable i in virtual machine p. This is the +** the same as binding a NULL value to the column. If the "i" parameter is +** out of range, then SQLITE_RANGE is returned. Othewise SQLITE_OK. +** +** A successful evaluation of this routine acquires the mutex on p. +** the mutex is released if any kind of error occurs. +** +** The error code stored in database p->db is overwritten with the return +** value in any case. +*/ +static int vdbeUnbind(Vdbe *p, int i){ + Mem *pVar; + if( vdbeSafetyNotNull(p) ){ + return SQLITE_MISUSE_BKPT; + } + sqlite3_mutex_enter(p->db->mutex); + if( p->iVdbeMagic!=VDBE_MAGIC_RUN || p->pc>=0 ){ + sqlite3Error(p->db, SQLITE_MISUSE); + sqlite3_mutex_leave(p->db->mutex); + sqlite3_log(SQLITE_MISUSE, + "bind on a busy prepared statement: [%s]", p->zSql); + return SQLITE_MISUSE_BKPT; + } + if( i<1 || i>p->nVar ){ + sqlite3Error(p->db, SQLITE_RANGE); + sqlite3_mutex_leave(p->db->mutex); + return SQLITE_RANGE; + } + i--; + pVar = &p->aVar[i]; + sqlite3VdbeMemRelease(pVar); + pVar->flags = MEM_Null; + p->db->errCode = SQLITE_OK; + + /* If the bit corresponding to this variable in Vdbe.expmask is set, then + ** binding a new value to this variable invalidates the current query plan. + ** + ** IMPLEMENTATION-OF: R-57496-20354 If the specific value bound to a host + ** parameter in the WHERE clause might influence the choice of query plan + ** for a statement, then the statement will be automatically recompiled, + ** as if there had been a schema change, on the first sqlite3_step() call + ** following any change to the bindings of that parameter. + */ + assert( (p->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 || p->expmask==0 ); + if( p->expmask!=0 && (p->expmask & (i>=31 ? 0x80000000 : (u32)1<expired = 1; + } + return SQLITE_OK; +} + +/* +** Bind a text or BLOB value. +*/ +static int bindText( + sqlite3_stmt *pStmt, /* The statement to bind against */ + int i, /* Index of the parameter to bind */ + const void *zData, /* Pointer to the data to be bound */ + i64 nData, /* Number of bytes of data to be bound */ + void (*xDel)(void*), /* Destructor for the data */ + u8 encoding /* Encoding for the data */ +){ + Vdbe *p = (Vdbe *)pStmt; + Mem *pVar; + int rc; + + rc = vdbeUnbind(p, i); + if( rc==SQLITE_OK ){ + if( zData!=0 ){ + pVar = &p->aVar[i-1]; + rc = sqlite3VdbeMemSetStr(pVar, zData, nData, encoding, xDel); + if( rc==SQLITE_OK && encoding!=0 ){ + rc = sqlite3VdbeChangeEncoding(pVar, ENC(p->db)); + } + if( rc ){ + sqlite3Error(p->db, rc); + rc = sqlite3ApiExit(p->db, rc); + } + } + sqlite3_mutex_leave(p->db->mutex); + }else if( xDel!=SQLITE_STATIC && xDel!=SQLITE_TRANSIENT ){ + xDel((void*)zData); + } + return rc; +} + + +/* +** Bind a blob value to an SQL statement variable. +*/ +SQLITE_API int sqlite3_bind_blob( + sqlite3_stmt *pStmt, + int i, + const void *zData, + int nData, + void (*xDel)(void*) +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( nData<0 ) return SQLITE_MISUSE_BKPT; +#endif + return bindText(pStmt, i, zData, nData, xDel, 0); +} +SQLITE_API int sqlite3_bind_blob64( + sqlite3_stmt *pStmt, + int i, + const void *zData, + sqlite3_uint64 nData, + void (*xDel)(void*) +){ + assert( xDel!=SQLITE_DYNAMIC ); + return bindText(pStmt, i, zData, nData, xDel, 0); +} +SQLITE_API int sqlite3_bind_double(sqlite3_stmt *pStmt, int i, double rValue){ + int rc; + Vdbe *p = (Vdbe *)pStmt; + rc = vdbeUnbind(p, i); + if( rc==SQLITE_OK ){ + sqlite3VdbeMemSetDouble(&p->aVar[i-1], rValue); + sqlite3_mutex_leave(p->db->mutex); + } + return rc; +} +SQLITE_API int sqlite3_bind_int(sqlite3_stmt *p, int i, int iValue){ + return sqlite3_bind_int64(p, i, (i64)iValue); +} +SQLITE_API int sqlite3_bind_int64(sqlite3_stmt *pStmt, int i, sqlite_int64 iValue){ + int rc; + Vdbe *p = (Vdbe *)pStmt; + rc = vdbeUnbind(p, i); + if( rc==SQLITE_OK ){ + sqlite3VdbeMemSetInt64(&p->aVar[i-1], iValue); + sqlite3_mutex_leave(p->db->mutex); + } + return rc; +} +SQLITE_API int sqlite3_bind_null(sqlite3_stmt *pStmt, int i){ + int rc; + Vdbe *p = (Vdbe*)pStmt; + rc = vdbeUnbind(p, i); + if( rc==SQLITE_OK ){ + sqlite3_mutex_leave(p->db->mutex); + } + return rc; +} +SQLITE_API int sqlite3_bind_pointer( + sqlite3_stmt *pStmt, + int i, + void *pPtr, + const char *zPTtype, + void (*xDestructor)(void*) +){ + int rc; + Vdbe *p = (Vdbe*)pStmt; + rc = vdbeUnbind(p, i); + if( rc==SQLITE_OK ){ + sqlite3VdbeMemSetPointer(&p->aVar[i-1], pPtr, zPTtype, xDestructor); + sqlite3_mutex_leave(p->db->mutex); + }else if( xDestructor ){ + xDestructor(pPtr); + } + return rc; +} +SQLITE_API int sqlite3_bind_text( + sqlite3_stmt *pStmt, + int i, + const char *zData, + int nData, + void (*xDel)(void*) +){ + return bindText(pStmt, i, zData, nData, xDel, SQLITE_UTF8); +} +SQLITE_API int sqlite3_bind_text64( + sqlite3_stmt *pStmt, + int i, + const char *zData, + sqlite3_uint64 nData, + void (*xDel)(void*), + unsigned char enc +){ + assert( xDel!=SQLITE_DYNAMIC ); + if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE; + return bindText(pStmt, i, zData, nData, xDel, enc); +} +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API int sqlite3_bind_text16( + sqlite3_stmt *pStmt, + int i, + const void *zData, + int nData, + void (*xDel)(void*) +){ + return bindText(pStmt, i, zData, nData, xDel, SQLITE_UTF16NATIVE); +} +#endif /* SQLITE_OMIT_UTF16 */ +SQLITE_API int sqlite3_bind_value(sqlite3_stmt *pStmt, int i, const sqlite3_value *pValue){ + int rc; + switch( sqlite3_value_type((sqlite3_value*)pValue) ){ + case SQLITE_INTEGER: { + rc = sqlite3_bind_int64(pStmt, i, pValue->u.i); + break; + } + case SQLITE_FLOAT: { + assert( pValue->flags & (MEM_Real|MEM_IntReal) ); + rc = sqlite3_bind_double(pStmt, i, + (pValue->flags & MEM_Real) ? pValue->u.r : (double)pValue->u.i + ); + break; + } + case SQLITE_BLOB: { + if( pValue->flags & MEM_Zero ){ + rc = sqlite3_bind_zeroblob(pStmt, i, pValue->u.nZero); + }else{ + rc = sqlite3_bind_blob(pStmt, i, pValue->z, pValue->n,SQLITE_TRANSIENT); + } + break; + } + case SQLITE_TEXT: { + rc = bindText(pStmt,i, pValue->z, pValue->n, SQLITE_TRANSIENT, + pValue->enc); + break; + } + default: { + rc = sqlite3_bind_null(pStmt, i); + break; + } + } + return rc; +} +SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, int n){ + int rc; + Vdbe *p = (Vdbe *)pStmt; + rc = vdbeUnbind(p, i); + if( rc==SQLITE_OK ){ +#ifndef SQLITE_OMIT_INCRBLOB + sqlite3VdbeMemSetZeroBlob(&p->aVar[i-1], n); +#else + rc = sqlite3VdbeMemSetZeroBlob(&p->aVar[i-1], n); +#endif + sqlite3_mutex_leave(p->db->mutex); + } + return rc; +} +SQLITE_API int sqlite3_bind_zeroblob64(sqlite3_stmt *pStmt, int i, sqlite3_uint64 n){ + int rc; + Vdbe *p = (Vdbe *)pStmt; + sqlite3_mutex_enter(p->db->mutex); + if( n>(u64)p->db->aLimit[SQLITE_LIMIT_LENGTH] ){ + rc = SQLITE_TOOBIG; + }else{ + assert( (n & 0x7FFFFFFF)==n ); + rc = sqlite3_bind_zeroblob(pStmt, i, n); + } + rc = sqlite3ApiExit(p->db, rc); + sqlite3_mutex_leave(p->db->mutex); + return rc; +} + +/* +** Return the number of wildcards that can be potentially bound to. +** This routine is added to support DBD::SQLite. +*/ +SQLITE_API int sqlite3_bind_parameter_count(sqlite3_stmt *pStmt){ + Vdbe *p = (Vdbe*)pStmt; + return p ? p->nVar : 0; +} + +/* +** Return the name of a wildcard parameter. Return NULL if the index +** is out of range or if the wildcard is unnamed. +** +** The result is always UTF-8. +*/ +SQLITE_API const char *sqlite3_bind_parameter_name(sqlite3_stmt *pStmt, int i){ + Vdbe *p = (Vdbe*)pStmt; + if( p==0 ) return 0; + return sqlite3VListNumToName(p->pVList, i); +} + +/* +** Given a wildcard parameter name, return the index of the variable +** with that name. If there is no variable with the given name, +** return 0. +*/ +SQLITE_PRIVATE int sqlite3VdbeParameterIndex(Vdbe *p, const char *zName, int nName){ + if( p==0 || zName==0 ) return 0; + return sqlite3VListNameToNum(p->pVList, zName, nName); +} +SQLITE_API int sqlite3_bind_parameter_index(sqlite3_stmt *pStmt, const char *zName){ + return sqlite3VdbeParameterIndex((Vdbe*)pStmt, zName, sqlite3Strlen30(zName)); +} + +/* +** Transfer all bindings from the first statement over to the second. +*/ +SQLITE_PRIVATE int sqlite3TransferBindings(sqlite3_stmt *pFromStmt, sqlite3_stmt *pToStmt){ + Vdbe *pFrom = (Vdbe*)pFromStmt; + Vdbe *pTo = (Vdbe*)pToStmt; + int i; + assert( pTo->db==pFrom->db ); + assert( pTo->nVar==pFrom->nVar ); + sqlite3_mutex_enter(pTo->db->mutex); + for(i=0; inVar; i++){ + sqlite3VdbeMemMove(&pTo->aVar[i], &pFrom->aVar[i]); + } + sqlite3_mutex_leave(pTo->db->mutex); + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** Deprecated external interface. Internal/core SQLite code +** should call sqlite3TransferBindings. +** +** It is misuse to call this routine with statements from different +** database connections. But as this is a deprecated interface, we +** will not bother to check for that condition. +** +** If the two statements contain a different number of bindings, then +** an SQLITE_ERROR is returned. Nothing else can go wrong, so otherwise +** SQLITE_OK is returned. +*/ +SQLITE_API int sqlite3_transfer_bindings(sqlite3_stmt *pFromStmt, sqlite3_stmt *pToStmt){ + Vdbe *pFrom = (Vdbe*)pFromStmt; + Vdbe *pTo = (Vdbe*)pToStmt; + if( pFrom->nVar!=pTo->nVar ){ + return SQLITE_ERROR; + } + assert( (pTo->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 || pTo->expmask==0 ); + if( pTo->expmask ){ + pTo->expired = 1; + } + assert( (pFrom->prepFlags & SQLITE_PREPARE_SAVESQL)!=0 || pFrom->expmask==0 ); + if( pFrom->expmask ){ + pFrom->expired = 1; + } + return sqlite3TransferBindings(pFromStmt, pToStmt); +} +#endif + +/* +** Return the sqlite3* database handle to which the prepared statement given +** in the argument belongs. This is the same database handle that was +** the first argument to the sqlite3_prepare() that was used to create +** the statement in the first place. +*/ +SQLITE_API sqlite3 *sqlite3_db_handle(sqlite3_stmt *pStmt){ + return pStmt ? ((Vdbe*)pStmt)->db : 0; +} + +/* +** Return true if the prepared statement is guaranteed to not modify the +** database. +*/ +SQLITE_API int sqlite3_stmt_readonly(sqlite3_stmt *pStmt){ + return pStmt ? ((Vdbe*)pStmt)->readOnly : 1; +} + +/* +** Return 1 if the statement is an EXPLAIN and return 2 if the +** statement is an EXPLAIN QUERY PLAN +*/ +SQLITE_API int sqlite3_stmt_isexplain(sqlite3_stmt *pStmt){ + return pStmt ? ((Vdbe*)pStmt)->explain : 0; +} + +/* +** Return true if the prepared statement is in need of being reset. +*/ +SQLITE_API int sqlite3_stmt_busy(sqlite3_stmt *pStmt){ + Vdbe *v = (Vdbe*)pStmt; + return v!=0 && v->iVdbeMagic==VDBE_MAGIC_RUN && v->pc>=0; +} + +/* +** Return a pointer to the next prepared statement after pStmt associated +** with database connection pDb. If pStmt is NULL, return the first +** prepared statement for the database connection. Return NULL if there +** are no more. +*/ +SQLITE_API sqlite3_stmt *sqlite3_next_stmt(sqlite3 *pDb, sqlite3_stmt *pStmt){ + sqlite3_stmt *pNext; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(pDb) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(pDb->mutex); + if( pStmt==0 ){ + pNext = (sqlite3_stmt*)pDb->pVdbe; + }else{ + pNext = (sqlite3_stmt*)((Vdbe*)pStmt)->pNext; + } + sqlite3_mutex_leave(pDb->mutex); + return pNext; +} + +/* +** Return the value of a status counter for a prepared statement +*/ +SQLITE_API int sqlite3_stmt_status(sqlite3_stmt *pStmt, int op, int resetFlag){ + Vdbe *pVdbe = (Vdbe*)pStmt; + u32 v; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !pStmt + || (op!=SQLITE_STMTSTATUS_MEMUSED && (op<0||op>=ArraySize(pVdbe->aCounter))) + ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + if( op==SQLITE_STMTSTATUS_MEMUSED ){ + sqlite3 *db = pVdbe->db; + sqlite3_mutex_enter(db->mutex); + v = 0; + db->pnBytesFreed = (int*)&v; + sqlite3VdbeClearObject(db, pVdbe); + sqlite3DbFree(db, pVdbe); + db->pnBytesFreed = 0; + sqlite3_mutex_leave(db->mutex); + }else{ + v = pVdbe->aCounter[op]; + if( resetFlag ) pVdbe->aCounter[op] = 0; + } + return (int)v; +} + +/* +** Return the SQL associated with a prepared statement +*/ +SQLITE_API const char *sqlite3_sql(sqlite3_stmt *pStmt){ + Vdbe *p = (Vdbe *)pStmt; + return p ? p->zSql : 0; +} + +/* +** Return the SQL associated with a prepared statement with +** bound parameters expanded. Space to hold the returned string is +** obtained from sqlite3_malloc(). The caller is responsible for +** freeing the returned string by passing it to sqlite3_free(). +** +** The SQLITE_TRACE_SIZE_LIMIT puts an upper bound on the size of +** expanded bound parameters. +*/ +SQLITE_API char *sqlite3_expanded_sql(sqlite3_stmt *pStmt){ +#ifdef SQLITE_OMIT_TRACE + return 0; +#else + char *z = 0; + const char *zSql = sqlite3_sql(pStmt); + if( zSql ){ + Vdbe *p = (Vdbe *)pStmt; + sqlite3_mutex_enter(p->db->mutex); + z = sqlite3VdbeExpandSql(p, zSql); + sqlite3_mutex_leave(p->db->mutex); + } + return z; +#endif +} + +#ifdef SQLITE_ENABLE_NORMALIZE +/* +** Return the normalized SQL associated with a prepared statement. +*/ +SQLITE_API const char *sqlite3_normalized_sql(sqlite3_stmt *pStmt){ + Vdbe *p = (Vdbe *)pStmt; + if( p==0 ) return 0; + if( p->zNormSql==0 && ALWAYS(p->zSql!=0) ){ + sqlite3_mutex_enter(p->db->mutex); + p->zNormSql = sqlite3Normalize(p, p->zSql); + sqlite3_mutex_leave(p->db->mutex); + } + return p->zNormSql; +} +#endif /* SQLITE_ENABLE_NORMALIZE */ + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +/* +** Allocate and populate an UnpackedRecord structure based on the serialized +** record in nKey/pKey. Return a pointer to the new UnpackedRecord structure +** if successful, or a NULL pointer if an OOM error is encountered. +*/ +static UnpackedRecord *vdbeUnpackRecord( + KeyInfo *pKeyInfo, + int nKey, + const void *pKey +){ + UnpackedRecord *pRet; /* Return value */ + + pRet = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); + if( pRet ){ + memset(pRet->aMem, 0, sizeof(Mem)*(pKeyInfo->nKeyField+1)); + sqlite3VdbeRecordUnpack(pKeyInfo, nKey, pKey, pRet); + } + return pRet; +} + +/* +** This function is called from within a pre-update callback to retrieve +** a field of the row currently being updated or deleted. +*/ +SQLITE_API int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppValue){ + PreUpdate *p = db->pPreUpdate; + Mem *pMem; + int rc = SQLITE_OK; + + /* Test that this call is being made from within an SQLITE_DELETE or + ** SQLITE_UPDATE pre-update callback, and that iIdx is within range. */ + if( !p || p->op==SQLITE_INSERT ){ + rc = SQLITE_MISUSE_BKPT; + goto preupdate_old_out; + } + if( p->pPk ){ + iIdx = sqlite3TableColumnToIndex(p->pPk, iIdx); + } + if( iIdx>=p->pCsr->nField || iIdx<0 ){ + rc = SQLITE_RANGE; + goto preupdate_old_out; + } + + /* If the old.* record has not yet been loaded into memory, do so now. */ + if( p->pUnpacked==0 ){ + u32 nRec; + u8 *aRec; + + assert( p->pCsr->eCurType==CURTYPE_BTREE ); + nRec = sqlite3BtreePayloadSize(p->pCsr->uc.pCursor); + aRec = sqlite3DbMallocRaw(db, nRec); + if( !aRec ) goto preupdate_old_out; + rc = sqlite3BtreePayload(p->pCsr->uc.pCursor, 0, nRec, aRec); + if( rc==SQLITE_OK ){ + p->pUnpacked = vdbeUnpackRecord(&p->keyinfo, nRec, aRec); + if( !p->pUnpacked ) rc = SQLITE_NOMEM; + } + if( rc!=SQLITE_OK ){ + sqlite3DbFree(db, aRec); + goto preupdate_old_out; + } + p->aRecord = aRec; + } + + pMem = *ppValue = &p->pUnpacked->aMem[iIdx]; + if( iIdx==p->pTab->iPKey ){ + sqlite3VdbeMemSetInt64(pMem, p->iKey1); + }else if( iIdx>=p->pUnpacked->nField ){ + *ppValue = (sqlite3_value *)columnNullValue(); + }else if( p->pTab->aCol[iIdx].affinity==SQLITE_AFF_REAL ){ + if( pMem->flags & (MEM_Int|MEM_IntReal) ){ + testcase( pMem->flags & MEM_Int ); + testcase( pMem->flags & MEM_IntReal ); + sqlite3VdbeMemRealify(pMem); + } + } + + preupdate_old_out: + sqlite3Error(db, rc); + return sqlite3ApiExit(db, rc); +} +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +/* +** This function is called from within a pre-update callback to retrieve +** the number of columns in the row being updated, deleted or inserted. +*/ +SQLITE_API int sqlite3_preupdate_count(sqlite3 *db){ + PreUpdate *p = db->pPreUpdate; + return (p ? p->keyinfo.nKeyField : 0); +} +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +/* +** This function is designed to be called from within a pre-update callback +** only. It returns zero if the change that caused the callback was made +** immediately by a user SQL statement. Or, if the change was made by a +** trigger program, it returns the number of trigger programs currently +** on the stack (1 for a top-level trigger, 2 for a trigger fired by a +** top-level trigger etc.). +** +** For the purposes of the previous paragraph, a foreign key CASCADE, SET NULL +** or SET DEFAULT action is considered a trigger. +*/ +SQLITE_API int sqlite3_preupdate_depth(sqlite3 *db){ + PreUpdate *p = db->pPreUpdate; + return (p ? p->v->nFrame : 0); +} +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +/* +** This function is designed to be called from within a pre-update callback +** only. +*/ +SQLITE_API int sqlite3_preupdate_blobwrite(sqlite3 *db){ + PreUpdate *p = db->pPreUpdate; + return (p ? p->iBlobWrite : -1); +} +#endif + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +/* +** This function is called from within a pre-update callback to retrieve +** a field of the row currently being updated or inserted. +*/ +SQLITE_API int sqlite3_preupdate_new(sqlite3 *db, int iIdx, sqlite3_value **ppValue){ + PreUpdate *p = db->pPreUpdate; + int rc = SQLITE_OK; + Mem *pMem; + + if( !p || p->op==SQLITE_DELETE ){ + rc = SQLITE_MISUSE_BKPT; + goto preupdate_new_out; + } + if( p->pPk && p->op!=SQLITE_UPDATE ){ + iIdx = sqlite3TableColumnToIndex(p->pPk, iIdx); + } + if( iIdx>=p->pCsr->nField || iIdx<0 ){ + rc = SQLITE_RANGE; + goto preupdate_new_out; + } + + if( p->op==SQLITE_INSERT ){ + /* For an INSERT, memory cell p->iNewReg contains the serialized record + ** that is being inserted. Deserialize it. */ + UnpackedRecord *pUnpack = p->pNewUnpacked; + if( !pUnpack ){ + Mem *pData = &p->v->aMem[p->iNewReg]; + rc = ExpandBlob(pData); + if( rc!=SQLITE_OK ) goto preupdate_new_out; + pUnpack = vdbeUnpackRecord(&p->keyinfo, pData->n, pData->z); + if( !pUnpack ){ + rc = SQLITE_NOMEM; + goto preupdate_new_out; + } + p->pNewUnpacked = pUnpack; + } + pMem = &pUnpack->aMem[iIdx]; + if( iIdx==p->pTab->iPKey ){ + sqlite3VdbeMemSetInt64(pMem, p->iKey2); + }else if( iIdx>=pUnpack->nField ){ + pMem = (sqlite3_value *)columnNullValue(); + } + }else{ + /* For an UPDATE, memory cell (p->iNewReg+1+iIdx) contains the required + ** value. Make a copy of the cell contents and return a pointer to it. + ** It is not safe to return a pointer to the memory cell itself as the + ** caller may modify the value text encoding. + */ + assert( p->op==SQLITE_UPDATE ); + if( !p->aNew ){ + p->aNew = (Mem *)sqlite3DbMallocZero(db, sizeof(Mem) * p->pCsr->nField); + if( !p->aNew ){ + rc = SQLITE_NOMEM; + goto preupdate_new_out; + } + } + assert( iIdx>=0 && iIdxpCsr->nField ); + pMem = &p->aNew[iIdx]; + if( pMem->flags==0 ){ + if( iIdx==p->pTab->iPKey ){ + sqlite3VdbeMemSetInt64(pMem, p->iKey2); + }else{ + rc = sqlite3VdbeMemCopy(pMem, &p->v->aMem[p->iNewReg+1+iIdx]); + if( rc!=SQLITE_OK ) goto preupdate_new_out; + } + } + } + *ppValue = pMem; + + preupdate_new_out: + sqlite3Error(db, rc); + return sqlite3ApiExit(db, rc); +} +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +/* +** Return status data for a single loop within query pStmt. +*/ +SQLITE_API int sqlite3_stmt_scanstatus( + sqlite3_stmt *pStmt, /* Prepared statement being queried */ + int idx, /* Index of loop to report on */ + int iScanStatusOp, /* Which metric to return */ + void *pOut /* OUT: Write the answer here */ +){ + Vdbe *p = (Vdbe*)pStmt; + ScanStatus *pScan; + if( idx<0 || idx>=p->nScan ) return 1; + pScan = &p->aScan[idx]; + switch( iScanStatusOp ){ + case SQLITE_SCANSTAT_NLOOP: { + *(sqlite3_int64*)pOut = p->anExec[pScan->addrLoop]; + break; + } + case SQLITE_SCANSTAT_NVISIT: { + *(sqlite3_int64*)pOut = p->anExec[pScan->addrVisit]; + break; + } + case SQLITE_SCANSTAT_EST: { + double r = 1.0; + LogEst x = pScan->nEst; + while( x<100 ){ + x += 10; + r *= 0.5; + } + *(double*)pOut = r*sqlite3LogEstToInt(x); + break; + } + case SQLITE_SCANSTAT_NAME: { + *(const char**)pOut = pScan->zName; + break; + } + case SQLITE_SCANSTAT_EXPLAIN: { + if( pScan->addrExplain ){ + *(const char**)pOut = p->aOp[ pScan->addrExplain ].p4.z; + }else{ + *(const char**)pOut = 0; + } + break; + } + case SQLITE_SCANSTAT_SELECTID: { + if( pScan->addrExplain ){ + *(int*)pOut = p->aOp[ pScan->addrExplain ].p1; + }else{ + *(int*)pOut = -1; + } + break; + } + default: { + return 1; + } + } + return 0; +} + +/* +** Zero all counters associated with the sqlite3_stmt_scanstatus() data. +*/ +SQLITE_API void sqlite3_stmt_scanstatus_reset(sqlite3_stmt *pStmt){ + Vdbe *p = (Vdbe*)pStmt; + memset(p->anExec, 0, p->nOp * sizeof(i64)); +} +#endif /* SQLITE_ENABLE_STMT_SCANSTATUS */ + +/************** End of vdbeapi.c *********************************************/ +/************** Begin file vdbetrace.c ***************************************/ +/* +** 2009 November 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code used to insert the values of host parameters +** (aka "wildcards") into the SQL text output by sqlite3_trace(). +** +** The Vdbe parse-tree explainer is also found here. +*/ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +#ifndef SQLITE_OMIT_TRACE + +/* +** zSql is a zero-terminated string of UTF-8 SQL text. Return the number of +** bytes in this text up to but excluding the first character in +** a host parameter. If the text contains no host parameters, return +** the total number of bytes in the text. +*/ +static int findNextHostParameter(const char *zSql, int *pnToken){ + int tokenType; + int nTotal = 0; + int n; + + *pnToken = 0; + while( zSql[0] ){ + n = sqlite3GetToken((u8*)zSql, &tokenType); + assert( n>0 && tokenType!=TK_ILLEGAL ); + if( tokenType==TK_VARIABLE ){ + *pnToken = n; + break; + } + nTotal += n; + zSql += n; + } + return nTotal; +} + +/* +** This function returns a pointer to a nul-terminated string in memory +** obtained from sqlite3DbMalloc(). If sqlite3.nVdbeExec is 1, then the +** string contains a copy of zRawSql but with host parameters expanded to +** their current bindings. Or, if sqlite3.nVdbeExec is greater than 1, +** then the returned string holds a copy of zRawSql with "-- " prepended +** to each line of text. +** +** If the SQLITE_TRACE_SIZE_LIMIT macro is defined to an integer, then +** then long strings and blobs are truncated to that many bytes. This +** can be used to prevent unreasonably large trace strings when dealing +** with large (multi-megabyte) strings and blobs. +** +** The calling function is responsible for making sure the memory returned +** is eventually freed. +** +** ALGORITHM: Scan the input string looking for host parameters in any of +** these forms: ?, ?N, $A, @A, :A. Take care to avoid text within +** string literals, quoted identifier names, and comments. For text forms, +** the host parameter index is found by scanning the prepared +** statement for the corresponding OP_Variable opcode. Once the host +** parameter index is known, locate the value in p->aVar[]. Then render +** the value as a literal in place of the host parameter name. +*/ +SQLITE_PRIVATE char *sqlite3VdbeExpandSql( + Vdbe *p, /* The prepared statement being evaluated */ + const char *zRawSql /* Raw text of the SQL statement */ +){ + sqlite3 *db; /* The database connection */ + int idx = 0; /* Index of a host parameter */ + int nextIndex = 1; /* Index of next ? host parameter */ + int n; /* Length of a token prefix */ + int nToken; /* Length of the parameter token */ + int i; /* Loop counter */ + Mem *pVar; /* Value of a host parameter */ + StrAccum out; /* Accumulate the output here */ +#ifndef SQLITE_OMIT_UTF16 + Mem utf8; /* Used to convert UTF16 into UTF8 for display */ +#endif + + db = p->db; + sqlite3StrAccumInit(&out, 0, 0, 0, db->aLimit[SQLITE_LIMIT_LENGTH]); + if( db->nVdbeExec>1 ){ + while( *zRawSql ){ + const char *zStart = zRawSql; + while( *(zRawSql++)!='\n' && *zRawSql ); + sqlite3_str_append(&out, "-- ", 3); + assert( (zRawSql - zStart) > 0 ); + sqlite3_str_append(&out, zStart, (int)(zRawSql-zStart)); + } + }else if( p->nVar==0 ){ + sqlite3_str_append(&out, zRawSql, sqlite3Strlen30(zRawSql)); + }else{ + while( zRawSql[0] ){ + n = findNextHostParameter(zRawSql, &nToken); + assert( n>0 ); + sqlite3_str_append(&out, zRawSql, n); + zRawSql += n; + assert( zRawSql[0] || nToken==0 ); + if( nToken==0 ) break; + if( zRawSql[0]=='?' ){ + if( nToken>1 ){ + assert( sqlite3Isdigit(zRawSql[1]) ); + sqlite3GetInt32(&zRawSql[1], &idx); + }else{ + idx = nextIndex; + } + }else{ + assert( zRawSql[0]==':' || zRawSql[0]=='$' || + zRawSql[0]=='@' || zRawSql[0]=='#' ); + testcase( zRawSql[0]==':' ); + testcase( zRawSql[0]=='$' ); + testcase( zRawSql[0]=='@' ); + testcase( zRawSql[0]=='#' ); + idx = sqlite3VdbeParameterIndex(p, zRawSql, nToken); + assert( idx>0 ); + } + zRawSql += nToken; + nextIndex = MAX(idx + 1, nextIndex); + assert( idx>0 && idx<=p->nVar ); + pVar = &p->aVar[idx-1]; + if( pVar->flags & MEM_Null ){ + sqlite3_str_append(&out, "NULL", 4); + }else if( pVar->flags & (MEM_Int|MEM_IntReal) ){ + sqlite3_str_appendf(&out, "%lld", pVar->u.i); + }else if( pVar->flags & MEM_Real ){ + sqlite3_str_appendf(&out, "%!.15g", pVar->u.r); + }else if( pVar->flags & MEM_Str ){ + int nOut; /* Number of bytes of the string text to include in output */ +#ifndef SQLITE_OMIT_UTF16 + u8 enc = ENC(db); + if( enc!=SQLITE_UTF8 ){ + memset(&utf8, 0, sizeof(utf8)); + utf8.db = db; + sqlite3VdbeMemSetStr(&utf8, pVar->z, pVar->n, enc, SQLITE_STATIC); + if( SQLITE_NOMEM==sqlite3VdbeChangeEncoding(&utf8, SQLITE_UTF8) ){ + out.accError = SQLITE_NOMEM; + out.nAlloc = 0; + } + pVar = &utf8; + } +#endif + nOut = pVar->n; +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOut>SQLITE_TRACE_SIZE_LIMIT ){ + nOut = SQLITE_TRACE_SIZE_LIMIT; + while( nOutn && (pVar->z[nOut]&0xc0)==0x80 ){ nOut++; } + } +#endif + sqlite3_str_appendf(&out, "'%.*q'", nOut, pVar->z); +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOutn ){ + sqlite3_str_appendf(&out, "/*+%d bytes*/", pVar->n-nOut); + } +#endif +#ifndef SQLITE_OMIT_UTF16 + if( enc!=SQLITE_UTF8 ) sqlite3VdbeMemRelease(&utf8); +#endif + }else if( pVar->flags & MEM_Zero ){ + sqlite3_str_appendf(&out, "zeroblob(%d)", pVar->u.nZero); + }else{ + int nOut; /* Number of bytes of the blob to include in output */ + assert( pVar->flags & MEM_Blob ); + sqlite3_str_append(&out, "x'", 2); + nOut = pVar->n; +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOut>SQLITE_TRACE_SIZE_LIMIT ) nOut = SQLITE_TRACE_SIZE_LIMIT; +#endif + for(i=0; iz[i]&0xff); + } + sqlite3_str_append(&out, "'", 1); +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOutn ){ + sqlite3_str_appendf(&out, "/*+%d bytes*/", pVar->n-nOut); + } +#endif + } + } + } + if( out.accError ) sqlite3_str_reset(&out); + return sqlite3StrAccumFinish(&out); +} + +#endif /* #ifndef SQLITE_OMIT_TRACE */ + +/************** End of vdbetrace.c *******************************************/ +/************** Begin file vdbe.c ********************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** The code in this file implements the function that runs the +** bytecode of a prepared statement. +** +** Various scripts scan this source file in order to generate HTML +** documentation, headers files, or other derived files. The formatting +** of the code in this file is, therefore, important. See other comments +** in this file for details. If in doubt, do not deviate from existing +** commenting and indentation practices when changing or adding code. +*/ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +/* +** Invoke this macro on memory cells just prior to changing the +** value of the cell. This macro verifies that shallow copies are +** not misused. A shallow copy of a string or blob just copies a +** pointer to the string or blob, not the content. If the original +** is changed while the copy is still in use, the string or blob might +** be changed out from under the copy. This macro verifies that nothing +** like that ever happens. +*/ +#ifdef SQLITE_DEBUG +# define memAboutToChange(P,M) sqlite3VdbeMemAboutToChange(P,M) +#else +# define memAboutToChange(P,M) +#endif + +/* +** The following global variable is incremented every time a cursor +** moves, either by the OP_SeekXX, OP_Next, or OP_Prev opcodes. The test +** procedures use this information to make sure that indices are +** working correctly. This variable has no function other than to +** help verify the correct operation of the library. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_search_count = 0; +#endif + +/* +** When this global variable is positive, it gets decremented once before +** each instruction in the VDBE. When it reaches zero, the u1.isInterrupted +** field of the sqlite3 structure is set in order to simulate an interrupt. +** +** This facility is used for testing purposes only. It does not function +** in an ordinary build. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_interrupt_count = 0; +#endif + +/* +** The next global variable is incremented each type the OP_Sort opcode +** is executed. The test procedures use this information to make sure that +** sorting is occurring or not occurring at appropriate times. This variable +** has no function other than to help verify the correct operation of the +** library. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_sort_count = 0; +#endif + +/* +** The next global variable records the size of the largest MEM_Blob +** or MEM_Str that has been used by a VDBE opcode. The test procedures +** use this information to make sure that the zero-blob functionality +** is working correctly. This variable has no function other than to +** help verify the correct operation of the library. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_max_blobsize = 0; +static void updateMaxBlobsize(Mem *p){ + if( (p->flags & (MEM_Str|MEM_Blob))!=0 && p->n>sqlite3_max_blobsize ){ + sqlite3_max_blobsize = p->n; + } +} +#endif + +/* +** This macro evaluates to true if either the update hook or the preupdate +** hook are enabled for database connect DB. +*/ +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +# define HAS_UPDATE_HOOK(DB) ((DB)->xPreUpdateCallback||(DB)->xUpdateCallback) +#else +# define HAS_UPDATE_HOOK(DB) ((DB)->xUpdateCallback) +#endif + +/* +** The next global variable is incremented each time the OP_Found opcode +** is executed. This is used to test whether or not the foreign key +** operation implemented using OP_FkIsZero is working. This variable +** has no function other than to help verify the correct operation of the +** library. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_found_count = 0; +#endif + +/* +** Test a register to see if it exceeds the current maximum blob size. +** If it does, record the new maximum blob size. +*/ +#if defined(SQLITE_TEST) && !defined(SQLITE_UNTESTABLE) +# define UPDATE_MAX_BLOBSIZE(P) updateMaxBlobsize(P) +#else +# define UPDATE_MAX_BLOBSIZE(P) +#endif + +#ifdef SQLITE_DEBUG +/* This routine provides a convenient place to set a breakpoint during +** tracing with PRAGMA vdbe_trace=on. The breakpoint fires right after +** each opcode is printed. Variables "pc" (program counter) and pOp are +** available to add conditionals to the breakpoint. GDB example: +** +** break test_trace_breakpoint if pc=22 +** +** Other useful labels for breakpoints include: +** test_addop_breakpoint(pc,pOp) +** sqlite3CorruptError(lineno) +** sqlite3MisuseError(lineno) +** sqlite3CantopenError(lineno) +*/ +static void test_trace_breakpoint(int pc, Op *pOp, Vdbe *v){ + static int n = 0; + n++; +} +#endif + +/* +** Invoke the VDBE coverage callback, if that callback is defined. This +** feature is used for test suite validation only and does not appear an +** production builds. +** +** M is the type of branch. I is the direction taken for this instance of +** the branch. +** +** M: 2 - two-way branch (I=0: fall-thru 1: jump ) +** 3 - two-way + NULL (I=0: fall-thru 1: jump 2: NULL ) +** 4 - OP_Jump (I=0: jump p1 1: jump p2 2: jump p3) +** +** In other words, if M is 2, then I is either 0 (for fall-through) or +** 1 (for when the branch is taken). If M is 3, the I is 0 for an +** ordinary fall-through, I is 1 if the branch was taken, and I is 2 +** if the result of comparison is NULL. For M=3, I=2 the jump may or +** may not be taken, depending on the SQLITE_JUMPIFNULL flags in p5. +** When M is 4, that means that an OP_Jump is being run. I is 0, 1, or 2 +** depending on if the operands are less than, equal, or greater than. +** +** iSrcLine is the source code line (from the __LINE__ macro) that +** generated the VDBE instruction combined with flag bits. The source +** code line number is in the lower 24 bits of iSrcLine and the upper +** 8 bytes are flags. The lower three bits of the flags indicate +** values for I that should never occur. For example, if the branch is +** always taken, the flags should be 0x05 since the fall-through and +** alternate branch are never taken. If a branch is never taken then +** flags should be 0x06 since only the fall-through approach is allowed. +** +** Bit 0x08 of the flags indicates an OP_Jump opcode that is only +** interested in equal or not-equal. In other words, I==0 and I==2 +** should be treated as equivalent +** +** Since only a line number is retained, not the filename, this macro +** only works for amalgamation builds. But that is ok, since these macros +** should be no-ops except for special builds used to measure test coverage. +*/ +#if !defined(SQLITE_VDBE_COVERAGE) +# define VdbeBranchTaken(I,M) +#else +# define VdbeBranchTaken(I,M) vdbeTakeBranch(pOp->iSrcLine,I,M) + static void vdbeTakeBranch(u32 iSrcLine, u8 I, u8 M){ + u8 mNever; + assert( I<=2 ); /* 0: fall through, 1: taken, 2: alternate taken */ + assert( M<=4 ); /* 2: two-way branch, 3: three-way branch, 4: OP_Jump */ + assert( I> 24; + assert( (I & mNever)==0 ); + if( sqlite3GlobalConfig.xVdbeBranch==0 ) return; /*NO_TEST*/ + /* Invoke the branch coverage callback with three arguments: + ** iSrcLine - the line number of the VdbeCoverage() macro, with + ** flags removed. + ** I - Mask of bits 0x07 indicating which cases are are + ** fulfilled by this instance of the jump. 0x01 means + ** fall-thru, 0x02 means taken, 0x04 means NULL. Any + ** impossible cases (ex: if the comparison is never NULL) + ** are filled in automatically so that the coverage + ** measurement logic does not flag those impossible cases + ** as missed coverage. + ** M - Type of jump. Same as M argument above + */ + I |= mNever; + if( M==2 ) I |= 0x04; + if( M==4 ){ + I |= 0x08; + if( (mNever&0x08)!=0 && (I&0x05)!=0) I |= 0x05; /*NO_TEST*/ + } + sqlite3GlobalConfig.xVdbeBranch(sqlite3GlobalConfig.pVdbeBranchArg, + iSrcLine&0xffffff, I, M); + } +#endif + +/* +** An ephemeral string value (signified by the MEM_Ephem flag) contains +** a pointer to a dynamically allocated string where some other entity +** is responsible for deallocating that string. Because the register +** does not control the string, it might be deleted without the register +** knowing it. +** +** This routine converts an ephemeral string into a dynamically allocated +** string that the register itself controls. In other words, it +** converts an MEM_Ephem string into a string with P.z==P.zMalloc. +*/ +#define Deephemeralize(P) \ + if( ((P)->flags&MEM_Ephem)!=0 \ + && sqlite3VdbeMemMakeWriteable(P) ){ goto no_mem;} + +/* Return true if the cursor was opened using the OP_OpenSorter opcode. */ +#define isSorter(x) ((x)->eCurType==CURTYPE_SORTER) + +/* +** Allocate VdbeCursor number iCur. Return a pointer to it. Return NULL +** if we run out of memory. +*/ +static VdbeCursor *allocateCursor( + Vdbe *p, /* The virtual machine */ + int iCur, /* Index of the new VdbeCursor */ + int nField, /* Number of fields in the table or index */ + u8 eCurType /* Type of the new cursor */ +){ + /* Find the memory cell that will be used to store the blob of memory + ** required for this VdbeCursor structure. It is convenient to use a + ** vdbe memory cell to manage the memory allocation required for a + ** VdbeCursor structure for the following reasons: + ** + ** * Sometimes cursor numbers are used for a couple of different + ** purposes in a vdbe program. The different uses might require + ** different sized allocations. Memory cells provide growable + ** allocations. + ** + ** * When using ENABLE_MEMORY_MANAGEMENT, memory cell buffers can + ** be freed lazily via the sqlite3_release_memory() API. This + ** minimizes the number of malloc calls made by the system. + ** + ** The memory cell for cursor 0 is aMem[0]. The rest are allocated from + ** the top of the register space. Cursor 1 is at Mem[p->nMem-1]. + ** Cursor 2 is at Mem[p->nMem-2]. And so forth. + */ + Mem *pMem = iCur>0 ? &p->aMem[p->nMem-iCur] : p->aMem; + + int nByte; + VdbeCursor *pCx = 0; + nByte = + ROUND8(sizeof(VdbeCursor)) + 2*sizeof(u32)*nField + + (eCurType==CURTYPE_BTREE?sqlite3BtreeCursorSize():0); + + assert( iCur>=0 && iCurnCursor ); + if( p->apCsr[iCur] ){ /*OPTIMIZATION-IF-FALSE*/ + sqlite3VdbeFreeCursor(p, p->apCsr[iCur]); + p->apCsr[iCur] = 0; + } + + /* There used to be a call to sqlite3VdbeMemClearAndResize() to make sure + ** the pMem used to hold space for the cursor has enough storage available + ** in pMem->zMalloc. But for the special case of the aMem[] entries used + ** to hold cursors, it is faster to in-line the logic. */ + assert( pMem->flags==MEM_Undefined ); + assert( (pMem->flags & MEM_Dyn)==0 ); + assert( pMem->szMalloc==0 || pMem->z==pMem->zMalloc ); + if( pMem->szMallocszMalloc>0 ){ + sqlite3DbFreeNN(pMem->db, pMem->zMalloc); + } + pMem->z = pMem->zMalloc = sqlite3DbMallocRaw(pMem->db, nByte); + if( pMem->zMalloc==0 ){ + pMem->szMalloc = 0; + return 0; + } + pMem->szMalloc = nByte; + } + + p->apCsr[iCur] = pCx = (VdbeCursor*)pMem->zMalloc; + memset(pCx, 0, offsetof(VdbeCursor,pAltCursor)); + pCx->eCurType = eCurType; + pCx->nField = nField; + pCx->aOffset = &pCx->aType[nField]; + if( eCurType==CURTYPE_BTREE ){ + pCx->uc.pCursor = (BtCursor*) + &pMem->z[ROUND8(sizeof(VdbeCursor))+2*sizeof(u32)*nField]; + sqlite3BtreeCursorZero(pCx->uc.pCursor); + } + return pCx; +} + +/* +** The string in pRec is known to look like an integer and to have a +** floating point value of rValue. Return true and set *piValue to the +** integer value if the string is in range to be an integer. Otherwise, +** return false. +*/ +static int alsoAnInt(Mem *pRec, double rValue, i64 *piValue){ + i64 iValue = (double)rValue; + if( sqlite3RealSameAsInt(rValue,iValue) ){ + *piValue = iValue; + return 1; + } + return 0==sqlite3Atoi64(pRec->z, piValue, pRec->n, pRec->enc); +} + +/* +** Try to convert a value into a numeric representation if we can +** do so without loss of information. In other words, if the string +** looks like a number, convert it into a number. If it does not +** look like a number, leave it alone. +** +** If the bTryForInt flag is true, then extra effort is made to give +** an integer representation. Strings that look like floating point +** values but which have no fractional component (example: '48.00') +** will have a MEM_Int representation when bTryForInt is true. +** +** If bTryForInt is false, then if the input string contains a decimal +** point or exponential notation, the result is only MEM_Real, even +** if there is an exact integer representation of the quantity. +*/ +static void applyNumericAffinity(Mem *pRec, int bTryForInt){ + double rValue; + u8 enc = pRec->enc; + int rc; + assert( (pRec->flags & (MEM_Str|MEM_Int|MEM_Real|MEM_IntReal))==MEM_Str ); + rc = sqlite3AtoF(pRec->z, &rValue, pRec->n, enc); + if( rc<=0 ) return; + if( rc==1 && alsoAnInt(pRec, rValue, &pRec->u.i) ){ + pRec->flags |= MEM_Int; + }else{ + pRec->u.r = rValue; + pRec->flags |= MEM_Real; + if( bTryForInt ) sqlite3VdbeIntegerAffinity(pRec); + } + /* TEXT->NUMERIC is many->one. Hence, it is important to invalidate the + ** string representation after computing a numeric equivalent, because the + ** string representation might not be the canonical representation for the + ** numeric value. Ticket [343634942dd54ab57b7024] 2018-01-31. */ + pRec->flags &= ~MEM_Str; +} + +/* +** Processing is determine by the affinity parameter: +** +** SQLITE_AFF_INTEGER: +** SQLITE_AFF_REAL: +** SQLITE_AFF_NUMERIC: +** Try to convert pRec to an integer representation or a +** floating-point representation if an integer representation +** is not possible. Note that the integer representation is +** always preferred, even if the affinity is REAL, because +** an integer representation is more space efficient on disk. +** +** SQLITE_AFF_TEXT: +** Convert pRec to a text representation. +** +** SQLITE_AFF_BLOB: +** SQLITE_AFF_NONE: +** No-op. pRec is unchanged. +*/ +static void applyAffinity( + Mem *pRec, /* The value to apply affinity to */ + char affinity, /* The affinity to be applied */ + u8 enc /* Use this text encoding */ +){ + if( affinity>=SQLITE_AFF_NUMERIC ){ + assert( affinity==SQLITE_AFF_INTEGER || affinity==SQLITE_AFF_REAL + || affinity==SQLITE_AFF_NUMERIC ); + if( (pRec->flags & MEM_Int)==0 ){ /*OPTIMIZATION-IF-FALSE*/ + if( (pRec->flags & MEM_Real)==0 ){ + if( pRec->flags & MEM_Str ) applyNumericAffinity(pRec,1); + }else{ + sqlite3VdbeIntegerAffinity(pRec); + } + } + }else if( affinity==SQLITE_AFF_TEXT ){ + /* Only attempt the conversion to TEXT if there is an integer or real + ** representation (blob and NULL do not get converted) but no string + ** representation. It would be harmless to repeat the conversion if + ** there is already a string rep, but it is pointless to waste those + ** CPU cycles. */ + if( 0==(pRec->flags&MEM_Str) ){ /*OPTIMIZATION-IF-FALSE*/ + if( (pRec->flags&(MEM_Real|MEM_Int|MEM_IntReal)) ){ + testcase( pRec->flags & MEM_Int ); + testcase( pRec->flags & MEM_Real ); + testcase( pRec->flags & MEM_IntReal ); + sqlite3VdbeMemStringify(pRec, enc, 1); + } + } + pRec->flags &= ~(MEM_Real|MEM_Int|MEM_IntReal); + } +} + +/* +** Try to convert the type of a function argument or a result column +** into a numeric representation. Use either INTEGER or REAL whichever +** is appropriate. But only do the conversion if it is possible without +** loss of information and return the revised type of the argument. +*/ +SQLITE_API int sqlite3_value_numeric_type(sqlite3_value *pVal){ + int eType = sqlite3_value_type(pVal); + if( eType==SQLITE_TEXT ){ + Mem *pMem = (Mem*)pVal; + applyNumericAffinity(pMem, 0); + eType = sqlite3_value_type(pVal); + } + return eType; +} + +/* +** Exported version of applyAffinity(). This one works on sqlite3_value*, +** not the internal Mem* type. +*/ +SQLITE_PRIVATE void sqlite3ValueApplyAffinity( + sqlite3_value *pVal, + u8 affinity, + u8 enc +){ + applyAffinity((Mem *)pVal, affinity, enc); +} + +/* +** pMem currently only holds a string type (or maybe a BLOB that we can +** interpret as a string if we want to). Compute its corresponding +** numeric type, if has one. Set the pMem->u.r and pMem->u.i fields +** accordingly. +*/ +static u16 SQLITE_NOINLINE computeNumericType(Mem *pMem){ + int rc; + sqlite3_int64 ix; + assert( (pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal))==0 ); + assert( (pMem->flags & (MEM_Str|MEM_Blob))!=0 ); + if( ExpandBlob(pMem) ){ + pMem->u.i = 0; + return MEM_Int; + } + rc = sqlite3AtoF(pMem->z, &pMem->u.r, pMem->n, pMem->enc); + if( rc<=0 ){ + if( rc==0 && sqlite3Atoi64(pMem->z, &ix, pMem->n, pMem->enc)<=1 ){ + pMem->u.i = ix; + return MEM_Int; + }else{ + return MEM_Real; + } + }else if( rc==1 && sqlite3Atoi64(pMem->z, &ix, pMem->n, pMem->enc)==0 ){ + pMem->u.i = ix; + return MEM_Int; + } + return MEM_Real; +} + +/* +** Return the numeric type for pMem, either MEM_Int or MEM_Real or both or +** none. +** +** Unlike applyNumericAffinity(), this routine does not modify pMem->flags. +** But it does set pMem->u.r and pMem->u.i appropriately. +*/ +static u16 numericType(Mem *pMem){ + if( pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal) ){ + testcase( pMem->flags & MEM_Int ); + testcase( pMem->flags & MEM_Real ); + testcase( pMem->flags & MEM_IntReal ); + return pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal); + } + if( pMem->flags & (MEM_Str|MEM_Blob) ){ + testcase( pMem->flags & MEM_Str ); + testcase( pMem->flags & MEM_Blob ); + return computeNumericType(pMem); + } + return 0; +} + +#ifdef SQLITE_DEBUG +/* +** Write a nice string representation of the contents of cell pMem +** into buffer zBuf, length nBuf. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemPrettyPrint(Mem *pMem, StrAccum *pStr){ + int f = pMem->flags; + static const char *const encnames[] = {"(X)", "(8)", "(16LE)", "(16BE)"}; + if( f&MEM_Blob ){ + int i; + char c; + if( f & MEM_Dyn ){ + c = 'z'; + assert( (f & (MEM_Static|MEM_Ephem))==0 ); + }else if( f & MEM_Static ){ + c = 't'; + assert( (f & (MEM_Dyn|MEM_Ephem))==0 ); + }else if( f & MEM_Ephem ){ + c = 'e'; + assert( (f & (MEM_Static|MEM_Dyn))==0 ); + }else{ + c = 's'; + } + sqlite3_str_appendf(pStr, "%cx[", c); + for(i=0; i<25 && in; i++){ + sqlite3_str_appendf(pStr, "%02X", ((int)pMem->z[i] & 0xFF)); + } + sqlite3_str_appendf(pStr, "|"); + for(i=0; i<25 && in; i++){ + char z = pMem->z[i]; + sqlite3_str_appendchar(pStr, 1, (z<32||z>126)?'.':z); + } + sqlite3_str_appendf(pStr,"]"); + if( f & MEM_Zero ){ + sqlite3_str_appendf(pStr, "+%dz",pMem->u.nZero); + } + }else if( f & MEM_Str ){ + int j; + u8 c; + if( f & MEM_Dyn ){ + c = 'z'; + assert( (f & (MEM_Static|MEM_Ephem))==0 ); + }else if( f & MEM_Static ){ + c = 't'; + assert( (f & (MEM_Dyn|MEM_Ephem))==0 ); + }else if( f & MEM_Ephem ){ + c = 'e'; + assert( (f & (MEM_Static|MEM_Dyn))==0 ); + }else{ + c = 's'; + } + sqlite3_str_appendf(pStr, " %c%d[", c, pMem->n); + for(j=0; j<25 && jn; j++){ + c = pMem->z[j]; + sqlite3_str_appendchar(pStr, 1, (c>=0x20&&c<=0x7f) ? c : '.'); + } + sqlite3_str_appendf(pStr, "]%s", encnames[pMem->enc]); + } +} +#endif + +#ifdef SQLITE_DEBUG +/* +** Print the value of a register for tracing purposes: +*/ +static void memTracePrint(Mem *p){ + if( p->flags & MEM_Undefined ){ + printf(" undefined"); + }else if( p->flags & MEM_Null ){ + printf(p->flags & MEM_Zero ? " NULL-nochng" : " NULL"); + }else if( (p->flags & (MEM_Int|MEM_Str))==(MEM_Int|MEM_Str) ){ + printf(" si:%lld", p->u.i); + }else if( (p->flags & (MEM_IntReal))!=0 ){ + printf(" ir:%lld", p->u.i); + }else if( p->flags & MEM_Int ){ + printf(" i:%lld", p->u.i); +#ifndef SQLITE_OMIT_FLOATING_POINT + }else if( p->flags & MEM_Real ){ + printf(" r:%.17g", p->u.r); +#endif + }else if( sqlite3VdbeMemIsRowSet(p) ){ + printf(" (rowset)"); + }else{ + StrAccum acc; + char zBuf[1000]; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); + sqlite3VdbeMemPrettyPrint(p, &acc); + printf(" %s", sqlite3StrAccumFinish(&acc)); + } + if( p->flags & MEM_Subtype ) printf(" subtype=0x%02x", p->eSubtype); +} +static void registerTrace(int iReg, Mem *p){ + printf("R[%d] = ", iReg); + memTracePrint(p); + if( p->pScopyFrom ){ + printf(" <== R[%d]", (int)(p->pScopyFrom - &p[-iReg])); + } + printf("\n"); + sqlite3VdbeCheckMemInvariants(p); +} +/**/ void sqlite3PrintMem(Mem *pMem){ + memTracePrint(pMem); + printf("\n"); + fflush(stdout); +} +#endif + +#ifdef SQLITE_DEBUG +/* +** Show the values of all registers in the virtual machine. Used for +** interactive debugging. +*/ +SQLITE_PRIVATE void sqlite3VdbeRegisterDump(Vdbe *v){ + int i; + for(i=1; inMem; i++) registerTrace(i, v->aMem+i); +} +#endif /* SQLITE_DEBUG */ + + +#ifdef SQLITE_DEBUG +# define REGISTER_TRACE(R,M) if(db->flags&SQLITE_VdbeTrace)registerTrace(R,M) +#else +# define REGISTER_TRACE(R,M) +#endif + + +#ifdef VDBE_PROFILE + +/* +** hwtime.h contains inline assembler code for implementing +** high-performance timing routines. +*/ +/* #include "hwtime.h" */ + +#endif + +#ifndef NDEBUG +/* +** This function is only called from within an assert() expression. It +** checks that the sqlite3.nTransaction variable is correctly set to +** the number of non-transaction savepoints currently in the +** linked list starting at sqlite3.pSavepoint. +** +** Usage: +** +** assert( checkSavepointCount(db) ); +*/ +static int checkSavepointCount(sqlite3 *db){ + int n = 0; + Savepoint *p; + for(p=db->pSavepoint; p; p=p->pNext) n++; + assert( n==(db->nSavepoint + db->isTransactionSavepoint) ); + return 1; +} +#endif + +/* +** Return the register of pOp->p2 after first preparing it to be +** overwritten with an integer value. +*/ +static SQLITE_NOINLINE Mem *out2PrereleaseWithClear(Mem *pOut){ + sqlite3VdbeMemSetNull(pOut); + pOut->flags = MEM_Int; + return pOut; +} +static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){ + Mem *pOut; + assert( pOp->p2>0 ); + assert( pOp->p2<=(p->nMem+1 - p->nCursor) ); + pOut = &p->aMem[pOp->p2]; + memAboutToChange(p, pOut); + if( VdbeMemDynamic(pOut) ){ /*OPTIMIZATION-IF-FALSE*/ + return out2PrereleaseWithClear(pOut); + }else{ + pOut->flags = MEM_Int; + return pOut; + } +} + +/* +** Compute a bloom filter hash using pOp->p4.i registers from aMem[] beginning +** with pOp->p3. Return the hash. +*/ +static u64 filterHash(const Mem *aMem, const Op *pOp){ + int i, mx; + u64 h = 0; + + assert( pOp->p4type==P4_INT32 ); + for(i=pOp->p3, mx=i+pOp->p4.i; iflags & (MEM_Int|MEM_IntReal) ){ + h += p->u.i; + }else if( p->flags & MEM_Real ){ + h += sqlite3VdbeIntValue(p); + }else if( p->flags & (MEM_Str|MEM_Blob) ){ + h += p->n; + if( p->flags & MEM_Zero ) h += p->u.nZero; + } + } + return h; +} + +/* +** Return the symbolic name for the data type of a pMem +*/ +static const char *vdbeMemTypeName(Mem *pMem){ + static const char *azTypes[] = { + /* SQLITE_INTEGER */ "INT", + /* SQLITE_FLOAT */ "REAL", + /* SQLITE_TEXT */ "TEXT", + /* SQLITE_BLOB */ "BLOB", + /* SQLITE_NULL */ "NULL" + }; + return azTypes[sqlite3_value_type(pMem)-1]; +} + +/* +** Execute as much of a VDBE program as we can. +** This is the core of sqlite3_step(). +*/ +SQLITE_PRIVATE int sqlite3VdbeExec( + Vdbe *p /* The VDBE */ +){ + Op *aOp = p->aOp; /* Copy of p->aOp */ + Op *pOp = aOp; /* Current operation */ +#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) + Op *pOrigOp; /* Value of pOp at the top of the loop */ +#endif +#ifdef SQLITE_DEBUG + int nExtraDelete = 0; /* Verifies FORDELETE and AUXDELETE flags */ +#endif + int rc = SQLITE_OK; /* Value to return */ + sqlite3 *db = p->db; /* The database */ + u8 resetSchemaOnFault = 0; /* Reset schema after an error if positive */ + u8 encoding = ENC(db); /* The database encoding */ + int iCompare = 0; /* Result of last comparison */ + u64 nVmStep = 0; /* Number of virtual machine steps */ +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + u64 nProgressLimit; /* Invoke xProgress() when nVmStep reaches this */ +#endif + Mem *aMem = p->aMem; /* Copy of p->aMem */ + Mem *pIn1 = 0; /* 1st input operand */ + Mem *pIn2 = 0; /* 2nd input operand */ + Mem *pIn3 = 0; /* 3rd input operand */ + Mem *pOut = 0; /* Output operand */ +#ifdef VDBE_PROFILE + u64 start; /* CPU clock count at start of opcode */ +#endif + /*** INSERT STACK UNION HERE ***/ + + assert( p->iVdbeMagic==VDBE_MAGIC_RUN ); /* sqlite3_step() verifies this */ + sqlite3VdbeEnter(p); +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + if( db->xProgress ){ + u32 iPrior = p->aCounter[SQLITE_STMTSTATUS_VM_STEP]; + assert( 0 < db->nProgressOps ); + nProgressLimit = db->nProgressOps - (iPrior % db->nProgressOps); + }else{ + nProgressLimit = LARGEST_UINT64; + } +#endif + if( p->rc==SQLITE_NOMEM ){ + /* This happens if a malloc() inside a call to sqlite3_column_text() or + ** sqlite3_column_text16() failed. */ + goto no_mem; + } + assert( p->rc==SQLITE_OK || (p->rc&0xff)==SQLITE_BUSY ); + testcase( p->rc!=SQLITE_OK ); + p->rc = SQLITE_OK; + assert( p->bIsReader || p->readOnly!=0 ); + p->iCurrentTime = 0; + assert( p->explain==0 ); + p->pResultSet = 0; + db->busyHandler.nBusy = 0; + if( AtomicLoad(&db->u1.isInterrupted) ) goto abort_due_to_interrupt; + sqlite3VdbeIOTraceSql(p); +#ifdef SQLITE_DEBUG + sqlite3BeginBenignMalloc(); + if( p->pc==0 + && (p->db->flags & (SQLITE_VdbeListing|SQLITE_VdbeEQP|SQLITE_VdbeTrace))!=0 + ){ + int i; + int once = 1; + sqlite3VdbePrintSql(p); + if( p->db->flags & SQLITE_VdbeListing ){ + printf("VDBE Program Listing:\n"); + for(i=0; inOp; i++){ + sqlite3VdbePrintOp(stdout, i, &aOp[i]); + } + } + if( p->db->flags & SQLITE_VdbeEQP ){ + for(i=0; inOp; i++){ + if( aOp[i].opcode==OP_Explain ){ + if( once ) printf("VDBE Query Plan:\n"); + printf("%s\n", aOp[i].p4.z); + once = 0; + } + } + } + if( p->db->flags & SQLITE_VdbeTrace ) printf("VDBE Trace:\n"); + } + sqlite3EndBenignMalloc(); +#endif + for(pOp=&aOp[p->pc]; 1; pOp++){ + /* Errors are detected by individual opcodes, with an immediate + ** jumps to abort_due_to_error. */ + assert( rc==SQLITE_OK ); + + assert( pOp>=aOp && pOp<&aOp[p->nOp]); +#ifdef VDBE_PROFILE + start = sqlite3NProfileCnt ? sqlite3NProfileCnt : sqlite3Hwtime(); +#endif + nVmStep++; +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + if( p->anExec ) p->anExec[(int)(pOp-aOp)]++; +#endif + + /* Only allow tracing if SQLITE_DEBUG is defined. + */ +#ifdef SQLITE_DEBUG + if( db->flags & SQLITE_VdbeTrace ){ + sqlite3VdbePrintOp(stdout, (int)(pOp - aOp), pOp); + test_trace_breakpoint((int)(pOp - aOp),pOp,p); + } +#endif + + + /* Check to see if we need to simulate an interrupt. This only happens + ** if we have a special test build. + */ +#ifdef SQLITE_TEST + if( sqlite3_interrupt_count>0 ){ + sqlite3_interrupt_count--; + if( sqlite3_interrupt_count==0 ){ + sqlite3_interrupt(db); + } + } +#endif + + /* Sanity checking on other operands */ +#ifdef SQLITE_DEBUG + { + u8 opProperty = sqlite3OpcodeProperty[pOp->opcode]; + if( (opProperty & OPFLG_IN1)!=0 ){ + assert( pOp->p1>0 ); + assert( pOp->p1<=(p->nMem+1 - p->nCursor) ); + assert( memIsValid(&aMem[pOp->p1]) ); + assert( sqlite3VdbeCheckMemInvariants(&aMem[pOp->p1]) ); + REGISTER_TRACE(pOp->p1, &aMem[pOp->p1]); + } + if( (opProperty & OPFLG_IN2)!=0 ){ + assert( pOp->p2>0 ); + assert( pOp->p2<=(p->nMem+1 - p->nCursor) ); + assert( memIsValid(&aMem[pOp->p2]) ); + assert( sqlite3VdbeCheckMemInvariants(&aMem[pOp->p2]) ); + REGISTER_TRACE(pOp->p2, &aMem[pOp->p2]); + } + if( (opProperty & OPFLG_IN3)!=0 ){ + assert( pOp->p3>0 ); + assert( pOp->p3<=(p->nMem+1 - p->nCursor) ); + assert( memIsValid(&aMem[pOp->p3]) ); + assert( sqlite3VdbeCheckMemInvariants(&aMem[pOp->p3]) ); + REGISTER_TRACE(pOp->p3, &aMem[pOp->p3]); + } + if( (opProperty & OPFLG_OUT2)!=0 ){ + assert( pOp->p2>0 ); + assert( pOp->p2<=(p->nMem+1 - p->nCursor) ); + memAboutToChange(p, &aMem[pOp->p2]); + } + if( (opProperty & OPFLG_OUT3)!=0 ){ + assert( pOp->p3>0 ); + assert( pOp->p3<=(p->nMem+1 - p->nCursor) ); + memAboutToChange(p, &aMem[pOp->p3]); + } + } +#endif +#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) + pOrigOp = pOp; +#endif + + switch( pOp->opcode ){ + +/***************************************************************************** +** What follows is a massive switch statement where each case implements a +** separate instruction in the virtual machine. If we follow the usual +** indentation conventions, each case should be indented by 6 spaces. But +** that is a lot of wasted space on the left margin. So the code within +** the switch statement will break with convention and be flush-left. Another +** big comment (similar to this one) will mark the point in the code where +** we transition back to normal indentation. +** +** The formatting of each case is important. The makefile for SQLite +** generates two C files "opcodes.h" and "opcodes.c" by scanning this +** file looking for lines that begin with "case OP_". The opcodes.h files +** will be filled with #defines that give unique integer values to each +** opcode and the opcodes.c file is filled with an array of strings where +** each string is the symbolic name for the corresponding opcode. If the +** case statement is followed by a comment of the form "/# same as ... #/" +** that comment is used to determine the particular value of the opcode. +** +** Other keywords in the comment that follows each case are used to +** construct the OPFLG_INITIALIZER value that initializes opcodeProperty[]. +** Keywords include: in1, in2, in3, out2, out3. See +** the mkopcodeh.awk script for additional information. +** +** Documentation about VDBE opcodes is generated by scanning this file +** for lines of that contain "Opcode:". That line and all subsequent +** comment lines are used in the generation of the opcode.html documentation +** file. +** +** SUMMARY: +** +** Formatting is important to scripts that scan this file. +** Do not deviate from the formatting style currently in use. +** +*****************************************************************************/ + +/* Opcode: Goto * P2 * * * +** +** An unconditional jump to address P2. +** The next instruction executed will be +** the one at index P2 from the beginning of +** the program. +** +** The P1 parameter is not actually used by this opcode. However, it +** is sometimes set to 1 instead of 0 as a hint to the command-line shell +** that this Goto is the bottom of a loop and that the lines from P2 down +** to the current line should be indented for EXPLAIN output. +*/ +case OP_Goto: { /* jump */ + +#ifdef SQLITE_DEBUG + /* In debuggging mode, when the p5 flags is set on an OP_Goto, that + ** means we should really jump back to the preceeding OP_ReleaseReg + ** instruction. */ + if( pOp->p5 ){ + assert( pOp->p2 < (int)(pOp - aOp) ); + assert( pOp->p2 > 1 ); + pOp = &aOp[pOp->p2 - 2]; + assert( pOp[1].opcode==OP_ReleaseReg ); + goto check_for_interrupt; + } +#endif + +jump_to_p2_and_check_for_interrupt: + pOp = &aOp[pOp->p2 - 1]; + + /* Opcodes that are used as the bottom of a loop (OP_Next, OP_Prev, + ** OP_VNext, or OP_SorterNext) all jump here upon + ** completion. Check to see if sqlite3_interrupt() has been called + ** or if the progress callback needs to be invoked. + ** + ** This code uses unstructured "goto" statements and does not look clean. + ** But that is not due to sloppy coding habits. The code is written this + ** way for performance, to avoid having to run the interrupt and progress + ** checks on every opcode. This helps sqlite3_step() to run about 1.5% + ** faster according to "valgrind --tool=cachegrind" */ +check_for_interrupt: + if( AtomicLoad(&db->u1.isInterrupted) ) goto abort_due_to_interrupt; +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + /* Call the progress callback if it is configured and the required number + ** of VDBE ops have been executed (either since this invocation of + ** sqlite3VdbeExec() or since last time the progress callback was called). + ** If the progress callback returns non-zero, exit the virtual machine with + ** a return code SQLITE_ABORT. + */ + while( nVmStep>=nProgressLimit && db->xProgress!=0 ){ + assert( db->nProgressOps!=0 ); + nProgressLimit += db->nProgressOps; + if( db->xProgress(db->pProgressArg) ){ + nProgressLimit = LARGEST_UINT64; + rc = SQLITE_INTERRUPT; + goto abort_due_to_error; + } + } +#endif + + break; +} + +/* Opcode: Gosub P1 P2 * * * +** +** Write the current address onto register P1 +** and then jump to address P2. +*/ +case OP_Gosub: { /* jump */ + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + assert( VdbeMemDynamic(pIn1)==0 ); + memAboutToChange(p, pIn1); + pIn1->flags = MEM_Int; + pIn1->u.i = (int)(pOp-aOp); + REGISTER_TRACE(pOp->p1, pIn1); + + /* Most jump operations do a goto to this spot in order to update + ** the pOp pointer. */ +jump_to_p2: + assert( pOp->p2>0 ); /* There are never any jumps to instruction 0 */ + assert( pOp->p2nOp ); /* Jumps must be in range */ + pOp = &aOp[pOp->p2 - 1]; + break; +} + +/* Opcode: Return P1 * * * * +** +** Jump to the next instruction after the address in register P1. After +** the jump, register P1 becomes undefined. +*/ +case OP_Return: { /* in1 */ + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags==MEM_Int ); + pOp = &aOp[pIn1->u.i]; + pIn1->flags = MEM_Undefined; + break; +} + +/* Opcode: InitCoroutine P1 P2 P3 * * +** +** Set up register P1 so that it will Yield to the coroutine +** located at address P3. +** +** If P2!=0 then the coroutine implementation immediately follows +** this opcode. So jump over the coroutine implementation to +** address P2. +** +** See also: EndCoroutine +*/ +case OP_InitCoroutine: { /* jump */ + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + assert( pOp->p2>=0 && pOp->p2nOp ); + assert( pOp->p3>=0 && pOp->p3nOp ); + pOut = &aMem[pOp->p1]; + assert( !VdbeMemDynamic(pOut) ); + pOut->u.i = pOp->p3 - 1; + pOut->flags = MEM_Int; + if( pOp->p2 ) goto jump_to_p2; + break; +} + +/* Opcode: EndCoroutine P1 * * * * +** +** The instruction at the address in register P1 is a Yield. +** Jump to the P2 parameter of that Yield. +** After the jump, register P1 becomes undefined. +** +** See also: InitCoroutine +*/ +case OP_EndCoroutine: { /* in1 */ + VdbeOp *pCaller; + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags==MEM_Int ); + assert( pIn1->u.i>=0 && pIn1->u.inOp ); + pCaller = &aOp[pIn1->u.i]; + assert( pCaller->opcode==OP_Yield ); + assert( pCaller->p2>=0 && pCaller->p2nOp ); + pOp = &aOp[pCaller->p2 - 1]; + pIn1->flags = MEM_Undefined; + break; +} + +/* Opcode: Yield P1 P2 * * * +** +** Swap the program counter with the value in register P1. This +** has the effect of yielding to a coroutine. +** +** If the coroutine that is launched by this instruction ends with +** Yield or Return then continue to the next instruction. But if +** the coroutine launched by this instruction ends with +** EndCoroutine, then jump to P2 rather than continuing with the +** next instruction. +** +** See also: InitCoroutine +*/ +case OP_Yield: { /* in1, jump */ + int pcDest; + pIn1 = &aMem[pOp->p1]; + assert( VdbeMemDynamic(pIn1)==0 ); + pIn1->flags = MEM_Int; + pcDest = (int)pIn1->u.i; + pIn1->u.i = (int)(pOp - aOp); + REGISTER_TRACE(pOp->p1, pIn1); + pOp = &aOp[pcDest]; + break; +} + +/* Opcode: HaltIfNull P1 P2 P3 P4 P5 +** Synopsis: if r[P3]=null halt +** +** Check the value in register P3. If it is NULL then Halt using +** parameter P1, P2, and P4 as if this were a Halt instruction. If the +** value in register P3 is not NULL, then this routine is a no-op. +** The P5 parameter should be 1. +*/ +case OP_HaltIfNull: { /* in3 */ + pIn3 = &aMem[pOp->p3]; +#ifdef SQLITE_DEBUG + if( pOp->p2==OE_Abort ){ sqlite3VdbeAssertAbortable(p); } +#endif + if( (pIn3->flags & MEM_Null)==0 ) break; + /* Fall through into OP_Halt */ + /* no break */ deliberate_fall_through +} + +/* Opcode: Halt P1 P2 * P4 P5 +** +** Exit immediately. All open cursors, etc are closed +** automatically. +** +** P1 is the result code returned by sqlite3_exec(), sqlite3_reset(), +** or sqlite3_finalize(). For a normal halt, this should be SQLITE_OK (0). +** For errors, it can be some other value. If P1!=0 then P2 will determine +** whether or not to rollback the current transaction. Do not rollback +** if P2==OE_Fail. Do the rollback if P2==OE_Rollback. If P2==OE_Abort, +** then back out all changes that have occurred during this execution of the +** VDBE, but do not rollback the transaction. +** +** If P4 is not null then it is an error message string. +** +** P5 is a value between 0 and 4, inclusive, that modifies the P4 string. +** +** 0: (no change) +** 1: NOT NULL contraint failed: P4 +** 2: UNIQUE constraint failed: P4 +** 3: CHECK constraint failed: P4 +** 4: FOREIGN KEY constraint failed: P4 +** +** If P5 is not zero and P4 is NULL, then everything after the ":" is +** omitted. +** +** There is an implied "Halt 0 0 0" instruction inserted at the very end of +** every program. So a jump past the last instruction of the program +** is the same as executing Halt. +*/ +case OP_Halt: { + VdbeFrame *pFrame; + int pcx; + + pcx = (int)(pOp - aOp); +#ifdef SQLITE_DEBUG + if( pOp->p2==OE_Abort ){ sqlite3VdbeAssertAbortable(p); } +#endif + if( pOp->p1==SQLITE_OK && p->pFrame ){ + /* Halt the sub-program. Return control to the parent frame. */ + pFrame = p->pFrame; + p->pFrame = pFrame->pParent; + p->nFrame--; + sqlite3VdbeSetChanges(db, p->nChange); + pcx = sqlite3VdbeFrameRestore(pFrame); + if( pOp->p2==OE_Ignore ){ + /* Instruction pcx is the OP_Program that invoked the sub-program + ** currently being halted. If the p2 instruction of this OP_Halt + ** instruction is set to OE_Ignore, then the sub-program is throwing + ** an IGNORE exception. In this case jump to the address specified + ** as the p2 of the calling OP_Program. */ + pcx = p->aOp[pcx].p2-1; + } + aOp = p->aOp; + aMem = p->aMem; + pOp = &aOp[pcx]; + break; + } + p->rc = pOp->p1; + p->errorAction = (u8)pOp->p2; + p->pc = pcx; + assert( pOp->p5<=4 ); + if( p->rc ){ + if( pOp->p5 ){ + static const char * const azType[] = { "NOT NULL", "UNIQUE", "CHECK", + "FOREIGN KEY" }; + testcase( pOp->p5==1 ); + testcase( pOp->p5==2 ); + testcase( pOp->p5==3 ); + testcase( pOp->p5==4 ); + sqlite3VdbeError(p, "%s constraint failed", azType[pOp->p5-1]); + if( pOp->p4.z ){ + p->zErrMsg = sqlite3MPrintf(db, "%z: %s", p->zErrMsg, pOp->p4.z); + } + }else{ + sqlite3VdbeError(p, "%s", pOp->p4.z); + } + sqlite3_log(pOp->p1, "abort at %d in [%s]: %s", pcx, p->zSql, p->zErrMsg); + } + rc = sqlite3VdbeHalt(p); + assert( rc==SQLITE_BUSY || rc==SQLITE_OK || rc==SQLITE_ERROR ); + if( rc==SQLITE_BUSY ){ + p->rc = SQLITE_BUSY; + }else{ + assert( rc==SQLITE_OK || (p->rc&0xff)==SQLITE_CONSTRAINT ); + assert( rc==SQLITE_OK || db->nDeferredCons>0 || db->nDeferredImmCons>0 ); + rc = p->rc ? SQLITE_ERROR : SQLITE_DONE; + } + goto vdbe_return; +} + +/* Opcode: Integer P1 P2 * * * +** Synopsis: r[P2]=P1 +** +** The 32-bit integer value P1 is written into register P2. +*/ +case OP_Integer: { /* out2 */ + pOut = out2Prerelease(p, pOp); + pOut->u.i = pOp->p1; + break; +} + +/* Opcode: Int64 * P2 * P4 * +** Synopsis: r[P2]=P4 +** +** P4 is a pointer to a 64-bit integer value. +** Write that value into register P2. +*/ +case OP_Int64: { /* out2 */ + pOut = out2Prerelease(p, pOp); + assert( pOp->p4.pI64!=0 ); + pOut->u.i = *pOp->p4.pI64; + break; +} + +#ifndef SQLITE_OMIT_FLOATING_POINT +/* Opcode: Real * P2 * P4 * +** Synopsis: r[P2]=P4 +** +** P4 is a pointer to a 64-bit floating point value. +** Write that value into register P2. +*/ +case OP_Real: { /* same as TK_FLOAT, out2 */ + pOut = out2Prerelease(p, pOp); + pOut->flags = MEM_Real; + assert( !sqlite3IsNaN(*pOp->p4.pReal) ); + pOut->u.r = *pOp->p4.pReal; + break; +} +#endif + +/* Opcode: String8 * P2 * P4 * +** Synopsis: r[P2]='P4' +** +** P4 points to a nul terminated UTF-8 string. This opcode is transformed +** into a String opcode before it is executed for the first time. During +** this transformation, the length of string P4 is computed and stored +** as the P1 parameter. +*/ +case OP_String8: { /* same as TK_STRING, out2 */ + assert( pOp->p4.z!=0 ); + pOut = out2Prerelease(p, pOp); + pOp->p1 = sqlite3Strlen30(pOp->p4.z); + +#ifndef SQLITE_OMIT_UTF16 + if( encoding!=SQLITE_UTF8 ){ + rc = sqlite3VdbeMemSetStr(pOut, pOp->p4.z, -1, SQLITE_UTF8, SQLITE_STATIC); + assert( rc==SQLITE_OK || rc==SQLITE_TOOBIG ); + if( rc ) goto too_big; + if( SQLITE_OK!=sqlite3VdbeChangeEncoding(pOut, encoding) ) goto no_mem; + assert( pOut->szMalloc>0 && pOut->zMalloc==pOut->z ); + assert( VdbeMemDynamic(pOut)==0 ); + pOut->szMalloc = 0; + pOut->flags |= MEM_Static; + if( pOp->p4type==P4_DYNAMIC ){ + sqlite3DbFree(db, pOp->p4.z); + } + pOp->p4type = P4_DYNAMIC; + pOp->p4.z = pOut->z; + pOp->p1 = pOut->n; + } +#endif + if( pOp->p1>db->aLimit[SQLITE_LIMIT_LENGTH] ){ + goto too_big; + } + pOp->opcode = OP_String; + assert( rc==SQLITE_OK ); + /* Fall through to the next case, OP_String */ + /* no break */ deliberate_fall_through +} + +/* Opcode: String P1 P2 P3 P4 P5 +** Synopsis: r[P2]='P4' (len=P1) +** +** The string value P4 of length P1 (bytes) is stored in register P2. +** +** If P3 is not zero and the content of register P3 is equal to P5, then +** the datatype of the register P2 is converted to BLOB. The content is +** the same sequence of bytes, it is merely interpreted as a BLOB instead +** of a string, as if it had been CAST. In other words: +** +** if( P3!=0 and reg[P3]==P5 ) reg[P2] := CAST(reg[P2] as BLOB) +*/ +case OP_String: { /* out2 */ + assert( pOp->p4.z!=0 ); + pOut = out2Prerelease(p, pOp); + pOut->flags = MEM_Str|MEM_Static|MEM_Term; + pOut->z = pOp->p4.z; + pOut->n = pOp->p1; + pOut->enc = encoding; + UPDATE_MAX_BLOBSIZE(pOut); +#ifndef SQLITE_LIKE_DOESNT_MATCH_BLOBS + if( pOp->p3>0 ){ + assert( pOp->p3<=(p->nMem+1 - p->nCursor) ); + pIn3 = &aMem[pOp->p3]; + assert( pIn3->flags & MEM_Int ); + if( pIn3->u.i==pOp->p5 ) pOut->flags = MEM_Blob|MEM_Static|MEM_Term; + } +#endif + break; +} + +/* Opcode: Null P1 P2 P3 * * +** Synopsis: r[P2..P3]=NULL +** +** Write a NULL into registers P2. If P3 greater than P2, then also write +** NULL into register P3 and every register in between P2 and P3. If P3 +** is less than P2 (typically P3 is zero) then only register P2 is +** set to NULL. +** +** If the P1 value is non-zero, then also set the MEM_Cleared flag so that +** NULL values will not compare equal even if SQLITE_NULLEQ is set on +** OP_Ne or OP_Eq. +*/ +case OP_Null: { /* out2 */ + int cnt; + u16 nullFlag; + pOut = out2Prerelease(p, pOp); + cnt = pOp->p3-pOp->p2; + assert( pOp->p3<=(p->nMem+1 - p->nCursor) ); + pOut->flags = nullFlag = pOp->p1 ? (MEM_Null|MEM_Cleared) : MEM_Null; + pOut->n = 0; +#ifdef SQLITE_DEBUG + pOut->uTemp = 0; +#endif + while( cnt>0 ){ + pOut++; + memAboutToChange(p, pOut); + sqlite3VdbeMemSetNull(pOut); + pOut->flags = nullFlag; + pOut->n = 0; + cnt--; + } + break; +} + +/* Opcode: SoftNull P1 * * * * +** Synopsis: r[P1]=NULL +** +** Set register P1 to have the value NULL as seen by the OP_MakeRecord +** instruction, but do not free any string or blob memory associated with +** the register, so that if the value was a string or blob that was +** previously copied using OP_SCopy, the copies will continue to be valid. +*/ +case OP_SoftNull: { + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pOut = &aMem[pOp->p1]; + pOut->flags = (pOut->flags&~(MEM_Undefined|MEM_AffMask))|MEM_Null; + break; +} + +/* Opcode: Blob P1 P2 * P4 * +** Synopsis: r[P2]=P4 (len=P1) +** +** P4 points to a blob of data P1 bytes long. Store this +** blob in register P2. If P4 is a NULL pointer, then construct +** a zero-filled blob that is P1 bytes long in P2. +*/ +case OP_Blob: { /* out2 */ + assert( pOp->p1 <= SQLITE_MAX_LENGTH ); + pOut = out2Prerelease(p, pOp); + if( pOp->p4.z==0 ){ + sqlite3VdbeMemSetZeroBlob(pOut, pOp->p1); + if( sqlite3VdbeMemExpandBlob(pOut) ) goto no_mem; + }else{ + sqlite3VdbeMemSetStr(pOut, pOp->p4.z, pOp->p1, 0, 0); + } + pOut->enc = encoding; + UPDATE_MAX_BLOBSIZE(pOut); + break; +} + +/* Opcode: Variable P1 P2 * P4 * +** Synopsis: r[P2]=parameter(P1,P4) +** +** Transfer the values of bound parameter P1 into register P2 +** +** If the parameter is named, then its name appears in P4. +** The P4 value is used by sqlite3_bind_parameter_name(). +*/ +case OP_Variable: { /* out2 */ + Mem *pVar; /* Value being transferred */ + + assert( pOp->p1>0 && pOp->p1<=p->nVar ); + assert( pOp->p4.z==0 || pOp->p4.z==sqlite3VListNumToName(p->pVList,pOp->p1) ); + pVar = &p->aVar[pOp->p1 - 1]; + if( sqlite3VdbeMemTooBig(pVar) ){ + goto too_big; + } + pOut = &aMem[pOp->p2]; + if( VdbeMemDynamic(pOut) ) sqlite3VdbeMemSetNull(pOut); + memcpy(pOut, pVar, MEMCELLSIZE); + pOut->flags &= ~(MEM_Dyn|MEM_Ephem); + pOut->flags |= MEM_Static|MEM_FromBind; + UPDATE_MAX_BLOBSIZE(pOut); + break; +} + +/* Opcode: Move P1 P2 P3 * * +** Synopsis: r[P2@P3]=r[P1@P3] +** +** Move the P3 values in register P1..P1+P3-1 over into +** registers P2..P2+P3-1. Registers P1..P1+P3-1 are +** left holding a NULL. It is an error for register ranges +** P1..P1+P3-1 and P2..P2+P3-1 to overlap. It is an error +** for P3 to be less than 1. +*/ +case OP_Move: { + int n; /* Number of registers left to copy */ + int p1; /* Register to copy from */ + int p2; /* Register to copy to */ + + n = pOp->p3; + p1 = pOp->p1; + p2 = pOp->p2; + assert( n>0 && p1>0 && p2>0 ); + assert( p1+n<=p2 || p2+n<=p1 ); + + pIn1 = &aMem[p1]; + pOut = &aMem[p2]; + do{ + assert( pOut<=&aMem[(p->nMem+1 - p->nCursor)] ); + assert( pIn1<=&aMem[(p->nMem+1 - p->nCursor)] ); + assert( memIsValid(pIn1) ); + memAboutToChange(p, pOut); + sqlite3VdbeMemMove(pOut, pIn1); +#ifdef SQLITE_DEBUG + pIn1->pScopyFrom = 0; + { int i; + for(i=1; inMem; i++){ + if( aMem[i].pScopyFrom==pIn1 ){ + aMem[i].pScopyFrom = pOut; + } + } + } +#endif + Deephemeralize(pOut); + REGISTER_TRACE(p2++, pOut); + pIn1++; + pOut++; + }while( --n ); + break; +} + +/* Opcode: Copy P1 P2 P3 * * +** Synopsis: r[P2@P3+1]=r[P1@P3+1] +** +** Make a copy of registers P1..P1+P3 into registers P2..P2+P3. +** +** This instruction makes a deep copy of the value. A duplicate +** is made of any string or blob constant. See also OP_SCopy. +*/ +case OP_Copy: { + int n; + + n = pOp->p3; + pIn1 = &aMem[pOp->p1]; + pOut = &aMem[pOp->p2]; + assert( pOut!=pIn1 ); + while( 1 ){ + memAboutToChange(p, pOut); + sqlite3VdbeMemShallowCopy(pOut, pIn1, MEM_Ephem); + Deephemeralize(pOut); +#ifdef SQLITE_DEBUG + pOut->pScopyFrom = 0; +#endif + REGISTER_TRACE(pOp->p2+pOp->p3-n, pOut); + if( (n--)==0 ) break; + pOut++; + pIn1++; + } + break; +} + +/* Opcode: SCopy P1 P2 * * * +** Synopsis: r[P2]=r[P1] +** +** Make a shallow copy of register P1 into register P2. +** +** This instruction makes a shallow copy of the value. If the value +** is a string or blob, then the copy is only a pointer to the +** original and hence if the original changes so will the copy. +** Worse, if the original is deallocated, the copy becomes invalid. +** Thus the program must guarantee that the original will not change +** during the lifetime of the copy. Use OP_Copy to make a complete +** copy. +*/ +case OP_SCopy: { /* out2 */ + pIn1 = &aMem[pOp->p1]; + pOut = &aMem[pOp->p2]; + assert( pOut!=pIn1 ); + sqlite3VdbeMemShallowCopy(pOut, pIn1, MEM_Ephem); +#ifdef SQLITE_DEBUG + pOut->pScopyFrom = pIn1; + pOut->mScopyFlags = pIn1->flags; +#endif + break; +} + +/* Opcode: IntCopy P1 P2 * * * +** Synopsis: r[P2]=r[P1] +** +** Transfer the integer value held in register P1 into register P2. +** +** This is an optimized version of SCopy that works only for integer +** values. +*/ +case OP_IntCopy: { /* out2 */ + pIn1 = &aMem[pOp->p1]; + assert( (pIn1->flags & MEM_Int)!=0 ); + pOut = &aMem[pOp->p2]; + sqlite3VdbeMemSetInt64(pOut, pIn1->u.i); + break; +} + +/* Opcode: FkCheck * * * * * +** +** Halt with an SQLITE_CONSTRAINT error if there are any unresolved +** foreign key constraint violations. If there are no foreign key +** constraint violations, this is a no-op. +** +** FK constraint violations are also checked when the prepared statement +** exits. This opcode is used to raise foreign key constraint errors prior +** to returning results such as a row change count or the result of a +** RETURNING clause. +*/ +case OP_FkCheck: { + if( (rc = sqlite3VdbeCheckFk(p,0))!=SQLITE_OK ){ + goto abort_due_to_error; + } + break; +} + +/* Opcode: ResultRow P1 P2 * * * +** Synopsis: output=r[P1@P2] +** +** The registers P1 through P1+P2-1 contain a single row of +** results. This opcode causes the sqlite3_step() call to terminate +** with an SQLITE_ROW return code and it sets up the sqlite3_stmt +** structure to provide access to the r(P1)..r(P1+P2-1) values as +** the result row. +*/ +case OP_ResultRow: { + Mem *pMem; + int i; + assert( p->nResColumn==pOp->p2 ); + assert( pOp->p1>0 || CORRUPT_DB ); + assert( pOp->p1+pOp->p2<=(p->nMem+1 - p->nCursor)+1 ); + + /* Invalidate all ephemeral cursor row caches */ + p->cacheCtr = (p->cacheCtr + 2)|1; + + /* Make sure the results of the current row are \000 terminated + ** and have an assigned type. The results are de-ephemeralized as + ** a side effect. + */ + pMem = p->pResultSet = &aMem[pOp->p1]; + for(i=0; ip2; i++){ + assert( memIsValid(&pMem[i]) ); + Deephemeralize(&pMem[i]); + assert( (pMem[i].flags & MEM_Ephem)==0 + || (pMem[i].flags & (MEM_Str|MEM_Blob))==0 ); + sqlite3VdbeMemNulTerminate(&pMem[i]); + REGISTER_TRACE(pOp->p1+i, &pMem[i]); +#ifdef SQLITE_DEBUG + /* The registers in the result will not be used again when the + ** prepared statement restarts. This is because sqlite3_column() + ** APIs might have caused type conversions of made other changes to + ** the register values. Therefore, we can go ahead and break any + ** OP_SCopy dependencies. */ + pMem[i].pScopyFrom = 0; +#endif + } + if( db->mallocFailed ) goto no_mem; + + if( db->mTrace & SQLITE_TRACE_ROW ){ + db->trace.xV2(SQLITE_TRACE_ROW, db->pTraceArg, p, 0); + } + + + /* Return SQLITE_ROW + */ + p->pc = (int)(pOp - aOp) + 1; + rc = SQLITE_ROW; + goto vdbe_return; +} + +/* Opcode: Concat P1 P2 P3 * * +** Synopsis: r[P3]=r[P2]+r[P1] +** +** Add the text in register P1 onto the end of the text in +** register P2 and store the result in register P3. +** If either the P1 or P2 text are NULL then store NULL in P3. +** +** P3 = P2 || P1 +** +** It is illegal for P1 and P3 to be the same register. Sometimes, +** if P3 is the same register as P2, the implementation is able +** to avoid a memcpy(). +*/ +case OP_Concat: { /* same as TK_CONCAT, in1, in2, out3 */ + i64 nByte; /* Total size of the output string or blob */ + u16 flags1; /* Initial flags for P1 */ + u16 flags2; /* Initial flags for P2 */ + + pIn1 = &aMem[pOp->p1]; + pIn2 = &aMem[pOp->p2]; + pOut = &aMem[pOp->p3]; + testcase( pOut==pIn2 ); + assert( pIn1!=pOut ); + flags1 = pIn1->flags; + testcase( flags1 & MEM_Null ); + testcase( pIn2->flags & MEM_Null ); + if( (flags1 | pIn2->flags) & MEM_Null ){ + sqlite3VdbeMemSetNull(pOut); + break; + } + if( (flags1 & (MEM_Str|MEM_Blob))==0 ){ + if( sqlite3VdbeMemStringify(pIn1,encoding,0) ) goto no_mem; + flags1 = pIn1->flags & ~MEM_Str; + }else if( (flags1 & MEM_Zero)!=0 ){ + if( sqlite3VdbeMemExpandBlob(pIn1) ) goto no_mem; + flags1 = pIn1->flags & ~MEM_Str; + } + flags2 = pIn2->flags; + if( (flags2 & (MEM_Str|MEM_Blob))==0 ){ + if( sqlite3VdbeMemStringify(pIn2,encoding,0) ) goto no_mem; + flags2 = pIn2->flags & ~MEM_Str; + }else if( (flags2 & MEM_Zero)!=0 ){ + if( sqlite3VdbeMemExpandBlob(pIn2) ) goto no_mem; + flags2 = pIn2->flags & ~MEM_Str; + } + nByte = pIn1->n + pIn2->n; + if( nByte>db->aLimit[SQLITE_LIMIT_LENGTH] ){ + goto too_big; + } + if( sqlite3VdbeMemGrow(pOut, (int)nByte+3, pOut==pIn2) ){ + goto no_mem; + } + MemSetTypeFlag(pOut, MEM_Str); + if( pOut!=pIn2 ){ + memcpy(pOut->z, pIn2->z, pIn2->n); + assert( (pIn2->flags & MEM_Dyn) == (flags2 & MEM_Dyn) ); + pIn2->flags = flags2; + } + memcpy(&pOut->z[pIn2->n], pIn1->z, pIn1->n); + assert( (pIn1->flags & MEM_Dyn) == (flags1 & MEM_Dyn) ); + pIn1->flags = flags1; + pOut->z[nByte]=0; + pOut->z[nByte+1] = 0; + pOut->z[nByte+2] = 0; + pOut->flags |= MEM_Term; + pOut->n = (int)nByte; + pOut->enc = encoding; + UPDATE_MAX_BLOBSIZE(pOut); + break; +} + +/* Opcode: Add P1 P2 P3 * * +** Synopsis: r[P3]=r[P1]+r[P2] +** +** Add the value in register P1 to the value in register P2 +** and store the result in register P3. +** If either input is NULL, the result is NULL. +*/ +/* Opcode: Multiply P1 P2 P3 * * +** Synopsis: r[P3]=r[P1]*r[P2] +** +** +** Multiply the value in register P1 by the value in register P2 +** and store the result in register P3. +** If either input is NULL, the result is NULL. +*/ +/* Opcode: Subtract P1 P2 P3 * * +** Synopsis: r[P3]=r[P2]-r[P1] +** +** Subtract the value in register P1 from the value in register P2 +** and store the result in register P3. +** If either input is NULL, the result is NULL. +*/ +/* Opcode: Divide P1 P2 P3 * * +** Synopsis: r[P3]=r[P2]/r[P1] +** +** Divide the value in register P1 by the value in register P2 +** and store the result in register P3 (P3=P2/P1). If the value in +** register P1 is zero, then the result is NULL. If either input is +** NULL, the result is NULL. +*/ +/* Opcode: Remainder P1 P2 P3 * * +** Synopsis: r[P3]=r[P2]%r[P1] +** +** Compute the remainder after integer register P2 is divided by +** register P1 and store the result in register P3. +** If the value in register P1 is zero the result is NULL. +** If either operand is NULL, the result is NULL. +*/ +case OP_Add: /* same as TK_PLUS, in1, in2, out3 */ +case OP_Subtract: /* same as TK_MINUS, in1, in2, out3 */ +case OP_Multiply: /* same as TK_STAR, in1, in2, out3 */ +case OP_Divide: /* same as TK_SLASH, in1, in2, out3 */ +case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ + u16 flags; /* Combined MEM_* flags from both inputs */ + u16 type1; /* Numeric type of left operand */ + u16 type2; /* Numeric type of right operand */ + i64 iA; /* Integer value of left operand */ + i64 iB; /* Integer value of right operand */ + double rA; /* Real value of left operand */ + double rB; /* Real value of right operand */ + + pIn1 = &aMem[pOp->p1]; + type1 = numericType(pIn1); + pIn2 = &aMem[pOp->p2]; + type2 = numericType(pIn2); + pOut = &aMem[pOp->p3]; + flags = pIn1->flags | pIn2->flags; + if( (type1 & type2 & MEM_Int)!=0 ){ + iA = pIn1->u.i; + iB = pIn2->u.i; + switch( pOp->opcode ){ + case OP_Add: if( sqlite3AddInt64(&iB,iA) ) goto fp_math; break; + case OP_Subtract: if( sqlite3SubInt64(&iB,iA) ) goto fp_math; break; + case OP_Multiply: if( sqlite3MulInt64(&iB,iA) ) goto fp_math; break; + case OP_Divide: { + if( iA==0 ) goto arithmetic_result_is_null; + if( iA==-1 && iB==SMALLEST_INT64 ) goto fp_math; + iB /= iA; + break; + } + default: { + if( iA==0 ) goto arithmetic_result_is_null; + if( iA==-1 ) iA = 1; + iB %= iA; + break; + } + } + pOut->u.i = iB; + MemSetTypeFlag(pOut, MEM_Int); + }else if( (flags & MEM_Null)!=0 ){ + goto arithmetic_result_is_null; + }else{ +fp_math: + rA = sqlite3VdbeRealValue(pIn1); + rB = sqlite3VdbeRealValue(pIn2); + switch( pOp->opcode ){ + case OP_Add: rB += rA; break; + case OP_Subtract: rB -= rA; break; + case OP_Multiply: rB *= rA; break; + case OP_Divide: { + /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ + if( rA==(double)0 ) goto arithmetic_result_is_null; + rB /= rA; + break; + } + default: { + iA = sqlite3VdbeIntValue(pIn1); + iB = sqlite3VdbeIntValue(pIn2); + if( iA==0 ) goto arithmetic_result_is_null; + if( iA==-1 ) iA = 1; + rB = (double)(iB % iA); + break; + } + } +#ifdef SQLITE_OMIT_FLOATING_POINT + pOut->u.i = rB; + MemSetTypeFlag(pOut, MEM_Int); +#else + if( sqlite3IsNaN(rB) ){ + goto arithmetic_result_is_null; + } + pOut->u.r = rB; + MemSetTypeFlag(pOut, MEM_Real); +#endif + } + break; + +arithmetic_result_is_null: + sqlite3VdbeMemSetNull(pOut); + break; +} + +/* Opcode: CollSeq P1 * * P4 +** +** P4 is a pointer to a CollSeq object. If the next call to a user function +** or aggregate calls sqlite3GetFuncCollSeq(), this collation sequence will +** be returned. This is used by the built-in min(), max() and nullif() +** functions. +** +** If P1 is not zero, then it is a register that a subsequent min() or +** max() aggregate will set to 1 if the current row is not the minimum or +** maximum. The P1 register is initialized to 0 by this instruction. +** +** The interface used by the implementation of the aforementioned functions +** to retrieve the collation sequence set by this opcode is not available +** publicly. Only built-in functions have access to this feature. +*/ +case OP_CollSeq: { + assert( pOp->p4type==P4_COLLSEQ ); + if( pOp->p1 ){ + sqlite3VdbeMemSetInt64(&aMem[pOp->p1], 0); + } + break; +} + +/* Opcode: BitAnd P1 P2 P3 * * +** Synopsis: r[P3]=r[P1]&r[P2] +** +** Take the bit-wise AND of the values in register P1 and P2 and +** store the result in register P3. +** If either input is NULL, the result is NULL. +*/ +/* Opcode: BitOr P1 P2 P3 * * +** Synopsis: r[P3]=r[P1]|r[P2] +** +** Take the bit-wise OR of the values in register P1 and P2 and +** store the result in register P3. +** If either input is NULL, the result is NULL. +*/ +/* Opcode: ShiftLeft P1 P2 P3 * * +** Synopsis: r[P3]=r[P2]<>r[P1] +** +** Shift the integer value in register P2 to the right by the +** number of bits specified by the integer in register P1. +** Store the result in register P3. +** If either input is NULL, the result is NULL. +*/ +case OP_BitAnd: /* same as TK_BITAND, in1, in2, out3 */ +case OP_BitOr: /* same as TK_BITOR, in1, in2, out3 */ +case OP_ShiftLeft: /* same as TK_LSHIFT, in1, in2, out3 */ +case OP_ShiftRight: { /* same as TK_RSHIFT, in1, in2, out3 */ + i64 iA; + u64 uA; + i64 iB; + u8 op; + + pIn1 = &aMem[pOp->p1]; + pIn2 = &aMem[pOp->p2]; + pOut = &aMem[pOp->p3]; + if( (pIn1->flags | pIn2->flags) & MEM_Null ){ + sqlite3VdbeMemSetNull(pOut); + break; + } + iA = sqlite3VdbeIntValue(pIn2); + iB = sqlite3VdbeIntValue(pIn1); + op = pOp->opcode; + if( op==OP_BitAnd ){ + iA &= iB; + }else if( op==OP_BitOr ){ + iA |= iB; + }else if( iB!=0 ){ + assert( op==OP_ShiftRight || op==OP_ShiftLeft ); + + /* If shifting by a negative amount, shift in the other direction */ + if( iB<0 ){ + assert( OP_ShiftRight==OP_ShiftLeft+1 ); + op = 2*OP_ShiftLeft + 1 - op; + iB = iB>(-64) ? -iB : 64; + } + + if( iB>=64 ){ + iA = (iA>=0 || op==OP_ShiftLeft) ? 0 : -1; + }else{ + memcpy(&uA, &iA, sizeof(uA)); + if( op==OP_ShiftLeft ){ + uA <<= iB; + }else{ + uA >>= iB; + /* Sign-extend on a right shift of a negative number */ + if( iA<0 ) uA |= ((((u64)0xffffffff)<<32)|0xffffffff) << (64-iB); + } + memcpy(&iA, &uA, sizeof(iA)); + } + } + pOut->u.i = iA; + MemSetTypeFlag(pOut, MEM_Int); + break; +} + +/* Opcode: AddImm P1 P2 * * * +** Synopsis: r[P1]=r[P1]+P2 +** +** Add the constant P2 to the value in register P1. +** The result is always an integer. +** +** To force any register to be an integer, just add 0. +*/ +case OP_AddImm: { /* in1 */ + pIn1 = &aMem[pOp->p1]; + memAboutToChange(p, pIn1); + sqlite3VdbeMemIntegerify(pIn1); + pIn1->u.i += pOp->p2; + break; +} + +/* Opcode: MustBeInt P1 P2 * * * +** +** Force the value in register P1 to be an integer. If the value +** in P1 is not an integer and cannot be converted into an integer +** without data loss, then jump immediately to P2, or if P2==0 +** raise an SQLITE_MISMATCH exception. +*/ +case OP_MustBeInt: { /* jump, in1 */ + pIn1 = &aMem[pOp->p1]; + if( (pIn1->flags & MEM_Int)==0 ){ + applyAffinity(pIn1, SQLITE_AFF_NUMERIC, encoding); + if( (pIn1->flags & MEM_Int)==0 ){ + VdbeBranchTaken(1, 2); + if( pOp->p2==0 ){ + rc = SQLITE_MISMATCH; + goto abort_due_to_error; + }else{ + goto jump_to_p2; + } + } + } + VdbeBranchTaken(0, 2); + MemSetTypeFlag(pIn1, MEM_Int); + break; +} + +#ifndef SQLITE_OMIT_FLOATING_POINT +/* Opcode: RealAffinity P1 * * * * +** +** If register P1 holds an integer convert it to a real value. +** +** This opcode is used when extracting information from a column that +** has REAL affinity. Such column values may still be stored as +** integers, for space efficiency, but after extraction we want them +** to have only a real value. +*/ +case OP_RealAffinity: { /* in1 */ + pIn1 = &aMem[pOp->p1]; + if( pIn1->flags & (MEM_Int|MEM_IntReal) ){ + testcase( pIn1->flags & MEM_Int ); + testcase( pIn1->flags & MEM_IntReal ); + sqlite3VdbeMemRealify(pIn1); + REGISTER_TRACE(pOp->p1, pIn1); + } + break; +} +#endif + +#ifndef SQLITE_OMIT_CAST +/* Opcode: Cast P1 P2 * * * +** Synopsis: affinity(r[P1]) +** +** Force the value in register P1 to be the type defined by P2. +** +**
      +**
    • P2=='A' → BLOB +**
    • P2=='B' → TEXT +**
    • P2=='C' → NUMERIC +**
    • P2=='D' → INTEGER +**
    • P2=='E' → REAL +**
    +** +** A NULL value is not changed by this routine. It remains NULL. +*/ +case OP_Cast: { /* in1 */ + assert( pOp->p2>=SQLITE_AFF_BLOB && pOp->p2<=SQLITE_AFF_REAL ); + testcase( pOp->p2==SQLITE_AFF_TEXT ); + testcase( pOp->p2==SQLITE_AFF_BLOB ); + testcase( pOp->p2==SQLITE_AFF_NUMERIC ); + testcase( pOp->p2==SQLITE_AFF_INTEGER ); + testcase( pOp->p2==SQLITE_AFF_REAL ); + pIn1 = &aMem[pOp->p1]; + memAboutToChange(p, pIn1); + rc = ExpandBlob(pIn1); + if( rc ) goto abort_due_to_error; + rc = sqlite3VdbeMemCast(pIn1, pOp->p2, encoding); + if( rc ) goto abort_due_to_error; + UPDATE_MAX_BLOBSIZE(pIn1); + REGISTER_TRACE(pOp->p1, pIn1); + break; +} +#endif /* SQLITE_OMIT_CAST */ + +/* Opcode: Eq P1 P2 P3 P4 P5 +** Synopsis: IF r[P3]==r[P1] +** +** Compare the values in register P1 and P3. If reg(P3)==reg(P1) then +** jump to address P2. +** +** The SQLITE_AFF_MASK portion of P5 must be an affinity character - +** SQLITE_AFF_TEXT, SQLITE_AFF_INTEGER, and so forth. An attempt is made +** to coerce both inputs according to this affinity before the +** comparison is made. If the SQLITE_AFF_MASK is 0x00, then numeric +** affinity is used. Note that the affinity conversions are stored +** back into the input registers P1 and P3. So this opcode can cause +** persistent changes to registers P1 and P3. +** +** Once any conversions have taken place, and neither value is NULL, +** the values are compared. If both values are blobs then memcmp() is +** used to determine the results of the comparison. If both values +** are text, then the appropriate collating function specified in +** P4 is used to do the comparison. If P4 is not specified then +** memcmp() is used to compare text string. If both values are +** numeric, then a numeric comparison is used. If the two values +** are of different types, then numbers are considered less than +** strings and strings are considered less than blobs. +** +** If SQLITE_NULLEQ is set in P5 then the result of comparison is always either +** true or false and is never NULL. If both operands are NULL then the result +** of comparison is true. If either operand is NULL then the result is false. +** If neither operand is NULL the result is the same as it would be if +** the SQLITE_NULLEQ flag were omitted from P5. +** +** This opcode saves the result of comparison for use by the new +** OP_Jump opcode. +*/ +/* Opcode: Ne P1 P2 P3 P4 P5 +** Synopsis: IF r[P3]!=r[P1] +** +** This works just like the Eq opcode except that the jump is taken if +** the operands in registers P1 and P3 are not equal. See the Eq opcode for +** additional information. +*/ +/* Opcode: Lt P1 P2 P3 P4 P5 +** Synopsis: IF r[P3]r[P1] +** +** This works just like the Lt opcode except that the jump is taken if +** the content of register P3 is greater than the content of +** register P1. See the Lt opcode for additional information. +*/ +/* Opcode: Ge P1 P2 P3 P4 P5 +** Synopsis: IF r[P3]>=r[P1] +** +** This works just like the Lt opcode except that the jump is taken if +** the content of register P3 is greater than or equal to the content of +** register P1. See the Lt opcode for additional information. +*/ +case OP_Eq: /* same as TK_EQ, jump, in1, in3 */ +case OP_Ne: /* same as TK_NE, jump, in1, in3 */ +case OP_Lt: /* same as TK_LT, jump, in1, in3 */ +case OP_Le: /* same as TK_LE, jump, in1, in3 */ +case OP_Gt: /* same as TK_GT, jump, in1, in3 */ +case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ + int res, res2; /* Result of the comparison of pIn1 against pIn3 */ + char affinity; /* Affinity to use for comparison */ + u16 flags1; /* Copy of initial value of pIn1->flags */ + u16 flags3; /* Copy of initial value of pIn3->flags */ + + pIn1 = &aMem[pOp->p1]; + pIn3 = &aMem[pOp->p3]; + flags1 = pIn1->flags; + flags3 = pIn3->flags; + if( (flags1 & flags3 & MEM_Int)!=0 ){ + assert( (pOp->p5 & SQLITE_AFF_MASK)!=SQLITE_AFF_TEXT || CORRUPT_DB ); + /* Common case of comparison of two integers */ + if( pIn3->u.i > pIn1->u.i ){ + iCompare = +1; + if( sqlite3aGTb[pOp->opcode] ){ + VdbeBranchTaken(1, (pOp->p5 & SQLITE_NULLEQ)?2:3); + goto jump_to_p2; + } + }else if( pIn3->u.i < pIn1->u.i ){ + iCompare = -1; + if( sqlite3aLTb[pOp->opcode] ){ + VdbeBranchTaken(1, (pOp->p5 & SQLITE_NULLEQ)?2:3); + goto jump_to_p2; + } + }else{ + iCompare = 0; + if( sqlite3aEQb[pOp->opcode] ){ + VdbeBranchTaken(1, (pOp->p5 & SQLITE_NULLEQ)?2:3); + goto jump_to_p2; + } + } + VdbeBranchTaken(0, (pOp->p5 & SQLITE_NULLEQ)?2:3); + break; + } + if( (flags1 | flags3)&MEM_Null ){ + /* One or both operands are NULL */ + if( pOp->p5 & SQLITE_NULLEQ ){ + /* If SQLITE_NULLEQ is set (which will only happen if the operator is + ** OP_Eq or OP_Ne) then take the jump or not depending on whether + ** or not both operands are null. + */ + assert( (flags1 & MEM_Cleared)==0 ); + assert( (pOp->p5 & SQLITE_JUMPIFNULL)==0 || CORRUPT_DB ); + testcase( (pOp->p5 & SQLITE_JUMPIFNULL)!=0 ); + if( (flags1&flags3&MEM_Null)!=0 + && (flags3&MEM_Cleared)==0 + ){ + res = 0; /* Operands are equal */ + }else{ + res = ((flags3 & MEM_Null) ? -1 : +1); /* Operands are not equal */ + } + }else{ + /* SQLITE_NULLEQ is clear and at least one operand is NULL, + ** then the result is always NULL. + ** The jump is taken if the SQLITE_JUMPIFNULL bit is set. + */ + iCompare = 1; /* Operands are not equal */ + VdbeBranchTaken(2,3); + if( pOp->p5 & SQLITE_JUMPIFNULL ){ + goto jump_to_p2; + } + break; + } + }else{ + /* Neither operand is NULL and we couldn't do the special high-speed + ** integer comparison case. So do a general-case comparison. */ + affinity = pOp->p5 & SQLITE_AFF_MASK; + if( affinity>=SQLITE_AFF_NUMERIC ){ + if( (flags1 | flags3)&MEM_Str ){ + if( (flags1 & (MEM_Int|MEM_IntReal|MEM_Real|MEM_Str))==MEM_Str ){ + applyNumericAffinity(pIn1,0); + testcase( flags3==pIn3->flags ); + flags3 = pIn3->flags; + } + if( (flags3 & (MEM_Int|MEM_IntReal|MEM_Real|MEM_Str))==MEM_Str ){ + applyNumericAffinity(pIn3,0); + } + } + }else if( affinity==SQLITE_AFF_TEXT ){ + if( (flags1 & MEM_Str)==0 && (flags1&(MEM_Int|MEM_Real|MEM_IntReal))!=0 ){ + testcase( pIn1->flags & MEM_Int ); + testcase( pIn1->flags & MEM_Real ); + testcase( pIn1->flags & MEM_IntReal ); + sqlite3VdbeMemStringify(pIn1, encoding, 1); + testcase( (flags1&MEM_Dyn) != (pIn1->flags&MEM_Dyn) ); + flags1 = (pIn1->flags & ~MEM_TypeMask) | (flags1 & MEM_TypeMask); + if( pIn1==pIn3 ) flags3 = flags1 | MEM_Str; + } + if( (flags3 & MEM_Str)==0 && (flags3&(MEM_Int|MEM_Real|MEM_IntReal))!=0 ){ + testcase( pIn3->flags & MEM_Int ); + testcase( pIn3->flags & MEM_Real ); + testcase( pIn3->flags & MEM_IntReal ); + sqlite3VdbeMemStringify(pIn3, encoding, 1); + testcase( (flags3&MEM_Dyn) != (pIn3->flags&MEM_Dyn) ); + flags3 = (pIn3->flags & ~MEM_TypeMask) | (flags3 & MEM_TypeMask); + } + } + assert( pOp->p4type==P4_COLLSEQ || pOp->p4.pColl==0 ); + res = sqlite3MemCompare(pIn3, pIn1, pOp->p4.pColl); + } + + /* At this point, res is negative, zero, or positive if reg[P1] is + ** less than, equal to, or greater than reg[P3], respectively. Compute + ** the answer to this operator in res2, depending on what the comparison + ** operator actually is. The next block of code depends on the fact + ** that the 6 comparison operators are consecutive integers in this + ** order: NE, EQ, GT, LE, LT, GE */ + assert( OP_Eq==OP_Ne+1 ); assert( OP_Gt==OP_Ne+2 ); assert( OP_Le==OP_Ne+3 ); + assert( OP_Lt==OP_Ne+4 ); assert( OP_Ge==OP_Ne+5 ); + if( res<0 ){ + res2 = sqlite3aLTb[pOp->opcode]; + }else if( res==0 ){ + res2 = sqlite3aEQb[pOp->opcode]; + }else{ + res2 = sqlite3aGTb[pOp->opcode]; + } + iCompare = res; + + /* Undo any changes made by applyAffinity() to the input registers. */ + assert( (pIn3->flags & MEM_Dyn) == (flags3 & MEM_Dyn) ); + pIn3->flags = flags3; + assert( (pIn1->flags & MEM_Dyn) == (flags1 & MEM_Dyn) ); + pIn1->flags = flags1; + + VdbeBranchTaken(res2!=0, (pOp->p5 & SQLITE_NULLEQ)?2:3); + if( res2 ){ + goto jump_to_p2; + } + break; +} + +/* Opcode: ElseEq * P2 * * * +** +** This opcode must follow an OP_Lt or OP_Gt comparison operator. There +** can be zero or more OP_ReleaseReg opcodes intervening, but no other +** opcodes are allowed to occur between this instruction and the previous +** OP_Lt or OP_Gt. +** +** If result of an OP_Eq comparison on the same two operands as the +** prior OP_Lt or OP_Gt would have been true, then jump to P2. +** If the result of an OP_Eq comparison on the two previous +** operands would have been false or NULL, then fall through. +*/ +case OP_ElseEq: { /* same as TK_ESCAPE, jump */ + +#ifdef SQLITE_DEBUG + /* Verify the preconditions of this opcode - that it follows an OP_Lt or + ** OP_Gt with zero or more intervening OP_ReleaseReg opcodes */ + int iAddr; + for(iAddr = (int)(pOp - aOp) - 1; ALWAYS(iAddr>=0); iAddr--){ + if( aOp[iAddr].opcode==OP_ReleaseReg ) continue; + assert( aOp[iAddr].opcode==OP_Lt || aOp[iAddr].opcode==OP_Gt ); + break; + } +#endif /* SQLITE_DEBUG */ + VdbeBranchTaken(iCompare==0, 2); + if( iCompare==0 ) goto jump_to_p2; + break; +} + + +/* Opcode: Permutation * * * P4 * +** +** Set the permutation used by the OP_Compare operator in the next +** instruction. The permutation is stored in the P4 operand. +** +** The permutation is only valid until the next OP_Compare that has +** the OPFLAG_PERMUTE bit set in P5. Typically the OP_Permutation should +** occur immediately prior to the OP_Compare. +** +** The first integer in the P4 integer array is the length of the array +** and does not become part of the permutation. +*/ +case OP_Permutation: { + assert( pOp->p4type==P4_INTARRAY ); + assert( pOp->p4.ai ); + assert( pOp[1].opcode==OP_Compare ); + assert( pOp[1].p5 & OPFLAG_PERMUTE ); + break; +} + +/* Opcode: Compare P1 P2 P3 P4 P5 +** Synopsis: r[P1@P3] <-> r[P2@P3] +** +** Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this +** vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of +** the comparison for use by the next OP_Jump instruct. +** +** If P5 has the OPFLAG_PERMUTE bit set, then the order of comparison is +** determined by the most recent OP_Permutation operator. If the +** OPFLAG_PERMUTE bit is clear, then register are compared in sequential +** order. +** +** P4 is a KeyInfo structure that defines collating sequences and sort +** orders for the comparison. The permutation applies to registers +** only. The KeyInfo elements are used sequentially. +** +** The comparison is a sort comparison, so NULLs compare equal, +** NULLs are less than numbers, numbers are less than strings, +** and strings are less than blobs. +*/ +case OP_Compare: { + int n; + int i; + int p1; + int p2; + const KeyInfo *pKeyInfo; + u32 idx; + CollSeq *pColl; /* Collating sequence to use on this term */ + int bRev; /* True for DESCENDING sort order */ + u32 *aPermute; /* The permutation */ + + if( (pOp->p5 & OPFLAG_PERMUTE)==0 ){ + aPermute = 0; + }else{ + assert( pOp>aOp ); + assert( pOp[-1].opcode==OP_Permutation ); + assert( pOp[-1].p4type==P4_INTARRAY ); + aPermute = pOp[-1].p4.ai + 1; + assert( aPermute!=0 ); + } + n = pOp->p3; + pKeyInfo = pOp->p4.pKeyInfo; + assert( n>0 ); + assert( pKeyInfo!=0 ); + p1 = pOp->p1; + p2 = pOp->p2; +#ifdef SQLITE_DEBUG + if( aPermute ){ + int k, mx = 0; + for(k=0; k(u32)mx ) mx = aPermute[k]; + assert( p1>0 && p1+mx<=(p->nMem+1 - p->nCursor)+1 ); + assert( p2>0 && p2+mx<=(p->nMem+1 - p->nCursor)+1 ); + }else{ + assert( p1>0 && p1+n<=(p->nMem+1 - p->nCursor)+1 ); + assert( p2>0 && p2+n<=(p->nMem+1 - p->nCursor)+1 ); + } +#endif /* SQLITE_DEBUG */ + for(i=0; inKeyField ); + pColl = pKeyInfo->aColl[i]; + bRev = (pKeyInfo->aSortFlags[i] & KEYINFO_ORDER_DESC); + iCompare = sqlite3MemCompare(&aMem[p1+idx], &aMem[p2+idx], pColl); + if( iCompare ){ + if( (pKeyInfo->aSortFlags[i] & KEYINFO_ORDER_BIGNULL) + && ((aMem[p1+idx].flags & MEM_Null) || (aMem[p2+idx].flags & MEM_Null)) + ){ + iCompare = -iCompare; + } + if( bRev ) iCompare = -iCompare; + break; + } + } + break; +} + +/* Opcode: Jump P1 P2 P3 * * +** +** Jump to the instruction at address P1, P2, or P3 depending on whether +** in the most recent OP_Compare instruction the P1 vector was less than +** equal to, or greater than the P2 vector, respectively. +*/ +case OP_Jump: { /* jump */ + if( iCompare<0 ){ + VdbeBranchTaken(0,4); pOp = &aOp[pOp->p1 - 1]; + }else if( iCompare==0 ){ + VdbeBranchTaken(1,4); pOp = &aOp[pOp->p2 - 1]; + }else{ + VdbeBranchTaken(2,4); pOp = &aOp[pOp->p3 - 1]; + } + break; +} + +/* Opcode: And P1 P2 P3 * * +** Synopsis: r[P3]=(r[P1] && r[P2]) +** +** Take the logical AND of the values in registers P1 and P2 and +** write the result into register P3. +** +** If either P1 or P2 is 0 (false) then the result is 0 even if +** the other input is NULL. A NULL and true or two NULLs give +** a NULL output. +*/ +/* Opcode: Or P1 P2 P3 * * +** Synopsis: r[P3]=(r[P1] || r[P2]) +** +** Take the logical OR of the values in register P1 and P2 and +** store the answer in register P3. +** +** If either P1 or P2 is nonzero (true) then the result is 1 (true) +** even if the other input is NULL. A NULL and false or two NULLs +** give a NULL output. +*/ +case OP_And: /* same as TK_AND, in1, in2, out3 */ +case OP_Or: { /* same as TK_OR, in1, in2, out3 */ + int v1; /* Left operand: 0==FALSE, 1==TRUE, 2==UNKNOWN or NULL */ + int v2; /* Right operand: 0==FALSE, 1==TRUE, 2==UNKNOWN or NULL */ + + v1 = sqlite3VdbeBooleanValue(&aMem[pOp->p1], 2); + v2 = sqlite3VdbeBooleanValue(&aMem[pOp->p2], 2); + if( pOp->opcode==OP_And ){ + static const unsigned char and_logic[] = { 0, 0, 0, 0, 1, 2, 0, 2, 2 }; + v1 = and_logic[v1*3+v2]; + }else{ + static const unsigned char or_logic[] = { 0, 1, 2, 1, 1, 1, 2, 1, 2 }; + v1 = or_logic[v1*3+v2]; + } + pOut = &aMem[pOp->p3]; + if( v1==2 ){ + MemSetTypeFlag(pOut, MEM_Null); + }else{ + pOut->u.i = v1; + MemSetTypeFlag(pOut, MEM_Int); + } + break; +} + +/* Opcode: IsTrue P1 P2 P3 P4 * +** Synopsis: r[P2] = coalesce(r[P1]==TRUE,P3) ^ P4 +** +** This opcode implements the IS TRUE, IS FALSE, IS NOT TRUE, and +** IS NOT FALSE operators. +** +** Interpret the value in register P1 as a boolean value. Store that +** boolean (a 0 or 1) in register P2. Or if the value in register P1 is +** NULL, then the P3 is stored in register P2. Invert the answer if P4 +** is 1. +** +** The logic is summarized like this: +** +**
      +**
    • If P3==0 and P4==0 then r[P2] := r[P1] IS TRUE +**
    • If P3==1 and P4==1 then r[P2] := r[P1] IS FALSE +**
    • If P3==0 and P4==1 then r[P2] := r[P1] IS NOT TRUE +**
    • If P3==1 and P4==0 then r[P2] := r[P1] IS NOT FALSE +**
    +*/ +case OP_IsTrue: { /* in1, out2 */ + assert( pOp->p4type==P4_INT32 ); + assert( pOp->p4.i==0 || pOp->p4.i==1 ); + assert( pOp->p3==0 || pOp->p3==1 ); + sqlite3VdbeMemSetInt64(&aMem[pOp->p2], + sqlite3VdbeBooleanValue(&aMem[pOp->p1], pOp->p3) ^ pOp->p4.i); + break; +} + +/* Opcode: Not P1 P2 * * * +** Synopsis: r[P2]= !r[P1] +** +** Interpret the value in register P1 as a boolean value. Store the +** boolean complement in register P2. If the value in register P1 is +** NULL, then a NULL is stored in P2. +*/ +case OP_Not: { /* same as TK_NOT, in1, out2 */ + pIn1 = &aMem[pOp->p1]; + pOut = &aMem[pOp->p2]; + if( (pIn1->flags & MEM_Null)==0 ){ + sqlite3VdbeMemSetInt64(pOut, !sqlite3VdbeBooleanValue(pIn1,0)); + }else{ + sqlite3VdbeMemSetNull(pOut); + } + break; +} + +/* Opcode: BitNot P1 P2 * * * +** Synopsis: r[P2]= ~r[P1] +** +** Interpret the content of register P1 as an integer. Store the +** ones-complement of the P1 value into register P2. If P1 holds +** a NULL then store a NULL in P2. +*/ +case OP_BitNot: { /* same as TK_BITNOT, in1, out2 */ + pIn1 = &aMem[pOp->p1]; + pOut = &aMem[pOp->p2]; + sqlite3VdbeMemSetNull(pOut); + if( (pIn1->flags & MEM_Null)==0 ){ + pOut->flags = MEM_Int; + pOut->u.i = ~sqlite3VdbeIntValue(pIn1); + } + break; +} + +/* Opcode: Once P1 P2 * * * +** +** Fall through to the next instruction the first time this opcode is +** encountered on each invocation of the byte-code program. Jump to P2 +** on the second and all subsequent encounters during the same invocation. +** +** Top-level programs determine first invocation by comparing the P1 +** operand against the P1 operand on the OP_Init opcode at the beginning +** of the program. If the P1 values differ, then fall through and make +** the P1 of this opcode equal to the P1 of OP_Init. If P1 values are +** the same then take the jump. +** +** For subprograms, there is a bitmask in the VdbeFrame that determines +** whether or not the jump should be taken. The bitmask is necessary +** because the self-altering code trick does not work for recursive +** triggers. +*/ +case OP_Once: { /* jump */ + u32 iAddr; /* Address of this instruction */ + assert( p->aOp[0].opcode==OP_Init ); + if( p->pFrame ){ + iAddr = (int)(pOp - p->aOp); + if( (p->pFrame->aOnce[iAddr/8] & (1<<(iAddr & 7)))!=0 ){ + VdbeBranchTaken(1, 2); + goto jump_to_p2; + } + p->pFrame->aOnce[iAddr/8] |= 1<<(iAddr & 7); + }else{ + if( p->aOp[0].p1==pOp->p1 ){ + VdbeBranchTaken(1, 2); + goto jump_to_p2; + } + } + VdbeBranchTaken(0, 2); + pOp->p1 = p->aOp[0].p1; + break; +} + +/* Opcode: If P1 P2 P3 * * +** +** Jump to P2 if the value in register P1 is true. The value +** is considered true if it is numeric and non-zero. If the value +** in P1 is NULL then take the jump if and only if P3 is non-zero. +*/ +case OP_If: { /* jump, in1 */ + int c; + c = sqlite3VdbeBooleanValue(&aMem[pOp->p1], pOp->p3); + VdbeBranchTaken(c!=0, 2); + if( c ) goto jump_to_p2; + break; +} + +/* Opcode: IfNot P1 P2 P3 * * +** +** Jump to P2 if the value in register P1 is False. The value +** is considered false if it has a numeric value of zero. If the value +** in P1 is NULL then take the jump if and only if P3 is non-zero. +*/ +case OP_IfNot: { /* jump, in1 */ + int c; + c = !sqlite3VdbeBooleanValue(&aMem[pOp->p1], !pOp->p3); + VdbeBranchTaken(c!=0, 2); + if( c ) goto jump_to_p2; + break; +} + +/* Opcode: IsNull P1 P2 * * * +** Synopsis: if r[P1]==NULL goto P2 +** +** Jump to P2 if the value in register P1 is NULL. +*/ +case OP_IsNull: { /* same as TK_ISNULL, jump, in1 */ + pIn1 = &aMem[pOp->p1]; + VdbeBranchTaken( (pIn1->flags & MEM_Null)!=0, 2); + if( (pIn1->flags & MEM_Null)!=0 ){ + goto jump_to_p2; + } + break; +} + +/* Opcode: IsNullOrType P1 P2 P3 * * +** Synopsis: if typeof(r[P1]) IN (P3,5) goto P2 +** +** Jump to P2 if the value in register P1 is NULL or has a datatype P3. +** P3 is an integer which should be one of SQLITE_INTEGER, SQLITE_FLOAT, +** SQLITE_BLOB, SQLITE_NULL, or SQLITE_TEXT. +*/ +case OP_IsNullOrType: { /* jump, in1 */ + int doTheJump; + pIn1 = &aMem[pOp->p1]; + doTheJump = (pIn1->flags & MEM_Null)!=0 || sqlite3_value_type(pIn1)==pOp->p3; + VdbeBranchTaken( doTheJump, 2); + if( doTheJump ) goto jump_to_p2; + break; +} + +/* Opcode: ZeroOrNull P1 P2 P3 * * +** Synopsis: r[P2] = 0 OR NULL +** +** If all both registers P1 and P3 are NOT NULL, then store a zero in +** register P2. If either registers P1 or P3 are NULL then put +** a NULL in register P2. +*/ +case OP_ZeroOrNull: { /* in1, in2, out2, in3 */ + if( (aMem[pOp->p1].flags & MEM_Null)!=0 + || (aMem[pOp->p3].flags & MEM_Null)!=0 + ){ + sqlite3VdbeMemSetNull(aMem + pOp->p2); + }else{ + sqlite3VdbeMemSetInt64(aMem + pOp->p2, 0); + } + break; +} + +/* Opcode: NotNull P1 P2 * * * +** Synopsis: if r[P1]!=NULL goto P2 +** +** Jump to P2 if the value in register P1 is not NULL. +*/ +case OP_NotNull: { /* same as TK_NOTNULL, jump, in1 */ + pIn1 = &aMem[pOp->p1]; + VdbeBranchTaken( (pIn1->flags & MEM_Null)==0, 2); + if( (pIn1->flags & MEM_Null)==0 ){ + goto jump_to_p2; + } + break; +} + +/* Opcode: IfNullRow P1 P2 P3 * * +** Synopsis: if P1.nullRow then r[P3]=NULL, goto P2 +** +** Check the cursor P1 to see if it is currently pointing at a NULL row. +** If it is, then set register P3 to NULL and jump immediately to P2. +** If P1 is not on a NULL row, then fall through without making any +** changes. +*/ +case OP_IfNullRow: { /* jump */ + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( p->apCsr[pOp->p1]!=0 ); + if( p->apCsr[pOp->p1]->nullRow ){ + sqlite3VdbeMemSetNull(aMem + pOp->p3); + goto jump_to_p2; + } + break; +} + +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC +/* Opcode: Offset P1 P2 P3 * * +** Synopsis: r[P3] = sqlite_offset(P1) +** +** Store in register r[P3] the byte offset into the database file that is the +** start of the payload for the record at which that cursor P1 is currently +** pointing. +** +** P2 is the column number for the argument to the sqlite_offset() function. +** This opcode does not use P2 itself, but the P2 value is used by the +** code generator. The P1, P2, and P3 operands to this opcode are the +** same as for OP_Column. +** +** This opcode is only available if SQLite is compiled with the +** -DSQLITE_ENABLE_OFFSET_SQL_FUNC option. +*/ +case OP_Offset: { /* out3 */ + VdbeCursor *pC; /* The VDBE cursor */ + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + pOut = &p->aMem[pOp->p3]; + if( pC==0 || pC->eCurType!=CURTYPE_BTREE ){ + sqlite3VdbeMemSetNull(pOut); + }else{ + if( pC->deferredMoveto ){ + rc = sqlite3VdbeFinishMoveto(pC); + if( rc ) goto abort_due_to_error; + } + if( sqlite3BtreeEof(pC->uc.pCursor) ){ + sqlite3VdbeMemSetNull(pOut); + }else{ + sqlite3VdbeMemSetInt64(pOut, sqlite3BtreeOffset(pC->uc.pCursor)); + } + } + break; +} +#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */ + +/* Opcode: Column P1 P2 P3 P4 P5 +** Synopsis: r[P3]=PX +** +** Interpret the data that cursor P1 points to as a structure built using +** the MakeRecord instruction. (See the MakeRecord opcode for additional +** information about the format of the data.) Extract the P2-th column +** from this record. If there are less that (P2+1) +** values in the record, extract a NULL. +** +** The value extracted is stored in register P3. +** +** If the record contains fewer than P2 fields, then extract a NULL. Or, +** if the P4 argument is a P4_MEM use the value of the P4 argument as +** the result. +** +** If the OPFLAG_LENGTHARG and OPFLAG_TYPEOFARG bits are set on P5 then +** the result is guaranteed to only be used as the argument of a length() +** or typeof() function, respectively. The loading of large blobs can be +** skipped for length() and all content loading can be skipped for typeof(). +*/ +case OP_Column: { + u32 p2; /* column number to retrieve */ + VdbeCursor *pC; /* The VDBE cursor */ + BtCursor *pCrsr; /* The BTree cursor */ + u32 *aOffset; /* aOffset[i] is offset to start of data for i-th column */ + int len; /* The length of the serialized data for the column */ + int i; /* Loop counter */ + Mem *pDest; /* Where to write the extracted value */ + Mem sMem; /* For storing the record being decoded */ + const u8 *zData; /* Part of the record being decoded */ + const u8 *zHdr; /* Next unparsed byte of the header */ + const u8 *zEndHdr; /* Pointer to first byte after the header */ + u64 offset64; /* 64-bit offset */ + u32 t; /* A type code from the record header */ + Mem *pReg; /* PseudoTable input register */ + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + p2 = (u32)pOp->p2; + + /* If the cursor cache is stale (meaning it is not currently point at + ** the correct row) then bring it up-to-date by doing the necessary + ** B-Tree seek. */ + rc = sqlite3VdbeCursorMoveto(&pC, &p2); + if( rc ) goto abort_due_to_error; + + assert( pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor) ); + pDest = &aMem[pOp->p3]; + memAboutToChange(p, pDest); + assert( pC!=0 ); + assert( p2<(u32)pC->nField ); + aOffset = pC->aOffset; + assert( aOffset==pC->aType+pC->nField ); + assert( pC->eCurType!=CURTYPE_VTAB ); + assert( pC->eCurType!=CURTYPE_PSEUDO || pC->nullRow ); + assert( pC->eCurType!=CURTYPE_SORTER ); + + if( pC->cacheStatus!=p->cacheCtr ){ /*OPTIMIZATION-IF-FALSE*/ + if( pC->nullRow ){ + if( pC->eCurType==CURTYPE_PSEUDO ){ + /* For the special case of as pseudo-cursor, the seekResult field + ** identifies the register that holds the record */ + assert( pC->seekResult>0 ); + pReg = &aMem[pC->seekResult]; + assert( pReg->flags & MEM_Blob ); + assert( memIsValid(pReg) ); + pC->payloadSize = pC->szRow = pReg->n; + pC->aRow = (u8*)pReg->z; + }else{ + sqlite3VdbeMemSetNull(pDest); + goto op_column_out; + } + }else{ + pCrsr = pC->uc.pCursor; + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pCrsr ); + assert( sqlite3BtreeCursorIsValid(pCrsr) ); + pC->payloadSize = sqlite3BtreePayloadSize(pCrsr); + pC->aRow = sqlite3BtreePayloadFetch(pCrsr, &pC->szRow); + assert( pC->szRow<=pC->payloadSize ); + assert( pC->szRow<=65536 ); /* Maximum page size is 64KiB */ + if( pC->payloadSize > (u32)db->aLimit[SQLITE_LIMIT_LENGTH] ){ + goto too_big; + } + } + pC->cacheStatus = p->cacheCtr; + pC->iHdrOffset = getVarint32(pC->aRow, aOffset[0]); + pC->nHdrParsed = 0; + + + if( pC->szRowaRow does not have to hold the entire row, but it does at least + ** need to cover the header of the record. If pC->aRow does not contain + ** the complete header, then set it to zero, forcing the header to be + ** dynamically allocated. */ + pC->aRow = 0; + pC->szRow = 0; + + /* Make sure a corrupt database has not given us an oversize header. + ** Do this now to avoid an oversize memory allocation. + ** + ** Type entries can be between 1 and 5 bytes each. But 4 and 5 byte + ** types use so much data space that there can only be 4096 and 32 of + ** them, respectively. So the maximum header length results from a + ** 3-byte type for each of the maximum of 32768 columns plus three + ** extra bytes for the header length itself. 32768*3 + 3 = 98307. + */ + if( aOffset[0] > 98307 || aOffset[0] > pC->payloadSize ){ + goto op_column_corrupt; + } + }else{ + /* This is an optimization. By skipping over the first few tests + ** (ex: pC->nHdrParsed<=p2) in the next section, we achieve a + ** measurable performance gain. + ** + ** This branch is taken even if aOffset[0]==0. Such a record is never + ** generated by SQLite, and could be considered corruption, but we + ** accept it for historical reasons. When aOffset[0]==0, the code this + ** branch jumps to reads past the end of the record, but never more + ** than a few bytes. Even if the record occurs at the end of the page + ** content area, the "page header" comes after the page content and so + ** this overread is harmless. Similar overreads can occur for a corrupt + ** database file. + */ + zData = pC->aRow; + assert( pC->nHdrParsed<=p2 ); /* Conditional skipped */ + testcase( aOffset[0]==0 ); + goto op_column_read_header; + } + } + + /* Make sure at least the first p2+1 entries of the header have been + ** parsed and valid information is in aOffset[] and pC->aType[]. + */ + if( pC->nHdrParsed<=p2 ){ + /* If there is more header available for parsing in the record, try + ** to extract additional fields up through the p2+1-th field + */ + if( pC->iHdrOffsetaRow==0 ){ + memset(&sMem, 0, sizeof(sMem)); + rc = sqlite3VdbeMemFromBtreeZeroOffset(pC->uc.pCursor,aOffset[0],&sMem); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + zData = (u8*)sMem.z; + }else{ + zData = pC->aRow; + } + + /* Fill in pC->aType[i] and aOffset[i] values through the p2-th field. */ + op_column_read_header: + i = pC->nHdrParsed; + offset64 = aOffset[i]; + zHdr = zData + pC->iHdrOffset; + zEndHdr = zData + aOffset[0]; + testcase( zHdr>=zEndHdr ); + do{ + if( (pC->aType[i] = t = zHdr[0])<0x80 ){ + zHdr++; + offset64 += sqlite3VdbeOneByteSerialTypeLen(t); + }else{ + zHdr += sqlite3GetVarint32(zHdr, &t); + pC->aType[i] = t; + offset64 += sqlite3VdbeSerialTypeLen(t); + } + aOffset[++i] = (u32)(offset64 & 0xffffffff); + }while( (u32)i<=p2 && zHdr=zEndHdr && (zHdr>zEndHdr || offset64!=pC->payloadSize)) + || (offset64 > pC->payloadSize) + ){ + if( aOffset[0]==0 ){ + i = 0; + zHdr = zEndHdr; + }else{ + if( pC->aRow==0 ) sqlite3VdbeMemRelease(&sMem); + goto op_column_corrupt; + } + } + + pC->nHdrParsed = i; + pC->iHdrOffset = (u32)(zHdr - zData); + if( pC->aRow==0 ) sqlite3VdbeMemRelease(&sMem); + }else{ + t = 0; + } + + /* If after trying to extract new entries from the header, nHdrParsed is + ** still not up to p2, that means that the record has fewer than p2 + ** columns. So the result will be either the default value or a NULL. + */ + if( pC->nHdrParsed<=p2 ){ + if( pOp->p4type==P4_MEM ){ + sqlite3VdbeMemShallowCopy(pDest, pOp->p4.pMem, MEM_Static); + }else{ + sqlite3VdbeMemSetNull(pDest); + } + goto op_column_out; + } + }else{ + t = pC->aType[p2]; + } + + /* Extract the content for the p2+1-th column. Control can only + ** reach this point if aOffset[p2], aOffset[p2+1], and pC->aType[p2] are + ** all valid. + */ + assert( p2nHdrParsed ); + assert( rc==SQLITE_OK ); + assert( sqlite3VdbeCheckMemInvariants(pDest) ); + if( VdbeMemDynamic(pDest) ){ + sqlite3VdbeMemSetNull(pDest); + } + assert( t==pC->aType[p2] ); + if( pC->szRow>=aOffset[p2+1] ){ + /* This is the common case where the desired content fits on the original + ** page - where the content is not on an overflow page */ + zData = pC->aRow + aOffset[p2]; + if( t<12 ){ + sqlite3VdbeSerialGet(zData, t, pDest); + }else{ + /* If the column value is a string, we need a persistent value, not + ** a MEM_Ephem value. This branch is a fast short-cut that is equivalent + ** to calling sqlite3VdbeSerialGet() and sqlite3VdbeDeephemeralize(). + */ + static const u16 aFlag[] = { MEM_Blob, MEM_Str|MEM_Term }; + pDest->n = len = (t-12)/2; + pDest->enc = encoding; + if( pDest->szMalloc < len+2 ){ + pDest->flags = MEM_Null; + if( sqlite3VdbeMemGrow(pDest, len+2, 0) ) goto no_mem; + }else{ + pDest->z = pDest->zMalloc; + } + memcpy(pDest->z, zData, len); + pDest->z[len] = 0; + pDest->z[len+1] = 0; + pDest->flags = aFlag[t&1]; + } + }else{ + pDest->enc = encoding; + /* This branch happens only when content is on overflow pages */ + if( ((pOp->p5 & (OPFLAG_LENGTHARG|OPFLAG_TYPEOFARG))!=0 + && ((t>=12 && (t&1)==0) || (pOp->p5 & OPFLAG_TYPEOFARG)!=0)) + || (len = sqlite3VdbeSerialTypeLen(t))==0 + ){ + /* Content is irrelevant for + ** 1. the typeof() function, + ** 2. the length(X) function if X is a blob, and + ** 3. if the content length is zero. + ** So we might as well use bogus content rather than reading + ** content from disk. + ** + ** Although sqlite3VdbeSerialGet() may read at most 8 bytes from the + ** buffer passed to it, debugging function VdbeMemPrettyPrint() may + ** read more. Use the global constant sqlite3CtypeMap[] as the array, + ** as that array is 256 bytes long (plenty for VdbeMemPrettyPrint()) + ** and it begins with a bunch of zeros. + */ + sqlite3VdbeSerialGet((u8*)sqlite3CtypeMap, t, pDest); + }else{ + rc = sqlite3VdbeMemFromBtree(pC->uc.pCursor, aOffset[p2], len, pDest); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + sqlite3VdbeSerialGet((const u8*)pDest->z, t, pDest); + pDest->flags &= ~MEM_Ephem; + } + } + +op_column_out: + UPDATE_MAX_BLOBSIZE(pDest); + REGISTER_TRACE(pOp->p3, pDest); + break; + +op_column_corrupt: + if( aOp[0].p3>0 ){ + pOp = &aOp[aOp[0].p3-1]; + break; + }else{ + rc = SQLITE_CORRUPT_BKPT; + goto abort_due_to_error; + } +} + +/* Opcode: TypeCheck P1 P2 P3 P4 * +** Synopsis: typecheck(r[P1@P2]) +** +** Apply affinities to the range of P2 registers beginning with P1. +** Take the affinities from the Table object in P4. If any value +** cannot be coerced into the correct type, then raise an error. +** +** This opcode is similar to OP_Affinity except that this opcode +** forces the register type to the Table column type. This is used +** to implement "strict affinity". +** +** GENERATED ALWAYS AS ... STATIC columns are only checked if P3 +** is zero. When P3 is non-zero, no type checking occurs for +** static generated columns. Virtual columns are computed at query time +** and so they are never checked. +** +** Preconditions: +** +**
      +**
    • P2 should be the number of non-virtual columns in the +** table of P4. +**
    • Table P4 should be a STRICT table. +**
    +** +** If any precondition is false, an assertion fault occurs. +*/ +case OP_TypeCheck: { + Table *pTab; + Column *aCol; + int i; + + assert( pOp->p4type==P4_TABLE ); + pTab = pOp->p4.pTab; + assert( pTab->tabFlags & TF_Strict ); + assert( pTab->nNVCol==pOp->p2 ); + aCol = pTab->aCol; + pIn1 = &aMem[pOp->p1]; + for(i=0; inCol; i++){ + if( aCol[i].colFlags & COLFLAG_GENERATED ){ + if( aCol[i].colFlags & COLFLAG_VIRTUAL ) continue; + if( pOp->p3 ){ pIn1++; continue; } + } + assert( pIn1 < &aMem[pOp->p1+pOp->p2] ); + applyAffinity(pIn1, aCol[i].affinity, encoding); + if( (pIn1->flags & MEM_Null)==0 ){ + switch( aCol[i].eCType ){ + case COLTYPE_BLOB: { + if( (pIn1->flags & MEM_Blob)==0 ) goto vdbe_type_error; + break; + } + case COLTYPE_INTEGER: + case COLTYPE_INT: { + if( (pIn1->flags & MEM_Int)==0 ) goto vdbe_type_error; + break; + } + case COLTYPE_TEXT: { + if( (pIn1->flags & MEM_Str)==0 ) goto vdbe_type_error; + break; + } + case COLTYPE_REAL: { + testcase( (pIn1->flags & (MEM_Real|MEM_IntReal))==MEM_Real ); + testcase( (pIn1->flags & (MEM_Real|MEM_IntReal))==MEM_IntReal ); + if( pIn1->flags & MEM_Int ){ + /* When applying REAL affinity, if the result is still an MEM_Int + ** that will fit in 6 bytes, then change the type to MEM_IntReal + ** so that we keep the high-resolution integer value but know that + ** the type really wants to be REAL. */ + testcase( pIn1->u.i==140737488355328LL ); + testcase( pIn1->u.i==140737488355327LL ); + testcase( pIn1->u.i==-140737488355328LL ); + testcase( pIn1->u.i==-140737488355329LL ); + if( pIn1->u.i<=140737488355327LL && pIn1->u.i>=-140737488355328LL){ + pIn1->flags |= MEM_IntReal; + pIn1->flags &= ~MEM_Int; + }else{ + pIn1->u.r = (double)pIn1->u.i; + pIn1->flags |= MEM_Real; + pIn1->flags &= ~MEM_Int; + } + }else if( (pIn1->flags & (MEM_Real|MEM_IntReal))==0 ){ + goto vdbe_type_error; + } + break; + } + default: { + /* COLTYPE_ANY. Accept anything. */ + break; + } + } + } + REGISTER_TRACE((int)(pIn1-aMem), pIn1); + pIn1++; + } + assert( pIn1 == &aMem[pOp->p1+pOp->p2] ); + break; + +vdbe_type_error: + sqlite3VdbeError(p, "cannot store %s value in %s column %s.%s", + vdbeMemTypeName(pIn1), sqlite3StdType[aCol[i].eCType-1], + pTab->zName, aCol[i].zCnName); + rc = SQLITE_CONSTRAINT_DATATYPE; + goto abort_due_to_error; +} + +/* Opcode: Affinity P1 P2 * P4 * +** Synopsis: affinity(r[P1@P2]) +** +** Apply affinities to a range of P2 registers starting with P1. +** +** P4 is a string that is P2 characters long. The N-th character of the +** string indicates the column affinity that should be used for the N-th +** memory cell in the range. +*/ +case OP_Affinity: { + const char *zAffinity; /* The affinity to be applied */ + + zAffinity = pOp->p4.z; + assert( zAffinity!=0 ); + assert( pOp->p2>0 ); + assert( zAffinity[pOp->p2]==0 ); + pIn1 = &aMem[pOp->p1]; + while( 1 /*exit-by-break*/ ){ + assert( pIn1 <= &p->aMem[(p->nMem+1 - p->nCursor)] ); + assert( zAffinity[0]==SQLITE_AFF_NONE || memIsValid(pIn1) ); + applyAffinity(pIn1, zAffinity[0], encoding); + if( zAffinity[0]==SQLITE_AFF_REAL && (pIn1->flags & MEM_Int)!=0 ){ + /* When applying REAL affinity, if the result is still an MEM_Int + ** that will fit in 6 bytes, then change the type to MEM_IntReal + ** so that we keep the high-resolution integer value but know that + ** the type really wants to be REAL. */ + testcase( pIn1->u.i==140737488355328LL ); + testcase( pIn1->u.i==140737488355327LL ); + testcase( pIn1->u.i==-140737488355328LL ); + testcase( pIn1->u.i==-140737488355329LL ); + if( pIn1->u.i<=140737488355327LL && pIn1->u.i>=-140737488355328LL ){ + pIn1->flags |= MEM_IntReal; + pIn1->flags &= ~MEM_Int; + }else{ + pIn1->u.r = (double)pIn1->u.i; + pIn1->flags |= MEM_Real; + pIn1->flags &= ~MEM_Int; + } + } + REGISTER_TRACE((int)(pIn1-aMem), pIn1); + zAffinity++; + if( zAffinity[0]==0 ) break; + pIn1++; + } + break; +} + +/* Opcode: MakeRecord P1 P2 P3 P4 * +** Synopsis: r[P3]=mkrec(r[P1@P2]) +** +** Convert P2 registers beginning with P1 into the [record format] +** use as a data record in a database table or as a key +** in an index. The OP_Column opcode can decode the record later. +** +** P4 may be a string that is P2 characters long. The N-th character of the +** string indicates the column affinity that should be used for the N-th +** field of the index key. +** +** The mapping from character to affinity is given by the SQLITE_AFF_ +** macros defined in sqliteInt.h. +** +** If P4 is NULL then all index fields have the affinity BLOB. +** +** The meaning of P5 depends on whether or not the SQLITE_ENABLE_NULL_TRIM +** compile-time option is enabled: +** +** * If SQLITE_ENABLE_NULL_TRIM is enabled, then the P5 is the index +** of the right-most table that can be null-trimmed. +** +** * If SQLITE_ENABLE_NULL_TRIM is omitted, then P5 has the value +** OPFLAG_NOCHNG_MAGIC if the OP_MakeRecord opcode is allowed to +** accept no-change records with serial_type 10. This value is +** only used inside an assert() and does not affect the end result. +*/ +case OP_MakeRecord: { + Mem *pRec; /* The new record */ + u64 nData; /* Number of bytes of data space */ + int nHdr; /* Number of bytes of header space */ + i64 nByte; /* Data space required for this record */ + i64 nZero; /* Number of zero bytes at the end of the record */ + int nVarint; /* Number of bytes in a varint */ + u32 serial_type; /* Type field */ + Mem *pData0; /* First field to be combined into the record */ + Mem *pLast; /* Last field of the record */ + int nField; /* Number of fields in the record */ + char *zAffinity; /* The affinity string for the record */ + int file_format; /* File format to use for encoding */ + u32 len; /* Length of a field */ + u8 *zHdr; /* Where to write next byte of the header */ + u8 *zPayload; /* Where to write next byte of the payload */ + + /* Assuming the record contains N fields, the record format looks + ** like this: + ** + ** ------------------------------------------------------------------------ + ** | hdr-size | type 0 | type 1 | ... | type N-1 | data0 | ... | data N-1 | + ** ------------------------------------------------------------------------ + ** + ** Data(0) is taken from register P1. Data(1) comes from register P1+1 + ** and so forth. + ** + ** Each type field is a varint representing the serial type of the + ** corresponding data element (see sqlite3VdbeSerialType()). The + ** hdr-size field is also a varint which is the offset from the beginning + ** of the record to data0. + */ + nData = 0; /* Number of bytes of data space */ + nHdr = 0; /* Number of bytes of header space */ + nZero = 0; /* Number of zero bytes at the end of the record */ + nField = pOp->p1; + zAffinity = pOp->p4.z; + assert( nField>0 && pOp->p2>0 && pOp->p2+nField<=(p->nMem+1 - p->nCursor)+1 ); + pData0 = &aMem[nField]; + nField = pOp->p2; + pLast = &pData0[nField-1]; + file_format = p->minWriteFileFormat; + + /* Identify the output register */ + assert( pOp->p3p1 || pOp->p3>=pOp->p1+pOp->p2 ); + pOut = &aMem[pOp->p3]; + memAboutToChange(p, pOut); + + /* Apply the requested affinity to all inputs + */ + assert( pData0<=pLast ); + if( zAffinity ){ + pRec = pData0; + do{ + applyAffinity(pRec, zAffinity[0], encoding); + if( zAffinity[0]==SQLITE_AFF_REAL && (pRec->flags & MEM_Int) ){ + pRec->flags |= MEM_IntReal; + pRec->flags &= ~(MEM_Int); + } + REGISTER_TRACE((int)(pRec-aMem), pRec); + zAffinity++; + pRec++; + assert( zAffinity[0]==0 || pRec<=pLast ); + }while( zAffinity[0] ); + } + +#ifdef SQLITE_ENABLE_NULL_TRIM + /* NULLs can be safely trimmed from the end of the record, as long as + ** as the schema format is 2 or more and none of the omitted columns + ** have a non-NULL default value. Also, the record must be left with + ** at least one field. If P5>0 then it will be one more than the + ** index of the right-most column with a non-NULL default value */ + if( pOp->p5 ){ + while( (pLast->flags & MEM_Null)!=0 && nField>pOp->p5 ){ + pLast--; + nField--; + } + } +#endif + + /* Loop through the elements that will make up the record to figure + ** out how much space is required for the new record. After this loop, + ** the Mem.uTemp field of each term should hold the serial-type that will + ** be used for that term in the generated record: + ** + ** Mem.uTemp value type + ** --------------- --------------- + ** 0 NULL + ** 1 1-byte signed integer + ** 2 2-byte signed integer + ** 3 3-byte signed integer + ** 4 4-byte signed integer + ** 5 6-byte signed integer + ** 6 8-byte signed integer + ** 7 IEEE float + ** 8 Integer constant 0 + ** 9 Integer constant 1 + ** 10,11 reserved for expansion + ** N>=12 and even BLOB + ** N>=13 and odd text + ** + ** The following additional values are computed: + ** nHdr Number of bytes needed for the record header + ** nData Number of bytes of data space needed for the record + ** nZero Zero bytes at the end of the record + */ + pRec = pLast; + do{ + assert( memIsValid(pRec) ); + if( pRec->flags & MEM_Null ){ + if( pRec->flags & MEM_Zero ){ + /* Values with MEM_Null and MEM_Zero are created by xColumn virtual + ** table methods that never invoke sqlite3_result_xxxxx() while + ** computing an unchanging column value in an UPDATE statement. + ** Give such values a special internal-use-only serial-type of 10 + ** so that they can be passed through to xUpdate and have + ** a true sqlite3_value_nochange(). */ +#ifndef SQLITE_ENABLE_NULL_TRIM + assert( pOp->p5==OPFLAG_NOCHNG_MAGIC || CORRUPT_DB ); +#endif + pRec->uTemp = 10; + }else{ + pRec->uTemp = 0; + } + nHdr++; + }else if( pRec->flags & (MEM_Int|MEM_IntReal) ){ + /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. */ + i64 i = pRec->u.i; + u64 uu; + testcase( pRec->flags & MEM_Int ); + testcase( pRec->flags & MEM_IntReal ); + if( i<0 ){ + uu = ~i; + }else{ + uu = i; + } + nHdr++; + testcase( uu==127 ); testcase( uu==128 ); + testcase( uu==32767 ); testcase( uu==32768 ); + testcase( uu==8388607 ); testcase( uu==8388608 ); + testcase( uu==2147483647 ); testcase( uu==2147483648LL ); + testcase( uu==140737488355327LL ); testcase( uu==140737488355328LL ); + if( uu<=127 ){ + if( (i&1)==i && file_format>=4 ){ + pRec->uTemp = 8+(u32)uu; + }else{ + nData++; + pRec->uTemp = 1; + } + }else if( uu<=32767 ){ + nData += 2; + pRec->uTemp = 2; + }else if( uu<=8388607 ){ + nData += 3; + pRec->uTemp = 3; + }else if( uu<=2147483647 ){ + nData += 4; + pRec->uTemp = 4; + }else if( uu<=140737488355327LL ){ + nData += 6; + pRec->uTemp = 5; + }else{ + nData += 8; + if( pRec->flags & MEM_IntReal ){ + /* If the value is IntReal and is going to take up 8 bytes to store + ** as an integer, then we might as well make it an 8-byte floating + ** point value */ + pRec->u.r = (double)pRec->u.i; + pRec->flags &= ~MEM_IntReal; + pRec->flags |= MEM_Real; + pRec->uTemp = 7; + }else{ + pRec->uTemp = 6; + } + } + }else if( pRec->flags & MEM_Real ){ + nHdr++; + nData += 8; + pRec->uTemp = 7; + }else{ + assert( db->mallocFailed || pRec->flags&(MEM_Str|MEM_Blob) ); + assert( pRec->n>=0 ); + len = (u32)pRec->n; + serial_type = (len*2) + 12 + ((pRec->flags & MEM_Str)!=0); + if( pRec->flags & MEM_Zero ){ + serial_type += pRec->u.nZero*2; + if( nData ){ + if( sqlite3VdbeMemExpandBlob(pRec) ) goto no_mem; + len += pRec->u.nZero; + }else{ + nZero += pRec->u.nZero; + } + } + nData += len; + nHdr += sqlite3VarintLen(serial_type); + pRec->uTemp = serial_type; + } + if( pRec==pData0 ) break; + pRec--; + }while(1); + + /* EVIDENCE-OF: R-22564-11647 The header begins with a single varint + ** which determines the total number of bytes in the header. The varint + ** value is the size of the header in bytes including the size varint + ** itself. */ + testcase( nHdr==126 ); + testcase( nHdr==127 ); + if( nHdr<=126 ){ + /* The common case */ + nHdr += 1; + }else{ + /* Rare case of a really large header */ + nVarint = sqlite3VarintLen(nHdr); + nHdr += nVarint; + if( nVarintp3) is not allowed to + ** be one of the input registers (because the following call to + ** sqlite3VdbeMemClearAndResize() could clobber the value before it is used). + */ + if( nByte+nZero<=pOut->szMalloc ){ + /* The output register is already large enough to hold the record. + ** No error checks or buffer enlargement is required */ + pOut->z = pOut->zMalloc; + }else{ + /* Need to make sure that the output is not too big and then enlarge + ** the output register to hold the full result */ + if( nByte+nZero>db->aLimit[SQLITE_LIMIT_LENGTH] ){ + goto too_big; + } + if( sqlite3VdbeMemClearAndResize(pOut, (int)nByte) ){ + goto no_mem; + } + } + pOut->n = (int)nByte; + pOut->flags = MEM_Blob; + if( nZero ){ + pOut->u.nZero = nZero; + pOut->flags |= MEM_Zero; + } + UPDATE_MAX_BLOBSIZE(pOut); + zHdr = (u8 *)pOut->z; + zPayload = zHdr + nHdr; + + /* Write the record */ + zHdr += putVarint32(zHdr, nHdr); + assert( pData0<=pLast ); + pRec = pData0; + do{ + serial_type = pRec->uTemp; + /* EVIDENCE-OF: R-06529-47362 Following the size varint are one or more + ** additional varints, one per column. */ + zHdr += putVarint32(zHdr, serial_type); /* serial type */ + /* EVIDENCE-OF: R-64536-51728 The values for each column in the record + ** immediately follow the header. */ + zPayload += sqlite3VdbeSerialPut(zPayload, pRec, serial_type); /* content */ + }while( (++pRec)<=pLast ); + assert( nHdr==(int)(zHdr - (u8*)pOut->z) ); + assert( nByte==(int)(zPayload - (u8*)pOut->z) ); + + assert( pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor) ); + REGISTER_TRACE(pOp->p3, pOut); + break; +} + +/* Opcode: Count P1 P2 P3 * * +** Synopsis: r[P2]=count() +** +** Store the number of entries (an integer value) in the table or index +** opened by cursor P1 in register P2. +** +** If P3==0, then an exact count is obtained, which involves visiting +** every btree page of the table. But if P3 is non-zero, an estimate +** is returned based on the current cursor position. +*/ +case OP_Count: { /* out2 */ + i64 nEntry; + BtCursor *pCrsr; + + assert( p->apCsr[pOp->p1]->eCurType==CURTYPE_BTREE ); + pCrsr = p->apCsr[pOp->p1]->uc.pCursor; + assert( pCrsr ); + if( pOp->p3 ){ + nEntry = sqlite3BtreeRowCountEst(pCrsr); + }else{ + nEntry = 0; /* Not needed. Only used to silence a warning. */ + rc = sqlite3BtreeCount(db, pCrsr, &nEntry); + if( rc ) goto abort_due_to_error; + } + pOut = out2Prerelease(p, pOp); + pOut->u.i = nEntry; + goto check_for_interrupt; +} + +/* Opcode: Savepoint P1 * * P4 * +** +** Open, release or rollback the savepoint named by parameter P4, depending +** on the value of P1. To open a new savepoint set P1==0 (SAVEPOINT_BEGIN). +** To release (commit) an existing savepoint set P1==1 (SAVEPOINT_RELEASE). +** To rollback an existing savepoint set P1==2 (SAVEPOINT_ROLLBACK). +*/ +case OP_Savepoint: { + int p1; /* Value of P1 operand */ + char *zName; /* Name of savepoint */ + int nName; + Savepoint *pNew; + Savepoint *pSavepoint; + Savepoint *pTmp; + int iSavepoint; + int ii; + + p1 = pOp->p1; + zName = pOp->p4.z; + + /* Assert that the p1 parameter is valid. Also that if there is no open + ** transaction, then there cannot be any savepoints. + */ + assert( db->pSavepoint==0 || db->autoCommit==0 ); + assert( p1==SAVEPOINT_BEGIN||p1==SAVEPOINT_RELEASE||p1==SAVEPOINT_ROLLBACK ); + assert( db->pSavepoint || db->isTransactionSavepoint==0 ); + assert( checkSavepointCount(db) ); + assert( p->bIsReader ); + + if( p1==SAVEPOINT_BEGIN ){ + if( db->nVdbeWrite>0 ){ + /* A new savepoint cannot be created if there are active write + ** statements (i.e. open read/write incremental blob handles). + */ + sqlite3VdbeError(p, "cannot open savepoint - SQL statements in progress"); + rc = SQLITE_BUSY; + }else{ + nName = sqlite3Strlen30(zName); + +#ifndef SQLITE_OMIT_VIRTUALTABLE + /* This call is Ok even if this savepoint is actually a transaction + ** savepoint (and therefore should not prompt xSavepoint()) callbacks. + ** If this is a transaction savepoint being opened, it is guaranteed + ** that the db->aVTrans[] array is empty. */ + assert( db->autoCommit==0 || db->nVTrans==0 ); + rc = sqlite3VtabSavepoint(db, SAVEPOINT_BEGIN, + db->nStatement+db->nSavepoint); + if( rc!=SQLITE_OK ) goto abort_due_to_error; +#endif + + /* Create a new savepoint structure. */ + pNew = sqlite3DbMallocRawNN(db, sizeof(Savepoint)+nName+1); + if( pNew ){ + pNew->zName = (char *)&pNew[1]; + memcpy(pNew->zName, zName, nName+1); + + /* If there is no open transaction, then mark this as a special + ** "transaction savepoint". */ + if( db->autoCommit ){ + db->autoCommit = 0; + db->isTransactionSavepoint = 1; + }else{ + db->nSavepoint++; + } + + /* Link the new savepoint into the database handle's list. */ + pNew->pNext = db->pSavepoint; + db->pSavepoint = pNew; + pNew->nDeferredCons = db->nDeferredCons; + pNew->nDeferredImmCons = db->nDeferredImmCons; + } + } + }else{ + assert( p1==SAVEPOINT_RELEASE || p1==SAVEPOINT_ROLLBACK ); + iSavepoint = 0; + + /* Find the named savepoint. If there is no such savepoint, then an + ** an error is returned to the user. */ + for( + pSavepoint = db->pSavepoint; + pSavepoint && sqlite3StrICmp(pSavepoint->zName, zName); + pSavepoint = pSavepoint->pNext + ){ + iSavepoint++; + } + if( !pSavepoint ){ + sqlite3VdbeError(p, "no such savepoint: %s", zName); + rc = SQLITE_ERROR; + }else if( db->nVdbeWrite>0 && p1==SAVEPOINT_RELEASE ){ + /* It is not possible to release (commit) a savepoint if there are + ** active write statements. + */ + sqlite3VdbeError(p, "cannot release savepoint - " + "SQL statements in progress"); + rc = SQLITE_BUSY; + }else{ + + /* Determine whether or not this is a transaction savepoint. If so, + ** and this is a RELEASE command, then the current transaction + ** is committed. + */ + int isTransaction = pSavepoint->pNext==0 && db->isTransactionSavepoint; + if( isTransaction && p1==SAVEPOINT_RELEASE ){ + if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){ + goto vdbe_return; + } + db->autoCommit = 1; + if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ + p->pc = (int)(pOp - aOp); + db->autoCommit = 0; + p->rc = rc = SQLITE_BUSY; + goto vdbe_return; + } + rc = p->rc; + if( rc ){ + db->autoCommit = 0; + }else{ + db->isTransactionSavepoint = 0; + } + }else{ + int isSchemaChange; + iSavepoint = db->nSavepoint - iSavepoint - 1; + if( p1==SAVEPOINT_ROLLBACK ){ + isSchemaChange = (db->mDbFlags & DBFLAG_SchemaChange)!=0; + for(ii=0; iinDb; ii++){ + rc = sqlite3BtreeTripAllCursors(db->aDb[ii].pBt, + SQLITE_ABORT_ROLLBACK, + isSchemaChange==0); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + } + }else{ + assert( p1==SAVEPOINT_RELEASE ); + isSchemaChange = 0; + } + for(ii=0; iinDb; ii++){ + rc = sqlite3BtreeSavepoint(db->aDb[ii].pBt, p1, iSavepoint); + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } + } + if( isSchemaChange ){ + sqlite3ExpirePreparedStatements(db, 0); + sqlite3ResetAllSchemasOfConnection(db); + db->mDbFlags |= DBFLAG_SchemaChange; + } + } + if( rc ) goto abort_due_to_error; + + /* Regardless of whether this is a RELEASE or ROLLBACK, destroy all + ** savepoints nested inside of the savepoint being operated on. */ + while( db->pSavepoint!=pSavepoint ){ + pTmp = db->pSavepoint; + db->pSavepoint = pTmp->pNext; + sqlite3DbFree(db, pTmp); + db->nSavepoint--; + } + + /* If it is a RELEASE, then destroy the savepoint being operated on + ** too. If it is a ROLLBACK TO, then set the number of deferred + ** constraint violations present in the database to the value stored + ** when the savepoint was created. */ + if( p1==SAVEPOINT_RELEASE ){ + assert( pSavepoint==db->pSavepoint ); + db->pSavepoint = pSavepoint->pNext; + sqlite3DbFree(db, pSavepoint); + if( !isTransaction ){ + db->nSavepoint--; + } + }else{ + assert( p1==SAVEPOINT_ROLLBACK ); + db->nDeferredCons = pSavepoint->nDeferredCons; + db->nDeferredImmCons = pSavepoint->nDeferredImmCons; + } + + if( !isTransaction || p1==SAVEPOINT_ROLLBACK ){ + rc = sqlite3VtabSavepoint(db, p1, iSavepoint); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + } + } + } + if( rc ) goto abort_due_to_error; + + break; +} + +/* Opcode: AutoCommit P1 P2 * * * +** +** Set the database auto-commit flag to P1 (1 or 0). If P2 is true, roll +** back any currently active btree transactions. If there are any active +** VMs (apart from this one), then a ROLLBACK fails. A COMMIT fails if +** there are active writing VMs or active VMs that use shared cache. +** +** This instruction causes the VM to halt. +*/ +case OP_AutoCommit: { + int desiredAutoCommit; + int iRollback; + + desiredAutoCommit = pOp->p1; + iRollback = pOp->p2; + assert( desiredAutoCommit==1 || desiredAutoCommit==0 ); + assert( desiredAutoCommit==1 || iRollback==0 ); + assert( db->nVdbeActive>0 ); /* At least this one VM is active */ + assert( p->bIsReader ); + + if( desiredAutoCommit!=db->autoCommit ){ + if( iRollback ){ + assert( desiredAutoCommit==1 ); + sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); + db->autoCommit = 1; + }else if( desiredAutoCommit && db->nVdbeWrite>0 ){ + /* If this instruction implements a COMMIT and other VMs are writing + ** return an error indicating that the other VMs must complete first. + */ + sqlite3VdbeError(p, "cannot commit transaction - " + "SQL statements in progress"); + rc = SQLITE_BUSY; + goto abort_due_to_error; + }else if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){ + goto vdbe_return; + }else{ + db->autoCommit = (u8)desiredAutoCommit; + } + if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ + p->pc = (int)(pOp - aOp); + db->autoCommit = (u8)(1-desiredAutoCommit); + p->rc = rc = SQLITE_BUSY; + goto vdbe_return; + } + sqlite3CloseSavepoints(db); + if( p->rc==SQLITE_OK ){ + rc = SQLITE_DONE; + }else{ + rc = SQLITE_ERROR; + } + goto vdbe_return; + }else{ + sqlite3VdbeError(p, + (!desiredAutoCommit)?"cannot start a transaction within a transaction":( + (iRollback)?"cannot rollback - no transaction is active": + "cannot commit - no transaction is active")); + + rc = SQLITE_ERROR; + goto abort_due_to_error; + } + /*NOTREACHED*/ assert(0); +} + +/* Opcode: Transaction P1 P2 P3 P4 P5 +** +** Begin a transaction on database P1 if a transaction is not already +** active. +** If P2 is non-zero, then a write-transaction is started, or if a +** read-transaction is already active, it is upgraded to a write-transaction. +** If P2 is zero, then a read-transaction is started. If P2 is 2 or more +** then an exclusive transaction is started. +** +** P1 is the index of the database file on which the transaction is +** started. Index 0 is the main database file and index 1 is the +** file used for temporary tables. Indices of 2 or more are used for +** attached databases. +** +** If a write-transaction is started and the Vdbe.usesStmtJournal flag is +** true (this flag is set if the Vdbe may modify more than one row and may +** throw an ABORT exception), a statement transaction may also be opened. +** More specifically, a statement transaction is opened iff the database +** connection is currently not in autocommit mode, or if there are other +** active statements. A statement transaction allows the changes made by this +** VDBE to be rolled back after an error without having to roll back the +** entire transaction. If no error is encountered, the statement transaction +** will automatically commit when the VDBE halts. +** +** If P5!=0 then this opcode also checks the schema cookie against P3 +** and the schema generation counter against P4. +** The cookie changes its value whenever the database schema changes. +** This operation is used to detect when that the cookie has changed +** and that the current process needs to reread the schema. If the schema +** cookie in P3 differs from the schema cookie in the database header or +** if the schema generation counter in P4 differs from the current +** generation counter, then an SQLITE_SCHEMA error is raised and execution +** halts. The sqlite3_step() wrapper function might then reprepare the +** statement and rerun it from the beginning. +*/ +case OP_Transaction: { + Btree *pBt; + int iMeta = 0; + + assert( p->bIsReader ); + assert( p->readOnly==0 || pOp->p2==0 ); + assert( pOp->p2>=0 && pOp->p2<=2 ); + assert( pOp->p1>=0 && pOp->p1nDb ); + assert( DbMaskTest(p->btreeMask, pOp->p1) ); + assert( rc==SQLITE_OK ); + if( pOp->p2 && (db->flags & (SQLITE_QueryOnly|SQLITE_CorruptRdOnly))!=0 ){ + if( db->flags & SQLITE_QueryOnly ){ + /* Writes prohibited by the "PRAGMA query_only=TRUE" statement */ + rc = SQLITE_READONLY; + }else{ + /* Writes prohibited due to a prior SQLITE_CORRUPT in the current + ** transaction */ + rc = SQLITE_CORRUPT; + } + goto abort_due_to_error; + } + pBt = db->aDb[pOp->p1].pBt; + + if( pBt ){ + rc = sqlite3BtreeBeginTrans(pBt, pOp->p2, &iMeta); + testcase( rc==SQLITE_BUSY_SNAPSHOT ); + testcase( rc==SQLITE_BUSY_RECOVERY ); + if( rc!=SQLITE_OK ){ + if( (rc&0xff)==SQLITE_BUSY ){ + p->pc = (int)(pOp - aOp); + p->rc = rc; + goto vdbe_return; + } + goto abort_due_to_error; + } + + if( p->usesStmtJournal + && pOp->p2 + && (db->autoCommit==0 || db->nVdbeRead>1) + ){ + assert( sqlite3BtreeTxnState(pBt)==SQLITE_TXN_WRITE ); + if( p->iStatement==0 ){ + assert( db->nStatement>=0 && db->nSavepoint>=0 ); + db->nStatement++; + p->iStatement = db->nSavepoint + db->nStatement; + } + + rc = sqlite3VtabSavepoint(db, SAVEPOINT_BEGIN, p->iStatement-1); + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeBeginStmt(pBt, p->iStatement); + } + + /* Store the current value of the database handles deferred constraint + ** counter. If the statement transaction needs to be rolled back, + ** the value of this counter needs to be restored too. */ + p->nStmtDefCons = db->nDeferredCons; + p->nStmtDefImmCons = db->nDeferredImmCons; + } + } + assert( pOp->p5==0 || pOp->p4type==P4_INT32 ); + if( rc==SQLITE_OK + && pOp->p5 + && (iMeta!=pOp->p3 + || db->aDb[pOp->p1].pSchema->iGeneration!=pOp->p4.i) + ){ + /* + ** IMPLEMENTATION-OF: R-03189-51135 As each SQL statement runs, the schema + ** version is checked to ensure that the schema has not changed since the + ** SQL statement was prepared. + */ + sqlite3DbFree(db, p->zErrMsg); + p->zErrMsg = sqlite3DbStrDup(db, "database schema has changed"); + /* If the schema-cookie from the database file matches the cookie + ** stored with the in-memory representation of the schema, do + ** not reload the schema from the database file. + ** + ** If virtual-tables are in use, this is not just an optimization. + ** Often, v-tables store their data in other SQLite tables, which + ** are queried from within xNext() and other v-table methods using + ** prepared queries. If such a query is out-of-date, we do not want to + ** discard the database schema, as the user code implementing the + ** v-table would have to be ready for the sqlite3_vtab structure itself + ** to be invalidated whenever sqlite3_step() is called from within + ** a v-table method. + */ + if( db->aDb[pOp->p1].pSchema->schema_cookie!=iMeta ){ + sqlite3ResetOneSchema(db, pOp->p1); + } + p->expired = 1; + rc = SQLITE_SCHEMA; + } + if( rc ) goto abort_due_to_error; + break; +} + +/* Opcode: ReadCookie P1 P2 P3 * * +** +** Read cookie number P3 from database P1 and write it into register P2. +** P3==1 is the schema version. P3==2 is the database format. +** P3==3 is the recommended pager cache size, and so forth. P1==0 is +** the main database file and P1==1 is the database file used to store +** temporary tables. +** +** There must be a read-lock on the database (either a transaction +** must be started or there must be an open cursor) before +** executing this instruction. +*/ +case OP_ReadCookie: { /* out2 */ + int iMeta; + int iDb; + int iCookie; + + assert( p->bIsReader ); + iDb = pOp->p1; + iCookie = pOp->p3; + assert( pOp->p3=0 && iDbnDb ); + assert( db->aDb[iDb].pBt!=0 ); + assert( DbMaskTest(p->btreeMask, iDb) ); + + sqlite3BtreeGetMeta(db->aDb[iDb].pBt, iCookie, (u32 *)&iMeta); + pOut = out2Prerelease(p, pOp); + pOut->u.i = iMeta; + break; +} + +/* Opcode: SetCookie P1 P2 P3 * P5 +** +** Write the integer value P3 into cookie number P2 of database P1. +** P2==1 is the schema version. P2==2 is the database format. +** P2==3 is the recommended pager cache +** size, and so forth. P1==0 is the main database file and P1==1 is the +** database file used to store temporary tables. +** +** A transaction must be started before executing this opcode. +** +** If P2 is the SCHEMA_VERSION cookie (cookie number 1) then the internal +** schema version is set to P3-P5. The "PRAGMA schema_version=N" statement +** has P5 set to 1, so that the internal schema version will be different +** from the database schema version, resulting in a schema reset. +*/ +case OP_SetCookie: { + Db *pDb; + + sqlite3VdbeIncrWriteCounter(p, 0); + assert( pOp->p2p1>=0 && pOp->p1nDb ); + assert( DbMaskTest(p->btreeMask, pOp->p1) ); + assert( p->readOnly==0 ); + pDb = &db->aDb[pOp->p1]; + assert( pDb->pBt!=0 ); + assert( sqlite3SchemaMutexHeld(db, pOp->p1, 0) ); + /* See note about index shifting on OP_ReadCookie */ + rc = sqlite3BtreeUpdateMeta(pDb->pBt, pOp->p2, pOp->p3); + if( pOp->p2==BTREE_SCHEMA_VERSION ){ + /* When the schema cookie changes, record the new cookie internally */ + pDb->pSchema->schema_cookie = pOp->p3 - pOp->p5; + db->mDbFlags |= DBFLAG_SchemaChange; + sqlite3FkClearTriggerCache(db, pOp->p1); + }else if( pOp->p2==BTREE_FILE_FORMAT ){ + /* Record changes in the file format */ + pDb->pSchema->file_format = pOp->p3; + } + if( pOp->p1==1 ){ + /* Invalidate all prepared statements whenever the TEMP database + ** schema is changed. Ticket #1644 */ + sqlite3ExpirePreparedStatements(db, 0); + p->expired = 0; + } + if( rc ) goto abort_due_to_error; + break; +} + +/* Opcode: OpenRead P1 P2 P3 P4 P5 +** Synopsis: root=P2 iDb=P3 +** +** Open a read-only cursor for the database table whose root page is +** P2 in a database file. The database file is determined by P3. +** P3==0 means the main database, P3==1 means the database used for +** temporary tables, and P3>1 means used the corresponding attached +** database. Give the new cursor an identifier of P1. The P1 +** values need not be contiguous but all P1 values should be small integers. +** It is an error for P1 to be negative. +** +** Allowed P5 bits: +**
      +**
    • 0x02 OPFLAG_SEEKEQ: This cursor will only be used for +** equality lookups (implemented as a pair of opcodes OP_SeekGE/OP_IdxGT +** of OP_SeekLE/OP_IdxLT) +**
    +** +** The P4 value may be either an integer (P4_INT32) or a pointer to +** a KeyInfo structure (P4_KEYINFO). If it is a pointer to a KeyInfo +** object, then table being opened must be an [index b-tree] where the +** KeyInfo object defines the content and collating +** sequence of that index b-tree. Otherwise, if P4 is an integer +** value, then the table being opened must be a [table b-tree] with a +** number of columns no less than the value of P4. +** +** See also: OpenWrite, ReopenIdx +*/ +/* Opcode: ReopenIdx P1 P2 P3 P4 P5 +** Synopsis: root=P2 iDb=P3 +** +** The ReopenIdx opcode works like OP_OpenRead except that it first +** checks to see if the cursor on P1 is already open on the same +** b-tree and if it is this opcode becomes a no-op. In other words, +** if the cursor is already open, do not reopen it. +** +** The ReopenIdx opcode may only be used with P5==0 or P5==OPFLAG_SEEKEQ +** and with P4 being a P4_KEYINFO object. Furthermore, the P3 value must +** be the same as every other ReopenIdx or OpenRead for the same cursor +** number. +** +** Allowed P5 bits: +**
      +**
    • 0x02 OPFLAG_SEEKEQ: This cursor will only be used for +** equality lookups (implemented as a pair of opcodes OP_SeekGE/OP_IdxGT +** of OP_SeekLE/OP_IdxLT) +**
    +** +** See also: OP_OpenRead, OP_OpenWrite +*/ +/* Opcode: OpenWrite P1 P2 P3 P4 P5 +** Synopsis: root=P2 iDb=P3 +** +** Open a read/write cursor named P1 on the table or index whose root +** page is P2 (or whose root page is held in register P2 if the +** OPFLAG_P2ISREG bit is set in P5 - see below). +** +** The P4 value may be either an integer (P4_INT32) or a pointer to +** a KeyInfo structure (P4_KEYINFO). If it is a pointer to a KeyInfo +** object, then table being opened must be an [index b-tree] where the +** KeyInfo object defines the content and collating +** sequence of that index b-tree. Otherwise, if P4 is an integer +** value, then the table being opened must be a [table b-tree] with a +** number of columns no less than the value of P4. +** +** Allowed P5 bits: +**
      +**
    • 0x02 OPFLAG_SEEKEQ: This cursor will only be used for +** equality lookups (implemented as a pair of opcodes OP_SeekGE/OP_IdxGT +** of OP_SeekLE/OP_IdxLT) +**
    • 0x08 OPFLAG_FORDELETE: This cursor is used only to seek +** and subsequently delete entries in an index btree. This is a +** hint to the storage engine that the storage engine is allowed to +** ignore. The hint is not used by the official SQLite b*tree storage +** engine, but is used by COMDB2. +**
    • 0x10 OPFLAG_P2ISREG: Use the content of register P2 +** as the root page, not the value of P2 itself. +**
    +** +** This instruction works like OpenRead except that it opens the cursor +** in read/write mode. +** +** See also: OP_OpenRead, OP_ReopenIdx +*/ +case OP_ReopenIdx: { + int nField; + KeyInfo *pKeyInfo; + u32 p2; + int iDb; + int wrFlag; + Btree *pX; + VdbeCursor *pCur; + Db *pDb; + + assert( pOp->p5==0 || pOp->p5==OPFLAG_SEEKEQ ); + assert( pOp->p4type==P4_KEYINFO ); + pCur = p->apCsr[pOp->p1]; + if( pCur && pCur->pgnoRoot==(u32)pOp->p2 ){ + assert( pCur->iDb==pOp->p3 ); /* Guaranteed by the code generator */ + assert( pCur->eCurType==CURTYPE_BTREE ); + sqlite3BtreeClearCursor(pCur->uc.pCursor); + goto open_cursor_set_hints; + } + /* If the cursor is not currently open or is open on a different + ** index, then fall through into OP_OpenRead to force a reopen */ +case OP_OpenRead: +case OP_OpenWrite: + + assert( pOp->opcode==OP_OpenWrite || pOp->p5==0 || pOp->p5==OPFLAG_SEEKEQ ); + assert( p->bIsReader ); + assert( pOp->opcode==OP_OpenRead || pOp->opcode==OP_ReopenIdx + || p->readOnly==0 ); + + if( p->expired==1 ){ + rc = SQLITE_ABORT_ROLLBACK; + goto abort_due_to_error; + } + + nField = 0; + pKeyInfo = 0; + p2 = (u32)pOp->p2; + iDb = pOp->p3; + assert( iDb>=0 && iDbnDb ); + assert( DbMaskTest(p->btreeMask, iDb) ); + pDb = &db->aDb[iDb]; + pX = pDb->pBt; + assert( pX!=0 ); + if( pOp->opcode==OP_OpenWrite ){ + assert( OPFLAG_FORDELETE==BTREE_FORDELETE ); + wrFlag = BTREE_WRCSR | (pOp->p5 & OPFLAG_FORDELETE); + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( pDb->pSchema->file_format < p->minWriteFileFormat ){ + p->minWriteFileFormat = pDb->pSchema->file_format; + } + }else{ + wrFlag = 0; + } + if( pOp->p5 & OPFLAG_P2ISREG ){ + assert( p2>0 ); + assert( p2<=(u32)(p->nMem+1 - p->nCursor) ); + assert( pOp->opcode==OP_OpenWrite ); + pIn2 = &aMem[p2]; + assert( memIsValid(pIn2) ); + assert( (pIn2->flags & MEM_Int)!=0 ); + sqlite3VdbeMemIntegerify(pIn2); + p2 = (int)pIn2->u.i; + /* The p2 value always comes from a prior OP_CreateBtree opcode and + ** that opcode will always set the p2 value to 2 or more or else fail. + ** If there were a failure, the prepared statement would have halted + ** before reaching this instruction. */ + assert( p2>=2 ); + } + if( pOp->p4type==P4_KEYINFO ){ + pKeyInfo = pOp->p4.pKeyInfo; + assert( pKeyInfo->enc==ENC(db) ); + assert( pKeyInfo->db==db ); + nField = pKeyInfo->nAllField; + }else if( pOp->p4type==P4_INT32 ){ + nField = pOp->p4.i; + } + assert( pOp->p1>=0 ); + assert( nField>=0 ); + testcase( nField==0 ); /* Table with INTEGER PRIMARY KEY and nothing else */ + pCur = allocateCursor(p, pOp->p1, nField, CURTYPE_BTREE); + if( pCur==0 ) goto no_mem; + pCur->iDb = iDb; + pCur->nullRow = 1; + pCur->isOrdered = 1; + pCur->pgnoRoot = p2; +#ifdef SQLITE_DEBUG + pCur->wrFlag = wrFlag; +#endif + rc = sqlite3BtreeCursor(pX, p2, wrFlag, pKeyInfo, pCur->uc.pCursor); + pCur->pKeyInfo = pKeyInfo; + /* Set the VdbeCursor.isTable variable. Previous versions of + ** SQLite used to check if the root-page flags were sane at this point + ** and report database corruption if they were not, but this check has + ** since moved into the btree layer. */ + pCur->isTable = pOp->p4type!=P4_KEYINFO; + +open_cursor_set_hints: + assert( OPFLAG_BULKCSR==BTREE_BULKLOAD ); + assert( OPFLAG_SEEKEQ==BTREE_SEEK_EQ ); + testcase( pOp->p5 & OPFLAG_BULKCSR ); + testcase( pOp->p2 & OPFLAG_SEEKEQ ); + sqlite3BtreeCursorHintFlags(pCur->uc.pCursor, + (pOp->p5 & (OPFLAG_BULKCSR|OPFLAG_SEEKEQ))); + if( rc ) goto abort_due_to_error; + break; +} + +/* Opcode: OpenDup P1 P2 * * * +** +** Open a new cursor P1 that points to the same ephemeral table as +** cursor P2. The P2 cursor must have been opened by a prior OP_OpenEphemeral +** opcode. Only ephemeral cursors may be duplicated. +** +** Duplicate ephemeral cursors are used for self-joins of materialized views. +*/ +case OP_OpenDup: { + VdbeCursor *pOrig; /* The original cursor to be duplicated */ + VdbeCursor *pCx; /* The new cursor */ + + pOrig = p->apCsr[pOp->p2]; + assert( pOrig ); + assert( pOrig->isEphemeral ); /* Only ephemeral cursors can be duplicated */ + + pCx = allocateCursor(p, pOp->p1, pOrig->nField, CURTYPE_BTREE); + if( pCx==0 ) goto no_mem; + pCx->nullRow = 1; + pCx->isEphemeral = 1; + pCx->pKeyInfo = pOrig->pKeyInfo; + pCx->isTable = pOrig->isTable; + pCx->pgnoRoot = pOrig->pgnoRoot; + pCx->isOrdered = pOrig->isOrdered; + pCx->ub.pBtx = pOrig->ub.pBtx; + pCx->hasBeenDuped = 1; + pOrig->hasBeenDuped = 1; + rc = sqlite3BtreeCursor(pCx->ub.pBtx, pCx->pgnoRoot, BTREE_WRCSR, + pCx->pKeyInfo, pCx->uc.pCursor); + /* The sqlite3BtreeCursor() routine can only fail for the first cursor + ** opened for a database. Since there is already an open cursor when this + ** opcode is run, the sqlite3BtreeCursor() cannot fail */ + assert( rc==SQLITE_OK ); + break; +} + + +/* Opcode: OpenEphemeral P1 P2 P3 P4 P5 +** Synopsis: nColumn=P2 +** +** Open a new cursor P1 to a transient table. +** The cursor is always opened read/write even if +** the main database is read-only. The ephemeral +** table is deleted automatically when the cursor is closed. +** +** If the cursor P1 is already opened on an ephemeral table, the table +** is cleared (all content is erased). +** +** P2 is the number of columns in the ephemeral table. +** The cursor points to a BTree table if P4==0 and to a BTree index +** if P4 is not 0. If P4 is not NULL, it points to a KeyInfo structure +** that defines the format of keys in the index. +** +** The P5 parameter can be a mask of the BTREE_* flags defined +** in btree.h. These flags control aspects of the operation of +** the btree. The BTREE_OMIT_JOURNAL and BTREE_SINGLE flags are +** added automatically. +** +** If P3 is positive, then reg[P3] is modified slightly so that it +** can be used as zero-length data for OP_Insert. This is an optimization +** that avoids an extra OP_Blob opcode to initialize that register. +*/ +/* Opcode: OpenAutoindex P1 P2 * P4 * +** Synopsis: nColumn=P2 +** +** This opcode works the same as OP_OpenEphemeral. It has a +** different name to distinguish its use. Tables created using +** by this opcode will be used for automatically created transient +** indices in joins. +*/ +case OP_OpenAutoindex: +case OP_OpenEphemeral: { + VdbeCursor *pCx; + KeyInfo *pKeyInfo; + + static const int vfsFlags = + SQLITE_OPEN_READWRITE | + SQLITE_OPEN_CREATE | + SQLITE_OPEN_EXCLUSIVE | + SQLITE_OPEN_DELETEONCLOSE | + SQLITE_OPEN_TRANSIENT_DB; + assert( pOp->p1>=0 ); + assert( pOp->p2>=0 ); + if( pOp->p3>0 ){ + /* Make register reg[P3] into a value that can be used as the data + ** form sqlite3BtreeInsert() where the length of the data is zero. */ + assert( pOp->p2==0 ); /* Only used when number of columns is zero */ + assert( pOp->opcode==OP_OpenEphemeral ); + assert( aMem[pOp->p3].flags & MEM_Null ); + aMem[pOp->p3].n = 0; + aMem[pOp->p3].z = ""; + } + pCx = p->apCsr[pOp->p1]; + if( pCx && !pCx->hasBeenDuped && ALWAYS(pOp->p2<=pCx->nField) ){ + /* If the ephermeral table is already open and has no duplicates from + ** OP_OpenDup, then erase all existing content so that the table is + ** empty again, rather than creating a new table. */ + assert( pCx->isEphemeral ); + pCx->seqCount = 0; + pCx->cacheStatus = CACHE_STALE; + rc = sqlite3BtreeClearTable(pCx->ub.pBtx, pCx->pgnoRoot, 0); + }else{ + pCx = allocateCursor(p, pOp->p1, pOp->p2, CURTYPE_BTREE); + if( pCx==0 ) goto no_mem; + pCx->isEphemeral = 1; + rc = sqlite3BtreeOpen(db->pVfs, 0, db, &pCx->ub.pBtx, + BTREE_OMIT_JOURNAL | BTREE_SINGLE | pOp->p5, + vfsFlags); + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeBeginTrans(pCx->ub.pBtx, 1, 0); + if( rc==SQLITE_OK ){ + /* If a transient index is required, create it by calling + ** sqlite3BtreeCreateTable() with the BTREE_BLOBKEY flag before + ** opening it. If a transient table is required, just use the + ** automatically created table with root-page 1 (an BLOB_INTKEY table). + */ + if( (pCx->pKeyInfo = pKeyInfo = pOp->p4.pKeyInfo)!=0 ){ + assert( pOp->p4type==P4_KEYINFO ); + rc = sqlite3BtreeCreateTable(pCx->ub.pBtx, &pCx->pgnoRoot, + BTREE_BLOBKEY | pOp->p5); + if( rc==SQLITE_OK ){ + assert( pCx->pgnoRoot==SCHEMA_ROOT+1 ); + assert( pKeyInfo->db==db ); + assert( pKeyInfo->enc==ENC(db) ); + rc = sqlite3BtreeCursor(pCx->ub.pBtx, pCx->pgnoRoot, BTREE_WRCSR, + pKeyInfo, pCx->uc.pCursor); + } + pCx->isTable = 0; + }else{ + pCx->pgnoRoot = SCHEMA_ROOT; + rc = sqlite3BtreeCursor(pCx->ub.pBtx, SCHEMA_ROOT, BTREE_WRCSR, + 0, pCx->uc.pCursor); + pCx->isTable = 1; + } + } + pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED); + if( rc ){ + sqlite3BtreeClose(pCx->ub.pBtx); + } + } + } + if( rc ) goto abort_due_to_error; + pCx->nullRow = 1; + break; +} + +/* Opcode: SorterOpen P1 P2 P3 P4 * +** +** This opcode works like OP_OpenEphemeral except that it opens +** a transient index that is specifically designed to sort large +** tables using an external merge-sort algorithm. +** +** If argument P3 is non-zero, then it indicates that the sorter may +** assume that a stable sort considering the first P3 fields of each +** key is sufficient to produce the required results. +*/ +case OP_SorterOpen: { + VdbeCursor *pCx; + + assert( pOp->p1>=0 ); + assert( pOp->p2>=0 ); + pCx = allocateCursor(p, pOp->p1, pOp->p2, CURTYPE_SORTER); + if( pCx==0 ) goto no_mem; + pCx->pKeyInfo = pOp->p4.pKeyInfo; + assert( pCx->pKeyInfo->db==db ); + assert( pCx->pKeyInfo->enc==ENC(db) ); + rc = sqlite3VdbeSorterInit(db, pOp->p3, pCx); + if( rc ) goto abort_due_to_error; + break; +} + +/* Opcode: SequenceTest P1 P2 * * * +** Synopsis: if( cursor[P1].ctr++ ) pc = P2 +** +** P1 is a sorter cursor. If the sequence counter is currently zero, jump +** to P2. Regardless of whether or not the jump is taken, increment the +** the sequence value. +*/ +case OP_SequenceTest: { + VdbeCursor *pC; + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( isSorter(pC) ); + if( (pC->seqCount++)==0 ){ + goto jump_to_p2; + } + break; +} + +/* Opcode: OpenPseudo P1 P2 P3 * * +** Synopsis: P3 columns in r[P2] +** +** Open a new cursor that points to a fake table that contains a single +** row of data. The content of that one row is the content of memory +** register P2. In other words, cursor P1 becomes an alias for the +** MEM_Blob content contained in register P2. +** +** A pseudo-table created by this opcode is used to hold a single +** row output from the sorter so that the row can be decomposed into +** individual columns using the OP_Column opcode. The OP_Column opcode +** is the only cursor opcode that works with a pseudo-table. +** +** P3 is the number of fields in the records that will be stored by +** the pseudo-table. +*/ +case OP_OpenPseudo: { + VdbeCursor *pCx; + + assert( pOp->p1>=0 ); + assert( pOp->p3>=0 ); + pCx = allocateCursor(p, pOp->p1, pOp->p3, CURTYPE_PSEUDO); + if( pCx==0 ) goto no_mem; + pCx->nullRow = 1; + pCx->seekResult = pOp->p2; + pCx->isTable = 1; + /* Give this pseudo-cursor a fake BtCursor pointer so that pCx + ** can be safely passed to sqlite3VdbeCursorMoveto(). This avoids a test + ** for pCx->eCurType==CURTYPE_BTREE inside of sqlite3VdbeCursorMoveto() + ** which is a performance optimization */ + pCx->uc.pCursor = sqlite3BtreeFakeValidCursor(); + assert( pOp->p5==0 ); + break; +} + +/* Opcode: Close P1 * * * * +** +** Close a cursor previously opened as P1. If P1 is not +** currently open, this instruction is a no-op. +*/ +case OP_Close: { + assert( pOp->p1>=0 && pOp->p1nCursor ); + sqlite3VdbeFreeCursor(p, p->apCsr[pOp->p1]); + p->apCsr[pOp->p1] = 0; + break; +} + +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK +/* Opcode: ColumnsUsed P1 * * P4 * +** +** This opcode (which only exists if SQLite was compiled with +** SQLITE_ENABLE_COLUMN_USED_MASK) identifies which columns of the +** table or index for cursor P1 are used. P4 is a 64-bit integer +** (P4_INT64) in which the first 63 bits are one for each of the +** first 63 columns of the table or index that are actually used +** by the cursor. The high-order bit is set if any column after +** the 64th is used. +*/ +case OP_ColumnsUsed: { + VdbeCursor *pC; + pC = p->apCsr[pOp->p1]; + assert( pC->eCurType==CURTYPE_BTREE ); + pC->maskUsed = *(u64*)pOp->p4.pI64; + break; +} +#endif + +/* Opcode: SeekGE P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), +** use the value in register P3 as the key. If cursor P1 refers +** to an SQL index, then P3 is the first in an array of P4 registers +** that are used as an unpacked index key. +** +** Reposition cursor P1 so that it points to the smallest entry that +** is greater than or equal to the key value. If there are no records +** greater than or equal to the key and P2 is not zero, then jump to P2. +** +** If the cursor P1 was opened using the OPFLAG_SEEKEQ flag, then this +** opcode will either land on a record that exactly matches the key, or +** else it will cause a jump to P2. When the cursor is OPFLAG_SEEKEQ, +** this opcode must be followed by an IdxLE opcode with the same arguments. +** The IdxGT opcode will be skipped if this opcode succeeds, but the +** IdxGT opcode will be used on subsequent loop iterations. The +** OPFLAG_SEEKEQ flags is a hint to the btree layer to say that this +** is an equality search. +** +** This opcode leaves the cursor configured to move in forward order, +** from the beginning toward the end. In other words, the cursor is +** configured to use Next, not Prev. +** +** See also: Found, NotFound, SeekLt, SeekGt, SeekLe +*/ +/* Opcode: SeekGT P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), +** use the value in register P3 as a key. If cursor P1 refers +** to an SQL index, then P3 is the first in an array of P4 registers +** that are used as an unpacked index key. +** +** Reposition cursor P1 so that it points to the smallest entry that +** is greater than the key value. If there are no records greater than +** the key and P2 is not zero, then jump to P2. +** +** This opcode leaves the cursor configured to move in forward order, +** from the beginning toward the end. In other words, the cursor is +** configured to use Next, not Prev. +** +** See also: Found, NotFound, SeekLt, SeekGe, SeekLe +*/ +/* Opcode: SeekLT P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), +** use the value in register P3 as a key. If cursor P1 refers +** to an SQL index, then P3 is the first in an array of P4 registers +** that are used as an unpacked index key. +** +** Reposition cursor P1 so that it points to the largest entry that +** is less than the key value. If there are no records less than +** the key and P2 is not zero, then jump to P2. +** +** This opcode leaves the cursor configured to move in reverse order, +** from the end toward the beginning. In other words, the cursor is +** configured to use Prev, not Next. +** +** See also: Found, NotFound, SeekGt, SeekGe, SeekLe +*/ +/* Opcode: SeekLE P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), +** use the value in register P3 as a key. If cursor P1 refers +** to an SQL index, then P3 is the first in an array of P4 registers +** that are used as an unpacked index key. +** +** Reposition cursor P1 so that it points to the largest entry that +** is less than or equal to the key value. If there are no records +** less than or equal to the key and P2 is not zero, then jump to P2. +** +** This opcode leaves the cursor configured to move in reverse order, +** from the end toward the beginning. In other words, the cursor is +** configured to use Prev, not Next. +** +** If the cursor P1 was opened using the OPFLAG_SEEKEQ flag, then this +** opcode will either land on a record that exactly matches the key, or +** else it will cause a jump to P2. When the cursor is OPFLAG_SEEKEQ, +** this opcode must be followed by an IdxLE opcode with the same arguments. +** The IdxGE opcode will be skipped if this opcode succeeds, but the +** IdxGE opcode will be used on subsequent loop iterations. The +** OPFLAG_SEEKEQ flags is a hint to the btree layer to say that this +** is an equality search. +** +** See also: Found, NotFound, SeekGt, SeekGe, SeekLt +*/ +case OP_SeekLT: /* jump, in3, group */ +case OP_SeekLE: /* jump, in3, group */ +case OP_SeekGE: /* jump, in3, group */ +case OP_SeekGT: { /* jump, in3, group */ + int res; /* Comparison result */ + int oc; /* Opcode */ + VdbeCursor *pC; /* The cursor to seek */ + UnpackedRecord r; /* The key to seek for */ + int nField; /* Number of columns or fields in the key */ + i64 iKey; /* The rowid we are to seek to */ + int eqOnly; /* Only interested in == results */ + + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( pOp->p2!=0 ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( OP_SeekLE == OP_SeekLT+1 ); + assert( OP_SeekGE == OP_SeekLT+2 ); + assert( OP_SeekGT == OP_SeekLT+3 ); + assert( pC->isOrdered ); + assert( pC->uc.pCursor!=0 ); + oc = pOp->opcode; + eqOnly = 0; + pC->nullRow = 0; +#ifdef SQLITE_DEBUG + pC->seekOp = pOp->opcode; +#endif + + pC->deferredMoveto = 0; + pC->cacheStatus = CACHE_STALE; + if( pC->isTable ){ + u16 flags3, newType; + /* The OPFLAG_SEEKEQ/BTREE_SEEK_EQ flag is only set on index cursors */ + assert( sqlite3BtreeCursorHasHint(pC->uc.pCursor, BTREE_SEEK_EQ)==0 + || CORRUPT_DB ); + + /* The input value in P3 might be of any type: integer, real, string, + ** blob, or NULL. But it needs to be an integer before we can do + ** the seek, so convert it. */ + pIn3 = &aMem[pOp->p3]; + flags3 = pIn3->flags; + if( (flags3 & (MEM_Int|MEM_Real|MEM_IntReal|MEM_Str))==MEM_Str ){ + applyNumericAffinity(pIn3, 0); + } + iKey = sqlite3VdbeIntValue(pIn3); /* Get the integer key value */ + newType = pIn3->flags; /* Record the type after applying numeric affinity */ + pIn3->flags = flags3; /* But convert the type back to its original */ + + /* If the P3 value could not be converted into an integer without + ** loss of information, then special processing is required... */ + if( (newType & (MEM_Int|MEM_IntReal))==0 ){ + int c; + if( (newType & MEM_Real)==0 ){ + if( (newType & MEM_Null) || oc>=OP_SeekGE ){ + VdbeBranchTaken(1,2); + goto jump_to_p2; + }else{ + rc = sqlite3BtreeLast(pC->uc.pCursor, &res); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + goto seek_not_found; + } + } + c = sqlite3IntFloatCompare(iKey, pIn3->u.r); + + /* If the approximation iKey is larger than the actual real search + ** term, substitute >= for > and < for <=. e.g. if the search term + ** is 4.9 and the integer approximation 5: + ** + ** (x > 4.9) -> (x >= 5) + ** (x <= 4.9) -> (x < 5) + */ + if( c>0 ){ + assert( OP_SeekGE==(OP_SeekGT-1) ); + assert( OP_SeekLT==(OP_SeekLE-1) ); + assert( (OP_SeekLE & 0x0001)==(OP_SeekGT & 0x0001) ); + if( (oc & 0x0001)==(OP_SeekGT & 0x0001) ) oc--; + } + + /* If the approximation iKey is smaller than the actual real search + ** term, substitute <= for < and > for >=. */ + else if( c<0 ){ + assert( OP_SeekLE==(OP_SeekLT+1) ); + assert( OP_SeekGT==(OP_SeekGE+1) ); + assert( (OP_SeekLT & 0x0001)==(OP_SeekGE & 0x0001) ); + if( (oc & 0x0001)==(OP_SeekLT & 0x0001) ) oc++; + } + } + rc = sqlite3BtreeTableMoveto(pC->uc.pCursor, (u64)iKey, 0, &res); + pC->movetoTarget = iKey; /* Used by OP_Delete */ + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } + }else{ + /* For a cursor with the OPFLAG_SEEKEQ/BTREE_SEEK_EQ hint, only the + ** OP_SeekGE and OP_SeekLE opcodes are allowed, and these must be + ** immediately followed by an OP_IdxGT or OP_IdxLT opcode, respectively, + ** with the same key. + */ + if( sqlite3BtreeCursorHasHint(pC->uc.pCursor, BTREE_SEEK_EQ) ){ + eqOnly = 1; + assert( pOp->opcode==OP_SeekGE || pOp->opcode==OP_SeekLE ); + assert( pOp[1].opcode==OP_IdxLT || pOp[1].opcode==OP_IdxGT ); + assert( pOp->opcode==OP_SeekGE || pOp[1].opcode==OP_IdxLT ); + assert( pOp->opcode==OP_SeekLE || pOp[1].opcode==OP_IdxGT ); + assert( pOp[1].p1==pOp[0].p1 ); + assert( pOp[1].p2==pOp[0].p2 ); + assert( pOp[1].p3==pOp[0].p3 ); + assert( pOp[1].p4.i==pOp[0].p4.i ); + } + + nField = pOp->p4.i; + assert( pOp->p4type==P4_INT32 ); + assert( nField>0 ); + r.pKeyInfo = pC->pKeyInfo; + r.nField = (u16)nField; + + /* The next line of code computes as follows, only faster: + ** if( oc==OP_SeekGT || oc==OP_SeekLE ){ + ** r.default_rc = -1; + ** }else{ + ** r.default_rc = +1; + ** } + */ + r.default_rc = ((1 & (oc - OP_SeekLT)) ? -1 : +1); + assert( oc!=OP_SeekGT || r.default_rc==-1 ); + assert( oc!=OP_SeekLE || r.default_rc==-1 ); + assert( oc!=OP_SeekGE || r.default_rc==+1 ); + assert( oc!=OP_SeekLT || r.default_rc==+1 ); + + r.aMem = &aMem[pOp->p3]; +#ifdef SQLITE_DEBUG + { int i; for(i=0; iuc.pCursor, &r, &res); + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } + if( eqOnly && r.eqSeen==0 ){ + assert( res!=0 ); + goto seek_not_found; + } + } +#ifdef SQLITE_TEST + sqlite3_search_count++; +#endif + if( oc>=OP_SeekGE ){ assert( oc==OP_SeekGE || oc==OP_SeekGT ); + if( res<0 || (res==0 && oc==OP_SeekGT) ){ + res = 0; + rc = sqlite3BtreeNext(pC->uc.pCursor, 0); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + res = 1; + }else{ + goto abort_due_to_error; + } + } + }else{ + res = 0; + } + }else{ + assert( oc==OP_SeekLT || oc==OP_SeekLE ); + if( res>0 || (res==0 && oc==OP_SeekLT) ){ + res = 0; + rc = sqlite3BtreePrevious(pC->uc.pCursor, 0); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + res = 1; + }else{ + goto abort_due_to_error; + } + } + }else{ + /* res might be negative because the table is empty. Check to + ** see if this is the case. + */ + res = sqlite3BtreeEof(pC->uc.pCursor); + } + } +seek_not_found: + assert( pOp->p2>0 ); + VdbeBranchTaken(res!=0,2); + if( res ){ + goto jump_to_p2; + }else if( eqOnly ){ + assert( pOp[1].opcode==OP_IdxLT || pOp[1].opcode==OP_IdxGT ); + pOp++; /* Skip the OP_IdxLt or OP_IdxGT that follows */ + } + break; +} + + +/* Opcode: SeekScan P1 P2 * * * +** Synopsis: Scan-ahead up to P1 rows +** +** This opcode is a prefix opcode to OP_SeekGE. In other words, this +** opcode must be immediately followed by OP_SeekGE. This constraint is +** checked by assert() statements. +** +** This opcode uses the P1 through P4 operands of the subsequent +** OP_SeekGE. In the text that follows, the operands of the subsequent +** OP_SeekGE opcode are denoted as SeekOP.P1 through SeekOP.P4. Only +** the P1 and P2 operands of this opcode are also used, and are called +** This.P1 and This.P2. +** +** This opcode helps to optimize IN operators on a multi-column index +** where the IN operator is on the later terms of the index by avoiding +** unnecessary seeks on the btree, substituting steps to the next row +** of the b-tree instead. A correct answer is obtained if this opcode +** is omitted or is a no-op. +** +** The SeekGE.P3 and SeekGE.P4 operands identify an unpacked key which +** is the desired entry that we want the cursor SeekGE.P1 to be pointing +** to. Call this SeekGE.P4/P5 row the "target". +** +** If the SeekGE.P1 cursor is not currently pointing to a valid row, +** then this opcode is a no-op and control passes through into the OP_SeekGE. +** +** If the SeekGE.P1 cursor is pointing to a valid row, then that row +** might be the target row, or it might be near and slightly before the +** target row. This opcode attempts to position the cursor on the target +** row by, perhaps by invoking sqlite3BtreeStep() on the cursor +** between 0 and This.P1 times. +** +** There are three possible outcomes from this opcode:
      +** +**
    1. If after This.P1 steps, the cursor is still pointing to a place that +** is earlier in the btree than the target row, then fall through +** into the subsquence OP_SeekGE opcode. +** +**
    2. If the cursor is successfully moved to the target row by 0 or more +** sqlite3BtreeNext() calls, then jump to This.P2, which will land just +** past the OP_IdxGT or OP_IdxGE opcode that follows the OP_SeekGE. +** +**
    3. If the cursor ends up past the target row (indicating the the target +** row does not exist in the btree) then jump to SeekOP.P2. +**
    +*/ +case OP_SeekScan: { + VdbeCursor *pC; + int res; + int nStep; + UnpackedRecord r; + + assert( pOp[1].opcode==OP_SeekGE ); + + /* pOp->p2 points to the first instruction past the OP_IdxGT that + ** follows the OP_SeekGE. */ + assert( pOp->p2>=(int)(pOp-aOp)+2 ); + assert( aOp[pOp->p2-1].opcode==OP_IdxGT || aOp[pOp->p2-1].opcode==OP_IdxGE ); + testcase( aOp[pOp->p2-1].opcode==OP_IdxGE ); + assert( pOp[1].p1==aOp[pOp->p2-1].p1 ); + assert( pOp[1].p2==aOp[pOp->p2-1].p2 ); + assert( pOp[1].p3==aOp[pOp->p2-1].p3 ); + + assert( pOp->p1>0 ); + pC = p->apCsr[pOp[1].p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( !pC->isTable ); + if( !sqlite3BtreeCursorIsValidNN(pC->uc.pCursor) ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... cursor not valid - fall through\n"); + } +#endif + break; + } + nStep = pOp->p1; + assert( nStep>=1 ); + r.pKeyInfo = pC->pKeyInfo; + r.nField = (u16)pOp[1].p4.i; + r.default_rc = 0; + r.aMem = &aMem[pOp[1].p3]; +#ifdef SQLITE_DEBUG + { + int i; + for(i=0; i0 ){ + seekscan_search_fail: +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... %d steps and then skip\n", pOp->p1 - nStep); + } +#endif + VdbeBranchTaken(1,3); + pOp++; + goto jump_to_p2; + } + if( res==0 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... %d steps and then success\n", pOp->p1 - nStep); + } +#endif + VdbeBranchTaken(2,3); + goto jump_to_p2; + break; + } + if( nStep<=0 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... fall through after %d steps\n", pOp->p1); + } +#endif + VdbeBranchTaken(0,3); + break; + } + nStep--; + rc = sqlite3BtreeNext(pC->uc.pCursor, 0); + if( rc ){ + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + goto seekscan_search_fail; + }else{ + goto abort_due_to_error; + } + } + } + + break; +} + + +/* Opcode: SeekHit P1 P2 P3 * * +** Synopsis: set P2<=seekHit<=P3 +** +** Increase or decrease the seekHit value for cursor P1, if necessary, +** so that it is no less than P2 and no greater than P3. +** +** The seekHit integer represents the maximum of terms in an index for which +** there is known to be at least one match. If the seekHit value is smaller +** than the total number of equality terms in an index lookup, then the +** OP_IfNoHope opcode might run to see if the IN loop can be abandoned +** early, thus saving work. This is part of the IN-early-out optimization. +** +** P1 must be a valid b-tree cursor. +*/ +case OP_SeekHit: { + VdbeCursor *pC; + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pOp->p3>=pOp->p2 ); + if( pC->seekHitp2 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("seekHit changes from %d to %d\n", pC->seekHit, pOp->p2); + } +#endif + pC->seekHit = pOp->p2; + }else if( pC->seekHit>pOp->p3 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("seekHit changes from %d to %d\n", pC->seekHit, pOp->p3); + } +#endif + pC->seekHit = pOp->p3; + } + break; +} + +/* Opcode: IfNotOpen P1 P2 * * * +** Synopsis: if( !csr[P1] ) goto P2 +** +** If cursor P1 is not open, jump to instruction P2. Otherwise, fall through. +*/ +case OP_IfNotOpen: { /* jump */ + assert( pOp->p1>=0 && pOp->p1nCursor ); + VdbeBranchTaken(p->apCsr[pOp->p1]==0, 2); + if( !p->apCsr[pOp->p1] ){ + goto jump_to_p2_and_check_for_interrupt; + } + break; +} + +/* Opcode: Found P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** If P4==0 then register P3 holds a blob constructed by MakeRecord. If +** P4>0 then register P3 is the first of P4 registers that form an unpacked +** record. +** +** Cursor P1 is on an index btree. If the record identified by P3 and P4 +** is a prefix of any entry in P1 then a jump is made to P2 and +** P1 is left pointing at the matching entry. +** +** This operation leaves the cursor in a state where it can be +** advanced in the forward direction. The Next instruction will work, +** but not the Prev instruction. +** +** See also: NotFound, NoConflict, NotExists. SeekGe +*/ +/* Opcode: NotFound P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** If P4==0 then register P3 holds a blob constructed by MakeRecord. If +** P4>0 then register P3 is the first of P4 registers that form an unpacked +** record. +** +** Cursor P1 is on an index btree. If the record identified by P3 and P4 +** is not the prefix of any entry in P1 then a jump is made to P2. If P1 +** does contain an entry whose prefix matches the P3/P4 record then control +** falls through to the next instruction and P1 is left pointing at the +** matching entry. +** +** This operation leaves the cursor in a state where it cannot be +** advanced in either direction. In other words, the Next and Prev +** opcodes do not work after this operation. +** +** See also: Found, NotExists, NoConflict, IfNoHope +*/ +/* Opcode: IfNoHope P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** Register P3 is the first of P4 registers that form an unpacked +** record. Cursor P1 is an index btree. P2 is a jump destination. +** In other words, the operands to this opcode are the same as the +** operands to OP_NotFound and OP_IdxGT. +** +** This opcode is an optimization attempt only. If this opcode always +** falls through, the correct answer is still obtained, but extra works +** is performed. +** +** A value of N in the seekHit flag of cursor P1 means that there exists +** a key P3:N that will match some record in the index. We want to know +** if it is possible for a record P3:P4 to match some record in the +** index. If it is not possible, we can skips some work. So if seekHit +** is less than P4, attempt to find out if a match is possible by running +** OP_NotFound. +** +** This opcode is used in IN clause processing for a multi-column key. +** If an IN clause is attached to an element of the key other than the +** left-most element, and if there are no matches on the most recent +** seek over the whole key, then it might be that one of the key element +** to the left is prohibiting a match, and hence there is "no hope" of +** any match regardless of how many IN clause elements are checked. +** In such a case, we abandon the IN clause search early, using this +** opcode. The opcode name comes from the fact that the +** jump is taken if there is "no hope" of achieving a match. +** +** See also: NotFound, SeekHit +*/ +/* Opcode: NoConflict P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** If P4==0 then register P3 holds a blob constructed by MakeRecord. If +** P4>0 then register P3 is the first of P4 registers that form an unpacked +** record. +** +** Cursor P1 is on an index btree. If the record identified by P3 and P4 +** contains any NULL value, jump immediately to P2. If all terms of the +** record are not-NULL then a check is done to determine if any row in the +** P1 index btree has a matching key prefix. If there are no matches, jump +** immediately to P2. If there is a match, fall through and leave the P1 +** cursor pointing to the matching row. +** +** This opcode is similar to OP_NotFound with the exceptions that the +** branch is always taken if any part of the search key input is NULL. +** +** This operation leaves the cursor in a state where it cannot be +** advanced in either direction. In other words, the Next and Prev +** opcodes do not work after this operation. +** +** See also: NotFound, Found, NotExists +*/ +case OP_IfNoHope: { /* jump, in3 */ + VdbeCursor *pC; + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("seekHit is %d\n", pC->seekHit); + } +#endif + if( pC->seekHit>=pOp->p4.i ) break; + /* Fall through into OP_NotFound */ + /* no break */ deliberate_fall_through +} +case OP_NoConflict: /* jump, in3 */ +case OP_NotFound: /* jump, in3 */ +case OP_Found: { /* jump, in3 */ + int alreadyExists; + int takeJump; + int ii; + VdbeCursor *pC; + int res; + UnpackedRecord *pFree; + UnpackedRecord *pIdxKey; + UnpackedRecord r; + +#ifdef SQLITE_TEST + if( pOp->opcode!=OP_NoConflict ) sqlite3_found_count++; +#endif + + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( pOp->p4type==P4_INT32 ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); +#ifdef SQLITE_DEBUG + pC->seekOp = pOp->opcode; +#endif + pIn3 = &aMem[pOp->p3]; + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->uc.pCursor!=0 ); + assert( pC->isTable==0 ); + if( pOp->p4.i>0 ){ + r.pKeyInfo = pC->pKeyInfo; + r.nField = (u16)pOp->p4.i; + r.aMem = pIn3; +#ifdef SQLITE_DEBUG + for(ii=0; iip3+ii, &r.aMem[ii]); + } +#endif + pIdxKey = &r; + pFree = 0; + }else{ + assert( pIn3->flags & MEM_Blob ); + rc = ExpandBlob(pIn3); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + if( rc ) goto no_mem; + pFree = pIdxKey = sqlite3VdbeAllocUnpackedRecord(pC->pKeyInfo); + if( pIdxKey==0 ) goto no_mem; + sqlite3VdbeRecordUnpack(pC->pKeyInfo, pIn3->n, pIn3->z, pIdxKey); + } + pIdxKey->default_rc = 0; + takeJump = 0; + if( pOp->opcode==OP_NoConflict ){ + /* For the OP_NoConflict opcode, take the jump if any of the + ** input fields are NULL, since any key with a NULL will not + ** conflict */ + for(ii=0; iinField; ii++){ + if( pIdxKey->aMem[ii].flags & MEM_Null ){ + takeJump = 1; + break; + } + } + } + rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, pIdxKey, &res); + if( pFree ) sqlite3DbFreeNN(db, pFree); + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } + pC->seekResult = res; + alreadyExists = (res==0); + pC->nullRow = 1-alreadyExists; + pC->deferredMoveto = 0; + pC->cacheStatus = CACHE_STALE; + if( pOp->opcode==OP_Found ){ + VdbeBranchTaken(alreadyExists!=0,2); + if( alreadyExists ) goto jump_to_p2; + }else{ + VdbeBranchTaken(takeJump||alreadyExists==0,2); + if( takeJump || !alreadyExists ) goto jump_to_p2; + if( pOp->opcode==OP_IfNoHope ) pC->seekHit = pOp->p4.i; + } + break; +} + +/* Opcode: SeekRowid P1 P2 P3 * * +** Synopsis: intkey=r[P3] +** +** P1 is the index of a cursor open on an SQL table btree (with integer +** keys). If register P3 does not contain an integer or if P1 does not +** contain a record with rowid P3 then jump immediately to P2. +** Or, if P2 is 0, raise an SQLITE_CORRUPT error. If P1 does contain +** a record with rowid P3 then +** leave the cursor pointing at that record and fall through to the next +** instruction. +** +** The OP_NotExists opcode performs the same operation, but with OP_NotExists +** the P3 register must be guaranteed to contain an integer value. With this +** opcode, register P3 might not contain an integer. +** +** The OP_NotFound opcode performs the same operation on index btrees +** (with arbitrary multi-value keys). +** +** This opcode leaves the cursor in a state where it cannot be advanced +** in either direction. In other words, the Next and Prev opcodes will +** not work following this opcode. +** +** See also: Found, NotFound, NoConflict, SeekRowid +*/ +/* Opcode: NotExists P1 P2 P3 * * +** Synopsis: intkey=r[P3] +** +** P1 is the index of a cursor open on an SQL table btree (with integer +** keys). P3 is an integer rowid. If P1 does not contain a record with +** rowid P3 then jump immediately to P2. Or, if P2 is 0, raise an +** SQLITE_CORRUPT error. If P1 does contain a record with rowid P3 then +** leave the cursor pointing at that record and fall through to the next +** instruction. +** +** The OP_SeekRowid opcode performs the same operation but also allows the +** P3 register to contain a non-integer value, in which case the jump is +** always taken. This opcode requires that P3 always contain an integer. +** +** The OP_NotFound opcode performs the same operation on index btrees +** (with arbitrary multi-value keys). +** +** This opcode leaves the cursor in a state where it cannot be advanced +** in either direction. In other words, the Next and Prev opcodes will +** not work following this opcode. +** +** See also: Found, NotFound, NoConflict, SeekRowid +*/ +case OP_SeekRowid: { /* jump, in3 */ + VdbeCursor *pC; + BtCursor *pCrsr; + int res; + u64 iKey; + + pIn3 = &aMem[pOp->p3]; + testcase( pIn3->flags & MEM_Int ); + testcase( pIn3->flags & MEM_IntReal ); + testcase( pIn3->flags & MEM_Real ); + testcase( (pIn3->flags & (MEM_Str|MEM_Int))==MEM_Str ); + if( (pIn3->flags & (MEM_Int|MEM_IntReal))==0 ){ + /* If pIn3->u.i does not contain an integer, compute iKey as the + ** integer value of pIn3. Jump to P2 if pIn3 cannot be converted + ** into an integer without loss of information. Take care to avoid + ** changing the datatype of pIn3, however, as it is used by other + ** parts of the prepared statement. */ + Mem x = pIn3[0]; + applyAffinity(&x, SQLITE_AFF_NUMERIC, encoding); + if( (x.flags & MEM_Int)==0 ) goto jump_to_p2; + iKey = x.u.i; + goto notExistsWithKey; + } + /* Fall through into OP_NotExists */ + /* no break */ deliberate_fall_through +case OP_NotExists: /* jump, in3 */ + pIn3 = &aMem[pOp->p3]; + assert( (pIn3->flags & MEM_Int)!=0 || pOp->opcode==OP_SeekRowid ); + assert( pOp->p1>=0 && pOp->p1nCursor ); + iKey = pIn3->u.i; +notExistsWithKey: + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); +#ifdef SQLITE_DEBUG + if( pOp->opcode==OP_SeekRowid ) pC->seekOp = OP_SeekRowid; +#endif + assert( pC->isTable ); + assert( pC->eCurType==CURTYPE_BTREE ); + pCrsr = pC->uc.pCursor; + assert( pCrsr!=0 ); + res = 0; + rc = sqlite3BtreeTableMoveto(pCrsr, iKey, 0, &res); + assert( rc==SQLITE_OK || res==0 ); + pC->movetoTarget = iKey; /* Used by OP_Delete */ + pC->nullRow = 0; + pC->cacheStatus = CACHE_STALE; + pC->deferredMoveto = 0; + VdbeBranchTaken(res!=0,2); + pC->seekResult = res; + if( res!=0 ){ + assert( rc==SQLITE_OK ); + if( pOp->p2==0 ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + goto jump_to_p2; + } + } + if( rc ) goto abort_due_to_error; + break; +} + +/* Opcode: Sequence P1 P2 * * * +** Synopsis: r[P2]=cursor[P1].ctr++ +** +** Find the next available sequence number for cursor P1. +** Write the sequence number into register P2. +** The sequence number on the cursor is incremented after this +** instruction. +*/ +case OP_Sequence: { /* out2 */ + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( p->apCsr[pOp->p1]!=0 ); + assert( p->apCsr[pOp->p1]->eCurType!=CURTYPE_VTAB ); + pOut = out2Prerelease(p, pOp); + pOut->u.i = p->apCsr[pOp->p1]->seqCount++; + break; +} + + +/* Opcode: NewRowid P1 P2 P3 * * +** Synopsis: r[P2]=rowid +** +** Get a new integer record number (a.k.a "rowid") used as the key to a table. +** The record number is not previously used as a key in the database +** table that cursor P1 points to. The new record number is written +** written to register P2. +** +** If P3>0 then P3 is a register in the root frame of this VDBE that holds +** the largest previously generated record number. No new record numbers are +** allowed to be less than this value. When this value reaches its maximum, +** an SQLITE_FULL error is generated. The P3 register is updated with the ' +** generated record number. This P3 mechanism is used to help implement the +** AUTOINCREMENT feature. +*/ +case OP_NewRowid: { /* out2 */ + i64 v; /* The new rowid */ + VdbeCursor *pC; /* Cursor of table to get the new rowid */ + int res; /* Result of an sqlite3BtreeLast() */ + int cnt; /* Counter to limit the number of searches */ +#ifndef SQLITE_OMIT_AUTOINCREMENT + Mem *pMem; /* Register holding largest rowid for AUTOINCREMENT */ + VdbeFrame *pFrame; /* Root frame of VDBE */ +#endif + + v = 0; + res = 0; + pOut = out2Prerelease(p, pOp); + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->isTable ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->uc.pCursor!=0 ); + { + /* The next rowid or record number (different terms for the same + ** thing) is obtained in a two-step algorithm. + ** + ** First we attempt to find the largest existing rowid and add one + ** to that. But if the largest existing rowid is already the maximum + ** positive integer, we have to fall through to the second + ** probabilistic algorithm + ** + ** The second algorithm is to select a rowid at random and see if + ** it already exists in the table. If it does not exist, we have + ** succeeded. If the random rowid does exist, we select a new one + ** and try again, up to 100 times. + */ + assert( pC->isTable ); + +#ifdef SQLITE_32BIT_ROWID +# define MAX_ROWID 0x7fffffff +#else + /* Some compilers complain about constants of the form 0x7fffffffffffffff. + ** Others complain about 0x7ffffffffffffffffLL. The following macro seems + ** to provide the constant while making all compilers happy. + */ +# define MAX_ROWID (i64)( (((u64)0x7fffffff)<<32) | (u64)0xffffffff ) +#endif + + if( !pC->useRandomRowid ){ + rc = sqlite3BtreeLast(pC->uc.pCursor, &res); + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } + if( res ){ + v = 1; /* IMP: R-61914-48074 */ + }else{ + assert( sqlite3BtreeCursorIsValid(pC->uc.pCursor) ); + v = sqlite3BtreeIntegerKey(pC->uc.pCursor); + if( v>=MAX_ROWID ){ + pC->useRandomRowid = 1; + }else{ + v++; /* IMP: R-29538-34987 */ + } + } + } + +#ifndef SQLITE_OMIT_AUTOINCREMENT + if( pOp->p3 ){ + /* Assert that P3 is a valid memory cell. */ + assert( pOp->p3>0 ); + if( p->pFrame ){ + for(pFrame=p->pFrame; pFrame->pParent; pFrame=pFrame->pParent); + /* Assert that P3 is a valid memory cell. */ + assert( pOp->p3<=pFrame->nMem ); + pMem = &pFrame->aMem[pOp->p3]; + }else{ + /* Assert that P3 is a valid memory cell. */ + assert( pOp->p3<=(p->nMem+1 - p->nCursor) ); + pMem = &aMem[pOp->p3]; + memAboutToChange(p, pMem); + } + assert( memIsValid(pMem) ); + + REGISTER_TRACE(pOp->p3, pMem); + sqlite3VdbeMemIntegerify(pMem); + assert( (pMem->flags & MEM_Int)!=0 ); /* mem(P3) holds an integer */ + if( pMem->u.i==MAX_ROWID || pC->useRandomRowid ){ + rc = SQLITE_FULL; /* IMP: R-17817-00630 */ + goto abort_due_to_error; + } + if( vu.i+1 ){ + v = pMem->u.i + 1; + } + pMem->u.i = v; + } +#endif + if( pC->useRandomRowid ){ + /* IMPLEMENTATION-OF: R-07677-41881 If the largest ROWID is equal to the + ** largest possible integer (9223372036854775807) then the database + ** engine starts picking positive candidate ROWIDs at random until + ** it finds one that is not previously used. */ + assert( pOp->p3==0 ); /* We cannot be in random rowid mode if this is + ** an AUTOINCREMENT table. */ + cnt = 0; + do{ + sqlite3_randomness(sizeof(v), &v); + v &= (MAX_ROWID>>1); v++; /* Ensure that v is greater than zero */ + }while( ((rc = sqlite3BtreeTableMoveto(pC->uc.pCursor, (u64)v, + 0, &res))==SQLITE_OK) + && (res==0) + && (++cnt<100)); + if( rc ) goto abort_due_to_error; + if( res==0 ){ + rc = SQLITE_FULL; /* IMP: R-38219-53002 */ + goto abort_due_to_error; + } + assert( v>0 ); /* EV: R-40812-03570 */ + } + pC->deferredMoveto = 0; + pC->cacheStatus = CACHE_STALE; + } + pOut->u.i = v; + break; +} + +/* Opcode: Insert P1 P2 P3 P4 P5 +** Synopsis: intkey=r[P3] data=r[P2] +** +** Write an entry into the table of cursor P1. A new entry is +** created if it doesn't already exist or the data for an existing +** entry is overwritten. The data is the value MEM_Blob stored in register +** number P2. The key is stored in register P3. The key must +** be a MEM_Int. +** +** If the OPFLAG_NCHANGE flag of P5 is set, then the row change count is +** incremented (otherwise not). If the OPFLAG_LASTROWID flag of P5 is set, +** then rowid is stored for subsequent return by the +** sqlite3_last_insert_rowid() function (otherwise it is unmodified). +** +** If the OPFLAG_USESEEKRESULT flag of P5 is set, the implementation might +** run faster by avoiding an unnecessary seek on cursor P1. However, +** the OPFLAG_USESEEKRESULT flag must only be set if there have been no prior +** seeks on the cursor or if the most recent seek used a key equal to P3. +** +** If the OPFLAG_ISUPDATE flag is set, then this opcode is part of an +** UPDATE operation. Otherwise (if the flag is clear) then this opcode +** is part of an INSERT operation. The difference is only important to +** the update hook. +** +** Parameter P4 may point to a Table structure, or may be NULL. If it is +** not NULL, then the update-hook (sqlite3.xUpdateCallback) is invoked +** following a successful insert. +** +** (WARNING/TODO: If P1 is a pseudo-cursor and P2 is dynamically +** allocated, then ownership of P2 is transferred to the pseudo-cursor +** and register P2 becomes ephemeral. If the cursor is changed, the +** value of register P2 will then change. Make sure this does not +** cause any problems.) +** +** This instruction only works on tables. The equivalent instruction +** for indices is OP_IdxInsert. +*/ +case OP_Insert: { + Mem *pData; /* MEM cell holding data for the record to be inserted */ + Mem *pKey; /* MEM cell holding key for the record */ + VdbeCursor *pC; /* Cursor to table into which insert is written */ + int seekResult; /* Result of prior seek or 0 if no USESEEKRESULT flag */ + const char *zDb; /* database name - used by the update hook */ + Table *pTab; /* Table structure - used by update and pre-update hooks */ + BtreePayload x; /* Payload to be inserted */ + + pData = &aMem[pOp->p2]; + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( memIsValid(pData) ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->deferredMoveto==0 ); + assert( pC->uc.pCursor!=0 ); + assert( (pOp->p5 & OPFLAG_ISNOOP) || pC->isTable ); + assert( pOp->p4type==P4_TABLE || pOp->p4type>=P4_STATIC ); + REGISTER_TRACE(pOp->p2, pData); + sqlite3VdbeIncrWriteCounter(p, pC); + + pKey = &aMem[pOp->p3]; + assert( pKey->flags & MEM_Int ); + assert( memIsValid(pKey) ); + REGISTER_TRACE(pOp->p3, pKey); + x.nKey = pKey->u.i; + + if( pOp->p4type==P4_TABLE && HAS_UPDATE_HOOK(db) ){ + assert( pC->iDb>=0 ); + zDb = db->aDb[pC->iDb].zDbSName; + pTab = pOp->p4.pTab; + assert( (pOp->p5 & OPFLAG_ISNOOP) || HasRowid(pTab) ); + }else{ + pTab = 0; + zDb = 0; + } + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + /* Invoke the pre-update hook, if any */ + if( pTab ){ + if( db->xPreUpdateCallback && !(pOp->p5 & OPFLAG_ISUPDATE) ){ + sqlite3VdbePreUpdateHook(p,pC,SQLITE_INSERT,zDb,pTab,x.nKey,pOp->p2,-1); + } + if( db->xUpdateCallback==0 || pTab->aCol==0 ){ + /* Prevent post-update hook from running in cases when it should not */ + pTab = 0; + } + } + if( pOp->p5 & OPFLAG_ISNOOP ) break; +#endif + + if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++; + if( pOp->p5 & OPFLAG_LASTROWID ) db->lastRowid = x.nKey; + assert( (pData->flags & (MEM_Blob|MEM_Str))!=0 || pData->n==0 ); + x.pData = pData->z; + x.nData = pData->n; + seekResult = ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0); + if( pData->flags & MEM_Zero ){ + x.nZero = pData->u.nZero; + }else{ + x.nZero = 0; + } + x.pKey = 0; + rc = sqlite3BtreeInsert(pC->uc.pCursor, &x, + (pOp->p5 & (OPFLAG_APPEND|OPFLAG_SAVEPOSITION|OPFLAG_PREFORMAT)), + seekResult + ); + pC->deferredMoveto = 0; + pC->cacheStatus = CACHE_STALE; + + /* Invoke the update-hook if required. */ + if( rc ) goto abort_due_to_error; + if( pTab ){ + assert( db->xUpdateCallback!=0 ); + assert( pTab->aCol!=0 ); + db->xUpdateCallback(db->pUpdateArg, + (pOp->p5 & OPFLAG_ISUPDATE) ? SQLITE_UPDATE : SQLITE_INSERT, + zDb, pTab->zName, x.nKey); + } + break; +} + +/* Opcode: RowCell P1 P2 P3 * * +** +** P1 and P2 are both open cursors. Both must be opened on the same type +** of table - intkey or index. This opcode is used as part of copying +** the current row from P2 into P1. If the cursors are opened on intkey +** tables, register P3 contains the rowid to use with the new record in +** P1. If they are opened on index tables, P3 is not used. +** +** This opcode must be followed by either an Insert or InsertIdx opcode +** with the OPFLAG_PREFORMAT flag set to complete the insert operation. +*/ +case OP_RowCell: { + VdbeCursor *pDest; /* Cursor to write to */ + VdbeCursor *pSrc; /* Cursor to read from */ + i64 iKey; /* Rowid value to insert with */ + assert( pOp[1].opcode==OP_Insert || pOp[1].opcode==OP_IdxInsert ); + assert( pOp[1].opcode==OP_Insert || pOp->p3==0 ); + assert( pOp[1].opcode==OP_IdxInsert || pOp->p3>0 ); + assert( pOp[1].p5 & OPFLAG_PREFORMAT ); + pDest = p->apCsr[pOp->p1]; + pSrc = p->apCsr[pOp->p2]; + iKey = pOp->p3 ? aMem[pOp->p3].u.i : 0; + rc = sqlite3BtreeTransferRow(pDest->uc.pCursor, pSrc->uc.pCursor, iKey); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + break; +}; + +/* Opcode: Delete P1 P2 P3 P4 P5 +** +** Delete the record at which the P1 cursor is currently pointing. +** +** If the OPFLAG_SAVEPOSITION bit of the P5 parameter is set, then +** the cursor will be left pointing at either the next or the previous +** record in the table. If it is left pointing at the next record, then +** the next Next instruction will be a no-op. As a result, in this case +** it is ok to delete a record from within a Next loop. If +** OPFLAG_SAVEPOSITION bit of P5 is clear, then the cursor will be +** left in an undefined state. +** +** If the OPFLAG_AUXDELETE bit is set on P5, that indicates that this +** delete one of several associated with deleting a table row and all its +** associated index entries. Exactly one of those deletes is the "primary" +** delete. The others are all on OPFLAG_FORDELETE cursors or else are +** marked with the AUXDELETE flag. +** +** If the OPFLAG_NCHANGE flag of P2 (NB: P2 not P5) is set, then the row +** change count is incremented (otherwise not). +** +** P1 must not be pseudo-table. It has to be a real table with +** multiple rows. +** +** If P4 is not NULL then it points to a Table object. In this case either +** the update or pre-update hook, or both, may be invoked. The P1 cursor must +** have been positioned using OP_NotFound prior to invoking this opcode in +** this case. Specifically, if one is configured, the pre-update hook is +** invoked if P4 is not NULL. The update-hook is invoked if one is configured, +** P4 is not NULL, and the OPFLAG_NCHANGE flag is set in P2. +** +** If the OPFLAG_ISUPDATE flag is set in P2, then P3 contains the address +** of the memory cell that contains the value that the rowid of the row will +** be set to by the update. +*/ +case OP_Delete: { + VdbeCursor *pC; + const char *zDb; + Table *pTab; + int opflags; + + opflags = pOp->p2; + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->uc.pCursor!=0 ); + assert( pC->deferredMoveto==0 ); + sqlite3VdbeIncrWriteCounter(p, pC); + +#ifdef SQLITE_DEBUG + if( pOp->p4type==P4_TABLE + && HasRowid(pOp->p4.pTab) + && pOp->p5==0 + && sqlite3BtreeCursorIsValidNN(pC->uc.pCursor) + ){ + /* If p5 is zero, the seek operation that positioned the cursor prior to + ** OP_Delete will have also set the pC->movetoTarget field to the rowid of + ** the row that is being deleted */ + i64 iKey = sqlite3BtreeIntegerKey(pC->uc.pCursor); + assert( CORRUPT_DB || pC->movetoTarget==iKey ); + } +#endif + + /* If the update-hook or pre-update-hook will be invoked, set zDb to + ** the name of the db to pass as to it. Also set local pTab to a copy + ** of p4.pTab. Finally, if p5 is true, indicating that this cursor was + ** last moved with OP_Next or OP_Prev, not Seek or NotFound, set + ** VdbeCursor.movetoTarget to the current rowid. */ + if( pOp->p4type==P4_TABLE && HAS_UPDATE_HOOK(db) ){ + assert( pC->iDb>=0 ); + assert( pOp->p4.pTab!=0 ); + zDb = db->aDb[pC->iDb].zDbSName; + pTab = pOp->p4.pTab; + if( (pOp->p5 & OPFLAG_SAVEPOSITION)!=0 && pC->isTable ){ + pC->movetoTarget = sqlite3BtreeIntegerKey(pC->uc.pCursor); + } + }else{ + zDb = 0; + pTab = 0; + } + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + /* Invoke the pre-update-hook if required. */ + assert( db->xPreUpdateCallback==0 || pTab==pOp->p4.pTab ); + if( db->xPreUpdateCallback && pTab ){ + assert( !(opflags & OPFLAG_ISUPDATE) + || HasRowid(pTab)==0 + || (aMem[pOp->p3].flags & MEM_Int) + ); + sqlite3VdbePreUpdateHook(p, pC, + (opflags & OPFLAG_ISUPDATE) ? SQLITE_UPDATE : SQLITE_DELETE, + zDb, pTab, pC->movetoTarget, + pOp->p3, -1 + ); + } + if( opflags & OPFLAG_ISNOOP ) break; +#endif + + /* Only flags that can be set are SAVEPOISTION and AUXDELETE */ + assert( (pOp->p5 & ~(OPFLAG_SAVEPOSITION|OPFLAG_AUXDELETE))==0 ); + assert( OPFLAG_SAVEPOSITION==BTREE_SAVEPOSITION ); + assert( OPFLAG_AUXDELETE==BTREE_AUXDELETE ); + +#ifdef SQLITE_DEBUG + if( p->pFrame==0 ){ + if( pC->isEphemeral==0 + && (pOp->p5 & OPFLAG_AUXDELETE)==0 + && (pC->wrFlag & OPFLAG_FORDELETE)==0 + ){ + nExtraDelete++; + } + if( pOp->p2 & OPFLAG_NCHANGE ){ + nExtraDelete--; + } + } +#endif + + rc = sqlite3BtreeDelete(pC->uc.pCursor, pOp->p5); + pC->cacheStatus = CACHE_STALE; + pC->seekResult = 0; + if( rc ) goto abort_due_to_error; + + /* Invoke the update-hook if required. */ + if( opflags & OPFLAG_NCHANGE ){ + p->nChange++; + if( db->xUpdateCallback && ALWAYS(pTab!=0) && HasRowid(pTab) ){ + db->xUpdateCallback(db->pUpdateArg, SQLITE_DELETE, zDb, pTab->zName, + pC->movetoTarget); + assert( pC->iDb>=0 ); + } + } + + break; +} +/* Opcode: ResetCount * * * * * +** +** The value of the change counter is copied to the database handle +** change counter (returned by subsequent calls to sqlite3_changes()). +** Then the VMs internal change counter resets to 0. +** This is used by trigger programs. +*/ +case OP_ResetCount: { + sqlite3VdbeSetChanges(db, p->nChange); + p->nChange = 0; + break; +} + +/* Opcode: SorterCompare P1 P2 P3 P4 +** Synopsis: if key(P1)!=trim(r[P3],P4) goto P2 +** +** P1 is a sorter cursor. This instruction compares a prefix of the +** record blob in register P3 against a prefix of the entry that +** the sorter cursor currently points to. Only the first P4 fields +** of r[P3] and the sorter record are compared. +** +** If either P3 or the sorter contains a NULL in one of their significant +** fields (not counting the P4 fields at the end which are ignored) then +** the comparison is assumed to be equal. +** +** Fall through to next instruction if the two records compare equal to +** each other. Jump to P2 if they are different. +*/ +case OP_SorterCompare: { + VdbeCursor *pC; + int res; + int nKeyCol; + + pC = p->apCsr[pOp->p1]; + assert( isSorter(pC) ); + assert( pOp->p4type==P4_INT32 ); + pIn3 = &aMem[pOp->p3]; + nKeyCol = pOp->p4.i; + res = 0; + rc = sqlite3VdbeSorterCompare(pC, pIn3, nKeyCol, &res); + VdbeBranchTaken(res!=0,2); + if( rc ) goto abort_due_to_error; + if( res ) goto jump_to_p2; + break; +}; + +/* Opcode: SorterData P1 P2 P3 * * +** Synopsis: r[P2]=data +** +** Write into register P2 the current sorter data for sorter cursor P1. +** Then clear the column header cache on cursor P3. +** +** This opcode is normally use to move a record out of the sorter and into +** a register that is the source for a pseudo-table cursor created using +** OpenPseudo. That pseudo-table cursor is the one that is identified by +** parameter P3. Clearing the P3 column cache as part of this opcode saves +** us from having to issue a separate NullRow instruction to clear that cache. +*/ +case OP_SorterData: { + VdbeCursor *pC; + + pOut = &aMem[pOp->p2]; + pC = p->apCsr[pOp->p1]; + assert( isSorter(pC) ); + rc = sqlite3VdbeSorterRowkey(pC, pOut); + assert( rc!=SQLITE_OK || (pOut->flags & MEM_Blob) ); + assert( pOp->p1>=0 && pOp->p1nCursor ); + if( rc ) goto abort_due_to_error; + p->apCsr[pOp->p3]->cacheStatus = CACHE_STALE; + break; +} + +/* Opcode: RowData P1 P2 P3 * * +** Synopsis: r[P2]=data +** +** Write into register P2 the complete row content for the row at +** which cursor P1 is currently pointing. +** There is no interpretation of the data. +** It is just copied onto the P2 register exactly as +** it is found in the database file. +** +** If cursor P1 is an index, then the content is the key of the row. +** If cursor P2 is a table, then the content extracted is the data. +** +** If the P1 cursor must be pointing to a valid row (not a NULL row) +** of a real table, not a pseudo-table. +** +** If P3!=0 then this opcode is allowed to make an ephemeral pointer +** into the database page. That means that the content of the output +** register will be invalidated as soon as the cursor moves - including +** moves caused by other cursors that "save" the current cursors +** position in order that they can write to the same table. If P3==0 +** then a copy of the data is made into memory. P3!=0 is faster, but +** P3==0 is safer. +** +** If P3!=0 then the content of the P2 register is unsuitable for use +** in OP_Result and any OP_Result will invalidate the P2 register content. +** The P2 register content is invalidated by opcodes like OP_Function or +** by any use of another cursor pointing to the same table. +*/ +case OP_RowData: { + VdbeCursor *pC; + BtCursor *pCrsr; + u32 n; + + pOut = out2Prerelease(p, pOp); + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( isSorter(pC)==0 ); + assert( pC->nullRow==0 ); + assert( pC->uc.pCursor!=0 ); + pCrsr = pC->uc.pCursor; + + /* The OP_RowData opcodes always follow OP_NotExists or + ** OP_SeekRowid or OP_Rewind/Op_Next with no intervening instructions + ** that might invalidate the cursor. + ** If this where not the case, on of the following assert()s + ** would fail. Should this ever change (because of changes in the code + ** generator) then the fix would be to insert a call to + ** sqlite3VdbeCursorMoveto(). + */ + assert( pC->deferredMoveto==0 ); + assert( sqlite3BtreeCursorIsValid(pCrsr) ); + + n = sqlite3BtreePayloadSize(pCrsr); + if( n>(u32)db->aLimit[SQLITE_LIMIT_LENGTH] ){ + goto too_big; + } + testcase( n==0 ); + rc = sqlite3VdbeMemFromBtreeZeroOffset(pCrsr, n, pOut); + if( rc ) goto abort_due_to_error; + if( !pOp->p3 ) Deephemeralize(pOut); + UPDATE_MAX_BLOBSIZE(pOut); + REGISTER_TRACE(pOp->p2, pOut); + break; +} + +/* Opcode: Rowid P1 P2 * * * +** Synopsis: r[P2]=rowid +** +** Store in register P2 an integer which is the key of the table entry that +** P1 is currently point to. +** +** P1 can be either an ordinary table or a virtual table. There used to +** be a separate OP_VRowid opcode for use with virtual tables, but this +** one opcode now works for both table types. +*/ +case OP_Rowid: { /* out2 */ + VdbeCursor *pC; + i64 v; + sqlite3_vtab *pVtab; + const sqlite3_module *pModule; + + pOut = out2Prerelease(p, pOp); + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType!=CURTYPE_PSEUDO || pC->nullRow ); + if( pC->nullRow ){ + pOut->flags = MEM_Null; + break; + }else if( pC->deferredMoveto ){ + v = pC->movetoTarget; +#ifndef SQLITE_OMIT_VIRTUALTABLE + }else if( pC->eCurType==CURTYPE_VTAB ){ + assert( pC->uc.pVCur!=0 ); + pVtab = pC->uc.pVCur->pVtab; + pModule = pVtab->pModule; + assert( pModule->xRowid ); + rc = pModule->xRowid(pC->uc.pVCur, &v); + sqlite3VtabImportErrmsg(p, pVtab); + if( rc ) goto abort_due_to_error; +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + }else{ + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->uc.pCursor!=0 ); + rc = sqlite3VdbeCursorRestore(pC); + if( rc ) goto abort_due_to_error; + if( pC->nullRow ){ + pOut->flags = MEM_Null; + break; + } + v = sqlite3BtreeIntegerKey(pC->uc.pCursor); + } + pOut->u.i = v; + break; +} + +/* Opcode: NullRow P1 * * * * +** +** Move the cursor P1 to a null row. Any OP_Column operations +** that occur while the cursor is on the null row will always +** write a NULL. +** +** Or, if P1 is a Pseudo-Cursor (a cursor opened using OP_OpenPseudo) +** just reset the cache for that cursor. This causes the row of +** content held by the pseudo-cursor to be reparsed. +*/ +case OP_NullRow: { + VdbeCursor *pC; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + pC->nullRow = 1; + pC->cacheStatus = CACHE_STALE; + if( pC->eCurType==CURTYPE_BTREE ){ + assert( pC->uc.pCursor!=0 ); + sqlite3BtreeClearCursor(pC->uc.pCursor); + } +#ifdef SQLITE_DEBUG + if( pC->seekOp==0 ) pC->seekOp = OP_NullRow; +#endif + break; +} + +/* Opcode: SeekEnd P1 * * * * +** +** Position cursor P1 at the end of the btree for the purpose of +** appending a new entry onto the btree. +** +** It is assumed that the cursor is used only for appending and so +** if the cursor is valid, then the cursor must already be pointing +** at the end of the btree and so no changes are made to +** the cursor. +*/ +/* Opcode: Last P1 P2 * * * +** +** The next use of the Rowid or Column or Prev instruction for P1 +** will refer to the last entry in the database table or index. +** If the table or index is empty and P2>0, then jump immediately to P2. +** If P2 is 0 or if the table or index is not empty, fall through +** to the following instruction. +** +** This opcode leaves the cursor configured to move in reverse order, +** from the end toward the beginning. In other words, the cursor is +** configured to use Prev, not Next. +*/ +case OP_SeekEnd: +case OP_Last: { /* jump */ + VdbeCursor *pC; + BtCursor *pCrsr; + int res; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + pCrsr = pC->uc.pCursor; + res = 0; + assert( pCrsr!=0 ); +#ifdef SQLITE_DEBUG + pC->seekOp = pOp->opcode; +#endif + if( pOp->opcode==OP_SeekEnd ){ + assert( pOp->p2==0 ); + pC->seekResult = -1; + if( sqlite3BtreeCursorIsValidNN(pCrsr) ){ + break; + } + } + rc = sqlite3BtreeLast(pCrsr, &res); + pC->nullRow = (u8)res; + pC->deferredMoveto = 0; + pC->cacheStatus = CACHE_STALE; + if( rc ) goto abort_due_to_error; + if( pOp->p2>0 ){ + VdbeBranchTaken(res!=0,2); + if( res ) goto jump_to_p2; + } + break; +} + +/* Opcode: IfSmaller P1 P2 P3 * * +** +** Estimate the number of rows in the table P1. Jump to P2 if that +** estimate is less than approximately 2**(0.1*P3). +*/ +case OP_IfSmaller: { /* jump */ + VdbeCursor *pC; + BtCursor *pCrsr; + int res; + i64 sz; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + pCrsr = pC->uc.pCursor; + assert( pCrsr ); + rc = sqlite3BtreeFirst(pCrsr, &res); + if( rc ) goto abort_due_to_error; + if( res==0 ){ + sz = sqlite3BtreeRowCountEst(pCrsr); + if( ALWAYS(sz>=0) && sqlite3LogEst((u64)sz)p3 ) res = 1; + } + VdbeBranchTaken(res!=0,2); + if( res ) goto jump_to_p2; + break; +} + + +/* Opcode: SorterSort P1 P2 * * * +** +** After all records have been inserted into the Sorter object +** identified by P1, invoke this opcode to actually do the sorting. +** Jump to P2 if there are no records to be sorted. +** +** This opcode is an alias for OP_Sort and OP_Rewind that is used +** for Sorter objects. +*/ +/* Opcode: Sort P1 P2 * * * +** +** This opcode does exactly the same thing as OP_Rewind except that +** it increments an undocumented global variable used for testing. +** +** Sorting is accomplished by writing records into a sorting index, +** then rewinding that index and playing it back from beginning to +** end. We use the OP_Sort opcode instead of OP_Rewind to do the +** rewinding so that the global variable will be incremented and +** regression tests can determine whether or not the optimizer is +** correctly optimizing out sorts. +*/ +case OP_SorterSort: /* jump */ +case OP_Sort: { /* jump */ +#ifdef SQLITE_TEST + sqlite3_sort_count++; + sqlite3_search_count--; +#endif + p->aCounter[SQLITE_STMTSTATUS_SORT]++; + /* Fall through into OP_Rewind */ + /* no break */ deliberate_fall_through +} +/* Opcode: Rewind P1 P2 * * * +** +** The next use of the Rowid or Column or Next instruction for P1 +** will refer to the first entry in the database table or index. +** If the table or index is empty, jump immediately to P2. +** If the table or index is not empty, fall through to the following +** instruction. +** +** This opcode leaves the cursor configured to move in forward order, +** from the beginning toward the end. In other words, the cursor is +** configured to use Next, not Prev. +*/ +case OP_Rewind: { /* jump */ + VdbeCursor *pC; + BtCursor *pCrsr; + int res; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( pOp->p5==0 ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( isSorter(pC)==(pOp->opcode==OP_SorterSort) ); + res = 1; +#ifdef SQLITE_DEBUG + pC->seekOp = OP_Rewind; +#endif + if( isSorter(pC) ){ + rc = sqlite3VdbeSorterRewind(pC, &res); + }else{ + assert( pC->eCurType==CURTYPE_BTREE ); + pCrsr = pC->uc.pCursor; + assert( pCrsr ); + rc = sqlite3BtreeFirst(pCrsr, &res); + pC->deferredMoveto = 0; + pC->cacheStatus = CACHE_STALE; + } + if( rc ) goto abort_due_to_error; + pC->nullRow = (u8)res; + assert( pOp->p2>0 && pOp->p2nOp ); + VdbeBranchTaken(res!=0,2); + if( res ) goto jump_to_p2; + break; +} + +/* Opcode: Next P1 P2 P3 P4 P5 +** +** Advance cursor P1 so that it points to the next key/data pair in its +** table or index. If there are no more key/value pairs then fall through +** to the following instruction. But if the cursor advance was successful, +** jump immediately to P2. +** +** The Next opcode is only valid following an SeekGT, SeekGE, or +** OP_Rewind opcode used to position the cursor. Next is not allowed +** to follow SeekLT, SeekLE, or OP_Last. +** +** The P1 cursor must be for a real table, not a pseudo-table. P1 must have +** been opened prior to this opcode or the program will segfault. +** +** The P3 value is a hint to the btree implementation. If P3==1, that +** means P1 is an SQL index and that this instruction could have been +** omitted if that index had been unique. P3 is usually 0. P3 is +** always either 0 or 1. +** +** P4 is always of type P4_ADVANCE. The function pointer points to +** sqlite3BtreeNext(). +** +** If P5 is positive and the jump is taken, then event counter +** number P5-1 in the prepared statement is incremented. +** +** See also: Prev +*/ +/* Opcode: Prev P1 P2 P3 P4 P5 +** +** Back up cursor P1 so that it points to the previous key/data pair in its +** table or index. If there is no previous key/value pairs then fall through +** to the following instruction. But if the cursor backup was successful, +** jump immediately to P2. +** +** +** The Prev opcode is only valid following an SeekLT, SeekLE, or +** OP_Last opcode used to position the cursor. Prev is not allowed +** to follow SeekGT, SeekGE, or OP_Rewind. +** +** The P1 cursor must be for a real table, not a pseudo-table. If P1 is +** not open then the behavior is undefined. +** +** The P3 value is a hint to the btree implementation. If P3==1, that +** means P1 is an SQL index and that this instruction could have been +** omitted if that index had been unique. P3 is usually 0. P3 is +** always either 0 or 1. +** +** P4 is always of type P4_ADVANCE. The function pointer points to +** sqlite3BtreePrevious(). +** +** If P5 is positive and the jump is taken, then event counter +** number P5-1 in the prepared statement is incremented. +*/ +/* Opcode: SorterNext P1 P2 * * P5 +** +** This opcode works just like OP_Next except that P1 must be a +** sorter object for which the OP_SorterSort opcode has been +** invoked. This opcode advances the cursor to the next sorted +** record, or jumps to P2 if there are no more sorted records. +*/ +case OP_SorterNext: { /* jump */ + VdbeCursor *pC; + + pC = p->apCsr[pOp->p1]; + assert( isSorter(pC) ); + rc = sqlite3VdbeSorterNext(db, pC); + goto next_tail; +case OP_Prev: /* jump */ +case OP_Next: /* jump */ + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( pOp->p5aCounter) ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->deferredMoveto==0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pOp->opcode!=OP_Next || pOp->p4.xAdvance==sqlite3BtreeNext ); + assert( pOp->opcode!=OP_Prev || pOp->p4.xAdvance==sqlite3BtreePrevious ); + + /* The Next opcode is only used after SeekGT, SeekGE, Rewind, and Found. + ** The Prev opcode is only used after SeekLT, SeekLE, and Last. */ + assert( pOp->opcode!=OP_Next + || pC->seekOp==OP_SeekGT || pC->seekOp==OP_SeekGE + || pC->seekOp==OP_Rewind || pC->seekOp==OP_Found + || pC->seekOp==OP_NullRow|| pC->seekOp==OP_SeekRowid + || pC->seekOp==OP_IfNoHope); + assert( pOp->opcode!=OP_Prev + || pC->seekOp==OP_SeekLT || pC->seekOp==OP_SeekLE + || pC->seekOp==OP_Last || pC->seekOp==OP_IfNoHope + || pC->seekOp==OP_NullRow); + + rc = pOp->p4.xAdvance(pC->uc.pCursor, pOp->p3); +next_tail: + pC->cacheStatus = CACHE_STALE; + VdbeBranchTaken(rc==SQLITE_OK,2); + if( rc==SQLITE_OK ){ + pC->nullRow = 0; + p->aCounter[pOp->p5]++; +#ifdef SQLITE_TEST + sqlite3_search_count++; +#endif + goto jump_to_p2_and_check_for_interrupt; + } + if( rc!=SQLITE_DONE ) goto abort_due_to_error; + rc = SQLITE_OK; + pC->nullRow = 1; + goto check_for_interrupt; +} + +/* Opcode: IdxInsert P1 P2 P3 P4 P5 +** Synopsis: key=r[P2] +** +** Register P2 holds an SQL index key made using the +** MakeRecord instructions. This opcode writes that key +** into the index P1. Data for the entry is nil. +** +** If P4 is not zero, then it is the number of values in the unpacked +** key of reg(P2). In that case, P3 is the index of the first register +** for the unpacked key. The availability of the unpacked key can sometimes +** be an optimization. +** +** If P5 has the OPFLAG_APPEND bit set, that is a hint to the b-tree layer +** that this insert is likely to be an append. +** +** If P5 has the OPFLAG_NCHANGE bit set, then the change counter is +** incremented by this instruction. If the OPFLAG_NCHANGE bit is clear, +** then the change counter is unchanged. +** +** If the OPFLAG_USESEEKRESULT flag of P5 is set, the implementation might +** run faster by avoiding an unnecessary seek on cursor P1. However, +** the OPFLAG_USESEEKRESULT flag must only be set if there have been no prior +** seeks on the cursor or if the most recent seek used a key equivalent +** to P2. +** +** This instruction only works for indices. The equivalent instruction +** for tables is OP_Insert. +*/ +case OP_IdxInsert: { /* in2 */ + VdbeCursor *pC; + BtreePayload x; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + sqlite3VdbeIncrWriteCounter(p, pC); + assert( pC!=0 ); + assert( !isSorter(pC) ); + pIn2 = &aMem[pOp->p2]; + assert( (pIn2->flags & MEM_Blob) || (pOp->p5 & OPFLAG_PREFORMAT) ); + if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++; + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->isTable==0 ); + rc = ExpandBlob(pIn2); + if( rc ) goto abort_due_to_error; + x.nKey = pIn2->n; + x.pKey = pIn2->z; + x.aMem = aMem + pOp->p3; + x.nMem = (u16)pOp->p4.i; + rc = sqlite3BtreeInsert(pC->uc.pCursor, &x, + (pOp->p5 & (OPFLAG_APPEND|OPFLAG_SAVEPOSITION|OPFLAG_PREFORMAT)), + ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) + ); + assert( pC->deferredMoveto==0 ); + pC->cacheStatus = CACHE_STALE; + if( rc) goto abort_due_to_error; + break; +} + +/* Opcode: SorterInsert P1 P2 * * * +** Synopsis: key=r[P2] +** +** Register P2 holds an SQL index key made using the +** MakeRecord instructions. This opcode writes that key +** into the sorter P1. Data for the entry is nil. +*/ +case OP_SorterInsert: { /* in2 */ + VdbeCursor *pC; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + sqlite3VdbeIncrWriteCounter(p, pC); + assert( pC!=0 ); + assert( isSorter(pC) ); + pIn2 = &aMem[pOp->p2]; + assert( pIn2->flags & MEM_Blob ); + assert( pC->isTable==0 ); + rc = ExpandBlob(pIn2); + if( rc ) goto abort_due_to_error; + rc = sqlite3VdbeSorterWrite(pC, pIn2); + if( rc) goto abort_due_to_error; + break; +} + +/* Opcode: IdxDelete P1 P2 P3 * P5 +** Synopsis: key=r[P2@P3] +** +** The content of P3 registers starting at register P2 form +** an unpacked index key. This opcode removes that entry from the +** index opened by cursor P1. +** +** If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error +** if no matching index entry is found. This happens when running +** an UPDATE or DELETE statement and the index entry to be updated +** or deleted is not found. For some uses of IdxDelete +** (example: the EXCEPT operator) it does not matter that no matching +** entry is found. For those cases, P5 is zero. Also, do not raise +** this (self-correcting and non-critical) error if in writable_schema mode. +*/ +case OP_IdxDelete: { + VdbeCursor *pC; + BtCursor *pCrsr; + int res; + UnpackedRecord r; + + assert( pOp->p3>0 ); + assert( pOp->p2>0 && pOp->p2+pOp->p3<=(p->nMem+1 - p->nCursor)+1 ); + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + sqlite3VdbeIncrWriteCounter(p, pC); + pCrsr = pC->uc.pCursor; + assert( pCrsr!=0 ); + r.pKeyInfo = pC->pKeyInfo; + r.nField = (u16)pOp->p3; + r.default_rc = 0; + r.aMem = &aMem[pOp->p2]; + rc = sqlite3BtreeIndexMoveto(pCrsr, &r, &res); + if( rc ) goto abort_due_to_error; + if( res==0 ){ + rc = sqlite3BtreeDelete(pCrsr, BTREE_AUXDELETE); + if( rc ) goto abort_due_to_error; + }else if( pOp->p5 && !sqlite3WritableSchema(db) ){ + rc = sqlite3ReportError(SQLITE_CORRUPT_INDEX, __LINE__, "index corruption"); + goto abort_due_to_error; + } + assert( pC->deferredMoveto==0 ); + pC->cacheStatus = CACHE_STALE; + pC->seekResult = 0; + break; +} + +/* Opcode: DeferredSeek P1 * P3 P4 * +** Synopsis: Move P3 to P1.rowid if needed +** +** P1 is an open index cursor and P3 is a cursor on the corresponding +** table. This opcode does a deferred seek of the P3 table cursor +** to the row that corresponds to the current row of P1. +** +** This is a deferred seek. Nothing actually happens until +** the cursor is used to read a record. That way, if no reads +** occur, no unnecessary I/O happens. +** +** P4 may be an array of integers (type P4_INTARRAY) containing +** one entry for each column in the P3 table. If array entry a(i) +** is non-zero, then reading column a(i)-1 from cursor P3 is +** equivalent to performing the deferred seek and then reading column i +** from P1. This information is stored in P3 and used to redirect +** reads against P3 over to P1, thus possibly avoiding the need to +** seek and read cursor P3. +*/ +/* Opcode: IdxRowid P1 P2 * * * +** Synopsis: r[P2]=rowid +** +** Write into register P2 an integer which is the last entry in the record at +** the end of the index key pointed to by cursor P1. This integer should be +** the rowid of the table entry to which this index entry points. +** +** See also: Rowid, MakeRecord. +*/ +case OP_DeferredSeek: +case OP_IdxRowid: { /* out2 */ + VdbeCursor *pC; /* The P1 index cursor */ + VdbeCursor *pTabCur; /* The P2 table cursor (OP_DeferredSeek only) */ + i64 rowid; /* Rowid that P1 current points to */ + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->uc.pCursor!=0 ); + assert( pC->isTable==0 ); + assert( pC->deferredMoveto==0 ); + assert( !pC->nullRow || pOp->opcode==OP_IdxRowid ); + + /* The IdxRowid and Seek opcodes are combined because of the commonality + ** of sqlite3VdbeCursorRestore() and sqlite3VdbeIdxRowid(). */ + rc = sqlite3VdbeCursorRestore(pC); + + /* sqlite3VbeCursorRestore() can only fail if the record has been deleted + ** out from under the cursor. That will never happens for an IdxRowid + ** or Seek opcode */ + if( NEVER(rc!=SQLITE_OK) ) goto abort_due_to_error; + + if( !pC->nullRow ){ + rowid = 0; /* Not needed. Only used to silence a warning. */ + rc = sqlite3VdbeIdxRowid(db, pC->uc.pCursor, &rowid); + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } + if( pOp->opcode==OP_DeferredSeek ){ + assert( pOp->p3>=0 && pOp->p3nCursor ); + pTabCur = p->apCsr[pOp->p3]; + assert( pTabCur!=0 ); + assert( pTabCur->eCurType==CURTYPE_BTREE ); + assert( pTabCur->uc.pCursor!=0 ); + assert( pTabCur->isTable ); + pTabCur->nullRow = 0; + pTabCur->movetoTarget = rowid; + pTabCur->deferredMoveto = 1; + assert( pOp->p4type==P4_INTARRAY || pOp->p4.ai==0 ); + assert( !pTabCur->isEphemeral ); + pTabCur->ub.aAltMap = pOp->p4.ai; + assert( !pC->isEphemeral ); + pTabCur->pAltCursor = pC; + }else{ + pOut = out2Prerelease(p, pOp); + pOut->u.i = rowid; + } + }else{ + assert( pOp->opcode==OP_IdxRowid ); + sqlite3VdbeMemSetNull(&aMem[pOp->p2]); + } + break; +} + +/* Opcode: FinishSeek P1 * * * * +** +** If cursor P1 was previously moved via OP_DeferredSeek, complete that +** seek operation now, without further delay. If the cursor seek has +** already occurred, this instruction is a no-op. +*/ +case OP_FinishSeek: { + VdbeCursor *pC; /* The P1 index cursor */ + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + if( pC->deferredMoveto ){ + rc = sqlite3VdbeFinishMoveto(pC); + if( rc ) goto abort_due_to_error; + } + break; +} + +/* Opcode: IdxGE P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** The P4 register values beginning with P3 form an unpacked index +** key that omits the PRIMARY KEY. Compare this key value against the index +** that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID +** fields at the end. +** +** If the P1 index entry is greater than or equal to the key value +** then jump to P2. Otherwise fall through to the next instruction. +*/ +/* Opcode: IdxGT P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** The P4 register values beginning with P3 form an unpacked index +** key that omits the PRIMARY KEY. Compare this key value against the index +** that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID +** fields at the end. +** +** If the P1 index entry is greater than the key value +** then jump to P2. Otherwise fall through to the next instruction. +*/ +/* Opcode: IdxLT P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** The P4 register values beginning with P3 form an unpacked index +** key that omits the PRIMARY KEY or ROWID. Compare this key value against +** the index that P1 is currently pointing to, ignoring the PRIMARY KEY or +** ROWID on the P1 index. +** +** If the P1 index entry is less than the key value then jump to P2. +** Otherwise fall through to the next instruction. +*/ +/* Opcode: IdxLE P1 P2 P3 P4 * +** Synopsis: key=r[P3@P4] +** +** The P4 register values beginning with P3 form an unpacked index +** key that omits the PRIMARY KEY or ROWID. Compare this key value against +** the index that P1 is currently pointing to, ignoring the PRIMARY KEY or +** ROWID on the P1 index. +** +** If the P1 index entry is less than or equal to the key value then jump +** to P2. Otherwise fall through to the next instruction. +*/ +case OP_IdxLE: /* jump */ +case OP_IdxGT: /* jump */ +case OP_IdxLT: /* jump */ +case OP_IdxGE: { /* jump */ + VdbeCursor *pC; + int res; + UnpackedRecord r; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->isOrdered ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->uc.pCursor!=0); + assert( pC->deferredMoveto==0 ); + assert( pOp->p4type==P4_INT32 ); + r.pKeyInfo = pC->pKeyInfo; + r.nField = (u16)pOp->p4.i; + if( pOp->opcodeopcode==OP_IdxLE || pOp->opcode==OP_IdxGT ); + r.default_rc = -1; + }else{ + assert( pOp->opcode==OP_IdxGE || pOp->opcode==OP_IdxLT ); + r.default_rc = 0; + } + r.aMem = &aMem[pOp->p3]; +#ifdef SQLITE_DEBUG + { + int i; + for(i=0; ip3+i, &aMem[pOp->p3+i]); + } + } +#endif + + /* Inlined version of sqlite3VdbeIdxKeyCompare() */ + { + i64 nCellKey = 0; + BtCursor *pCur; + Mem m; + + assert( pC->eCurType==CURTYPE_BTREE ); + pCur = pC->uc.pCursor; + assert( sqlite3BtreeCursorIsValid(pCur) ); + nCellKey = sqlite3BtreePayloadSize(pCur); + /* nCellKey will always be between 0 and 0xffffffff because of the way + ** that btreeParseCellPtr() and sqlite3GetVarint32() are implemented */ + if( nCellKey<=0 || nCellKey>0x7fffffff ){ + rc = SQLITE_CORRUPT_BKPT; + goto abort_due_to_error; + } + sqlite3VdbeMemInit(&m, db, 0); + rc = sqlite3VdbeMemFromBtreeZeroOffset(pCur, (u32)nCellKey, &m); + if( rc ) goto abort_due_to_error; + res = sqlite3VdbeRecordCompareWithSkip(m.n, m.z, &r, 0); + sqlite3VdbeMemRelease(&m); + } + /* End of inlined sqlite3VdbeIdxKeyCompare() */ + + assert( (OP_IdxLE&1)==(OP_IdxLT&1) && (OP_IdxGE&1)==(OP_IdxGT&1) ); + if( (pOp->opcode&1)==(OP_IdxLT&1) ){ + assert( pOp->opcode==OP_IdxLE || pOp->opcode==OP_IdxLT ); + res = -res; + }else{ + assert( pOp->opcode==OP_IdxGE || pOp->opcode==OP_IdxGT ); + res++; + } + VdbeBranchTaken(res>0,2); + assert( rc==SQLITE_OK ); + if( res>0 ) goto jump_to_p2; + break; +} + +/* Opcode: Destroy P1 P2 P3 * * +** +** Delete an entire database table or index whose root page in the database +** file is given by P1. +** +** The table being destroyed is in the main database file if P3==0. If +** P3==1 then the table to be clear is in the auxiliary database file +** that is used to store tables create using CREATE TEMPORARY TABLE. +** +** If AUTOVACUUM is enabled then it is possible that another root page +** might be moved into the newly deleted root page in order to keep all +** root pages contiguous at the beginning of the database. The former +** value of the root page that moved - its value before the move occurred - +** is stored in register P2. If no page movement was required (because the +** table being dropped was already the last one in the database) then a +** zero is stored in register P2. If AUTOVACUUM is disabled then a zero +** is stored in register P2. +** +** This opcode throws an error if there are any active reader VMs when +** it is invoked. This is done to avoid the difficulty associated with +** updating existing cursors when a root page is moved in an AUTOVACUUM +** database. This error is thrown even if the database is not an AUTOVACUUM +** db in order to avoid introducing an incompatibility between autovacuum +** and non-autovacuum modes. +** +** See also: Clear +*/ +case OP_Destroy: { /* out2 */ + int iMoved; + int iDb; + + sqlite3VdbeIncrWriteCounter(p, 0); + assert( p->readOnly==0 ); + assert( pOp->p1>1 ); + pOut = out2Prerelease(p, pOp); + pOut->flags = MEM_Null; + if( db->nVdbeRead > db->nVDestroy+1 ){ + rc = SQLITE_LOCKED; + p->errorAction = OE_Abort; + goto abort_due_to_error; + }else{ + iDb = pOp->p3; + assert( DbMaskTest(p->btreeMask, iDb) ); + iMoved = 0; /* Not needed. Only to silence a warning. */ + rc = sqlite3BtreeDropTable(db->aDb[iDb].pBt, pOp->p1, &iMoved); + pOut->flags = MEM_Int; + pOut->u.i = iMoved; + if( rc ) goto abort_due_to_error; +#ifndef SQLITE_OMIT_AUTOVACUUM + if( iMoved!=0 ){ + sqlite3RootPageMoved(db, iDb, iMoved, pOp->p1); + /* All OP_Destroy operations occur on the same btree */ + assert( resetSchemaOnFault==0 || resetSchemaOnFault==iDb+1 ); + resetSchemaOnFault = iDb+1; + } +#endif + } + break; +} + +/* Opcode: Clear P1 P2 P3 +** +** Delete all contents of the database table or index whose root page +** in the database file is given by P1. But, unlike Destroy, do not +** remove the table or index from the database file. +** +** The table being clear is in the main database file if P2==0. If +** P2==1 then the table to be clear is in the auxiliary database file +** that is used to store tables create using CREATE TEMPORARY TABLE. +** +** If the P3 value is non-zero, then the row change count is incremented +** by the number of rows in the table being cleared. If P3 is greater +** than zero, then the value stored in register P3 is also incremented +** by the number of rows in the table being cleared. +** +** See also: Destroy +*/ +case OP_Clear: { + i64 nChange; + + sqlite3VdbeIncrWriteCounter(p, 0); + nChange = 0; + assert( p->readOnly==0 ); + assert( DbMaskTest(p->btreeMask, pOp->p2) ); + rc = sqlite3BtreeClearTable(db->aDb[pOp->p2].pBt, (u32)pOp->p1, &nChange); + if( pOp->p3 ){ + p->nChange += nChange; + if( pOp->p3>0 ){ + assert( memIsValid(&aMem[pOp->p3]) ); + memAboutToChange(p, &aMem[pOp->p3]); + aMem[pOp->p3].u.i += nChange; + } + } + if( rc ) goto abort_due_to_error; + break; +} + +/* Opcode: ResetSorter P1 * * * * +** +** Delete all contents from the ephemeral table or sorter +** that is open on cursor P1. +** +** This opcode only works for cursors used for sorting and +** opened with OP_OpenEphemeral or OP_SorterOpen. +*/ +case OP_ResetSorter: { + VdbeCursor *pC; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + if( isSorter(pC) ){ + sqlite3VdbeSorterReset(db, pC->uc.pSorter); + }else{ + assert( pC->eCurType==CURTYPE_BTREE ); + assert( pC->isEphemeral ); + rc = sqlite3BtreeClearTableOfCursor(pC->uc.pCursor); + if( rc ) goto abort_due_to_error; + } + break; +} + +/* Opcode: CreateBtree P1 P2 P3 * * +** Synopsis: r[P2]=root iDb=P1 flags=P3 +** +** Allocate a new b-tree in the main database file if P1==0 or in the +** TEMP database file if P1==1 or in an attached database if +** P1>1. The P3 argument must be 1 (BTREE_INTKEY) for a rowid table +** it must be 2 (BTREE_BLOBKEY) for an index or WITHOUT ROWID table. +** The root page number of the new b-tree is stored in register P2. +*/ +case OP_CreateBtree: { /* out2 */ + Pgno pgno; + Db *pDb; + + sqlite3VdbeIncrWriteCounter(p, 0); + pOut = out2Prerelease(p, pOp); + pgno = 0; + assert( pOp->p3==BTREE_INTKEY || pOp->p3==BTREE_BLOBKEY ); + assert( pOp->p1>=0 && pOp->p1nDb ); + assert( DbMaskTest(p->btreeMask, pOp->p1) ); + assert( p->readOnly==0 ); + pDb = &db->aDb[pOp->p1]; + assert( pDb->pBt!=0 ); + rc = sqlite3BtreeCreateTable(pDb->pBt, &pgno, pOp->p3); + if( rc ) goto abort_due_to_error; + pOut->u.i = pgno; + break; +} + +/* Opcode: SqlExec * * * P4 * +** +** Run the SQL statement or statements specified in the P4 string. +*/ +case OP_SqlExec: { + sqlite3VdbeIncrWriteCounter(p, 0); + db->nSqlExec++; + rc = sqlite3_exec(db, pOp->p4.z, 0, 0, 0); + db->nSqlExec--; + if( rc ) goto abort_due_to_error; + break; +} + +/* Opcode: ParseSchema P1 * * P4 * +** +** Read and parse all entries from the schema table of database P1 +** that match the WHERE clause P4. If P4 is a NULL pointer, then the +** entire schema for P1 is reparsed. +** +** This opcode invokes the parser to create a new virtual machine, +** then runs the new virtual machine. It is thus a re-entrant opcode. +*/ +case OP_ParseSchema: { + int iDb; + const char *zSchema; + char *zSql; + InitData initData; + + /* Any prepared statement that invokes this opcode will hold mutexes + ** on every btree. This is a prerequisite for invoking + ** sqlite3InitCallback(). + */ +#ifdef SQLITE_DEBUG + for(iDb=0; iDbnDb; iDb++){ + assert( iDb==1 || sqlite3BtreeHoldsMutex(db->aDb[iDb].pBt) ); + } +#endif + + iDb = pOp->p1; + assert( iDb>=0 && iDbnDb ); + assert( DbHasProperty(db, iDb, DB_SchemaLoaded) + || db->mallocFailed + || (CORRUPT_DB && (db->flags & SQLITE_NoSchemaError)!=0) ); + +#ifndef SQLITE_OMIT_ALTERTABLE + if( pOp->p4.z==0 ){ + sqlite3SchemaClear(db->aDb[iDb].pSchema); + db->mDbFlags &= ~DBFLAG_SchemaKnownOk; + rc = sqlite3InitOne(db, iDb, &p->zErrMsg, pOp->p5); + db->mDbFlags |= DBFLAG_SchemaChange; + p->expired = 0; + }else +#endif + { + zSchema = LEGACY_SCHEMA_TABLE; + initData.db = db; + initData.iDb = iDb; + initData.pzErrMsg = &p->zErrMsg; + initData.mInitFlags = 0; + initData.mxPage = sqlite3BtreeLastPage(db->aDb[iDb].pBt); + zSql = sqlite3MPrintf(db, + "SELECT*FROM\"%w\".%s WHERE %s ORDER BY rowid", + db->aDb[iDb].zDbSName, zSchema, pOp->p4.z); + if( zSql==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + assert( db->init.busy==0 ); + db->init.busy = 1; + initData.rc = SQLITE_OK; + initData.nInitRow = 0; + assert( !db->mallocFailed ); + rc = sqlite3_exec(db, zSql, sqlite3InitCallback, &initData, 0); + if( rc==SQLITE_OK ) rc = initData.rc; + if( rc==SQLITE_OK && initData.nInitRow==0 ){ + /* The OP_ParseSchema opcode with a non-NULL P4 argument should parse + ** at least one SQL statement. Any less than that indicates that + ** the sqlite_schema table is corrupt. */ + rc = SQLITE_CORRUPT_BKPT; + } + sqlite3DbFreeNN(db, zSql); + db->init.busy = 0; + } + } + if( rc ){ + sqlite3ResetAllSchemasOfConnection(db); + if( rc==SQLITE_NOMEM ){ + goto no_mem; + } + goto abort_due_to_error; + } + break; +} + +#if !defined(SQLITE_OMIT_ANALYZE) +/* Opcode: LoadAnalysis P1 * * * * +** +** Read the sqlite_stat1 table for database P1 and load the content +** of that table into the internal index hash table. This will cause +** the analysis to be used when preparing all subsequent queries. +*/ +case OP_LoadAnalysis: { + assert( pOp->p1>=0 && pOp->p1nDb ); + rc = sqlite3AnalysisLoad(db, pOp->p1); + if( rc ) goto abort_due_to_error; + break; +} +#endif /* !defined(SQLITE_OMIT_ANALYZE) */ + +/* Opcode: DropTable P1 * * P4 * +** +** Remove the internal (in-memory) data structures that describe +** the table named P4 in database P1. This is called after a table +** is dropped from disk (using the Destroy opcode) in order to keep +** the internal representation of the +** schema consistent with what is on disk. +*/ +case OP_DropTable: { + sqlite3VdbeIncrWriteCounter(p, 0); + sqlite3UnlinkAndDeleteTable(db, pOp->p1, pOp->p4.z); + break; +} + +/* Opcode: DropIndex P1 * * P4 * +** +** Remove the internal (in-memory) data structures that describe +** the index named P4 in database P1. This is called after an index +** is dropped from disk (using the Destroy opcode) +** in order to keep the internal representation of the +** schema consistent with what is on disk. +*/ +case OP_DropIndex: { + sqlite3VdbeIncrWriteCounter(p, 0); + sqlite3UnlinkAndDeleteIndex(db, pOp->p1, pOp->p4.z); + break; +} + +/* Opcode: DropTrigger P1 * * P4 * +** +** Remove the internal (in-memory) data structures that describe +** the trigger named P4 in database P1. This is called after a trigger +** is dropped from disk (using the Destroy opcode) in order to keep +** the internal representation of the +** schema consistent with what is on disk. +*/ +case OP_DropTrigger: { + sqlite3VdbeIncrWriteCounter(p, 0); + sqlite3UnlinkAndDeleteTrigger(db, pOp->p1, pOp->p4.z); + break; +} + + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* Opcode: IntegrityCk P1 P2 P3 P4 P5 +** +** Do an analysis of the currently open database. Store in +** register P1 the text of an error message describing any problems. +** If no problems are found, store a NULL in register P1. +** +** The register P3 contains one less than the maximum number of allowed errors. +** At most reg(P3) errors will be reported. +** In other words, the analysis stops as soon as reg(P1) errors are +** seen. Reg(P1) is updated with the number of errors remaining. +** +** The root page numbers of all tables in the database are integers +** stored in P4_INTARRAY argument. +** +** If P5 is not zero, the check is done on the auxiliary database +** file, not the main database file. +** +** This opcode is used to implement the integrity_check pragma. +*/ +case OP_IntegrityCk: { + int nRoot; /* Number of tables to check. (Number of root pages.) */ + Pgno *aRoot; /* Array of rootpage numbers for tables to be checked */ + int nErr; /* Number of errors reported */ + char *z; /* Text of the error report */ + Mem *pnErr; /* Register keeping track of errors remaining */ + + assert( p->bIsReader ); + nRoot = pOp->p2; + aRoot = pOp->p4.ai; + assert( nRoot>0 ); + assert( aRoot[0]==(Pgno)nRoot ); + assert( pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor) ); + pnErr = &aMem[pOp->p3]; + assert( (pnErr->flags & MEM_Int)!=0 ); + assert( (pnErr->flags & (MEM_Str|MEM_Blob))==0 ); + pIn1 = &aMem[pOp->p1]; + assert( pOp->p5nDb ); + assert( DbMaskTest(p->btreeMask, pOp->p5) ); + z = sqlite3BtreeIntegrityCheck(db, db->aDb[pOp->p5].pBt, &aRoot[1], nRoot, + (int)pnErr->u.i+1, &nErr); + sqlite3VdbeMemSetNull(pIn1); + if( nErr==0 ){ + assert( z==0 ); + }else if( z==0 ){ + goto no_mem; + }else{ + pnErr->u.i -= nErr-1; + sqlite3VdbeMemSetStr(pIn1, z, -1, SQLITE_UTF8, sqlite3_free); + } + UPDATE_MAX_BLOBSIZE(pIn1); + sqlite3VdbeChangeEncoding(pIn1, encoding); + goto check_for_interrupt; +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +/* Opcode: RowSetAdd P1 P2 * * * +** Synopsis: rowset(P1)=r[P2] +** +** Insert the integer value held by register P2 into a RowSet object +** held in register P1. +** +** An assertion fails if P2 is not an integer. +*/ +case OP_RowSetAdd: { /* in1, in2 */ + pIn1 = &aMem[pOp->p1]; + pIn2 = &aMem[pOp->p2]; + assert( (pIn2->flags & MEM_Int)!=0 ); + if( (pIn1->flags & MEM_Blob)==0 ){ + if( sqlite3VdbeMemSetRowSet(pIn1) ) goto no_mem; + } + assert( sqlite3VdbeMemIsRowSet(pIn1) ); + sqlite3RowSetInsert((RowSet*)pIn1->z, pIn2->u.i); + break; +} + +/* Opcode: RowSetRead P1 P2 P3 * * +** Synopsis: r[P3]=rowset(P1) +** +** Extract the smallest value from the RowSet object in P1 +** and put that value into register P3. +** Or, if RowSet object P1 is initially empty, leave P3 +** unchanged and jump to instruction P2. +*/ +case OP_RowSetRead: { /* jump, in1, out3 */ + i64 val; + + pIn1 = &aMem[pOp->p1]; + assert( (pIn1->flags & MEM_Blob)==0 || sqlite3VdbeMemIsRowSet(pIn1) ); + if( (pIn1->flags & MEM_Blob)==0 + || sqlite3RowSetNext((RowSet*)pIn1->z, &val)==0 + ){ + /* The boolean index is empty */ + sqlite3VdbeMemSetNull(pIn1); + VdbeBranchTaken(1,2); + goto jump_to_p2_and_check_for_interrupt; + }else{ + /* A value was pulled from the index */ + VdbeBranchTaken(0,2); + sqlite3VdbeMemSetInt64(&aMem[pOp->p3], val); + } + goto check_for_interrupt; +} + +/* Opcode: RowSetTest P1 P2 P3 P4 +** Synopsis: if r[P3] in rowset(P1) goto P2 +** +** Register P3 is assumed to hold a 64-bit integer value. If register P1 +** contains a RowSet object and that RowSet object contains +** the value held in P3, jump to register P2. Otherwise, insert the +** integer in P3 into the RowSet and continue on to the +** next opcode. +** +** The RowSet object is optimized for the case where sets of integers +** are inserted in distinct phases, which each set contains no duplicates. +** Each set is identified by a unique P4 value. The first set +** must have P4==0, the final set must have P4==-1, and for all other sets +** must have P4>0. +** +** This allows optimizations: (a) when P4==0 there is no need to test +** the RowSet object for P3, as it is guaranteed not to contain it, +** (b) when P4==-1 there is no need to insert the value, as it will +** never be tested for, and (c) when a value that is part of set X is +** inserted, there is no need to search to see if the same value was +** previously inserted as part of set X (only if it was previously +** inserted as part of some other set). +*/ +case OP_RowSetTest: { /* jump, in1, in3 */ + int iSet; + int exists; + + pIn1 = &aMem[pOp->p1]; + pIn3 = &aMem[pOp->p3]; + iSet = pOp->p4.i; + assert( pIn3->flags&MEM_Int ); + + /* If there is anything other than a rowset object in memory cell P1, + ** delete it now and initialize P1 with an empty rowset + */ + if( (pIn1->flags & MEM_Blob)==0 ){ + if( sqlite3VdbeMemSetRowSet(pIn1) ) goto no_mem; + } + assert( sqlite3VdbeMemIsRowSet(pIn1) ); + assert( pOp->p4type==P4_INT32 ); + assert( iSet==-1 || iSet>=0 ); + if( iSet ){ + exists = sqlite3RowSetTest((RowSet*)pIn1->z, iSet, pIn3->u.i); + VdbeBranchTaken(exists!=0,2); + if( exists ) goto jump_to_p2; + } + if( iSet>=0 ){ + sqlite3RowSetInsert((RowSet*)pIn1->z, pIn3->u.i); + } + break; +} + + +#ifndef SQLITE_OMIT_TRIGGER + +/* Opcode: Program P1 P2 P3 P4 P5 +** +** Execute the trigger program passed as P4 (type P4_SUBPROGRAM). +** +** P1 contains the address of the memory cell that contains the first memory +** cell in an array of values used as arguments to the sub-program. P2 +** contains the address to jump to if the sub-program throws an IGNORE +** exception using the RAISE() function. Register P3 contains the address +** of a memory cell in this (the parent) VM that is used to allocate the +** memory required by the sub-vdbe at runtime. +** +** P4 is a pointer to the VM containing the trigger program. +** +** If P5 is non-zero, then recursive program invocation is enabled. +*/ +case OP_Program: { /* jump */ + int nMem; /* Number of memory registers for sub-program */ + int nByte; /* Bytes of runtime space required for sub-program */ + Mem *pRt; /* Register to allocate runtime space */ + Mem *pMem; /* Used to iterate through memory cells */ + Mem *pEnd; /* Last memory cell in new array */ + VdbeFrame *pFrame; /* New vdbe frame to execute in */ + SubProgram *pProgram; /* Sub-program to execute */ + void *t; /* Token identifying trigger */ + + pProgram = pOp->p4.pProgram; + pRt = &aMem[pOp->p3]; + assert( pProgram->nOp>0 ); + + /* If the p5 flag is clear, then recursive invocation of triggers is + ** disabled for backwards compatibility (p5 is set if this sub-program + ** is really a trigger, not a foreign key action, and the flag set + ** and cleared by the "PRAGMA recursive_triggers" command is clear). + ** + ** It is recursive invocation of triggers, at the SQL level, that is + ** disabled. In some cases a single trigger may generate more than one + ** SubProgram (if the trigger may be executed with more than one different + ** ON CONFLICT algorithm). SubProgram structures associated with a + ** single trigger all have the same value for the SubProgram.token + ** variable. */ + if( pOp->p5 ){ + t = pProgram->token; + for(pFrame=p->pFrame; pFrame && pFrame->token!=t; pFrame=pFrame->pParent); + if( pFrame ) break; + } + + if( p->nFrame>=db->aLimit[SQLITE_LIMIT_TRIGGER_DEPTH] ){ + rc = SQLITE_ERROR; + sqlite3VdbeError(p, "too many levels of trigger recursion"); + goto abort_due_to_error; + } + + /* Register pRt is used to store the memory required to save the state + ** of the current program, and the memory required at runtime to execute + ** the trigger program. If this trigger has been fired before, then pRt + ** is already allocated. Otherwise, it must be initialized. */ + if( (pRt->flags&MEM_Blob)==0 ){ + /* SubProgram.nMem is set to the number of memory cells used by the + ** program stored in SubProgram.aOp. As well as these, one memory + ** cell is required for each cursor used by the program. Set local + ** variable nMem (and later, VdbeFrame.nChildMem) to this value. + */ + nMem = pProgram->nMem + pProgram->nCsr; + assert( nMem>0 ); + if( pProgram->nCsr==0 ) nMem++; + nByte = ROUND8(sizeof(VdbeFrame)) + + nMem * sizeof(Mem) + + pProgram->nCsr * sizeof(VdbeCursor*) + + (pProgram->nOp + 7)/8; + pFrame = sqlite3DbMallocZero(db, nByte); + if( !pFrame ){ + goto no_mem; + } + sqlite3VdbeMemRelease(pRt); + pRt->flags = MEM_Blob|MEM_Dyn; + pRt->z = (char*)pFrame; + pRt->n = nByte; + pRt->xDel = sqlite3VdbeFrameMemDel; + + pFrame->v = p; + pFrame->nChildMem = nMem; + pFrame->nChildCsr = pProgram->nCsr; + pFrame->pc = (int)(pOp - aOp); + pFrame->aMem = p->aMem; + pFrame->nMem = p->nMem; + pFrame->apCsr = p->apCsr; + pFrame->nCursor = p->nCursor; + pFrame->aOp = p->aOp; + pFrame->nOp = p->nOp; + pFrame->token = pProgram->token; +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + pFrame->anExec = p->anExec; +#endif +#ifdef SQLITE_DEBUG + pFrame->iFrameMagic = SQLITE_FRAME_MAGIC; +#endif + + pEnd = &VdbeFrameMem(pFrame)[pFrame->nChildMem]; + for(pMem=VdbeFrameMem(pFrame); pMem!=pEnd; pMem++){ + pMem->flags = MEM_Undefined; + pMem->db = db; + } + }else{ + pFrame = (VdbeFrame*)pRt->z; + assert( pRt->xDel==sqlite3VdbeFrameMemDel ); + assert( pProgram->nMem+pProgram->nCsr==pFrame->nChildMem + || (pProgram->nCsr==0 && pProgram->nMem+1==pFrame->nChildMem) ); + assert( pProgram->nCsr==pFrame->nChildCsr ); + assert( (int)(pOp - aOp)==pFrame->pc ); + } + + p->nFrame++; + pFrame->pParent = p->pFrame; + pFrame->lastRowid = db->lastRowid; + pFrame->nChange = p->nChange; + pFrame->nDbChange = p->db->nChange; + assert( pFrame->pAuxData==0 ); + pFrame->pAuxData = p->pAuxData; + p->pAuxData = 0; + p->nChange = 0; + p->pFrame = pFrame; + p->aMem = aMem = VdbeFrameMem(pFrame); + p->nMem = pFrame->nChildMem; + p->nCursor = (u16)pFrame->nChildCsr; + p->apCsr = (VdbeCursor **)&aMem[p->nMem]; + pFrame->aOnce = (u8*)&p->apCsr[pProgram->nCsr]; + memset(pFrame->aOnce, 0, (pProgram->nOp + 7)/8); + p->aOp = aOp = pProgram->aOp; + p->nOp = pProgram->nOp; +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + p->anExec = 0; +#endif +#ifdef SQLITE_DEBUG + /* Verify that second and subsequent executions of the same trigger do not + ** try to reuse register values from the first use. */ + { + int i; + for(i=0; inMem; i++){ + aMem[i].pScopyFrom = 0; /* Prevent false-positive AboutToChange() errs */ + MemSetTypeFlag(&aMem[i], MEM_Undefined); /* Fault if this reg is reused */ + } + } +#endif + pOp = &aOp[-1]; + goto check_for_interrupt; +} + +/* Opcode: Param P1 P2 * * * +** +** This opcode is only ever present in sub-programs called via the +** OP_Program instruction. Copy a value currently stored in a memory +** cell of the calling (parent) frame to cell P2 in the current frames +** address space. This is used by trigger programs to access the new.* +** and old.* values. +** +** The address of the cell in the parent frame is determined by adding +** the value of the P1 argument to the value of the P1 argument to the +** calling OP_Program instruction. +*/ +case OP_Param: { /* out2 */ + VdbeFrame *pFrame; + Mem *pIn; + pOut = out2Prerelease(p, pOp); + pFrame = p->pFrame; + pIn = &pFrame->aMem[pOp->p1 + pFrame->aOp[pFrame->pc].p1]; + sqlite3VdbeMemShallowCopy(pOut, pIn, MEM_Ephem); + break; +} + +#endif /* #ifndef SQLITE_OMIT_TRIGGER */ + +#ifndef SQLITE_OMIT_FOREIGN_KEY +/* Opcode: FkCounter P1 P2 * * * +** Synopsis: fkctr[P1]+=P2 +** +** Increment a "constraint counter" by P2 (P2 may be negative or positive). +** If P1 is non-zero, the database constraint counter is incremented +** (deferred foreign key constraints). Otherwise, if P1 is zero, the +** statement counter is incremented (immediate foreign key constraints). +*/ +case OP_FkCounter: { + if( db->flags & SQLITE_DeferFKs ){ + db->nDeferredImmCons += pOp->p2; + }else if( pOp->p1 ){ + db->nDeferredCons += pOp->p2; + }else{ + p->nFkConstraint += pOp->p2; + } + break; +} + +/* Opcode: FkIfZero P1 P2 * * * +** Synopsis: if fkctr[P1]==0 goto P2 +** +** This opcode tests if a foreign key constraint-counter is currently zero. +** If so, jump to instruction P2. Otherwise, fall through to the next +** instruction. +** +** If P1 is non-zero, then the jump is taken if the database constraint-counter +** is zero (the one that counts deferred constraint violations). If P1 is +** zero, the jump is taken if the statement constraint-counter is zero +** (immediate foreign key constraint violations). +*/ +case OP_FkIfZero: { /* jump */ + if( pOp->p1 ){ + VdbeBranchTaken(db->nDeferredCons==0 && db->nDeferredImmCons==0, 2); + if( db->nDeferredCons==0 && db->nDeferredImmCons==0 ) goto jump_to_p2; + }else{ + VdbeBranchTaken(p->nFkConstraint==0 && db->nDeferredImmCons==0, 2); + if( p->nFkConstraint==0 && db->nDeferredImmCons==0 ) goto jump_to_p2; + } + break; +} +#endif /* #ifndef SQLITE_OMIT_FOREIGN_KEY */ + +#ifndef SQLITE_OMIT_AUTOINCREMENT +/* Opcode: MemMax P1 P2 * * * +** Synopsis: r[P1]=max(r[P1],r[P2]) +** +** P1 is a register in the root frame of this VM (the root frame is +** different from the current frame if this instruction is being executed +** within a sub-program). Set the value of register P1 to the maximum of +** its current value and the value in register P2. +** +** This instruction throws an error if the memory cell is not initially +** an integer. +*/ +case OP_MemMax: { /* in2 */ + VdbeFrame *pFrame; + if( p->pFrame ){ + for(pFrame=p->pFrame; pFrame->pParent; pFrame=pFrame->pParent); + pIn1 = &pFrame->aMem[pOp->p1]; + }else{ + pIn1 = &aMem[pOp->p1]; + } + assert( memIsValid(pIn1) ); + sqlite3VdbeMemIntegerify(pIn1); + pIn2 = &aMem[pOp->p2]; + sqlite3VdbeMemIntegerify(pIn2); + if( pIn1->u.iu.i){ + pIn1->u.i = pIn2->u.i; + } + break; +} +#endif /* SQLITE_OMIT_AUTOINCREMENT */ + +/* Opcode: IfPos P1 P2 P3 * * +** Synopsis: if r[P1]>0 then r[P1]-=P3, goto P2 +** +** Register P1 must contain an integer. +** If the value of register P1 is 1 or greater, subtract P3 from the +** value in P1 and jump to P2. +** +** If the initial value of register P1 is less than 1, then the +** value is unchanged and control passes through to the next instruction. +*/ +case OP_IfPos: { /* jump, in1 */ + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags&MEM_Int ); + VdbeBranchTaken( pIn1->u.i>0, 2); + if( pIn1->u.i>0 ){ + pIn1->u.i -= pOp->p3; + goto jump_to_p2; + } + break; +} + +/* Opcode: OffsetLimit P1 P2 P3 * * +** Synopsis: if r[P1]>0 then r[P2]=r[P1]+max(0,r[P3]) else r[P2]=(-1) +** +** This opcode performs a commonly used computation associated with +** LIMIT and OFFSET process. r[P1] holds the limit counter. r[P3] +** holds the offset counter. The opcode computes the combined value +** of the LIMIT and OFFSET and stores that value in r[P2]. The r[P2] +** value computed is the total number of rows that will need to be +** visited in order to complete the query. +** +** If r[P3] is zero or negative, that means there is no OFFSET +** and r[P2] is set to be the value of the LIMIT, r[P1]. +** +** if r[P1] is zero or negative, that means there is no LIMIT +** and r[P2] is set to -1. +** +** Otherwise, r[P2] is set to the sum of r[P1] and r[P3]. +*/ +case OP_OffsetLimit: { /* in1, out2, in3 */ + i64 x; + pIn1 = &aMem[pOp->p1]; + pIn3 = &aMem[pOp->p3]; + pOut = out2Prerelease(p, pOp); + assert( pIn1->flags & MEM_Int ); + assert( pIn3->flags & MEM_Int ); + x = pIn1->u.i; + if( x<=0 || sqlite3AddInt64(&x, pIn3->u.i>0?pIn3->u.i:0) ){ + /* If the LIMIT is less than or equal to zero, loop forever. This + ** is documented. But also, if the LIMIT+OFFSET exceeds 2^63 then + ** also loop forever. This is undocumented. In fact, one could argue + ** that the loop should terminate. But assuming 1 billion iterations + ** per second (far exceeding the capabilities of any current hardware) + ** it would take nearly 300 years to actually reach the limit. So + ** looping forever is a reasonable approximation. */ + pOut->u.i = -1; + }else{ + pOut->u.i = x; + } + break; +} + +/* Opcode: IfNotZero P1 P2 * * * +** Synopsis: if r[P1]!=0 then r[P1]--, goto P2 +** +** Register P1 must contain an integer. If the content of register P1 is +** initially greater than zero, then decrement the value in register P1. +** If it is non-zero (negative or positive) and then also jump to P2. +** If register P1 is initially zero, leave it unchanged and fall through. +*/ +case OP_IfNotZero: { /* jump, in1 */ + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags&MEM_Int ); + VdbeBranchTaken(pIn1->u.i<0, 2); + if( pIn1->u.i ){ + if( pIn1->u.i>0 ) pIn1->u.i--; + goto jump_to_p2; + } + break; +} + +/* Opcode: DecrJumpZero P1 P2 * * * +** Synopsis: if (--r[P1])==0 goto P2 +** +** Register P1 must hold an integer. Decrement the value in P1 +** and jump to P2 if the new value is exactly zero. +*/ +case OP_DecrJumpZero: { /* jump, in1 */ + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags&MEM_Int ); + if( pIn1->u.i>SMALLEST_INT64 ) pIn1->u.i--; + VdbeBranchTaken(pIn1->u.i==0, 2); + if( pIn1->u.i==0 ) goto jump_to_p2; + break; +} + + +/* Opcode: AggStep * P2 P3 P4 P5 +** Synopsis: accum=r[P3] step(r[P2@P5]) +** +** Execute the xStep function for an aggregate. +** The function has P5 arguments. P4 is a pointer to the +** FuncDef structure that specifies the function. Register P3 is the +** accumulator. +** +** The P5 arguments are taken from register P2 and its +** successors. +*/ +/* Opcode: AggInverse * P2 P3 P4 P5 +** Synopsis: accum=r[P3] inverse(r[P2@P5]) +** +** Execute the xInverse function for an aggregate. +** The function has P5 arguments. P4 is a pointer to the +** FuncDef structure that specifies the function. Register P3 is the +** accumulator. +** +** The P5 arguments are taken from register P2 and its +** successors. +*/ +/* Opcode: AggStep1 P1 P2 P3 P4 P5 +** Synopsis: accum=r[P3] step(r[P2@P5]) +** +** Execute the xStep (if P1==0) or xInverse (if P1!=0) function for an +** aggregate. The function has P5 arguments. P4 is a pointer to the +** FuncDef structure that specifies the function. Register P3 is the +** accumulator. +** +** The P5 arguments are taken from register P2 and its +** successors. +** +** This opcode is initially coded as OP_AggStep0. On first evaluation, +** the FuncDef stored in P4 is converted into an sqlite3_context and +** the opcode is changed. In this way, the initialization of the +** sqlite3_context only happens once, instead of on each call to the +** step function. +*/ +case OP_AggInverse: +case OP_AggStep: { + int n; + sqlite3_context *pCtx; + + assert( pOp->p4type==P4_FUNCDEF ); + n = pOp->p5; + assert( pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor) ); + assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem+1 - p->nCursor)+1) ); + assert( pOp->p3p2 || pOp->p3>=pOp->p2+n ); + pCtx = sqlite3DbMallocRawNN(db, n*sizeof(sqlite3_value*) + + (sizeof(pCtx[0]) + sizeof(Mem) - sizeof(sqlite3_value*))); + if( pCtx==0 ) goto no_mem; + pCtx->pMem = 0; + pCtx->pOut = (Mem*)&(pCtx->argv[n]); + sqlite3VdbeMemInit(pCtx->pOut, db, MEM_Null); + pCtx->pFunc = pOp->p4.pFunc; + pCtx->iOp = (int)(pOp - aOp); + pCtx->pVdbe = p; + pCtx->skipFlag = 0; + pCtx->isError = 0; + pCtx->argc = n; + pOp->p4type = P4_FUNCCTX; + pOp->p4.pCtx = pCtx; + + /* OP_AggInverse must have P1==1 and OP_AggStep must have P1==0 */ + assert( pOp->p1==(pOp->opcode==OP_AggInverse) ); + + pOp->opcode = OP_AggStep1; + /* Fall through into OP_AggStep */ + /* no break */ deliberate_fall_through +} +case OP_AggStep1: { + int i; + sqlite3_context *pCtx; + Mem *pMem; + + assert( pOp->p4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + pMem = &aMem[pOp->p3]; + +#ifdef SQLITE_DEBUG + if( pOp->p1 ){ + /* This is an OP_AggInverse call. Verify that xStep has always + ** been called at least once prior to any xInverse call. */ + assert( pMem->uTemp==0x1122e0e3 ); + }else{ + /* This is an OP_AggStep call. Mark it as such. */ + pMem->uTemp = 0x1122e0e3; + } +#endif + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + if( pCtx->pMem != pMem ){ + pCtx->pMem = pMem; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; + } + +#ifdef SQLITE_DEBUG + for(i=0; iargc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + + pMem->n++; + assert( pCtx->pOut->flags==MEM_Null ); + assert( pCtx->isError==0 ); + assert( pCtx->skipFlag==0 ); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pOp->p1 ){ + (pCtx->pFunc->xInverse)(pCtx,pCtx->argc,pCtx->argv); + }else +#endif + (pCtx->pFunc->xSFunc)(pCtx,pCtx->argc,pCtx->argv); /* IMP: R-24505-23230 */ + + if( pCtx->isError ){ + if( pCtx->isError>0 ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(pCtx->pOut)); + rc = pCtx->isError; + } + if( pCtx->skipFlag ){ + assert( pOp[-1].opcode==OP_CollSeq ); + i = pOp[-1].p1; + if( i ) sqlite3VdbeMemSetInt64(&aMem[i], 1); + pCtx->skipFlag = 0; + } + sqlite3VdbeMemRelease(pCtx->pOut); + pCtx->pOut->flags = MEM_Null; + pCtx->isError = 0; + if( rc ) goto abort_due_to_error; + } + assert( pCtx->pOut->flags==MEM_Null ); + assert( pCtx->skipFlag==0 ); + break; +} + +/* Opcode: AggFinal P1 P2 * P4 * +** Synopsis: accum=r[P1] N=P2 +** +** P1 is the memory location that is the accumulator for an aggregate +** or window function. Execute the finalizer function +** for an aggregate and store the result in P1. +** +** P2 is the number of arguments that the step function takes and +** P4 is a pointer to the FuncDef for this function. The P2 +** argument is not used by this opcode. It is only there to disambiguate +** functions that can take varying numbers of arguments. The +** P4 argument is only needed for the case where +** the step function was not previously called. +*/ +/* Opcode: AggValue * P2 P3 P4 * +** Synopsis: r[P3]=value N=P2 +** +** Invoke the xValue() function and store the result in register P3. +** +** P2 is the number of arguments that the step function takes and +** P4 is a pointer to the FuncDef for this function. The P2 +** argument is not used by this opcode. It is only there to disambiguate +** functions that can take varying numbers of arguments. The +** P4 argument is only needed for the case where +** the step function was not previously called. +*/ +case OP_AggValue: +case OP_AggFinal: { + Mem *pMem; + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + assert( pOp->p3==0 || pOp->opcode==OP_AggValue ); + pMem = &aMem[pOp->p1]; + assert( (pMem->flags & ~(MEM_Null|MEM_Agg))==0 ); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pOp->p3 ){ + memAboutToChange(p, &aMem[pOp->p3]); + rc = sqlite3VdbeMemAggValue(pMem, &aMem[pOp->p3], pOp->p4.pFunc); + pMem = &aMem[pOp->p3]; + }else +#endif + { + rc = sqlite3VdbeMemFinalize(pMem, pOp->p4.pFunc); + } + + if( rc ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(pMem)); + goto abort_due_to_error; + } + sqlite3VdbeChangeEncoding(pMem, encoding); + UPDATE_MAX_BLOBSIZE(pMem); + if( sqlite3VdbeMemTooBig(pMem) ){ + goto too_big; + } + break; +} + +#ifndef SQLITE_OMIT_WAL +/* Opcode: Checkpoint P1 P2 P3 * * +** +** Checkpoint database P1. This is a no-op if P1 is not currently in +** WAL mode. Parameter P2 is one of SQLITE_CHECKPOINT_PASSIVE, FULL, +** RESTART, or TRUNCATE. Write 1 or 0 into mem[P3] if the checkpoint returns +** SQLITE_BUSY or not, respectively. Write the number of pages in the +** WAL after the checkpoint into mem[P3+1] and the number of pages +** in the WAL that have been checkpointed after the checkpoint +** completes into mem[P3+2]. However on an error, mem[P3+1] and +** mem[P3+2] are initialized to -1. +*/ +case OP_Checkpoint: { + int i; /* Loop counter */ + int aRes[3]; /* Results */ + Mem *pMem; /* Write results here */ + + assert( p->readOnly==0 ); + aRes[0] = 0; + aRes[1] = aRes[2] = -1; + assert( pOp->p2==SQLITE_CHECKPOINT_PASSIVE + || pOp->p2==SQLITE_CHECKPOINT_FULL + || pOp->p2==SQLITE_CHECKPOINT_RESTART + || pOp->p2==SQLITE_CHECKPOINT_TRUNCATE + ); + rc = sqlite3Checkpoint(db, pOp->p1, pOp->p2, &aRes[1], &aRes[2]); + if( rc ){ + if( rc!=SQLITE_BUSY ) goto abort_due_to_error; + rc = SQLITE_OK; + aRes[0] = 1; + } + for(i=0, pMem = &aMem[pOp->p3]; i<3; i++, pMem++){ + sqlite3VdbeMemSetInt64(pMem, (i64)aRes[i]); + } + break; +}; +#endif + +#ifndef SQLITE_OMIT_PRAGMA +/* Opcode: JournalMode P1 P2 P3 * * +** +** Change the journal mode of database P1 to P3. P3 must be one of the +** PAGER_JOURNALMODE_XXX values. If changing between the various rollback +** modes (delete, truncate, persist, off and memory), this is a simple +** operation. No IO is required. +** +** If changing into or out of WAL mode the procedure is more complicated. +** +** Write a string containing the final journal-mode to register P2. +*/ +case OP_JournalMode: { /* out2 */ + Btree *pBt; /* Btree to change journal mode of */ + Pager *pPager; /* Pager associated with pBt */ + int eNew; /* New journal mode */ + int eOld; /* The old journal mode */ +#ifndef SQLITE_OMIT_WAL + const char *zFilename; /* Name of database file for pPager */ +#endif + + pOut = out2Prerelease(p, pOp); + eNew = pOp->p3; + assert( eNew==PAGER_JOURNALMODE_DELETE + || eNew==PAGER_JOURNALMODE_TRUNCATE + || eNew==PAGER_JOURNALMODE_PERSIST + || eNew==PAGER_JOURNALMODE_OFF + || eNew==PAGER_JOURNALMODE_MEMORY + || eNew==PAGER_JOURNALMODE_WAL + || eNew==PAGER_JOURNALMODE_QUERY + ); + assert( pOp->p1>=0 && pOp->p1nDb ); + assert( p->readOnly==0 ); + + pBt = db->aDb[pOp->p1].pBt; + pPager = sqlite3BtreePager(pBt); + eOld = sqlite3PagerGetJournalMode(pPager); + if( eNew==PAGER_JOURNALMODE_QUERY ) eNew = eOld; + assert( sqlite3BtreeHoldsMutex(pBt) ); + if( !sqlite3PagerOkToChangeJournalMode(pPager) ) eNew = eOld; + +#ifndef SQLITE_OMIT_WAL + zFilename = sqlite3PagerFilename(pPager, 1); + + /* Do not allow a transition to journal_mode=WAL for a database + ** in temporary storage or if the VFS does not support shared memory + */ + if( eNew==PAGER_JOURNALMODE_WAL + && (sqlite3Strlen30(zFilename)==0 /* Temp file */ + || !sqlite3PagerWalSupported(pPager)) /* No shared-memory support */ + ){ + eNew = eOld; + } + + if( (eNew!=eOld) + && (eOld==PAGER_JOURNALMODE_WAL || eNew==PAGER_JOURNALMODE_WAL) + ){ + if( !db->autoCommit || db->nVdbeRead>1 ){ + rc = SQLITE_ERROR; + sqlite3VdbeError(p, + "cannot change %s wal mode from within a transaction", + (eNew==PAGER_JOURNALMODE_WAL ? "into" : "out of") + ); + goto abort_due_to_error; + }else{ + + if( eOld==PAGER_JOURNALMODE_WAL ){ + /* If leaving WAL mode, close the log file. If successful, the call + ** to PagerCloseWal() checkpoints and deletes the write-ahead-log + ** file. An EXCLUSIVE lock may still be held on the database file + ** after a successful return. + */ + rc = sqlite3PagerCloseWal(pPager, db); + if( rc==SQLITE_OK ){ + sqlite3PagerSetJournalMode(pPager, eNew); + } + }else if( eOld==PAGER_JOURNALMODE_MEMORY ){ + /* Cannot transition directly from MEMORY to WAL. Use mode OFF + ** as an intermediate */ + sqlite3PagerSetJournalMode(pPager, PAGER_JOURNALMODE_OFF); + } + + /* Open a transaction on the database file. Regardless of the journal + ** mode, this transaction always uses a rollback journal. + */ + assert( sqlite3BtreeTxnState(pBt)!=SQLITE_TXN_WRITE ); + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeSetVersion(pBt, (eNew==PAGER_JOURNALMODE_WAL ? 2 : 1)); + } + } + } +#endif /* ifndef SQLITE_OMIT_WAL */ + + if( rc ) eNew = eOld; + eNew = sqlite3PagerSetJournalMode(pPager, eNew); + + pOut->flags = MEM_Str|MEM_Static|MEM_Term; + pOut->z = (char *)sqlite3JournalModename(eNew); + pOut->n = sqlite3Strlen30(pOut->z); + pOut->enc = SQLITE_UTF8; + sqlite3VdbeChangeEncoding(pOut, encoding); + if( rc ) goto abort_due_to_error; + break; +}; +#endif /* SQLITE_OMIT_PRAGMA */ + +#if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) +/* Opcode: Vacuum P1 P2 * * * +** +** Vacuum the entire database P1. P1 is 0 for "main", and 2 or more +** for an attached database. The "temp" database may not be vacuumed. +** +** If P2 is not zero, then it is a register holding a string which is +** the file into which the result of vacuum should be written. When +** P2 is zero, the vacuum overwrites the original database. +*/ +case OP_Vacuum: { + assert( p->readOnly==0 ); + rc = sqlite3RunVacuum(&p->zErrMsg, db, pOp->p1, + pOp->p2 ? &aMem[pOp->p2] : 0); + if( rc ) goto abort_due_to_error; + break; +} +#endif + +#if !defined(SQLITE_OMIT_AUTOVACUUM) +/* Opcode: IncrVacuum P1 P2 * * * +** +** Perform a single step of the incremental vacuum procedure on +** the P1 database. If the vacuum has finished, jump to instruction +** P2. Otherwise, fall through to the next instruction. +*/ +case OP_IncrVacuum: { /* jump */ + Btree *pBt; + + assert( pOp->p1>=0 && pOp->p1nDb ); + assert( DbMaskTest(p->btreeMask, pOp->p1) ); + assert( p->readOnly==0 ); + pBt = db->aDb[pOp->p1].pBt; + rc = sqlite3BtreeIncrVacuum(pBt); + VdbeBranchTaken(rc==SQLITE_DONE,2); + if( rc ){ + if( rc!=SQLITE_DONE ) goto abort_due_to_error; + rc = SQLITE_OK; + goto jump_to_p2; + } + break; +} +#endif + +/* Opcode: Expire P1 P2 * * * +** +** Cause precompiled statements to expire. When an expired statement +** is executed using sqlite3_step() it will either automatically +** reprepare itself (if it was originally created using sqlite3_prepare_v2()) +** or it will fail with SQLITE_SCHEMA. +** +** If P1 is 0, then all SQL statements become expired. If P1 is non-zero, +** then only the currently executing statement is expired. +** +** If P2 is 0, then SQL statements are expired immediately. If P2 is 1, +** then running SQL statements are allowed to continue to run to completion. +** The P2==1 case occurs when a CREATE INDEX or similar schema change happens +** that might help the statement run faster but which does not affect the +** correctness of operation. +*/ +case OP_Expire: { + assert( pOp->p2==0 || pOp->p2==1 ); + if( !pOp->p1 ){ + sqlite3ExpirePreparedStatements(db, pOp->p2); + }else{ + p->expired = pOp->p2+1; + } + break; +} + +/* Opcode: CursorLock P1 * * * * +** +** Lock the btree to which cursor P1 is pointing so that the btree cannot be +** written by an other cursor. +*/ +case OP_CursorLock: { + VdbeCursor *pC; + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + sqlite3BtreeCursorPin(pC->uc.pCursor); + break; +} + +/* Opcode: CursorUnlock P1 * * * * +** +** Unlock the btree to which cursor P1 is pointing so that it can be +** written by other cursors. +*/ +case OP_CursorUnlock: { + VdbeCursor *pC; + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + sqlite3BtreeCursorUnpin(pC->uc.pCursor); + break; +} + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* Opcode: TableLock P1 P2 P3 P4 * +** Synopsis: iDb=P1 root=P2 write=P3 +** +** Obtain a lock on a particular table. This instruction is only used when +** the shared-cache feature is enabled. +** +** P1 is the index of the database in sqlite3.aDb[] of the database +** on which the lock is acquired. A readlock is obtained if P3==0 or +** a write lock if P3==1. +** +** P2 contains the root-page of the table to lock. +** +** P4 contains a pointer to the name of the table being locked. This is only +** used to generate an error message if the lock cannot be obtained. +*/ +case OP_TableLock: { + u8 isWriteLock = (u8)pOp->p3; + if( isWriteLock || 0==(db->flags&SQLITE_ReadUncommit) ){ + int p1 = pOp->p1; + assert( p1>=0 && p1nDb ); + assert( DbMaskTest(p->btreeMask, p1) ); + assert( isWriteLock==0 || isWriteLock==1 ); + rc = sqlite3BtreeLockTable(db->aDb[p1].pBt, pOp->p2, isWriteLock); + if( rc ){ + if( (rc&0xFF)==SQLITE_LOCKED ){ + const char *z = pOp->p4.z; + sqlite3VdbeError(p, "database table is locked: %s", z); + } + goto abort_due_to_error; + } + } + break; +} +#endif /* SQLITE_OMIT_SHARED_CACHE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VBegin * * * P4 * +** +** P4 may be a pointer to an sqlite3_vtab structure. If so, call the +** xBegin method for that table. +** +** Also, whether or not P4 is set, check that this is not being called from +** within a callback to a virtual table xSync() method. If it is, the error +** code will be set to SQLITE_LOCKED. +*/ +case OP_VBegin: { + VTable *pVTab; + pVTab = pOp->p4.pVtab; + rc = sqlite3VtabBegin(db, pVTab); + if( pVTab ) sqlite3VtabImportErrmsg(p, pVTab->pVtab); + if( rc ) goto abort_due_to_error; + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VCreate P1 P2 * * * +** +** P2 is a register that holds the name of a virtual table in database +** P1. Call the xCreate method for that table. +*/ +case OP_VCreate: { + Mem sMem; /* For storing the record being decoded */ + const char *zTab; /* Name of the virtual table */ + + memset(&sMem, 0, sizeof(sMem)); + sMem.db = db; + /* Because P2 is always a static string, it is impossible for the + ** sqlite3VdbeMemCopy() to fail */ + assert( (aMem[pOp->p2].flags & MEM_Str)!=0 ); + assert( (aMem[pOp->p2].flags & MEM_Static)!=0 ); + rc = sqlite3VdbeMemCopy(&sMem, &aMem[pOp->p2]); + assert( rc==SQLITE_OK ); + zTab = (const char*)sqlite3_value_text(&sMem); + assert( zTab || db->mallocFailed ); + if( zTab ){ + rc = sqlite3VtabCallCreate(db, pOp->p1, zTab, &p->zErrMsg); + } + sqlite3VdbeMemRelease(&sMem); + if( rc ) goto abort_due_to_error; + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VDestroy P1 * * P4 * +** +** P4 is the name of a virtual table in database P1. Call the xDestroy method +** of that table. +*/ +case OP_VDestroy: { + db->nVDestroy++; + rc = sqlite3VtabCallDestroy(db, pOp->p1, pOp->p4.z); + db->nVDestroy--; + assert( p->errorAction==OE_Abort && p->usesStmtJournal ); + if( rc ) goto abort_due_to_error; + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VOpen P1 * * P4 * +** +** P4 is a pointer to a virtual table object, an sqlite3_vtab structure. +** P1 is a cursor number. This opcode opens a cursor to the virtual +** table and stores that cursor in P1. +*/ +case OP_VOpen: { + VdbeCursor *pCur; + sqlite3_vtab_cursor *pVCur; + sqlite3_vtab *pVtab; + const sqlite3_module *pModule; + + assert( p->bIsReader ); + pCur = 0; + pVCur = 0; + pVtab = pOp->p4.pVtab->pVtab; + if( pVtab==0 || NEVER(pVtab->pModule==0) ){ + rc = SQLITE_LOCKED; + goto abort_due_to_error; + } + pModule = pVtab->pModule; + rc = pModule->xOpen(pVtab, &pVCur); + sqlite3VtabImportErrmsg(p, pVtab); + if( rc ) goto abort_due_to_error; + + /* Initialize sqlite3_vtab_cursor base class */ + pVCur->pVtab = pVtab; + + /* Initialize vdbe cursor object */ + pCur = allocateCursor(p, pOp->p1, 0, CURTYPE_VTAB); + if( pCur ){ + pCur->uc.pVCur = pVCur; + pVtab->nRef++; + }else{ + assert( db->mallocFailed ); + pModule->xClose(pVCur); + goto no_mem; + } + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VInitIn P1 P2 P3 * * +** Synopsis: r[P2]=ValueList(P1,P3) +** +** Set register P2 to be a pointer to a ValueList object for cursor P1 +** with cache register P3 and output register P3+1. This ValueList object +** can be used as the first argument to sqlite3_vtab_in_first() and +** sqlite3_vtab_in_next() to extract all of the values stored in the P1 +** cursor. Register P3 is used to hold the values returned by +** sqlite3_vtab_in_first() and sqlite3_vtab_in_next(). +*/ +case OP_VInitIn: { /* out2 */ + VdbeCursor *pC; /* The cursor containing the RHS values */ + ValueList *pRhs; /* New ValueList object to put in reg[P2] */ + + pC = p->apCsr[pOp->p1]; + pRhs = sqlite3_malloc64( sizeof(*pRhs) ); + if( pRhs==0 ) goto no_mem; + pRhs->pCsr = pC->uc.pCursor; + pRhs->pOut = &aMem[pOp->p3]; + pOut = out2Prerelease(p, pOp); + pOut->flags = MEM_Null; + sqlite3VdbeMemSetPointer(pOut, pRhs, "ValueList", sqlite3_free); + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VFilter P1 P2 P3 P4 * +** Synopsis: iplan=r[P3] zplan='P4' +** +** P1 is a cursor opened using VOpen. P2 is an address to jump to if +** the filtered result set is empty. +** +** P4 is either NULL or a string that was generated by the xBestIndex +** method of the module. The interpretation of the P4 string is left +** to the module implementation. +** +** This opcode invokes the xFilter method on the virtual table specified +** by P1. The integer query plan parameter to xFilter is stored in register +** P3. Register P3+1 stores the argc parameter to be passed to the +** xFilter method. Registers P3+2..P3+1+argc are the argc +** additional parameters which are passed to +** xFilter as argv. Register P3+2 becomes argv[0] when passed to xFilter. +** +** A jump is made to P2 if the result set after filtering would be empty. +*/ +case OP_VFilter: { /* jump */ + int nArg; + int iQuery; + const sqlite3_module *pModule; + Mem *pQuery; + Mem *pArgc; + sqlite3_vtab_cursor *pVCur; + sqlite3_vtab *pVtab; + VdbeCursor *pCur; + int res; + int i; + Mem **apArg; + + pQuery = &aMem[pOp->p3]; + pArgc = &pQuery[1]; + pCur = p->apCsr[pOp->p1]; + assert( memIsValid(pQuery) ); + REGISTER_TRACE(pOp->p3, pQuery); + assert( pCur!=0 ); + assert( pCur->eCurType==CURTYPE_VTAB ); + pVCur = pCur->uc.pVCur; + pVtab = pVCur->pVtab; + pModule = pVtab->pModule; + + /* Grab the index number and argc parameters */ + assert( (pQuery->flags&MEM_Int)!=0 && pArgc->flags==MEM_Int ); + nArg = (int)pArgc->u.i; + iQuery = (int)pQuery->u.i; + + /* Invoke the xFilter method */ + apArg = p->apArg; + for(i = 0; ixFilter(pVCur, iQuery, pOp->p4.z, nArg, apArg); + sqlite3VtabImportErrmsg(p, pVtab); + if( rc ) goto abort_due_to_error; + res = pModule->xEof(pVCur); + pCur->nullRow = 0; + VdbeBranchTaken(res!=0,2); + if( res ) goto jump_to_p2; + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VColumn P1 P2 P3 * P5 +** Synopsis: r[P3]=vcolumn(P2) +** +** Store in register P3 the value of the P2-th column of +** the current row of the virtual-table of cursor P1. +** +** If the VColumn opcode is being used to fetch the value of +** an unchanging column during an UPDATE operation, then the P5 +** value is OPFLAG_NOCHNG. This will cause the sqlite3_vtab_nochange() +** function to return true inside the xColumn method of the virtual +** table implementation. The P5 column might also contain other +** bits (OPFLAG_LENGTHARG or OPFLAG_TYPEOFARG) but those bits are +** unused by OP_VColumn. +*/ +case OP_VColumn: { + sqlite3_vtab *pVtab; + const sqlite3_module *pModule; + Mem *pDest; + sqlite3_context sContext; + + VdbeCursor *pCur = p->apCsr[pOp->p1]; + assert( pCur!=0 ); + assert( pCur->eCurType==CURTYPE_VTAB ); + assert( pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor) ); + pDest = &aMem[pOp->p3]; + memAboutToChange(p, pDest); + if( pCur->nullRow ){ + sqlite3VdbeMemSetNull(pDest); + break; + } + pVtab = pCur->uc.pVCur->pVtab; + pModule = pVtab->pModule; + assert( pModule->xColumn ); + memset(&sContext, 0, sizeof(sContext)); + sContext.pOut = pDest; + assert( pOp->p5==OPFLAG_NOCHNG || pOp->p5==0 ); + if( pOp->p5 & OPFLAG_NOCHNG ){ + sqlite3VdbeMemSetNull(pDest); + pDest->flags = MEM_Null|MEM_Zero; + pDest->u.nZero = 0; + }else{ + MemSetTypeFlag(pDest, MEM_Null); + } + rc = pModule->xColumn(pCur->uc.pVCur, &sContext, pOp->p2); + sqlite3VtabImportErrmsg(p, pVtab); + if( sContext.isError>0 ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(pDest)); + rc = sContext.isError; + } + sqlite3VdbeChangeEncoding(pDest, encoding); + REGISTER_TRACE(pOp->p3, pDest); + UPDATE_MAX_BLOBSIZE(pDest); + + if( sqlite3VdbeMemTooBig(pDest) ){ + goto too_big; + } + if( rc ) goto abort_due_to_error; + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VNext P1 P2 * * * +** +** Advance virtual table P1 to the next row in its result set and +** jump to instruction P2. Or, if the virtual table has reached +** the end of its result set, then fall through to the next instruction. +*/ +case OP_VNext: { /* jump */ + sqlite3_vtab *pVtab; + const sqlite3_module *pModule; + int res; + VdbeCursor *pCur; + + pCur = p->apCsr[pOp->p1]; + assert( pCur!=0 ); + assert( pCur->eCurType==CURTYPE_VTAB ); + if( pCur->nullRow ){ + break; + } + pVtab = pCur->uc.pVCur->pVtab; + pModule = pVtab->pModule; + assert( pModule->xNext ); + + /* Invoke the xNext() method of the module. There is no way for the + ** underlying implementation to return an error if one occurs during + ** xNext(). Instead, if an error occurs, true is returned (indicating that + ** data is available) and the error code returned when xColumn or + ** some other method is next invoked on the save virtual table cursor. + */ + rc = pModule->xNext(pCur->uc.pVCur); + sqlite3VtabImportErrmsg(p, pVtab); + if( rc ) goto abort_due_to_error; + res = pModule->xEof(pCur->uc.pVCur); + VdbeBranchTaken(!res,2); + if( !res ){ + /* If there is data, jump to P2 */ + goto jump_to_p2_and_check_for_interrupt; + } + goto check_for_interrupt; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VRename P1 * * P4 * +** +** P4 is a pointer to a virtual table object, an sqlite3_vtab structure. +** This opcode invokes the corresponding xRename method. The value +** in register P1 is passed as the zName argument to the xRename method. +*/ +case OP_VRename: { + sqlite3_vtab *pVtab; + Mem *pName; + int isLegacy; + + isLegacy = (db->flags & SQLITE_LegacyAlter); + db->flags |= SQLITE_LegacyAlter; + pVtab = pOp->p4.pVtab->pVtab; + pName = &aMem[pOp->p1]; + assert( pVtab->pModule->xRename ); + assert( memIsValid(pName) ); + assert( p->readOnly==0 ); + REGISTER_TRACE(pOp->p1, pName); + assert( pName->flags & MEM_Str ); + testcase( pName->enc==SQLITE_UTF8 ); + testcase( pName->enc==SQLITE_UTF16BE ); + testcase( pName->enc==SQLITE_UTF16LE ); + rc = sqlite3VdbeChangeEncoding(pName, SQLITE_UTF8); + if( rc ) goto abort_due_to_error; + rc = pVtab->pModule->xRename(pVtab, pName->z); + if( isLegacy==0 ) db->flags &= ~(u64)SQLITE_LegacyAlter; + sqlite3VtabImportErrmsg(p, pVtab); + p->expired = 0; + if( rc ) goto abort_due_to_error; + break; +} +#endif + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VUpdate P1 P2 P3 P4 P5 +** Synopsis: data=r[P3@P2] +** +** P4 is a pointer to a virtual table object, an sqlite3_vtab structure. +** This opcode invokes the corresponding xUpdate method. P2 values +** are contiguous memory cells starting at P3 to pass to the xUpdate +** invocation. The value in register (P3+P2-1) corresponds to the +** p2th element of the argv array passed to xUpdate. +** +** The xUpdate method will do a DELETE or an INSERT or both. +** The argv[0] element (which corresponds to memory cell P3) +** is the rowid of a row to delete. If argv[0] is NULL then no +** deletion occurs. The argv[1] element is the rowid of the new +** row. This can be NULL to have the virtual table select the new +** rowid for itself. The subsequent elements in the array are +** the values of columns in the new row. +** +** If P2==1 then no insert is performed. argv[0] is the rowid of +** a row to delete. +** +** P1 is a boolean flag. If it is set to true and the xUpdate call +** is successful, then the value returned by sqlite3_last_insert_rowid() +** is set to the value of the rowid for the row just inserted. +** +** P5 is the error actions (OE_Replace, OE_Fail, OE_Ignore, etc) to +** apply in the case of a constraint failure on an insert or update. +*/ +case OP_VUpdate: { + sqlite3_vtab *pVtab; + const sqlite3_module *pModule; + int nArg; + int i; + sqlite_int64 rowid = 0; + Mem **apArg; + Mem *pX; + + assert( pOp->p2==1 || pOp->p5==OE_Fail || pOp->p5==OE_Rollback + || pOp->p5==OE_Abort || pOp->p5==OE_Ignore || pOp->p5==OE_Replace + ); + assert( p->readOnly==0 ); + if( db->mallocFailed ) goto no_mem; + sqlite3VdbeIncrWriteCounter(p, 0); + pVtab = pOp->p4.pVtab->pVtab; + if( pVtab==0 || NEVER(pVtab->pModule==0) ){ + rc = SQLITE_LOCKED; + goto abort_due_to_error; + } + pModule = pVtab->pModule; + nArg = pOp->p2; + assert( pOp->p4type==P4_VTAB ); + if( ALWAYS(pModule->xUpdate) ){ + u8 vtabOnConflict = db->vtabOnConflict; + apArg = p->apArg; + pX = &aMem[pOp->p3]; + for(i=0; ivtabOnConflict = pOp->p5; + rc = pModule->xUpdate(pVtab, nArg, apArg, &rowid); + db->vtabOnConflict = vtabOnConflict; + sqlite3VtabImportErrmsg(p, pVtab); + if( rc==SQLITE_OK && pOp->p1 ){ + assert( nArg>1 && apArg[0] && (apArg[0]->flags&MEM_Null) ); + db->lastRowid = rowid; + } + if( (rc&0xff)==SQLITE_CONSTRAINT && pOp->p4.pVtab->bConstraint ){ + if( pOp->p5==OE_Ignore ){ + rc = SQLITE_OK; + }else{ + p->errorAction = ((pOp->p5==OE_Replace) ? OE_Abort : pOp->p5); + } + }else{ + p->nChange++; + } + if( rc ) goto abort_due_to_error; + } + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +/* Opcode: Pagecount P1 P2 * * * +** +** Write the current number of pages in database P1 to memory cell P2. +*/ +case OP_Pagecount: { /* out2 */ + pOut = out2Prerelease(p, pOp); + pOut->u.i = sqlite3BtreeLastPage(db->aDb[pOp->p1].pBt); + break; +} +#endif + + +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +/* Opcode: MaxPgcnt P1 P2 P3 * * +** +** Try to set the maximum page count for database P1 to the value in P3. +** Do not let the maximum page count fall below the current page count and +** do not change the maximum page count value if P3==0. +** +** Store the maximum page count after the change in register P2. +*/ +case OP_MaxPgcnt: { /* out2 */ + unsigned int newMax; + Btree *pBt; + + pOut = out2Prerelease(p, pOp); + pBt = db->aDb[pOp->p1].pBt; + newMax = 0; + if( pOp->p3 ){ + newMax = sqlite3BtreeLastPage(pBt); + if( newMax < (unsigned)pOp->p3 ) newMax = (unsigned)pOp->p3; + } + pOut->u.i = sqlite3BtreeMaxPageCount(pBt, newMax); + break; +} +#endif + +/* Opcode: Function P1 P2 P3 P4 * +** Synopsis: r[P3]=func(r[P2@NP]) +** +** Invoke a user function (P4 is a pointer to an sqlite3_context object that +** contains a pointer to the function to be run) with arguments taken +** from register P2 and successors. The number of arguments is in +** the sqlite3_context object that P4 points to. +** The result of the function is stored +** in register P3. Register P3 must not be one of the function inputs. +** +** P1 is a 32-bit bitmask indicating whether or not each argument to the +** function was determined to be constant at compile time. If the first +** argument was constant then bit 0 of P1 is set. This is used to determine +** whether meta data associated with a user function argument using the +** sqlite3_set_auxdata() API may be safely retained until the next +** invocation of this opcode. +** +** See also: AggStep, AggFinal, PureFunc +*/ +/* Opcode: PureFunc P1 P2 P3 P4 * +** Synopsis: r[P3]=func(r[P2@NP]) +** +** Invoke a user function (P4 is a pointer to an sqlite3_context object that +** contains a pointer to the function to be run) with arguments taken +** from register P2 and successors. The number of arguments is in +** the sqlite3_context object that P4 points to. +** The result of the function is stored +** in register P3. Register P3 must not be one of the function inputs. +** +** P1 is a 32-bit bitmask indicating whether or not each argument to the +** function was determined to be constant at compile time. If the first +** argument was constant then bit 0 of P1 is set. This is used to determine +** whether meta data associated with a user function argument using the +** sqlite3_set_auxdata() API may be safely retained until the next +** invocation of this opcode. +** +** This opcode works exactly like OP_Function. The only difference is in +** its name. This opcode is used in places where the function must be +** purely non-deterministic. Some built-in date/time functions can be +** either determinitic of non-deterministic, depending on their arguments. +** When those function are used in a non-deterministic way, they will check +** to see if they were called using OP_PureFunc instead of OP_Function, and +** if they were, they throw an error. +** +** See also: AggStep, AggFinal, Function +*/ +case OP_PureFunc: /* group */ +case OP_Function: { /* group */ + int i; + sqlite3_context *pCtx; + + assert( pOp->p4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + pOut = &aMem[pOp->p3]; + if( pCtx->pOut != pOut ){ + pCtx->pVdbe = p; + pCtx->pOut = pOut; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; + } + assert( pCtx->pVdbe==p ); + + memAboutToChange(p, pOut); +#ifdef SQLITE_DEBUG + for(i=0; iargc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + MemSetTypeFlag(pOut, MEM_Null); + assert( pCtx->isError==0 ); + (*pCtx->pFunc->xSFunc)(pCtx, pCtx->argc, pCtx->argv);/* IMP: R-24505-23230 */ + + /* If the function returned an error, throw an exception */ + if( pCtx->isError ){ + if( pCtx->isError>0 ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(pOut)); + rc = pCtx->isError; + } + sqlite3VdbeDeleteAuxData(db, &p->pAuxData, pCtx->iOp, pOp->p1); + pCtx->isError = 0; + if( rc ) goto abort_due_to_error; + } + + /* Copy the result of the function into register P3 */ + if( pOut->flags & (MEM_Str|MEM_Blob) ){ + sqlite3VdbeChangeEncoding(pOut, encoding); + if( sqlite3VdbeMemTooBig(pOut) ) goto too_big; + } + + REGISTER_TRACE(pOp->p3, pOut); + UPDATE_MAX_BLOBSIZE(pOut); + break; +} + +/* Opcode: FilterAdd P1 * P3 P4 * +** Synopsis: filter(P1) += key(P3@P4) +** +** Compute a hash on the P4 registers starting with r[P3] and +** add that hash to the bloom filter contained in r[P1]. +*/ +case OP_FilterAdd: { + u64 h; + + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags & MEM_Blob ); + assert( pIn1->n>0 ); + h = filterHash(aMem, pOp); +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + int ii; + for(ii=pOp->p3; iip3+pOp->p4.i; ii++){ + registerTrace(ii, &aMem[ii]); + } + printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n)); + } +#endif + h %= pIn1->n; + pIn1->z[h/8] |= 1<<(h&7); + break; +} + +/* Opcode: Filter P1 P2 P3 P4 * +** Synopsis: if key(P3@P4) not in filter(P1) goto P2 +** +** Compute a hash on the key contained in the P4 registers starting +** with r[P3]. Check to see if that hash is found in the +** bloom filter hosted by register P1. If it is not present then +** maybe jump to P2. Otherwise fall through. +** +** False negatives are harmless. It is always safe to fall through, +** even if the value is in the bloom filter. A false negative causes +** more CPU cycles to be used, but it should still yield the correct +** answer. However, an incorrect answer may well arise from a +** false positive - if the jump is taken when it should fall through. +*/ +case OP_Filter: { /* jump */ + u64 h; + + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + assert( (pIn1->flags & MEM_Blob)!=0 ); + assert( pIn1->n >= 1 ); + h = filterHash(aMem, pOp); +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + int ii; + for(ii=pOp->p3; iip3+pOp->p4.i; ii++){ + registerTrace(ii, &aMem[ii]); + } + printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n)); + } +#endif + h %= pIn1->n; + if( (pIn1->z[h/8] & (1<<(h&7)))==0 ){ + VdbeBranchTaken(1, 2); + p->aCounter[SQLITE_STMTSTATUS_FILTER_HIT]++; + goto jump_to_p2; + }else{ + p->aCounter[SQLITE_STMTSTATUS_FILTER_MISS]++; + VdbeBranchTaken(0, 2); + } + break; +} + +/* Opcode: Trace P1 P2 * P4 * +** +** Write P4 on the statement trace output if statement tracing is +** enabled. +** +** Operand P1 must be 0x7fffffff and P2 must positive. +*/ +/* Opcode: Init P1 P2 P3 P4 * +** Synopsis: Start at P2 +** +** Programs contain a single instance of this opcode as the very first +** opcode. +** +** If tracing is enabled (by the sqlite3_trace()) interface, then +** the UTF-8 string contained in P4 is emitted on the trace callback. +** Or if P4 is blank, use the string returned by sqlite3_sql(). +** +** If P2 is not zero, jump to instruction P2. +** +** Increment the value of P1 so that OP_Once opcodes will jump the +** first time they are evaluated for this run. +** +** If P3 is not zero, then it is an address to jump to if an SQLITE_CORRUPT +** error is encountered. +*/ +case OP_Trace: +case OP_Init: { /* jump */ + int i; +#ifndef SQLITE_OMIT_TRACE + char *zTrace; +#endif + + /* If the P4 argument is not NULL, then it must be an SQL comment string. + ** The "--" string is broken up to prevent false-positives with srcck1.c. + ** + ** This assert() provides evidence for: + ** EVIDENCE-OF: R-50676-09860 The callback can compute the same text that + ** would have been returned by the legacy sqlite3_trace() interface by + ** using the X argument when X begins with "--" and invoking + ** sqlite3_expanded_sql(P) otherwise. + */ + assert( pOp->p4.z==0 || strncmp(pOp->p4.z, "-" "- ", 3)==0 ); + + /* OP_Init is always instruction 0 */ + assert( pOp==p->aOp || pOp->opcode==OP_Trace ); + +#ifndef SQLITE_OMIT_TRACE + if( (db->mTrace & (SQLITE_TRACE_STMT|SQLITE_TRACE_LEGACY))!=0 + && !p->doingRerun + && (zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0 + ){ +#ifndef SQLITE_OMIT_DEPRECATED + if( db->mTrace & SQLITE_TRACE_LEGACY ){ + char *z = sqlite3VdbeExpandSql(p, zTrace); + db->trace.xLegacy(db->pTraceArg, z); + sqlite3_free(z); + }else +#endif + if( db->nVdbeExec>1 ){ + char *z = sqlite3MPrintf(db, "-- %s", zTrace); + (void)db->trace.xV2(SQLITE_TRACE_STMT, db->pTraceArg, p, z); + sqlite3DbFree(db, z); + }else{ + (void)db->trace.xV2(SQLITE_TRACE_STMT, db->pTraceArg, p, zTrace); + } + } +#ifdef SQLITE_USE_FCNTL_TRACE + zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql); + if( zTrace ){ + int j; + for(j=0; jnDb; j++){ + if( DbMaskTest(p->btreeMask, j)==0 ) continue; + sqlite3_file_control(db, db->aDb[j].zDbSName, SQLITE_FCNTL_TRACE, zTrace); + } + } +#endif /* SQLITE_USE_FCNTL_TRACE */ +#ifdef SQLITE_DEBUG + if( (db->flags & SQLITE_SqlTrace)!=0 + && (zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0 + ){ + sqlite3DebugPrintf("SQL-trace: %s\n", zTrace); + } +#endif /* SQLITE_DEBUG */ +#endif /* SQLITE_OMIT_TRACE */ + assert( pOp->p2>0 ); + if( pOp->p1>=sqlite3GlobalConfig.iOnceResetThreshold ){ + if( pOp->opcode==OP_Trace ) break; + for(i=1; inOp; i++){ + if( p->aOp[i].opcode==OP_Once ) p->aOp[i].p1 = 0; + } + pOp->p1 = 0; + } + pOp->p1++; + p->aCounter[SQLITE_STMTSTATUS_RUN]++; + goto jump_to_p2; +} + +#ifdef SQLITE_ENABLE_CURSOR_HINTS +/* Opcode: CursorHint P1 * * P4 * +** +** Provide a hint to cursor P1 that it only needs to return rows that +** satisfy the Expr in P4. TK_REGISTER terms in the P4 expression refer +** to values currently held in registers. TK_COLUMN terms in the P4 +** expression refer to columns in the b-tree to which cursor P1 is pointing. +*/ +case OP_CursorHint: { + VdbeCursor *pC; + + assert( pOp->p1>=0 && pOp->p1nCursor ); + assert( pOp->p4type==P4_EXPR ); + pC = p->apCsr[pOp->p1]; + if( pC ){ + assert( pC->eCurType==CURTYPE_BTREE ); + sqlite3BtreeCursorHint(pC->uc.pCursor, BTREE_HINT_RANGE, + pOp->p4.pExpr, aMem); + } + break; +} +#endif /* SQLITE_ENABLE_CURSOR_HINTS */ + +#ifdef SQLITE_DEBUG +/* Opcode: Abortable * * * * * +** +** Verify that an Abort can happen. Assert if an Abort at this point +** might cause database corruption. This opcode only appears in debugging +** builds. +** +** An Abort is safe if either there have been no writes, or if there is +** an active statement journal. +*/ +case OP_Abortable: { + sqlite3VdbeAssertAbortable(p); + break; +} +#endif + +#ifdef SQLITE_DEBUG +/* Opcode: ReleaseReg P1 P2 P3 * P5 +** Synopsis: release r[P1@P2] mask P3 +** +** Release registers from service. Any content that was in the +** the registers is unreliable after this opcode completes. +** +** The registers released will be the P2 registers starting at P1, +** except if bit ii of P3 set, then do not release register P1+ii. +** In other words, P3 is a mask of registers to preserve. +** +** Releasing a register clears the Mem.pScopyFrom pointer. That means +** that if the content of the released register was set using OP_SCopy, +** a change to the value of the source register for the OP_SCopy will no longer +** generate an assertion fault in sqlite3VdbeMemAboutToChange(). +** +** If P5 is set, then all released registers have their type set +** to MEM_Undefined so that any subsequent attempt to read the released +** register (before it is reinitialized) will generate an assertion fault. +** +** P5 ought to be set on every call to this opcode. +** However, there are places in the code generator will release registers +** before their are used, under the (valid) assumption that the registers +** will not be reallocated for some other purpose before they are used and +** hence are safe to release. +** +** This opcode is only available in testing and debugging builds. It is +** not generated for release builds. The purpose of this opcode is to help +** validate the generated bytecode. This opcode does not actually contribute +** to computing an answer. +*/ +case OP_ReleaseReg: { + Mem *pMem; + int i; + u32 constMask; + assert( pOp->p1>0 ); + assert( pOp->p1+pOp->p2<=(p->nMem+1 - p->nCursor)+1 ); + pMem = &aMem[pOp->p1]; + constMask = pOp->p3; + for(i=0; ip2; i++, pMem++){ + if( i>=32 || (constMask & MASKBIT32(i))==0 ){ + pMem->pScopyFrom = 0; + if( i<32 && pOp->p5 ) MemSetTypeFlag(pMem, MEM_Undefined); + } + } + break; +} +#endif + +/* Opcode: Noop * * * * * +** +** Do nothing. This instruction is often useful as a jump +** destination. +*/ +/* +** The magic Explain opcode are only inserted when explain==2 (which +** is to say when the EXPLAIN QUERY PLAN syntax is used.) +** This opcode records information from the optimizer. It is the +** the same as a no-op. This opcodesnever appears in a real VM program. +*/ +default: { /* This is really OP_Noop, OP_Explain */ + assert( pOp->opcode==OP_Noop || pOp->opcode==OP_Explain ); + + break; +} + +/***************************************************************************** +** The cases of the switch statement above this line should all be indented +** by 6 spaces. But the left-most 6 spaces have been removed to improve the +** readability. From this point on down, the normal indentation rules are +** restored. +*****************************************************************************/ + } + +#ifdef VDBE_PROFILE + { + u64 endTime = sqlite3NProfileCnt ? sqlite3NProfileCnt : sqlite3Hwtime(); + if( endTime>start ) pOrigOp->cycles += endTime - start; + pOrigOp->cnt++; + } +#endif + + /* The following code adds nothing to the actual functionality + ** of the program. It is only here for testing and debugging. + ** On the other hand, it does burn CPU cycles every time through + ** the evaluator loop. So we can leave it out when NDEBUG is defined. + */ +#ifndef NDEBUG + assert( pOp>=&aOp[-1] && pOp<&aOp[p->nOp-1] ); + +#ifdef SQLITE_DEBUG + if( db->flags & SQLITE_VdbeTrace ){ + u8 opProperty = sqlite3OpcodeProperty[pOrigOp->opcode]; + if( rc!=0 ) printf("rc=%d\n",rc); + if( opProperty & (OPFLG_OUT2) ){ + registerTrace(pOrigOp->p2, &aMem[pOrigOp->p2]); + } + if( opProperty & OPFLG_OUT3 ){ + registerTrace(pOrigOp->p3, &aMem[pOrigOp->p3]); + } + if( opProperty==0xff ){ + /* Never happens. This code exists to avoid a harmless linkage + ** warning aboud sqlite3VdbeRegisterDump() being defined but not + ** used. */ + sqlite3VdbeRegisterDump(p); + } + } +#endif /* SQLITE_DEBUG */ +#endif /* NDEBUG */ + } /* The end of the for(;;) loop the loops through opcodes */ + + /* If we reach this point, it means that execution is finished with + ** an error of some kind. + */ +abort_due_to_error: + if( db->mallocFailed ){ + rc = SQLITE_NOMEM_BKPT; + }else if( rc==SQLITE_IOERR_CORRUPTFS ){ + rc = SQLITE_CORRUPT_BKPT; + } + assert( rc ); +#ifdef SQLITE_DEBUG + if( db->flags & SQLITE_VdbeTrace ){ + const char *zTrace = p->zSql; + if( zTrace==0 ){ + if( aOp[0].opcode==OP_Trace ){ + zTrace = aOp[0].p4.z; + } + if( zTrace==0 ) zTrace = "???"; + } + printf("ABORT-due-to-error (rc=%d): %s\n", rc, zTrace); + } +#endif + if( p->zErrMsg==0 && rc!=SQLITE_IOERR_NOMEM ){ + sqlite3VdbeError(p, "%s", sqlite3ErrStr(rc)); + } + p->rc = rc; + sqlite3SystemError(db, rc); + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(rc, "statement aborts at %d: [%s] %s", + (int)(pOp - aOp), p->zSql, p->zErrMsg); + sqlite3VdbeHalt(p); + if( rc==SQLITE_IOERR_NOMEM ) sqlite3OomFault(db); + if( rc==SQLITE_CORRUPT && db->autoCommit==0 ){ + db->flags |= SQLITE_CorruptRdOnly; + } + rc = SQLITE_ERROR; + if( resetSchemaOnFault>0 ){ + sqlite3ResetOneSchema(db, resetSchemaOnFault-1); + } + + /* This is the only way out of this procedure. We have to + ** release the mutexes on btrees that were acquired at the + ** top. */ +vdbe_return: +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + while( nVmStep>=nProgressLimit && db->xProgress!=0 ){ + nProgressLimit += db->nProgressOps; + if( db->xProgress(db->pProgressArg) ){ + nProgressLimit = LARGEST_UINT64; + rc = SQLITE_INTERRUPT; + goto abort_due_to_error; + } + } +#endif + p->aCounter[SQLITE_STMTSTATUS_VM_STEP] += (int)nVmStep; + sqlite3VdbeLeave(p); + assert( rc!=SQLITE_OK || nExtraDelete==0 + || sqlite3_strlike("DELETE%",p->zSql,0)!=0 + ); + return rc; + + /* Jump to here if a string or blob larger than SQLITE_MAX_LENGTH + ** is encountered. + */ +too_big: + sqlite3VdbeError(p, "string or blob too big"); + rc = SQLITE_TOOBIG; + goto abort_due_to_error; + + /* Jump to here if a malloc() fails. + */ +no_mem: + sqlite3OomFault(db); + sqlite3VdbeError(p, "out of memory"); + rc = SQLITE_NOMEM_BKPT; + goto abort_due_to_error; + + /* Jump to here if the sqlite3_interrupt() API sets the interrupt + ** flag. + */ +abort_due_to_interrupt: + assert( AtomicLoad(&db->u1.isInterrupted) ); + rc = SQLITE_INTERRUPT; + goto abort_due_to_error; +} + + +/************** End of vdbe.c ************************************************/ +/************** Begin file vdbeblob.c ****************************************/ +/* +** 2007 May 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code used to implement incremental BLOB I/O. +*/ + +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +#ifndef SQLITE_OMIT_INCRBLOB + +/* +** Valid sqlite3_blob* handles point to Incrblob structures. +*/ +typedef struct Incrblob Incrblob; +struct Incrblob { + int nByte; /* Size of open blob, in bytes */ + int iOffset; /* Byte offset of blob in cursor data */ + u16 iCol; /* Table column this handle is open on */ + BtCursor *pCsr; /* Cursor pointing at blob row */ + sqlite3_stmt *pStmt; /* Statement holding cursor open */ + sqlite3 *db; /* The associated database */ + char *zDb; /* Database name */ + Table *pTab; /* Table object */ +}; + + +/* +** This function is used by both blob_open() and blob_reopen(). It seeks +** the b-tree cursor associated with blob handle p to point to row iRow. +** If successful, SQLITE_OK is returned and subsequent calls to +** sqlite3_blob_read() or sqlite3_blob_write() access the specified row. +** +** If an error occurs, or if the specified row does not exist or does not +** contain a value of type TEXT or BLOB in the column nominated when the +** blob handle was opened, then an error code is returned and *pzErr may +** be set to point to a buffer containing an error message. It is the +** responsibility of the caller to free the error message buffer using +** sqlite3DbFree(). +** +** If an error does occur, then the b-tree cursor is closed. All subsequent +** calls to sqlite3_blob_read(), blob_write() or blob_reopen() will +** immediately return SQLITE_ABORT. +*/ +static int blobSeekToRow(Incrblob *p, sqlite3_int64 iRow, char **pzErr){ + int rc; /* Error code */ + char *zErr = 0; /* Error message */ + Vdbe *v = (Vdbe *)p->pStmt; + + /* Set the value of register r[1] in the SQL statement to integer iRow. + ** This is done directly as a performance optimization + */ + v->aMem[1].flags = MEM_Int; + v->aMem[1].u.i = iRow; + + /* If the statement has been run before (and is paused at the OP_ResultRow) + ** then back it up to the point where it does the OP_NotExists. This could + ** have been down with an extra OP_Goto, but simply setting the program + ** counter is faster. */ + if( v->pc>4 ){ + v->pc = 4; + assert( v->aOp[v->pc].opcode==OP_NotExists ); + rc = sqlite3VdbeExec(v); + }else{ + rc = sqlite3_step(p->pStmt); + } + if( rc==SQLITE_ROW ){ + VdbeCursor *pC = v->apCsr[0]; + u32 type; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + type = pC->nHdrParsed>p->iCol ? pC->aType[p->iCol] : 0; + testcase( pC->nHdrParsed==p->iCol ); + testcase( pC->nHdrParsed==p->iCol+1 ); + if( type<12 ){ + zErr = sqlite3MPrintf(p->db, "cannot open value of type %s", + type==0?"null": type==7?"real": "integer" + ); + rc = SQLITE_ERROR; + sqlite3_finalize(p->pStmt); + p->pStmt = 0; + }else{ + p->iOffset = pC->aType[p->iCol + pC->nField]; + p->nByte = sqlite3VdbeSerialTypeLen(type); + p->pCsr = pC->uc.pCursor; + sqlite3BtreeIncrblobCursor(p->pCsr); + } + } + + if( rc==SQLITE_ROW ){ + rc = SQLITE_OK; + }else if( p->pStmt ){ + rc = sqlite3_finalize(p->pStmt); + p->pStmt = 0; + if( rc==SQLITE_OK ){ + zErr = sqlite3MPrintf(p->db, "no such rowid: %lld", iRow); + rc = SQLITE_ERROR; + }else{ + zErr = sqlite3MPrintf(p->db, "%s", sqlite3_errmsg(p->db)); + } + } + + assert( rc!=SQLITE_OK || zErr==0 ); + assert( rc!=SQLITE_ROW && rc!=SQLITE_DONE ); + + *pzErr = zErr; + return rc; +} + +/* +** Open a blob handle. +*/ +SQLITE_API int sqlite3_blob_open( + sqlite3* db, /* The database connection */ + const char *zDb, /* The attached database containing the blob */ + const char *zTable, /* The table containing the blob */ + const char *zColumn, /* The column containing the blob */ + sqlite_int64 iRow, /* The row containing the glob */ + int wrFlag, /* True -> read/write access, false -> read-only */ + sqlite3_blob **ppBlob /* Handle for accessing the blob returned here */ +){ + int nAttempt = 0; + int iCol; /* Index of zColumn in row-record */ + int rc = SQLITE_OK; + char *zErr = 0; + Table *pTab; + Incrblob *pBlob = 0; + Parse sParse; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppBlob==0 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + *ppBlob = 0; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zTable==0 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + wrFlag = !!wrFlag; /* wrFlag = (wrFlag ? 1 : 0); */ + + sqlite3_mutex_enter(db->mutex); + + pBlob = (Incrblob *)sqlite3DbMallocZero(db, sizeof(Incrblob)); + while(1){ + sqlite3ParseObjectInit(&sParse,db); + if( !pBlob ) goto blob_open_out; + sqlite3DbFree(db, zErr); + zErr = 0; + + sqlite3BtreeEnterAll(db); + pTab = sqlite3LocateTable(&sParse, 0, zTable, zDb); + if( pTab && IsVirtual(pTab) ){ + pTab = 0; + sqlite3ErrorMsg(&sParse, "cannot open virtual table: %s", zTable); + } + if( pTab && !HasRowid(pTab) ){ + pTab = 0; + sqlite3ErrorMsg(&sParse, "cannot open table without rowid: %s", zTable); + } +#ifndef SQLITE_OMIT_VIEW + if( pTab && IsView(pTab) ){ + pTab = 0; + sqlite3ErrorMsg(&sParse, "cannot open view: %s", zTable); + } +#endif + if( !pTab ){ + if( sParse.zErrMsg ){ + sqlite3DbFree(db, zErr); + zErr = sParse.zErrMsg; + sParse.zErrMsg = 0; + } + rc = SQLITE_ERROR; + sqlite3BtreeLeaveAll(db); + goto blob_open_out; + } + pBlob->pTab = pTab; + pBlob->zDb = db->aDb[sqlite3SchemaToIndex(db, pTab->pSchema)].zDbSName; + + /* Now search pTab for the exact column. */ + for(iCol=0; iColnCol; iCol++) { + if( sqlite3StrICmp(pTab->aCol[iCol].zCnName, zColumn)==0 ){ + break; + } + } + if( iCol==pTab->nCol ){ + sqlite3DbFree(db, zErr); + zErr = sqlite3MPrintf(db, "no such column: \"%s\"", zColumn); + rc = SQLITE_ERROR; + sqlite3BtreeLeaveAll(db); + goto blob_open_out; + } + + /* If the value is being opened for writing, check that the + ** column is not indexed, and that it is not part of a foreign key. + */ + if( wrFlag ){ + const char *zFault = 0; + Index *pIdx; +#ifndef SQLITE_OMIT_FOREIGN_KEY + if( db->flags&SQLITE_ForeignKeys ){ + /* Check that the column is not part of an FK child key definition. It + ** is not necessary to check if it is part of a parent key, as parent + ** key columns must be indexed. The check below will pick up this + ** case. */ + FKey *pFKey; + assert( IsOrdinaryTable(pTab) ); + for(pFKey=pTab->u.tab.pFKey; pFKey; pFKey=pFKey->pNextFrom){ + int j; + for(j=0; jnCol; j++){ + if( pFKey->aCol[j].iFrom==iCol ){ + zFault = "foreign key"; + } + } + } + } +#endif + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int j; + for(j=0; jnKeyCol; j++){ + /* FIXME: Be smarter about indexes that use expressions */ + if( pIdx->aiColumn[j]==iCol || pIdx->aiColumn[j]==XN_EXPR ){ + zFault = "indexed"; + } + } + } + if( zFault ){ + sqlite3DbFree(db, zErr); + zErr = sqlite3MPrintf(db, "cannot open %s column for writing", zFault); + rc = SQLITE_ERROR; + sqlite3BtreeLeaveAll(db); + goto blob_open_out; + } + } + + pBlob->pStmt = (sqlite3_stmt *)sqlite3VdbeCreate(&sParse); + assert( pBlob->pStmt || db->mallocFailed ); + if( pBlob->pStmt ){ + + /* This VDBE program seeks a btree cursor to the identified + ** db/table/row entry. The reason for using a vdbe program instead + ** of writing code to use the b-tree layer directly is that the + ** vdbe program will take advantage of the various transaction, + ** locking and error handling infrastructure built into the vdbe. + ** + ** After seeking the cursor, the vdbe executes an OP_ResultRow. + ** Code external to the Vdbe then "borrows" the b-tree cursor and + ** uses it to implement the blob_read(), blob_write() and + ** blob_bytes() functions. + ** + ** The sqlite3_blob_close() function finalizes the vdbe program, + ** which closes the b-tree cursor and (possibly) commits the + ** transaction. + */ + static const int iLn = VDBE_OFFSET_LINENO(2); + static const VdbeOpList openBlob[] = { + {OP_TableLock, 0, 0, 0}, /* 0: Acquire a read or write lock */ + {OP_OpenRead, 0, 0, 0}, /* 1: Open a cursor */ + /* blobSeekToRow() will initialize r[1] to the desired rowid */ + {OP_NotExists, 0, 5, 1}, /* 2: Seek the cursor to rowid=r[1] */ + {OP_Column, 0, 0, 1}, /* 3 */ + {OP_ResultRow, 1, 0, 0}, /* 4 */ + {OP_Halt, 0, 0, 0}, /* 5 */ + }; + Vdbe *v = (Vdbe *)pBlob->pStmt; + int iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + VdbeOp *aOp; + + sqlite3VdbeAddOp4Int(v, OP_Transaction, iDb, wrFlag, + pTab->pSchema->schema_cookie, + pTab->pSchema->iGeneration); + sqlite3VdbeChangeP5(v, 1); + assert( sqlite3VdbeCurrentAddr(v)==2 || db->mallocFailed ); + aOp = sqlite3VdbeAddOpList(v, ArraySize(openBlob), openBlob, iLn); + + /* Make sure a mutex is held on the table to be accessed */ + sqlite3VdbeUsesBtree(v, iDb); + + if( db->mallocFailed==0 ){ + assert( aOp!=0 ); + /* Configure the OP_TableLock instruction */ +#ifdef SQLITE_OMIT_SHARED_CACHE + aOp[0].opcode = OP_Noop; +#else + aOp[0].p1 = iDb; + aOp[0].p2 = pTab->tnum; + aOp[0].p3 = wrFlag; + sqlite3VdbeChangeP4(v, 2, pTab->zName, P4_TRANSIENT); + } + if( db->mallocFailed==0 ){ +#endif + + /* Remove either the OP_OpenWrite or OpenRead. Set the P2 + ** parameter of the other to pTab->tnum. */ + if( wrFlag ) aOp[1].opcode = OP_OpenWrite; + aOp[1].p2 = pTab->tnum; + aOp[1].p3 = iDb; + + /* Configure the number of columns. Configure the cursor to + ** think that the table has one more column than it really + ** does. An OP_Column to retrieve this imaginary column will + ** always return an SQL NULL. This is useful because it means + ** we can invoke OP_Column to fill in the vdbe cursors type + ** and offset cache without causing any IO. + */ + aOp[1].p4type = P4_INT32; + aOp[1].p4.i = pTab->nCol+1; + aOp[3].p2 = pTab->nCol; + + sParse.nVar = 0; + sParse.nMem = 1; + sParse.nTab = 1; + sqlite3VdbeMakeReady(v, &sParse); + } + } + + pBlob->iCol = iCol; + pBlob->db = db; + sqlite3BtreeLeaveAll(db); + if( db->mallocFailed ){ + goto blob_open_out; + } + rc = blobSeekToRow(pBlob, iRow, &zErr); + if( (++nAttempt)>=SQLITE_MAX_SCHEMA_RETRY || rc!=SQLITE_SCHEMA ) break; + sqlite3ParseObjectReset(&sParse); + } + +blob_open_out: + if( rc==SQLITE_OK && db->mallocFailed==0 ){ + *ppBlob = (sqlite3_blob *)pBlob; + }else{ + if( pBlob && pBlob->pStmt ) sqlite3VdbeFinalize((Vdbe *)pBlob->pStmt); + sqlite3DbFree(db, pBlob); + } + sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : 0), zErr); + sqlite3DbFree(db, zErr); + sqlite3ParseObjectReset(&sParse); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** Close a blob handle that was previously created using +** sqlite3_blob_open(). +*/ +SQLITE_API int sqlite3_blob_close(sqlite3_blob *pBlob){ + Incrblob *p = (Incrblob *)pBlob; + int rc; + sqlite3 *db; + + if( p ){ + sqlite3_stmt *pStmt = p->pStmt; + db = p->db; + sqlite3_mutex_enter(db->mutex); + sqlite3DbFree(db, p); + sqlite3_mutex_leave(db->mutex); + rc = sqlite3_finalize(pStmt); + }else{ + rc = SQLITE_OK; + } + return rc; +} + +/* +** Perform a read or write operation on a blob +*/ +static int blobReadWrite( + sqlite3_blob *pBlob, + void *z, + int n, + int iOffset, + int (*xCall)(BtCursor*, u32, u32, void*) +){ + int rc; + Incrblob *p = (Incrblob *)pBlob; + Vdbe *v; + sqlite3 *db; + + if( p==0 ) return SQLITE_MISUSE_BKPT; + db = p->db; + sqlite3_mutex_enter(db->mutex); + v = (Vdbe*)p->pStmt; + + if( n<0 || iOffset<0 || ((sqlite3_int64)iOffset+n)>p->nByte ){ + /* Request is out of range. Return a transient error. */ + rc = SQLITE_ERROR; + }else if( v==0 ){ + /* If there is no statement handle, then the blob-handle has + ** already been invalidated. Return SQLITE_ABORT in this case. + */ + rc = SQLITE_ABORT; + }else{ + /* Call either BtreeData() or BtreePutData(). If SQLITE_ABORT is + ** returned, clean-up the statement handle. + */ + assert( db == v->db ); + sqlite3BtreeEnterCursor(p->pCsr); + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + if( xCall==sqlite3BtreePutData && db->xPreUpdateCallback ){ + /* If a pre-update hook is registered and this is a write cursor, + ** invoke it here. + ** + ** TODO: The preupdate-hook is passed SQLITE_DELETE, even though this + ** operation should really be an SQLITE_UPDATE. This is probably + ** incorrect, but is convenient because at this point the new.* values + ** are not easily obtainable. And for the sessions module, an + ** SQLITE_UPDATE where the PK columns do not change is handled in the + ** same way as an SQLITE_DELETE (the SQLITE_DELETE code is actually + ** slightly more efficient). Since you cannot write to a PK column + ** using the incremental-blob API, this works. For the sessions module + ** anyhow. + */ + sqlite3_int64 iKey; + iKey = sqlite3BtreeIntegerKey(p->pCsr); + assert( v->apCsr[0]!=0 ); + assert( v->apCsr[0]->eCurType==CURTYPE_BTREE ); + sqlite3VdbePreUpdateHook( + v, v->apCsr[0], SQLITE_DELETE, p->zDb, p->pTab, iKey, -1, p->iCol + ); + } +#endif + + rc = xCall(p->pCsr, iOffset+p->iOffset, n, z); + sqlite3BtreeLeaveCursor(p->pCsr); + if( rc==SQLITE_ABORT ){ + sqlite3VdbeFinalize(v); + p->pStmt = 0; + }else{ + v->rc = rc; + } + } + sqlite3Error(db, rc); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** Read data from a blob handle. +*/ +SQLITE_API int sqlite3_blob_read(sqlite3_blob *pBlob, void *z, int n, int iOffset){ + return blobReadWrite(pBlob, z, n, iOffset, sqlite3BtreePayloadChecked); +} + +/* +** Write data to a blob handle. +*/ +SQLITE_API int sqlite3_blob_write(sqlite3_blob *pBlob, const void *z, int n, int iOffset){ + return blobReadWrite(pBlob, (void *)z, n, iOffset, sqlite3BtreePutData); +} + +/* +** Query a blob handle for the size of the data. +** +** The Incrblob.nByte field is fixed for the lifetime of the Incrblob +** so no mutex is required for access. +*/ +SQLITE_API int sqlite3_blob_bytes(sqlite3_blob *pBlob){ + Incrblob *p = (Incrblob *)pBlob; + return (p && p->pStmt) ? p->nByte : 0; +} + +/* +** Move an existing blob handle to point to a different row of the same +** database table. +** +** If an error occurs, or if the specified row does not exist or does not +** contain a blob or text value, then an error code is returned and the +** database handle error code and message set. If this happens, then all +** subsequent calls to sqlite3_blob_xxx() functions (except blob_close()) +** immediately return SQLITE_ABORT. +*/ +SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_int64 iRow){ + int rc; + Incrblob *p = (Incrblob *)pBlob; + sqlite3 *db; + + if( p==0 ) return SQLITE_MISUSE_BKPT; + db = p->db; + sqlite3_mutex_enter(db->mutex); + + if( p->pStmt==0 ){ + /* If there is no statement handle, then the blob-handle has + ** already been invalidated. Return SQLITE_ABORT in this case. + */ + rc = SQLITE_ABORT; + }else{ + char *zErr; + ((Vdbe*)p->pStmt)->rc = SQLITE_OK; + rc = blobSeekToRow(p, iRow, &zErr); + if( rc!=SQLITE_OK ){ + sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : 0), zErr); + sqlite3DbFree(db, zErr); + } + assert( rc!=SQLITE_SCHEMA ); + } + + rc = sqlite3ApiExit(db, rc); + assert( rc==SQLITE_OK || p->pStmt==0 ); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +#endif /* #ifndef SQLITE_OMIT_INCRBLOB */ + +/************** End of vdbeblob.c ********************************************/ +/************** Begin file vdbesort.c ****************************************/ +/* +** 2011-07-09 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code for the VdbeSorter object, used in concert with +** a VdbeCursor to sort large numbers of keys for CREATE INDEX statements +** or by SELECT statements with ORDER BY clauses that cannot be satisfied +** using indexes and without LIMIT clauses. +** +** The VdbeSorter object implements a multi-threaded external merge sort +** algorithm that is efficient even if the number of elements being sorted +** exceeds the available memory. +** +** Here is the (internal, non-API) interface between this module and the +** rest of the SQLite system: +** +** sqlite3VdbeSorterInit() Create a new VdbeSorter object. +** +** sqlite3VdbeSorterWrite() Add a single new row to the VdbeSorter +** object. The row is a binary blob in the +** OP_MakeRecord format that contains both +** the ORDER BY key columns and result columns +** in the case of a SELECT w/ ORDER BY, or +** the complete record for an index entry +** in the case of a CREATE INDEX. +** +** sqlite3VdbeSorterRewind() Sort all content previously added. +** Position the read cursor on the +** first sorted element. +** +** sqlite3VdbeSorterNext() Advance the read cursor to the next sorted +** element. +** +** sqlite3VdbeSorterRowkey() Return the complete binary blob for the +** row currently under the read cursor. +** +** sqlite3VdbeSorterCompare() Compare the binary blob for the row +** currently under the read cursor against +** another binary blob X and report if +** X is strictly less than the read cursor. +** Used to enforce uniqueness in a +** CREATE UNIQUE INDEX statement. +** +** sqlite3VdbeSorterClose() Close the VdbeSorter object and reclaim +** all resources. +** +** sqlite3VdbeSorterReset() Refurbish the VdbeSorter for reuse. This +** is like Close() followed by Init() only +** much faster. +** +** The interfaces above must be called in a particular order. Write() can +** only occur in between Init()/Reset() and Rewind(). Next(), Rowkey(), and +** Compare() can only occur in between Rewind() and Close()/Reset(). i.e. +** +** Init() +** for each record: Write() +** Rewind() +** Rowkey()/Compare() +** Next() +** Close() +** +** Algorithm: +** +** Records passed to the sorter via calls to Write() are initially held +** unsorted in main memory. Assuming the amount of memory used never exceeds +** a threshold, when Rewind() is called the set of records is sorted using +** an in-memory merge sort. In this case, no temporary files are required +** and subsequent calls to Rowkey(), Next() and Compare() read records +** directly from main memory. +** +** If the amount of space used to store records in main memory exceeds the +** threshold, then the set of records currently in memory are sorted and +** written to a temporary file in "Packed Memory Array" (PMA) format. +** A PMA created at this point is known as a "level-0 PMA". Higher levels +** of PMAs may be created by merging existing PMAs together - for example +** merging two or more level-0 PMAs together creates a level-1 PMA. +** +** The threshold for the amount of main memory to use before flushing +** records to a PMA is roughly the same as the limit configured for the +** page-cache of the main database. Specifically, the threshold is set to +** the value returned by "PRAGMA main.page_size" multipled by +** that returned by "PRAGMA main.cache_size", in bytes. +** +** If the sorter is running in single-threaded mode, then all PMAs generated +** are appended to a single temporary file. Or, if the sorter is running in +** multi-threaded mode then up to (N+1) temporary files may be opened, where +** N is the configured number of worker threads. In this case, instead of +** sorting the records and writing the PMA to a temporary file itself, the +** calling thread usually launches a worker thread to do so. Except, if +** there are already N worker threads running, the main thread does the work +** itself. +** +** The sorter is running in multi-threaded mode if (a) the library was built +** with pre-processor symbol SQLITE_MAX_WORKER_THREADS set to a value greater +** than zero, and (b) worker threads have been enabled at runtime by calling +** "PRAGMA threads=N" with some value of N greater than 0. +** +** When Rewind() is called, any data remaining in memory is flushed to a +** final PMA. So at this point the data is stored in some number of sorted +** PMAs within temporary files on disk. +** +** If there are fewer than SORTER_MAX_MERGE_COUNT PMAs in total and the +** sorter is running in single-threaded mode, then these PMAs are merged +** incrementally as keys are retreived from the sorter by the VDBE. The +** MergeEngine object, described in further detail below, performs this +** merge. +** +** Or, if running in multi-threaded mode, then a background thread is +** launched to merge the existing PMAs. Once the background thread has +** merged T bytes of data into a single sorted PMA, the main thread +** begins reading keys from that PMA while the background thread proceeds +** with merging the next T bytes of data. And so on. +** +** Parameter T is set to half the value of the memory threshold used +** by Write() above to determine when to create a new PMA. +** +** If there are more than SORTER_MAX_MERGE_COUNT PMAs in total when +** Rewind() is called, then a hierarchy of incremental-merges is used. +** First, T bytes of data from the first SORTER_MAX_MERGE_COUNT PMAs on +** disk are merged together. Then T bytes of data from the second set, and +** so on, such that no operation ever merges more than SORTER_MAX_MERGE_COUNT +** PMAs at a time. This done is to improve locality. +** +** If running in multi-threaded mode and there are more than +** SORTER_MAX_MERGE_COUNT PMAs on disk when Rewind() is called, then more +** than one background thread may be created. Specifically, there may be +** one background thread for each temporary file on disk, and one background +** thread to merge the output of each of the others to a single PMA for +** the main thread to read from. +*/ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +/* +** If SQLITE_DEBUG_SORTER_THREADS is defined, this module outputs various +** messages to stderr that may be helpful in understanding the performance +** characteristics of the sorter in multi-threaded mode. +*/ +#if 0 +# define SQLITE_DEBUG_SORTER_THREADS 1 +#endif + +/* +** Hard-coded maximum amount of data to accumulate in memory before flushing +** to a level 0 PMA. The purpose of this limit is to prevent various integer +** overflows. 512MiB. +*/ +#define SQLITE_MAX_PMASZ (1<<29) + +/* +** Private objects used by the sorter +*/ +typedef struct MergeEngine MergeEngine; /* Merge PMAs together */ +typedef struct PmaReader PmaReader; /* Incrementally read one PMA */ +typedef struct PmaWriter PmaWriter; /* Incrementally write one PMA */ +typedef struct SorterRecord SorterRecord; /* A record being sorted */ +typedef struct SortSubtask SortSubtask; /* A sub-task in the sort process */ +typedef struct SorterFile SorterFile; /* Temporary file object wrapper */ +typedef struct SorterList SorterList; /* In-memory list of records */ +typedef struct IncrMerger IncrMerger; /* Read & merge multiple PMAs */ + +/* +** A container for a temp file handle and the current amount of data +** stored in the file. +*/ +struct SorterFile { + sqlite3_file *pFd; /* File handle */ + i64 iEof; /* Bytes of data stored in pFd */ +}; + +/* +** An in-memory list of objects to be sorted. +** +** If aMemory==0 then each object is allocated separately and the objects +** are connected using SorterRecord.u.pNext. If aMemory!=0 then all objects +** are stored in the aMemory[] bulk memory, one right after the other, and +** are connected using SorterRecord.u.iNext. +*/ +struct SorterList { + SorterRecord *pList; /* Linked list of records */ + u8 *aMemory; /* If non-NULL, bulk memory to hold pList */ + int szPMA; /* Size of pList as PMA in bytes */ +}; + +/* +** The MergeEngine object is used to combine two or more smaller PMAs into +** one big PMA using a merge operation. Separate PMAs all need to be +** combined into one big PMA in order to be able to step through the sorted +** records in order. +** +** The aReadr[] array contains a PmaReader object for each of the PMAs being +** merged. An aReadr[] object either points to a valid key or else is at EOF. +** ("EOF" means "End Of File". When aReadr[] is at EOF there is no more data.) +** For the purposes of the paragraphs below, we assume that the array is +** actually N elements in size, where N is the smallest power of 2 greater +** to or equal to the number of PMAs being merged. The extra aReadr[] elements +** are treated as if they are empty (always at EOF). +** +** The aTree[] array is also N elements in size. The value of N is stored in +** the MergeEngine.nTree variable. +** +** The final (N/2) elements of aTree[] contain the results of comparing +** pairs of PMA keys together. Element i contains the result of +** comparing aReadr[2*i-N] and aReadr[2*i-N+1]. Whichever key is smaller, the +** aTree element is set to the index of it. +** +** For the purposes of this comparison, EOF is considered greater than any +** other key value. If the keys are equal (only possible with two EOF +** values), it doesn't matter which index is stored. +** +** The (N/4) elements of aTree[] that precede the final (N/2) described +** above contains the index of the smallest of each block of 4 PmaReaders +** And so on. So that aTree[1] contains the index of the PmaReader that +** currently points to the smallest key value. aTree[0] is unused. +** +** Example: +** +** aReadr[0] -> Banana +** aReadr[1] -> Feijoa +** aReadr[2] -> Elderberry +** aReadr[3] -> Currant +** aReadr[4] -> Grapefruit +** aReadr[5] -> Apple +** aReadr[6] -> Durian +** aReadr[7] -> EOF +** +** aTree[] = { X, 5 0, 5 0, 3, 5, 6 } +** +** The current element is "Apple" (the value of the key indicated by +** PmaReader 5). When the Next() operation is invoked, PmaReader 5 will +** be advanced to the next key in its segment. Say the next key is +** "Eggplant": +** +** aReadr[5] -> Eggplant +** +** The contents of aTree[] are updated first by comparing the new PmaReader +** 5 key to the current key of PmaReader 4 (still "Grapefruit"). The PmaReader +** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree. +** The value of PmaReader 6 - "Durian" - is now smaller than that of PmaReader +** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Bananafile2. And instead of using a +** background thread to prepare data for the PmaReader, with a single +** threaded IncrMerger the allocate part of pTask->file2 is "refilled" with +** keys from pMerger by the calling thread whenever the PmaReader runs out +** of data. +*/ +struct IncrMerger { + SortSubtask *pTask; /* Task that owns this merger */ + MergeEngine *pMerger; /* Merge engine thread reads data from */ + i64 iStartOff; /* Offset to start writing file at */ + int mxSz; /* Maximum bytes of data to store */ + int bEof; /* Set to true when merge is finished */ + int bUseThread; /* True to use a bg thread for this object */ + SorterFile aFile[2]; /* aFile[0] for reading, [1] for writing */ +}; + +/* +** An instance of this object is used for writing a PMA. +** +** The PMA is written one record at a time. Each record is of an arbitrary +** size. But I/O is more efficient if it occurs in page-sized blocks where +** each block is aligned on a page boundary. This object caches writes to +** the PMA so that aligned, page-size blocks are written. +*/ +struct PmaWriter { + int eFWErr; /* Non-zero if in an error state */ + u8 *aBuffer; /* Pointer to write buffer */ + int nBuffer; /* Size of write buffer in bytes */ + int iBufStart; /* First byte of buffer to write */ + int iBufEnd; /* Last byte of buffer to write */ + i64 iWriteOff; /* Offset of start of buffer in file */ + sqlite3_file *pFd; /* File handle to write to */ +}; + +/* +** This object is the header on a single record while that record is being +** held in memory and prior to being written out as part of a PMA. +** +** How the linked list is connected depends on how memory is being managed +** by this module. If using a separate allocation for each in-memory record +** (VdbeSorter.list.aMemory==0), then the list is always connected using the +** SorterRecord.u.pNext pointers. +** +** Or, if using the single large allocation method (VdbeSorter.list.aMemory!=0), +** then while records are being accumulated the list is linked using the +** SorterRecord.u.iNext offset. This is because the aMemory[] array may +** be sqlite3Realloc()ed while records are being accumulated. Once the VM +** has finished passing records to the sorter, or when the in-memory buffer +** is full, the list is sorted. As part of the sorting process, it is +** converted to use the SorterRecord.u.pNext pointers. See function +** vdbeSorterSort() for details. +*/ +struct SorterRecord { + int nVal; /* Size of the record in bytes */ + union { + SorterRecord *pNext; /* Pointer to next record in list */ + int iNext; /* Offset within aMemory of next record */ + } u; + /* The data for the record immediately follows this header */ +}; + +/* Return a pointer to the buffer containing the record data for SorterRecord +** object p. Should be used as if: +** +** void *SRVAL(SorterRecord *p) { return (void*)&p[1]; } +*/ +#define SRVAL(p) ((void*)((SorterRecord*)(p) + 1)) + + +/* Maximum number of PMAs that a single MergeEngine can merge */ +#define SORTER_MAX_MERGE_COUNT 16 + +static int vdbeIncrSwap(IncrMerger*); +static void vdbeIncrFree(IncrMerger *); + +/* +** Free all memory belonging to the PmaReader object passed as the +** argument. All structure fields are set to zero before returning. +*/ +static void vdbePmaReaderClear(PmaReader *pReadr){ + sqlite3_free(pReadr->aAlloc); + sqlite3_free(pReadr->aBuffer); + if( pReadr->aMap ) sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap); + vdbeIncrFree(pReadr->pIncr); + memset(pReadr, 0, sizeof(PmaReader)); +} + +/* +** Read the next nByte bytes of data from the PMA p. +** If successful, set *ppOut to point to a buffer containing the data +** and return SQLITE_OK. Otherwise, if an error occurs, return an SQLite +** error code. +** +** The buffer returned in *ppOut is only valid until the +** next call to this function. +*/ +static int vdbePmaReadBlob( + PmaReader *p, /* PmaReader from which to take the blob */ + int nByte, /* Bytes of data to read */ + u8 **ppOut /* OUT: Pointer to buffer containing data */ +){ + int iBuf; /* Offset within buffer to read from */ + int nAvail; /* Bytes of data available in buffer */ + + if( p->aMap ){ + *ppOut = &p->aMap[p->iReadOff]; + p->iReadOff += nByte; + return SQLITE_OK; + } + + assert( p->aBuffer ); + + /* If there is no more data to be read from the buffer, read the next + ** p->nBuffer bytes of data from the file into it. Or, if there are less + ** than p->nBuffer bytes remaining in the PMA, read all remaining data. */ + iBuf = p->iReadOff % p->nBuffer; + if( iBuf==0 ){ + int nRead; /* Bytes to read from disk */ + int rc; /* sqlite3OsRead() return code */ + + /* Determine how many bytes of data to read. */ + if( (p->iEof - p->iReadOff) > (i64)p->nBuffer ){ + nRead = p->nBuffer; + }else{ + nRead = (int)(p->iEof - p->iReadOff); + } + assert( nRead>0 ); + + /* Readr data from the file. Return early if an error occurs. */ + rc = sqlite3OsRead(p->pFd, p->aBuffer, nRead, p->iReadOff); + assert( rc!=SQLITE_IOERR_SHORT_READ ); + if( rc!=SQLITE_OK ) return rc; + } + nAvail = p->nBuffer - iBuf; + + if( nByte<=nAvail ){ + /* The requested data is available in the in-memory buffer. In this + ** case there is no need to make a copy of the data, just return a + ** pointer into the buffer to the caller. */ + *ppOut = &p->aBuffer[iBuf]; + p->iReadOff += nByte; + }else{ + /* The requested data is not all available in the in-memory buffer. + ** In this case, allocate space at p->aAlloc[] to copy the requested + ** range into. Then return a copy of pointer p->aAlloc to the caller. */ + int nRem; /* Bytes remaining to copy */ + + /* Extend the p->aAlloc[] allocation if required. */ + if( p->nAllocnAlloc); + while( nByte>nNew ) nNew = nNew*2; + aNew = sqlite3Realloc(p->aAlloc, nNew); + if( !aNew ) return SQLITE_NOMEM_BKPT; + p->nAlloc = nNew; + p->aAlloc = aNew; + } + + /* Copy as much data as is available in the buffer into the start of + ** p->aAlloc[]. */ + memcpy(p->aAlloc, &p->aBuffer[iBuf], nAvail); + p->iReadOff += nAvail; + nRem = nByte - nAvail; + + /* The following loop copies up to p->nBuffer bytes per iteration into + ** the p->aAlloc[] buffer. */ + while( nRem>0 ){ + int rc; /* vdbePmaReadBlob() return code */ + int nCopy; /* Number of bytes to copy */ + u8 *aNext; /* Pointer to buffer to copy data from */ + + nCopy = nRem; + if( nRem>p->nBuffer ) nCopy = p->nBuffer; + rc = vdbePmaReadBlob(p, nCopy, &aNext); + if( rc!=SQLITE_OK ) return rc; + assert( aNext!=p->aAlloc ); + memcpy(&p->aAlloc[nByte - nRem], aNext, nCopy); + nRem -= nCopy; + } + + *ppOut = p->aAlloc; + } + + return SQLITE_OK; +} + +/* +** Read a varint from the stream of data accessed by p. Set *pnOut to +** the value read. +*/ +static int vdbePmaReadVarint(PmaReader *p, u64 *pnOut){ + int iBuf; + + if( p->aMap ){ + p->iReadOff += sqlite3GetVarint(&p->aMap[p->iReadOff], pnOut); + }else{ + iBuf = p->iReadOff % p->nBuffer; + if( iBuf && (p->nBuffer-iBuf)>=9 ){ + p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut); + }else{ + u8 aVarint[16], *a; + int i = 0, rc; + do{ + rc = vdbePmaReadBlob(p, 1, &a); + if( rc ) return rc; + aVarint[(i++)&0xf] = a[0]; + }while( (a[0]&0x80)!=0 ); + sqlite3GetVarint(aVarint, pnOut); + } + } + + return SQLITE_OK; +} + +/* +** Attempt to memory map file pFile. If successful, set *pp to point to the +** new mapping and return SQLITE_OK. If the mapping is not attempted +** (because the file is too large or the VFS layer is configured not to use +** mmap), return SQLITE_OK and set *pp to NULL. +** +** Or, if an error occurs, return an SQLite error code. The final value of +** *pp is undefined in this case. +*/ +static int vdbeSorterMapFile(SortSubtask *pTask, SorterFile *pFile, u8 **pp){ + int rc = SQLITE_OK; + if( pFile->iEof<=(i64)(pTask->pSorter->db->nMaxSorterMmap) ){ + sqlite3_file *pFd = pFile->pFd; + if( pFd->pMethods->iVersion>=3 ){ + rc = sqlite3OsFetch(pFd, 0, (int)pFile->iEof, (void**)pp); + testcase( rc!=SQLITE_OK ); + } + } + return rc; +} + +/* +** Attach PmaReader pReadr to file pFile (if it is not already attached to +** that file) and seek it to offset iOff within the file. Return SQLITE_OK +** if successful, or an SQLite error code if an error occurs. +*/ +static int vdbePmaReaderSeek( + SortSubtask *pTask, /* Task context */ + PmaReader *pReadr, /* Reader whose cursor is to be moved */ + SorterFile *pFile, /* Sorter file to read from */ + i64 iOff /* Offset in pFile */ +){ + int rc = SQLITE_OK; + + assert( pReadr->pIncr==0 || pReadr->pIncr->bEof==0 ); + + if( sqlite3FaultSim(201) ) return SQLITE_IOERR_READ; + if( pReadr->aMap ){ + sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap); + pReadr->aMap = 0; + } + pReadr->iReadOff = iOff; + pReadr->iEof = pFile->iEof; + pReadr->pFd = pFile->pFd; + + rc = vdbeSorterMapFile(pTask, pFile, &pReadr->aMap); + if( rc==SQLITE_OK && pReadr->aMap==0 ){ + int pgsz = pTask->pSorter->pgsz; + int iBuf = pReadr->iReadOff % pgsz; + if( pReadr->aBuffer==0 ){ + pReadr->aBuffer = (u8*)sqlite3Malloc(pgsz); + if( pReadr->aBuffer==0 ) rc = SQLITE_NOMEM_BKPT; + pReadr->nBuffer = pgsz; + } + if( rc==SQLITE_OK && iBuf ){ + int nRead = pgsz - iBuf; + if( (pReadr->iReadOff + nRead) > pReadr->iEof ){ + nRead = (int)(pReadr->iEof - pReadr->iReadOff); + } + rc = sqlite3OsRead( + pReadr->pFd, &pReadr->aBuffer[iBuf], nRead, pReadr->iReadOff + ); + testcase( rc!=SQLITE_OK ); + } + } + + return rc; +} + +/* +** Advance PmaReader pReadr to the next key in its PMA. Return SQLITE_OK if +** no error occurs, or an SQLite error code if one does. +*/ +static int vdbePmaReaderNext(PmaReader *pReadr){ + int rc = SQLITE_OK; /* Return Code */ + u64 nRec = 0; /* Size of record in bytes */ + + + if( pReadr->iReadOff>=pReadr->iEof ){ + IncrMerger *pIncr = pReadr->pIncr; + int bEof = 1; + if( pIncr ){ + rc = vdbeIncrSwap(pIncr); + if( rc==SQLITE_OK && pIncr->bEof==0 ){ + rc = vdbePmaReaderSeek( + pIncr->pTask, pReadr, &pIncr->aFile[0], pIncr->iStartOff + ); + bEof = 0; + } + } + + if( bEof ){ + /* This is an EOF condition */ + vdbePmaReaderClear(pReadr); + testcase( rc!=SQLITE_OK ); + return rc; + } + } + + if( rc==SQLITE_OK ){ + rc = vdbePmaReadVarint(pReadr, &nRec); + } + if( rc==SQLITE_OK ){ + pReadr->nKey = (int)nRec; + rc = vdbePmaReadBlob(pReadr, (int)nRec, &pReadr->aKey); + testcase( rc!=SQLITE_OK ); + } + + return rc; +} + +/* +** Initialize PmaReader pReadr to scan through the PMA stored in file pFile +** starting at offset iStart and ending at offset iEof-1. This function +** leaves the PmaReader pointing to the first key in the PMA (or EOF if the +** PMA is empty). +** +** If the pnByte parameter is NULL, then it is assumed that the file +** contains a single PMA, and that that PMA omits the initial length varint. +*/ +static int vdbePmaReaderInit( + SortSubtask *pTask, /* Task context */ + SorterFile *pFile, /* Sorter file to read from */ + i64 iStart, /* Start offset in pFile */ + PmaReader *pReadr, /* PmaReader to populate */ + i64 *pnByte /* IN/OUT: Increment this value by PMA size */ +){ + int rc; + + assert( pFile->iEof>iStart ); + assert( pReadr->aAlloc==0 && pReadr->nAlloc==0 ); + assert( pReadr->aBuffer==0 ); + assert( pReadr->aMap==0 ); + + rc = vdbePmaReaderSeek(pTask, pReadr, pFile, iStart); + if( rc==SQLITE_OK ){ + u64 nByte = 0; /* Size of PMA in bytes */ + rc = vdbePmaReadVarint(pReadr, &nByte); + pReadr->iEof = pReadr->iReadOff + nByte; + *pnByte += nByte; + } + + if( rc==SQLITE_OK ){ + rc = vdbePmaReaderNext(pReadr); + } + return rc; +} + +/* +** A version of vdbeSorterCompare() that assumes that it has already been +** determined that the first field of key1 is equal to the first field of +** key2. +*/ +static int vdbeSorterCompareTail( + SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ + const void *pKey1, int nKey1, /* Left side of comparison */ + const void *pKey2, int nKey2 /* Right side of comparison */ +){ + UnpackedRecord *r2 = pTask->pUnpacked; + if( *pbKey2Cached==0 ){ + sqlite3VdbeRecordUnpack(pTask->pSorter->pKeyInfo, nKey2, pKey2, r2); + *pbKey2Cached = 1; + } + return sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, r2, 1); +} + +/* +** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, +** size nKey2 bytes). Use (pTask->pKeyInfo) for the collation sequences +** used by the comparison. Return the result of the comparison. +** +** If IN/OUT parameter *pbKey2Cached is true when this function is called, +** it is assumed that (pTask->pUnpacked) contains the unpacked version +** of key2. If it is false, (pTask->pUnpacked) is populated with the unpacked +** version of key2 and *pbKey2Cached set to true before returning. +** +** If an OOM error is encountered, (pTask->pUnpacked->error_rc) is set +** to SQLITE_NOMEM. +*/ +static int vdbeSorterCompare( + SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ + const void *pKey1, int nKey1, /* Left side of comparison */ + const void *pKey2, int nKey2 /* Right side of comparison */ +){ + UnpackedRecord *r2 = pTask->pUnpacked; + if( !*pbKey2Cached ){ + sqlite3VdbeRecordUnpack(pTask->pSorter->pKeyInfo, nKey2, pKey2, r2); + *pbKey2Cached = 1; + } + return sqlite3VdbeRecordCompare(nKey1, pKey1, r2); +} + +/* +** A specially optimized version of vdbeSorterCompare() that assumes that +** the first field of each key is a TEXT value and that the collation +** sequence to compare them with is BINARY. +*/ +static int vdbeSorterCompareText( + SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ + const void *pKey1, int nKey1, /* Left side of comparison */ + const void *pKey2, int nKey2 /* Right side of comparison */ +){ + const u8 * const p1 = (const u8 * const)pKey1; + const u8 * const p2 = (const u8 * const)pKey2; + const u8 * const v1 = &p1[ p1[0] ]; /* Pointer to value 1 */ + const u8 * const v2 = &p2[ p2[0] ]; /* Pointer to value 2 */ + + int n1; + int n2; + int res; + + getVarint32NR(&p1[1], n1); + getVarint32NR(&p2[1], n2); + res = memcmp(v1, v2, (MIN(n1, n2) - 13)/2); + if( res==0 ){ + res = n1 - n2; + } + + if( res==0 ){ + if( pTask->pSorter->pKeyInfo->nKeyField>1 ){ + res = vdbeSorterCompareTail( + pTask, pbKey2Cached, pKey1, nKey1, pKey2, nKey2 + ); + } + }else{ + assert( !(pTask->pSorter->pKeyInfo->aSortFlags[0]&KEYINFO_ORDER_BIGNULL) ); + if( pTask->pSorter->pKeyInfo->aSortFlags[0] ){ + res = res * -1; + } + } + + return res; +} + +/* +** A specially optimized version of vdbeSorterCompare() that assumes that +** the first field of each key is an INTEGER value. +*/ +static int vdbeSorterCompareInt( + SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ + const void *pKey1, int nKey1, /* Left side of comparison */ + const void *pKey2, int nKey2 /* Right side of comparison */ +){ + const u8 * const p1 = (const u8 * const)pKey1; + const u8 * const p2 = (const u8 * const)pKey2; + const int s1 = p1[1]; /* Left hand serial type */ + const int s2 = p2[1]; /* Right hand serial type */ + const u8 * const v1 = &p1[ p1[0] ]; /* Pointer to value 1 */ + const u8 * const v2 = &p2[ p2[0] ]; /* Pointer to value 2 */ + int res; /* Return value */ + + assert( (s1>0 && s1<7) || s1==8 || s1==9 ); + assert( (s2>0 && s2<7) || s2==8 || s2==9 ); + + if( s1==s2 ){ + /* The two values have the same sign. Compare using memcmp(). */ + static const u8 aLen[] = {0, 1, 2, 3, 4, 6, 8, 0, 0, 0 }; + const u8 n = aLen[s1]; + int i; + res = 0; + for(i=0; i7 && s2>7 ){ + res = s1 - s2; + }else{ + if( s2>7 ){ + res = +1; + }else if( s1>7 ){ + res = -1; + }else{ + res = s1 - s2; + } + assert( res!=0 ); + + if( res>0 ){ + if( *v1 & 0x80 ) res = -1; + }else{ + if( *v2 & 0x80 ) res = +1; + } + } + + if( res==0 ){ + if( pTask->pSorter->pKeyInfo->nKeyField>1 ){ + res = vdbeSorterCompareTail( + pTask, pbKey2Cached, pKey1, nKey1, pKey2, nKey2 + ); + } + }else if( pTask->pSorter->pKeyInfo->aSortFlags[0] ){ + assert( !(pTask->pSorter->pKeyInfo->aSortFlags[0]&KEYINFO_ORDER_BIGNULL) ); + res = res * -1; + } + + return res; +} + +/* +** Initialize the temporary index cursor just opened as a sorter cursor. +** +** Usually, the sorter module uses the value of (pCsr->pKeyInfo->nKeyField) +** to determine the number of fields that should be compared from the +** records being sorted. However, if the value passed as argument nField +** is non-zero and the sorter is able to guarantee a stable sort, nField +** is used instead. This is used when sorting records for a CREATE INDEX +** statement. In this case, keys are always delivered to the sorter in +** order of the primary key, which happens to be make up the final part +** of the records being sorted. So if the sort is stable, there is never +** any reason to compare PK fields and they can be ignored for a small +** performance boost. +** +** The sorter can guarantee a stable sort when running in single-threaded +** mode, but not in multi-threaded mode. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +SQLITE_PRIVATE int sqlite3VdbeSorterInit( + sqlite3 *db, /* Database connection (for malloc()) */ + int nField, /* Number of key fields in each record */ + VdbeCursor *pCsr /* Cursor that holds the new sorter */ +){ + int pgsz; /* Page size of main database */ + int i; /* Used to iterate through aTask[] */ + VdbeSorter *pSorter; /* The new sorter */ + KeyInfo *pKeyInfo; /* Copy of pCsr->pKeyInfo with db==0 */ + int szKeyInfo; /* Size of pCsr->pKeyInfo in bytes */ + int sz; /* Size of pSorter in bytes */ + int rc = SQLITE_OK; +#if SQLITE_MAX_WORKER_THREADS==0 +# define nWorker 0 +#else + int nWorker; +#endif + + /* Initialize the upper limit on the number of worker threads */ +#if SQLITE_MAX_WORKER_THREADS>0 + if( sqlite3TempInMemory(db) || sqlite3GlobalConfig.bCoreMutex==0 ){ + nWorker = 0; + }else{ + nWorker = db->aLimit[SQLITE_LIMIT_WORKER_THREADS]; + } +#endif + + /* Do not allow the total number of threads (main thread + all workers) + ** to exceed the maximum merge count */ +#if SQLITE_MAX_WORKER_THREADS>=SORTER_MAX_MERGE_COUNT + if( nWorker>=SORTER_MAX_MERGE_COUNT ){ + nWorker = SORTER_MAX_MERGE_COUNT-1; + } +#endif + + assert( pCsr->pKeyInfo ); + assert( !pCsr->isEphemeral ); + assert( pCsr->eCurType==CURTYPE_SORTER ); + szKeyInfo = sizeof(KeyInfo) + (pCsr->pKeyInfo->nKeyField-1)*sizeof(CollSeq*); + sz = sizeof(VdbeSorter) + nWorker * sizeof(SortSubtask); + + pSorter = (VdbeSorter*)sqlite3DbMallocZero(db, sz + szKeyInfo); + pCsr->uc.pSorter = pSorter; + if( pSorter==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + Btree *pBt = db->aDb[0].pBt; + pSorter->pKeyInfo = pKeyInfo = (KeyInfo*)((u8*)pSorter + sz); + memcpy(pKeyInfo, pCsr->pKeyInfo, szKeyInfo); + pKeyInfo->db = 0; + if( nField && nWorker==0 ){ + pKeyInfo->nKeyField = nField; + } + sqlite3BtreeEnter(pBt); + pSorter->pgsz = pgsz = sqlite3BtreeGetPageSize(pBt); + sqlite3BtreeLeave(pBt); + pSorter->nTask = nWorker + 1; + pSorter->iPrev = (u8)(nWorker - 1); + pSorter->bUseThreads = (pSorter->nTask>1); + pSorter->db = db; + for(i=0; inTask; i++){ + SortSubtask *pTask = &pSorter->aTask[i]; + pTask->pSorter = pSorter; + } + + if( !sqlite3TempInMemory(db) ){ + i64 mxCache; /* Cache size in bytes*/ + u32 szPma = sqlite3GlobalConfig.szPma; + pSorter->mnPmaSize = szPma * pgsz; + + mxCache = db->aDb[0].pSchema->cache_size; + if( mxCache<0 ){ + /* A negative cache-size value C indicates that the cache is abs(C) + ** KiB in size. */ + mxCache = mxCache * -1024; + }else{ + mxCache = mxCache * pgsz; + } + mxCache = MIN(mxCache, SQLITE_MAX_PMASZ); + pSorter->mxPmaSize = MAX(pSorter->mnPmaSize, (int)mxCache); + + /* Avoid large memory allocations if the application has requested + ** SQLITE_CONFIG_SMALL_MALLOC. */ + if( sqlite3GlobalConfig.bSmallMalloc==0 ){ + assert( pSorter->iMemory==0 ); + pSorter->nMemory = pgsz; + pSorter->list.aMemory = (u8*)sqlite3Malloc(pgsz); + if( !pSorter->list.aMemory ) rc = SQLITE_NOMEM_BKPT; + } + } + + if( pKeyInfo->nAllField<13 + && (pKeyInfo->aColl[0]==0 || pKeyInfo->aColl[0]==db->pDfltColl) + && (pKeyInfo->aSortFlags[0] & KEYINFO_ORDER_BIGNULL)==0 + ){ + pSorter->typeMask = SORTER_TYPE_INTEGER | SORTER_TYPE_TEXT; + } + } + + return rc; +} +#undef nWorker /* Defined at the top of this function */ + +/* +** Free the list of sorted records starting at pRecord. +*/ +static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){ + SorterRecord *p; + SorterRecord *pNext; + for(p=pRecord; p; p=pNext){ + pNext = p->u.pNext; + sqlite3DbFree(db, p); + } +} + +/* +** Free all resources owned by the object indicated by argument pTask. All +** fields of *pTask are zeroed before returning. +*/ +static void vdbeSortSubtaskCleanup(sqlite3 *db, SortSubtask *pTask){ + sqlite3DbFree(db, pTask->pUnpacked); +#if SQLITE_MAX_WORKER_THREADS>0 + /* pTask->list.aMemory can only be non-zero if it was handed memory + ** from the main thread. That only occurs SQLITE_MAX_WORKER_THREADS>0 */ + if( pTask->list.aMemory ){ + sqlite3_free(pTask->list.aMemory); + }else +#endif + { + assert( pTask->list.aMemory==0 ); + vdbeSorterRecordFree(0, pTask->list.pList); + } + if( pTask->file.pFd ){ + sqlite3OsCloseFree(pTask->file.pFd); + } + if( pTask->file2.pFd ){ + sqlite3OsCloseFree(pTask->file2.pFd); + } + memset(pTask, 0, sizeof(SortSubtask)); +} + +#ifdef SQLITE_DEBUG_SORTER_THREADS +static void vdbeSorterWorkDebug(SortSubtask *pTask, const char *zEvent){ + i64 t; + int iTask = (pTask - pTask->pSorter->aTask); + sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t); + fprintf(stderr, "%lld:%d %s\n", t, iTask, zEvent); +} +static void vdbeSorterRewindDebug(const char *zEvent){ + i64 t = 0; + sqlite3_vfs *pVfs = sqlite3_vfs_find(0); + if( ALWAYS(pVfs) ) sqlite3OsCurrentTimeInt64(pVfs, &t); + fprintf(stderr, "%lld:X %s\n", t, zEvent); +} +static void vdbeSorterPopulateDebug( + SortSubtask *pTask, + const char *zEvent +){ + i64 t; + int iTask = (pTask - pTask->pSorter->aTask); + sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t); + fprintf(stderr, "%lld:bg%d %s\n", t, iTask, zEvent); +} +static void vdbeSorterBlockDebug( + SortSubtask *pTask, + int bBlocked, + const char *zEvent +){ + if( bBlocked ){ + i64 t; + sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t); + fprintf(stderr, "%lld:main %s\n", t, zEvent); + } +} +#else +# define vdbeSorterWorkDebug(x,y) +# define vdbeSorterRewindDebug(y) +# define vdbeSorterPopulateDebug(x,y) +# define vdbeSorterBlockDebug(x,y,z) +#endif + +#if SQLITE_MAX_WORKER_THREADS>0 +/* +** Join thread pTask->thread. +*/ +static int vdbeSorterJoinThread(SortSubtask *pTask){ + int rc = SQLITE_OK; + if( pTask->pThread ){ +#ifdef SQLITE_DEBUG_SORTER_THREADS + int bDone = pTask->bDone; +#endif + void *pRet = SQLITE_INT_TO_PTR(SQLITE_ERROR); + vdbeSorterBlockDebug(pTask, !bDone, "enter"); + (void)sqlite3ThreadJoin(pTask->pThread, &pRet); + vdbeSorterBlockDebug(pTask, !bDone, "exit"); + rc = SQLITE_PTR_TO_INT(pRet); + assert( pTask->bDone==1 ); + pTask->bDone = 0; + pTask->pThread = 0; + } + return rc; +} + +/* +** Launch a background thread to run xTask(pIn). +*/ +static int vdbeSorterCreateThread( + SortSubtask *pTask, /* Thread will use this task object */ + void *(*xTask)(void*), /* Routine to run in a separate thread */ + void *pIn /* Argument passed into xTask() */ +){ + assert( pTask->pThread==0 && pTask->bDone==0 ); + return sqlite3ThreadCreate(&pTask->pThread, xTask, pIn); +} + +/* +** Join all outstanding threads launched by SorterWrite() to create +** level-0 PMAs. +*/ +static int vdbeSorterJoinAll(VdbeSorter *pSorter, int rcin){ + int rc = rcin; + int i; + + /* This function is always called by the main user thread. + ** + ** If this function is being called after SorterRewind() has been called, + ** it is possible that thread pSorter->aTask[pSorter->nTask-1].pThread + ** is currently attempt to join one of the other threads. To avoid a race + ** condition where this thread also attempts to join the same object, join + ** thread pSorter->aTask[pSorter->nTask-1].pThread first. */ + for(i=pSorter->nTask-1; i>=0; i--){ + SortSubtask *pTask = &pSorter->aTask[i]; + int rc2 = vdbeSorterJoinThread(pTask); + if( rc==SQLITE_OK ) rc = rc2; + } + return rc; +} +#else +# define vdbeSorterJoinAll(x,rcin) (rcin) +# define vdbeSorterJoinThread(pTask) SQLITE_OK +#endif + +/* +** Allocate a new MergeEngine object capable of handling up to +** nReader PmaReader inputs. +** +** nReader is automatically rounded up to the next power of two. +** nReader may not exceed SORTER_MAX_MERGE_COUNT even after rounding up. +*/ +static MergeEngine *vdbeMergeEngineNew(int nReader){ + int N = 2; /* Smallest power of two >= nReader */ + int nByte; /* Total bytes of space to allocate */ + MergeEngine *pNew; /* Pointer to allocated object to return */ + + assert( nReader<=SORTER_MAX_MERGE_COUNT ); + + while( NnTree = N; + pNew->pTask = 0; + pNew->aReadr = (PmaReader*)&pNew[1]; + pNew->aTree = (int*)&pNew->aReadr[N]; + } + return pNew; +} + +/* +** Free the MergeEngine object passed as the only argument. +*/ +static void vdbeMergeEngineFree(MergeEngine *pMerger){ + int i; + if( pMerger ){ + for(i=0; inTree; i++){ + vdbePmaReaderClear(&pMerger->aReadr[i]); + } + } + sqlite3_free(pMerger); +} + +/* +** Free all resources associated with the IncrMerger object indicated by +** the first argument. +*/ +static void vdbeIncrFree(IncrMerger *pIncr){ + if( pIncr ){ +#if SQLITE_MAX_WORKER_THREADS>0 + if( pIncr->bUseThread ){ + vdbeSorterJoinThread(pIncr->pTask); + if( pIncr->aFile[0].pFd ) sqlite3OsCloseFree(pIncr->aFile[0].pFd); + if( pIncr->aFile[1].pFd ) sqlite3OsCloseFree(pIncr->aFile[1].pFd); + } +#endif + vdbeMergeEngineFree(pIncr->pMerger); + sqlite3_free(pIncr); + } +} + +/* +** Reset a sorting cursor back to its original empty state. +*/ +SQLITE_PRIVATE void sqlite3VdbeSorterReset(sqlite3 *db, VdbeSorter *pSorter){ + int i; + (void)vdbeSorterJoinAll(pSorter, SQLITE_OK); + assert( pSorter->bUseThreads || pSorter->pReader==0 ); +#if SQLITE_MAX_WORKER_THREADS>0 + if( pSorter->pReader ){ + vdbePmaReaderClear(pSorter->pReader); + sqlite3DbFree(db, pSorter->pReader); + pSorter->pReader = 0; + } +#endif + vdbeMergeEngineFree(pSorter->pMerger); + pSorter->pMerger = 0; + for(i=0; inTask; i++){ + SortSubtask *pTask = &pSorter->aTask[i]; + vdbeSortSubtaskCleanup(db, pTask); + pTask->pSorter = pSorter; + } + if( pSorter->list.aMemory==0 ){ + vdbeSorterRecordFree(0, pSorter->list.pList); + } + pSorter->list.pList = 0; + pSorter->list.szPMA = 0; + pSorter->bUsePMA = 0; + pSorter->iMemory = 0; + pSorter->mxKeysize = 0; + sqlite3DbFree(db, pSorter->pUnpacked); + pSorter->pUnpacked = 0; +} + +/* +** Free any cursor components allocated by sqlite3VdbeSorterXXX routines. +*/ +SQLITE_PRIVATE void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){ + VdbeSorter *pSorter; + assert( pCsr->eCurType==CURTYPE_SORTER ); + pSorter = pCsr->uc.pSorter; + if( pSorter ){ + sqlite3VdbeSorterReset(db, pSorter); + sqlite3_free(pSorter->list.aMemory); + sqlite3DbFree(db, pSorter); + pCsr->uc.pSorter = 0; + } +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** The first argument is a file-handle open on a temporary file. The file +** is guaranteed to be nByte bytes or smaller in size. This function +** attempts to extend the file to nByte bytes in size and to ensure that +** the VFS has memory mapped it. +** +** Whether or not the file does end up memory mapped of course depends on +** the specific VFS implementation. +*/ +static void vdbeSorterExtendFile(sqlite3 *db, sqlite3_file *pFd, i64 nByte){ + if( nByte<=(i64)(db->nMaxSorterMmap) && pFd->pMethods->iVersion>=3 ){ + void *p = 0; + int chunksize = 4*1024; + sqlite3OsFileControlHint(pFd, SQLITE_FCNTL_CHUNK_SIZE, &chunksize); + sqlite3OsFileControlHint(pFd, SQLITE_FCNTL_SIZE_HINT, &nByte); + sqlite3OsFetch(pFd, 0, (int)nByte, &p); + if( p ) sqlite3OsUnfetch(pFd, 0, p); + } +} +#else +# define vdbeSorterExtendFile(x,y,z) +#endif + +/* +** Allocate space for a file-handle and open a temporary file. If successful, +** set *ppFd to point to the malloc'd file-handle and return SQLITE_OK. +** Otherwise, set *ppFd to 0 and return an SQLite error code. +*/ +static int vdbeSorterOpenTempFile( + sqlite3 *db, /* Database handle doing sort */ + i64 nExtend, /* Attempt to extend file to this size */ + sqlite3_file **ppFd +){ + int rc; + if( sqlite3FaultSim(202) ) return SQLITE_IOERR_ACCESS; + rc = sqlite3OsOpenMalloc(db->pVfs, 0, ppFd, + SQLITE_OPEN_TEMP_JOURNAL | + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | + SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE, &rc + ); + if( rc==SQLITE_OK ){ + i64 max = SQLITE_MAX_MMAP_SIZE; + sqlite3OsFileControlHint(*ppFd, SQLITE_FCNTL_MMAP_SIZE, (void*)&max); + if( nExtend>0 ){ + vdbeSorterExtendFile(db, *ppFd, nExtend); + } + } + return rc; +} + +/* +** If it has not already been allocated, allocate the UnpackedRecord +** structure at pTask->pUnpacked. Return SQLITE_OK if successful (or +** if no allocation was required), or SQLITE_NOMEM otherwise. +*/ +static int vdbeSortAllocUnpacked(SortSubtask *pTask){ + if( pTask->pUnpacked==0 ){ + pTask->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pTask->pSorter->pKeyInfo); + if( pTask->pUnpacked==0 ) return SQLITE_NOMEM_BKPT; + pTask->pUnpacked->nField = pTask->pSorter->pKeyInfo->nKeyField; + pTask->pUnpacked->errCode = 0; + } + return SQLITE_OK; +} + + +/* +** Merge the two sorted lists p1 and p2 into a single list. +*/ +static SorterRecord *vdbeSorterMerge( + SortSubtask *pTask, /* Calling thread context */ + SorterRecord *p1, /* First list to merge */ + SorterRecord *p2 /* Second list to merge */ +){ + SorterRecord *pFinal = 0; + SorterRecord **pp = &pFinal; + int bCached = 0; + + assert( p1!=0 && p2!=0 ); + for(;;){ + int res; + res = pTask->xCompare( + pTask, &bCached, SRVAL(p1), p1->nVal, SRVAL(p2), p2->nVal + ); + + if( res<=0 ){ + *pp = p1; + pp = &p1->u.pNext; + p1 = p1->u.pNext; + if( p1==0 ){ + *pp = p2; + break; + } + }else{ + *pp = p2; + pp = &p2->u.pNext; + p2 = p2->u.pNext; + bCached = 0; + if( p2==0 ){ + *pp = p1; + break; + } + } + } + return pFinal; +} + +/* +** Return the SorterCompare function to compare values collected by the +** sorter object passed as the only argument. +*/ +static SorterCompare vdbeSorterGetCompare(VdbeSorter *p){ + if( p->typeMask==SORTER_TYPE_INTEGER ){ + return vdbeSorterCompareInt; + }else if( p->typeMask==SORTER_TYPE_TEXT ){ + return vdbeSorterCompareText; + } + return vdbeSorterCompare; +} + +/* +** Sort the linked list of records headed at pTask->pList. Return +** SQLITE_OK if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if +** an error occurs. +*/ +static int vdbeSorterSort(SortSubtask *pTask, SorterList *pList){ + int i; + SorterRecord *p; + int rc; + SorterRecord *aSlot[64]; + + rc = vdbeSortAllocUnpacked(pTask); + if( rc!=SQLITE_OK ) return rc; + + p = pList->pList; + pTask->xCompare = vdbeSorterGetCompare(pTask->pSorter); + memset(aSlot, 0, sizeof(aSlot)); + + while( p ){ + SorterRecord *pNext; + if( pList->aMemory ){ + if( (u8*)p==pList->aMemory ){ + pNext = 0; + }else{ + assert( p->u.iNextaMemory) ); + pNext = (SorterRecord*)&pList->aMemory[p->u.iNext]; + } + }else{ + pNext = p->u.pNext; + } + + p->u.pNext = 0; + for(i=0; aSlot[i]; i++){ + p = vdbeSorterMerge(pTask, p, aSlot[i]); + aSlot[i] = 0; + } + aSlot[i] = p; + p = pNext; + } + + p = 0; + for(i=0; ipList = p; + + assert( pTask->pUnpacked->errCode==SQLITE_OK + || pTask->pUnpacked->errCode==SQLITE_NOMEM + ); + return pTask->pUnpacked->errCode; +} + +/* +** Initialize a PMA-writer object. +*/ +static void vdbePmaWriterInit( + sqlite3_file *pFd, /* File handle to write to */ + PmaWriter *p, /* Object to populate */ + int nBuf, /* Buffer size */ + i64 iStart /* Offset of pFd to begin writing at */ +){ + memset(p, 0, sizeof(PmaWriter)); + p->aBuffer = (u8*)sqlite3Malloc(nBuf); + if( !p->aBuffer ){ + p->eFWErr = SQLITE_NOMEM_BKPT; + }else{ + p->iBufEnd = p->iBufStart = (iStart % nBuf); + p->iWriteOff = iStart - p->iBufStart; + p->nBuffer = nBuf; + p->pFd = pFd; + } +} + +/* +** Write nData bytes of data to the PMA. Return SQLITE_OK +** if successful, or an SQLite error code if an error occurs. +*/ +static void vdbePmaWriteBlob(PmaWriter *p, u8 *pData, int nData){ + int nRem = nData; + while( nRem>0 && p->eFWErr==0 ){ + int nCopy = nRem; + if( nCopy>(p->nBuffer - p->iBufEnd) ){ + nCopy = p->nBuffer - p->iBufEnd; + } + + memcpy(&p->aBuffer[p->iBufEnd], &pData[nData-nRem], nCopy); + p->iBufEnd += nCopy; + if( p->iBufEnd==p->nBuffer ){ + p->eFWErr = sqlite3OsWrite(p->pFd, + &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, + p->iWriteOff + p->iBufStart + ); + p->iBufStart = p->iBufEnd = 0; + p->iWriteOff += p->nBuffer; + } + assert( p->iBufEndnBuffer ); + + nRem -= nCopy; + } +} + +/* +** Flush any buffered data to disk and clean up the PMA-writer object. +** The results of using the PMA-writer after this call are undefined. +** Return SQLITE_OK if flushing the buffered data succeeds or is not +** required. Otherwise, return an SQLite error code. +** +** Before returning, set *piEof to the offset immediately following the +** last byte written to the file. +*/ +static int vdbePmaWriterFinish(PmaWriter *p, i64 *piEof){ + int rc; + if( p->eFWErr==0 && ALWAYS(p->aBuffer) && p->iBufEnd>p->iBufStart ){ + p->eFWErr = sqlite3OsWrite(p->pFd, + &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, + p->iWriteOff + p->iBufStart + ); + } + *piEof = (p->iWriteOff + p->iBufEnd); + sqlite3_free(p->aBuffer); + rc = p->eFWErr; + memset(p, 0, sizeof(PmaWriter)); + return rc; +} + +/* +** Write value iVal encoded as a varint to the PMA. Return +** SQLITE_OK if successful, or an SQLite error code if an error occurs. +*/ +static void vdbePmaWriteVarint(PmaWriter *p, u64 iVal){ + int nByte; + u8 aByte[10]; + nByte = sqlite3PutVarint(aByte, iVal); + vdbePmaWriteBlob(p, aByte, nByte); +} + +/* +** Write the current contents of in-memory linked-list pList to a level-0 +** PMA in the temp file belonging to sub-task pTask. Return SQLITE_OK if +** successful, or an SQLite error code otherwise. +** +** The format of a PMA is: +** +** * A varint. This varint contains the total number of bytes of content +** in the PMA (not including the varint itself). +** +** * One or more records packed end-to-end in order of ascending keys. +** Each record consists of a varint followed by a blob of data (the +** key). The varint is the number of bytes in the blob of data. +*/ +static int vdbeSorterListToPMA(SortSubtask *pTask, SorterList *pList){ + sqlite3 *db = pTask->pSorter->db; + int rc = SQLITE_OK; /* Return code */ + PmaWriter writer; /* Object used to write to the file */ + +#ifdef SQLITE_DEBUG + /* Set iSz to the expected size of file pTask->file after writing the PMA. + ** This is used by an assert() statement at the end of this function. */ + i64 iSz = pList->szPMA + sqlite3VarintLen(pList->szPMA) + pTask->file.iEof; +#endif + + vdbeSorterWorkDebug(pTask, "enter"); + memset(&writer, 0, sizeof(PmaWriter)); + assert( pList->szPMA>0 ); + + /* If the first temporary PMA file has not been opened, open it now. */ + if( pTask->file.pFd==0 ){ + rc = vdbeSorterOpenTempFile(db, 0, &pTask->file.pFd); + assert( rc!=SQLITE_OK || pTask->file.pFd ); + assert( pTask->file.iEof==0 ); + assert( pTask->nPMA==0 ); + } + + /* Try to get the file to memory map */ + if( rc==SQLITE_OK ){ + vdbeSorterExtendFile(db, pTask->file.pFd, pTask->file.iEof+pList->szPMA+9); + } + + /* Sort the list */ + if( rc==SQLITE_OK ){ + rc = vdbeSorterSort(pTask, pList); + } + + if( rc==SQLITE_OK ){ + SorterRecord *p; + SorterRecord *pNext = 0; + + vdbePmaWriterInit(pTask->file.pFd, &writer, pTask->pSorter->pgsz, + pTask->file.iEof); + pTask->nPMA++; + vdbePmaWriteVarint(&writer, pList->szPMA); + for(p=pList->pList; p; p=pNext){ + pNext = p->u.pNext; + vdbePmaWriteVarint(&writer, p->nVal); + vdbePmaWriteBlob(&writer, SRVAL(p), p->nVal); + if( pList->aMemory==0 ) sqlite3_free(p); + } + pList->pList = p; + rc = vdbePmaWriterFinish(&writer, &pTask->file.iEof); + } + + vdbeSorterWorkDebug(pTask, "exit"); + assert( rc!=SQLITE_OK || pList->pList==0 ); + assert( rc!=SQLITE_OK || pTask->file.iEof==iSz ); + return rc; +} + +/* +** Advance the MergeEngine to its next entry. +** Set *pbEof to true there is no next entry because +** the MergeEngine has reached the end of all its inputs. +** +** Return SQLITE_OK if successful or an error code if an error occurs. +*/ +static int vdbeMergeEngineStep( + MergeEngine *pMerger, /* The merge engine to advance to the next row */ + int *pbEof /* Set TRUE at EOF. Set false for more content */ +){ + int rc; + int iPrev = pMerger->aTree[1];/* Index of PmaReader to advance */ + SortSubtask *pTask = pMerger->pTask; + + /* Advance the current PmaReader */ + rc = vdbePmaReaderNext(&pMerger->aReadr[iPrev]); + + /* Update contents of aTree[] */ + if( rc==SQLITE_OK ){ + int i; /* Index of aTree[] to recalculate */ + PmaReader *pReadr1; /* First PmaReader to compare */ + PmaReader *pReadr2; /* Second PmaReader to compare */ + int bCached = 0; + + /* Find the first two PmaReaders to compare. The one that was just + ** advanced (iPrev) and the one next to it in the array. */ + pReadr1 = &pMerger->aReadr[(iPrev & 0xFFFE)]; + pReadr2 = &pMerger->aReadr[(iPrev | 0x0001)]; + + for(i=(pMerger->nTree+iPrev)/2; i>0; i=i/2){ + /* Compare pReadr1 and pReadr2. Store the result in variable iRes. */ + int iRes; + if( pReadr1->pFd==0 ){ + iRes = +1; + }else if( pReadr2->pFd==0 ){ + iRes = -1; + }else{ + iRes = pTask->xCompare(pTask, &bCached, + pReadr1->aKey, pReadr1->nKey, pReadr2->aKey, pReadr2->nKey + ); + } + + /* If pReadr1 contained the smaller value, set aTree[i] to its index. + ** Then set pReadr2 to the next PmaReader to compare to pReadr1. In this + ** case there is no cache of pReadr2 in pTask->pUnpacked, so set + ** pKey2 to point to the record belonging to pReadr2. + ** + ** Alternatively, if pReadr2 contains the smaller of the two values, + ** set aTree[i] to its index and update pReadr1. If vdbeSorterCompare() + ** was actually called above, then pTask->pUnpacked now contains + ** a value equivalent to pReadr2. So set pKey2 to NULL to prevent + ** vdbeSorterCompare() from decoding pReadr2 again. + ** + ** If the two values were equal, then the value from the oldest + ** PMA should be considered smaller. The VdbeSorter.aReadr[] array + ** is sorted from oldest to newest, so pReadr1 contains older values + ** than pReadr2 iff (pReadr1aTree[i] = (int)(pReadr1 - pMerger->aReadr); + pReadr2 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ]; + bCached = 0; + }else{ + if( pReadr1->pFd ) bCached = 0; + pMerger->aTree[i] = (int)(pReadr2 - pMerger->aReadr); + pReadr1 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ]; + } + } + *pbEof = (pMerger->aReadr[pMerger->aTree[1]].pFd==0); + } + + return (rc==SQLITE_OK ? pTask->pUnpacked->errCode : rc); +} + +#if SQLITE_MAX_WORKER_THREADS>0 +/* +** The main routine for background threads that write level-0 PMAs. +*/ +static void *vdbeSorterFlushThread(void *pCtx){ + SortSubtask *pTask = (SortSubtask*)pCtx; + int rc; /* Return code */ + assert( pTask->bDone==0 ); + rc = vdbeSorterListToPMA(pTask, &pTask->list); + pTask->bDone = 1; + return SQLITE_INT_TO_PTR(rc); +} +#endif /* SQLITE_MAX_WORKER_THREADS>0 */ + +/* +** Flush the current contents of VdbeSorter.list to a new PMA, possibly +** using a background thread. +*/ +static int vdbeSorterFlushPMA(VdbeSorter *pSorter){ +#if SQLITE_MAX_WORKER_THREADS==0 + pSorter->bUsePMA = 1; + return vdbeSorterListToPMA(&pSorter->aTask[0], &pSorter->list); +#else + int rc = SQLITE_OK; + int i; + SortSubtask *pTask = 0; /* Thread context used to create new PMA */ + int nWorker = (pSorter->nTask-1); + + /* Set the flag to indicate that at least one PMA has been written. + ** Or will be, anyhow. */ + pSorter->bUsePMA = 1; + + /* Select a sub-task to sort and flush the current list of in-memory + ** records to disk. If the sorter is running in multi-threaded mode, + ** round-robin between the first (pSorter->nTask-1) tasks. Except, if + ** the background thread from a sub-tasks previous turn is still running, + ** skip it. If the first (pSorter->nTask-1) sub-tasks are all still busy, + ** fall back to using the final sub-task. The first (pSorter->nTask-1) + ** sub-tasks are prefered as they use background threads - the final + ** sub-task uses the main thread. */ + for(i=0; iiPrev + i + 1) % nWorker; + pTask = &pSorter->aTask[iTest]; + if( pTask->bDone ){ + rc = vdbeSorterJoinThread(pTask); + } + if( rc!=SQLITE_OK || pTask->pThread==0 ) break; + } + + if( rc==SQLITE_OK ){ + if( i==nWorker ){ + /* Use the foreground thread for this operation */ + rc = vdbeSorterListToPMA(&pSorter->aTask[nWorker], &pSorter->list); + }else{ + /* Launch a background thread for this operation */ + u8 *aMem; + void *pCtx; + + assert( pTask!=0 ); + assert( pTask->pThread==0 && pTask->bDone==0 ); + assert( pTask->list.pList==0 ); + assert( pTask->list.aMemory==0 || pSorter->list.aMemory!=0 ); + + aMem = pTask->list.aMemory; + pCtx = (void*)pTask; + pSorter->iPrev = (u8)(pTask - pSorter->aTask); + pTask->list = pSorter->list; + pSorter->list.pList = 0; + pSorter->list.szPMA = 0; + if( aMem ){ + pSorter->list.aMemory = aMem; + pSorter->nMemory = sqlite3MallocSize(aMem); + }else if( pSorter->list.aMemory ){ + pSorter->list.aMemory = sqlite3Malloc(pSorter->nMemory); + if( !pSorter->list.aMemory ) return SQLITE_NOMEM_BKPT; + } + + rc = vdbeSorterCreateThread(pTask, vdbeSorterFlushThread, pCtx); + } + } + + return rc; +#endif /* SQLITE_MAX_WORKER_THREADS!=0 */ +} + +/* +** Add a record to the sorter. +*/ +SQLITE_PRIVATE int sqlite3VdbeSorterWrite( + const VdbeCursor *pCsr, /* Sorter cursor */ + Mem *pVal /* Memory cell containing record */ +){ + VdbeSorter *pSorter; + int rc = SQLITE_OK; /* Return Code */ + SorterRecord *pNew; /* New list element */ + int bFlush; /* True to flush contents of memory to PMA */ + int nReq; /* Bytes of memory required */ + int nPMA; /* Bytes of PMA space required */ + int t; /* serial type of first record field */ + + assert( pCsr->eCurType==CURTYPE_SORTER ); + pSorter = pCsr->uc.pSorter; + getVarint32NR((const u8*)&pVal->z[1], t); + if( t>0 && t<10 && t!=7 ){ + pSorter->typeMask &= SORTER_TYPE_INTEGER; + }else if( t>10 && (t & 0x01) ){ + pSorter->typeMask &= SORTER_TYPE_TEXT; + }else{ + pSorter->typeMask = 0; + } + + assert( pSorter ); + + /* Figure out whether or not the current contents of memory should be + ** flushed to a PMA before continuing. If so, do so. + ** + ** If using the single large allocation mode (pSorter->aMemory!=0), then + ** flush the contents of memory to a new PMA if (a) at least one value is + ** already in memory and (b) the new value will not fit in memory. + ** + ** Or, if using separate allocations for each record, flush the contents + ** of memory to a PMA if either of the following are true: + ** + ** * The total memory allocated for the in-memory list is greater + ** than (page-size * cache-size), or + ** + ** * The total memory allocated for the in-memory list is greater + ** than (page-size * 10) and sqlite3HeapNearlyFull() returns true. + */ + nReq = pVal->n + sizeof(SorterRecord); + nPMA = pVal->n + sqlite3VarintLen(pVal->n); + if( pSorter->mxPmaSize ){ + if( pSorter->list.aMemory ){ + bFlush = pSorter->iMemory && (pSorter->iMemory+nReq) > pSorter->mxPmaSize; + }else{ + bFlush = ( + (pSorter->list.szPMA > pSorter->mxPmaSize) + || (pSorter->list.szPMA > pSorter->mnPmaSize && sqlite3HeapNearlyFull()) + ); + } + if( bFlush ){ + rc = vdbeSorterFlushPMA(pSorter); + pSorter->list.szPMA = 0; + pSorter->iMemory = 0; + assert( rc!=SQLITE_OK || pSorter->list.pList==0 ); + } + } + + pSorter->list.szPMA += nPMA; + if( nPMA>pSorter->mxKeysize ){ + pSorter->mxKeysize = nPMA; + } + + if( pSorter->list.aMemory ){ + int nMin = pSorter->iMemory + nReq; + + if( nMin>pSorter->nMemory ){ + u8 *aNew; + sqlite3_int64 nNew = 2 * (sqlite3_int64)pSorter->nMemory; + int iListOff = -1; + if( pSorter->list.pList ){ + iListOff = (u8*)pSorter->list.pList - pSorter->list.aMemory; + } + while( nNew < nMin ) nNew = nNew*2; + if( nNew > pSorter->mxPmaSize ) nNew = pSorter->mxPmaSize; + if( nNew < nMin ) nNew = nMin; + aNew = sqlite3Realloc(pSorter->list.aMemory, nNew); + if( !aNew ) return SQLITE_NOMEM_BKPT; + if( iListOff>=0 ){ + pSorter->list.pList = (SorterRecord*)&aNew[iListOff]; + } + pSorter->list.aMemory = aNew; + pSorter->nMemory = nNew; + } + + pNew = (SorterRecord*)&pSorter->list.aMemory[pSorter->iMemory]; + pSorter->iMemory += ROUND8(nReq); + if( pSorter->list.pList ){ + pNew->u.iNext = (int)((u8*)(pSorter->list.pList) - pSorter->list.aMemory); + } + }else{ + pNew = (SorterRecord *)sqlite3Malloc(nReq); + if( pNew==0 ){ + return SQLITE_NOMEM_BKPT; + } + pNew->u.pNext = pSorter->list.pList; + } + + memcpy(SRVAL(pNew), pVal->z, pVal->n); + pNew->nVal = pVal->n; + pSorter->list.pList = pNew; + + return rc; +} + +/* +** Read keys from pIncr->pMerger and populate pIncr->aFile[1]. The format +** of the data stored in aFile[1] is the same as that used by regular PMAs, +** except that the number-of-bytes varint is omitted from the start. +*/ +static int vdbeIncrPopulate(IncrMerger *pIncr){ + int rc = SQLITE_OK; + int rc2; + i64 iStart = pIncr->iStartOff; + SorterFile *pOut = &pIncr->aFile[1]; + SortSubtask *pTask = pIncr->pTask; + MergeEngine *pMerger = pIncr->pMerger; + PmaWriter writer; + assert( pIncr->bEof==0 ); + + vdbeSorterPopulateDebug(pTask, "enter"); + + vdbePmaWriterInit(pOut->pFd, &writer, pTask->pSorter->pgsz, iStart); + while( rc==SQLITE_OK ){ + int dummy; + PmaReader *pReader = &pMerger->aReadr[ pMerger->aTree[1] ]; + int nKey = pReader->nKey; + i64 iEof = writer.iWriteOff + writer.iBufEnd; + + /* Check if the output file is full or if the input has been exhausted. + ** In either case exit the loop. */ + if( pReader->pFd==0 ) break; + if( (iEof + nKey + sqlite3VarintLen(nKey))>(iStart + pIncr->mxSz) ) break; + + /* Write the next key to the output. */ + vdbePmaWriteVarint(&writer, nKey); + vdbePmaWriteBlob(&writer, pReader->aKey, nKey); + assert( pIncr->pMerger->pTask==pTask ); + rc = vdbeMergeEngineStep(pIncr->pMerger, &dummy); + } + + rc2 = vdbePmaWriterFinish(&writer, &pOut->iEof); + if( rc==SQLITE_OK ) rc = rc2; + vdbeSorterPopulateDebug(pTask, "exit"); + return rc; +} + +#if SQLITE_MAX_WORKER_THREADS>0 +/* +** The main routine for background threads that populate aFile[1] of +** multi-threaded IncrMerger objects. +*/ +static void *vdbeIncrPopulateThread(void *pCtx){ + IncrMerger *pIncr = (IncrMerger*)pCtx; + void *pRet = SQLITE_INT_TO_PTR( vdbeIncrPopulate(pIncr) ); + pIncr->pTask->bDone = 1; + return pRet; +} + +/* +** Launch a background thread to populate aFile[1] of pIncr. +*/ +static int vdbeIncrBgPopulate(IncrMerger *pIncr){ + void *p = (void*)pIncr; + assert( pIncr->bUseThread ); + return vdbeSorterCreateThread(pIncr->pTask, vdbeIncrPopulateThread, p); +} +#endif + +/* +** This function is called when the PmaReader corresponding to pIncr has +** finished reading the contents of aFile[0]. Its purpose is to "refill" +** aFile[0] such that the PmaReader should start rereading it from the +** beginning. +** +** For single-threaded objects, this is accomplished by literally reading +** keys from pIncr->pMerger and repopulating aFile[0]. +** +** For multi-threaded objects, all that is required is to wait until the +** background thread is finished (if it is not already) and then swap +** aFile[0] and aFile[1] in place. If the contents of pMerger have not +** been exhausted, this function also launches a new background thread +** to populate the new aFile[1]. +** +** SQLITE_OK is returned on success, or an SQLite error code otherwise. +*/ +static int vdbeIncrSwap(IncrMerger *pIncr){ + int rc = SQLITE_OK; + +#if SQLITE_MAX_WORKER_THREADS>0 + if( pIncr->bUseThread ){ + rc = vdbeSorterJoinThread(pIncr->pTask); + + if( rc==SQLITE_OK ){ + SorterFile f0 = pIncr->aFile[0]; + pIncr->aFile[0] = pIncr->aFile[1]; + pIncr->aFile[1] = f0; + } + + if( rc==SQLITE_OK ){ + if( pIncr->aFile[0].iEof==pIncr->iStartOff ){ + pIncr->bEof = 1; + }else{ + rc = vdbeIncrBgPopulate(pIncr); + } + } + }else +#endif + { + rc = vdbeIncrPopulate(pIncr); + pIncr->aFile[0] = pIncr->aFile[1]; + if( pIncr->aFile[0].iEof==pIncr->iStartOff ){ + pIncr->bEof = 1; + } + } + + return rc; +} + +/* +** Allocate and return a new IncrMerger object to read data from pMerger. +** +** If an OOM condition is encountered, return NULL. In this case free the +** pMerger argument before returning. +*/ +static int vdbeIncrMergerNew( + SortSubtask *pTask, /* The thread that will be using the new IncrMerger */ + MergeEngine *pMerger, /* The MergeEngine that the IncrMerger will control */ + IncrMerger **ppOut /* Write the new IncrMerger here */ +){ + int rc = SQLITE_OK; + IncrMerger *pIncr = *ppOut = (IncrMerger*) + (sqlite3FaultSim(100) ? 0 : sqlite3MallocZero(sizeof(*pIncr))); + if( pIncr ){ + pIncr->pMerger = pMerger; + pIncr->pTask = pTask; + pIncr->mxSz = MAX(pTask->pSorter->mxKeysize+9,pTask->pSorter->mxPmaSize/2); + pTask->file2.iEof += pIncr->mxSz; + }else{ + vdbeMergeEngineFree(pMerger); + rc = SQLITE_NOMEM_BKPT; + } + assert( *ppOut!=0 || rc!=SQLITE_OK ); + return rc; +} + +#if SQLITE_MAX_WORKER_THREADS>0 +/* +** Set the "use-threads" flag on object pIncr. +*/ +static void vdbeIncrMergerSetThreads(IncrMerger *pIncr){ + pIncr->bUseThread = 1; + pIncr->pTask->file2.iEof -= pIncr->mxSz; +} +#endif /* SQLITE_MAX_WORKER_THREADS>0 */ + + + +/* +** Recompute pMerger->aTree[iOut] by comparing the next keys on the +** two PmaReaders that feed that entry. Neither of the PmaReaders +** are advanced. This routine merely does the comparison. +*/ +static void vdbeMergeEngineCompare( + MergeEngine *pMerger, /* Merge engine containing PmaReaders to compare */ + int iOut /* Store the result in pMerger->aTree[iOut] */ +){ + int i1; + int i2; + int iRes; + PmaReader *p1; + PmaReader *p2; + + assert( iOutnTree && iOut>0 ); + + if( iOut>=(pMerger->nTree/2) ){ + i1 = (iOut - pMerger->nTree/2) * 2; + i2 = i1 + 1; + }else{ + i1 = pMerger->aTree[iOut*2]; + i2 = pMerger->aTree[iOut*2+1]; + } + + p1 = &pMerger->aReadr[i1]; + p2 = &pMerger->aReadr[i2]; + + if( p1->pFd==0 ){ + iRes = i2; + }else if( p2->pFd==0 ){ + iRes = i1; + }else{ + SortSubtask *pTask = pMerger->pTask; + int bCached = 0; + int res; + assert( pTask->pUnpacked!=0 ); /* from vdbeSortSubtaskMain() */ + res = pTask->xCompare( + pTask, &bCached, p1->aKey, p1->nKey, p2->aKey, p2->nKey + ); + if( res<=0 ){ + iRes = i1; + }else{ + iRes = i2; + } + } + + pMerger->aTree[iOut] = iRes; +} + +/* +** Allowed values for the eMode parameter to vdbeMergeEngineInit() +** and vdbePmaReaderIncrMergeInit(). +** +** Only INCRINIT_NORMAL is valid in single-threaded builds (when +** SQLITE_MAX_WORKER_THREADS==0). The other values are only used +** when there exists one or more separate worker threads. +*/ +#define INCRINIT_NORMAL 0 +#define INCRINIT_TASK 1 +#define INCRINIT_ROOT 2 + +/* +** Forward reference required as the vdbeIncrMergeInit() and +** vdbePmaReaderIncrInit() routines are called mutually recursively when +** building a merge tree. +*/ +static int vdbePmaReaderIncrInit(PmaReader *pReadr, int eMode); + +/* +** Initialize the MergeEngine object passed as the second argument. Once this +** function returns, the first key of merged data may be read from the +** MergeEngine object in the usual fashion. +** +** If argument eMode is INCRINIT_ROOT, then it is assumed that any IncrMerge +** objects attached to the PmaReader objects that the merger reads from have +** already been populated, but that they have not yet populated aFile[0] and +** set the PmaReader objects up to read from it. In this case all that is +** required is to call vdbePmaReaderNext() on each PmaReader to point it at +** its first key. +** +** Otherwise, if eMode is any value other than INCRINIT_ROOT, then use +** vdbePmaReaderIncrMergeInit() to initialize each PmaReader that feeds data +** to pMerger. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int vdbeMergeEngineInit( + SortSubtask *pTask, /* Thread that will run pMerger */ + MergeEngine *pMerger, /* MergeEngine to initialize */ + int eMode /* One of the INCRINIT_XXX constants */ +){ + int rc = SQLITE_OK; /* Return code */ + int i; /* For looping over PmaReader objects */ + int nTree; /* Number of subtrees to merge */ + + /* Failure to allocate the merge would have been detected prior to + ** invoking this routine */ + assert( pMerger!=0 ); + + /* eMode is always INCRINIT_NORMAL in single-threaded mode */ + assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL ); + + /* Verify that the MergeEngine is assigned to a single thread */ + assert( pMerger->pTask==0 ); + pMerger->pTask = pTask; + + nTree = pMerger->nTree; + for(i=0; i0 && eMode==INCRINIT_ROOT ){ + /* PmaReaders should be normally initialized in order, as if they are + ** reading from the same temp file this makes for more linear file IO. + ** However, in the INCRINIT_ROOT case, if PmaReader aReadr[nTask-1] is + ** in use it will block the vdbePmaReaderNext() call while it uses + ** the main thread to fill its buffer. So calling PmaReaderNext() + ** on this PmaReader before any of the multi-threaded PmaReaders takes + ** better advantage of multi-processor hardware. */ + rc = vdbePmaReaderNext(&pMerger->aReadr[nTree-i-1]); + }else{ + rc = vdbePmaReaderIncrInit(&pMerger->aReadr[i], INCRINIT_NORMAL); + } + if( rc!=SQLITE_OK ) return rc; + } + + for(i=pMerger->nTree-1; i>0; i--){ + vdbeMergeEngineCompare(pMerger, i); + } + return pTask->pUnpacked->errCode; +} + +/* +** The PmaReader passed as the first argument is guaranteed to be an +** incremental-reader (pReadr->pIncr!=0). This function serves to open +** and/or initialize the temp file related fields of the IncrMerge +** object at (pReadr->pIncr). +** +** If argument eMode is set to INCRINIT_NORMAL, then all PmaReaders +** in the sub-tree headed by pReadr are also initialized. Data is then +** loaded into the buffers belonging to pReadr and it is set to point to +** the first key in its range. +** +** If argument eMode is set to INCRINIT_TASK, then pReadr is guaranteed +** to be a multi-threaded PmaReader and this function is being called in a +** background thread. In this case all PmaReaders in the sub-tree are +** initialized as for INCRINIT_NORMAL and the aFile[1] buffer belonging to +** pReadr is populated. However, pReadr itself is not set up to point +** to its first key. A call to vdbePmaReaderNext() is still required to do +** that. +** +** The reason this function does not call vdbePmaReaderNext() immediately +** in the INCRINIT_TASK case is that vdbePmaReaderNext() assumes that it has +** to block on thread (pTask->thread) before accessing aFile[1]. But, since +** this entire function is being run by thread (pTask->thread), that will +** lead to the current background thread attempting to join itself. +** +** Finally, if argument eMode is set to INCRINIT_ROOT, it may be assumed +** that pReadr->pIncr is a multi-threaded IncrMerge objects, and that all +** child-trees have already been initialized using IncrInit(INCRINIT_TASK). +** In this case vdbePmaReaderNext() is called on all child PmaReaders and +** the current PmaReader set to point to the first key in its range. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode){ + int rc = SQLITE_OK; + IncrMerger *pIncr = pReadr->pIncr; + SortSubtask *pTask = pIncr->pTask; + sqlite3 *db = pTask->pSorter->db; + + /* eMode is always INCRINIT_NORMAL in single-threaded mode */ + assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL ); + + rc = vdbeMergeEngineInit(pTask, pIncr->pMerger, eMode); + + /* Set up the required files for pIncr. A multi-theaded IncrMerge object + ** requires two temp files to itself, whereas a single-threaded object + ** only requires a region of pTask->file2. */ + if( rc==SQLITE_OK ){ + int mxSz = pIncr->mxSz; +#if SQLITE_MAX_WORKER_THREADS>0 + if( pIncr->bUseThread ){ + rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[0].pFd); + if( rc==SQLITE_OK ){ + rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[1].pFd); + } + }else +#endif + /*if( !pIncr->bUseThread )*/{ + if( pTask->file2.pFd==0 ){ + assert( pTask->file2.iEof>0 ); + rc = vdbeSorterOpenTempFile(db, pTask->file2.iEof, &pTask->file2.pFd); + pTask->file2.iEof = 0; + } + if( rc==SQLITE_OK ){ + pIncr->aFile[1].pFd = pTask->file2.pFd; + pIncr->iStartOff = pTask->file2.iEof; + pTask->file2.iEof += mxSz; + } + } + } + +#if SQLITE_MAX_WORKER_THREADS>0 + if( rc==SQLITE_OK && pIncr->bUseThread ){ + /* Use the current thread to populate aFile[1], even though this + ** PmaReader is multi-threaded. If this is an INCRINIT_TASK object, + ** then this function is already running in background thread + ** pIncr->pTask->thread. + ** + ** If this is the INCRINIT_ROOT object, then it is running in the + ** main VDBE thread. But that is Ok, as that thread cannot return + ** control to the VDBE or proceed with anything useful until the + ** first results are ready from this merger object anyway. + */ + assert( eMode==INCRINIT_ROOT || eMode==INCRINIT_TASK ); + rc = vdbeIncrPopulate(pIncr); + } +#endif + + if( rc==SQLITE_OK && (SQLITE_MAX_WORKER_THREADS==0 || eMode!=INCRINIT_TASK) ){ + rc = vdbePmaReaderNext(pReadr); + } + + return rc; +} + +#if SQLITE_MAX_WORKER_THREADS>0 +/* +** The main routine for vdbePmaReaderIncrMergeInit() operations run in +** background threads. +*/ +static void *vdbePmaReaderBgIncrInit(void *pCtx){ + PmaReader *pReader = (PmaReader*)pCtx; + void *pRet = SQLITE_INT_TO_PTR( + vdbePmaReaderIncrMergeInit(pReader,INCRINIT_TASK) + ); + pReader->pIncr->pTask->bDone = 1; + return pRet; +} +#endif + +/* +** If the PmaReader passed as the first argument is not an incremental-reader +** (if pReadr->pIncr==0), then this function is a no-op. Otherwise, it invokes +** the vdbePmaReaderIncrMergeInit() function with the parameters passed to +** this routine to initialize the incremental merge. +** +** If the IncrMerger object is multi-threaded (IncrMerger.bUseThread==1), +** then a background thread is launched to call vdbePmaReaderIncrMergeInit(). +** Or, if the IncrMerger is single threaded, the same function is called +** using the current thread. +*/ +static int vdbePmaReaderIncrInit(PmaReader *pReadr, int eMode){ + IncrMerger *pIncr = pReadr->pIncr; /* Incremental merger */ + int rc = SQLITE_OK; /* Return code */ + if( pIncr ){ +#if SQLITE_MAX_WORKER_THREADS>0 + assert( pIncr->bUseThread==0 || eMode==INCRINIT_TASK ); + if( pIncr->bUseThread ){ + void *pCtx = (void*)pReadr; + rc = vdbeSorterCreateThread(pIncr->pTask, vdbePmaReaderBgIncrInit, pCtx); + }else +#endif + { + rc = vdbePmaReaderIncrMergeInit(pReadr, eMode); + } + } + return rc; +} + +/* +** Allocate a new MergeEngine object to merge the contents of nPMA level-0 +** PMAs from pTask->file. If no error occurs, set *ppOut to point to +** the new object and return SQLITE_OK. Or, if an error does occur, set *ppOut +** to NULL and return an SQLite error code. +** +** When this function is called, *piOffset is set to the offset of the +** first PMA to read from pTask->file. Assuming no error occurs, it is +** set to the offset immediately following the last byte of the last +** PMA before returning. If an error does occur, then the final value of +** *piOffset is undefined. +*/ +static int vdbeMergeEngineLevel0( + SortSubtask *pTask, /* Sorter task to read from */ + int nPMA, /* Number of PMAs to read */ + i64 *piOffset, /* IN/OUT: Readr offset in pTask->file */ + MergeEngine **ppOut /* OUT: New merge-engine */ +){ + MergeEngine *pNew; /* Merge engine to return */ + i64 iOff = *piOffset; + int i; + int rc = SQLITE_OK; + + *ppOut = pNew = vdbeMergeEngineNew(nPMA); + if( pNew==0 ) rc = SQLITE_NOMEM_BKPT; + + for(i=0; iaReadr[i]; + rc = vdbePmaReaderInit(pTask, &pTask->file, iOff, pReadr, &nDummy); + iOff = pReadr->iEof; + } + + if( rc!=SQLITE_OK ){ + vdbeMergeEngineFree(pNew); + *ppOut = 0; + } + *piOffset = iOff; + return rc; +} + +/* +** Return the depth of a tree comprising nPMA PMAs, assuming a fanout of +** SORTER_MAX_MERGE_COUNT. The returned value does not include leaf nodes. +** +** i.e. +** +** nPMA<=16 -> TreeDepth() == 0 +** nPMA<=256 -> TreeDepth() == 1 +** nPMA<=65536 -> TreeDepth() == 2 +*/ +static int vdbeSorterTreeDepth(int nPMA){ + int nDepth = 0; + i64 nDiv = SORTER_MAX_MERGE_COUNT; + while( nDiv < (i64)nPMA ){ + nDiv = nDiv * SORTER_MAX_MERGE_COUNT; + nDepth++; + } + return nDepth; +} + +/* +** pRoot is the root of an incremental merge-tree with depth nDepth (according +** to vdbeSorterTreeDepth()). pLeaf is the iSeq'th leaf to be added to the +** tree, counting from zero. This function adds pLeaf to the tree. +** +** If successful, SQLITE_OK is returned. If an error occurs, an SQLite error +** code is returned and pLeaf is freed. +*/ +static int vdbeSorterAddToTree( + SortSubtask *pTask, /* Task context */ + int nDepth, /* Depth of tree according to TreeDepth() */ + int iSeq, /* Sequence number of leaf within tree */ + MergeEngine *pRoot, /* Root of tree */ + MergeEngine *pLeaf /* Leaf to add to tree */ +){ + int rc = SQLITE_OK; + int nDiv = 1; + int i; + MergeEngine *p = pRoot; + IncrMerger *pIncr; + + rc = vdbeIncrMergerNew(pTask, pLeaf, &pIncr); + + for(i=1; iaReadr[iIter]; + + if( pReadr->pIncr==0 ){ + MergeEngine *pNew = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT); + if( pNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + rc = vdbeIncrMergerNew(pTask, pNew, &pReadr->pIncr); + } + } + if( rc==SQLITE_OK ){ + p = pReadr->pIncr->pMerger; + nDiv = nDiv / SORTER_MAX_MERGE_COUNT; + } + } + + if( rc==SQLITE_OK ){ + p->aReadr[iSeq % SORTER_MAX_MERGE_COUNT].pIncr = pIncr; + }else{ + vdbeIncrFree(pIncr); + } + return rc; +} + +/* +** This function is called as part of a SorterRewind() operation on a sorter +** that has already written two or more level-0 PMAs to one or more temp +** files. It builds a tree of MergeEngine/IncrMerger/PmaReader objects that +** can be used to incrementally merge all PMAs on disk. +** +** If successful, SQLITE_OK is returned and *ppOut set to point to the +** MergeEngine object at the root of the tree before returning. Or, if an +** error occurs, an SQLite error code is returned and the final value +** of *ppOut is undefined. +*/ +static int vdbeSorterMergeTreeBuild( + VdbeSorter *pSorter, /* The VDBE cursor that implements the sort */ + MergeEngine **ppOut /* Write the MergeEngine here */ +){ + MergeEngine *pMain = 0; + int rc = SQLITE_OK; + int iTask; + +#if SQLITE_MAX_WORKER_THREADS>0 + /* If the sorter uses more than one task, then create the top-level + ** MergeEngine here. This MergeEngine will read data from exactly + ** one PmaReader per sub-task. */ + assert( pSorter->bUseThreads || pSorter->nTask==1 ); + if( pSorter->nTask>1 ){ + pMain = vdbeMergeEngineNew(pSorter->nTask); + if( pMain==0 ) rc = SQLITE_NOMEM_BKPT; + } +#endif + + for(iTask=0; rc==SQLITE_OK && iTasknTask; iTask++){ + SortSubtask *pTask = &pSorter->aTask[iTask]; + assert( pTask->nPMA>0 || SQLITE_MAX_WORKER_THREADS>0 ); + if( SQLITE_MAX_WORKER_THREADS==0 || pTask->nPMA ){ + MergeEngine *pRoot = 0; /* Root node of tree for this task */ + int nDepth = vdbeSorterTreeDepth(pTask->nPMA); + i64 iReadOff = 0; + + if( pTask->nPMA<=SORTER_MAX_MERGE_COUNT ){ + rc = vdbeMergeEngineLevel0(pTask, pTask->nPMA, &iReadOff, &pRoot); + }else{ + int i; + int iSeq = 0; + pRoot = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT); + if( pRoot==0 ) rc = SQLITE_NOMEM_BKPT; + for(i=0; inPMA && rc==SQLITE_OK; i += SORTER_MAX_MERGE_COUNT){ + MergeEngine *pMerger = 0; /* New level-0 PMA merger */ + int nReader; /* Number of level-0 PMAs to merge */ + + nReader = MIN(pTask->nPMA - i, SORTER_MAX_MERGE_COUNT); + rc = vdbeMergeEngineLevel0(pTask, nReader, &iReadOff, &pMerger); + if( rc==SQLITE_OK ){ + rc = vdbeSorterAddToTree(pTask, nDepth, iSeq++, pRoot, pMerger); + } + } + } + + if( rc==SQLITE_OK ){ +#if SQLITE_MAX_WORKER_THREADS>0 + if( pMain!=0 ){ + rc = vdbeIncrMergerNew(pTask, pRoot, &pMain->aReadr[iTask].pIncr); + }else +#endif + { + assert( pMain==0 ); + pMain = pRoot; + } + }else{ + vdbeMergeEngineFree(pRoot); + } + } + } + + if( rc!=SQLITE_OK ){ + vdbeMergeEngineFree(pMain); + pMain = 0; + } + *ppOut = pMain; + return rc; +} + +/* +** This function is called as part of an sqlite3VdbeSorterRewind() operation +** on a sorter that has written two or more PMAs to temporary files. It sets +** up either VdbeSorter.pMerger (for single threaded sorters) or pReader +** (for multi-threaded sorters) so that it can be used to iterate through +** all records stored in the sorter. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int vdbeSorterSetupMerge(VdbeSorter *pSorter){ + int rc; /* Return code */ + SortSubtask *pTask0 = &pSorter->aTask[0]; + MergeEngine *pMain = 0; +#if SQLITE_MAX_WORKER_THREADS + sqlite3 *db = pTask0->pSorter->db; + int i; + SorterCompare xCompare = vdbeSorterGetCompare(pSorter); + for(i=0; inTask; i++){ + pSorter->aTask[i].xCompare = xCompare; + } +#endif + + rc = vdbeSorterMergeTreeBuild(pSorter, &pMain); + if( rc==SQLITE_OK ){ +#if SQLITE_MAX_WORKER_THREADS + assert( pSorter->bUseThreads==0 || pSorter->nTask>1 ); + if( pSorter->bUseThreads ){ + int iTask; + PmaReader *pReadr = 0; + SortSubtask *pLast = &pSorter->aTask[pSorter->nTask-1]; + rc = vdbeSortAllocUnpacked(pLast); + if( rc==SQLITE_OK ){ + pReadr = (PmaReader*)sqlite3DbMallocZero(db, sizeof(PmaReader)); + pSorter->pReader = pReadr; + if( pReadr==0 ) rc = SQLITE_NOMEM_BKPT; + } + if( rc==SQLITE_OK ){ + rc = vdbeIncrMergerNew(pLast, pMain, &pReadr->pIncr); + if( rc==SQLITE_OK ){ + vdbeIncrMergerSetThreads(pReadr->pIncr); + for(iTask=0; iTask<(pSorter->nTask-1); iTask++){ + IncrMerger *pIncr; + if( (pIncr = pMain->aReadr[iTask].pIncr) ){ + vdbeIncrMergerSetThreads(pIncr); + assert( pIncr->pTask!=pLast ); + } + } + for(iTask=0; rc==SQLITE_OK && iTasknTask; iTask++){ + /* Check that: + ** + ** a) The incremental merge object is configured to use the + ** right task, and + ** b) If it is using task (nTask-1), it is configured to run + ** in single-threaded mode. This is important, as the + ** root merge (INCRINIT_ROOT) will be using the same task + ** object. + */ + PmaReader *p = &pMain->aReadr[iTask]; + assert( p->pIncr==0 || ( + (p->pIncr->pTask==&pSorter->aTask[iTask]) /* a */ + && (iTask!=pSorter->nTask-1 || p->pIncr->bUseThread==0) /* b */ + )); + rc = vdbePmaReaderIncrInit(p, INCRINIT_TASK); + } + } + pMain = 0; + } + if( rc==SQLITE_OK ){ + rc = vdbePmaReaderIncrMergeInit(pReadr, INCRINIT_ROOT); + } + }else +#endif + { + rc = vdbeMergeEngineInit(pTask0, pMain, INCRINIT_NORMAL); + pSorter->pMerger = pMain; + pMain = 0; + } + } + + if( rc!=SQLITE_OK ){ + vdbeMergeEngineFree(pMain); + } + return rc; +} + + +/* +** Once the sorter has been populated by calls to sqlite3VdbeSorterWrite, +** this function is called to prepare for iterating through the records +** in sorted order. +*/ +SQLITE_PRIVATE int sqlite3VdbeSorterRewind(const VdbeCursor *pCsr, int *pbEof){ + VdbeSorter *pSorter; + int rc = SQLITE_OK; /* Return code */ + + assert( pCsr->eCurType==CURTYPE_SORTER ); + pSorter = pCsr->uc.pSorter; + assert( pSorter ); + + /* If no data has been written to disk, then do not do so now. Instead, + ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly + ** from the in-memory list. */ + if( pSorter->bUsePMA==0 ){ + if( pSorter->list.pList ){ + *pbEof = 0; + rc = vdbeSorterSort(&pSorter->aTask[0], &pSorter->list); + }else{ + *pbEof = 1; + } + return rc; + } + + /* Write the current in-memory list to a PMA. When the VdbeSorterWrite() + ** function flushes the contents of memory to disk, it immediately always + ** creates a new list consisting of a single key immediately afterwards. + ** So the list is never empty at this point. */ + assert( pSorter->list.pList ); + rc = vdbeSorterFlushPMA(pSorter); + + /* Join all threads */ + rc = vdbeSorterJoinAll(pSorter, rc); + + vdbeSorterRewindDebug("rewind"); + + /* Assuming no errors have occurred, set up a merger structure to + ** incrementally read and merge all remaining PMAs. */ + assert( pSorter->pReader==0 ); + if( rc==SQLITE_OK ){ + rc = vdbeSorterSetupMerge(pSorter); + *pbEof = 0; + } + + vdbeSorterRewindDebug("rewinddone"); + return rc; +} + +/* +** Advance to the next element in the sorter. Return value: +** +** SQLITE_OK success +** SQLITE_DONE end of data +** otherwise some kind of error. +*/ +SQLITE_PRIVATE int sqlite3VdbeSorterNext(sqlite3 *db, const VdbeCursor *pCsr){ + VdbeSorter *pSorter; + int rc; /* Return code */ + + assert( pCsr->eCurType==CURTYPE_SORTER ); + pSorter = pCsr->uc.pSorter; + assert( pSorter->bUsePMA || (pSorter->pReader==0 && pSorter->pMerger==0) ); + if( pSorter->bUsePMA ){ + assert( pSorter->pReader==0 || pSorter->pMerger==0 ); + assert( pSorter->bUseThreads==0 || pSorter->pReader ); + assert( pSorter->bUseThreads==1 || pSorter->pMerger ); +#if SQLITE_MAX_WORKER_THREADS>0 + if( pSorter->bUseThreads ){ + rc = vdbePmaReaderNext(pSorter->pReader); + if( rc==SQLITE_OK && pSorter->pReader->pFd==0 ) rc = SQLITE_DONE; + }else +#endif + /*if( !pSorter->bUseThreads )*/ { + int res = 0; + assert( pSorter->pMerger!=0 ); + assert( pSorter->pMerger->pTask==(&pSorter->aTask[0]) ); + rc = vdbeMergeEngineStep(pSorter->pMerger, &res); + if( rc==SQLITE_OK && res ) rc = SQLITE_DONE; + } + }else{ + SorterRecord *pFree = pSorter->list.pList; + pSorter->list.pList = pFree->u.pNext; + pFree->u.pNext = 0; + if( pSorter->list.aMemory==0 ) vdbeSorterRecordFree(db, pFree); + rc = pSorter->list.pList ? SQLITE_OK : SQLITE_DONE; + } + return rc; +} + +/* +** Return a pointer to a buffer owned by the sorter that contains the +** current key. +*/ +static void *vdbeSorterRowkey( + const VdbeSorter *pSorter, /* Sorter object */ + int *pnKey /* OUT: Size of current key in bytes */ +){ + void *pKey; + if( pSorter->bUsePMA ){ + PmaReader *pReader; +#if SQLITE_MAX_WORKER_THREADS>0 + if( pSorter->bUseThreads ){ + pReader = pSorter->pReader; + }else +#endif + /*if( !pSorter->bUseThreads )*/{ + pReader = &pSorter->pMerger->aReadr[pSorter->pMerger->aTree[1]]; + } + *pnKey = pReader->nKey; + pKey = pReader->aKey; + }else{ + *pnKey = pSorter->list.pList->nVal; + pKey = SRVAL(pSorter->list.pList); + } + return pKey; +} + +/* +** Copy the current sorter key into the memory cell pOut. +*/ +SQLITE_PRIVATE int sqlite3VdbeSorterRowkey(const VdbeCursor *pCsr, Mem *pOut){ + VdbeSorter *pSorter; + void *pKey; int nKey; /* Sorter key to copy into pOut */ + + assert( pCsr->eCurType==CURTYPE_SORTER ); + pSorter = pCsr->uc.pSorter; + pKey = vdbeSorterRowkey(pSorter, &nKey); + if( sqlite3VdbeMemClearAndResize(pOut, nKey) ){ + return SQLITE_NOMEM_BKPT; + } + pOut->n = nKey; + MemSetTypeFlag(pOut, MEM_Blob); + memcpy(pOut->z, pKey, nKey); + + return SQLITE_OK; +} + +/* +** Compare the key in memory cell pVal with the key that the sorter cursor +** passed as the first argument currently points to. For the purposes of +** the comparison, ignore the rowid field at the end of each record. +** +** If the sorter cursor key contains any NULL values, consider it to be +** less than pVal. Even if pVal also contains NULL values. +** +** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM). +** Otherwise, set *pRes to a negative, zero or positive value if the +** key in pVal is smaller than, equal to or larger than the current sorter +** key. +** +** This routine forms the core of the OP_SorterCompare opcode, which in +** turn is used to verify uniqueness when constructing a UNIQUE INDEX. +*/ +SQLITE_PRIVATE int sqlite3VdbeSorterCompare( + const VdbeCursor *pCsr, /* Sorter cursor */ + Mem *pVal, /* Value to compare to current sorter key */ + int nKeyCol, /* Compare this many columns */ + int *pRes /* OUT: Result of comparison */ +){ + VdbeSorter *pSorter; + UnpackedRecord *r2; + KeyInfo *pKeyInfo; + int i; + void *pKey; int nKey; /* Sorter key to compare pVal with */ + + assert( pCsr->eCurType==CURTYPE_SORTER ); + pSorter = pCsr->uc.pSorter; + r2 = pSorter->pUnpacked; + pKeyInfo = pCsr->pKeyInfo; + if( r2==0 ){ + r2 = pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); + if( r2==0 ) return SQLITE_NOMEM_BKPT; + r2->nField = nKeyCol; + } + assert( r2->nField==nKeyCol ); + + pKey = vdbeSorterRowkey(pSorter, &nKey); + sqlite3VdbeRecordUnpack(pKeyInfo, nKey, pKey, r2); + for(i=0; iaMem[i].flags & MEM_Null ){ + *pRes = -1; + return SQLITE_OK; + } + } + + *pRes = sqlite3VdbeRecordCompare(pVal->n, pVal->z, r2); + return SQLITE_OK; +} + +/************** End of vdbesort.c ********************************************/ +/************** Begin file vdbevtab.c ****************************************/ +/* +** 2020-03-23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file implements virtual-tables for examining the bytecode content +** of a prepared statement. +*/ +/* #include "sqliteInt.h" */ +#if defined(SQLITE_ENABLE_BYTECODE_VTAB) && !defined(SQLITE_OMIT_VIRTUALTABLE) +/* #include "vdbeInt.h" */ + +/* An instance of the bytecode() table-valued function. +*/ +typedef struct bytecodevtab bytecodevtab; +struct bytecodevtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; /* Database connection */ + int bTablesUsed; /* 2 for tables_used(). 0 for bytecode(). */ +}; + +/* A cursor for scanning through the bytecode +*/ +typedef struct bytecodevtab_cursor bytecodevtab_cursor; +struct bytecodevtab_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + sqlite3_stmt *pStmt; /* The statement whose bytecode is displayed */ + int iRowid; /* The rowid of the output table */ + int iAddr; /* Address */ + int needFinalize; /* Cursors owns pStmt and must finalize it */ + int showSubprograms; /* Provide a listing of subprograms */ + Op *aOp; /* Operand array */ + char *zP4; /* Rendered P4 value */ + const char *zType; /* tables_used.type */ + const char *zSchema; /* tables_used.schema */ + const char *zName; /* tables_used.name */ + Mem sub; /* Subprograms */ +}; + +/* +** Create a new bytecode() table-valued function. +*/ +static int bytecodevtabConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + bytecodevtab *pNew; + int rc; + int isTabUsed = pAux!=0; + const char *azSchema[2] = { + /* bytecode() schema */ + "CREATE TABLE x(" + "addr INT," + "opcode TEXT," + "p1 INT," + "p2 INT," + "p3 INT," + "p4 TEXT," + "p5 INT," + "comment TEXT," + "subprog TEXT," + "stmt HIDDEN" + ");", + + /* Tables_used() schema */ + "CREATE TABLE x(" + "type TEXT," + "schema TEXT," + "name TEXT," + "wr INT," + "subprog TEXT," + "stmt HIDDEN" + ");" + }; + + rc = sqlite3_declare_vtab(db, azSchema[isTabUsed]); + if( rc==SQLITE_OK ){ + pNew = sqlite3_malloc( sizeof(*pNew) ); + *ppVtab = (sqlite3_vtab*)pNew; + if( pNew==0 ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(*pNew)); + pNew->db = db; + pNew->bTablesUsed = isTabUsed*2; + } + return rc; +} + +/* +** This method is the destructor for bytecodevtab objects. +*/ +static int bytecodevtabDisconnect(sqlite3_vtab *pVtab){ + bytecodevtab *p = (bytecodevtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Constructor for a new bytecodevtab_cursor object. +*/ +static int bytecodevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + bytecodevtab *pVTab = (bytecodevtab*)p; + bytecodevtab_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + sqlite3VdbeMemInit(&pCur->sub, pVTab->db, 1); + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Clear all internal content from a bytecodevtab cursor. +*/ +static void bytecodevtabCursorClear(bytecodevtab_cursor *pCur){ + sqlite3_free(pCur->zP4); + pCur->zP4 = 0; + sqlite3VdbeMemRelease(&pCur->sub); + sqlite3VdbeMemSetNull(&pCur->sub); + if( pCur->needFinalize ){ + sqlite3_finalize(pCur->pStmt); + } + pCur->pStmt = 0; + pCur->needFinalize = 0; + pCur->zType = 0; + pCur->zSchema = 0; + pCur->zName = 0; +} + +/* +** Destructor for a bytecodevtab_cursor. +*/ +static int bytecodevtabClose(sqlite3_vtab_cursor *cur){ + bytecodevtab_cursor *pCur = (bytecodevtab_cursor*)cur; + bytecodevtabCursorClear(pCur); + sqlite3_free(pCur); + return SQLITE_OK; +} + + +/* +** Advance a bytecodevtab_cursor to its next row of output. +*/ +static int bytecodevtabNext(sqlite3_vtab_cursor *cur){ + bytecodevtab_cursor *pCur = (bytecodevtab_cursor*)cur; + bytecodevtab *pTab = (bytecodevtab*)cur->pVtab; + int rc; + if( pCur->zP4 ){ + sqlite3_free(pCur->zP4); + pCur->zP4 = 0; + } + if( pCur->zName ){ + pCur->zName = 0; + pCur->zType = 0; + pCur->zSchema = 0; + } + rc = sqlite3VdbeNextOpcode( + (Vdbe*)pCur->pStmt, + pCur->showSubprograms ? &pCur->sub : 0, + pTab->bTablesUsed, + &pCur->iRowid, + &pCur->iAddr, + &pCur->aOp); + if( rc!=SQLITE_OK ){ + sqlite3VdbeMemSetNull(&pCur->sub); + pCur->aOp = 0; + } + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output. +*/ +static int bytecodevtabEof(sqlite3_vtab_cursor *cur){ + bytecodevtab_cursor *pCur = (bytecodevtab_cursor*)cur; + return pCur->aOp==0; +} + +/* +** Return values of columns for the row at which the bytecodevtab_cursor +** is currently pointing. +*/ +static int bytecodevtabColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + bytecodevtab_cursor *pCur = (bytecodevtab_cursor*)cur; + bytecodevtab *pVTab = (bytecodevtab*)cur->pVtab; + Op *pOp = pCur->aOp + pCur->iAddr; + if( pVTab->bTablesUsed ){ + if( i==4 ){ + i = 8; + }else{ + if( i<=2 && pCur->zType==0 ){ + Schema *pSchema; + HashElem *k; + int iDb = pOp->p3; + Pgno iRoot = (Pgno)pOp->p2; + sqlite3 *db = pVTab->db; + pSchema = db->aDb[iDb].pSchema; + pCur->zSchema = db->aDb[iDb].zDbSName; + for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ + Table *pTab = (Table*)sqliteHashData(k); + if( !IsVirtual(pTab) && pTab->tnum==iRoot ){ + pCur->zName = pTab->zName; + pCur->zType = "table"; + break; + } + } + if( pCur->zName==0 ){ + for(k=sqliteHashFirst(&pSchema->idxHash); k; k=sqliteHashNext(k)){ + Index *pIdx = (Index*)sqliteHashData(k); + if( pIdx->tnum==iRoot ){ + pCur->zName = pIdx->zName; + pCur->zType = "index"; + } + } + } + } + i += 10; + } + } + switch( i ){ + case 0: /* addr */ + sqlite3_result_int(ctx, pCur->iAddr); + break; + case 1: /* opcode */ + sqlite3_result_text(ctx, (char*)sqlite3OpcodeName(pOp->opcode), + -1, SQLITE_STATIC); + break; + case 2: /* p1 */ + sqlite3_result_int(ctx, pOp->p1); + break; + case 3: /* p2 */ + sqlite3_result_int(ctx, pOp->p2); + break; + case 4: /* p3 */ + sqlite3_result_int(ctx, pOp->p3); + break; + case 5: /* p4 */ + case 7: /* comment */ + if( pCur->zP4==0 ){ + pCur->zP4 = sqlite3VdbeDisplayP4(pVTab->db, pOp); + } + if( i==5 ){ + sqlite3_result_text(ctx, pCur->zP4, -1, SQLITE_STATIC); + }else{ +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + char *zCom = sqlite3VdbeDisplayComment(pVTab->db, pOp, pCur->zP4); + sqlite3_result_text(ctx, zCom, -1, sqlite3_free); +#endif + } + break; + case 6: /* p5 */ + sqlite3_result_int(ctx, pOp->p5); + break; + case 8: { /* subprog */ + Op *aOp = pCur->aOp; + assert( aOp[0].opcode==OP_Init ); + assert( aOp[0].p4.z==0 || strncmp(aOp[0].p4.z,"-" "- ",3)==0 ); + if( pCur->iRowid==pCur->iAddr+1 ){ + break; /* Result is NULL for the main program */ + }else if( aOp[0].p4.z!=0 ){ + sqlite3_result_text(ctx, aOp[0].p4.z+3, -1, SQLITE_STATIC); + }else{ + sqlite3_result_text(ctx, "(FK)", 4, SQLITE_STATIC); + } + break; + } + case 10: /* tables_used.type */ + sqlite3_result_text(ctx, pCur->zType, -1, SQLITE_STATIC); + break; + case 11: /* tables_used.schema */ + sqlite3_result_text(ctx, pCur->zSchema, -1, SQLITE_STATIC); + break; + case 12: /* tables_used.name */ + sqlite3_result_text(ctx, pCur->zName, -1, SQLITE_STATIC); + break; + case 13: /* tables_used.wr */ + sqlite3_result_int(ctx, pOp->opcode==OP_OpenWrite); + break; + } + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. In this implementation, the +** rowid is the same as the output value. +*/ +static int bytecodevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + bytecodevtab_cursor *pCur = (bytecodevtab_cursor*)cur; + *pRowid = pCur->iRowid; + return SQLITE_OK; +} + +/* +** Initialize a cursor. +** +** idxNum==0 means show all subprograms +** idxNum==1 means show only the main bytecode and omit subprograms. +*/ +static int bytecodevtabFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + bytecodevtab_cursor *pCur = (bytecodevtab_cursor *)pVtabCursor; + bytecodevtab *pVTab = (bytecodevtab *)pVtabCursor->pVtab; + int rc = SQLITE_OK; + + bytecodevtabCursorClear(pCur); + pCur->iRowid = 0; + pCur->iAddr = 0; + pCur->showSubprograms = idxNum==0; + assert( argc==1 ); + if( sqlite3_value_type(argv[0])==SQLITE_TEXT ){ + const char *zSql = (const char*)sqlite3_value_text(argv[0]); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pStmt, 0); + pCur->needFinalize = 1; + } + }else{ + pCur->pStmt = (sqlite3_stmt*)sqlite3_value_pointer(argv[0],"stmt-pointer"); + } + if( pCur->pStmt==0 ){ + pVTab->base.zErrMsg = sqlite3_mprintf( + "argument to %s() is not a valid SQL statement", + pVTab->bTablesUsed ? "tables_used" : "bytecode" + ); + rc = SQLITE_ERROR; + }else{ + bytecodevtabNext(pVtabCursor); + } + return rc; +} + +/* +** We must have a single stmt=? constraint that will be passed through +** into the xFilter method. If there is no valid stmt=? constraint, +** then return an SQLITE_CONSTRAINT error. +*/ +static int bytecodevtabBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + int i; + int rc = SQLITE_CONSTRAINT; + struct sqlite3_index_constraint *p; + bytecodevtab *pVTab = (bytecodevtab*)tab; + int iBaseCol = pVTab->bTablesUsed ? 4 : 8; + pIdxInfo->estimatedCost = (double)100; + pIdxInfo->estimatedRows = 100; + pIdxInfo->idxNum = 0; + for(i=0, p=pIdxInfo->aConstraint; inConstraint; i++, p++){ + if( p->usable==0 ) continue; + if( p->op==SQLITE_INDEX_CONSTRAINT_EQ && p->iColumn==iBaseCol+1 ){ + rc = SQLITE_OK; + pIdxInfo->aConstraintUsage[i].omit = 1; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + } + if( p->op==SQLITE_INDEX_CONSTRAINT_ISNULL && p->iColumn==iBaseCol ){ + pIdxInfo->aConstraintUsage[i].omit = 1; + pIdxInfo->idxNum = 1; + } + } + return rc; +} + +/* +** This following structure defines all the methods for the +** virtual table. +*/ +static sqlite3_module bytecodevtabModule = { + /* iVersion */ 0, + /* xCreate */ 0, + /* xConnect */ bytecodevtabConnect, + /* xBestIndex */ bytecodevtabBestIndex, + /* xDisconnect */ bytecodevtabDisconnect, + /* xDestroy */ 0, + /* xOpen */ bytecodevtabOpen, + /* xClose */ bytecodevtabClose, + /* xFilter */ bytecodevtabFilter, + /* xNext */ bytecodevtabNext, + /* xEof */ bytecodevtabEof, + /* xColumn */ bytecodevtabColumn, + /* xRowid */ bytecodevtabRowid, + /* xUpdate */ 0, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0 +}; + + +SQLITE_PRIVATE int sqlite3VdbeBytecodeVtabInit(sqlite3 *db){ + int rc; + rc = sqlite3_create_module(db, "bytecode", &bytecodevtabModule, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_module(db, "tables_used", &bytecodevtabModule, &db); + } + return rc; +} +#elif defined(SQLITE_ENABLE_BYTECODE_VTAB) +SQLITE_PRIVATE int sqlite3VdbeBytecodeVtabInit(sqlite3 *db){ return SQLITE_OK; } +#endif /* SQLITE_ENABLE_BYTECODE_VTAB */ + +/************** End of vdbevtab.c ********************************************/ +/************** Begin file memjournal.c **************************************/ +/* +** 2008 October 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code use to implement an in-memory rollback journal. +** The in-memory rollback journal is used to journal transactions for +** ":memory:" databases and when the journal_mode=MEMORY pragma is used. +** +** Update: The in-memory journal is also used to temporarily cache +** smaller journals that are not critical for power-loss recovery. +** For example, statement journals that are not too big will be held +** entirely in memory, thus reducing the number of file I/O calls, and +** more importantly, reducing temporary file creation events. If these +** journals become too large for memory, they are spilled to disk. But +** in the common case, they are usually small and no file I/O needs to +** occur. +*/ +/* #include "sqliteInt.h" */ + +/* Forward references to internal structures */ +typedef struct MemJournal MemJournal; +typedef struct FilePoint FilePoint; +typedef struct FileChunk FileChunk; + +/* +** The rollback journal is composed of a linked list of these structures. +** +** The zChunk array is always at least 8 bytes in size - usually much more. +** Its actual size is stored in the MemJournal.nChunkSize variable. +*/ +struct FileChunk { + FileChunk *pNext; /* Next chunk in the journal */ + u8 zChunk[8]; /* Content of this chunk */ +}; + +/* +** By default, allocate this many bytes of memory for each FileChunk object. +*/ +#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024 + +/* +** For chunk size nChunkSize, return the number of bytes that should +** be allocated for each FileChunk structure. +*/ +#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8)) + +/* +** An instance of this object serves as a cursor into the rollback journal. +** The cursor can be either for reading or writing. +*/ +struct FilePoint { + sqlite3_int64 iOffset; /* Offset from the beginning of the file */ + FileChunk *pChunk; /* Specific chunk into which cursor points */ +}; + +/* +** This structure is a subclass of sqlite3_file. Each open memory-journal +** is an instance of this class. +*/ +struct MemJournal { + const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */ + int nChunkSize; /* In-memory chunk-size */ + + int nSpill; /* Bytes of data before flushing */ + FileChunk *pFirst; /* Head of in-memory chunk-list */ + FilePoint endpoint; /* Pointer to the end of the file */ + FilePoint readpoint; /* Pointer to the end of the last xRead() */ + + int flags; /* xOpen flags */ + sqlite3_vfs *pVfs; /* The "real" underlying VFS */ + const char *zJournal; /* Name of the journal file */ +}; + +/* +** Read data from the in-memory journal file. This is the implementation +** of the sqlite3_vfs.xRead method. +*/ +static int memjrnlRead( + sqlite3_file *pJfd, /* The journal file from which to read */ + void *zBuf, /* Put the results here */ + int iAmt, /* Number of bytes to read */ + sqlite_int64 iOfst /* Begin reading at this offset */ +){ + MemJournal *p = (MemJournal *)pJfd; + u8 *zOut = zBuf; + int nRead = iAmt; + int iChunkOffset; + FileChunk *pChunk; + + if( (iAmt+iOfst)>p->endpoint.iOffset ){ + return SQLITE_IOERR_SHORT_READ; + } + assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 ); + if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ + sqlite3_int64 iOff = 0; + for(pChunk=p->pFirst; + ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst; + pChunk=pChunk->pNext + ){ + iOff += p->nChunkSize; + } + }else{ + pChunk = p->readpoint.pChunk; + assert( pChunk!=0 ); + } + + iChunkOffset = (int)(iOfst%p->nChunkSize); + do { + int iSpace = p->nChunkSize - iChunkOffset; + int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset)); + memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy); + zOut += nCopy; + nRead -= iSpace; + iChunkOffset = 0; + } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); + p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0; + p->readpoint.pChunk = pChunk; + + return SQLITE_OK; +} + +/* +** Free the list of FileChunk structures headed at MemJournal.pFirst. +*/ +static void memjrnlFreeChunks(FileChunk *pFirst){ + FileChunk *pIter; + FileChunk *pNext; + for(pIter=pFirst; pIter; pIter=pNext){ + pNext = pIter->pNext; + sqlite3_free(pIter); + } +} + +/* +** Flush the contents of memory to a real file on disk. +*/ +static int memjrnlCreateFile(MemJournal *p){ + int rc; + sqlite3_file *pReal = (sqlite3_file*)p; + MemJournal copy = *p; + + memset(p, 0, sizeof(MemJournal)); + rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0); + if( rc==SQLITE_OK ){ + int nChunk = copy.nChunkSize; + i64 iOff = 0; + FileChunk *pIter; + for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){ + if( iOff + nChunk > copy.endpoint.iOffset ){ + nChunk = copy.endpoint.iOffset - iOff; + } + rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff); + if( rc ) break; + iOff += nChunk; + } + if( rc==SQLITE_OK ){ + /* No error has occurred. Free the in-memory buffers. */ + memjrnlFreeChunks(copy.pFirst); + } + } + if( rc!=SQLITE_OK ){ + /* If an error occurred while creating or writing to the file, restore + ** the original before returning. This way, SQLite uses the in-memory + ** journal data to roll back changes made to the internal page-cache + ** before this function was called. */ + sqlite3OsClose(pReal); + *p = copy; + } + return rc; +} + + +/* Forward reference */ +static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size); + +/* +** Write data to the file. +*/ +static int memjrnlWrite( + sqlite3_file *pJfd, /* The journal file into which to write */ + const void *zBuf, /* Take data to be written from here */ + int iAmt, /* Number of bytes to write */ + sqlite_int64 iOfst /* Begin writing at this offset into the file */ +){ + MemJournal *p = (MemJournal *)pJfd; + int nWrite = iAmt; + u8 *zWrite = (u8 *)zBuf; + + /* If the file should be created now, create it and write the new data + ** into the file on disk. */ + if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){ + int rc = memjrnlCreateFile(p); + if( rc==SQLITE_OK ){ + rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst); + } + return rc; + } + + /* If the contents of this write should be stored in memory */ + else{ + /* An in-memory journal file should only ever be appended to. Random + ** access writes are not required. The only exception to this is when + ** the in-memory journal is being used by a connection using the + ** atomic-write optimization. In this case the first 28 bytes of the + ** journal file may be written as part of committing the transaction. */ + assert( iOfst<=p->endpoint.iOffset ); + if( iOfst>0 && iOfst!=p->endpoint.iOffset ){ + memjrnlTruncate(pJfd, iOfst); + } + if( iOfst==0 && p->pFirst ){ + assert( p->nChunkSize>iAmt ); + memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); + }else{ + while( nWrite>0 ){ + FileChunk *pChunk = p->endpoint.pChunk; + int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize); + int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset); + + assert( pChunk!=0 || iChunkOffset==0 ); + if( iChunkOffset==0 ){ + /* New chunk is required to extend the file. */ + FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize)); + if( !pNew ){ + return SQLITE_IOERR_NOMEM_BKPT; + } + pNew->pNext = 0; + if( pChunk ){ + assert( p->pFirst ); + pChunk->pNext = pNew; + }else{ + assert( !p->pFirst ); + p->pFirst = pNew; + } + pChunk = p->endpoint.pChunk = pNew; + } + + assert( pChunk!=0 ); + memcpy((u8*)pChunk->zChunk + iChunkOffset, zWrite, iSpace); + zWrite += iSpace; + nWrite -= iSpace; + p->endpoint.iOffset += iSpace; + } + } + } + + return SQLITE_OK; +} + +/* +** Truncate the in-memory file. +*/ +static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ + MemJournal *p = (MemJournal *)pJfd; + assert( p->endpoint.pChunk==0 || p->endpoint.pChunk->pNext==0 ); + if( sizeendpoint.iOffset ){ + FileChunk *pIter = 0; + if( size==0 ){ + memjrnlFreeChunks(p->pFirst); + p->pFirst = 0; + }else{ + i64 iOff = p->nChunkSize; + for(pIter=p->pFirst; ALWAYS(pIter) && iOffpNext){ + iOff += p->nChunkSize; + } + if( ALWAYS(pIter) ){ + memjrnlFreeChunks(pIter->pNext); + pIter->pNext = 0; + } + } + + p->endpoint.pChunk = pIter; + p->endpoint.iOffset = size; + p->readpoint.pChunk = 0; + p->readpoint.iOffset = 0; + } + return SQLITE_OK; +} + +/* +** Close the file. +*/ +static int memjrnlClose(sqlite3_file *pJfd){ + MemJournal *p = (MemJournal *)pJfd; + memjrnlFreeChunks(p->pFirst); + return SQLITE_OK; +} + +/* +** Sync the file. +** +** If the real file has been created, call its xSync method. Otherwise, +** syncing an in-memory journal is a no-op. +*/ +static int memjrnlSync(sqlite3_file *pJfd, int flags){ + UNUSED_PARAMETER2(pJfd, flags); + return SQLITE_OK; +} + +/* +** Query the size of the file in bytes. +*/ +static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ + MemJournal *p = (MemJournal *)pJfd; + *pSize = (sqlite_int64) p->endpoint.iOffset; + return SQLITE_OK; +} + +/* +** Table of methods for MemJournal sqlite3_file object. +*/ +static const struct sqlite3_io_methods MemJournalMethods = { + 1, /* iVersion */ + memjrnlClose, /* xClose */ + memjrnlRead, /* xRead */ + memjrnlWrite, /* xWrite */ + memjrnlTruncate, /* xTruncate */ + memjrnlSync, /* xSync */ + memjrnlFileSize, /* xFileSize */ + 0, /* xLock */ + 0, /* xUnlock */ + 0, /* xCheckReservedLock */ + 0, /* xFileControl */ + 0, /* xSectorSize */ + 0, /* xDeviceCharacteristics */ + 0, /* xShmMap */ + 0, /* xShmLock */ + 0, /* xShmBarrier */ + 0, /* xShmUnmap */ + 0, /* xFetch */ + 0 /* xUnfetch */ +}; + +/* +** Open a journal file. +** +** The behaviour of the journal file depends on the value of parameter +** nSpill. If nSpill is 0, then the journal file is always create and +** accessed using the underlying VFS. If nSpill is less than zero, then +** all content is always stored in main-memory. Finally, if nSpill is a +** positive value, then the journal file is initially created in-memory +** but may be flushed to disk later on. In this case the journal file is +** flushed to disk either when it grows larger than nSpill bytes in size, +** or when sqlite3JournalCreate() is called. +*/ +SQLITE_PRIVATE int sqlite3JournalOpen( + sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ + const char *zName, /* Name of the journal file */ + sqlite3_file *pJfd, /* Preallocated, blank file handle */ + int flags, /* Opening flags */ + int nSpill /* Bytes buffered before opening the file */ +){ + MemJournal *p = (MemJournal*)pJfd; + + /* Zero the file-handle object. If nSpill was passed zero, initialize + ** it using the sqlite3OsOpen() function of the underlying VFS. In this + ** case none of the code in this module is executed as a result of calls + ** made on the journal file-handle. */ + memset(p, 0, sizeof(MemJournal)); + if( nSpill==0 ){ + return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); + } + + if( nSpill>0 ){ + p->nChunkSize = nSpill; + }else{ + p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk); + assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) ); + } + + pJfd->pMethods = (const sqlite3_io_methods*)&MemJournalMethods; + p->nSpill = nSpill; + p->flags = flags; + p->zJournal = zName; + p->pVfs = pVfs; + return SQLITE_OK; +} + +/* +** Open an in-memory journal file. +*/ +SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *pJfd){ + sqlite3JournalOpen(0, 0, pJfd, 0, -1); +} + +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) +/* +** If the argument p points to a MemJournal structure that is not an +** in-memory-only journal file (i.e. is one that was opened with a +ve +** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying +** file has not yet been created, create it now. +*/ +SQLITE_PRIVATE int sqlite3JournalCreate(sqlite3_file *pJfd){ + int rc = SQLITE_OK; + MemJournal *p = (MemJournal*)pJfd; + if( pJfd->pMethods==&MemJournalMethods && ( +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + p->nSpill>0 +#else + /* While this appears to not be possible without ATOMIC_WRITE, the + ** paths are complex, so it seems prudent to leave the test in as + ** a NEVER(), in case our analysis is subtly flawed. */ + NEVER(p->nSpill>0) +#endif +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + || (p->flags & SQLITE_OPEN_MAIN_JOURNAL) +#endif + )){ + rc = memjrnlCreateFile(p); + } + return rc; +} +#endif + +/* +** The file-handle passed as the only argument is open on a journal file. +** Return true if this "journal file" is currently stored in heap memory, +** or false otherwise. +*/ +SQLITE_PRIVATE int sqlite3JournalIsInMemory(sqlite3_file *p){ + return p->pMethods==&MemJournalMethods; +} + +/* +** Return the number of bytes required to store a JournalFile that uses vfs +** pVfs to create the underlying on-disk files. +*/ +SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *pVfs){ + return MAX(pVfs->szOsFile, (int)sizeof(MemJournal)); +} + +/************** End of memjournal.c ******************************************/ +/************** Begin file walker.c ******************************************/ +/* +** 2008 August 16 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains routines used for walking the parser tree for +** an SQL statement. +*/ +/* #include "sqliteInt.h" */ +/* #include */ +/* #include */ + + +#if !defined(SQLITE_OMIT_WINDOWFUNC) +/* +** Walk all expressions linked into the list of Window objects passed +** as the second argument. +*/ +static int walkWindowList(Walker *pWalker, Window *pList, int bOneOnly){ + Window *pWin; + for(pWin=pList; pWin; pWin=pWin->pNextWin){ + int rc; + rc = sqlite3WalkExprList(pWalker, pWin->pOrderBy); + if( rc ) return WRC_Abort; + rc = sqlite3WalkExprList(pWalker, pWin->pPartition); + if( rc ) return WRC_Abort; + rc = sqlite3WalkExpr(pWalker, pWin->pFilter); + if( rc ) return WRC_Abort; + rc = sqlite3WalkExpr(pWalker, pWin->pStart); + if( rc ) return WRC_Abort; + rc = sqlite3WalkExpr(pWalker, pWin->pEnd); + if( rc ) return WRC_Abort; + if( bOneOnly ) break; + } + return WRC_Continue; +} +#endif + +/* +** Walk an expression tree. Invoke the callback once for each node +** of the expression, while descending. (In other words, the callback +** is invoked before visiting children.) +** +** The return value from the callback should be one of the WRC_* +** constants to specify how to proceed with the walk. +** +** WRC_Continue Continue descending down the tree. +** +** WRC_Prune Do not descend into child nodes, but allow +** the walk to continue with sibling nodes. +** +** WRC_Abort Do no more callbacks. Unwind the stack and +** return from the top-level walk call. +** +** The return value from this routine is WRC_Abort to abandon the tree walk +** and WRC_Continue to continue. +*/ +static SQLITE_NOINLINE int walkExpr(Walker *pWalker, Expr *pExpr){ + int rc; + testcase( ExprHasProperty(pExpr, EP_TokenOnly) ); + testcase( ExprHasProperty(pExpr, EP_Reduced) ); + while(1){ + rc = pWalker->xExprCallback(pWalker, pExpr); + if( rc ) return rc & WRC_Abort; + if( !ExprHasProperty(pExpr,(EP_TokenOnly|EP_Leaf)) ){ + assert( pExpr->x.pList==0 || pExpr->pRight==0 ); + if( pExpr->pLeft && walkExpr(pWalker, pExpr->pLeft) ) return WRC_Abort; + if( pExpr->pRight ){ + assert( !ExprHasProperty(pExpr, EP_WinFunc) ); + pExpr = pExpr->pRight; + continue; + }else if( ExprUseXSelect(pExpr) ){ + assert( !ExprHasProperty(pExpr, EP_WinFunc) ); + if( sqlite3WalkSelect(pWalker, pExpr->x.pSelect) ) return WRC_Abort; + }else{ + if( pExpr->x.pList ){ + if( sqlite3WalkExprList(pWalker, pExpr->x.pList) ) return WRC_Abort; + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + if( walkWindowList(pWalker, pExpr->y.pWin, 1) ) return WRC_Abort; + } +#endif + } + } + break; + } + return WRC_Continue; +} +SQLITE_PRIVATE int sqlite3WalkExpr(Walker *pWalker, Expr *pExpr){ + return pExpr ? walkExpr(pWalker,pExpr) : WRC_Continue; +} + +/* +** Call sqlite3WalkExpr() for every expression in list p or until +** an abort request is seen. +*/ +SQLITE_PRIVATE int sqlite3WalkExprList(Walker *pWalker, ExprList *p){ + int i; + struct ExprList_item *pItem; + if( p ){ + for(i=p->nExpr, pItem=p->a; i>0; i--, pItem++){ + if( sqlite3WalkExpr(pWalker, pItem->pExpr) ) return WRC_Abort; + } + } + return WRC_Continue; +} + +/* +** This is a no-op callback for Walker->xSelectCallback2. If this +** callback is set, then the Select->pWinDefn list is traversed. +*/ +SQLITE_PRIVATE void sqlite3WalkWinDefnDummyCallback(Walker *pWalker, Select *p){ + UNUSED_PARAMETER(pWalker); + UNUSED_PARAMETER(p); + /* No-op */ +} + +/* +** Walk all expressions associated with SELECT statement p. Do +** not invoke the SELECT callback on p, but do (of course) invoke +** any expr callbacks and SELECT callbacks that come from subqueries. +** Return WRC_Abort or WRC_Continue. +*/ +SQLITE_PRIVATE int sqlite3WalkSelectExpr(Walker *pWalker, Select *p){ + if( sqlite3WalkExprList(pWalker, p->pEList) ) return WRC_Abort; + if( sqlite3WalkExpr(pWalker, p->pWhere) ) return WRC_Abort; + if( sqlite3WalkExprList(pWalker, p->pGroupBy) ) return WRC_Abort; + if( sqlite3WalkExpr(pWalker, p->pHaving) ) return WRC_Abort; + if( sqlite3WalkExprList(pWalker, p->pOrderBy) ) return WRC_Abort; + if( sqlite3WalkExpr(pWalker, p->pLimit) ) return WRC_Abort; +#if !defined(SQLITE_OMIT_WINDOWFUNC) + if( p->pWinDefn ){ + Parse *pParse; + if( pWalker->xSelectCallback2==sqlite3WalkWinDefnDummyCallback + || ((pParse = pWalker->pParse)!=0 && IN_RENAME_OBJECT) +#ifndef SQLITE_OMIT_CTE + || pWalker->xSelectCallback2==sqlite3SelectPopWith +#endif + ){ + /* The following may return WRC_Abort if there are unresolvable + ** symbols (e.g. a table that does not exist) in a window definition. */ + int rc = walkWindowList(pWalker, p->pWinDefn, 0); + return rc; + } + } +#endif + return WRC_Continue; +} + +/* +** Walk the parse trees associated with all subqueries in the +** FROM clause of SELECT statement p. Do not invoke the select +** callback on p, but do invoke it on each FROM clause subquery +** and on any subqueries further down in the tree. Return +** WRC_Abort or WRC_Continue; +*/ +SQLITE_PRIVATE int sqlite3WalkSelectFrom(Walker *pWalker, Select *p){ + SrcList *pSrc; + int i; + SrcItem *pItem; + + pSrc = p->pSrc; + if( ALWAYS(pSrc) ){ + for(i=pSrc->nSrc, pItem=pSrc->a; i>0; i--, pItem++){ + if( pItem->pSelect && sqlite3WalkSelect(pWalker, pItem->pSelect) ){ + return WRC_Abort; + } + if( pItem->fg.isTabFunc + && sqlite3WalkExprList(pWalker, pItem->u1.pFuncArg) + ){ + return WRC_Abort; + } + } + } + return WRC_Continue; +} + +/* +** Call sqlite3WalkExpr() for every expression in Select statement p. +** Invoke sqlite3WalkSelect() for subqueries in the FROM clause and +** on the compound select chain, p->pPrior. +** +** If it is not NULL, the xSelectCallback() callback is invoked before +** the walk of the expressions and FROM clause. The xSelectCallback2() +** method is invoked following the walk of the expressions and FROM clause, +** but only if both xSelectCallback and xSelectCallback2 are both non-NULL +** and if the expressions and FROM clause both return WRC_Continue; +** +** Return WRC_Continue under normal conditions. Return WRC_Abort if +** there is an abort request. +** +** If the Walker does not have an xSelectCallback() then this routine +** is a no-op returning WRC_Continue. +*/ +SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){ + int rc; + if( p==0 ) return WRC_Continue; + if( pWalker->xSelectCallback==0 ) return WRC_Continue; + do{ + rc = pWalker->xSelectCallback(pWalker, p); + if( rc ) return rc & WRC_Abort; + if( sqlite3WalkSelectExpr(pWalker, p) + || sqlite3WalkSelectFrom(pWalker, p) + ){ + return WRC_Abort; + } + if( pWalker->xSelectCallback2 ){ + pWalker->xSelectCallback2(pWalker, p); + } + p = p->pPrior; + }while( p!=0 ); + return WRC_Continue; +} + +/* Increase the walkerDepth when entering a subquery, and +** descrease when leaving the subquery. +*/ +SQLITE_PRIVATE int sqlite3WalkerDepthIncrease(Walker *pWalker, Select *pSelect){ + UNUSED_PARAMETER(pSelect); + pWalker->walkerDepth++; + return WRC_Continue; +} +SQLITE_PRIVATE void sqlite3WalkerDepthDecrease(Walker *pWalker, Select *pSelect){ + UNUSED_PARAMETER(pSelect); + pWalker->walkerDepth--; +} + + +/* +** No-op routine for the parse-tree walker. +** +** When this routine is the Walker.xExprCallback then expression trees +** are walked without any actions being taken at each node. Presumably, +** when this routine is used for Walker.xExprCallback then +** Walker.xSelectCallback is set to do something useful for every +** subquery in the parser tree. +*/ +SQLITE_PRIVATE int sqlite3ExprWalkNoop(Walker *NotUsed, Expr *NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + return WRC_Continue; +} + +/* +** No-op routine for the parse-tree walker for SELECT statements. +** subquery in the parser tree. +*/ +SQLITE_PRIVATE int sqlite3SelectWalkNoop(Walker *NotUsed, Select *NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + return WRC_Continue; +} + +/************** End of walker.c **********************************************/ +/************** Begin file resolve.c *****************************************/ +/* +** 2008 August 18 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains routines used for walking the parser tree and +** resolve all identifiers by associating them with a particular +** table and column. +*/ +/* #include "sqliteInt.h" */ + +/* +** Magic table number to mean the EXCLUDED table in an UPSERT statement. +*/ +#define EXCLUDED_TABLE_NUMBER 2 + +/* +** Walk the expression tree pExpr and increase the aggregate function +** depth (the Expr.op2 field) by N on every TK_AGG_FUNCTION node. +** This needs to occur when copying a TK_AGG_FUNCTION node from an +** outer query into an inner subquery. +** +** incrAggFunctionDepth(pExpr,n) is the main routine. incrAggDepth(..) +** is a helper function - a callback for the tree walker. +** +** See also the sqlite3WindowExtraAggFuncDepth() routine in window.c +*/ +static int incrAggDepth(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_AGG_FUNCTION ) pExpr->op2 += pWalker->u.n; + return WRC_Continue; +} +static void incrAggFunctionDepth(Expr *pExpr, int N){ + if( N>0 ){ + Walker w; + memset(&w, 0, sizeof(w)); + w.xExprCallback = incrAggDepth; + w.u.n = N; + sqlite3WalkExpr(&w, pExpr); + } +} + +/* +** Turn the pExpr expression into an alias for the iCol-th column of the +** result set in pEList. +** +** If the reference is followed by a COLLATE operator, then make sure +** the COLLATE operator is preserved. For example: +** +** SELECT a+b, c+d FROM t1 ORDER BY 1 COLLATE nocase; +** +** Should be transformed into: +** +** SELECT a+b, c+d FROM t1 ORDER BY (a+b) COLLATE nocase; +** +** The nSubquery parameter specifies how many levels of subquery the +** alias is removed from the original expression. The usual value is +** zero but it might be more if the alias is contained within a subquery +** of the original expression. The Expr.op2 field of TK_AGG_FUNCTION +** structures must be increased by the nSubquery amount. +*/ +static void resolveAlias( + Parse *pParse, /* Parsing context */ + ExprList *pEList, /* A result set */ + int iCol, /* A column in the result set. 0..pEList->nExpr-1 */ + Expr *pExpr, /* Transform this into an alias to the result set */ + int nSubquery /* Number of subqueries that the label is moving */ +){ + Expr *pOrig; /* The iCol-th column of the result set */ + Expr *pDup; /* Copy of pOrig */ + sqlite3 *db; /* The database connection */ + + assert( iCol>=0 && iColnExpr ); + pOrig = pEList->a[iCol].pExpr; + assert( pOrig!=0 ); + db = pParse->db; + pDup = sqlite3ExprDup(db, pOrig, 0); + if( db->mallocFailed ){ + sqlite3ExprDelete(db, pDup); + pDup = 0; + }else{ + incrAggFunctionDepth(pDup, nSubquery); + if( pExpr->op==TK_COLLATE ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + pDup = sqlite3ExprAddCollateString(pParse, pDup, pExpr->u.zToken); + } + + /* Before calling sqlite3ExprDelete(), set the EP_Static flag. This + ** prevents ExprDelete() from deleting the Expr structure itself, + ** allowing it to be repopulated by the memcpy() on the following line. + ** The pExpr->u.zToken might point into memory that will be freed by the + ** sqlite3DbFree(db, pDup) on the last line of this block, so be sure to + ** make a copy of the token before doing the sqlite3DbFree(). + */ + ExprSetProperty(pExpr, EP_Static); + sqlite3ExprDelete(db, pExpr); + memcpy(pExpr, pDup, sizeof(*pExpr)); + if( !ExprHasProperty(pExpr, EP_IntValue) && pExpr->u.zToken!=0 ){ + assert( (pExpr->flags & (EP_Reduced|EP_TokenOnly))==0 ); + pExpr->u.zToken = sqlite3DbStrDup(db, pExpr->u.zToken); + pExpr->flags |= EP_MemToken; + } + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + if( ALWAYS(pExpr->y.pWin!=0) ){ + pExpr->y.pWin->pOwner = pExpr; + } + } + sqlite3DbFree(db, pDup); + } +} + + +/* +** Return TRUE if the name zCol occurs anywhere in the USING clause. +** +** Return FALSE if the USING clause is NULL or if it does not contain +** zCol. +*/ +static int nameInUsingClause(IdList *pUsing, const char *zCol){ + if( pUsing ){ + int k; + for(k=0; knId; k++){ + if( sqlite3StrICmp(pUsing->a[k].zName, zCol)==0 ) return 1; + } + } + return 0; +} + +/* +** Subqueries stores the original database, table and column names for their +** result sets in ExprList.a[].zSpan, in the form "DATABASE.TABLE.COLUMN". +** Check to see if the zSpan given to this routine matches the zDb, zTab, +** and zCol. If any of zDb, zTab, and zCol are NULL then those fields will +** match anything. +*/ +SQLITE_PRIVATE int sqlite3MatchEName( + const struct ExprList_item *pItem, + const char *zCol, + const char *zTab, + const char *zDb +){ + int n; + const char *zSpan; + if( pItem->eEName!=ENAME_TAB ) return 0; + zSpan = pItem->zEName; + for(n=0; ALWAYS(zSpan[n]) && zSpan[n]!='.'; n++){} + if( zDb && (sqlite3StrNICmp(zSpan, zDb, n)!=0 || zDb[n]!=0) ){ + return 0; + } + zSpan += n+1; + for(n=0; ALWAYS(zSpan[n]) && zSpan[n]!='.'; n++){} + if( zTab && (sqlite3StrNICmp(zSpan, zTab, n)!=0 || zTab[n]!=0) ){ + return 0; + } + zSpan += n+1; + if( zCol && sqlite3StrICmp(zSpan, zCol)!=0 ){ + return 0; + } + return 1; +} + +/* +** Return TRUE if the double-quoted string mis-feature should be supported. +*/ +static int areDoubleQuotedStringsEnabled(sqlite3 *db, NameContext *pTopNC){ + if( db->init.busy ) return 1; /* Always support for legacy schemas */ + if( pTopNC->ncFlags & NC_IsDDL ){ + /* Currently parsing a DDL statement */ + if( sqlite3WritableSchema(db) && (db->flags & SQLITE_DqsDML)!=0 ){ + return 1; + } + return (db->flags & SQLITE_DqsDDL)!=0; + }else{ + /* Currently parsing a DML statement */ + return (db->flags & SQLITE_DqsDML)!=0; + } +} + +/* +** The argument is guaranteed to be a non-NULL Expr node of type TK_COLUMN. +** return the appropriate colUsed mask. +*/ +SQLITE_PRIVATE Bitmask sqlite3ExprColUsed(Expr *pExpr){ + int n; + Table *pExTab; + + n = pExpr->iColumn; + assert( ExprUseYTab(pExpr) ); + pExTab = pExpr->y.pTab; + assert( pExTab!=0 ); + if( (pExTab->tabFlags & TF_HasGenerated)!=0 + && (pExTab->aCol[n].colFlags & COLFLAG_GENERATED)!=0 + ){ + testcase( pExTab->nCol==BMS-1 ); + testcase( pExTab->nCol==BMS ); + return pExTab->nCol>=BMS ? ALLBITS : MASKBIT(pExTab->nCol)-1; + }else{ + testcase( n==BMS-1 ); + testcase( n==BMS ); + if( n>=BMS ) n = BMS-1; + return ((Bitmask)1)<iDb Set the index in db->aDb[] of the database X +** (even if X is implied). +** pExpr->iTable Set to the cursor number for the table obtained +** from pSrcList. +** pExpr->y.pTab Points to the Table structure of X.Y (even if +** X and/or Y are implied.) +** pExpr->iColumn Set to the column number within the table. +** pExpr->op Set to TK_COLUMN. +** pExpr->pLeft Any expression this points to is deleted +** pExpr->pRight Any expression this points to is deleted. +** +** The zDb variable is the name of the database (the "X"). This value may be +** NULL meaning that name is of the form Y.Z or Z. Any available database +** can be used. The zTable variable is the name of the table (the "Y"). This +** value can be NULL if zDb is also NULL. If zTable is NULL it +** means that the form of the name is Z and that columns from any table +** can be used. +** +** If the name cannot be resolved unambiguously, leave an error message +** in pParse and return WRC_Abort. Return WRC_Prune on success. +*/ +static int lookupName( + Parse *pParse, /* The parsing context */ + const char *zDb, /* Name of the database containing table, or NULL */ + const char *zTab, /* Name of table containing column, or NULL */ + const char *zCol, /* Name of the column. */ + NameContext *pNC, /* The name context used to resolve the name */ + Expr *pExpr /* Make this EXPR node point to the selected column */ +){ + int i, j; /* Loop counters */ + int cnt = 0; /* Number of matching column names */ + int cntTab = 0; /* Number of matching table names */ + int nSubquery = 0; /* How many levels of subquery */ + sqlite3 *db = pParse->db; /* The database connection */ + SrcItem *pItem; /* Use for looping over pSrcList items */ + SrcItem *pMatch = 0; /* The matching pSrcList item */ + NameContext *pTopNC = pNC; /* First namecontext in the list */ + Schema *pSchema = 0; /* Schema of the expression */ + int eNewExprOp = TK_COLUMN; /* New value for pExpr->op on success */ + Table *pTab = 0; /* Table hold the row */ + Column *pCol; /* A column of pTab */ + + assert( pNC ); /* the name context cannot be NULL. */ + assert( zCol ); /* The Z in X.Y.Z cannot be NULL */ + assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) ); + + /* Initialize the node to no-match */ + pExpr->iTable = -1; + ExprSetVVAProperty(pExpr, EP_NoReduce); + + /* Translate the schema name in zDb into a pointer to the corresponding + ** schema. If not found, pSchema will remain NULL and nothing will match + ** resulting in an appropriate error message toward the end of this routine + */ + if( zDb ){ + testcase( pNC->ncFlags & NC_PartIdx ); + testcase( pNC->ncFlags & NC_IsCheck ); + if( (pNC->ncFlags & (NC_PartIdx|NC_IsCheck))!=0 ){ + /* Silently ignore database qualifiers inside CHECK constraints and + ** partial indices. Do not raise errors because that might break + ** legacy and because it does not hurt anything to just ignore the + ** database name. */ + zDb = 0; + }else{ + for(i=0; inDb; i++){ + assert( db->aDb[i].zDbSName ); + if( sqlite3StrICmp(db->aDb[i].zDbSName,zDb)==0 ){ + pSchema = db->aDb[i].pSchema; + break; + } + } + if( i==db->nDb && sqlite3StrICmp("main", zDb)==0 ){ + /* This branch is taken when the main database has been renamed + ** using SQLITE_DBCONFIG_MAINDBNAME. */ + pSchema = db->aDb[0].pSchema; + zDb = db->aDb[0].zDbSName; + } + } + } + + /* Start at the inner-most context and move outward until a match is found */ + assert( pNC && cnt==0 ); + do{ + ExprList *pEList; + SrcList *pSrcList = pNC->pSrcList; + + if( pSrcList ){ + for(i=0, pItem=pSrcList->a; inSrc; i++, pItem++){ + u8 hCol; + pTab = pItem->pTab; + assert( pTab!=0 && pTab->zName!=0 ); + assert( pTab->nCol>0 || pParse->nErr ); + if( pItem->pSelect && (pItem->pSelect->selFlags & SF_NestedFrom)!=0 ){ + int hit = 0; + pEList = pItem->pSelect->pEList; + for(j=0; jnExpr; j++){ + if( sqlite3MatchEName(&pEList->a[j], zCol, zTab, zDb) ){ + cnt++; + cntTab = 2; + pMatch = pItem; + pExpr->iColumn = j; + hit = 1; + } + } + if( hit || zTab==0 ) continue; + } + if( zDb ){ + if( pTab->pSchema!=pSchema ) continue; + if( pSchema==0 && strcmp(zDb,"*")!=0 ) continue; + } + if( zTab ){ + const char *zTabName = pItem->zAlias ? pItem->zAlias : pTab->zName; + assert( zTabName!=0 ); + if( sqlite3StrICmp(zTabName, zTab)!=0 ){ + continue; + } + assert( ExprUseYTab(pExpr) ); + if( IN_RENAME_OBJECT && pItem->zAlias ){ + sqlite3RenameTokenRemap(pParse, 0, (void*)&pExpr->y.pTab); + } + } + hCol = sqlite3StrIHash(zCol); + for(j=0, pCol=pTab->aCol; jnCol; j++, pCol++){ + if( pCol->hName==hCol + && sqlite3StrICmp(pCol->zCnName, zCol)==0 + ){ + /* If there has been exactly one prior match and this match + ** is for the right-hand table of a NATURAL JOIN or is in a + ** USING clause, then skip this match. + */ + if( cnt==1 ){ + if( pItem->fg.jointype & JT_NATURAL ) continue; + if( nameInUsingClause(pItem->pUsing, zCol) ) continue; + } + cnt++; + pMatch = pItem; + /* Substitute the rowid (column -1) for the INTEGER PRIMARY KEY */ + pExpr->iColumn = j==pTab->iPKey ? -1 : (i16)j; + break; + } + } + if( 0==cnt && VisibleRowid(pTab) ){ + cntTab++; + pMatch = pItem; + } + } + if( pMatch ){ + pExpr->iTable = pMatch->iCursor; + assert( ExprUseYTab(pExpr) ); + pExpr->y.pTab = pMatch->pTab; + /* RIGHT JOIN not (yet) supported */ + assert( (pMatch->fg.jointype & JT_RIGHT)==0 ); + if( (pMatch->fg.jointype & JT_LEFT)!=0 ){ + ExprSetProperty(pExpr, EP_CanBeNull); + } + pSchema = pExpr->y.pTab->pSchema; + } + } /* if( pSrcList ) */ + +#if !defined(SQLITE_OMIT_TRIGGER) || !defined(SQLITE_OMIT_UPSERT) + /* If we have not already resolved the name, then maybe + ** it is a new.* or old.* trigger argument reference. Or + ** maybe it is an excluded.* from an upsert. Or maybe it is + ** a reference in the RETURNING clause to a table being modified. + */ + if( cnt==0 && zDb==0 ){ + pTab = 0; +#ifndef SQLITE_OMIT_TRIGGER + if( pParse->pTriggerTab!=0 ){ + int op = pParse->eTriggerOp; + assert( op==TK_DELETE || op==TK_UPDATE || op==TK_INSERT ); + if( pParse->bReturning ){ + if( (pNC->ncFlags & NC_UBaseReg)!=0 + && (zTab==0 || sqlite3StrICmp(zTab,pParse->pTriggerTab->zName)==0) + ){ + pExpr->iTable = op!=TK_DELETE; + pTab = pParse->pTriggerTab; + } + }else if( op!=TK_DELETE && zTab && sqlite3StrICmp("new",zTab) == 0 ){ + pExpr->iTable = 1; + pTab = pParse->pTriggerTab; + }else if( op!=TK_INSERT && zTab && sqlite3StrICmp("old",zTab)==0 ){ + pExpr->iTable = 0; + pTab = pParse->pTriggerTab; + } + } +#endif /* SQLITE_OMIT_TRIGGER */ +#ifndef SQLITE_OMIT_UPSERT + if( (pNC->ncFlags & NC_UUpsert)!=0 && zTab!=0 ){ + Upsert *pUpsert = pNC->uNC.pUpsert; + if( pUpsert && sqlite3StrICmp("excluded",zTab)==0 ){ + pTab = pUpsert->pUpsertSrc->a[0].pTab; + pExpr->iTable = EXCLUDED_TABLE_NUMBER; + } + } +#endif /* SQLITE_OMIT_UPSERT */ + + if( pTab ){ + int iCol; + u8 hCol = sqlite3StrIHash(zCol); + pSchema = pTab->pSchema; + cntTab++; + for(iCol=0, pCol=pTab->aCol; iColnCol; iCol++, pCol++){ + if( pCol->hName==hCol + && sqlite3StrICmp(pCol->zCnName, zCol)==0 + ){ + if( iCol==pTab->iPKey ){ + iCol = -1; + } + break; + } + } + if( iCol>=pTab->nCol && sqlite3IsRowid(zCol) && VisibleRowid(pTab) ){ + /* IMP: R-51414-32910 */ + iCol = -1; + } + if( iColnCol ){ + cnt++; + pMatch = 0; +#ifndef SQLITE_OMIT_UPSERT + if( pExpr->iTable==EXCLUDED_TABLE_NUMBER ){ + testcase( iCol==(-1) ); + assert( ExprUseYTab(pExpr) ); + if( IN_RENAME_OBJECT ){ + pExpr->iColumn = iCol; + pExpr->y.pTab = pTab; + eNewExprOp = TK_COLUMN; + }else{ + pExpr->iTable = pNC->uNC.pUpsert->regData + + sqlite3TableColumnToStorage(pTab, iCol); + eNewExprOp = TK_REGISTER; + } + }else +#endif /* SQLITE_OMIT_UPSERT */ + { + assert( ExprUseYTab(pExpr) ); + pExpr->y.pTab = pTab; + if( pParse->bReturning ){ + eNewExprOp = TK_REGISTER; + pExpr->op2 = TK_COLUMN; + pExpr->iTable = pNC->uNC.iBaseReg + (pTab->nCol+1)*pExpr->iTable + + sqlite3TableColumnToStorage(pTab, iCol) + 1; + }else{ + pExpr->iColumn = (i16)iCol; + eNewExprOp = TK_TRIGGER; +#ifndef SQLITE_OMIT_TRIGGER + if( iCol<0 ){ + pExpr->affExpr = SQLITE_AFF_INTEGER; + }else if( pExpr->iTable==0 ){ + testcase( iCol==31 ); + testcase( iCol==32 ); + pParse->oldmask |= (iCol>=32 ? 0xffffffff : (((u32)1)<newmask |= (iCol>=32 ? 0xffffffff : (((u32)1)<ncFlags & (NC_IdxExpr|NC_GenCol))==0 + && sqlite3IsRowid(zCol) + && ALWAYS(VisibleRowid(pMatch->pTab)) + ){ + cnt = 1; + pExpr->iColumn = -1; + pExpr->affExpr = SQLITE_AFF_INTEGER; + } + + /* + ** If the input is of the form Z (not Y.Z or X.Y.Z) then the name Z + ** might refer to an result-set alias. This happens, for example, when + ** we are resolving names in the WHERE clause of the following command: + ** + ** SELECT a+b AS x FROM table WHERE x<10; + ** + ** In cases like this, replace pExpr with a copy of the expression that + ** forms the result set entry ("a+b" in the example) and return immediately. + ** Note that the expression in the result set should have already been + ** resolved by the time the WHERE clause is resolved. + ** + ** The ability to use an output result-set column in the WHERE, GROUP BY, + ** or HAVING clauses, or as part of a larger expression in the ORDER BY + ** clause is not standard SQL. This is a (goofy) SQLite extension, that + ** is supported for backwards compatibility only. Hence, we issue a warning + ** on sqlite3_log() whenever the capability is used. + */ + if( cnt==0 + && (pNC->ncFlags & NC_UEList)!=0 + && zTab==0 + ){ + pEList = pNC->uNC.pEList; + assert( pEList!=0 ); + for(j=0; jnExpr; j++){ + char *zAs = pEList->a[j].zEName; + if( pEList->a[j].eEName==ENAME_NAME + && sqlite3_stricmp(zAs, zCol)==0 + ){ + Expr *pOrig; + assert( pExpr->pLeft==0 && pExpr->pRight==0 ); + assert( ExprUseXList(pExpr)==0 || pExpr->x.pList==0 ); + assert( ExprUseXSelect(pExpr)==0 || pExpr->x.pSelect==0 ); + pOrig = pEList->a[j].pExpr; + if( (pNC->ncFlags&NC_AllowAgg)==0 && ExprHasProperty(pOrig, EP_Agg) ){ + sqlite3ErrorMsg(pParse, "misuse of aliased aggregate %s", zAs); + return WRC_Abort; + } + if( ExprHasProperty(pOrig, EP_Win) + && ((pNC->ncFlags&NC_AllowWin)==0 || pNC!=pTopNC ) + ){ + sqlite3ErrorMsg(pParse, "misuse of aliased window function %s",zAs); + return WRC_Abort; + } + if( sqlite3ExprVectorSize(pOrig)!=1 ){ + sqlite3ErrorMsg(pParse, "row value misused"); + return WRC_Abort; + } + resolveAlias(pParse, pEList, j, pExpr, nSubquery); + cnt = 1; + pMatch = 0; + assert( zTab==0 && zDb==0 ); + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenRemap(pParse, 0, (void*)pExpr); + } + goto lookupname_end; + } + } + } + + /* Advance to the next name context. The loop will exit when either + ** we have a match (cnt>0) or when we run out of name contexts. + */ + if( cnt ) break; + pNC = pNC->pNext; + nSubquery++; + }while( pNC ); + + + /* + ** If X and Y are NULL (in other words if only the column name Z is + ** supplied) and the value of Z is enclosed in double-quotes, then + ** Z is a string literal if it doesn't match any column names. In that + ** case, we need to return right away and not make any changes to + ** pExpr. + ** + ** Because no reference was made to outer contexts, the pNC->nRef + ** fields are not changed in any context. + */ + if( cnt==0 && zTab==0 ){ + assert( pExpr->op==TK_ID ); + if( ExprHasProperty(pExpr,EP_DblQuoted) + && areDoubleQuotedStringsEnabled(db, pTopNC) + ){ + /* If a double-quoted identifier does not match any known column name, + ** then treat it as a string. + ** + ** This hack was added in the early days of SQLite in a misguided attempt + ** to be compatible with MySQL 3.x, which used double-quotes for strings. + ** I now sorely regret putting in this hack. The effect of this hack is + ** that misspelled identifier names are silently converted into strings + ** rather than causing an error, to the frustration of countless + ** programmers. To all those frustrated programmers, my apologies. + ** + ** Someday, I hope to get rid of this hack. Unfortunately there is + ** a huge amount of legacy SQL that uses it. So for now, we just + ** issue a warning. + */ + sqlite3_log(SQLITE_WARNING, + "double-quoted string literal: \"%w\"", zCol); +#ifdef SQLITE_ENABLE_NORMALIZE + sqlite3VdbeAddDblquoteStr(db, pParse->pVdbe, zCol); +#endif + pExpr->op = TK_STRING; + memset(&pExpr->y, 0, sizeof(pExpr->y)); + return WRC_Prune; + } + if( sqlite3ExprIdToTrueFalse(pExpr) ){ + return WRC_Prune; + } + } + + /* + ** cnt==0 means there was not match. cnt>1 means there were two or + ** more matches. Either way, we have an error. + */ + if( cnt!=1 ){ + const char *zErr; + zErr = cnt==0 ? "no such column" : "ambiguous column name"; + if( zDb ){ + sqlite3ErrorMsg(pParse, "%s: %s.%s.%s", zErr, zDb, zTab, zCol); + }else if( zTab ){ + sqlite3ErrorMsg(pParse, "%s: %s.%s", zErr, zTab, zCol); + }else{ + sqlite3ErrorMsg(pParse, "%s: %s", zErr, zCol); + } + sqlite3RecordErrorOffsetOfExpr(pParse->db, pExpr); + pParse->checkSchema = 1; + pTopNC->nNcErr++; + } + + /* If a column from a table in pSrcList is referenced, then record + ** this fact in the pSrcList.a[].colUsed bitmask. Column 0 causes + ** bit 0 to be set. Column 1 sets bit 1. And so forth. Bit 63 is + ** set if the 63rd or any subsequent column is used. + ** + ** The colUsed mask is an optimization used to help determine if an + ** index is a covering index. The correct answer is still obtained + ** if the mask contains extra set bits. However, it is important to + ** avoid setting bits beyond the maximum column number of the table. + ** (See ticket [b92e5e8ec2cdbaa1]). + ** + ** If a generated column is referenced, set bits for every column + ** of the table. + */ + if( pExpr->iColumn>=0 && pMatch!=0 ){ + pMatch->colUsed |= sqlite3ExprColUsed(pExpr); + } + + /* Clean up and return + */ + if( !ExprHasProperty(pExpr,(EP_TokenOnly|EP_Leaf)) ){ + sqlite3ExprDelete(db, pExpr->pLeft); + pExpr->pLeft = 0; + sqlite3ExprDelete(db, pExpr->pRight); + pExpr->pRight = 0; + } + pExpr->op = eNewExprOp; + ExprSetProperty(pExpr, EP_Leaf); +lookupname_end: + if( cnt==1 ){ + assert( pNC!=0 ); +#ifndef SQLITE_OMIT_AUTHORIZATION + if( pParse->db->xAuth + && (pExpr->op==TK_COLUMN || pExpr->op==TK_TRIGGER) + ){ + sqlite3AuthRead(pParse, pExpr, pSchema, pNC->pSrcList); + } +#endif + /* Increment the nRef value on all name contexts from TopNC up to + ** the point where the name matched. */ + for(;;){ + assert( pTopNC!=0 ); + pTopNC->nRef++; + if( pTopNC==pNC ) break; + pTopNC = pTopNC->pNext; + } + return WRC_Prune; + } else { + return WRC_Abort; + } +} + +/* +** Allocate and return a pointer to an expression to load the column iCol +** from datasource iSrc in SrcList pSrc. +*/ +SQLITE_PRIVATE Expr *sqlite3CreateColumnExpr(sqlite3 *db, SrcList *pSrc, int iSrc, int iCol){ + Expr *p = sqlite3ExprAlloc(db, TK_COLUMN, 0, 0); + if( p ){ + SrcItem *pItem = &pSrc->a[iSrc]; + Table *pTab; + assert( ExprUseYTab(p) ); + pTab = p->y.pTab = pItem->pTab; + p->iTable = pItem->iCursor; + if( p->y.pTab->iPKey==iCol ){ + p->iColumn = -1; + }else{ + p->iColumn = (ynVar)iCol; + if( (pTab->tabFlags & TF_HasGenerated)!=0 + && (pTab->aCol[iCol].colFlags & COLFLAG_GENERATED)!=0 + ){ + testcase( pTab->nCol==63 ); + testcase( pTab->nCol==64 ); + pItem->colUsed = pTab->nCol>=64 ? ALLBITS : MASKBIT(pTab->nCol)-1; + }else{ + testcase( iCol==BMS ); + testcase( iCol==BMS-1 ); + pItem->colUsed |= ((Bitmask)1)<<(iCol>=BMS ? BMS-1 : iCol); + } + } + } + return p; +} + +/* +** Report an error that an expression is not valid for some set of +** pNC->ncFlags values determined by validMask. +** +** static void notValid( +** Parse *pParse, // Leave error message here +** NameContext *pNC, // The name context +** const char *zMsg, // Type of error +** int validMask, // Set of contexts for which prohibited +** Expr *pExpr // Invalidate this expression on error +** ){...} +** +** As an optimization, since the conditional is almost always false +** (because errors are rare), the conditional is moved outside of the +** function call using a macro. +*/ +static void notValidImpl( + Parse *pParse, /* Leave error message here */ + NameContext *pNC, /* The name context */ + const char *zMsg, /* Type of error */ + Expr *pExpr, /* Invalidate this expression on error */ + Expr *pError /* Associate error with this expression */ +){ + const char *zIn = "partial index WHERE clauses"; + if( pNC->ncFlags & NC_IdxExpr ) zIn = "index expressions"; +#ifndef SQLITE_OMIT_CHECK + else if( pNC->ncFlags & NC_IsCheck ) zIn = "CHECK constraints"; +#endif +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + else if( pNC->ncFlags & NC_GenCol ) zIn = "generated columns"; +#endif + sqlite3ErrorMsg(pParse, "%s prohibited in %s", zMsg, zIn); + if( pExpr ) pExpr->op = TK_NULL; + sqlite3RecordErrorOffsetOfExpr(pParse->db, pError); +} +#define sqlite3ResolveNotValid(P,N,M,X,E,R) \ + assert( ((X)&~(NC_IsCheck|NC_PartIdx|NC_IdxExpr|NC_GenCol))==0 ); \ + if( ((N)->ncFlags & (X))!=0 ) notValidImpl(P,N,M,E,R); + +/* +** Expression p should encode a floating point value between 1.0 and 0.0. +** Return 1024 times this value. Or return -1 if p is not a floating point +** value between 1.0 and 0.0. +*/ +static int exprProbability(Expr *p){ + double r = -1.0; + if( p->op!=TK_FLOAT ) return -1; + assert( !ExprHasProperty(p, EP_IntValue) ); + sqlite3AtoF(p->u.zToken, &r, sqlite3Strlen30(p->u.zToken), SQLITE_UTF8); + assert( r>=0.0 ); + if( r>1.0 ) return -1; + return (int)(r*134217728.0); +} + +/* +** This routine is callback for sqlite3WalkExpr(). +** +** Resolve symbolic names into TK_COLUMN operators for the current +** node in the expression tree. Return 0 to continue the search down +** the tree or 2 to abort the tree walk. +** +** This routine also does error checking and name resolution for +** function names. The operator for aggregate functions is changed +** to TK_AGG_FUNCTION. +*/ +static int resolveExprStep(Walker *pWalker, Expr *pExpr){ + NameContext *pNC; + Parse *pParse; + + pNC = pWalker->u.pNC; + assert( pNC!=0 ); + pParse = pNC->pParse; + assert( pParse==pWalker->pParse ); + +#ifndef NDEBUG + if( pNC->pSrcList && pNC->pSrcList->nAlloc>0 ){ + SrcList *pSrcList = pNC->pSrcList; + int i; + for(i=0; ipSrcList->nSrc; i++){ + assert( pSrcList->a[i].iCursor>=0 && pSrcList->a[i].iCursornTab); + } + } +#endif + switch( pExpr->op ){ + + /* The special operator TK_ROW means use the rowid for the first + ** column in the FROM clause. This is used by the LIMIT and ORDER BY + ** clause processing on UPDATE and DELETE statements, and by + ** UPDATE ... FROM statement processing. + */ + case TK_ROW: { + SrcList *pSrcList = pNC->pSrcList; + SrcItem *pItem; + assert( pSrcList && pSrcList->nSrc>=1 ); + pItem = pSrcList->a; + pExpr->op = TK_COLUMN; + assert( ExprUseYTab(pExpr) ); + pExpr->y.pTab = pItem->pTab; + pExpr->iTable = pItem->iCursor; + pExpr->iColumn--; + pExpr->affExpr = SQLITE_AFF_INTEGER; + break; + } + + /* An optimization: Attempt to convert + ** + ** "expr IS NOT NULL" --> "TRUE" + ** "expr IS NULL" --> "FALSE" + ** + ** if we can prove that "expr" is never NULL. Call this the + ** "NOT NULL strength reduction optimization". + ** + ** If this optimization occurs, also restore the NameContext ref-counts + ** to the state they where in before the "column" LHS expression was + ** resolved. This prevents "column" from being counted as having been + ** referenced, which might prevent a SELECT from being erroneously + ** marked as correlated. + */ + case TK_NOTNULL: + case TK_ISNULL: { + int anRef[8]; + NameContext *p; + int i; + for(i=0, p=pNC; p && ipNext, i++){ + anRef[i] = p->nRef; + } + sqlite3WalkExpr(pWalker, pExpr->pLeft); + if( 0==sqlite3ExprCanBeNull(pExpr->pLeft) && !IN_RENAME_OBJECT ){ + testcase( ExprHasProperty(pExpr, EP_FromJoin) ); + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + if( pExpr->op==TK_NOTNULL ){ + pExpr->u.zToken = "true"; + ExprSetProperty(pExpr, EP_IsTrue); + }else{ + pExpr->u.zToken = "false"; + ExprSetProperty(pExpr, EP_IsFalse); + } + pExpr->op = TK_TRUEFALSE; + for(i=0, p=pNC; p && ipNext, i++){ + p->nRef = anRef[i]; + } + sqlite3ExprDelete(pParse->db, pExpr->pLeft); + pExpr->pLeft = 0; + } + return WRC_Prune; + } + + /* A column name: ID + ** Or table name and column name: ID.ID + ** Or a database, table and column: ID.ID.ID + ** + ** The TK_ID and TK_OUT cases are combined so that there will only + ** be one call to lookupName(). Then the compiler will in-line + ** lookupName() for a size reduction and performance increase. + */ + case TK_ID: + case TK_DOT: { + const char *zColumn; + const char *zTable; + const char *zDb; + Expr *pRight; + + if( pExpr->op==TK_ID ){ + zDb = 0; + zTable = 0; + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + zColumn = pExpr->u.zToken; + }else{ + Expr *pLeft = pExpr->pLeft; + testcase( pNC->ncFlags & NC_IdxExpr ); + testcase( pNC->ncFlags & NC_GenCol ); + sqlite3ResolveNotValid(pParse, pNC, "the \".\" operator", + NC_IdxExpr|NC_GenCol, 0, pExpr); + pRight = pExpr->pRight; + if( pRight->op==TK_ID ){ + zDb = 0; + }else{ + assert( pRight->op==TK_DOT ); + assert( !ExprHasProperty(pRight, EP_IntValue) ); + zDb = pLeft->u.zToken; + pLeft = pRight->pLeft; + pRight = pRight->pRight; + } + assert( ExprUseUToken(pLeft) && ExprUseUToken(pRight) ); + zTable = pLeft->u.zToken; + zColumn = pRight->u.zToken; + assert( ExprUseYTab(pExpr) ); + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenRemap(pParse, (void*)pExpr, (void*)pRight); + sqlite3RenameTokenRemap(pParse, (void*)&pExpr->y.pTab, (void*)pLeft); + } + } + return lookupName(pParse, zDb, zTable, zColumn, pNC, pExpr); + } + + /* Resolve function names + */ + case TK_FUNCTION: { + ExprList *pList = pExpr->x.pList; /* The argument list */ + int n = pList ? pList->nExpr : 0; /* Number of arguments */ + int no_such_func = 0; /* True if no such function exists */ + int wrong_num_args = 0; /* True if wrong number of arguments */ + int is_agg = 0; /* True if is an aggregate function */ + const char *zId; /* The function name. */ + FuncDef *pDef; /* Information about the function */ + u8 enc = ENC(pParse->db); /* The database encoding */ + int savedAllowFlags = (pNC->ncFlags & (NC_AllowAgg | NC_AllowWin)); +#ifndef SQLITE_OMIT_WINDOWFUNC + Window *pWin = (IsWindowFunc(pExpr) ? pExpr->y.pWin : 0); +#endif + assert( !ExprHasProperty(pExpr, EP_xIsSelect|EP_IntValue) ); + zId = pExpr->u.zToken; + pDef = sqlite3FindFunction(pParse->db, zId, n, enc, 0); + if( pDef==0 ){ + pDef = sqlite3FindFunction(pParse->db, zId, -2, enc, 0); + if( pDef==0 ){ + no_such_func = 1; + }else{ + wrong_num_args = 1; + } + }else{ + is_agg = pDef->xFinalize!=0; + if( pDef->funcFlags & SQLITE_FUNC_UNLIKELY ){ + ExprSetProperty(pExpr, EP_Unlikely); + if( n==2 ){ + pExpr->iTable = exprProbability(pList->a[1].pExpr); + if( pExpr->iTable<0 ){ + sqlite3ErrorMsg(pParse, + "second argument to %#T() must be a " + "constant between 0.0 and 1.0", pExpr); + pNC->nNcErr++; + } + }else{ + /* EVIDENCE-OF: R-61304-29449 The unlikely(X) function is + ** equivalent to likelihood(X, 0.0625). + ** EVIDENCE-OF: R-01283-11636 The unlikely(X) function is + ** short-hand for likelihood(X,0.0625). + ** EVIDENCE-OF: R-36850-34127 The likely(X) function is short-hand + ** for likelihood(X,0.9375). + ** EVIDENCE-OF: R-53436-40973 The likely(X) function is equivalent + ** to likelihood(X,0.9375). */ + /* TUNING: unlikely() probability is 0.0625. likely() is 0.9375 */ + pExpr->iTable = pDef->zName[0]=='u' ? 8388608 : 125829120; + } + } +#ifndef SQLITE_OMIT_AUTHORIZATION + { + int auth = sqlite3AuthCheck(pParse, SQLITE_FUNCTION, 0,pDef->zName,0); + if( auth!=SQLITE_OK ){ + if( auth==SQLITE_DENY ){ + sqlite3ErrorMsg(pParse, "not authorized to use function: %#T", + pExpr); + pNC->nNcErr++; + } + pExpr->op = TK_NULL; + return WRC_Prune; + } + } +#endif + if( pDef->funcFlags & (SQLITE_FUNC_CONSTANT|SQLITE_FUNC_SLOCHNG) ){ + /* For the purposes of the EP_ConstFunc flag, date and time + ** functions and other functions that change slowly are considered + ** constant because they are constant for the duration of one query. + ** This allows them to be factored out of inner loops. */ + ExprSetProperty(pExpr,EP_ConstFunc); + } + if( (pDef->funcFlags & SQLITE_FUNC_CONSTANT)==0 ){ + /* Clearly non-deterministic functions like random(), but also + ** date/time functions that use 'now', and other functions like + ** sqlite_version() that might change over time cannot be used + ** in an index or generated column. Curiously, they can be used + ** in a CHECK constraint. SQLServer, MySQL, and PostgreSQL all + ** all this. */ + sqlite3ResolveNotValid(pParse, pNC, "non-deterministic functions", + NC_IdxExpr|NC_PartIdx|NC_GenCol, 0, pExpr); + }else{ + assert( (NC_SelfRef & 0xff)==NC_SelfRef ); /* Must fit in 8 bits */ + pExpr->op2 = pNC->ncFlags & NC_SelfRef; + if( pNC->ncFlags & NC_FromDDL ) ExprSetProperty(pExpr, EP_FromDDL); + } + if( (pDef->funcFlags & SQLITE_FUNC_INTERNAL)!=0 + && pParse->nested==0 + && (pParse->db->mDbFlags & DBFLAG_InternalFunc)==0 + ){ + /* Internal-use-only functions are disallowed unless the + ** SQL is being compiled using sqlite3NestedParse() or + ** the SQLITE_TESTCTRL_INTERNAL_FUNCTIONS test-control has be + ** used to activate internal functions for testing purposes */ + no_such_func = 1; + pDef = 0; + }else + if( (pDef->funcFlags & (SQLITE_FUNC_DIRECT|SQLITE_FUNC_UNSAFE))!=0 + && !IN_RENAME_OBJECT + ){ + sqlite3ExprFunctionUsable(pParse, pExpr, pDef); + } + } + + if( 0==IN_RENAME_OBJECT ){ +#ifndef SQLITE_OMIT_WINDOWFUNC + assert( is_agg==0 || (pDef->funcFlags & SQLITE_FUNC_MINMAX) + || (pDef->xValue==0 && pDef->xInverse==0) + || (pDef->xValue && pDef->xInverse && pDef->xSFunc && pDef->xFinalize) + ); + if( pDef && pDef->xValue==0 && pWin ){ + sqlite3ErrorMsg(pParse, + "%#T() may not be used as a window function", pExpr + ); + pNC->nNcErr++; + }else if( + (is_agg && (pNC->ncFlags & NC_AllowAgg)==0) + || (is_agg && (pDef->funcFlags&SQLITE_FUNC_WINDOW) && !pWin) + || (is_agg && pWin && (pNC->ncFlags & NC_AllowWin)==0) + ){ + const char *zType; + if( (pDef->funcFlags & SQLITE_FUNC_WINDOW) || pWin ){ + zType = "window"; + }else{ + zType = "aggregate"; + } + sqlite3ErrorMsg(pParse, "misuse of %s function %#T()",zType,pExpr); + pNC->nNcErr++; + is_agg = 0; + } +#else + if( (is_agg && (pNC->ncFlags & NC_AllowAgg)==0) ){ + sqlite3ErrorMsg(pParse,"misuse of aggregate function %#T()",pExpr); + pNC->nNcErr++; + is_agg = 0; + } +#endif + else if( no_such_func && pParse->db->init.busy==0 +#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION + && pParse->explain==0 +#endif + ){ + sqlite3ErrorMsg(pParse, "no such function: %#T", pExpr); + pNC->nNcErr++; + }else if( wrong_num_args ){ + sqlite3ErrorMsg(pParse,"wrong number of arguments to function %#T()", + pExpr); + pNC->nNcErr++; + } +#ifndef SQLITE_OMIT_WINDOWFUNC + else if( is_agg==0 && ExprHasProperty(pExpr, EP_WinFunc) ){ + sqlite3ErrorMsg(pParse, + "FILTER may not be used with non-aggregate %#T()", + pExpr + ); + pNC->nNcErr++; + } +#endif + if( is_agg ){ + /* Window functions may not be arguments of aggregate functions. + ** Or arguments of other window functions. But aggregate functions + ** may be arguments for window functions. */ +#ifndef SQLITE_OMIT_WINDOWFUNC + pNC->ncFlags &= ~(NC_AllowWin | (!pWin ? NC_AllowAgg : 0)); +#else + pNC->ncFlags &= ~NC_AllowAgg; +#endif + } + } +#ifndef SQLITE_OMIT_WINDOWFUNC + else if( ExprHasProperty(pExpr, EP_WinFunc) ){ + is_agg = 1; + } +#endif + sqlite3WalkExprList(pWalker, pList); + if( is_agg ){ +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pWin ){ + Select *pSel = pNC->pWinSelect; + assert( pWin==0 || (ExprUseYWin(pExpr) && pWin==pExpr->y.pWin) ); + if( IN_RENAME_OBJECT==0 ){ + sqlite3WindowUpdate(pParse, pSel ? pSel->pWinDefn : 0, pWin, pDef); + if( pParse->db->mallocFailed ) break; + } + sqlite3WalkExprList(pWalker, pWin->pPartition); + sqlite3WalkExprList(pWalker, pWin->pOrderBy); + sqlite3WalkExpr(pWalker, pWin->pFilter); + sqlite3WindowLink(pSel, pWin); + pNC->ncFlags |= NC_HasWin; + }else +#endif /* SQLITE_OMIT_WINDOWFUNC */ + { + NameContext *pNC2; /* For looping up thru outer contexts */ + pExpr->op = TK_AGG_FUNCTION; + pExpr->op2 = 0; +#ifndef SQLITE_OMIT_WINDOWFUNC + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + sqlite3WalkExpr(pWalker, pExpr->y.pWin->pFilter); + } +#endif + pNC2 = pNC; + while( pNC2 + && sqlite3ReferencesSrcList(pParse, pExpr, pNC2->pSrcList)==0 + ){ + pExpr->op2++; + pNC2 = pNC2->pNext; + } + assert( pDef!=0 || IN_RENAME_OBJECT ); + if( pNC2 && pDef ){ + assert( SQLITE_FUNC_MINMAX==NC_MinMaxAgg ); + assert( SQLITE_FUNC_ANYORDER==NC_OrderAgg ); + testcase( (pDef->funcFlags & SQLITE_FUNC_MINMAX)!=0 ); + testcase( (pDef->funcFlags & SQLITE_FUNC_ANYORDER)!=0 ); + pNC2->ncFlags |= NC_HasAgg + | ((pDef->funcFlags^SQLITE_FUNC_ANYORDER) + & (SQLITE_FUNC_MINMAX|SQLITE_FUNC_ANYORDER)); + } + } + pNC->ncFlags |= savedAllowFlags; + } + /* FIX ME: Compute pExpr->affinity based on the expected return + ** type of the function + */ + return WRC_Prune; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_SELECT: + case TK_EXISTS: testcase( pExpr->op==TK_EXISTS ); +#endif + case TK_IN: { + testcase( pExpr->op==TK_IN ); + if( ExprUseXSelect(pExpr) ){ + int nRef = pNC->nRef; + testcase( pNC->ncFlags & NC_IsCheck ); + testcase( pNC->ncFlags & NC_PartIdx ); + testcase( pNC->ncFlags & NC_IdxExpr ); + testcase( pNC->ncFlags & NC_GenCol ); + if( pNC->ncFlags & NC_SelfRef ){ + notValidImpl(pParse, pNC, "subqueries", pExpr, pExpr); + }else{ + sqlite3WalkSelect(pWalker, pExpr->x.pSelect); + } + assert( pNC->nRef>=nRef ); + if( nRef!=pNC->nRef ){ + ExprSetProperty(pExpr, EP_VarSelect); + pNC->ncFlags |= NC_VarSelect; + } + } + break; + } + case TK_VARIABLE: { + testcase( pNC->ncFlags & NC_IsCheck ); + testcase( pNC->ncFlags & NC_PartIdx ); + testcase( pNC->ncFlags & NC_IdxExpr ); + testcase( pNC->ncFlags & NC_GenCol ); + sqlite3ResolveNotValid(pParse, pNC, "parameters", + NC_IsCheck|NC_PartIdx|NC_IdxExpr|NC_GenCol, pExpr, pExpr); + break; + } + case TK_IS: + case TK_ISNOT: { + Expr *pRight = sqlite3ExprSkipCollateAndLikely(pExpr->pRight); + assert( !ExprHasProperty(pExpr, EP_Reduced) ); + /* Handle special cases of "x IS TRUE", "x IS FALSE", "x IS NOT TRUE", + ** and "x IS NOT FALSE". */ + if( ALWAYS(pRight) && (pRight->op==TK_ID || pRight->op==TK_TRUEFALSE) ){ + int rc = resolveExprStep(pWalker, pRight); + if( rc==WRC_Abort ) return WRC_Abort; + if( pRight->op==TK_TRUEFALSE ){ + pExpr->op2 = pExpr->op; + pExpr->op = TK_TRUTH; + return WRC_Continue; + } + } + /* no break */ deliberate_fall_through + } + case TK_BETWEEN: + case TK_EQ: + case TK_NE: + case TK_LT: + case TK_LE: + case TK_GT: + case TK_GE: { + int nLeft, nRight; + if( pParse->db->mallocFailed ) break; + assert( pExpr->pLeft!=0 ); + nLeft = sqlite3ExprVectorSize(pExpr->pLeft); + if( pExpr->op==TK_BETWEEN ){ + assert( ExprUseXList(pExpr) ); + nRight = sqlite3ExprVectorSize(pExpr->x.pList->a[0].pExpr); + if( nRight==nLeft ){ + nRight = sqlite3ExprVectorSize(pExpr->x.pList->a[1].pExpr); + } + }else{ + assert( pExpr->pRight!=0 ); + nRight = sqlite3ExprVectorSize(pExpr->pRight); + } + if( nLeft!=nRight ){ + testcase( pExpr->op==TK_EQ ); + testcase( pExpr->op==TK_NE ); + testcase( pExpr->op==TK_LT ); + testcase( pExpr->op==TK_LE ); + testcase( pExpr->op==TK_GT ); + testcase( pExpr->op==TK_GE ); + testcase( pExpr->op==TK_IS ); + testcase( pExpr->op==TK_ISNOT ); + testcase( pExpr->op==TK_BETWEEN ); + sqlite3ErrorMsg(pParse, "row value misused"); + sqlite3RecordErrorOffsetOfExpr(pParse->db, pExpr); + } + break; + } + } + assert( pParse->db->mallocFailed==0 || pParse->nErr!=0 ); + return pParse->nErr ? WRC_Abort : WRC_Continue; +} + +/* +** pEList is a list of expressions which are really the result set of the +** a SELECT statement. pE is a term in an ORDER BY or GROUP BY clause. +** This routine checks to see if pE is a simple identifier which corresponds +** to the AS-name of one of the terms of the expression list. If it is, +** this routine return an integer between 1 and N where N is the number of +** elements in pEList, corresponding to the matching entry. If there is +** no match, or if pE is not a simple identifier, then this routine +** return 0. +** +** pEList has been resolved. pE has not. +*/ +static int resolveAsName( + Parse *pParse, /* Parsing context for error messages */ + ExprList *pEList, /* List of expressions to scan */ + Expr *pE /* Expression we are trying to match */ +){ + int i; /* Loop counter */ + + UNUSED_PARAMETER(pParse); + + if( pE->op==TK_ID ){ + const char *zCol; + assert( !ExprHasProperty(pE, EP_IntValue) ); + zCol = pE->u.zToken; + for(i=0; inExpr; i++){ + if( pEList->a[i].eEName==ENAME_NAME + && sqlite3_stricmp(pEList->a[i].zEName, zCol)==0 + ){ + return i+1; + } + } + } + return 0; +} + +/* +** pE is a pointer to an expression which is a single term in the +** ORDER BY of a compound SELECT. The expression has not been +** name resolved. +** +** At the point this routine is called, we already know that the +** ORDER BY term is not an integer index into the result set. That +** case is handled by the calling routine. +** +** Attempt to match pE against result set columns in the left-most +** SELECT statement. Return the index i of the matching column, +** as an indication to the caller that it should sort by the i-th column. +** The left-most column is 1. In other words, the value returned is the +** same integer value that would be used in the SQL statement to indicate +** the column. +** +** If there is no match, return 0. Return -1 if an error occurs. +*/ +static int resolveOrderByTermToExprList( + Parse *pParse, /* Parsing context for error messages */ + Select *pSelect, /* The SELECT statement with the ORDER BY clause */ + Expr *pE /* The specific ORDER BY term */ +){ + int i; /* Loop counter */ + ExprList *pEList; /* The columns of the result set */ + NameContext nc; /* Name context for resolving pE */ + sqlite3 *db; /* Database connection */ + int rc; /* Return code from subprocedures */ + u8 savedSuppErr; /* Saved value of db->suppressErr */ + + assert( sqlite3ExprIsInteger(pE, &i)==0 ); + pEList = pSelect->pEList; + + /* Resolve all names in the ORDER BY term expression + */ + memset(&nc, 0, sizeof(nc)); + nc.pParse = pParse; + nc.pSrcList = pSelect->pSrc; + nc.uNC.pEList = pEList; + nc.ncFlags = NC_AllowAgg|NC_UEList|NC_NoSelect; + nc.nNcErr = 0; + db = pParse->db; + savedSuppErr = db->suppressErr; + db->suppressErr = 1; + rc = sqlite3ResolveExprNames(&nc, pE); + db->suppressErr = savedSuppErr; + if( rc ) return 0; + + /* Try to match the ORDER BY expression against an expression + ** in the result set. Return an 1-based index of the matching + ** result-set entry. + */ + for(i=0; inExpr; i++){ + if( sqlite3ExprCompare(0, pEList->a[i].pExpr, pE, -1)<2 ){ + return i+1; + } + } + + /* If no match, return 0. */ + return 0; +} + +/* +** Generate an ORDER BY or GROUP BY term out-of-range error. +*/ +static void resolveOutOfRangeError( + Parse *pParse, /* The error context into which to write the error */ + const char *zType, /* "ORDER" or "GROUP" */ + int i, /* The index (1-based) of the term out of range */ + int mx, /* Largest permissible value of i */ + Expr *pError /* Associate the error with the expression */ +){ + sqlite3ErrorMsg(pParse, + "%r %s BY term out of range - should be " + "between 1 and %d", i, zType, mx); + sqlite3RecordErrorOffsetOfExpr(pParse->db, pError); +} + +/* +** Analyze the ORDER BY clause in a compound SELECT statement. Modify +** each term of the ORDER BY clause is a constant integer between 1 +** and N where N is the number of columns in the compound SELECT. +** +** ORDER BY terms that are already an integer between 1 and N are +** unmodified. ORDER BY terms that are integers outside the range of +** 1 through N generate an error. ORDER BY terms that are expressions +** are matched against result set expressions of compound SELECT +** beginning with the left-most SELECT and working toward the right. +** At the first match, the ORDER BY expression is transformed into +** the integer column number. +** +** Return the number of errors seen. +*/ +static int resolveCompoundOrderBy( + Parse *pParse, /* Parsing context. Leave error messages here */ + Select *pSelect /* The SELECT statement containing the ORDER BY */ +){ + int i; + ExprList *pOrderBy; + ExprList *pEList; + sqlite3 *db; + int moreToDo = 1; + + pOrderBy = pSelect->pOrderBy; + if( pOrderBy==0 ) return 0; + db = pParse->db; + if( pOrderBy->nExpr>db->aLimit[SQLITE_LIMIT_COLUMN] ){ + sqlite3ErrorMsg(pParse, "too many terms in ORDER BY clause"); + return 1; + } + for(i=0; inExpr; i++){ + pOrderBy->a[i].done = 0; + } + pSelect->pNext = 0; + while( pSelect->pPrior ){ + pSelect->pPrior->pNext = pSelect; + pSelect = pSelect->pPrior; + } + while( pSelect && moreToDo ){ + struct ExprList_item *pItem; + moreToDo = 0; + pEList = pSelect->pEList; + assert( pEList!=0 ); + for(i=0, pItem=pOrderBy->a; inExpr; i++, pItem++){ + int iCol = -1; + Expr *pE, *pDup; + if( pItem->done ) continue; + pE = sqlite3ExprSkipCollateAndLikely(pItem->pExpr); + if( NEVER(pE==0) ) continue; + if( sqlite3ExprIsInteger(pE, &iCol) ){ + if( iCol<=0 || iCol>pEList->nExpr ){ + resolveOutOfRangeError(pParse, "ORDER", i+1, pEList->nExpr, pE); + return 1; + } + }else{ + iCol = resolveAsName(pParse, pEList, pE); + if( iCol==0 ){ + /* Now test if expression pE matches one of the values returned + ** by pSelect. In the usual case this is done by duplicating the + ** expression, resolving any symbols in it, and then comparing + ** it against each expression returned by the SELECT statement. + ** Once the comparisons are finished, the duplicate expression + ** is deleted. + ** + ** If this is running as part of an ALTER TABLE operation and + ** the symbols resolve successfully, also resolve the symbols in the + ** actual expression. This allows the code in alter.c to modify + ** column references within the ORDER BY expression as required. */ + pDup = sqlite3ExprDup(db, pE, 0); + if( !db->mallocFailed ){ + assert(pDup); + iCol = resolveOrderByTermToExprList(pParse, pSelect, pDup); + if( IN_RENAME_OBJECT && iCol>0 ){ + resolveOrderByTermToExprList(pParse, pSelect, pE); + } + } + sqlite3ExprDelete(db, pDup); + } + } + if( iCol>0 ){ + /* Convert the ORDER BY term into an integer column number iCol, + ** taking care to preserve the COLLATE clause if it exists. */ + if( !IN_RENAME_OBJECT ){ + Expr *pNew = sqlite3Expr(db, TK_INTEGER, 0); + if( pNew==0 ) return 1; + pNew->flags |= EP_IntValue; + pNew->u.iValue = iCol; + if( pItem->pExpr==pE ){ + pItem->pExpr = pNew; + }else{ + Expr *pParent = pItem->pExpr; + assert( pParent->op==TK_COLLATE ); + while( pParent->pLeft->op==TK_COLLATE ) pParent = pParent->pLeft; + assert( pParent->pLeft==pE ); + pParent->pLeft = pNew; + } + sqlite3ExprDelete(db, pE); + pItem->u.x.iOrderByCol = (u16)iCol; + } + pItem->done = 1; + }else{ + moreToDo = 1; + } + } + pSelect = pSelect->pNext; + } + for(i=0; inExpr; i++){ + if( pOrderBy->a[i].done==0 ){ + sqlite3ErrorMsg(pParse, "%r ORDER BY term does not match any " + "column in the result set", i+1); + return 1; + } + } + return 0; +} + +/* +** Check every term in the ORDER BY or GROUP BY clause pOrderBy of +** the SELECT statement pSelect. If any term is reference to a +** result set expression (as determined by the ExprList.a.u.x.iOrderByCol +** field) then convert that term into a copy of the corresponding result set +** column. +** +** If any errors are detected, add an error message to pParse and +** return non-zero. Return zero if no errors are seen. +*/ +SQLITE_PRIVATE int sqlite3ResolveOrderGroupBy( + Parse *pParse, /* Parsing context. Leave error messages here */ + Select *pSelect, /* The SELECT statement containing the clause */ + ExprList *pOrderBy, /* The ORDER BY or GROUP BY clause to be processed */ + const char *zType /* "ORDER" or "GROUP" */ +){ + int i; + sqlite3 *db = pParse->db; + ExprList *pEList; + struct ExprList_item *pItem; + + if( pOrderBy==0 || pParse->db->mallocFailed || IN_RENAME_OBJECT ) return 0; + if( pOrderBy->nExpr>db->aLimit[SQLITE_LIMIT_COLUMN] ){ + sqlite3ErrorMsg(pParse, "too many terms in %s BY clause", zType); + return 1; + } + pEList = pSelect->pEList; + assert( pEList!=0 ); /* sqlite3SelectNew() guarantees this */ + for(i=0, pItem=pOrderBy->a; inExpr; i++, pItem++){ + if( pItem->u.x.iOrderByCol ){ + if( pItem->u.x.iOrderByCol>pEList->nExpr ){ + resolveOutOfRangeError(pParse, zType, i+1, pEList->nExpr, 0); + return 1; + } + resolveAlias(pParse, pEList, pItem->u.x.iOrderByCol-1, pItem->pExpr,0); + } + } + return 0; +} + +#ifndef SQLITE_OMIT_WINDOWFUNC +/* +** Walker callback for windowRemoveExprFromSelect(). +*/ +static int resolveRemoveWindowsCb(Walker *pWalker, Expr *pExpr){ + UNUSED_PARAMETER(pWalker); + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + Window *pWin = pExpr->y.pWin; + sqlite3WindowUnlinkFromSelect(pWin); + } + return WRC_Continue; +} + +/* +** Remove any Window objects owned by the expression pExpr from the +** Select.pWin list of Select object pSelect. +*/ +static void windowRemoveExprFromSelect(Select *pSelect, Expr *pExpr){ + if( pSelect->pWin ){ + Walker sWalker; + memset(&sWalker, 0, sizeof(Walker)); + sWalker.xExprCallback = resolveRemoveWindowsCb; + sWalker.u.pSelect = pSelect; + sqlite3WalkExpr(&sWalker, pExpr); + } +} +#else +# define windowRemoveExprFromSelect(a, b) +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +/* +** pOrderBy is an ORDER BY or GROUP BY clause in SELECT statement pSelect. +** The Name context of the SELECT statement is pNC. zType is either +** "ORDER" or "GROUP" depending on which type of clause pOrderBy is. +** +** This routine resolves each term of the clause into an expression. +** If the order-by term is an integer I between 1 and N (where N is the +** number of columns in the result set of the SELECT) then the expression +** in the resolution is a copy of the I-th result-set expression. If +** the order-by term is an identifier that corresponds to the AS-name of +** a result-set expression, then the term resolves to a copy of the +** result-set expression. Otherwise, the expression is resolved in +** the usual way - using sqlite3ResolveExprNames(). +** +** This routine returns the number of errors. If errors occur, then +** an appropriate error message might be left in pParse. (OOM errors +** excepted.) +*/ +static int resolveOrderGroupBy( + NameContext *pNC, /* The name context of the SELECT statement */ + Select *pSelect, /* The SELECT statement holding pOrderBy */ + ExprList *pOrderBy, /* An ORDER BY or GROUP BY clause to resolve */ + const char *zType /* Either "ORDER" or "GROUP", as appropriate */ +){ + int i, j; /* Loop counters */ + int iCol; /* Column number */ + struct ExprList_item *pItem; /* A term of the ORDER BY clause */ + Parse *pParse; /* Parsing context */ + int nResult; /* Number of terms in the result set */ + + assert( pOrderBy!=0 ); + nResult = pSelect->pEList->nExpr; + pParse = pNC->pParse; + for(i=0, pItem=pOrderBy->a; inExpr; i++, pItem++){ + Expr *pE = pItem->pExpr; + Expr *pE2 = sqlite3ExprSkipCollateAndLikely(pE); + if( NEVER(pE2==0) ) continue; + if( zType[0]!='G' ){ + iCol = resolveAsName(pParse, pSelect->pEList, pE2); + if( iCol>0 ){ + /* If an AS-name match is found, mark this ORDER BY column as being + ** a copy of the iCol-th result-set column. The subsequent call to + ** sqlite3ResolveOrderGroupBy() will convert the expression to a + ** copy of the iCol-th result-set expression. */ + pItem->u.x.iOrderByCol = (u16)iCol; + continue; + } + } + if( sqlite3ExprIsInteger(pE2, &iCol) ){ + /* The ORDER BY term is an integer constant. Again, set the column + ** number so that sqlite3ResolveOrderGroupBy() will convert the + ** order-by term to a copy of the result-set expression */ + if( iCol<1 || iCol>0xffff ){ + resolveOutOfRangeError(pParse, zType, i+1, nResult, pE2); + return 1; + } + pItem->u.x.iOrderByCol = (u16)iCol; + continue; + } + + /* Otherwise, treat the ORDER BY term as an ordinary expression */ + pItem->u.x.iOrderByCol = 0; + if( sqlite3ResolveExprNames(pNC, pE) ){ + return 1; + } + for(j=0; jpEList->nExpr; j++){ + if( sqlite3ExprCompare(0, pE, pSelect->pEList->a[j].pExpr, -1)==0 ){ + /* Since this expresion is being changed into a reference + ** to an identical expression in the result set, remove all Window + ** objects belonging to the expression from the Select.pWin list. */ + windowRemoveExprFromSelect(pSelect, pE); + pItem->u.x.iOrderByCol = j+1; + } + } + } + return sqlite3ResolveOrderGroupBy(pParse, pSelect, pOrderBy, zType); +} + +/* +** Resolve names in the SELECT statement p and all of its descendants. +*/ +static int resolveSelectStep(Walker *pWalker, Select *p){ + NameContext *pOuterNC; /* Context that contains this SELECT */ + NameContext sNC; /* Name context of this SELECT */ + int isCompound; /* True if p is a compound select */ + int nCompound; /* Number of compound terms processed so far */ + Parse *pParse; /* Parsing context */ + int i; /* Loop counter */ + ExprList *pGroupBy; /* The GROUP BY clause */ + Select *pLeftmost; /* Left-most of SELECT of a compound */ + sqlite3 *db; /* Database connection */ + + + assert( p!=0 ); + if( p->selFlags & SF_Resolved ){ + return WRC_Prune; + } + pOuterNC = pWalker->u.pNC; + pParse = pWalker->pParse; + db = pParse->db; + + /* Normally sqlite3SelectExpand() will be called first and will have + ** already expanded this SELECT. However, if this is a subquery within + ** an expression, sqlite3ResolveExprNames() will be called without a + ** prior call to sqlite3SelectExpand(). When that happens, let + ** sqlite3SelectPrep() do all of the processing for this SELECT. + ** sqlite3SelectPrep() will invoke both sqlite3SelectExpand() and + ** this routine in the correct order. + */ + if( (p->selFlags & SF_Expanded)==0 ){ + sqlite3SelectPrep(pParse, p, pOuterNC); + return pParse->nErr ? WRC_Abort : WRC_Prune; + } + + isCompound = p->pPrior!=0; + nCompound = 0; + pLeftmost = p; + while( p ){ + assert( (p->selFlags & SF_Expanded)!=0 ); + assert( (p->selFlags & SF_Resolved)==0 ); + assert( db->suppressErr==0 ); /* SF_Resolved not set if errors suppressed */ + p->selFlags |= SF_Resolved; + + + /* Resolve the expressions in the LIMIT and OFFSET clauses. These + ** are not allowed to refer to any names, so pass an empty NameContext. + */ + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + sNC.pWinSelect = p; + if( sqlite3ResolveExprNames(&sNC, p->pLimit) ){ + return WRC_Abort; + } + + /* If the SF_Converted flags is set, then this Select object was + ** was created by the convertCompoundSelectToSubquery() function. + ** In this case the ORDER BY clause (p->pOrderBy) should be resolved + ** as if it were part of the sub-query, not the parent. This block + ** moves the pOrderBy down to the sub-query. It will be moved back + ** after the names have been resolved. */ + if( p->selFlags & SF_Converted ){ + Select *pSub = p->pSrc->a[0].pSelect; + assert( p->pSrc->nSrc==1 && p->pOrderBy ); + assert( pSub->pPrior && pSub->pOrderBy==0 ); + pSub->pOrderBy = p->pOrderBy; + p->pOrderBy = 0; + } + + /* Recursively resolve names in all subqueries in the FROM clause + */ + for(i=0; ipSrc->nSrc; i++){ + SrcItem *pItem = &p->pSrc->a[i]; + if( pItem->pSelect && (pItem->pSelect->selFlags & SF_Resolved)==0 ){ + int nRef = pOuterNC ? pOuterNC->nRef : 0; + const char *zSavedContext = pParse->zAuthContext; + + if( pItem->zName ) pParse->zAuthContext = pItem->zName; + sqlite3ResolveSelectNames(pParse, pItem->pSelect, pOuterNC); + pParse->zAuthContext = zSavedContext; + if( pParse->nErr ) return WRC_Abort; + assert( db->mallocFailed==0 ); + + /* If the number of references to the outer context changed when + ** expressions in the sub-select were resolved, the sub-select + ** is correlated. It is not required to check the refcount on any + ** but the innermost outer context object, as lookupName() increments + ** the refcount on all contexts between the current one and the + ** context containing the column when it resolves a name. */ + if( pOuterNC ){ + assert( pItem->fg.isCorrelated==0 && pOuterNC->nRef>=nRef ); + pItem->fg.isCorrelated = (pOuterNC->nRef>nRef); + } + } + } + + /* Set up the local name-context to pass to sqlite3ResolveExprNames() to + ** resolve the result-set expression list. + */ + sNC.ncFlags = NC_AllowAgg|NC_AllowWin; + sNC.pSrcList = p->pSrc; + sNC.pNext = pOuterNC; + + /* Resolve names in the result set. */ + if( sqlite3ResolveExprListNames(&sNC, p->pEList) ) return WRC_Abort; + sNC.ncFlags &= ~NC_AllowWin; + + /* If there are no aggregate functions in the result-set, and no GROUP BY + ** expression, do not allow aggregates in any of the other expressions. + */ + assert( (p->selFlags & SF_Aggregate)==0 ); + pGroupBy = p->pGroupBy; + if( pGroupBy || (sNC.ncFlags & NC_HasAgg)!=0 ){ + assert( NC_MinMaxAgg==SF_MinMaxAgg ); + assert( NC_OrderAgg==SF_OrderByReqd ); + p->selFlags |= SF_Aggregate | (sNC.ncFlags&(NC_MinMaxAgg|NC_OrderAgg)); + }else{ + sNC.ncFlags &= ~NC_AllowAgg; + } + + /* Add the output column list to the name-context before parsing the + ** other expressions in the SELECT statement. This is so that + ** expressions in the WHERE clause (etc.) can refer to expressions by + ** aliases in the result set. + ** + ** Minor point: If this is the case, then the expression will be + ** re-evaluated for each reference to it. + */ + assert( (sNC.ncFlags & (NC_UAggInfo|NC_UUpsert|NC_UBaseReg))==0 ); + sNC.uNC.pEList = p->pEList; + sNC.ncFlags |= NC_UEList; + if( p->pHaving ){ + if( !pGroupBy ){ + sqlite3ErrorMsg(pParse, "a GROUP BY clause is required before HAVING"); + return WRC_Abort; + } + if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort; + } + if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort; + + /* Resolve names in table-valued-function arguments */ + for(i=0; ipSrc->nSrc; i++){ + SrcItem *pItem = &p->pSrc->a[i]; + if( pItem->fg.isTabFunc + && sqlite3ResolveExprListNames(&sNC, pItem->u1.pFuncArg) + ){ + return WRC_Abort; + } + } + +#ifndef SQLITE_OMIT_WINDOWFUNC + if( IN_RENAME_OBJECT ){ + Window *pWin; + for(pWin=p->pWinDefn; pWin; pWin=pWin->pNextWin){ + if( sqlite3ResolveExprListNames(&sNC, pWin->pOrderBy) + || sqlite3ResolveExprListNames(&sNC, pWin->pPartition) + ){ + return WRC_Abort; + } + } + } +#endif + + /* The ORDER BY and GROUP BY clauses may not refer to terms in + ** outer queries + */ + sNC.pNext = 0; + sNC.ncFlags |= NC_AllowAgg|NC_AllowWin; + + /* If this is a converted compound query, move the ORDER BY clause from + ** the sub-query back to the parent query. At this point each term + ** within the ORDER BY clause has been transformed to an integer value. + ** These integers will be replaced by copies of the corresponding result + ** set expressions by the call to resolveOrderGroupBy() below. */ + if( p->selFlags & SF_Converted ){ + Select *pSub = p->pSrc->a[0].pSelect; + p->pOrderBy = pSub->pOrderBy; + pSub->pOrderBy = 0; + } + + /* Process the ORDER BY clause for singleton SELECT statements. + ** The ORDER BY clause for compounds SELECT statements is handled + ** below, after all of the result-sets for all of the elements of + ** the compound have been resolved. + ** + ** If there is an ORDER BY clause on a term of a compound-select other + ** than the right-most term, then that is a syntax error. But the error + ** is not detected until much later, and so we need to go ahead and + ** resolve those symbols on the incorrect ORDER BY for consistency. + */ + if( p->pOrderBy!=0 + && isCompound<=nCompound /* Defer right-most ORDER BY of a compound */ + && resolveOrderGroupBy(&sNC, p, p->pOrderBy, "ORDER") + ){ + return WRC_Abort; + } + if( db->mallocFailed ){ + return WRC_Abort; + } + sNC.ncFlags &= ~NC_AllowWin; + + /* Resolve the GROUP BY clause. At the same time, make sure + ** the GROUP BY clause does not contain aggregate functions. + */ + if( pGroupBy ){ + struct ExprList_item *pItem; + + if( resolveOrderGroupBy(&sNC, p, pGroupBy, "GROUP") || db->mallocFailed ){ + return WRC_Abort; + } + for(i=0, pItem=pGroupBy->a; inExpr; i++, pItem++){ + if( ExprHasProperty(pItem->pExpr, EP_Agg) ){ + sqlite3ErrorMsg(pParse, "aggregate functions are not allowed in " + "the GROUP BY clause"); + return WRC_Abort; + } + } + } + + /* If this is part of a compound SELECT, check that it has the right + ** number of expressions in the select list. */ + if( p->pNext && p->pEList->nExpr!=p->pNext->pEList->nExpr ){ + sqlite3SelectWrongNumTermsError(pParse, p->pNext); + return WRC_Abort; + } + + /* Advance to the next term of the compound + */ + p = p->pPrior; + nCompound++; + } + + /* Resolve the ORDER BY on a compound SELECT after all terms of + ** the compound have been resolved. + */ + if( isCompound && resolveCompoundOrderBy(pParse, pLeftmost) ){ + return WRC_Abort; + } + + return WRC_Prune; +} + +/* +** This routine walks an expression tree and resolves references to +** table columns and result-set columns. At the same time, do error +** checking on function usage and set a flag if any aggregate functions +** are seen. +** +** To resolve table columns references we look for nodes (or subtrees) of the +** form X.Y.Z or Y.Z or just Z where +** +** X: The name of a database. Ex: "main" or "temp" or +** the symbolic name assigned to an ATTACH-ed database. +** +** Y: The name of a table in a FROM clause. Or in a trigger +** one of the special names "old" or "new". +** +** Z: The name of a column in table Y. +** +** The node at the root of the subtree is modified as follows: +** +** Expr.op Changed to TK_COLUMN +** Expr.pTab Points to the Table object for X.Y +** Expr.iColumn The column index in X.Y. -1 for the rowid. +** Expr.iTable The VDBE cursor number for X.Y +** +** +** To resolve result-set references, look for expression nodes of the +** form Z (with no X and Y prefix) where the Z matches the right-hand +** size of an AS clause in the result-set of a SELECT. The Z expression +** is replaced by a copy of the left-hand side of the result-set expression. +** Table-name and function resolution occurs on the substituted expression +** tree. For example, in: +** +** SELECT a+b AS x, c+d AS y FROM t1 ORDER BY x; +** +** The "x" term of the order by is replaced by "a+b" to render: +** +** SELECT a+b AS x, c+d AS y FROM t1 ORDER BY a+b; +** +** Function calls are checked to make sure that the function is +** defined and that the correct number of arguments are specified. +** If the function is an aggregate function, then the NC_HasAgg flag is +** set and the opcode is changed from TK_FUNCTION to TK_AGG_FUNCTION. +** If an expression contains aggregate functions then the EP_Agg +** property on the expression is set. +** +** An error message is left in pParse if anything is amiss. The number +** if errors is returned. +*/ +SQLITE_PRIVATE int sqlite3ResolveExprNames( + NameContext *pNC, /* Namespace to resolve expressions in. */ + Expr *pExpr /* The expression to be analyzed. */ +){ + int savedHasAgg; + Walker w; + + if( pExpr==0 ) return SQLITE_OK; + savedHasAgg = pNC->ncFlags & (NC_HasAgg|NC_MinMaxAgg|NC_HasWin|NC_OrderAgg); + pNC->ncFlags &= ~(NC_HasAgg|NC_MinMaxAgg|NC_HasWin|NC_OrderAgg); + w.pParse = pNC->pParse; + w.xExprCallback = resolveExprStep; + w.xSelectCallback = (pNC->ncFlags & NC_NoSelect) ? 0 : resolveSelectStep; + w.xSelectCallback2 = 0; + w.u.pNC = pNC; +#if SQLITE_MAX_EXPR_DEPTH>0 + w.pParse->nHeight += pExpr->nHeight; + if( sqlite3ExprCheckHeight(w.pParse, w.pParse->nHeight) ){ + return SQLITE_ERROR; + } +#endif + sqlite3WalkExpr(&w, pExpr); +#if SQLITE_MAX_EXPR_DEPTH>0 + w.pParse->nHeight -= pExpr->nHeight; +#endif + assert( EP_Agg==NC_HasAgg ); + assert( EP_Win==NC_HasWin ); + testcase( pNC->ncFlags & NC_HasAgg ); + testcase( pNC->ncFlags & NC_HasWin ); + ExprSetProperty(pExpr, pNC->ncFlags & (NC_HasAgg|NC_HasWin) ); + pNC->ncFlags |= savedHasAgg; + return pNC->nNcErr>0 || w.pParse->nErr>0; +} + +/* +** Resolve all names for all expression in an expression list. This is +** just like sqlite3ResolveExprNames() except that it works for an expression +** list rather than a single expression. +*/ +SQLITE_PRIVATE int sqlite3ResolveExprListNames( + NameContext *pNC, /* Namespace to resolve expressions in. */ + ExprList *pList /* The expression list to be analyzed. */ +){ + int i; + int savedHasAgg = 0; + Walker w; + if( pList==0 ) return WRC_Continue; + w.pParse = pNC->pParse; + w.xExprCallback = resolveExprStep; + w.xSelectCallback = resolveSelectStep; + w.xSelectCallback2 = 0; + w.u.pNC = pNC; + savedHasAgg = pNC->ncFlags & (NC_HasAgg|NC_MinMaxAgg|NC_HasWin|NC_OrderAgg); + pNC->ncFlags &= ~(NC_HasAgg|NC_MinMaxAgg|NC_HasWin|NC_OrderAgg); + for(i=0; inExpr; i++){ + Expr *pExpr = pList->a[i].pExpr; + if( pExpr==0 ) continue; +#if SQLITE_MAX_EXPR_DEPTH>0 + w.pParse->nHeight += pExpr->nHeight; + if( sqlite3ExprCheckHeight(w.pParse, w.pParse->nHeight) ){ + return WRC_Abort; + } +#endif + sqlite3WalkExpr(&w, pExpr); +#if SQLITE_MAX_EXPR_DEPTH>0 + w.pParse->nHeight -= pExpr->nHeight; +#endif + assert( EP_Agg==NC_HasAgg ); + assert( EP_Win==NC_HasWin ); + testcase( pNC->ncFlags & NC_HasAgg ); + testcase( pNC->ncFlags & NC_HasWin ); + if( pNC->ncFlags & (NC_HasAgg|NC_MinMaxAgg|NC_HasWin|NC_OrderAgg) ){ + ExprSetProperty(pExpr, pNC->ncFlags & (NC_HasAgg|NC_HasWin) ); + savedHasAgg |= pNC->ncFlags & + (NC_HasAgg|NC_MinMaxAgg|NC_HasWin|NC_OrderAgg); + pNC->ncFlags &= ~(NC_HasAgg|NC_MinMaxAgg|NC_HasWin|NC_OrderAgg); + } + if( w.pParse->nErr>0 ) return WRC_Abort; + } + pNC->ncFlags |= savedHasAgg; + return WRC_Continue; +} + +/* +** Resolve all names in all expressions of a SELECT and in all +** decendents of the SELECT, including compounds off of p->pPrior, +** subqueries in expressions, and subqueries used as FROM clause +** terms. +** +** See sqlite3ResolveExprNames() for a description of the kinds of +** transformations that occur. +** +** All SELECT statements should have been expanded using +** sqlite3SelectExpand() prior to invoking this routine. +*/ +SQLITE_PRIVATE void sqlite3ResolveSelectNames( + Parse *pParse, /* The parser context */ + Select *p, /* The SELECT statement being coded. */ + NameContext *pOuterNC /* Name context for parent SELECT statement */ +){ + Walker w; + + assert( p!=0 ); + w.xExprCallback = resolveExprStep; + w.xSelectCallback = resolveSelectStep; + w.xSelectCallback2 = 0; + w.pParse = pParse; + w.u.pNC = pOuterNC; + sqlite3WalkSelect(&w, p); +} + +/* +** Resolve names in expressions that can only reference a single table +** or which cannot reference any tables at all. Examples: +** +** "type" flag +** ------------ +** (1) CHECK constraints NC_IsCheck +** (2) WHERE clauses on partial indices NC_PartIdx +** (3) Expressions in indexes on expressions NC_IdxExpr +** (4) Expression arguments to VACUUM INTO. 0 +** (5) GENERATED ALWAYS as expressions NC_GenCol +** +** In all cases except (4), the Expr.iTable value for Expr.op==TK_COLUMN +** nodes of the expression is set to -1 and the Expr.iColumn value is +** set to the column number. In case (4), TK_COLUMN nodes cause an error. +** +** Any errors cause an error message to be set in pParse. +*/ +SQLITE_PRIVATE int sqlite3ResolveSelfReference( + Parse *pParse, /* Parsing context */ + Table *pTab, /* The table being referenced, or NULL */ + int type, /* NC_IsCheck, NC_PartIdx, NC_IdxExpr, NC_GenCol, or 0 */ + Expr *pExpr, /* Expression to resolve. May be NULL. */ + ExprList *pList /* Expression list to resolve. May be NULL. */ +){ + SrcList sSrc; /* Fake SrcList for pParse->pNewTable */ + NameContext sNC; /* Name context for pParse->pNewTable */ + int rc; + + assert( type==0 || pTab!=0 ); + assert( type==NC_IsCheck || type==NC_PartIdx || type==NC_IdxExpr + || type==NC_GenCol || pTab==0 ); + memset(&sNC, 0, sizeof(sNC)); + memset(&sSrc, 0, sizeof(sSrc)); + if( pTab ){ + sSrc.nSrc = 1; + sSrc.a[0].zName = pTab->zName; + sSrc.a[0].pTab = pTab; + sSrc.a[0].iCursor = -1; + if( pTab->pSchema!=pParse->db->aDb[1].pSchema ){ + /* Cause EP_FromDDL to be set on TK_FUNCTION nodes of non-TEMP + ** schema elements */ + type |= NC_FromDDL; + } + } + sNC.pParse = pParse; + sNC.pSrcList = &sSrc; + sNC.ncFlags = type | NC_IsDDL; + if( (rc = sqlite3ResolveExprNames(&sNC, pExpr))!=SQLITE_OK ) return rc; + if( pList ) rc = sqlite3ResolveExprListNames(&sNC, pList); + return rc; +} + +/************** End of resolve.c *********************************************/ +/************** Begin file expr.c ********************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains routines used for analyzing expressions and +** for generating VDBE code that evaluates expressions in SQLite. +*/ +/* #include "sqliteInt.h" */ + +/* Forward declarations */ +static void exprCodeBetween(Parse*,Expr*,int,void(*)(Parse*,Expr*,int,int),int); +static int exprCodeVector(Parse *pParse, Expr *p, int *piToFree); + +/* +** Return the affinity character for a single column of a table. +*/ +SQLITE_PRIVATE char sqlite3TableColumnAffinity(const Table *pTab, int iCol){ + if( iCol<0 || NEVER(iCol>=pTab->nCol) ) return SQLITE_AFF_INTEGER; + return pTab->aCol[iCol].affinity; +} + +/* +** Return the 'affinity' of the expression pExpr if any. +** +** If pExpr is a column, a reference to a column via an 'AS' alias, +** or a sub-select with a column as the return value, then the +** affinity of that column is returned. Otherwise, 0x00 is returned, +** indicating no affinity for the expression. +** +** i.e. the WHERE clause expressions in the following statements all +** have an affinity: +** +** CREATE TABLE t1(a); +** SELECT * FROM t1 WHERE a; +** SELECT a AS b FROM t1 WHERE b; +** SELECT * FROM t1 WHERE (select a from t1); +*/ +SQLITE_PRIVATE char sqlite3ExprAffinity(const Expr *pExpr){ + int op; + while( ExprHasProperty(pExpr, EP_Skip|EP_IfNullRow) ){ + assert( pExpr->op==TK_COLLATE + || pExpr->op==TK_IF_NULL_ROW + || (pExpr->op==TK_REGISTER && pExpr->op2==TK_IF_NULL_ROW) ); + pExpr = pExpr->pLeft; + assert( pExpr!=0 ); + } + op = pExpr->op; + if( op==TK_REGISTER ) op = pExpr->op2; + if( op==TK_COLUMN || op==TK_AGG_COLUMN ){ + assert( ExprUseYTab(pExpr) ); + if( pExpr->y.pTab ){ + return sqlite3TableColumnAffinity(pExpr->y.pTab, pExpr->iColumn); + } + } + if( op==TK_SELECT ){ + assert( ExprUseXSelect(pExpr) ); + assert( pExpr->x.pSelect!=0 ); + assert( pExpr->x.pSelect->pEList!=0 ); + assert( pExpr->x.pSelect->pEList->a[0].pExpr!=0 ); + return sqlite3ExprAffinity(pExpr->x.pSelect->pEList->a[0].pExpr); + } +#ifndef SQLITE_OMIT_CAST + if( op==TK_CAST ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + return sqlite3AffinityType(pExpr->u.zToken, 0); + } +#endif + if( op==TK_SELECT_COLUMN ){ + assert( pExpr->pLeft!=0 && ExprUseXSelect(pExpr->pLeft) ); + assert( pExpr->iColumn < pExpr->iTable ); + assert( pExpr->iTable==pExpr->pLeft->x.pSelect->pEList->nExpr ); + return sqlite3ExprAffinity( + pExpr->pLeft->x.pSelect->pEList->a[pExpr->iColumn].pExpr + ); + } + if( op==TK_VECTOR ){ + assert( ExprUseXList(pExpr) ); + return sqlite3ExprAffinity(pExpr->x.pList->a[0].pExpr); + } + return pExpr->affExpr; +} + +/* +** Set the collating sequence for expression pExpr to be the collating +** sequence named by pToken. Return a pointer to a new Expr node that +** implements the COLLATE operator. +** +** If a memory allocation error occurs, that fact is recorded in pParse->db +** and the pExpr parameter is returned unchanged. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprAddCollateToken( + const Parse *pParse, /* Parsing context */ + Expr *pExpr, /* Add the "COLLATE" clause to this expression */ + const Token *pCollName, /* Name of collating sequence */ + int dequote /* True to dequote pCollName */ +){ + if( pCollName->n>0 ){ + Expr *pNew = sqlite3ExprAlloc(pParse->db, TK_COLLATE, pCollName, dequote); + if( pNew ){ + pNew->pLeft = pExpr; + pNew->flags |= EP_Collate|EP_Skip; + pExpr = pNew; + } + } + return pExpr; +} +SQLITE_PRIVATE Expr *sqlite3ExprAddCollateString( + const Parse *pParse, /* Parsing context */ + Expr *pExpr, /* Add the "COLLATE" clause to this expression */ + const char *zC /* The collating sequence name */ +){ + Token s; + assert( zC!=0 ); + sqlite3TokenInit(&s, (char*)zC); + return sqlite3ExprAddCollateToken(pParse, pExpr, &s, 0); +} + +/* +** Skip over any TK_COLLATE operators. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprSkipCollate(Expr *pExpr){ + while( pExpr && ExprHasProperty(pExpr, EP_Skip) ){ + assert( pExpr->op==TK_COLLATE ); + pExpr = pExpr->pLeft; + } + return pExpr; +} + +/* +** Skip over any TK_COLLATE operators and/or any unlikely() +** or likelihood() or likely() functions at the root of an +** expression. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprSkipCollateAndLikely(Expr *pExpr){ + while( pExpr && ExprHasProperty(pExpr, EP_Skip|EP_Unlikely) ){ + if( ExprHasProperty(pExpr, EP_Unlikely) ){ + assert( ExprUseXList(pExpr) ); + assert( pExpr->x.pList->nExpr>0 ); + assert( pExpr->op==TK_FUNCTION ); + pExpr = pExpr->x.pList->a[0].pExpr; + }else{ + assert( pExpr->op==TK_COLLATE ); + pExpr = pExpr->pLeft; + } + } + return pExpr; +} + +/* +** Return the collation sequence for the expression pExpr. If +** there is no defined collating sequence, return NULL. +** +** See also: sqlite3ExprNNCollSeq() +** +** The sqlite3ExprNNCollSeq() works the same exact that it returns the +** default collation if pExpr has no defined collation. +** +** The collating sequence might be determined by a COLLATE operator +** or by the presence of a column with a defined collating sequence. +** COLLATE operators take first precedence. Left operands take +** precedence over right operands. +*/ +SQLITE_PRIVATE CollSeq *sqlite3ExprCollSeq(Parse *pParse, const Expr *pExpr){ + sqlite3 *db = pParse->db; + CollSeq *pColl = 0; + const Expr *p = pExpr; + while( p ){ + int op = p->op; + if( op==TK_REGISTER ) op = p->op2; + if( op==TK_AGG_COLUMN || op==TK_COLUMN || op==TK_TRIGGER ){ + assert( ExprUseYTab(p) ); + if( p->y.pTab!=0 ){ + /* op==TK_REGISTER && p->y.pTab!=0 happens when pExpr was originally + ** a TK_COLUMN but was previously evaluated and cached in a register */ + int j = p->iColumn; + if( j>=0 ){ + const char *zColl = sqlite3ColumnColl(&p->y.pTab->aCol[j]); + pColl = sqlite3FindCollSeq(db, ENC(db), zColl, 0); + } + break; + } + } + if( op==TK_CAST || op==TK_UPLUS ){ + p = p->pLeft; + continue; + } + if( op==TK_VECTOR ){ + assert( ExprUseXList(p) ); + p = p->x.pList->a[0].pExpr; + continue; + } + if( op==TK_COLLATE ){ + assert( !ExprHasProperty(p, EP_IntValue) ); + pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken); + break; + } + if( p->flags & EP_Collate ){ + if( p->pLeft && (p->pLeft->flags & EP_Collate)!=0 ){ + p = p->pLeft; + }else{ + Expr *pNext = p->pRight; + /* The Expr.x union is never used at the same time as Expr.pRight */ + assert( ExprUseXList(p) ); + assert( p->x.pList==0 || p->pRight==0 ); + if( p->x.pList!=0 && !db->mallocFailed ){ + int i; + for(i=0; ALWAYS(ix.pList->nExpr); i++){ + if( ExprHasProperty(p->x.pList->a[i].pExpr, EP_Collate) ){ + pNext = p->x.pList->a[i].pExpr; + break; + } + } + } + p = pNext; + } + }else{ + break; + } + } + if( sqlite3CheckCollSeq(pParse, pColl) ){ + pColl = 0; + } + return pColl; +} + +/* +** Return the collation sequence for the expression pExpr. If +** there is no defined collating sequence, return a pointer to the +** defautl collation sequence. +** +** See also: sqlite3ExprCollSeq() +** +** The sqlite3ExprCollSeq() routine works the same except that it +** returns NULL if there is no defined collation. +*/ +SQLITE_PRIVATE CollSeq *sqlite3ExprNNCollSeq(Parse *pParse, const Expr *pExpr){ + CollSeq *p = sqlite3ExprCollSeq(pParse, pExpr); + if( p==0 ) p = pParse->db->pDfltColl; + assert( p!=0 ); + return p; +} + +/* +** Return TRUE if the two expressions have equivalent collating sequences. +*/ +SQLITE_PRIVATE int sqlite3ExprCollSeqMatch(Parse *pParse, const Expr *pE1, const Expr *pE2){ + CollSeq *pColl1 = sqlite3ExprNNCollSeq(pParse, pE1); + CollSeq *pColl2 = sqlite3ExprNNCollSeq(pParse, pE2); + return sqlite3StrICmp(pColl1->zName, pColl2->zName)==0; +} + +/* +** pExpr is an operand of a comparison operator. aff2 is the +** type affinity of the other operand. This routine returns the +** type affinity that should be used for the comparison operator. +*/ +SQLITE_PRIVATE char sqlite3CompareAffinity(const Expr *pExpr, char aff2){ + char aff1 = sqlite3ExprAffinity(pExpr); + if( aff1>SQLITE_AFF_NONE && aff2>SQLITE_AFF_NONE ){ + /* Both sides of the comparison are columns. If one has numeric + ** affinity, use that. Otherwise use no affinity. + */ + if( sqlite3IsNumericAffinity(aff1) || sqlite3IsNumericAffinity(aff2) ){ + return SQLITE_AFF_NUMERIC; + }else{ + return SQLITE_AFF_BLOB; + } + }else{ + /* One side is a column, the other is not. Use the columns affinity. */ + assert( aff1<=SQLITE_AFF_NONE || aff2<=SQLITE_AFF_NONE ); + return (aff1<=SQLITE_AFF_NONE ? aff2 : aff1) | SQLITE_AFF_NONE; + } +} + +/* +** pExpr is a comparison operator. Return the type affinity that should +** be applied to both operands prior to doing the comparison. +*/ +static char comparisonAffinity(const Expr *pExpr){ + char aff; + assert( pExpr->op==TK_EQ || pExpr->op==TK_IN || pExpr->op==TK_LT || + pExpr->op==TK_GT || pExpr->op==TK_GE || pExpr->op==TK_LE || + pExpr->op==TK_NE || pExpr->op==TK_IS || pExpr->op==TK_ISNOT ); + assert( pExpr->pLeft ); + aff = sqlite3ExprAffinity(pExpr->pLeft); + if( pExpr->pRight ){ + aff = sqlite3CompareAffinity(pExpr->pRight, aff); + }else if( ExprUseXSelect(pExpr) ){ + aff = sqlite3CompareAffinity(pExpr->x.pSelect->pEList->a[0].pExpr, aff); + }else if( aff==0 ){ + aff = SQLITE_AFF_BLOB; + } + return aff; +} + +/* +** pExpr is a comparison expression, eg. '=', '<', IN(...) etc. +** idx_affinity is the affinity of an indexed column. Return true +** if the index with affinity idx_affinity may be used to implement +** the comparison in pExpr. +*/ +SQLITE_PRIVATE int sqlite3IndexAffinityOk(const Expr *pExpr, char idx_affinity){ + char aff = comparisonAffinity(pExpr); + if( affflags & EP_Collate ){ + pColl = sqlite3ExprCollSeq(pParse, pLeft); + }else if( pRight && (pRight->flags & EP_Collate)!=0 ){ + pColl = sqlite3ExprCollSeq(pParse, pRight); + }else{ + pColl = sqlite3ExprCollSeq(pParse, pLeft); + if( !pColl ){ + pColl = sqlite3ExprCollSeq(pParse, pRight); + } + } + return pColl; +} + +/* Expresssion p is a comparison operator. Return a collation sequence +** appropriate for the comparison operator. +** +** This is normally just a wrapper around sqlite3BinaryCompareCollSeq(). +** However, if the OP_Commuted flag is set, then the order of the operands +** is reversed in the sqlite3BinaryCompareCollSeq() call so that the +** correct collating sequence is found. +*/ +SQLITE_PRIVATE CollSeq *sqlite3ExprCompareCollSeq(Parse *pParse, const Expr *p){ + if( ExprHasProperty(p, EP_Commuted) ){ + return sqlite3BinaryCompareCollSeq(pParse, p->pRight, p->pLeft); + }else{ + return sqlite3BinaryCompareCollSeq(pParse, p->pLeft, p->pRight); + } +} + +/* +** Generate code for a comparison operator. +*/ +static int codeCompare( + Parse *pParse, /* The parsing (and code generating) context */ + Expr *pLeft, /* The left operand */ + Expr *pRight, /* The right operand */ + int opcode, /* The comparison opcode */ + int in1, int in2, /* Register holding operands */ + int dest, /* Jump here if true. */ + int jumpIfNull, /* If true, jump if either operand is NULL */ + int isCommuted /* The comparison has been commuted */ +){ + int p5; + int addr; + CollSeq *p4; + + if( pParse->nErr ) return 0; + if( isCommuted ){ + p4 = sqlite3BinaryCompareCollSeq(pParse, pRight, pLeft); + }else{ + p4 = sqlite3BinaryCompareCollSeq(pParse, pLeft, pRight); + } + p5 = binaryCompareP5(pLeft, pRight, jumpIfNull); + addr = sqlite3VdbeAddOp4(pParse->pVdbe, opcode, in2, dest, in1, + (void*)p4, P4_COLLSEQ); + sqlite3VdbeChangeP5(pParse->pVdbe, (u8)p5); + return addr; +} + +/* +** Return true if expression pExpr is a vector, or false otherwise. +** +** A vector is defined as any expression that results in two or more +** columns of result. Every TK_VECTOR node is an vector because the +** parser will not generate a TK_VECTOR with fewer than two entries. +** But a TK_SELECT might be either a vector or a scalar. It is only +** considered a vector if it has two or more result columns. +*/ +SQLITE_PRIVATE int sqlite3ExprIsVector(const Expr *pExpr){ + return sqlite3ExprVectorSize(pExpr)>1; +} + +/* +** If the expression passed as the only argument is of type TK_VECTOR +** return the number of expressions in the vector. Or, if the expression +** is a sub-select, return the number of columns in the sub-select. For +** any other type of expression, return 1. +*/ +SQLITE_PRIVATE int sqlite3ExprVectorSize(const Expr *pExpr){ + u8 op = pExpr->op; + if( op==TK_REGISTER ) op = pExpr->op2; + if( op==TK_VECTOR ){ + assert( ExprUseXList(pExpr) ); + return pExpr->x.pList->nExpr; + }else if( op==TK_SELECT ){ + assert( ExprUseXSelect(pExpr) ); + return pExpr->x.pSelect->pEList->nExpr; + }else{ + return 1; + } +} + +/* +** Return a pointer to a subexpression of pVector that is the i-th +** column of the vector (numbered starting with 0). The caller must +** ensure that i is within range. +** +** If pVector is really a scalar (and "scalar" here includes subqueries +** that return a single column!) then return pVector unmodified. +** +** pVector retains ownership of the returned subexpression. +** +** If the vector is a (SELECT ...) then the expression returned is +** just the expression for the i-th term of the result set, and may +** not be ready for evaluation because the table cursor has not yet +** been positioned. +*/ +SQLITE_PRIVATE Expr *sqlite3VectorFieldSubexpr(Expr *pVector, int i){ + assert( iop==TK_ERROR ); + if( sqlite3ExprIsVector(pVector) ){ + assert( pVector->op2==0 || pVector->op==TK_REGISTER ); + if( pVector->op==TK_SELECT || pVector->op2==TK_SELECT ){ + assert( ExprUseXSelect(pVector) ); + return pVector->x.pSelect->pEList->a[i].pExpr; + }else{ + assert( ExprUseXList(pVector) ); + return pVector->x.pList->a[i].pExpr; + } + } + return pVector; +} + +/* +** Compute and return a new Expr object which when passed to +** sqlite3ExprCode() will generate all necessary code to compute +** the iField-th column of the vector expression pVector. +** +** It is ok for pVector to be a scalar (as long as iField==0). +** In that case, this routine works like sqlite3ExprDup(). +** +** The caller owns the returned Expr object and is responsible for +** ensuring that the returned value eventually gets freed. +** +** The caller retains ownership of pVector. If pVector is a TK_SELECT, +** then the returned object will reference pVector and so pVector must remain +** valid for the life of the returned object. If pVector is a TK_VECTOR +** or a scalar expression, then it can be deleted as soon as this routine +** returns. +** +** A trick to cause a TK_SELECT pVector to be deleted together with +** the returned Expr object is to attach the pVector to the pRight field +** of the returned TK_SELECT_COLUMN Expr object. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprForVectorField( + Parse *pParse, /* Parsing context */ + Expr *pVector, /* The vector. List of expressions or a sub-SELECT */ + int iField, /* Which column of the vector to return */ + int nField /* Total number of columns in the vector */ +){ + Expr *pRet; + if( pVector->op==TK_SELECT ){ + assert( ExprUseXSelect(pVector) ); + /* The TK_SELECT_COLUMN Expr node: + ** + ** pLeft: pVector containing TK_SELECT. Not deleted. + ** pRight: not used. But recursively deleted. + ** iColumn: Index of a column in pVector + ** iTable: 0 or the number of columns on the LHS of an assignment + ** pLeft->iTable: First in an array of register holding result, or 0 + ** if the result is not yet computed. + ** + ** sqlite3ExprDelete() specifically skips the recursive delete of + ** pLeft on TK_SELECT_COLUMN nodes. But pRight is followed, so pVector + ** can be attached to pRight to cause this node to take ownership of + ** pVector. Typically there will be multiple TK_SELECT_COLUMN nodes + ** with the same pLeft pointer to the pVector, but only one of them + ** will own the pVector. + */ + pRet = sqlite3PExpr(pParse, TK_SELECT_COLUMN, 0, 0); + if( pRet ){ + pRet->iTable = nField; + pRet->iColumn = iField; + pRet->pLeft = pVector; + } + }else{ + if( pVector->op==TK_VECTOR ){ + Expr **ppVector; + assert( ExprUseXList(pVector) ); + ppVector = &pVector->x.pList->a[iField].pExpr; + pVector = *ppVector; + if( IN_RENAME_OBJECT ){ + /* This must be a vector UPDATE inside a trigger */ + *ppVector = 0; + return pVector; + } + } + pRet = sqlite3ExprDup(pParse->db, pVector, 0); + } + return pRet; +} + +/* +** If expression pExpr is of type TK_SELECT, generate code to evaluate +** it. Return the register in which the result is stored (or, if the +** sub-select returns more than one column, the first in an array +** of registers in which the result is stored). +** +** If pExpr is not a TK_SELECT expression, return 0. +*/ +static int exprCodeSubselect(Parse *pParse, Expr *pExpr){ + int reg = 0; +#ifndef SQLITE_OMIT_SUBQUERY + if( pExpr->op==TK_SELECT ){ + reg = sqlite3CodeSubselect(pParse, pExpr); + } +#endif + return reg; +} + +/* +** Argument pVector points to a vector expression - either a TK_VECTOR +** or TK_SELECT that returns more than one column. This function returns +** the register number of a register that contains the value of +** element iField of the vector. +** +** If pVector is a TK_SELECT expression, then code for it must have +** already been generated using the exprCodeSubselect() routine. In this +** case parameter regSelect should be the first in an array of registers +** containing the results of the sub-select. +** +** If pVector is of type TK_VECTOR, then code for the requested field +** is generated. In this case (*pRegFree) may be set to the number of +** a temporary register to be freed by the caller before returning. +** +** Before returning, output parameter (*ppExpr) is set to point to the +** Expr object corresponding to element iElem of the vector. +*/ +static int exprVectorRegister( + Parse *pParse, /* Parse context */ + Expr *pVector, /* Vector to extract element from */ + int iField, /* Field to extract from pVector */ + int regSelect, /* First in array of registers */ + Expr **ppExpr, /* OUT: Expression element */ + int *pRegFree /* OUT: Temp register to free */ +){ + u8 op = pVector->op; + assert( op==TK_VECTOR || op==TK_REGISTER || op==TK_SELECT || op==TK_ERROR ); + if( op==TK_REGISTER ){ + *ppExpr = sqlite3VectorFieldSubexpr(pVector, iField); + return pVector->iTable+iField; + } + if( op==TK_SELECT ){ + assert( ExprUseXSelect(pVector) ); + *ppExpr = pVector->x.pSelect->pEList->a[iField].pExpr; + return regSelect+iField; + } + if( op==TK_VECTOR ){ + assert( ExprUseXList(pVector) ); + *ppExpr = pVector->x.pList->a[iField].pExpr; + return sqlite3ExprCodeTemp(pParse, *ppExpr, pRegFree); + } + return 0; +} + +/* +** Expression pExpr is a comparison between two vector values. Compute +** the result of the comparison (1, 0, or NULL) and write that +** result into register dest. +** +** The caller must satisfy the following preconditions: +** +** if pExpr->op==TK_IS: op==TK_EQ and p5==SQLITE_NULLEQ +** if pExpr->op==TK_ISNOT: op==TK_NE and p5==SQLITE_NULLEQ +** otherwise: op==pExpr->op and p5==0 +*/ +static void codeVectorCompare( + Parse *pParse, /* Code generator context */ + Expr *pExpr, /* The comparison operation */ + int dest, /* Write results into this register */ + u8 op, /* Comparison operator */ + u8 p5 /* SQLITE_NULLEQ or zero */ +){ + Vdbe *v = pParse->pVdbe; + Expr *pLeft = pExpr->pLeft; + Expr *pRight = pExpr->pRight; + int nLeft = sqlite3ExprVectorSize(pLeft); + int i; + int regLeft = 0; + int regRight = 0; + u8 opx = op; + int addrCmp = 0; + int addrDone = sqlite3VdbeMakeLabel(pParse); + int isCommuted = ExprHasProperty(pExpr,EP_Commuted); + + assert( !ExprHasVVAProperty(pExpr,EP_Immutable) ); + if( pParse->nErr ) return; + if( nLeft!=sqlite3ExprVectorSize(pRight) ){ + sqlite3ErrorMsg(pParse, "row value misused"); + return; + } + assert( pExpr->op==TK_EQ || pExpr->op==TK_NE + || pExpr->op==TK_IS || pExpr->op==TK_ISNOT + || pExpr->op==TK_LT || pExpr->op==TK_GT + || pExpr->op==TK_LE || pExpr->op==TK_GE + ); + assert( pExpr->op==op || (pExpr->op==TK_IS && op==TK_EQ) + || (pExpr->op==TK_ISNOT && op==TK_NE) ); + assert( p5==0 || pExpr->op!=op ); + assert( p5==SQLITE_NULLEQ || pExpr->op==op ); + + if( op==TK_LE ) opx = TK_LT; + if( op==TK_GE ) opx = TK_GT; + if( op==TK_NE ) opx = TK_EQ; + + regLeft = exprCodeSubselect(pParse, pLeft); + regRight = exprCodeSubselect(pParse, pRight); + + sqlite3VdbeAddOp2(v, OP_Integer, 1, dest); + for(i=0; 1 /*Loop exits by "break"*/; i++){ + int regFree1 = 0, regFree2 = 0; + Expr *pL = 0, *pR = 0; + int r1, r2; + assert( i>=0 && i0 +/* +** Check that argument nHeight is less than or equal to the maximum +** expression depth allowed. If it is not, leave an error message in +** pParse. +*/ +SQLITE_PRIVATE int sqlite3ExprCheckHeight(Parse *pParse, int nHeight){ + int rc = SQLITE_OK; + int mxHeight = pParse->db->aLimit[SQLITE_LIMIT_EXPR_DEPTH]; + if( nHeight>mxHeight ){ + sqlite3ErrorMsg(pParse, + "Expression tree is too large (maximum depth %d)", mxHeight + ); + rc = SQLITE_ERROR; + } + return rc; +} + +/* The following three functions, heightOfExpr(), heightOfExprList() +** and heightOfSelect(), are used to determine the maximum height +** of any expression tree referenced by the structure passed as the +** first argument. +** +** If this maximum height is greater than the current value pointed +** to by pnHeight, the second parameter, then set *pnHeight to that +** value. +*/ +static void heightOfExpr(const Expr *p, int *pnHeight){ + if( p ){ + if( p->nHeight>*pnHeight ){ + *pnHeight = p->nHeight; + } + } +} +static void heightOfExprList(const ExprList *p, int *pnHeight){ + if( p ){ + int i; + for(i=0; inExpr; i++){ + heightOfExpr(p->a[i].pExpr, pnHeight); + } + } +} +static void heightOfSelect(const Select *pSelect, int *pnHeight){ + const Select *p; + for(p=pSelect; p; p=p->pPrior){ + heightOfExpr(p->pWhere, pnHeight); + heightOfExpr(p->pHaving, pnHeight); + heightOfExpr(p->pLimit, pnHeight); + heightOfExprList(p->pEList, pnHeight); + heightOfExprList(p->pGroupBy, pnHeight); + heightOfExprList(p->pOrderBy, pnHeight); + } +} + +/* +** Set the Expr.nHeight variable in the structure passed as an +** argument. An expression with no children, Expr.pList or +** Expr.pSelect member has a height of 1. Any other expression +** has a height equal to the maximum height of any other +** referenced Expr plus one. +** +** Also propagate EP_Propagate flags up from Expr.x.pList to Expr.flags, +** if appropriate. +*/ +static void exprSetHeight(Expr *p){ + int nHeight = p->pLeft ? p->pLeft->nHeight : 0; + if( p->pRight && p->pRight->nHeight>nHeight ) nHeight = p->pRight->nHeight; + if( ExprUseXSelect(p) ){ + heightOfSelect(p->x.pSelect, &nHeight); + }else if( p->x.pList ){ + heightOfExprList(p->x.pList, &nHeight); + p->flags |= EP_Propagate & sqlite3ExprListFlags(p->x.pList); + } + p->nHeight = nHeight + 1; +} + +/* +** Set the Expr.nHeight variable using the exprSetHeight() function. If +** the height is greater than the maximum allowed expression depth, +** leave an error in pParse. +** +** Also propagate all EP_Propagate flags from the Expr.x.pList into +** Expr.flags. +*/ +SQLITE_PRIVATE void sqlite3ExprSetHeightAndFlags(Parse *pParse, Expr *p){ + if( pParse->nErr ) return; + exprSetHeight(p); + sqlite3ExprCheckHeight(pParse, p->nHeight); +} + +/* +** Return the maximum height of any expression tree referenced +** by the select statement passed as an argument. +*/ +SQLITE_PRIVATE int sqlite3SelectExprHeight(const Select *p){ + int nHeight = 0; + heightOfSelect(p, &nHeight); + return nHeight; +} +#else /* ABOVE: Height enforcement enabled. BELOW: Height enforcement off */ +/* +** Propagate all EP_Propagate flags from the Expr.x.pList into +** Expr.flags. +*/ +SQLITE_PRIVATE void sqlite3ExprSetHeightAndFlags(Parse *pParse, Expr *p){ + if( pParse->nErr ) return; + if( p && ExprUseXList(p) && p->x.pList ){ + p->flags |= EP_Propagate & sqlite3ExprListFlags(p->x.pList); + } +} +#define exprSetHeight(y) +#endif /* SQLITE_MAX_EXPR_DEPTH>0 */ + +/* +** This routine is the core allocator for Expr nodes. +** +** Construct a new expression node and return a pointer to it. Memory +** for this node and for the pToken argument is a single allocation +** obtained from sqlite3DbMalloc(). The calling function +** is responsible for making sure the node eventually gets freed. +** +** If dequote is true, then the token (if it exists) is dequoted. +** If dequote is false, no dequoting is performed. The deQuote +** parameter is ignored if pToken is NULL or if the token does not +** appear to be quoted. If the quotes were of the form "..." (double-quotes) +** then the EP_DblQuoted flag is set on the expression node. +** +** Special case: If op==TK_INTEGER and pToken points to a string that +** can be translated into a 32-bit integer, then the token is not +** stored in u.zToken. Instead, the integer values is written +** into u.iValue and the EP_IntValue flag is set. No extra storage +** is allocated to hold the integer text and the dequote flag is ignored. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprAlloc( + sqlite3 *db, /* Handle for sqlite3DbMallocRawNN() */ + int op, /* Expression opcode */ + const Token *pToken, /* Token argument. Might be NULL */ + int dequote /* True to dequote */ +){ + Expr *pNew; + int nExtra = 0; + int iValue = 0; + + assert( db!=0 ); + if( pToken ){ + if( op!=TK_INTEGER || pToken->z==0 + || sqlite3GetInt32(pToken->z, &iValue)==0 ){ + nExtra = pToken->n+1; + assert( iValue>=0 ); + } + } + pNew = sqlite3DbMallocRawNN(db, sizeof(Expr)+nExtra); + if( pNew ){ + memset(pNew, 0, sizeof(Expr)); + pNew->op = (u8)op; + pNew->iAgg = -1; + if( pToken ){ + if( nExtra==0 ){ + pNew->flags |= EP_IntValue|EP_Leaf|(iValue?EP_IsTrue:EP_IsFalse); + pNew->u.iValue = iValue; + }else{ + pNew->u.zToken = (char*)&pNew[1]; + assert( pToken->z!=0 || pToken->n==0 ); + if( pToken->n ) memcpy(pNew->u.zToken, pToken->z, pToken->n); + pNew->u.zToken[pToken->n] = 0; + if( dequote && sqlite3Isquote(pNew->u.zToken[0]) ){ + sqlite3DequoteExpr(pNew); + } + } + } +#if SQLITE_MAX_EXPR_DEPTH>0 + pNew->nHeight = 1; +#endif + } + return pNew; +} + +/* +** Allocate a new expression node from a zero-terminated token that has +** already been dequoted. +*/ +SQLITE_PRIVATE Expr *sqlite3Expr( + sqlite3 *db, /* Handle for sqlite3DbMallocZero() (may be null) */ + int op, /* Expression opcode */ + const char *zToken /* Token argument. Might be NULL */ +){ + Token x; + x.z = zToken; + x.n = sqlite3Strlen30(zToken); + return sqlite3ExprAlloc(db, op, &x, 0); +} + +/* +** Attach subtrees pLeft and pRight to the Expr node pRoot. +** +** If pRoot==NULL that means that a memory allocation error has occurred. +** In that case, delete the subtrees pLeft and pRight. +*/ +SQLITE_PRIVATE void sqlite3ExprAttachSubtrees( + sqlite3 *db, + Expr *pRoot, + Expr *pLeft, + Expr *pRight +){ + if( pRoot==0 ){ + assert( db->mallocFailed ); + sqlite3ExprDelete(db, pLeft); + sqlite3ExprDelete(db, pRight); + }else{ + if( pRight ){ + pRoot->pRight = pRight; + pRoot->flags |= EP_Propagate & pRight->flags; + } + if( pLeft ){ + pRoot->pLeft = pLeft; + pRoot->flags |= EP_Propagate & pLeft->flags; + } + exprSetHeight(pRoot); + } +} + +/* +** Allocate an Expr node which joins as many as two subtrees. +** +** One or both of the subtrees can be NULL. Return a pointer to the new +** Expr node. Or, if an OOM error occurs, set pParse->db->mallocFailed, +** free the subtrees and return NULL. +*/ +SQLITE_PRIVATE Expr *sqlite3PExpr( + Parse *pParse, /* Parsing context */ + int op, /* Expression opcode */ + Expr *pLeft, /* Left operand */ + Expr *pRight /* Right operand */ +){ + Expr *p; + p = sqlite3DbMallocRawNN(pParse->db, sizeof(Expr)); + if( p ){ + memset(p, 0, sizeof(Expr)); + p->op = op & 0xff; + p->iAgg = -1; + sqlite3ExprAttachSubtrees(pParse->db, p, pLeft, pRight); + sqlite3ExprCheckHeight(pParse, p->nHeight); + }else{ + sqlite3ExprDelete(pParse->db, pLeft); + sqlite3ExprDelete(pParse->db, pRight); + } + return p; +} + +/* +** Add pSelect to the Expr.x.pSelect field. Or, if pExpr is NULL (due +** do a memory allocation failure) then delete the pSelect object. +*/ +SQLITE_PRIVATE void sqlite3PExprAddSelect(Parse *pParse, Expr *pExpr, Select *pSelect){ + if( pExpr ){ + pExpr->x.pSelect = pSelect; + ExprSetProperty(pExpr, EP_xIsSelect|EP_Subquery); + sqlite3ExprSetHeightAndFlags(pParse, pExpr); + }else{ + assert( pParse->db->mallocFailed ); + sqlite3SelectDelete(pParse->db, pSelect); + } +} + +/* +** Expression list pEList is a list of vector values. This function +** converts the contents of pEList to a VALUES(...) Select statement +** returning 1 row for each element of the list. For example, the +** expression list: +** +** ( (1,2), (3,4) (5,6) ) +** +** is translated to the equivalent of: +** +** VALUES(1,2), (3,4), (5,6) +** +** Each of the vector values in pEList must contain exactly nElem terms. +** If a list element that is not a vector or does not contain nElem terms, +** an error message is left in pParse. +** +** This is used as part of processing IN(...) expressions with a list +** of vectors on the RHS. e.g. "... IN ((1,2), (3,4), (5,6))". +*/ +SQLITE_PRIVATE Select *sqlite3ExprListToValues(Parse *pParse, int nElem, ExprList *pEList){ + int ii; + Select *pRet = 0; + assert( nElem>1 ); + for(ii=0; iinExpr; ii++){ + Select *pSel; + Expr *pExpr = pEList->a[ii].pExpr; + int nExprElem; + if( pExpr->op==TK_VECTOR ){ + assert( ExprUseXList(pExpr) ); + nExprElem = pExpr->x.pList->nExpr; + }else{ + nExprElem = 1; + } + if( nExprElem!=nElem ){ + sqlite3ErrorMsg(pParse, "IN(...) element has %d term%s - expected %d", + nExprElem, nExprElem>1?"s":"", nElem + ); + break; + } + assert( ExprUseXList(pExpr) ); + pSel = sqlite3SelectNew(pParse, pExpr->x.pList, 0, 0, 0, 0, 0, SF_Values,0); + pExpr->x.pList = 0; + if( pSel ){ + if( pRet ){ + pSel->op = TK_ALL; + pSel->pPrior = pRet; + } + pRet = pSel; + } + } + + if( pRet && pRet->pPrior ){ + pRet->selFlags |= SF_MultiValue; + } + sqlite3ExprListDelete(pParse->db, pEList); + return pRet; +} + +/* +** Join two expressions using an AND operator. If either expression is +** NULL, then just return the other expression. +** +** If one side or the other of the AND is known to be false, then instead +** of returning an AND expression, just return a constant expression with +** a value of false. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprAnd(Parse *pParse, Expr *pLeft, Expr *pRight){ + sqlite3 *db = pParse->db; + if( pLeft==0 ){ + return pRight; + }else if( pRight==0 ){ + return pLeft; + }else if( (ExprAlwaysFalse(pLeft) || ExprAlwaysFalse(pRight)) + && !IN_RENAME_OBJECT + ){ + sqlite3ExprDeferredDelete(pParse, pLeft); + sqlite3ExprDeferredDelete(pParse, pRight); + return sqlite3Expr(db, TK_INTEGER, "0"); + }else{ + return sqlite3PExpr(pParse, TK_AND, pLeft, pRight); + } +} + +/* +** Construct a new expression node for a function with multiple +** arguments. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprFunction( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* Argument list */ + const Token *pToken, /* Name of the function */ + int eDistinct /* SF_Distinct or SF_ALL or 0 */ +){ + Expr *pNew; + sqlite3 *db = pParse->db; + assert( pToken ); + pNew = sqlite3ExprAlloc(db, TK_FUNCTION, pToken, 1); + if( pNew==0 ){ + sqlite3ExprListDelete(db, pList); /* Avoid memory leak when malloc fails */ + return 0; + } + pNew->w.iOfst = (int)(pToken->z - pParse->zTail); + if( pList + && pList->nExpr > pParse->db->aLimit[SQLITE_LIMIT_FUNCTION_ARG] + && !pParse->nested + ){ + sqlite3ErrorMsg(pParse, "too many arguments on function %T", pToken); + } + pNew->x.pList = pList; + ExprSetProperty(pNew, EP_HasFunc); + assert( ExprUseXList(pNew) ); + sqlite3ExprSetHeightAndFlags(pParse, pNew); + if( eDistinct==SF_Distinct ) ExprSetProperty(pNew, EP_Distinct); + return pNew; +} + +/* +** Check to see if a function is usable according to current access +** rules: +** +** SQLITE_FUNC_DIRECT - Only usable from top-level SQL +** +** SQLITE_FUNC_UNSAFE - Usable if TRUSTED_SCHEMA or from +** top-level SQL +** +** If the function is not usable, create an error. +*/ +SQLITE_PRIVATE void sqlite3ExprFunctionUsable( + Parse *pParse, /* Parsing and code generating context */ + const Expr *pExpr, /* The function invocation */ + const FuncDef *pDef /* The function being invoked */ +){ + assert( !IN_RENAME_OBJECT ); + assert( (pDef->funcFlags & (SQLITE_FUNC_DIRECT|SQLITE_FUNC_UNSAFE))!=0 ); + if( ExprHasProperty(pExpr, EP_FromDDL) ){ + if( (pDef->funcFlags & SQLITE_FUNC_DIRECT)!=0 + || (pParse->db->flags & SQLITE_TrustedSchema)==0 + ){ + /* Functions prohibited in triggers and views if: + ** (1) tagged with SQLITE_DIRECTONLY + ** (2) not tagged with SQLITE_INNOCUOUS (which means it + ** is tagged with SQLITE_FUNC_UNSAFE) and + ** SQLITE_DBCONFIG_TRUSTED_SCHEMA is off (meaning + ** that the schema is possibly tainted). + */ + sqlite3ErrorMsg(pParse, "unsafe use of %#T()", pExpr); + } + } +} + +/* +** Assign a variable number to an expression that encodes a wildcard +** in the original SQL statement. +** +** Wildcards consisting of a single "?" are assigned the next sequential +** variable number. +** +** Wildcards of the form "?nnn" are assigned the number "nnn". We make +** sure "nnn" is not too big to avoid a denial of service attack when +** the SQL statement comes from an external source. +** +** Wildcards of the form ":aaa", "@aaa", or "$aaa" are assigned the same number +** as the previous instance of the same wildcard. Or if this is the first +** instance of the wildcard, the next sequential variable number is +** assigned. +*/ +SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse *pParse, Expr *pExpr, u32 n){ + sqlite3 *db = pParse->db; + const char *z; + ynVar x; + + if( pExpr==0 ) return; + assert( !ExprHasProperty(pExpr, EP_IntValue|EP_Reduced|EP_TokenOnly) ); + z = pExpr->u.zToken; + assert( z!=0 ); + assert( z[0]!=0 ); + assert( n==(u32)sqlite3Strlen30(z) ); + if( z[1]==0 ){ + /* Wildcard of the form "?". Assign the next variable number */ + assert( z[0]=='?' ); + x = (ynVar)(++pParse->nVar); + }else{ + int doAdd = 0; + if( z[0]=='?' ){ + /* Wildcard of the form "?nnn". Convert "nnn" to an integer and + ** use it as the variable number */ + i64 i; + int bOk; + if( n==2 ){ /*OPTIMIZATION-IF-TRUE*/ + i = z[1]-'0'; /* The common case of ?N for a single digit N */ + bOk = 1; + }else{ + bOk = 0==sqlite3Atoi64(&z[1], &i, n-1, SQLITE_UTF8); + } + testcase( i==0 ); + testcase( i==1 ); + testcase( i==db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER]-1 ); + testcase( i==db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER] ); + if( bOk==0 || i<1 || i>db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER] ){ + sqlite3ErrorMsg(pParse, "variable number must be between ?1 and ?%d", + db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER]); + sqlite3RecordErrorOffsetOfExpr(pParse->db, pExpr); + return; + } + x = (ynVar)i; + if( x>pParse->nVar ){ + pParse->nVar = (int)x; + doAdd = 1; + }else if( sqlite3VListNumToName(pParse->pVList, x)==0 ){ + doAdd = 1; + } + }else{ + /* Wildcards like ":aaa", "$aaa" or "@aaa". Reuse the same variable + ** number as the prior appearance of the same name, or if the name + ** has never appeared before, reuse the same variable number + */ + x = (ynVar)sqlite3VListNameToNum(pParse->pVList, z, n); + if( x==0 ){ + x = (ynVar)(++pParse->nVar); + doAdd = 1; + } + } + if( doAdd ){ + pParse->pVList = sqlite3VListAdd(db, pParse->pVList, z, n, x); + } + } + pExpr->iColumn = x; + if( x>db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER] ){ + sqlite3ErrorMsg(pParse, "too many SQL variables"); + sqlite3RecordErrorOffsetOfExpr(pParse->db, pExpr); + } +} + +/* +** Recursively delete an expression tree. +*/ +static SQLITE_NOINLINE void sqlite3ExprDeleteNN(sqlite3 *db, Expr *p){ + assert( p!=0 ); + assert( !ExprUseUValue(p) || p->u.iValue>=0 ); + assert( !ExprUseYWin(p) || !ExprUseYSub(p) ); + assert( !ExprUseYWin(p) || p->y.pWin!=0 || db->mallocFailed ); + assert( p->op!=TK_FUNCTION || !ExprUseYSub(p) ); +#ifdef SQLITE_DEBUG + if( ExprHasProperty(p, EP_Leaf) && !ExprHasProperty(p, EP_TokenOnly) ){ + assert( p->pLeft==0 ); + assert( p->pRight==0 ); + assert( !ExprUseXSelect(p) || p->x.pSelect==0 ); + assert( !ExprUseXList(p) || p->x.pList==0 ); + } +#endif + if( !ExprHasProperty(p, (EP_TokenOnly|EP_Leaf)) ){ + /* The Expr.x union is never used at the same time as Expr.pRight */ + assert( (ExprUseXList(p) && p->x.pList==0) || p->pRight==0 ); + if( p->pLeft && p->op!=TK_SELECT_COLUMN ) sqlite3ExprDeleteNN(db, p->pLeft); + if( p->pRight ){ + assert( !ExprHasProperty(p, EP_WinFunc) ); + sqlite3ExprDeleteNN(db, p->pRight); + }else if( ExprUseXSelect(p) ){ + assert( !ExprHasProperty(p, EP_WinFunc) ); + sqlite3SelectDelete(db, p->x.pSelect); + }else{ + sqlite3ExprListDelete(db, p->x.pList); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( ExprHasProperty(p, EP_WinFunc) ){ + sqlite3WindowDelete(db, p->y.pWin); + } +#endif + } + } + if( ExprHasProperty(p, EP_MemToken) ){ + assert( !ExprHasProperty(p, EP_IntValue) ); + sqlite3DbFree(db, p->u.zToken); + } + if( !ExprHasProperty(p, EP_Static) ){ + sqlite3DbFreeNN(db, p); + } +} +SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3 *db, Expr *p){ + if( p ) sqlite3ExprDeleteNN(db, p); +} + + +/* +** Arrange to cause pExpr to be deleted when the pParse is deleted. +** This is similar to sqlite3ExprDelete() except that the delete is +** deferred untilthe pParse is deleted. +** +** The pExpr might be deleted immediately on an OOM error. +** +** The deferred delete is (currently) implemented by adding the +** pExpr to the pParse->pConstExpr list with a register number of 0. +*/ +SQLITE_PRIVATE void sqlite3ExprDeferredDelete(Parse *pParse, Expr *pExpr){ + pParse->pConstExpr = + sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr); +} + +/* Invoke sqlite3RenameExprUnmap() and sqlite3ExprDelete() on the +** expression. +*/ +SQLITE_PRIVATE void sqlite3ExprUnmapAndDelete(Parse *pParse, Expr *p){ + if( p ){ + if( IN_RENAME_OBJECT ){ + sqlite3RenameExprUnmap(pParse, p); + } + sqlite3ExprDeleteNN(pParse->db, p); + } +} + +/* +** Return the number of bytes allocated for the expression structure +** passed as the first argument. This is always one of EXPR_FULLSIZE, +** EXPR_REDUCEDSIZE or EXPR_TOKENONLYSIZE. +*/ +static int exprStructSize(const Expr *p){ + if( ExprHasProperty(p, EP_TokenOnly) ) return EXPR_TOKENONLYSIZE; + if( ExprHasProperty(p, EP_Reduced) ) return EXPR_REDUCEDSIZE; + return EXPR_FULLSIZE; +} + +/* +** The dupedExpr*Size() routines each return the number of bytes required +** to store a copy of an expression or expression tree. They differ in +** how much of the tree is measured. +** +** dupedExprStructSize() Size of only the Expr structure +** dupedExprNodeSize() Size of Expr + space for token +** dupedExprSize() Expr + token + subtree components +** +*************************************************************************** +** +** The dupedExprStructSize() function returns two values OR-ed together: +** (1) the space required for a copy of the Expr structure only and +** (2) the EP_xxx flags that indicate what the structure size should be. +** The return values is always one of: +** +** EXPR_FULLSIZE +** EXPR_REDUCEDSIZE | EP_Reduced +** EXPR_TOKENONLYSIZE | EP_TokenOnly +** +** The size of the structure can be found by masking the return value +** of this routine with 0xfff. The flags can be found by masking the +** return value with EP_Reduced|EP_TokenOnly. +** +** Note that with flags==EXPRDUP_REDUCE, this routines works on full-size +** (unreduced) Expr objects as they or originally constructed by the parser. +** During expression analysis, extra information is computed and moved into +** later parts of the Expr object and that extra information might get chopped +** off if the expression is reduced. Note also that it does not work to +** make an EXPRDUP_REDUCE copy of a reduced expression. It is only legal +** to reduce a pristine expression tree from the parser. The implementation +** of dupedExprStructSize() contain multiple assert() statements that attempt +** to enforce this constraint. +*/ +static int dupedExprStructSize(const Expr *p, int flags){ + int nSize; + assert( flags==EXPRDUP_REDUCE || flags==0 ); /* Only one flag value allowed */ + assert( EXPR_FULLSIZE<=0xfff ); + assert( (0xfff & (EP_Reduced|EP_TokenOnly))==0 ); + if( 0==flags || p->op==TK_SELECT_COLUMN +#ifndef SQLITE_OMIT_WINDOWFUNC + || ExprHasProperty(p, EP_WinFunc) +#endif + ){ + nSize = EXPR_FULLSIZE; + }else{ + assert( !ExprHasProperty(p, EP_TokenOnly|EP_Reduced) ); + assert( !ExprHasProperty(p, EP_FromJoin) ); + assert( !ExprHasProperty(p, EP_MemToken) ); + assert( !ExprHasVVAProperty(p, EP_NoReduce) ); + if( p->pLeft || p->x.pList ){ + nSize = EXPR_REDUCEDSIZE | EP_Reduced; + }else{ + assert( p->pRight==0 ); + nSize = EXPR_TOKENONLYSIZE | EP_TokenOnly; + } + } + return nSize; +} + +/* +** This function returns the space in bytes required to store the copy +** of the Expr structure and a copy of the Expr.u.zToken string (if that +** string is defined.) +*/ +static int dupedExprNodeSize(const Expr *p, int flags){ + int nByte = dupedExprStructSize(p, flags) & 0xfff; + if( !ExprHasProperty(p, EP_IntValue) && p->u.zToken ){ + nByte += sqlite3Strlen30NN(p->u.zToken)+1; + } + return ROUND8(nByte); +} + +/* +** Return the number of bytes required to create a duplicate of the +** expression passed as the first argument. The second argument is a +** mask containing EXPRDUP_XXX flags. +** +** The value returned includes space to create a copy of the Expr struct +** itself and the buffer referred to by Expr.u.zToken, if any. +** +** If the EXPRDUP_REDUCE flag is set, then the return value includes +** space to duplicate all Expr nodes in the tree formed by Expr.pLeft +** and Expr.pRight variables (but not for any structures pointed to or +** descended from the Expr.x.pList or Expr.x.pSelect variables). +*/ +static int dupedExprSize(const Expr *p, int flags){ + int nByte = 0; + if( p ){ + nByte = dupedExprNodeSize(p, flags); + if( flags&EXPRDUP_REDUCE ){ + nByte += dupedExprSize(p->pLeft, flags) + dupedExprSize(p->pRight, flags); + } + } + return nByte; +} + +/* +** This function is similar to sqlite3ExprDup(), except that if pzBuffer +** is not NULL then *pzBuffer is assumed to point to a buffer large enough +** to store the copy of expression p, the copies of p->u.zToken +** (if applicable), and the copies of the p->pLeft and p->pRight expressions, +** if any. Before returning, *pzBuffer is set to the first byte past the +** portion of the buffer copied into by this function. +*/ +static Expr *exprDup(sqlite3 *db, const Expr *p, int dupFlags, u8 **pzBuffer){ + Expr *pNew; /* Value to return */ + u8 *zAlloc; /* Memory space from which to build Expr object */ + u32 staticFlag; /* EP_Static if space not obtained from malloc */ + + assert( db!=0 ); + assert( p ); + assert( dupFlags==0 || dupFlags==EXPRDUP_REDUCE ); + assert( pzBuffer==0 || dupFlags==EXPRDUP_REDUCE ); + + /* Figure out where to write the new Expr structure. */ + if( pzBuffer ){ + zAlloc = *pzBuffer; + staticFlag = EP_Static; + assert( zAlloc!=0 ); + }else{ + zAlloc = sqlite3DbMallocRawNN(db, dupedExprSize(p, dupFlags)); + staticFlag = 0; + } + pNew = (Expr *)zAlloc; + + if( pNew ){ + /* Set nNewSize to the size allocated for the structure pointed to + ** by pNew. This is either EXPR_FULLSIZE, EXPR_REDUCEDSIZE or + ** EXPR_TOKENONLYSIZE. nToken is set to the number of bytes consumed + ** by the copy of the p->u.zToken string (if any). + */ + const unsigned nStructSize = dupedExprStructSize(p, dupFlags); + const int nNewSize = nStructSize & 0xfff; + int nToken; + if( !ExprHasProperty(p, EP_IntValue) && p->u.zToken ){ + nToken = sqlite3Strlen30(p->u.zToken) + 1; + }else{ + nToken = 0; + } + if( dupFlags ){ + assert( ExprHasProperty(p, EP_Reduced)==0 ); + memcpy(zAlloc, p, nNewSize); + }else{ + u32 nSize = (u32)exprStructSize(p); + memcpy(zAlloc, p, nSize); + if( nSizeflags &= ~(EP_Reduced|EP_TokenOnly|EP_Static|EP_MemToken); + pNew->flags |= nStructSize & (EP_Reduced|EP_TokenOnly); + pNew->flags |= staticFlag; + ExprClearVVAProperties(pNew); + if( dupFlags ){ + ExprSetVVAProperty(pNew, EP_Immutable); + } + + /* Copy the p->u.zToken string, if any. */ + if( nToken ){ + char *zToken = pNew->u.zToken = (char*)&zAlloc[nNewSize]; + memcpy(zToken, p->u.zToken, nToken); + } + + if( 0==((p->flags|pNew->flags) & (EP_TokenOnly|EP_Leaf)) ){ + /* Fill in the pNew->x.pSelect or pNew->x.pList member. */ + if( ExprUseXSelect(p) ){ + pNew->x.pSelect = sqlite3SelectDup(db, p->x.pSelect, dupFlags); + }else{ + pNew->x.pList = sqlite3ExprListDup(db, p->x.pList, dupFlags); + } + } + + /* Fill in pNew->pLeft and pNew->pRight. */ + if( ExprHasProperty(pNew, EP_Reduced|EP_TokenOnly|EP_WinFunc) ){ + zAlloc += dupedExprNodeSize(p, dupFlags); + if( !ExprHasProperty(pNew, EP_TokenOnly|EP_Leaf) ){ + pNew->pLeft = p->pLeft ? + exprDup(db, p->pLeft, EXPRDUP_REDUCE, &zAlloc) : 0; + pNew->pRight = p->pRight ? + exprDup(db, p->pRight, EXPRDUP_REDUCE, &zAlloc) : 0; + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( ExprHasProperty(p, EP_WinFunc) ){ + pNew->y.pWin = sqlite3WindowDup(db, pNew, p->y.pWin); + assert( ExprHasProperty(pNew, EP_WinFunc) ); + } +#endif /* SQLITE_OMIT_WINDOWFUNC */ + if( pzBuffer ){ + *pzBuffer = zAlloc; + } + }else{ + if( !ExprHasProperty(p, EP_TokenOnly|EP_Leaf) ){ + if( pNew->op==TK_SELECT_COLUMN ){ + pNew->pLeft = p->pLeft; + assert( p->pRight==0 || p->pRight==p->pLeft + || ExprHasProperty(p->pLeft, EP_Subquery) ); + }else{ + pNew->pLeft = sqlite3ExprDup(db, p->pLeft, 0); + } + pNew->pRight = sqlite3ExprDup(db, p->pRight, 0); + } + } + } + return pNew; +} + +/* +** Create and return a deep copy of the object passed as the second +** argument. If an OOM condition is encountered, NULL is returned +** and the db->mallocFailed flag set. +*/ +#ifndef SQLITE_OMIT_CTE +SQLITE_PRIVATE With *sqlite3WithDup(sqlite3 *db, With *p){ + With *pRet = 0; + if( p ){ + sqlite3_int64 nByte = sizeof(*p) + sizeof(p->a[0]) * (p->nCte-1); + pRet = sqlite3DbMallocZero(db, nByte); + if( pRet ){ + int i; + pRet->nCte = p->nCte; + for(i=0; inCte; i++){ + pRet->a[i].pSelect = sqlite3SelectDup(db, p->a[i].pSelect, 0); + pRet->a[i].pCols = sqlite3ExprListDup(db, p->a[i].pCols, 0); + pRet->a[i].zName = sqlite3DbStrDup(db, p->a[i].zName); + } + } + } + return pRet; +} +#else +# define sqlite3WithDup(x,y) 0 +#endif + +#ifndef SQLITE_OMIT_WINDOWFUNC +/* +** The gatherSelectWindows() procedure and its helper routine +** gatherSelectWindowsCallback() are used to scan all the expressions +** an a newly duplicated SELECT statement and gather all of the Window +** objects found there, assembling them onto the linked list at Select->pWin. +*/ +static int gatherSelectWindowsCallback(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_FUNCTION && ExprHasProperty(pExpr, EP_WinFunc) ){ + Select *pSelect = pWalker->u.pSelect; + Window *pWin = pExpr->y.pWin; + assert( pWin ); + assert( IsWindowFunc(pExpr) ); + assert( pWin->ppThis==0 ); + sqlite3WindowLink(pSelect, pWin); + } + return WRC_Continue; +} +static int gatherSelectWindowsSelectCallback(Walker *pWalker, Select *p){ + return p==pWalker->u.pSelect ? WRC_Continue : WRC_Prune; +} +static void gatherSelectWindows(Select *p){ + Walker w; + w.xExprCallback = gatherSelectWindowsCallback; + w.xSelectCallback = gatherSelectWindowsSelectCallback; + w.xSelectCallback2 = 0; + w.pParse = 0; + w.u.pSelect = p; + sqlite3WalkSelect(&w, p); +} +#endif + + +/* +** The following group of routines make deep copies of expressions, +** expression lists, ID lists, and select statements. The copies can +** be deleted (by being passed to their respective ...Delete() routines) +** without effecting the originals. +** +** The expression list, ID, and source lists return by sqlite3ExprListDup(), +** sqlite3IdListDup(), and sqlite3SrcListDup() can not be further expanded +** by subsequent calls to sqlite*ListAppend() routines. +** +** Any tables that the SrcList might point to are not duplicated. +** +** The flags parameter contains a combination of the EXPRDUP_XXX flags. +** If the EXPRDUP_REDUCE flag is set, then the structure returned is a +** truncated version of the usual Expr structure that will be stored as +** part of the in-memory representation of the database schema. +*/ +SQLITE_PRIVATE Expr *sqlite3ExprDup(sqlite3 *db, const Expr *p, int flags){ + assert( flags==0 || flags==EXPRDUP_REDUCE ); + return p ? exprDup(db, p, flags, 0) : 0; +} +SQLITE_PRIVATE ExprList *sqlite3ExprListDup(sqlite3 *db, const ExprList *p, int flags){ + ExprList *pNew; + struct ExprList_item *pItem; + const struct ExprList_item *pOldItem; + int i; + Expr *pPriorSelectColOld = 0; + Expr *pPriorSelectColNew = 0; + assert( db!=0 ); + if( p==0 ) return 0; + pNew = sqlite3DbMallocRawNN(db, sqlite3DbMallocSize(db, p)); + if( pNew==0 ) return 0; + pNew->nExpr = p->nExpr; + pNew->nAlloc = p->nAlloc; + pItem = pNew->a; + pOldItem = p->a; + for(i=0; inExpr; i++, pItem++, pOldItem++){ + Expr *pOldExpr = pOldItem->pExpr; + Expr *pNewExpr; + pItem->pExpr = sqlite3ExprDup(db, pOldExpr, flags); + if( pOldExpr + && pOldExpr->op==TK_SELECT_COLUMN + && (pNewExpr = pItem->pExpr)!=0 + ){ + if( pNewExpr->pRight ){ + pPriorSelectColOld = pOldExpr->pRight; + pPriorSelectColNew = pNewExpr->pRight; + pNewExpr->pLeft = pNewExpr->pRight; + }else{ + if( pOldExpr->pLeft!=pPriorSelectColOld ){ + pPriorSelectColOld = pOldExpr->pLeft; + pPriorSelectColNew = sqlite3ExprDup(db, pPriorSelectColOld, flags); + pNewExpr->pRight = pPriorSelectColNew; + } + pNewExpr->pLeft = pPriorSelectColNew; + } + } + pItem->zEName = sqlite3DbStrDup(db, pOldItem->zEName); + pItem->sortFlags = pOldItem->sortFlags; + pItem->eEName = pOldItem->eEName; + pItem->done = 0; + pItem->bNulls = pOldItem->bNulls; + pItem->bSorterRef = pOldItem->bSorterRef; + pItem->u = pOldItem->u; + } + return pNew; +} + +/* +** If cursors, triggers, views and subqueries are all omitted from +** the build, then none of the following routines, except for +** sqlite3SelectDup(), can be called. sqlite3SelectDup() is sometimes +** called with a NULL argument. +*/ +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_TRIGGER) \ + || !defined(SQLITE_OMIT_SUBQUERY) +SQLITE_PRIVATE SrcList *sqlite3SrcListDup(sqlite3 *db, const SrcList *p, int flags){ + SrcList *pNew; + int i; + int nByte; + assert( db!=0 ); + if( p==0 ) return 0; + nByte = sizeof(*p) + (p->nSrc>0 ? sizeof(p->a[0]) * (p->nSrc-1) : 0); + pNew = sqlite3DbMallocRawNN(db, nByte ); + if( pNew==0 ) return 0; + pNew->nSrc = pNew->nAlloc = p->nSrc; + for(i=0; inSrc; i++){ + SrcItem *pNewItem = &pNew->a[i]; + const SrcItem *pOldItem = &p->a[i]; + Table *pTab; + pNewItem->pSchema = pOldItem->pSchema; + pNewItem->zDatabase = sqlite3DbStrDup(db, pOldItem->zDatabase); + pNewItem->zName = sqlite3DbStrDup(db, pOldItem->zName); + pNewItem->zAlias = sqlite3DbStrDup(db, pOldItem->zAlias); + pNewItem->fg = pOldItem->fg; + pNewItem->iCursor = pOldItem->iCursor; + pNewItem->addrFillSub = pOldItem->addrFillSub; + pNewItem->regReturn = pOldItem->regReturn; + if( pNewItem->fg.isIndexedBy ){ + pNewItem->u1.zIndexedBy = sqlite3DbStrDup(db, pOldItem->u1.zIndexedBy); + } + pNewItem->u2 = pOldItem->u2; + if( pNewItem->fg.isCte ){ + pNewItem->u2.pCteUse->nUse++; + } + if( pNewItem->fg.isTabFunc ){ + pNewItem->u1.pFuncArg = + sqlite3ExprListDup(db, pOldItem->u1.pFuncArg, flags); + } + pTab = pNewItem->pTab = pOldItem->pTab; + if( pTab ){ + pTab->nTabRef++; + } + pNewItem->pSelect = sqlite3SelectDup(db, pOldItem->pSelect, flags); + pNewItem->pOn = sqlite3ExprDup(db, pOldItem->pOn, flags); + pNewItem->pUsing = sqlite3IdListDup(db, pOldItem->pUsing); + pNewItem->colUsed = pOldItem->colUsed; + } + return pNew; +} +SQLITE_PRIVATE IdList *sqlite3IdListDup(sqlite3 *db, const IdList *p){ + IdList *pNew; + int i; + assert( db!=0 ); + if( p==0 ) return 0; + pNew = sqlite3DbMallocRawNN(db, sizeof(*pNew) ); + if( pNew==0 ) return 0; + pNew->nId = p->nId; + pNew->a = sqlite3DbMallocRawNN(db, p->nId*sizeof(p->a[0]) ); + if( pNew->a==0 ){ + sqlite3DbFreeNN(db, pNew); + return 0; + } + /* Note that because the size of the allocation for p->a[] is not + ** necessarily a power of two, sqlite3IdListAppend() may not be called + ** on the duplicate created by this function. */ + for(i=0; inId; i++){ + struct IdList_item *pNewItem = &pNew->a[i]; + struct IdList_item *pOldItem = &p->a[i]; + pNewItem->zName = sqlite3DbStrDup(db, pOldItem->zName); + pNewItem->idx = pOldItem->idx; + } + return pNew; +} +SQLITE_PRIVATE Select *sqlite3SelectDup(sqlite3 *db, const Select *pDup, int flags){ + Select *pRet = 0; + Select *pNext = 0; + Select **pp = &pRet; + const Select *p; + + assert( db!=0 ); + for(p=pDup; p; p=p->pPrior){ + Select *pNew = sqlite3DbMallocRawNN(db, sizeof(*p) ); + if( pNew==0 ) break; + pNew->pEList = sqlite3ExprListDup(db, p->pEList, flags); + pNew->pSrc = sqlite3SrcListDup(db, p->pSrc, flags); + pNew->pWhere = sqlite3ExprDup(db, p->pWhere, flags); + pNew->pGroupBy = sqlite3ExprListDup(db, p->pGroupBy, flags); + pNew->pHaving = sqlite3ExprDup(db, p->pHaving, flags); + pNew->pOrderBy = sqlite3ExprListDup(db, p->pOrderBy, flags); + pNew->op = p->op; + pNew->pNext = pNext; + pNew->pPrior = 0; + pNew->pLimit = sqlite3ExprDup(db, p->pLimit, flags); + pNew->iLimit = 0; + pNew->iOffset = 0; + pNew->selFlags = p->selFlags & ~SF_UsesEphemeral; + pNew->addrOpenEphm[0] = -1; + pNew->addrOpenEphm[1] = -1; + pNew->nSelectRow = p->nSelectRow; + pNew->pWith = sqlite3WithDup(db, p->pWith); +#ifndef SQLITE_OMIT_WINDOWFUNC + pNew->pWin = 0; + pNew->pWinDefn = sqlite3WindowListDup(db, p->pWinDefn); + if( p->pWin && db->mallocFailed==0 ) gatherSelectWindows(pNew); +#endif + pNew->selId = p->selId; + if( db->mallocFailed ){ + /* Any prior OOM might have left the Select object incomplete. + ** Delete the whole thing rather than allow an incomplete Select + ** to be used by the code generator. */ + pNew->pNext = 0; + sqlite3SelectDelete(db, pNew); + break; + } + *pp = pNew; + pp = &pNew->pPrior; + pNext = pNew; + } + + return pRet; +} +#else +SQLITE_PRIVATE Select *sqlite3SelectDup(sqlite3 *db, const Select *p, int flags){ + assert( p==0 ); + return 0; +} +#endif + + +/* +** Add a new element to the end of an expression list. If pList is +** initially NULL, then create a new expression list. +** +** The pList argument must be either NULL or a pointer to an ExprList +** obtained from a prior call to sqlite3ExprListAppend(). This routine +** may not be used with an ExprList obtained from sqlite3ExprListDup(). +** Reason: This routine assumes that the number of slots in pList->a[] +** is a power of two. That is true for sqlite3ExprListAppend() returns +** but is not necessarily true from the return value of sqlite3ExprListDup(). +** +** If a memory allocation error occurs, the entire list is freed and +** NULL is returned. If non-NULL is returned, then it is guaranteed +** that the new entry was successfully appended. +*/ +static const struct ExprList_item zeroItem = {0}; +SQLITE_PRIVATE SQLITE_NOINLINE ExprList *sqlite3ExprListAppendNew( + sqlite3 *db, /* Database handle. Used for memory allocation */ + Expr *pExpr /* Expression to be appended. Might be NULL */ +){ + struct ExprList_item *pItem; + ExprList *pList; + + pList = sqlite3DbMallocRawNN(db, sizeof(ExprList)+sizeof(pList->a[0])*4 ); + if( pList==0 ){ + sqlite3ExprDelete(db, pExpr); + return 0; + } + pList->nAlloc = 4; + pList->nExpr = 1; + pItem = &pList->a[0]; + *pItem = zeroItem; + pItem->pExpr = pExpr; + return pList; +} +SQLITE_PRIVATE SQLITE_NOINLINE ExprList *sqlite3ExprListAppendGrow( + sqlite3 *db, /* Database handle. Used for memory allocation */ + ExprList *pList, /* List to which to append. Might be NULL */ + Expr *pExpr /* Expression to be appended. Might be NULL */ +){ + struct ExprList_item *pItem; + ExprList *pNew; + pList->nAlloc *= 2; + pNew = sqlite3DbRealloc(db, pList, + sizeof(*pList)+(pList->nAlloc-1)*sizeof(pList->a[0])); + if( pNew==0 ){ + sqlite3ExprListDelete(db, pList); + sqlite3ExprDelete(db, pExpr); + return 0; + }else{ + pList = pNew; + } + pItem = &pList->a[pList->nExpr++]; + *pItem = zeroItem; + pItem->pExpr = pExpr; + return pList; +} +SQLITE_PRIVATE ExprList *sqlite3ExprListAppend( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* List to which to append. Might be NULL */ + Expr *pExpr /* Expression to be appended. Might be NULL */ +){ + struct ExprList_item *pItem; + if( pList==0 ){ + return sqlite3ExprListAppendNew(pParse->db,pExpr); + } + if( pList->nAllocnExpr+1 ){ + return sqlite3ExprListAppendGrow(pParse->db,pList,pExpr); + } + pItem = &pList->a[pList->nExpr++]; + *pItem = zeroItem; + pItem->pExpr = pExpr; + return pList; +} + +/* +** pColumns and pExpr form a vector assignment which is part of the SET +** clause of an UPDATE statement. Like this: +** +** (a,b,c) = (expr1,expr2,expr3) +** Or: (a,b,c) = (SELECT x,y,z FROM ....) +** +** For each term of the vector assignment, append new entries to the +** expression list pList. In the case of a subquery on the RHS, append +** TK_SELECT_COLUMN expressions. +*/ +SQLITE_PRIVATE ExprList *sqlite3ExprListAppendVector( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* List to which to append. Might be NULL */ + IdList *pColumns, /* List of names of LHS of the assignment */ + Expr *pExpr /* Vector expression to be appended. Might be NULL */ +){ + sqlite3 *db = pParse->db; + int n; + int i; + int iFirst = pList ? pList->nExpr : 0; + /* pColumns can only be NULL due to an OOM but an OOM will cause an + ** exit prior to this routine being invoked */ + if( NEVER(pColumns==0) ) goto vector_append_error; + if( pExpr==0 ) goto vector_append_error; + + /* If the RHS is a vector, then we can immediately check to see that + ** the size of the RHS and LHS match. But if the RHS is a SELECT, + ** wildcards ("*") in the result set of the SELECT must be expanded before + ** we can do the size check, so defer the size check until code generation. + */ + if( pExpr->op!=TK_SELECT && pColumns->nId!=(n=sqlite3ExprVectorSize(pExpr)) ){ + sqlite3ErrorMsg(pParse, "%d columns assigned %d values", + pColumns->nId, n); + goto vector_append_error; + } + + for(i=0; inId; i++){ + Expr *pSubExpr = sqlite3ExprForVectorField(pParse, pExpr, i, pColumns->nId); + assert( pSubExpr!=0 || db->mallocFailed ); + if( pSubExpr==0 ) continue; + pList = sqlite3ExprListAppend(pParse, pList, pSubExpr); + if( pList ){ + assert( pList->nExpr==iFirst+i+1 ); + pList->a[pList->nExpr-1].zEName = pColumns->a[i].zName; + pColumns->a[i].zName = 0; + } + } + + if( !db->mallocFailed && pExpr->op==TK_SELECT && ALWAYS(pList!=0) ){ + Expr *pFirst = pList->a[iFirst].pExpr; + assert( pFirst!=0 ); + assert( pFirst->op==TK_SELECT_COLUMN ); + + /* Store the SELECT statement in pRight so it will be deleted when + ** sqlite3ExprListDelete() is called */ + pFirst->pRight = pExpr; + pExpr = 0; + + /* Remember the size of the LHS in iTable so that we can check that + ** the RHS and LHS sizes match during code generation. */ + pFirst->iTable = pColumns->nId; + } + +vector_append_error: + sqlite3ExprUnmapAndDelete(pParse, pExpr); + sqlite3IdListDelete(db, pColumns); + return pList; +} + +/* +** Set the sort order for the last element on the given ExprList. +*/ +SQLITE_PRIVATE void sqlite3ExprListSetSortOrder(ExprList *p, int iSortOrder, int eNulls){ + struct ExprList_item *pItem; + if( p==0 ) return; + assert( p->nExpr>0 ); + + assert( SQLITE_SO_UNDEFINED<0 && SQLITE_SO_ASC==0 && SQLITE_SO_DESC>0 ); + assert( iSortOrder==SQLITE_SO_UNDEFINED + || iSortOrder==SQLITE_SO_ASC + || iSortOrder==SQLITE_SO_DESC + ); + assert( eNulls==SQLITE_SO_UNDEFINED + || eNulls==SQLITE_SO_ASC + || eNulls==SQLITE_SO_DESC + ); + + pItem = &p->a[p->nExpr-1]; + assert( pItem->bNulls==0 ); + if( iSortOrder==SQLITE_SO_UNDEFINED ){ + iSortOrder = SQLITE_SO_ASC; + } + pItem->sortFlags = (u8)iSortOrder; + + if( eNulls!=SQLITE_SO_UNDEFINED ){ + pItem->bNulls = 1; + if( iSortOrder!=eNulls ){ + pItem->sortFlags |= KEYINFO_ORDER_BIGNULL; + } + } +} + +/* +** Set the ExprList.a[].zEName element of the most recently added item +** on the expression list. +** +** pList might be NULL following an OOM error. But pName should never be +** NULL. If a memory allocation fails, the pParse->db->mallocFailed flag +** is set. +*/ +SQLITE_PRIVATE void sqlite3ExprListSetName( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* List to which to add the span. */ + const Token *pName, /* Name to be added */ + int dequote /* True to cause the name to be dequoted */ +){ + assert( pList!=0 || pParse->db->mallocFailed!=0 ); + assert( pParse->eParseMode!=PARSE_MODE_UNMAP || dequote==0 ); + if( pList ){ + struct ExprList_item *pItem; + assert( pList->nExpr>0 ); + pItem = &pList->a[pList->nExpr-1]; + assert( pItem->zEName==0 ); + assert( pItem->eEName==ENAME_NAME ); + pItem->zEName = sqlite3DbStrNDup(pParse->db, pName->z, pName->n); + if( dequote ){ + /* If dequote==0, then pName->z does not point to part of a DDL + ** statement handled by the parser. And so no token need be added + ** to the token-map. */ + sqlite3Dequote(pItem->zEName); + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenMap(pParse, (const void*)pItem->zEName, pName); + } + } + } +} + +/* +** Set the ExprList.a[].zSpan element of the most recently added item +** on the expression list. +** +** pList might be NULL following an OOM error. But pSpan should never be +** NULL. If a memory allocation fails, the pParse->db->mallocFailed flag +** is set. +*/ +SQLITE_PRIVATE void sqlite3ExprListSetSpan( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* List to which to add the span. */ + const char *zStart, /* Start of the span */ + const char *zEnd /* End of the span */ +){ + sqlite3 *db = pParse->db; + assert( pList!=0 || db->mallocFailed!=0 ); + if( pList ){ + struct ExprList_item *pItem = &pList->a[pList->nExpr-1]; + assert( pList->nExpr>0 ); + if( pItem->zEName==0 ){ + pItem->zEName = sqlite3DbSpanDup(db, zStart, zEnd); + pItem->eEName = ENAME_SPAN; + } + } +} + +/* +** If the expression list pEList contains more than iLimit elements, +** leave an error message in pParse. +*/ +SQLITE_PRIVATE void sqlite3ExprListCheckLength( + Parse *pParse, + ExprList *pEList, + const char *zObject +){ + int mx = pParse->db->aLimit[SQLITE_LIMIT_COLUMN]; + testcase( pEList && pEList->nExpr==mx ); + testcase( pEList && pEList->nExpr==mx+1 ); + if( pEList && pEList->nExpr>mx ){ + sqlite3ErrorMsg(pParse, "too many columns in %s", zObject); + } +} + +/* +** Delete an entire expression list. +*/ +static SQLITE_NOINLINE void exprListDeleteNN(sqlite3 *db, ExprList *pList){ + int i = pList->nExpr; + struct ExprList_item *pItem = pList->a; + assert( pList->nExpr>0 ); + do{ + sqlite3ExprDelete(db, pItem->pExpr); + sqlite3DbFree(db, pItem->zEName); + pItem++; + }while( --i>0 ); + sqlite3DbFreeNN(db, pList); +} +SQLITE_PRIVATE void sqlite3ExprListDelete(sqlite3 *db, ExprList *pList){ + if( pList ) exprListDeleteNN(db, pList); +} + +/* +** Return the bitwise-OR of all Expr.flags fields in the given +** ExprList. +*/ +SQLITE_PRIVATE u32 sqlite3ExprListFlags(const ExprList *pList){ + int i; + u32 m = 0; + assert( pList!=0 ); + for(i=0; inExpr; i++){ + Expr *pExpr = pList->a[i].pExpr; + assert( pExpr!=0 ); + m |= pExpr->flags; + } + return m; +} + +/* +** This is a SELECT-node callback for the expression walker that +** always "fails". By "fail" in this case, we mean set +** pWalker->eCode to zero and abort. +** +** This callback is used by multiple expression walkers. +*/ +SQLITE_PRIVATE int sqlite3SelectWalkFail(Walker *pWalker, Select *NotUsed){ + UNUSED_PARAMETER(NotUsed); + pWalker->eCode = 0; + return WRC_Abort; +} + +/* +** Check the input string to see if it is "true" or "false" (in any case). +** +** If the string is.... Return +** "true" EP_IsTrue +** "false" EP_IsFalse +** anything else 0 +*/ +SQLITE_PRIVATE u32 sqlite3IsTrueOrFalse(const char *zIn){ + if( sqlite3StrICmp(zIn, "true")==0 ) return EP_IsTrue; + if( sqlite3StrICmp(zIn, "false")==0 ) return EP_IsFalse; + return 0; +} + + +/* +** If the input expression is an ID with the name "true" or "false" +** then convert it into an TK_TRUEFALSE term. Return non-zero if +** the conversion happened, and zero if the expression is unaltered. +*/ +SQLITE_PRIVATE int sqlite3ExprIdToTrueFalse(Expr *pExpr){ + u32 v; + assert( pExpr->op==TK_ID || pExpr->op==TK_STRING ); + if( !ExprHasProperty(pExpr, EP_Quoted|EP_IntValue) + && (v = sqlite3IsTrueOrFalse(pExpr->u.zToken))!=0 + ){ + pExpr->op = TK_TRUEFALSE; + ExprSetProperty(pExpr, v); + return 1; + } + return 0; +} + +/* +** The argument must be a TK_TRUEFALSE Expr node. Return 1 if it is TRUE +** and 0 if it is FALSE. +*/ +SQLITE_PRIVATE int sqlite3ExprTruthValue(const Expr *pExpr){ + pExpr = sqlite3ExprSkipCollate((Expr*)pExpr); + assert( pExpr->op==TK_TRUEFALSE ); + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + assert( sqlite3StrICmp(pExpr->u.zToken,"true")==0 + || sqlite3StrICmp(pExpr->u.zToken,"false")==0 ); + return pExpr->u.zToken[4]==0; +} + +/* +** If pExpr is an AND or OR expression, try to simplify it by eliminating +** terms that are always true or false. Return the simplified expression. +** Or return the original expression if no simplification is possible. +** +** Examples: +** +** (x<10) AND true => (x<10) +** (x<10) AND false => false +** (x<10) AND (y=22 OR false) => (x<10) AND (y=22) +** (x<10) AND (y=22 OR true) => (x<10) +** (y=22) OR true => true +*/ +SQLITE_PRIVATE Expr *sqlite3ExprSimplifiedAndOr(Expr *pExpr){ + assert( pExpr!=0 ); + if( pExpr->op==TK_AND || pExpr->op==TK_OR ){ + Expr *pRight = sqlite3ExprSimplifiedAndOr(pExpr->pRight); + Expr *pLeft = sqlite3ExprSimplifiedAndOr(pExpr->pLeft); + if( ExprAlwaysTrue(pLeft) || ExprAlwaysFalse(pRight) ){ + pExpr = pExpr->op==TK_AND ? pRight : pLeft; + }else if( ExprAlwaysTrue(pRight) || ExprAlwaysFalse(pLeft) ){ + pExpr = pExpr->op==TK_AND ? pLeft : pRight; + } + } + return pExpr; +} + + +/* +** These routines are Walker callbacks used to check expressions to +** see if they are "constant" for some definition of constant. The +** Walker.eCode value determines the type of "constant" we are looking +** for. +** +** These callback routines are used to implement the following: +** +** sqlite3ExprIsConstant() pWalker->eCode==1 +** sqlite3ExprIsConstantNotJoin() pWalker->eCode==2 +** sqlite3ExprIsTableConstant() pWalker->eCode==3 +** sqlite3ExprIsConstantOrFunction() pWalker->eCode==4 or 5 +** +** In all cases, the callbacks set Walker.eCode=0 and abort if the expression +** is found to not be a constant. +** +** The sqlite3ExprIsConstantOrFunction() is used for evaluating DEFAULT +** expressions in a CREATE TABLE statement. The Walker.eCode value is 5 +** when parsing an existing schema out of the sqlite_schema table and 4 +** when processing a new CREATE TABLE statement. A bound parameter raises +** an error for new statements, but is silently converted +** to NULL for existing schemas. This allows sqlite_schema tables that +** contain a bound parameter because they were generated by older versions +** of SQLite to be parsed by newer versions of SQLite without raising a +** malformed schema error. +*/ +static int exprNodeIsConstant(Walker *pWalker, Expr *pExpr){ + + /* If pWalker->eCode is 2 then any term of the expression that comes from + ** the ON or USING clauses of a left join disqualifies the expression + ** from being considered constant. */ + if( pWalker->eCode==2 && ExprHasProperty(pExpr, EP_FromJoin) ){ + pWalker->eCode = 0; + return WRC_Abort; + } + + switch( pExpr->op ){ + /* Consider functions to be constant if all their arguments are constant + ** and either pWalker->eCode==4 or 5 or the function has the + ** SQLITE_FUNC_CONST flag. */ + case TK_FUNCTION: + if( (pWalker->eCode>=4 || ExprHasProperty(pExpr,EP_ConstFunc)) + && !ExprHasProperty(pExpr, EP_WinFunc) + ){ + if( pWalker->eCode==5 ) ExprSetProperty(pExpr, EP_FromDDL); + return WRC_Continue; + }else{ + pWalker->eCode = 0; + return WRC_Abort; + } + case TK_ID: + /* Convert "true" or "false" in a DEFAULT clause into the + ** appropriate TK_TRUEFALSE operator */ + if( sqlite3ExprIdToTrueFalse(pExpr) ){ + return WRC_Prune; + } + /* no break */ deliberate_fall_through + case TK_COLUMN: + case TK_AGG_FUNCTION: + case TK_AGG_COLUMN: + testcase( pExpr->op==TK_ID ); + testcase( pExpr->op==TK_COLUMN ); + testcase( pExpr->op==TK_AGG_FUNCTION ); + testcase( pExpr->op==TK_AGG_COLUMN ); + if( ExprHasProperty(pExpr, EP_FixedCol) && pWalker->eCode!=2 ){ + return WRC_Continue; + } + if( pWalker->eCode==3 && pExpr->iTable==pWalker->u.iCur ){ + return WRC_Continue; + } + /* no break */ deliberate_fall_through + case TK_IF_NULL_ROW: + case TK_REGISTER: + case TK_DOT: + testcase( pExpr->op==TK_REGISTER ); + testcase( pExpr->op==TK_IF_NULL_ROW ); + testcase( pExpr->op==TK_DOT ); + pWalker->eCode = 0; + return WRC_Abort; + case TK_VARIABLE: + if( pWalker->eCode==5 ){ + /* Silently convert bound parameters that appear inside of CREATE + ** statements into a NULL when parsing the CREATE statement text out + ** of the sqlite_schema table */ + pExpr->op = TK_NULL; + }else if( pWalker->eCode==4 ){ + /* A bound parameter in a CREATE statement that originates from + ** sqlite3_prepare() causes an error */ + pWalker->eCode = 0; + return WRC_Abort; + } + /* no break */ deliberate_fall_through + default: + testcase( pExpr->op==TK_SELECT ); /* sqlite3SelectWalkFail() disallows */ + testcase( pExpr->op==TK_EXISTS ); /* sqlite3SelectWalkFail() disallows */ + return WRC_Continue; + } +} +static int exprIsConst(Expr *p, int initFlag, int iCur){ + Walker w; + w.eCode = initFlag; + w.xExprCallback = exprNodeIsConstant; + w.xSelectCallback = sqlite3SelectWalkFail; +#ifdef SQLITE_DEBUG + w.xSelectCallback2 = sqlite3SelectWalkAssert2; +#endif + w.u.iCur = iCur; + sqlite3WalkExpr(&w, p); + return w.eCode; +} + +/* +** Walk an expression tree. Return non-zero if the expression is constant +** and 0 if it involves variables or function calls. +** +** For the purposes of this function, a double-quoted string (ex: "abc") +** is considered a variable but a single-quoted string (ex: 'abc') is +** a constant. +*/ +SQLITE_PRIVATE int sqlite3ExprIsConstant(Expr *p){ + return exprIsConst(p, 1, 0); +} + +/* +** Walk an expression tree. Return non-zero if +** +** (1) the expression is constant, and +** (2) the expression does originate in the ON or USING clause +** of a LEFT JOIN, and +** (3) the expression does not contain any EP_FixedCol TK_COLUMN +** operands created by the constant propagation optimization. +** +** When this routine returns true, it indicates that the expression +** can be added to the pParse->pConstExpr list and evaluated once when +** the prepared statement starts up. See sqlite3ExprCodeRunJustOnce(). +*/ +SQLITE_PRIVATE int sqlite3ExprIsConstantNotJoin(Expr *p){ + return exprIsConst(p, 2, 0); +} + +/* +** Walk an expression tree. Return non-zero if the expression is constant +** for any single row of the table with cursor iCur. In other words, the +** expression must not refer to any non-deterministic function nor any +** table other than iCur. +*/ +SQLITE_PRIVATE int sqlite3ExprIsTableConstant(Expr *p, int iCur){ + return exprIsConst(p, 3, iCur); +} + +/* +** Check pExpr to see if it is an invariant constraint on data source pSrc. +** This is an optimization. False negatives will perhaps cause slower +** queries, but false positives will yield incorrect answers. So when in +** double, return 0. +** +** To be an invariant constraint, the following must be true: +** +** (1) pExpr cannot refer to any table other than pSrc->iCursor. +** +** (2) pExpr cannot use subqueries or non-deterministic functions. +** +** (*) ** Not applicable to this branch ** +** +** (4) If pSrc is the right operand of a LEFT JOIN, then... +** (4a) pExpr must come from an ON clause.. +** (4b) and specifically the ON clause associated with the LEFT JOIN. +** +** (5) If pSrc is not the right operand of a LEFT JOIN or the left +** operand of a RIGHT JOIN, then pExpr must be from the WHERE +** clause, not an ON clause. +*/ +SQLITE_PRIVATE int sqlite3ExprIsTableConstraint(Expr *pExpr, const SrcItem *pSrc){ + if( pSrc->fg.jointype & JT_LEFT ){ + if( !ExprHasProperty(pExpr, EP_FromJoin) ) return 0; /* rule (4a) */ + if( pExpr->w.iRightJoinTable!=pSrc->iCursor ) return 0; /* rule (4b) */ + }else{ + if( ExprHasProperty(pExpr, EP_FromJoin) ) return 0; /* rule (5) */ + } + return sqlite3ExprIsTableConstant(pExpr, pSrc->iCursor); /* rules (1), (2) */ +} + + +/* +** sqlite3WalkExpr() callback used by sqlite3ExprIsConstantOrGroupBy(). +*/ +static int exprNodeIsConstantOrGroupBy(Walker *pWalker, Expr *pExpr){ + ExprList *pGroupBy = pWalker->u.pGroupBy; + int i; + + /* Check if pExpr is identical to any GROUP BY term. If so, consider + ** it constant. */ + for(i=0; inExpr; i++){ + Expr *p = pGroupBy->a[i].pExpr; + if( sqlite3ExprCompare(0, pExpr, p, -1)<2 ){ + CollSeq *pColl = sqlite3ExprNNCollSeq(pWalker->pParse, p); + if( sqlite3IsBinary(pColl) ){ + return WRC_Prune; + } + } + } + + /* Check if pExpr is a sub-select. If so, consider it variable. */ + if( ExprUseXSelect(pExpr) ){ + pWalker->eCode = 0; + return WRC_Abort; + } + + return exprNodeIsConstant(pWalker, pExpr); +} + +/* +** Walk the expression tree passed as the first argument. Return non-zero +** if the expression consists entirely of constants or copies of terms +** in pGroupBy that sort with the BINARY collation sequence. +** +** This routine is used to determine if a term of the HAVING clause can +** be promoted into the WHERE clause. In order for such a promotion to work, +** the value of the HAVING clause term must be the same for all members of +** a "group". The requirement that the GROUP BY term must be BINARY +** assumes that no other collating sequence will have a finer-grained +** grouping than binary. In other words (A=B COLLATE binary) implies +** A=B in every other collating sequence. The requirement that the +** GROUP BY be BINARY is stricter than necessary. It would also work +** to promote HAVING clauses that use the same alternative collating +** sequence as the GROUP BY term, but that is much harder to check, +** alternative collating sequences are uncommon, and this is only an +** optimization, so we take the easy way out and simply require the +** GROUP BY to use the BINARY collating sequence. +*/ +SQLITE_PRIVATE int sqlite3ExprIsConstantOrGroupBy(Parse *pParse, Expr *p, ExprList *pGroupBy){ + Walker w; + w.eCode = 1; + w.xExprCallback = exprNodeIsConstantOrGroupBy; + w.xSelectCallback = 0; + w.u.pGroupBy = pGroupBy; + w.pParse = pParse; + sqlite3WalkExpr(&w, p); + return w.eCode; +} + +/* +** Walk an expression tree for the DEFAULT field of a column definition +** in a CREATE TABLE statement. Return non-zero if the expression is +** acceptable for use as a DEFAULT. That is to say, return non-zero if +** the expression is constant or a function call with constant arguments. +** Return and 0 if there are any variables. +** +** isInit is true when parsing from sqlite_schema. isInit is false when +** processing a new CREATE TABLE statement. When isInit is true, parameters +** (such as ? or $abc) in the expression are converted into NULL. When +** isInit is false, parameters raise an error. Parameters should not be +** allowed in a CREATE TABLE statement, but some legacy versions of SQLite +** allowed it, so we need to support it when reading sqlite_schema for +** backwards compatibility. +** +** If isInit is true, set EP_FromDDL on every TK_FUNCTION node. +** +** For the purposes of this function, a double-quoted string (ex: "abc") +** is considered a variable but a single-quoted string (ex: 'abc') is +** a constant. +*/ +SQLITE_PRIVATE int sqlite3ExprIsConstantOrFunction(Expr *p, u8 isInit){ + assert( isInit==0 || isInit==1 ); + return exprIsConst(p, 4+isInit, 0); +} + +#ifdef SQLITE_ENABLE_CURSOR_HINTS +/* +** Walk an expression tree. Return 1 if the expression contains a +** subquery of some kind. Return 0 if there are no subqueries. +*/ +SQLITE_PRIVATE int sqlite3ExprContainsSubquery(Expr *p){ + Walker w; + w.eCode = 1; + w.xExprCallback = sqlite3ExprWalkNoop; + w.xSelectCallback = sqlite3SelectWalkFail; +#ifdef SQLITE_DEBUG + w.xSelectCallback2 = sqlite3SelectWalkAssert2; +#endif + sqlite3WalkExpr(&w, p); + return w.eCode==0; +} +#endif + +/* +** If the expression p codes a constant integer that is small enough +** to fit in a 32-bit integer, return 1 and put the value of the integer +** in *pValue. If the expression is not an integer or if it is too big +** to fit in a signed 32-bit integer, return 0 and leave *pValue unchanged. +*/ +SQLITE_PRIVATE int sqlite3ExprIsInteger(const Expr *p, int *pValue){ + int rc = 0; + if( NEVER(p==0) ) return 0; /* Used to only happen following on OOM */ + + /* If an expression is an integer literal that fits in a signed 32-bit + ** integer, then the EP_IntValue flag will have already been set */ + assert( p->op!=TK_INTEGER || (p->flags & EP_IntValue)!=0 + || sqlite3GetInt32(p->u.zToken, &rc)==0 ); + + if( p->flags & EP_IntValue ){ + *pValue = p->u.iValue; + return 1; + } + switch( p->op ){ + case TK_UPLUS: { + rc = sqlite3ExprIsInteger(p->pLeft, pValue); + break; + } + case TK_UMINUS: { + int v = 0; + if( sqlite3ExprIsInteger(p->pLeft, &v) ){ + assert( ((unsigned int)v)!=0x80000000 ); + *pValue = -v; + rc = 1; + } + break; + } + default: break; + } + return rc; +} + +/* +** Return FALSE if there is no chance that the expression can be NULL. +** +** If the expression might be NULL or if the expression is too complex +** to tell return TRUE. +** +** This routine is used as an optimization, to skip OP_IsNull opcodes +** when we know that a value cannot be NULL. Hence, a false positive +** (returning TRUE when in fact the expression can never be NULL) might +** be a small performance hit but is otherwise harmless. On the other +** hand, a false negative (returning FALSE when the result could be NULL) +** will likely result in an incorrect answer. So when in doubt, return +** TRUE. +*/ +SQLITE_PRIVATE int sqlite3ExprCanBeNull(const Expr *p){ + u8 op; + assert( p!=0 ); + while( p->op==TK_UPLUS || p->op==TK_UMINUS ){ + p = p->pLeft; + assert( p!=0 ); + } + op = p->op; + if( op==TK_REGISTER ) op = p->op2; + switch( op ){ + case TK_INTEGER: + case TK_STRING: + case TK_FLOAT: + case TK_BLOB: + return 0; + case TK_COLUMN: + assert( ExprUseYTab(p) ); + return ExprHasProperty(p, EP_CanBeNull) || + p->y.pTab==0 || /* Reference to column of index on expression */ + (p->iColumn>=0 + && p->y.pTab->aCol!=0 /* Possible due to prior error */ + && p->y.pTab->aCol[p->iColumn].notNull==0); + default: + return 1; + } +} + +/* +** Return TRUE if the given expression is a constant which would be +** unchanged by OP_Affinity with the affinity given in the second +** argument. +** +** This routine is used to determine if the OP_Affinity operation +** can be omitted. When in doubt return FALSE. A false negative +** is harmless. A false positive, however, can result in the wrong +** answer. +*/ +SQLITE_PRIVATE int sqlite3ExprNeedsNoAffinityChange(const Expr *p, char aff){ + u8 op; + int unaryMinus = 0; + if( aff==SQLITE_AFF_BLOB ) return 1; + while( p->op==TK_UPLUS || p->op==TK_UMINUS ){ + if( p->op==TK_UMINUS ) unaryMinus = 1; + p = p->pLeft; + } + op = p->op; + if( op==TK_REGISTER ) op = p->op2; + switch( op ){ + case TK_INTEGER: { + return aff>=SQLITE_AFF_NUMERIC; + } + case TK_FLOAT: { + return aff>=SQLITE_AFF_NUMERIC; + } + case TK_STRING: { + return !unaryMinus && aff==SQLITE_AFF_TEXT; + } + case TK_BLOB: { + return !unaryMinus; + } + case TK_COLUMN: { + assert( p->iTable>=0 ); /* p cannot be part of a CHECK constraint */ + return aff>=SQLITE_AFF_NUMERIC && p->iColumn<0; + } + default: { + return 0; + } + } +} + +/* +** Return TRUE if the given string is a row-id column name. +*/ +SQLITE_PRIVATE int sqlite3IsRowid(const char *z){ + if( sqlite3StrICmp(z, "_ROWID_")==0 ) return 1; + if( sqlite3StrICmp(z, "ROWID")==0 ) return 1; + if( sqlite3StrICmp(z, "OID")==0 ) return 1; + return 0; +} + +/* +** pX is the RHS of an IN operator. If pX is a SELECT statement +** that can be simplified to a direct table access, then return +** a pointer to the SELECT statement. If pX is not a SELECT statement, +** or if the SELECT statement needs to be manifested into a transient +** table, then return NULL. +*/ +#ifndef SQLITE_OMIT_SUBQUERY +static Select *isCandidateForInOpt(const Expr *pX){ + Select *p; + SrcList *pSrc; + ExprList *pEList; + Table *pTab; + int i; + if( !ExprUseXSelect(pX) ) return 0; /* Not a subquery */ + if( ExprHasProperty(pX, EP_VarSelect) ) return 0; /* Correlated subq */ + p = pX->x.pSelect; + if( p->pPrior ) return 0; /* Not a compound SELECT */ + if( p->selFlags & (SF_Distinct|SF_Aggregate) ){ + testcase( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ); + testcase( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Aggregate ); + return 0; /* No DISTINCT keyword and no aggregate functions */ + } + assert( p->pGroupBy==0 ); /* Has no GROUP BY clause */ + if( p->pLimit ) return 0; /* Has no LIMIT clause */ + if( p->pWhere ) return 0; /* Has no WHERE clause */ + pSrc = p->pSrc; + assert( pSrc!=0 ); + if( pSrc->nSrc!=1 ) return 0; /* Single term in FROM clause */ + if( pSrc->a[0].pSelect ) return 0; /* FROM is not a subquery or view */ + pTab = pSrc->a[0].pTab; + assert( pTab!=0 ); + assert( !IsView(pTab) ); /* FROM clause is not a view */ + if( IsVirtual(pTab) ) return 0; /* FROM clause not a virtual table */ + pEList = p->pEList; + assert( pEList!=0 ); + /* All SELECT results must be columns. */ + for(i=0; inExpr; i++){ + Expr *pRes = pEList->a[i].pExpr; + if( pRes->op!=TK_COLUMN ) return 0; + assert( pRes->iTable==pSrc->a[0].iCursor ); /* Not a correlated subquery */ + } + return p; +} +#endif /* SQLITE_OMIT_SUBQUERY */ + +#ifndef SQLITE_OMIT_SUBQUERY +/* +** Generate code that checks the left-most column of index table iCur to see if +** it contains any NULL entries. Cause the register at regHasNull to be set +** to a non-NULL value if iCur contains no NULLs. Cause register regHasNull +** to be set to NULL if iCur contains one or more NULL values. +*/ +static void sqlite3SetHasNullFlag(Vdbe *v, int iCur, int regHasNull){ + int addr1; + sqlite3VdbeAddOp2(v, OP_Integer, 0, regHasNull); + addr1 = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_Column, iCur, 0, regHasNull); + sqlite3VdbeChangeP5(v, OPFLAG_TYPEOFARG); + VdbeComment((v, "first_entry_in(%d)", iCur)); + sqlite3VdbeJumpHere(v, addr1); +} +#endif + + +#ifndef SQLITE_OMIT_SUBQUERY +/* +** The argument is an IN operator with a list (not a subquery) on the +** right-hand side. Return TRUE if that list is constant. +*/ +static int sqlite3InRhsIsConstant(Expr *pIn){ + Expr *pLHS; + int res; + assert( !ExprHasProperty(pIn, EP_xIsSelect) ); + pLHS = pIn->pLeft; + pIn->pLeft = 0; + res = sqlite3ExprIsConstant(pIn); + pIn->pLeft = pLHS; + return res; +} +#endif + +/* +** This function is used by the implementation of the IN (...) operator. +** The pX parameter is the expression on the RHS of the IN operator, which +** might be either a list of expressions or a subquery. +** +** The job of this routine is to find or create a b-tree object that can +** be used either to test for membership in the RHS set or to iterate through +** all members of the RHS set, skipping duplicates. +** +** A cursor is opened on the b-tree object that is the RHS of the IN operator +** and pX->iTable is set to the index of that cursor. +** +** The returned value of this function indicates the b-tree type, as follows: +** +** IN_INDEX_ROWID - The cursor was opened on a database table. +** IN_INDEX_INDEX_ASC - The cursor was opened on an ascending index. +** IN_INDEX_INDEX_DESC - The cursor was opened on a descending index. +** IN_INDEX_EPH - The cursor was opened on a specially created and +** populated epheremal table. +** IN_INDEX_NOOP - No cursor was allocated. The IN operator must be +** implemented as a sequence of comparisons. +** +** An existing b-tree might be used if the RHS expression pX is a simple +** subquery such as: +** +** SELECT , ... FROM +** +** If the RHS of the IN operator is a list or a more complex subquery, then +** an ephemeral table might need to be generated from the RHS and then +** pX->iTable made to point to the ephemeral table instead of an +** existing table. +** +** The inFlags parameter must contain, at a minimum, one of the bits +** IN_INDEX_MEMBERSHIP or IN_INDEX_LOOP but not both. If inFlags contains +** IN_INDEX_MEMBERSHIP, then the generated table will be used for a fast +** membership test. When the IN_INDEX_LOOP bit is set, the IN index will +** be used to loop over all values of the RHS of the IN operator. +** +** When IN_INDEX_LOOP is used (and the b-tree will be used to iterate +** through the set members) then the b-tree must not contain duplicates. +** An epheremal table will be created unless the selected columns are guaranteed +** to be unique - either because it is an INTEGER PRIMARY KEY or due to +** a UNIQUE constraint or index. +** +** When IN_INDEX_MEMBERSHIP is used (and the b-tree will be used +** for fast set membership tests) then an epheremal table must +** be used unless is a single INTEGER PRIMARY KEY column or an +** index can be found with the specified as its left-most. +** +** If the IN_INDEX_NOOP_OK and IN_INDEX_MEMBERSHIP are both set and +** if the RHS of the IN operator is a list (not a subquery) then this +** routine might decide that creating an ephemeral b-tree for membership +** testing is too expensive and return IN_INDEX_NOOP. In that case, the +** calling routine should implement the IN operator using a sequence +** of Eq or Ne comparison operations. +** +** When the b-tree is being used for membership tests, the calling function +** might need to know whether or not the RHS side of the IN operator +** contains a NULL. If prRhsHasNull is not a NULL pointer and +** if there is any chance that the (...) might contain a NULL value at +** runtime, then a register is allocated and the register number written +** to *prRhsHasNull. If there is no chance that the (...) contains a +** NULL value, then *prRhsHasNull is left unchanged. +** +** If a register is allocated and its location stored in *prRhsHasNull, then +** the value in that register will be NULL if the b-tree contains one or more +** NULL values, and it will be some non-NULL value if the b-tree contains no +** NULL values. +** +** If the aiMap parameter is not NULL, it must point to an array containing +** one element for each column returned by the SELECT statement on the RHS +** of the IN(...) operator. The i'th entry of the array is populated with the +** offset of the index column that matches the i'th column returned by the +** SELECT. For example, if the expression and selected index are: +** +** (?,?,?) IN (SELECT a, b, c FROM t1) +** CREATE INDEX i1 ON t1(b, c, a); +** +** then aiMap[] is populated with {2, 0, 1}. +*/ +#ifndef SQLITE_OMIT_SUBQUERY +SQLITE_PRIVATE int sqlite3FindInIndex( + Parse *pParse, /* Parsing context */ + Expr *pX, /* The IN expression */ + u32 inFlags, /* IN_INDEX_LOOP, _MEMBERSHIP, and/or _NOOP_OK */ + int *prRhsHasNull, /* Register holding NULL status. See notes */ + int *aiMap, /* Mapping from Index fields to RHS fields */ + int *piTab /* OUT: index to use */ +){ + Select *p; /* SELECT to the right of IN operator */ + int eType = 0; /* Type of RHS table. IN_INDEX_* */ + int iTab = pParse->nTab++; /* Cursor of the RHS table */ + int mustBeUnique; /* True if RHS must be unique */ + Vdbe *v = sqlite3GetVdbe(pParse); /* Virtual machine being coded */ + + assert( pX->op==TK_IN ); + mustBeUnique = (inFlags & IN_INDEX_LOOP)!=0; + + /* If the RHS of this IN(...) operator is a SELECT, and if it matters + ** whether or not the SELECT result contains NULL values, check whether + ** or not NULL is actually possible (it may not be, for example, due + ** to NOT NULL constraints in the schema). If no NULL values are possible, + ** set prRhsHasNull to 0 before continuing. */ + if( prRhsHasNull && ExprUseXSelect(pX) ){ + int i; + ExprList *pEList = pX->x.pSelect->pEList; + for(i=0; inExpr; i++){ + if( sqlite3ExprCanBeNull(pEList->a[i].pExpr) ) break; + } + if( i==pEList->nExpr ){ + prRhsHasNull = 0; + } + } + + /* Check to see if an existing table or index can be used to + ** satisfy the query. This is preferable to generating a new + ** ephemeral table. */ + if( pParse->nErr==0 && (p = isCandidateForInOpt(pX))!=0 ){ + sqlite3 *db = pParse->db; /* Database connection */ + Table *pTab; /* Table
    . */ + int iDb; /* Database idx for pTab */ + ExprList *pEList = p->pEList; + int nExpr = pEList->nExpr; + + assert( p->pEList!=0 ); /* Because of isCandidateForInOpt(p) */ + assert( p->pEList->a[0].pExpr!=0 ); /* Because of isCandidateForInOpt(p) */ + assert( p->pSrc!=0 ); /* Because of isCandidateForInOpt(p) */ + pTab = p->pSrc->a[0].pTab; + + /* Code an OP_Transaction and OP_TableLock for
    . */ + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + assert( iDb>=0 && iDbtnum, 0, pTab->zName); + + assert(v); /* sqlite3GetVdbe() has always been previously called */ + if( nExpr==1 && pEList->a[0].pExpr->iColumn<0 ){ + /* The "x IN (SELECT rowid FROM table)" case */ + int iAddr = sqlite3VdbeAddOp0(v, OP_Once); + VdbeCoverage(v); + + sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead); + eType = IN_INDEX_ROWID; + ExplainQueryPlan((pParse, 0, + "USING ROWID SEARCH ON TABLE %s FOR IN-OPERATOR",pTab->zName)); + sqlite3VdbeJumpHere(v, iAddr); + }else{ + Index *pIdx; /* Iterator variable */ + int affinity_ok = 1; + int i; + + /* Check that the affinity that will be used to perform each + ** comparison is the same as the affinity of each column in table + ** on the RHS of the IN operator. If it not, it is not possible to + ** use any index of the RHS table. */ + for(i=0; ipLeft, i); + int iCol = pEList->a[i].pExpr->iColumn; + char idxaff = sqlite3TableColumnAffinity(pTab,iCol); /* RHS table */ + char cmpaff = sqlite3CompareAffinity(pLhs, idxaff); + testcase( cmpaff==SQLITE_AFF_BLOB ); + testcase( cmpaff==SQLITE_AFF_TEXT ); + switch( cmpaff ){ + case SQLITE_AFF_BLOB: + break; + case SQLITE_AFF_TEXT: + /* sqlite3CompareAffinity() only returns TEXT if one side or the + ** other has no affinity and the other side is TEXT. Hence, + ** the only way for cmpaff to be TEXT is for idxaff to be TEXT + ** and for the term on the LHS of the IN to have no affinity. */ + assert( idxaff==SQLITE_AFF_TEXT ); + break; + default: + affinity_ok = sqlite3IsNumericAffinity(idxaff); + } + } + + if( affinity_ok ){ + /* Search for an existing index that will work for this IN operator */ + for(pIdx=pTab->pIndex; pIdx && eType==0; pIdx=pIdx->pNext){ + Bitmask colUsed; /* Columns of the index used */ + Bitmask mCol; /* Mask for the current column */ + if( pIdx->nColumnpPartIdxWhere!=0 ) continue; + /* Maximum nColumn is BMS-2, not BMS-1, so that we can compute + ** BITMASK(nExpr) without overflowing */ + testcase( pIdx->nColumn==BMS-2 ); + testcase( pIdx->nColumn==BMS-1 ); + if( pIdx->nColumn>=BMS-1 ) continue; + if( mustBeUnique ){ + if( pIdx->nKeyCol>nExpr + ||(pIdx->nColumn>nExpr && !IsUniqueIndex(pIdx)) + ){ + continue; /* This index is not unique over the IN RHS columns */ + } + } + + colUsed = 0; /* Columns of index used so far */ + for(i=0; ipLeft, i); + Expr *pRhs = pEList->a[i].pExpr; + CollSeq *pReq = sqlite3BinaryCompareCollSeq(pParse, pLhs, pRhs); + int j; + + assert( pReq!=0 || pRhs->iColumn==XN_ROWID || pParse->nErr ); + for(j=0; jaiColumn[j]!=pRhs->iColumn ) continue; + assert( pIdx->azColl[j] ); + if( pReq!=0 && sqlite3StrICmp(pReq->zName, pIdx->azColl[j])!=0 ){ + continue; + } + break; + } + if( j==nExpr ) break; + mCol = MASKBIT(j); + if( mCol & colUsed ) break; /* Each column used only once */ + colUsed |= mCol; + if( aiMap ) aiMap[i] = j; + } + + assert( i==nExpr || colUsed!=(MASKBIT(nExpr)-1) ); + if( colUsed==(MASKBIT(nExpr)-1) ){ + /* If we reach this point, that means the index pIdx is usable */ + int iAddr = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + ExplainQueryPlan((pParse, 0, + "USING INDEX %s FOR IN-OPERATOR",pIdx->zName)); + sqlite3VdbeAddOp3(v, OP_OpenRead, iTab, pIdx->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + VdbeComment((v, "%s", pIdx->zName)); + assert( IN_INDEX_INDEX_DESC == IN_INDEX_INDEX_ASC+1 ); + eType = IN_INDEX_INDEX_ASC + pIdx->aSortOrder[0]; + + if( prRhsHasNull ){ +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + i64 mask = (1<nMem; + if( nExpr==1 ){ + sqlite3SetHasNullFlag(v, iTab, *prRhsHasNull); + } + } + sqlite3VdbeJumpHere(v, iAddr); + } + } /* End loop over indexes */ + } /* End if( affinity_ok ) */ + } /* End if not an rowid index */ + } /* End attempt to optimize using an index */ + + /* If no preexisting index is available for the IN clause + ** and IN_INDEX_NOOP is an allowed reply + ** and the RHS of the IN operator is a list, not a subquery + ** and the RHS is not constant or has two or fewer terms, + ** then it is not worth creating an ephemeral table to evaluate + ** the IN operator so return IN_INDEX_NOOP. + */ + if( eType==0 + && (inFlags & IN_INDEX_NOOP_OK) + && ExprUseXList(pX) + && (!sqlite3InRhsIsConstant(pX) || pX->x.pList->nExpr<=2) + ){ + eType = IN_INDEX_NOOP; + } + + if( eType==0 ){ + /* Could not find an existing table or index to use as the RHS b-tree. + ** We will have to generate an ephemeral table to do the job. + */ + u32 savedNQueryLoop = pParse->nQueryLoop; + int rMayHaveNull = 0; + eType = IN_INDEX_EPH; + if( inFlags & IN_INDEX_LOOP ){ + pParse->nQueryLoop = 0; + }else if( prRhsHasNull ){ + *prRhsHasNull = rMayHaveNull = ++pParse->nMem; + } + assert( pX->op==TK_IN ); + sqlite3CodeRhsOfIN(pParse, pX, iTab); + if( rMayHaveNull ){ + sqlite3SetHasNullFlag(v, iTab, rMayHaveNull); + } + pParse->nQueryLoop = savedNQueryLoop; + } + + if( aiMap && eType!=IN_INDEX_INDEX_ASC && eType!=IN_INDEX_INDEX_DESC ){ + int i, n; + n = sqlite3ExprVectorSize(pX->pLeft); + for(i=0; ipLeft; + int nVal = sqlite3ExprVectorSize(pLeft); + Select *pSelect = ExprUseXSelect(pExpr) ? pExpr->x.pSelect : 0; + char *zRet; + + assert( pExpr->op==TK_IN ); + zRet = sqlite3DbMallocRaw(pParse->db, nVal+1); + if( zRet ){ + int i; + for(i=0; ipEList->a[i].pExpr, a); + }else{ + zRet[i] = a; + } + } + zRet[nVal] = '\0'; + } + return zRet; +} +#endif + +#ifndef SQLITE_OMIT_SUBQUERY +/* +** Load the Parse object passed as the first argument with an error +** message of the form: +** +** "sub-select returns N columns - expected M" +*/ +SQLITE_PRIVATE void sqlite3SubselectError(Parse *pParse, int nActual, int nExpect){ + if( pParse->nErr==0 ){ + const char *zFmt = "sub-select returns %d columns - expected %d"; + sqlite3ErrorMsg(pParse, zFmt, nActual, nExpect); + } +} +#endif + +/* +** Expression pExpr is a vector that has been used in a context where +** it is not permitted. If pExpr is a sub-select vector, this routine +** loads the Parse object with a message of the form: +** +** "sub-select returns N columns - expected 1" +** +** Or, if it is a regular scalar vector: +** +** "row value misused" +*/ +SQLITE_PRIVATE void sqlite3VectorErrorMsg(Parse *pParse, Expr *pExpr){ +#ifndef SQLITE_OMIT_SUBQUERY + if( ExprUseXSelect(pExpr) ){ + sqlite3SubselectError(pParse, pExpr->x.pSelect->pEList->nExpr, 1); + }else +#endif + { + sqlite3ErrorMsg(pParse, "row value misused"); + } +} + +#ifndef SQLITE_OMIT_SUBQUERY +/* +** Generate code that will construct an ephemeral table containing all terms +** in the RHS of an IN operator. The IN operator can be in either of two +** forms: +** +** x IN (4,5,11) -- IN operator with list on right-hand side +** x IN (SELECT a FROM b) -- IN operator with subquery on the right +** +** The pExpr parameter is the IN operator. The cursor number for the +** constructed ephermeral table is returned. The first time the ephemeral +** table is computed, the cursor number is also stored in pExpr->iTable, +** however the cursor number returned might not be the same, as it might +** have been duplicated using OP_OpenDup. +** +** If the LHS expression ("x" in the examples) is a column value, or +** the SELECT statement returns a column value, then the affinity of that +** column is used to build the index keys. If both 'x' and the +** SELECT... statement are columns, then numeric affinity is used +** if either column has NUMERIC or INTEGER affinity. If neither +** 'x' nor the SELECT... statement are columns, then numeric affinity +** is used. +*/ +SQLITE_PRIVATE void sqlite3CodeRhsOfIN( + Parse *pParse, /* Parsing context */ + Expr *pExpr, /* The IN operator */ + int iTab /* Use this cursor number */ +){ + int addrOnce = 0; /* Address of the OP_Once instruction at top */ + int addr; /* Address of OP_OpenEphemeral instruction */ + Expr *pLeft; /* the LHS of the IN operator */ + KeyInfo *pKeyInfo = 0; /* Key information */ + int nVal; /* Size of vector pLeft */ + Vdbe *v; /* The prepared statement under construction */ + + v = pParse->pVdbe; + assert( v!=0 ); + + /* The evaluation of the IN must be repeated every time it + ** is encountered if any of the following is true: + ** + ** * The right-hand side is a correlated subquery + ** * The right-hand side is an expression list containing variables + ** * We are inside a trigger + ** + ** If all of the above are false, then we can compute the RHS just once + ** and reuse it many names. + */ + if( !ExprHasProperty(pExpr, EP_VarSelect) && pParse->iSelfTab==0 ){ + /* Reuse of the RHS is allowed */ + /* If this routine has already been coded, but the previous code + ** might not have been invoked yet, so invoke it now as a subroutine. + */ + if( ExprHasProperty(pExpr, EP_Subrtn) ){ + addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + if( ExprUseXSelect(pExpr) ){ + ExplainQueryPlan((pParse, 0, "REUSE LIST SUBQUERY %d", + pExpr->x.pSelect->selId)); + } + assert( ExprUseYSub(pExpr) ); + sqlite3VdbeAddOp2(v, OP_Gosub, pExpr->y.sub.regReturn, + pExpr->y.sub.iAddr); + sqlite3VdbeAddOp2(v, OP_OpenDup, iTab, pExpr->iTable); + sqlite3VdbeJumpHere(v, addrOnce); + return; + } + + /* Begin coding the subroutine */ + assert( !ExprUseYWin(pExpr) ); + ExprSetProperty(pExpr, EP_Subrtn); + assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) ); + pExpr->y.sub.regReturn = ++pParse->nMem; + pExpr->y.sub.iAddr = + sqlite3VdbeAddOp2(v, OP_Integer, 0, pExpr->y.sub.regReturn) + 1; + VdbeComment((v, "return address")); + + addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + } + + /* Check to see if this is a vector IN operator */ + pLeft = pExpr->pLeft; + nVal = sqlite3ExprVectorSize(pLeft); + + /* Construct the ephemeral table that will contain the content of + ** RHS of the IN operator. + */ + pExpr->iTable = iTab; + addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pExpr->iTable, nVal); +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + if( ExprUseXSelect(pExpr) ){ + VdbeComment((v, "Result of SELECT %u", pExpr->x.pSelect->selId)); + }else{ + VdbeComment((v, "RHS of IN operator")); + } +#endif + pKeyInfo = sqlite3KeyInfoAlloc(pParse->db, nVal, 1); + + if( ExprUseXSelect(pExpr) ){ + /* Case 1: expr IN (SELECT ...) + ** + ** Generate code to write the results of the select into the temporary + ** table allocated and opened above. + */ + Select *pSelect = pExpr->x.pSelect; + ExprList *pEList = pSelect->pEList; + + ExplainQueryPlan((pParse, 1, "%sLIST SUBQUERY %d", + addrOnce?"":"CORRELATED ", pSelect->selId + )); + /* If the LHS and RHS of the IN operator do not match, that + ** error will have been caught long before we reach this point. */ + if( ALWAYS(pEList->nExpr==nVal) ){ + Select *pCopy; + SelectDest dest; + int i; + int rc; + sqlite3SelectDestInit(&dest, SRT_Set, iTab); + dest.zAffSdst = exprINAffinity(pParse, pExpr); + pSelect->iLimit = 0; + testcase( pSelect->selFlags & SF_Distinct ); + testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */ + pCopy = sqlite3SelectDup(pParse->db, pSelect, 0); + rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest); + sqlite3SelectDelete(pParse->db, pCopy); + sqlite3DbFree(pParse->db, dest.zAffSdst); + if( rc ){ + sqlite3KeyInfoUnref(pKeyInfo); + return; + } + assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */ + assert( pEList!=0 ); + assert( pEList->nExpr>0 ); + assert( sqlite3KeyInfoIsWriteable(pKeyInfo) ); + for(i=0; iaColl[i] = sqlite3BinaryCompareCollSeq( + pParse, p, pEList->a[i].pExpr + ); + } + } + }else if( ALWAYS(pExpr->x.pList!=0) ){ + /* Case 2: expr IN (exprlist) + ** + ** For each expression, build an index key from the evaluation and + ** store it in the temporary table. If is a column, then use + ** that columns affinity when building index keys. If is not + ** a column, use numeric affinity. + */ + char affinity; /* Affinity of the LHS of the IN */ + int i; + ExprList *pList = pExpr->x.pList; + struct ExprList_item *pItem; + int r1, r2; + affinity = sqlite3ExprAffinity(pLeft); + if( affinity<=SQLITE_AFF_NONE ){ + affinity = SQLITE_AFF_BLOB; + }else if( affinity==SQLITE_AFF_REAL ){ + affinity = SQLITE_AFF_NUMERIC; + } + if( pKeyInfo ){ + assert( sqlite3KeyInfoIsWriteable(pKeyInfo) ); + pKeyInfo->aColl[0] = sqlite3ExprCollSeq(pParse, pExpr->pLeft); + } + + /* Loop through each expression in . */ + r1 = sqlite3GetTempReg(pParse); + r2 = sqlite3GetTempReg(pParse); + for(i=pList->nExpr, pItem=pList->a; i>0; i--, pItem++){ + Expr *pE2 = pItem->pExpr; + + /* If the expression is not constant then we will need to + ** disable the test that was generated above that makes sure + ** this code only executes once. Because for a non-constant + ** expression we need to rerun this code each time. + */ + if( addrOnce && !sqlite3ExprIsConstant(pE2) ){ + sqlite3VdbeChangeToNoop(v, addrOnce); + ExprClearProperty(pExpr, EP_Subrtn); + addrOnce = 0; + } + + /* Evaluate the expression and insert it into the temp table */ + sqlite3ExprCode(pParse, pE2, r1); + sqlite3VdbeAddOp4(v, OP_MakeRecord, r1, 1, r2, &affinity, 1); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iTab, r2, r1, 1); + } + sqlite3ReleaseTempReg(pParse, r1); + sqlite3ReleaseTempReg(pParse, r2); + } + if( pKeyInfo ){ + sqlite3VdbeChangeP4(v, addr, (void *)pKeyInfo, P4_KEYINFO); + } + if( addrOnce ){ + sqlite3VdbeJumpHere(v, addrOnce); + /* Subroutine return */ + assert( ExprUseYSub(pExpr) ); + sqlite3VdbeAddOp1(v, OP_Return, pExpr->y.sub.regReturn); + sqlite3VdbeChangeP1(v, pExpr->y.sub.iAddr-1, sqlite3VdbeCurrentAddr(v)-1); + sqlite3ClearTempRegCache(pParse); + } +} +#endif /* SQLITE_OMIT_SUBQUERY */ + +/* +** Generate code for scalar subqueries used as a subquery expression +** or EXISTS operator: +** +** (SELECT a FROM b) -- subquery +** EXISTS (SELECT a FROM b) -- EXISTS subquery +** +** The pExpr parameter is the SELECT or EXISTS operator to be coded. +** +** Return the register that holds the result. For a multi-column SELECT, +** the result is stored in a contiguous array of registers and the +** return value is the register of the left-most result column. +** Return 0 if an error occurs. +*/ +#ifndef SQLITE_OMIT_SUBQUERY +SQLITE_PRIVATE int sqlite3CodeSubselect(Parse *pParse, Expr *pExpr){ + int addrOnce = 0; /* Address of OP_Once at top of subroutine */ + int rReg = 0; /* Register storing resulting */ + Select *pSel; /* SELECT statement to encode */ + SelectDest dest; /* How to deal with SELECT result */ + int nReg; /* Registers to allocate */ + Expr *pLimit; /* New limit expression */ + + Vdbe *v = pParse->pVdbe; + assert( v!=0 ); + if( pParse->nErr ) return 0; + testcase( pExpr->op==TK_EXISTS ); + testcase( pExpr->op==TK_SELECT ); + assert( pExpr->op==TK_EXISTS || pExpr->op==TK_SELECT ); + assert( ExprUseXSelect(pExpr) ); + pSel = pExpr->x.pSelect; + + /* If this routine has already been coded, then invoke it as a + ** subroutine. */ + if( ExprHasProperty(pExpr, EP_Subrtn) ){ + ExplainQueryPlan((pParse, 0, "REUSE SUBQUERY %d", pSel->selId)); + assert( ExprUseYSub(pExpr) ); + sqlite3VdbeAddOp2(v, OP_Gosub, pExpr->y.sub.regReturn, + pExpr->y.sub.iAddr); + return pExpr->iTable; + } + + /* Begin coding the subroutine */ + assert( !ExprUseYWin(pExpr) ); + assert( !ExprHasProperty(pExpr, EP_Reduced|EP_TokenOnly) ); + ExprSetProperty(pExpr, EP_Subrtn); + pExpr->y.sub.regReturn = ++pParse->nMem; + pExpr->y.sub.iAddr = + sqlite3VdbeAddOp2(v, OP_Integer, 0, pExpr->y.sub.regReturn) + 1; + VdbeComment((v, "return address")); + + + /* The evaluation of the EXISTS/SELECT must be repeated every time it + ** is encountered if any of the following is true: + ** + ** * The right-hand side is a correlated subquery + ** * The right-hand side is an expression list containing variables + ** * We are inside a trigger + ** + ** If all of the above are false, then we can run this code just once + ** save the results, and reuse the same result on subsequent invocations. + */ + if( !ExprHasProperty(pExpr, EP_VarSelect) ){ + addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + } + + /* For a SELECT, generate code to put the values for all columns of + ** the first row into an array of registers and return the index of + ** the first register. + ** + ** If this is an EXISTS, write an integer 0 (not exists) or 1 (exists) + ** into a register and return that register number. + ** + ** In both cases, the query is augmented with "LIMIT 1". Any + ** preexisting limit is discarded in place of the new LIMIT 1. + */ + ExplainQueryPlan((pParse, 1, "%sSCALAR SUBQUERY %d", + addrOnce?"":"CORRELATED ", pSel->selId)); + nReg = pExpr->op==TK_SELECT ? pSel->pEList->nExpr : 1; + sqlite3SelectDestInit(&dest, 0, pParse->nMem+1); + pParse->nMem += nReg; + if( pExpr->op==TK_SELECT ){ + dest.eDest = SRT_Mem; + dest.iSdst = dest.iSDParm; + dest.nSdst = nReg; + sqlite3VdbeAddOp3(v, OP_Null, 0, dest.iSDParm, dest.iSDParm+nReg-1); + VdbeComment((v, "Init subquery result")); + }else{ + dest.eDest = SRT_Exists; + sqlite3VdbeAddOp2(v, OP_Integer, 0, dest.iSDParm); + VdbeComment((v, "Init EXISTS result")); + } + if( pSel->pLimit ){ + /* The subquery already has a limit. If the pre-existing limit is X + ** then make the new limit X<>0 so that the new limit is either 1 or 0 */ + sqlite3 *db = pParse->db; + pLimit = sqlite3Expr(db, TK_INTEGER, "0"); + if( pLimit ){ + pLimit->affExpr = SQLITE_AFF_NUMERIC; + pLimit = sqlite3PExpr(pParse, TK_NE, + sqlite3ExprDup(db, pSel->pLimit->pLeft, 0), pLimit); + } + sqlite3ExprDelete(db, pSel->pLimit->pLeft); + pSel->pLimit->pLeft = pLimit; + }else{ + /* If there is no pre-existing limit add a limit of 1 */ + pLimit = sqlite3Expr(pParse->db, TK_INTEGER, "1"); + pSel->pLimit = sqlite3PExpr(pParse, TK_LIMIT, pLimit, 0); + } + pSel->iLimit = 0; + if( sqlite3Select(pParse, pSel, &dest) ){ + pExpr->op2 = pExpr->op; + pExpr->op = TK_ERROR; + return 0; + } + pExpr->iTable = rReg = dest.iSDParm; + ExprSetVVAProperty(pExpr, EP_NoReduce); + if( addrOnce ){ + sqlite3VdbeJumpHere(v, addrOnce); + } + + /* Subroutine return */ + assert( ExprUseYSub(pExpr) ); + sqlite3VdbeAddOp1(v, OP_Return, pExpr->y.sub.regReturn); + sqlite3VdbeChangeP1(v, pExpr->y.sub.iAddr-1, sqlite3VdbeCurrentAddr(v)-1); + sqlite3ClearTempRegCache(pParse); + return rReg; +} +#endif /* SQLITE_OMIT_SUBQUERY */ + +#ifndef SQLITE_OMIT_SUBQUERY +/* +** Expr pIn is an IN(...) expression. This function checks that the +** sub-select on the RHS of the IN() operator has the same number of +** columns as the vector on the LHS. Or, if the RHS of the IN() is not +** a sub-query, that the LHS is a vector of size 1. +*/ +SQLITE_PRIVATE int sqlite3ExprCheckIN(Parse *pParse, Expr *pIn){ + int nVector = sqlite3ExprVectorSize(pIn->pLeft); + if( ExprUseXSelect(pIn) && !pParse->db->mallocFailed ){ + if( nVector!=pIn->x.pSelect->pEList->nExpr ){ + sqlite3SubselectError(pParse, pIn->x.pSelect->pEList->nExpr, nVector); + return 1; + } + }else if( nVector!=1 ){ + sqlite3VectorErrorMsg(pParse, pIn->pLeft); + return 1; + } + return 0; +} +#endif + +#ifndef SQLITE_OMIT_SUBQUERY +/* +** Generate code for an IN expression. +** +** x IN (SELECT ...) +** x IN (value, value, ...) +** +** The left-hand side (LHS) is a scalar or vector expression. The +** right-hand side (RHS) is an array of zero or more scalar values, or a +** subquery. If the RHS is a subquery, the number of result columns must +** match the number of columns in the vector on the LHS. If the RHS is +** a list of values, the LHS must be a scalar. +** +** The IN operator is true if the LHS value is contained within the RHS. +** The result is false if the LHS is definitely not in the RHS. The +** result is NULL if the presence of the LHS in the RHS cannot be +** determined due to NULLs. +** +** This routine generates code that jumps to destIfFalse if the LHS is not +** contained within the RHS. If due to NULLs we cannot determine if the LHS +** is contained in the RHS then jump to destIfNull. If the LHS is contained +** within the RHS then fall through. +** +** See the separate in-operator.md documentation file in the canonical +** SQLite source tree for additional information. +*/ +static void sqlite3ExprCodeIN( + Parse *pParse, /* Parsing and code generating context */ + Expr *pExpr, /* The IN expression */ + int destIfFalse, /* Jump here if LHS is not contained in the RHS */ + int destIfNull /* Jump here if the results are unknown due to NULLs */ +){ + int rRhsHasNull = 0; /* Register that is true if RHS contains NULL values */ + int eType; /* Type of the RHS */ + int rLhs; /* Register(s) holding the LHS values */ + int rLhsOrig; /* LHS values prior to reordering by aiMap[] */ + Vdbe *v; /* Statement under construction */ + int *aiMap = 0; /* Map from vector field to index column */ + char *zAff = 0; /* Affinity string for comparisons */ + int nVector; /* Size of vectors for this IN operator */ + int iDummy; /* Dummy parameter to exprCodeVector() */ + Expr *pLeft; /* The LHS of the IN operator */ + int i; /* loop counter */ + int destStep2; /* Where to jump when NULLs seen in step 2 */ + int destStep6 = 0; /* Start of code for Step 6 */ + int addrTruthOp; /* Address of opcode that determines the IN is true */ + int destNotNull; /* Jump here if a comparison is not true in step 6 */ + int addrTop; /* Top of the step-6 loop */ + int iTab = 0; /* Index to use */ + u8 okConstFactor = pParse->okConstFactor; + + assert( !ExprHasVVAProperty(pExpr,EP_Immutable) ); + pLeft = pExpr->pLeft; + if( sqlite3ExprCheckIN(pParse, pExpr) ) return; + zAff = exprINAffinity(pParse, pExpr); + nVector = sqlite3ExprVectorSize(pExpr->pLeft); + aiMap = (int*)sqlite3DbMallocZero( + pParse->db, nVector*(sizeof(int) + sizeof(char)) + 1 + ); + if( pParse->db->mallocFailed ) goto sqlite3ExprCodeIN_oom_error; + + /* Attempt to compute the RHS. After this step, if anything other than + ** IN_INDEX_NOOP is returned, the table opened with cursor iTab + ** contains the values that make up the RHS. If IN_INDEX_NOOP is returned, + ** the RHS has not yet been coded. */ + v = pParse->pVdbe; + assert( v!=0 ); /* OOM detected prior to this routine */ + VdbeNoopComment((v, "begin IN expr")); + eType = sqlite3FindInIndex(pParse, pExpr, + IN_INDEX_MEMBERSHIP | IN_INDEX_NOOP_OK, + destIfFalse==destIfNull ? 0 : &rRhsHasNull, + aiMap, &iTab); + + assert( pParse->nErr || nVector==1 || eType==IN_INDEX_EPH + || eType==IN_INDEX_INDEX_ASC || eType==IN_INDEX_INDEX_DESC + ); +#ifdef SQLITE_DEBUG + /* Confirm that aiMap[] contains nVector integer values between 0 and + ** nVector-1. */ + for(i=0; i from " IN (...)". If the LHS is a + ** vector, then it is stored in an array of nVector registers starting + ** at r1. + ** + ** sqlite3FindInIndex() might have reordered the fields of the LHS vector + ** so that the fields are in the same order as an existing index. The + ** aiMap[] array contains a mapping from the original LHS field order to + ** the field order that matches the RHS index. + ** + ** Avoid factoring the LHS of the IN(...) expression out of the loop, + ** even if it is constant, as OP_Affinity may be used on the register + ** by code generated below. */ + assert( pParse->okConstFactor==okConstFactor ); + pParse->okConstFactor = 0; + rLhsOrig = exprCodeVector(pParse, pLeft, &iDummy); + pParse->okConstFactor = okConstFactor; + for(i=0; ix.pList; + pColl = sqlite3ExprCollSeq(pParse, pExpr->pLeft); + if( destIfNull!=destIfFalse ){ + regCkNull = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_BitAnd, rLhs, rLhs, regCkNull); + } + for(ii=0; iinExpr; ii++){ + r2 = sqlite3ExprCodeTemp(pParse, pList->a[ii].pExpr, ®ToFree); + if( regCkNull && sqlite3ExprCanBeNull(pList->a[ii].pExpr) ){ + sqlite3VdbeAddOp3(v, OP_BitAnd, regCkNull, r2, regCkNull); + } + sqlite3ReleaseTempReg(pParse, regToFree); + if( iinExpr-1 || destIfNull!=destIfFalse ){ + int op = rLhs!=r2 ? OP_Eq : OP_NotNull; + sqlite3VdbeAddOp4(v, op, rLhs, labelOk, r2, + (void*)pColl, P4_COLLSEQ); + VdbeCoverageIf(v, iinExpr-1 && op==OP_Eq); + VdbeCoverageIf(v, ii==pList->nExpr-1 && op==OP_Eq); + VdbeCoverageIf(v, iinExpr-1 && op==OP_NotNull); + VdbeCoverageIf(v, ii==pList->nExpr-1 && op==OP_NotNull); + sqlite3VdbeChangeP5(v, zAff[0]); + }else{ + int op = rLhs!=r2 ? OP_Ne : OP_IsNull; + assert( destIfNull==destIfFalse ); + sqlite3VdbeAddOp4(v, op, rLhs, destIfFalse, r2, + (void*)pColl, P4_COLLSEQ); + VdbeCoverageIf(v, op==OP_Ne); + VdbeCoverageIf(v, op==OP_IsNull); + sqlite3VdbeChangeP5(v, zAff[0] | SQLITE_JUMPIFNULL); + } + } + if( regCkNull ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regCkNull, destIfNull); VdbeCoverage(v); + sqlite3VdbeGoto(v, destIfFalse); + } + sqlite3VdbeResolveLabel(v, labelOk); + sqlite3ReleaseTempReg(pParse, regCkNull); + goto sqlite3ExprCodeIN_finished; + } + + /* Step 2: Check to see if the LHS contains any NULL columns. If the + ** LHS does contain NULLs then the result must be either FALSE or NULL. + ** We will then skip the binary search of the RHS. + */ + if( destIfNull==destIfFalse ){ + destStep2 = destIfFalse; + }else{ + destStep2 = destStep6 = sqlite3VdbeMakeLabel(pParse); + } + for(i=0; ipLeft, i); + if( pParse->nErr ) goto sqlite3ExprCodeIN_oom_error; + if( sqlite3ExprCanBeNull(p) ){ + sqlite3VdbeAddOp2(v, OP_IsNull, rLhs+i, destStep2); + VdbeCoverage(v); + } + } + + /* Step 3. The LHS is now known to be non-NULL. Do the binary search + ** of the RHS using the LHS as a probe. If found, the result is + ** true. + */ + if( eType==IN_INDEX_ROWID ){ + /* In this case, the RHS is the ROWID of table b-tree and so we also + ** know that the RHS is non-NULL. Hence, we combine steps 3 and 4 + ** into a single opcode. */ + sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs); + VdbeCoverage(v); + addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ + }else{ + sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); + if( destIfFalse==destIfNull ){ + /* Combine Step 3 and Step 5 into a single opcode */ + sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse, + rLhs, nVector); VdbeCoverage(v); + goto sqlite3ExprCodeIN_finished; + } + /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ + addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0, + rLhs, nVector); VdbeCoverage(v); + } + + /* Step 4. If the RHS is known to be non-NULL and we did not find + ** an match on the search above, then the result must be FALSE. + */ + if( rRhsHasNull && nVector==1 ){ + sqlite3VdbeAddOp2(v, OP_NotNull, rRhsHasNull, destIfFalse); + VdbeCoverage(v); + } + + /* Step 5. If we do not care about the difference between NULL and + ** FALSE, then just return false. + */ + if( destIfFalse==destIfNull ) sqlite3VdbeGoto(v, destIfFalse); + + /* Step 6: Loop through rows of the RHS. Compare each row to the LHS. + ** If any comparison is NULL, then the result is NULL. If all + ** comparisons are FALSE then the final result is FALSE. + ** + ** For a scalar LHS, it is sufficient to check just the first row + ** of the RHS. + */ + if( destStep6 ) sqlite3VdbeResolveLabel(v, destStep6); + addrTop = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, destIfFalse); + VdbeCoverage(v); + if( nVector>1 ){ + destNotNull = sqlite3VdbeMakeLabel(pParse); + }else{ + /* For nVector==1, combine steps 6 and 7 by immediately returning + ** FALSE if the first comparison is not NULL */ + destNotNull = destIfFalse; + } + for(i=0; i1 ){ + sqlite3VdbeResolveLabel(v, destNotNull); + sqlite3VdbeAddOp2(v, OP_Next, iTab, addrTop+1); + VdbeCoverage(v); + + /* Step 7: If we reach this point, we know that the result must + ** be false. */ + sqlite3VdbeAddOp2(v, OP_Goto, 0, destIfFalse); + } + + /* Jumps here in order to return true. */ + sqlite3VdbeJumpHere(v, addrTruthOp); + +sqlite3ExprCodeIN_finished: + if( rLhs!=rLhsOrig ) sqlite3ReleaseTempReg(pParse, rLhs); + VdbeComment((v, "end IN expr")); +sqlite3ExprCodeIN_oom_error: + sqlite3DbFree(pParse->db, aiMap); + sqlite3DbFree(pParse->db, zAff); +} +#endif /* SQLITE_OMIT_SUBQUERY */ + +#ifndef SQLITE_OMIT_FLOATING_POINT +/* +** Generate an instruction that will put the floating point +** value described by z[0..n-1] into register iMem. +** +** The z[] string will probably not be zero-terminated. But the +** z[n] character is guaranteed to be something that does not look +** like the continuation of the number. +*/ +static void codeReal(Vdbe *v, const char *z, int negateFlag, int iMem){ + if( ALWAYS(z!=0) ){ + double value; + sqlite3AtoF(z, &value, sqlite3Strlen30(z), SQLITE_UTF8); + assert( !sqlite3IsNaN(value) ); /* The new AtoF never returns NaN */ + if( negateFlag ) value = -value; + sqlite3VdbeAddOp4Dup8(v, OP_Real, 0, iMem, 0, (u8*)&value, P4_REAL); + } +} +#endif + + +/* +** Generate an instruction that will put the integer describe by +** text z[0..n-1] into register iMem. +** +** Expr.u.zToken is always UTF8 and zero-terminated. +*/ +static void codeInteger(Parse *pParse, Expr *pExpr, int negFlag, int iMem){ + Vdbe *v = pParse->pVdbe; + if( pExpr->flags & EP_IntValue ){ + int i = pExpr->u.iValue; + assert( i>=0 ); + if( negFlag ) i = -i; + sqlite3VdbeAddOp2(v, OP_Integer, i, iMem); + }else{ + int c; + i64 value; + const char *z = pExpr->u.zToken; + assert( z!=0 ); + c = sqlite3DecOrHexToI64(z, &value); + if( (c==3 && !negFlag) || (c==2) || (negFlag && value==SMALLEST_INT64)){ +#ifdef SQLITE_OMIT_FLOATING_POINT + sqlite3ErrorMsg(pParse, "oversized integer: %s%#T", negFlag?"-":"",pExpr); +#else +#ifndef SQLITE_OMIT_HEX_INTEGER + if( sqlite3_strnicmp(z,"0x",2)==0 ){ + sqlite3ErrorMsg(pParse, "hex literal too big: %s%#T", + negFlag?"-":"",pExpr); + }else +#endif + { + codeReal(v, z, negFlag, iMem); + } +#endif + }else{ + if( negFlag ){ value = c==3 ? SMALLEST_INT64 : -value; } + sqlite3VdbeAddOp4Dup8(v, OP_Int64, 0, iMem, 0, (u8*)&value, P4_INT64); + } + } +} + + +/* Generate code that will load into register regOut a value that is +** appropriate for the iIdxCol-th column of index pIdx. +*/ +SQLITE_PRIVATE void sqlite3ExprCodeLoadIndexColumn( + Parse *pParse, /* The parsing context */ + Index *pIdx, /* The index whose column is to be loaded */ + int iTabCur, /* Cursor pointing to a table row */ + int iIdxCol, /* The column of the index to be loaded */ + int regOut /* Store the index column value in this register */ +){ + i16 iTabCol = pIdx->aiColumn[iIdxCol]; + if( iTabCol==XN_EXPR ){ + assert( pIdx->aColExpr ); + assert( pIdx->aColExpr->nExpr>iIdxCol ); + pParse->iSelfTab = iTabCur + 1; + sqlite3ExprCodeCopy(pParse, pIdx->aColExpr->a[iIdxCol].pExpr, regOut); + pParse->iSelfTab = 0; + }else{ + sqlite3ExprCodeGetColumnOfTable(pParse->pVdbe, pIdx->pTable, iTabCur, + iTabCol, regOut); + } +} + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS +/* +** Generate code that will compute the value of generated column pCol +** and store the result in register regOut +*/ +SQLITE_PRIVATE void sqlite3ExprCodeGeneratedColumn( + Parse *pParse, /* Parsing context */ + Table *pTab, /* Table containing the generated column */ + Column *pCol, /* The generated column */ + int regOut /* Put the result in this register */ +){ + int iAddr; + Vdbe *v = pParse->pVdbe; + assert( v!=0 ); + assert( pParse->iSelfTab!=0 ); + if( pParse->iSelfTab>0 ){ + iAddr = sqlite3VdbeAddOp3(v, OP_IfNullRow, pParse->iSelfTab-1, 0, regOut); + }else{ + iAddr = 0; + } + sqlite3ExprCodeCopy(pParse, sqlite3ColumnExpr(pTab,pCol), regOut); + if( pCol->affinity>=SQLITE_AFF_TEXT ){ + sqlite3VdbeAddOp4(v, OP_Affinity, regOut, 1, 0, &pCol->affinity, 1); + } + if( iAddr ) sqlite3VdbeJumpHere(v, iAddr); +} +#endif /* SQLITE_OMIT_GENERATED_COLUMNS */ + +/* +** Generate code to extract the value of the iCol-th column of a table. +*/ +SQLITE_PRIVATE void sqlite3ExprCodeGetColumnOfTable( + Vdbe *v, /* Parsing context */ + Table *pTab, /* The table containing the value */ + int iTabCur, /* The table cursor. Or the PK cursor for WITHOUT ROWID */ + int iCol, /* Index of the column to extract */ + int regOut /* Extract the value into this register */ +){ + Column *pCol; + assert( v!=0 ); + if( pTab==0 ){ + sqlite3VdbeAddOp3(v, OP_Column, iTabCur, iCol, regOut); + return; + } + if( iCol<0 || iCol==pTab->iPKey ){ + sqlite3VdbeAddOp2(v, OP_Rowid, iTabCur, regOut); + }else{ + int op; + int x; + if( IsVirtual(pTab) ){ + op = OP_VColumn; + x = iCol; +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + }else if( (pCol = &pTab->aCol[iCol])->colFlags & COLFLAG_VIRTUAL ){ + Parse *pParse = sqlite3VdbeParser(v); + if( pCol->colFlags & COLFLAG_BUSY ){ + sqlite3ErrorMsg(pParse, "generated column loop on \"%s\"", + pCol->zCnName); + }else{ + int savedSelfTab = pParse->iSelfTab; + pCol->colFlags |= COLFLAG_BUSY; + pParse->iSelfTab = iTabCur+1; + sqlite3ExprCodeGeneratedColumn(pParse, pTab, pCol, regOut); + pParse->iSelfTab = savedSelfTab; + pCol->colFlags &= ~COLFLAG_BUSY; + } + return; +#endif + }else if( !HasRowid(pTab) ){ + testcase( iCol!=sqlite3TableColumnToStorage(pTab, iCol) ); + x = sqlite3TableColumnToIndex(sqlite3PrimaryKeyIndex(pTab), iCol); + op = OP_Column; + }else{ + x = sqlite3TableColumnToStorage(pTab,iCol); + testcase( x!=iCol ); + op = OP_Column; + } + sqlite3VdbeAddOp3(v, op, iTabCur, x, regOut); + sqlite3ColumnDefault(v, pTab, iCol, regOut); + } +} + +/* +** Generate code that will extract the iColumn-th column from +** table pTab and store the column value in register iReg. +** +** There must be an open cursor to pTab in iTable when this routine +** is called. If iColumn<0 then code is generated that extracts the rowid. +*/ +SQLITE_PRIVATE int sqlite3ExprCodeGetColumn( + Parse *pParse, /* Parsing and code generating context */ + Table *pTab, /* Description of the table we are reading from */ + int iColumn, /* Index of the table column */ + int iTable, /* The cursor pointing to the table */ + int iReg, /* Store results here */ + u8 p5 /* P5 value for OP_Column + FLAGS */ +){ + assert( pParse->pVdbe!=0 ); + sqlite3ExprCodeGetColumnOfTable(pParse->pVdbe, pTab, iTable, iColumn, iReg); + if( p5 ){ + VdbeOp *pOp = sqlite3VdbeGetOp(pParse->pVdbe,-1); + if( pOp->opcode==OP_Column ) pOp->p5 = p5; + } + return iReg; +} + +/* +** Generate code to move content from registers iFrom...iFrom+nReg-1 +** over to iTo..iTo+nReg-1. +*/ +SQLITE_PRIVATE void sqlite3ExprCodeMove(Parse *pParse, int iFrom, int iTo, int nReg){ + sqlite3VdbeAddOp3(pParse->pVdbe, OP_Move, iFrom, iTo, nReg); +} + +/* +** Convert a scalar expression node to a TK_REGISTER referencing +** register iReg. The caller must ensure that iReg already contains +** the correct value for the expression. +*/ +static void exprToRegister(Expr *pExpr, int iReg){ + Expr *p = sqlite3ExprSkipCollateAndLikely(pExpr); + if( NEVER(p==0) ) return; + p->op2 = p->op; + p->op = TK_REGISTER; + p->iTable = iReg; + ExprClearProperty(p, EP_Skip); +} + +/* +** Evaluate an expression (either a vector or a scalar expression) and store +** the result in continguous temporary registers. Return the index of +** the first register used to store the result. +** +** If the returned result register is a temporary scalar, then also write +** that register number into *piFreeable. If the returned result register +** is not a temporary or if the expression is a vector set *piFreeable +** to 0. +*/ +static int exprCodeVector(Parse *pParse, Expr *p, int *piFreeable){ + int iResult; + int nResult = sqlite3ExprVectorSize(p); + if( nResult==1 ){ + iResult = sqlite3ExprCodeTemp(pParse, p, piFreeable); + }else{ + *piFreeable = 0; + if( p->op==TK_SELECT ){ +#if SQLITE_OMIT_SUBQUERY + iResult = 0; +#else + iResult = sqlite3CodeSubselect(pParse, p); +#endif + }else{ + int i; + iResult = pParse->nMem+1; + pParse->nMem += nResult; + assert( ExprUseXList(p) ); + for(i=0; ix.pList->a[i].pExpr, i+iResult); + } + } + } + return iResult; +} + +/* +** If the last opcode is a OP_Copy, then set the do-not-merge flag (p5) +** so that a subsequent copy will not be merged into this one. +*/ +static void setDoNotMergeFlagOnCopy(Vdbe *v){ + if( sqlite3VdbeGetOp(v, -1)->opcode==OP_Copy ){ + sqlite3VdbeChangeP5(v, 1); /* Tag trailing OP_Copy as not mergable */ + } +} + +/* +** Generate code to implement special SQL functions that are implemented +** in-line rather than by using the usual callbacks. +*/ +static int exprCodeInlineFunction( + Parse *pParse, /* Parsing context */ + ExprList *pFarg, /* List of function arguments */ + int iFuncId, /* Function ID. One of the INTFUNC_... values */ + int target /* Store function result in this register */ +){ + int nFarg; + Vdbe *v = pParse->pVdbe; + assert( v!=0 ); + assert( pFarg!=0 ); + nFarg = pFarg->nExpr; + assert( nFarg>0 ); /* All in-line functions have at least one argument */ + switch( iFuncId ){ + case INLINEFUNC_coalesce: { + /* Attempt a direct implementation of the built-in COALESCE() and + ** IFNULL() functions. This avoids unnecessary evaluation of + ** arguments past the first non-NULL argument. + */ + int endCoalesce = sqlite3VdbeMakeLabel(pParse); + int i; + assert( nFarg>=2 ); + sqlite3ExprCode(pParse, pFarg->a[0].pExpr, target); + for(i=1; ia[i].pExpr, target); + } + setDoNotMergeFlagOnCopy(v); + sqlite3VdbeResolveLabel(v, endCoalesce); + break; + } + case INLINEFUNC_iif: { + Expr caseExpr; + memset(&caseExpr, 0, sizeof(caseExpr)); + caseExpr.op = TK_CASE; + caseExpr.x.pList = pFarg; + return sqlite3ExprCodeTarget(pParse, &caseExpr, target); + } + + default: { + /* The UNLIKELY() function is a no-op. The result is the value + ** of the first argument. + */ + assert( nFarg==1 || nFarg==2 ); + target = sqlite3ExprCodeTarget(pParse, pFarg->a[0].pExpr, target); + break; + } + + /*********************************************************************** + ** Test-only SQL functions that are only usable if enabled + ** via SQLITE_TESTCTRL_INTERNAL_FUNCTIONS + */ +#if !defined(SQLITE_UNTESTABLE) + case INLINEFUNC_expr_compare: { + /* Compare two expressions using sqlite3ExprCompare() */ + assert( nFarg==2 ); + sqlite3VdbeAddOp2(v, OP_Integer, + sqlite3ExprCompare(0,pFarg->a[0].pExpr, pFarg->a[1].pExpr,-1), + target); + break; + } + + case INLINEFUNC_expr_implies_expr: { + /* Compare two expressions using sqlite3ExprImpliesExpr() */ + assert( nFarg==2 ); + sqlite3VdbeAddOp2(v, OP_Integer, + sqlite3ExprImpliesExpr(pParse,pFarg->a[0].pExpr, pFarg->a[1].pExpr,-1), + target); + break; + } + + case INLINEFUNC_implies_nonnull_row: { + /* REsult of sqlite3ExprImpliesNonNullRow() */ + Expr *pA1; + assert( nFarg==2 ); + pA1 = pFarg->a[1].pExpr; + if( pA1->op==TK_COLUMN ){ + sqlite3VdbeAddOp2(v, OP_Integer, + sqlite3ExprImpliesNonNullRow(pFarg->a[0].pExpr,pA1->iTable), + target); + }else{ + sqlite3VdbeAddOp2(v, OP_Null, 0, target); + } + break; + } + + case INLINEFUNC_affinity: { + /* The AFFINITY() function evaluates to a string that describes + ** the type affinity of the argument. This is used for testing of + ** the SQLite type logic. + */ + const char *azAff[] = { "blob", "text", "numeric", "integer", "real" }; + char aff; + assert( nFarg==1 ); + aff = sqlite3ExprAffinity(pFarg->a[0].pExpr); + sqlite3VdbeLoadString(v, target, + (aff<=SQLITE_AFF_NONE) ? "none" : azAff[aff-SQLITE_AFF_BLOB]); + break; + } +#endif /* !defined(SQLITE_UNTESTABLE) */ + } + return target; +} + + +/* +** Generate code into the current Vdbe to evaluate the given +** expression. Attempt to store the results in register "target". +** Return the register where results are stored. +** +** With this routine, there is no guarantee that results will +** be stored in target. The result might be stored in some other +** register if it is convenient to do so. The calling function +** must check the return code and move the results to the desired +** register. +*/ +SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target){ + Vdbe *v = pParse->pVdbe; /* The VM under construction */ + int op; /* The opcode being coded */ + int inReg = target; /* Results stored in register inReg */ + int regFree1 = 0; /* If non-zero free this temporary register */ + int regFree2 = 0; /* If non-zero free this temporary register */ + int r1, r2; /* Various register numbers */ + Expr tempX; /* Temporary expression node */ + int p5 = 0; + + assert( target>0 && target<=pParse->nMem ); + assert( v!=0 ); + +expr_code_doover: + if( pExpr==0 ){ + op = TK_NULL; + }else{ + assert( !ExprHasVVAProperty(pExpr,EP_Immutable) ); + op = pExpr->op; + } + switch( op ){ + case TK_AGG_COLUMN: { + AggInfo *pAggInfo = pExpr->pAggInfo; + struct AggInfo_col *pCol; + assert( pAggInfo!=0 ); + assert( pExpr->iAgg>=0 && pExpr->iAggnColumn ); + pCol = &pAggInfo->aCol[pExpr->iAgg]; + if( !pAggInfo->directMode ){ + assert( pCol->iMem>0 ); + return pCol->iMem; + }else if( pAggInfo->useSortingIdx ){ + Table *pTab = pCol->pTab; + sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdxPTab, + pCol->iSorterColumn, target); + if( pCol->iColumn<0 ){ + VdbeComment((v,"%s.rowid",pTab->zName)); + }else{ + VdbeComment((v,"%s.%s", + pTab->zName, pTab->aCol[pCol->iColumn].zCnName)); + if( pTab->aCol[pCol->iColumn].affinity==SQLITE_AFF_REAL ){ + sqlite3VdbeAddOp1(v, OP_RealAffinity, target); + } + } + return target; + } + /* Otherwise, fall thru into the TK_COLUMN case */ + /* no break */ deliberate_fall_through + } + case TK_COLUMN: { + int iTab = pExpr->iTable; + int iReg; + if( ExprHasProperty(pExpr, EP_FixedCol) ){ + /* This COLUMN expression is really a constant due to WHERE clause + ** constraints, and that constant is coded by the pExpr->pLeft + ** expresssion. However, make sure the constant has the correct + ** datatype by applying the Affinity of the table column to the + ** constant. + */ + int aff; + iReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft,target); + assert( ExprUseYTab(pExpr) ); + if( pExpr->y.pTab ){ + aff = sqlite3TableColumnAffinity(pExpr->y.pTab, pExpr->iColumn); + }else{ + aff = pExpr->affExpr; + } + if( aff>SQLITE_AFF_BLOB ){ + static const char zAff[] = "B\000C\000D\000E"; + assert( SQLITE_AFF_BLOB=='A' ); + assert( SQLITE_AFF_TEXT=='B' ); + sqlite3VdbeAddOp4(v, OP_Affinity, iReg, 1, 0, + &zAff[(aff-'B')*2], P4_STATIC); + } + return iReg; + } + if( iTab<0 ){ + if( pParse->iSelfTab<0 ){ + /* Other columns in the same row for CHECK constraints or + ** generated columns or for inserting into partial index. + ** The row is unpacked into registers beginning at + ** 0-(pParse->iSelfTab). The rowid (if any) is in a register + ** immediately prior to the first column. + */ + Column *pCol; + Table *pTab; + int iSrc; + int iCol = pExpr->iColumn; + assert( ExprUseYTab(pExpr) ); + pTab = pExpr->y.pTab; + assert( pTab!=0 ); + assert( iCol>=XN_ROWID ); + assert( iColnCol ); + if( iCol<0 ){ + return -1-pParse->iSelfTab; + } + pCol = pTab->aCol + iCol; + testcase( iCol!=sqlite3TableColumnToStorage(pTab,iCol) ); + iSrc = sqlite3TableColumnToStorage(pTab, iCol) - pParse->iSelfTab; +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( pCol->colFlags & COLFLAG_GENERATED ){ + if( pCol->colFlags & COLFLAG_BUSY ){ + sqlite3ErrorMsg(pParse, "generated column loop on \"%s\"", + pCol->zCnName); + return 0; + } + pCol->colFlags |= COLFLAG_BUSY; + if( pCol->colFlags & COLFLAG_NOTAVAIL ){ + sqlite3ExprCodeGeneratedColumn(pParse, pTab, pCol, iSrc); + } + pCol->colFlags &= ~(COLFLAG_BUSY|COLFLAG_NOTAVAIL); + return iSrc; + }else +#endif /* SQLITE_OMIT_GENERATED_COLUMNS */ + if( pCol->affinity==SQLITE_AFF_REAL ){ + sqlite3VdbeAddOp2(v, OP_SCopy, iSrc, target); + sqlite3VdbeAddOp1(v, OP_RealAffinity, target); + return target; + }else{ + return iSrc; + } + }else{ + /* Coding an expression that is part of an index where column names + ** in the index refer to the table to which the index belongs */ + iTab = pParse->iSelfTab - 1; + } + } + assert( ExprUseYTab(pExpr) ); + iReg = sqlite3ExprCodeGetColumn(pParse, pExpr->y.pTab, + pExpr->iColumn, iTab, target, + pExpr->op2); + if( pExpr->y.pTab==0 && pExpr->affExpr==SQLITE_AFF_REAL ){ + sqlite3VdbeAddOp1(v, OP_RealAffinity, iReg); + } + return iReg; + } + case TK_INTEGER: { + codeInteger(pParse, pExpr, 0, target); + return target; + } + case TK_TRUEFALSE: { + sqlite3VdbeAddOp2(v, OP_Integer, sqlite3ExprTruthValue(pExpr), target); + return target; + } +#ifndef SQLITE_OMIT_FLOATING_POINT + case TK_FLOAT: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + codeReal(v, pExpr->u.zToken, 0, target); + return target; + } +#endif + case TK_STRING: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3VdbeLoadString(v, target, pExpr->u.zToken); + return target; + } + default: { + /* Make NULL the default case so that if a bug causes an illegal + ** Expr node to be passed into this function, it will be handled + ** sanely and not crash. But keep the assert() to bring the problem + ** to the attention of the developers. */ + assert( op==TK_NULL || op==TK_ERROR || pParse->db->mallocFailed ); + sqlite3VdbeAddOp2(v, OP_Null, 0, target); + return target; + } +#ifndef SQLITE_OMIT_BLOB_LITERAL + case TK_BLOB: { + int n; + const char *z; + char *zBlob; + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + assert( pExpr->u.zToken[0]=='x' || pExpr->u.zToken[0]=='X' ); + assert( pExpr->u.zToken[1]=='\'' ); + z = &pExpr->u.zToken[2]; + n = sqlite3Strlen30(z) - 1; + assert( z[n]=='\'' ); + zBlob = sqlite3HexToBlob(sqlite3VdbeDb(v), z, n); + sqlite3VdbeAddOp4(v, OP_Blob, n/2, target, 0, zBlob, P4_DYNAMIC); + return target; + } +#endif + case TK_VARIABLE: { + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + assert( pExpr->u.zToken!=0 ); + assert( pExpr->u.zToken[0]!=0 ); + sqlite3VdbeAddOp2(v, OP_Variable, pExpr->iColumn, target); + if( pExpr->u.zToken[1]!=0 ){ + const char *z = sqlite3VListNumToName(pParse->pVList, pExpr->iColumn); + assert( pExpr->u.zToken[0]=='?' || (z && !strcmp(pExpr->u.zToken, z)) ); + pParse->pVList[0] = 0; /* Indicate VList may no longer be enlarged */ + sqlite3VdbeAppendP4(v, (char*)z, P4_STATIC); + } + return target; + } + case TK_REGISTER: { + return pExpr->iTable; + } +#ifndef SQLITE_OMIT_CAST + case TK_CAST: { + /* Expressions of the form: CAST(pLeft AS token) */ + inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target); + if( inReg!=target ){ + sqlite3VdbeAddOp2(v, OP_SCopy, inReg, target); + inReg = target; + } + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3VdbeAddOp2(v, OP_Cast, target, + sqlite3AffinityType(pExpr->u.zToken, 0)); + return inReg; + } +#endif /* SQLITE_OMIT_CAST */ + case TK_IS: + case TK_ISNOT: + op = (op==TK_IS) ? TK_EQ : TK_NE; + p5 = SQLITE_NULLEQ; + /* fall-through */ + case TK_LT: + case TK_LE: + case TK_GT: + case TK_GE: + case TK_NE: + case TK_EQ: { + Expr *pLeft = pExpr->pLeft; + if( sqlite3ExprIsVector(pLeft) ){ + codeVectorCompare(pParse, pExpr, target, op, p5); + }else{ + r1 = sqlite3ExprCodeTemp(pParse, pLeft, ®Free1); + r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, ®Free2); + sqlite3VdbeAddOp2(v, OP_Integer, 1, inReg); + codeCompare(pParse, pLeft, pExpr->pRight, op, r1, r2, + sqlite3VdbeCurrentAddr(v)+2, p5, + ExprHasProperty(pExpr,EP_Commuted)); + assert(TK_LT==OP_Lt); testcase(op==OP_Lt); VdbeCoverageIf(v,op==OP_Lt); + assert(TK_LE==OP_Le); testcase(op==OP_Le); VdbeCoverageIf(v,op==OP_Le); + assert(TK_GT==OP_Gt); testcase(op==OP_Gt); VdbeCoverageIf(v,op==OP_Gt); + assert(TK_GE==OP_Ge); testcase(op==OP_Ge); VdbeCoverageIf(v,op==OP_Ge); + assert(TK_EQ==OP_Eq); testcase(op==OP_Eq); VdbeCoverageIf(v,op==OP_Eq); + assert(TK_NE==OP_Ne); testcase(op==OP_Ne); VdbeCoverageIf(v,op==OP_Ne); + if( p5==SQLITE_NULLEQ ){ + sqlite3VdbeAddOp2(v, OP_Integer, 0, inReg); + }else{ + sqlite3VdbeAddOp3(v, OP_ZeroOrNull, r1, inReg, r2); + } + testcase( regFree1==0 ); + testcase( regFree2==0 ); + } + break; + } + case TK_AND: + case TK_OR: + case TK_PLUS: + case TK_STAR: + case TK_MINUS: + case TK_REM: + case TK_BITAND: + case TK_BITOR: + case TK_SLASH: + case TK_LSHIFT: + case TK_RSHIFT: + case TK_CONCAT: { + assert( TK_AND==OP_And ); testcase( op==TK_AND ); + assert( TK_OR==OP_Or ); testcase( op==TK_OR ); + assert( TK_PLUS==OP_Add ); testcase( op==TK_PLUS ); + assert( TK_MINUS==OP_Subtract ); testcase( op==TK_MINUS ); + assert( TK_REM==OP_Remainder ); testcase( op==TK_REM ); + assert( TK_BITAND==OP_BitAnd ); testcase( op==TK_BITAND ); + assert( TK_BITOR==OP_BitOr ); testcase( op==TK_BITOR ); + assert( TK_SLASH==OP_Divide ); testcase( op==TK_SLASH ); + assert( TK_LSHIFT==OP_ShiftLeft ); testcase( op==TK_LSHIFT ); + assert( TK_RSHIFT==OP_ShiftRight ); testcase( op==TK_RSHIFT ); + assert( TK_CONCAT==OP_Concat ); testcase( op==TK_CONCAT ); + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, ®Free2); + sqlite3VdbeAddOp3(v, op, r2, r1, target); + testcase( regFree1==0 ); + testcase( regFree2==0 ); + break; + } + case TK_UMINUS: { + Expr *pLeft = pExpr->pLeft; + assert( pLeft ); + if( pLeft->op==TK_INTEGER ){ + codeInteger(pParse, pLeft, 1, target); + return target; +#ifndef SQLITE_OMIT_FLOATING_POINT + }else if( pLeft->op==TK_FLOAT ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + codeReal(v, pLeft->u.zToken, 1, target); + return target; +#endif + }else{ + tempX.op = TK_INTEGER; + tempX.flags = EP_IntValue|EP_TokenOnly; + tempX.u.iValue = 0; + ExprClearVVAProperties(&tempX); + r1 = sqlite3ExprCodeTemp(pParse, &tempX, ®Free1); + r2 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free2); + sqlite3VdbeAddOp3(v, OP_Subtract, r2, r1, target); + testcase( regFree2==0 ); + } + break; + } + case TK_BITNOT: + case TK_NOT: { + assert( TK_BITNOT==OP_BitNot ); testcase( op==TK_BITNOT ); + assert( TK_NOT==OP_Not ); testcase( op==TK_NOT ); + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + testcase( regFree1==0 ); + sqlite3VdbeAddOp2(v, op, r1, inReg); + break; + } + case TK_TRUTH: { + int isTrue; /* IS TRUE or IS NOT TRUE */ + int bNormal; /* IS TRUE or IS FALSE */ + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + testcase( regFree1==0 ); + isTrue = sqlite3ExprTruthValue(pExpr->pRight); + bNormal = pExpr->op2==TK_IS; + testcase( isTrue && bNormal); + testcase( !isTrue && bNormal); + sqlite3VdbeAddOp4Int(v, OP_IsTrue, r1, inReg, !isTrue, isTrue ^ bNormal); + break; + } + case TK_ISNULL: + case TK_NOTNULL: { + int addr; + assert( TK_ISNULL==OP_IsNull ); testcase( op==TK_ISNULL ); + assert( TK_NOTNULL==OP_NotNull ); testcase( op==TK_NOTNULL ); + sqlite3VdbeAddOp2(v, OP_Integer, 1, target); + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + testcase( regFree1==0 ); + addr = sqlite3VdbeAddOp1(v, op, r1); + VdbeCoverageIf(v, op==TK_ISNULL); + VdbeCoverageIf(v, op==TK_NOTNULL); + sqlite3VdbeAddOp2(v, OP_Integer, 0, target); + sqlite3VdbeJumpHere(v, addr); + break; + } + case TK_AGG_FUNCTION: { + AggInfo *pInfo = pExpr->pAggInfo; + if( pInfo==0 + || NEVER(pExpr->iAgg<0) + || NEVER(pExpr->iAgg>=pInfo->nFunc) + ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3ErrorMsg(pParse, "misuse of aggregate: %#T()", pExpr); + }else{ + return pInfo->aFunc[pExpr->iAgg].iMem; + } + break; + } + case TK_FUNCTION: { + ExprList *pFarg; /* List of function arguments */ + int nFarg; /* Number of function arguments */ + FuncDef *pDef; /* The function definition object */ + const char *zId; /* The function name */ + u32 constMask = 0; /* Mask of function arguments that are constant */ + int i; /* Loop counter */ + sqlite3 *db = pParse->db; /* The database connection */ + u8 enc = ENC(db); /* The text encoding used by this database */ + CollSeq *pColl = 0; /* A collating sequence */ + +#ifndef SQLITE_OMIT_WINDOWFUNC + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + return pExpr->y.pWin->regResult; + } +#endif + + if( ConstFactorOk(pParse) && sqlite3ExprIsConstantNotJoin(pExpr) ){ + /* SQL functions can be expensive. So try to avoid running them + ** multiple times if we know they always give the same result */ + return sqlite3ExprCodeRunJustOnce(pParse, pExpr, -1); + } + assert( !ExprHasProperty(pExpr, EP_TokenOnly) ); + assert( ExprUseXList(pExpr) ); + pFarg = pExpr->x.pList; + nFarg = pFarg ? pFarg->nExpr : 0; + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + zId = pExpr->u.zToken; + pDef = sqlite3FindFunction(db, zId, nFarg, enc, 0); +#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION + if( pDef==0 && pParse->explain ){ + pDef = sqlite3FindFunction(db, "unknown", nFarg, enc, 0); + } +#endif + if( pDef==0 || pDef->xFinalize!=0 ){ + sqlite3ErrorMsg(pParse, "unknown function: %#T()", pExpr); + break; + } + if( pDef->funcFlags & SQLITE_FUNC_INLINE ){ + assert( (pDef->funcFlags & SQLITE_FUNC_UNSAFE)==0 ); + assert( (pDef->funcFlags & SQLITE_FUNC_DIRECT)==0 ); + return exprCodeInlineFunction(pParse, pFarg, + SQLITE_PTR_TO_INT(pDef->pUserData), target); + }else if( pDef->funcFlags & (SQLITE_FUNC_DIRECT|SQLITE_FUNC_UNSAFE) ){ + sqlite3ExprFunctionUsable(pParse, pExpr, pDef); + } + + for(i=0; ia[i].pExpr) ){ + testcase( i==31 ); + constMask |= MASKBIT32(i); + } + if( (pDef->funcFlags & SQLITE_FUNC_NEEDCOLL)!=0 && !pColl ){ + pColl = sqlite3ExprCollSeq(pParse, pFarg->a[i].pExpr); + } + } + if( pFarg ){ + if( constMask ){ + r1 = pParse->nMem+1; + pParse->nMem += nFarg; + }else{ + r1 = sqlite3GetTempRange(pParse, nFarg); + } + + /* For length() and typeof() functions with a column argument, + ** set the P5 parameter to the OP_Column opcode to OPFLAG_LENGTHARG + ** or OPFLAG_TYPEOFARG respectively, to avoid unnecessary data + ** loading. + */ + if( (pDef->funcFlags & (SQLITE_FUNC_LENGTH|SQLITE_FUNC_TYPEOF))!=0 ){ + u8 exprOp; + assert( nFarg==1 ); + assert( pFarg->a[0].pExpr!=0 ); + exprOp = pFarg->a[0].pExpr->op; + if( exprOp==TK_COLUMN || exprOp==TK_AGG_COLUMN ){ + assert( SQLITE_FUNC_LENGTH==OPFLAG_LENGTHARG ); + assert( SQLITE_FUNC_TYPEOF==OPFLAG_TYPEOFARG ); + testcase( pDef->funcFlags & OPFLAG_LENGTHARG ); + pFarg->a[0].pExpr->op2 = + pDef->funcFlags & (OPFLAG_LENGTHARG|OPFLAG_TYPEOFARG); + } + } + + sqlite3ExprCodeExprList(pParse, pFarg, r1, 0, + SQLITE_ECEL_DUP|SQLITE_ECEL_FACTOR); + }else{ + r1 = 0; + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + /* Possibly overload the function if the first argument is + ** a virtual table column. + ** + ** For infix functions (LIKE, GLOB, REGEXP, and MATCH) use the + ** second argument, not the first, as the argument to test to + ** see if it is a column in a virtual table. This is done because + ** the left operand of infix functions (the operand we want to + ** control overloading) ends up as the second argument to the + ** function. The expression "A glob B" is equivalent to + ** "glob(B,A). We want to use the A in "A glob B" to test + ** for function overloading. But we use the B term in "glob(B,A)". + */ + if( nFarg>=2 && ExprHasProperty(pExpr, EP_InfixFunc) ){ + pDef = sqlite3VtabOverloadFunction(db, pDef, nFarg, pFarg->a[1].pExpr); + }else if( nFarg>0 ){ + pDef = sqlite3VtabOverloadFunction(db, pDef, nFarg, pFarg->a[0].pExpr); + } +#endif + if( pDef->funcFlags & SQLITE_FUNC_NEEDCOLL ){ + if( !pColl ) pColl = db->pDfltColl; + sqlite3VdbeAddOp4(v, OP_CollSeq, 0, 0, 0, (char *)pColl, P4_COLLSEQ); + } +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC + if( (pDef->funcFlags & SQLITE_FUNC_OFFSET)!=0 && ALWAYS(pFarg!=0) ){ + Expr *pArg = pFarg->a[0].pExpr; + if( pArg->op==TK_COLUMN ){ + sqlite3VdbeAddOp3(v, OP_Offset, pArg->iTable, pArg->iColumn, target); + }else{ + sqlite3VdbeAddOp2(v, OP_Null, 0, target); + } + }else +#endif + { + sqlite3VdbeAddFunctionCall(pParse, constMask, r1, target, nFarg, + pDef, pExpr->op2); + } + if( nFarg ){ + if( constMask==0 ){ + sqlite3ReleaseTempRange(pParse, r1, nFarg); + }else{ + sqlite3VdbeReleaseRegisters(pParse, r1, nFarg, constMask, 1); + } + } + return target; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_EXISTS: + case TK_SELECT: { + int nCol; + testcase( op==TK_EXISTS ); + testcase( op==TK_SELECT ); + if( pParse->db->mallocFailed ){ + return 0; + }else if( op==TK_SELECT + && ALWAYS( ExprUseXSelect(pExpr) ) + && (nCol = pExpr->x.pSelect->pEList->nExpr)!=1 + ){ + sqlite3SubselectError(pParse, nCol, 1); + }else{ + return sqlite3CodeSubselect(pParse, pExpr); + } + break; + } + case TK_SELECT_COLUMN: { + int n; + if( pExpr->pLeft->iTable==0 ){ + pExpr->pLeft->iTable = sqlite3CodeSubselect(pParse, pExpr->pLeft); + } + assert( pExpr->pLeft->op==TK_SELECT || pExpr->pLeft->op==TK_ERROR ); + n = sqlite3ExprVectorSize(pExpr->pLeft); + if( pExpr->iTable!=n ){ + sqlite3ErrorMsg(pParse, "%d columns assigned %d values", + pExpr->iTable, n); + } + return pExpr->pLeft->iTable + pExpr->iColumn; + } + case TK_IN: { + int destIfFalse = sqlite3VdbeMakeLabel(pParse); + int destIfNull = sqlite3VdbeMakeLabel(pParse); + sqlite3VdbeAddOp2(v, OP_Null, 0, target); + sqlite3ExprCodeIN(pParse, pExpr, destIfFalse, destIfNull); + sqlite3VdbeAddOp2(v, OP_Integer, 1, target); + sqlite3VdbeResolveLabel(v, destIfFalse); + sqlite3VdbeAddOp2(v, OP_AddImm, target, 0); + sqlite3VdbeResolveLabel(v, destIfNull); + return target; + } +#endif /* SQLITE_OMIT_SUBQUERY */ + + + /* + ** x BETWEEN y AND z + ** + ** This is equivalent to + ** + ** x>=y AND x<=z + ** + ** X is stored in pExpr->pLeft. + ** Y is stored in pExpr->pList->a[0].pExpr. + ** Z is stored in pExpr->pList->a[1].pExpr. + */ + case TK_BETWEEN: { + exprCodeBetween(pParse, pExpr, target, 0, 0); + return target; + } + case TK_SPAN: + case TK_COLLATE: + case TK_UPLUS: { + pExpr = pExpr->pLeft; + goto expr_code_doover; /* 2018-04-28: Prevent deep recursion. OSSFuzz. */ + } + + case TK_TRIGGER: { + /* If the opcode is TK_TRIGGER, then the expression is a reference + ** to a column in the new.* or old.* pseudo-tables available to + ** trigger programs. In this case Expr.iTable is set to 1 for the + ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn + ** is set to the column of the pseudo-table to read, or to -1 to + ** read the rowid field. + ** + ** The expression is implemented using an OP_Param opcode. The p1 + ** parameter is set to 0 for an old.rowid reference, or to (i+1) + ** to reference another column of the old.* pseudo-table, where + ** i is the index of the column. For a new.rowid reference, p1 is + ** set to (n+1), where n is the number of columns in each pseudo-table. + ** For a reference to any other column in the new.* pseudo-table, p1 + ** is set to (n+2+i), where n and i are as defined previously. For + ** example, if the table on which triggers are being fired is + ** declared as: + ** + ** CREATE TABLE t1(a, b); + ** + ** Then p1 is interpreted as follows: + ** + ** p1==0 -> old.rowid p1==3 -> new.rowid + ** p1==1 -> old.a p1==4 -> new.a + ** p1==2 -> old.b p1==5 -> new.b + */ + Table *pTab; + int iCol; + int p1; + + assert( ExprUseYTab(pExpr) ); + pTab = pExpr->y.pTab; + iCol = pExpr->iColumn; + p1 = pExpr->iTable * (pTab->nCol+1) + 1 + + sqlite3TableColumnToStorage(pTab, iCol); + + assert( pExpr->iTable==0 || pExpr->iTable==1 ); + assert( iCol>=-1 && iColnCol ); + assert( pTab->iPKey<0 || iCol!=pTab->iPKey ); + assert( p1>=0 && p1<(pTab->nCol*2+2) ); + + sqlite3VdbeAddOp2(v, OP_Param, p1, target); + VdbeComment((v, "r[%d]=%s.%s", target, + (pExpr->iTable ? "new" : "old"), + (pExpr->iColumn<0 ? "rowid" : pExpr->y.pTab->aCol[iCol].zCnName) + )); + +#ifndef SQLITE_OMIT_FLOATING_POINT + /* If the column has REAL affinity, it may currently be stored as an + ** integer. Use OP_RealAffinity to make sure it is really real. + ** + ** EVIDENCE-OF: R-60985-57662 SQLite will convert the value back to + ** floating point when extracting it from the record. */ + if( iCol>=0 && pTab->aCol[iCol].affinity==SQLITE_AFF_REAL ){ + sqlite3VdbeAddOp1(v, OP_RealAffinity, target); + } +#endif + break; + } + + case TK_VECTOR: { + sqlite3ErrorMsg(pParse, "row value misused"); + break; + } + + /* TK_IF_NULL_ROW Expr nodes are inserted ahead of expressions + ** that derive from the right-hand table of a LEFT JOIN. The + ** Expr.iTable value is the table number for the right-hand table. + ** The expression is only evaluated if that table is not currently + ** on a LEFT JOIN NULL row. + */ + case TK_IF_NULL_ROW: { + int addrINR; + u8 okConstFactor = pParse->okConstFactor; + addrINR = sqlite3VdbeAddOp1(v, OP_IfNullRow, pExpr->iTable); + /* Temporarily disable factoring of constant expressions, since + ** even though expressions may appear to be constant, they are not + ** really constant because they originate from the right-hand side + ** of a LEFT JOIN. */ + pParse->okConstFactor = 0; + inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target); + pParse->okConstFactor = okConstFactor; + sqlite3VdbeJumpHere(v, addrINR); + sqlite3VdbeChangeP3(v, addrINR, inReg); + break; + } + + /* + ** Form A: + ** CASE x WHEN e1 THEN r1 WHEN e2 THEN r2 ... WHEN eN THEN rN ELSE y END + ** + ** Form B: + ** CASE WHEN e1 THEN r1 WHEN e2 THEN r2 ... WHEN eN THEN rN ELSE y END + ** + ** Form A is can be transformed into the equivalent form B as follows: + ** CASE WHEN x=e1 THEN r1 WHEN x=e2 THEN r2 ... + ** WHEN x=eN THEN rN ELSE y END + ** + ** X (if it exists) is in pExpr->pLeft. + ** Y is in the last element of pExpr->x.pList if pExpr->x.pList->nExpr is + ** odd. The Y is also optional. If the number of elements in x.pList + ** is even, then Y is omitted and the "otherwise" result is NULL. + ** Ei is in pExpr->pList->a[i*2] and Ri is pExpr->pList->a[i*2+1]. + ** + ** The result of the expression is the Ri for the first matching Ei, + ** or if there is no matching Ei, the ELSE term Y, or if there is + ** no ELSE term, NULL. + */ + case TK_CASE: { + int endLabel; /* GOTO label for end of CASE stmt */ + int nextCase; /* GOTO label for next WHEN clause */ + int nExpr; /* 2x number of WHEN terms */ + int i; /* Loop counter */ + ExprList *pEList; /* List of WHEN terms */ + struct ExprList_item *aListelem; /* Array of WHEN terms */ + Expr opCompare; /* The X==Ei expression */ + Expr *pX; /* The X expression */ + Expr *pTest = 0; /* X==Ei (form A) or just Ei (form B) */ + Expr *pDel = 0; + sqlite3 *db = pParse->db; + + assert( ExprUseXList(pExpr) && pExpr->x.pList!=0 ); + assert(pExpr->x.pList->nExpr > 0); + pEList = pExpr->x.pList; + aListelem = pEList->a; + nExpr = pEList->nExpr; + endLabel = sqlite3VdbeMakeLabel(pParse); + if( (pX = pExpr->pLeft)!=0 ){ + pDel = sqlite3ExprDup(db, pX, 0); + if( db->mallocFailed ){ + sqlite3ExprDelete(db, pDel); + break; + } + testcase( pX->op==TK_COLUMN ); + exprToRegister(pDel, exprCodeVector(pParse, pDel, ®Free1)); + testcase( regFree1==0 ); + memset(&opCompare, 0, sizeof(opCompare)); + opCompare.op = TK_EQ; + opCompare.pLeft = pDel; + pTest = &opCompare; + /* Ticket b351d95f9cd5ef17e9d9dbae18f5ca8611190001: + ** The value in regFree1 might get SCopy-ed into the file result. + ** So make sure that the regFree1 register is not reused for other + ** purposes and possibly overwritten. */ + regFree1 = 0; + } + for(i=0; iop==TK_COLUMN ); + sqlite3ExprIfFalse(pParse, pTest, nextCase, SQLITE_JUMPIFNULL); + testcase( aListelem[i+1].pExpr->op==TK_COLUMN ); + sqlite3ExprCode(pParse, aListelem[i+1].pExpr, target); + sqlite3VdbeGoto(v, endLabel); + sqlite3VdbeResolveLabel(v, nextCase); + } + if( (nExpr&1)!=0 ){ + sqlite3ExprCode(pParse, pEList->a[nExpr-1].pExpr, target); + }else{ + sqlite3VdbeAddOp2(v, OP_Null, 0, target); + } + sqlite3ExprDelete(db, pDel); + setDoNotMergeFlagOnCopy(v); + sqlite3VdbeResolveLabel(v, endLabel); + break; + } +#ifndef SQLITE_OMIT_TRIGGER + case TK_RAISE: { + assert( pExpr->affExpr==OE_Rollback + || pExpr->affExpr==OE_Abort + || pExpr->affExpr==OE_Fail + || pExpr->affExpr==OE_Ignore + ); + if( !pParse->pTriggerTab && !pParse->nested ){ + sqlite3ErrorMsg(pParse, + "RAISE() may only be used within a trigger-program"); + return 0; + } + if( pExpr->affExpr==OE_Abort ){ + sqlite3MayAbort(pParse); + } + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + if( pExpr->affExpr==OE_Ignore ){ + sqlite3VdbeAddOp4( + v, OP_Halt, SQLITE_OK, OE_Ignore, 0, pExpr->u.zToken,0); + VdbeCoverage(v); + }else{ + sqlite3HaltConstraint(pParse, + pParse->pTriggerTab ? SQLITE_CONSTRAINT_TRIGGER : SQLITE_ERROR, + pExpr->affExpr, pExpr->u.zToken, 0, 0); + } + + break; + } +#endif + } + sqlite3ReleaseTempReg(pParse, regFree1); + sqlite3ReleaseTempReg(pParse, regFree2); + return inReg; +} + +/* +** Generate code that will evaluate expression pExpr just one time +** per prepared statement execution. +** +** If the expression uses functions (that might throw an exception) then +** guard them with an OP_Once opcode to ensure that the code is only executed +** once. If no functions are involved, then factor the code out and put it at +** the end of the prepared statement in the initialization section. +** +** If regDest>=0 then the result is always stored in that register and the +** result is not reusable. If regDest<0 then this routine is free to +** store the value whereever it wants. The register where the expression +** is stored is returned. When regDest<0, two identical expressions might +** code to the same register, if they do not contain function calls and hence +** are factored out into the initialization section at the end of the +** prepared statement. +*/ +SQLITE_PRIVATE int sqlite3ExprCodeRunJustOnce( + Parse *pParse, /* Parsing context */ + Expr *pExpr, /* The expression to code when the VDBE initializes */ + int regDest /* Store the value in this register */ +){ + ExprList *p; + assert( ConstFactorOk(pParse) ); + p = pParse->pConstExpr; + if( regDest<0 && p ){ + struct ExprList_item *pItem; + int i; + for(pItem=p->a, i=p->nExpr; i>0; pItem++, i--){ + if( pItem->reusable && sqlite3ExprCompare(0,pItem->pExpr,pExpr,-1)==0 ){ + return pItem->u.iConstExprReg; + } + } + } + pExpr = sqlite3ExprDup(pParse->db, pExpr, 0); + if( pExpr!=0 && ExprHasProperty(pExpr, EP_HasFunc) ){ + Vdbe *v = pParse->pVdbe; + int addr; + assert( v ); + addr = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + pParse->okConstFactor = 0; + if( !pParse->db->mallocFailed ){ + if( regDest<0 ) regDest = ++pParse->nMem; + sqlite3ExprCode(pParse, pExpr, regDest); + } + pParse->okConstFactor = 1; + sqlite3ExprDelete(pParse->db, pExpr); + sqlite3VdbeJumpHere(v, addr); + }else{ + p = sqlite3ExprListAppend(pParse, p, pExpr); + if( p ){ + struct ExprList_item *pItem = &p->a[p->nExpr-1]; + pItem->reusable = regDest<0; + if( regDest<0 ) regDest = ++pParse->nMem; + pItem->u.iConstExprReg = regDest; + } + pParse->pConstExpr = p; + } + return regDest; +} + +/* +** Generate code to evaluate an expression and store the results +** into a register. Return the register number where the results +** are stored. +** +** If the register is a temporary register that can be deallocated, +** then write its number into *pReg. If the result register is not +** a temporary, then set *pReg to zero. +** +** If pExpr is a constant, then this routine might generate this +** code to fill the register in the initialization section of the +** VDBE program, in order to factor it out of the evaluation loop. +*/ +SQLITE_PRIVATE int sqlite3ExprCodeTemp(Parse *pParse, Expr *pExpr, int *pReg){ + int r2; + pExpr = sqlite3ExprSkipCollateAndLikely(pExpr); + if( ConstFactorOk(pParse) + && ALWAYS(pExpr!=0) + && pExpr->op!=TK_REGISTER + && sqlite3ExprIsConstantNotJoin(pExpr) + ){ + *pReg = 0; + r2 = sqlite3ExprCodeRunJustOnce(pParse, pExpr, -1); + }else{ + int r1 = sqlite3GetTempReg(pParse); + r2 = sqlite3ExprCodeTarget(pParse, pExpr, r1); + if( r2==r1 ){ + *pReg = r1; + }else{ + sqlite3ReleaseTempReg(pParse, r1); + *pReg = 0; + } + } + return r2; +} + +/* +** Generate code that will evaluate expression pExpr and store the +** results in register target. The results are guaranteed to appear +** in register target. +*/ +SQLITE_PRIVATE void sqlite3ExprCode(Parse *pParse, Expr *pExpr, int target){ + int inReg; + + assert( pExpr==0 || !ExprHasVVAProperty(pExpr,EP_Immutable) ); + assert( target>0 && target<=pParse->nMem ); + assert( pParse->pVdbe!=0 || pParse->db->mallocFailed ); + if( pParse->pVdbe==0 ) return; + inReg = sqlite3ExprCodeTarget(pParse, pExpr, target); + if( inReg!=target ){ + u8 op; + if( ALWAYS(pExpr) && ExprHasProperty(pExpr,EP_Subquery) ){ + op = OP_Copy; + }else{ + op = OP_SCopy; + } + sqlite3VdbeAddOp2(pParse->pVdbe, op, inReg, target); + } +} + +/* +** Make a transient copy of expression pExpr and then code it using +** sqlite3ExprCode(). This routine works just like sqlite3ExprCode() +** except that the input expression is guaranteed to be unchanged. +*/ +SQLITE_PRIVATE void sqlite3ExprCodeCopy(Parse *pParse, Expr *pExpr, int target){ + sqlite3 *db = pParse->db; + pExpr = sqlite3ExprDup(db, pExpr, 0); + if( !db->mallocFailed ) sqlite3ExprCode(pParse, pExpr, target); + sqlite3ExprDelete(db, pExpr); +} + +/* +** Generate code that will evaluate expression pExpr and store the +** results in register target. The results are guaranteed to appear +** in register target. If the expression is constant, then this routine +** might choose to code the expression at initialization time. +*/ +SQLITE_PRIVATE void sqlite3ExprCodeFactorable(Parse *pParse, Expr *pExpr, int target){ + if( pParse->okConstFactor && sqlite3ExprIsConstantNotJoin(pExpr) ){ + sqlite3ExprCodeRunJustOnce(pParse, pExpr, target); + }else{ + sqlite3ExprCodeCopy(pParse, pExpr, target); + } +} + +/* +** Generate code that pushes the value of every element of the given +** expression list into a sequence of registers beginning at target. +** +** Return the number of elements evaluated. The number returned will +** usually be pList->nExpr but might be reduced if SQLITE_ECEL_OMITREF +** is defined. +** +** The SQLITE_ECEL_DUP flag prevents the arguments from being +** filled using OP_SCopy. OP_Copy must be used instead. +** +** The SQLITE_ECEL_FACTOR argument allows constant arguments to be +** factored out into initialization code. +** +** The SQLITE_ECEL_REF flag means that expressions in the list with +** ExprList.a[].u.x.iOrderByCol>0 have already been evaluated and stored +** in registers at srcReg, and so the value can be copied from there. +** If SQLITE_ECEL_OMITREF is also set, then the values with u.x.iOrderByCol>0 +** are simply omitted rather than being copied from srcReg. +*/ +SQLITE_PRIVATE int sqlite3ExprCodeExprList( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* The expression list to be coded */ + int target, /* Where to write results */ + int srcReg, /* Source registers if SQLITE_ECEL_REF */ + u8 flags /* SQLITE_ECEL_* flags */ +){ + struct ExprList_item *pItem; + int i, j, n; + u8 copyOp = (flags & SQLITE_ECEL_DUP) ? OP_Copy : OP_SCopy; + Vdbe *v = pParse->pVdbe; + assert( pList!=0 ); + assert( target>0 ); + assert( pParse->pVdbe!=0 ); /* Never gets this far otherwise */ + n = pList->nExpr; + if( !ConstFactorOk(pParse) ) flags &= ~SQLITE_ECEL_FACTOR; + for(pItem=pList->a, i=0; ipExpr; +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + if( pItem->bSorterRef ){ + i--; + n--; + }else +#endif + if( (flags & SQLITE_ECEL_REF)!=0 && (j = pItem->u.x.iOrderByCol)>0 ){ + if( flags & SQLITE_ECEL_OMITREF ){ + i--; + n--; + }else{ + sqlite3VdbeAddOp2(v, copyOp, j+srcReg-1, target+i); + } + }else if( (flags & SQLITE_ECEL_FACTOR)!=0 + && sqlite3ExprIsConstantNotJoin(pExpr) + ){ + sqlite3ExprCodeRunJustOnce(pParse, pExpr, target+i); + }else{ + int inReg = sqlite3ExprCodeTarget(pParse, pExpr, target+i); + if( inReg!=target+i ){ + VdbeOp *pOp; + if( copyOp==OP_Copy + && (pOp=sqlite3VdbeGetOp(v, -1))->opcode==OP_Copy + && pOp->p1+pOp->p3+1==inReg + && pOp->p2+pOp->p3+1==target+i + && pOp->p5==0 /* The do-not-merge flag must be clear */ + ){ + pOp->p3++; + }else{ + sqlite3VdbeAddOp2(v, copyOp, inReg, target+i); + } + } + } + } + return n; +} + +/* +** Generate code for a BETWEEN operator. +** +** x BETWEEN y AND z +** +** The above is equivalent to +** +** x>=y AND x<=z +** +** Code it as such, taking care to do the common subexpression +** elimination of x. +** +** The xJumpIf parameter determines details: +** +** NULL: Store the boolean result in reg[dest] +** sqlite3ExprIfTrue: Jump to dest if true +** sqlite3ExprIfFalse: Jump to dest if false +** +** The jumpIfNull parameter is ignored if xJumpIf is NULL. +*/ +static void exprCodeBetween( + Parse *pParse, /* Parsing and code generating context */ + Expr *pExpr, /* The BETWEEN expression */ + int dest, /* Jump destination or storage location */ + void (*xJump)(Parse*,Expr*,int,int), /* Action to take */ + int jumpIfNull /* Take the jump if the BETWEEN is NULL */ +){ + Expr exprAnd; /* The AND operator in x>=y AND x<=z */ + Expr compLeft; /* The x>=y term */ + Expr compRight; /* The x<=z term */ + int regFree1 = 0; /* Temporary use register */ + Expr *pDel = 0; + sqlite3 *db = pParse->db; + + memset(&compLeft, 0, sizeof(Expr)); + memset(&compRight, 0, sizeof(Expr)); + memset(&exprAnd, 0, sizeof(Expr)); + + assert( ExprUseXList(pExpr) ); + pDel = sqlite3ExprDup(db, pExpr->pLeft, 0); + if( db->mallocFailed==0 ){ + exprAnd.op = TK_AND; + exprAnd.pLeft = &compLeft; + exprAnd.pRight = &compRight; + compLeft.op = TK_GE; + compLeft.pLeft = pDel; + compLeft.pRight = pExpr->x.pList->a[0].pExpr; + compRight.op = TK_LE; + compRight.pLeft = pDel; + compRight.pRight = pExpr->x.pList->a[1].pExpr; + exprToRegister(pDel, exprCodeVector(pParse, pDel, ®Free1)); + if( xJump ){ + xJump(pParse, &exprAnd, dest, jumpIfNull); + }else{ + /* Mark the expression is being from the ON or USING clause of a join + ** so that the sqlite3ExprCodeTarget() routine will not attempt to move + ** it into the Parse.pConstExpr list. We should use a new bit for this, + ** for clarity, but we are out of bits in the Expr.flags field so we + ** have to reuse the EP_FromJoin bit. Bummer. */ + pDel->flags |= EP_FromJoin; + sqlite3ExprCodeTarget(pParse, &exprAnd, dest); + } + sqlite3ReleaseTempReg(pParse, regFree1); + } + sqlite3ExprDelete(db, pDel); + + /* Ensure adequate test coverage */ + testcase( xJump==sqlite3ExprIfTrue && jumpIfNull==0 && regFree1==0 ); + testcase( xJump==sqlite3ExprIfTrue && jumpIfNull==0 && regFree1!=0 ); + testcase( xJump==sqlite3ExprIfTrue && jumpIfNull!=0 && regFree1==0 ); + testcase( xJump==sqlite3ExprIfTrue && jumpIfNull!=0 && regFree1!=0 ); + testcase( xJump==sqlite3ExprIfFalse && jumpIfNull==0 && regFree1==0 ); + testcase( xJump==sqlite3ExprIfFalse && jumpIfNull==0 && regFree1!=0 ); + testcase( xJump==sqlite3ExprIfFalse && jumpIfNull!=0 && regFree1==0 ); + testcase( xJump==sqlite3ExprIfFalse && jumpIfNull!=0 && regFree1!=0 ); + testcase( xJump==0 ); +} + +/* +** Generate code for a boolean expression such that a jump is made +** to the label "dest" if the expression is true but execution +** continues straight thru if the expression is false. +** +** If the expression evaluates to NULL (neither true nor false), then +** take the jump if the jumpIfNull flag is SQLITE_JUMPIFNULL. +** +** This code depends on the fact that certain token values (ex: TK_EQ) +** are the same as opcode values (ex: OP_Eq) that implement the corresponding +** operation. Special comments in vdbe.c and the mkopcodeh.awk script in +** the make process cause these values to align. Assert()s in the code +** below verify that the numbers are aligned correctly. +*/ +SQLITE_PRIVATE void sqlite3ExprIfTrue(Parse *pParse, Expr *pExpr, int dest, int jumpIfNull){ + Vdbe *v = pParse->pVdbe; + int op = 0; + int regFree1 = 0; + int regFree2 = 0; + int r1, r2; + + assert( jumpIfNull==SQLITE_JUMPIFNULL || jumpIfNull==0 ); + if( NEVER(v==0) ) return; /* Existence of VDBE checked by caller */ + if( NEVER(pExpr==0) ) return; /* No way this can happen */ + assert( !ExprHasVVAProperty(pExpr, EP_Immutable) ); + op = pExpr->op; + switch( op ){ + case TK_AND: + case TK_OR: { + Expr *pAlt = sqlite3ExprSimplifiedAndOr(pExpr); + if( pAlt!=pExpr ){ + sqlite3ExprIfTrue(pParse, pAlt, dest, jumpIfNull); + }else if( op==TK_AND ){ + int d2 = sqlite3VdbeMakeLabel(pParse); + testcase( jumpIfNull==0 ); + sqlite3ExprIfFalse(pParse, pExpr->pLeft, d2, + jumpIfNull^SQLITE_JUMPIFNULL); + sqlite3ExprIfTrue(pParse, pExpr->pRight, dest, jumpIfNull); + sqlite3VdbeResolveLabel(v, d2); + }else{ + testcase( jumpIfNull==0 ); + sqlite3ExprIfTrue(pParse, pExpr->pLeft, dest, jumpIfNull); + sqlite3ExprIfTrue(pParse, pExpr->pRight, dest, jumpIfNull); + } + break; + } + case TK_NOT: { + testcase( jumpIfNull==0 ); + sqlite3ExprIfFalse(pParse, pExpr->pLeft, dest, jumpIfNull); + break; + } + case TK_TRUTH: { + int isNot; /* IS NOT TRUE or IS NOT FALSE */ + int isTrue; /* IS TRUE or IS NOT TRUE */ + testcase( jumpIfNull==0 ); + isNot = pExpr->op2==TK_ISNOT; + isTrue = sqlite3ExprTruthValue(pExpr->pRight); + testcase( isTrue && isNot ); + testcase( !isTrue && isNot ); + if( isTrue ^ isNot ){ + sqlite3ExprIfTrue(pParse, pExpr->pLeft, dest, + isNot ? SQLITE_JUMPIFNULL : 0); + }else{ + sqlite3ExprIfFalse(pParse, pExpr->pLeft, dest, + isNot ? SQLITE_JUMPIFNULL : 0); + } + break; + } + case TK_IS: + case TK_ISNOT: + testcase( op==TK_IS ); + testcase( op==TK_ISNOT ); + op = (op==TK_IS) ? TK_EQ : TK_NE; + jumpIfNull = SQLITE_NULLEQ; + /* no break */ deliberate_fall_through + case TK_LT: + case TK_LE: + case TK_GT: + case TK_GE: + case TK_NE: + case TK_EQ: { + if( sqlite3ExprIsVector(pExpr->pLeft) ) goto default_expr; + testcase( jumpIfNull==0 ); + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, ®Free2); + codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op, + r1, r2, dest, jumpIfNull, ExprHasProperty(pExpr,EP_Commuted)); + assert(TK_LT==OP_Lt); testcase(op==OP_Lt); VdbeCoverageIf(v,op==OP_Lt); + assert(TK_LE==OP_Le); testcase(op==OP_Le); VdbeCoverageIf(v,op==OP_Le); + assert(TK_GT==OP_Gt); testcase(op==OP_Gt); VdbeCoverageIf(v,op==OP_Gt); + assert(TK_GE==OP_Ge); testcase(op==OP_Ge); VdbeCoverageIf(v,op==OP_Ge); + assert(TK_EQ==OP_Eq); testcase(op==OP_Eq); + VdbeCoverageIf(v, op==OP_Eq && jumpIfNull==SQLITE_NULLEQ); + VdbeCoverageIf(v, op==OP_Eq && jumpIfNull!=SQLITE_NULLEQ); + assert(TK_NE==OP_Ne); testcase(op==OP_Ne); + VdbeCoverageIf(v, op==OP_Ne && jumpIfNull==SQLITE_NULLEQ); + VdbeCoverageIf(v, op==OP_Ne && jumpIfNull!=SQLITE_NULLEQ); + testcase( regFree1==0 ); + testcase( regFree2==0 ); + break; + } + case TK_ISNULL: + case TK_NOTNULL: { + assert( TK_ISNULL==OP_IsNull ); testcase( op==TK_ISNULL ); + assert( TK_NOTNULL==OP_NotNull ); testcase( op==TK_NOTNULL ); + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + sqlite3VdbeAddOp2(v, op, r1, dest); + VdbeCoverageIf(v, op==TK_ISNULL); + VdbeCoverageIf(v, op==TK_NOTNULL); + testcase( regFree1==0 ); + break; + } + case TK_BETWEEN: { + testcase( jumpIfNull==0 ); + exprCodeBetween(pParse, pExpr, dest, sqlite3ExprIfTrue, jumpIfNull); + break; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_IN: { + int destIfFalse = sqlite3VdbeMakeLabel(pParse); + int destIfNull = jumpIfNull ? dest : destIfFalse; + sqlite3ExprCodeIN(pParse, pExpr, destIfFalse, destIfNull); + sqlite3VdbeGoto(v, dest); + sqlite3VdbeResolveLabel(v, destIfFalse); + break; + } +#endif + default: { + default_expr: + if( ExprAlwaysTrue(pExpr) ){ + sqlite3VdbeGoto(v, dest); + }else if( ExprAlwaysFalse(pExpr) ){ + /* No-op */ + }else{ + r1 = sqlite3ExprCodeTemp(pParse, pExpr, ®Free1); + sqlite3VdbeAddOp3(v, OP_If, r1, dest, jumpIfNull!=0); + VdbeCoverage(v); + testcase( regFree1==0 ); + testcase( jumpIfNull==0 ); + } + break; + } + } + sqlite3ReleaseTempReg(pParse, regFree1); + sqlite3ReleaseTempReg(pParse, regFree2); +} + +/* +** Generate code for a boolean expression such that a jump is made +** to the label "dest" if the expression is false but execution +** continues straight thru if the expression is true. +** +** If the expression evaluates to NULL (neither true nor false) then +** jump if jumpIfNull is SQLITE_JUMPIFNULL or fall through if jumpIfNull +** is 0. +*/ +SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse *pParse, Expr *pExpr, int dest, int jumpIfNull){ + Vdbe *v = pParse->pVdbe; + int op = 0; + int regFree1 = 0; + int regFree2 = 0; + int r1, r2; + + assert( jumpIfNull==SQLITE_JUMPIFNULL || jumpIfNull==0 ); + if( NEVER(v==0) ) return; /* Existence of VDBE checked by caller */ + if( pExpr==0 ) return; + assert( !ExprHasVVAProperty(pExpr,EP_Immutable) ); + + /* The value of pExpr->op and op are related as follows: + ** + ** pExpr->op op + ** --------- ---------- + ** TK_ISNULL OP_NotNull + ** TK_NOTNULL OP_IsNull + ** TK_NE OP_Eq + ** TK_EQ OP_Ne + ** TK_GT OP_Le + ** TK_LE OP_Gt + ** TK_GE OP_Lt + ** TK_LT OP_Ge + ** + ** For other values of pExpr->op, op is undefined and unused. + ** The value of TK_ and OP_ constants are arranged such that we + ** can compute the mapping above using the following expression. + ** Assert()s verify that the computation is correct. + */ + op = ((pExpr->op+(TK_ISNULL&1))^1)-(TK_ISNULL&1); + + /* Verify correct alignment of TK_ and OP_ constants + */ + assert( pExpr->op!=TK_ISNULL || op==OP_NotNull ); + assert( pExpr->op!=TK_NOTNULL || op==OP_IsNull ); + assert( pExpr->op!=TK_NE || op==OP_Eq ); + assert( pExpr->op!=TK_EQ || op==OP_Ne ); + assert( pExpr->op!=TK_LT || op==OP_Ge ); + assert( pExpr->op!=TK_LE || op==OP_Gt ); + assert( pExpr->op!=TK_GT || op==OP_Le ); + assert( pExpr->op!=TK_GE || op==OP_Lt ); + + switch( pExpr->op ){ + case TK_AND: + case TK_OR: { + Expr *pAlt = sqlite3ExprSimplifiedAndOr(pExpr); + if( pAlt!=pExpr ){ + sqlite3ExprIfFalse(pParse, pAlt, dest, jumpIfNull); + }else if( pExpr->op==TK_AND ){ + testcase( jumpIfNull==0 ); + sqlite3ExprIfFalse(pParse, pExpr->pLeft, dest, jumpIfNull); + sqlite3ExprIfFalse(pParse, pExpr->pRight, dest, jumpIfNull); + }else{ + int d2 = sqlite3VdbeMakeLabel(pParse); + testcase( jumpIfNull==0 ); + sqlite3ExprIfTrue(pParse, pExpr->pLeft, d2, + jumpIfNull^SQLITE_JUMPIFNULL); + sqlite3ExprIfFalse(pParse, pExpr->pRight, dest, jumpIfNull); + sqlite3VdbeResolveLabel(v, d2); + } + break; + } + case TK_NOT: { + testcase( jumpIfNull==0 ); + sqlite3ExprIfTrue(pParse, pExpr->pLeft, dest, jumpIfNull); + break; + } + case TK_TRUTH: { + int isNot; /* IS NOT TRUE or IS NOT FALSE */ + int isTrue; /* IS TRUE or IS NOT TRUE */ + testcase( jumpIfNull==0 ); + isNot = pExpr->op2==TK_ISNOT; + isTrue = sqlite3ExprTruthValue(pExpr->pRight); + testcase( isTrue && isNot ); + testcase( !isTrue && isNot ); + if( isTrue ^ isNot ){ + /* IS TRUE and IS NOT FALSE */ + sqlite3ExprIfFalse(pParse, pExpr->pLeft, dest, + isNot ? 0 : SQLITE_JUMPIFNULL); + + }else{ + /* IS FALSE and IS NOT TRUE */ + sqlite3ExprIfTrue(pParse, pExpr->pLeft, dest, + isNot ? 0 : SQLITE_JUMPIFNULL); + } + break; + } + case TK_IS: + case TK_ISNOT: + testcase( pExpr->op==TK_IS ); + testcase( pExpr->op==TK_ISNOT ); + op = (pExpr->op==TK_IS) ? TK_NE : TK_EQ; + jumpIfNull = SQLITE_NULLEQ; + /* no break */ deliberate_fall_through + case TK_LT: + case TK_LE: + case TK_GT: + case TK_GE: + case TK_NE: + case TK_EQ: { + if( sqlite3ExprIsVector(pExpr->pLeft) ) goto default_expr; + testcase( jumpIfNull==0 ); + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, ®Free2); + codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op, + r1, r2, dest, jumpIfNull,ExprHasProperty(pExpr,EP_Commuted)); + assert(TK_LT==OP_Lt); testcase(op==OP_Lt); VdbeCoverageIf(v,op==OP_Lt); + assert(TK_LE==OP_Le); testcase(op==OP_Le); VdbeCoverageIf(v,op==OP_Le); + assert(TK_GT==OP_Gt); testcase(op==OP_Gt); VdbeCoverageIf(v,op==OP_Gt); + assert(TK_GE==OP_Ge); testcase(op==OP_Ge); VdbeCoverageIf(v,op==OP_Ge); + assert(TK_EQ==OP_Eq); testcase(op==OP_Eq); + VdbeCoverageIf(v, op==OP_Eq && jumpIfNull!=SQLITE_NULLEQ); + VdbeCoverageIf(v, op==OP_Eq && jumpIfNull==SQLITE_NULLEQ); + assert(TK_NE==OP_Ne); testcase(op==OP_Ne); + VdbeCoverageIf(v, op==OP_Ne && jumpIfNull!=SQLITE_NULLEQ); + VdbeCoverageIf(v, op==OP_Ne && jumpIfNull==SQLITE_NULLEQ); + testcase( regFree1==0 ); + testcase( regFree2==0 ); + break; + } + case TK_ISNULL: + case TK_NOTNULL: { + r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + sqlite3VdbeAddOp2(v, op, r1, dest); + testcase( op==TK_ISNULL ); VdbeCoverageIf(v, op==TK_ISNULL); + testcase( op==TK_NOTNULL ); VdbeCoverageIf(v, op==TK_NOTNULL); + testcase( regFree1==0 ); + break; + } + case TK_BETWEEN: { + testcase( jumpIfNull==0 ); + exprCodeBetween(pParse, pExpr, dest, sqlite3ExprIfFalse, jumpIfNull); + break; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_IN: { + if( jumpIfNull ){ + sqlite3ExprCodeIN(pParse, pExpr, dest, dest); + }else{ + int destIfNull = sqlite3VdbeMakeLabel(pParse); + sqlite3ExprCodeIN(pParse, pExpr, dest, destIfNull); + sqlite3VdbeResolveLabel(v, destIfNull); + } + break; + } +#endif + default: { + default_expr: + if( ExprAlwaysFalse(pExpr) ){ + sqlite3VdbeGoto(v, dest); + }else if( ExprAlwaysTrue(pExpr) ){ + /* no-op */ + }else{ + r1 = sqlite3ExprCodeTemp(pParse, pExpr, ®Free1); + sqlite3VdbeAddOp3(v, OP_IfNot, r1, dest, jumpIfNull!=0); + VdbeCoverage(v); + testcase( regFree1==0 ); + testcase( jumpIfNull==0 ); + } + break; + } + } + sqlite3ReleaseTempReg(pParse, regFree1); + sqlite3ReleaseTempReg(pParse, regFree2); +} + +/* +** Like sqlite3ExprIfFalse() except that a copy is made of pExpr before +** code generation, and that copy is deleted after code generation. This +** ensures that the original pExpr is unchanged. +*/ +SQLITE_PRIVATE void sqlite3ExprIfFalseDup(Parse *pParse, Expr *pExpr, int dest,int jumpIfNull){ + sqlite3 *db = pParse->db; + Expr *pCopy = sqlite3ExprDup(db, pExpr, 0); + if( db->mallocFailed==0 ){ + sqlite3ExprIfFalse(pParse, pCopy, dest, jumpIfNull); + } + sqlite3ExprDelete(db, pCopy); +} + +/* +** Expression pVar is guaranteed to be an SQL variable. pExpr may be any +** type of expression. +** +** If pExpr is a simple SQL value - an integer, real, string, blob +** or NULL value - then the VDBE currently being prepared is configured +** to re-prepare each time a new value is bound to variable pVar. +** +** Additionally, if pExpr is a simple SQL value and the value is the +** same as that currently bound to variable pVar, non-zero is returned. +** Otherwise, if the values are not the same or if pExpr is not a simple +** SQL value, zero is returned. +*/ +static int exprCompareVariable( + const Parse *pParse, + const Expr *pVar, + const Expr *pExpr +){ + int res = 0; + int iVar; + sqlite3_value *pL, *pR = 0; + + sqlite3ValueFromExpr(pParse->db, pExpr, SQLITE_UTF8, SQLITE_AFF_BLOB, &pR); + if( pR ){ + iVar = pVar->iColumn; + sqlite3VdbeSetVarmask(pParse->pVdbe, iVar); + pL = sqlite3VdbeGetBoundValue(pParse->pReprepare, iVar, SQLITE_AFF_BLOB); + if( pL ){ + if( sqlite3_value_type(pL)==SQLITE_TEXT ){ + sqlite3_value_text(pL); /* Make sure the encoding is UTF-8 */ + } + res = 0==sqlite3MemCompare(pL, pR, 0); + } + sqlite3ValueFree(pR); + sqlite3ValueFree(pL); + } + + return res; +} + +/* +** Do a deep comparison of two expression trees. Return 0 if the two +** expressions are completely identical. Return 1 if they differ only +** by a COLLATE operator at the top level. Return 2 if there are differences +** other than the top-level COLLATE operator. +** +** If any subelement of pB has Expr.iTable==(-1) then it is allowed +** to compare equal to an equivalent element in pA with Expr.iTable==iTab. +** +** The pA side might be using TK_REGISTER. If that is the case and pB is +** not using TK_REGISTER but is otherwise equivalent, then still return 0. +** +** Sometimes this routine will return 2 even if the two expressions +** really are equivalent. If we cannot prove that the expressions are +** identical, we return 2 just to be safe. So if this routine +** returns 2, then you do not really know for certain if the two +** expressions are the same. But if you get a 0 or 1 return, then you +** can be sure the expressions are the same. In the places where +** this routine is used, it does not hurt to get an extra 2 - that +** just might result in some slightly slower code. But returning +** an incorrect 0 or 1 could lead to a malfunction. +** +** If pParse is not NULL then TK_VARIABLE terms in pA with bindings in +** pParse->pReprepare can be matched against literals in pB. The +** pParse->pVdbe->expmask bitmask is updated for each variable referenced. +** If pParse is NULL (the normal case) then any TK_VARIABLE term in +** Argument pParse should normally be NULL. If it is not NULL and pA or +** pB causes a return value of 2. +*/ +SQLITE_PRIVATE int sqlite3ExprCompare( + const Parse *pParse, + const Expr *pA, + const Expr *pB, + int iTab +){ + u32 combinedFlags; + if( pA==0 || pB==0 ){ + return pB==pA ? 0 : 2; + } + if( pParse && pA->op==TK_VARIABLE && exprCompareVariable(pParse, pA, pB) ){ + return 0; + } + combinedFlags = pA->flags | pB->flags; + if( combinedFlags & EP_IntValue ){ + if( (pA->flags&pB->flags&EP_IntValue)!=0 && pA->u.iValue==pB->u.iValue ){ + return 0; + } + return 2; + } + if( pA->op!=pB->op || pA->op==TK_RAISE ){ + if( pA->op==TK_COLLATE && sqlite3ExprCompare(pParse, pA->pLeft,pB,iTab)<2 ){ + return 1; + } + if( pB->op==TK_COLLATE && sqlite3ExprCompare(pParse, pA,pB->pLeft,iTab)<2 ){ + return 1; + } + return 2; + } + assert( !ExprHasProperty(pA, EP_IntValue) ); + assert( !ExprHasProperty(pB, EP_IntValue) ); + if( pA->u.zToken ){ + if( pA->op==TK_FUNCTION || pA->op==TK_AGG_FUNCTION ){ + if( sqlite3StrICmp(pA->u.zToken,pB->u.zToken)!=0 ) return 2; +#ifndef SQLITE_OMIT_WINDOWFUNC + assert( pA->op==pB->op ); + if( ExprHasProperty(pA,EP_WinFunc)!=ExprHasProperty(pB,EP_WinFunc) ){ + return 2; + } + if( ExprHasProperty(pA,EP_WinFunc) ){ + if( sqlite3WindowCompare(pParse, pA->y.pWin, pB->y.pWin, 1)!=0 ){ + return 2; + } + } +#endif + }else if( pA->op==TK_NULL ){ + return 0; + }else if( pA->op==TK_COLLATE ){ + if( sqlite3_stricmp(pA->u.zToken,pB->u.zToken)!=0 ) return 2; + }else + if( pB->u.zToken!=0 + && pA->op!=TK_COLUMN + && pA->op!=TK_AGG_COLUMN + && strcmp(pA->u.zToken,pB->u.zToken)!=0 + ){ + return 2; + } + } + if( (pA->flags & (EP_Distinct|EP_Commuted)) + != (pB->flags & (EP_Distinct|EP_Commuted)) ) return 2; + if( ALWAYS((combinedFlags & EP_TokenOnly)==0) ){ + if( combinedFlags & EP_xIsSelect ) return 2; + if( (combinedFlags & EP_FixedCol)==0 + && sqlite3ExprCompare(pParse, pA->pLeft, pB->pLeft, iTab) ) return 2; + if( sqlite3ExprCompare(pParse, pA->pRight, pB->pRight, iTab) ) return 2; + if( sqlite3ExprListCompare(pA->x.pList, pB->x.pList, iTab) ) return 2; + if( pA->op!=TK_STRING + && pA->op!=TK_TRUEFALSE + && ALWAYS((combinedFlags & EP_Reduced)==0) + ){ + if( pA->iColumn!=pB->iColumn ) return 2; + if( pA->op2!=pB->op2 && pA->op==TK_TRUTH ) return 2; + if( pA->op!=TK_IN && pA->iTable!=pB->iTable && pA->iTable!=iTab ){ + return 2; + } + } + } + return 0; +} + +/* +** Compare two ExprList objects. Return 0 if they are identical, 1 +** if they are certainly different, or 2 if it is not possible to +** determine if they are identical or not. +** +** If any subelement of pB has Expr.iTable==(-1) then it is allowed +** to compare equal to an equivalent element in pA with Expr.iTable==iTab. +** +** This routine might return non-zero for equivalent ExprLists. The +** only consequence will be disabled optimizations. But this routine +** must never return 0 if the two ExprList objects are different, or +** a malfunction will result. +** +** Two NULL pointers are considered to be the same. But a NULL pointer +** always differs from a non-NULL pointer. +*/ +SQLITE_PRIVATE int sqlite3ExprListCompare(const ExprList *pA, const ExprList *pB, int iTab){ + int i; + if( pA==0 && pB==0 ) return 0; + if( pA==0 || pB==0 ) return 1; + if( pA->nExpr!=pB->nExpr ) return 1; + for(i=0; inExpr; i++){ + int res; + Expr *pExprA = pA->a[i].pExpr; + Expr *pExprB = pB->a[i].pExpr; + if( pA->a[i].sortFlags!=pB->a[i].sortFlags ) return 1; + if( (res = sqlite3ExprCompare(0, pExprA, pExprB, iTab)) ) return res; + } + return 0; +} + +/* +** Like sqlite3ExprCompare() except COLLATE operators at the top-level +** are ignored. +*/ +SQLITE_PRIVATE int sqlite3ExprCompareSkip(Expr *pA,Expr *pB, int iTab){ + return sqlite3ExprCompare(0, + sqlite3ExprSkipCollateAndLikely(pA), + sqlite3ExprSkipCollateAndLikely(pB), + iTab); +} + +/* +** Return non-zero if Expr p can only be true if pNN is not NULL. +** +** Or if seenNot is true, return non-zero if Expr p can only be +** non-NULL if pNN is not NULL +*/ +static int exprImpliesNotNull( + const Parse *pParse,/* Parsing context */ + const Expr *p, /* The expression to be checked */ + const Expr *pNN, /* The expression that is NOT NULL */ + int iTab, /* Table being evaluated */ + int seenNot /* Return true only if p can be any non-NULL value */ +){ + assert( p ); + assert( pNN ); + if( sqlite3ExprCompare(pParse, p, pNN, iTab)==0 ){ + return pNN->op!=TK_NULL; + } + switch( p->op ){ + case TK_IN: { + if( seenNot && ExprHasProperty(p, EP_xIsSelect) ) return 0; + assert( ExprUseXSelect(p) || (p->x.pList!=0 && p->x.pList->nExpr>0) ); + return exprImpliesNotNull(pParse, p->pLeft, pNN, iTab, 1); + } + case TK_BETWEEN: { + ExprList *pList; + assert( ExprUseXList(p) ); + pList = p->x.pList; + assert( pList!=0 ); + assert( pList->nExpr==2 ); + if( seenNot ) return 0; + if( exprImpliesNotNull(pParse, pList->a[0].pExpr, pNN, iTab, 1) + || exprImpliesNotNull(pParse, pList->a[1].pExpr, pNN, iTab, 1) + ){ + return 1; + } + return exprImpliesNotNull(pParse, p->pLeft, pNN, iTab, 1); + } + case TK_EQ: + case TK_NE: + case TK_LT: + case TK_LE: + case TK_GT: + case TK_GE: + case TK_PLUS: + case TK_MINUS: + case TK_BITOR: + case TK_LSHIFT: + case TK_RSHIFT: + case TK_CONCAT: + seenNot = 1; + /* no break */ deliberate_fall_through + case TK_STAR: + case TK_REM: + case TK_BITAND: + case TK_SLASH: { + if( exprImpliesNotNull(pParse, p->pRight, pNN, iTab, seenNot) ) return 1; + /* no break */ deliberate_fall_through + } + case TK_SPAN: + case TK_COLLATE: + case TK_UPLUS: + case TK_UMINUS: { + return exprImpliesNotNull(pParse, p->pLeft, pNN, iTab, seenNot); + } + case TK_TRUTH: { + if( seenNot ) return 0; + if( p->op2!=TK_IS ) return 0; + return exprImpliesNotNull(pParse, p->pLeft, pNN, iTab, 1); + } + case TK_BITNOT: + case TK_NOT: { + return exprImpliesNotNull(pParse, p->pLeft, pNN, iTab, 1); + } + } + return 0; +} + +/* +** Return true if we can prove the pE2 will always be true if pE1 is +** true. Return false if we cannot complete the proof or if pE2 might +** be false. Examples: +** +** pE1: x==5 pE2: x==5 Result: true +** pE1: x>0 pE2: x==5 Result: false +** pE1: x=21 pE2: x=21 OR y=43 Result: true +** pE1: x!=123 pE2: x IS NOT NULL Result: true +** pE1: x!=?1 pE2: x IS NOT NULL Result: true +** pE1: x IS NULL pE2: x IS NOT NULL Result: false +** pE1: x IS ?2 pE2: x IS NOT NULL Reuslt: false +** +** When comparing TK_COLUMN nodes between pE1 and pE2, if pE2 has +** Expr.iTable<0 then assume a table number given by iTab. +** +** If pParse is not NULL, then the values of bound variables in pE1 are +** compared against literal values in pE2 and pParse->pVdbe->expmask is +** modified to record which bound variables are referenced. If pParse +** is NULL, then false will be returned if pE1 contains any bound variables. +** +** When in doubt, return false. Returning true might give a performance +** improvement. Returning false might cause a performance reduction, but +** it will always give the correct answer and is hence always safe. +*/ +SQLITE_PRIVATE int sqlite3ExprImpliesExpr( + const Parse *pParse, + const Expr *pE1, + const Expr *pE2, + int iTab +){ + if( sqlite3ExprCompare(pParse, pE1, pE2, iTab)==0 ){ + return 1; + } + if( pE2->op==TK_OR + && (sqlite3ExprImpliesExpr(pParse, pE1, pE2->pLeft, iTab) + || sqlite3ExprImpliesExpr(pParse, pE1, pE2->pRight, iTab) ) + ){ + return 1; + } + if( pE2->op==TK_NOTNULL + && exprImpliesNotNull(pParse, pE1, pE2->pLeft, iTab, 0) + ){ + return 1; + } + return 0; +} + +/* +** This is the Expr node callback for sqlite3ExprImpliesNonNullRow(). +** If the expression node requires that the table at pWalker->iCur +** have one or more non-NULL column, then set pWalker->eCode to 1 and abort. +** +** This routine controls an optimization. False positives (setting +** pWalker->eCode to 1 when it should not be) are deadly, but false-negatives +** (never setting pWalker->eCode) is a harmless missed optimization. +*/ +static int impliesNotNullRow(Walker *pWalker, Expr *pExpr){ + testcase( pExpr->op==TK_AGG_COLUMN ); + testcase( pExpr->op==TK_AGG_FUNCTION ); + if( ExprHasProperty(pExpr, EP_FromJoin) ) return WRC_Prune; + switch( pExpr->op ){ + case TK_ISNOT: + case TK_ISNULL: + case TK_NOTNULL: + case TK_IS: + case TK_OR: + case TK_VECTOR: + case TK_CASE: + case TK_IN: + case TK_FUNCTION: + case TK_TRUTH: + testcase( pExpr->op==TK_ISNOT ); + testcase( pExpr->op==TK_ISNULL ); + testcase( pExpr->op==TK_NOTNULL ); + testcase( pExpr->op==TK_IS ); + testcase( pExpr->op==TK_OR ); + testcase( pExpr->op==TK_VECTOR ); + testcase( pExpr->op==TK_CASE ); + testcase( pExpr->op==TK_IN ); + testcase( pExpr->op==TK_FUNCTION ); + testcase( pExpr->op==TK_TRUTH ); + return WRC_Prune; + case TK_COLUMN: + if( pWalker->u.iCur==pExpr->iTable ){ + pWalker->eCode = 1; + return WRC_Abort; + } + return WRC_Prune; + + case TK_AND: + if( pWalker->eCode==0 ){ + sqlite3WalkExpr(pWalker, pExpr->pLeft); + if( pWalker->eCode ){ + pWalker->eCode = 0; + sqlite3WalkExpr(pWalker, pExpr->pRight); + } + } + return WRC_Prune; + + case TK_BETWEEN: + if( sqlite3WalkExpr(pWalker, pExpr->pLeft)==WRC_Abort ){ + assert( pWalker->eCode ); + return WRC_Abort; + } + return WRC_Prune; + + /* Virtual tables are allowed to use constraints like x=NULL. So + ** a term of the form x=y does not prove that y is not null if x + ** is the column of a virtual table */ + case TK_EQ: + case TK_NE: + case TK_LT: + case TK_LE: + case TK_GT: + case TK_GE: { + Expr *pLeft = pExpr->pLeft; + Expr *pRight = pExpr->pRight; + testcase( pExpr->op==TK_EQ ); + testcase( pExpr->op==TK_NE ); + testcase( pExpr->op==TK_LT ); + testcase( pExpr->op==TK_LE ); + testcase( pExpr->op==TK_GT ); + testcase( pExpr->op==TK_GE ); + /* The y.pTab=0 assignment in wherecode.c always happens after the + ** impliesNotNullRow() test */ + assert( pLeft->op!=TK_COLUMN || ExprUseYTab(pLeft) ); + assert( pRight->op!=TK_COLUMN || ExprUseYTab(pRight) ); + if( (pLeft->op==TK_COLUMN + && pLeft->y.pTab!=0 + && IsVirtual(pLeft->y.pTab)) + || (pRight->op==TK_COLUMN + && pRight->y.pTab!=0 + && IsVirtual(pRight->y.pTab)) + ){ + return WRC_Prune; + } + /* no break */ deliberate_fall_through + } + default: + return WRC_Continue; + } +} + +/* +** Return true (non-zero) if expression p can only be true if at least +** one column of table iTab is non-null. In other words, return true +** if expression p will always be NULL or false if every column of iTab +** is NULL. +** +** False negatives are acceptable. In other words, it is ok to return +** zero even if expression p will never be true of every column of iTab +** is NULL. A false negative is merely a missed optimization opportunity. +** +** False positives are not allowed, however. A false positive may result +** in an incorrect answer. +** +** Terms of p that are marked with EP_FromJoin (and hence that come from +** the ON or USING clauses of LEFT JOINS) are excluded from the analysis. +** +** This routine is used to check if a LEFT JOIN can be converted into +** an ordinary JOIN. The p argument is the WHERE clause. If the WHERE +** clause requires that some column of the right table of the LEFT JOIN +** be non-NULL, then the LEFT JOIN can be safely converted into an +** ordinary join. +*/ +SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr *p, int iTab){ + Walker w; + p = sqlite3ExprSkipCollateAndLikely(p); + if( p==0 ) return 0; + if( p->op==TK_NOTNULL ){ + p = p->pLeft; + }else{ + while( p->op==TK_AND ){ + if( sqlite3ExprImpliesNonNullRow(p->pLeft, iTab) ) return 1; + p = p->pRight; + } + } + w.xExprCallback = impliesNotNullRow; + w.xSelectCallback = 0; + w.xSelectCallback2 = 0; + w.eCode = 0; + w.u.iCur = iTab; + sqlite3WalkExpr(&w, p); + return w.eCode; +} + +/* +** An instance of the following structure is used by the tree walker +** to determine if an expression can be evaluated by reference to the +** index only, without having to do a search for the corresponding +** table entry. The IdxCover.pIdx field is the index. IdxCover.iCur +** is the cursor for the table. +*/ +struct IdxCover { + Index *pIdx; /* The index to be tested for coverage */ + int iCur; /* Cursor number for the table corresponding to the index */ +}; + +/* +** Check to see if there are references to columns in table +** pWalker->u.pIdxCover->iCur can be satisfied using the index +** pWalker->u.pIdxCover->pIdx. +*/ +static int exprIdxCover(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_COLUMN + && pExpr->iTable==pWalker->u.pIdxCover->iCur + && sqlite3TableColumnToIndex(pWalker->u.pIdxCover->pIdx, pExpr->iColumn)<0 + ){ + pWalker->eCode = 1; + return WRC_Abort; + } + return WRC_Continue; +} + +/* +** Determine if an index pIdx on table with cursor iCur contains will +** the expression pExpr. Return true if the index does cover the +** expression and false if the pExpr expression references table columns +** that are not found in the index pIdx. +** +** An index covering an expression means that the expression can be +** evaluated using only the index and without having to lookup the +** corresponding table entry. +*/ +SQLITE_PRIVATE int sqlite3ExprCoveredByIndex( + Expr *pExpr, /* The index to be tested */ + int iCur, /* The cursor number for the corresponding table */ + Index *pIdx /* The index that might be used for coverage */ +){ + Walker w; + struct IdxCover xcov; + memset(&w, 0, sizeof(w)); + xcov.iCur = iCur; + xcov.pIdx = pIdx; + w.xExprCallback = exprIdxCover; + w.u.pIdxCover = &xcov; + sqlite3WalkExpr(&w, pExpr); + return !w.eCode; +} + + +/* Structure used to pass information throught the Walker in order to +** implement sqlite3ReferencesSrcList(). +*/ +struct RefSrcList { + sqlite3 *db; /* Database connection used for sqlite3DbRealloc() */ + SrcList *pRef; /* Looking for references to these tables */ + i64 nExclude; /* Number of tables to exclude from the search */ + int *aiExclude; /* Cursor IDs for tables to exclude from the search */ +}; + +/* +** Walker SELECT callbacks for sqlite3ReferencesSrcList(). +** +** When entering a new subquery on the pExpr argument, add all FROM clause +** entries for that subquery to the exclude list. +** +** When leaving the subquery, remove those entries from the exclude list. +*/ +static int selectRefEnter(Walker *pWalker, Select *pSelect){ + struct RefSrcList *p = pWalker->u.pRefSrcList; + SrcList *pSrc = pSelect->pSrc; + i64 i, j; + int *piNew; + if( pSrc->nSrc==0 ) return WRC_Continue; + j = p->nExclude; + p->nExclude += pSrc->nSrc; + piNew = sqlite3DbRealloc(p->db, p->aiExclude, p->nExclude*sizeof(int)); + if( piNew==0 ){ + p->nExclude = 0; + return WRC_Abort; + }else{ + p->aiExclude = piNew; + } + for(i=0; inSrc; i++, j++){ + p->aiExclude[j] = pSrc->a[i].iCursor; + } + return WRC_Continue; +} +static void selectRefLeave(Walker *pWalker, Select *pSelect){ + struct RefSrcList *p = pWalker->u.pRefSrcList; + SrcList *pSrc = pSelect->pSrc; + if( p->nExclude ){ + assert( p->nExclude>=pSrc->nSrc ); + p->nExclude -= pSrc->nSrc; + } +} + +/* This is the Walker EXPR callback for sqlite3ReferencesSrcList(). +** +** Set the 0x01 bit of pWalker->eCode if there is a reference to any +** of the tables shown in RefSrcList.pRef. +** +** Set the 0x02 bit of pWalker->eCode if there is a reference to a +** table is in neither RefSrcList.pRef nor RefSrcList.aiExclude. +*/ +static int exprRefToSrcList(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_COLUMN + || pExpr->op==TK_AGG_COLUMN + ){ + int i; + struct RefSrcList *p = pWalker->u.pRefSrcList; + SrcList *pSrc = p->pRef; + int nSrc = pSrc ? pSrc->nSrc : 0; + for(i=0; iiTable==pSrc->a[i].iCursor ){ + pWalker->eCode |= 1; + return WRC_Continue; + } + } + for(i=0; inExclude && p->aiExclude[i]!=pExpr->iTable; i++){} + if( i>=p->nExclude ){ + pWalker->eCode |= 2; + } + } + return WRC_Continue; +} + +/* +** Check to see if pExpr references any tables in pSrcList. +** Possible return values: +** +** 1 pExpr does references a table in pSrcList. +** +** 0 pExpr references some table that is not defined in either +** pSrcList or in subqueries of pExpr itself. +** +** -1 pExpr only references no tables at all, or it only +** references tables defined in subqueries of pExpr itself. +** +** As currently used, pExpr is always an aggregate function call. That +** fact is exploited for efficiency. +*/ +SQLITE_PRIVATE int sqlite3ReferencesSrcList(Parse *pParse, Expr *pExpr, SrcList *pSrcList){ + Walker w; + struct RefSrcList x; + memset(&w, 0, sizeof(w)); + memset(&x, 0, sizeof(x)); + w.xExprCallback = exprRefToSrcList; + w.xSelectCallback = selectRefEnter; + w.xSelectCallback2 = selectRefLeave; + w.u.pRefSrcList = &x; + x.db = pParse->db; + x.pRef = pSrcList; + assert( pExpr->op==TK_AGG_FUNCTION ); + assert( ExprUseXList(pExpr) ); + sqlite3WalkExprList(&w, pExpr->x.pList); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + sqlite3WalkExpr(&w, pExpr->y.pWin->pFilter); + } +#endif + sqlite3DbFree(pParse->db, x.aiExclude); + if( w.eCode & 0x01 ){ + return 1; + }else if( w.eCode ){ + return 0; + }else{ + return -1; + } +} + +/* +** This is a Walker expression node callback. +** +** For Expr nodes that contain pAggInfo pointers, make sure the AggInfo +** object that is referenced does not refer directly to the Expr. If +** it does, make a copy. This is done because the pExpr argument is +** subject to change. +** +** The copy is stored on pParse->pConstExpr with a register number of 0. +** This will cause the expression to be deleted automatically when the +** Parse object is destroyed, but the zero register number means that it +** will not generate any code in the preamble. +*/ +static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){ + if( ALWAYS(!ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced)) + && pExpr->pAggInfo!=0 + ){ + AggInfo *pAggInfo = pExpr->pAggInfo; + int iAgg = pExpr->iAgg; + Parse *pParse = pWalker->pParse; + sqlite3 *db = pParse->db; + assert( pExpr->op==TK_AGG_COLUMN || pExpr->op==TK_AGG_FUNCTION ); + if( pExpr->op==TK_AGG_COLUMN ){ + assert( iAgg>=0 && iAggnColumn ); + if( pAggInfo->aCol[iAgg].pCExpr==pExpr ){ + pExpr = sqlite3ExprDup(db, pExpr, 0); + if( pExpr ){ + pAggInfo->aCol[iAgg].pCExpr = pExpr; + sqlite3ExprDeferredDelete(pParse, pExpr); + } + } + }else{ + assert( iAgg>=0 && iAggnFunc ); + if( pAggInfo->aFunc[iAgg].pFExpr==pExpr ){ + pExpr = sqlite3ExprDup(db, pExpr, 0); + if( pExpr ){ + pAggInfo->aFunc[iAgg].pFExpr = pExpr; + sqlite3ExprDeferredDelete(pParse, pExpr); + } + } + } + } + return WRC_Continue; +} + +/* +** Initialize a Walker object so that will persist AggInfo entries referenced +** by the tree that is walked. +*/ +SQLITE_PRIVATE void sqlite3AggInfoPersistWalkerInit(Walker *pWalker, Parse *pParse){ + memset(pWalker, 0, sizeof(*pWalker)); + pWalker->pParse = pParse; + pWalker->xExprCallback = agginfoPersistExprCb; + pWalker->xSelectCallback = sqlite3SelectWalkNoop; +} + +/* +** Add a new element to the pAggInfo->aCol[] array. Return the index of +** the new element. Return a negative number if malloc fails. +*/ +static int addAggInfoColumn(sqlite3 *db, AggInfo *pInfo){ + int i; + pInfo->aCol = sqlite3ArrayAllocate( + db, + pInfo->aCol, + sizeof(pInfo->aCol[0]), + &pInfo->nColumn, + &i + ); + return i; +} + +/* +** Add a new element to the pAggInfo->aFunc[] array. Return the index of +** the new element. Return a negative number if malloc fails. +*/ +static int addAggInfoFunc(sqlite3 *db, AggInfo *pInfo){ + int i; + pInfo->aFunc = sqlite3ArrayAllocate( + db, + pInfo->aFunc, + sizeof(pInfo->aFunc[0]), + &pInfo->nFunc, + &i + ); + return i; +} + +/* +** This is the xExprCallback for a tree walker. It is used to +** implement sqlite3ExprAnalyzeAggregates(). See sqlite3ExprAnalyzeAggregates +** for additional information. +*/ +static int analyzeAggregate(Walker *pWalker, Expr *pExpr){ + int i; + NameContext *pNC = pWalker->u.pNC; + Parse *pParse = pNC->pParse; + SrcList *pSrcList = pNC->pSrcList; + AggInfo *pAggInfo = pNC->uNC.pAggInfo; + + assert( pNC->ncFlags & NC_UAggInfo ); + switch( pExpr->op ){ + case TK_AGG_COLUMN: + case TK_COLUMN: { + testcase( pExpr->op==TK_AGG_COLUMN ); + testcase( pExpr->op==TK_COLUMN ); + /* Check to see if the column is in one of the tables in the FROM + ** clause of the aggregate query */ + if( ALWAYS(pSrcList!=0) ){ + SrcItem *pItem = pSrcList->a; + for(i=0; inSrc; i++, pItem++){ + struct AggInfo_col *pCol; + assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) ); + if( pExpr->iTable==pItem->iCursor ){ + /* If we reach this point, it means that pExpr refers to a table + ** that is in the FROM clause of the aggregate query. + ** + ** Make an entry for the column in pAggInfo->aCol[] if there + ** is not an entry there already. + */ + int k; + pCol = pAggInfo->aCol; + for(k=0; knColumn; k++, pCol++){ + if( pCol->iTable==pExpr->iTable && + pCol->iColumn==pExpr->iColumn ){ + break; + } + } + if( (k>=pAggInfo->nColumn) + && (k = addAggInfoColumn(pParse->db, pAggInfo))>=0 + ){ + pCol = &pAggInfo->aCol[k]; + assert( ExprUseYTab(pExpr) ); + pCol->pTab = pExpr->y.pTab; + pCol->iTable = pExpr->iTable; + pCol->iColumn = pExpr->iColumn; + pCol->iMem = ++pParse->nMem; + pCol->iSorterColumn = -1; + pCol->pCExpr = pExpr; + if( pAggInfo->pGroupBy ){ + int j, n; + ExprList *pGB = pAggInfo->pGroupBy; + struct ExprList_item *pTerm = pGB->a; + n = pGB->nExpr; + for(j=0; jpExpr; + if( pE->op==TK_COLUMN && pE->iTable==pExpr->iTable && + pE->iColumn==pExpr->iColumn ){ + pCol->iSorterColumn = j; + break; + } + } + } + if( pCol->iSorterColumn<0 ){ + pCol->iSorterColumn = pAggInfo->nSortingColumn++; + } + } + /* There is now an entry for pExpr in pAggInfo->aCol[] (either + ** because it was there before or because we just created it). + ** Convert the pExpr to be a TK_AGG_COLUMN referring to that + ** pAggInfo->aCol[] entry. + */ + ExprSetVVAProperty(pExpr, EP_NoReduce); + pExpr->pAggInfo = pAggInfo; + pExpr->op = TK_AGG_COLUMN; + pExpr->iAgg = (i16)k; + break; + } /* endif pExpr->iTable==pItem->iCursor */ + } /* end loop over pSrcList */ + } + return WRC_Prune; + } + case TK_AGG_FUNCTION: { + if( (pNC->ncFlags & NC_InAggFunc)==0 + && pWalker->walkerDepth==pExpr->op2 + ){ + /* Check to see if pExpr is a duplicate of another aggregate + ** function that is already in the pAggInfo structure + */ + struct AggInfo_func *pItem = pAggInfo->aFunc; + for(i=0; inFunc; i++, pItem++){ + if( pItem->pFExpr==pExpr ) break; + if( sqlite3ExprCompare(0, pItem->pFExpr, pExpr, -1)==0 ){ + break; + } + } + if( i>=pAggInfo->nFunc ){ + /* pExpr is original. Make a new entry in pAggInfo->aFunc[] + */ + u8 enc = ENC(pParse->db); + i = addAggInfoFunc(pParse->db, pAggInfo); + if( i>=0 ){ + assert( !ExprHasProperty(pExpr, EP_xIsSelect) ); + pItem = &pAggInfo->aFunc[i]; + pItem->pFExpr = pExpr; + pItem->iMem = ++pParse->nMem; + assert( ExprUseUToken(pExpr) ); + pItem->pFunc = sqlite3FindFunction(pParse->db, + pExpr->u.zToken, + pExpr->x.pList ? pExpr->x.pList->nExpr : 0, enc, 0); + if( pExpr->flags & EP_Distinct ){ + pItem->iDistinct = pParse->nTab++; + }else{ + pItem->iDistinct = -1; + } + } + } + /* Make pExpr point to the appropriate pAggInfo->aFunc[] entry + */ + assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) ); + ExprSetVVAProperty(pExpr, EP_NoReduce); + pExpr->iAgg = (i16)i; + pExpr->pAggInfo = pAggInfo; + return WRC_Prune; + }else{ + return WRC_Continue; + } + } + } + return WRC_Continue; +} + +/* +** Analyze the pExpr expression looking for aggregate functions and +** for variables that need to be added to AggInfo object that pNC->pAggInfo +** points to. Additional entries are made on the AggInfo object as +** necessary. +** +** This routine should only be called after the expression has been +** analyzed by sqlite3ResolveExprNames(). +*/ +SQLITE_PRIVATE void sqlite3ExprAnalyzeAggregates(NameContext *pNC, Expr *pExpr){ + Walker w; + w.xExprCallback = analyzeAggregate; + w.xSelectCallback = sqlite3WalkerDepthIncrease; + w.xSelectCallback2 = sqlite3WalkerDepthDecrease; + w.walkerDepth = 0; + w.u.pNC = pNC; + w.pParse = 0; + assert( pNC->pSrcList!=0 ); + sqlite3WalkExpr(&w, pExpr); +} + +/* +** Call sqlite3ExprAnalyzeAggregates() for every expression in an +** expression list. Return the number of errors. +** +** If an error is found, the analysis is cut short. +*/ +SQLITE_PRIVATE void sqlite3ExprAnalyzeAggList(NameContext *pNC, ExprList *pList){ + struct ExprList_item *pItem; + int i; + if( pList ){ + for(pItem=pList->a, i=0; inExpr; i++, pItem++){ + sqlite3ExprAnalyzeAggregates(pNC, pItem->pExpr); + } + } +} + +/* +** Allocate a single new register for use to hold some intermediate result. +*/ +SQLITE_PRIVATE int sqlite3GetTempReg(Parse *pParse){ + if( pParse->nTempReg==0 ){ + return ++pParse->nMem; + } + return pParse->aTempReg[--pParse->nTempReg]; +} + +/* +** Deallocate a register, making available for reuse for some other +** purpose. +*/ +SQLITE_PRIVATE void sqlite3ReleaseTempReg(Parse *pParse, int iReg){ + if( iReg ){ + sqlite3VdbeReleaseRegisters(pParse, iReg, 1, 0, 0); + if( pParse->nTempRegaTempReg) ){ + pParse->aTempReg[pParse->nTempReg++] = iReg; + } + } +} + +/* +** Allocate or deallocate a block of nReg consecutive registers. +*/ +SQLITE_PRIVATE int sqlite3GetTempRange(Parse *pParse, int nReg){ + int i, n; + if( nReg==1 ) return sqlite3GetTempReg(pParse); + i = pParse->iRangeReg; + n = pParse->nRangeReg; + if( nReg<=n ){ + pParse->iRangeReg += nReg; + pParse->nRangeReg -= nReg; + }else{ + i = pParse->nMem+1; + pParse->nMem += nReg; + } + return i; +} +SQLITE_PRIVATE void sqlite3ReleaseTempRange(Parse *pParse, int iReg, int nReg){ + if( nReg==1 ){ + sqlite3ReleaseTempReg(pParse, iReg); + return; + } + sqlite3VdbeReleaseRegisters(pParse, iReg, nReg, 0, 0); + if( nReg>pParse->nRangeReg ){ + pParse->nRangeReg = nReg; + pParse->iRangeReg = iReg; + } +} + +/* +** Mark all temporary registers as being unavailable for reuse. +** +** Always invoke this procedure after coding a subroutine or co-routine +** that might be invoked from other parts of the code, to ensure that +** the sub/co-routine does not use registers in common with the code that +** invokes the sub/co-routine. +*/ +SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse *pParse){ + pParse->nTempReg = 0; + pParse->nRangeReg = 0; +} + +/* +** Validate that no temporary register falls within the range of +** iFirst..iLast, inclusive. This routine is only call from within assert() +** statements. +*/ +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3NoTempsInRange(Parse *pParse, int iFirst, int iLast){ + int i; + if( pParse->nRangeReg>0 + && pParse->iRangeReg+pParse->nRangeReg > iFirst + && pParse->iRangeReg <= iLast + ){ + return 0; + } + for(i=0; inTempReg; i++){ + if( pParse->aTempReg[i]>=iFirst && pParse->aTempReg[i]<=iLast ){ + return 0; + } + } + return 1; +} +#endif /* SQLITE_DEBUG */ + +/************** End of expr.c ************************************************/ +/************** Begin file alter.c *******************************************/ +/* +** 2005 February 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains C code routines that used to generate VDBE code +** that implements the ALTER TABLE command. +*/ +/* #include "sqliteInt.h" */ + +/* +** The code in this file only exists if we are not omitting the +** ALTER TABLE logic from the build. +*/ +#ifndef SQLITE_OMIT_ALTERTABLE + +/* +** Parameter zName is the name of a table that is about to be altered +** (either with ALTER TABLE ... RENAME TO or ALTER TABLE ... ADD COLUMN). +** If the table is a system table, this function leaves an error message +** in pParse->zErr (system tables may not be altered) and returns non-zero. +** +** Or, if zName is not a system table, zero is returned. +*/ +static int isAlterableTable(Parse *pParse, Table *pTab){ + if( 0==sqlite3StrNICmp(pTab->zName, "sqlite_", 7) +#ifndef SQLITE_OMIT_VIRTUALTABLE + || (pTab->tabFlags & TF_Eponymous)!=0 + || ( (pTab->tabFlags & TF_Shadow)!=0 + && sqlite3ReadOnlyShadowTables(pParse->db) + ) +#endif + ){ + sqlite3ErrorMsg(pParse, "table %s may not be altered", pTab->zName); + return 1; + } + return 0; +} + +/* +** Generate code to verify that the schemas of database zDb and, if +** bTemp is not true, database "temp", can still be parsed. This is +** called at the end of the generation of an ALTER TABLE ... RENAME ... +** statement to ensure that the operation has not rendered any schema +** objects unusable. +*/ +static void renameTestSchema( + Parse *pParse, /* Parse context */ + const char *zDb, /* Name of db to verify schema of */ + int bTemp, /* True if this is the temp db */ + const char *zWhen, /* "when" part of error message */ + int bNoDQS /* Do not allow DQS in the schema */ +){ + pParse->colNamesSet = 1; + sqlite3NestedParse(pParse, + "SELECT 1 " + "FROM \"%w\"." LEGACY_SCHEMA_TABLE " " + "WHERE name NOT LIKE 'sqliteX_%%' ESCAPE 'X'" + " AND sql NOT LIKE 'create virtual%%'" + " AND sqlite_rename_test(%Q, sql, type, name, %d, %Q, %d)=NULL ", + zDb, + zDb, bTemp, zWhen, bNoDQS + ); + + if( bTemp==0 ){ + sqlite3NestedParse(pParse, + "SELECT 1 " + "FROM temp." LEGACY_SCHEMA_TABLE " " + "WHERE name NOT LIKE 'sqliteX_%%' ESCAPE 'X'" + " AND sql NOT LIKE 'create virtual%%'" + " AND sqlite_rename_test(%Q, sql, type, name, 1, %Q, %d)=NULL ", + zDb, zWhen, bNoDQS + ); + } +} + +/* +** Generate VM code to replace any double-quoted strings (but not double-quoted +** identifiers) within the "sql" column of the sqlite_schema table in +** database zDb with their single-quoted equivalents. If argument bTemp is +** not true, similarly update all SQL statements in the sqlite_schema table +** of the temp db. +*/ +static void renameFixQuotes(Parse *pParse, const char *zDb, int bTemp){ + sqlite3NestedParse(pParse, + "UPDATE \"%w\"." LEGACY_SCHEMA_TABLE + " SET sql = sqlite_rename_quotefix(%Q, sql)" + "WHERE name NOT LIKE 'sqliteX_%%' ESCAPE 'X'" + " AND sql NOT LIKE 'create virtual%%'" , zDb, zDb + ); + if( bTemp==0 ){ + sqlite3NestedParse(pParse, + "UPDATE temp." LEGACY_SCHEMA_TABLE + " SET sql = sqlite_rename_quotefix('temp', sql)" + "WHERE name NOT LIKE 'sqliteX_%%' ESCAPE 'X'" + " AND sql NOT LIKE 'create virtual%%'" + ); + } +} + +/* +** Generate code to reload the schema for database iDb. And, if iDb!=1, for +** the temp database as well. +*/ +static void renameReloadSchema(Parse *pParse, int iDb, u16 p5){ + Vdbe *v = pParse->pVdbe; + if( v ){ + sqlite3ChangeCookie(pParse, iDb); + sqlite3VdbeAddParseSchemaOp(pParse->pVdbe, iDb, 0, p5); + if( iDb!=1 ) sqlite3VdbeAddParseSchemaOp(pParse->pVdbe, 1, 0, p5); + } +} + +/* +** Generate code to implement the "ALTER TABLE xxx RENAME TO yyy" +** command. +*/ +SQLITE_PRIVATE void sqlite3AlterRenameTable( + Parse *pParse, /* Parser context. */ + SrcList *pSrc, /* The table to rename. */ + Token *pName /* The new table name. */ +){ + int iDb; /* Database that contains the table */ + char *zDb; /* Name of database iDb */ + Table *pTab; /* Table being renamed */ + char *zName = 0; /* NULL-terminated version of pName */ + sqlite3 *db = pParse->db; /* Database connection */ + int nTabName; /* Number of UTF-8 characters in zTabName */ + const char *zTabName; /* Original name of the table */ + Vdbe *v; + VTable *pVTab = 0; /* Non-zero if this is a v-tab with an xRename() */ + + if( NEVER(db->mallocFailed) ) goto exit_rename_table; + assert( pSrc->nSrc==1 ); + assert( sqlite3BtreeHoldsAllMutexes(pParse->db) ); + + pTab = sqlite3LocateTableItem(pParse, 0, &pSrc->a[0]); + if( !pTab ) goto exit_rename_table; + iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + zDb = db->aDb[iDb].zDbSName; + + /* Get a NULL terminated version of the new table name. */ + zName = sqlite3NameFromToken(db, pName); + if( !zName ) goto exit_rename_table; + + /* Check that a table or index named 'zName' does not already exist + ** in database iDb. If so, this is an error. + */ + if( sqlite3FindTable(db, zName, zDb) + || sqlite3FindIndex(db, zName, zDb) + || sqlite3IsShadowTableOf(db, pTab, zName) + ){ + sqlite3ErrorMsg(pParse, + "there is already another table or index with this name: %s", zName); + goto exit_rename_table; + } + + /* Make sure it is not a system table being altered, or a reserved name + ** that the table is being renamed to. + */ + if( SQLITE_OK!=isAlterableTable(pParse, pTab) ){ + goto exit_rename_table; + } + if( SQLITE_OK!=sqlite3CheckObjectName(pParse,zName,"table",zName) ){ + goto exit_rename_table; + } + +#ifndef SQLITE_OMIT_VIEW + if( IsView(pTab) ){ + sqlite3ErrorMsg(pParse, "view %s may not be altered", pTab->zName); + goto exit_rename_table; + } +#endif + +#ifndef SQLITE_OMIT_AUTHORIZATION + /* Invoke the authorization callback. */ + if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, 0) ){ + goto exit_rename_table; + } +#endif + +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( sqlite3ViewGetColumnNames(pParse, pTab) ){ + goto exit_rename_table; + } + if( IsVirtual(pTab) ){ + pVTab = sqlite3GetVTable(db, pTab); + if( pVTab->pVtab->pModule->xRename==0 ){ + pVTab = 0; + } + } +#endif + + /* Begin a transaction for database iDb. Then modify the schema cookie + ** (since the ALTER TABLE modifies the schema). Call sqlite3MayAbort(), + ** as the scalar functions (e.g. sqlite_rename_table()) invoked by the + ** nested SQL may raise an exception. */ + v = sqlite3GetVdbe(pParse); + if( v==0 ){ + goto exit_rename_table; + } + sqlite3MayAbort(pParse); + + /* figure out how many UTF-8 characters are in zName */ + zTabName = pTab->zName; + nTabName = sqlite3Utf8CharLen(zTabName, -1); + + /* Rewrite all CREATE TABLE, INDEX, TRIGGER or VIEW statements in + ** the schema to use the new table name. */ + sqlite3NestedParse(pParse, + "UPDATE \"%w\"." LEGACY_SCHEMA_TABLE " SET " + "sql = sqlite_rename_table(%Q, type, name, sql, %Q, %Q, %d) " + "WHERE (type!='index' OR tbl_name=%Q COLLATE nocase)" + "AND name NOT LIKE 'sqliteX_%%' ESCAPE 'X'" + , zDb, zDb, zTabName, zName, (iDb==1), zTabName + ); + + /* Update the tbl_name and name columns of the sqlite_schema table + ** as required. */ + sqlite3NestedParse(pParse, + "UPDATE %Q." LEGACY_SCHEMA_TABLE " SET " + "tbl_name = %Q, " + "name = CASE " + "WHEN type='table' THEN %Q " + "WHEN name LIKE 'sqliteX_autoindex%%' ESCAPE 'X' " + " AND type='index' THEN " + "'sqlite_autoindex_' || %Q || substr(name,%d+18) " + "ELSE name END " + "WHERE tbl_name=%Q COLLATE nocase AND " + "(type='table' OR type='index' OR type='trigger');", + zDb, + zName, zName, zName, + nTabName, zTabName + ); + +#ifndef SQLITE_OMIT_AUTOINCREMENT + /* If the sqlite_sequence table exists in this database, then update + ** it with the new table name. + */ + if( sqlite3FindTable(db, "sqlite_sequence", zDb) ){ + sqlite3NestedParse(pParse, + "UPDATE \"%w\".sqlite_sequence set name = %Q WHERE name = %Q", + zDb, zName, pTab->zName); + } +#endif + + /* If the table being renamed is not itself part of the temp database, + ** edit view and trigger definitions within the temp database + ** as required. */ + if( iDb!=1 ){ + sqlite3NestedParse(pParse, + "UPDATE sqlite_temp_schema SET " + "sql = sqlite_rename_table(%Q, type, name, sql, %Q, %Q, 1), " + "tbl_name = " + "CASE WHEN tbl_name=%Q COLLATE nocase AND " + " sqlite_rename_test(%Q, sql, type, name, 1, 'after rename', 0) " + "THEN %Q ELSE tbl_name END " + "WHERE type IN ('view', 'trigger')" + , zDb, zTabName, zName, zTabName, zDb, zName); + } + + /* If this is a virtual table, invoke the xRename() function if + ** one is defined. The xRename() callback will modify the names + ** of any resources used by the v-table implementation (including other + ** SQLite tables) that are identified by the name of the virtual table. + */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( pVTab ){ + int i = ++pParse->nMem; + sqlite3VdbeLoadString(v, i, zName); + sqlite3VdbeAddOp4(v, OP_VRename, i, 0, 0,(const char*)pVTab, P4_VTAB); + } +#endif + + renameReloadSchema(pParse, iDb, INITFLAG_AlterRename); + renameTestSchema(pParse, zDb, iDb==1, "after rename", 0); + +exit_rename_table: + sqlite3SrcListDelete(db, pSrc); + sqlite3DbFree(db, zName); +} + +/* +** Write code that will raise an error if the table described by +** zDb and zTab is not empty. +*/ +static void sqlite3ErrorIfNotEmpty( + Parse *pParse, /* Parsing context */ + const char *zDb, /* Schema holding the table */ + const char *zTab, /* Table to check for empty */ + const char *zErr /* Error message text */ +){ + sqlite3NestedParse(pParse, + "SELECT raise(ABORT,%Q) FROM \"%w\".\"%w\"", + zErr, zDb, zTab + ); +} + +/* +** This function is called after an "ALTER TABLE ... ADD" statement +** has been parsed. Argument pColDef contains the text of the new +** column definition. +** +** The Table structure pParse->pNewTable was extended to include +** the new column during parsing. +*/ +SQLITE_PRIVATE void sqlite3AlterFinishAddColumn(Parse *pParse, Token *pColDef){ + Table *pNew; /* Copy of pParse->pNewTable */ + Table *pTab; /* Table being altered */ + int iDb; /* Database number */ + const char *zDb; /* Database name */ + const char *zTab; /* Table name */ + char *zCol; /* Null-terminated column definition */ + Column *pCol; /* The new column */ + Expr *pDflt; /* Default value for the new column */ + sqlite3 *db; /* The database connection; */ + Vdbe *v; /* The prepared statement under construction */ + int r1; /* Temporary registers */ + + db = pParse->db; + assert( db->pParse==pParse ); + if( pParse->nErr ) return; + assert( db->mallocFailed==0 ); + pNew = pParse->pNewTable; + assert( pNew ); + + assert( sqlite3BtreeHoldsAllMutexes(db) ); + iDb = sqlite3SchemaToIndex(db, pNew->pSchema); + zDb = db->aDb[iDb].zDbSName; + zTab = &pNew->zName[16]; /* Skip the "sqlite_altertab_" prefix on the name */ + pCol = &pNew->aCol[pNew->nCol-1]; + pDflt = sqlite3ColumnExpr(pNew, pCol); + pTab = sqlite3FindTable(db, zTab, zDb); + assert( pTab ); + +#ifndef SQLITE_OMIT_AUTHORIZATION + /* Invoke the authorization callback. */ + if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, 0) ){ + return; + } +#endif + + + /* Check that the new column is not specified as PRIMARY KEY or UNIQUE. + ** If there is a NOT NULL constraint, then the default value for the + ** column must not be NULL. + */ + if( pCol->colFlags & COLFLAG_PRIMKEY ){ + sqlite3ErrorMsg(pParse, "Cannot add a PRIMARY KEY column"); + return; + } + if( pNew->pIndex ){ + sqlite3ErrorMsg(pParse, + "Cannot add a UNIQUE column"); + return; + } + if( (pCol->colFlags & COLFLAG_GENERATED)==0 ){ + /* If the default value for the new column was specified with a + ** literal NULL, then set pDflt to 0. This simplifies checking + ** for an SQL NULL default below. + */ + assert( pDflt==0 || pDflt->op==TK_SPAN ); + if( pDflt && pDflt->pLeft->op==TK_NULL ){ + pDflt = 0; + } + assert( IsOrdinaryTable(pNew) ); + if( (db->flags&SQLITE_ForeignKeys) && pNew->u.tab.pFKey && pDflt ){ + sqlite3ErrorIfNotEmpty(pParse, zDb, zTab, + "Cannot add a REFERENCES column with non-NULL default value"); + } + if( pCol->notNull && !pDflt ){ + sqlite3ErrorIfNotEmpty(pParse, zDb, zTab, + "Cannot add a NOT NULL column with default value NULL"); + } + + + /* Ensure the default expression is something that sqlite3ValueFromExpr() + ** can handle (i.e. not CURRENT_TIME etc.) + */ + if( pDflt ){ + sqlite3_value *pVal = 0; + int rc; + rc = sqlite3ValueFromExpr(db, pDflt, SQLITE_UTF8, SQLITE_AFF_BLOB, &pVal); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + if( rc!=SQLITE_OK ){ + assert( db->mallocFailed == 1 ); + return; + } + if( !pVal ){ + sqlite3ErrorIfNotEmpty(pParse, zDb, zTab, + "Cannot add a column with non-constant default"); + } + sqlite3ValueFree(pVal); + } + }else if( pCol->colFlags & COLFLAG_STORED ){ + sqlite3ErrorIfNotEmpty(pParse, zDb, zTab, "cannot add a STORED column"); + } + + + /* Modify the CREATE TABLE statement. */ + zCol = sqlite3DbStrNDup(db, (char*)pColDef->z, pColDef->n); + if( zCol ){ + char *zEnd = &zCol[pColDef->n-1]; + while( zEnd>zCol && (*zEnd==';' || sqlite3Isspace(*zEnd)) ){ + *zEnd-- = '\0'; + } + /* substr() operations on characters, but addColOffset is in bytes. So we + ** have to use printf() to translate between these units: */ + assert( IsOrdinaryTable(pTab) ); + assert( IsOrdinaryTable(pNew) ); + sqlite3NestedParse(pParse, + "UPDATE \"%w\"." LEGACY_SCHEMA_TABLE " SET " + "sql = printf('%%.%ds, ',sql) || %Q" + " || substr(sql,1+length(printf('%%.%ds',sql))) " + "WHERE type = 'table' AND name = %Q", + zDb, pNew->u.tab.addColOffset, zCol, pNew->u.tab.addColOffset, + zTab + ); + sqlite3DbFree(db, zCol); + } + + v = sqlite3GetVdbe(pParse); + if( v ){ + /* Make sure the schema version is at least 3. But do not upgrade + ** from less than 3 to 4, as that will corrupt any preexisting DESC + ** index. + */ + r1 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_ReadCookie, iDb, r1, BTREE_FILE_FORMAT); + sqlite3VdbeUsesBtree(v, iDb); + sqlite3VdbeAddOp2(v, OP_AddImm, r1, -2); + sqlite3VdbeAddOp2(v, OP_IfPos, r1, sqlite3VdbeCurrentAddr(v)+2); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_FILE_FORMAT, 3); + sqlite3ReleaseTempReg(pParse, r1); + + /* Reload the table definition */ + renameReloadSchema(pParse, iDb, INITFLAG_AlterAdd); + + /* Verify that constraints are still satisfied */ + if( pNew->pCheck!=0 + || (pCol->notNull && (pCol->colFlags & COLFLAG_GENERATED)!=0) + ){ + sqlite3NestedParse(pParse, + "SELECT CASE WHEN quick_check GLOB 'CHECK*'" + " THEN raise(ABORT,'CHECK constraint failed')" + " ELSE raise(ABORT,'NOT NULL constraint failed')" + " END" + " FROM pragma_quick_check(%Q,%Q)" + " WHERE quick_check GLOB 'CHECK*' OR quick_check GLOB 'NULL*'", + zTab, zDb + ); + } + } +} + +/* +** This function is called by the parser after the table-name in +** an "ALTER TABLE ADD" statement is parsed. Argument +** pSrc is the full-name of the table being altered. +** +** This routine makes a (partial) copy of the Table structure +** for the table being altered and sets Parse.pNewTable to point +** to it. Routines called by the parser as the column definition +** is parsed (i.e. sqlite3AddColumn()) add the new Column data to +** the copy. The copy of the Table structure is deleted by tokenize.c +** after parsing is finished. +** +** Routine sqlite3AlterFinishAddColumn() will be called to complete +** coding the "ALTER TABLE ... ADD" statement. +*/ +SQLITE_PRIVATE void sqlite3AlterBeginAddColumn(Parse *pParse, SrcList *pSrc){ + Table *pNew; + Table *pTab; + int iDb; + int i; + int nAlloc; + sqlite3 *db = pParse->db; + + /* Look up the table being altered. */ + assert( pParse->pNewTable==0 ); + assert( sqlite3BtreeHoldsAllMutexes(db) ); + if( db->mallocFailed ) goto exit_begin_add_column; + pTab = sqlite3LocateTableItem(pParse, 0, &pSrc->a[0]); + if( !pTab ) goto exit_begin_add_column; + +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pTab) ){ + sqlite3ErrorMsg(pParse, "virtual tables may not be altered"); + goto exit_begin_add_column; + } +#endif + + /* Make sure this is not an attempt to ALTER a view. */ + if( IsView(pTab) ){ + sqlite3ErrorMsg(pParse, "Cannot add a column to a view"); + goto exit_begin_add_column; + } + if( SQLITE_OK!=isAlterableTable(pParse, pTab) ){ + goto exit_begin_add_column; + } + + sqlite3MayAbort(pParse); + assert( IsOrdinaryTable(pTab) ); + assert( pTab->u.tab.addColOffset>0 ); + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + + /* Put a copy of the Table struct in Parse.pNewTable for the + ** sqlite3AddColumn() function and friends to modify. But modify + ** the name by adding an "sqlite_altertab_" prefix. By adding this + ** prefix, we insure that the name will not collide with an existing + ** table because user table are not allowed to have the "sqlite_" + ** prefix on their name. + */ + pNew = (Table*)sqlite3DbMallocZero(db, sizeof(Table)); + if( !pNew ) goto exit_begin_add_column; + pParse->pNewTable = pNew; + pNew->nTabRef = 1; + pNew->nCol = pTab->nCol; + assert( pNew->nCol>0 ); + nAlloc = (((pNew->nCol-1)/8)*8)+8; + assert( nAlloc>=pNew->nCol && nAlloc%8==0 && nAlloc-pNew->nCol<8 ); + pNew->aCol = (Column*)sqlite3DbMallocZero(db, sizeof(Column)*nAlloc); + pNew->zName = sqlite3MPrintf(db, "sqlite_altertab_%s", pTab->zName); + if( !pNew->aCol || !pNew->zName ){ + assert( db->mallocFailed ); + goto exit_begin_add_column; + } + memcpy(pNew->aCol, pTab->aCol, sizeof(Column)*pNew->nCol); + for(i=0; inCol; i++){ + Column *pCol = &pNew->aCol[i]; + pCol->zCnName = sqlite3DbStrDup(db, pCol->zCnName); + pCol->hName = sqlite3StrIHash(pCol->zCnName); + } + assert( IsOrdinaryTable(pNew) ); + pNew->u.tab.pDfltList = sqlite3ExprListDup(db, pTab->u.tab.pDfltList, 0); + pNew->pSchema = db->aDb[iDb].pSchema; + pNew->u.tab.addColOffset = pTab->u.tab.addColOffset; + pNew->nTabRef = 1; + +exit_begin_add_column: + sqlite3SrcListDelete(db, pSrc); + return; +} + +/* +** Parameter pTab is the subject of an ALTER TABLE ... RENAME COLUMN +** command. This function checks if the table is a view or virtual +** table (columns of views or virtual tables may not be renamed). If so, +** it loads an error message into pParse and returns non-zero. +** +** Or, if pTab is not a view or virtual table, zero is returned. +*/ +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) +static int isRealTable(Parse *pParse, Table *pTab, int bDrop){ + const char *zType = 0; +#ifndef SQLITE_OMIT_VIEW + if( IsView(pTab) ){ + zType = "view"; + } +#endif +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pTab) ){ + zType = "virtual table"; + } +#endif + if( zType ){ + sqlite3ErrorMsg(pParse, "cannot %s %s \"%s\"", + (bDrop ? "drop column from" : "rename columns of"), + zType, pTab->zName + ); + return 1; + } + return 0; +} +#else /* !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) */ +# define isRealTable(x,y,z) (0) +#endif + +/* +** Handles the following parser reduction: +** +** cmd ::= ALTER TABLE pSrc RENAME COLUMN pOld TO pNew +*/ +SQLITE_PRIVATE void sqlite3AlterRenameColumn( + Parse *pParse, /* Parsing context */ + SrcList *pSrc, /* Table being altered. pSrc->nSrc==1 */ + Token *pOld, /* Name of column being changed */ + Token *pNew /* New column name */ +){ + sqlite3 *db = pParse->db; /* Database connection */ + Table *pTab; /* Table being updated */ + int iCol; /* Index of column being renamed */ + char *zOld = 0; /* Old column name */ + char *zNew = 0; /* New column name */ + const char *zDb; /* Name of schema containing the table */ + int iSchema; /* Index of the schema */ + int bQuote; /* True to quote the new name */ + + /* Locate the table to be altered */ + pTab = sqlite3LocateTableItem(pParse, 0, &pSrc->a[0]); + if( !pTab ) goto exit_rename_column; + + /* Cannot alter a system table */ + if( SQLITE_OK!=isAlterableTable(pParse, pTab) ) goto exit_rename_column; + if( SQLITE_OK!=isRealTable(pParse, pTab, 0) ) goto exit_rename_column; + + /* Which schema holds the table to be altered */ + iSchema = sqlite3SchemaToIndex(db, pTab->pSchema); + assert( iSchema>=0 ); + zDb = db->aDb[iSchema].zDbSName; + +#ifndef SQLITE_OMIT_AUTHORIZATION + /* Invoke the authorization callback. */ + if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, 0) ){ + goto exit_rename_column; + } +#endif + + /* Make sure the old name really is a column name in the table to be + ** altered. Set iCol to be the index of the column being renamed */ + zOld = sqlite3NameFromToken(db, pOld); + if( !zOld ) goto exit_rename_column; + for(iCol=0; iColnCol; iCol++){ + if( 0==sqlite3StrICmp(pTab->aCol[iCol].zCnName, zOld) ) break; + } + if( iCol==pTab->nCol ){ + sqlite3ErrorMsg(pParse, "no such column: \"%T\"", pOld); + goto exit_rename_column; + } + + /* Ensure the schema contains no double-quoted strings */ + renameTestSchema(pParse, zDb, iSchema==1, "", 0); + renameFixQuotes(pParse, zDb, iSchema==1); + + /* Do the rename operation using a recursive UPDATE statement that + ** uses the sqlite_rename_column() SQL function to compute the new + ** CREATE statement text for the sqlite_schema table. + */ + sqlite3MayAbort(pParse); + zNew = sqlite3NameFromToken(db, pNew); + if( !zNew ) goto exit_rename_column; + assert( pNew->n>0 ); + bQuote = sqlite3Isquote(pNew->z[0]); + sqlite3NestedParse(pParse, + "UPDATE \"%w\"." LEGACY_SCHEMA_TABLE " SET " + "sql = sqlite_rename_column(sql, type, name, %Q, %Q, %d, %Q, %d, %d) " + "WHERE name NOT LIKE 'sqliteX_%%' ESCAPE 'X' " + " AND (type != 'index' OR tbl_name = %Q)", + zDb, + zDb, pTab->zName, iCol, zNew, bQuote, iSchema==1, + pTab->zName + ); + + sqlite3NestedParse(pParse, + "UPDATE temp." LEGACY_SCHEMA_TABLE " SET " + "sql = sqlite_rename_column(sql, type, name, %Q, %Q, %d, %Q, %d, 1) " + "WHERE type IN ('trigger', 'view')", + zDb, pTab->zName, iCol, zNew, bQuote + ); + + /* Drop and reload the database schema. */ + renameReloadSchema(pParse, iSchema, INITFLAG_AlterRename); + renameTestSchema(pParse, zDb, iSchema==1, "after rename", 1); + + exit_rename_column: + sqlite3SrcListDelete(db, pSrc); + sqlite3DbFree(db, zOld); + sqlite3DbFree(db, zNew); + return; +} + +/* +** Each RenameToken object maps an element of the parse tree into +** the token that generated that element. The parse tree element +** might be one of: +** +** * A pointer to an Expr that represents an ID +** * The name of a table column in Column.zName +** +** A list of RenameToken objects can be constructed during parsing. +** Each new object is created by sqlite3RenameTokenMap(). +** As the parse tree is transformed, the sqlite3RenameTokenRemap() +** routine is used to keep the mapping current. +** +** After the parse finishes, renameTokenFind() routine can be used +** to look up the actual token value that created some element in +** the parse tree. +*/ +struct RenameToken { + const void *p; /* Parse tree element created by token t */ + Token t; /* The token that created parse tree element p */ + RenameToken *pNext; /* Next is a list of all RenameToken objects */ +}; + +/* +** The context of an ALTER TABLE RENAME COLUMN operation that gets passed +** down into the Walker. +*/ +typedef struct RenameCtx RenameCtx; +struct RenameCtx { + RenameToken *pList; /* List of tokens to overwrite */ + int nList; /* Number of tokens in pList */ + int iCol; /* Index of column being renamed */ + Table *pTab; /* Table being ALTERed */ + const char *zOld; /* Old column name */ +}; + +#ifdef SQLITE_DEBUG +/* +** This function is only for debugging. It performs two tasks: +** +** 1. Checks that pointer pPtr does not already appear in the +** rename-token list. +** +** 2. Dereferences each pointer in the rename-token list. +** +** The second is most effective when debugging under valgrind or +** address-sanitizer or similar. If any of these pointers no longer +** point to valid objects, an exception is raised by the memory-checking +** tool. +** +** The point of this is to prevent comparisons of invalid pointer values. +** Even though this always seems to work, it is undefined according to the +** C standard. Example of undefined comparison: +** +** sqlite3_free(x); +** if( x==y ) ... +** +** Technically, as x no longer points into a valid object or to the byte +** following a valid object, it may not be used in comparison operations. +*/ +static void renameTokenCheckAll(Parse *pParse, const void *pPtr){ + assert( pParse==pParse->db->pParse ); + assert( pParse->db->mallocFailed==0 || pParse->nErr!=0 ); + if( pParse->nErr==0 ){ + const RenameToken *p; + u8 i = 0; + for(p=pParse->pRename; p; p=p->pNext){ + if( p->p ){ + assert( p->p!=pPtr ); + i += *(u8*)(p->p); + } + } + } +} +#else +# define renameTokenCheckAll(x,y) +#endif + +/* +** Remember that the parser tree element pPtr was created using +** the token pToken. +** +** In other words, construct a new RenameToken object and add it +** to the list of RenameToken objects currently being built up +** in pParse->pRename. +** +** The pPtr argument is returned so that this routine can be used +** with tail recursion in tokenExpr() routine, for a small performance +** improvement. +*/ +SQLITE_PRIVATE const void *sqlite3RenameTokenMap( + Parse *pParse, + const void *pPtr, + const Token *pToken +){ + RenameToken *pNew; + assert( pPtr || pParse->db->mallocFailed ); + renameTokenCheckAll(pParse, pPtr); + if( ALWAYS(pParse->eParseMode!=PARSE_MODE_UNMAP) ){ + pNew = sqlite3DbMallocZero(pParse->db, sizeof(RenameToken)); + if( pNew ){ + pNew->p = pPtr; + pNew->t = *pToken; + pNew->pNext = pParse->pRename; + pParse->pRename = pNew; + } + } + + return pPtr; +} + +/* +** It is assumed that there is already a RenameToken object associated +** with parse tree element pFrom. This function remaps the associated token +** to parse tree element pTo. +*/ +SQLITE_PRIVATE void sqlite3RenameTokenRemap(Parse *pParse, const void *pTo, const void *pFrom){ + RenameToken *p; + renameTokenCheckAll(pParse, pTo); + for(p=pParse->pRename; p; p=p->pNext){ + if( p->p==pFrom ){ + p->p = pTo; + break; + } + } +} + +/* +** Walker callback used by sqlite3RenameExprUnmap(). +*/ +static int renameUnmapExprCb(Walker *pWalker, Expr *pExpr){ + Parse *pParse = pWalker->pParse; + sqlite3RenameTokenRemap(pParse, 0, (const void*)pExpr); + if( ExprUseYTab(pExpr) ){ + sqlite3RenameTokenRemap(pParse, 0, (const void*)&pExpr->y.pTab); + } + return WRC_Continue; +} + +/* +** Iterate through the Select objects that are part of WITH clauses attached +** to select statement pSelect. +*/ +static void renameWalkWith(Walker *pWalker, Select *pSelect){ + With *pWith = pSelect->pWith; + if( pWith ){ + Parse *pParse = pWalker->pParse; + int i; + With *pCopy = 0; + assert( pWith->nCte>0 ); + if( (pWith->a[0].pSelect->selFlags & SF_Expanded)==0 ){ + /* Push a copy of the With object onto the with-stack. We use a copy + ** here as the original will be expanded and resolved (flags SF_Expanded + ** and SF_Resolved) below. And the parser code that uses the with-stack + ** fails if the Select objects on it have already been expanded and + ** resolved. */ + pCopy = sqlite3WithDup(pParse->db, pWith); + pCopy = sqlite3WithPush(pParse, pCopy, 1); + } + for(i=0; inCte; i++){ + Select *p = pWith->a[i].pSelect; + NameContext sNC; + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + if( pCopy ) sqlite3SelectPrep(sNC.pParse, p, &sNC); + if( sNC.pParse->db->mallocFailed ) return; + sqlite3WalkSelect(pWalker, p); + sqlite3RenameExprlistUnmap(pParse, pWith->a[i].pCols); + } + if( pCopy && pParse->pWith==pCopy ){ + pParse->pWith = pCopy->pOuter; + } + } +} + +/* +** Unmap all tokens in the IdList object passed as the second argument. +*/ +static void unmapColumnIdlistNames( + Parse *pParse, + const IdList *pIdList +){ + if( pIdList ){ + int ii; + for(ii=0; iinId; ii++){ + sqlite3RenameTokenRemap(pParse, 0, (const void*)pIdList->a[ii].zName); + } + } +} + +/* +** Walker callback used by sqlite3RenameExprUnmap(). +*/ +static int renameUnmapSelectCb(Walker *pWalker, Select *p){ + Parse *pParse = pWalker->pParse; + int i; + if( pParse->nErr ) return WRC_Abort; + testcase( p->selFlags & SF_View ); + testcase( p->selFlags & SF_CopyCte ); + if( p->selFlags & (SF_View|SF_CopyCte) ){ + return WRC_Prune; + } + if( ALWAYS(p->pEList) ){ + ExprList *pList = p->pEList; + for(i=0; inExpr; i++){ + if( pList->a[i].zEName && pList->a[i].eEName==ENAME_NAME ){ + sqlite3RenameTokenRemap(pParse, 0, (void*)pList->a[i].zEName); + } + } + } + if( ALWAYS(p->pSrc) ){ /* Every Select as a SrcList, even if it is empty */ + SrcList *pSrc = p->pSrc; + for(i=0; inSrc; i++){ + sqlite3RenameTokenRemap(pParse, 0, (void*)pSrc->a[i].zName); + sqlite3WalkExpr(pWalker, pSrc->a[i].pOn); + unmapColumnIdlistNames(pParse, pSrc->a[i].pUsing); + } + } + + renameWalkWith(pWalker, p); + return WRC_Continue; +} + +/* +** Remove all nodes that are part of expression pExpr from the rename list. +*/ +SQLITE_PRIVATE void sqlite3RenameExprUnmap(Parse *pParse, Expr *pExpr){ + u8 eMode = pParse->eParseMode; + Walker sWalker; + memset(&sWalker, 0, sizeof(Walker)); + sWalker.pParse = pParse; + sWalker.xExprCallback = renameUnmapExprCb; + sWalker.xSelectCallback = renameUnmapSelectCb; + pParse->eParseMode = PARSE_MODE_UNMAP; + sqlite3WalkExpr(&sWalker, pExpr); + pParse->eParseMode = eMode; +} + +/* +** Remove all nodes that are part of expression-list pEList from the +** rename list. +*/ +SQLITE_PRIVATE void sqlite3RenameExprlistUnmap(Parse *pParse, ExprList *pEList){ + if( pEList ){ + int i; + Walker sWalker; + memset(&sWalker, 0, sizeof(Walker)); + sWalker.pParse = pParse; + sWalker.xExprCallback = renameUnmapExprCb; + sqlite3WalkExprList(&sWalker, pEList); + for(i=0; inExpr; i++){ + if( ALWAYS(pEList->a[i].eEName==ENAME_NAME) ){ + sqlite3RenameTokenRemap(pParse, 0, (void*)pEList->a[i].zEName); + } + } + } +} + +/* +** Free the list of RenameToken objects given in the second argument +*/ +static void renameTokenFree(sqlite3 *db, RenameToken *pToken){ + RenameToken *pNext; + RenameToken *p; + for(p=pToken; p; p=pNext){ + pNext = p->pNext; + sqlite3DbFree(db, p); + } +} + +/* +** Search the Parse object passed as the first argument for a RenameToken +** object associated with parse tree element pPtr. If found, return a pointer +** to it. Otherwise, return NULL. +** +** If the second argument passed to this function is not NULL and a matching +** RenameToken object is found, remove it from the Parse object and add it to +** the list maintained by the RenameCtx object. +*/ +static RenameToken *renameTokenFind( + Parse *pParse, + struct RenameCtx *pCtx, + const void *pPtr +){ + RenameToken **pp; + if( NEVER(pPtr==0) ){ + return 0; + } + for(pp=&pParse->pRename; (*pp); pp=&(*pp)->pNext){ + if( (*pp)->p==pPtr ){ + RenameToken *pToken = *pp; + if( pCtx ){ + *pp = pToken->pNext; + pToken->pNext = pCtx->pList; + pCtx->pList = pToken; + pCtx->nList++; + } + return pToken; + } + } + return 0; +} + +/* +** This is a Walker select callback. It does nothing. It is only required +** because without a dummy callback, sqlite3WalkExpr() and similar do not +** descend into sub-select statements. +*/ +static int renameColumnSelectCb(Walker *pWalker, Select *p){ + if( p->selFlags & (SF_View|SF_CopyCte) ){ + testcase( p->selFlags & SF_View ); + testcase( p->selFlags & SF_CopyCte ); + return WRC_Prune; + } + renameWalkWith(pWalker, p); + return WRC_Continue; +} + +/* +** This is a Walker expression callback. +** +** For every TK_COLUMN node in the expression tree, search to see +** if the column being references is the column being renamed by an +** ALTER TABLE statement. If it is, then attach its associated +** RenameToken object to the list of RenameToken objects being +** constructed in RenameCtx object at pWalker->u.pRename. +*/ +static int renameColumnExprCb(Walker *pWalker, Expr *pExpr){ + RenameCtx *p = pWalker->u.pRename; + if( pExpr->op==TK_TRIGGER + && pExpr->iColumn==p->iCol + && pWalker->pParse->pTriggerTab==p->pTab + ){ + renameTokenFind(pWalker->pParse, p, (void*)pExpr); + }else if( pExpr->op==TK_COLUMN + && pExpr->iColumn==p->iCol + && ALWAYS(ExprUseYTab(pExpr)) + && p->pTab==pExpr->y.pTab + ){ + renameTokenFind(pWalker->pParse, p, (void*)pExpr); + } + return WRC_Continue; +} + +/* +** The RenameCtx contains a list of tokens that reference a column that +** is being renamed by an ALTER TABLE statement. Return the "last" +** RenameToken in the RenameCtx and remove that RenameToken from the +** RenameContext. "Last" means the last RenameToken encountered when +** the input SQL is parsed from left to right. Repeated calls to this routine +** return all column name tokens in the order that they are encountered +** in the SQL statement. +*/ +static RenameToken *renameColumnTokenNext(RenameCtx *pCtx){ + RenameToken *pBest = pCtx->pList; + RenameToken *pToken; + RenameToken **pp; + + for(pToken=pBest->pNext; pToken; pToken=pToken->pNext){ + if( pToken->t.z>pBest->t.z ) pBest = pToken; + } + for(pp=&pCtx->pList; *pp!=pBest; pp=&(*pp)->pNext); + *pp = pBest->pNext; + + return pBest; +} + +/* +** An error occured while parsing or otherwise processing a database +** object (either pParse->pNewTable, pNewIndex or pNewTrigger) as part of an +** ALTER TABLE RENAME COLUMN program. The error message emitted by the +** sub-routine is currently stored in pParse->zErrMsg. This function +** adds context to the error message and then stores it in pCtx. +*/ +static void renameColumnParseError( + sqlite3_context *pCtx, + const char *zWhen, + sqlite3_value *pType, + sqlite3_value *pObject, + Parse *pParse +){ + const char *zT = (const char*)sqlite3_value_text(pType); + const char *zN = (const char*)sqlite3_value_text(pObject); + char *zErr; + + zErr = sqlite3MPrintf(pParse->db, "error in %s %s%s%s: %s", + zT, zN, (zWhen[0] ? " " : ""), zWhen, + pParse->zErrMsg + ); + sqlite3_result_error(pCtx, zErr, -1); + sqlite3DbFree(pParse->db, zErr); +} + +/* +** For each name in the the expression-list pEList (i.e. each +** pEList->a[i].zName) that matches the string in zOld, extract the +** corresponding rename-token from Parse object pParse and add it +** to the RenameCtx pCtx. +*/ +static void renameColumnElistNames( + Parse *pParse, + RenameCtx *pCtx, + const ExprList *pEList, + const char *zOld +){ + if( pEList ){ + int i; + for(i=0; inExpr; i++){ + const char *zName = pEList->a[i].zEName; + if( ALWAYS(pEList->a[i].eEName==ENAME_NAME) + && ALWAYS(zName!=0) + && 0==sqlite3_stricmp(zName, zOld) + ){ + renameTokenFind(pParse, pCtx, (const void*)zName); + } + } + } +} + +/* +** For each name in the the id-list pIdList (i.e. each pIdList->a[i].zName) +** that matches the string in zOld, extract the corresponding rename-token +** from Parse object pParse and add it to the RenameCtx pCtx. +*/ +static void renameColumnIdlistNames( + Parse *pParse, + RenameCtx *pCtx, + const IdList *pIdList, + const char *zOld +){ + if( pIdList ){ + int i; + for(i=0; inId; i++){ + const char *zName = pIdList->a[i].zName; + if( 0==sqlite3_stricmp(zName, zOld) ){ + renameTokenFind(pParse, pCtx, (const void*)zName); + } + } + } +} + + +/* +** Parse the SQL statement zSql using Parse object (*p). The Parse object +** is initialized by this function before it is used. +*/ +static int renameParseSql( + Parse *p, /* Memory to use for Parse object */ + const char *zDb, /* Name of schema SQL belongs to */ + sqlite3 *db, /* Database handle */ + const char *zSql, /* SQL to parse */ + int bTemp /* True if SQL is from temp schema */ +){ + int rc; + + sqlite3ParseObjectInit(p, db); + if( zSql==0 ){ + return SQLITE_NOMEM; + } + if( sqlite3StrNICmp(zSql,"CREATE ",7)!=0 ){ + return SQLITE_CORRUPT_BKPT; + } + db->init.iDb = bTemp ? 1 : sqlite3FindDbName(db, zDb); + p->eParseMode = PARSE_MODE_RENAME; + p->db = db; + p->nQueryLoop = 1; + rc = sqlite3RunParser(p, zSql); + if( db->mallocFailed ) rc = SQLITE_NOMEM; + if( rc==SQLITE_OK + && NEVER(p->pNewTable==0 && p->pNewIndex==0 && p->pNewTrigger==0) + ){ + rc = SQLITE_CORRUPT_BKPT; + } + +#ifdef SQLITE_DEBUG + /* Ensure that all mappings in the Parse.pRename list really do map to + ** a part of the input string. */ + if( rc==SQLITE_OK ){ + int nSql = sqlite3Strlen30(zSql); + RenameToken *pToken; + for(pToken=p->pRename; pToken; pToken=pToken->pNext){ + assert( pToken->t.z>=zSql && &pToken->t.z[pToken->t.n]<=&zSql[nSql] ); + } + } +#endif + + db->init.iDb = 0; + return rc; +} + +/* +** This function edits SQL statement zSql, replacing each token identified +** by the linked list pRename with the text of zNew. If argument bQuote is +** true, then zNew is always quoted first. If no error occurs, the result +** is loaded into context object pCtx as the result. +** +** Or, if an error occurs (i.e. an OOM condition), an error is left in +** pCtx and an SQLite error code returned. +*/ +static int renameEditSql( + sqlite3_context *pCtx, /* Return result here */ + RenameCtx *pRename, /* Rename context */ + const char *zSql, /* SQL statement to edit */ + const char *zNew, /* New token text */ + int bQuote /* True to always quote token */ +){ + i64 nNew = sqlite3Strlen30(zNew); + i64 nSql = sqlite3Strlen30(zSql); + sqlite3 *db = sqlite3_context_db_handle(pCtx); + int rc = SQLITE_OK; + char *zQuot = 0; + char *zOut; + i64 nQuot = 0; + char *zBuf1 = 0; + char *zBuf2 = 0; + + if( zNew ){ + /* Set zQuot to point to a buffer containing a quoted copy of the + ** identifier zNew. If the corresponding identifier in the original + ** ALTER TABLE statement was quoted (bQuote==1), then set zNew to + ** point to zQuot so that all substitutions are made using the + ** quoted version of the new column name. */ + zQuot = sqlite3MPrintf(db, "\"%w\" ", zNew); + if( zQuot==0 ){ + return SQLITE_NOMEM; + }else{ + nQuot = sqlite3Strlen30(zQuot)-1; + } + + assert( nQuot>=nNew ); + zOut = sqlite3DbMallocZero(db, nSql + pRename->nList*nQuot + 1); + }else{ + zOut = (char*)sqlite3DbMallocZero(db, (nSql*2+1) * 3); + if( zOut ){ + zBuf1 = &zOut[nSql*2+1]; + zBuf2 = &zOut[nSql*4+2]; + } + } + + /* At this point pRename->pList contains a list of RenameToken objects + ** corresponding to all tokens in the input SQL that must be replaced + ** with the new column name, or with single-quoted versions of themselves. + ** All that remains is to construct and return the edited SQL string. */ + if( zOut ){ + int nOut = nSql; + memcpy(zOut, zSql, nSql); + while( pRename->pList ){ + int iOff; /* Offset of token to replace in zOut */ + u32 nReplace; + const char *zReplace; + RenameToken *pBest = renameColumnTokenNext(pRename); + + if( zNew ){ + if( bQuote==0 && sqlite3IsIdChar(*pBest->t.z) ){ + nReplace = nNew; + zReplace = zNew; + }else{ + nReplace = nQuot; + zReplace = zQuot; + if( pBest->t.z[pBest->t.n]=='"' ) nReplace++; + } + }else{ + /* Dequote the double-quoted token. Then requote it again, this time + ** using single quotes. If the character immediately following the + ** original token within the input SQL was a single quote ('), then + ** add another space after the new, single-quoted version of the + ** token. This is so that (SELECT "string"'alias') maps to + ** (SELECT 'string' 'alias'), and not (SELECT 'string''alias'). */ + memcpy(zBuf1, pBest->t.z, pBest->t.n); + zBuf1[pBest->t.n] = 0; + sqlite3Dequote(zBuf1); + sqlite3_snprintf(nSql*2, zBuf2, "%Q%s", zBuf1, + pBest->t.z[pBest->t.n]=='\'' ? " " : "" + ); + zReplace = zBuf2; + nReplace = sqlite3Strlen30(zReplace); + } + + iOff = pBest->t.z - zSql; + if( pBest->t.n!=nReplace ){ + memmove(&zOut[iOff + nReplace], &zOut[iOff + pBest->t.n], + nOut - (iOff + pBest->t.n) + ); + nOut += nReplace - pBest->t.n; + zOut[nOut] = '\0'; + } + memcpy(&zOut[iOff], zReplace, nReplace); + sqlite3DbFree(db, pBest); + } + + sqlite3_result_text(pCtx, zOut, -1, SQLITE_TRANSIENT); + sqlite3DbFree(db, zOut); + }else{ + rc = SQLITE_NOMEM; + } + + sqlite3_free(zQuot); + return rc; +} + +/* +** Resolve all symbols in the trigger at pParse->pNewTrigger, assuming +** it was read from the schema of database zDb. Return SQLITE_OK if +** successful. Otherwise, return an SQLite error code and leave an error +** message in the Parse object. +*/ +static int renameResolveTrigger(Parse *pParse){ + sqlite3 *db = pParse->db; + Trigger *pNew = pParse->pNewTrigger; + TriggerStep *pStep; + NameContext sNC; + int rc = SQLITE_OK; + + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + assert( pNew->pTabSchema ); + pParse->pTriggerTab = sqlite3FindTable(db, pNew->table, + db->aDb[sqlite3SchemaToIndex(db, pNew->pTabSchema)].zDbSName + ); + pParse->eTriggerOp = pNew->op; + /* ALWAYS() because if the table of the trigger does not exist, the + ** error would have been hit before this point */ + if( ALWAYS(pParse->pTriggerTab) ){ + rc = sqlite3ViewGetColumnNames(pParse, pParse->pTriggerTab); + } + + /* Resolve symbols in WHEN clause */ + if( rc==SQLITE_OK && pNew->pWhen ){ + rc = sqlite3ResolveExprNames(&sNC, pNew->pWhen); + } + + for(pStep=pNew->step_list; rc==SQLITE_OK && pStep; pStep=pStep->pNext){ + if( pStep->pSelect ){ + sqlite3SelectPrep(pParse, pStep->pSelect, &sNC); + if( pParse->nErr ) rc = pParse->rc; + } + if( rc==SQLITE_OK && pStep->zTarget ){ + SrcList *pSrc = sqlite3TriggerStepSrc(pParse, pStep); + if( pSrc ){ + int i; + for(i=0; inSrc && rc==SQLITE_OK; i++){ + SrcItem *p = &pSrc->a[i]; + p->iCursor = pParse->nTab++; + if( p->pSelect ){ + sqlite3SelectPrep(pParse, p->pSelect, 0); + sqlite3ExpandSubquery(pParse, p); + assert( i>0 ); + assert( pStep->pFrom->a[i-1].pSelect ); + sqlite3SelectPrep(pParse, pStep->pFrom->a[i-1].pSelect, 0); + }else{ + p->pTab = sqlite3LocateTableItem(pParse, 0, p); + if( p->pTab==0 ){ + rc = SQLITE_ERROR; + }else{ + p->pTab->nTabRef++; + rc = sqlite3ViewGetColumnNames(pParse, p->pTab); + } + } + } + if( rc==SQLITE_OK && db->mallocFailed ){ + rc = SQLITE_NOMEM; + } + sNC.pSrcList = pSrc; + if( rc==SQLITE_OK && pStep->pWhere ){ + rc = sqlite3ResolveExprNames(&sNC, pStep->pWhere); + } + if( rc==SQLITE_OK ){ + rc = sqlite3ResolveExprListNames(&sNC, pStep->pExprList); + } + assert( !pStep->pUpsert || (!pStep->pWhere && !pStep->pExprList) ); + if( pStep->pUpsert && rc==SQLITE_OK ){ + Upsert *pUpsert = pStep->pUpsert; + pUpsert->pUpsertSrc = pSrc; + sNC.uNC.pUpsert = pUpsert; + sNC.ncFlags = NC_UUpsert; + rc = sqlite3ResolveExprListNames(&sNC, pUpsert->pUpsertTarget); + if( rc==SQLITE_OK ){ + ExprList *pUpsertSet = pUpsert->pUpsertSet; + rc = sqlite3ResolveExprListNames(&sNC, pUpsertSet); + } + if( rc==SQLITE_OK ){ + rc = sqlite3ResolveExprNames(&sNC, pUpsert->pUpsertWhere); + } + if( rc==SQLITE_OK ){ + rc = sqlite3ResolveExprNames(&sNC, pUpsert->pUpsertTargetWhere); + } + sNC.ncFlags = 0; + } + sNC.pSrcList = 0; + sqlite3SrcListDelete(db, pSrc); + }else{ + rc = SQLITE_NOMEM; + } + } + } + return rc; +} + +/* +** Invoke sqlite3WalkExpr() or sqlite3WalkSelect() on all Select or Expr +** objects that are part of the trigger passed as the second argument. +*/ +static void renameWalkTrigger(Walker *pWalker, Trigger *pTrigger){ + TriggerStep *pStep; + + /* Find tokens to edit in WHEN clause */ + sqlite3WalkExpr(pWalker, pTrigger->pWhen); + + /* Find tokens to edit in trigger steps */ + for(pStep=pTrigger->step_list; pStep; pStep=pStep->pNext){ + sqlite3WalkSelect(pWalker, pStep->pSelect); + sqlite3WalkExpr(pWalker, pStep->pWhere); + sqlite3WalkExprList(pWalker, pStep->pExprList); + if( pStep->pUpsert ){ + Upsert *pUpsert = pStep->pUpsert; + sqlite3WalkExprList(pWalker, pUpsert->pUpsertTarget); + sqlite3WalkExprList(pWalker, pUpsert->pUpsertSet); + sqlite3WalkExpr(pWalker, pUpsert->pUpsertWhere); + sqlite3WalkExpr(pWalker, pUpsert->pUpsertTargetWhere); + } + if( pStep->pFrom ){ + int i; + for(i=0; ipFrom->nSrc; i++){ + sqlite3WalkSelect(pWalker, pStep->pFrom->a[i].pSelect); + } + } + } +} + +/* +** Free the contents of Parse object (*pParse). Do not free the memory +** occupied by the Parse object itself. +*/ +static void renameParseCleanup(Parse *pParse){ + sqlite3 *db = pParse->db; + Index *pIdx; + if( pParse->pVdbe ){ + sqlite3VdbeFinalize(pParse->pVdbe); + } + sqlite3DeleteTable(db, pParse->pNewTable); + while( (pIdx = pParse->pNewIndex)!=0 ){ + pParse->pNewIndex = pIdx->pNext; + sqlite3FreeIndex(db, pIdx); + } + sqlite3DeleteTrigger(db, pParse->pNewTrigger); + sqlite3DbFree(db, pParse->zErrMsg); + renameTokenFree(db, pParse->pRename); + sqlite3ParseObjectReset(pParse); +} + +/* +** SQL function: +** +** sqlite_rename_column(SQL,TYPE,OBJ,DB,TABLE,COL,NEWNAME,QUOTE,TEMP) +** +** 0. zSql: SQL statement to rewrite +** 1. type: Type of object ("table", "view" etc.) +** 2. object: Name of object +** 3. Database: Database name (e.g. "main") +** 4. Table: Table name +** 5. iCol: Index of column to rename +** 6. zNew: New column name +** 7. bQuote: Non-zero if the new column name should be quoted. +** 8. bTemp: True if zSql comes from temp schema +** +** Do a column rename operation on the CREATE statement given in zSql. +** The iCol-th column (left-most is 0) of table zTable is renamed from zCol +** into zNew. The name should be quoted if bQuote is true. +** +** This function is used internally by the ALTER TABLE RENAME COLUMN command. +** It is only accessible to SQL created using sqlite3NestedParse(). It is +** not reachable from ordinary SQL passed into sqlite3_prepare() unless the +** SQLITE_TESTCTRL_INTERNAL_FUNCTIONS test setting is enabled. +*/ +static void renameColumnFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + sqlite3 *db = sqlite3_context_db_handle(context); + RenameCtx sCtx; + const char *zSql = (const char*)sqlite3_value_text(argv[0]); + const char *zDb = (const char*)sqlite3_value_text(argv[3]); + const char *zTable = (const char*)sqlite3_value_text(argv[4]); + int iCol = sqlite3_value_int(argv[5]); + const char *zNew = (const char*)sqlite3_value_text(argv[6]); + int bQuote = sqlite3_value_int(argv[7]); + int bTemp = sqlite3_value_int(argv[8]); + const char *zOld; + int rc; + Parse sParse; + Walker sWalker; + Index *pIdx; + int i; + Table *pTab; +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth = db->xAuth; +#endif + + UNUSED_PARAMETER(NotUsed); + if( zSql==0 ) return; + if( zTable==0 ) return; + if( zNew==0 ) return; + if( iCol<0 ) return; + sqlite3BtreeEnterAll(db); + pTab = sqlite3FindTable(db, zTable, zDb); + if( pTab==0 || iCol>=pTab->nCol ){ + sqlite3BtreeLeaveAll(db); + return; + } + zOld = pTab->aCol[iCol].zCnName; + memset(&sCtx, 0, sizeof(sCtx)); + sCtx.iCol = ((iCol==pTab->iPKey) ? -1 : iCol); + +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = 0; +#endif + rc = renameParseSql(&sParse, zDb, db, zSql, bTemp); + + /* Find tokens that need to be replaced. */ + memset(&sWalker, 0, sizeof(Walker)); + sWalker.pParse = &sParse; + sWalker.xExprCallback = renameColumnExprCb; + sWalker.xSelectCallback = renameColumnSelectCb; + sWalker.u.pRename = &sCtx; + + sCtx.pTab = pTab; + if( rc!=SQLITE_OK ) goto renameColumnFunc_done; + if( sParse.pNewTable ){ + if( IsView(sParse.pNewTable) ){ + Select *pSelect = sParse.pNewTable->u.view.pSelect; + pSelect->selFlags &= ~SF_View; + sParse.rc = SQLITE_OK; + sqlite3SelectPrep(&sParse, pSelect, 0); + rc = (db->mallocFailed ? SQLITE_NOMEM : sParse.rc); + if( rc==SQLITE_OK ){ + sqlite3WalkSelect(&sWalker, pSelect); + } + if( rc!=SQLITE_OK ) goto renameColumnFunc_done; + }else if( IsOrdinaryTable(sParse.pNewTable) ){ + /* A regular table */ + int bFKOnly = sqlite3_stricmp(zTable, sParse.pNewTable->zName); + FKey *pFKey; + sCtx.pTab = sParse.pNewTable; + if( bFKOnly==0 ){ + if( iColnCol ){ + renameTokenFind( + &sParse, &sCtx, (void*)sParse.pNewTable->aCol[iCol].zCnName + ); + } + if( sCtx.iCol<0 ){ + renameTokenFind(&sParse, &sCtx, (void*)&sParse.pNewTable->iPKey); + } + sqlite3WalkExprList(&sWalker, sParse.pNewTable->pCheck); + for(pIdx=sParse.pNewTable->pIndex; pIdx; pIdx=pIdx->pNext){ + sqlite3WalkExprList(&sWalker, pIdx->aColExpr); + } + for(pIdx=sParse.pNewIndex; pIdx; pIdx=pIdx->pNext){ + sqlite3WalkExprList(&sWalker, pIdx->aColExpr); + } +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + for(i=0; inCol; i++){ + Expr *pExpr = sqlite3ColumnExpr(sParse.pNewTable, + &sParse.pNewTable->aCol[i]); + sqlite3WalkExpr(&sWalker, pExpr); + } +#endif + } + + assert( IsOrdinaryTable(sParse.pNewTable) ); + for(pFKey=sParse.pNewTable->u.tab.pFKey; pFKey; pFKey=pFKey->pNextFrom){ + for(i=0; inCol; i++){ + if( bFKOnly==0 && pFKey->aCol[i].iFrom==iCol ){ + renameTokenFind(&sParse, &sCtx, (void*)&pFKey->aCol[i]); + } + if( 0==sqlite3_stricmp(pFKey->zTo, zTable) + && 0==sqlite3_stricmp(pFKey->aCol[i].zCol, zOld) + ){ + renameTokenFind(&sParse, &sCtx, (void*)pFKey->aCol[i].zCol); + } + } + } + } + }else if( sParse.pNewIndex ){ + sqlite3WalkExprList(&sWalker, sParse.pNewIndex->aColExpr); + sqlite3WalkExpr(&sWalker, sParse.pNewIndex->pPartIdxWhere); + }else{ + /* A trigger */ + TriggerStep *pStep; + rc = renameResolveTrigger(&sParse); + if( rc!=SQLITE_OK ) goto renameColumnFunc_done; + + for(pStep=sParse.pNewTrigger->step_list; pStep; pStep=pStep->pNext){ + if( pStep->zTarget ){ + Table *pTarget = sqlite3LocateTable(&sParse, 0, pStep->zTarget, zDb); + if( pTarget==pTab ){ + if( pStep->pUpsert ){ + ExprList *pUpsertSet = pStep->pUpsert->pUpsertSet; + renameColumnElistNames(&sParse, &sCtx, pUpsertSet, zOld); + } + renameColumnIdlistNames(&sParse, &sCtx, pStep->pIdList, zOld); + renameColumnElistNames(&sParse, &sCtx, pStep->pExprList, zOld); + } + } + } + + + /* Find tokens to edit in UPDATE OF clause */ + if( sParse.pTriggerTab==pTab ){ + renameColumnIdlistNames(&sParse, &sCtx,sParse.pNewTrigger->pColumns,zOld); + } + + /* Find tokens to edit in various expressions and selects */ + renameWalkTrigger(&sWalker, sParse.pNewTrigger); + } + + assert( rc==SQLITE_OK ); + rc = renameEditSql(context, &sCtx, zSql, zNew, bQuote); + +renameColumnFunc_done: + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_ERROR && sqlite3WritableSchema(db) ){ + sqlite3_result_value(context, argv[0]); + }else if( sParse.zErrMsg ){ + renameColumnParseError(context, "", argv[1], argv[2], &sParse); + }else{ + sqlite3_result_error_code(context, rc); + } + } + + renameParseCleanup(&sParse); + renameTokenFree(db, sCtx.pList); +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = xAuth; +#endif + sqlite3BtreeLeaveAll(db); +} + +/* +** Walker expression callback used by "RENAME TABLE". +*/ +static int renameTableExprCb(Walker *pWalker, Expr *pExpr){ + RenameCtx *p = pWalker->u.pRename; + if( pExpr->op==TK_COLUMN + && ALWAYS(ExprUseYTab(pExpr)) + && p->pTab==pExpr->y.pTab + ){ + renameTokenFind(pWalker->pParse, p, (void*)&pExpr->y.pTab); + } + return WRC_Continue; +} + +/* +** Walker select callback used by "RENAME TABLE". +*/ +static int renameTableSelectCb(Walker *pWalker, Select *pSelect){ + int i; + RenameCtx *p = pWalker->u.pRename; + SrcList *pSrc = pSelect->pSrc; + if( pSelect->selFlags & (SF_View|SF_CopyCte) ){ + testcase( pSelect->selFlags & SF_View ); + testcase( pSelect->selFlags & SF_CopyCte ); + return WRC_Prune; + } + if( NEVER(pSrc==0) ){ + assert( pWalker->pParse->db->mallocFailed ); + return WRC_Abort; + } + for(i=0; inSrc; i++){ + SrcItem *pItem = &pSrc->a[i]; + if( pItem->pTab==p->pTab ){ + renameTokenFind(pWalker->pParse, p, pItem->zName); + } + } + renameWalkWith(pWalker, pSelect); + + return WRC_Continue; +} + + +/* +** This C function implements an SQL user function that is used by SQL code +** generated by the ALTER TABLE ... RENAME command to modify the definition +** of any foreign key constraints that use the table being renamed as the +** parent table. It is passed three arguments: +** +** 0: The database containing the table being renamed. +** 1. type: Type of object ("table", "view" etc.) +** 2. object: Name of object +** 3: The complete text of the schema statement being modified, +** 4: The old name of the table being renamed, and +** 5: The new name of the table being renamed. +** 6: True if the schema statement comes from the temp db. +** +** It returns the new schema statement. For example: +** +** sqlite_rename_table('main', 'CREATE TABLE t1(a REFERENCES t2)','t2','t3',0) +** -> 'CREATE TABLE t1(a REFERENCES t3)' +*/ +static void renameTableFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + sqlite3 *db = sqlite3_context_db_handle(context); + const char *zDb = (const char*)sqlite3_value_text(argv[0]); + const char *zInput = (const char*)sqlite3_value_text(argv[3]); + const char *zOld = (const char*)sqlite3_value_text(argv[4]); + const char *zNew = (const char*)sqlite3_value_text(argv[5]); + int bTemp = sqlite3_value_int(argv[6]); + UNUSED_PARAMETER(NotUsed); + + if( zInput && zOld && zNew ){ + Parse sParse; + int rc; + int bQuote = 1; + RenameCtx sCtx; + Walker sWalker; + +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth = db->xAuth; + db->xAuth = 0; +#endif + + sqlite3BtreeEnterAll(db); + + memset(&sCtx, 0, sizeof(RenameCtx)); + sCtx.pTab = sqlite3FindTable(db, zOld, zDb); + memset(&sWalker, 0, sizeof(Walker)); + sWalker.pParse = &sParse; + sWalker.xExprCallback = renameTableExprCb; + sWalker.xSelectCallback = renameTableSelectCb; + sWalker.u.pRename = &sCtx; + + rc = renameParseSql(&sParse, zDb, db, zInput, bTemp); + + if( rc==SQLITE_OK ){ + int isLegacy = (db->flags & SQLITE_LegacyAlter); + if( sParse.pNewTable ){ + Table *pTab = sParse.pNewTable; + + if( IsView(pTab) ){ + if( isLegacy==0 ){ + Select *pSelect = pTab->u.view.pSelect; + NameContext sNC; + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = &sParse; + + assert( pSelect->selFlags & SF_View ); + pSelect->selFlags &= ~SF_View; + sqlite3SelectPrep(&sParse, pTab->u.view.pSelect, &sNC); + if( sParse.nErr ){ + rc = sParse.rc; + }else{ + sqlite3WalkSelect(&sWalker, pTab->u.view.pSelect); + } + } + }else{ + /* Modify any FK definitions to point to the new table. */ +#ifndef SQLITE_OMIT_FOREIGN_KEY + if( (isLegacy==0 || (db->flags & SQLITE_ForeignKeys)) + && !IsVirtual(pTab) + ){ + FKey *pFKey; + assert( IsOrdinaryTable(pTab) ); + for(pFKey=pTab->u.tab.pFKey; pFKey; pFKey=pFKey->pNextFrom){ + if( sqlite3_stricmp(pFKey->zTo, zOld)==0 ){ + renameTokenFind(&sParse, &sCtx, (void*)pFKey->zTo); + } + } + } +#endif + + /* If this is the table being altered, fix any table refs in CHECK + ** expressions. Also update the name that appears right after the + ** "CREATE [VIRTUAL] TABLE" bit. */ + if( sqlite3_stricmp(zOld, pTab->zName)==0 ){ + sCtx.pTab = pTab; + if( isLegacy==0 ){ + sqlite3WalkExprList(&sWalker, pTab->pCheck); + } + renameTokenFind(&sParse, &sCtx, pTab->zName); + } + } + } + + else if( sParse.pNewIndex ){ + renameTokenFind(&sParse, &sCtx, sParse.pNewIndex->zName); + if( isLegacy==0 ){ + sqlite3WalkExpr(&sWalker, sParse.pNewIndex->pPartIdxWhere); + } + } + +#ifndef SQLITE_OMIT_TRIGGER + else{ + Trigger *pTrigger = sParse.pNewTrigger; + TriggerStep *pStep; + if( 0==sqlite3_stricmp(sParse.pNewTrigger->table, zOld) + && sCtx.pTab->pSchema==pTrigger->pTabSchema + ){ + renameTokenFind(&sParse, &sCtx, sParse.pNewTrigger->table); + } + + if( isLegacy==0 ){ + rc = renameResolveTrigger(&sParse); + if( rc==SQLITE_OK ){ + renameWalkTrigger(&sWalker, pTrigger); + for(pStep=pTrigger->step_list; pStep; pStep=pStep->pNext){ + if( pStep->zTarget && 0==sqlite3_stricmp(pStep->zTarget, zOld) ){ + renameTokenFind(&sParse, &sCtx, pStep->zTarget); + } + } + } + } + } +#endif + } + + if( rc==SQLITE_OK ){ + rc = renameEditSql(context, &sCtx, zInput, zNew, bQuote); + } + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_ERROR && sqlite3WritableSchema(db) ){ + sqlite3_result_value(context, argv[3]); + }else if( sParse.zErrMsg ){ + renameColumnParseError(context, "", argv[1], argv[2], &sParse); + }else{ + sqlite3_result_error_code(context, rc); + } + } + + renameParseCleanup(&sParse); + renameTokenFree(db, sCtx.pList); + sqlite3BtreeLeaveAll(db); +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = xAuth; +#endif + } + + return; +} + +static int renameQuotefixExprCb(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_STRING && (pExpr->flags & EP_DblQuoted) ){ + renameTokenFind(pWalker->pParse, pWalker->u.pRename, (const void*)pExpr); + } + return WRC_Continue; +} + +/* SQL function: sqlite_rename_quotefix(DB,SQL) +** +** Rewrite the DDL statement "SQL" so that any string literals that use +** double-quotes use single quotes instead. +** +** Two arguments must be passed: +** +** 0: Database name ("main", "temp" etc.). +** 1: SQL statement to edit. +** +** The returned value is the modified SQL statement. For example, given +** the database schema: +** +** CREATE TABLE t1(a, b, c); +** +** SELECT sqlite_rename_quotefix('main', +** 'CREATE VIEW v1 AS SELECT "a", "string" FROM t1' +** ); +** +** returns the string: +** +** CREATE VIEW v1 AS SELECT "a", 'string' FROM t1 +** +** If there is a error in the input SQL, then raise an error, except +** if PRAGMA writable_schema=ON, then just return the input string +** unmodified following an error. +*/ +static void renameQuotefixFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + sqlite3 *db = sqlite3_context_db_handle(context); + char const *zDb = (const char*)sqlite3_value_text(argv[0]); + char const *zInput = (const char*)sqlite3_value_text(argv[1]); + +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth = db->xAuth; + db->xAuth = 0; +#endif + + sqlite3BtreeEnterAll(db); + + UNUSED_PARAMETER(NotUsed); + if( zDb && zInput ){ + int rc; + Parse sParse; + rc = renameParseSql(&sParse, zDb, db, zInput, 0); + + if( rc==SQLITE_OK ){ + RenameCtx sCtx; + Walker sWalker; + + /* Walker to find tokens that need to be replaced. */ + memset(&sCtx, 0, sizeof(RenameCtx)); + memset(&sWalker, 0, sizeof(Walker)); + sWalker.pParse = &sParse; + sWalker.xExprCallback = renameQuotefixExprCb; + sWalker.xSelectCallback = renameColumnSelectCb; + sWalker.u.pRename = &sCtx; + + if( sParse.pNewTable ){ + if( IsView(sParse.pNewTable) ){ + Select *pSelect = sParse.pNewTable->u.view.pSelect; + pSelect->selFlags &= ~SF_View; + sParse.rc = SQLITE_OK; + sqlite3SelectPrep(&sParse, pSelect, 0); + rc = (db->mallocFailed ? SQLITE_NOMEM : sParse.rc); + if( rc==SQLITE_OK ){ + sqlite3WalkSelect(&sWalker, pSelect); + } + }else{ + int i; + sqlite3WalkExprList(&sWalker, sParse.pNewTable->pCheck); +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + for(i=0; inCol; i++){ + sqlite3WalkExpr(&sWalker, + sqlite3ColumnExpr(sParse.pNewTable, + &sParse.pNewTable->aCol[i])); + } +#endif /* SQLITE_OMIT_GENERATED_COLUMNS */ + } + }else if( sParse.pNewIndex ){ + sqlite3WalkExprList(&sWalker, sParse.pNewIndex->aColExpr); + sqlite3WalkExpr(&sWalker, sParse.pNewIndex->pPartIdxWhere); + }else{ +#ifndef SQLITE_OMIT_TRIGGER + rc = renameResolveTrigger(&sParse); + if( rc==SQLITE_OK ){ + renameWalkTrigger(&sWalker, sParse.pNewTrigger); + } +#endif /* SQLITE_OMIT_TRIGGER */ + } + + if( rc==SQLITE_OK ){ + rc = renameEditSql(context, &sCtx, zInput, 0, 0); + } + renameTokenFree(db, sCtx.pList); + } + if( rc!=SQLITE_OK ){ + if( sqlite3WritableSchema(db) && rc==SQLITE_ERROR ){ + sqlite3_result_value(context, argv[1]); + }else{ + sqlite3_result_error_code(context, rc); + } + } + renameParseCleanup(&sParse); + } + +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = xAuth; +#endif + + sqlite3BtreeLeaveAll(db); +} + +/* Function: sqlite_rename_test(DB,SQL,TYPE,NAME,ISTEMP,WHEN,DQS) +** +** An SQL user function that checks that there are no parse or symbol +** resolution problems in a CREATE TRIGGER|TABLE|VIEW|INDEX statement. +** After an ALTER TABLE .. RENAME operation is performed and the schema +** reloaded, this function is called on each SQL statement in the schema +** to ensure that it is still usable. +** +** 0: Database name ("main", "temp" etc.). +** 1: SQL statement. +** 2: Object type ("view", "table", "trigger" or "index"). +** 3: Object name. +** 4: True if object is from temp schema. +** 5: "when" part of error message. +** 6: True to disable the DQS quirk when parsing SQL. +** +** The return value is computed as follows: +** +** A. If an error is seen and not in PRAGMA writable_schema=ON mode, +** then raise the error. +** B. Else if a trigger is created and the the table that the trigger is +** attached to is in database zDb, then return 1. +** C. Otherwise return NULL. +*/ +static void renameTableTest( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + sqlite3 *db = sqlite3_context_db_handle(context); + char const *zDb = (const char*)sqlite3_value_text(argv[0]); + char const *zInput = (const char*)sqlite3_value_text(argv[1]); + int bTemp = sqlite3_value_int(argv[4]); + int isLegacy = (db->flags & SQLITE_LegacyAlter); + char const *zWhen = (const char*)sqlite3_value_text(argv[5]); + int bNoDQS = sqlite3_value_int(argv[6]); + +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth = db->xAuth; + db->xAuth = 0; +#endif + + UNUSED_PARAMETER(NotUsed); + + if( zDb && zInput ){ + int rc; + Parse sParse; + int flags = db->flags; + if( bNoDQS ) db->flags &= ~(SQLITE_DqsDML|SQLITE_DqsDDL); + rc = renameParseSql(&sParse, zDb, db, zInput, bTemp); + db->flags |= (flags & (SQLITE_DqsDML|SQLITE_DqsDDL)); + if( rc==SQLITE_OK ){ + if( isLegacy==0 && sParse.pNewTable && IsView(sParse.pNewTable) ){ + NameContext sNC; + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = &sParse; + sqlite3SelectPrep(&sParse, sParse.pNewTable->u.view.pSelect, &sNC); + if( sParse.nErr ) rc = sParse.rc; + } + + else if( sParse.pNewTrigger ){ + if( isLegacy==0 ){ + rc = renameResolveTrigger(&sParse); + } + if( rc==SQLITE_OK ){ + int i1 = sqlite3SchemaToIndex(db, sParse.pNewTrigger->pTabSchema); + int i2 = sqlite3FindDbName(db, zDb); + if( i1==i2 ){ + /* Handle output case B */ + sqlite3_result_int(context, 1); + } + } + } + } + + if( rc!=SQLITE_OK && zWhen && !sqlite3WritableSchema(db) ){ + /* Output case A */ + renameColumnParseError(context, zWhen, argv[2], argv[3],&sParse); + } + renameParseCleanup(&sParse); + } + +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = xAuth; +#endif +} + +/* +** The implementation of internal UDF sqlite_drop_column(). +** +** Arguments: +** +** argv[0]: An integer - the index of the schema containing the table +** argv[1]: CREATE TABLE statement to modify. +** argv[2]: An integer - the index of the column to remove. +** +** The value returned is a string containing the CREATE TABLE statement +** with column argv[2] removed. +*/ +static void dropColumnFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + sqlite3 *db = sqlite3_context_db_handle(context); + int iSchema = sqlite3_value_int(argv[0]); + const char *zSql = (const char*)sqlite3_value_text(argv[1]); + int iCol = sqlite3_value_int(argv[2]); + const char *zDb = db->aDb[iSchema].zDbSName; + int rc; + Parse sParse; + RenameToken *pCol; + Table *pTab; + const char *zEnd; + char *zNew = 0; + +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth = db->xAuth; + db->xAuth = 0; +#endif + + UNUSED_PARAMETER(NotUsed); + rc = renameParseSql(&sParse, zDb, db, zSql, iSchema==1); + if( rc!=SQLITE_OK ) goto drop_column_done; + pTab = sParse.pNewTable; + if( pTab==0 || pTab->nCol==1 || iCol>=pTab->nCol ){ + /* This can happen if the sqlite_schema table is corrupt */ + rc = SQLITE_CORRUPT_BKPT; + goto drop_column_done; + } + + pCol = renameTokenFind(&sParse, 0, (void*)pTab->aCol[iCol].zCnName); + if( iColnCol-1 ){ + RenameToken *pEnd; + pEnd = renameTokenFind(&sParse, 0, (void*)pTab->aCol[iCol+1].zCnName); + zEnd = (const char*)pEnd->t.z; + }else{ + assert( IsOrdinaryTable(pTab) ); + zEnd = (const char*)&zSql[pTab->u.tab.addColOffset]; + while( ALWAYS(pCol->t.z[0]!=0) && pCol->t.z[0]!=',' ) pCol->t.z--; + } + + zNew = sqlite3MPrintf(db, "%.*s%s", pCol->t.z-zSql, zSql, zEnd); + sqlite3_result_text(context, zNew, -1, SQLITE_TRANSIENT); + sqlite3_free(zNew); + +drop_column_done: + renameParseCleanup(&sParse); +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = xAuth; +#endif + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(context, rc); + } +} + +/* +** This function is called by the parser upon parsing an +** +** ALTER TABLE pSrc DROP COLUMN pName +** +** statement. Argument pSrc contains the possibly qualified name of the +** table being edited, and token pName the name of the column to drop. +*/ +SQLITE_PRIVATE void sqlite3AlterDropColumn(Parse *pParse, SrcList *pSrc, const Token *pName){ + sqlite3 *db = pParse->db; /* Database handle */ + Table *pTab; /* Table to modify */ + int iDb; /* Index of db containing pTab in aDb[] */ + const char *zDb; /* Database containing pTab ("main" etc.) */ + char *zCol = 0; /* Name of column to drop */ + int iCol; /* Index of column zCol in pTab->aCol[] */ + + /* Look up the table being altered. */ + assert( pParse->pNewTable==0 ); + assert( sqlite3BtreeHoldsAllMutexes(db) ); + if( NEVER(db->mallocFailed) ) goto exit_drop_column; + pTab = sqlite3LocateTableItem(pParse, 0, &pSrc->a[0]); + if( !pTab ) goto exit_drop_column; + + /* Make sure this is not an attempt to ALTER a view, virtual table or + ** system table. */ + if( SQLITE_OK!=isAlterableTable(pParse, pTab) ) goto exit_drop_column; + if( SQLITE_OK!=isRealTable(pParse, pTab, 1) ) goto exit_drop_column; + + /* Find the index of the column being dropped. */ + zCol = sqlite3NameFromToken(db, pName); + if( zCol==0 ){ + assert( db->mallocFailed ); + goto exit_drop_column; + } + iCol = sqlite3ColumnIndex(pTab, zCol); + if( iCol<0 ){ + sqlite3ErrorMsg(pParse, "no such column: \"%T\"", pName); + goto exit_drop_column; + } + + /* Do not allow the user to drop a PRIMARY KEY column or a column + ** constrained by a UNIQUE constraint. */ + if( pTab->aCol[iCol].colFlags & (COLFLAG_PRIMKEY|COLFLAG_UNIQUE) ){ + sqlite3ErrorMsg(pParse, "cannot drop %s column: \"%s\"", + (pTab->aCol[iCol].colFlags&COLFLAG_PRIMKEY) ? "PRIMARY KEY" : "UNIQUE", + zCol + ); + goto exit_drop_column; + } + + /* Do not allow the number of columns to go to zero */ + if( pTab->nCol<=1 ){ + sqlite3ErrorMsg(pParse, "cannot drop column \"%s\": no other columns exist",zCol); + goto exit_drop_column; + } + + /* Edit the sqlite_schema table */ + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + assert( iDb>=0 ); + zDb = db->aDb[iDb].zDbSName; +#ifndef SQLITE_OMIT_AUTHORIZATION + /* Invoke the authorization callback. */ + if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, zCol) ){ + goto exit_drop_column; + } +#endif + renameTestSchema(pParse, zDb, iDb==1, "", 0); + renameFixQuotes(pParse, zDb, iDb==1); + sqlite3NestedParse(pParse, + "UPDATE \"%w\"." LEGACY_SCHEMA_TABLE " SET " + "sql = sqlite_drop_column(%d, sql, %d) " + "WHERE (type=='table' AND tbl_name=%Q COLLATE nocase)" + , zDb, iDb, iCol, pTab->zName + ); + + /* Drop and reload the database schema. */ + renameReloadSchema(pParse, iDb, INITFLAG_AlterDrop); + renameTestSchema(pParse, zDb, iDb==1, "after drop column", 1); + + /* Edit rows of table on disk */ + if( pParse->nErr==0 && (pTab->aCol[iCol].colFlags & COLFLAG_VIRTUAL)==0 ){ + int i; + int addr; + int reg; + int regRec; + Index *pPk = 0; + int nField = 0; /* Number of non-virtual columns after drop */ + int iCur; + Vdbe *v = sqlite3GetVdbe(pParse); + iCur = pParse->nTab++; + sqlite3OpenTable(pParse, iCur, iDb, pTab, OP_OpenWrite); + addr = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v); + reg = ++pParse->nMem; + if( HasRowid(pTab) ){ + sqlite3VdbeAddOp2(v, OP_Rowid, iCur, reg); + pParse->nMem += pTab->nCol; + }else{ + pPk = sqlite3PrimaryKeyIndex(pTab); + pParse->nMem += pPk->nColumn; + for(i=0; inKeyCol; i++){ + sqlite3VdbeAddOp3(v, OP_Column, iCur, i, reg+i+1); + } + nField = pPk->nKeyCol; + } + regRec = ++pParse->nMem; + for(i=0; inCol; i++){ + if( i!=iCol && (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 ){ + int regOut; + if( pPk ){ + int iPos = sqlite3TableColumnToIndex(pPk, i); + int iColPos = sqlite3TableColumnToIndex(pPk, iCol); + if( iPosnKeyCol ) continue; + regOut = reg+1+iPos-(iPos>iColPos); + }else{ + regOut = reg+1+nField; + } + if( i==pTab->iPKey ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regOut); + }else{ + sqlite3ExprCodeGetColumnOfTable(v, pTab, iCur, i, regOut); + } + nField++; + } + } + if( nField==0 ){ + /* dbsqlfuzz 5f09e7bcc78b4954d06bf9f2400d7715f48d1fef */ + pParse->nMem++; + sqlite3VdbeAddOp2(v, OP_Null, 0, reg+1); + nField = 1; + } + sqlite3VdbeAddOp3(v, OP_MakeRecord, reg+1, nField, regRec); + if( pPk ){ + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iCur, regRec, reg+1, pPk->nKeyCol); + }else{ + sqlite3VdbeAddOp3(v, OP_Insert, iCur, regRec, reg); + } + sqlite3VdbeChangeP5(v, OPFLAG_SAVEPOSITION); + + sqlite3VdbeAddOp2(v, OP_Next, iCur, addr+1); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addr); + } + +exit_drop_column: + sqlite3DbFree(db, zCol); + sqlite3SrcListDelete(db, pSrc); +} + +/* +** Register built-in functions used to help implement ALTER TABLE +*/ +SQLITE_PRIVATE void sqlite3AlterFunctions(void){ + static FuncDef aAlterTableFuncs[] = { + INTERNAL_FUNCTION(sqlite_rename_column, 9, renameColumnFunc), + INTERNAL_FUNCTION(sqlite_rename_table, 7, renameTableFunc), + INTERNAL_FUNCTION(sqlite_rename_test, 7, renameTableTest), + INTERNAL_FUNCTION(sqlite_drop_column, 3, dropColumnFunc), + INTERNAL_FUNCTION(sqlite_rename_quotefix,2, renameQuotefixFunc), + }; + sqlite3InsertBuiltinFuncs(aAlterTableFuncs, ArraySize(aAlterTableFuncs)); +} +#endif /* SQLITE_ALTER_TABLE */ + +/************** End of alter.c ***********************************************/ +/************** Begin file analyze.c *****************************************/ +/* +** 2005-07-08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code associated with the ANALYZE command. +** +** The ANALYZE command gather statistics about the content of tables +** and indices. These statistics are made available to the query planner +** to help it make better decisions about how to perform queries. +** +** The following system tables are or have been supported: +** +** CREATE TABLE sqlite_stat1(tbl, idx, stat); +** CREATE TABLE sqlite_stat2(tbl, idx, sampleno, sample); +** CREATE TABLE sqlite_stat3(tbl, idx, nEq, nLt, nDLt, sample); +** CREATE TABLE sqlite_stat4(tbl, idx, nEq, nLt, nDLt, sample); +** +** Additional tables might be added in future releases of SQLite. +** The sqlite_stat2 table is not created or used unless the SQLite version +** is between 3.6.18 and 3.7.8, inclusive, and unless SQLite is compiled +** with SQLITE_ENABLE_STAT2. The sqlite_stat2 table is deprecated. +** The sqlite_stat2 table is superseded by sqlite_stat3, which is only +** created and used by SQLite versions 3.7.9 through 3.29.0 when +** SQLITE_ENABLE_STAT3 defined. The functionality of sqlite_stat3 +** is a superset of sqlite_stat2 and is also now deprecated. The +** sqlite_stat4 is an enhanced version of sqlite_stat3 and is only +** available when compiled with SQLITE_ENABLE_STAT4 and in SQLite +** versions 3.8.1 and later. STAT4 is the only variant that is still +** supported. +** +** For most applications, sqlite_stat1 provides all the statistics required +** for the query planner to make good choices. +** +** Format of sqlite_stat1: +** +** There is normally one row per index, with the index identified by the +** name in the idx column. The tbl column is the name of the table to +** which the index belongs. In each such row, the stat column will be +** a string consisting of a list of integers. The first integer in this +** list is the number of rows in the index. (This is the same as the +** number of rows in the table, except for partial indices.) The second +** integer is the average number of rows in the index that have the same +** value in the first column of the index. The third integer is the average +** number of rows in the index that have the same value for the first two +** columns. The N-th integer (for N>1) is the average number of rows in +** the index which have the same value for the first N-1 columns. For +** a K-column index, there will be K+1 integers in the stat column. If +** the index is unique, then the last integer will be 1. +** +** The list of integers in the stat column can optionally be followed +** by the keyword "unordered". The "unordered" keyword, if it is present, +** must be separated from the last integer by a single space. If the +** "unordered" keyword is present, then the query planner assumes that +** the index is unordered and will not use the index for a range query. +** +** If the sqlite_stat1.idx column is NULL, then the sqlite_stat1.stat +** column contains a single integer which is the (estimated) number of +** rows in the table identified by sqlite_stat1.tbl. +** +** Format of sqlite_stat2: +** +** The sqlite_stat2 is only created and is only used if SQLite is compiled +** with SQLITE_ENABLE_STAT2 and if the SQLite version number is between +** 3.6.18 and 3.7.8. The "stat2" table contains additional information +** about the distribution of keys within an index. The index is identified by +** the "idx" column and the "tbl" column is the name of the table to which +** the index belongs. There are usually 10 rows in the sqlite_stat2 +** table for each index. +** +** The sqlite_stat2 entries for an index that have sampleno between 0 and 9 +** inclusive are samples of the left-most key value in the index taken at +** evenly spaced points along the index. Let the number of samples be S +** (10 in the standard build) and let C be the number of rows in the index. +** Then the sampled rows are given by: +** +** rownumber = (i*C*2 + C)/(S*2) +** +** For i between 0 and S-1. Conceptually, the index space is divided into +** S uniform buckets and the samples are the middle row from each bucket. +** +** The format for sqlite_stat2 is recorded here for legacy reference. This +** version of SQLite does not support sqlite_stat2. It neither reads nor +** writes the sqlite_stat2 table. This version of SQLite only supports +** sqlite_stat3. +** +** Format for sqlite_stat3: +** +** The sqlite_stat3 format is a subset of sqlite_stat4. Hence, the +** sqlite_stat4 format will be described first. Further information +** about sqlite_stat3 follows the sqlite_stat4 description. +** +** Format for sqlite_stat4: +** +** As with sqlite_stat2, the sqlite_stat4 table contains histogram data +** to aid the query planner in choosing good indices based on the values +** that indexed columns are compared against in the WHERE clauses of +** queries. +** +** The sqlite_stat4 table contains multiple entries for each index. +** The idx column names the index and the tbl column is the table of the +** index. If the idx and tbl columns are the same, then the sample is +** of the INTEGER PRIMARY KEY. The sample column is a blob which is the +** binary encoding of a key from the index. The nEq column is a +** list of integers. The first integer is the approximate number +** of entries in the index whose left-most column exactly matches +** the left-most column of the sample. The second integer in nEq +** is the approximate number of entries in the index where the +** first two columns match the first two columns of the sample. +** And so forth. nLt is another list of integers that show the approximate +** number of entries that are strictly less than the sample. The first +** integer in nLt contains the number of entries in the index where the +** left-most column is less than the left-most column of the sample. +** The K-th integer in the nLt entry is the number of index entries +** where the first K columns are less than the first K columns of the +** sample. The nDLt column is like nLt except that it contains the +** number of distinct entries in the index that are less than the +** sample. +** +** There can be an arbitrary number of sqlite_stat4 entries per index. +** The ANALYZE command will typically generate sqlite_stat4 tables +** that contain between 10 and 40 samples which are distributed across +** the key space, though not uniformly, and which include samples with +** large nEq values. +** +** Format for sqlite_stat3 redux: +** +** The sqlite_stat3 table is like sqlite_stat4 except that it only +** looks at the left-most column of the index. The sqlite_stat3.sample +** column contains the actual value of the left-most column instead +** of a blob encoding of the complete index key as is found in +** sqlite_stat4.sample. The nEq, nLt, and nDLt entries of sqlite_stat3 +** all contain just a single integer which is the same as the first +** integer in the equivalent columns in sqlite_stat4. +*/ +#ifndef SQLITE_OMIT_ANALYZE +/* #include "sqliteInt.h" */ + +#if defined(SQLITE_ENABLE_STAT4) +# define IsStat4 1 +#else +# define IsStat4 0 +# undef SQLITE_STAT4_SAMPLES +# define SQLITE_STAT4_SAMPLES 1 +#endif + +/* +** This routine generates code that opens the sqlite_statN tables. +** The sqlite_stat1 table is always relevant. sqlite_stat2 is now +** obsolete. sqlite_stat3 and sqlite_stat4 are only opened when +** appropriate compile-time options are provided. +** +** If the sqlite_statN tables do not previously exist, it is created. +** +** Argument zWhere may be a pointer to a buffer containing a table name, +** or it may be a NULL pointer. If it is not NULL, then all entries in +** the sqlite_statN tables associated with the named table are deleted. +** If zWhere==0, then code is generated to delete all stat table entries. +*/ +static void openStatTable( + Parse *pParse, /* Parsing context */ + int iDb, /* The database we are looking in */ + int iStatCur, /* Open the sqlite_stat1 table on this cursor */ + const char *zWhere, /* Delete entries for this table or index */ + const char *zWhereType /* Either "tbl" or "idx" */ +){ + static const struct { + const char *zName; + const char *zCols; + } aTable[] = { + { "sqlite_stat1", "tbl,idx,stat" }, +#if defined(SQLITE_ENABLE_STAT4) + { "sqlite_stat4", "tbl,idx,neq,nlt,ndlt,sample" }, +#else + { "sqlite_stat4", 0 }, +#endif + { "sqlite_stat3", 0 }, + }; + int i; + sqlite3 *db = pParse->db; + Db *pDb; + Vdbe *v = sqlite3GetVdbe(pParse); + u32 aRoot[ArraySize(aTable)]; + u8 aCreateTbl[ArraySize(aTable)]; +#ifdef SQLITE_ENABLE_STAT4 + const int nToOpen = OptimizationEnabled(db,SQLITE_Stat4) ? 2 : 1; +#else + const int nToOpen = 1; +#endif + + if( v==0 ) return; + assert( sqlite3BtreeHoldsAllMutexes(db) ); + assert( sqlite3VdbeDb(v)==db ); + pDb = &db->aDb[iDb]; + + /* Create new statistic tables if they do not exist, or clear them + ** if they do already exist. + */ + for(i=0; izDbSName))==0 ){ + if( iregRoot. This is important + ** because the OpenWrite opcode below will be needing it. */ + sqlite3NestedParse(pParse, + "CREATE TABLE %Q.%s(%s)", pDb->zDbSName, zTab, aTable[i].zCols + ); + aRoot[i] = (u32)pParse->regRoot; + aCreateTbl[i] = OPFLAG_P2ISREG; + } + }else{ + /* The table already exists. If zWhere is not NULL, delete all entries + ** associated with the table zWhere. If zWhere is NULL, delete the + ** entire contents of the table. */ + aRoot[i] = pStat->tnum; + sqlite3TableLock(pParse, iDb, aRoot[i], 1, zTab); + if( zWhere ){ + sqlite3NestedParse(pParse, + "DELETE FROM %Q.%s WHERE %s=%Q", + pDb->zDbSName, zTab, zWhereType, zWhere + ); +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + }else if( db->xPreUpdateCallback ){ + sqlite3NestedParse(pParse, "DELETE FROM %Q.%s", pDb->zDbSName, zTab); +#endif + }else{ + /* The sqlite_stat[134] table already exists. Delete all rows. */ + sqlite3VdbeAddOp2(v, OP_Clear, (int)aRoot[i], iDb); + } + } + } + + /* Open the sqlite_stat[134] tables for writing. */ + for(i=0; inRowid ){ + sqlite3DbFree(db, p->u.aRowid); + p->nRowid = 0; + } +} +#endif + +/* Initialize the BLOB value of a ROWID +*/ +#ifdef SQLITE_ENABLE_STAT4 +static void sampleSetRowid(sqlite3 *db, StatSample *p, int n, const u8 *pData){ + assert( db!=0 ); + if( p->nRowid ) sqlite3DbFree(db, p->u.aRowid); + p->u.aRowid = sqlite3DbMallocRawNN(db, n); + if( p->u.aRowid ){ + p->nRowid = n; + memcpy(p->u.aRowid, pData, n); + }else{ + p->nRowid = 0; + } +} +#endif + +/* Initialize the INTEGER value of a ROWID. +*/ +#ifdef SQLITE_ENABLE_STAT4 +static void sampleSetRowidInt64(sqlite3 *db, StatSample *p, i64 iRowid){ + assert( db!=0 ); + if( p->nRowid ) sqlite3DbFree(db, p->u.aRowid); + p->nRowid = 0; + p->u.iRowid = iRowid; +} +#endif + + +/* +** Copy the contents of object (*pFrom) into (*pTo). +*/ +#ifdef SQLITE_ENABLE_STAT4 +static void sampleCopy(StatAccum *p, StatSample *pTo, StatSample *pFrom){ + pTo->isPSample = pFrom->isPSample; + pTo->iCol = pFrom->iCol; + pTo->iHash = pFrom->iHash; + memcpy(pTo->anEq, pFrom->anEq, sizeof(tRowcnt)*p->nCol); + memcpy(pTo->anLt, pFrom->anLt, sizeof(tRowcnt)*p->nCol); + memcpy(pTo->anDLt, pFrom->anDLt, sizeof(tRowcnt)*p->nCol); + if( pFrom->nRowid ){ + sampleSetRowid(p->db, pTo, pFrom->nRowid, pFrom->u.aRowid); + }else{ + sampleSetRowidInt64(p->db, pTo, pFrom->u.iRowid); + } +} +#endif + +/* +** Reclaim all memory of a StatAccum structure. +*/ +static void statAccumDestructor(void *pOld){ + StatAccum *p = (StatAccum*)pOld; +#ifdef SQLITE_ENABLE_STAT4 + if( p->mxSample ){ + int i; + for(i=0; inCol; i++) sampleClear(p->db, p->aBest+i); + for(i=0; imxSample; i++) sampleClear(p->db, p->a+i); + sampleClear(p->db, &p->current); + } +#endif + sqlite3DbFree(p->db, p); +} + +/* +** Implementation of the stat_init(N,K,C,L) SQL function. The four parameters +** are: +** N: The number of columns in the index including the rowid/pk (note 1) +** K: The number of columns in the index excluding the rowid/pk. +** C: Estimated number of rows in the index +** L: A limit on the number of rows to scan, or 0 for no-limit +** +** Note 1: In the special case of the covering index that implements a +** WITHOUT ROWID table, N is the number of PRIMARY KEY columns, not the +** total number of columns in the table. +** +** For indexes on ordinary rowid tables, N==K+1. But for indexes on +** WITHOUT ROWID tables, N=K+P where P is the number of columns in the +** PRIMARY KEY of the table. The covering index that implements the +** original WITHOUT ROWID table as N==K as a special case. +** +** This routine allocates the StatAccum object in heap memory. The return +** value is a pointer to the StatAccum object. The datatype of the +** return value is BLOB, but it is really just a pointer to the StatAccum +** object. +*/ +static void statInit( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + StatAccum *p; + int nCol; /* Number of columns in index being sampled */ + int nKeyCol; /* Number of key columns */ + int nColUp; /* nCol rounded up for alignment */ + int n; /* Bytes of space to allocate */ + sqlite3 *db = sqlite3_context_db_handle(context); /* Database connection */ +#ifdef SQLITE_ENABLE_STAT4 + /* Maximum number of samples. 0 if STAT4 data is not collected */ + int mxSample = OptimizationEnabled(db,SQLITE_Stat4) ?SQLITE_STAT4_SAMPLES :0; +#endif + + /* Decode the three function arguments */ + UNUSED_PARAMETER(argc); + nCol = sqlite3_value_int(argv[0]); + assert( nCol>0 ); + nColUp = sizeof(tRowcnt)<8 ? (nCol+1)&~1 : nCol; + nKeyCol = sqlite3_value_int(argv[1]); + assert( nKeyCol<=nCol ); + assert( nKeyCol>0 ); + + /* Allocate the space required for the StatAccum object */ + n = sizeof(*p) + + sizeof(tRowcnt)*nColUp /* StatAccum.anEq */ + + sizeof(tRowcnt)*nColUp; /* StatAccum.anDLt */ +#ifdef SQLITE_ENABLE_STAT4 + if( mxSample ){ + n += sizeof(tRowcnt)*nColUp /* StatAccum.anLt */ + + sizeof(StatSample)*(nCol+mxSample) /* StatAccum.aBest[], a[] */ + + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample); + } +#endif + p = sqlite3DbMallocZero(db, n); + if( p==0 ){ + sqlite3_result_error_nomem(context); + return; + } + + p->db = db; + p->nEst = sqlite3_value_int64(argv[2]); + p->nRow = 0; + p->nLimit = sqlite3_value_int64(argv[3]); + p->nCol = nCol; + p->nKeyCol = nKeyCol; + p->nSkipAhead = 0; + p->current.anDLt = (tRowcnt*)&p[1]; + p->current.anEq = &p->current.anDLt[nColUp]; + +#ifdef SQLITE_ENABLE_STAT4 + p->mxSample = p->nLimit==0 ? mxSample : 0; + if( mxSample ){ + u8 *pSpace; /* Allocated space not yet assigned */ + int i; /* Used to iterate through p->aSample[] */ + + p->iGet = -1; + p->nPSample = (tRowcnt)(p->nEst/(mxSample/3+1) + 1); + p->current.anLt = &p->current.anEq[nColUp]; + p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]); + + /* Set up the StatAccum.a[] and aBest[] arrays */ + p->a = (struct StatSample*)&p->current.anLt[nColUp]; + p->aBest = &p->a[mxSample]; + pSpace = (u8*)(&p->a[mxSample+nCol]); + for(i=0; i<(mxSample+nCol); i++){ + p->a[i].anEq = (tRowcnt *)pSpace; pSpace += (sizeof(tRowcnt) * nColUp); + p->a[i].anLt = (tRowcnt *)pSpace; pSpace += (sizeof(tRowcnt) * nColUp); + p->a[i].anDLt = (tRowcnt *)pSpace; pSpace += (sizeof(tRowcnt) * nColUp); + } + assert( (pSpace - (u8*)p)==n ); + + for(i=0; iaBest[i].iCol = i; + } + } +#endif + + /* Return a pointer to the allocated object to the caller. Note that + ** only the pointer (the 2nd parameter) matters. The size of the object + ** (given by the 3rd parameter) is never used and can be any positive + ** value. */ + sqlite3_result_blob(context, p, sizeof(*p), statAccumDestructor); +} +static const FuncDef statInitFuncdef = { + 4, /* nArg */ + SQLITE_UTF8, /* funcFlags */ + 0, /* pUserData */ + 0, /* pNext */ + statInit, /* xSFunc */ + 0, /* xFinalize */ + 0, 0, /* xValue, xInverse */ + "stat_init", /* zName */ + {0} +}; + +#ifdef SQLITE_ENABLE_STAT4 +/* +** pNew and pOld are both candidate non-periodic samples selected for +** the same column (pNew->iCol==pOld->iCol). Ignoring this column and +** considering only any trailing columns and the sample hash value, this +** function returns true if sample pNew is to be preferred over pOld. +** In other words, if we assume that the cardinalities of the selected +** column for pNew and pOld are equal, is pNew to be preferred over pOld. +** +** This function assumes that for each argument sample, the contents of +** the anEq[] array from pSample->anEq[pSample->iCol+1] onwards are valid. +*/ +static int sampleIsBetterPost( + StatAccum *pAccum, + StatSample *pNew, + StatSample *pOld +){ + int nCol = pAccum->nCol; + int i; + assert( pNew->iCol==pOld->iCol ); + for(i=pNew->iCol+1; ianEq[i]>pOld->anEq[i] ) return 1; + if( pNew->anEq[i]anEq[i] ) return 0; + } + if( pNew->iHash>pOld->iHash ) return 1; + return 0; +} +#endif + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Return true if pNew is to be preferred over pOld. +** +** This function assumes that for each argument sample, the contents of +** the anEq[] array from pSample->anEq[pSample->iCol] onwards are valid. +*/ +static int sampleIsBetter( + StatAccum *pAccum, + StatSample *pNew, + StatSample *pOld +){ + tRowcnt nEqNew = pNew->anEq[pNew->iCol]; + tRowcnt nEqOld = pOld->anEq[pOld->iCol]; + + assert( pOld->isPSample==0 && pNew->isPSample==0 ); + assert( IsStat4 || (pNew->iCol==0 && pOld->iCol==0) ); + + if( (nEqNew>nEqOld) ) return 1; + if( nEqNew==nEqOld ){ + if( pNew->iColiCol ) return 1; + return (pNew->iCol==pOld->iCol && sampleIsBetterPost(pAccum, pNew, pOld)); + } + return 0; +} + +/* +** Copy the contents of sample *pNew into the p->a[] array. If necessary, +** remove the least desirable sample from p->a[] to make room. +*/ +static void sampleInsert(StatAccum *p, StatSample *pNew, int nEqZero){ + StatSample *pSample = 0; + int i; + + assert( IsStat4 || nEqZero==0 ); + + /* StatAccum.nMaxEqZero is set to the maximum number of leading 0 + ** values in the anEq[] array of any sample in StatAccum.a[]. In + ** other words, if nMaxEqZero is n, then it is guaranteed that there + ** are no samples with StatSample.anEq[m]==0 for (m>=n). */ + if( nEqZero>p->nMaxEqZero ){ + p->nMaxEqZero = nEqZero; + } + if( pNew->isPSample==0 ){ + StatSample *pUpgrade = 0; + assert( pNew->anEq[pNew->iCol]>0 ); + + /* This sample is being added because the prefix that ends in column + ** iCol occurs many times in the table. However, if we have already + ** added a sample that shares this prefix, there is no need to add + ** this one. Instead, upgrade the priority of the highest priority + ** existing sample that shares this prefix. */ + for(i=p->nSample-1; i>=0; i--){ + StatSample *pOld = &p->a[i]; + if( pOld->anEq[pNew->iCol]==0 ){ + if( pOld->isPSample ) return; + assert( pOld->iCol>pNew->iCol ); + assert( sampleIsBetter(p, pNew, pOld) ); + if( pUpgrade==0 || sampleIsBetter(p, pOld, pUpgrade) ){ + pUpgrade = pOld; + } + } + } + if( pUpgrade ){ + pUpgrade->iCol = pNew->iCol; + pUpgrade->anEq[pUpgrade->iCol] = pNew->anEq[pUpgrade->iCol]; + goto find_new_min; + } + } + + /* If necessary, remove sample iMin to make room for the new sample. */ + if( p->nSample>=p->mxSample ){ + StatSample *pMin = &p->a[p->iMin]; + tRowcnt *anEq = pMin->anEq; + tRowcnt *anLt = pMin->anLt; + tRowcnt *anDLt = pMin->anDLt; + sampleClear(p->db, pMin); + memmove(pMin, &pMin[1], sizeof(p->a[0])*(p->nSample-p->iMin-1)); + pSample = &p->a[p->nSample-1]; + pSample->nRowid = 0; + pSample->anEq = anEq; + pSample->anDLt = anDLt; + pSample->anLt = anLt; + p->nSample = p->mxSample-1; + } + + /* The "rows less-than" for the rowid column must be greater than that + ** for the last sample in the p->a[] array. Otherwise, the samples would + ** be out of order. */ + assert( p->nSample==0 + || pNew->anLt[p->nCol-1] > p->a[p->nSample-1].anLt[p->nCol-1] ); + + /* Insert the new sample */ + pSample = &p->a[p->nSample]; + sampleCopy(p, pSample, pNew); + p->nSample++; + + /* Zero the first nEqZero entries in the anEq[] array. */ + memset(pSample->anEq, 0, sizeof(tRowcnt)*nEqZero); + +find_new_min: + if( p->nSample>=p->mxSample ){ + int iMin = -1; + for(i=0; imxSample; i++){ + if( p->a[i].isPSample ) continue; + if( iMin<0 || sampleIsBetter(p, &p->a[iMin], &p->a[i]) ){ + iMin = i; + } + } + assert( iMin>=0 ); + p->iMin = iMin; + } +} +#endif /* SQLITE_ENABLE_STAT4 */ + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Field iChng of the index being scanned has changed. So at this point +** p->current contains a sample that reflects the previous row of the +** index. The value of anEq[iChng] and subsequent anEq[] elements are +** correct at this point. +*/ +static void samplePushPrevious(StatAccum *p, int iChng){ + int i; + + /* Check if any samples from the aBest[] array should be pushed + ** into IndexSample.a[] at this point. */ + for(i=(p->nCol-2); i>=iChng; i--){ + StatSample *pBest = &p->aBest[i]; + pBest->anEq[i] = p->current.anEq[i]; + if( p->nSamplemxSample || sampleIsBetter(p, pBest, &p->a[p->iMin]) ){ + sampleInsert(p, pBest, i); + } + } + + /* Check that no sample contains an anEq[] entry with an index of + ** p->nMaxEqZero or greater set to zero. */ + for(i=p->nSample-1; i>=0; i--){ + int j; + for(j=p->nMaxEqZero; jnCol; j++) assert( p->a[i].anEq[j]>0 ); + } + + /* Update the anEq[] fields of any samples already collected. */ + if( iChngnMaxEqZero ){ + for(i=p->nSample-1; i>=0; i--){ + int j; + for(j=iChng; jnCol; j++){ + if( p->a[i].anEq[j]==0 ) p->a[i].anEq[j] = p->current.anEq[j]; + } + } + p->nMaxEqZero = iChng; + } +} +#endif /* SQLITE_ENABLE_STAT4 */ + +/* +** Implementation of the stat_push SQL function: stat_push(P,C,R) +** Arguments: +** +** P Pointer to the StatAccum object created by stat_init() +** C Index of left-most column to differ from previous row +** R Rowid for the current row. Might be a key record for +** WITHOUT ROWID tables. +** +** The purpose of this routine is to collect statistical data and/or +** samples from the index being analyzed into the StatAccum object. +** The stat_get() SQL function will be used afterwards to +** retrieve the information gathered. +** +** This SQL function usually returns NULL, but might return an integer +** if it wants the byte-code to do special processing. +** +** The R parameter is only used for STAT4 +*/ +static void statPush( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int i; + + /* The three function arguments */ + StatAccum *p = (StatAccum*)sqlite3_value_blob(argv[0]); + int iChng = sqlite3_value_int(argv[1]); + + UNUSED_PARAMETER( argc ); + UNUSED_PARAMETER( context ); + assert( p->nCol>0 ); + assert( iChngnCol ); + + if( p->nRow==0 ){ + /* This is the first call to this function. Do initialization. */ + for(i=0; inCol; i++) p->current.anEq[i] = 1; + }else{ + /* Second and subsequent calls get processed here */ +#ifdef SQLITE_ENABLE_STAT4 + if( p->mxSample ) samplePushPrevious(p, iChng); +#endif + + /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply + ** to the current row of the index. */ + for(i=0; icurrent.anEq[i]++; + } + for(i=iChng; inCol; i++){ + p->current.anDLt[i]++; +#ifdef SQLITE_ENABLE_STAT4 + if( p->mxSample ) p->current.anLt[i] += p->current.anEq[i]; +#endif + p->current.anEq[i] = 1; + } + } + + p->nRow++; +#ifdef SQLITE_ENABLE_STAT4 + if( p->mxSample ){ + tRowcnt nLt; + if( sqlite3_value_type(argv[2])==SQLITE_INTEGER ){ + sampleSetRowidInt64(p->db, &p->current, sqlite3_value_int64(argv[2])); + }else{ + sampleSetRowid(p->db, &p->current, sqlite3_value_bytes(argv[2]), + sqlite3_value_blob(argv[2])); + } + p->current.iHash = p->iPrn = p->iPrn*1103515245 + 12345; + + nLt = p->current.anLt[p->nCol-1]; + /* Check if this is to be a periodic sample. If so, add it. */ + if( (nLt/p->nPSample)!=(nLt+1)/p->nPSample ){ + p->current.isPSample = 1; + p->current.iCol = 0; + sampleInsert(p, &p->current, p->nCol-1); + p->current.isPSample = 0; + } + + /* Update the aBest[] array. */ + for(i=0; i<(p->nCol-1); i++){ + p->current.iCol = i; + if( i>=iChng || sampleIsBetterPost(p, &p->current, &p->aBest[i]) ){ + sampleCopy(p, &p->aBest[i], &p->current); + } + } + }else +#endif + if( p->nLimit && p->nRow>(tRowcnt)p->nLimit*(p->nSkipAhead+1) ){ + p->nSkipAhead++; + sqlite3_result_int(context, p->current.anDLt[0]>0); + } +} + +static const FuncDef statPushFuncdef = { + 2+IsStat4, /* nArg */ + SQLITE_UTF8, /* funcFlags */ + 0, /* pUserData */ + 0, /* pNext */ + statPush, /* xSFunc */ + 0, /* xFinalize */ + 0, 0, /* xValue, xInverse */ + "stat_push", /* zName */ + {0} +}; + +#define STAT_GET_STAT1 0 /* "stat" column of stat1 table */ +#define STAT_GET_ROWID 1 /* "rowid" column of stat[34] entry */ +#define STAT_GET_NEQ 2 /* "neq" column of stat[34] entry */ +#define STAT_GET_NLT 3 /* "nlt" column of stat[34] entry */ +#define STAT_GET_NDLT 4 /* "ndlt" column of stat[34] entry */ + +/* +** Implementation of the stat_get(P,J) SQL function. This routine is +** used to query statistical information that has been gathered into +** the StatAccum object by prior calls to stat_push(). The P parameter +** has type BLOB but it is really just a pointer to the StatAccum object. +** The content to returned is determined by the parameter J +** which is one of the STAT_GET_xxxx values defined above. +** +** The stat_get(P,J) function is not available to generic SQL. It is +** inserted as part of a manually constructed bytecode program. (See +** the callStatGet() routine below.) It is guaranteed that the P +** parameter will always be a pointer to a StatAccum object, never a +** NULL. +** +** If STAT4 is not enabled, then J is always +** STAT_GET_STAT1 and is hence omitted and this routine becomes +** a one-parameter function, stat_get(P), that always returns the +** stat1 table entry information. +*/ +static void statGet( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + StatAccum *p = (StatAccum*)sqlite3_value_blob(argv[0]); +#ifdef SQLITE_ENABLE_STAT4 + /* STAT4 has a parameter on this routine. */ + int eCall = sqlite3_value_int(argv[1]); + assert( argc==2 ); + assert( eCall==STAT_GET_STAT1 || eCall==STAT_GET_NEQ + || eCall==STAT_GET_ROWID || eCall==STAT_GET_NLT + || eCall==STAT_GET_NDLT + ); + assert( eCall==STAT_GET_STAT1 || p->mxSample ); + if( eCall==STAT_GET_STAT1 ) +#else + assert( argc==1 ); +#endif + { + /* Return the value to store in the "stat" column of the sqlite_stat1 + ** table for this index. + ** + ** The value is a string composed of a list of integers describing + ** the index. The first integer in the list is the total number of + ** entries in the index. There is one additional integer in the list + ** for each indexed column. This additional integer is an estimate of + ** the number of rows matched by a equality query on the index using + ** a key with the corresponding number of fields. In other words, + ** if the index is on columns (a,b) and the sqlite_stat1 value is + ** "100 10 2", then SQLite estimates that: + ** + ** * the index contains 100 rows, + ** * "WHERE a=?" matches 10 rows, and + ** * "WHERE a=? AND b=?" matches 2 rows. + ** + ** If D is the count of distinct values and K is the total number of + ** rows, then each estimate is computed as: + ** + ** I = (K+D-1)/D + */ + sqlite3_str sStat; /* Text of the constructed "stat" line */ + int i; /* Loop counter */ + + sqlite3StrAccumInit(&sStat, 0, 0, 0, (p->nKeyCol+1)*100); + sqlite3_str_appendf(&sStat, "%llu", + p->nSkipAhead ? (u64)p->nEst : (u64)p->nRow); + for(i=0; inKeyCol; i++){ + u64 nDistinct = p->current.anDLt[i] + 1; + u64 iVal = (p->nRow + nDistinct - 1) / nDistinct; + sqlite3_str_appendf(&sStat, " %llu", iVal); + assert( p->current.anEq[i] ); + } + sqlite3ResultStrAccum(context, &sStat); + } +#ifdef SQLITE_ENABLE_STAT4 + else if( eCall==STAT_GET_ROWID ){ + if( p->iGet<0 ){ + samplePushPrevious(p, 0); + p->iGet = 0; + } + if( p->iGetnSample ){ + StatSample *pS = p->a + p->iGet; + if( pS->nRowid==0 ){ + sqlite3_result_int64(context, pS->u.iRowid); + }else{ + sqlite3_result_blob(context, pS->u.aRowid, pS->nRowid, + SQLITE_TRANSIENT); + } + } + }else{ + tRowcnt *aCnt = 0; + sqlite3_str sStat; + int i; + + assert( p->iGetnSample ); + switch( eCall ){ + case STAT_GET_NEQ: aCnt = p->a[p->iGet].anEq; break; + case STAT_GET_NLT: aCnt = p->a[p->iGet].anLt; break; + default: { + aCnt = p->a[p->iGet].anDLt; + p->iGet++; + break; + } + } + sqlite3StrAccumInit(&sStat, 0, 0, 0, p->nCol*100); + for(i=0; inCol; i++){ + sqlite3_str_appendf(&sStat, "%llu ", (u64)aCnt[i]); + } + if( sStat.nChar ) sStat.nChar--; + sqlite3ResultStrAccum(context, &sStat); + } +#endif /* SQLITE_ENABLE_STAT4 */ +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER( argc ); +#endif +} +static const FuncDef statGetFuncdef = { + 1+IsStat4, /* nArg */ + SQLITE_UTF8, /* funcFlags */ + 0, /* pUserData */ + 0, /* pNext */ + statGet, /* xSFunc */ + 0, /* xFinalize */ + 0, 0, /* xValue, xInverse */ + "stat_get", /* zName */ + {0} +}; + +static void callStatGet(Parse *pParse, int regStat, int iParam, int regOut){ +#ifdef SQLITE_ENABLE_STAT4 + sqlite3VdbeAddOp2(pParse->pVdbe, OP_Integer, iParam, regStat+1); +#elif SQLITE_DEBUG + assert( iParam==STAT_GET_STAT1 ); +#else + UNUSED_PARAMETER( iParam ); +#endif + assert( regOut!=regStat && regOut!=regStat+1 ); + sqlite3VdbeAddFunctionCall(pParse, 0, regStat, regOut, 1+IsStat4, + &statGetFuncdef, 0); +} + +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS +/* Add a comment to the most recent VDBE opcode that is the name +** of the k-th column of the pIdx index. +*/ +static void analyzeVdbeCommentIndexWithColumnName( + Vdbe *v, /* Prepared statement under construction */ + Index *pIdx, /* Index whose column is being loaded */ + int k /* Which column index */ +){ + int i; /* Index of column in the table */ + assert( k>=0 && knColumn ); + i = pIdx->aiColumn[k]; + if( NEVER(i==XN_ROWID) ){ + VdbeComment((v,"%s.rowid",pIdx->zName)); + }else if( i==XN_EXPR ){ + VdbeComment((v,"%s.expr(%d)",pIdx->zName, k)); + }else{ + VdbeComment((v,"%s.%s", pIdx->zName, pIdx->pTable->aCol[i].zCnName)); + } +} +#else +# define analyzeVdbeCommentIndexWithColumnName(a,b,c) +#endif /* SQLITE_DEBUG */ + +/* +** Generate code to do an analysis of all indices associated with +** a single table. +*/ +static void analyzeOneTable( + Parse *pParse, /* Parser context */ + Table *pTab, /* Table whose indices are to be analyzed */ + Index *pOnlyIdx, /* If not NULL, only analyze this one index */ + int iStatCur, /* Index of VdbeCursor that writes the sqlite_stat1 table */ + int iMem, /* Available memory locations begin here */ + int iTab /* Next available cursor */ +){ + sqlite3 *db = pParse->db; /* Database handle */ + Index *pIdx; /* An index to being analyzed */ + int iIdxCur; /* Cursor open on index being analyzed */ + int iTabCur; /* Table cursor */ + Vdbe *v; /* The virtual machine being built up */ + int i; /* Loop counter */ + int jZeroRows = -1; /* Jump from here if number of rows is zero */ + int iDb; /* Index of database containing pTab */ + u8 needTableCnt = 1; /* True to count the table */ + int regNewRowid = iMem++; /* Rowid for the inserted record */ + int regStat = iMem++; /* Register to hold StatAccum object */ + int regChng = iMem++; /* Index of changed index field */ + int regRowid = iMem++; /* Rowid argument passed to stat_push() */ + int regTemp = iMem++; /* Temporary use register */ + int regTemp2 = iMem++; /* Second temporary use register */ + int regTabname = iMem++; /* Register containing table name */ + int regIdxname = iMem++; /* Register containing index name */ + int regStat1 = iMem++; /* Value for the stat column of sqlite_stat1 */ + int regPrev = iMem; /* MUST BE LAST (see below) */ +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + Table *pStat1 = 0; +#endif + + pParse->nMem = MAX(pParse->nMem, iMem); + v = sqlite3GetVdbe(pParse); + if( v==0 || NEVER(pTab==0) ){ + return; + } + if( !IsOrdinaryTable(pTab) ){ + /* Do not gather statistics on views or virtual tables */ + return; + } + if( sqlite3_strlike("sqlite\\_%", pTab->zName, '\\')==0 ){ + /* Do not gather statistics on system tables */ + return; + } + assert( sqlite3BtreeHoldsAllMutexes(db) ); + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + assert( iDb>=0 ); + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); +#ifndef SQLITE_OMIT_AUTHORIZATION + if( sqlite3AuthCheck(pParse, SQLITE_ANALYZE, pTab->zName, 0, + db->aDb[iDb].zDbSName ) ){ + return; + } +#endif + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + if( db->xPreUpdateCallback ){ + pStat1 = (Table*)sqlite3DbMallocZero(db, sizeof(Table) + 13); + if( pStat1==0 ) return; + pStat1->zName = (char*)&pStat1[1]; + memcpy(pStat1->zName, "sqlite_stat1", 13); + pStat1->nCol = 3; + pStat1->iPKey = -1; + sqlite3VdbeAddOp4(pParse->pVdbe, OP_Noop, 0, 0, 0,(char*)pStat1,P4_DYNBLOB); + } +#endif + + /* Establish a read-lock on the table at the shared-cache level. + ** Open a read-only cursor on the table. Also allocate a cursor number + ** to use for scanning indexes (iIdxCur). No index cursor is opened at + ** this time though. */ + sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); + iTabCur = iTab++; + iIdxCur = iTab++; + pParse->nTab = MAX(pParse->nTab, iTab); + sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead); + sqlite3VdbeLoadString(v, regTabname, pTab->zName); + + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int nCol; /* Number of columns in pIdx. "N" */ + int addrRewind; /* Address of "OP_Rewind iIdxCur" */ + int addrNextRow; /* Address of "next_row:" */ + const char *zIdxName; /* Name of the index */ + int nColTest; /* Number of columns to test for changes */ + + if( pOnlyIdx && pOnlyIdx!=pIdx ) continue; + if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0; + if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){ + nCol = pIdx->nKeyCol; + zIdxName = pTab->zName; + nColTest = nCol - 1; + }else{ + nCol = pIdx->nColumn; + zIdxName = pIdx->zName; + nColTest = pIdx->uniqNotNull ? pIdx->nKeyCol-1 : nCol-1; + } + + /* Populate the register containing the index name. */ + sqlite3VdbeLoadString(v, regIdxname, zIdxName); + VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName)); + + /* + ** Pseudo-code for loop that calls stat_push(): + ** + ** Rewind csr + ** if eof(csr) goto end_of_scan; + ** regChng = 0 + ** goto chng_addr_0; + ** + ** next_row: + ** regChng = 0 + ** if( idx(0) != regPrev(0) ) goto chng_addr_0 + ** regChng = 1 + ** if( idx(1) != regPrev(1) ) goto chng_addr_1 + ** ... + ** regChng = N + ** goto chng_addr_N + ** + ** chng_addr_0: + ** regPrev(0) = idx(0) + ** chng_addr_1: + ** regPrev(1) = idx(1) + ** ... + ** + ** endDistinctTest: + ** regRowid = idx(rowid) + ** stat_push(P, regChng, regRowid) + ** Next csr + ** if !eof(csr) goto next_row; + ** + ** end_of_scan: + */ + + /* Make sure there are enough memory cells allocated to accommodate + ** the regPrev array and a trailing rowid (the rowid slot is required + ** when building a record to insert into the sample column of + ** the sqlite_stat4 table. */ + pParse->nMem = MAX(pParse->nMem, regPrev+nColTest); + + /* Open a read-only cursor on the index being analyzed. */ + assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) ); + sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + VdbeComment((v, "%s", pIdx->zName)); + + /* Invoke the stat_init() function. The arguments are: + ** + ** (1) the number of columns in the index including the rowid + ** (or for a WITHOUT ROWID table, the number of PK columns), + ** (2) the number of columns in the key without the rowid/pk + ** (3) estimated number of rows in the index, + */ + sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat+1); + assert( regRowid==regStat+2 ); + sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regRowid); +#ifdef SQLITE_ENABLE_STAT4 + if( OptimizationEnabled(db, SQLITE_Stat4) ){ + sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regTemp); + addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur); + VdbeCoverage(v); + }else +#endif + { + addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_Count, iIdxCur, regTemp, 1); + } + assert( regTemp2==regStat+4 ); + sqlite3VdbeAddOp2(v, OP_Integer, db->nAnalysisLimit, regTemp2); + sqlite3VdbeAddFunctionCall(pParse, 0, regStat+1, regStat, 4, + &statInitFuncdef, 0); + + /* Implementation of the following: + ** + ** Rewind csr + ** if eof(csr) goto end_of_scan; + ** regChng = 0 + ** goto next_push_0; + ** + */ + sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng); + addrNextRow = sqlite3VdbeCurrentAddr(v); + + if( nColTest>0 ){ + int endDistinctTest = sqlite3VdbeMakeLabel(pParse); + int *aGotoChng; /* Array of jump instruction addresses */ + aGotoChng = sqlite3DbMallocRawNN(db, sizeof(int)*nColTest); + if( aGotoChng==0 ) continue; + + /* + ** next_row: + ** regChng = 0 + ** if( idx(0) != regPrev(0) ) goto chng_addr_0 + ** regChng = 1 + ** if( idx(1) != regPrev(1) ) goto chng_addr_1 + ** ... + ** regChng = N + ** goto endDistinctTest + */ + sqlite3VdbeAddOp0(v, OP_Goto); + addrNextRow = sqlite3VdbeCurrentAddr(v); + if( nColTest==1 && pIdx->nKeyCol==1 && IsUniqueIndex(pIdx) ){ + /* For a single-column UNIQUE index, once we have found a non-NULL + ** row, we know that all the rest will be distinct, so skip + ** subsequent distinctness tests. */ + sqlite3VdbeAddOp2(v, OP_NotNull, regPrev, endDistinctTest); + VdbeCoverage(v); + } + for(i=0; iazColl[i]); + sqlite3VdbeAddOp2(v, OP_Integer, i, regChng); + sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp); + analyzeVdbeCommentIndexWithColumnName(v,pIdx,i); + aGotoChng[i] = + sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ); + sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); + VdbeCoverage(v); + } + sqlite3VdbeAddOp2(v, OP_Integer, nColTest, regChng); + sqlite3VdbeGoto(v, endDistinctTest); + + + /* + ** chng_addr_0: + ** regPrev(0) = idx(0) + ** chng_addr_1: + ** regPrev(1) = idx(1) + ** ... + */ + sqlite3VdbeJumpHere(v, addrNextRow-1); + for(i=0; ipTable); + int j, k, regKey; + regKey = sqlite3GetTempRange(pParse, pPk->nKeyCol); + for(j=0; jnKeyCol; j++){ + k = sqlite3TableColumnToIndex(pIdx, pPk->aiColumn[j]); + assert( k>=0 && knColumn ); + sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, regKey+j); + analyzeVdbeCommentIndexWithColumnName(v,pIdx,k); + } + sqlite3VdbeAddOp3(v, OP_MakeRecord, regKey, pPk->nKeyCol, regRowid); + sqlite3ReleaseTempRange(pParse, regKey, pPk->nKeyCol); + } + } +#endif + assert( regChng==(regStat+1) ); + { + sqlite3VdbeAddFunctionCall(pParse, 1, regStat, regTemp, 2+IsStat4, + &statPushFuncdef, 0); + if( db->nAnalysisLimit ){ + int j1, j2, j3; + j1 = sqlite3VdbeAddOp1(v, OP_IsNull, regTemp); VdbeCoverage(v); + j2 = sqlite3VdbeAddOp1(v, OP_If, regTemp); VdbeCoverage(v); + j3 = sqlite3VdbeAddOp4Int(v, OP_SeekGT, iIdxCur, 0, regPrev, 1); + VdbeCoverage(v); + sqlite3VdbeJumpHere(v, j1); + sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, addrNextRow); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, j2); + sqlite3VdbeJumpHere(v, j3); + }else{ + sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, addrNextRow); VdbeCoverage(v); + } + } + + /* Add the entry to the stat1 table. */ + callStatGet(pParse, regStat, STAT_GET_STAT1, regStat1); + assert( "BBB"[0]==SQLITE_AFF_TEXT ); + sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regTemp, "BBB", 0); + sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regNewRowid); + sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regTemp, regNewRowid); +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + sqlite3VdbeChangeP4(v, -1, (char*)pStat1, P4_TABLE); +#endif + sqlite3VdbeChangeP5(v, OPFLAG_APPEND); + + /* Add the entries to the stat4 table. */ +#ifdef SQLITE_ENABLE_STAT4 + if( OptimizationEnabled(db, SQLITE_Stat4) && db->nAnalysisLimit==0 ){ + int regEq = regStat1; + int regLt = regStat1+1; + int regDLt = regStat1+2; + int regSample = regStat1+3; + int regCol = regStat1+4; + int regSampleRowid = regCol + nCol; + int addrNext; + int addrIsNull; + u8 seekOp = HasRowid(pTab) ? OP_NotExists : OP_NotFound; + + pParse->nMem = MAX(pParse->nMem, regCol+nCol); + + addrNext = sqlite3VdbeCurrentAddr(v); + callStatGet(pParse, regStat, STAT_GET_ROWID, regSampleRowid); + addrIsNull = sqlite3VdbeAddOp1(v, OP_IsNull, regSampleRowid); + VdbeCoverage(v); + callStatGet(pParse, regStat, STAT_GET_NEQ, regEq); + callStatGet(pParse, regStat, STAT_GET_NLT, regLt); + callStatGet(pParse, regStat, STAT_GET_NDLT, regDLt); + sqlite3VdbeAddOp4Int(v, seekOp, iTabCur, addrNext, regSampleRowid, 0); + VdbeCoverage(v); + for(i=0; izName)); + sqlite3VdbeAddOp2(v, OP_Count, iTabCur, regStat1); + jZeroRows = sqlite3VdbeAddOp1(v, OP_IfNot, regStat1); VdbeCoverage(v); + sqlite3VdbeAddOp2(v, OP_Null, 0, regIdxname); + assert( "BBB"[0]==SQLITE_AFF_TEXT ); + sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regTemp, "BBB", 0); + sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regNewRowid); + sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regTemp, regNewRowid); + sqlite3VdbeChangeP5(v, OPFLAG_APPEND); +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + sqlite3VdbeChangeP4(v, -1, (char*)pStat1, P4_TABLE); +#endif + sqlite3VdbeJumpHere(v, jZeroRows); + } +} + + +/* +** Generate code that will cause the most recent index analysis to +** be loaded into internal hash tables where is can be used. +*/ +static void loadAnalysis(Parse *pParse, int iDb){ + Vdbe *v = sqlite3GetVdbe(pParse); + if( v ){ + sqlite3VdbeAddOp1(v, OP_LoadAnalysis, iDb); + } +} + +/* +** Generate code that will do an analysis of an entire database +*/ +static void analyzeDatabase(Parse *pParse, int iDb){ + sqlite3 *db = pParse->db; + Schema *pSchema = db->aDb[iDb].pSchema; /* Schema of database iDb */ + HashElem *k; + int iStatCur; + int iMem; + int iTab; + + sqlite3BeginWriteOperation(pParse, 0, iDb); + iStatCur = pParse->nTab; + pParse->nTab += 3; + openStatTable(pParse, iDb, iStatCur, 0, 0); + iMem = pParse->nMem+1; + iTab = pParse->nTab; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ + Table *pTab = (Table*)sqliteHashData(k); + analyzeOneTable(pParse, pTab, 0, iStatCur, iMem, iTab); + } + loadAnalysis(pParse, iDb); +} + +/* +** Generate code that will do an analysis of a single table in +** a database. If pOnlyIdx is not NULL then it is a single index +** in pTab that should be analyzed. +*/ +static void analyzeTable(Parse *pParse, Table *pTab, Index *pOnlyIdx){ + int iDb; + int iStatCur; + + assert( pTab!=0 ); + assert( sqlite3BtreeHoldsAllMutexes(pParse->db) ); + iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + sqlite3BeginWriteOperation(pParse, 0, iDb); + iStatCur = pParse->nTab; + pParse->nTab += 3; + if( pOnlyIdx ){ + openStatTable(pParse, iDb, iStatCur, pOnlyIdx->zName, "idx"); + }else{ + openStatTable(pParse, iDb, iStatCur, pTab->zName, "tbl"); + } + analyzeOneTable(pParse, pTab, pOnlyIdx, iStatCur,pParse->nMem+1,pParse->nTab); + loadAnalysis(pParse, iDb); +} + +/* +** Generate code for the ANALYZE command. The parser calls this routine +** when it recognizes an ANALYZE command. +** +** ANALYZE -- 1 +** ANALYZE -- 2 +** ANALYZE ?.? -- 3 +** +** Form 1 causes all indices in all attached databases to be analyzed. +** Form 2 analyzes all indices the single database named. +** Form 3 analyzes all indices associated with the named table. +*/ +SQLITE_PRIVATE void sqlite3Analyze(Parse *pParse, Token *pName1, Token *pName2){ + sqlite3 *db = pParse->db; + int iDb; + int i; + char *z, *zDb; + Table *pTab; + Index *pIdx; + Token *pTableName; + Vdbe *v; + + /* Read the database schema. If an error occurs, leave an error message + ** and code in pParse and return NULL. */ + assert( sqlite3BtreeHoldsAllMutexes(pParse->db) ); + if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){ + return; + } + + assert( pName2!=0 || pName1==0 ); + if( pName1==0 ){ + /* Form 1: Analyze everything */ + for(i=0; inDb; i++){ + if( i==1 ) continue; /* Do not analyze the TEMP database */ + analyzeDatabase(pParse, i); + } + }else if( pName2->n==0 && (iDb = sqlite3FindDb(db, pName1))>=0 ){ + /* Analyze the schema named as the argument */ + analyzeDatabase(pParse, iDb); + }else{ + /* Form 3: Analyze the table or index named as an argument */ + iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pTableName); + if( iDb>=0 ){ + zDb = pName2->n ? db->aDb[iDb].zDbSName : 0; + z = sqlite3NameFromToken(db, pTableName); + if( z ){ + if( (pIdx = sqlite3FindIndex(db, z, zDb))!=0 ){ + analyzeTable(pParse, pIdx->pTable, pIdx); + }else if( (pTab = sqlite3LocateTable(pParse, 0, z, zDb))!=0 ){ + analyzeTable(pParse, pTab, 0); + } + sqlite3DbFree(db, z); + } + } + } + if( db->nSqlExec==0 && (v = sqlite3GetVdbe(pParse))!=0 ){ + sqlite3VdbeAddOp0(v, OP_Expire); + } +} + +/* +** Used to pass information from the analyzer reader through to the +** callback routine. +*/ +typedef struct analysisInfo analysisInfo; +struct analysisInfo { + sqlite3 *db; + const char *zDatabase; +}; + +/* +** The first argument points to a nul-terminated string containing a +** list of space separated integers. Read the first nOut of these into +** the array aOut[]. +*/ +static void decodeIntArray( + char *zIntArray, /* String containing int array to decode */ + int nOut, /* Number of slots in aOut[] */ + tRowcnt *aOut, /* Store integers here */ + LogEst *aLog, /* Or, if aOut==0, here */ + Index *pIndex /* Handle extra flags for this index, if not NULL */ +){ + char *z = zIntArray; + int c; + int i; + tRowcnt v; + +#ifdef SQLITE_ENABLE_STAT4 + if( z==0 ) z = ""; +#else + assert( z!=0 ); +#endif + for(i=0; *z && i='0' && c<='9' ){ + v = v*10 + c - '0'; + z++; + } +#ifdef SQLITE_ENABLE_STAT4 + if( aOut ) aOut[i] = v; + if( aLog ) aLog[i] = sqlite3LogEst(v); +#else + assert( aOut==0 ); + UNUSED_PARAMETER(aOut); + assert( aLog!=0 ); + aLog[i] = sqlite3LogEst(v); +#endif + if( *z==' ' ) z++; + } +#ifndef SQLITE_ENABLE_STAT4 + assert( pIndex!=0 ); { +#else + if( pIndex ){ +#endif + pIndex->bUnordered = 0; + pIndex->noSkipScan = 0; + while( z[0] ){ + if( sqlite3_strglob("unordered*", z)==0 ){ + pIndex->bUnordered = 1; + }else if( sqlite3_strglob("sz=[0-9]*", z)==0 ){ + int sz = sqlite3Atoi(z+3); + if( sz<2 ) sz = 2; + pIndex->szIdxRow = sqlite3LogEst(sz); + }else if( sqlite3_strglob("noskipscan*", z)==0 ){ + pIndex->noSkipScan = 1; + } +#ifdef SQLITE_ENABLE_COSTMULT + else if( sqlite3_strglob("costmult=[0-9]*",z)==0 ){ + pIndex->pTable->costMult = sqlite3LogEst(sqlite3Atoi(z+9)); + } +#endif + while( z[0]!=0 && z[0]!=' ' ) z++; + while( z[0]==' ' ) z++; + } + } +} + +/* +** This callback is invoked once for each index when reading the +** sqlite_stat1 table. +** +** argv[0] = name of the table +** argv[1] = name of the index (might be NULL) +** argv[2] = results of analysis - on integer for each column +** +** Entries for which argv[1]==NULL simply record the number of rows in +** the table. +*/ +static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){ + analysisInfo *pInfo = (analysisInfo*)pData; + Index *pIndex; + Table *pTable; + const char *z; + + assert( argc==3 ); + UNUSED_PARAMETER2(NotUsed, argc); + + if( argv==0 || argv[0]==0 || argv[2]==0 ){ + return 0; + } + pTable = sqlite3FindTable(pInfo->db, argv[0], pInfo->zDatabase); + if( pTable==0 ){ + return 0; + } + if( argv[1]==0 ){ + pIndex = 0; + }else if( sqlite3_stricmp(argv[0],argv[1])==0 ){ + pIndex = sqlite3PrimaryKeyIndex(pTable); + }else{ + pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase); + } + z = argv[2]; + + if( pIndex ){ + tRowcnt *aiRowEst = 0; + int nCol = pIndex->nKeyCol+1; +#ifdef SQLITE_ENABLE_STAT4 + /* Index.aiRowEst may already be set here if there are duplicate + ** sqlite_stat1 entries for this index. In that case just clobber + ** the old data with the new instead of allocating a new array. */ + if( pIndex->aiRowEst==0 ){ + pIndex->aiRowEst = (tRowcnt*)sqlite3MallocZero(sizeof(tRowcnt) * nCol); + if( pIndex->aiRowEst==0 ) sqlite3OomFault(pInfo->db); + } + aiRowEst = pIndex->aiRowEst; +#endif + pIndex->bUnordered = 0; + decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex); + pIndex->hasStat1 = 1; + if( pIndex->pPartIdxWhere==0 ){ + pTable->nRowLogEst = pIndex->aiRowLogEst[0]; + pTable->tabFlags |= TF_HasStat1; + } + }else{ + Index fakeIdx; + fakeIdx.szIdxRow = pTable->szTabRow; +#ifdef SQLITE_ENABLE_COSTMULT + fakeIdx.pTable = pTable; +#endif + decodeIntArray((char*)z, 1, 0, &pTable->nRowLogEst, &fakeIdx); + pTable->szTabRow = fakeIdx.szIdxRow; + pTable->tabFlags |= TF_HasStat1; + } + + return 0; +} + +/* +** If the Index.aSample variable is not NULL, delete the aSample[] array +** and its contents. +*/ +SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){ +#ifdef SQLITE_ENABLE_STAT4 + if( pIdx->aSample ){ + int j; + for(j=0; jnSample; j++){ + IndexSample *p = &pIdx->aSample[j]; + sqlite3DbFree(db, p->p); + } + sqlite3DbFree(db, pIdx->aSample); + } + if( db && db->pnBytesFreed==0 ){ + pIdx->nSample = 0; + pIdx->aSample = 0; + } +#else + UNUSED_PARAMETER(db); + UNUSED_PARAMETER(pIdx); +#endif /* SQLITE_ENABLE_STAT4 */ +} + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Populate the pIdx->aAvgEq[] array based on the samples currently +** stored in pIdx->aSample[]. +*/ +static void initAvgEq(Index *pIdx){ + if( pIdx ){ + IndexSample *aSample = pIdx->aSample; + IndexSample *pFinal = &aSample[pIdx->nSample-1]; + int iCol; + int nCol = 1; + if( pIdx->nSampleCol>1 ){ + /* If this is stat4 data, then calculate aAvgEq[] values for all + ** sample columns except the last. The last is always set to 1, as + ** once the trailing PK fields are considered all index keys are + ** unique. */ + nCol = pIdx->nSampleCol-1; + pIdx->aAvgEq[nCol] = 1; + } + for(iCol=0; iColnSample; + int i; /* Used to iterate through samples */ + tRowcnt sumEq = 0; /* Sum of the nEq values */ + tRowcnt avgEq = 0; + tRowcnt nRow; /* Number of rows in index */ + i64 nSum100 = 0; /* Number of terms contributing to sumEq */ + i64 nDist100; /* Number of distinct values in index */ + + if( !pIdx->aiRowEst || iCol>=pIdx->nKeyCol || pIdx->aiRowEst[iCol+1]==0 ){ + nRow = pFinal->anLt[iCol]; + nDist100 = (i64)100 * pFinal->anDLt[iCol]; + nSample--; + }else{ + nRow = pIdx->aiRowEst[0]; + nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1]; + } + pIdx->nRowEst0 = nRow; + + /* Set nSum to the number of distinct (iCol+1) field prefixes that + ** occur in the stat4 table for this index. Set sumEq to the sum of + ** the nEq values for column iCol for the same set (adding the value + ** only once where there exist duplicate prefixes). */ + for(i=0; inSample-1) + || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] + ){ + sumEq += aSample[i].anEq[iCol]; + nSum100 += 100; + } + } + + if( nDist100>nSum100 && sumEqaAvgEq[iCol] = avgEq; + } + } +} + +/* +** Look up an index by name. Or, if the name of a WITHOUT ROWID table +** is supplied instead, find the PRIMARY KEY index for that table. +*/ +static Index *findIndexOrPrimaryKey( + sqlite3 *db, + const char *zName, + const char *zDb +){ + Index *pIdx = sqlite3FindIndex(db, zName, zDb); + if( pIdx==0 ){ + Table *pTab = sqlite3FindTable(db, zName, zDb); + if( pTab && !HasRowid(pTab) ) pIdx = sqlite3PrimaryKeyIndex(pTab); + } + return pIdx; +} + +/* +** Load the content from either the sqlite_stat4 +** into the relevant Index.aSample[] arrays. +** +** Arguments zSql1 and zSql2 must point to SQL statements that return +** data equivalent to the following: +** +** zSql1: SELECT idx,count(*) FROM %Q.sqlite_stat4 GROUP BY idx +** zSql2: SELECT idx,neq,nlt,ndlt,sample FROM %Q.sqlite_stat4 +** +** where %Q is replaced with the database name before the SQL is executed. +*/ +static int loadStatTbl( + sqlite3 *db, /* Database handle */ + const char *zSql1, /* SQL statement 1 (see above) */ + const char *zSql2, /* SQL statement 2 (see above) */ + const char *zDb /* Database name (e.g. "main") */ +){ + int rc; /* Result codes from subroutines */ + sqlite3_stmt *pStmt = 0; /* An SQL statement being run */ + char *zSql; /* Text of the SQL statement */ + Index *pPrevIdx = 0; /* Previous index in the loop */ + IndexSample *pSample; /* A slot in pIdx->aSample[] */ + + assert( db->lookaside.bDisable ); + zSql = sqlite3MPrintf(db, zSql1, zDb); + if( !zSql ){ + return SQLITE_NOMEM_BKPT; + } + rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + sqlite3DbFree(db, zSql); + if( rc ) return rc; + + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + int nIdxCol = 1; /* Number of columns in stat4 records */ + + char *zIndex; /* Index name */ + Index *pIdx; /* Pointer to the index object */ + int nSample; /* Number of samples */ + int nByte; /* Bytes of space required */ + int i; /* Bytes of space required */ + tRowcnt *pSpace; + + zIndex = (char *)sqlite3_column_text(pStmt, 0); + if( zIndex==0 ) continue; + nSample = sqlite3_column_int(pStmt, 1); + pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); + assert( pIdx==0 || pIdx->nSample==0 ); + if( pIdx==0 ) continue; + assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + nIdxCol = pIdx->nKeyCol; + }else{ + nIdxCol = pIdx->nColumn; + } + pIdx->nSampleCol = nIdxCol; + nByte = sizeof(IndexSample) * nSample; + nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample; + nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + + pIdx->aSample = sqlite3DbMallocZero(db, nByte); + if( pIdx->aSample==0 ){ + sqlite3_finalize(pStmt); + return SQLITE_NOMEM_BKPT; + } + pSpace = (tRowcnt*)&pIdx->aSample[nSample]; + pIdx->aAvgEq = pSpace; pSpace += nIdxCol; + pIdx->pTable->tabFlags |= TF_HasStat4; + for(i=0; iaSample[i].anEq = pSpace; pSpace += nIdxCol; + pIdx->aSample[i].anLt = pSpace; pSpace += nIdxCol; + pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol; + } + assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) ); + } + rc = sqlite3_finalize(pStmt); + if( rc ) return rc; + + zSql = sqlite3MPrintf(db, zSql2, zDb); + if( !zSql ){ + return SQLITE_NOMEM_BKPT; + } + rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + sqlite3DbFree(db, zSql); + if( rc ) return rc; + + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + char *zIndex; /* Index name */ + Index *pIdx; /* Pointer to the index object */ + int nCol = 1; /* Number of columns in index */ + + zIndex = (char *)sqlite3_column_text(pStmt, 0); + if( zIndex==0 ) continue; + pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); + if( pIdx==0 ) continue; + /* This next condition is true if data has already been loaded from + ** the sqlite_stat4 table. */ + nCol = pIdx->nSampleCol; + if( pIdx!=pPrevIdx ){ + initAvgEq(pPrevIdx); + pPrevIdx = pIdx; + } + pSample = &pIdx->aSample[pIdx->nSample]; + decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0); + decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0); + decodeIntArray((char*)sqlite3_column_text(pStmt,3),nCol,pSample->anDLt,0,0); + + /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer. + ** This is in case the sample record is corrupted. In that case, the + ** sqlite3VdbeRecordCompare() may read up to two varints past the + ** end of the allocated buffer before it realizes it is dealing with + ** a corrupt record. Adding the two 0x00 bytes prevents this from causing + ** a buffer overread. */ + pSample->n = sqlite3_column_bytes(pStmt, 4); + pSample->p = sqlite3DbMallocZero(db, pSample->n + 2); + if( pSample->p==0 ){ + sqlite3_finalize(pStmt); + return SQLITE_NOMEM_BKPT; + } + if( pSample->n ){ + memcpy(pSample->p, sqlite3_column_blob(pStmt, 4), pSample->n); + } + pIdx->nSample++; + } + rc = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); + return rc; +} + +/* +** Load content from the sqlite_stat4 table into +** the Index.aSample[] arrays of all indices. +*/ +static int loadStat4(sqlite3 *db, const char *zDb){ + int rc = SQLITE_OK; /* Result codes from subroutines */ + const Table *pStat4; + + assert( db->lookaside.bDisable ); + if( (pStat4 = sqlite3FindTable(db, "sqlite_stat4", zDb))!=0 + && IsOrdinaryTable(pStat4) + ){ + rc = loadStatTbl(db, + "SELECT idx,count(*) FROM %Q.sqlite_stat4 GROUP BY idx", + "SELECT idx,neq,nlt,ndlt,sample FROM %Q.sqlite_stat4", + zDb + ); + } + return rc; +} +#endif /* SQLITE_ENABLE_STAT4 */ + +/* +** Load the content of the sqlite_stat1 and sqlite_stat4 tables. The +** contents of sqlite_stat1 are used to populate the Index.aiRowEst[] +** arrays. The contents of sqlite_stat4 are used to populate the +** Index.aSample[] arrays. +** +** If the sqlite_stat1 table is not present in the database, SQLITE_ERROR +** is returned. In this case, even if SQLITE_ENABLE_STAT4 was defined +** during compilation and the sqlite_stat4 table is present, no data is +** read from it. +** +** If SQLITE_ENABLE_STAT4 was defined during compilation and the +** sqlite_stat4 table is not present in the database, SQLITE_ERROR is +** returned. However, in this case, data is read from the sqlite_stat1 +** table (if it is present) before returning. +** +** If an OOM error occurs, this function always sets db->mallocFailed. +** This means if the caller does not care about other errors, the return +** code may be ignored. +*/ +SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ + analysisInfo sInfo; + HashElem *i; + char *zSql; + int rc = SQLITE_OK; + Schema *pSchema = db->aDb[iDb].pSchema; + const Table *pStat1; + + assert( iDb>=0 && iDbnDb ); + assert( db->aDb[iDb].pBt!=0 ); + + /* Clear any prior statistics */ + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + for(i=sqliteHashFirst(&pSchema->tblHash); i; i=sqliteHashNext(i)){ + Table *pTab = sqliteHashData(i); + pTab->tabFlags &= ~TF_HasStat1; + } + for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ + Index *pIdx = sqliteHashData(i); + pIdx->hasStat1 = 0; +#ifdef SQLITE_ENABLE_STAT4 + sqlite3DeleteIndexSamples(db, pIdx); + pIdx->aSample = 0; +#endif + } + + /* Load new statistics out of the sqlite_stat1 table */ + sInfo.db = db; + sInfo.zDatabase = db->aDb[iDb].zDbSName; + if( (pStat1 = sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)) + && IsOrdinaryTable(pStat1) + ){ + zSql = sqlite3MPrintf(db, + "SELECT tbl,idx,stat FROM %Q.sqlite_stat1", sInfo.zDatabase); + if( zSql==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + rc = sqlite3_exec(db, zSql, analysisLoader, &sInfo, 0); + sqlite3DbFree(db, zSql); + } + } + + /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ + Index *pIdx = sqliteHashData(i); + if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); + } + + /* Load the statistics from the sqlite_stat4 table. */ +#ifdef SQLITE_ENABLE_STAT4 + if( rc==SQLITE_OK ){ + DisableLookaside; + rc = loadStat4(db, sInfo.zDatabase); + EnableLookaside; + } + for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ + Index *pIdx = sqliteHashData(i); + sqlite3_free(pIdx->aiRowEst); + pIdx->aiRowEst = 0; + } +#endif + + if( rc==SQLITE_NOMEM ){ + sqlite3OomFault(db); + } + return rc; +} + + +#endif /* SQLITE_OMIT_ANALYZE */ + +/************** End of analyze.c *********************************************/ +/************** Begin file attach.c ******************************************/ +/* +** 2003 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used to implement the ATTACH and DETACH commands. +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_OMIT_ATTACH +/* +** Resolve an expression that was part of an ATTACH or DETACH statement. This +** is slightly different from resolving a normal SQL expression, because simple +** identifiers are treated as strings, not possible column names or aliases. +** +** i.e. if the parser sees: +** +** ATTACH DATABASE abc AS def +** +** it treats the two expressions as literal strings 'abc' and 'def' instead of +** looking for columns of the same name. +** +** This only applies to the root node of pExpr, so the statement: +** +** ATTACH DATABASE abc||def AS 'db2' +** +** will fail because neither abc or def can be resolved. +*/ +static int resolveAttachExpr(NameContext *pName, Expr *pExpr) +{ + int rc = SQLITE_OK; + if( pExpr ){ + if( pExpr->op!=TK_ID ){ + rc = sqlite3ResolveExprNames(pName, pExpr); + }else{ + pExpr->op = TK_STRING; + } + } + return rc; +} + +/* +** Return true if zName points to a name that may be used to refer to +** database iDb attached to handle db. +*/ +SQLITE_PRIVATE int sqlite3DbIsNamed(sqlite3 *db, int iDb, const char *zName){ + return ( + sqlite3StrICmp(db->aDb[iDb].zDbSName, zName)==0 + || (iDb==0 && sqlite3StrICmp("main", zName)==0) + ); +} + +/* +** An SQL user-function registered to do the work of an ATTACH statement. The +** three arguments to the function come directly from an attach statement: +** +** ATTACH DATABASE x AS y KEY z +** +** SELECT sqlite_attach(x, y, z) +** +** If the optional "KEY z" syntax is omitted, an SQL NULL is passed as the +** third argument. +** +** If the db->init.reopenMemdb flags is set, then instead of attaching a +** new database, close the database on db->init.iDb and reopen it as an +** empty MemDB. +*/ +static void attachFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + int i; + int rc = 0; + sqlite3 *db = sqlite3_context_db_handle(context); + const char *zName; + const char *zFile; + char *zPath = 0; + char *zErr = 0; + unsigned int flags; + Db *aNew; /* New array of Db pointers */ + Db *pNew; /* Db object for the newly attached database */ + char *zErrDyn = 0; + sqlite3_vfs *pVfs; + + UNUSED_PARAMETER(NotUsed); + zFile = (const char *)sqlite3_value_text(argv[0]); + zName = (const char *)sqlite3_value_text(argv[1]); + if( zFile==0 ) zFile = ""; + if( zName==0 ) zName = ""; + +#ifndef SQLITE_OMIT_DESERIALIZE +# define REOPEN_AS_MEMDB(db) (db->init.reopenMemdb) +#else +# define REOPEN_AS_MEMDB(db) (0) +#endif + + if( REOPEN_AS_MEMDB(db) ){ + /* This is not a real ATTACH. Instead, this routine is being called + ** from sqlite3_deserialize() to close database db->init.iDb and + ** reopen it as a MemDB */ + pVfs = sqlite3_vfs_find("memdb"); + if( pVfs==0 ) return; + pNew = &db->aDb[db->init.iDb]; + if( pNew->pBt ) sqlite3BtreeClose(pNew->pBt); + pNew->pBt = 0; + pNew->pSchema = 0; + rc = sqlite3BtreeOpen(pVfs, "x\0", db, &pNew->pBt, 0, SQLITE_OPEN_MAIN_DB); + }else{ + /* This is a real ATTACH + ** + ** Check for the following errors: + ** + ** * Too many attached databases, + ** * Transaction currently open + ** * Specified database name already being used. + */ + if( db->nDb>=db->aLimit[SQLITE_LIMIT_ATTACHED]+2 ){ + zErrDyn = sqlite3MPrintf(db, "too many attached databases - max %d", + db->aLimit[SQLITE_LIMIT_ATTACHED] + ); + goto attach_error; + } + for(i=0; inDb; i++){ + assert( zName ); + if( sqlite3DbIsNamed(db, i, zName) ){ + zErrDyn = sqlite3MPrintf(db, "database %s is already in use", zName); + goto attach_error; + } + } + + /* Allocate the new entry in the db->aDb[] array and initialize the schema + ** hash tables. + */ + if( db->aDb==db->aDbStatic ){ + aNew = sqlite3DbMallocRawNN(db, sizeof(db->aDb[0])*3 ); + if( aNew==0 ) return; + memcpy(aNew, db->aDb, sizeof(db->aDb[0])*2); + }else{ + aNew = sqlite3DbRealloc(db, db->aDb, sizeof(db->aDb[0])*(db->nDb+1) ); + if( aNew==0 ) return; + } + db->aDb = aNew; + pNew = &db->aDb[db->nDb]; + memset(pNew, 0, sizeof(*pNew)); + + /* Open the database file. If the btree is successfully opened, use + ** it to obtain the database schema. At this point the schema may + ** or may not be initialized. + */ + flags = db->openFlags; + rc = sqlite3ParseUri(db->pVfs->zName, zFile, &flags, &pVfs, &zPath, &zErr); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM ) sqlite3OomFault(db); + sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); + return; + } + assert( pVfs ); + flags |= SQLITE_OPEN_MAIN_DB; + rc = sqlite3BtreeOpen(pVfs, zPath, db, &pNew->pBt, 0, flags); + db->nDb++; + pNew->zDbSName = sqlite3DbStrDup(db, zName); + } + db->noSharedCache = 0; + if( rc==SQLITE_CONSTRAINT ){ + rc = SQLITE_ERROR; + zErrDyn = sqlite3MPrintf(db, "database is already attached"); + }else if( rc==SQLITE_OK ){ + Pager *pPager; + pNew->pSchema = sqlite3SchemaGet(db, pNew->pBt); + if( !pNew->pSchema ){ + rc = SQLITE_NOMEM_BKPT; + }else if( pNew->pSchema->file_format && pNew->pSchema->enc!=ENC(db) ){ + zErrDyn = sqlite3MPrintf(db, + "attached databases must use the same text encoding as main database"); + rc = SQLITE_ERROR; + } + sqlite3BtreeEnter(pNew->pBt); + pPager = sqlite3BtreePager(pNew->pBt); + sqlite3PagerLockingMode(pPager, db->dfltLockMode); + sqlite3BtreeSecureDelete(pNew->pBt, + sqlite3BtreeSecureDelete(db->aDb[0].pBt,-1) ); +#ifndef SQLITE_OMIT_PAGER_PRAGMAS + sqlite3BtreeSetPagerFlags(pNew->pBt, + PAGER_SYNCHRONOUS_FULL | (db->flags & PAGER_FLAGS_MASK)); +#endif + sqlite3BtreeLeave(pNew->pBt); + } + pNew->safety_level = SQLITE_DEFAULT_SYNCHRONOUS+1; + if( rc==SQLITE_OK && pNew->zDbSName==0 ){ + rc = SQLITE_NOMEM_BKPT; + } + sqlite3_free_filename( zPath ); + + /* If the file was opened successfully, read the schema for the new database. + ** If this fails, or if opening the file failed, then close the file and + ** remove the entry from the db->aDb[] array. i.e. put everything back the + ** way we found it. + */ + if( rc==SQLITE_OK ){ + sqlite3BtreeEnterAll(db); + db->init.iDb = 0; + db->mDbFlags &= ~(DBFLAG_SchemaKnownOk); + if( !REOPEN_AS_MEMDB(db) ){ + rc = sqlite3Init(db, &zErrDyn); + } + sqlite3BtreeLeaveAll(db); + assert( zErrDyn==0 || rc!=SQLITE_OK ); + } +#ifdef SQLITE_USER_AUTHENTICATION + if( rc==SQLITE_OK && !REOPEN_AS_MEMDB(db) ){ + u8 newAuth = 0; + rc = sqlite3UserAuthCheckLogin(db, zName, &newAuth); + if( newAuthauth.authLevel ){ + rc = SQLITE_AUTH_USER; + } + } +#endif + if( rc ){ + if( !REOPEN_AS_MEMDB(db) ){ + int iDb = db->nDb - 1; + assert( iDb>=2 ); + if( db->aDb[iDb].pBt ){ + sqlite3BtreeClose(db->aDb[iDb].pBt); + db->aDb[iDb].pBt = 0; + db->aDb[iDb].pSchema = 0; + } + sqlite3ResetAllSchemasOfConnection(db); + db->nDb = iDb; + if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){ + sqlite3OomFault(db); + sqlite3DbFree(db, zErrDyn); + zErrDyn = sqlite3MPrintf(db, "out of memory"); + }else if( zErrDyn==0 ){ + zErrDyn = sqlite3MPrintf(db, "unable to open database: %s", zFile); + } + } + goto attach_error; + } + + return; + +attach_error: + /* Return an error if we get here */ + if( zErrDyn ){ + sqlite3_result_error(context, zErrDyn, -1); + sqlite3DbFree(db, zErrDyn); + } + if( rc ) sqlite3_result_error_code(context, rc); +} + +/* +** An SQL user-function registered to do the work of an DETACH statement. The +** three arguments to the function come directly from a detach statement: +** +** DETACH DATABASE x +** +** SELECT sqlite_detach(x) +*/ +static void detachFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + const char *zName = (const char *)sqlite3_value_text(argv[0]); + sqlite3 *db = sqlite3_context_db_handle(context); + int i; + Db *pDb = 0; + HashElem *pEntry; + char zErr[128]; + + UNUSED_PARAMETER(NotUsed); + + if( zName==0 ) zName = ""; + for(i=0; inDb; i++){ + pDb = &db->aDb[i]; + if( pDb->pBt==0 ) continue; + if( sqlite3DbIsNamed(db, i, zName) ) break; + } + + if( i>=db->nDb ){ + sqlite3_snprintf(sizeof(zErr),zErr, "no such database: %s", zName); + goto detach_error; + } + if( i<2 ){ + sqlite3_snprintf(sizeof(zErr),zErr, "cannot detach database %s", zName); + goto detach_error; + } + if( sqlite3BtreeTxnState(pDb->pBt)!=SQLITE_TXN_NONE + || sqlite3BtreeIsInBackup(pDb->pBt) + ){ + sqlite3_snprintf(sizeof(zErr),zErr, "database %s is locked", zName); + goto detach_error; + } + + /* If any TEMP triggers reference the schema being detached, move those + ** triggers to reference the TEMP schema itself. */ + assert( db->aDb[1].pSchema ); + pEntry = sqliteHashFirst(&db->aDb[1].pSchema->trigHash); + while( pEntry ){ + Trigger *pTrig = (Trigger*)sqliteHashData(pEntry); + if( pTrig->pTabSchema==pDb->pSchema ){ + pTrig->pTabSchema = pTrig->pSchema; + } + pEntry = sqliteHashNext(pEntry); + } + + sqlite3BtreeClose(pDb->pBt); + pDb->pBt = 0; + pDb->pSchema = 0; + sqlite3CollapseDatabaseArray(db); + return; + +detach_error: + sqlite3_result_error(context, zErr, -1); +} + +/* +** This procedure generates VDBE code for a single invocation of either the +** sqlite_detach() or sqlite_attach() SQL user functions. +*/ +static void codeAttach( + Parse *pParse, /* The parser context */ + int type, /* Either SQLITE_ATTACH or SQLITE_DETACH */ + FuncDef const *pFunc,/* FuncDef wrapper for detachFunc() or attachFunc() */ + Expr *pAuthArg, /* Expression to pass to authorization callback */ + Expr *pFilename, /* Name of database file */ + Expr *pDbname, /* Name of the database to use internally */ + Expr *pKey /* Database key for encryption extension */ +){ + int rc; + NameContext sName; + Vdbe *v; + sqlite3* db = pParse->db; + int regArgs; + + if( pParse->nErr ) goto attach_end; + memset(&sName, 0, sizeof(NameContext)); + sName.pParse = pParse; + + if( + SQLITE_OK!=resolveAttachExpr(&sName, pFilename) || + SQLITE_OK!=resolveAttachExpr(&sName, pDbname) || + SQLITE_OK!=resolveAttachExpr(&sName, pKey) + ){ + goto attach_end; + } + +#ifndef SQLITE_OMIT_AUTHORIZATION + if( ALWAYS(pAuthArg) ){ + char *zAuthArg; + if( pAuthArg->op==TK_STRING ){ + assert( !ExprHasProperty(pAuthArg, EP_IntValue) ); + zAuthArg = pAuthArg->u.zToken; + }else{ + zAuthArg = 0; + } + rc = sqlite3AuthCheck(pParse, type, zAuthArg, 0, 0); + if(rc!=SQLITE_OK ){ + goto attach_end; + } + } +#endif /* SQLITE_OMIT_AUTHORIZATION */ + + + v = sqlite3GetVdbe(pParse); + regArgs = sqlite3GetTempRange(pParse, 4); + sqlite3ExprCode(pParse, pFilename, regArgs); + sqlite3ExprCode(pParse, pDbname, regArgs+1); + sqlite3ExprCode(pParse, pKey, regArgs+2); + + assert( v || db->mallocFailed ); + if( v ){ + sqlite3VdbeAddFunctionCall(pParse, 0, regArgs+3-pFunc->nArg, regArgs+3, + pFunc->nArg, pFunc, 0); + /* Code an OP_Expire. For an ATTACH statement, set P1 to true (expire this + ** statement only). For DETACH, set it to false (expire all existing + ** statements). + */ + sqlite3VdbeAddOp1(v, OP_Expire, (type==SQLITE_ATTACH)); + } + +attach_end: + sqlite3ExprDelete(db, pFilename); + sqlite3ExprDelete(db, pDbname); + sqlite3ExprDelete(db, pKey); +} + +/* +** Called by the parser to compile a DETACH statement. +** +** DETACH pDbname +*/ +SQLITE_PRIVATE void sqlite3Detach(Parse *pParse, Expr *pDbname){ + static const FuncDef detach_func = { + 1, /* nArg */ + SQLITE_UTF8, /* funcFlags */ + 0, /* pUserData */ + 0, /* pNext */ + detachFunc, /* xSFunc */ + 0, /* xFinalize */ + 0, 0, /* xValue, xInverse */ + "sqlite_detach", /* zName */ + {0} + }; + codeAttach(pParse, SQLITE_DETACH, &detach_func, pDbname, 0, 0, pDbname); +} + +/* +** Called by the parser to compile an ATTACH statement. +** +** ATTACH p AS pDbname KEY pKey +*/ +SQLITE_PRIVATE void sqlite3Attach(Parse *pParse, Expr *p, Expr *pDbname, Expr *pKey){ + static const FuncDef attach_func = { + 3, /* nArg */ + SQLITE_UTF8, /* funcFlags */ + 0, /* pUserData */ + 0, /* pNext */ + attachFunc, /* xSFunc */ + 0, /* xFinalize */ + 0, 0, /* xValue, xInverse */ + "sqlite_attach", /* zName */ + {0} + }; + codeAttach(pParse, SQLITE_ATTACH, &attach_func, p, p, pDbname, pKey); +} +#endif /* SQLITE_OMIT_ATTACH */ + +/* +** Expression callback used by sqlite3FixAAAA() routines. +*/ +static int fixExprCb(Walker *p, Expr *pExpr){ + DbFixer *pFix = p->u.pFix; + if( !pFix->bTemp ) ExprSetProperty(pExpr, EP_FromDDL); + if( pExpr->op==TK_VARIABLE ){ + if( pFix->pParse->db->init.busy ){ + pExpr->op = TK_NULL; + }else{ + sqlite3ErrorMsg(pFix->pParse, "%s cannot use variables", pFix->zType); + return WRC_Abort; + } + } + return WRC_Continue; +} + +/* +** Select callback used by sqlite3FixAAAA() routines. +*/ +static int fixSelectCb(Walker *p, Select *pSelect){ + DbFixer *pFix = p->u.pFix; + int i; + SrcItem *pItem; + sqlite3 *db = pFix->pParse->db; + int iDb = sqlite3FindDbName(db, pFix->zDb); + SrcList *pList = pSelect->pSrc; + + if( NEVER(pList==0) ) return WRC_Continue; + for(i=0, pItem=pList->a; inSrc; i++, pItem++){ + if( pFix->bTemp==0 ){ + if( pItem->zDatabase ){ + if( iDb!=sqlite3FindDbName(db, pItem->zDatabase) ){ + sqlite3ErrorMsg(pFix->pParse, + "%s %T cannot reference objects in database %s", + pFix->zType, pFix->pName, pItem->zDatabase); + return WRC_Abort; + } + sqlite3DbFree(db, pItem->zDatabase); + pItem->zDatabase = 0; + pItem->fg.notCte = 1; + } + pItem->pSchema = pFix->pSchema; + pItem->fg.fromDDL = 1; + } +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_TRIGGER) + if( sqlite3WalkExpr(&pFix->w, pList->a[i].pOn) ) return WRC_Abort; +#endif + } + if( pSelect->pWith ){ + for(i=0; ipWith->nCte; i++){ + if( sqlite3WalkSelect(p, pSelect->pWith->a[i].pSelect) ){ + return WRC_Abort; + } + } + } + return WRC_Continue; +} + +/* +** Initialize a DbFixer structure. This routine must be called prior +** to passing the structure to one of the sqliteFixAAAA() routines below. +*/ +SQLITE_PRIVATE void sqlite3FixInit( + DbFixer *pFix, /* The fixer to be initialized */ + Parse *pParse, /* Error messages will be written here */ + int iDb, /* This is the database that must be used */ + const char *zType, /* "view", "trigger", or "index" */ + const Token *pName /* Name of the view, trigger, or index */ +){ + sqlite3 *db = pParse->db; + assert( db->nDb>iDb ); + pFix->pParse = pParse; + pFix->zDb = db->aDb[iDb].zDbSName; + pFix->pSchema = db->aDb[iDb].pSchema; + pFix->zType = zType; + pFix->pName = pName; + pFix->bTemp = (iDb==1); + pFix->w.pParse = pParse; + pFix->w.xExprCallback = fixExprCb; + pFix->w.xSelectCallback = fixSelectCb; + pFix->w.xSelectCallback2 = sqlite3WalkWinDefnDummyCallback; + pFix->w.walkerDepth = 0; + pFix->w.eCode = 0; + pFix->w.u.pFix = pFix; +} + +/* +** The following set of routines walk through the parse tree and assign +** a specific database to all table references where the database name +** was left unspecified in the original SQL statement. The pFix structure +** must have been initialized by a prior call to sqlite3FixInit(). +** +** These routines are used to make sure that an index, trigger, or +** view in one database does not refer to objects in a different database. +** (Exception: indices, triggers, and views in the TEMP database are +** allowed to refer to anything.) If a reference is explicitly made +** to an object in a different database, an error message is added to +** pParse->zErrMsg and these routines return non-zero. If everything +** checks out, these routines return 0. +*/ +SQLITE_PRIVATE int sqlite3FixSrcList( + DbFixer *pFix, /* Context of the fixation */ + SrcList *pList /* The Source list to check and modify */ +){ + int res = 0; + if( pList ){ + Select s; + memset(&s, 0, sizeof(s)); + s.pSrc = pList; + res = sqlite3WalkSelect(&pFix->w, &s); + } + return res; +} +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_TRIGGER) +SQLITE_PRIVATE int sqlite3FixSelect( + DbFixer *pFix, /* Context of the fixation */ + Select *pSelect /* The SELECT statement to be fixed to one database */ +){ + return sqlite3WalkSelect(&pFix->w, pSelect); +} +SQLITE_PRIVATE int sqlite3FixExpr( + DbFixer *pFix, /* Context of the fixation */ + Expr *pExpr /* The expression to be fixed to one database */ +){ + return sqlite3WalkExpr(&pFix->w, pExpr); +} +#endif + +#ifndef SQLITE_OMIT_TRIGGER +SQLITE_PRIVATE int sqlite3FixTriggerStep( + DbFixer *pFix, /* Context of the fixation */ + TriggerStep *pStep /* The trigger step be fixed to one database */ +){ + while( pStep ){ + if( sqlite3WalkSelect(&pFix->w, pStep->pSelect) + || sqlite3WalkExpr(&pFix->w, pStep->pWhere) + || sqlite3WalkExprList(&pFix->w, pStep->pExprList) + || sqlite3FixSrcList(pFix, pStep->pFrom) + ){ + return 1; + } +#ifndef SQLITE_OMIT_UPSERT + { + Upsert *pUp; + for(pUp=pStep->pUpsert; pUp; pUp=pUp->pNextUpsert){ + if( sqlite3WalkExprList(&pFix->w, pUp->pUpsertTarget) + || sqlite3WalkExpr(&pFix->w, pUp->pUpsertTargetWhere) + || sqlite3WalkExprList(&pFix->w, pUp->pUpsertSet) + || sqlite3WalkExpr(&pFix->w, pUp->pUpsertWhere) + ){ + return 1; + } + } + } +#endif + pStep = pStep->pNext; + } + + return 0; +} +#endif + +/************** End of attach.c **********************************************/ +/************** Begin file auth.c ********************************************/ +/* +** 2003 January 11 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used to implement the sqlite3_set_authorizer() +** API. This facility is an optional feature of the library. Embedded +** systems that do not need this facility may omit it by recompiling +** the library with -DSQLITE_OMIT_AUTHORIZATION=1 +*/ +/* #include "sqliteInt.h" */ + +/* +** All of the code in this file may be omitted by defining a single +** macro. +*/ +#ifndef SQLITE_OMIT_AUTHORIZATION + +/* +** Set or clear the access authorization function. +** +** The access authorization function is be called during the compilation +** phase to verify that the user has read and/or write access permission on +** various fields of the database. The first argument to the auth function +** is a copy of the 3rd argument to this routine. The second argument +** to the auth function is one of these constants: +** +** SQLITE_CREATE_INDEX +** SQLITE_CREATE_TABLE +** SQLITE_CREATE_TEMP_INDEX +** SQLITE_CREATE_TEMP_TABLE +** SQLITE_CREATE_TEMP_TRIGGER +** SQLITE_CREATE_TEMP_VIEW +** SQLITE_CREATE_TRIGGER +** SQLITE_CREATE_VIEW +** SQLITE_DELETE +** SQLITE_DROP_INDEX +** SQLITE_DROP_TABLE +** SQLITE_DROP_TEMP_INDEX +** SQLITE_DROP_TEMP_TABLE +** SQLITE_DROP_TEMP_TRIGGER +** SQLITE_DROP_TEMP_VIEW +** SQLITE_DROP_TRIGGER +** SQLITE_DROP_VIEW +** SQLITE_INSERT +** SQLITE_PRAGMA +** SQLITE_READ +** SQLITE_SELECT +** SQLITE_TRANSACTION +** SQLITE_UPDATE +** +** The third and fourth arguments to the auth function are the name of +** the table and the column that are being accessed. The auth function +** should return either SQLITE_OK, SQLITE_DENY, or SQLITE_IGNORE. If +** SQLITE_OK is returned, it means that access is allowed. SQLITE_DENY +** means that the SQL statement will never-run - the sqlite3_exec() call +** will return with an error. SQLITE_IGNORE means that the SQL statement +** should run but attempts to read the specified column will return NULL +** and attempts to write the column will be ignored. +** +** Setting the auth function to NULL disables this hook. The default +** setting of the auth function is NULL. +*/ +SQLITE_API int sqlite3_set_authorizer( + sqlite3 *db, + int (*xAuth)(void*,int,const char*,const char*,const char*,const char*), + void *pArg +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->xAuth = (sqlite3_xauth)xAuth; + db->pAuthArg = pArg; + if( db->xAuth ) sqlite3ExpirePreparedStatements(db, 1); + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +/* +** Write an error message into pParse->zErrMsg that explains that the +** user-supplied authorization function returned an illegal value. +*/ +static void sqliteAuthBadReturnCode(Parse *pParse){ + sqlite3ErrorMsg(pParse, "authorizer malfunction"); + pParse->rc = SQLITE_ERROR; +} + +/* +** Invoke the authorization callback for permission to read column zCol from +** table zTab in database zDb. This function assumes that an authorization +** callback has been registered (i.e. that sqlite3.xAuth is not NULL). +** +** If SQLITE_IGNORE is returned and pExpr is not NULL, then pExpr is changed +** to an SQL NULL expression. Otherwise, if pExpr is NULL, then SQLITE_IGNORE +** is treated as SQLITE_DENY. In this case an error is left in pParse. +*/ +SQLITE_PRIVATE int sqlite3AuthReadCol( + Parse *pParse, /* The parser context */ + const char *zTab, /* Table name */ + const char *zCol, /* Column name */ + int iDb /* Index of containing database. */ +){ + sqlite3 *db = pParse->db; /* Database handle */ + char *zDb = db->aDb[iDb].zDbSName; /* Schema name of attached database */ + int rc; /* Auth callback return code */ + + if( db->init.busy ) return SQLITE_OK; + rc = db->xAuth(db->pAuthArg, SQLITE_READ, zTab,zCol,zDb,pParse->zAuthContext +#ifdef SQLITE_USER_AUTHENTICATION + ,db->auth.zAuthUser +#endif + ); + if( rc==SQLITE_DENY ){ + char *z = sqlite3_mprintf("%s.%s", zTab, zCol); + if( db->nDb>2 || iDb!=0 ) z = sqlite3_mprintf("%s.%z", zDb, z); + sqlite3ErrorMsg(pParse, "access to %z is prohibited", z); + pParse->rc = SQLITE_AUTH; + }else if( rc!=SQLITE_IGNORE && rc!=SQLITE_OK ){ + sqliteAuthBadReturnCode(pParse); + } + return rc; +} + +/* +** The pExpr should be a TK_COLUMN expression. The table referred to +** is in pTabList or else it is the NEW or OLD table of a trigger. +** Check to see if it is OK to read this particular column. +** +** If the auth function returns SQLITE_IGNORE, change the TK_COLUMN +** instruction into a TK_NULL. If the auth function returns SQLITE_DENY, +** then generate an error. +*/ +SQLITE_PRIVATE void sqlite3AuthRead( + Parse *pParse, /* The parser context */ + Expr *pExpr, /* The expression to check authorization on */ + Schema *pSchema, /* The schema of the expression */ + SrcList *pTabList /* All table that pExpr might refer to */ +){ + Table *pTab = 0; /* The table being read */ + const char *zCol; /* Name of the column of the table */ + int iSrc; /* Index in pTabList->a[] of table being read */ + int iDb; /* The index of the database the expression refers to */ + int iCol; /* Index of column in table */ + + assert( pExpr->op==TK_COLUMN || pExpr->op==TK_TRIGGER ); + assert( !IN_RENAME_OBJECT ); + assert( pParse->db->xAuth!=0 ); + iDb = sqlite3SchemaToIndex(pParse->db, pSchema); + if( iDb<0 ){ + /* An attempt to read a column out of a subquery or other + ** temporary table. */ + return; + } + + if( pExpr->op==TK_TRIGGER ){ + pTab = pParse->pTriggerTab; + }else{ + assert( pTabList ); + for(iSrc=0; iSrcnSrc; iSrc++){ + if( pExpr->iTable==pTabList->a[iSrc].iCursor ){ + pTab = pTabList->a[iSrc].pTab; + break; + } + } + } + iCol = pExpr->iColumn; + if( pTab==0 ) return; + + if( iCol>=0 ){ + assert( iColnCol ); + zCol = pTab->aCol[iCol].zCnName; + }else if( pTab->iPKey>=0 ){ + assert( pTab->iPKeynCol ); + zCol = pTab->aCol[pTab->iPKey].zCnName; + }else{ + zCol = "ROWID"; + } + assert( iDb>=0 && iDbdb->nDb ); + if( SQLITE_IGNORE==sqlite3AuthReadCol(pParse, pTab->zName, zCol, iDb) ){ + pExpr->op = TK_NULL; + } +} + +/* +** Do an authorization check using the code and arguments given. Return +** either SQLITE_OK (zero) or SQLITE_IGNORE or SQLITE_DENY. If SQLITE_DENY +** is returned, then the error count and error message in pParse are +** modified appropriately. +*/ +SQLITE_PRIVATE int sqlite3AuthCheck( + Parse *pParse, + int code, + const char *zArg1, + const char *zArg2, + const char *zArg3 +){ + sqlite3 *db = pParse->db; + int rc; + + /* Don't do any authorization checks if the database is initialising + ** or if the parser is being invoked from within sqlite3_declare_vtab. + */ + assert( !IN_RENAME_OBJECT || db->xAuth==0 ); + if( db->xAuth==0 || db->init.busy || IN_SPECIAL_PARSE ){ + return SQLITE_OK; + } + + /* EVIDENCE-OF: R-43249-19882 The third through sixth parameters to the + ** callback are either NULL pointers or zero-terminated strings that + ** contain additional details about the action to be authorized. + ** + ** The following testcase() macros show that any of the 3rd through 6th + ** parameters can be either NULL or a string. */ + testcase( zArg1==0 ); + testcase( zArg2==0 ); + testcase( zArg3==0 ); + testcase( pParse->zAuthContext==0 ); + + rc = db->xAuth(db->pAuthArg, code, zArg1, zArg2, zArg3, pParse->zAuthContext +#ifdef SQLITE_USER_AUTHENTICATION + ,db->auth.zAuthUser +#endif + ); + if( rc==SQLITE_DENY ){ + sqlite3ErrorMsg(pParse, "not authorized"); + pParse->rc = SQLITE_AUTH; + }else if( rc!=SQLITE_OK && rc!=SQLITE_IGNORE ){ + rc = SQLITE_DENY; + sqliteAuthBadReturnCode(pParse); + } + return rc; +} + +/* +** Push an authorization context. After this routine is called, the +** zArg3 argument to authorization callbacks will be zContext until +** popped. Or if pParse==0, this routine is a no-op. +*/ +SQLITE_PRIVATE void sqlite3AuthContextPush( + Parse *pParse, + AuthContext *pContext, + const char *zContext +){ + assert( pParse ); + pContext->pParse = pParse; + pContext->zAuthContext = pParse->zAuthContext; + pParse->zAuthContext = zContext; +} + +/* +** Pop an authorization context that was previously pushed +** by sqlite3AuthContextPush +*/ +SQLITE_PRIVATE void sqlite3AuthContextPop(AuthContext *pContext){ + if( pContext->pParse ){ + pContext->pParse->zAuthContext = pContext->zAuthContext; + pContext->pParse = 0; + } +} + +#endif /* SQLITE_OMIT_AUTHORIZATION */ + +/************** End of auth.c ************************************************/ +/************** Begin file build.c *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains C code routines that are called by the SQLite parser +** when syntax rules are reduced. The routines in this file handle the +** following kinds of SQL syntax: +** +** CREATE TABLE +** DROP TABLE +** CREATE INDEX +** DROP INDEX +** creating ID lists +** BEGIN TRANSACTION +** COMMIT +** ROLLBACK +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** The TableLock structure is only used by the sqlite3TableLock() and +** codeTableLocks() functions. +*/ +struct TableLock { + int iDb; /* The database containing the table to be locked */ + Pgno iTab; /* The root page of the table to be locked */ + u8 isWriteLock; /* True for write lock. False for a read lock */ + const char *zLockName; /* Name of the table */ +}; + +/* +** Record the fact that we want to lock a table at run-time. +** +** The table to be locked has root page iTab and is found in database iDb. +** A read or a write lock can be taken depending on isWritelock. +** +** This routine just records the fact that the lock is desired. The +** code to make the lock occur is generated by a later call to +** codeTableLocks() which occurs during sqlite3FinishCoding(). +*/ +static SQLITE_NOINLINE void lockTable( + Parse *pParse, /* Parsing context */ + int iDb, /* Index of the database containing the table to lock */ + Pgno iTab, /* Root page number of the table to be locked */ + u8 isWriteLock, /* True for a write lock */ + const char *zName /* Name of the table to be locked */ +){ + Parse *pToplevel; + int i; + int nBytes; + TableLock *p; + assert( iDb>=0 ); + + pToplevel = sqlite3ParseToplevel(pParse); + for(i=0; inTableLock; i++){ + p = &pToplevel->aTableLock[i]; + if( p->iDb==iDb && p->iTab==iTab ){ + p->isWriteLock = (p->isWriteLock || isWriteLock); + return; + } + } + + nBytes = sizeof(TableLock) * (pToplevel->nTableLock+1); + pToplevel->aTableLock = + sqlite3DbReallocOrFree(pToplevel->db, pToplevel->aTableLock, nBytes); + if( pToplevel->aTableLock ){ + p = &pToplevel->aTableLock[pToplevel->nTableLock++]; + p->iDb = iDb; + p->iTab = iTab; + p->isWriteLock = isWriteLock; + p->zLockName = zName; + }else{ + pToplevel->nTableLock = 0; + sqlite3OomFault(pToplevel->db); + } +} +SQLITE_PRIVATE void sqlite3TableLock( + Parse *pParse, /* Parsing context */ + int iDb, /* Index of the database containing the table to lock */ + Pgno iTab, /* Root page number of the table to be locked */ + u8 isWriteLock, /* True for a write lock */ + const char *zName /* Name of the table to be locked */ +){ + if( iDb==1 ) return; + if( !sqlite3BtreeSharable(pParse->db->aDb[iDb].pBt) ) return; + lockTable(pParse, iDb, iTab, isWriteLock, zName); +} + +/* +** Code an OP_TableLock instruction for each table locked by the +** statement (configured by calls to sqlite3TableLock()). +*/ +static void codeTableLocks(Parse *pParse){ + int i; + Vdbe *pVdbe = pParse->pVdbe; + assert( pVdbe!=0 ); + + for(i=0; inTableLock; i++){ + TableLock *p = &pParse->aTableLock[i]; + int p1 = p->iDb; + sqlite3VdbeAddOp4(pVdbe, OP_TableLock, p1, p->iTab, p->isWriteLock, + p->zLockName, P4_STATIC); + } +} +#else + #define codeTableLocks(x) +#endif + +/* +** Return TRUE if the given yDbMask object is empty - if it contains no +** 1 bits. This routine is used by the DbMaskAllZero() and DbMaskNotZero() +** macros when SQLITE_MAX_ATTACHED is greater than 30. +*/ +#if SQLITE_MAX_ATTACHED>30 +SQLITE_PRIVATE int sqlite3DbMaskAllZero(yDbMask m){ + int i; + for(i=0; ipToplevel==0 ); + db = pParse->db; + assert( db->pParse==pParse ); + if( pParse->nested ) return; + if( pParse->nErr ){ + if( db->mallocFailed ) pParse->rc = SQLITE_NOMEM; + return; + } + assert( db->mallocFailed==0 ); + + /* Begin by generating some termination code at the end of the + ** vdbe program + */ + v = pParse->pVdbe; + if( v==0 ){ + if( db->init.busy ){ + pParse->rc = SQLITE_DONE; + return; + } + v = sqlite3GetVdbe(pParse); + if( v==0 ) pParse->rc = SQLITE_ERROR; + } + assert( !pParse->isMultiWrite + || sqlite3VdbeAssertMayAbort(v, pParse->mayAbort)); + if( v ){ + if( pParse->bReturning ){ + Returning *pReturning = pParse->u1.pReturning; + int addrRewind; + int i; + int reg; + + if( NEVER(pReturning->nRetCol==0) ){ + assert( CORRUPT_DB ); + }else{ + sqlite3VdbeAddOp0(v, OP_FkCheck); + addrRewind = + sqlite3VdbeAddOp1(v, OP_Rewind, pReturning->iRetCur); + VdbeCoverage(v); + reg = pReturning->iRetReg; + for(i=0; inRetCol; i++){ + sqlite3VdbeAddOp3(v, OP_Column, pReturning->iRetCur, i, reg+i); + } + sqlite3VdbeAddOp2(v, OP_ResultRow, reg, i); + sqlite3VdbeAddOp2(v, OP_Next, pReturning->iRetCur, addrRewind+1); + VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addrRewind); + } + } + sqlite3VdbeAddOp0(v, OP_Halt); + +#if SQLITE_USER_AUTHENTICATION + if( pParse->nTableLock>0 && db->init.busy==0 ){ + sqlite3UserAuthInit(db); + if( db->auth.authLevelrc = SQLITE_AUTH_USER; + return; + } + } +#endif + + /* The cookie mask contains one bit for each database file open. + ** (Bit 0 is for main, bit 1 is for temp, and so forth.) Bits are + ** set for each database that is used. Generate code to start a + ** transaction on each used database and to verify the schema cookie + ** on each used database. + */ + if( db->mallocFailed==0 + && (DbMaskNonZero(pParse->cookieMask) || pParse->pConstExpr) + ){ + int iDb, i; + assert( sqlite3VdbeGetOp(v, 0)->opcode==OP_Init ); + sqlite3VdbeJumpHere(v, 0); + for(iDb=0; iDbnDb; iDb++){ + Schema *pSchema; + if( DbMaskTest(pParse->cookieMask, iDb)==0 ) continue; + sqlite3VdbeUsesBtree(v, iDb); + pSchema = db->aDb[iDb].pSchema; + sqlite3VdbeAddOp4Int(v, + OP_Transaction, /* Opcode */ + iDb, /* P1 */ + DbMaskTest(pParse->writeMask,iDb), /* P2 */ + pSchema->schema_cookie, /* P3 */ + pSchema->iGeneration /* P4 */ + ); + if( db->init.busy==0 ) sqlite3VdbeChangeP5(v, 1); + VdbeComment((v, + "usesStmtJournal=%d", pParse->mayAbort && pParse->isMultiWrite)); + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + for(i=0; inVtabLock; i++){ + char *vtab = (char *)sqlite3GetVTable(db, pParse->apVtabLock[i]); + sqlite3VdbeAddOp4(v, OP_VBegin, 0, 0, 0, vtab, P4_VTAB); + } + pParse->nVtabLock = 0; +#endif + + /* Once all the cookies have been verified and transactions opened, + ** obtain the required table-locks. This is a no-op unless the + ** shared-cache feature is enabled. + */ + codeTableLocks(pParse); + + /* Initialize any AUTOINCREMENT data structures required. + */ + sqlite3AutoincrementBegin(pParse); + + /* Code constant expressions that where factored out of inner loops. + ** + ** The pConstExpr list might also contain expressions that we simply + ** want to keep around until the Parse object is deleted. Such + ** expressions have iConstExprReg==0. Do not generate code for + ** those expressions, of course. + */ + if( pParse->pConstExpr ){ + ExprList *pEL = pParse->pConstExpr; + pParse->okConstFactor = 0; + for(i=0; inExpr; i++){ + int iReg = pEL->a[i].u.iConstExprReg; + if( iReg>0 ){ + sqlite3ExprCode(pParse, pEL->a[i].pExpr, iReg); + } + } + } + + if( pParse->bReturning ){ + Returning *pRet = pParse->u1.pReturning; + if( NEVER(pRet->nRetCol==0) ){ + assert( CORRUPT_DB ); + }else{ + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pRet->iRetCur, pRet->nRetCol); + } + } + + /* Finally, jump back to the beginning of the executable code. */ + sqlite3VdbeGoto(v, 1); + } + } + + /* Get the VDBE program ready for execution + */ + assert( v!=0 || pParse->nErr ); + assert( db->mallocFailed==0 || pParse->nErr ); + if( pParse->nErr==0 ){ + /* A minimum of one cursor is required if autoincrement is used + * See ticket [a696379c1f08866] */ + assert( pParse->pAinc==0 || pParse->nTab>0 ); + sqlite3VdbeMakeReady(v, pParse); + pParse->rc = SQLITE_DONE; + }else{ + pParse->rc = SQLITE_ERROR; + } +} + +/* +** Run the parser and code generator recursively in order to generate +** code for the SQL statement given onto the end of the pParse context +** currently under construction. Notes: +** +** * The final OP_Halt is not appended and other initialization +** and finalization steps are omitted because those are handling by the +** outermost parser. +** +** * Built-in SQL functions always take precedence over application-defined +** SQL functions. In other words, it is not possible to override a +** built-in function. +*/ +SQLITE_PRIVATE void sqlite3NestedParse(Parse *pParse, const char *zFormat, ...){ + va_list ap; + char *zSql; + sqlite3 *db = pParse->db; + u32 savedDbFlags = db->mDbFlags; + char saveBuf[PARSE_TAIL_SZ]; + + if( pParse->nErr ) return; + assert( pParse->nested<10 ); /* Nesting should only be of limited depth */ + va_start(ap, zFormat); + zSql = sqlite3VMPrintf(db, zFormat, ap); + va_end(ap); + if( zSql==0 ){ + /* This can result either from an OOM or because the formatted string + ** exceeds SQLITE_LIMIT_LENGTH. In the latter case, we need to set + ** an error */ + if( !db->mallocFailed ) pParse->rc = SQLITE_TOOBIG; + pParse->nErr++; + return; + } + pParse->nested++; + memcpy(saveBuf, PARSE_TAIL(pParse), PARSE_TAIL_SZ); + memset(PARSE_TAIL(pParse), 0, PARSE_TAIL_SZ); + db->mDbFlags |= DBFLAG_PreferBuiltin; + sqlite3RunParser(pParse, zSql); + sqlite3DbFree(db, pParse->zErrMsg); + pParse->zErrMsg = 0; + db->mDbFlags = savedDbFlags; + sqlite3DbFree(db, zSql); + memcpy(PARSE_TAIL(pParse), saveBuf, PARSE_TAIL_SZ); + pParse->nested--; +} + +#if SQLITE_USER_AUTHENTICATION +/* +** Return TRUE if zTable is the name of the system table that stores the +** list of users and their access credentials. +*/ +SQLITE_PRIVATE int sqlite3UserAuthTable(const char *zTable){ + return sqlite3_stricmp(zTable, "sqlite_user")==0; +} +#endif + +/* +** Locate the in-memory structure that describes a particular database +** table given the name of that table and (optionally) the name of the +** database containing the table. Return NULL if not found. +** +** If zDatabase is 0, all databases are searched for the table and the +** first matching table is returned. (No checking for duplicate table +** names is done.) The search order is TEMP first, then MAIN, then any +** auxiliary databases added using the ATTACH command. +** +** See also sqlite3LocateTable(). +*/ +SQLITE_PRIVATE Table *sqlite3FindTable(sqlite3 *db, const char *zName, const char *zDatabase){ + Table *p = 0; + int i; + + /* All mutexes are required for schema access. Make sure we hold them. */ + assert( zDatabase!=0 || sqlite3BtreeHoldsAllMutexes(db) ); +#if SQLITE_USER_AUTHENTICATION + /* Only the admin user is allowed to know that the sqlite_user table + ** exists */ + if( db->auth.authLevelnDb; i++){ + if( sqlite3StrICmp(zDatabase, db->aDb[i].zDbSName)==0 ) break; + } + if( i>=db->nDb ){ + /* No match against the official names. But always match "main" + ** to schema 0 as a legacy fallback. */ + if( sqlite3StrICmp(zDatabase,"main")==0 ){ + i = 0; + }else{ + return 0; + } + } + p = sqlite3HashFind(&db->aDb[i].pSchema->tblHash, zName); + if( p==0 && sqlite3StrNICmp(zName, "sqlite_", 7)==0 ){ + if( i==1 ){ + if( sqlite3StrICmp(zName+7, &PREFERRED_TEMP_SCHEMA_TABLE[7])==0 + || sqlite3StrICmp(zName+7, &PREFERRED_SCHEMA_TABLE[7])==0 + || sqlite3StrICmp(zName+7, &LEGACY_SCHEMA_TABLE[7])==0 + ){ + p = sqlite3HashFind(&db->aDb[1].pSchema->tblHash, + LEGACY_TEMP_SCHEMA_TABLE); + } + }else{ + if( sqlite3StrICmp(zName+7, &PREFERRED_SCHEMA_TABLE[7])==0 ){ + p = sqlite3HashFind(&db->aDb[i].pSchema->tblHash, + LEGACY_SCHEMA_TABLE); + } + } + } + }else{ + /* Match against TEMP first */ + p = sqlite3HashFind(&db->aDb[1].pSchema->tblHash, zName); + if( p ) return p; + /* The main database is second */ + p = sqlite3HashFind(&db->aDb[0].pSchema->tblHash, zName); + if( p ) return p; + /* Attached databases are in order of attachment */ + for(i=2; inDb; i++){ + assert( sqlite3SchemaMutexHeld(db, i, 0) ); + p = sqlite3HashFind(&db->aDb[i].pSchema->tblHash, zName); + if( p ) break; + } + if( p==0 && sqlite3StrNICmp(zName, "sqlite_", 7)==0 ){ + if( sqlite3StrICmp(zName+7, &PREFERRED_SCHEMA_TABLE[7])==0 ){ + p = sqlite3HashFind(&db->aDb[0].pSchema->tblHash, LEGACY_SCHEMA_TABLE); + }else if( sqlite3StrICmp(zName+7, &PREFERRED_TEMP_SCHEMA_TABLE[7])==0 ){ + p = sqlite3HashFind(&db->aDb[1].pSchema->tblHash, + LEGACY_TEMP_SCHEMA_TABLE); + } + } + } + return p; +} + +/* +** Locate the in-memory structure that describes a particular database +** table given the name of that table and (optionally) the name of the +** database containing the table. Return NULL if not found. Also leave an +** error message in pParse->zErrMsg. +** +** The difference between this routine and sqlite3FindTable() is that this +** routine leaves an error message in pParse->zErrMsg where +** sqlite3FindTable() does not. +*/ +SQLITE_PRIVATE Table *sqlite3LocateTable( + Parse *pParse, /* context in which to report errors */ + u32 flags, /* LOCATE_VIEW or LOCATE_NOERR */ + const char *zName, /* Name of the table we are looking for */ + const char *zDbase /* Name of the database. Might be NULL */ +){ + Table *p; + sqlite3 *db = pParse->db; + + /* Read the database schema. If an error occurs, leave an error message + ** and code in pParse and return NULL. */ + if( (db->mDbFlags & DBFLAG_SchemaKnownOk)==0 + && SQLITE_OK!=sqlite3ReadSchema(pParse) + ){ + return 0; + } + + p = sqlite3FindTable(db, zName, zDbase); + if( p==0 ){ +#ifndef SQLITE_OMIT_VIRTUALTABLE + /* If zName is the not the name of a table in the schema created using + ** CREATE, then check to see if it is the name of an virtual table that + ** can be an eponymous virtual table. */ + if( pParse->disableVtab==0 && db->init.busy==0 ){ + Module *pMod = (Module*)sqlite3HashFind(&db->aModule, zName); + if( pMod==0 && sqlite3_strnicmp(zName, "pragma_", 7)==0 ){ + pMod = sqlite3PragmaVtabRegister(db, zName); + } + if( pMod && sqlite3VtabEponymousTableInit(pParse, pMod) ){ + testcase( pMod->pEpoTab==0 ); + return pMod->pEpoTab; + } + } +#endif + if( flags & LOCATE_NOERR ) return 0; + pParse->checkSchema = 1; + }else if( IsVirtual(p) && pParse->disableVtab ){ + p = 0; + } + + if( p==0 ){ + const char *zMsg = flags & LOCATE_VIEW ? "no such view" : "no such table"; + if( zDbase ){ + sqlite3ErrorMsg(pParse, "%s: %s.%s", zMsg, zDbase, zName); + }else{ + sqlite3ErrorMsg(pParse, "%s: %s", zMsg, zName); + } + }else{ + assert( HasRowid(p) || p->iPKey<0 ); + } + + return p; +} + +/* +** Locate the table identified by *p. +** +** This is a wrapper around sqlite3LocateTable(). The difference between +** sqlite3LocateTable() and this function is that this function restricts +** the search to schema (p->pSchema) if it is not NULL. p->pSchema may be +** non-NULL if it is part of a view or trigger program definition. See +** sqlite3FixSrcList() for details. +*/ +SQLITE_PRIVATE Table *sqlite3LocateTableItem( + Parse *pParse, + u32 flags, + SrcItem *p +){ + const char *zDb; + assert( p->pSchema==0 || p->zDatabase==0 ); + if( p->pSchema ){ + int iDb = sqlite3SchemaToIndex(pParse->db, p->pSchema); + zDb = pParse->db->aDb[iDb].zDbSName; + }else{ + zDb = p->zDatabase; + } + return sqlite3LocateTable(pParse, flags, p->zName, zDb); +} + +/* +** Return the preferred table name for system tables. Translate legacy +** names into the new preferred names, as appropriate. +*/ +SQLITE_PRIVATE const char *sqlite3PreferredTableName(const char *zName){ + if( sqlite3StrNICmp(zName, "sqlite_", 7)==0 ){ + if( sqlite3StrICmp(zName+7, &LEGACY_SCHEMA_TABLE[7])==0 ){ + return PREFERRED_SCHEMA_TABLE; + } + if( sqlite3StrICmp(zName+7, &LEGACY_TEMP_SCHEMA_TABLE[7])==0 ){ + return PREFERRED_TEMP_SCHEMA_TABLE; + } + } + return zName; +} + +/* +** Locate the in-memory structure that describes +** a particular index given the name of that index +** and the name of the database that contains the index. +** Return NULL if not found. +** +** If zDatabase is 0, all databases are searched for the +** table and the first matching index is returned. (No checking +** for duplicate index names is done.) The search order is +** TEMP first, then MAIN, then any auxiliary databases added +** using the ATTACH command. +*/ +SQLITE_PRIVATE Index *sqlite3FindIndex(sqlite3 *db, const char *zName, const char *zDb){ + Index *p = 0; + int i; + /* All mutexes are required for schema access. Make sure we hold them. */ + assert( zDb!=0 || sqlite3BtreeHoldsAllMutexes(db) ); + for(i=OMIT_TEMPDB; inDb; i++){ + int j = (i<2) ? i^1 : i; /* Search TEMP before MAIN */ + Schema *pSchema = db->aDb[j].pSchema; + assert( pSchema ); + if( zDb && sqlite3DbIsNamed(db, j, zDb)==0 ) continue; + assert( sqlite3SchemaMutexHeld(db, j, 0) ); + p = sqlite3HashFind(&pSchema->idxHash, zName); + if( p ) break; + } + return p; +} + +/* +** Reclaim the memory used by an index +*/ +SQLITE_PRIVATE void sqlite3FreeIndex(sqlite3 *db, Index *p){ +#ifndef SQLITE_OMIT_ANALYZE + sqlite3DeleteIndexSamples(db, p); +#endif + sqlite3ExprDelete(db, p->pPartIdxWhere); + sqlite3ExprListDelete(db, p->aColExpr); + sqlite3DbFree(db, p->zColAff); + if( p->isResized ) sqlite3DbFree(db, (void *)p->azColl); +#ifdef SQLITE_ENABLE_STAT4 + sqlite3_free(p->aiRowEst); +#endif + sqlite3DbFree(db, p); +} + +/* +** For the index called zIdxName which is found in the database iDb, +** unlike that index from its Table then remove the index from +** the index hash table and free all memory structures associated +** with the index. +*/ +SQLITE_PRIVATE void sqlite3UnlinkAndDeleteIndex(sqlite3 *db, int iDb, const char *zIdxName){ + Index *pIndex; + Hash *pHash; + + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + pHash = &db->aDb[iDb].pSchema->idxHash; + pIndex = sqlite3HashInsert(pHash, zIdxName, 0); + if( ALWAYS(pIndex) ){ + if( pIndex->pTable->pIndex==pIndex ){ + pIndex->pTable->pIndex = pIndex->pNext; + }else{ + Index *p; + /* Justification of ALWAYS(); The index must be on the list of + ** indices. */ + p = pIndex->pTable->pIndex; + while( ALWAYS(p) && p->pNext!=pIndex ){ p = p->pNext; } + if( ALWAYS(p && p->pNext==pIndex) ){ + p->pNext = pIndex->pNext; + } + } + sqlite3FreeIndex(db, pIndex); + } + db->mDbFlags |= DBFLAG_SchemaChange; +} + +/* +** Look through the list of open database files in db->aDb[] and if +** any have been closed, remove them from the list. Reallocate the +** db->aDb[] structure to a smaller size, if possible. +** +** Entry 0 (the "main" database) and entry 1 (the "temp" database) +** are never candidates for being collapsed. +*/ +SQLITE_PRIVATE void sqlite3CollapseDatabaseArray(sqlite3 *db){ + int i, j; + for(i=j=2; inDb; i++){ + struct Db *pDb = &db->aDb[i]; + if( pDb->pBt==0 ){ + sqlite3DbFree(db, pDb->zDbSName); + pDb->zDbSName = 0; + continue; + } + if( jaDb[j] = db->aDb[i]; + } + j++; + } + db->nDb = j; + if( db->nDb<=2 && db->aDb!=db->aDbStatic ){ + memcpy(db->aDbStatic, db->aDb, 2*sizeof(db->aDb[0])); + sqlite3DbFree(db, db->aDb); + db->aDb = db->aDbStatic; + } +} + +/* +** Reset the schema for the database at index iDb. Also reset the +** TEMP schema. The reset is deferred if db->nSchemaLock is not zero. +** Deferred resets may be run by calling with iDb<0. +*/ +SQLITE_PRIVATE void sqlite3ResetOneSchema(sqlite3 *db, int iDb){ + int i; + assert( iDbnDb ); + + if( iDb>=0 ){ + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + DbSetProperty(db, iDb, DB_ResetWanted); + DbSetProperty(db, 1, DB_ResetWanted); + db->mDbFlags &= ~DBFLAG_SchemaKnownOk; + } + + if( db->nSchemaLock==0 ){ + for(i=0; inDb; i++){ + if( DbHasProperty(db, i, DB_ResetWanted) ){ + sqlite3SchemaClear(db->aDb[i].pSchema); + } + } + } +} + +/* +** Erase all schema information from all attached databases (including +** "main" and "temp") for a single database connection. +*/ +SQLITE_PRIVATE void sqlite3ResetAllSchemasOfConnection(sqlite3 *db){ + int i; + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ + Db *pDb = &db->aDb[i]; + if( pDb->pSchema ){ + if( db->nSchemaLock==0 ){ + sqlite3SchemaClear(pDb->pSchema); + }else{ + DbSetProperty(db, i, DB_ResetWanted); + } + } + } + db->mDbFlags &= ~(DBFLAG_SchemaChange|DBFLAG_SchemaKnownOk); + sqlite3VtabUnlockList(db); + sqlite3BtreeLeaveAll(db); + if( db->nSchemaLock==0 ){ + sqlite3CollapseDatabaseArray(db); + } +} + +/* +** This routine is called when a commit occurs. +*/ +SQLITE_PRIVATE void sqlite3CommitInternalChanges(sqlite3 *db){ + db->mDbFlags &= ~DBFLAG_SchemaChange; +} + +/* +** Set the expression associated with a column. This is usually +** the DEFAULT value, but might also be the expression that computes +** the value for a generated column. +*/ +SQLITE_PRIVATE void sqlite3ColumnSetExpr( + Parse *pParse, /* Parsing context */ + Table *pTab, /* The table containing the column */ + Column *pCol, /* The column to receive the new DEFAULT expression */ + Expr *pExpr /* The new default expression */ +){ + ExprList *pList; + assert( IsOrdinaryTable(pTab) ); + pList = pTab->u.tab.pDfltList; + if( pCol->iDflt==0 + || NEVER(pList==0) + || NEVER(pList->nExpriDflt) + ){ + pCol->iDflt = pList==0 ? 1 : pList->nExpr+1; + pTab->u.tab.pDfltList = sqlite3ExprListAppend(pParse, pList, pExpr); + }else{ + sqlite3ExprDelete(pParse->db, pList->a[pCol->iDflt-1].pExpr); + pList->a[pCol->iDflt-1].pExpr = pExpr; + } +} + +/* +** Return the expression associated with a column. The expression might be +** the DEFAULT clause or the AS clause of a generated column. +** Return NULL if the column has no associated expression. +*/ +SQLITE_PRIVATE Expr *sqlite3ColumnExpr(Table *pTab, Column *pCol){ + if( pCol->iDflt==0 ) return 0; + if( NEVER(!IsOrdinaryTable(pTab)) ) return 0; + if( NEVER(pTab->u.tab.pDfltList==0) ) return 0; + if( NEVER(pTab->u.tab.pDfltList->nExpriDflt) ) return 0; + return pTab->u.tab.pDfltList->a[pCol->iDflt-1].pExpr; +} + +/* +** Set the collating sequence name for a column. +*/ +SQLITE_PRIVATE void sqlite3ColumnSetColl( + sqlite3 *db, + Column *pCol, + const char *zColl +){ + i64 nColl; + i64 n; + char *zNew; + assert( zColl!=0 ); + n = sqlite3Strlen30(pCol->zCnName) + 1; + if( pCol->colFlags & COLFLAG_HASTYPE ){ + n += sqlite3Strlen30(pCol->zCnName+n) + 1; + } + nColl = sqlite3Strlen30(zColl) + 1; + zNew = sqlite3DbRealloc(db, pCol->zCnName, nColl+n); + if( zNew ){ + pCol->zCnName = zNew; + memcpy(pCol->zCnName + n, zColl, nColl); + pCol->colFlags |= COLFLAG_HASCOLL; + } +} + +/* +** Return the collating squence name for a column +*/ +SQLITE_PRIVATE const char *sqlite3ColumnColl(Column *pCol){ + const char *z; + if( (pCol->colFlags & COLFLAG_HASCOLL)==0 ) return 0; + z = pCol->zCnName; + while( *z ){ z++; } + if( pCol->colFlags & COLFLAG_HASTYPE ){ + do{ z++; }while( *z ); + } + return z+1; +} + +/* +** Delete memory allocated for the column names of a table or view (the +** Table.aCol[] array). +*/ +SQLITE_PRIVATE void sqlite3DeleteColumnNames(sqlite3 *db, Table *pTable){ + int i; + Column *pCol; + assert( pTable!=0 ); + if( (pCol = pTable->aCol)!=0 ){ + for(i=0; inCol; i++, pCol++){ + assert( pCol->zCnName==0 || pCol->hName==sqlite3StrIHash(pCol->zCnName) ); + sqlite3DbFree(db, pCol->zCnName); + } + sqlite3DbFree(db, pTable->aCol); + if( IsOrdinaryTable(pTable) ){ + sqlite3ExprListDelete(db, pTable->u.tab.pDfltList); + } + if( db==0 || db->pnBytesFreed==0 ){ + pTable->aCol = 0; + pTable->nCol = 0; + if( IsOrdinaryTable(pTable) ){ + pTable->u.tab.pDfltList = 0; + } + } + } +} + +/* +** Remove the memory data structures associated with the given +** Table. No changes are made to disk by this routine. +** +** This routine just deletes the data structure. It does not unlink +** the table data structure from the hash table. But it does destroy +** memory structures of the indices and foreign keys associated with +** the table. +** +** The db parameter is optional. It is needed if the Table object +** contains lookaside memory. (Table objects in the schema do not use +** lookaside memory, but some ephemeral Table objects do.) Or the +** db parameter can be used with db->pnBytesFreed to measure the memory +** used by the Table object. +*/ +static void SQLITE_NOINLINE deleteTable(sqlite3 *db, Table *pTable){ + Index *pIndex, *pNext; + +#ifdef SQLITE_DEBUG + /* Record the number of outstanding lookaside allocations in schema Tables + ** prior to doing any free() operations. Since schema Tables do not use + ** lookaside, this number should not change. + ** + ** If malloc has already failed, it may be that it failed while allocating + ** a Table object that was going to be marked ephemeral. So do not check + ** that no lookaside memory is used in this case either. */ + int nLookaside = 0; + if( db && !db->mallocFailed && (pTable->tabFlags & TF_Ephemeral)==0 ){ + nLookaside = sqlite3LookasideUsed(db, 0); + } +#endif + + /* Delete all indices associated with this table. */ + for(pIndex = pTable->pIndex; pIndex; pIndex=pNext){ + pNext = pIndex->pNext; + assert( pIndex->pSchema==pTable->pSchema + || (IsVirtual(pTable) && pIndex->idxType!=SQLITE_IDXTYPE_APPDEF) ); + if( (db==0 || db->pnBytesFreed==0) && !IsVirtual(pTable) ){ + char *zName = pIndex->zName; + TESTONLY ( Index *pOld = ) sqlite3HashInsert( + &pIndex->pSchema->idxHash, zName, 0 + ); + assert( db==0 || sqlite3SchemaMutexHeld(db, 0, pIndex->pSchema) ); + assert( pOld==pIndex || pOld==0 ); + } + sqlite3FreeIndex(db, pIndex); + } + + if( IsOrdinaryTable(pTable) ){ + sqlite3FkDelete(db, pTable); + } +#ifndef SQLITE_OMIT_VIRTUAL_TABLE + else if( IsVirtual(pTable) ){ + sqlite3VtabClear(db, pTable); + } +#endif + else{ + assert( IsView(pTable) ); + sqlite3SelectDelete(db, pTable->u.view.pSelect); + } + + /* Delete the Table structure itself. + */ + sqlite3DeleteColumnNames(db, pTable); + sqlite3DbFree(db, pTable->zName); + sqlite3DbFree(db, pTable->zColAff); + sqlite3ExprListDelete(db, pTable->pCheck); + sqlite3DbFree(db, pTable); + + /* Verify that no lookaside memory was used by schema tables */ + assert( nLookaside==0 || nLookaside==sqlite3LookasideUsed(db,0) ); +} +SQLITE_PRIVATE void sqlite3DeleteTable(sqlite3 *db, Table *pTable){ + /* Do not delete the table until the reference count reaches zero. */ + if( !pTable ) return; + if( ((!db || db->pnBytesFreed==0) && (--pTable->nTabRef)>0) ) return; + deleteTable(db, pTable); +} + + +/* +** Unlink the given table from the hash tables and the delete the +** table structure with all its indices and foreign keys. +*/ +SQLITE_PRIVATE void sqlite3UnlinkAndDeleteTable(sqlite3 *db, int iDb, const char *zTabName){ + Table *p; + Db *pDb; + + assert( db!=0 ); + assert( iDb>=0 && iDbnDb ); + assert( zTabName ); + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + testcase( zTabName[0]==0 ); /* Zero-length table names are allowed */ + pDb = &db->aDb[iDb]; + p = sqlite3HashInsert(&pDb->pSchema->tblHash, zTabName, 0); + sqlite3DeleteTable(db, p); + db->mDbFlags |= DBFLAG_SchemaChange; +} + +/* +** Given a token, return a string that consists of the text of that +** token. Space to hold the returned string +** is obtained from sqliteMalloc() and must be freed by the calling +** function. +** +** Any quotation marks (ex: "name", 'name', [name], or `name`) that +** surround the body of the token are removed. +** +** Tokens are often just pointers into the original SQL text and so +** are not \000 terminated and are not persistent. The returned string +** is \000 terminated and is persistent. +*/ +SQLITE_PRIVATE char *sqlite3NameFromToken(sqlite3 *db, const Token *pName){ + char *zName; + if( pName ){ + zName = sqlite3DbStrNDup(db, (const char*)pName->z, pName->n); + sqlite3Dequote(zName); + }else{ + zName = 0; + } + return zName; +} + +/* +** Open the sqlite_schema table stored in database number iDb for +** writing. The table is opened using cursor 0. +*/ +SQLITE_PRIVATE void sqlite3OpenSchemaTable(Parse *p, int iDb){ + Vdbe *v = sqlite3GetVdbe(p); + sqlite3TableLock(p, iDb, SCHEMA_ROOT, 1, LEGACY_SCHEMA_TABLE); + sqlite3VdbeAddOp4Int(v, OP_OpenWrite, 0, SCHEMA_ROOT, iDb, 5); + if( p->nTab==0 ){ + p->nTab = 1; + } +} + +/* +** Parameter zName points to a nul-terminated buffer containing the name +** of a database ("main", "temp" or the name of an attached db). This +** function returns the index of the named database in db->aDb[], or +** -1 if the named db cannot be found. +*/ +SQLITE_PRIVATE int sqlite3FindDbName(sqlite3 *db, const char *zName){ + int i = -1; /* Database number */ + if( zName ){ + Db *pDb; + for(i=(db->nDb-1), pDb=&db->aDb[i]; i>=0; i--, pDb--){ + if( 0==sqlite3_stricmp(pDb->zDbSName, zName) ) break; + /* "main" is always an acceptable alias for the primary database + ** even if it has been renamed using SQLITE_DBCONFIG_MAINDBNAME. */ + if( i==0 && 0==sqlite3_stricmp("main", zName) ) break; + } + } + return i; +} + +/* +** The token *pName contains the name of a database (either "main" or +** "temp" or the name of an attached db). This routine returns the +** index of the named database in db->aDb[], or -1 if the named db +** does not exist. +*/ +SQLITE_PRIVATE int sqlite3FindDb(sqlite3 *db, Token *pName){ + int i; /* Database number */ + char *zName; /* Name we are searching for */ + zName = sqlite3NameFromToken(db, pName); + i = sqlite3FindDbName(db, zName); + sqlite3DbFree(db, zName); + return i; +} + +/* The table or view or trigger name is passed to this routine via tokens +** pName1 and pName2. If the table name was fully qualified, for example: +** +** CREATE TABLE xxx.yyy (...); +** +** Then pName1 is set to "xxx" and pName2 "yyy". On the other hand if +** the table name is not fully qualified, i.e.: +** +** CREATE TABLE yyy(...); +** +** Then pName1 is set to "yyy" and pName2 is "". +** +** This routine sets the *ppUnqual pointer to point at the token (pName1 or +** pName2) that stores the unqualified table name. The index of the +** database "xxx" is returned. +*/ +SQLITE_PRIVATE int sqlite3TwoPartName( + Parse *pParse, /* Parsing and code generating context */ + Token *pName1, /* The "xxx" in the name "xxx.yyy" or "xxx" */ + Token *pName2, /* The "yyy" in the name "xxx.yyy" */ + Token **pUnqual /* Write the unqualified object name here */ +){ + int iDb; /* Database holding the object */ + sqlite3 *db = pParse->db; + + assert( pName2!=0 ); + if( pName2->n>0 ){ + if( db->init.busy ) { + sqlite3ErrorMsg(pParse, "corrupt database"); + return -1; + } + *pUnqual = pName2; + iDb = sqlite3FindDb(db, pName1); + if( iDb<0 ){ + sqlite3ErrorMsg(pParse, "unknown database %T", pName1); + return -1; + } + }else{ + assert( db->init.iDb==0 || db->init.busy || IN_SPECIAL_PARSE + || (db->mDbFlags & DBFLAG_Vacuum)!=0); + iDb = db->init.iDb; + *pUnqual = pName1; + } + return iDb; +} + +/* +** True if PRAGMA writable_schema is ON +*/ +SQLITE_PRIVATE int sqlite3WritableSchema(sqlite3 *db){ + testcase( (db->flags&(SQLITE_WriteSchema|SQLITE_Defensive))==0 ); + testcase( (db->flags&(SQLITE_WriteSchema|SQLITE_Defensive))== + SQLITE_WriteSchema ); + testcase( (db->flags&(SQLITE_WriteSchema|SQLITE_Defensive))== + SQLITE_Defensive ); + testcase( (db->flags&(SQLITE_WriteSchema|SQLITE_Defensive))== + (SQLITE_WriteSchema|SQLITE_Defensive) ); + return (db->flags&(SQLITE_WriteSchema|SQLITE_Defensive))==SQLITE_WriteSchema; +} + +/* +** This routine is used to check if the UTF-8 string zName is a legal +** unqualified name for a new schema object (table, index, view or +** trigger). All names are legal except those that begin with the string +** "sqlite_" (in upper, lower or mixed case). This portion of the namespace +** is reserved for internal use. +** +** When parsing the sqlite_schema table, this routine also checks to +** make sure the "type", "name", and "tbl_name" columns are consistent +** with the SQL. +*/ +SQLITE_PRIVATE int sqlite3CheckObjectName( + Parse *pParse, /* Parsing context */ + const char *zName, /* Name of the object to check */ + const char *zType, /* Type of this object */ + const char *zTblName /* Parent table name for triggers and indexes */ +){ + sqlite3 *db = pParse->db; + if( sqlite3WritableSchema(db) + || db->init.imposterTable + || !sqlite3Config.bExtraSchemaChecks + ){ + /* Skip these error checks for writable_schema=ON */ + return SQLITE_OK; + } + if( db->init.busy ){ + if( sqlite3_stricmp(zType, db->init.azInit[0]) + || sqlite3_stricmp(zName, db->init.azInit[1]) + || sqlite3_stricmp(zTblName, db->init.azInit[2]) + ){ + sqlite3ErrorMsg(pParse, ""); /* corruptSchema() will supply the error */ + return SQLITE_ERROR; + } + }else{ + if( (pParse->nested==0 && 0==sqlite3StrNICmp(zName, "sqlite_", 7)) + || (sqlite3ReadOnlyShadowTables(db) && sqlite3ShadowTableName(db, zName)) + ){ + sqlite3ErrorMsg(pParse, "object name reserved for internal use: %s", + zName); + return SQLITE_ERROR; + } + + } + return SQLITE_OK; +} + +/* +** Return the PRIMARY KEY index of a table +*/ +SQLITE_PRIVATE Index *sqlite3PrimaryKeyIndex(Table *pTab){ + Index *p; + for(p=pTab->pIndex; p && !IsPrimaryKeyIndex(p); p=p->pNext){} + return p; +} + +/* +** Convert an table column number into a index column number. That is, +** for the column iCol in the table (as defined by the CREATE TABLE statement) +** find the (first) offset of that column in index pIdx. Or return -1 +** if column iCol is not used in index pIdx. +*/ +SQLITE_PRIVATE i16 sqlite3TableColumnToIndex(Index *pIdx, i16 iCol){ + int i; + for(i=0; inColumn; i++){ + if( iCol==pIdx->aiColumn[i] ) return i; + } + return -1; +} + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS +/* Convert a storage column number into a table column number. +** +** The storage column number (0,1,2,....) is the index of the value +** as it appears in the record on disk. The true column number +** is the index (0,1,2,...) of the column in the CREATE TABLE statement. +** +** The storage column number is less than the table column number if +** and only there are VIRTUAL columns to the left. +** +** If SQLITE_OMIT_GENERATED_COLUMNS, this routine is a no-op macro. +*/ +SQLITE_PRIVATE i16 sqlite3StorageColumnToTable(Table *pTab, i16 iCol){ + if( pTab->tabFlags & TF_HasVirtual ){ + int i; + for(i=0; i<=iCol; i++){ + if( pTab->aCol[i].colFlags & COLFLAG_VIRTUAL ) iCol++; + } + } + return iCol; +} +#endif + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS +/* Convert a table column number into a storage column number. +** +** The storage column number (0,1,2,....) is the index of the value +** as it appears in the record on disk. Or, if the input column is +** the N-th virtual column (zero-based) then the storage number is +** the number of non-virtual columns in the table plus N. +** +** The true column number is the index (0,1,2,...) of the column in +** the CREATE TABLE statement. +** +** If the input column is a VIRTUAL column, then it should not appear +** in storage. But the value sometimes is cached in registers that +** follow the range of registers used to construct storage. This +** avoids computing the same VIRTUAL column multiple times, and provides +** values for use by OP_Param opcodes in triggers. Hence, if the +** input column is a VIRTUAL table, put it after all the other columns. +** +** In the following, N means "normal column", S means STORED, and +** V means VIRTUAL. Suppose the CREATE TABLE has columns like this: +** +** CREATE TABLE ex(N,S,V,N,S,V,N,S,V); +** -- 0 1 2 3 4 5 6 7 8 +** +** Then the mapping from this function is as follows: +** +** INPUTS: 0 1 2 3 4 5 6 7 8 +** OUTPUTS: 0 1 6 2 3 7 4 5 8 +** +** So, in other words, this routine shifts all the virtual columns to +** the end. +** +** If SQLITE_OMIT_GENERATED_COLUMNS then there are no virtual columns and +** this routine is a no-op macro. If the pTab does not have any virtual +** columns, then this routine is no-op that always return iCol. If iCol +** is negative (indicating the ROWID column) then this routine return iCol. +*/ +SQLITE_PRIVATE i16 sqlite3TableColumnToStorage(Table *pTab, i16 iCol){ + int i; + i16 n; + assert( iColnCol ); + if( (pTab->tabFlags & TF_HasVirtual)==0 || iCol<0 ) return iCol; + for(i=0, n=0; iaCol[i].colFlags & COLFLAG_VIRTUAL)==0 ) n++; + } + if( pTab->aCol[i].colFlags & COLFLAG_VIRTUAL ){ + /* iCol is a virtual column itself */ + return pTab->nNVCol + i - n; + }else{ + /* iCol is a normal or stored column */ + return n; + } +} +#endif + +/* +** Insert a single OP_JournalMode query opcode in order to force the +** prepared statement to return false for sqlite3_stmt_readonly(). This +** is used by CREATE TABLE IF NOT EXISTS and similar if the table already +** exists, so that the prepared statement for CREATE TABLE IF NOT EXISTS +** will return false for sqlite3_stmt_readonly() even if that statement +** is a read-only no-op. +*/ +static void sqlite3ForceNotReadOnly(Parse *pParse){ + int iReg = ++pParse->nMem; + Vdbe *v = sqlite3GetVdbe(pParse); + if( v ){ + sqlite3VdbeAddOp3(v, OP_JournalMode, 0, iReg, PAGER_JOURNALMODE_QUERY); + sqlite3VdbeUsesBtree(v, 0); + } +} + +/* +** Begin constructing a new table representation in memory. This is +** the first of several action routines that get called in response +** to a CREATE TABLE statement. In particular, this routine is called +** after seeing tokens "CREATE" and "TABLE" and the table name. The isTemp +** flag is true if the table should be stored in the auxiliary database +** file instead of in the main database file. This is normally the case +** when the "TEMP" or "TEMPORARY" keyword occurs in between +** CREATE and TABLE. +** +** The new table record is initialized and put in pParse->pNewTable. +** As more of the CREATE TABLE statement is parsed, additional action +** routines will be called to add more information to this record. +** At the end of the CREATE TABLE statement, the sqlite3EndTable() routine +** is called to complete the construction of the new table record. +*/ +SQLITE_PRIVATE void sqlite3StartTable( + Parse *pParse, /* Parser context */ + Token *pName1, /* First part of the name of the table or view */ + Token *pName2, /* Second part of the name of the table or view */ + int isTemp, /* True if this is a TEMP table */ + int isView, /* True if this is a VIEW */ + int isVirtual, /* True if this is a VIRTUAL table */ + int noErr /* Do nothing if table already exists */ +){ + Table *pTable; + char *zName = 0; /* The name of the new table */ + sqlite3 *db = pParse->db; + Vdbe *v; + int iDb; /* Database number to create the table in */ + Token *pName; /* Unqualified name of the table to create */ + + if( db->init.busy && db->init.newTnum==1 ){ + /* Special case: Parsing the sqlite_schema or sqlite_temp_schema schema */ + iDb = db->init.iDb; + zName = sqlite3DbStrDup(db, SCHEMA_TABLE(iDb)); + pName = pName1; + }else{ + /* The common case */ + iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pName); + if( iDb<0 ) return; + if( !OMIT_TEMPDB && isTemp && pName2->n>0 && iDb!=1 ){ + /* If creating a temp table, the name may not be qualified. Unless + ** the database name is "temp" anyway. */ + sqlite3ErrorMsg(pParse, "temporary table name must be unqualified"); + return; + } + if( !OMIT_TEMPDB && isTemp ) iDb = 1; + zName = sqlite3NameFromToken(db, pName); + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenMap(pParse, (void*)zName, pName); + } + } + pParse->sNameToken = *pName; + if( zName==0 ) return; + if( sqlite3CheckObjectName(pParse, zName, isView?"view":"table", zName) ){ + goto begin_table_error; + } + if( db->init.iDb==1 ) isTemp = 1; +#ifndef SQLITE_OMIT_AUTHORIZATION + assert( isTemp==0 || isTemp==1 ); + assert( isView==0 || isView==1 ); + { + static const u8 aCode[] = { + SQLITE_CREATE_TABLE, + SQLITE_CREATE_TEMP_TABLE, + SQLITE_CREATE_VIEW, + SQLITE_CREATE_TEMP_VIEW + }; + char *zDb = db->aDb[iDb].zDbSName; + if( sqlite3AuthCheck(pParse, SQLITE_INSERT, SCHEMA_TABLE(isTemp), 0, zDb) ){ + goto begin_table_error; + } + if( !isVirtual && sqlite3AuthCheck(pParse, (int)aCode[isTemp+2*isView], + zName, 0, zDb) ){ + goto begin_table_error; + } + } +#endif + + /* Make sure the new table name does not collide with an existing + ** index or table name in the same database. Issue an error message if + ** it does. The exception is if the statement being parsed was passed + ** to an sqlite3_declare_vtab() call. In that case only the column names + ** and types will be used, so there is no need to test for namespace + ** collisions. + */ + if( !IN_SPECIAL_PARSE ){ + char *zDb = db->aDb[iDb].zDbSName; + if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){ + goto begin_table_error; + } + pTable = sqlite3FindTable(db, zName, zDb); + if( pTable ){ + if( !noErr ){ + sqlite3ErrorMsg(pParse, "%s %T already exists", + (IsView(pTable)? "view" : "table"), pName); + }else{ + assert( !db->init.busy || CORRUPT_DB ); + sqlite3CodeVerifySchema(pParse, iDb); + sqlite3ForceNotReadOnly(pParse); + } + goto begin_table_error; + } + if( sqlite3FindIndex(db, zName, zDb)!=0 ){ + sqlite3ErrorMsg(pParse, "there is already an index named %s", zName); + goto begin_table_error; + } + } + + pTable = sqlite3DbMallocZero(db, sizeof(Table)); + if( pTable==0 ){ + assert( db->mallocFailed ); + pParse->rc = SQLITE_NOMEM_BKPT; + pParse->nErr++; + goto begin_table_error; + } + pTable->zName = zName; + pTable->iPKey = -1; + pTable->pSchema = db->aDb[iDb].pSchema; + pTable->nTabRef = 1; +#ifdef SQLITE_DEFAULT_ROWEST + pTable->nRowLogEst = sqlite3LogEst(SQLITE_DEFAULT_ROWEST); +#else + pTable->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); +#endif + assert( pParse->pNewTable==0 ); + pParse->pNewTable = pTable; + + /* Begin generating the code that will insert the table record into + ** the schema table. Note in particular that we must go ahead + ** and allocate the record number for the table entry now. Before any + ** PRIMARY KEY or UNIQUE keywords are parsed. Those keywords will cause + ** indices to be created and the table record must come before the + ** indices. Hence, the record number for the table must be allocated + ** now. + */ + if( !db->init.busy && (v = sqlite3GetVdbe(pParse))!=0 ){ + int addr1; + int fileFormat; + int reg1, reg2, reg3; + /* nullRow[] is an OP_Record encoding of a row containing 5 NULLs */ + static const char nullRow[] = { 6, 0, 0, 0, 0, 0 }; + sqlite3BeginWriteOperation(pParse, 1, iDb); + +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( isVirtual ){ + sqlite3VdbeAddOp0(v, OP_VBegin); + } +#endif + + /* If the file format and encoding in the database have not been set, + ** set them now. + */ + reg1 = pParse->regRowid = ++pParse->nMem; + reg2 = pParse->regRoot = ++pParse->nMem; + reg3 = ++pParse->nMem; + sqlite3VdbeAddOp3(v, OP_ReadCookie, iDb, reg3, BTREE_FILE_FORMAT); + sqlite3VdbeUsesBtree(v, iDb); + addr1 = sqlite3VdbeAddOp1(v, OP_If, reg3); VdbeCoverage(v); + fileFormat = (db->flags & SQLITE_LegacyFileFmt)!=0 ? + 1 : SQLITE_MAX_FILE_FORMAT; + sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_FILE_FORMAT, fileFormat); + sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_TEXT_ENCODING, ENC(db)); + sqlite3VdbeJumpHere(v, addr1); + + /* This just creates a place-holder record in the sqlite_schema table. + ** The record created does not contain anything yet. It will be replaced + ** by the real entry in code generated at sqlite3EndTable(). + ** + ** The rowid for the new entry is left in register pParse->regRowid. + ** The root page number of the new table is left in reg pParse->regRoot. + ** The rowid and root page number values are needed by the code that + ** sqlite3EndTable will generate. + */ +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) + if( isView || isVirtual ){ + sqlite3VdbeAddOp2(v, OP_Integer, 0, reg2); + }else +#endif + { + assert( !pParse->bReturning ); + pParse->u1.addrCrTab = + sqlite3VdbeAddOp3(v, OP_CreateBtree, iDb, reg2, BTREE_INTKEY); + } + sqlite3OpenSchemaTable(pParse, iDb); + sqlite3VdbeAddOp2(v, OP_NewRowid, 0, reg1); + sqlite3VdbeAddOp4(v, OP_Blob, 6, reg3, 0, nullRow, P4_STATIC); + sqlite3VdbeAddOp3(v, OP_Insert, 0, reg3, reg1); + sqlite3VdbeChangeP5(v, OPFLAG_APPEND); + sqlite3VdbeAddOp0(v, OP_Close); + } + + /* Normal (non-error) return. */ + return; + + /* If an error occurs, we jump here */ +begin_table_error: + pParse->checkSchema = 1; + sqlite3DbFree(db, zName); + return; +} + +/* Set properties of a table column based on the (magical) +** name of the column. +*/ +#if SQLITE_ENABLE_HIDDEN_COLUMNS +SQLITE_PRIVATE void sqlite3ColumnPropertiesFromName(Table *pTab, Column *pCol){ + if( sqlite3_strnicmp(pCol->zCnName, "__hidden__", 10)==0 ){ + pCol->colFlags |= COLFLAG_HIDDEN; + if( pTab ) pTab->tabFlags |= TF_HasHidden; + }else if( pTab && pCol!=pTab->aCol && (pCol[-1].colFlags & COLFLAG_HIDDEN) ){ + pTab->tabFlags |= TF_OOOHidden; + } +} +#endif + +/* +** Name of the special TEMP trigger used to implement RETURNING. The +** name begins with "sqlite_" so that it is guaranteed not to collide +** with any application-generated triggers. +*/ +#define RETURNING_TRIGGER_NAME "sqlite_returning" + +/* +** Clean up the data structures associated with the RETURNING clause. +*/ +static void sqlite3DeleteReturning(sqlite3 *db, Returning *pRet){ + Hash *pHash; + pHash = &(db->aDb[1].pSchema->trigHash); + sqlite3HashInsert(pHash, RETURNING_TRIGGER_NAME, 0); + sqlite3ExprListDelete(db, pRet->pReturnEL); + sqlite3DbFree(db, pRet); +} + +/* +** Add the RETURNING clause to the parse currently underway. +** +** This routine creates a special TEMP trigger that will fire for each row +** of the DML statement. That TEMP trigger contains a single SELECT +** statement with a result set that is the argument of the RETURNING clause. +** The trigger has the Trigger.bReturning flag and an opcode of +** TK_RETURNING instead of TK_SELECT, so that the trigger code generator +** knows to handle it specially. The TEMP trigger is automatically +** removed at the end of the parse. +** +** When this routine is called, we do not yet know if the RETURNING clause +** is attached to a DELETE, INSERT, or UPDATE, so construct it as a +** RETURNING trigger instead. It will then be converted into the appropriate +** type on the first call to sqlite3TriggersExist(). +*/ +SQLITE_PRIVATE void sqlite3AddReturning(Parse *pParse, ExprList *pList){ + Returning *pRet; + Hash *pHash; + sqlite3 *db = pParse->db; + if( pParse->pNewTrigger ){ + sqlite3ErrorMsg(pParse, "cannot use RETURNING in a trigger"); + }else{ + assert( pParse->bReturning==0 ); + } + pParse->bReturning = 1; + pRet = sqlite3DbMallocZero(db, sizeof(*pRet)); + if( pRet==0 ){ + sqlite3ExprListDelete(db, pList); + return; + } + pParse->u1.pReturning = pRet; + pRet->pParse = pParse; + pRet->pReturnEL = pList; + sqlite3ParserAddCleanup(pParse, + (void(*)(sqlite3*,void*))sqlite3DeleteReturning, pRet); + testcase( pParse->earlyCleanup ); + if( db->mallocFailed ) return; + pRet->retTrig.zName = RETURNING_TRIGGER_NAME; + pRet->retTrig.op = TK_RETURNING; + pRet->retTrig.tr_tm = TRIGGER_AFTER; + pRet->retTrig.bReturning = 1; + pRet->retTrig.pSchema = db->aDb[1].pSchema; + pRet->retTrig.pTabSchema = db->aDb[1].pSchema; + pRet->retTrig.step_list = &pRet->retTStep; + pRet->retTStep.op = TK_RETURNING; + pRet->retTStep.pTrig = &pRet->retTrig; + pRet->retTStep.pExprList = pList; + pHash = &(db->aDb[1].pSchema->trigHash); + assert( sqlite3HashFind(pHash, RETURNING_TRIGGER_NAME)==0 || pParse->nErr ); + if( sqlite3HashInsert(pHash, RETURNING_TRIGGER_NAME, &pRet->retTrig) + ==&pRet->retTrig ){ + sqlite3OomFault(db); + } +} + +/* +** Add a new column to the table currently being constructed. +** +** The parser calls this routine once for each column declaration +** in a CREATE TABLE statement. sqlite3StartTable() gets called +** first to get things going. Then this routine is called for each +** column. +*/ +SQLITE_PRIVATE void sqlite3AddColumn(Parse *pParse, Token sName, Token sType){ + Table *p; + int i; + char *z; + char *zType; + Column *pCol; + sqlite3 *db = pParse->db; + u8 hName; + Column *aNew; + u8 eType = COLTYPE_CUSTOM; + u8 szEst = 1; + char affinity = SQLITE_AFF_BLOB; + + if( (p = pParse->pNewTable)==0 ) return; + if( p->nCol+1>db->aLimit[SQLITE_LIMIT_COLUMN] ){ + sqlite3ErrorMsg(pParse, "too many columns on %s", p->zName); + return; + } + if( !IN_RENAME_OBJECT ) sqlite3DequoteToken(&sName); + + /* Because keywords GENERATE ALWAYS can be converted into indentifiers + ** by the parser, we can sometimes end up with a typename that ends + ** with "generated always". Check for this case and omit the surplus + ** text. */ + if( sType.n>=16 + && sqlite3_strnicmp(sType.z+(sType.n-6),"always",6)==0 + ){ + sType.n -= 6; + while( ALWAYS(sType.n>0) && sqlite3Isspace(sType.z[sType.n-1]) ) sType.n--; + if( sType.n>=9 + && sqlite3_strnicmp(sType.z+(sType.n-9),"generated",9)==0 + ){ + sType.n -= 9; + while( sType.n>0 && sqlite3Isspace(sType.z[sType.n-1]) ) sType.n--; + } + } + + /* Check for standard typenames. For standard typenames we will + ** set the Column.eType field rather than storing the typename after + ** the column name, in order to save space. */ + if( sType.n>=3 ){ + sqlite3DequoteToken(&sType); + for(i=0; i0) ); + if( z==0 ) return; + if( IN_RENAME_OBJECT ) sqlite3RenameTokenMap(pParse, (void*)z, &sName); + memcpy(z, sName.z, sName.n); + z[sName.n] = 0; + sqlite3Dequote(z); + hName = sqlite3StrIHash(z); + for(i=0; inCol; i++){ + if( p->aCol[i].hName==hName && sqlite3StrICmp(z, p->aCol[i].zCnName)==0 ){ + sqlite3ErrorMsg(pParse, "duplicate column name: %s", z); + sqlite3DbFree(db, z); + return; + } + } + aNew = sqlite3DbRealloc(db,p->aCol,((i64)p->nCol+1)*sizeof(p->aCol[0])); + if( aNew==0 ){ + sqlite3DbFree(db, z); + return; + } + p->aCol = aNew; + pCol = &p->aCol[p->nCol]; + memset(pCol, 0, sizeof(p->aCol[0])); + pCol->zCnName = z; + pCol->hName = hName; + sqlite3ColumnPropertiesFromName(p, pCol); + + if( sType.n==0 ){ + /* If there is no type specified, columns have the default affinity + ** 'BLOB' with a default size of 4 bytes. */ + pCol->affinity = affinity; + pCol->eCType = eType; + pCol->szEst = szEst; +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + if( affinity==SQLITE_AFF_BLOB ){ + if( 4>=sqlite3GlobalConfig.szSorterRef ){ + pCol->colFlags |= COLFLAG_SORTERREF; + } + } +#endif + }else{ + zType = z + sqlite3Strlen30(z) + 1; + memcpy(zType, sType.z, sType.n); + zType[sType.n] = 0; + sqlite3Dequote(zType); + pCol->affinity = sqlite3AffinityType(zType, pCol); + pCol->colFlags |= COLFLAG_HASTYPE; + } + p->nCol++; + p->nNVCol++; + pParse->constraintName.n = 0; +} + +/* +** This routine is called by the parser while in the middle of +** parsing a CREATE TABLE statement. A "NOT NULL" constraint has +** been seen on a column. This routine sets the notNull flag on +** the column currently under construction. +*/ +SQLITE_PRIVATE void sqlite3AddNotNull(Parse *pParse, int onError){ + Table *p; + Column *pCol; + p = pParse->pNewTable; + if( p==0 || NEVER(p->nCol<1) ) return; + pCol = &p->aCol[p->nCol-1]; + pCol->notNull = (u8)onError; + p->tabFlags |= TF_HasNotNull; + + /* Set the uniqNotNull flag on any UNIQUE or PK indexes already created + ** on this column. */ + if( pCol->colFlags & COLFLAG_UNIQUE ){ + Index *pIdx; + for(pIdx=p->pIndex; pIdx; pIdx=pIdx->pNext){ + assert( pIdx->nKeyCol==1 && pIdx->onError!=OE_None ); + if( pIdx->aiColumn[0]==p->nCol-1 ){ + pIdx->uniqNotNull = 1; + } + } + } +} + +/* +** Scan the column type name zType (length nType) and return the +** associated affinity type. +** +** This routine does a case-independent search of zType for the +** substrings in the following table. If one of the substrings is +** found, the corresponding affinity is returned. If zType contains +** more than one of the substrings, entries toward the top of +** the table take priority. For example, if zType is 'BLOBINT', +** SQLITE_AFF_INTEGER is returned. +** +** Substring | Affinity +** -------------------------------- +** 'INT' | SQLITE_AFF_INTEGER +** 'CHAR' | SQLITE_AFF_TEXT +** 'CLOB' | SQLITE_AFF_TEXT +** 'TEXT' | SQLITE_AFF_TEXT +** 'BLOB' | SQLITE_AFF_BLOB +** 'REAL' | SQLITE_AFF_REAL +** 'FLOA' | SQLITE_AFF_REAL +** 'DOUB' | SQLITE_AFF_REAL +** +** If none of the substrings in the above table are found, +** SQLITE_AFF_NUMERIC is returned. +*/ +SQLITE_PRIVATE char sqlite3AffinityType(const char *zIn, Column *pCol){ + u32 h = 0; + char aff = SQLITE_AFF_NUMERIC; + const char *zChar = 0; + + assert( zIn!=0 ); + while( zIn[0] ){ + h = (h<<8) + sqlite3UpperToLower[(*zIn)&0xff]; + zIn++; + if( h==(('c'<<24)+('h'<<16)+('a'<<8)+'r') ){ /* CHAR */ + aff = SQLITE_AFF_TEXT; + zChar = zIn; + }else if( h==(('c'<<24)+('l'<<16)+('o'<<8)+'b') ){ /* CLOB */ + aff = SQLITE_AFF_TEXT; + }else if( h==(('t'<<24)+('e'<<16)+('x'<<8)+'t') ){ /* TEXT */ + aff = SQLITE_AFF_TEXT; + }else if( h==(('b'<<24)+('l'<<16)+('o'<<8)+'b') /* BLOB */ + && (aff==SQLITE_AFF_NUMERIC || aff==SQLITE_AFF_REAL) ){ + aff = SQLITE_AFF_BLOB; + if( zIn[0]=='(' ) zChar = zIn; +#ifndef SQLITE_OMIT_FLOATING_POINT + }else if( h==(('r'<<24)+('e'<<16)+('a'<<8)+'l') /* REAL */ + && aff==SQLITE_AFF_NUMERIC ){ + aff = SQLITE_AFF_REAL; + }else if( h==(('f'<<24)+('l'<<16)+('o'<<8)+'a') /* FLOA */ + && aff==SQLITE_AFF_NUMERIC ){ + aff = SQLITE_AFF_REAL; + }else if( h==(('d'<<24)+('o'<<16)+('u'<<8)+'b') /* DOUB */ + && aff==SQLITE_AFF_NUMERIC ){ + aff = SQLITE_AFF_REAL; +#endif + }else if( (h&0x00FFFFFF)==(('i'<<16)+('n'<<8)+'t') ){ /* INT */ + aff = SQLITE_AFF_INTEGER; + break; + } + } + + /* If pCol is not NULL, store an estimate of the field size. The + ** estimate is scaled so that the size of an integer is 1. */ + if( pCol ){ + int v = 0; /* default size is approx 4 bytes */ + if( aff r=(k/4+1) */ + sqlite3GetInt32(zChar, &v); + break; + } + zChar++; + } + }else{ + v = 16; /* BLOB, TEXT, CLOB -> r=5 (approx 20 bytes)*/ + } + } +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + if( v>=sqlite3GlobalConfig.szSorterRef ){ + pCol->colFlags |= COLFLAG_SORTERREF; + } +#endif + v = v/4 + 1; + if( v>255 ) v = 255; + pCol->szEst = v; + } + return aff; +} + +/* +** The expression is the default value for the most recently added column +** of the table currently under construction. +** +** Default value expressions must be constant. Raise an exception if this +** is not the case. +** +** This routine is called by the parser while in the middle of +** parsing a CREATE TABLE statement. +*/ +SQLITE_PRIVATE void sqlite3AddDefaultValue( + Parse *pParse, /* Parsing context */ + Expr *pExpr, /* The parsed expression of the default value */ + const char *zStart, /* Start of the default value text */ + const char *zEnd /* First character past end of defaut value text */ +){ + Table *p; + Column *pCol; + sqlite3 *db = pParse->db; + p = pParse->pNewTable; + if( p!=0 ){ + int isInit = db->init.busy && db->init.iDb!=1; + pCol = &(p->aCol[p->nCol-1]); + if( !sqlite3ExprIsConstantOrFunction(pExpr, isInit) ){ + sqlite3ErrorMsg(pParse, "default value of column [%s] is not constant", + pCol->zCnName); +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + }else if( pCol->colFlags & COLFLAG_GENERATED ){ + testcase( pCol->colFlags & COLFLAG_VIRTUAL ); + testcase( pCol->colFlags & COLFLAG_STORED ); + sqlite3ErrorMsg(pParse, "cannot use DEFAULT on a generated column"); +#endif + }else{ + /* A copy of pExpr is used instead of the original, as pExpr contains + ** tokens that point to volatile memory. + */ + Expr x, *pDfltExpr; + memset(&x, 0, sizeof(x)); + x.op = TK_SPAN; + x.u.zToken = sqlite3DbSpanDup(db, zStart, zEnd); + x.pLeft = pExpr; + x.flags = EP_Skip; + pDfltExpr = sqlite3ExprDup(db, &x, EXPRDUP_REDUCE); + sqlite3DbFree(db, x.u.zToken); + sqlite3ColumnSetExpr(pParse, p, pCol, pDfltExpr); + } + } + if( IN_RENAME_OBJECT ){ + sqlite3RenameExprUnmap(pParse, pExpr); + } + sqlite3ExprDelete(db, pExpr); +} + +/* +** Backwards Compatibility Hack: +** +** Historical versions of SQLite accepted strings as column names in +** indexes and PRIMARY KEY constraints and in UNIQUE constraints. Example: +** +** CREATE TABLE xyz(a,b,c,d,e,PRIMARY KEY('a'),UNIQUE('b','c' COLLATE trim) +** CREATE INDEX abc ON xyz('c','d' DESC,'e' COLLATE nocase DESC); +** +** This is goofy. But to preserve backwards compatibility we continue to +** accept it. This routine does the necessary conversion. It converts +** the expression given in its argument from a TK_STRING into a TK_ID +** if the expression is just a TK_STRING with an optional COLLATE clause. +** If the expression is anything other than TK_STRING, the expression is +** unchanged. +*/ +static void sqlite3StringToId(Expr *p){ + if( p->op==TK_STRING ){ + p->op = TK_ID; + }else if( p->op==TK_COLLATE && p->pLeft->op==TK_STRING ){ + p->pLeft->op = TK_ID; + } +} + +/* +** Tag the given column as being part of the PRIMARY KEY +*/ +static void makeColumnPartOfPrimaryKey(Parse *pParse, Column *pCol){ + pCol->colFlags |= COLFLAG_PRIMKEY; +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( pCol->colFlags & COLFLAG_GENERATED ){ + testcase( pCol->colFlags & COLFLAG_VIRTUAL ); + testcase( pCol->colFlags & COLFLAG_STORED ); + sqlite3ErrorMsg(pParse, + "generated columns cannot be part of the PRIMARY KEY"); + } +#endif +} + +/* +** Designate the PRIMARY KEY for the table. pList is a list of names +** of columns that form the primary key. If pList is NULL, then the +** most recently added column of the table is the primary key. +** +** A table can have at most one primary key. If the table already has +** a primary key (and this is the second primary key) then create an +** error. +** +** If the PRIMARY KEY is on a single column whose datatype is INTEGER, +** then we will try to use that column as the rowid. Set the Table.iPKey +** field of the table under construction to be the index of the +** INTEGER PRIMARY KEY column. Table.iPKey is set to -1 if there is +** no INTEGER PRIMARY KEY. +** +** If the key is not an INTEGER PRIMARY KEY, then create a unique +** index for the key. No index is created for INTEGER PRIMARY KEYs. +*/ +SQLITE_PRIVATE void sqlite3AddPrimaryKey( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* List of field names to be indexed */ + int onError, /* What to do with a uniqueness conflict */ + int autoInc, /* True if the AUTOINCREMENT keyword is present */ + int sortOrder /* SQLITE_SO_ASC or SQLITE_SO_DESC */ +){ + Table *pTab = pParse->pNewTable; + Column *pCol = 0; + int iCol = -1, i; + int nTerm; + if( pTab==0 ) goto primary_key_exit; + if( pTab->tabFlags & TF_HasPrimaryKey ){ + sqlite3ErrorMsg(pParse, + "table \"%s\" has more than one primary key", pTab->zName); + goto primary_key_exit; + } + pTab->tabFlags |= TF_HasPrimaryKey; + if( pList==0 ){ + iCol = pTab->nCol - 1; + pCol = &pTab->aCol[iCol]; + makeColumnPartOfPrimaryKey(pParse, pCol); + nTerm = 1; + }else{ + nTerm = pList->nExpr; + for(i=0; ia[i].pExpr); + assert( pCExpr!=0 ); + sqlite3StringToId(pCExpr); + if( pCExpr->op==TK_ID ){ + const char *zCName; + assert( !ExprHasProperty(pCExpr, EP_IntValue) ); + zCName = pCExpr->u.zToken; + for(iCol=0; iColnCol; iCol++){ + if( sqlite3StrICmp(zCName, pTab->aCol[iCol].zCnName)==0 ){ + pCol = &pTab->aCol[iCol]; + makeColumnPartOfPrimaryKey(pParse, pCol); + break; + } + } + } + } + } + if( nTerm==1 + && pCol + && pCol->eCType==COLTYPE_INTEGER + && sortOrder!=SQLITE_SO_DESC + ){ + if( IN_RENAME_OBJECT && pList ){ + Expr *pCExpr = sqlite3ExprSkipCollate(pList->a[0].pExpr); + sqlite3RenameTokenRemap(pParse, &pTab->iPKey, pCExpr); + } + pTab->iPKey = iCol; + pTab->keyConf = (u8)onError; + assert( autoInc==0 || autoInc==1 ); + pTab->tabFlags |= autoInc*TF_Autoincrement; + if( pList ) pParse->iPkSortOrder = pList->a[0].sortFlags; + (void)sqlite3HasExplicitNulls(pParse, pList); + }else if( autoInc ){ +#ifndef SQLITE_OMIT_AUTOINCREMENT + sqlite3ErrorMsg(pParse, "AUTOINCREMENT is only allowed on an " + "INTEGER PRIMARY KEY"); +#endif + }else{ + sqlite3CreateIndex(pParse, 0, 0, 0, pList, onError, 0, + 0, sortOrder, 0, SQLITE_IDXTYPE_PRIMARYKEY); + pList = 0; + } + +primary_key_exit: + sqlite3ExprListDelete(pParse->db, pList); + return; +} + +/* +** Add a new CHECK constraint to the table currently under construction. +*/ +SQLITE_PRIVATE void sqlite3AddCheckConstraint( + Parse *pParse, /* Parsing context */ + Expr *pCheckExpr, /* The check expression */ + const char *zStart, /* Opening "(" */ + const char *zEnd /* Closing ")" */ +){ +#ifndef SQLITE_OMIT_CHECK + Table *pTab = pParse->pNewTable; + sqlite3 *db = pParse->db; + if( pTab && !IN_DECLARE_VTAB + && !sqlite3BtreeIsReadonly(db->aDb[db->init.iDb].pBt) + ){ + pTab->pCheck = sqlite3ExprListAppend(pParse, pTab->pCheck, pCheckExpr); + if( pParse->constraintName.n ){ + sqlite3ExprListSetName(pParse, pTab->pCheck, &pParse->constraintName, 1); + }else{ + Token t; + for(zStart++; sqlite3Isspace(zStart[0]); zStart++){} + while( sqlite3Isspace(zEnd[-1]) ){ zEnd--; } + t.z = zStart; + t.n = (int)(zEnd - t.z); + sqlite3ExprListSetName(pParse, pTab->pCheck, &t, 1); + } + }else +#endif + { + sqlite3ExprDelete(pParse->db, pCheckExpr); + } +} + +/* +** Set the collation function of the most recently parsed table column +** to the CollSeq given. +*/ +SQLITE_PRIVATE void sqlite3AddCollateType(Parse *pParse, Token *pToken){ + Table *p; + int i; + char *zColl; /* Dequoted name of collation sequence */ + sqlite3 *db; + + if( (p = pParse->pNewTable)==0 || IN_RENAME_OBJECT ) return; + i = p->nCol-1; + db = pParse->db; + zColl = sqlite3NameFromToken(db, pToken); + if( !zColl ) return; + + if( sqlite3LocateCollSeq(pParse, zColl) ){ + Index *pIdx; + sqlite3ColumnSetColl(db, &p->aCol[i], zColl); + + /* If the column is declared as " PRIMARY KEY COLLATE ", + ** then an index may have been created on this column before the + ** collation type was added. Correct this if it is the case. + */ + for(pIdx=p->pIndex; pIdx; pIdx=pIdx->pNext){ + assert( pIdx->nKeyCol==1 ); + if( pIdx->aiColumn[0]==i ){ + pIdx->azColl[0] = sqlite3ColumnColl(&p->aCol[i]); + } + } + } + sqlite3DbFree(db, zColl); +} + +/* Change the most recently parsed column to be a GENERATED ALWAYS AS +** column. +*/ +SQLITE_PRIVATE void sqlite3AddGenerated(Parse *pParse, Expr *pExpr, Token *pType){ +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + u8 eType = COLFLAG_VIRTUAL; + Table *pTab = pParse->pNewTable; + Column *pCol; + if( pTab==0 ){ + /* generated column in an CREATE TABLE IF NOT EXISTS that already exists */ + goto generated_done; + } + pCol = &(pTab->aCol[pTab->nCol-1]); + if( IN_DECLARE_VTAB ){ + sqlite3ErrorMsg(pParse, "virtual tables cannot use computed columns"); + goto generated_done; + } + if( pCol->iDflt>0 ) goto generated_error; + if( pType ){ + if( pType->n==7 && sqlite3StrNICmp("virtual",pType->z,7)==0 ){ + /* no-op */ + }else if( pType->n==6 && sqlite3StrNICmp("stored",pType->z,6)==0 ){ + eType = COLFLAG_STORED; + }else{ + goto generated_error; + } + } + if( eType==COLFLAG_VIRTUAL ) pTab->nNVCol--; + pCol->colFlags |= eType; + assert( TF_HasVirtual==COLFLAG_VIRTUAL ); + assert( TF_HasStored==COLFLAG_STORED ); + pTab->tabFlags |= eType; + if( pCol->colFlags & COLFLAG_PRIMKEY ){ + makeColumnPartOfPrimaryKey(pParse, pCol); /* For the error message */ + } + sqlite3ColumnSetExpr(pParse, pTab, pCol, pExpr); + pExpr = 0; + goto generated_done; + +generated_error: + sqlite3ErrorMsg(pParse, "error in generated column \"%s\"", + pCol->zCnName); +generated_done: + sqlite3ExprDelete(pParse->db, pExpr); +#else + /* Throw and error for the GENERATED ALWAYS AS clause if the + ** SQLITE_OMIT_GENERATED_COLUMNS compile-time option is used. */ + sqlite3ErrorMsg(pParse, "generated columns not supported"); + sqlite3ExprDelete(pParse->db, pExpr); +#endif +} + +/* +** Generate code that will increment the schema cookie. +** +** The schema cookie is used to determine when the schema for the +** database changes. After each schema change, the cookie value +** changes. When a process first reads the schema it records the +** cookie. Thereafter, whenever it goes to access the database, +** it checks the cookie to make sure the schema has not changed +** since it was last read. +** +** This plan is not completely bullet-proof. It is possible for +** the schema to change multiple times and for the cookie to be +** set back to prior value. But schema changes are infrequent +** and the probability of hitting the same cookie value is only +** 1 chance in 2^32. So we're safe enough. +** +** IMPLEMENTATION-OF: R-34230-56049 SQLite automatically increments +** the schema-version whenever the schema changes. +*/ +SQLITE_PRIVATE void sqlite3ChangeCookie(Parse *pParse, int iDb){ + sqlite3 *db = pParse->db; + Vdbe *v = pParse->pVdbe; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_SCHEMA_VERSION, + (int)(1+(unsigned)db->aDb[iDb].pSchema->schema_cookie)); +} + +/* +** Measure the number of characters needed to output the given +** identifier. The number returned includes any quotes used +** but does not include the null terminator. +** +** The estimate is conservative. It might be larger that what is +** really needed. +*/ +static int identLength(const char *z){ + int n; + for(n=0; *z; n++, z++){ + if( *z=='"' ){ n++; } + } + return n + 2; +} + +/* +** The first parameter is a pointer to an output buffer. The second +** parameter is a pointer to an integer that contains the offset at +** which to write into the output buffer. This function copies the +** nul-terminated string pointed to by the third parameter, zSignedIdent, +** to the specified offset in the buffer and updates *pIdx to refer +** to the first byte after the last byte written before returning. +** +** If the string zSignedIdent consists entirely of alpha-numeric +** characters, does not begin with a digit and is not an SQL keyword, +** then it is copied to the output buffer exactly as it is. Otherwise, +** it is quoted using double-quotes. +*/ +static void identPut(char *z, int *pIdx, char *zSignedIdent){ + unsigned char *zIdent = (unsigned char*)zSignedIdent; + int i, j, needQuote; + i = *pIdx; + + for(j=0; zIdent[j]; j++){ + if( !sqlite3Isalnum(zIdent[j]) && zIdent[j]!='_' ) break; + } + needQuote = sqlite3Isdigit(zIdent[0]) + || sqlite3KeywordCode(zIdent, j)!=TK_ID + || zIdent[j]!=0 + || j==0; + + if( needQuote ) z[i++] = '"'; + for(j=0; zIdent[j]; j++){ + z[i++] = zIdent[j]; + if( zIdent[j]=='"' ) z[i++] = '"'; + } + if( needQuote ) z[i++] = '"'; + z[i] = 0; + *pIdx = i; +} + +/* +** Generate a CREATE TABLE statement appropriate for the given +** table. Memory to hold the text of the statement is obtained +** from sqliteMalloc() and must be freed by the calling function. +*/ +static char *createTableStmt(sqlite3 *db, Table *p){ + int i, k, n; + char *zStmt; + char *zSep, *zSep2, *zEnd; + Column *pCol; + n = 0; + for(pCol = p->aCol, i=0; inCol; i++, pCol++){ + n += identLength(pCol->zCnName) + 5; + } + n += identLength(p->zName); + if( n<50 ){ + zSep = ""; + zSep2 = ","; + zEnd = ")"; + }else{ + zSep = "\n "; + zSep2 = ",\n "; + zEnd = "\n)"; + } + n += 35 + 6*p->nCol; + zStmt = sqlite3DbMallocRaw(0, n); + if( zStmt==0 ){ + sqlite3OomFault(db); + return 0; + } + sqlite3_snprintf(n, zStmt, "CREATE TABLE "); + k = sqlite3Strlen30(zStmt); + identPut(zStmt, &k, p->zName); + zStmt[k++] = '('; + for(pCol=p->aCol, i=0; inCol; i++, pCol++){ + static const char * const azType[] = { + /* SQLITE_AFF_BLOB */ "", + /* SQLITE_AFF_TEXT */ " TEXT", + /* SQLITE_AFF_NUMERIC */ " NUM", + /* SQLITE_AFF_INTEGER */ " INT", + /* SQLITE_AFF_REAL */ " REAL" + }; + int len; + const char *zType; + + sqlite3_snprintf(n-k, &zStmt[k], zSep); + k += sqlite3Strlen30(&zStmt[k]); + zSep = zSep2; + identPut(zStmt, &k, pCol->zCnName); + assert( pCol->affinity-SQLITE_AFF_BLOB >= 0 ); + assert( pCol->affinity-SQLITE_AFF_BLOB < ArraySize(azType) ); + testcase( pCol->affinity==SQLITE_AFF_BLOB ); + testcase( pCol->affinity==SQLITE_AFF_TEXT ); + testcase( pCol->affinity==SQLITE_AFF_NUMERIC ); + testcase( pCol->affinity==SQLITE_AFF_INTEGER ); + testcase( pCol->affinity==SQLITE_AFF_REAL ); + + zType = azType[pCol->affinity - SQLITE_AFF_BLOB]; + len = sqlite3Strlen30(zType); + assert( pCol->affinity==SQLITE_AFF_BLOB + || pCol->affinity==sqlite3AffinityType(zType, 0) ); + memcpy(&zStmt[k], zType, len); + k += len; + assert( k<=n ); + } + sqlite3_snprintf(n-k, &zStmt[k], "%s", zEnd); + return zStmt; +} + +/* +** Resize an Index object to hold N columns total. Return SQLITE_OK +** on success and SQLITE_NOMEM on an OOM error. +*/ +static int resizeIndexObject(sqlite3 *db, Index *pIdx, int N){ + char *zExtra; + int nByte; + if( pIdx->nColumn>=N ) return SQLITE_OK; + assert( pIdx->isResized==0 ); + nByte = (sizeof(char*) + sizeof(LogEst) + sizeof(i16) + 1)*N; + zExtra = sqlite3DbMallocZero(db, nByte); + if( zExtra==0 ) return SQLITE_NOMEM_BKPT; + memcpy(zExtra, pIdx->azColl, sizeof(char*)*pIdx->nColumn); + pIdx->azColl = (const char**)zExtra; + zExtra += sizeof(char*)*N; + memcpy(zExtra, pIdx->aiRowLogEst, sizeof(LogEst)*(pIdx->nKeyCol+1)); + pIdx->aiRowLogEst = (LogEst*)zExtra; + zExtra += sizeof(LogEst)*N; + memcpy(zExtra, pIdx->aiColumn, sizeof(i16)*pIdx->nColumn); + pIdx->aiColumn = (i16*)zExtra; + zExtra += sizeof(i16)*N; + memcpy(zExtra, pIdx->aSortOrder, pIdx->nColumn); + pIdx->aSortOrder = (u8*)zExtra; + pIdx->nColumn = N; + pIdx->isResized = 1; + return SQLITE_OK; +} + +/* +** Estimate the total row width for a table. +*/ +static void estimateTableWidth(Table *pTab){ + unsigned wTable = 0; + const Column *pTabCol; + int i; + for(i=pTab->nCol, pTabCol=pTab->aCol; i>0; i--, pTabCol++){ + wTable += pTabCol->szEst; + } + if( pTab->iPKey<0 ) wTable++; + pTab->szTabRow = sqlite3LogEst(wTable*4); +} + +/* +** Estimate the average size of a row for an index. +*/ +static void estimateIndexWidth(Index *pIdx){ + unsigned wIndex = 0; + int i; + const Column *aCol = pIdx->pTable->aCol; + for(i=0; inColumn; i++){ + i16 x = pIdx->aiColumn[i]; + assert( xpTable->nCol ); + wIndex += x<0 ? 1 : aCol[pIdx->aiColumn[i]].szEst; + } + pIdx->szIdxRow = sqlite3LogEst(wIndex*4); +} + +/* Return true if column number x is any of the first nCol entries of aiCol[]. +** This is used to determine if the column number x appears in any of the +** first nCol entries of an index. +*/ +static int hasColumn(const i16 *aiCol, int nCol, int x){ + while( nCol-- > 0 ){ + if( x==*(aiCol++) ){ + return 1; + } + } + return 0; +} + +/* +** Return true if any of the first nKey entries of index pIdx exactly +** match the iCol-th entry of pPk. pPk is always a WITHOUT ROWID +** PRIMARY KEY index. pIdx is an index on the same table. pIdx may +** or may not be the same index as pPk. +** +** The first nKey entries of pIdx are guaranteed to be ordinary columns, +** not a rowid or expression. +** +** This routine differs from hasColumn() in that both the column and the +** collating sequence must match for this routine, but for hasColumn() only +** the column name must match. +*/ +static int isDupColumn(Index *pIdx, int nKey, Index *pPk, int iCol){ + int i, j; + assert( nKey<=pIdx->nColumn ); + assert( iColnColumn,pPk->nKeyCol) ); + assert( pPk->idxType==SQLITE_IDXTYPE_PRIMARYKEY ); + assert( pPk->pTable->tabFlags & TF_WithoutRowid ); + assert( pPk->pTable==pIdx->pTable ); + testcase( pPk==pIdx ); + j = pPk->aiColumn[iCol]; + assert( j!=XN_ROWID && j!=XN_EXPR ); + for(i=0; iaiColumn[i]>=0 || j>=0 ); + if( pIdx->aiColumn[i]==j + && sqlite3StrICmp(pIdx->azColl[i], pPk->azColl[iCol])==0 + ){ + return 1; + } + } + return 0; +} + +/* Recompute the colNotIdxed field of the Index. +** +** colNotIdxed is a bitmask that has a 0 bit representing each indexed +** columns that are within the first 63 columns of the table. The +** high-order bit of colNotIdxed is always 1. All unindexed columns +** of the table have a 1. +** +** 2019-10-24: For the purpose of this computation, virtual columns are +** not considered to be covered by the index, even if they are in the +** index, because we do not trust the logic in whereIndexExprTrans() to be +** able to find all instances of a reference to the indexed table column +** and convert them into references to the index. Hence we always want +** the actual table at hand in order to recompute the virtual column, if +** necessary. +** +** The colNotIdxed mask is AND-ed with the SrcList.a[].colUsed mask +** to determine if the index is covering index. +*/ +static void recomputeColumnsNotIndexed(Index *pIdx){ + Bitmask m = 0; + int j; + Table *pTab = pIdx->pTable; + for(j=pIdx->nColumn-1; j>=0; j--){ + int x = pIdx->aiColumn[j]; + if( x>=0 && (pTab->aCol[x].colFlags & COLFLAG_VIRTUAL)==0 ){ + testcase( x==BMS-1 ); + testcase( x==BMS-2 ); + if( xcolNotIdxed = ~m; + assert( (pIdx->colNotIdxed>>63)==1 ); +} + +/* +** This routine runs at the end of parsing a CREATE TABLE statement that +** has a WITHOUT ROWID clause. The job of this routine is to convert both +** internal schema data structures and the generated VDBE code so that they +** are appropriate for a WITHOUT ROWID table instead of a rowid table. +** Changes include: +** +** (1) Set all columns of the PRIMARY KEY schema object to be NOT NULL. +** (2) Convert P3 parameter of the OP_CreateBtree from BTREE_INTKEY +** into BTREE_BLOBKEY. +** (3) Bypass the creation of the sqlite_schema table entry +** for the PRIMARY KEY as the primary key index is now +** identified by the sqlite_schema table entry of the table itself. +** (4) Set the Index.tnum of the PRIMARY KEY Index object in the +** schema to the rootpage from the main table. +** (5) Add all table columns to the PRIMARY KEY Index object +** so that the PRIMARY KEY is a covering index. The surplus +** columns are part of KeyInfo.nAllField and are not used for +** sorting or lookup or uniqueness checks. +** (6) Replace the rowid tail on all automatically generated UNIQUE +** indices with the PRIMARY KEY columns. +** +** For virtual tables, only (1) is performed. +*/ +static void convertToWithoutRowidTable(Parse *pParse, Table *pTab){ + Index *pIdx; + Index *pPk; + int nPk; + int nExtra; + int i, j; + sqlite3 *db = pParse->db; + Vdbe *v = pParse->pVdbe; + + /* Mark every PRIMARY KEY column as NOT NULL (except for imposter tables) + */ + if( !db->init.imposterTable ){ + for(i=0; inCol; i++){ + if( (pTab->aCol[i].colFlags & COLFLAG_PRIMKEY)!=0 + && (pTab->aCol[i].notNull==OE_None) + ){ + pTab->aCol[i].notNull = OE_Abort; + } + } + pTab->tabFlags |= TF_HasNotNull; + } + + /* Convert the P3 operand of the OP_CreateBtree opcode from BTREE_INTKEY + ** into BTREE_BLOBKEY. + */ + assert( !pParse->bReturning ); + if( pParse->u1.addrCrTab ){ + assert( v ); + sqlite3VdbeChangeP3(v, pParse->u1.addrCrTab, BTREE_BLOBKEY); + } + + /* Locate the PRIMARY KEY index. Or, if this table was originally + ** an INTEGER PRIMARY KEY table, create a new PRIMARY KEY index. + */ + if( pTab->iPKey>=0 ){ + ExprList *pList; + Token ipkToken; + sqlite3TokenInit(&ipkToken, pTab->aCol[pTab->iPKey].zCnName); + pList = sqlite3ExprListAppend(pParse, 0, + sqlite3ExprAlloc(db, TK_ID, &ipkToken, 0)); + if( pList==0 ){ + pTab->tabFlags &= ~TF_WithoutRowid; + return; + } + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenRemap(pParse, pList->a[0].pExpr, &pTab->iPKey); + } + pList->a[0].sortFlags = pParse->iPkSortOrder; + assert( pParse->pNewTable==pTab ); + pTab->iPKey = -1; + sqlite3CreateIndex(pParse, 0, 0, 0, pList, pTab->keyConf, 0, 0, 0, 0, + SQLITE_IDXTYPE_PRIMARYKEY); + if( pParse->nErr ){ + pTab->tabFlags &= ~TF_WithoutRowid; + return; + } + assert( db->mallocFailed==0 ); + pPk = sqlite3PrimaryKeyIndex(pTab); + assert( pPk->nKeyCol==1 ); + }else{ + pPk = sqlite3PrimaryKeyIndex(pTab); + assert( pPk!=0 ); + + /* + ** Remove all redundant columns from the PRIMARY KEY. For example, change + ** "PRIMARY KEY(a,b,a,b,c,b,c,d)" into just "PRIMARY KEY(a,b,c,d)". Later + ** code assumes the PRIMARY KEY contains no repeated columns. + */ + for(i=j=1; inKeyCol; i++){ + if( isDupColumn(pPk, j, pPk, i) ){ + pPk->nColumn--; + }else{ + testcase( hasColumn(pPk->aiColumn, j, pPk->aiColumn[i]) ); + pPk->azColl[j] = pPk->azColl[i]; + pPk->aSortOrder[j] = pPk->aSortOrder[i]; + pPk->aiColumn[j++] = pPk->aiColumn[i]; + } + } + pPk->nKeyCol = j; + } + assert( pPk!=0 ); + pPk->isCovering = 1; + if( !db->init.imposterTable ) pPk->uniqNotNull = 1; + nPk = pPk->nColumn = pPk->nKeyCol; + + /* Bypass the creation of the PRIMARY KEY btree and the sqlite_schema + ** table entry. This is only required if currently generating VDBE + ** code for a CREATE TABLE (not when parsing one as part of reading + ** a database schema). */ + if( v && pPk->tnum>0 ){ + assert( db->init.busy==0 ); + sqlite3VdbeChangeOpcode(v, (int)pPk->tnum, OP_Goto); + } + + /* The root page of the PRIMARY KEY is the table root page */ + pPk->tnum = pTab->tnum; + + /* Update the in-memory representation of all UNIQUE indices by converting + ** the final rowid column into one or more columns of the PRIMARY KEY. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int n; + if( IsPrimaryKeyIndex(pIdx) ) continue; + for(i=n=0; inKeyCol, pPk, i) ){ + testcase( hasColumn(pIdx->aiColumn, pIdx->nKeyCol, pPk->aiColumn[i]) ); + n++; + } + } + if( n==0 ){ + /* This index is a superset of the primary key */ + pIdx->nColumn = pIdx->nKeyCol; + continue; + } + if( resizeIndexObject(db, pIdx, pIdx->nKeyCol+n) ) return; + for(i=0, j=pIdx->nKeyCol; inKeyCol, pPk, i) ){ + testcase( hasColumn(pIdx->aiColumn, pIdx->nKeyCol, pPk->aiColumn[i]) ); + pIdx->aiColumn[j] = pPk->aiColumn[i]; + pIdx->azColl[j] = pPk->azColl[i]; + if( pPk->aSortOrder[i] ){ + /* See ticket https://www.sqlite.org/src/info/bba7b69f9849b5bf */ + pIdx->bAscKeyBug = 1; + } + j++; + } + } + assert( pIdx->nColumn>=pIdx->nKeyCol+n ); + assert( pIdx->nColumn>=j ); + } + + /* Add all table columns to the PRIMARY KEY index + */ + nExtra = 0; + for(i=0; inCol; i++){ + if( !hasColumn(pPk->aiColumn, nPk, i) + && (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 ) nExtra++; + } + if( resizeIndexObject(db, pPk, nPk+nExtra) ) return; + for(i=0, j=nPk; inCol; i++){ + if( !hasColumn(pPk->aiColumn, j, i) + && (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 + ){ + assert( jnColumn ); + pPk->aiColumn[j] = i; + pPk->azColl[j] = sqlite3StrBINARY; + j++; + } + } + assert( pPk->nColumn==j ); + assert( pTab->nNVCol<=j ); + recomputeColumnsNotIndexed(pPk); +} + + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Return true if pTab is a virtual table and zName is a shadow table name +** for that virtual table. +*/ +SQLITE_PRIVATE int sqlite3IsShadowTableOf(sqlite3 *db, Table *pTab, const char *zName){ + int nName; /* Length of zName */ + Module *pMod; /* Module for the virtual table */ + + if( !IsVirtual(pTab) ) return 0; + nName = sqlite3Strlen30(pTab->zName); + if( sqlite3_strnicmp(zName, pTab->zName, nName)!=0 ) return 0; + if( zName[nName]!='_' ) return 0; + pMod = (Module*)sqlite3HashFind(&db->aModule, pTab->u.vtab.azArg[0]); + if( pMod==0 ) return 0; + if( pMod->pModule->iVersion<3 ) return 0; + if( pMod->pModule->xShadowName==0 ) return 0; + return pMod->pModule->xShadowName(zName+nName+1); +} +#endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Table pTab is a virtual table. If it the virtual table implementation +** exists and has an xShadowName method, then loop over all other ordinary +** tables within the same schema looking for shadow tables of pTab, and mark +** any shadow tables seen using the TF_Shadow flag. +*/ +SQLITE_PRIVATE void sqlite3MarkAllShadowTablesOf(sqlite3 *db, Table *pTab){ + int nName; /* Length of pTab->zName */ + Module *pMod; /* Module for the virtual table */ + HashElem *k; /* For looping through the symbol table */ + + assert( IsVirtual(pTab) ); + pMod = (Module*)sqlite3HashFind(&db->aModule, pTab->u.vtab.azArg[0]); + if( pMod==0 ) return; + if( NEVER(pMod->pModule==0) ) return; + if( pMod->pModule->iVersion<3 ) return; + if( pMod->pModule->xShadowName==0 ) return; + assert( pTab->zName!=0 ); + nName = sqlite3Strlen30(pTab->zName); + for(k=sqliteHashFirst(&pTab->pSchema->tblHash); k; k=sqliteHashNext(k)){ + Table *pOther = sqliteHashData(k); + assert( pOther->zName!=0 ); + if( !IsOrdinaryTable(pOther) ) continue; + if( pOther->tabFlags & TF_Shadow ) continue; + if( sqlite3StrNICmp(pOther->zName, pTab->zName, nName)==0 + && pOther->zName[nName]=='_' + && pMod->pModule->xShadowName(pOther->zName+nName+1) + ){ + pOther->tabFlags |= TF_Shadow; + } + } +} +#endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Return true if zName is a shadow table name in the current database +** connection. +** +** zName is temporarily modified while this routine is running, but is +** restored to its original value prior to this routine returning. +*/ +SQLITE_PRIVATE int sqlite3ShadowTableName(sqlite3 *db, const char *zName){ + char *zTail; /* Pointer to the last "_" in zName */ + Table *pTab; /* Table that zName is a shadow of */ + zTail = strrchr(zName, '_'); + if( zTail==0 ) return 0; + *zTail = 0; + pTab = sqlite3FindTable(db, zName, 0); + *zTail = '_'; + if( pTab==0 ) return 0; + if( !IsVirtual(pTab) ) return 0; + return sqlite3IsShadowTableOf(db, pTab, zName); +} +#endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */ + + +#ifdef SQLITE_DEBUG +/* +** Mark all nodes of an expression as EP_Immutable, indicating that +** they should not be changed. Expressions attached to a table or +** index definition are tagged this way to help ensure that we do +** not pass them into code generator routines by mistake. +*/ +static int markImmutableExprStep(Walker *pWalker, Expr *pExpr){ + ExprSetVVAProperty(pExpr, EP_Immutable); + return WRC_Continue; +} +static void markExprListImmutable(ExprList *pList){ + if( pList ){ + Walker w; + memset(&w, 0, sizeof(w)); + w.xExprCallback = markImmutableExprStep; + w.xSelectCallback = sqlite3SelectWalkNoop; + w.xSelectCallback2 = 0; + sqlite3WalkExprList(&w, pList); + } +} +#else +#define markExprListImmutable(X) /* no-op */ +#endif /* SQLITE_DEBUG */ + + +/* +** This routine is called to report the final ")" that terminates +** a CREATE TABLE statement. +** +** The table structure that other action routines have been building +** is added to the internal hash tables, assuming no errors have +** occurred. +** +** An entry for the table is made in the schema table on disk, unless +** this is a temporary table or db->init.busy==1. When db->init.busy==1 +** it means we are reading the sqlite_schema table because we just +** connected to the database or because the sqlite_schema table has +** recently changed, so the entry for this table already exists in +** the sqlite_schema table. We do not want to create it again. +** +** If the pSelect argument is not NULL, it means that this routine +** was called to create a table generated from a +** "CREATE TABLE ... AS SELECT ..." statement. The column names of +** the new table will match the result set of the SELECT. +*/ +SQLITE_PRIVATE void sqlite3EndTable( + Parse *pParse, /* Parse context */ + Token *pCons, /* The ',' token after the last column defn. */ + Token *pEnd, /* The ')' before options in the CREATE TABLE */ + u32 tabOpts, /* Extra table options. Usually 0. */ + Select *pSelect /* Select from a "CREATE ... AS SELECT" */ +){ + Table *p; /* The new table */ + sqlite3 *db = pParse->db; /* The database connection */ + int iDb; /* Database in which the table lives */ + Index *pIdx; /* An implied index of the table */ + + if( pEnd==0 && pSelect==0 ){ + return; + } + p = pParse->pNewTable; + if( p==0 ) return; + + if( pSelect==0 && sqlite3ShadowTableName(db, p->zName) ){ + p->tabFlags |= TF_Shadow; + } + + /* If the db->init.busy is 1 it means we are reading the SQL off the + ** "sqlite_schema" or "sqlite_temp_schema" table on the disk. + ** So do not write to the disk again. Extract the root page number + ** for the table from the db->init.newTnum field. (The page number + ** should have been put there by the sqliteOpenCb routine.) + ** + ** If the root page number is 1, that means this is the sqlite_schema + ** table itself. So mark it read-only. + */ + if( db->init.busy ){ + if( pSelect || (!IsOrdinaryTable(p) && db->init.newTnum) ){ + sqlite3ErrorMsg(pParse, ""); + return; + } + p->tnum = db->init.newTnum; + if( p->tnum==1 ) p->tabFlags |= TF_Readonly; + } + + /* Special processing for tables that include the STRICT keyword: + ** + ** * Do not allow custom column datatypes. Every column must have + ** a datatype that is one of INT, INTEGER, REAL, TEXT, or BLOB. + ** + ** * If a PRIMARY KEY is defined, other than the INTEGER PRIMARY KEY, + ** then all columns of the PRIMARY KEY must have a NOT NULL + ** constraint. + */ + if( tabOpts & TF_Strict ){ + int ii; + p->tabFlags |= TF_Strict; + for(ii=0; iinCol; ii++){ + Column *pCol = &p->aCol[ii]; + if( pCol->eCType==COLTYPE_CUSTOM ){ + if( pCol->colFlags & COLFLAG_HASTYPE ){ + sqlite3ErrorMsg(pParse, + "unknown datatype for %s.%s: \"%s\"", + p->zName, pCol->zCnName, sqlite3ColumnType(pCol, "") + ); + }else{ + sqlite3ErrorMsg(pParse, "missing datatype for %s.%s", + p->zName, pCol->zCnName); + } + return; + }else if( pCol->eCType==COLTYPE_ANY ){ + pCol->affinity = SQLITE_AFF_BLOB; + } + if( (pCol->colFlags & COLFLAG_PRIMKEY)!=0 + && p->iPKey!=ii + && pCol->notNull == OE_None + ){ + pCol->notNull = OE_Abort; + p->tabFlags |= TF_HasNotNull; + } + } + } + + assert( (p->tabFlags & TF_HasPrimaryKey)==0 + || p->iPKey>=0 || sqlite3PrimaryKeyIndex(p)!=0 ); + assert( (p->tabFlags & TF_HasPrimaryKey)!=0 + || (p->iPKey<0 && sqlite3PrimaryKeyIndex(p)==0) ); + + /* Special processing for WITHOUT ROWID Tables */ + if( tabOpts & TF_WithoutRowid ){ + if( (p->tabFlags & TF_Autoincrement) ){ + sqlite3ErrorMsg(pParse, + "AUTOINCREMENT not allowed on WITHOUT ROWID tables"); + return; + } + if( (p->tabFlags & TF_HasPrimaryKey)==0 ){ + sqlite3ErrorMsg(pParse, "PRIMARY KEY missing on table %s", p->zName); + return; + } + p->tabFlags |= TF_WithoutRowid | TF_NoVisibleRowid; + convertToWithoutRowidTable(pParse, p); + } + iDb = sqlite3SchemaToIndex(db, p->pSchema); + +#ifndef SQLITE_OMIT_CHECK + /* Resolve names in all CHECK constraint expressions. + */ + if( p->pCheck ){ + sqlite3ResolveSelfReference(pParse, p, NC_IsCheck, 0, p->pCheck); + if( pParse->nErr ){ + /* If errors are seen, delete the CHECK constraints now, else they might + ** actually be used if PRAGMA writable_schema=ON is set. */ + sqlite3ExprListDelete(db, p->pCheck); + p->pCheck = 0; + }else{ + markExprListImmutable(p->pCheck); + } + } +#endif /* !defined(SQLITE_OMIT_CHECK) */ +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( p->tabFlags & TF_HasGenerated ){ + int ii, nNG = 0; + testcase( p->tabFlags & TF_HasVirtual ); + testcase( p->tabFlags & TF_HasStored ); + for(ii=0; iinCol; ii++){ + u32 colFlags = p->aCol[ii].colFlags; + if( (colFlags & COLFLAG_GENERATED)!=0 ){ + Expr *pX = sqlite3ColumnExpr(p, &p->aCol[ii]); + testcase( colFlags & COLFLAG_VIRTUAL ); + testcase( colFlags & COLFLAG_STORED ); + if( sqlite3ResolveSelfReference(pParse, p, NC_GenCol, pX, 0) ){ + /* If there are errors in resolving the expression, change the + ** expression to a NULL. This prevents code generators that operate + ** on the expression from inserting extra parts into the expression + ** tree that have been allocated from lookaside memory, which is + ** illegal in a schema and will lead to errors or heap corruption + ** when the database connection closes. */ + sqlite3ColumnSetExpr(pParse, p, &p->aCol[ii], + sqlite3ExprAlloc(db, TK_NULL, 0, 0)); + } + }else{ + nNG++; + } + } + if( nNG==0 ){ + sqlite3ErrorMsg(pParse, "must have at least one non-generated column"); + return; + } + } +#endif + + /* Estimate the average row size for the table and for all implied indices */ + estimateTableWidth(p); + for(pIdx=p->pIndex; pIdx; pIdx=pIdx->pNext){ + estimateIndexWidth(pIdx); + } + + /* If not initializing, then create a record for the new table + ** in the schema table of the database. + ** + ** If this is a TEMPORARY table, write the entry into the auxiliary + ** file instead of into the main database file. + */ + if( !db->init.busy ){ + int n; + Vdbe *v; + char *zType; /* "view" or "table" */ + char *zType2; /* "VIEW" or "TABLE" */ + char *zStmt; /* Text of the CREATE TABLE or CREATE VIEW statement */ + + v = sqlite3GetVdbe(pParse); + if( NEVER(v==0) ) return; + + sqlite3VdbeAddOp1(v, OP_Close, 0); + + /* + ** Initialize zType for the new view or table. + */ + if( IsOrdinaryTable(p) ){ + /* A regular table */ + zType = "table"; + zType2 = "TABLE"; +#ifndef SQLITE_OMIT_VIEW + }else{ + /* A view */ + zType = "view"; + zType2 = "VIEW"; +#endif + } + + /* If this is a CREATE TABLE xx AS SELECT ..., execute the SELECT + ** statement to populate the new table. The root-page number for the + ** new table is in register pParse->regRoot. + ** + ** Once the SELECT has been coded by sqlite3Select(), it is in a + ** suitable state to query for the column names and types to be used + ** by the new table. + ** + ** A shared-cache write-lock is not required to write to the new table, + ** as a schema-lock must have already been obtained to create it. Since + ** a schema-lock excludes all other database users, the write-lock would + ** be redundant. + */ + if( pSelect ){ + SelectDest dest; /* Where the SELECT should store results */ + int regYield; /* Register holding co-routine entry-point */ + int addrTop; /* Top of the co-routine */ + int regRec; /* A record to be insert into the new table */ + int regRowid; /* Rowid of the next row to insert */ + int addrInsLoop; /* Top of the loop for inserting rows */ + Table *pSelTab; /* A table that describes the SELECT results */ + + if( IN_SPECIAL_PARSE ){ + pParse->rc = SQLITE_ERROR; + pParse->nErr++; + return; + } + regYield = ++pParse->nMem; + regRec = ++pParse->nMem; + regRowid = ++pParse->nMem; + assert(pParse->nTab==1); + sqlite3MayAbort(pParse); + sqlite3VdbeAddOp3(v, OP_OpenWrite, 1, pParse->regRoot, iDb); + sqlite3VdbeChangeP5(v, OPFLAG_P2ISREG); + pParse->nTab = 2; + addrTop = sqlite3VdbeCurrentAddr(v) + 1; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, addrTop); + if( pParse->nErr ) return; + pSelTab = sqlite3ResultSetOfSelect(pParse, pSelect, SQLITE_AFF_BLOB); + if( pSelTab==0 ) return; + assert( p->aCol==0 ); + p->nCol = p->nNVCol = pSelTab->nCol; + p->aCol = pSelTab->aCol; + pSelTab->nCol = 0; + pSelTab->aCol = 0; + sqlite3DeleteTable(db, pSelTab); + sqlite3SelectDestInit(&dest, SRT_Coroutine, regYield); + sqlite3Select(pParse, pSelect, &dest); + if( pParse->nErr ) return; + sqlite3VdbeEndCoroutine(v, regYield); + sqlite3VdbeJumpHere(v, addrTop - 1); + addrInsLoop = sqlite3VdbeAddOp1(v, OP_Yield, dest.iSDParm); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_MakeRecord, dest.iSdst, dest.nSdst, regRec); + sqlite3TableAffinity(v, p, 0); + sqlite3VdbeAddOp2(v, OP_NewRowid, 1, regRowid); + sqlite3VdbeAddOp3(v, OP_Insert, 1, regRec, regRowid); + sqlite3VdbeGoto(v, addrInsLoop); + sqlite3VdbeJumpHere(v, addrInsLoop); + sqlite3VdbeAddOp1(v, OP_Close, 1); + } + + /* Compute the complete text of the CREATE statement */ + if( pSelect ){ + zStmt = createTableStmt(db, p); + }else{ + Token *pEnd2 = tabOpts ? &pParse->sLastToken : pEnd; + n = (int)(pEnd2->z - pParse->sNameToken.z); + if( pEnd2->z[0]!=';' ) n += pEnd2->n; + zStmt = sqlite3MPrintf(db, + "CREATE %s %.*s", zType2, n, pParse->sNameToken.z + ); + } + + /* A slot for the record has already been allocated in the + ** schema table. We just need to update that slot with all + ** the information we've collected. + */ + sqlite3NestedParse(pParse, + "UPDATE %Q." LEGACY_SCHEMA_TABLE + " SET type='%s', name=%Q, tbl_name=%Q, rootpage=#%d, sql=%Q" + " WHERE rowid=#%d", + db->aDb[iDb].zDbSName, + zType, + p->zName, + p->zName, + pParse->regRoot, + zStmt, + pParse->regRowid + ); + sqlite3DbFree(db, zStmt); + sqlite3ChangeCookie(pParse, iDb); + +#ifndef SQLITE_OMIT_AUTOINCREMENT + /* Check to see if we need to create an sqlite_sequence table for + ** keeping track of autoincrement keys. + */ + if( (p->tabFlags & TF_Autoincrement)!=0 && !IN_SPECIAL_PARSE ){ + Db *pDb = &db->aDb[iDb]; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( pDb->pSchema->pSeqTab==0 ){ + sqlite3NestedParse(pParse, + "CREATE TABLE %Q.sqlite_sequence(name,seq)", + pDb->zDbSName + ); + } + } +#endif + + /* Reparse everything to update our internal data structures */ + sqlite3VdbeAddParseSchemaOp(v, iDb, + sqlite3MPrintf(db, "tbl_name='%q' AND type!='trigger'", p->zName),0); + } + + /* Add the table to the in-memory representation of the database. + */ + if( db->init.busy ){ + Table *pOld; + Schema *pSchema = p->pSchema; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + assert( HasRowid(p) || p->iPKey<0 ); + pOld = sqlite3HashInsert(&pSchema->tblHash, p->zName, p); + if( pOld ){ + assert( p==pOld ); /* Malloc must have failed inside HashInsert() */ + sqlite3OomFault(db); + return; + } + pParse->pNewTable = 0; + db->mDbFlags |= DBFLAG_SchemaChange; + + /* If this is the magic sqlite_sequence table used by autoincrement, + ** then record a pointer to this table in the main database structure + ** so that INSERT can find the table easily. */ + assert( !pParse->nested ); +#ifndef SQLITE_OMIT_AUTOINCREMENT + if( strcmp(p->zName, "sqlite_sequence")==0 ){ + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + p->pSchema->pSeqTab = p; + } +#endif + } + +#ifndef SQLITE_OMIT_ALTERTABLE + if( !pSelect && IsOrdinaryTable(p) ){ + assert( pCons && pEnd ); + if( pCons->z==0 ){ + pCons = pEnd; + } + p->u.tab.addColOffset = 13 + (int)(pCons->z - pParse->sNameToken.z); + } +#endif +} + +#ifndef SQLITE_OMIT_VIEW +/* +** The parser calls this routine in order to create a new VIEW +*/ +SQLITE_PRIVATE void sqlite3CreateView( + Parse *pParse, /* The parsing context */ + Token *pBegin, /* The CREATE token that begins the statement */ + Token *pName1, /* The token that holds the name of the view */ + Token *pName2, /* The token that holds the name of the view */ + ExprList *pCNames, /* Optional list of view column names */ + Select *pSelect, /* A SELECT statement that will become the new view */ + int isTemp, /* TRUE for a TEMPORARY view */ + int noErr /* Suppress error messages if VIEW already exists */ +){ + Table *p; + int n; + const char *z; + Token sEnd; + DbFixer sFix; + Token *pName = 0; + int iDb; + sqlite3 *db = pParse->db; + + if( pParse->nVar>0 ){ + sqlite3ErrorMsg(pParse, "parameters are not allowed in views"); + goto create_view_fail; + } + sqlite3StartTable(pParse, pName1, pName2, isTemp, 1, 0, noErr); + p = pParse->pNewTable; + if( p==0 || pParse->nErr ) goto create_view_fail; + + /* Legacy versions of SQLite allowed the use of the magic "rowid" column + ** on a view, even though views do not have rowids. The following flag + ** setting fixes this problem. But the fix can be disabled by compiling + ** with -DSQLITE_ALLOW_ROWID_IN_VIEW in case there are legacy apps that + ** depend upon the old buggy behavior. */ +#ifndef SQLITE_ALLOW_ROWID_IN_VIEW + p->tabFlags |= TF_NoVisibleRowid; +#endif + + sqlite3TwoPartName(pParse, pName1, pName2, &pName); + iDb = sqlite3SchemaToIndex(db, p->pSchema); + sqlite3FixInit(&sFix, pParse, iDb, "view", pName); + if( sqlite3FixSelect(&sFix, pSelect) ) goto create_view_fail; + + /* Make a copy of the entire SELECT statement that defines the view. + ** This will force all the Expr.token.z values to be dynamically + ** allocated rather than point to the input string - which means that + ** they will persist after the current sqlite3_exec() call returns. + */ + pSelect->selFlags |= SF_View; + if( IN_RENAME_OBJECT ){ + p->u.view.pSelect = pSelect; + pSelect = 0; + }else{ + p->u.view.pSelect = sqlite3SelectDup(db, pSelect, EXPRDUP_REDUCE); + } + p->pCheck = sqlite3ExprListDup(db, pCNames, EXPRDUP_REDUCE); + p->eTabType = TABTYP_VIEW; + if( db->mallocFailed ) goto create_view_fail; + + /* Locate the end of the CREATE VIEW statement. Make sEnd point to + ** the end. + */ + sEnd = pParse->sLastToken; + assert( sEnd.z[0]!=0 || sEnd.n==0 ); + if( sEnd.z[0]!=';' ){ + sEnd.z += sEnd.n; + } + sEnd.n = 0; + n = (int)(sEnd.z - pBegin->z); + assert( n>0 ); + z = pBegin->z; + while( sqlite3Isspace(z[n-1]) ){ n--; } + sEnd.z = &z[n-1]; + sEnd.n = 1; + + /* Use sqlite3EndTable() to add the view to the schema table */ + sqlite3EndTable(pParse, 0, &sEnd, 0, 0); + +create_view_fail: + sqlite3SelectDelete(db, pSelect); + if( IN_RENAME_OBJECT ){ + sqlite3RenameExprlistUnmap(pParse, pCNames); + } + sqlite3ExprListDelete(db, pCNames); + return; +} +#endif /* SQLITE_OMIT_VIEW */ + +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) +/* +** The Table structure pTable is really a VIEW. Fill in the names of +** the columns of the view in the pTable structure. Return the number +** of errors. If an error is seen leave an error message in pParse->zErrMsg. +*/ +SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse *pParse, Table *pTable){ + Table *pSelTab; /* A fake table from which we get the result set */ + Select *pSel; /* Copy of the SELECT that implements the view */ + int nErr = 0; /* Number of errors encountered */ + int n; /* Temporarily holds the number of cursors assigned */ + sqlite3 *db = pParse->db; /* Database connection for malloc errors */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + int rc; +#endif +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth; /* Saved xAuth pointer */ +#endif + + assert( pTable ); + +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pTable) ){ + db->nSchemaLock++; + rc = sqlite3VtabCallConnect(pParse, pTable); + db->nSchemaLock--; + return rc; + } +#endif + +#ifndef SQLITE_OMIT_VIEW + /* A positive nCol means the columns names for this view are + ** already known. + */ + if( pTable->nCol>0 ) return 0; + + /* A negative nCol is a special marker meaning that we are currently + ** trying to compute the column names. If we enter this routine with + ** a negative nCol, it means two or more views form a loop, like this: + ** + ** CREATE VIEW one AS SELECT * FROM two; + ** CREATE VIEW two AS SELECT * FROM one; + ** + ** Actually, the error above is now caught prior to reaching this point. + ** But the following test is still important as it does come up + ** in the following: + ** + ** CREATE TABLE main.ex1(a); + ** CREATE TEMP VIEW ex1 AS SELECT a FROM ex1; + ** SELECT * FROM temp.ex1; + */ + if( pTable->nCol<0 ){ + sqlite3ErrorMsg(pParse, "view %s is circularly defined", pTable->zName); + return 1; + } + assert( pTable->nCol>=0 ); + + /* If we get this far, it means we need to compute the table names. + ** Note that the call to sqlite3ResultSetOfSelect() will expand any + ** "*" elements in the results set of the view and will assign cursors + ** to the elements of the FROM clause. But we do not want these changes + ** to be permanent. So the computation is done on a copy of the SELECT + ** statement that defines the view. + */ + assert( IsView(pTable) ); + pSel = sqlite3SelectDup(db, pTable->u.view.pSelect, 0); + if( pSel ){ + u8 eParseMode = pParse->eParseMode; + pParse->eParseMode = PARSE_MODE_NORMAL; + n = pParse->nTab; + sqlite3SrcListAssignCursors(pParse, pSel->pSrc); + pTable->nCol = -1; + DisableLookaside; +#ifndef SQLITE_OMIT_AUTHORIZATION + xAuth = db->xAuth; + db->xAuth = 0; + pSelTab = sqlite3ResultSetOfSelect(pParse, pSel, SQLITE_AFF_NONE); + db->xAuth = xAuth; +#else + pSelTab = sqlite3ResultSetOfSelect(pParse, pSel, SQLITE_AFF_NONE); +#endif + pParse->nTab = n; + if( pSelTab==0 ){ + pTable->nCol = 0; + nErr++; + }else if( pTable->pCheck ){ + /* CREATE VIEW name(arglist) AS ... + ** The names of the columns in the table are taken from + ** arglist which is stored in pTable->pCheck. The pCheck field + ** normally holds CHECK constraints on an ordinary table, but for + ** a VIEW it holds the list of column names. + */ + sqlite3ColumnsFromExprList(pParse, pTable->pCheck, + &pTable->nCol, &pTable->aCol); + if( pParse->nErr==0 + && pTable->nCol==pSel->pEList->nExpr + ){ + assert( db->mallocFailed==0 ); + sqlite3SelectAddColumnTypeAndCollation(pParse, pTable, pSel, + SQLITE_AFF_NONE); + } + }else{ + /* CREATE VIEW name AS... without an argument list. Construct + ** the column names from the SELECT statement that defines the view. + */ + assert( pTable->aCol==0 ); + pTable->nCol = pSelTab->nCol; + pTable->aCol = pSelTab->aCol; + pTable->tabFlags |= (pSelTab->tabFlags & COLFLAG_NOINSERT); + pSelTab->nCol = 0; + pSelTab->aCol = 0; + assert( sqlite3SchemaMutexHeld(db, 0, pTable->pSchema) ); + } + pTable->nNVCol = pTable->nCol; + sqlite3DeleteTable(db, pSelTab); + sqlite3SelectDelete(db, pSel); + EnableLookaside; + pParse->eParseMode = eParseMode; + } else { + nErr++; + } + pTable->pSchema->schemaFlags |= DB_UnresetViews; + if( db->mallocFailed ){ + sqlite3DeleteColumnNames(db, pTable); + } +#endif /* SQLITE_OMIT_VIEW */ + return nErr; +} +#endif /* !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) */ + +#ifndef SQLITE_OMIT_VIEW +/* +** Clear the column names from every VIEW in database idx. +*/ +static void sqliteViewResetAll(sqlite3 *db, int idx){ + HashElem *i; + assert( sqlite3SchemaMutexHeld(db, idx, 0) ); + if( !DbHasProperty(db, idx, DB_UnresetViews) ) return; + for(i=sqliteHashFirst(&db->aDb[idx].pSchema->tblHash); i;i=sqliteHashNext(i)){ + Table *pTab = sqliteHashData(i); + if( IsView(pTab) ){ + sqlite3DeleteColumnNames(db, pTab); + } + } + DbClearProperty(db, idx, DB_UnresetViews); +} +#else +# define sqliteViewResetAll(A,B) +#endif /* SQLITE_OMIT_VIEW */ + +/* +** This function is called by the VDBE to adjust the internal schema +** used by SQLite when the btree layer moves a table root page. The +** root-page of a table or index in database iDb has changed from iFrom +** to iTo. +** +** Ticket #1728: The symbol table might still contain information +** on tables and/or indices that are the process of being deleted. +** If you are unlucky, one of those deleted indices or tables might +** have the same rootpage number as the real table or index that is +** being moved. So we cannot stop searching after the first match +** because the first match might be for one of the deleted indices +** or tables and not the table/index that is actually being moved. +** We must continue looping until all tables and indices with +** rootpage==iFrom have been converted to have a rootpage of iTo +** in order to be certain that we got the right one. +*/ +#ifndef SQLITE_OMIT_AUTOVACUUM +SQLITE_PRIVATE void sqlite3RootPageMoved(sqlite3 *db, int iDb, Pgno iFrom, Pgno iTo){ + HashElem *pElem; + Hash *pHash; + Db *pDb; + + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + pDb = &db->aDb[iDb]; + pHash = &pDb->pSchema->tblHash; + for(pElem=sqliteHashFirst(pHash); pElem; pElem=sqliteHashNext(pElem)){ + Table *pTab = sqliteHashData(pElem); + if( pTab->tnum==iFrom ){ + pTab->tnum = iTo; + } + } + pHash = &pDb->pSchema->idxHash; + for(pElem=sqliteHashFirst(pHash); pElem; pElem=sqliteHashNext(pElem)){ + Index *pIdx = sqliteHashData(pElem); + if( pIdx->tnum==iFrom ){ + pIdx->tnum = iTo; + } + } +} +#endif + +/* +** Write code to erase the table with root-page iTable from database iDb. +** Also write code to modify the sqlite_schema table and internal schema +** if a root-page of another table is moved by the btree-layer whilst +** erasing iTable (this can happen with an auto-vacuum database). +*/ +static void destroyRootPage(Parse *pParse, int iTable, int iDb){ + Vdbe *v = sqlite3GetVdbe(pParse); + int r1 = sqlite3GetTempReg(pParse); + if( iTable<2 ) sqlite3ErrorMsg(pParse, "corrupt schema"); + sqlite3VdbeAddOp3(v, OP_Destroy, iTable, r1, iDb); + sqlite3MayAbort(pParse); +#ifndef SQLITE_OMIT_AUTOVACUUM + /* OP_Destroy stores an in integer r1. If this integer + ** is non-zero, then it is the root page number of a table moved to + ** location iTable. The following code modifies the sqlite_schema table to + ** reflect this. + ** + ** The "#NNN" in the SQL is a special constant that means whatever value + ** is in register NNN. See grammar rules associated with the TK_REGISTER + ** token for additional information. + */ + sqlite3NestedParse(pParse, + "UPDATE %Q." LEGACY_SCHEMA_TABLE + " SET rootpage=%d WHERE #%d AND rootpage=#%d", + pParse->db->aDb[iDb].zDbSName, iTable, r1, r1); +#endif + sqlite3ReleaseTempReg(pParse, r1); +} + +/* +** Write VDBE code to erase table pTab and all associated indices on disk. +** Code to update the sqlite_schema tables and internal schema definitions +** in case a root-page belonging to another table is moved by the btree layer +** is also added (this can happen with an auto-vacuum database). +*/ +static void destroyTable(Parse *pParse, Table *pTab){ + /* If the database may be auto-vacuum capable (if SQLITE_OMIT_AUTOVACUUM + ** is not defined), then it is important to call OP_Destroy on the + ** table and index root-pages in order, starting with the numerically + ** largest root-page number. This guarantees that none of the root-pages + ** to be destroyed is relocated by an earlier OP_Destroy. i.e. if the + ** following were coded: + ** + ** OP_Destroy 4 0 + ** ... + ** OP_Destroy 5 0 + ** + ** and root page 5 happened to be the largest root-page number in the + ** database, then root page 5 would be moved to page 4 by the + ** "OP_Destroy 4 0" opcode. The subsequent "OP_Destroy 5 0" would hit + ** a free-list page. + */ + Pgno iTab = pTab->tnum; + Pgno iDestroyed = 0; + + while( 1 ){ + Index *pIdx; + Pgno iLargest = 0; + + if( iDestroyed==0 || iTabpIndex; pIdx; pIdx=pIdx->pNext){ + Pgno iIdx = pIdx->tnum; + assert( pIdx->pSchema==pTab->pSchema ); + if( (iDestroyed==0 || (iIdxiLargest ){ + iLargest = iIdx; + } + } + if( iLargest==0 ){ + return; + }else{ + int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + assert( iDb>=0 && iDbdb->nDb ); + destroyRootPage(pParse, iLargest, iDb); + iDestroyed = iLargest; + } + } +} + +/* +** Remove entries from the sqlite_statN tables (for N in (1,2,3)) +** after a DROP INDEX or DROP TABLE command. +*/ +static void sqlite3ClearStatTables( + Parse *pParse, /* The parsing context */ + int iDb, /* The database number */ + const char *zType, /* "idx" or "tbl" */ + const char *zName /* Name of index or table */ +){ + int i; + const char *zDbName = pParse->db->aDb[iDb].zDbSName; + for(i=1; i<=4; i++){ + char zTab[24]; + sqlite3_snprintf(sizeof(zTab),zTab,"sqlite_stat%d",i); + if( sqlite3FindTable(pParse->db, zTab, zDbName) ){ + sqlite3NestedParse(pParse, + "DELETE FROM %Q.%s WHERE %s=%Q", + zDbName, zTab, zType, zName + ); + } + } +} + +/* +** Generate code to drop a table. +*/ +SQLITE_PRIVATE void sqlite3CodeDropTable(Parse *pParse, Table *pTab, int iDb, int isView){ + Vdbe *v; + sqlite3 *db = pParse->db; + Trigger *pTrigger; + Db *pDb = &db->aDb[iDb]; + + v = sqlite3GetVdbe(pParse); + assert( v!=0 ); + sqlite3BeginWriteOperation(pParse, 1, iDb); + +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pTab) ){ + sqlite3VdbeAddOp0(v, OP_VBegin); + } +#endif + + /* Drop all triggers associated with the table being dropped. Code + ** is generated to remove entries from sqlite_schema and/or + ** sqlite_temp_schema if required. + */ + pTrigger = sqlite3TriggerList(pParse, pTab); + while( pTrigger ){ + assert( pTrigger->pSchema==pTab->pSchema || + pTrigger->pSchema==db->aDb[1].pSchema ); + sqlite3DropTriggerPtr(pParse, pTrigger); + pTrigger = pTrigger->pNext; + } + +#ifndef SQLITE_OMIT_AUTOINCREMENT + /* Remove any entries of the sqlite_sequence table associated with + ** the table being dropped. This is done before the table is dropped + ** at the btree level, in case the sqlite_sequence table needs to + ** move as a result of the drop (can happen in auto-vacuum mode). + */ + if( pTab->tabFlags & TF_Autoincrement ){ + sqlite3NestedParse(pParse, + "DELETE FROM %Q.sqlite_sequence WHERE name=%Q", + pDb->zDbSName, pTab->zName + ); + } +#endif + + /* Drop all entries in the schema table that refer to the + ** table. The program name loops through the schema table and deletes + ** every row that refers to a table of the same name as the one being + ** dropped. Triggers are handled separately because a trigger can be + ** created in the temp database that refers to a table in another + ** database. + */ + sqlite3NestedParse(pParse, + "DELETE FROM %Q." LEGACY_SCHEMA_TABLE + " WHERE tbl_name=%Q and type!='trigger'", + pDb->zDbSName, pTab->zName); + if( !isView && !IsVirtual(pTab) ){ + destroyTable(pParse, pTab); + } + + /* Remove the table entry from SQLite's internal schema and modify + ** the schema cookie. + */ + if( IsVirtual(pTab) ){ + sqlite3VdbeAddOp4(v, OP_VDestroy, iDb, 0, 0, pTab->zName, 0); + sqlite3MayAbort(pParse); + } + sqlite3VdbeAddOp4(v, OP_DropTable, iDb, 0, 0, pTab->zName, 0); + sqlite3ChangeCookie(pParse, iDb); + sqliteViewResetAll(db, iDb); +} + +/* +** Return TRUE if shadow tables should be read-only in the current +** context. +*/ +SQLITE_PRIVATE int sqlite3ReadOnlyShadowTables(sqlite3 *db){ +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( (db->flags & SQLITE_Defensive)!=0 + && db->pVtabCtx==0 + && db->nVdbeExec==0 + && !sqlite3VtabInSync(db) + ){ + return 1; + } +#endif + return 0; +} + +/* +** Return true if it is not allowed to drop the given table +*/ +static int tableMayNotBeDropped(sqlite3 *db, Table *pTab){ + if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 ){ + if( sqlite3StrNICmp(pTab->zName+7, "stat", 4)==0 ) return 0; + if( sqlite3StrNICmp(pTab->zName+7, "parameters", 10)==0 ) return 0; + return 1; + } + if( (pTab->tabFlags & TF_Shadow)!=0 && sqlite3ReadOnlyShadowTables(db) ){ + return 1; + } + if( pTab->tabFlags & TF_Eponymous ){ + return 1; + } + return 0; +} + +/* +** This routine is called to do the work of a DROP TABLE statement. +** pName is the name of the table to be dropped. +*/ +SQLITE_PRIVATE void sqlite3DropTable(Parse *pParse, SrcList *pName, int isView, int noErr){ + Table *pTab; + Vdbe *v; + sqlite3 *db = pParse->db; + int iDb; + + if( db->mallocFailed ){ + goto exit_drop_table; + } + assert( pParse->nErr==0 ); + assert( pName->nSrc==1 ); + if( sqlite3ReadSchema(pParse) ) goto exit_drop_table; + if( noErr ) db->suppressErr++; + assert( isView==0 || isView==LOCATE_VIEW ); + pTab = sqlite3LocateTableItem(pParse, isView, &pName->a[0]); + if( noErr ) db->suppressErr--; + + if( pTab==0 ){ + if( noErr ){ + sqlite3CodeVerifyNamedSchema(pParse, pName->a[0].zDatabase); + sqlite3ForceNotReadOnly(pParse); + } + goto exit_drop_table; + } + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + assert( iDb>=0 && iDbnDb ); + + /* If pTab is a virtual table, call ViewGetColumnNames() to ensure + ** it is initialized. + */ + if( IsVirtual(pTab) && sqlite3ViewGetColumnNames(pParse, pTab) ){ + goto exit_drop_table; + } +#ifndef SQLITE_OMIT_AUTHORIZATION + { + int code; + const char *zTab = SCHEMA_TABLE(iDb); + const char *zDb = db->aDb[iDb].zDbSName; + const char *zArg2 = 0; + if( sqlite3AuthCheck(pParse, SQLITE_DELETE, zTab, 0, zDb)){ + goto exit_drop_table; + } + if( isView ){ + if( !OMIT_TEMPDB && iDb==1 ){ + code = SQLITE_DROP_TEMP_VIEW; + }else{ + code = SQLITE_DROP_VIEW; + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + }else if( IsVirtual(pTab) ){ + code = SQLITE_DROP_VTABLE; + zArg2 = sqlite3GetVTable(db, pTab)->pMod->zName; +#endif + }else{ + if( !OMIT_TEMPDB && iDb==1 ){ + code = SQLITE_DROP_TEMP_TABLE; + }else{ + code = SQLITE_DROP_TABLE; + } + } + if( sqlite3AuthCheck(pParse, code, pTab->zName, zArg2, zDb) ){ + goto exit_drop_table; + } + if( sqlite3AuthCheck(pParse, SQLITE_DELETE, pTab->zName, 0, zDb) ){ + goto exit_drop_table; + } + } +#endif + if( tableMayNotBeDropped(db, pTab) ){ + sqlite3ErrorMsg(pParse, "table %s may not be dropped", pTab->zName); + goto exit_drop_table; + } + +#ifndef SQLITE_OMIT_VIEW + /* Ensure DROP TABLE is not used on a view, and DROP VIEW is not used + ** on a table. + */ + if( isView && !IsView(pTab) ){ + sqlite3ErrorMsg(pParse, "use DROP TABLE to delete table %s", pTab->zName); + goto exit_drop_table; + } + if( !isView && IsView(pTab) ){ + sqlite3ErrorMsg(pParse, "use DROP VIEW to delete view %s", pTab->zName); + goto exit_drop_table; + } +#endif + + /* Generate code to remove the table from the schema table + ** on disk. + */ + v = sqlite3GetVdbe(pParse); + if( v ){ + sqlite3BeginWriteOperation(pParse, 1, iDb); + if( !isView ){ + sqlite3ClearStatTables(pParse, iDb, "tbl", pTab->zName); + sqlite3FkDropTable(pParse, pName, pTab); + } + sqlite3CodeDropTable(pParse, pTab, iDb, isView); + } + +exit_drop_table: + sqlite3SrcListDelete(db, pName); +} + +/* +** This routine is called to create a new foreign key on the table +** currently under construction. pFromCol determines which columns +** in the current table point to the foreign key. If pFromCol==0 then +** connect the key to the last column inserted. pTo is the name of +** the table referred to (a.k.a the "parent" table). pToCol is a list +** of tables in the parent pTo table. flags contains all +** information about the conflict resolution algorithms specified +** in the ON DELETE, ON UPDATE and ON INSERT clauses. +** +** An FKey structure is created and added to the table currently +** under construction in the pParse->pNewTable field. +** +** The foreign key is set for IMMEDIATE processing. A subsequent call +** to sqlite3DeferForeignKey() might change this to DEFERRED. +*/ +SQLITE_PRIVATE void sqlite3CreateForeignKey( + Parse *pParse, /* Parsing context */ + ExprList *pFromCol, /* Columns in this table that point to other table */ + Token *pTo, /* Name of the other table */ + ExprList *pToCol, /* Columns in the other table */ + int flags /* Conflict resolution algorithms. */ +){ + sqlite3 *db = pParse->db; +#ifndef SQLITE_OMIT_FOREIGN_KEY + FKey *pFKey = 0; + FKey *pNextTo; + Table *p = pParse->pNewTable; + i64 nByte; + int i; + int nCol; + char *z; + + assert( pTo!=0 ); + if( p==0 || IN_DECLARE_VTAB ) goto fk_end; + if( pFromCol==0 ){ + int iCol = p->nCol-1; + if( NEVER(iCol<0) ) goto fk_end; + if( pToCol && pToCol->nExpr!=1 ){ + sqlite3ErrorMsg(pParse, "foreign key on %s" + " should reference only one column of table %T", + p->aCol[iCol].zCnName, pTo); + goto fk_end; + } + nCol = 1; + }else if( pToCol && pToCol->nExpr!=pFromCol->nExpr ){ + sqlite3ErrorMsg(pParse, + "number of columns in foreign key does not match the number of " + "columns in the referenced table"); + goto fk_end; + }else{ + nCol = pFromCol->nExpr; + } + nByte = sizeof(*pFKey) + (nCol-1)*sizeof(pFKey->aCol[0]) + pTo->n + 1; + if( pToCol ){ + for(i=0; inExpr; i++){ + nByte += sqlite3Strlen30(pToCol->a[i].zEName) + 1; + } + } + pFKey = sqlite3DbMallocZero(db, nByte ); + if( pFKey==0 ){ + goto fk_end; + } + pFKey->pFrom = p; + assert( IsOrdinaryTable(p) ); + pFKey->pNextFrom = p->u.tab.pFKey; + z = (char*)&pFKey->aCol[nCol]; + pFKey->zTo = z; + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenMap(pParse, (void*)z, pTo); + } + memcpy(z, pTo->z, pTo->n); + z[pTo->n] = 0; + sqlite3Dequote(z); + z += pTo->n+1; + pFKey->nCol = nCol; + if( pFromCol==0 ){ + pFKey->aCol[0].iFrom = p->nCol-1; + }else{ + for(i=0; inCol; j++){ + if( sqlite3StrICmp(p->aCol[j].zCnName, pFromCol->a[i].zEName)==0 ){ + pFKey->aCol[i].iFrom = j; + break; + } + } + if( j>=p->nCol ){ + sqlite3ErrorMsg(pParse, + "unknown column \"%s\" in foreign key definition", + pFromCol->a[i].zEName); + goto fk_end; + } + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenRemap(pParse, &pFKey->aCol[i], pFromCol->a[i].zEName); + } + } + } + if( pToCol ){ + for(i=0; ia[i].zEName); + pFKey->aCol[i].zCol = z; + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenRemap(pParse, z, pToCol->a[i].zEName); + } + memcpy(z, pToCol->a[i].zEName, n); + z[n] = 0; + z += n+1; + } + } + pFKey->isDeferred = 0; + pFKey->aAction[0] = (u8)(flags & 0xff); /* ON DELETE action */ + pFKey->aAction[1] = (u8)((flags >> 8 ) & 0xff); /* ON UPDATE action */ + + assert( sqlite3SchemaMutexHeld(db, 0, p->pSchema) ); + pNextTo = (FKey *)sqlite3HashInsert(&p->pSchema->fkeyHash, + pFKey->zTo, (void *)pFKey + ); + if( pNextTo==pFKey ){ + sqlite3OomFault(db); + goto fk_end; + } + if( pNextTo ){ + assert( pNextTo->pPrevTo==0 ); + pFKey->pNextTo = pNextTo; + pNextTo->pPrevTo = pFKey; + } + + /* Link the foreign key to the table as the last step. + */ + assert( IsOrdinaryTable(p) ); + p->u.tab.pFKey = pFKey; + pFKey = 0; + +fk_end: + sqlite3DbFree(db, pFKey); +#endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */ + sqlite3ExprListDelete(db, pFromCol); + sqlite3ExprListDelete(db, pToCol); +} + +/* +** This routine is called when an INITIALLY IMMEDIATE or INITIALLY DEFERRED +** clause is seen as part of a foreign key definition. The isDeferred +** parameter is 1 for INITIALLY DEFERRED and 0 for INITIALLY IMMEDIATE. +** The behavior of the most recently created foreign key is adjusted +** accordingly. +*/ +SQLITE_PRIVATE void sqlite3DeferForeignKey(Parse *pParse, int isDeferred){ +#ifndef SQLITE_OMIT_FOREIGN_KEY + Table *pTab; + FKey *pFKey; + if( (pTab = pParse->pNewTable)==0 ) return; + if( NEVER(!IsOrdinaryTable(pTab)) ) return; + if( (pFKey = pTab->u.tab.pFKey)==0 ) return; + assert( isDeferred==0 || isDeferred==1 ); /* EV: R-30323-21917 */ + pFKey->isDeferred = (u8)isDeferred; +#endif +} + +/* +** Generate code that will erase and refill index *pIdx. This is +** used to initialize a newly created index or to recompute the +** content of an index in response to a REINDEX command. +** +** if memRootPage is not negative, it means that the index is newly +** created. The register specified by memRootPage contains the +** root page number of the index. If memRootPage is negative, then +** the index already exists and must be cleared before being refilled and +** the root page number of the index is taken from pIndex->tnum. +*/ +static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){ + Table *pTab = pIndex->pTable; /* The table that is indexed */ + int iTab = pParse->nTab++; /* Btree cursor used for pTab */ + int iIdx = pParse->nTab++; /* Btree cursor used for pIndex */ + int iSorter; /* Cursor opened by OpenSorter (if in use) */ + int addr1; /* Address of top of loop */ + int addr2; /* Address to jump to for next iteration */ + Pgno tnum; /* Root page of index */ + int iPartIdxLabel; /* Jump to this label to skip a row */ + Vdbe *v; /* Generate code into this virtual machine */ + KeyInfo *pKey; /* KeyInfo for index */ + int regRecord; /* Register holding assembled index record */ + sqlite3 *db = pParse->db; /* The database connection */ + int iDb = sqlite3SchemaToIndex(db, pIndex->pSchema); + +#ifndef SQLITE_OMIT_AUTHORIZATION + if( sqlite3AuthCheck(pParse, SQLITE_REINDEX, pIndex->zName, 0, + db->aDb[iDb].zDbSName ) ){ + return; + } +#endif + + /* Require a write-lock on the table to perform this operation */ + sqlite3TableLock(pParse, iDb, pTab->tnum, 1, pTab->zName); + + v = sqlite3GetVdbe(pParse); + if( v==0 ) return; + if( memRootPage>=0 ){ + tnum = (Pgno)memRootPage; + }else{ + tnum = pIndex->tnum; + } + pKey = sqlite3KeyInfoOfIndex(pParse, pIndex); + assert( pKey!=0 || pParse->nErr ); + + /* Open the sorter cursor if we are to use one. */ + iSorter = pParse->nTab++; + sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, pIndex->nKeyCol, (char*) + sqlite3KeyInfoRef(pKey), P4_KEYINFO); + + /* Open the table. Loop through all rows of the table, inserting index + ** records into the sorter. */ + sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead); + addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); VdbeCoverage(v); + regRecord = sqlite3GetTempReg(pParse); + sqlite3MultiWrite(pParse); + + sqlite3GenerateIndexKey(pParse,pIndex,iTab,regRecord,0,&iPartIdxLabel,0,0); + sqlite3VdbeAddOp2(v, OP_SorterInsert, iSorter, regRecord); + sqlite3ResolvePartIdxLabel(pParse, iPartIdxLabel); + sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addr1); + if( memRootPage<0 ) sqlite3VdbeAddOp2(v, OP_Clear, tnum, iDb); + sqlite3VdbeAddOp4(v, OP_OpenWrite, iIdx, (int)tnum, iDb, + (char *)pKey, P4_KEYINFO); + sqlite3VdbeChangeP5(v, OPFLAG_BULKCSR|((memRootPage>=0)?OPFLAG_P2ISREG:0)); + + addr1 = sqlite3VdbeAddOp2(v, OP_SorterSort, iSorter, 0); VdbeCoverage(v); + if( IsUniqueIndex(pIndex) ){ + int j2 = sqlite3VdbeGoto(v, 1); + addr2 = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeVerifyAbortable(v, OE_Abort); + sqlite3VdbeAddOp4Int(v, OP_SorterCompare, iSorter, j2, regRecord, + pIndex->nKeyCol); VdbeCoverage(v); + sqlite3UniqueConstraint(pParse, OE_Abort, pIndex); + sqlite3VdbeJumpHere(v, j2); + }else{ + /* Most CREATE INDEX and REINDEX statements that are not UNIQUE can not + ** abort. The exception is if one of the indexed expressions contains a + ** user function that throws an exception when it is evaluated. But the + ** overhead of adding a statement journal to a CREATE INDEX statement is + ** very small (since most of the pages written do not contain content that + ** needs to be restored if the statement aborts), so we call + ** sqlite3MayAbort() for all CREATE INDEX statements. */ + sqlite3MayAbort(pParse); + addr2 = sqlite3VdbeCurrentAddr(v); + } + sqlite3VdbeAddOp3(v, OP_SorterData, iSorter, regRecord, iIdx); + if( !pIndex->bAscKeyBug ){ + /* This OP_SeekEnd opcode makes index insert for a REINDEX go much + ** faster by avoiding unnecessary seeks. But the optimization does + ** not work for UNIQUE constraint indexes on WITHOUT ROWID tables + ** with DESC primary keys, since those indexes have there keys in + ** a different order from the main table. + ** See ticket: https://www.sqlite.org/src/info/bba7b69f9849b5bf + */ + sqlite3VdbeAddOp1(v, OP_SeekEnd, iIdx); + } + sqlite3VdbeAddOp2(v, OP_IdxInsert, iIdx, regRecord); + sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + sqlite3ReleaseTempReg(pParse, regRecord); + sqlite3VdbeAddOp2(v, OP_SorterNext, iSorter, addr2); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addr1); + + sqlite3VdbeAddOp1(v, OP_Close, iTab); + sqlite3VdbeAddOp1(v, OP_Close, iIdx); + sqlite3VdbeAddOp1(v, OP_Close, iSorter); +} + +/* +** Allocate heap space to hold an Index object with nCol columns. +** +** Increase the allocation size to provide an extra nExtra bytes +** of 8-byte aligned space after the Index object and return a +** pointer to this extra space in *ppExtra. +*/ +SQLITE_PRIVATE Index *sqlite3AllocateIndexObject( + sqlite3 *db, /* Database connection */ + i16 nCol, /* Total number of columns in the index */ + int nExtra, /* Number of bytes of extra space to alloc */ + char **ppExtra /* Pointer to the "extra" space */ +){ + Index *p; /* Allocated index object */ + int nByte; /* Bytes of space for Index object + arrays */ + + nByte = ROUND8(sizeof(Index)) + /* Index structure */ + ROUND8(sizeof(char*)*nCol) + /* Index.azColl */ + ROUND8(sizeof(LogEst)*(nCol+1) + /* Index.aiRowLogEst */ + sizeof(i16)*nCol + /* Index.aiColumn */ + sizeof(u8)*nCol); /* Index.aSortOrder */ + p = sqlite3DbMallocZero(db, nByte + nExtra); + if( p ){ + char *pExtra = ((char*)p)+ROUND8(sizeof(Index)); + p->azColl = (const char**)pExtra; pExtra += ROUND8(sizeof(char*)*nCol); + p->aiRowLogEst = (LogEst*)pExtra; pExtra += sizeof(LogEst)*(nCol+1); + p->aiColumn = (i16*)pExtra; pExtra += sizeof(i16)*nCol; + p->aSortOrder = (u8*)pExtra; + p->nColumn = nCol; + p->nKeyCol = nCol - 1; + *ppExtra = ((char*)p) + nByte; + } + return p; +} + +/* +** If expression list pList contains an expression that was parsed with +** an explicit "NULLS FIRST" or "NULLS LAST" clause, leave an error in +** pParse and return non-zero. Otherwise, return zero. +*/ +SQLITE_PRIVATE int sqlite3HasExplicitNulls(Parse *pParse, ExprList *pList){ + if( pList ){ + int i; + for(i=0; inExpr; i++){ + if( pList->a[i].bNulls ){ + u8 sf = pList->a[i].sortFlags; + sqlite3ErrorMsg(pParse, "unsupported use of NULLS %s", + (sf==0 || sf==3) ? "FIRST" : "LAST" + ); + return 1; + } + } + } + return 0; +} + +/* +** Create a new index for an SQL table. pName1.pName2 is the name of the index +** and pTblList is the name of the table that is to be indexed. Both will +** be NULL for a primary key or an index that is created to satisfy a +** UNIQUE constraint. If pTable and pIndex are NULL, use pParse->pNewTable +** as the table to be indexed. pParse->pNewTable is a table that is +** currently being constructed by a CREATE TABLE statement. +** +** pList is a list of columns to be indexed. pList will be NULL if this +** is a primary key or unique-constraint on the most recent column added +** to the table currently under construction. +*/ +SQLITE_PRIVATE void sqlite3CreateIndex( + Parse *pParse, /* All information about this parse */ + Token *pName1, /* First part of index name. May be NULL */ + Token *pName2, /* Second part of index name. May be NULL */ + SrcList *pTblName, /* Table to index. Use pParse->pNewTable if 0 */ + ExprList *pList, /* A list of columns to be indexed */ + int onError, /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ + Token *pStart, /* The CREATE token that begins this statement */ + Expr *pPIWhere, /* WHERE clause for partial indices */ + int sortOrder, /* Sort order of primary key when pList==NULL */ + int ifNotExist, /* Omit error if index already exists */ + u8 idxType /* The index type */ +){ + Table *pTab = 0; /* Table to be indexed */ + Index *pIndex = 0; /* The index to be created */ + char *zName = 0; /* Name of the index */ + int nName; /* Number of characters in zName */ + int i, j; + DbFixer sFix; /* For assigning database names to pTable */ + int sortOrderMask; /* 1 to honor DESC in index. 0 to ignore. */ + sqlite3 *db = pParse->db; + Db *pDb; /* The specific table containing the indexed database */ + int iDb; /* Index of the database that is being written */ + Token *pName = 0; /* Unqualified name of the index to create */ + struct ExprList_item *pListItem; /* For looping over pList */ + int nExtra = 0; /* Space allocated for zExtra[] */ + int nExtraCol; /* Number of extra columns needed */ + char *zExtra = 0; /* Extra space after the Index object */ + Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ + + assert( db->pParse==pParse ); + if( pParse->nErr ){ + goto exit_create_index; + } + assert( db->mallocFailed==0 ); + if( IN_DECLARE_VTAB && idxType!=SQLITE_IDXTYPE_PRIMARYKEY ){ + goto exit_create_index; + } + if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){ + goto exit_create_index; + } + if( sqlite3HasExplicitNulls(pParse, pList) ){ + goto exit_create_index; + } + + /* + ** Find the table that is to be indexed. Return early if not found. + */ + if( pTblName!=0 ){ + + /* Use the two-part index name to determine the database + ** to search for the table. 'Fix' the table name to this db + ** before looking up the table. + */ + assert( pName1 && pName2 ); + iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pName); + if( iDb<0 ) goto exit_create_index; + assert( pName && pName->z ); + +#ifndef SQLITE_OMIT_TEMPDB + /* If the index name was unqualified, check if the table + ** is a temp table. If so, set the database to 1. Do not do this + ** if initialising a database schema. + */ + if( !db->init.busy ){ + pTab = sqlite3SrcListLookup(pParse, pTblName); + if( pName2->n==0 && pTab && pTab->pSchema==db->aDb[1].pSchema ){ + iDb = 1; + } + } +#endif + + sqlite3FixInit(&sFix, pParse, iDb, "index", pName); + if( sqlite3FixSrcList(&sFix, pTblName) ){ + /* Because the parser constructs pTblName from a single identifier, + ** sqlite3FixSrcList can never fail. */ + assert(0); + } + pTab = sqlite3LocateTableItem(pParse, 0, &pTblName->a[0]); + assert( db->mallocFailed==0 || pTab==0 ); + if( pTab==0 ) goto exit_create_index; + if( iDb==1 && db->aDb[iDb].pSchema!=pTab->pSchema ){ + sqlite3ErrorMsg(pParse, + "cannot create a TEMP index on non-TEMP table \"%s\"", + pTab->zName); + goto exit_create_index; + } + if( !HasRowid(pTab) ) pPk = sqlite3PrimaryKeyIndex(pTab); + }else{ + assert( pName==0 ); + assert( pStart==0 ); + pTab = pParse->pNewTable; + if( !pTab ) goto exit_create_index; + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + } + pDb = &db->aDb[iDb]; + + assert( pTab!=0 ); + if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 + && db->init.busy==0 + && pTblName!=0 +#if SQLITE_USER_AUTHENTICATION + && sqlite3UserAuthTable(pTab->zName)==0 +#endif + ){ + sqlite3ErrorMsg(pParse, "table %s may not be indexed", pTab->zName); + goto exit_create_index; + } +#ifndef SQLITE_OMIT_VIEW + if( IsView(pTab) ){ + sqlite3ErrorMsg(pParse, "views may not be indexed"); + goto exit_create_index; + } +#endif +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pTab) ){ + sqlite3ErrorMsg(pParse, "virtual tables may not be indexed"); + goto exit_create_index; + } +#endif + + /* + ** Find the name of the index. Make sure there is not already another + ** index or table with the same name. + ** + ** Exception: If we are reading the names of permanent indices from the + ** sqlite_schema table (because some other process changed the schema) and + ** one of the index names collides with the name of a temporary table or + ** index, then we will continue to process this index. + ** + ** If pName==0 it means that we are + ** dealing with a primary key or UNIQUE constraint. We have to invent our + ** own name. + */ + if( pName ){ + zName = sqlite3NameFromToken(db, pName); + if( zName==0 ) goto exit_create_index; + assert( pName->z!=0 ); + if( SQLITE_OK!=sqlite3CheckObjectName(pParse, zName,"index",pTab->zName) ){ + goto exit_create_index; + } + if( !IN_RENAME_OBJECT ){ + if( !db->init.busy ){ + if( sqlite3FindTable(db, zName, 0)!=0 ){ + sqlite3ErrorMsg(pParse, "there is already a table named %s", zName); + goto exit_create_index; + } + } + if( sqlite3FindIndex(db, zName, pDb->zDbSName)!=0 ){ + if( !ifNotExist ){ + sqlite3ErrorMsg(pParse, "index %s already exists", zName); + }else{ + assert( !db->init.busy ); + sqlite3CodeVerifySchema(pParse, iDb); + sqlite3ForceNotReadOnly(pParse); + } + goto exit_create_index; + } + } + }else{ + int n; + Index *pLoop; + for(pLoop=pTab->pIndex, n=1; pLoop; pLoop=pLoop->pNext, n++){} + zName = sqlite3MPrintf(db, "sqlite_autoindex_%s_%d", pTab->zName, n); + if( zName==0 ){ + goto exit_create_index; + } + + /* Automatic index names generated from within sqlite3_declare_vtab() + ** must have names that are distinct from normal automatic index names. + ** The following statement converts "sqlite3_autoindex..." into + ** "sqlite3_butoindex..." in order to make the names distinct. + ** The "vtab_err.test" test demonstrates the need of this statement. */ + if( IN_SPECIAL_PARSE ) zName[7]++; + } + + /* Check for authorization to create an index. + */ +#ifndef SQLITE_OMIT_AUTHORIZATION + if( !IN_RENAME_OBJECT ){ + const char *zDb = pDb->zDbSName; + if( sqlite3AuthCheck(pParse, SQLITE_INSERT, SCHEMA_TABLE(iDb), 0, zDb) ){ + goto exit_create_index; + } + i = SQLITE_CREATE_INDEX; + if( !OMIT_TEMPDB && iDb==1 ) i = SQLITE_CREATE_TEMP_INDEX; + if( sqlite3AuthCheck(pParse, i, zName, pTab->zName, zDb) ){ + goto exit_create_index; + } + } +#endif + + /* If pList==0, it means this routine was called to make a primary + ** key out of the last column added to the table under construction. + ** So create a fake list to simulate this. + */ + if( pList==0 ){ + Token prevCol; + Column *pCol = &pTab->aCol[pTab->nCol-1]; + pCol->colFlags |= COLFLAG_UNIQUE; + sqlite3TokenInit(&prevCol, pCol->zCnName); + pList = sqlite3ExprListAppend(pParse, 0, + sqlite3ExprAlloc(db, TK_ID, &prevCol, 0)); + if( pList==0 ) goto exit_create_index; + assert( pList->nExpr==1 ); + sqlite3ExprListSetSortOrder(pList, sortOrder, SQLITE_SO_UNDEFINED); + }else{ + sqlite3ExprListCheckLength(pParse, pList, "index"); + if( pParse->nErr ) goto exit_create_index; + } + + /* Figure out how many bytes of space are required to store explicitly + ** specified collation sequence names. + */ + for(i=0; inExpr; i++){ + Expr *pExpr = pList->a[i].pExpr; + assert( pExpr!=0 ); + if( pExpr->op==TK_COLLATE ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + nExtra += (1 + sqlite3Strlen30(pExpr->u.zToken)); + } + } + + /* + ** Allocate the index structure. + */ + nName = sqlite3Strlen30(zName); + nExtraCol = pPk ? pPk->nKeyCol : 1; + assert( pList->nExpr + nExtraCol <= 32767 /* Fits in i16 */ ); + pIndex = sqlite3AllocateIndexObject(db, pList->nExpr + nExtraCol, + nName + nExtra + 1, &zExtra); + if( db->mallocFailed ){ + goto exit_create_index; + } + assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowLogEst) ); + assert( EIGHT_BYTE_ALIGNMENT(pIndex->azColl) ); + pIndex->zName = zExtra; + zExtra += nName + 1; + memcpy(pIndex->zName, zName, nName+1); + pIndex->pTable = pTab; + pIndex->onError = (u8)onError; + pIndex->uniqNotNull = onError!=OE_None; + pIndex->idxType = idxType; + pIndex->pSchema = db->aDb[iDb].pSchema; + pIndex->nKeyCol = pList->nExpr; + if( pPIWhere ){ + sqlite3ResolveSelfReference(pParse, pTab, NC_PartIdx, pPIWhere, 0); + pIndex->pPartIdxWhere = pPIWhere; + pPIWhere = 0; + } + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + + /* Check to see if we should honor DESC requests on index columns + */ + if( pDb->pSchema->file_format>=4 ){ + sortOrderMask = -1; /* Honor DESC */ + }else{ + sortOrderMask = 0; /* Ignore DESC */ + } + + /* Analyze the list of expressions that form the terms of the index and + ** report any errors. In the common case where the expression is exactly + ** a table column, store that column in aiColumn[]. For general expressions, + ** populate pIndex->aColExpr and store XN_EXPR (-2) in aiColumn[]. + ** + ** TODO: Issue a warning if two or more columns of the index are identical. + ** TODO: Issue a warning if the table primary key is used as part of the + ** index key. + */ + pListItem = pList->a; + if( IN_RENAME_OBJECT ){ + pIndex->aColExpr = pList; + pList = 0; + } + for(i=0; inKeyCol; i++, pListItem++){ + Expr *pCExpr; /* The i-th index expression */ + int requestedSortOrder; /* ASC or DESC on the i-th expression */ + const char *zColl; /* Collation sequence name */ + + sqlite3StringToId(pListItem->pExpr); + sqlite3ResolveSelfReference(pParse, pTab, NC_IdxExpr, pListItem->pExpr, 0); + if( pParse->nErr ) goto exit_create_index; + pCExpr = sqlite3ExprSkipCollate(pListItem->pExpr); + if( pCExpr->op!=TK_COLUMN ){ + if( pTab==pParse->pNewTable ){ + sqlite3ErrorMsg(pParse, "expressions prohibited in PRIMARY KEY and " + "UNIQUE constraints"); + goto exit_create_index; + } + if( pIndex->aColExpr==0 ){ + pIndex->aColExpr = pList; + pList = 0; + } + j = XN_EXPR; + pIndex->aiColumn[i] = XN_EXPR; + pIndex->uniqNotNull = 0; + }else{ + j = pCExpr->iColumn; + assert( j<=0x7fff ); + if( j<0 ){ + j = pTab->iPKey; + }else{ + if( pTab->aCol[j].notNull==0 ){ + pIndex->uniqNotNull = 0; + } + if( pTab->aCol[j].colFlags & COLFLAG_VIRTUAL ){ + pIndex->bHasVCol = 1; + } + } + pIndex->aiColumn[i] = (i16)j; + } + zColl = 0; + if( pListItem->pExpr->op==TK_COLLATE ){ + int nColl; + assert( !ExprHasProperty(pListItem->pExpr, EP_IntValue) ); + zColl = pListItem->pExpr->u.zToken; + nColl = sqlite3Strlen30(zColl) + 1; + assert( nExtra>=nColl ); + memcpy(zExtra, zColl, nColl); + zColl = zExtra; + zExtra += nColl; + nExtra -= nColl; + }else if( j>=0 ){ + zColl = sqlite3ColumnColl(&pTab->aCol[j]); + } + if( !zColl ) zColl = sqlite3StrBINARY; + if( !db->init.busy && !sqlite3LocateCollSeq(pParse, zColl) ){ + goto exit_create_index; + } + pIndex->azColl[i] = zColl; + requestedSortOrder = pListItem->sortFlags & sortOrderMask; + pIndex->aSortOrder[i] = (u8)requestedSortOrder; + } + + /* Append the table key to the end of the index. For WITHOUT ROWID + ** tables (when pPk!=0) this will be the declared PRIMARY KEY. For + ** normal tables (when pPk==0) this will be the rowid. + */ + if( pPk ){ + for(j=0; jnKeyCol; j++){ + int x = pPk->aiColumn[j]; + assert( x>=0 ); + if( isDupColumn(pIndex, pIndex->nKeyCol, pPk, j) ){ + pIndex->nColumn--; + }else{ + testcase( hasColumn(pIndex->aiColumn,pIndex->nKeyCol,x) ); + pIndex->aiColumn[i] = x; + pIndex->azColl[i] = pPk->azColl[j]; + pIndex->aSortOrder[i] = pPk->aSortOrder[j]; + i++; + } + } + assert( i==pIndex->nColumn ); + }else{ + pIndex->aiColumn[i] = XN_ROWID; + pIndex->azColl[i] = sqlite3StrBINARY; + } + sqlite3DefaultRowEst(pIndex); + if( pParse->pNewTable==0 ) estimateIndexWidth(pIndex); + + /* If this index contains every column of its table, then mark + ** it as a covering index */ + assert( HasRowid(pTab) + || pTab->iPKey<0 || sqlite3TableColumnToIndex(pIndex, pTab->iPKey)>=0 ); + recomputeColumnsNotIndexed(pIndex); + if( pTblName!=0 && pIndex->nColumn>=pTab->nCol ){ + pIndex->isCovering = 1; + for(j=0; jnCol; j++){ + if( j==pTab->iPKey ) continue; + if( sqlite3TableColumnToIndex(pIndex,j)>=0 ) continue; + pIndex->isCovering = 0; + break; + } + } + + if( pTab==pParse->pNewTable ){ + /* This routine has been called to create an automatic index as a + ** result of a PRIMARY KEY or UNIQUE clause on a column definition, or + ** a PRIMARY KEY or UNIQUE clause following the column definitions. + ** i.e. one of: + ** + ** CREATE TABLE t(x PRIMARY KEY, y); + ** CREATE TABLE t(x, y, UNIQUE(x, y)); + ** + ** Either way, check to see if the table already has such an index. If + ** so, don't bother creating this one. This only applies to + ** automatically created indices. Users can do as they wish with + ** explicit indices. + ** + ** Two UNIQUE or PRIMARY KEY constraints are considered equivalent + ** (and thus suppressing the second one) even if they have different + ** sort orders. + ** + ** If there are different collating sequences or if the columns of + ** the constraint occur in different orders, then the constraints are + ** considered distinct and both result in separate indices. + */ + Index *pIdx; + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int k; + assert( IsUniqueIndex(pIdx) ); + assert( pIdx->idxType!=SQLITE_IDXTYPE_APPDEF ); + assert( IsUniqueIndex(pIndex) ); + + if( pIdx->nKeyCol!=pIndex->nKeyCol ) continue; + for(k=0; knKeyCol; k++){ + const char *z1; + const char *z2; + assert( pIdx->aiColumn[k]>=0 ); + if( pIdx->aiColumn[k]!=pIndex->aiColumn[k] ) break; + z1 = pIdx->azColl[k]; + z2 = pIndex->azColl[k]; + if( sqlite3StrICmp(z1, z2) ) break; + } + if( k==pIdx->nKeyCol ){ + if( pIdx->onError!=pIndex->onError ){ + /* This constraint creates the same index as a previous + ** constraint specified somewhere in the CREATE TABLE statement. + ** However the ON CONFLICT clauses are different. If both this + ** constraint and the previous equivalent constraint have explicit + ** ON CONFLICT clauses this is an error. Otherwise, use the + ** explicitly specified behavior for the index. + */ + if( !(pIdx->onError==OE_Default || pIndex->onError==OE_Default) ){ + sqlite3ErrorMsg(pParse, + "conflicting ON CONFLICT clauses specified", 0); + } + if( pIdx->onError==OE_Default ){ + pIdx->onError = pIndex->onError; + } + } + if( idxType==SQLITE_IDXTYPE_PRIMARYKEY ) pIdx->idxType = idxType; + if( IN_RENAME_OBJECT ){ + pIndex->pNext = pParse->pNewIndex; + pParse->pNewIndex = pIndex; + pIndex = 0; + } + goto exit_create_index; + } + } + } + + if( !IN_RENAME_OBJECT ){ + + /* Link the new Index structure to its table and to the other + ** in-memory database structures. + */ + assert( pParse->nErr==0 ); + if( db->init.busy ){ + Index *p; + assert( !IN_SPECIAL_PARSE ); + assert( sqlite3SchemaMutexHeld(db, 0, pIndex->pSchema) ); + if( pTblName!=0 ){ + pIndex->tnum = db->init.newTnum; + if( sqlite3IndexHasDuplicateRootPage(pIndex) ){ + sqlite3ErrorMsg(pParse, "invalid rootpage"); + pParse->rc = SQLITE_CORRUPT_BKPT; + goto exit_create_index; + } + } + p = sqlite3HashInsert(&pIndex->pSchema->idxHash, + pIndex->zName, pIndex); + if( p ){ + assert( p==pIndex ); /* Malloc must have failed */ + sqlite3OomFault(db); + goto exit_create_index; + } + db->mDbFlags |= DBFLAG_SchemaChange; + } + + /* If this is the initial CREATE INDEX statement (or CREATE TABLE if the + ** index is an implied index for a UNIQUE or PRIMARY KEY constraint) then + ** emit code to allocate the index rootpage on disk and make an entry for + ** the index in the sqlite_schema table and populate the index with + ** content. But, do not do this if we are simply reading the sqlite_schema + ** table to parse the schema, or if this index is the PRIMARY KEY index + ** of a WITHOUT ROWID table. + ** + ** If pTblName==0 it means this index is generated as an implied PRIMARY KEY + ** or UNIQUE index in a CREATE TABLE statement. Since the table + ** has just been created, it contains no data and the index initialization + ** step can be skipped. + */ + else if( HasRowid(pTab) || pTblName!=0 ){ + Vdbe *v; + char *zStmt; + int iMem = ++pParse->nMem; + + v = sqlite3GetVdbe(pParse); + if( v==0 ) goto exit_create_index; + + sqlite3BeginWriteOperation(pParse, 1, iDb); + + /* Create the rootpage for the index using CreateIndex. But before + ** doing so, code a Noop instruction and store its address in + ** Index.tnum. This is required in case this index is actually a + ** PRIMARY KEY and the table is actually a WITHOUT ROWID table. In + ** that case the convertToWithoutRowidTable() routine will replace + ** the Noop with a Goto to jump over the VDBE code generated below. */ + pIndex->tnum = (Pgno)sqlite3VdbeAddOp0(v, OP_Noop); + sqlite3VdbeAddOp3(v, OP_CreateBtree, iDb, iMem, BTREE_BLOBKEY); + + /* Gather the complete text of the CREATE INDEX statement into + ** the zStmt variable + */ + assert( pName!=0 || pStart==0 ); + if( pStart ){ + int n = (int)(pParse->sLastToken.z - pName->z) + pParse->sLastToken.n; + if( pName->z[n-1]==';' ) n--; + /* A named index with an explicit CREATE INDEX statement */ + zStmt = sqlite3MPrintf(db, "CREATE%s INDEX %.*s", + onError==OE_None ? "" : " UNIQUE", n, pName->z); + }else{ + /* An automatic index created by a PRIMARY KEY or UNIQUE constraint */ + /* zStmt = sqlite3MPrintf(""); */ + zStmt = 0; + } + + /* Add an entry in sqlite_schema for this index + */ + sqlite3NestedParse(pParse, + "INSERT INTO %Q." LEGACY_SCHEMA_TABLE " VALUES('index',%Q,%Q,#%d,%Q);", + db->aDb[iDb].zDbSName, + pIndex->zName, + pTab->zName, + iMem, + zStmt + ); + sqlite3DbFree(db, zStmt); + + /* Fill the index with data and reparse the schema. Code an OP_Expire + ** to invalidate all pre-compiled statements. + */ + if( pTblName ){ + sqlite3RefillIndex(pParse, pIndex, iMem); + sqlite3ChangeCookie(pParse, iDb); + sqlite3VdbeAddParseSchemaOp(v, iDb, + sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName), 0); + sqlite3VdbeAddOp2(v, OP_Expire, 0, 1); + } + + sqlite3VdbeJumpHere(v, (int)pIndex->tnum); + } + } + if( db->init.busy || pTblName==0 ){ + pIndex->pNext = pTab->pIndex; + pTab->pIndex = pIndex; + pIndex = 0; + } + else if( IN_RENAME_OBJECT ){ + assert( pParse->pNewIndex==0 ); + pParse->pNewIndex = pIndex; + pIndex = 0; + } + + /* Clean up before exiting */ +exit_create_index: + if( pIndex ) sqlite3FreeIndex(db, pIndex); + if( pTab ){ + /* Ensure all REPLACE indexes on pTab are at the end of the pIndex list. + ** The list was already ordered when this routine was entered, so at this + ** point at most a single index (the newly added index) will be out of + ** order. So we have to reorder at most one index. */ + Index **ppFrom; + Index *pThis; + for(ppFrom=&pTab->pIndex; (pThis = *ppFrom)!=0; ppFrom=&pThis->pNext){ + Index *pNext; + if( pThis->onError!=OE_Replace ) continue; + while( (pNext = pThis->pNext)!=0 && pNext->onError!=OE_Replace ){ + *ppFrom = pNext; + pThis->pNext = pNext->pNext; + pNext->pNext = pThis; + ppFrom = &pNext->pNext; + } + break; + } +#ifdef SQLITE_DEBUG + /* Verify that all REPLACE indexes really are now at the end + ** of the index list. In other words, no other index type ever + ** comes after a REPLACE index on the list. */ + for(pThis = pTab->pIndex; pThis; pThis=pThis->pNext){ + assert( pThis->onError!=OE_Replace + || pThis->pNext==0 + || pThis->pNext->onError==OE_Replace ); + } +#endif + } + sqlite3ExprDelete(db, pPIWhere); + sqlite3ExprListDelete(db, pList); + sqlite3SrcListDelete(db, pTblName); + sqlite3DbFree(db, zName); +} + +/* +** Fill the Index.aiRowEst[] array with default information - information +** to be used when we have not run the ANALYZE command. +** +** aiRowEst[0] is supposed to contain the number of elements in the index. +** Since we do not know, guess 1 million. aiRowEst[1] is an estimate of the +** number of rows in the table that match any particular value of the +** first column of the index. aiRowEst[2] is an estimate of the number +** of rows that match any particular combination of the first 2 columns +** of the index. And so forth. It must always be the case that +* +** aiRowEst[N]<=aiRowEst[N-1] +** aiRowEst[N]>=1 +** +** Apart from that, we have little to go on besides intuition as to +** how aiRowEst[] should be initialized. The numbers generated here +** are based on typical values found in actual indices. +*/ +SQLITE_PRIVATE void sqlite3DefaultRowEst(Index *pIdx){ + /* 10, 9, 8, 7, 6 */ + static const LogEst aVal[] = { 33, 32, 30, 28, 26 }; + LogEst *a = pIdx->aiRowLogEst; + LogEst x; + int nCopy = MIN(ArraySize(aVal), pIdx->nKeyCol); + int i; + + /* Indexes with default row estimates should not have stat1 data */ + assert( !pIdx->hasStat1 ); + + /* Set the first entry (number of rows in the index) to the estimated + ** number of rows in the table, or half the number of rows in the table + ** for a partial index. + ** + ** 2020-05-27: If some of the stat data is coming from the sqlite_stat1 + ** table but other parts we are having to guess at, then do not let the + ** estimated number of rows in the table be less than 1000 (LogEst 99). + ** Failure to do this can cause the indexes for which we do not have + ** stat1 data to be ignored by the query planner. + */ + x = pIdx->pTable->nRowLogEst; + assert( 99==sqlite3LogEst(1000) ); + if( x<99 ){ + pIdx->pTable->nRowLogEst = x = 99; + } + if( pIdx->pPartIdxWhere!=0 ){ x -= 10; assert( 10==sqlite3LogEst(2) ); } + a[0] = x; + + /* Estimate that a[1] is 10, a[2] is 9, a[3] is 8, a[4] is 7, a[5] is + ** 6 and each subsequent value (if any) is 5. */ + memcpy(&a[1], aVal, nCopy*sizeof(LogEst)); + for(i=nCopy+1; i<=pIdx->nKeyCol; i++){ + a[i] = 23; assert( 23==sqlite3LogEst(5) ); + } + + assert( 0==sqlite3LogEst(1) ); + if( IsUniqueIndex(pIdx) ) a[pIdx->nKeyCol] = 0; +} + +/* +** This routine will drop an existing named index. This routine +** implements the DROP INDEX statement. +*/ +SQLITE_PRIVATE void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){ + Index *pIndex; + Vdbe *v; + sqlite3 *db = pParse->db; + int iDb; + + if( db->mallocFailed ){ + goto exit_drop_index; + } + assert( pParse->nErr==0 ); /* Never called with prior non-OOM errors */ + assert( pName->nSrc==1 ); + if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){ + goto exit_drop_index; + } + pIndex = sqlite3FindIndex(db, pName->a[0].zName, pName->a[0].zDatabase); + if( pIndex==0 ){ + if( !ifExists ){ + sqlite3ErrorMsg(pParse, "no such index: %S", pName->a); + }else{ + sqlite3CodeVerifyNamedSchema(pParse, pName->a[0].zDatabase); + sqlite3ForceNotReadOnly(pParse); + } + pParse->checkSchema = 1; + goto exit_drop_index; + } + if( pIndex->idxType!=SQLITE_IDXTYPE_APPDEF ){ + sqlite3ErrorMsg(pParse, "index associated with UNIQUE " + "or PRIMARY KEY constraint cannot be dropped", 0); + goto exit_drop_index; + } + iDb = sqlite3SchemaToIndex(db, pIndex->pSchema); +#ifndef SQLITE_OMIT_AUTHORIZATION + { + int code = SQLITE_DROP_INDEX; + Table *pTab = pIndex->pTable; + const char *zDb = db->aDb[iDb].zDbSName; + const char *zTab = SCHEMA_TABLE(iDb); + if( sqlite3AuthCheck(pParse, SQLITE_DELETE, zTab, 0, zDb) ){ + goto exit_drop_index; + } + if( !OMIT_TEMPDB && iDb==1 ) code = SQLITE_DROP_TEMP_INDEX; + if( sqlite3AuthCheck(pParse, code, pIndex->zName, pTab->zName, zDb) ){ + goto exit_drop_index; + } + } +#endif + + /* Generate code to remove the index and from the schema table */ + v = sqlite3GetVdbe(pParse); + if( v ){ + sqlite3BeginWriteOperation(pParse, 1, iDb); + sqlite3NestedParse(pParse, + "DELETE FROM %Q." LEGACY_SCHEMA_TABLE " WHERE name=%Q AND type='index'", + db->aDb[iDb].zDbSName, pIndex->zName + ); + sqlite3ClearStatTables(pParse, iDb, "idx", pIndex->zName); + sqlite3ChangeCookie(pParse, iDb); + destroyRootPage(pParse, pIndex->tnum, iDb); + sqlite3VdbeAddOp4(v, OP_DropIndex, iDb, 0, 0, pIndex->zName, 0); + } + +exit_drop_index: + sqlite3SrcListDelete(db, pName); +} + +/* +** pArray is a pointer to an array of objects. Each object in the +** array is szEntry bytes in size. This routine uses sqlite3DbRealloc() +** to extend the array so that there is space for a new object at the end. +** +** When this function is called, *pnEntry contains the current size of +** the array (in entries - so the allocation is ((*pnEntry) * szEntry) bytes +** in total). +** +** If the realloc() is successful (i.e. if no OOM condition occurs), the +** space allocated for the new object is zeroed, *pnEntry updated to +** reflect the new size of the array and a pointer to the new allocation +** returned. *pIdx is set to the index of the new array entry in this case. +** +** Otherwise, if the realloc() fails, *pIdx is set to -1, *pnEntry remains +** unchanged and a copy of pArray returned. +*/ +SQLITE_PRIVATE void *sqlite3ArrayAllocate( + sqlite3 *db, /* Connection to notify of malloc failures */ + void *pArray, /* Array of objects. Might be reallocated */ + int szEntry, /* Size of each object in the array */ + int *pnEntry, /* Number of objects currently in use */ + int *pIdx /* Write the index of a new slot here */ +){ + char *z; + sqlite3_int64 n = *pIdx = *pnEntry; + if( (n & (n-1))==0 ){ + sqlite3_int64 sz = (n==0) ? 1 : 2*n; + void *pNew = sqlite3DbRealloc(db, pArray, sz*szEntry); + if( pNew==0 ){ + *pIdx = -1; + return pArray; + } + pArray = pNew; + } + z = (char*)pArray; + memset(&z[n * szEntry], 0, szEntry); + ++*pnEntry; + return pArray; +} + +/* +** Append a new element to the given IdList. Create a new IdList if +** need be. +** +** A new IdList is returned, or NULL if malloc() fails. +*/ +SQLITE_PRIVATE IdList *sqlite3IdListAppend(Parse *pParse, IdList *pList, Token *pToken){ + sqlite3 *db = pParse->db; + int i; + if( pList==0 ){ + pList = sqlite3DbMallocZero(db, sizeof(IdList) ); + if( pList==0 ) return 0; + } + pList->a = sqlite3ArrayAllocate( + db, + pList->a, + sizeof(pList->a[0]), + &pList->nId, + &i + ); + if( i<0 ){ + sqlite3IdListDelete(db, pList); + return 0; + } + pList->a[i].zName = sqlite3NameFromToken(db, pToken); + if( IN_RENAME_OBJECT && pList->a[i].zName ){ + sqlite3RenameTokenMap(pParse, (void*)pList->a[i].zName, pToken); + } + return pList; +} + +/* +** Delete an IdList. +*/ +SQLITE_PRIVATE void sqlite3IdListDelete(sqlite3 *db, IdList *pList){ + int i; + if( pList==0 ) return; + for(i=0; inId; i++){ + sqlite3DbFree(db, pList->a[i].zName); + } + sqlite3DbFree(db, pList->a); + sqlite3DbFreeNN(db, pList); +} + +/* +** Return the index in pList of the identifier named zId. Return -1 +** if not found. +*/ +SQLITE_PRIVATE int sqlite3IdListIndex(IdList *pList, const char *zName){ + int i; + if( pList==0 ) return -1; + for(i=0; inId; i++){ + if( sqlite3StrICmp(pList->a[i].zName, zName)==0 ) return i; + } + return -1; +} + +/* +** Maximum size of a SrcList object. +** The SrcList object is used to represent the FROM clause of a +** SELECT statement, and the query planner cannot deal with more +** than 64 tables in a join. So any value larger than 64 here +** is sufficient for most uses. Smaller values, like say 10, are +** appropriate for small and memory-limited applications. +*/ +#ifndef SQLITE_MAX_SRCLIST +# define SQLITE_MAX_SRCLIST 200 +#endif + +/* +** Expand the space allocated for the given SrcList object by +** creating nExtra new slots beginning at iStart. iStart is zero based. +** New slots are zeroed. +** +** For example, suppose a SrcList initially contains two entries: A,B. +** To append 3 new entries onto the end, do this: +** +** sqlite3SrcListEnlarge(db, pSrclist, 3, 2); +** +** After the call above it would contain: A, B, nil, nil, nil. +** If the iStart argument had been 1 instead of 2, then the result +** would have been: A, nil, nil, nil, B. To prepend the new slots, +** the iStart value would be 0. The result then would +** be: nil, nil, nil, A, B. +** +** If a memory allocation fails or the SrcList becomes too large, leave +** the original SrcList unchanged, return NULL, and leave an error message +** in pParse. +*/ +SQLITE_PRIVATE SrcList *sqlite3SrcListEnlarge( + Parse *pParse, /* Parsing context into which errors are reported */ + SrcList *pSrc, /* The SrcList to be enlarged */ + int nExtra, /* Number of new slots to add to pSrc->a[] */ + int iStart /* Index in pSrc->a[] of first new slot */ +){ + int i; + + /* Sanity checking on calling parameters */ + assert( iStart>=0 ); + assert( nExtra>=1 ); + assert( pSrc!=0 ); + assert( iStart<=pSrc->nSrc ); + + /* Allocate additional space if needed */ + if( (u32)pSrc->nSrc+nExtra>pSrc->nAlloc ){ + SrcList *pNew; + sqlite3_int64 nAlloc = 2*(sqlite3_int64)pSrc->nSrc+nExtra; + sqlite3 *db = pParse->db; + + if( pSrc->nSrc+nExtra>=SQLITE_MAX_SRCLIST ){ + sqlite3ErrorMsg(pParse, "too many FROM clause terms, max: %d", + SQLITE_MAX_SRCLIST); + return 0; + } + if( nAlloc>SQLITE_MAX_SRCLIST ) nAlloc = SQLITE_MAX_SRCLIST; + pNew = sqlite3DbRealloc(db, pSrc, + sizeof(*pSrc) + (nAlloc-1)*sizeof(pSrc->a[0]) ); + if( pNew==0 ){ + assert( db->mallocFailed ); + return 0; + } + pSrc = pNew; + pSrc->nAlloc = nAlloc; + } + + /* Move existing slots that come after the newly inserted slots + ** out of the way */ + for(i=pSrc->nSrc-1; i>=iStart; i--){ + pSrc->a[i+nExtra] = pSrc->a[i]; + } + pSrc->nSrc += nExtra; + + /* Zero the newly allocated slots */ + memset(&pSrc->a[iStart], 0, sizeof(pSrc->a[0])*nExtra); + for(i=iStart; ia[i].iCursor = -1; + } + + /* Return a pointer to the enlarged SrcList */ + return pSrc; +} + + +/* +** Append a new table name to the given SrcList. Create a new SrcList if +** need be. A new entry is created in the SrcList even if pTable is NULL. +** +** A SrcList is returned, or NULL if there is an OOM error or if the +** SrcList grows to large. The returned +** SrcList might be the same as the SrcList that was input or it might be +** a new one. If an OOM error does occurs, then the prior value of pList +** that is input to this routine is automatically freed. +** +** If pDatabase is not null, it means that the table has an optional +** database name prefix. Like this: "database.table". The pDatabase +** points to the table name and the pTable points to the database name. +** The SrcList.a[].zName field is filled with the table name which might +** come from pTable (if pDatabase is NULL) or from pDatabase. +** SrcList.a[].zDatabase is filled with the database name from pTable, +** or with NULL if no database is specified. +** +** In other words, if call like this: +** +** sqlite3SrcListAppend(D,A,B,0); +** +** Then B is a table name and the database name is unspecified. If called +** like this: +** +** sqlite3SrcListAppend(D,A,B,C); +** +** Then C is the table name and B is the database name. If C is defined +** then so is B. In other words, we never have a case where: +** +** sqlite3SrcListAppend(D,A,0,C); +** +** Both pTable and pDatabase are assumed to be quoted. They are dequoted +** before being added to the SrcList. +*/ +SQLITE_PRIVATE SrcList *sqlite3SrcListAppend( + Parse *pParse, /* Parsing context, in which errors are reported */ + SrcList *pList, /* Append to this SrcList. NULL creates a new SrcList */ + Token *pTable, /* Table to append */ + Token *pDatabase /* Database of the table */ +){ + SrcItem *pItem; + sqlite3 *db; + assert( pDatabase==0 || pTable!=0 ); /* Cannot have C without B */ + assert( pParse!=0 ); + assert( pParse->db!=0 ); + db = pParse->db; + if( pList==0 ){ + pList = sqlite3DbMallocRawNN(pParse->db, sizeof(SrcList) ); + if( pList==0 ) return 0; + pList->nAlloc = 1; + pList->nSrc = 1; + memset(&pList->a[0], 0, sizeof(pList->a[0])); + pList->a[0].iCursor = -1; + }else{ + SrcList *pNew = sqlite3SrcListEnlarge(pParse, pList, 1, pList->nSrc); + if( pNew==0 ){ + sqlite3SrcListDelete(db, pList); + return 0; + }else{ + pList = pNew; + } + } + pItem = &pList->a[pList->nSrc-1]; + if( pDatabase && pDatabase->z==0 ){ + pDatabase = 0; + } + if( pDatabase ){ + pItem->zName = sqlite3NameFromToken(db, pDatabase); + pItem->zDatabase = sqlite3NameFromToken(db, pTable); + }else{ + pItem->zName = sqlite3NameFromToken(db, pTable); + pItem->zDatabase = 0; + } + return pList; +} + +/* +** Assign VdbeCursor index numbers to all tables in a SrcList +*/ +SQLITE_PRIVATE void sqlite3SrcListAssignCursors(Parse *pParse, SrcList *pList){ + int i; + SrcItem *pItem; + assert( pList || pParse->db->mallocFailed ); + if( ALWAYS(pList) ){ + for(i=0, pItem=pList->a; inSrc; i++, pItem++){ + if( pItem->iCursor>=0 ) continue; + pItem->iCursor = pParse->nTab++; + if( pItem->pSelect ){ + sqlite3SrcListAssignCursors(pParse, pItem->pSelect->pSrc); + } + } + } +} + +/* +** Delete an entire SrcList including all its substructure. +*/ +SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3 *db, SrcList *pList){ + int i; + SrcItem *pItem; + if( pList==0 ) return; + for(pItem=pList->a, i=0; inSrc; i++, pItem++){ + if( pItem->zDatabase ) sqlite3DbFreeNN(db, pItem->zDatabase); + sqlite3DbFree(db, pItem->zName); + if( pItem->zAlias ) sqlite3DbFreeNN(db, pItem->zAlias); + if( pItem->fg.isIndexedBy ) sqlite3DbFree(db, pItem->u1.zIndexedBy); + if( pItem->fg.isTabFunc ) sqlite3ExprListDelete(db, pItem->u1.pFuncArg); + sqlite3DeleteTable(db, pItem->pTab); + if( pItem->pSelect ) sqlite3SelectDelete(db, pItem->pSelect); + if( pItem->pOn ) sqlite3ExprDelete(db, pItem->pOn); + if( pItem->pUsing ) sqlite3IdListDelete(db, pItem->pUsing); + } + sqlite3DbFreeNN(db, pList); +} + +/* +** This routine is called by the parser to add a new term to the +** end of a growing FROM clause. The "p" parameter is the part of +** the FROM clause that has already been constructed. "p" is NULL +** if this is the first term of the FROM clause. pTable and pDatabase +** are the name of the table and database named in the FROM clause term. +** pDatabase is NULL if the database name qualifier is missing - the +** usual case. If the term has an alias, then pAlias points to the +** alias token. If the term is a subquery, then pSubquery is the +** SELECT statement that the subquery encodes. The pTable and +** pDatabase parameters are NULL for subqueries. The pOn and pUsing +** parameters are the content of the ON and USING clauses. +** +** Return a new SrcList which encodes is the FROM with the new +** term added. +*/ +SQLITE_PRIVATE SrcList *sqlite3SrcListAppendFromTerm( + Parse *pParse, /* Parsing context */ + SrcList *p, /* The left part of the FROM clause already seen */ + Token *pTable, /* Name of the table to add to the FROM clause */ + Token *pDatabase, /* Name of the database containing pTable */ + Token *pAlias, /* The right-hand side of the AS subexpression */ + Select *pSubquery, /* A subquery used in place of a table name */ + Expr *pOn, /* The ON clause of a join */ + IdList *pUsing /* The USING clause of a join */ +){ + SrcItem *pItem; + sqlite3 *db = pParse->db; + if( !p && (pOn || pUsing) ){ + sqlite3ErrorMsg(pParse, "a JOIN clause is required before %s", + (pOn ? "ON" : "USING") + ); + goto append_from_error; + } + p = sqlite3SrcListAppend(pParse, p, pTable, pDatabase); + if( p==0 ){ + goto append_from_error; + } + assert( p->nSrc>0 ); + pItem = &p->a[p->nSrc-1]; + assert( (pTable==0)==(pDatabase==0) ); + assert( pItem->zName==0 || pDatabase!=0 ); + if( IN_RENAME_OBJECT && pItem->zName ){ + Token *pToken = (ALWAYS(pDatabase) && pDatabase->z) ? pDatabase : pTable; + sqlite3RenameTokenMap(pParse, pItem->zName, pToken); + } + assert( pAlias!=0 ); + if( pAlias->n ){ + pItem->zAlias = sqlite3NameFromToken(db, pAlias); + } + pItem->pSelect = pSubquery; + pItem->pOn = pOn; + pItem->pUsing = pUsing; + return p; + +append_from_error: + assert( p==0 ); + sqlite3ExprDelete(db, pOn); + sqlite3IdListDelete(db, pUsing); + sqlite3SelectDelete(db, pSubquery); + return 0; +} + +/* +** Add an INDEXED BY or NOT INDEXED clause to the most recently added +** element of the source-list passed as the second argument. +*/ +SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pIndexedBy){ + assert( pIndexedBy!=0 ); + if( p && pIndexedBy->n>0 ){ + SrcItem *pItem; + assert( p->nSrc>0 ); + pItem = &p->a[p->nSrc-1]; + assert( pItem->fg.notIndexed==0 ); + assert( pItem->fg.isIndexedBy==0 ); + assert( pItem->fg.isTabFunc==0 ); + if( pIndexedBy->n==1 && !pIndexedBy->z ){ + /* A "NOT INDEXED" clause was supplied. See parse.y + ** construct "indexed_opt" for details. */ + pItem->fg.notIndexed = 1; + }else{ + pItem->u1.zIndexedBy = sqlite3NameFromToken(pParse->db, pIndexedBy); + pItem->fg.isIndexedBy = 1; + assert( pItem->fg.isCte==0 ); /* No collision on union u2 */ + } + } +} + +/* +** Append the contents of SrcList p2 to SrcList p1 and return the resulting +** SrcList. Or, if an error occurs, return NULL. In all cases, p1 and p2 +** are deleted by this function. +*/ +SQLITE_PRIVATE SrcList *sqlite3SrcListAppendList(Parse *pParse, SrcList *p1, SrcList *p2){ + assert( p1 && p1->nSrc==1 ); + if( p2 ){ + SrcList *pNew = sqlite3SrcListEnlarge(pParse, p1, p2->nSrc, 1); + if( pNew==0 ){ + sqlite3SrcListDelete(pParse->db, p2); + }else{ + p1 = pNew; + memcpy(&p1->a[1], p2->a, p2->nSrc*sizeof(SrcItem)); + sqlite3DbFree(pParse->db, p2); + } + } + return p1; +} + +/* +** Add the list of function arguments to the SrcList entry for a +** table-valued-function. +*/ +SQLITE_PRIVATE void sqlite3SrcListFuncArgs(Parse *pParse, SrcList *p, ExprList *pList){ + if( p ){ + SrcItem *pItem = &p->a[p->nSrc-1]; + assert( pItem->fg.notIndexed==0 ); + assert( pItem->fg.isIndexedBy==0 ); + assert( pItem->fg.isTabFunc==0 ); + pItem->u1.pFuncArg = pList; + pItem->fg.isTabFunc = 1; + }else{ + sqlite3ExprListDelete(pParse->db, pList); + } +} + +/* +** When building up a FROM clause in the parser, the join operator +** is initially attached to the left operand. But the code generator +** expects the join operator to be on the right operand. This routine +** Shifts all join operators from left to right for an entire FROM +** clause. +** +** Example: Suppose the join is like this: +** +** A natural cross join B +** +** The operator is "natural cross join". The A and B operands are stored +** in p->a[0] and p->a[1], respectively. The parser initially stores the +** operator with A. This routine shifts that operator over to B. +*/ +SQLITE_PRIVATE void sqlite3SrcListShiftJoinType(SrcList *p){ + if( p ){ + int i; + for(i=p->nSrc-1; i>0; i--){ + p->a[i].fg.jointype = p->a[i-1].fg.jointype; + } + p->a[0].fg.jointype = 0; + } +} + +/* +** Generate VDBE code for a BEGIN statement. +*/ +SQLITE_PRIVATE void sqlite3BeginTransaction(Parse *pParse, int type){ + sqlite3 *db; + Vdbe *v; + int i; + + assert( pParse!=0 ); + db = pParse->db; + assert( db!=0 ); + if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "BEGIN", 0, 0) ){ + return; + } + v = sqlite3GetVdbe(pParse); + if( !v ) return; + if( type!=TK_DEFERRED ){ + for(i=0; inDb; i++){ + int eTxnType; + Btree *pBt = db->aDb[i].pBt; + if( pBt && sqlite3BtreeIsReadonly(pBt) ){ + eTxnType = 0; /* Read txn */ + }else if( type==TK_EXCLUSIVE ){ + eTxnType = 2; /* Exclusive txn */ + }else{ + eTxnType = 1; /* Write txn */ + } + sqlite3VdbeAddOp2(v, OP_Transaction, i, eTxnType); + sqlite3VdbeUsesBtree(v, i); + } + } + sqlite3VdbeAddOp0(v, OP_AutoCommit); +} + +/* +** Generate VDBE code for a COMMIT or ROLLBACK statement. +** Code for ROLLBACK is generated if eType==TK_ROLLBACK. Otherwise +** code is generated for a COMMIT. +*/ +SQLITE_PRIVATE void sqlite3EndTransaction(Parse *pParse, int eType){ + Vdbe *v; + int isRollback; + + assert( pParse!=0 ); + assert( pParse->db!=0 ); + assert( eType==TK_COMMIT || eType==TK_END || eType==TK_ROLLBACK ); + isRollback = eType==TK_ROLLBACK; + if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, + isRollback ? "ROLLBACK" : "COMMIT", 0, 0) ){ + return; + } + v = sqlite3GetVdbe(pParse); + if( v ){ + sqlite3VdbeAddOp2(v, OP_AutoCommit, 1, isRollback); + } +} + +/* +** This function is called by the parser when it parses a command to create, +** release or rollback an SQL savepoint. +*/ +SQLITE_PRIVATE void sqlite3Savepoint(Parse *pParse, int op, Token *pName){ + char *zName = sqlite3NameFromToken(pParse->db, pName); + if( zName ){ + Vdbe *v = sqlite3GetVdbe(pParse); +#ifndef SQLITE_OMIT_AUTHORIZATION + static const char * const az[] = { "BEGIN", "RELEASE", "ROLLBACK" }; + assert( !SAVEPOINT_BEGIN && SAVEPOINT_RELEASE==1 && SAVEPOINT_ROLLBACK==2 ); +#endif + if( !v || sqlite3AuthCheck(pParse, SQLITE_SAVEPOINT, az[op], zName, 0) ){ + sqlite3DbFree(pParse->db, zName); + return; + } + sqlite3VdbeAddOp4(v, OP_Savepoint, op, 0, 0, zName, P4_DYNAMIC); + } +} + +/* +** Make sure the TEMP database is open and available for use. Return +** the number of errors. Leave any error messages in the pParse structure. +*/ +SQLITE_PRIVATE int sqlite3OpenTempDatabase(Parse *pParse){ + sqlite3 *db = pParse->db; + if( db->aDb[1].pBt==0 && !pParse->explain ){ + int rc; + Btree *pBt; + static const int flags = + SQLITE_OPEN_READWRITE | + SQLITE_OPEN_CREATE | + SQLITE_OPEN_EXCLUSIVE | + SQLITE_OPEN_DELETEONCLOSE | + SQLITE_OPEN_TEMP_DB; + + rc = sqlite3BtreeOpen(db->pVfs, 0, db, &pBt, 0, flags); + if( rc!=SQLITE_OK ){ + sqlite3ErrorMsg(pParse, "unable to open a temporary database " + "file for storing temporary tables"); + pParse->rc = rc; + return 1; + } + db->aDb[1].pBt = pBt; + assert( db->aDb[1].pSchema ); + if( SQLITE_NOMEM==sqlite3BtreeSetPageSize(pBt, db->nextPagesize, 0, 0) ){ + sqlite3OomFault(db); + return 1; + } + } + return 0; +} + +/* +** Record the fact that the schema cookie will need to be verified +** for database iDb. The code to actually verify the schema cookie +** will occur at the end of the top-level VDBE and will be generated +** later, by sqlite3FinishCoding(). +*/ +static void sqlite3CodeVerifySchemaAtToplevel(Parse *pToplevel, int iDb){ + assert( iDb>=0 && iDbdb->nDb ); + assert( pToplevel->db->aDb[iDb].pBt!=0 || iDb==1 ); + assert( iDbdb, iDb, 0) ); + if( DbMaskTest(pToplevel->cookieMask, iDb)==0 ){ + DbMaskSet(pToplevel->cookieMask, iDb); + if( !OMIT_TEMPDB && iDb==1 ){ + sqlite3OpenTempDatabase(pToplevel); + } + } +} +SQLITE_PRIVATE void sqlite3CodeVerifySchema(Parse *pParse, int iDb){ + sqlite3CodeVerifySchemaAtToplevel(sqlite3ParseToplevel(pParse), iDb); +} + + +/* +** If argument zDb is NULL, then call sqlite3CodeVerifySchema() for each +** attached database. Otherwise, invoke it for the database named zDb only. +*/ +SQLITE_PRIVATE void sqlite3CodeVerifyNamedSchema(Parse *pParse, const char *zDb){ + sqlite3 *db = pParse->db; + int i; + for(i=0; inDb; i++){ + Db *pDb = &db->aDb[i]; + if( pDb->pBt && (!zDb || 0==sqlite3StrICmp(zDb, pDb->zDbSName)) ){ + sqlite3CodeVerifySchema(pParse, i); + } + } +} + +/* +** Generate VDBE code that prepares for doing an operation that +** might change the database. +** +** This routine starts a new transaction if we are not already within +** a transaction. If we are already within a transaction, then a checkpoint +** is set if the setStatement parameter is true. A checkpoint should +** be set for operations that might fail (due to a constraint) part of +** the way through and which will need to undo some writes without having to +** rollback the whole transaction. For operations where all constraints +** can be checked before any changes are made to the database, it is never +** necessary to undo a write and the checkpoint should not be set. +*/ +SQLITE_PRIVATE void sqlite3BeginWriteOperation(Parse *pParse, int setStatement, int iDb){ + Parse *pToplevel = sqlite3ParseToplevel(pParse); + sqlite3CodeVerifySchemaAtToplevel(pToplevel, iDb); + DbMaskSet(pToplevel->writeMask, iDb); + pToplevel->isMultiWrite |= setStatement; +} + +/* +** Indicate that the statement currently under construction might write +** more than one entry (example: deleting one row then inserting another, +** inserting multiple rows in a table, or inserting a row and index entries.) +** If an abort occurs after some of these writes have completed, then it will +** be necessary to undo the completed writes. +*/ +SQLITE_PRIVATE void sqlite3MultiWrite(Parse *pParse){ + Parse *pToplevel = sqlite3ParseToplevel(pParse); + pToplevel->isMultiWrite = 1; +} + +/* +** The code generator calls this routine if is discovers that it is +** possible to abort a statement prior to completion. In order to +** perform this abort without corrupting the database, we need to make +** sure that the statement is protected by a statement transaction. +** +** Technically, we only need to set the mayAbort flag if the +** isMultiWrite flag was previously set. There is a time dependency +** such that the abort must occur after the multiwrite. This makes +** some statements involving the REPLACE conflict resolution algorithm +** go a little faster. But taking advantage of this time dependency +** makes it more difficult to prove that the code is correct (in +** particular, it prevents us from writing an effective +** implementation of sqlite3AssertMayAbort()) and so we have chosen +** to take the safe route and skip the optimization. +*/ +SQLITE_PRIVATE void sqlite3MayAbort(Parse *pParse){ + Parse *pToplevel = sqlite3ParseToplevel(pParse); + pToplevel->mayAbort = 1; +} + +/* +** Code an OP_Halt that causes the vdbe to return an SQLITE_CONSTRAINT +** error. The onError parameter determines which (if any) of the statement +** and/or current transaction is rolled back. +*/ +SQLITE_PRIVATE void sqlite3HaltConstraint( + Parse *pParse, /* Parsing context */ + int errCode, /* extended error code */ + int onError, /* Constraint type */ + char *p4, /* Error message */ + i8 p4type, /* P4_STATIC or P4_TRANSIENT */ + u8 p5Errmsg /* P5_ErrMsg type */ +){ + Vdbe *v; + assert( pParse->pVdbe!=0 ); + v = sqlite3GetVdbe(pParse); + assert( (errCode&0xff)==SQLITE_CONSTRAINT || pParse->nested ); + if( onError==OE_Abort ){ + sqlite3MayAbort(pParse); + } + sqlite3VdbeAddOp4(v, OP_Halt, errCode, onError, 0, p4, p4type); + sqlite3VdbeChangeP5(v, p5Errmsg); +} + +/* +** Code an OP_Halt due to UNIQUE or PRIMARY KEY constraint violation. +*/ +SQLITE_PRIVATE void sqlite3UniqueConstraint( + Parse *pParse, /* Parsing context */ + int onError, /* Constraint type */ + Index *pIdx /* The index that triggers the constraint */ +){ + char *zErr; + int j; + StrAccum errMsg; + Table *pTab = pIdx->pTable; + + sqlite3StrAccumInit(&errMsg, pParse->db, 0, 0, + pParse->db->aLimit[SQLITE_LIMIT_LENGTH]); + if( pIdx->aColExpr ){ + sqlite3_str_appendf(&errMsg, "index '%q'", pIdx->zName); + }else{ + for(j=0; jnKeyCol; j++){ + char *zCol; + assert( pIdx->aiColumn[j]>=0 ); + zCol = pTab->aCol[pIdx->aiColumn[j]].zCnName; + if( j ) sqlite3_str_append(&errMsg, ", ", 2); + sqlite3_str_appendall(&errMsg, pTab->zName); + sqlite3_str_append(&errMsg, ".", 1); + sqlite3_str_appendall(&errMsg, zCol); + } + } + zErr = sqlite3StrAccumFinish(&errMsg); + sqlite3HaltConstraint(pParse, + IsPrimaryKeyIndex(pIdx) ? SQLITE_CONSTRAINT_PRIMARYKEY + : SQLITE_CONSTRAINT_UNIQUE, + onError, zErr, P4_DYNAMIC, P5_ConstraintUnique); +} + + +/* +** Code an OP_Halt due to non-unique rowid. +*/ +SQLITE_PRIVATE void sqlite3RowidConstraint( + Parse *pParse, /* Parsing context */ + int onError, /* Conflict resolution algorithm */ + Table *pTab /* The table with the non-unique rowid */ +){ + char *zMsg; + int rc; + if( pTab->iPKey>=0 ){ + zMsg = sqlite3MPrintf(pParse->db, "%s.%s", pTab->zName, + pTab->aCol[pTab->iPKey].zCnName); + rc = SQLITE_CONSTRAINT_PRIMARYKEY; + }else{ + zMsg = sqlite3MPrintf(pParse->db, "%s.rowid", pTab->zName); + rc = SQLITE_CONSTRAINT_ROWID; + } + sqlite3HaltConstraint(pParse, rc, onError, zMsg, P4_DYNAMIC, + P5_ConstraintUnique); +} + +/* +** Check to see if pIndex uses the collating sequence pColl. Return +** true if it does and false if it does not. +*/ +#ifndef SQLITE_OMIT_REINDEX +static int collationMatch(const char *zColl, Index *pIndex){ + int i; + assert( zColl!=0 ); + for(i=0; inColumn; i++){ + const char *z = pIndex->azColl[i]; + assert( z!=0 || pIndex->aiColumn[i]<0 ); + if( pIndex->aiColumn[i]>=0 && 0==sqlite3StrICmp(z, zColl) ){ + return 1; + } + } + return 0; +} +#endif + +/* +** Recompute all indices of pTab that use the collating sequence pColl. +** If pColl==0 then recompute all indices of pTab. +*/ +#ifndef SQLITE_OMIT_REINDEX +static void reindexTable(Parse *pParse, Table *pTab, char const *zColl){ + if( !IsVirtual(pTab) ){ + Index *pIndex; /* An index associated with pTab */ + + for(pIndex=pTab->pIndex; pIndex; pIndex=pIndex->pNext){ + if( zColl==0 || collationMatch(zColl, pIndex) ){ + int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + sqlite3BeginWriteOperation(pParse, 0, iDb); + sqlite3RefillIndex(pParse, pIndex, -1); + } + } + } +} +#endif + +/* +** Recompute all indices of all tables in all databases where the +** indices use the collating sequence pColl. If pColl==0 then recompute +** all indices everywhere. +*/ +#ifndef SQLITE_OMIT_REINDEX +static void reindexDatabases(Parse *pParse, char const *zColl){ + Db *pDb; /* A single database */ + int iDb; /* The database index number */ + sqlite3 *db = pParse->db; /* The database connection */ + HashElem *k; /* For looping over tables in pDb */ + Table *pTab; /* A table in the database */ + + assert( sqlite3BtreeHoldsAllMutexes(db) ); /* Needed for schema access */ + for(iDb=0, pDb=db->aDb; iDbnDb; iDb++, pDb++){ + assert( pDb!=0 ); + for(k=sqliteHashFirst(&pDb->pSchema->tblHash); k; k=sqliteHashNext(k)){ + pTab = (Table*)sqliteHashData(k); + reindexTable(pParse, pTab, zColl); + } + } +} +#endif + +/* +** Generate code for the REINDEX command. +** +** REINDEX -- 1 +** REINDEX -- 2 +** REINDEX ?.? -- 3 +** REINDEX ?.? -- 4 +** +** Form 1 causes all indices in all attached databases to be rebuilt. +** Form 2 rebuilds all indices in all databases that use the named +** collating function. Forms 3 and 4 rebuild the named index or all +** indices associated with the named table. +*/ +#ifndef SQLITE_OMIT_REINDEX +SQLITE_PRIVATE void sqlite3Reindex(Parse *pParse, Token *pName1, Token *pName2){ + CollSeq *pColl; /* Collating sequence to be reindexed, or NULL */ + char *z; /* Name of a table or index */ + const char *zDb; /* Name of the database */ + Table *pTab; /* A table in the database */ + Index *pIndex; /* An index associated with pTab */ + int iDb; /* The database index number */ + sqlite3 *db = pParse->db; /* The database connection */ + Token *pObjName; /* Name of the table or index to be reindexed */ + + /* Read the database schema. If an error occurs, leave an error message + ** and code in pParse and return NULL. */ + if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){ + return; + } + + if( pName1==0 ){ + reindexDatabases(pParse, 0); + return; + }else if( NEVER(pName2==0) || pName2->z==0 ){ + char *zColl; + assert( pName1->z ); + zColl = sqlite3NameFromToken(pParse->db, pName1); + if( !zColl ) return; + pColl = sqlite3FindCollSeq(db, ENC(db), zColl, 0); + if( pColl ){ + reindexDatabases(pParse, zColl); + sqlite3DbFree(db, zColl); + return; + } + sqlite3DbFree(db, zColl); + } + iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pObjName); + if( iDb<0 ) return; + z = sqlite3NameFromToken(db, pObjName); + if( z==0 ) return; + zDb = db->aDb[iDb].zDbSName; + pTab = sqlite3FindTable(db, z, zDb); + if( pTab ){ + reindexTable(pParse, pTab, 0); + sqlite3DbFree(db, z); + return; + } + pIndex = sqlite3FindIndex(db, z, zDb); + sqlite3DbFree(db, z); + if( pIndex ){ + sqlite3BeginWriteOperation(pParse, 0, iDb); + sqlite3RefillIndex(pParse, pIndex, -1); + return; + } + sqlite3ErrorMsg(pParse, "unable to identify the object to be reindexed"); +} +#endif + +/* +** Return a KeyInfo structure that is appropriate for the given Index. +** +** The caller should invoke sqlite3KeyInfoUnref() on the returned object +** when it has finished using it. +*/ +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoOfIndex(Parse *pParse, Index *pIdx){ + int i; + int nCol = pIdx->nColumn; + int nKey = pIdx->nKeyCol; + KeyInfo *pKey; + if( pParse->nErr ) return 0; + if( pIdx->uniqNotNull ){ + pKey = sqlite3KeyInfoAlloc(pParse->db, nKey, nCol-nKey); + }else{ + pKey = sqlite3KeyInfoAlloc(pParse->db, nCol, 0); + } + if( pKey ){ + assert( sqlite3KeyInfoIsWriteable(pKey) ); + for(i=0; iazColl[i]; + pKey->aColl[i] = zColl==sqlite3StrBINARY ? 0 : + sqlite3LocateCollSeq(pParse, zColl); + pKey->aSortFlags[i] = pIdx->aSortOrder[i]; + assert( 0==(pKey->aSortFlags[i] & KEYINFO_ORDER_BIGNULL) ); + } + if( pParse->nErr ){ + assert( pParse->rc==SQLITE_ERROR_MISSING_COLLSEQ ); + if( pIdx->bNoQuery==0 ){ + /* Deactivate the index because it contains an unknown collating + ** sequence. The only way to reactive the index is to reload the + ** schema. Adding the missing collating sequence later does not + ** reactive the index. The application had the chance to register + ** the missing index using the collation-needed callback. For + ** simplicity, SQLite will not give the application a second chance. + */ + pIdx->bNoQuery = 1; + pParse->rc = SQLITE_ERROR_RETRY; + } + sqlite3KeyInfoUnref(pKey); + pKey = 0; + } + } + return pKey; +} + +#ifndef SQLITE_OMIT_CTE +/* +** Create a new CTE object +*/ +SQLITE_PRIVATE Cte *sqlite3CteNew( + Parse *pParse, /* Parsing context */ + Token *pName, /* Name of the common-table */ + ExprList *pArglist, /* Optional column name list for the table */ + Select *pQuery, /* Query used to initialize the table */ + u8 eM10d /* The MATERIALIZED flag */ +){ + Cte *pNew; + sqlite3 *db = pParse->db; + + pNew = sqlite3DbMallocZero(db, sizeof(*pNew)); + assert( pNew!=0 || db->mallocFailed ); + + if( db->mallocFailed ){ + sqlite3ExprListDelete(db, pArglist); + sqlite3SelectDelete(db, pQuery); + }else{ + pNew->pSelect = pQuery; + pNew->pCols = pArglist; + pNew->zName = sqlite3NameFromToken(pParse->db, pName); + pNew->eM10d = eM10d; + } + return pNew; +} + +/* +** Clear information from a Cte object, but do not deallocate storage +** for the object itself. +*/ +static void cteClear(sqlite3 *db, Cte *pCte){ + assert( pCte!=0 ); + sqlite3ExprListDelete(db, pCte->pCols); + sqlite3SelectDelete(db, pCte->pSelect); + sqlite3DbFree(db, pCte->zName); +} + +/* +** Free the contents of the CTE object passed as the second argument. +*/ +SQLITE_PRIVATE void sqlite3CteDelete(sqlite3 *db, Cte *pCte){ + assert( pCte!=0 ); + cteClear(db, pCte); + sqlite3DbFree(db, pCte); +} + +/* +** This routine is invoked once per CTE by the parser while parsing a +** WITH clause. The CTE described by teh third argument is added to +** the WITH clause of the second argument. If the second argument is +** NULL, then a new WITH argument is created. +*/ +SQLITE_PRIVATE With *sqlite3WithAdd( + Parse *pParse, /* Parsing context */ + With *pWith, /* Existing WITH clause, or NULL */ + Cte *pCte /* CTE to add to the WITH clause */ +){ + sqlite3 *db = pParse->db; + With *pNew; + char *zName; + + if( pCte==0 ){ + return pWith; + } + + /* Check that the CTE name is unique within this WITH clause. If + ** not, store an error in the Parse structure. */ + zName = pCte->zName; + if( zName && pWith ){ + int i; + for(i=0; inCte; i++){ + if( sqlite3StrICmp(zName, pWith->a[i].zName)==0 ){ + sqlite3ErrorMsg(pParse, "duplicate WITH table name: %s", zName); + } + } + } + + if( pWith ){ + sqlite3_int64 nByte = sizeof(*pWith) + (sizeof(pWith->a[1]) * pWith->nCte); + pNew = sqlite3DbRealloc(db, pWith, nByte); + }else{ + pNew = sqlite3DbMallocZero(db, sizeof(*pWith)); + } + assert( (pNew!=0 && zName!=0) || db->mallocFailed ); + + if( db->mallocFailed ){ + sqlite3CteDelete(db, pCte); + pNew = pWith; + }else{ + pNew->a[pNew->nCte++] = *pCte; + sqlite3DbFree(db, pCte); + } + + return pNew; +} + +/* +** Free the contents of the With object passed as the second argument. +*/ +SQLITE_PRIVATE void sqlite3WithDelete(sqlite3 *db, With *pWith){ + if( pWith ){ + int i; + for(i=0; inCte; i++){ + cteClear(db, &pWith->a[i]); + } + sqlite3DbFree(db, pWith); + } +} +#endif /* !defined(SQLITE_OMIT_CTE) */ + +/************** End of build.c ***********************************************/ +/************** Begin file callback.c ****************************************/ +/* +** 2005 May 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains functions used to access the internal hash tables +** of user defined functions and collation sequences. +*/ + +/* #include "sqliteInt.h" */ + +/* +** Invoke the 'collation needed' callback to request a collation sequence +** in the encoding enc of name zName, length nName. +*/ +static void callCollNeeded(sqlite3 *db, int enc, const char *zName){ + assert( !db->xCollNeeded || !db->xCollNeeded16 ); + if( db->xCollNeeded ){ + char *zExternal = sqlite3DbStrDup(db, zName); + if( !zExternal ) return; + db->xCollNeeded(db->pCollNeededArg, db, enc, zExternal); + sqlite3DbFree(db, zExternal); + } +#ifndef SQLITE_OMIT_UTF16 + if( db->xCollNeeded16 ){ + char const *zExternal; + sqlite3_value *pTmp = sqlite3ValueNew(db); + sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF8, SQLITE_STATIC); + zExternal = sqlite3ValueText(pTmp, SQLITE_UTF16NATIVE); + if( zExternal ){ + db->xCollNeeded16(db->pCollNeededArg, db, (int)ENC(db), zExternal); + } + sqlite3ValueFree(pTmp); + } +#endif +} + +/* +** This routine is called if the collation factory fails to deliver a +** collation function in the best encoding but there may be other versions +** of this collation function (for other text encodings) available. Use one +** of these instead if they exist. Avoid a UTF-8 <-> UTF-16 conversion if +** possible. +*/ +static int synthCollSeq(sqlite3 *db, CollSeq *pColl){ + CollSeq *pColl2; + char *z = pColl->zName; + int i; + static const u8 aEnc[] = { SQLITE_UTF16BE, SQLITE_UTF16LE, SQLITE_UTF8 }; + for(i=0; i<3; i++){ + pColl2 = sqlite3FindCollSeq(db, aEnc[i], z, 0); + if( pColl2->xCmp!=0 ){ + memcpy(pColl, pColl2, sizeof(CollSeq)); + pColl->xDel = 0; /* Do not copy the destructor */ + return SQLITE_OK; + } + } + return SQLITE_ERROR; +} + +/* +** This routine is called on a collation sequence before it is used to +** check that it is defined. An undefined collation sequence exists when +** a database is loaded that contains references to collation sequences +** that have not been defined by sqlite3_create_collation() etc. +** +** If required, this routine calls the 'collation needed' callback to +** request a definition of the collating sequence. If this doesn't work, +** an equivalent collating sequence that uses a text encoding different +** from the main database is substituted, if one is available. +*/ +SQLITE_PRIVATE int sqlite3CheckCollSeq(Parse *pParse, CollSeq *pColl){ + if( pColl && pColl->xCmp==0 ){ + const char *zName = pColl->zName; + sqlite3 *db = pParse->db; + CollSeq *p = sqlite3GetCollSeq(pParse, ENC(db), pColl, zName); + if( !p ){ + return SQLITE_ERROR; + } + assert( p==pColl ); + } + return SQLITE_OK; +} + + + +/* +** Locate and return an entry from the db.aCollSeq hash table. If the entry +** specified by zName and nName is not found and parameter 'create' is +** true, then create a new entry. Otherwise return NULL. +** +** Each pointer stored in the sqlite3.aCollSeq hash table contains an +** array of three CollSeq structures. The first is the collation sequence +** preferred for UTF-8, the second UTF-16le, and the third UTF-16be. +** +** Stored immediately after the three collation sequences is a copy of +** the collation sequence name. A pointer to this string is stored in +** each collation sequence structure. +*/ +static CollSeq *findCollSeqEntry( + sqlite3 *db, /* Database connection */ + const char *zName, /* Name of the collating sequence */ + int create /* Create a new entry if true */ +){ + CollSeq *pColl; + pColl = sqlite3HashFind(&db->aCollSeq, zName); + + if( 0==pColl && create ){ + int nName = sqlite3Strlen30(zName) + 1; + pColl = sqlite3DbMallocZero(db, 3*sizeof(*pColl) + nName); + if( pColl ){ + CollSeq *pDel = 0; + pColl[0].zName = (char*)&pColl[3]; + pColl[0].enc = SQLITE_UTF8; + pColl[1].zName = (char*)&pColl[3]; + pColl[1].enc = SQLITE_UTF16LE; + pColl[2].zName = (char*)&pColl[3]; + pColl[2].enc = SQLITE_UTF16BE; + memcpy(pColl[0].zName, zName, nName); + pDel = sqlite3HashInsert(&db->aCollSeq, pColl[0].zName, pColl); + + /* If a malloc() failure occurred in sqlite3HashInsert(), it will + ** return the pColl pointer to be deleted (because it wasn't added + ** to the hash table). + */ + assert( pDel==0 || pDel==pColl ); + if( pDel!=0 ){ + sqlite3OomFault(db); + sqlite3DbFree(db, pDel); + pColl = 0; + } + } + } + return pColl; +} + +/* +** Parameter zName points to a UTF-8 encoded string nName bytes long. +** Return the CollSeq* pointer for the collation sequence named zName +** for the encoding 'enc' from the database 'db'. +** +** If the entry specified is not found and 'create' is true, then create a +** new entry. Otherwise return NULL. +** +** A separate function sqlite3LocateCollSeq() is a wrapper around +** this routine. sqlite3LocateCollSeq() invokes the collation factory +** if necessary and generates an error message if the collating sequence +** cannot be found. +** +** See also: sqlite3LocateCollSeq(), sqlite3GetCollSeq() +*/ +SQLITE_PRIVATE CollSeq *sqlite3FindCollSeq( + sqlite3 *db, /* Database connection to search */ + u8 enc, /* Desired text encoding */ + const char *zName, /* Name of the collating sequence. Might be NULL */ + int create /* True to create CollSeq if doesn't already exist */ +){ + CollSeq *pColl; + assert( SQLITE_UTF8==1 && SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 ); + assert( enc>=SQLITE_UTF8 && enc<=SQLITE_UTF16BE ); + if( zName ){ + pColl = findCollSeqEntry(db, zName, create); + if( pColl ) pColl += enc-1; + }else{ + pColl = db->pDfltColl; + } + return pColl; +} + +/* +** Change the text encoding for a database connection. This means that +** the pDfltColl must change as well. +*/ +SQLITE_PRIVATE void sqlite3SetTextEncoding(sqlite3 *db, u8 enc){ + assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); + db->enc = enc; + /* EVIDENCE-OF: R-08308-17224 The default collating function for all + ** strings is BINARY. + */ + db->pDfltColl = sqlite3FindCollSeq(db, enc, sqlite3StrBINARY, 0); +} + +/* +** This function is responsible for invoking the collation factory callback +** or substituting a collation sequence of a different encoding when the +** requested collation sequence is not available in the desired encoding. +** +** If it is not NULL, then pColl must point to the database native encoding +** collation sequence with name zName, length nName. +** +** The return value is either the collation sequence to be used in database +** db for collation type name zName, length nName, or NULL, if no collation +** sequence can be found. If no collation is found, leave an error message. +** +** See also: sqlite3LocateCollSeq(), sqlite3FindCollSeq() +*/ +SQLITE_PRIVATE CollSeq *sqlite3GetCollSeq( + Parse *pParse, /* Parsing context */ + u8 enc, /* The desired encoding for the collating sequence */ + CollSeq *pColl, /* Collating sequence with native encoding, or NULL */ + const char *zName /* Collating sequence name */ +){ + CollSeq *p; + sqlite3 *db = pParse->db; + + p = pColl; + if( !p ){ + p = sqlite3FindCollSeq(db, enc, zName, 0); + } + if( !p || !p->xCmp ){ + /* No collation sequence of this type for this encoding is registered. + ** Call the collation factory to see if it can supply us with one. + */ + callCollNeeded(db, enc, zName); + p = sqlite3FindCollSeq(db, enc, zName, 0); + } + if( p && !p->xCmp && synthCollSeq(db, p) ){ + p = 0; + } + assert( !p || p->xCmp ); + if( p==0 ){ + sqlite3ErrorMsg(pParse, "no such collation sequence: %s", zName); + pParse->rc = SQLITE_ERROR_MISSING_COLLSEQ; + } + return p; +} + +/* +** This function returns the collation sequence for database native text +** encoding identified by the string zName. +** +** If the requested collation sequence is not available, or not available +** in the database native encoding, the collation factory is invoked to +** request it. If the collation factory does not supply such a sequence, +** and the sequence is available in another text encoding, then that is +** returned instead. +** +** If no versions of the requested collations sequence are available, or +** another error occurs, NULL is returned and an error message written into +** pParse. +** +** This routine is a wrapper around sqlite3FindCollSeq(). This routine +** invokes the collation factory if the named collation cannot be found +** and generates an error message. +** +** See also: sqlite3FindCollSeq(), sqlite3GetCollSeq() +*/ +SQLITE_PRIVATE CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName){ + sqlite3 *db = pParse->db; + u8 enc = ENC(db); + u8 initbusy = db->init.busy; + CollSeq *pColl; + + pColl = sqlite3FindCollSeq(db, enc, zName, initbusy); + if( !initbusy && (!pColl || !pColl->xCmp) ){ + pColl = sqlite3GetCollSeq(pParse, enc, pColl, zName); + } + + return pColl; +} + +/* During the search for the best function definition, this procedure +** is called to test how well the function passed as the first argument +** matches the request for a function with nArg arguments in a system +** that uses encoding enc. The value returned indicates how well the +** request is matched. A higher value indicates a better match. +** +** If nArg is -1 that means to only return a match (non-zero) if p->nArg +** is also -1. In other words, we are searching for a function that +** takes a variable number of arguments. +** +** If nArg is -2 that means that we are searching for any function +** regardless of the number of arguments it uses, so return a positive +** match score for any +** +** The returned value is always between 0 and 6, as follows: +** +** 0: Not a match. +** 1: UTF8/16 conversion required and function takes any number of arguments. +** 2: UTF16 byte order change required and function takes any number of args. +** 3: encoding matches and function takes any number of arguments +** 4: UTF8/16 conversion required - argument count matches exactly +** 5: UTF16 byte order conversion required - argument count matches exactly +** 6: Perfect match: encoding and argument count match exactly. +** +** If nArg==(-2) then any function with a non-null xSFunc is +** a perfect match and any function with xSFunc NULL is +** a non-match. +*/ +#define FUNC_PERFECT_MATCH 6 /* The score for a perfect match */ +static int matchQuality( + FuncDef *p, /* The function we are evaluating for match quality */ + int nArg, /* Desired number of arguments. (-1)==any */ + u8 enc /* Desired text encoding */ +){ + int match; + assert( p->nArg>=-1 ); + + /* Wrong number of arguments means "no match" */ + if( p->nArg!=nArg ){ + if( nArg==(-2) ) return (p->xSFunc==0) ? 0 : FUNC_PERFECT_MATCH; + if( p->nArg>=0 ) return 0; + } + + /* Give a better score to a function with a specific number of arguments + ** than to function that accepts any number of arguments. */ + if( p->nArg==nArg ){ + match = 4; + }else{ + match = 1; + } + + /* Bonus points if the text encoding matches */ + if( enc==(p->funcFlags & SQLITE_FUNC_ENCMASK) ){ + match += 2; /* Exact encoding match */ + }else if( (enc & p->funcFlags & 2)!=0 ){ + match += 1; /* Both are UTF16, but with different byte orders */ + } + + return match; +} + +/* +** Search a FuncDefHash for a function with the given name. Return +** a pointer to the matching FuncDef if found, or 0 if there is no match. +*/ +SQLITE_PRIVATE FuncDef *sqlite3FunctionSearch( + int h, /* Hash of the name */ + const char *zFunc /* Name of function */ +){ + FuncDef *p; + for(p=sqlite3BuiltinFunctions.a[h]; p; p=p->u.pHash){ + assert( p->funcFlags & SQLITE_FUNC_BUILTIN ); + if( sqlite3StrICmp(p->zName, zFunc)==0 ){ + return p; + } + } + return 0; +} + +/* +** Insert a new FuncDef into a FuncDefHash hash table. +*/ +SQLITE_PRIVATE void sqlite3InsertBuiltinFuncs( + FuncDef *aDef, /* List of global functions to be inserted */ + int nDef /* Length of the apDef[] list */ +){ + int i; + for(i=0; ipNext!=&aDef[i] ); + aDef[i].pNext = pOther->pNext; + pOther->pNext = &aDef[i]; + }else{ + aDef[i].pNext = 0; + aDef[i].u.pHash = sqlite3BuiltinFunctions.a[h]; + sqlite3BuiltinFunctions.a[h] = &aDef[i]; + } + } +} + + + +/* +** Locate a user function given a name, a number of arguments and a flag +** indicating whether the function prefers UTF-16 over UTF-8. Return a +** pointer to the FuncDef structure that defines that function, or return +** NULL if the function does not exist. +** +** If the createFlag argument is true, then a new (blank) FuncDef +** structure is created and liked into the "db" structure if a +** no matching function previously existed. +** +** If nArg is -2, then the first valid function found is returned. A +** function is valid if xSFunc is non-zero. The nArg==(-2) +** case is used to see if zName is a valid function name for some number +** of arguments. If nArg is -2, then createFlag must be 0. +** +** If createFlag is false, then a function with the required name and +** number of arguments may be returned even if the eTextRep flag does not +** match that requested. +*/ +SQLITE_PRIVATE FuncDef *sqlite3FindFunction( + sqlite3 *db, /* An open database */ + const char *zName, /* Name of the function. zero-terminated */ + int nArg, /* Number of arguments. -1 means any number */ + u8 enc, /* Preferred text encoding */ + u8 createFlag /* Create new entry if true and does not otherwise exist */ +){ + FuncDef *p; /* Iterator variable */ + FuncDef *pBest = 0; /* Best match found so far */ + int bestScore = 0; /* Score of best match */ + int h; /* Hash value */ + int nName; /* Length of the name */ + + assert( nArg>=(-2) ); + assert( nArg>=(-1) || createFlag==0 ); + nName = sqlite3Strlen30(zName); + + /* First search for a match amongst the application-defined functions. + */ + p = (FuncDef*)sqlite3HashFind(&db->aFunc, zName); + while( p ){ + int score = matchQuality(p, nArg, enc); + if( score>bestScore ){ + pBest = p; + bestScore = score; + } + p = p->pNext; + } + + /* If no match is found, search the built-in functions. + ** + ** If the DBFLAG_PreferBuiltin flag is set, then search the built-in + ** functions even if a prior app-defined function was found. And give + ** priority to built-in functions. + ** + ** Except, if createFlag is true, that means that we are trying to + ** install a new function. Whatever FuncDef structure is returned it will + ** have fields overwritten with new information appropriate for the + ** new function. But the FuncDefs for built-in functions are read-only. + ** So we must not search for built-ins when creating a new function. + */ + if( !createFlag && (pBest==0 || (db->mDbFlags & DBFLAG_PreferBuiltin)!=0) ){ + bestScore = 0; + h = SQLITE_FUNC_HASH(sqlite3UpperToLower[(u8)zName[0]], nName); + p = sqlite3FunctionSearch(h, zName); + while( p ){ + int score = matchQuality(p, nArg, enc); + if( score>bestScore ){ + pBest = p; + bestScore = score; + } + p = p->pNext; + } + } + + /* If the createFlag parameter is true and the search did not reveal an + ** exact match for the name, number of arguments and encoding, then add a + ** new entry to the hash table and return it. + */ + if( createFlag && bestScorezName = (const char*)&pBest[1]; + pBest->nArg = (u16)nArg; + pBest->funcFlags = enc; + memcpy((char*)&pBest[1], zName, nName+1); + for(z=(u8*)pBest->zName; *z; z++) *z = sqlite3UpperToLower[*z]; + pOther = (FuncDef*)sqlite3HashInsert(&db->aFunc, pBest->zName, pBest); + if( pOther==pBest ){ + sqlite3DbFree(db, pBest); + sqlite3OomFault(db); + return 0; + }else{ + pBest->pNext = pOther; + } + } + + if( pBest && (pBest->xSFunc || createFlag) ){ + return pBest; + } + return 0; +} + +/* +** Free all resources held by the schema structure. The void* argument points +** at a Schema struct. This function does not call sqlite3DbFree(db, ) on the +** pointer itself, it just cleans up subsidiary resources (i.e. the contents +** of the schema hash tables). +** +** The Schema.cache_size variable is not cleared. +*/ +SQLITE_PRIVATE void sqlite3SchemaClear(void *p){ + Hash temp1; + Hash temp2; + HashElem *pElem; + Schema *pSchema = (Schema *)p; + + temp1 = pSchema->tblHash; + temp2 = pSchema->trigHash; + sqlite3HashInit(&pSchema->trigHash); + sqlite3HashClear(&pSchema->idxHash); + for(pElem=sqliteHashFirst(&temp2); pElem; pElem=sqliteHashNext(pElem)){ + sqlite3DeleteTrigger(0, (Trigger*)sqliteHashData(pElem)); + } + sqlite3HashClear(&temp2); + sqlite3HashInit(&pSchema->tblHash); + for(pElem=sqliteHashFirst(&temp1); pElem; pElem=sqliteHashNext(pElem)){ + Table *pTab = sqliteHashData(pElem); + sqlite3DeleteTable(0, pTab); + } + sqlite3HashClear(&temp1); + sqlite3HashClear(&pSchema->fkeyHash); + pSchema->pSeqTab = 0; + if( pSchema->schemaFlags & DB_SchemaLoaded ){ + pSchema->iGeneration++; + } + pSchema->schemaFlags &= ~(DB_SchemaLoaded|DB_ResetWanted); +} + +/* +** Find and return the schema associated with a BTree. Create +** a new one if necessary. +*/ +SQLITE_PRIVATE Schema *sqlite3SchemaGet(sqlite3 *db, Btree *pBt){ + Schema * p; + if( pBt ){ + p = (Schema *)sqlite3BtreeSchema(pBt, sizeof(Schema), sqlite3SchemaClear); + }else{ + p = (Schema *)sqlite3DbMallocZero(0, sizeof(Schema)); + } + if( !p ){ + sqlite3OomFault(db); + }else if ( 0==p->file_format ){ + sqlite3HashInit(&p->tblHash); + sqlite3HashInit(&p->idxHash); + sqlite3HashInit(&p->trigHash); + sqlite3HashInit(&p->fkeyHash); + p->enc = SQLITE_UTF8; + } + return p; +} + +/************** End of callback.c ********************************************/ +/************** Begin file delete.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains C code routines that are called by the parser +** in order to generate code for DELETE FROM statements. +*/ +/* #include "sqliteInt.h" */ + +/* +** While a SrcList can in general represent multiple tables and subqueries +** (as in the FROM clause of a SELECT statement) in this case it contains +** the name of a single table, as one might find in an INSERT, DELETE, +** or UPDATE statement. Look up that table in the symbol table and +** return a pointer. Set an error message and return NULL if the table +** name is not found or if any other error occurs. +** +** The following fields are initialized appropriate in pSrc: +** +** pSrc->a[0].pTab Pointer to the Table object +** pSrc->a[0].pIndex Pointer to the INDEXED BY index, if there is one +** +*/ +SQLITE_PRIVATE Table *sqlite3SrcListLookup(Parse *pParse, SrcList *pSrc){ + SrcItem *pItem = pSrc->a; + Table *pTab; + assert( pItem && pSrc->nSrc>=1 ); + pTab = sqlite3LocateTableItem(pParse, 0, pItem); + sqlite3DeleteTable(pParse->db, pItem->pTab); + pItem->pTab = pTab; + if( pTab ){ + pTab->nTabRef++; + if( pItem->fg.isIndexedBy && sqlite3IndexedByLookup(pParse, pItem) ){ + pTab = 0; + } + } + return pTab; +} + +/* Generate byte-code that will report the number of rows modified +** by a DELETE, INSERT, or UPDATE statement. +*/ +SQLITE_PRIVATE void sqlite3CodeChangeCount(Vdbe *v, int regCounter, const char *zColName){ + sqlite3VdbeAddOp0(v, OP_FkCheck); + sqlite3VdbeAddOp2(v, OP_ResultRow, regCounter, 1); + sqlite3VdbeSetNumCols(v, 1); + sqlite3VdbeSetColName(v, 0, COLNAME_NAME, zColName, SQLITE_STATIC); +} + +/* Return true if table pTab is read-only. +** +** A table is read-only if any of the following are true: +** +** 1) It is a virtual table and no implementation of the xUpdate method +** has been provided +** +** 2) It is a system table (i.e. sqlite_schema), this call is not +** part of a nested parse and writable_schema pragma has not +** been specified +** +** 3) The table is a shadow table, the database connection is in +** defensive mode, and the current sqlite3_prepare() +** is for a top-level SQL statement. +*/ +static int tabIsReadOnly(Parse *pParse, Table *pTab){ + sqlite3 *db; + if( IsVirtual(pTab) ){ + return sqlite3GetVTable(pParse->db, pTab)->pMod->pModule->xUpdate==0; + } + if( (pTab->tabFlags & (TF_Readonly|TF_Shadow))==0 ) return 0; + db = pParse->db; + if( (pTab->tabFlags & TF_Readonly)!=0 ){ + return sqlite3WritableSchema(db)==0 && pParse->nested==0; + } + assert( pTab->tabFlags & TF_Shadow ); + return sqlite3ReadOnlyShadowTables(db); +} + +/* +** Check to make sure the given table is writable. If it is not +** writable, generate an error message and return 1. If it is +** writable return 0; +*/ +SQLITE_PRIVATE int sqlite3IsReadOnly(Parse *pParse, Table *pTab, int viewOk){ + if( tabIsReadOnly(pParse, pTab) ){ + sqlite3ErrorMsg(pParse, "table %s may not be modified", pTab->zName); + return 1; + } +#ifndef SQLITE_OMIT_VIEW + if( !viewOk && IsView(pTab) ){ + sqlite3ErrorMsg(pParse,"cannot modify %s because it is a view",pTab->zName); + return 1; + } +#endif + return 0; +} + + +#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER) +/* +** Evaluate a view and store its result in an ephemeral table. The +** pWhere argument is an optional WHERE clause that restricts the +** set of rows in the view that are to be added to the ephemeral table. +*/ +SQLITE_PRIVATE void sqlite3MaterializeView( + Parse *pParse, /* Parsing context */ + Table *pView, /* View definition */ + Expr *pWhere, /* Optional WHERE clause to be added */ + ExprList *pOrderBy, /* Optional ORDER BY clause */ + Expr *pLimit, /* Optional LIMIT clause */ + int iCur /* Cursor number for ephemeral table */ +){ + SelectDest dest; + Select *pSel; + SrcList *pFrom; + sqlite3 *db = pParse->db; + int iDb = sqlite3SchemaToIndex(db, pView->pSchema); + pWhere = sqlite3ExprDup(db, pWhere, 0); + pFrom = sqlite3SrcListAppend(pParse, 0, 0, 0); + if( pFrom ){ + assert( pFrom->nSrc==1 ); + pFrom->a[0].zName = sqlite3DbStrDup(db, pView->zName); + pFrom->a[0].zDatabase = sqlite3DbStrDup(db, db->aDb[iDb].zDbSName); + assert( pFrom->a[0].pOn==0 ); + assert( pFrom->a[0].pUsing==0 ); + } + pSel = sqlite3SelectNew(pParse, 0, pFrom, pWhere, 0, 0, pOrderBy, + SF_IncludeHidden, pLimit); + sqlite3SelectDestInit(&dest, SRT_EphemTab, iCur); + sqlite3Select(pParse, pSel, &dest); + sqlite3SelectDelete(db, pSel); +} +#endif /* !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER) */ + +#if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY) +/* +** Generate an expression tree to implement the WHERE, ORDER BY, +** and LIMIT/OFFSET portion of DELETE and UPDATE statements. +** +** DELETE FROM table_wxyz WHERE a<5 ORDER BY a LIMIT 1; +** \__________________________/ +** pLimitWhere (pInClause) +*/ +SQLITE_PRIVATE Expr *sqlite3LimitWhere( + Parse *pParse, /* The parser context */ + SrcList *pSrc, /* the FROM clause -- which tables to scan */ + Expr *pWhere, /* The WHERE clause. May be null */ + ExprList *pOrderBy, /* The ORDER BY clause. May be null */ + Expr *pLimit, /* The LIMIT clause. May be null */ + char *zStmtType /* Either DELETE or UPDATE. For err msgs. */ +){ + sqlite3 *db = pParse->db; + Expr *pLhs = NULL; /* LHS of IN(SELECT...) operator */ + Expr *pInClause = NULL; /* WHERE rowid IN ( select ) */ + ExprList *pEList = NULL; /* Expression list contaning only pSelectRowid */ + SrcList *pSelectSrc = NULL; /* SELECT rowid FROM x ... (dup of pSrc) */ + Select *pSelect = NULL; /* Complete SELECT tree */ + Table *pTab; + + /* Check that there isn't an ORDER BY without a LIMIT clause. + */ + if( pOrderBy && pLimit==0 ) { + sqlite3ErrorMsg(pParse, "ORDER BY without LIMIT on %s", zStmtType); + sqlite3ExprDelete(pParse->db, pWhere); + sqlite3ExprListDelete(pParse->db, pOrderBy); + return 0; + } + + /* We only need to generate a select expression if there + ** is a limit/offset term to enforce. + */ + if( pLimit == 0 ) { + return pWhere; + } + + /* Generate a select expression tree to enforce the limit/offset + ** term for the DELETE or UPDATE statement. For example: + ** DELETE FROM table_a WHERE col1=1 ORDER BY col2 LIMIT 1 OFFSET 1 + ** becomes: + ** DELETE FROM table_a WHERE rowid IN ( + ** SELECT rowid FROM table_a WHERE col1=1 ORDER BY col2 LIMIT 1 OFFSET 1 + ** ); + */ + + pTab = pSrc->a[0].pTab; + if( HasRowid(pTab) ){ + pLhs = sqlite3PExpr(pParse, TK_ROW, 0, 0); + pEList = sqlite3ExprListAppend( + pParse, 0, sqlite3PExpr(pParse, TK_ROW, 0, 0) + ); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + if( pPk->nKeyCol==1 ){ + const char *zName = pTab->aCol[pPk->aiColumn[0]].zCnName; + pLhs = sqlite3Expr(db, TK_ID, zName); + pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db, TK_ID, zName)); + }else{ + int i; + for(i=0; inKeyCol; i++){ + Expr *p = sqlite3Expr(db, TK_ID, pTab->aCol[pPk->aiColumn[i]].zCnName); + pEList = sqlite3ExprListAppend(pParse, pEList, p); + } + pLhs = sqlite3PExpr(pParse, TK_VECTOR, 0, 0); + if( pLhs ){ + pLhs->x.pList = sqlite3ExprListDup(db, pEList, 0); + } + } + } + + /* duplicate the FROM clause as it is needed by both the DELETE/UPDATE tree + ** and the SELECT subtree. */ + pSrc->a[0].pTab = 0; + pSelectSrc = sqlite3SrcListDup(db, pSrc, 0); + pSrc->a[0].pTab = pTab; + if( pSrc->a[0].fg.isIndexedBy ){ + assert( pSrc->a[0].fg.isCte==0 ); + pSrc->a[0].u2.pIBIndex = 0; + pSrc->a[0].fg.isIndexedBy = 0; + sqlite3DbFree(db, pSrc->a[0].u1.zIndexedBy); + }else if( pSrc->a[0].fg.isCte ){ + pSrc->a[0].u2.pCteUse->nUse++; + } + + /* generate the SELECT expression tree. */ + pSelect = sqlite3SelectNew(pParse, pEList, pSelectSrc, pWhere, 0 ,0, + pOrderBy,0,pLimit + ); + + /* now generate the new WHERE rowid IN clause for the DELETE/UDPATE */ + pInClause = sqlite3PExpr(pParse, TK_IN, pLhs, 0); + sqlite3PExprAddSelect(pParse, pInClause, pSelect); + return pInClause; +} +#endif /* defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) */ + /* && !defined(SQLITE_OMIT_SUBQUERY) */ + +/* +** Generate code for a DELETE FROM statement. +** +** DELETE FROM table_wxyz WHERE a<5 AND b NOT NULL; +** \________/ \________________/ +** pTabList pWhere +*/ +SQLITE_PRIVATE void sqlite3DeleteFrom( + Parse *pParse, /* The parser context */ + SrcList *pTabList, /* The table from which we should delete things */ + Expr *pWhere, /* The WHERE clause. May be null */ + ExprList *pOrderBy, /* ORDER BY clause. May be null */ + Expr *pLimit /* LIMIT clause. May be null */ +){ + Vdbe *v; /* The virtual database engine */ + Table *pTab; /* The table from which records will be deleted */ + int i; /* Loop counter */ + WhereInfo *pWInfo; /* Information about the WHERE clause */ + Index *pIdx; /* For looping over indices of the table */ + int iTabCur; /* Cursor number for the table */ + int iDataCur = 0; /* VDBE cursor for the canonical data source */ + int iIdxCur = 0; /* Cursor number of the first index */ + int nIdx; /* Number of indices */ + sqlite3 *db; /* Main database structure */ + AuthContext sContext; /* Authorization context */ + NameContext sNC; /* Name context to resolve expressions in */ + int iDb; /* Database number */ + int memCnt = 0; /* Memory cell used for change counting */ + int rcauth; /* Value returned by authorization callback */ + int eOnePass; /* ONEPASS_OFF or _SINGLE or _MULTI */ + int aiCurOnePass[2]; /* The write cursors opened by WHERE_ONEPASS */ + u8 *aToOpen = 0; /* Open cursor iTabCur+j if aToOpen[j] is true */ + Index *pPk; /* The PRIMARY KEY index on the table */ + int iPk = 0; /* First of nPk registers holding PRIMARY KEY value */ + i16 nPk = 1; /* Number of columns in the PRIMARY KEY */ + int iKey; /* Memory cell holding key of row to be deleted */ + i16 nKey; /* Number of memory cells in the row key */ + int iEphCur = 0; /* Ephemeral table holding all primary key values */ + int iRowSet = 0; /* Register for rowset of rows to delete */ + int addrBypass = 0; /* Address of jump over the delete logic */ + int addrLoop = 0; /* Top of the delete loop */ + int addrEphOpen = 0; /* Instruction to open the Ephemeral table */ + int bComplex; /* True if there are triggers or FKs or + ** subqueries in the WHERE clause */ + +#ifndef SQLITE_OMIT_TRIGGER + int isView; /* True if attempting to delete from a view */ + Trigger *pTrigger; /* List of table triggers, if required */ +#endif + + memset(&sContext, 0, sizeof(sContext)); + db = pParse->db; + assert( db->pParse==pParse ); + if( pParse->nErr ){ + goto delete_from_cleanup; + } + assert( db->mallocFailed==0 ); + assert( pTabList->nSrc==1 ); + + + /* Locate the table which we want to delete. This table has to be + ** put in an SrcList structure because some of the subroutines we + ** will be calling are designed to work with multiple tables and expect + ** an SrcList* parameter instead of just a Table* parameter. + */ + pTab = sqlite3SrcListLookup(pParse, pTabList); + if( pTab==0 ) goto delete_from_cleanup; + + /* Figure out if we have any triggers and if the table being + ** deleted from is a view + */ +#ifndef SQLITE_OMIT_TRIGGER + pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0); + isView = IsView(pTab); +#else +# define pTrigger 0 +# define isView 0 +#endif + bComplex = pTrigger || sqlite3FkRequired(pParse, pTab, 0, 0); +#ifdef SQLITE_OMIT_VIEW +# undef isView +# define isView 0 +#endif + +#ifdef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + if( !isView ){ + pWhere = sqlite3LimitWhere( + pParse, pTabList, pWhere, pOrderBy, pLimit, "DELETE" + ); + pOrderBy = 0; + pLimit = 0; + } +#endif + + /* If pTab is really a view, make sure it has been initialized. + */ + if( sqlite3ViewGetColumnNames(pParse, pTab) ){ + goto delete_from_cleanup; + } + + if( sqlite3IsReadOnly(pParse, pTab, (pTrigger?1:0)) ){ + goto delete_from_cleanup; + } + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + assert( iDbnDb ); + rcauth = sqlite3AuthCheck(pParse, SQLITE_DELETE, pTab->zName, 0, + db->aDb[iDb].zDbSName); + assert( rcauth==SQLITE_OK || rcauth==SQLITE_DENY || rcauth==SQLITE_IGNORE ); + if( rcauth==SQLITE_DENY ){ + goto delete_from_cleanup; + } + assert(!isView || pTrigger); + + /* Assign cursor numbers to the table and all its indices. + */ + assert( pTabList->nSrc==1 ); + iTabCur = pTabList->a[0].iCursor = pParse->nTab++; + for(nIdx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, nIdx++){ + pParse->nTab++; + } + + /* Start the view context + */ + if( isView ){ + sqlite3AuthContextPush(pParse, &sContext, pTab->zName); + } + + /* Begin generating code. + */ + v = sqlite3GetVdbe(pParse); + if( v==0 ){ + goto delete_from_cleanup; + } + if( pParse->nested==0 ) sqlite3VdbeCountChanges(v); + sqlite3BeginWriteOperation(pParse, bComplex, iDb); + + /* If we are trying to delete from a view, realize that view into + ** an ephemeral table. + */ +#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER) + if( isView ){ + sqlite3MaterializeView(pParse, pTab, + pWhere, pOrderBy, pLimit, iTabCur + ); + iDataCur = iIdxCur = iTabCur; + pOrderBy = 0; + pLimit = 0; + } +#endif + + /* Resolve the column names in the WHERE clause. + */ + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + sNC.pSrcList = pTabList; + if( sqlite3ResolveExprNames(&sNC, pWhere) ){ + goto delete_from_cleanup; + } + + /* Initialize the counter of the number of rows deleted, if + ** we are counting rows. + */ + if( (db->flags & SQLITE_CountRows)!=0 + && !pParse->nested + && !pParse->pTriggerTab + && !pParse->bReturning + ){ + memCnt = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, memCnt); + } + +#ifndef SQLITE_OMIT_TRUNCATE_OPTIMIZATION + /* Special case: A DELETE without a WHERE clause deletes everything. + ** It is easier just to erase the whole table. Prior to version 3.6.5, + ** this optimization caused the row change count (the value returned by + ** API function sqlite3_count_changes) to be set incorrectly. + ** + ** The "rcauth==SQLITE_OK" terms is the + ** IMPLEMENTATION-OF: R-17228-37124 If the action code is SQLITE_DELETE and + ** the callback returns SQLITE_IGNORE then the DELETE operation proceeds but + ** the truncate optimization is disabled and all rows are deleted + ** individually. + */ + if( rcauth==SQLITE_OK + && pWhere==0 + && !bComplex + && !IsVirtual(pTab) +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + && db->xPreUpdateCallback==0 +#endif + ){ + assert( !isView ); + sqlite3TableLock(pParse, iDb, pTab->tnum, 1, pTab->zName); + if( HasRowid(pTab) ){ + sqlite3VdbeAddOp4(v, OP_Clear, pTab->tnum, iDb, memCnt ? memCnt : -1, + pTab->zName, P4_STATIC); + } + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + assert( pIdx->pSchema==pTab->pSchema ); + sqlite3VdbeAddOp2(v, OP_Clear, pIdx->tnum, iDb); + if( IsPrimaryKeyIndex(pIdx) && !HasRowid(pTab) ){ + sqlite3VdbeChangeP3(v, -1, memCnt ? memCnt : -1); + } + } + }else +#endif /* SQLITE_OMIT_TRUNCATE_OPTIMIZATION */ + { + u16 wcf = WHERE_ONEPASS_DESIRED|WHERE_DUPLICATES_OK; + if( sNC.ncFlags & NC_VarSelect ) bComplex = 1; + wcf |= (bComplex ? 0 : WHERE_ONEPASS_MULTIROW); + if( HasRowid(pTab) ){ + /* For a rowid table, initialize the RowSet to an empty set */ + pPk = 0; + nPk = 1; + iRowSet = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet); + }else{ + /* For a WITHOUT ROWID table, create an ephemeral table used to + ** hold all primary keys for rows to be deleted. */ + pPk = sqlite3PrimaryKeyIndex(pTab); + assert( pPk!=0 ); + nPk = pPk->nKeyCol; + iPk = pParse->nMem+1; + pParse->nMem += nPk; + iEphCur = pParse->nTab++; + addrEphOpen = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iEphCur, nPk); + sqlite3VdbeSetP4KeyInfo(pParse, pPk); + } + + /* Construct a query to find the rowid or primary key for every row + ** to be deleted, based on the WHERE clause. Set variable eOnePass + ** to indicate the strategy used to implement this delete: + ** + ** ONEPASS_OFF: Two-pass approach - use a FIFO for rowids/PK values. + ** ONEPASS_SINGLE: One-pass approach - at most one row deleted. + ** ONEPASS_MULTI: One-pass approach - any number of rows may be deleted. + */ + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, 0, 0,0,wcf,iTabCur+1); + if( pWInfo==0 ) goto delete_from_cleanup; + eOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass); + assert( IsVirtual(pTab)==0 || eOnePass!=ONEPASS_MULTI ); + assert( IsVirtual(pTab) || bComplex || eOnePass!=ONEPASS_OFF ); + if( eOnePass!=ONEPASS_SINGLE ) sqlite3MultiWrite(pParse); + if( sqlite3WhereUsesDeferredSeek(pWInfo) ){ + sqlite3VdbeAddOp1(v, OP_FinishSeek, iTabCur); + } + + /* Keep track of the number of rows to be deleted */ + if( memCnt ){ + sqlite3VdbeAddOp2(v, OP_AddImm, memCnt, 1); + } + + /* Extract the rowid or primary key for the current row */ + if( pPk ){ + for(i=0; iaiColumn[i]>=0 ); + sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur, + pPk->aiColumn[i], iPk+i); + } + iKey = iPk; + }else{ + iKey = ++pParse->nMem; + sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur, -1, iKey); + } + + if( eOnePass!=ONEPASS_OFF ){ + /* For ONEPASS, no need to store the rowid/primary-key. There is only + ** one, so just keep it in its register(s) and fall through to the + ** delete code. */ + nKey = nPk; /* OP_Found will use an unpacked key */ + aToOpen = sqlite3DbMallocRawNN(db, nIdx+2); + if( aToOpen==0 ){ + sqlite3WhereEnd(pWInfo); + goto delete_from_cleanup; + } + memset(aToOpen, 1, nIdx+1); + aToOpen[nIdx+1] = 0; + if( aiCurOnePass[0]>=0 ) aToOpen[aiCurOnePass[0]-iTabCur] = 0; + if( aiCurOnePass[1]>=0 ) aToOpen[aiCurOnePass[1]-iTabCur] = 0; + if( addrEphOpen ) sqlite3VdbeChangeToNoop(v, addrEphOpen); + addrBypass = sqlite3VdbeMakeLabel(pParse); + }else{ + if( pPk ){ + /* Add the PK key for this row to the temporary table */ + iKey = ++pParse->nMem; + nKey = 0; /* Zero tells OP_Found to use a composite key */ + sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, iKey, + sqlite3IndexAffinityStr(pParse->db, pPk), nPk); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iEphCur, iKey, iPk, nPk); + }else{ + /* Add the rowid of the row to be deleted to the RowSet */ + nKey = 1; /* OP_DeferredSeek always uses a single rowid */ + sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, iKey); + } + sqlite3WhereEnd(pWInfo); + } + + /* Unless this is a view, open cursors for the table we are + ** deleting from and all its indices. If this is a view, then the + ** only effect this statement has is to fire the INSTEAD OF + ** triggers. + */ + if( !isView ){ + int iAddrOnce = 0; + if( eOnePass==ONEPASS_MULTI ){ + iAddrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + } + testcase( IsVirtual(pTab) ); + sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, OPFLAG_FORDELETE, + iTabCur, aToOpen, &iDataCur, &iIdxCur); + assert( pPk || IsVirtual(pTab) || iDataCur==iTabCur ); + assert( pPk || IsVirtual(pTab) || iIdxCur==iDataCur+1 ); + if( eOnePass==ONEPASS_MULTI ){ + sqlite3VdbeJumpHereOrPopInst(v, iAddrOnce); + } + } + + /* Set up a loop over the rowids/primary-keys that were found in the + ** where-clause loop above. + */ + if( eOnePass!=ONEPASS_OFF ){ + assert( nKey==nPk ); /* OP_Found will use an unpacked key */ + if( !IsVirtual(pTab) && aToOpen[iDataCur-iTabCur] ){ + assert( pPk!=0 || IsView(pTab) ); + sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, addrBypass, iKey, nKey); + VdbeCoverage(v); + } + }else if( pPk ){ + addrLoop = sqlite3VdbeAddOp1(v, OP_Rewind, iEphCur); VdbeCoverage(v); + if( IsVirtual(pTab) ){ + sqlite3VdbeAddOp3(v, OP_Column, iEphCur, 0, iKey); + }else{ + sqlite3VdbeAddOp2(v, OP_RowData, iEphCur, iKey); + } + assert( nKey==0 ); /* OP_Found will use a composite key */ + }else{ + addrLoop = sqlite3VdbeAddOp3(v, OP_RowSetRead, iRowSet, 0, iKey); + VdbeCoverage(v); + assert( nKey==1 ); + } + + /* Delete the row */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pTab) ){ + const char *pVTab = (const char *)sqlite3GetVTable(db, pTab); + sqlite3VtabMakeWritable(pParse, pTab); + assert( eOnePass==ONEPASS_OFF || eOnePass==ONEPASS_SINGLE ); + sqlite3MayAbort(pParse); + if( eOnePass==ONEPASS_SINGLE ){ + sqlite3VdbeAddOp1(v, OP_Close, iTabCur); + if( sqlite3IsToplevel(pParse) ){ + pParse->isMultiWrite = 0; + } + } + sqlite3VdbeAddOp4(v, OP_VUpdate, 0, 1, iKey, pVTab, P4_VTAB); + sqlite3VdbeChangeP5(v, OE_Abort); + }else +#endif + { + int count = (pParse->nested==0); /* True to count changes */ + sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur, + iKey, nKey, count, OE_Default, eOnePass, aiCurOnePass[1]); + } + + /* End of the loop over all rowids/primary-keys. */ + if( eOnePass!=ONEPASS_OFF ){ + sqlite3VdbeResolveLabel(v, addrBypass); + sqlite3WhereEnd(pWInfo); + }else if( pPk ){ + sqlite3VdbeAddOp2(v, OP_Next, iEphCur, addrLoop+1); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addrLoop); + }else{ + sqlite3VdbeGoto(v, addrLoop); + sqlite3VdbeJumpHere(v, addrLoop); + } + } /* End non-truncate path */ + + /* Update the sqlite_sequence table by storing the content of the + ** maximum rowid counter values recorded while inserting into + ** autoincrement tables. + */ + if( pParse->nested==0 && pParse->pTriggerTab==0 ){ + sqlite3AutoincrementEnd(pParse); + } + + /* Return the number of rows that were deleted. If this routine is + ** generating code because of a call to sqlite3NestedParse(), do not + ** invoke the callback function. + */ + if( memCnt ){ + sqlite3CodeChangeCount(v, memCnt, "rows deleted"); + } + +delete_from_cleanup: + sqlite3AuthContextPop(&sContext); + sqlite3SrcListDelete(db, pTabList); + sqlite3ExprDelete(db, pWhere); +#if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) + sqlite3ExprListDelete(db, pOrderBy); + sqlite3ExprDelete(db, pLimit); +#endif + sqlite3DbFree(db, aToOpen); + return; +} +/* Make sure "isView" and other macros defined above are undefined. Otherwise +** they may interfere with compilation of other functions in this file +** (or in another file, if this file becomes part of the amalgamation). */ +#ifdef isView + #undef isView +#endif +#ifdef pTrigger + #undef pTrigger +#endif + +/* +** This routine generates VDBE code that causes a single row of a +** single table to be deleted. Both the original table entry and +** all indices are removed. +** +** Preconditions: +** +** 1. iDataCur is an open cursor on the btree that is the canonical data +** store for the table. (This will be either the table itself, +** in the case of a rowid table, or the PRIMARY KEY index in the case +** of a WITHOUT ROWID table.) +** +** 2. Read/write cursors for all indices of pTab must be open as +** cursor number iIdxCur+i for the i-th index. +** +** 3. The primary key for the row to be deleted must be stored in a +** sequence of nPk memory cells starting at iPk. If nPk==0 that means +** that a search record formed from OP_MakeRecord is contained in the +** single memory location iPk. +** +** eMode: +** Parameter eMode may be passed either ONEPASS_OFF (0), ONEPASS_SINGLE, or +** ONEPASS_MULTI. If eMode is not ONEPASS_OFF, then the cursor +** iDataCur already points to the row to delete. If eMode is ONEPASS_OFF +** then this function must seek iDataCur to the entry identified by iPk +** and nPk before reading from it. +** +** If eMode is ONEPASS_MULTI, then this call is being made as part +** of a ONEPASS delete that affects multiple rows. In this case, if +** iIdxNoSeek is a valid cursor number (>=0) and is not the same as +** iDataCur, then its position should be preserved following the delete +** operation. Or, if iIdxNoSeek is not a valid cursor number, the +** position of iDataCur should be preserved instead. +** +** iIdxNoSeek: +** If iIdxNoSeek is a valid cursor number (>=0) not equal to iDataCur, +** then it identifies an index cursor (from within array of cursors +** starting at iIdxCur) that already points to the index entry to be deleted. +** Except, this optimization is disabled if there are BEFORE triggers since +** the trigger body might have moved the cursor. +*/ +SQLITE_PRIVATE void sqlite3GenerateRowDelete( + Parse *pParse, /* Parsing context */ + Table *pTab, /* Table containing the row to be deleted */ + Trigger *pTrigger, /* List of triggers to (potentially) fire */ + int iDataCur, /* Cursor from which column data is extracted */ + int iIdxCur, /* First index cursor */ + int iPk, /* First memory cell containing the PRIMARY KEY */ + i16 nPk, /* Number of PRIMARY KEY memory cells */ + u8 count, /* If non-zero, increment the row change counter */ + u8 onconf, /* Default ON CONFLICT policy for triggers */ + u8 eMode, /* ONEPASS_OFF, _SINGLE, or _MULTI. See above */ + int iIdxNoSeek /* Cursor number of cursor that does not need seeking */ +){ + Vdbe *v = pParse->pVdbe; /* Vdbe */ + int iOld = 0; /* First register in OLD.* array */ + int iLabel; /* Label resolved to end of generated code */ + u8 opSeek; /* Seek opcode */ + + /* Vdbe is guaranteed to have been allocated by this stage. */ + assert( v ); + VdbeModuleComment((v, "BEGIN: GenRowDel(%d,%d,%d,%d)", + iDataCur, iIdxCur, iPk, (int)nPk)); + + /* Seek cursor iCur to the row to delete. If this row no longer exists + ** (this can happen if a trigger program has already deleted it), do + ** not attempt to delete it or fire any DELETE triggers. */ + iLabel = sqlite3VdbeMakeLabel(pParse); + opSeek = HasRowid(pTab) ? OP_NotExists : OP_NotFound; + if( eMode==ONEPASS_OFF ){ + sqlite3VdbeAddOp4Int(v, opSeek, iDataCur, iLabel, iPk, nPk); + VdbeCoverageIf(v, opSeek==OP_NotExists); + VdbeCoverageIf(v, opSeek==OP_NotFound); + } + + /* If there are any triggers to fire, allocate a range of registers to + ** use for the old.* references in the triggers. */ + if( sqlite3FkRequired(pParse, pTab, 0, 0) || pTrigger ){ + u32 mask; /* Mask of OLD.* columns in use */ + int iCol; /* Iterator used while populating OLD.* */ + int addrStart; /* Start of BEFORE trigger programs */ + + /* TODO: Could use temporary registers here. Also could attempt to + ** avoid copying the contents of the rowid register. */ + mask = sqlite3TriggerColmask( + pParse, pTrigger, 0, 0, TRIGGER_BEFORE|TRIGGER_AFTER, pTab, onconf + ); + mask |= sqlite3FkOldmask(pParse, pTab); + iOld = pParse->nMem+1; + pParse->nMem += (1 + pTab->nCol); + + /* Populate the OLD.* pseudo-table register array. These values will be + ** used by any BEFORE and AFTER triggers that exist. */ + sqlite3VdbeAddOp2(v, OP_Copy, iPk, iOld); + for(iCol=0; iColnCol; iCol++){ + testcase( mask!=0xffffffff && iCol==31 ); + testcase( mask!=0xffffffff && iCol==32 ); + if( mask==0xffffffff || (iCol<=31 && (mask & MASKBIT32(iCol))!=0) ){ + int kk = sqlite3TableColumnToStorage(pTab, iCol); + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, iCol, iOld+kk+1); + } + } + + /* Invoke BEFORE DELETE trigger programs. */ + addrStart = sqlite3VdbeCurrentAddr(v); + sqlite3CodeRowTrigger(pParse, pTrigger, + TK_DELETE, 0, TRIGGER_BEFORE, pTab, iOld, onconf, iLabel + ); + + /* If any BEFORE triggers were coded, then seek the cursor to the + ** row to be deleted again. It may be that the BEFORE triggers moved + ** the cursor or already deleted the row that the cursor was + ** pointing to. + ** + ** Also disable the iIdxNoSeek optimization since the BEFORE trigger + ** may have moved that cursor. + */ + if( addrStart=0 ); + iIdxNoSeek = -1; + } + + /* Do FK processing. This call checks that any FK constraints that + ** refer to this table (i.e. constraints attached to other tables) + ** are not violated by deleting this row. */ + sqlite3FkCheck(pParse, pTab, iOld, 0, 0, 0); + } + + /* Delete the index and table entries. Skip this step if pTab is really + ** a view (in which case the only effect of the DELETE statement is to + ** fire the INSTEAD OF triggers). + ** + ** If variable 'count' is non-zero, then this OP_Delete instruction should + ** invoke the update-hook. The pre-update-hook, on the other hand should + ** be invoked unless table pTab is a system table. The difference is that + ** the update-hook is not invoked for rows removed by REPLACE, but the + ** pre-update-hook is. + */ + if( !IsView(pTab) ){ + u8 p5 = 0; + sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur,0,iIdxNoSeek); + sqlite3VdbeAddOp2(v, OP_Delete, iDataCur, (count?OPFLAG_NCHANGE:0)); + if( pParse->nested==0 || 0==sqlite3_stricmp(pTab->zName, "sqlite_stat1") ){ + sqlite3VdbeAppendP4(v, (char*)pTab, P4_TABLE); + } + if( eMode!=ONEPASS_OFF ){ + sqlite3VdbeChangeP5(v, OPFLAG_AUXDELETE); + } + if( iIdxNoSeek>=0 && iIdxNoSeek!=iDataCur ){ + sqlite3VdbeAddOp1(v, OP_Delete, iIdxNoSeek); + } + if( eMode==ONEPASS_MULTI ) p5 |= OPFLAG_SAVEPOSITION; + sqlite3VdbeChangeP5(v, p5); + } + + /* Do any ON CASCADE, SET NULL or SET DEFAULT operations required to + ** handle rows (possibly in other tables) that refer via a foreign key + ** to the row just deleted. */ + sqlite3FkActions(pParse, pTab, 0, iOld, 0, 0); + + /* Invoke AFTER DELETE trigger programs. */ + sqlite3CodeRowTrigger(pParse, pTrigger, + TK_DELETE, 0, TRIGGER_AFTER, pTab, iOld, onconf, iLabel + ); + + /* Jump here if the row had already been deleted before any BEFORE + ** trigger programs were invoked. Or if a trigger program throws a + ** RAISE(IGNORE) exception. */ + sqlite3VdbeResolveLabel(v, iLabel); + VdbeModuleComment((v, "END: GenRowDel()")); +} + +/* +** This routine generates VDBE code that causes the deletion of all +** index entries associated with a single row of a single table, pTab +** +** Preconditions: +** +** 1. A read/write cursor "iDataCur" must be open on the canonical storage +** btree for the table pTab. (This will be either the table itself +** for rowid tables or to the primary key index for WITHOUT ROWID +** tables.) +** +** 2. Read/write cursors for all indices of pTab must be open as +** cursor number iIdxCur+i for the i-th index. (The pTab->pIndex +** index is the 0-th index.) +** +** 3. The "iDataCur" cursor must be already be positioned on the row +** that is to be deleted. +*/ +SQLITE_PRIVATE void sqlite3GenerateRowIndexDelete( + Parse *pParse, /* Parsing and code generating context */ + Table *pTab, /* Table containing the row to be deleted */ + int iDataCur, /* Cursor of table holding data. */ + int iIdxCur, /* First index cursor */ + int *aRegIdx, /* Only delete if aRegIdx!=0 && aRegIdx[i]>0 */ + int iIdxNoSeek /* Do not delete from this cursor */ +){ + int i; /* Index loop counter */ + int r1 = -1; /* Register holding an index key */ + int iPartIdxLabel; /* Jump destination for skipping partial index entries */ + Index *pIdx; /* Current index */ + Index *pPrior = 0; /* Prior index */ + Vdbe *v; /* The prepared statement under construction */ + Index *pPk; /* PRIMARY KEY index, or NULL for rowid tables */ + + v = pParse->pVdbe; + pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab); + for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){ + assert( iIdxCur+i!=iDataCur || pPk==pIdx ); + if( aRegIdx!=0 && aRegIdx[i]==0 ) continue; + if( pIdx==pPk ) continue; + if( iIdxCur+i==iIdxNoSeek ) continue; + VdbeModuleComment((v, "GenRowIdxDel for %s", pIdx->zName)); + r1 = sqlite3GenerateIndexKey(pParse, pIdx, iDataCur, 0, 1, + &iPartIdxLabel, pPrior, r1); + sqlite3VdbeAddOp3(v, OP_IdxDelete, iIdxCur+i, r1, + pIdx->uniqNotNull ? pIdx->nKeyCol : pIdx->nColumn); + sqlite3VdbeChangeP5(v, 1); /* Cause IdxDelete to error if no entry found */ + sqlite3ResolvePartIdxLabel(pParse, iPartIdxLabel); + pPrior = pIdx; + } +} + +/* +** Generate code that will assemble an index key and stores it in register +** regOut. The key with be for index pIdx which is an index on pTab. +** iCur is the index of a cursor open on the pTab table and pointing to +** the entry that needs indexing. If pTab is a WITHOUT ROWID table, then +** iCur must be the cursor of the PRIMARY KEY index. +** +** Return a register number which is the first in a block of +** registers that holds the elements of the index key. The +** block of registers has already been deallocated by the time +** this routine returns. +** +** If *piPartIdxLabel is not NULL, fill it in with a label and jump +** to that label if pIdx is a partial index that should be skipped. +** The label should be resolved using sqlite3ResolvePartIdxLabel(). +** A partial index should be skipped if its WHERE clause evaluates +** to false or null. If pIdx is not a partial index, *piPartIdxLabel +** will be set to zero which is an empty label that is ignored by +** sqlite3ResolvePartIdxLabel(). +** +** The pPrior and regPrior parameters are used to implement a cache to +** avoid unnecessary register loads. If pPrior is not NULL, then it is +** a pointer to a different index for which an index key has just been +** computed into register regPrior. If the current pIdx index is generating +** its key into the same sequence of registers and if pPrior and pIdx share +** a column in common, then the register corresponding to that column already +** holds the correct value and the loading of that register is skipped. +** This optimization is helpful when doing a DELETE or an INTEGRITY_CHECK +** on a table with multiple indices, and especially with the ROWID or +** PRIMARY KEY columns of the index. +*/ +SQLITE_PRIVATE int sqlite3GenerateIndexKey( + Parse *pParse, /* Parsing context */ + Index *pIdx, /* The index for which to generate a key */ + int iDataCur, /* Cursor number from which to take column data */ + int regOut, /* Put the new key into this register if not 0 */ + int prefixOnly, /* Compute only a unique prefix of the key */ + int *piPartIdxLabel, /* OUT: Jump to this label to skip partial index */ + Index *pPrior, /* Previously generated index key */ + int regPrior /* Register holding previous generated key */ +){ + Vdbe *v = pParse->pVdbe; + int j; + int regBase; + int nCol; + + if( piPartIdxLabel ){ + if( pIdx->pPartIdxWhere ){ + *piPartIdxLabel = sqlite3VdbeMakeLabel(pParse); + pParse->iSelfTab = iDataCur + 1; + sqlite3ExprIfFalseDup(pParse, pIdx->pPartIdxWhere, *piPartIdxLabel, + SQLITE_JUMPIFNULL); + pParse->iSelfTab = 0; + pPrior = 0; /* Ticket a9efb42811fa41ee 2019-11-02; + ** pPartIdxWhere may have corrupted regPrior registers */ + }else{ + *piPartIdxLabel = 0; + } + } + nCol = (prefixOnly && pIdx->uniqNotNull) ? pIdx->nKeyCol : pIdx->nColumn; + regBase = sqlite3GetTempRange(pParse, nCol); + if( pPrior && (regBase!=regPrior || pPrior->pPartIdxWhere) ) pPrior = 0; + for(j=0; jaiColumn[j]==pIdx->aiColumn[j] + && pPrior->aiColumn[j]!=XN_EXPR + ){ + /* This column was already computed by the previous index */ + continue; + } + sqlite3ExprCodeLoadIndexColumn(pParse, pIdx, iDataCur, j, regBase+j); + if( pIdx->aiColumn[j]>=0 ){ + /* If the column affinity is REAL but the number is an integer, then it + ** might be stored in the table as an integer (using a compact + ** representation) then converted to REAL by an OP_RealAffinity opcode. + ** But we are getting ready to store this value back into an index, where + ** it should be converted by to INTEGER again. So omit the + ** OP_RealAffinity opcode if it is present */ + sqlite3VdbeDeletePriorOpcode(v, OP_RealAffinity); + } + } + if( regOut ){ + sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regOut); + } + sqlite3ReleaseTempRange(pParse, regBase, nCol); + return regBase; +} + +/* +** If a prior call to sqlite3GenerateIndexKey() generated a jump-over label +** because it was a partial index, then this routine should be called to +** resolve that label. +*/ +SQLITE_PRIVATE void sqlite3ResolvePartIdxLabel(Parse *pParse, int iLabel){ + if( iLabel ){ + sqlite3VdbeResolveLabel(pParse->pVdbe, iLabel); + } +} + +/************** End of delete.c **********************************************/ +/************** Begin file func.c ********************************************/ +/* +** 2002 February 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C-language implementations for many of the SQL +** functions of SQLite. (Some function, and in particular the date and +** time functions, are implemented separately.) +*/ +/* #include "sqliteInt.h" */ +/* #include */ +/* #include */ +#ifndef SQLITE_OMIT_FLOATING_POINT +/* #include */ +#endif +/* #include "vdbeInt.h" */ + +/* +** Return the collating function associated with a function. +*/ +static CollSeq *sqlite3GetFuncCollSeq(sqlite3_context *context){ + VdbeOp *pOp; + assert( context->pVdbe!=0 ); + pOp = &context->pVdbe->aOp[context->iOp-1]; + assert( pOp->opcode==OP_CollSeq ); + assert( pOp->p4type==P4_COLLSEQ ); + return pOp->p4.pColl; +} + +/* +** Indicate that the accumulator load should be skipped on this +** iteration of the aggregate loop. +*/ +static void sqlite3SkipAccumulatorLoad(sqlite3_context *context){ + assert( context->isError<=0 ); + context->isError = -1; + context->skipFlag = 1; +} + +/* +** Implementation of the non-aggregate min() and max() functions +*/ +static void minmaxFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int i; + int mask; /* 0 for min() or 0xffffffff for max() */ + int iBest; + CollSeq *pColl; + + assert( argc>1 ); + mask = sqlite3_user_data(context)==0 ? 0 : -1; + pColl = sqlite3GetFuncCollSeq(context); + assert( pColl ); + assert( mask==-1 || mask==0 ); + iBest = 0; + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + for(i=1; i=0 ){ + testcase( mask==0 ); + iBest = i; + } + } + sqlite3_result_value(context, argv[iBest]); +} + +/* +** Return the type of the argument. +*/ +static void typeofFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + static const char *azType[] = { "integer", "real", "text", "blob", "null" }; + int i = sqlite3_value_type(argv[0]) - 1; + UNUSED_PARAMETER(NotUsed); + assert( i>=0 && i=0xc0 ){ + while( (*z & 0xc0)==0x80 ){ z++; z0++; } + } + } + sqlite3_result_int(context, (int)(z-z0)); + break; + } + default: { + sqlite3_result_null(context); + break; + } + } +} + +/* +** Implementation of the abs() function. +** +** IMP: R-23979-26855 The abs(X) function returns the absolute value of +** the numeric argument X. +*/ +static void absFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ + assert( argc==1 ); + UNUSED_PARAMETER(argc); + switch( sqlite3_value_type(argv[0]) ){ + case SQLITE_INTEGER: { + i64 iVal = sqlite3_value_int64(argv[0]); + if( iVal<0 ){ + if( iVal==SMALLEST_INT64 ){ + /* IMP: R-31676-45509 If X is the integer -9223372036854775808 + ** then abs(X) throws an integer overflow error since there is no + ** equivalent positive 64-bit two complement value. */ + sqlite3_result_error(context, "integer overflow", -1); + return; + } + iVal = -iVal; + } + sqlite3_result_int64(context, iVal); + break; + } + case SQLITE_NULL: { + /* IMP: R-37434-19929 Abs(X) returns NULL if X is NULL. */ + sqlite3_result_null(context); + break; + } + default: { + /* Because sqlite3_value_double() returns 0.0 if the argument is not + ** something that can be converted into a number, we have: + ** IMP: R-01992-00519 Abs(X) returns 0.0 if X is a string or blob + ** that cannot be converted to a numeric value. + */ + double rVal = sqlite3_value_double(argv[0]); + if( rVal<0 ) rVal = -rVal; + sqlite3_result_double(context, rVal); + break; + } + } +} + +/* +** Implementation of the instr() function. +** +** instr(haystack,needle) finds the first occurrence of needle +** in haystack and returns the number of previous characters plus 1, +** or 0 if needle does not occur within haystack. +** +** If both haystack and needle are BLOBs, then the result is one more than +** the number of bytes in haystack prior to the first occurrence of needle, +** or 0 if needle never occurs in haystack. +*/ +static void instrFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zHaystack; + const unsigned char *zNeedle; + int nHaystack; + int nNeedle; + int typeHaystack, typeNeedle; + int N = 1; + int isText; + unsigned char firstChar; + sqlite3_value *pC1 = 0; + sqlite3_value *pC2 = 0; + + UNUSED_PARAMETER(argc); + typeHaystack = sqlite3_value_type(argv[0]); + typeNeedle = sqlite3_value_type(argv[1]); + if( typeHaystack==SQLITE_NULL || typeNeedle==SQLITE_NULL ) return; + nHaystack = sqlite3_value_bytes(argv[0]); + nNeedle = sqlite3_value_bytes(argv[1]); + if( nNeedle>0 ){ + if( typeHaystack==SQLITE_BLOB && typeNeedle==SQLITE_BLOB ){ + zHaystack = sqlite3_value_blob(argv[0]); + zNeedle = sqlite3_value_blob(argv[1]); + isText = 0; + }else if( typeHaystack!=SQLITE_BLOB && typeNeedle!=SQLITE_BLOB ){ + zHaystack = sqlite3_value_text(argv[0]); + zNeedle = sqlite3_value_text(argv[1]); + isText = 1; + }else{ + pC1 = sqlite3_value_dup(argv[0]); + zHaystack = sqlite3_value_text(pC1); + if( zHaystack==0 ) goto endInstrOOM; + nHaystack = sqlite3_value_bytes(pC1); + pC2 = sqlite3_value_dup(argv[1]); + zNeedle = sqlite3_value_text(pC2); + if( zNeedle==0 ) goto endInstrOOM; + nNeedle = sqlite3_value_bytes(pC2); + isText = 1; + } + if( zNeedle==0 || (nHaystack && zHaystack==0) ) goto endInstrOOM; + firstChar = zNeedle[0]; + while( nNeedle<=nHaystack + && (zHaystack[0]!=firstChar || memcmp(zHaystack, zNeedle, nNeedle)!=0) + ){ + N++; + do{ + nHaystack--; + zHaystack++; + }while( isText && (zHaystack[0]&0xc0)==0x80 ); + } + if( nNeedle>nHaystack ) N = 0; + } + sqlite3_result_int(context, N); +endInstr: + sqlite3_value_free(pC1); + sqlite3_value_free(pC2); + return; +endInstrOOM: + sqlite3_result_error_nomem(context); + goto endInstr; +} + +/* +** Implementation of the printf() (a.k.a. format()) SQL function. +*/ +static void printfFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + PrintfArguments x; + StrAccum str; + const char *zFormat; + int n; + sqlite3 *db = sqlite3_context_db_handle(context); + + if( argc>=1 && (zFormat = (const char*)sqlite3_value_text(argv[0]))!=0 ){ + x.nArg = argc-1; + x.nUsed = 0; + x.apArg = argv+1; + sqlite3StrAccumInit(&str, db, 0, 0, db->aLimit[SQLITE_LIMIT_LENGTH]); + str.printfFlags = SQLITE_PRINTF_SQLFUNC; + sqlite3_str_appendf(&str, zFormat, &x); + n = str.nChar; + sqlite3_result_text(context, sqlite3StrAccumFinish(&str), n, + SQLITE_DYNAMIC); + } +} + +/* +** Implementation of the substr() function. +** +** substr(x,p1,p2) returns p2 characters of x[] beginning with p1. +** p1 is 1-indexed. So substr(x,1,1) returns the first character +** of x. If x is text, then we actually count UTF-8 characters. +** If x is a blob, then we count bytes. +** +** If p1 is negative, then we begin abs(p1) from the end of x[]. +** +** If p2 is negative, return the p2 characters preceding p1. +*/ +static void substrFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *z; + const unsigned char *z2; + int len; + int p0type; + i64 p1, p2; + int negP2 = 0; + + assert( argc==3 || argc==2 ); + if( sqlite3_value_type(argv[1])==SQLITE_NULL + || (argc==3 && sqlite3_value_type(argv[2])==SQLITE_NULL) + ){ + return; + } + p0type = sqlite3_value_type(argv[0]); + p1 = sqlite3_value_int(argv[1]); + if( p0type==SQLITE_BLOB ){ + len = sqlite3_value_bytes(argv[0]); + z = sqlite3_value_blob(argv[0]); + if( z==0 ) return; + assert( len==sqlite3_value_bytes(argv[0]) ); + }else{ + z = sqlite3_value_text(argv[0]); + if( z==0 ) return; + len = 0; + if( p1<0 ){ + for(z2=z; *z2; len++){ + SQLITE_SKIP_UTF8(z2); + } + } + } +#ifdef SQLITE_SUBSTR_COMPATIBILITY + /* If SUBSTR_COMPATIBILITY is defined then substr(X,0,N) work the same as + ** as substr(X,1,N) - it returns the first N characters of X. This + ** is essentially a back-out of the bug-fix in check-in [5fc125d362df4b8] + ** from 2009-02-02 for compatibility of applications that exploited the + ** old buggy behavior. */ + if( p1==0 ) p1 = 1; /* */ +#endif + if( argc==3 ){ + p2 = sqlite3_value_int(argv[2]); + if( p2<0 ){ + p2 = -p2; + negP2 = 1; + } + }else{ + p2 = sqlite3_context_db_handle(context)->aLimit[SQLITE_LIMIT_LENGTH]; + } + if( p1<0 ){ + p1 += len; + if( p1<0 ){ + p2 += p1; + if( p2<0 ) p2 = 0; + p1 = 0; + } + }else if( p1>0 ){ + p1--; + }else if( p2>0 ){ + p2--; + } + if( negP2 ){ + p1 -= p2; + if( p1<0 ){ + p2 += p1; + p1 = 0; + } + } + assert( p1>=0 && p2>=0 ); + if( p0type!=SQLITE_BLOB ){ + while( *z && p1 ){ + SQLITE_SKIP_UTF8(z); + p1--; + } + for(z2=z; *z2 && p2; p2--){ + SQLITE_SKIP_UTF8(z2); + } + sqlite3_result_text64(context, (char*)z, z2-z, SQLITE_TRANSIENT, + SQLITE_UTF8); + }else{ + if( p1+p2>len ){ + p2 = len-p1; + if( p2<0 ) p2 = 0; + } + sqlite3_result_blob64(context, (char*)&z[p1], (u64)p2, SQLITE_TRANSIENT); + } +} + +/* +** Implementation of the round() function +*/ +#ifndef SQLITE_OMIT_FLOATING_POINT +static void roundFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ + int n = 0; + double r; + char *zBuf; + assert( argc==1 || argc==2 ); + if( argc==2 ){ + if( SQLITE_NULL==sqlite3_value_type(argv[1]) ) return; + n = sqlite3_value_int(argv[1]); + if( n>30 ) n = 30; + if( n<0 ) n = 0; + } + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + r = sqlite3_value_double(argv[0]); + /* If Y==0 and X will fit in a 64-bit int, + ** handle the rounding directly, + ** otherwise use printf. + */ + if( r<-4503599627370496.0 || r>+4503599627370496.0 ){ + /* The value has no fractional part so there is nothing to round */ + }else if( n==0 ){ + r = (double)((sqlite_int64)(r+(r<0?-0.5:+0.5))); + }else{ + zBuf = sqlite3_mprintf("%.*f",n,r); + if( zBuf==0 ){ + sqlite3_result_error_nomem(context); + return; + } + sqlite3AtoF(zBuf, &r, sqlite3Strlen30(zBuf), SQLITE_UTF8); + sqlite3_free(zBuf); + } + sqlite3_result_double(context, r); +} +#endif + +/* +** Allocate nByte bytes of space using sqlite3Malloc(). If the +** allocation fails, call sqlite3_result_error_nomem() to notify +** the database handle that malloc() has failed and return NULL. +** If nByte is larger than the maximum string or blob length, then +** raise an SQLITE_TOOBIG exception and return NULL. +*/ +static void *contextMalloc(sqlite3_context *context, i64 nByte){ + char *z; + sqlite3 *db = sqlite3_context_db_handle(context); + assert( nByte>0 ); + testcase( nByte==db->aLimit[SQLITE_LIMIT_LENGTH] ); + testcase( nByte==db->aLimit[SQLITE_LIMIT_LENGTH]+1 ); + if( nByte>db->aLimit[SQLITE_LIMIT_LENGTH] ){ + sqlite3_result_error_toobig(context); + z = 0; + }else{ + z = sqlite3Malloc(nByte); + if( !z ){ + sqlite3_result_error_nomem(context); + } + } + return z; +} + +/* +** Implementation of the upper() and lower() SQL functions. +*/ +static void upperFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ + char *z1; + const char *z2; + int i, n; + UNUSED_PARAMETER(argc); + z2 = (char*)sqlite3_value_text(argv[0]); + n = sqlite3_value_bytes(argv[0]); + /* Verify that the call to _bytes() does not invalidate the _text() pointer */ + assert( z2==(char*)sqlite3_value_text(argv[0]) ); + if( z2 ){ + z1 = contextMalloc(context, ((i64)n)+1); + if( z1 ){ + for(i=0; imatchOne; /* "?" or "_" */ + u32 matchAll = pInfo->matchAll; /* "*" or "%" */ + u8 noCase = pInfo->noCase; /* True if uppercase==lowercase */ + const u8 *zEscaped = 0; /* One past the last escaped input char */ + + while( (c = Utf8Read(zPattern))!=0 ){ + if( c==matchAll ){ /* Match "*" */ + /* Skip over multiple "*" characters in the pattern. If there + ** are also "?" characters, skip those as well, but consume a + ** single character of the input string for each "?" skipped */ + while( (c=Utf8Read(zPattern)) == matchAll + || (c == matchOne && matchOne!=0) ){ + if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){ + return SQLITE_NOWILDCARDMATCH; + } + } + if( c==0 ){ + return SQLITE_MATCH; /* "*" at the end of the pattern matches */ + }else if( c==matchOther ){ + if( pInfo->matchSet==0 ){ + c = sqlite3Utf8Read(&zPattern); + if( c==0 ) return SQLITE_NOWILDCARDMATCH; + }else{ + /* "[...]" immediately follows the "*". We have to do a slow + ** recursive search in this case, but it is an unusual case. */ + assert( matchOther<0x80 ); /* '[' is a single-byte character */ + while( *zString ){ + int bMatch = patternCompare(&zPattern[-1],zString,pInfo,matchOther); + if( bMatch!=SQLITE_NOMATCH ) return bMatch; + SQLITE_SKIP_UTF8(zString); + } + return SQLITE_NOWILDCARDMATCH; + } + } + + /* At this point variable c contains the first character of the + ** pattern string past the "*". Search in the input string for the + ** first matching character and recursively continue the match from + ** that point. + ** + ** For a case-insensitive search, set variable cx to be the same as + ** c but in the other case and search the input string for either + ** c or cx. + */ + if( c<=0x80 ){ + char zStop[3]; + int bMatch; + if( noCase ){ + zStop[0] = sqlite3Toupper(c); + zStop[1] = sqlite3Tolower(c); + zStop[2] = 0; + }else{ + zStop[0] = c; + zStop[1] = 0; + } + while(1){ + zString += strcspn((const char*)zString, zStop); + if( zString[0]==0 ) break; + zString++; + bMatch = patternCompare(zPattern,zString,pInfo,matchOther); + if( bMatch!=SQLITE_NOMATCH ) return bMatch; + } + }else{ + int bMatch; + while( (c2 = Utf8Read(zString))!=0 ){ + if( c2!=c ) continue; + bMatch = patternCompare(zPattern,zString,pInfo,matchOther); + if( bMatch!=SQLITE_NOMATCH ) return bMatch; + } + } + return SQLITE_NOWILDCARDMATCH; + } + if( c==matchOther ){ + if( pInfo->matchSet==0 ){ + c = sqlite3Utf8Read(&zPattern); + if( c==0 ) return SQLITE_NOMATCH; + zEscaped = zPattern; + }else{ + u32 prior_c = 0; + int seen = 0; + int invert = 0; + c = sqlite3Utf8Read(&zString); + if( c==0 ) return SQLITE_NOMATCH; + c2 = sqlite3Utf8Read(&zPattern); + if( c2=='^' ){ + invert = 1; + c2 = sqlite3Utf8Read(&zPattern); + } + if( c2==']' ){ + if( c==']' ) seen = 1; + c2 = sqlite3Utf8Read(&zPattern); + } + while( c2 && c2!=']' ){ + if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){ + c2 = sqlite3Utf8Read(&zPattern); + if( c>=prior_c && c<=c2 ) seen = 1; + prior_c = 0; + }else{ + if( c==c2 ){ + seen = 1; + } + prior_c = c2; + } + c2 = sqlite3Utf8Read(&zPattern); + } + if( c2==0 || (seen ^ invert)==0 ){ + return SQLITE_NOMATCH; + } + continue; + } + } + c2 = Utf8Read(zString); + if( c==c2 ) continue; + if( noCase && sqlite3Tolower(c)==sqlite3Tolower(c2) && c<0x80 && c2<0x80 ){ + continue; + } + if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue; + return SQLITE_NOMATCH; + } + return *zString==0 ? SQLITE_MATCH : SQLITE_NOMATCH; +} + +/* +** The sqlite3_strglob() interface. Return 0 on a match (like strcmp()) and +** non-zero if there is no match. +*/ +SQLITE_API int sqlite3_strglob(const char *zGlobPattern, const char *zString){ + return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '['); +} + +/* +** The sqlite3_strlike() interface. Return 0 on a match and non-zero for +** a miss - like strcmp(). +*/ +SQLITE_API int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc){ + return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc); +} + +/* +** Count the number of times that the LIKE operator (or GLOB which is +** just a variation of LIKE) gets called. This is used for testing +** only. +*/ +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_like_count = 0; +#endif + + +/* +** Implementation of the like() SQL function. This function implements +** the build-in LIKE operator. The first argument to the function is the +** pattern and the second argument is the string. So, the SQL statements: +** +** A LIKE B +** +** is implemented as like(B,A). +** +** This same function (with a different compareInfo structure) computes +** the GLOB operator. +*/ +static void likeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zA, *zB; + u32 escape; + int nPat; + sqlite3 *db = sqlite3_context_db_handle(context); + struct compareInfo *pInfo = sqlite3_user_data(context); + struct compareInfo backupInfo; + +#ifdef SQLITE_LIKE_DOESNT_MATCH_BLOBS + if( sqlite3_value_type(argv[0])==SQLITE_BLOB + || sqlite3_value_type(argv[1])==SQLITE_BLOB + ){ +#ifdef SQLITE_TEST + sqlite3_like_count++; +#endif + sqlite3_result_int(context, 0); + return; + } +#endif + + /* Limit the length of the LIKE or GLOB pattern to avoid problems + ** of deep recursion and N*N behavior in patternCompare(). + */ + nPat = sqlite3_value_bytes(argv[0]); + testcase( nPat==db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH] ); + testcase( nPat==db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]+1 ); + if( nPat > db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH] ){ + sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); + return; + } + if( argc==3 ){ + /* The escape character string must consist of a single UTF-8 character. + ** Otherwise, return an error. + */ + const unsigned char *zEsc = sqlite3_value_text(argv[2]); + if( zEsc==0 ) return; + if( sqlite3Utf8CharLen((char*)zEsc, -1)!=1 ){ + sqlite3_result_error(context, + "ESCAPE expression must be a single character", -1); + return; + } + escape = sqlite3Utf8Read(&zEsc); + if( escape==pInfo->matchAll || escape==pInfo->matchOne ){ + memcpy(&backupInfo, pInfo, sizeof(backupInfo)); + pInfo = &backupInfo; + if( escape==pInfo->matchAll ) pInfo->matchAll = 0; + if( escape==pInfo->matchOne ) pInfo->matchOne = 0; + } + }else{ + escape = pInfo->matchSet; + } + zB = sqlite3_value_text(argv[0]); + zA = sqlite3_value_text(argv[1]); + if( zA && zB ){ +#ifdef SQLITE_TEST + sqlite3_like_count++; +#endif + sqlite3_result_int(context, + patternCompare(zB, zA, pInfo, escape)==SQLITE_MATCH); + } +} + +/* +** Implementation of the NULLIF(x,y) function. The result is the first +** argument if the arguments are different. The result is NULL if the +** arguments are equal to each other. +*/ +static void nullifFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + CollSeq *pColl = sqlite3GetFuncCollSeq(context); + UNUSED_PARAMETER(NotUsed); + if( sqlite3MemCompare(argv[0], argv[1], pColl)!=0 ){ + sqlite3_result_value(context, argv[0]); + } +} + +/* +** Implementation of the sqlite_version() function. The result is the version +** of the SQLite library that is running. +*/ +static void versionFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **NotUsed2 +){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + /* IMP: R-48699-48617 This function is an SQL wrapper around the + ** sqlite3_libversion() C-interface. */ + sqlite3_result_text(context, sqlite3_libversion(), -1, SQLITE_STATIC); +} + +/* +** Implementation of the sqlite_source_id() function. The result is a string +** that identifies the particular version of the source code used to build +** SQLite. +*/ +static void sourceidFunc( + sqlite3_context *context, + int NotUsed, + sqlite3_value **NotUsed2 +){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + /* IMP: R-24470-31136 This function is an SQL wrapper around the + ** sqlite3_sourceid() C interface. */ + sqlite3_result_text(context, sqlite3_sourceid(), -1, SQLITE_STATIC); +} + +/* +** Implementation of the sqlite_log() function. This is a wrapper around +** sqlite3_log(). The return value is NULL. The function exists purely for +** its side-effects. +*/ +static void errlogFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(context); + sqlite3_log(sqlite3_value_int(argv[0]), "%s", sqlite3_value_text(argv[1])); +} + +/* +** Implementation of the sqlite_compileoption_used() function. +** The result is an integer that identifies if the compiler option +** was used to build SQLite. +*/ +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS +static void compileoptionusedFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *zOptName; + assert( argc==1 ); + UNUSED_PARAMETER(argc); + /* IMP: R-39564-36305 The sqlite_compileoption_used() SQL + ** function is a wrapper around the sqlite3_compileoption_used() C/C++ + ** function. + */ + if( (zOptName = (const char*)sqlite3_value_text(argv[0]))!=0 ){ + sqlite3_result_int(context, sqlite3_compileoption_used(zOptName)); + } +} +#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + +/* +** Implementation of the sqlite_compileoption_get() function. +** The result is a string that identifies the compiler options +** used to build SQLite. +*/ +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS +static void compileoptiongetFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int n; + assert( argc==1 ); + UNUSED_PARAMETER(argc); + /* IMP: R-04922-24076 The sqlite_compileoption_get() SQL function + ** is a wrapper around the sqlite3_compileoption_get() C/C++ function. + */ + n = sqlite3_value_int(argv[0]); + sqlite3_result_text(context, sqlite3_compileoption_get(n), -1, SQLITE_STATIC); +} +#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + +/* Array for converting from half-bytes (nybbles) into ASCII hex +** digits. */ +static const char hexdigits[] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' +}; + +/* +** Append to pStr text that is the SQL literal representation of the +** value contained in pValue. +*/ +SQLITE_PRIVATE void sqlite3QuoteValue(StrAccum *pStr, sqlite3_value *pValue){ + /* As currently implemented, the string must be initially empty. + ** we might relax this requirement in the future, but that will + ** require enhancements to the implementation. */ + assert( pStr!=0 && pStr->nChar==0 ); + + switch( sqlite3_value_type(pValue) ){ + case SQLITE_FLOAT: { + double r1, r2; + const char *zVal; + r1 = sqlite3_value_double(pValue); + sqlite3_str_appendf(pStr, "%!.15g", r1); + zVal = sqlite3_str_value(pStr); + if( zVal ){ + sqlite3AtoF(zVal, &r2, pStr->nChar, SQLITE_UTF8); + if( r1!=r2 ){ + sqlite3_str_reset(pStr); + sqlite3_str_appendf(pStr, "%!.20e", r1); + } + } + break; + } + case SQLITE_INTEGER: { + sqlite3_str_appendf(pStr, "%lld", sqlite3_value_int64(pValue)); + break; + } + case SQLITE_BLOB: { + char const *zBlob = sqlite3_value_blob(pValue); + int nBlob = sqlite3_value_bytes(pValue); + assert( zBlob==sqlite3_value_blob(pValue) ); /* No encoding change */ + sqlite3StrAccumEnlarge(pStr, nBlob*2 + 4); + if( pStr->accError==0 ){ + char *zText = pStr->zText; + int i; + for(i=0; i>4)&0x0F]; + zText[(i*2)+3] = hexdigits[(zBlob[i])&0x0F]; + } + zText[(nBlob*2)+2] = '\''; + zText[(nBlob*2)+3] = '\0'; + zText[0] = 'X'; + zText[1] = '\''; + pStr->nChar = nBlob*2 + 3; + } + break; + } + case SQLITE_TEXT: { + const unsigned char *zArg = sqlite3_value_text(pValue); + sqlite3_str_appendf(pStr, "%Q", zArg); + break; + } + default: { + assert( sqlite3_value_type(pValue)==SQLITE_NULL ); + sqlite3_str_append(pStr, "NULL", 4); + break; + } + } +} + +/* +** Implementation of the QUOTE() function. +** +** The quote(X) function returns the text of an SQL literal which is the +** value of its argument suitable for inclusion into an SQL statement. +** Strings are surrounded by single-quotes with escapes on interior quotes +** as needed. BLOBs are encoded as hexadecimal literals. Strings with +** embedded NUL characters cannot be represented as string literals in SQL +** and hence the returned string literal is truncated prior to the first NUL. +*/ +static void quoteFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ + sqlite3_str str; + sqlite3 *db = sqlite3_context_db_handle(context); + assert( argc==1 ); + UNUSED_PARAMETER(argc); + sqlite3StrAccumInit(&str, db, 0, 0, db->aLimit[SQLITE_LIMIT_LENGTH]); + sqlite3QuoteValue(&str,argv[0]); + sqlite3_result_text(context, sqlite3StrAccumFinish(&str), str.nChar, + SQLITE_DYNAMIC); + if( str.accError!=SQLITE_OK ){ + sqlite3_result_null(context); + sqlite3_result_error_code(context, str.accError); + } +} + +/* +** The unicode() function. Return the integer unicode code-point value +** for the first character of the input string. +*/ +static void unicodeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *z = sqlite3_value_text(argv[0]); + (void)argc; + if( z && z[0] ) sqlite3_result_int(context, sqlite3Utf8Read(&z)); +} + +/* +** The char() function takes zero or more arguments, each of which is +** an integer. It constructs a string where each character of the string +** is the unicode character for the corresponding integer argument. +*/ +static void charFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + unsigned char *z, *zOut; + int i; + zOut = z = sqlite3_malloc64( argc*4+1 ); + if( z==0 ){ + sqlite3_result_error_nomem(context); + return; + } + for(i=0; i0x10ffff ) x = 0xfffd; + c = (unsigned)(x & 0x1fffff); + if( c<0x00080 ){ + *zOut++ = (u8)(c&0xFF); + }else if( c<0x00800 ){ + *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); + *zOut++ = 0x80 + (u8)(c & 0x3F); + }else if( c<0x10000 ){ + *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); + *zOut++ = 0x80 + (u8)(c & 0x3F); + }else{ + *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); + *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); + *zOut++ = 0x80 + (u8)(c & 0x3F); + } \ + } + sqlite3_result_text64(context, (char*)z, zOut-z, sqlite3_free, SQLITE_UTF8); +} + +/* +** The hex() function. Interpret the argument as a blob. Return +** a hexadecimal rendering as text. +*/ +static void hexFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int i, n; + const unsigned char *pBlob; + char *zHex, *z; + assert( argc==1 ); + UNUSED_PARAMETER(argc); + pBlob = sqlite3_value_blob(argv[0]); + n = sqlite3_value_bytes(argv[0]); + assert( pBlob==sqlite3_value_blob(argv[0]) ); /* No encoding change */ + z = zHex = contextMalloc(context, ((i64)n)*2 + 1); + if( zHex ){ + for(i=0; i>4)&0xf]; + *(z++) = hexdigits[c&0xf]; + } + *z = 0; + sqlite3_result_text(context, zHex, n*2, sqlite3_free); + } +} + +/* +** The zeroblob(N) function returns a zero-filled blob of size N bytes. +*/ +static void zeroblobFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + i64 n; + int rc; + assert( argc==1 ); + UNUSED_PARAMETER(argc); + n = sqlite3_value_int64(argv[0]); + if( n<0 ) n = 0; + rc = sqlite3_result_zeroblob64(context, n); /* IMP: R-00293-64994 */ + if( rc ){ + sqlite3_result_error_code(context, rc); + } +} + +/* +** The replace() function. Three arguments are all strings: call +** them A, B, and C. The result is also a string which is derived +** from A by replacing every occurrence of B with C. The match +** must be exact. Collating sequences are not used. +*/ +static void replaceFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zStr; /* The input string A */ + const unsigned char *zPattern; /* The pattern string B */ + const unsigned char *zRep; /* The replacement string C */ + unsigned char *zOut; /* The output */ + int nStr; /* Size of zStr */ + int nPattern; /* Size of zPattern */ + int nRep; /* Size of zRep */ + i64 nOut; /* Maximum size of zOut */ + int loopLimit; /* Last zStr[] that might match zPattern[] */ + int i, j; /* Loop counters */ + unsigned cntExpand; /* Number zOut expansions */ + sqlite3 *db = sqlite3_context_db_handle(context); + + assert( argc==3 ); + UNUSED_PARAMETER(argc); + zStr = sqlite3_value_text(argv[0]); + if( zStr==0 ) return; + nStr = sqlite3_value_bytes(argv[0]); + assert( zStr==sqlite3_value_text(argv[0]) ); /* No encoding change */ + zPattern = sqlite3_value_text(argv[1]); + if( zPattern==0 ){ + assert( sqlite3_value_type(argv[1])==SQLITE_NULL + || sqlite3_context_db_handle(context)->mallocFailed ); + return; + } + if( zPattern[0]==0 ){ + assert( sqlite3_value_type(argv[1])!=SQLITE_NULL ); + sqlite3_result_value(context, argv[0]); + return; + } + nPattern = sqlite3_value_bytes(argv[1]); + assert( zPattern==sqlite3_value_text(argv[1]) ); /* No encoding change */ + zRep = sqlite3_value_text(argv[2]); + if( zRep==0 ) return; + nRep = sqlite3_value_bytes(argv[2]); + assert( zRep==sqlite3_value_text(argv[2]) ); + nOut = nStr + 1; + assert( nOutnPattern ){ + nOut += nRep - nPattern; + testcase( nOut-1==db->aLimit[SQLITE_LIMIT_LENGTH] ); + testcase( nOut-2==db->aLimit[SQLITE_LIMIT_LENGTH] ); + if( nOut-1>db->aLimit[SQLITE_LIMIT_LENGTH] ){ + sqlite3_result_error_toobig(context); + sqlite3_free(zOut); + return; + } + cntExpand++; + if( (cntExpand&(cntExpand-1))==0 ){ + /* Grow the size of the output buffer only on substitutions + ** whose index is a power of two: 1, 2, 4, 8, 16, 32, ... */ + u8 *zOld; + zOld = zOut; + zOut = sqlite3Realloc(zOut, (int)nOut + (nOut - nStr - 1)); + if( zOut==0 ){ + sqlite3_result_error_nomem(context); + sqlite3_free(zOld); + return; + } + } + } + memcpy(&zOut[j], zRep, nRep); + j += nRep; + i += nPattern-1; + } + } + assert( j+nStr-i+1<=nOut ); + memcpy(&zOut[j], &zStr[i], nStr-i); + j += nStr - i; + assert( j<=nOut ); + zOut[j] = 0; + sqlite3_result_text(context, (char*)zOut, j, sqlite3_free); +} + +/* +** Implementation of the TRIM(), LTRIM(), and RTRIM() functions. +** The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. +*/ +static void trimFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zIn; /* Input string */ + const unsigned char *zCharSet; /* Set of characters to trim */ + unsigned int nIn; /* Number of bytes in input */ + int flags; /* 1: trimleft 2: trimright 3: trim */ + int i; /* Loop counter */ + unsigned int *aLen = 0; /* Length of each character in zCharSet */ + unsigned char **azChar = 0; /* Individual characters in zCharSet */ + int nChar; /* Number of characters in zCharSet */ + + if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ + return; + } + zIn = sqlite3_value_text(argv[0]); + if( zIn==0 ) return; + nIn = (unsigned)sqlite3_value_bytes(argv[0]); + assert( zIn==sqlite3_value_text(argv[0]) ); + if( argc==1 ){ + static const unsigned lenOne[] = { 1 }; + static unsigned char * const azOne[] = { (u8*)" " }; + nChar = 1; + aLen = (unsigned*)lenOne; + azChar = (unsigned char **)azOne; + zCharSet = 0; + }else if( (zCharSet = sqlite3_value_text(argv[1]))==0 ){ + return; + }else{ + const unsigned char *z; + for(z=zCharSet, nChar=0; *z; nChar++){ + SQLITE_SKIP_UTF8(z); + } + if( nChar>0 ){ + azChar = contextMalloc(context, + ((i64)nChar)*(sizeof(char*)+sizeof(unsigned))); + if( azChar==0 ){ + return; + } + aLen = (unsigned*)&azChar[nChar]; + for(z=zCharSet, nChar=0; *z; nChar++){ + azChar[nChar] = (unsigned char *)z; + SQLITE_SKIP_UTF8(z); + aLen[nChar] = (unsigned)(z - azChar[nChar]); + } + } + } + if( nChar>0 ){ + flags = SQLITE_PTR_TO_INT(sqlite3_user_data(context)); + if( flags & 1 ){ + while( nIn>0 ){ + unsigned int len = 0; + for(i=0; i=nChar ) break; + zIn += len; + nIn -= len; + } + } + if( flags & 2 ){ + while( nIn>0 ){ + unsigned int len = 0; + for(i=0; i=nChar ) break; + nIn -= len; + } + } + if( zCharSet ){ + sqlite3_free(azChar); + } + } + sqlite3_result_text(context, (char*)zIn, nIn, SQLITE_TRANSIENT); +} + + +#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION +/* +** The "unknown" function is automatically substituted in place of +** any unrecognized function name when doing an EXPLAIN or EXPLAIN QUERY PLAN +** when the SQLITE_ENABLE_UNKNOWN_FUNCTION compile-time option is used. +** When the "sqlite3" command-line shell is built using this functionality, +** that allows an EXPLAIN or EXPLAIN QUERY PLAN for complex queries +** involving application-defined functions to be examined in a generic +** sqlite3 shell. +*/ +static void unknownFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + /* no-op */ +} +#endif /*SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION*/ + + +/* IMP: R-25361-16150 This function is omitted from SQLite by default. It +** is only available if the SQLITE_SOUNDEX compile-time option is used +** when SQLite is built. +*/ +#ifdef SQLITE_SOUNDEX +/* +** Compute the soundex encoding of a word. +** +** IMP: R-59782-00072 The soundex(X) function returns a string that is the +** soundex encoding of the string X. +*/ +static void soundexFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + char zResult[8]; + const u8 *zIn; + int i, j; + static const unsigned char iCode[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, + 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, + 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, + }; + assert( argc==1 ); + zIn = (u8*)sqlite3_value_text(argv[0]); + if( zIn==0 ) zIn = (u8*)""; + for(i=0; zIn[i] && !sqlite3Isalpha(zIn[i]); i++){} + if( zIn[i] ){ + u8 prevcode = iCode[zIn[i]&0x7f]; + zResult[0] = sqlite3Toupper(zIn[i]); + for(j=1; j<4 && zIn[i]; i++){ + int code = iCode[zIn[i]&0x7f]; + if( code>0 ){ + if( code!=prevcode ){ + prevcode = code; + zResult[j++] = code + '0'; + } + }else{ + prevcode = 0; + } + } + while( j<4 ){ + zResult[j++] = '0'; + } + zResult[j] = 0; + sqlite3_result_text(context, zResult, 4, SQLITE_TRANSIENT); + }else{ + /* IMP: R-64894-50321 The string "?000" is returned if the argument + ** is NULL or contains no ASCII alphabetic characters. */ + sqlite3_result_text(context, "?000", 4, SQLITE_STATIC); + } +} +#endif /* SQLITE_SOUNDEX */ + +#ifndef SQLITE_OMIT_LOAD_EXTENSION +/* +** A function that loads a shared-library extension then returns NULL. +*/ +static void loadExt(sqlite3_context *context, int argc, sqlite3_value **argv){ + const char *zFile = (const char *)sqlite3_value_text(argv[0]); + const char *zProc; + sqlite3 *db = sqlite3_context_db_handle(context); + char *zErrMsg = 0; + + /* Disallow the load_extension() SQL function unless the SQLITE_LoadExtFunc + ** flag is set. See the sqlite3_enable_load_extension() API. + */ + if( (db->flags & SQLITE_LoadExtFunc)==0 ){ + sqlite3_result_error(context, "not authorized", -1); + return; + } + + if( argc==2 ){ + zProc = (const char *)sqlite3_value_text(argv[1]); + }else{ + zProc = 0; + } + if( zFile && sqlite3_load_extension(db, zFile, zProc, &zErrMsg) ){ + sqlite3_result_error(context, zErrMsg, -1); + sqlite3_free(zErrMsg); + } +} +#endif + + +/* +** An instance of the following structure holds the context of a +** sum() or avg() aggregate computation. +*/ +typedef struct SumCtx SumCtx; +struct SumCtx { + double rSum; /* Floating point sum */ + i64 iSum; /* Integer sum */ + i64 cnt; /* Number of elements summed */ + u8 overflow; /* True if integer overflow seen */ + u8 approx; /* True if non-integer value was input to the sum */ +}; + +/* +** Routines used to compute the sum, average, and total. +** +** The SUM() function follows the (broken) SQL standard which means +** that it returns NULL if it sums over no inputs. TOTAL returns +** 0.0 in that case. In addition, TOTAL always returns a float where +** SUM might return an integer if it never encounters a floating point +** value. TOTAL never fails, but SUM might through an exception if +** it overflows an integer. +*/ +static void sumStep(sqlite3_context *context, int argc, sqlite3_value **argv){ + SumCtx *p; + int type; + assert( argc==1 ); + UNUSED_PARAMETER(argc); + p = sqlite3_aggregate_context(context, sizeof(*p)); + type = sqlite3_value_numeric_type(argv[0]); + if( p && type!=SQLITE_NULL ){ + p->cnt++; + if( type==SQLITE_INTEGER ){ + i64 v = sqlite3_value_int64(argv[0]); + p->rSum += v; + if( (p->approx|p->overflow)==0 && sqlite3AddInt64(&p->iSum, v) ){ + p->approx = p->overflow = 1; + } + }else{ + p->rSum += sqlite3_value_double(argv[0]); + p->approx = 1; + } + } +} +#ifndef SQLITE_OMIT_WINDOWFUNC +static void sumInverse(sqlite3_context *context, int argc, sqlite3_value**argv){ + SumCtx *p; + int type; + assert( argc==1 ); + UNUSED_PARAMETER(argc); + p = sqlite3_aggregate_context(context, sizeof(*p)); + type = sqlite3_value_numeric_type(argv[0]); + /* p is always non-NULL because sumStep() will have been called first + ** to initialize it */ + if( ALWAYS(p) && type!=SQLITE_NULL ){ + assert( p->cnt>0 ); + p->cnt--; + assert( type==SQLITE_INTEGER || p->approx ); + if( type==SQLITE_INTEGER && p->approx==0 ){ + i64 v = sqlite3_value_int64(argv[0]); + p->rSum -= v; + p->iSum -= v; + }else{ + p->rSum -= sqlite3_value_double(argv[0]); + } + } +} +#else +# define sumInverse 0 +#endif /* SQLITE_OMIT_WINDOWFUNC */ +static void sumFinalize(sqlite3_context *context){ + SumCtx *p; + p = sqlite3_aggregate_context(context, 0); + if( p && p->cnt>0 ){ + if( p->overflow ){ + sqlite3_result_error(context,"integer overflow",-1); + }else if( p->approx ){ + sqlite3_result_double(context, p->rSum); + }else{ + sqlite3_result_int64(context, p->iSum); + } + } +} +static void avgFinalize(sqlite3_context *context){ + SumCtx *p; + p = sqlite3_aggregate_context(context, 0); + if( p && p->cnt>0 ){ + sqlite3_result_double(context, p->rSum/(double)p->cnt); + } +} +static void totalFinalize(sqlite3_context *context){ + SumCtx *p; + p = sqlite3_aggregate_context(context, 0); + /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ + sqlite3_result_double(context, p ? p->rSum : (double)0); +} + +/* +** The following structure keeps track of state information for the +** count() aggregate function. +*/ +typedef struct CountCtx CountCtx; +struct CountCtx { + i64 n; +#ifdef SQLITE_DEBUG + int bInverse; /* True if xInverse() ever called */ +#endif +}; + +/* +** Routines to implement the count() aggregate function. +*/ +static void countStep(sqlite3_context *context, int argc, sqlite3_value **argv){ + CountCtx *p; + p = sqlite3_aggregate_context(context, sizeof(*p)); + if( (argc==0 || SQLITE_NULL!=sqlite3_value_type(argv[0])) && p ){ + p->n++; + } + +#ifndef SQLITE_OMIT_DEPRECATED + /* The sqlite3_aggregate_count() function is deprecated. But just to make + ** sure it still operates correctly, verify that its count agrees with our + ** internal count when using count(*) and when the total count can be + ** expressed as a 32-bit integer. */ + assert( argc==1 || p==0 || p->n>0x7fffffff || p->bInverse + || p->n==sqlite3_aggregate_count(context) ); +#endif +} +static void countFinalize(sqlite3_context *context){ + CountCtx *p; + p = sqlite3_aggregate_context(context, 0); + sqlite3_result_int64(context, p ? p->n : 0); +} +#ifndef SQLITE_OMIT_WINDOWFUNC +static void countInverse(sqlite3_context *ctx, int argc, sqlite3_value **argv){ + CountCtx *p; + p = sqlite3_aggregate_context(ctx, sizeof(*p)); + /* p is always non-NULL since countStep() will have been called first */ + if( (argc==0 || SQLITE_NULL!=sqlite3_value_type(argv[0])) && ALWAYS(p) ){ + p->n--; +#ifdef SQLITE_DEBUG + p->bInverse = 1; +#endif + } +} +#else +# define countInverse 0 +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +/* +** Routines to implement min() and max() aggregate functions. +*/ +static void minmaxStep( + sqlite3_context *context, + int NotUsed, + sqlite3_value **argv +){ + Mem *pArg = (Mem *)argv[0]; + Mem *pBest; + UNUSED_PARAMETER(NotUsed); + + pBest = (Mem *)sqlite3_aggregate_context(context, sizeof(*pBest)); + if( !pBest ) return; + + if( sqlite3_value_type(pArg)==SQLITE_NULL ){ + if( pBest->flags ) sqlite3SkipAccumulatorLoad(context); + }else if( pBest->flags ){ + int max; + int cmp; + CollSeq *pColl = sqlite3GetFuncCollSeq(context); + /* This step function is used for both the min() and max() aggregates, + ** the only difference between the two being that the sense of the + ** comparison is inverted. For the max() aggregate, the + ** sqlite3_user_data() function returns (void *)-1. For min() it + ** returns (void *)db, where db is the sqlite3* database pointer. + ** Therefore the next statement sets variable 'max' to 1 for the max() + ** aggregate, or 0 for min(). + */ + max = sqlite3_user_data(context)!=0; + cmp = sqlite3MemCompare(pBest, pArg, pColl); + if( (max && cmp<0) || (!max && cmp>0) ){ + sqlite3VdbeMemCopy(pBest, pArg); + }else{ + sqlite3SkipAccumulatorLoad(context); + } + }else{ + pBest->db = sqlite3_context_db_handle(context); + sqlite3VdbeMemCopy(pBest, pArg); + } +} +static void minMaxValueFinalize(sqlite3_context *context, int bValue){ + sqlite3_value *pRes; + pRes = (sqlite3_value *)sqlite3_aggregate_context(context, 0); + if( pRes ){ + if( pRes->flags ){ + sqlite3_result_value(context, pRes); + } + if( bValue==0 ) sqlite3VdbeMemRelease(pRes); + } +} +#ifndef SQLITE_OMIT_WINDOWFUNC +static void minMaxValue(sqlite3_context *context){ + minMaxValueFinalize(context, 1); +} +#else +# define minMaxValue 0 +#endif /* SQLITE_OMIT_WINDOWFUNC */ +static void minMaxFinalize(sqlite3_context *context){ + minMaxValueFinalize(context, 0); +} + +/* +** group_concat(EXPR, ?SEPARATOR?) +** +** The SEPARATOR goes before the EXPR string. This is tragic. The +** groupConcatInverse() implementation would have been easier if the +** SEPARATOR were appended after EXPR. And the order is undocumented, +** so we could change it, in theory. But the old behavior has been +** around for so long that we dare not, for fear of breaking something. +*/ +typedef struct { + StrAccum str; /* The accumulated concatenation */ +#ifndef SQLITE_OMIT_WINDOWFUNC + int nAccum; /* Number of strings presently concatenated */ + int nFirstSepLength; /* Used to detect separator length change */ + /* If pnSepLengths!=0, refs an array of inter-string separator lengths, + ** stored as actually incorporated into presently accumulated result. + ** (Hence, its slots in use number nAccum-1 between method calls.) + ** If pnSepLengths==0, nFirstSepLength is the length used throughout. + */ + int *pnSepLengths; +#endif +} GroupConcatCtx; + +static void groupConcatStep( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *zVal; + GroupConcatCtx *pGCC; + const char *zSep; + int nVal, nSep; + assert( argc==1 || argc==2 ); + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + pGCC = (GroupConcatCtx*)sqlite3_aggregate_context(context, sizeof(*pGCC)); + if( pGCC ){ + sqlite3 *db = sqlite3_context_db_handle(context); + int firstTerm = pGCC->str.mxAlloc==0; + pGCC->str.mxAlloc = db->aLimit[SQLITE_LIMIT_LENGTH]; + if( argc==1 ){ + if( !firstTerm ){ + sqlite3_str_appendchar(&pGCC->str, 1, ','); + } +#ifndef SQLITE_OMIT_WINDOWFUNC + else{ + pGCC->nFirstSepLength = 1; + } +#endif + }else if( !firstTerm ){ + zSep = (char*)sqlite3_value_text(argv[1]); + nSep = sqlite3_value_bytes(argv[1]); + if( zSep ){ + sqlite3_str_append(&pGCC->str, zSep, nSep); + } +#ifndef SQLITE_OMIT_WINDOWFUNC + else{ + nSep = 0; + } + if( nSep != pGCC->nFirstSepLength || pGCC->pnSepLengths != 0 ){ + int *pnsl = pGCC->pnSepLengths; + if( pnsl == 0 ){ + /* First separator length variation seen, start tracking them. */ + pnsl = (int*)sqlite3_malloc64((pGCC->nAccum+1) * sizeof(int)); + if( pnsl!=0 ){ + int i = 0, nA = pGCC->nAccum-1; + while( inFirstSepLength; + } + }else{ + pnsl = (int*)sqlite3_realloc64(pnsl, pGCC->nAccum * sizeof(int)); + } + if( pnsl!=0 ){ + if( ALWAYS(pGCC->nAccum>0) ){ + pnsl[pGCC->nAccum-1] = nSep; + } + pGCC->pnSepLengths = pnsl; + }else{ + sqlite3StrAccumSetError(&pGCC->str, SQLITE_NOMEM); + } + } +#endif + } +#ifndef SQLITE_OMIT_WINDOWFUNC + else{ + pGCC->nFirstSepLength = sqlite3_value_bytes(argv[1]); + } + pGCC->nAccum += 1; +#endif + zVal = (char*)sqlite3_value_text(argv[0]); + nVal = sqlite3_value_bytes(argv[0]); + if( zVal ) sqlite3_str_append(&pGCC->str, zVal, nVal); + } +} + +#ifndef SQLITE_OMIT_WINDOWFUNC +static void groupConcatInverse( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GroupConcatCtx *pGCC; + assert( argc==1 || argc==2 ); + (void)argc; /* Suppress unused parameter warning */ + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + pGCC = (GroupConcatCtx*)sqlite3_aggregate_context(context, sizeof(*pGCC)); + /* pGCC is always non-NULL since groupConcatStep() will have always + ** run frist to initialize it */ + if( ALWAYS(pGCC) ){ + int nVS; + /* Must call sqlite3_value_text() to convert the argument into text prior + ** to invoking sqlite3_value_bytes(), in case the text encoding is UTF16 */ + (void)sqlite3_value_text(argv[0]); + nVS = sqlite3_value_bytes(argv[0]); + pGCC->nAccum -= 1; + if( pGCC->pnSepLengths!=0 ){ + assert(pGCC->nAccum >= 0); + if( pGCC->nAccum>0 ){ + nVS += *pGCC->pnSepLengths; + memmove(pGCC->pnSepLengths, pGCC->pnSepLengths+1, + (pGCC->nAccum-1)*sizeof(int)); + } + }else{ + /* If removing single accumulated string, harmlessly over-do. */ + nVS += pGCC->nFirstSepLength; + } + if( nVS>=(int)pGCC->str.nChar ){ + pGCC->str.nChar = 0; + }else{ + pGCC->str.nChar -= nVS; + memmove(pGCC->str.zText, &pGCC->str.zText[nVS], pGCC->str.nChar); + } + if( pGCC->str.nChar==0 ){ + pGCC->str.mxAlloc = 0; + sqlite3_free(pGCC->pnSepLengths); + pGCC->pnSepLengths = 0; + } + } +} +#else +# define groupConcatInverse 0 +#endif /* SQLITE_OMIT_WINDOWFUNC */ +static void groupConcatFinalize(sqlite3_context *context){ + GroupConcatCtx *pGCC + = (GroupConcatCtx*)sqlite3_aggregate_context(context, 0); + if( pGCC ){ + sqlite3ResultStrAccum(context, &pGCC->str); +#ifndef SQLITE_OMIT_WINDOWFUNC + sqlite3_free(pGCC->pnSepLengths); +#endif + } +} +#ifndef SQLITE_OMIT_WINDOWFUNC +static void groupConcatValue(sqlite3_context *context){ + GroupConcatCtx *pGCC + = (GroupConcatCtx*)sqlite3_aggregate_context(context, 0); + if( pGCC ){ + StrAccum *pAccum = &pGCC->str; + if( pAccum->accError==SQLITE_TOOBIG ){ + sqlite3_result_error_toobig(context); + }else if( pAccum->accError==SQLITE_NOMEM ){ + sqlite3_result_error_nomem(context); + }else{ + const char *zText = sqlite3_str_value(pAccum); + sqlite3_result_text(context, zText, pAccum->nChar, SQLITE_TRANSIENT); + } + } +} +#else +# define groupConcatValue 0 +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +/* +** This routine does per-connection function registration. Most +** of the built-in functions above are part of the global function set. +** This routine only deals with those that are not global. +*/ +SQLITE_PRIVATE void sqlite3RegisterPerConnectionBuiltinFunctions(sqlite3 *db){ + int rc = sqlite3_overload_function(db, "MATCH", 2); + assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); + if( rc==SQLITE_NOMEM ){ + sqlite3OomFault(db); + } +} + +/* +** Re-register the built-in LIKE functions. The caseSensitive +** parameter determines whether or not the LIKE operator is case +** sensitive. +*/ +SQLITE_PRIVATE void sqlite3RegisterLikeFunctions(sqlite3 *db, int caseSensitive){ + struct compareInfo *pInfo; + int flags; + if( caseSensitive ){ + pInfo = (struct compareInfo*)&likeInfoAlt; + flags = SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE; + }else{ + pInfo = (struct compareInfo*)&likeInfoNorm; + flags = SQLITE_FUNC_LIKE; + } + sqlite3CreateFunc(db, "like", 2, SQLITE_UTF8, pInfo, likeFunc, 0, 0, 0, 0, 0); + sqlite3CreateFunc(db, "like", 3, SQLITE_UTF8, pInfo, likeFunc, 0, 0, 0, 0, 0); + sqlite3FindFunction(db, "like", 2, SQLITE_UTF8, 0)->funcFlags |= flags; + sqlite3FindFunction(db, "like", 3, SQLITE_UTF8, 0)->funcFlags |= flags; +} + +/* +** pExpr points to an expression which implements a function. If +** it is appropriate to apply the LIKE optimization to that function +** then set aWc[0] through aWc[2] to the wildcard characters and the +** escape character and then return TRUE. If the function is not a +** LIKE-style function then return FALSE. +** +** The expression "a LIKE b ESCAPE c" is only considered a valid LIKE +** operator if c is a string literal that is exactly one byte in length. +** That one byte is stored in aWc[3]. aWc[3] is set to zero if there is +** no ESCAPE clause. +** +** *pIsNocase is set to true if uppercase and lowercase are equivalent for +** the function (default for LIKE). If the function makes the distinction +** between uppercase and lowercase (as does GLOB) then *pIsNocase is set to +** false. +*/ +SQLITE_PRIVATE int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){ + FuncDef *pDef; + int nExpr; + assert( pExpr!=0 ); + assert( pExpr->op==TK_FUNCTION ); + assert( ExprUseXList(pExpr) ); + if( !pExpr->x.pList ){ + return 0; + } + nExpr = pExpr->x.pList->nExpr; + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + pDef = sqlite3FindFunction(db, pExpr->u.zToken, nExpr, SQLITE_UTF8, 0); +#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION + if( pDef==0 ) return 0; +#endif + if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){ + return 0; + } + + /* The memcpy() statement assumes that the wildcard characters are + ** the first three statements in the compareInfo structure. The + ** asserts() that follow verify that assumption + */ + memcpy(aWc, pDef->pUserData, 3); + assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll ); + assert( &((char*)&likeInfoAlt)[1] == (char*)&likeInfoAlt.matchOne ); + assert( &((char*)&likeInfoAlt)[2] == (char*)&likeInfoAlt.matchSet ); + + if( nExpr<3 ){ + aWc[3] = 0; + }else{ + Expr *pEscape = pExpr->x.pList->a[2].pExpr; + char *zEscape; + if( pEscape->op!=TK_STRING ) return 0; + assert( !ExprHasProperty(pEscape, EP_IntValue) ); + zEscape = pEscape->u.zToken; + if( zEscape[0]==0 || zEscape[1]!=0 ) return 0; + if( zEscape[0]==aWc[0] ) return 0; + if( zEscape[0]==aWc[1] ) return 0; + aWc[3] = zEscape[0]; + } + + *pIsNocase = (pDef->funcFlags & SQLITE_FUNC_CASE)==0; + return 1; +} + +/* Mathematical Constants */ +#ifndef M_PI +# define M_PI 3.141592653589793238462643383279502884 +#endif +#ifndef M_LN10 +# define M_LN10 2.302585092994045684017991454684364208 +#endif +#ifndef M_LN2 +# define M_LN2 0.693147180559945309417232121458176568 +#endif + + +/* Extra math functions that require linking with -lm +*/ +#ifdef SQLITE_ENABLE_MATH_FUNCTIONS +/* +** Implementation SQL functions: +** +** ceil(X) +** ceiling(X) +** floor(X) +** +** The sqlite3_user_data() pointer is a pointer to the libm implementation +** of the underlying C function. +*/ +static void ceilingFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + assert( argc==1 ); + switch( sqlite3_value_numeric_type(argv[0]) ){ + case SQLITE_INTEGER: { + sqlite3_result_int64(context, sqlite3_value_int64(argv[0])); + break; + } + case SQLITE_FLOAT: { + double (*x)(double) = (double(*)(double))sqlite3_user_data(context); + sqlite3_result_double(context, x(sqlite3_value_double(argv[0]))); + break; + } + default: { + break; + } + } +} + +/* +** On some systems, ceil() and floor() are intrinsic function. You are +** unable to take a pointer to these functions. Hence, we here wrap them +** in our own actual functions. +*/ +static double xCeil(double x){ return ceil(x); } +static double xFloor(double x){ return floor(x); } + +/* +** Implementation of SQL functions: +** +** ln(X) - natural logarithm +** log(X) - log X base 10 +** log10(X) - log X base 10 +** log(B,X) - log X base B +*/ +static void logFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + double x, b, ans; + assert( argc==1 || argc==2 ); + switch( sqlite3_value_numeric_type(argv[0]) ){ + case SQLITE_INTEGER: + case SQLITE_FLOAT: + x = sqlite3_value_double(argv[0]); + if( x<=0.0 ) return; + break; + default: + return; + } + if( argc==2 ){ + switch( sqlite3_value_numeric_type(argv[0]) ){ + case SQLITE_INTEGER: + case SQLITE_FLOAT: + b = log(x); + if( b<=0.0 ) return; + x = sqlite3_value_double(argv[1]); + if( x<=0.0 ) return; + break; + default: + return; + } + ans = log(x)/b; + }else{ + ans = log(x); + switch( SQLITE_PTR_TO_INT(sqlite3_user_data(context)) ){ + case 1: + /* Convert from natural logarithm to log base 10 */ + ans *= 1.0/M_LN10; + break; + case 2: + /* Convert from natural logarithm to log base 2 */ + ans *= 1.0/M_LN2; + break; + default: + break; + } + } + sqlite3_result_double(context, ans); +} + +/* +** Functions to converts degrees to radians and radians to degrees. +*/ +static double degToRad(double x){ return x*(M_PI/180.0); } +static double radToDeg(double x){ return x*(180.0/M_PI); } + +/* +** Implementation of 1-argument SQL math functions: +** +** exp(X) - Compute e to the X-th power +*/ +static void math1Func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int type0; + double v0, ans; + double (*x)(double); + assert( argc==1 ); + type0 = sqlite3_value_numeric_type(argv[0]); + if( type0!=SQLITE_INTEGER && type0!=SQLITE_FLOAT ) return; + v0 = sqlite3_value_double(argv[0]); + x = (double(*)(double))sqlite3_user_data(context); + ans = x(v0); + sqlite3_result_double(context, ans); +} + +/* +** Implementation of 2-argument SQL math functions: +** +** power(X,Y) - Compute X to the Y-th power +*/ +static void math2Func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int type0, type1; + double v0, v1, ans; + double (*x)(double,double); + assert( argc==2 ); + type0 = sqlite3_value_numeric_type(argv[0]); + if( type0!=SQLITE_INTEGER && type0!=SQLITE_FLOAT ) return; + type1 = sqlite3_value_numeric_type(argv[1]); + if( type1!=SQLITE_INTEGER && type1!=SQLITE_FLOAT ) return; + v0 = sqlite3_value_double(argv[0]); + v1 = sqlite3_value_double(argv[1]); + x = (double(*)(double,double))sqlite3_user_data(context); + ans = x(v0, v1); + sqlite3_result_double(context, ans); +} + +/* +** Implementation of 0-argument pi() function. +*/ +static void piFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + assert( argc==0 ); + sqlite3_result_double(context, M_PI); +} + +#endif /* SQLITE_ENABLE_MATH_FUNCTIONS */ + +/* +** Implementation of sign(X) function. +*/ +static void signFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int type0; + double x; + UNUSED_PARAMETER(argc); + assert( argc==1 ); + type0 = sqlite3_value_numeric_type(argv[0]); + if( type0!=SQLITE_INTEGER && type0!=SQLITE_FLOAT ) return; + x = sqlite3_value_double(argv[0]); + sqlite3_result_int(context, x<0.0 ? -1 : x>0.0 ? +1 : 0); +} + +/* +** All of the FuncDef structures in the aBuiltinFunc[] array above +** to the global function hash table. This occurs at start-time (as +** a consequence of calling sqlite3_initialize()). +** +** After this routine runs +*/ +SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(void){ + /* + ** The following array holds FuncDef structures for all of the functions + ** defined in this file. + ** + ** The array cannot be constant since changes are made to the + ** FuncDef.pHash elements at start-time. The elements of this array + ** are read-only after initialization is complete. + ** + ** For peak efficiency, put the most frequently used function last. + */ + static FuncDef aBuiltinFunc[] = { +/***** Functions only available with SQLITE_TESTCTRL_INTERNAL_FUNCTIONS *****/ +#if !defined(SQLITE_UNTESTABLE) + TEST_FUNC(implies_nonnull_row, 2, INLINEFUNC_implies_nonnull_row, 0), + TEST_FUNC(expr_compare, 2, INLINEFUNC_expr_compare, 0), + TEST_FUNC(expr_implies_expr, 2, INLINEFUNC_expr_implies_expr, 0), + TEST_FUNC(affinity, 1, INLINEFUNC_affinity, 0), +#endif /* !defined(SQLITE_UNTESTABLE) */ +/***** Regular functions *****/ +#ifdef SQLITE_SOUNDEX + FUNCTION(soundex, 1, 0, 0, soundexFunc ), +#endif +#ifndef SQLITE_OMIT_LOAD_EXTENSION + SFUNCTION(load_extension, 1, 0, 0, loadExt ), + SFUNCTION(load_extension, 2, 0, 0, loadExt ), +#endif +#if SQLITE_USER_AUTHENTICATION + FUNCTION(sqlite_crypt, 2, 0, 0, sqlite3CryptFunc ), +#endif +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS + DFUNCTION(sqlite_compileoption_used,1, 0, 0, compileoptionusedFunc ), + DFUNCTION(sqlite_compileoption_get, 1, 0, 0, compileoptiongetFunc ), +#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + INLINE_FUNC(unlikely, 1, INLINEFUNC_unlikely, SQLITE_FUNC_UNLIKELY), + INLINE_FUNC(likelihood, 2, INLINEFUNC_unlikely, SQLITE_FUNC_UNLIKELY), + INLINE_FUNC(likely, 1, INLINEFUNC_unlikely, SQLITE_FUNC_UNLIKELY), +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC + {1, SQLITE_FUNC_BUILTIN|SQLITE_UTF8|SQLITE_FUNC_OFFSET|SQLITE_FUNC_TYPEOF, + 0, 0, noopFunc, 0, 0, 0, "sqlite_offset", {0} }, +#endif + FUNCTION(ltrim, 1, 1, 0, trimFunc ), + FUNCTION(ltrim, 2, 1, 0, trimFunc ), + FUNCTION(rtrim, 1, 2, 0, trimFunc ), + FUNCTION(rtrim, 2, 2, 0, trimFunc ), + FUNCTION(trim, 1, 3, 0, trimFunc ), + FUNCTION(trim, 2, 3, 0, trimFunc ), + FUNCTION(min, -1, 0, 1, minmaxFunc ), + FUNCTION(min, 0, 0, 1, 0 ), + WAGGREGATE(min, 1, 0, 1, minmaxStep, minMaxFinalize, minMaxValue, 0, + SQLITE_FUNC_MINMAX|SQLITE_FUNC_ANYORDER ), + FUNCTION(max, -1, 1, 1, minmaxFunc ), + FUNCTION(max, 0, 1, 1, 0 ), + WAGGREGATE(max, 1, 1, 1, minmaxStep, minMaxFinalize, minMaxValue, 0, + SQLITE_FUNC_MINMAX|SQLITE_FUNC_ANYORDER ), + FUNCTION2(typeof, 1, 0, 0, typeofFunc, SQLITE_FUNC_TYPEOF), + FUNCTION2(subtype, 1, 0, 0, subtypeFunc, SQLITE_FUNC_TYPEOF), + FUNCTION2(length, 1, 0, 0, lengthFunc, SQLITE_FUNC_LENGTH), + FUNCTION(instr, 2, 0, 0, instrFunc ), + FUNCTION(printf, -1, 0, 0, printfFunc ), + FUNCTION(format, -1, 0, 0, printfFunc ), + FUNCTION(unicode, 1, 0, 0, unicodeFunc ), + FUNCTION(char, -1, 0, 0, charFunc ), + FUNCTION(abs, 1, 0, 0, absFunc ), +#ifndef SQLITE_OMIT_FLOATING_POINT + FUNCTION(round, 1, 0, 0, roundFunc ), + FUNCTION(round, 2, 0, 0, roundFunc ), +#endif + FUNCTION(upper, 1, 0, 0, upperFunc ), + FUNCTION(lower, 1, 0, 0, lowerFunc ), + FUNCTION(hex, 1, 0, 0, hexFunc ), + INLINE_FUNC(ifnull, 2, INLINEFUNC_coalesce, 0 ), + VFUNCTION(random, 0, 0, 0, randomFunc ), + VFUNCTION(randomblob, 1, 0, 0, randomBlob ), + FUNCTION(nullif, 2, 0, 1, nullifFunc ), + DFUNCTION(sqlite_version, 0, 0, 0, versionFunc ), + DFUNCTION(sqlite_source_id, 0, 0, 0, sourceidFunc ), + FUNCTION(sqlite_log, 2, 0, 0, errlogFunc ), + FUNCTION(quote, 1, 0, 0, quoteFunc ), + VFUNCTION(last_insert_rowid, 0, 0, 0, last_insert_rowid), + VFUNCTION(changes, 0, 0, 0, changes ), + VFUNCTION(total_changes, 0, 0, 0, total_changes ), + FUNCTION(replace, 3, 0, 0, replaceFunc ), + FUNCTION(zeroblob, 1, 0, 0, zeroblobFunc ), + FUNCTION(substr, 2, 0, 0, substrFunc ), + FUNCTION(substr, 3, 0, 0, substrFunc ), + FUNCTION(substring, 2, 0, 0, substrFunc ), + FUNCTION(substring, 3, 0, 0, substrFunc ), + WAGGREGATE(sum, 1,0,0, sumStep, sumFinalize, sumFinalize, sumInverse, 0), + WAGGREGATE(total, 1,0,0, sumStep,totalFinalize,totalFinalize,sumInverse, 0), + WAGGREGATE(avg, 1,0,0, sumStep, avgFinalize, avgFinalize, sumInverse, 0), + WAGGREGATE(count, 0,0,0, countStep, + countFinalize, countFinalize, countInverse, + SQLITE_FUNC_COUNT|SQLITE_FUNC_ANYORDER ), + WAGGREGATE(count, 1,0,0, countStep, + countFinalize, countFinalize, countInverse, SQLITE_FUNC_ANYORDER ), + WAGGREGATE(group_concat, 1, 0, 0, groupConcatStep, + groupConcatFinalize, groupConcatValue, groupConcatInverse, 0), + WAGGREGATE(group_concat, 2, 0, 0, groupConcatStep, + groupConcatFinalize, groupConcatValue, groupConcatInverse, 0), + + LIKEFUNC(glob, 2, &globInfo, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE), +#ifdef SQLITE_CASE_SENSITIVE_LIKE + LIKEFUNC(like, 2, &likeInfoAlt, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE), + LIKEFUNC(like, 3, &likeInfoAlt, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE), +#else + LIKEFUNC(like, 2, &likeInfoNorm, SQLITE_FUNC_LIKE), + LIKEFUNC(like, 3, &likeInfoNorm, SQLITE_FUNC_LIKE), +#endif +#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION + FUNCTION(unknown, -1, 0, 0, unknownFunc ), +#endif + FUNCTION(coalesce, 1, 0, 0, 0 ), + FUNCTION(coalesce, 0, 0, 0, 0 ), +#ifdef SQLITE_ENABLE_MATH_FUNCTIONS + MFUNCTION(ceil, 1, xCeil, ceilingFunc ), + MFUNCTION(ceiling, 1, xCeil, ceilingFunc ), + MFUNCTION(floor, 1, xFloor, ceilingFunc ), +#if SQLITE_HAVE_C99_MATH_FUNCS + MFUNCTION(trunc, 1, trunc, ceilingFunc ), +#endif + FUNCTION(ln, 1, 0, 0, logFunc ), + FUNCTION(log, 1, 1, 0, logFunc ), + FUNCTION(log10, 1, 1, 0, logFunc ), + FUNCTION(log2, 1, 2, 0, logFunc ), + FUNCTION(log, 2, 0, 0, logFunc ), + MFUNCTION(exp, 1, exp, math1Func ), + MFUNCTION(pow, 2, pow, math2Func ), + MFUNCTION(power, 2, pow, math2Func ), + MFUNCTION(mod, 2, fmod, math2Func ), + MFUNCTION(acos, 1, acos, math1Func ), + MFUNCTION(asin, 1, asin, math1Func ), + MFUNCTION(atan, 1, atan, math1Func ), + MFUNCTION(atan2, 2, atan2, math2Func ), + MFUNCTION(cos, 1, cos, math1Func ), + MFUNCTION(sin, 1, sin, math1Func ), + MFUNCTION(tan, 1, tan, math1Func ), + MFUNCTION(cosh, 1, cosh, math1Func ), + MFUNCTION(sinh, 1, sinh, math1Func ), + MFUNCTION(tanh, 1, tanh, math1Func ), +#if SQLITE_HAVE_C99_MATH_FUNCS + MFUNCTION(acosh, 1, acosh, math1Func ), + MFUNCTION(asinh, 1, asinh, math1Func ), + MFUNCTION(atanh, 1, atanh, math1Func ), +#endif + MFUNCTION(sqrt, 1, sqrt, math1Func ), + MFUNCTION(radians, 1, degToRad, math1Func ), + MFUNCTION(degrees, 1, radToDeg, math1Func ), + FUNCTION(pi, 0, 0, 0, piFunc ), +#endif /* SQLITE_ENABLE_MATH_FUNCTIONS */ + FUNCTION(sign, 1, 0, 0, signFunc ), + INLINE_FUNC(coalesce, -1, INLINEFUNC_coalesce, 0 ), + INLINE_FUNC(iif, 3, INLINEFUNC_iif, 0 ), + }; +#ifndef SQLITE_OMIT_ALTERTABLE + sqlite3AlterFunctions(); +#endif + sqlite3WindowFunctions(); + sqlite3RegisterDateTimeFunctions(); + sqlite3RegisterJsonFunctions(); + sqlite3InsertBuiltinFuncs(aBuiltinFunc, ArraySize(aBuiltinFunc)); + +#if 0 /* Enable to print out how the built-in functions are hashed */ + { + int i; + FuncDef *p; + for(i=0; iu.pHash){ + int n = sqlite3Strlen30(p->zName); + int h = p->zName[0] + n; + assert( p->funcFlags & SQLITE_FUNC_BUILTIN ); + printf(" %s(%d)", p->zName, h); + } + printf("\n"); + } + } +#endif +} + +/************** End of func.c ************************************************/ +/************** Begin file fkey.c ********************************************/ +/* +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used by the compiler to add foreign key +** support to compiled SQL statements. +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_OMIT_FOREIGN_KEY +#ifndef SQLITE_OMIT_TRIGGER + +/* +** Deferred and Immediate FKs +** -------------------------- +** +** Foreign keys in SQLite come in two flavours: deferred and immediate. +** If an immediate foreign key constraint is violated, +** SQLITE_CONSTRAINT_FOREIGNKEY is returned and the current +** statement transaction rolled back. If a +** deferred foreign key constraint is violated, no action is taken +** immediately. However if the application attempts to commit the +** transaction before fixing the constraint violation, the attempt fails. +** +** Deferred constraints are implemented using a simple counter associated +** with the database handle. The counter is set to zero each time a +** database transaction is opened. Each time a statement is executed +** that causes a foreign key violation, the counter is incremented. Each +** time a statement is executed that removes an existing violation from +** the database, the counter is decremented. When the transaction is +** committed, the commit fails if the current value of the counter is +** greater than zero. This scheme has two big drawbacks: +** +** * When a commit fails due to a deferred foreign key constraint, +** there is no way to tell which foreign constraint is not satisfied, +** or which row it is not satisfied for. +** +** * If the database contains foreign key violations when the +** transaction is opened, this may cause the mechanism to malfunction. +** +** Despite these problems, this approach is adopted as it seems simpler +** than the alternatives. +** +** INSERT operations: +** +** I.1) For each FK for which the table is the child table, search +** the parent table for a match. If none is found increment the +** constraint counter. +** +** I.2) For each FK for which the table is the parent table, +** search the child table for rows that correspond to the new +** row in the parent table. Decrement the counter for each row +** found (as the constraint is now satisfied). +** +** DELETE operations: +** +** D.1) For each FK for which the table is the child table, +** search the parent table for a row that corresponds to the +** deleted row in the child table. If such a row is not found, +** decrement the counter. +** +** D.2) For each FK for which the table is the parent table, search +** the child table for rows that correspond to the deleted row +** in the parent table. For each found increment the counter. +** +** UPDATE operations: +** +** An UPDATE command requires that all 4 steps above are taken, but only +** for FK constraints for which the affected columns are actually +** modified (values must be compared at runtime). +** +** Note that I.1 and D.1 are very similar operations, as are I.2 and D.2. +** This simplifies the implementation a bit. +** +** For the purposes of immediate FK constraints, the OR REPLACE conflict +** resolution is considered to delete rows before the new row is inserted. +** If a delete caused by OR REPLACE violates an FK constraint, an exception +** is thrown, even if the FK constraint would be satisfied after the new +** row is inserted. +** +** Immediate constraints are usually handled similarly. The only difference +** is that the counter used is stored as part of each individual statement +** object (struct Vdbe). If, after the statement has run, its immediate +** constraint counter is greater than zero, +** it returns SQLITE_CONSTRAINT_FOREIGNKEY +** and the statement transaction is rolled back. An exception is an INSERT +** statement that inserts a single row only (no triggers). In this case, +** instead of using a counter, an exception is thrown immediately if the +** INSERT violates a foreign key constraint. This is necessary as such +** an INSERT does not open a statement transaction. +** +** TODO: How should dropping a table be handled? How should renaming a +** table be handled? +** +** +** Query API Notes +** --------------- +** +** Before coding an UPDATE or DELETE row operation, the code-generator +** for those two operations needs to know whether or not the operation +** requires any FK processing and, if so, which columns of the original +** row are required by the FK processing VDBE code (i.e. if FKs were +** implemented using triggers, which of the old.* columns would be +** accessed). No information is required by the code-generator before +** coding an INSERT operation. The functions used by the UPDATE/DELETE +** generation code to query for this information are: +** +** sqlite3FkRequired() - Test to see if FK processing is required. +** sqlite3FkOldmask() - Query for the set of required old.* columns. +** +** +** Externally accessible module functions +** -------------------------------------- +** +** sqlite3FkCheck() - Check for foreign key violations. +** sqlite3FkActions() - Code triggers for ON UPDATE/ON DELETE actions. +** sqlite3FkDelete() - Delete an FKey structure. +*/ + +/* +** VDBE Calling Convention +** ----------------------- +** +** Example: +** +** For the following INSERT statement: +** +** CREATE TABLE t1(a, b INTEGER PRIMARY KEY, c); +** INSERT INTO t1 VALUES(1, 2, 3.1); +** +** Register (x): 2 (type integer) +** Register (x+1): 1 (type integer) +** Register (x+2): NULL (type NULL) +** Register (x+3): 3.1 (type real) +*/ + +/* +** A foreign key constraint requires that the key columns in the parent +** table are collectively subject to a UNIQUE or PRIMARY KEY constraint. +** Given that pParent is the parent table for foreign key constraint pFKey, +** search the schema for a unique index on the parent key columns. +** +** If successful, zero is returned. If the parent key is an INTEGER PRIMARY +** KEY column, then output variable *ppIdx is set to NULL. Otherwise, *ppIdx +** is set to point to the unique index. +** +** If the parent key consists of a single column (the foreign key constraint +** is not a composite foreign key), output variable *paiCol is set to NULL. +** Otherwise, it is set to point to an allocated array of size N, where +** N is the number of columns in the parent key. The first element of the +** array is the index of the child table column that is mapped by the FK +** constraint to the parent table column stored in the left-most column +** of index *ppIdx. The second element of the array is the index of the +** child table column that corresponds to the second left-most column of +** *ppIdx, and so on. +** +** If the required index cannot be found, either because: +** +** 1) The named parent key columns do not exist, or +** +** 2) The named parent key columns do exist, but are not subject to a +** UNIQUE or PRIMARY KEY constraint, or +** +** 3) No parent key columns were provided explicitly as part of the +** foreign key definition, and the parent table does not have a +** PRIMARY KEY, or +** +** 4) No parent key columns were provided explicitly as part of the +** foreign key definition, and the PRIMARY KEY of the parent table +** consists of a different number of columns to the child key in +** the child table. +** +** then non-zero is returned, and a "foreign key mismatch" error loaded +** into pParse. If an OOM error occurs, non-zero is returned and the +** pParse->db->mallocFailed flag is set. +*/ +SQLITE_PRIVATE int sqlite3FkLocateIndex( + Parse *pParse, /* Parse context to store any error in */ + Table *pParent, /* Parent table of FK constraint pFKey */ + FKey *pFKey, /* Foreign key to find index for */ + Index **ppIdx, /* OUT: Unique index on parent table */ + int **paiCol /* OUT: Map of index columns in pFKey */ +){ + Index *pIdx = 0; /* Value to return via *ppIdx */ + int *aiCol = 0; /* Value to return via *paiCol */ + int nCol = pFKey->nCol; /* Number of columns in parent key */ + char *zKey = pFKey->aCol[0].zCol; /* Name of left-most parent key column */ + + /* The caller is responsible for zeroing output parameters. */ + assert( ppIdx && *ppIdx==0 ); + assert( !paiCol || *paiCol==0 ); + assert( pParse ); + + /* If this is a non-composite (single column) foreign key, check if it + ** maps to the INTEGER PRIMARY KEY of table pParent. If so, leave *ppIdx + ** and *paiCol set to zero and return early. + ** + ** Otherwise, for a composite foreign key (more than one column), allocate + ** space for the aiCol array (returned via output parameter *paiCol). + ** Non-composite foreign keys do not require the aiCol array. + */ + if( nCol==1 ){ + /* The FK maps to the IPK if any of the following are true: + ** + ** 1) There is an INTEGER PRIMARY KEY column and the FK is implicitly + ** mapped to the primary key of table pParent, or + ** 2) The FK is explicitly mapped to a column declared as INTEGER + ** PRIMARY KEY. + */ + if( pParent->iPKey>=0 ){ + if( !zKey ) return 0; + if( !sqlite3StrICmp(pParent->aCol[pParent->iPKey].zCnName, zKey) ){ + return 0; + } + } + }else if( paiCol ){ + assert( nCol>1 ); + aiCol = (int *)sqlite3DbMallocRawNN(pParse->db, nCol*sizeof(int)); + if( !aiCol ) return 1; + *paiCol = aiCol; + } + + for(pIdx=pParent->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->nKeyCol==nCol && IsUniqueIndex(pIdx) && pIdx->pPartIdxWhere==0 ){ + /* pIdx is a UNIQUE index (or a PRIMARY KEY) and has the right number + ** of columns. If each indexed column corresponds to a foreign key + ** column of pFKey, then this index is a winner. */ + + if( zKey==0 ){ + /* If zKey is NULL, then this foreign key is implicitly mapped to + ** the PRIMARY KEY of table pParent. The PRIMARY KEY index may be + ** identified by the test. */ + if( IsPrimaryKeyIndex(pIdx) ){ + if( aiCol ){ + int i; + for(i=0; iaCol[i].iFrom; + } + break; + } + }else{ + /* If zKey is non-NULL, then this foreign key was declared to + ** map to an explicit list of columns in table pParent. Check if this + ** index matches those columns. Also, check that the index uses + ** the default collation sequences for each column. */ + int i, j; + for(i=0; iaiColumn[i]; /* Index of column in parent tbl */ + const char *zDfltColl; /* Def. collation for column */ + char *zIdxCol; /* Name of indexed column */ + + if( iCol<0 ) break; /* No foreign keys against expression indexes */ + + /* If the index uses a collation sequence that is different from + ** the default collation sequence for the column, this index is + ** unusable. Bail out early in this case. */ + zDfltColl = sqlite3ColumnColl(&pParent->aCol[iCol]); + if( !zDfltColl ) zDfltColl = sqlite3StrBINARY; + if( sqlite3StrICmp(pIdx->azColl[i], zDfltColl) ) break; + + zIdxCol = pParent->aCol[iCol].zCnName; + for(j=0; jaCol[j].zCol, zIdxCol)==0 ){ + if( aiCol ) aiCol[i] = pFKey->aCol[j].iFrom; + break; + } + } + if( j==nCol ) break; + } + if( i==nCol ) break; /* pIdx is usable */ + } + } + } + + if( !pIdx ){ + if( !pParse->disableTriggers ){ + sqlite3ErrorMsg(pParse, + "foreign key mismatch - \"%w\" referencing \"%w\"", + pFKey->pFrom->zName, pFKey->zTo); + } + sqlite3DbFree(pParse->db, aiCol); + return 1; + } + + *ppIdx = pIdx; + return 0; +} + +/* +** This function is called when a row is inserted into or deleted from the +** child table of foreign key constraint pFKey. If an SQL UPDATE is executed +** on the child table of pFKey, this function is invoked twice for each row +** affected - once to "delete" the old row, and then again to "insert" the +** new row. +** +** Each time it is called, this function generates VDBE code to locate the +** row in the parent table that corresponds to the row being inserted into +** or deleted from the child table. If the parent row can be found, no +** special action is taken. Otherwise, if the parent row can *not* be +** found in the parent table: +** +** Operation | FK type | Action taken +** -------------------------------------------------------------------------- +** INSERT immediate Increment the "immediate constraint counter". +** +** DELETE immediate Decrement the "immediate constraint counter". +** +** INSERT deferred Increment the "deferred constraint counter". +** +** DELETE deferred Decrement the "deferred constraint counter". +** +** These operations are identified in the comment at the top of this file +** (fkey.c) as "I.1" and "D.1". +*/ +static void fkLookupParent( + Parse *pParse, /* Parse context */ + int iDb, /* Index of database housing pTab */ + Table *pTab, /* Parent table of FK pFKey */ + Index *pIdx, /* Unique index on parent key columns in pTab */ + FKey *pFKey, /* Foreign key constraint */ + int *aiCol, /* Map from parent key columns to child table columns */ + int regData, /* Address of array containing child table row */ + int nIncr, /* Increment constraint counter by this */ + int isIgnore /* If true, pretend pTab contains all NULL values */ +){ + int i; /* Iterator variable */ + Vdbe *v = sqlite3GetVdbe(pParse); /* Vdbe to add code to */ + int iCur = pParse->nTab - 1; /* Cursor number to use */ + int iOk = sqlite3VdbeMakeLabel(pParse); /* jump here if parent key found */ + + sqlite3VdbeVerifyAbortable(v, + (!pFKey->isDeferred + && !(pParse->db->flags & SQLITE_DeferFKs) + && !pParse->pToplevel + && !pParse->isMultiWrite) ? OE_Abort : OE_Ignore); + + /* If nIncr is less than zero, then check at runtime if there are any + ** outstanding constraints to resolve. If there are not, there is no need + ** to check if deleting this row resolves any outstanding violations. + ** + ** Check if any of the key columns in the child table row are NULL. If + ** any are, then the constraint is considered satisfied. No need to + ** search for a matching row in the parent table. */ + if( nIncr<0 ){ + sqlite3VdbeAddOp2(v, OP_FkIfZero, pFKey->isDeferred, iOk); + VdbeCoverage(v); + } + for(i=0; inCol; i++){ + int iReg = sqlite3TableColumnToStorage(pFKey->pFrom,aiCol[i]) + regData + 1; + sqlite3VdbeAddOp2(v, OP_IsNull, iReg, iOk); VdbeCoverage(v); + } + + if( isIgnore==0 ){ + if( pIdx==0 ){ + /* If pIdx is NULL, then the parent key is the INTEGER PRIMARY KEY + ** column of the parent table (table pTab). */ + int iMustBeInt; /* Address of MustBeInt instruction */ + int regTemp = sqlite3GetTempReg(pParse); + + /* Invoke MustBeInt to coerce the child key value to an integer (i.e. + ** apply the affinity of the parent key). If this fails, then there + ** is no matching parent key. Before using MustBeInt, make a copy of + ** the value. Otherwise, the value inserted into the child key column + ** will have INTEGER affinity applied to it, which may not be correct. */ + sqlite3VdbeAddOp2(v, OP_SCopy, + sqlite3TableColumnToStorage(pFKey->pFrom,aiCol[0])+1+regData, regTemp); + iMustBeInt = sqlite3VdbeAddOp2(v, OP_MustBeInt, regTemp, 0); + VdbeCoverage(v); + + /* If the parent table is the same as the child table, and we are about + ** to increment the constraint-counter (i.e. this is an INSERT operation), + ** then check if the row being inserted matches itself. If so, do not + ** increment the constraint-counter. */ + if( pTab==pFKey->pFrom && nIncr==1 ){ + sqlite3VdbeAddOp3(v, OP_Eq, regData, iOk, regTemp); VdbeCoverage(v); + sqlite3VdbeChangeP5(v, SQLITE_NOTNULL); + } + + sqlite3OpenTable(pParse, iCur, iDb, pTab, OP_OpenRead); + sqlite3VdbeAddOp3(v, OP_NotExists, iCur, 0, regTemp); VdbeCoverage(v); + sqlite3VdbeGoto(v, iOk); + sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-2); + sqlite3VdbeJumpHere(v, iMustBeInt); + sqlite3ReleaseTempReg(pParse, regTemp); + }else{ + int nCol = pFKey->nCol; + int regTemp = sqlite3GetTempRange(pParse, nCol); + int regRec = sqlite3GetTempReg(pParse); + + sqlite3VdbeAddOp3(v, OP_OpenRead, iCur, pIdx->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + for(i=0; ipFrom, aiCol[i])+1+regData, + regTemp+i); + } + + /* If the parent table is the same as the child table, and we are about + ** to increment the constraint-counter (i.e. this is an INSERT operation), + ** then check if the row being inserted matches itself. If so, do not + ** increment the constraint-counter. + ** + ** If any of the parent-key values are NULL, then the row cannot match + ** itself. So set JUMPIFNULL to make sure we do the OP_Found if any + ** of the parent-key values are NULL (at this point it is known that + ** none of the child key values are). + */ + if( pTab==pFKey->pFrom && nIncr==1 ){ + int iJump = sqlite3VdbeCurrentAddr(v) + nCol + 1; + for(i=0; ipFrom,aiCol[i]) + +1+regData; + int iParent = 1+regData; + iParent += sqlite3TableColumnToStorage(pIdx->pTable, + pIdx->aiColumn[i]); + assert( pIdx->aiColumn[i]>=0 ); + assert( aiCol[i]!=pTab->iPKey ); + if( pIdx->aiColumn[i]==pTab->iPKey ){ + /* The parent key is a composite key that includes the IPK column */ + iParent = regData; + } + sqlite3VdbeAddOp3(v, OP_Ne, iChild, iJump, iParent); VdbeCoverage(v); + sqlite3VdbeChangeP5(v, SQLITE_JUMPIFNULL); + } + sqlite3VdbeGoto(v, iOk); + } + + sqlite3VdbeAddOp4(v, OP_MakeRecord, regTemp, nCol, regRec, + sqlite3IndexAffinityStr(pParse->db,pIdx), nCol); + sqlite3VdbeAddOp4Int(v, OP_Found, iCur, iOk, regRec, 0); VdbeCoverage(v); + + sqlite3ReleaseTempReg(pParse, regRec); + sqlite3ReleaseTempRange(pParse, regTemp, nCol); + } + } + + if( !pFKey->isDeferred && !(pParse->db->flags & SQLITE_DeferFKs) + && !pParse->pToplevel + && !pParse->isMultiWrite + ){ + /* Special case: If this is an INSERT statement that will insert exactly + ** one row into the table, raise a constraint immediately instead of + ** incrementing a counter. This is necessary as the VM code is being + ** generated for will not open a statement transaction. */ + assert( nIncr==1 ); + sqlite3HaltConstraint(pParse, SQLITE_CONSTRAINT_FOREIGNKEY, + OE_Abort, 0, P4_STATIC, P5_ConstraintFK); + }else{ + if( nIncr>0 && pFKey->isDeferred==0 ){ + sqlite3MayAbort(pParse); + } + sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, nIncr); + } + + sqlite3VdbeResolveLabel(v, iOk); + sqlite3VdbeAddOp1(v, OP_Close, iCur); +} + + +/* +** Return an Expr object that refers to a memory register corresponding +** to column iCol of table pTab. +** +** regBase is the first of an array of register that contains the data +** for pTab. regBase itself holds the rowid. regBase+1 holds the first +** column. regBase+2 holds the second column, and so forth. +*/ +static Expr *exprTableRegister( + Parse *pParse, /* Parsing and code generating context */ + Table *pTab, /* The table whose content is at r[regBase]... */ + int regBase, /* Contents of table pTab */ + i16 iCol /* Which column of pTab is desired */ +){ + Expr *pExpr; + Column *pCol; + const char *zColl; + sqlite3 *db = pParse->db; + + pExpr = sqlite3Expr(db, TK_REGISTER, 0); + if( pExpr ){ + if( iCol>=0 && iCol!=pTab->iPKey ){ + pCol = &pTab->aCol[iCol]; + pExpr->iTable = regBase + sqlite3TableColumnToStorage(pTab,iCol) + 1; + pExpr->affExpr = pCol->affinity; + zColl = sqlite3ColumnColl(pCol); + if( zColl==0 ) zColl = db->pDfltColl->zName; + pExpr = sqlite3ExprAddCollateString(pParse, pExpr, zColl); + }else{ + pExpr->iTable = regBase; + pExpr->affExpr = SQLITE_AFF_INTEGER; + } + } + return pExpr; +} + +/* +** Return an Expr object that refers to column iCol of table pTab which +** has cursor iCur. +*/ +static Expr *exprTableColumn( + sqlite3 *db, /* The database connection */ + Table *pTab, /* The table whose column is desired */ + int iCursor, /* The open cursor on the table */ + i16 iCol /* The column that is wanted */ +){ + Expr *pExpr = sqlite3Expr(db, TK_COLUMN, 0); + if( pExpr ){ + assert( ExprUseYTab(pExpr) ); + pExpr->y.pTab = pTab; + pExpr->iTable = iCursor; + pExpr->iColumn = iCol; + } + return pExpr; +} + +/* +** This function is called to generate code executed when a row is deleted +** from the parent table of foreign key constraint pFKey and, if pFKey is +** deferred, when a row is inserted into the same table. When generating +** code for an SQL UPDATE operation, this function may be called twice - +** once to "delete" the old row and once to "insert" the new row. +** +** Parameter nIncr is passed -1 when inserting a row (as this may decrease +** the number of FK violations in the db) or +1 when deleting one (as this +** may increase the number of FK constraint problems). +** +** The code generated by this function scans through the rows in the child +** table that correspond to the parent table row being deleted or inserted. +** For each child row found, one of the following actions is taken: +** +** Operation | FK type | Action taken +** -------------------------------------------------------------------------- +** DELETE immediate Increment the "immediate constraint counter". +** Or, if the ON (UPDATE|DELETE) action is RESTRICT, +** throw a "FOREIGN KEY constraint failed" exception. +** +** INSERT immediate Decrement the "immediate constraint counter". +** +** DELETE deferred Increment the "deferred constraint counter". +** Or, if the ON (UPDATE|DELETE) action is RESTRICT, +** throw a "FOREIGN KEY constraint failed" exception. +** +** INSERT deferred Decrement the "deferred constraint counter". +** +** These operations are identified in the comment at the top of this file +** (fkey.c) as "I.2" and "D.2". +*/ +static void fkScanChildren( + Parse *pParse, /* Parse context */ + SrcList *pSrc, /* The child table to be scanned */ + Table *pTab, /* The parent table */ + Index *pIdx, /* Index on parent covering the foreign key */ + FKey *pFKey, /* The foreign key linking pSrc to pTab */ + int *aiCol, /* Map from pIdx cols to child table cols */ + int regData, /* Parent row data starts here */ + int nIncr /* Amount to increment deferred counter by */ +){ + sqlite3 *db = pParse->db; /* Database handle */ + int i; /* Iterator variable */ + Expr *pWhere = 0; /* WHERE clause to scan with */ + NameContext sNameContext; /* Context used to resolve WHERE clause */ + WhereInfo *pWInfo; /* Context used by sqlite3WhereXXX() */ + int iFkIfZero = 0; /* Address of OP_FkIfZero */ + Vdbe *v = sqlite3GetVdbe(pParse); + + assert( pIdx==0 || pIdx->pTable==pTab ); + assert( pIdx==0 || pIdx->nKeyCol==pFKey->nCol ); + assert( pIdx!=0 || pFKey->nCol==1 ); + assert( pIdx!=0 || HasRowid(pTab) ); + + if( nIncr<0 ){ + iFkIfZero = sqlite3VdbeAddOp2(v, OP_FkIfZero, pFKey->isDeferred, 0); + VdbeCoverage(v); + } + + /* Create an Expr object representing an SQL expression like: + ** + ** = AND = ... + ** + ** The collation sequence used for the comparison should be that of + ** the parent key columns. The affinity of the parent key column should + ** be applied to each child key value before the comparison takes place. + */ + for(i=0; inCol; i++){ + Expr *pLeft; /* Value from parent table row */ + Expr *pRight; /* Column ref to child table */ + Expr *pEq; /* Expression (pLeft = pRight) */ + i16 iCol; /* Index of column in child table */ + const char *zCol; /* Name of column in child table */ + + iCol = pIdx ? pIdx->aiColumn[i] : -1; + pLeft = exprTableRegister(pParse, pTab, regData, iCol); + iCol = aiCol ? aiCol[i] : pFKey->aCol[0].iFrom; + assert( iCol>=0 ); + zCol = pFKey->pFrom->aCol[iCol].zCnName; + pRight = sqlite3Expr(db, TK_ID, zCol); + pEq = sqlite3PExpr(pParse, TK_EQ, pLeft, pRight); + pWhere = sqlite3ExprAnd(pParse, pWhere, pEq); + } + + /* If the child table is the same as the parent table, then add terms + ** to the WHERE clause that prevent this entry from being scanned. + ** The added WHERE clause terms are like this: + ** + ** $current_rowid!=rowid + ** NOT( $current_a==a AND $current_b==b AND ... ) + ** + ** The first form is used for rowid tables. The second form is used + ** for WITHOUT ROWID tables. In the second form, the *parent* key is + ** (a,b,...). Either the parent or primary key could be used to + ** uniquely identify the current row, but the parent key is more convenient + ** as the required values have already been loaded into registers + ** by the caller. + */ + if( pTab==pFKey->pFrom && nIncr>0 ){ + Expr *pNe; /* Expression (pLeft != pRight) */ + Expr *pLeft; /* Value from parent table row */ + Expr *pRight; /* Column ref to child table */ + if( HasRowid(pTab) ){ + pLeft = exprTableRegister(pParse, pTab, regData, -1); + pRight = exprTableColumn(db, pTab, pSrc->a[0].iCursor, -1); + pNe = sqlite3PExpr(pParse, TK_NE, pLeft, pRight); + }else{ + Expr *pEq, *pAll = 0; + assert( pIdx!=0 ); + for(i=0; inKeyCol; i++){ + i16 iCol = pIdx->aiColumn[i]; + assert( iCol>=0 ); + pLeft = exprTableRegister(pParse, pTab, regData, iCol); + pRight = sqlite3Expr(db, TK_ID, pTab->aCol[iCol].zCnName); + pEq = sqlite3PExpr(pParse, TK_IS, pLeft, pRight); + pAll = sqlite3ExprAnd(pParse, pAll, pEq); + } + pNe = sqlite3PExpr(pParse, TK_NOT, pAll, 0); + } + pWhere = sqlite3ExprAnd(pParse, pWhere, pNe); + } + + /* Resolve the references in the WHERE clause. */ + memset(&sNameContext, 0, sizeof(NameContext)); + sNameContext.pSrcList = pSrc; + sNameContext.pParse = pParse; + sqlite3ResolveExprNames(&sNameContext, pWhere); + + /* Create VDBE to loop through the entries in pSrc that match the WHERE + ** clause. For each row found, increment either the deferred or immediate + ** foreign key constraint counter. */ + if( pParse->nErr==0 ){ + pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0, 0, 0); + sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, nIncr); + if( pWInfo ){ + sqlite3WhereEnd(pWInfo); + } + } + + /* Clean up the WHERE clause constructed above. */ + sqlite3ExprDelete(db, pWhere); + if( iFkIfZero ){ + sqlite3VdbeJumpHereOrPopInst(v, iFkIfZero); + } +} + +/* +** This function returns a linked list of FKey objects (connected by +** FKey.pNextTo) holding all children of table pTab. For example, +** given the following schema: +** +** CREATE TABLE t1(a PRIMARY KEY); +** CREATE TABLE t2(b REFERENCES t1(a); +** +** Calling this function with table "t1" as an argument returns a pointer +** to the FKey structure representing the foreign key constraint on table +** "t2". Calling this function with "t2" as the argument would return a +** NULL pointer (as there are no FK constraints for which t2 is the parent +** table). +*/ +SQLITE_PRIVATE FKey *sqlite3FkReferences(Table *pTab){ + return (FKey *)sqlite3HashFind(&pTab->pSchema->fkeyHash, pTab->zName); +} + +/* +** The second argument is a Trigger structure allocated by the +** fkActionTrigger() routine. This function deletes the Trigger structure +** and all of its sub-components. +** +** The Trigger structure or any of its sub-components may be allocated from +** the lookaside buffer belonging to database handle dbMem. +*/ +static void fkTriggerDelete(sqlite3 *dbMem, Trigger *p){ + if( p ){ + TriggerStep *pStep = p->step_list; + sqlite3ExprDelete(dbMem, pStep->pWhere); + sqlite3ExprListDelete(dbMem, pStep->pExprList); + sqlite3SelectDelete(dbMem, pStep->pSelect); + sqlite3ExprDelete(dbMem, p->pWhen); + sqlite3DbFree(dbMem, p); + } +} + +/* +** Clear the apTrigger[] cache of CASCADE triggers for all foreign keys +** in a particular database. This needs to happen when the schema +** changes. +*/ +SQLITE_PRIVATE void sqlite3FkClearTriggerCache(sqlite3 *db, int iDb){ + HashElem *k; + Hash *pHash = &db->aDb[iDb].pSchema->tblHash; + for(k=sqliteHashFirst(pHash); k; k=sqliteHashNext(k)){ + Table *pTab = sqliteHashData(k); + FKey *pFKey; + if( !IsOrdinaryTable(pTab) ) continue; + for(pFKey=pTab->u.tab.pFKey; pFKey; pFKey=pFKey->pNextFrom){ + fkTriggerDelete(db, pFKey->apTrigger[0]); pFKey->apTrigger[0] = 0; + fkTriggerDelete(db, pFKey->apTrigger[1]); pFKey->apTrigger[1] = 0; + } + } +} + +/* +** This function is called to generate code that runs when table pTab is +** being dropped from the database. The SrcList passed as the second argument +** to this function contains a single entry guaranteed to resolve to +** table pTab. +** +** Normally, no code is required. However, if either +** +** (a) The table is the parent table of a FK constraint, or +** (b) The table is the child table of a deferred FK constraint and it is +** determined at runtime that there are outstanding deferred FK +** constraint violations in the database, +** +** then the equivalent of "DELETE FROM " is executed before dropping +** the table from the database. Triggers are disabled while running this +** DELETE, but foreign key actions are not. +*/ +SQLITE_PRIVATE void sqlite3FkDropTable(Parse *pParse, SrcList *pName, Table *pTab){ + sqlite3 *db = pParse->db; + if( (db->flags&SQLITE_ForeignKeys) && IsOrdinaryTable(pTab) ){ + int iSkip = 0; + Vdbe *v = sqlite3GetVdbe(pParse); + + assert( v ); /* VDBE has already been allocated */ + assert( IsOrdinaryTable(pTab) ); + if( sqlite3FkReferences(pTab)==0 ){ + /* Search for a deferred foreign key constraint for which this table + ** is the child table. If one cannot be found, return without + ** generating any VDBE code. If one can be found, then jump over + ** the entire DELETE if there are no outstanding deferred constraints + ** when this statement is run. */ + FKey *p; + for(p=pTab->u.tab.pFKey; p; p=p->pNextFrom){ + if( p->isDeferred || (db->flags & SQLITE_DeferFKs) ) break; + } + if( !p ) return; + iSkip = sqlite3VdbeMakeLabel(pParse); + sqlite3VdbeAddOp2(v, OP_FkIfZero, 1, iSkip); VdbeCoverage(v); + } + + pParse->disableTriggers = 1; + sqlite3DeleteFrom(pParse, sqlite3SrcListDup(db, pName, 0), 0, 0, 0); + pParse->disableTriggers = 0; + + /* If the DELETE has generated immediate foreign key constraint + ** violations, halt the VDBE and return an error at this point, before + ** any modifications to the schema are made. This is because statement + ** transactions are not able to rollback schema changes. + ** + ** If the SQLITE_DeferFKs flag is set, then this is not required, as + ** the statement transaction will not be rolled back even if FK + ** constraints are violated. + */ + if( (db->flags & SQLITE_DeferFKs)==0 ){ + sqlite3VdbeVerifyAbortable(v, OE_Abort); + sqlite3VdbeAddOp2(v, OP_FkIfZero, 0, sqlite3VdbeCurrentAddr(v)+2); + VdbeCoverage(v); + sqlite3HaltConstraint(pParse, SQLITE_CONSTRAINT_FOREIGNKEY, + OE_Abort, 0, P4_STATIC, P5_ConstraintFK); + } + + if( iSkip ){ + sqlite3VdbeResolveLabel(v, iSkip); + } + } +} + + +/* +** The second argument points to an FKey object representing a foreign key +** for which pTab is the child table. An UPDATE statement against pTab +** is currently being processed. For each column of the table that is +** actually updated, the corresponding element in the aChange[] array +** is zero or greater (if a column is unmodified the corresponding element +** is set to -1). If the rowid column is modified by the UPDATE statement +** the bChngRowid argument is non-zero. +** +** This function returns true if any of the columns that are part of the +** child key for FK constraint *p are modified. +*/ +static int fkChildIsModified( + Table *pTab, /* Table being updated */ + FKey *p, /* Foreign key for which pTab is the child */ + int *aChange, /* Array indicating modified columns */ + int bChngRowid /* True if rowid is modified by this update */ +){ + int i; + for(i=0; inCol; i++){ + int iChildKey = p->aCol[i].iFrom; + if( aChange[iChildKey]>=0 ) return 1; + if( iChildKey==pTab->iPKey && bChngRowid ) return 1; + } + return 0; +} + +/* +** The second argument points to an FKey object representing a foreign key +** for which pTab is the parent table. An UPDATE statement against pTab +** is currently being processed. For each column of the table that is +** actually updated, the corresponding element in the aChange[] array +** is zero or greater (if a column is unmodified the corresponding element +** is set to -1). If the rowid column is modified by the UPDATE statement +** the bChngRowid argument is non-zero. +** +** This function returns true if any of the columns that are part of the +** parent key for FK constraint *p are modified. +*/ +static int fkParentIsModified( + Table *pTab, + FKey *p, + int *aChange, + int bChngRowid +){ + int i; + for(i=0; inCol; i++){ + char *zKey = p->aCol[i].zCol; + int iKey; + for(iKey=0; iKeynCol; iKey++){ + if( aChange[iKey]>=0 || (iKey==pTab->iPKey && bChngRowid) ){ + Column *pCol = &pTab->aCol[iKey]; + if( zKey ){ + if( 0==sqlite3StrICmp(pCol->zCnName, zKey) ) return 1; + }else if( pCol->colFlags & COLFLAG_PRIMKEY ){ + return 1; + } + } + } + } + return 0; +} + +/* +** Return true if the parser passed as the first argument is being +** used to code a trigger that is really a "SET NULL" action belonging +** to trigger pFKey. +*/ +static int isSetNullAction(Parse *pParse, FKey *pFKey){ + Parse *pTop = sqlite3ParseToplevel(pParse); + if( pTop->pTriggerPrg ){ + Trigger *p = pTop->pTriggerPrg->pTrigger; + if( (p==pFKey->apTrigger[0] && pFKey->aAction[0]==OE_SetNull) + || (p==pFKey->apTrigger[1] && pFKey->aAction[1]==OE_SetNull) + ){ + return 1; + } + } + return 0; +} + +/* +** This function is called when inserting, deleting or updating a row of +** table pTab to generate VDBE code to perform foreign key constraint +** processing for the operation. +** +** For a DELETE operation, parameter regOld is passed the index of the +** first register in an array of (pTab->nCol+1) registers containing the +** rowid of the row being deleted, followed by each of the column values +** of the row being deleted, from left to right. Parameter regNew is passed +** zero in this case. +** +** For an INSERT operation, regOld is passed zero and regNew is passed the +** first register of an array of (pTab->nCol+1) registers containing the new +** row data. +** +** For an UPDATE operation, this function is called twice. Once before +** the original record is deleted from the table using the calling convention +** described for DELETE. Then again after the original record is deleted +** but before the new record is inserted using the INSERT convention. +*/ +SQLITE_PRIVATE void sqlite3FkCheck( + Parse *pParse, /* Parse context */ + Table *pTab, /* Row is being deleted from this table */ + int regOld, /* Previous row data is stored here */ + int regNew, /* New row data is stored here */ + int *aChange, /* Array indicating UPDATEd columns (or 0) */ + int bChngRowid /* True if rowid is UPDATEd */ +){ + sqlite3 *db = pParse->db; /* Database handle */ + FKey *pFKey; /* Used to iterate through FKs */ + int iDb; /* Index of database containing pTab */ + const char *zDb; /* Name of database containing pTab */ + int isIgnoreErrors = pParse->disableTriggers; + + /* Exactly one of regOld and regNew should be non-zero. */ + assert( (regOld==0)!=(regNew==0) ); + + /* If foreign-keys are disabled, this function is a no-op. */ + if( (db->flags&SQLITE_ForeignKeys)==0 ) return; + if( !IsOrdinaryTable(pTab) ) return; + + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + zDb = db->aDb[iDb].zDbSName; + + /* Loop through all the foreign key constraints for which pTab is the + ** child table (the table that the foreign key definition is part of). */ + for(pFKey=pTab->u.tab.pFKey; pFKey; pFKey=pFKey->pNextFrom){ + Table *pTo; /* Parent table of foreign key pFKey */ + Index *pIdx = 0; /* Index on key columns in pTo */ + int *aiFree = 0; + int *aiCol; + int iCol; + int i; + int bIgnore = 0; + + if( aChange + && sqlite3_stricmp(pTab->zName, pFKey->zTo)!=0 + && fkChildIsModified(pTab, pFKey, aChange, bChngRowid)==0 + ){ + continue; + } + + /* Find the parent table of this foreign key. Also find a unique index + ** on the parent key columns in the parent table. If either of these + ** schema items cannot be located, set an error in pParse and return + ** early. */ + if( pParse->disableTriggers ){ + pTo = sqlite3FindTable(db, pFKey->zTo, zDb); + }else{ + pTo = sqlite3LocateTable(pParse, 0, pFKey->zTo, zDb); + } + if( !pTo || sqlite3FkLocateIndex(pParse, pTo, pFKey, &pIdx, &aiFree) ){ + assert( isIgnoreErrors==0 || (regOld!=0 && regNew==0) ); + if( !isIgnoreErrors || db->mallocFailed ) return; + if( pTo==0 ){ + /* If isIgnoreErrors is true, then a table is being dropped. In this + ** case SQLite runs a "DELETE FROM xxx" on the table being dropped + ** before actually dropping it in order to check FK constraints. + ** If the parent table of an FK constraint on the current table is + ** missing, behave as if it is empty. i.e. decrement the relevant + ** FK counter for each row of the current table with non-NULL keys. + */ + Vdbe *v = sqlite3GetVdbe(pParse); + int iJump = sqlite3VdbeCurrentAddr(v) + pFKey->nCol + 1; + for(i=0; inCol; i++){ + int iFromCol, iReg; + iFromCol = pFKey->aCol[i].iFrom; + iReg = sqlite3TableColumnToStorage(pFKey->pFrom,iFromCol) + regOld+1; + sqlite3VdbeAddOp2(v, OP_IsNull, iReg, iJump); VdbeCoverage(v); + } + sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, -1); + } + continue; + } + assert( pFKey->nCol==1 || (aiFree && pIdx) ); + + if( aiFree ){ + aiCol = aiFree; + }else{ + iCol = pFKey->aCol[0].iFrom; + aiCol = &iCol; + } + for(i=0; inCol; i++){ + if( aiCol[i]==pTab->iPKey ){ + aiCol[i] = -1; + } + assert( pIdx==0 || pIdx->aiColumn[i]>=0 ); +#ifndef SQLITE_OMIT_AUTHORIZATION + /* Request permission to read the parent key columns. If the + ** authorization callback returns SQLITE_IGNORE, behave as if any + ** values read from the parent table are NULL. */ + if( db->xAuth ){ + int rcauth; + char *zCol = pTo->aCol[pIdx ? pIdx->aiColumn[i] : pTo->iPKey].zCnName; + rcauth = sqlite3AuthReadCol(pParse, pTo->zName, zCol, iDb); + bIgnore = (rcauth==SQLITE_IGNORE); + } +#endif + } + + /* Take a shared-cache advisory read-lock on the parent table. Allocate + ** a cursor to use to search the unique index on the parent key columns + ** in the parent table. */ + sqlite3TableLock(pParse, iDb, pTo->tnum, 0, pTo->zName); + pParse->nTab++; + + if( regOld!=0 ){ + /* A row is being removed from the child table. Search for the parent. + ** If the parent does not exist, removing the child row resolves an + ** outstanding foreign key constraint violation. */ + fkLookupParent(pParse, iDb, pTo, pIdx, pFKey, aiCol, regOld, -1, bIgnore); + } + if( regNew!=0 && !isSetNullAction(pParse, pFKey) ){ + /* A row is being added to the child table. If a parent row cannot + ** be found, adding the child row has violated the FK constraint. + ** + ** If this operation is being performed as part of a trigger program + ** that is actually a "SET NULL" action belonging to this very + ** foreign key, then omit this scan altogether. As all child key + ** values are guaranteed to be NULL, it is not possible for adding + ** this row to cause an FK violation. */ + fkLookupParent(pParse, iDb, pTo, pIdx, pFKey, aiCol, regNew, +1, bIgnore); + } + + sqlite3DbFree(db, aiFree); + } + + /* Loop through all the foreign key constraints that refer to this table. + ** (the "child" constraints) */ + for(pFKey = sqlite3FkReferences(pTab); pFKey; pFKey=pFKey->pNextTo){ + Index *pIdx = 0; /* Foreign key index for pFKey */ + SrcList *pSrc; + int *aiCol = 0; + + if( aChange && fkParentIsModified(pTab, pFKey, aChange, bChngRowid)==0 ){ + continue; + } + + if( !pFKey->isDeferred && !(db->flags & SQLITE_DeferFKs) + && !pParse->pToplevel && !pParse->isMultiWrite + ){ + assert( regOld==0 && regNew!=0 ); + /* Inserting a single row into a parent table cannot cause (or fix) + ** an immediate foreign key violation. So do nothing in this case. */ + continue; + } + + if( sqlite3FkLocateIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ){ + if( !isIgnoreErrors || db->mallocFailed ) return; + continue; + } + assert( aiCol || pFKey->nCol==1 ); + + /* Create a SrcList structure containing the child table. We need the + ** child table as a SrcList for sqlite3WhereBegin() */ + pSrc = sqlite3SrcListAppend(pParse, 0, 0, 0); + if( pSrc ){ + SrcItem *pItem = pSrc->a; + pItem->pTab = pFKey->pFrom; + pItem->zName = pFKey->pFrom->zName; + pItem->pTab->nTabRef++; + pItem->iCursor = pParse->nTab++; + + if( regNew!=0 ){ + fkScanChildren(pParse, pSrc, pTab, pIdx, pFKey, aiCol, regNew, -1); + } + if( regOld!=0 ){ + int eAction = pFKey->aAction[aChange!=0]; + fkScanChildren(pParse, pSrc, pTab, pIdx, pFKey, aiCol, regOld, 1); + /* If this is a deferred FK constraint, or a CASCADE or SET NULL + ** action applies, then any foreign key violations caused by + ** removing the parent key will be rectified by the action trigger. + ** So do not set the "may-abort" flag in this case. + ** + ** Note 1: If the FK is declared "ON UPDATE CASCADE", then the + ** may-abort flag will eventually be set on this statement anyway + ** (when this function is called as part of processing the UPDATE + ** within the action trigger). + ** + ** Note 2: At first glance it may seem like SQLite could simply omit + ** all OP_FkCounter related scans when either CASCADE or SET NULL + ** applies. The trouble starts if the CASCADE or SET NULL action + ** trigger causes other triggers or action rules attached to the + ** child table to fire. In these cases the fk constraint counters + ** might be set incorrectly if any OP_FkCounter related scans are + ** omitted. */ + if( !pFKey->isDeferred && eAction!=OE_Cascade && eAction!=OE_SetNull ){ + sqlite3MayAbort(pParse); + } + } + pItem->zName = 0; + sqlite3SrcListDelete(db, pSrc); + } + sqlite3DbFree(db, aiCol); + } +} + +#define COLUMN_MASK(x) (((x)>31) ? 0xffffffff : ((u32)1<<(x))) + +/* +** This function is called before generating code to update or delete a +** row contained in table pTab. +*/ +SQLITE_PRIVATE u32 sqlite3FkOldmask( + Parse *pParse, /* Parse context */ + Table *pTab /* Table being modified */ +){ + u32 mask = 0; + if( pParse->db->flags&SQLITE_ForeignKeys && IsOrdinaryTable(pTab) ){ + FKey *p; + int i; + for(p=pTab->u.tab.pFKey; p; p=p->pNextFrom){ + for(i=0; inCol; i++) mask |= COLUMN_MASK(p->aCol[i].iFrom); + } + for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){ + Index *pIdx = 0; + sqlite3FkLocateIndex(pParse, pTab, p, &pIdx, 0); + if( pIdx ){ + for(i=0; inKeyCol; i++){ + assert( pIdx->aiColumn[i]>=0 ); + mask |= COLUMN_MASK(pIdx->aiColumn[i]); + } + } + } + } + return mask; +} + + +/* +** This function is called before generating code to update or delete a +** row contained in table pTab. If the operation is a DELETE, then +** parameter aChange is passed a NULL value. For an UPDATE, aChange points +** to an array of size N, where N is the number of columns in table pTab. +** If the i'th column is not modified by the UPDATE, then the corresponding +** entry in the aChange[] array is set to -1. If the column is modified, +** the value is 0 or greater. Parameter chngRowid is set to true if the +** UPDATE statement modifies the rowid fields of the table. +** +** If any foreign key processing will be required, this function returns +** non-zero. If there is no foreign key related processing, this function +** returns zero. +** +** For an UPDATE, this function returns 2 if: +** +** * There are any FKs for which pTab is the child and the parent table +** and any FK processing at all is required (even of a different FK), or +** +** * the UPDATE modifies one or more parent keys for which the action is +** not "NO ACTION" (i.e. is CASCADE, SET DEFAULT or SET NULL). +** +** Or, assuming some other foreign key processing is required, 1. +*/ +SQLITE_PRIVATE int sqlite3FkRequired( + Parse *pParse, /* Parse context */ + Table *pTab, /* Table being modified */ + int *aChange, /* Non-NULL for UPDATE operations */ + int chngRowid /* True for UPDATE that affects rowid */ +){ + int eRet = 1; /* Value to return if bHaveFK is true */ + int bHaveFK = 0; /* If FK processing is required */ + if( pParse->db->flags&SQLITE_ForeignKeys && IsOrdinaryTable(pTab) ){ + if( !aChange ){ + /* A DELETE operation. Foreign key processing is required if the + ** table in question is either the child or parent table for any + ** foreign key constraint. */ + bHaveFK = (sqlite3FkReferences(pTab) || pTab->u.tab.pFKey); + }else{ + /* This is an UPDATE. Foreign key processing is only required if the + ** operation modifies one or more child or parent key columns. */ + FKey *p; + + /* Check if any child key columns are being modified. */ + for(p=pTab->u.tab.pFKey; p; p=p->pNextFrom){ + if( fkChildIsModified(pTab, p, aChange, chngRowid) ){ + if( 0==sqlite3_stricmp(pTab->zName, p->zTo) ) eRet = 2; + bHaveFK = 1; + } + } + + /* Check if any parent key columns are being modified. */ + for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){ + if( fkParentIsModified(pTab, p, aChange, chngRowid) ){ + if( p->aAction[1]!=OE_None ) return 2; + bHaveFK = 1; + } + } + } + } + return bHaveFK ? eRet : 0; +} + +/* +** This function is called when an UPDATE or DELETE operation is being +** compiled on table pTab, which is the parent table of foreign-key pFKey. +** If the current operation is an UPDATE, then the pChanges parameter is +** passed a pointer to the list of columns being modified. If it is a +** DELETE, pChanges is passed a NULL pointer. +** +** It returns a pointer to a Trigger structure containing a trigger +** equivalent to the ON UPDATE or ON DELETE action specified by pFKey. +** If the action is "NO ACTION" or "RESTRICT", then a NULL pointer is +** returned (these actions require no special handling by the triggers +** sub-system, code for them is created by fkScanChildren()). +** +** For example, if pFKey is the foreign key and pTab is table "p" in +** the following schema: +** +** CREATE TABLE p(pk PRIMARY KEY); +** CREATE TABLE c(ck REFERENCES p ON DELETE CASCADE); +** +** then the returned trigger structure is equivalent to: +** +** CREATE TRIGGER ... DELETE ON p BEGIN +** DELETE FROM c WHERE ck = old.pk; +** END; +** +** The returned pointer is cached as part of the foreign key object. It +** is eventually freed along with the rest of the foreign key object by +** sqlite3FkDelete(). +*/ +static Trigger *fkActionTrigger( + Parse *pParse, /* Parse context */ + Table *pTab, /* Table being updated or deleted from */ + FKey *pFKey, /* Foreign key to get action for */ + ExprList *pChanges /* Change-list for UPDATE, NULL for DELETE */ +){ + sqlite3 *db = pParse->db; /* Database handle */ + int action; /* One of OE_None, OE_Cascade etc. */ + Trigger *pTrigger; /* Trigger definition to return */ + int iAction = (pChanges!=0); /* 1 for UPDATE, 0 for DELETE */ + + action = pFKey->aAction[iAction]; + if( action==OE_Restrict && (db->flags & SQLITE_DeferFKs) ){ + return 0; + } + pTrigger = pFKey->apTrigger[iAction]; + + if( action!=OE_None && !pTrigger ){ + char const *zFrom; /* Name of child table */ + int nFrom; /* Length in bytes of zFrom */ + Index *pIdx = 0; /* Parent key index for this FK */ + int *aiCol = 0; /* child table cols -> parent key cols */ + TriggerStep *pStep = 0; /* First (only) step of trigger program */ + Expr *pWhere = 0; /* WHERE clause of trigger step */ + ExprList *pList = 0; /* Changes list if ON UPDATE CASCADE */ + Select *pSelect = 0; /* If RESTRICT, "SELECT RAISE(...)" */ + int i; /* Iterator variable */ + Expr *pWhen = 0; /* WHEN clause for the trigger */ + + if( sqlite3FkLocateIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ) return 0; + assert( aiCol || pFKey->nCol==1 ); + + for(i=0; inCol; i++){ + Token tOld = { "old", 3 }; /* Literal "old" token */ + Token tNew = { "new", 3 }; /* Literal "new" token */ + Token tFromCol; /* Name of column in child table */ + Token tToCol; /* Name of column in parent table */ + int iFromCol; /* Idx of column in child table */ + Expr *pEq; /* tFromCol = OLD.tToCol */ + + iFromCol = aiCol ? aiCol[i] : pFKey->aCol[0].iFrom; + assert( iFromCol>=0 ); + assert( pIdx!=0 || (pTab->iPKey>=0 && pTab->iPKeynCol) ); + assert( pIdx==0 || pIdx->aiColumn[i]>=0 ); + sqlite3TokenInit(&tToCol, + pTab->aCol[pIdx ? pIdx->aiColumn[i] : pTab->iPKey].zCnName); + sqlite3TokenInit(&tFromCol, pFKey->pFrom->aCol[iFromCol].zCnName); + + /* Create the expression "OLD.zToCol = zFromCol". It is important + ** that the "OLD.zToCol" term is on the LHS of the = operator, so + ** that the affinity and collation sequence associated with the + ** parent table are used for the comparison. */ + pEq = sqlite3PExpr(pParse, TK_EQ, + sqlite3PExpr(pParse, TK_DOT, + sqlite3ExprAlloc(db, TK_ID, &tOld, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0)), + sqlite3ExprAlloc(db, TK_ID, &tFromCol, 0) + ); + pWhere = sqlite3ExprAnd(pParse, pWhere, pEq); + + /* For ON UPDATE, construct the next term of the WHEN clause. + ** The final WHEN clause will be like this: + ** + ** WHEN NOT(old.col1 IS new.col1 AND ... AND old.colN IS new.colN) + */ + if( pChanges ){ + pEq = sqlite3PExpr(pParse, TK_IS, + sqlite3PExpr(pParse, TK_DOT, + sqlite3ExprAlloc(db, TK_ID, &tOld, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0)), + sqlite3PExpr(pParse, TK_DOT, + sqlite3ExprAlloc(db, TK_ID, &tNew, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0)) + ); + pWhen = sqlite3ExprAnd(pParse, pWhen, pEq); + } + + if( action!=OE_Restrict && (action!=OE_Cascade || pChanges) ){ + Expr *pNew; + if( action==OE_Cascade ){ + pNew = sqlite3PExpr(pParse, TK_DOT, + sqlite3ExprAlloc(db, TK_ID, &tNew, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0)); + }else if( action==OE_SetDflt ){ + Column *pCol = pFKey->pFrom->aCol + iFromCol; + Expr *pDflt; + if( pCol->colFlags & COLFLAG_GENERATED ){ + testcase( pCol->colFlags & COLFLAG_VIRTUAL ); + testcase( pCol->colFlags & COLFLAG_STORED ); + pDflt = 0; + }else{ + pDflt = sqlite3ColumnExpr(pFKey->pFrom, pCol); + } + if( pDflt ){ + pNew = sqlite3ExprDup(db, pDflt, 0); + }else{ + pNew = sqlite3ExprAlloc(db, TK_NULL, 0, 0); + } + }else{ + pNew = sqlite3ExprAlloc(db, TK_NULL, 0, 0); + } + pList = sqlite3ExprListAppend(pParse, pList, pNew); + sqlite3ExprListSetName(pParse, pList, &tFromCol, 0); + } + } + sqlite3DbFree(db, aiCol); + + zFrom = pFKey->pFrom->zName; + nFrom = sqlite3Strlen30(zFrom); + + if( action==OE_Restrict ){ + Token tFrom; + Expr *pRaise; + + tFrom.z = zFrom; + tFrom.n = nFrom; + pRaise = sqlite3Expr(db, TK_RAISE, "FOREIGN KEY constraint failed"); + if( pRaise ){ + pRaise->affExpr = OE_Abort; + } + pSelect = sqlite3SelectNew(pParse, + sqlite3ExprListAppend(pParse, 0, pRaise), + sqlite3SrcListAppend(pParse, 0, &tFrom, 0), + pWhere, + 0, 0, 0, 0, 0 + ); + pWhere = 0; + } + + /* Disable lookaside memory allocation */ + DisableLookaside; + + pTrigger = (Trigger *)sqlite3DbMallocZero(db, + sizeof(Trigger) + /* struct Trigger */ + sizeof(TriggerStep) + /* Single step in trigger program */ + nFrom + 1 /* Space for pStep->zTarget */ + ); + if( pTrigger ){ + pStep = pTrigger->step_list = (TriggerStep *)&pTrigger[1]; + pStep->zTarget = (char *)&pStep[1]; + memcpy((char *)pStep->zTarget, zFrom, nFrom); + + pStep->pWhere = sqlite3ExprDup(db, pWhere, EXPRDUP_REDUCE); + pStep->pExprList = sqlite3ExprListDup(db, pList, EXPRDUP_REDUCE); + pStep->pSelect = sqlite3SelectDup(db, pSelect, EXPRDUP_REDUCE); + if( pWhen ){ + pWhen = sqlite3PExpr(pParse, TK_NOT, pWhen, 0); + pTrigger->pWhen = sqlite3ExprDup(db, pWhen, EXPRDUP_REDUCE); + } + } + + /* Re-enable the lookaside buffer, if it was disabled earlier. */ + EnableLookaside; + + sqlite3ExprDelete(db, pWhere); + sqlite3ExprDelete(db, pWhen); + sqlite3ExprListDelete(db, pList); + sqlite3SelectDelete(db, pSelect); + if( db->mallocFailed==1 ){ + fkTriggerDelete(db, pTrigger); + return 0; + } + assert( pStep!=0 ); + assert( pTrigger!=0 ); + + switch( action ){ + case OE_Restrict: + pStep->op = TK_SELECT; + break; + case OE_Cascade: + if( !pChanges ){ + pStep->op = TK_DELETE; + break; + } + /* no break */ deliberate_fall_through + default: + pStep->op = TK_UPDATE; + } + pStep->pTrig = pTrigger; + pTrigger->pSchema = pTab->pSchema; + pTrigger->pTabSchema = pTab->pSchema; + pFKey->apTrigger[iAction] = pTrigger; + pTrigger->op = (pChanges ? TK_UPDATE : TK_DELETE); + } + + return pTrigger; +} + +/* +** This function is called when deleting or updating a row to implement +** any required CASCADE, SET NULL or SET DEFAULT actions. +*/ +SQLITE_PRIVATE void sqlite3FkActions( + Parse *pParse, /* Parse context */ + Table *pTab, /* Table being updated or deleted from */ + ExprList *pChanges, /* Change-list for UPDATE, NULL for DELETE */ + int regOld, /* Address of array containing old row */ + int *aChange, /* Array indicating UPDATEd columns (or 0) */ + int bChngRowid /* True if rowid is UPDATEd */ +){ + /* If foreign-key support is enabled, iterate through all FKs that + ** refer to table pTab. If there is an action associated with the FK + ** for this operation (either update or delete), invoke the associated + ** trigger sub-program. */ + if( pParse->db->flags&SQLITE_ForeignKeys ){ + FKey *pFKey; /* Iterator variable */ + for(pFKey = sqlite3FkReferences(pTab); pFKey; pFKey=pFKey->pNextTo){ + if( aChange==0 || fkParentIsModified(pTab, pFKey, aChange, bChngRowid) ){ + Trigger *pAct = fkActionTrigger(pParse, pTab, pFKey, pChanges); + if( pAct ){ + sqlite3CodeRowTriggerDirect(pParse, pAct, pTab, regOld, OE_Abort, 0); + } + } + } + } +} + +#endif /* ifndef SQLITE_OMIT_TRIGGER */ + +/* +** Free all memory associated with foreign key definitions attached to +** table pTab. Remove the deleted foreign keys from the Schema.fkeyHash +** hash table. +*/ +SQLITE_PRIVATE void sqlite3FkDelete(sqlite3 *db, Table *pTab){ + FKey *pFKey; /* Iterator variable */ + FKey *pNext; /* Copy of pFKey->pNextFrom */ + + assert( IsOrdinaryTable(pTab) ); + for(pFKey=pTab->u.tab.pFKey; pFKey; pFKey=pNext){ + assert( db==0 || sqlite3SchemaMutexHeld(db, 0, pTab->pSchema) ); + + /* Remove the FK from the fkeyHash hash table. */ + if( !db || db->pnBytesFreed==0 ){ + if( pFKey->pPrevTo ){ + pFKey->pPrevTo->pNextTo = pFKey->pNextTo; + }else{ + void *p = (void *)pFKey->pNextTo; + const char *z = (p ? pFKey->pNextTo->zTo : pFKey->zTo); + sqlite3HashInsert(&pTab->pSchema->fkeyHash, z, p); + } + if( pFKey->pNextTo ){ + pFKey->pNextTo->pPrevTo = pFKey->pPrevTo; + } + } + + /* EV: R-30323-21917 Each foreign key constraint in SQLite is + ** classified as either immediate or deferred. + */ + assert( pFKey->isDeferred==0 || pFKey->isDeferred==1 ); + + /* Delete any triggers created to implement actions for this FK. */ +#ifndef SQLITE_OMIT_TRIGGER + fkTriggerDelete(db, pFKey->apTrigger[0]); + fkTriggerDelete(db, pFKey->apTrigger[1]); +#endif + + pNext = pFKey->pNextFrom; + sqlite3DbFree(db, pFKey); + } +} +#endif /* ifndef SQLITE_OMIT_FOREIGN_KEY */ + +/************** End of fkey.c ************************************************/ +/************** Begin file insert.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains C code routines that are called by the parser +** to handle INSERT statements in SQLite. +*/ +/* #include "sqliteInt.h" */ + +/* +** Generate code that will +** +** (1) acquire a lock for table pTab then +** (2) open pTab as cursor iCur. +** +** If pTab is a WITHOUT ROWID table, then it is the PRIMARY KEY index +** for that table that is actually opened. +*/ +SQLITE_PRIVATE void sqlite3OpenTable( + Parse *pParse, /* Generate code into this VDBE */ + int iCur, /* The cursor number of the table */ + int iDb, /* The database index in sqlite3.aDb[] */ + Table *pTab, /* The table to be opened */ + int opcode /* OP_OpenRead or OP_OpenWrite */ +){ + Vdbe *v; + assert( !IsVirtual(pTab) ); + assert( pParse->pVdbe!=0 ); + v = pParse->pVdbe; + assert( opcode==OP_OpenWrite || opcode==OP_OpenRead ); + sqlite3TableLock(pParse, iDb, pTab->tnum, + (opcode==OP_OpenWrite)?1:0, pTab->zName); + if( HasRowid(pTab) ){ + sqlite3VdbeAddOp4Int(v, opcode, iCur, pTab->tnum, iDb, pTab->nNVCol); + VdbeComment((v, "%s", pTab->zName)); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + assert( pPk!=0 ); + assert( pPk->tnum==pTab->tnum || CORRUPT_DB ); + sqlite3VdbeAddOp3(v, opcode, iCur, pPk->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pPk); + VdbeComment((v, "%s", pTab->zName)); + } +} + +/* +** Return a pointer to the column affinity string associated with index +** pIdx. A column affinity string has one character for each column in +** the table, according to the affinity of the column: +** +** Character Column affinity +** ------------------------------ +** 'A' BLOB +** 'B' TEXT +** 'C' NUMERIC +** 'D' INTEGER +** 'F' REAL +** +** An extra 'D' is appended to the end of the string to cover the +** rowid that appears as the last column in every index. +** +** Memory for the buffer containing the column index affinity string +** is managed along with the rest of the Index structure. It will be +** released when sqlite3DeleteIndex() is called. +*/ +SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(sqlite3 *db, Index *pIdx){ + if( !pIdx->zColAff ){ + /* The first time a column affinity string for a particular index is + ** required, it is allocated and populated here. It is then stored as + ** a member of the Index structure for subsequent use. + ** + ** The column affinity string will eventually be deleted by + ** sqliteDeleteIndex() when the Index structure itself is cleaned + ** up. + */ + int n; + Table *pTab = pIdx->pTable; + pIdx->zColAff = (char *)sqlite3DbMallocRaw(0, pIdx->nColumn+1); + if( !pIdx->zColAff ){ + sqlite3OomFault(db); + return 0; + } + for(n=0; nnColumn; n++){ + i16 x = pIdx->aiColumn[n]; + char aff; + if( x>=0 ){ + aff = pTab->aCol[x].affinity; + }else if( x==XN_ROWID ){ + aff = SQLITE_AFF_INTEGER; + }else{ + assert( x==XN_EXPR ); + assert( pIdx->aColExpr!=0 ); + aff = sqlite3ExprAffinity(pIdx->aColExpr->a[n].pExpr); + } + if( affSQLITE_AFF_NUMERIC) aff = SQLITE_AFF_NUMERIC; + pIdx->zColAff[n] = aff; + } + pIdx->zColAff[n] = 0; + } + + return pIdx->zColAff; +} + +/* +** Make changes to the evolving bytecode to do affinity transformations +** of values that are about to be gathered into a row for table pTab. +** +** For ordinary (legacy, non-strict) tables: +** ----------------------------------------- +** +** Compute the affinity string for table pTab, if it has not already been +** computed. As an optimization, omit trailing SQLITE_AFF_BLOB affinities. +** +** If the affinity string is empty (because it was all SQLITE_AFF_BLOB entries +** which were then optimized out) then this routine becomes a no-op. +** +** Otherwise if iReg>0 then code an OP_Affinity opcode that will set the +** affinities for register iReg and following. Or if iReg==0, +** then just set the P4 operand of the previous opcode (which should be +** an OP_MakeRecord) to the affinity string. +** +** A column affinity string has one character per column: +** +** Character Column affinity +** --------- --------------- +** 'A' BLOB +** 'B' TEXT +** 'C' NUMERIC +** 'D' INTEGER +** 'E' REAL +** +** For STRICT tables: +** ------------------ +** +** Generate an appropropriate OP_TypeCheck opcode that will verify the +** datatypes against the column definitions in pTab. If iReg==0, that +** means an OP_MakeRecord opcode has already been generated and should be +** the last opcode generated. The new OP_TypeCheck needs to be inserted +** before the OP_MakeRecord. The new OP_TypeCheck should use the same +** register set as the OP_MakeRecord. If iReg>0 then register iReg is +** the first of a series of registers that will form the new record. +** Apply the type checking to that array of registers. +*/ +SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){ + int i, j; + char *zColAff; + if( pTab->tabFlags & TF_Strict ){ + if( iReg==0 ){ + /* Move the previous opcode (which should be OP_MakeRecord) forward + ** by one slot and insert a new OP_TypeCheck where the current + ** OP_MakeRecord is found */ + VdbeOp *pPrev; + sqlite3VdbeAppendP4(v, pTab, P4_TABLE); + pPrev = sqlite3VdbeGetOp(v, -1); + assert( pPrev!=0 ); + assert( pPrev->opcode==OP_MakeRecord || sqlite3VdbeDb(v)->mallocFailed ); + pPrev->opcode = OP_TypeCheck; + sqlite3VdbeAddOp3(v, OP_MakeRecord, pPrev->p1, pPrev->p2, pPrev->p3); + }else{ + /* Insert an isolated OP_Typecheck */ + sqlite3VdbeAddOp2(v, OP_TypeCheck, iReg, pTab->nNVCol); + sqlite3VdbeAppendP4(v, pTab, P4_TABLE); + } + return; + } + zColAff = pTab->zColAff; + if( zColAff==0 ){ + sqlite3 *db = sqlite3VdbeDb(v); + zColAff = (char *)sqlite3DbMallocRaw(0, pTab->nCol+1); + if( !zColAff ){ + sqlite3OomFault(db); + return; + } + + for(i=j=0; inCol; i++){ + assert( pTab->aCol[i].affinity!=0 || sqlite3VdbeParser(v)->nErr>0 ); + if( (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 ){ + zColAff[j++] = pTab->aCol[i].affinity; + } + } + do{ + zColAff[j--] = 0; + }while( j>=0 && zColAff[j]<=SQLITE_AFF_BLOB ); + pTab->zColAff = zColAff; + } + assert( zColAff!=0 ); + i = sqlite3Strlen30NN(zColAff); + if( i ){ + if( iReg ){ + sqlite3VdbeAddOp4(v, OP_Affinity, iReg, i, 0, zColAff, i); + }else{ + assert( sqlite3VdbeGetOp(v, -1)->opcode==OP_MakeRecord + || sqlite3VdbeDb(v)->mallocFailed ); + sqlite3VdbeChangeP4(v, -1, zColAff, i); + } + } +} + +/* +** Return non-zero if the table pTab in database iDb or any of its indices +** have been opened at any point in the VDBE program. This is used to see if +** a statement of the form "INSERT INTO SELECT ..." can +** run without using a temporary table for the results of the SELECT. +*/ +static int readsTable(Parse *p, int iDb, Table *pTab){ + Vdbe *v = sqlite3GetVdbe(p); + int i; + int iEnd = sqlite3VdbeCurrentAddr(v); +#ifndef SQLITE_OMIT_VIRTUALTABLE + VTable *pVTab = IsVirtual(pTab) ? sqlite3GetVTable(p->db, pTab) : 0; +#endif + + for(i=1; iopcode==OP_OpenRead && pOp->p3==iDb ){ + Index *pIndex; + Pgno tnum = pOp->p2; + if( tnum==pTab->tnum ){ + return 1; + } + for(pIndex=pTab->pIndex; pIndex; pIndex=pIndex->pNext){ + if( tnum==pIndex->tnum ){ + return 1; + } + } + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( pOp->opcode==OP_VOpen && pOp->p4.pVtab==pVTab ){ + assert( pOp->p4.pVtab!=0 ); + assert( pOp->p4type==P4_VTAB ); + return 1; + } +#endif + } + return 0; +} + +/* This walker callback will compute the union of colFlags flags for all +** referenced columns in a CHECK constraint or generated column expression. +*/ +static int exprColumnFlagUnion(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_COLUMN && pExpr->iColumn>=0 ){ + assert( pExpr->iColumn < pWalker->u.pTab->nCol ); + pWalker->eCode |= pWalker->u.pTab->aCol[pExpr->iColumn].colFlags; + } + return WRC_Continue; +} + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS +/* +** All regular columns for table pTab have been puts into registers +** starting with iRegStore. The registers that correspond to STORED +** or VIRTUAL columns have not yet been initialized. This routine goes +** back and computes the values for those columns based on the previously +** computed normal columns. +*/ +SQLITE_PRIVATE void sqlite3ComputeGeneratedColumns( + Parse *pParse, /* Parsing context */ + int iRegStore, /* Register holding the first column */ + Table *pTab /* The table */ +){ + int i; + Walker w; + Column *pRedo; + int eProgress; + VdbeOp *pOp; + + assert( pTab->tabFlags & TF_HasGenerated ); + testcase( pTab->tabFlags & TF_HasVirtual ); + testcase( pTab->tabFlags & TF_HasStored ); + + /* Before computing generated columns, first go through and make sure + ** that appropriate affinity has been applied to the regular columns + */ + sqlite3TableAffinity(pParse->pVdbe, pTab, iRegStore); + if( (pTab->tabFlags & TF_HasStored)!=0 ){ + pOp = sqlite3VdbeGetOp(pParse->pVdbe,-1); + if( pOp->opcode==OP_Affinity ){ + /* Change the OP_Affinity argument to '@' (NONE) for all stored + ** columns. '@' is the no-op affinity and those columns have not + ** yet been computed. */ + int ii, jj; + char *zP4 = pOp->p4.z; + assert( zP4!=0 ); + assert( pOp->p4type==P4_DYNAMIC ); + for(ii=jj=0; zP4[jj]; ii++){ + if( pTab->aCol[ii].colFlags & COLFLAG_VIRTUAL ){ + continue; + } + if( pTab->aCol[ii].colFlags & COLFLAG_STORED ){ + zP4[jj] = SQLITE_AFF_NONE; + } + jj++; + } + }else if( pOp->opcode==OP_TypeCheck ){ + /* If an OP_TypeCheck was generated because the table is STRICT, + ** then set the P3 operand to indicate that generated columns should + ** not be checked */ + pOp->p3 = 1; + } + } + + /* Because there can be multiple generated columns that refer to one another, + ** this is a two-pass algorithm. On the first pass, mark all generated + ** columns as "not available". + */ + for(i=0; inCol; i++){ + if( pTab->aCol[i].colFlags & COLFLAG_GENERATED ){ + testcase( pTab->aCol[i].colFlags & COLFLAG_VIRTUAL ); + testcase( pTab->aCol[i].colFlags & COLFLAG_STORED ); + pTab->aCol[i].colFlags |= COLFLAG_NOTAVAIL; + } + } + + w.u.pTab = pTab; + w.xExprCallback = exprColumnFlagUnion; + w.xSelectCallback = 0; + w.xSelectCallback2 = 0; + + /* On the second pass, compute the value of each NOT-AVAILABLE column. + ** Companion code in the TK_COLUMN case of sqlite3ExprCodeTarget() will + ** compute dependencies and mark remove the COLSPAN_NOTAVAIL mark, as + ** they are needed. + */ + pParse->iSelfTab = -iRegStore; + do{ + eProgress = 0; + pRedo = 0; + for(i=0; inCol; i++){ + Column *pCol = pTab->aCol + i; + if( (pCol->colFlags & COLFLAG_NOTAVAIL)!=0 ){ + int x; + pCol->colFlags |= COLFLAG_BUSY; + w.eCode = 0; + sqlite3WalkExpr(&w, sqlite3ColumnExpr(pTab, pCol)); + pCol->colFlags &= ~COLFLAG_BUSY; + if( w.eCode & COLFLAG_NOTAVAIL ){ + pRedo = pCol; + continue; + } + eProgress = 1; + assert( pCol->colFlags & COLFLAG_GENERATED ); + x = sqlite3TableColumnToStorage(pTab, i) + iRegStore; + sqlite3ExprCodeGeneratedColumn(pParse, pTab, pCol, x); + pCol->colFlags &= ~COLFLAG_NOTAVAIL; + } + } + }while( pRedo && eProgress ); + if( pRedo ){ + sqlite3ErrorMsg(pParse, "generated column loop on \"%s\"", pRedo->zCnName); + } + pParse->iSelfTab = 0; +} +#endif /* SQLITE_OMIT_GENERATED_COLUMNS */ + + +#ifndef SQLITE_OMIT_AUTOINCREMENT +/* +** Locate or create an AutoincInfo structure associated with table pTab +** which is in database iDb. Return the register number for the register +** that holds the maximum rowid. Return zero if pTab is not an AUTOINCREMENT +** table. (Also return zero when doing a VACUUM since we do not want to +** update the AUTOINCREMENT counters during a VACUUM.) +** +** There is at most one AutoincInfo structure per table even if the +** same table is autoincremented multiple times due to inserts within +** triggers. A new AutoincInfo structure is created if this is the +** first use of table pTab. On 2nd and subsequent uses, the original +** AutoincInfo structure is used. +** +** Four consecutive registers are allocated: +** +** (1) The name of the pTab table. +** (2) The maximum ROWID of pTab. +** (3) The rowid in sqlite_sequence of pTab +** (4) The original value of the max ROWID in pTab, or NULL if none +** +** The 2nd register is the one that is returned. That is all the +** insert routine needs to know about. +*/ +static int autoIncBegin( + Parse *pParse, /* Parsing context */ + int iDb, /* Index of the database holding pTab */ + Table *pTab /* The table we are writing to */ +){ + int memId = 0; /* Register holding maximum rowid */ + assert( pParse->db->aDb[iDb].pSchema!=0 ); + if( (pTab->tabFlags & TF_Autoincrement)!=0 + && (pParse->db->mDbFlags & DBFLAG_Vacuum)==0 + ){ + Parse *pToplevel = sqlite3ParseToplevel(pParse); + AutoincInfo *pInfo; + Table *pSeqTab = pParse->db->aDb[iDb].pSchema->pSeqTab; + + /* Verify that the sqlite_sequence table exists and is an ordinary + ** rowid table with exactly two columns. + ** Ticket d8dc2b3a58cd5dc2918a1d4acb 2018-05-23 */ + if( pSeqTab==0 + || !HasRowid(pSeqTab) + || NEVER(IsVirtual(pSeqTab)) + || pSeqTab->nCol!=2 + ){ + pParse->nErr++; + pParse->rc = SQLITE_CORRUPT_SEQUENCE; + return 0; + } + + pInfo = pToplevel->pAinc; + while( pInfo && pInfo->pTab!=pTab ){ pInfo = pInfo->pNext; } + if( pInfo==0 ){ + pInfo = sqlite3DbMallocRawNN(pParse->db, sizeof(*pInfo)); + sqlite3ParserAddCleanup(pToplevel, sqlite3DbFree, pInfo); + testcase( pParse->earlyCleanup ); + if( pParse->db->mallocFailed ) return 0; + pInfo->pNext = pToplevel->pAinc; + pToplevel->pAinc = pInfo; + pInfo->pTab = pTab; + pInfo->iDb = iDb; + pToplevel->nMem++; /* Register to hold name of table */ + pInfo->regCtr = ++pToplevel->nMem; /* Max rowid register */ + pToplevel->nMem +=2; /* Rowid in sqlite_sequence + orig max val */ + } + memId = pInfo->regCtr; + } + return memId; +} + +/* +** This routine generates code that will initialize all of the +** register used by the autoincrement tracker. +*/ +SQLITE_PRIVATE void sqlite3AutoincrementBegin(Parse *pParse){ + AutoincInfo *p; /* Information about an AUTOINCREMENT */ + sqlite3 *db = pParse->db; /* The database connection */ + Db *pDb; /* Database only autoinc table */ + int memId; /* Register holding max rowid */ + Vdbe *v = pParse->pVdbe; /* VDBE under construction */ + + /* This routine is never called during trigger-generation. It is + ** only called from the top-level */ + assert( pParse->pTriggerTab==0 ); + assert( sqlite3IsToplevel(pParse) ); + + assert( v ); /* We failed long ago if this is not so */ + for(p = pParse->pAinc; p; p = p->pNext){ + static const int iLn = VDBE_OFFSET_LINENO(2); + static const VdbeOpList autoInc[] = { + /* 0 */ {OP_Null, 0, 0, 0}, + /* 1 */ {OP_Rewind, 0, 10, 0}, + /* 2 */ {OP_Column, 0, 0, 0}, + /* 3 */ {OP_Ne, 0, 9, 0}, + /* 4 */ {OP_Rowid, 0, 0, 0}, + /* 5 */ {OP_Column, 0, 1, 0}, + /* 6 */ {OP_AddImm, 0, 0, 0}, + /* 7 */ {OP_Copy, 0, 0, 0}, + /* 8 */ {OP_Goto, 0, 11, 0}, + /* 9 */ {OP_Next, 0, 2, 0}, + /* 10 */ {OP_Integer, 0, 0, 0}, + /* 11 */ {OP_Close, 0, 0, 0} + }; + VdbeOp *aOp; + pDb = &db->aDb[p->iDb]; + memId = p->regCtr; + assert( sqlite3SchemaMutexHeld(db, 0, pDb->pSchema) ); + sqlite3OpenTable(pParse, 0, p->iDb, pDb->pSchema->pSeqTab, OP_OpenRead); + sqlite3VdbeLoadString(v, memId-1, p->pTab->zName); + aOp = sqlite3VdbeAddOpList(v, ArraySize(autoInc), autoInc, iLn); + if( aOp==0 ) break; + aOp[0].p2 = memId; + aOp[0].p3 = memId+2; + aOp[2].p3 = memId; + aOp[3].p1 = memId-1; + aOp[3].p3 = memId; + aOp[3].p5 = SQLITE_JUMPIFNULL; + aOp[4].p2 = memId+1; + aOp[5].p3 = memId; + aOp[6].p1 = memId; + aOp[7].p2 = memId+2; + aOp[7].p1 = memId; + aOp[10].p2 = memId; + if( pParse->nTab==0 ) pParse->nTab = 1; + } +} + +/* +** Update the maximum rowid for an autoincrement calculation. +** +** This routine should be called when the regRowid register holds a +** new rowid that is about to be inserted. If that new rowid is +** larger than the maximum rowid in the memId memory cell, then the +** memory cell is updated. +*/ +static void autoIncStep(Parse *pParse, int memId, int regRowid){ + if( memId>0 ){ + sqlite3VdbeAddOp2(pParse->pVdbe, OP_MemMax, memId, regRowid); + } +} + +/* +** This routine generates the code needed to write autoincrement +** maximum rowid values back into the sqlite_sequence register. +** Every statement that might do an INSERT into an autoincrement +** table (either directly or through triggers) needs to call this +** routine just before the "exit" code. +*/ +static SQLITE_NOINLINE void autoIncrementEnd(Parse *pParse){ + AutoincInfo *p; + Vdbe *v = pParse->pVdbe; + sqlite3 *db = pParse->db; + + assert( v ); + for(p = pParse->pAinc; p; p = p->pNext){ + static const int iLn = VDBE_OFFSET_LINENO(2); + static const VdbeOpList autoIncEnd[] = { + /* 0 */ {OP_NotNull, 0, 2, 0}, + /* 1 */ {OP_NewRowid, 0, 0, 0}, + /* 2 */ {OP_MakeRecord, 0, 2, 0}, + /* 3 */ {OP_Insert, 0, 0, 0}, + /* 4 */ {OP_Close, 0, 0, 0} + }; + VdbeOp *aOp; + Db *pDb = &db->aDb[p->iDb]; + int iRec; + int memId = p->regCtr; + + iRec = sqlite3GetTempReg(pParse); + assert( sqlite3SchemaMutexHeld(db, 0, pDb->pSchema) ); + sqlite3VdbeAddOp3(v, OP_Le, memId+2, sqlite3VdbeCurrentAddr(v)+7, memId); + VdbeCoverage(v); + sqlite3OpenTable(pParse, 0, p->iDb, pDb->pSchema->pSeqTab, OP_OpenWrite); + aOp = sqlite3VdbeAddOpList(v, ArraySize(autoIncEnd), autoIncEnd, iLn); + if( aOp==0 ) break; + aOp[0].p1 = memId+1; + aOp[1].p2 = memId+1; + aOp[2].p1 = memId-1; + aOp[2].p3 = iRec; + aOp[3].p2 = iRec; + aOp[3].p3 = memId+1; + aOp[3].p5 = OPFLAG_APPEND; + sqlite3ReleaseTempReg(pParse, iRec); + } +} +SQLITE_PRIVATE void sqlite3AutoincrementEnd(Parse *pParse){ + if( pParse->pAinc ) autoIncrementEnd(pParse); +} +#else +/* +** If SQLITE_OMIT_AUTOINCREMENT is defined, then the three routines +** above are all no-ops +*/ +# define autoIncBegin(A,B,C) (0) +# define autoIncStep(A,B,C) +#endif /* SQLITE_OMIT_AUTOINCREMENT */ + + +/* Forward declaration */ +static int xferOptimization( + Parse *pParse, /* Parser context */ + Table *pDest, /* The table we are inserting into */ + Select *pSelect, /* A SELECT statement to use as the data source */ + int onError, /* How to handle constraint errors */ + int iDbDest /* The database of pDest */ +); + +/* +** This routine is called to handle SQL of the following forms: +** +** insert into TABLE (IDLIST) values(EXPRLIST),(EXPRLIST),... +** insert into TABLE (IDLIST) select +** insert into TABLE (IDLIST) default values +** +** The IDLIST following the table name is always optional. If omitted, +** then a list of all (non-hidden) columns for the table is substituted. +** The IDLIST appears in the pColumn parameter. pColumn is NULL if IDLIST +** is omitted. +** +** For the pSelect parameter holds the values to be inserted for the +** first two forms shown above. A VALUES clause is really just short-hand +** for a SELECT statement that omits the FROM clause and everything else +** that follows. If the pSelect parameter is NULL, that means that the +** DEFAULT VALUES form of the INSERT statement is intended. +** +** The code generated follows one of four templates. For a simple +** insert with data coming from a single-row VALUES clause, the code executes +** once straight down through. Pseudo-code follows (we call this +** the "1st template"): +** +** open write cursor to
    and its indices +** put VALUES clause expressions into registers +** write the resulting record into
    +** cleanup +** +** The three remaining templates assume the statement is of the form +** +** INSERT INTO
    SELECT ... +** +** If the SELECT clause is of the restricted form "SELECT * FROM " - +** in other words if the SELECT pulls all columns from a single table +** and there is no WHERE or LIMIT or GROUP BY or ORDER BY clauses, and +** if and are distinct tables but have identical +** schemas, including all the same indices, then a special optimization +** is invoked that copies raw records from over to . +** See the xferOptimization() function for the implementation of this +** template. This is the 2nd template. +** +** open a write cursor to
    +** open read cursor on +** transfer all records in over to
    +** close cursors +** foreach index on
    +** open a write cursor on the
    index +** open a read cursor on the corresponding index +** transfer all records from the read to the write cursors +** close cursors +** end foreach +** +** The 3rd template is for when the second template does not apply +** and the SELECT clause does not read from
    at any time. +** The generated code follows this template: +** +** X <- A +** goto B +** A: setup for the SELECT +** loop over the rows in the SELECT +** load values into registers R..R+n +** yield X +** end loop +** cleanup after the SELECT +** end-coroutine X +** B: open write cursor to
    and its indices +** C: yield X, at EOF goto D +** insert the select result into
    from R..R+n +** goto C +** D: cleanup +** +** The 4th template is used if the insert statement takes its +** values from a SELECT but the data is being inserted into a table +** that is also read as part of the SELECT. In the third form, +** we have to use an intermediate table to store the results of +** the select. The template is like this: +** +** X <- A +** goto B +** A: setup for the SELECT +** loop over the tables in the SELECT +** load value into register R..R+n +** yield X +** end loop +** cleanup after the SELECT +** end co-routine R +** B: open temp table +** L: yield X, at EOF goto M +** insert row from R..R+n into temp table +** goto L +** M: open write cursor to
    and its indices +** rewind temp table +** C: loop over rows of intermediate table +** transfer values form intermediate table into
    +** end loop +** D: cleanup +*/ +SQLITE_PRIVATE void sqlite3Insert( + Parse *pParse, /* Parser context */ + SrcList *pTabList, /* Name of table into which we are inserting */ + Select *pSelect, /* A SELECT statement to use as the data source */ + IdList *pColumn, /* Column names corresponding to IDLIST, or NULL. */ + int onError, /* How to handle constraint errors */ + Upsert *pUpsert /* ON CONFLICT clauses for upsert, or NULL */ +){ + sqlite3 *db; /* The main database structure */ + Table *pTab; /* The table to insert into. aka TABLE */ + int i, j; /* Loop counters */ + Vdbe *v; /* Generate code into this virtual machine */ + Index *pIdx; /* For looping over indices of the table */ + int nColumn; /* Number of columns in the data */ + int nHidden = 0; /* Number of hidden columns if TABLE is virtual */ + int iDataCur = 0; /* VDBE cursor that is the main data repository */ + int iIdxCur = 0; /* First index cursor */ + int ipkColumn = -1; /* Column that is the INTEGER PRIMARY KEY */ + int endOfLoop; /* Label for the end of the insertion loop */ + int srcTab = 0; /* Data comes from this temporary cursor if >=0 */ + int addrInsTop = 0; /* Jump to label "D" */ + int addrCont = 0; /* Top of insert loop. Label "C" in templates 3 and 4 */ + SelectDest dest; /* Destination for SELECT on rhs of INSERT */ + int iDb; /* Index of database holding TABLE */ + u8 useTempTable = 0; /* Store SELECT results in intermediate table */ + u8 appendFlag = 0; /* True if the insert is likely to be an append */ + u8 withoutRowid; /* 0 for normal table. 1 for WITHOUT ROWID table */ + u8 bIdListInOrder; /* True if IDLIST is in table order */ + ExprList *pList = 0; /* List of VALUES() to be inserted */ + int iRegStore; /* Register in which to store next column */ + + /* Register allocations */ + int regFromSelect = 0;/* Base register for data coming from SELECT */ + int regAutoinc = 0; /* Register holding the AUTOINCREMENT counter */ + int regRowCount = 0; /* Memory cell used for the row counter */ + int regIns; /* Block of regs holding rowid+data being inserted */ + int regRowid; /* registers holding insert rowid */ + int regData; /* register holding first column to insert */ + int *aRegIdx = 0; /* One register allocated to each index */ + +#ifndef SQLITE_OMIT_TRIGGER + int isView; /* True if attempting to insert into a view */ + Trigger *pTrigger; /* List of triggers on pTab, if required */ + int tmask; /* Mask of trigger times */ +#endif + + db = pParse->db; + assert( db->pParse==pParse ); + if( pParse->nErr ){ + goto insert_cleanup; + } + assert( db->mallocFailed==0 ); + dest.iSDParm = 0; /* Suppress a harmless compiler warning */ + + /* If the Select object is really just a simple VALUES() list with a + ** single row (the common case) then keep that one row of values + ** and discard the other (unused) parts of the pSelect object + */ + if( pSelect && (pSelect->selFlags & SF_Values)!=0 && pSelect->pPrior==0 ){ + pList = pSelect->pEList; + pSelect->pEList = 0; + sqlite3SelectDelete(db, pSelect); + pSelect = 0; + } + + /* Locate the table into which we will be inserting new information. + */ + assert( pTabList->nSrc==1 ); + pTab = sqlite3SrcListLookup(pParse, pTabList); + if( pTab==0 ){ + goto insert_cleanup; + } + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + assert( iDbnDb ); + if( sqlite3AuthCheck(pParse, SQLITE_INSERT, pTab->zName, 0, + db->aDb[iDb].zDbSName) ){ + goto insert_cleanup; + } + withoutRowid = !HasRowid(pTab); + + /* Figure out if we have any triggers and if the table being + ** inserted into is a view + */ +#ifndef SQLITE_OMIT_TRIGGER + pTrigger = sqlite3TriggersExist(pParse, pTab, TK_INSERT, 0, &tmask); + isView = IsView(pTab); +#else +# define pTrigger 0 +# define tmask 0 +# define isView 0 +#endif +#ifdef SQLITE_OMIT_VIEW +# undef isView +# define isView 0 +#endif + assert( (pTrigger && tmask) || (pTrigger==0 && tmask==0) ); + + /* If pTab is really a view, make sure it has been initialized. + ** ViewGetColumnNames() is a no-op if pTab is not a view. + */ + if( sqlite3ViewGetColumnNames(pParse, pTab) ){ + goto insert_cleanup; + } + + /* Cannot insert into a read-only table. + */ + if( sqlite3IsReadOnly(pParse, pTab, tmask) ){ + goto insert_cleanup; + } + + /* Allocate a VDBE + */ + v = sqlite3GetVdbe(pParse); + if( v==0 ) goto insert_cleanup; + if( pParse->nested==0 ) sqlite3VdbeCountChanges(v); + sqlite3BeginWriteOperation(pParse, pSelect || pTrigger, iDb); + +#ifndef SQLITE_OMIT_XFER_OPT + /* If the statement is of the form + ** + ** INSERT INTO SELECT * FROM ; + ** + ** Then special optimizations can be applied that make the transfer + ** very fast and which reduce fragmentation of indices. + ** + ** This is the 2nd template. + */ + if( pColumn==0 + && pSelect!=0 + && pTrigger==0 + && xferOptimization(pParse, pTab, pSelect, onError, iDb) + ){ + assert( !pTrigger ); + assert( pList==0 ); + goto insert_end; + } +#endif /* SQLITE_OMIT_XFER_OPT */ + + /* If this is an AUTOINCREMENT table, look up the sequence number in the + ** sqlite_sequence table and store it in memory cell regAutoinc. + */ + regAutoinc = autoIncBegin(pParse, iDb, pTab); + + /* Allocate a block registers to hold the rowid and the values + ** for all columns of the new row. + */ + regRowid = regIns = pParse->nMem+1; + pParse->nMem += pTab->nCol + 1; + if( IsVirtual(pTab) ){ + regRowid++; + pParse->nMem++; + } + regData = regRowid+1; + + /* If the INSERT statement included an IDLIST term, then make sure + ** all elements of the IDLIST really are columns of the table and + ** remember the column indices. + ** + ** If the table has an INTEGER PRIMARY KEY column and that column + ** is named in the IDLIST, then record in the ipkColumn variable + ** the index into IDLIST of the primary key column. ipkColumn is + ** the index of the primary key as it appears in IDLIST, not as + ** is appears in the original table. (The index of the INTEGER + ** PRIMARY KEY in the original table is pTab->iPKey.) After this + ** loop, if ipkColumn==(-1), that means that integer primary key + ** is unspecified, and hence the table is either WITHOUT ROWID or + ** it will automatically generated an integer primary key. + ** + ** bIdListInOrder is true if the columns in IDLIST are in storage + ** order. This enables an optimization that avoids shuffling the + ** columns into storage order. False negatives are harmless, + ** but false positives will cause database corruption. + */ + bIdListInOrder = (pTab->tabFlags & (TF_OOOHidden|TF_HasStored))==0; + if( pColumn ){ + for(i=0; inId; i++){ + pColumn->a[i].idx = -1; + } + for(i=0; inId; i++){ + for(j=0; jnCol; j++){ + if( sqlite3StrICmp(pColumn->a[i].zName, pTab->aCol[j].zCnName)==0 ){ + pColumn->a[i].idx = j; + if( i!=j ) bIdListInOrder = 0; + if( j==pTab->iPKey ){ + ipkColumn = i; assert( !withoutRowid ); + } +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( pTab->aCol[j].colFlags & (COLFLAG_STORED|COLFLAG_VIRTUAL) ){ + sqlite3ErrorMsg(pParse, + "cannot INSERT into generated column \"%s\"", + pTab->aCol[j].zCnName); + goto insert_cleanup; + } +#endif + break; + } + } + if( j>=pTab->nCol ){ + if( sqlite3IsRowid(pColumn->a[i].zName) && !withoutRowid ){ + ipkColumn = i; + bIdListInOrder = 0; + }else{ + sqlite3ErrorMsg(pParse, "table %S has no column named %s", + pTabList->a, pColumn->a[i].zName); + pParse->checkSchema = 1; + goto insert_cleanup; + } + } + } + } + + /* Figure out how many columns of data are supplied. If the data + ** is coming from a SELECT statement, then generate a co-routine that + ** produces a single row of the SELECT on each invocation. The + ** co-routine is the common header to the 3rd and 4th templates. + */ + if( pSelect ){ + /* Data is coming from a SELECT or from a multi-row VALUES clause. + ** Generate a co-routine to run the SELECT. */ + int regYield; /* Register holding co-routine entry-point */ + int addrTop; /* Top of the co-routine */ + int rc; /* Result code */ + + regYield = ++pParse->nMem; + addrTop = sqlite3VdbeCurrentAddr(v) + 1; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, addrTop); + sqlite3SelectDestInit(&dest, SRT_Coroutine, regYield); + dest.iSdst = bIdListInOrder ? regData : 0; + dest.nSdst = pTab->nCol; + rc = sqlite3Select(pParse, pSelect, &dest); + regFromSelect = dest.iSdst; + assert( db->pParse==pParse ); + if( rc || pParse->nErr ) goto insert_cleanup; + assert( db->mallocFailed==0 ); + sqlite3VdbeEndCoroutine(v, regYield); + sqlite3VdbeJumpHere(v, addrTop - 1); /* label B: */ + assert( pSelect->pEList ); + nColumn = pSelect->pEList->nExpr; + + /* Set useTempTable to TRUE if the result of the SELECT statement + ** should be written into a temporary table (template 4). Set to + ** FALSE if each output row of the SELECT can be written directly into + ** the destination table (template 3). + ** + ** A temp table must be used if the table being updated is also one + ** of the tables being read by the SELECT statement. Also use a + ** temp table in the case of row triggers. + */ + if( pTrigger || readsTable(pParse, iDb, pTab) ){ + useTempTable = 1; + } + + if( useTempTable ){ + /* Invoke the coroutine to extract information from the SELECT + ** and add it to a transient table srcTab. The code generated + ** here is from the 4th template: + ** + ** B: open temp table + ** L: yield X, goto M at EOF + ** insert row from R..R+n into temp table + ** goto L + ** M: ... + */ + int regRec; /* Register to hold packed record */ + int regTempRowid; /* Register to hold temp table ROWID */ + int addrL; /* Label "L" */ + + srcTab = pParse->nTab++; + regRec = sqlite3GetTempReg(pParse); + regTempRowid = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, srcTab, nColumn); + addrL = sqlite3VdbeAddOp1(v, OP_Yield, dest.iSDParm); VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_MakeRecord, regFromSelect, nColumn, regRec); + sqlite3VdbeAddOp2(v, OP_NewRowid, srcTab, regTempRowid); + sqlite3VdbeAddOp3(v, OP_Insert, srcTab, regRec, regTempRowid); + sqlite3VdbeGoto(v, addrL); + sqlite3VdbeJumpHere(v, addrL); + sqlite3ReleaseTempReg(pParse, regRec); + sqlite3ReleaseTempReg(pParse, regTempRowid); + } + }else{ + /* This is the case if the data for the INSERT is coming from a + ** single-row VALUES clause + */ + NameContext sNC; + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + srcTab = -1; + assert( useTempTable==0 ); + if( pList ){ + nColumn = pList->nExpr; + if( sqlite3ResolveExprListNames(&sNC, pList) ){ + goto insert_cleanup; + } + }else{ + nColumn = 0; + } + } + + /* If there is no IDLIST term but the table has an integer primary + ** key, the set the ipkColumn variable to the integer primary key + ** column index in the original table definition. + */ + if( pColumn==0 && nColumn>0 ){ + ipkColumn = pTab->iPKey; +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( ipkColumn>=0 && (pTab->tabFlags & TF_HasGenerated)!=0 ){ + testcase( pTab->tabFlags & TF_HasVirtual ); + testcase( pTab->tabFlags & TF_HasStored ); + for(i=ipkColumn-1; i>=0; i--){ + if( pTab->aCol[i].colFlags & COLFLAG_GENERATED ){ + testcase( pTab->aCol[i].colFlags & COLFLAG_VIRTUAL ); + testcase( pTab->aCol[i].colFlags & COLFLAG_STORED ); + ipkColumn--; + } + } + } +#endif + + /* Make sure the number of columns in the source data matches the number + ** of columns to be inserted into the table. + */ + assert( TF_HasHidden==COLFLAG_HIDDEN ); + assert( TF_HasGenerated==COLFLAG_GENERATED ); + assert( COLFLAG_NOINSERT==(COLFLAG_GENERATED|COLFLAG_HIDDEN) ); + if( (pTab->tabFlags & (TF_HasGenerated|TF_HasHidden))!=0 ){ + for(i=0; inCol; i++){ + if( pTab->aCol[i].colFlags & COLFLAG_NOINSERT ) nHidden++; + } + } + if( nColumn!=(pTab->nCol-nHidden) ){ + sqlite3ErrorMsg(pParse, + "table %S has %d columns but %d values were supplied", + pTabList->a, pTab->nCol-nHidden, nColumn); + goto insert_cleanup; + } + } + if( pColumn!=0 && nColumn!=pColumn->nId ){ + sqlite3ErrorMsg(pParse, "%d values for %d columns", nColumn, pColumn->nId); + goto insert_cleanup; + } + + /* Initialize the count of rows to be inserted + */ + if( (db->flags & SQLITE_CountRows)!=0 + && !pParse->nested + && !pParse->pTriggerTab + && !pParse->bReturning + ){ + regRowCount = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, regRowCount); + } + + /* If this is not a view, open the table and and all indices */ + if( !isView ){ + int nIdx; + nIdx = sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, 0, -1, 0, + &iDataCur, &iIdxCur); + aRegIdx = sqlite3DbMallocRawNN(db, sizeof(int)*(nIdx+2)); + if( aRegIdx==0 ){ + goto insert_cleanup; + } + for(i=0, pIdx=pTab->pIndex; ipNext, i++){ + assert( pIdx ); + aRegIdx[i] = ++pParse->nMem; + pParse->nMem += pIdx->nColumn; + } + aRegIdx[i] = ++pParse->nMem; /* Register to store the table record */ + } +#ifndef SQLITE_OMIT_UPSERT + if( pUpsert ){ + Upsert *pNx; + if( IsVirtual(pTab) ){ + sqlite3ErrorMsg(pParse, "UPSERT not implemented for virtual table \"%s\"", + pTab->zName); + goto insert_cleanup; + } + if( IsView(pTab) ){ + sqlite3ErrorMsg(pParse, "cannot UPSERT a view"); + goto insert_cleanup; + } + if( sqlite3HasExplicitNulls(pParse, pUpsert->pUpsertTarget) ){ + goto insert_cleanup; + } + pTabList->a[0].iCursor = iDataCur; + pNx = pUpsert; + do{ + pNx->pUpsertSrc = pTabList; + pNx->regData = regData; + pNx->iDataCur = iDataCur; + pNx->iIdxCur = iIdxCur; + if( pNx->pUpsertTarget ){ + if( sqlite3UpsertAnalyzeTarget(pParse, pTabList, pNx) ){ + goto insert_cleanup; + } + } + pNx = pNx->pNextUpsert; + }while( pNx!=0 ); + } +#endif + + + /* This is the top of the main insertion loop */ + if( useTempTable ){ + /* This block codes the top of loop only. The complete loop is the + ** following pseudocode (template 4): + ** + ** rewind temp table, if empty goto D + ** C: loop over rows of intermediate table + ** transfer values form intermediate table into
    + ** end loop + ** D: ... + */ + addrInsTop = sqlite3VdbeAddOp1(v, OP_Rewind, srcTab); VdbeCoverage(v); + addrCont = sqlite3VdbeCurrentAddr(v); + }else if( pSelect ){ + /* This block codes the top of loop only. The complete loop is the + ** following pseudocode (template 3): + ** + ** C: yield X, at EOF goto D + ** insert the select result into
    from R..R+n + ** goto C + ** D: ... + */ + sqlite3VdbeReleaseRegisters(pParse, regData, pTab->nCol, 0, 0); + addrInsTop = addrCont = sqlite3VdbeAddOp1(v, OP_Yield, dest.iSDParm); + VdbeCoverage(v); + if( ipkColumn>=0 ){ + /* tag-20191021-001: If the INTEGER PRIMARY KEY is being generated by the + ** SELECT, go ahead and copy the value into the rowid slot now, so that + ** the value does not get overwritten by a NULL at tag-20191021-002. */ + sqlite3VdbeAddOp2(v, OP_Copy, regFromSelect+ipkColumn, regRowid); + } + } + + /* Compute data for ordinary columns of the new entry. Values + ** are written in storage order into registers starting with regData. + ** Only ordinary columns are computed in this loop. The rowid + ** (if there is one) is computed later and generated columns are + ** computed after the rowid since they might depend on the value + ** of the rowid. + */ + nHidden = 0; + iRegStore = regData; assert( regData==regRowid+1 ); + for(i=0; inCol; i++, iRegStore++){ + int k; + u32 colFlags; + assert( i>=nHidden ); + if( i==pTab->iPKey ){ + /* tag-20191021-002: References to the INTEGER PRIMARY KEY are filled + ** using the rowid. So put a NULL in the IPK slot of the record to avoid + ** using excess space. The file format definition requires this extra + ** NULL - we cannot optimize further by skipping the column completely */ + sqlite3VdbeAddOp1(v, OP_SoftNull, iRegStore); + continue; + } + if( ((colFlags = pTab->aCol[i].colFlags) & COLFLAG_NOINSERT)!=0 ){ + nHidden++; + if( (colFlags & COLFLAG_VIRTUAL)!=0 ){ + /* Virtual columns do not participate in OP_MakeRecord. So back up + ** iRegStore by one slot to compensate for the iRegStore++ in the + ** outer for() loop */ + iRegStore--; + continue; + }else if( (colFlags & COLFLAG_STORED)!=0 ){ + /* Stored columns are computed later. But if there are BEFORE + ** triggers, the slots used for stored columns will be OP_Copy-ed + ** to a second block of registers, so the register needs to be + ** initialized to NULL to avoid an uninitialized register read */ + if( tmask & TRIGGER_BEFORE ){ + sqlite3VdbeAddOp1(v, OP_SoftNull, iRegStore); + } + continue; + }else if( pColumn==0 ){ + /* Hidden columns that are not explicitly named in the INSERT + ** get there default value */ + sqlite3ExprCodeFactorable(pParse, + sqlite3ColumnExpr(pTab, &pTab->aCol[i]), + iRegStore); + continue; + } + } + if( pColumn ){ + for(j=0; jnId && pColumn->a[j].idx!=i; j++){} + if( j>=pColumn->nId ){ + /* A column not named in the insert column list gets its + ** default value */ + sqlite3ExprCodeFactorable(pParse, + sqlite3ColumnExpr(pTab, &pTab->aCol[i]), + iRegStore); + continue; + } + k = j; + }else if( nColumn==0 ){ + /* This is INSERT INTO ... DEFAULT VALUES. Load the default value. */ + sqlite3ExprCodeFactorable(pParse, + sqlite3ColumnExpr(pTab, &pTab->aCol[i]), + iRegStore); + continue; + }else{ + k = i - nHidden; + } + + if( useTempTable ){ + sqlite3VdbeAddOp3(v, OP_Column, srcTab, k, iRegStore); + }else if( pSelect ){ + if( regFromSelect!=regData ){ + sqlite3VdbeAddOp2(v, OP_SCopy, regFromSelect+k, iRegStore); + } + }else{ + sqlite3ExprCode(pParse, pList->a[k].pExpr, iRegStore); + } + } + + + /* Run the BEFORE and INSTEAD OF triggers, if there are any + */ + endOfLoop = sqlite3VdbeMakeLabel(pParse); + if( tmask & TRIGGER_BEFORE ){ + int regCols = sqlite3GetTempRange(pParse, pTab->nCol+1); + + /* build the NEW.* reference row. Note that if there is an INTEGER + ** PRIMARY KEY into which a NULL is being inserted, that NULL will be + ** translated into a unique ID for the row. But on a BEFORE trigger, + ** we do not know what the unique ID will be (because the insert has + ** not happened yet) so we substitute a rowid of -1 + */ + if( ipkColumn<0 ){ + sqlite3VdbeAddOp2(v, OP_Integer, -1, regCols); + }else{ + int addr1; + assert( !withoutRowid ); + if( useTempTable ){ + sqlite3VdbeAddOp3(v, OP_Column, srcTab, ipkColumn, regCols); + }else{ + assert( pSelect==0 ); /* Otherwise useTempTable is true */ + sqlite3ExprCode(pParse, pList->a[ipkColumn].pExpr, regCols); + } + addr1 = sqlite3VdbeAddOp1(v, OP_NotNull, regCols); VdbeCoverage(v); + sqlite3VdbeAddOp2(v, OP_Integer, -1, regCols); + sqlite3VdbeJumpHere(v, addr1); + sqlite3VdbeAddOp1(v, OP_MustBeInt, regCols); VdbeCoverage(v); + } + + /* Copy the new data already generated. */ + assert( pTab->nNVCol>0 ); + sqlite3VdbeAddOp3(v, OP_Copy, regRowid+1, regCols+1, pTab->nNVCol-1); + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + /* Compute the new value for generated columns after all other + ** columns have already been computed. This must be done after + ** computing the ROWID in case one of the generated columns + ** refers to the ROWID. */ + if( pTab->tabFlags & TF_HasGenerated ){ + testcase( pTab->tabFlags & TF_HasVirtual ); + testcase( pTab->tabFlags & TF_HasStored ); + sqlite3ComputeGeneratedColumns(pParse, regCols+1, pTab); + } +#endif + + /* If this is an INSERT on a view with an INSTEAD OF INSERT trigger, + ** do not attempt any conversions before assembling the record. + ** If this is a real table, attempt conversions as required by the + ** table column affinities. + */ + if( !isView ){ + sqlite3TableAffinity(v, pTab, regCols+1); + } + + /* Fire BEFORE or INSTEAD OF triggers */ + sqlite3CodeRowTrigger(pParse, pTrigger, TK_INSERT, 0, TRIGGER_BEFORE, + pTab, regCols-pTab->nCol-1, onError, endOfLoop); + + sqlite3ReleaseTempRange(pParse, regCols, pTab->nCol+1); + } + + if( !isView ){ + if( IsVirtual(pTab) ){ + /* The row that the VUpdate opcode will delete: none */ + sqlite3VdbeAddOp2(v, OP_Null, 0, regIns); + } + if( ipkColumn>=0 ){ + /* Compute the new rowid */ + if( useTempTable ){ + sqlite3VdbeAddOp3(v, OP_Column, srcTab, ipkColumn, regRowid); + }else if( pSelect ){ + /* Rowid already initialized at tag-20191021-001 */ + }else{ + Expr *pIpk = pList->a[ipkColumn].pExpr; + if( pIpk->op==TK_NULL && !IsVirtual(pTab) ){ + sqlite3VdbeAddOp3(v, OP_NewRowid, iDataCur, regRowid, regAutoinc); + appendFlag = 1; + }else{ + sqlite3ExprCode(pParse, pList->a[ipkColumn].pExpr, regRowid); + } + } + /* If the PRIMARY KEY expression is NULL, then use OP_NewRowid + ** to generate a unique primary key value. + */ + if( !appendFlag ){ + int addr1; + if( !IsVirtual(pTab) ){ + addr1 = sqlite3VdbeAddOp1(v, OP_NotNull, regRowid); VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_NewRowid, iDataCur, regRowid, regAutoinc); + sqlite3VdbeJumpHere(v, addr1); + }else{ + addr1 = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp2(v, OP_IsNull, regRowid, addr1+2); VdbeCoverage(v); + } + sqlite3VdbeAddOp1(v, OP_MustBeInt, regRowid); VdbeCoverage(v); + } + }else if( IsVirtual(pTab) || withoutRowid ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regRowid); + }else{ + sqlite3VdbeAddOp3(v, OP_NewRowid, iDataCur, regRowid, regAutoinc); + appendFlag = 1; + } + autoIncStep(pParse, regAutoinc, regRowid); + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + /* Compute the new value for generated columns after all other + ** columns have already been computed. This must be done after + ** computing the ROWID in case one of the generated columns + ** is derived from the INTEGER PRIMARY KEY. */ + if( pTab->tabFlags & TF_HasGenerated ){ + sqlite3ComputeGeneratedColumns(pParse, regRowid+1, pTab); + } +#endif + + /* Generate code to check constraints and generate index keys and + ** do the insertion. + */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pTab) ){ + const char *pVTab = (const char *)sqlite3GetVTable(db, pTab); + sqlite3VtabMakeWritable(pParse, pTab); + sqlite3VdbeAddOp4(v, OP_VUpdate, 1, pTab->nCol+2, regIns, pVTab, P4_VTAB); + sqlite3VdbeChangeP5(v, onError==OE_Default ? OE_Abort : onError); + sqlite3MayAbort(pParse); + }else +#endif + { + int isReplace = 0;/* Set to true if constraints may cause a replace */ + int bUseSeek; /* True to use OPFLAG_SEEKRESULT */ + sqlite3GenerateConstraintChecks(pParse, pTab, aRegIdx, iDataCur, iIdxCur, + regIns, 0, ipkColumn>=0, onError, endOfLoop, &isReplace, 0, pUpsert + ); + sqlite3FkCheck(pParse, pTab, 0, regIns, 0, 0); + + /* Set the OPFLAG_USESEEKRESULT flag if either (a) there are no REPLACE + ** constraints or (b) there are no triggers and this table is not a + ** parent table in a foreign key constraint. It is safe to set the + ** flag in the second case as if any REPLACE constraint is hit, an + ** OP_Delete or OP_IdxDelete instruction will be executed on each + ** cursor that is disturbed. And these instructions both clear the + ** VdbeCursor.seekResult variable, disabling the OPFLAG_USESEEKRESULT + ** functionality. */ + bUseSeek = (isReplace==0 || !sqlite3VdbeHasSubProgram(v)); + sqlite3CompleteInsertion(pParse, pTab, iDataCur, iIdxCur, + regIns, aRegIdx, 0, appendFlag, bUseSeek + ); + } +#ifdef SQLITE_ALLOW_ROWID_IN_VIEW + }else if( pParse->bReturning ){ + /* If there is a RETURNING clause, populate the rowid register with + ** constant value -1, in case one or more of the returned expressions + ** refer to the "rowid" of the view. */ + sqlite3VdbeAddOp2(v, OP_Integer, -1, regRowid); +#endif + } + + /* Update the count of rows that are inserted + */ + if( regRowCount ){ + sqlite3VdbeAddOp2(v, OP_AddImm, regRowCount, 1); + } + + if( pTrigger ){ + /* Code AFTER triggers */ + sqlite3CodeRowTrigger(pParse, pTrigger, TK_INSERT, 0, TRIGGER_AFTER, + pTab, regData-2-pTab->nCol, onError, endOfLoop); + } + + /* The bottom of the main insertion loop, if the data source + ** is a SELECT statement. + */ + sqlite3VdbeResolveLabel(v, endOfLoop); + if( useTempTable ){ + sqlite3VdbeAddOp2(v, OP_Next, srcTab, addrCont); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addrInsTop); + sqlite3VdbeAddOp1(v, OP_Close, srcTab); + }else if( pSelect ){ + sqlite3VdbeGoto(v, addrCont); +#ifdef SQLITE_DEBUG + /* If we are jumping back to an OP_Yield that is preceded by an + ** OP_ReleaseReg, set the p5 flag on the OP_Goto so that the + ** OP_ReleaseReg will be included in the loop. */ + if( sqlite3VdbeGetOp(v, addrCont-1)->opcode==OP_ReleaseReg ){ + assert( sqlite3VdbeGetOp(v, addrCont)->opcode==OP_Yield ); + sqlite3VdbeChangeP5(v, 1); + } +#endif + sqlite3VdbeJumpHere(v, addrInsTop); + } + +#ifndef SQLITE_OMIT_XFER_OPT +insert_end: +#endif /* SQLITE_OMIT_XFER_OPT */ + /* Update the sqlite_sequence table by storing the content of the + ** maximum rowid counter values recorded while inserting into + ** autoincrement tables. + */ + if( pParse->nested==0 && pParse->pTriggerTab==0 ){ + sqlite3AutoincrementEnd(pParse); + } + + /* + ** Return the number of rows inserted. If this routine is + ** generating code because of a call to sqlite3NestedParse(), do not + ** invoke the callback function. + */ + if( regRowCount ){ + sqlite3CodeChangeCount(v, regRowCount, "rows inserted"); + } + +insert_cleanup: + sqlite3SrcListDelete(db, pTabList); + sqlite3ExprListDelete(db, pList); + sqlite3UpsertDelete(db, pUpsert); + sqlite3SelectDelete(db, pSelect); + sqlite3IdListDelete(db, pColumn); + sqlite3DbFree(db, aRegIdx); +} + +/* Make sure "isView" and other macros defined above are undefined. Otherwise +** they may interfere with compilation of other functions in this file +** (or in another file, if this file becomes part of the amalgamation). */ +#ifdef isView + #undef isView +#endif +#ifdef pTrigger + #undef pTrigger +#endif +#ifdef tmask + #undef tmask +#endif + +/* +** Meanings of bits in of pWalker->eCode for +** sqlite3ExprReferencesUpdatedColumn() +*/ +#define CKCNSTRNT_COLUMN 0x01 /* CHECK constraint uses a changing column */ +#define CKCNSTRNT_ROWID 0x02 /* CHECK constraint references the ROWID */ + +/* This is the Walker callback from sqlite3ExprReferencesUpdatedColumn(). +* Set bit 0x01 of pWalker->eCode if pWalker->eCode to 0 and if this +** expression node references any of the +** columns that are being modifed by an UPDATE statement. +*/ +static int checkConstraintExprNode(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_COLUMN ){ + assert( pExpr->iColumn>=0 || pExpr->iColumn==-1 ); + if( pExpr->iColumn>=0 ){ + if( pWalker->u.aiCol[pExpr->iColumn]>=0 ){ + pWalker->eCode |= CKCNSTRNT_COLUMN; + } + }else{ + pWalker->eCode |= CKCNSTRNT_ROWID; + } + } + return WRC_Continue; +} + +/* +** pExpr is a CHECK constraint on a row that is being UPDATE-ed. The +** only columns that are modified by the UPDATE are those for which +** aiChng[i]>=0, and also the ROWID is modified if chngRowid is true. +** +** Return true if CHECK constraint pExpr uses any of the +** changing columns (or the rowid if it is changing). In other words, +** return true if this CHECK constraint must be validated for +** the new row in the UPDATE statement. +** +** 2018-09-15: pExpr might also be an expression for an index-on-expressions. +** The operation of this routine is the same - return true if an only if +** the expression uses one or more of columns identified by the second and +** third arguments. +*/ +SQLITE_PRIVATE int sqlite3ExprReferencesUpdatedColumn( + Expr *pExpr, /* The expression to be checked */ + int *aiChng, /* aiChng[x]>=0 if column x changed by the UPDATE */ + int chngRowid /* True if UPDATE changes the rowid */ +){ + Walker w; + memset(&w, 0, sizeof(w)); + w.eCode = 0; + w.xExprCallback = checkConstraintExprNode; + w.u.aiCol = aiChng; + sqlite3WalkExpr(&w, pExpr); + if( !chngRowid ){ + testcase( (w.eCode & CKCNSTRNT_ROWID)!=0 ); + w.eCode &= ~CKCNSTRNT_ROWID; + } + testcase( w.eCode==0 ); + testcase( w.eCode==CKCNSTRNT_COLUMN ); + testcase( w.eCode==CKCNSTRNT_ROWID ); + testcase( w.eCode==(CKCNSTRNT_ROWID|CKCNSTRNT_COLUMN) ); + return w.eCode!=0; +} + +/* +** The sqlite3GenerateConstraintChecks() routine usually wants to visit +** the indexes of a table in the order provided in the Table->pIndex list. +** However, sometimes (rarely - when there is an upsert) it wants to visit +** the indexes in a different order. The following data structures accomplish +** this. +** +** The IndexIterator object is used to walk through all of the indexes +** of a table in either Index.pNext order, or in some other order established +** by an array of IndexListTerm objects. +*/ +typedef struct IndexListTerm IndexListTerm; +typedef struct IndexIterator IndexIterator; +struct IndexIterator { + int eType; /* 0 for Index.pNext list. 1 for an array of IndexListTerm */ + int i; /* Index of the current item from the list */ + union { + struct { /* Use this object for eType==0: A Index.pNext list */ + Index *pIdx; /* The current Index */ + } lx; + struct { /* Use this object for eType==1; Array of IndexListTerm */ + int nIdx; /* Size of the array */ + IndexListTerm *aIdx; /* Array of IndexListTerms */ + } ax; + } u; +}; + +/* When IndexIterator.eType==1, then each index is an array of instances +** of the following object +*/ +struct IndexListTerm { + Index *p; /* The index */ + int ix; /* Which entry in the original Table.pIndex list is this index*/ +}; + +/* Return the first index on the list */ +static Index *indexIteratorFirst(IndexIterator *pIter, int *pIx){ + assert( pIter->i==0 ); + if( pIter->eType ){ + *pIx = pIter->u.ax.aIdx[0].ix; + return pIter->u.ax.aIdx[0].p; + }else{ + *pIx = 0; + return pIter->u.lx.pIdx; + } +} + +/* Return the next index from the list. Return NULL when out of indexes */ +static Index *indexIteratorNext(IndexIterator *pIter, int *pIx){ + if( pIter->eType ){ + int i = ++pIter->i; + if( i>=pIter->u.ax.nIdx ){ + *pIx = i; + return 0; + } + *pIx = pIter->u.ax.aIdx[i].ix; + return pIter->u.ax.aIdx[i].p; + }else{ + ++(*pIx); + pIter->u.lx.pIdx = pIter->u.lx.pIdx->pNext; + return pIter->u.lx.pIdx; + } +} + +/* +** Generate code to do constraint checks prior to an INSERT or an UPDATE +** on table pTab. +** +** The regNewData parameter is the first register in a range that contains +** the data to be inserted or the data after the update. There will be +** pTab->nCol+1 registers in this range. The first register (the one +** that regNewData points to) will contain the new rowid, or NULL in the +** case of a WITHOUT ROWID table. The second register in the range will +** contain the content of the first table column. The third register will +** contain the content of the second table column. And so forth. +** +** The regOldData parameter is similar to regNewData except that it contains +** the data prior to an UPDATE rather than afterwards. regOldData is zero +** for an INSERT. This routine can distinguish between UPDATE and INSERT by +** checking regOldData for zero. +** +** For an UPDATE, the pkChng boolean is true if the true primary key (the +** rowid for a normal table or the PRIMARY KEY for a WITHOUT ROWID table) +** might be modified by the UPDATE. If pkChng is false, then the key of +** the iDataCur content table is guaranteed to be unchanged by the UPDATE. +** +** For an INSERT, the pkChng boolean indicates whether or not the rowid +** was explicitly specified as part of the INSERT statement. If pkChng +** is zero, it means that the either rowid is computed automatically or +** that the table is a WITHOUT ROWID table and has no rowid. On an INSERT, +** pkChng will only be true if the INSERT statement provides an integer +** value for either the rowid column or its INTEGER PRIMARY KEY alias. +** +** The code generated by this routine will store new index entries into +** registers identified by aRegIdx[]. No index entry is created for +** indices where aRegIdx[i]==0. The order of indices in aRegIdx[] is +** the same as the order of indices on the linked list of indices +** at pTab->pIndex. +** +** (2019-05-07) The generated code also creates a new record for the +** main table, if pTab is a rowid table, and stores that record in the +** register identified by aRegIdx[nIdx] - in other words in the first +** entry of aRegIdx[] past the last index. It is important that the +** record be generated during constraint checks to avoid affinity changes +** to the register content that occur after constraint checks but before +** the new record is inserted. +** +** The caller must have already opened writeable cursors on the main +** table and all applicable indices (that is to say, all indices for which +** aRegIdx[] is not zero). iDataCur is the cursor for the main table when +** inserting or updating a rowid table, or the cursor for the PRIMARY KEY +** index when operating on a WITHOUT ROWID table. iIdxCur is the cursor +** for the first index in the pTab->pIndex list. Cursors for other indices +** are at iIdxCur+N for the N-th element of the pTab->pIndex list. +** +** This routine also generates code to check constraints. NOT NULL, +** CHECK, and UNIQUE constraints are all checked. If a constraint fails, +** then the appropriate action is performed. There are five possible +** actions: ROLLBACK, ABORT, FAIL, REPLACE, and IGNORE. +** +** Constraint type Action What Happens +** --------------- ---------- ---------------------------------------- +** any ROLLBACK The current transaction is rolled back and +** sqlite3_step() returns immediately with a +** return code of SQLITE_CONSTRAINT. +** +** any ABORT Back out changes from the current command +** only (do not do a complete rollback) then +** cause sqlite3_step() to return immediately +** with SQLITE_CONSTRAINT. +** +** any FAIL Sqlite3_step() returns immediately with a +** return code of SQLITE_CONSTRAINT. The +** transaction is not rolled back and any +** changes to prior rows are retained. +** +** any IGNORE The attempt in insert or update the current +** row is skipped, without throwing an error. +** Processing continues with the next row. +** (There is an immediate jump to ignoreDest.) +** +** NOT NULL REPLACE The NULL value is replace by the default +** value for that column. If the default value +** is NULL, the action is the same as ABORT. +** +** UNIQUE REPLACE The other row that conflicts with the row +** being inserted is removed. +** +** CHECK REPLACE Illegal. The results in an exception. +** +** Which action to take is determined by the overrideError parameter. +** Or if overrideError==OE_Default, then the pParse->onError parameter +** is used. Or if pParse->onError==OE_Default then the onError value +** for the constraint is used. +*/ +SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( + Parse *pParse, /* The parser context */ + Table *pTab, /* The table being inserted or updated */ + int *aRegIdx, /* Use register aRegIdx[i] for index i. 0 for unused */ + int iDataCur, /* Canonical data cursor (main table or PK index) */ + int iIdxCur, /* First index cursor */ + int regNewData, /* First register in a range holding values to insert */ + int regOldData, /* Previous content. 0 for INSERTs */ + u8 pkChng, /* Non-zero if the rowid or PRIMARY KEY changed */ + u8 overrideError, /* Override onError to this if not OE_Default */ + int ignoreDest, /* Jump to this label on an OE_Ignore resolution */ + int *pbMayReplace, /* OUT: Set to true if constraint may cause a replace */ + int *aiChng, /* column i is unchanged if aiChng[i]<0 */ + Upsert *pUpsert /* ON CONFLICT clauses, if any. NULL otherwise */ +){ + Vdbe *v; /* VDBE under constrution */ + Index *pIdx; /* Pointer to one of the indices */ + Index *pPk = 0; /* The PRIMARY KEY index for WITHOUT ROWID tables */ + sqlite3 *db; /* Database connection */ + int i; /* loop counter */ + int ix; /* Index loop counter */ + int nCol; /* Number of columns */ + int onError; /* Conflict resolution strategy */ + int seenReplace = 0; /* True if REPLACE is used to resolve INT PK conflict */ + int nPkField; /* Number of fields in PRIMARY KEY. 1 for ROWID tables */ + Upsert *pUpsertClause = 0; /* The specific ON CONFLICT clause for pIdx */ + u8 isUpdate; /* True if this is an UPDATE operation */ + u8 bAffinityDone = 0; /* True if the OP_Affinity operation has been run */ + int upsertIpkReturn = 0; /* Address of Goto at end of IPK uniqueness check */ + int upsertIpkDelay = 0; /* Address of Goto to bypass initial IPK check */ + int ipkTop = 0; /* Top of the IPK uniqueness check */ + int ipkBottom = 0; /* OP_Goto at the end of the IPK uniqueness check */ + /* Variables associated with retesting uniqueness constraints after + ** replace triggers fire have run */ + int regTrigCnt; /* Register used to count replace trigger invocations */ + int addrRecheck = 0; /* Jump here to recheck all uniqueness constraints */ + int lblRecheckOk = 0; /* Each recheck jumps to this label if it passes */ + Trigger *pTrigger; /* List of DELETE triggers on the table pTab */ + int nReplaceTrig = 0; /* Number of replace triggers coded */ + IndexIterator sIdxIter; /* Index iterator */ + + isUpdate = regOldData!=0; + db = pParse->db; + v = pParse->pVdbe; + assert( v!=0 ); + assert( !IsView(pTab) ); /* This table is not a VIEW */ + nCol = pTab->nCol; + + /* pPk is the PRIMARY KEY index for WITHOUT ROWID tables and NULL for + ** normal rowid tables. nPkField is the number of key fields in the + ** pPk index or 1 for a rowid table. In other words, nPkField is the + ** number of fields in the true primary key of the table. */ + if( HasRowid(pTab) ){ + pPk = 0; + nPkField = 1; + }else{ + pPk = sqlite3PrimaryKeyIndex(pTab); + nPkField = pPk->nKeyCol; + } + + /* Record that this module has started */ + VdbeModuleComment((v, "BEGIN: GenCnstCks(%d,%d,%d,%d,%d)", + iDataCur, iIdxCur, regNewData, regOldData, pkChng)); + + /* Test all NOT NULL constraints. + */ + if( pTab->tabFlags & TF_HasNotNull ){ + int b2ndPass = 0; /* True if currently running 2nd pass */ + int nSeenReplace = 0; /* Number of ON CONFLICT REPLACE operations */ + int nGenerated = 0; /* Number of generated columns with NOT NULL */ + while(1){ /* Make 2 passes over columns. Exit loop via "break" */ + for(i=0; iaCol[i]; /* The column to check for NOT NULL */ + int isGenerated; /* non-zero if column is generated */ + onError = pCol->notNull; + if( onError==OE_None ) continue; /* No NOT NULL on this column */ + if( i==pTab->iPKey ){ + continue; /* ROWID is never NULL */ + } + isGenerated = pCol->colFlags & COLFLAG_GENERATED; + if( isGenerated && !b2ndPass ){ + nGenerated++; + continue; /* Generated columns processed on 2nd pass */ + } + if( aiChng && aiChng[i]<0 && !isGenerated ){ + /* Do not check NOT NULL on columns that do not change */ + continue; + } + if( overrideError!=OE_Default ){ + onError = overrideError; + }else if( onError==OE_Default ){ + onError = OE_Abort; + } + if( onError==OE_Replace ){ + if( b2ndPass /* REPLACE becomes ABORT on the 2nd pass */ + || pCol->iDflt==0 /* REPLACE is ABORT if no DEFAULT value */ + ){ + testcase( pCol->colFlags & COLFLAG_VIRTUAL ); + testcase( pCol->colFlags & COLFLAG_STORED ); + testcase( pCol->colFlags & COLFLAG_GENERATED ); + onError = OE_Abort; + }else{ + assert( !isGenerated ); + } + }else if( b2ndPass && !isGenerated ){ + continue; + } + assert( onError==OE_Rollback || onError==OE_Abort || onError==OE_Fail + || onError==OE_Ignore || onError==OE_Replace ); + testcase( i!=sqlite3TableColumnToStorage(pTab, i) ); + iReg = sqlite3TableColumnToStorage(pTab, i) + regNewData + 1; + switch( onError ){ + case OE_Replace: { + int addr1 = sqlite3VdbeAddOp1(v, OP_NotNull, iReg); + VdbeCoverage(v); + assert( (pCol->colFlags & COLFLAG_GENERATED)==0 ); + nSeenReplace++; + sqlite3ExprCodeCopy(pParse, + sqlite3ColumnExpr(pTab, pCol), iReg); + sqlite3VdbeJumpHere(v, addr1); + break; + } + case OE_Abort: + sqlite3MayAbort(pParse); + /* no break */ deliberate_fall_through + case OE_Rollback: + case OE_Fail: { + char *zMsg = sqlite3MPrintf(db, "%s.%s", pTab->zName, + pCol->zCnName); + sqlite3VdbeAddOp3(v, OP_HaltIfNull, SQLITE_CONSTRAINT_NOTNULL, + onError, iReg); + sqlite3VdbeAppendP4(v, zMsg, P4_DYNAMIC); + sqlite3VdbeChangeP5(v, P5_ConstraintNotNull); + VdbeCoverage(v); + break; + } + default: { + assert( onError==OE_Ignore ); + sqlite3VdbeAddOp2(v, OP_IsNull, iReg, ignoreDest); + VdbeCoverage(v); + break; + } + } /* end switch(onError) */ + } /* end loop i over columns */ + if( nGenerated==0 && nSeenReplace==0 ){ + /* If there are no generated columns with NOT NULL constraints + ** and no NOT NULL ON CONFLICT REPLACE constraints, then a single + ** pass is sufficient */ + break; + } + if( b2ndPass ) break; /* Never need more than 2 passes */ + b2ndPass = 1; +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( nSeenReplace>0 && (pTab->tabFlags & TF_HasGenerated)!=0 ){ + /* If any NOT NULL ON CONFLICT REPLACE constraints fired on the + ** first pass, recomputed values for all generated columns, as + ** those values might depend on columns affected by the REPLACE. + */ + sqlite3ComputeGeneratedColumns(pParse, regNewData+1, pTab); + } +#endif + } /* end of 2-pass loop */ + } /* end if( has-not-null-constraints ) */ + + /* Test all CHECK constraints + */ +#ifndef SQLITE_OMIT_CHECK + if( pTab->pCheck && (db->flags & SQLITE_IgnoreChecks)==0 ){ + ExprList *pCheck = pTab->pCheck; + pParse->iSelfTab = -(regNewData+1); + onError = overrideError!=OE_Default ? overrideError : OE_Abort; + for(i=0; inExpr; i++){ + int allOk; + Expr *pCopy; + Expr *pExpr = pCheck->a[i].pExpr; + if( aiChng + && !sqlite3ExprReferencesUpdatedColumn(pExpr, aiChng, pkChng) + ){ + /* The check constraints do not reference any of the columns being + ** updated so there is no point it verifying the check constraint */ + continue; + } + if( bAffinityDone==0 ){ + sqlite3TableAffinity(v, pTab, regNewData+1); + bAffinityDone = 1; + } + allOk = sqlite3VdbeMakeLabel(pParse); + sqlite3VdbeVerifyAbortable(v, onError); + pCopy = sqlite3ExprDup(db, pExpr, 0); + if( !db->mallocFailed ){ + sqlite3ExprIfTrue(pParse, pCopy, allOk, SQLITE_JUMPIFNULL); + } + sqlite3ExprDelete(db, pCopy); + if( onError==OE_Ignore ){ + sqlite3VdbeGoto(v, ignoreDest); + }else{ + char *zName = pCheck->a[i].zEName; + assert( zName!=0 || pParse->db->mallocFailed ); + if( onError==OE_Replace ) onError = OE_Abort; /* IMP: R-26383-51744 */ + sqlite3HaltConstraint(pParse, SQLITE_CONSTRAINT_CHECK, + onError, zName, P4_TRANSIENT, + P5_ConstraintCheck); + } + sqlite3VdbeResolveLabel(v, allOk); + } + pParse->iSelfTab = 0; + } +#endif /* !defined(SQLITE_OMIT_CHECK) */ + + /* UNIQUE and PRIMARY KEY constraints should be handled in the following + ** order: + ** + ** (1) OE_Update + ** (2) OE_Abort, OE_Fail, OE_Rollback, OE_Ignore + ** (3) OE_Replace + ** + ** OE_Fail and OE_Ignore must happen before any changes are made. + ** OE_Update guarantees that only a single row will change, so it + ** must happen before OE_Replace. Technically, OE_Abort and OE_Rollback + ** could happen in any order, but they are grouped up front for + ** convenience. + ** + ** 2018-08-14: Ticket https://www.sqlite.org/src/info/908f001483982c43 + ** The order of constraints used to have OE_Update as (2) and OE_Abort + ** and so forth as (1). But apparently PostgreSQL checks the OE_Update + ** constraint before any others, so it had to be moved. + ** + ** Constraint checking code is generated in this order: + ** (A) The rowid constraint + ** (B) Unique index constraints that do not have OE_Replace as their + ** default conflict resolution strategy + ** (C) Unique index that do use OE_Replace by default. + ** + ** The ordering of (2) and (3) is accomplished by making sure the linked + ** list of indexes attached to a table puts all OE_Replace indexes last + ** in the list. See sqlite3CreateIndex() for where that happens. + */ + sIdxIter.eType = 0; + sIdxIter.i = 0; + sIdxIter.u.ax.aIdx = 0; /* Silence harmless compiler warning */ + sIdxIter.u.lx.pIdx = pTab->pIndex; + if( pUpsert ){ + if( pUpsert->pUpsertTarget==0 ){ + /* There is just on ON CONFLICT clause and it has no constraint-target */ + assert( pUpsert->pNextUpsert==0 ); + if( pUpsert->isDoUpdate==0 ){ + /* A single ON CONFLICT DO NOTHING clause, without a constraint-target. + ** Make all unique constraint resolution be OE_Ignore */ + overrideError = OE_Ignore; + pUpsert = 0; + }else{ + /* A single ON CONFLICT DO UPDATE. Make all resolutions OE_Update */ + overrideError = OE_Update; + } + }else if( pTab->pIndex!=0 ){ + /* Otherwise, we'll need to run the IndexListTerm array version of the + ** iterator to ensure that all of the ON CONFLICT conditions are + ** checked first and in order. */ + int nIdx, jj; + u64 nByte; + Upsert *pTerm; + u8 *bUsed; + for(nIdx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, nIdx++){ + assert( aRegIdx[nIdx]>0 ); + } + sIdxIter.eType = 1; + sIdxIter.u.ax.nIdx = nIdx; + nByte = (sizeof(IndexListTerm)+1)*nIdx + nIdx; + sIdxIter.u.ax.aIdx = sqlite3DbMallocZero(db, nByte); + if( sIdxIter.u.ax.aIdx==0 ) return; /* OOM */ + bUsed = (u8*)&sIdxIter.u.ax.aIdx[nIdx]; + pUpsert->pToFree = sIdxIter.u.ax.aIdx; + for(i=0, pTerm=pUpsert; pTerm; pTerm=pTerm->pNextUpsert){ + if( pTerm->pUpsertTarget==0 ) break; + if( pTerm->pUpsertIdx==0 ) continue; /* Skip ON CONFLICT for the IPK */ + jj = 0; + pIdx = pTab->pIndex; + while( ALWAYS(pIdx!=0) && pIdx!=pTerm->pUpsertIdx ){ + pIdx = pIdx->pNext; + jj++; + } + if( bUsed[jj] ) continue; /* Duplicate ON CONFLICT clause ignored */ + bUsed[jj] = 1; + sIdxIter.u.ax.aIdx[i].p = pIdx; + sIdxIter.u.ax.aIdx[i].ix = jj; + i++; + } + for(jj=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, jj++){ + if( bUsed[jj] ) continue; + sIdxIter.u.ax.aIdx[i].p = pIdx; + sIdxIter.u.ax.aIdx[i].ix = jj; + i++; + } + assert( i==nIdx ); + } + } + + /* Determine if it is possible that triggers (either explicitly coded + ** triggers or FK resolution actions) might run as a result of deletes + ** that happen when OE_Replace conflict resolution occurs. (Call these + ** "replace triggers".) If any replace triggers run, we will need to + ** recheck all of the uniqueness constraints after they have all run. + ** But on the recheck, the resolution is OE_Abort instead of OE_Replace. + ** + ** If replace triggers are a possibility, then + ** + ** (1) Allocate register regTrigCnt and initialize it to zero. + ** That register will count the number of replace triggers that + ** fire. Constraint recheck only occurs if the number is positive. + ** (2) Initialize pTrigger to the list of all DELETE triggers on pTab. + ** (3) Initialize addrRecheck and lblRecheckOk + ** + ** The uniqueness rechecking code will create a series of tests to run + ** in a second pass. The addrRecheck and lblRecheckOk variables are + ** used to link together these tests which are separated from each other + ** in the generate bytecode. + */ + if( (db->flags & (SQLITE_RecTriggers|SQLITE_ForeignKeys))==0 ){ + /* There are not DELETE triggers nor FK constraints. No constraint + ** rechecks are needed. */ + pTrigger = 0; + regTrigCnt = 0; + }else{ + if( db->flags&SQLITE_RecTriggers ){ + pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0); + regTrigCnt = pTrigger!=0 || sqlite3FkRequired(pParse, pTab, 0, 0); + }else{ + pTrigger = 0; + regTrigCnt = sqlite3FkRequired(pParse, pTab, 0, 0); + } + if( regTrigCnt ){ + /* Replace triggers might exist. Allocate the counter and + ** initialize it to zero. */ + regTrigCnt = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, regTrigCnt); + VdbeComment((v, "trigger count")); + lblRecheckOk = sqlite3VdbeMakeLabel(pParse); + addrRecheck = lblRecheckOk; + } + } + + /* If rowid is changing, make sure the new rowid does not previously + ** exist in the table. + */ + if( pkChng && pPk==0 ){ + int addrRowidOk = sqlite3VdbeMakeLabel(pParse); + + /* Figure out what action to take in case of a rowid collision */ + onError = pTab->keyConf; + if( overrideError!=OE_Default ){ + onError = overrideError; + }else if( onError==OE_Default ){ + onError = OE_Abort; + } + + /* figure out whether or not upsert applies in this case */ + if( pUpsert ){ + pUpsertClause = sqlite3UpsertOfIndex(pUpsert,0); + if( pUpsertClause!=0 ){ + if( pUpsertClause->isDoUpdate==0 ){ + onError = OE_Ignore; /* DO NOTHING is the same as INSERT OR IGNORE */ + }else{ + onError = OE_Update; /* DO UPDATE */ + } + } + if( pUpsertClause!=pUpsert ){ + /* The first ON CONFLICT clause has a conflict target other than + ** the IPK. We have to jump ahead to that first ON CONFLICT clause + ** and then come back here and deal with the IPK afterwards */ + upsertIpkDelay = sqlite3VdbeAddOp0(v, OP_Goto); + } + } + + /* If the response to a rowid conflict is REPLACE but the response + ** to some other UNIQUE constraint is FAIL or IGNORE, then we need + ** to defer the running of the rowid conflict checking until after + ** the UNIQUE constraints have run. + */ + if( onError==OE_Replace /* IPK rule is REPLACE */ + && onError!=overrideError /* Rules for other constraints are different */ + && pTab->pIndex /* There exist other constraints */ + && !upsertIpkDelay /* IPK check already deferred by UPSERT */ + ){ + ipkTop = sqlite3VdbeAddOp0(v, OP_Goto)+1; + VdbeComment((v, "defer IPK REPLACE until last")); + } + + if( isUpdate ){ + /* pkChng!=0 does not mean that the rowid has changed, only that + ** it might have changed. Skip the conflict logic below if the rowid + ** is unchanged. */ + sqlite3VdbeAddOp3(v, OP_Eq, regNewData, addrRowidOk, regOldData); + sqlite3VdbeChangeP5(v, SQLITE_NOTNULL); + VdbeCoverage(v); + } + + /* Check to see if the new rowid already exists in the table. Skip + ** the following conflict logic if it does not. */ + VdbeNoopComment((v, "uniqueness check for ROWID")); + sqlite3VdbeVerifyAbortable(v, onError); + sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, addrRowidOk, regNewData); + VdbeCoverage(v); + + switch( onError ){ + default: { + onError = OE_Abort; + /* no break */ deliberate_fall_through + } + case OE_Rollback: + case OE_Abort: + case OE_Fail: { + testcase( onError==OE_Rollback ); + testcase( onError==OE_Abort ); + testcase( onError==OE_Fail ); + sqlite3RowidConstraint(pParse, onError, pTab); + break; + } + case OE_Replace: { + /* If there are DELETE triggers on this table and the + ** recursive-triggers flag is set, call GenerateRowDelete() to + ** remove the conflicting row from the table. This will fire + ** the triggers and remove both the table and index b-tree entries. + ** + ** Otherwise, if there are no triggers or the recursive-triggers + ** flag is not set, but the table has one or more indexes, call + ** GenerateRowIndexDelete(). This removes the index b-tree entries + ** only. The table b-tree entry will be replaced by the new entry + ** when it is inserted. + ** + ** If either GenerateRowDelete() or GenerateRowIndexDelete() is called, + ** also invoke MultiWrite() to indicate that this VDBE may require + ** statement rollback (if the statement is aborted after the delete + ** takes place). Earlier versions called sqlite3MultiWrite() regardless, + ** but being more selective here allows statements like: + ** + ** REPLACE INTO t(rowid) VALUES($newrowid) + ** + ** to run without a statement journal if there are no indexes on the + ** table. + */ + if( regTrigCnt ){ + sqlite3MultiWrite(pParse); + sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur, + regNewData, 1, 0, OE_Replace, 1, -1); + sqlite3VdbeAddOp2(v, OP_AddImm, regTrigCnt, 1); /* incr trigger cnt */ + nReplaceTrig++; + }else{ +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + assert( HasRowid(pTab) ); + /* This OP_Delete opcode fires the pre-update-hook only. It does + ** not modify the b-tree. It is more efficient to let the coming + ** OP_Insert replace the existing entry than it is to delete the + ** existing entry and then insert a new one. */ + sqlite3VdbeAddOp2(v, OP_Delete, iDataCur, OPFLAG_ISNOOP); + sqlite3VdbeAppendP4(v, pTab, P4_TABLE); +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + if( pTab->pIndex ){ + sqlite3MultiWrite(pParse); + sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur,0,-1); + } + } + seenReplace = 1; + break; + } +#ifndef SQLITE_OMIT_UPSERT + case OE_Update: { + sqlite3UpsertDoUpdate(pParse, pUpsert, pTab, 0, iDataCur); + /* no break */ deliberate_fall_through + } +#endif + case OE_Ignore: { + testcase( onError==OE_Ignore ); + sqlite3VdbeGoto(v, ignoreDest); + break; + } + } + sqlite3VdbeResolveLabel(v, addrRowidOk); + if( pUpsert && pUpsertClause!=pUpsert ){ + upsertIpkReturn = sqlite3VdbeAddOp0(v, OP_Goto); + }else if( ipkTop ){ + ipkBottom = sqlite3VdbeAddOp0(v, OP_Goto); + sqlite3VdbeJumpHere(v, ipkTop-1); + } + } + + /* Test all UNIQUE constraints by creating entries for each UNIQUE + ** index and making sure that duplicate entries do not already exist. + ** Compute the revised record entries for indices as we go. + ** + ** This loop also handles the case of the PRIMARY KEY index for a + ** WITHOUT ROWID table. + */ + for(pIdx = indexIteratorFirst(&sIdxIter, &ix); + pIdx; + pIdx = indexIteratorNext(&sIdxIter, &ix) + ){ + int regIdx; /* Range of registers hold conent for pIdx */ + int regR; /* Range of registers holding conflicting PK */ + int iThisCur; /* Cursor for this UNIQUE index */ + int addrUniqueOk; /* Jump here if the UNIQUE constraint is satisfied */ + int addrConflictCk; /* First opcode in the conflict check logic */ + + if( aRegIdx[ix]==0 ) continue; /* Skip indices that do not change */ + if( pUpsert ){ + pUpsertClause = sqlite3UpsertOfIndex(pUpsert, pIdx); + if( upsertIpkDelay && pUpsertClause==pUpsert ){ + sqlite3VdbeJumpHere(v, upsertIpkDelay); + } + } + addrUniqueOk = sqlite3VdbeMakeLabel(pParse); + if( bAffinityDone==0 ){ + sqlite3TableAffinity(v, pTab, regNewData+1); + bAffinityDone = 1; + } + VdbeNoopComment((v, "prep index %s", pIdx->zName)); + iThisCur = iIdxCur+ix; + + + /* Skip partial indices for which the WHERE clause is not true */ + if( pIdx->pPartIdxWhere ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, aRegIdx[ix]); + pParse->iSelfTab = -(regNewData+1); + sqlite3ExprIfFalseDup(pParse, pIdx->pPartIdxWhere, addrUniqueOk, + SQLITE_JUMPIFNULL); + pParse->iSelfTab = 0; + } + + /* Create a record for this index entry as it should appear after + ** the insert or update. Store that record in the aRegIdx[ix] register + */ + regIdx = aRegIdx[ix]+1; + for(i=0; inColumn; i++){ + int iField = pIdx->aiColumn[i]; + int x; + if( iField==XN_EXPR ){ + pParse->iSelfTab = -(regNewData+1); + sqlite3ExprCodeCopy(pParse, pIdx->aColExpr->a[i].pExpr, regIdx+i); + pParse->iSelfTab = 0; + VdbeComment((v, "%s column %d", pIdx->zName, i)); + }else if( iField==XN_ROWID || iField==pTab->iPKey ){ + x = regNewData; + sqlite3VdbeAddOp2(v, OP_IntCopy, x, regIdx+i); + VdbeComment((v, "rowid")); + }else{ + testcase( sqlite3TableColumnToStorage(pTab, iField)!=iField ); + x = sqlite3TableColumnToStorage(pTab, iField) + regNewData + 1; + sqlite3VdbeAddOp2(v, OP_SCopy, x, regIdx+i); + VdbeComment((v, "%s", pTab->aCol[iField].zCnName)); + } + } + sqlite3VdbeAddOp3(v, OP_MakeRecord, regIdx, pIdx->nColumn, aRegIdx[ix]); + VdbeComment((v, "for %s", pIdx->zName)); +#ifdef SQLITE_ENABLE_NULL_TRIM + if( pIdx->idxType==SQLITE_IDXTYPE_PRIMARYKEY ){ + sqlite3SetMakeRecordP5(v, pIdx->pTable); + } +#endif + sqlite3VdbeReleaseRegisters(pParse, regIdx, pIdx->nColumn, 0, 0); + + /* In an UPDATE operation, if this index is the PRIMARY KEY index + ** of a WITHOUT ROWID table and there has been no change the + ** primary key, then no collision is possible. The collision detection + ** logic below can all be skipped. */ + if( isUpdate && pPk==pIdx && pkChng==0 ){ + sqlite3VdbeResolveLabel(v, addrUniqueOk); + continue; + } + + /* Find out what action to take in case there is a uniqueness conflict */ + onError = pIdx->onError; + if( onError==OE_None ){ + sqlite3VdbeResolveLabel(v, addrUniqueOk); + continue; /* pIdx is not a UNIQUE index */ + } + if( overrideError!=OE_Default ){ + onError = overrideError; + }else if( onError==OE_Default ){ + onError = OE_Abort; + } + + /* Figure out if the upsert clause applies to this index */ + if( pUpsertClause ){ + if( pUpsertClause->isDoUpdate==0 ){ + onError = OE_Ignore; /* DO NOTHING is the same as INSERT OR IGNORE */ + }else{ + onError = OE_Update; /* DO UPDATE */ + } + } + + /* Collision detection may be omitted if all of the following are true: + ** (1) The conflict resolution algorithm is REPLACE + ** (2) The table is a WITHOUT ROWID table + ** (3) There are no secondary indexes on the table + ** (4) No delete triggers need to be fired if there is a conflict + ** (5) No FK constraint counters need to be updated if a conflict occurs. + ** + ** This is not possible for ENABLE_PREUPDATE_HOOK builds, as the row + ** must be explicitly deleted in order to ensure any pre-update hook + ** is invoked. */ + assert( IsOrdinaryTable(pTab) ); +#ifndef SQLITE_ENABLE_PREUPDATE_HOOK + if( (ix==0 && pIdx->pNext==0) /* Condition 3 */ + && pPk==pIdx /* Condition 2 */ + && onError==OE_Replace /* Condition 1 */ + && ( 0==(db->flags&SQLITE_RecTriggers) || /* Condition 4 */ + 0==sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0)) + && ( 0==(db->flags&SQLITE_ForeignKeys) || /* Condition 5 */ + (0==pTab->u.tab.pFKey && 0==sqlite3FkReferences(pTab))) + ){ + sqlite3VdbeResolveLabel(v, addrUniqueOk); + continue; + } +#endif /* ifndef SQLITE_ENABLE_PREUPDATE_HOOK */ + + /* Check to see if the new index entry will be unique */ + sqlite3VdbeVerifyAbortable(v, onError); + addrConflictCk = + sqlite3VdbeAddOp4Int(v, OP_NoConflict, iThisCur, addrUniqueOk, + regIdx, pIdx->nKeyCol); VdbeCoverage(v); + + /* Generate code to handle collisions */ + regR = pIdx==pPk ? regIdx : sqlite3GetTempRange(pParse, nPkField); + if( isUpdate || onError==OE_Replace ){ + if( HasRowid(pTab) ){ + sqlite3VdbeAddOp2(v, OP_IdxRowid, iThisCur, regR); + /* Conflict only if the rowid of the existing index entry + ** is different from old-rowid */ + if( isUpdate ){ + sqlite3VdbeAddOp3(v, OP_Eq, regR, addrUniqueOk, regOldData); + sqlite3VdbeChangeP5(v, SQLITE_NOTNULL); + VdbeCoverage(v); + } + }else{ + int x; + /* Extract the PRIMARY KEY from the end of the index entry and + ** store it in registers regR..regR+nPk-1 */ + if( pIdx!=pPk ){ + for(i=0; inKeyCol; i++){ + assert( pPk->aiColumn[i]>=0 ); + x = sqlite3TableColumnToIndex(pIdx, pPk->aiColumn[i]); + sqlite3VdbeAddOp3(v, OP_Column, iThisCur, x, regR+i); + VdbeComment((v, "%s.%s", pTab->zName, + pTab->aCol[pPk->aiColumn[i]].zCnName)); + } + } + if( isUpdate ){ + /* If currently processing the PRIMARY KEY of a WITHOUT ROWID + ** table, only conflict if the new PRIMARY KEY values are actually + ** different from the old. + ** + ** For a UNIQUE index, only conflict if the PRIMARY KEY values + ** of the matched index row are different from the original PRIMARY + ** KEY values of this row before the update. */ + int addrJump = sqlite3VdbeCurrentAddr(v)+pPk->nKeyCol; + int op = OP_Ne; + int regCmp = (IsPrimaryKeyIndex(pIdx) ? regIdx : regR); + + for(i=0; inKeyCol; i++){ + char *p4 = (char*)sqlite3LocateCollSeq(pParse, pPk->azColl[i]); + x = pPk->aiColumn[i]; + assert( x>=0 ); + if( i==(pPk->nKeyCol-1) ){ + addrJump = addrUniqueOk; + op = OP_Eq; + } + x = sqlite3TableColumnToStorage(pTab, x); + sqlite3VdbeAddOp4(v, op, + regOldData+1+x, addrJump, regCmp+i, p4, P4_COLLSEQ + ); + sqlite3VdbeChangeP5(v, SQLITE_NOTNULL); + VdbeCoverageIf(v, op==OP_Eq); + VdbeCoverageIf(v, op==OP_Ne); + } + } + } + } + + /* Generate code that executes if the new index entry is not unique */ + assert( onError==OE_Rollback || onError==OE_Abort || onError==OE_Fail + || onError==OE_Ignore || onError==OE_Replace || onError==OE_Update ); + switch( onError ){ + case OE_Rollback: + case OE_Abort: + case OE_Fail: { + testcase( onError==OE_Rollback ); + testcase( onError==OE_Abort ); + testcase( onError==OE_Fail ); + sqlite3UniqueConstraint(pParse, onError, pIdx); + break; + } +#ifndef SQLITE_OMIT_UPSERT + case OE_Update: { + sqlite3UpsertDoUpdate(pParse, pUpsert, pTab, pIdx, iIdxCur+ix); + /* no break */ deliberate_fall_through + } +#endif + case OE_Ignore: { + testcase( onError==OE_Ignore ); + sqlite3VdbeGoto(v, ignoreDest); + break; + } + default: { + int nConflictCk; /* Number of opcodes in conflict check logic */ + + assert( onError==OE_Replace ); + nConflictCk = sqlite3VdbeCurrentAddr(v) - addrConflictCk; + assert( nConflictCk>0 || db->mallocFailed ); + testcase( nConflictCk<=0 ); + testcase( nConflictCk>1 ); + if( regTrigCnt ){ + sqlite3MultiWrite(pParse); + nReplaceTrig++; + } + if( pTrigger && isUpdate ){ + sqlite3VdbeAddOp1(v, OP_CursorLock, iDataCur); + } + sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur, + regR, nPkField, 0, OE_Replace, + (pIdx==pPk ? ONEPASS_SINGLE : ONEPASS_OFF), iThisCur); + if( pTrigger && isUpdate ){ + sqlite3VdbeAddOp1(v, OP_CursorUnlock, iDataCur); + } + if( regTrigCnt ){ + int addrBypass; /* Jump destination to bypass recheck logic */ + + sqlite3VdbeAddOp2(v, OP_AddImm, regTrigCnt, 1); /* incr trigger cnt */ + addrBypass = sqlite3VdbeAddOp0(v, OP_Goto); /* Bypass recheck */ + VdbeComment((v, "bypass recheck")); + + /* Here we insert code that will be invoked after all constraint + ** checks have run, if and only if one or more replace triggers + ** fired. */ + sqlite3VdbeResolveLabel(v, lblRecheckOk); + lblRecheckOk = sqlite3VdbeMakeLabel(pParse); + if( pIdx->pPartIdxWhere ){ + /* Bypass the recheck if this partial index is not defined + ** for the current row */ + sqlite3VdbeAddOp2(v, OP_IsNull, regIdx-1, lblRecheckOk); + VdbeCoverage(v); + } + /* Copy the constraint check code from above, except change + ** the constraint-ok jump destination to be the address of + ** the next retest block */ + while( nConflictCk>0 ){ + VdbeOp x; /* Conflict check opcode to copy */ + /* The sqlite3VdbeAddOp4() call might reallocate the opcode array. + ** Hence, make a complete copy of the opcode, rather than using + ** a pointer to the opcode. */ + x = *sqlite3VdbeGetOp(v, addrConflictCk); + if( x.opcode!=OP_IdxRowid ){ + int p2; /* New P2 value for copied conflict check opcode */ + const char *zP4; + if( sqlite3OpcodeProperty[x.opcode]&OPFLG_JUMP ){ + p2 = lblRecheckOk; + }else{ + p2 = x.p2; + } + zP4 = x.p4type==P4_INT32 ? SQLITE_INT_TO_PTR(x.p4.i) : x.p4.z; + sqlite3VdbeAddOp4(v, x.opcode, x.p1, p2, x.p3, zP4, x.p4type); + sqlite3VdbeChangeP5(v, x.p5); + VdbeCoverageIf(v, p2!=x.p2); + } + nConflictCk--; + addrConflictCk++; + } + /* If the retest fails, issue an abort */ + sqlite3UniqueConstraint(pParse, OE_Abort, pIdx); + + sqlite3VdbeJumpHere(v, addrBypass); /* Terminate the recheck bypass */ + } + seenReplace = 1; + break; + } + } + sqlite3VdbeResolveLabel(v, addrUniqueOk); + if( regR!=regIdx ) sqlite3ReleaseTempRange(pParse, regR, nPkField); + if( pUpsertClause + && upsertIpkReturn + && sqlite3UpsertNextIsIPK(pUpsertClause) + ){ + sqlite3VdbeGoto(v, upsertIpkDelay+1); + sqlite3VdbeJumpHere(v, upsertIpkReturn); + upsertIpkReturn = 0; + } + } + + /* If the IPK constraint is a REPLACE, run it last */ + if( ipkTop ){ + sqlite3VdbeGoto(v, ipkTop); + VdbeComment((v, "Do IPK REPLACE")); + assert( ipkBottom>0 ); + sqlite3VdbeJumpHere(v, ipkBottom); + } + + /* Recheck all uniqueness constraints after replace triggers have run */ + testcase( regTrigCnt!=0 && nReplaceTrig==0 ); + assert( regTrigCnt!=0 || nReplaceTrig==0 ); + if( nReplaceTrig ){ + sqlite3VdbeAddOp2(v, OP_IfNot, regTrigCnt, lblRecheckOk);VdbeCoverage(v); + if( !pPk ){ + if( isUpdate ){ + sqlite3VdbeAddOp3(v, OP_Eq, regNewData, addrRecheck, regOldData); + sqlite3VdbeChangeP5(v, SQLITE_NOTNULL); + VdbeCoverage(v); + } + sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, addrRecheck, regNewData); + VdbeCoverage(v); + sqlite3RowidConstraint(pParse, OE_Abort, pTab); + }else{ + sqlite3VdbeGoto(v, addrRecheck); + } + sqlite3VdbeResolveLabel(v, lblRecheckOk); + } + + /* Generate the table record */ + if( HasRowid(pTab) ){ + int regRec = aRegIdx[ix]; + sqlite3VdbeAddOp3(v, OP_MakeRecord, regNewData+1, pTab->nNVCol, regRec); + sqlite3SetMakeRecordP5(v, pTab); + if( !bAffinityDone ){ + sqlite3TableAffinity(v, pTab, 0); + } + } + + *pbMayReplace = seenReplace; + VdbeModuleComment((v, "END: GenCnstCks(%d)", seenReplace)); +} + +#ifdef SQLITE_ENABLE_NULL_TRIM +/* +** Change the P5 operand on the last opcode (which should be an OP_MakeRecord) +** to be the number of columns in table pTab that must not be NULL-trimmed. +** +** Or if no columns of pTab may be NULL-trimmed, leave P5 at zero. +*/ +SQLITE_PRIVATE void sqlite3SetMakeRecordP5(Vdbe *v, Table *pTab){ + u16 i; + + /* Records with omitted columns are only allowed for schema format + ** version 2 and later (SQLite version 3.1.4, 2005-02-20). */ + if( pTab->pSchema->file_format<2 ) return; + + for(i=pTab->nCol-1; i>0; i--){ + if( pTab->aCol[i].iDflt!=0 ) break; + if( pTab->aCol[i].colFlags & COLFLAG_PRIMKEY ) break; + } + sqlite3VdbeChangeP5(v, i+1); +} +#endif + +/* +** Table pTab is a WITHOUT ROWID table that is being written to. The cursor +** number is iCur, and register regData contains the new record for the +** PK index. This function adds code to invoke the pre-update hook, +** if one is registered. +*/ +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +static void codeWithoutRowidPreupdate( + Parse *pParse, /* Parse context */ + Table *pTab, /* Table being updated */ + int iCur, /* Cursor number for table */ + int regData /* Data containing new record */ +){ + Vdbe *v = pParse->pVdbe; + int r = sqlite3GetTempReg(pParse); + assert( !HasRowid(pTab) ); + assert( 0==(pParse->db->mDbFlags & DBFLAG_Vacuum) || CORRUPT_DB ); + sqlite3VdbeAddOp2(v, OP_Integer, 0, r); + sqlite3VdbeAddOp4(v, OP_Insert, iCur, regData, r, (char*)pTab, P4_TABLE); + sqlite3VdbeChangeP5(v, OPFLAG_ISNOOP); + sqlite3ReleaseTempReg(pParse, r); +} +#else +# define codeWithoutRowidPreupdate(a,b,c,d) +#endif + +/* +** This routine generates code to finish the INSERT or UPDATE operation +** that was started by a prior call to sqlite3GenerateConstraintChecks. +** A consecutive range of registers starting at regNewData contains the +** rowid and the content to be inserted. +** +** The arguments to this routine should be the same as the first six +** arguments to sqlite3GenerateConstraintChecks. +*/ +SQLITE_PRIVATE void sqlite3CompleteInsertion( + Parse *pParse, /* The parser context */ + Table *pTab, /* the table into which we are inserting */ + int iDataCur, /* Cursor of the canonical data source */ + int iIdxCur, /* First index cursor */ + int regNewData, /* Range of content */ + int *aRegIdx, /* Register used by each index. 0 for unused indices */ + int update_flags, /* True for UPDATE, False for INSERT */ + int appendBias, /* True if this is likely to be an append */ + int useSeekResult /* True to set the USESEEKRESULT flag on OP_[Idx]Insert */ +){ + Vdbe *v; /* Prepared statements under construction */ + Index *pIdx; /* An index being inserted or updated */ + u8 pik_flags; /* flag values passed to the btree insert */ + int i; /* Loop counter */ + + assert( update_flags==0 + || update_flags==OPFLAG_ISUPDATE + || update_flags==(OPFLAG_ISUPDATE|OPFLAG_SAVEPOSITION) + ); + + v = pParse->pVdbe; + assert( v!=0 ); + assert( !IsView(pTab) ); /* This table is not a VIEW */ + for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){ + /* All REPLACE indexes are at the end of the list */ + assert( pIdx->onError!=OE_Replace + || pIdx->pNext==0 + || pIdx->pNext->onError==OE_Replace ); + if( aRegIdx[i]==0 ) continue; + if( pIdx->pPartIdxWhere ){ + sqlite3VdbeAddOp2(v, OP_IsNull, aRegIdx[i], sqlite3VdbeCurrentAddr(v)+2); + VdbeCoverage(v); + } + pik_flags = (useSeekResult ? OPFLAG_USESEEKRESULT : 0); + if( IsPrimaryKeyIndex(pIdx) && !HasRowid(pTab) ){ + pik_flags |= OPFLAG_NCHANGE; + pik_flags |= (update_flags & OPFLAG_SAVEPOSITION); + if( update_flags==0 ){ + codeWithoutRowidPreupdate(pParse, pTab, iIdxCur+i, aRegIdx[i]); + } + } + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iIdxCur+i, aRegIdx[i], + aRegIdx[i]+1, + pIdx->uniqNotNull ? pIdx->nKeyCol: pIdx->nColumn); + sqlite3VdbeChangeP5(v, pik_flags); + } + if( !HasRowid(pTab) ) return; + if( pParse->nested ){ + pik_flags = 0; + }else{ + pik_flags = OPFLAG_NCHANGE; + pik_flags |= (update_flags?update_flags:OPFLAG_LASTROWID); + } + if( appendBias ){ + pik_flags |= OPFLAG_APPEND; + } + if( useSeekResult ){ + pik_flags |= OPFLAG_USESEEKRESULT; + } + sqlite3VdbeAddOp3(v, OP_Insert, iDataCur, aRegIdx[i], regNewData); + if( !pParse->nested ){ + sqlite3VdbeAppendP4(v, pTab, P4_TABLE); + } + sqlite3VdbeChangeP5(v, pik_flags); +} + +/* +** Allocate cursors for the pTab table and all its indices and generate +** code to open and initialized those cursors. +** +** The cursor for the object that contains the complete data (normally +** the table itself, but the PRIMARY KEY index in the case of a WITHOUT +** ROWID table) is returned in *piDataCur. The first index cursor is +** returned in *piIdxCur. The number of indices is returned. +** +** Use iBase as the first cursor (either the *piDataCur for rowid tables +** or the first index for WITHOUT ROWID tables) if it is non-negative. +** If iBase is negative, then allocate the next available cursor. +** +** For a rowid table, *piDataCur will be exactly one less than *piIdxCur. +** For a WITHOUT ROWID table, *piDataCur will be somewhere in the range +** of *piIdxCurs, depending on where the PRIMARY KEY index appears on the +** pTab->pIndex list. +** +** If pTab is a virtual table, then this routine is a no-op and the +** *piDataCur and *piIdxCur values are left uninitialized. +*/ +SQLITE_PRIVATE int sqlite3OpenTableAndIndices( + Parse *pParse, /* Parsing context */ + Table *pTab, /* Table to be opened */ + int op, /* OP_OpenRead or OP_OpenWrite */ + u8 p5, /* P5 value for OP_Open* opcodes (except on WITHOUT ROWID) */ + int iBase, /* Use this for the table cursor, if there is one */ + u8 *aToOpen, /* If not NULL: boolean for each table and index */ + int *piDataCur, /* Write the database source cursor number here */ + int *piIdxCur /* Write the first index cursor number here */ +){ + int i; + int iDb; + int iDataCur; + Index *pIdx; + Vdbe *v; + + assert( op==OP_OpenRead || op==OP_OpenWrite ); + assert( op==OP_OpenWrite || p5==0 ); + if( IsVirtual(pTab) ){ + /* This routine is a no-op for virtual tables. Leave the output + ** variables *piDataCur and *piIdxCur set to illegal cursor numbers + ** for improved error detection. */ + *piDataCur = *piIdxCur = -999; + return 0; + } + iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + v = pParse->pVdbe; + assert( v!=0 ); + if( iBase<0 ) iBase = pParse->nTab; + iDataCur = iBase++; + if( piDataCur ) *piDataCur = iDataCur; + if( HasRowid(pTab) && (aToOpen==0 || aToOpen[0]) ){ + sqlite3OpenTable(pParse, iDataCur, iDb, pTab, op); + }else{ + sqlite3TableLock(pParse, iDb, pTab->tnum, op==OP_OpenWrite, pTab->zName); + } + if( piIdxCur ) *piIdxCur = iBase; + for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){ + int iIdxCur = iBase++; + assert( pIdx->pSchema==pTab->pSchema ); + if( IsPrimaryKeyIndex(pIdx) && !HasRowid(pTab) ){ + if( piDataCur ) *piDataCur = iIdxCur; + p5 = 0; + } + if( aToOpen==0 || aToOpen[i+1] ){ + sqlite3VdbeAddOp3(v, op, iIdxCur, pIdx->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + sqlite3VdbeChangeP5(v, p5); + VdbeComment((v, "%s", pIdx->zName)); + } + } + if( iBase>pParse->nTab ) pParse->nTab = iBase; + return i; +} + + +#ifdef SQLITE_TEST +/* +** The following global variable is incremented whenever the +** transfer optimization is used. This is used for testing +** purposes only - to make sure the transfer optimization really +** is happening when it is supposed to. +*/ +SQLITE_API int sqlite3_xferopt_count; +#endif /* SQLITE_TEST */ + + +#ifndef SQLITE_OMIT_XFER_OPT +/* +** Check to see if index pSrc is compatible as a source of data +** for index pDest in an insert transfer optimization. The rules +** for a compatible index: +** +** * The index is over the same set of columns +** * The same DESC and ASC markings occurs on all columns +** * The same onError processing (OE_Abort, OE_Ignore, etc) +** * The same collating sequence on each column +** * The index has the exact same WHERE clause +*/ +static int xferCompatibleIndex(Index *pDest, Index *pSrc){ + int i; + assert( pDest && pSrc ); + assert( pDest->pTable!=pSrc->pTable ); + if( pDest->nKeyCol!=pSrc->nKeyCol || pDest->nColumn!=pSrc->nColumn ){ + return 0; /* Different number of columns */ + } + if( pDest->onError!=pSrc->onError ){ + return 0; /* Different conflict resolution strategies */ + } + for(i=0; inKeyCol; i++){ + if( pSrc->aiColumn[i]!=pDest->aiColumn[i] ){ + return 0; /* Different columns indexed */ + } + if( pSrc->aiColumn[i]==XN_EXPR ){ + assert( pSrc->aColExpr!=0 && pDest->aColExpr!=0 ); + if( sqlite3ExprCompare(0, pSrc->aColExpr->a[i].pExpr, + pDest->aColExpr->a[i].pExpr, -1)!=0 ){ + return 0; /* Different expressions in the index */ + } + } + if( pSrc->aSortOrder[i]!=pDest->aSortOrder[i] ){ + return 0; /* Different sort orders */ + } + if( sqlite3_stricmp(pSrc->azColl[i],pDest->azColl[i])!=0 ){ + return 0; /* Different collating sequences */ + } + } + if( sqlite3ExprCompare(0, pSrc->pPartIdxWhere, pDest->pPartIdxWhere, -1) ){ + return 0; /* Different WHERE clauses */ + } + + /* If no test above fails then the indices must be compatible */ + return 1; +} + +/* +** Attempt the transfer optimization on INSERTs of the form +** +** INSERT INTO tab1 SELECT * FROM tab2; +** +** The xfer optimization transfers raw records from tab2 over to tab1. +** Columns are not decoded and reassembled, which greatly improves +** performance. Raw index records are transferred in the same way. +** +** The xfer optimization is only attempted if tab1 and tab2 are compatible. +** There are lots of rules for determining compatibility - see comments +** embedded in the code for details. +** +** This routine returns TRUE if the optimization is guaranteed to be used. +** Sometimes the xfer optimization will only work if the destination table +** is empty - a factor that can only be determined at run-time. In that +** case, this routine generates code for the xfer optimization but also +** does a test to see if the destination table is empty and jumps over the +** xfer optimization code if the test fails. In that case, this routine +** returns FALSE so that the caller will know to go ahead and generate +** an unoptimized transfer. This routine also returns FALSE if there +** is no chance that the xfer optimization can be applied. +** +** This optimization is particularly useful at making VACUUM run faster. +*/ +static int xferOptimization( + Parse *pParse, /* Parser context */ + Table *pDest, /* The table we are inserting into */ + Select *pSelect, /* A SELECT statement to use as the data source */ + int onError, /* How to handle constraint errors */ + int iDbDest /* The database of pDest */ +){ + sqlite3 *db = pParse->db; + ExprList *pEList; /* The result set of the SELECT */ + Table *pSrc; /* The table in the FROM clause of SELECT */ + Index *pSrcIdx, *pDestIdx; /* Source and destination indices */ + SrcItem *pItem; /* An element of pSelect->pSrc */ + int i; /* Loop counter */ + int iDbSrc; /* The database of pSrc */ + int iSrc, iDest; /* Cursors from source and destination */ + int addr1, addr2; /* Loop addresses */ + int emptyDestTest = 0; /* Address of test for empty pDest */ + int emptySrcTest = 0; /* Address of test for empty pSrc */ + Vdbe *v; /* The VDBE we are building */ + int regAutoinc; /* Memory register used by AUTOINC */ + int destHasUniqueIdx = 0; /* True if pDest has a UNIQUE index */ + int regData, regRowid; /* Registers holding data and rowid */ + + assert( pSelect!=0 ); + if( pParse->pWith || pSelect->pWith ){ + /* Do not attempt to process this query if there are an WITH clauses + ** attached to it. Proceeding may generate a false "no such table: xxx" + ** error if pSelect reads from a CTE named "xxx". */ + return 0; + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pDest) ){ + return 0; /* tab1 must not be a virtual table */ + } +#endif + if( onError==OE_Default ){ + if( pDest->iPKey>=0 ) onError = pDest->keyConf; + if( onError==OE_Default ) onError = OE_Abort; + } + assert(pSelect->pSrc); /* allocated even if there is no FROM clause */ + if( pSelect->pSrc->nSrc!=1 ){ + return 0; /* FROM clause must have exactly one term */ + } + if( pSelect->pSrc->a[0].pSelect ){ + return 0; /* FROM clause cannot contain a subquery */ + } + if( pSelect->pWhere ){ + return 0; /* SELECT may not have a WHERE clause */ + } + if( pSelect->pOrderBy ){ + return 0; /* SELECT may not have an ORDER BY clause */ + } + /* Do not need to test for a HAVING clause. If HAVING is present but + ** there is no ORDER BY, we will get an error. */ + if( pSelect->pGroupBy ){ + return 0; /* SELECT may not have a GROUP BY clause */ + } + if( pSelect->pLimit ){ + return 0; /* SELECT may not have a LIMIT clause */ + } + if( pSelect->pPrior ){ + return 0; /* SELECT may not be a compound query */ + } + if( pSelect->selFlags & SF_Distinct ){ + return 0; /* SELECT may not be DISTINCT */ + } + pEList = pSelect->pEList; + assert( pEList!=0 ); + if( pEList->nExpr!=1 ){ + return 0; /* The result set must have exactly one column */ + } + assert( pEList->a[0].pExpr ); + if( pEList->a[0].pExpr->op!=TK_ASTERISK ){ + return 0; /* The result set must be the special operator "*" */ + } + + /* At this point we have established that the statement is of the + ** correct syntactic form to participate in this optimization. Now + ** we have to check the semantics. + */ + pItem = pSelect->pSrc->a; + pSrc = sqlite3LocateTableItem(pParse, 0, pItem); + if( pSrc==0 ){ + return 0; /* FROM clause does not contain a real table */ + } + if( pSrc->tnum==pDest->tnum && pSrc->pSchema==pDest->pSchema ){ + testcase( pSrc!=pDest ); /* Possible due to bad sqlite_schema.rootpage */ + return 0; /* tab1 and tab2 may not be the same table */ + } + if( HasRowid(pDest)!=HasRowid(pSrc) ){ + return 0; /* source and destination must both be WITHOUT ROWID or not */ + } + if( !IsOrdinaryTable(pSrc) ){ + return 0; /* tab2 may not be a view or virtual table */ + } + if( pDest->nCol!=pSrc->nCol ){ + return 0; /* Number of columns must be the same in tab1 and tab2 */ + } + if( pDest->iPKey!=pSrc->iPKey ){ + return 0; /* Both tables must have the same INTEGER PRIMARY KEY */ + } + if( (pDest->tabFlags & TF_Strict)!=0 && (pSrc->tabFlags & TF_Strict)==0 ){ + return 0; /* Cannot feed from a non-strict into a strict table */ + } + for(i=0; inCol; i++){ + Column *pDestCol = &pDest->aCol[i]; + Column *pSrcCol = &pSrc->aCol[i]; +#ifdef SQLITE_ENABLE_HIDDEN_COLUMNS + if( (db->mDbFlags & DBFLAG_Vacuum)==0 + && (pDestCol->colFlags | pSrcCol->colFlags) & COLFLAG_HIDDEN + ){ + return 0; /* Neither table may have __hidden__ columns */ + } +#endif +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + /* Even if tables t1 and t2 have identical schemas, if they contain + ** generated columns, then this statement is semantically incorrect: + ** + ** INSERT INTO t2 SELECT * FROM t1; + ** + ** The reason is that generated column values are returned by the + ** the SELECT statement on the right but the INSERT statement on the + ** left wants them to be omitted. + ** + ** Nevertheless, this is a useful notational shorthand to tell SQLite + ** to do a bulk transfer all of the content from t1 over to t2. + ** + ** We could, in theory, disable this (except for internal use by the + ** VACUUM command where it is actually needed). But why do that? It + ** seems harmless enough, and provides a useful service. + */ + if( (pDestCol->colFlags & COLFLAG_GENERATED) != + (pSrcCol->colFlags & COLFLAG_GENERATED) ){ + return 0; /* Both columns have the same generated-column type */ + } + /* But the transfer is only allowed if both the source and destination + ** tables have the exact same expressions for generated columns. + ** This requirement could be relaxed for VIRTUAL columns, I suppose. + */ + if( (pDestCol->colFlags & COLFLAG_GENERATED)!=0 ){ + if( sqlite3ExprCompare(0, + sqlite3ColumnExpr(pSrc, pSrcCol), + sqlite3ColumnExpr(pDest, pDestCol), -1)!=0 ){ + testcase( pDestCol->colFlags & COLFLAG_VIRTUAL ); + testcase( pDestCol->colFlags & COLFLAG_STORED ); + return 0; /* Different generator expressions */ + } + } +#endif + if( pDestCol->affinity!=pSrcCol->affinity ){ + return 0; /* Affinity must be the same on all columns */ + } + if( sqlite3_stricmp(sqlite3ColumnColl(pDestCol), + sqlite3ColumnColl(pSrcCol))!=0 ){ + return 0; /* Collating sequence must be the same on all columns */ + } + if( pDestCol->notNull && !pSrcCol->notNull ){ + return 0; /* tab2 must be NOT NULL if tab1 is */ + } + /* Default values for second and subsequent columns need to match. */ + if( (pDestCol->colFlags & COLFLAG_GENERATED)==0 && i>0 ){ + Expr *pDestExpr = sqlite3ColumnExpr(pDest, pDestCol); + Expr *pSrcExpr = sqlite3ColumnExpr(pSrc, pSrcCol); + assert( pDestExpr==0 || pDestExpr->op==TK_SPAN ); + assert( pDestExpr==0 || !ExprHasProperty(pDestExpr, EP_IntValue) ); + assert( pSrcExpr==0 || pSrcExpr->op==TK_SPAN ); + assert( pSrcExpr==0 || !ExprHasProperty(pSrcExpr, EP_IntValue) ); + if( (pDestExpr==0)!=(pSrcExpr==0) + || (pDestExpr!=0 && strcmp(pDestExpr->u.zToken, + pSrcExpr->u.zToken)!=0) + ){ + return 0; /* Default values must be the same for all columns */ + } + } + } + for(pDestIdx=pDest->pIndex; pDestIdx; pDestIdx=pDestIdx->pNext){ + if( IsUniqueIndex(pDestIdx) ){ + destHasUniqueIdx = 1; + } + for(pSrcIdx=pSrc->pIndex; pSrcIdx; pSrcIdx=pSrcIdx->pNext){ + if( xferCompatibleIndex(pDestIdx, pSrcIdx) ) break; + } + if( pSrcIdx==0 ){ + return 0; /* pDestIdx has no corresponding index in pSrc */ + } + if( pSrcIdx->tnum==pDestIdx->tnum && pSrc->pSchema==pDest->pSchema + && sqlite3FaultSim(411)==SQLITE_OK ){ + /* The sqlite3FaultSim() call allows this corruption test to be + ** bypassed during testing, in order to exercise other corruption tests + ** further downstream. */ + return 0; /* Corrupt schema - two indexes on the same btree */ + } + } +#ifndef SQLITE_OMIT_CHECK + if( pDest->pCheck && sqlite3ExprListCompare(pSrc->pCheck,pDest->pCheck,-1) ){ + return 0; /* Tables have different CHECK constraints. Ticket #2252 */ + } +#endif +#ifndef SQLITE_OMIT_FOREIGN_KEY + /* Disallow the transfer optimization if the destination table constains + ** any foreign key constraints. This is more restrictive than necessary. + ** But the main beneficiary of the transfer optimization is the VACUUM + ** command, and the VACUUM command disables foreign key constraints. So + ** the extra complication to make this rule less restrictive is probably + ** not worth the effort. Ticket [6284df89debdfa61db8073e062908af0c9b6118e] + */ + assert( IsOrdinaryTable(pDest) ); + if( (db->flags & SQLITE_ForeignKeys)!=0 && pDest->u.tab.pFKey!=0 ){ + return 0; + } +#endif + if( (db->flags & SQLITE_CountRows)!=0 ){ + return 0; /* xfer opt does not play well with PRAGMA count_changes */ + } + + /* If we get this far, it means that the xfer optimization is at + ** least a possibility, though it might only work if the destination + ** table (tab1) is initially empty. + */ +#ifdef SQLITE_TEST + sqlite3_xferopt_count++; +#endif + iDbSrc = sqlite3SchemaToIndex(db, pSrc->pSchema); + v = sqlite3GetVdbe(pParse); + sqlite3CodeVerifySchema(pParse, iDbSrc); + iSrc = pParse->nTab++; + iDest = pParse->nTab++; + regAutoinc = autoIncBegin(pParse, iDbDest, pDest); + regData = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp2(v, OP_Null, 0, regData); + regRowid = sqlite3GetTempReg(pParse); + sqlite3OpenTable(pParse, iDest, iDbDest, pDest, OP_OpenWrite); + assert( HasRowid(pDest) || destHasUniqueIdx ); + if( (db->mDbFlags & DBFLAG_Vacuum)==0 && ( + (pDest->iPKey<0 && pDest->pIndex!=0) /* (1) */ + || destHasUniqueIdx /* (2) */ + || (onError!=OE_Abort && onError!=OE_Rollback) /* (3) */ + )){ + /* In some circumstances, we are able to run the xfer optimization + ** only if the destination table is initially empty. Unless the + ** DBFLAG_Vacuum flag is set, this block generates code to make + ** that determination. If DBFLAG_Vacuum is set, then the destination + ** table is always empty. + ** + ** Conditions under which the destination must be empty: + ** + ** (1) There is no INTEGER PRIMARY KEY but there are indices. + ** (If the destination is not initially empty, the rowid fields + ** of index entries might need to change.) + ** + ** (2) The destination has a unique index. (The xfer optimization + ** is unable to test uniqueness.) + ** + ** (3) onError is something other than OE_Abort and OE_Rollback. + */ + addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iDest, 0); VdbeCoverage(v); + emptyDestTest = sqlite3VdbeAddOp0(v, OP_Goto); + sqlite3VdbeJumpHere(v, addr1); + } + if( HasRowid(pSrc) ){ + u8 insFlags; + sqlite3OpenTable(pParse, iSrc, iDbSrc, pSrc, OP_OpenRead); + emptySrcTest = sqlite3VdbeAddOp2(v, OP_Rewind, iSrc, 0); VdbeCoverage(v); + if( pDest->iPKey>=0 ){ + addr1 = sqlite3VdbeAddOp2(v, OP_Rowid, iSrc, regRowid); + if( (db->mDbFlags & DBFLAG_Vacuum)==0 ){ + sqlite3VdbeVerifyAbortable(v, onError); + addr2 = sqlite3VdbeAddOp3(v, OP_NotExists, iDest, 0, regRowid); + VdbeCoverage(v); + sqlite3RowidConstraint(pParse, onError, pDest); + sqlite3VdbeJumpHere(v, addr2); + } + autoIncStep(pParse, regAutoinc, regRowid); + }else if( pDest->pIndex==0 && !(db->mDbFlags & DBFLAG_VacuumInto) ){ + addr1 = sqlite3VdbeAddOp2(v, OP_NewRowid, iDest, regRowid); + }else{ + addr1 = sqlite3VdbeAddOp2(v, OP_Rowid, iSrc, regRowid); + assert( (pDest->tabFlags & TF_Autoincrement)==0 ); + } + + if( db->mDbFlags & DBFLAG_Vacuum ){ + sqlite3VdbeAddOp1(v, OP_SeekEnd, iDest); + insFlags = OPFLAG_APPEND|OPFLAG_USESEEKRESULT|OPFLAG_PREFORMAT; + }else{ + insFlags = OPFLAG_NCHANGE|OPFLAG_LASTROWID|OPFLAG_APPEND|OPFLAG_PREFORMAT; + } +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + if( (db->mDbFlags & DBFLAG_Vacuum)==0 ){ + sqlite3VdbeAddOp3(v, OP_RowData, iSrc, regData, 1); + insFlags &= ~OPFLAG_PREFORMAT; + }else +#endif + { + sqlite3VdbeAddOp3(v, OP_RowCell, iDest, iSrc, regRowid); + } + sqlite3VdbeAddOp3(v, OP_Insert, iDest, regData, regRowid); + if( (db->mDbFlags & DBFLAG_Vacuum)==0 ){ + sqlite3VdbeChangeP4(v, -1, (char*)pDest, P4_TABLE); + } + sqlite3VdbeChangeP5(v, insFlags); + + sqlite3VdbeAddOp2(v, OP_Next, iSrc, addr1); VdbeCoverage(v); + sqlite3VdbeAddOp2(v, OP_Close, iSrc, 0); + sqlite3VdbeAddOp2(v, OP_Close, iDest, 0); + }else{ + sqlite3TableLock(pParse, iDbDest, pDest->tnum, 1, pDest->zName); + sqlite3TableLock(pParse, iDbSrc, pSrc->tnum, 0, pSrc->zName); + } + for(pDestIdx=pDest->pIndex; pDestIdx; pDestIdx=pDestIdx->pNext){ + u8 idxInsFlags = 0; + for(pSrcIdx=pSrc->pIndex; ALWAYS(pSrcIdx); pSrcIdx=pSrcIdx->pNext){ + if( xferCompatibleIndex(pDestIdx, pSrcIdx) ) break; + } + assert( pSrcIdx ); + sqlite3VdbeAddOp3(v, OP_OpenRead, iSrc, pSrcIdx->tnum, iDbSrc); + sqlite3VdbeSetP4KeyInfo(pParse, pSrcIdx); + VdbeComment((v, "%s", pSrcIdx->zName)); + sqlite3VdbeAddOp3(v, OP_OpenWrite, iDest, pDestIdx->tnum, iDbDest); + sqlite3VdbeSetP4KeyInfo(pParse, pDestIdx); + sqlite3VdbeChangeP5(v, OPFLAG_BULKCSR); + VdbeComment((v, "%s", pDestIdx->zName)); + addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iSrc, 0); VdbeCoverage(v); + if( db->mDbFlags & DBFLAG_Vacuum ){ + /* This INSERT command is part of a VACUUM operation, which guarantees + ** that the destination table is empty. If all indexed columns use + ** collation sequence BINARY, then it can also be assumed that the + ** index will be populated by inserting keys in strictly sorted + ** order. In this case, instead of seeking within the b-tree as part + ** of every OP_IdxInsert opcode, an OP_SeekEnd is added before the + ** OP_IdxInsert to seek to the point within the b-tree where each key + ** should be inserted. This is faster. + ** + ** If any of the indexed columns use a collation sequence other than + ** BINARY, this optimization is disabled. This is because the user + ** might change the definition of a collation sequence and then run + ** a VACUUM command. In that case keys may not be written in strictly + ** sorted order. */ + for(i=0; inColumn; i++){ + const char *zColl = pSrcIdx->azColl[i]; + if( sqlite3_stricmp(sqlite3StrBINARY, zColl) ) break; + } + if( i==pSrcIdx->nColumn ){ + idxInsFlags = OPFLAG_USESEEKRESULT|OPFLAG_PREFORMAT; + sqlite3VdbeAddOp1(v, OP_SeekEnd, iDest); + sqlite3VdbeAddOp2(v, OP_RowCell, iDest, iSrc); + } + }else if( !HasRowid(pSrc) && pDestIdx->idxType==SQLITE_IDXTYPE_PRIMARYKEY ){ + idxInsFlags |= OPFLAG_NCHANGE; + } + if( idxInsFlags!=(OPFLAG_USESEEKRESULT|OPFLAG_PREFORMAT) ){ + sqlite3VdbeAddOp3(v, OP_RowData, iSrc, regData, 1); + if( (db->mDbFlags & DBFLAG_Vacuum)==0 + && !HasRowid(pDest) + && IsPrimaryKeyIndex(pDestIdx) + ){ + codeWithoutRowidPreupdate(pParse, pDest, iDest, regData); + } + } + sqlite3VdbeAddOp2(v, OP_IdxInsert, iDest, regData); + sqlite3VdbeChangeP5(v, idxInsFlags|OPFLAG_APPEND); + sqlite3VdbeAddOp2(v, OP_Next, iSrc, addr1+1); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addr1); + sqlite3VdbeAddOp2(v, OP_Close, iSrc, 0); + sqlite3VdbeAddOp2(v, OP_Close, iDest, 0); + } + if( emptySrcTest ) sqlite3VdbeJumpHere(v, emptySrcTest); + sqlite3ReleaseTempReg(pParse, regRowid); + sqlite3ReleaseTempReg(pParse, regData); + if( emptyDestTest ){ + sqlite3AutoincrementEnd(pParse); + sqlite3VdbeAddOp2(v, OP_Halt, SQLITE_OK, 0); + sqlite3VdbeJumpHere(v, emptyDestTest); + sqlite3VdbeAddOp2(v, OP_Close, iDest, 0); + return 0; + }else{ + return 1; + } +} +#endif /* SQLITE_OMIT_XFER_OPT */ + +/************** End of insert.c **********************************************/ +/************** Begin file legacy.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Main file for the SQLite library. The routines in this file +** implement the programmer interface to the library. Routines in +** other files are for internal use by SQLite and should not be +** accessed by users of the library. +*/ + +/* #include "sqliteInt.h" */ + +/* +** Execute SQL code. Return one of the SQLITE_ success/failure +** codes. Also write an error message into memory obtained from +** malloc() and make *pzErrMsg point to that message. +** +** If the SQL is a query, then for each row in the query result +** the xCallback() function is called. pArg becomes the first +** argument to xCallback(). If xCallback=NULL then no callback +** is invoked, even for queries. +*/ +SQLITE_API int sqlite3_exec( + sqlite3 *db, /* The database on which the SQL executes */ + const char *zSql, /* The SQL to be executed */ + sqlite3_callback xCallback, /* Invoke this callback routine */ + void *pArg, /* First argument to xCallback() */ + char **pzErrMsg /* Write error messages here */ +){ + int rc = SQLITE_OK; /* Return code */ + const char *zLeftover; /* Tail of unprocessed SQL */ + sqlite3_stmt *pStmt = 0; /* The current SQL statement */ + char **azCols = 0; /* Names of result columns */ + int callbackIsInit; /* True if callback data is initialized */ + + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; + if( zSql==0 ) zSql = ""; + + sqlite3_mutex_enter(db->mutex); + sqlite3Error(db, SQLITE_OK); + while( rc==SQLITE_OK && zSql[0] ){ + int nCol = 0; + char **azVals = 0; + + pStmt = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, &zLeftover); + assert( rc==SQLITE_OK || pStmt==0 ); + if( rc!=SQLITE_OK ){ + continue; + } + if( !pStmt ){ + /* this happens for a comment or white-space */ + zSql = zLeftover; + continue; + } + callbackIsInit = 0; + + while( 1 ){ + int i; + rc = sqlite3_step(pStmt); + + /* Invoke the callback function if required */ + if( xCallback && (SQLITE_ROW==rc || + (SQLITE_DONE==rc && !callbackIsInit + && db->flags&SQLITE_NullCallback)) ){ + if( !callbackIsInit ){ + nCol = sqlite3_column_count(pStmt); + azCols = sqlite3DbMallocRaw(db, (2*nCol+1)*sizeof(const char*)); + if( azCols==0 ){ + goto exec_out; + } + for(i=0; ierrMask)==rc ); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/************** End of legacy.c **********************************************/ +/************** Begin file loadext.c *****************************************/ +/* +** 2006 June 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used to dynamically load extensions into +** the SQLite library. +*/ + +#ifndef SQLITE_CORE + #define SQLITE_CORE 1 /* Disable the API redefinition in sqlite3ext.h */ +#endif +/************** Include sqlite3ext.h in the middle of loadext.c **************/ +/************** Begin file sqlite3ext.h **************************************/ +/* +** 2006 June 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the SQLite interface for use by +** shared libraries that want to be imported as extensions into +** an SQLite instance. Shared libraries that intend to be loaded +** as extensions by SQLite should #include this file instead of +** sqlite3.h. +*/ +#ifndef SQLITE3EXT_H +#define SQLITE3EXT_H +/* #include "sqlite3.h" */ + +/* +** The following structure holds pointers to all of the SQLite API +** routines. +** +** WARNING: In order to maintain backwards compatibility, add new +** interfaces to the end of this structure only. If you insert new +** interfaces in the middle of this structure, then older different +** versions of SQLite will not be able to load each other's shared +** libraries! +*/ +struct sqlite3_api_routines { + void * (*aggregate_context)(sqlite3_context*,int nBytes); + int (*aggregate_count)(sqlite3_context*); + int (*bind_blob)(sqlite3_stmt*,int,const void*,int n,void(*)(void*)); + int (*bind_double)(sqlite3_stmt*,int,double); + int (*bind_int)(sqlite3_stmt*,int,int); + int (*bind_int64)(sqlite3_stmt*,int,sqlite_int64); + int (*bind_null)(sqlite3_stmt*,int); + int (*bind_parameter_count)(sqlite3_stmt*); + int (*bind_parameter_index)(sqlite3_stmt*,const char*zName); + const char * (*bind_parameter_name)(sqlite3_stmt*,int); + int (*bind_text)(sqlite3_stmt*,int,const char*,int n,void(*)(void*)); + int (*bind_text16)(sqlite3_stmt*,int,const void*,int,void(*)(void*)); + int (*bind_value)(sqlite3_stmt*,int,const sqlite3_value*); + int (*busy_handler)(sqlite3*,int(*)(void*,int),void*); + int (*busy_timeout)(sqlite3*,int ms); + int (*changes)(sqlite3*); + int (*close)(sqlite3*); + int (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*, + int eTextRep,const char*)); + int (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*, + int eTextRep,const void*)); + const void * (*column_blob)(sqlite3_stmt*,int iCol); + int (*column_bytes)(sqlite3_stmt*,int iCol); + int (*column_bytes16)(sqlite3_stmt*,int iCol); + int (*column_count)(sqlite3_stmt*pStmt); + const char * (*column_database_name)(sqlite3_stmt*,int); + const void * (*column_database_name16)(sqlite3_stmt*,int); + const char * (*column_decltype)(sqlite3_stmt*,int i); + const void * (*column_decltype16)(sqlite3_stmt*,int); + double (*column_double)(sqlite3_stmt*,int iCol); + int (*column_int)(sqlite3_stmt*,int iCol); + sqlite_int64 (*column_int64)(sqlite3_stmt*,int iCol); + const char * (*column_name)(sqlite3_stmt*,int); + const void * (*column_name16)(sqlite3_stmt*,int); + const char * (*column_origin_name)(sqlite3_stmt*,int); + const void * (*column_origin_name16)(sqlite3_stmt*,int); + const char * (*column_table_name)(sqlite3_stmt*,int); + const void * (*column_table_name16)(sqlite3_stmt*,int); + const unsigned char * (*column_text)(sqlite3_stmt*,int iCol); + const void * (*column_text16)(sqlite3_stmt*,int iCol); + int (*column_type)(sqlite3_stmt*,int iCol); + sqlite3_value* (*column_value)(sqlite3_stmt*,int iCol); + void * (*commit_hook)(sqlite3*,int(*)(void*),void*); + int (*complete)(const char*sql); + int (*complete16)(const void*sql); + int (*create_collation)(sqlite3*,const char*,int,void*, + int(*)(void*,int,const void*,int,const void*)); + int (*create_collation16)(sqlite3*,const void*,int,void*, + int(*)(void*,int,const void*,int,const void*)); + int (*create_function)(sqlite3*,const char*,int,int,void*, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*)); + int (*create_function16)(sqlite3*,const void*,int,int,void*, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*)); + int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*); + int (*data_count)(sqlite3_stmt*pStmt); + sqlite3 * (*db_handle)(sqlite3_stmt*); + int (*declare_vtab)(sqlite3*,const char*); + int (*enable_shared_cache)(int); + int (*errcode)(sqlite3*db); + const char * (*errmsg)(sqlite3*); + const void * (*errmsg16)(sqlite3*); + int (*exec)(sqlite3*,const char*,sqlite3_callback,void*,char**); + int (*expired)(sqlite3_stmt*); + int (*finalize)(sqlite3_stmt*pStmt); + void (*free)(void*); + void (*free_table)(char**result); + int (*get_autocommit)(sqlite3*); + void * (*get_auxdata)(sqlite3_context*,int); + int (*get_table)(sqlite3*,const char*,char***,int*,int*,char**); + int (*global_recover)(void); + void (*interruptx)(sqlite3*); + sqlite_int64 (*last_insert_rowid)(sqlite3*); + const char * (*libversion)(void); + int (*libversion_number)(void); + void *(*malloc)(int); + char * (*mprintf)(const char*,...); + int (*open)(const char*,sqlite3**); + int (*open16)(const void*,sqlite3**); + int (*prepare)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); + int (*prepare16)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); + void * (*profile)(sqlite3*,void(*)(void*,const char*,sqlite_uint64),void*); + void (*progress_handler)(sqlite3*,int,int(*)(void*),void*); + void *(*realloc)(void*,int); + int (*reset)(sqlite3_stmt*pStmt); + void (*result_blob)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_double)(sqlite3_context*,double); + void (*result_error)(sqlite3_context*,const char*,int); + void (*result_error16)(sqlite3_context*,const void*,int); + void (*result_int)(sqlite3_context*,int); + void (*result_int64)(sqlite3_context*,sqlite_int64); + void (*result_null)(sqlite3_context*); + void (*result_text)(sqlite3_context*,const char*,int,void(*)(void*)); + void (*result_text16)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_text16be)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_text16le)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_value)(sqlite3_context*,sqlite3_value*); + void * (*rollback_hook)(sqlite3*,void(*)(void*),void*); + int (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*, + const char*,const char*),void*); + void (*set_auxdata)(sqlite3_context*,int,void*,void (*)(void*)); + char * (*xsnprintf)(int,char*,const char*,...); + int (*step)(sqlite3_stmt*); + int (*table_column_metadata)(sqlite3*,const char*,const char*,const char*, + char const**,char const**,int*,int*,int*); + void (*thread_cleanup)(void); + int (*total_changes)(sqlite3*); + void * (*trace)(sqlite3*,void(*xTrace)(void*,const char*),void*); + int (*transfer_bindings)(sqlite3_stmt*,sqlite3_stmt*); + void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*, + sqlite_int64),void*); + void * (*user_data)(sqlite3_context*); + const void * (*value_blob)(sqlite3_value*); + int (*value_bytes)(sqlite3_value*); + int (*value_bytes16)(sqlite3_value*); + double (*value_double)(sqlite3_value*); + int (*value_int)(sqlite3_value*); + sqlite_int64 (*value_int64)(sqlite3_value*); + int (*value_numeric_type)(sqlite3_value*); + const unsigned char * (*value_text)(sqlite3_value*); + const void * (*value_text16)(sqlite3_value*); + const void * (*value_text16be)(sqlite3_value*); + const void * (*value_text16le)(sqlite3_value*); + int (*value_type)(sqlite3_value*); + char *(*vmprintf)(const char*,va_list); + /* Added ??? */ + int (*overload_function)(sqlite3*, const char *zFuncName, int nArg); + /* Added by 3.3.13 */ + int (*prepare_v2)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); + int (*prepare16_v2)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); + int (*clear_bindings)(sqlite3_stmt*); + /* Added by 3.4.1 */ + int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*, + void (*xDestroy)(void *)); + /* Added by 3.5.0 */ + int (*bind_zeroblob)(sqlite3_stmt*,int,int); + int (*blob_bytes)(sqlite3_blob*); + int (*blob_close)(sqlite3_blob*); + int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64, + int,sqlite3_blob**); + int (*blob_read)(sqlite3_blob*,void*,int,int); + int (*blob_write)(sqlite3_blob*,const void*,int,int); + int (*create_collation_v2)(sqlite3*,const char*,int,void*, + int(*)(void*,int,const void*,int,const void*), + void(*)(void*)); + int (*file_control)(sqlite3*,const char*,int,void*); + sqlite3_int64 (*memory_highwater)(int); + sqlite3_int64 (*memory_used)(void); + sqlite3_mutex *(*mutex_alloc)(int); + void (*mutex_enter)(sqlite3_mutex*); + void (*mutex_free)(sqlite3_mutex*); + void (*mutex_leave)(sqlite3_mutex*); + int (*mutex_try)(sqlite3_mutex*); + int (*open_v2)(const char*,sqlite3**,int,const char*); + int (*release_memory)(int); + void (*result_error_nomem)(sqlite3_context*); + void (*result_error_toobig)(sqlite3_context*); + int (*sleep)(int); + void (*soft_heap_limit)(int); + sqlite3_vfs *(*vfs_find)(const char*); + int (*vfs_register)(sqlite3_vfs*,int); + int (*vfs_unregister)(sqlite3_vfs*); + int (*xthreadsafe)(void); + void (*result_zeroblob)(sqlite3_context*,int); + void (*result_error_code)(sqlite3_context*,int); + int (*test_control)(int, ...); + void (*randomness)(int,void*); + sqlite3 *(*context_db_handle)(sqlite3_context*); + int (*extended_result_codes)(sqlite3*,int); + int (*limit)(sqlite3*,int,int); + sqlite3_stmt *(*next_stmt)(sqlite3*,sqlite3_stmt*); + const char *(*sql)(sqlite3_stmt*); + int (*status)(int,int*,int*,int); + int (*backup_finish)(sqlite3_backup*); + sqlite3_backup *(*backup_init)(sqlite3*,const char*,sqlite3*,const char*); + int (*backup_pagecount)(sqlite3_backup*); + int (*backup_remaining)(sqlite3_backup*); + int (*backup_step)(sqlite3_backup*,int); + const char *(*compileoption_get)(int); + int (*compileoption_used)(const char*); + int (*create_function_v2)(sqlite3*,const char*,int,int,void*, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*), + void(*xDestroy)(void*)); + int (*db_config)(sqlite3*,int,...); + sqlite3_mutex *(*db_mutex)(sqlite3*); + int (*db_status)(sqlite3*,int,int*,int*,int); + int (*extended_errcode)(sqlite3*); + void (*log)(int,const char*,...); + sqlite3_int64 (*soft_heap_limit64)(sqlite3_int64); + const char *(*sourceid)(void); + int (*stmt_status)(sqlite3_stmt*,int,int); + int (*strnicmp)(const char*,const char*,int); + int (*unlock_notify)(sqlite3*,void(*)(void**,int),void*); + int (*wal_autocheckpoint)(sqlite3*,int); + int (*wal_checkpoint)(sqlite3*,const char*); + void *(*wal_hook)(sqlite3*,int(*)(void*,sqlite3*,const char*,int),void*); + int (*blob_reopen)(sqlite3_blob*,sqlite3_int64); + int (*vtab_config)(sqlite3*,int op,...); + int (*vtab_on_conflict)(sqlite3*); + /* Version 3.7.16 and later */ + int (*close_v2)(sqlite3*); + const char *(*db_filename)(sqlite3*,const char*); + int (*db_readonly)(sqlite3*,const char*); + int (*db_release_memory)(sqlite3*); + const char *(*errstr)(int); + int (*stmt_busy)(sqlite3_stmt*); + int (*stmt_readonly)(sqlite3_stmt*); + int (*stricmp)(const char*,const char*); + int (*uri_boolean)(const char*,const char*,int); + sqlite3_int64 (*uri_int64)(const char*,const char*,sqlite3_int64); + const char *(*uri_parameter)(const char*,const char*); + char *(*xvsnprintf)(int,char*,const char*,va_list); + int (*wal_checkpoint_v2)(sqlite3*,const char*,int,int*,int*); + /* Version 3.8.7 and later */ + int (*auto_extension)(void(*)(void)); + int (*bind_blob64)(sqlite3_stmt*,int,const void*,sqlite3_uint64, + void(*)(void*)); + int (*bind_text64)(sqlite3_stmt*,int,const char*,sqlite3_uint64, + void(*)(void*),unsigned char); + int (*cancel_auto_extension)(void(*)(void)); + int (*load_extension)(sqlite3*,const char*,const char*,char**); + void *(*malloc64)(sqlite3_uint64); + sqlite3_uint64 (*msize)(void*); + void *(*realloc64)(void*,sqlite3_uint64); + void (*reset_auto_extension)(void); + void (*result_blob64)(sqlite3_context*,const void*,sqlite3_uint64, + void(*)(void*)); + void (*result_text64)(sqlite3_context*,const char*,sqlite3_uint64, + void(*)(void*), unsigned char); + int (*strglob)(const char*,const char*); + /* Version 3.8.11 and later */ + sqlite3_value *(*value_dup)(const sqlite3_value*); + void (*value_free)(sqlite3_value*); + int (*result_zeroblob64)(sqlite3_context*,sqlite3_uint64); + int (*bind_zeroblob64)(sqlite3_stmt*, int, sqlite3_uint64); + /* Version 3.9.0 and later */ + unsigned int (*value_subtype)(sqlite3_value*); + void (*result_subtype)(sqlite3_context*,unsigned int); + /* Version 3.10.0 and later */ + int (*status64)(int,sqlite3_int64*,sqlite3_int64*,int); + int (*strlike)(const char*,const char*,unsigned int); + int (*db_cacheflush)(sqlite3*); + /* Version 3.12.0 and later */ + int (*system_errno)(sqlite3*); + /* Version 3.14.0 and later */ + int (*trace_v2)(sqlite3*,unsigned,int(*)(unsigned,void*,void*,void*),void*); + char *(*expanded_sql)(sqlite3_stmt*); + /* Version 3.18.0 and later */ + void (*set_last_insert_rowid)(sqlite3*,sqlite3_int64); + /* Version 3.20.0 and later */ + int (*prepare_v3)(sqlite3*,const char*,int,unsigned int, + sqlite3_stmt**,const char**); + int (*prepare16_v3)(sqlite3*,const void*,int,unsigned int, + sqlite3_stmt**,const void**); + int (*bind_pointer)(sqlite3_stmt*,int,void*,const char*,void(*)(void*)); + void (*result_pointer)(sqlite3_context*,void*,const char*,void(*)(void*)); + void *(*value_pointer)(sqlite3_value*,const char*); + int (*vtab_nochange)(sqlite3_context*); + int (*value_nochange)(sqlite3_value*); + const char *(*vtab_collation)(sqlite3_index_info*,int); + /* Version 3.24.0 and later */ + int (*keyword_count)(void); + int (*keyword_name)(int,const char**,int*); + int (*keyword_check)(const char*,int); + sqlite3_str *(*str_new)(sqlite3*); + char *(*str_finish)(sqlite3_str*); + void (*str_appendf)(sqlite3_str*, const char *zFormat, ...); + void (*str_vappendf)(sqlite3_str*, const char *zFormat, va_list); + void (*str_append)(sqlite3_str*, const char *zIn, int N); + void (*str_appendall)(sqlite3_str*, const char *zIn); + void (*str_appendchar)(sqlite3_str*, int N, char C); + void (*str_reset)(sqlite3_str*); + int (*str_errcode)(sqlite3_str*); + int (*str_length)(sqlite3_str*); + char *(*str_value)(sqlite3_str*); + /* Version 3.25.0 and later */ + int (*create_window_function)(sqlite3*,const char*,int,int,void*, + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*), + void (*xValue)(sqlite3_context*), + void (*xInv)(sqlite3_context*,int,sqlite3_value**), + void(*xDestroy)(void*)); + /* Version 3.26.0 and later */ + const char *(*normalized_sql)(sqlite3_stmt*); + /* Version 3.28.0 and later */ + int (*stmt_isexplain)(sqlite3_stmt*); + int (*value_frombind)(sqlite3_value*); + /* Version 3.30.0 and later */ + int (*drop_modules)(sqlite3*,const char**); + /* Version 3.31.0 and later */ + sqlite3_int64 (*hard_heap_limit64)(sqlite3_int64); + const char *(*uri_key)(const char*,int); + const char *(*filename_database)(const char*); + const char *(*filename_journal)(const char*); + const char *(*filename_wal)(const char*); + /* Version 3.32.0 and later */ + char *(*create_filename)(const char*,const char*,const char*, + int,const char**); + void (*free_filename)(char*); + sqlite3_file *(*database_file_object)(const char*); + /* Version 3.34.0 and later */ + int (*txn_state)(sqlite3*,const char*); + /* Version 3.36.1 and later */ + sqlite3_int64 (*changes64)(sqlite3*); + sqlite3_int64 (*total_changes64)(sqlite3*); + /* Version 3.37.0 and later */ + int (*autovacuum_pages)(sqlite3*, + unsigned int(*)(void*,const char*,unsigned int,unsigned int,unsigned int), + void*, void(*)(void*)); + /* Version 3.38.0 and later */ + int (*error_offset)(sqlite3*); + int (*vtab_rhs_value)(sqlite3_index_info*,int,sqlite3_value**); + int (*vtab_distinct)(sqlite3_index_info*); + int (*vtab_in)(sqlite3_index_info*,int,int); + int (*vtab_in_first)(sqlite3_value*,sqlite3_value**); + int (*vtab_in_next)(sqlite3_value*,sqlite3_value**); +}; + +/* +** This is the function signature used for all extension entry points. It +** is also defined in the file "loadext.c". +*/ +typedef int (*sqlite3_loadext_entry)( + sqlite3 *db, /* Handle to the database. */ + char **pzErrMsg, /* Used to set error string on failure. */ + const sqlite3_api_routines *pThunk /* Extension API function pointers. */ +); + +/* +** The following macros redefine the API routines so that they are +** redirected through the global sqlite3_api structure. +** +** This header file is also used by the loadext.c source file +** (part of the main SQLite library - not an extension) so that +** it can get access to the sqlite3_api_routines structure +** definition. But the main library does not want to redefine +** the API. So the redefinition macros are only valid if the +** SQLITE_CORE macros is undefined. +*/ +#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) +#define sqlite3_aggregate_context sqlite3_api->aggregate_context +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_aggregate_count sqlite3_api->aggregate_count +#endif +#define sqlite3_bind_blob sqlite3_api->bind_blob +#define sqlite3_bind_double sqlite3_api->bind_double +#define sqlite3_bind_int sqlite3_api->bind_int +#define sqlite3_bind_int64 sqlite3_api->bind_int64 +#define sqlite3_bind_null sqlite3_api->bind_null +#define sqlite3_bind_parameter_count sqlite3_api->bind_parameter_count +#define sqlite3_bind_parameter_index sqlite3_api->bind_parameter_index +#define sqlite3_bind_parameter_name sqlite3_api->bind_parameter_name +#define sqlite3_bind_text sqlite3_api->bind_text +#define sqlite3_bind_text16 sqlite3_api->bind_text16 +#define sqlite3_bind_value sqlite3_api->bind_value +#define sqlite3_busy_handler sqlite3_api->busy_handler +#define sqlite3_busy_timeout sqlite3_api->busy_timeout +#define sqlite3_changes sqlite3_api->changes +#define sqlite3_close sqlite3_api->close +#define sqlite3_collation_needed sqlite3_api->collation_needed +#define sqlite3_collation_needed16 sqlite3_api->collation_needed16 +#define sqlite3_column_blob sqlite3_api->column_blob +#define sqlite3_column_bytes sqlite3_api->column_bytes +#define sqlite3_column_bytes16 sqlite3_api->column_bytes16 +#define sqlite3_column_count sqlite3_api->column_count +#define sqlite3_column_database_name sqlite3_api->column_database_name +#define sqlite3_column_database_name16 sqlite3_api->column_database_name16 +#define sqlite3_column_decltype sqlite3_api->column_decltype +#define sqlite3_column_decltype16 sqlite3_api->column_decltype16 +#define sqlite3_column_double sqlite3_api->column_double +#define sqlite3_column_int sqlite3_api->column_int +#define sqlite3_column_int64 sqlite3_api->column_int64 +#define sqlite3_column_name sqlite3_api->column_name +#define sqlite3_column_name16 sqlite3_api->column_name16 +#define sqlite3_column_origin_name sqlite3_api->column_origin_name +#define sqlite3_column_origin_name16 sqlite3_api->column_origin_name16 +#define sqlite3_column_table_name sqlite3_api->column_table_name +#define sqlite3_column_table_name16 sqlite3_api->column_table_name16 +#define sqlite3_column_text sqlite3_api->column_text +#define sqlite3_column_text16 sqlite3_api->column_text16 +#define sqlite3_column_type sqlite3_api->column_type +#define sqlite3_column_value sqlite3_api->column_value +#define sqlite3_commit_hook sqlite3_api->commit_hook +#define sqlite3_complete sqlite3_api->complete +#define sqlite3_complete16 sqlite3_api->complete16 +#define sqlite3_create_collation sqlite3_api->create_collation +#define sqlite3_create_collation16 sqlite3_api->create_collation16 +#define sqlite3_create_function sqlite3_api->create_function +#define sqlite3_create_function16 sqlite3_api->create_function16 +#define sqlite3_create_module sqlite3_api->create_module +#define sqlite3_create_module_v2 sqlite3_api->create_module_v2 +#define sqlite3_data_count sqlite3_api->data_count +#define sqlite3_db_handle sqlite3_api->db_handle +#define sqlite3_declare_vtab sqlite3_api->declare_vtab +#define sqlite3_enable_shared_cache sqlite3_api->enable_shared_cache +#define sqlite3_errcode sqlite3_api->errcode +#define sqlite3_errmsg sqlite3_api->errmsg +#define sqlite3_errmsg16 sqlite3_api->errmsg16 +#define sqlite3_exec sqlite3_api->exec +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_expired sqlite3_api->expired +#endif +#define sqlite3_finalize sqlite3_api->finalize +#define sqlite3_free sqlite3_api->free +#define sqlite3_free_table sqlite3_api->free_table +#define sqlite3_get_autocommit sqlite3_api->get_autocommit +#define sqlite3_get_auxdata sqlite3_api->get_auxdata +#define sqlite3_get_table sqlite3_api->get_table +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_global_recover sqlite3_api->global_recover +#endif +#define sqlite3_interrupt sqlite3_api->interruptx +#define sqlite3_last_insert_rowid sqlite3_api->last_insert_rowid +#define sqlite3_libversion sqlite3_api->libversion +#define sqlite3_libversion_number sqlite3_api->libversion_number +#define sqlite3_malloc sqlite3_api->malloc +#define sqlite3_mprintf sqlite3_api->mprintf +#define sqlite3_open sqlite3_api->open +#define sqlite3_open16 sqlite3_api->open16 +#define sqlite3_prepare sqlite3_api->prepare +#define sqlite3_prepare16 sqlite3_api->prepare16 +#define sqlite3_prepare_v2 sqlite3_api->prepare_v2 +#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 +#define sqlite3_profile sqlite3_api->profile +#define sqlite3_progress_handler sqlite3_api->progress_handler +#define sqlite3_realloc sqlite3_api->realloc +#define sqlite3_reset sqlite3_api->reset +#define sqlite3_result_blob sqlite3_api->result_blob +#define sqlite3_result_double sqlite3_api->result_double +#define sqlite3_result_error sqlite3_api->result_error +#define sqlite3_result_error16 sqlite3_api->result_error16 +#define sqlite3_result_int sqlite3_api->result_int +#define sqlite3_result_int64 sqlite3_api->result_int64 +#define sqlite3_result_null sqlite3_api->result_null +#define sqlite3_result_text sqlite3_api->result_text +#define sqlite3_result_text16 sqlite3_api->result_text16 +#define sqlite3_result_text16be sqlite3_api->result_text16be +#define sqlite3_result_text16le sqlite3_api->result_text16le +#define sqlite3_result_value sqlite3_api->result_value +#define sqlite3_rollback_hook sqlite3_api->rollback_hook +#define sqlite3_set_authorizer sqlite3_api->set_authorizer +#define sqlite3_set_auxdata sqlite3_api->set_auxdata +#define sqlite3_snprintf sqlite3_api->xsnprintf +#define sqlite3_step sqlite3_api->step +#define sqlite3_table_column_metadata sqlite3_api->table_column_metadata +#define sqlite3_thread_cleanup sqlite3_api->thread_cleanup +#define sqlite3_total_changes sqlite3_api->total_changes +#define sqlite3_trace sqlite3_api->trace +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_transfer_bindings sqlite3_api->transfer_bindings +#endif +#define sqlite3_update_hook sqlite3_api->update_hook +#define sqlite3_user_data sqlite3_api->user_data +#define sqlite3_value_blob sqlite3_api->value_blob +#define sqlite3_value_bytes sqlite3_api->value_bytes +#define sqlite3_value_bytes16 sqlite3_api->value_bytes16 +#define sqlite3_value_double sqlite3_api->value_double +#define sqlite3_value_int sqlite3_api->value_int +#define sqlite3_value_int64 sqlite3_api->value_int64 +#define sqlite3_value_numeric_type sqlite3_api->value_numeric_type +#define sqlite3_value_text sqlite3_api->value_text +#define sqlite3_value_text16 sqlite3_api->value_text16 +#define sqlite3_value_text16be sqlite3_api->value_text16be +#define sqlite3_value_text16le sqlite3_api->value_text16le +#define sqlite3_value_type sqlite3_api->value_type +#define sqlite3_vmprintf sqlite3_api->vmprintf +#define sqlite3_vsnprintf sqlite3_api->xvsnprintf +#define sqlite3_overload_function sqlite3_api->overload_function +#define sqlite3_prepare_v2 sqlite3_api->prepare_v2 +#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 +#define sqlite3_clear_bindings sqlite3_api->clear_bindings +#define sqlite3_bind_zeroblob sqlite3_api->bind_zeroblob +#define sqlite3_blob_bytes sqlite3_api->blob_bytes +#define sqlite3_blob_close sqlite3_api->blob_close +#define sqlite3_blob_open sqlite3_api->blob_open +#define sqlite3_blob_read sqlite3_api->blob_read +#define sqlite3_blob_write sqlite3_api->blob_write +#define sqlite3_create_collation_v2 sqlite3_api->create_collation_v2 +#define sqlite3_file_control sqlite3_api->file_control +#define sqlite3_memory_highwater sqlite3_api->memory_highwater +#define sqlite3_memory_used sqlite3_api->memory_used +#define sqlite3_mutex_alloc sqlite3_api->mutex_alloc +#define sqlite3_mutex_enter sqlite3_api->mutex_enter +#define sqlite3_mutex_free sqlite3_api->mutex_free +#define sqlite3_mutex_leave sqlite3_api->mutex_leave +#define sqlite3_mutex_try sqlite3_api->mutex_try +#define sqlite3_open_v2 sqlite3_api->open_v2 +#define sqlite3_release_memory sqlite3_api->release_memory +#define sqlite3_result_error_nomem sqlite3_api->result_error_nomem +#define sqlite3_result_error_toobig sqlite3_api->result_error_toobig +#define sqlite3_sleep sqlite3_api->sleep +#define sqlite3_soft_heap_limit sqlite3_api->soft_heap_limit +#define sqlite3_vfs_find sqlite3_api->vfs_find +#define sqlite3_vfs_register sqlite3_api->vfs_register +#define sqlite3_vfs_unregister sqlite3_api->vfs_unregister +#define sqlite3_threadsafe sqlite3_api->xthreadsafe +#define sqlite3_result_zeroblob sqlite3_api->result_zeroblob +#define sqlite3_result_error_code sqlite3_api->result_error_code +#define sqlite3_test_control sqlite3_api->test_control +#define sqlite3_randomness sqlite3_api->randomness +#define sqlite3_context_db_handle sqlite3_api->context_db_handle +#define sqlite3_extended_result_codes sqlite3_api->extended_result_codes +#define sqlite3_limit sqlite3_api->limit +#define sqlite3_next_stmt sqlite3_api->next_stmt +#define sqlite3_sql sqlite3_api->sql +#define sqlite3_status sqlite3_api->status +#define sqlite3_backup_finish sqlite3_api->backup_finish +#define sqlite3_backup_init sqlite3_api->backup_init +#define sqlite3_backup_pagecount sqlite3_api->backup_pagecount +#define sqlite3_backup_remaining sqlite3_api->backup_remaining +#define sqlite3_backup_step sqlite3_api->backup_step +#define sqlite3_compileoption_get sqlite3_api->compileoption_get +#define sqlite3_compileoption_used sqlite3_api->compileoption_used +#define sqlite3_create_function_v2 sqlite3_api->create_function_v2 +#define sqlite3_db_config sqlite3_api->db_config +#define sqlite3_db_mutex sqlite3_api->db_mutex +#define sqlite3_db_status sqlite3_api->db_status +#define sqlite3_extended_errcode sqlite3_api->extended_errcode +#define sqlite3_log sqlite3_api->log +#define sqlite3_soft_heap_limit64 sqlite3_api->soft_heap_limit64 +#define sqlite3_sourceid sqlite3_api->sourceid +#define sqlite3_stmt_status sqlite3_api->stmt_status +#define sqlite3_strnicmp sqlite3_api->strnicmp +#define sqlite3_unlock_notify sqlite3_api->unlock_notify +#define sqlite3_wal_autocheckpoint sqlite3_api->wal_autocheckpoint +#define sqlite3_wal_checkpoint sqlite3_api->wal_checkpoint +#define sqlite3_wal_hook sqlite3_api->wal_hook +#define sqlite3_blob_reopen sqlite3_api->blob_reopen +#define sqlite3_vtab_config sqlite3_api->vtab_config +#define sqlite3_vtab_on_conflict sqlite3_api->vtab_on_conflict +/* Version 3.7.16 and later */ +#define sqlite3_close_v2 sqlite3_api->close_v2 +#define sqlite3_db_filename sqlite3_api->db_filename +#define sqlite3_db_readonly sqlite3_api->db_readonly +#define sqlite3_db_release_memory sqlite3_api->db_release_memory +#define sqlite3_errstr sqlite3_api->errstr +#define sqlite3_stmt_busy sqlite3_api->stmt_busy +#define sqlite3_stmt_readonly sqlite3_api->stmt_readonly +#define sqlite3_stricmp sqlite3_api->stricmp +#define sqlite3_uri_boolean sqlite3_api->uri_boolean +#define sqlite3_uri_int64 sqlite3_api->uri_int64 +#define sqlite3_uri_parameter sqlite3_api->uri_parameter +#define sqlite3_uri_vsnprintf sqlite3_api->xvsnprintf +#define sqlite3_wal_checkpoint_v2 sqlite3_api->wal_checkpoint_v2 +/* Version 3.8.7 and later */ +#define sqlite3_auto_extension sqlite3_api->auto_extension +#define sqlite3_bind_blob64 sqlite3_api->bind_blob64 +#define sqlite3_bind_text64 sqlite3_api->bind_text64 +#define sqlite3_cancel_auto_extension sqlite3_api->cancel_auto_extension +#define sqlite3_load_extension sqlite3_api->load_extension +#define sqlite3_malloc64 sqlite3_api->malloc64 +#define sqlite3_msize sqlite3_api->msize +#define sqlite3_realloc64 sqlite3_api->realloc64 +#define sqlite3_reset_auto_extension sqlite3_api->reset_auto_extension +#define sqlite3_result_blob64 sqlite3_api->result_blob64 +#define sqlite3_result_text64 sqlite3_api->result_text64 +#define sqlite3_strglob sqlite3_api->strglob +/* Version 3.8.11 and later */ +#define sqlite3_value_dup sqlite3_api->value_dup +#define sqlite3_value_free sqlite3_api->value_free +#define sqlite3_result_zeroblob64 sqlite3_api->result_zeroblob64 +#define sqlite3_bind_zeroblob64 sqlite3_api->bind_zeroblob64 +/* Version 3.9.0 and later */ +#define sqlite3_value_subtype sqlite3_api->value_subtype +#define sqlite3_result_subtype sqlite3_api->result_subtype +/* Version 3.10.0 and later */ +#define sqlite3_status64 sqlite3_api->status64 +#define sqlite3_strlike sqlite3_api->strlike +#define sqlite3_db_cacheflush sqlite3_api->db_cacheflush +/* Version 3.12.0 and later */ +#define sqlite3_system_errno sqlite3_api->system_errno +/* Version 3.14.0 and later */ +#define sqlite3_trace_v2 sqlite3_api->trace_v2 +#define sqlite3_expanded_sql sqlite3_api->expanded_sql +/* Version 3.18.0 and later */ +#define sqlite3_set_last_insert_rowid sqlite3_api->set_last_insert_rowid +/* Version 3.20.0 and later */ +#define sqlite3_prepare_v3 sqlite3_api->prepare_v3 +#define sqlite3_prepare16_v3 sqlite3_api->prepare16_v3 +#define sqlite3_bind_pointer sqlite3_api->bind_pointer +#define sqlite3_result_pointer sqlite3_api->result_pointer +#define sqlite3_value_pointer sqlite3_api->value_pointer +/* Version 3.22.0 and later */ +#define sqlite3_vtab_nochange sqlite3_api->vtab_nochange +#define sqlite3_value_nochange sqlite3_api->value_nochange +#define sqlite3_vtab_collation sqlite3_api->vtab_collation +/* Version 3.24.0 and later */ +#define sqlite3_keyword_count sqlite3_api->keyword_count +#define sqlite3_keyword_name sqlite3_api->keyword_name +#define sqlite3_keyword_check sqlite3_api->keyword_check +#define sqlite3_str_new sqlite3_api->str_new +#define sqlite3_str_finish sqlite3_api->str_finish +#define sqlite3_str_appendf sqlite3_api->str_appendf +#define sqlite3_str_vappendf sqlite3_api->str_vappendf +#define sqlite3_str_append sqlite3_api->str_append +#define sqlite3_str_appendall sqlite3_api->str_appendall +#define sqlite3_str_appendchar sqlite3_api->str_appendchar +#define sqlite3_str_reset sqlite3_api->str_reset +#define sqlite3_str_errcode sqlite3_api->str_errcode +#define sqlite3_str_length sqlite3_api->str_length +#define sqlite3_str_value sqlite3_api->str_value +/* Version 3.25.0 and later */ +#define sqlite3_create_window_function sqlite3_api->create_window_function +/* Version 3.26.0 and later */ +#define sqlite3_normalized_sql sqlite3_api->normalized_sql +/* Version 3.28.0 and later */ +#define sqlite3_stmt_isexplain sqlite3_api->stmt_isexplain +#define sqlite3_value_frombind sqlite3_api->value_frombind +/* Version 3.30.0 and later */ +#define sqlite3_drop_modules sqlite3_api->drop_modules +/* Version 3.31.0 and later */ +#define sqlite3_hard_heap_limit64 sqlite3_api->hard_heap_limit64 +#define sqlite3_uri_key sqlite3_api->uri_key +#define sqlite3_filename_database sqlite3_api->filename_database +#define sqlite3_filename_journal sqlite3_api->filename_journal +#define sqlite3_filename_wal sqlite3_api->filename_wal +/* Version 3.32.0 and later */ +#define sqlite3_create_filename sqlite3_api->create_filename +#define sqlite3_free_filename sqlite3_api->free_filename +#define sqlite3_database_file_object sqlite3_api->database_file_object +/* Version 3.34.0 and later */ +#define sqlite3_txn_state sqlite3_api->txn_state +/* Version 3.36.1 and later */ +#define sqlite3_changes64 sqlite3_api->changes64 +#define sqlite3_total_changes64 sqlite3_api->total_changes64 +/* Version 3.37.0 and later */ +#define sqlite3_autovacuum_pages sqlite3_api->autovacuum_pages +/* Version 3.38.0 and later */ +#define sqlite3_error_offset sqlite3_api->error_offset +#define sqlite3_vtab_rhs_value sqlite3_api->vtab_rhs_value +#define sqlite3_vtab_distinct sqlite3_api->vtab_distinct +#define sqlite3_vtab_in sqlite3_api->vtab_in +#define sqlite3_vtab_in_first sqlite3_api->vtab_in_first +#define sqlite3_vtab_in_next sqlite3_api->vtab_in_next +#endif /* !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) */ + +#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) + /* This case when the file really is being compiled as a loadable + ** extension */ +# define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0; +# define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v; +# define SQLITE_EXTENSION_INIT3 \ + extern const sqlite3_api_routines *sqlite3_api; +#else + /* This case when the file is being statically linked into the + ** application */ +# define SQLITE_EXTENSION_INIT1 /*no-op*/ +# define SQLITE_EXTENSION_INIT2(v) (void)v; /* unused parameter */ +# define SQLITE_EXTENSION_INIT3 /*no-op*/ +#endif + +#endif /* SQLITE3EXT_H */ + +/************** End of sqlite3ext.h ******************************************/ +/************** Continuing where we left off in loadext.c ********************/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_OMIT_LOAD_EXTENSION +/* +** Some API routines are omitted when various features are +** excluded from a build of SQLite. Substitute a NULL pointer +** for any missing APIs. +*/ +#ifndef SQLITE_ENABLE_COLUMN_METADATA +# define sqlite3_column_database_name 0 +# define sqlite3_column_database_name16 0 +# define sqlite3_column_table_name 0 +# define sqlite3_column_table_name16 0 +# define sqlite3_column_origin_name 0 +# define sqlite3_column_origin_name16 0 +#endif + +#ifdef SQLITE_OMIT_AUTHORIZATION +# define sqlite3_set_authorizer 0 +#endif + +#ifdef SQLITE_OMIT_UTF16 +# define sqlite3_bind_text16 0 +# define sqlite3_collation_needed16 0 +# define sqlite3_column_decltype16 0 +# define sqlite3_column_name16 0 +# define sqlite3_column_text16 0 +# define sqlite3_complete16 0 +# define sqlite3_create_collation16 0 +# define sqlite3_create_function16 0 +# define sqlite3_errmsg16 0 +# define sqlite3_open16 0 +# define sqlite3_prepare16 0 +# define sqlite3_prepare16_v2 0 +# define sqlite3_prepare16_v3 0 +# define sqlite3_result_error16 0 +# define sqlite3_result_text16 0 +# define sqlite3_result_text16be 0 +# define sqlite3_result_text16le 0 +# define sqlite3_value_text16 0 +# define sqlite3_value_text16be 0 +# define sqlite3_value_text16le 0 +# define sqlite3_column_database_name16 0 +# define sqlite3_column_table_name16 0 +# define sqlite3_column_origin_name16 0 +#endif + +#ifdef SQLITE_OMIT_COMPLETE +# define sqlite3_complete 0 +# define sqlite3_complete16 0 +#endif + +#ifdef SQLITE_OMIT_DECLTYPE +# define sqlite3_column_decltype16 0 +# define sqlite3_column_decltype 0 +#endif + +#ifdef SQLITE_OMIT_PROGRESS_CALLBACK +# define sqlite3_progress_handler 0 +#endif + +#ifdef SQLITE_OMIT_VIRTUALTABLE +# define sqlite3_create_module 0 +# define sqlite3_create_module_v2 0 +# define sqlite3_declare_vtab 0 +# define sqlite3_vtab_config 0 +# define sqlite3_vtab_on_conflict 0 +# define sqlite3_vtab_collation 0 +#endif + +#ifdef SQLITE_OMIT_SHARED_CACHE +# define sqlite3_enable_shared_cache 0 +#endif + +#if defined(SQLITE_OMIT_TRACE) || defined(SQLITE_OMIT_DEPRECATED) +# define sqlite3_profile 0 +# define sqlite3_trace 0 +#endif + +#ifdef SQLITE_OMIT_GET_TABLE +# define sqlite3_free_table 0 +# define sqlite3_get_table 0 +#endif + +#ifdef SQLITE_OMIT_INCRBLOB +#define sqlite3_bind_zeroblob 0 +#define sqlite3_blob_bytes 0 +#define sqlite3_blob_close 0 +#define sqlite3_blob_open 0 +#define sqlite3_blob_read 0 +#define sqlite3_blob_write 0 +#define sqlite3_blob_reopen 0 +#endif + +#if defined(SQLITE_OMIT_TRACE) +# define sqlite3_trace_v2 0 +#endif + +/* +** The following structure contains pointers to all SQLite API routines. +** A pointer to this structure is passed into extensions when they are +** loaded so that the extension can make calls back into the SQLite +** library. +** +** When adding new APIs, add them to the bottom of this structure +** in order to preserve backwards compatibility. +** +** Extensions that use newer APIs should first call the +** sqlite3_libversion_number() to make sure that the API they +** intend to use is supported by the library. Extensions should +** also check to make sure that the pointer to the function is +** not NULL before calling it. +*/ +static const sqlite3_api_routines sqlite3Apis = { + sqlite3_aggregate_context, +#ifndef SQLITE_OMIT_DEPRECATED + sqlite3_aggregate_count, +#else + 0, +#endif + sqlite3_bind_blob, + sqlite3_bind_double, + sqlite3_bind_int, + sqlite3_bind_int64, + sqlite3_bind_null, + sqlite3_bind_parameter_count, + sqlite3_bind_parameter_index, + sqlite3_bind_parameter_name, + sqlite3_bind_text, + sqlite3_bind_text16, + sqlite3_bind_value, + sqlite3_busy_handler, + sqlite3_busy_timeout, + sqlite3_changes, + sqlite3_close, + sqlite3_collation_needed, + sqlite3_collation_needed16, + sqlite3_column_blob, + sqlite3_column_bytes, + sqlite3_column_bytes16, + sqlite3_column_count, + sqlite3_column_database_name, + sqlite3_column_database_name16, + sqlite3_column_decltype, + sqlite3_column_decltype16, + sqlite3_column_double, + sqlite3_column_int, + sqlite3_column_int64, + sqlite3_column_name, + sqlite3_column_name16, + sqlite3_column_origin_name, + sqlite3_column_origin_name16, + sqlite3_column_table_name, + sqlite3_column_table_name16, + sqlite3_column_text, + sqlite3_column_text16, + sqlite3_column_type, + sqlite3_column_value, + sqlite3_commit_hook, + sqlite3_complete, + sqlite3_complete16, + sqlite3_create_collation, + sqlite3_create_collation16, + sqlite3_create_function, + sqlite3_create_function16, + sqlite3_create_module, + sqlite3_data_count, + sqlite3_db_handle, + sqlite3_declare_vtab, + sqlite3_enable_shared_cache, + sqlite3_errcode, + sqlite3_errmsg, + sqlite3_errmsg16, + sqlite3_exec, +#ifndef SQLITE_OMIT_DEPRECATED + sqlite3_expired, +#else + 0, +#endif + sqlite3_finalize, + sqlite3_free, + sqlite3_free_table, + sqlite3_get_autocommit, + sqlite3_get_auxdata, + sqlite3_get_table, + 0, /* Was sqlite3_global_recover(), but that function is deprecated */ + sqlite3_interrupt, + sqlite3_last_insert_rowid, + sqlite3_libversion, + sqlite3_libversion_number, + sqlite3_malloc, + sqlite3_mprintf, + sqlite3_open, + sqlite3_open16, + sqlite3_prepare, + sqlite3_prepare16, + sqlite3_profile, + sqlite3_progress_handler, + sqlite3_realloc, + sqlite3_reset, + sqlite3_result_blob, + sqlite3_result_double, + sqlite3_result_error, + sqlite3_result_error16, + sqlite3_result_int, + sqlite3_result_int64, + sqlite3_result_null, + sqlite3_result_text, + sqlite3_result_text16, + sqlite3_result_text16be, + sqlite3_result_text16le, + sqlite3_result_value, + sqlite3_rollback_hook, + sqlite3_set_authorizer, + sqlite3_set_auxdata, + sqlite3_snprintf, + sqlite3_step, + sqlite3_table_column_metadata, +#ifndef SQLITE_OMIT_DEPRECATED + sqlite3_thread_cleanup, +#else + 0, +#endif + sqlite3_total_changes, + sqlite3_trace, +#ifndef SQLITE_OMIT_DEPRECATED + sqlite3_transfer_bindings, +#else + 0, +#endif + sqlite3_update_hook, + sqlite3_user_data, + sqlite3_value_blob, + sqlite3_value_bytes, + sqlite3_value_bytes16, + sqlite3_value_double, + sqlite3_value_int, + sqlite3_value_int64, + sqlite3_value_numeric_type, + sqlite3_value_text, + sqlite3_value_text16, + sqlite3_value_text16be, + sqlite3_value_text16le, + sqlite3_value_type, + sqlite3_vmprintf, + /* + ** The original API set ends here. All extensions can call any + ** of the APIs above provided that the pointer is not NULL. But + ** before calling APIs that follow, extension should check the + ** sqlite3_libversion_number() to make sure they are dealing with + ** a library that is new enough to support that API. + ************************************************************************* + */ + sqlite3_overload_function, + + /* + ** Added after 3.3.13 + */ + sqlite3_prepare_v2, + sqlite3_prepare16_v2, + sqlite3_clear_bindings, + + /* + ** Added for 3.4.1 + */ + sqlite3_create_module_v2, + + /* + ** Added for 3.5.0 + */ + sqlite3_bind_zeroblob, + sqlite3_blob_bytes, + sqlite3_blob_close, + sqlite3_blob_open, + sqlite3_blob_read, + sqlite3_blob_write, + sqlite3_create_collation_v2, + sqlite3_file_control, + sqlite3_memory_highwater, + sqlite3_memory_used, +#ifdef SQLITE_MUTEX_OMIT + 0, + 0, + 0, + 0, + 0, +#else + sqlite3_mutex_alloc, + sqlite3_mutex_enter, + sqlite3_mutex_free, + sqlite3_mutex_leave, + sqlite3_mutex_try, +#endif + sqlite3_open_v2, + sqlite3_release_memory, + sqlite3_result_error_nomem, + sqlite3_result_error_toobig, + sqlite3_sleep, + sqlite3_soft_heap_limit, + sqlite3_vfs_find, + sqlite3_vfs_register, + sqlite3_vfs_unregister, + + /* + ** Added for 3.5.8 + */ + sqlite3_threadsafe, + sqlite3_result_zeroblob, + sqlite3_result_error_code, + sqlite3_test_control, + sqlite3_randomness, + sqlite3_context_db_handle, + + /* + ** Added for 3.6.0 + */ + sqlite3_extended_result_codes, + sqlite3_limit, + sqlite3_next_stmt, + sqlite3_sql, + sqlite3_status, + + /* + ** Added for 3.7.4 + */ + sqlite3_backup_finish, + sqlite3_backup_init, + sqlite3_backup_pagecount, + sqlite3_backup_remaining, + sqlite3_backup_step, +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS + sqlite3_compileoption_get, + sqlite3_compileoption_used, +#else + 0, + 0, +#endif + sqlite3_create_function_v2, + sqlite3_db_config, + sqlite3_db_mutex, + sqlite3_db_status, + sqlite3_extended_errcode, + sqlite3_log, + sqlite3_soft_heap_limit64, + sqlite3_sourceid, + sqlite3_stmt_status, + sqlite3_strnicmp, +#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY + sqlite3_unlock_notify, +#else + 0, +#endif +#ifndef SQLITE_OMIT_WAL + sqlite3_wal_autocheckpoint, + sqlite3_wal_checkpoint, + sqlite3_wal_hook, +#else + 0, + 0, + 0, +#endif + sqlite3_blob_reopen, + sqlite3_vtab_config, + sqlite3_vtab_on_conflict, + sqlite3_close_v2, + sqlite3_db_filename, + sqlite3_db_readonly, + sqlite3_db_release_memory, + sqlite3_errstr, + sqlite3_stmt_busy, + sqlite3_stmt_readonly, + sqlite3_stricmp, + sqlite3_uri_boolean, + sqlite3_uri_int64, + sqlite3_uri_parameter, + sqlite3_vsnprintf, + sqlite3_wal_checkpoint_v2, + /* Version 3.8.7 and later */ + sqlite3_auto_extension, + sqlite3_bind_blob64, + sqlite3_bind_text64, + sqlite3_cancel_auto_extension, + sqlite3_load_extension, + sqlite3_malloc64, + sqlite3_msize, + sqlite3_realloc64, + sqlite3_reset_auto_extension, + sqlite3_result_blob64, + sqlite3_result_text64, + sqlite3_strglob, + /* Version 3.8.11 and later */ + (sqlite3_value*(*)(const sqlite3_value*))sqlite3_value_dup, + sqlite3_value_free, + sqlite3_result_zeroblob64, + sqlite3_bind_zeroblob64, + /* Version 3.9.0 and later */ + sqlite3_value_subtype, + sqlite3_result_subtype, + /* Version 3.10.0 and later */ + sqlite3_status64, + sqlite3_strlike, + sqlite3_db_cacheflush, + /* Version 3.12.0 and later */ + sqlite3_system_errno, + /* Version 3.14.0 and later */ + sqlite3_trace_v2, + sqlite3_expanded_sql, + /* Version 3.18.0 and later */ + sqlite3_set_last_insert_rowid, + /* Version 3.20.0 and later */ + sqlite3_prepare_v3, + sqlite3_prepare16_v3, + sqlite3_bind_pointer, + sqlite3_result_pointer, + sqlite3_value_pointer, + /* Version 3.22.0 and later */ + sqlite3_vtab_nochange, + sqlite3_value_nochange, + sqlite3_vtab_collation, + /* Version 3.24.0 and later */ + sqlite3_keyword_count, + sqlite3_keyword_name, + sqlite3_keyword_check, + sqlite3_str_new, + sqlite3_str_finish, + sqlite3_str_appendf, + sqlite3_str_vappendf, + sqlite3_str_append, + sqlite3_str_appendall, + sqlite3_str_appendchar, + sqlite3_str_reset, + sqlite3_str_errcode, + sqlite3_str_length, + sqlite3_str_value, + /* Version 3.25.0 and later */ + sqlite3_create_window_function, + /* Version 3.26.0 and later */ +#ifdef SQLITE_ENABLE_NORMALIZE + sqlite3_normalized_sql, +#else + 0, +#endif + /* Version 3.28.0 and later */ + sqlite3_stmt_isexplain, + sqlite3_value_frombind, + /* Version 3.30.0 and later */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + sqlite3_drop_modules, +#else + 0, +#endif + /* Version 3.31.0 and later */ + sqlite3_hard_heap_limit64, + sqlite3_uri_key, + sqlite3_filename_database, + sqlite3_filename_journal, + sqlite3_filename_wal, + /* Version 3.32.0 and later */ + sqlite3_create_filename, + sqlite3_free_filename, + sqlite3_database_file_object, + /* Version 3.34.0 and later */ + sqlite3_txn_state, + /* Version 3.36.1 and later */ + sqlite3_changes64, + sqlite3_total_changes64, + /* Version 3.37.0 and later */ + sqlite3_autovacuum_pages, + /* Version 3.38.0 and later */ + sqlite3_error_offset, + sqlite3_vtab_rhs_value, + sqlite3_vtab_distinct, + sqlite3_vtab_in, + sqlite3_vtab_in_first, + sqlite3_vtab_in_next +}; + +/* True if x is the directory separator character +*/ +#if SQLITE_OS_WIN +# define DirSep(X) ((X)=='/'||(X)=='\\') +#else +# define DirSep(X) ((X)=='/') +#endif + +/* +** Attempt to load an SQLite extension library contained in the file +** zFile. The entry point is zProc. zProc may be 0 in which case a +** default entry point name (sqlite3_extension_init) is used. Use +** of the default name is recommended. +** +** Return SQLITE_OK on success and SQLITE_ERROR if something goes wrong. +** +** If an error occurs and pzErrMsg is not 0, then fill *pzErrMsg with +** error message text. The calling function should free this memory +** by calling sqlite3DbFree(db, ). +*/ +static int sqlite3LoadExtension( + sqlite3 *db, /* Load the extension into this database connection */ + const char *zFile, /* Name of the shared library containing extension */ + const char *zProc, /* Entry point. Use "sqlite3_extension_init" if 0 */ + char **pzErrMsg /* Put error message here if not 0 */ +){ + sqlite3_vfs *pVfs = db->pVfs; + void *handle; + sqlite3_loadext_entry xInit; + char *zErrmsg = 0; + const char *zEntry; + char *zAltEntry = 0; + void **aHandle; + u64 nMsg = strlen(zFile); + int ii; + int rc; + + /* Shared library endings to try if zFile cannot be loaded as written */ + static const char *azEndings[] = { +#if SQLITE_OS_WIN + "dll" +#elif defined(__APPLE__) + "dylib" +#else + "so" +#endif + }; + + + if( pzErrMsg ) *pzErrMsg = 0; + + /* Ticket #1863. To avoid a creating security problems for older + ** applications that relink against newer versions of SQLite, the + ** ability to run load_extension is turned off by default. One + ** must call either sqlite3_enable_load_extension(db) or + ** sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION, 1, 0) + ** to turn on extension loading. + */ + if( (db->flags & SQLITE_LoadExtension)==0 ){ + if( pzErrMsg ){ + *pzErrMsg = sqlite3_mprintf("not authorized"); + } + return SQLITE_ERROR; + } + + zEntry = zProc ? zProc : "sqlite3_extension_init"; + + /* tag-20210611-1. Some dlopen() implementations will segfault if given + ** an oversize filename. Most filesystems have a pathname limit of 4K, + ** so limit the extension filename length to about twice that. + ** https://sqlite.org/forum/forumpost/08a0d6d9bf */ + if( nMsg>SQLITE_MAX_PATHLEN ) goto extension_not_found; + + handle = sqlite3OsDlOpen(pVfs, zFile); +#if SQLITE_OS_UNIX || SQLITE_OS_WIN + for(ii=0; ii sqlite3_example_init + ** C:/lib/mathfuncs.dll ==> sqlite3_mathfuncs_init + */ + if( xInit==0 && zProc==0 ){ + int iFile, iEntry, c; + int ncFile = sqlite3Strlen30(zFile); + zAltEntry = sqlite3_malloc64(ncFile+30); + if( zAltEntry==0 ){ + sqlite3OsDlClose(pVfs, handle); + return SQLITE_NOMEM_BKPT; + } + memcpy(zAltEntry, "sqlite3_", 8); + for(iFile=ncFile-1; iFile>=0 && !DirSep(zFile[iFile]); iFile--){} + iFile++; + if( sqlite3_strnicmp(zFile+iFile, "lib", 3)==0 ) iFile += 3; + for(iEntry=8; (c = zFile[iFile])!=0 && c!='.'; iFile++){ + if( sqlite3Isalpha(c) ){ + zAltEntry[iEntry++] = (char)sqlite3UpperToLower[(unsigned)c]; + } + } + memcpy(zAltEntry+iEntry, "_init", 6); + zEntry = zAltEntry; + xInit = (sqlite3_loadext_entry)sqlite3OsDlSym(pVfs, handle, zEntry); + } + if( xInit==0 ){ + if( pzErrMsg ){ + nMsg += strlen(zEntry) + 300; + *pzErrMsg = zErrmsg = sqlite3_malloc64(nMsg); + if( zErrmsg ){ + assert( nMsg<0x7fffffff ); /* zErrmsg would be NULL if not so */ + sqlite3_snprintf((int)nMsg, zErrmsg, + "no entry point [%s] in shared library [%s]", zEntry, zFile); + sqlite3OsDlError(pVfs, nMsg-1, zErrmsg); + } + } + sqlite3OsDlClose(pVfs, handle); + sqlite3_free(zAltEntry); + return SQLITE_ERROR; + } + sqlite3_free(zAltEntry); + rc = xInit(db, &zErrmsg, &sqlite3Apis); + if( rc ){ + if( rc==SQLITE_OK_LOAD_PERMANENTLY ) return SQLITE_OK; + if( pzErrMsg ){ + *pzErrMsg = sqlite3_mprintf("error during initialization: %s", zErrmsg); + } + sqlite3_free(zErrmsg); + sqlite3OsDlClose(pVfs, handle); + return SQLITE_ERROR; + } + + /* Append the new shared library handle to the db->aExtension array. */ + aHandle = sqlite3DbMallocZero(db, sizeof(handle)*(db->nExtension+1)); + if( aHandle==0 ){ + return SQLITE_NOMEM_BKPT; + } + if( db->nExtension>0 ){ + memcpy(aHandle, db->aExtension, sizeof(handle)*db->nExtension); + } + sqlite3DbFree(db, db->aExtension); + db->aExtension = aHandle; + + db->aExtension[db->nExtension++] = handle; + return SQLITE_OK; + +extension_not_found: + if( pzErrMsg ){ + nMsg += 300; + *pzErrMsg = zErrmsg = sqlite3_malloc64(nMsg); + if( zErrmsg ){ + assert( nMsg<0x7fffffff ); /* zErrmsg would be NULL if not so */ + sqlite3_snprintf((int)nMsg, zErrmsg, + "unable to open shared library [%.*s]", SQLITE_MAX_PATHLEN, zFile); + sqlite3OsDlError(pVfs, nMsg-1, zErrmsg); + } + } + return SQLITE_ERROR; +} +SQLITE_API int sqlite3_load_extension( + sqlite3 *db, /* Load the extension into this database connection */ + const char *zFile, /* Name of the shared library containing extension */ + const char *zProc, /* Entry point. Use "sqlite3_extension_init" if 0 */ + char **pzErrMsg /* Put error message here if not 0 */ +){ + int rc; + sqlite3_mutex_enter(db->mutex); + rc = sqlite3LoadExtension(db, zFile, zProc, pzErrMsg); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** Call this routine when the database connection is closing in order +** to clean up loaded extensions +*/ +SQLITE_PRIVATE void sqlite3CloseExtensions(sqlite3 *db){ + int i; + assert( sqlite3_mutex_held(db->mutex) ); + for(i=0; inExtension; i++){ + sqlite3OsDlClose(db->pVfs, db->aExtension[i]); + } + sqlite3DbFree(db, db->aExtension); +} + +/* +** Enable or disable extension loading. Extension loading is disabled by +** default so as not to open security holes in older applications. +*/ +SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff){ + sqlite3_mutex_enter(db->mutex); + if( onoff ){ + db->flags |= SQLITE_LoadExtension|SQLITE_LoadExtFunc; + }else{ + db->flags &= ~(u64)(SQLITE_LoadExtension|SQLITE_LoadExtFunc); + } + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +#endif /* !defined(SQLITE_OMIT_LOAD_EXTENSION) */ + +/* +** The following object holds the list of automatically loaded +** extensions. +** +** This list is shared across threads. The SQLITE_MUTEX_STATIC_MAIN +** mutex must be held while accessing this list. +*/ +typedef struct sqlite3AutoExtList sqlite3AutoExtList; +static SQLITE_WSD struct sqlite3AutoExtList { + u32 nExt; /* Number of entries in aExt[] */ + void (**aExt)(void); /* Pointers to the extension init functions */ +} sqlite3Autoext = { 0, 0 }; + +/* The "wsdAutoext" macro will resolve to the autoextension +** state vector. If writable static data is unsupported on the target, +** we have to locate the state vector at run-time. In the more common +** case where writable static data is supported, wsdStat can refer directly +** to the "sqlite3Autoext" state vector declared above. +*/ +#ifdef SQLITE_OMIT_WSD +# define wsdAutoextInit \ + sqlite3AutoExtList *x = &GLOBAL(sqlite3AutoExtList,sqlite3Autoext) +# define wsdAutoext x[0] +#else +# define wsdAutoextInit +# define wsdAutoext sqlite3Autoext +#endif + + +/* +** Register a statically linked extension that is automatically +** loaded by every new database connection. +*/ +SQLITE_API int sqlite3_auto_extension( + void (*xInit)(void) +){ + int rc = SQLITE_OK; +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3_initialize(); + if( rc ){ + return rc; + }else +#endif + { + u32 i; +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); +#endif + wsdAutoextInit; + sqlite3_mutex_enter(mutex); + for(i=0; i=0; i--){ + if( wsdAutoext.aExt[i]==xInit ){ + wsdAutoext.nExt--; + wsdAutoext.aExt[i] = wsdAutoext.aExt[wsdAutoext.nExt]; + n++; + break; + } + } + sqlite3_mutex_leave(mutex); + return n; +} + +/* +** Reset the automatic extension loading mechanism. +*/ +SQLITE_API void sqlite3_reset_auto_extension(void){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3_initialize()==SQLITE_OK ) +#endif + { +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); +#endif + wsdAutoextInit; + sqlite3_mutex_enter(mutex); + sqlite3_free(wsdAutoext.aExt); + wsdAutoext.aExt = 0; + wsdAutoext.nExt = 0; + sqlite3_mutex_leave(mutex); + } +} + +/* +** Load all automatic extensions. +** +** If anything goes wrong, set an error in the database connection. +*/ +SQLITE_PRIVATE void sqlite3AutoLoadExtensions(sqlite3 *db){ + u32 i; + int go = 1; + int rc; + sqlite3_loadext_entry xInit; + + wsdAutoextInit; + if( wsdAutoext.nExt==0 ){ + /* Common case: early out without every having to acquire a mutex */ + return; + } + for(i=0; go; i++){ + char *zErrmsg; +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); +#endif +#ifdef SQLITE_OMIT_LOAD_EXTENSION + const sqlite3_api_routines *pThunk = 0; +#else + const sqlite3_api_routines *pThunk = &sqlite3Apis; +#endif + sqlite3_mutex_enter(mutex); + if( i>=wsdAutoext.nExt ){ + xInit = 0; + go = 0; + }else{ + xInit = (sqlite3_loadext_entry)wsdAutoext.aExt[i]; + } + sqlite3_mutex_leave(mutex); + zErrmsg = 0; + if( xInit && (rc = xInit(db, &zErrmsg, pThunk))!=0 ){ + sqlite3ErrorWithMsg(db, rc, + "automatic extension loading failed: %s", zErrmsg); + go = 0; + } + sqlite3_free(zErrmsg); + } +} + +/************** End of loadext.c *********************************************/ +/************** Begin file pragma.c ******************************************/ +/* +** 2003 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used to implement the PRAGMA command. +*/ +/* #include "sqliteInt.h" */ + +#if !defined(SQLITE_ENABLE_LOCKING_STYLE) +# if defined(__APPLE__) +# define SQLITE_ENABLE_LOCKING_STYLE 1 +# else +# define SQLITE_ENABLE_LOCKING_STYLE 0 +# endif +#endif + +/*************************************************************************** +** The "pragma.h" include file is an automatically generated file that +** that includes the PragType_XXXX macro definitions and the aPragmaName[] +** object. This ensures that the aPragmaName[] table is arranged in +** lexicographical order to facility a binary search of the pragma name. +** Do not edit pragma.h directly. Edit and rerun the script in at +** ../tool/mkpragmatab.tcl. */ +/************** Include pragma.h in the middle of pragma.c *******************/ +/************** Begin file pragma.h ******************************************/ +/* DO NOT EDIT! +** This file is automatically generated by the script at +** ../tool/mkpragmatab.tcl. To update the set of pragmas, edit +** that script and rerun it. +*/ + +/* The various pragma types */ +#define PragTyp_ACTIVATE_EXTENSIONS 0 +#define PragTyp_ANALYSIS_LIMIT 1 +#define PragTyp_HEADER_VALUE 2 +#define PragTyp_AUTO_VACUUM 3 +#define PragTyp_FLAG 4 +#define PragTyp_BUSY_TIMEOUT 5 +#define PragTyp_CACHE_SIZE 6 +#define PragTyp_CACHE_SPILL 7 +#define PragTyp_CASE_SENSITIVE_LIKE 8 +#define PragTyp_COLLATION_LIST 9 +#define PragTyp_COMPILE_OPTIONS 10 +#define PragTyp_DATA_STORE_DIRECTORY 11 +#define PragTyp_DATABASE_LIST 12 +#define PragTyp_DEFAULT_CACHE_SIZE 13 +#define PragTyp_ENCODING 14 +#define PragTyp_FOREIGN_KEY_CHECK 15 +#define PragTyp_FOREIGN_KEY_LIST 16 +#define PragTyp_FUNCTION_LIST 17 +#define PragTyp_HARD_HEAP_LIMIT 18 +#define PragTyp_INCREMENTAL_VACUUM 19 +#define PragTyp_INDEX_INFO 20 +#define PragTyp_INDEX_LIST 21 +#define PragTyp_INTEGRITY_CHECK 22 +#define PragTyp_JOURNAL_MODE 23 +#define PragTyp_JOURNAL_SIZE_LIMIT 24 +#define PragTyp_LOCK_PROXY_FILE 25 +#define PragTyp_LOCKING_MODE 26 +#define PragTyp_PAGE_COUNT 27 +#define PragTyp_MMAP_SIZE 28 +#define PragTyp_MODULE_LIST 29 +#define PragTyp_OPTIMIZE 30 +#define PragTyp_PAGE_SIZE 31 +#define PragTyp_PRAGMA_LIST 32 +#define PragTyp_SECURE_DELETE 33 +#define PragTyp_SHRINK_MEMORY 34 +#define PragTyp_SOFT_HEAP_LIMIT 35 +#define PragTyp_SYNCHRONOUS 36 +#define PragTyp_TABLE_INFO 37 +#define PragTyp_TABLE_LIST 38 +#define PragTyp_TEMP_STORE 39 +#define PragTyp_TEMP_STORE_DIRECTORY 40 +#define PragTyp_THREADS 41 +#define PragTyp_WAL_AUTOCHECKPOINT 42 +#define PragTyp_WAL_CHECKPOINT 43 +#define PragTyp_LOCK_STATUS 44 +#define PragTyp_STATS 45 + +/* Property flags associated with various pragma. */ +#define PragFlg_NeedSchema 0x01 /* Force schema load before running */ +#define PragFlg_NoColumns 0x02 /* OP_ResultRow called with zero columns */ +#define PragFlg_NoColumns1 0x04 /* zero columns if RHS argument is present */ +#define PragFlg_ReadOnly 0x08 /* Read-only HEADER_VALUE */ +#define PragFlg_Result0 0x10 /* Acts as query when no argument */ +#define PragFlg_Result1 0x20 /* Acts as query when has one argument */ +#define PragFlg_SchemaOpt 0x40 /* Schema restricts name search if present */ +#define PragFlg_SchemaReq 0x80 /* Schema required - "main" is default */ + +/* Names of columns for pragmas that return multi-column result +** or that return single-column results where the name of the +** result column is different from the name of the pragma +*/ +static const char *const pragCName[] = { + /* 0 */ "id", /* Used by: foreign_key_list */ + /* 1 */ "seq", + /* 2 */ "table", + /* 3 */ "from", + /* 4 */ "to", + /* 5 */ "on_update", + /* 6 */ "on_delete", + /* 7 */ "match", + /* 8 */ "cid", /* Used by: table_xinfo */ + /* 9 */ "name", + /* 10 */ "type", + /* 11 */ "notnull", + /* 12 */ "dflt_value", + /* 13 */ "pk", + /* 14 */ "hidden", + /* table_info reuses 8 */ + /* 15 */ "schema", /* Used by: table_list */ + /* 16 */ "name", + /* 17 */ "type", + /* 18 */ "ncol", + /* 19 */ "wr", + /* 20 */ "strict", + /* 21 */ "seqno", /* Used by: index_xinfo */ + /* 22 */ "cid", + /* 23 */ "name", + /* 24 */ "desc", + /* 25 */ "coll", + /* 26 */ "key", + /* 27 */ "name", /* Used by: function_list */ + /* 28 */ "builtin", + /* 29 */ "type", + /* 30 */ "enc", + /* 31 */ "narg", + /* 32 */ "flags", + /* 33 */ "tbl", /* Used by: stats */ + /* 34 */ "idx", + /* 35 */ "wdth", + /* 36 */ "hght", + /* 37 */ "flgs", + /* 38 */ "seq", /* Used by: index_list */ + /* 39 */ "name", + /* 40 */ "unique", + /* 41 */ "origin", + /* 42 */ "partial", + /* 43 */ "table", /* Used by: foreign_key_check */ + /* 44 */ "rowid", + /* 45 */ "parent", + /* 46 */ "fkid", + /* index_info reuses 21 */ + /* 47 */ "seq", /* Used by: database_list */ + /* 48 */ "name", + /* 49 */ "file", + /* 50 */ "busy", /* Used by: wal_checkpoint */ + /* 51 */ "log", + /* 52 */ "checkpointed", + /* collation_list reuses 38 */ + /* 53 */ "database", /* Used by: lock_status */ + /* 54 */ "status", + /* 55 */ "cache_size", /* Used by: default_cache_size */ + /* module_list pragma_list reuses 9 */ + /* 56 */ "timeout", /* Used by: busy_timeout */ +}; + +/* Definitions of all built-in pragmas */ +typedef struct PragmaName { + const char *const zName; /* Name of pragma */ + u8 ePragTyp; /* PragTyp_XXX value */ + u8 mPragFlg; /* Zero or more PragFlg_XXX values */ + u8 iPragCName; /* Start of column names in pragCName[] */ + u8 nPragCName; /* Num of col names. 0 means use pragma name */ + u64 iArg; /* Extra argument */ +} PragmaName; +static const PragmaName aPragmaName[] = { +#if defined(SQLITE_ENABLE_CEROD) + {/* zName: */ "activate_extensions", + /* ePragTyp: */ PragTyp_ACTIVATE_EXTENSIONS, + /* ePragFlg: */ 0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif + {/* zName: */ "analysis_limit", + /* ePragTyp: */ PragTyp_ANALYSIS_LIMIT, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS) + {/* zName: */ "application_id", + /* ePragTyp: */ PragTyp_HEADER_VALUE, + /* ePragFlg: */ PragFlg_NoColumns1|PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ BTREE_APPLICATION_ID }, +#endif +#if !defined(SQLITE_OMIT_AUTOVACUUM) + {/* zName: */ "auto_vacuum", + /* ePragTyp: */ PragTyp_AUTO_VACUUM, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) +#if !defined(SQLITE_OMIT_AUTOMATIC_INDEX) + {/* zName: */ "automatic_index", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_AutoIndex }, +#endif +#endif + {/* zName: */ "busy_timeout", + /* ePragTyp: */ PragTyp_BUSY_TIMEOUT, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 56, 1, + /* iArg: */ 0 }, +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + {/* zName: */ "cache_size", + /* ePragTyp: */ PragTyp_CACHE_SIZE, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "cache_spill", + /* ePragTyp: */ PragTyp_CACHE_SPILL, + /* ePragFlg: */ PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_CASE_SENSITIVE_LIKE_PRAGMA) + {/* zName: */ "case_sensitive_like", + /* ePragTyp: */ PragTyp_CASE_SENSITIVE_LIKE, + /* ePragFlg: */ PragFlg_NoColumns, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif + {/* zName: */ "cell_size_check", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_CellSizeCk }, +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "checkpoint_fullfsync", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_CkptFullFSync }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) + {/* zName: */ "collation_list", + /* ePragTyp: */ PragTyp_COLLATION_LIST, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 38, 2, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_COMPILEOPTION_DIAGS) + {/* zName: */ "compile_options", + /* ePragTyp: */ PragTyp_COMPILE_OPTIONS, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "count_changes", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_CountRows }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && SQLITE_OS_WIN + {/* zName: */ "data_store_directory", + /* ePragTyp: */ PragTyp_DATA_STORE_DIRECTORY, + /* ePragFlg: */ PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS) + {/* zName: */ "data_version", + /* ePragTyp: */ PragTyp_HEADER_VALUE, + /* ePragFlg: */ PragFlg_ReadOnly|PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ BTREE_DATA_VERSION }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) + {/* zName: */ "database_list", + /* ePragTyp: */ PragTyp_DATABASE_LIST, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0, + /* ColNames: */ 47, 3, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && !defined(SQLITE_OMIT_DEPRECATED) + {/* zName: */ "default_cache_size", + /* ePragTyp: */ PragTyp_DEFAULT_CACHE_SIZE, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, + /* ColNames: */ 55, 1, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) +#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER) + {/* zName: */ "defer_foreign_keys", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_DeferFKs }, +#endif +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "empty_result_callbacks", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_NullCallback }, +#endif +#if !defined(SQLITE_OMIT_UTF16) + {/* zName: */ "encoding", + /* ePragTyp: */ PragTyp_ENCODING, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER) + {/* zName: */ "foreign_key_check", + /* ePragTyp: */ PragTyp_FOREIGN_KEY_CHECK, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 43, 4, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FOREIGN_KEY) + {/* zName: */ "foreign_key_list", + /* ePragTyp: */ PragTyp_FOREIGN_KEY_LIST, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 0, 8, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) +#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER) + {/* zName: */ "foreign_keys", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_ForeignKeys }, +#endif +#endif +#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS) + {/* zName: */ "freelist_count", + /* ePragTyp: */ PragTyp_HEADER_VALUE, + /* ePragFlg: */ PragFlg_ReadOnly|PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ BTREE_FREE_PAGE_COUNT }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "full_column_names", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_FullColNames }, + {/* zName: */ "fullfsync", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_FullFSync }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) +#if !defined(SQLITE_OMIT_INTROSPECTION_PRAGMAS) + {/* zName: */ "function_list", + /* ePragTyp: */ PragTyp_FUNCTION_LIST, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 27, 6, + /* iArg: */ 0 }, +#endif +#endif + {/* zName: */ "hard_heap_limit", + /* ePragTyp: */ PragTyp_HARD_HEAP_LIMIT, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) +#if !defined(SQLITE_OMIT_CHECK) + {/* zName: */ "ignore_check_constraints", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_IgnoreChecks }, +#endif +#endif +#if !defined(SQLITE_OMIT_AUTOVACUUM) + {/* zName: */ "incremental_vacuum", + /* ePragTyp: */ PragTyp_INCREMENTAL_VACUUM, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_NoColumns, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) + {/* zName: */ "index_info", + /* ePragTyp: */ PragTyp_INDEX_INFO, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 21, 3, + /* iArg: */ 0 }, + {/* zName: */ "index_list", + /* ePragTyp: */ PragTyp_INDEX_LIST, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 38, 5, + /* iArg: */ 0 }, + {/* zName: */ "index_xinfo", + /* ePragTyp: */ PragTyp_INDEX_INFO, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 21, 6, + /* iArg: */ 1 }, +#endif +#if !defined(SQLITE_OMIT_INTEGRITY_CHECK) + {/* zName: */ "integrity_check", + /* ePragTyp: */ PragTyp_INTEGRITY_CHECK, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + {/* zName: */ "journal_mode", + /* ePragTyp: */ PragTyp_JOURNAL_MODE, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, + {/* zName: */ "journal_size_limit", + /* ePragTyp: */ PragTyp_JOURNAL_SIZE_LIMIT, + /* ePragFlg: */ PragFlg_Result0|PragFlg_SchemaReq, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "legacy_alter_table", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_LegacyAlter }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && SQLITE_ENABLE_LOCKING_STYLE + {/* zName: */ "lock_proxy_file", + /* ePragTyp: */ PragTyp_LOCK_PROXY_FILE, + /* ePragFlg: */ PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + {/* zName: */ "lock_status", + /* ePragTyp: */ PragTyp_LOCK_STATUS, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 53, 2, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + {/* zName: */ "locking_mode", + /* ePragTyp: */ PragTyp_LOCKING_MODE, + /* ePragFlg: */ PragFlg_Result0|PragFlg_SchemaReq, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, + {/* zName: */ "max_page_count", + /* ePragTyp: */ PragTyp_PAGE_COUNT, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, + {/* zName: */ "mmap_size", + /* ePragTyp: */ PragTyp_MMAP_SIZE, + /* ePragFlg: */ 0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) +#if !defined(SQLITE_OMIT_VIRTUALTABLE) +#if !defined(SQLITE_OMIT_INTROSPECTION_PRAGMAS) + {/* zName: */ "module_list", + /* ePragTyp: */ PragTyp_MODULE_LIST, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 9, 1, + /* iArg: */ 0 }, +#endif +#endif +#endif + {/* zName: */ "optimize", + /* ePragTyp: */ PragTyp_OPTIMIZE, + /* ePragFlg: */ PragFlg_Result1|PragFlg_NeedSchema, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + {/* zName: */ "page_count", + /* ePragTyp: */ PragTyp_PAGE_COUNT, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, + {/* zName: */ "page_size", + /* ePragTyp: */ PragTyp_PAGE_SIZE, + /* ePragFlg: */ PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) +#if defined(SQLITE_DEBUG) + {/* zName: */ "parser_trace", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_ParserTrace }, +#endif +#endif +#if !defined(SQLITE_OMIT_INTROSPECTION_PRAGMAS) + {/* zName: */ "pragma_list", + /* ePragTyp: */ PragTyp_PRAGMA_LIST, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 9, 1, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "query_only", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_QueryOnly }, +#endif +#if !defined(SQLITE_OMIT_INTEGRITY_CHECK) + {/* zName: */ "quick_check", + /* ePragTyp: */ PragTyp_INTEGRITY_CHECK, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "read_uncommitted", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_ReadUncommit }, + {/* zName: */ "recursive_triggers", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_RecTriggers }, + {/* zName: */ "reverse_unordered_selects", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_ReverseOrder }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS) + {/* zName: */ "schema_version", + /* ePragTyp: */ PragTyp_HEADER_VALUE, + /* ePragFlg: */ PragFlg_NoColumns1|PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ BTREE_SCHEMA_VERSION }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + {/* zName: */ "secure_delete", + /* ePragTyp: */ PragTyp_SECURE_DELETE, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "short_column_names", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_ShortColNames }, +#endif + {/* zName: */ "shrink_memory", + /* ePragTyp: */ PragTyp_SHRINK_MEMORY, + /* ePragFlg: */ PragFlg_NoColumns, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, + {/* zName: */ "soft_heap_limit", + /* ePragTyp: */ PragTyp_SOFT_HEAP_LIMIT, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) +#if defined(SQLITE_DEBUG) + {/* zName: */ "sql_trace", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_SqlTrace }, +#endif +#endif +#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) && defined(SQLITE_DEBUG) + {/* zName: */ "stats", + /* ePragTyp: */ PragTyp_STATS, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq, + /* ColNames: */ 33, 5, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + {/* zName: */ "synchronous", + /* ePragTyp: */ PragTyp_SYNCHRONOUS, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) + {/* zName: */ "table_info", + /* ePragTyp: */ PragTyp_TABLE_INFO, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 8, 6, + /* iArg: */ 0 }, + {/* zName: */ "table_list", + /* ePragTyp: */ PragTyp_TABLE_LIST, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1, + /* ColNames: */ 15, 6, + /* iArg: */ 0 }, + {/* zName: */ "table_xinfo", + /* ePragTyp: */ PragTyp_TABLE_INFO, + /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, + /* ColNames: */ 8, 7, + /* iArg: */ 1 }, +#endif +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + {/* zName: */ "temp_store", + /* ePragTyp: */ PragTyp_TEMP_STORE, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, + {/* zName: */ "temp_store_directory", + /* ePragTyp: */ PragTyp_TEMP_STORE_DIRECTORY, + /* ePragFlg: */ PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#endif + {/* zName: */ "threads", + /* ePragTyp: */ PragTyp_THREADS, + /* ePragFlg: */ PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "trusted_schema", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_TrustedSchema }, +#endif +#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS) + {/* zName: */ "user_version", + /* ePragTyp: */ PragTyp_HEADER_VALUE, + /* ePragFlg: */ PragFlg_NoColumns1|PragFlg_Result0, + /* ColNames: */ 0, 0, + /* iArg: */ BTREE_USER_VERSION }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) +#if defined(SQLITE_DEBUG) + {/* zName: */ "vdbe_addoptrace", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_VdbeAddopTrace }, + {/* zName: */ "vdbe_debug", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_SqlTrace|SQLITE_VdbeListing|SQLITE_VdbeTrace }, + {/* zName: */ "vdbe_eqp", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_VdbeEQP }, + {/* zName: */ "vdbe_listing", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_VdbeListing }, + {/* zName: */ "vdbe_trace", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_VdbeTrace }, +#endif +#endif +#if !defined(SQLITE_OMIT_WAL) + {/* zName: */ "wal_autocheckpoint", + /* ePragTyp: */ PragTyp_WAL_AUTOCHECKPOINT, + /* ePragFlg: */ 0, + /* ColNames: */ 0, 0, + /* iArg: */ 0 }, + {/* zName: */ "wal_checkpoint", + /* ePragTyp: */ PragTyp_WAL_CHECKPOINT, + /* ePragFlg: */ PragFlg_NeedSchema, + /* ColNames: */ 50, 3, + /* iArg: */ 0 }, +#endif +#if !defined(SQLITE_OMIT_FLAG_PRAGMAS) + {/* zName: */ "writable_schema", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlg: */ PragFlg_Result0|PragFlg_NoColumns1, + /* ColNames: */ 0, 0, + /* iArg: */ SQLITE_WriteSchema|SQLITE_NoSchemaError }, +#endif +}; +/* Number of pragmas: 68 on by default, 78 total. */ + +/************** End of pragma.h **********************************************/ +/************** Continuing where we left off in pragma.c *********************/ + +/* +** Interpret the given string as a safety level. Return 0 for OFF, +** 1 for ON or NORMAL, 2 for FULL, and 3 for EXTRA. Return 1 for an empty or +** unrecognized string argument. The FULL and EXTRA option is disallowed +** if the omitFull parameter it 1. +** +** Note that the values returned are one less that the values that +** should be passed into sqlite3BtreeSetSafetyLevel(). The is done +** to support legacy SQL code. The safety level used to be boolean +** and older scripts may have used numbers 0 for OFF and 1 for ON. +*/ +static u8 getSafetyLevel(const char *z, int omitFull, u8 dflt){ + /* 123456789 123456789 123 */ + static const char zText[] = "onoffalseyestruextrafull"; + static const u8 iOffset[] = {0, 1, 2, 4, 9, 12, 15, 20}; + static const u8 iLength[] = {2, 2, 3, 5, 3, 4, 5, 4}; + static const u8 iValue[] = {1, 0, 0, 0, 1, 1, 3, 2}; + /* on no off false yes true extra full */ + int i, n; + if( sqlite3Isdigit(*z) ){ + return (u8)sqlite3Atoi(z); + } + n = sqlite3Strlen30(z); + for(i=0; i=0&&i<=2)?i:0); +} +#endif /* ifndef SQLITE_OMIT_AUTOVACUUM */ + +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +/* +** Interpret the given string as a temp db location. Return 1 for file +** backed temporary databases, 2 for the Red-Black tree in memory database +** and 0 to use the compile-time default. +*/ +static int getTempStore(const char *z){ + if( z[0]>='0' && z[0]<='2' ){ + return z[0] - '0'; + }else if( sqlite3StrICmp(z, "file")==0 ){ + return 1; + }else if( sqlite3StrICmp(z, "memory")==0 ){ + return 2; + }else{ + return 0; + } +} +#endif /* SQLITE_PAGER_PRAGMAS */ + +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +/* +** Invalidate temp storage, either when the temp storage is changed +** from default, or when 'file' and the temp_store_directory has changed +*/ +static int invalidateTempStorage(Parse *pParse){ + sqlite3 *db = pParse->db; + if( db->aDb[1].pBt!=0 ){ + if( !db->autoCommit + || sqlite3BtreeTxnState(db->aDb[1].pBt)!=SQLITE_TXN_NONE + ){ + sqlite3ErrorMsg(pParse, "temporary storage cannot be changed " + "from within a transaction"); + return SQLITE_ERROR; + } + sqlite3BtreeClose(db->aDb[1].pBt); + db->aDb[1].pBt = 0; + sqlite3ResetAllSchemasOfConnection(db); + } + return SQLITE_OK; +} +#endif /* SQLITE_PAGER_PRAGMAS */ + +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +/* +** If the TEMP database is open, close it and mark the database schema +** as needing reloading. This must be done when using the SQLITE_TEMP_STORE +** or DEFAULT_TEMP_STORE pragmas. +*/ +static int changeTempStorage(Parse *pParse, const char *zStorageType){ + int ts = getTempStore(zStorageType); + sqlite3 *db = pParse->db; + if( db->temp_store==ts ) return SQLITE_OK; + if( invalidateTempStorage( pParse ) != SQLITE_OK ){ + return SQLITE_ERROR; + } + db->temp_store = (u8)ts; + return SQLITE_OK; +} +#endif /* SQLITE_PAGER_PRAGMAS */ + +/* +** Set result column names for a pragma. +*/ +static void setPragmaResultColumnNames( + Vdbe *v, /* The query under construction */ + const PragmaName *pPragma /* The pragma */ +){ + u8 n = pPragma->nPragCName; + sqlite3VdbeSetNumCols(v, n==0 ? 1 : n); + if( n==0 ){ + sqlite3VdbeSetColName(v, 0, COLNAME_NAME, pPragma->zName, SQLITE_STATIC); + }else{ + int i, j; + for(i=0, j=pPragma->iPragCName; iautoCommit ){ + Db *pDb = db->aDb; + int n = db->nDb; + assert( SQLITE_FullFSync==PAGER_FULLFSYNC ); + assert( SQLITE_CkptFullFSync==PAGER_CKPT_FULLFSYNC ); + assert( SQLITE_CacheSpill==PAGER_CACHESPILL ); + assert( (PAGER_FULLFSYNC | PAGER_CKPT_FULLFSYNC | PAGER_CACHESPILL) + == PAGER_FLAGS_MASK ); + assert( (pDb->safety_level & PAGER_SYNCHRONOUS_MASK)==pDb->safety_level ); + while( (n--) > 0 ){ + if( pDb->pBt ){ + sqlite3BtreeSetPagerFlags(pDb->pBt, + pDb->safety_level | (db->flags & PAGER_FLAGS_MASK) ); + } + pDb++; + } + } +} +#else +# define setAllPagerFlags(X) /* no-op */ +#endif + + +/* +** Return a human-readable name for a constraint resolution action. +*/ +#ifndef SQLITE_OMIT_FOREIGN_KEY +static const char *actionName(u8 action){ + const char *zName; + switch( action ){ + case OE_SetNull: zName = "SET NULL"; break; + case OE_SetDflt: zName = "SET DEFAULT"; break; + case OE_Cascade: zName = "CASCADE"; break; + case OE_Restrict: zName = "RESTRICT"; break; + default: zName = "NO ACTION"; + assert( action==OE_None ); break; + } + return zName; +} +#endif + + +/* +** Parameter eMode must be one of the PAGER_JOURNALMODE_XXX constants +** defined in pager.h. This function returns the associated lowercase +** journal-mode name. +*/ +SQLITE_PRIVATE const char *sqlite3JournalModename(int eMode){ + static char * const azModeName[] = { + "delete", "persist", "off", "truncate", "memory" +#ifndef SQLITE_OMIT_WAL + , "wal" +#endif + }; + assert( PAGER_JOURNALMODE_DELETE==0 ); + assert( PAGER_JOURNALMODE_PERSIST==1 ); + assert( PAGER_JOURNALMODE_OFF==2 ); + assert( PAGER_JOURNALMODE_TRUNCATE==3 ); + assert( PAGER_JOURNALMODE_MEMORY==4 ); + assert( PAGER_JOURNALMODE_WAL==5 ); + assert( eMode>=0 && eMode<=ArraySize(azModeName) ); + + if( eMode==ArraySize(azModeName) ) return 0; + return azModeName[eMode]; +} + +/* +** Locate a pragma in the aPragmaName[] array. +*/ +static const PragmaName *pragmaLocate(const char *zName){ + int upr, lwr, mid = 0, rc; + lwr = 0; + upr = ArraySize(aPragmaName)-1; + while( lwr<=upr ){ + mid = (lwr+upr)/2; + rc = sqlite3_stricmp(zName, aPragmaName[mid].zName); + if( rc==0 ) break; + if( rc<0 ){ + upr = mid - 1; + }else{ + lwr = mid + 1; + } + } + return lwr>upr ? 0 : &aPragmaName[mid]; +} + +/* +** Create zero or more entries in the output for the SQL functions +** defined by FuncDef p. +*/ +static void pragmaFunclistLine( + Vdbe *v, /* The prepared statement being created */ + FuncDef *p, /* A particular function definition */ + int isBuiltin, /* True if this is a built-in function */ + int showInternFuncs /* True if showing internal functions */ +){ + for(; p; p=p->pNext){ + const char *zType; + static const u32 mask = + SQLITE_DETERMINISTIC | + SQLITE_DIRECTONLY | + SQLITE_SUBTYPE | + SQLITE_INNOCUOUS | + SQLITE_FUNC_INTERNAL + ; + static const char *azEnc[] = { 0, "utf8", "utf16le", "utf16be" }; + + assert( SQLITE_FUNC_ENCMASK==0x3 ); + assert( strcmp(azEnc[SQLITE_UTF8],"utf8")==0 ); + assert( strcmp(azEnc[SQLITE_UTF16LE],"utf16le")==0 ); + assert( strcmp(azEnc[SQLITE_UTF16BE],"utf16be")==0 ); + + if( p->xSFunc==0 ) continue; + if( (p->funcFlags & SQLITE_FUNC_INTERNAL)!=0 + && showInternFuncs==0 + ){ + continue; + } + if( p->xValue!=0 ){ + zType = "w"; + }else if( p->xFinalize!=0 ){ + zType = "a"; + }else{ + zType = "s"; + } + sqlite3VdbeMultiLoad(v, 1, "sissii", + p->zName, isBuiltin, + zType, azEnc[p->funcFlags&SQLITE_FUNC_ENCMASK], + p->nArg, + (p->funcFlags & mask) ^ SQLITE_INNOCUOUS + ); + } +} + + +/* +** Helper subroutine for PRAGMA integrity_check: +** +** Generate code to output a single-column result row with a value of the +** string held in register 3. Decrement the result count in register 1 +** and halt if the maximum number of result rows have been issued. +*/ +static int integrityCheckResultRow(Vdbe *v){ + int addr; + sqlite3VdbeAddOp2(v, OP_ResultRow, 3, 1); + addr = sqlite3VdbeAddOp3(v, OP_IfPos, 1, sqlite3VdbeCurrentAddr(v)+2, 1); + VdbeCoverage(v); + sqlite3VdbeAddOp0(v, OP_Halt); + return addr; +} + +/* +** Process a pragma statement. +** +** Pragmas are of this form: +** +** PRAGMA [schema.]id [= value] +** +** The identifier might also be a string. The value is a string, and +** identifier, or a number. If minusFlag is true, then the value is +** a number that was preceded by a minus sign. +** +** If the left side is "database.id" then pId1 is the database name +** and pId2 is the id. If the left side is just "id" then pId1 is the +** id and pId2 is any empty string. +*/ +SQLITE_PRIVATE void sqlite3Pragma( + Parse *pParse, + Token *pId1, /* First part of [schema.]id field */ + Token *pId2, /* Second part of [schema.]id field, or NULL */ + Token *pValue, /* Token for , or NULL */ + int minusFlag /* True if a '-' sign preceded */ +){ + char *zLeft = 0; /* Nul-terminated UTF-8 string */ + char *zRight = 0; /* Nul-terminated UTF-8 string , or NULL */ + const char *zDb = 0; /* The database name */ + Token *pId; /* Pointer to token */ + char *aFcntl[4]; /* Argument to SQLITE_FCNTL_PRAGMA */ + int iDb; /* Database index for */ + int rc; /* return value form SQLITE_FCNTL_PRAGMA */ + sqlite3 *db = pParse->db; /* The database connection */ + Db *pDb; /* The specific database being pragmaed */ + Vdbe *v = sqlite3GetVdbe(pParse); /* Prepared statement */ + const PragmaName *pPragma; /* The pragma */ + + if( v==0 ) return; + sqlite3VdbeRunOnlyOnce(v); + pParse->nMem = 2; + + /* Interpret the [schema.] part of the pragma statement. iDb is the + ** index of the database this pragma is being applied to in db.aDb[]. */ + iDb = sqlite3TwoPartName(pParse, pId1, pId2, &pId); + if( iDb<0 ) return; + pDb = &db->aDb[iDb]; + + /* If the temp database has been explicitly named as part of the + ** pragma, make sure it is open. + */ + if( iDb==1 && sqlite3OpenTempDatabase(pParse) ){ + return; + } + + zLeft = sqlite3NameFromToken(db, pId); + if( !zLeft ) return; + if( minusFlag ){ + zRight = sqlite3MPrintf(db, "-%T", pValue); + }else{ + zRight = sqlite3NameFromToken(db, pValue); + } + + assert( pId2 ); + zDb = pId2->n>0 ? pDb->zDbSName : 0; + if( sqlite3AuthCheck(pParse, SQLITE_PRAGMA, zLeft, zRight, zDb) ){ + goto pragma_out; + } + + /* Send an SQLITE_FCNTL_PRAGMA file-control to the underlying VFS + ** connection. If it returns SQLITE_OK, then assume that the VFS + ** handled the pragma and generate a no-op prepared statement. + ** + ** IMPLEMENTATION-OF: R-12238-55120 Whenever a PRAGMA statement is parsed, + ** an SQLITE_FCNTL_PRAGMA file control is sent to the open sqlite3_file + ** object corresponding to the database file to which the pragma + ** statement refers. + ** + ** IMPLEMENTATION-OF: R-29875-31678 The argument to the SQLITE_FCNTL_PRAGMA + ** file control is an array of pointers to strings (char**) in which the + ** second element of the array is the name of the pragma and the third + ** element is the argument to the pragma or NULL if the pragma has no + ** argument. + */ + aFcntl[0] = 0; + aFcntl[1] = zLeft; + aFcntl[2] = zRight; + aFcntl[3] = 0; + db->busyHandler.nBusy = 0; + rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_PRAGMA, (void*)aFcntl); + if( rc==SQLITE_OK ){ + sqlite3VdbeSetNumCols(v, 1); + sqlite3VdbeSetColName(v, 0, COLNAME_NAME, aFcntl[0], SQLITE_TRANSIENT); + returnSingleText(v, aFcntl[0]); + sqlite3_free(aFcntl[0]); + goto pragma_out; + } + if( rc!=SQLITE_NOTFOUND ){ + if( aFcntl[0] ){ + sqlite3ErrorMsg(pParse, "%s", aFcntl[0]); + sqlite3_free(aFcntl[0]); + } + pParse->nErr++; + pParse->rc = rc; + goto pragma_out; + } + + /* Locate the pragma in the lookup table */ + pPragma = pragmaLocate(zLeft); + if( pPragma==0 ){ + /* IMP: R-43042-22504 No error messages are generated if an + ** unknown pragma is issued. */ + goto pragma_out; + } + + /* Make sure the database schema is loaded if the pragma requires that */ + if( (pPragma->mPragFlg & PragFlg_NeedSchema)!=0 ){ + if( sqlite3ReadSchema(pParse) ) goto pragma_out; + } + + /* Register the result column names for pragmas that return results */ + if( (pPragma->mPragFlg & PragFlg_NoColumns)==0 + && ((pPragma->mPragFlg & PragFlg_NoColumns1)==0 || zRight==0) + ){ + setPragmaResultColumnNames(v, pPragma); + } + + /* Jump to the appropriate pragma handler */ + switch( pPragma->ePragTyp ){ + +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && !defined(SQLITE_OMIT_DEPRECATED) + /* + ** PRAGMA [schema.]default_cache_size + ** PRAGMA [schema.]default_cache_size=N + ** + ** The first form reports the current persistent setting for the + ** page cache size. The value returned is the maximum number of + ** pages in the page cache. The second form sets both the current + ** page cache size value and the persistent page cache size value + ** stored in the database file. + ** + ** Older versions of SQLite would set the default cache size to a + ** negative number to indicate synchronous=OFF. These days, synchronous + ** is always on by default regardless of the sign of the default cache + ** size. But continue to take the absolute value of the default cache + ** size of historical compatibility. + */ + case PragTyp_DEFAULT_CACHE_SIZE: { + static const int iLn = VDBE_OFFSET_LINENO(2); + static const VdbeOpList getCacheSize[] = { + { OP_Transaction, 0, 0, 0}, /* 0 */ + { OP_ReadCookie, 0, 1, BTREE_DEFAULT_CACHE_SIZE}, /* 1 */ + { OP_IfPos, 1, 8, 0}, + { OP_Integer, 0, 2, 0}, + { OP_Subtract, 1, 2, 1}, + { OP_IfPos, 1, 8, 0}, + { OP_Integer, 0, 1, 0}, /* 6 */ + { OP_Noop, 0, 0, 0}, + { OP_ResultRow, 1, 1, 0}, + }; + VdbeOp *aOp; + sqlite3VdbeUsesBtree(v, iDb); + if( !zRight ){ + pParse->nMem += 2; + sqlite3VdbeVerifyNoMallocRequired(v, ArraySize(getCacheSize)); + aOp = sqlite3VdbeAddOpList(v, ArraySize(getCacheSize), getCacheSize, iLn); + if( ONLY_IF_REALLOC_STRESS(aOp==0) ) break; + aOp[0].p1 = iDb; + aOp[1].p1 = iDb; + aOp[6].p1 = SQLITE_DEFAULT_CACHE_SIZE; + }else{ + int size = sqlite3AbsInt32(sqlite3Atoi(zRight)); + sqlite3BeginWriteOperation(pParse, 0, iDb); + sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_DEFAULT_CACHE_SIZE, size); + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + pDb->pSchema->cache_size = size; + sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); + } + break; + } +#endif /* !SQLITE_OMIT_PAGER_PRAGMAS && !SQLITE_OMIT_DEPRECATED */ + +#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) + /* + ** PRAGMA [schema.]page_size + ** PRAGMA [schema.]page_size=N + ** + ** The first form reports the current setting for the + ** database page size in bytes. The second form sets the + ** database page size value. The value can only be set if + ** the database has not yet been created. + */ + case PragTyp_PAGE_SIZE: { + Btree *pBt = pDb->pBt; + assert( pBt!=0 ); + if( !zRight ){ + int size = ALWAYS(pBt) ? sqlite3BtreeGetPageSize(pBt) : 0; + returnSingleInt(v, size); + }else{ + /* Malloc may fail when setting the page-size, as there is an internal + ** buffer that the pager module resizes using sqlite3_realloc(). + */ + db->nextPagesize = sqlite3Atoi(zRight); + if( SQLITE_NOMEM==sqlite3BtreeSetPageSize(pBt, db->nextPagesize,0,0) ){ + sqlite3OomFault(db); + } + } + break; + } + + /* + ** PRAGMA [schema.]secure_delete + ** PRAGMA [schema.]secure_delete=ON/OFF/FAST + ** + ** The first form reports the current setting for the + ** secure_delete flag. The second form changes the secure_delete + ** flag setting and reports the new value. + */ + case PragTyp_SECURE_DELETE: { + Btree *pBt = pDb->pBt; + int b = -1; + assert( pBt!=0 ); + if( zRight ){ + if( sqlite3_stricmp(zRight, "fast")==0 ){ + b = 2; + }else{ + b = sqlite3GetBoolean(zRight, 0); + } + } + if( pId2->n==0 && b>=0 ){ + int ii; + for(ii=0; iinDb; ii++){ + sqlite3BtreeSecureDelete(db->aDb[ii].pBt, b); + } + } + b = sqlite3BtreeSecureDelete(pBt, b); + returnSingleInt(v, b); + break; + } + + /* + ** PRAGMA [schema.]max_page_count + ** PRAGMA [schema.]max_page_count=N + ** + ** The first form reports the current setting for the + ** maximum number of pages in the database file. The + ** second form attempts to change this setting. Both + ** forms return the current setting. + ** + ** The absolute value of N is used. This is undocumented and might + ** change. The only purpose is to provide an easy way to test + ** the sqlite3AbsInt32() function. + ** + ** PRAGMA [schema.]page_count + ** + ** Return the number of pages in the specified database. + */ + case PragTyp_PAGE_COUNT: { + int iReg; + i64 x = 0; + sqlite3CodeVerifySchema(pParse, iDb); + iReg = ++pParse->nMem; + if( sqlite3Tolower(zLeft[0])=='p' ){ + sqlite3VdbeAddOp2(v, OP_Pagecount, iDb, iReg); + }else{ + if( zRight && sqlite3DecOrHexToI64(zRight,&x)==0 ){ + if( x<0 ) x = 0; + else if( x>0xfffffffe ) x = 0xfffffffe; + }else{ + x = 0; + } + sqlite3VdbeAddOp3(v, OP_MaxPgcnt, iDb, iReg, (int)x); + } + sqlite3VdbeAddOp2(v, OP_ResultRow, iReg, 1); + break; + } + + /* + ** PRAGMA [schema.]locking_mode + ** PRAGMA [schema.]locking_mode = (normal|exclusive) + */ + case PragTyp_LOCKING_MODE: { + const char *zRet = "normal"; + int eMode = getLockingMode(zRight); + + if( pId2->n==0 && eMode==PAGER_LOCKINGMODE_QUERY ){ + /* Simple "PRAGMA locking_mode;" statement. This is a query for + ** the current default locking mode (which may be different to + ** the locking-mode of the main database). + */ + eMode = db->dfltLockMode; + }else{ + Pager *pPager; + if( pId2->n==0 ){ + /* This indicates that no database name was specified as part + ** of the PRAGMA command. In this case the locking-mode must be + ** set on all attached databases, as well as the main db file. + ** + ** Also, the sqlite3.dfltLockMode variable is set so that + ** any subsequently attached databases also use the specified + ** locking mode. + */ + int ii; + assert(pDb==&db->aDb[0]); + for(ii=2; iinDb; ii++){ + pPager = sqlite3BtreePager(db->aDb[ii].pBt); + sqlite3PagerLockingMode(pPager, eMode); + } + db->dfltLockMode = (u8)eMode; + } + pPager = sqlite3BtreePager(pDb->pBt); + eMode = sqlite3PagerLockingMode(pPager, eMode); + } + + assert( eMode==PAGER_LOCKINGMODE_NORMAL + || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); + if( eMode==PAGER_LOCKINGMODE_EXCLUSIVE ){ + zRet = "exclusive"; + } + returnSingleText(v, zRet); + break; + } + + /* + ** PRAGMA [schema.]journal_mode + ** PRAGMA [schema.]journal_mode = + ** (delete|persist|off|truncate|memory|wal|off) + */ + case PragTyp_JOURNAL_MODE: { + int eMode; /* One of the PAGER_JOURNALMODE_XXX symbols */ + int ii; /* Loop counter */ + + if( zRight==0 ){ + /* If there is no "=MODE" part of the pragma, do a query for the + ** current mode */ + eMode = PAGER_JOURNALMODE_QUERY; + }else{ + const char *zMode; + int n = sqlite3Strlen30(zRight); + for(eMode=0; (zMode = sqlite3JournalModename(eMode))!=0; eMode++){ + if( sqlite3StrNICmp(zRight, zMode, n)==0 ) break; + } + if( !zMode ){ + /* If the "=MODE" part does not match any known journal mode, + ** then do a query */ + eMode = PAGER_JOURNALMODE_QUERY; + } + if( eMode==PAGER_JOURNALMODE_OFF && (db->flags & SQLITE_Defensive)!=0 ){ + /* Do not allow journal-mode "OFF" in defensive since the database + ** can become corrupted using ordinary SQL when the journal is off */ + eMode = PAGER_JOURNALMODE_QUERY; + } + } + if( eMode==PAGER_JOURNALMODE_QUERY && pId2->n==0 ){ + /* Convert "PRAGMA journal_mode" into "PRAGMA main.journal_mode" */ + iDb = 0; + pId2->n = 1; + } + for(ii=db->nDb-1; ii>=0; ii--){ + if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){ + sqlite3VdbeUsesBtree(v, ii); + sqlite3VdbeAddOp3(v, OP_JournalMode, ii, 1, eMode); + } + } + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1); + break; + } + + /* + ** PRAGMA [schema.]journal_size_limit + ** PRAGMA [schema.]journal_size_limit=N + ** + ** Get or set the size limit on rollback journal files. + */ + case PragTyp_JOURNAL_SIZE_LIMIT: { + Pager *pPager = sqlite3BtreePager(pDb->pBt); + i64 iLimit = -2; + if( zRight ){ + sqlite3DecOrHexToI64(zRight, &iLimit); + if( iLimit<-1 ) iLimit = -1; + } + iLimit = sqlite3PagerJournalSizeLimit(pPager, iLimit); + returnSingleInt(v, iLimit); + break; + } + +#endif /* SQLITE_OMIT_PAGER_PRAGMAS */ + + /* + ** PRAGMA [schema.]auto_vacuum + ** PRAGMA [schema.]auto_vacuum=N + ** + ** Get or set the value of the database 'auto-vacuum' parameter. + ** The value is one of: 0 NONE 1 FULL 2 INCREMENTAL + */ +#ifndef SQLITE_OMIT_AUTOVACUUM + case PragTyp_AUTO_VACUUM: { + Btree *pBt = pDb->pBt; + assert( pBt!=0 ); + if( !zRight ){ + returnSingleInt(v, sqlite3BtreeGetAutoVacuum(pBt)); + }else{ + int eAuto = getAutoVacuum(zRight); + assert( eAuto>=0 && eAuto<=2 ); + db->nextAutovac = (u8)eAuto; + /* Call SetAutoVacuum() to set initialize the internal auto and + ** incr-vacuum flags. This is required in case this connection + ** creates the database file. It is important that it is created + ** as an auto-vacuum capable db. + */ + rc = sqlite3BtreeSetAutoVacuum(pBt, eAuto); + if( rc==SQLITE_OK && (eAuto==1 || eAuto==2) ){ + /* When setting the auto_vacuum mode to either "full" or + ** "incremental", write the value of meta[6] in the database + ** file. Before writing to meta[6], check that meta[3] indicates + ** that this really is an auto-vacuum capable database. + */ + static const int iLn = VDBE_OFFSET_LINENO(2); + static const VdbeOpList setMeta6[] = { + { OP_Transaction, 0, 1, 0}, /* 0 */ + { OP_ReadCookie, 0, 1, BTREE_LARGEST_ROOT_PAGE}, + { OP_If, 1, 0, 0}, /* 2 */ + { OP_Halt, SQLITE_OK, OE_Abort, 0}, /* 3 */ + { OP_SetCookie, 0, BTREE_INCR_VACUUM, 0}, /* 4 */ + }; + VdbeOp *aOp; + int iAddr = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeVerifyNoMallocRequired(v, ArraySize(setMeta6)); + aOp = sqlite3VdbeAddOpList(v, ArraySize(setMeta6), setMeta6, iLn); + if( ONLY_IF_REALLOC_STRESS(aOp==0) ) break; + aOp[0].p1 = iDb; + aOp[1].p1 = iDb; + aOp[2].p2 = iAddr+4; + aOp[4].p1 = iDb; + aOp[4].p3 = eAuto - 1; + sqlite3VdbeUsesBtree(v, iDb); + } + } + break; + } +#endif + + /* + ** PRAGMA [schema.]incremental_vacuum(N) + ** + ** Do N steps of incremental vacuuming on a database. + */ +#ifndef SQLITE_OMIT_AUTOVACUUM + case PragTyp_INCREMENTAL_VACUUM: { + int iLimit, addr; + if( zRight==0 || !sqlite3GetInt32(zRight, &iLimit) || iLimit<=0 ){ + iLimit = 0x7fffffff; + } + sqlite3BeginWriteOperation(pParse, 0, iDb); + sqlite3VdbeAddOp2(v, OP_Integer, iLimit, 1); + addr = sqlite3VdbeAddOp1(v, OP_IncrVacuum, iDb); VdbeCoverage(v); + sqlite3VdbeAddOp1(v, OP_ResultRow, 1); + sqlite3VdbeAddOp2(v, OP_AddImm, 1, -1); + sqlite3VdbeAddOp2(v, OP_IfPos, 1, addr); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addr); + break; + } +#endif + +#ifndef SQLITE_OMIT_PAGER_PRAGMAS + /* + ** PRAGMA [schema.]cache_size + ** PRAGMA [schema.]cache_size=N + ** + ** The first form reports the current local setting for the + ** page cache size. The second form sets the local + ** page cache size value. If N is positive then that is the + ** number of pages in the cache. If N is negative, then the + ** number of pages is adjusted so that the cache uses -N kibibytes + ** of memory. + */ + case PragTyp_CACHE_SIZE: { + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( !zRight ){ + returnSingleInt(v, pDb->pSchema->cache_size); + }else{ + int size = sqlite3Atoi(zRight); + pDb->pSchema->cache_size = size; + sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); + } + break; + } + + /* + ** PRAGMA [schema.]cache_spill + ** PRAGMA cache_spill=BOOLEAN + ** PRAGMA [schema.]cache_spill=N + ** + ** The first form reports the current local setting for the + ** page cache spill size. The second form turns cache spill on + ** or off. When turnning cache spill on, the size is set to the + ** current cache_size. The third form sets a spill size that + ** may be different form the cache size. + ** If N is positive then that is the + ** number of pages in the cache. If N is negative, then the + ** number of pages is adjusted so that the cache uses -N kibibytes + ** of memory. + ** + ** If the number of cache_spill pages is less then the number of + ** cache_size pages, no spilling occurs until the page count exceeds + ** the number of cache_size pages. + ** + ** The cache_spill=BOOLEAN setting applies to all attached schemas, + ** not just the schema specified. + */ + case PragTyp_CACHE_SPILL: { + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( !zRight ){ + returnSingleInt(v, + (db->flags & SQLITE_CacheSpill)==0 ? 0 : + sqlite3BtreeSetSpillSize(pDb->pBt,0)); + }else{ + int size = 1; + if( sqlite3GetInt32(zRight, &size) ){ + sqlite3BtreeSetSpillSize(pDb->pBt, size); + } + if( sqlite3GetBoolean(zRight, size!=0) ){ + db->flags |= SQLITE_CacheSpill; + }else{ + db->flags &= ~(u64)SQLITE_CacheSpill; + } + setAllPagerFlags(db); + } + break; + } + + /* + ** PRAGMA [schema.]mmap_size(N) + ** + ** Used to set mapping size limit. The mapping size limit is + ** used to limit the aggregate size of all memory mapped regions of the + ** database file. If this parameter is set to zero, then memory mapping + ** is not used at all. If N is negative, then the default memory map + ** limit determined by sqlite3_config(SQLITE_CONFIG_MMAP_SIZE) is set. + ** The parameter N is measured in bytes. + ** + ** This value is advisory. The underlying VFS is free to memory map + ** as little or as much as it wants. Except, if N is set to 0 then the + ** upper layers will never invoke the xFetch interfaces to the VFS. + */ + case PragTyp_MMAP_SIZE: { + sqlite3_int64 sz; +#if SQLITE_MAX_MMAP_SIZE>0 + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( zRight ){ + int ii; + sqlite3DecOrHexToI64(zRight, &sz); + if( sz<0 ) sz = sqlite3GlobalConfig.szMmap; + if( pId2->n==0 ) db->szMmap = sz; + for(ii=db->nDb-1; ii>=0; ii--){ + if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){ + sqlite3BtreeSetMmapLimit(db->aDb[ii].pBt, sz); + } + } + } + sz = -1; + rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_MMAP_SIZE, &sz); +#else + sz = 0; + rc = SQLITE_OK; +#endif + if( rc==SQLITE_OK ){ + returnSingleInt(v, sz); + }else if( rc!=SQLITE_NOTFOUND ){ + pParse->nErr++; + pParse->rc = rc; + } + break; + } + + /* + ** PRAGMA temp_store + ** PRAGMA temp_store = "default"|"memory"|"file" + ** + ** Return or set the local value of the temp_store flag. Changing + ** the local value does not make changes to the disk file and the default + ** value will be restored the next time the database is opened. + ** + ** Note that it is possible for the library compile-time options to + ** override this setting + */ + case PragTyp_TEMP_STORE: { + if( !zRight ){ + returnSingleInt(v, db->temp_store); + }else{ + changeTempStorage(pParse, zRight); + } + break; + } + + /* + ** PRAGMA temp_store_directory + ** PRAGMA temp_store_directory = ""|"directory_name" + ** + ** Return or set the local value of the temp_store_directory flag. Changing + ** the value sets a specific directory to be used for temporary files. + ** Setting to a null string reverts to the default temporary directory search. + ** If temporary directory is changed, then invalidateTempStorage. + ** + */ + case PragTyp_TEMP_STORE_DIRECTORY: { + if( !zRight ){ + returnSingleText(v, sqlite3_temp_directory); + }else{ +#ifndef SQLITE_OMIT_WSD + if( zRight[0] ){ + int res; + rc = sqlite3OsAccess(db->pVfs, zRight, SQLITE_ACCESS_READWRITE, &res); + if( rc!=SQLITE_OK || res==0 ){ + sqlite3ErrorMsg(pParse, "not a writable directory"); + goto pragma_out; + } + } + if( SQLITE_TEMP_STORE==0 + || (SQLITE_TEMP_STORE==1 && db->temp_store<=1) + || (SQLITE_TEMP_STORE==2 && db->temp_store==1) + ){ + invalidateTempStorage(pParse); + } + sqlite3_free(sqlite3_temp_directory); + if( zRight[0] ){ + sqlite3_temp_directory = sqlite3_mprintf("%s", zRight); + }else{ + sqlite3_temp_directory = 0; + } +#endif /* SQLITE_OMIT_WSD */ + } + break; + } + +#if SQLITE_OS_WIN + /* + ** PRAGMA data_store_directory + ** PRAGMA data_store_directory = ""|"directory_name" + ** + ** Return or set the local value of the data_store_directory flag. Changing + ** the value sets a specific directory to be used for database files that + ** were specified with a relative pathname. Setting to a null string reverts + ** to the default database directory, which for database files specified with + ** a relative path will probably be based on the current directory for the + ** process. Database file specified with an absolute path are not impacted + ** by this setting, regardless of its value. + ** + */ + case PragTyp_DATA_STORE_DIRECTORY: { + if( !zRight ){ + returnSingleText(v, sqlite3_data_directory); + }else{ +#ifndef SQLITE_OMIT_WSD + if( zRight[0] ){ + int res; + rc = sqlite3OsAccess(db->pVfs, zRight, SQLITE_ACCESS_READWRITE, &res); + if( rc!=SQLITE_OK || res==0 ){ + sqlite3ErrorMsg(pParse, "not a writable directory"); + goto pragma_out; + } + } + sqlite3_free(sqlite3_data_directory); + if( zRight[0] ){ + sqlite3_data_directory = sqlite3_mprintf("%s", zRight); + }else{ + sqlite3_data_directory = 0; + } +#endif /* SQLITE_OMIT_WSD */ + } + break; + } +#endif + +#if SQLITE_ENABLE_LOCKING_STYLE + /* + ** PRAGMA [schema.]lock_proxy_file + ** PRAGMA [schema.]lock_proxy_file = ":auto:"|"lock_file_path" + ** + ** Return or set the value of the lock_proxy_file flag. Changing + ** the value sets a specific file to be used for database access locks. + ** + */ + case PragTyp_LOCK_PROXY_FILE: { + if( !zRight ){ + Pager *pPager = sqlite3BtreePager(pDb->pBt); + char *proxy_file_path = NULL; + sqlite3_file *pFile = sqlite3PagerFile(pPager); + sqlite3OsFileControlHint(pFile, SQLITE_GET_LOCKPROXYFILE, + &proxy_file_path); + returnSingleText(v, proxy_file_path); + }else{ + Pager *pPager = sqlite3BtreePager(pDb->pBt); + sqlite3_file *pFile = sqlite3PagerFile(pPager); + int res; + if( zRight[0] ){ + res=sqlite3OsFileControl(pFile, SQLITE_SET_LOCKPROXYFILE, + zRight); + } else { + res=sqlite3OsFileControl(pFile, SQLITE_SET_LOCKPROXYFILE, + NULL); + } + if( res!=SQLITE_OK ){ + sqlite3ErrorMsg(pParse, "failed to set lock proxy file"); + goto pragma_out; + } + } + break; + } +#endif /* SQLITE_ENABLE_LOCKING_STYLE */ + + /* + ** PRAGMA [schema.]synchronous + ** PRAGMA [schema.]synchronous=OFF|ON|NORMAL|FULL|EXTRA + ** + ** Return or set the local value of the synchronous flag. Changing + ** the local value does not make changes to the disk file and the + ** default value will be restored the next time the database is + ** opened. + */ + case PragTyp_SYNCHRONOUS: { + if( !zRight ){ + returnSingleInt(v, pDb->safety_level-1); + }else{ + if( !db->autoCommit ){ + sqlite3ErrorMsg(pParse, + "Safety level may not be changed inside a transaction"); + }else if( iDb!=1 ){ + int iLevel = (getSafetyLevel(zRight,0,1)+1) & PAGER_SYNCHRONOUS_MASK; + if( iLevel==0 ) iLevel = 1; + pDb->safety_level = iLevel; + pDb->bSyncSet = 1; + setAllPagerFlags(db); + } + } + break; + } +#endif /* SQLITE_OMIT_PAGER_PRAGMAS */ + +#ifndef SQLITE_OMIT_FLAG_PRAGMAS + case PragTyp_FLAG: { + if( zRight==0 ){ + setPragmaResultColumnNames(v, pPragma); + returnSingleInt(v, (db->flags & pPragma->iArg)!=0 ); + }else{ + u64 mask = pPragma->iArg; /* Mask of bits to set or clear. */ + if( db->autoCommit==0 ){ + /* Foreign key support may not be enabled or disabled while not + ** in auto-commit mode. */ + mask &= ~(SQLITE_ForeignKeys); + } +#if SQLITE_USER_AUTHENTICATION + if( db->auth.authLevel==UAUTH_User ){ + /* Do not allow non-admin users to modify the schema arbitrarily */ + mask &= ~(SQLITE_WriteSchema); + } +#endif + + if( sqlite3GetBoolean(zRight, 0) ){ + db->flags |= mask; + }else{ + db->flags &= ~mask; + if( mask==SQLITE_DeferFKs ) db->nDeferredImmCons = 0; + if( (mask & SQLITE_WriteSchema)!=0 + && sqlite3_stricmp(zRight, "reset")==0 + ){ + /* IMP: R-60817-01178 If the argument is "RESET" then schema + ** writing is disabled (as with "PRAGMA writable_schema=OFF") and, + ** in addition, the schema is reloaded. */ + sqlite3ResetAllSchemasOfConnection(db); + } + } + + /* Many of the flag-pragmas modify the code generated by the SQL + ** compiler (eg. count_changes). So add an opcode to expire all + ** compiled SQL statements after modifying a pragma value. + */ + sqlite3VdbeAddOp0(v, OP_Expire); + setAllPagerFlags(db); + } + break; + } +#endif /* SQLITE_OMIT_FLAG_PRAGMAS */ + +#ifndef SQLITE_OMIT_SCHEMA_PRAGMAS + /* + ** PRAGMA table_info(
    ) + ** + ** Return a single row for each column of the named table. The columns of + ** the returned data set are: + ** + ** cid: Column id (numbered from left to right, starting at 0) + ** name: Column name + ** type: Column declaration type. + ** notnull: True if 'NOT NULL' is part of column declaration + ** dflt_value: The default value for the column, if any. + ** pk: Non-zero for PK fields. + */ + case PragTyp_TABLE_INFO: if( zRight ){ + Table *pTab; + sqlite3CodeVerifyNamedSchema(pParse, zDb); + pTab = sqlite3LocateTable(pParse, LOCATE_NOERR, zRight, zDb); + if( pTab ){ + int i, k; + int nHidden = 0; + Column *pCol; + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + pParse->nMem = 7; + sqlite3ViewGetColumnNames(pParse, pTab); + for(i=0, pCol=pTab->aCol; inCol; i++, pCol++){ + int isHidden = 0; + const Expr *pColExpr; + if( pCol->colFlags & COLFLAG_NOINSERT ){ + if( pPragma->iArg==0 ){ + nHidden++; + continue; + } + if( pCol->colFlags & COLFLAG_VIRTUAL ){ + isHidden = 2; /* GENERATED ALWAYS AS ... VIRTUAL */ + }else if( pCol->colFlags & COLFLAG_STORED ){ + isHidden = 3; /* GENERATED ALWAYS AS ... STORED */ + }else{ assert( pCol->colFlags & COLFLAG_HIDDEN ); + isHidden = 1; /* HIDDEN */ + } + } + if( (pCol->colFlags & COLFLAG_PRIMKEY)==0 ){ + k = 0; + }else if( pPk==0 ){ + k = 1; + }else{ + for(k=1; k<=pTab->nCol && pPk->aiColumn[k-1]!=i; k++){} + } + pColExpr = sqlite3ColumnExpr(pTab,pCol); + assert( pColExpr==0 || pColExpr->op==TK_SPAN || isHidden>=2 ); + assert( pColExpr==0 || !ExprHasProperty(pColExpr, EP_IntValue) + || isHidden>=2 ); + sqlite3VdbeMultiLoad(v, 1, pPragma->iArg ? "issisii" : "issisi", + i-nHidden, + pCol->zCnName, + sqlite3ColumnType(pCol,""), + pCol->notNull ? 1 : 0, + (isHidden>=2 || pColExpr==0) ? 0 : pColExpr->u.zToken, + k, + isHidden); + } + } + } + break; + + /* + ** PRAGMA table_list + ** + ** Return a single row for each table, virtual table, or view in the + ** entire schema. + ** + ** schema: Name of attached database hold this table + ** name: Name of the table itself + ** type: "table", "view", "virtual", "shadow" + ** ncol: Number of columns + ** wr: True for a WITHOUT ROWID table + ** strict: True for a STRICT table + */ + case PragTyp_TABLE_LIST: { + int ii; + pParse->nMem = 6; + sqlite3CodeVerifyNamedSchema(pParse, zDb); + for(ii=0; iinDb; ii++){ + HashElem *k; + Hash *pHash; + int initNCol; + if( zDb && sqlite3_stricmp(zDb, db->aDb[ii].zDbSName)!=0 ) continue; + + /* Ensure that the Table.nCol field is initialized for all views + ** and virtual tables. Each time we initialize a Table.nCol value + ** for a table, that can potentially disrupt the hash table, so restart + ** the initialization scan. + */ + pHash = &db->aDb[ii].pSchema->tblHash; + initNCol = sqliteHashCount(pHash); + while( initNCol-- ){ + for(k=sqliteHashFirst(pHash); 1; k=sqliteHashNext(k) ){ + Table *pTab; + if( k==0 ){ initNCol = 0; break; } + pTab = sqliteHashData(k); + if( pTab->nCol==0 ){ + char *zSql = sqlite3MPrintf(db, "SELECT*FROM\"%w\"", pTab->zName); + if( zSql ){ + sqlite3_stmt *pDummy = 0; + (void)sqlite3_prepare(db, zSql, -1, &pDummy, 0); + (void)sqlite3_finalize(pDummy); + sqlite3DbFree(db, zSql); + } + if( db->mallocFailed ){ + sqlite3ErrorMsg(db->pParse, "out of memory"); + db->pParse->rc = SQLITE_NOMEM_BKPT; + } + pHash = &db->aDb[ii].pSchema->tblHash; + break; + } + } + } + + for(k=sqliteHashFirst(pHash); k; k=sqliteHashNext(k) ){ + Table *pTab = sqliteHashData(k); + const char *zType; + if( zRight && sqlite3_stricmp(zRight, pTab->zName)!=0 ) continue; + if( IsView(pTab) ){ + zType = "view"; + }else if( IsVirtual(pTab) ){ + zType = "virtual"; + }else if( pTab->tabFlags & TF_Shadow ){ + zType = "shadow"; + }else{ + zType = "table"; + } + sqlite3VdbeMultiLoad(v, 1, "sssiii", + db->aDb[ii].zDbSName, + sqlite3PreferredTableName(pTab->zName), + zType, + pTab->nCol, + (pTab->tabFlags & TF_WithoutRowid)!=0, + (pTab->tabFlags & TF_Strict)!=0 + ); + } + } + } + break; + +#ifdef SQLITE_DEBUG + case PragTyp_STATS: { + Index *pIdx; + HashElem *i; + pParse->nMem = 5; + sqlite3CodeVerifySchema(pParse, iDb); + for(i=sqliteHashFirst(&pDb->pSchema->tblHash); i; i=sqliteHashNext(i)){ + Table *pTab = sqliteHashData(i); + sqlite3VdbeMultiLoad(v, 1, "ssiii", + sqlite3PreferredTableName(pTab->zName), + 0, + pTab->szTabRow, + pTab->nRowLogEst, + pTab->tabFlags); + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + sqlite3VdbeMultiLoad(v, 2, "siiiX", + pIdx->zName, + pIdx->szIdxRow, + pIdx->aiRowLogEst[0], + pIdx->hasStat1); + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 5); + } + } + } + break; +#endif + + case PragTyp_INDEX_INFO: if( zRight ){ + Index *pIdx; + Table *pTab; + pIdx = sqlite3FindIndex(db, zRight, zDb); + if( pIdx==0 ){ + /* If there is no index named zRight, check to see if there is a + ** WITHOUT ROWID table named zRight, and if there is, show the + ** structure of the PRIMARY KEY index for that table. */ + pTab = sqlite3LocateTable(pParse, LOCATE_NOERR, zRight, zDb); + if( pTab && !HasRowid(pTab) ){ + pIdx = sqlite3PrimaryKeyIndex(pTab); + } + } + if( pIdx ){ + int iIdxDb = sqlite3SchemaToIndex(db, pIdx->pSchema); + int i; + int mx; + if( pPragma->iArg ){ + /* PRAGMA index_xinfo (newer version with more rows and columns) */ + mx = pIdx->nColumn; + pParse->nMem = 6; + }else{ + /* PRAGMA index_info (legacy version) */ + mx = pIdx->nKeyCol; + pParse->nMem = 3; + } + pTab = pIdx->pTable; + sqlite3CodeVerifySchema(pParse, iIdxDb); + assert( pParse->nMem<=pPragma->nPragCName ); + for(i=0; iaiColumn[i]; + sqlite3VdbeMultiLoad(v, 1, "iisX", i, cnum, + cnum<0 ? 0 : pTab->aCol[cnum].zCnName); + if( pPragma->iArg ){ + sqlite3VdbeMultiLoad(v, 4, "isiX", + pIdx->aSortOrder[i], + pIdx->azColl[i], + inKeyCol); + } + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, pParse->nMem); + } + } + } + break; + + case PragTyp_INDEX_LIST: if( zRight ){ + Index *pIdx; + Table *pTab; + int i; + pTab = sqlite3FindTable(db, zRight, zDb); + if( pTab ){ + int iTabDb = sqlite3SchemaToIndex(db, pTab->pSchema); + pParse->nMem = 5; + sqlite3CodeVerifySchema(pParse, iTabDb); + for(pIdx=pTab->pIndex, i=0; pIdx; pIdx=pIdx->pNext, i++){ + const char *azOrigin[] = { "c", "u", "pk" }; + sqlite3VdbeMultiLoad(v, 1, "isisi", + i, + pIdx->zName, + IsUniqueIndex(pIdx), + azOrigin[pIdx->idxType], + pIdx->pPartIdxWhere!=0); + } + } + } + break; + + case PragTyp_DATABASE_LIST: { + int i; + pParse->nMem = 3; + for(i=0; inDb; i++){ + if( db->aDb[i].pBt==0 ) continue; + assert( db->aDb[i].zDbSName!=0 ); + sqlite3VdbeMultiLoad(v, 1, "iss", + i, + db->aDb[i].zDbSName, + sqlite3BtreeGetFilename(db->aDb[i].pBt)); + } + } + break; + + case PragTyp_COLLATION_LIST: { + int i = 0; + HashElem *p; + pParse->nMem = 2; + for(p=sqliteHashFirst(&db->aCollSeq); p; p=sqliteHashNext(p)){ + CollSeq *pColl = (CollSeq *)sqliteHashData(p); + sqlite3VdbeMultiLoad(v, 1, "is", i++, pColl->zName); + } + } + break; + +#ifndef SQLITE_OMIT_INTROSPECTION_PRAGMAS + case PragTyp_FUNCTION_LIST: { + int i; + HashElem *j; + FuncDef *p; + int showInternFunc = (db->mDbFlags & DBFLAG_InternalFunc)!=0; + pParse->nMem = 6; + for(i=0; iu.pHash ){ + assert( p->funcFlags & SQLITE_FUNC_BUILTIN ); + pragmaFunclistLine(v, p, 1, showInternFunc); + } + } + for(j=sqliteHashFirst(&db->aFunc); j; j=sqliteHashNext(j)){ + p = (FuncDef*)sqliteHashData(j); + assert( (p->funcFlags & SQLITE_FUNC_BUILTIN)==0 ); + pragmaFunclistLine(v, p, 0, showInternFunc); + } + } + break; + +#ifndef SQLITE_OMIT_VIRTUALTABLE + case PragTyp_MODULE_LIST: { + HashElem *j; + pParse->nMem = 1; + for(j=sqliteHashFirst(&db->aModule); j; j=sqliteHashNext(j)){ + Module *pMod = (Module*)sqliteHashData(j); + sqlite3VdbeMultiLoad(v, 1, "s", pMod->zName); + } + } + break; +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + case PragTyp_PRAGMA_LIST: { + int i; + for(i=0; iu.tab.pFKey; + if( pFK ){ + int iTabDb = sqlite3SchemaToIndex(db, pTab->pSchema); + int i = 0; + pParse->nMem = 8; + sqlite3CodeVerifySchema(pParse, iTabDb); + while(pFK){ + int j; + for(j=0; jnCol; j++){ + sqlite3VdbeMultiLoad(v, 1, "iissssss", + i, + j, + pFK->zTo, + pTab->aCol[pFK->aCol[j].iFrom].zCnName, + pFK->aCol[j].zCol, + actionName(pFK->aAction[1]), /* ON UPDATE */ + actionName(pFK->aAction[0]), /* ON DELETE */ + "NONE"); + } + ++i; + pFK = pFK->pNextFrom; + } + } + } + } + break; +#endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */ + +#ifndef SQLITE_OMIT_FOREIGN_KEY +#ifndef SQLITE_OMIT_TRIGGER + case PragTyp_FOREIGN_KEY_CHECK: { + FKey *pFK; /* A foreign key constraint */ + Table *pTab; /* Child table contain "REFERENCES" keyword */ + Table *pParent; /* Parent table that child points to */ + Index *pIdx; /* Index in the parent table */ + int i; /* Loop counter: Foreign key number for pTab */ + int j; /* Loop counter: Field of the foreign key */ + HashElem *k; /* Loop counter: Next table in schema */ + int x; /* result variable */ + int regResult; /* 3 registers to hold a result row */ + int regKey; /* Register to hold key for checking the FK */ + int regRow; /* Registers to hold a row from pTab */ + int addrTop; /* Top of a loop checking foreign keys */ + int addrOk; /* Jump here if the key is OK */ + int *aiCols; /* child to parent column mapping */ + + regResult = pParse->nMem+1; + pParse->nMem += 4; + regKey = ++pParse->nMem; + regRow = ++pParse->nMem; + k = sqliteHashFirst(&db->aDb[iDb].pSchema->tblHash); + while( k ){ + if( zRight ){ + pTab = sqlite3LocateTable(pParse, 0, zRight, zDb); + k = 0; + }else{ + pTab = (Table*)sqliteHashData(k); + k = sqliteHashNext(k); + } + if( pTab==0 || !IsOrdinaryTable(pTab) || pTab->u.tab.pFKey==0 ) continue; + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + zDb = db->aDb[iDb].zDbSName; + sqlite3CodeVerifySchema(pParse, iDb); + sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); + if( pTab->nCol+regRow>pParse->nMem ) pParse->nMem = pTab->nCol + regRow; + sqlite3OpenTable(pParse, 0, iDb, pTab, OP_OpenRead); + sqlite3VdbeLoadString(v, regResult, pTab->zName); + assert( IsOrdinaryTable(pTab) ); + for(i=1, pFK=pTab->u.tab.pFKey; pFK; i++, pFK=pFK->pNextFrom){ + pParent = sqlite3FindTable(db, pFK->zTo, zDb); + if( pParent==0 ) continue; + pIdx = 0; + sqlite3TableLock(pParse, iDb, pParent->tnum, 0, pParent->zName); + x = sqlite3FkLocateIndex(pParse, pParent, pFK, &pIdx, 0); + if( x==0 ){ + if( pIdx==0 ){ + sqlite3OpenTable(pParse, i, iDb, pParent, OP_OpenRead); + }else{ + sqlite3VdbeAddOp3(v, OP_OpenRead, i, pIdx->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + } + }else{ + k = 0; + break; + } + } + assert( pParse->nErr>0 || pFK==0 ); + if( pFK ) break; + if( pParse->nTabnTab = i; + addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, 0); VdbeCoverage(v); + assert( IsOrdinaryTable(pTab) ); + for(i=1, pFK=pTab->u.tab.pFKey; pFK; i++, pFK=pFK->pNextFrom){ + pParent = sqlite3FindTable(db, pFK->zTo, zDb); + pIdx = 0; + aiCols = 0; + if( pParent ){ + x = sqlite3FkLocateIndex(pParse, pParent, pFK, &pIdx, &aiCols); + assert( x==0 || db->mallocFailed ); + } + addrOk = sqlite3VdbeMakeLabel(pParse); + + /* Generate code to read the child key values into registers + ** regRow..regRow+n. If any of the child key values are NULL, this + ** row cannot cause an FK violation. Jump directly to addrOk in + ** this case. */ + if( regRow+pFK->nCol>pParse->nMem ) pParse->nMem = regRow+pFK->nCol; + for(j=0; jnCol; j++){ + int iCol = aiCols ? aiCols[j] : pFK->aCol[j].iFrom; + sqlite3ExprCodeGetColumnOfTable(v, pTab, 0, iCol, regRow+j); + sqlite3VdbeAddOp2(v, OP_IsNull, regRow+j, addrOk); VdbeCoverage(v); + } + + /* Generate code to query the parent index for a matching parent + ** key. If a match is found, jump to addrOk. */ + if( pIdx ){ + sqlite3VdbeAddOp4(v, OP_MakeRecord, regRow, pFK->nCol, regKey, + sqlite3IndexAffinityStr(db,pIdx), pFK->nCol); + sqlite3VdbeAddOp4Int(v, OP_Found, i, addrOk, regKey, 0); + VdbeCoverage(v); + }else if( pParent ){ + int jmp = sqlite3VdbeCurrentAddr(v)+2; + sqlite3VdbeAddOp3(v, OP_SeekRowid, i, jmp, regRow); VdbeCoverage(v); + sqlite3VdbeGoto(v, addrOk); + assert( pFK->nCol==1 || db->mallocFailed ); + } + + /* Generate code to report an FK violation to the caller. */ + if( HasRowid(pTab) ){ + sqlite3VdbeAddOp2(v, OP_Rowid, 0, regResult+1); + }else{ + sqlite3VdbeAddOp2(v, OP_Null, 0, regResult+1); + } + sqlite3VdbeMultiLoad(v, regResult+2, "siX", pFK->zTo, i-1); + sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, 4); + sqlite3VdbeResolveLabel(v, addrOk); + sqlite3DbFree(db, aiCols); + } + sqlite3VdbeAddOp2(v, OP_Next, 0, addrTop+1); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addrTop); + } + } + break; +#endif /* !defined(SQLITE_OMIT_TRIGGER) */ +#endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */ + +#ifndef SQLITE_OMIT_CASE_SENSITIVE_LIKE_PRAGMA + /* Reinstall the LIKE and GLOB functions. The variant of LIKE + ** used will be case sensitive or not depending on the RHS. + */ + case PragTyp_CASE_SENSITIVE_LIKE: { + if( zRight ){ + sqlite3RegisterLikeFunctions(db, sqlite3GetBoolean(zRight, 0)); + } + } + break; +#endif /* SQLITE_OMIT_CASE_SENSITIVE_LIKE_PRAGMA */ + +#ifndef SQLITE_INTEGRITY_CHECK_ERROR_MAX +# define SQLITE_INTEGRITY_CHECK_ERROR_MAX 100 +#endif + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK + /* PRAGMA integrity_check + ** PRAGMA integrity_check(N) + ** PRAGMA quick_check + ** PRAGMA quick_check(N) + ** + ** Verify the integrity of the database. + ** + ** The "quick_check" is reduced version of + ** integrity_check designed to detect most database corruption + ** without the overhead of cross-checking indexes. Quick_check + ** is linear time wherease integrity_check is O(NlogN). + ** + ** The maximum nubmer of errors is 100 by default. A different default + ** can be specified using a numeric parameter N. + ** + ** Or, the parameter N can be the name of a table. In that case, only + ** the one table named is verified. The freelist is only verified if + ** the named table is "sqlite_schema" (or one of its aliases). + ** + ** All schemas are checked by default. To check just a single + ** schema, use the form: + ** + ** PRAGMA schema.integrity_check; + */ + case PragTyp_INTEGRITY_CHECK: { + int i, j, addr, mxErr; + Table *pObjTab = 0; /* Check only this one table, if not NULL */ + + int isQuick = (sqlite3Tolower(zLeft[0])=='q'); + + /* If the PRAGMA command was of the form "PRAGMA .integrity_check", + ** then iDb is set to the index of the database identified by . + ** In this case, the integrity of database iDb only is verified by + ** the VDBE created below. + ** + ** Otherwise, if the command was simply "PRAGMA integrity_check" (or + ** "PRAGMA quick_check"), then iDb is set to 0. In this case, set iDb + ** to -1 here, to indicate that the VDBE should verify the integrity + ** of all attached databases. */ + assert( iDb>=0 ); + assert( iDb==0 || pId2->z ); + if( pId2->z==0 ) iDb = -1; + + /* Initialize the VDBE program */ + pParse->nMem = 6; + + /* Set the maximum error count */ + mxErr = SQLITE_INTEGRITY_CHECK_ERROR_MAX; + if( zRight ){ + if( sqlite3GetInt32(zRight, &mxErr) ){ + if( mxErr<=0 ){ + mxErr = SQLITE_INTEGRITY_CHECK_ERROR_MAX; + } + }else{ + pObjTab = sqlite3LocateTable(pParse, 0, zRight, + iDb>=0 ? db->aDb[iDb].zDbSName : 0); + } + } + sqlite3VdbeAddOp2(v, OP_Integer, mxErr-1, 1); /* reg[1] holds errors left */ + + /* Do an integrity check on each database file */ + for(i=0; inDb; i++){ + HashElem *x; /* For looping over tables in the schema */ + Hash *pTbls; /* Set of all tables in the schema */ + int *aRoot; /* Array of root page numbers of all btrees */ + int cnt = 0; /* Number of entries in aRoot[] */ + int mxIdx = 0; /* Maximum number of indexes for any table */ + + if( OMIT_TEMPDB && i==1 ) continue; + if( iDb>=0 && i!=iDb ) continue; + + sqlite3CodeVerifySchema(pParse, i); + + /* Do an integrity check of the B-Tree + ** + ** Begin by finding the root pages numbers + ** for all tables and indices in the database. + */ + assert( sqlite3SchemaMutexHeld(db, i, 0) ); + pTbls = &db->aDb[i].pSchema->tblHash; + for(cnt=0, x=sqliteHashFirst(pTbls); x; x=sqliteHashNext(x)){ + Table *pTab = sqliteHashData(x); /* Current table */ + Index *pIdx; /* An index on pTab */ + int nIdx; /* Number of indexes on pTab */ + if( pObjTab && pObjTab!=pTab ) continue; + if( HasRowid(pTab) ) cnt++; + for(nIdx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, nIdx++){ cnt++; } + if( nIdx>mxIdx ) mxIdx = nIdx; + } + if( cnt==0 ) continue; + if( pObjTab ) cnt++; + aRoot = sqlite3DbMallocRawNN(db, sizeof(int)*(cnt+1)); + if( aRoot==0 ) break; + cnt = 0; + if( pObjTab ) aRoot[++cnt] = 0; + for(x=sqliteHashFirst(pTbls); x; x=sqliteHashNext(x)){ + Table *pTab = sqliteHashData(x); + Index *pIdx; + if( pObjTab && pObjTab!=pTab ) continue; + if( HasRowid(pTab) ) aRoot[++cnt] = pTab->tnum; + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + aRoot[++cnt] = pIdx->tnum; + } + } + aRoot[0] = cnt; + + /* Make sure sufficient number of registers have been allocated */ + pParse->nMem = MAX( pParse->nMem, 8+mxIdx ); + sqlite3ClearTempRegCache(pParse); + + /* Do the b-tree integrity checks */ + sqlite3VdbeAddOp4(v, OP_IntegrityCk, 2, cnt, 1, (char*)aRoot,P4_INTARRAY); + sqlite3VdbeChangeP5(v, (u8)i); + addr = sqlite3VdbeAddOp1(v, OP_IsNull, 2); VdbeCoverage(v); + sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, + sqlite3MPrintf(db, "*** in database %s ***\n", db->aDb[i].zDbSName), + P4_DYNAMIC); + sqlite3VdbeAddOp3(v, OP_Concat, 2, 3, 3); + integrityCheckResultRow(v); + sqlite3VdbeJumpHere(v, addr); + + /* Make sure all the indices are constructed correctly. + */ + for(x=sqliteHashFirst(pTbls); x; x=sqliteHashNext(x)){ + Table *pTab = sqliteHashData(x); + Index *pIdx, *pPk; + Index *pPrior = 0; + int loopTop; + int iDataCur, iIdxCur; + int r1 = -1; + int bStrict; + + if( !IsOrdinaryTable(pTab) ) continue; + if( pObjTab && pObjTab!=pTab ) continue; + pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab); + sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenRead, 0, + 1, 0, &iDataCur, &iIdxCur); + /* reg[7] counts the number of entries in the table. + ** reg[8+i] counts the number of entries in the i-th index + */ + sqlite3VdbeAddOp2(v, OP_Integer, 0, 7); + for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){ + sqlite3VdbeAddOp2(v, OP_Integer, 0, 8+j); /* index entries counter */ + } + assert( pParse->nMem>=8+j ); + assert( sqlite3NoTempsInRange(pParse,1,7+j) ); + sqlite3VdbeAddOp2(v, OP_Rewind, iDataCur, 0); VdbeCoverage(v); + loopTop = sqlite3VdbeAddOp2(v, OP_AddImm, 7, 1); + if( !isQuick ){ + /* Sanity check on record header decoding */ + sqlite3VdbeAddOp3(v, OP_Column, iDataCur, pTab->nNVCol-1,3); + sqlite3VdbeChangeP5(v, OPFLAG_TYPEOFARG); + VdbeComment((v, "(right-most column)")); + } + /* Verify that all NOT NULL columns really are NOT NULL. At the + ** same time verify the type of the content of STRICT tables */ + bStrict = (pTab->tabFlags & TF_Strict)!=0; + for(j=0; jnCol; j++){ + char *zErr; + Column *pCol = pTab->aCol + j; + int doError, jmp2; + if( j==pTab->iPKey ) continue; + if( pCol->notNull==0 && !bStrict ) continue; + doError = bStrict ? sqlite3VdbeMakeLabel(pParse) : 0; + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, j, 3); + if( sqlite3VdbeGetOp(v,-1)->opcode==OP_Column ){ + sqlite3VdbeChangeP5(v, OPFLAG_TYPEOFARG); + } + if( pCol->notNull ){ + jmp2 = sqlite3VdbeAddOp1(v, OP_NotNull, 3); VdbeCoverage(v); + zErr = sqlite3MPrintf(db, "NULL value in %s.%s", pTab->zName, + pCol->zCnName); + sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); + if( bStrict && pCol->eCType!=COLTYPE_ANY ){ + sqlite3VdbeGoto(v, doError); + }else{ + integrityCheckResultRow(v); + } + sqlite3VdbeJumpHere(v, jmp2); + } + if( (pTab->tabFlags & TF_Strict)!=0 + && pCol->eCType!=COLTYPE_ANY + ){ + jmp2 = sqlite3VdbeAddOp3(v, OP_IsNullOrType, 3, 0, + sqlite3StdTypeMap[pCol->eCType-1]); + VdbeCoverage(v); + zErr = sqlite3MPrintf(db, "non-%s value in %s.%s", + sqlite3StdType[pCol->eCType-1], + pTab->zName, pTab->aCol[j].zCnName); + sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); + sqlite3VdbeResolveLabel(v, doError); + integrityCheckResultRow(v); + sqlite3VdbeJumpHere(v, jmp2); + } + } + /* Verify CHECK constraints */ + if( pTab->pCheck && (db->flags & SQLITE_IgnoreChecks)==0 ){ + ExprList *pCheck = sqlite3ExprListDup(db, pTab->pCheck, 0); + if( db->mallocFailed==0 ){ + int addrCkFault = sqlite3VdbeMakeLabel(pParse); + int addrCkOk = sqlite3VdbeMakeLabel(pParse); + char *zErr; + int k; + pParse->iSelfTab = iDataCur + 1; + for(k=pCheck->nExpr-1; k>0; k--){ + sqlite3ExprIfFalse(pParse, pCheck->a[k].pExpr, addrCkFault, 0); + } + sqlite3ExprIfTrue(pParse, pCheck->a[0].pExpr, addrCkOk, + SQLITE_JUMPIFNULL); + sqlite3VdbeResolveLabel(v, addrCkFault); + pParse->iSelfTab = 0; + zErr = sqlite3MPrintf(db, "CHECK constraint failed in %s", + pTab->zName); + sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); + integrityCheckResultRow(v); + sqlite3VdbeResolveLabel(v, addrCkOk); + } + sqlite3ExprListDelete(db, pCheck); + } + if( !isQuick ){ /* Omit the remaining tests for quick_check */ + /* Validate index entries for the current row */ + for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){ + int jmp2, jmp3, jmp4, jmp5; + int ckUniq = sqlite3VdbeMakeLabel(pParse); + if( pPk==pIdx ) continue; + r1 = sqlite3GenerateIndexKey(pParse, pIdx, iDataCur, 0, 0, &jmp3, + pPrior, r1); + pPrior = pIdx; + sqlite3VdbeAddOp2(v, OP_AddImm, 8+j, 1);/* increment entry count */ + /* Verify that an index entry exists for the current table row */ + jmp2 = sqlite3VdbeAddOp4Int(v, OP_Found, iIdxCur+j, ckUniq, r1, + pIdx->nColumn); VdbeCoverage(v); + sqlite3VdbeLoadString(v, 3, "row "); + sqlite3VdbeAddOp3(v, OP_Concat, 7, 3, 3); + sqlite3VdbeLoadString(v, 4, " missing from index "); + sqlite3VdbeAddOp3(v, OP_Concat, 4, 3, 3); + jmp5 = sqlite3VdbeLoadString(v, 4, pIdx->zName); + sqlite3VdbeAddOp3(v, OP_Concat, 4, 3, 3); + jmp4 = integrityCheckResultRow(v); + sqlite3VdbeJumpHere(v, jmp2); + /* For UNIQUE indexes, verify that only one entry exists with the + ** current key. The entry is unique if (1) any column is NULL + ** or (2) the next entry has a different key */ + if( IsUniqueIndex(pIdx) ){ + int uniqOk = sqlite3VdbeMakeLabel(pParse); + int jmp6; + int kk; + for(kk=0; kknKeyCol; kk++){ + int iCol = pIdx->aiColumn[kk]; + assert( iCol!=XN_ROWID && iColnCol ); + if( iCol>=0 && pTab->aCol[iCol].notNull ) continue; + sqlite3VdbeAddOp2(v, OP_IsNull, r1+kk, uniqOk); + VdbeCoverage(v); + } + jmp6 = sqlite3VdbeAddOp1(v, OP_Next, iIdxCur+j); VdbeCoverage(v); + sqlite3VdbeGoto(v, uniqOk); + sqlite3VdbeJumpHere(v, jmp6); + sqlite3VdbeAddOp4Int(v, OP_IdxGT, iIdxCur+j, uniqOk, r1, + pIdx->nKeyCol); VdbeCoverage(v); + sqlite3VdbeLoadString(v, 3, "non-unique entry in index "); + sqlite3VdbeGoto(v, jmp5); + sqlite3VdbeResolveLabel(v, uniqOk); + } + sqlite3VdbeJumpHere(v, jmp4); + sqlite3ResolvePartIdxLabel(pParse, jmp3); + } + } + sqlite3VdbeAddOp2(v, OP_Next, iDataCur, loopTop); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, loopTop-1); + if( !isQuick ){ + sqlite3VdbeLoadString(v, 2, "wrong # of entries in index "); + for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){ + if( pPk==pIdx ) continue; + sqlite3VdbeAddOp2(v, OP_Count, iIdxCur+j, 3); + addr = sqlite3VdbeAddOp3(v, OP_Eq, 8+j, 0, 3); VdbeCoverage(v); + sqlite3VdbeChangeP5(v, SQLITE_NOTNULL); + sqlite3VdbeLoadString(v, 4, pIdx->zName); + sqlite3VdbeAddOp3(v, OP_Concat, 4, 2, 3); + integrityCheckResultRow(v); + sqlite3VdbeJumpHere(v, addr); + } + } + } + } + { + static const int iLn = VDBE_OFFSET_LINENO(2); + static const VdbeOpList endCode[] = { + { OP_AddImm, 1, 0, 0}, /* 0 */ + { OP_IfNotZero, 1, 4, 0}, /* 1 */ + { OP_String8, 0, 3, 0}, /* 2 */ + { OP_ResultRow, 3, 1, 0}, /* 3 */ + { OP_Halt, 0, 0, 0}, /* 4 */ + { OP_String8, 0, 3, 0}, /* 5 */ + { OP_Goto, 0, 3, 0}, /* 6 */ + }; + VdbeOp *aOp; + + aOp = sqlite3VdbeAddOpList(v, ArraySize(endCode), endCode, iLn); + if( aOp ){ + aOp[0].p2 = 1-mxErr; + aOp[2].p4type = P4_STATIC; + aOp[2].p4.z = "ok"; + aOp[5].p4type = P4_STATIC; + aOp[5].p4.z = (char*)sqlite3ErrStr(SQLITE_CORRUPT); + } + sqlite3VdbeChangeP3(v, 0, sqlite3VdbeCurrentAddr(v)-2); + } + } + break; +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +#ifndef SQLITE_OMIT_UTF16 + /* + ** PRAGMA encoding + ** PRAGMA encoding = "utf-8"|"utf-16"|"utf-16le"|"utf-16be" + ** + ** In its first form, this pragma returns the encoding of the main + ** database. If the database is not initialized, it is initialized now. + ** + ** The second form of this pragma is a no-op if the main database file + ** has not already been initialized. In this case it sets the default + ** encoding that will be used for the main database file if a new file + ** is created. If an existing main database file is opened, then the + ** default text encoding for the existing database is used. + ** + ** In all cases new databases created using the ATTACH command are + ** created to use the same default text encoding as the main database. If + ** the main database has not been initialized and/or created when ATTACH + ** is executed, this is done before the ATTACH operation. + ** + ** In the second form this pragma sets the text encoding to be used in + ** new database files created using this database handle. It is only + ** useful if invoked immediately after the main database i + */ + case PragTyp_ENCODING: { + static const struct EncName { + char *zName; + u8 enc; + } encnames[] = { + { "UTF8", SQLITE_UTF8 }, + { "UTF-8", SQLITE_UTF8 }, /* Must be element [1] */ + { "UTF-16le", SQLITE_UTF16LE }, /* Must be element [2] */ + { "UTF-16be", SQLITE_UTF16BE }, /* Must be element [3] */ + { "UTF16le", SQLITE_UTF16LE }, + { "UTF16be", SQLITE_UTF16BE }, + { "UTF-16", 0 }, /* SQLITE_UTF16NATIVE */ + { "UTF16", 0 }, /* SQLITE_UTF16NATIVE */ + { 0, 0 } + }; + const struct EncName *pEnc; + if( !zRight ){ /* "PRAGMA encoding" */ + if( sqlite3ReadSchema(pParse) ) goto pragma_out; + assert( encnames[SQLITE_UTF8].enc==SQLITE_UTF8 ); + assert( encnames[SQLITE_UTF16LE].enc==SQLITE_UTF16LE ); + assert( encnames[SQLITE_UTF16BE].enc==SQLITE_UTF16BE ); + returnSingleText(v, encnames[ENC(pParse->db)].zName); + }else{ /* "PRAGMA encoding = XXX" */ + /* Only change the value of sqlite.enc if the database handle is not + ** initialized. If the main database exists, the new sqlite.enc value + ** will be overwritten when the schema is next loaded. If it does not + ** already exists, it will be created to use the new encoding value. + */ + if( (db->mDbFlags & DBFLAG_EncodingFixed)==0 ){ + for(pEnc=&encnames[0]; pEnc->zName; pEnc++){ + if( 0==sqlite3StrICmp(zRight, pEnc->zName) ){ + u8 enc = pEnc->enc ? pEnc->enc : SQLITE_UTF16NATIVE; + SCHEMA_ENC(db) = enc; + sqlite3SetTextEncoding(db, enc); + break; + } + } + if( !pEnc->zName ){ + sqlite3ErrorMsg(pParse, "unsupported encoding: %s", zRight); + } + } + } + } + break; +#endif /* SQLITE_OMIT_UTF16 */ + +#ifndef SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS + /* + ** PRAGMA [schema.]schema_version + ** PRAGMA [schema.]schema_version = + ** + ** PRAGMA [schema.]user_version + ** PRAGMA [schema.]user_version = + ** + ** PRAGMA [schema.]freelist_count + ** + ** PRAGMA [schema.]data_version + ** + ** PRAGMA [schema.]application_id + ** PRAGMA [schema.]application_id = + ** + ** The pragma's schema_version and user_version are used to set or get + ** the value of the schema-version and user-version, respectively. Both + ** the schema-version and the user-version are 32-bit signed integers + ** stored in the database header. + ** + ** The schema-cookie is usually only manipulated internally by SQLite. It + ** is incremented by SQLite whenever the database schema is modified (by + ** creating or dropping a table or index). The schema version is used by + ** SQLite each time a query is executed to ensure that the internal cache + ** of the schema used when compiling the SQL query matches the schema of + ** the database against which the compiled query is actually executed. + ** Subverting this mechanism by using "PRAGMA schema_version" to modify + ** the schema-version is potentially dangerous and may lead to program + ** crashes or database corruption. Use with caution! + ** + ** The user-version is not used internally by SQLite. It may be used by + ** applications for any purpose. + */ + case PragTyp_HEADER_VALUE: { + int iCookie = pPragma->iArg; /* Which cookie to read or write */ + sqlite3VdbeUsesBtree(v, iDb); + if( zRight && (pPragma->mPragFlg & PragFlg_ReadOnly)==0 ){ + /* Write the specified cookie value */ + static const VdbeOpList setCookie[] = { + { OP_Transaction, 0, 1, 0}, /* 0 */ + { OP_SetCookie, 0, 0, 0}, /* 1 */ + }; + VdbeOp *aOp; + sqlite3VdbeVerifyNoMallocRequired(v, ArraySize(setCookie)); + aOp = sqlite3VdbeAddOpList(v, ArraySize(setCookie), setCookie, 0); + if( ONLY_IF_REALLOC_STRESS(aOp==0) ) break; + aOp[0].p1 = iDb; + aOp[1].p1 = iDb; + aOp[1].p2 = iCookie; + aOp[1].p3 = sqlite3Atoi(zRight); + aOp[1].p5 = 1; + }else{ + /* Read the specified cookie value */ + static const VdbeOpList readCookie[] = { + { OP_Transaction, 0, 0, 0}, /* 0 */ + { OP_ReadCookie, 0, 1, 0}, /* 1 */ + { OP_ResultRow, 1, 1, 0} + }; + VdbeOp *aOp; + sqlite3VdbeVerifyNoMallocRequired(v, ArraySize(readCookie)); + aOp = sqlite3VdbeAddOpList(v, ArraySize(readCookie),readCookie,0); + if( ONLY_IF_REALLOC_STRESS(aOp==0) ) break; + aOp[0].p1 = iDb; + aOp[1].p1 = iDb; + aOp[1].p3 = iCookie; + sqlite3VdbeReusable(v); + } + } + break; +#endif /* SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS */ + +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS + /* + ** PRAGMA compile_options + ** + ** Return the names of all compile-time options used in this build, + ** one option per row. + */ + case PragTyp_COMPILE_OPTIONS: { + int i = 0; + const char *zOpt; + pParse->nMem = 1; + while( (zOpt = sqlite3_compileoption_get(i++))!=0 ){ + sqlite3VdbeLoadString(v, 1, zOpt); + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1); + } + sqlite3VdbeReusable(v); + } + break; +#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + +#ifndef SQLITE_OMIT_WAL + /* + ** PRAGMA [schema.]wal_checkpoint = passive|full|restart|truncate + ** + ** Checkpoint the database. + */ + case PragTyp_WAL_CHECKPOINT: { + int iBt = (pId2->z?iDb:SQLITE_MAX_DB); + int eMode = SQLITE_CHECKPOINT_PASSIVE; + if( zRight ){ + if( sqlite3StrICmp(zRight, "full")==0 ){ + eMode = SQLITE_CHECKPOINT_FULL; + }else if( sqlite3StrICmp(zRight, "restart")==0 ){ + eMode = SQLITE_CHECKPOINT_RESTART; + }else if( sqlite3StrICmp(zRight, "truncate")==0 ){ + eMode = SQLITE_CHECKPOINT_TRUNCATE; + } + } + pParse->nMem = 3; + sqlite3VdbeAddOp3(v, OP_Checkpoint, iBt, eMode, 1); + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 3); + } + break; + + /* + ** PRAGMA wal_autocheckpoint + ** PRAGMA wal_autocheckpoint = N + ** + ** Configure a database connection to automatically checkpoint a database + ** after accumulating N frames in the log. Or query for the current value + ** of N. + */ + case PragTyp_WAL_AUTOCHECKPOINT: { + if( zRight ){ + sqlite3_wal_autocheckpoint(db, sqlite3Atoi(zRight)); + } + returnSingleInt(v, + db->xWalCallback==sqlite3WalDefaultHook ? + SQLITE_PTR_TO_INT(db->pWalArg) : 0); + } + break; +#endif + + /* + ** PRAGMA shrink_memory + ** + ** IMPLEMENTATION-OF: R-23445-46109 This pragma causes the database + ** connection on which it is invoked to free up as much memory as it + ** can, by calling sqlite3_db_release_memory(). + */ + case PragTyp_SHRINK_MEMORY: { + sqlite3_db_release_memory(db); + break; + } + + /* + ** PRAGMA optimize + ** PRAGMA optimize(MASK) + ** PRAGMA schema.optimize + ** PRAGMA schema.optimize(MASK) + ** + ** Attempt to optimize the database. All schemas are optimized in the first + ** two forms, and only the specified schema is optimized in the latter two. + ** + ** The details of optimizations performed by this pragma are expected + ** to change and improve over time. Applications should anticipate that + ** this pragma will perform new optimizations in future releases. + ** + ** The optional argument is a bitmask of optimizations to perform: + ** + ** 0x0001 Debugging mode. Do not actually perform any optimizations + ** but instead return one line of text for each optimization + ** that would have been done. Off by default. + ** + ** 0x0002 Run ANALYZE on tables that might benefit. On by default. + ** See below for additional information. + ** + ** 0x0004 (Not yet implemented) Record usage and performance + ** information from the current session in the + ** database file so that it will be available to "optimize" + ** pragmas run by future database connections. + ** + ** 0x0008 (Not yet implemented) Create indexes that might have + ** been helpful to recent queries + ** + ** The default MASK is and always shall be 0xfffe. 0xfffe means perform all + ** of the optimizations listed above except Debug Mode, including new + ** optimizations that have not yet been invented. If new optimizations are + ** ever added that should be off by default, those off-by-default + ** optimizations will have bitmasks of 0x10000 or larger. + ** + ** DETERMINATION OF WHEN TO RUN ANALYZE + ** + ** In the current implementation, a table is analyzed if only if all of + ** the following are true: + ** + ** (1) MASK bit 0x02 is set. + ** + ** (2) The query planner used sqlite_stat1-style statistics for one or + ** more indexes of the table at some point during the lifetime of + ** the current connection. + ** + ** (3) One or more indexes of the table are currently unanalyzed OR + ** the number of rows in the table has increased by 25 times or more + ** since the last time ANALYZE was run. + ** + ** The rules for when tables are analyzed are likely to change in + ** future releases. + */ + case PragTyp_OPTIMIZE: { + int iDbLast; /* Loop termination point for the schema loop */ + int iTabCur; /* Cursor for a table whose size needs checking */ + HashElem *k; /* Loop over tables of a schema */ + Schema *pSchema; /* The current schema */ + Table *pTab; /* A table in the schema */ + Index *pIdx; /* An index of the table */ + LogEst szThreshold; /* Size threshold above which reanalysis is needd */ + char *zSubSql; /* SQL statement for the OP_SqlExec opcode */ + u32 opMask; /* Mask of operations to perform */ + + if( zRight ){ + opMask = (u32)sqlite3Atoi(zRight); + if( (opMask & 0x02)==0 ) break; + }else{ + opMask = 0xfffe; + } + iTabCur = pParse->nTab++; + for(iDbLast = zDb?iDb:db->nDb-1; iDb<=iDbLast; iDb++){ + if( iDb==1 ) continue; + sqlite3CodeVerifySchema(pParse, iDb); + pSchema = db->aDb[iDb].pSchema; + for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ + pTab = (Table*)sqliteHashData(k); + + /* If table pTab has not been used in a way that would benefit from + ** having analysis statistics during the current session, then skip it. + ** This also has the effect of skipping virtual tables and views */ + if( (pTab->tabFlags & TF_StatsUsed)==0 ) continue; + + /* Reanalyze if the table is 25 times larger than the last analysis */ + szThreshold = pTab->nRowLogEst + 46; assert( sqlite3LogEst(25)==46 ); + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( !pIdx->hasStat1 ){ + szThreshold = 0; /* Always analyze if any index lacks statistics */ + break; + } + } + if( szThreshold ){ + sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead); + sqlite3VdbeAddOp3(v, OP_IfSmaller, iTabCur, + sqlite3VdbeCurrentAddr(v)+2+(opMask&1), szThreshold); + VdbeCoverage(v); + } + zSubSql = sqlite3MPrintf(db, "ANALYZE \"%w\".\"%w\"", + db->aDb[iDb].zDbSName, pTab->zName); + if( opMask & 0x01 ){ + int r1 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp4(v, OP_String8, 0, r1, 0, zSubSql, P4_DYNAMIC); + sqlite3VdbeAddOp2(v, OP_ResultRow, r1, 1); + }else{ + sqlite3VdbeAddOp4(v, OP_SqlExec, 0, 0, 0, zSubSql, P4_DYNAMIC); + } + } + } + sqlite3VdbeAddOp0(v, OP_Expire); + break; + } + + /* + ** PRAGMA busy_timeout + ** PRAGMA busy_timeout = N + ** + ** Call sqlite3_busy_timeout(db, N). Return the current timeout value + ** if one is set. If no busy handler or a different busy handler is set + ** then 0 is returned. Setting the busy_timeout to 0 or negative + ** disables the timeout. + */ + /*case PragTyp_BUSY_TIMEOUT*/ default: { + assert( pPragma->ePragTyp==PragTyp_BUSY_TIMEOUT ); + if( zRight ){ + sqlite3_busy_timeout(db, sqlite3Atoi(zRight)); + } + returnSingleInt(v, db->busyTimeout); + break; + } + + /* + ** PRAGMA soft_heap_limit + ** PRAGMA soft_heap_limit = N + ** + ** IMPLEMENTATION-OF: R-26343-45930 This pragma invokes the + ** sqlite3_soft_heap_limit64() interface with the argument N, if N is + ** specified and is a non-negative integer. + ** IMPLEMENTATION-OF: R-64451-07163 The soft_heap_limit pragma always + ** returns the same integer that would be returned by the + ** sqlite3_soft_heap_limit64(-1) C-language function. + */ + case PragTyp_SOFT_HEAP_LIMIT: { + sqlite3_int64 N; + if( zRight && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK ){ + sqlite3_soft_heap_limit64(N); + } + returnSingleInt(v, sqlite3_soft_heap_limit64(-1)); + break; + } + + /* + ** PRAGMA hard_heap_limit + ** PRAGMA hard_heap_limit = N + ** + ** Invoke sqlite3_hard_heap_limit64() to query or set the hard heap + ** limit. The hard heap limit can be activated or lowered by this + ** pragma, but not raised or deactivated. Only the + ** sqlite3_hard_heap_limit64() C-language API can raise or deactivate + ** the hard heap limit. This allows an application to set a heap limit + ** constraint that cannot be relaxed by an untrusted SQL script. + */ + case PragTyp_HARD_HEAP_LIMIT: { + sqlite3_int64 N; + if( zRight && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK ){ + sqlite3_int64 iPrior = sqlite3_hard_heap_limit64(-1); + if( N>0 && (iPrior==0 || iPrior>N) ) sqlite3_hard_heap_limit64(N); + } + returnSingleInt(v, sqlite3_hard_heap_limit64(-1)); + break; + } + + /* + ** PRAGMA threads + ** PRAGMA threads = N + ** + ** Configure the maximum number of worker threads. Return the new + ** maximum, which might be less than requested. + */ + case PragTyp_THREADS: { + sqlite3_int64 N; + if( zRight + && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK + && N>=0 + ){ + sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, (int)(N&0x7fffffff)); + } + returnSingleInt(v, sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, -1)); + break; + } + + /* + ** PRAGMA analysis_limit + ** PRAGMA analysis_limit = N + ** + ** Configure the maximum number of rows that ANALYZE will examine + ** in each index that it looks at. Return the new limit. + */ + case PragTyp_ANALYSIS_LIMIT: { + sqlite3_int64 N; + if( zRight + && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK /* IMP: R-40975-20399 */ + && N>=0 + ){ + db->nAnalysisLimit = (int)(N&0x7fffffff); + } + returnSingleInt(v, db->nAnalysisLimit); /* IMP: R-57594-65522 */ + break; + } + +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + /* + ** Report the current state of file logs for all databases + */ + case PragTyp_LOCK_STATUS: { + static const char *const azLockName[] = { + "unlocked", "shared", "reserved", "pending", "exclusive" + }; + int i; + pParse->nMem = 2; + for(i=0; inDb; i++){ + Btree *pBt; + const char *zState = "unknown"; + int j; + if( db->aDb[i].zDbSName==0 ) continue; + pBt = db->aDb[i].pBt; + if( pBt==0 || sqlite3BtreePager(pBt)==0 ){ + zState = "closed"; + }else if( sqlite3_file_control(db, i ? db->aDb[i].zDbSName : 0, + SQLITE_FCNTL_LOCKSTATE, &j)==SQLITE_OK ){ + zState = azLockName[j]; + } + sqlite3VdbeMultiLoad(v, 1, "ss", db->aDb[i].zDbSName, zState); + } + break; + } +#endif + +#if defined(SQLITE_ENABLE_CEROD) + case PragTyp_ACTIVATE_EXTENSIONS: if( zRight ){ + if( sqlite3StrNICmp(zRight, "cerod-", 6)==0 ){ + sqlite3_activate_cerod(&zRight[6]); + } + } + break; +#endif + + } /* End of the PRAGMA switch */ + + /* The following block is a no-op unless SQLITE_DEBUG is defined. Its only + ** purpose is to execute assert() statements to verify that if the + ** PragFlg_NoColumns1 flag is set and the caller specified an argument + ** to the PRAGMA, the implementation has not added any OP_ResultRow + ** instructions to the VM. */ + if( (pPragma->mPragFlg & PragFlg_NoColumns1) && zRight ){ + sqlite3VdbeVerifyNoResultRow(v); + } + +pragma_out: + sqlite3DbFree(db, zLeft); + sqlite3DbFree(db, zRight); +} +#ifndef SQLITE_OMIT_VIRTUALTABLE +/***************************************************************************** +** Implementation of an eponymous virtual table that runs a pragma. +** +*/ +typedef struct PragmaVtab PragmaVtab; +typedef struct PragmaVtabCursor PragmaVtabCursor; +struct PragmaVtab { + sqlite3_vtab base; /* Base class. Must be first */ + sqlite3 *db; /* The database connection to which it belongs */ + const PragmaName *pName; /* Name of the pragma */ + u8 nHidden; /* Number of hidden columns */ + u8 iHidden; /* Index of the first hidden column */ +}; +struct PragmaVtabCursor { + sqlite3_vtab_cursor base; /* Base class. Must be first */ + sqlite3_stmt *pPragma; /* The pragma statement to run */ + sqlite_int64 iRowid; /* Current rowid */ + char *azArg[2]; /* Value of the argument and schema */ +}; + +/* +** Pragma virtual table module xConnect method. +*/ +static int pragmaVtabConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + const PragmaName *pPragma = (const PragmaName*)pAux; + PragmaVtab *pTab = 0; + int rc; + int i, j; + char cSep = '('; + StrAccum acc; + char zBuf[200]; + + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); + sqlite3_str_appendall(&acc, "CREATE TABLE x"); + for(i=0, j=pPragma->iPragCName; inPragCName; i++, j++){ + sqlite3_str_appendf(&acc, "%c\"%s\"", cSep, pragCName[j]); + cSep = ','; + } + if( i==0 ){ + sqlite3_str_appendf(&acc, "(\"%s\"", pPragma->zName); + i++; + } + j = 0; + if( pPragma->mPragFlg & PragFlg_Result1 ){ + sqlite3_str_appendall(&acc, ",arg HIDDEN"); + j++; + } + if( pPragma->mPragFlg & (PragFlg_SchemaOpt|PragFlg_SchemaReq) ){ + sqlite3_str_appendall(&acc, ",schema HIDDEN"); + j++; + } + sqlite3_str_append(&acc, ")", 1); + sqlite3StrAccumFinish(&acc); + assert( strlen(zBuf) < sizeof(zBuf)-1 ); + rc = sqlite3_declare_vtab(db, zBuf); + if( rc==SQLITE_OK ){ + pTab = (PragmaVtab*)sqlite3_malloc(sizeof(PragmaVtab)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pTab, 0, sizeof(PragmaVtab)); + pTab->pName = pPragma; + pTab->db = db; + pTab->iHidden = i; + pTab->nHidden = j; + } + }else{ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } + + *ppVtab = (sqlite3_vtab*)pTab; + return rc; +} + +/* +** Pragma virtual table module xDisconnect method. +*/ +static int pragmaVtabDisconnect(sqlite3_vtab *pVtab){ + PragmaVtab *pTab = (PragmaVtab*)pVtab; + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* Figure out the best index to use to search a pragma virtual table. +** +** There are not really any index choices. But we want to encourage the +** query planner to give == constraints on as many hidden parameters as +** possible, and especially on the first hidden parameter. So return a +** high cost if hidden parameters are unconstrained. +*/ +static int pragmaVtabBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + PragmaVtab *pTab = (PragmaVtab*)tab; + const struct sqlite3_index_constraint *pConstraint; + int i, j; + int seen[2]; + + pIdxInfo->estimatedCost = (double)1; + if( pTab->nHidden==0 ){ return SQLITE_OK; } + pConstraint = pIdxInfo->aConstraint; + seen[0] = 0; + seen[1] = 0; + for(i=0; inConstraint; i++, pConstraint++){ + if( pConstraint->usable==0 ) continue; + if( pConstraint->op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue; + if( pConstraint->iColumn < pTab->iHidden ) continue; + j = pConstraint->iColumn - pTab->iHidden; + assert( j < 2 ); + seen[j] = i+1; + } + if( seen[0]==0 ){ + pIdxInfo->estimatedCost = (double)2147483647; + pIdxInfo->estimatedRows = 2147483647; + return SQLITE_OK; + } + j = seen[0]-1; + pIdxInfo->aConstraintUsage[j].argvIndex = 1; + pIdxInfo->aConstraintUsage[j].omit = 1; + if( seen[1]==0 ) return SQLITE_OK; + pIdxInfo->estimatedCost = (double)20; + pIdxInfo->estimatedRows = 20; + j = seen[1]-1; + pIdxInfo->aConstraintUsage[j].argvIndex = 2; + pIdxInfo->aConstraintUsage[j].omit = 1; + return SQLITE_OK; +} + +/* Create a new cursor for the pragma virtual table */ +static int pragmaVtabOpen(sqlite3_vtab *pVtab, sqlite3_vtab_cursor **ppCursor){ + PragmaVtabCursor *pCsr; + pCsr = (PragmaVtabCursor*)sqlite3_malloc(sizeof(*pCsr)); + if( pCsr==0 ) return SQLITE_NOMEM; + memset(pCsr, 0, sizeof(PragmaVtabCursor)); + pCsr->base.pVtab = pVtab; + *ppCursor = &pCsr->base; + return SQLITE_OK; +} + +/* Clear all content from pragma virtual table cursor. */ +static void pragmaVtabCursorClear(PragmaVtabCursor *pCsr){ + int i; + sqlite3_finalize(pCsr->pPragma); + pCsr->pPragma = 0; + for(i=0; iazArg); i++){ + sqlite3_free(pCsr->azArg[i]); + pCsr->azArg[i] = 0; + } +} + +/* Close a pragma virtual table cursor */ +static int pragmaVtabClose(sqlite3_vtab_cursor *cur){ + PragmaVtabCursor *pCsr = (PragmaVtabCursor*)cur; + pragmaVtabCursorClear(pCsr); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* Advance the pragma virtual table cursor to the next row */ +static int pragmaVtabNext(sqlite3_vtab_cursor *pVtabCursor){ + PragmaVtabCursor *pCsr = (PragmaVtabCursor*)pVtabCursor; + int rc = SQLITE_OK; + + /* Increment the xRowid value */ + pCsr->iRowid++; + assert( pCsr->pPragma ); + if( SQLITE_ROW!=sqlite3_step(pCsr->pPragma) ){ + rc = sqlite3_finalize(pCsr->pPragma); + pCsr->pPragma = 0; + pragmaVtabCursorClear(pCsr); + } + return rc; +} + +/* +** Pragma virtual table module xFilter method. +*/ +static int pragmaVtabFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + PragmaVtabCursor *pCsr = (PragmaVtabCursor*)pVtabCursor; + PragmaVtab *pTab = (PragmaVtab*)(pVtabCursor->pVtab); + int rc; + int i, j; + StrAccum acc; + char *zSql; + + UNUSED_PARAMETER(idxNum); + UNUSED_PARAMETER(idxStr); + pragmaVtabCursorClear(pCsr); + j = (pTab->pName->mPragFlg & PragFlg_Result1)!=0 ? 0 : 1; + for(i=0; iazArg) ); + assert( pCsr->azArg[j]==0 ); + if( zText ){ + pCsr->azArg[j] = sqlite3_mprintf("%s", zText); + if( pCsr->azArg[j]==0 ){ + return SQLITE_NOMEM; + } + } + } + sqlite3StrAccumInit(&acc, 0, 0, 0, pTab->db->aLimit[SQLITE_LIMIT_SQL_LENGTH]); + sqlite3_str_appendall(&acc, "PRAGMA "); + if( pCsr->azArg[1] ){ + sqlite3_str_appendf(&acc, "%Q.", pCsr->azArg[1]); + } + sqlite3_str_appendall(&acc, pTab->pName->zName); + if( pCsr->azArg[0] ){ + sqlite3_str_appendf(&acc, "=%Q", pCsr->azArg[0]); + } + zSql = sqlite3StrAccumFinish(&acc); + if( zSql==0 ) return SQLITE_NOMEM; + rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pPragma, 0); + sqlite3_free(zSql); + if( rc!=SQLITE_OK ){ + pTab->base.zErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pTab->db)); + return rc; + } + return pragmaVtabNext(pVtabCursor); +} + +/* +** Pragma virtual table module xEof method. +*/ +static int pragmaVtabEof(sqlite3_vtab_cursor *pVtabCursor){ + PragmaVtabCursor *pCsr = (PragmaVtabCursor*)pVtabCursor; + return (pCsr->pPragma==0); +} + +/* The xColumn method simply returns the corresponding column from +** the PRAGMA. +*/ +static int pragmaVtabColumn( + sqlite3_vtab_cursor *pVtabCursor, + sqlite3_context *ctx, + int i +){ + PragmaVtabCursor *pCsr = (PragmaVtabCursor*)pVtabCursor; + PragmaVtab *pTab = (PragmaVtab*)(pVtabCursor->pVtab); + if( iiHidden ){ + sqlite3_result_value(ctx, sqlite3_column_value(pCsr->pPragma, i)); + }else{ + sqlite3_result_text(ctx, pCsr->azArg[i-pTab->iHidden],-1,SQLITE_TRANSIENT); + } + return SQLITE_OK; +} + +/* +** Pragma virtual table module xRowid method. +*/ +static int pragmaVtabRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *p){ + PragmaVtabCursor *pCsr = (PragmaVtabCursor*)pVtabCursor; + *p = pCsr->iRowid; + return SQLITE_OK; +} + +/* The pragma virtual table object */ +static const sqlite3_module pragmaVtabModule = { + 0, /* iVersion */ + 0, /* xCreate - create a table */ + pragmaVtabConnect, /* xConnect - connect to an existing table */ + pragmaVtabBestIndex, /* xBestIndex - Determine search strategy */ + pragmaVtabDisconnect, /* xDisconnect - Disconnect from a table */ + 0, /* xDestroy - Drop a table */ + pragmaVtabOpen, /* xOpen - open a cursor */ + pragmaVtabClose, /* xClose - close a cursor */ + pragmaVtabFilter, /* xFilter - configure scan constraints */ + pragmaVtabNext, /* xNext - advance a cursor */ + pragmaVtabEof, /* xEof */ + pragmaVtabColumn, /* xColumn - read data */ + pragmaVtabRowid, /* xRowid - read data */ + 0, /* xUpdate - write data */ + 0, /* xBegin - begin transaction */ + 0, /* xSync - sync transaction */ + 0, /* xCommit - commit transaction */ + 0, /* xRollback - rollback transaction */ + 0, /* xFindFunction - function overloading */ + 0, /* xRename - rename the table */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + 0 /* xShadowName */ +}; + +/* +** Check to see if zTabName is really the name of a pragma. If it is, +** then register an eponymous virtual table for that pragma and return +** a pointer to the Module object for the new virtual table. +*/ +SQLITE_PRIVATE Module *sqlite3PragmaVtabRegister(sqlite3 *db, const char *zName){ + const PragmaName *pName; + assert( sqlite3_strnicmp(zName, "pragma_", 7)==0 ); + pName = pragmaLocate(zName+7); + if( pName==0 ) return 0; + if( (pName->mPragFlg & (PragFlg_Result0|PragFlg_Result1))==0 ) return 0; + assert( sqlite3HashFind(&db->aModule, zName)==0 ); + return sqlite3VtabCreateModule(db, zName, &pragmaVtabModule, (void*)pName, 0); +} + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#endif /* SQLITE_OMIT_PRAGMA */ + +/************** End of pragma.c **********************************************/ +/************** Begin file prepare.c *****************************************/ +/* +** 2005 May 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the implementation of the sqlite3_prepare() +** interface, and routines that contribute to loading the database schema +** from disk. +*/ +/* #include "sqliteInt.h" */ + +/* +** Fill the InitData structure with an error message that indicates +** that the database is corrupt. +*/ +static void corruptSchema( + InitData *pData, /* Initialization context */ + char **azObj, /* Type and name of object being parsed */ + const char *zExtra /* Error information */ +){ + sqlite3 *db = pData->db; + if( db->mallocFailed ){ + pData->rc = SQLITE_NOMEM_BKPT; + }else if( pData->pzErrMsg[0]!=0 ){ + /* A error message has already been generated. Do not overwrite it */ + }else if( pData->mInitFlags & (INITFLAG_AlterMask) ){ + static const char *azAlterType[] = { + "rename", + "drop column", + "add column" + }; + *pData->pzErrMsg = sqlite3MPrintf(db, + "error in %s %s after %s: %s", azObj[0], azObj[1], + azAlterType[(pData->mInitFlags&INITFLAG_AlterMask)-1], + zExtra + ); + pData->rc = SQLITE_ERROR; + }else if( db->flags & SQLITE_WriteSchema ){ + pData->rc = SQLITE_CORRUPT_BKPT; + }else{ + char *z; + const char *zObj = azObj[1] ? azObj[1] : "?"; + z = sqlite3MPrintf(db, "malformed database schema (%s)", zObj); + if( zExtra && zExtra[0] ) z = sqlite3MPrintf(db, "%z - %s", z, zExtra); + *pData->pzErrMsg = z; + pData->rc = SQLITE_CORRUPT_BKPT; + } +} + +/* +** Check to see if any sibling index (another index on the same table) +** of pIndex has the same root page number, and if it does, return true. +** This would indicate a corrupt schema. +*/ +SQLITE_PRIVATE int sqlite3IndexHasDuplicateRootPage(Index *pIndex){ + Index *p; + for(p=pIndex->pTable->pIndex; p; p=p->pNext){ + if( p->tnum==pIndex->tnum && p!=pIndex ) return 1; + } + return 0; +} + +/* forward declaration */ +static int sqlite3Prepare( + sqlite3 *db, /* Database handle. */ + const char *zSql, /* UTF-8 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + u32 prepFlags, /* Zero or more SQLITE_PREPARE_* flags */ + Vdbe *pReprepare, /* VM being reprepared */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const char **pzTail /* OUT: End of parsed string */ +); + + +/* +** This is the callback routine for the code that initializes the +** database. See sqlite3Init() below for additional information. +** This routine is also called from the OP_ParseSchema opcode of the VDBE. +** +** Each callback contains the following information: +** +** argv[0] = type of object: "table", "index", "trigger", or "view". +** argv[1] = name of thing being created +** argv[2] = associated table if an index or trigger +** argv[3] = root page number for table or index. 0 for trigger or view. +** argv[4] = SQL text for the CREATE statement. +** +*/ +SQLITE_PRIVATE int sqlite3InitCallback(void *pInit, int argc, char **argv, char **NotUsed){ + InitData *pData = (InitData*)pInit; + sqlite3 *db = pData->db; + int iDb = pData->iDb; + + assert( argc==5 ); + UNUSED_PARAMETER2(NotUsed, argc); + assert( sqlite3_mutex_held(db->mutex) ); + db->mDbFlags |= DBFLAG_EncodingFixed; + if( argv==0 ) return 0; /* Might happen if EMPTY_RESULT_CALLBACKS are on */ + pData->nInitRow++; + if( db->mallocFailed ){ + corruptSchema(pData, argv, 0); + return 1; + } + + assert( iDb>=0 && iDbnDb ); + if( argv[3]==0 ){ + corruptSchema(pData, argv, 0); + }else if( argv[4] + && 'c'==sqlite3UpperToLower[(unsigned char)argv[4][0]] + && 'r'==sqlite3UpperToLower[(unsigned char)argv[4][1]] ){ + /* Call the parser to process a CREATE TABLE, INDEX or VIEW. + ** But because db->init.busy is set to 1, no VDBE code is generated + ** or executed. All the parser does is build the internal data + ** structures that describe the table, index, or view. + ** + ** No other valid SQL statement, other than the variable CREATE statements, + ** can begin with the letters "C" and "R". Thus, it is not possible run + ** any other kind of statement while parsing the schema, even a corrupt + ** schema. + */ + int rc; + u8 saved_iDb = db->init.iDb; + sqlite3_stmt *pStmt; + TESTONLY(int rcp); /* Return code from sqlite3_prepare() */ + + assert( db->init.busy ); + db->init.iDb = iDb; + if( sqlite3GetUInt32(argv[3], &db->init.newTnum)==0 + || (db->init.newTnum>pData->mxPage && pData->mxPage>0) + ){ + if( sqlite3Config.bExtraSchemaChecks ){ + corruptSchema(pData, argv, "invalid rootpage"); + } + } + db->init.orphanTrigger = 0; + db->init.azInit = (const char**)argv; + pStmt = 0; + TESTONLY(rcp = ) sqlite3Prepare(db, argv[4], -1, 0, 0, &pStmt, 0); + rc = db->errCode; + assert( (rc&0xFF)==(rcp&0xFF) ); + db->init.iDb = saved_iDb; + /* assert( saved_iDb==0 || (db->mDbFlags & DBFLAG_Vacuum)!=0 ); */ + if( SQLITE_OK!=rc ){ + if( db->init.orphanTrigger ){ + assert( iDb==1 ); + }else{ + if( rc > pData->rc ) pData->rc = rc; + if( rc==SQLITE_NOMEM ){ + sqlite3OomFault(db); + }else if( rc!=SQLITE_INTERRUPT && (rc&0xFF)!=SQLITE_LOCKED ){ + corruptSchema(pData, argv, sqlite3_errmsg(db)); + } + } + } + db->init.azInit = sqlite3StdType; /* Any array of string ptrs will do */ + sqlite3_finalize(pStmt); + }else if( argv[1]==0 || (argv[4]!=0 && argv[4][0]!=0) ){ + corruptSchema(pData, argv, 0); + }else{ + /* If the SQL column is blank it means this is an index that + ** was created to be the PRIMARY KEY or to fulfill a UNIQUE + ** constraint for a CREATE TABLE. The index should have already + ** been created when we processed the CREATE TABLE. All we have + ** to do here is record the root page number for that index. + */ + Index *pIndex; + pIndex = sqlite3FindIndex(db, argv[1], db->aDb[iDb].zDbSName); + if( pIndex==0 ){ + corruptSchema(pData, argv, "orphan index"); + }else + if( sqlite3GetUInt32(argv[3],&pIndex->tnum)==0 + || pIndex->tnum<2 + || pIndex->tnum>pData->mxPage + || sqlite3IndexHasDuplicateRootPage(pIndex) + ){ + if( sqlite3Config.bExtraSchemaChecks ){ + corruptSchema(pData, argv, "invalid rootpage"); + } + } + } + return 0; +} + +/* +** Attempt to read the database schema and initialize internal +** data structures for a single database file. The index of the +** database file is given by iDb. iDb==0 is used for the main +** database. iDb==1 should never be used. iDb>=2 is used for +** auxiliary databases. Return one of the SQLITE_ error codes to +** indicate success or failure. +*/ +SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFlags){ + int rc; + int i; +#ifndef SQLITE_OMIT_DEPRECATED + int size; +#endif + Db *pDb; + char const *azArg[6]; + int meta[5]; + InitData initData; + const char *zSchemaTabName; + int openedTransaction = 0; + int mask = ((db->mDbFlags & DBFLAG_EncodingFixed) | ~DBFLAG_EncodingFixed); + + assert( (db->mDbFlags & DBFLAG_SchemaKnownOk)==0 ); + assert( iDb>=0 && iDbnDb ); + assert( db->aDb[iDb].pSchema ); + assert( sqlite3_mutex_held(db->mutex) ); + assert( iDb==1 || sqlite3BtreeHoldsMutex(db->aDb[iDb].pBt) ); + + db->init.busy = 1; + + /* Construct the in-memory representation schema tables (sqlite_schema or + ** sqlite_temp_schema) by invoking the parser directly. The appropriate + ** table name will be inserted automatically by the parser so we can just + ** use the abbreviation "x" here. The parser will also automatically tag + ** the schema table as read-only. */ + azArg[0] = "table"; + azArg[1] = zSchemaTabName = SCHEMA_TABLE(iDb); + azArg[2] = azArg[1]; + azArg[3] = "1"; + azArg[4] = "CREATE TABLE x(type text,name text,tbl_name text," + "rootpage int,sql text)"; + azArg[5] = 0; + initData.db = db; + initData.iDb = iDb; + initData.rc = SQLITE_OK; + initData.pzErrMsg = pzErrMsg; + initData.mInitFlags = mFlags; + initData.nInitRow = 0; + initData.mxPage = 0; + sqlite3InitCallback(&initData, 5, (char **)azArg, 0); + db->mDbFlags &= mask; + if( initData.rc ){ + rc = initData.rc; + goto error_out; + } + + /* Create a cursor to hold the database open + */ + pDb = &db->aDb[iDb]; + if( pDb->pBt==0 ){ + assert( iDb==1 ); + DbSetProperty(db, 1, DB_SchemaLoaded); + rc = SQLITE_OK; + goto error_out; + } + + /* If there is not already a read-only (or read-write) transaction opened + ** on the b-tree database, open one now. If a transaction is opened, it + ** will be closed before this function returns. */ + sqlite3BtreeEnter(pDb->pBt); + if( sqlite3BtreeTxnState(pDb->pBt)==SQLITE_TXN_NONE ){ + rc = sqlite3BtreeBeginTrans(pDb->pBt, 0, 0); + if( rc!=SQLITE_OK ){ + sqlite3SetString(pzErrMsg, db, sqlite3ErrStr(rc)); + goto initone_error_out; + } + openedTransaction = 1; + } + + /* Get the database meta information. + ** + ** Meta values are as follows: + ** meta[0] Schema cookie. Changes with each schema change. + ** meta[1] File format of schema layer. + ** meta[2] Size of the page cache. + ** meta[3] Largest rootpage (auto/incr_vacuum mode) + ** meta[4] Db text encoding. 1:UTF-8 2:UTF-16LE 3:UTF-16BE + ** meta[5] User version + ** meta[6] Incremental vacuum mode + ** meta[7] unused + ** meta[8] unused + ** meta[9] unused + ** + ** Note: The #defined SQLITE_UTF* symbols in sqliteInt.h correspond to + ** the possible values of meta[4]. + */ + for(i=0; ipBt, i+1, (u32 *)&meta[i]); + } + if( (db->flags & SQLITE_ResetDatabase)!=0 ){ + memset(meta, 0, sizeof(meta)); + } + pDb->pSchema->schema_cookie = meta[BTREE_SCHEMA_VERSION-1]; + + /* If opening a non-empty database, check the text encoding. For the + ** main database, set sqlite3.enc to the encoding of the main database. + ** For an attached db, it is an error if the encoding is not the same + ** as sqlite3.enc. + */ + if( meta[BTREE_TEXT_ENCODING-1] ){ /* text encoding */ + if( iDb==0 && (db->mDbFlags & DBFLAG_EncodingFixed)==0 ){ + u8 encoding; +#ifndef SQLITE_OMIT_UTF16 + /* If opening the main database, set ENC(db). */ + encoding = (u8)meta[BTREE_TEXT_ENCODING-1] & 3; + if( encoding==0 ) encoding = SQLITE_UTF8; +#else + encoding = SQLITE_UTF8; +#endif + sqlite3SetTextEncoding(db, encoding); + }else{ + /* If opening an attached database, the encoding much match ENC(db) */ + if( (meta[BTREE_TEXT_ENCODING-1] & 3)!=ENC(db) ){ + sqlite3SetString(pzErrMsg, db, "attached databases must use the same" + " text encoding as main database"); + rc = SQLITE_ERROR; + goto initone_error_out; + } + } + } + pDb->pSchema->enc = ENC(db); + + if( pDb->pSchema->cache_size==0 ){ +#ifndef SQLITE_OMIT_DEPRECATED + size = sqlite3AbsInt32(meta[BTREE_DEFAULT_CACHE_SIZE-1]); + if( size==0 ){ size = SQLITE_DEFAULT_CACHE_SIZE; } + pDb->pSchema->cache_size = size; +#else + pDb->pSchema->cache_size = SQLITE_DEFAULT_CACHE_SIZE; +#endif + sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); + } + + /* + ** file_format==1 Version 3.0.0. + ** file_format==2 Version 3.1.3. // ALTER TABLE ADD COLUMN + ** file_format==3 Version 3.1.4. // ditto but with non-NULL defaults + ** file_format==4 Version 3.3.0. // DESC indices. Boolean constants + */ + pDb->pSchema->file_format = (u8)meta[BTREE_FILE_FORMAT-1]; + if( pDb->pSchema->file_format==0 ){ + pDb->pSchema->file_format = 1; + } + if( pDb->pSchema->file_format>SQLITE_MAX_FILE_FORMAT ){ + sqlite3SetString(pzErrMsg, db, "unsupported file format"); + rc = SQLITE_ERROR; + goto initone_error_out; + } + + /* Ticket #2804: When we open a database in the newer file format, + ** clear the legacy_file_format pragma flag so that a VACUUM will + ** not downgrade the database and thus invalidate any descending + ** indices that the user might have created. + */ + if( iDb==0 && meta[BTREE_FILE_FORMAT-1]>=4 ){ + db->flags &= ~(u64)SQLITE_LegacyFileFmt; + } + + /* Read the schema information out of the schema tables + */ + assert( db->init.busy ); + initData.mxPage = sqlite3BtreeLastPage(pDb->pBt); + { + char *zSql; + zSql = sqlite3MPrintf(db, + "SELECT*FROM\"%w\".%s ORDER BY rowid", + db->aDb[iDb].zDbSName, zSchemaTabName); +#ifndef SQLITE_OMIT_AUTHORIZATION + { + sqlite3_xauth xAuth; + xAuth = db->xAuth; + db->xAuth = 0; +#endif + rc = sqlite3_exec(db, zSql, sqlite3InitCallback, &initData, 0); +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = xAuth; + } +#endif + if( rc==SQLITE_OK ) rc = initData.rc; + sqlite3DbFree(db, zSql); +#ifndef SQLITE_OMIT_ANALYZE + if( rc==SQLITE_OK ){ + sqlite3AnalysisLoad(db, iDb); + } +#endif + } + assert( pDb == &(db->aDb[iDb]) ); + if( db->mallocFailed ){ + rc = SQLITE_NOMEM_BKPT; + sqlite3ResetAllSchemasOfConnection(db); + pDb = &db->aDb[iDb]; + }else + if( rc==SQLITE_OK || ((db->flags&SQLITE_NoSchemaError) && rc!=SQLITE_NOMEM)){ + /* Hack: If the SQLITE_NoSchemaError flag is set, then consider + ** the schema loaded, even if errors (other than OOM) occurred. In + ** this situation the current sqlite3_prepare() operation will fail, + ** but the following one will attempt to compile the supplied statement + ** against whatever subset of the schema was loaded before the error + ** occurred. + ** + ** The primary purpose of this is to allow access to the sqlite_schema + ** table even when its contents have been corrupted. + */ + DbSetProperty(db, iDb, DB_SchemaLoaded); + rc = SQLITE_OK; + } + + /* Jump here for an error that occurs after successfully allocating + ** curMain and calling sqlite3BtreeEnter(). For an error that occurs + ** before that point, jump to error_out. + */ +initone_error_out: + if( openedTransaction ){ + sqlite3BtreeCommit(pDb->pBt); + } + sqlite3BtreeLeave(pDb->pBt); + +error_out: + if( rc ){ + if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){ + sqlite3OomFault(db); + } + sqlite3ResetOneSchema(db, iDb); + } + db->init.busy = 0; + return rc; +} + +/* +** Initialize all database files - the main database file, the file +** used to store temporary tables, and any additional database files +** created using ATTACH statements. Return a success code. If an +** error occurs, write an error message into *pzErrMsg. +** +** After a database is initialized, the DB_SchemaLoaded bit is set +** bit is set in the flags field of the Db structure. +*/ +SQLITE_PRIVATE int sqlite3Init(sqlite3 *db, char **pzErrMsg){ + int i, rc; + int commit_internal = !(db->mDbFlags&DBFLAG_SchemaChange); + + assert( sqlite3_mutex_held(db->mutex) ); + assert( sqlite3BtreeHoldsMutex(db->aDb[0].pBt) ); + assert( db->init.busy==0 ); + ENC(db) = SCHEMA_ENC(db); + assert( db->nDb>0 ); + /* Do the main schema first */ + if( !DbHasProperty(db, 0, DB_SchemaLoaded) ){ + rc = sqlite3InitOne(db, 0, pzErrMsg, 0); + if( rc ) return rc; + } + /* All other schemas after the main schema. The "temp" schema must be last */ + for(i=db->nDb-1; i>0; i--){ + assert( i==1 || sqlite3BtreeHoldsMutex(db->aDb[i].pBt) ); + if( !DbHasProperty(db, i, DB_SchemaLoaded) ){ + rc = sqlite3InitOne(db, i, pzErrMsg, 0); + if( rc ) return rc; + } + } + if( commit_internal ){ + sqlite3CommitInternalChanges(db); + } + return SQLITE_OK; +} + +/* +** This routine is a no-op if the database schema is already initialized. +** Otherwise, the schema is loaded. An error code is returned. +*/ +SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse){ + int rc = SQLITE_OK; + sqlite3 *db = pParse->db; + assert( sqlite3_mutex_held(db->mutex) ); + if( !db->init.busy ){ + rc = sqlite3Init(db, &pParse->zErrMsg); + if( rc!=SQLITE_OK ){ + pParse->rc = rc; + pParse->nErr++; + }else if( db->noSharedCache ){ + db->mDbFlags |= DBFLAG_SchemaKnownOk; + } + } + return rc; +} + + +/* +** Check schema cookies in all databases. If any cookie is out +** of date set pParse->rc to SQLITE_SCHEMA. If all schema cookies +** make no changes to pParse->rc. +*/ +static void schemaIsValid(Parse *pParse){ + sqlite3 *db = pParse->db; + int iDb; + int rc; + int cookie; + + assert( pParse->checkSchema ); + assert( sqlite3_mutex_held(db->mutex) ); + for(iDb=0; iDbnDb; iDb++){ + int openedTransaction = 0; /* True if a transaction is opened */ + Btree *pBt = db->aDb[iDb].pBt; /* Btree database to read cookie from */ + if( pBt==0 ) continue; + + /* If there is not already a read-only (or read-write) transaction opened + ** on the b-tree database, open one now. If a transaction is opened, it + ** will be closed immediately after reading the meta-value. */ + if( sqlite3BtreeTxnState(pBt)==SQLITE_TXN_NONE ){ + rc = sqlite3BtreeBeginTrans(pBt, 0, 0); + if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){ + sqlite3OomFault(db); + pParse->rc = SQLITE_NOMEM; + } + if( rc!=SQLITE_OK ) return; + openedTransaction = 1; + } + + /* Read the schema cookie from the database. If it does not match the + ** value stored as part of the in-memory schema representation, + ** set Parse.rc to SQLITE_SCHEMA. */ + sqlite3BtreeGetMeta(pBt, BTREE_SCHEMA_VERSION, (u32 *)&cookie); + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( cookie!=db->aDb[iDb].pSchema->schema_cookie ){ + sqlite3ResetOneSchema(db, iDb); + pParse->rc = SQLITE_SCHEMA; + } + + /* Close the transaction, if one was opened. */ + if( openedTransaction ){ + sqlite3BtreeCommit(pBt); + } + } +} + +/* +** Convert a schema pointer into the iDb index that indicates +** which database file in db->aDb[] the schema refers to. +** +** If the same database is attached more than once, the first +** attached database is returned. +*/ +SQLITE_PRIVATE int sqlite3SchemaToIndex(sqlite3 *db, Schema *pSchema){ + int i = -32768; + + /* If pSchema is NULL, then return -32768. This happens when code in + ** expr.c is trying to resolve a reference to a transient table (i.e. one + ** created by a sub-select). In this case the return value of this + ** function should never be used. + ** + ** We return -32768 instead of the more usual -1 simply because using + ** -32768 as the incorrect index into db->aDb[] is much + ** more likely to cause a segfault than -1 (of course there are assert() + ** statements too, but it never hurts to play the odds) and + ** -32768 will still fit into a 16-bit signed integer. + */ + assert( sqlite3_mutex_held(db->mutex) ); + if( pSchema ){ + for(i=0; 1; i++){ + assert( inDb ); + if( db->aDb[i].pSchema==pSchema ){ + break; + } + } + assert( i>=0 && inDb ); + } + return i; +} + +/* +** Free all memory allocations in the pParse object +*/ +SQLITE_PRIVATE void sqlite3ParseObjectReset(Parse *pParse){ + sqlite3 *db = pParse->db; + assert( db!=0 ); + assert( db->pParse==pParse ); + assert( pParse->nested==0 ); +#ifndef SQLITE_OMIT_SHARED_CACHE + sqlite3DbFree(db, pParse->aTableLock); +#endif + while( pParse->pCleanup ){ + ParseCleanup *pCleanup = pParse->pCleanup; + pParse->pCleanup = pCleanup->pNext; + pCleanup->xCleanup(db, pCleanup->pPtr); + sqlite3DbFreeNN(db, pCleanup); + } + sqlite3DbFree(db, pParse->aLabel); + if( pParse->pConstExpr ){ + sqlite3ExprListDelete(db, pParse->pConstExpr); + } + assert( db->lookaside.bDisable >= pParse->disableLookaside ); + db->lookaside.bDisable -= pParse->disableLookaside; + db->lookaside.sz = db->lookaside.bDisable ? 0 : db->lookaside.szTrue; + assert( pParse->db->pParse==pParse ); + db->pParse = pParse->pOuterParse; + pParse->db = 0; + pParse->disableLookaside = 0; +} + +/* +** Add a new cleanup operation to a Parser. The cleanup should happen when +** the parser object is destroyed. But, beware: the cleanup might happen +** immediately. +** +** Use this mechanism for uncommon cleanups. There is a higher setup +** cost for this mechansim (an extra malloc), so it should not be used +** for common cleanups that happen on most calls. But for less +** common cleanups, we save a single NULL-pointer comparison in +** sqlite3ParseObjectReset(), which reduces the total CPU cycle count. +** +** If a memory allocation error occurs, then the cleanup happens immediately. +** When either SQLITE_DEBUG or SQLITE_COVERAGE_TEST are defined, the +** pParse->earlyCleanup flag is set in that case. Calling code show verify +** that test cases exist for which this happens, to guard against possible +** use-after-free errors following an OOM. The preferred way to do this is +** to immediately follow the call to this routine with: +** +** testcase( pParse->earlyCleanup ); +** +** This routine returns a copy of its pPtr input (the third parameter) +** except if an early cleanup occurs, in which case it returns NULL. So +** another way to check for early cleanup is to check the return value. +** Or, stop using the pPtr parameter with this call and use only its +** return value thereafter. Something like this: +** +** pObj = sqlite3ParserAddCleanup(pParse, destructor, pObj); +*/ +SQLITE_PRIVATE void *sqlite3ParserAddCleanup( + Parse *pParse, /* Destroy when this Parser finishes */ + void (*xCleanup)(sqlite3*,void*), /* The cleanup routine */ + void *pPtr /* Pointer to object to be cleaned up */ +){ + ParseCleanup *pCleanup = sqlite3DbMallocRaw(pParse->db, sizeof(*pCleanup)); + if( pCleanup ){ + pCleanup->pNext = pParse->pCleanup; + pParse->pCleanup = pCleanup; + pCleanup->pPtr = pPtr; + pCleanup->xCleanup = xCleanup; + }else{ + xCleanup(pParse->db, pPtr); + pPtr = 0; +#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) + pParse->earlyCleanup = 1; +#endif + } + return pPtr; +} + +/* +** Turn bulk memory into a valid Parse object and link that Parse object +** into database connection db. +** +** Call sqlite3ParseObjectReset() to undo this operation. +** +** Caution: Do not confuse this routine with sqlite3ParseObjectInit() which +** is generated by Lemon. +*/ +SQLITE_PRIVATE void sqlite3ParseObjectInit(Parse *pParse, sqlite3 *db){ + memset(PARSE_HDR(pParse), 0, PARSE_HDR_SZ); + memset(PARSE_TAIL(pParse), 0, PARSE_TAIL_SZ); + assert( db->pParse!=pParse ); + pParse->pOuterParse = db->pParse; + db->pParse = pParse; + pParse->db = db; + if( db->mallocFailed ) sqlite3ErrorMsg(pParse, "out of memory"); +} + +/* +** Maximum number of times that we will try again to prepare a statement +** that returns SQLITE_ERROR_RETRY. +*/ +#ifndef SQLITE_MAX_PREPARE_RETRY +# define SQLITE_MAX_PREPARE_RETRY 25 +#endif + +/* +** Compile the UTF-8 encoded SQL statement zSql into a statement handle. +*/ +static int sqlite3Prepare( + sqlite3 *db, /* Database handle. */ + const char *zSql, /* UTF-8 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + u32 prepFlags, /* Zero or more SQLITE_PREPARE_* flags */ + Vdbe *pReprepare, /* VM being reprepared */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const char **pzTail /* OUT: End of parsed string */ +){ + int rc = SQLITE_OK; /* Result code */ + int i; /* Loop counter */ + Parse sParse; /* Parsing context */ + + /* sqlite3ParseObjectInit(&sParse, db); // inlined for performance */ + memset(PARSE_HDR(&sParse), 0, PARSE_HDR_SZ); + memset(PARSE_TAIL(&sParse), 0, PARSE_TAIL_SZ); + sParse.pOuterParse = db->pParse; + db->pParse = &sParse; + sParse.db = db; + sParse.pReprepare = pReprepare; + assert( ppStmt && *ppStmt==0 ); + if( db->mallocFailed ) sqlite3ErrorMsg(&sParse, "out of memory"); + assert( sqlite3_mutex_held(db->mutex) ); + + /* For a long-term use prepared statement avoid the use of + ** lookaside memory. + */ + if( prepFlags & SQLITE_PREPARE_PERSISTENT ){ + sParse.disableLookaside++; + DisableLookaside; + } + sParse.disableVtab = (prepFlags & SQLITE_PREPARE_NO_VTAB)!=0; + + /* Check to verify that it is possible to get a read lock on all + ** database schemas. The inability to get a read lock indicates that + ** some other database connection is holding a write-lock, which in + ** turn means that the other connection has made uncommitted changes + ** to the schema. + ** + ** Were we to proceed and prepare the statement against the uncommitted + ** schema changes and if those schema changes are subsequently rolled + ** back and different changes are made in their place, then when this + ** prepared statement goes to run the schema cookie would fail to detect + ** the schema change. Disaster would follow. + ** + ** This thread is currently holding mutexes on all Btrees (because + ** of the sqlite3BtreeEnterAll() in sqlite3LockAndPrepare()) so it + ** is not possible for another thread to start a new schema change + ** while this routine is running. Hence, we do not need to hold + ** locks on the schema, we just need to make sure nobody else is + ** holding them. + ** + ** Note that setting READ_UNCOMMITTED overrides most lock detection, + ** but it does *not* override schema lock detection, so this all still + ** works even if READ_UNCOMMITTED is set. + */ + if( !db->noSharedCache ){ + for(i=0; inDb; i++) { + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + assert( sqlite3BtreeHoldsMutex(pBt) ); + rc = sqlite3BtreeSchemaLocked(pBt); + if( rc ){ + const char *zDb = db->aDb[i].zDbSName; + sqlite3ErrorWithMsg(db, rc, "database schema is locked: %s", zDb); + testcase( db->flags & SQLITE_ReadUncommit ); + goto end_prepare; + } + } + } + } + + sqlite3VtabUnlockList(db); + + if( nBytes>=0 && (nBytes==0 || zSql[nBytes-1]!=0) ){ + char *zSqlCopy; + int mxLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; + testcase( nBytes==mxLen ); + testcase( nBytes==mxLen+1 ); + if( nBytes>mxLen ){ + sqlite3ErrorWithMsg(db, SQLITE_TOOBIG, "statement too long"); + rc = sqlite3ApiExit(db, SQLITE_TOOBIG); + goto end_prepare; + } + zSqlCopy = sqlite3DbStrNDup(db, zSql, nBytes); + if( zSqlCopy ){ + sqlite3RunParser(&sParse, zSqlCopy); + sParse.zTail = &zSql[sParse.zTail-zSqlCopy]; + sqlite3DbFree(db, zSqlCopy); + }else{ + sParse.zTail = &zSql[nBytes]; + } + }else{ + sqlite3RunParser(&sParse, zSql); + } + assert( 0==sParse.nQueryLoop ); + + if( pzTail ){ + *pzTail = sParse.zTail; + } + + if( db->init.busy==0 ){ + sqlite3VdbeSetSql(sParse.pVdbe, zSql, (int)(sParse.zTail-zSql), prepFlags); + } + if( db->mallocFailed ){ + sParse.rc = SQLITE_NOMEM_BKPT; + sParse.checkSchema = 0; + } + if( sParse.rc!=SQLITE_OK && sParse.rc!=SQLITE_DONE ){ + if( sParse.checkSchema && db->init.busy==0 ){ + schemaIsValid(&sParse); + } + if( sParse.pVdbe ){ + sqlite3VdbeFinalize(sParse.pVdbe); + } + assert( 0==(*ppStmt) ); + rc = sParse.rc; + if( sParse.zErrMsg ){ + sqlite3ErrorWithMsg(db, rc, "%s", sParse.zErrMsg); + sqlite3DbFree(db, sParse.zErrMsg); + }else{ + sqlite3Error(db, rc); + } + }else{ + assert( sParse.zErrMsg==0 ); + *ppStmt = (sqlite3_stmt*)sParse.pVdbe; + rc = SQLITE_OK; + sqlite3ErrorClear(db); + } + + + /* Delete any TriggerPrg structures allocated while parsing this statement. */ + while( sParse.pTriggerPrg ){ + TriggerPrg *pT = sParse.pTriggerPrg; + sParse.pTriggerPrg = pT->pNext; + sqlite3DbFree(db, pT); + } + +end_prepare: + + sqlite3ParseObjectReset(&sParse); + return rc; +} +static int sqlite3LockAndPrepare( + sqlite3 *db, /* Database handle. */ + const char *zSql, /* UTF-8 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + u32 prepFlags, /* Zero or more SQLITE_PREPARE_* flags */ + Vdbe *pOld, /* VM being reprepared */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const char **pzTail /* OUT: End of parsed string */ +){ + int rc; + int cnt = 0; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppStmt==0 ) return SQLITE_MISUSE_BKPT; +#endif + *ppStmt = 0; + if( !sqlite3SafetyCheckOk(db)||zSql==0 ){ + return SQLITE_MISUSE_BKPT; + } + sqlite3_mutex_enter(db->mutex); + sqlite3BtreeEnterAll(db); + do{ + /* Make multiple attempts to compile the SQL, until it either succeeds + ** or encounters a permanent error. A schema problem after one schema + ** reset is considered a permanent error. */ + rc = sqlite3Prepare(db, zSql, nBytes, prepFlags, pOld, ppStmt, pzTail); + assert( rc==SQLITE_OK || *ppStmt==0 ); + if( rc==SQLITE_OK || db->mallocFailed ) break; + }while( (rc==SQLITE_ERROR_RETRY && (cnt++)errMask)==rc ); + db->busyHandler.nBusy = 0; + sqlite3_mutex_leave(db->mutex); + return rc; +} + + +/* +** Rerun the compilation of a statement after a schema change. +** +** If the statement is successfully recompiled, return SQLITE_OK. Otherwise, +** if the statement cannot be recompiled because another connection has +** locked the sqlite3_schema table, return SQLITE_LOCKED. If any other error +** occurs, return SQLITE_SCHEMA. +*/ +SQLITE_PRIVATE int sqlite3Reprepare(Vdbe *p){ + int rc; + sqlite3_stmt *pNew; + const char *zSql; + sqlite3 *db; + u8 prepFlags; + + assert( sqlite3_mutex_held(sqlite3VdbeDb(p)->mutex) ); + zSql = sqlite3_sql((sqlite3_stmt *)p); + assert( zSql!=0 ); /* Reprepare only called for prepare_v2() statements */ + db = sqlite3VdbeDb(p); + assert( sqlite3_mutex_held(db->mutex) ); + prepFlags = sqlite3VdbePrepareFlags(p); + rc = sqlite3LockAndPrepare(db, zSql, -1, prepFlags, p, &pNew, 0); + if( rc ){ + if( rc==SQLITE_NOMEM ){ + sqlite3OomFault(db); + } + assert( pNew==0 ); + return rc; + }else{ + assert( pNew!=0 ); + } + sqlite3VdbeSwap((Vdbe*)pNew, p); + sqlite3TransferBindings(pNew, (sqlite3_stmt*)p); + sqlite3VdbeResetStepResult((Vdbe*)pNew); + sqlite3VdbeFinalize((Vdbe*)pNew); + return SQLITE_OK; +} + + +/* +** Two versions of the official API. Legacy and new use. In the legacy +** version, the original SQL text is not saved in the prepared statement +** and so if a schema change occurs, SQLITE_SCHEMA is returned by +** sqlite3_step(). In the new version, the original SQL text is retained +** and the statement is automatically recompiled if an schema change +** occurs. +*/ +SQLITE_API int sqlite3_prepare( + sqlite3 *db, /* Database handle. */ + const char *zSql, /* UTF-8 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const char **pzTail /* OUT: End of parsed string */ +){ + int rc; + rc = sqlite3LockAndPrepare(db,zSql,nBytes,0,0,ppStmt,pzTail); + assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 ); /* VERIFY: F13021 */ + return rc; +} +SQLITE_API int sqlite3_prepare_v2( + sqlite3 *db, /* Database handle. */ + const char *zSql, /* UTF-8 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const char **pzTail /* OUT: End of parsed string */ +){ + int rc; + /* EVIDENCE-OF: R-37923-12173 The sqlite3_prepare_v2() interface works + ** exactly the same as sqlite3_prepare_v3() with a zero prepFlags + ** parameter. + ** + ** Proof in that the 5th parameter to sqlite3LockAndPrepare is 0 */ + rc = sqlite3LockAndPrepare(db,zSql,nBytes,SQLITE_PREPARE_SAVESQL,0, + ppStmt,pzTail); + assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 ); + return rc; +} +SQLITE_API int sqlite3_prepare_v3( + sqlite3 *db, /* Database handle. */ + const char *zSql, /* UTF-8 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + unsigned int prepFlags, /* Zero or more SQLITE_PREPARE_* flags */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const char **pzTail /* OUT: End of parsed string */ +){ + int rc; + /* EVIDENCE-OF: R-56861-42673 sqlite3_prepare_v3() differs from + ** sqlite3_prepare_v2() only in having the extra prepFlags parameter, + ** which is a bit array consisting of zero or more of the + ** SQLITE_PREPARE_* flags. + ** + ** Proof by comparison to the implementation of sqlite3_prepare_v2() + ** directly above. */ + rc = sqlite3LockAndPrepare(db,zSql,nBytes, + SQLITE_PREPARE_SAVESQL|(prepFlags&SQLITE_PREPARE_MASK), + 0,ppStmt,pzTail); + assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 ); + return rc; +} + + +#ifndef SQLITE_OMIT_UTF16 +/* +** Compile the UTF-16 encoded SQL statement zSql into a statement handle. +*/ +static int sqlite3Prepare16( + sqlite3 *db, /* Database handle. */ + const void *zSql, /* UTF-16 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + u32 prepFlags, /* Zero or more SQLITE_PREPARE_* flags */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const void **pzTail /* OUT: End of parsed string */ +){ + /* This function currently works by first transforming the UTF-16 + ** encoded string to UTF-8, then invoking sqlite3_prepare(). The + ** tricky bit is figuring out the pointer to return in *pzTail. + */ + char *zSql8; + const char *zTail8 = 0; + int rc = SQLITE_OK; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppStmt==0 ) return SQLITE_MISUSE_BKPT; +#endif + *ppStmt = 0; + if( !sqlite3SafetyCheckOk(db)||zSql==0 ){ + return SQLITE_MISUSE_BKPT; + } + if( nBytes>=0 ){ + int sz; + const char *z = (const char*)zSql; + for(sz=0; szmutex); + zSql8 = sqlite3Utf16to8(db, zSql, nBytes, SQLITE_UTF16NATIVE); + if( zSql8 ){ + rc = sqlite3LockAndPrepare(db, zSql8, -1, prepFlags, 0, ppStmt, &zTail8); + } + + if( zTail8 && pzTail ){ + /* If sqlite3_prepare returns a tail pointer, we calculate the + ** equivalent pointer into the UTF-16 string by counting the unicode + ** characters between zSql8 and zTail8, and then returning a pointer + ** the same number of characters into the UTF-16 string. + */ + int chars_parsed = sqlite3Utf8CharLen(zSql8, (int)(zTail8-zSql8)); + *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, chars_parsed); + } + sqlite3DbFree(db, zSql8); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** Two versions of the official API. Legacy and new use. In the legacy +** version, the original SQL text is not saved in the prepared statement +** and so if a schema change occurs, SQLITE_SCHEMA is returned by +** sqlite3_step(). In the new version, the original SQL text is retained +** and the statement is automatically recompiled if an schema change +** occurs. +*/ +SQLITE_API int sqlite3_prepare16( + sqlite3 *db, /* Database handle. */ + const void *zSql, /* UTF-16 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const void **pzTail /* OUT: End of parsed string */ +){ + int rc; + rc = sqlite3Prepare16(db,zSql,nBytes,0,ppStmt,pzTail); + assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 ); /* VERIFY: F13021 */ + return rc; +} +SQLITE_API int sqlite3_prepare16_v2( + sqlite3 *db, /* Database handle. */ + const void *zSql, /* UTF-16 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const void **pzTail /* OUT: End of parsed string */ +){ + int rc; + rc = sqlite3Prepare16(db,zSql,nBytes,SQLITE_PREPARE_SAVESQL,ppStmt,pzTail); + assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 ); /* VERIFY: F13021 */ + return rc; +} +SQLITE_API int sqlite3_prepare16_v3( + sqlite3 *db, /* Database handle. */ + const void *zSql, /* UTF-16 encoded SQL statement. */ + int nBytes, /* Length of zSql in bytes. */ + unsigned int prepFlags, /* Zero or more SQLITE_PREPARE_* flags */ + sqlite3_stmt **ppStmt, /* OUT: A pointer to the prepared statement */ + const void **pzTail /* OUT: End of parsed string */ +){ + int rc; + rc = sqlite3Prepare16(db,zSql,nBytes, + SQLITE_PREPARE_SAVESQL|(prepFlags&SQLITE_PREPARE_MASK), + ppStmt,pzTail); + assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 ); /* VERIFY: F13021 */ + return rc; +} + +#endif /* SQLITE_OMIT_UTF16 */ + +/************** End of prepare.c *********************************************/ +/************** Begin file select.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains C code routines that are called by the parser +** to handle SELECT statements in SQLite. +*/ +/* #include "sqliteInt.h" */ + +/* +** An instance of the following object is used to record information about +** how to process the DISTINCT keyword, to simplify passing that information +** into the selectInnerLoop() routine. +*/ +typedef struct DistinctCtx DistinctCtx; +struct DistinctCtx { + u8 isTnct; /* True if the DISTINCT keyword is present */ + u8 eTnctType; /* One of the WHERE_DISTINCT_* operators */ + int tabTnct; /* Ephemeral table used for DISTINCT processing */ + int addrTnct; /* Address of OP_OpenEphemeral opcode for tabTnct */ +}; + +/* +** An instance of the following object is used to record information about +** the ORDER BY (or GROUP BY) clause of query is being coded. +** +** The aDefer[] array is used by the sorter-references optimization. For +** example, assuming there is no index that can be used for the ORDER BY, +** for the query: +** +** SELECT a, bigblob FROM t1 ORDER BY a LIMIT 10; +** +** it may be more efficient to add just the "a" values to the sorter, and +** retrieve the associated "bigblob" values directly from table t1 as the +** 10 smallest "a" values are extracted from the sorter. +** +** When the sorter-reference optimization is used, there is one entry in the +** aDefer[] array for each database table that may be read as values are +** extracted from the sorter. +*/ +typedef struct SortCtx SortCtx; +struct SortCtx { + ExprList *pOrderBy; /* The ORDER BY (or GROUP BY clause) */ + int nOBSat; /* Number of ORDER BY terms satisfied by indices */ + int iECursor; /* Cursor number for the sorter */ + int regReturn; /* Register holding block-output return address */ + int labelBkOut; /* Start label for the block-output subroutine */ + int addrSortIndex; /* Address of the OP_SorterOpen or OP_OpenEphemeral */ + int labelDone; /* Jump here when done, ex: LIMIT reached */ + int labelOBLopt; /* Jump here when sorter is full */ + u8 sortFlags; /* Zero or more SORTFLAG_* bits */ +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + u8 nDefer; /* Number of valid entries in aDefer[] */ + struct DeferredCsr { + Table *pTab; /* Table definition */ + int iCsr; /* Cursor number for table */ + int nKey; /* Number of PK columns for table pTab (>=1) */ + } aDefer[4]; +#endif + struct RowLoadInfo *pDeferredRowLoad; /* Deferred row loading info or NULL */ +}; +#define SORTFLAG_UseSorter 0x01 /* Use SorterOpen instead of OpenEphemeral */ + +/* +** Delete all the content of a Select structure. Deallocate the structure +** itself depending on the value of bFree +** +** If bFree==1, call sqlite3DbFree() on the p object. +** If bFree==0, Leave the first Select object unfreed +*/ +static void clearSelect(sqlite3 *db, Select *p, int bFree){ + while( p ){ + Select *pPrior = p->pPrior; + sqlite3ExprListDelete(db, p->pEList); + sqlite3SrcListDelete(db, p->pSrc); + sqlite3ExprDelete(db, p->pWhere); + sqlite3ExprListDelete(db, p->pGroupBy); + sqlite3ExprDelete(db, p->pHaving); + sqlite3ExprListDelete(db, p->pOrderBy); + sqlite3ExprDelete(db, p->pLimit); + if( OK_IF_ALWAYS_TRUE(p->pWith) ) sqlite3WithDelete(db, p->pWith); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( OK_IF_ALWAYS_TRUE(p->pWinDefn) ){ + sqlite3WindowListDelete(db, p->pWinDefn); + } + while( p->pWin ){ + assert( p->pWin->ppThis==&p->pWin ); + sqlite3WindowUnlinkFromSelect(p->pWin); + } +#endif + if( bFree ) sqlite3DbFreeNN(db, p); + p = pPrior; + bFree = 1; + } +} + +/* +** Initialize a SelectDest structure. +*/ +SQLITE_PRIVATE void sqlite3SelectDestInit(SelectDest *pDest, int eDest, int iParm){ + pDest->eDest = (u8)eDest; + pDest->iSDParm = iParm; + pDest->iSDParm2 = 0; + pDest->zAffSdst = 0; + pDest->iSdst = 0; + pDest->nSdst = 0; +} + + +/* +** Allocate a new Select structure and return a pointer to that +** structure. +*/ +SQLITE_PRIVATE Select *sqlite3SelectNew( + Parse *pParse, /* Parsing context */ + ExprList *pEList, /* which columns to include in the result */ + SrcList *pSrc, /* the FROM clause -- which tables to scan */ + Expr *pWhere, /* the WHERE clause */ + ExprList *pGroupBy, /* the GROUP BY clause */ + Expr *pHaving, /* the HAVING clause */ + ExprList *pOrderBy, /* the ORDER BY clause */ + u32 selFlags, /* Flag parameters, such as SF_Distinct */ + Expr *pLimit /* LIMIT value. NULL means not used */ +){ + Select *pNew, *pAllocated; + Select standin; + pAllocated = pNew = sqlite3DbMallocRawNN(pParse->db, sizeof(*pNew) ); + if( pNew==0 ){ + assert( pParse->db->mallocFailed ); + pNew = &standin; + } + if( pEList==0 ){ + pEList = sqlite3ExprListAppend(pParse, 0, + sqlite3Expr(pParse->db,TK_ASTERISK,0)); + } + pNew->pEList = pEList; + pNew->op = TK_SELECT; + pNew->selFlags = selFlags; + pNew->iLimit = 0; + pNew->iOffset = 0; + pNew->selId = ++pParse->nSelect; + pNew->addrOpenEphm[0] = -1; + pNew->addrOpenEphm[1] = -1; + pNew->nSelectRow = 0; + if( pSrc==0 ) pSrc = sqlite3DbMallocZero(pParse->db, sizeof(*pSrc)); + pNew->pSrc = pSrc; + pNew->pWhere = pWhere; + pNew->pGroupBy = pGroupBy; + pNew->pHaving = pHaving; + pNew->pOrderBy = pOrderBy; + pNew->pPrior = 0; + pNew->pNext = 0; + pNew->pLimit = pLimit; + pNew->pWith = 0; +#ifndef SQLITE_OMIT_WINDOWFUNC + pNew->pWin = 0; + pNew->pWinDefn = 0; +#endif + if( pParse->db->mallocFailed ) { + clearSelect(pParse->db, pNew, pNew!=&standin); + pAllocated = 0; + }else{ + assert( pNew->pSrc!=0 || pParse->nErr>0 ); + } + return pAllocated; +} + + +/* +** Delete the given Select structure and all of its substructures. +*/ +SQLITE_PRIVATE void sqlite3SelectDelete(sqlite3 *db, Select *p){ + if( OK_IF_ALWAYS_TRUE(p) ) clearSelect(db, p, 1); +} + +/* +** Return a pointer to the right-most SELECT statement in a compound. +*/ +static Select *findRightmost(Select *p){ + while( p->pNext ) p = p->pNext; + return p; +} + +/* +** Given 1 to 3 identifiers preceding the JOIN keyword, determine the +** type of join. Return an integer constant that expresses that type +** in terms of the following bit values: +** +** JT_INNER +** JT_CROSS +** JT_OUTER +** JT_NATURAL +** JT_LEFT +** JT_RIGHT +** +** A full outer join is the combination of JT_LEFT and JT_RIGHT. +** +** If an illegal or unsupported join type is seen, then still return +** a join type, but put an error in the pParse structure. +*/ +SQLITE_PRIVATE int sqlite3JoinType(Parse *pParse, Token *pA, Token *pB, Token *pC){ + int jointype = 0; + Token *apAll[3]; + Token *p; + /* 0123456789 123456789 123456789 123 */ + static const char zKeyText[] = "naturaleftouterightfullinnercross"; + static const struct { + u8 i; /* Beginning of keyword text in zKeyText[] */ + u8 nChar; /* Length of the keyword in characters */ + u8 code; /* Join type mask */ + } aKeyword[] = { + /* natural */ { 0, 7, JT_NATURAL }, + /* left */ { 6, 4, JT_LEFT|JT_OUTER }, + /* outer */ { 10, 5, JT_OUTER }, + /* right */ { 14, 5, JT_RIGHT|JT_OUTER }, + /* full */ { 19, 4, JT_LEFT|JT_RIGHT|JT_OUTER }, + /* inner */ { 23, 5, JT_INNER }, + /* cross */ { 28, 5, JT_INNER|JT_CROSS }, + }; + int i, j; + apAll[0] = pA; + apAll[1] = pB; + apAll[2] = pC; + for(i=0; i<3 && apAll[i]; i++){ + p = apAll[i]; + for(j=0; jn==aKeyword[j].nChar + && sqlite3StrNICmp((char*)p->z, &zKeyText[aKeyword[j].i], p->n)==0 ){ + jointype |= aKeyword[j].code; + break; + } + } + testcase( j==0 || j==1 || j==2 || j==3 || j==4 || j==5 || j==6 ); + if( j>=ArraySize(aKeyword) ){ + jointype |= JT_ERROR; + break; + } + } + if( + (jointype & (JT_INNER|JT_OUTER))==(JT_INNER|JT_OUTER) || + (jointype & JT_ERROR)!=0 + ){ + const char *zSp = " "; + assert( pB!=0 ); + if( pC==0 ){ zSp++; } + sqlite3ErrorMsg(pParse, "unknown or unsupported join type: " + "%T %T%s%T", pA, pB, zSp, pC); + jointype = JT_INNER; + }else if( (jointype & JT_OUTER)!=0 + && (jointype & (JT_LEFT|JT_RIGHT))!=JT_LEFT ){ + sqlite3ErrorMsg(pParse, + "RIGHT and FULL OUTER JOINs are not currently supported"); + jointype = JT_INNER; + } + return jointype; +} + +/* +** Return the index of a column in a table. Return -1 if the column +** is not contained in the table. +*/ +SQLITE_PRIVATE int sqlite3ColumnIndex(Table *pTab, const char *zCol){ + int i; + u8 h = sqlite3StrIHash(zCol); + Column *pCol; + for(pCol=pTab->aCol, i=0; inCol; pCol++, i++){ + if( pCol->hName==h && sqlite3StrICmp(pCol->zCnName, zCol)==0 ) return i; + } + return -1; +} + +/* +** Search the first N tables in pSrc, from left to right, looking for a +** table that has a column named zCol. +** +** When found, set *piTab and *piCol to the table index and column index +** of the matching column and return TRUE. +** +** If not found, return FALSE. +*/ +static int tableAndColumnIndex( + SrcList *pSrc, /* Array of tables to search */ + int N, /* Number of tables in pSrc->a[] to search */ + const char *zCol, /* Name of the column we are looking for */ + int *piTab, /* Write index of pSrc->a[] here */ + int *piCol, /* Write index of pSrc->a[*piTab].pTab->aCol[] here */ + int bIgnoreHidden /* True to ignore hidden columns */ +){ + int i; /* For looping over tables in pSrc */ + int iCol; /* Index of column matching zCol */ + + assert( (piTab==0)==(piCol==0) ); /* Both or neither are NULL */ + for(i=0; ia[i].pTab, zCol); + if( iCol>=0 + && (bIgnoreHidden==0 || IsHiddenColumn(&pSrc->a[i].pTab->aCol[iCol])==0) + ){ + if( piTab ){ + *piTab = i; + *piCol = iCol; + } + return 1; + } + } + return 0; +} + +/* +** This function is used to add terms implied by JOIN syntax to the +** WHERE clause expression of a SELECT statement. The new term, which +** is ANDed with the existing WHERE clause, is of the form: +** +** (tab1.col1 = tab2.col2) +** +** where tab1 is the iSrc'th table in SrcList pSrc and tab2 is the +** (iSrc+1)'th. Column col1 is column iColLeft of tab1, and col2 is +** column iColRight of tab2. +*/ +static void addWhereTerm( + Parse *pParse, /* Parsing context */ + SrcList *pSrc, /* List of tables in FROM clause */ + int iLeft, /* Index of first table to join in pSrc */ + int iColLeft, /* Index of column in first table */ + int iRight, /* Index of second table in pSrc */ + int iColRight, /* Index of column in second table */ + int isOuterJoin, /* True if this is an OUTER join */ + Expr **ppWhere /* IN/OUT: The WHERE clause to add to */ +){ + sqlite3 *db = pParse->db; + Expr *pE1; + Expr *pE2; + Expr *pEq; + + assert( iLeftnSrc>iRight ); + assert( pSrc->a[iLeft].pTab ); + assert( pSrc->a[iRight].pTab ); + + pE1 = sqlite3CreateColumnExpr(db, pSrc, iLeft, iColLeft); + pE2 = sqlite3CreateColumnExpr(db, pSrc, iRight, iColRight); + + pEq = sqlite3PExpr(pParse, TK_EQ, pE1, pE2); + assert( pE2!=0 || pEq==0 ); /* Due to db->mallocFailed test + ** in sqlite3DbMallocRawNN() called from + ** sqlite3PExpr(). */ + if( pEq && isOuterJoin ){ + ExprSetProperty(pEq, EP_FromJoin); + assert( !ExprHasProperty(pEq, EP_TokenOnly|EP_Reduced) ); + ExprSetVVAProperty(pEq, EP_NoReduce); + pEq->w.iRightJoinTable = pE2->iTable; + } + *ppWhere = sqlite3ExprAnd(pParse, *ppWhere, pEq); +} + +/* +** Set the EP_FromJoin property on all terms of the given expression. +** And set the Expr.w.iRightJoinTable to iTable for every term in the +** expression. +** +** The EP_FromJoin property is used on terms of an expression to tell +** the LEFT OUTER JOIN processing logic that this term is part of the +** join restriction specified in the ON or USING clause and not a part +** of the more general WHERE clause. These terms are moved over to the +** WHERE clause during join processing but we need to remember that they +** originated in the ON or USING clause. +** +** The Expr.w.iRightJoinTable tells the WHERE clause processing that the +** expression depends on table w.iRightJoinTable even if that table is not +** explicitly mentioned in the expression. That information is needed +** for cases like this: +** +** SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.b AND t1.x=5 +** +** The where clause needs to defer the handling of the t1.x=5 +** term until after the t2 loop of the join. In that way, a +** NULL t2 row will be inserted whenever t1.x!=5. If we do not +** defer the handling of t1.x=5, it will be processed immediately +** after the t1 loop and rows with t1.x!=5 will never appear in +** the output, which is incorrect. +*/ +SQLITE_PRIVATE void sqlite3SetJoinExpr(Expr *p, int iTable){ + while( p ){ + ExprSetProperty(p, EP_FromJoin); + assert( !ExprHasProperty(p, EP_TokenOnly|EP_Reduced) ); + ExprSetVVAProperty(p, EP_NoReduce); + p->w.iRightJoinTable = iTable; + if( p->op==TK_FUNCTION ){ + assert( ExprUseXList(p) ); + if( p->x.pList ){ + int i; + for(i=0; ix.pList->nExpr; i++){ + sqlite3SetJoinExpr(p->x.pList->a[i].pExpr, iTable); + } + } + } + sqlite3SetJoinExpr(p->pLeft, iTable); + p = p->pRight; + } +} + +/* Undo the work of sqlite3SetJoinExpr(). In the expression p, convert every +** term that is marked with EP_FromJoin and w.iRightJoinTable==iTable into +** an ordinary term that omits the EP_FromJoin mark. +** +** This happens when a LEFT JOIN is simplified into an ordinary JOIN. +*/ +static void unsetJoinExpr(Expr *p, int iTable){ + while( p ){ + if( ExprHasProperty(p, EP_FromJoin) + && (iTable<0 || p->w.iRightJoinTable==iTable) ){ + ExprClearProperty(p, EP_FromJoin); + } + if( p->op==TK_COLUMN && p->iTable==iTable ){ + ExprClearProperty(p, EP_CanBeNull); + } + if( p->op==TK_FUNCTION ){ + assert( ExprUseXList(p) ); + if( p->x.pList ){ + int i; + for(i=0; ix.pList->nExpr; i++){ + unsetJoinExpr(p->x.pList->a[i].pExpr, iTable); + } + } + } + unsetJoinExpr(p->pLeft, iTable); + p = p->pRight; + } +} + +/* +** This routine processes the join information for a SELECT statement. +** ON and USING clauses are converted into extra terms of the WHERE clause. +** NATURAL joins also create extra WHERE clause terms. +** +** The terms of a FROM clause are contained in the Select.pSrc structure. +** The left most table is the first entry in Select.pSrc. The right-most +** table is the last entry. The join operator is held in the entry to +** the left. Thus entry 0 contains the join operator for the join between +** entries 0 and 1. Any ON or USING clauses associated with the join are +** also attached to the left entry. +** +** This routine returns the number of errors encountered. +*/ +static int sqliteProcessJoin(Parse *pParse, Select *p){ + SrcList *pSrc; /* All tables in the FROM clause */ + int i, j; /* Loop counters */ + SrcItem *pLeft; /* Left table being joined */ + SrcItem *pRight; /* Right table being joined */ + + pSrc = p->pSrc; + pLeft = &pSrc->a[0]; + pRight = &pLeft[1]; + for(i=0; inSrc-1; i++, pRight++, pLeft++){ + Table *pRightTab = pRight->pTab; + int isOuter; + + if( NEVER(pLeft->pTab==0 || pRightTab==0) ) continue; + isOuter = (pRight->fg.jointype & JT_OUTER)!=0; + + /* When the NATURAL keyword is present, add WHERE clause terms for + ** every column that the two tables have in common. + */ + if( pRight->fg.jointype & JT_NATURAL ){ + if( pRight->pOn || pRight->pUsing ){ + sqlite3ErrorMsg(pParse, "a NATURAL join may not have " + "an ON or USING clause", 0); + return 1; + } + for(j=0; jnCol; j++){ + char *zName; /* Name of column in the right table */ + int iLeft; /* Matching left table */ + int iLeftCol; /* Matching column in the left table */ + + if( IsHiddenColumn(&pRightTab->aCol[j]) ) continue; + zName = pRightTab->aCol[j].zCnName; + if( tableAndColumnIndex(pSrc, i+1, zName, &iLeft, &iLeftCol, 1) ){ + addWhereTerm(pParse, pSrc, iLeft, iLeftCol, i+1, j, + isOuter, &p->pWhere); + } + } + } + + /* Disallow both ON and USING clauses in the same join + */ + if( pRight->pOn && pRight->pUsing ){ + sqlite3ErrorMsg(pParse, "cannot have both ON and USING " + "clauses in the same join"); + return 1; + } + + /* Add the ON clause to the end of the WHERE clause, connected by + ** an AND operator. + */ + if( pRight->pOn ){ + if( isOuter ) sqlite3SetJoinExpr(pRight->pOn, pRight->iCursor); + p->pWhere = sqlite3ExprAnd(pParse, p->pWhere, pRight->pOn); + pRight->pOn = 0; + } + + /* Create extra terms on the WHERE clause for each column named + ** in the USING clause. Example: If the two tables to be joined are + ** A and B and the USING clause names X, Y, and Z, then add this + ** to the WHERE clause: A.X=B.X AND A.Y=B.Y AND A.Z=B.Z + ** Report an error if any column mentioned in the USING clause is + ** not contained in both tables to be joined. + */ + if( pRight->pUsing ){ + IdList *pList = pRight->pUsing; + for(j=0; jnId; j++){ + char *zName; /* Name of the term in the USING clause */ + int iLeft; /* Table on the left with matching column name */ + int iLeftCol; /* Column number of matching column on the left */ + int iRightCol; /* Column number of matching column on the right */ + + zName = pList->a[j].zName; + iRightCol = sqlite3ColumnIndex(pRightTab, zName); + if( iRightCol<0 + || !tableAndColumnIndex(pSrc, i+1, zName, &iLeft, &iLeftCol, 0) + ){ + sqlite3ErrorMsg(pParse, "cannot join using column %s - column " + "not present in both tables", zName); + return 1; + } + addWhereTerm(pParse, pSrc, iLeft, iLeftCol, i+1, iRightCol, + isOuter, &p->pWhere); + } + } + } + return 0; +} + +/* +** An instance of this object holds information (beyond pParse and pSelect) +** needed to load the next result row that is to be added to the sorter. +*/ +typedef struct RowLoadInfo RowLoadInfo; +struct RowLoadInfo { + int regResult; /* Store results in array of registers here */ + u8 ecelFlags; /* Flag argument to ExprCodeExprList() */ +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + ExprList *pExtra; /* Extra columns needed by sorter refs */ + int regExtraResult; /* Where to load the extra columns */ +#endif +}; + +/* +** This routine does the work of loading query data into an array of +** registers so that it can be added to the sorter. +*/ +static void innerLoopLoadRow( + Parse *pParse, /* Statement under construction */ + Select *pSelect, /* The query being coded */ + RowLoadInfo *pInfo /* Info needed to complete the row load */ +){ + sqlite3ExprCodeExprList(pParse, pSelect->pEList, pInfo->regResult, + 0, pInfo->ecelFlags); +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + if( pInfo->pExtra ){ + sqlite3ExprCodeExprList(pParse, pInfo->pExtra, pInfo->regExtraResult, 0, 0); + sqlite3ExprListDelete(pParse->db, pInfo->pExtra); + } +#endif +} + +/* +** Code the OP_MakeRecord instruction that generates the entry to be +** added into the sorter. +** +** Return the register in which the result is stored. +*/ +static int makeSorterRecord( + Parse *pParse, + SortCtx *pSort, + Select *pSelect, + int regBase, + int nBase +){ + int nOBSat = pSort->nOBSat; + Vdbe *v = pParse->pVdbe; + int regOut = ++pParse->nMem; + if( pSort->pDeferredRowLoad ){ + innerLoopLoadRow(pParse, pSelect, pSort->pDeferredRowLoad); + } + sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nBase-nOBSat, regOut); + return regOut; +} + +/* +** Generate code that will push the record in registers regData +** through regData+nData-1 onto the sorter. +*/ +static void pushOntoSorter( + Parse *pParse, /* Parser context */ + SortCtx *pSort, /* Information about the ORDER BY clause */ + Select *pSelect, /* The whole SELECT statement */ + int regData, /* First register holding data to be sorted */ + int regOrigData, /* First register holding data before packing */ + int nData, /* Number of elements in the regData data array */ + int nPrefixReg /* No. of reg prior to regData available for use */ +){ + Vdbe *v = pParse->pVdbe; /* Stmt under construction */ + int bSeq = ((pSort->sortFlags & SORTFLAG_UseSorter)==0); + int nExpr = pSort->pOrderBy->nExpr; /* No. of ORDER BY terms */ + int nBase = nExpr + bSeq + nData; /* Fields in sorter record */ + int regBase; /* Regs for sorter record */ + int regRecord = 0; /* Assembled sorter record */ + int nOBSat = pSort->nOBSat; /* ORDER BY terms to skip */ + int op; /* Opcode to add sorter record to sorter */ + int iLimit; /* LIMIT counter */ + int iSkip = 0; /* End of the sorter insert loop */ + + assert( bSeq==0 || bSeq==1 ); + + /* Three cases: + ** (1) The data to be sorted has already been packed into a Record + ** by a prior OP_MakeRecord. In this case nData==1 and regData + ** will be completely unrelated to regOrigData. + ** (2) All output columns are included in the sort record. In that + ** case regData==regOrigData. + ** (3) Some output columns are omitted from the sort record due to + ** the SQLITE_ENABLE_SORTER_REFERENCE optimization, or due to the + ** SQLITE_ECEL_OMITREF optimization, or due to the + ** SortCtx.pDeferredRowLoad optimiation. In any of these cases + ** regOrigData is 0 to prevent this routine from trying to copy + ** values that might not yet exist. + */ + assert( nData==1 || regData==regOrigData || regOrigData==0 ); + + if( nPrefixReg ){ + assert( nPrefixReg==nExpr+bSeq ); + regBase = regData - nPrefixReg; + }else{ + regBase = pParse->nMem + 1; + pParse->nMem += nBase; + } + assert( pSelect->iOffset==0 || pSelect->iLimit!=0 ); + iLimit = pSelect->iOffset ? pSelect->iOffset+1 : pSelect->iLimit; + pSort->labelDone = sqlite3VdbeMakeLabel(pParse); + sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, regOrigData, + SQLITE_ECEL_DUP | (regOrigData? SQLITE_ECEL_REF : 0)); + if( bSeq ){ + sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr); + } + if( nPrefixReg==0 && nData>0 ){ + sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+bSeq, nData); + } + if( nOBSat>0 ){ + int regPrevKey; /* The first nOBSat columns of the previous row */ + int addrFirst; /* Address of the OP_IfNot opcode */ + int addrJmp; /* Address of the OP_Jump opcode */ + VdbeOp *pOp; /* Opcode that opens the sorter */ + int nKey; /* Number of sorting key columns, including OP_Sequence */ + KeyInfo *pKI; /* Original KeyInfo on the sorter table */ + + regRecord = makeSorterRecord(pParse, pSort, pSelect, regBase, nBase); + regPrevKey = pParse->nMem+1; + pParse->nMem += pSort->nOBSat; + nKey = nExpr - pSort->nOBSat + bSeq; + if( bSeq ){ + addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); + }else{ + addrFirst = sqlite3VdbeAddOp1(v, OP_SequenceTest, pSort->iECursor); + } + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_Compare, regPrevKey, regBase, pSort->nOBSat); + pOp = sqlite3VdbeGetOp(v, pSort->addrSortIndex); + if( pParse->db->mallocFailed ) return; + pOp->p2 = nKey + nData; + pKI = pOp->p4.pKeyInfo; + memset(pKI->aSortFlags, 0, pKI->nKeyField); /* Makes OP_Jump testable */ + sqlite3VdbeChangeP4(v, -1, (char*)pKI, P4_KEYINFO); + testcase( pKI->nAllField > pKI->nKeyField+2 ); + pOp->p4.pKeyInfo = sqlite3KeyInfoFromExprList(pParse,pSort->pOrderBy,nOBSat, + pKI->nAllField-pKI->nKeyField-1); + pOp = 0; /* Ensure pOp not used after sqltie3VdbeAddOp3() */ + addrJmp = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp3(v, OP_Jump, addrJmp+1, 0, addrJmp+1); VdbeCoverage(v); + pSort->labelBkOut = sqlite3VdbeMakeLabel(pParse); + pSort->regReturn = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Gosub, pSort->regReturn, pSort->labelBkOut); + sqlite3VdbeAddOp1(v, OP_ResetSorter, pSort->iECursor); + if( iLimit ){ + sqlite3VdbeAddOp2(v, OP_IfNot, iLimit, pSort->labelDone); + VdbeCoverage(v); + } + sqlite3VdbeJumpHere(v, addrFirst); + sqlite3ExprCodeMove(pParse, regBase, regPrevKey, pSort->nOBSat); + sqlite3VdbeJumpHere(v, addrJmp); + } + if( iLimit ){ + /* At this point the values for the new sorter entry are stored + ** in an array of registers. They need to be composed into a record + ** and inserted into the sorter if either (a) there are currently + ** less than LIMIT+OFFSET items or (b) the new record is smaller than + ** the largest record currently in the sorter. If (b) is true and there + ** are already LIMIT+OFFSET items in the sorter, delete the largest + ** entry before inserting the new one. This way there are never more + ** than LIMIT+OFFSET items in the sorter. + ** + ** If the new record does not need to be inserted into the sorter, + ** jump to the next iteration of the loop. If the pSort->labelOBLopt + ** value is not zero, then it is a label of where to jump. Otherwise, + ** just bypass the row insert logic. See the header comment on the + ** sqlite3WhereOrderByLimitOptLabel() function for additional info. + */ + int iCsr = pSort->iECursor; + sqlite3VdbeAddOp2(v, OP_IfNotZero, iLimit, sqlite3VdbeCurrentAddr(v)+4); + VdbeCoverage(v); + sqlite3VdbeAddOp2(v, OP_Last, iCsr, 0); + iSkip = sqlite3VdbeAddOp4Int(v, OP_IdxLE, + iCsr, 0, regBase+nOBSat, nExpr-nOBSat); + VdbeCoverage(v); + sqlite3VdbeAddOp1(v, OP_Delete, iCsr); + } + if( regRecord==0 ){ + regRecord = makeSorterRecord(pParse, pSort, pSelect, regBase, nBase); + } + if( pSort->sortFlags & SORTFLAG_UseSorter ){ + op = OP_SorterInsert; + }else{ + op = OP_IdxInsert; + } + sqlite3VdbeAddOp4Int(v, op, pSort->iECursor, regRecord, + regBase+nOBSat, nBase-nOBSat); + if( iSkip ){ + sqlite3VdbeChangeP2(v, iSkip, + pSort->labelOBLopt ? pSort->labelOBLopt : sqlite3VdbeCurrentAddr(v)); + } +} + +/* +** Add code to implement the OFFSET +*/ +static void codeOffset( + Vdbe *v, /* Generate code into this VM */ + int iOffset, /* Register holding the offset counter */ + int iContinue /* Jump here to skip the current record */ +){ + if( iOffset>0 ){ + sqlite3VdbeAddOp3(v, OP_IfPos, iOffset, iContinue, 1); VdbeCoverage(v); + VdbeComment((v, "OFFSET")); + } +} + +/* +** Add code that will check to make sure the array of registers starting at +** iMem form a distinct entry. This is used by both "SELECT DISTINCT ..." and +** distinct aggregates ("SELECT count(DISTINCT ) ..."). Three strategies +** are available. Which is used depends on the value of parameter eTnctType, +** as follows: +** +** WHERE_DISTINCT_UNORDERED/WHERE_DISTINCT_NOOP: +** Build an ephemeral table that contains all entries seen before and +** skip entries which have been seen before. +** +** Parameter iTab is the cursor number of an ephemeral table that must +** be opened before the VM code generated by this routine is executed. +** The ephemeral cursor table is queried for a record identical to the +** record formed by the current array of registers. If one is found, +** jump to VM address addrRepeat. Otherwise, insert a new record into +** the ephemeral cursor and proceed. +** +** The returned value in this case is a copy of parameter iTab. +** +** WHERE_DISTINCT_ORDERED: +** In this case rows are being delivered sorted order. The ephermal +** table is not required. Instead, the current set of values +** is compared against previous row. If they match, the new row +** is not distinct and control jumps to VM address addrRepeat. Otherwise, +** the VM program proceeds with processing the new row. +** +** The returned value in this case is the register number of the first +** in an array of registers used to store the previous result row so that +** it can be compared to the next. The caller must ensure that this +** register is initialized to NULL. (The fixDistinctOpenEph() routine +** will take care of this initialization.) +** +** WHERE_DISTINCT_UNIQUE: +** In this case it has already been determined that the rows are distinct. +** No special action is required. The return value is zero. +** +** Parameter pEList is the list of expressions used to generated the +** contents of each row. It is used by this routine to determine (a) +** how many elements there are in the array of registers and (b) the +** collation sequences that should be used for the comparisons if +** eTnctType is WHERE_DISTINCT_ORDERED. +*/ +static int codeDistinct( + Parse *pParse, /* Parsing and code generating context */ + int eTnctType, /* WHERE_DISTINCT_* value */ + int iTab, /* A sorting index used to test for distinctness */ + int addrRepeat, /* Jump to here if not distinct */ + ExprList *pEList, /* Expression for each element */ + int regElem /* First element */ +){ + int iRet = 0; + int nResultCol = pEList->nExpr; + Vdbe *v = pParse->pVdbe; + + switch( eTnctType ){ + case WHERE_DISTINCT_ORDERED: { + int i; + int iJump; /* Jump destination */ + int regPrev; /* Previous row content */ + + /* Allocate space for the previous row */ + iRet = regPrev = pParse->nMem+1; + pParse->nMem += nResultCol; + + iJump = sqlite3VdbeCurrentAddr(v) + nResultCol; + for(i=0; ia[i].pExpr); + if( idb->mallocFailed ); + sqlite3VdbeAddOp3(v, OP_Copy, regElem, regPrev, nResultCol-1); + break; + } + + case WHERE_DISTINCT_UNIQUE: { + /* nothing to do */ + break; + } + + default: { + int r1 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp4Int(v, OP_Found, iTab, addrRepeat, regElem, nResultCol); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_MakeRecord, regElem, nResultCol, r1); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iTab, r1, regElem, nResultCol); + sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + sqlite3ReleaseTempReg(pParse, r1); + iRet = iTab; + break; + } + } + + return iRet; +} + +/* +** This routine runs after codeDistinct(). It makes necessary +** adjustments to the OP_OpenEphemeral opcode that the codeDistinct() +** routine made use of. This processing must be done separately since +** sometimes codeDistinct is called before the OP_OpenEphemeral is actually +** laid down. +** +** WHERE_DISTINCT_NOOP: +** WHERE_DISTINCT_UNORDERED: +** +** No adjustments necessary. This function is a no-op. +** +** WHERE_DISTINCT_UNIQUE: +** +** The ephemeral table is not needed. So change the +** OP_OpenEphemeral opcode into an OP_Noop. +** +** WHERE_DISTINCT_ORDERED: +** +** The ephemeral table is not needed. But we do need register +** iVal to be initialized to NULL. So change the OP_OpenEphemeral +** into an OP_Null on the iVal register. +*/ +static void fixDistinctOpenEph( + Parse *pParse, /* Parsing and code generating context */ + int eTnctType, /* WHERE_DISTINCT_* value */ + int iVal, /* Value returned by codeDistinct() */ + int iOpenEphAddr /* Address of OP_OpenEphemeral instruction for iTab */ +){ + if( pParse->nErr==0 + && (eTnctType==WHERE_DISTINCT_UNIQUE || eTnctType==WHERE_DISTINCT_ORDERED) + ){ + Vdbe *v = pParse->pVdbe; + sqlite3VdbeChangeToNoop(v, iOpenEphAddr); + if( sqlite3VdbeGetOp(v, iOpenEphAddr+1)->opcode==OP_Explain ){ + sqlite3VdbeChangeToNoop(v, iOpenEphAddr+1); + } + if( eTnctType==WHERE_DISTINCT_ORDERED ){ + /* Change the OP_OpenEphemeral to an OP_Null that sets the MEM_Cleared + ** bit on the first register of the previous value. This will cause the + ** OP_Ne added in codeDistinct() to always fail on the first iteration of + ** the loop even if the first row is all NULLs. */ + VdbeOp *pOp = sqlite3VdbeGetOp(v, iOpenEphAddr); + pOp->opcode = OP_Null; + pOp->p1 = 1; + pOp->p2 = iVal; + } + } +} + +#ifdef SQLITE_ENABLE_SORTER_REFERENCES +/* +** This function is called as part of inner-loop generation for a SELECT +** statement with an ORDER BY that is not optimized by an index. It +** determines the expressions, if any, that the sorter-reference +** optimization should be used for. The sorter-reference optimization +** is used for SELECT queries like: +** +** SELECT a, bigblob FROM t1 ORDER BY a LIMIT 10 +** +** If the optimization is used for expression "bigblob", then instead of +** storing values read from that column in the sorter records, the PK of +** the row from table t1 is stored instead. Then, as records are extracted from +** the sorter to return to the user, the required value of bigblob is +** retrieved directly from table t1. If the values are very large, this +** can be more efficient than storing them directly in the sorter records. +** +** The ExprList_item.bSorterRef flag is set for each expression in pEList +** for which the sorter-reference optimization should be enabled. +** Additionally, the pSort->aDefer[] array is populated with entries +** for all cursors required to evaluate all selected expressions. Finally. +** output variable (*ppExtra) is set to an expression list containing +** expressions for all extra PK values that should be stored in the +** sorter records. +*/ +static void selectExprDefer( + Parse *pParse, /* Leave any error here */ + SortCtx *pSort, /* Sorter context */ + ExprList *pEList, /* Expressions destined for sorter */ + ExprList **ppExtra /* Expressions to append to sorter record */ +){ + int i; + int nDefer = 0; + ExprList *pExtra = 0; + for(i=0; inExpr; i++){ + struct ExprList_item *pItem = &pEList->a[i]; + if( pItem->u.x.iOrderByCol==0 ){ + Expr *pExpr = pItem->pExpr; + Table *pTab; + if( pExpr->op==TK_COLUMN + && pExpr->iColumn>=0 + && ALWAYS( ExprUseYTab(pExpr) ) + && (pTab = pExpr->y.pTab)!=0 + && IsOrdinaryTable(pTab) + && (pTab->aCol[pExpr->iColumn].colFlags & COLFLAG_SORTERREF)!=0 + ){ + int j; + for(j=0; jaDefer[j].iCsr==pExpr->iTable ) break; + } + if( j==nDefer ){ + if( nDefer==ArraySize(pSort->aDefer) ){ + continue; + }else{ + int nKey = 1; + int k; + Index *pPk = 0; + if( !HasRowid(pTab) ){ + pPk = sqlite3PrimaryKeyIndex(pTab); + nKey = pPk->nKeyCol; + } + for(k=0; kiTable = pExpr->iTable; + assert( ExprUseYTab(pNew) ); + pNew->y.pTab = pExpr->y.pTab; + pNew->iColumn = pPk ? pPk->aiColumn[k] : -1; + pExtra = sqlite3ExprListAppend(pParse, pExtra, pNew); + } + } + pSort->aDefer[nDefer].pTab = pExpr->y.pTab; + pSort->aDefer[nDefer].iCsr = pExpr->iTable; + pSort->aDefer[nDefer].nKey = nKey; + nDefer++; + } + } + pItem->bSorterRef = 1; + } + } + } + pSort->nDefer = (u8)nDefer; + *ppExtra = pExtra; +} +#endif + +/* +** This routine generates the code for the inside of the inner loop +** of a SELECT. +** +** If srcTab is negative, then the p->pEList expressions +** are evaluated in order to get the data for this row. If srcTab is +** zero or more, then data is pulled from srcTab and p->pEList is used only +** to get the number of columns and the collation sequence for each column. +*/ +static void selectInnerLoop( + Parse *pParse, /* The parser context */ + Select *p, /* The complete select statement being coded */ + int srcTab, /* Pull data from this table if non-negative */ + SortCtx *pSort, /* If not NULL, info on how to process ORDER BY */ + DistinctCtx *pDistinct, /* If not NULL, info on how to process DISTINCT */ + SelectDest *pDest, /* How to dispose of the results */ + int iContinue, /* Jump here to continue with next row */ + int iBreak /* Jump here to break out of the inner loop */ +){ + Vdbe *v = pParse->pVdbe; + int i; + int hasDistinct; /* True if the DISTINCT keyword is present */ + int eDest = pDest->eDest; /* How to dispose of results */ + int iParm = pDest->iSDParm; /* First argument to disposal method */ + int nResultCol; /* Number of result columns */ + int nPrefixReg = 0; /* Number of extra registers before regResult */ + RowLoadInfo sRowLoadInfo; /* Info for deferred row loading */ + + /* Usually, regResult is the first cell in an array of memory cells + ** containing the current result row. In this case regOrig is set to the + ** same value. However, if the results are being sent to the sorter, the + ** values for any expressions that are also part of the sort-key are omitted + ** from this array. In this case regOrig is set to zero. */ + int regResult; /* Start of memory holding current results */ + int regOrig; /* Start of memory holding full result (or 0) */ + + assert( v ); + assert( p->pEList!=0 ); + hasDistinct = pDistinct ? pDistinct->eTnctType : WHERE_DISTINCT_NOOP; + if( pSort && pSort->pOrderBy==0 ) pSort = 0; + if( pSort==0 && !hasDistinct ){ + assert( iContinue!=0 ); + codeOffset(v, p->iOffset, iContinue); + } + + /* Pull the requested columns. + */ + nResultCol = p->pEList->nExpr; + + if( pDest->iSdst==0 ){ + if( pSort ){ + nPrefixReg = pSort->pOrderBy->nExpr; + if( !(pSort->sortFlags & SORTFLAG_UseSorter) ) nPrefixReg++; + pParse->nMem += nPrefixReg; + } + pDest->iSdst = pParse->nMem+1; + pParse->nMem += nResultCol; + }else if( pDest->iSdst+nResultCol > pParse->nMem ){ + /* This is an error condition that can result, for example, when a SELECT + ** on the right-hand side of an INSERT contains more result columns than + ** there are columns in the table on the left. The error will be caught + ** and reported later. But we need to make sure enough memory is allocated + ** to avoid other spurious errors in the meantime. */ + pParse->nMem += nResultCol; + } + pDest->nSdst = nResultCol; + regOrig = regResult = pDest->iSdst; + if( srcTab>=0 ){ + for(i=0; ipEList->a[i].zEName)); + } + }else if( eDest!=SRT_Exists ){ +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + ExprList *pExtra = 0; +#endif + /* If the destination is an EXISTS(...) expression, the actual + ** values returned by the SELECT are not required. + */ + u8 ecelFlags; /* "ecel" is an abbreviation of "ExprCodeExprList" */ + ExprList *pEList; + if( eDest==SRT_Mem || eDest==SRT_Output || eDest==SRT_Coroutine ){ + ecelFlags = SQLITE_ECEL_DUP; + }else{ + ecelFlags = 0; + } + if( pSort && hasDistinct==0 && eDest!=SRT_EphemTab && eDest!=SRT_Table ){ + /* For each expression in p->pEList that is a copy of an expression in + ** the ORDER BY clause (pSort->pOrderBy), set the associated + ** iOrderByCol value to one more than the index of the ORDER BY + ** expression within the sort-key that pushOntoSorter() will generate. + ** This allows the p->pEList field to be omitted from the sorted record, + ** saving space and CPU cycles. */ + ecelFlags |= (SQLITE_ECEL_OMITREF|SQLITE_ECEL_REF); + + for(i=pSort->nOBSat; ipOrderBy->nExpr; i++){ + int j; + if( (j = pSort->pOrderBy->a[i].u.x.iOrderByCol)>0 ){ + p->pEList->a[j-1].u.x.iOrderByCol = i+1-pSort->nOBSat; + } + } +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + selectExprDefer(pParse, pSort, p->pEList, &pExtra); + if( pExtra && pParse->db->mallocFailed==0 ){ + /* If there are any extra PK columns to add to the sorter records, + ** allocate extra memory cells and adjust the OpenEphemeral + ** instruction to account for the larger records. This is only + ** required if there are one or more WITHOUT ROWID tables with + ** composite primary keys in the SortCtx.aDefer[] array. */ + VdbeOp *pOp = sqlite3VdbeGetOp(v, pSort->addrSortIndex); + pOp->p2 += (pExtra->nExpr - pSort->nDefer); + pOp->p4.pKeyInfo->nAllField += (pExtra->nExpr - pSort->nDefer); + pParse->nMem += pExtra->nExpr; + } +#endif + + /* Adjust nResultCol to account for columns that are omitted + ** from the sorter by the optimizations in this branch */ + pEList = p->pEList; + for(i=0; inExpr; i++){ + if( pEList->a[i].u.x.iOrderByCol>0 +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + || pEList->a[i].bSorterRef +#endif + ){ + nResultCol--; + regOrig = 0; + } + } + + testcase( regOrig ); + testcase( eDest==SRT_Set ); + testcase( eDest==SRT_Mem ); + testcase( eDest==SRT_Coroutine ); + testcase( eDest==SRT_Output ); + assert( eDest==SRT_Set || eDest==SRT_Mem + || eDest==SRT_Coroutine || eDest==SRT_Output + || eDest==SRT_Upfrom ); + } + sRowLoadInfo.regResult = regResult; + sRowLoadInfo.ecelFlags = ecelFlags; +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + sRowLoadInfo.pExtra = pExtra; + sRowLoadInfo.regExtraResult = regResult + nResultCol; + if( pExtra ) nResultCol += pExtra->nExpr; +#endif + if( p->iLimit + && (ecelFlags & SQLITE_ECEL_OMITREF)!=0 + && nPrefixReg>0 + ){ + assert( pSort!=0 ); + assert( hasDistinct==0 ); + pSort->pDeferredRowLoad = &sRowLoadInfo; + regOrig = 0; + }else{ + innerLoopLoadRow(pParse, p, &sRowLoadInfo); + } + } + + /* If the DISTINCT keyword was present on the SELECT statement + ** and this row has been seen before, then do not make this row + ** part of the result. + */ + if( hasDistinct ){ + int eType = pDistinct->eTnctType; + int iTab = pDistinct->tabTnct; + assert( nResultCol==p->pEList->nExpr ); + iTab = codeDistinct(pParse, eType, iTab, iContinue, p->pEList, regResult); + fixDistinctOpenEph(pParse, eType, iTab, pDistinct->addrTnct); + if( pSort==0 ){ + codeOffset(v, p->iOffset, iContinue); + } + } + + switch( eDest ){ + /* In this mode, write each query result to the key of the temporary + ** table iParm. + */ +#ifndef SQLITE_OMIT_COMPOUND_SELECT + case SRT_Union: { + int r1; + r1 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); + sqlite3ReleaseTempReg(pParse, r1); + break; + } + + /* Construct a record from the query result, but instead of + ** saving that record, use it as a key to delete elements from + ** the temporary table iParm. + */ + case SRT_Except: { + sqlite3VdbeAddOp3(v, OP_IdxDelete, iParm, regResult, nResultCol); + break; + } +#endif /* SQLITE_OMIT_COMPOUND_SELECT */ + + /* Store the result as data using a unique key. + */ + case SRT_Fifo: + case SRT_DistFifo: + case SRT_Table: + case SRT_EphemTab: { + int r1 = sqlite3GetTempRange(pParse, nPrefixReg+1); + testcase( eDest==SRT_Table ); + testcase( eDest==SRT_EphemTab ); + testcase( eDest==SRT_Fifo ); + testcase( eDest==SRT_DistFifo ); + sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg); +#ifndef SQLITE_OMIT_CTE + if( eDest==SRT_DistFifo ){ + /* If the destination is DistFifo, then cursor (iParm+1) is open + ** on an ephemeral index. If the current row is already present + ** in the index, do not write it to the output. If not, add the + ** current row to the index and proceed with writing it to the + ** output table as well. */ + int addr = sqlite3VdbeCurrentAddr(v) + 4; + sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); + VdbeCoverage(v); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm+1, r1,regResult,nResultCol); + assert( pSort==0 ); + } +#endif + if( pSort ){ + assert( regResult==regOrig ); + pushOntoSorter(pParse, pSort, p, r1+nPrefixReg, regOrig, 1, nPrefixReg); + }else{ + int r2 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, r2); + sqlite3VdbeAddOp3(v, OP_Insert, iParm, r1, r2); + sqlite3VdbeChangeP5(v, OPFLAG_APPEND); + sqlite3ReleaseTempReg(pParse, r2); + } + sqlite3ReleaseTempRange(pParse, r1, nPrefixReg+1); + break; + } + + case SRT_Upfrom: { + if( pSort ){ + pushOntoSorter( + pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); + }else{ + int i2 = pDest->iSDParm2; + int r1 = sqlite3GetTempReg(pParse); + + /* If the UPDATE FROM join is an aggregate that matches no rows, it + ** might still be trying to return one row, because that is what + ** aggregates do. Don't record that empty row in the output table. */ + sqlite3VdbeAddOp2(v, OP_IsNull, regResult, iBreak); VdbeCoverage(v); + + sqlite3VdbeAddOp3(v, OP_MakeRecord, + regResult+(i2<0), nResultCol-(i2<0), r1); + if( i2<0 ){ + sqlite3VdbeAddOp3(v, OP_Insert, iParm, r1, regResult); + }else{ + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, i2); + } + } + break; + } + +#ifndef SQLITE_OMIT_SUBQUERY + /* If we are creating a set for an "expr IN (SELECT ...)" construct, + ** then there should be a single item on the stack. Write this + ** item into the set table with bogus data. + */ + case SRT_Set: { + if( pSort ){ + /* At first glance you would think we could optimize out the + ** ORDER BY in this case since the order of entries in the set + ** does not matter. But there might be a LIMIT clause, in which + ** case the order does matter */ + pushOntoSorter( + pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); + }else{ + int r1 = sqlite3GetTempReg(pParse); + assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol ); + sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol, + r1, pDest->zAffSdst, nResultCol); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); + sqlite3ReleaseTempReg(pParse, r1); + } + break; + } + + + /* If any row exist in the result set, record that fact and abort. + */ + case SRT_Exists: { + sqlite3VdbeAddOp2(v, OP_Integer, 1, iParm); + /* The LIMIT clause will terminate the loop for us */ + break; + } + + /* If this is a scalar select that is part of an expression, then + ** store the results in the appropriate memory cell or array of + ** memory cells and break out of the scan loop. + */ + case SRT_Mem: { + if( pSort ){ + assert( nResultCol<=pDest->nSdst ); + pushOntoSorter( + pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); + }else{ + assert( nResultCol==pDest->nSdst ); + assert( regResult==iParm ); + /* The LIMIT clause will jump out of the loop for us */ + } + break; + } +#endif /* #ifndef SQLITE_OMIT_SUBQUERY */ + + case SRT_Coroutine: /* Send data to a co-routine */ + case SRT_Output: { /* Return the results */ + testcase( eDest==SRT_Coroutine ); + testcase( eDest==SRT_Output ); + if( pSort ){ + pushOntoSorter(pParse, pSort, p, regResult, regOrig, nResultCol, + nPrefixReg); + }else if( eDest==SRT_Coroutine ){ + sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm); + }else{ + sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, nResultCol); + } + break; + } + +#ifndef SQLITE_OMIT_CTE + /* Write the results into a priority queue that is order according to + ** pDest->pOrderBy (in pSO). pDest->iSDParm (in iParm) is the cursor for an + ** index with pSO->nExpr+2 columns. Build a key using pSO for the first + ** pSO->nExpr columns, then make sure all keys are unique by adding a + ** final OP_Sequence column. The last column is the record as a blob. + */ + case SRT_DistQueue: + case SRT_Queue: { + int nKey; + int r1, r2, r3; + int addrTest = 0; + ExprList *pSO; + pSO = pDest->pOrderBy; + assert( pSO ); + nKey = pSO->nExpr; + r1 = sqlite3GetTempReg(pParse); + r2 = sqlite3GetTempRange(pParse, nKey+2); + r3 = r2+nKey+1; + if( eDest==SRT_DistQueue ){ + /* If the destination is DistQueue, then cursor (iParm+1) is open + ** on a second ephemeral index that holds all values every previously + ** added to the queue. */ + addrTest = sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, 0, + regResult, nResultCol); + VdbeCoverage(v); + } + sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r3); + if( eDest==SRT_DistQueue ){ + sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r3); + sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + } + for(i=0; ia[i].u.x.iOrderByCol - 1, + r2+i); + } + sqlite3VdbeAddOp2(v, OP_Sequence, iParm, r2+nKey); + sqlite3VdbeAddOp3(v, OP_MakeRecord, r2, nKey+2, r1); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, r2, nKey+2); + if( addrTest ) sqlite3VdbeJumpHere(v, addrTest); + sqlite3ReleaseTempReg(pParse, r1); + sqlite3ReleaseTempRange(pParse, r2, nKey+2); + break; + } +#endif /* SQLITE_OMIT_CTE */ + + + +#if !defined(SQLITE_OMIT_TRIGGER) + /* Discard the results. This is used for SELECT statements inside + ** the body of a TRIGGER. The purpose of such selects is to call + ** user-defined functions that have side effects. We do not care + ** about the actual results of the select. + */ + default: { + assert( eDest==SRT_Discard ); + break; + } +#endif + } + + /* Jump to the end of the loop if the LIMIT is reached. Except, if + ** there is a sorter, in which case the sorter has already limited + ** the output for us. + */ + if( pSort==0 && p->iLimit ){ + sqlite3VdbeAddOp2(v, OP_DecrJumpZero, p->iLimit, iBreak); VdbeCoverage(v); + } +} + +/* +** Allocate a KeyInfo object sufficient for an index of N key columns and +** X extra columns. +*/ +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoAlloc(sqlite3 *db, int N, int X){ + int nExtra = (N+X)*(sizeof(CollSeq*)+1) - sizeof(CollSeq*); + KeyInfo *p = sqlite3DbMallocRawNN(db, sizeof(KeyInfo) + nExtra); + if( p ){ + p->aSortFlags = (u8*)&p->aColl[N+X]; + p->nKeyField = (u16)N; + p->nAllField = (u16)(N+X); + p->enc = ENC(db); + p->db = db; + p->nRef = 1; + memset(&p[1], 0, nExtra); + }else{ + return (KeyInfo*)sqlite3OomFault(db); + } + return p; +} + +/* +** Deallocate a KeyInfo object +*/ +SQLITE_PRIVATE void sqlite3KeyInfoUnref(KeyInfo *p){ + if( p ){ + assert( p->nRef>0 ); + p->nRef--; + if( p->nRef==0 ) sqlite3DbFreeNN(p->db, p); + } +} + +/* +** Make a new pointer to a KeyInfo object +*/ +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoRef(KeyInfo *p){ + if( p ){ + assert( p->nRef>0 ); + p->nRef++; + } + return p; +} + +#ifdef SQLITE_DEBUG +/* +** Return TRUE if a KeyInfo object can be change. The KeyInfo object +** can only be changed if this is just a single reference to the object. +** +** This routine is used only inside of assert() statements. +*/ +SQLITE_PRIVATE int sqlite3KeyInfoIsWriteable(KeyInfo *p){ return p->nRef==1; } +#endif /* SQLITE_DEBUG */ + +/* +** Given an expression list, generate a KeyInfo structure that records +** the collating sequence for each expression in that expression list. +** +** If the ExprList is an ORDER BY or GROUP BY clause then the resulting +** KeyInfo structure is appropriate for initializing a virtual index to +** implement that clause. If the ExprList is the result set of a SELECT +** then the KeyInfo structure is appropriate for initializing a virtual +** index to implement a DISTINCT test. +** +** Space to hold the KeyInfo structure is obtained from malloc. The calling +** function is responsible for seeing that this structure is eventually +** freed. +*/ +SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoFromExprList( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* Form the KeyInfo object from this ExprList */ + int iStart, /* Begin with this column of pList */ + int nExtra /* Add this many extra columns to the end */ +){ + int nExpr; + KeyInfo *pInfo; + struct ExprList_item *pItem; + sqlite3 *db = pParse->db; + int i; + + nExpr = pList->nExpr; + pInfo = sqlite3KeyInfoAlloc(db, nExpr-iStart, nExtra+1); + if( pInfo ){ + assert( sqlite3KeyInfoIsWriteable(pInfo) ); + for(i=iStart, pItem=pList->a+iStart; iaColl[i-iStart] = sqlite3ExprNNCollSeq(pParse, pItem->pExpr); + pInfo->aSortFlags[i-iStart] = pItem->sortFlags; + } + } + return pInfo; +} + +/* +** Name of the connection operator, used for error messages. +*/ +SQLITE_PRIVATE const char *sqlite3SelectOpName(int id){ + char *z; + switch( id ){ + case TK_ALL: z = "UNION ALL"; break; + case TK_INTERSECT: z = "INTERSECT"; break; + case TK_EXCEPT: z = "EXCEPT"; break; + default: z = "UNION"; break; + } + return z; +} + +#ifndef SQLITE_OMIT_EXPLAIN +/* +** Unless an "EXPLAIN QUERY PLAN" command is being processed, this function +** is a no-op. Otherwise, it adds a single row of output to the EQP result, +** where the caption is of the form: +** +** "USE TEMP B-TREE FOR xxx" +** +** where xxx is one of "DISTINCT", "ORDER BY" or "GROUP BY". Exactly which +** is determined by the zUsage argument. +*/ +static void explainTempTable(Parse *pParse, const char *zUsage){ + ExplainQueryPlan((pParse, 0, "USE TEMP B-TREE FOR %s", zUsage)); +} + +/* +** Assign expression b to lvalue a. A second, no-op, version of this macro +** is provided when SQLITE_OMIT_EXPLAIN is defined. This allows the code +** in sqlite3Select() to assign values to structure member variables that +** only exist if SQLITE_OMIT_EXPLAIN is not defined without polluting the +** code with #ifndef directives. +*/ +# define explainSetInteger(a, b) a = b + +#else +/* No-op versions of the explainXXX() functions and macros. */ +# define explainTempTable(y,z) +# define explainSetInteger(y,z) +#endif + + +/* +** If the inner loop was generated using a non-null pOrderBy argument, +** then the results were placed in a sorter. After the loop is terminated +** we need to run the sorter and output the results. The following +** routine generates the code needed to do that. +*/ +static void generateSortTail( + Parse *pParse, /* Parsing context */ + Select *p, /* The SELECT statement */ + SortCtx *pSort, /* Information on the ORDER BY clause */ + int nColumn, /* Number of columns of data */ + SelectDest *pDest /* Write the sorted results here */ +){ + Vdbe *v = pParse->pVdbe; /* The prepared statement */ + int addrBreak = pSort->labelDone; /* Jump here to exit loop */ + int addrContinue = sqlite3VdbeMakeLabel(pParse);/* Jump here for next cycle */ + int addr; /* Top of output loop. Jump for Next. */ + int addrOnce = 0; + int iTab; + ExprList *pOrderBy = pSort->pOrderBy; + int eDest = pDest->eDest; + int iParm = pDest->iSDParm; + int regRow; + int regRowid; + int iCol; + int nKey; /* Number of key columns in sorter record */ + int iSortTab; /* Sorter cursor to read from */ + int i; + int bSeq; /* True if sorter record includes seq. no. */ + int nRefKey = 0; + struct ExprList_item *aOutEx = p->pEList->a; + + assert( addrBreak<0 ); + if( pSort->labelBkOut ){ + sqlite3VdbeAddOp2(v, OP_Gosub, pSort->regReturn, pSort->labelBkOut); + sqlite3VdbeGoto(v, addrBreak); + sqlite3VdbeResolveLabel(v, pSort->labelBkOut); + } + +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + /* Open any cursors needed for sorter-reference expressions */ + for(i=0; inDefer; i++){ + Table *pTab = pSort->aDefer[i].pTab; + int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + sqlite3OpenTable(pParse, pSort->aDefer[i].iCsr, iDb, pTab, OP_OpenRead); + nRefKey = MAX(nRefKey, pSort->aDefer[i].nKey); + } +#endif + + iTab = pSort->iECursor; + if( eDest==SRT_Output || eDest==SRT_Coroutine || eDest==SRT_Mem ){ + if( eDest==SRT_Mem && p->iOffset ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, pDest->iSdst); + } + regRowid = 0; + regRow = pDest->iSdst; + }else{ + regRowid = sqlite3GetTempReg(pParse); + if( eDest==SRT_EphemTab || eDest==SRT_Table ){ + regRow = sqlite3GetTempReg(pParse); + nColumn = 0; + }else{ + regRow = sqlite3GetTempRange(pParse, nColumn); + } + } + nKey = pOrderBy->nExpr - pSort->nOBSat; + if( pSort->sortFlags & SORTFLAG_UseSorter ){ + int regSortOut = ++pParse->nMem; + iSortTab = pParse->nTab++; + if( pSort->labelBkOut ){ + addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + } + sqlite3VdbeAddOp3(v, OP_OpenPseudo, iSortTab, regSortOut, + nKey+1+nColumn+nRefKey); + if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce); + addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak); + VdbeCoverage(v); + codeOffset(v, p->iOffset, addrContinue); + sqlite3VdbeAddOp3(v, OP_SorterData, iTab, regSortOut, iSortTab); + bSeq = 0; + }else{ + addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak); VdbeCoverage(v); + codeOffset(v, p->iOffset, addrContinue); + iSortTab = iTab; + bSeq = 1; + } + for(i=0, iCol=nKey+bSeq-1; inDefer ){ + int iKey = iCol+1; + int regKey = sqlite3GetTempRange(pParse, nRefKey); + + for(i=0; inDefer; i++){ + int iCsr = pSort->aDefer[i].iCsr; + Table *pTab = pSort->aDefer[i].pTab; + int nKey = pSort->aDefer[i].nKey; + + sqlite3VdbeAddOp1(v, OP_NullRow, iCsr); + if( HasRowid(pTab) ){ + sqlite3VdbeAddOp3(v, OP_Column, iSortTab, iKey++, regKey); + sqlite3VdbeAddOp3(v, OP_SeekRowid, iCsr, + sqlite3VdbeCurrentAddr(v)+1, regKey); + }else{ + int k; + int iJmp; + assert( sqlite3PrimaryKeyIndex(pTab)->nKeyCol==nKey ); + for(k=0; k=0; i--){ +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + if( aOutEx[i].bSorterRef ){ + sqlite3ExprCode(pParse, aOutEx[i].pExpr, regRow+i); + }else +#endif + { + int iRead; + if( aOutEx[i].u.x.iOrderByCol ){ + iRead = aOutEx[i].u.x.iOrderByCol-1; + }else{ + iRead = iCol--; + } + sqlite3VdbeAddOp3(v, OP_Column, iSortTab, iRead, regRow+i); + VdbeComment((v, "%s", aOutEx[i].zEName)); + } + } + switch( eDest ){ + case SRT_Table: + case SRT_EphemTab: { + sqlite3VdbeAddOp3(v, OP_Column, iSortTab, nKey+bSeq, regRow); + sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, regRowid); + sqlite3VdbeAddOp3(v, OP_Insert, iParm, regRow, regRowid); + sqlite3VdbeChangeP5(v, OPFLAG_APPEND); + break; + } +#ifndef SQLITE_OMIT_SUBQUERY + case SRT_Set: { + assert( nColumn==sqlite3Strlen30(pDest->zAffSdst) ); + sqlite3VdbeAddOp4(v, OP_MakeRecord, regRow, nColumn, regRowid, + pDest->zAffSdst, nColumn); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, regRowid, regRow, nColumn); + break; + } + case SRT_Mem: { + /* The LIMIT clause will terminate the loop for us */ + break; + } +#endif + case SRT_Upfrom: { + int i2 = pDest->iSDParm2; + int r1 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_MakeRecord,regRow+(i2<0),nColumn-(i2<0),r1); + if( i2<0 ){ + sqlite3VdbeAddOp3(v, OP_Insert, iParm, r1, regRow); + }else{ + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regRow, i2); + } + break; + } + default: { + assert( eDest==SRT_Output || eDest==SRT_Coroutine ); + testcase( eDest==SRT_Output ); + testcase( eDest==SRT_Coroutine ); + if( eDest==SRT_Output ){ + sqlite3VdbeAddOp2(v, OP_ResultRow, pDest->iSdst, nColumn); + }else{ + sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm); + } + break; + } + } + if( regRowid ){ + if( eDest==SRT_Set ){ + sqlite3ReleaseTempRange(pParse, regRow, nColumn); + }else{ + sqlite3ReleaseTempReg(pParse, regRow); + } + sqlite3ReleaseTempReg(pParse, regRowid); + } + /* The bottom of the loop + */ + sqlite3VdbeResolveLabel(v, addrContinue); + if( pSort->sortFlags & SORTFLAG_UseSorter ){ + sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr); VdbeCoverage(v); + }else{ + sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); VdbeCoverage(v); + } + if( pSort->regReturn ) sqlite3VdbeAddOp1(v, OP_Return, pSort->regReturn); + sqlite3VdbeResolveLabel(v, addrBreak); +} + +/* +** Return a pointer to a string containing the 'declaration type' of the +** expression pExpr. The string may be treated as static by the caller. +** +** Also try to estimate the size of the returned value and return that +** result in *pEstWidth. +** +** The declaration type is the exact datatype definition extracted from the +** original CREATE TABLE statement if the expression is a column. The +** declaration type for a ROWID field is INTEGER. Exactly when an expression +** is considered a column can be complex in the presence of subqueries. The +** result-set expression in all of the following SELECT statements is +** considered a column by this function. +** +** SELECT col FROM tbl; +** SELECT (SELECT col FROM tbl; +** SELECT (SELECT col FROM tbl); +** SELECT abc FROM (SELECT col AS abc FROM tbl); +** +** The declaration type for any expression other than a column is NULL. +** +** This routine has either 3 or 6 parameters depending on whether or not +** the SQLITE_ENABLE_COLUMN_METADATA compile-time option is used. +*/ +#ifdef SQLITE_ENABLE_COLUMN_METADATA +# define columnType(A,B,C,D,E) columnTypeImpl(A,B,C,D,E) +#else /* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */ +# define columnType(A,B,C,D,E) columnTypeImpl(A,B) +#endif +static const char *columnTypeImpl( + NameContext *pNC, +#ifndef SQLITE_ENABLE_COLUMN_METADATA + Expr *pExpr +#else + Expr *pExpr, + const char **pzOrigDb, + const char **pzOrigTab, + const char **pzOrigCol +#endif +){ + char const *zType = 0; + int j; +#ifdef SQLITE_ENABLE_COLUMN_METADATA + char const *zOrigDb = 0; + char const *zOrigTab = 0; + char const *zOrigCol = 0; +#endif + + assert( pExpr!=0 ); + assert( pNC->pSrcList!=0 ); + switch( pExpr->op ){ + case TK_COLUMN: { + /* The expression is a column. Locate the table the column is being + ** extracted from in NameContext.pSrcList. This table may be real + ** database table or a subquery. + */ + Table *pTab = 0; /* Table structure column is extracted from */ + Select *pS = 0; /* Select the column is extracted from */ + int iCol = pExpr->iColumn; /* Index of column in pTab */ + while( pNC && !pTab ){ + SrcList *pTabList = pNC->pSrcList; + for(j=0;jnSrc && pTabList->a[j].iCursor!=pExpr->iTable;j++); + if( jnSrc ){ + pTab = pTabList->a[j].pTab; + pS = pTabList->a[j].pSelect; + }else{ + pNC = pNC->pNext; + } + } + + if( pTab==0 ){ + /* At one time, code such as "SELECT new.x" within a trigger would + ** cause this condition to run. Since then, we have restructured how + ** trigger code is generated and so this condition is no longer + ** possible. However, it can still be true for statements like + ** the following: + ** + ** CREATE TABLE t1(col INTEGER); + ** SELECT (SELECT t1.col) FROM FROM t1; + ** + ** when columnType() is called on the expression "t1.col" in the + ** sub-select. In this case, set the column type to NULL, even + ** though it should really be "INTEGER". + ** + ** This is not a problem, as the column type of "t1.col" is never + ** used. When columnType() is called on the expression + ** "(SELECT t1.col)", the correct type is returned (see the TK_SELECT + ** branch below. */ + break; + } + + assert( pTab && ExprUseYTab(pExpr) && pExpr->y.pTab==pTab ); + if( pS ){ + /* The "table" is actually a sub-select or a view in the FROM clause + ** of the SELECT statement. Return the declaration type and origin + ** data for the result-set column of the sub-select. + */ + if( iColpEList->nExpr +#ifdef SQLITE_ALLOW_ROWID_IN_VIEW + && iCol>=0 +#else + && ALWAYS(iCol>=0) +#endif + ){ + /* If iCol is less than zero, then the expression requests the + ** rowid of the sub-select or view. This expression is legal (see + ** test case misc2.2.2) - it always evaluates to NULL. + */ + NameContext sNC; + Expr *p = pS->pEList->a[iCol].pExpr; + sNC.pSrcList = pS->pSrc; + sNC.pNext = pNC; + sNC.pParse = pNC->pParse; + zType = columnType(&sNC, p,&zOrigDb,&zOrigTab,&zOrigCol); + } + }else{ + /* A real table or a CTE table */ + assert( !pS ); +#ifdef SQLITE_ENABLE_COLUMN_METADATA + if( iCol<0 ) iCol = pTab->iPKey; + assert( iCol==XN_ROWID || (iCol>=0 && iColnCol) ); + if( iCol<0 ){ + zType = "INTEGER"; + zOrigCol = "rowid"; + }else{ + zOrigCol = pTab->aCol[iCol].zCnName; + zType = sqlite3ColumnType(&pTab->aCol[iCol],0); + } + zOrigTab = pTab->zName; + if( pNC->pParse && pTab->pSchema ){ + int iDb = sqlite3SchemaToIndex(pNC->pParse->db, pTab->pSchema); + zOrigDb = pNC->pParse->db->aDb[iDb].zDbSName; + } +#else + assert( iCol==XN_ROWID || (iCol>=0 && iColnCol) ); + if( iCol<0 ){ + zType = "INTEGER"; + }else{ + zType = sqlite3ColumnType(&pTab->aCol[iCol],0); + } +#endif + } + break; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_SELECT: { + /* The expression is a sub-select. Return the declaration type and + ** origin info for the single column in the result set of the SELECT + ** statement. + */ + NameContext sNC; + Select *pS; + Expr *p; + assert( ExprUseXSelect(pExpr) ); + pS = pExpr->x.pSelect; + p = pS->pEList->a[0].pExpr; + sNC.pSrcList = pS->pSrc; + sNC.pNext = pNC; + sNC.pParse = pNC->pParse; + zType = columnType(&sNC, p, &zOrigDb, &zOrigTab, &zOrigCol); + break; + } +#endif + } + +#ifdef SQLITE_ENABLE_COLUMN_METADATA + if( pzOrigDb ){ + assert( pzOrigTab && pzOrigCol ); + *pzOrigDb = zOrigDb; + *pzOrigTab = zOrigTab; + *pzOrigCol = zOrigCol; + } +#endif + return zType; +} + +/* +** Generate code that will tell the VDBE the declaration types of columns +** in the result set. +*/ +static void generateColumnTypes( + Parse *pParse, /* Parser context */ + SrcList *pTabList, /* List of tables */ + ExprList *pEList /* Expressions defining the result set */ +){ +#ifndef SQLITE_OMIT_DECLTYPE + Vdbe *v = pParse->pVdbe; + int i; + NameContext sNC; + sNC.pSrcList = pTabList; + sNC.pParse = pParse; + sNC.pNext = 0; + for(i=0; inExpr; i++){ + Expr *p = pEList->a[i].pExpr; + const char *zType; +#ifdef SQLITE_ENABLE_COLUMN_METADATA + const char *zOrigDb = 0; + const char *zOrigTab = 0; + const char *zOrigCol = 0; + zType = columnType(&sNC, p, &zOrigDb, &zOrigTab, &zOrigCol); + + /* The vdbe must make its own copy of the column-type and other + ** column specific strings, in case the schema is reset before this + ** virtual machine is deleted. + */ + sqlite3VdbeSetColName(v, i, COLNAME_DATABASE, zOrigDb, SQLITE_TRANSIENT); + sqlite3VdbeSetColName(v, i, COLNAME_TABLE, zOrigTab, SQLITE_TRANSIENT); + sqlite3VdbeSetColName(v, i, COLNAME_COLUMN, zOrigCol, SQLITE_TRANSIENT); +#else + zType = columnType(&sNC, p, 0, 0, 0); +#endif + sqlite3VdbeSetColName(v, i, COLNAME_DECLTYPE, zType, SQLITE_TRANSIENT); + } +#endif /* !defined(SQLITE_OMIT_DECLTYPE) */ +} + + +/* +** Compute the column names for a SELECT statement. +** +** The only guarantee that SQLite makes about column names is that if the +** column has an AS clause assigning it a name, that will be the name used. +** That is the only documented guarantee. However, countless applications +** developed over the years have made baseless assumptions about column names +** and will break if those assumptions changes. Hence, use extreme caution +** when modifying this routine to avoid breaking legacy. +** +** See Also: sqlite3ColumnsFromExprList() +** +** The PRAGMA short_column_names and PRAGMA full_column_names settings are +** deprecated. The default setting is short=ON, full=OFF. 99.9% of all +** applications should operate this way. Nevertheless, we need to support the +** other modes for legacy: +** +** short=OFF, full=OFF: Column name is the text of the expression has it +** originally appears in the SELECT statement. In +** other words, the zSpan of the result expression. +** +** short=ON, full=OFF: (This is the default setting). If the result +** refers directly to a table column, then the +** result column name is just the table column +** name: COLUMN. Otherwise use zSpan. +** +** full=ON, short=ANY: If the result refers directly to a table column, +** then the result column name with the table name +** prefix, ex: TABLE.COLUMN. Otherwise use zSpan. +*/ +SQLITE_PRIVATE void sqlite3GenerateColumnNames( + Parse *pParse, /* Parser context */ + Select *pSelect /* Generate column names for this SELECT statement */ +){ + Vdbe *v = pParse->pVdbe; + int i; + Table *pTab; + SrcList *pTabList; + ExprList *pEList; + sqlite3 *db = pParse->db; + int fullName; /* TABLE.COLUMN if no AS clause and is a direct table ref */ + int srcName; /* COLUMN or TABLE.COLUMN if no AS clause and is direct */ + +#ifndef SQLITE_OMIT_EXPLAIN + /* If this is an EXPLAIN, skip this step */ + if( pParse->explain ){ + return; + } +#endif + + if( pParse->colNamesSet ) return; + /* Column names are determined by the left-most term of a compound select */ + while( pSelect->pPrior ) pSelect = pSelect->pPrior; + SELECTTRACE(1,pParse,pSelect,("generating column names\n")); + pTabList = pSelect->pSrc; + pEList = pSelect->pEList; + assert( v!=0 ); + assert( pTabList!=0 ); + pParse->colNamesSet = 1; + fullName = (db->flags & SQLITE_FullColNames)!=0; + srcName = (db->flags & SQLITE_ShortColNames)!=0 || fullName; + sqlite3VdbeSetNumCols(v, pEList->nExpr); + for(i=0; inExpr; i++){ + Expr *p = pEList->a[i].pExpr; + + assert( p!=0 ); + assert( p->op!=TK_AGG_COLUMN ); /* Agg processing has not run yet */ + assert( p->op!=TK_COLUMN + || (ExprUseYTab(p) && p->y.pTab!=0) ); /* Covering idx not yet coded */ + if( pEList->a[i].zEName && pEList->a[i].eEName==ENAME_NAME ){ + /* An AS clause always takes first priority */ + char *zName = pEList->a[i].zEName; + sqlite3VdbeSetColName(v, i, COLNAME_NAME, zName, SQLITE_TRANSIENT); + }else if( srcName && p->op==TK_COLUMN ){ + char *zCol; + int iCol = p->iColumn; + pTab = p->y.pTab; + assert( pTab!=0 ); + if( iCol<0 ) iCol = pTab->iPKey; + assert( iCol==-1 || (iCol>=0 && iColnCol) ); + if( iCol<0 ){ + zCol = "rowid"; + }else{ + zCol = pTab->aCol[iCol].zCnName; + } + if( fullName ){ + char *zName = 0; + zName = sqlite3MPrintf(db, "%s.%s", pTab->zName, zCol); + sqlite3VdbeSetColName(v, i, COLNAME_NAME, zName, SQLITE_DYNAMIC); + }else{ + sqlite3VdbeSetColName(v, i, COLNAME_NAME, zCol, SQLITE_TRANSIENT); + } + }else{ + const char *z = pEList->a[i].zEName; + z = z==0 ? sqlite3MPrintf(db, "column%d", i+1) : sqlite3DbStrDup(db, z); + sqlite3VdbeSetColName(v, i, COLNAME_NAME, z, SQLITE_DYNAMIC); + } + } + generateColumnTypes(pParse, pTabList, pEList); +} + +/* +** Given an expression list (which is really the list of expressions +** that form the result set of a SELECT statement) compute appropriate +** column names for a table that would hold the expression list. +** +** All column names will be unique. +** +** Only the column names are computed. Column.zType, Column.zColl, +** and other fields of Column are zeroed. +** +** Return SQLITE_OK on success. If a memory allocation error occurs, +** store NULL in *paCol and 0 in *pnCol and return SQLITE_NOMEM. +** +** The only guarantee that SQLite makes about column names is that if the +** column has an AS clause assigning it a name, that will be the name used. +** That is the only documented guarantee. However, countless applications +** developed over the years have made baseless assumptions about column names +** and will break if those assumptions changes. Hence, use extreme caution +** when modifying this routine to avoid breaking legacy. +** +** See Also: sqlite3GenerateColumnNames() +*/ +SQLITE_PRIVATE int sqlite3ColumnsFromExprList( + Parse *pParse, /* Parsing context */ + ExprList *pEList, /* Expr list from which to derive column names */ + i16 *pnCol, /* Write the number of columns here */ + Column **paCol /* Write the new column list here */ +){ + sqlite3 *db = pParse->db; /* Database connection */ + int i, j; /* Loop counters */ + u32 cnt; /* Index added to make the name unique */ + Column *aCol, *pCol; /* For looping over result columns */ + int nCol; /* Number of columns in the result set */ + char *zName; /* Column name */ + int nName; /* Size of name in zName[] */ + Hash ht; /* Hash table of column names */ + Table *pTab; + + sqlite3HashInit(&ht); + if( pEList ){ + nCol = pEList->nExpr; + aCol = sqlite3DbMallocZero(db, sizeof(aCol[0])*nCol); + testcase( aCol==0 ); + if( NEVER(nCol>32767) ) nCol = 32767; + }else{ + nCol = 0; + aCol = 0; + } + assert( nCol==(i16)nCol ); + *pnCol = nCol; + *paCol = aCol; + + for(i=0, pCol=aCol; imallocFailed; i++, pCol++){ + /* Get an appropriate name for the column + */ + if( (zName = pEList->a[i].zEName)!=0 && pEList->a[i].eEName==ENAME_NAME ){ + /* If the column contains an "AS " phrase, use as the name */ + }else{ + Expr *pColExpr = sqlite3ExprSkipCollateAndLikely(pEList->a[i].pExpr); + while( ALWAYS(pColExpr!=0) && pColExpr->op==TK_DOT ){ + pColExpr = pColExpr->pRight; + assert( pColExpr!=0 ); + } + if( pColExpr->op==TK_COLUMN + && ALWAYS( ExprUseYTab(pColExpr) ) + && (pTab = pColExpr->y.pTab)!=0 + ){ + /* For columns use the column name name */ + int iCol = pColExpr->iColumn; + if( iCol<0 ) iCol = pTab->iPKey; + zName = iCol>=0 ? pTab->aCol[iCol].zCnName : "rowid"; + }else if( pColExpr->op==TK_ID ){ + assert( !ExprHasProperty(pColExpr, EP_IntValue) ); + zName = pColExpr->u.zToken; + }else{ + /* Use the original text of the column expression as its name */ + zName = pEList->a[i].zEName; + } + } + if( zName && !sqlite3IsTrueOrFalse(zName) ){ + zName = sqlite3DbStrDup(db, zName); + }else{ + zName = sqlite3MPrintf(db,"column%d",i+1); + } + + /* Make sure the column name is unique. If the name is not unique, + ** append an integer to the name so that it becomes unique. + */ + cnt = 0; + while( zName && sqlite3HashFind(&ht, zName)!=0 ){ + nName = sqlite3Strlen30(zName); + if( nName>0 ){ + for(j=nName-1; j>0 && sqlite3Isdigit(zName[j]); j--){} + if( zName[j]==':' ) nName = j; + } + zName = sqlite3MPrintf(db, "%.*z:%u", nName, zName, ++cnt); + if( cnt>3 ) sqlite3_randomness(sizeof(cnt), &cnt); + } + pCol->zCnName = zName; + pCol->hName = sqlite3StrIHash(zName); + sqlite3ColumnPropertiesFromName(0, pCol); + if( zName && sqlite3HashInsert(&ht, zName, pCol)==pCol ){ + sqlite3OomFault(db); + } + } + sqlite3HashClear(&ht); + if( db->mallocFailed ){ + for(j=0; jdb; + NameContext sNC; + Column *pCol; + CollSeq *pColl; + int i; + Expr *p; + struct ExprList_item *a; + + assert( pSelect!=0 ); + assert( (pSelect->selFlags & SF_Resolved)!=0 ); + assert( pTab->nCol==pSelect->pEList->nExpr || db->mallocFailed ); + if( db->mallocFailed ) return; + memset(&sNC, 0, sizeof(sNC)); + sNC.pSrcList = pSelect->pSrc; + a = pSelect->pEList->a; + for(i=0, pCol=pTab->aCol; inCol; i++, pCol++){ + const char *zType; + i64 n, m; + pTab->tabFlags |= (pCol->colFlags & COLFLAG_NOINSERT); + p = a[i].pExpr; + zType = columnType(&sNC, p, 0, 0, 0); + /* pCol->szEst = ... // Column size est for SELECT tables never used */ + pCol->affinity = sqlite3ExprAffinity(p); + if( zType ){ + m = sqlite3Strlen30(zType); + n = sqlite3Strlen30(pCol->zCnName); + pCol->zCnName = sqlite3DbReallocOrFree(db, pCol->zCnName, n+m+2); + if( pCol->zCnName ){ + memcpy(&pCol->zCnName[n+1], zType, m+1); + pCol->colFlags |= COLFLAG_HASTYPE; + }else{ + testcase( pCol->colFlags & COLFLAG_HASTYPE ); + pCol->colFlags &= ~(COLFLAG_HASTYPE|COLFLAG_HASCOLL); + } + } + if( pCol->affinity<=SQLITE_AFF_NONE ) pCol->affinity = aff; + pColl = sqlite3ExprCollSeq(pParse, p); + if( pColl ){ + assert( pTab->pIndex==0 ); + sqlite3ColumnSetColl(db, pCol, pColl->zName); + } + } + pTab->szTabRow = 1; /* Any non-zero value works */ +} + +/* +** Given a SELECT statement, generate a Table structure that describes +** the result set of that SELECT. +*/ +SQLITE_PRIVATE Table *sqlite3ResultSetOfSelect(Parse *pParse, Select *pSelect, char aff){ + Table *pTab; + sqlite3 *db = pParse->db; + u64 savedFlags; + + savedFlags = db->flags; + db->flags &= ~(u64)SQLITE_FullColNames; + db->flags |= SQLITE_ShortColNames; + sqlite3SelectPrep(pParse, pSelect, 0); + db->flags = savedFlags; + if( pParse->nErr ) return 0; + while( pSelect->pPrior ) pSelect = pSelect->pPrior; + pTab = sqlite3DbMallocZero(db, sizeof(Table) ); + if( pTab==0 ){ + return 0; + } + pTab->nTabRef = 1; + pTab->zName = 0; + pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); + sqlite3ColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol); + sqlite3SelectAddColumnTypeAndCollation(pParse, pTab, pSelect, aff); + pTab->iPKey = -1; + if( db->mallocFailed ){ + sqlite3DeleteTable(db, pTab); + return 0; + } + return pTab; +} + +/* +** Get a VDBE for the given parser context. Create a new one if necessary. +** If an error occurs, return NULL and leave a message in pParse. +*/ +SQLITE_PRIVATE Vdbe *sqlite3GetVdbe(Parse *pParse){ + if( pParse->pVdbe ){ + return pParse->pVdbe; + } + if( pParse->pToplevel==0 + && OptimizationEnabled(pParse->db,SQLITE_FactorOutConst) + ){ + pParse->okConstFactor = 1; + } + return sqlite3VdbeCreate(pParse); +} + + +/* +** Compute the iLimit and iOffset fields of the SELECT based on the +** pLimit expressions. pLimit->pLeft and pLimit->pRight hold the expressions +** that appear in the original SQL statement after the LIMIT and OFFSET +** keywords. Or NULL if those keywords are omitted. iLimit and iOffset +** are the integer memory register numbers for counters used to compute +** the limit and offset. If there is no limit and/or offset, then +** iLimit and iOffset are negative. +** +** This routine changes the values of iLimit and iOffset only if +** a limit or offset is defined by pLimit->pLeft and pLimit->pRight. iLimit +** and iOffset should have been preset to appropriate default values (zero) +** prior to calling this routine. +** +** The iOffset register (if it exists) is initialized to the value +** of the OFFSET. The iLimit register is initialized to LIMIT. Register +** iOffset+1 is initialized to LIMIT+OFFSET. +** +** Only if pLimit->pLeft!=0 do the limit registers get +** redefined. The UNION ALL operator uses this property to force +** the reuse of the same limit and offset registers across multiple +** SELECT statements. +*/ +static void computeLimitRegisters(Parse *pParse, Select *p, int iBreak){ + Vdbe *v = 0; + int iLimit = 0; + int iOffset; + int n; + Expr *pLimit = p->pLimit; + + if( p->iLimit ) return; + + /* + ** "LIMIT -1" always shows all rows. There is some + ** controversy about what the correct behavior should be. + ** The current implementation interprets "LIMIT 0" to mean + ** no rows. + */ + if( pLimit ){ + assert( pLimit->op==TK_LIMIT ); + assert( pLimit->pLeft!=0 ); + p->iLimit = iLimit = ++pParse->nMem; + v = sqlite3GetVdbe(pParse); + assert( v!=0 ); + if( sqlite3ExprIsInteger(pLimit->pLeft, &n) ){ + sqlite3VdbeAddOp2(v, OP_Integer, n, iLimit); + VdbeComment((v, "LIMIT counter")); + if( n==0 ){ + sqlite3VdbeGoto(v, iBreak); + }else if( n>=0 && p->nSelectRow>sqlite3LogEst((u64)n) ){ + p->nSelectRow = sqlite3LogEst((u64)n); + p->selFlags |= SF_FixedLimit; + } + }else{ + sqlite3ExprCode(pParse, pLimit->pLeft, iLimit); + sqlite3VdbeAddOp1(v, OP_MustBeInt, iLimit); VdbeCoverage(v); + VdbeComment((v, "LIMIT counter")); + sqlite3VdbeAddOp2(v, OP_IfNot, iLimit, iBreak); VdbeCoverage(v); + } + if( pLimit->pRight ){ + p->iOffset = iOffset = ++pParse->nMem; + pParse->nMem++; /* Allocate an extra register for limit+offset */ + sqlite3ExprCode(pParse, pLimit->pRight, iOffset); + sqlite3VdbeAddOp1(v, OP_MustBeInt, iOffset); VdbeCoverage(v); + VdbeComment((v, "OFFSET counter")); + sqlite3VdbeAddOp3(v, OP_OffsetLimit, iLimit, iOffset+1, iOffset); + VdbeComment((v, "LIMIT+OFFSET")); + } + } +} + +#ifndef SQLITE_OMIT_COMPOUND_SELECT +/* +** Return the appropriate collating sequence for the iCol-th column of +** the result set for the compound-select statement "p". Return NULL if +** the column has no default collating sequence. +** +** The collating sequence for the compound select is taken from the +** left-most term of the select that has a collating sequence. +*/ +static CollSeq *multiSelectCollSeq(Parse *pParse, Select *p, int iCol){ + CollSeq *pRet; + if( p->pPrior ){ + pRet = multiSelectCollSeq(pParse, p->pPrior, iCol); + }else{ + pRet = 0; + } + assert( iCol>=0 ); + /* iCol must be less than p->pEList->nExpr. Otherwise an error would + ** have been thrown during name resolution and we would not have gotten + ** this far */ + if( pRet==0 && ALWAYS(iColpEList->nExpr) ){ + pRet = sqlite3ExprCollSeq(pParse, p->pEList->a[iCol].pExpr); + } + return pRet; +} + +/* +** The select statement passed as the second parameter is a compound SELECT +** with an ORDER BY clause. This function allocates and returns a KeyInfo +** structure suitable for implementing the ORDER BY. +** +** Space to hold the KeyInfo structure is obtained from malloc. The calling +** function is responsible for ensuring that this structure is eventually +** freed. +*/ +static KeyInfo *multiSelectOrderByKeyInfo(Parse *pParse, Select *p, int nExtra){ + ExprList *pOrderBy = p->pOrderBy; + int nOrderBy = ALWAYS(pOrderBy!=0) ? pOrderBy->nExpr : 0; + sqlite3 *db = pParse->db; + KeyInfo *pRet = sqlite3KeyInfoAlloc(db, nOrderBy+nExtra, 1); + if( pRet ){ + int i; + for(i=0; ia[i]; + Expr *pTerm = pItem->pExpr; + CollSeq *pColl; + + if( pTerm->flags & EP_Collate ){ + pColl = sqlite3ExprCollSeq(pParse, pTerm); + }else{ + pColl = multiSelectCollSeq(pParse, p, pItem->u.x.iOrderByCol-1); + if( pColl==0 ) pColl = db->pDfltColl; + pOrderBy->a[i].pExpr = + sqlite3ExprAddCollateString(pParse, pTerm, pColl->zName); + } + assert( sqlite3KeyInfoIsWriteable(pRet) ); + pRet->aColl[i] = pColl; + pRet->aSortFlags[i] = pOrderBy->a[i].sortFlags; + } + } + + return pRet; +} + +#ifndef SQLITE_OMIT_CTE +/* +** This routine generates VDBE code to compute the content of a WITH RECURSIVE +** query of the form: +** +** AS ( UNION [ALL] ) +** \___________/ \_______________/ +** p->pPrior p +** +** +** There is exactly one reference to the recursive-table in the FROM clause +** of recursive-query, marked with the SrcList->a[].fg.isRecursive flag. +** +** The setup-query runs once to generate an initial set of rows that go +** into a Queue table. Rows are extracted from the Queue table one by +** one. Each row extracted from Queue is output to pDest. Then the single +** extracted row (now in the iCurrent table) becomes the content of the +** recursive-table for a recursive-query run. The output of the recursive-query +** is added back into the Queue table. Then another row is extracted from Queue +** and the iteration continues until the Queue table is empty. +** +** If the compound query operator is UNION then no duplicate rows are ever +** inserted into the Queue table. The iDistinct table keeps a copy of all rows +** that have ever been inserted into Queue and causes duplicates to be +** discarded. If the operator is UNION ALL, then duplicates are allowed. +** +** If the query has an ORDER BY, then entries in the Queue table are kept in +** ORDER BY order and the first entry is extracted for each cycle. Without +** an ORDER BY, the Queue table is just a FIFO. +** +** If a LIMIT clause is provided, then the iteration stops after LIMIT rows +** have been output to pDest. A LIMIT of zero means to output no rows and a +** negative LIMIT means to output all rows. If there is also an OFFSET clause +** with a positive value, then the first OFFSET outputs are discarded rather +** than being sent to pDest. The LIMIT count does not begin until after OFFSET +** rows have been skipped. +*/ +static void generateWithRecursiveQuery( + Parse *pParse, /* Parsing context */ + Select *p, /* The recursive SELECT to be coded */ + SelectDest *pDest /* What to do with query results */ +){ + SrcList *pSrc = p->pSrc; /* The FROM clause of the recursive query */ + int nCol = p->pEList->nExpr; /* Number of columns in the recursive table */ + Vdbe *v = pParse->pVdbe; /* The prepared statement under construction */ + Select *pSetup; /* The setup query */ + Select *pFirstRec; /* Left-most recursive term */ + int addrTop; /* Top of the loop */ + int addrCont, addrBreak; /* CONTINUE and BREAK addresses */ + int iCurrent = 0; /* The Current table */ + int regCurrent; /* Register holding Current table */ + int iQueue; /* The Queue table */ + int iDistinct = 0; /* To ensure unique results if UNION */ + int eDest = SRT_Fifo; /* How to write to Queue */ + SelectDest destQueue; /* SelectDest targetting the Queue table */ + int i; /* Loop counter */ + int rc; /* Result code */ + ExprList *pOrderBy; /* The ORDER BY clause */ + Expr *pLimit; /* Saved LIMIT and OFFSET */ + int regLimit, regOffset; /* Registers used by LIMIT and OFFSET */ + +#ifndef SQLITE_OMIT_WINDOWFUNC + if( p->pWin ){ + sqlite3ErrorMsg(pParse, "cannot use window functions in recursive queries"); + return; + } +#endif + + /* Obtain authorization to do a recursive query */ + if( sqlite3AuthCheck(pParse, SQLITE_RECURSIVE, 0, 0, 0) ) return; + + /* Process the LIMIT and OFFSET clauses, if they exist */ + addrBreak = sqlite3VdbeMakeLabel(pParse); + p->nSelectRow = 320; /* 4 billion rows */ + computeLimitRegisters(pParse, p, addrBreak); + pLimit = p->pLimit; + regLimit = p->iLimit; + regOffset = p->iOffset; + p->pLimit = 0; + p->iLimit = p->iOffset = 0; + pOrderBy = p->pOrderBy; + + /* Locate the cursor number of the Current table */ + for(i=0; ALWAYS(inSrc); i++){ + if( pSrc->a[i].fg.isRecursive ){ + iCurrent = pSrc->a[i].iCursor; + break; + } + } + + /* Allocate cursors numbers for Queue and Distinct. The cursor number for + ** the Distinct table must be exactly one greater than Queue in order + ** for the SRT_DistFifo and SRT_DistQueue destinations to work. */ + iQueue = pParse->nTab++; + if( p->op==TK_UNION ){ + eDest = pOrderBy ? SRT_DistQueue : SRT_DistFifo; + iDistinct = pParse->nTab++; + }else{ + eDest = pOrderBy ? SRT_Queue : SRT_Fifo; + } + sqlite3SelectDestInit(&destQueue, eDest, iQueue); + + /* Allocate cursors for Current, Queue, and Distinct. */ + regCurrent = ++pParse->nMem; + sqlite3VdbeAddOp3(v, OP_OpenPseudo, iCurrent, regCurrent, nCol); + if( pOrderBy ){ + KeyInfo *pKeyInfo = multiSelectOrderByKeyInfo(pParse, p, 1); + sqlite3VdbeAddOp4(v, OP_OpenEphemeral, iQueue, pOrderBy->nExpr+2, 0, + (char*)pKeyInfo, P4_KEYINFO); + destQueue.pOrderBy = pOrderBy; + }else{ + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iQueue, nCol); + } + VdbeComment((v, "Queue table")); + if( iDistinct ){ + p->addrOpenEphm[0] = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iDistinct, 0); + p->selFlags |= SF_UsesEphemeral; + } + + /* Detach the ORDER BY clause from the compound SELECT */ + p->pOrderBy = 0; + + /* Figure out how many elements of the compound SELECT are part of the + ** recursive query. Make sure no recursive elements use aggregate + ** functions. Mark the recursive elements as UNION ALL even if they + ** are really UNION because the distinctness will be enforced by the + ** iDistinct table. pFirstRec is left pointing to the left-most + ** recursive term of the CTE. + */ + for(pFirstRec=p; ALWAYS(pFirstRec!=0); pFirstRec=pFirstRec->pPrior){ + if( pFirstRec->selFlags & SF_Aggregate ){ + sqlite3ErrorMsg(pParse, "recursive aggregate queries not supported"); + goto end_of_recursive_query; + } + pFirstRec->op = TK_ALL; + if( (pFirstRec->pPrior->selFlags & SF_Recursive)==0 ) break; + } + + /* Store the results of the setup-query in Queue. */ + pSetup = pFirstRec->pPrior; + pSetup->pNext = 0; + ExplainQueryPlan((pParse, 1, "SETUP")); + rc = sqlite3Select(pParse, pSetup, &destQueue); + pSetup->pNext = p; + if( rc ) goto end_of_recursive_query; + + /* Find the next row in the Queue and output that row */ + addrTop = sqlite3VdbeAddOp2(v, OP_Rewind, iQueue, addrBreak); VdbeCoverage(v); + + /* Transfer the next row in Queue over to Current */ + sqlite3VdbeAddOp1(v, OP_NullRow, iCurrent); /* To reset column cache */ + if( pOrderBy ){ + sqlite3VdbeAddOp3(v, OP_Column, iQueue, pOrderBy->nExpr+1, regCurrent); + }else{ + sqlite3VdbeAddOp2(v, OP_RowData, iQueue, regCurrent); + } + sqlite3VdbeAddOp1(v, OP_Delete, iQueue); + + /* Output the single row in Current */ + addrCont = sqlite3VdbeMakeLabel(pParse); + codeOffset(v, regOffset, addrCont); + selectInnerLoop(pParse, p, iCurrent, + 0, 0, pDest, addrCont, addrBreak); + if( regLimit ){ + sqlite3VdbeAddOp2(v, OP_DecrJumpZero, regLimit, addrBreak); + VdbeCoverage(v); + } + sqlite3VdbeResolveLabel(v, addrCont); + + /* Execute the recursive SELECT taking the single row in Current as + ** the value for the recursive-table. Store the results in the Queue. + */ + pFirstRec->pPrior = 0; + ExplainQueryPlan((pParse, 1, "RECURSIVE STEP")); + sqlite3Select(pParse, p, &destQueue); + assert( pFirstRec->pPrior==0 ); + pFirstRec->pPrior = pSetup; + + /* Keep running the loop until the Queue is empty */ + sqlite3VdbeGoto(v, addrTop); + sqlite3VdbeResolveLabel(v, addrBreak); + +end_of_recursive_query: + sqlite3ExprListDelete(pParse->db, p->pOrderBy); + p->pOrderBy = pOrderBy; + p->pLimit = pLimit; + return; +} +#endif /* SQLITE_OMIT_CTE */ + +/* Forward references */ +static int multiSelectOrderBy( + Parse *pParse, /* Parsing context */ + Select *p, /* The right-most of SELECTs to be coded */ + SelectDest *pDest /* What to do with query results */ +); + +/* +** Handle the special case of a compound-select that originates from a +** VALUES clause. By handling this as a special case, we avoid deep +** recursion, and thus do not need to enforce the SQLITE_LIMIT_COMPOUND_SELECT +** on a VALUES clause. +** +** Because the Select object originates from a VALUES clause: +** (1) There is no LIMIT or OFFSET or else there is a LIMIT of exactly 1 +** (2) All terms are UNION ALL +** (3) There is no ORDER BY clause +** +** The "LIMIT of exactly 1" case of condition (1) comes about when a VALUES +** clause occurs within scalar expression (ex: "SELECT (VALUES(1),(2),(3))"). +** The sqlite3CodeSubselect will have added the LIMIT 1 clause in tht case. +** Since the limit is exactly 1, we only need to evalutes the left-most VALUES. +*/ +static int multiSelectValues( + Parse *pParse, /* Parsing context */ + Select *p, /* The right-most of SELECTs to be coded */ + SelectDest *pDest /* What to do with query results */ +){ + int nRow = 1; + int rc = 0; + int bShowAll = p->pLimit==0; + assert( p->selFlags & SF_MultiValue ); + do{ + assert( p->selFlags & SF_Values ); + assert( p->op==TK_ALL || (p->op==TK_SELECT && p->pPrior==0) ); + assert( p->pNext==0 || p->pEList->nExpr==p->pNext->pEList->nExpr ); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( p->pWin ) return -1; +#endif + if( p->pPrior==0 ) break; + assert( p->pPrior->pNext==p ); + p = p->pPrior; + nRow += bShowAll; + }while(1); + ExplainQueryPlan((pParse, 0, "SCAN %d CONSTANT ROW%s", nRow, + nRow==1 ? "" : "S")); + while( p ){ + selectInnerLoop(pParse, p, -1, 0, 0, pDest, 1, 1); + if( !bShowAll ) break; + p->nSelectRow = nRow; + p = p->pNext; + } + return rc; +} + +/* +** Return true if the SELECT statement which is known to be the recursive +** part of a recursive CTE still has its anchor terms attached. If the +** anchor terms have already been removed, then return false. +*/ +static int hasAnchor(Select *p){ + while( p && (p->selFlags & SF_Recursive)!=0 ){ p = p->pPrior; } + return p!=0; +} + +/* +** This routine is called to process a compound query form from +** two or more separate queries using UNION, UNION ALL, EXCEPT, or +** INTERSECT +** +** "p" points to the right-most of the two queries. the query on the +** left is p->pPrior. The left query could also be a compound query +** in which case this routine will be called recursively. +** +** The results of the total query are to be written into a destination +** of type eDest with parameter iParm. +** +** Example 1: Consider a three-way compound SQL statement. +** +** SELECT a FROM t1 UNION SELECT b FROM t2 UNION SELECT c FROM t3 +** +** This statement is parsed up as follows: +** +** SELECT c FROM t3 +** | +** `-----> SELECT b FROM t2 +** | +** `------> SELECT a FROM t1 +** +** The arrows in the diagram above represent the Select.pPrior pointer. +** So if this routine is called with p equal to the t3 query, then +** pPrior will be the t2 query. p->op will be TK_UNION in this case. +** +** Notice that because of the way SQLite parses compound SELECTs, the +** individual selects always group from left to right. +*/ +static int multiSelect( + Parse *pParse, /* Parsing context */ + Select *p, /* The right-most of SELECTs to be coded */ + SelectDest *pDest /* What to do with query results */ +){ + int rc = SQLITE_OK; /* Success code from a subroutine */ + Select *pPrior; /* Another SELECT immediately to our left */ + Vdbe *v; /* Generate code to this VDBE */ + SelectDest dest; /* Alternative data destination */ + Select *pDelete = 0; /* Chain of simple selects to delete */ + sqlite3 *db; /* Database connection */ + + /* Make sure there is no ORDER BY or LIMIT clause on prior SELECTs. Only + ** the last (right-most) SELECT in the series may have an ORDER BY or LIMIT. + */ + assert( p && p->pPrior ); /* Calling function guarantees this much */ + assert( (p->selFlags & SF_Recursive)==0 || p->op==TK_ALL || p->op==TK_UNION ); + assert( p->selFlags & SF_Compound ); + db = pParse->db; + pPrior = p->pPrior; + dest = *pDest; + assert( pPrior->pOrderBy==0 ); + assert( pPrior->pLimit==0 ); + + v = sqlite3GetVdbe(pParse); + assert( v!=0 ); /* The VDBE already created by calling function */ + + /* Create the destination temporary table if necessary + */ + if( dest.eDest==SRT_EphemTab ){ + assert( p->pEList ); + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, dest.iSDParm, p->pEList->nExpr); + dest.eDest = SRT_Table; + } + + /* Special handling for a compound-select that originates as a VALUES clause. + */ + if( p->selFlags & SF_MultiValue ){ + rc = multiSelectValues(pParse, p, &dest); + if( rc>=0 ) goto multi_select_end; + rc = SQLITE_OK; + } + + /* Make sure all SELECTs in the statement have the same number of elements + ** in their result sets. + */ + assert( p->pEList && pPrior->pEList ); + assert( p->pEList->nExpr==pPrior->pEList->nExpr ); + +#ifndef SQLITE_OMIT_CTE + if( (p->selFlags & SF_Recursive)!=0 && hasAnchor(p) ){ + generateWithRecursiveQuery(pParse, p, &dest); + }else +#endif + + /* Compound SELECTs that have an ORDER BY clause are handled separately. + */ + if( p->pOrderBy ){ + return multiSelectOrderBy(pParse, p, pDest); + }else{ + +#ifndef SQLITE_OMIT_EXPLAIN + if( pPrior->pPrior==0 ){ + ExplainQueryPlan((pParse, 1, "COMPOUND QUERY")); + ExplainQueryPlan((pParse, 1, "LEFT-MOST SUBQUERY")); + } +#endif + + /* Generate code for the left and right SELECT statements. + */ + switch( p->op ){ + case TK_ALL: { + int addr = 0; + int nLimit = 0; /* Initialize to suppress harmless compiler warning */ + assert( !pPrior->pLimit ); + pPrior->iLimit = p->iLimit; + pPrior->iOffset = p->iOffset; + pPrior->pLimit = p->pLimit; + SELECTTRACE(1, pParse, p, ("multiSelect UNION ALL left...\n")); + rc = sqlite3Select(pParse, pPrior, &dest); + pPrior->pLimit = 0; + if( rc ){ + goto multi_select_end; + } + p->pPrior = 0; + p->iLimit = pPrior->iLimit; + p->iOffset = pPrior->iOffset; + if( p->iLimit ){ + addr = sqlite3VdbeAddOp1(v, OP_IfNot, p->iLimit); VdbeCoverage(v); + VdbeComment((v, "Jump ahead if LIMIT reached")); + if( p->iOffset ){ + sqlite3VdbeAddOp3(v, OP_OffsetLimit, + p->iLimit, p->iOffset+1, p->iOffset); + } + } + ExplainQueryPlan((pParse, 1, "UNION ALL")); + SELECTTRACE(1, pParse, p, ("multiSelect UNION ALL right...\n")); + rc = sqlite3Select(pParse, p, &dest); + testcase( rc!=SQLITE_OK ); + pDelete = p->pPrior; + p->pPrior = pPrior; + p->nSelectRow = sqlite3LogEstAdd(p->nSelectRow, pPrior->nSelectRow); + if( p->pLimit + && sqlite3ExprIsInteger(p->pLimit->pLeft, &nLimit) + && nLimit>0 && p->nSelectRow > sqlite3LogEst((u64)nLimit) + ){ + p->nSelectRow = sqlite3LogEst((u64)nLimit); + } + if( addr ){ + sqlite3VdbeJumpHere(v, addr); + } + break; + } + case TK_EXCEPT: + case TK_UNION: { + int unionTab; /* Cursor number of the temp table holding result */ + u8 op = 0; /* One of the SRT_ operations to apply to self */ + int priorOp; /* The SRT_ operation to apply to prior selects */ + Expr *pLimit; /* Saved values of p->nLimit */ + int addr; + SelectDest uniondest; + + testcase( p->op==TK_EXCEPT ); + testcase( p->op==TK_UNION ); + priorOp = SRT_Union; + if( dest.eDest==priorOp ){ + /* We can reuse a temporary table generated by a SELECT to our + ** right. + */ + assert( p->pLimit==0 ); /* Not allowed on leftward elements */ + unionTab = dest.iSDParm; + }else{ + /* We will need to create our own temporary table to hold the + ** intermediate results. + */ + unionTab = pParse->nTab++; + assert( p->pOrderBy==0 ); + addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, unionTab, 0); + assert( p->addrOpenEphm[0] == -1 ); + p->addrOpenEphm[0] = addr; + findRightmost(p)->selFlags |= SF_UsesEphemeral; + assert( p->pEList ); + } + + + /* Code the SELECT statements to our left + */ + assert( !pPrior->pOrderBy ); + sqlite3SelectDestInit(&uniondest, priorOp, unionTab); + SELECTTRACE(1, pParse, p, ("multiSelect EXCEPT/UNION left...\n")); + rc = sqlite3Select(pParse, pPrior, &uniondest); + if( rc ){ + goto multi_select_end; + } + + /* Code the current SELECT statement + */ + if( p->op==TK_EXCEPT ){ + op = SRT_Except; + }else{ + assert( p->op==TK_UNION ); + op = SRT_Union; + } + p->pPrior = 0; + pLimit = p->pLimit; + p->pLimit = 0; + uniondest.eDest = op; + ExplainQueryPlan((pParse, 1, "%s USING TEMP B-TREE", + sqlite3SelectOpName(p->op))); + SELECTTRACE(1, pParse, p, ("multiSelect EXCEPT/UNION right...\n")); + rc = sqlite3Select(pParse, p, &uniondest); + testcase( rc!=SQLITE_OK ); + assert( p->pOrderBy==0 ); + pDelete = p->pPrior; + p->pPrior = pPrior; + p->pOrderBy = 0; + if( p->op==TK_UNION ){ + p->nSelectRow = sqlite3LogEstAdd(p->nSelectRow, pPrior->nSelectRow); + } + sqlite3ExprDelete(db, p->pLimit); + p->pLimit = pLimit; + p->iLimit = 0; + p->iOffset = 0; + + /* Convert the data in the temporary table into whatever form + ** it is that we currently need. + */ + assert( unionTab==dest.iSDParm || dest.eDest!=priorOp ); + assert( p->pEList || db->mallocFailed ); + if( dest.eDest!=priorOp && db->mallocFailed==0 ){ + int iCont, iBreak, iStart; + iBreak = sqlite3VdbeMakeLabel(pParse); + iCont = sqlite3VdbeMakeLabel(pParse); + computeLimitRegisters(pParse, p, iBreak); + sqlite3VdbeAddOp2(v, OP_Rewind, unionTab, iBreak); VdbeCoverage(v); + iStart = sqlite3VdbeCurrentAddr(v); + selectInnerLoop(pParse, p, unionTab, + 0, 0, &dest, iCont, iBreak); + sqlite3VdbeResolveLabel(v, iCont); + sqlite3VdbeAddOp2(v, OP_Next, unionTab, iStart); VdbeCoverage(v); + sqlite3VdbeResolveLabel(v, iBreak); + sqlite3VdbeAddOp2(v, OP_Close, unionTab, 0); + } + break; + } + default: assert( p->op==TK_INTERSECT ); { + int tab1, tab2; + int iCont, iBreak, iStart; + Expr *pLimit; + int addr; + SelectDest intersectdest; + int r1; + + /* INTERSECT is different from the others since it requires + ** two temporary tables. Hence it has its own case. Begin + ** by allocating the tables we will need. + */ + tab1 = pParse->nTab++; + tab2 = pParse->nTab++; + assert( p->pOrderBy==0 ); + + addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, tab1, 0); + assert( p->addrOpenEphm[0] == -1 ); + p->addrOpenEphm[0] = addr; + findRightmost(p)->selFlags |= SF_UsesEphemeral; + assert( p->pEList ); + + /* Code the SELECTs to our left into temporary table "tab1". + */ + sqlite3SelectDestInit(&intersectdest, SRT_Union, tab1); + SELECTTRACE(1, pParse, p, ("multiSelect INTERSECT left...\n")); + rc = sqlite3Select(pParse, pPrior, &intersectdest); + if( rc ){ + goto multi_select_end; + } + + /* Code the current SELECT into temporary table "tab2" + */ + addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, tab2, 0); + assert( p->addrOpenEphm[1] == -1 ); + p->addrOpenEphm[1] = addr; + p->pPrior = 0; + pLimit = p->pLimit; + p->pLimit = 0; + intersectdest.iSDParm = tab2; + ExplainQueryPlan((pParse, 1, "%s USING TEMP B-TREE", + sqlite3SelectOpName(p->op))); + SELECTTRACE(1, pParse, p, ("multiSelect INTERSECT right...\n")); + rc = sqlite3Select(pParse, p, &intersectdest); + testcase( rc!=SQLITE_OK ); + pDelete = p->pPrior; + p->pPrior = pPrior; + if( p->nSelectRow>pPrior->nSelectRow ){ + p->nSelectRow = pPrior->nSelectRow; + } + sqlite3ExprDelete(db, p->pLimit); + p->pLimit = pLimit; + + /* Generate code to take the intersection of the two temporary + ** tables. + */ + if( rc ) break; + assert( p->pEList ); + iBreak = sqlite3VdbeMakeLabel(pParse); + iCont = sqlite3VdbeMakeLabel(pParse); + computeLimitRegisters(pParse, p, iBreak); + sqlite3VdbeAddOp2(v, OP_Rewind, tab1, iBreak); VdbeCoverage(v); + r1 = sqlite3GetTempReg(pParse); + iStart = sqlite3VdbeAddOp2(v, OP_RowData, tab1, r1); + sqlite3VdbeAddOp4Int(v, OP_NotFound, tab2, iCont, r1, 0); + VdbeCoverage(v); + sqlite3ReleaseTempReg(pParse, r1); + selectInnerLoop(pParse, p, tab1, + 0, 0, &dest, iCont, iBreak); + sqlite3VdbeResolveLabel(v, iCont); + sqlite3VdbeAddOp2(v, OP_Next, tab1, iStart); VdbeCoverage(v); + sqlite3VdbeResolveLabel(v, iBreak); + sqlite3VdbeAddOp2(v, OP_Close, tab2, 0); + sqlite3VdbeAddOp2(v, OP_Close, tab1, 0); + break; + } + } + + #ifndef SQLITE_OMIT_EXPLAIN + if( p->pNext==0 ){ + ExplainQueryPlanPop(pParse); + } + #endif + } + if( pParse->nErr ) goto multi_select_end; + + /* Compute collating sequences used by + ** temporary tables needed to implement the compound select. + ** Attach the KeyInfo structure to all temporary tables. + ** + ** This section is run by the right-most SELECT statement only. + ** SELECT statements to the left always skip this part. The right-most + ** SELECT might also skip this part if it has no ORDER BY clause and + ** no temp tables are required. + */ + if( p->selFlags & SF_UsesEphemeral ){ + int i; /* Loop counter */ + KeyInfo *pKeyInfo; /* Collating sequence for the result set */ + Select *pLoop; /* For looping through SELECT statements */ + CollSeq **apColl; /* For looping through pKeyInfo->aColl[] */ + int nCol; /* Number of columns in result set */ + + assert( p->pNext==0 ); + assert( p->pEList!=0 ); + nCol = p->pEList->nExpr; + pKeyInfo = sqlite3KeyInfoAlloc(db, nCol, 1); + if( !pKeyInfo ){ + rc = SQLITE_NOMEM_BKPT; + goto multi_select_end; + } + for(i=0, apColl=pKeyInfo->aColl; ipDfltColl; + } + } + + for(pLoop=p; pLoop; pLoop=pLoop->pPrior){ + for(i=0; i<2; i++){ + int addr = pLoop->addrOpenEphm[i]; + if( addr<0 ){ + /* If [0] is unused then [1] is also unused. So we can + ** always safely abort as soon as the first unused slot is found */ + assert( pLoop->addrOpenEphm[1]<0 ); + break; + } + sqlite3VdbeChangeP2(v, addr, nCol); + sqlite3VdbeChangeP4(v, addr, (char*)sqlite3KeyInfoRef(pKeyInfo), + P4_KEYINFO); + pLoop->addrOpenEphm[i] = -1; + } + } + sqlite3KeyInfoUnref(pKeyInfo); + } + +multi_select_end: + pDest->iSdst = dest.iSdst; + pDest->nSdst = dest.nSdst; + if( pDelete ){ + sqlite3ParserAddCleanup(pParse, + (void(*)(sqlite3*,void*))sqlite3SelectDelete, + pDelete); + } + return rc; +} +#endif /* SQLITE_OMIT_COMPOUND_SELECT */ + +/* +** Error message for when two or more terms of a compound select have different +** size result sets. +*/ +SQLITE_PRIVATE void sqlite3SelectWrongNumTermsError(Parse *pParse, Select *p){ + if( p->selFlags & SF_Values ){ + sqlite3ErrorMsg(pParse, "all VALUES must have the same number of terms"); + }else{ + sqlite3ErrorMsg(pParse, "SELECTs to the left and right of %s" + " do not have the same number of result columns", + sqlite3SelectOpName(p->op)); + } +} + +/* +** Code an output subroutine for a coroutine implementation of a +** SELECT statment. +** +** The data to be output is contained in pIn->iSdst. There are +** pIn->nSdst columns to be output. pDest is where the output should +** be sent. +** +** regReturn is the number of the register holding the subroutine +** return address. +** +** If regPrev>0 then it is the first register in a vector that +** records the previous output. mem[regPrev] is a flag that is false +** if there has been no previous output. If regPrev>0 then code is +** generated to suppress duplicates. pKeyInfo is used for comparing +** keys. +** +** If the LIMIT found in p->iLimit is reached, jump immediately to +** iBreak. +*/ +static int generateOutputSubroutine( + Parse *pParse, /* Parsing context */ + Select *p, /* The SELECT statement */ + SelectDest *pIn, /* Coroutine supplying data */ + SelectDest *pDest, /* Where to send the data */ + int regReturn, /* The return address register */ + int regPrev, /* Previous result register. No uniqueness if 0 */ + KeyInfo *pKeyInfo, /* For comparing with previous entry */ + int iBreak /* Jump here if we hit the LIMIT */ +){ + Vdbe *v = pParse->pVdbe; + int iContinue; + int addr; + + addr = sqlite3VdbeCurrentAddr(v); + iContinue = sqlite3VdbeMakeLabel(pParse); + + /* Suppress duplicates for UNION, EXCEPT, and INTERSECT + */ + if( regPrev ){ + int addr1, addr2; + addr1 = sqlite3VdbeAddOp1(v, OP_IfNot, regPrev); VdbeCoverage(v); + addr2 = sqlite3VdbeAddOp4(v, OP_Compare, pIn->iSdst, regPrev+1, pIn->nSdst, + (char*)sqlite3KeyInfoRef(pKeyInfo), P4_KEYINFO); + sqlite3VdbeAddOp3(v, OP_Jump, addr2+2, iContinue, addr2+2); VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addr1); + sqlite3VdbeAddOp3(v, OP_Copy, pIn->iSdst, regPrev+1, pIn->nSdst-1); + sqlite3VdbeAddOp2(v, OP_Integer, 1, regPrev); + } + if( pParse->db->mallocFailed ) return 0; + + /* Suppress the first OFFSET entries if there is an OFFSET clause + */ + codeOffset(v, p->iOffset, iContinue); + + assert( pDest->eDest!=SRT_Exists ); + assert( pDest->eDest!=SRT_Table ); + switch( pDest->eDest ){ + /* Store the result as data using a unique key. + */ + case SRT_EphemTab: { + int r1 = sqlite3GetTempReg(pParse); + int r2 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1); + sqlite3VdbeAddOp2(v, OP_NewRowid, pDest->iSDParm, r2); + sqlite3VdbeAddOp3(v, OP_Insert, pDest->iSDParm, r1, r2); + sqlite3VdbeChangeP5(v, OPFLAG_APPEND); + sqlite3ReleaseTempReg(pParse, r2); + sqlite3ReleaseTempReg(pParse, r1); + break; + } + +#ifndef SQLITE_OMIT_SUBQUERY + /* If we are creating a set for an "expr IN (SELECT ...)". + */ + case SRT_Set: { + int r1; + testcase( pIn->nSdst>1 ); + r1 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, + r1, pDest->zAffSdst, pIn->nSdst); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1, + pIn->iSdst, pIn->nSdst); + sqlite3ReleaseTempReg(pParse, r1); + break; + } + + /* If this is a scalar select that is part of an expression, then + ** store the results in the appropriate memory cell and break out + ** of the scan loop. Note that the select might return multiple columns + ** if it is the RHS of a row-value IN operator. + */ + case SRT_Mem: { + testcase( pIn->nSdst>1 ); + sqlite3ExprCodeMove(pParse, pIn->iSdst, pDest->iSDParm, pIn->nSdst); + /* The LIMIT clause will jump out of the loop for us */ + break; + } +#endif /* #ifndef SQLITE_OMIT_SUBQUERY */ + + /* The results are stored in a sequence of registers + ** starting at pDest->iSdst. Then the co-routine yields. + */ + case SRT_Coroutine: { + if( pDest->iSdst==0 ){ + pDest->iSdst = sqlite3GetTempRange(pParse, pIn->nSdst); + pDest->nSdst = pIn->nSdst; + } + sqlite3ExprCodeMove(pParse, pIn->iSdst, pDest->iSdst, pIn->nSdst); + sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm); + break; + } + + /* If none of the above, then the result destination must be + ** SRT_Output. This routine is never called with any other + ** destination other than the ones handled above or SRT_Output. + ** + ** For SRT_Output, results are stored in a sequence of registers. + ** Then the OP_ResultRow opcode is used to cause sqlite3_step() to + ** return the next row of result. + */ + default: { + assert( pDest->eDest==SRT_Output ); + sqlite3VdbeAddOp2(v, OP_ResultRow, pIn->iSdst, pIn->nSdst); + break; + } + } + + /* Jump to the end of the loop if the LIMIT is reached. + */ + if( p->iLimit ){ + sqlite3VdbeAddOp2(v, OP_DecrJumpZero, p->iLimit, iBreak); VdbeCoverage(v); + } + + /* Generate the subroutine return + */ + sqlite3VdbeResolveLabel(v, iContinue); + sqlite3VdbeAddOp1(v, OP_Return, regReturn); + + return addr; +} + +/* +** Alternative compound select code generator for cases when there +** is an ORDER BY clause. +** +** We assume a query of the following form: +** +** ORDER BY +** +** is one of UNION ALL, UNION, EXCEPT, or INTERSECT. The idea +** is to code both and with the ORDER BY clause as +** co-routines. Then run the co-routines in parallel and merge the results +** into the output. In addition to the two coroutines (called selectA and +** selectB) there are 7 subroutines: +** +** outA: Move the output of the selectA coroutine into the output +** of the compound query. +** +** outB: Move the output of the selectB coroutine into the output +** of the compound query. (Only generated for UNION and +** UNION ALL. EXCEPT and INSERTSECT never output a row that +** appears only in B.) +** +** AltB: Called when there is data from both coroutines and AB. +** +** EofA: Called when data is exhausted from selectA. +** +** EofB: Called when data is exhausted from selectB. +** +** The implementation of the latter five subroutines depend on which +** is used: +** +** +** UNION ALL UNION EXCEPT INTERSECT +** ------------- ----------------- -------------- ----------------- +** AltB: outA, nextA outA, nextA outA, nextA nextA +** +** AeqB: outA, nextA nextA nextA outA, nextA +** +** AgtB: outB, nextB outB, nextB nextB nextB +** +** EofA: outB, nextB outB, nextB halt halt +** +** EofB: outA, nextA outA, nextA outA, nextA halt +** +** In the AltB, AeqB, and AgtB subroutines, an EOF on A following nextA +** causes an immediate jump to EofA and an EOF on B following nextB causes +** an immediate jump to EofB. Within EofA and EofB, and EOF on entry or +** following nextX causes a jump to the end of the select processing. +** +** Duplicate removal in the UNION, EXCEPT, and INTERSECT cases is handled +** within the output subroutine. The regPrev register set holds the previously +** output value. A comparison is made against this value and the output +** is skipped if the next results would be the same as the previous. +** +** The implementation plan is to implement the two coroutines and seven +** subroutines first, then put the control logic at the bottom. Like this: +** +** goto Init +** coA: coroutine for left query (A) +** coB: coroutine for right query (B) +** outA: output one row of A +** outB: output one row of B (UNION and UNION ALL only) +** EofA: ... +** EofB: ... +** AltB: ... +** AeqB: ... +** AgtB: ... +** Init: initialize coroutine registers +** yield coA +** if eof(A) goto EofA +** yield coB +** if eof(B) goto EofB +** Cmpr: Compare A, B +** Jump AltB, AeqB, AgtB +** End: ... +** +** We call AltB, AeqB, AgtB, EofA, and EofB "subroutines" but they are not +** actually called using Gosub and they do not Return. EofA and EofB loop +** until all data is exhausted then jump to the "end" labe. AltB, AeqB, +** and AgtB jump to either L2 or to one of EofA or EofB. +*/ +#ifndef SQLITE_OMIT_COMPOUND_SELECT +static int multiSelectOrderBy( + Parse *pParse, /* Parsing context */ + Select *p, /* The right-most of SELECTs to be coded */ + SelectDest *pDest /* What to do with query results */ +){ + int i, j; /* Loop counters */ + Select *pPrior; /* Another SELECT immediately to our left */ + Select *pSplit; /* Left-most SELECT in the right-hand group */ + int nSelect; /* Number of SELECT statements in the compound */ + Vdbe *v; /* Generate code to this VDBE */ + SelectDest destA; /* Destination for coroutine A */ + SelectDest destB; /* Destination for coroutine B */ + int regAddrA; /* Address register for select-A coroutine */ + int regAddrB; /* Address register for select-B coroutine */ + int addrSelectA; /* Address of the select-A coroutine */ + int addrSelectB; /* Address of the select-B coroutine */ + int regOutA; /* Address register for the output-A subroutine */ + int regOutB; /* Address register for the output-B subroutine */ + int addrOutA; /* Address of the output-A subroutine */ + int addrOutB = 0; /* Address of the output-B subroutine */ + int addrEofA; /* Address of the select-A-exhausted subroutine */ + int addrEofA_noB; /* Alternate addrEofA if B is uninitialized */ + int addrEofB; /* Address of the select-B-exhausted subroutine */ + int addrAltB; /* Address of the AB subroutine */ + int regLimitA; /* Limit register for select-A */ + int regLimitB; /* Limit register for select-A */ + int regPrev; /* A range of registers to hold previous output */ + int savedLimit; /* Saved value of p->iLimit */ + int savedOffset; /* Saved value of p->iOffset */ + int labelCmpr; /* Label for the start of the merge algorithm */ + int labelEnd; /* Label for the end of the overall SELECT stmt */ + int addr1; /* Jump instructions that get retargetted */ + int op; /* One of TK_ALL, TK_UNION, TK_EXCEPT, TK_INTERSECT */ + KeyInfo *pKeyDup = 0; /* Comparison information for duplicate removal */ + KeyInfo *pKeyMerge; /* Comparison information for merging rows */ + sqlite3 *db; /* Database connection */ + ExprList *pOrderBy; /* The ORDER BY clause */ + int nOrderBy; /* Number of terms in the ORDER BY clause */ + u32 *aPermute; /* Mapping from ORDER BY terms to result set columns */ + + assert( p->pOrderBy!=0 ); + assert( pKeyDup==0 ); /* "Managed" code needs this. Ticket #3382. */ + db = pParse->db; + v = pParse->pVdbe; + assert( v!=0 ); /* Already thrown the error if VDBE alloc failed */ + labelEnd = sqlite3VdbeMakeLabel(pParse); + labelCmpr = sqlite3VdbeMakeLabel(pParse); + + + /* Patch up the ORDER BY clause + */ + op = p->op; + assert( p->pPrior->pOrderBy==0 ); + pOrderBy = p->pOrderBy; + assert( pOrderBy ); + nOrderBy = pOrderBy->nExpr; + + /* For operators other than UNION ALL we have to make sure that + ** the ORDER BY clause covers every term of the result set. Add + ** terms to the ORDER BY clause as necessary. + */ + if( op!=TK_ALL ){ + for(i=1; db->mallocFailed==0 && i<=p->pEList->nExpr; i++){ + struct ExprList_item *pItem; + for(j=0, pItem=pOrderBy->a; ju.x.iOrderByCol>0 ); + if( pItem->u.x.iOrderByCol==i ) break; + } + if( j==nOrderBy ){ + Expr *pNew = sqlite3Expr(db, TK_INTEGER, 0); + if( pNew==0 ) return SQLITE_NOMEM_BKPT; + pNew->flags |= EP_IntValue; + pNew->u.iValue = i; + p->pOrderBy = pOrderBy = sqlite3ExprListAppend(pParse, pOrderBy, pNew); + if( pOrderBy ) pOrderBy->a[nOrderBy++].u.x.iOrderByCol = (u16)i; + } + } + } + + /* Compute the comparison permutation and keyinfo that is used with + ** the permutation used to determine if the next + ** row of results comes from selectA or selectB. Also add explicit + ** collations to the ORDER BY clause terms so that when the subqueries + ** to the right and the left are evaluated, they use the correct + ** collation. + */ + aPermute = sqlite3DbMallocRawNN(db, sizeof(u32)*(nOrderBy + 1)); + if( aPermute ){ + struct ExprList_item *pItem; + aPermute[0] = nOrderBy; + for(i=1, pItem=pOrderBy->a; i<=nOrderBy; i++, pItem++){ + assert( pItem!=0 ); + assert( pItem->u.x.iOrderByCol>0 ); + assert( pItem->u.x.iOrderByCol<=p->pEList->nExpr ); + aPermute[i] = pItem->u.x.iOrderByCol - 1; + } + pKeyMerge = multiSelectOrderByKeyInfo(pParse, p, 1); + }else{ + pKeyMerge = 0; + } + + /* Allocate a range of temporary registers and the KeyInfo needed + ** for the logic that removes duplicate result rows when the + ** operator is UNION, EXCEPT, or INTERSECT (but not UNION ALL). + */ + if( op==TK_ALL ){ + regPrev = 0; + }else{ + int nExpr = p->pEList->nExpr; + assert( nOrderBy>=nExpr || db->mallocFailed ); + regPrev = pParse->nMem+1; + pParse->nMem += nExpr+1; + sqlite3VdbeAddOp2(v, OP_Integer, 0, regPrev); + pKeyDup = sqlite3KeyInfoAlloc(db, nExpr, 1); + if( pKeyDup ){ + assert( sqlite3KeyInfoIsWriteable(pKeyDup) ); + for(i=0; iaColl[i] = multiSelectCollSeq(pParse, p, i); + pKeyDup->aSortFlags[i] = 0; + } + } + } + + /* Separate the left and the right query from one another + */ + nSelect = 1; + if( (op==TK_ALL || op==TK_UNION) + && OptimizationEnabled(db, SQLITE_BalancedMerge) + ){ + for(pSplit=p; pSplit->pPrior!=0 && pSplit->op==op; pSplit=pSplit->pPrior){ + nSelect++; + assert( pSplit->pPrior->pNext==pSplit ); + } + } + if( nSelect<=3 ){ + pSplit = p; + }else{ + pSplit = p; + for(i=2; ipPrior; } + } + pPrior = pSplit->pPrior; + assert( pPrior!=0 ); + pSplit->pPrior = 0; + pPrior->pNext = 0; + assert( p->pOrderBy == pOrderBy ); + assert( pOrderBy!=0 || db->mallocFailed ); + pPrior->pOrderBy = sqlite3ExprListDup(pParse->db, pOrderBy, 0); + sqlite3ResolveOrderGroupBy(pParse, p, p->pOrderBy, "ORDER"); + sqlite3ResolveOrderGroupBy(pParse, pPrior, pPrior->pOrderBy, "ORDER"); + + /* Compute the limit registers */ + computeLimitRegisters(pParse, p, labelEnd); + if( p->iLimit && op==TK_ALL ){ + regLimitA = ++pParse->nMem; + regLimitB = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Copy, p->iOffset ? p->iOffset+1 : p->iLimit, + regLimitA); + sqlite3VdbeAddOp2(v, OP_Copy, regLimitA, regLimitB); + }else{ + regLimitA = regLimitB = 0; + } + sqlite3ExprDelete(db, p->pLimit); + p->pLimit = 0; + + regAddrA = ++pParse->nMem; + regAddrB = ++pParse->nMem; + regOutA = ++pParse->nMem; + regOutB = ++pParse->nMem; + sqlite3SelectDestInit(&destA, SRT_Coroutine, regAddrA); + sqlite3SelectDestInit(&destB, SRT_Coroutine, regAddrB); + + ExplainQueryPlan((pParse, 1, "MERGE (%s)", sqlite3SelectOpName(p->op))); + + /* Generate a coroutine to evaluate the SELECT statement to the + ** left of the compound operator - the "A" select. + */ + addrSelectA = sqlite3VdbeCurrentAddr(v) + 1; + addr1 = sqlite3VdbeAddOp3(v, OP_InitCoroutine, regAddrA, 0, addrSelectA); + VdbeComment((v, "left SELECT")); + pPrior->iLimit = regLimitA; + ExplainQueryPlan((pParse, 1, "LEFT")); + sqlite3Select(pParse, pPrior, &destA); + sqlite3VdbeEndCoroutine(v, regAddrA); + sqlite3VdbeJumpHere(v, addr1); + + /* Generate a coroutine to evaluate the SELECT statement on + ** the right - the "B" select + */ + addrSelectB = sqlite3VdbeCurrentAddr(v) + 1; + addr1 = sqlite3VdbeAddOp3(v, OP_InitCoroutine, regAddrB, 0, addrSelectB); + VdbeComment((v, "right SELECT")); + savedLimit = p->iLimit; + savedOffset = p->iOffset; + p->iLimit = regLimitB; + p->iOffset = 0; + ExplainQueryPlan((pParse, 1, "RIGHT")); + sqlite3Select(pParse, p, &destB); + p->iLimit = savedLimit; + p->iOffset = savedOffset; + sqlite3VdbeEndCoroutine(v, regAddrB); + + /* Generate a subroutine that outputs the current row of the A + ** select as the next output row of the compound select. + */ + VdbeNoopComment((v, "Output routine for A")); + addrOutA = generateOutputSubroutine(pParse, + p, &destA, pDest, regOutA, + regPrev, pKeyDup, labelEnd); + + /* Generate a subroutine that outputs the current row of the B + ** select as the next output row of the compound select. + */ + if( op==TK_ALL || op==TK_UNION ){ + VdbeNoopComment((v, "Output routine for B")); + addrOutB = generateOutputSubroutine(pParse, + p, &destB, pDest, regOutB, + regPrev, pKeyDup, labelEnd); + } + sqlite3KeyInfoUnref(pKeyDup); + + /* Generate a subroutine to run when the results from select A + ** are exhausted and only data in select B remains. + */ + if( op==TK_EXCEPT || op==TK_INTERSECT ){ + addrEofA_noB = addrEofA = labelEnd; + }else{ + VdbeNoopComment((v, "eof-A subroutine")); + addrEofA = sqlite3VdbeAddOp2(v, OP_Gosub, regOutB, addrOutB); + addrEofA_noB = sqlite3VdbeAddOp2(v, OP_Yield, regAddrB, labelEnd); + VdbeCoverage(v); + sqlite3VdbeGoto(v, addrEofA); + p->nSelectRow = sqlite3LogEstAdd(p->nSelectRow, pPrior->nSelectRow); + } + + /* Generate a subroutine to run when the results from select B + ** are exhausted and only data in select A remains. + */ + if( op==TK_INTERSECT ){ + addrEofB = addrEofA; + if( p->nSelectRow > pPrior->nSelectRow ) p->nSelectRow = pPrior->nSelectRow; + }else{ + VdbeNoopComment((v, "eof-B subroutine")); + addrEofB = sqlite3VdbeAddOp2(v, OP_Gosub, regOutA, addrOutA); + sqlite3VdbeAddOp2(v, OP_Yield, regAddrA, labelEnd); VdbeCoverage(v); + sqlite3VdbeGoto(v, addrEofB); + } + + /* Generate code to handle the case of AB + */ + VdbeNoopComment((v, "A-gt-B subroutine")); + addrAgtB = sqlite3VdbeCurrentAddr(v); + if( op==TK_ALL || op==TK_UNION ){ + sqlite3VdbeAddOp2(v, OP_Gosub, regOutB, addrOutB); + } + sqlite3VdbeAddOp2(v, OP_Yield, regAddrB, addrEofB); VdbeCoverage(v); + sqlite3VdbeGoto(v, labelCmpr); + + /* This code runs once to initialize everything. + */ + sqlite3VdbeJumpHere(v, addr1); + sqlite3VdbeAddOp2(v, OP_Yield, regAddrA, addrEofA_noB); VdbeCoverage(v); + sqlite3VdbeAddOp2(v, OP_Yield, regAddrB, addrEofB); VdbeCoverage(v); + + /* Implement the main merge loop + */ + sqlite3VdbeResolveLabel(v, labelCmpr); + sqlite3VdbeAddOp4(v, OP_Permutation, 0, 0, 0, (char*)aPermute, P4_INTARRAY); + sqlite3VdbeAddOp4(v, OP_Compare, destA.iSdst, destB.iSdst, nOrderBy, + (char*)pKeyMerge, P4_KEYINFO); + sqlite3VdbeChangeP5(v, OPFLAG_PERMUTE); + sqlite3VdbeAddOp3(v, OP_Jump, addrAltB, addrAeqB, addrAgtB); VdbeCoverage(v); + + /* Jump to the this point in order to terminate the query. + */ + sqlite3VdbeResolveLabel(v, labelEnd); + + /* Reassembly the compound query so that it will be freed correctly + ** by the calling function */ + if( pSplit->pPrior ){ + sqlite3SelectDelete(db, pSplit->pPrior); + } + pSplit->pPrior = pPrior; + pPrior->pNext = pSplit; + sqlite3ExprListDelete(db, pPrior->pOrderBy); + pPrior->pOrderBy = 0; + + /*** TBD: Insert subroutine calls to close cursors on incomplete + **** subqueries ****/ + ExplainQueryPlanPop(pParse); + return pParse->nErr!=0; +} +#endif + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) + +/* An instance of the SubstContext object describes an substitution edit +** to be performed on a parse tree. +** +** All references to columns in table iTable are to be replaced by corresponding +** expressions in pEList. +*/ +typedef struct SubstContext { + Parse *pParse; /* The parsing context */ + int iTable; /* Replace references to this table */ + int iNewTable; /* New table number */ + int isLeftJoin; /* Add TK_IF_NULL_ROW opcodes on each replacement */ + ExprList *pEList; /* Replacement expressions */ +} SubstContext; + +/* Forward Declarations */ +static void substExprList(SubstContext*, ExprList*); +static void substSelect(SubstContext*, Select*, int); + +/* +** Scan through the expression pExpr. Replace every reference to +** a column in table number iTable with a copy of the iColumn-th +** entry in pEList. (But leave references to the ROWID column +** unchanged.) +** +** This routine is part of the flattening procedure. A subquery +** whose result set is defined by pEList appears as entry in the +** FROM clause of a SELECT such that the VDBE cursor assigned to that +** FORM clause entry is iTable. This routine makes the necessary +** changes to pExpr so that it refers directly to the source table +** of the subquery rather the result set of the subquery. +*/ +static Expr *substExpr( + SubstContext *pSubst, /* Description of the substitution */ + Expr *pExpr /* Expr in which substitution occurs */ +){ + if( pExpr==0 ) return 0; + if( ExprHasProperty(pExpr, EP_FromJoin) + && pExpr->w.iRightJoinTable==pSubst->iTable + ){ + pExpr->w.iRightJoinTable = pSubst->iNewTable; + } + if( pExpr->op==TK_COLUMN + && pExpr->iTable==pSubst->iTable + && !ExprHasProperty(pExpr, EP_FixedCol) + ){ +#ifdef SQLITE_ALLOW_ROWID_IN_VIEW + if( pExpr->iColumn<0 ){ + pExpr->op = TK_NULL; + }else +#endif + { + Expr *pNew; + Expr *pCopy = pSubst->pEList->a[pExpr->iColumn].pExpr; + Expr ifNullRow; + assert( pSubst->pEList!=0 && pExpr->iColumnpEList->nExpr ); + assert( pExpr->pRight==0 ); + if( sqlite3ExprIsVector(pCopy) ){ + sqlite3VectorErrorMsg(pSubst->pParse, pCopy); + }else{ + sqlite3 *db = pSubst->pParse->db; + if( pSubst->isLeftJoin && pCopy->op!=TK_COLUMN ){ + memset(&ifNullRow, 0, sizeof(ifNullRow)); + ifNullRow.op = TK_IF_NULL_ROW; + ifNullRow.pLeft = pCopy; + ifNullRow.iTable = pSubst->iNewTable; + ifNullRow.flags = EP_IfNullRow; + pCopy = &ifNullRow; + } + testcase( ExprHasProperty(pCopy, EP_Subquery) ); + pNew = sqlite3ExprDup(db, pCopy, 0); + if( db->mallocFailed ){ + sqlite3ExprDelete(db, pNew); + return pExpr; + } + if( pSubst->isLeftJoin ){ + ExprSetProperty(pNew, EP_CanBeNull); + } + if( ExprHasProperty(pExpr,EP_FromJoin) ){ + sqlite3SetJoinExpr(pNew, pExpr->w.iRightJoinTable); + } + sqlite3ExprDelete(db, pExpr); + pExpr = pNew; + + /* Ensure that the expression now has an implicit collation sequence, + ** just as it did when it was a column of a view or sub-query. */ + if( pExpr->op!=TK_COLUMN && pExpr->op!=TK_COLLATE ){ + CollSeq *pColl = sqlite3ExprCollSeq(pSubst->pParse, pExpr); + pExpr = sqlite3ExprAddCollateString(pSubst->pParse, pExpr, + (pColl ? pColl->zName : "BINARY") + ); + } + ExprClearProperty(pExpr, EP_Collate); + } + } + }else{ + if( pExpr->op==TK_IF_NULL_ROW && pExpr->iTable==pSubst->iTable ){ + pExpr->iTable = pSubst->iNewTable; + } + pExpr->pLeft = substExpr(pSubst, pExpr->pLeft); + pExpr->pRight = substExpr(pSubst, pExpr->pRight); + if( ExprUseXSelect(pExpr) ){ + substSelect(pSubst, pExpr->x.pSelect, 1); + }else{ + substExprList(pSubst, pExpr->x.pList); + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + Window *pWin = pExpr->y.pWin; + pWin->pFilter = substExpr(pSubst, pWin->pFilter); + substExprList(pSubst, pWin->pPartition); + substExprList(pSubst, pWin->pOrderBy); + } +#endif + } + return pExpr; +} +static void substExprList( + SubstContext *pSubst, /* Description of the substitution */ + ExprList *pList /* List to scan and in which to make substitutes */ +){ + int i; + if( pList==0 ) return; + for(i=0; inExpr; i++){ + pList->a[i].pExpr = substExpr(pSubst, pList->a[i].pExpr); + } +} +static void substSelect( + SubstContext *pSubst, /* Description of the substitution */ + Select *p, /* SELECT statement in which to make substitutions */ + int doPrior /* Do substitutes on p->pPrior too */ +){ + SrcList *pSrc; + SrcItem *pItem; + int i; + if( !p ) return; + do{ + substExprList(pSubst, p->pEList); + substExprList(pSubst, p->pGroupBy); + substExprList(pSubst, p->pOrderBy); + p->pHaving = substExpr(pSubst, p->pHaving); + p->pWhere = substExpr(pSubst, p->pWhere); + pSrc = p->pSrc; + assert( pSrc!=0 ); + for(i=pSrc->nSrc, pItem=pSrc->a; i>0; i--, pItem++){ + substSelect(pSubst, pItem->pSelect, 1); + if( pItem->fg.isTabFunc ){ + substExprList(pSubst, pItem->u1.pFuncArg); + } + } + }while( doPrior && (p = p->pPrior)!=0 ); +} +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) +/* +** pSelect is a SELECT statement and pSrcItem is one item in the FROM +** clause of that SELECT. +** +** This routine scans the entire SELECT statement and recomputes the +** pSrcItem->colUsed mask. +*/ +static int recomputeColumnsUsedExpr(Walker *pWalker, Expr *pExpr){ + SrcItem *pItem; + if( pExpr->op!=TK_COLUMN ) return WRC_Continue; + pItem = pWalker->u.pSrcItem; + if( pItem->iCursor!=pExpr->iTable ) return WRC_Continue; + if( pExpr->iColumn<0 ) return WRC_Continue; + pItem->colUsed |= sqlite3ExprColUsed(pExpr); + return WRC_Continue; +} +static void recomputeColumnsUsed( + Select *pSelect, /* The complete SELECT statement */ + SrcItem *pSrcItem /* Which FROM clause item to recompute */ +){ + Walker w; + if( NEVER(pSrcItem->pTab==0) ) return; + memset(&w, 0, sizeof(w)); + w.xExprCallback = recomputeColumnsUsedExpr; + w.xSelectCallback = sqlite3SelectWalkNoop; + w.u.pSrcItem = pSrcItem; + pSrcItem->colUsed = 0; + sqlite3WalkSelect(&w, pSelect); +} +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) +/* +** Assign new cursor numbers to each of the items in pSrc. For each +** new cursor number assigned, set an entry in the aCsrMap[] array +** to map the old cursor number to the new: +** +** aCsrMap[iOld+1] = iNew; +** +** The array is guaranteed by the caller to be large enough for all +** existing cursor numbers in pSrc. aCsrMap[0] is the array size. +** +** If pSrc contains any sub-selects, call this routine recursively +** on the FROM clause of each such sub-select, with iExcept set to -1. +*/ +static void srclistRenumberCursors( + Parse *pParse, /* Parse context */ + int *aCsrMap, /* Array to store cursor mappings in */ + SrcList *pSrc, /* FROM clause to renumber */ + int iExcept /* FROM clause item to skip */ +){ + int i; + SrcItem *pItem; + for(i=0, pItem=pSrc->a; inSrc; i++, pItem++){ + if( i!=iExcept ){ + Select *p; + assert( pItem->iCursor < aCsrMap[0] ); + if( !pItem->fg.isRecursive || aCsrMap[pItem->iCursor+1]==0 ){ + aCsrMap[pItem->iCursor+1] = pParse->nTab++; + } + pItem->iCursor = aCsrMap[pItem->iCursor+1]; + for(p=pItem->pSelect; p; p=p->pPrior){ + srclistRenumberCursors(pParse, aCsrMap, p->pSrc, -1); + } + } + } +} + +/* +** *piCursor is a cursor number. Change it if it needs to be mapped. +*/ +static void renumberCursorDoMapping(Walker *pWalker, int *piCursor){ + int *aCsrMap = pWalker->u.aiCol; + int iCsr = *piCursor; + if( iCsr < aCsrMap[0] && aCsrMap[iCsr+1]>0 ){ + *piCursor = aCsrMap[iCsr+1]; + } +} + +/* +** Expression walker callback used by renumberCursors() to update +** Expr objects to match newly assigned cursor numbers. +*/ +static int renumberCursorsCb(Walker *pWalker, Expr *pExpr){ + int op = pExpr->op; + if( op==TK_COLUMN || op==TK_IF_NULL_ROW ){ + renumberCursorDoMapping(pWalker, &pExpr->iTable); + } + if( ExprHasProperty(pExpr, EP_FromJoin) ){ + renumberCursorDoMapping(pWalker, &pExpr->w.iRightJoinTable); + } + return WRC_Continue; +} + +/* +** Assign a new cursor number to each cursor in the FROM clause (Select.pSrc) +** of the SELECT statement passed as the second argument, and to each +** cursor in the FROM clause of any FROM clause sub-selects, recursively. +** Except, do not assign a new cursor number to the iExcept'th element in +** the FROM clause of (*p). Update all expressions and other references +** to refer to the new cursor numbers. +** +** Argument aCsrMap is an array that may be used for temporary working +** space. Two guarantees are made by the caller: +** +** * the array is larger than the largest cursor number used within the +** select statement passed as an argument, and +** +** * the array entries for all cursor numbers that do *not* appear in +** FROM clauses of the select statement as described above are +** initialized to zero. +*/ +static void renumberCursors( + Parse *pParse, /* Parse context */ + Select *p, /* Select to renumber cursors within */ + int iExcept, /* FROM clause item to skip */ + int *aCsrMap /* Working space */ +){ + Walker w; + srclistRenumberCursors(pParse, aCsrMap, p->pSrc, iExcept); + memset(&w, 0, sizeof(w)); + w.u.aiCol = aCsrMap; + w.xExprCallback = renumberCursorsCb; + w.xSelectCallback = sqlite3SelectWalkNoop; + sqlite3WalkSelect(&w, p); +} +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) +/* +** This routine attempts to flatten subqueries as a performance optimization. +** This routine returns 1 if it makes changes and 0 if no flattening occurs. +** +** To understand the concept of flattening, consider the following +** query: +** +** SELECT a FROM (SELECT x+y AS a FROM t1 WHERE z<100) WHERE a>5 +** +** The default way of implementing this query is to execute the +** subquery first and store the results in a temporary table, then +** run the outer query on that temporary table. This requires two +** passes over the data. Furthermore, because the temporary table +** has no indices, the WHERE clause on the outer query cannot be +** optimized. +** +** This routine attempts to rewrite queries such as the above into +** a single flat select, like this: +** +** SELECT x+y AS a FROM t1 WHERE z<100 AND a>5 +** +** The code generated for this simplification gives the same result +** but only has to scan the data once. And because indices might +** exist on the table t1, a complete scan of the data might be +** avoided. +** +** Flattening is subject to the following constraints: +** +** (**) We no longer attempt to flatten aggregate subqueries. Was: +** The subquery and the outer query cannot both be aggregates. +** +** (**) We no longer attempt to flatten aggregate subqueries. Was: +** (2) If the subquery is an aggregate then +** (2a) the outer query must not be a join and +** (2b) the outer query must not use subqueries +** other than the one FROM-clause subquery that is a candidate +** for flattening. (This is due to ticket [2f7170d73bf9abf80] +** from 2015-02-09.) +** +** (3) If the subquery is the right operand of a LEFT JOIN then +** (3a) the subquery may not be a join and +** (3b) the FROM clause of the subquery may not contain a virtual +** table and +** (3c) the outer query may not be an aggregate. +** (3d) the outer query may not be DISTINCT. +** +** (4) The subquery can not be DISTINCT. +** +** (**) At one point restrictions (4) and (5) defined a subset of DISTINCT +** sub-queries that were excluded from this optimization. Restriction +** (4) has since been expanded to exclude all DISTINCT subqueries. +** +** (**) We no longer attempt to flatten aggregate subqueries. Was: +** If the subquery is aggregate, the outer query may not be DISTINCT. +** +** (7) The subquery must have a FROM clause. TODO: For subqueries without +** A FROM clause, consider adding a FROM clause with the special +** table sqlite_once that consists of a single row containing a +** single NULL. +** +** (8) If the subquery uses LIMIT then the outer query may not be a join. +** +** (9) If the subquery uses LIMIT then the outer query may not be aggregate. +** +** (**) Restriction (10) was removed from the code on 2005-02-05 but we +** accidently carried the comment forward until 2014-09-15. Original +** constraint: "If the subquery is aggregate then the outer query +** may not use LIMIT." +** +** (11) The subquery and the outer query may not both have ORDER BY clauses. +** +** (**) Not implemented. Subsumed into restriction (3). Was previously +** a separate restriction deriving from ticket #350. +** +** (13) The subquery and outer query may not both use LIMIT. +** +** (14) The subquery may not use OFFSET. +** +** (15) If the outer query is part of a compound select, then the +** subquery may not use LIMIT. +** (See ticket #2339 and ticket [02a8e81d44]). +** +** (16) If the outer query is aggregate, then the subquery may not +** use ORDER BY. (Ticket #2942) This used to not matter +** until we introduced the group_concat() function. +** +** (17) If the subquery is a compound select, then +** (17a) all compound operators must be a UNION ALL, and +** (17b) no terms within the subquery compound may be aggregate +** or DISTINCT, and +** (17c) every term within the subquery compound must have a FROM clause +** (17d) the outer query may not be +** (17d1) aggregate, or +** (17d2) DISTINCT +** (17e) the subquery may not contain window functions, and +** (17f) the subquery must not be the RHS of a LEFT JOIN. +** +** The parent and sub-query may contain WHERE clauses. Subject to +** rules (11), (13) and (14), they may also contain ORDER BY, +** LIMIT and OFFSET clauses. The subquery cannot use any compound +** operator other than UNION ALL because all the other compound +** operators have an implied DISTINCT which is disallowed by +** restriction (4). +** +** Also, each component of the sub-query must return the same number +** of result columns. This is actually a requirement for any compound +** SELECT statement, but all the code here does is make sure that no +** such (illegal) sub-query is flattened. The caller will detect the +** syntax error and return a detailed message. +** +** (18) If the sub-query is a compound select, then all terms of the +** ORDER BY clause of the parent must be copies of a term returned +** by the parent query. +** +** (19) If the subquery uses LIMIT then the outer query may not +** have a WHERE clause. +** +** (20) If the sub-query is a compound select, then it must not use +** an ORDER BY clause. Ticket #3773. We could relax this constraint +** somewhat by saying that the terms of the ORDER BY clause must +** appear as unmodified result columns in the outer query. But we +** have other optimizations in mind to deal with that case. +** +** (21) If the subquery uses LIMIT then the outer query may not be +** DISTINCT. (See ticket [752e1646fc]). +** +** (22) The subquery may not be a recursive CTE. +** +** (23) If the outer query is a recursive CTE, then the sub-query may not be +** a compound query. This restriction is because transforming the +** parent to a compound query confuses the code that handles +** recursive queries in multiSelect(). +** +** (**) We no longer attempt to flatten aggregate subqueries. Was: +** The subquery may not be an aggregate that uses the built-in min() or +** or max() functions. (Without this restriction, a query like: +** "SELECT x FROM (SELECT max(y), x FROM t1)" would not necessarily +** return the value X for which Y was maximal.) +** +** (25) If either the subquery or the parent query contains a window +** function in the select list or ORDER BY clause, flattening +** is not attempted. +** +** +** In this routine, the "p" parameter is a pointer to the outer query. +** The subquery is p->pSrc->a[iFrom]. isAgg is true if the outer query +** uses aggregates. +** +** If flattening is not attempted, this routine is a no-op and returns 0. +** If flattening is attempted this routine returns 1. +** +** All of the expression analysis must occur on both the outer query and +** the subquery before this routine runs. +*/ +static int flattenSubquery( + Parse *pParse, /* Parsing context */ + Select *p, /* The parent or outer SELECT statement */ + int iFrom, /* Index in p->pSrc->a[] of the inner subquery */ + int isAgg /* True if outer SELECT uses aggregate functions */ +){ + const char *zSavedAuthContext = pParse->zAuthContext; + Select *pParent; /* Current UNION ALL term of the other query */ + Select *pSub; /* The inner query or "subquery" */ + Select *pSub1; /* Pointer to the rightmost select in sub-query */ + SrcList *pSrc; /* The FROM clause of the outer query */ + SrcList *pSubSrc; /* The FROM clause of the subquery */ + int iParent; /* VDBE cursor number of the pSub result set temp table */ + int iNewParent = -1;/* Replacement table for iParent */ + int isLeftJoin = 0; /* True if pSub is the right side of a LEFT JOIN */ + int i; /* Loop counter */ + Expr *pWhere; /* The WHERE clause */ + SrcItem *pSubitem; /* The subquery */ + sqlite3 *db = pParse->db; + Walker w; /* Walker to persist agginfo data */ + int *aCsrMap = 0; + + /* Check to see if flattening is permitted. Return 0 if not. + */ + assert( p!=0 ); + assert( p->pPrior==0 ); + if( OptimizationDisabled(db, SQLITE_QueryFlattener) ) return 0; + pSrc = p->pSrc; + assert( pSrc && iFrom>=0 && iFromnSrc ); + pSubitem = &pSrc->a[iFrom]; + iParent = pSubitem->iCursor; + pSub = pSubitem->pSelect; + assert( pSub!=0 ); + +#ifndef SQLITE_OMIT_WINDOWFUNC + if( p->pWin || pSub->pWin ) return 0; /* Restriction (25) */ +#endif + + pSubSrc = pSub->pSrc; + assert( pSubSrc ); + /* Prior to version 3.1.2, when LIMIT and OFFSET had to be simple constants, + ** not arbitrary expressions, we allowed some combining of LIMIT and OFFSET + ** because they could be computed at compile-time. But when LIMIT and OFFSET + ** became arbitrary expressions, we were forced to add restrictions (13) + ** and (14). */ + if( pSub->pLimit && p->pLimit ) return 0; /* Restriction (13) */ + if( pSub->pLimit && pSub->pLimit->pRight ) return 0; /* Restriction (14) */ + if( (p->selFlags & SF_Compound)!=0 && pSub->pLimit ){ + return 0; /* Restriction (15) */ + } + if( pSubSrc->nSrc==0 ) return 0; /* Restriction (7) */ + if( pSub->selFlags & SF_Distinct ) return 0; /* Restriction (4) */ + if( pSub->pLimit && (pSrc->nSrc>1 || isAgg) ){ + return 0; /* Restrictions (8)(9) */ + } + if( p->pOrderBy && pSub->pOrderBy ){ + return 0; /* Restriction (11) */ + } + if( isAgg && pSub->pOrderBy ) return 0; /* Restriction (16) */ + if( pSub->pLimit && p->pWhere ) return 0; /* Restriction (19) */ + if( pSub->pLimit && (p->selFlags & SF_Distinct)!=0 ){ + return 0; /* Restriction (21) */ + } + if( pSub->selFlags & (SF_Recursive) ){ + return 0; /* Restrictions (22) */ + } + + /* + ** If the subquery is the right operand of a LEFT JOIN, then the + ** subquery may not be a join itself (3a). Example of why this is not + ** allowed: + ** + ** t1 LEFT OUTER JOIN (t2 JOIN t3) + ** + ** If we flatten the above, we would get + ** + ** (t1 LEFT OUTER JOIN t2) JOIN t3 + ** + ** which is not at all the same thing. + ** + ** If the subquery is the right operand of a LEFT JOIN, then the outer + ** query cannot be an aggregate. (3c) This is an artifact of the way + ** aggregates are processed - there is no mechanism to determine if + ** the LEFT JOIN table should be all-NULL. + ** + ** See also tickets #306, #350, and #3300. + */ + if( (pSubitem->fg.jointype & JT_OUTER)!=0 ){ + isLeftJoin = 1; + if( pSubSrc->nSrc>1 /* (3a) */ + || isAgg /* (3b) */ + || IsVirtual(pSubSrc->a[0].pTab) /* (3c) */ + || (p->selFlags & SF_Distinct)!=0 /* (3d) */ + ){ + return 0; + } + } +#ifdef SQLITE_EXTRA_IFNULLROW + else if( iFrom>0 && !isAgg ){ + /* Setting isLeftJoin to -1 causes OP_IfNullRow opcodes to be generated for + ** every reference to any result column from subquery in a join, even + ** though they are not necessary. This will stress-test the OP_IfNullRow + ** opcode. */ + isLeftJoin = -1; + } +#endif + + /* Restriction (17): If the sub-query is a compound SELECT, then it must + ** use only the UNION ALL operator. And none of the simple select queries + ** that make up the compound SELECT are allowed to be aggregate or distinct + ** queries. + */ + if( pSub->pPrior ){ + if( pSub->pOrderBy ){ + return 0; /* Restriction (20) */ + } + if( isAgg || (p->selFlags & SF_Distinct)!=0 || isLeftJoin>0 ){ + return 0; /* (17d1), (17d2), or (17f) */ + } + for(pSub1=pSub; pSub1; pSub1=pSub1->pPrior){ + testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ); + testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Aggregate ); + assert( pSub->pSrc!=0 ); + assert( (pSub->selFlags & SF_Recursive)==0 ); + assert( pSub->pEList->nExpr==pSub1->pEList->nExpr ); + if( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))!=0 /* (17b) */ + || (pSub1->pPrior && pSub1->op!=TK_ALL) /* (17a) */ + || pSub1->pSrc->nSrc<1 /* (17c) */ +#ifndef SQLITE_OMIT_WINDOWFUNC + || pSub1->pWin /* (17e) */ +#endif + ){ + return 0; + } + testcase( pSub1->pSrc->nSrc>1 ); + } + + /* Restriction (18). */ + if( p->pOrderBy ){ + int ii; + for(ii=0; iipOrderBy->nExpr; ii++){ + if( p->pOrderBy->a[ii].u.x.iOrderByCol==0 ) return 0; + } + } + + /* Restriction (23) */ + if( (p->selFlags & SF_Recursive) ) return 0; + + if( pSrc->nSrc>1 ){ + if( pParse->nSelect>500 ) return 0; + aCsrMap = sqlite3DbMallocZero(db, ((i64)pParse->nTab+1)*sizeof(int)); + if( aCsrMap ) aCsrMap[0] = pParse->nTab; + } + } + + /***** If we reach this point, flattening is permitted. *****/ + SELECTTRACE(1,pParse,p,("flatten %u.%p from term %d\n", + pSub->selId, pSub, iFrom)); + + /* Authorize the subquery */ + pParse->zAuthContext = pSubitem->zName; + TESTONLY(i =) sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0); + testcase( i==SQLITE_DENY ); + pParse->zAuthContext = zSavedAuthContext; + + /* Delete the transient structures associated with thesubquery */ + pSub1 = pSubitem->pSelect; + sqlite3DbFree(db, pSubitem->zDatabase); + sqlite3DbFree(db, pSubitem->zName); + sqlite3DbFree(db, pSubitem->zAlias); + pSubitem->zDatabase = 0; + pSubitem->zName = 0; + pSubitem->zAlias = 0; + pSubitem->pSelect = 0; + assert( pSubitem->pOn==0 ); + + /* If the sub-query is a compound SELECT statement, then (by restrictions + ** 17 and 18 above) it must be a UNION ALL and the parent query must + ** be of the form: + ** + ** SELECT FROM () + ** + ** followed by any ORDER BY, LIMIT and/or OFFSET clauses. This block + ** creates N-1 copies of the parent query without any ORDER BY, LIMIT or + ** OFFSET clauses and joins them to the left-hand-side of the original + ** using UNION ALL operators. In this case N is the number of simple + ** select statements in the compound sub-query. + ** + ** Example: + ** + ** SELECT a+1 FROM ( + ** SELECT x FROM tab + ** UNION ALL + ** SELECT y FROM tab + ** UNION ALL + ** SELECT abs(z*2) FROM tab2 + ** ) WHERE a!=5 ORDER BY 1 + ** + ** Transformed into: + ** + ** SELECT x+1 FROM tab WHERE x+1!=5 + ** UNION ALL + ** SELECT y+1 FROM tab WHERE y+1!=5 + ** UNION ALL + ** SELECT abs(z*2)+1 FROM tab2 WHERE abs(z*2)+1!=5 + ** ORDER BY 1 + ** + ** We call this the "compound-subquery flattening". + */ + for(pSub=pSub->pPrior; pSub; pSub=pSub->pPrior){ + Select *pNew; + ExprList *pOrderBy = p->pOrderBy; + Expr *pLimit = p->pLimit; + Select *pPrior = p->pPrior; + Table *pItemTab = pSubitem->pTab; + pSubitem->pTab = 0; + p->pOrderBy = 0; + p->pPrior = 0; + p->pLimit = 0; + pNew = sqlite3SelectDup(db, p, 0); + p->pLimit = pLimit; + p->pOrderBy = pOrderBy; + p->op = TK_ALL; + pSubitem->pTab = pItemTab; + if( pNew==0 ){ + p->pPrior = pPrior; + }else{ + pNew->selId = ++pParse->nSelect; + if( aCsrMap && ALWAYS(db->mallocFailed==0) ){ + renumberCursors(pParse, pNew, iFrom, aCsrMap); + } + pNew->pPrior = pPrior; + if( pPrior ) pPrior->pNext = pNew; + pNew->pNext = p; + p->pPrior = pNew; + SELECTTRACE(2,pParse,p,("compound-subquery flattener" + " creates %u as peer\n",pNew->selId)); + } + assert( pSubitem->pSelect==0 ); + } + sqlite3DbFree(db, aCsrMap); + if( db->mallocFailed ){ + pSubitem->pSelect = pSub1; + return 1; + } + + /* Defer deleting the Table object associated with the + ** subquery until code generation is + ** complete, since there may still exist Expr.pTab entries that + ** refer to the subquery even after flattening. Ticket #3346. + ** + ** pSubitem->pTab is always non-NULL by test restrictions and tests above. + */ + if( ALWAYS(pSubitem->pTab!=0) ){ + Table *pTabToDel = pSubitem->pTab; + if( pTabToDel->nTabRef==1 ){ + Parse *pToplevel = sqlite3ParseToplevel(pParse); + sqlite3ParserAddCleanup(pToplevel, + (void(*)(sqlite3*,void*))sqlite3DeleteTable, + pTabToDel); + testcase( pToplevel->earlyCleanup ); + }else{ + pTabToDel->nTabRef--; + } + pSubitem->pTab = 0; + } + + /* The following loop runs once for each term in a compound-subquery + ** flattening (as described above). If we are doing a different kind + ** of flattening - a flattening other than a compound-subquery flattening - + ** then this loop only runs once. + ** + ** This loop moves all of the FROM elements of the subquery into the + ** the FROM clause of the outer query. Before doing this, remember + ** the cursor number for the original outer query FROM element in + ** iParent. The iParent cursor will never be used. Subsequent code + ** will scan expressions looking for iParent references and replace + ** those references with expressions that resolve to the subquery FROM + ** elements we are now copying in. + */ + pSub = pSub1; + for(pParent=p; pParent; pParent=pParent->pPrior, pSub=pSub->pPrior){ + int nSubSrc; + u8 jointype = 0; + assert( pSub!=0 ); + pSubSrc = pSub->pSrc; /* FROM clause of subquery */ + nSubSrc = pSubSrc->nSrc; /* Number of terms in subquery FROM clause */ + pSrc = pParent->pSrc; /* FROM clause of the outer query */ + + if( pParent==p ){ + jointype = pSubitem->fg.jointype; /* First time through the loop */ + } + + /* The subquery uses a single slot of the FROM clause of the outer + ** query. If the subquery has more than one element in its FROM clause, + ** then expand the outer query to make space for it to hold all elements + ** of the subquery. + ** + ** Example: + ** + ** SELECT * FROM tabA, (SELECT * FROM sub1, sub2), tabB; + ** + ** The outer query has 3 slots in its FROM clause. One slot of the + ** outer query (the middle slot) is used by the subquery. The next + ** block of code will expand the outer query FROM clause to 4 slots. + ** The middle slot is expanded to two slots in order to make space + ** for the two elements in the FROM clause of the subquery. + */ + if( nSubSrc>1 ){ + pSrc = sqlite3SrcListEnlarge(pParse, pSrc, nSubSrc-1,iFrom+1); + if( pSrc==0 ) break; + pParent->pSrc = pSrc; + } + + /* Transfer the FROM clause terms from the subquery into the + ** outer query. + */ + for(i=0; ia[i+iFrom].pUsing); + assert( pSrc->a[i+iFrom].fg.isTabFunc==0 ); + pSrc->a[i+iFrom] = pSubSrc->a[i]; + iNewParent = pSubSrc->a[i].iCursor; + memset(&pSubSrc->a[i], 0, sizeof(pSubSrc->a[i])); + } + pSrc->a[iFrom].fg.jointype = jointype; + + /* Now begin substituting subquery result set expressions for + ** references to the iParent in the outer query. + ** + ** Example: + ** + ** SELECT a+5, b*10 FROM (SELECT x*3 AS a, y+10 AS b FROM t1) WHERE a>b; + ** \ \_____________ subquery __________/ / + ** \_____________________ outer query ______________________________/ + ** + ** We look at every expression in the outer query and every place we see + ** "a" we substitute "x*3" and every place we see "b" we substitute "y+10". + */ + if( pSub->pOrderBy && (pParent->selFlags & SF_NoopOrderBy)==0 ){ + /* At this point, any non-zero iOrderByCol values indicate that the + ** ORDER BY column expression is identical to the iOrderByCol'th + ** expression returned by SELECT statement pSub. Since these values + ** do not necessarily correspond to columns in SELECT statement pParent, + ** zero them before transfering the ORDER BY clause. + ** + ** Not doing this may cause an error if a subsequent call to this + ** function attempts to flatten a compound sub-query into pParent + ** (the only way this can happen is if the compound sub-query is + ** currently part of pSub->pSrc). See ticket [d11a6e908f]. */ + ExprList *pOrderBy = pSub->pOrderBy; + for(i=0; inExpr; i++){ + pOrderBy->a[i].u.x.iOrderByCol = 0; + } + assert( pParent->pOrderBy==0 ); + pParent->pOrderBy = pOrderBy; + pSub->pOrderBy = 0; + } + pWhere = pSub->pWhere; + pSub->pWhere = 0; + if( isLeftJoin>0 ){ + sqlite3SetJoinExpr(pWhere, iNewParent); + } + if( pWhere ){ + if( pParent->pWhere ){ + pParent->pWhere = sqlite3PExpr(pParse, TK_AND, pWhere, pParent->pWhere); + }else{ + pParent->pWhere = pWhere; + } + } + if( db->mallocFailed==0 ){ + SubstContext x; + x.pParse = pParse; + x.iTable = iParent; + x.iNewTable = iNewParent; + x.isLeftJoin = isLeftJoin; + x.pEList = pSub->pEList; + substSelect(&x, pParent, 0); + } + + /* The flattened query is a compound if either the inner or the + ** outer query is a compound. */ + pParent->selFlags |= pSub->selFlags & SF_Compound; + assert( (pSub->selFlags & SF_Distinct)==0 ); /* restriction (17b) */ + + /* + ** SELECT ... FROM (SELECT ... LIMIT a OFFSET b) LIMIT x OFFSET y; + ** + ** One is tempted to try to add a and b to combine the limits. But this + ** does not work if either limit is negative. + */ + if( pSub->pLimit ){ + pParent->pLimit = pSub->pLimit; + pSub->pLimit = 0; + } + + /* Recompute the SrcList_item.colUsed masks for the flattened + ** tables. */ + for(i=0; ia[i+iFrom]); + } + } + + /* Finially, delete what is left of the subquery and return + ** success. + */ + sqlite3AggInfoPersistWalkerInit(&w, pParse); + sqlite3WalkSelect(&w,pSub1); + sqlite3SelectDelete(db, pSub1); + +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x100 ){ + SELECTTRACE(0x100,pParse,p,("After flattening:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif + + return 1; +} +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + +/* +** A structure to keep track of all of the column values that are fixed to +** a known value due to WHERE clause constraints of the form COLUMN=VALUE. +*/ +typedef struct WhereConst WhereConst; +struct WhereConst { + Parse *pParse; /* Parsing context */ + u8 *pOomFault; /* Pointer to pParse->db->mallocFailed */ + int nConst; /* Number for COLUMN=CONSTANT terms */ + int nChng; /* Number of times a constant is propagated */ + int bHasAffBlob; /* At least one column in apExpr[] as affinity BLOB */ + Expr **apExpr; /* [i*2] is COLUMN and [i*2+1] is VALUE */ +}; + +/* +** Add a new entry to the pConst object. Except, do not add duplicate +** pColumn entires. Also, do not add if doing so would not be appropriate. +** +** The caller guarantees the pColumn is a column and pValue is a constant. +** This routine has to do some additional checks before completing the +** insert. +*/ +static void constInsert( + WhereConst *pConst, /* The WhereConst into which we are inserting */ + Expr *pColumn, /* The COLUMN part of the constraint */ + Expr *pValue, /* The VALUE part of the constraint */ + Expr *pExpr /* Overall expression: COLUMN=VALUE or VALUE=COLUMN */ +){ + int i; + assert( pColumn->op==TK_COLUMN ); + assert( sqlite3ExprIsConstant(pValue) ); + + if( ExprHasProperty(pColumn, EP_FixedCol) ) return; + if( sqlite3ExprAffinity(pValue)!=0 ) return; + if( !sqlite3IsBinary(sqlite3ExprCompareCollSeq(pConst->pParse,pExpr)) ){ + return; + } + + /* 2018-10-25 ticket [cf5ed20f] + ** Make sure the same pColumn is not inserted more than once */ + for(i=0; inConst; i++){ + const Expr *pE2 = pConst->apExpr[i*2]; + assert( pE2->op==TK_COLUMN ); + if( pE2->iTable==pColumn->iTable + && pE2->iColumn==pColumn->iColumn + ){ + return; /* Already present. Return without doing anything. */ + } + } + if( sqlite3ExprAffinity(pColumn)==SQLITE_AFF_BLOB ){ + pConst->bHasAffBlob = 1; + } + + pConst->nConst++; + pConst->apExpr = sqlite3DbReallocOrFree(pConst->pParse->db, pConst->apExpr, + pConst->nConst*2*sizeof(Expr*)); + if( pConst->apExpr==0 ){ + pConst->nConst = 0; + }else{ + pConst->apExpr[pConst->nConst*2-2] = pColumn; + pConst->apExpr[pConst->nConst*2-1] = pValue; + } +} + +/* +** Find all terms of COLUMN=VALUE or VALUE=COLUMN in pExpr where VALUE +** is a constant expression and where the term must be true because it +** is part of the AND-connected terms of the expression. For each term +** found, add it to the pConst structure. +*/ +static void findConstInWhere(WhereConst *pConst, Expr *pExpr){ + Expr *pRight, *pLeft; + if( NEVER(pExpr==0) ) return; + if( ExprHasProperty(pExpr, EP_FromJoin) ) return; + if( pExpr->op==TK_AND ){ + findConstInWhere(pConst, pExpr->pRight); + findConstInWhere(pConst, pExpr->pLeft); + return; + } + if( pExpr->op!=TK_EQ ) return; + pRight = pExpr->pRight; + pLeft = pExpr->pLeft; + assert( pRight!=0 ); + assert( pLeft!=0 ); + if( pRight->op==TK_COLUMN && sqlite3ExprIsConstant(pLeft) ){ + constInsert(pConst,pRight,pLeft,pExpr); + } + if( pLeft->op==TK_COLUMN && sqlite3ExprIsConstant(pRight) ){ + constInsert(pConst,pLeft,pRight,pExpr); + } +} + +/* +** This is a helper function for Walker callback propagateConstantExprRewrite(). +** +** Argument pExpr is a candidate expression to be replaced by a value. If +** pExpr is equivalent to one of the columns named in pWalker->u.pConst, +** then overwrite it with the corresponding value. Except, do not do so +** if argument bIgnoreAffBlob is non-zero and the affinity of pExpr +** is SQLITE_AFF_BLOB. +*/ +static int propagateConstantExprRewriteOne( + WhereConst *pConst, + Expr *pExpr, + int bIgnoreAffBlob +){ + int i; + if( pConst->pOomFault[0] ) return WRC_Prune; + if( pExpr->op!=TK_COLUMN ) return WRC_Continue; + if( ExprHasProperty(pExpr, EP_FixedCol|EP_FromJoin) ){ + testcase( ExprHasProperty(pExpr, EP_FixedCol) ); + testcase( ExprHasProperty(pExpr, EP_FromJoin) ); + return WRC_Continue; + } + for(i=0; inConst; i++){ + Expr *pColumn = pConst->apExpr[i*2]; + if( pColumn==pExpr ) continue; + if( pColumn->iTable!=pExpr->iTable ) continue; + if( pColumn->iColumn!=pExpr->iColumn ) continue; + if( bIgnoreAffBlob && sqlite3ExprAffinity(pColumn)==SQLITE_AFF_BLOB ){ + break; + } + /* A match is found. Add the EP_FixedCol property */ + pConst->nChng++; + ExprClearProperty(pExpr, EP_Leaf); + ExprSetProperty(pExpr, EP_FixedCol); + assert( pExpr->pLeft==0 ); + pExpr->pLeft = sqlite3ExprDup(pConst->pParse->db, pConst->apExpr[i*2+1], 0); + if( pConst->pParse->db->mallocFailed ) return WRC_Prune; + break; + } + return WRC_Prune; +} + +/* +** This is a Walker expression callback. pExpr is a node from the WHERE +** clause of a SELECT statement. This function examines pExpr to see if +** any substitutions based on the contents of pWalker->u.pConst should +** be made to pExpr or its immediate children. +** +** A substitution is made if: +** +** + pExpr is a column with an affinity other than BLOB that matches +** one of the columns in pWalker->u.pConst, or +** +** + pExpr is a binary comparison operator (=, <=, >=, <, >) that +** uses an affinity other than TEXT and one of its immediate +** children is a column that matches one of the columns in +** pWalker->u.pConst. +*/ +static int propagateConstantExprRewrite(Walker *pWalker, Expr *pExpr){ + WhereConst *pConst = pWalker->u.pConst; + assert( TK_GT==TK_EQ+1 ); + assert( TK_LE==TK_EQ+2 ); + assert( TK_LT==TK_EQ+3 ); + assert( TK_GE==TK_EQ+4 ); + if( pConst->bHasAffBlob ){ + if( (pExpr->op>=TK_EQ && pExpr->op<=TK_GE) + || pExpr->op==TK_IS + ){ + propagateConstantExprRewriteOne(pConst, pExpr->pLeft, 0); + if( pConst->pOomFault[0] ) return WRC_Prune; + if( sqlite3ExprAffinity(pExpr->pLeft)!=SQLITE_AFF_TEXT ){ + propagateConstantExprRewriteOne(pConst, pExpr->pRight, 0); + } + } + } + return propagateConstantExprRewriteOne(pConst, pExpr, pConst->bHasAffBlob); +} + +/* +** The WHERE-clause constant propagation optimization. +** +** If the WHERE clause contains terms of the form COLUMN=CONSTANT or +** CONSTANT=COLUMN that are top-level AND-connected terms that are not +** part of a ON clause from a LEFT JOIN, then throughout the query +** replace all other occurrences of COLUMN with CONSTANT. +** +** For example, the query: +** +** SELECT * FROM t1, t2, t3 WHERE t1.a=39 AND t2.b=t1.a AND t3.c=t2.b +** +** Is transformed into +** +** SELECT * FROM t1, t2, t3 WHERE t1.a=39 AND t2.b=39 AND t3.c=39 +** +** Return true if any transformations where made and false if not. +** +** Implementation note: Constant propagation is tricky due to affinity +** and collating sequence interactions. Consider this example: +** +** CREATE TABLE t1(a INT,b TEXT); +** INSERT INTO t1 VALUES(123,'0123'); +** SELECT * FROM t1 WHERE a=123 AND b=a; +** SELECT * FROM t1 WHERE a=123 AND b=123; +** +** The two SELECT statements above should return different answers. b=a +** is alway true because the comparison uses numeric affinity, but b=123 +** is false because it uses text affinity and '0123' is not the same as '123'. +** To work around this, the expression tree is not actually changed from +** "b=a" to "b=123" but rather the "a" in "b=a" is tagged with EP_FixedCol +** and the "123" value is hung off of the pLeft pointer. Code generator +** routines know to generate the constant "123" instead of looking up the +** column value. Also, to avoid collation problems, this optimization is +** only attempted if the "a=123" term uses the default BINARY collation. +** +** 2021-05-25 forum post 6a06202608: Another troublesome case is... +** +** CREATE TABLE t1(x); +** INSERT INTO t1 VALUES(10.0); +** SELECT 1 FROM t1 WHERE x=10 AND x LIKE 10; +** +** The query should return no rows, because the t1.x value is '10.0' not '10' +** and '10.0' is not LIKE '10'. But if we are not careful, the first WHERE +** term "x=10" will cause the second WHERE term to become "10 LIKE 10", +** resulting in a false positive. To avoid this, constant propagation for +** columns with BLOB affinity is only allowed if the constant is used with +** operators ==, <=, <, >=, >, or IS in a way that will cause the correct +** type conversions to occur. See logic associated with the bHasAffBlob flag +** for details. +*/ +static int propagateConstants( + Parse *pParse, /* The parsing context */ + Select *p /* The query in which to propagate constants */ +){ + WhereConst x; + Walker w; + int nChng = 0; + x.pParse = pParse; + x.pOomFault = &pParse->db->mallocFailed; + do{ + x.nConst = 0; + x.nChng = 0; + x.apExpr = 0; + x.bHasAffBlob = 0; + findConstInWhere(&x, p->pWhere); + if( x.nConst ){ + memset(&w, 0, sizeof(w)); + w.pParse = pParse; + w.xExprCallback = propagateConstantExprRewrite; + w.xSelectCallback = sqlite3SelectWalkNoop; + w.xSelectCallback2 = 0; + w.walkerDepth = 0; + w.u.pConst = &x; + sqlite3WalkExpr(&w, p->pWhere); + sqlite3DbFree(x.pParse->db, x.apExpr); + nChng += x.nChng; + } + }while( x.nChng ); + return nChng; +} + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) +# if !defined(SQLITE_OMIT_WINDOWFUNC) +/* +** This function is called to determine whether or not it is safe to +** push WHERE clause expression pExpr down to FROM clause sub-query +** pSubq, which contains at least one window function. Return 1 +** if it is safe and the expression should be pushed down, or 0 +** otherwise. +** +** It is only safe to push the expression down if it consists only +** of constants and copies of expressions that appear in the PARTITION +** BY clause of all window function used by the sub-query. It is safe +** to filter out entire partitions, but not rows within partitions, as +** this may change the results of the window functions. +** +** At the time this function is called it is guaranteed that +** +** * the sub-query uses only one distinct window frame, and +** * that the window frame has a PARTITION BY clase. +*/ +static int pushDownWindowCheck(Parse *pParse, Select *pSubq, Expr *pExpr){ + assert( pSubq->pWin->pPartition ); + assert( (pSubq->selFlags & SF_MultiPart)==0 ); + assert( pSubq->pPrior==0 ); + return sqlite3ExprIsConstantOrGroupBy(pParse, pExpr, pSubq->pWin->pPartition); +} +# endif /* SQLITE_OMIT_WINDOWFUNC */ +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) +/* +** Make copies of relevant WHERE clause terms of the outer query into +** the WHERE clause of subquery. Example: +** +** SELECT * FROM (SELECT a AS x, c-d AS y FROM t1) WHERE x=5 AND y=10; +** +** Transformed into: +** +** SELECT * FROM (SELECT a AS x, c-d AS y FROM t1 WHERE a=5 AND c-d=10) +** WHERE x=5 AND y=10; +** +** The hope is that the terms added to the inner query will make it more +** efficient. +** +** Do not attempt this optimization if: +** +** (1) (** This restriction was removed on 2017-09-29. We used to +** disallow this optimization for aggregate subqueries, but now +** it is allowed by putting the extra terms on the HAVING clause. +** The added HAVING clause is pointless if the subquery lacks +** a GROUP BY clause. But such a HAVING clause is also harmless +** so there does not appear to be any reason to add extra logic +** to suppress it. **) +** +** (2) The inner query is the recursive part of a common table expression. +** +** (3) The inner query has a LIMIT clause (since the changes to the WHERE +** clause would change the meaning of the LIMIT). +** +** (4) The inner query is the right operand of a LEFT JOIN and the +** expression to be pushed down does not come from the ON clause +** on that LEFT JOIN. +** +** (5) The WHERE clause expression originates in the ON or USING clause +** of a LEFT JOIN where iCursor is not the right-hand table of that +** left join. An example: +** +** SELECT * +** FROM (SELECT 1 AS a1 UNION ALL SELECT 2) AS aa +** JOIN (SELECT 1 AS b2 UNION ALL SELECT 2) AS bb ON (a1=b2) +** LEFT JOIN (SELECT 8 AS c3 UNION ALL SELECT 9) AS cc ON (b2=2); +** +** The correct answer is three rows: (1,1,NULL),(2,2,8),(2,2,9). +** But if the (b2=2) term were to be pushed down into the bb subquery, +** then the (1,1,NULL) row would be suppressed. +** +** (6) Window functions make things tricky as changes to the WHERE clause +** of the inner query could change the window over which window +** functions are calculated. Therefore, do not attempt the optimization +** if: +** +** (6a) The inner query uses multiple incompatible window partitions. +** +** (6b) The inner query is a compound and uses window-functions. +** +** (6c) The WHERE clause does not consist entirely of constants and +** copies of expressions found in the PARTITION BY clause of +** all window-functions used by the sub-query. It is safe to +** filter out entire partitions, as this does not change the +** window over which any window-function is calculated. +** +** (7) The inner query is a Common Table Expression (CTE) that should +** be materialized. (This restriction is implemented in the calling +** routine.) +** +** Return 0 if no changes are made and non-zero if one or more WHERE clause +** terms are duplicated into the subquery. +*/ +static int pushDownWhereTerms( + Parse *pParse, /* Parse context (for malloc() and error reporting) */ + Select *pSubq, /* The subquery whose WHERE clause is to be augmented */ + Expr *pWhere, /* The WHERE clause of the outer query */ + SrcItem *pSrc /* The subquery term of the outer FROM clause */ +){ + Expr *pNew; + int nChng = 0; + if( pWhere==0 ) return 0; + if( pSubq->selFlags & (SF_Recursive|SF_MultiPart) ) return 0; + +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pSubq->pPrior ){ + Select *pSel; + for(pSel=pSubq; pSel; pSel=pSel->pPrior){ + if( pSel->pWin ) return 0; /* restriction (6b) */ + } + }else{ + if( pSubq->pWin && pSubq->pWin->pPartition==0 ) return 0; + } +#endif + +#ifdef SQLITE_DEBUG + /* Only the first term of a compound can have a WITH clause. But make + ** sure no other terms are marked SF_Recursive in case something changes + ** in the future. + */ + { + Select *pX; + for(pX=pSubq; pX; pX=pX->pPrior){ + assert( (pX->selFlags & (SF_Recursive))==0 ); + } + } +#endif + + if( pSubq->pLimit!=0 ){ + return 0; /* restriction (3) */ + } + while( pWhere->op==TK_AND ){ + nChng += pushDownWhereTerms(pParse, pSubq, pWhere->pRight, pSrc); + pWhere = pWhere->pLeft; + } + +#if 0 /* Legacy code. Checks now done by sqlite3ExprIsTableConstraint() */ + if( isLeftJoin + && (ExprHasProperty(pWhere,EP_FromJoin)==0 + || pWhere->w.iRightJoinTable!=iCursor) + ){ + return 0; /* restriction (4) */ + } + if( ExprHasProperty(pWhere,EP_FromJoin) + && pWhere->w.iRightJoinTable!=iCursor + ){ + return 0; /* restriction (5) */ + } +#endif + + if( sqlite3ExprIsTableConstraint(pWhere, pSrc) ){ + nChng++; + pSubq->selFlags |= SF_PushDown; + while( pSubq ){ + SubstContext x; + pNew = sqlite3ExprDup(pParse->db, pWhere, 0); + unsetJoinExpr(pNew, -1); + x.pParse = pParse; + x.iTable = pSrc->iCursor; + x.iNewTable = pSrc->iCursor; + x.isLeftJoin = 0; + x.pEList = pSubq->pEList; + pNew = substExpr(&x, pNew); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pSubq->pWin && 0==pushDownWindowCheck(pParse, pSubq, pNew) ){ + /* Restriction 6c has prevented push-down in this case */ + sqlite3ExprDelete(pParse->db, pNew); + nChng--; + break; + } +#endif + if( pSubq->selFlags & SF_Aggregate ){ + pSubq->pHaving = sqlite3ExprAnd(pParse, pSubq->pHaving, pNew); + }else{ + pSubq->pWhere = sqlite3ExprAnd(pParse, pSubq->pWhere, pNew); + } + pSubq = pSubq->pPrior; + } + } + return nChng; +} +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + +/* +** The pFunc is the only aggregate function in the query. Check to see +** if the query is a candidate for the min/max optimization. +** +** If the query is a candidate for the min/max optimization, then set +** *ppMinMax to be an ORDER BY clause to be used for the optimization +** and return either WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX depending on +** whether pFunc is a min() or max() function. +** +** If the query is not a candidate for the min/max optimization, return +** WHERE_ORDERBY_NORMAL (which must be zero). +** +** This routine must be called after aggregate functions have been +** located but before their arguments have been subjected to aggregate +** analysis. +*/ +static u8 minMaxQuery(sqlite3 *db, Expr *pFunc, ExprList **ppMinMax){ + int eRet = WHERE_ORDERBY_NORMAL; /* Return value */ + ExprList *pEList; /* Arguments to agg function */ + const char *zFunc; /* Name of aggregate function pFunc */ + ExprList *pOrderBy; + u8 sortFlags = 0; + + assert( *ppMinMax==0 ); + assert( pFunc->op==TK_AGG_FUNCTION ); + assert( !IsWindowFunc(pFunc) ); + assert( ExprUseXList(pFunc) ); + pEList = pFunc->x.pList; + if( pEList==0 + || pEList->nExpr!=1 + || ExprHasProperty(pFunc, EP_WinFunc) + || OptimizationDisabled(db, SQLITE_MinMaxOpt) + ){ + return eRet; + } + assert( !ExprHasProperty(pFunc, EP_IntValue) ); + zFunc = pFunc->u.zToken; + if( sqlite3StrICmp(zFunc, "min")==0 ){ + eRet = WHERE_ORDERBY_MIN; + if( sqlite3ExprCanBeNull(pEList->a[0].pExpr) ){ + sortFlags = KEYINFO_ORDER_BIGNULL; + } + }else if( sqlite3StrICmp(zFunc, "max")==0 ){ + eRet = WHERE_ORDERBY_MAX; + sortFlags = KEYINFO_ORDER_DESC; + }else{ + return eRet; + } + *ppMinMax = pOrderBy = sqlite3ExprListDup(db, pEList, 0); + assert( pOrderBy!=0 || db->mallocFailed ); + if( pOrderBy ) pOrderBy->a[0].sortFlags = sortFlags; + return eRet; +} + +/* +** The select statement passed as the first argument is an aggregate query. +** The second argument is the associated aggregate-info object. This +** function tests if the SELECT is of the form: +** +** SELECT count(*) FROM +** +** where table is a database table, not a sub-select or view. If the query +** does match this pattern, then a pointer to the Table object representing +** is returned. Otherwise, NULL is returned. +** +** This routine checks to see if it is safe to use the count optimization. +** A correct answer is still obtained (though perhaps more slowly) if +** this routine returns NULL when it could have returned a table pointer. +** But returning the pointer when NULL should have been returned can +** result in incorrect answers and/or crashes. So, when in doubt, return NULL. +*/ +static Table *isSimpleCount(Select *p, AggInfo *pAggInfo){ + Table *pTab; + Expr *pExpr; + + assert( !p->pGroupBy ); + + if( p->pWhere + || p->pEList->nExpr!=1 + || p->pSrc->nSrc!=1 + || p->pSrc->a[0].pSelect + || pAggInfo->nFunc!=1 + ){ + return 0; + } + pTab = p->pSrc->a[0].pTab; + assert( pTab!=0 ); + assert( !IsView(pTab) ); + if( !IsOrdinaryTable(pTab) ) return 0; + pExpr = p->pEList->a[0].pExpr; + assert( pExpr!=0 ); + if( pExpr->op!=TK_AGG_FUNCTION ) return 0; + if( pExpr->pAggInfo!=pAggInfo ) return 0; + if( (pAggInfo->aFunc[0].pFunc->funcFlags&SQLITE_FUNC_COUNT)==0 ) return 0; + assert( pAggInfo->aFunc[0].pFExpr==pExpr ); + testcase( ExprHasProperty(pExpr, EP_Distinct) ); + testcase( ExprHasProperty(pExpr, EP_WinFunc) ); + if( ExprHasProperty(pExpr, EP_Distinct|EP_WinFunc) ) return 0; + + return pTab; +} + +/* +** If the source-list item passed as an argument was augmented with an +** INDEXED BY clause, then try to locate the specified index. If there +** was such a clause and the named index cannot be found, return +** SQLITE_ERROR and leave an error in pParse. Otherwise, populate +** pFrom->pIndex and return SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *pParse, SrcItem *pFrom){ + Table *pTab = pFrom->pTab; + char *zIndexedBy = pFrom->u1.zIndexedBy; + Index *pIdx; + assert( pTab!=0 ); + assert( pFrom->fg.isIndexedBy!=0 ); + + for(pIdx=pTab->pIndex; + pIdx && sqlite3StrICmp(pIdx->zName, zIndexedBy); + pIdx=pIdx->pNext + ); + if( !pIdx ){ + sqlite3ErrorMsg(pParse, "no such index: %s", zIndexedBy, 0); + pParse->checkSchema = 1; + return SQLITE_ERROR; + } + assert( pFrom->fg.isCte==0 ); + pFrom->u2.pIBIndex = pIdx; + return SQLITE_OK; +} + +/* +** Detect compound SELECT statements that use an ORDER BY clause with +** an alternative collating sequence. +** +** SELECT ... FROM t1 EXCEPT SELECT ... FROM t2 ORDER BY .. COLLATE ... +** +** These are rewritten as a subquery: +** +** SELECT * FROM (SELECT ... FROM t1 EXCEPT SELECT ... FROM t2) +** ORDER BY ... COLLATE ... +** +** This transformation is necessary because the multiSelectOrderBy() routine +** above that generates the code for a compound SELECT with an ORDER BY clause +** uses a merge algorithm that requires the same collating sequence on the +** result columns as on the ORDER BY clause. See ticket +** http://www.sqlite.org/src/info/6709574d2a +** +** This transformation is only needed for EXCEPT, INTERSECT, and UNION. +** The UNION ALL operator works fine with multiSelectOrderBy() even when +** there are COLLATE terms in the ORDER BY. +*/ +static int convertCompoundSelectToSubquery(Walker *pWalker, Select *p){ + int i; + Select *pNew; + Select *pX; + sqlite3 *db; + struct ExprList_item *a; + SrcList *pNewSrc; + Parse *pParse; + Token dummy; + + if( p->pPrior==0 ) return WRC_Continue; + if( p->pOrderBy==0 ) return WRC_Continue; + for(pX=p; pX && (pX->op==TK_ALL || pX->op==TK_SELECT); pX=pX->pPrior){} + if( pX==0 ) return WRC_Continue; + a = p->pOrderBy->a; +#ifndef SQLITE_OMIT_WINDOWFUNC + /* If iOrderByCol is already non-zero, then it has already been matched + ** to a result column of the SELECT statement. This occurs when the + ** SELECT is rewritten for window-functions processing and then passed + ** to sqlite3SelectPrep() and similar a second time. The rewriting done + ** by this function is not required in this case. */ + if( a[0].u.x.iOrderByCol ) return WRC_Continue; +#endif + for(i=p->pOrderBy->nExpr-1; i>=0; i--){ + if( a[i].pExpr->flags & EP_Collate ) break; + } + if( i<0 ) return WRC_Continue; + + /* If we reach this point, that means the transformation is required. */ + + pParse = pWalker->pParse; + db = pParse->db; + pNew = sqlite3DbMallocZero(db, sizeof(*pNew) ); + if( pNew==0 ) return WRC_Abort; + memset(&dummy, 0, sizeof(dummy)); + pNewSrc = sqlite3SrcListAppendFromTerm(pParse,0,0,0,&dummy,pNew,0,0); + if( pNewSrc==0 ) return WRC_Abort; + *pNew = *p; + p->pSrc = pNewSrc; + p->pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db, TK_ASTERISK, 0)); + p->op = TK_SELECT; + p->pWhere = 0; + pNew->pGroupBy = 0; + pNew->pHaving = 0; + pNew->pOrderBy = 0; + p->pPrior = 0; + p->pNext = 0; + p->pWith = 0; +#ifndef SQLITE_OMIT_WINDOWFUNC + p->pWinDefn = 0; +#endif + p->selFlags &= ~SF_Compound; + assert( (p->selFlags & SF_Converted)==0 ); + p->selFlags |= SF_Converted; + assert( pNew->pPrior!=0 ); + pNew->pPrior->pNext = pNew; + pNew->pLimit = 0; + return WRC_Continue; +} + +/* +** Check to see if the FROM clause term pFrom has table-valued function +** arguments. If it does, leave an error message in pParse and return +** non-zero, since pFrom is not allowed to be a table-valued function. +*/ +static int cannotBeFunction(Parse *pParse, SrcItem *pFrom){ + if( pFrom->fg.isTabFunc ){ + sqlite3ErrorMsg(pParse, "'%s' is not a function", pFrom->zName); + return 1; + } + return 0; +} + +#ifndef SQLITE_OMIT_CTE +/* +** Argument pWith (which may be NULL) points to a linked list of nested +** WITH contexts, from inner to outermost. If the table identified by +** FROM clause element pItem is really a common-table-expression (CTE) +** then return a pointer to the CTE definition for that table. Otherwise +** return NULL. +** +** If a non-NULL value is returned, set *ppContext to point to the With +** object that the returned CTE belongs to. +*/ +static struct Cte *searchWith( + With *pWith, /* Current innermost WITH clause */ + SrcItem *pItem, /* FROM clause element to resolve */ + With **ppContext /* OUT: WITH clause return value belongs to */ +){ + const char *zName = pItem->zName; + With *p; + assert( pItem->zDatabase==0 ); + assert( zName!=0 ); + for(p=pWith; p; p=p->pOuter){ + int i; + for(i=0; inCte; i++){ + if( sqlite3StrICmp(zName, p->a[i].zName)==0 ){ + *ppContext = p; + return &p->a[i]; + } + } + if( p->bView ) break; + } + return 0; +} + +/* The code generator maintains a stack of active WITH clauses +** with the inner-most WITH clause being at the top of the stack. +** +** This routine pushes the WITH clause passed as the second argument +** onto the top of the stack. If argument bFree is true, then this +** WITH clause will never be popped from the stack but should instead +** be freed along with the Parse object. In other cases, when +** bFree==0, the With object will be freed along with the SELECT +** statement with which it is associated. +** +** This routine returns a copy of pWith. Or, if bFree is true and +** the pWith object is destroyed immediately due to an OOM condition, +** then this routine return NULL. +** +** If bFree is true, do not continue to use the pWith pointer after +** calling this routine, Instead, use only the return value. +*/ +SQLITE_PRIVATE With *sqlite3WithPush(Parse *pParse, With *pWith, u8 bFree){ + if( pWith ){ + if( bFree ){ + pWith = (With*)sqlite3ParserAddCleanup(pParse, + (void(*)(sqlite3*,void*))sqlite3WithDelete, + pWith); + if( pWith==0 ) return 0; + } + if( pParse->nErr==0 ){ + assert( pParse->pWith!=pWith ); + pWith->pOuter = pParse->pWith; + pParse->pWith = pWith; + } + } + return pWith; +} + +/* +** This function checks if argument pFrom refers to a CTE declared by +** a WITH clause on the stack currently maintained by the parser (on the +** pParse->pWith linked list). And if currently processing a CTE +** CTE expression, through routine checks to see if the reference is +** a recursive reference to the CTE. +** +** If pFrom matches a CTE according to either of these two above, pFrom->pTab +** and other fields are populated accordingly. +** +** Return 0 if no match is found. +** Return 1 if a match is found. +** Return 2 if an error condition is detected. +*/ +static int resolveFromTermToCte( + Parse *pParse, /* The parsing context */ + Walker *pWalker, /* Current tree walker */ + SrcItem *pFrom /* The FROM clause term to check */ +){ + Cte *pCte; /* Matched CTE (or NULL if no match) */ + With *pWith; /* The matching WITH */ + + assert( pFrom->pTab==0 ); + if( pParse->pWith==0 ){ + /* There are no WITH clauses in the stack. No match is possible */ + return 0; + } + if( pParse->nErr ){ + /* Prior errors might have left pParse->pWith in a goofy state, so + ** go no further. */ + return 0; + } + if( pFrom->zDatabase!=0 ){ + /* The FROM term contains a schema qualifier (ex: main.t1) and so + ** it cannot possibly be a CTE reference. */ + return 0; + } + if( pFrom->fg.notCte ){ + /* The FROM term is specifically excluded from matching a CTE. + ** (1) It is part of a trigger that used to have zDatabase but had + ** zDatabase removed by sqlite3FixTriggerStep(). + ** (2) This is the first term in the FROM clause of an UPDATE. + */ + return 0; + } + pCte = searchWith(pParse->pWith, pFrom, &pWith); + if( pCte ){ + sqlite3 *db = pParse->db; + Table *pTab; + ExprList *pEList; + Select *pSel; + Select *pLeft; /* Left-most SELECT statement */ + Select *pRecTerm; /* Left-most recursive term */ + int bMayRecursive; /* True if compound joined by UNION [ALL] */ + With *pSavedWith; /* Initial value of pParse->pWith */ + int iRecTab = -1; /* Cursor for recursive table */ + CteUse *pCteUse; + + /* If pCte->zCteErr is non-NULL at this point, then this is an illegal + ** recursive reference to CTE pCte. Leave an error in pParse and return + ** early. If pCte->zCteErr is NULL, then this is not a recursive reference. + ** In this case, proceed. */ + if( pCte->zCteErr ){ + sqlite3ErrorMsg(pParse, pCte->zCteErr, pCte->zName); + return 2; + } + if( cannotBeFunction(pParse, pFrom) ) return 2; + + assert( pFrom->pTab==0 ); + pTab = sqlite3DbMallocZero(db, sizeof(Table)); + if( pTab==0 ) return 2; + pCteUse = pCte->pUse; + if( pCteUse==0 ){ + pCte->pUse = pCteUse = sqlite3DbMallocZero(db, sizeof(pCteUse[0])); + if( pCteUse==0 + || sqlite3ParserAddCleanup(pParse,sqlite3DbFree,pCteUse)==0 + ){ + sqlite3DbFree(db, pTab); + return 2; + } + pCteUse->eM10d = pCte->eM10d; + } + pFrom->pTab = pTab; + pTab->nTabRef = 1; + pTab->zName = sqlite3DbStrDup(db, pCte->zName); + pTab->iPKey = -1; + pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); + pTab->tabFlags |= TF_Ephemeral | TF_NoVisibleRowid; + pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0); + if( db->mallocFailed ) return 2; + pFrom->pSelect->selFlags |= SF_CopyCte; + assert( pFrom->pSelect ); + if( pFrom->fg.isIndexedBy ){ + sqlite3ErrorMsg(pParse, "no such index: \"%s\"", pFrom->u1.zIndexedBy); + return 2; + } + pFrom->fg.isCte = 1; + pFrom->u2.pCteUse = pCteUse; + pCteUse->nUse++; + if( pCteUse->nUse>=2 && pCteUse->eM10d==M10d_Any ){ + pCteUse->eM10d = M10d_Yes; + } + + /* Check if this is a recursive CTE. */ + pRecTerm = pSel = pFrom->pSelect; + bMayRecursive = ( pSel->op==TK_ALL || pSel->op==TK_UNION ); + while( bMayRecursive && pRecTerm->op==pSel->op ){ + int i; + SrcList *pSrc = pRecTerm->pSrc; + assert( pRecTerm->pPrior!=0 ); + for(i=0; inSrc; i++){ + SrcItem *pItem = &pSrc->a[i]; + if( pItem->zDatabase==0 + && pItem->zName!=0 + && 0==sqlite3StrICmp(pItem->zName, pCte->zName) + ){ + pItem->pTab = pTab; + pTab->nTabRef++; + pItem->fg.isRecursive = 1; + if( pRecTerm->selFlags & SF_Recursive ){ + sqlite3ErrorMsg(pParse, + "multiple references to recursive table: %s", pCte->zName + ); + return 2; + } + pRecTerm->selFlags |= SF_Recursive; + if( iRecTab<0 ) iRecTab = pParse->nTab++; + pItem->iCursor = iRecTab; + } + } + if( (pRecTerm->selFlags & SF_Recursive)==0 ) break; + pRecTerm = pRecTerm->pPrior; + } + + pCte->zCteErr = "circular reference: %s"; + pSavedWith = pParse->pWith; + pParse->pWith = pWith; + if( pSel->selFlags & SF_Recursive ){ + int rc; + assert( pRecTerm!=0 ); + assert( (pRecTerm->selFlags & SF_Recursive)==0 ); + assert( pRecTerm->pNext!=0 ); + assert( (pRecTerm->pNext->selFlags & SF_Recursive)!=0 ); + assert( pRecTerm->pWith==0 ); + pRecTerm->pWith = pSel->pWith; + rc = sqlite3WalkSelect(pWalker, pRecTerm); + pRecTerm->pWith = 0; + if( rc ){ + pParse->pWith = pSavedWith; + return 2; + } + }else{ + if( sqlite3WalkSelect(pWalker, pSel) ){ + pParse->pWith = pSavedWith; + return 2; + } + } + pParse->pWith = pWith; + + for(pLeft=pSel; pLeft->pPrior; pLeft=pLeft->pPrior); + pEList = pLeft->pEList; + if( pCte->pCols ){ + if( pEList && pEList->nExpr!=pCte->pCols->nExpr ){ + sqlite3ErrorMsg(pParse, "table %s has %d values for %d columns", + pCte->zName, pEList->nExpr, pCte->pCols->nExpr + ); + pParse->pWith = pSavedWith; + return 2; + } + pEList = pCte->pCols; + } + + sqlite3ColumnsFromExprList(pParse, pEList, &pTab->nCol, &pTab->aCol); + if( bMayRecursive ){ + if( pSel->selFlags & SF_Recursive ){ + pCte->zCteErr = "multiple recursive references: %s"; + }else{ + pCte->zCteErr = "recursive reference in a subquery: %s"; + } + sqlite3WalkSelect(pWalker, pSel); + } + pCte->zCteErr = 0; + pParse->pWith = pSavedWith; + return 1; /* Success */ + } + return 0; /* No match */ +} +#endif + +#ifndef SQLITE_OMIT_CTE +/* +** If the SELECT passed as the second argument has an associated WITH +** clause, pop it from the stack stored as part of the Parse object. +** +** This function is used as the xSelectCallback2() callback by +** sqlite3SelectExpand() when walking a SELECT tree to resolve table +** names and other FROM clause elements. +*/ +SQLITE_PRIVATE void sqlite3SelectPopWith(Walker *pWalker, Select *p){ + Parse *pParse = pWalker->pParse; + if( OK_IF_ALWAYS_TRUE(pParse->pWith) && p->pPrior==0 ){ + With *pWith = findRightmost(p)->pWith; + if( pWith!=0 ){ + assert( pParse->pWith==pWith || pParse->nErr ); + pParse->pWith = pWith->pOuter; + } + } +} +#endif + +/* +** The SrcList_item structure passed as the second argument represents a +** sub-query in the FROM clause of a SELECT statement. This function +** allocates and populates the SrcList_item.pTab object. If successful, +** SQLITE_OK is returned. Otherwise, if an OOM error is encountered, +** SQLITE_NOMEM. +*/ +SQLITE_PRIVATE int sqlite3ExpandSubquery(Parse *pParse, SrcItem *pFrom){ + Select *pSel = pFrom->pSelect; + Table *pTab; + + assert( pSel ); + pFrom->pTab = pTab = sqlite3DbMallocZero(pParse->db, sizeof(Table)); + if( pTab==0 ) return SQLITE_NOMEM; + pTab->nTabRef = 1; + if( pFrom->zAlias ){ + pTab->zName = sqlite3DbStrDup(pParse->db, pFrom->zAlias); + }else{ + pTab->zName = sqlite3MPrintf(pParse->db, "subquery_%u", pSel->selId); + } + while( pSel->pPrior ){ pSel = pSel->pPrior; } + sqlite3ColumnsFromExprList(pParse, pSel->pEList,&pTab->nCol,&pTab->aCol); + pTab->iPKey = -1; + pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); +#ifndef SQLITE_ALLOW_ROWID_IN_VIEW + /* The usual case - do not allow ROWID on a subquery */ + pTab->tabFlags |= TF_Ephemeral | TF_NoVisibleRowid; +#else + pTab->tabFlags |= TF_Ephemeral; /* Legacy compatibility mode */ +#endif + + + return pParse->nErr ? SQLITE_ERROR : SQLITE_OK; +} + +/* +** This routine is a Walker callback for "expanding" a SELECT statement. +** "Expanding" means to do the following: +** +** (1) Make sure VDBE cursor numbers have been assigned to every +** element of the FROM clause. +** +** (2) Fill in the pTabList->a[].pTab fields in the SrcList that +** defines FROM clause. When views appear in the FROM clause, +** fill pTabList->a[].pSelect with a copy of the SELECT statement +** that implements the view. A copy is made of the view's SELECT +** statement so that we can freely modify or delete that statement +** without worrying about messing up the persistent representation +** of the view. +** +** (3) Add terms to the WHERE clause to accommodate the NATURAL keyword +** on joins and the ON and USING clause of joins. +** +** (4) Scan the list of columns in the result set (pEList) looking +** for instances of the "*" operator or the TABLE.* operator. +** If found, expand each "*" to be every column in every table +** and TABLE.* to be every column in TABLE. +** +*/ +static int selectExpander(Walker *pWalker, Select *p){ + Parse *pParse = pWalker->pParse; + int i, j, k, rc; + SrcList *pTabList; + ExprList *pEList; + SrcItem *pFrom; + sqlite3 *db = pParse->db; + Expr *pE, *pRight, *pExpr; + u16 selFlags = p->selFlags; + u32 elistFlags = 0; + + p->selFlags |= SF_Expanded; + if( db->mallocFailed ){ + return WRC_Abort; + } + assert( p->pSrc!=0 ); + if( (selFlags & SF_Expanded)!=0 ){ + return WRC_Prune; + } + if( pWalker->eCode ){ + /* Renumber selId because it has been copied from a view */ + p->selId = ++pParse->nSelect; + } + pTabList = p->pSrc; + pEList = p->pEList; + if( pParse->pWith && (p->selFlags & SF_View) ){ + if( p->pWith==0 ){ + p->pWith = (With*)sqlite3DbMallocZero(db, sizeof(With)); + if( p->pWith==0 ){ + return WRC_Abort; + } + } + p->pWith->bView = 1; + } + sqlite3WithPush(pParse, p->pWith, 0); + + /* Make sure cursor numbers have been assigned to all entries in + ** the FROM clause of the SELECT statement. + */ + sqlite3SrcListAssignCursors(pParse, pTabList); + + /* Look up every table named in the FROM clause of the select. If + ** an entry of the FROM clause is a subquery instead of a table or view, + ** then create a transient table structure to describe the subquery. + */ + for(i=0, pFrom=pTabList->a; inSrc; i++, pFrom++){ + Table *pTab; + assert( pFrom->fg.isRecursive==0 || pFrom->pTab!=0 ); + if( pFrom->pTab ) continue; + assert( pFrom->fg.isRecursive==0 ); + if( pFrom->zName==0 ){ +#ifndef SQLITE_OMIT_SUBQUERY + Select *pSel = pFrom->pSelect; + /* A sub-query in the FROM clause of a SELECT */ + assert( pSel!=0 ); + assert( pFrom->pTab==0 ); + if( sqlite3WalkSelect(pWalker, pSel) ) return WRC_Abort; + if( sqlite3ExpandSubquery(pParse, pFrom) ) return WRC_Abort; +#endif +#ifndef SQLITE_OMIT_CTE + }else if( (rc = resolveFromTermToCte(pParse, pWalker, pFrom))!=0 ){ + if( rc>1 ) return WRC_Abort; + pTab = pFrom->pTab; + assert( pTab!=0 ); +#endif + }else{ + /* An ordinary table or view name in the FROM clause */ + assert( pFrom->pTab==0 ); + pFrom->pTab = pTab = sqlite3LocateTableItem(pParse, 0, pFrom); + if( pTab==0 ) return WRC_Abort; + if( pTab->nTabRef>=0xffff ){ + sqlite3ErrorMsg(pParse, "too many references to \"%s\": max 65535", + pTab->zName); + pFrom->pTab = 0; + return WRC_Abort; + } + pTab->nTabRef++; + if( !IsVirtual(pTab) && cannotBeFunction(pParse, pFrom) ){ + return WRC_Abort; + } +#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) + if( !IsOrdinaryTable(pTab) ){ + i16 nCol; + u8 eCodeOrig = pWalker->eCode; + if( sqlite3ViewGetColumnNames(pParse, pTab) ) return WRC_Abort; + assert( pFrom->pSelect==0 ); + if( IsView(pTab) ){ + if( (db->flags & SQLITE_EnableView)==0 + && pTab->pSchema!=db->aDb[1].pSchema + ){ + sqlite3ErrorMsg(pParse, "access to view \"%s\" prohibited", + pTab->zName); + } + pFrom->pSelect = sqlite3SelectDup(db, pTab->u.view.pSelect, 0); + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + else if( ALWAYS(IsVirtual(pTab)) + && pFrom->fg.fromDDL + && ALWAYS(pTab->u.vtab.p!=0) + && pTab->u.vtab.p->eVtabRisk > ((db->flags & SQLITE_TrustedSchema)!=0) + ){ + sqlite3ErrorMsg(pParse, "unsafe use of virtual table \"%s\"", + pTab->zName); + } + assert( SQLITE_VTABRISK_Normal==1 && SQLITE_VTABRISK_High==2 ); +#endif + nCol = pTab->nCol; + pTab->nCol = -1; + pWalker->eCode = 1; /* Turn on Select.selId renumbering */ + sqlite3WalkSelect(pWalker, pFrom->pSelect); + pWalker->eCode = eCodeOrig; + pTab->nCol = nCol; + } +#endif + } + + /* Locate the index named by the INDEXED BY clause, if any. */ + if( pFrom->fg.isIndexedBy && sqlite3IndexedByLookup(pParse, pFrom) ){ + return WRC_Abort; + } + } + + /* Process NATURAL keywords, and ON and USING clauses of joins. + */ + assert( db->mallocFailed==0 || pParse->nErr!=0 ); + if( pParse->nErr || sqliteProcessJoin(pParse, p) ){ + return WRC_Abort; + } + + /* For every "*" that occurs in the column list, insert the names of + ** all columns in all tables. And for every TABLE.* insert the names + ** of all columns in TABLE. The parser inserted a special expression + ** with the TK_ASTERISK operator for each "*" that it found in the column + ** list. The following code just has to locate the TK_ASTERISK + ** expressions and expand each one to the list of all columns in + ** all tables. + ** + ** The first loop just checks to see if there are any "*" operators + ** that need expanding. + */ + for(k=0; knExpr; k++){ + pE = pEList->a[k].pExpr; + if( pE->op==TK_ASTERISK ) break; + assert( pE->op!=TK_DOT || pE->pRight!=0 ); + assert( pE->op!=TK_DOT || (pE->pLeft!=0 && pE->pLeft->op==TK_ID) ); + if( pE->op==TK_DOT && pE->pRight->op==TK_ASTERISK ) break; + elistFlags |= pE->flags; + } + if( knExpr ){ + /* + ** If we get here it means the result set contains one or more "*" + ** operators that need to be expanded. Loop through each expression + ** in the result set and expand them one by one. + */ + struct ExprList_item *a = pEList->a; + ExprList *pNew = 0; + int flags = pParse->db->flags; + int longNames = (flags & SQLITE_FullColNames)!=0 + && (flags & SQLITE_ShortColNames)==0; + + for(k=0; knExpr; k++){ + pE = a[k].pExpr; + elistFlags |= pE->flags; + pRight = pE->pRight; + assert( pE->op!=TK_DOT || pRight!=0 ); + if( pE->op!=TK_ASTERISK + && (pE->op!=TK_DOT || pRight->op!=TK_ASTERISK) + ){ + /* This particular expression does not need to be expanded. + */ + pNew = sqlite3ExprListAppend(pParse, pNew, a[k].pExpr); + if( pNew ){ + pNew->a[pNew->nExpr-1].zEName = a[k].zEName; + pNew->a[pNew->nExpr-1].eEName = a[k].eEName; + a[k].zEName = 0; + } + a[k].pExpr = 0; + }else{ + /* This expression is a "*" or a "TABLE.*" and needs to be + ** expanded. */ + int tableSeen = 0; /* Set to 1 when TABLE matches */ + char *zTName = 0; /* text of name of TABLE */ + if( pE->op==TK_DOT ){ + assert( pE->pLeft!=0 ); + assert( !ExprHasProperty(pE->pLeft, EP_IntValue) ); + zTName = pE->pLeft->u.zToken; + } + for(i=0, pFrom=pTabList->a; inSrc; i++, pFrom++){ + Table *pTab = pFrom->pTab; + Select *pSub = pFrom->pSelect; + char *zTabName = pFrom->zAlias; + const char *zSchemaName = 0; + int iDb; + if( zTabName==0 ){ + zTabName = pTab->zName; + } + if( db->mallocFailed ) break; + if( pSub==0 || (pSub->selFlags & SF_NestedFrom)==0 ){ + pSub = 0; + if( zTName && sqlite3StrICmp(zTName, zTabName)!=0 ){ + continue; + } + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + zSchemaName = iDb>=0 ? db->aDb[iDb].zDbSName : "*"; + } + for(j=0; jnCol; j++){ + char *zName = pTab->aCol[j].zCnName; + char *zColname; /* The computed column name */ + char *zToFree; /* Malloced string that needs to be freed */ + Token sColname; /* Computed column name as a token */ + + assert( zName ); + if( zTName && pSub + && sqlite3MatchEName(&pSub->pEList->a[j], 0, zTName, 0)==0 + ){ + continue; + } + + /* If a column is marked as 'hidden', omit it from the expanded + ** result-set list unless the SELECT has the SF_IncludeHidden + ** bit set. + */ + if( (p->selFlags & SF_IncludeHidden)==0 + && IsHiddenColumn(&pTab->aCol[j]) + ){ + continue; + } + tableSeen = 1; + + if( i>0 && zTName==0 ){ + if( (pFrom->fg.jointype & JT_NATURAL)!=0 + && tableAndColumnIndex(pTabList, i, zName, 0, 0, 1) + ){ + /* In a NATURAL join, omit the join columns from the + ** table to the right of the join */ + continue; + } + if( sqlite3IdListIndex(pFrom->pUsing, zName)>=0 ){ + /* In a join with a USING clause, omit columns in the + ** using clause from the table on the right. */ + continue; + } + } + pRight = sqlite3Expr(db, TK_ID, zName); + zColname = zName; + zToFree = 0; + if( longNames || pTabList->nSrc>1 ){ + Expr *pLeft; + pLeft = sqlite3Expr(db, TK_ID, zTabName); + pExpr = sqlite3PExpr(pParse, TK_DOT, pLeft, pRight); + if( zSchemaName ){ + pLeft = sqlite3Expr(db, TK_ID, zSchemaName); + pExpr = sqlite3PExpr(pParse, TK_DOT, pLeft, pExpr); + } + if( longNames ){ + zColname = sqlite3MPrintf(db, "%s.%s", zTabName, zName); + zToFree = zColname; + } + }else{ + pExpr = pRight; + } + pNew = sqlite3ExprListAppend(pParse, pNew, pExpr); + sqlite3TokenInit(&sColname, zColname); + sqlite3ExprListSetName(pParse, pNew, &sColname, 0); + if( pNew && (p->selFlags & SF_NestedFrom)!=0 && !IN_RENAME_OBJECT ){ + struct ExprList_item *pX = &pNew->a[pNew->nExpr-1]; + sqlite3DbFree(db, pX->zEName); + if( pSub ){ + pX->zEName = sqlite3DbStrDup(db, pSub->pEList->a[j].zEName); + testcase( pX->zEName==0 ); + }else{ + pX->zEName = sqlite3MPrintf(db, "%s.%s.%s", + zSchemaName, zTabName, zColname); + testcase( pX->zEName==0 ); + } + pX->eEName = ENAME_TAB; + } + sqlite3DbFree(db, zToFree); + } + } + if( !tableSeen ){ + if( zTName ){ + sqlite3ErrorMsg(pParse, "no such table: %s", zTName); + }else{ + sqlite3ErrorMsg(pParse, "no tables specified"); + } + } + } + } + sqlite3ExprListDelete(db, pEList); + p->pEList = pNew; + } + if( p->pEList ){ + if( p->pEList->nExpr>db->aLimit[SQLITE_LIMIT_COLUMN] ){ + sqlite3ErrorMsg(pParse, "too many columns in result set"); + return WRC_Abort; + } + if( (elistFlags & (EP_HasFunc|EP_Subquery))!=0 ){ + p->selFlags |= SF_ComplexResult; + } + } + return WRC_Continue; +} + +#if SQLITE_DEBUG +/* +** Always assert. This xSelectCallback2 implementation proves that the +** xSelectCallback2 is never invoked. +*/ +SQLITE_PRIVATE void sqlite3SelectWalkAssert2(Walker *NotUsed, Select *NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + assert( 0 ); +} +#endif +/* +** This routine "expands" a SELECT statement and all of its subqueries. +** For additional information on what it means to "expand" a SELECT +** statement, see the comment on the selectExpand worker callback above. +** +** Expanding a SELECT statement is the first step in processing a +** SELECT statement. The SELECT statement must be expanded before +** name resolution is performed. +** +** If anything goes wrong, an error message is written into pParse. +** The calling function can detect the problem by looking at pParse->nErr +** and/or pParse->db->mallocFailed. +*/ +static void sqlite3SelectExpand(Parse *pParse, Select *pSelect){ + Walker w; + w.xExprCallback = sqlite3ExprWalkNoop; + w.pParse = pParse; + if( OK_IF_ALWAYS_TRUE(pParse->hasCompound) ){ + w.xSelectCallback = convertCompoundSelectToSubquery; + w.xSelectCallback2 = 0; + sqlite3WalkSelect(&w, pSelect); + } + w.xSelectCallback = selectExpander; + w.xSelectCallback2 = sqlite3SelectPopWith; + w.eCode = 0; + sqlite3WalkSelect(&w, pSelect); +} + + +#ifndef SQLITE_OMIT_SUBQUERY +/* +** This is a Walker.xSelectCallback callback for the sqlite3SelectTypeInfo() +** interface. +** +** For each FROM-clause subquery, add Column.zType and Column.zColl +** information to the Table structure that represents the result set +** of that subquery. +** +** The Table structure that represents the result set was constructed +** by selectExpander() but the type and collation information was omitted +** at that point because identifiers had not yet been resolved. This +** routine is called after identifier resolution. +*/ +static void selectAddSubqueryTypeInfo(Walker *pWalker, Select *p){ + Parse *pParse; + int i; + SrcList *pTabList; + SrcItem *pFrom; + + assert( p->selFlags & SF_Resolved ); + if( p->selFlags & SF_HasTypeInfo ) return; + p->selFlags |= SF_HasTypeInfo; + pParse = pWalker->pParse; + pTabList = p->pSrc; + for(i=0, pFrom=pTabList->a; inSrc; i++, pFrom++){ + Table *pTab = pFrom->pTab; + assert( pTab!=0 ); + if( (pTab->tabFlags & TF_Ephemeral)!=0 ){ + /* A sub-query in the FROM clause of a SELECT */ + Select *pSel = pFrom->pSelect; + if( pSel ){ + while( pSel->pPrior ) pSel = pSel->pPrior; + sqlite3SelectAddColumnTypeAndCollation(pParse, pTab, pSel, + SQLITE_AFF_NONE); + } + } + } +} +#endif + + +/* +** This routine adds datatype and collating sequence information to +** the Table structures of all FROM-clause subqueries in a +** SELECT statement. +** +** Use this routine after name resolution. +*/ +static void sqlite3SelectAddTypeInfo(Parse *pParse, Select *pSelect){ +#ifndef SQLITE_OMIT_SUBQUERY + Walker w; + w.xSelectCallback = sqlite3SelectWalkNoop; + w.xSelectCallback2 = selectAddSubqueryTypeInfo; + w.xExprCallback = sqlite3ExprWalkNoop; + w.pParse = pParse; + sqlite3WalkSelect(&w, pSelect); +#endif +} + + +/* +** This routine sets up a SELECT statement for processing. The +** following is accomplished: +** +** * VDBE Cursor numbers are assigned to all FROM-clause terms. +** * Ephemeral Table objects are created for all FROM-clause subqueries. +** * ON and USING clauses are shifted into WHERE statements +** * Wildcards "*" and "TABLE.*" in result sets are expanded. +** * Identifiers in expression are matched to tables. +** +** This routine acts recursively on all subqueries within the SELECT. +*/ +SQLITE_PRIVATE void sqlite3SelectPrep( + Parse *pParse, /* The parser context */ + Select *p, /* The SELECT statement being coded. */ + NameContext *pOuterNC /* Name context for container */ +){ + assert( p!=0 || pParse->db->mallocFailed ); + assert( pParse->db->pParse==pParse ); + if( pParse->db->mallocFailed ) return; + if( p->selFlags & SF_HasTypeInfo ) return; + sqlite3SelectExpand(pParse, p); + if( pParse->nErr ) return; + sqlite3ResolveSelectNames(pParse, p, pOuterNC); + if( pParse->nErr ) return; + sqlite3SelectAddTypeInfo(pParse, p); +} + +/* +** Reset the aggregate accumulator. +** +** The aggregate accumulator is a set of memory cells that hold +** intermediate results while calculating an aggregate. This +** routine generates code that stores NULLs in all of those memory +** cells. +*/ +static void resetAccumulator(Parse *pParse, AggInfo *pAggInfo){ + Vdbe *v = pParse->pVdbe; + int i; + struct AggInfo_func *pFunc; + int nReg = pAggInfo->nFunc + pAggInfo->nColumn; + assert( pParse->db->pParse==pParse ); + assert( pParse->db->mallocFailed==0 || pParse->nErr!=0 ); + if( nReg==0 ) return; + if( pParse->nErr ) return; +#ifdef SQLITE_DEBUG + /* Verify that all AggInfo registers are within the range specified by + ** AggInfo.mnReg..AggInfo.mxReg */ + assert( nReg==pAggInfo->mxReg-pAggInfo->mnReg+1 ); + for(i=0; inColumn; i++){ + assert( pAggInfo->aCol[i].iMem>=pAggInfo->mnReg + && pAggInfo->aCol[i].iMem<=pAggInfo->mxReg ); + } + for(i=0; inFunc; i++){ + assert( pAggInfo->aFunc[i].iMem>=pAggInfo->mnReg + && pAggInfo->aFunc[i].iMem<=pAggInfo->mxReg ); + } +#endif + sqlite3VdbeAddOp3(v, OP_Null, 0, pAggInfo->mnReg, pAggInfo->mxReg); + for(pFunc=pAggInfo->aFunc, i=0; inFunc; i++, pFunc++){ + if( pFunc->iDistinct>=0 ){ + Expr *pE = pFunc->pFExpr; + assert( ExprUseXList(pE) ); + if( pE->x.pList==0 || pE->x.pList->nExpr!=1 ){ + sqlite3ErrorMsg(pParse, "DISTINCT aggregates must have exactly one " + "argument"); + pFunc->iDistinct = -1; + }else{ + KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pE->x.pList,0,0); + pFunc->iDistAddr = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, + pFunc->iDistinct, 0, 0, (char*)pKeyInfo, P4_KEYINFO); + ExplainQueryPlan((pParse, 0, "USE TEMP B-TREE FOR %s(DISTINCT)", + pFunc->pFunc->zName)); + } + } + } +} + +/* +** Invoke the OP_AggFinalize opcode for every aggregate function +** in the AggInfo structure. +*/ +static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){ + Vdbe *v = pParse->pVdbe; + int i; + struct AggInfo_func *pF; + for(i=0, pF=pAggInfo->aFunc; inFunc; i++, pF++){ + ExprList *pList; + assert( ExprUseXList(pF->pFExpr) ); + pList = pF->pFExpr->x.pList; + sqlite3VdbeAddOp2(v, OP_AggFinal, pF->iMem, pList ? pList->nExpr : 0); + sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); + } +} + + +/* +** Update the accumulator memory cells for an aggregate based on +** the current cursor position. +** +** If regAcc is non-zero and there are no min() or max() aggregates +** in pAggInfo, then only populate the pAggInfo->nAccumulator accumulator +** registers if register regAcc contains 0. The caller will take care +** of setting and clearing regAcc. +*/ +static void updateAccumulator( + Parse *pParse, + int regAcc, + AggInfo *pAggInfo, + int eDistinctType +){ + Vdbe *v = pParse->pVdbe; + int i; + int regHit = 0; + int addrHitTest = 0; + struct AggInfo_func *pF; + struct AggInfo_col *pC; + + pAggInfo->directMode = 1; + for(i=0, pF=pAggInfo->aFunc; inFunc; i++, pF++){ + int nArg; + int addrNext = 0; + int regAgg; + ExprList *pList; + assert( ExprUseXList(pF->pFExpr) ); + assert( !IsWindowFunc(pF->pFExpr) ); + pList = pF->pFExpr->x.pList; + if( ExprHasProperty(pF->pFExpr, EP_WinFunc) ){ + Expr *pFilter = pF->pFExpr->y.pWin->pFilter; + if( pAggInfo->nAccumulator + && (pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL) + && regAcc + ){ + /* If regAcc==0, there there exists some min() or max() function + ** without a FILTER clause that will ensure the magnet registers + ** are populated. */ + if( regHit==0 ) regHit = ++pParse->nMem; + /* If this is the first row of the group (regAcc contains 0), clear the + ** "magnet" register regHit so that the accumulator registers + ** are populated if the FILTER clause jumps over the the + ** invocation of min() or max() altogether. Or, if this is not + ** the first row (regAcc contains 1), set the magnet register so that + ** the accumulators are not populated unless the min()/max() is invoked + ** and indicates that they should be. */ + sqlite3VdbeAddOp2(v, OP_Copy, regAcc, regHit); + } + addrNext = sqlite3VdbeMakeLabel(pParse); + sqlite3ExprIfFalse(pParse, pFilter, addrNext, SQLITE_JUMPIFNULL); + } + if( pList ){ + nArg = pList->nExpr; + regAgg = sqlite3GetTempRange(pParse, nArg); + sqlite3ExprCodeExprList(pParse, pList, regAgg, 0, SQLITE_ECEL_DUP); + }else{ + nArg = 0; + regAgg = 0; + } + if( pF->iDistinct>=0 && pList ){ + if( addrNext==0 ){ + addrNext = sqlite3VdbeMakeLabel(pParse); + } + pF->iDistinct = codeDistinct(pParse, eDistinctType, + pF->iDistinct, addrNext, pList, regAgg); + } + if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){ + CollSeq *pColl = 0; + struct ExprList_item *pItem; + int j; + assert( pList!=0 ); /* pList!=0 if pF->pFunc has NEEDCOLL */ + for(j=0, pItem=pList->a; !pColl && jpExpr); + } + if( !pColl ){ + pColl = pParse->db->pDfltColl; + } + if( regHit==0 && pAggInfo->nAccumulator ) regHit = ++pParse->nMem; + sqlite3VdbeAddOp4(v, OP_CollSeq, regHit, 0, 0, (char *)pColl, P4_COLLSEQ); + } + sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, pF->iMem); + sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); + sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3ReleaseTempRange(pParse, regAgg, nArg); + if( addrNext ){ + sqlite3VdbeResolveLabel(v, addrNext); + } + } + if( regHit==0 && pAggInfo->nAccumulator ){ + regHit = regAcc; + } + if( regHit ){ + addrHitTest = sqlite3VdbeAddOp1(v, OP_If, regHit); VdbeCoverage(v); + } + for(i=0, pC=pAggInfo->aCol; inAccumulator; i++, pC++){ + sqlite3ExprCode(pParse, pC->pCExpr, pC->iMem); + } + + pAggInfo->directMode = 0; + if( addrHitTest ){ + sqlite3VdbeJumpHereOrPopInst(v, addrHitTest); + } +} + +/* +** Add a single OP_Explain instruction to the VDBE to explain a simple +** count(*) query ("SELECT count(*) FROM pTab"). +*/ +#ifndef SQLITE_OMIT_EXPLAIN +static void explainSimpleCount( + Parse *pParse, /* Parse context */ + Table *pTab, /* Table being queried */ + Index *pIdx /* Index used to optimize scan, or NULL */ +){ + if( pParse->explain==2 ){ + int bCover = (pIdx!=0 && (HasRowid(pTab) || !IsPrimaryKeyIndex(pIdx))); + sqlite3VdbeExplain(pParse, 0, "SCAN %s%s%s", + pTab->zName, + bCover ? " USING COVERING INDEX " : "", + bCover ? pIdx->zName : "" + ); + } +} +#else +# define explainSimpleCount(a,b,c) +#endif + +/* +** sqlite3WalkExpr() callback used by havingToWhere(). +** +** If the node passed to the callback is a TK_AND node, return +** WRC_Continue to tell sqlite3WalkExpr() to iterate through child nodes. +** +** Otherwise, return WRC_Prune. In this case, also check if the +** sub-expression matches the criteria for being moved to the WHERE +** clause. If so, add it to the WHERE clause and replace the sub-expression +** within the HAVING expression with a constant "1". +*/ +static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){ + if( pExpr->op!=TK_AND ){ + Select *pS = pWalker->u.pSelect; + /* This routine is called before the HAVING clause of the current + ** SELECT is analyzed for aggregates. So if pExpr->pAggInfo is set + ** here, it indicates that the expression is a correlated reference to a + ** column from an outer aggregate query, or an aggregate function that + ** belongs to an outer query. Do not move the expression to the WHERE + ** clause in this obscure case, as doing so may corrupt the outer Select + ** statements AggInfo structure. */ + if( sqlite3ExprIsConstantOrGroupBy(pWalker->pParse, pExpr, pS->pGroupBy) + && ExprAlwaysFalse(pExpr)==0 + && pExpr->pAggInfo==0 + ){ + sqlite3 *db = pWalker->pParse->db; + Expr *pNew = sqlite3Expr(db, TK_INTEGER, "1"); + if( pNew ){ + Expr *pWhere = pS->pWhere; + SWAP(Expr, *pNew, *pExpr); + pNew = sqlite3ExprAnd(pWalker->pParse, pWhere, pNew); + pS->pWhere = pNew; + pWalker->eCode = 1; + } + } + return WRC_Prune; + } + return WRC_Continue; +} + +/* +** Transfer eligible terms from the HAVING clause of a query, which is +** processed after grouping, to the WHERE clause, which is processed before +** grouping. For example, the query: +** +** SELECT * FROM WHERE a=? GROUP BY b HAVING b=? AND c=? +** +** can be rewritten as: +** +** SELECT * FROM WHERE a=? AND b=? GROUP BY b HAVING c=? +** +** A term of the HAVING expression is eligible for transfer if it consists +** entirely of constants and expressions that are also GROUP BY terms that +** use the "BINARY" collation sequence. +*/ +static void havingToWhere(Parse *pParse, Select *p){ + Walker sWalker; + memset(&sWalker, 0, sizeof(sWalker)); + sWalker.pParse = pParse; + sWalker.xExprCallback = havingToWhereExprCb; + sWalker.u.pSelect = p; + sqlite3WalkExpr(&sWalker, p->pHaving); +#if SELECTTRACE_ENABLED + if( sWalker.eCode && (sqlite3SelectTrace & 0x100)!=0 ){ + SELECTTRACE(0x100,pParse,p,("Move HAVING terms into WHERE:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif +} + +/* +** Check to see if the pThis entry of pTabList is a self-join of a prior view. +** If it is, then return the SrcList_item for the prior view. If it is not, +** then return 0. +*/ +static SrcItem *isSelfJoinView( + SrcList *pTabList, /* Search for self-joins in this FROM clause */ + SrcItem *pThis /* Search for prior reference to this subquery */ +){ + SrcItem *pItem; + assert( pThis->pSelect!=0 ); + if( pThis->pSelect->selFlags & SF_PushDown ) return 0; + for(pItem = pTabList->a; pItempSelect==0 ) continue; + if( pItem->fg.viaCoroutine ) continue; + if( pItem->zName==0 ) continue; + assert( pItem->pTab!=0 ); + assert( pThis->pTab!=0 ); + if( pItem->pTab->pSchema!=pThis->pTab->pSchema ) continue; + if( sqlite3_stricmp(pItem->zName, pThis->zName)!=0 ) continue; + pS1 = pItem->pSelect; + if( pItem->pTab->pSchema==0 && pThis->pSelect->selId!=pS1->selId ){ + /* The query flattener left two different CTE tables with identical + ** names in the same FROM clause. */ + continue; + } + if( pItem->pSelect->selFlags & SF_PushDown ){ + /* The view was modified by some other optimization such as + ** pushDownWhereTerms() */ + continue; + } + return pItem; + } + return 0; +} + +/* +** Deallocate a single AggInfo object +*/ +static void agginfoFree(sqlite3 *db, AggInfo *p){ + sqlite3DbFree(db, p->aCol); + sqlite3DbFree(db, p->aFunc); + sqlite3DbFreeNN(db, p); +} + +#ifdef SQLITE_COUNTOFVIEW_OPTIMIZATION +/* +** Attempt to transform a query of the form +** +** SELECT count(*) FROM (SELECT x FROM t1 UNION ALL SELECT y FROM t2) +** +** Into this: +** +** SELECT (SELECT count(*) FROM t1)+(SELECT count(*) FROM t2) +** +** The transformation only works if all of the following are true: +** +** * The subquery is a UNION ALL of two or more terms +** * The subquery does not have a LIMIT clause +** * There is no WHERE or GROUP BY or HAVING clauses on the subqueries +** * The outer query is a simple count(*) with no WHERE clause or other +** extraneous syntax. +** +** Return TRUE if the optimization is undertaken. +*/ +static int countOfViewOptimization(Parse *pParse, Select *p){ + Select *pSub, *pPrior; + Expr *pExpr; + Expr *pCount; + sqlite3 *db; + if( (p->selFlags & SF_Aggregate)==0 ) return 0; /* This is an aggregate */ + if( p->pEList->nExpr!=1 ) return 0; /* Single result column */ + if( p->pWhere ) return 0; + if( p->pGroupBy ) return 0; + pExpr = p->pEList->a[0].pExpr; + if( pExpr->op!=TK_AGG_FUNCTION ) return 0; /* Result is an aggregate */ + assert( ExprUseUToken(pExpr) ); + if( sqlite3_stricmp(pExpr->u.zToken,"count") ) return 0; /* Is count() */ + assert( ExprUseXList(pExpr) ); + if( pExpr->x.pList!=0 ) return 0; /* Must be count(*) */ + if( p->pSrc->nSrc!=1 ) return 0; /* One table in FROM */ + pSub = p->pSrc->a[0].pSelect; + if( pSub==0 ) return 0; /* The FROM is a subquery */ + if( pSub->pPrior==0 ) return 0; /* Must be a compound ry */ + do{ + if( pSub->op!=TK_ALL && pSub->pPrior ) return 0; /* Must be UNION ALL */ + if( pSub->pWhere ) return 0; /* No WHERE clause */ + if( pSub->pLimit ) return 0; /* No LIMIT clause */ + if( pSub->selFlags & SF_Aggregate ) return 0; /* Not an aggregate */ + pSub = pSub->pPrior; /* Repeat over compound */ + }while( pSub ); + + /* If we reach this point then it is OK to perform the transformation */ + + db = pParse->db; + pCount = pExpr; + pExpr = 0; + pSub = p->pSrc->a[0].pSelect; + p->pSrc->a[0].pSelect = 0; + sqlite3SrcListDelete(db, p->pSrc); + p->pSrc = sqlite3DbMallocZero(pParse->db, sizeof(*p->pSrc)); + while( pSub ){ + Expr *pTerm; + pPrior = pSub->pPrior; + pSub->pPrior = 0; + pSub->pNext = 0; + pSub->selFlags |= SF_Aggregate; + pSub->selFlags &= ~SF_Compound; + pSub->nSelectRow = 0; + sqlite3ExprListDelete(db, pSub->pEList); + pTerm = pPrior ? sqlite3ExprDup(db, pCount, 0) : pCount; + pSub->pEList = sqlite3ExprListAppend(pParse, 0, pTerm); + pTerm = sqlite3PExpr(pParse, TK_SELECT, 0, 0); + sqlite3PExprAddSelect(pParse, pTerm, pSub); + if( pExpr==0 ){ + pExpr = pTerm; + }else{ + pExpr = sqlite3PExpr(pParse, TK_PLUS, pTerm, pExpr); + } + pSub = pPrior; + } + p->pEList->a[0].pExpr = pExpr; + p->selFlags &= ~SF_Aggregate; + +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x400 ){ + SELECTTRACE(0x400,pParse,p,("After count-of-view optimization:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif + return 1; +} +#endif /* SQLITE_COUNTOFVIEW_OPTIMIZATION */ + +/* +** Generate code for the SELECT statement given in the p argument. +** +** The results are returned according to the SelectDest structure. +** See comments in sqliteInt.h for further information. +** +** This routine returns the number of errors. If any errors are +** encountered, then an appropriate error message is left in +** pParse->zErrMsg. +** +** This routine does NOT free the Select structure passed in. The +** calling function needs to do that. +*/ +SQLITE_PRIVATE int sqlite3Select( + Parse *pParse, /* The parser context */ + Select *p, /* The SELECT statement being coded. */ + SelectDest *pDest /* What to do with the query results */ +){ + int i, j; /* Loop counters */ + WhereInfo *pWInfo; /* Return from sqlite3WhereBegin() */ + Vdbe *v; /* The virtual machine under construction */ + int isAgg; /* True for select lists like "count(*)" */ + ExprList *pEList = 0; /* List of columns to extract. */ + SrcList *pTabList; /* List of tables to select from */ + Expr *pWhere; /* The WHERE clause. May be NULL */ + ExprList *pGroupBy; /* The GROUP BY clause. May be NULL */ + Expr *pHaving; /* The HAVING clause. May be NULL */ + AggInfo *pAggInfo = 0; /* Aggregate information */ + int rc = 1; /* Value to return from this function */ + DistinctCtx sDistinct; /* Info on how to code the DISTINCT keyword */ + SortCtx sSort; /* Info on how to code the ORDER BY clause */ + int iEnd; /* Address of the end of the query */ + sqlite3 *db; /* The database connection */ + ExprList *pMinMaxOrderBy = 0; /* Added ORDER BY for min/max queries */ + u8 minMaxFlag; /* Flag for min/max queries */ + + db = pParse->db; + assert( pParse==db->pParse ); + v = sqlite3GetVdbe(pParse); + if( p==0 || pParse->nErr ){ + return 1; + } + assert( db->mallocFailed==0 ); + if( sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0) ) return 1; +#if SELECTTRACE_ENABLED + SELECTTRACE(1,pParse,p, ("begin processing:\n", pParse->addrExplain)); + if( sqlite3SelectTrace & 0x100 ){ + sqlite3TreeViewSelect(0, p, 0); + } +#endif + + assert( p->pOrderBy==0 || pDest->eDest!=SRT_DistFifo ); + assert( p->pOrderBy==0 || pDest->eDest!=SRT_Fifo ); + assert( p->pOrderBy==0 || pDest->eDest!=SRT_DistQueue ); + assert( p->pOrderBy==0 || pDest->eDest!=SRT_Queue ); + if( IgnorableDistinct(pDest) ){ + assert(pDest->eDest==SRT_Exists || pDest->eDest==SRT_Union || + pDest->eDest==SRT_Except || pDest->eDest==SRT_Discard || + pDest->eDest==SRT_DistQueue || pDest->eDest==SRT_DistFifo ); + /* All of these destinations are also able to ignore the ORDER BY clause */ + if( p->pOrderBy ){ +#if SELECTTRACE_ENABLED + SELECTTRACE(1,pParse,p, ("dropping superfluous ORDER BY:\n")); + if( sqlite3SelectTrace & 0x100 ){ + sqlite3TreeViewExprList(0, p->pOrderBy, 0, "ORDERBY"); + } +#endif + sqlite3ParserAddCleanup(pParse, + (void(*)(sqlite3*,void*))sqlite3ExprListDelete, + p->pOrderBy); + testcase( pParse->earlyCleanup ); + p->pOrderBy = 0; + } + p->selFlags &= ~SF_Distinct; + p->selFlags |= SF_NoopOrderBy; + } + sqlite3SelectPrep(pParse, p, 0); + if( pParse->nErr ){ + goto select_end; + } + assert( db->mallocFailed==0 ); + assert( p->pEList!=0 ); +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x104 ){ + SELECTTRACE(0x104,pParse,p, ("after name resolution:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif + + /* If the SF_UFSrcCheck flag is set, then this function is being called + ** as part of populating the temp table for an UPDATE...FROM statement. + ** In this case, it is an error if the target object (pSrc->a[0]) name + ** or alias is duplicated within FROM clause (pSrc->a[1..n]). + ** + ** Postgres disallows this case too. The reason is that some other + ** systems handle this case differently, and not all the same way, + ** which is just confusing. To avoid this, we follow PG's lead and + ** disallow it altogether. */ + if( p->selFlags & SF_UFSrcCheck ){ + SrcItem *p0 = &p->pSrc->a[0]; + for(i=1; ipSrc->nSrc; i++){ + SrcItem *p1 = &p->pSrc->a[i]; + if( p0->pTab==p1->pTab && 0==sqlite3_stricmp(p0->zAlias, p1->zAlias) ){ + sqlite3ErrorMsg(pParse, + "target object/alias may not appear in FROM clause: %s", + p0->zAlias ? p0->zAlias : p0->pTab->zName + ); + goto select_end; + } + } + + /* Clear the SF_UFSrcCheck flag. The check has already been performed, + ** and leaving this flag set can cause errors if a compound sub-query + ** in p->pSrc is flattened into this query and this function called + ** again as part of compound SELECT processing. */ + p->selFlags &= ~SF_UFSrcCheck; + } + + if( pDest->eDest==SRT_Output ){ + sqlite3GenerateColumnNames(pParse, p); + } + +#ifndef SQLITE_OMIT_WINDOWFUNC + if( sqlite3WindowRewrite(pParse, p) ){ + assert( pParse->nErr ); + goto select_end; + } +#if SELECTTRACE_ENABLED + if( p->pWin && (sqlite3SelectTrace & 0x108)!=0 ){ + SELECTTRACE(0x104,pParse,p, ("after window rewrite:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif +#endif /* SQLITE_OMIT_WINDOWFUNC */ + pTabList = p->pSrc; + isAgg = (p->selFlags & SF_Aggregate)!=0; + memset(&sSort, 0, sizeof(sSort)); + sSort.pOrderBy = p->pOrderBy; + + /* Try to do various optimizations (flattening subqueries, and strength + ** reduction of join operators) in the FROM clause up into the main query + */ +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) + for(i=0; !p->pPrior && inSrc; i++){ + SrcItem *pItem = &pTabList->a[i]; + Select *pSub = pItem->pSelect; + Table *pTab = pItem->pTab; + + /* The expander should have already created transient Table objects + ** even for FROM clause elements such as subqueries that do not correspond + ** to a real table */ + assert( pTab!=0 ); + + /* Convert LEFT JOIN into JOIN if there are terms of the right table + ** of the LEFT JOIN used in the WHERE clause. + */ + if( (pItem->fg.jointype & JT_LEFT)!=0 + && sqlite3ExprImpliesNonNullRow(p->pWhere, pItem->iCursor) + && OptimizationEnabled(db, SQLITE_SimplifyJoin) + ){ + SELECTTRACE(0x100,pParse,p, + ("LEFT-JOIN simplifies to JOIN on term %d\n",i)); + pItem->fg.jointype &= ~(JT_LEFT|JT_OUTER); + unsetJoinExpr(p->pWhere, pItem->iCursor); + } + + /* No futher action if this term of the FROM clause is no a subquery */ + if( pSub==0 ) continue; + + /* Catch mismatch in the declared columns of a view and the number of + ** columns in the SELECT on the RHS */ + if( pTab->nCol!=pSub->pEList->nExpr ){ + sqlite3ErrorMsg(pParse, "expected %d columns for '%s' but got %d", + pTab->nCol, pTab->zName, pSub->pEList->nExpr); + goto select_end; + } + + /* Do not try to flatten an aggregate subquery. + ** + ** Flattening an aggregate subquery is only possible if the outer query + ** is not a join. But if the outer query is not a join, then the subquery + ** will be implemented as a co-routine and there is no advantage to + ** flattening in that case. + */ + if( (pSub->selFlags & SF_Aggregate)!=0 ) continue; + assert( pSub->pGroupBy==0 ); + + /* If a FROM-clause subquery has an ORDER BY clause that is not + ** really doing anything, then delete it now so that it does not + ** interfere with query flattening. See the discussion at + ** https://sqlite.org/forum/forumpost/2d76f2bcf65d256a + ** + ** Beware of these cases where the ORDER BY clause may not be safely + ** omitted: + ** + ** (1) There is also a LIMIT clause + ** (2) The subquery was added to help with window-function + ** processing + ** (3) The subquery is in the FROM clause of an UPDATE + ** (4) The outer query uses an aggregate function other than + ** the built-in count(), min(), or max(). + ** (5) The ORDER BY isn't going to accomplish anything because + ** one of: + ** (a) The outer query has a different ORDER BY clause + ** (b) The subquery is part of a join + ** See forum post 062d576715d277c8 + */ + if( pSub->pOrderBy!=0 + && (p->pOrderBy!=0 || pTabList->nSrc>1) /* Condition (5) */ + && pSub->pLimit==0 /* Condition (1) */ + && (pSub->selFlags & SF_OrderByReqd)==0 /* Condition (2) */ + && (p->selFlags & SF_OrderByReqd)==0 /* Condition (3) and (4) */ + && OptimizationEnabled(db, SQLITE_OmitOrderBy) + ){ + SELECTTRACE(0x100,pParse,p, + ("omit superfluous ORDER BY on %r FROM-clause subquery\n",i+1)); + sqlite3ExprListDelete(db, pSub->pOrderBy); + pSub->pOrderBy = 0; + } + + /* If the outer query contains a "complex" result set (that is, + ** if the result set of the outer query uses functions or subqueries) + ** and if the subquery contains an ORDER BY clause and if + ** it will be implemented as a co-routine, then do not flatten. This + ** restriction allows SQL constructs like this: + ** + ** SELECT expensive_function(x) + ** FROM (SELECT x FROM tab ORDER BY y LIMIT 10); + ** + ** The expensive_function() is only computed on the 10 rows that + ** are output, rather than every row of the table. + ** + ** The requirement that the outer query have a complex result set + ** means that flattening does occur on simpler SQL constraints without + ** the expensive_function() like: + ** + ** SELECT x FROM (SELECT x FROM tab ORDER BY y LIMIT 10); + */ + if( pSub->pOrderBy!=0 + && i==0 + && (p->selFlags & SF_ComplexResult)!=0 + && (pTabList->nSrc==1 + || (pTabList->a[1].fg.jointype&(JT_LEFT|JT_CROSS))!=0) + ){ + continue; + } + + if( flattenSubquery(pParse, p, i, isAgg) ){ + if( pParse->nErr ) goto select_end; + /* This subquery can be absorbed into its parent. */ + i = -1; + } + pTabList = p->pSrc; + if( db->mallocFailed ) goto select_end; + if( !IgnorableOrderby(pDest) ){ + sSort.pOrderBy = p->pOrderBy; + } + } +#endif + +#ifndef SQLITE_OMIT_COMPOUND_SELECT + /* Handle compound SELECT statements using the separate multiSelect() + ** procedure. + */ + if( p->pPrior ){ + rc = multiSelect(pParse, p, pDest); +#if SELECTTRACE_ENABLED + SELECTTRACE(0x1,pParse,p,("end compound-select processing\n")); + if( (sqlite3SelectTrace & 0x2000)!=0 && ExplainQueryPlanParent(pParse)==0 ){ + sqlite3TreeViewSelect(0, p, 0); + } +#endif + if( p->pNext==0 ) ExplainQueryPlanPop(pParse); + return rc; + } +#endif + + /* Do the WHERE-clause constant propagation optimization if this is + ** a join. No need to speed time on this operation for non-join queries + ** as the equivalent optimization will be handled by query planner in + ** sqlite3WhereBegin(). + */ + if( p->pWhere!=0 + && p->pWhere->op==TK_AND + && OptimizationEnabled(db, SQLITE_PropagateConst) + && propagateConstants(pParse, p) + ){ +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x100 ){ + SELECTTRACE(0x100,pParse,p,("After constant propagation:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif + }else{ + SELECTTRACE(0x100,pParse,p,("Constant propagation not helpful\n")); + } + +#ifdef SQLITE_COUNTOFVIEW_OPTIMIZATION + if( OptimizationEnabled(db, SQLITE_QueryFlattener|SQLITE_CountOfView) + && countOfViewOptimization(pParse, p) + ){ + if( db->mallocFailed ) goto select_end; + pEList = p->pEList; + pTabList = p->pSrc; + } +#endif + + /* For each term in the FROM clause, do two things: + ** (1) Authorized unreferenced tables + ** (2) Generate code for all sub-queries + */ + for(i=0; inSrc; i++){ + SrcItem *pItem = &pTabList->a[i]; + SrcItem *pPrior; + SelectDest dest; + Select *pSub; +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) + const char *zSavedAuthContext; +#endif + + /* Issue SQLITE_READ authorizations with a fake column name for any + ** tables that are referenced but from which no values are extracted. + ** Examples of where these kinds of null SQLITE_READ authorizations + ** would occur: + ** + ** SELECT count(*) FROM t1; -- SQLITE_READ t1."" + ** SELECT t1.* FROM t1, t2; -- SQLITE_READ t2."" + ** + ** The fake column name is an empty string. It is possible for a table to + ** have a column named by the empty string, in which case there is no way to + ** distinguish between an unreferenced table and an actual reference to the + ** "" column. The original design was for the fake column name to be a NULL, + ** which would be unambiguous. But legacy authorization callbacks might + ** assume the column name is non-NULL and segfault. The use of an empty + ** string for the fake column name seems safer. + */ + if( pItem->colUsed==0 && pItem->zName!=0 ){ + sqlite3AuthCheck(pParse, SQLITE_READ, pItem->zName, "", pItem->zDatabase); + } + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) + /* Generate code for all sub-queries in the FROM clause + */ + pSub = pItem->pSelect; + if( pSub==0 ) continue; + + /* The code for a subquery should only be generated once. */ + assert( pItem->addrFillSub==0 ); + + /* Increment Parse.nHeight by the height of the largest expression + ** tree referred to by this, the parent select. The child select + ** may contain expression trees of at most + ** (SQLITE_MAX_EXPR_DEPTH-Parse.nHeight) height. This is a bit + ** more conservative than necessary, but much easier than enforcing + ** an exact limit. + */ + pParse->nHeight += sqlite3SelectExprHeight(p); + + /* Make copies of constant WHERE-clause terms in the outer query down + ** inside the subquery. This can help the subquery to run more efficiently. + */ + if( OptimizationEnabled(db, SQLITE_PushDown) + && (pItem->fg.isCte==0 + || (pItem->u2.pCteUse->eM10d!=M10d_Yes && pItem->u2.pCteUse->nUse<2)) + && pushDownWhereTerms(pParse, pSub, p->pWhere, pItem) + ){ +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x100 ){ + SELECTTRACE(0x100,pParse,p, + ("After WHERE-clause push-down into subquery %d:\n", pSub->selId)); + sqlite3TreeViewSelect(0, p, 0); + } +#endif + assert( pItem->pSelect && (pItem->pSelect->selFlags & SF_PushDown)!=0 ); + }else{ + SELECTTRACE(0x100,pParse,p,("Push-down not possible\n")); + } + + zSavedAuthContext = pParse->zAuthContext; + pParse->zAuthContext = pItem->zName; + + /* Generate code to implement the subquery + ** + ** The subquery is implemented as a co-routine if: + ** (1) the subquery is guaranteed to be the outer loop (so that + ** it does not need to be computed more than once), and + ** (2) the subquery is not a CTE that should be materialized + ** + ** TODO: Are there other reasons beside (1) and (2) to use a co-routine + ** implementation? + */ + if( i==0 + && (pTabList->nSrc==1 + || (pTabList->a[1].fg.jointype&(JT_LEFT|JT_CROSS))!=0) /* (1) */ + && (pItem->fg.isCte==0 || pItem->u2.pCteUse->eM10d!=M10d_Yes) /* (2) */ + ){ + /* Implement a co-routine that will return a single row of the result + ** set on each invocation. + */ + int addrTop = sqlite3VdbeCurrentAddr(v)+1; + + pItem->regReturn = ++pParse->nMem; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, pItem->regReturn, 0, addrTop); + VdbeComment((v, "%!S", pItem)); + pItem->addrFillSub = addrTop; + sqlite3SelectDestInit(&dest, SRT_Coroutine, pItem->regReturn); + ExplainQueryPlan((pParse, 1, "CO-ROUTINE %!S", pItem)); + sqlite3Select(pParse, pSub, &dest); + pItem->pTab->nRowLogEst = pSub->nSelectRow; + pItem->fg.viaCoroutine = 1; + pItem->regResult = dest.iSdst; + sqlite3VdbeEndCoroutine(v, pItem->regReturn); + sqlite3VdbeJumpHere(v, addrTop-1); + sqlite3ClearTempRegCache(pParse); + }else if( pItem->fg.isCte && pItem->u2.pCteUse->addrM9e>0 ){ + /* This is a CTE for which materialization code has already been + ** generated. Invoke the subroutine to compute the materialization, + ** the make the pItem->iCursor be a copy of the ephemerial table that + ** holds the result of the materialization. */ + CteUse *pCteUse = pItem->u2.pCteUse; + sqlite3VdbeAddOp2(v, OP_Gosub, pCteUse->regRtn, pCteUse->addrM9e); + if( pItem->iCursor!=pCteUse->iCur ){ + sqlite3VdbeAddOp2(v, OP_OpenDup, pItem->iCursor, pCteUse->iCur); + VdbeComment((v, "%!S", pItem)); + } + pSub->nSelectRow = pCteUse->nRowEst; + }else if( (pPrior = isSelfJoinView(pTabList, pItem))!=0 ){ + /* This view has already been materialized by a prior entry in + ** this same FROM clause. Reuse it. */ + if( pPrior->addrFillSub ){ + sqlite3VdbeAddOp2(v, OP_Gosub, pPrior->regReturn, pPrior->addrFillSub); + } + sqlite3VdbeAddOp2(v, OP_OpenDup, pItem->iCursor, pPrior->iCursor); + pSub->nSelectRow = pPrior->pSelect->nSelectRow; + }else{ + /* Materialize the view. If the view is not correlated, generate a + ** subroutine to do the materialization so that subsequent uses of + ** the same view can reuse the materialization. */ + int topAddr; + int onceAddr = 0; + int retAddr; + + pItem->regReturn = ++pParse->nMem; + topAddr = sqlite3VdbeAddOp2(v, OP_Integer, 0, pItem->regReturn); + pItem->addrFillSub = topAddr+1; + if( pItem->fg.isCorrelated==0 ){ + /* If the subquery is not correlated and if we are not inside of + ** a trigger, then we only need to compute the value of the subquery + ** once. */ + onceAddr = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + VdbeComment((v, "materialize %!S", pItem)); + }else{ + VdbeNoopComment((v, "materialize %!S", pItem)); + } + sqlite3SelectDestInit(&dest, SRT_EphemTab, pItem->iCursor); + ExplainQueryPlan((pParse, 1, "MATERIALIZE %!S", pItem)); + sqlite3Select(pParse, pSub, &dest); + pItem->pTab->nRowLogEst = pSub->nSelectRow; + if( onceAddr ) sqlite3VdbeJumpHere(v, onceAddr); + retAddr = sqlite3VdbeAddOp1(v, OP_Return, pItem->regReturn); + VdbeComment((v, "end %!S", pItem)); + sqlite3VdbeChangeP1(v, topAddr, retAddr); + sqlite3ClearTempRegCache(pParse); + if( pItem->fg.isCte && pItem->fg.isCorrelated==0 ){ + CteUse *pCteUse = pItem->u2.pCteUse; + pCteUse->addrM9e = pItem->addrFillSub; + pCteUse->regRtn = pItem->regReturn; + pCteUse->iCur = pItem->iCursor; + pCteUse->nRowEst = pSub->nSelectRow; + } + } + if( db->mallocFailed ) goto select_end; + pParse->nHeight -= sqlite3SelectExprHeight(p); + pParse->zAuthContext = zSavedAuthContext; +#endif + } + + /* Various elements of the SELECT copied into local variables for + ** convenience */ + pEList = p->pEList; + pWhere = p->pWhere; + pGroupBy = p->pGroupBy; + pHaving = p->pHaving; + sDistinct.isTnct = (p->selFlags & SF_Distinct)!=0; + +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x400 ){ + SELECTTRACE(0x400,pParse,p,("After all FROM-clause analysis:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif + + /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and + ** if the select-list is the same as the ORDER BY list, then this query + ** can be rewritten as a GROUP BY. In other words, this: + ** + ** SELECT DISTINCT xyz FROM ... ORDER BY xyz + ** + ** is transformed to: + ** + ** SELECT xyz FROM ... GROUP BY xyz ORDER BY xyz + ** + ** The second form is preferred as a single index (or temp-table) may be + ** used for both the ORDER BY and DISTINCT processing. As originally + ** written the query must use a temp-table for at least one of the ORDER + ** BY and DISTINCT, and an index or separate temp-table for the other. + */ + if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct + && sqlite3ExprListCompare(sSort.pOrderBy, pEList, -1)==0 +#ifndef SQLITE_OMIT_WINDOWFUNC + && p->pWin==0 +#endif + ){ + p->selFlags &= ~SF_Distinct; + pGroupBy = p->pGroupBy = sqlite3ExprListDup(db, pEList, 0); + p->selFlags |= SF_Aggregate; + /* Notice that even thought SF_Distinct has been cleared from p->selFlags, + ** the sDistinct.isTnct is still set. Hence, isTnct represents the + ** original setting of the SF_Distinct flag, not the current setting */ + assert( sDistinct.isTnct ); + +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x400 ){ + SELECTTRACE(0x400,pParse,p,("Transform DISTINCT into GROUP BY:\n")); + sqlite3TreeViewSelect(0, p, 0); + } +#endif + } + + /* If there is an ORDER BY clause, then create an ephemeral index to + ** do the sorting. But this sorting ephemeral index might end up + ** being unused if the data can be extracted in pre-sorted order. + ** If that is the case, then the OP_OpenEphemeral instruction will be + ** changed to an OP_Noop once we figure out that the sorting index is + ** not needed. The sSort.addrSortIndex variable is used to facilitate + ** that change. + */ + if( sSort.pOrderBy ){ + KeyInfo *pKeyInfo; + pKeyInfo = sqlite3KeyInfoFromExprList( + pParse, sSort.pOrderBy, 0, pEList->nExpr); + sSort.iECursor = pParse->nTab++; + sSort.addrSortIndex = + sqlite3VdbeAddOp4(v, OP_OpenEphemeral, + sSort.iECursor, sSort.pOrderBy->nExpr+1+pEList->nExpr, 0, + (char*)pKeyInfo, P4_KEYINFO + ); + }else{ + sSort.addrSortIndex = -1; + } + + /* If the output is destined for a temporary table, open that table. + */ + if( pDest->eDest==SRT_EphemTab ){ + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pDest->iSDParm, pEList->nExpr); + } + + /* Set the limiter. + */ + iEnd = sqlite3VdbeMakeLabel(pParse); + if( (p->selFlags & SF_FixedLimit)==0 ){ + p->nSelectRow = 320; /* 4 billion rows */ + } + computeLimitRegisters(pParse, p, iEnd); + if( p->iLimit==0 && sSort.addrSortIndex>=0 ){ + sqlite3VdbeChangeOpcode(v, sSort.addrSortIndex, OP_SorterOpen); + sSort.sortFlags |= SORTFLAG_UseSorter; + } + + /* Open an ephemeral index to use for the distinct set. + */ + if( p->selFlags & SF_Distinct ){ + sDistinct.tabTnct = pParse->nTab++; + sDistinct.addrTnct = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, + sDistinct.tabTnct, 0, 0, + (char*)sqlite3KeyInfoFromExprList(pParse, p->pEList,0,0), + P4_KEYINFO); + sqlite3VdbeChangeP5(v, BTREE_UNORDERED); + sDistinct.eTnctType = WHERE_DISTINCT_UNORDERED; + }else{ + sDistinct.eTnctType = WHERE_DISTINCT_NOOP; + } + + if( !isAgg && pGroupBy==0 ){ + /* No aggregate functions and no GROUP BY clause */ + u16 wctrlFlags = (sDistinct.isTnct ? WHERE_WANT_DISTINCT : 0) + | (p->selFlags & SF_FixedLimit); +#ifndef SQLITE_OMIT_WINDOWFUNC + Window *pWin = p->pWin; /* Main window object (or NULL) */ + if( pWin ){ + sqlite3WindowCodeInit(pParse, p); + } +#endif + assert( WHERE_USE_LIMIT==SF_FixedLimit ); + + + /* Begin the database scan. */ + SELECTTRACE(1,pParse,p,("WhereBegin\n")); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, sSort.pOrderBy, + p->pEList, p, wctrlFlags, p->nSelectRow); + if( pWInfo==0 ) goto select_end; + if( sqlite3WhereOutputRowCount(pWInfo) < p->nSelectRow ){ + p->nSelectRow = sqlite3WhereOutputRowCount(pWInfo); + } + if( sDistinct.isTnct && sqlite3WhereIsDistinct(pWInfo) ){ + sDistinct.eTnctType = sqlite3WhereIsDistinct(pWInfo); + } + if( sSort.pOrderBy ){ + sSort.nOBSat = sqlite3WhereIsOrdered(pWInfo); + sSort.labelOBLopt = sqlite3WhereOrderByLimitOptLabel(pWInfo); + if( sSort.nOBSat==sSort.pOrderBy->nExpr ){ + sSort.pOrderBy = 0; + } + } + SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); + + /* If sorting index that was created by a prior OP_OpenEphemeral + ** instruction ended up not being needed, then change the OP_OpenEphemeral + ** into an OP_Noop. + */ + if( sSort.addrSortIndex>=0 && sSort.pOrderBy==0 ){ + sqlite3VdbeChangeToNoop(v, sSort.addrSortIndex); + } + + assert( p->pEList==pEList ); +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pWin ){ + int addrGosub = sqlite3VdbeMakeLabel(pParse); + int iCont = sqlite3VdbeMakeLabel(pParse); + int iBreak = sqlite3VdbeMakeLabel(pParse); + int regGosub = ++pParse->nMem; + + sqlite3WindowCodeStep(pParse, p, pWInfo, regGosub, addrGosub); + + sqlite3VdbeAddOp2(v, OP_Goto, 0, iBreak); + sqlite3VdbeResolveLabel(v, addrGosub); + VdbeNoopComment((v, "inner-loop subroutine")); + sSort.labelOBLopt = 0; + selectInnerLoop(pParse, p, -1, &sSort, &sDistinct, pDest, iCont, iBreak); + sqlite3VdbeResolveLabel(v, iCont); + sqlite3VdbeAddOp1(v, OP_Return, regGosub); + VdbeComment((v, "end inner-loop subroutine")); + sqlite3VdbeResolveLabel(v, iBreak); + }else +#endif /* SQLITE_OMIT_WINDOWFUNC */ + { + /* Use the standard inner loop. */ + selectInnerLoop(pParse, p, -1, &sSort, &sDistinct, pDest, + sqlite3WhereContinueLabel(pWInfo), + sqlite3WhereBreakLabel(pWInfo)); + + /* End the database scan loop. + */ + SELECTTRACE(1,pParse,p,("WhereEnd\n")); + sqlite3WhereEnd(pWInfo); + } + }else{ + /* This case when there exist aggregate functions or a GROUP BY clause + ** or both */ + NameContext sNC; /* Name context for processing aggregate information */ + int iAMem; /* First Mem address for storing current GROUP BY */ + int iBMem; /* First Mem address for previous GROUP BY */ + int iUseFlag; /* Mem address holding flag indicating that at least + ** one row of the input to the aggregator has been + ** processed */ + int iAbortFlag; /* Mem address which causes query abort if positive */ + int groupBySort; /* Rows come from source in GROUP BY order */ + int addrEnd; /* End of processing for this SELECT */ + int sortPTab = 0; /* Pseudotable used to decode sorting results */ + int sortOut = 0; /* Output register from the sorter */ + int orderByGrp = 0; /* True if the GROUP BY and ORDER BY are the same */ + + /* Remove any and all aliases between the result set and the + ** GROUP BY clause. + */ + if( pGroupBy ){ + int k; /* Loop counter */ + struct ExprList_item *pItem; /* For looping over expression in a list */ + + for(k=p->pEList->nExpr, pItem=p->pEList->a; k>0; k--, pItem++){ + pItem->u.x.iAlias = 0; + } + for(k=pGroupBy->nExpr, pItem=pGroupBy->a; k>0; k--, pItem++){ + pItem->u.x.iAlias = 0; + } + assert( 66==sqlite3LogEst(100) ); + if( p->nSelectRow>66 ) p->nSelectRow = 66; + + /* If there is both a GROUP BY and an ORDER BY clause and they are + ** identical, then it may be possible to disable the ORDER BY clause + ** on the grounds that the GROUP BY will cause elements to come out + ** in the correct order. It also may not - the GROUP BY might use a + ** database index that causes rows to be grouped together as required + ** but not actually sorted. Either way, record the fact that the + ** ORDER BY and GROUP BY clauses are the same by setting the orderByGrp + ** variable. */ + if( sSort.pOrderBy && pGroupBy->nExpr==sSort.pOrderBy->nExpr ){ + int ii; + /* The GROUP BY processing doesn't care whether rows are delivered in + ** ASC or DESC order - only that each group is returned contiguously. + ** So set the ASC/DESC flags in the GROUP BY to match those in the + ** ORDER BY to maximize the chances of rows being delivered in an + ** order that makes the ORDER BY redundant. */ + for(ii=0; iinExpr; ii++){ + u8 sortFlags = sSort.pOrderBy->a[ii].sortFlags & KEYINFO_ORDER_DESC; + pGroupBy->a[ii].sortFlags = sortFlags; + } + if( sqlite3ExprListCompare(pGroupBy, sSort.pOrderBy, -1)==0 ){ + orderByGrp = 1; + } + } + }else{ + assert( 0==sqlite3LogEst(1) ); + p->nSelectRow = 0; + } + + /* Create a label to jump to when we want to abort the query */ + addrEnd = sqlite3VdbeMakeLabel(pParse); + + /* Convert TK_COLUMN nodes into TK_AGG_COLUMN and make entries in + ** sAggInfo for all TK_AGG_FUNCTION nodes in expressions of the + ** SELECT statement. + */ + pAggInfo = sqlite3DbMallocZero(db, sizeof(*pAggInfo) ); + if( pAggInfo ){ + sqlite3ParserAddCleanup(pParse, + (void(*)(sqlite3*,void*))agginfoFree, pAggInfo); + testcase( pParse->earlyCleanup ); + } + if( db->mallocFailed ){ + goto select_end; + } + pAggInfo->selId = p->selId; + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + sNC.pSrcList = pTabList; + sNC.uNC.pAggInfo = pAggInfo; + VVA_ONLY( sNC.ncFlags = NC_UAggInfo; ) + pAggInfo->mnReg = pParse->nMem+1; + pAggInfo->nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0; + pAggInfo->pGroupBy = pGroupBy; + sqlite3ExprAnalyzeAggList(&sNC, pEList); + sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy); + if( pHaving ){ + if( pGroupBy ){ + assert( pWhere==p->pWhere ); + assert( pHaving==p->pHaving ); + assert( pGroupBy==p->pGroupBy ); + havingToWhere(pParse, p); + pWhere = p->pWhere; + } + sqlite3ExprAnalyzeAggregates(&sNC, pHaving); + } + pAggInfo->nAccumulator = pAggInfo->nColumn; + if( p->pGroupBy==0 && p->pHaving==0 && pAggInfo->nFunc==1 ){ + minMaxFlag = minMaxQuery(db, pAggInfo->aFunc[0].pFExpr, &pMinMaxOrderBy); + }else{ + minMaxFlag = WHERE_ORDERBY_NORMAL; + } + for(i=0; inFunc; i++){ + Expr *pExpr = pAggInfo->aFunc[i].pFExpr; + assert( ExprUseXList(pExpr) ); + sNC.ncFlags |= NC_InAggFunc; + sqlite3ExprAnalyzeAggList(&sNC, pExpr->x.pList); +#ifndef SQLITE_OMIT_WINDOWFUNC + assert( !IsWindowFunc(pExpr) ); + if( ExprHasProperty(pExpr, EP_WinFunc) ){ + sqlite3ExprAnalyzeAggregates(&sNC, pExpr->y.pWin->pFilter); + } +#endif + sNC.ncFlags &= ~NC_InAggFunc; + } + pAggInfo->mxReg = pParse->nMem; + if( db->mallocFailed ) goto select_end; +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x400 ){ + int ii; + SELECTTRACE(0x400,pParse,p,("After aggregate analysis %p:\n", pAggInfo)); + sqlite3TreeViewSelect(0, p, 0); + if( minMaxFlag ){ + sqlite3DebugPrintf("MIN/MAX Optimization (0x%02x) adds:\n", minMaxFlag); + sqlite3TreeViewExprList(0, pMinMaxOrderBy, 0, "ORDERBY"); + } + for(ii=0; iinColumn; ii++){ + sqlite3DebugPrintf("agg-column[%d] iMem=%d\n", + ii, pAggInfo->aCol[ii].iMem); + sqlite3TreeViewExpr(0, pAggInfo->aCol[ii].pCExpr, 0); + } + for(ii=0; iinFunc; ii++){ + sqlite3DebugPrintf("agg-func[%d]: iMem=%d\n", + ii, pAggInfo->aFunc[ii].iMem); + sqlite3TreeViewExpr(0, pAggInfo->aFunc[ii].pFExpr, 0); + } + } +#endif + + + /* Processing for aggregates with GROUP BY is very different and + ** much more complex than aggregates without a GROUP BY. + */ + if( pGroupBy ){ + KeyInfo *pKeyInfo; /* Keying information for the group by clause */ + int addr1; /* A-vs-B comparision jump */ + int addrOutputRow; /* Start of subroutine that outputs a result row */ + int regOutputRow; /* Return address register for output subroutine */ + int addrSetAbort; /* Set the abort flag and return */ + int addrTopOfLoop; /* Top of the input loop */ + int addrSortingIdx; /* The OP_OpenEphemeral for the sorting index */ + int addrReset; /* Subroutine for resetting the accumulator */ + int regReset; /* Return address register for reset subroutine */ + ExprList *pDistinct = 0; + u16 distFlag = 0; + int eDist = WHERE_DISTINCT_NOOP; + + if( pAggInfo->nFunc==1 + && pAggInfo->aFunc[0].iDistinct>=0 + && ALWAYS(pAggInfo->aFunc[0].pFExpr!=0) + && ALWAYS(ExprUseXList(pAggInfo->aFunc[0].pFExpr)) + && pAggInfo->aFunc[0].pFExpr->x.pList!=0 + ){ + Expr *pExpr = pAggInfo->aFunc[0].pFExpr->x.pList->a[0].pExpr; + pExpr = sqlite3ExprDup(db, pExpr, 0); + pDistinct = sqlite3ExprListDup(db, pGroupBy, 0); + pDistinct = sqlite3ExprListAppend(pParse, pDistinct, pExpr); + distFlag = pDistinct ? (WHERE_WANT_DISTINCT|WHERE_AGG_DISTINCT) : 0; + } + + /* If there is a GROUP BY clause we might need a sorting index to + ** implement it. Allocate that sorting index now. If it turns out + ** that we do not need it after all, the OP_SorterOpen instruction + ** will be converted into a Noop. + */ + pAggInfo->sortingIdx = pParse->nTab++; + pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pGroupBy, + 0, pAggInfo->nColumn); + addrSortingIdx = sqlite3VdbeAddOp4(v, OP_SorterOpen, + pAggInfo->sortingIdx, pAggInfo->nSortingColumn, + 0, (char*)pKeyInfo, P4_KEYINFO); + + /* Initialize memory locations used by GROUP BY aggregate processing + */ + iUseFlag = ++pParse->nMem; + iAbortFlag = ++pParse->nMem; + regOutputRow = ++pParse->nMem; + addrOutputRow = sqlite3VdbeMakeLabel(pParse); + regReset = ++pParse->nMem; + addrReset = sqlite3VdbeMakeLabel(pParse); + iAMem = pParse->nMem + 1; + pParse->nMem += pGroupBy->nExpr; + iBMem = pParse->nMem + 1; + pParse->nMem += pGroupBy->nExpr; + sqlite3VdbeAddOp2(v, OP_Integer, 0, iAbortFlag); + VdbeComment((v, "clear abort flag")); + sqlite3VdbeAddOp3(v, OP_Null, 0, iAMem, iAMem+pGroupBy->nExpr-1); + + /* Begin a loop that will extract all source rows in GROUP BY order. + ** This might involve two separate loops with an OP_Sort in between, or + ** it might be a single loop that uses an index to extract information + ** in the right order to begin with. + */ + sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); + SELECTTRACE(1,pParse,p,("WhereBegin\n")); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, pDistinct, + 0, (WHERE_GROUPBY|(orderByGrp ? WHERE_SORTBYGROUP : 0)|distFlag), 0 + ); + if( pWInfo==0 ){ + sqlite3ExprListDelete(db, pDistinct); + goto select_end; + } + eDist = sqlite3WhereIsDistinct(pWInfo); + SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); + if( sqlite3WhereIsOrdered(pWInfo)==pGroupBy->nExpr ){ + /* The optimizer is able to deliver rows in group by order so + ** we do not have to sort. The OP_OpenEphemeral table will be + ** cancelled later because we still need to use the pKeyInfo + */ + groupBySort = 0; + }else{ + /* Rows are coming out in undetermined order. We have to push + ** each row into a sorting index, terminate the first loop, + ** then loop over the sorting index in order to get the output + ** in sorted order + */ + int regBase; + int regRecord; + int nCol; + int nGroupBy; + + explainTempTable(pParse, + (sDistinct.isTnct && (p->selFlags&SF_Distinct)==0) ? + "DISTINCT" : "GROUP BY"); + + groupBySort = 1; + nGroupBy = pGroupBy->nExpr; + nCol = nGroupBy; + j = nGroupBy; + for(i=0; inColumn; i++){ + if( pAggInfo->aCol[i].iSorterColumn>=j ){ + nCol++; + j++; + } + } + regBase = sqlite3GetTempRange(pParse, nCol); + sqlite3ExprCodeExprList(pParse, pGroupBy, regBase, 0, 0); + j = nGroupBy; + for(i=0; inColumn; i++){ + struct AggInfo_col *pCol = &pAggInfo->aCol[i]; + if( pCol->iSorterColumn>=j ){ + int r1 = j + regBase; + sqlite3ExprCodeGetColumnOfTable(v, + pCol->pTab, pCol->iTable, pCol->iColumn, r1); + j++; + } + } + regRecord = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regRecord); + sqlite3VdbeAddOp2(v, OP_SorterInsert, pAggInfo->sortingIdx, regRecord); + sqlite3ReleaseTempReg(pParse, regRecord); + sqlite3ReleaseTempRange(pParse, regBase, nCol); + SELECTTRACE(1,pParse,p,("WhereEnd\n")); + sqlite3WhereEnd(pWInfo); + pAggInfo->sortingIdxPTab = sortPTab = pParse->nTab++; + sortOut = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_OpenPseudo, sortPTab, sortOut, nCol); + sqlite3VdbeAddOp2(v, OP_SorterSort, pAggInfo->sortingIdx, addrEnd); + VdbeComment((v, "GROUP BY sort")); VdbeCoverage(v); + pAggInfo->useSortingIdx = 1; + } + + /* If the index or temporary table used by the GROUP BY sort + ** will naturally deliver rows in the order required by the ORDER BY + ** clause, cancel the ephemeral table open coded earlier. + ** + ** This is an optimization - the correct answer should result regardless. + ** Use the SQLITE_GroupByOrder flag with SQLITE_TESTCTRL_OPTIMIZER to + ** disable this optimization for testing purposes. */ + if( orderByGrp && OptimizationEnabled(db, SQLITE_GroupByOrder) + && (groupBySort || sqlite3WhereIsSorted(pWInfo)) + ){ + sSort.pOrderBy = 0; + sqlite3VdbeChangeToNoop(v, sSort.addrSortIndex); + } + + /* Evaluate the current GROUP BY terms and store in b0, b1, b2... + ** (b0 is memory location iBMem+0, b1 is iBMem+1, and so forth) + ** Then compare the current GROUP BY terms against the GROUP BY terms + ** from the previous row currently stored in a0, a1, a2... + */ + addrTopOfLoop = sqlite3VdbeCurrentAddr(v); + if( groupBySort ){ + sqlite3VdbeAddOp3(v, OP_SorterData, pAggInfo->sortingIdx, + sortOut, sortPTab); + } + for(j=0; jnExpr; j++){ + if( groupBySort ){ + sqlite3VdbeAddOp3(v, OP_Column, sortPTab, j, iBMem+j); + }else{ + pAggInfo->directMode = 1; + sqlite3ExprCode(pParse, pGroupBy->a[j].pExpr, iBMem+j); + } + } + sqlite3VdbeAddOp4(v, OP_Compare, iAMem, iBMem, pGroupBy->nExpr, + (char*)sqlite3KeyInfoRef(pKeyInfo), P4_KEYINFO); + addr1 = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp3(v, OP_Jump, addr1+1, 0, addr1+1); VdbeCoverage(v); + + /* Generate code that runs whenever the GROUP BY changes. + ** Changes in the GROUP BY are detected by the previous code + ** block. If there were no changes, this block is skipped. + ** + ** This code copies current group by terms in b0,b1,b2,... + ** over to a0,a1,a2. It then calls the output subroutine + ** and resets the aggregate accumulator registers in preparation + ** for the next GROUP BY batch. + */ + sqlite3ExprCodeMove(pParse, iBMem, iAMem, pGroupBy->nExpr); + sqlite3VdbeAddOp2(v, OP_Gosub, regOutputRow, addrOutputRow); + VdbeComment((v, "output one row")); + sqlite3VdbeAddOp2(v, OP_IfPos, iAbortFlag, addrEnd); VdbeCoverage(v); + VdbeComment((v, "check abort flag")); + sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); + VdbeComment((v, "reset accumulator")); + + /* Update the aggregate accumulators based on the content of + ** the current row + */ + sqlite3VdbeJumpHere(v, addr1); + updateAccumulator(pParse, iUseFlag, pAggInfo, eDist); + sqlite3VdbeAddOp2(v, OP_Integer, 1, iUseFlag); + VdbeComment((v, "indicate data in accumulator")); + + /* End of the loop + */ + if( groupBySort ){ + sqlite3VdbeAddOp2(v, OP_SorterNext, pAggInfo->sortingIdx,addrTopOfLoop); + VdbeCoverage(v); + }else{ + SELECTTRACE(1,pParse,p,("WhereEnd\n")); + sqlite3WhereEnd(pWInfo); + sqlite3VdbeChangeToNoop(v, addrSortingIdx); + } + sqlite3ExprListDelete(db, pDistinct); + + /* Output the final row of result + */ + sqlite3VdbeAddOp2(v, OP_Gosub, regOutputRow, addrOutputRow); + VdbeComment((v, "output final row")); + + /* Jump over the subroutines + */ + sqlite3VdbeGoto(v, addrEnd); + + /* Generate a subroutine that outputs a single row of the result + ** set. This subroutine first looks at the iUseFlag. If iUseFlag + ** is less than or equal to zero, the subroutine is a no-op. If + ** the processing calls for the query to abort, this subroutine + ** increments the iAbortFlag memory location before returning in + ** order to signal the caller to abort. + */ + addrSetAbort = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp2(v, OP_Integer, 1, iAbortFlag); + VdbeComment((v, "set abort flag")); + sqlite3VdbeAddOp1(v, OP_Return, regOutputRow); + sqlite3VdbeResolveLabel(v, addrOutputRow); + addrOutputRow = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp2(v, OP_IfPos, iUseFlag, addrOutputRow+2); + VdbeCoverage(v); + VdbeComment((v, "Groupby result generator entry point")); + sqlite3VdbeAddOp1(v, OP_Return, regOutputRow); + finalizeAggFunctions(pParse, pAggInfo); + sqlite3ExprIfFalse(pParse, pHaving, addrOutputRow+1, SQLITE_JUMPIFNULL); + selectInnerLoop(pParse, p, -1, &sSort, + &sDistinct, pDest, + addrOutputRow+1, addrSetAbort); + sqlite3VdbeAddOp1(v, OP_Return, regOutputRow); + VdbeComment((v, "end groupby result generator")); + + /* Generate a subroutine that will reset the group-by accumulator + */ + sqlite3VdbeResolveLabel(v, addrReset); + resetAccumulator(pParse, pAggInfo); + sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag); + VdbeComment((v, "indicate accumulator empty")); + sqlite3VdbeAddOp1(v, OP_Return, regReset); + + if( eDist!=WHERE_DISTINCT_NOOP ){ + struct AggInfo_func *pF = &pAggInfo->aFunc[0]; + fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr); + } + } /* endif pGroupBy. Begin aggregate queries without GROUP BY: */ + else { + Table *pTab; + if( (pTab = isSimpleCount(p, pAggInfo))!=0 ){ + /* If isSimpleCount() returns a pointer to a Table structure, then + ** the SQL statement is of the form: + ** + ** SELECT count(*) FROM + ** + ** where the Table structure returned represents table . + ** + ** This statement is so common that it is optimized specially. The + ** OP_Count instruction is executed either on the intkey table that + ** contains the data for table or on one of its indexes. It + ** is better to execute the op on an index, as indexes are almost + ** always spread across less pages than their corresponding tables. + */ + const int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + const int iCsr = pParse->nTab++; /* Cursor to scan b-tree */ + Index *pIdx; /* Iterator variable */ + KeyInfo *pKeyInfo = 0; /* Keyinfo for scanned index */ + Index *pBest = 0; /* Best index found so far */ + Pgno iRoot = pTab->tnum; /* Root page of scanned b-tree */ + + sqlite3CodeVerifySchema(pParse, iDb); + sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); + + /* Search for the index that has the lowest scan cost. + ** + ** (2011-04-15) Do not do a full scan of an unordered index. + ** + ** (2013-10-03) Do not count the entries in a partial index. + ** + ** In practice the KeyInfo structure will not be used. It is only + ** passed to keep OP_OpenRead happy. + */ + if( !HasRowid(pTab) ) pBest = sqlite3PrimaryKeyIndex(pTab); + if( !p->pSrc->a[0].fg.notIndexed ){ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->bUnordered==0 + && pIdx->szIdxRowszTabRow + && pIdx->pPartIdxWhere==0 + && (!pBest || pIdx->szIdxRowszIdxRow) + ){ + pBest = pIdx; + } + } + } + if( pBest ){ + iRoot = pBest->tnum; + pKeyInfo = sqlite3KeyInfoOfIndex(pParse, pBest); + } + + /* Open a read-only cursor, execute the OP_Count, close the cursor. */ + sqlite3VdbeAddOp4Int(v, OP_OpenRead, iCsr, (int)iRoot, iDb, 1); + if( pKeyInfo ){ + sqlite3VdbeChangeP4(v, -1, (char *)pKeyInfo, P4_KEYINFO); + } + sqlite3VdbeAddOp2(v, OP_Count, iCsr, pAggInfo->aFunc[0].iMem); + sqlite3VdbeAddOp1(v, OP_Close, iCsr); + explainSimpleCount(pParse, pTab, pBest); + }else{ + int regAcc = 0; /* "populate accumulators" flag */ + ExprList *pDistinct = 0; + u16 distFlag = 0; + int eDist; + + /* If there are accumulator registers but no min() or max() functions + ** without FILTER clauses, allocate register regAcc. Register regAcc + ** will contain 0 the first time the inner loop runs, and 1 thereafter. + ** The code generated by updateAccumulator() uses this to ensure + ** that the accumulator registers are (a) updated only once if + ** there are no min() or max functions or (b) always updated for the + ** first row visited by the aggregate, so that they are updated at + ** least once even if the FILTER clause means the min() or max() + ** function visits zero rows. */ + if( pAggInfo->nAccumulator ){ + for(i=0; inFunc; i++){ + if( ExprHasProperty(pAggInfo->aFunc[i].pFExpr, EP_WinFunc) ){ + continue; + } + if( pAggInfo->aFunc[i].pFunc->funcFlags&SQLITE_FUNC_NEEDCOLL ){ + break; + } + } + if( i==pAggInfo->nFunc ){ + regAcc = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, regAcc); + } + }else if( pAggInfo->nFunc==1 && pAggInfo->aFunc[0].iDistinct>=0 ){ + assert( ExprUseXList(pAggInfo->aFunc[0].pFExpr) ); + pDistinct = pAggInfo->aFunc[0].pFExpr->x.pList; + distFlag = pDistinct ? (WHERE_WANT_DISTINCT|WHERE_AGG_DISTINCT) : 0; + } + + /* This case runs if the aggregate has no GROUP BY clause. The + ** processing is much simpler since there is only a single row + ** of output. + */ + assert( p->pGroupBy==0 ); + resetAccumulator(pParse, pAggInfo); + + /* If this query is a candidate for the min/max optimization, then + ** minMaxFlag will have been previously set to either + ** WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX and pMinMaxOrderBy will + ** be an appropriate ORDER BY expression for the optimization. + */ + assert( minMaxFlag==WHERE_ORDERBY_NORMAL || pMinMaxOrderBy!=0 ); + assert( pMinMaxOrderBy==0 || pMinMaxOrderBy->nExpr==1 ); + + SELECTTRACE(1,pParse,p,("WhereBegin\n")); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy, + pDistinct, 0, minMaxFlag|distFlag, 0); + if( pWInfo==0 ){ + goto select_end; + } + SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); + eDist = sqlite3WhereIsDistinct(pWInfo); + updateAccumulator(pParse, regAcc, pAggInfo, eDist); + if( eDist!=WHERE_DISTINCT_NOOP ){ + struct AggInfo_func *pF = &pAggInfo->aFunc[0]; + fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr); + } + + if( regAcc ) sqlite3VdbeAddOp2(v, OP_Integer, 1, regAcc); + if( minMaxFlag ){ + sqlite3WhereMinMaxOptEarlyOut(v, pWInfo); + } + SELECTTRACE(1,pParse,p,("WhereEnd\n")); + sqlite3WhereEnd(pWInfo); + finalizeAggFunctions(pParse, pAggInfo); + } + + sSort.pOrderBy = 0; + sqlite3ExprIfFalse(pParse, pHaving, addrEnd, SQLITE_JUMPIFNULL); + selectInnerLoop(pParse, p, -1, 0, 0, + pDest, addrEnd, addrEnd); + } + sqlite3VdbeResolveLabel(v, addrEnd); + + } /* endif aggregate query */ + + if( sDistinct.eTnctType==WHERE_DISTINCT_UNORDERED ){ + explainTempTable(pParse, "DISTINCT"); + } + + /* If there is an ORDER BY clause, then we need to sort the results + ** and send them to the callback one by one. + */ + if( sSort.pOrderBy ){ + explainTempTable(pParse, + sSort.nOBSat>0 ? "RIGHT PART OF ORDER BY":"ORDER BY"); + assert( p->pEList==pEList ); + generateSortTail(pParse, p, &sSort, pEList->nExpr, pDest); + } + + /* Jump here to skip this query + */ + sqlite3VdbeResolveLabel(v, iEnd); + + /* The SELECT has been coded. If there is an error in the Parse structure, + ** set the return code to 1. Otherwise 0. */ + rc = (pParse->nErr>0); + + /* Control jumps to here if an error is encountered above, or upon + ** successful coding of the SELECT. + */ +select_end: + assert( db->mallocFailed==0 || db->mallocFailed==1 ); + assert( db->mallocFailed==0 || pParse->nErr!=0 ); + sqlite3ExprListDelete(db, pMinMaxOrderBy); +#ifdef SQLITE_DEBUG + if( pAggInfo && !db->mallocFailed ){ + for(i=0; inColumn; i++){ + Expr *pExpr = pAggInfo->aCol[i].pCExpr; + assert( pExpr!=0 ); + assert( pExpr->pAggInfo==pAggInfo ); + assert( pExpr->iAgg==i ); + } + for(i=0; inFunc; i++){ + Expr *pExpr = pAggInfo->aFunc[i].pFExpr; + assert( pExpr!=0 ); + assert( pExpr->pAggInfo==pAggInfo ); + assert( pExpr->iAgg==i ); + } + } +#endif + +#if SELECTTRACE_ENABLED + SELECTTRACE(0x1,pParse,p,("end processing\n")); + if( (sqlite3SelectTrace & 0x2000)!=0 && ExplainQueryPlanParent(pParse)==0 ){ + sqlite3TreeViewSelect(0, p, 0); + } +#endif + ExplainQueryPlanPop(pParse); + return rc; +} + +/************** End of select.c **********************************************/ +/************** Begin file table.c *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the sqlite3_get_table() and sqlite3_free_table() +** interface routines. These are just wrappers around the main +** interface routine of sqlite3_exec(). +** +** These routines are in a separate files so that they will not be linked +** if they are not used. +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_OMIT_GET_TABLE + +/* +** This structure is used to pass data from sqlite3_get_table() through +** to the callback function is uses to build the result. +*/ +typedef struct TabResult { + char **azResult; /* Accumulated output */ + char *zErrMsg; /* Error message text, if an error occurs */ + u32 nAlloc; /* Slots allocated for azResult[] */ + u32 nRow; /* Number of rows in the result */ + u32 nColumn; /* Number of columns in the result */ + u32 nData; /* Slots used in azResult[]. (nRow+1)*nColumn */ + int rc; /* Return code from sqlite3_exec() */ +} TabResult; + +/* +** This routine is called once for each row in the result table. Its job +** is to fill in the TabResult structure appropriately, allocating new +** memory as necessary. +*/ +static int sqlite3_get_table_cb(void *pArg, int nCol, char **argv, char **colv){ + TabResult *p = (TabResult*)pArg; /* Result accumulator */ + int need; /* Slots needed in p->azResult[] */ + int i; /* Loop counter */ + char *z; /* A single column of result */ + + /* Make sure there is enough space in p->azResult to hold everything + ** we need to remember from this invocation of the callback. + */ + if( p->nRow==0 && argv!=0 ){ + need = nCol*2; + }else{ + need = nCol; + } + if( p->nData + need > p->nAlloc ){ + char **azNew; + p->nAlloc = p->nAlloc*2 + need; + azNew = sqlite3Realloc( p->azResult, sizeof(char*)*p->nAlloc ); + if( azNew==0 ) goto malloc_failed; + p->azResult = azNew; + } + + /* If this is the first row, then generate an extra row containing + ** the names of all columns. + */ + if( p->nRow==0 ){ + p->nColumn = nCol; + for(i=0; iazResult[p->nData++] = z; + } + }else if( (int)p->nColumn!=nCol ){ + sqlite3_free(p->zErrMsg); + p->zErrMsg = sqlite3_mprintf( + "sqlite3_get_table() called with two or more incompatible queries" + ); + p->rc = SQLITE_ERROR; + return 1; + } + + /* Copy over the row data + */ + if( argv!=0 ){ + for(i=0; iazResult[p->nData++] = z; + } + p->nRow++; + } + return 0; + +malloc_failed: + p->rc = SQLITE_NOMEM_BKPT; + return 1; +} + +/* +** Query the database. But instead of invoking a callback for each row, +** malloc() for space to hold the result and return the entire results +** at the conclusion of the call. +** +** The result that is written to ***pazResult is held in memory obtained +** from malloc(). But the caller cannot free this memory directly. +** Instead, the entire table should be passed to sqlite3_free_table() when +** the calling procedure is finished using it. +*/ +SQLITE_API int sqlite3_get_table( + sqlite3 *db, /* The database on which the SQL executes */ + const char *zSql, /* The SQL to be executed */ + char ***pazResult, /* Write the result table here */ + int *pnRow, /* Write the number of rows in the result here */ + int *pnColumn, /* Write the number of columns of result here */ + char **pzErrMsg /* Write error messages here */ +){ + int rc; + TabResult res; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || pazResult==0 ) return SQLITE_MISUSE_BKPT; +#endif + *pazResult = 0; + if( pnColumn ) *pnColumn = 0; + if( pnRow ) *pnRow = 0; + if( pzErrMsg ) *pzErrMsg = 0; + res.zErrMsg = 0; + res.nRow = 0; + res.nColumn = 0; + res.nData = 1; + res.nAlloc = 20; + res.rc = SQLITE_OK; + res.azResult = sqlite3_malloc64(sizeof(char*)*res.nAlloc ); + if( res.azResult==0 ){ + db->errCode = SQLITE_NOMEM; + return SQLITE_NOMEM_BKPT; + } + res.azResult[0] = 0; + rc = sqlite3_exec(db, zSql, sqlite3_get_table_cb, &res, pzErrMsg); + assert( sizeof(res.azResult[0])>= sizeof(res.nData) ); + res.azResult[0] = SQLITE_INT_TO_PTR(res.nData); + if( (rc&0xff)==SQLITE_ABORT ){ + sqlite3_free_table(&res.azResult[1]); + if( res.zErrMsg ){ + if( pzErrMsg ){ + sqlite3_free(*pzErrMsg); + *pzErrMsg = sqlite3_mprintf("%s",res.zErrMsg); + } + sqlite3_free(res.zErrMsg); + } + db->errCode = res.rc; /* Assume 32-bit assignment is atomic */ + return res.rc; + } + sqlite3_free(res.zErrMsg); + if( rc!=SQLITE_OK ){ + sqlite3_free_table(&res.azResult[1]); + return rc; + } + if( res.nAlloc>res.nData ){ + char **azNew; + azNew = sqlite3Realloc( res.azResult, sizeof(char*)*res.nData ); + if( azNew==0 ){ + sqlite3_free_table(&res.azResult[1]); + db->errCode = SQLITE_NOMEM; + return SQLITE_NOMEM_BKPT; + } + res.azResult = azNew; + } + *pazResult = &res.azResult[1]; + if( pnColumn ) *pnColumn = res.nColumn; + if( pnRow ) *pnRow = res.nRow; + return rc; +} + +/* +** This routine frees the space the sqlite3_get_table() malloced. +*/ +SQLITE_API void sqlite3_free_table( + char **azResult /* Result returned from sqlite3_get_table() */ +){ + if( azResult ){ + int i, n; + azResult--; + assert( azResult!=0 ); + n = SQLITE_PTR_TO_INT(azResult[0]); + for(i=1; ipNext; + + sqlite3ExprDelete(db, pTmp->pWhere); + sqlite3ExprListDelete(db, pTmp->pExprList); + sqlite3SelectDelete(db, pTmp->pSelect); + sqlite3IdListDelete(db, pTmp->pIdList); + sqlite3UpsertDelete(db, pTmp->pUpsert); + sqlite3SrcListDelete(db, pTmp->pFrom); + sqlite3DbFree(db, pTmp->zSpan); + + sqlite3DbFree(db, pTmp); + } +} + +/* +** Given table pTab, return a list of all the triggers attached to +** the table. The list is connected by Trigger.pNext pointers. +** +** All of the triggers on pTab that are in the same database as pTab +** are already attached to pTab->pTrigger. But there might be additional +** triggers on pTab in the TEMP schema. This routine prepends all +** TEMP triggers on pTab to the beginning of the pTab->pTrigger list +** and returns the combined list. +** +** To state it another way: This routine returns a list of all triggers +** that fire off of pTab. The list will include any TEMP triggers on +** pTab as well as the triggers lised in pTab->pTrigger. +*/ +SQLITE_PRIVATE Trigger *sqlite3TriggerList(Parse *pParse, Table *pTab){ + Schema *pTmpSchema; /* Schema of the pTab table */ + Trigger *pList; /* List of triggers to return */ + HashElem *p; /* Loop variable for TEMP triggers */ + + if( pParse->disableTriggers ){ + return 0; + } + pTmpSchema = pParse->db->aDb[1].pSchema; + p = sqliteHashFirst(&pTmpSchema->trigHash); + pList = pTab->pTrigger; + while( p ){ + Trigger *pTrig = (Trigger *)sqliteHashData(p); + if( pTrig->pTabSchema==pTab->pSchema + && pTrig->table + && 0==sqlite3StrICmp(pTrig->table, pTab->zName) + && pTrig->pTabSchema!=pTmpSchema + ){ + pTrig->pNext = pList; + pList = pTrig; + }else if( pTrig->op==TK_RETURNING +#ifndef SQLITE_OMIT_VIRTUALTABLE + && pParse->db->pVtabCtx==0 +#endif + ){ + assert( pParse->bReturning ); + assert( &(pParse->u1.pReturning->retTrig) == pTrig ); + pTrig->table = pTab->zName; + pTrig->pTabSchema = pTab->pSchema; + pTrig->pNext = pList; + pList = pTrig; + } + p = sqliteHashNext(p); + } +#if 0 + if( pList ){ + Trigger *pX; + printf("Triggers for %s:", pTab->zName); + for(pX=pList; pX; pX=pX->pNext){ + printf(" %s", pX->zName); + } + printf("\n"); + fflush(stdout); + } +#endif + return pList; +} + +/* +** This is called by the parser when it sees a CREATE TRIGGER statement +** up to the point of the BEGIN before the trigger actions. A Trigger +** structure is generated based on the information available and stored +** in pParse->pNewTrigger. After the trigger actions have been parsed, the +** sqlite3FinishTrigger() function is called to complete the trigger +** construction process. +*/ +SQLITE_PRIVATE void sqlite3BeginTrigger( + Parse *pParse, /* The parse context of the CREATE TRIGGER statement */ + Token *pName1, /* The name of the trigger */ + Token *pName2, /* The name of the trigger */ + int tr_tm, /* One of TK_BEFORE, TK_AFTER, TK_INSTEAD */ + int op, /* One of TK_INSERT, TK_UPDATE, TK_DELETE */ + IdList *pColumns, /* column list if this is an UPDATE OF trigger */ + SrcList *pTableName,/* The name of the table/view the trigger applies to */ + Expr *pWhen, /* WHEN clause */ + int isTemp, /* True if the TEMPORARY keyword is present */ + int noErr /* Suppress errors if the trigger already exists */ +){ + Trigger *pTrigger = 0; /* The new trigger */ + Table *pTab; /* Table that the trigger fires off of */ + char *zName = 0; /* Name of the trigger */ + sqlite3 *db = pParse->db; /* The database connection */ + int iDb; /* The database to store the trigger in */ + Token *pName; /* The unqualified db name */ + DbFixer sFix; /* State vector for the DB fixer */ + + assert( pName1!=0 ); /* pName1->z might be NULL, but not pName1 itself */ + assert( pName2!=0 ); + assert( op==TK_INSERT || op==TK_UPDATE || op==TK_DELETE ); + assert( op>0 && op<0xff ); + if( isTemp ){ + /* If TEMP was specified, then the trigger name may not be qualified. */ + if( pName2->n>0 ){ + sqlite3ErrorMsg(pParse, "temporary trigger may not have qualified name"); + goto trigger_cleanup; + } + iDb = 1; + pName = pName1; + }else{ + /* Figure out the db that the trigger will be created in */ + iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pName); + if( iDb<0 ){ + goto trigger_cleanup; + } + } + if( !pTableName || db->mallocFailed ){ + goto trigger_cleanup; + } + + /* A long-standing parser bug is that this syntax was allowed: + ** + ** CREATE TRIGGER attached.demo AFTER INSERT ON attached.tab .... + ** ^^^^^^^^ + ** + ** To maintain backwards compatibility, ignore the database + ** name on pTableName if we are reparsing out of the schema table + */ + if( db->init.busy && iDb!=1 ){ + sqlite3DbFree(db, pTableName->a[0].zDatabase); + pTableName->a[0].zDatabase = 0; + } + + /* If the trigger name was unqualified, and the table is a temp table, + ** then set iDb to 1 to create the trigger in the temporary database. + ** If sqlite3SrcListLookup() returns 0, indicating the table does not + ** exist, the error is caught by the block below. + */ + pTab = sqlite3SrcListLookup(pParse, pTableName); + if( db->init.busy==0 && pName2->n==0 && pTab + && pTab->pSchema==db->aDb[1].pSchema ){ + iDb = 1; + } + + /* Ensure the table name matches database name and that the table exists */ + if( db->mallocFailed ) goto trigger_cleanup; + assert( pTableName->nSrc==1 ); + sqlite3FixInit(&sFix, pParse, iDb, "trigger", pName); + if( sqlite3FixSrcList(&sFix, pTableName) ){ + goto trigger_cleanup; + } + pTab = sqlite3SrcListLookup(pParse, pTableName); + if( !pTab ){ + /* The table does not exist. */ + goto trigger_orphan_error; + } + if( IsVirtual(pTab) ){ + sqlite3ErrorMsg(pParse, "cannot create triggers on virtual tables"); + goto trigger_orphan_error; + } + + /* Check that the trigger name is not reserved and that no trigger of the + ** specified name exists */ + zName = sqlite3NameFromToken(db, pName); + if( zName==0 ){ + assert( db->mallocFailed ); + goto trigger_cleanup; + } + if( sqlite3CheckObjectName(pParse, zName, "trigger", pTab->zName) ){ + goto trigger_cleanup; + } + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( !IN_RENAME_OBJECT ){ + if( sqlite3HashFind(&(db->aDb[iDb].pSchema->trigHash),zName) ){ + if( !noErr ){ + sqlite3ErrorMsg(pParse, "trigger %T already exists", pName); + }else{ + assert( !db->init.busy ); + sqlite3CodeVerifySchema(pParse, iDb); + } + goto trigger_cleanup; + } + } + + /* Do not create a trigger on a system table */ + if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 ){ + sqlite3ErrorMsg(pParse, "cannot create trigger on system table"); + goto trigger_cleanup; + } + + /* INSTEAD of triggers are only for views and views only support INSTEAD + ** of triggers. + */ + if( IsView(pTab) && tr_tm!=TK_INSTEAD ){ + sqlite3ErrorMsg(pParse, "cannot create %s trigger on view: %S", + (tr_tm == TK_BEFORE)?"BEFORE":"AFTER", pTableName->a); + goto trigger_orphan_error; + } + if( !IsView(pTab) && tr_tm==TK_INSTEAD ){ + sqlite3ErrorMsg(pParse, "cannot create INSTEAD OF" + " trigger on table: %S", pTableName->a); + goto trigger_orphan_error; + } + +#ifndef SQLITE_OMIT_AUTHORIZATION + if( !IN_RENAME_OBJECT ){ + int iTabDb = sqlite3SchemaToIndex(db, pTab->pSchema); + int code = SQLITE_CREATE_TRIGGER; + const char *zDb = db->aDb[iTabDb].zDbSName; + const char *zDbTrig = isTemp ? db->aDb[1].zDbSName : zDb; + if( iTabDb==1 || isTemp ) code = SQLITE_CREATE_TEMP_TRIGGER; + if( sqlite3AuthCheck(pParse, code, zName, pTab->zName, zDbTrig) ){ + goto trigger_cleanup; + } + if( sqlite3AuthCheck(pParse, SQLITE_INSERT, SCHEMA_TABLE(iTabDb),0,zDb)){ + goto trigger_cleanup; + } + } +#endif + + /* INSTEAD OF triggers can only appear on views and BEFORE triggers + ** cannot appear on views. So we might as well translate every + ** INSTEAD OF trigger into a BEFORE trigger. It simplifies code + ** elsewhere. + */ + if (tr_tm == TK_INSTEAD){ + tr_tm = TK_BEFORE; + } + + /* Build the Trigger object */ + pTrigger = (Trigger*)sqlite3DbMallocZero(db, sizeof(Trigger)); + if( pTrigger==0 ) goto trigger_cleanup; + pTrigger->zName = zName; + zName = 0; + pTrigger->table = sqlite3DbStrDup(db, pTableName->a[0].zName); + pTrigger->pSchema = db->aDb[iDb].pSchema; + pTrigger->pTabSchema = pTab->pSchema; + pTrigger->op = (u8)op; + pTrigger->tr_tm = tr_tm==TK_BEFORE ? TRIGGER_BEFORE : TRIGGER_AFTER; + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenRemap(pParse, pTrigger->table, pTableName->a[0].zName); + pTrigger->pWhen = pWhen; + pWhen = 0; + }else{ + pTrigger->pWhen = sqlite3ExprDup(db, pWhen, EXPRDUP_REDUCE); + } + pTrigger->pColumns = pColumns; + pColumns = 0; + assert( pParse->pNewTrigger==0 ); + pParse->pNewTrigger = pTrigger; + +trigger_cleanup: + sqlite3DbFree(db, zName); + sqlite3SrcListDelete(db, pTableName); + sqlite3IdListDelete(db, pColumns); + sqlite3ExprDelete(db, pWhen); + if( !pParse->pNewTrigger ){ + sqlite3DeleteTrigger(db, pTrigger); + }else{ + assert( pParse->pNewTrigger==pTrigger ); + } + return; + +trigger_orphan_error: + if( db->init.iDb==1 ){ + /* Ticket #3810. + ** Normally, whenever a table is dropped, all associated triggers are + ** dropped too. But if a TEMP trigger is created on a non-TEMP table + ** and the table is dropped by a different database connection, the + ** trigger is not visible to the database connection that does the + ** drop so the trigger cannot be dropped. This results in an + ** "orphaned trigger" - a trigger whose associated table is missing. + ** + ** 2020-11-05 see also https://sqlite.org/forum/forumpost/157dc791df + */ + db->init.orphanTrigger = 1; + } + goto trigger_cleanup; +} + +/* +** This routine is called after all of the trigger actions have been parsed +** in order to complete the process of building the trigger. +*/ +SQLITE_PRIVATE void sqlite3FinishTrigger( + Parse *pParse, /* Parser context */ + TriggerStep *pStepList, /* The triggered program */ + Token *pAll /* Token that describes the complete CREATE TRIGGER */ +){ + Trigger *pTrig = pParse->pNewTrigger; /* Trigger being finished */ + char *zName; /* Name of trigger */ + sqlite3 *db = pParse->db; /* The database */ + DbFixer sFix; /* Fixer object */ + int iDb; /* Database containing the trigger */ + Token nameToken; /* Trigger name for error reporting */ + + pParse->pNewTrigger = 0; + if( NEVER(pParse->nErr) || !pTrig ) goto triggerfinish_cleanup; + zName = pTrig->zName; + iDb = sqlite3SchemaToIndex(pParse->db, pTrig->pSchema); + pTrig->step_list = pStepList; + while( pStepList ){ + pStepList->pTrig = pTrig; + pStepList = pStepList->pNext; + } + sqlite3TokenInit(&nameToken, pTrig->zName); + sqlite3FixInit(&sFix, pParse, iDb, "trigger", &nameToken); + if( sqlite3FixTriggerStep(&sFix, pTrig->step_list) + || sqlite3FixExpr(&sFix, pTrig->pWhen) + ){ + goto triggerfinish_cleanup; + } + +#ifndef SQLITE_OMIT_ALTERTABLE + if( IN_RENAME_OBJECT ){ + assert( !db->init.busy ); + pParse->pNewTrigger = pTrig; + pTrig = 0; + }else +#endif + + /* if we are not initializing, + ** build the sqlite_schema entry + */ + if( !db->init.busy ){ + Vdbe *v; + char *z; + + /* Make an entry in the sqlite_schema table */ + v = sqlite3GetVdbe(pParse); + if( v==0 ) goto triggerfinish_cleanup; + sqlite3BeginWriteOperation(pParse, 0, iDb); + z = sqlite3DbStrNDup(db, (char*)pAll->z, pAll->n); + testcase( z==0 ); + sqlite3NestedParse(pParse, + "INSERT INTO %Q." LEGACY_SCHEMA_TABLE + " VALUES('trigger',%Q,%Q,0,'CREATE TRIGGER %q')", + db->aDb[iDb].zDbSName, zName, + pTrig->table, z); + sqlite3DbFree(db, z); + sqlite3ChangeCookie(pParse, iDb); + sqlite3VdbeAddParseSchemaOp(v, iDb, + sqlite3MPrintf(db, "type='trigger' AND name='%q'", zName), 0); + } + + if( db->init.busy ){ + Trigger *pLink = pTrig; + Hash *pHash = &db->aDb[iDb].pSchema->trigHash; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + assert( pLink!=0 ); + pTrig = sqlite3HashInsert(pHash, zName, pTrig); + if( pTrig ){ + sqlite3OomFault(db); + }else if( pLink->pSchema==pLink->pTabSchema ){ + Table *pTab; + pTab = sqlite3HashFind(&pLink->pTabSchema->tblHash, pLink->table); + assert( pTab!=0 ); + pLink->pNext = pTab->pTrigger; + pTab->pTrigger = pLink; + } + } + +triggerfinish_cleanup: + sqlite3DeleteTrigger(db, pTrig); + assert( IN_RENAME_OBJECT || !pParse->pNewTrigger ); + sqlite3DeleteTriggerStep(db, pStepList); +} + +/* +** Duplicate a range of text from an SQL statement, then convert all +** whitespace characters into ordinary space characters. +*/ +static char *triggerSpanDup(sqlite3 *db, const char *zStart, const char *zEnd){ + char *z = sqlite3DbSpanDup(db, zStart, zEnd); + int i; + if( z ) for(i=0; z[i]; i++) if( sqlite3Isspace(z[i]) ) z[i] = ' '; + return z; +} + +/* +** Turn a SELECT statement (that the pSelect parameter points to) into +** a trigger step. Return a pointer to a TriggerStep structure. +** +** The parser calls this routine when it finds a SELECT statement in +** body of a TRIGGER. +*/ +SQLITE_PRIVATE TriggerStep *sqlite3TriggerSelectStep( + sqlite3 *db, /* Database connection */ + Select *pSelect, /* The SELECT statement */ + const char *zStart, /* Start of SQL text */ + const char *zEnd /* End of SQL text */ +){ + TriggerStep *pTriggerStep = sqlite3DbMallocZero(db, sizeof(TriggerStep)); + if( pTriggerStep==0 ) { + sqlite3SelectDelete(db, pSelect); + return 0; + } + pTriggerStep->op = TK_SELECT; + pTriggerStep->pSelect = pSelect; + pTriggerStep->orconf = OE_Default; + pTriggerStep->zSpan = triggerSpanDup(db, zStart, zEnd); + return pTriggerStep; +} + +/* +** Allocate space to hold a new trigger step. The allocated space +** holds both the TriggerStep object and the TriggerStep.target.z string. +** +** If an OOM error occurs, NULL is returned and db->mallocFailed is set. +*/ +static TriggerStep *triggerStepAllocate( + Parse *pParse, /* Parser context */ + u8 op, /* Trigger opcode */ + Token *pName, /* The target name */ + const char *zStart, /* Start of SQL text */ + const char *zEnd /* End of SQL text */ +){ + sqlite3 *db = pParse->db; + TriggerStep *pTriggerStep; + + if( pParse->nErr ) return 0; + pTriggerStep = sqlite3DbMallocZero(db, sizeof(TriggerStep) + pName->n + 1); + if( pTriggerStep ){ + char *z = (char*)&pTriggerStep[1]; + memcpy(z, pName->z, pName->n); + sqlite3Dequote(z); + pTriggerStep->zTarget = z; + pTriggerStep->op = op; + pTriggerStep->zSpan = triggerSpanDup(db, zStart, zEnd); + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenMap(pParse, pTriggerStep->zTarget, pName); + } + } + return pTriggerStep; +} + +/* +** Build a trigger step out of an INSERT statement. Return a pointer +** to the new trigger step. +** +** The parser calls this routine when it sees an INSERT inside the +** body of a trigger. +*/ +SQLITE_PRIVATE TriggerStep *sqlite3TriggerInsertStep( + Parse *pParse, /* Parser */ + Token *pTableName, /* Name of the table into which we insert */ + IdList *pColumn, /* List of columns in pTableName to insert into */ + Select *pSelect, /* A SELECT statement that supplies values */ + u8 orconf, /* The conflict algorithm (OE_Abort, OE_Replace, etc.) */ + Upsert *pUpsert, /* ON CONFLICT clauses for upsert */ + const char *zStart, /* Start of SQL text */ + const char *zEnd /* End of SQL text */ +){ + sqlite3 *db = pParse->db; + TriggerStep *pTriggerStep; + + assert(pSelect != 0 || db->mallocFailed); + + pTriggerStep = triggerStepAllocate(pParse, TK_INSERT, pTableName,zStart,zEnd); + if( pTriggerStep ){ + if( IN_RENAME_OBJECT ){ + pTriggerStep->pSelect = pSelect; + pSelect = 0; + }else{ + pTriggerStep->pSelect = sqlite3SelectDup(db, pSelect, EXPRDUP_REDUCE); + } + pTriggerStep->pIdList = pColumn; + pTriggerStep->pUpsert = pUpsert; + pTriggerStep->orconf = orconf; + if( pUpsert ){ + sqlite3HasExplicitNulls(pParse, pUpsert->pUpsertTarget); + } + }else{ + testcase( pColumn ); + sqlite3IdListDelete(db, pColumn); + testcase( pUpsert ); + sqlite3UpsertDelete(db, pUpsert); + } + sqlite3SelectDelete(db, pSelect); + + return pTriggerStep; +} + +/* +** Construct a trigger step that implements an UPDATE statement and return +** a pointer to that trigger step. The parser calls this routine when it +** sees an UPDATE statement inside the body of a CREATE TRIGGER. +*/ +SQLITE_PRIVATE TriggerStep *sqlite3TriggerUpdateStep( + Parse *pParse, /* Parser */ + Token *pTableName, /* Name of the table to be updated */ + SrcList *pFrom, + ExprList *pEList, /* The SET clause: list of column and new values */ + Expr *pWhere, /* The WHERE clause */ + u8 orconf, /* The conflict algorithm. (OE_Abort, OE_Ignore, etc) */ + const char *zStart, /* Start of SQL text */ + const char *zEnd /* End of SQL text */ +){ + sqlite3 *db = pParse->db; + TriggerStep *pTriggerStep; + + pTriggerStep = triggerStepAllocate(pParse, TK_UPDATE, pTableName,zStart,zEnd); + if( pTriggerStep ){ + if( IN_RENAME_OBJECT ){ + pTriggerStep->pExprList = pEList; + pTriggerStep->pWhere = pWhere; + pTriggerStep->pFrom = pFrom; + pEList = 0; + pWhere = 0; + pFrom = 0; + }else{ + pTriggerStep->pExprList = sqlite3ExprListDup(db, pEList, EXPRDUP_REDUCE); + pTriggerStep->pWhere = sqlite3ExprDup(db, pWhere, EXPRDUP_REDUCE); + pTriggerStep->pFrom = sqlite3SrcListDup(db, pFrom, EXPRDUP_REDUCE); + } + pTriggerStep->orconf = orconf; + } + sqlite3ExprListDelete(db, pEList); + sqlite3ExprDelete(db, pWhere); + sqlite3SrcListDelete(db, pFrom); + return pTriggerStep; +} + +/* +** Construct a trigger step that implements a DELETE statement and return +** a pointer to that trigger step. The parser calls this routine when it +** sees a DELETE statement inside the body of a CREATE TRIGGER. +*/ +SQLITE_PRIVATE TriggerStep *sqlite3TriggerDeleteStep( + Parse *pParse, /* Parser */ + Token *pTableName, /* The table from which rows are deleted */ + Expr *pWhere, /* The WHERE clause */ + const char *zStart, /* Start of SQL text */ + const char *zEnd /* End of SQL text */ +){ + sqlite3 *db = pParse->db; + TriggerStep *pTriggerStep; + + pTriggerStep = triggerStepAllocate(pParse, TK_DELETE, pTableName,zStart,zEnd); + if( pTriggerStep ){ + if( IN_RENAME_OBJECT ){ + pTriggerStep->pWhere = pWhere; + pWhere = 0; + }else{ + pTriggerStep->pWhere = sqlite3ExprDup(db, pWhere, EXPRDUP_REDUCE); + } + pTriggerStep->orconf = OE_Default; + } + sqlite3ExprDelete(db, pWhere); + return pTriggerStep; +} + +/* +** Recursively delete a Trigger structure +*/ +SQLITE_PRIVATE void sqlite3DeleteTrigger(sqlite3 *db, Trigger *pTrigger){ + if( pTrigger==0 || pTrigger->bReturning ) return; + sqlite3DeleteTriggerStep(db, pTrigger->step_list); + sqlite3DbFree(db, pTrigger->zName); + sqlite3DbFree(db, pTrigger->table); + sqlite3ExprDelete(db, pTrigger->pWhen); + sqlite3IdListDelete(db, pTrigger->pColumns); + sqlite3DbFree(db, pTrigger); +} + +/* +** This function is called to drop a trigger from the database schema. +** +** This may be called directly from the parser and therefore identifies +** the trigger by name. The sqlite3DropTriggerPtr() routine does the +** same job as this routine except it takes a pointer to the trigger +** instead of the trigger name. +**/ +SQLITE_PRIVATE void sqlite3DropTrigger(Parse *pParse, SrcList *pName, int noErr){ + Trigger *pTrigger = 0; + int i; + const char *zDb; + const char *zName; + sqlite3 *db = pParse->db; + + if( db->mallocFailed ) goto drop_trigger_cleanup; + if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){ + goto drop_trigger_cleanup; + } + + assert( pName->nSrc==1 ); + zDb = pName->a[0].zDatabase; + zName = pName->a[0].zName; + assert( zDb!=0 || sqlite3BtreeHoldsAllMutexes(db) ); + for(i=OMIT_TEMPDB; inDb; i++){ + int j = (i<2) ? i^1 : i; /* Search TEMP before MAIN */ + if( zDb && sqlite3DbIsNamed(db, j, zDb)==0 ) continue; + assert( sqlite3SchemaMutexHeld(db, j, 0) ); + pTrigger = sqlite3HashFind(&(db->aDb[j].pSchema->trigHash), zName); + if( pTrigger ) break; + } + if( !pTrigger ){ + if( !noErr ){ + sqlite3ErrorMsg(pParse, "no such trigger: %S", pName->a); + }else{ + sqlite3CodeVerifyNamedSchema(pParse, zDb); + } + pParse->checkSchema = 1; + goto drop_trigger_cleanup; + } + sqlite3DropTriggerPtr(pParse, pTrigger); + +drop_trigger_cleanup: + sqlite3SrcListDelete(db, pName); +} + +/* +** Return a pointer to the Table structure for the table that a trigger +** is set on. +*/ +static Table *tableOfTrigger(Trigger *pTrigger){ + return sqlite3HashFind(&pTrigger->pTabSchema->tblHash, pTrigger->table); +} + + +/* +** Drop a trigger given a pointer to that trigger. +*/ +SQLITE_PRIVATE void sqlite3DropTriggerPtr(Parse *pParse, Trigger *pTrigger){ + Table *pTable; + Vdbe *v; + sqlite3 *db = pParse->db; + int iDb; + + iDb = sqlite3SchemaToIndex(pParse->db, pTrigger->pSchema); + assert( iDb>=0 && iDbnDb ); + pTable = tableOfTrigger(pTrigger); + assert( (pTable && pTable->pSchema==pTrigger->pSchema) || iDb==1 ); +#ifndef SQLITE_OMIT_AUTHORIZATION + if( pTable ){ + int code = SQLITE_DROP_TRIGGER; + const char *zDb = db->aDb[iDb].zDbSName; + const char *zTab = SCHEMA_TABLE(iDb); + if( iDb==1 ) code = SQLITE_DROP_TEMP_TRIGGER; + if( sqlite3AuthCheck(pParse, code, pTrigger->zName, pTable->zName, zDb) || + sqlite3AuthCheck(pParse, SQLITE_DELETE, zTab, 0, zDb) ){ + return; + } + } +#endif + + /* Generate code to destroy the database record of the trigger. + */ + if( (v = sqlite3GetVdbe(pParse))!=0 ){ + sqlite3NestedParse(pParse, + "DELETE FROM %Q." LEGACY_SCHEMA_TABLE " WHERE name=%Q AND type='trigger'", + db->aDb[iDb].zDbSName, pTrigger->zName + ); + sqlite3ChangeCookie(pParse, iDb); + sqlite3VdbeAddOp4(v, OP_DropTrigger, iDb, 0, 0, pTrigger->zName, 0); + } +} + +/* +** Remove a trigger from the hash tables of the sqlite* pointer. +*/ +SQLITE_PRIVATE void sqlite3UnlinkAndDeleteTrigger(sqlite3 *db, int iDb, const char *zName){ + Trigger *pTrigger; + Hash *pHash; + + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + pHash = &(db->aDb[iDb].pSchema->trigHash); + pTrigger = sqlite3HashInsert(pHash, zName, 0); + if( ALWAYS(pTrigger) ){ + if( pTrigger->pSchema==pTrigger->pTabSchema ){ + Table *pTab = tableOfTrigger(pTrigger); + if( pTab ){ + Trigger **pp; + for(pp=&pTab->pTrigger; *pp; pp=&((*pp)->pNext)){ + if( *pp==pTrigger ){ + *pp = (*pp)->pNext; + break; + } + } + } + } + sqlite3DeleteTrigger(db, pTrigger); + db->mDbFlags |= DBFLAG_SchemaChange; + } +} + +/* +** pEList is the SET clause of an UPDATE statement. Each entry +** in pEList is of the format =. If any of the entries +** in pEList have an which matches an identifier in pIdList, +** then return TRUE. If pIdList==NULL, then it is considered a +** wildcard that matches anything. Likewise if pEList==NULL then +** it matches anything so always return true. Return false only +** if there is no match. +*/ +static int checkColumnOverlap(IdList *pIdList, ExprList *pEList){ + int e; + if( pIdList==0 || NEVER(pEList==0) ) return 1; + for(e=0; enExpr; e++){ + if( sqlite3IdListIndex(pIdList, pEList->a[e].zEName)>=0 ) return 1; + } + return 0; +} + +/* +** Return a list of all triggers on table pTab if there exists at least +** one trigger that must be fired when an operation of type 'op' is +** performed on the table, and, if that operation is an UPDATE, if at +** least one of the columns in pChanges is being modified. +*/ +SQLITE_PRIVATE Trigger *sqlite3TriggersExist( + Parse *pParse, /* Parse context */ + Table *pTab, /* The table the contains the triggers */ + int op, /* one of TK_DELETE, TK_INSERT, TK_UPDATE */ + ExprList *pChanges, /* Columns that change in an UPDATE statement */ + int *pMask /* OUT: Mask of TRIGGER_BEFORE|TRIGGER_AFTER */ +){ + int mask = 0; + Trigger *pList = 0; + Trigger *p; + + pList = sqlite3TriggerList(pParse, pTab); + assert( pList==0 || IsVirtual(pTab)==0 + || (pList->bReturning && pList->pNext==0) ); + if( pList!=0 ){ + p = pList; + if( (pParse->db->flags & SQLITE_EnableTrigger)==0 + && pTab->pTrigger!=0 + ){ + /* The SQLITE_DBCONFIG_ENABLE_TRIGGER setting is off. That means that + ** only TEMP triggers are allowed. Truncate the pList so that it + ** includes only TEMP triggers */ + if( pList==pTab->pTrigger ){ + pList = 0; + goto exit_triggers_exist; + } + while( ALWAYS(p->pNext) && p->pNext!=pTab->pTrigger ) p = p->pNext; + p->pNext = 0; + p = pList; + } + do{ + if( p->op==op && checkColumnOverlap(p->pColumns, pChanges) ){ + mask |= p->tr_tm; + }else if( p->op==TK_RETURNING ){ + /* The first time a RETURNING trigger is seen, the "op" value tells + ** us what time of trigger it should be. */ + assert( sqlite3IsToplevel(pParse) ); + p->op = op; + if( IsVirtual(pTab) ){ + if( op!=TK_INSERT ){ + sqlite3ErrorMsg(pParse, + "%s RETURNING is not available on virtual tables", + op==TK_DELETE ? "DELETE" : "UPDATE"); + } + p->tr_tm = TRIGGER_BEFORE; + }else{ + p->tr_tm = TRIGGER_AFTER; + } + mask |= p->tr_tm; + }else if( p->bReturning && p->op==TK_INSERT && op==TK_UPDATE + && sqlite3IsToplevel(pParse) ){ + /* Also fire a RETURNING trigger for an UPSERT */ + mask |= p->tr_tm; + } + p = p->pNext; + }while( p ); + } +exit_triggers_exist: + if( pMask ){ + *pMask = mask; + } + return (mask ? pList : 0); +} + +/* +** Convert the pStep->zTarget string into a SrcList and return a pointer +** to that SrcList. +** +** This routine adds a specific database name, if needed, to the target when +** forming the SrcList. This prevents a trigger in one database from +** referring to a target in another database. An exception is when the +** trigger is in TEMP in which case it can refer to any other database it +** wants. +*/ +SQLITE_PRIVATE SrcList *sqlite3TriggerStepSrc( + Parse *pParse, /* The parsing context */ + TriggerStep *pStep /* The trigger containing the target token */ +){ + sqlite3 *db = pParse->db; + SrcList *pSrc; /* SrcList to be returned */ + char *zName = sqlite3DbStrDup(db, pStep->zTarget); + pSrc = sqlite3SrcListAppend(pParse, 0, 0, 0); + assert( pSrc==0 || pSrc->nSrc==1 ); + assert( zName || pSrc==0 ); + if( pSrc ){ + Schema *pSchema = pStep->pTrig->pSchema; + pSrc->a[0].zName = zName; + if( pSchema!=db->aDb[1].pSchema ){ + pSrc->a[0].pSchema = pSchema; + } + if( pStep->pFrom ){ + SrcList *pDup = sqlite3SrcListDup(db, pStep->pFrom, 0); + pSrc = sqlite3SrcListAppendList(pParse, pSrc, pDup); + } + }else{ + sqlite3DbFree(db, zName); + } + return pSrc; +} + +/* +** Return true if the pExpr term from the RETURNING clause argument +** list is of the form "*". Raise an error if the terms if of the +** form "table.*". +*/ +static int isAsteriskTerm( + Parse *pParse, /* Parsing context */ + Expr *pTerm /* A term in the RETURNING clause */ +){ + assert( pTerm!=0 ); + if( pTerm->op==TK_ASTERISK ) return 1; + if( pTerm->op!=TK_DOT ) return 0; + assert( pTerm->pRight!=0 ); + assert( pTerm->pLeft!=0 ); + if( pTerm->pRight->op!=TK_ASTERISK ) return 0; + sqlite3ErrorMsg(pParse, "RETURNING may not use \"TABLE.*\" wildcards"); + return 1; +} + +/* The input list pList is the list of result set terms from a RETURNING +** clause. The table that we are returning from is pTab. +** +** This routine makes a copy of the pList, and at the same time expands +** any "*" wildcards to be the complete set of columns from pTab. +*/ +static ExprList *sqlite3ExpandReturning( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* The arguments to RETURNING */ + Table *pTab /* The table being updated */ +){ + ExprList *pNew = 0; + sqlite3 *db = pParse->db; + int i; + + for(i=0; inExpr; i++){ + Expr *pOldExpr = pList->a[i].pExpr; + if( NEVER(pOldExpr==0) ) continue; + if( isAsteriskTerm(pParse, pOldExpr) ){ + int jj; + for(jj=0; jjnCol; jj++){ + Expr *pNewExpr; + if( IsHiddenColumn(pTab->aCol+jj) ) continue; + pNewExpr = sqlite3Expr(db, TK_ID, pTab->aCol[jj].zCnName); + pNew = sqlite3ExprListAppend(pParse, pNew, pNewExpr); + if( !db->mallocFailed ){ + struct ExprList_item *pItem = &pNew->a[pNew->nExpr-1]; + pItem->zEName = sqlite3DbStrDup(db, pTab->aCol[jj].zCnName); + pItem->eEName = ENAME_NAME; + } + } + }else{ + Expr *pNewExpr = sqlite3ExprDup(db, pOldExpr, 0); + pNew = sqlite3ExprListAppend(pParse, pNew, pNewExpr); + if( !db->mallocFailed && ALWAYS(pList->a[i].zEName!=0) ){ + struct ExprList_item *pItem = &pNew->a[pNew->nExpr-1]; + pItem->zEName = sqlite3DbStrDup(db, pList->a[i].zEName); + pItem->eEName = pList->a[i].eEName; + } + } + } + return pNew; +} + +/* +** Generate code for the RETURNING trigger. Unlike other triggers +** that invoke a subprogram in the bytecode, the code for RETURNING +** is generated in-line. +*/ +static void codeReturningTrigger( + Parse *pParse, /* Parse context */ + Trigger *pTrigger, /* The trigger step that defines the RETURNING */ + Table *pTab, /* The table to code triggers from */ + int regIn /* The first in an array of registers */ +){ + Vdbe *v = pParse->pVdbe; + sqlite3 *db = pParse->db; + ExprList *pNew; + Returning *pReturning; + Select sSelect; + SrcList sFrom; + + assert( v!=0 ); + assert( pParse->bReturning ); + assert( db->pParse==pParse ); + pReturning = pParse->u1.pReturning; + assert( pTrigger == &(pReturning->retTrig) ); + memset(&sSelect, 0, sizeof(sSelect)); + memset(&sFrom, 0, sizeof(sFrom)); + sSelect.pEList = sqlite3ExprListDup(db, pReturning->pReturnEL, 0); + sSelect.pSrc = &sFrom; + sFrom.nSrc = 1; + sFrom.a[0].pTab = pTab; + sFrom.a[0].iCursor = -1; + sqlite3SelectPrep(pParse, &sSelect, 0); + if( pParse->nErr==0 ){ + assert( db->mallocFailed==0 ); + sqlite3GenerateColumnNames(pParse, &sSelect); + } + sqlite3ExprListDelete(db, sSelect.pEList); + pNew = sqlite3ExpandReturning(pParse, pReturning->pReturnEL, pTab); + if( !db->mallocFailed ){ + NameContext sNC; + memset(&sNC, 0, sizeof(sNC)); + if( pReturning->nRetCol==0 ){ + pReturning->nRetCol = pNew->nExpr; + pReturning->iRetCur = pParse->nTab++; + } + sNC.pParse = pParse; + sNC.uNC.iBaseReg = regIn; + sNC.ncFlags = NC_UBaseReg; + pParse->eTriggerOp = pTrigger->op; + pParse->pTriggerTab = pTab; + if( sqlite3ResolveExprListNames(&sNC, pNew)==SQLITE_OK + && ALWAYS(!db->mallocFailed) + ){ + int i; + int nCol = pNew->nExpr; + int reg = pParse->nMem+1; + pParse->nMem += nCol+2; + pReturning->iRetReg = reg; + for(i=0; ia[i].pExpr; + assert( pCol!=0 ); /* Due to !db->mallocFailed ~9 lines above */ + sqlite3ExprCodeFactorable(pParse, pCol, reg+i); + if( sqlite3ExprAffinity(pCol)==SQLITE_AFF_REAL ){ + sqlite3VdbeAddOp1(v, OP_RealAffinity, reg+i); + } + } + sqlite3VdbeAddOp3(v, OP_MakeRecord, reg, i, reg+i); + sqlite3VdbeAddOp2(v, OP_NewRowid, pReturning->iRetCur, reg+i+1); + sqlite3VdbeAddOp3(v, OP_Insert, pReturning->iRetCur, reg+i, reg+i+1); + } + } + sqlite3ExprListDelete(db, pNew); + pParse->eTriggerOp = 0; + pParse->pTriggerTab = 0; +} + + + +/* +** Generate VDBE code for the statements inside the body of a single +** trigger. +*/ +static int codeTriggerProgram( + Parse *pParse, /* The parser context */ + TriggerStep *pStepList, /* List of statements inside the trigger body */ + int orconf /* Conflict algorithm. (OE_Abort, etc) */ +){ + TriggerStep *pStep; + Vdbe *v = pParse->pVdbe; + sqlite3 *db = pParse->db; + + assert( pParse->pTriggerTab && pParse->pToplevel ); + assert( pStepList ); + assert( v!=0 ); + for(pStep=pStepList; pStep; pStep=pStep->pNext){ + /* Figure out the ON CONFLICT policy that will be used for this step + ** of the trigger program. If the statement that caused this trigger + ** to fire had an explicit ON CONFLICT, then use it. Otherwise, use + ** the ON CONFLICT policy that was specified as part of the trigger + ** step statement. Example: + ** + ** CREATE TRIGGER AFTER INSERT ON t1 BEGIN; + ** INSERT OR REPLACE INTO t2 VALUES(new.a, new.b); + ** END; + ** + ** INSERT INTO t1 ... ; -- insert into t2 uses REPLACE policy + ** INSERT OR IGNORE INTO t1 ... ; -- insert into t2 uses IGNORE policy + */ + pParse->eOrconf = (orconf==OE_Default)?pStep->orconf:(u8)orconf; + assert( pParse->okConstFactor==0 ); + +#ifndef SQLITE_OMIT_TRACE + if( pStep->zSpan ){ + sqlite3VdbeAddOp4(v, OP_Trace, 0x7fffffff, 1, 0, + sqlite3MPrintf(db, "-- %s", pStep->zSpan), + P4_DYNAMIC); + } +#endif + + switch( pStep->op ){ + case TK_UPDATE: { + sqlite3Update(pParse, + sqlite3TriggerStepSrc(pParse, pStep), + sqlite3ExprListDup(db, pStep->pExprList, 0), + sqlite3ExprDup(db, pStep->pWhere, 0), + pParse->eOrconf, 0, 0, 0 + ); + sqlite3VdbeAddOp0(v, OP_ResetCount); + break; + } + case TK_INSERT: { + sqlite3Insert(pParse, + sqlite3TriggerStepSrc(pParse, pStep), + sqlite3SelectDup(db, pStep->pSelect, 0), + sqlite3IdListDup(db, pStep->pIdList), + pParse->eOrconf, + sqlite3UpsertDup(db, pStep->pUpsert) + ); + sqlite3VdbeAddOp0(v, OP_ResetCount); + break; + } + case TK_DELETE: { + sqlite3DeleteFrom(pParse, + sqlite3TriggerStepSrc(pParse, pStep), + sqlite3ExprDup(db, pStep->pWhere, 0), 0, 0 + ); + sqlite3VdbeAddOp0(v, OP_ResetCount); + break; + } + default: assert( pStep->op==TK_SELECT ); { + SelectDest sDest; + Select *pSelect = sqlite3SelectDup(db, pStep->pSelect, 0); + sqlite3SelectDestInit(&sDest, SRT_Discard, 0); + sqlite3Select(pParse, pSelect, &sDest); + sqlite3SelectDelete(db, pSelect); + break; + } + } + } + + return 0; +} + +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS +/* +** This function is used to add VdbeComment() annotations to a VDBE +** program. It is not used in production code, only for debugging. +*/ +static const char *onErrorText(int onError){ + switch( onError ){ + case OE_Abort: return "abort"; + case OE_Rollback: return "rollback"; + case OE_Fail: return "fail"; + case OE_Replace: return "replace"; + case OE_Ignore: return "ignore"; + case OE_Default: return "default"; + } + return "n/a"; +} +#endif + +/* +** Parse context structure pFrom has just been used to create a sub-vdbe +** (trigger program). If an error has occurred, transfer error information +** from pFrom to pTo. +*/ +static void transferParseError(Parse *pTo, Parse *pFrom){ + assert( pFrom->zErrMsg==0 || pFrom->nErr ); + assert( pTo->zErrMsg==0 || pTo->nErr ); + if( pTo->nErr==0 ){ + pTo->zErrMsg = pFrom->zErrMsg; + pTo->nErr = pFrom->nErr; + pTo->rc = pFrom->rc; + }else{ + sqlite3DbFree(pFrom->db, pFrom->zErrMsg); + } +} + +/* +** Create and populate a new TriggerPrg object with a sub-program +** implementing trigger pTrigger with ON CONFLICT policy orconf. +*/ +static TriggerPrg *codeRowTrigger( + Parse *pParse, /* Current parse context */ + Trigger *pTrigger, /* Trigger to code */ + Table *pTab, /* The table pTrigger is attached to */ + int orconf /* ON CONFLICT policy to code trigger program with */ +){ + Parse *pTop = sqlite3ParseToplevel(pParse); + sqlite3 *db = pParse->db; /* Database handle */ + TriggerPrg *pPrg; /* Value to return */ + Expr *pWhen = 0; /* Duplicate of trigger WHEN expression */ + Vdbe *v; /* Temporary VM */ + NameContext sNC; /* Name context for sub-vdbe */ + SubProgram *pProgram = 0; /* Sub-vdbe for trigger program */ + int iEndTrigger = 0; /* Label to jump to if WHEN is false */ + Parse sSubParse; /* Parse context for sub-vdbe */ + + assert( pTrigger->zName==0 || pTab==tableOfTrigger(pTrigger) ); + assert( pTop->pVdbe ); + + /* Allocate the TriggerPrg and SubProgram objects. To ensure that they + ** are freed if an error occurs, link them into the Parse.pTriggerPrg + ** list of the top-level Parse object sooner rather than later. */ + pPrg = sqlite3DbMallocZero(db, sizeof(TriggerPrg)); + if( !pPrg ) return 0; + pPrg->pNext = pTop->pTriggerPrg; + pTop->pTriggerPrg = pPrg; + pPrg->pProgram = pProgram = sqlite3DbMallocZero(db, sizeof(SubProgram)); + if( !pProgram ) return 0; + sqlite3VdbeLinkSubProgram(pTop->pVdbe, pProgram); + pPrg->pTrigger = pTrigger; + pPrg->orconf = orconf; + pPrg->aColmask[0] = 0xffffffff; + pPrg->aColmask[1] = 0xffffffff; + + /* Allocate and populate a new Parse context to use for coding the + ** trigger sub-program. */ + sqlite3ParseObjectInit(&sSubParse, db); + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = &sSubParse; + sSubParse.pTriggerTab = pTab; + sSubParse.pToplevel = pTop; + sSubParse.zAuthContext = pTrigger->zName; + sSubParse.eTriggerOp = pTrigger->op; + sSubParse.nQueryLoop = pParse->nQueryLoop; + sSubParse.disableVtab = pParse->disableVtab; + + v = sqlite3GetVdbe(&sSubParse); + if( v ){ + VdbeComment((v, "Start: %s.%s (%s %s%s%s ON %s)", + pTrigger->zName, onErrorText(orconf), + (pTrigger->tr_tm==TRIGGER_BEFORE ? "BEFORE" : "AFTER"), + (pTrigger->op==TK_UPDATE ? "UPDATE" : ""), + (pTrigger->op==TK_INSERT ? "INSERT" : ""), + (pTrigger->op==TK_DELETE ? "DELETE" : ""), + pTab->zName + )); +#ifndef SQLITE_OMIT_TRACE + if( pTrigger->zName ){ + sqlite3VdbeChangeP4(v, -1, + sqlite3MPrintf(db, "-- TRIGGER %s", pTrigger->zName), P4_DYNAMIC + ); + } +#endif + + /* If one was specified, code the WHEN clause. If it evaluates to false + ** (or NULL) the sub-vdbe is immediately halted by jumping to the + ** OP_Halt inserted at the end of the program. */ + if( pTrigger->pWhen ){ + pWhen = sqlite3ExprDup(db, pTrigger->pWhen, 0); + if( db->mallocFailed==0 + && SQLITE_OK==sqlite3ResolveExprNames(&sNC, pWhen) + ){ + iEndTrigger = sqlite3VdbeMakeLabel(&sSubParse); + sqlite3ExprIfFalse(&sSubParse, pWhen, iEndTrigger, SQLITE_JUMPIFNULL); + } + sqlite3ExprDelete(db, pWhen); + } + + /* Code the trigger program into the sub-vdbe. */ + codeTriggerProgram(&sSubParse, pTrigger->step_list, orconf); + + /* Insert an OP_Halt at the end of the sub-program. */ + if( iEndTrigger ){ + sqlite3VdbeResolveLabel(v, iEndTrigger); + } + sqlite3VdbeAddOp0(v, OP_Halt); + VdbeComment((v, "End: %s.%s", pTrigger->zName, onErrorText(orconf))); + transferParseError(pParse, &sSubParse); + + if( pParse->nErr==0 ){ + assert( db->mallocFailed==0 ); + pProgram->aOp = sqlite3VdbeTakeOpArray(v, &pProgram->nOp, &pTop->nMaxArg); + } + pProgram->nMem = sSubParse.nMem; + pProgram->nCsr = sSubParse.nTab; + pProgram->token = (void *)pTrigger; + pPrg->aColmask[0] = sSubParse.oldmask; + pPrg->aColmask[1] = sSubParse.newmask; + sqlite3VdbeDelete(v); + }else{ + transferParseError(pParse, &sSubParse); + } + + assert( !sSubParse.pTriggerPrg && !sSubParse.nMaxArg ); + sqlite3ParseObjectReset(&sSubParse); + return pPrg; +} + +/* +** Return a pointer to a TriggerPrg object containing the sub-program for +** trigger pTrigger with default ON CONFLICT algorithm orconf. If no such +** TriggerPrg object exists, a new object is allocated and populated before +** being returned. +*/ +static TriggerPrg *getRowTrigger( + Parse *pParse, /* Current parse context */ + Trigger *pTrigger, /* Trigger to code */ + Table *pTab, /* The table trigger pTrigger is attached to */ + int orconf /* ON CONFLICT algorithm. */ +){ + Parse *pRoot = sqlite3ParseToplevel(pParse); + TriggerPrg *pPrg; + + assert( pTrigger->zName==0 || pTab==tableOfTrigger(pTrigger) ); + + /* It may be that this trigger has already been coded (or is in the + ** process of being coded). If this is the case, then an entry with + ** a matching TriggerPrg.pTrigger field will be present somewhere + ** in the Parse.pTriggerPrg list. Search for such an entry. */ + for(pPrg=pRoot->pTriggerPrg; + pPrg && (pPrg->pTrigger!=pTrigger || pPrg->orconf!=orconf); + pPrg=pPrg->pNext + ); + + /* If an existing TriggerPrg could not be located, create a new one. */ + if( !pPrg ){ + pPrg = codeRowTrigger(pParse, pTrigger, pTab, orconf); + pParse->db->errByteOffset = -1; + } + + return pPrg; +} + +/* +** Generate code for the trigger program associated with trigger p on +** table pTab. The reg, orconf and ignoreJump parameters passed to this +** function are the same as those described in the header function for +** sqlite3CodeRowTrigger() +*/ +SQLITE_PRIVATE void sqlite3CodeRowTriggerDirect( + Parse *pParse, /* Parse context */ + Trigger *p, /* Trigger to code */ + Table *pTab, /* The table to code triggers from */ + int reg, /* Reg array containing OLD.* and NEW.* values */ + int orconf, /* ON CONFLICT policy */ + int ignoreJump /* Instruction to jump to for RAISE(IGNORE) */ +){ + Vdbe *v = sqlite3GetVdbe(pParse); /* Main VM */ + TriggerPrg *pPrg; + pPrg = getRowTrigger(pParse, p, pTab, orconf); + assert( pPrg || pParse->nErr ); + + /* Code the OP_Program opcode in the parent VDBE. P4 of the OP_Program + ** is a pointer to the sub-vdbe containing the trigger program. */ + if( pPrg ){ + int bRecursive = (p->zName && 0==(pParse->db->flags&SQLITE_RecTriggers)); + + sqlite3VdbeAddOp4(v, OP_Program, reg, ignoreJump, ++pParse->nMem, + (const char *)pPrg->pProgram, P4_SUBPROGRAM); + VdbeComment( + (v, "Call: %s.%s", (p->zName?p->zName:"fkey"), onErrorText(orconf))); + + /* Set the P5 operand of the OP_Program instruction to non-zero if + ** recursive invocation of this trigger program is disallowed. Recursive + ** invocation is disallowed if (a) the sub-program is really a trigger, + ** not a foreign key action, and (b) the flag to enable recursive triggers + ** is clear. */ + sqlite3VdbeChangeP5(v, (u8)bRecursive); + } +} + +/* +** This is called to code the required FOR EACH ROW triggers for an operation +** on table pTab. The operation to code triggers for (INSERT, UPDATE or DELETE) +** is given by the op parameter. The tr_tm parameter determines whether the +** BEFORE or AFTER triggers are coded. If the operation is an UPDATE, then +** parameter pChanges is passed the list of columns being modified. +** +** If there are no triggers that fire at the specified time for the specified +** operation on pTab, this function is a no-op. +** +** The reg argument is the address of the first in an array of registers +** that contain the values substituted for the new.* and old.* references +** in the trigger program. If N is the number of columns in table pTab +** (a copy of pTab->nCol), then registers are populated as follows: +** +** Register Contains +** ------------------------------------------------------ +** reg+0 OLD.rowid +** reg+1 OLD.* value of left-most column of pTab +** ... ... +** reg+N OLD.* value of right-most column of pTab +** reg+N+1 NEW.rowid +** reg+N+2 NEW.* value of left-most column of pTab +** ... ... +** reg+N+N+1 NEW.* value of right-most column of pTab +** +** For ON DELETE triggers, the registers containing the NEW.* values will +** never be accessed by the trigger program, so they are not allocated or +** populated by the caller (there is no data to populate them with anyway). +** Similarly, for ON INSERT triggers the values stored in the OLD.* registers +** are never accessed, and so are not allocated by the caller. So, for an +** ON INSERT trigger, the value passed to this function as parameter reg +** is not a readable register, although registers (reg+N) through +** (reg+N+N+1) are. +** +** Parameter orconf is the default conflict resolution algorithm for the +** trigger program to use (REPLACE, IGNORE etc.). Parameter ignoreJump +** is the instruction that control should jump to if a trigger program +** raises an IGNORE exception. +*/ +SQLITE_PRIVATE void sqlite3CodeRowTrigger( + Parse *pParse, /* Parse context */ + Trigger *pTrigger, /* List of triggers on table pTab */ + int op, /* One of TK_UPDATE, TK_INSERT, TK_DELETE */ + ExprList *pChanges, /* Changes list for any UPDATE OF triggers */ + int tr_tm, /* One of TRIGGER_BEFORE, TRIGGER_AFTER */ + Table *pTab, /* The table to code triggers from */ + int reg, /* The first in an array of registers (see above) */ + int orconf, /* ON CONFLICT policy */ + int ignoreJump /* Instruction to jump to for RAISE(IGNORE) */ +){ + Trigger *p; /* Used to iterate through pTrigger list */ + + assert( op==TK_UPDATE || op==TK_INSERT || op==TK_DELETE ); + assert( tr_tm==TRIGGER_BEFORE || tr_tm==TRIGGER_AFTER ); + assert( (op==TK_UPDATE)==(pChanges!=0) ); + + for(p=pTrigger; p; p=p->pNext){ + + /* Sanity checking: The schema for the trigger and for the table are + ** always defined. The trigger must be in the same schema as the table + ** or else it must be a TEMP trigger. */ + assert( p->pSchema!=0 ); + assert( p->pTabSchema!=0 ); + assert( p->pSchema==p->pTabSchema + || p->pSchema==pParse->db->aDb[1].pSchema ); + + /* Determine whether we should code this trigger. One of two choices: + ** 1. The trigger is an exact match to the current DML statement + ** 2. This is a RETURNING trigger for INSERT but we are currently + ** doing the UPDATE part of an UPSERT. + */ + if( (p->op==op || (p->bReturning && p->op==TK_INSERT && op==TK_UPDATE)) + && p->tr_tm==tr_tm + && checkColumnOverlap(p->pColumns, pChanges) + ){ + if( !p->bReturning ){ + sqlite3CodeRowTriggerDirect(pParse, p, pTab, reg, orconf, ignoreJump); + }else if( sqlite3IsToplevel(pParse) ){ + codeReturningTrigger(pParse, p, pTab, reg); + } + } + } +} + +/* +** Triggers may access values stored in the old.* or new.* pseudo-table. +** This function returns a 32-bit bitmask indicating which columns of the +** old.* or new.* tables actually are used by triggers. This information +** may be used by the caller, for example, to avoid having to load the entire +** old.* record into memory when executing an UPDATE or DELETE command. +** +** Bit 0 of the returned mask is set if the left-most column of the +** table may be accessed using an [old|new].reference. Bit 1 is set if +** the second leftmost column value is required, and so on. If there +** are more than 32 columns in the table, and at least one of the columns +** with an index greater than 32 may be accessed, 0xffffffff is returned. +** +** It is not possible to determine if the old.rowid or new.rowid column is +** accessed by triggers. The caller must always assume that it is. +** +** Parameter isNew must be either 1 or 0. If it is 0, then the mask returned +** applies to the old.* table. If 1, the new.* table. +** +** Parameter tr_tm must be a mask with one or both of the TRIGGER_BEFORE +** and TRIGGER_AFTER bits set. Values accessed by BEFORE triggers are only +** included in the returned mask if the TRIGGER_BEFORE bit is set in the +** tr_tm parameter. Similarly, values accessed by AFTER triggers are only +** included in the returned mask if the TRIGGER_AFTER bit is set in tr_tm. +*/ +SQLITE_PRIVATE u32 sqlite3TriggerColmask( + Parse *pParse, /* Parse context */ + Trigger *pTrigger, /* List of triggers on table pTab */ + ExprList *pChanges, /* Changes list for any UPDATE OF triggers */ + int isNew, /* 1 for new.* ref mask, 0 for old.* ref mask */ + int tr_tm, /* Mask of TRIGGER_BEFORE|TRIGGER_AFTER */ + Table *pTab, /* The table to code triggers from */ + int orconf /* Default ON CONFLICT policy for trigger steps */ +){ + const int op = pChanges ? TK_UPDATE : TK_DELETE; + u32 mask = 0; + Trigger *p; + + assert( isNew==1 || isNew==0 ); + for(p=pTrigger; p; p=p->pNext){ + if( p->op==op + && (tr_tm&p->tr_tm) + && checkColumnOverlap(p->pColumns,pChanges) + ){ + if( p->bReturning ){ + mask = 0xffffffff; + }else{ + TriggerPrg *pPrg; + pPrg = getRowTrigger(pParse, p, pTab, orconf); + if( pPrg ){ + mask |= pPrg->aColmask[isNew]; + } + } + } + } + + return mask; +} + +#endif /* !defined(SQLITE_OMIT_TRIGGER) */ + +/************** End of trigger.c *********************************************/ +/************** Begin file update.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains C code routines that are called by the parser +** to handle UPDATE statements. +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Forward declaration */ +static void updateVirtualTable( + Parse *pParse, /* The parsing context */ + SrcList *pSrc, /* The virtual table to be modified */ + Table *pTab, /* The virtual table */ + ExprList *pChanges, /* The columns to change in the UPDATE statement */ + Expr *pRowidExpr, /* Expression used to recompute the rowid */ + int *aXRef, /* Mapping from columns of pTab to entries in pChanges */ + Expr *pWhere, /* WHERE clause of the UPDATE statement */ + int onError /* ON CONFLICT strategy */ +); +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** The most recently coded instruction was an OP_Column to retrieve the +** i-th column of table pTab. This routine sets the P4 parameter of the +** OP_Column to the default value, if any. +** +** The default value of a column is specified by a DEFAULT clause in the +** column definition. This was either supplied by the user when the table +** was created, or added later to the table definition by an ALTER TABLE +** command. If the latter, then the row-records in the table btree on disk +** may not contain a value for the column and the default value, taken +** from the P4 parameter of the OP_Column instruction, is returned instead. +** If the former, then all row-records are guaranteed to include a value +** for the column and the P4 value is not required. +** +** Column definitions created by an ALTER TABLE command may only have +** literal default values specified: a number, null or a string. (If a more +** complicated default expression value was provided, it is evaluated +** when the ALTER TABLE is executed and one of the literal values written +** into the sqlite_schema table.) +** +** Therefore, the P4 parameter is only required if the default value for +** the column is a literal number, string or null. The sqlite3ValueFromExpr() +** function is capable of transforming these types of expressions into +** sqlite3_value objects. +** +** If column as REAL affinity and the table is an ordinary b-tree table +** (not a virtual table) then the value might have been stored as an +** integer. In that case, add an OP_RealAffinity opcode to make sure +** it has been converted into REAL. +*/ +SQLITE_PRIVATE void sqlite3ColumnDefault(Vdbe *v, Table *pTab, int i, int iReg){ + assert( pTab!=0 ); + if( !IsView(pTab) ){ + sqlite3_value *pValue = 0; + u8 enc = ENC(sqlite3VdbeDb(v)); + Column *pCol = &pTab->aCol[i]; + VdbeComment((v, "%s.%s", pTab->zName, pCol->zCnName)); + assert( inCol ); + sqlite3ValueFromExpr(sqlite3VdbeDb(v), + sqlite3ColumnExpr(pTab,pCol), enc, + pCol->affinity, &pValue); + if( pValue ){ + sqlite3VdbeAppendP4(v, pValue, P4_MEM); + } + } +#ifndef SQLITE_OMIT_FLOATING_POINT + if( pTab->aCol[i].affinity==SQLITE_AFF_REAL && !IsVirtual(pTab) ){ + sqlite3VdbeAddOp1(v, OP_RealAffinity, iReg); + } +#endif +} + +/* +** Check to see if column iCol of index pIdx references any of the +** columns defined by aXRef and chngRowid. Return true if it does +** and false if not. This is an optimization. False-positives are a +** performance degradation, but false-negatives can result in a corrupt +** index and incorrect answers. +** +** aXRef[j] will be non-negative if column j of the original table is +** being updated. chngRowid will be true if the rowid of the table is +** being updated. +*/ +static int indexColumnIsBeingUpdated( + Index *pIdx, /* The index to check */ + int iCol, /* Which column of the index to check */ + int *aXRef, /* aXRef[j]>=0 if column j is being updated */ + int chngRowid /* true if the rowid is being updated */ +){ + i16 iIdxCol = pIdx->aiColumn[iCol]; + assert( iIdxCol!=XN_ROWID ); /* Cannot index rowid */ + if( iIdxCol>=0 ){ + return aXRef[iIdxCol]>=0; + } + assert( iIdxCol==XN_EXPR ); + assert( pIdx->aColExpr!=0 ); + assert( pIdx->aColExpr->a[iCol].pExpr!=0 ); + return sqlite3ExprReferencesUpdatedColumn(pIdx->aColExpr->a[iCol].pExpr, + aXRef,chngRowid); +} + +/* +** Check to see if index pIdx is a partial index whose conditional +** expression might change values due to an UPDATE. Return true if +** the index is subject to change and false if the index is guaranteed +** to be unchanged. This is an optimization. False-positives are a +** performance degradation, but false-negatives can result in a corrupt +** index and incorrect answers. +** +** aXRef[j] will be non-negative if column j of the original table is +** being updated. chngRowid will be true if the rowid of the table is +** being updated. +*/ +static int indexWhereClauseMightChange( + Index *pIdx, /* The index to check */ + int *aXRef, /* aXRef[j]>=0 if column j is being updated */ + int chngRowid /* true if the rowid is being updated */ +){ + if( pIdx->pPartIdxWhere==0 ) return 0; + return sqlite3ExprReferencesUpdatedColumn(pIdx->pPartIdxWhere, + aXRef, chngRowid); +} + +/* +** Allocate and return a pointer to an expression of type TK_ROW with +** Expr.iColumn set to value (iCol+1). The resolver will modify the +** expression to be a TK_COLUMN reading column iCol of the first +** table in the source-list (pSrc->a[0]). +*/ +static Expr *exprRowColumn(Parse *pParse, int iCol){ + Expr *pRet = sqlite3PExpr(pParse, TK_ROW, 0, 0); + if( pRet ) pRet->iColumn = iCol+1; + return pRet; +} + +/* +** Assuming both the pLimit and pOrderBy parameters are NULL, this function +** generates VM code to run the query: +** +** SELECT , pChanges FROM pTabList WHERE pWhere +** +** and write the results to the ephemeral table already opened as cursor +** iEph. None of pChanges, pTabList or pWhere are modified or consumed by +** this function, they must be deleted by the caller. +** +** Or, if pLimit and pOrderBy are not NULL, and pTab is not a view: +** +** SELECT , pChanges FROM pTabList +** WHERE pWhere +** GROUP BY +** ORDER BY pOrderBy LIMIT pLimit +** +** If pTab is a view, the GROUP BY clause is omitted. +** +** Exactly how results are written to table iEph, and exactly what +** the in the query above are is determined by the type +** of table pTabList->a[0].pTab. +** +** If the table is a WITHOUT ROWID table, then argument pPk must be its +** PRIMARY KEY. In this case are the primary key columns +** of the table, in order. The results of the query are written to ephemeral +** table iEph as index keys, using OP_IdxInsert. +** +** If the table is actually a view, then are all columns of +** the view. The results are written to the ephemeral table iEph as records +** with automatically assigned integer keys. +** +** If the table is a virtual or ordinary intkey table, then +** is its rowid. For a virtual table, the results are written to iEph as +** records with automatically assigned integer keys For intkey tables, the +** rowid value in is used as the integer key, and the +** remaining fields make up the table record. +*/ +static void updateFromSelect( + Parse *pParse, /* Parse context */ + int iEph, /* Cursor for open eph. table */ + Index *pPk, /* PK if table 0 is WITHOUT ROWID */ + ExprList *pChanges, /* List of expressions to return */ + SrcList *pTabList, /* List of tables to select from */ + Expr *pWhere, /* WHERE clause for query */ + ExprList *pOrderBy, /* ORDER BY clause */ + Expr *pLimit /* LIMIT clause */ +){ + int i; + SelectDest dest; + Select *pSelect = 0; + ExprList *pList = 0; + ExprList *pGrp = 0; + Expr *pLimit2 = 0; + ExprList *pOrderBy2 = 0; + sqlite3 *db = pParse->db; + Table *pTab = pTabList->a[0].pTab; + SrcList *pSrc; + Expr *pWhere2; + int eDest; + +#ifdef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + if( pOrderBy && pLimit==0 ) { + sqlite3ErrorMsg(pParse, "ORDER BY without LIMIT on UPDATE"); + return; + } + pOrderBy2 = sqlite3ExprListDup(db, pOrderBy, 0); + pLimit2 = sqlite3ExprDup(db, pLimit, 0); +#else + UNUSED_PARAMETER(pOrderBy); + UNUSED_PARAMETER(pLimit); +#endif + + pSrc = sqlite3SrcListDup(db, pTabList, 0); + pWhere2 = sqlite3ExprDup(db, pWhere, 0); + + assert( pTabList->nSrc>1 ); + if( pSrc ){ + pSrc->a[0].fg.notCte = 1; + pSrc->a[0].iCursor = -1; + pSrc->a[0].pTab->nTabRef--; + pSrc->a[0].pTab = 0; + } + if( pPk ){ + for(i=0; inKeyCol; i++){ + Expr *pNew = exprRowColumn(pParse, pPk->aiColumn[i]); +#ifdef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + if( pLimit ){ + pGrp = sqlite3ExprListAppend(pParse, pGrp, sqlite3ExprDup(db, pNew, 0)); + } +#endif + pList = sqlite3ExprListAppend(pParse, pList, pNew); + } + eDest = IsVirtual(pTab) ? SRT_Table : SRT_Upfrom; + }else if( IsView(pTab) ){ + for(i=0; inCol; i++){ + pList = sqlite3ExprListAppend(pParse, pList, exprRowColumn(pParse, i)); + } + eDest = SRT_Table; + }else{ + eDest = IsVirtual(pTab) ? SRT_Table : SRT_Upfrom; + pList = sqlite3ExprListAppend(pParse, 0, sqlite3PExpr(pParse,TK_ROW,0,0)); +#ifdef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + if( pLimit ){ + pGrp = sqlite3ExprListAppend(pParse, 0, sqlite3PExpr(pParse,TK_ROW,0,0)); + } +#endif + } + assert( pChanges!=0 || pParse->db->mallocFailed ); + if( pChanges ){ + for(i=0; inExpr; i++){ + pList = sqlite3ExprListAppend(pParse, pList, + sqlite3ExprDup(db, pChanges->a[i].pExpr, 0) + ); + } + } + pSelect = sqlite3SelectNew(pParse, pList, + pSrc, pWhere2, pGrp, 0, pOrderBy2, SF_UFSrcCheck|SF_IncludeHidden, pLimit2 + ); + if( pSelect ) pSelect->selFlags |= SF_OrderByReqd; + sqlite3SelectDestInit(&dest, eDest, iEph); + dest.iSDParm2 = (pPk ? pPk->nKeyCol : -1); + sqlite3Select(pParse, pSelect, &dest); + sqlite3SelectDelete(db, pSelect); +} + +/* +** Process an UPDATE statement. +** +** UPDATE OR IGNORE tbl SET a=b, c=d FROM tbl2... WHERE e<5 AND f NOT NULL; +** \_______/ \_/ \______/ \_____/ \________________/ +** onError | pChanges | pWhere +** \_______________________/ +** pTabList +*/ +SQLITE_PRIVATE void sqlite3Update( + Parse *pParse, /* The parser context */ + SrcList *pTabList, /* The table in which we should change things */ + ExprList *pChanges, /* Things to be changed */ + Expr *pWhere, /* The WHERE clause. May be null */ + int onError, /* How to handle constraint errors */ + ExprList *pOrderBy, /* ORDER BY clause. May be null */ + Expr *pLimit, /* LIMIT clause. May be null */ + Upsert *pUpsert /* ON CONFLICT clause, or null */ +){ + int i, j, k; /* Loop counters */ + Table *pTab; /* The table to be updated */ + int addrTop = 0; /* VDBE instruction address of the start of the loop */ + WhereInfo *pWInfo = 0; /* Information about the WHERE clause */ + Vdbe *v; /* The virtual database engine */ + Index *pIdx; /* For looping over indices */ + Index *pPk; /* The PRIMARY KEY index for WITHOUT ROWID tables */ + int nIdx; /* Number of indices that need updating */ + int nAllIdx; /* Total number of indexes */ + int iBaseCur; /* Base cursor number */ + int iDataCur; /* Cursor for the canonical data btree */ + int iIdxCur; /* Cursor for the first index */ + sqlite3 *db; /* The database structure */ + int *aRegIdx = 0; /* Registers for to each index and the main table */ + int *aXRef = 0; /* aXRef[i] is the index in pChanges->a[] of the + ** an expression for the i-th column of the table. + ** aXRef[i]==-1 if the i-th column is not changed. */ + u8 *aToOpen; /* 1 for tables and indices to be opened */ + u8 chngPk; /* PRIMARY KEY changed in a WITHOUT ROWID table */ + u8 chngRowid; /* Rowid changed in a normal table */ + u8 chngKey; /* Either chngPk or chngRowid */ + Expr *pRowidExpr = 0; /* Expression defining the new record number */ + int iRowidExpr = -1; /* Index of "rowid=" (or IPK) assignment in pChanges */ + AuthContext sContext; /* The authorization context */ + NameContext sNC; /* The name-context to resolve expressions in */ + int iDb; /* Database containing the table being updated */ + int eOnePass; /* ONEPASS_XXX value from where.c */ + int hasFK; /* True if foreign key processing is required */ + int labelBreak; /* Jump here to break out of UPDATE loop */ + int labelContinue; /* Jump here to continue next step of UPDATE loop */ + int flags; /* Flags for sqlite3WhereBegin() */ + +#ifndef SQLITE_OMIT_TRIGGER + int isView; /* True when updating a view (INSTEAD OF trigger) */ + Trigger *pTrigger; /* List of triggers on pTab, if required */ + int tmask; /* Mask of TRIGGER_BEFORE|TRIGGER_AFTER */ +#endif + int newmask; /* Mask of NEW.* columns accessed by BEFORE triggers */ + int iEph = 0; /* Ephemeral table holding all primary key values */ + int nKey = 0; /* Number of elements in regKey for WITHOUT ROWID */ + int aiCurOnePass[2]; /* The write cursors opened by WHERE_ONEPASS */ + int addrOpen = 0; /* Address of OP_OpenEphemeral */ + int iPk = 0; /* First of nPk cells holding PRIMARY KEY value */ + i16 nPk = 0; /* Number of components of the PRIMARY KEY */ + int bReplace = 0; /* True if REPLACE conflict resolution might happen */ + int bFinishSeek = 1; /* The OP_FinishSeek opcode is needed */ + int nChangeFrom = 0; /* If there is a FROM, pChanges->nExpr, else 0 */ + + /* Register Allocations */ + int regRowCount = 0; /* A count of rows changed */ + int regOldRowid = 0; /* The old rowid */ + int regNewRowid = 0; /* The new rowid */ + int regNew = 0; /* Content of the NEW.* table in triggers */ + int regOld = 0; /* Content of OLD.* table in triggers */ + int regRowSet = 0; /* Rowset of rows to be updated */ + int regKey = 0; /* composite PRIMARY KEY value */ + + memset(&sContext, 0, sizeof(sContext)); + db = pParse->db; + assert( db->pParse==pParse ); + if( pParse->nErr ){ + goto update_cleanup; + } + assert( db->mallocFailed==0 ); + + /* Locate the table which we want to update. + */ + pTab = sqlite3SrcListLookup(pParse, pTabList); + if( pTab==0 ) goto update_cleanup; + iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema); + + /* Figure out if we have any triggers and if the table being + ** updated is a view. + */ +#ifndef SQLITE_OMIT_TRIGGER + pTrigger = sqlite3TriggersExist(pParse, pTab, TK_UPDATE, pChanges, &tmask); + isView = IsView(pTab); + assert( pTrigger || tmask==0 ); +#else +# define pTrigger 0 +# define isView 0 +# define tmask 0 +#endif +#ifdef SQLITE_OMIT_VIEW +# undef isView +# define isView 0 +#endif + + /* If there was a FROM clause, set nChangeFrom to the number of expressions + ** in the change-list. Otherwise, set it to 0. There cannot be a FROM + ** clause if this function is being called to generate code for part of + ** an UPSERT statement. */ + nChangeFrom = (pTabList->nSrc>1) ? pChanges->nExpr : 0; + assert( nChangeFrom==0 || pUpsert==0 ); + +#ifdef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + if( !isView && nChangeFrom==0 ){ + pWhere = sqlite3LimitWhere( + pParse, pTabList, pWhere, pOrderBy, pLimit, "UPDATE" + ); + pOrderBy = 0; + pLimit = 0; + } +#endif + + if( sqlite3ViewGetColumnNames(pParse, pTab) ){ + goto update_cleanup; + } + if( sqlite3IsReadOnly(pParse, pTab, tmask) ){ + goto update_cleanup; + } + + /* Allocate a cursors for the main database table and for all indices. + ** The index cursors might not be used, but if they are used they + ** need to occur right after the database cursor. So go ahead and + ** allocate enough space, just in case. + */ + iBaseCur = iDataCur = pParse->nTab++; + iIdxCur = iDataCur+1; + pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab); + testcase( pPk!=0 && pPk!=pTab->pIndex ); + for(nIdx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, nIdx++){ + if( pPk==pIdx ){ + iDataCur = pParse->nTab; + } + pParse->nTab++; + } + if( pUpsert ){ + /* On an UPSERT, reuse the same cursors already opened by INSERT */ + iDataCur = pUpsert->iDataCur; + iIdxCur = pUpsert->iIdxCur; + pParse->nTab = iBaseCur; + } + pTabList->a[0].iCursor = iDataCur; + + /* Allocate space for aXRef[], aRegIdx[], and aToOpen[]. + ** Initialize aXRef[] and aToOpen[] to their default values. + */ + aXRef = sqlite3DbMallocRawNN(db, sizeof(int) * (pTab->nCol+nIdx+1) + nIdx+2 ); + if( aXRef==0 ) goto update_cleanup; + aRegIdx = aXRef+pTab->nCol; + aToOpen = (u8*)(aRegIdx+nIdx+1); + memset(aToOpen, 1, nIdx+1); + aToOpen[nIdx+1] = 0; + for(i=0; inCol; i++) aXRef[i] = -1; + + /* Initialize the name-context */ + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + sNC.pSrcList = pTabList; + sNC.uNC.pUpsert = pUpsert; + sNC.ncFlags = NC_UUpsert; + + /* Begin generating code. */ + v = sqlite3GetVdbe(pParse); + if( v==0 ) goto update_cleanup; + + /* Resolve the column names in all the expressions of the + ** of the UPDATE statement. Also find the column index + ** for each column to be updated in the pChanges array. For each + ** column to be updated, make sure we have authorization to change + ** that column. + */ + chngRowid = chngPk = 0; + for(i=0; inExpr; i++){ + u8 hCol = sqlite3StrIHash(pChanges->a[i].zEName); + /* If this is an UPDATE with a FROM clause, do not resolve expressions + ** here. The call to sqlite3Select() below will do that. */ + if( nChangeFrom==0 && sqlite3ResolveExprNames(&sNC, pChanges->a[i].pExpr) ){ + goto update_cleanup; + } + for(j=0; jnCol; j++){ + if( pTab->aCol[j].hName==hCol + && sqlite3StrICmp(pTab->aCol[j].zCnName, pChanges->a[i].zEName)==0 + ){ + if( j==pTab->iPKey ){ + chngRowid = 1; + pRowidExpr = pChanges->a[i].pExpr; + iRowidExpr = i; + }else if( pPk && (pTab->aCol[j].colFlags & COLFLAG_PRIMKEY)!=0 ){ + chngPk = 1; + } +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + else if( pTab->aCol[j].colFlags & COLFLAG_GENERATED ){ + testcase( pTab->aCol[j].colFlags & COLFLAG_VIRTUAL ); + testcase( pTab->aCol[j].colFlags & COLFLAG_STORED ); + sqlite3ErrorMsg(pParse, + "cannot UPDATE generated column \"%s\"", + pTab->aCol[j].zCnName); + goto update_cleanup; + } +#endif + aXRef[j] = i; + break; + } + } + if( j>=pTab->nCol ){ + if( pPk==0 && sqlite3IsRowid(pChanges->a[i].zEName) ){ + j = -1; + chngRowid = 1; + pRowidExpr = pChanges->a[i].pExpr; + iRowidExpr = i; + }else{ + sqlite3ErrorMsg(pParse, "no such column: %s", pChanges->a[i].zEName); + pParse->checkSchema = 1; + goto update_cleanup; + } + } +#ifndef SQLITE_OMIT_AUTHORIZATION + { + int rc; + rc = sqlite3AuthCheck(pParse, SQLITE_UPDATE, pTab->zName, + j<0 ? "ROWID" : pTab->aCol[j].zCnName, + db->aDb[iDb].zDbSName); + if( rc==SQLITE_DENY ){ + goto update_cleanup; + }else if( rc==SQLITE_IGNORE ){ + aXRef[j] = -1; + } + } +#endif + } + assert( (chngRowid & chngPk)==0 ); + assert( chngRowid==0 || chngRowid==1 ); + assert( chngPk==0 || chngPk==1 ); + chngKey = chngRowid + chngPk; + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + /* Mark generated columns as changing if their generator expressions + ** reference any changing column. The actual aXRef[] value for + ** generated expressions is not used, other than to check to see that it + ** is non-negative, so the value of aXRef[] for generated columns can be + ** set to any non-negative number. We use 99999 so that the value is + ** obvious when looking at aXRef[] in a symbolic debugger. + */ + if( pTab->tabFlags & TF_HasGenerated ){ + int bProgress; + testcase( pTab->tabFlags & TF_HasVirtual ); + testcase( pTab->tabFlags & TF_HasStored ); + do{ + bProgress = 0; + for(i=0; inCol; i++){ + if( aXRef[i]>=0 ) continue; + if( (pTab->aCol[i].colFlags & COLFLAG_GENERATED)==0 ) continue; + if( sqlite3ExprReferencesUpdatedColumn( + sqlite3ColumnExpr(pTab, &pTab->aCol[i]), + aXRef, chngRowid) + ){ + aXRef[i] = 99999; + bProgress = 1; + } + } + }while( bProgress ); + } +#endif + + /* The SET expressions are not actually used inside the WHERE loop. + ** So reset the colUsed mask. Unless this is a virtual table. In that + ** case, set all bits of the colUsed mask (to ensure that the virtual + ** table implementation makes all columns available). + */ + pTabList->a[0].colUsed = IsVirtual(pTab) ? ALLBITS : 0; + + hasFK = sqlite3FkRequired(pParse, pTab, aXRef, chngKey); + + /* There is one entry in the aRegIdx[] array for each index on the table + ** being updated. Fill in aRegIdx[] with a register number that will hold + ** the key for accessing each index. + */ + if( onError==OE_Replace ) bReplace = 1; + for(nAllIdx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, nAllIdx++){ + int reg; + if( chngKey || hasFK>1 || pIdx==pPk + || indexWhereClauseMightChange(pIdx,aXRef,chngRowid) + ){ + reg = ++pParse->nMem; + pParse->nMem += pIdx->nColumn; + }else{ + reg = 0; + for(i=0; inKeyCol; i++){ + if( indexColumnIsBeingUpdated(pIdx, i, aXRef, chngRowid) ){ + reg = ++pParse->nMem; + pParse->nMem += pIdx->nColumn; + if( onError==OE_Default && pIdx->onError==OE_Replace ){ + bReplace = 1; + } + break; + } + } + } + if( reg==0 ) aToOpen[nAllIdx+1] = 0; + aRegIdx[nAllIdx] = reg; + } + aRegIdx[nAllIdx] = ++pParse->nMem; /* Register storing the table record */ + if( bReplace ){ + /* If REPLACE conflict resolution might be invoked, open cursors on all + ** indexes in case they are needed to delete records. */ + memset(aToOpen, 1, nIdx+1); + } + + if( pParse->nested==0 ) sqlite3VdbeCountChanges(v); + sqlite3BeginWriteOperation(pParse, pTrigger || hasFK, iDb); + + /* Allocate required registers. */ + if( !IsVirtual(pTab) ){ + /* For now, regRowSet and aRegIdx[nAllIdx] share the same register. + ** If regRowSet turns out to be needed, then aRegIdx[nAllIdx] will be + ** reallocated. aRegIdx[nAllIdx] is the register in which the main + ** table record is written. regRowSet holds the RowSet for the + ** two-pass update algorithm. */ + assert( aRegIdx[nAllIdx]==pParse->nMem ); + regRowSet = aRegIdx[nAllIdx]; + regOldRowid = regNewRowid = ++pParse->nMem; + if( chngPk || pTrigger || hasFK ){ + regOld = pParse->nMem + 1; + pParse->nMem += pTab->nCol; + } + if( chngKey || pTrigger || hasFK ){ + regNewRowid = ++pParse->nMem; + } + regNew = pParse->nMem + 1; + pParse->nMem += pTab->nCol; + } + + /* Start the view context. */ + if( isView ){ + sqlite3AuthContextPush(pParse, &sContext, pTab->zName); + } + + /* If we are trying to update a view, realize that view into + ** an ephemeral table. + */ +#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER) + if( nChangeFrom==0 && isView ){ + sqlite3MaterializeView(pParse, pTab, + pWhere, pOrderBy, pLimit, iDataCur + ); + pOrderBy = 0; + pLimit = 0; + } +#endif + + /* Resolve the column names in all the expressions in the + ** WHERE clause. + */ + if( nChangeFrom==0 && sqlite3ResolveExprNames(&sNC, pWhere) ){ + goto update_cleanup; + } + +#ifndef SQLITE_OMIT_VIRTUALTABLE + /* Virtual tables must be handled separately */ + if( IsVirtual(pTab) ){ + updateVirtualTable(pParse, pTabList, pTab, pChanges, pRowidExpr, aXRef, + pWhere, onError); + goto update_cleanup; + } +#endif + + /* Jump to labelBreak to abandon further processing of this UPDATE */ + labelContinue = labelBreak = sqlite3VdbeMakeLabel(pParse); + + /* Not an UPSERT. Normal processing. Begin by + ** initialize the count of updated rows */ + if( (db->flags&SQLITE_CountRows)!=0 + && !pParse->pTriggerTab + && !pParse->nested + && !pParse->bReturning + && pUpsert==0 + ){ + regRowCount = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, regRowCount); + } + + if( nChangeFrom==0 && HasRowid(pTab) ){ + sqlite3VdbeAddOp3(v, OP_Null, 0, regRowSet, regOldRowid); + iEph = pParse->nTab++; + addrOpen = sqlite3VdbeAddOp3(v, OP_OpenEphemeral, iEph, 0, regRowSet); + }else{ + assert( pPk!=0 || HasRowid(pTab) ); + nPk = pPk ? pPk->nKeyCol : 0; + iPk = pParse->nMem+1; + pParse->nMem += nPk; + pParse->nMem += nChangeFrom; + regKey = ++pParse->nMem; + if( pUpsert==0 ){ + int nEphCol = nPk + nChangeFrom + (isView ? pTab->nCol : 0); + iEph = pParse->nTab++; + if( pPk ) sqlite3VdbeAddOp3(v, OP_Null, 0, iPk, iPk+nPk-1); + addrOpen = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iEph, nEphCol); + if( pPk ){ + KeyInfo *pKeyInfo = sqlite3KeyInfoOfIndex(pParse, pPk); + if( pKeyInfo ){ + pKeyInfo->nAllField = nEphCol; + sqlite3VdbeAppendP4(v, pKeyInfo, P4_KEYINFO); + } + } + if( nChangeFrom ){ + updateFromSelect( + pParse, iEph, pPk, pChanges, pTabList, pWhere, pOrderBy, pLimit + ); +#ifndef SQLITE_OMIT_SUBQUERY + if( isView ) iDataCur = iEph; +#endif + } + } + } + + if( nChangeFrom ){ + sqlite3MultiWrite(pParse); + eOnePass = ONEPASS_OFF; + nKey = nPk; + regKey = iPk; + }else{ + if( pUpsert ){ + /* If this is an UPSERT, then all cursors have already been opened by + ** the outer INSERT and the data cursor should be pointing at the row + ** that is to be updated. So bypass the code that searches for the + ** row(s) to be updated. + */ + pWInfo = 0; + eOnePass = ONEPASS_SINGLE; + sqlite3ExprIfFalse(pParse, pWhere, labelBreak, SQLITE_JUMPIFNULL); + bFinishSeek = 0; + }else{ + /* Begin the database scan. + ** + ** Do not consider a single-pass strategy for a multi-row update if + ** there are any triggers or foreign keys to process, or rows may + ** be deleted as a result of REPLACE conflict handling. Any of these + ** things might disturb a cursor being used to scan through the table + ** or index, causing a single-pass approach to malfunction. */ + flags = WHERE_ONEPASS_DESIRED; + if( !pParse->nested && !pTrigger && !hasFK && !chngKey && !bReplace ){ + flags |= WHERE_ONEPASS_MULTIROW; + } + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,0,0,flags,iIdxCur); + if( pWInfo==0 ) goto update_cleanup; + + /* A one-pass strategy that might update more than one row may not + ** be used if any column of the index used for the scan is being + ** updated. Otherwise, if there is an index on "b", statements like + ** the following could create an infinite loop: + ** + ** UPDATE t1 SET b=b+1 WHERE b>? + ** + ** Fall back to ONEPASS_OFF if where.c has selected a ONEPASS_MULTI + ** strategy that uses an index for which one or more columns are being + ** updated. */ + eOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass); + bFinishSeek = sqlite3WhereUsesDeferredSeek(pWInfo); + if( eOnePass!=ONEPASS_SINGLE ){ + sqlite3MultiWrite(pParse); + if( eOnePass==ONEPASS_MULTI ){ + int iCur = aiCurOnePass[1]; + if( iCur>=0 && iCur!=iDataCur && aToOpen[iCur-iBaseCur] ){ + eOnePass = ONEPASS_OFF; + } + assert( iCur!=iDataCur || !HasRowid(pTab) ); + } + } + } + + if( HasRowid(pTab) ){ + /* Read the rowid of the current row of the WHERE scan. In ONEPASS_OFF + ** mode, write the rowid into the FIFO. In either of the one-pass modes, + ** leave it in register regOldRowid. */ + sqlite3VdbeAddOp2(v, OP_Rowid, iDataCur, regOldRowid); + if( eOnePass==ONEPASS_OFF ){ + aRegIdx[nAllIdx] = ++pParse->nMem; + sqlite3VdbeAddOp3(v, OP_Insert, iEph, regRowSet, regOldRowid); + }else{ + if( ALWAYS(addrOpen) ) sqlite3VdbeChangeToNoop(v, addrOpen); + } + }else{ + /* Read the PK of the current row into an array of registers. In + ** ONEPASS_OFF mode, serialize the array into a record and store it in + ** the ephemeral table. Or, in ONEPASS_SINGLE or MULTI mode, change + ** the OP_OpenEphemeral instruction to a Noop (the ephemeral table + ** is not required) and leave the PK fields in the array of registers. */ + for(i=0; iaiColumn[i]>=0 ); + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, + pPk->aiColumn[i], iPk+i); + } + if( eOnePass ){ + if( addrOpen ) sqlite3VdbeChangeToNoop(v, addrOpen); + nKey = nPk; + regKey = iPk; + }else{ + sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, regKey, + sqlite3IndexAffinityStr(db, pPk), nPk); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iEph, regKey, iPk, nPk); + } + } + } + + if( pUpsert==0 ){ + if( nChangeFrom==0 && eOnePass!=ONEPASS_MULTI ){ + sqlite3WhereEnd(pWInfo); + } + + if( !isView ){ + int addrOnce = 0; + + /* Open every index that needs updating. */ + if( eOnePass!=ONEPASS_OFF ){ + if( aiCurOnePass[0]>=0 ) aToOpen[aiCurOnePass[0]-iBaseCur] = 0; + if( aiCurOnePass[1]>=0 ) aToOpen[aiCurOnePass[1]-iBaseCur] = 0; + } + + if( eOnePass==ONEPASS_MULTI && (nIdx-(aiCurOnePass[1]>=0))>0 ){ + addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + } + sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, 0, iBaseCur, + aToOpen, 0, 0); + if( addrOnce ){ + sqlite3VdbeJumpHereOrPopInst(v, addrOnce); + } + } + + /* Top of the update loop */ + if( eOnePass!=ONEPASS_OFF ){ + if( aiCurOnePass[0]!=iDataCur + && aiCurOnePass[1]!=iDataCur +#ifdef SQLITE_ALLOW_ROWID_IN_VIEW + && !isView +#endif + ){ + assert( pPk ); + sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, labelBreak, regKey,nKey); + VdbeCoverage(v); + } + if( eOnePass!=ONEPASS_SINGLE ){ + labelContinue = sqlite3VdbeMakeLabel(pParse); + } + sqlite3VdbeAddOp2(v, OP_IsNull, pPk ? regKey : regOldRowid, labelBreak); + VdbeCoverageIf(v, pPk==0); + VdbeCoverageIf(v, pPk!=0); + }else if( pPk || nChangeFrom ){ + labelContinue = sqlite3VdbeMakeLabel(pParse); + sqlite3VdbeAddOp2(v, OP_Rewind, iEph, labelBreak); VdbeCoverage(v); + addrTop = sqlite3VdbeCurrentAddr(v); + if( nChangeFrom ){ + if( !isView ){ + if( pPk ){ + for(i=0; i=0 ); + if( nChangeFrom==0 ){ + sqlite3ExprCode(pParse, pRowidExpr, regNewRowid); + }else{ + sqlite3VdbeAddOp3(v, OP_Column, iEph, iRowidExpr, regNewRowid); + } + sqlite3VdbeAddOp1(v, OP_MustBeInt, regNewRowid); VdbeCoverage(v); + } + + /* Compute the old pre-UPDATE content of the row being changed, if that + ** information is needed */ + if( chngPk || hasFK || pTrigger ){ + u32 oldmask = (hasFK ? sqlite3FkOldmask(pParse, pTab) : 0); + oldmask |= sqlite3TriggerColmask(pParse, + pTrigger, pChanges, 0, TRIGGER_BEFORE|TRIGGER_AFTER, pTab, onError + ); + for(i=0; inCol; i++){ + u32 colFlags = pTab->aCol[i].colFlags; + k = sqlite3TableColumnToStorage(pTab, i) + regOld; + if( oldmask==0xffffffff + || (i<32 && (oldmask & MASKBIT32(i))!=0) + || (colFlags & COLFLAG_PRIMKEY)!=0 + ){ + testcase( oldmask!=0xffffffff && i==31 ); + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, i, k); + }else{ + sqlite3VdbeAddOp2(v, OP_Null, 0, k); + } + } + if( chngRowid==0 && pPk==0 ){ + sqlite3VdbeAddOp2(v, OP_Copy, regOldRowid, regNewRowid); + } + } + + /* Populate the array of registers beginning at regNew with the new + ** row data. This array is used to check constants, create the new + ** table and index records, and as the values for any new.* references + ** made by triggers. + ** + ** If there are one or more BEFORE triggers, then do not populate the + ** registers associated with columns that are (a) not modified by + ** this UPDATE statement and (b) not accessed by new.* references. The + ** values for registers not modified by the UPDATE must be reloaded from + ** the database after the BEFORE triggers are fired anyway (as the trigger + ** may have modified them). So not loading those that are not going to + ** be used eliminates some redundant opcodes. + */ + newmask = sqlite3TriggerColmask( + pParse, pTrigger, pChanges, 1, TRIGGER_BEFORE, pTab, onError + ); + for(i=0, k=regNew; inCol; i++, k++){ + if( i==pTab->iPKey ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, k); + }else if( (pTab->aCol[i].colFlags & COLFLAG_GENERATED)!=0 ){ + if( pTab->aCol[i].colFlags & COLFLAG_VIRTUAL ) k--; + }else{ + j = aXRef[i]; + if( j>=0 ){ + if( nChangeFrom ){ + int nOff = (isView ? pTab->nCol : nPk); + assert( eOnePass==ONEPASS_OFF ); + sqlite3VdbeAddOp3(v, OP_Column, iEph, nOff+j, k); + }else{ + sqlite3ExprCode(pParse, pChanges->a[j].pExpr, k); + } + }else if( 0==(tmask&TRIGGER_BEFORE) || i>31 || (newmask & MASKBIT32(i)) ){ + /* This branch loads the value of a column that will not be changed + ** into a register. This is done if there are no BEFORE triggers, or + ** if there are one or more BEFORE triggers that use this value via + ** a new.* reference in a trigger program. + */ + testcase( i==31 ); + testcase( i==32 ); + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, i, k); + bFinishSeek = 0; + }else{ + sqlite3VdbeAddOp2(v, OP_Null, 0, k); + } + } + } +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( pTab->tabFlags & TF_HasGenerated ){ + testcase( pTab->tabFlags & TF_HasVirtual ); + testcase( pTab->tabFlags & TF_HasStored ); + sqlite3ComputeGeneratedColumns(pParse, regNew, pTab); + } +#endif + + /* Fire any BEFORE UPDATE triggers. This happens before constraints are + ** verified. One could argue that this is wrong. + */ + if( tmask&TRIGGER_BEFORE ){ + sqlite3TableAffinity(v, pTab, regNew); + sqlite3CodeRowTrigger(pParse, pTrigger, TK_UPDATE, pChanges, + TRIGGER_BEFORE, pTab, regOldRowid, onError, labelContinue); + + if( !isView ){ + /* The row-trigger may have deleted the row being updated. In this + ** case, jump to the next row. No updates or AFTER triggers are + ** required. This behavior - what happens when the row being updated + ** is deleted or renamed by a BEFORE trigger - is left undefined in the + ** documentation. + */ + if( pPk ){ + sqlite3VdbeAddOp4Int(v, OP_NotFound,iDataCur,labelContinue,regKey,nKey); + VdbeCoverage(v); + }else{ + sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, labelContinue,regOldRowid); + VdbeCoverage(v); + } + + /* After-BEFORE-trigger-reload-loop: + ** If it did not delete it, the BEFORE trigger may still have modified + ** some of the columns of the row being updated. Load the values for + ** all columns not modified by the update statement into their registers + ** in case this has happened. Only unmodified columns are reloaded. + ** The values computed for modified columns use the values before the + ** BEFORE trigger runs. See test case trigger1-18.0 (added 2018-04-26) + ** for an example. + */ + for(i=0, k=regNew; inCol; i++, k++){ + if( pTab->aCol[i].colFlags & COLFLAG_GENERATED ){ + if( pTab->aCol[i].colFlags & COLFLAG_VIRTUAL ) k--; + }else if( aXRef[i]<0 && i!=pTab->iPKey ){ + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, i, k); + } + } +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + if( pTab->tabFlags & TF_HasGenerated ){ + testcase( pTab->tabFlags & TF_HasVirtual ); + testcase( pTab->tabFlags & TF_HasStored ); + sqlite3ComputeGeneratedColumns(pParse, regNew, pTab); + } +#endif + } + } + + if( !isView ){ + /* Do constraint checks. */ + assert( regOldRowid>0 ); + sqlite3GenerateConstraintChecks(pParse, pTab, aRegIdx, iDataCur, iIdxCur, + regNewRowid, regOldRowid, chngKey, onError, labelContinue, &bReplace, + aXRef, 0); + + /* If REPLACE conflict handling may have been used, or if the PK of the + ** row is changing, then the GenerateConstraintChecks() above may have + ** moved cursor iDataCur. Reseek it. */ + if( bReplace || chngKey ){ + if( pPk ){ + sqlite3VdbeAddOp4Int(v, OP_NotFound,iDataCur,labelContinue,regKey,nKey); + }else{ + sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, labelContinue,regOldRowid); + } + VdbeCoverageNeverTaken(v); + } + + /* Do FK constraint checks. */ + if( hasFK ){ + sqlite3FkCheck(pParse, pTab, regOldRowid, 0, aXRef, chngKey); + } + + /* Delete the index entries associated with the current record. */ + sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, aRegIdx, -1); + + /* We must run the OP_FinishSeek opcode to resolve a prior + ** OP_DeferredSeek if there is any possibility that there have been + ** no OP_Column opcodes since the OP_DeferredSeek was issued. But + ** we want to avoid the OP_FinishSeek if possible, as running it + ** costs CPU cycles. */ + if( bFinishSeek ){ + sqlite3VdbeAddOp1(v, OP_FinishSeek, iDataCur); + } + + /* If changing the rowid value, or if there are foreign key constraints + ** to process, delete the old record. Otherwise, add a noop OP_Delete + ** to invoke the pre-update hook. + ** + ** That (regNew==regnewRowid+1) is true is also important for the + ** pre-update hook. If the caller invokes preupdate_new(), the returned + ** value is copied from memory cell (regNewRowid+1+iCol), where iCol + ** is the column index supplied by the user. + */ + assert( regNew==regNewRowid+1 ); +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK + sqlite3VdbeAddOp3(v, OP_Delete, iDataCur, + OPFLAG_ISUPDATE | ((hasFK>1 || chngKey) ? 0 : OPFLAG_ISNOOP), + regNewRowid + ); + if( eOnePass==ONEPASS_MULTI ){ + assert( hasFK==0 && chngKey==0 ); + sqlite3VdbeChangeP5(v, OPFLAG_SAVEPOSITION); + } + if( !pParse->nested ){ + sqlite3VdbeAppendP4(v, pTab, P4_TABLE); + } +#else + if( hasFK>1 || chngKey ){ + sqlite3VdbeAddOp2(v, OP_Delete, iDataCur, 0); + } +#endif + + if( hasFK ){ + sqlite3FkCheck(pParse, pTab, 0, regNewRowid, aXRef, chngKey); + } + + /* Insert the new index entries and the new record. */ + sqlite3CompleteInsertion( + pParse, pTab, iDataCur, iIdxCur, regNewRowid, aRegIdx, + OPFLAG_ISUPDATE | (eOnePass==ONEPASS_MULTI ? OPFLAG_SAVEPOSITION : 0), + 0, 0 + ); + + /* Do any ON CASCADE, SET NULL or SET DEFAULT operations required to + ** handle rows (possibly in other tables) that refer via a foreign key + ** to the row just updated. */ + if( hasFK ){ + sqlite3FkActions(pParse, pTab, pChanges, regOldRowid, aXRef, chngKey); + } + } + + /* Increment the row counter + */ + if( regRowCount ){ + sqlite3VdbeAddOp2(v, OP_AddImm, regRowCount, 1); + } + + sqlite3CodeRowTrigger(pParse, pTrigger, TK_UPDATE, pChanges, + TRIGGER_AFTER, pTab, regOldRowid, onError, labelContinue); + + /* Repeat the above with the next record to be updated, until + ** all record selected by the WHERE clause have been updated. + */ + if( eOnePass==ONEPASS_SINGLE ){ + /* Nothing to do at end-of-loop for a single-pass */ + }else if( eOnePass==ONEPASS_MULTI ){ + sqlite3VdbeResolveLabel(v, labelContinue); + sqlite3WhereEnd(pWInfo); + }else{ + sqlite3VdbeResolveLabel(v, labelContinue); + sqlite3VdbeAddOp2(v, OP_Next, iEph, addrTop); VdbeCoverage(v); + } + sqlite3VdbeResolveLabel(v, labelBreak); + + /* Update the sqlite_sequence table by storing the content of the + ** maximum rowid counter values recorded while inserting into + ** autoincrement tables. + */ + if( pParse->nested==0 && pParse->pTriggerTab==0 && pUpsert==0 ){ + sqlite3AutoincrementEnd(pParse); + } + + /* + ** Return the number of rows that were changed, if we are tracking + ** that information. + */ + if( regRowCount ){ + sqlite3CodeChangeCount(v, regRowCount, "rows updated"); + } + +update_cleanup: + sqlite3AuthContextPop(&sContext); + sqlite3DbFree(db, aXRef); /* Also frees aRegIdx[] and aToOpen[] */ + sqlite3SrcListDelete(db, pTabList); + sqlite3ExprListDelete(db, pChanges); + sqlite3ExprDelete(db, pWhere); +#if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) + sqlite3ExprListDelete(db, pOrderBy); + sqlite3ExprDelete(db, pLimit); +#endif + return; +} +/* Make sure "isView" and other macros defined above are undefined. Otherwise +** they may interfere with compilation of other functions in this file +** (or in another file, if this file becomes part of the amalgamation). */ +#ifdef isView + #undef isView +#endif +#ifdef pTrigger + #undef pTrigger +#endif + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Generate code for an UPDATE of a virtual table. +** +** There are two possible strategies - the default and the special +** "onepass" strategy. Onepass is only used if the virtual table +** implementation indicates that pWhere may match at most one row. +** +** The default strategy is to create an ephemeral table that contains +** for each row to be changed: +** +** (A) The original rowid of that row. +** (B) The revised rowid for the row. +** (C) The content of every column in the row. +** +** Then loop through the contents of this ephemeral table executing a +** VUpdate for each row. When finished, drop the ephemeral table. +** +** The "onepass" strategy does not use an ephemeral table. Instead, it +** stores the same values (A, B and C above) in a register array and +** makes a single invocation of VUpdate. +*/ +static void updateVirtualTable( + Parse *pParse, /* The parsing context */ + SrcList *pSrc, /* The virtual table to be modified */ + Table *pTab, /* The virtual table */ + ExprList *pChanges, /* The columns to change in the UPDATE statement */ + Expr *pRowid, /* Expression used to recompute the rowid */ + int *aXRef, /* Mapping from columns of pTab to entries in pChanges */ + Expr *pWhere, /* WHERE clause of the UPDATE statement */ + int onError /* ON CONFLICT strategy */ +){ + Vdbe *v = pParse->pVdbe; /* Virtual machine under construction */ + int ephemTab; /* Table holding the result of the SELECT */ + int i; /* Loop counter */ + sqlite3 *db = pParse->db; /* Database connection */ + const char *pVTab = (const char*)sqlite3GetVTable(db, pTab); + WhereInfo *pWInfo = 0; + int nArg = 2 + pTab->nCol; /* Number of arguments to VUpdate */ + int regArg; /* First register in VUpdate arg array */ + int regRec; /* Register in which to assemble record */ + int regRowid; /* Register for ephem table rowid */ + int iCsr = pSrc->a[0].iCursor; /* Cursor used for virtual table scan */ + int aDummy[2]; /* Unused arg for sqlite3WhereOkOnePass() */ + int eOnePass; /* True to use onepass strategy */ + int addr; /* Address of OP_OpenEphemeral */ + + /* Allocate nArg registers in which to gather the arguments for VUpdate. Then + ** create and open the ephemeral table in which the records created from + ** these arguments will be temporarily stored. */ + assert( v ); + ephemTab = pParse->nTab++; + addr= sqlite3VdbeAddOp2(v, OP_OpenEphemeral, ephemTab, nArg); + regArg = pParse->nMem + 1; + pParse->nMem += nArg; + if( pSrc->nSrc>1 ){ + Index *pPk = 0; + Expr *pRow; + ExprList *pList; + if( HasRowid(pTab) ){ + if( pRowid ){ + pRow = sqlite3ExprDup(db, pRowid, 0); + }else{ + pRow = sqlite3PExpr(pParse, TK_ROW, 0, 0); + } + }else{ + i16 iPk; /* PRIMARY KEY column */ + pPk = sqlite3PrimaryKeyIndex(pTab); + assert( pPk!=0 ); + assert( pPk->nKeyCol==1 ); + iPk = pPk->aiColumn[0]; + if( aXRef[iPk]>=0 ){ + pRow = sqlite3ExprDup(db, pChanges->a[aXRef[iPk]].pExpr, 0); + }else{ + pRow = exprRowColumn(pParse, iPk); + } + } + pList = sqlite3ExprListAppend(pParse, 0, pRow); + + for(i=0; inCol; i++){ + if( aXRef[i]>=0 ){ + pList = sqlite3ExprListAppend(pParse, pList, + sqlite3ExprDup(db, pChanges->a[aXRef[i]].pExpr, 0) + ); + }else{ + pList = sqlite3ExprListAppend(pParse, pList, exprRowColumn(pParse, i)); + } + } + + updateFromSelect(pParse, ephemTab, pPk, pList, pSrc, pWhere, 0, 0); + sqlite3ExprListDelete(db, pList); + eOnePass = ONEPASS_OFF; + }else{ + regRec = ++pParse->nMem; + regRowid = ++pParse->nMem; + + /* Start scanning the virtual table */ + pWInfo = sqlite3WhereBegin( + pParse, pSrc, pWhere, 0, 0, 0, WHERE_ONEPASS_DESIRED, 0 + ); + if( pWInfo==0 ) return; + + /* Populate the argument registers. */ + for(i=0; inCol; i++){ + assert( (pTab->aCol[i].colFlags & COLFLAG_GENERATED)==0 ); + if( aXRef[i]>=0 ){ + sqlite3ExprCode(pParse, pChanges->a[aXRef[i]].pExpr, regArg+2+i); + }else{ + sqlite3VdbeAddOp3(v, OP_VColumn, iCsr, i, regArg+2+i); + sqlite3VdbeChangeP5(v, OPFLAG_NOCHNG);/* For sqlite3_vtab_nochange() */ + } + } + if( HasRowid(pTab) ){ + sqlite3VdbeAddOp2(v, OP_Rowid, iCsr, regArg); + if( pRowid ){ + sqlite3ExprCode(pParse, pRowid, regArg+1); + }else{ + sqlite3VdbeAddOp2(v, OP_Rowid, iCsr, regArg+1); + } + }else{ + Index *pPk; /* PRIMARY KEY index */ + i16 iPk; /* PRIMARY KEY column */ + pPk = sqlite3PrimaryKeyIndex(pTab); + assert( pPk!=0 ); + assert( pPk->nKeyCol==1 ); + iPk = pPk->aiColumn[0]; + sqlite3VdbeAddOp3(v, OP_VColumn, iCsr, iPk, regArg); + sqlite3VdbeAddOp2(v, OP_SCopy, regArg+2+iPk, regArg+1); + } + + eOnePass = sqlite3WhereOkOnePass(pWInfo, aDummy); + + /* There is no ONEPASS_MULTI on virtual tables */ + assert( eOnePass==ONEPASS_OFF || eOnePass==ONEPASS_SINGLE ); + + if( eOnePass ){ + /* If using the onepass strategy, no-op out the OP_OpenEphemeral coded + ** above. */ + sqlite3VdbeChangeToNoop(v, addr); + sqlite3VdbeAddOp1(v, OP_Close, iCsr); + }else{ + /* Create a record from the argument register contents and insert it into + ** the ephemeral table. */ + sqlite3MultiWrite(pParse); + sqlite3VdbeAddOp3(v, OP_MakeRecord, regArg, nArg, regRec); +#if defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_NULL_TRIM) + /* Signal an assert() within OP_MakeRecord that it is allowed to + ** accept no-change records with serial_type 10 */ + sqlite3VdbeChangeP5(v, OPFLAG_NOCHNG_MAGIC); +#endif + sqlite3VdbeAddOp2(v, OP_NewRowid, ephemTab, regRowid); + sqlite3VdbeAddOp3(v, OP_Insert, ephemTab, regRec, regRowid); + } + } + + + if( eOnePass==ONEPASS_OFF ){ + /* End the virtual table scan */ + if( pSrc->nSrc==1 ){ + sqlite3WhereEnd(pWInfo); + } + + /* Begin scannning through the ephemeral table. */ + addr = sqlite3VdbeAddOp1(v, OP_Rewind, ephemTab); VdbeCoverage(v); + + /* Extract arguments from the current row of the ephemeral table and + ** invoke the VUpdate method. */ + for(i=0; ipNextUpsert; + sqlite3ExprListDelete(db, p->pUpsertTarget); + sqlite3ExprDelete(db, p->pUpsertTargetWhere); + sqlite3ExprListDelete(db, p->pUpsertSet); + sqlite3ExprDelete(db, p->pUpsertWhere); + sqlite3DbFree(db, p->pToFree); + sqlite3DbFree(db, p); + p = pNext; + }while( p ); +} +SQLITE_PRIVATE void sqlite3UpsertDelete(sqlite3 *db, Upsert *p){ + if( p ) upsertDelete(db, p); +} + + +/* +** Duplicate an Upsert object. +*/ +SQLITE_PRIVATE Upsert *sqlite3UpsertDup(sqlite3 *db, Upsert *p){ + if( p==0 ) return 0; + return sqlite3UpsertNew(db, + sqlite3ExprListDup(db, p->pUpsertTarget, 0), + sqlite3ExprDup(db, p->pUpsertTargetWhere, 0), + sqlite3ExprListDup(db, p->pUpsertSet, 0), + sqlite3ExprDup(db, p->pUpsertWhere, 0), + sqlite3UpsertDup(db, p->pNextUpsert) + ); +} + +/* +** Create a new Upsert object. +*/ +SQLITE_PRIVATE Upsert *sqlite3UpsertNew( + sqlite3 *db, /* Determines which memory allocator to use */ + ExprList *pTarget, /* Target argument to ON CONFLICT, or NULL */ + Expr *pTargetWhere, /* Optional WHERE clause on the target */ + ExprList *pSet, /* UPDATE columns, or NULL for a DO NOTHING */ + Expr *pWhere, /* WHERE clause for the ON CONFLICT UPDATE */ + Upsert *pNext /* Next ON CONFLICT clause in the list */ +){ + Upsert *pNew; + pNew = sqlite3DbMallocZero(db, sizeof(Upsert)); + if( pNew==0 ){ + sqlite3ExprListDelete(db, pTarget); + sqlite3ExprDelete(db, pTargetWhere); + sqlite3ExprListDelete(db, pSet); + sqlite3ExprDelete(db, pWhere); + sqlite3UpsertDelete(db, pNext); + return 0; + }else{ + pNew->pUpsertTarget = pTarget; + pNew->pUpsertTargetWhere = pTargetWhere; + pNew->pUpsertSet = pSet; + pNew->pUpsertWhere = pWhere; + pNew->isDoUpdate = pSet!=0; + pNew->pNextUpsert = pNext; + } + return pNew; +} + +/* +** Analyze the ON CONFLICT clause described by pUpsert. Resolve all +** symbols in the conflict-target. +** +** Return SQLITE_OK if everything works, or an error code is something +** is wrong. +*/ +SQLITE_PRIVATE int sqlite3UpsertAnalyzeTarget( + Parse *pParse, /* The parsing context */ + SrcList *pTabList, /* Table into which we are inserting */ + Upsert *pUpsert /* The ON CONFLICT clauses */ +){ + Table *pTab; /* That table into which we are inserting */ + int rc; /* Result code */ + int iCursor; /* Cursor used by pTab */ + Index *pIdx; /* One of the indexes of pTab */ + ExprList *pTarget; /* The conflict-target clause */ + Expr *pTerm; /* One term of the conflict-target clause */ + NameContext sNC; /* Context for resolving symbolic names */ + Expr sCol[2]; /* Index column converted into an Expr */ + int nClause = 0; /* Counter of ON CONFLICT clauses */ + + assert( pTabList->nSrc==1 ); + assert( pTabList->a[0].pTab!=0 ); + assert( pUpsert!=0 ); + assert( pUpsert->pUpsertTarget!=0 ); + + /* Resolve all symbolic names in the conflict-target clause, which + ** includes both the list of columns and the optional partial-index + ** WHERE clause. + */ + memset(&sNC, 0, sizeof(sNC)); + sNC.pParse = pParse; + sNC.pSrcList = pTabList; + for(; pUpsert && pUpsert->pUpsertTarget; + pUpsert=pUpsert->pNextUpsert, nClause++){ + rc = sqlite3ResolveExprListNames(&sNC, pUpsert->pUpsertTarget); + if( rc ) return rc; + rc = sqlite3ResolveExprNames(&sNC, pUpsert->pUpsertTargetWhere); + if( rc ) return rc; + + /* Check to see if the conflict target matches the rowid. */ + pTab = pTabList->a[0].pTab; + pTarget = pUpsert->pUpsertTarget; + iCursor = pTabList->a[0].iCursor; + if( HasRowid(pTab) + && pTarget->nExpr==1 + && (pTerm = pTarget->a[0].pExpr)->op==TK_COLUMN + && pTerm->iColumn==XN_ROWID + ){ + /* The conflict-target is the rowid of the primary table */ + assert( pUpsert->pUpsertIdx==0 ); + continue; + } + + /* Initialize sCol[0..1] to be an expression parse tree for a + ** single column of an index. The sCol[0] node will be the TK_COLLATE + ** operator and sCol[1] will be the TK_COLUMN operator. Code below + ** will populate the specific collation and column number values + ** prior to comparing against the conflict-target expression. + */ + memset(sCol, 0, sizeof(sCol)); + sCol[0].op = TK_COLLATE; + sCol[0].pLeft = &sCol[1]; + sCol[1].op = TK_COLUMN; + sCol[1].iTable = pTabList->a[0].iCursor; + + /* Check for matches against other indexes */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int ii, jj, nn; + if( !IsUniqueIndex(pIdx) ) continue; + if( pTarget->nExpr!=pIdx->nKeyCol ) continue; + if( pIdx->pPartIdxWhere ){ + if( pUpsert->pUpsertTargetWhere==0 ) continue; + if( sqlite3ExprCompare(pParse, pUpsert->pUpsertTargetWhere, + pIdx->pPartIdxWhere, iCursor)!=0 ){ + continue; + } + } + nn = pIdx->nKeyCol; + for(ii=0; iiazColl[ii]; + if( pIdx->aiColumn[ii]==XN_EXPR ){ + assert( pIdx->aColExpr!=0 ); + assert( pIdx->aColExpr->nExpr>ii ); + pExpr = pIdx->aColExpr->a[ii].pExpr; + if( pExpr->op!=TK_COLLATE ){ + sCol[0].pLeft = pExpr; + pExpr = &sCol[0]; + } + }else{ + sCol[0].pLeft = &sCol[1]; + sCol[1].iColumn = pIdx->aiColumn[ii]; + pExpr = &sCol[0]; + } + for(jj=0; jja[jj].pExpr,pExpr,iCursor)<2 ){ + break; /* Column ii of the index matches column jj of target */ + } + } + if( jj>=nn ){ + /* The target contains no match for column jj of the index */ + break; + } + } + if( iipUpsertIdx = pIdx; + break; + } + if( pUpsert->pUpsertIdx==0 ){ + char zWhich[16]; + if( nClause==0 && pUpsert->pNextUpsert==0 ){ + zWhich[0] = 0; + }else{ + sqlite3_snprintf(sizeof(zWhich),zWhich,"%r ", nClause+1); + } + sqlite3ErrorMsg(pParse, "%sON CONFLICT clause does not match any " + "PRIMARY KEY or UNIQUE constraint", zWhich); + return SQLITE_ERROR; + } + } + return SQLITE_OK; +} + +/* +** Return true if pUpsert is the last ON CONFLICT clause with a +** conflict target, or if pUpsert is followed by another ON CONFLICT +** clause that targets the INTEGER PRIMARY KEY. +*/ +SQLITE_PRIVATE int sqlite3UpsertNextIsIPK(Upsert *pUpsert){ + Upsert *pNext; + if( NEVER(pUpsert==0) ) return 0; + pNext = pUpsert->pNextUpsert; + if( pNext==0 ) return 1; + if( pNext->pUpsertTarget==0 ) return 1; + if( pNext->pUpsertIdx==0 ) return 1; + return 0; +} + +/* +** Given the list of ON CONFLICT clauses described by pUpsert, and +** a particular index pIdx, return a pointer to the particular ON CONFLICT +** clause that applies to the index. Or, if the index is not subject to +** any ON CONFLICT clause, return NULL. +*/ +SQLITE_PRIVATE Upsert *sqlite3UpsertOfIndex(Upsert *pUpsert, Index *pIdx){ + while( + pUpsert + && pUpsert->pUpsertTarget!=0 + && pUpsert->pUpsertIdx!=pIdx + ){ + pUpsert = pUpsert->pNextUpsert; + } + return pUpsert; +} + +/* +** Generate bytecode that does an UPDATE as part of an upsert. +** +** If pIdx is NULL, then the UNIQUE constraint that failed was the IPK. +** In this case parameter iCur is a cursor open on the table b-tree that +** currently points to the conflicting table row. Otherwise, if pIdx +** is not NULL, then pIdx is the constraint that failed and iCur is a +** cursor points to the conflicting row. +*/ +SQLITE_PRIVATE void sqlite3UpsertDoUpdate( + Parse *pParse, /* The parsing and code-generating context */ + Upsert *pUpsert, /* The ON CONFLICT clause for the upsert */ + Table *pTab, /* The table being updated */ + Index *pIdx, /* The UNIQUE constraint that failed */ + int iCur /* Cursor for pIdx (or pTab if pIdx==NULL) */ +){ + Vdbe *v = pParse->pVdbe; + sqlite3 *db = pParse->db; + SrcList *pSrc; /* FROM clause for the UPDATE */ + int iDataCur; + int i; + Upsert *pTop = pUpsert; + + assert( v!=0 ); + assert( pUpsert!=0 ); + iDataCur = pUpsert->iDataCur; + pUpsert = sqlite3UpsertOfIndex(pTop, pIdx); + VdbeNoopComment((v, "Begin DO UPDATE of UPSERT")); + if( pIdx && iCur!=iDataCur ){ + if( HasRowid(pTab) ){ + int regRowid = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp2(v, OP_IdxRowid, iCur, regRowid); + sqlite3VdbeAddOp3(v, OP_SeekRowid, iDataCur, 0, regRowid); + VdbeCoverage(v); + sqlite3ReleaseTempReg(pParse, regRowid); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + int nPk = pPk->nKeyCol; + int iPk = pParse->nMem+1; + pParse->nMem += nPk; + for(i=0; iaiColumn[i]>=0 ); + k = sqlite3TableColumnToIndex(pIdx, pPk->aiColumn[i]); + sqlite3VdbeAddOp3(v, OP_Column, iCur, k, iPk+i); + VdbeComment((v, "%s.%s", pIdx->zName, + pTab->aCol[pPk->aiColumn[i]].zCnName)); + } + sqlite3VdbeVerifyAbortable(v, OE_Abort); + i = sqlite3VdbeAddOp4Int(v, OP_Found, iDataCur, 0, iPk, nPk); + VdbeCoverage(v); + sqlite3VdbeAddOp4(v, OP_Halt, SQLITE_CORRUPT, OE_Abort, 0, + "corrupt database", P4_STATIC); + sqlite3MayAbort(pParse); + sqlite3VdbeJumpHere(v, i); + } + } + /* pUpsert does not own pTop->pUpsertSrc - the outer INSERT statement does. + ** So we have to make a copy before passing it down into sqlite3Update() */ + pSrc = sqlite3SrcListDup(db, pTop->pUpsertSrc, 0); + /* excluded.* columns of type REAL need to be converted to a hard real */ + for(i=0; inCol; i++){ + if( pTab->aCol[i].affinity==SQLITE_AFF_REAL ){ + sqlite3VdbeAddOp1(v, OP_RealAffinity, pTop->regData+i); + } + } + sqlite3Update(pParse, pSrc, sqlite3ExprListDup(db,pUpsert->pUpsertSet,0), + sqlite3ExprDup(db,pUpsert->pUpsertWhere,0), OE_Abort, 0, 0, pUpsert); + VdbeNoopComment((v, "End DO UPDATE of UPSERT")); +} + +#endif /* SQLITE_OMIT_UPSERT */ + +/************** End of upsert.c **********************************************/ +/************** Begin file vacuum.c ******************************************/ +/* +** 2003 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used to implement the VACUUM command. +** +** Most of the code in this file may be omitted by defining the +** SQLITE_OMIT_VACUUM macro. +*/ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +#if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) + +/* +** Execute zSql on database db. +** +** If zSql returns rows, then each row will have exactly one +** column. (This will only happen if zSql begins with "SELECT".) +** Take each row of result and call execSql() again recursively. +** +** The execSqlF() routine does the same thing, except it accepts +** a format string as its third argument +*/ +static int execSql(sqlite3 *db, char **pzErrMsg, const char *zSql){ + sqlite3_stmt *pStmt; + int rc; + + /* printf("SQL: [%s]\n", zSql); fflush(stdout); */ + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + while( SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ + const char *zSubSql = (const char*)sqlite3_column_text(pStmt,0); + assert( sqlite3_strnicmp(zSql,"SELECT",6)==0 ); + /* The secondary SQL must be one of CREATE TABLE, CREATE INDEX, + ** or INSERT. Historically there have been attacks that first + ** corrupt the sqlite_schema.sql field with other kinds of statements + ** then run VACUUM to get those statements to execute at inappropriate + ** times. */ + if( zSubSql + && (strncmp(zSubSql,"CRE",3)==0 || strncmp(zSubSql,"INS",3)==0) + ){ + rc = execSql(db, pzErrMsg, zSubSql); + if( rc!=SQLITE_OK ) break; + } + } + assert( rc!=SQLITE_ROW ); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + if( rc ){ + sqlite3SetString(pzErrMsg, db, sqlite3_errmsg(db)); + } + (void)sqlite3_finalize(pStmt); + return rc; +} +static int execSqlF(sqlite3 *db, char **pzErrMsg, const char *zSql, ...){ + char *z; + va_list ap; + int rc; + va_start(ap, zSql); + z = sqlite3VMPrintf(db, zSql, ap); + va_end(ap); + if( z==0 ) return SQLITE_NOMEM; + rc = execSql(db, pzErrMsg, z); + sqlite3DbFree(db, z); + return rc; +} + +/* +** The VACUUM command is used to clean up the database, +** collapse free space, etc. It is modelled after the VACUUM command +** in PostgreSQL. The VACUUM command works as follows: +** +** (1) Create a new transient database file +** (2) Copy all content from the database being vacuumed into +** the new transient database file +** (3) Copy content from the transient database back into the +** original database. +** +** The transient database requires temporary disk space approximately +** equal to the size of the original database. The copy operation of +** step (3) requires additional temporary disk space approximately equal +** to the size of the original database for the rollback journal. +** Hence, temporary disk space that is approximately 2x the size of the +** original database is required. Every page of the database is written +** approximately 3 times: Once for step (2) and twice for step (3). +** Two writes per page are required in step (3) because the original +** database content must be written into the rollback journal prior to +** overwriting the database with the vacuumed content. +** +** Only 1x temporary space and only 1x writes would be required if +** the copy of step (3) were replaced by deleting the original database +** and renaming the transient database as the original. But that will +** not work if other processes are attached to the original database. +** And a power loss in between deleting the original and renaming the +** transient would cause the database file to appear to be deleted +** following reboot. +*/ +SQLITE_PRIVATE void sqlite3Vacuum(Parse *pParse, Token *pNm, Expr *pInto){ + Vdbe *v = sqlite3GetVdbe(pParse); + int iDb = 0; + if( v==0 ) goto build_vacuum_end; + if( pParse->nErr ) goto build_vacuum_end; + if( pNm ){ +#ifndef SQLITE_BUG_COMPATIBLE_20160819 + /* Default behavior: Report an error if the argument to VACUUM is + ** not recognized */ + iDb = sqlite3TwoPartName(pParse, pNm, pNm, &pNm); + if( iDb<0 ) goto build_vacuum_end; +#else + /* When SQLITE_BUG_COMPATIBLE_20160819 is defined, unrecognized arguments + ** to VACUUM are silently ignored. This is a back-out of a bug fix that + ** occurred on 2016-08-19 (https://www.sqlite.org/src/info/083f9e6270). + ** The buggy behavior is required for binary compatibility with some + ** legacy applications. */ + iDb = sqlite3FindDb(pParse->db, pNm); + if( iDb<0 ) iDb = 0; +#endif + } + if( iDb!=1 ){ + int iIntoReg = 0; + if( pInto && sqlite3ResolveSelfReference(pParse,0,0,pInto,0)==0 ){ + iIntoReg = ++pParse->nMem; + sqlite3ExprCode(pParse, pInto, iIntoReg); + } + sqlite3VdbeAddOp2(v, OP_Vacuum, iDb, iIntoReg); + sqlite3VdbeUsesBtree(v, iDb); + } +build_vacuum_end: + sqlite3ExprDelete(pParse->db, pInto); + return; +} + +/* +** This routine implements the OP_Vacuum opcode of the VDBE. +*/ +SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( + char **pzErrMsg, /* Write error message here */ + sqlite3 *db, /* Database connection */ + int iDb, /* Which attached DB to vacuum */ + sqlite3_value *pOut /* Write results here, if not NULL. VACUUM INTO */ +){ + int rc = SQLITE_OK; /* Return code from service routines */ + Btree *pMain; /* The database being vacuumed */ + Btree *pTemp; /* The temporary database we vacuum into */ + u32 saved_mDbFlags; /* Saved value of db->mDbFlags */ + u64 saved_flags; /* Saved value of db->flags */ + i64 saved_nChange; /* Saved value of db->nChange */ + i64 saved_nTotalChange; /* Saved value of db->nTotalChange */ + u32 saved_openFlags; /* Saved value of db->openFlags */ + u8 saved_mTrace; /* Saved trace settings */ + Db *pDb = 0; /* Database to detach at end of vacuum */ + int isMemDb; /* True if vacuuming a :memory: database */ + int nRes; /* Bytes of reserved space at the end of each page */ + int nDb; /* Number of attached databases */ + const char *zDbMain; /* Schema name of database to vacuum */ + const char *zOut; /* Name of output file */ + + if( !db->autoCommit ){ + sqlite3SetString(pzErrMsg, db, "cannot VACUUM from within a transaction"); + return SQLITE_ERROR; /* IMP: R-12218-18073 */ + } + if( db->nVdbeActive>1 ){ + sqlite3SetString(pzErrMsg, db,"cannot VACUUM - SQL statements in progress"); + return SQLITE_ERROR; /* IMP: R-15610-35227 */ + } + saved_openFlags = db->openFlags; + if( pOut ){ + if( sqlite3_value_type(pOut)!=SQLITE_TEXT ){ + sqlite3SetString(pzErrMsg, db, "non-text filename"); + return SQLITE_ERROR; + } + zOut = (const char*)sqlite3_value_text(pOut); + db->openFlags &= ~SQLITE_OPEN_READONLY; + db->openFlags |= SQLITE_OPEN_CREATE|SQLITE_OPEN_READWRITE; + }else{ + zOut = ""; + } + + /* Save the current value of the database flags so that it can be + ** restored before returning. Then set the writable-schema flag, and + ** disable CHECK and foreign key constraints. */ + saved_flags = db->flags; + saved_mDbFlags = db->mDbFlags; + saved_nChange = db->nChange; + saved_nTotalChange = db->nTotalChange; + saved_mTrace = db->mTrace; + db->flags |= SQLITE_WriteSchema | SQLITE_IgnoreChecks; + db->mDbFlags |= DBFLAG_PreferBuiltin | DBFLAG_Vacuum; + db->flags &= ~(u64)(SQLITE_ForeignKeys | SQLITE_ReverseOrder + | SQLITE_Defensive | SQLITE_CountRows); + db->mTrace = 0; + + zDbMain = db->aDb[iDb].zDbSName; + pMain = db->aDb[iDb].pBt; + isMemDb = sqlite3PagerIsMemdb(sqlite3BtreePager(pMain)); + + /* Attach the temporary database as 'vacuum_db'. The synchronous pragma + ** can be set to 'off' for this file, as it is not recovered if a crash + ** occurs anyway. The integrity of the database is maintained by a + ** (possibly synchronous) transaction opened on the main database before + ** sqlite3BtreeCopyFile() is called. + ** + ** An optimisation would be to use a non-journaled pager. + ** (Later:) I tried setting "PRAGMA vacuum_db.journal_mode=OFF" but + ** that actually made the VACUUM run slower. Very little journalling + ** actually occurs when doing a vacuum since the vacuum_db is initially + ** empty. Only the journal header is written. Apparently it takes more + ** time to parse and run the PRAGMA to turn journalling off than it does + ** to write the journal header file. + */ + nDb = db->nDb; + rc = execSqlF(db, pzErrMsg, "ATTACH %Q AS vacuum_db", zOut); + db->openFlags = saved_openFlags; + if( rc!=SQLITE_OK ) goto end_of_vacuum; + assert( (db->nDb-1)==nDb ); + pDb = &db->aDb[nDb]; + assert( strcmp(pDb->zDbSName,"vacuum_db")==0 ); + pTemp = pDb->pBt; + if( pOut ){ + sqlite3_file *id = sqlite3PagerFile(sqlite3BtreePager(pTemp)); + i64 sz = 0; + if( id->pMethods!=0 && (sqlite3OsFileSize(id, &sz)!=SQLITE_OK || sz>0) ){ + rc = SQLITE_ERROR; + sqlite3SetString(pzErrMsg, db, "output file already exists"); + goto end_of_vacuum; + } + db->mDbFlags |= DBFLAG_VacuumInto; + } + nRes = sqlite3BtreeGetRequestedReserve(pMain); + + sqlite3BtreeSetCacheSize(pTemp, db->aDb[iDb].pSchema->cache_size); + sqlite3BtreeSetSpillSize(pTemp, sqlite3BtreeSetSpillSize(pMain,0)); + sqlite3BtreeSetPagerFlags(pTemp, PAGER_SYNCHRONOUS_OFF|PAGER_CACHESPILL); + + /* Begin a transaction and take an exclusive lock on the main database + ** file. This is done before the sqlite3BtreeGetPageSize(pMain) call below, + ** to ensure that we do not try to change the page-size on a WAL database. + */ + rc = execSql(db, pzErrMsg, "BEGIN"); + if( rc!=SQLITE_OK ) goto end_of_vacuum; + rc = sqlite3BtreeBeginTrans(pMain, pOut==0 ? 2 : 0, 0); + if( rc!=SQLITE_OK ) goto end_of_vacuum; + + /* Do not attempt to change the page size for a WAL database */ + if( sqlite3PagerGetJournalMode(sqlite3BtreePager(pMain)) + ==PAGER_JOURNALMODE_WAL + && pOut==0 + ){ + db->nextPagesize = 0; + } + + if( sqlite3BtreeSetPageSize(pTemp, sqlite3BtreeGetPageSize(pMain), nRes, 0) + || (!isMemDb && sqlite3BtreeSetPageSize(pTemp, db->nextPagesize, nRes, 0)) + || NEVER(db->mallocFailed) + ){ + rc = SQLITE_NOMEM_BKPT; + goto end_of_vacuum; + } + +#ifndef SQLITE_OMIT_AUTOVACUUM + sqlite3BtreeSetAutoVacuum(pTemp, db->nextAutovac>=0 ? db->nextAutovac : + sqlite3BtreeGetAutoVacuum(pMain)); +#endif + + /* Query the schema of the main database. Create a mirror schema + ** in the temporary database. + */ + db->init.iDb = nDb; /* force new CREATE statements into vacuum_db */ + rc = execSqlF(db, pzErrMsg, + "SELECT sql FROM \"%w\".sqlite_schema" + " WHERE type='table'AND name<>'sqlite_sequence'" + " AND coalesce(rootpage,1)>0", + zDbMain + ); + if( rc!=SQLITE_OK ) goto end_of_vacuum; + rc = execSqlF(db, pzErrMsg, + "SELECT sql FROM \"%w\".sqlite_schema" + " WHERE type='index'", + zDbMain + ); + if( rc!=SQLITE_OK ) goto end_of_vacuum; + db->init.iDb = 0; + + /* Loop through the tables in the main database. For each, do + ** an "INSERT INTO vacuum_db.xxx SELECT * FROM main.xxx;" to copy + ** the contents to the temporary database. + */ + rc = execSqlF(db, pzErrMsg, + "SELECT'INSERT INTO vacuum_db.'||quote(name)" + "||' SELECT*FROM\"%w\".'||quote(name)" + "FROM vacuum_db.sqlite_schema " + "WHERE type='table'AND coalesce(rootpage,1)>0", + zDbMain + ); + assert( (db->mDbFlags & DBFLAG_Vacuum)!=0 ); + db->mDbFlags &= ~DBFLAG_Vacuum; + if( rc!=SQLITE_OK ) goto end_of_vacuum; + + /* Copy the triggers, views, and virtual tables from the main database + ** over to the temporary database. None of these objects has any + ** associated storage, so all we have to do is copy their entries + ** from the schema table. + */ + rc = execSqlF(db, pzErrMsg, + "INSERT INTO vacuum_db.sqlite_schema" + " SELECT*FROM \"%w\".sqlite_schema" + " WHERE type IN('view','trigger')" + " OR(type='table'AND rootpage=0)", + zDbMain + ); + if( rc ) goto end_of_vacuum; + + /* At this point, there is a write transaction open on both the + ** vacuum database and the main database. Assuming no error occurs, + ** both transactions are closed by this block - the main database + ** transaction by sqlite3BtreeCopyFile() and the other by an explicit + ** call to sqlite3BtreeCommit(). + */ + { + u32 meta; + int i; + + /* This array determines which meta meta values are preserved in the + ** vacuum. Even entries are the meta value number and odd entries + ** are an increment to apply to the meta value after the vacuum. + ** The increment is used to increase the schema cookie so that other + ** connections to the same database will know to reread the schema. + */ + static const unsigned char aCopy[] = { + BTREE_SCHEMA_VERSION, 1, /* Add one to the old schema cookie */ + BTREE_DEFAULT_CACHE_SIZE, 0, /* Preserve the default page cache size */ + BTREE_TEXT_ENCODING, 0, /* Preserve the text encoding */ + BTREE_USER_VERSION, 0, /* Preserve the user version */ + BTREE_APPLICATION_ID, 0, /* Preserve the application id */ + }; + + assert( SQLITE_TXN_WRITE==sqlite3BtreeTxnState(pTemp) ); + assert( pOut!=0 || SQLITE_TXN_WRITE==sqlite3BtreeTxnState(pMain) ); + + /* Copy Btree meta values */ + for(i=0; iflags */ + db->init.iDb = 0; + db->mDbFlags = saved_mDbFlags; + db->flags = saved_flags; + db->nChange = saved_nChange; + db->nTotalChange = saved_nTotalChange; + db->mTrace = saved_mTrace; + sqlite3BtreeSetPageSize(pMain, -1, 0, 1); + + /* Currently there is an SQL level transaction open on the vacuum + ** database. No locks are held on any other files (since the main file + ** was committed at the btree level). So it safe to end the transaction + ** by manually setting the autoCommit flag to true and detaching the + ** vacuum database. The vacuum_db journal file is deleted when the pager + ** is closed by the DETACH. + */ + db->autoCommit = 1; + + if( pDb ){ + sqlite3BtreeClose(pDb->pBt); + pDb->pBt = 0; + pDb->pSchema = 0; + } + + /* This both clears the schemas and reduces the size of the db->aDb[] + ** array. */ + sqlite3ResetAllSchemasOfConnection(db); + + return rc; +} + +#endif /* SQLITE_OMIT_VACUUM && SQLITE_OMIT_ATTACH */ + +/************** End of vacuum.c **********************************************/ +/************** Begin file vtab.c ********************************************/ +/* +** 2006 June 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used to help implement virtual tables. +*/ +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* #include "sqliteInt.h" */ + +/* +** Before a virtual table xCreate() or xConnect() method is invoked, the +** sqlite3.pVtabCtx member variable is set to point to an instance of +** this struct allocated on the stack. It is used by the implementation of +** the sqlite3_declare_vtab() and sqlite3_vtab_config() APIs, both of which +** are invoked only from within xCreate and xConnect methods. +*/ +struct VtabCtx { + VTable *pVTable; /* The virtual table being constructed */ + Table *pTab; /* The Table object to which the virtual table belongs */ + VtabCtx *pPrior; /* Parent context (if any) */ + int bDeclared; /* True after sqlite3_declare_vtab() is called */ +}; + +/* +** Construct and install a Module object for a virtual table. When this +** routine is called, it is guaranteed that all appropriate locks are held +** and the module is not already part of the connection. +** +** If there already exists a module with zName, replace it with the new one. +** If pModule==0, then delete the module zName if it exists. +*/ +SQLITE_PRIVATE Module *sqlite3VtabCreateModule( + sqlite3 *db, /* Database in which module is registered */ + const char *zName, /* Name assigned to this module */ + const sqlite3_module *pModule, /* The definition of the module */ + void *pAux, /* Context pointer for xCreate/xConnect */ + void (*xDestroy)(void *) /* Module destructor function */ +){ + Module *pMod; + Module *pDel; + char *zCopy; + if( pModule==0 ){ + zCopy = (char*)zName; + pMod = 0; + }else{ + int nName = sqlite3Strlen30(zName); + pMod = (Module *)sqlite3Malloc(sizeof(Module) + nName + 1); + if( pMod==0 ){ + sqlite3OomFault(db); + return 0; + } + zCopy = (char *)(&pMod[1]); + memcpy(zCopy, zName, nName+1); + pMod->zName = zCopy; + pMod->pModule = pModule; + pMod->pAux = pAux; + pMod->xDestroy = xDestroy; + pMod->pEpoTab = 0; + pMod->nRefModule = 1; + } + pDel = (Module *)sqlite3HashInsert(&db->aModule,zCopy,(void*)pMod); + if( pDel ){ + if( pDel==pMod ){ + sqlite3OomFault(db); + sqlite3DbFree(db, pDel); + pMod = 0; + }else{ + sqlite3VtabEponymousTableClear(db, pDel); + sqlite3VtabModuleUnref(db, pDel); + } + } + return pMod; +} + +/* +** The actual function that does the work of creating a new module. +** This function implements the sqlite3_create_module() and +** sqlite3_create_module_v2() interfaces. +*/ +static int createModule( + sqlite3 *db, /* Database in which module is registered */ + const char *zName, /* Name assigned to this module */ + const sqlite3_module *pModule, /* The definition of the module */ + void *pAux, /* Context pointer for xCreate/xConnect */ + void (*xDestroy)(void *) /* Module destructor function */ +){ + int rc = SQLITE_OK; + + sqlite3_mutex_enter(db->mutex); + (void)sqlite3VtabCreateModule(db, zName, pModule, pAux, xDestroy); + rc = sqlite3ApiExit(db, rc); + if( rc!=SQLITE_OK && xDestroy ) xDestroy(pAux); + sqlite3_mutex_leave(db->mutex); + return rc; +} + + +/* +** External API function used to create a new virtual-table module. +*/ +SQLITE_API int sqlite3_create_module( + sqlite3 *db, /* Database in which module is registered */ + const char *zName, /* Name assigned to this module */ + const sqlite3_module *pModule, /* The definition of the module */ + void *pAux /* Context pointer for xCreate/xConnect */ +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; +#endif + return createModule(db, zName, pModule, pAux, 0); +} + +/* +** External API function used to create a new virtual-table module. +*/ +SQLITE_API int sqlite3_create_module_v2( + sqlite3 *db, /* Database in which module is registered */ + const char *zName, /* Name assigned to this module */ + const sqlite3_module *pModule, /* The definition of the module */ + void *pAux, /* Context pointer for xCreate/xConnect */ + void (*xDestroy)(void *) /* Module destructor function */ +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; +#endif + return createModule(db, zName, pModule, pAux, xDestroy); +} + +/* +** External API to drop all virtual-table modules, except those named +** on the azNames list. +*/ +SQLITE_API int sqlite3_drop_modules(sqlite3 *db, const char** azNames){ + HashElem *pThis, *pNext; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + for(pThis=sqliteHashFirst(&db->aModule); pThis; pThis=pNext){ + Module *pMod = (Module*)sqliteHashData(pThis); + pNext = sqliteHashNext(pThis); + if( azNames ){ + int ii; + for(ii=0; azNames[ii]!=0 && strcmp(azNames[ii],pMod->zName)!=0; ii++){} + if( azNames[ii]!=0 ) continue; + } + createModule(db, pMod->zName, 0, 0, 0); + } + return SQLITE_OK; +} + +/* +** Decrement the reference count on a Module object. Destroy the +** module when the reference count reaches zero. +*/ +SQLITE_PRIVATE void sqlite3VtabModuleUnref(sqlite3 *db, Module *pMod){ + assert( pMod->nRefModule>0 ); + pMod->nRefModule--; + if( pMod->nRefModule==0 ){ + if( pMod->xDestroy ){ + pMod->xDestroy(pMod->pAux); + } + assert( pMod->pEpoTab==0 ); + sqlite3DbFree(db, pMod); + } +} + +/* +** Lock the virtual table so that it cannot be disconnected. +** Locks nest. Every lock should have a corresponding unlock. +** If an unlock is omitted, resources leaks will occur. +** +** If a disconnect is attempted while a virtual table is locked, +** the disconnect is deferred until all locks have been removed. +*/ +SQLITE_PRIVATE void sqlite3VtabLock(VTable *pVTab){ + pVTab->nRef++; +} + + +/* +** pTab is a pointer to a Table structure representing a virtual-table. +** Return a pointer to the VTable object used by connection db to access +** this virtual-table, if one has been created, or NULL otherwise. +*/ +SQLITE_PRIVATE VTable *sqlite3GetVTable(sqlite3 *db, Table *pTab){ + VTable *pVtab; + assert( IsVirtual(pTab) ); + for(pVtab=pTab->u.vtab.p; pVtab && pVtab->db!=db; pVtab=pVtab->pNext); + return pVtab; +} + +/* +** Decrement the ref-count on a virtual table object. When the ref-count +** reaches zero, call the xDisconnect() method to delete the object. +*/ +SQLITE_PRIVATE void sqlite3VtabUnlock(VTable *pVTab){ + sqlite3 *db = pVTab->db; + + assert( db ); + assert( pVTab->nRef>0 ); + assert( db->eOpenState==SQLITE_STATE_OPEN + || db->eOpenState==SQLITE_STATE_ZOMBIE ); + + pVTab->nRef--; + if( pVTab->nRef==0 ){ + sqlite3_vtab *p = pVTab->pVtab; + sqlite3VtabModuleUnref(pVTab->db, pVTab->pMod); + if( p ){ + p->pModule->xDisconnect(p); + } + sqlite3DbFree(db, pVTab); + } +} + +/* +** Table p is a virtual table. This function moves all elements in the +** p->u.vtab.p list to the sqlite3.pDisconnect lists of their associated +** database connections to be disconnected at the next opportunity. +** Except, if argument db is not NULL, then the entry associated with +** connection db is left in the p->u.vtab.p list. +*/ +static VTable *vtabDisconnectAll(sqlite3 *db, Table *p){ + VTable *pRet = 0; + VTable *pVTable; + + assert( IsVirtual(p) ); + pVTable = p->u.vtab.p; + p->u.vtab.p = 0; + + /* Assert that the mutex (if any) associated with the BtShared database + ** that contains table p is held by the caller. See header comments + ** above function sqlite3VtabUnlockList() for an explanation of why + ** this makes it safe to access the sqlite3.pDisconnect list of any + ** database connection that may have an entry in the p->u.vtab.p list. + */ + assert( db==0 || sqlite3SchemaMutexHeld(db, 0, p->pSchema) ); + + while( pVTable ){ + sqlite3 *db2 = pVTable->db; + VTable *pNext = pVTable->pNext; + assert( db2 ); + if( db2==db ){ + pRet = pVTable; + p->u.vtab.p = pRet; + pRet->pNext = 0; + }else{ + pVTable->pNext = db2->pDisconnect; + db2->pDisconnect = pVTable; + } + pVTable = pNext; + } + + assert( !db || pRet ); + return pRet; +} + +/* +** Table *p is a virtual table. This function removes the VTable object +** for table *p associated with database connection db from the linked +** list in p->pVTab. It also decrements the VTable ref count. This is +** used when closing database connection db to free all of its VTable +** objects without disturbing the rest of the Schema object (which may +** be being used by other shared-cache connections). +*/ +SQLITE_PRIVATE void sqlite3VtabDisconnect(sqlite3 *db, Table *p){ + VTable **ppVTab; + + assert( IsVirtual(p) ); + assert( sqlite3BtreeHoldsAllMutexes(db) ); + assert( sqlite3_mutex_held(db->mutex) ); + + for(ppVTab=&p->u.vtab.p; *ppVTab; ppVTab=&(*ppVTab)->pNext){ + if( (*ppVTab)->db==db ){ + VTable *pVTab = *ppVTab; + *ppVTab = pVTab->pNext; + sqlite3VtabUnlock(pVTab); + break; + } + } +} + + +/* +** Disconnect all the virtual table objects in the sqlite3.pDisconnect list. +** +** This function may only be called when the mutexes associated with all +** shared b-tree databases opened using connection db are held by the +** caller. This is done to protect the sqlite3.pDisconnect list. The +** sqlite3.pDisconnect list is accessed only as follows: +** +** 1) By this function. In this case, all BtShared mutexes and the mutex +** associated with the database handle itself must be held. +** +** 2) By function vtabDisconnectAll(), when it adds a VTable entry to +** the sqlite3.pDisconnect list. In this case either the BtShared mutex +** associated with the database the virtual table is stored in is held +** or, if the virtual table is stored in a non-sharable database, then +** the database handle mutex is held. +** +** As a result, a sqlite3.pDisconnect cannot be accessed simultaneously +** by multiple threads. It is thread-safe. +*/ +SQLITE_PRIVATE void sqlite3VtabUnlockList(sqlite3 *db){ + VTable *p = db->pDisconnect; + + assert( sqlite3BtreeHoldsAllMutexes(db) ); + assert( sqlite3_mutex_held(db->mutex) ); + + if( p ){ + db->pDisconnect = 0; + sqlite3ExpirePreparedStatements(db, 0); + do { + VTable *pNext = p->pNext; + sqlite3VtabUnlock(p); + p = pNext; + }while( p ); + } +} + +/* +** Clear any and all virtual-table information from the Table record. +** This routine is called, for example, just before deleting the Table +** record. +** +** Since it is a virtual-table, the Table structure contains a pointer +** to the head of a linked list of VTable structures. Each VTable +** structure is associated with a single sqlite3* user of the schema. +** The reference count of the VTable structure associated with database +** connection db is decremented immediately (which may lead to the +** structure being xDisconnected and free). Any other VTable structures +** in the list are moved to the sqlite3.pDisconnect list of the associated +** database connection. +*/ +SQLITE_PRIVATE void sqlite3VtabClear(sqlite3 *db, Table *p){ + assert( IsVirtual(p) ); + if( !db || db->pnBytesFreed==0 ) vtabDisconnectAll(0, p); + if( p->u.vtab.azArg ){ + int i; + for(i=0; iu.vtab.nArg; i++){ + if( i!=1 ) sqlite3DbFree(db, p->u.vtab.azArg[i]); + } + sqlite3DbFree(db, p->u.vtab.azArg); + } +} + +/* +** Add a new module argument to pTable->u.vtab.azArg[]. +** The string is not copied - the pointer is stored. The +** string will be freed automatically when the table is +** deleted. +*/ +static void addModuleArgument(Parse *pParse, Table *pTable, char *zArg){ + sqlite3_int64 nBytes; + char **azModuleArg; + sqlite3 *db = pParse->db; + + assert( IsVirtual(pTable) ); + nBytes = sizeof(char *)*(2+pTable->u.vtab.nArg); + if( pTable->u.vtab.nArg+3>=db->aLimit[SQLITE_LIMIT_COLUMN] ){ + sqlite3ErrorMsg(pParse, "too many columns on %s", pTable->zName); + } + azModuleArg = sqlite3DbRealloc(db, pTable->u.vtab.azArg, nBytes); + if( azModuleArg==0 ){ + sqlite3DbFree(db, zArg); + }else{ + int i = pTable->u.vtab.nArg++; + azModuleArg[i] = zArg; + azModuleArg[i+1] = 0; + pTable->u.vtab.azArg = azModuleArg; + } +} + +/* +** The parser calls this routine when it first sees a CREATE VIRTUAL TABLE +** statement. The module name has been parsed, but the optional list +** of parameters that follow the module name are still pending. +*/ +SQLITE_PRIVATE void sqlite3VtabBeginParse( + Parse *pParse, /* Parsing context */ + Token *pName1, /* Name of new table, or database name */ + Token *pName2, /* Name of new table or NULL */ + Token *pModuleName, /* Name of the module for the virtual table */ + int ifNotExists /* No error if the table already exists */ +){ + Table *pTable; /* The new virtual table */ + sqlite3 *db; /* Database connection */ + + sqlite3StartTable(pParse, pName1, pName2, 0, 0, 1, ifNotExists); + pTable = pParse->pNewTable; + if( pTable==0 ) return; + assert( 0==pTable->pIndex ); + pTable->eTabType = TABTYP_VTAB; + + db = pParse->db; + + assert( pTable->u.vtab.nArg==0 ); + addModuleArgument(pParse, pTable, sqlite3NameFromToken(db, pModuleName)); + addModuleArgument(pParse, pTable, 0); + addModuleArgument(pParse, pTable, sqlite3DbStrDup(db, pTable->zName)); + assert( (pParse->sNameToken.z==pName2->z && pName2->z!=0) + || (pParse->sNameToken.z==pName1->z && pName2->z==0) + ); + pParse->sNameToken.n = (int)( + &pModuleName->z[pModuleName->n] - pParse->sNameToken.z + ); + +#ifndef SQLITE_OMIT_AUTHORIZATION + /* Creating a virtual table invokes the authorization callback twice. + ** The first invocation, to obtain permission to INSERT a row into the + ** sqlite_schema table, has already been made by sqlite3StartTable(). + ** The second call, to obtain permission to create the table, is made now. + */ + if( pTable->u.vtab.azArg ){ + int iDb = sqlite3SchemaToIndex(db, pTable->pSchema); + assert( iDb>=0 ); /* The database the table is being created in */ + sqlite3AuthCheck(pParse, SQLITE_CREATE_VTABLE, pTable->zName, + pTable->u.vtab.azArg[0], pParse->db->aDb[iDb].zDbSName); + } +#endif +} + +/* +** This routine takes the module argument that has been accumulating +** in pParse->zArg[] and appends it to the list of arguments on the +** virtual table currently under construction in pParse->pTable. +*/ +static void addArgumentToVtab(Parse *pParse){ + if( pParse->sArg.z && pParse->pNewTable ){ + const char *z = (const char*)pParse->sArg.z; + int n = pParse->sArg.n; + sqlite3 *db = pParse->db; + addModuleArgument(pParse, pParse->pNewTable, sqlite3DbStrNDup(db, z, n)); + } +} + +/* +** The parser calls this routine after the CREATE VIRTUAL TABLE statement +** has been completely parsed. +*/ +SQLITE_PRIVATE void sqlite3VtabFinishParse(Parse *pParse, Token *pEnd){ + Table *pTab = pParse->pNewTable; /* The table being constructed */ + sqlite3 *db = pParse->db; /* The database connection */ + + if( pTab==0 ) return; + assert( IsVirtual(pTab) ); + addArgumentToVtab(pParse); + pParse->sArg.z = 0; + if( pTab->u.vtab.nArg<1 ) return; + + /* If the CREATE VIRTUAL TABLE statement is being entered for the + ** first time (in other words if the virtual table is actually being + ** created now instead of just being read out of sqlite_schema) then + ** do additional initialization work and store the statement text + ** in the sqlite_schema table. + */ + if( !db->init.busy ){ + char *zStmt; + char *zWhere; + int iDb; + int iReg; + Vdbe *v; + + sqlite3MayAbort(pParse); + + /* Compute the complete text of the CREATE VIRTUAL TABLE statement */ + if( pEnd ){ + pParse->sNameToken.n = (int)(pEnd->z - pParse->sNameToken.z) + pEnd->n; + } + zStmt = sqlite3MPrintf(db, "CREATE VIRTUAL TABLE %T", &pParse->sNameToken); + + /* A slot for the record has already been allocated in the + ** schema table. We just need to update that slot with all + ** the information we've collected. + ** + ** The VM register number pParse->regRowid holds the rowid of an + ** entry in the sqlite_schema table tht was created for this vtab + ** by sqlite3StartTable(). + */ + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + sqlite3NestedParse(pParse, + "UPDATE %Q." LEGACY_SCHEMA_TABLE " " + "SET type='table', name=%Q, tbl_name=%Q, rootpage=0, sql=%Q " + "WHERE rowid=#%d", + db->aDb[iDb].zDbSName, + pTab->zName, + pTab->zName, + zStmt, + pParse->regRowid + ); + v = sqlite3GetVdbe(pParse); + sqlite3ChangeCookie(pParse, iDb); + + sqlite3VdbeAddOp0(v, OP_Expire); + zWhere = sqlite3MPrintf(db, "name=%Q AND sql=%Q", pTab->zName, zStmt); + sqlite3VdbeAddParseSchemaOp(v, iDb, zWhere, 0); + sqlite3DbFree(db, zStmt); + + iReg = ++pParse->nMem; + sqlite3VdbeLoadString(v, iReg, pTab->zName); + sqlite3VdbeAddOp2(v, OP_VCreate, iDb, iReg); + }else{ + /* If we are rereading the sqlite_schema table create the in-memory + ** record of the table. */ + Table *pOld; + Schema *pSchema = pTab->pSchema; + const char *zName = pTab->zName; + assert( zName!=0 ); + sqlite3MarkAllShadowTablesOf(db, pTab); + pOld = sqlite3HashInsert(&pSchema->tblHash, zName, pTab); + if( pOld ){ + sqlite3OomFault(db); + assert( pTab==pOld ); /* Malloc must have failed inside HashInsert() */ + return; + } + pParse->pNewTable = 0; + } +} + +/* +** The parser calls this routine when it sees the first token +** of an argument to the module name in a CREATE VIRTUAL TABLE statement. +*/ +SQLITE_PRIVATE void sqlite3VtabArgInit(Parse *pParse){ + addArgumentToVtab(pParse); + pParse->sArg.z = 0; + pParse->sArg.n = 0; +} + +/* +** The parser calls this routine for each token after the first token +** in an argument to the module name in a CREATE VIRTUAL TABLE statement. +*/ +SQLITE_PRIVATE void sqlite3VtabArgExtend(Parse *pParse, Token *p){ + Token *pArg = &pParse->sArg; + if( pArg->z==0 ){ + pArg->z = p->z; + pArg->n = p->n; + }else{ + assert(pArg->z <= p->z); + pArg->n = (int)(&p->z[p->n] - pArg->z); + } +} + +/* +** Invoke a virtual table constructor (either xCreate or xConnect). The +** pointer to the function to invoke is passed as the fourth parameter +** to this procedure. +*/ +static int vtabCallConstructor( + sqlite3 *db, + Table *pTab, + Module *pMod, + int (*xConstruct)(sqlite3*,void*,int,const char*const*,sqlite3_vtab**,char**), + char **pzErr +){ + VtabCtx sCtx; + VTable *pVTable; + int rc; + const char *const*azArg; + int nArg = pTab->u.vtab.nArg; + char *zErr = 0; + char *zModuleName; + int iDb; + VtabCtx *pCtx; + + assert( IsVirtual(pTab) ); + azArg = (const char *const*)pTab->u.vtab.azArg; + + /* Check that the virtual-table is not already being initialized */ + for(pCtx=db->pVtabCtx; pCtx; pCtx=pCtx->pPrior){ + if( pCtx->pTab==pTab ){ + *pzErr = sqlite3MPrintf(db, + "vtable constructor called recursively: %s", pTab->zName + ); + return SQLITE_LOCKED; + } + } + + zModuleName = sqlite3DbStrDup(db, pTab->zName); + if( !zModuleName ){ + return SQLITE_NOMEM_BKPT; + } + + pVTable = sqlite3MallocZero(sizeof(VTable)); + if( !pVTable ){ + sqlite3OomFault(db); + sqlite3DbFree(db, zModuleName); + return SQLITE_NOMEM_BKPT; + } + pVTable->db = db; + pVTable->pMod = pMod; + pVTable->eVtabRisk = SQLITE_VTABRISK_Normal; + + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + pTab->u.vtab.azArg[1] = db->aDb[iDb].zDbSName; + + /* Invoke the virtual table constructor */ + assert( &db->pVtabCtx ); + assert( xConstruct ); + sCtx.pTab = pTab; + sCtx.pVTable = pVTable; + sCtx.pPrior = db->pVtabCtx; + sCtx.bDeclared = 0; + db->pVtabCtx = &sCtx; + rc = xConstruct(db, pMod->pAux, nArg, azArg, &pVTable->pVtab, &zErr); + db->pVtabCtx = sCtx.pPrior; + if( rc==SQLITE_NOMEM ) sqlite3OomFault(db); + assert( sCtx.pTab==pTab ); + + if( SQLITE_OK!=rc ){ + if( zErr==0 ){ + *pzErr = sqlite3MPrintf(db, "vtable constructor failed: %s", zModuleName); + }else { + *pzErr = sqlite3MPrintf(db, "%s", zErr); + sqlite3_free(zErr); + } + sqlite3DbFree(db, pVTable); + }else if( ALWAYS(pVTable->pVtab) ){ + /* Justification of ALWAYS(): A correct vtab constructor must allocate + ** the sqlite3_vtab object if successful. */ + memset(pVTable->pVtab, 0, sizeof(pVTable->pVtab[0])); + pVTable->pVtab->pModule = pMod->pModule; + pMod->nRefModule++; + pVTable->nRef = 1; + if( sCtx.bDeclared==0 ){ + const char *zFormat = "vtable constructor did not declare schema: %s"; + *pzErr = sqlite3MPrintf(db, zFormat, pTab->zName); + sqlite3VtabUnlock(pVTable); + rc = SQLITE_ERROR; + }else{ + int iCol; + u16 oooHidden = 0; + /* If everything went according to plan, link the new VTable structure + ** into the linked list headed by pTab->u.vtab.p. Then loop through the + ** columns of the table to see if any of them contain the token "hidden". + ** If so, set the Column COLFLAG_HIDDEN flag and remove the token from + ** the type string. */ + pVTable->pNext = pTab->u.vtab.p; + pTab->u.vtab.p = pVTable; + + for(iCol=0; iColnCol; iCol++){ + char *zType = sqlite3ColumnType(&pTab->aCol[iCol], ""); + int nType; + int i = 0; + nType = sqlite3Strlen30(zType); + for(i=0; i0 ){ + assert(zType[i-1]==' '); + zType[i-1] = '\0'; + } + pTab->aCol[iCol].colFlags |= COLFLAG_HIDDEN; + pTab->tabFlags |= TF_HasHidden; + oooHidden = TF_OOOHidden; + }else{ + pTab->tabFlags |= oooHidden; + } + } + } + } + + sqlite3DbFree(db, zModuleName); + return rc; +} + +/* +** This function is invoked by the parser to call the xConnect() method +** of the virtual table pTab. If an error occurs, an error code is returned +** and an error left in pParse. +** +** This call is a no-op if table pTab is not a virtual table. +*/ +SQLITE_PRIVATE int sqlite3VtabCallConnect(Parse *pParse, Table *pTab){ + sqlite3 *db = pParse->db; + const char *zMod; + Module *pMod; + int rc; + + assert( pTab ); + assert( IsVirtual(pTab) ); + if( sqlite3GetVTable(db, pTab) ){ + return SQLITE_OK; + } + + /* Locate the required virtual table module */ + zMod = pTab->u.vtab.azArg[0]; + pMod = (Module*)sqlite3HashFind(&db->aModule, zMod); + + if( !pMod ){ + const char *zModule = pTab->u.vtab.azArg[0]; + sqlite3ErrorMsg(pParse, "no such module: %s", zModule); + rc = SQLITE_ERROR; + }else{ + char *zErr = 0; + rc = vtabCallConstructor(db, pTab, pMod, pMod->pModule->xConnect, &zErr); + if( rc!=SQLITE_OK ){ + sqlite3ErrorMsg(pParse, "%s", zErr); + pParse->rc = rc; + } + sqlite3DbFree(db, zErr); + } + + return rc; +} +/* +** Grow the db->aVTrans[] array so that there is room for at least one +** more v-table. Return SQLITE_NOMEM if a malloc fails, or SQLITE_OK otherwise. +*/ +static int growVTrans(sqlite3 *db){ + const int ARRAY_INCR = 5; + + /* Grow the sqlite3.aVTrans array if required */ + if( (db->nVTrans%ARRAY_INCR)==0 ){ + VTable **aVTrans; + sqlite3_int64 nBytes = sizeof(sqlite3_vtab*)* + ((sqlite3_int64)db->nVTrans + ARRAY_INCR); + aVTrans = sqlite3DbRealloc(db, (void *)db->aVTrans, nBytes); + if( !aVTrans ){ + return SQLITE_NOMEM_BKPT; + } + memset(&aVTrans[db->nVTrans], 0, sizeof(sqlite3_vtab *)*ARRAY_INCR); + db->aVTrans = aVTrans; + } + + return SQLITE_OK; +} + +/* +** Add the virtual table pVTab to the array sqlite3.aVTrans[]. Space should +** have already been reserved using growVTrans(). +*/ +static void addToVTrans(sqlite3 *db, VTable *pVTab){ + /* Add pVtab to the end of sqlite3.aVTrans */ + db->aVTrans[db->nVTrans++] = pVTab; + sqlite3VtabLock(pVTab); +} + +/* +** This function is invoked by the vdbe to call the xCreate method +** of the virtual table named zTab in database iDb. +** +** If an error occurs, *pzErr is set to point to an English language +** description of the error and an SQLITE_XXX error code is returned. +** In this case the caller must call sqlite3DbFree(db, ) on *pzErr. +*/ +SQLITE_PRIVATE int sqlite3VtabCallCreate(sqlite3 *db, int iDb, const char *zTab, char **pzErr){ + int rc = SQLITE_OK; + Table *pTab; + Module *pMod; + const char *zMod; + + pTab = sqlite3FindTable(db, zTab, db->aDb[iDb].zDbSName); + assert( pTab && IsVirtual(pTab) && !pTab->u.vtab.p ); + + /* Locate the required virtual table module */ + zMod = pTab->u.vtab.azArg[0]; + pMod = (Module*)sqlite3HashFind(&db->aModule, zMod); + + /* If the module has been registered and includes a Create method, + ** invoke it now. If the module has not been registered, return an + ** error. Otherwise, do nothing. + */ + if( pMod==0 || pMod->pModule->xCreate==0 || pMod->pModule->xDestroy==0 ){ + *pzErr = sqlite3MPrintf(db, "no such module: %s", zMod); + rc = SQLITE_ERROR; + }else{ + rc = vtabCallConstructor(db, pTab, pMod, pMod->pModule->xCreate, pzErr); + } + + /* Justification of ALWAYS(): The xConstructor method is required to + ** create a valid sqlite3_vtab if it returns SQLITE_OK. */ + if( rc==SQLITE_OK && ALWAYS(sqlite3GetVTable(db, pTab)) ){ + rc = growVTrans(db); + if( rc==SQLITE_OK ){ + addToVTrans(db, sqlite3GetVTable(db, pTab)); + } + } + + return rc; +} + +/* +** This function is used to set the schema of a virtual table. It is only +** valid to call this function from within the xCreate() or xConnect() of a +** virtual table module. +*/ +SQLITE_API int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){ + VtabCtx *pCtx; + int rc = SQLITE_OK; + Table *pTab; + Parse sParse; + int initBusy; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zCreateTable==0 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + pCtx = db->pVtabCtx; + if( !pCtx || pCtx->bDeclared ){ + sqlite3Error(db, SQLITE_MISUSE); + sqlite3_mutex_leave(db->mutex); + return SQLITE_MISUSE_BKPT; + } + pTab = pCtx->pTab; + assert( IsVirtual(pTab) ); + + sqlite3ParseObjectInit(&sParse, db); + sParse.eParseMode = PARSE_MODE_DECLARE_VTAB; + sParse.disableTriggers = 1; + /* We should never be able to reach this point while loading the + ** schema. Nevertheless, defend against that (turn off db->init.busy) + ** in case a bug arises. */ + assert( db->init.busy==0 ); + initBusy = db->init.busy; + db->init.busy = 0; + sParse.nQueryLoop = 1; + if( SQLITE_OK==sqlite3RunParser(&sParse, zCreateTable) + && ALWAYS(sParse.pNewTable!=0) + && ALWAYS(!db->mallocFailed) + && IsOrdinaryTable(sParse.pNewTable) + ){ + assert( sParse.zErrMsg==0 ); + if( !pTab->aCol ){ + Table *pNew = sParse.pNewTable; + Index *pIdx; + pTab->aCol = pNew->aCol; + sqlite3ExprListDelete(db, pNew->u.tab.pDfltList); + pTab->nNVCol = pTab->nCol = pNew->nCol; + pTab->tabFlags |= pNew->tabFlags & (TF_WithoutRowid|TF_NoVisibleRowid); + pNew->nCol = 0; + pNew->aCol = 0; + assert( pTab->pIndex==0 ); + assert( HasRowid(pNew) || sqlite3PrimaryKeyIndex(pNew)!=0 ); + if( !HasRowid(pNew) + && pCtx->pVTable->pMod->pModule->xUpdate!=0 + && sqlite3PrimaryKeyIndex(pNew)->nKeyCol!=1 + ){ + /* WITHOUT ROWID virtual tables must either be read-only (xUpdate==0) + ** or else must have a single-column PRIMARY KEY */ + rc = SQLITE_ERROR; + } + pIdx = pNew->pIndex; + if( pIdx ){ + assert( pIdx->pNext==0 ); + pTab->pIndex = pIdx; + pNew->pIndex = 0; + pIdx->pTable = pTab; + } + } + pCtx->bDeclared = 1; + }else{ + sqlite3ErrorWithMsg(db, SQLITE_ERROR, + (sParse.zErrMsg ? "%s" : 0), sParse.zErrMsg); + sqlite3DbFree(db, sParse.zErrMsg); + rc = SQLITE_ERROR; + } + sParse.eParseMode = PARSE_MODE_NORMAL; + + if( sParse.pVdbe ){ + sqlite3VdbeFinalize(sParse.pVdbe); + } + sqlite3DeleteTable(db, sParse.pNewTable); + sqlite3ParseObjectReset(&sParse); + db->init.busy = initBusy; + + assert( (rc&0xff)==rc ); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** This function is invoked by the vdbe to call the xDestroy method +** of the virtual table named zTab in database iDb. This occurs +** when a DROP TABLE is mentioned. +** +** This call is a no-op if zTab is not a virtual table. +*/ +SQLITE_PRIVATE int sqlite3VtabCallDestroy(sqlite3 *db, int iDb, const char *zTab){ + int rc = SQLITE_OK; + Table *pTab; + + pTab = sqlite3FindTable(db, zTab, db->aDb[iDb].zDbSName); + if( ALWAYS(pTab!=0) + && ALWAYS(IsVirtual(pTab)) + && ALWAYS(pTab->u.vtab.p!=0) + ){ + VTable *p; + int (*xDestroy)(sqlite3_vtab *); + for(p=pTab->u.vtab.p; p; p=p->pNext){ + assert( p->pVtab ); + if( p->pVtab->nRef>0 ){ + return SQLITE_LOCKED; + } + } + p = vtabDisconnectAll(db, pTab); + xDestroy = p->pMod->pModule->xDestroy; + if( xDestroy==0 ) xDestroy = p->pMod->pModule->xDisconnect; + assert( xDestroy!=0 ); + pTab->nTabRef++; + rc = xDestroy(p->pVtab); + /* Remove the sqlite3_vtab* from the aVTrans[] array, if applicable */ + if( rc==SQLITE_OK ){ + assert( pTab->u.vtab.p==p && p->pNext==0 ); + p->pVtab = 0; + pTab->u.vtab.p = 0; + sqlite3VtabUnlock(p); + } + sqlite3DeleteTable(db, pTab); + } + + return rc; +} + +/* +** This function invokes either the xRollback or xCommit method +** of each of the virtual tables in the sqlite3.aVTrans array. The method +** called is identified by the second argument, "offset", which is +** the offset of the method to call in the sqlite3_module structure. +** +** The array is cleared after invoking the callbacks. +*/ +static void callFinaliser(sqlite3 *db, int offset){ + int i; + if( db->aVTrans ){ + VTable **aVTrans = db->aVTrans; + db->aVTrans = 0; + for(i=0; inVTrans; i++){ + VTable *pVTab = aVTrans[i]; + sqlite3_vtab *p = pVTab->pVtab; + if( p ){ + int (*x)(sqlite3_vtab *); + x = *(int (**)(sqlite3_vtab *))((char *)p->pModule + offset); + if( x ) x(p); + } + pVTab->iSavepoint = 0; + sqlite3VtabUnlock(pVTab); + } + sqlite3DbFree(db, aVTrans); + db->nVTrans = 0; + } +} + +/* +** Invoke the xSync method of all virtual tables in the sqlite3.aVTrans +** array. Return the error code for the first error that occurs, or +** SQLITE_OK if all xSync operations are successful. +** +** If an error message is available, leave it in p->zErrMsg. +*/ +SQLITE_PRIVATE int sqlite3VtabSync(sqlite3 *db, Vdbe *p){ + int i; + int rc = SQLITE_OK; + VTable **aVTrans = db->aVTrans; + + db->aVTrans = 0; + for(i=0; rc==SQLITE_OK && inVTrans; i++){ + int (*x)(sqlite3_vtab *); + sqlite3_vtab *pVtab = aVTrans[i]->pVtab; + if( pVtab && (x = pVtab->pModule->xSync)!=0 ){ + rc = x(pVtab); + sqlite3VtabImportErrmsg(p, pVtab); + } + } + db->aVTrans = aVTrans; + return rc; +} + +/* +** Invoke the xRollback method of all virtual tables in the +** sqlite3.aVTrans array. Then clear the array itself. +*/ +SQLITE_PRIVATE int sqlite3VtabRollback(sqlite3 *db){ + callFinaliser(db, offsetof(sqlite3_module,xRollback)); + return SQLITE_OK; +} + +/* +** Invoke the xCommit method of all virtual tables in the +** sqlite3.aVTrans array. Then clear the array itself. +*/ +SQLITE_PRIVATE int sqlite3VtabCommit(sqlite3 *db){ + callFinaliser(db, offsetof(sqlite3_module,xCommit)); + return SQLITE_OK; +} + +/* +** If the virtual table pVtab supports the transaction interface +** (xBegin/xRollback/xCommit and optionally xSync) and a transaction is +** not currently open, invoke the xBegin method now. +** +** If the xBegin call is successful, place the sqlite3_vtab pointer +** in the sqlite3.aVTrans array. +*/ +SQLITE_PRIVATE int sqlite3VtabBegin(sqlite3 *db, VTable *pVTab){ + int rc = SQLITE_OK; + const sqlite3_module *pModule; + + /* Special case: If db->aVTrans is NULL and db->nVTrans is greater + ** than zero, then this function is being called from within a + ** virtual module xSync() callback. It is illegal to write to + ** virtual module tables in this case, so return SQLITE_LOCKED. + */ + if( sqlite3VtabInSync(db) ){ + return SQLITE_LOCKED; + } + if( !pVTab ){ + return SQLITE_OK; + } + pModule = pVTab->pVtab->pModule; + + if( pModule->xBegin ){ + int i; + + /* If pVtab is already in the aVTrans array, return early */ + for(i=0; inVTrans; i++){ + if( db->aVTrans[i]==pVTab ){ + return SQLITE_OK; + } + } + + /* Invoke the xBegin method. If successful, add the vtab to the + ** sqlite3.aVTrans[] array. */ + rc = growVTrans(db); + if( rc==SQLITE_OK ){ + rc = pModule->xBegin(pVTab->pVtab); + if( rc==SQLITE_OK ){ + int iSvpt = db->nStatement + db->nSavepoint; + addToVTrans(db, pVTab); + if( iSvpt && pModule->xSavepoint ){ + pVTab->iSavepoint = iSvpt; + rc = pModule->xSavepoint(pVTab->pVtab, iSvpt-1); + } + } + } + } + return rc; +} + +/* +** Invoke either the xSavepoint, xRollbackTo or xRelease method of all +** virtual tables that currently have an open transaction. Pass iSavepoint +** as the second argument to the virtual table method invoked. +** +** If op is SAVEPOINT_BEGIN, the xSavepoint method is invoked. If it is +** SAVEPOINT_ROLLBACK, the xRollbackTo method. Otherwise, if op is +** SAVEPOINT_RELEASE, then the xRelease method of each virtual table with +** an open transaction is invoked. +** +** If any virtual table method returns an error code other than SQLITE_OK, +** processing is abandoned and the error returned to the caller of this +** function immediately. If all calls to virtual table methods are successful, +** SQLITE_OK is returned. +*/ +SQLITE_PRIVATE int sqlite3VtabSavepoint(sqlite3 *db, int op, int iSavepoint){ + int rc = SQLITE_OK; + + assert( op==SAVEPOINT_RELEASE||op==SAVEPOINT_ROLLBACK||op==SAVEPOINT_BEGIN ); + assert( iSavepoint>=-1 ); + if( db->aVTrans ){ + int i; + for(i=0; rc==SQLITE_OK && inVTrans; i++){ + VTable *pVTab = db->aVTrans[i]; + const sqlite3_module *pMod = pVTab->pMod->pModule; + if( pVTab->pVtab && pMod->iVersion>=2 ){ + int (*xMethod)(sqlite3_vtab *, int); + sqlite3VtabLock(pVTab); + switch( op ){ + case SAVEPOINT_BEGIN: + xMethod = pMod->xSavepoint; + pVTab->iSavepoint = iSavepoint+1; + break; + case SAVEPOINT_ROLLBACK: + xMethod = pMod->xRollbackTo; + break; + default: + xMethod = pMod->xRelease; + break; + } + if( xMethod && pVTab->iSavepoint>iSavepoint ){ + rc = xMethod(pVTab->pVtab, iSavepoint); + } + sqlite3VtabUnlock(pVTab); + } + } + } + return rc; +} + +/* +** The first parameter (pDef) is a function implementation. The +** second parameter (pExpr) is the first argument to this function. +** If pExpr is a column in a virtual table, then let the virtual +** table implementation have an opportunity to overload the function. +** +** This routine is used to allow virtual table implementations to +** overload MATCH, LIKE, GLOB, and REGEXP operators. +** +** Return either the pDef argument (indicating no change) or a +** new FuncDef structure that is marked as ephemeral using the +** SQLITE_FUNC_EPHEM flag. +*/ +SQLITE_PRIVATE FuncDef *sqlite3VtabOverloadFunction( + sqlite3 *db, /* Database connection for reporting malloc problems */ + FuncDef *pDef, /* Function to possibly overload */ + int nArg, /* Number of arguments to the function */ + Expr *pExpr /* First argument to the function */ +){ + Table *pTab; + sqlite3_vtab *pVtab; + sqlite3_module *pMod; + void (*xSFunc)(sqlite3_context*,int,sqlite3_value**) = 0; + void *pArg = 0; + FuncDef *pNew; + int rc = 0; + + /* Check to see the left operand is a column in a virtual table */ + if( NEVER(pExpr==0) ) return pDef; + if( pExpr->op!=TK_COLUMN ) return pDef; + assert( ExprUseYTab(pExpr) ); + pTab = pExpr->y.pTab; + if( pTab==0 ) return pDef; + if( !IsVirtual(pTab) ) return pDef; + pVtab = sqlite3GetVTable(db, pTab)->pVtab; + assert( pVtab!=0 ); + assert( pVtab->pModule!=0 ); + pMod = (sqlite3_module *)pVtab->pModule; + if( pMod->xFindFunction==0 ) return pDef; + + /* Call the xFindFunction method on the virtual table implementation + ** to see if the implementation wants to overload this function. + ** + ** Though undocumented, we have historically always invoked xFindFunction + ** with an all lower-case function name. Continue in this tradition to + ** avoid any chance of an incompatibility. + */ +#ifdef SQLITE_DEBUG + { + int i; + for(i=0; pDef->zName[i]; i++){ + unsigned char x = (unsigned char)pDef->zName[i]; + assert( x==sqlite3UpperToLower[x] ); + } + } +#endif + rc = pMod->xFindFunction(pVtab, nArg, pDef->zName, &xSFunc, &pArg); + if( rc==0 ){ + return pDef; + } + + /* Create a new ephemeral function definition for the overloaded + ** function */ + pNew = sqlite3DbMallocZero(db, sizeof(*pNew) + + sqlite3Strlen30(pDef->zName) + 1); + if( pNew==0 ){ + return pDef; + } + *pNew = *pDef; + pNew->zName = (const char*)&pNew[1]; + memcpy((char*)&pNew[1], pDef->zName, sqlite3Strlen30(pDef->zName)+1); + pNew->xSFunc = xSFunc; + pNew->pUserData = pArg; + pNew->funcFlags |= SQLITE_FUNC_EPHEM; + return pNew; +} + +/* +** Make sure virtual table pTab is contained in the pParse->apVirtualLock[] +** array so that an OP_VBegin will get generated for it. Add pTab to the +** array if it is missing. If pTab is already in the array, this routine +** is a no-op. +*/ +SQLITE_PRIVATE void sqlite3VtabMakeWritable(Parse *pParse, Table *pTab){ + Parse *pToplevel = sqlite3ParseToplevel(pParse); + int i, n; + Table **apVtabLock; + + assert( IsVirtual(pTab) ); + for(i=0; inVtabLock; i++){ + if( pTab==pToplevel->apVtabLock[i] ) return; + } + n = (pToplevel->nVtabLock+1)*sizeof(pToplevel->apVtabLock[0]); + apVtabLock = sqlite3Realloc(pToplevel->apVtabLock, n); + if( apVtabLock ){ + pToplevel->apVtabLock = apVtabLock; + pToplevel->apVtabLock[pToplevel->nVtabLock++] = pTab; + }else{ + sqlite3OomFault(pToplevel->db); + } +} + +/* +** Check to see if virtual table module pMod can be have an eponymous +** virtual table instance. If it can, create one if one does not already +** exist. Return non-zero if either the eponymous virtual table instance +** exists when this routine returns or if an attempt to create it failed +** and an error message was left in pParse. +** +** An eponymous virtual table instance is one that is named after its +** module, and more importantly, does not require a CREATE VIRTUAL TABLE +** statement in order to come into existance. Eponymous virtual table +** instances always exist. They cannot be DROP-ed. +** +** Any virtual table module for which xConnect and xCreate are the same +** method can have an eponymous virtual table instance. +*/ +SQLITE_PRIVATE int sqlite3VtabEponymousTableInit(Parse *pParse, Module *pMod){ + const sqlite3_module *pModule = pMod->pModule; + Table *pTab; + char *zErr = 0; + int rc; + sqlite3 *db = pParse->db; + if( pMod->pEpoTab ) return 1; + if( pModule->xCreate!=0 && pModule->xCreate!=pModule->xConnect ) return 0; + pTab = sqlite3DbMallocZero(db, sizeof(Table)); + if( pTab==0 ) return 0; + pTab->zName = sqlite3DbStrDup(db, pMod->zName); + if( pTab->zName==0 ){ + sqlite3DbFree(db, pTab); + return 0; + } + pMod->pEpoTab = pTab; + pTab->nTabRef = 1; + pTab->eTabType = TABTYP_VTAB; + pTab->pSchema = db->aDb[0].pSchema; + assert( pTab->u.vtab.nArg==0 ); + pTab->iPKey = -1; + pTab->tabFlags |= TF_Eponymous; + addModuleArgument(pParse, pTab, sqlite3DbStrDup(db, pTab->zName)); + addModuleArgument(pParse, pTab, 0); + addModuleArgument(pParse, pTab, sqlite3DbStrDup(db, pTab->zName)); + rc = vtabCallConstructor(db, pTab, pMod, pModule->xConnect, &zErr); + if( rc ){ + sqlite3ErrorMsg(pParse, "%s", zErr); + sqlite3DbFree(db, zErr); + sqlite3VtabEponymousTableClear(db, pMod); + } + return 1; +} + +/* +** Erase the eponymous virtual table instance associated with +** virtual table module pMod, if it exists. +*/ +SQLITE_PRIVATE void sqlite3VtabEponymousTableClear(sqlite3 *db, Module *pMod){ + Table *pTab = pMod->pEpoTab; + if( pTab!=0 ){ + /* Mark the table as Ephemeral prior to deleting it, so that the + ** sqlite3DeleteTable() routine will know that it is not stored in + ** the schema. */ + pTab->tabFlags |= TF_Ephemeral; + sqlite3DeleteTable(db, pTab); + pMod->pEpoTab = 0; + } +} + +/* +** Return the ON CONFLICT resolution mode in effect for the virtual +** table update operation currently in progress. +** +** The results of this routine are undefined unless it is called from +** within an xUpdate method. +*/ +SQLITE_API int sqlite3_vtab_on_conflict(sqlite3 *db){ + static const unsigned char aMap[] = { + SQLITE_ROLLBACK, SQLITE_ABORT, SQLITE_FAIL, SQLITE_IGNORE, SQLITE_REPLACE + }; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + assert( OE_Rollback==1 && OE_Abort==2 && OE_Fail==3 ); + assert( OE_Ignore==4 && OE_Replace==5 ); + assert( db->vtabOnConflict>=1 && db->vtabOnConflict<=5 ); + return (int)aMap[db->vtabOnConflict-1]; +} + +/* +** Call from within the xCreate() or xConnect() methods to provide +** the SQLite core with additional information about the behavior +** of the virtual table being implemented. +*/ +SQLITE_API int sqlite3_vtab_config(sqlite3 *db, int op, ...){ + va_list ap; + int rc = SQLITE_OK; + VtabCtx *p; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + p = db->pVtabCtx; + if( !p ){ + rc = SQLITE_MISUSE_BKPT; + }else{ + assert( p->pTab==0 || IsVirtual(p->pTab) ); + va_start(ap, op); + switch( op ){ + case SQLITE_VTAB_CONSTRAINT_SUPPORT: { + p->pVTable->bConstraint = (u8)va_arg(ap, int); + break; + } + case SQLITE_VTAB_INNOCUOUS: { + p->pVTable->eVtabRisk = SQLITE_VTABRISK_Low; + break; + } + case SQLITE_VTAB_DIRECTONLY: { + p->pVTable->eVtabRisk = SQLITE_VTABRISK_High; + break; + } + default: { + rc = SQLITE_MISUSE_BKPT; + break; + } + } + va_end(ap); + } + + if( rc!=SQLITE_OK ) sqlite3Error(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/************** End of vtab.c ************************************************/ +/************** Begin file wherecode.c ***************************************/ +/* +** 2015-06-06 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This module contains C code that generates VDBE code used to process +** the WHERE clause of SQL statements. +** +** This file was split off from where.c on 2015-06-06 in order to reduce the +** size of where.c and make it easier to edit. This file contains the routines +** that actually generate the bulk of the WHERE loop code. The original where.c +** file retains the code that does query planning and analysis. +*/ +/* #include "sqliteInt.h" */ +/************** Include whereInt.h in the middle of wherecode.c **************/ +/************** Begin file whereInt.h ****************************************/ +/* +** 2013-11-12 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains structure and macro definitions for the query +** planner logic in "where.c". These definitions are broken out into +** a separate source file for easier editing. +*/ +#ifndef SQLITE_WHEREINT_H +#define SQLITE_WHEREINT_H + + +/* Forward references +*/ +typedef struct WhereClause WhereClause; +typedef struct WhereMaskSet WhereMaskSet; +typedef struct WhereOrInfo WhereOrInfo; +typedef struct WhereAndInfo WhereAndInfo; +typedef struct WhereLevel WhereLevel; +typedef struct WhereLoop WhereLoop; +typedef struct WherePath WherePath; +typedef struct WhereTerm WhereTerm; +typedef struct WhereLoopBuilder WhereLoopBuilder; +typedef struct WhereScan WhereScan; +typedef struct WhereOrCost WhereOrCost; +typedef struct WhereOrSet WhereOrSet; + +/* +** This object contains information needed to implement a single nested +** loop in WHERE clause. +** +** Contrast this object with WhereLoop. This object describes the +** implementation of the loop. WhereLoop describes the algorithm. +** This object contains a pointer to the WhereLoop algorithm as one of +** its elements. +** +** The WhereInfo object contains a single instance of this object for +** each term in the FROM clause (which is to say, for each of the +** nested loops as implemented). The order of WhereLevel objects determines +** the loop nested order, with WhereInfo.a[0] being the outer loop and +** WhereInfo.a[WhereInfo.nLevel-1] being the inner loop. +*/ +struct WhereLevel { + int iLeftJoin; /* Memory cell used to implement LEFT OUTER JOIN */ + int iTabCur; /* The VDBE cursor used to access the table */ + int iIdxCur; /* The VDBE cursor used to access pIdx */ + int addrBrk; /* Jump here to break out of the loop */ + int addrNxt; /* Jump here to start the next IN combination */ + int addrSkip; /* Jump here for next iteration of skip-scan */ + int addrCont; /* Jump here to continue with the next loop cycle */ + int addrFirst; /* First instruction of interior of the loop */ + int addrBody; /* Beginning of the body of this loop */ + int regBignull; /* big-null flag reg. True if a NULL-scan is needed */ + int addrBignull; /* Jump here for next part of big-null scan */ +#ifndef SQLITE_LIKE_DOESNT_MATCH_BLOBS + u32 iLikeRepCntr; /* LIKE range processing counter register (times 2) */ + int addrLikeRep; /* LIKE range processing address */ +#endif + int regFilter; /* Bloom filter */ + u8 iFrom; /* Which entry in the FROM clause */ + u8 op, p3, p5; /* Opcode, P3 & P5 of the opcode that ends the loop */ + int p1, p2; /* Operands of the opcode used to end the loop */ + union { /* Information that depends on pWLoop->wsFlags */ + struct { + int nIn; /* Number of entries in aInLoop[] */ + struct InLoop { + int iCur; /* The VDBE cursor used by this IN operator */ + int addrInTop; /* Top of the IN loop */ + int iBase; /* Base register of multi-key index record */ + int nPrefix; /* Number of prior entires in the key */ + u8 eEndLoopOp; /* IN Loop terminator. OP_Next or OP_Prev */ + } *aInLoop; /* Information about each nested IN operator */ + } in; /* Used when pWLoop->wsFlags&WHERE_IN_ABLE */ + Index *pCoveringIdx; /* Possible covering index for WHERE_MULTI_OR */ + } u; + struct WhereLoop *pWLoop; /* The selected WhereLoop object */ + Bitmask notReady; /* FROM entries not usable at this level */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + int addrVisit; /* Address at which row is visited */ +#endif +}; + +/* +** Each instance of this object represents an algorithm for evaluating one +** term of a join. Every term of the FROM clause will have at least +** one corresponding WhereLoop object (unless INDEXED BY constraints +** prevent a query solution - which is an error) and many terms of the +** FROM clause will have multiple WhereLoop objects, each describing a +** potential way of implementing that FROM-clause term, together with +** dependencies and cost estimates for using the chosen algorithm. +** +** Query planning consists of building up a collection of these WhereLoop +** objects, then computing a particular sequence of WhereLoop objects, with +** one WhereLoop object per FROM clause term, that satisfy all dependencies +** and that minimize the overall cost. +*/ +struct WhereLoop { + Bitmask prereq; /* Bitmask of other loops that must run first */ + Bitmask maskSelf; /* Bitmask identifying table iTab */ +#ifdef SQLITE_DEBUG + char cId; /* Symbolic ID of this loop for debugging use */ +#endif + u8 iTab; /* Position in FROM clause of table for this loop */ + u8 iSortIdx; /* Sorting index number. 0==None */ + LogEst rSetup; /* One-time setup cost (ex: create transient index) */ + LogEst rRun; /* Cost of running each loop */ + LogEst nOut; /* Estimated number of output rows */ + union { + struct { /* Information for internal btree tables */ + u16 nEq; /* Number of equality constraints */ + u16 nBtm; /* Size of BTM vector */ + u16 nTop; /* Size of TOP vector */ + u16 nDistinctCol; /* Index columns used to sort for DISTINCT */ + Index *pIndex; /* Index used, or NULL */ + } btree; + struct { /* Information for virtual tables */ + int idxNum; /* Index number */ + u32 needFree : 1; /* True if sqlite3_free(idxStr) is needed */ + u32 bOmitOffset : 1; /* True to let virtual table handle offset */ + i8 isOrdered; /* True if satisfies ORDER BY */ + u16 omitMask; /* Terms that may be omitted */ + char *idxStr; /* Index identifier string */ + u32 mHandleIn; /* Terms to handle as IN(...) instead of == */ + } vtab; + } u; + u32 wsFlags; /* WHERE_* flags describing the plan */ + u16 nLTerm; /* Number of entries in aLTerm[] */ + u16 nSkip; /* Number of NULL aLTerm[] entries */ + /**** whereLoopXfer() copies fields above ***********************/ +# define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot) + u16 nLSlot; /* Number of slots allocated for aLTerm[] */ + WhereTerm **aLTerm; /* WhereTerms used */ + WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */ + WhereTerm *aLTermSpace[3]; /* Initial aLTerm[] space */ +}; + +/* This object holds the prerequisites and the cost of running a +** subquery on one operand of an OR operator in the WHERE clause. +** See WhereOrSet for additional information +*/ +struct WhereOrCost { + Bitmask prereq; /* Prerequisites */ + LogEst rRun; /* Cost of running this subquery */ + LogEst nOut; /* Number of outputs for this subquery */ +}; + +/* The WhereOrSet object holds a set of possible WhereOrCosts that +** correspond to the subquery(s) of OR-clause processing. Only the +** best N_OR_COST elements are retained. +*/ +#define N_OR_COST 3 +struct WhereOrSet { + u16 n; /* Number of valid a[] entries */ + WhereOrCost a[N_OR_COST]; /* Set of best costs */ +}; + +/* +** Each instance of this object holds a sequence of WhereLoop objects +** that implement some or all of a query plan. +** +** Think of each WhereLoop object as a node in a graph with arcs +** showing dependencies and costs for travelling between nodes. (That is +** not a completely accurate description because WhereLoop costs are a +** vector, not a scalar, and because dependencies are many-to-one, not +** one-to-one as are graph nodes. But it is a useful visualization aid.) +** Then a WherePath object is a path through the graph that visits some +** or all of the WhereLoop objects once. +** +** The "solver" works by creating the N best WherePath objects of length +** 1. Then using those as a basis to compute the N best WherePath objects +** of length 2. And so forth until the length of WherePaths equals the +** number of nodes in the FROM clause. The best (lowest cost) WherePath +** at the end is the chosen query plan. +*/ +struct WherePath { + Bitmask maskLoop; /* Bitmask of all WhereLoop objects in this path */ + Bitmask revLoop; /* aLoop[]s that should be reversed for ORDER BY */ + LogEst nRow; /* Estimated number of rows generated by this path */ + LogEst rCost; /* Total cost of this path */ + LogEst rUnsorted; /* Total cost of this path ignoring sorting costs */ + i8 isOrdered; /* No. of ORDER BY terms satisfied. -1 for unknown */ + WhereLoop **aLoop; /* Array of WhereLoop objects implementing this path */ +}; + +/* +** The query generator uses an array of instances of this structure to +** help it analyze the subexpressions of the WHERE clause. Each WHERE +** clause subexpression is separated from the others by AND operators, +** usually, or sometimes subexpressions separated by OR. +** +** All WhereTerms are collected into a single WhereClause structure. +** The following identity holds: +** +** WhereTerm.pWC->a[WhereTerm.idx] == WhereTerm +** +** When a term is of the form: +** +** X +** +** where X is a column name and is one of certain operators, +** then WhereTerm.leftCursor and WhereTerm.u.leftColumn record the +** cursor number and column number for X. WhereTerm.eOperator records +** the using a bitmask encoding defined by WO_xxx below. The +** use of a bitmask encoding for the operator allows us to search +** quickly for terms that match any of several different operators. +** +** A WhereTerm might also be two or more subterms connected by OR: +** +** (t1.X ) OR (t1.Y ) OR .... +** +** In this second case, wtFlag has the TERM_ORINFO bit set and eOperator==WO_OR +** and the WhereTerm.u.pOrInfo field points to auxiliary information that +** is collected about the OR clause. +** +** If a term in the WHERE clause does not match either of the two previous +** categories, then eOperator==0. The WhereTerm.pExpr field is still set +** to the original subexpression content and wtFlags is set up appropriately +** but no other fields in the WhereTerm object are meaningful. +** +** When eOperator!=0, prereqRight and prereqAll record sets of cursor numbers, +** but they do so indirectly. A single WhereMaskSet structure translates +** cursor number into bits and the translated bit is stored in the prereq +** fields. The translation is used in order to maximize the number of +** bits that will fit in a Bitmask. The VDBE cursor numbers might be +** spread out over the non-negative integers. For example, the cursor +** numbers might be 3, 8, 9, 10, 20, 23, 41, and 45. The WhereMaskSet +** translates these sparse cursor numbers into consecutive integers +** beginning with 0 in order to make the best possible use of the available +** bits in the Bitmask. So, in the example above, the cursor numbers +** would be mapped into integers 0 through 7. +** +** The number of terms in a join is limited by the number of bits +** in prereqRight and prereqAll. The default is 64 bits, hence SQLite +** is only able to process joins with 64 or fewer tables. +*/ +struct WhereTerm { + Expr *pExpr; /* Pointer to the subexpression that is this term */ + WhereClause *pWC; /* The clause this term is part of */ + LogEst truthProb; /* Probability of truth for this expression */ + u16 wtFlags; /* TERM_xxx bit flags. See below */ + u16 eOperator; /* A WO_xx value describing */ + u8 nChild; /* Number of children that must disable us */ + u8 eMatchOp; /* Op for vtab MATCH/LIKE/GLOB/REGEXP terms */ + int iParent; /* Disable pWC->a[iParent] when this term disabled */ + int leftCursor; /* Cursor number of X in "X " */ + union { + struct { + int leftColumn; /* Column number of X in "X " */ + int iField; /* Field in (?,?,?) IN (SELECT...) vector */ + } x; /* Opcode other than OP_OR or OP_AND */ + WhereOrInfo *pOrInfo; /* Extra information if (eOperator & WO_OR)!=0 */ + WhereAndInfo *pAndInfo; /* Extra information if (eOperator& WO_AND)!=0 */ + } u; + Bitmask prereqRight; /* Bitmask of tables used by pExpr->pRight */ + Bitmask prereqAll; /* Bitmask of tables referenced by pExpr */ +}; + +/* +** Allowed values of WhereTerm.wtFlags +*/ +#define TERM_DYNAMIC 0x0001 /* Need to call sqlite3ExprDelete(db, pExpr) */ +#define TERM_VIRTUAL 0x0002 /* Added by the optimizer. Do not code */ +#define TERM_CODED 0x0004 /* This term is already coded */ +#define TERM_COPIED 0x0008 /* Has a child */ +#define TERM_ORINFO 0x0010 /* Need to free the WhereTerm.u.pOrInfo object */ +#define TERM_ANDINFO 0x0020 /* Need to free the WhereTerm.u.pAndInfo obj */ +#define TERM_OK 0x0040 /* Used during OR-clause processing */ +#define TERM_VNULL 0x0080 /* Manufactured x>NULL or x<=NULL term */ +#define TERM_LIKEOPT 0x0100 /* Virtual terms from the LIKE optimization */ +#define TERM_LIKECOND 0x0200 /* Conditionally this LIKE operator term */ +#define TERM_LIKE 0x0400 /* The original LIKE operator */ +#define TERM_IS 0x0800 /* Term.pExpr is an IS operator */ +#define TERM_VARSELECT 0x1000 /* Term.pExpr contains a correlated sub-query */ +#define TERM_HEURTRUTH 0x2000 /* Heuristic truthProb used */ +#ifdef SQLITE_ENABLE_STAT4 +# define TERM_HIGHTRUTH 0x4000 /* Term excludes few rows */ +#else +# define TERM_HIGHTRUTH 0 /* Only used with STAT4 */ +#endif +#define TERM_SLICE 0x8000 /* One slice of a row-value/vector comparison */ + +/* +** An instance of the WhereScan object is used as an iterator for locating +** terms in the WHERE clause that are useful to the query planner. +*/ +struct WhereScan { + WhereClause *pOrigWC; /* Original, innermost WhereClause */ + WhereClause *pWC; /* WhereClause currently being scanned */ + const char *zCollName; /* Required collating sequence, if not NULL */ + Expr *pIdxExpr; /* Search for this index expression */ + int k; /* Resume scanning at this->pWC->a[this->k] */ + u32 opMask; /* Acceptable operators */ + char idxaff; /* Must match this affinity, if zCollName!=NULL */ + unsigned char iEquiv; /* Current slot in aiCur[] and aiColumn[] */ + unsigned char nEquiv; /* Number of entries in aiCur[] and aiColumn[] */ + int aiCur[11]; /* Cursors in the equivalence class */ + i16 aiColumn[11]; /* Corresponding column number in the eq-class */ +}; + +/* +** An instance of the following structure holds all information about a +** WHERE clause. Mostly this is a container for one or more WhereTerms. +** +** Explanation of pOuter: For a WHERE clause of the form +** +** a AND ((b AND c) OR (d AND e)) AND f +** +** There are separate WhereClause objects for the whole clause and for +** the subclauses "(b AND c)" and "(d AND e)". The pOuter field of the +** subclauses points to the WhereClause object for the whole clause. +*/ +struct WhereClause { + WhereInfo *pWInfo; /* WHERE clause processing context */ + WhereClause *pOuter; /* Outer conjunction */ + u8 op; /* Split operator. TK_AND or TK_OR */ + u8 hasOr; /* True if any a[].eOperator is WO_OR */ + int nTerm; /* Number of terms */ + int nSlot; /* Number of entries in a[] */ + int nBase; /* Number of terms through the last non-Virtual */ + WhereTerm *a; /* Each a[] describes a term of the WHERE cluase */ +#if defined(SQLITE_SMALL_STACK) + WhereTerm aStatic[1]; /* Initial static space for a[] */ +#else + WhereTerm aStatic[8]; /* Initial static space for a[] */ +#endif +}; + +/* +** A WhereTerm with eOperator==WO_OR has its u.pOrInfo pointer set to +** a dynamically allocated instance of the following structure. +*/ +struct WhereOrInfo { + WhereClause wc; /* Decomposition into subterms */ + Bitmask indexable; /* Bitmask of all indexable tables in the clause */ +}; + +/* +** A WhereTerm with eOperator==WO_AND has its u.pAndInfo pointer set to +** a dynamically allocated instance of the following structure. +*/ +struct WhereAndInfo { + WhereClause wc; /* The subexpression broken out */ +}; + +/* +** An instance of the following structure keeps track of a mapping +** between VDBE cursor numbers and bits of the bitmasks in WhereTerm. +** +** The VDBE cursor numbers are small integers contained in +** SrcList_item.iCursor and Expr.iTable fields. For any given WHERE +** clause, the cursor numbers might not begin with 0 and they might +** contain gaps in the numbering sequence. But we want to make maximum +** use of the bits in our bitmasks. This structure provides a mapping +** from the sparse cursor numbers into consecutive integers beginning +** with 0. +** +** If WhereMaskSet.ix[A]==B it means that The A-th bit of a Bitmask +** corresponds VDBE cursor number B. The A-th bit of a bitmask is 1<3, 5->1, 8->2, 29->0, +** 57->5, 73->4. Or one of 719 other combinations might be used. It +** does not really matter. What is important is that sparse cursor +** numbers all get mapped into bit numbers that begin with 0 and contain +** no gaps. +*/ +struct WhereMaskSet { + int bVarSelect; /* Used by sqlite3WhereExprUsage() */ + int n; /* Number of assigned cursor values */ + int ix[BMS]; /* Cursor assigned to each bit */ +}; + +/* +** This object is a convenience wrapper holding all information needed +** to construct WhereLoop objects for a particular query. +*/ +struct WhereLoopBuilder { + WhereInfo *pWInfo; /* Information about this WHERE */ + WhereClause *pWC; /* WHERE clause terms */ + WhereLoop *pNew; /* Template WhereLoop */ + WhereOrSet *pOrSet; /* Record best loops here, if not NULL */ +#ifdef SQLITE_ENABLE_STAT4 + UnpackedRecord *pRec; /* Probe for stat4 (if required) */ + int nRecValid; /* Number of valid fields currently in pRec */ +#endif + unsigned char bldFlags1; /* First set of SQLITE_BLDF_* flags */ + unsigned char bldFlags2; /* Second set of SQLITE_BLDF_* flags */ + unsigned int iPlanLimit; /* Search limiter */ +}; + +/* Allowed values for WhereLoopBuider.bldFlags */ +#define SQLITE_BLDF1_INDEXED 0x0001 /* An index is used */ +#define SQLITE_BLDF1_UNIQUE 0x0002 /* All keys of a UNIQUE index used */ + +#define SQLITE_BLDF2_2NDPASS 0x0004 /* Second builder pass needed */ + +/* The WhereLoopBuilder.iPlanLimit is used to limit the number of +** index+constraint combinations the query planner will consider for a +** particular query. If this parameter is unlimited, then certain +** pathological queries can spend excess time in the sqlite3WhereBegin() +** routine. The limit is high enough that is should not impact real-world +** queries. +** +** SQLITE_QUERY_PLANNER_LIMIT is the baseline limit. The limit is +** increased by SQLITE_QUERY_PLANNER_LIMIT_INCR before each term of the FROM +** clause is processed, so that every table in a join is guaranteed to be +** able to propose a some index+constraint combinations even if the initial +** baseline limit was exhausted by prior tables of the join. +*/ +#ifndef SQLITE_QUERY_PLANNER_LIMIT +# define SQLITE_QUERY_PLANNER_LIMIT 20000 +#endif +#ifndef SQLITE_QUERY_PLANNER_LIMIT_INCR +# define SQLITE_QUERY_PLANNER_LIMIT_INCR 1000 +#endif + +/* +** Each instance of this object records a change to a single node +** in an expression tree to cause that node to point to a column +** of an index rather than an expression or a virtual column. All +** such transformations need to be undone at the end of WHERE clause +** processing. +*/ +typedef struct WhereExprMod WhereExprMod; +struct WhereExprMod { + WhereExprMod *pNext; /* Next translation on a list of them all */ + Expr *pExpr; /* The Expr node that was transformed */ + Expr orig; /* Original value of the Expr node */ +}; + +/* +** The WHERE clause processing routine has two halves. The +** first part does the start of the WHERE loop and the second +** half does the tail of the WHERE loop. An instance of +** this structure is returned by the first half and passed +** into the second half to give some continuity. +** +** An instance of this object holds the complete state of the query +** planner. +*/ +struct WhereInfo { + Parse *pParse; /* Parsing and code generating context */ + SrcList *pTabList; /* List of tables in the join */ + ExprList *pOrderBy; /* The ORDER BY clause or NULL */ + ExprList *pResultSet; /* Result set of the query */ + Expr *pWhere; /* The complete WHERE clause */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + Select *pLimit; /* Used to access LIMIT expr/registers for vtabs */ +#endif + int aiCurOnePass[2]; /* OP_OpenWrite cursors for the ONEPASS opt */ + int iContinue; /* Jump here to continue with next record */ + int iBreak; /* Jump here to break out of the loop */ + int savedNQueryLoop; /* pParse->nQueryLoop outside the WHERE loop */ + u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ + LogEst iLimit; /* LIMIT if wctrlFlags has WHERE_USE_LIMIT */ + u8 nLevel; /* Number of nested loop */ + i8 nOBSat; /* Number of ORDER BY terms satisfied by indices */ + u8 eOnePass; /* ONEPASS_OFF, or _SINGLE, or _MULTI */ + u8 eDistinct; /* One of the WHERE_DISTINCT_* values */ + unsigned bDeferredSeek :1; /* Uses OP_DeferredSeek */ + unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */ + unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */ + unsigned sorted :1; /* True if really sorted (not just grouped) */ + LogEst nRowOut; /* Estimated number of output rows */ + int iTop; /* The very beginning of the WHERE loop */ + int iEndWhere; /* End of the WHERE clause itself */ + WhereLoop *pLoops; /* List of all WhereLoop objects */ + WhereExprMod *pExprMods; /* Expression modifications */ + Bitmask revMask; /* Mask of ORDER BY terms that need reversing */ + WhereClause sWC; /* Decomposition of the WHERE clause */ + WhereMaskSet sMaskSet; /* Map cursor numbers to bitmasks */ + WhereLevel a[1]; /* Information about each nest loop in WHERE */ +}; + +/* +** Private interfaces - callable only by other where.c routines. +** +** where.c: +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereGetMask(WhereMaskSet*,int); +#ifdef WHERETRACE_ENABLED +SQLITE_PRIVATE void sqlite3WhereClausePrint(WhereClause *pWC); +SQLITE_PRIVATE void sqlite3WhereTermPrint(WhereTerm *pTerm, int iTerm); +SQLITE_PRIVATE void sqlite3WhereLoopPrint(WhereLoop *p, WhereClause *pWC); +#endif +SQLITE_PRIVATE WhereTerm *sqlite3WhereFindTerm( + WhereClause *pWC, /* The WHERE clause to be searched */ + int iCur, /* Cursor number of LHS */ + int iColumn, /* Column number of LHS */ + Bitmask notReady, /* RHS must not overlap with this mask */ + u32 op, /* Mask of WO_xx values describing operator */ + Index *pIdx /* Must be compatible with this index, if not NULL */ +); + +/* wherecode.c: */ +#ifndef SQLITE_OMIT_EXPLAIN +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( + Parse *pParse, /* Parse context */ + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ +); +SQLITE_PRIVATE int sqlite3WhereExplainBloomFilter( + const Parse *pParse, /* Parse context */ + const WhereInfo *pWInfo, /* WHERE clause */ + const WhereLevel *pLevel /* Bloom filter on this level */ +); +#else +# define sqlite3WhereExplainOneScan(u,v,w,x) 0 +# define sqlite3WhereExplainBloomFilter(u,v,w) 0 +#endif /* SQLITE_OMIT_EXPLAIN */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +SQLITE_PRIVATE void sqlite3WhereAddScanStatus( + Vdbe *v, /* Vdbe to add scanstatus entry to */ + SrcList *pSrclist, /* FROM clause pLvl reads data from */ + WhereLevel *pLvl, /* Level to add scanstatus() entry for */ + int addrExplain /* Address of OP_Explain (or 0) */ +); +#else +# define sqlite3WhereAddScanStatus(a, b, c, d) ((void)d) +#endif +SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( + Parse *pParse, /* Parsing context */ + Vdbe *v, /* Prepared statement under construction */ + WhereInfo *pWInfo, /* Complete information about the WHERE clause */ + int iLevel, /* Which level of pWInfo->a[] should be coded */ + WhereLevel *pLevel, /* The current level pointer */ + Bitmask notReady /* Which tables are currently available */ +); + +/* whereexpr.c: */ +SQLITE_PRIVATE void sqlite3WhereClauseInit(WhereClause*,WhereInfo*); +SQLITE_PRIVATE void sqlite3WhereClauseClear(WhereClause*); +SQLITE_PRIVATE void sqlite3WhereSplit(WhereClause*,Expr*,u8); +SQLITE_PRIVATE void sqlite3WhereAddLimit(WhereClause*, Select*); +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsage(WhereMaskSet*, Expr*); +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsageNN(WhereMaskSet*, Expr*); +SQLITE_PRIVATE Bitmask sqlite3WhereExprListUsage(WhereMaskSet*, ExprList*); +SQLITE_PRIVATE void sqlite3WhereExprAnalyze(SrcList*, WhereClause*); +SQLITE_PRIVATE void sqlite3WhereTabFuncArgs(Parse*, SrcItem*, WhereClause*); + + + + + +/* +** Bitmasks for the operators on WhereTerm objects. These are all +** operators that are of interest to the query planner. An +** OR-ed combination of these values can be used when searching for +** particular WhereTerms within a WhereClause. +** +** Value constraints: +** WO_EQ == SQLITE_INDEX_CONSTRAINT_EQ +** WO_LT == SQLITE_INDEX_CONSTRAINT_LT +** WO_LE == SQLITE_INDEX_CONSTRAINT_LE +** WO_GT == SQLITE_INDEX_CONSTRAINT_GT +** WO_GE == SQLITE_INDEX_CONSTRAINT_GE +*/ +#define WO_IN 0x0001 +#define WO_EQ 0x0002 +#define WO_LT (WO_EQ<<(TK_LT-TK_EQ)) +#define WO_LE (WO_EQ<<(TK_LE-TK_EQ)) +#define WO_GT (WO_EQ<<(TK_GT-TK_EQ)) +#define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) +#define WO_AUX 0x0040 /* Op useful to virtual tables only */ +#define WO_IS 0x0080 +#define WO_ISNULL 0x0100 +#define WO_OR 0x0200 /* Two or more OR-connected terms */ +#define WO_AND 0x0400 /* Two or more AND-connected terms */ +#define WO_EQUIV 0x0800 /* Of the form A==B, both columns */ +#define WO_NOOP 0x1000 /* This term does not restrict search space */ + +#define WO_ALL 0x1fff /* Mask of all possible WO_* values */ +#define WO_SINGLE 0x01ff /* Mask of all non-compound WO_* values */ + +/* +** These are definitions of bits in the WhereLoop.wsFlags field. +** The particular combination of bits in each WhereLoop help to +** determine the algorithm that WhereLoop represents. +*/ +#define WHERE_COLUMN_EQ 0x00000001 /* x=EXPR */ +#define WHERE_COLUMN_RANGE 0x00000002 /* xEXPR */ +#define WHERE_COLUMN_IN 0x00000004 /* x IN (...) */ +#define WHERE_COLUMN_NULL 0x00000008 /* x IS NULL */ +#define WHERE_CONSTRAINT 0x0000000f /* Any of the WHERE_COLUMN_xxx values */ +#define WHERE_TOP_LIMIT 0x00000010 /* xEXPR or x>=EXPR constraint */ +#define WHERE_BOTH_LIMIT 0x00000030 /* Both x>EXPR and xaiColumn[i]; + if( i==XN_EXPR ) return ""; + if( i==XN_ROWID ) return "rowid"; + return pIdx->pTable->aCol[i].zCnName; +} + +/* +** This routine is a helper for explainIndexRange() below +** +** pStr holds the text of an expression that we are building up one term +** at a time. This routine adds a new term to the end of the expression. +** Terms are separated by AND so add the "AND" text for second and subsequent +** terms only. +*/ +static void explainAppendTerm( + StrAccum *pStr, /* The text expression being built */ + Index *pIdx, /* Index to read column names from */ + int nTerm, /* Number of terms */ + int iTerm, /* Zero-based index of first term. */ + int bAnd, /* Non-zero to append " AND " */ + const char *zOp /* Name of the operator */ +){ + int i; + + assert( nTerm>=1 ); + if( bAnd ) sqlite3_str_append(pStr, " AND ", 5); + + if( nTerm>1 ) sqlite3_str_append(pStr, "(", 1); + for(i=0; i1 ) sqlite3_str_append(pStr, ")", 1); + + sqlite3_str_append(pStr, zOp, 1); + + if( nTerm>1 ) sqlite3_str_append(pStr, "(", 1); + for(i=0; i1 ) sqlite3_str_append(pStr, ")", 1); +} + +/* +** Argument pLevel describes a strategy for scanning table pTab. This +** function appends text to pStr that describes the subset of table +** rows scanned by the strategy in the form of an SQL expression. +** +** For example, if the query: +** +** SELECT * FROM t1 WHERE a=1 AND b>2; +** +** is run and there is an index on (a, b), then this function returns a +** string similar to: +** +** "a=? AND b>?" +*/ +static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop){ + Index *pIndex = pLoop->u.btree.pIndex; + u16 nEq = pLoop->u.btree.nEq; + u16 nSkip = pLoop->nSkip; + int i, j; + + if( nEq==0 && (pLoop->wsFlags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ) return; + sqlite3_str_append(pStr, " (", 2); + for(i=0; i=nSkip ? "%s=?" : "ANY(%s)", z); + } + + j = i; + if( pLoop->wsFlags&WHERE_BTM_LIMIT ){ + explainAppendTerm(pStr, pIndex, pLoop->u.btree.nBtm, j, i, ">"); + i = 1; + } + if( pLoop->wsFlags&WHERE_TOP_LIMIT ){ + explainAppendTerm(pStr, pIndex, pLoop->u.btree.nTop, j, i, "<"); + } + sqlite3_str_append(pStr, ")", 1); +} + +/* +** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN +** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was +** defined at compile-time. If it is not a no-op, a single OP_Explain opcode +** is added to the output to describe the table scan strategy in pLevel. +** +** If an OP_Explain opcode is added to the VM, its address is returned. +** Otherwise, if no OP_Explain is coded, zero is returned. +*/ +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( + Parse *pParse, /* Parse context */ + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ +){ + int ret = 0; +#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS) + if( sqlite3ParseToplevel(pParse)->explain==2 ) +#endif + { + SrcItem *pItem = &pTabList->a[pLevel->iFrom]; + Vdbe *v = pParse->pVdbe; /* VM being constructed */ + sqlite3 *db = pParse->db; /* Database handle */ + int isSearch; /* True for a SEARCH. False for SCAN. */ + WhereLoop *pLoop; /* The controlling WhereLoop object */ + u32 flags; /* Flags that describe this loop */ + char *zMsg; /* Text to add to EQP output */ + StrAccum str; /* EQP output string */ + char zBuf[100]; /* Initial space for EQP output string */ + + pLoop = pLevel->pWLoop; + flags = pLoop->wsFlags; + if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_OR_SUBCLAUSE) ) return 0; + + isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 + || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) + || (wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX)); + + sqlite3StrAccumInit(&str, db, zBuf, sizeof(zBuf), SQLITE_MAX_LENGTH); + str.printfFlags = SQLITE_PRINTF_INTERNAL; + sqlite3_str_appendf(&str, "%s %S", isSearch ? "SEARCH" : "SCAN", pItem); + if( (flags & (WHERE_IPK|WHERE_VIRTUALTABLE))==0 ){ + const char *zFmt = 0; + Index *pIdx; + + assert( pLoop->u.btree.pIndex!=0 ); + pIdx = pLoop->u.btree.pIndex; + assert( !(flags&WHERE_AUTO_INDEX) || (flags&WHERE_IDX_ONLY) ); + if( !HasRowid(pItem->pTab) && IsPrimaryKeyIndex(pIdx) ){ + if( isSearch ){ + zFmt = "PRIMARY KEY"; + } + }else if( flags & WHERE_PARTIALIDX ){ + zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; + }else if( flags & WHERE_AUTO_INDEX ){ + zFmt = "AUTOMATIC COVERING INDEX"; + }else if( flags & WHERE_IDX_ONLY ){ + zFmt = "COVERING INDEX %s"; + }else{ + zFmt = "INDEX %s"; + } + if( zFmt ){ + sqlite3_str_append(&str, " USING ", 7); + sqlite3_str_appendf(&str, zFmt, pIdx->zName); + explainIndexRange(&str, pLoop); + } + }else if( (flags & WHERE_IPK)!=0 && (flags & WHERE_CONSTRAINT)!=0 ){ + char cRangeOp; +#if 0 /* Better output, but breaks many tests */ + const Table *pTab = pItem->pTab; + const char *zRowid = pTab->iPKey>=0 ? pTab->aCol[pTab->iPKey].zCnName: + "rowid"; +#else + const char *zRowid = "rowid"; +#endif + sqlite3_str_appendf(&str, " USING INTEGER PRIMARY KEY (%s", zRowid); + if( flags&(WHERE_COLUMN_EQ|WHERE_COLUMN_IN) ){ + cRangeOp = '='; + }else if( (flags&WHERE_BOTH_LIMIT)==WHERE_BOTH_LIMIT ){ + sqlite3_str_appendf(&str, ">? AND %s", zRowid); + cRangeOp = '<'; + }else if( flags&WHERE_BTM_LIMIT ){ + cRangeOp = '>'; + }else{ + assert( flags&WHERE_TOP_LIMIT); + cRangeOp = '<'; + } + sqlite3_str_appendf(&str, "%c?)", cRangeOp); + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + else if( (flags & WHERE_VIRTUALTABLE)!=0 ){ + sqlite3_str_appendf(&str, " VIRTUAL TABLE INDEX %d:%s", + pLoop->u.vtab.idxNum, pLoop->u.vtab.idxStr); + } +#endif +#ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS + if( pLoop->nOut>=10 ){ + sqlite3_str_appendf(&str, " (~%llu rows)", + sqlite3LogEstToInt(pLoop->nOut)); + }else{ + sqlite3_str_append(&str, " (~1 row)", 9); + } +#endif + zMsg = sqlite3StrAccumFinish(&str); + sqlite3ExplainBreakpoint("",zMsg); + ret = sqlite3VdbeAddOp4(v, OP_Explain, sqlite3VdbeCurrentAddr(v), + pParse->addrExplain, 0, zMsg,P4_DYNAMIC); + } + return ret; +} + +/* +** Add a single OP_Explain opcode that describes a Bloom filter. +** +** Or if not processing EXPLAIN QUERY PLAN and not in a SQLITE_DEBUG and/or +** SQLITE_ENABLE_STMT_SCANSTATUS build, then OP_Explain opcodes are not +** required and this routine is a no-op. +** +** If an OP_Explain opcode is added to the VM, its address is returned. +** Otherwise, if no OP_Explain is coded, zero is returned. +*/ +SQLITE_PRIVATE int sqlite3WhereExplainBloomFilter( + const Parse *pParse, /* Parse context */ + const WhereInfo *pWInfo, /* WHERE clause */ + const WhereLevel *pLevel /* Bloom filter on this level */ +){ + int ret = 0; + SrcItem *pItem = &pWInfo->pTabList->a[pLevel->iFrom]; + Vdbe *v = pParse->pVdbe; /* VM being constructed */ + sqlite3 *db = pParse->db; /* Database handle */ + char *zMsg; /* Text to add to EQP output */ + int i; /* Loop counter */ + WhereLoop *pLoop; /* The where loop */ + StrAccum str; /* EQP output string */ + char zBuf[100]; /* Initial space for EQP output string */ + + sqlite3StrAccumInit(&str, db, zBuf, sizeof(zBuf), SQLITE_MAX_LENGTH); + str.printfFlags = SQLITE_PRINTF_INTERNAL; + sqlite3_str_appendf(&str, "BLOOM FILTER ON %S (", pItem); + pLoop = pLevel->pWLoop; + if( pLoop->wsFlags & WHERE_IPK ){ + const Table *pTab = pItem->pTab; + if( pTab->iPKey>=0 ){ + sqlite3_str_appendf(&str, "%s=?", pTab->aCol[pTab->iPKey].zCnName); + }else{ + sqlite3_str_appendf(&str, "rowid=?"); + } + }else{ + for(i=pLoop->nSkip; iu.btree.nEq; i++){ + const char *z = explainIndexColumnName(pLoop->u.btree.pIndex, i); + if( i>pLoop->nSkip ) sqlite3_str_append(&str, " AND ", 5); + sqlite3_str_appendf(&str, "%s=?", z); + } + } + sqlite3_str_append(&str, ")", 1); + zMsg = sqlite3StrAccumFinish(&str); + ret = sqlite3VdbeAddOp4(v, OP_Explain, sqlite3VdbeCurrentAddr(v), + pParse->addrExplain, 0, zMsg,P4_DYNAMIC); + return ret; +} +#endif /* SQLITE_OMIT_EXPLAIN */ + +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +/* +** Configure the VM passed as the first argument with an +** sqlite3_stmt_scanstatus() entry corresponding to the scan used to +** implement level pLvl. Argument pSrclist is a pointer to the FROM +** clause that the scan reads data from. +** +** If argument addrExplain is not 0, it must be the address of an +** OP_Explain instruction that describes the same loop. +*/ +SQLITE_PRIVATE void sqlite3WhereAddScanStatus( + Vdbe *v, /* Vdbe to add scanstatus entry to */ + SrcList *pSrclist, /* FROM clause pLvl reads data from */ + WhereLevel *pLvl, /* Level to add scanstatus() entry for */ + int addrExplain /* Address of OP_Explain (or 0) */ +){ + const char *zObj = 0; + WhereLoop *pLoop = pLvl->pWLoop; + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ + zObj = pLoop->u.btree.pIndex->zName; + }else{ + zObj = pSrclist->a[pLvl->iFrom].zName; + } + sqlite3VdbeScanStatus( + v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj + ); +} +#endif + + +/* +** Disable a term in the WHERE clause. Except, do not disable the term +** if it controls a LEFT OUTER JOIN and it did not originate in the ON +** or USING clause of that join. +** +** Consider the term t2.z='ok' in the following queries: +** +** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok' +** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok' +** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok' +** +** The t2.z='ok' is disabled in the in (2) because it originates +** in the ON clause. The term is disabled in (3) because it is not part +** of a LEFT OUTER JOIN. In (1), the term is not disabled. +** +** Disabling a term causes that term to not be tested in the inner loop +** of the join. Disabling is an optimization. When terms are satisfied +** by indices, we disable them to prevent redundant tests in the inner +** loop. We would get the correct results if nothing were ever disabled, +** but joins might run a little slower. The trick is to disable as much +** as we can without disabling too much. If we disabled in (1), we'd get +** the wrong answer. See ticket #813. +** +** If all the children of a term are disabled, then that term is also +** automatically disabled. In this way, terms get disabled if derived +** virtual terms are tested first. For example: +** +** x GLOB 'abc*' AND x>='abc' AND x<'acd' +** \___________/ \______/ \_____/ +** parent child1 child2 +** +** Only the parent term was in the original WHERE clause. The child1 +** and child2 terms were added by the LIKE optimization. If both of +** the virtual child terms are valid, then testing of the parent can be +** skipped. +** +** Usually the parent term is marked as TERM_CODED. But if the parent +** term was originally TERM_LIKE, then the parent gets TERM_LIKECOND instead. +** The TERM_LIKECOND marking indicates that the term should be coded inside +** a conditional such that is only evaluated on the second pass of a +** LIKE-optimization loop, when scanning BLOBs instead of strings. +*/ +static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ + int nLoop = 0; + assert( pTerm!=0 ); + while( (pTerm->wtFlags & TERM_CODED)==0 + && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) + && (pLevel->notReady & pTerm->prereqAll)==0 + ){ + if( nLoop && (pTerm->wtFlags & TERM_LIKE)!=0 ){ + pTerm->wtFlags |= TERM_LIKECOND; + }else{ + pTerm->wtFlags |= TERM_CODED; + } +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace & 0x20000 ){ + sqlite3DebugPrintf("DISABLE-"); + sqlite3WhereTermPrint(pTerm, (int)(pTerm - (pTerm->pWC->a))); + } +#endif + if( pTerm->iParent<0 ) break; + pTerm = &pTerm->pWC->a[pTerm->iParent]; + assert( pTerm!=0 ); + pTerm->nChild--; + if( pTerm->nChild!=0 ) break; + nLoop++; + } +} + +/* +** Code an OP_Affinity opcode to apply the column affinity string zAff +** to the n registers starting at base. +** +** As an optimization, SQLITE_AFF_BLOB and SQLITE_AFF_NONE entries (which +** are no-ops) at the beginning and end of zAff are ignored. If all entries +** in zAff are SQLITE_AFF_BLOB or SQLITE_AFF_NONE, then no code gets generated. +** +** This routine makes its own copy of zAff so that the caller is free +** to modify zAff after this routine returns. +*/ +static void codeApplyAffinity(Parse *pParse, int base, int n, char *zAff){ + Vdbe *v = pParse->pVdbe; + if( zAff==0 ){ + assert( pParse->db->mallocFailed ); + return; + } + assert( v!=0 ); + + /* Adjust base and n to skip over SQLITE_AFF_BLOB and SQLITE_AFF_NONE + ** entries at the beginning and end of the affinity string. + */ + assert( SQLITE_AFF_NONE0 && zAff[0]<=SQLITE_AFF_BLOB ){ + n--; + base++; + zAff++; + } + while( n>1 && zAff[n-1]<=SQLITE_AFF_BLOB ){ + n--; + } + + /* Code the OP_Affinity opcode if there is anything left to do. */ + if( n>0 ){ + sqlite3VdbeAddOp4(v, OP_Affinity, base, n, 0, zAff, n); + } +} + +/* +** Expression pRight, which is the RHS of a comparison operation, is +** either a vector of n elements or, if n==1, a scalar expression. +** Before the comparison operation, affinity zAff is to be applied +** to the pRight values. This function modifies characters within the +** affinity string to SQLITE_AFF_BLOB if either: +** +** * the comparison will be performed with no affinity, or +** * the affinity change in zAff is guaranteed not to change the value. +*/ +static void updateRangeAffinityStr( + Expr *pRight, /* RHS of comparison */ + int n, /* Number of vector elements in comparison */ + char *zAff /* Affinity string to modify */ +){ + int i; + for(i=0; idb; + Expr *pNew; + pNew = sqlite3ExprDup(db, pX, 0); + if( db->mallocFailed==0 ){ + ExprList *pOrigRhs; /* Original unmodified RHS */ + ExprList *pOrigLhs; /* Original unmodified LHS */ + ExprList *pRhs = 0; /* New RHS after modifications */ + ExprList *pLhs = 0; /* New LHS after mods */ + int i; /* Loop counter */ + Select *pSelect; /* Pointer to the SELECT on the RHS */ + + assert( ExprUseXSelect(pNew) ); + pOrigRhs = pNew->x.pSelect->pEList; + assert( pNew->pLeft!=0 ); + assert( ExprUseXList(pNew->pLeft) ); + pOrigLhs = pNew->pLeft->x.pList; + for(i=iEq; inLTerm; i++){ + if( pLoop->aLTerm[i]->pExpr==pX ){ + int iField; + assert( (pLoop->aLTerm[i]->eOperator & (WO_OR|WO_AND))==0 ); + iField = pLoop->aLTerm[i]->u.x.iField - 1; + if( pOrigRhs->a[iField].pExpr==0 ) continue; /* Duplicate PK column */ + pRhs = sqlite3ExprListAppend(pParse, pRhs, pOrigRhs->a[iField].pExpr); + pOrigRhs->a[iField].pExpr = 0; + assert( pOrigLhs->a[iField].pExpr!=0 ); + pLhs = sqlite3ExprListAppend(pParse, pLhs, pOrigLhs->a[iField].pExpr); + pOrigLhs->a[iField].pExpr = 0; + } + } + sqlite3ExprListDelete(db, pOrigRhs); + sqlite3ExprListDelete(db, pOrigLhs); + pNew->pLeft->x.pList = pLhs; + pNew->x.pSelect->pEList = pRhs; + if( pLhs && pLhs->nExpr==1 ){ + /* Take care here not to generate a TK_VECTOR containing only a + ** single value. Since the parser never creates such a vector, some + ** of the subroutines do not handle this case. */ + Expr *p = pLhs->a[0].pExpr; + pLhs->a[0].pExpr = 0; + sqlite3ExprDelete(db, pNew->pLeft); + pNew->pLeft = p; + } + pSelect = pNew->x.pSelect; + if( pSelect->pOrderBy ){ + /* If the SELECT statement has an ORDER BY clause, zero the + ** iOrderByCol variables. These are set to non-zero when an + ** ORDER BY term exactly matches one of the terms of the + ** result-set. Since the result-set of the SELECT statement may + ** have been modified or reordered, these variables are no longer + ** set correctly. Since setting them is just an optimization, + ** it's easiest just to zero them here. */ + ExprList *pOrderBy = pSelect->pOrderBy; + for(i=0; inExpr; i++){ + pOrderBy->a[i].u.x.iOrderByCol = 0; + } + } + +#if 0 + printf("For indexing, change the IN expr:\n"); + sqlite3TreeViewExpr(0, pX, 0); + printf("Into:\n"); + sqlite3TreeViewExpr(0, pNew, 0); +#endif + } + return pNew; +} + + +/* +** Generate code for a single equality term of the WHERE clause. An equality +** term can be either X=expr or X IN (...). pTerm is the term to be +** coded. +** +** The current value for the constraint is left in a register, the index +** of which is returned. An attempt is made store the result in iTarget but +** this is only guaranteed for TK_ISNULL and TK_IN constraints. If the +** constraint is a TK_EQ or TK_IS, then the current value might be left in +** some other register and it is the caller's responsibility to compensate. +** +** For a constraint of the form X=expr, the expression is evaluated in +** straight-line code. For constraints of the form X IN (...) +** this routine sets up a loop that will iterate over all values of X. +*/ +static int codeEqualityTerm( + Parse *pParse, /* The parsing context */ + WhereTerm *pTerm, /* The term of the WHERE clause to be coded */ + WhereLevel *pLevel, /* The level of the FROM clause we are working on */ + int iEq, /* Index of the equality term within this level */ + int bRev, /* True for reverse-order IN operations */ + int iTarget /* Attempt to leave results in this register */ +){ + Expr *pX = pTerm->pExpr; + Vdbe *v = pParse->pVdbe; + int iReg; /* Register holding results */ + + assert( pLevel->pWLoop->aLTerm[iEq]==pTerm ); + assert( iTarget>0 ); + if( pX->op==TK_EQ || pX->op==TK_IS ){ + iReg = sqlite3ExprCodeTarget(pParse, pX->pRight, iTarget); + }else if( pX->op==TK_ISNULL ){ + iReg = iTarget; + sqlite3VdbeAddOp2(v, OP_Null, 0, iReg); +#ifndef SQLITE_OMIT_SUBQUERY + }else{ + int eType = IN_INDEX_NOOP; + int iTab; + struct InLoop *pIn; + WhereLoop *pLoop = pLevel->pWLoop; + int i; + int nEq = 0; + int *aiMap = 0; + + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 + && pLoop->u.btree.pIndex!=0 + && pLoop->u.btree.pIndex->aSortOrder[iEq] + ){ + testcase( iEq==0 ); + testcase( bRev ); + bRev = !bRev; + } + assert( pX->op==TK_IN ); + iReg = iTarget; + + for(i=0; iaLTerm[i] && pLoop->aLTerm[i]->pExpr==pX ){ + disableTerm(pLevel, pTerm); + return iTarget; + } + } + for(i=iEq;inLTerm; i++){ + assert( pLoop->aLTerm[i]!=0 ); + if( pLoop->aLTerm[i]->pExpr==pX ) nEq++; + } + + iTab = 0; + if( !ExprUseXSelect(pX) || pX->x.pSelect->pEList->nExpr==1 ){ + eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0, 0, &iTab); + }else{ + sqlite3 *db = pParse->db; + pX = removeUnindexableInClauseTerms(pParse, iEq, pLoop, pX); + + if( !db->mallocFailed ){ + aiMap = (int*)sqlite3DbMallocZero(pParse->db, sizeof(int)*nEq); + eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0, aiMap, &iTab); + pTerm->pExpr->iTable = iTab; + } + sqlite3ExprDelete(db, pX); + pX = pTerm->pExpr; + } + + if( eType==IN_INDEX_INDEX_DESC ){ + testcase( bRev ); + bRev = !bRev; + } + sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iTab, 0); + VdbeCoverageIf(v, bRev); + VdbeCoverageIf(v, !bRev); + + assert( (pLoop->wsFlags & WHERE_MULTI_OR)==0 ); + pLoop->wsFlags |= WHERE_IN_ABLE; + if( pLevel->u.in.nIn==0 ){ + pLevel->addrNxt = sqlite3VdbeMakeLabel(pParse); + } + if( iEq>0 && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 ){ + pLoop->wsFlags |= WHERE_IN_EARLYOUT; + } + + i = pLevel->u.in.nIn; + pLevel->u.in.nIn += nEq; + pLevel->u.in.aInLoop = + sqlite3DbReallocOrFree(pParse->db, pLevel->u.in.aInLoop, + sizeof(pLevel->u.in.aInLoop[0])*pLevel->u.in.nIn); + pIn = pLevel->u.in.aInLoop; + if( pIn ){ + int iMap = 0; /* Index in aiMap[] */ + pIn += i; + for(i=iEq;inLTerm; i++){ + if( pLoop->aLTerm[i]->pExpr==pX ){ + int iOut = iReg + i - iEq; + if( eType==IN_INDEX_ROWID ){ + pIn->addrInTop = sqlite3VdbeAddOp2(v, OP_Rowid, iTab, iOut); + }else{ + int iCol = aiMap ? aiMap[iMap++] : 0; + pIn->addrInTop = sqlite3VdbeAddOp3(v,OP_Column,iTab, iCol, iOut); + } + sqlite3VdbeAddOp1(v, OP_IsNull, iOut); VdbeCoverage(v); + if( i==iEq ){ + pIn->iCur = iTab; + pIn->eEndLoopOp = bRev ? OP_Prev : OP_Next; + if( iEq>0 ){ + pIn->iBase = iReg - i; + pIn->nPrefix = i; + }else{ + pIn->nPrefix = 0; + } + }else{ + pIn->eEndLoopOp = OP_Noop; + } + pIn++; + } + } + testcase( iEq>0 + && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 + && (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ); + if( iEq>0 + && (pLoop->wsFlags & (WHERE_IN_SEEKSCAN|WHERE_VIRTUALTABLE))==0 + ){ + sqlite3VdbeAddOp3(v, OP_SeekHit, pLevel->iIdxCur, 0, iEq); + } + }else{ + pLevel->u.in.nIn = 0; + } + sqlite3DbFree(pParse->db, aiMap); +#endif + } + + /* As an optimization, try to disable the WHERE clause term that is + ** driving the index as it will always be true. The correct answer is + ** obtained regardless, but we might get the answer with fewer CPU cycles + ** by omitting the term. + ** + ** But do not disable the term unless we are certain that the term is + ** not a transitive constraint. For an example of where that does not + ** work, see https://sqlite.org/forum/forumpost/eb8613976a (2021-05-04) + */ + if( (pLevel->pWLoop->wsFlags & WHERE_TRANSCONS)==0 + || (pTerm->eOperator & WO_EQUIV)==0 + ){ + disableTerm(pLevel, pTerm); + } + + return iReg; +} + +/* +** Generate code that will evaluate all == and IN constraints for an +** index scan. +** +** For example, consider table t1(a,b,c,d,e,f) with index i1(a,b,c). +** Suppose the WHERE clause is this: a==5 AND b IN (1,2,3) AND c>5 AND c<10 +** The index has as many as three equality constraints, but in this +** example, the third "c" value is an inequality. So only two +** constraints are coded. This routine will generate code to evaluate +** a==5 and b IN (1,2,3). The current values for a and b will be stored +** in consecutive registers and the index of the first register is returned. +** +** In the example above nEq==2. But this subroutine works for any value +** of nEq including 0. If nEq==0, this routine is nearly a no-op. +** The only thing it does is allocate the pLevel->iMem memory cell and +** compute the affinity string. +** +** The nExtraReg parameter is 0 or 1. It is 0 if all WHERE clause constraints +** are == or IN and are covered by the nEq. nExtraReg is 1 if there is +** an inequality constraint (such as the "c>=5 AND c<10" in the example) that +** occurs after the nEq quality constraints. +** +** This routine allocates a range of nEq+nExtraReg memory cells and returns +** the index of the first memory cell in that range. The code that +** calls this routine will use that memory range to store keys for +** start and termination conditions of the loop. +** key value of the loop. If one or more IN operators appear, then +** this routine allocates an additional nEq memory cells for internal +** use. +** +** Before returning, *pzAff is set to point to a buffer containing a +** copy of the column affinity string of the index allocated using +** sqlite3DbMalloc(). Except, entries in the copy of the string associated +** with equality constraints that use BLOB or NONE affinity are set to +** SQLITE_AFF_BLOB. This is to deal with SQL such as the following: +** +** CREATE TABLE t1(a TEXT PRIMARY KEY, b); +** SELECT ... FROM t1 AS t2, t1 WHERE t1.a = t2.b; +** +** In the example above, the index on t1(a) has TEXT affinity. But since +** the right hand side of the equality constraint (t2.b) has BLOB/NONE affinity, +** no conversion should be attempted before using a t2.b value as part of +** a key to search the index. Hence the first byte in the returned affinity +** string in this example would be set to SQLITE_AFF_BLOB. +*/ +static int codeAllEqualityTerms( + Parse *pParse, /* Parsing context */ + WhereLevel *pLevel, /* Which nested loop of the FROM we are coding */ + int bRev, /* Reverse the order of IN operators */ + int nExtraReg, /* Number of extra registers to allocate */ + char **pzAff /* OUT: Set to point to affinity string */ +){ + u16 nEq; /* The number of == or IN constraints to code */ + u16 nSkip; /* Number of left-most columns to skip */ + Vdbe *v = pParse->pVdbe; /* The vm under construction */ + Index *pIdx; /* The index being used for this loop */ + WhereTerm *pTerm; /* A single constraint term */ + WhereLoop *pLoop; /* The WhereLoop object */ + int j; /* Loop counter */ + int regBase; /* Base register */ + int nReg; /* Number of registers to allocate */ + char *zAff; /* Affinity string to return */ + + /* This module is only called on query plans that use an index. */ + pLoop = pLevel->pWLoop; + assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 ); + nEq = pLoop->u.btree.nEq; + nSkip = pLoop->nSkip; + pIdx = pLoop->u.btree.pIndex; + assert( pIdx!=0 ); + + /* Figure out how many memory cells we will need then allocate them. + */ + regBase = pParse->nMem + 1; + nReg = pLoop->u.btree.nEq + nExtraReg; + pParse->nMem += nReg; + + zAff = sqlite3DbStrDup(pParse->db,sqlite3IndexAffinityStr(pParse->db,pIdx)); + assert( zAff!=0 || pParse->db->mallocFailed ); + + if( nSkip ){ + int iIdxCur = pLevel->iIdxCur; + sqlite3VdbeAddOp3(v, OP_Null, 0, regBase, regBase+nSkip-1); + sqlite3VdbeAddOp1(v, (bRev?OP_Last:OP_Rewind), iIdxCur); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + VdbeComment((v, "begin skip-scan on %s", pIdx->zName)); + j = sqlite3VdbeAddOp0(v, OP_Goto); + assert( pLevel->addrSkip==0 ); + pLevel->addrSkip = sqlite3VdbeAddOp4Int(v, (bRev?OP_SeekLT:OP_SeekGT), + iIdxCur, 0, regBase, nSkip); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + sqlite3VdbeJumpHere(v, j); + for(j=0; jaiColumn[j]==XN_EXPR ); + VdbeComment((v, "%s", explainIndexColumnName(pIdx, j))); + } + } + + /* Evaluate the equality constraints + */ + assert( zAff==0 || (int)strlen(zAff)>=nEq ); + for(j=nSkip; jaLTerm[j]; + assert( pTerm!=0 ); + /* The following testcase is true for indices with redundant columns. + ** Ex: CREATE INDEX i1 ON t1(a,b,a); SELECT * FROM t1 WHERE a=0 AND b=0; */ + testcase( (pTerm->wtFlags & TERM_CODED)!=0 ); + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + r1 = codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, regBase+j); + if( r1!=regBase+j ){ + if( nReg==1 ){ + sqlite3ReleaseTempReg(pParse, regBase); + regBase = r1; + }else{ + sqlite3VdbeAddOp2(v, OP_Copy, r1, regBase+j); + } + } + } + for(j=nSkip; jaLTerm[j]; + if( pTerm->eOperator & WO_IN ){ + if( pTerm->pExpr->flags & EP_xIsSelect ){ + /* No affinity ever needs to be (or should be) applied to a value + ** from the RHS of an "? IN (SELECT ...)" expression. The + ** sqlite3FindInIndex() routine has already ensured that the + ** affinity of the comparison has been applied to the value. */ + if( zAff ) zAff[j] = SQLITE_AFF_BLOB; + } + }else if( (pTerm->eOperator & WO_ISNULL)==0 ){ + Expr *pRight = pTerm->pExpr->pRight; + if( (pTerm->wtFlags & TERM_IS)==0 && sqlite3ExprCanBeNull(pRight) ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+j, pLevel->addrBrk); + VdbeCoverage(v); + } + if( pParse->nErr==0 ){ + assert( pParse->db->mallocFailed==0 ); + if( sqlite3CompareAffinity(pRight, zAff[j])==SQLITE_AFF_BLOB ){ + zAff[j] = SQLITE_AFF_BLOB; + } + if( sqlite3ExprNeedsNoAffinityChange(pRight, zAff[j]) ){ + zAff[j] = SQLITE_AFF_BLOB; + } + } + } + } + *pzAff = zAff; + return regBase; +} + +#ifndef SQLITE_LIKE_DOESNT_MATCH_BLOBS +/* +** If the most recently coded instruction is a constant range constraint +** (a string literal) that originated from the LIKE optimization, then +** set P3 and P5 on the OP_String opcode so that the string will be cast +** to a BLOB at appropriate times. +** +** The LIKE optimization trys to evaluate "x LIKE 'abc%'" as a range +** expression: "x>='ABC' AND x<'abd'". But this requires that the range +** scan loop run twice, once for strings and a second time for BLOBs. +** The OP_String opcodes on the second pass convert the upper and lower +** bound string constants to blobs. This routine makes the necessary changes +** to the OP_String opcodes for that to happen. +** +** Except, of course, if SQLITE_LIKE_DOESNT_MATCH_BLOBS is defined, then +** only the one pass through the string space is required, so this routine +** becomes a no-op. +*/ +static void whereLikeOptimizationStringFixup( + Vdbe *v, /* prepared statement under construction */ + WhereLevel *pLevel, /* The loop that contains the LIKE operator */ + WhereTerm *pTerm /* The upper or lower bound just coded */ +){ + if( pTerm->wtFlags & TERM_LIKEOPT ){ + VdbeOp *pOp; + assert( pLevel->iLikeRepCntr>0 ); + pOp = sqlite3VdbeGetOp(v, -1); + assert( pOp!=0 ); + assert( pOp->opcode==OP_String8 + || pTerm->pWC->pWInfo->pParse->db->mallocFailed ); + pOp->p3 = (int)(pLevel->iLikeRepCntr>>1); /* Register holding counter */ + pOp->p5 = (u8)(pLevel->iLikeRepCntr&1); /* ASC or DESC */ + } +} +#else +# define whereLikeOptimizationStringFixup(A,B,C) +#endif + +#ifdef SQLITE_ENABLE_CURSOR_HINTS +/* +** Information is passed from codeCursorHint() down to individual nodes of +** the expression tree (by sqlite3WalkExpr()) using an instance of this +** structure. +*/ +struct CCurHint { + int iTabCur; /* Cursor for the main table */ + int iIdxCur; /* Cursor for the index, if pIdx!=0. Unused otherwise */ + Index *pIdx; /* The index used to access the table */ +}; + +/* +** This function is called for every node of an expression that is a candidate +** for a cursor hint on an index cursor. For TK_COLUMN nodes that reference +** the table CCurHint.iTabCur, verify that the same column can be +** accessed through the index. If it cannot, then set pWalker->eCode to 1. +*/ +static int codeCursorHintCheckExpr(Walker *pWalker, Expr *pExpr){ + struct CCurHint *pHint = pWalker->u.pCCurHint; + assert( pHint->pIdx!=0 ); + if( pExpr->op==TK_COLUMN + && pExpr->iTable==pHint->iTabCur + && sqlite3TableColumnToIndex(pHint->pIdx, pExpr->iColumn)<0 + ){ + pWalker->eCode = 1; + } + return WRC_Continue; +} + +/* +** Test whether or not expression pExpr, which was part of a WHERE clause, +** should be included in the cursor-hint for a table that is on the rhs +** of a LEFT JOIN. Set Walker.eCode to non-zero before returning if the +** expression is not suitable. +** +** An expression is unsuitable if it might evaluate to non NULL even if +** a TK_COLUMN node that does affect the value of the expression is set +** to NULL. For example: +** +** col IS NULL +** col IS NOT NULL +** coalesce(col, 1) +** CASE WHEN col THEN 0 ELSE 1 END +*/ +static int codeCursorHintIsOrFunction(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_IS + || pExpr->op==TK_ISNULL || pExpr->op==TK_ISNOT + || pExpr->op==TK_NOTNULL || pExpr->op==TK_CASE + ){ + pWalker->eCode = 1; + }else if( pExpr->op==TK_FUNCTION ){ + int d1; + char d2[4]; + if( 0==sqlite3IsLikeFunction(pWalker->pParse->db, pExpr, &d1, d2) ){ + pWalker->eCode = 1; + } + } + + return WRC_Continue; +} + + +/* +** This function is called on every node of an expression tree used as an +** argument to the OP_CursorHint instruction. If the node is a TK_COLUMN +** that accesses any table other than the one identified by +** CCurHint.iTabCur, then do the following: +** +** 1) allocate a register and code an OP_Column instruction to read +** the specified column into the new register, and +** +** 2) transform the expression node to a TK_REGISTER node that reads +** from the newly populated register. +** +** Also, if the node is a TK_COLUMN that does access the table idenified +** by pCCurHint.iTabCur, and an index is being used (which we will +** know because CCurHint.pIdx!=0) then transform the TK_COLUMN into +** an access of the index rather than the original table. +*/ +static int codeCursorHintFixExpr(Walker *pWalker, Expr *pExpr){ + int rc = WRC_Continue; + struct CCurHint *pHint = pWalker->u.pCCurHint; + if( pExpr->op==TK_COLUMN ){ + if( pExpr->iTable!=pHint->iTabCur ){ + int reg = ++pWalker->pParse->nMem; /* Register for column value */ + sqlite3ExprCode(pWalker->pParse, pExpr, reg); + pExpr->op = TK_REGISTER; + pExpr->iTable = reg; + }else if( pHint->pIdx!=0 ){ + pExpr->iTable = pHint->iIdxCur; + pExpr->iColumn = sqlite3TableColumnToIndex(pHint->pIdx, pExpr->iColumn); + assert( pExpr->iColumn>=0 ); + } + }else if( pExpr->op==TK_AGG_FUNCTION ){ + /* An aggregate function in the WHERE clause of a query means this must + ** be a correlated sub-query, and expression pExpr is an aggregate from + ** the parent context. Do not walk the function arguments in this case. + ** + ** todo: It should be possible to replace this node with a TK_REGISTER + ** expression, as the result of the expression must be stored in a + ** register at this point. The same holds for TK_AGG_COLUMN nodes. */ + rc = WRC_Prune; + } + return rc; +} + +/* +** Insert an OP_CursorHint instruction if it is appropriate to do so. +*/ +static void codeCursorHint( + SrcItem *pTabItem, /* FROM clause item */ + WhereInfo *pWInfo, /* The where clause */ + WhereLevel *pLevel, /* Which loop to provide hints for */ + WhereTerm *pEndRange /* Hint this end-of-scan boundary term if not NULL */ +){ + Parse *pParse = pWInfo->pParse; + sqlite3 *db = pParse->db; + Vdbe *v = pParse->pVdbe; + Expr *pExpr = 0; + WhereLoop *pLoop = pLevel->pWLoop; + int iCur; + WhereClause *pWC; + WhereTerm *pTerm; + int i, j; + struct CCurHint sHint; + Walker sWalker; + + if( OptimizationDisabled(db, SQLITE_CursorHints) ) return; + iCur = pLevel->iTabCur; + assert( iCur==pWInfo->pTabList->a[pLevel->iFrom].iCursor ); + sHint.iTabCur = iCur; + sHint.iIdxCur = pLevel->iIdxCur; + sHint.pIdx = pLoop->u.btree.pIndex; + memset(&sWalker, 0, sizeof(sWalker)); + sWalker.pParse = pParse; + sWalker.u.pCCurHint = &sHint; + pWC = &pWInfo->sWC; + for(i=0; inBase; i++){ + pTerm = &pWC->a[i]; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( pTerm->prereqAll & pLevel->notReady ) continue; + + /* Any terms specified as part of the ON(...) clause for any LEFT + ** JOIN for which the current table is not the rhs are omitted + ** from the cursor-hint. + ** + ** If this table is the rhs of a LEFT JOIN, "IS" or "IS NULL" terms + ** that were specified as part of the WHERE clause must be excluded. + ** This is to address the following: + ** + ** SELECT ... t1 LEFT JOIN t2 ON (t1.a=t2.b) WHERE t2.c IS NULL; + ** + ** Say there is a single row in t2 that matches (t1.a=t2.b), but its + ** t2.c values is not NULL. If the (t2.c IS NULL) constraint is + ** pushed down to the cursor, this row is filtered out, causing + ** SQLite to synthesize a row of NULL values. Which does match the + ** WHERE clause, and so the query returns a row. Which is incorrect. + ** + ** For the same reason, WHERE terms such as: + ** + ** WHERE 1 = (t2.c IS NULL) + ** + ** are also excluded. See codeCursorHintIsOrFunction() for details. + */ + if( pTabItem->fg.jointype & JT_LEFT ){ + Expr *pExpr = pTerm->pExpr; + if( !ExprHasProperty(pExpr, EP_FromJoin) + || pExpr->w.iRightJoinTable!=pTabItem->iCursor + ){ + sWalker.eCode = 0; + sWalker.xExprCallback = codeCursorHintIsOrFunction; + sqlite3WalkExpr(&sWalker, pTerm->pExpr); + if( sWalker.eCode ) continue; + } + }else{ + if( ExprHasProperty(pTerm->pExpr, EP_FromJoin) ) continue; + } + + /* All terms in pWLoop->aLTerm[] except pEndRange are used to initialize + ** the cursor. These terms are not needed as hints for a pure range + ** scan (that has no == terms) so omit them. */ + if( pLoop->u.btree.nEq==0 && pTerm!=pEndRange ){ + for(j=0; jnLTerm && pLoop->aLTerm[j]!=pTerm; j++){} + if( jnLTerm ) continue; + } + + /* No subqueries or non-deterministic functions allowed */ + if( sqlite3ExprContainsSubquery(pTerm->pExpr) ) continue; + + /* For an index scan, make sure referenced columns are actually in + ** the index. */ + if( sHint.pIdx!=0 ){ + sWalker.eCode = 0; + sWalker.xExprCallback = codeCursorHintCheckExpr; + sqlite3WalkExpr(&sWalker, pTerm->pExpr); + if( sWalker.eCode ) continue; + } + + /* If we survive all prior tests, that means this term is worth hinting */ + pExpr = sqlite3ExprAnd(pParse, pExpr, sqlite3ExprDup(db, pTerm->pExpr, 0)); + } + if( pExpr!=0 ){ + sWalker.xExprCallback = codeCursorHintFixExpr; + sqlite3WalkExpr(&sWalker, pExpr); + sqlite3VdbeAddOp4(v, OP_CursorHint, + (sHint.pIdx ? sHint.iIdxCur : sHint.iTabCur), 0, 0, + (const char*)pExpr, P4_EXPR); + } +} +#else +# define codeCursorHint(A,B,C,D) /* No-op */ +#endif /* SQLITE_ENABLE_CURSOR_HINTS */ + +/* +** Cursor iCur is open on an intkey b-tree (a table). Register iRowid contains +** a rowid value just read from cursor iIdxCur, open on index pIdx. This +** function generates code to do a deferred seek of cursor iCur to the +** rowid stored in register iRowid. +** +** Normally, this is just: +** +** OP_DeferredSeek $iCur $iRowid +** +** However, if the scan currently being coded is a branch of an OR-loop and +** the statement currently being coded is a SELECT, then P3 of OP_DeferredSeek +** is set to iIdxCur and P4 is set to point to an array of integers +** containing one entry for each column of the table cursor iCur is open +** on. For each table column, if the column is the i'th column of the +** index, then the corresponding array entry is set to (i+1). If the column +** does not appear in the index at all, the array entry is set to 0. +*/ +static void codeDeferredSeek( + WhereInfo *pWInfo, /* Where clause context */ + Index *pIdx, /* Index scan is using */ + int iCur, /* Cursor for IPK b-tree */ + int iIdxCur /* Index cursor */ +){ + Parse *pParse = pWInfo->pParse; /* Parse context */ + Vdbe *v = pParse->pVdbe; /* Vdbe to generate code within */ + + assert( iIdxCur>0 ); + assert( pIdx->aiColumn[pIdx->nColumn-1]==-1 ); + + pWInfo->bDeferredSeek = 1; + sqlite3VdbeAddOp3(v, OP_DeferredSeek, iIdxCur, 0, iCur); + if( (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) + && DbMaskAllZero(sqlite3ParseToplevel(pParse)->writeMask) + ){ + int i; + Table *pTab = pIdx->pTable; + u32 *ai = (u32*)sqlite3DbMallocZero(pParse->db, sizeof(u32)*(pTab->nCol+1)); + if( ai ){ + ai[0] = pTab->nCol; + for(i=0; inColumn-1; i++){ + int x1, x2; + assert( pIdx->aiColumn[i]nCol ); + x1 = pIdx->aiColumn[i]; + x2 = sqlite3TableColumnToStorage(pTab, x1); + testcase( x1!=x2 ); + if( x1>=0 ) ai[x2+1] = i+1; + } + sqlite3VdbeChangeP4(v, -1, (char*)ai, P4_INTARRAY); + } + } +} + +/* +** If the expression passed as the second argument is a vector, generate +** code to write the first nReg elements of the vector into an array +** of registers starting with iReg. +** +** If the expression is not a vector, then nReg must be passed 1. In +** this case, generate code to evaluate the expression and leave the +** result in register iReg. +*/ +static void codeExprOrVector(Parse *pParse, Expr *p, int iReg, int nReg){ + assert( nReg>0 ); + if( p && sqlite3ExprIsVector(p) ){ +#ifndef SQLITE_OMIT_SUBQUERY + if( ExprUseXSelect(p) ){ + Vdbe *v = pParse->pVdbe; + int iSelect; + assert( p->op==TK_SELECT ); + iSelect = sqlite3CodeSubselect(pParse, p); + sqlite3VdbeAddOp3(v, OP_Copy, iSelect, iReg, nReg-1); + }else +#endif + { + int i; + const ExprList *pList; + assert( ExprUseXList(p) ); + pList = p->x.pList; + assert( nReg<=pList->nExpr ); + for(i=0; ia[i].pExpr, iReg+i); + } + } + }else{ + assert( nReg==1 || pParse->nErr ); + sqlite3ExprCode(pParse, p, iReg); + } +} + +/* An instance of the IdxExprTrans object carries information about a +** mapping from an expression on table columns into a column in an index +** down through the Walker. +*/ +typedef struct IdxExprTrans { + Expr *pIdxExpr; /* The index expression */ + int iTabCur; /* The cursor of the corresponding table */ + int iIdxCur; /* The cursor for the index */ + int iIdxCol; /* The column for the index */ + int iTabCol; /* The column for the table */ + WhereInfo *pWInfo; /* Complete WHERE clause information */ + sqlite3 *db; /* Database connection (for malloc()) */ +} IdxExprTrans; + +/* +** Preserve pExpr on the WhereETrans list of the WhereInfo. +*/ +static void preserveExpr(IdxExprTrans *pTrans, Expr *pExpr){ + WhereExprMod *pNew; + pNew = sqlite3DbMallocRaw(pTrans->db, sizeof(*pNew)); + if( pNew==0 ) return; + pNew->pNext = pTrans->pWInfo->pExprMods; + pTrans->pWInfo->pExprMods = pNew; + pNew->pExpr = pExpr; + memcpy(&pNew->orig, pExpr, sizeof(*pExpr)); +} + +/* The walker node callback used to transform matching expressions into +** a reference to an index column for an index on an expression. +** +** If pExpr matches, then transform it into a reference to the index column +** that contains the value of pExpr. +*/ +static int whereIndexExprTransNode(Walker *p, Expr *pExpr){ + IdxExprTrans *pX = p->u.pIdxTrans; + if( sqlite3ExprCompare(0, pExpr, pX->pIdxExpr, pX->iTabCur)==0 ){ + pExpr = sqlite3ExprSkipCollate(pExpr); + preserveExpr(pX, pExpr); + pExpr->affExpr = sqlite3ExprAffinity(pExpr); + pExpr->op = TK_COLUMN; + pExpr->iTable = pX->iIdxCur; + pExpr->iColumn = pX->iIdxCol; + testcase( ExprHasProperty(pExpr, EP_Skip) ); + testcase( ExprHasProperty(pExpr, EP_Unlikely) ); + ExprClearProperty(pExpr, EP_Skip|EP_Unlikely|EP_WinFunc|EP_Subrtn); + pExpr->y.pTab = 0; + return WRC_Prune; + }else{ + return WRC_Continue; + } +} + +#ifndef SQLITE_OMIT_GENERATED_COLUMNS +/* A walker node callback that translates a column reference to a table +** into a corresponding column reference of an index. +*/ +static int whereIndexExprTransColumn(Walker *p, Expr *pExpr){ + if( pExpr->op==TK_COLUMN ){ + IdxExprTrans *pX = p->u.pIdxTrans; + if( pExpr->iTable==pX->iTabCur && pExpr->iColumn==pX->iTabCol ){ + assert( ExprUseYTab(pExpr) && pExpr->y.pTab!=0 ); + preserveExpr(pX, pExpr); + pExpr->affExpr = sqlite3TableColumnAffinity(pExpr->y.pTab,pExpr->iColumn); + pExpr->iTable = pX->iIdxCur; + pExpr->iColumn = pX->iIdxCol; + pExpr->y.pTab = 0; + } + } + return WRC_Continue; +} +#endif /* SQLITE_OMIT_GENERATED_COLUMNS */ + +/* +** For an indexes on expression X, locate every instance of expression X +** in pExpr and change that subexpression into a reference to the appropriate +** column of the index. +** +** 2019-10-24: Updated to also translate references to a VIRTUAL column in +** the table into references to the corresponding (stored) column of the +** index. +*/ +static void whereIndexExprTrans( + Index *pIdx, /* The Index */ + int iTabCur, /* Cursor of the table that is being indexed */ + int iIdxCur, /* Cursor of the index itself */ + WhereInfo *pWInfo /* Transform expressions in this WHERE clause */ +){ + int iIdxCol; /* Column number of the index */ + ExprList *aColExpr; /* Expressions that are indexed */ + Table *pTab; + Walker w; + IdxExprTrans x; + aColExpr = pIdx->aColExpr; + if( aColExpr==0 && !pIdx->bHasVCol ){ + /* The index does not reference any expressions or virtual columns + ** so no translations are needed. */ + return; + } + pTab = pIdx->pTable; + memset(&w, 0, sizeof(w)); + w.u.pIdxTrans = &x; + x.iTabCur = iTabCur; + x.iIdxCur = iIdxCur; + x.pWInfo = pWInfo; + x.db = pWInfo->pParse->db; + for(iIdxCol=0; iIdxColnColumn; iIdxCol++){ + i16 iRef = pIdx->aiColumn[iIdxCol]; + if( iRef==XN_EXPR ){ + assert( aColExpr!=0 && aColExpr->a[iIdxCol].pExpr!=0 ); + x.pIdxExpr = aColExpr->a[iIdxCol].pExpr; + if( sqlite3ExprIsConstant(x.pIdxExpr) ) continue; + w.xExprCallback = whereIndexExprTransNode; +#ifndef SQLITE_OMIT_GENERATED_COLUMNS + }else if( iRef>=0 + && (pTab->aCol[iRef].colFlags & COLFLAG_VIRTUAL)!=0 + && ((pTab->aCol[iRef].colFlags & COLFLAG_HASCOLL)==0 + || sqlite3StrICmp(sqlite3ColumnColl(&pTab->aCol[iRef]), + sqlite3StrBINARY)==0) + ){ + /* Check to see if there are direct references to generated columns + ** that are contained in the index. Pulling the generated column + ** out of the index is an optimization only - the main table is always + ** available if the index cannot be used. To avoid unnecessary + ** complication, omit this optimization if the collating sequence for + ** the column is non-standard */ + x.iTabCol = iRef; + w.xExprCallback = whereIndexExprTransColumn; +#endif /* SQLITE_OMIT_GENERATED_COLUMNS */ + }else{ + continue; + } + x.iIdxCol = iIdxCol; + sqlite3WalkExpr(&w, pWInfo->pWhere); + sqlite3WalkExprList(&w, pWInfo->pOrderBy); + sqlite3WalkExprList(&w, pWInfo->pResultSet); + } +} + +/* +** The pTruth expression is always true because it is the WHERE clause +** a partial index that is driving a query loop. Look through all of the +** WHERE clause terms on the query, and if any of those terms must be +** true because pTruth is true, then mark those WHERE clause terms as +** coded. +*/ +static void whereApplyPartialIndexConstraints( + Expr *pTruth, + int iTabCur, + WhereClause *pWC +){ + int i; + WhereTerm *pTerm; + while( pTruth->op==TK_AND ){ + whereApplyPartialIndexConstraints(pTruth->pLeft, iTabCur, pWC); + pTruth = pTruth->pRight; + } + for(i=0, pTerm=pWC->a; inTerm; i++, pTerm++){ + Expr *pExpr; + if( pTerm->wtFlags & TERM_CODED ) continue; + pExpr = pTerm->pExpr; + if( sqlite3ExprCompare(0, pExpr, pTruth, iTabCur)==0 ){ + pTerm->wtFlags |= TERM_CODED; + } + } +} + +/* +** This routine is called right after An OP_Filter has been generated and +** before the corresponding index search has been performed. This routine +** checks to see if there are additional Bloom filters in inner loops that +** can be checked prior to doing the index lookup. If there are available +** inner-loop Bloom filters, then evaluate those filters now, before the +** index lookup. The idea is that a Bloom filter check is way faster than +** an index lookup, and the Bloom filter might return false, meaning that +** the index lookup can be skipped. +** +** We know that an inner loop uses a Bloom filter because it has the +** WhereLevel.regFilter set. If an inner-loop Bloom filter is checked, +** then clear the WhereLevel.regFilter value to prevent the Bloom filter +** from being checked a second time when the inner loop is evaluated. +*/ +static SQLITE_NOINLINE void filterPullDown( + Parse *pParse, /* Parsing context */ + WhereInfo *pWInfo, /* Complete information about the WHERE clause */ + int iLevel, /* Which level of pWInfo->a[] should be coded */ + int addrNxt, /* Jump here to bypass inner loops */ + Bitmask notReady /* Loops that are not ready */ +){ + while( ++iLevel < pWInfo->nLevel ){ + WhereLevel *pLevel = &pWInfo->a[iLevel]; + WhereLoop *pLoop = pLevel->pWLoop; + if( pLevel->regFilter==0 ) continue; + if( pLevel->pWLoop->nSkip ) continue; + /* ,--- Because sqlite3ConstructBloomFilter() has will not have set + ** vvvvv--' pLevel->regFilter if this were true. */ + if( NEVER(pLoop->prereq & notReady) ) continue; + assert( pLevel->addrBrk==0 ); + pLevel->addrBrk = addrNxt; + if( pLoop->wsFlags & WHERE_IPK ){ + WhereTerm *pTerm = pLoop->aLTerm[0]; + int regRowid; + assert( pTerm!=0 ); + assert( pTerm->pExpr!=0 ); + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + regRowid = sqlite3GetTempReg(pParse); + regRowid = codeEqualityTerm(pParse, pTerm, pLevel, 0, 0, regRowid); + sqlite3VdbeAddOp4Int(pParse->pVdbe, OP_Filter, pLevel->regFilter, + addrNxt, regRowid, 1); + VdbeCoverage(pParse->pVdbe); + }else{ + u16 nEq = pLoop->u.btree.nEq; + int r1; + char *zStartAff; + + assert( pLoop->wsFlags & WHERE_INDEXED ); + assert( (pLoop->wsFlags & WHERE_COLUMN_IN)==0 ); + r1 = codeAllEqualityTerms(pParse,pLevel,0,0,&zStartAff); + codeApplyAffinity(pParse, r1, nEq, zStartAff); + sqlite3DbFree(pParse->db, zStartAff); + sqlite3VdbeAddOp4Int(pParse->pVdbe, OP_Filter, pLevel->regFilter, + addrNxt, r1, nEq); + VdbeCoverage(pParse->pVdbe); + } + pLevel->regFilter = 0; + pLevel->addrBrk = 0; + } +} + +/* +** Generate code for the start of the iLevel-th loop in the WHERE clause +** implementation described by pWInfo. +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( + Parse *pParse, /* Parsing context */ + Vdbe *v, /* Prepared statement under construction */ + WhereInfo *pWInfo, /* Complete information about the WHERE clause */ + int iLevel, /* Which level of pWInfo->a[] should be coded */ + WhereLevel *pLevel, /* The current level pointer */ + Bitmask notReady /* Which tables are currently available */ +){ + int j, k; /* Loop counters */ + int iCur; /* The VDBE cursor for the table */ + int addrNxt; /* Where to jump to continue with the next IN case */ + int bRev; /* True if we need to scan in reverse order */ + WhereLoop *pLoop; /* The WhereLoop object being coded */ + WhereClause *pWC; /* Decomposition of the entire WHERE clause */ + WhereTerm *pTerm; /* A WHERE clause term */ + sqlite3 *db; /* Database connection */ + SrcItem *pTabItem; /* FROM clause term being coded */ + int addrBrk; /* Jump here to break out of the loop */ + int addrHalt; /* addrBrk for the outermost loop */ + int addrCont; /* Jump here to continue with next cycle */ + int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ + int iReleaseReg = 0; /* Temp register to free before returning */ + Index *pIdx = 0; /* Index used by loop (if any) */ + int iLoop; /* Iteration of constraint generator loop */ + + pWC = &pWInfo->sWC; + db = pParse->db; + pLoop = pLevel->pWLoop; + pTabItem = &pWInfo->pTabList->a[pLevel->iFrom]; + iCur = pTabItem->iCursor; + pLevel->notReady = notReady & ~sqlite3WhereGetMask(&pWInfo->sMaskSet, iCur); + bRev = (pWInfo->revMask>>iLevel)&1; + VdbeModuleComment((v, "Begin WHERE-loop%d: %s",iLevel,pTabItem->pTab->zName)); +#if WHERETRACE_ENABLED /* 0x20800 */ + if( sqlite3WhereTrace & 0x800 ){ + sqlite3DebugPrintf("Coding level %d of %d: notReady=%llx iFrom=%d\n", + iLevel, pWInfo->nLevel, (u64)notReady, pLevel->iFrom); + sqlite3WhereLoopPrint(pLoop, pWC); + } + if( sqlite3WhereTrace & 0x20000 ){ + if( iLevel==0 ){ + sqlite3DebugPrintf("WHERE clause being coded:\n"); + sqlite3TreeViewExpr(0, pWInfo->pWhere, 0); + } + sqlite3DebugPrintf("All WHERE-clause terms before coding:\n"); + sqlite3WhereClausePrint(pWC); + } +#endif + + /* Create labels for the "break" and "continue" instructions + ** for the current loop. Jump to addrBrk to break out of a loop. + ** Jump to cont to go immediately to the next iteration of the + ** loop. + ** + ** When there is an IN operator, we also have a "addrNxt" label that + ** means to continue with the next IN value combination. When + ** there are no IN operators in the constraints, the "addrNxt" label + ** is the same as "addrBrk". + */ + addrBrk = pLevel->addrBrk = pLevel->addrNxt = sqlite3VdbeMakeLabel(pParse); + addrCont = pLevel->addrCont = sqlite3VdbeMakeLabel(pParse); + + /* If this is the right table of a LEFT OUTER JOIN, allocate and + ** initialize a memory cell that records if this table matches any + ** row of the left table of the join. + */ + assert( (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) + || pLevel->iFrom>0 || (pTabItem[0].fg.jointype & JT_LEFT)==0 + ); + if( pLevel->iFrom>0 && (pTabItem[0].fg.jointype & JT_LEFT)!=0 ){ + pLevel->iLeftJoin = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, pLevel->iLeftJoin); + VdbeComment((v, "init LEFT JOIN no-match flag")); + } + + /* Compute a safe address to jump to if we discover that the table for + ** this loop is empty and can never contribute content. */ + for(j=iLevel; j>0 && pWInfo->a[j].iLeftJoin==0; j--){} + addrHalt = pWInfo->a[j].addrBrk; + + /* Special case of a FROM clause subquery implemented as a co-routine */ + if( pTabItem->fg.viaCoroutine ){ + int regYield = pTabItem->regReturn; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub); + pLevel->p2 = sqlite3VdbeAddOp2(v, OP_Yield, regYield, addrBrk); + VdbeCoverage(v); + VdbeComment((v, "next row of %s", pTabItem->pTab->zName)); + pLevel->op = OP_Goto; + }else + +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){ + /* Case 1: The table is a virtual-table. Use the VFilter and VNext + ** to access the data. + */ + int iReg; /* P3 Value for OP_VFilter */ + int addrNotFound; + int nConstraint = pLoop->nLTerm; + + iReg = sqlite3GetTempRange(pParse, nConstraint+2); + addrNotFound = pLevel->addrBrk; + for(j=0; jaLTerm[j]; + if( NEVER(pTerm==0) ) continue; + if( pTerm->eOperator & WO_IN ){ + if( SMASKBIT32(j) & pLoop->u.vtab.mHandleIn ){ + int iTab = pParse->nTab++; + int iCache = ++pParse->nMem; + sqlite3CodeRhsOfIN(pParse, pTerm->pExpr, iTab); + sqlite3VdbeAddOp3(v, OP_VInitIn, iTab, iTarget, iCache); + }else{ + codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, iTarget); + addrNotFound = pLevel->addrNxt; + } + }else{ + Expr *pRight = pTerm->pExpr->pRight; + codeExprOrVector(pParse, pRight, iTarget, 1); + if( pTerm->eMatchOp==SQLITE_INDEX_CONSTRAINT_OFFSET + && pLoop->u.vtab.bOmitOffset + ){ + assert( pTerm->eOperator==WO_AUX ); + assert( pWInfo->pLimit!=0 ); + assert( pWInfo->pLimit->iOffset>0 ); + sqlite3VdbeAddOp2(v, OP_Integer, 0, pWInfo->pLimit->iOffset); + VdbeComment((v,"Zero OFFSET counter")); + } + } + } + sqlite3VdbeAddOp2(v, OP_Integer, pLoop->u.vtab.idxNum, iReg); + sqlite3VdbeAddOp2(v, OP_Integer, nConstraint, iReg+1); + sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg, + pLoop->u.vtab.idxStr, + pLoop->u.vtab.needFree ? P4_DYNAMIC : P4_STATIC); + VdbeCoverage(v); + pLoop->u.vtab.needFree = 0; + /* An OOM inside of AddOp4(OP_VFilter) instruction above might have freed + ** the u.vtab.idxStr. NULL it out to prevent a use-after-free */ + if( db->mallocFailed ) pLoop->u.vtab.idxStr = 0; + pLevel->p1 = iCur; + pLevel->op = pWInfo->eOnePass ? OP_Noop : OP_VNext; + pLevel->p2 = sqlite3VdbeCurrentAddr(v); + assert( (pLoop->wsFlags & WHERE_MULTI_OR)==0 ); + + for(j=0; jaLTerm[j]; + if( j<16 && (pLoop->u.vtab.omitMask>>j)&1 ){ + disableTerm(pLevel, pTerm); + continue; + } + if( (pTerm->eOperator & WO_IN)!=0 + && (SMASKBIT32(j) & pLoop->u.vtab.mHandleIn)==0 + && !db->mallocFailed + ){ + Expr *pCompare; /* The comparison operator */ + Expr *pRight; /* RHS of the comparison */ + VdbeOp *pOp; /* Opcode to access the value of the IN constraint */ + int iIn; /* IN loop corresponding to the j-th constraint */ + + /* Reload the constraint value into reg[iReg+j+2]. The same value + ** was loaded into the same register prior to the OP_VFilter, but + ** the xFilter implementation might have changed the datatype or + ** encoding of the value in the register, so it *must* be reloaded. + */ + for(iIn=0; ALWAYS(iInu.in.nIn); iIn++){ + pOp = sqlite3VdbeGetOp(v, pLevel->u.in.aInLoop[iIn].addrInTop); + if( (pOp->opcode==OP_Column && pOp->p3==iReg+j+2) + || (pOp->opcode==OP_Rowid && pOp->p2==iReg+j+2) + ){ + testcase( pOp->opcode==OP_Rowid ); + sqlite3VdbeAddOp3(v, pOp->opcode, pOp->p1, pOp->p2, pOp->p3); + break; + } + } + + /* Generate code that will continue to the next row if + ** the IN constraint is not satisfied + */ + pCompare = sqlite3PExpr(pParse, TK_EQ, 0, 0); + if( !db->mallocFailed ){ + int iFld = pTerm->u.x.iField; + Expr *pLeft = pTerm->pExpr->pLeft; + assert( pLeft!=0 ); + if( iFld>0 ){ + assert( pLeft->op==TK_VECTOR ); + assert( ExprUseXList(pLeft) ); + assert( iFld<=pLeft->x.pList->nExpr ); + pCompare->pLeft = pLeft->x.pList->a[iFld-1].pExpr; + }else{ + pCompare->pLeft = pLeft; + } + pCompare->pRight = pRight = sqlite3Expr(db, TK_REGISTER, 0); + if( pRight ){ + pRight->iTable = iReg+j+2; + sqlite3ExprIfFalse( + pParse, pCompare, pLevel->addrCont, SQLITE_JUMPIFNULL + ); + } + pCompare->pLeft = 0; + } + sqlite3ExprDelete(db, pCompare); + } + } + + /* These registers need to be preserved in case there is an IN operator + ** loop. So we could deallocate the registers here (and potentially + ** reuse them later) if (pLoop->wsFlags & WHERE_IN_ABLE)==0. But it seems + ** simpler and safer to simply not reuse the registers. + ** + ** sqlite3ReleaseTempRange(pParse, iReg, nConstraint+2); + */ + }else +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + if( (pLoop->wsFlags & WHERE_IPK)!=0 + && (pLoop->wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_EQ))!=0 + ){ + /* Case 2: We can directly reference a single row using an + ** equality comparison against the ROWID field. Or + ** we reference multiple rows using a "rowid IN (...)" + ** construct. + */ + assert( pLoop->u.btree.nEq==1 ); + pTerm = pLoop->aLTerm[0]; + assert( pTerm!=0 ); + assert( pTerm->pExpr!=0 ); + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + iReleaseReg = ++pParse->nMem; + iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg); + if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); + addrNxt = pLevel->addrNxt; + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt, + iRowidReg, 1); + VdbeCoverage(v); + filterPullDown(pParse, pWInfo, iLevel, addrNxt, notReady); + } + sqlite3VdbeAddOp3(v, OP_SeekRowid, iCur, addrNxt, iRowidReg); + VdbeCoverage(v); + pLevel->op = OP_Noop; + }else if( (pLoop->wsFlags & WHERE_IPK)!=0 + && (pLoop->wsFlags & WHERE_COLUMN_RANGE)!=0 + ){ + /* Case 3: We have an inequality comparison against the ROWID field. + */ + int testOp = OP_Noop; + int start; + int memEndValue = 0; + WhereTerm *pStart, *pEnd; + + j = 0; + pStart = pEnd = 0; + if( pLoop->wsFlags & WHERE_BTM_LIMIT ) pStart = pLoop->aLTerm[j++]; + if( pLoop->wsFlags & WHERE_TOP_LIMIT ) pEnd = pLoop->aLTerm[j++]; + assert( pStart!=0 || pEnd!=0 ); + if( bRev ){ + pTerm = pStart; + pStart = pEnd; + pEnd = pTerm; + } + codeCursorHint(pTabItem, pWInfo, pLevel, pEnd); + if( pStart ){ + Expr *pX; /* The expression that defines the start bound */ + int r1, rTemp; /* Registers for holding the start boundary */ + int op; /* Cursor seek operation */ + + /* The following constant maps TK_xx codes into corresponding + ** seek opcodes. It depends on a particular ordering of TK_xx + */ + const u8 aMoveOp[] = { + /* TK_GT */ OP_SeekGT, + /* TK_LE */ OP_SeekLE, + /* TK_LT */ OP_SeekLT, + /* TK_GE */ OP_SeekGE + }; + assert( TK_LE==TK_GT+1 ); /* Make sure the ordering.. */ + assert( TK_LT==TK_GT+2 ); /* ... of the TK_xx values... */ + assert( TK_GE==TK_GT+3 ); /* ... is correcct. */ + + assert( (pStart->wtFlags & TERM_VNULL)==0 ); + testcase( pStart->wtFlags & TERM_VIRTUAL ); + pX = pStart->pExpr; + assert( pX!=0 ); + testcase( pStart->leftCursor!=iCur ); /* transitive constraints */ + if( sqlite3ExprIsVector(pX->pRight) ){ + r1 = rTemp = sqlite3GetTempReg(pParse); + codeExprOrVector(pParse, pX->pRight, r1, 1); + testcase( pX->op==TK_GT ); + testcase( pX->op==TK_GE ); + testcase( pX->op==TK_LT ); + testcase( pX->op==TK_LE ); + op = aMoveOp[((pX->op - TK_GT - 1) & 0x3) | 0x1]; + assert( pX->op!=TK_GT || op==OP_SeekGE ); + assert( pX->op!=TK_GE || op==OP_SeekGE ); + assert( pX->op!=TK_LT || op==OP_SeekLE ); + assert( pX->op!=TK_LE || op==OP_SeekLE ); + }else{ + r1 = sqlite3ExprCodeTemp(pParse, pX->pRight, &rTemp); + disableTerm(pLevel, pStart); + op = aMoveOp[(pX->op - TK_GT)]; + } + sqlite3VdbeAddOp3(v, op, iCur, addrBrk, r1); + VdbeComment((v, "pk")); + VdbeCoverageIf(v, pX->op==TK_GT); + VdbeCoverageIf(v, pX->op==TK_LE); + VdbeCoverageIf(v, pX->op==TK_LT); + VdbeCoverageIf(v, pX->op==TK_GE); + sqlite3ReleaseTempReg(pParse, rTemp); + }else{ + sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iCur, addrHalt); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + } + if( pEnd ){ + Expr *pX; + pX = pEnd->pExpr; + assert( pX!=0 ); + assert( (pEnd->wtFlags & TERM_VNULL)==0 ); + testcase( pEnd->leftCursor!=iCur ); /* Transitive constraints */ + testcase( pEnd->wtFlags & TERM_VIRTUAL ); + memEndValue = ++pParse->nMem; + codeExprOrVector(pParse, pX->pRight, memEndValue, 1); + if( 0==sqlite3ExprIsVector(pX->pRight) + && (pX->op==TK_LT || pX->op==TK_GT) + ){ + testOp = bRev ? OP_Le : OP_Ge; + }else{ + testOp = bRev ? OP_Lt : OP_Gt; + } + if( 0==sqlite3ExprIsVector(pX->pRight) ){ + disableTerm(pLevel, pEnd); + } + } + start = sqlite3VdbeCurrentAddr(v); + pLevel->op = bRev ? OP_Prev : OP_Next; + pLevel->p1 = iCur; + pLevel->p2 = start; + assert( pLevel->p5==0 ); + if( testOp!=OP_Noop ){ + iRowidReg = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Rowid, iCur, iRowidReg); + sqlite3VdbeAddOp3(v, testOp, memEndValue, addrBrk, iRowidReg); + VdbeCoverageIf(v, testOp==OP_Le); + VdbeCoverageIf(v, testOp==OP_Lt); + VdbeCoverageIf(v, testOp==OP_Ge); + VdbeCoverageIf(v, testOp==OP_Gt); + sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC | SQLITE_JUMPIFNULL); + } + }else if( pLoop->wsFlags & WHERE_INDEXED ){ + /* Case 4: A scan using an index. + ** + ** The WHERE clause may contain zero or more equality + ** terms ("==" or "IN" operators) that refer to the N + ** left-most columns of the index. It may also contain + ** inequality constraints (>, <, >= or <=) on the indexed + ** column that immediately follows the N equalities. Only + ** the right-most column can be an inequality - the rest must + ** use the "==" and "IN" operators. For example, if the + ** index is on (x,y,z), then the following clauses are all + ** optimized: + ** + ** x=5 + ** x=5 AND y=10 + ** x=5 AND y<10 + ** x=5 AND y>5 AND y<10 + ** x=5 AND y=5 AND z<=10 + ** + ** The z<10 term of the following cannot be used, only + ** the x=5 term: + ** + ** x=5 AND z<10 + ** + ** N may be zero if there are inequality constraints. + ** If there are no inequality constraints, then N is at + ** least one. + ** + ** This case is also used when there are no WHERE clause + ** constraints but an index is selected anyway, in order + ** to force the output order to conform to an ORDER BY. + */ + static const u8 aStartOp[] = { + 0, + 0, + OP_Rewind, /* 2: (!start_constraints && startEq && !bRev) */ + OP_Last, /* 3: (!start_constraints && startEq && bRev) */ + OP_SeekGT, /* 4: (start_constraints && !startEq && !bRev) */ + OP_SeekLT, /* 5: (start_constraints && !startEq && bRev) */ + OP_SeekGE, /* 6: (start_constraints && startEq && !bRev) */ + OP_SeekLE /* 7: (start_constraints && startEq && bRev) */ + }; + static const u8 aEndOp[] = { + OP_IdxGE, /* 0: (end_constraints && !bRev && !endEq) */ + OP_IdxGT, /* 1: (end_constraints && !bRev && endEq) */ + OP_IdxLE, /* 2: (end_constraints && bRev && !endEq) */ + OP_IdxLT, /* 3: (end_constraints && bRev && endEq) */ + }; + u16 nEq = pLoop->u.btree.nEq; /* Number of == or IN terms */ + u16 nBtm = pLoop->u.btree.nBtm; /* Length of BTM vector */ + u16 nTop = pLoop->u.btree.nTop; /* Length of TOP vector */ + int regBase; /* Base register holding constraint values */ + WhereTerm *pRangeStart = 0; /* Inequality constraint at range start */ + WhereTerm *pRangeEnd = 0; /* Inequality constraint at range end */ + int startEq; /* True if range start uses ==, >= or <= */ + int endEq; /* True if range end uses ==, >= or <= */ + int start_constraints; /* Start of range is constrained */ + int nConstraint; /* Number of constraint terms */ + int iIdxCur; /* The VDBE cursor for the index */ + int nExtraReg = 0; /* Number of extra registers needed */ + int op; /* Instruction opcode */ + char *zStartAff; /* Affinity for start of range constraint */ + char *zEndAff = 0; /* Affinity for end of range constraint */ + u8 bSeekPastNull = 0; /* True to seek past initial nulls */ + u8 bStopAtNull = 0; /* Add condition to terminate at NULLs */ + int omitTable; /* True if we use the index only */ + int regBignull = 0; /* big-null flag register */ + int addrSeekScan = 0; /* Opcode of the OP_SeekScan, if any */ + + pIdx = pLoop->u.btree.pIndex; + iIdxCur = pLevel->iIdxCur; + assert( nEq>=pLoop->nSkip ); + + /* Find any inequality constraint terms for the start and end + ** of the range. + */ + j = nEq; + if( pLoop->wsFlags & WHERE_BTM_LIMIT ){ + pRangeStart = pLoop->aLTerm[j++]; + nExtraReg = MAX(nExtraReg, pLoop->u.btree.nBtm); + /* Like optimization range constraints always occur in pairs */ + assert( (pRangeStart->wtFlags & TERM_LIKEOPT)==0 || + (pLoop->wsFlags & WHERE_TOP_LIMIT)!=0 ); + } + if( pLoop->wsFlags & WHERE_TOP_LIMIT ){ + pRangeEnd = pLoop->aLTerm[j++]; + nExtraReg = MAX(nExtraReg, pLoop->u.btree.nTop); +#ifndef SQLITE_LIKE_DOESNT_MATCH_BLOBS + if( (pRangeEnd->wtFlags & TERM_LIKEOPT)!=0 ){ + assert( pRangeStart!=0 ); /* LIKE opt constraints */ + assert( pRangeStart->wtFlags & TERM_LIKEOPT ); /* occur in pairs */ + pLevel->iLikeRepCntr = (u32)++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 1, (int)pLevel->iLikeRepCntr); + VdbeComment((v, "LIKE loop counter")); + pLevel->addrLikeRep = sqlite3VdbeCurrentAddr(v); + /* iLikeRepCntr actually stores 2x the counter register number. The + ** bottom bit indicates whether the search order is ASC or DESC. */ + testcase( bRev ); + testcase( pIdx->aSortOrder[nEq]==SQLITE_SO_DESC ); + assert( (bRev & ~1)==0 ); + pLevel->iLikeRepCntr <<=1; + pLevel->iLikeRepCntr |= bRev ^ (pIdx->aSortOrder[nEq]==SQLITE_SO_DESC); + } +#endif + if( pRangeStart==0 ){ + j = pIdx->aiColumn[nEq]; + if( (j>=0 && pIdx->pTable->aCol[j].notNull==0) || j==XN_EXPR ){ + bSeekPastNull = 1; + } + } + } + assert( pRangeEnd==0 || (pRangeEnd->wtFlags & TERM_VNULL)==0 ); + + /* If the WHERE_BIGNULL_SORT flag is set, then index column nEq uses + ** a non-default "big-null" sort (either ASC NULLS LAST or DESC NULLS + ** FIRST). In both cases separate ordered scans are made of those + ** index entries for which the column is null and for those for which + ** it is not. For an ASC sort, the non-NULL entries are scanned first. + ** For DESC, NULL entries are scanned first. + */ + if( (pLoop->wsFlags & (WHERE_TOP_LIMIT|WHERE_BTM_LIMIT))==0 + && (pLoop->wsFlags & WHERE_BIGNULL_SORT)!=0 + ){ + assert( bSeekPastNull==0 && nExtraReg==0 && nBtm==0 && nTop==0 ); + assert( pRangeEnd==0 && pRangeStart==0 ); + testcase( pLoop->nSkip>0 ); + nExtraReg = 1; + bSeekPastNull = 1; + pLevel->regBignull = regBignull = ++pParse->nMem; + if( pLevel->iLeftJoin ){ + sqlite3VdbeAddOp2(v, OP_Integer, 0, regBignull); + } + pLevel->addrBignull = sqlite3VdbeMakeLabel(pParse); + } + + /* If we are doing a reverse order scan on an ascending index, or + ** a forward order scan on a descending index, interchange the + ** start and end terms (pRangeStart and pRangeEnd). + */ + if( (nEqnColumn && bRev==(pIdx->aSortOrder[nEq]==SQLITE_SO_ASC)) ){ + SWAP(WhereTerm *, pRangeEnd, pRangeStart); + SWAP(u8, bSeekPastNull, bStopAtNull); + SWAP(u8, nBtm, nTop); + } + + if( iLevel>0 && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)!=0 ){ + /* In case OP_SeekScan is used, ensure that the index cursor does not + ** point to a valid row for the first iteration of this loop. */ + sqlite3VdbeAddOp1(v, OP_NullRow, iIdxCur); + } + + /* Generate code to evaluate all constraint terms using == or IN + ** and store the values of those terms in an array of registers + ** starting at regBase. + */ + codeCursorHint(pTabItem, pWInfo, pLevel, pRangeEnd); + regBase = codeAllEqualityTerms(pParse,pLevel,bRev,nExtraReg,&zStartAff); + assert( zStartAff==0 || sqlite3Strlen30(zStartAff)>=nEq ); + if( zStartAff && nTop ){ + zEndAff = sqlite3DbStrDup(db, &zStartAff[nEq]); + } + addrNxt = (regBignull ? pLevel->addrBignull : pLevel->addrNxt); + + testcase( pRangeStart && (pRangeStart->eOperator & WO_LE)!=0 ); + testcase( pRangeStart && (pRangeStart->eOperator & WO_GE)!=0 ); + testcase( pRangeEnd && (pRangeEnd->eOperator & WO_LE)!=0 ); + testcase( pRangeEnd && (pRangeEnd->eOperator & WO_GE)!=0 ); + startEq = !pRangeStart || pRangeStart->eOperator & (WO_LE|WO_GE); + endEq = !pRangeEnd || pRangeEnd->eOperator & (WO_LE|WO_GE); + start_constraints = pRangeStart || nEq>0; + + /* Seek the index cursor to the start of the range. */ + nConstraint = nEq; + if( pRangeStart ){ + Expr *pRight = pRangeStart->pExpr->pRight; + codeExprOrVector(pParse, pRight, regBase+nEq, nBtm); + whereLikeOptimizationStringFixup(v, pLevel, pRangeStart); + if( (pRangeStart->wtFlags & TERM_VNULL)==0 + && sqlite3ExprCanBeNull(pRight) + ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); + VdbeCoverage(v); + } + if( zStartAff ){ + updateRangeAffinityStr(pRight, nBtm, &zStartAff[nEq]); + } + nConstraint += nBtm; + testcase( pRangeStart->wtFlags & TERM_VIRTUAL ); + if( sqlite3ExprIsVector(pRight)==0 ){ + disableTerm(pLevel, pRangeStart); + }else{ + startEq = 1; + } + bSeekPastNull = 0; + }else if( bSeekPastNull ){ + startEq = 0; + sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); + start_constraints = 1; + nConstraint++; + }else if( regBignull ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); + start_constraints = 1; + nConstraint++; + } + codeApplyAffinity(pParse, regBase, nConstraint - bSeekPastNull, zStartAff); + if( pLoop->nSkip>0 && nConstraint==pLoop->nSkip ){ + /* The skip-scan logic inside the call to codeAllEqualityConstraints() + ** above has already left the cursor sitting on the correct row, + ** so no further seeking is needed */ + }else{ + if( regBignull ){ + sqlite3VdbeAddOp2(v, OP_Integer, 1, regBignull); + VdbeComment((v, "NULL-scan pass ctr")); + } + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt, + regBase, nEq); + VdbeCoverage(v); + filterPullDown(pParse, pWInfo, iLevel, addrNxt, notReady); + } + + op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; + assert( op!=0 ); + if( (pLoop->wsFlags & WHERE_IN_SEEKSCAN)!=0 && op==OP_SeekGE ){ + assert( regBignull==0 ); + /* TUNING: The OP_SeekScan opcode seeks to reduce the number + ** of expensive seek operations by replacing a single seek with + ** 1 or more step operations. The question is, how many steps + ** should we try before giving up and going with a seek. The cost + ** of a seek is proportional to the logarithm of the of the number + ** of entries in the tree, so basing the number of steps to try + ** on the estimated number of rows in the btree seems like a good + ** guess. */ + addrSeekScan = sqlite3VdbeAddOp1(v, OP_SeekScan, + (pIdx->aiRowLogEst[0]+9)/10); + VdbeCoverage(v); + } + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); + VdbeCoverage(v); + VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); + VdbeCoverageIf(v, op==OP_Last); testcase( op==OP_Last ); + VdbeCoverageIf(v, op==OP_SeekGT); testcase( op==OP_SeekGT ); + VdbeCoverageIf(v, op==OP_SeekGE); testcase( op==OP_SeekGE ); + VdbeCoverageIf(v, op==OP_SeekLE); testcase( op==OP_SeekLE ); + VdbeCoverageIf(v, op==OP_SeekLT); testcase( op==OP_SeekLT ); + + assert( bSeekPastNull==0 || bStopAtNull==0 ); + if( regBignull ){ + assert( bSeekPastNull==1 || bStopAtNull==1 ); + assert( bSeekPastNull==!bStopAtNull ); + assert( bStopAtNull==startEq ); + sqlite3VdbeAddOp2(v, OP_Goto, 0, sqlite3VdbeCurrentAddr(v)+2); + op = aStartOp[(nConstraint>1)*4 + 2 + bRev]; + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, + nConstraint-startEq); + VdbeCoverage(v); + VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); + VdbeCoverageIf(v, op==OP_Last); testcase( op==OP_Last ); + VdbeCoverageIf(v, op==OP_SeekGE); testcase( op==OP_SeekGE ); + VdbeCoverageIf(v, op==OP_SeekLE); testcase( op==OP_SeekLE ); + assert( op==OP_Rewind || op==OP_Last || op==OP_SeekGE || op==OP_SeekLE); + } + } + + /* Load the value for the inequality constraint at the end of the + ** range (if any). + */ + nConstraint = nEq; + assert( pLevel->p2==0 ); + if( pRangeEnd ){ + Expr *pRight = pRangeEnd->pExpr->pRight; + if( addrSeekScan ){ + /* For a seek-scan that has a range on the lowest term of the index, + ** we have to make the top of the loop be code that sets the end + ** condition of the range. Otherwise, the OP_SeekScan might jump + ** over that initialization, leaving the range-end value set to the + ** range-start value, resulting in a wrong answer. + ** See ticket 5981a8c041a3c2f3 (2021-11-02). + */ + pLevel->p2 = sqlite3VdbeCurrentAddr(v); + } + codeExprOrVector(pParse, pRight, regBase+nEq, nTop); + whereLikeOptimizationStringFixup(v, pLevel, pRangeEnd); + if( (pRangeEnd->wtFlags & TERM_VNULL)==0 + && sqlite3ExprCanBeNull(pRight) + ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); + VdbeCoverage(v); + } + if( zEndAff ){ + updateRangeAffinityStr(pRight, nTop, zEndAff); + codeApplyAffinity(pParse, regBase+nEq, nTop, zEndAff); + }else{ + assert( pParse->db->mallocFailed ); + } + nConstraint += nTop; + testcase( pRangeEnd->wtFlags & TERM_VIRTUAL ); + + if( sqlite3ExprIsVector(pRight)==0 ){ + disableTerm(pLevel, pRangeEnd); + }else{ + endEq = 1; + } + }else if( bStopAtNull ){ + if( regBignull==0 ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); + endEq = 0; + } + nConstraint++; + } + sqlite3DbFree(db, zStartAff); + sqlite3DbFree(db, zEndAff); + + /* Top of the loop body */ + if( pLevel->p2==0 ) pLevel->p2 = sqlite3VdbeCurrentAddr(v); + + /* Check if the index cursor is past the end of the range. */ + if( nConstraint ){ + if( regBignull ){ + /* Except, skip the end-of-range check while doing the NULL-scan */ + sqlite3VdbeAddOp2(v, OP_IfNot, regBignull, sqlite3VdbeCurrentAddr(v)+3); + VdbeComment((v, "If NULL-scan 2nd pass")); + VdbeCoverage(v); + } + op = aEndOp[bRev*2 + endEq]; + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); + testcase( op==OP_IdxGT ); VdbeCoverageIf(v, op==OP_IdxGT ); + testcase( op==OP_IdxGE ); VdbeCoverageIf(v, op==OP_IdxGE ); + testcase( op==OP_IdxLT ); VdbeCoverageIf(v, op==OP_IdxLT ); + testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); + if( addrSeekScan ) sqlite3VdbeJumpHere(v, addrSeekScan); + } + if( regBignull ){ + /* During a NULL-scan, check to see if we have reached the end of + ** the NULLs */ + assert( bSeekPastNull==!bStopAtNull ); + assert( bSeekPastNull+bStopAtNull==1 ); + assert( nConstraint+bSeekPastNull>0 ); + sqlite3VdbeAddOp2(v, OP_If, regBignull, sqlite3VdbeCurrentAddr(v)+2); + VdbeComment((v, "If NULL-scan 1st pass")); + VdbeCoverage(v); + op = aEndOp[bRev*2 + bSeekPastNull]; + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, + nConstraint+bSeekPastNull); + testcase( op==OP_IdxGT ); VdbeCoverageIf(v, op==OP_IdxGT ); + testcase( op==OP_IdxGE ); VdbeCoverageIf(v, op==OP_IdxGE ); + testcase( op==OP_IdxLT ); VdbeCoverageIf(v, op==OP_IdxLT ); + testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); + } + + if( (pLoop->wsFlags & WHERE_IN_EARLYOUT)!=0 ){ + sqlite3VdbeAddOp3(v, OP_SeekHit, iIdxCur, nEq, nEq); + } + + /* Seek the table cursor, if required */ + omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0 + && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE)==0; + if( omitTable ){ + /* pIdx is a covering index. No need to access the main table. */ + }else if( HasRowid(pIdx->pTable) ){ + codeDeferredSeek(pWInfo, pIdx, iCur, iIdxCur); + }else if( iCur!=iIdxCur ){ + Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable); + iRowidReg = sqlite3GetTempRange(pParse, pPk->nKeyCol); + for(j=0; jnKeyCol; j++){ + k = sqlite3TableColumnToIndex(pIdx, pPk->aiColumn[j]); + sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, iRowidReg+j); + } + sqlite3VdbeAddOp4Int(v, OP_NotFound, iCur, addrCont, + iRowidReg, pPk->nKeyCol); VdbeCoverage(v); + } + + if( pLevel->iLeftJoin==0 ){ + /* If pIdx is an index on one or more expressions, then look through + ** all the expressions in pWInfo and try to transform matching expressions + ** into reference to index columns. Also attempt to translate references + ** to virtual columns in the table into references to (stored) columns + ** of the index. + ** + ** Do not do this for the RHS of a LEFT JOIN. This is because the + ** expression may be evaluated after OP_NullRow has been executed on + ** the cursor. In this case it is important to do the full evaluation, + ** as the result of the expression may not be NULL, even if all table + ** column values are. https://www.sqlite.org/src/info/7fa8049685b50b5a + ** + ** Also, do not do this when processing one index an a multi-index + ** OR clause, since the transformation will become invalid once we + ** move forward to the next index. + ** https://sqlite.org/src/info/4e8e4857d32d401f + */ + if( (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE)==0 ){ + whereIndexExprTrans(pIdx, iCur, iIdxCur, pWInfo); + } + + /* If a partial index is driving the loop, try to eliminate WHERE clause + ** terms from the query that must be true due to the WHERE clause of + ** the partial index. + ** + ** 2019-11-02 ticket 623eff57e76d45f6: This optimization does not work + ** for a LEFT JOIN. + */ + if( pIdx->pPartIdxWhere ){ + whereApplyPartialIndexConstraints(pIdx->pPartIdxWhere, iCur, pWC); + } + }else{ + testcase( pIdx->pPartIdxWhere ); + /* The following assert() is not a requirement, merely an observation: + ** The OR-optimization doesn't work for the right hand table of + ** a LEFT JOIN: */ + assert( (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE)==0 ); + } + + /* Record the instruction used to terminate the loop. */ + if( pLoop->wsFlags & WHERE_ONEROW ){ + pLevel->op = OP_Noop; + }else if( bRev ){ + pLevel->op = OP_Prev; + }else{ + pLevel->op = OP_Next; + } + pLevel->p1 = iIdxCur; + pLevel->p3 = (pLoop->wsFlags&WHERE_UNQ_WANTED)!=0 ? 1:0; + if( (pLoop->wsFlags & WHERE_CONSTRAINT)==0 ){ + pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + }else{ + assert( pLevel->p5==0 ); + } + if( omitTable ) pIdx = 0; + }else + +#ifndef SQLITE_OMIT_OR_OPTIMIZATION + if( pLoop->wsFlags & WHERE_MULTI_OR ){ + /* Case 5: Two or more separately indexed terms connected by OR + ** + ** Example: + ** + ** CREATE TABLE t1(a,b,c,d); + ** CREATE INDEX i1 ON t1(a); + ** CREATE INDEX i2 ON t1(b); + ** CREATE INDEX i3 ON t1(c); + ** + ** SELECT * FROM t1 WHERE a=5 OR b=7 OR (c=11 AND d=13) + ** + ** In the example, there are three indexed terms connected by OR. + ** The top of the loop looks like this: + ** + ** Null 1 # Zero the rowset in reg 1 + ** + ** Then, for each indexed term, the following. The arguments to + ** RowSetTest are such that the rowid of the current row is inserted + ** into the RowSet. If it is already present, control skips the + ** Gosub opcode and jumps straight to the code generated by WhereEnd(). + ** + ** sqlite3WhereBegin() + ** RowSetTest # Insert rowid into rowset + ** Gosub 2 A + ** sqlite3WhereEnd() + ** + ** Following the above, code to terminate the loop. Label A, the target + ** of the Gosub above, jumps to the instruction right after the Goto. + ** + ** Null 1 # Zero the rowset in reg 1 + ** Goto B # The loop is finished. + ** + ** A: # Return data, whatever. + ** + ** Return 2 # Jump back to the Gosub + ** + ** B: + ** + ** Added 2014-05-26: If the table is a WITHOUT ROWID table, then + ** use an ephemeral index instead of a RowSet to record the primary + ** keys of the rows we have already seen. + ** + */ + WhereClause *pOrWc; /* The OR-clause broken out into subterms */ + SrcList *pOrTab; /* Shortened table list or OR-clause generation */ + Index *pCov = 0; /* Potential covering index (or NULL) */ + int iCovCur = pParse->nTab++; /* Cursor used for index scans (if any) */ + + int regReturn = ++pParse->nMem; /* Register used with OP_Gosub */ + int regRowset = 0; /* Register for RowSet object */ + int regRowid = 0; /* Register holding rowid */ + int iLoopBody = sqlite3VdbeMakeLabel(pParse);/* Start of loop body */ + int iRetInit; /* Address of regReturn init */ + int untestedTerms = 0; /* Some terms not completely tested */ + int ii; /* Loop counter */ + Expr *pAndExpr = 0; /* An ".. AND (...)" expression */ + Table *pTab = pTabItem->pTab; + + pTerm = pLoop->aLTerm[0]; + assert( pTerm!=0 ); + assert( pTerm->eOperator & WO_OR ); + assert( (pTerm->wtFlags & TERM_ORINFO)!=0 ); + pOrWc = &pTerm->u.pOrInfo->wc; + pLevel->op = OP_Return; + pLevel->p1 = regReturn; + + /* Set up a new SrcList in pOrTab containing the table being scanned + ** by this loop in the a[0] slot and all notReady tables in a[1..] slots. + ** This becomes the SrcList in the recursive call to sqlite3WhereBegin(). + */ + if( pWInfo->nLevel>1 ){ + int nNotReady; /* The number of notReady tables */ + SrcItem *origSrc; /* Original list of tables */ + nNotReady = pWInfo->nLevel - iLevel - 1; + pOrTab = sqlite3StackAllocRaw(db, + sizeof(*pOrTab)+ nNotReady*sizeof(pOrTab->a[0])); + if( pOrTab==0 ) return notReady; + pOrTab->nAlloc = (u8)(nNotReady + 1); + pOrTab->nSrc = pOrTab->nAlloc; + memcpy(pOrTab->a, pTabItem, sizeof(*pTabItem)); + origSrc = pWInfo->pTabList->a; + for(k=1; k<=nNotReady; k++){ + memcpy(&pOrTab->a[k], &origSrc[pLevel[k].iFrom], sizeof(pOrTab->a[k])); + } + }else{ + pOrTab = pWInfo->pTabList; + } + + /* Initialize the rowset register to contain NULL. An SQL NULL is + ** equivalent to an empty rowset. Or, create an ephemeral index + ** capable of holding primary keys in the case of a WITHOUT ROWID. + ** + ** Also initialize regReturn to contain the address of the instruction + ** immediately following the OP_Return at the bottom of the loop. This + ** is required in a few obscure LEFT JOIN cases where control jumps + ** over the top of the loop into the body of it. In this case the + ** correct response for the end-of-loop code (the OP_Return) is to + ** fall through to the next instruction, just as an OP_Next does if + ** called on an uninitialized cursor. + */ + if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ + if( HasRowid(pTab) ){ + regRowset = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Null, 0, regRowset); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + regRowset = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, regRowset, pPk->nKeyCol); + sqlite3VdbeSetP4KeyInfo(pParse, pPk); + } + regRowid = ++pParse->nMem; + } + iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn); + + /* If the original WHERE clause is z of the form: (x1 OR x2 OR ...) AND y + ** Then for every term xN, evaluate as the subexpression: xN AND y + ** That way, terms in y that are factored into the disjunction will + ** be picked up by the recursive calls to sqlite3WhereBegin() below. + ** + ** Actually, each subexpression is converted to "xN AND w" where w is + ** the "interesting" terms of z - terms that did not originate in the + ** ON or USING clause of a LEFT JOIN, and terms that are usable as + ** indices. + ** + ** This optimization also only applies if the (x1 OR x2 OR ...) term + ** is not contained in the ON clause of a LEFT JOIN. + ** See ticket http://www.sqlite.org/src/info/f2369304e4 + ** + ** 2022-02-04: Do not push down slices of a row-value comparison. + ** In other words, "w" or "y" may not be a slice of a vector. Otherwise, + ** the initialization of the right-hand operand of the vector comparison + ** might not occur, or might occur only in an OR branch that is not + ** taken. dbsqlfuzz 80a9fade844b4fb43564efc972bcb2c68270f5d1. + ** + ** 2022-03-03: Do not push down expressions that involve subqueries. + ** The subquery might get coded as a subroutine. Any table-references + ** in the subquery might be resolved to index-references for the index on + ** the OR branch in which the subroutine is coded. But if the subroutine + ** is invoked from a different OR branch that uses a different index, such + ** index-references will not work. tag-20220303a + ** https://sqlite.org/forum/forumpost/36937b197273d403 + */ + if( pWC->nTerm>1 ){ + int iTerm; + for(iTerm=0; iTermnTerm; iTerm++){ + Expr *pExpr = pWC->a[iTerm].pExpr; + if( &pWC->a[iTerm] == pTerm ) continue; + testcase( pWC->a[iTerm].wtFlags & TERM_VIRTUAL ); + testcase( pWC->a[iTerm].wtFlags & TERM_CODED ); + testcase( pWC->a[iTerm].wtFlags & TERM_SLICE ); + if( (pWC->a[iTerm].wtFlags & (TERM_VIRTUAL|TERM_CODED|TERM_SLICE))!=0 ){ + continue; + } + if( (pWC->a[iTerm].eOperator & WO_ALL)==0 ) continue; + if( ExprHasProperty(pExpr, EP_Subquery) ) continue; /* tag-20220303a */ + pExpr = sqlite3ExprDup(db, pExpr, 0); + pAndExpr = sqlite3ExprAnd(pParse, pAndExpr, pExpr); + } + if( pAndExpr ){ + /* The extra 0x10000 bit on the opcode is masked off and does not + ** become part of the new Expr.op. However, it does make the + ** op==TK_AND comparison inside of sqlite3PExpr() false, and this + ** prevents sqlite3PExpr() from applying the AND short-circuit + ** optimization, which we do not want here. */ + pAndExpr = sqlite3PExpr(pParse, TK_AND|0x10000, 0, pAndExpr); + } + } + + /* Run a separate WHERE clause for each term of the OR clause. After + ** eliminating duplicates from other WHERE clauses, the action for each + ** sub-WHERE clause is to to invoke the main loop body as a subroutine. + */ + ExplainQueryPlan((pParse, 1, "MULTI-INDEX OR")); + for(ii=0; iinTerm; ii++){ + WhereTerm *pOrTerm = &pOrWc->a[ii]; + if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){ + WhereInfo *pSubWInfo; /* Info for single OR-term scan */ + Expr *pOrExpr = pOrTerm->pExpr; /* Current OR clause term */ + Expr *pDelete; /* Local copy of OR clause term */ + int jmp1 = 0; /* Address of jump operation */ + testcase( (pTabItem[0].fg.jointype & JT_LEFT)!=0 + && !ExprHasProperty(pOrExpr, EP_FromJoin) + ); /* See TH3 vtab25.400 and ticket 614b25314c766238 */ + pDelete = pOrExpr = sqlite3ExprDup(db, pOrExpr, 0); + if( db->mallocFailed ){ + sqlite3ExprDelete(db, pDelete); + continue; + } + if( pAndExpr ){ + pAndExpr->pLeft = pOrExpr; + pOrExpr = pAndExpr; + } + /* Loop through table entries that match term pOrTerm. */ + ExplainQueryPlan((pParse, 1, "INDEX %d", ii+1)); + WHERETRACE(0xffff, ("Subplan for OR-clause:\n")); + pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0, 0, + WHERE_OR_SUBCLAUSE, iCovCur); + assert( pSubWInfo || pParse->nErr ); + if( pSubWInfo ){ + WhereLoop *pSubLoop; + int addrExplain = sqlite3WhereExplainOneScan( + pParse, pOrTab, &pSubWInfo->a[0], 0 + ); + sqlite3WhereAddScanStatus(v, pOrTab, &pSubWInfo->a[0], addrExplain); + + /* This is the sub-WHERE clause body. First skip over + ** duplicate rows from prior sub-WHERE clauses, and record the + ** rowid (or PRIMARY KEY) for the current row so that the same + ** row will be skipped in subsequent sub-WHERE clauses. + */ + if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ + int iSet = ((ii==pOrWc->nTerm-1)?-1:ii); + if( HasRowid(pTab) ){ + sqlite3ExprCodeGetColumnOfTable(v, pTab, iCur, -1, regRowid); + jmp1 = sqlite3VdbeAddOp4Int(v, OP_RowSetTest, regRowset, 0, + regRowid, iSet); + VdbeCoverage(v); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + int nPk = pPk->nKeyCol; + int iPk; + int r; + + /* Read the PK into an array of temp registers. */ + r = sqlite3GetTempRange(pParse, nPk); + for(iPk=0; iPkaiColumn[iPk]; + sqlite3ExprCodeGetColumnOfTable(v, pTab, iCur, iCol,r+iPk); + } + + /* Check if the temp table already contains this key. If so, + ** the row has already been included in the result set and + ** can be ignored (by jumping past the Gosub below). Otherwise, + ** insert the key into the temp table and proceed with processing + ** the row. + ** + ** Use some of the same optimizations as OP_RowSetTest: If iSet + ** is zero, assume that the key cannot already be present in + ** the temp table. And if iSet is -1, assume that there is no + ** need to insert the key into the temp table, as it will never + ** be tested for. */ + if( iSet ){ + jmp1 = sqlite3VdbeAddOp4Int(v, OP_Found, regRowset, 0, r, nPk); + VdbeCoverage(v); + } + if( iSet>=0 ){ + sqlite3VdbeAddOp3(v, OP_MakeRecord, r, nPk, regRowid); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, regRowset, regRowid, + r, nPk); + if( iSet ) sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + } + + /* Release the array of temp registers */ + sqlite3ReleaseTempRange(pParse, r, nPk); + } + } + + /* Invoke the main loop body as a subroutine */ + sqlite3VdbeAddOp2(v, OP_Gosub, regReturn, iLoopBody); + + /* Jump here (skipping the main loop body subroutine) if the + ** current sub-WHERE row is a duplicate from prior sub-WHEREs. */ + if( jmp1 ) sqlite3VdbeJumpHere(v, jmp1); + + /* The pSubWInfo->untestedTerms flag means that this OR term + ** contained one or more AND term from a notReady table. The + ** terms from the notReady table could not be tested and will + ** need to be tested later. + */ + if( pSubWInfo->untestedTerms ) untestedTerms = 1; + + /* If all of the OR-connected terms are optimized using the same + ** index, and the index is opened using the same cursor number + ** by each call to sqlite3WhereBegin() made by this loop, it may + ** be possible to use that index as a covering index. + ** + ** If the call to sqlite3WhereBegin() above resulted in a scan that + ** uses an index, and this is either the first OR-connected term + ** processed or the index is the same as that used by all previous + ** terms, set pCov to the candidate covering index. Otherwise, set + ** pCov to NULL to indicate that no candidate covering index will + ** be available. + */ + pSubLoop = pSubWInfo->a[0].pWLoop; + assert( (pSubLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); + if( (pSubLoop->wsFlags & WHERE_INDEXED)!=0 + && (ii==0 || pSubLoop->u.btree.pIndex==pCov) + && (HasRowid(pTab) || !IsPrimaryKeyIndex(pSubLoop->u.btree.pIndex)) + ){ + assert( pSubWInfo->a[0].iIdxCur==iCovCur ); + pCov = pSubLoop->u.btree.pIndex; + }else{ + pCov = 0; + } + if( sqlite3WhereUsesDeferredSeek(pSubWInfo) ){ + pWInfo->bDeferredSeek = 1; + } + + /* Finish the loop through table entries that match term pOrTerm. */ + sqlite3WhereEnd(pSubWInfo); + ExplainQueryPlanPop(pParse); + } + sqlite3ExprDelete(db, pDelete); + } + } + ExplainQueryPlanPop(pParse); + assert( pLevel->pWLoop==pLoop ); + assert( (pLoop->wsFlags & WHERE_MULTI_OR)!=0 ); + assert( (pLoop->wsFlags & WHERE_IN_ABLE)==0 ); + pLevel->u.pCoveringIdx = pCov; + if( pCov ) pLevel->iIdxCur = iCovCur; + if( pAndExpr ){ + pAndExpr->pLeft = 0; + sqlite3ExprDelete(db, pAndExpr); + } + sqlite3VdbeChangeP1(v, iRetInit, sqlite3VdbeCurrentAddr(v)); + sqlite3VdbeGoto(v, pLevel->addrBrk); + sqlite3VdbeResolveLabel(v, iLoopBody); + + if( pWInfo->nLevel>1 ){ sqlite3StackFree(db, pOrTab); } + if( !untestedTerms ) disableTerm(pLevel, pTerm); + }else +#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ + + { + /* Case 6: There is no usable index. We must do a complete + ** scan of the entire table. + */ + static const u8 aStep[] = { OP_Next, OP_Prev }; + static const u8 aStart[] = { OP_Rewind, OP_Last }; + assert( bRev==0 || bRev==1 ); + if( pTabItem->fg.isRecursive ){ + /* Tables marked isRecursive have only a single row that is stored in + ** a pseudo-cursor. No need to Rewind or Next such cursors. */ + pLevel->op = OP_Noop; + }else{ + codeCursorHint(pTabItem, pWInfo, pLevel, 0); + pLevel->op = aStep[bRev]; + pLevel->p1 = iCur; + pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrHalt); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + } + } + +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + pLevel->addrVisit = sqlite3VdbeCurrentAddr(v); +#endif + + /* Insert code to test every subexpression that can be completely + ** computed using the current set of tables. + ** + ** This loop may run between one and three times, depending on the + ** constraints to be generated. The value of stack variable iLoop + ** determines the constraints coded by each iteration, as follows: + ** + ** iLoop==1: Code only expressions that are entirely covered by pIdx. + ** iLoop==2: Code remaining expressions that do not contain correlated + ** sub-queries. + ** iLoop==3: Code all remaining expressions. + ** + ** An effort is made to skip unnecessary iterations of the loop. + */ + iLoop = (pIdx ? 1 : 2); + do{ + int iNext = 0; /* Next value for iLoop */ + for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ + Expr *pE; + int skipLikeAddr = 0; + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + testcase( pTerm->wtFlags & TERM_CODED ); + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ + testcase( pWInfo->untestedTerms==0 + && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE)!=0 ); + pWInfo->untestedTerms = 1; + continue; + } + pE = pTerm->pExpr; + assert( pE!=0 ); + if( (pTabItem->fg.jointype&JT_LEFT) && !ExprHasProperty(pE,EP_FromJoin) ){ + continue; + } + + if( iLoop==1 && !sqlite3ExprCoveredByIndex(pE, pLevel->iTabCur, pIdx) ){ + iNext = 2; + continue; + } + if( iLoop<3 && (pTerm->wtFlags & TERM_VARSELECT) ){ + if( iNext==0 ) iNext = 3; + continue; + } + + if( (pTerm->wtFlags & TERM_LIKECOND)!=0 ){ + /* If the TERM_LIKECOND flag is set, that means that the range search + ** is sufficient to guarantee that the LIKE operator is true, so we + ** can skip the call to the like(A,B) function. But this only works + ** for strings. So do not skip the call to the function on the pass + ** that compares BLOBs. */ +#ifdef SQLITE_LIKE_DOESNT_MATCH_BLOBS + continue; +#else + u32 x = pLevel->iLikeRepCntr; + if( x>0 ){ + skipLikeAddr = sqlite3VdbeAddOp1(v, (x&1)?OP_IfNot:OP_If,(int)(x>>1)); + VdbeCoverageIf(v, (x&1)==1); + VdbeCoverageIf(v, (x&1)==0); + } +#endif + } +#ifdef WHERETRACE_ENABLED /* 0xffff */ + if( sqlite3WhereTrace ){ + VdbeNoopComment((v, "WhereTerm[%d] (%p) priority=%d", + pWC->nTerm-j, pTerm, iLoop)); + } + if( sqlite3WhereTrace & 0x800 ){ + sqlite3DebugPrintf("Coding auxiliary constraint:\n"); + sqlite3WhereTermPrint(pTerm, pWC->nTerm-j); + } +#endif + sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL); + if( skipLikeAddr ) sqlite3VdbeJumpHere(v, skipLikeAddr); + pTerm->wtFlags |= TERM_CODED; + } + iLoop = iNext; + }while( iLoop>0 ); + + /* Insert code to test for implied constraints based on transitivity + ** of the "==" operator. + ** + ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" + ** and we are coding the t1 loop and the t2 loop has not yet coded, + ** then we cannot use the "t1.a=t2.b" constraint, but we can code + ** the implied "t1.a=123" constraint. + */ + for(pTerm=pWC->a, j=pWC->nBase; j>0; j--, pTerm++){ + Expr *pE, sEAlt; + WhereTerm *pAlt; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->eOperator & (WO_EQ|WO_IS))==0 ) continue; + if( (pTerm->eOperator & WO_EQUIV)==0 ) continue; + if( pTerm->leftCursor!=iCur ) continue; + if( pTabItem->fg.jointype & JT_LEFT ) continue; + pE = pTerm->pExpr; +#ifdef WHERETRACE_ENABLED /* 0x800 */ + if( sqlite3WhereTrace & 0x800 ){ + sqlite3DebugPrintf("Coding transitive constraint:\n"); + sqlite3WhereTermPrint(pTerm, pWC->nTerm-j); + } +#endif + assert( !ExprHasProperty(pE, EP_FromJoin) ); + assert( (pTerm->prereqRight & pLevel->notReady)!=0 ); + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); + pAlt = sqlite3WhereFindTerm(pWC, iCur, pTerm->u.x.leftColumn, notReady, + WO_EQ|WO_IN|WO_IS, 0); + if( pAlt==0 ) continue; + if( pAlt->wtFlags & (TERM_CODED) ) continue; + if( (pAlt->eOperator & WO_IN) + && ExprUseXSelect(pAlt->pExpr) + && (pAlt->pExpr->x.pSelect->pEList->nExpr>1) + ){ + continue; + } + testcase( pAlt->eOperator & WO_EQ ); + testcase( pAlt->eOperator & WO_IS ); + testcase( pAlt->eOperator & WO_IN ); + VdbeModuleComment((v, "begin transitive constraint")); + sEAlt = *pAlt->pExpr; + sEAlt.pLeft = pE->pLeft; + sqlite3ExprIfFalse(pParse, &sEAlt, addrCont, SQLITE_JUMPIFNULL); + pAlt->wtFlags |= TERM_CODED; + } + + /* For a LEFT OUTER JOIN, generate code that will record the fact that + ** at least one row of the right table has matched the left table. + */ + if( pLevel->iLeftJoin ){ + pLevel->addrFirst = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp2(v, OP_Integer, 1, pLevel->iLeftJoin); + VdbeComment((v, "record LEFT JOIN hit")); + for(pTerm=pWC->a, j=0; jnBase; j++, pTerm++){ + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + testcase( pTerm->wtFlags & TERM_CODED ); + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ + assert( pWInfo->untestedTerms ); + continue; + } + assert( pTerm->pExpr ); + sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL); + pTerm->wtFlags |= TERM_CODED; + } + } + +#if WHERETRACE_ENABLED /* 0x20800 */ + if( sqlite3WhereTrace & 0x20000 ){ + sqlite3DebugPrintf("All WHERE-clause terms after coding level %d:\n", + iLevel); + sqlite3WhereClausePrint(pWC); + } + if( sqlite3WhereTrace & 0x800 ){ + sqlite3DebugPrintf("End Coding level %d: notReady=%llx\n", + iLevel, (u64)pLevel->notReady); + } +#endif + return pLevel->notReady; +} + +/************** End of wherecode.c *******************************************/ +/************** Begin file whereexpr.c ***************************************/ +/* +** 2015-06-08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This module contains C code that generates VDBE code used to process +** the WHERE clause of SQL statements. +** +** This file was originally part of where.c but was split out to improve +** readability and editabiliity. This file contains utility routines for +** analyzing Expr objects in the WHERE clause. +*/ +/* #include "sqliteInt.h" */ +/* #include "whereInt.h" */ + +/* Forward declarations */ +static void exprAnalyze(SrcList*, WhereClause*, int); + +/* +** Deallocate all memory associated with a WhereOrInfo object. +*/ +static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){ + sqlite3WhereClauseClear(&p->wc); + sqlite3DbFree(db, p); +} + +/* +** Deallocate all memory associated with a WhereAndInfo object. +*/ +static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){ + sqlite3WhereClauseClear(&p->wc); + sqlite3DbFree(db, p); +} + +/* +** Add a single new WhereTerm entry to the WhereClause object pWC. +** The new WhereTerm object is constructed from Expr p and with wtFlags. +** The index in pWC->a[] of the new WhereTerm is returned on success. +** 0 is returned if the new WhereTerm could not be added due to a memory +** allocation error. The memory allocation failure will be recorded in +** the db->mallocFailed flag so that higher-level functions can detect it. +** +** This routine will increase the size of the pWC->a[] array as necessary. +** +** If the wtFlags argument includes TERM_DYNAMIC, then responsibility +** for freeing the expression p is assumed by the WhereClause object pWC. +** This is true even if this routine fails to allocate a new WhereTerm. +** +** WARNING: This routine might reallocate the space used to store +** WhereTerms. All pointers to WhereTerms should be invalidated after +** calling this routine. Such pointers may be reinitialized by referencing +** the pWC->a[] array. +*/ +static int whereClauseInsert(WhereClause *pWC, Expr *p, u16 wtFlags){ + WhereTerm *pTerm; + int idx; + testcase( wtFlags & TERM_VIRTUAL ); + if( pWC->nTerm>=pWC->nSlot ){ + WhereTerm *pOld = pWC->a; + sqlite3 *db = pWC->pWInfo->pParse->db; + pWC->a = sqlite3DbMallocRawNN(db, sizeof(pWC->a[0])*pWC->nSlot*2 ); + if( pWC->a==0 ){ + if( wtFlags & TERM_DYNAMIC ){ + sqlite3ExprDelete(db, p); + } + pWC->a = pOld; + return 0; + } + memcpy(pWC->a, pOld, sizeof(pWC->a[0])*pWC->nTerm); + if( pOld!=pWC->aStatic ){ + sqlite3DbFree(db, pOld); + } + pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); + } + pTerm = &pWC->a[idx = pWC->nTerm++]; + if( (wtFlags & TERM_VIRTUAL)==0 ) pWC->nBase = pWC->nTerm; + if( p && ExprHasProperty(p, EP_Unlikely) ){ + pTerm->truthProb = sqlite3LogEst(p->iTable) - 270; + }else{ + pTerm->truthProb = 1; + } + pTerm->pExpr = sqlite3ExprSkipCollateAndLikely(p); + pTerm->wtFlags = wtFlags; + pTerm->pWC = pWC; + pTerm->iParent = -1; + memset(&pTerm->eOperator, 0, + sizeof(WhereTerm) - offsetof(WhereTerm,eOperator)); + return idx; +} + +/* +** Return TRUE if the given operator is one of the operators that is +** allowed for an indexable WHERE clause term. The allowed operators are +** "=", "<", ">", "<=", ">=", "IN", "IS", and "IS NULL" +*/ +static int allowedOp(int op){ + assert( TK_GT>TK_EQ && TK_GTTK_EQ && TK_LTTK_EQ && TK_LE=TK_EQ && op<=TK_GE) || op==TK_ISNULL || op==TK_IS; +} + +/* +** Commute a comparison operator. Expressions of the form "X op Y" +** are converted into "Y op X". +*/ +static u16 exprCommute(Parse *pParse, Expr *pExpr){ + if( pExpr->pLeft->op==TK_VECTOR + || pExpr->pRight->op==TK_VECTOR + || sqlite3BinaryCompareCollSeq(pParse, pExpr->pLeft, pExpr->pRight) != + sqlite3BinaryCompareCollSeq(pParse, pExpr->pRight, pExpr->pLeft) + ){ + pExpr->flags ^= EP_Commuted; + } + SWAP(Expr*,pExpr->pRight,pExpr->pLeft); + if( pExpr->op>=TK_GT ){ + assert( TK_LT==TK_GT+2 ); + assert( TK_GE==TK_LE+2 ); + assert( TK_GT>TK_EQ ); + assert( TK_GTop>=TK_GT && pExpr->op<=TK_GE ); + pExpr->op = ((pExpr->op-TK_GT)^2)+TK_GT; + } + return 0; +} + +/* +** Translate from TK_xx operator to WO_xx bitmask. +*/ +static u16 operatorMask(int op){ + u16 c; + assert( allowedOp(op) ); + if( op==TK_IN ){ + c = WO_IN; + }else if( op==TK_ISNULL ){ + c = WO_ISNULL; + }else if( op==TK_IS ){ + c = WO_IS; + }else{ + assert( (WO_EQ<<(op-TK_EQ)) < 0x7fff ); + c = (u16)(WO_EQ<<(op-TK_EQ)); + } + assert( op!=TK_ISNULL || c==WO_ISNULL ); + assert( op!=TK_IN || c==WO_IN ); + assert( op!=TK_EQ || c==WO_EQ ); + assert( op!=TK_LT || c==WO_LT ); + assert( op!=TK_LE || c==WO_LE ); + assert( op!=TK_GT || c==WO_GT ); + assert( op!=TK_GE || c==WO_GE ); + assert( op!=TK_IS || c==WO_IS ); + return c; +} + + +#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION +/* +** Check to see if the given expression is a LIKE or GLOB operator that +** can be optimized using inequality constraints. Return TRUE if it is +** so and false if not. +** +** In order for the operator to be optimizible, the RHS must be a string +** literal that does not begin with a wildcard. The LHS must be a column +** that may only be NULL, a string, or a BLOB, never a number. (This means +** that virtual tables cannot participate in the LIKE optimization.) The +** collating sequence for the column on the LHS must be appropriate for +** the operator. +*/ +static int isLikeOrGlob( + Parse *pParse, /* Parsing and code generating context */ + Expr *pExpr, /* Test this expression */ + Expr **ppPrefix, /* Pointer to TK_STRING expression with pattern prefix */ + int *pisComplete, /* True if the only wildcard is % in the last character */ + int *pnoCase /* True if uppercase is equivalent to lowercase */ +){ + const u8 *z = 0; /* String on RHS of LIKE operator */ + Expr *pRight, *pLeft; /* Right and left size of LIKE operator */ + ExprList *pList; /* List of operands to the LIKE operator */ + u8 c; /* One character in z[] */ + int cnt; /* Number of non-wildcard prefix characters */ + u8 wc[4]; /* Wildcard characters */ + sqlite3 *db = pParse->db; /* Database connection */ + sqlite3_value *pVal = 0; + int op; /* Opcode of pRight */ + int rc; /* Result code to return */ + + if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, (char*)wc) ){ + return 0; + } +#ifdef SQLITE_EBCDIC + if( *pnoCase ) return 0; +#endif + assert( ExprUseXList(pExpr) ); + pList = pExpr->x.pList; + pLeft = pList->a[1].pExpr; + + pRight = sqlite3ExprSkipCollate(pList->a[0].pExpr); + op = pRight->op; + if( op==TK_VARIABLE && (db->flags & SQLITE_EnableQPSG)==0 ){ + Vdbe *pReprepare = pParse->pReprepare; + int iCol = pRight->iColumn; + pVal = sqlite3VdbeGetBoundValue(pReprepare, iCol, SQLITE_AFF_BLOB); + if( pVal && sqlite3_value_type(pVal)==SQLITE_TEXT ){ + z = sqlite3_value_text(pVal); + } + sqlite3VdbeSetVarmask(pParse->pVdbe, iCol); + assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER ); + }else if( op==TK_STRING ){ + assert( !ExprHasProperty(pRight, EP_IntValue) ); + z = (u8*)pRight->u.zToken; + } + if( z ){ + + /* Count the number of prefix characters prior to the first wildcard */ + cnt = 0; + while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ + cnt++; + if( c==wc[3] && z[cnt]!=0 ) cnt++; + } + + /* The optimization is possible only if (1) the pattern does not begin + ** with a wildcard and if (2) the non-wildcard prefix does not end with + ** an (illegal 0xff) character, or (3) the pattern does not consist of + ** a single escape character. The second condition is necessary so + ** that we can increment the prefix key to find an upper bound for the + ** range search. The third is because the caller assumes that the pattern + ** consists of at least one character after all escapes have been + ** removed. */ + if( cnt!=0 && 255!=(u8)z[cnt-1] && (cnt>1 || z[0]!=wc[3]) ){ + Expr *pPrefix; + + /* A "complete" match if the pattern ends with "*" or "%" */ + *pisComplete = c==wc[0] && z[cnt+1]==0; + + /* Get the pattern prefix. Remove all escapes from the prefix. */ + pPrefix = sqlite3Expr(db, TK_STRING, (char*)z); + if( pPrefix ){ + int iFrom, iTo; + char *zNew; + assert( !ExprHasProperty(pPrefix, EP_IntValue) ); + zNew = pPrefix->u.zToken; + zNew[cnt] = 0; + for(iFrom=iTo=0; iFrom0 ); + + /* If the LHS is not an ordinary column with TEXT affinity, then the + ** pattern prefix boundaries (both the start and end boundaries) must + ** not look like a number. Otherwise the pattern might be treated as + ** a number, which will invalidate the LIKE optimization. + ** + ** Getting this right has been a persistent source of bugs in the + ** LIKE optimization. See, for example: + ** 2018-09-10 https://sqlite.org/src/info/c94369cae9b561b1 + ** 2019-05-02 https://sqlite.org/src/info/b043a54c3de54b28 + ** 2019-06-10 https://sqlite.org/src/info/fd76310a5e843e07 + ** 2019-06-14 https://sqlite.org/src/info/ce8717f0885af975 + ** 2019-09-03 https://sqlite.org/src/info/0f0428096f17252a + */ + if( pLeft->op!=TK_COLUMN + || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT + || (ALWAYS( ExprUseYTab(pLeft) ) + && pLeft->y.pTab + && IsVirtual(pLeft->y.pTab)) /* Might be numeric */ + ){ + int isNum; + double rDummy; + isNum = sqlite3AtoF(zNew, &rDummy, iTo, SQLITE_UTF8); + if( isNum<=0 ){ + if( iTo==1 && zNew[0]=='-' ){ + isNum = +1; + }else{ + zNew[iTo-1]++; + isNum = sqlite3AtoF(zNew, &rDummy, iTo, SQLITE_UTF8); + zNew[iTo-1]--; + } + } + if( isNum>0 ){ + sqlite3ExprDelete(db, pPrefix); + sqlite3ValueFree(pVal); + return 0; + } + } + } + *ppPrefix = pPrefix; + + /* If the RHS pattern is a bound parameter, make arrangements to + ** reprepare the statement when that parameter is rebound */ + if( op==TK_VARIABLE ){ + Vdbe *v = pParse->pVdbe; + sqlite3VdbeSetVarmask(v, pRight->iColumn); + assert( !ExprHasProperty(pRight, EP_IntValue) ); + if( *pisComplete && pRight->u.zToken[1] ){ + /* If the rhs of the LIKE expression is a variable, and the current + ** value of the variable means there is no need to invoke the LIKE + ** function, then no OP_Variable will be added to the program. + ** This causes problems for the sqlite3_bind_parameter_name() + ** API. To work around them, add a dummy OP_Variable here. + */ + int r1 = sqlite3GetTempReg(pParse); + sqlite3ExprCodeTarget(pParse, pRight, r1); + sqlite3VdbeChangeP3(v, sqlite3VdbeCurrentAddr(v)-1, 0); + sqlite3ReleaseTempReg(pParse, r1); + } + } + }else{ + z = 0; + } + } + + rc = (z!=0); + sqlite3ValueFree(pVal); + return rc; +} +#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */ + + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Check to see if the pExpr expression is a form that needs to be passed +** to the xBestIndex method of virtual tables. Forms of interest include: +** +** Expression Virtual Table Operator +** ----------------------- --------------------------------- +** 1. column MATCH expr SQLITE_INDEX_CONSTRAINT_MATCH +** 2. column GLOB expr SQLITE_INDEX_CONSTRAINT_GLOB +** 3. column LIKE expr SQLITE_INDEX_CONSTRAINT_LIKE +** 4. column REGEXP expr SQLITE_INDEX_CONSTRAINT_REGEXP +** 5. column != expr SQLITE_INDEX_CONSTRAINT_NE +** 6. expr != column SQLITE_INDEX_CONSTRAINT_NE +** 7. column IS NOT expr SQLITE_INDEX_CONSTRAINT_ISNOT +** 8. expr IS NOT column SQLITE_INDEX_CONSTRAINT_ISNOT +** 9. column IS NOT NULL SQLITE_INDEX_CONSTRAINT_ISNOTNULL +** +** In every case, "column" must be a column of a virtual table. If there +** is a match, set *ppLeft to the "column" expression, set *ppRight to the +** "expr" expression (even though in forms (6) and (8) the column is on the +** right and the expression is on the left). Also set *peOp2 to the +** appropriate virtual table operator. The return value is 1 or 2 if there +** is a match. The usual return is 1, but if the RHS is also a column +** of virtual table in forms (5) or (7) then return 2. +** +** If the expression matches none of the patterns above, return 0. +*/ +static int isAuxiliaryVtabOperator( + sqlite3 *db, /* Parsing context */ + Expr *pExpr, /* Test this expression */ + unsigned char *peOp2, /* OUT: 0 for MATCH, or else an op2 value */ + Expr **ppLeft, /* Column expression to left of MATCH/op2 */ + Expr **ppRight /* Expression to left of MATCH/op2 */ +){ + if( pExpr->op==TK_FUNCTION ){ + static const struct Op2 { + const char *zOp; + unsigned char eOp2; + } aOp[] = { + { "match", SQLITE_INDEX_CONSTRAINT_MATCH }, + { "glob", SQLITE_INDEX_CONSTRAINT_GLOB }, + { "like", SQLITE_INDEX_CONSTRAINT_LIKE }, + { "regexp", SQLITE_INDEX_CONSTRAINT_REGEXP } + }; + ExprList *pList; + Expr *pCol; /* Column reference */ + int i; + + assert( ExprUseXList(pExpr) ); + pList = pExpr->x.pList; + if( pList==0 || pList->nExpr!=2 ){ + return 0; + } + + /* Built-in operators MATCH, GLOB, LIKE, and REGEXP attach to a + ** virtual table on their second argument, which is the same as + ** the left-hand side operand in their in-fix form. + ** + ** vtab_column MATCH expression + ** MATCH(expression,vtab_column) + */ + pCol = pList->a[1].pExpr; + assert( pCol->op!=TK_COLUMN || ExprUseYTab(pCol) ); + testcase( pCol->op==TK_COLUMN && pCol->y.pTab==0 ); + if( ExprIsVtab(pCol) ){ + for(i=0; iu.zToken, aOp[i].zOp)==0 ){ + *peOp2 = aOp[i].eOp2; + *ppRight = pList->a[0].pExpr; + *ppLeft = pCol; + return 1; + } + } + } + + /* We can also match against the first column of overloaded + ** functions where xFindFunction returns a value of at least + ** SQLITE_INDEX_CONSTRAINT_FUNCTION. + ** + ** OVERLOADED(vtab_column,expression) + ** + ** Historically, xFindFunction expected to see lower-case function + ** names. But for this use case, xFindFunction is expected to deal + ** with function names in an arbitrary case. + */ + pCol = pList->a[0].pExpr; + assert( pCol->op!=TK_COLUMN || ExprUseYTab(pCol) ); + testcase( pCol->op==TK_COLUMN && pCol->y.pTab==0 ); + if( ExprIsVtab(pCol) ){ + sqlite3_vtab *pVtab; + sqlite3_module *pMod; + void (*xNotUsed)(sqlite3_context*,int,sqlite3_value**); + void *pNotUsed; + pVtab = sqlite3GetVTable(db, pCol->y.pTab)->pVtab; + assert( pVtab!=0 ); + assert( pVtab->pModule!=0 ); + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + pMod = (sqlite3_module *)pVtab->pModule; + if( pMod->xFindFunction!=0 ){ + i = pMod->xFindFunction(pVtab,2, pExpr->u.zToken, &xNotUsed, &pNotUsed); + if( i>=SQLITE_INDEX_CONSTRAINT_FUNCTION ){ + *peOp2 = i; + *ppRight = pList->a[1].pExpr; + *ppLeft = pCol; + return 1; + } + } + } + }else if( pExpr->op==TK_NE || pExpr->op==TK_ISNOT || pExpr->op==TK_NOTNULL ){ + int res = 0; + Expr *pLeft = pExpr->pLeft; + Expr *pRight = pExpr->pRight; + assert( pLeft->op!=TK_COLUMN || ExprUseYTab(pLeft) ); + testcase( pLeft->op==TK_COLUMN && pLeft->y.pTab==0 ); + if( ExprIsVtab(pLeft) ){ + res++; + } + assert( pRight==0 || pRight->op!=TK_COLUMN || ExprUseYTab(pRight) ); + testcase( pRight && pRight->op==TK_COLUMN && pRight->y.pTab==0 ); + if( pRight && ExprIsVtab(pRight) ){ + res++; + SWAP(Expr*, pLeft, pRight); + } + *ppLeft = pLeft; + *ppRight = pRight; + if( pExpr->op==TK_NE ) *peOp2 = SQLITE_INDEX_CONSTRAINT_NE; + if( pExpr->op==TK_ISNOT ) *peOp2 = SQLITE_INDEX_CONSTRAINT_ISNOT; + if( pExpr->op==TK_NOTNULL ) *peOp2 = SQLITE_INDEX_CONSTRAINT_ISNOTNULL; + return res; + } + return 0; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** If the pBase expression originated in the ON or USING clause of +** a join, then transfer the appropriate markings over to derived. +*/ +static void transferJoinMarkings(Expr *pDerived, Expr *pBase){ + if( pDerived ){ + pDerived->flags |= pBase->flags & EP_FromJoin; + pDerived->w.iRightJoinTable = pBase->w.iRightJoinTable; + } +} + +/* +** Mark term iChild as being a child of term iParent +*/ +static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){ + pWC->a[iChild].iParent = iParent; + pWC->a[iChild].truthProb = pWC->a[iParent].truthProb; + pWC->a[iParent].nChild++; +} + +/* +** Return the N-th AND-connected subterm of pTerm. Or if pTerm is not +** a conjunction, then return just pTerm when N==0. If N is exceeds +** the number of available subterms, return NULL. +*/ +static WhereTerm *whereNthSubterm(WhereTerm *pTerm, int N){ + if( pTerm->eOperator!=WO_AND ){ + return N==0 ? pTerm : 0; + } + if( Nu.pAndInfo->wc.nTerm ){ + return &pTerm->u.pAndInfo->wc.a[N]; + } + return 0; +} + +/* +** Subterms pOne and pTwo are contained within WHERE clause pWC. The +** two subterms are in disjunction - they are OR-ed together. +** +** If these two terms are both of the form: "A op B" with the same +** A and B values but different operators and if the operators are +** compatible (if one is = and the other is <, for example) then +** add a new virtual AND term to pWC that is the combination of the +** two. +** +** Some examples: +** +** x x<=y +** x=y OR x=y --> x=y +** x<=y OR x x<=y +** +** The following is NOT generated: +** +** xy --> x!=y +*/ +static void whereCombineDisjuncts( + SrcList *pSrc, /* the FROM clause */ + WhereClause *pWC, /* The complete WHERE clause */ + WhereTerm *pOne, /* First disjunct */ + WhereTerm *pTwo /* Second disjunct */ +){ + u16 eOp = pOne->eOperator | pTwo->eOperator; + sqlite3 *db; /* Database connection (for malloc) */ + Expr *pNew; /* New virtual expression */ + int op; /* Operator for the combined expression */ + int idxNew; /* Index in pWC of the next virtual term */ + + if( (pOne->wtFlags | pTwo->wtFlags) & TERM_VNULL ) return; + if( (pOne->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; + if( (pTwo->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; + if( (eOp & (WO_EQ|WO_LT|WO_LE))!=eOp + && (eOp & (WO_EQ|WO_GT|WO_GE))!=eOp ) return; + assert( pOne->pExpr->pLeft!=0 && pOne->pExpr->pRight!=0 ); + assert( pTwo->pExpr->pLeft!=0 && pTwo->pExpr->pRight!=0 ); + if( sqlite3ExprCompare(0,pOne->pExpr->pLeft, pTwo->pExpr->pLeft, -1) ) return; + if( sqlite3ExprCompare(0,pOne->pExpr->pRight, pTwo->pExpr->pRight,-1) )return; + /* If we reach this point, it means the two subterms can be combined */ + if( (eOp & (eOp-1))!=0 ){ + if( eOp & (WO_LT|WO_LE) ){ + eOp = WO_LE; + }else{ + assert( eOp & (WO_GT|WO_GE) ); + eOp = WO_GE; + } + } + db = pWC->pWInfo->pParse->db; + pNew = sqlite3ExprDup(db, pOne->pExpr, 0); + if( pNew==0 ) return; + for(op=TK_EQ; eOp!=(WO_EQ<<(op-TK_EQ)); op++){ assert( opop = op; + idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); + exprAnalyze(pSrc, pWC, idxNew); +} + +#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) +/* +** Analyze a term that consists of two or more OR-connected +** subterms. So in: +** +** ... WHERE (a=5) AND (b=7 OR c=9 OR d=13) AND (d=13) +** ^^^^^^^^^^^^^^^^^^^^ +** +** This routine analyzes terms such as the middle term in the above example. +** A WhereOrTerm object is computed and attached to the term under +** analysis, regardless of the outcome of the analysis. Hence: +** +** WhereTerm.wtFlags |= TERM_ORINFO +** WhereTerm.u.pOrInfo = a dynamically allocated WhereOrTerm object +** +** The term being analyzed must have two or more of OR-connected subterms. +** A single subterm might be a set of AND-connected sub-subterms. +** Examples of terms under analysis: +** +** (A) t1.x=t2.y OR t1.x=t2.z OR t1.y=15 OR t1.z=t3.a+5 +** (B) x=expr1 OR expr2=x OR x=expr3 +** (C) t1.x=t2.y OR (t1.x=t2.z AND t1.y=15) +** (D) x=expr1 OR (y>11 AND y<22 AND z LIKE '*hello*') +** (E) (p.a=1 AND q.b=2 AND r.c=3) OR (p.x=4 AND q.y=5 AND r.z=6) +** (F) x>A OR (x=A AND y>=B) +** +** CASE 1: +** +** If all subterms are of the form T.C=expr for some single column of C and +** a single table T (as shown in example B above) then create a new virtual +** term that is an equivalent IN expression. In other words, if the term +** being analyzed is: +** +** x = expr1 OR expr2 = x OR x = expr3 +** +** then create a new virtual term like this: +** +** x IN (expr1,expr2,expr3) +** +** CASE 2: +** +** If there are exactly two disjuncts and one side has x>A and the other side +** has x=A (for the same x and A) then add a new virtual conjunct term to the +** WHERE clause of the form "x>=A". Example: +** +** x>A OR (x=A AND y>B) adds: x>=A +** +** The added conjunct can sometimes be helpful in query planning. +** +** CASE 3: +** +** If all subterms are indexable by a single table T, then set +** +** WhereTerm.eOperator = WO_OR +** WhereTerm.u.pOrInfo->indexable |= the cursor number for table T +** +** A subterm is "indexable" if it is of the form +** "T.C " where C is any column of table T and +** is one of "=", "<", "<=", ">", ">=", "IS NULL", or "IN". +** A subterm is also indexable if it is an AND of two or more +** subsubterms at least one of which is indexable. Indexable AND +** subterms have their eOperator set to WO_AND and they have +** u.pAndInfo set to a dynamically allocated WhereAndTerm object. +** +** From another point of view, "indexable" means that the subterm could +** potentially be used with an index if an appropriate index exists. +** This analysis does not consider whether or not the index exists; that +** is decided elsewhere. This analysis only looks at whether subterms +** appropriate for indexing exist. +** +** All examples A through E above satisfy case 3. But if a term +** also satisfies case 1 (such as B) we know that the optimizer will +** always prefer case 1, so in that case we pretend that case 3 is not +** satisfied. +** +** It might be the case that multiple tables are indexable. For example, +** (E) above is indexable on tables P, Q, and R. +** +** Terms that satisfy case 3 are candidates for lookup by using +** separate indices to find rowids for each subterm and composing +** the union of all rowids using a RowSet object. This is similar +** to "bitmap indices" in other database engines. +** +** OTHERWISE: +** +** If none of cases 1, 2, or 3 apply, then leave the eOperator set to +** zero. This term is not useful for search. +*/ +static void exprAnalyzeOrTerm( + SrcList *pSrc, /* the FROM clause */ + WhereClause *pWC, /* the complete WHERE clause */ + int idxTerm /* Index of the OR-term to be analyzed */ +){ + WhereInfo *pWInfo = pWC->pWInfo; /* WHERE clause processing context */ + Parse *pParse = pWInfo->pParse; /* Parser context */ + sqlite3 *db = pParse->db; /* Database connection */ + WhereTerm *pTerm = &pWC->a[idxTerm]; /* The term to be analyzed */ + Expr *pExpr = pTerm->pExpr; /* The expression of the term */ + int i; /* Loop counters */ + WhereClause *pOrWc; /* Breakup of pTerm into subterms */ + WhereTerm *pOrTerm; /* A Sub-term within the pOrWc */ + WhereOrInfo *pOrInfo; /* Additional information associated with pTerm */ + Bitmask chngToIN; /* Tables that might satisfy case 1 */ + Bitmask indexable; /* Tables that are indexable, satisfying case 2 */ + + /* + ** Break the OR clause into its separate subterms. The subterms are + ** stored in a WhereClause structure containing within the WhereOrInfo + ** object that is attached to the original OR clause term. + */ + assert( (pTerm->wtFlags & (TERM_DYNAMIC|TERM_ORINFO|TERM_ANDINFO))==0 ); + assert( pExpr->op==TK_OR ); + pTerm->u.pOrInfo = pOrInfo = sqlite3DbMallocZero(db, sizeof(*pOrInfo)); + if( pOrInfo==0 ) return; + pTerm->wtFlags |= TERM_ORINFO; + pOrWc = &pOrInfo->wc; + memset(pOrWc->aStatic, 0, sizeof(pOrWc->aStatic)); + sqlite3WhereClauseInit(pOrWc, pWInfo); + sqlite3WhereSplit(pOrWc, pExpr, TK_OR); + sqlite3WhereExprAnalyze(pSrc, pOrWc); + if( db->mallocFailed ) return; + assert( pOrWc->nTerm>=2 ); + + /* + ** Compute the set of tables that might satisfy cases 1 or 3. + */ + indexable = ~(Bitmask)0; + chngToIN = ~(Bitmask)0; + for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0 && indexable; i--, pOrTerm++){ + if( (pOrTerm->eOperator & WO_SINGLE)==0 ){ + WhereAndInfo *pAndInfo; + assert( (pOrTerm->wtFlags & (TERM_ANDINFO|TERM_ORINFO))==0 ); + chngToIN = 0; + pAndInfo = sqlite3DbMallocRawNN(db, sizeof(*pAndInfo)); + if( pAndInfo ){ + WhereClause *pAndWC; + WhereTerm *pAndTerm; + int j; + Bitmask b = 0; + pOrTerm->u.pAndInfo = pAndInfo; + pOrTerm->wtFlags |= TERM_ANDINFO; + pOrTerm->eOperator = WO_AND; + pOrTerm->leftCursor = -1; + pAndWC = &pAndInfo->wc; + memset(pAndWC->aStatic, 0, sizeof(pAndWC->aStatic)); + sqlite3WhereClauseInit(pAndWC, pWC->pWInfo); + sqlite3WhereSplit(pAndWC, pOrTerm->pExpr, TK_AND); + sqlite3WhereExprAnalyze(pSrc, pAndWC); + pAndWC->pOuter = pWC; + if( !db->mallocFailed ){ + for(j=0, pAndTerm=pAndWC->a; jnTerm; j++, pAndTerm++){ + assert( pAndTerm->pExpr ); + if( allowedOp(pAndTerm->pExpr->op) + || pAndTerm->eOperator==WO_AUX + ){ + b |= sqlite3WhereGetMask(&pWInfo->sMaskSet, pAndTerm->leftCursor); + } + } + } + indexable &= b; + } + }else if( pOrTerm->wtFlags & TERM_COPIED ){ + /* Skip this term for now. We revisit it when we process the + ** corresponding TERM_VIRTUAL term */ + }else{ + Bitmask b; + b = sqlite3WhereGetMask(&pWInfo->sMaskSet, pOrTerm->leftCursor); + if( pOrTerm->wtFlags & TERM_VIRTUAL ){ + WhereTerm *pOther = &pOrWc->a[pOrTerm->iParent]; + b |= sqlite3WhereGetMask(&pWInfo->sMaskSet, pOther->leftCursor); + } + indexable &= b; + if( (pOrTerm->eOperator & WO_EQ)==0 ){ + chngToIN = 0; + }else{ + chngToIN &= b; + } + } + } + + /* + ** Record the set of tables that satisfy case 3. The set might be + ** empty. + */ + pOrInfo->indexable = indexable; + pTerm->eOperator = WO_OR; + pTerm->leftCursor = -1; + if( indexable ){ + pWC->hasOr = 1; + } + + /* For a two-way OR, attempt to implementation case 2. + */ + if( indexable && pOrWc->nTerm==2 ){ + int iOne = 0; + WhereTerm *pOne; + while( (pOne = whereNthSubterm(&pOrWc->a[0],iOne++))!=0 ){ + int iTwo = 0; + WhereTerm *pTwo; + while( (pTwo = whereNthSubterm(&pOrWc->a[1],iTwo++))!=0 ){ + whereCombineDisjuncts(pSrc, pWC, pOne, pTwo); + } + } + } + + /* + ** chngToIN holds a set of tables that *might* satisfy case 1. But + ** we have to do some additional checking to see if case 1 really + ** is satisfied. + ** + ** chngToIN will hold either 0, 1, or 2 bits. The 0-bit case means + ** that there is no possibility of transforming the OR clause into an + ** IN operator because one or more terms in the OR clause contain + ** something other than == on a column in the single table. The 1-bit + ** case means that every term of the OR clause is of the form + ** "table.column=expr" for some single table. The one bit that is set + ** will correspond to the common table. We still need to check to make + ** sure the same column is used on all terms. The 2-bit case is when + ** the all terms are of the form "table1.column=table2.column". It + ** might be possible to form an IN operator with either table1.column + ** or table2.column as the LHS if either is common to every term of + ** the OR clause. + ** + ** Note that terms of the form "table.column1=table.column2" (the + ** same table on both sizes of the ==) cannot be optimized. + */ + if( chngToIN ){ + int okToChngToIN = 0; /* True if the conversion to IN is valid */ + int iColumn = -1; /* Column index on lhs of IN operator */ + int iCursor = -1; /* Table cursor common to all terms */ + int j = 0; /* Loop counter */ + + /* Search for a table and column that appears on one side or the + ** other of the == operator in every subterm. That table and column + ** will be recorded in iCursor and iColumn. There might not be any + ** such table and column. Set okToChngToIN if an appropriate table + ** and column is found but leave okToChngToIN false if not found. + */ + for(j=0; j<2 && !okToChngToIN; j++){ + Expr *pLeft = 0; + pOrTerm = pOrWc->a; + for(i=pOrWc->nTerm-1; i>=0; i--, pOrTerm++){ + assert( pOrTerm->eOperator & WO_EQ ); + pOrTerm->wtFlags &= ~TERM_OK; + if( pOrTerm->leftCursor==iCursor ){ + /* This is the 2-bit case and we are on the second iteration and + ** current term is from the first iteration. So skip this term. */ + assert( j==1 ); + continue; + } + if( (chngToIN & sqlite3WhereGetMask(&pWInfo->sMaskSet, + pOrTerm->leftCursor))==0 ){ + /* This term must be of the form t1.a==t2.b where t2 is in the + ** chngToIN set but t1 is not. This term will be either preceded + ** or follwed by an inverted copy (t2.b==t1.a). Skip this term + ** and use its inversion. */ + testcase( pOrTerm->wtFlags & TERM_COPIED ); + testcase( pOrTerm->wtFlags & TERM_VIRTUAL ); + assert( pOrTerm->wtFlags & (TERM_COPIED|TERM_VIRTUAL) ); + continue; + } + assert( (pOrTerm->eOperator & (WO_OR|WO_AND))==0 ); + iColumn = pOrTerm->u.x.leftColumn; + iCursor = pOrTerm->leftCursor; + pLeft = pOrTerm->pExpr->pLeft; + break; + } + if( i<0 ){ + /* No candidate table+column was found. This can only occur + ** on the second iteration */ + assert( j==1 ); + assert( IsPowerOfTwo(chngToIN) ); + assert( chngToIN==sqlite3WhereGetMask(&pWInfo->sMaskSet, iCursor) ); + break; + } + testcase( j==1 ); + + /* We have found a candidate table and column. Check to see if that + ** table and column is common to every term in the OR clause */ + okToChngToIN = 1; + for(; i>=0 && okToChngToIN; i--, pOrTerm++){ + assert( pOrTerm->eOperator & WO_EQ ); + assert( (pOrTerm->eOperator & (WO_OR|WO_AND))==0 ); + if( pOrTerm->leftCursor!=iCursor ){ + pOrTerm->wtFlags &= ~TERM_OK; + }else if( pOrTerm->u.x.leftColumn!=iColumn || (iColumn==XN_EXPR + && sqlite3ExprCompare(pParse, pOrTerm->pExpr->pLeft, pLeft, -1) + )){ + okToChngToIN = 0; + }else{ + int affLeft, affRight; + /* If the right-hand side is also a column, then the affinities + ** of both right and left sides must be such that no type + ** conversions are required on the right. (Ticket #2249) + */ + affRight = sqlite3ExprAffinity(pOrTerm->pExpr->pRight); + affLeft = sqlite3ExprAffinity(pOrTerm->pExpr->pLeft); + if( affRight!=0 && affRight!=affLeft ){ + okToChngToIN = 0; + }else{ + pOrTerm->wtFlags |= TERM_OK; + } + } + } + } + + /* At this point, okToChngToIN is true if original pTerm satisfies + ** case 1. In that case, construct a new virtual term that is + ** pTerm converted into an IN operator. + */ + if( okToChngToIN ){ + Expr *pDup; /* A transient duplicate expression */ + ExprList *pList = 0; /* The RHS of the IN operator */ + Expr *pLeft = 0; /* The LHS of the IN operator */ + Expr *pNew; /* The complete IN operator */ + + for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0; i--, pOrTerm++){ + if( (pOrTerm->wtFlags & TERM_OK)==0 ) continue; + assert( pOrTerm->eOperator & WO_EQ ); + assert( (pOrTerm->eOperator & (WO_OR|WO_AND))==0 ); + assert( pOrTerm->leftCursor==iCursor ); + assert( pOrTerm->u.x.leftColumn==iColumn ); + pDup = sqlite3ExprDup(db, pOrTerm->pExpr->pRight, 0); + pList = sqlite3ExprListAppend(pWInfo->pParse, pList, pDup); + pLeft = pOrTerm->pExpr->pLeft; + } + assert( pLeft!=0 ); + pDup = sqlite3ExprDup(db, pLeft, 0); + pNew = sqlite3PExpr(pParse, TK_IN, pDup, 0); + if( pNew ){ + int idxNew; + transferJoinMarkings(pNew, pExpr); + assert( ExprUseXList(pNew) ); + pNew->x.pList = pList; + idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); + testcase( idxNew==0 ); + exprAnalyze(pSrc, pWC, idxNew); + /* pTerm = &pWC->a[idxTerm]; // would be needed if pTerm where reused */ + markTermAsChild(pWC, idxNew, idxTerm); + }else{ + sqlite3ExprListDelete(db, pList); + } + } + } +} +#endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */ + +/* +** We already know that pExpr is a binary operator where both operands are +** column references. This routine checks to see if pExpr is an equivalence +** relation: +** 1. The SQLITE_Transitive optimization must be enabled +** 2. Must be either an == or an IS operator +** 3. Not originating in the ON clause of an OUTER JOIN +** 4. The affinities of A and B must be compatible +** 5a. Both operands use the same collating sequence OR +** 5b. The overall collating sequence is BINARY +** If this routine returns TRUE, that means that the RHS can be substituted +** for the LHS anyplace else in the WHERE clause where the LHS column occurs. +** This is an optimization. No harm comes from returning 0. But if 1 is +** returned when it should not be, then incorrect answers might result. +*/ +static int termIsEquivalence(Parse *pParse, Expr *pExpr){ + char aff1, aff2; + CollSeq *pColl; + if( !OptimizationEnabled(pParse->db, SQLITE_Transitive) ) return 0; + if( pExpr->op!=TK_EQ && pExpr->op!=TK_IS ) return 0; + if( ExprHasProperty(pExpr, EP_FromJoin) ) return 0; + aff1 = sqlite3ExprAffinity(pExpr->pLeft); + aff2 = sqlite3ExprAffinity(pExpr->pRight); + if( aff1!=aff2 + && (!sqlite3IsNumericAffinity(aff1) || !sqlite3IsNumericAffinity(aff2)) + ){ + return 0; + } + pColl = sqlite3ExprCompareCollSeq(pParse, pExpr); + if( sqlite3IsBinary(pColl) ) return 1; + return sqlite3ExprCollSeqMatch(pParse, pExpr->pLeft, pExpr->pRight); +} + +/* +** Recursively walk the expressions of a SELECT statement and generate +** a bitmask indicating which tables are used in that expression +** tree. +*/ +static Bitmask exprSelectUsage(WhereMaskSet *pMaskSet, Select *pS){ + Bitmask mask = 0; + while( pS ){ + SrcList *pSrc = pS->pSrc; + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pEList); + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pGroupBy); + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pOrderBy); + mask |= sqlite3WhereExprUsage(pMaskSet, pS->pWhere); + mask |= sqlite3WhereExprUsage(pMaskSet, pS->pHaving); + if( ALWAYS(pSrc!=0) ){ + int i; + for(i=0; inSrc; i++){ + mask |= exprSelectUsage(pMaskSet, pSrc->a[i].pSelect); + mask |= sqlite3WhereExprUsage(pMaskSet, pSrc->a[i].pOn); + if( pSrc->a[i].fg.isTabFunc ){ + mask |= sqlite3WhereExprListUsage(pMaskSet, pSrc->a[i].u1.pFuncArg); + } + } + } + pS = pS->pPrior; + } + return mask; +} + +/* +** Expression pExpr is one operand of a comparison operator that might +** be useful for indexing. This routine checks to see if pExpr appears +** in any index. Return TRUE (1) if pExpr is an indexed term and return +** FALSE (0) if not. If TRUE is returned, also set aiCurCol[0] to the cursor +** number of the table that is indexed and aiCurCol[1] to the column number +** of the column that is indexed, or XN_EXPR (-2) if an expression is being +** indexed. +** +** If pExpr is a TK_COLUMN column reference, then this routine always returns +** true even if that particular column is not indexed, because the column +** might be added to an automatic index later. +*/ +static SQLITE_NOINLINE int exprMightBeIndexed2( + SrcList *pFrom, /* The FROM clause */ + Bitmask mPrereq, /* Bitmask of FROM clause terms referenced by pExpr */ + int *aiCurCol, /* Write the referenced table cursor and column here */ + Expr *pExpr /* An operand of a comparison operator */ +){ + Index *pIdx; + int i; + int iCur; + for(i=0; mPrereq>1; i++, mPrereq>>=1){} + iCur = pFrom->a[i].iCursor; + for(pIdx=pFrom->a[i].pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->aColExpr==0 ) continue; + for(i=0; inKeyCol; i++){ + if( pIdx->aiColumn[i]!=XN_EXPR ) continue; + if( sqlite3ExprCompareSkip(pExpr, pIdx->aColExpr->a[i].pExpr, iCur)==0 ){ + aiCurCol[0] = iCur; + aiCurCol[1] = XN_EXPR; + return 1; + } + } + } + return 0; +} +static int exprMightBeIndexed( + SrcList *pFrom, /* The FROM clause */ + Bitmask mPrereq, /* Bitmask of FROM clause terms referenced by pExpr */ + int *aiCurCol, /* Write the referenced table cursor & column here */ + Expr *pExpr, /* An operand of a comparison operator */ + int op /* The specific comparison operator */ +){ + /* If this expression is a vector to the left or right of a + ** inequality constraint (>, <, >= or <=), perform the processing + ** on the first element of the vector. */ + assert( TK_GT+1==TK_LE && TK_GT+2==TK_LT && TK_GT+3==TK_GE ); + assert( TK_ISop==TK_VECTOR && (op>=TK_GT && ALWAYS(op<=TK_GE)) ){ + assert( ExprUseXList(pExpr) ); + pExpr = pExpr->x.pList->a[0].pExpr; + + } + + if( pExpr->op==TK_COLUMN ){ + aiCurCol[0] = pExpr->iTable; + aiCurCol[1] = pExpr->iColumn; + return 1; + } + if( mPrereq==0 ) return 0; /* No table references */ + if( (mPrereq&(mPrereq-1))!=0 ) return 0; /* Refs more than one table */ + return exprMightBeIndexed2(pFrom,mPrereq,aiCurCol,pExpr); +} + + +/* +** The input to this routine is an WhereTerm structure with only the +** "pExpr" field filled in. The job of this routine is to analyze the +** subexpression and populate all the other fields of the WhereTerm +** structure. +** +** If the expression is of the form " X" it gets commuted +** to the standard form of "X ". +** +** If the expression is of the form "X Y" where both X and Y are +** columns, then the original expression is unchanged and a new virtual +** term of the form "Y X" is added to the WHERE clause and +** analyzed separately. The original term is marked with TERM_COPIED +** and the new term is marked with TERM_DYNAMIC (because it's pExpr +** needs to be freed with the WhereClause) and TERM_VIRTUAL (because it +** is a commuted copy of a prior term.) The original term has nChild=1 +** and the copy has idxParent set to the index of the original term. +*/ +static void exprAnalyze( + SrcList *pSrc, /* the FROM clause */ + WhereClause *pWC, /* the WHERE clause */ + int idxTerm /* Index of the term to be analyzed */ +){ + WhereInfo *pWInfo = pWC->pWInfo; /* WHERE clause processing context */ + WhereTerm *pTerm; /* The term to be analyzed */ + WhereMaskSet *pMaskSet; /* Set of table index masks */ + Expr *pExpr; /* The expression to be analyzed */ + Bitmask prereqLeft; /* Prerequesites of the pExpr->pLeft */ + Bitmask prereqAll; /* Prerequesites of pExpr */ + Bitmask extraRight = 0; /* Extra dependencies on LEFT JOIN */ + Expr *pStr1 = 0; /* RHS of LIKE/GLOB operator */ + int isComplete = 0; /* RHS of LIKE/GLOB ends with wildcard */ + int noCase = 0; /* uppercase equivalent to lowercase */ + int op; /* Top-level operator. pExpr->op */ + Parse *pParse = pWInfo->pParse; /* Parsing context */ + sqlite3 *db = pParse->db; /* Database connection */ + unsigned char eOp2 = 0; /* op2 value for LIKE/REGEXP/GLOB */ + int nLeft; /* Number of elements on left side vector */ + + if( db->mallocFailed ){ + return; + } + assert( pWC->nTerm > idxTerm ); + pTerm = &pWC->a[idxTerm]; + pMaskSet = &pWInfo->sMaskSet; + pExpr = pTerm->pExpr; + assert( pExpr!=0 ); /* Because malloc() has not failed */ + assert( pExpr->op!=TK_AS && pExpr->op!=TK_COLLATE ); + pMaskSet->bVarSelect = 0; + prereqLeft = sqlite3WhereExprUsage(pMaskSet, pExpr->pLeft); + op = pExpr->op; + if( op==TK_IN ){ + assert( pExpr->pRight==0 ); + if( sqlite3ExprCheckIN(pParse, pExpr) ) return; + if( ExprUseXSelect(pExpr) ){ + pTerm->prereqRight = exprSelectUsage(pMaskSet, pExpr->x.pSelect); + }else{ + pTerm->prereqRight = sqlite3WhereExprListUsage(pMaskSet, pExpr->x.pList); + } + prereqAll = prereqLeft | pTerm->prereqRight; + }else{ + pTerm->prereqRight = sqlite3WhereExprUsage(pMaskSet, pExpr->pRight); + if( pExpr->pLeft==0 + || ExprHasProperty(pExpr, EP_xIsSelect|EP_IfNullRow) + || pExpr->x.pList!=0 + ){ + prereqAll = sqlite3WhereExprUsageNN(pMaskSet, pExpr); + }else{ + prereqAll = prereqLeft | pTerm->prereqRight; + } + } + if( pMaskSet->bVarSelect ) pTerm->wtFlags |= TERM_VARSELECT; + +#ifdef SQLITE_DEBUG + if( prereqAll!=sqlite3WhereExprUsageNN(pMaskSet, pExpr) ){ + printf("\n*** Incorrect prereqAll computed for:\n"); + sqlite3TreeViewExpr(0,pExpr,0); + abort(); + } +#endif + + if( ExprHasProperty(pExpr, EP_FromJoin) ){ + Bitmask x = sqlite3WhereGetMask(pMaskSet, pExpr->w.iRightJoinTable); + prereqAll |= x; + extraRight = x-1; /* ON clause terms may not be used with an index + ** on left table of a LEFT JOIN. Ticket #3015 */ + if( (prereqAll>>1)>=x ){ + sqlite3ErrorMsg(pParse, "ON clause references tables to its right"); + return; + } + } + pTerm->prereqAll = prereqAll; + pTerm->leftCursor = -1; + pTerm->iParent = -1; + pTerm->eOperator = 0; + if( allowedOp(op) ){ + int aiCurCol[2]; + Expr *pLeft = sqlite3ExprSkipCollate(pExpr->pLeft); + Expr *pRight = sqlite3ExprSkipCollate(pExpr->pRight); + u16 opMask = (pTerm->prereqRight & prereqLeft)==0 ? WO_ALL : WO_EQUIV; + + if( pTerm->u.x.iField>0 ){ + assert( op==TK_IN ); + assert( pLeft->op==TK_VECTOR ); + assert( ExprUseXList(pLeft) ); + pLeft = pLeft->x.pList->a[pTerm->u.x.iField-1].pExpr; + } + + if( exprMightBeIndexed(pSrc, prereqLeft, aiCurCol, pLeft, op) ){ + pTerm->leftCursor = aiCurCol[0]; + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); + pTerm->u.x.leftColumn = aiCurCol[1]; + pTerm->eOperator = operatorMask(op) & opMask; + } + if( op==TK_IS ) pTerm->wtFlags |= TERM_IS; + if( pRight + && exprMightBeIndexed(pSrc, pTerm->prereqRight, aiCurCol, pRight, op) + && !ExprHasProperty(pRight, EP_FixedCol) + ){ + WhereTerm *pNew; + Expr *pDup; + u16 eExtraOp = 0; /* Extra bits for pNew->eOperator */ + assert( pTerm->u.x.iField==0 ); + if( pTerm->leftCursor>=0 ){ + int idxNew; + pDup = sqlite3ExprDup(db, pExpr, 0); + if( db->mallocFailed ){ + sqlite3ExprDelete(db, pDup); + return; + } + idxNew = whereClauseInsert(pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC); + if( idxNew==0 ) return; + pNew = &pWC->a[idxNew]; + markTermAsChild(pWC, idxNew, idxTerm); + if( op==TK_IS ) pNew->wtFlags |= TERM_IS; + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_COPIED; + + if( termIsEquivalence(pParse, pDup) ){ + pTerm->eOperator |= WO_EQUIV; + eExtraOp = WO_EQUIV; + } + }else{ + pDup = pExpr; + pNew = pTerm; + } + pNew->wtFlags |= exprCommute(pParse, pDup); + pNew->leftCursor = aiCurCol[0]; + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); + pNew->u.x.leftColumn = aiCurCol[1]; + testcase( (prereqLeft | extraRight) != prereqLeft ); + pNew->prereqRight = prereqLeft | extraRight; + pNew->prereqAll = prereqAll; + pNew->eOperator = (operatorMask(pDup->op) + eExtraOp) & opMask; + }else + if( op==TK_ISNULL + && !ExprHasProperty(pExpr,EP_FromJoin) + && 0==sqlite3ExprCanBeNull(pLeft) + ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + pExpr->op = TK_TRUEFALSE; + pExpr->u.zToken = "false"; + ExprSetProperty(pExpr, EP_IsFalse); + pTerm->prereqAll = 0; + pTerm->eOperator = 0; + } + } + +#ifndef SQLITE_OMIT_BETWEEN_OPTIMIZATION + /* If a term is the BETWEEN operator, create two new virtual terms + ** that define the range that the BETWEEN implements. For example: + ** + ** a BETWEEN b AND c + ** + ** is converted into: + ** + ** (a BETWEEN b AND c) AND (a>=b) AND (a<=c) + ** + ** The two new terms are added onto the end of the WhereClause object. + ** The new terms are "dynamic" and are children of the original BETWEEN + ** term. That means that if the BETWEEN term is coded, the children are + ** skipped. Or, if the children are satisfied by an index, the original + ** BETWEEN term is skipped. + */ + else if( pExpr->op==TK_BETWEEN && pWC->op==TK_AND ){ + ExprList *pList; + int i; + static const u8 ops[] = {TK_GE, TK_LE}; + assert( ExprUseXList(pExpr) ); + pList = pExpr->x.pList; + assert( pList!=0 ); + assert( pList->nExpr==2 ); + for(i=0; i<2; i++){ + Expr *pNewExpr; + int idxNew; + pNewExpr = sqlite3PExpr(pParse, ops[i], + sqlite3ExprDup(db, pExpr->pLeft, 0), + sqlite3ExprDup(db, pList->a[i].pExpr, 0)); + transferJoinMarkings(pNewExpr, pExpr); + idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC); + testcase( idxNew==0 ); + exprAnalyze(pSrc, pWC, idxNew); + pTerm = &pWC->a[idxTerm]; + markTermAsChild(pWC, idxNew, idxTerm); + } + } +#endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */ + +#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) + /* Analyze a term that is composed of two or more subterms connected by + ** an OR operator. + */ + else if( pExpr->op==TK_OR ){ + assert( pWC->op==TK_AND ); + exprAnalyzeOrTerm(pSrc, pWC, idxTerm); + pTerm = &pWC->a[idxTerm]; + } +#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ + /* The form "x IS NOT NULL" can sometimes be evaluated more efficiently + ** as "x>NULL" if x is not an INTEGER PRIMARY KEY. So construct a + ** virtual term of that form. + ** + ** The virtual term must be tagged with TERM_VNULL. + */ + else if( pExpr->op==TK_NOTNULL ){ + if( pExpr->pLeft->op==TK_COLUMN + && pExpr->pLeft->iColumn>=0 + && !ExprHasProperty(pExpr, EP_FromJoin) + ){ + Expr *pNewExpr; + Expr *pLeft = pExpr->pLeft; + int idxNew; + WhereTerm *pNewTerm; + + pNewExpr = sqlite3PExpr(pParse, TK_GT, + sqlite3ExprDup(db, pLeft, 0), + sqlite3ExprAlloc(db, TK_NULL, 0, 0)); + + idxNew = whereClauseInsert(pWC, pNewExpr, + TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL); + if( idxNew ){ + pNewTerm = &pWC->a[idxNew]; + pNewTerm->prereqRight = 0; + pNewTerm->leftCursor = pLeft->iTable; + pNewTerm->u.x.leftColumn = pLeft->iColumn; + pNewTerm->eOperator = WO_GT; + markTermAsChild(pWC, idxNew, idxTerm); + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_COPIED; + pNewTerm->prereqAll = pTerm->prereqAll; + } + } + } + + +#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION + /* Add constraints to reduce the search space on a LIKE or GLOB + ** operator. + ** + ** A like pattern of the form "x LIKE 'aBc%'" is changed into constraints + ** + ** x>='ABC' AND x<'abd' AND x LIKE 'aBc%' + ** + ** The last character of the prefix "abc" is incremented to form the + ** termination condition "abd". If case is not significant (the default + ** for LIKE) then the lower-bound is made all uppercase and the upper- + ** bound is made all lowercase so that the bounds also work when comparing + ** BLOBs. + */ + else if( pExpr->op==TK_FUNCTION + && pWC->op==TK_AND + && isLikeOrGlob(pParse, pExpr, &pStr1, &isComplete, &noCase) + ){ + Expr *pLeft; /* LHS of LIKE/GLOB operator */ + Expr *pStr2; /* Copy of pStr1 - RHS of LIKE/GLOB operator */ + Expr *pNewExpr1; + Expr *pNewExpr2; + int idxNew1; + int idxNew2; + const char *zCollSeqName; /* Name of collating sequence */ + const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC; + + assert( ExprUseXList(pExpr) ); + pLeft = pExpr->x.pList->a[1].pExpr; + pStr2 = sqlite3ExprDup(db, pStr1, 0); + assert( pStr1==0 || !ExprHasProperty(pStr1, EP_IntValue) ); + assert( pStr2==0 || !ExprHasProperty(pStr2, EP_IntValue) ); + + + /* Convert the lower bound to upper-case and the upper bound to + ** lower-case (upper-case is less than lower-case in ASCII) so that + ** the range constraints also work for BLOBs + */ + if( noCase && !pParse->db->mallocFailed ){ + int i; + char c; + pTerm->wtFlags |= TERM_LIKE; + for(i=0; (c = pStr1->u.zToken[i])!=0; i++){ + pStr1->u.zToken[i] = sqlite3Toupper(c); + pStr2->u.zToken[i] = sqlite3Tolower(c); + } + } + + if( !db->mallocFailed ){ + u8 c, *pC; /* Last character before the first wildcard */ + pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; + c = *pC; + if( noCase ){ + /* The point is to increment the last character before the first + ** wildcard. But if we increment '@', that will push it into the + ** alphabetic range where case conversions will mess up the + ** inequality. To avoid this, make sure to also run the full + ** LIKE on all candidate expressions by clearing the isComplete flag + */ + if( c=='A'-1 ) isComplete = 0; + c = sqlite3UpperToLower[c]; + } + *pC = c + 1; + } + zCollSeqName = noCase ? "NOCASE" : sqlite3StrBINARY; + pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); + pNewExpr1 = sqlite3PExpr(pParse, TK_GE, + sqlite3ExprAddCollateString(pParse,pNewExpr1,zCollSeqName), + pStr1); + transferJoinMarkings(pNewExpr1, pExpr); + idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags); + testcase( idxNew1==0 ); + exprAnalyze(pSrc, pWC, idxNew1); + pNewExpr2 = sqlite3ExprDup(db, pLeft, 0); + pNewExpr2 = sqlite3PExpr(pParse, TK_LT, + sqlite3ExprAddCollateString(pParse,pNewExpr2,zCollSeqName), + pStr2); + transferJoinMarkings(pNewExpr2, pExpr); + idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags); + testcase( idxNew2==0 ); + exprAnalyze(pSrc, pWC, idxNew2); + pTerm = &pWC->a[idxTerm]; + if( isComplete ){ + markTermAsChild(pWC, idxNew1, idxTerm); + markTermAsChild(pWC, idxNew2, idxTerm); + } + } +#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */ + + /* If there is a vector == or IS term - e.g. "(a, b) == (?, ?)" - create + ** new terms for each component comparison - "a = ?" and "b = ?". The + ** new terms completely replace the original vector comparison, which is + ** no longer used. + ** + ** This is only required if at least one side of the comparison operation + ** is not a sub-select. + ** + ** tag-20220128a + */ + if( (pExpr->op==TK_EQ || pExpr->op==TK_IS) + && (nLeft = sqlite3ExprVectorSize(pExpr->pLeft))>1 + && sqlite3ExprVectorSize(pExpr->pRight)==nLeft + && ( (pExpr->pLeft->flags & EP_xIsSelect)==0 + || (pExpr->pRight->flags & EP_xIsSelect)==0) + && pWC->op==TK_AND + ){ + int i; + for(i=0; ipLeft, i, nLeft); + Expr *pRight = sqlite3ExprForVectorField(pParse, pExpr->pRight, i, nLeft); + + pNew = sqlite3PExpr(pParse, pExpr->op, pLeft, pRight); + transferJoinMarkings(pNew, pExpr); + idxNew = whereClauseInsert(pWC, pNew, TERM_DYNAMIC|TERM_SLICE); + exprAnalyze(pSrc, pWC, idxNew); + } + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_CODED|TERM_VIRTUAL; /* Disable the original */ + pTerm->eOperator = 0; + } + + /* If there is a vector IN term - e.g. "(a, b) IN (SELECT ...)" - create + ** a virtual term for each vector component. The expression object + ** used by each such virtual term is pExpr (the full vector IN(...) + ** expression). The WhereTerm.u.x.iField variable identifies the index within + ** the vector on the LHS that the virtual term represents. + ** + ** This only works if the RHS is a simple SELECT (not a compound) that does + ** not use window functions. + */ + else if( pExpr->op==TK_IN + && pTerm->u.x.iField==0 + && pExpr->pLeft->op==TK_VECTOR + && ALWAYS( ExprUseXSelect(pExpr) ) + && pExpr->x.pSelect->pPrior==0 +#ifndef SQLITE_OMIT_WINDOWFUNC + && pExpr->x.pSelect->pWin==0 +#endif + && pWC->op==TK_AND + ){ + int i; + for(i=0; ipLeft); i++){ + int idxNew; + idxNew = whereClauseInsert(pWC, pExpr, TERM_VIRTUAL|TERM_SLICE); + pWC->a[idxNew].u.x.iField = i+1; + exprAnalyze(pSrc, pWC, idxNew); + markTermAsChild(pWC, idxNew, idxTerm); + } + } + +#ifndef SQLITE_OMIT_VIRTUALTABLE + /* Add a WO_AUX auxiliary term to the constraint set if the + ** current expression is of the form "column OP expr" where OP + ** is an operator that gets passed into virtual tables but which is + ** not normally optimized for ordinary tables. In other words, OP + ** is one of MATCH, LIKE, GLOB, REGEXP, !=, IS, IS NOT, or NOT NULL. + ** This information is used by the xBestIndex methods of + ** virtual tables. The native query optimizer does not attempt + ** to do anything with MATCH functions. + */ + else if( pWC->op==TK_AND ){ + Expr *pRight = 0, *pLeft = 0; + int res = isAuxiliaryVtabOperator(db, pExpr, &eOp2, &pLeft, &pRight); + while( res-- > 0 ){ + int idxNew; + WhereTerm *pNewTerm; + Bitmask prereqColumn, prereqExpr; + + prereqExpr = sqlite3WhereExprUsage(pMaskSet, pRight); + prereqColumn = sqlite3WhereExprUsage(pMaskSet, pLeft); + if( (prereqExpr & prereqColumn)==0 ){ + Expr *pNewExpr; + pNewExpr = sqlite3PExpr(pParse, TK_MATCH, + 0, sqlite3ExprDup(db, pRight, 0)); + if( ExprHasProperty(pExpr, EP_FromJoin) && pNewExpr ){ + ExprSetProperty(pNewExpr, EP_FromJoin); + pNewExpr->w.iRightJoinTable = pExpr->w.iRightJoinTable; + } + idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC); + testcase( idxNew==0 ); + pNewTerm = &pWC->a[idxNew]; + pNewTerm->prereqRight = prereqExpr; + pNewTerm->leftCursor = pLeft->iTable; + pNewTerm->u.x.leftColumn = pLeft->iColumn; + pNewTerm->eOperator = WO_AUX; + pNewTerm->eMatchOp = eOp2; + markTermAsChild(pWC, idxNew, idxTerm); + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_COPIED; + pNewTerm->prereqAll = pTerm->prereqAll; + } + SWAP(Expr*, pLeft, pRight); + } + } +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + /* Prevent ON clause terms of a LEFT JOIN from being used to drive + ** an index for tables to the left of the join. + */ + testcase( pTerm!=&pWC->a[idxTerm] ); + pTerm = &pWC->a[idxTerm]; + pTerm->prereqRight |= extraRight; +} + +/*************************************************************************** +** Routines with file scope above. Interface to the rest of the where.c +** subsystem follows. +***************************************************************************/ + +/* +** This routine identifies subexpressions in the WHERE clause where +** each subexpression is separated by the AND operator or some other +** operator specified in the op parameter. The WhereClause structure +** is filled with pointers to subexpressions. For example: +** +** WHERE a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22) +** \________/ \_______________/ \________________/ +** slot[0] slot[1] slot[2] +** +** The original WHERE clause in pExpr is unaltered. All this routine +** does is make slot[] entries point to substructure within pExpr. +** +** In the previous sentence and in the diagram, "slot[]" refers to +** the WhereClause.a[] array. The slot[] array grows as needed to contain +** all terms of the WHERE clause. +*/ +SQLITE_PRIVATE void sqlite3WhereSplit(WhereClause *pWC, Expr *pExpr, u8 op){ + Expr *pE2 = sqlite3ExprSkipCollateAndLikely(pExpr); + pWC->op = op; + assert( pE2!=0 || pExpr==0 ); + if( pE2==0 ) return; + if( pE2->op!=op ){ + whereClauseInsert(pWC, pExpr, 0); + }else{ + sqlite3WhereSplit(pWC, pE2->pLeft, op); + sqlite3WhereSplit(pWC, pE2->pRight, op); + } +} + +/* +** Add either a LIMIT (if eMatchOp==SQLITE_INDEX_CONSTRAINT_LIMIT) or +** OFFSET (if eMatchOp==SQLITE_INDEX_CONSTRAINT_OFFSET) term to the +** where-clause passed as the first argument. The value for the term +** is found in register iReg. +** +** In the common case where the value is a simple integer +** (example: "LIMIT 5 OFFSET 10") then the expression codes as a +** TK_INTEGER so that it will be available to sqlite3_vtab_rhs_value(). +** If not, then it codes as a TK_REGISTER expression. +*/ +static void whereAddLimitExpr( + WhereClause *pWC, /* Add the constraint to this WHERE clause */ + int iReg, /* Register that will hold value of the limit/offset */ + Expr *pExpr, /* Expression that defines the limit/offset */ + int iCsr, /* Cursor to which the constraint applies */ + int eMatchOp /* SQLITE_INDEX_CONSTRAINT_LIMIT or _OFFSET */ +){ + Parse *pParse = pWC->pWInfo->pParse; + sqlite3 *db = pParse->db; + Expr *pNew; + int iVal = 0; + + if( sqlite3ExprIsInteger(pExpr, &iVal) && iVal>=0 ){ + Expr *pVal = sqlite3Expr(db, TK_INTEGER, 0); + if( pVal==0 ) return; + ExprSetProperty(pVal, EP_IntValue); + pVal->u.iValue = iVal; + pNew = sqlite3PExpr(pParse, TK_MATCH, 0, pVal); + }else{ + Expr *pVal = sqlite3Expr(db, TK_REGISTER, 0); + if( pVal==0 ) return; + pVal->iTable = iReg; + pNew = sqlite3PExpr(pParse, TK_MATCH, 0, pVal); + } + if( pNew ){ + WhereTerm *pTerm; + int idx; + idx = whereClauseInsert(pWC, pNew, TERM_DYNAMIC|TERM_VIRTUAL); + pTerm = &pWC->a[idx]; + pTerm->leftCursor = iCsr; + pTerm->eOperator = WO_AUX; + pTerm->eMatchOp = eMatchOp; + } +} + +/* +** Possibly add terms corresponding to the LIMIT and OFFSET clauses of the +** SELECT statement passed as the second argument. These terms are only +** added if: +** +** 1. The SELECT statement has a LIMIT clause, and +** 2. The SELECT statement is not an aggregate or DISTINCT query, and +** 3. The SELECT statement has exactly one object in its from clause, and +** that object is a virtual table, and +** 4. There are no terms in the WHERE clause that will not be passed +** to the virtual table xBestIndex method. +** 5. The ORDER BY clause, if any, will be made available to the xBestIndex +** method. +** +** LIMIT and OFFSET terms are ignored by most of the planner code. They +** exist only so that they may be passed to the xBestIndex method of the +** single virtual table in the FROM clause of the SELECT. +*/ +SQLITE_PRIVATE void sqlite3WhereAddLimit(WhereClause *pWC, Select *p){ + assert( p==0 || (p->pGroupBy==0 && (p->selFlags & SF_Aggregate)==0) ); + if( (p && p->pLimit) /* 1 */ + && (p->selFlags & (SF_Distinct|SF_Aggregate))==0 /* 2 */ + && (p->pSrc->nSrc==1 && IsVirtual(p->pSrc->a[0].pTab)) /* 3 */ + ){ + ExprList *pOrderBy = p->pOrderBy; + int iCsr = p->pSrc->a[0].iCursor; + int ii; + + /* Check condition (4). Return early if it is not met. */ + for(ii=0; iinTerm; ii++){ + if( pWC->a[ii].wtFlags & TERM_CODED ){ + /* This term is a vector operation that has been decomposed into + ** other, subsequent terms. It can be ignored. See tag-20220128a */ + assert( pWC->a[ii].wtFlags & TERM_VIRTUAL ); + assert( pWC->a[ii].eOperator==0 ); + continue; + } + if( pWC->a[ii].leftCursor!=iCsr ) return; + } + + /* Check condition (5). Return early if it is not met. */ + if( pOrderBy ){ + for(ii=0; iinExpr; ii++){ + Expr *pExpr = pOrderBy->a[ii].pExpr; + if( pExpr->op!=TK_COLUMN ) return; + if( pExpr->iTable!=iCsr ) return; + if( pOrderBy->a[ii].sortFlags & KEYINFO_ORDER_BIGNULL ) return; + } + } + + /* All conditions are met. Add the terms to the where-clause object. */ + assert( p->pLimit->op==TK_LIMIT ); + whereAddLimitExpr(pWC, p->iLimit, p->pLimit->pLeft, + iCsr, SQLITE_INDEX_CONSTRAINT_LIMIT); + if( p->iOffset>0 ){ + whereAddLimitExpr(pWC, p->iOffset, p->pLimit->pRight, + iCsr, SQLITE_INDEX_CONSTRAINT_OFFSET); + } + } +} + +/* +** Initialize a preallocated WhereClause structure. +*/ +SQLITE_PRIVATE void sqlite3WhereClauseInit( + WhereClause *pWC, /* The WhereClause to be initialized */ + WhereInfo *pWInfo /* The WHERE processing context */ +){ + pWC->pWInfo = pWInfo; + pWC->hasOr = 0; + pWC->pOuter = 0; + pWC->nTerm = 0; + pWC->nBase = 0; + pWC->nSlot = ArraySize(pWC->aStatic); + pWC->a = pWC->aStatic; +} + +/* +** Deallocate a WhereClause structure. The WhereClause structure +** itself is not freed. This routine is the inverse of +** sqlite3WhereClauseInit(). +*/ +SQLITE_PRIVATE void sqlite3WhereClauseClear(WhereClause *pWC){ + sqlite3 *db = pWC->pWInfo->pParse->db; + assert( pWC->nTerm>=pWC->nBase ); + if( pWC->nTerm>0 ){ + WhereTerm *a = pWC->a; + WhereTerm *aLast = &pWC->a[pWC->nTerm-1]; +#ifdef SQLITE_DEBUG + int i; + /* Verify that every term past pWC->nBase is virtual */ + for(i=pWC->nBase; inTerm; i++){ + assert( (pWC->a[i].wtFlags & TERM_VIRTUAL)!=0 ); + } +#endif + while(1){ + assert( a->eMatchOp==0 || a->eOperator==WO_AUX ); + if( a->wtFlags & TERM_DYNAMIC ){ + sqlite3ExprDelete(db, a->pExpr); + } + if( a->wtFlags & (TERM_ORINFO|TERM_ANDINFO) ){ + if( a->wtFlags & TERM_ORINFO ){ + assert( (a->wtFlags & TERM_ANDINFO)==0 ); + whereOrInfoDelete(db, a->u.pOrInfo); + }else{ + assert( (a->wtFlags & TERM_ANDINFO)!=0 ); + whereAndInfoDelete(db, a->u.pAndInfo); + } + } + if( a==aLast ) break; + a++; + } + } + if( pWC->a!=pWC->aStatic ){ + sqlite3DbFree(db, pWC->a); + } +} + + +/* +** These routines walk (recursively) an expression tree and generate +** a bitmask indicating which tables are used in that expression +** tree. +** +** sqlite3WhereExprUsage(MaskSet, Expr) -> +** +** Return a Bitmask of all tables referenced by Expr. Expr can be +** be NULL, in which case 0 is returned. +** +** sqlite3WhereExprUsageNN(MaskSet, Expr) -> +** +** Same as sqlite3WhereExprUsage() except that Expr must not be +** NULL. The "NN" suffix on the name stands for "Not Null". +** +** sqlite3WhereExprListUsage(MaskSet, ExprList) -> +** +** Return a Bitmask of all tables referenced by every expression +** in the expression list ExprList. ExprList can be NULL, in which +** case 0 is returned. +** +** sqlite3WhereExprUsageFull(MaskSet, ExprList) -> +** +** Internal use only. Called only by sqlite3WhereExprUsageNN() for +** complex expressions that require pushing register values onto +** the stack. Many calls to sqlite3WhereExprUsageNN() do not need +** the more complex analysis done by this routine. Hence, the +** computations done by this routine are broken out into a separate +** "no-inline" function to avoid the stack push overhead in the +** common case where it is not needed. +*/ +static SQLITE_NOINLINE Bitmask sqlite3WhereExprUsageFull( + WhereMaskSet *pMaskSet, + Expr *p +){ + Bitmask mask; + mask = (p->op==TK_IF_NULL_ROW) ? sqlite3WhereGetMask(pMaskSet, p->iTable) : 0; + if( p->pLeft ) mask |= sqlite3WhereExprUsageNN(pMaskSet, p->pLeft); + if( p->pRight ){ + mask |= sqlite3WhereExprUsageNN(pMaskSet, p->pRight); + assert( p->x.pList==0 ); + }else if( ExprUseXSelect(p) ){ + if( ExprHasProperty(p, EP_VarSelect) ) pMaskSet->bVarSelect = 1; + mask |= exprSelectUsage(pMaskSet, p->x.pSelect); + }else if( p->x.pList ){ + mask |= sqlite3WhereExprListUsage(pMaskSet, p->x.pList); + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( (p->op==TK_FUNCTION || p->op==TK_AGG_FUNCTION) && ExprUseYWin(p) ){ + assert( p->y.pWin!=0 ); + mask |= sqlite3WhereExprListUsage(pMaskSet, p->y.pWin->pPartition); + mask |= sqlite3WhereExprListUsage(pMaskSet, p->y.pWin->pOrderBy); + mask |= sqlite3WhereExprUsage(pMaskSet, p->y.pWin->pFilter); + } +#endif + return mask; +} +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsageNN(WhereMaskSet *pMaskSet, Expr *p){ + if( p->op==TK_COLUMN && !ExprHasProperty(p, EP_FixedCol) ){ + return sqlite3WhereGetMask(pMaskSet, p->iTable); + }else if( ExprHasProperty(p, EP_TokenOnly|EP_Leaf) ){ + assert( p->op!=TK_IF_NULL_ROW ); + return 0; + } + return sqlite3WhereExprUsageFull(pMaskSet, p); +} +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsage(WhereMaskSet *pMaskSet, Expr *p){ + return p ? sqlite3WhereExprUsageNN(pMaskSet,p) : 0; +} +SQLITE_PRIVATE Bitmask sqlite3WhereExprListUsage(WhereMaskSet *pMaskSet, ExprList *pList){ + int i; + Bitmask mask = 0; + if( pList ){ + for(i=0; inExpr; i++){ + mask |= sqlite3WhereExprUsage(pMaskSet, pList->a[i].pExpr); + } + } + return mask; +} + + +/* +** Call exprAnalyze on all terms in a WHERE clause. +** +** Note that exprAnalyze() might add new virtual terms onto the +** end of the WHERE clause. We do not want to analyze these new +** virtual terms, so start analyzing at the end and work forward +** so that the added virtual terms are never processed. +*/ +SQLITE_PRIVATE void sqlite3WhereExprAnalyze( + SrcList *pTabList, /* the FROM clause */ + WhereClause *pWC /* the WHERE clause to be analyzed */ +){ + int i; + for(i=pWC->nTerm-1; i>=0; i--){ + exprAnalyze(pTabList, pWC, i); + } +} + +/* +** For table-valued-functions, transform the function arguments into +** new WHERE clause terms. +** +** Each function argument translates into an equality constraint against +** a HIDDEN column in the table. +*/ +SQLITE_PRIVATE void sqlite3WhereTabFuncArgs( + Parse *pParse, /* Parsing context */ + SrcItem *pItem, /* The FROM clause term to process */ + WhereClause *pWC /* Xfer function arguments to here */ +){ + Table *pTab; + int j, k; + ExprList *pArgs; + Expr *pColRef; + Expr *pTerm; + if( pItem->fg.isTabFunc==0 ) return; + pTab = pItem->pTab; + assert( pTab!=0 ); + pArgs = pItem->u1.pFuncArg; + if( pArgs==0 ) return; + for(j=k=0; jnExpr; j++){ + Expr *pRhs; + while( knCol && (pTab->aCol[k].colFlags & COLFLAG_HIDDEN)==0 ){k++;} + if( k>=pTab->nCol ){ + sqlite3ErrorMsg(pParse, "too many arguments on %s() - max %d", + pTab->zName, j); + return; + } + pColRef = sqlite3ExprAlloc(pParse->db, TK_COLUMN, 0, 0); + if( pColRef==0 ) return; + pColRef->iTable = pItem->iCursor; + pColRef->iColumn = k++; + assert( ExprUseYTab(pColRef) ); + pColRef->y.pTab = pTab; + pItem->colUsed |= sqlite3ExprColUsed(pColRef); + pRhs = sqlite3PExpr(pParse, TK_UPLUS, + sqlite3ExprDup(pParse->db, pArgs->a[j].pExpr, 0), 0); + pTerm = sqlite3PExpr(pParse, TK_EQ, pColRef, pRhs); + if( pItem->fg.jointype & JT_LEFT ){ + sqlite3SetJoinExpr(pTerm, pItem->iCursor); + } + whereClauseInsert(pWC, pTerm, TERM_DYNAMIC); + } +} + +/************** End of whereexpr.c *******************************************/ +/************** Begin file where.c *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This module contains C code that generates VDBE code used to process +** the WHERE clause of SQL statements. This module is responsible for +** generating the code that loops through a table looking for applicable +** rows. Indices are selected and used to speed the search when doing +** so is applicable. Because this module is responsible for selecting +** indices, you might also think of this module as the "query optimizer". +*/ +/* #include "sqliteInt.h" */ +/* #include "whereInt.h" */ + +/* +** Extra information appended to the end of sqlite3_index_info but not +** visible to the xBestIndex function, at least not directly. The +** sqlite3_vtab_collation() interface knows how to reach it, however. +** +** This object is not an API and can be changed from one release to the +** next. As long as allocateIndexInfo() and sqlite3_vtab_collation() +** agree on the structure, all will be well. +*/ +typedef struct HiddenIndexInfo HiddenIndexInfo; +struct HiddenIndexInfo { + WhereClause *pWC; /* The Where clause being analyzed */ + Parse *pParse; /* The parsing context */ + int eDistinct; /* Value to return from sqlite3_vtab_distinct() */ + u32 mIn; /* Mask of terms that are IN (...) */ + u32 mHandleIn; /* Terms that vtab will handle as IN (...) */ + sqlite3_value *aRhs[1]; /* RHS values for constraints. MUST BE LAST + ** because extra space is allocated to hold up + ** to nTerm such values */ +}; + +/* Forward declaration of methods */ +static int whereLoopResize(sqlite3*, WhereLoop*, int); + +/* +** Return the estimated number of output rows from a WHERE clause +*/ +SQLITE_PRIVATE LogEst sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ + return pWInfo->nRowOut; +} + +/* +** Return one of the WHERE_DISTINCT_xxxxx values to indicate how this +** WHERE clause returns outputs for DISTINCT processing. +*/ +SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo *pWInfo){ + return pWInfo->eDistinct; +} + +/* +** Return the number of ORDER BY terms that are satisfied by the +** WHERE clause. A return of 0 means that the output must be +** completely sorted. A return equal to the number of ORDER BY +** terms means that no sorting is needed at all. A return that +** is positive but less than the number of ORDER BY terms means that +** block sorting is required. +*/ +SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo *pWInfo){ + return pWInfo->nOBSat; +} + +/* +** In the ORDER BY LIMIT optimization, if the inner-most loop is known +** to emit rows in increasing order, and if the last row emitted by the +** inner-most loop did not fit within the sorter, then we can skip all +** subsequent rows for the current iteration of the inner loop (because they +** will not fit in the sorter either) and continue with the second inner +** loop - the loop immediately outside the inner-most. +** +** When a row does not fit in the sorter (because the sorter already +** holds LIMIT+OFFSET rows that are smaller), then a jump is made to the +** label returned by this function. +** +** If the ORDER BY LIMIT optimization applies, the jump destination should +** be the continuation for the second-inner-most loop. If the ORDER BY +** LIMIT optimization does not apply, then the jump destination should +** be the continuation for the inner-most loop. +** +** It is always safe for this routine to return the continuation of the +** inner-most loop, in the sense that a correct answer will result. +** Returning the continuation the second inner loop is an optimization +** that might make the code run a little faster, but should not change +** the final answer. +*/ +SQLITE_PRIVATE int sqlite3WhereOrderByLimitOptLabel(WhereInfo *pWInfo){ + WhereLevel *pInner; + if( !pWInfo->bOrderedInnerLoop ){ + /* The ORDER BY LIMIT optimization does not apply. Jump to the + ** continuation of the inner-most loop. */ + return pWInfo->iContinue; + } + pInner = &pWInfo->a[pWInfo->nLevel-1]; + assert( pInner->addrNxt!=0 ); + return pInner->addrNxt; +} + +/* +** While generating code for the min/max optimization, after handling +** the aggregate-step call to min() or max(), check to see if any +** additional looping is required. If the output order is such that +** we are certain that the correct answer has already been found, then +** code an OP_Goto to by pass subsequent processing. +** +** Any extra OP_Goto that is coded here is an optimization. The +** correct answer should be obtained regardless. This OP_Goto just +** makes the answer appear faster. +*/ +SQLITE_PRIVATE void sqlite3WhereMinMaxOptEarlyOut(Vdbe *v, WhereInfo *pWInfo){ + WhereLevel *pInner; + int i; + if( !pWInfo->bOrderedInnerLoop ) return; + if( pWInfo->nOBSat==0 ) return; + for(i=pWInfo->nLevel-1; i>=0; i--){ + pInner = &pWInfo->a[i]; + if( (pInner->pWLoop->wsFlags & WHERE_COLUMN_IN)!=0 ){ + sqlite3VdbeGoto(v, pInner->addrNxt); + return; + } + } + sqlite3VdbeGoto(v, pWInfo->iBreak); +} + +/* +** Return the VDBE address or label to jump to in order to continue +** immediately with the next row of a WHERE clause. +*/ +SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo *pWInfo){ + assert( pWInfo->iContinue!=0 ); + return pWInfo->iContinue; +} + +/* +** Return the VDBE address or label to jump to in order to break +** out of a WHERE loop. +*/ +SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo *pWInfo){ + return pWInfo->iBreak; +} + +/* +** Return ONEPASS_OFF (0) if an UPDATE or DELETE statement is unable to +** operate directly on the rowids returned by a WHERE clause. Return +** ONEPASS_SINGLE (1) if the statement can operation directly because only +** a single row is to be changed. Return ONEPASS_MULTI (2) if the one-pass +** optimization can be used on multiple +** +** If the ONEPASS optimization is used (if this routine returns true) +** then also write the indices of open cursors used by ONEPASS +** into aiCur[0] and aiCur[1]. iaCur[0] gets the cursor of the data +** table and iaCur[1] gets the cursor used by an auxiliary index. +** Either value may be -1, indicating that cursor is not used. +** Any cursors returned will have been opened for writing. +** +** aiCur[0] and aiCur[1] both get -1 if the where-clause logic is +** unable to use the ONEPASS optimization. +*/ +SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo *pWInfo, int *aiCur){ + memcpy(aiCur, pWInfo->aiCurOnePass, sizeof(int)*2); +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace && pWInfo->eOnePass!=ONEPASS_OFF ){ + sqlite3DebugPrintf("%s cursors: %d %d\n", + pWInfo->eOnePass==ONEPASS_SINGLE ? "ONEPASS_SINGLE" : "ONEPASS_MULTI", + aiCur[0], aiCur[1]); + } +#endif + return pWInfo->eOnePass; +} + +/* +** Return TRUE if the WHERE loop uses the OP_DeferredSeek opcode to move +** the data cursor to the row selected by the index cursor. +*/ +SQLITE_PRIVATE int sqlite3WhereUsesDeferredSeek(WhereInfo *pWInfo){ + return pWInfo->bDeferredSeek; +} + +/* +** Move the content of pSrc into pDest +*/ +static void whereOrMove(WhereOrSet *pDest, WhereOrSet *pSrc){ + pDest->n = pSrc->n; + memcpy(pDest->a, pSrc->a, pDest->n*sizeof(pDest->a[0])); +} + +/* +** Try to insert a new prerequisite/cost entry into the WhereOrSet pSet. +** +** The new entry might overwrite an existing entry, or it might be +** appended, or it might be discarded. Do whatever is the right thing +** so that pSet keeps the N_OR_COST best entries seen so far. +*/ +static int whereOrInsert( + WhereOrSet *pSet, /* The WhereOrSet to be updated */ + Bitmask prereq, /* Prerequisites of the new entry */ + LogEst rRun, /* Run-cost of the new entry */ + LogEst nOut /* Number of outputs for the new entry */ +){ + u16 i; + WhereOrCost *p; + for(i=pSet->n, p=pSet->a; i>0; i--, p++){ + if( rRun<=p->rRun && (prereq & p->prereq)==prereq ){ + goto whereOrInsert_done; + } + if( p->rRun<=rRun && (p->prereq & prereq)==p->prereq ){ + return 0; + } + } + if( pSet->na[pSet->n++]; + p->nOut = nOut; + }else{ + p = pSet->a; + for(i=1; in; i++){ + if( p->rRun>pSet->a[i].rRun ) p = pSet->a + i; + } + if( p->rRun<=rRun ) return 0; + } +whereOrInsert_done: + p->prereq = prereq; + p->rRun = rRun; + if( p->nOut>nOut ) p->nOut = nOut; + return 1; +} + +/* +** Return the bitmask for the given cursor number. Return 0 if +** iCursor is not in the set. +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereGetMask(WhereMaskSet *pMaskSet, int iCursor){ + int i; + assert( pMaskSet->n<=(int)sizeof(Bitmask)*8 ); + assert( pMaskSet->n>0 || pMaskSet->ix[0]<0 ); + assert( iCursor>=-1 ); + if( pMaskSet->ix[0]==iCursor ){ + return 1; + } + for(i=1; in; i++){ + if( pMaskSet->ix[i]==iCursor ){ + return MASKBIT(i); + } + } + return 0; +} + +/* +** Create a new mask for cursor iCursor. +** +** There is one cursor per table in the FROM clause. The number of +** tables in the FROM clause is limited by a test early in the +** sqlite3WhereBegin() routine. So we know that the pMaskSet->ix[] +** array will never overflow. +*/ +static void createMask(WhereMaskSet *pMaskSet, int iCursor){ + assert( pMaskSet->n < ArraySize(pMaskSet->ix) ); + pMaskSet->ix[pMaskSet->n++] = iCursor; +} + +/* +** If the right-hand branch of the expression is a TK_COLUMN, then return +** a pointer to the right-hand branch. Otherwise, return NULL. +*/ +static Expr *whereRightSubexprIsColumn(Expr *p){ + p = sqlite3ExprSkipCollateAndLikely(p->pRight); + if( ALWAYS(p!=0) && p->op==TK_COLUMN && !ExprHasProperty(p, EP_FixedCol) ){ + return p; + } + return 0; +} + +/* +** Advance to the next WhereTerm that matches according to the criteria +** established when the pScan object was initialized by whereScanInit(). +** Return NULL if there are no more matching WhereTerms. +*/ +static WhereTerm *whereScanNext(WhereScan *pScan){ + int iCur; /* The cursor on the LHS of the term */ + i16 iColumn; /* The column on the LHS of the term. -1 for IPK */ + Expr *pX; /* An expression being tested */ + WhereClause *pWC; /* Shorthand for pScan->pWC */ + WhereTerm *pTerm; /* The term being tested */ + int k = pScan->k; /* Where to start scanning */ + + assert( pScan->iEquiv<=pScan->nEquiv ); + pWC = pScan->pWC; + while(1){ + iColumn = pScan->aiColumn[pScan->iEquiv-1]; + iCur = pScan->aiCur[pScan->iEquiv-1]; + assert( pWC!=0 ); + assert( iCur>=0 ); + do{ + for(pTerm=pWC->a+k; knTerm; k++, pTerm++){ + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 || pTerm->leftCursor<0 ); + if( pTerm->leftCursor==iCur + && pTerm->u.x.leftColumn==iColumn + && (iColumn!=XN_EXPR + || sqlite3ExprCompareSkip(pTerm->pExpr->pLeft, + pScan->pIdxExpr,iCur)==0) + && (pScan->iEquiv<=1 || !ExprHasProperty(pTerm->pExpr, EP_FromJoin)) + ){ + if( (pTerm->eOperator & WO_EQUIV)!=0 + && pScan->nEquivaiCur) + && (pX = whereRightSubexprIsColumn(pTerm->pExpr))!=0 + ){ + int j; + for(j=0; jnEquiv; j++){ + if( pScan->aiCur[j]==pX->iTable + && pScan->aiColumn[j]==pX->iColumn ){ + break; + } + } + if( j==pScan->nEquiv ){ + pScan->aiCur[j] = pX->iTable; + pScan->aiColumn[j] = pX->iColumn; + pScan->nEquiv++; + } + } + if( (pTerm->eOperator & pScan->opMask)!=0 ){ + /* Verify the affinity and collating sequence match */ + if( pScan->zCollName && (pTerm->eOperator & WO_ISNULL)==0 ){ + CollSeq *pColl; + Parse *pParse = pWC->pWInfo->pParse; + pX = pTerm->pExpr; + if( !sqlite3IndexAffinityOk(pX, pScan->idxaff) ){ + continue; + } + assert(pX->pLeft); + pColl = sqlite3ExprCompareCollSeq(pParse, pX); + if( pColl==0 ) pColl = pParse->db->pDfltColl; + if( sqlite3StrICmp(pColl->zName, pScan->zCollName) ){ + continue; + } + } + if( (pTerm->eOperator & (WO_EQ|WO_IS))!=0 + && (pX = pTerm->pExpr->pRight, ALWAYS(pX!=0)) + && pX->op==TK_COLUMN + && pX->iTable==pScan->aiCur[0] + && pX->iColumn==pScan->aiColumn[0] + ){ + testcase( pTerm->eOperator & WO_IS ); + continue; + } + pScan->pWC = pWC; + pScan->k = k+1; +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace & 0x20000 ){ + int ii; + sqlite3DebugPrintf("SCAN-TERM %p: nEquiv=%d", + pTerm, pScan->nEquiv); + for(ii=0; iinEquiv; ii++){ + sqlite3DebugPrintf(" {%d:%d}", + pScan->aiCur[ii], pScan->aiColumn[ii]); + } + sqlite3DebugPrintf("\n"); + } +#endif + return pTerm; + } + } + } + pWC = pWC->pOuter; + k = 0; + }while( pWC!=0 ); + if( pScan->iEquiv>=pScan->nEquiv ) break; + pWC = pScan->pOrigWC; + k = 0; + pScan->iEquiv++; + } + return 0; +} + +/* +** This is whereScanInit() for the case of an index on an expression. +** It is factored out into a separate tail-recursion subroutine so that +** the normal whereScanInit() routine, which is a high-runner, does not +** need to push registers onto the stack as part of its prologue. +*/ +static SQLITE_NOINLINE WhereTerm *whereScanInitIndexExpr(WhereScan *pScan){ + pScan->idxaff = sqlite3ExprAffinity(pScan->pIdxExpr); + return whereScanNext(pScan); +} + +/* +** Initialize a WHERE clause scanner object. Return a pointer to the +** first match. Return NULL if there are no matches. +** +** The scanner will be searching the WHERE clause pWC. It will look +** for terms of the form "X " where X is column iColumn of table +** iCur. Or if pIdx!=0 then X is column iColumn of index pIdx. pIdx +** must be one of the indexes of table iCur. +** +** The must be one of the operators described by opMask. +** +** If the search is for X and the WHERE clause contains terms of the +** form X=Y then this routine might also return terms of the form +** "Y ". The number of levels of transitivity is limited, +** but is enough to handle most commonly occurring SQL statements. +** +** If X is not the INTEGER PRIMARY KEY then X must be compatible with +** index pIdx. +*/ +static WhereTerm *whereScanInit( + WhereScan *pScan, /* The WhereScan object being initialized */ + WhereClause *pWC, /* The WHERE clause to be scanned */ + int iCur, /* Cursor to scan for */ + int iColumn, /* Column to scan for */ + u32 opMask, /* Operator(s) to scan for */ + Index *pIdx /* Must be compatible with this index */ +){ + pScan->pOrigWC = pWC; + pScan->pWC = pWC; + pScan->pIdxExpr = 0; + pScan->idxaff = 0; + pScan->zCollName = 0; + pScan->opMask = opMask; + pScan->k = 0; + pScan->aiCur[0] = iCur; + pScan->nEquiv = 1; + pScan->iEquiv = 1; + if( pIdx ){ + int j = iColumn; + iColumn = pIdx->aiColumn[j]; + if( iColumn==pIdx->pTable->iPKey ){ + iColumn = XN_ROWID; + }else if( iColumn>=0 ){ + pScan->idxaff = pIdx->pTable->aCol[iColumn].affinity; + pScan->zCollName = pIdx->azColl[j]; + }else if( iColumn==XN_EXPR ){ + pScan->pIdxExpr = pIdx->aColExpr->a[j].pExpr; + pScan->zCollName = pIdx->azColl[j]; + pScan->aiColumn[0] = XN_EXPR; + return whereScanInitIndexExpr(pScan); + } + }else if( iColumn==XN_EXPR ){ + return 0; + } + pScan->aiColumn[0] = iColumn; + return whereScanNext(pScan); +} + +/* +** Search for a term in the WHERE clause that is of the form "X " +** where X is a reference to the iColumn of table iCur or of index pIdx +** if pIdx!=0 and is one of the WO_xx operator codes specified by +** the op parameter. Return a pointer to the term. Return 0 if not found. +** +** If pIdx!=0 then it must be one of the indexes of table iCur. +** Search for terms matching the iColumn-th column of pIdx +** rather than the iColumn-th column of table iCur. +** +** The term returned might by Y= if there is another constraint in +** the WHERE clause that specifies that X=Y. Any such constraints will be +** identified by the WO_EQUIV bit in the pTerm->eOperator field. The +** aiCur[]/iaColumn[] arrays hold X and all its equivalents. There are 11 +** slots in aiCur[]/aiColumn[] so that means we can look for X plus up to 10 +** other equivalent values. Hence a search for X will return if X=A1 +** and A1=A2 and A2=A3 and ... and A9=A10 and A10=. +** +** If there are multiple terms in the WHERE clause of the form "X " +** then try for the one with no dependencies on - in other words where +** is a constant expression of some kind. Only return entries of +** the form "X Y" where Y is a column in another table if no terms of +** the form "X " exist. If no terms with a constant RHS +** exist, try to return a term that does not use WO_EQUIV. +*/ +SQLITE_PRIVATE WhereTerm *sqlite3WhereFindTerm( + WhereClause *pWC, /* The WHERE clause to be searched */ + int iCur, /* Cursor number of LHS */ + int iColumn, /* Column number of LHS */ + Bitmask notReady, /* RHS must not overlap with this mask */ + u32 op, /* Mask of WO_xx values describing operator */ + Index *pIdx /* Must be compatible with this index, if not NULL */ +){ + WhereTerm *pResult = 0; + WhereTerm *p; + WhereScan scan; + + p = whereScanInit(&scan, pWC, iCur, iColumn, op, pIdx); + op &= WO_EQ|WO_IS; + while( p ){ + if( (p->prereqRight & notReady)==0 ){ + if( p->prereqRight==0 && (p->eOperator&op)!=0 ){ + testcase( p->eOperator & WO_IS ); + return p; + } + if( pResult==0 ) pResult = p; + } + p = whereScanNext(&scan); + } + return pResult; +} + +/* +** This function searches pList for an entry that matches the iCol-th column +** of index pIdx. +** +** If such an expression is found, its index in pList->a[] is returned. If +** no expression is found, -1 is returned. +*/ +static int findIndexCol( + Parse *pParse, /* Parse context */ + ExprList *pList, /* Expression list to search */ + int iBase, /* Cursor for table associated with pIdx */ + Index *pIdx, /* Index to match column of */ + int iCol /* Column of index to match */ +){ + int i; + const char *zColl = pIdx->azColl[iCol]; + + for(i=0; inExpr; i++){ + Expr *p = sqlite3ExprSkipCollateAndLikely(pList->a[i].pExpr); + if( ALWAYS(p!=0) + && (p->op==TK_COLUMN || p->op==TK_AGG_COLUMN) + && p->iColumn==pIdx->aiColumn[iCol] + && p->iTable==iBase + ){ + CollSeq *pColl = sqlite3ExprNNCollSeq(pParse, pList->a[i].pExpr); + if( 0==sqlite3StrICmp(pColl->zName, zColl) ){ + return i; + } + } + } + + return -1; +} + +/* +** Return TRUE if the iCol-th column of index pIdx is NOT NULL +*/ +static int indexColumnNotNull(Index *pIdx, int iCol){ + int j; + assert( pIdx!=0 ); + assert( iCol>=0 && iColnColumn ); + j = pIdx->aiColumn[iCol]; + if( j>=0 ){ + return pIdx->pTable->aCol[j].notNull; + }else if( j==(-1) ){ + return 1; + }else{ + assert( j==(-2) ); + return 0; /* Assume an indexed expression can always yield a NULL */ + + } +} + +/* +** Return true if the DISTINCT expression-list passed as the third argument +** is redundant. +** +** A DISTINCT list is redundant if any subset of the columns in the +** DISTINCT list are collectively unique and individually non-null. +*/ +static int isDistinctRedundant( + Parse *pParse, /* Parsing context */ + SrcList *pTabList, /* The FROM clause */ + WhereClause *pWC, /* The WHERE clause */ + ExprList *pDistinct /* The result set that needs to be DISTINCT */ +){ + Table *pTab; + Index *pIdx; + int i; + int iBase; + + /* If there is more than one table or sub-select in the FROM clause of + ** this query, then it will not be possible to show that the DISTINCT + ** clause is redundant. */ + if( pTabList->nSrc!=1 ) return 0; + iBase = pTabList->a[0].iCursor; + pTab = pTabList->a[0].pTab; + + /* If any of the expressions is an IPK column on table iBase, then return + ** true. Note: The (p->iTable==iBase) part of this test may be false if the + ** current SELECT is a correlated sub-query. + */ + for(i=0; inExpr; i++){ + Expr *p = sqlite3ExprSkipCollateAndLikely(pDistinct->a[i].pExpr); + if( NEVER(p==0) ) continue; + if( p->op!=TK_COLUMN && p->op!=TK_AGG_COLUMN ) continue; + if( p->iTable==iBase && p->iColumn<0 ) return 1; + } + + /* Loop through all indices on the table, checking each to see if it makes + ** the DISTINCT qualifier redundant. It does so if: + ** + ** 1. The index is itself UNIQUE, and + ** + ** 2. All of the columns in the index are either part of the pDistinct + ** list, or else the WHERE clause contains a term of the form "col=X", + ** where X is a constant value. The collation sequences of the + ** comparison and select-list expressions must match those of the index. + ** + ** 3. All of those index columns for which the WHERE clause does not + ** contain a "col=X" term are subject to a NOT NULL constraint. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( !IsUniqueIndex(pIdx) ) continue; + if( pIdx->pPartIdxWhere ) continue; + for(i=0; inKeyCol; i++){ + if( 0==sqlite3WhereFindTerm(pWC, iBase, i, ~(Bitmask)0, WO_EQ, pIdx) ){ + if( findIndexCol(pParse, pDistinct, iBase, pIdx, i)<0 ) break; + if( indexColumnNotNull(pIdx, i)==0 ) break; + } + } + if( i==pIdx->nKeyCol ){ + /* This index implies that the DISTINCT qualifier is redundant. */ + return 1; + } + } + + return 0; +} + + +/* +** Estimate the logarithm of the input value to base 2. +*/ +static LogEst estLog(LogEst N){ + return N<=10 ? 0 : sqlite3LogEst(N) - 33; +} + +/* +** Convert OP_Column opcodes to OP_Copy in previously generated code. +** +** This routine runs over generated VDBE code and translates OP_Column +** opcodes into OP_Copy when the table is being accessed via co-routine +** instead of via table lookup. +** +** If the iAutoidxCur is not zero, then any OP_Rowid instructions on +** cursor iTabCur are transformed into OP_Sequence opcode for the +** iAutoidxCur cursor, in order to generate unique rowids for the +** automatic index being generated. +*/ +static void translateColumnToCopy( + Parse *pParse, /* Parsing context */ + int iStart, /* Translate from this opcode to the end */ + int iTabCur, /* OP_Column/OP_Rowid references to this table */ + int iRegister, /* The first column is in this register */ + int iAutoidxCur /* If non-zero, cursor of autoindex being generated */ +){ + Vdbe *v = pParse->pVdbe; + VdbeOp *pOp = sqlite3VdbeGetOp(v, iStart); + int iEnd = sqlite3VdbeCurrentAddr(v); + if( pParse->db->mallocFailed ) return; + for(; iStartp1!=iTabCur ) continue; + if( pOp->opcode==OP_Column ){ + pOp->opcode = OP_Copy; + pOp->p1 = pOp->p2 + iRegister; + pOp->p2 = pOp->p3; + pOp->p3 = 0; + }else if( pOp->opcode==OP_Rowid ){ + pOp->opcode = OP_Sequence; + pOp->p1 = iAutoidxCur; +#ifdef SQLITE_ALLOW_ROWID_IN_VIEW + if( iAutoidxCur==0 ){ + pOp->opcode = OP_Null; + pOp->p3 = 0; + } +#endif + } + } +} + +/* +** Two routines for printing the content of an sqlite3_index_info +** structure. Used for testing and debugging only. If neither +** SQLITE_TEST or SQLITE_DEBUG are defined, then these routines +** are no-ops. +*/ +#if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(WHERETRACE_ENABLED) +static void whereTraceIndexInfoInputs(sqlite3_index_info *p){ + int i; + if( !sqlite3WhereTrace ) return; + for(i=0; inConstraint; i++){ + sqlite3DebugPrintf( + " constraint[%d]: col=%d termid=%d op=%d usabled=%d collseq=%s\n", + i, + p->aConstraint[i].iColumn, + p->aConstraint[i].iTermOffset, + p->aConstraint[i].op, + p->aConstraint[i].usable, + sqlite3_vtab_collation(p,i)); + } + for(i=0; inOrderBy; i++){ + sqlite3DebugPrintf(" orderby[%d]: col=%d desc=%d\n", + i, + p->aOrderBy[i].iColumn, + p->aOrderBy[i].desc); + } +} +static void whereTraceIndexInfoOutputs(sqlite3_index_info *p){ + int i; + if( !sqlite3WhereTrace ) return; + for(i=0; inConstraint; i++){ + sqlite3DebugPrintf(" usage[%d]: argvIdx=%d omit=%d\n", + i, + p->aConstraintUsage[i].argvIndex, + p->aConstraintUsage[i].omit); + } + sqlite3DebugPrintf(" idxNum=%d\n", p->idxNum); + sqlite3DebugPrintf(" idxStr=%s\n", p->idxStr); + sqlite3DebugPrintf(" orderByConsumed=%d\n", p->orderByConsumed); + sqlite3DebugPrintf(" estimatedCost=%g\n", p->estimatedCost); + sqlite3DebugPrintf(" estimatedRows=%lld\n", p->estimatedRows); +} +#else +#define whereTraceIndexInfoInputs(A) +#define whereTraceIndexInfoOutputs(A) +#endif + +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX +/* +** Return TRUE if the WHERE clause term pTerm is of a form where it +** could be used with an index to access pSrc, assuming an appropriate +** index existed. +*/ +static int termCanDriveIndex( + const WhereTerm *pTerm, /* WHERE clause term to check */ + const SrcItem *pSrc, /* Table we are trying to access */ + const Bitmask notReady /* Tables in outer loops of the join */ +){ + char aff; + if( pTerm->leftCursor!=pSrc->iCursor ) return 0; + if( (pTerm->eOperator & (WO_EQ|WO_IS))==0 ) return 0; + if( (pSrc->fg.jointype & JT_LEFT) + && !ExprHasProperty(pTerm->pExpr, EP_FromJoin) + && (pTerm->eOperator & WO_IS) + ){ + /* Cannot use an IS term from the WHERE clause as an index driver for + ** the RHS of a LEFT JOIN. Such a term can only be used if it is from + ** the ON clause. */ + return 0; + } + if( (pTerm->prereqRight & notReady)!=0 ) return 0; + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); + if( pTerm->u.x.leftColumn<0 ) return 0; + aff = pSrc->pTab->aCol[pTerm->u.x.leftColumn].affinity; + if( !sqlite3IndexAffinityOk(pTerm->pExpr, aff) ) return 0; + testcase( pTerm->pExpr->op==TK_IS ); + return 1; +} +#endif + + +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX +/* +** Generate code to construct the Index object for an automatic index +** and to set up the WhereLevel object pLevel so that the code generator +** makes use of the automatic index. +*/ +static SQLITE_NOINLINE void constructAutomaticIndex( + Parse *pParse, /* The parsing context */ + const WhereClause *pWC, /* The WHERE clause */ + const SrcItem *pSrc, /* The FROM clause term to get the next index */ + const Bitmask notReady, /* Mask of cursors that are not available */ + WhereLevel *pLevel /* Write new index here */ +){ + int nKeyCol; /* Number of columns in the constructed index */ + WhereTerm *pTerm; /* A single term of the WHERE clause */ + WhereTerm *pWCEnd; /* End of pWC->a[] */ + Index *pIdx; /* Object describing the transient index */ + Vdbe *v; /* Prepared statement under construction */ + int addrInit; /* Address of the initialization bypass jump */ + Table *pTable; /* The table being indexed */ + int addrTop; /* Top of the index fill loop */ + int regRecord; /* Register holding an index record */ + int n; /* Column counter */ + int i; /* Loop counter */ + int mxBitCol; /* Maximum column in pSrc->colUsed */ + CollSeq *pColl; /* Collating sequence to on a column */ + WhereLoop *pLoop; /* The Loop object */ + char *zNotUsed; /* Extra space on the end of pIdx */ + Bitmask idxCols; /* Bitmap of columns used for indexing */ + Bitmask extraCols; /* Bitmap of additional columns */ + u8 sentWarning = 0; /* True if a warnning has been issued */ + Expr *pPartial = 0; /* Partial Index Expression */ + int iContinue = 0; /* Jump here to skip excluded rows */ + SrcItem *pTabItem; /* FROM clause term being indexed */ + int addrCounter = 0; /* Address where integer counter is initialized */ + int regBase; /* Array of registers where record is assembled */ + + /* Generate code to skip over the creation and initialization of the + ** transient index on 2nd and subsequent iterations of the loop. */ + v = pParse->pVdbe; + assert( v!=0 ); + addrInit = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + + /* Count the number of columns that will be added to the index + ** and used to match WHERE clause constraints */ + nKeyCol = 0; + pTable = pSrc->pTab; + pWCEnd = &pWC->a[pWC->nTerm]; + pLoop = pLevel->pWLoop; + idxCols = 0; + for(pTerm=pWC->a; pTermpExpr; + /* Make the automatic index a partial index if there are terms in the + ** WHERE clause (or the ON clause of a LEFT join) that constrain which + ** rows of the target table (pSrc) that can be used. */ + if( (pTerm->wtFlags & TERM_VIRTUAL)==0 + && sqlite3ExprIsTableConstraint(pExpr, pSrc) + ){ + pPartial = sqlite3ExprAnd(pParse, pPartial, + sqlite3ExprDup(pParse->db, pExpr, 0)); + } + if( termCanDriveIndex(pTerm, pSrc, notReady) ){ + int iCol; + Bitmask cMask; + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); + iCol = pTerm->u.x.leftColumn; + cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); + testcase( iCol==BMS ); + testcase( iCol==BMS-1 ); + if( !sentWarning ){ + sqlite3_log(SQLITE_WARNING_AUTOINDEX, + "automatic index on %s(%s)", pTable->zName, + pTable->aCol[iCol].zCnName); + sentWarning = 1; + } + if( (idxCols & cMask)==0 ){ + if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ){ + goto end_auto_index_create; + } + pLoop->aLTerm[nKeyCol++] = pTerm; + idxCols |= cMask; + } + } + } + assert( nKeyCol>0 || pParse->db->mallocFailed ); + pLoop->u.btree.nEq = pLoop->nLTerm = nKeyCol; + pLoop->wsFlags = WHERE_COLUMN_EQ | WHERE_IDX_ONLY | WHERE_INDEXED + | WHERE_AUTO_INDEX; + + /* Count the number of additional columns needed to create a + ** covering index. A "covering index" is an index that contains all + ** columns that are needed by the query. With a covering index, the + ** original table never needs to be accessed. Automatic indices must + ** be a covering index because the index will not be updated if the + ** original table changes and the index and table cannot both be used + ** if they go out of sync. + */ + extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); + mxBitCol = MIN(BMS-1,pTable->nCol); + testcase( pTable->nCol==BMS-1 ); + testcase( pTable->nCol==BMS-2 ); + for(i=0; icolUsed & MASKBIT(BMS-1) ){ + nKeyCol += pTable->nCol - BMS + 1; + } + + /* Construct the Index object to describe this index */ + pIdx = sqlite3AllocateIndexObject(pParse->db, nKeyCol+1, 0, &zNotUsed); + if( pIdx==0 ) goto end_auto_index_create; + pLoop->u.btree.pIndex = pIdx; + pIdx->zName = "auto-index"; + pIdx->pTable = pTable; + n = 0; + idxCols = 0; + for(pTerm=pWC->a; pTermeOperator & (WO_OR|WO_AND))==0 ); + iCol = pTerm->u.x.leftColumn; + cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); + testcase( iCol==BMS-1 ); + testcase( iCol==BMS ); + if( (idxCols & cMask)==0 ){ + Expr *pX = pTerm->pExpr; + idxCols |= cMask; + pIdx->aiColumn[n] = pTerm->u.x.leftColumn; + pColl = sqlite3ExprCompareCollSeq(pParse, pX); + assert( pColl!=0 || pParse->nErr>0 ); /* TH3 collate01.800 */ + pIdx->azColl[n] = pColl ? pColl->zName : sqlite3StrBINARY; + n++; + } + } + } + assert( (u32)n==pLoop->u.btree.nEq ); + + /* Add additional columns needed to make the automatic index into + ** a covering index */ + for(i=0; iaiColumn[n] = i; + pIdx->azColl[n] = sqlite3StrBINARY; + n++; + } + } + if( pSrc->colUsed & MASKBIT(BMS-1) ){ + for(i=BMS-1; inCol; i++){ + pIdx->aiColumn[n] = i; + pIdx->azColl[n] = sqlite3StrBINARY; + n++; + } + } + assert( n==nKeyCol ); + pIdx->aiColumn[n] = XN_ROWID; + pIdx->azColl[n] = sqlite3StrBINARY; + + /* Create the automatic index */ + assert( pLevel->iIdxCur>=0 ); + pLevel->iIdxCur = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + VdbeComment((v, "for %s", pTable->zName)); + if( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){ + pLevel->regFilter = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Blob, 10000, pLevel->regFilter); + } + + /* Fill the automatic index with content */ + pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; + if( pTabItem->fg.viaCoroutine ){ + int regYield = pTabItem->regReturn; + addrCounter = sqlite3VdbeAddOp2(v, OP_Integer, 0, 0); + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub); + addrTop = sqlite3VdbeAddOp1(v, OP_Yield, regYield); + VdbeCoverage(v); + VdbeComment((v, "next row of %s", pTabItem->pTab->zName)); + }else{ + addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v); + } + if( pPartial ){ + iContinue = sqlite3VdbeMakeLabel(pParse); + sqlite3ExprIfFalse(pParse, pPartial, iContinue, SQLITE_JUMPIFNULL); + pLoop->wsFlags |= WHERE_PARTIALIDX; + } + regRecord = sqlite3GetTempReg(pParse); + regBase = sqlite3GenerateIndexKey( + pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0 + ); + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0, + regBase, pLoop->u.btree.nEq); + } + sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); + sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); + if( pTabItem->fg.viaCoroutine ){ + sqlite3VdbeChangeP2(v, addrCounter, regBase+n); + testcase( pParse->db->mallocFailed ); + assert( pLevel->iIdxCur>0 ); + translateColumnToCopy(pParse, addrTop, pLevel->iTabCur, + pTabItem->regResult, pLevel->iIdxCur); + sqlite3VdbeGoto(v, addrTop); + pTabItem->fg.viaCoroutine = 0; + }else{ + sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v); + sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX); + } + sqlite3VdbeJumpHere(v, addrTop); + sqlite3ReleaseTempReg(pParse, regRecord); + + /* Jump here when skipping the initialization */ + sqlite3VdbeJumpHere(v, addrInit); + +end_auto_index_create: + sqlite3ExprDelete(pParse->db, pPartial); +} +#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */ + +/* +** Generate bytecode that will initialize a Bloom filter that is appropriate +** for pLevel. +** +** If there are inner loops within pLevel that have the WHERE_BLOOMFILTER +** flag set, initialize a Bloomfilter for them as well. Except don't do +** this recursive initialization if the SQLITE_BloomPulldown optimization has +** been turned off. +** +** When the Bloom filter is initialized, the WHERE_BLOOMFILTER flag is cleared +** from the loop, but the regFilter value is set to a register that implements +** the Bloom filter. When regFilter is positive, the +** sqlite3WhereCodeOneLoopStart() will generate code to test the Bloom filter +** and skip the subsequence B-Tree seek if the Bloom filter indicates that +** no matching rows exist. +** +** This routine may only be called if it has previously been determined that +** the loop would benefit from a Bloom filter, and the WHERE_BLOOMFILTER bit +** is set. +*/ +static SQLITE_NOINLINE void sqlite3ConstructBloomFilter( + WhereInfo *pWInfo, /* The WHERE clause */ + int iLevel, /* Index in pWInfo->a[] that is pLevel */ + WhereLevel *pLevel, /* Make a Bloom filter for this FROM term */ + Bitmask notReady /* Loops that are not ready */ +){ + int addrOnce; /* Address of opening OP_Once */ + int addrTop; /* Address of OP_Rewind */ + int addrCont; /* Jump here to skip a row */ + const WhereTerm *pTerm; /* For looping over WHERE clause terms */ + const WhereTerm *pWCEnd; /* Last WHERE clause term */ + Parse *pParse = pWInfo->pParse; /* Parsing context */ + Vdbe *v = pParse->pVdbe; /* VDBE under construction */ + WhereLoop *pLoop = pLevel->pWLoop; /* The loop being coded */ + int iCur; /* Cursor for table getting the filter */ + + assert( pLoop!=0 ); + assert( v!=0 ); + assert( pLoop->wsFlags & WHERE_BLOOMFILTER ); + + addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); + do{ + const SrcItem *pItem; + const Table *pTab; + u64 sz; + sqlite3WhereExplainBloomFilter(pParse, pWInfo, pLevel); + addrCont = sqlite3VdbeMakeLabel(pParse); + iCur = pLevel->iTabCur; + pLevel->regFilter = ++pParse->nMem; + + /* The Bloom filter is a Blob held in a register. Initialize it + ** to zero-filled blob of at least 80K bits, but maybe more if the + ** estimated size of the table is larger. We could actually + ** measure the size of the table at run-time using OP_Count with + ** P3==1 and use that value to initialize the blob. But that makes + ** testing complicated. By basing the blob size on the value in the + ** sqlite_stat1 table, testing is much easier. + */ + pItem = &pWInfo->pTabList->a[pLevel->iFrom]; + assert( pItem!=0 ); + pTab = pItem->pTab; + assert( pTab!=0 ); + sz = sqlite3LogEstToInt(pTab->nRowLogEst); + if( sz<10000 ){ + sz = 10000; + }else if( sz>10000000 ){ + sz = 10000000; + } + sqlite3VdbeAddOp2(v, OP_Blob, (int)sz, pLevel->regFilter); + + addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v); + pWCEnd = &pWInfo->sWC.a[pWInfo->sWC.nTerm]; + for(pTerm=pWInfo->sWC.a; pTermpExpr; + if( (pTerm->wtFlags & TERM_VIRTUAL)==0 + && sqlite3ExprIsTableConstraint(pExpr, pItem) + ){ + sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL); + } + } + if( pLoop->wsFlags & WHERE_IPK ){ + int r1 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp2(v, OP_Rowid, iCur, r1); + sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0, r1, 1); + sqlite3ReleaseTempReg(pParse, r1); + }else{ + Index *pIdx = pLoop->u.btree.pIndex; + int n = pLoop->u.btree.nEq; + int r1 = sqlite3GetTempRange(pParse, n); + int jj; + for(jj=0; jjaiColumn[jj]; + assert( pIdx->pTable==pItem->pTab ); + sqlite3ExprCodeGetColumnOfTable(v, pIdx->pTable, iCur, iCol,r1+jj); + } + sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0, r1, n); + sqlite3ReleaseTempRange(pParse, r1, n); + } + sqlite3VdbeResolveLabel(v, addrCont); + sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); + VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addrTop); + pLoop->wsFlags &= ~WHERE_BLOOMFILTER; + if( OptimizationDisabled(pParse->db, SQLITE_BloomPulldown) ) break; + while( ++iLevel < pWInfo->nLevel ){ + const SrcItem *pTabItem; + pLevel = &pWInfo->a[iLevel]; + pTabItem = &pWInfo->pTabList->a[pLevel->iFrom]; + if( pTabItem->fg.jointype & JT_LEFT ) continue; + pLoop = pLevel->pWLoop; + if( NEVER(pLoop==0) ) continue; + if( pLoop->prereq & notReady ) continue; + if( (pLoop->wsFlags & (WHERE_BLOOMFILTER|WHERE_COLUMN_IN)) + ==WHERE_BLOOMFILTER + ){ + /* This is a candidate for bloom-filter pull-down (early evaluation). + ** The test that WHERE_COLUMN_IN is omitted is important, as we are + ** not able to do early evaluation of bloom filters that make use of + ** the IN operator */ + break; + } + } + }while( iLevel < pWInfo->nLevel ); + sqlite3VdbeJumpHere(v, addrOnce); +} + + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Allocate and populate an sqlite3_index_info structure. It is the +** responsibility of the caller to eventually release the structure +** by passing the pointer returned by this function to freeIndexInfo(). +*/ +static sqlite3_index_info *allocateIndexInfo( + WhereInfo *pWInfo, /* The WHERE clause */ + WhereClause *pWC, /* The WHERE clause being analyzed */ + Bitmask mUnusable, /* Ignore terms with these prereqs */ + SrcItem *pSrc, /* The FROM clause term that is the vtab */ + u16 *pmNoOmit /* Mask of terms not to omit */ +){ + int i, j; + int nTerm; + Parse *pParse = pWInfo->pParse; + struct sqlite3_index_constraint *pIdxCons; + struct sqlite3_index_orderby *pIdxOrderBy; + struct sqlite3_index_constraint_usage *pUsage; + struct HiddenIndexInfo *pHidden; + WhereTerm *pTerm; + int nOrderBy; + sqlite3_index_info *pIdxInfo; + u16 mNoOmit = 0; + const Table *pTab; + int eDistinct = 0; + ExprList *pOrderBy = pWInfo->pOrderBy; + + assert( pSrc!=0 ); + pTab = pSrc->pTab; + assert( pTab!=0 ); + assert( IsVirtual(pTab) ); + + /* Find all WHERE clause constraints referring to this virtual table. + ** Mark each term with the TERM_OK flag. Set nTerm to the number of + ** terms found. + */ + for(i=nTerm=0, pTerm=pWC->a; inTerm; i++, pTerm++){ + pTerm->wtFlags &= ~TERM_OK; + if( pTerm->leftCursor != pSrc->iCursor ) continue; + if( pTerm->prereqRight & mUnusable ) continue; + assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); + testcase( pTerm->eOperator & WO_IN ); + testcase( pTerm->eOperator & WO_ISNULL ); + testcase( pTerm->eOperator & WO_IS ); + testcase( pTerm->eOperator & WO_ALL ); + if( (pTerm->eOperator & ~(WO_EQUIV))==0 ) continue; + if( pTerm->wtFlags & TERM_VNULL ) continue; + + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); + assert( pTerm->u.x.leftColumn>=XN_ROWID ); + assert( pTerm->u.x.leftColumnnCol ); + + /* tag-20191211-002: WHERE-clause constraints are not useful to the + ** right-hand table of a LEFT JOIN. See tag-20191211-001 for the + ** equivalent restriction for ordinary tables. */ + if( (pSrc->fg.jointype & JT_LEFT)!=0 + && !ExprHasProperty(pTerm->pExpr, EP_FromJoin) + ){ + continue; + } + nTerm++; + pTerm->wtFlags |= TERM_OK; + } + + /* If the ORDER BY clause contains only columns in the current + ** virtual table then allocate space for the aOrderBy part of + ** the sqlite3_index_info structure. + */ + nOrderBy = 0; + if( pOrderBy ){ + int n = pOrderBy->nExpr; + for(i=0; ia[i].pExpr; + Expr *pE2; + + /* Skip over constant terms in the ORDER BY clause */ + if( sqlite3ExprIsConstant(pExpr) ){ + continue; + } + + /* Virtual tables are unable to deal with NULLS FIRST */ + if( pOrderBy->a[i].sortFlags & KEYINFO_ORDER_BIGNULL ) break; + + /* First case - a direct column references without a COLLATE operator */ + if( pExpr->op==TK_COLUMN && pExpr->iTable==pSrc->iCursor ){ + assert( pExpr->iColumn>=XN_ROWID && pExpr->iColumnnCol ); + continue; + } + + /* 2nd case - a column reference with a COLLATE operator. Only match + ** of the COLLATE operator matches the collation of the column. */ + if( pExpr->op==TK_COLLATE + && (pE2 = pExpr->pLeft)->op==TK_COLUMN + && pE2->iTable==pSrc->iCursor + ){ + const char *zColl; /* The collating sequence name */ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + assert( pExpr->u.zToken!=0 ); + assert( pE2->iColumn>=XN_ROWID && pE2->iColumnnCol ); + pExpr->iColumn = pE2->iColumn; + if( pE2->iColumn<0 ) continue; /* Collseq does not matter for rowid */ + zColl = sqlite3ColumnColl(&pTab->aCol[pE2->iColumn]); + if( zColl==0 ) zColl = sqlite3StrBINARY; + if( sqlite3_stricmp(pExpr->u.zToken, zColl)==0 ) continue; + } + + /* No matches cause a break out of the loop */ + break; + } + if( i==n ){ + nOrderBy = n; + if( (pWInfo->wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY)) ){ + eDistinct = 1 + ((pWInfo->wctrlFlags & WHERE_DISTINCTBY)!=0); + } + } + } + + /* Allocate the sqlite3_index_info structure + */ + pIdxInfo = sqlite3DbMallocZero(pParse->db, sizeof(*pIdxInfo) + + (sizeof(*pIdxCons) + sizeof(*pUsage))*nTerm + + sizeof(*pIdxOrderBy)*nOrderBy + sizeof(*pHidden) + + sizeof(sqlite3_value*)*nTerm ); + if( pIdxInfo==0 ){ + sqlite3ErrorMsg(pParse, "out of memory"); + return 0; + } + pHidden = (struct HiddenIndexInfo*)&pIdxInfo[1]; + pIdxCons = (struct sqlite3_index_constraint*)&pHidden->aRhs[nTerm]; + pIdxOrderBy = (struct sqlite3_index_orderby*)&pIdxCons[nTerm]; + pUsage = (struct sqlite3_index_constraint_usage*)&pIdxOrderBy[nOrderBy]; + pIdxInfo->aConstraint = pIdxCons; + pIdxInfo->aOrderBy = pIdxOrderBy; + pIdxInfo->aConstraintUsage = pUsage; + pHidden->pWC = pWC; + pHidden->pParse = pParse; + pHidden->eDistinct = eDistinct; + pHidden->mIn = 0; + for(i=j=0, pTerm=pWC->a; inTerm; i++, pTerm++){ + u16 op; + if( (pTerm->wtFlags & TERM_OK)==0 ) continue; + pIdxCons[j].iColumn = pTerm->u.x.leftColumn; + pIdxCons[j].iTermOffset = i; + op = pTerm->eOperator & WO_ALL; + if( op==WO_IN ){ + if( (pTerm->wtFlags & TERM_SLICE)==0 ){ + pHidden->mIn |= SMASKBIT32(j); + } + op = WO_EQ; + } + if( op==WO_AUX ){ + pIdxCons[j].op = pTerm->eMatchOp; + }else if( op & (WO_ISNULL|WO_IS) ){ + if( op==WO_ISNULL ){ + pIdxCons[j].op = SQLITE_INDEX_CONSTRAINT_ISNULL; + }else{ + pIdxCons[j].op = SQLITE_INDEX_CONSTRAINT_IS; + } + }else{ + pIdxCons[j].op = (u8)op; + /* The direct assignment in the previous line is possible only because + ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The + ** following asserts verify this fact. */ + assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ ); + assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT ); + assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE ); + assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT ); + assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE ); + assert( pTerm->eOperator&(WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_AUX) ); + + if( op & (WO_LT|WO_LE|WO_GT|WO_GE) + && sqlite3ExprIsVector(pTerm->pExpr->pRight) + ){ + testcase( j!=i ); + if( j<16 ) mNoOmit |= (1 << j); + if( op==WO_LT ) pIdxCons[j].op = WO_LE; + if( op==WO_GT ) pIdxCons[j].op = WO_GE; + } + } + + j++; + } + assert( j==nTerm ); + pIdxInfo->nConstraint = j; + for(i=j=0; ia[i].pExpr; + if( sqlite3ExprIsConstant(pExpr) ) continue; + assert( pExpr->op==TK_COLUMN + || (pExpr->op==TK_COLLATE && pExpr->pLeft->op==TK_COLUMN + && pExpr->iColumn==pExpr->pLeft->iColumn) ); + pIdxOrderBy[j].iColumn = pExpr->iColumn; + pIdxOrderBy[j].desc = pOrderBy->a[i].sortFlags & KEYINFO_ORDER_DESC; + j++; + } + pIdxInfo->nOrderBy = j; + + *pmNoOmit = mNoOmit; + return pIdxInfo; +} + +/* +** Free an sqlite3_index_info structure allocated by allocateIndexInfo() +** and possibly modified by xBestIndex methods. +*/ +static void freeIndexInfo(sqlite3 *db, sqlite3_index_info *pIdxInfo){ + HiddenIndexInfo *pHidden; + int i; + assert( pIdxInfo!=0 ); + pHidden = (HiddenIndexInfo*)&pIdxInfo[1]; + assert( pHidden->pParse!=0 ); + assert( pHidden->pParse->db==db ); + for(i=0; inConstraint; i++){ + sqlite3ValueFree(pHidden->aRhs[i]); /* IMP: R-14553-25174 */ + pHidden->aRhs[i] = 0; + } + sqlite3DbFree(db, pIdxInfo); +} + +/* +** The table object reference passed as the second argument to this function +** must represent a virtual table. This function invokes the xBestIndex() +** method of the virtual table with the sqlite3_index_info object that +** comes in as the 3rd argument to this function. +** +** If an error occurs, pParse is populated with an error message and an +** appropriate error code is returned. A return of SQLITE_CONSTRAINT from +** xBestIndex is not considered an error. SQLITE_CONSTRAINT indicates that +** the current configuration of "unusable" flags in sqlite3_index_info can +** not result in a valid plan. +** +** Whether or not an error is returned, it is the responsibility of the +** caller to eventually free p->idxStr if p->needToFreeIdxStr indicates +** that this is required. +*/ +static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ + sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; + int rc; + + whereTraceIndexInfoInputs(p); + pParse->db->nSchemaLock++; + rc = pVtab->pModule->xBestIndex(pVtab, p); + pParse->db->nSchemaLock--; + whereTraceIndexInfoOutputs(p); + + if( rc!=SQLITE_OK && rc!=SQLITE_CONSTRAINT ){ + if( rc==SQLITE_NOMEM ){ + sqlite3OomFault(pParse->db); + }else if( !pVtab->zErrMsg ){ + sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc)); + }else{ + sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg); + } + } + sqlite3_free(pVtab->zErrMsg); + pVtab->zErrMsg = 0; + return rc; +} +#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Estimate the location of a particular key among all keys in an +** index. Store the results in aStat as follows: +** +** aStat[0] Est. number of rows less than pRec +** aStat[1] Est. number of rows equal to pRec +** +** Return the index of the sample that is the smallest sample that +** is greater than or equal to pRec. Note that this index is not an index +** into the aSample[] array - it is an index into a virtual set of samples +** based on the contents of aSample[] and the number of fields in record +** pRec. +*/ +static int whereKeyStats( + Parse *pParse, /* Database connection */ + Index *pIdx, /* Index to consider domain of */ + UnpackedRecord *pRec, /* Vector of values to consider */ + int roundUp, /* Round up if true. Round down if false */ + tRowcnt *aStat /* OUT: stats written here */ +){ + IndexSample *aSample = pIdx->aSample; + int iCol; /* Index of required stats in anEq[] etc. */ + int i; /* Index of first sample >= pRec */ + int iSample; /* Smallest sample larger than or equal to pRec */ + int iMin = 0; /* Smallest sample not yet tested */ + int iTest; /* Next sample to test */ + int res; /* Result of comparison operation */ + int nField; /* Number of fields in pRec */ + tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */ + +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER( pParse ); +#endif + assert( pRec!=0 ); + assert( pIdx->nSample>0 ); + assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol ); + + /* Do a binary search to find the first sample greater than or equal + ** to pRec. If pRec contains a single field, the set of samples to search + ** is simply the aSample[] array. If the samples in aSample[] contain more + ** than one fields, all fields following the first are ignored. + ** + ** If pRec contains N fields, where N is more than one, then as well as the + ** samples in aSample[] (truncated to N fields), the search also has to + ** consider prefixes of those samples. For example, if the set of samples + ** in aSample is: + ** + ** aSample[0] = (a, 5) + ** aSample[1] = (a, 10) + ** aSample[2] = (b, 5) + ** aSample[3] = (c, 100) + ** aSample[4] = (c, 105) + ** + ** Then the search space should ideally be the samples above and the + ** unique prefixes [a], [b] and [c]. But since that is hard to organize, + ** the code actually searches this set: + ** + ** 0: (a) + ** 1: (a, 5) + ** 2: (a, 10) + ** 3: (a, 10) + ** 4: (b) + ** 5: (b, 5) + ** 6: (c) + ** 7: (c, 100) + ** 8: (c, 105) + ** 9: (c, 105) + ** + ** For each sample in the aSample[] array, N samples are present in the + ** effective sample array. In the above, samples 0 and 1 are based on + ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc. + ** + ** Often, sample i of each block of N effective samples has (i+1) fields. + ** Except, each sample may be extended to ensure that it is greater than or + ** equal to the previous sample in the array. For example, in the above, + ** sample 2 is the first sample of a block of N samples, so at first it + ** appears that it should be 1 field in size. However, that would make it + ** smaller than sample 1, so the binary search would not work. As a result, + ** it is extended to two fields. The duplicates that this creates do not + ** cause any problems. + */ + nField = pRec->nField; + iCol = 0; + iSample = pIdx->nSample * nField; + do{ + int iSamp; /* Index in aSample[] of test sample */ + int n; /* Number of fields in test sample */ + + iTest = (iMin+iSample)/2; + iSamp = iTest / nField; + if( iSamp>0 ){ + /* The proposed effective sample is a prefix of sample aSample[iSamp]. + ** Specifically, the shortest prefix of at least (1 + iTest%nField) + ** fields that is greater than the previous effective sample. */ + for(n=(iTest % nField) + 1; nnField = n; + res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec); + if( res<0 ){ + iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1]; + iMin = iTest+1; + }else if( res==0 && ndb->mallocFailed==0 ){ + if( res==0 ){ + /* If (res==0) is true, then pRec must be equal to sample i. */ + assert( inSample ); + assert( iCol==nField-1 ); + pRec->nField = nField; + assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) + || pParse->db->mallocFailed + ); + }else{ + /* Unless i==pIdx->nSample, indicating that pRec is larger than + ** all samples in the aSample[] array, pRec must be smaller than the + ** (iCol+1) field prefix of sample i. */ + assert( i<=pIdx->nSample && i>=0 ); + pRec->nField = iCol+1; + assert( i==pIdx->nSample + || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 + || pParse->db->mallocFailed ); + + /* if i==0 and iCol==0, then record pRec is smaller than all samples + ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must + ** be greater than or equal to the (iCol) field prefix of sample i. + ** If (i>0), then pRec must also be greater than sample (i-1). */ + if( iCol>0 ){ + pRec->nField = iCol; + assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 + || pParse->db->mallocFailed ); + } + if( i>0 ){ + pRec->nField = nField; + assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 + || pParse->db->mallocFailed ); + } + } + } +#endif /* ifdef SQLITE_DEBUG */ + + if( res==0 ){ + /* Record pRec is equal to sample i */ + assert( iCol==nField-1 ); + aStat[0] = aSample[i].anLt[iCol]; + aStat[1] = aSample[i].anEq[iCol]; + }else{ + /* At this point, the (iCol+1) field prefix of aSample[i] is the first + ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec + ** is larger than all samples in the array. */ + tRowcnt iUpper, iGap; + if( i>=pIdx->nSample ){ + iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); + }else{ + iUpper = aSample[i].anLt[iCol]; + } + + if( iLower>=iUpper ){ + iGap = 0; + }else{ + iGap = iUpper - iLower; + } + if( roundUp ){ + iGap = (iGap*2)/3; + }else{ + iGap = iGap/3; + } + aStat[0] = iLower + iGap; + aStat[1] = pIdx->aAvgEq[nField-1]; + } + + /* Restore the pRec->nField value before returning. */ + pRec->nField = nField; + return i; +} +#endif /* SQLITE_ENABLE_STAT4 */ + +/* +** If it is not NULL, pTerm is a term that provides an upper or lower +** bound on a range scan. Without considering pTerm, it is estimated +** that the scan will visit nNew rows. This function returns the number +** estimated to be visited after taking pTerm into account. +** +** If the user explicitly specified a likelihood() value for this term, +** then the return value is the likelihood multiplied by the number of +** input rows. Otherwise, this function assumes that an "IS NOT NULL" term +** has a likelihood of 0.50, and any other term a likelihood of 0.25. +*/ +static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ + LogEst nRet = nNew; + if( pTerm ){ + if( pTerm->truthProb<=0 ){ + nRet += pTerm->truthProb; + }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ + nRet -= 20; assert( 20==sqlite3LogEst(4) ); + } + } + return nRet; +} + + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Return the affinity for a single column of an index. +*/ +SQLITE_PRIVATE char sqlite3IndexColumnAffinity(sqlite3 *db, Index *pIdx, int iCol){ + assert( iCol>=0 && iColnColumn ); + if( !pIdx->zColAff ){ + if( sqlite3IndexAffinityStr(db, pIdx)==0 ) return SQLITE_AFF_BLOB; + } + assert( pIdx->zColAff[iCol]!=0 ); + return pIdx->zColAff[iCol]; +} +#endif + + +#ifdef SQLITE_ENABLE_STAT4 +/* +** This function is called to estimate the number of rows visited by a +** range-scan on a skip-scan index. For example: +** +** CREATE INDEX i1 ON t1(a, b, c); +** SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?; +** +** Value pLoop->nOut is currently set to the estimated number of rows +** visited for scanning (a=? AND b=?). This function reduces that estimate +** by some factor to account for the (c BETWEEN ? AND ?) expression based +** on the stat4 data for the index. this scan will be peformed multiple +** times (once for each (a,b) combination that matches a=?) is dealt with +** by the caller. +** +** It does this by scanning through all stat4 samples, comparing values +** extracted from pLower and pUpper with the corresponding column in each +** sample. If L and U are the number of samples found to be less than or +** equal to the values extracted from pLower and pUpper respectively, and +** N is the total number of samples, the pLoop->nOut value is adjusted +** as follows: +** +** nOut = nOut * ( min(U - L, 1) / N ) +** +** If pLower is NULL, or a value cannot be extracted from the term, L is +** set to zero. If pUpper is NULL, or a value cannot be extracted from it, +** U is set to N. +** +** Normally, this function sets *pbDone to 1 before returning. However, +** if no value can be extracted from either pLower or pUpper (and so the +** estimate of the number of rows delivered remains unchanged), *pbDone +** is left as is. +** +** If an error occurs, an SQLite error code is returned. Otherwise, +** SQLITE_OK. +*/ +static int whereRangeSkipScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ + WhereLoop *pLoop, /* Update the .nOut value of this loop */ + int *pbDone /* Set to true if at least one expr. value extracted */ +){ + Index *p = pLoop->u.btree.pIndex; + int nEq = pLoop->u.btree.nEq; + sqlite3 *db = pParse->db; + int nLower = -1; + int nUpper = p->nSample+1; + int rc = SQLITE_OK; + u8 aff = sqlite3IndexColumnAffinity(db, p, nEq); + CollSeq *pColl; + + sqlite3_value *p1 = 0; /* Value extracted from pLower */ + sqlite3_value *p2 = 0; /* Value extracted from pUpper */ + sqlite3_value *pVal = 0; /* Value extracted from record */ + + pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]); + if( pLower ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1); + nLower = 0; + } + if( pUpper && rc==SQLITE_OK ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); + nUpper = p2 ? 0 : p->nSample; + } + + if( p1 || p2 ){ + int i; + int nDiff; + for(i=0; rc==SQLITE_OK && inSample; i++){ + rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal); + if( rc==SQLITE_OK && p1 ){ + int res = sqlite3MemCompare(p1, pVal, pColl); + if( res>=0 ) nLower++; + } + if( rc==SQLITE_OK && p2 ){ + int res = sqlite3MemCompare(p2, pVal, pColl); + if( res>=0 ) nUpper++; + } + } + nDiff = (nUpper - nLower); + if( nDiff<=0 ) nDiff = 1; + + /* If there is both an upper and lower bound specified, and the + ** comparisons indicate that they are close together, use the fallback + ** method (assume that the scan visits 1/64 of the rows) for estimating + ** the number of rows visited. Otherwise, estimate the number of rows + ** using the method described in the header comment for this function. */ + if( nDiff!=1 || pUpper==0 || pLower==0 ){ + int nAdjust = (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); + pLoop->nOut -= nAdjust; + *pbDone = 1; + WHERETRACE(0x10, ("range skip-scan regions: %u..%u adjust=%d est=%d\n", + nLower, nUpper, nAdjust*-1, pLoop->nOut)); + } + + }else{ + assert( *pbDone==0 ); + } + + sqlite3ValueFree(p1); + sqlite3ValueFree(p2); + sqlite3ValueFree(pVal); + + return rc; +} +#endif /* SQLITE_ENABLE_STAT4 */ + +/* +** This function is used to estimate the number of rows that will be visited +** by scanning an index for a range of values. The range may have an upper +** bound, a lower bound, or both. The WHERE clause terms that set the upper +** and lower bounds are represented by pLower and pUpper respectively. For +** example, assuming that index p is on t1(a): +** +** ... FROM t1 WHERE a > ? AND a < ? ... +** |_____| |_____| +** | | +** pLower pUpper +** +** If either of the upper or lower bound is not present, then NULL is passed in +** place of the corresponding WhereTerm. +** +** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index +** column subject to the range constraint. Or, equivalently, the number of +** equality constraints optimized by the proposed index scan. For example, +** assuming index p is on t1(a, b), and the SQL query is: +** +** ... FROM t1 WHERE a = ? AND b > ? AND b < ? ... +** +** then nEq is set to 1 (as the range restricted column, b, is the second +** left-most column of the index). Or, if the query is: +** +** ... FROM t1 WHERE a > ? AND a < ? ... +** +** then nEq is set to 0. +** +** When this function is called, *pnOut is set to the sqlite3LogEst() of the +** number of rows that the index scan is expected to visit without +** considering the range constraints. If nEq is 0, then *pnOut is the number of +** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) +** to account for the range constraints pLower and pUpper. +** +** In the absence of sqlite_stat4 ANALYZE data, or if such data cannot be +** used, a single range inequality reduces the search space by a factor of 4. +** and a pair of constraints (x>? AND x123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ + WhereLoop *pLoop /* Modify the .nOut and maybe .rRun fields */ +){ + int rc = SQLITE_OK; + int nOut = pLoop->nOut; + LogEst nNew; + +#ifdef SQLITE_ENABLE_STAT4 + Index *p = pLoop->u.btree.pIndex; + int nEq = pLoop->u.btree.nEq; + + if( p->nSample>0 && ALWAYS(nEqnSampleCol) + && OptimizationEnabled(pParse->db, SQLITE_Stat4) + ){ + if( nEq==pBuilder->nRecValid ){ + UnpackedRecord *pRec = pBuilder->pRec; + tRowcnt a[2]; + int nBtm = pLoop->u.btree.nBtm; + int nTop = pLoop->u.btree.nTop; + + /* Variable iLower will be set to the estimate of the number of rows in + ** the index that are less than the lower bound of the range query. The + ** lower bound being the concatenation of $P and $L, where $P is the + ** key-prefix formed by the nEq values matched against the nEq left-most + ** columns of the index, and $L is the value in pLower. + ** + ** Or, if pLower is NULL or $L cannot be extracted from it (because it + ** is not a simple variable or literal value), the lower bound of the + ** range is $P. Due to a quirk in the way whereKeyStats() works, even + ** if $L is available, whereKeyStats() is called for both ($P) and + ** ($P:$L) and the larger of the two returned values is used. + ** + ** Similarly, iUpper is to be set to the estimate of the number of rows + ** less than the upper bound of the range query. Where the upper bound + ** is either ($P) or ($P:$U). Again, even if $U is available, both values + ** of iUpper are requested of whereKeyStats() and the smaller used. + ** + ** The number of rows between the two bounds is then just iUpper-iLower. + */ + tRowcnt iLower; /* Rows less than the lower bound */ + tRowcnt iUpper; /* Rows less than the upper bound */ + int iLwrIdx = -2; /* aSample[] for the lower bound */ + int iUprIdx = -1; /* aSample[] for the upper bound */ + + if( pRec ){ + testcase( pRec->nField!=pBuilder->nRecValid ); + pRec->nField = pBuilder->nRecValid; + } + /* Determine iLower and iUpper using ($P) only. */ + if( nEq==0 ){ + iLower = 0; + iUpper = p->nRowEst0; + }else{ + /* Note: this call could be optimized away - since the same values must + ** have been requested when testing key $P in whereEqualScanEst(). */ + whereKeyStats(pParse, p, pRec, 0, a); + iLower = a[0]; + iUpper = a[0] + a[1]; + } + + assert( pLower==0 || (pLower->eOperator & (WO_GT|WO_GE))!=0 ); + assert( pUpper==0 || (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); + assert( p->aSortOrder!=0 ); + if( p->aSortOrder[nEq] ){ + /* The roles of pLower and pUpper are swapped for a DESC index */ + SWAP(WhereTerm*, pLower, pUpper); + SWAP(int, nBtm, nTop); + } + + /* If possible, improve on the iLower estimate using ($P:$L). */ + if( pLower ){ + int n; /* Values extracted from pExpr */ + Expr *pExpr = pLower->pExpr->pRight; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, nBtm, nEq, &n); + if( rc==SQLITE_OK && n ){ + tRowcnt iNew; + u16 mask = WO_GT|WO_LE; + if( sqlite3ExprVectorSize(pExpr)>n ) mask = (WO_LE|WO_LT); + iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a); + iNew = a[0] + ((pLower->eOperator & mask) ? a[1] : 0); + if( iNew>iLower ) iLower = iNew; + nOut--; + pLower = 0; + } + } + + /* If possible, improve on the iUpper estimate using ($P:$U). */ + if( pUpper ){ + int n; /* Values extracted from pExpr */ + Expr *pExpr = pUpper->pExpr->pRight; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, nTop, nEq, &n); + if( rc==SQLITE_OK && n ){ + tRowcnt iNew; + u16 mask = WO_GT|WO_LE; + if( sqlite3ExprVectorSize(pExpr)>n ) mask = (WO_LE|WO_LT); + iUprIdx = whereKeyStats(pParse, p, pRec, 1, a); + iNew = a[0] + ((pUpper->eOperator & mask) ? a[1] : 0); + if( iNewpRec = pRec; + if( rc==SQLITE_OK ){ + if( iUpper>iLower ){ + nNew = sqlite3LogEst(iUpper - iLower); + /* TUNING: If both iUpper and iLower are derived from the same + ** sample, then assume they are 4x more selective. This brings + ** the estimated selectivity more in line with what it would be + ** if estimated without the use of STAT4 tables. */ + if( iLwrIdx==iUprIdx ) nNew -= 20; assert( 20==sqlite3LogEst(4) ); + }else{ + nNew = 10; assert( 10==sqlite3LogEst(2) ); + } + if( nNewwtFlags & TERM_VNULL)==0 ); + nNew = whereRangeAdjust(pLower, nOut); + nNew = whereRangeAdjust(pUpper, nNew); + + /* TUNING: If there is both an upper and lower limit and neither limit + ** has an application-defined likelihood(), assume the range is + ** reduced by an additional 75%. This means that, by default, an open-ended + ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the + ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to + ** match 1/64 of the index. */ + if( pLower && pLower->truthProb>0 && pUpper && pUpper->truthProb>0 ){ + nNew -= 20; + } + + nOut -= (pLower!=0) + (pUpper!=0); + if( nNew<10 ) nNew = 10; + if( nNewnOut>nOut ){ + WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n", + pLoop->nOut, nOut)); + } +#endif + pLoop->nOut = (LogEst)nOut; + return rc; +} + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Estimate the number of rows that will be returned based on +** an equality constraint x=VALUE and where that VALUE occurs in +** the histogram data. This only works when x is the left-most +** column of an index and sqlite_stat4 histogram data is available +** for that index. When pExpr==NULL that means the constraint is +** "x IS NULL" instead of "x=VALUE". +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +static int whereEqualScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereLoopBuilder *pBuilder, + Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ + tRowcnt *pnRow /* Write the revised row estimate here */ +){ + Index *p = pBuilder->pNew->u.btree.pIndex; + int nEq = pBuilder->pNew->u.btree.nEq; + UnpackedRecord *pRec = pBuilder->pRec; + int rc; /* Subfunction return code */ + tRowcnt a[2]; /* Statistics */ + int bOk; + + assert( nEq>=1 ); + assert( nEq<=p->nColumn ); + assert( p->aSample!=0 ); + assert( p->nSample>0 ); + assert( pBuilder->nRecValidnRecValid<(nEq-1) ){ + return SQLITE_NOTFOUND; + } + + /* This is an optimization only. The call to sqlite3Stat4ProbeSetValue() + ** below would return the same value. */ + if( nEq>=p->nColumn ){ + *pnRow = 1; + return SQLITE_OK; + } + + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, 1, nEq-1, &bOk); + pBuilder->pRec = pRec; + if( rc!=SQLITE_OK ) return rc; + if( bOk==0 ) return SQLITE_NOTFOUND; + pBuilder->nRecValid = nEq; + + whereKeyStats(pParse, p, pRec, 0, a); + WHERETRACE(0x10,("equality scan regions %s(%d): %d\n", + p->zName, nEq-1, (int)a[1])); + *pnRow = a[1]; + + return rc; +} +#endif /* SQLITE_ENABLE_STAT4 */ + +#ifdef SQLITE_ENABLE_STAT4 +/* +** Estimate the number of rows that will be returned based on +** an IN constraint where the right-hand side of the IN operator +** is a list of values. Example: +** +** WHERE x IN (1,2,3,4) +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +static int whereInScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereLoopBuilder *pBuilder, + ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ + tRowcnt *pnRow /* Write the revised row estimate here */ +){ + Index *p = pBuilder->pNew->u.btree.pIndex; + i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]); + int nRecValid = pBuilder->nRecValid; + int rc = SQLITE_OK; /* Subfunction return code */ + tRowcnt nEst; /* Number of rows for a single term */ + tRowcnt nRowEst = 0; /* New estimate of the number of rows */ + int i; /* Loop counter */ + + assert( p->aSample!=0 ); + for(i=0; rc==SQLITE_OK && inExpr; i++){ + nEst = nRow0; + rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst); + nRowEst += nEst; + pBuilder->nRecValid = nRecValid; + } + + if( rc==SQLITE_OK ){ + if( nRowEst > nRow0 ) nRowEst = nRow0; + *pnRow = nRowEst; + WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst)); + } + assert( pBuilder->nRecValid==nRecValid ); + return rc; +} +#endif /* SQLITE_ENABLE_STAT4 */ + + +#ifdef WHERETRACE_ENABLED +/* +** Print the content of a WhereTerm object +*/ +SQLITE_PRIVATE void sqlite3WhereTermPrint(WhereTerm *pTerm, int iTerm){ + if( pTerm==0 ){ + sqlite3DebugPrintf("TERM-%-3d NULL\n", iTerm); + }else{ + char zType[8]; + char zLeft[50]; + memcpy(zType, "....", 5); + if( pTerm->wtFlags & TERM_VIRTUAL ) zType[0] = 'V'; + if( pTerm->eOperator & WO_EQUIV ) zType[1] = 'E'; + if( ExprHasProperty(pTerm->pExpr, EP_FromJoin) ) zType[2] = 'L'; + if( pTerm->wtFlags & TERM_CODED ) zType[3] = 'C'; + if( pTerm->eOperator & WO_SINGLE ){ + assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); + sqlite3_snprintf(sizeof(zLeft),zLeft,"left={%d:%d}", + pTerm->leftCursor, pTerm->u.x.leftColumn); + }else if( (pTerm->eOperator & WO_OR)!=0 && pTerm->u.pOrInfo!=0 ){ + sqlite3_snprintf(sizeof(zLeft),zLeft,"indexable=0x%llx", + pTerm->u.pOrInfo->indexable); + }else{ + sqlite3_snprintf(sizeof(zLeft),zLeft,"left=%d", pTerm->leftCursor); + } + sqlite3DebugPrintf( + "TERM-%-3d %p %s %-12s op=%03x wtFlags=%04x", + iTerm, pTerm, zType, zLeft, pTerm->eOperator, pTerm->wtFlags); + /* The 0x10000 .wheretrace flag causes extra information to be + ** shown about each Term */ + if( sqlite3WhereTrace & 0x10000 ){ + sqlite3DebugPrintf(" prob=%-3d prereq=%llx,%llx", + pTerm->truthProb, (u64)pTerm->prereqAll, (u64)pTerm->prereqRight); + } + if( (pTerm->eOperator & (WO_OR|WO_AND))==0 && pTerm->u.x.iField ){ + sqlite3DebugPrintf(" iField=%d", pTerm->u.x.iField); + } + if( pTerm->iParent>=0 ){ + sqlite3DebugPrintf(" iParent=%d", pTerm->iParent); + } + sqlite3DebugPrintf("\n"); + sqlite3TreeViewExpr(0, pTerm->pExpr, 0); + } +} +#endif + +#ifdef WHERETRACE_ENABLED +/* +** Show the complete content of a WhereClause +*/ +SQLITE_PRIVATE void sqlite3WhereClausePrint(WhereClause *pWC){ + int i; + for(i=0; inTerm; i++){ + sqlite3WhereTermPrint(&pWC->a[i], i); + } +} +#endif + +#ifdef WHERETRACE_ENABLED +/* +** Print a WhereLoop object for debugging purposes +*/ +SQLITE_PRIVATE void sqlite3WhereLoopPrint(WhereLoop *p, WhereClause *pWC){ + WhereInfo *pWInfo = pWC->pWInfo; + int nb = 1+(pWInfo->pTabList->nSrc+3)/4; + SrcItem *pItem = pWInfo->pTabList->a + p->iTab; + Table *pTab = pItem->pTab; + Bitmask mAll = (((Bitmask)1)<<(nb*4)) - 1; + sqlite3DebugPrintf("%c%2d.%0*llx.%0*llx", p->cId, + p->iTab, nb, p->maskSelf, nb, p->prereq & mAll); + sqlite3DebugPrintf(" %12s", + pItem->zAlias ? pItem->zAlias : pTab->zName); + if( (p->wsFlags & WHERE_VIRTUALTABLE)==0 ){ + const char *zName; + if( p->u.btree.pIndex && (zName = p->u.btree.pIndex->zName)!=0 ){ + if( strncmp(zName, "sqlite_autoindex_", 17)==0 ){ + int i = sqlite3Strlen30(zName) - 1; + while( zName[i]!='_' ) i--; + zName += i; + } + sqlite3DebugPrintf(".%-16s %2d", zName, p->u.btree.nEq); + }else{ + sqlite3DebugPrintf("%20s",""); + } + }else{ + char *z; + if( p->u.vtab.idxStr ){ + z = sqlite3_mprintf("(%d,\"%s\",%#x)", + p->u.vtab.idxNum, p->u.vtab.idxStr, p->u.vtab.omitMask); + }else{ + z = sqlite3_mprintf("(%d,%x)", p->u.vtab.idxNum, p->u.vtab.omitMask); + } + sqlite3DebugPrintf(" %-19s", z); + sqlite3_free(z); + } + if( p->wsFlags & WHERE_SKIPSCAN ){ + sqlite3DebugPrintf(" f %06x %d-%d", p->wsFlags, p->nLTerm,p->nSkip); + }else{ + sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm); + } + sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); + if( p->nLTerm && (sqlite3WhereTrace & 0x100)!=0 ){ + int i; + for(i=0; inLTerm; i++){ + sqlite3WhereTermPrint(p->aLTerm[i], i); + } + } +} +#endif + +/* +** Convert bulk memory into a valid WhereLoop that can be passed +** to whereLoopClear harmlessly. +*/ +static void whereLoopInit(WhereLoop *p){ + p->aLTerm = p->aLTermSpace; + p->nLTerm = 0; + p->nLSlot = ArraySize(p->aLTermSpace); + p->wsFlags = 0; +} + +/* +** Clear the WhereLoop.u union. Leave WhereLoop.pLTerm intact. +*/ +static void whereLoopClearUnion(sqlite3 *db, WhereLoop *p){ + if( p->wsFlags & (WHERE_VIRTUALTABLE|WHERE_AUTO_INDEX) ){ + if( (p->wsFlags & WHERE_VIRTUALTABLE)!=0 && p->u.vtab.needFree ){ + sqlite3_free(p->u.vtab.idxStr); + p->u.vtab.needFree = 0; + p->u.vtab.idxStr = 0; + }else if( (p->wsFlags & WHERE_AUTO_INDEX)!=0 && p->u.btree.pIndex!=0 ){ + sqlite3DbFree(db, p->u.btree.pIndex->zColAff); + sqlite3DbFreeNN(db, p->u.btree.pIndex); + p->u.btree.pIndex = 0; + } + } +} + +/* +** Deallocate internal memory used by a WhereLoop object +*/ +static void whereLoopClear(sqlite3 *db, WhereLoop *p){ + if( p->aLTerm!=p->aLTermSpace ) sqlite3DbFreeNN(db, p->aLTerm); + whereLoopClearUnion(db, p); + whereLoopInit(p); +} + +/* +** Increase the memory allocation for pLoop->aLTerm[] to be at least n. +*/ +static int whereLoopResize(sqlite3 *db, WhereLoop *p, int n){ + WhereTerm **paNew; + if( p->nLSlot>=n ) return SQLITE_OK; + n = (n+7)&~7; + paNew = sqlite3DbMallocRawNN(db, sizeof(p->aLTerm[0])*n); + if( paNew==0 ) return SQLITE_NOMEM_BKPT; + memcpy(paNew, p->aLTerm, sizeof(p->aLTerm[0])*p->nLSlot); + if( p->aLTerm!=p->aLTermSpace ) sqlite3DbFreeNN(db, p->aLTerm); + p->aLTerm = paNew; + p->nLSlot = n; + return SQLITE_OK; +} + +/* +** Transfer content from the second pLoop into the first. +*/ +static int whereLoopXfer(sqlite3 *db, WhereLoop *pTo, WhereLoop *pFrom){ + whereLoopClearUnion(db, pTo); + if( whereLoopResize(db, pTo, pFrom->nLTerm) ){ + memset(pTo, 0, WHERE_LOOP_XFER_SZ); + return SQLITE_NOMEM_BKPT; + } + memcpy(pTo, pFrom, WHERE_LOOP_XFER_SZ); + memcpy(pTo->aLTerm, pFrom->aLTerm, pTo->nLTerm*sizeof(pTo->aLTerm[0])); + if( pFrom->wsFlags & WHERE_VIRTUALTABLE ){ + pFrom->u.vtab.needFree = 0; + }else if( (pFrom->wsFlags & WHERE_AUTO_INDEX)!=0 ){ + pFrom->u.btree.pIndex = 0; + } + return SQLITE_OK; +} + +/* +** Delete a WhereLoop object +*/ +static void whereLoopDelete(sqlite3 *db, WhereLoop *p){ + whereLoopClear(db, p); + sqlite3DbFreeNN(db, p); +} + +/* +** Free a WhereInfo structure +*/ +static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ + int i; + assert( pWInfo!=0 ); + for(i=0; inLevel; i++){ + WhereLevel *pLevel = &pWInfo->a[i]; + if( pLevel->pWLoop && (pLevel->pWLoop->wsFlags & WHERE_IN_ABLE)!=0 ){ + assert( (pLevel->pWLoop->wsFlags & WHERE_MULTI_OR)==0 ); + sqlite3DbFree(db, pLevel->u.in.aInLoop); + } + } + sqlite3WhereClauseClear(&pWInfo->sWC); + while( pWInfo->pLoops ){ + WhereLoop *p = pWInfo->pLoops; + pWInfo->pLoops = p->pNextLoop; + whereLoopDelete(db, p); + } + assert( pWInfo->pExprMods==0 ); + sqlite3DbFreeNN(db, pWInfo); +} + +/* Undo all Expr node modifications +*/ +static void whereUndoExprMods(WhereInfo *pWInfo){ + while( pWInfo->pExprMods ){ + WhereExprMod *p = pWInfo->pExprMods; + pWInfo->pExprMods = p->pNext; + memcpy(p->pExpr, &p->orig, sizeof(p->orig)); + sqlite3DbFree(pWInfo->pParse->db, p); + } +} + +/* +** Return TRUE if all of the following are true: +** +** (1) X has the same or lower cost, or returns the same or fewer rows, +** than Y. +** (2) X uses fewer WHERE clause terms than Y +** (3) Every WHERE clause term used by X is also used by Y +** (4) X skips at least as many columns as Y +** (5) If X is a covering index, than Y is too +** +** Conditions (2) and (3) mean that X is a "proper subset" of Y. +** If X is a proper subset of Y then Y is a better choice and ought +** to have a lower cost. This routine returns TRUE when that cost +** relationship is inverted and needs to be adjusted. Constraint (4) +** was added because if X uses skip-scan less than Y it still might +** deserve a lower cost even if it is a proper subset of Y. Constraint (5) +** was added because a covering index probably deserves to have a lower cost +** than a non-covering index even if it is a proper subset. +*/ +static int whereLoopCheaperProperSubset( + const WhereLoop *pX, /* First WhereLoop to compare */ + const WhereLoop *pY /* Compare against this WhereLoop */ +){ + int i, j; + if( pX->nLTerm-pX->nSkip >= pY->nLTerm-pY->nSkip ){ + return 0; /* X is not a subset of Y */ + } + if( pX->rRun>pY->rRun && pX->nOut>pY->nOut ) return 0; + if( pY->nSkip > pX->nSkip ) return 0; + for(i=pX->nLTerm-1; i>=0; i--){ + if( pX->aLTerm[i]==0 ) continue; + for(j=pY->nLTerm-1; j>=0; j--){ + if( pY->aLTerm[j]==pX->aLTerm[i] ) break; + } + if( j<0 ) return 0; /* X not a subset of Y since term X[i] not used by Y */ + } + if( (pX->wsFlags&WHERE_IDX_ONLY)!=0 + && (pY->wsFlags&WHERE_IDX_ONLY)==0 ){ + return 0; /* Constraint (5) */ + } + return 1; /* All conditions meet */ +} + +/* +** Try to adjust the cost and number of output rows of WhereLoop pTemplate +** upwards or downwards so that: +** +** (1) pTemplate costs less than any other WhereLoops that are a proper +** subset of pTemplate +** +** (2) pTemplate costs more than any other WhereLoops for which pTemplate +** is a proper subset. +** +** To say "WhereLoop X is a proper subset of Y" means that X uses fewer +** WHERE clause terms than Y and that every WHERE clause term used by X is +** also used by Y. +*/ +static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){ + if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return; + for(; p; p=p->pNextLoop){ + if( p->iTab!=pTemplate->iTab ) continue; + if( (p->wsFlags & WHERE_INDEXED)==0 ) continue; + if( whereLoopCheaperProperSubset(p, pTemplate) ){ + /* Adjust pTemplate cost downward so that it is cheaper than its + ** subset p. */ + WHERETRACE(0x80,("subset cost adjustment %d,%d to %d,%d\n", + pTemplate->rRun, pTemplate->nOut, + MIN(p->rRun, pTemplate->rRun), + MIN(p->nOut - 1, pTemplate->nOut))); + pTemplate->rRun = MIN(p->rRun, pTemplate->rRun); + pTemplate->nOut = MIN(p->nOut - 1, pTemplate->nOut); + }else if( whereLoopCheaperProperSubset(pTemplate, p) ){ + /* Adjust pTemplate cost upward so that it is costlier than p since + ** pTemplate is a proper subset of p */ + WHERETRACE(0x80,("subset cost adjustment %d,%d to %d,%d\n", + pTemplate->rRun, pTemplate->nOut, + MAX(p->rRun, pTemplate->rRun), + MAX(p->nOut + 1, pTemplate->nOut))); + pTemplate->rRun = MAX(p->rRun, pTemplate->rRun); + pTemplate->nOut = MAX(p->nOut + 1, pTemplate->nOut); + } + } +} + +/* +** Search the list of WhereLoops in *ppPrev looking for one that can be +** replaced by pTemplate. +** +** Return NULL if pTemplate does not belong on the WhereLoop list. +** In other words if pTemplate ought to be dropped from further consideration. +** +** If pX is a WhereLoop that pTemplate can replace, then return the +** link that points to pX. +** +** If pTemplate cannot replace any existing element of the list but needs +** to be added to the list as a new entry, then return a pointer to the +** tail of the list. +*/ +static WhereLoop **whereLoopFindLesser( + WhereLoop **ppPrev, + const WhereLoop *pTemplate +){ + WhereLoop *p; + for(p=(*ppPrev); p; ppPrev=&p->pNextLoop, p=*ppPrev){ + if( p->iTab!=pTemplate->iTab || p->iSortIdx!=pTemplate->iSortIdx ){ + /* If either the iTab or iSortIdx values for two WhereLoop are different + ** then those WhereLoops need to be considered separately. Neither is + ** a candidate to replace the other. */ + continue; + } + /* In the current implementation, the rSetup value is either zero + ** or the cost of building an automatic index (NlogN) and the NlogN + ** is the same for compatible WhereLoops. */ + assert( p->rSetup==0 || pTemplate->rSetup==0 + || p->rSetup==pTemplate->rSetup ); + + /* whereLoopAddBtree() always generates and inserts the automatic index + ** case first. Hence compatible candidate WhereLoops never have a larger + ** rSetup. Call this SETUP-INVARIANT */ + assert( p->rSetup>=pTemplate->rSetup ); + + /* Any loop using an appliation-defined index (or PRIMARY KEY or + ** UNIQUE constraint) with one or more == constraints is better + ** than an automatic index. Unless it is a skip-scan. */ + if( (p->wsFlags & WHERE_AUTO_INDEX)!=0 + && (pTemplate->nSkip)==0 + && (pTemplate->wsFlags & WHERE_INDEXED)!=0 + && (pTemplate->wsFlags & WHERE_COLUMN_EQ)!=0 + && (p->prereq & pTemplate->prereq)==pTemplate->prereq + ){ + break; + } + + /* If existing WhereLoop p is better than pTemplate, pTemplate can be + ** discarded. WhereLoop p is better if: + ** (1) p has no more dependencies than pTemplate, and + ** (2) p has an equal or lower cost than pTemplate + */ + if( (p->prereq & pTemplate->prereq)==p->prereq /* (1) */ + && p->rSetup<=pTemplate->rSetup /* (2a) */ + && p->rRun<=pTemplate->rRun /* (2b) */ + && p->nOut<=pTemplate->nOut /* (2c) */ + ){ + return 0; /* Discard pTemplate */ + } + + /* If pTemplate is always better than p, then cause p to be overwritten + ** with pTemplate. pTemplate is better than p if: + ** (1) pTemplate has no more dependences than p, and + ** (2) pTemplate has an equal or lower cost than p. + */ + if( (p->prereq & pTemplate->prereq)==pTemplate->prereq /* (1) */ + && p->rRun>=pTemplate->rRun /* (2a) */ + && p->nOut>=pTemplate->nOut /* (2b) */ + ){ + assert( p->rSetup>=pTemplate->rSetup ); /* SETUP-INVARIANT above */ + break; /* Cause p to be overwritten by pTemplate */ + } + } + return ppPrev; +} + +/* +** Insert or replace a WhereLoop entry using the template supplied. +** +** An existing WhereLoop entry might be overwritten if the new template +** is better and has fewer dependencies. Or the template will be ignored +** and no insert will occur if an existing WhereLoop is faster and has +** fewer dependencies than the template. Otherwise a new WhereLoop is +** added based on the template. +** +** If pBuilder->pOrSet is not NULL then we care about only the +** prerequisites and rRun and nOut costs of the N best loops. That +** information is gathered in the pBuilder->pOrSet object. This special +** processing mode is used only for OR clause processing. +** +** When accumulating multiple loops (when pBuilder->pOrSet is NULL) we +** still might overwrite similar loops with the new template if the +** new template is better. Loops may be overwritten if the following +** conditions are met: +** +** (1) They have the same iTab. +** (2) They have the same iSortIdx. +** (3) The template has same or fewer dependencies than the current loop +** (4) The template has the same or lower cost than the current loop +*/ +static int whereLoopInsert(WhereLoopBuilder *pBuilder, WhereLoop *pTemplate){ + WhereLoop **ppPrev, *p; + WhereInfo *pWInfo = pBuilder->pWInfo; + sqlite3 *db = pWInfo->pParse->db; + int rc; + + /* Stop the search once we hit the query planner search limit */ + if( pBuilder->iPlanLimit==0 ){ + WHERETRACE(0xffffffff,("=== query planner search limit reached ===\n")); + if( pBuilder->pOrSet ) pBuilder->pOrSet->n = 0; + return SQLITE_DONE; + } + pBuilder->iPlanLimit--; + + whereLoopAdjustCost(pWInfo->pLoops, pTemplate); + + /* If pBuilder->pOrSet is defined, then only keep track of the costs + ** and prereqs. + */ + if( pBuilder->pOrSet!=0 ){ + if( pTemplate->nLTerm ){ +#if WHERETRACE_ENABLED + u16 n = pBuilder->pOrSet->n; + int x = +#endif + whereOrInsert(pBuilder->pOrSet, pTemplate->prereq, pTemplate->rRun, + pTemplate->nOut); +#if WHERETRACE_ENABLED /* 0x8 */ + if( sqlite3WhereTrace & 0x8 ){ + sqlite3DebugPrintf(x?" or-%d: ":" or-X: ", n); + sqlite3WhereLoopPrint(pTemplate, pBuilder->pWC); + } +#endif + } + return SQLITE_OK; + } + + /* Look for an existing WhereLoop to replace with pTemplate + */ + ppPrev = whereLoopFindLesser(&pWInfo->pLoops, pTemplate); + + if( ppPrev==0 ){ + /* There already exists a WhereLoop on the list that is better + ** than pTemplate, so just ignore pTemplate */ +#if WHERETRACE_ENABLED /* 0x8 */ + if( sqlite3WhereTrace & 0x8 ){ + sqlite3DebugPrintf(" skip: "); + sqlite3WhereLoopPrint(pTemplate, pBuilder->pWC); + } +#endif + return SQLITE_OK; + }else{ + p = *ppPrev; + } + + /* If we reach this point it means that either p[] should be overwritten + ** with pTemplate[] if p[] exists, or if p==NULL then allocate a new + ** WhereLoop and insert it. + */ +#if WHERETRACE_ENABLED /* 0x8 */ + if( sqlite3WhereTrace & 0x8 ){ + if( p!=0 ){ + sqlite3DebugPrintf("replace: "); + sqlite3WhereLoopPrint(p, pBuilder->pWC); + sqlite3DebugPrintf(" with: "); + }else{ + sqlite3DebugPrintf(" add: "); + } + sqlite3WhereLoopPrint(pTemplate, pBuilder->pWC); + } +#endif + if( p==0 ){ + /* Allocate a new WhereLoop to add to the end of the list */ + *ppPrev = p = sqlite3DbMallocRawNN(db, sizeof(WhereLoop)); + if( p==0 ) return SQLITE_NOMEM_BKPT; + whereLoopInit(p); + p->pNextLoop = 0; + }else{ + /* We will be overwriting WhereLoop p[]. But before we do, first + ** go through the rest of the list and delete any other entries besides + ** p[] that are also supplated by pTemplate */ + WhereLoop **ppTail = &p->pNextLoop; + WhereLoop *pToDel; + while( *ppTail ){ + ppTail = whereLoopFindLesser(ppTail, pTemplate); + if( ppTail==0 ) break; + pToDel = *ppTail; + if( pToDel==0 ) break; + *ppTail = pToDel->pNextLoop; +#if WHERETRACE_ENABLED /* 0x8 */ + if( sqlite3WhereTrace & 0x8 ){ + sqlite3DebugPrintf(" delete: "); + sqlite3WhereLoopPrint(pToDel, pBuilder->pWC); + } +#endif + whereLoopDelete(db, pToDel); + } + } + rc = whereLoopXfer(db, p, pTemplate); + if( (p->wsFlags & WHERE_VIRTUALTABLE)==0 ){ + Index *pIndex = p->u.btree.pIndex; + if( pIndex && pIndex->idxType==SQLITE_IDXTYPE_IPK ){ + p->u.btree.pIndex = 0; + } + } + return rc; +} + +/* +** Adjust the WhereLoop.nOut value downward to account for terms of the +** WHERE clause that reference the loop but which are not used by an +** index. +* +** For every WHERE clause term that is not used by the index +** and which has a truth probability assigned by one of the likelihood(), +** likely(), or unlikely() SQL functions, reduce the estimated number +** of output rows by the probability specified. +** +** TUNING: For every WHERE clause term that is not used by the index +** and which does not have an assigned truth probability, heuristics +** described below are used to try to estimate the truth probability. +** TODO --> Perhaps this is something that could be improved by better +** table statistics. +** +** Heuristic 1: Estimate the truth probability as 93.75%. The 93.75% +** value corresponds to -1 in LogEst notation, so this means decrement +** the WhereLoop.nOut field for every such WHERE clause term. +** +** Heuristic 2: If there exists one or more WHERE clause terms of the +** form "x==EXPR" and EXPR is not a constant 0 or 1, then make sure the +** final output row estimate is no greater than 1/4 of the total number +** of rows in the table. In other words, assume that x==EXPR will filter +** out at least 3 out of 4 rows. If EXPR is -1 or 0 or 1, then maybe the +** "x" column is boolean or else -1 or 0 or 1 is a common default value +** on the "x" column and so in that case only cap the output row estimate +** at 1/2 instead of 1/4. +*/ +static void whereLoopOutputAdjust( + WhereClause *pWC, /* The WHERE clause */ + WhereLoop *pLoop, /* The loop to adjust downward */ + LogEst nRow /* Number of rows in the entire table */ +){ + WhereTerm *pTerm, *pX; + Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf); + int i, j; + LogEst iReduce = 0; /* pLoop->nOut should not exceed nRow-iReduce */ + + assert( (pLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); + for(i=pWC->nBase, pTerm=pWC->a; i>0; i--, pTerm++){ + assert( pTerm!=0 ); + if( (pTerm->prereqAll & notAllowed)!=0 ) continue; + if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue; + if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) continue; + for(j=pLoop->nLTerm-1; j>=0; j--){ + pX = pLoop->aLTerm[j]; + if( pX==0 ) continue; + if( pX==pTerm ) break; + if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break; + } + if( j<0 ){ + if( pLoop->maskSelf==pTerm->prereqAll ){ + /* If there are extra terms in the WHERE clause not used by an index + ** that depend only on the table being scanned, and that will tend to + ** cause many rows to be omitted, then mark that table as + ** "self-culling". + ** + ** 2022-03-24: Self-culling only applies if either the extra terms + ** are straight comparison operators that are non-true with NULL + ** operand, or if the loop is not a LEFT JOIN. + */ + if( (pTerm->eOperator & 0x3f)!=0 + || (pWC->pWInfo->pTabList->a[pLoop->iTab].fg.jointype & JT_LEFT)==0 + ){ + pLoop->wsFlags |= WHERE_SELFCULL; + } + } + if( pTerm->truthProb<=0 ){ + /* If a truth probability is specified using the likelihood() hints, + ** then use the probability provided by the application. */ + pLoop->nOut += pTerm->truthProb; + }else{ + /* In the absence of explicit truth probabilities, use heuristics to + ** guess a reasonable truth probability. */ + pLoop->nOut--; + if( (pTerm->eOperator&(WO_EQ|WO_IS))!=0 + && (pTerm->wtFlags & TERM_HIGHTRUTH)==0 /* tag-20200224-1 */ + ){ + Expr *pRight = pTerm->pExpr->pRight; + int k = 0; + testcase( pTerm->pExpr->op==TK_IS ); + if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){ + k = 10; + }else{ + k = 20; + } + if( iReducewtFlags |= TERM_HEURTRUTH; + iReduce = k; + } + } + } + } + } + if( pLoop->nOut > nRow-iReduce ){ + pLoop->nOut = nRow - iReduce; + } +} + +/* +** Term pTerm is a vector range comparison operation. The first comparison +** in the vector can be optimized using column nEq of the index. This +** function returns the total number of vector elements that can be used +** as part of the range comparison. +** +** For example, if the query is: +** +** WHERE a = ? AND (b, c, d) > (?, ?, ?) +** +** and the index: +** +** CREATE INDEX ... ON (a, b, c, d, e) +** +** then this function would be invoked with nEq=1. The value returned in +** this case is 3. +*/ +static int whereRangeVectorLen( + Parse *pParse, /* Parsing context */ + int iCur, /* Cursor open on pIdx */ + Index *pIdx, /* The index to be used for a inequality constraint */ + int nEq, /* Number of prior equality constraints on same index */ + WhereTerm *pTerm /* The vector inequality constraint */ +){ + int nCmp = sqlite3ExprVectorSize(pTerm->pExpr->pLeft); + int i; + + nCmp = MIN(nCmp, (pIdx->nColumn - nEq)); + for(i=1; ipExpr->pLeft) ); + pLhs = pTerm->pExpr->pLeft->x.pList->a[i].pExpr; + pRhs = pTerm->pExpr->pRight; + if( ExprUseXSelect(pRhs) ){ + pRhs = pRhs->x.pSelect->pEList->a[i].pExpr; + }else{ + pRhs = pRhs->x.pList->a[i].pExpr; + } + + /* Check that the LHS of the comparison is a column reference to + ** the right column of the right source table. And that the sort + ** order of the index column is the same as the sort order of the + ** leftmost index column. */ + if( pLhs->op!=TK_COLUMN + || pLhs->iTable!=iCur + || pLhs->iColumn!=pIdx->aiColumn[i+nEq] + || pIdx->aSortOrder[i+nEq]!=pIdx->aSortOrder[nEq] + ){ + break; + } + + testcase( pLhs->iColumn==XN_ROWID ); + aff = sqlite3CompareAffinity(pRhs, sqlite3ExprAffinity(pLhs)); + idxaff = sqlite3TableColumnAffinity(pIdx->pTable, pLhs->iColumn); + if( aff!=idxaff ) break; + + pColl = sqlite3BinaryCompareCollSeq(pParse, pLhs, pRhs); + if( pColl==0 ) break; + if( sqlite3StrICmp(pColl->zName, pIdx->azColl[i+nEq]) ) break; + } + return i; +} + +/* +** Adjust the cost C by the costMult facter T. This only occurs if +** compiled with -DSQLITE_ENABLE_COSTMULT +*/ +#ifdef SQLITE_ENABLE_COSTMULT +# define ApplyCostMultiplier(C,T) C += T +#else +# define ApplyCostMultiplier(C,T) +#endif + +/* +** We have so far matched pBuilder->pNew->u.btree.nEq terms of the +** index pIndex. Try to match one more. +** +** When this function is called, pBuilder->pNew->nOut contains the +** number of rows expected to be visited by filtering using the nEq +** terms only. If it is modified, this value is restored before this +** function returns. +** +** If pProbe->idxType==SQLITE_IDXTYPE_IPK, that means pIndex is +** a fake index used for the INTEGER PRIMARY KEY. +*/ +static int whereLoopAddBtreeIndex( + WhereLoopBuilder *pBuilder, /* The WhereLoop factory */ + SrcItem *pSrc, /* FROM clause term being analyzed */ + Index *pProbe, /* An index on pSrc */ + LogEst nInMul /* log(Number of iterations due to IN) */ +){ + WhereInfo *pWInfo = pBuilder->pWInfo; /* WHERE analyse context */ + Parse *pParse = pWInfo->pParse; /* Parsing context */ + sqlite3 *db = pParse->db; /* Database connection malloc context */ + WhereLoop *pNew; /* Template WhereLoop under construction */ + WhereTerm *pTerm; /* A WhereTerm under consideration */ + int opMask; /* Valid operators for constraints */ + WhereScan scan; /* Iterator for WHERE terms */ + Bitmask saved_prereq; /* Original value of pNew->prereq */ + u16 saved_nLTerm; /* Original value of pNew->nLTerm */ + u16 saved_nEq; /* Original value of pNew->u.btree.nEq */ + u16 saved_nBtm; /* Original value of pNew->u.btree.nBtm */ + u16 saved_nTop; /* Original value of pNew->u.btree.nTop */ + u16 saved_nSkip; /* Original value of pNew->nSkip */ + u32 saved_wsFlags; /* Original value of pNew->wsFlags */ + LogEst saved_nOut; /* Original value of pNew->nOut */ + int rc = SQLITE_OK; /* Return code */ + LogEst rSize; /* Number of rows in the table */ + LogEst rLogSize; /* Logarithm of table size */ + WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ + + pNew = pBuilder->pNew; + if( db->mallocFailed ) return SQLITE_NOMEM_BKPT; + WHERETRACE(0x800, ("BEGIN %s.addBtreeIdx(%s), nEq=%d, nSkip=%d, rRun=%d\n", + pProbe->pTable->zName,pProbe->zName, + pNew->u.btree.nEq, pNew->nSkip, pNew->rRun)); + + assert( (pNew->wsFlags & WHERE_VIRTUALTABLE)==0 ); + assert( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 ); + if( pNew->wsFlags & WHERE_BTM_LIMIT ){ + opMask = WO_LT|WO_LE; + }else{ + assert( pNew->u.btree.nBtm==0 ); + opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS; + } + if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); + + assert( pNew->u.btree.nEqnColumn ); + assert( pNew->u.btree.nEqnKeyCol + || pProbe->idxType!=SQLITE_IDXTYPE_PRIMARYKEY ); + + saved_nEq = pNew->u.btree.nEq; + saved_nBtm = pNew->u.btree.nBtm; + saved_nTop = pNew->u.btree.nTop; + saved_nSkip = pNew->nSkip; + saved_nLTerm = pNew->nLTerm; + saved_wsFlags = pNew->wsFlags; + saved_prereq = pNew->prereq; + saved_nOut = pNew->nOut; + pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, saved_nEq, + opMask, pProbe); + pNew->rSetup = 0; + rSize = pProbe->aiRowLogEst[0]; + rLogSize = estLog(rSize); + for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ + u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */ + LogEst rCostIdx; + LogEst nOutUnadjusted; /* nOut before IN() and WHERE adjustments */ + int nIn = 0; +#ifdef SQLITE_ENABLE_STAT4 + int nRecValid = pBuilder->nRecValid; +#endif + if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0) + && indexColumnNotNull(pProbe, saved_nEq) + ){ + continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */ + } + if( pTerm->prereqRight & pNew->maskSelf ) continue; + + /* Do not allow the upper bound of a LIKE optimization range constraint + ** to mix with a lower range bound from some other source */ + if( pTerm->wtFlags & TERM_LIKEOPT && pTerm->eOperator==WO_LT ) continue; + + /* tag-20191211-001: Do not allow constraints from the WHERE clause to + ** be used by the right table of a LEFT JOIN. Only constraints in the + ** ON clause are allowed. See tag-20191211-002 for the vtab equivalent. */ + if( (pSrc->fg.jointype & JT_LEFT)!=0 + && !ExprHasProperty(pTerm->pExpr, EP_FromJoin) + ){ + continue; + } + + if( IsUniqueIndex(pProbe) && saved_nEq==pProbe->nKeyCol-1 ){ + pBuilder->bldFlags1 |= SQLITE_BLDF1_UNIQUE; + }else{ + pBuilder->bldFlags1 |= SQLITE_BLDF1_INDEXED; + } + pNew->wsFlags = saved_wsFlags; + pNew->u.btree.nEq = saved_nEq; + pNew->u.btree.nBtm = saved_nBtm; + pNew->u.btree.nTop = saved_nTop; + pNew->nLTerm = saved_nLTerm; + if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */ + pNew->aLTerm[pNew->nLTerm++] = pTerm; + pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf; + + assert( nInMul==0 + || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 + || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 + || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 + ); + + if( eOp & WO_IN ){ + Expr *pExpr = pTerm->pExpr; + if( ExprUseXSelect(pExpr) ){ + /* "x IN (SELECT ...)": TUNING: the SELECT returns 25 rows */ + int i; + nIn = 46; assert( 46==sqlite3LogEst(25) ); + + /* The expression may actually be of the form (x, y) IN (SELECT...). + ** In this case there is a separate term for each of (x) and (y). + ** However, the nIn multiplier should only be applied once, not once + ** for each such term. The following loop checks that pTerm is the + ** first such term in use, and sets nIn back to 0 if it is not. */ + for(i=0; inLTerm-1; i++){ + if( pNew->aLTerm[i] && pNew->aLTerm[i]->pExpr==pExpr ) nIn = 0; + } + }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ + /* "x IN (value, value, ...)" */ + nIn = sqlite3LogEst(pExpr->x.pList->nExpr); + } + if( pProbe->hasStat1 && rLogSize>=10 ){ + LogEst M, logK, x; + /* Let: + ** N = the total number of rows in the table + ** K = the number of entries on the RHS of the IN operator + ** M = the number of rows in the table that match terms to the + ** to the left in the same index. If the IN operator is on + ** the left-most index column, M==N. + ** + ** Given the definitions above, it is better to omit the IN operator + ** from the index lookup and instead do a scan of the M elements, + ** testing each scanned row against the IN operator separately, if: + ** + ** M*log(K) < K*log(N) + ** + ** Our estimates for M, K, and N might be inaccurate, so we build in + ** a safety margin of 2 (LogEst: 10) that favors using the IN operator + ** with the index, as using an index has better worst-case behavior. + ** If we do not have real sqlite_stat1 data, always prefer to use + ** the index. Do not bother with this optimization on very small + ** tables (less than 2 rows) as it is pointless in that case. + */ + M = pProbe->aiRowLogEst[saved_nEq]; + logK = estLog(nIn); + /* TUNING v----- 10 to bias toward indexed IN */ + x = M + logK + 10 - (nIn + rLogSize); + if( x>=0 ){ + WHERETRACE(0x40, + ("IN operator (N=%d M=%d logK=%d nIn=%d rLogSize=%d x=%d) " + "prefers indexed lookup\n", + saved_nEq, M, logK, nIn, rLogSize, x)); + }else if( nInMul<2 && OptimizationEnabled(db, SQLITE_SeekScan) ){ + WHERETRACE(0x40, + ("IN operator (N=%d M=%d logK=%d nIn=%d rLogSize=%d x=%d" + " nInMul=%d) prefers skip-scan\n", + saved_nEq, M, logK, nIn, rLogSize, x, nInMul)); + pNew->wsFlags |= WHERE_IN_SEEKSCAN; + }else{ + WHERETRACE(0x40, + ("IN operator (N=%d M=%d logK=%d nIn=%d rLogSize=%d x=%d" + " nInMul=%d) prefers normal scan\n", + saved_nEq, M, logK, nIn, rLogSize, x, nInMul)); + continue; + } + } + pNew->wsFlags |= WHERE_COLUMN_IN; + }else if( eOp & (WO_EQ|WO_IS) ){ + int iCol = pProbe->aiColumn[saved_nEq]; + pNew->wsFlags |= WHERE_COLUMN_EQ; + assert( saved_nEq==pNew->u.btree.nEq ); + if( iCol==XN_ROWID + || (iCol>=0 && nInMul==0 && saved_nEq==pProbe->nKeyCol-1) + ){ + if( iCol==XN_ROWID || pProbe->uniqNotNull + || (pProbe->nKeyCol==1 && pProbe->onError && eOp==WO_EQ) + ){ + pNew->wsFlags |= WHERE_ONEROW; + }else{ + pNew->wsFlags |= WHERE_UNQ_WANTED; + } + } + if( scan.iEquiv>1 ) pNew->wsFlags |= WHERE_TRANSCONS; + }else if( eOp & WO_ISNULL ){ + pNew->wsFlags |= WHERE_COLUMN_NULL; + }else if( eOp & (WO_GT|WO_GE) ){ + testcase( eOp & WO_GT ); + testcase( eOp & WO_GE ); + pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT; + pNew->u.btree.nBtm = whereRangeVectorLen( + pParse, pSrc->iCursor, pProbe, saved_nEq, pTerm + ); + pBtm = pTerm; + pTop = 0; + if( pTerm->wtFlags & TERM_LIKEOPT ){ + /* Range constraints that come from the LIKE optimization are + ** always used in pairs. */ + pTop = &pTerm[1]; + assert( (pTop-(pTerm->pWC->a))pWC->nTerm ); + assert( pTop->wtFlags & TERM_LIKEOPT ); + assert( pTop->eOperator==WO_LT ); + if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */ + pNew->aLTerm[pNew->nLTerm++] = pTop; + pNew->wsFlags |= WHERE_TOP_LIMIT; + pNew->u.btree.nTop = 1; + } + }else{ + assert( eOp & (WO_LT|WO_LE) ); + testcase( eOp & WO_LT ); + testcase( eOp & WO_LE ); + pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT; + pNew->u.btree.nTop = whereRangeVectorLen( + pParse, pSrc->iCursor, pProbe, saved_nEq, pTerm + ); + pTop = pTerm; + pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ? + pNew->aLTerm[pNew->nLTerm-2] : 0; + } + + /* At this point pNew->nOut is set to the number of rows expected to + ** be visited by the index scan before considering term pTerm, or the + ** values of nIn and nInMul. In other words, assuming that all + ** "x IN(...)" terms are replaced with "x = ?". This block updates + ** the value of pNew->nOut to account for pTerm (but not nIn/nInMul). */ + assert( pNew->nOut==saved_nOut ); + if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ + /* Adjust nOut using stat4 data. Or, if there is no stat4 + ** data, using some other estimate. */ + whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); + }else{ + int nEq = ++pNew->u.btree.nEq; + assert( eOp & (WO_ISNULL|WO_EQ|WO_IN|WO_IS) ); + + assert( pNew->nOut==saved_nOut ); + if( pTerm->truthProb<=0 && pProbe->aiColumn[saved_nEq]>=0 ){ + assert( (eOp & WO_IN) || nIn==0 ); + testcase( eOp & WO_IN ); + pNew->nOut += pTerm->truthProb; + pNew->nOut -= nIn; + }else{ +#ifdef SQLITE_ENABLE_STAT4 + tRowcnt nOut = 0; + if( nInMul==0 + && pProbe->nSample + && ALWAYS(pNew->u.btree.nEq<=pProbe->nSampleCol) + && ((eOp & WO_IN)==0 || ExprUseXList(pTerm->pExpr)) + && OptimizationEnabled(db, SQLITE_Stat4) + ){ + Expr *pExpr = pTerm->pExpr; + if( (eOp & (WO_EQ|WO_ISNULL|WO_IS))!=0 ){ + testcase( eOp & WO_EQ ); + testcase( eOp & WO_IS ); + testcase( eOp & WO_ISNULL ); + rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); + }else{ + rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); + } + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */ + if( nOut ){ + pNew->nOut = sqlite3LogEst(nOut); + if( nEq==1 + /* TUNING: Mark terms as "low selectivity" if they seem likely + ** to be true for half or more of the rows in the table. + ** See tag-202002240-1 */ + && pNew->nOut+10 > pProbe->aiRowLogEst[0] + ){ +#if WHERETRACE_ENABLED /* 0x01 */ + if( sqlite3WhereTrace & 0x01 ){ + sqlite3DebugPrintf( + "STAT4 determines term has low selectivity:\n"); + sqlite3WhereTermPrint(pTerm, 999); + } +#endif + pTerm->wtFlags |= TERM_HIGHTRUTH; + if( pTerm->wtFlags & TERM_HEURTRUTH ){ + /* If the term has previously been used with an assumption of + ** higher selectivity, then set the flag to rerun the + ** loop computations. */ + pBuilder->bldFlags2 |= SQLITE_BLDF2_2NDPASS; + } + } + if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; + pNew->nOut -= nIn; + } + } + if( nOut==0 ) +#endif + { + pNew->nOut += (pProbe->aiRowLogEst[nEq] - pProbe->aiRowLogEst[nEq-1]); + if( eOp & WO_ISNULL ){ + /* TUNING: If there is no likelihood() value, assume that a + ** "col IS NULL" expression matches twice as many rows + ** as (col=?). */ + pNew->nOut += 10; + } + } + } + } + + /* Set rCostIdx to the cost of visiting selected rows in index. Add + ** it to pNew->rRun, which is currently set to the cost of the index + ** seek only. Then, if this is a non-covering index, add the cost of + ** visiting the rows in the main table. */ + assert( pSrc->pTab->szTabRow>0 ); + rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow; + pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx); + if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16); + } + ApplyCostMultiplier(pNew->rRun, pProbe->pTable->costMult); + + nOutUnadjusted = pNew->nOut; + pNew->rRun += nInMul + nIn; + pNew->nOut += nInMul + nIn; + whereLoopOutputAdjust(pBuilder->pWC, pNew, rSize); + rc = whereLoopInsert(pBuilder, pNew); + + if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ + pNew->nOut = saved_nOut; + }else{ + pNew->nOut = nOutUnadjusted; + } + + if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 + && pNew->u.btree.nEqnColumn + && (pNew->u.btree.nEqnKeyCol || + pProbe->idxType!=SQLITE_IDXTYPE_PRIMARYKEY) + ){ + whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn); + } + pNew->nOut = saved_nOut; +#ifdef SQLITE_ENABLE_STAT4 + pBuilder->nRecValid = nRecValid; +#endif + } + pNew->prereq = saved_prereq; + pNew->u.btree.nEq = saved_nEq; + pNew->u.btree.nBtm = saved_nBtm; + pNew->u.btree.nTop = saved_nTop; + pNew->nSkip = saved_nSkip; + pNew->wsFlags = saved_wsFlags; + pNew->nOut = saved_nOut; + pNew->nLTerm = saved_nLTerm; + + /* Consider using a skip-scan if there are no WHERE clause constraints + ** available for the left-most terms of the index, and if the average + ** number of repeats in the left-most terms is at least 18. + ** + ** The magic number 18 is selected on the basis that scanning 17 rows + ** is almost always quicker than an index seek (even though if the index + ** contains fewer than 2^17 rows we assume otherwise in other parts of + ** the code). And, even if it is not, it should not be too much slower. + ** On the other hand, the extra seeks could end up being significantly + ** more expensive. */ + assert( 42==sqlite3LogEst(18) ); + if( saved_nEq==saved_nSkip + && saved_nEq+1nKeyCol + && saved_nEq==pNew->nLTerm + && pProbe->noSkipScan==0 + && pProbe->hasStat1!=0 + && OptimizationEnabled(db, SQLITE_SkipScan) + && pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */ + && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK + ){ + LogEst nIter; + pNew->u.btree.nEq++; + pNew->nSkip++; + pNew->aLTerm[pNew->nLTerm++] = 0; + pNew->wsFlags |= WHERE_SKIPSCAN; + nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1]; + pNew->nOut -= nIter; + /* TUNING: Because uncertainties in the estimates for skip-scan queries, + ** add a 1.375 fudge factor to make skip-scan slightly less likely. */ + nIter += 5; + whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul); + pNew->nOut = saved_nOut; + pNew->u.btree.nEq = saved_nEq; + pNew->nSkip = saved_nSkip; + pNew->wsFlags = saved_wsFlags; + } + + WHERETRACE(0x800, ("END %s.addBtreeIdx(%s), nEq=%d, rc=%d\n", + pProbe->pTable->zName, pProbe->zName, saved_nEq, rc)); + return rc; +} + +/* +** Return True if it is possible that pIndex might be useful in +** implementing the ORDER BY clause in pBuilder. +** +** Return False if pBuilder does not contain an ORDER BY clause or +** if there is no way for pIndex to be useful in implementing that +** ORDER BY clause. +*/ +static int indexMightHelpWithOrderBy( + WhereLoopBuilder *pBuilder, + Index *pIndex, + int iCursor +){ + ExprList *pOB; + ExprList *aColExpr; + int ii, jj; + + if( pIndex->bUnordered ) return 0; + if( (pOB = pBuilder->pWInfo->pOrderBy)==0 ) return 0; + for(ii=0; iinExpr; ii++){ + Expr *pExpr = sqlite3ExprSkipCollateAndLikely(pOB->a[ii].pExpr); + if( NEVER(pExpr==0) ) continue; + if( pExpr->op==TK_COLUMN && pExpr->iTable==iCursor ){ + if( pExpr->iColumn<0 ) return 1; + for(jj=0; jjnKeyCol; jj++){ + if( pExpr->iColumn==pIndex->aiColumn[jj] ) return 1; + } + }else if( (aColExpr = pIndex->aColExpr)!=0 ){ + for(jj=0; jjnKeyCol; jj++){ + if( pIndex->aiColumn[jj]!=XN_EXPR ) continue; + if( sqlite3ExprCompareSkip(pExpr,aColExpr->a[jj].pExpr,iCursor)==0 ){ + return 1; + } + } + } + } + return 0; +} + +/* Check to see if a partial index with pPartIndexWhere can be used +** in the current query. Return true if it can be and false if not. +*/ +static int whereUsablePartialIndex( + int iTab, /* The table for which we want an index */ + int isLeft, /* True if iTab is the right table of a LEFT JOIN */ + WhereClause *pWC, /* The WHERE clause of the query */ + Expr *pWhere /* The WHERE clause from the partial index */ +){ + int i; + WhereTerm *pTerm; + Parse *pParse = pWC->pWInfo->pParse; + while( pWhere->op==TK_AND ){ + if( !whereUsablePartialIndex(iTab,isLeft,pWC,pWhere->pLeft) ) return 0; + pWhere = pWhere->pRight; + } + if( pParse->db->flags & SQLITE_EnableQPSG ) pParse = 0; + for(i=0, pTerm=pWC->a; inTerm; i++, pTerm++){ + Expr *pExpr; + pExpr = pTerm->pExpr; + if( (!ExprHasProperty(pExpr, EP_FromJoin) || pExpr->w.iRightJoinTable==iTab) + && (isLeft==0 || ExprHasProperty(pExpr, EP_FromJoin)) + && sqlite3ExprImpliesExpr(pParse, pExpr, pWhere, iTab) + && (pTerm->wtFlags & TERM_VNULL)==0 + ){ + return 1; + } + } + return 0; +} + +/* +** Add all WhereLoop objects for a single table of the join where the table +** is identified by pBuilder->pNew->iTab. That table is guaranteed to be +** a b-tree table, not a virtual table. +** +** The costs (WhereLoop.rRun) of the b-tree loops added by this function +** are calculated as follows: +** +** For a full scan, assuming the table (or index) contains nRow rows: +** +** cost = nRow * 3.0 // full-table scan +** cost = nRow * K // scan of covering index +** cost = nRow * (K+3.0) // scan of non-covering index +** +** where K is a value between 1.1 and 3.0 set based on the relative +** estimated average size of the index and table records. +** +** For an index scan, where nVisit is the number of index rows visited +** by the scan, and nSeek is the number of seek operations required on +** the index b-tree: +** +** cost = nSeek * (log(nRow) + K * nVisit) // covering index +** cost = nSeek * (log(nRow) + (K+3.0) * nVisit) // non-covering index +** +** Normally, nSeek is 1. nSeek values greater than 1 come about if the +** WHERE clause includes "x IN (....)" terms used in place of "x=?". Or when +** implicit "x IN (SELECT x FROM tbl)" terms are added for skip-scans. +** +** The estimated values (nRow, nVisit, nSeek) often contain a large amount +** of uncertainty. For this reason, scoring is designed to pick plans that +** "do the least harm" if the estimates are inaccurate. For example, a +** log(nRow) factor is omitted from a non-covering index scan in order to +** bias the scoring in favor of using an index, since the worst-case +** performance of using an index is far better than the worst-case performance +** of a full table scan. +*/ +static int whereLoopAddBtree( + WhereLoopBuilder *pBuilder, /* WHERE clause information */ + Bitmask mPrereq /* Extra prerequesites for using this table */ +){ + WhereInfo *pWInfo; /* WHERE analysis context */ + Index *pProbe; /* An index we are evaluating */ + Index sPk; /* A fake index object for the primary key */ + LogEst aiRowEstPk[2]; /* The aiRowLogEst[] value for the sPk index */ + i16 aiColumnPk = -1; /* The aColumn[] value for the sPk index */ + SrcList *pTabList; /* The FROM clause */ + SrcItem *pSrc; /* The FROM clause btree term to add */ + WhereLoop *pNew; /* Template WhereLoop object */ + int rc = SQLITE_OK; /* Return code */ + int iSortIdx = 1; /* Index number */ + int b; /* A boolean value */ + LogEst rSize; /* number of rows in the table */ + WhereClause *pWC; /* The parsed WHERE clause */ + Table *pTab; /* Table being queried */ + + pNew = pBuilder->pNew; + pWInfo = pBuilder->pWInfo; + pTabList = pWInfo->pTabList; + pSrc = pTabList->a + pNew->iTab; + pTab = pSrc->pTab; + pWC = pBuilder->pWC; + assert( !IsVirtual(pSrc->pTab) ); + + if( pSrc->fg.isIndexedBy ){ + assert( pSrc->fg.isCte==0 ); + /* An INDEXED BY clause specifies a particular index to use */ + pProbe = pSrc->u2.pIBIndex; + }else if( !HasRowid(pTab) ){ + pProbe = pTab->pIndex; + }else{ + /* There is no INDEXED BY clause. Create a fake Index object in local + ** variable sPk to represent the rowid primary key index. Make this + ** fake index the first in a chain of Index objects with all of the real + ** indices to follow */ + Index *pFirst; /* First of real indices on the table */ + memset(&sPk, 0, sizeof(Index)); + sPk.nKeyCol = 1; + sPk.nColumn = 1; + sPk.aiColumn = &aiColumnPk; + sPk.aiRowLogEst = aiRowEstPk; + sPk.onError = OE_Replace; + sPk.pTable = pTab; + sPk.szIdxRow = pTab->szTabRow; + sPk.idxType = SQLITE_IDXTYPE_IPK; + aiRowEstPk[0] = pTab->nRowLogEst; + aiRowEstPk[1] = 0; + pFirst = pSrc->pTab->pIndex; + if( pSrc->fg.notIndexed==0 ){ + /* The real indices of the table are only considered if the + ** NOT INDEXED qualifier is omitted from the FROM clause */ + sPk.pNext = pFirst; + } + pProbe = &sPk; + } + rSize = pTab->nRowLogEst; + +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX + /* Automatic indexes */ + if( !pBuilder->pOrSet /* Not part of an OR optimization */ + && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE)==0 + && (pWInfo->pParse->db->flags & SQLITE_AutoIndex)!=0 + && !pSrc->fg.isIndexedBy /* Has no INDEXED BY clause */ + && !pSrc->fg.notIndexed /* Has no NOT INDEXED clause */ + && HasRowid(pTab) /* Not WITHOUT ROWID table. (FIXME: Why not?) */ + && !pSrc->fg.isCorrelated /* Not a correlated subquery */ + && !pSrc->fg.isRecursive /* Not a recursive common table expression. */ + ){ + /* Generate auto-index WhereLoops */ + LogEst rLogSize; /* Logarithm of the number of rows in the table */ + WhereTerm *pTerm; + WhereTerm *pWCEnd = pWC->a + pWC->nTerm; + rLogSize = estLog(rSize); + for(pTerm=pWC->a; rc==SQLITE_OK && pTermprereqRight & pNew->maskSelf ) continue; + if( termCanDriveIndex(pTerm, pSrc, 0) ){ + pNew->u.btree.nEq = 1; + pNew->nSkip = 0; + pNew->u.btree.pIndex = 0; + pNew->nLTerm = 1; + pNew->aLTerm[0] = pTerm; + /* TUNING: One-time cost for computing the automatic index is + ** estimated to be X*N*log2(N) where N is the number of rows in + ** the table being indexed and where X is 7 (LogEst=28) for normal + ** tables or 0.5 (LogEst=-10) for views and subqueries. The value + ** of X is smaller for views and subqueries so that the query planner + ** will be more aggressive about generating automatic indexes for + ** those objects, since there is no opportunity to add schema + ** indexes on subqueries and views. */ + pNew->rSetup = rLogSize + rSize; + if( !IsView(pTab) && (pTab->tabFlags & TF_Ephemeral)==0 ){ + pNew->rSetup += 28; + }else{ + pNew->rSetup -= 10; + } + ApplyCostMultiplier(pNew->rSetup, pTab->costMult); + if( pNew->rSetup<0 ) pNew->rSetup = 0; + /* TUNING: Each index lookup yields 20 rows in the table. This + ** is more than the usual guess of 10 rows, since we have no way + ** of knowing how selective the index will ultimately be. It would + ** not be unreasonable to make this value much larger. */ + pNew->nOut = 43; assert( 43==sqlite3LogEst(20) ); + pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut); + pNew->wsFlags = WHERE_AUTO_INDEX; + pNew->prereq = mPrereq | pTerm->prereqRight; + rc = whereLoopInsert(pBuilder, pNew); + } + } + } +#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */ + + /* Loop over all indices. If there was an INDEXED BY clause, then only + ** consider index pProbe. */ + for(; rc==SQLITE_OK && pProbe; + pProbe=(pSrc->fg.isIndexedBy ? 0 : pProbe->pNext), iSortIdx++ + ){ + int isLeft = (pSrc->fg.jointype & JT_OUTER)!=0; + if( pProbe->pPartIdxWhere!=0 + && !whereUsablePartialIndex(pSrc->iCursor, isLeft, pWC, + pProbe->pPartIdxWhere) + ){ + testcase( pNew->iTab!=pSrc->iCursor ); /* See ticket [98d973b8f5] */ + continue; /* Partial index inappropriate for this query */ + } + if( pProbe->bNoQuery ) continue; + rSize = pProbe->aiRowLogEst[0]; + pNew->u.btree.nEq = 0; + pNew->u.btree.nBtm = 0; + pNew->u.btree.nTop = 0; + pNew->nSkip = 0; + pNew->nLTerm = 0; + pNew->iSortIdx = 0; + pNew->rSetup = 0; + pNew->prereq = mPrereq; + pNew->nOut = rSize; + pNew->u.btree.pIndex = pProbe; + b = indexMightHelpWithOrderBy(pBuilder, pProbe, pSrc->iCursor); + + /* The ONEPASS_DESIRED flags never occurs together with ORDER BY */ + assert( (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || b==0 ); + if( pProbe->idxType==SQLITE_IDXTYPE_IPK ){ + /* Integer primary key index */ + pNew->wsFlags = WHERE_IPK; + + /* Full table scan */ + pNew->iSortIdx = b ? iSortIdx : 0; + /* TUNING: Cost of full table scan is 3.0*N. The 3.0 factor is an + ** extra cost designed to discourage the use of full table scans, + ** since index lookups have better worst-case performance if our + ** stat guesses are wrong. Reduce the 3.0 penalty slightly + ** (to 2.75) if we have valid STAT4 information for the table. + ** At 2.75, a full table scan is preferred over using an index on + ** a column with just two distinct values where each value has about + ** an equal number of appearances. Without STAT4 data, we still want + ** to use an index in that case, since the constraint might be for + ** the scarcer of the two values, and in that case an index lookup is + ** better. + */ +#ifdef SQLITE_ENABLE_STAT4 + pNew->rRun = rSize + 16 - 2*((pTab->tabFlags & TF_HasStat4)!=0); +#else + pNew->rRun = rSize + 16; +#endif + ApplyCostMultiplier(pNew->rRun, pTab->costMult); + whereLoopOutputAdjust(pWC, pNew, rSize); + rc = whereLoopInsert(pBuilder, pNew); + pNew->nOut = rSize; + if( rc ) break; + }else{ + Bitmask m; + if( pProbe->isCovering ){ + pNew->wsFlags = WHERE_IDX_ONLY | WHERE_INDEXED; + m = 0; + }else{ + m = pSrc->colUsed & pProbe->colNotIdxed; + pNew->wsFlags = (m==0) ? (WHERE_IDX_ONLY|WHERE_INDEXED) : WHERE_INDEXED; + } + + /* Full scan via index */ + if( b + || !HasRowid(pTab) + || pProbe->pPartIdxWhere!=0 + || pSrc->fg.isIndexedBy + || ( m==0 + && pProbe->bUnordered==0 + && (pProbe->szIdxRowszTabRow) + && (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 + && sqlite3GlobalConfig.bUseCis + && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan) + ) + ){ + pNew->iSortIdx = b ? iSortIdx : 0; + + /* The cost of visiting the index rows is N*K, where K is + ** between 1.1 and 3.0, depending on the relative sizes of the + ** index and table rows. */ + pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow; + if( m!=0 ){ + /* If this is a non-covering index scan, add in the cost of + ** doing table lookups. The cost will be 3x the number of + ** lookups. Take into account WHERE clause terms that can be + ** satisfied using just the index, and that do not require a + ** table lookup. */ + LogEst nLookup = rSize + 16; /* Base cost: N*3 */ + int ii; + int iCur = pSrc->iCursor; + WhereClause *pWC2 = &pWInfo->sWC; + for(ii=0; iinTerm; ii++){ + WhereTerm *pTerm = &pWC2->a[ii]; + if( !sqlite3ExprCoveredByIndex(pTerm->pExpr, iCur, pProbe) ){ + break; + } + /* pTerm can be evaluated using just the index. So reduce + ** the expected number of table lookups accordingly */ + if( pTerm->truthProb<=0 ){ + nLookup += pTerm->truthProb; + }else{ + nLookup--; + if( pTerm->eOperator & (WO_EQ|WO_IS) ) nLookup -= 19; + } + } + + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, nLookup); + } + ApplyCostMultiplier(pNew->rRun, pTab->costMult); + whereLoopOutputAdjust(pWC, pNew, rSize); + rc = whereLoopInsert(pBuilder, pNew); + pNew->nOut = rSize; + if( rc ) break; + } + } + + pBuilder->bldFlags1 = 0; + rc = whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, 0); + if( pBuilder->bldFlags1==SQLITE_BLDF1_INDEXED ){ + /* If a non-unique index is used, or if a prefix of the key for + ** unique index is used (making the index functionally non-unique) + ** then the sqlite_stat1 data becomes important for scoring the + ** plan */ + pTab->tabFlags |= TF_StatsUsed; + } +#ifdef SQLITE_ENABLE_STAT4 + sqlite3Stat4ProbeFree(pBuilder->pRec); + pBuilder->nRecValid = 0; + pBuilder->pRec = 0; +#endif + } + return rc; +} + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Return true if pTerm is a virtual table LIMIT or OFFSET term. +*/ +static int isLimitTerm(WhereTerm *pTerm){ + assert( pTerm->eOperator==WO_AUX || pTerm->eMatchOp==0 ); + return pTerm->eMatchOp>=SQLITE_INDEX_CONSTRAINT_LIMIT + && pTerm->eMatchOp<=SQLITE_INDEX_CONSTRAINT_OFFSET; +} + +/* +** Argument pIdxInfo is already populated with all constraints that may +** be used by the virtual table identified by pBuilder->pNew->iTab. This +** function marks a subset of those constraints usable, invokes the +** xBestIndex method and adds the returned plan to pBuilder. +** +** A constraint is marked usable if: +** +** * Argument mUsable indicates that its prerequisites are available, and +** +** * It is not one of the operators specified in the mExclude mask passed +** as the fourth argument (which in practice is either WO_IN or 0). +** +** Argument mPrereq is a mask of tables that must be scanned before the +** virtual table in question. These are added to the plans prerequisites +** before it is added to pBuilder. +** +** Output parameter *pbIn is set to true if the plan added to pBuilder +** uses one or more WO_IN terms, or false otherwise. +*/ +static int whereLoopAddVirtualOne( + WhereLoopBuilder *pBuilder, + Bitmask mPrereq, /* Mask of tables that must be used. */ + Bitmask mUsable, /* Mask of usable tables */ + u16 mExclude, /* Exclude terms using these operators */ + sqlite3_index_info *pIdxInfo, /* Populated object for xBestIndex */ + u16 mNoOmit, /* Do not omit these constraints */ + int *pbIn, /* OUT: True if plan uses an IN(...) op */ + int *pbRetryLimit /* OUT: Retry without LIMIT/OFFSET */ +){ + WhereClause *pWC = pBuilder->pWC; + HiddenIndexInfo *pHidden = (HiddenIndexInfo*)&pIdxInfo[1]; + struct sqlite3_index_constraint *pIdxCons; + struct sqlite3_index_constraint_usage *pUsage = pIdxInfo->aConstraintUsage; + int i; + int mxTerm; + int rc = SQLITE_OK; + WhereLoop *pNew = pBuilder->pNew; + Parse *pParse = pBuilder->pWInfo->pParse; + SrcItem *pSrc = &pBuilder->pWInfo->pTabList->a[pNew->iTab]; + int nConstraint = pIdxInfo->nConstraint; + + assert( (mUsable & mPrereq)==mPrereq ); + *pbIn = 0; + pNew->prereq = mPrereq; + + /* Set the usable flag on the subset of constraints identified by + ** arguments mUsable and mExclude. */ + pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; + for(i=0; ia[pIdxCons->iTermOffset]; + pIdxCons->usable = 0; + if( (pTerm->prereqRight & mUsable)==pTerm->prereqRight + && (pTerm->eOperator & mExclude)==0 + && (pbRetryLimit || !isLimitTerm(pTerm)) + ){ + pIdxCons->usable = 1; + } + } + + /* Initialize the output fields of the sqlite3_index_info structure */ + memset(pUsage, 0, sizeof(pUsage[0])*nConstraint); + assert( pIdxInfo->needToFreeIdxStr==0 ); + pIdxInfo->idxStr = 0; + pIdxInfo->idxNum = 0; + pIdxInfo->orderByConsumed = 0; + pIdxInfo->estimatedCost = SQLITE_BIG_DBL / (double)2; + pIdxInfo->estimatedRows = 25; + pIdxInfo->idxFlags = 0; + pIdxInfo->colUsed = (sqlite3_int64)pSrc->colUsed; + pHidden->mHandleIn = 0; + + /* Invoke the virtual table xBestIndex() method */ + rc = vtabBestIndex(pParse, pSrc->pTab, pIdxInfo); + if( rc ){ + if( rc==SQLITE_CONSTRAINT ){ + /* If the xBestIndex method returns SQLITE_CONSTRAINT, that means + ** that the particular combination of parameters provided is unusable. + ** Make no entries in the loop table. + */ + WHERETRACE(0xffff, (" ^^^^--- non-viable plan rejected!\n")); + return SQLITE_OK; + } + return rc; + } + + mxTerm = -1; + assert( pNew->nLSlot>=nConstraint ); + memset(pNew->aLTerm, 0, sizeof(pNew->aLTerm[0])*nConstraint ); + memset(&pNew->u.vtab, 0, sizeof(pNew->u.vtab)); + pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; + for(i=0; i=0 ){ + WhereTerm *pTerm; + int j = pIdxCons->iTermOffset; + if( iTerm>=nConstraint + || j<0 + || j>=pWC->nTerm + || pNew->aLTerm[iTerm]!=0 + || pIdxCons->usable==0 + ){ + sqlite3ErrorMsg(pParse,"%s.xBestIndex malfunction",pSrc->pTab->zName); + testcase( pIdxInfo->needToFreeIdxStr ); + return SQLITE_ERROR; + } + testcase( iTerm==nConstraint-1 ); + testcase( j==0 ); + testcase( j==pWC->nTerm-1 ); + pTerm = &pWC->a[j]; + pNew->prereq |= pTerm->prereqRight; + assert( iTermnLSlot ); + pNew->aLTerm[iTerm] = pTerm; + if( iTerm>mxTerm ) mxTerm = iTerm; + testcase( iTerm==15 ); + testcase( iTerm==16 ); + if( pUsage[i].omit ){ + if( i<16 && ((1<u.vtab.omitMask |= 1<eMatchOp==SQLITE_INDEX_CONSTRAINT_OFFSET ){ + pNew->u.vtab.bOmitOffset = 1; + } + } + if( SMASKBIT32(i) & pHidden->mHandleIn ){ + pNew->u.vtab.mHandleIn |= MASKBIT32(iTerm); + }else if( (pTerm->eOperator & WO_IN)!=0 ){ + /* A virtual table that is constrained by an IN clause may not + ** consume the ORDER BY clause because (1) the order of IN terms + ** is not necessarily related to the order of output terms and + ** (2) Multiple outputs from a single IN value will not merge + ** together. */ + pIdxInfo->orderByConsumed = 0; + pIdxInfo->idxFlags &= ~SQLITE_INDEX_SCAN_UNIQUE; + *pbIn = 1; assert( (mExclude & WO_IN)==0 ); + } + + if( isLimitTerm(pTerm) && *pbIn ){ + /* If there is an IN(...) term handled as an == (separate call to + ** xFilter for each value on the RHS of the IN) and a LIMIT or + ** OFFSET term handled as well, the plan is unusable. Set output + ** variable *pbRetryLimit to true to tell the caller to retry with + ** LIMIT and OFFSET disabled. */ + if( pIdxInfo->needToFreeIdxStr ){ + sqlite3_free(pIdxInfo->idxStr); + pIdxInfo->idxStr = 0; + pIdxInfo->needToFreeIdxStr = 0; + } + *pbRetryLimit = 1; + return SQLITE_OK; + } + } + } + + pNew->nLTerm = mxTerm+1; + for(i=0; i<=mxTerm; i++){ + if( pNew->aLTerm[i]==0 ){ + /* The non-zero argvIdx values must be contiguous. Raise an + ** error if they are not */ + sqlite3ErrorMsg(pParse,"%s.xBestIndex malfunction",pSrc->pTab->zName); + testcase( pIdxInfo->needToFreeIdxStr ); + return SQLITE_ERROR; + } + } + assert( pNew->nLTerm<=pNew->nLSlot ); + pNew->u.vtab.idxNum = pIdxInfo->idxNum; + pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr; + pIdxInfo->needToFreeIdxStr = 0; + pNew->u.vtab.idxStr = pIdxInfo->idxStr; + pNew->u.vtab.isOrdered = (i8)(pIdxInfo->orderByConsumed ? + pIdxInfo->nOrderBy : 0); + pNew->rSetup = 0; + pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost); + pNew->nOut = sqlite3LogEst(pIdxInfo->estimatedRows); + + /* Set the WHERE_ONEROW flag if the xBestIndex() method indicated + ** that the scan will visit at most one row. Clear it otherwise. */ + if( pIdxInfo->idxFlags & SQLITE_INDEX_SCAN_UNIQUE ){ + pNew->wsFlags |= WHERE_ONEROW; + }else{ + pNew->wsFlags &= ~WHERE_ONEROW; + } + rc = whereLoopInsert(pBuilder, pNew); + if( pNew->u.vtab.needFree ){ + sqlite3_free(pNew->u.vtab.idxStr); + pNew->u.vtab.needFree = 0; + } + WHERETRACE(0xffff, (" bIn=%d prereqIn=%04llx prereqOut=%04llx\n", + *pbIn, (sqlite3_uint64)mPrereq, + (sqlite3_uint64)(pNew->prereq & ~mPrereq))); + + return rc; +} + +/* +** Return the collating sequence for a constraint passed into xBestIndex. +** +** pIdxInfo must be an sqlite3_index_info structure passed into xBestIndex. +** This routine depends on there being a HiddenIndexInfo structure immediately +** following the sqlite3_index_info structure. +** +** Return a pointer to the collation name: +** +** 1. If there is an explicit COLLATE operator on the constaint, return it. +** +** 2. Else, if the column has an alternative collation, return that. +** +** 3. Otherwise, return "BINARY". +*/ +SQLITE_API const char *sqlite3_vtab_collation(sqlite3_index_info *pIdxInfo, int iCons){ + HiddenIndexInfo *pHidden = (HiddenIndexInfo*)&pIdxInfo[1]; + const char *zRet = 0; + if( iCons>=0 && iConsnConstraint ){ + CollSeq *pC = 0; + int iTerm = pIdxInfo->aConstraint[iCons].iTermOffset; + Expr *pX = pHidden->pWC->a[iTerm].pExpr; + if( pX->pLeft ){ + pC = sqlite3ExprCompareCollSeq(pHidden->pParse, pX); + } + zRet = (pC ? pC->zName : sqlite3StrBINARY); + } + return zRet; +} + +/* +** Return true if constraint iCons is really an IN(...) constraint, or +** false otherwise. If iCons is an IN(...) constraint, set (if bHandle!=0) +** or clear (if bHandle==0) the flag to handle it using an iterator. +*/ +SQLITE_API int sqlite3_vtab_in(sqlite3_index_info *pIdxInfo, int iCons, int bHandle){ + HiddenIndexInfo *pHidden = (HiddenIndexInfo*)&pIdxInfo[1]; + u32 m = SMASKBIT32(iCons); + if( m & pHidden->mIn ){ + if( bHandle==0 ){ + pHidden->mHandleIn &= ~m; + }else if( bHandle>0 ){ + pHidden->mHandleIn |= m; + } + return 1; + } + return 0; +} + +/* +** This interface is callable from within the xBestIndex callback only. +** +** If possible, set (*ppVal) to point to an object containing the value +** on the right-hand-side of constraint iCons. +*/ +SQLITE_API int sqlite3_vtab_rhs_value( + sqlite3_index_info *pIdxInfo, /* Copy of first argument to xBestIndex */ + int iCons, /* Constraint for which RHS is wanted */ + sqlite3_value **ppVal /* Write value extracted here */ +){ + HiddenIndexInfo *pH = (HiddenIndexInfo*)&pIdxInfo[1]; + sqlite3_value *pVal = 0; + int rc = SQLITE_OK; + if( iCons<0 || iCons>=pIdxInfo->nConstraint ){ + rc = SQLITE_MISUSE; /* EV: R-30545-25046 */ + }else{ + if( pH->aRhs[iCons]==0 ){ + WhereTerm *pTerm = &pH->pWC->a[pIdxInfo->aConstraint[iCons].iTermOffset]; + rc = sqlite3ValueFromExpr( + pH->pParse->db, pTerm->pExpr->pRight, ENC(pH->pParse->db), + SQLITE_AFF_BLOB, &pH->aRhs[iCons] + ); + testcase( rc!=SQLITE_OK ); + } + pVal = pH->aRhs[iCons]; + } + *ppVal = pVal; + + if( rc==SQLITE_OK && pVal==0 ){ /* IMP: R-19933-32160 */ + rc = SQLITE_NOTFOUND; /* IMP: R-36424-56542 */ + } + + return rc; +} + +/* +** Return true if ORDER BY clause may be handled as DISTINCT. +*/ +SQLITE_API int sqlite3_vtab_distinct(sqlite3_index_info *pIdxInfo){ + HiddenIndexInfo *pHidden = (HiddenIndexInfo*)&pIdxInfo[1]; + assert( pHidden->eDistinct==0 + || pHidden->eDistinct==1 + || pHidden->eDistinct==2 ); + return pHidden->eDistinct; +} + +#if (defined(SQLITE_ENABLE_DBPAGE_VTAB) || defined(SQLITE_TEST)) \ + && !defined(SQLITE_OMIT_VIRTUALTABLE) +/* +** Cause the prepared statement that is associated with a call to +** xBestIndex to open write transactions on all attached schemas. +** This is used by the (built-in) sqlite_dbpage virtual table. +*/ +SQLITE_PRIVATE void sqlite3VtabWriteAll(sqlite3_index_info *pIdxInfo){ + HiddenIndexInfo *pHidden = (HiddenIndexInfo*)&pIdxInfo[1]; + Parse *pParse = pHidden->pParse; + int nDb = pParse->db->nDb; + int i; + for(i=0; ipNew->iTab. That table is guaranteed to be a virtual table. +** +** If there are no LEFT or CROSS JOIN joins in the query, both mPrereq and +** mUnusable are set to 0. Otherwise, mPrereq is a mask of all FROM clause +** entries that occur before the virtual table in the FROM clause and are +** separated from it by at least one LEFT or CROSS JOIN. Similarly, the +** mUnusable mask contains all FROM clause entries that occur after the +** virtual table and are separated from it by at least one LEFT or +** CROSS JOIN. +** +** For example, if the query were: +** +** ... FROM t1, t2 LEFT JOIN t3, t4, vt CROSS JOIN t5, t6; +** +** then mPrereq corresponds to (t1, t2) and mUnusable to (t5, t6). +** +** All the tables in mPrereq must be scanned before the current virtual +** table. So any terms for which all prerequisites are satisfied by +** mPrereq may be specified as "usable" in all calls to xBestIndex. +** Conversely, all tables in mUnusable must be scanned after the current +** virtual table, so any terms for which the prerequisites overlap with +** mUnusable should always be configured as "not-usable" for xBestIndex. +*/ +static int whereLoopAddVirtual( + WhereLoopBuilder *pBuilder, /* WHERE clause information */ + Bitmask mPrereq, /* Tables that must be scanned before this one */ + Bitmask mUnusable /* Tables that must be scanned after this one */ +){ + int rc = SQLITE_OK; /* Return code */ + WhereInfo *pWInfo; /* WHERE analysis context */ + Parse *pParse; /* The parsing context */ + WhereClause *pWC; /* The WHERE clause */ + SrcItem *pSrc; /* The FROM clause term to search */ + sqlite3_index_info *p; /* Object to pass to xBestIndex() */ + int nConstraint; /* Number of constraints in p */ + int bIn; /* True if plan uses IN(...) operator */ + WhereLoop *pNew; + Bitmask mBest; /* Tables used by best possible plan */ + u16 mNoOmit; + int bRetry = 0; /* True to retry with LIMIT/OFFSET disabled */ + + assert( (mPrereq & mUnusable)==0 ); + pWInfo = pBuilder->pWInfo; + pParse = pWInfo->pParse; + pWC = pBuilder->pWC; + pNew = pBuilder->pNew; + pSrc = &pWInfo->pTabList->a[pNew->iTab]; + assert( IsVirtual(pSrc->pTab) ); + p = allocateIndexInfo(pWInfo, pWC, mUnusable, pSrc, &mNoOmit); + if( p==0 ) return SQLITE_NOMEM_BKPT; + pNew->rSetup = 0; + pNew->wsFlags = WHERE_VIRTUALTABLE; + pNew->nLTerm = 0; + pNew->u.vtab.needFree = 0; + nConstraint = p->nConstraint; + if( whereLoopResize(pParse->db, pNew, nConstraint) ){ + freeIndexInfo(pParse->db, p); + return SQLITE_NOMEM_BKPT; + } + + /* First call xBestIndex() with all constraints usable. */ + WHERETRACE(0x800, ("BEGIN %s.addVirtual()\n", pSrc->pTab->zName)); + WHERETRACE(0x40, (" VirtualOne: all usable\n")); + rc = whereLoopAddVirtualOne( + pBuilder, mPrereq, ALLBITS, 0, p, mNoOmit, &bIn, &bRetry + ); + if( bRetry ){ + assert( rc==SQLITE_OK ); + rc = whereLoopAddVirtualOne( + pBuilder, mPrereq, ALLBITS, 0, p, mNoOmit, &bIn, 0 + ); + } + + /* If the call to xBestIndex() with all terms enabled produced a plan + ** that does not require any source tables (IOW: a plan with mBest==0) + ** and does not use an IN(...) operator, then there is no point in making + ** any further calls to xBestIndex() since they will all return the same + ** result (if the xBestIndex() implementation is sane). */ + if( rc==SQLITE_OK && ((mBest = (pNew->prereq & ~mPrereq))!=0 || bIn) ){ + int seenZero = 0; /* True if a plan with no prereqs seen */ + int seenZeroNoIN = 0; /* Plan with no prereqs and no IN(...) seen */ + Bitmask mPrev = 0; + Bitmask mBestNoIn = 0; + + /* If the plan produced by the earlier call uses an IN(...) term, call + ** xBestIndex again, this time with IN(...) terms disabled. */ + if( bIn ){ + WHERETRACE(0x40, (" VirtualOne: all usable w/o IN\n")); + rc = whereLoopAddVirtualOne( + pBuilder, mPrereq, ALLBITS, WO_IN, p, mNoOmit, &bIn, 0); + assert( bIn==0 ); + mBestNoIn = pNew->prereq & ~mPrereq; + if( mBestNoIn==0 ){ + seenZero = 1; + seenZeroNoIN = 1; + } + } + + /* Call xBestIndex once for each distinct value of (prereqRight & ~mPrereq) + ** in the set of terms that apply to the current virtual table. */ + while( rc==SQLITE_OK ){ + int i; + Bitmask mNext = ALLBITS; + assert( mNext>0 ); + for(i=0; ia[p->aConstraint[i].iTermOffset].prereqRight & ~mPrereq + ); + if( mThis>mPrev && mThisprereq==mPrereq ){ + seenZero = 1; + if( bIn==0 ) seenZeroNoIN = 1; + } + } + + /* If the calls to xBestIndex() in the above loop did not find a plan + ** that requires no source tables at all (i.e. one guaranteed to be + ** usable), make a call here with all source tables disabled */ + if( rc==SQLITE_OK && seenZero==0 ){ + WHERETRACE(0x40, (" VirtualOne: all disabled\n")); + rc = whereLoopAddVirtualOne( + pBuilder, mPrereq, mPrereq, 0, p, mNoOmit, &bIn, 0); + if( bIn==0 ) seenZeroNoIN = 1; + } + + /* If the calls to xBestIndex() have so far failed to find a plan + ** that requires no source tables at all and does not use an IN(...) + ** operator, make a final call to obtain one here. */ + if( rc==SQLITE_OK && seenZeroNoIN==0 ){ + WHERETRACE(0x40, (" VirtualOne: all disabled and w/o IN\n")); + rc = whereLoopAddVirtualOne( + pBuilder, mPrereq, mPrereq, WO_IN, p, mNoOmit, &bIn, 0); + } + } + + if( p->needToFreeIdxStr ) sqlite3_free(p->idxStr); + freeIndexInfo(pParse->db, p); + WHERETRACE(0x800, ("END %s.addVirtual(), rc=%d\n", pSrc->pTab->zName, rc)); + return rc; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** Add WhereLoop entries to handle OR terms. This works for either +** btrees or virtual tables. +*/ +static int whereLoopAddOr( + WhereLoopBuilder *pBuilder, + Bitmask mPrereq, + Bitmask mUnusable +){ + WhereInfo *pWInfo = pBuilder->pWInfo; + WhereClause *pWC; + WhereLoop *pNew; + WhereTerm *pTerm, *pWCEnd; + int rc = SQLITE_OK; + int iCur; + WhereClause tempWC; + WhereLoopBuilder sSubBuild; + WhereOrSet sSum, sCur; + SrcItem *pItem; + + pWC = pBuilder->pWC; + pWCEnd = pWC->a + pWC->nTerm; + pNew = pBuilder->pNew; + memset(&sSum, 0, sizeof(sSum)); + pItem = pWInfo->pTabList->a + pNew->iTab; + iCur = pItem->iCursor; + + for(pTerm=pWC->a; pTermeOperator & WO_OR)!=0 + && (pTerm->u.pOrInfo->indexable & pNew->maskSelf)!=0 + ){ + WhereClause * const pOrWC = &pTerm->u.pOrInfo->wc; + WhereTerm * const pOrWCEnd = &pOrWC->a[pOrWC->nTerm]; + WhereTerm *pOrTerm; + int once = 1; + int i, j; + + sSubBuild = *pBuilder; + sSubBuild.pOrSet = &sCur; + + WHERETRACE(0x200, ("Begin processing OR-clause %p\n", pTerm)); + for(pOrTerm=pOrWC->a; pOrTermeOperator & WO_AND)!=0 ){ + sSubBuild.pWC = &pOrTerm->u.pAndInfo->wc; + }else if( pOrTerm->leftCursor==iCur ){ + tempWC.pWInfo = pWC->pWInfo; + tempWC.pOuter = pWC; + tempWC.op = TK_AND; + tempWC.nTerm = 1; + tempWC.nBase = 1; + tempWC.a = pOrTerm; + sSubBuild.pWC = &tempWC; + }else{ + continue; + } + sCur.n = 0; +#ifdef WHERETRACE_ENABLED + WHERETRACE(0x200, ("OR-term %d of %p has %d subterms:\n", + (int)(pOrTerm-pOrWC->a), pTerm, sSubBuild.pWC->nTerm)); + if( sqlite3WhereTrace & 0x400 ){ + sqlite3WhereClausePrint(sSubBuild.pWC); + } +#endif +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pItem->pTab) ){ + rc = whereLoopAddVirtual(&sSubBuild, mPrereq, mUnusable); + }else +#endif + { + rc = whereLoopAddBtree(&sSubBuild, mPrereq); + } + if( rc==SQLITE_OK ){ + rc = whereLoopAddOr(&sSubBuild, mPrereq, mUnusable); + } + assert( rc==SQLITE_OK || rc==SQLITE_DONE || sCur.n==0 + || rc==SQLITE_NOMEM ); + testcase( rc==SQLITE_NOMEM && sCur.n>0 ); + testcase( rc==SQLITE_DONE ); + if( sCur.n==0 ){ + sSum.n = 0; + break; + }else if( once ){ + whereOrMove(&sSum, &sCur); + once = 0; + }else{ + WhereOrSet sPrev; + whereOrMove(&sPrev, &sSum); + sSum.n = 0; + for(i=0; inLTerm = 1; + pNew->aLTerm[0] = pTerm; + pNew->wsFlags = WHERE_MULTI_OR; + pNew->rSetup = 0; + pNew->iSortIdx = 0; + memset(&pNew->u, 0, sizeof(pNew->u)); + for(i=0; rc==SQLITE_OK && irRun = sSum.a[i].rRun + 1; + pNew->nOut = sSum.a[i].nOut; + pNew->prereq = sSum.a[i].prereq; + rc = whereLoopInsert(pBuilder, pNew); + } + WHERETRACE(0x200, ("End processing OR-clause %p\n", pTerm)); + } + } + return rc; +} + +/* +** Add all WhereLoop objects for all tables +*/ +static int whereLoopAddAll(WhereLoopBuilder *pBuilder){ + WhereInfo *pWInfo = pBuilder->pWInfo; + Bitmask mPrereq = 0; + Bitmask mPrior = 0; + int iTab; + SrcList *pTabList = pWInfo->pTabList; + SrcItem *pItem; + SrcItem *pEnd = &pTabList->a[pWInfo->nLevel]; + sqlite3 *db = pWInfo->pParse->db; + int rc = SQLITE_OK; + WhereLoop *pNew; + + /* Loop over the tables in the join, from left to right */ + pNew = pBuilder->pNew; + whereLoopInit(pNew); + pBuilder->iPlanLimit = SQLITE_QUERY_PLANNER_LIMIT; + for(iTab=0, pItem=pTabList->a; pItemiTab = iTab; + pBuilder->iPlanLimit += SQLITE_QUERY_PLANNER_LIMIT_INCR; + pNew->maskSelf = sqlite3WhereGetMask(&pWInfo->sMaskSet, pItem->iCursor); + if( (pItem->fg.jointype & (JT_LEFT|JT_CROSS))!=0 ){ + /* This condition is true when pItem is the FROM clause term on the + ** right-hand-side of a LEFT or CROSS JOIN. */ + mPrereq = mPrior; + }else{ + mPrereq = 0; + } +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( IsVirtual(pItem->pTab) ){ + SrcItem *p; + for(p=&pItem[1]; pfg.jointype & (JT_LEFT|JT_CROSS)) ){ + mUnusable |= sqlite3WhereGetMask(&pWInfo->sMaskSet, p->iCursor); + } + } + rc = whereLoopAddVirtual(pBuilder, mPrereq, mUnusable); + }else +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + { + rc = whereLoopAddBtree(pBuilder, mPrereq); + } + if( rc==SQLITE_OK && pBuilder->pWC->hasOr ){ + rc = whereLoopAddOr(pBuilder, mPrereq, mUnusable); + } + mPrior |= pNew->maskSelf; + if( rc || db->mallocFailed ){ + if( rc==SQLITE_DONE ){ + /* We hit the query planner search limit set by iPlanLimit */ + sqlite3_log(SQLITE_WARNING, "abbreviated query algorithm search"); + rc = SQLITE_OK; + }else{ + break; + } + } + } + + whereLoopClear(db, pNew); + return rc; +} + +/* +** Examine a WherePath (with the addition of the extra WhereLoop of the 6th +** parameters) to see if it outputs rows in the requested ORDER BY +** (or GROUP BY) without requiring a separate sort operation. Return N: +** +** N>0: N terms of the ORDER BY clause are satisfied +** N==0: No terms of the ORDER BY clause are satisfied +** N<0: Unknown yet how many terms of ORDER BY might be satisfied. +** +** Note that processing for WHERE_GROUPBY and WHERE_DISTINCTBY is not as +** strict. With GROUP BY and DISTINCT the only requirement is that +** equivalent rows appear immediately adjacent to one another. GROUP BY +** and DISTINCT do not require rows to appear in any particular order as long +** as equivalent rows are grouped together. Thus for GROUP BY and DISTINCT +** the pOrderBy terms can be matched in any order. With ORDER BY, the +** pOrderBy terms must be matched in strict left-to-right order. +*/ +static i8 wherePathSatisfiesOrderBy( + WhereInfo *pWInfo, /* The WHERE clause */ + ExprList *pOrderBy, /* ORDER BY or GROUP BY or DISTINCT clause to check */ + WherePath *pPath, /* The WherePath to check */ + u16 wctrlFlags, /* WHERE_GROUPBY or _DISTINCTBY or _ORDERBY_LIMIT */ + u16 nLoop, /* Number of entries in pPath->aLoop[] */ + WhereLoop *pLast, /* Add this WhereLoop to the end of pPath->aLoop[] */ + Bitmask *pRevMask /* OUT: Mask of WhereLoops to run in reverse order */ +){ + u8 revSet; /* True if rev is known */ + u8 rev; /* Composite sort order */ + u8 revIdx; /* Index sort order */ + u8 isOrderDistinct; /* All prior WhereLoops are order-distinct */ + u8 distinctColumns; /* True if the loop has UNIQUE NOT NULL columns */ + u8 isMatch; /* iColumn matches a term of the ORDER BY clause */ + u16 eqOpMask; /* Allowed equality operators */ + u16 nKeyCol; /* Number of key columns in pIndex */ + u16 nColumn; /* Total number of ordered columns in the index */ + u16 nOrderBy; /* Number terms in the ORDER BY clause */ + int iLoop; /* Index of WhereLoop in pPath being processed */ + int i, j; /* Loop counters */ + int iCur; /* Cursor number for current WhereLoop */ + int iColumn; /* A column number within table iCur */ + WhereLoop *pLoop = 0; /* Current WhereLoop being processed. */ + WhereTerm *pTerm; /* A single term of the WHERE clause */ + Expr *pOBExpr; /* An expression from the ORDER BY clause */ + CollSeq *pColl; /* COLLATE function from an ORDER BY clause term */ + Index *pIndex; /* The index associated with pLoop */ + sqlite3 *db = pWInfo->pParse->db; /* Database connection */ + Bitmask obSat = 0; /* Mask of ORDER BY terms satisfied so far */ + Bitmask obDone; /* Mask of all ORDER BY terms */ + Bitmask orderDistinctMask; /* Mask of all well-ordered loops */ + Bitmask ready; /* Mask of inner loops */ + + /* + ** We say the WhereLoop is "one-row" if it generates no more than one + ** row of output. A WhereLoop is one-row if all of the following are true: + ** (a) All index columns match with WHERE_COLUMN_EQ. + ** (b) The index is unique + ** Any WhereLoop with an WHERE_COLUMN_EQ constraint on the rowid is one-row. + ** Every one-row WhereLoop will have the WHERE_ONEROW bit set in wsFlags. + ** + ** We say the WhereLoop is "order-distinct" if the set of columns from + ** that WhereLoop that are in the ORDER BY clause are different for every + ** row of the WhereLoop. Every one-row WhereLoop is automatically + ** order-distinct. A WhereLoop that has no columns in the ORDER BY clause + ** is not order-distinct. To be order-distinct is not quite the same as being + ** UNIQUE since a UNIQUE column or index can have multiple rows that + ** are NULL and NULL values are equivalent for the purpose of order-distinct. + ** To be order-distinct, the columns must be UNIQUE and NOT NULL. + ** + ** The rowid for a table is always UNIQUE and NOT NULL so whenever the + ** rowid appears in the ORDER BY clause, the corresponding WhereLoop is + ** automatically order-distinct. + */ + + assert( pOrderBy!=0 ); + if( nLoop && OptimizationDisabled(db, SQLITE_OrderByIdxJoin) ) return 0; + + nOrderBy = pOrderBy->nExpr; + testcase( nOrderBy==BMS-1 ); + if( nOrderBy>BMS-1 ) return 0; /* Cannot optimize overly large ORDER BYs */ + isOrderDistinct = 1; + obDone = MASKBIT(nOrderBy)-1; + orderDistinctMask = 0; + ready = 0; + eqOpMask = WO_EQ | WO_IS | WO_ISNULL; + if( wctrlFlags & (WHERE_ORDERBY_LIMIT|WHERE_ORDERBY_MAX|WHERE_ORDERBY_MIN) ){ + eqOpMask |= WO_IN; + } + for(iLoop=0; isOrderDistinct && obSat0 ) ready |= pLoop->maskSelf; + if( iLoopaLoop[iLoop]; + if( wctrlFlags & WHERE_ORDERBY_LIMIT ) continue; + }else{ + pLoop = pLast; + } + if( pLoop->wsFlags & WHERE_VIRTUALTABLE ){ + if( pLoop->u.vtab.isOrdered && (wctrlFlags & WHERE_DISTINCTBY)==0 ){ + obSat = obDone; + } + break; + }else if( wctrlFlags & WHERE_DISTINCTBY ){ + pLoop->u.btree.nDistinctCol = 0; + } + iCur = pWInfo->pTabList->a[pLoop->iTab].iCursor; + + /* Mark off any ORDER BY term X that is a column in the table of + ** the current loop for which there is term in the WHERE + ** clause of the form X IS NULL or X=? that reference only outer + ** loops. + */ + for(i=0; ia[i].pExpr); + if( NEVER(pOBExpr==0) ) continue; + if( pOBExpr->op!=TK_COLUMN && pOBExpr->op!=TK_AGG_COLUMN ) continue; + if( pOBExpr->iTable!=iCur ) continue; + pTerm = sqlite3WhereFindTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn, + ~ready, eqOpMask, 0); + if( pTerm==0 ) continue; + if( pTerm->eOperator==WO_IN ){ + /* IN terms are only valid for sorting in the ORDER BY LIMIT + ** optimization, and then only if they are actually used + ** by the query plan */ + assert( wctrlFlags & + (WHERE_ORDERBY_LIMIT|WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX) ); + for(j=0; jnLTerm && pTerm!=pLoop->aLTerm[j]; j++){} + if( j>=pLoop->nLTerm ) continue; + } + if( (pTerm->eOperator&(WO_EQ|WO_IS))!=0 && pOBExpr->iColumn>=0 ){ + Parse *pParse = pWInfo->pParse; + CollSeq *pColl1 = sqlite3ExprNNCollSeq(pParse, pOrderBy->a[i].pExpr); + CollSeq *pColl2 = sqlite3ExprCompareCollSeq(pParse, pTerm->pExpr); + assert( pColl1 ); + if( pColl2==0 || sqlite3StrICmp(pColl1->zName, pColl2->zName) ){ + continue; + } + testcase( pTerm->pExpr->op==TK_IS ); + } + obSat |= MASKBIT(i); + } + + if( (pLoop->wsFlags & WHERE_ONEROW)==0 ){ + if( pLoop->wsFlags & WHERE_IPK ){ + pIndex = 0; + nKeyCol = 0; + nColumn = 1; + }else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered ){ + return 0; + }else{ + nKeyCol = pIndex->nKeyCol; + nColumn = pIndex->nColumn; + assert( nColumn==nKeyCol+1 || !HasRowid(pIndex->pTable) ); + assert( pIndex->aiColumn[nColumn-1]==XN_ROWID + || !HasRowid(pIndex->pTable)); + /* All relevant terms of the index must also be non-NULL in order + ** for isOrderDistinct to be true. So the isOrderDistint value + ** computed here might be a false positive. Corrections will be + ** made at tag-20210426-1 below */ + isOrderDistinct = IsUniqueIndex(pIndex) + && (pLoop->wsFlags & WHERE_SKIPSCAN)==0; + } + + /* Loop through all columns of the index and deal with the ones + ** that are not constrained by == or IN. + */ + rev = revSet = 0; + distinctColumns = 0; + for(j=0; j=pLoop->u.btree.nEq + || (pLoop->aLTerm[j]==0)==(jnSkip) + ); + if( ju.btree.nEq && j>=pLoop->nSkip ){ + u16 eOp = pLoop->aLTerm[j]->eOperator; + + /* Skip over == and IS and ISNULL terms. (Also skip IN terms when + ** doing WHERE_ORDERBY_LIMIT processing). Except, IS and ISNULL + ** terms imply that the index is not UNIQUE NOT NULL in which case + ** the loop need to be marked as not order-distinct because it can + ** have repeated NULL rows. + ** + ** If the current term is a column of an ((?,?) IN (SELECT...)) + ** expression for which the SELECT returns more than one column, + ** check that it is the only column used by this loop. Otherwise, + ** if it is one of two or more, none of the columns can be + ** considered to match an ORDER BY term. + */ + if( (eOp & eqOpMask)!=0 ){ + if( eOp & (WO_ISNULL|WO_IS) ){ + testcase( eOp & WO_ISNULL ); + testcase( eOp & WO_IS ); + testcase( isOrderDistinct ); + isOrderDistinct = 0; + } + continue; + }else if( ALWAYS(eOp & WO_IN) ){ + /* ALWAYS() justification: eOp is an equality operator due to the + ** ju.btree.nEq constraint above. Any equality other + ** than WO_IN is captured by the previous "if". So this one + ** always has to be WO_IN. */ + Expr *pX = pLoop->aLTerm[j]->pExpr; + for(i=j+1; iu.btree.nEq; i++){ + if( pLoop->aLTerm[i]->pExpr==pX ){ + assert( (pLoop->aLTerm[i]->eOperator & WO_IN) ); + bOnce = 0; + break; + } + } + } + } + + /* Get the column number in the table (iColumn) and sort order + ** (revIdx) for the j-th column of the index. + */ + if( pIndex ){ + iColumn = pIndex->aiColumn[j]; + revIdx = pIndex->aSortOrder[j] & KEYINFO_ORDER_DESC; + if( iColumn==pIndex->pTable->iPKey ) iColumn = XN_ROWID; + }else{ + iColumn = XN_ROWID; + revIdx = 0; + } + + /* An unconstrained column that might be NULL means that this + ** WhereLoop is not well-ordered. tag-20210426-1 + */ + if( isOrderDistinct ){ + if( iColumn>=0 + && j>=pLoop->u.btree.nEq + && pIndex->pTable->aCol[iColumn].notNull==0 + ){ + isOrderDistinct = 0; + } + if( iColumn==XN_EXPR ){ + isOrderDistinct = 0; + } + } + + /* Find the ORDER BY term that corresponds to the j-th column + ** of the index and mark that ORDER BY term off + */ + isMatch = 0; + for(i=0; bOnce && ia[i].pExpr); + testcase( wctrlFlags & WHERE_GROUPBY ); + testcase( wctrlFlags & WHERE_DISTINCTBY ); + if( NEVER(pOBExpr==0) ) continue; + if( (wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY))==0 ) bOnce = 0; + if( iColumn>=XN_ROWID ){ + if( pOBExpr->op!=TK_COLUMN && pOBExpr->op!=TK_AGG_COLUMN ) continue; + if( pOBExpr->iTable!=iCur ) continue; + if( pOBExpr->iColumn!=iColumn ) continue; + }else{ + Expr *pIdxExpr = pIndex->aColExpr->a[j].pExpr; + if( sqlite3ExprCompareSkip(pOBExpr, pIdxExpr, iCur) ){ + continue; + } + } + if( iColumn!=XN_ROWID ){ + pColl = sqlite3ExprNNCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr); + if( sqlite3StrICmp(pColl->zName, pIndex->azColl[j])!=0 ) continue; + } + if( wctrlFlags & WHERE_DISTINCTBY ){ + pLoop->u.btree.nDistinctCol = j+1; + } + isMatch = 1; + break; + } + if( isMatch && (wctrlFlags & WHERE_GROUPBY)==0 ){ + /* Make sure the sort order is compatible in an ORDER BY clause. + ** Sort order is irrelevant for a GROUP BY clause. */ + if( revSet ){ + if( (rev ^ revIdx)!=(pOrderBy->a[i].sortFlags&KEYINFO_ORDER_DESC) ){ + isMatch = 0; + } + }else{ + rev = revIdx ^ (pOrderBy->a[i].sortFlags & KEYINFO_ORDER_DESC); + if( rev ) *pRevMask |= MASKBIT(iLoop); + revSet = 1; + } + } + if( isMatch && (pOrderBy->a[i].sortFlags & KEYINFO_ORDER_BIGNULL) ){ + if( j==pLoop->u.btree.nEq ){ + pLoop->wsFlags |= WHERE_BIGNULL_SORT; + }else{ + isMatch = 0; + } + } + if( isMatch ){ + if( iColumn==XN_ROWID ){ + testcase( distinctColumns==0 ); + distinctColumns = 1; + } + obSat |= MASKBIT(i); + }else{ + /* No match found */ + if( j==0 || jmaskSelf; + for(i=0; ia[i].pExpr; + mTerm = sqlite3WhereExprUsage(&pWInfo->sMaskSet,p); + if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue; + if( (mTerm&~orderDistinctMask)==0 ){ + obSat |= MASKBIT(i); + } + } + } + } /* End the loop over all WhereLoops from outer-most down to inner-most */ + if( obSat==obDone ) return (i8)nOrderBy; + if( !isOrderDistinct ){ + for(i=nOrderBy-1; i>0; i--){ + Bitmask m = ALWAYS(iwctrlFlags & WHERE_GROUPBY ); + assert( pWInfo->wctrlFlags & WHERE_SORTBYGROUP ); + return pWInfo->sorted; +} + +#ifdef WHERETRACE_ENABLED +/* For debugging use only: */ +static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){ + static char zName[65]; + int i; + for(i=0; iaLoop[i]->cId; } + if( pLast ) zName[i++] = pLast->cId; + zName[i] = 0; + return zName; +} +#endif + +/* +** Return the cost of sorting nRow rows, assuming that the keys have +** nOrderby columns and that the first nSorted columns are already in +** order. +*/ +static LogEst whereSortingCost( + WhereInfo *pWInfo, + LogEst nRow, + int nOrderBy, + int nSorted +){ + /* TUNING: Estimated cost of a full external sort, where N is + ** the number of rows to sort is: + ** + ** cost = (3.0 * N * log(N)). + ** + ** Or, if the order-by clause has X terms but only the last Y + ** terms are out of order, then block-sorting will reduce the + ** sorting cost to: + ** + ** cost = (3.0 * N * log(N)) * (Y/X) + ** + ** The (Y/X) term is implemented using stack variable rScale + ** below. + */ + LogEst rScale, rSortCost; + assert( nOrderBy>0 && 66==sqlite3LogEst(100) ); + rScale = sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66; + rSortCost = nRow + rScale + 16; + + /* Multiple by log(M) where M is the number of output rows. + ** Use the LIMIT for M if it is smaller. Or if this sort is for + ** a DISTINCT operator, M will be the number of distinct output + ** rows, so fudge it downwards a bit. + */ + if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 && pWInfo->iLimitiLimit; + }else if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT) ){ + /* TUNING: In the sort for a DISTINCT operator, assume that the DISTINCT + ** reduces the number of output rows by a factor of 2 */ + if( nRow>10 ){ nRow -= 10; assert( 10==sqlite3LogEst(2) ); } + } + rSortCost += estLog(nRow); + return rSortCost; +} + +/* +** Given the list of WhereLoop objects at pWInfo->pLoops, this routine +** attempts to find the lowest cost path that visits each WhereLoop +** once. This path is then loaded into the pWInfo->a[].pWLoop fields. +** +** Assume that the total number of output rows that will need to be sorted +** will be nRowEst (in the 10*log2 representation). Or, ignore sorting +** costs if nRowEst==0. +** +** Return SQLITE_OK on success or SQLITE_NOMEM of a memory allocation +** error occurs. +*/ +static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ + int mxChoice; /* Maximum number of simultaneous paths tracked */ + int nLoop; /* Number of terms in the join */ + Parse *pParse; /* Parsing context */ + sqlite3 *db; /* The database connection */ + int iLoop; /* Loop counter over the terms of the join */ + int ii, jj; /* Loop counters */ + int mxI = 0; /* Index of next entry to replace */ + int nOrderBy; /* Number of ORDER BY clause terms */ + LogEst mxCost = 0; /* Maximum cost of a set of paths */ + LogEst mxUnsorted = 0; /* Maximum unsorted cost of a set of path */ + int nTo, nFrom; /* Number of valid entries in aTo[] and aFrom[] */ + WherePath *aFrom; /* All nFrom paths at the previous level */ + WherePath *aTo; /* The nTo best paths at the current level */ + WherePath *pFrom; /* An element of aFrom[] that we are working on */ + WherePath *pTo; /* An element of aTo[] that we are working on */ + WhereLoop *pWLoop; /* One of the WhereLoop objects */ + WhereLoop **pX; /* Used to divy up the pSpace memory */ + LogEst *aSortCost = 0; /* Sorting and partial sorting costs */ + char *pSpace; /* Temporary memory used by this routine */ + int nSpace; /* Bytes of space allocated at pSpace */ + + pParse = pWInfo->pParse; + db = pParse->db; + nLoop = pWInfo->nLevel; + /* TUNING: For simple queries, only the best path is tracked. + ** For 2-way joins, the 5 best paths are followed. + ** For joins of 3 or more tables, track the 10 best paths */ + mxChoice = (nLoop<=1) ? 1 : (nLoop==2 ? 5 : 10); + assert( nLoop<=pWInfo->pTabList->nSrc ); + WHERETRACE(0x002, ("---- begin solver. (nRowEst=%d)\n", nRowEst)); + + /* If nRowEst is zero and there is an ORDER BY clause, ignore it. In this + ** case the purpose of this call is to estimate the number of rows returned + ** by the overall query. Once this estimate has been obtained, the caller + ** will invoke this function a second time, passing the estimate as the + ** nRowEst parameter. */ + if( pWInfo->pOrderBy==0 || nRowEst==0 ){ + nOrderBy = 0; + }else{ + nOrderBy = pWInfo->pOrderBy->nExpr; + } + + /* Allocate and initialize space for aTo, aFrom and aSortCost[] */ + nSpace = (sizeof(WherePath)+sizeof(WhereLoop*)*nLoop)*mxChoice*2; + nSpace += sizeof(LogEst) * nOrderBy; + pSpace = sqlite3DbMallocRawNN(db, nSpace); + if( pSpace==0 ) return SQLITE_NOMEM_BKPT; + aTo = (WherePath*)pSpace; + aFrom = aTo+mxChoice; + memset(aFrom, 0, sizeof(aFrom[0])); + pX = (WhereLoop**)(aFrom+mxChoice); + for(ii=mxChoice*2, pFrom=aTo; ii>0; ii--, pFrom++, pX += nLoop){ + pFrom->aLoop = pX; + } + if( nOrderBy ){ + /* If there is an ORDER BY clause and it is not being ignored, set up + ** space for the aSortCost[] array. Each element of the aSortCost array + ** is either zero - meaning it has not yet been initialized - or the + ** cost of sorting nRowEst rows of data where the first X terms of + ** the ORDER BY clause are already in order, where X is the array + ** index. */ + aSortCost = (LogEst*)pX; + memset(aSortCost, 0, sizeof(LogEst) * nOrderBy); + } + assert( aSortCost==0 || &pSpace[nSpace]==(char*)&aSortCost[nOrderBy] ); + assert( aSortCost!=0 || &pSpace[nSpace]==(char*)pX ); + + /* Seed the search with a single WherePath containing zero WhereLoops. + ** + ** TUNING: Do not let the number of iterations go above 28. If the cost + ** of computing an automatic index is not paid back within the first 28 + ** rows, then do not use the automatic index. */ + aFrom[0].nRow = MIN(pParse->nQueryLoop, 48); assert( 48==sqlite3LogEst(28) ); + nFrom = 1; + assert( aFrom[0].isOrdered==0 ); + if( nOrderBy ){ + /* If nLoop is zero, then there are no FROM terms in the query. Since + ** in this case the query may return a maximum of one row, the results + ** are already in the requested order. Set isOrdered to nOrderBy to + ** indicate this. Or, if nLoop is greater than zero, set isOrdered to + ** -1, indicating that the result set may or may not be ordered, + ** depending on the loops added to the current plan. */ + aFrom[0].isOrdered = nLoop>0 ? -1 : nOrderBy; + } + + /* Compute successively longer WherePaths using the previous generation + ** of WherePaths as the basis for the next. Keep track of the mxChoice + ** best paths at each generation */ + for(iLoop=0; iLooppLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ + LogEst nOut; /* Rows visited by (pFrom+pWLoop) */ + LogEst rCost; /* Cost of path (pFrom+pWLoop) */ + LogEst rUnsorted; /* Unsorted cost of (pFrom+pWLoop) */ + i8 isOrdered = pFrom->isOrdered; /* isOrdered for (pFrom+pWLoop) */ + Bitmask maskNew; /* Mask of src visited by (..) */ + Bitmask revMask = 0; /* Mask of rev-order loops for (..) */ + + if( (pWLoop->prereq & ~pFrom->maskLoop)!=0 ) continue; + if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue; + if( (pWLoop->wsFlags & WHERE_AUTO_INDEX)!=0 && pFrom->nRow<3 ){ + /* Do not use an automatic index if the this loop is expected + ** to run less than 1.25 times. It is tempting to also exclude + ** automatic index usage on an outer loop, but sometimes an automatic + ** index is useful in the outer loop of a correlated subquery. */ + assert( 10==sqlite3LogEst(2) ); + continue; + } + + /* At this point, pWLoop is a candidate to be the next loop. + ** Compute its cost */ + rUnsorted = sqlite3LogEstAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow); + rUnsorted = sqlite3LogEstAdd(rUnsorted, pFrom->rUnsorted); + nOut = pFrom->nRow + pWLoop->nOut; + maskNew = pFrom->maskLoop | pWLoop->maskSelf; + if( isOrdered<0 ){ + isOrdered = wherePathSatisfiesOrderBy(pWInfo, + pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags, + iLoop, pWLoop, &revMask); + }else{ + revMask = pFrom->revLoop; + } + if( isOrdered>=0 && isOrderedisOrdered^isOrdered)&0x80)==0" is equivalent + ** to (pTo->isOrdered==(-1))==(isOrdered==(-1))" for the range + ** of legal values for isOrdered, -1..64. + */ + for(jj=0, pTo=aTo; jjmaskLoop==maskNew + && ((pTo->isOrdered^isOrdered)&0x80)==0 + ){ + testcase( jj==nTo-1 ); + break; + } + } + if( jj>=nTo ){ + /* None of the existing best-so-far paths match the candidate. */ + if( nTo>=mxChoice + && (rCost>mxCost || (rCost==mxCost && rUnsorted>=mxUnsorted)) + ){ + /* The current candidate is no better than any of the mxChoice + ** paths currently in the best-so-far buffer. So discard + ** this candidate as not viable. */ +#ifdef WHERETRACE_ENABLED /* 0x4 */ + if( sqlite3WhereTrace&0x4 ){ + sqlite3DebugPrintf("Skip %s cost=%-3d,%3d,%3d order=%c\n", + wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, rUnsorted, + isOrdered>=0 ? isOrdered+'0' : '?'); + } +#endif + continue; + } + /* If we reach this points it means that the new candidate path + ** needs to be added to the set of best-so-far paths. */ + if( nTo=0 ? isOrdered+'0' : '?'); + } +#endif + }else{ + /* Control reaches here if best-so-far path pTo=aTo[jj] covers the + ** same set of loops and has the same isOrdered setting as the + ** candidate path. Check to see if the candidate should replace + ** pTo or if the candidate should be skipped. + ** + ** The conditional is an expanded vector comparison equivalent to: + ** (pTo->rCost,pTo->nRow,pTo->rUnsorted) <= (rCost,nOut,rUnsorted) + */ + if( pTo->rCostrCost==rCost + && (pTo->nRownRow==nOut && pTo->rUnsorted<=rUnsorted) + ) + ) + ){ +#ifdef WHERETRACE_ENABLED /* 0x4 */ + if( sqlite3WhereTrace&0x4 ){ + sqlite3DebugPrintf( + "Skip %s cost=%-3d,%3d,%3d order=%c", + wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, rUnsorted, + isOrdered>=0 ? isOrdered+'0' : '?'); + sqlite3DebugPrintf(" vs %s cost=%-3d,%3d,%3d order=%c\n", + wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, + pTo->rUnsorted, pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?'); + } +#endif + /* Discard the candidate path from further consideration */ + testcase( pTo->rCost==rCost ); + continue; + } + testcase( pTo->rCost==rCost+1 ); + /* Control reaches here if the candidate path is better than the + ** pTo path. Replace pTo with the candidate. */ +#ifdef WHERETRACE_ENABLED /* 0x4 */ + if( sqlite3WhereTrace&0x4 ){ + sqlite3DebugPrintf( + "Update %s cost=%-3d,%3d,%3d order=%c", + wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, rUnsorted, + isOrdered>=0 ? isOrdered+'0' : '?'); + sqlite3DebugPrintf(" was %s cost=%-3d,%3d,%3d order=%c\n", + wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, + pTo->rUnsorted, pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?'); + } +#endif + } + /* pWLoop is a winner. Add it to the set of best so far */ + pTo->maskLoop = pFrom->maskLoop | pWLoop->maskSelf; + pTo->revLoop = revMask; + pTo->nRow = nOut; + pTo->rCost = rCost; + pTo->rUnsorted = rUnsorted; + pTo->isOrdered = isOrdered; + memcpy(pTo->aLoop, pFrom->aLoop, sizeof(WhereLoop*)*iLoop); + pTo->aLoop[iLoop] = pWLoop; + if( nTo>=mxChoice ){ + mxI = 0; + mxCost = aTo[0].rCost; + mxUnsorted = aTo[0].nRow; + for(jj=1, pTo=&aTo[1]; jjrCost>mxCost + || (pTo->rCost==mxCost && pTo->rUnsorted>mxUnsorted) + ){ + mxCost = pTo->rCost; + mxUnsorted = pTo->rUnsorted; + mxI = jj; + } + } + } + } + } + +#ifdef WHERETRACE_ENABLED /* >=2 */ + if( sqlite3WhereTrace & 0x02 ){ + sqlite3DebugPrintf("---- after round %d ----\n", iLoop); + for(ii=0, pTo=aTo; iirCost, pTo->nRow, + pTo->isOrdered>=0 ? (pTo->isOrdered+'0') : '?'); + if( pTo->isOrdered>0 ){ + sqlite3DebugPrintf(" rev=0x%llx\n", pTo->revLoop); + }else{ + sqlite3DebugPrintf("\n"); + } + } + } +#endif + + /* Swap the roles of aFrom and aTo for the next generation */ + pFrom = aTo; + aTo = aFrom; + aFrom = pFrom; + nFrom = nTo; + } + + if( nFrom==0 ){ + sqlite3ErrorMsg(pParse, "no query solution"); + sqlite3DbFreeNN(db, pSpace); + return SQLITE_ERROR; + } + + /* Find the lowest cost path. pFrom will be left pointing to that path */ + pFrom = aFrom; + for(ii=1; iirCost>aFrom[ii].rCost ) pFrom = &aFrom[ii]; + } + assert( pWInfo->nLevel==nLoop ); + /* Load the lowest cost path into pWInfo */ + for(iLoop=0; iLoopa + iLoop; + pLevel->pWLoop = pWLoop = pFrom->aLoop[iLoop]; + pLevel->iFrom = pWLoop->iTab; + pLevel->iTabCur = pWInfo->pTabList->a[pLevel->iFrom].iCursor; + } + if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT)!=0 + && (pWInfo->wctrlFlags & WHERE_DISTINCTBY)==0 + && pWInfo->eDistinct==WHERE_DISTINCT_NOOP + && nRowEst + ){ + Bitmask notUsed; + int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pResultSet, pFrom, + WHERE_DISTINCTBY, nLoop-1, pFrom->aLoop[nLoop-1], ¬Used); + if( rc==pWInfo->pResultSet->nExpr ){ + pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; + } + } + pWInfo->bOrderedInnerLoop = 0; + if( pWInfo->pOrderBy ){ + if( pWInfo->wctrlFlags & WHERE_DISTINCTBY ){ + if( pFrom->isOrdered==pWInfo->pOrderBy->nExpr ){ + pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; + } + }else{ + pWInfo->nOBSat = pFrom->isOrdered; + pWInfo->revMask = pFrom->revLoop; + if( pWInfo->nOBSat<=0 ){ + pWInfo->nOBSat = 0; + if( nLoop>0 ){ + u32 wsFlags = pFrom->aLoop[nLoop-1]->wsFlags; + if( (wsFlags & WHERE_ONEROW)==0 + && (wsFlags&(WHERE_IPK|WHERE_COLUMN_IN))!=(WHERE_IPK|WHERE_COLUMN_IN) + ){ + Bitmask m = 0; + int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, pFrom, + WHERE_ORDERBY_LIMIT, nLoop-1, pFrom->aLoop[nLoop-1], &m); + testcase( wsFlags & WHERE_IPK ); + testcase( wsFlags & WHERE_COLUMN_IN ); + if( rc==pWInfo->pOrderBy->nExpr ){ + pWInfo->bOrderedInnerLoop = 1; + pWInfo->revMask = m; + } + } + } + }else if( nLoop + && pWInfo->nOBSat==1 + && (pWInfo->wctrlFlags & (WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX))!=0 + ){ + pWInfo->bOrderedInnerLoop = 1; + } + } + if( (pWInfo->wctrlFlags & WHERE_SORTBYGROUP) + && pWInfo->nOBSat==pWInfo->pOrderBy->nExpr && nLoop>0 + ){ + Bitmask revMask = 0; + int nOrder = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, + pFrom, 0, nLoop-1, pFrom->aLoop[nLoop-1], &revMask + ); + assert( pWInfo->sorted==0 ); + if( nOrder==pWInfo->pOrderBy->nExpr ){ + pWInfo->sorted = 1; + pWInfo->revMask = revMask; + } + } + } + + + pWInfo->nRowOut = pFrom->nRow; + + /* Free temporary memory and return success */ + sqlite3DbFreeNN(db, pSpace); + return SQLITE_OK; +} + +/* +** Most queries use only a single table (they are not joins) and have +** simple == constraints against indexed fields. This routine attempts +** to plan those simple cases using much less ceremony than the +** general-purpose query planner, and thereby yield faster sqlite3_prepare() +** times for the common case. +** +** Return non-zero on success, if this query can be handled by this +** no-frills query planner. Return zero if this query needs the +** general-purpose query planner. +*/ +static int whereShortCut(WhereLoopBuilder *pBuilder){ + WhereInfo *pWInfo; + SrcItem *pItem; + WhereClause *pWC; + WhereTerm *pTerm; + WhereLoop *pLoop; + int iCur; + int j; + Table *pTab; + Index *pIdx; + WhereScan scan; + + pWInfo = pBuilder->pWInfo; + if( pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE ) return 0; + assert( pWInfo->pTabList->nSrc>=1 ); + pItem = pWInfo->pTabList->a; + pTab = pItem->pTab; + if( IsVirtual(pTab) ) return 0; + if( pItem->fg.isIndexedBy ) return 0; + iCur = pItem->iCursor; + pWC = &pWInfo->sWC; + pLoop = pBuilder->pNew; + pLoop->wsFlags = 0; + pLoop->nSkip = 0; + pTerm = whereScanInit(&scan, pWC, iCur, -1, WO_EQ|WO_IS, 0); + while( pTerm && pTerm->prereqRight ) pTerm = whereScanNext(&scan); + if( pTerm ){ + testcase( pTerm->eOperator & WO_IS ); + pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW; + pLoop->aLTerm[0] = pTerm; + pLoop->nLTerm = 1; + pLoop->u.btree.nEq = 1; + /* TUNING: Cost of a rowid lookup is 10 */ + pLoop->rRun = 33; /* 33==sqlite3LogEst(10) */ + }else{ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int opMask; + assert( pLoop->aLTermSpace==pLoop->aLTerm ); + if( !IsUniqueIndex(pIdx) + || pIdx->pPartIdxWhere!=0 + || pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace) + ) continue; + opMask = pIdx->uniqNotNull ? (WO_EQ|WO_IS) : WO_EQ; + for(j=0; jnKeyCol; j++){ + pTerm = whereScanInit(&scan, pWC, iCur, j, opMask, pIdx); + while( pTerm && pTerm->prereqRight ) pTerm = whereScanNext(&scan); + if( pTerm==0 ) break; + testcase( pTerm->eOperator & WO_IS ); + pLoop->aLTerm[j] = pTerm; + } + if( j!=pIdx->nKeyCol ) continue; + pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_ONEROW|WHERE_INDEXED; + if( pIdx->isCovering || (pItem->colUsed & pIdx->colNotIdxed)==0 ){ + pLoop->wsFlags |= WHERE_IDX_ONLY; + } + pLoop->nLTerm = j; + pLoop->u.btree.nEq = j; + pLoop->u.btree.pIndex = pIdx; + /* TUNING: Cost of a unique index lookup is 15 */ + pLoop->rRun = 39; /* 39==sqlite3LogEst(15) */ + break; + } + } + if( pLoop->wsFlags ){ + pLoop->nOut = (LogEst)1; + pWInfo->a[0].pWLoop = pLoop; + assert( pWInfo->sMaskSet.n==1 && iCur==pWInfo->sMaskSet.ix[0] ); + pLoop->maskSelf = 1; /* sqlite3WhereGetMask(&pWInfo->sMaskSet, iCur); */ + pWInfo->a[0].iTabCur = iCur; + pWInfo->nRowOut = 1; + if( pWInfo->pOrderBy ) pWInfo->nOBSat = pWInfo->pOrderBy->nExpr; + if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){ + pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + } + if( scan.iEquiv>1 ) pLoop->wsFlags |= WHERE_TRANSCONS; +#ifdef SQLITE_DEBUG + pLoop->cId = '0'; +#endif +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace ){ + sqlite3DebugPrintf("whereShortCut() used to compute solution\n"); + } +#endif + return 1; + } + return 0; +} + +/* +** Helper function for exprIsDeterministic(). +*/ +static int exprNodeIsDeterministic(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_FUNCTION && ExprHasProperty(pExpr, EP_ConstFunc)==0 ){ + pWalker->eCode = 0; + return WRC_Abort; + } + return WRC_Continue; +} + +/* +** Return true if the expression contains no non-deterministic SQL +** functions. Do not consider non-deterministic SQL functions that are +** part of sub-select statements. +*/ +static int exprIsDeterministic(Expr *p){ + Walker w; + memset(&w, 0, sizeof(w)); + w.eCode = 1; + w.xExprCallback = exprNodeIsDeterministic; + w.xSelectCallback = sqlite3SelectWalkFail; + sqlite3WalkExpr(&w, p); + return w.eCode; +} + + +#ifdef WHERETRACE_ENABLED +/* +** Display all WhereLoops in pWInfo +*/ +static void showAllWhereLoops(WhereInfo *pWInfo, WhereClause *pWC){ + if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ + WhereLoop *p; + int i; + static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" + "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; + for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){ + p->cId = zLabel[i%(sizeof(zLabel)-1)]; + sqlite3WhereLoopPrint(p, pWC); + } + } +} +# define WHERETRACE_ALL_LOOPS(W,C) showAllWhereLoops(W,C) +#else +# define WHERETRACE_ALL_LOOPS(W,C) +#endif + +/* Attempt to omit tables from a join that do not affect the result. +** For a table to not affect the result, the following must be true: +** +** 1) The query must not be an aggregate. +** 2) The table must be the RHS of a LEFT JOIN. +** 3) Either the query must be DISTINCT, or else the ON or USING clause +** must contain a constraint that limits the scan of the table to +** at most a single row. +** 4) The table must not be referenced by any part of the query apart +** from its own USING or ON clause. +** +** For example, given: +** +** CREATE TABLE t1(ipk INTEGER PRIMARY KEY, v1); +** CREATE TABLE t2(ipk INTEGER PRIMARY KEY, v2); +** CREATE TABLE t3(ipk INTEGER PRIMARY KEY, v3); +** +** then table t2 can be omitted from the following: +** +** SELECT v1, v3 FROM t1 +** LEFT JOIN t2 ON (t1.ipk=t2.ipk) +** LEFT JOIN t3 ON (t1.ipk=t3.ipk) +** +** or from: +** +** SELECT DISTINCT v1, v3 FROM t1 +** LEFT JOIN t2 +** LEFT JOIN t3 ON (t1.ipk=t3.ipk) +*/ +static SQLITE_NOINLINE Bitmask whereOmitNoopJoin( + WhereInfo *pWInfo, + Bitmask notReady +){ + int i; + Bitmask tabUsed; + + /* Preconditions checked by the caller */ + assert( pWInfo->nLevel>=2 ); + assert( OptimizationEnabled(pWInfo->pParse->db, SQLITE_OmitNoopJoin) ); + + /* These two preconditions checked by the caller combine to guarantee + ** condition (1) of the header comment */ + assert( pWInfo->pResultSet!=0 ); + assert( 0==(pWInfo->wctrlFlags & WHERE_AGG_DISTINCT) ); + + tabUsed = sqlite3WhereExprListUsage(&pWInfo->sMaskSet, pWInfo->pResultSet); + if( pWInfo->pOrderBy ){ + tabUsed |= sqlite3WhereExprListUsage(&pWInfo->sMaskSet, pWInfo->pOrderBy); + } + for(i=pWInfo->nLevel-1; i>=1; i--){ + WhereTerm *pTerm, *pEnd; + SrcItem *pItem; + WhereLoop *pLoop; + pLoop = pWInfo->a[i].pWLoop; + pItem = &pWInfo->pTabList->a[pLoop->iTab]; + if( (pItem->fg.jointype & JT_LEFT)==0 ) continue; + if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT)==0 + && (pLoop->wsFlags & WHERE_ONEROW)==0 + ){ + continue; + } + if( (tabUsed & pLoop->maskSelf)!=0 ) continue; + pEnd = pWInfo->sWC.a + pWInfo->sWC.nTerm; + for(pTerm=pWInfo->sWC.a; pTermprereqAll & pLoop->maskSelf)!=0 ){ + if( !ExprHasProperty(pTerm->pExpr, EP_FromJoin) + || pTerm->pExpr->w.iRightJoinTable!=pItem->iCursor + ){ + break; + } + } + } + if( pTerm drop loop %c not used\n", pLoop->cId)); + notReady &= ~pLoop->maskSelf; + for(pTerm=pWInfo->sWC.a; pTermprereqAll & pLoop->maskSelf)!=0 ){ + pTerm->wtFlags |= TERM_CODED; + } + } + if( i!=pWInfo->nLevel-1 ){ + int nByte = (pWInfo->nLevel-1-i) * sizeof(WhereLevel); + memmove(&pWInfo->a[i], &pWInfo->a[i+1], nByte); + } + pWInfo->nLevel--; + assert( pWInfo->nLevel>0 ); + } + return notReady; +} + +/* +** Check to see if there are any SEARCH loops that might benefit from +** using a Bloom filter. Consider a Bloom filter if: +** +** (1) The SEARCH happens more than N times where N is the number +** of rows in the table that is being considered for the Bloom +** filter. +** (2) Some searches are expected to find zero rows. (This is determined +** by the WHERE_SELFCULL flag on the term.) +** (3) Bloom-filter processing is not disabled. (Checked by the +** caller.) +** (4) The size of the table being searched is known by ANALYZE. +** +** This block of code merely checks to see if a Bloom filter would be +** appropriate, and if so sets the WHERE_BLOOMFILTER flag on the +** WhereLoop. The implementation of the Bloom filter comes further +** down where the code for each WhereLoop is generated. +*/ +static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( + const WhereInfo *pWInfo +){ + int i; + LogEst nSearch; + + assert( pWInfo->nLevel>=2 ); + assert( OptimizationEnabled(pWInfo->pParse->db, SQLITE_BloomFilter) ); + nSearch = pWInfo->a[0].pWLoop->nOut; + for(i=1; inLevel; i++){ + WhereLoop *pLoop = pWInfo->a[i].pWLoop; + const unsigned int reqFlags = (WHERE_SELFCULL|WHERE_COLUMN_EQ); + if( (pLoop->wsFlags & reqFlags)==reqFlags + /* vvvvvv--- Always the case if WHERE_COLUMN_EQ is defined */ + && ALWAYS((pLoop->wsFlags & (WHERE_IPK|WHERE_INDEXED))!=0) + ){ + SrcItem *pItem = &pWInfo->pTabList->a[pLoop->iTab]; + Table *pTab = pItem->pTab; + pTab->tabFlags |= TF_StatsUsed; + if( nSearch > pTab->nRowLogEst + && (pTab->tabFlags & TF_HasStat1)!=0 + ){ + testcase( pItem->fg.jointype & JT_LEFT ); + pLoop->wsFlags |= WHERE_BLOOMFILTER; + pLoop->wsFlags &= ~WHERE_IDX_ONLY; + WHERETRACE(0xffff, ( + "-> use Bloom-filter on loop %c because there are ~%.1e " + "lookups into %s which has only ~%.1e rows\n", + pLoop->cId, (double)sqlite3LogEstToInt(nSearch), pTab->zName, + (double)sqlite3LogEstToInt(pTab->nRowLogEst))); + } + } + nSearch += pLoop->nOut; + } +} + +/* +** Generate the beginning of the loop used for WHERE clause processing. +** The return value is a pointer to an opaque structure that contains +** information needed to terminate the loop. Later, the calling routine +** should invoke sqlite3WhereEnd() with the return value of this function +** in order to complete the WHERE clause processing. +** +** If an error occurs, this routine returns NULL. +** +** The basic idea is to do a nested loop, one loop for each table in +** the FROM clause of a select. (INSERT and UPDATE statements are the +** same as a SELECT with only a single table in the FROM clause.) For +** example, if the SQL is this: +** +** SELECT * FROM t1, t2, t3 WHERE ...; +** +** Then the code generated is conceptually like the following: +** +** foreach row1 in t1 do \ Code generated +** foreach row2 in t2 do |-- by sqlite3WhereBegin() +** foreach row3 in t3 do / +** ... +** end \ Code generated +** end |-- by sqlite3WhereEnd() +** end / +** +** Note that the loops might not be nested in the order in which they +** appear in the FROM clause if a different order is better able to make +** use of indices. Note also that when the IN operator appears in +** the WHERE clause, it might result in additional nested loops for +** scanning through all values on the right-hand side of the IN. +** +** There are Btree cursors associated with each table. t1 uses cursor +** number pTabList->a[0].iCursor. t2 uses the cursor pTabList->a[1].iCursor. +** And so forth. This routine generates code to open those VDBE cursors +** and sqlite3WhereEnd() generates the code to close them. +** +** The code that sqlite3WhereBegin() generates leaves the cursors named +** in pTabList pointing at their appropriate entries. The [...] code +** can use OP_Column and OP_Rowid opcodes on these cursors to extract +** data from the various tables of the loop. +** +** If the WHERE clause is empty, the foreach loops must each scan their +** entire tables. Thus a three-way join is an O(N^3) operation. But if +** the tables have indices and there are terms in the WHERE clause that +** refer to those indices, a complete table scan can be avoided and the +** code will run much faster. Most of the work of this routine is checking +** to see if there are indices that can be used to speed up the loop. +** +** Terms of the WHERE clause are also used to limit which rows actually +** make it to the "..." in the middle of the loop. After each "foreach", +** terms of the WHERE clause that use only terms in that loop and outer +** loops are evaluated and if false a jump is made around all subsequent +** inner loops (or around the "..." if the test occurs within the inner- +** most loop) +** +** OUTER JOINS +** +** An outer join of tables t1 and t2 is conceptally coded as follows: +** +** foreach row1 in t1 do +** flag = 0 +** foreach row2 in t2 do +** start: +** ... +** flag = 1 +** end +** if flag==0 then +** move the row2 cursor to a null row +** goto start +** fi +** end +** +** ORDER BY CLAUSE PROCESSING +** +** pOrderBy is a pointer to the ORDER BY clause (or the GROUP BY clause +** if the WHERE_GROUPBY flag is set in wctrlFlags) of a SELECT statement +** if there is one. If there is no ORDER BY clause or if this routine +** is called from an UPDATE or DELETE statement, then pOrderBy is NULL. +** +** The iIdxCur parameter is the cursor number of an index. If +** WHERE_OR_SUBCLAUSE is set, iIdxCur is the cursor number of an index +** to use for OR clause processing. The WHERE clause should use this +** specific cursor. If WHERE_ONEPASS_DESIRED is set, then iIdxCur is +** the first cursor in an array of cursors for all indices. iIdxCur should +** be used to compute the appropriate cursor depending on which index is +** used. +*/ +SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( + Parse *pParse, /* The parser context */ + SrcList *pTabList, /* FROM clause: A list of all tables to be scanned */ + Expr *pWhere, /* The WHERE clause */ + ExprList *pOrderBy, /* An ORDER BY (or GROUP BY) clause, or NULL */ + ExprList *pResultSet, /* Query result set. Req'd for DISTINCT */ + Select *pLimit, /* Use this LIMIT/OFFSET clause, if any */ + u16 wctrlFlags, /* The WHERE_* flags defined in sqliteInt.h */ + int iAuxArg /* If WHERE_OR_SUBCLAUSE is set, index cursor number + ** If WHERE_USE_LIMIT, then the limit amount */ +){ + int nByteWInfo; /* Num. bytes allocated for WhereInfo struct */ + int nTabList; /* Number of elements in pTabList */ + WhereInfo *pWInfo; /* Will become the return value of this function */ + Vdbe *v = pParse->pVdbe; /* The virtual database engine */ + Bitmask notReady; /* Cursors that are not yet positioned */ + WhereLoopBuilder sWLB; /* The WhereLoop builder */ + WhereMaskSet *pMaskSet; /* The expression mask set */ + WhereLevel *pLevel; /* A single level in pWInfo->a[] */ + WhereLoop *pLoop; /* Pointer to a single WhereLoop object */ + int ii; /* Loop counter */ + sqlite3 *db; /* Database connection */ + int rc; /* Return code */ + u8 bFordelete = 0; /* OPFLAG_FORDELETE or zero, as appropriate */ + + assert( (wctrlFlags & WHERE_ONEPASS_MULTIROW)==0 || ( + (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 + && (wctrlFlags & WHERE_OR_SUBCLAUSE)==0 + )); + + /* Only one of WHERE_OR_SUBCLAUSE or WHERE_USE_LIMIT */ + assert( (wctrlFlags & WHERE_OR_SUBCLAUSE)==0 + || (wctrlFlags & WHERE_USE_LIMIT)==0 ); + + /* Variable initialization */ + db = pParse->db; + memset(&sWLB, 0, sizeof(sWLB)); + + /* An ORDER/GROUP BY clause of more than 63 terms cannot be optimized */ + testcase( pOrderBy && pOrderBy->nExpr==BMS-1 ); + if( pOrderBy && pOrderBy->nExpr>=BMS ) pOrderBy = 0; + + /* The number of tables in the FROM clause is limited by the number of + ** bits in a Bitmask + */ + testcase( pTabList->nSrc==BMS ); + if( pTabList->nSrc>BMS ){ + sqlite3ErrorMsg(pParse, "at most %d tables in a join", BMS); + return 0; + } + + /* This function normally generates a nested loop for all tables in + ** pTabList. But if the WHERE_OR_SUBCLAUSE flag is set, then we should + ** only generate code for the first table in pTabList and assume that + ** any cursors associated with subsequent tables are uninitialized. + */ + nTabList = (wctrlFlags & WHERE_OR_SUBCLAUSE) ? 1 : pTabList->nSrc; + + /* Allocate and initialize the WhereInfo structure that will become the + ** return value. A single allocation is used to store the WhereInfo + ** struct, the contents of WhereInfo.a[], the WhereClause structure + ** and the WhereMaskSet structure. Since WhereClause contains an 8-byte + ** field (type Bitmask) it must be aligned on an 8-byte boundary on + ** some architectures. Hence the ROUND8() below. + */ + nByteWInfo = ROUND8(sizeof(WhereInfo)+(nTabList-1)*sizeof(WhereLevel)); + pWInfo = sqlite3DbMallocRawNN(db, nByteWInfo + sizeof(WhereLoop)); + if( db->mallocFailed ){ + sqlite3DbFree(db, pWInfo); + pWInfo = 0; + goto whereBeginError; + } + pWInfo->pParse = pParse; + pWInfo->pTabList = pTabList; + pWInfo->pOrderBy = pOrderBy; + pWInfo->pWhere = pWhere; + pWInfo->pResultSet = pResultSet; + pWInfo->aiCurOnePass[0] = pWInfo->aiCurOnePass[1] = -1; + pWInfo->nLevel = nTabList; + pWInfo->iBreak = pWInfo->iContinue = sqlite3VdbeMakeLabel(pParse); + pWInfo->wctrlFlags = wctrlFlags; + pWInfo->iLimit = iAuxArg; + pWInfo->savedNQueryLoop = pParse->nQueryLoop; +#ifndef SQLITE_OMIT_VIRTUALTABLE + pWInfo->pLimit = pLimit; +#endif + memset(&pWInfo->nOBSat, 0, + offsetof(WhereInfo,sWC) - offsetof(WhereInfo,nOBSat)); + memset(&pWInfo->a[0], 0, sizeof(WhereLoop)+nTabList*sizeof(WhereLevel)); + assert( pWInfo->eOnePass==ONEPASS_OFF ); /* ONEPASS defaults to OFF */ + pMaskSet = &pWInfo->sMaskSet; + pMaskSet->n = 0; + pMaskSet->ix[0] = -99; /* Initialize ix[0] to a value that can never be + ** a valid cursor number, to avoid an initial + ** test for pMaskSet->n==0 in sqlite3WhereGetMask() */ + sWLB.pWInfo = pWInfo; + sWLB.pWC = &pWInfo->sWC; + sWLB.pNew = (WhereLoop*)(((char*)pWInfo)+nByteWInfo); + assert( EIGHT_BYTE_ALIGNMENT(sWLB.pNew) ); + whereLoopInit(sWLB.pNew); +#ifdef SQLITE_DEBUG + sWLB.pNew->cId = '*'; +#endif + + /* Split the WHERE clause into separate subexpressions where each + ** subexpression is separated by an AND operator. + */ + sqlite3WhereClauseInit(&pWInfo->sWC, pWInfo); + sqlite3WhereSplit(&pWInfo->sWC, pWhere, TK_AND); + + /* Special case: No FROM clause + */ + if( nTabList==0 ){ + if( pOrderBy ) pWInfo->nOBSat = pOrderBy->nExpr; + if( (wctrlFlags & WHERE_WANT_DISTINCT)!=0 + && OptimizationEnabled(db, SQLITE_DistinctOpt) + ){ + pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + } + ExplainQueryPlan((pParse, 0, "SCAN CONSTANT ROW")); + }else{ + /* Assign a bit from the bitmask to every term in the FROM clause. + ** + ** The N-th term of the FROM clause is assigned a bitmask of 1<nSrc tables in + ** pTabList, not just the first nTabList tables. nTabList is normally + ** equal to pTabList->nSrc but might be shortened to 1 if the + ** WHERE_OR_SUBCLAUSE flag is set. + */ + ii = 0; + do{ + createMask(pMaskSet, pTabList->a[ii].iCursor); + sqlite3WhereTabFuncArgs(pParse, &pTabList->a[ii], &pWInfo->sWC); + }while( (++ii)nSrc ); + #ifdef SQLITE_DEBUG + { + Bitmask mx = 0; + for(ii=0; iinSrc; ii++){ + Bitmask m = sqlite3WhereGetMask(pMaskSet, pTabList->a[ii].iCursor); + assert( m>=mx ); + mx = m; + } + } + #endif + } + + /* Analyze all of the subexpressions. */ + sqlite3WhereExprAnalyze(pTabList, &pWInfo->sWC); + sqlite3WhereAddLimit(&pWInfo->sWC, pLimit); + if( db->mallocFailed ) goto whereBeginError; + + /* Special case: WHERE terms that do not refer to any tables in the join + ** (constant expressions). Evaluate each such term, and jump over all the + ** generated code if the result is not true. + ** + ** Do not do this if the expression contains non-deterministic functions + ** that are not within a sub-select. This is not strictly required, but + ** preserves SQLite's legacy behaviour in the following two cases: + ** + ** FROM ... WHERE random()>0; -- eval random() once per row + ** FROM ... WHERE (SELECT random())>0; -- eval random() once overall + */ + for(ii=0; iinBase; ii++){ + WhereTerm *pT = &sWLB.pWC->a[ii]; + if( pT->wtFlags & TERM_VIRTUAL ) continue; + if( pT->prereqAll==0 && (nTabList==0 || exprIsDeterministic(pT->pExpr)) ){ + sqlite3ExprIfFalse(pParse, pT->pExpr, pWInfo->iBreak, SQLITE_JUMPIFNULL); + pT->wtFlags |= TERM_CODED; + } + } + + if( wctrlFlags & WHERE_WANT_DISTINCT ){ + if( OptimizationDisabled(db, SQLITE_DistinctOpt) ){ + /* Disable the DISTINCT optimization if SQLITE_DistinctOpt is set via + ** sqlite3_test_ctrl(SQLITE_TESTCTRL_OPTIMIZATIONS,...) */ + wctrlFlags &= ~WHERE_WANT_DISTINCT; + pWInfo->wctrlFlags &= ~WHERE_WANT_DISTINCT; + }else if( isDistinctRedundant(pParse, pTabList, &pWInfo->sWC, pResultSet) ){ + /* The DISTINCT marking is pointless. Ignore it. */ + pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + }else if( pOrderBy==0 ){ + /* Try to ORDER BY the result set to make distinct processing easier */ + pWInfo->wctrlFlags |= WHERE_DISTINCTBY; + pWInfo->pOrderBy = pResultSet; + } + } + + /* Construct the WhereLoop objects */ +#if defined(WHERETRACE_ENABLED) + if( sqlite3WhereTrace & 0xffff ){ + sqlite3DebugPrintf("*** Optimizer Start *** (wctrlFlags: 0x%x",wctrlFlags); + if( wctrlFlags & WHERE_USE_LIMIT ){ + sqlite3DebugPrintf(", limit: %d", iAuxArg); + } + sqlite3DebugPrintf(")\n"); + if( sqlite3WhereTrace & 0x100 ){ + Select sSelect; + memset(&sSelect, 0, sizeof(sSelect)); + sSelect.selFlags = SF_WhereBegin; + sSelect.pSrc = pTabList; + sSelect.pWhere = pWhere; + sSelect.pOrderBy = pOrderBy; + sSelect.pEList = pResultSet; + sqlite3TreeViewSelect(0, &sSelect, 0); + } + } + if( sqlite3WhereTrace & 0x100 ){ /* Display all terms of the WHERE clause */ + sqlite3DebugPrintf("---- WHERE clause at start of analysis:\n"); + sqlite3WhereClausePrint(sWLB.pWC); + } +#endif + + if( nTabList!=1 || whereShortCut(&sWLB)==0 ){ + rc = whereLoopAddAll(&sWLB); + if( rc ) goto whereBeginError; + +#ifdef SQLITE_ENABLE_STAT4 + /* If one or more WhereTerm.truthProb values were used in estimating + ** loop parameters, but then those truthProb values were subsequently + ** changed based on STAT4 information while computing subsequent loops, + ** then we need to rerun the whole loop building process so that all + ** loops will be built using the revised truthProb values. */ + if( sWLB.bldFlags2 & SQLITE_BLDF2_2NDPASS ){ + WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC); + WHERETRACE(0xffff, + ("**** Redo all loop computations due to" + " TERM_HIGHTRUTH changes ****\n")); + while( pWInfo->pLoops ){ + WhereLoop *p = pWInfo->pLoops; + pWInfo->pLoops = p->pNextLoop; + whereLoopDelete(db, p); + } + rc = whereLoopAddAll(&sWLB); + if( rc ) goto whereBeginError; + } +#endif + WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC); + + wherePathSolver(pWInfo, 0); + if( db->mallocFailed ) goto whereBeginError; + if( pWInfo->pOrderBy ){ + wherePathSolver(pWInfo, pWInfo->nRowOut+1); + if( db->mallocFailed ) goto whereBeginError; + } + } + if( pWInfo->pOrderBy==0 && (db->flags & SQLITE_ReverseOrder)!=0 ){ + pWInfo->revMask = ALLBITS; + } + if( pParse->nErr ){ + goto whereBeginError; + } + assert( db->mallocFailed==0 ); +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace ){ + sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); + if( pWInfo->nOBSat>0 ){ + sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask); + } + switch( pWInfo->eDistinct ){ + case WHERE_DISTINCT_UNIQUE: { + sqlite3DebugPrintf(" DISTINCT=unique"); + break; + } + case WHERE_DISTINCT_ORDERED: { + sqlite3DebugPrintf(" DISTINCT=ordered"); + break; + } + case WHERE_DISTINCT_UNORDERED: { + sqlite3DebugPrintf(" DISTINCT=unordered"); + break; + } + } + sqlite3DebugPrintf("\n"); + for(ii=0; iinLevel; ii++){ + sqlite3WhereLoopPrint(pWInfo->a[ii].pWLoop, sWLB.pWC); + } + } +#endif + + /* Attempt to omit tables from a join that do not affect the result. + ** See the comment on whereOmitNoopJoin() for further information. + ** + ** This query optimization is factored out into a separate "no-inline" + ** procedure to keep the sqlite3WhereBegin() procedure from becoming + ** too large. If sqlite3WhereBegin() becomes too large, that prevents + ** some C-compiler optimizers from in-lining the + ** sqlite3WhereCodeOneLoopStart() procedure, and it is important to + ** in-line sqlite3WhereCodeOneLoopStart() for performance reasons. + */ + notReady = ~(Bitmask)0; + if( pWInfo->nLevel>=2 + && pResultSet!=0 /* these two combine to guarantee */ + && 0==(wctrlFlags & WHERE_AGG_DISTINCT) /* condition (1) above */ + && OptimizationEnabled(db, SQLITE_OmitNoopJoin) + ){ + notReady = whereOmitNoopJoin(pWInfo, notReady); + nTabList = pWInfo->nLevel; + assert( nTabList>0 ); + } + + /* Check to see if there are any SEARCH loops that might benefit from + ** using a Bloom filter. + */ + if( pWInfo->nLevel>=2 + && OptimizationEnabled(db, SQLITE_BloomFilter) + ){ + whereCheckIfBloomFilterIsUseful(pWInfo); + } + +#if defined(WHERETRACE_ENABLED) + if( sqlite3WhereTrace & 0x100 ){ /* Display all terms of the WHERE clause */ + sqlite3DebugPrintf("---- WHERE clause at end of analysis:\n"); + sqlite3WhereClausePrint(sWLB.pWC); + } + WHERETRACE(0xffff,("*** Optimizer Finished ***\n")); +#endif + pWInfo->pParse->nQueryLoop += pWInfo->nRowOut; + + /* If the caller is an UPDATE or DELETE statement that is requesting + ** to use a one-pass algorithm, determine if this is appropriate. + ** + ** A one-pass approach can be used if the caller has requested one + ** and either (a) the scan visits at most one row or (b) each + ** of the following are true: + ** + ** * the caller has indicated that a one-pass approach can be used + ** with multiple rows (by setting WHERE_ONEPASS_MULTIROW), and + ** * the table is not a virtual table, and + ** * either the scan does not use the OR optimization or the caller + ** is a DELETE operation (WHERE_DUPLICATES_OK is only specified + ** for DELETE). + ** + ** The last qualification is because an UPDATE statement uses + ** WhereInfo.aiCurOnePass[1] to determine whether or not it really can + ** use a one-pass approach, and this is not set accurately for scans + ** that use the OR optimization. + */ + assert( (wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || pWInfo->nLevel==1 ); + if( (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 ){ + int wsFlags = pWInfo->a[0].pWLoop->wsFlags; + int bOnerow = (wsFlags & WHERE_ONEROW)!=0; + assert( !(wsFlags & WHERE_VIRTUALTABLE) || IsVirtual(pTabList->a[0].pTab) ); + if( bOnerow || ( + 0!=(wctrlFlags & WHERE_ONEPASS_MULTIROW) + && !IsVirtual(pTabList->a[0].pTab) + && (0==(wsFlags & WHERE_MULTI_OR) || (wctrlFlags & WHERE_DUPLICATES_OK)) + )){ + pWInfo->eOnePass = bOnerow ? ONEPASS_SINGLE : ONEPASS_MULTI; + if( HasRowid(pTabList->a[0].pTab) && (wsFlags & WHERE_IDX_ONLY) ){ + if( wctrlFlags & WHERE_ONEPASS_MULTIROW ){ + bFordelete = OPFLAG_FORDELETE; + } + pWInfo->a[0].pWLoop->wsFlags = (wsFlags & ~WHERE_IDX_ONLY); + } + } + } + + /* Open all tables in the pTabList and any indices selected for + ** searching those tables. + */ + for(ii=0, pLevel=pWInfo->a; iia[pLevel->iFrom]; + pTab = pTabItem->pTab; + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + pLoop = pLevel->pWLoop; + if( (pTab->tabFlags & TF_Ephemeral)!=0 || IsView(pTab) ){ + /* Do nothing */ + }else +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){ + const char *pVTab = (const char *)sqlite3GetVTable(db, pTab); + int iCur = pTabItem->iCursor; + sqlite3VdbeAddOp4(v, OP_VOpen, iCur, 0, 0, pVTab, P4_VTAB); + }else if( IsVirtual(pTab) ){ + /* noop */ + }else +#endif + if( (pLoop->wsFlags & WHERE_IDX_ONLY)==0 + && (wctrlFlags & WHERE_OR_SUBCLAUSE)==0 ){ + int op = OP_OpenRead; + if( pWInfo->eOnePass!=ONEPASS_OFF ){ + op = OP_OpenWrite; + pWInfo->aiCurOnePass[0] = pTabItem->iCursor; + }; + sqlite3OpenTable(pParse, pTabItem->iCursor, iDb, pTab, op); + assert( pTabItem->iCursor==pLevel->iTabCur ); + testcase( pWInfo->eOnePass==ONEPASS_OFF && pTab->nCol==BMS-1 ); + testcase( pWInfo->eOnePass==ONEPASS_OFF && pTab->nCol==BMS ); + if( pWInfo->eOnePass==ONEPASS_OFF + && pTab->nColtabFlags & (TF_HasGenerated|TF_WithoutRowid))==0 + && (pLoop->wsFlags & (WHERE_AUTO_INDEX|WHERE_BLOOMFILTER))==0 + ){ + /* If we know that only a prefix of the record will be used, + ** it is advantageous to reduce the "column count" field in + ** the P4 operand of the OP_OpenRead/Write opcode. */ + Bitmask b = pTabItem->colUsed; + int n = 0; + for(; b; b=b>>1, n++){} + sqlite3VdbeChangeP4(v, -1, SQLITE_INT_TO_PTR(n), P4_INT32); + assert( n<=pTab->nCol ); + } +#ifdef SQLITE_ENABLE_CURSOR_HINTS + if( pLoop->u.btree.pIndex!=0 ){ + sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ|bFordelete); + }else +#endif + { + sqlite3VdbeChangeP5(v, bFordelete); + } +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + sqlite3VdbeAddOp4Dup8(v, OP_ColumnsUsed, pTabItem->iCursor, 0, 0, + (const u8*)&pTabItem->colUsed, P4_INT64); +#endif + }else{ + sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); + } + if( pLoop->wsFlags & WHERE_INDEXED ){ + Index *pIx = pLoop->u.btree.pIndex; + int iIndexCur; + int op = OP_OpenRead; + /* iAuxArg is always set to a positive value if ONEPASS is possible */ + assert( iAuxArg!=0 || (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 ); + if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIx) + && (wctrlFlags & WHERE_OR_SUBCLAUSE)!=0 + ){ + /* This is one term of an OR-optimization using the PRIMARY KEY of a + ** WITHOUT ROWID table. No need for a separate index */ + iIndexCur = pLevel->iTabCur; + op = 0; + }else if( pWInfo->eOnePass!=ONEPASS_OFF ){ + Index *pJ = pTabItem->pTab->pIndex; + iIndexCur = iAuxArg; + assert( wctrlFlags & WHERE_ONEPASS_DESIRED ); + while( ALWAYS(pJ) && pJ!=pIx ){ + iIndexCur++; + pJ = pJ->pNext; + } + op = OP_OpenWrite; + pWInfo->aiCurOnePass[1] = iIndexCur; + }else if( iAuxArg && (wctrlFlags & WHERE_OR_SUBCLAUSE)!=0 ){ + iIndexCur = iAuxArg; + op = OP_ReopenIdx; + }else{ + iIndexCur = pParse->nTab++; + } + pLevel->iIdxCur = iIndexCur; + assert( pIx->pSchema==pTab->pSchema ); + assert( iIndexCur>=0 ); + if( op ){ + sqlite3VdbeAddOp3(v, op, iIndexCur, pIx->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pIx); + if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0 + && (pLoop->wsFlags & (WHERE_COLUMN_RANGE|WHERE_SKIPSCAN))==0 + && (pLoop->wsFlags & WHERE_BIGNULL_SORT)==0 + && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 + && (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 + && pWInfo->eDistinct!=WHERE_DISTINCT_ORDERED + ){ + sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ); + } + VdbeComment((v, "%s", pIx->zName)); +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + { + u64 colUsed = 0; + int ii, jj; + for(ii=0; iinColumn; ii++){ + jj = pIx->aiColumn[ii]; + if( jj<0 ) continue; + if( jj>63 ) jj = 63; + if( (pTabItem->colUsed & MASKBIT(jj))==0 ) continue; + colUsed |= ((u64)1)<<(ii<63 ? ii : 63); + } + sqlite3VdbeAddOp4Dup8(v, OP_ColumnsUsed, iIndexCur, 0, 0, + (u8*)&colUsed, P4_INT64); + } +#endif /* SQLITE_ENABLE_COLUMN_USED_MASK */ + } + } + if( iDb>=0 ) sqlite3CodeVerifySchema(pParse, iDb); + } + pWInfo->iTop = sqlite3VdbeCurrentAddr(v); + if( db->mallocFailed ) goto whereBeginError; + + /* Generate the code to do the search. Each iteration of the for + ** loop below generates code for a single nested loop of the VM + ** program. + */ + for(ii=0; iinErr ) goto whereBeginError; + pLevel = &pWInfo->a[ii]; + wsFlags = pLevel->pWLoop->wsFlags; + if( (wsFlags & (WHERE_AUTO_INDEX|WHERE_BLOOMFILTER))!=0 ){ + if( (wsFlags & WHERE_AUTO_INDEX)!=0 ){ +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX + constructAutomaticIndex(pParse, &pWInfo->sWC, + &pTabList->a[pLevel->iFrom], notReady, pLevel); +#endif + }else{ + sqlite3ConstructBloomFilter(pWInfo, ii, pLevel, notReady); + } + if( db->mallocFailed ) goto whereBeginError; + } + addrExplain = sqlite3WhereExplainOneScan( + pParse, pTabList, pLevel, wctrlFlags + ); + pLevel->addrBody = sqlite3VdbeCurrentAddr(v); + notReady = sqlite3WhereCodeOneLoopStart(pParse,v,pWInfo,ii,pLevel,notReady); + pWInfo->iContinue = pLevel->addrCont; + if( (wsFlags&WHERE_MULTI_OR)==0 && (wctrlFlags&WHERE_OR_SUBCLAUSE)==0 ){ + sqlite3WhereAddScanStatus(v, pTabList, pLevel, addrExplain); + } + } + + /* Done. */ + VdbeModuleComment((v, "Begin WHERE-core")); + pWInfo->iEndWhere = sqlite3VdbeCurrentAddr(v); + return pWInfo; + + /* Jump here if malloc fails */ +whereBeginError: + if( pWInfo ){ + testcase( pWInfo->pExprMods!=0 ); + whereUndoExprMods(pWInfo); + pParse->nQueryLoop = pWInfo->savedNQueryLoop; + whereInfoFree(db, pWInfo); + } + return 0; +} + +/* +** Part of sqlite3WhereEnd() will rewrite opcodes to reference the +** index rather than the main table. In SQLITE_DEBUG mode, we want +** to trace those changes if PRAGMA vdbe_addoptrace=on. This routine +** does that. +*/ +#ifndef SQLITE_DEBUG +# define OpcodeRewriteTrace(D,K,P) /* no-op */ +#else +# define OpcodeRewriteTrace(D,K,P) sqlite3WhereOpcodeRewriteTrace(D,K,P) + static void sqlite3WhereOpcodeRewriteTrace( + sqlite3 *db, + int pc, + VdbeOp *pOp + ){ + if( (db->flags & SQLITE_VdbeAddopTrace)==0 ) return; + sqlite3VdbePrintOp(0, pc, pOp); + } +#endif + +#ifdef SQLITE_DEBUG +/* +** Return true if cursor iCur is opened by instruction k of the +** bytecode. Used inside of assert() only. +*/ +static int cursorIsOpen(Vdbe *v, int iCur, int k){ + while( k>=0 ){ + VdbeOp *pOp = sqlite3VdbeGetOp(v,k--); + if( pOp->p1!=iCur ) continue; + if( pOp->opcode==OP_Close ) return 0; + if( pOp->opcode==OP_OpenRead ) return 1; + if( pOp->opcode==OP_OpenWrite ) return 1; + if( pOp->opcode==OP_OpenDup ) return 1; + if( pOp->opcode==OP_OpenAutoindex ) return 1; + if( pOp->opcode==OP_OpenEphemeral ) return 1; + } + return 0; +} +#endif /* SQLITE_DEBUG */ + +/* +** Generate the end of the WHERE loop. See comments on +** sqlite3WhereBegin() for additional information. +*/ +SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ + Parse *pParse = pWInfo->pParse; + Vdbe *v = pParse->pVdbe; + int i; + WhereLevel *pLevel; + WhereLoop *pLoop; + SrcList *pTabList = pWInfo->pTabList; + sqlite3 *db = pParse->db; + int iEnd = sqlite3VdbeCurrentAddr(v); + + /* Generate loop termination code. + */ + VdbeModuleComment((v, "End WHERE-core")); + for(i=pWInfo->nLevel-1; i>=0; i--){ + int addr; + pLevel = &pWInfo->a[i]; + pLoop = pLevel->pWLoop; + if( pLevel->op!=OP_Noop ){ +#ifndef SQLITE_DISABLE_SKIPAHEAD_DISTINCT + int addrSeek = 0; + Index *pIdx; + int n; + if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED + && i==pWInfo->nLevel-1 /* Ticket [ef9318757b152e3] 2017-10-21 */ + && (pLoop->wsFlags & WHERE_INDEXED)!=0 + && (pIdx = pLoop->u.btree.pIndex)->hasStat1 + && (n = pLoop->u.btree.nDistinctCol)>0 + && pIdx->aiRowLogEst[n]>=36 + ){ + int r1 = pParse->nMem+1; + int j, op; + for(j=0; jiIdxCur, j, r1+j); + } + pParse->nMem += n+1; + op = pLevel->op==OP_Prev ? OP_SeekLT : OP_SeekGT; + addrSeek = sqlite3VdbeAddOp4Int(v, op, pLevel->iIdxCur, 0, r1, n); + VdbeCoverageIf(v, op==OP_SeekLT); + VdbeCoverageIf(v, op==OP_SeekGT); + sqlite3VdbeAddOp2(v, OP_Goto, 1, pLevel->p2); + } +#endif /* SQLITE_DISABLE_SKIPAHEAD_DISTINCT */ + /* The common case: Advance to the next row */ + sqlite3VdbeResolveLabel(v, pLevel->addrCont); + sqlite3VdbeAddOp3(v, pLevel->op, pLevel->p1, pLevel->p2, pLevel->p3); + sqlite3VdbeChangeP5(v, pLevel->p5); + VdbeCoverage(v); + VdbeCoverageIf(v, pLevel->op==OP_Next); + VdbeCoverageIf(v, pLevel->op==OP_Prev); + VdbeCoverageIf(v, pLevel->op==OP_VNext); + if( pLevel->regBignull ){ + sqlite3VdbeResolveLabel(v, pLevel->addrBignull); + sqlite3VdbeAddOp2(v, OP_DecrJumpZero, pLevel->regBignull, pLevel->p2-1); + VdbeCoverage(v); + } +#ifndef SQLITE_DISABLE_SKIPAHEAD_DISTINCT + if( addrSeek ) sqlite3VdbeJumpHere(v, addrSeek); +#endif + }else{ + sqlite3VdbeResolveLabel(v, pLevel->addrCont); + } + if( (pLoop->wsFlags & WHERE_IN_ABLE)!=0 && pLevel->u.in.nIn>0 ){ + struct InLoop *pIn; + int j; + sqlite3VdbeResolveLabel(v, pLevel->addrNxt); + for(j=pLevel->u.in.nIn, pIn=&pLevel->u.in.aInLoop[j-1]; j>0; j--, pIn--){ + assert( sqlite3VdbeGetOp(v, pIn->addrInTop+1)->opcode==OP_IsNull + || pParse->db->mallocFailed ); + sqlite3VdbeJumpHere(v, pIn->addrInTop+1); + if( pIn->eEndLoopOp!=OP_Noop ){ + if( pIn->nPrefix ){ + int bEarlyOut = + (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 + && (pLoop->wsFlags & WHERE_IN_EARLYOUT)!=0; + if( pLevel->iLeftJoin ){ + /* For LEFT JOIN queries, cursor pIn->iCur may not have been + ** opened yet. This occurs for WHERE clauses such as + ** "a = ? AND b IN (...)", where the index is on (a, b). If + ** the RHS of the (a=?) is NULL, then the "b IN (...)" may + ** never have been coded, but the body of the loop run to + ** return the null-row. So, if the cursor is not open yet, + ** jump over the OP_Next or OP_Prev instruction about to + ** be coded. */ + sqlite3VdbeAddOp2(v, OP_IfNotOpen, pIn->iCur, + sqlite3VdbeCurrentAddr(v) + 2 + bEarlyOut); + VdbeCoverage(v); + } + if( bEarlyOut ){ + sqlite3VdbeAddOp4Int(v, OP_IfNoHope, pLevel->iIdxCur, + sqlite3VdbeCurrentAddr(v)+2, + pIn->iBase, pIn->nPrefix); + VdbeCoverage(v); + /* Retarget the OP_IsNull against the left operand of IN so + ** it jumps past the OP_IfNoHope. This is because the + ** OP_IsNull also bypasses the OP_Affinity opcode that is + ** required by OP_IfNoHope. */ + sqlite3VdbeJumpHere(v, pIn->addrInTop+1); + } + } + sqlite3VdbeAddOp2(v, pIn->eEndLoopOp, pIn->iCur, pIn->addrInTop); + VdbeCoverage(v); + VdbeCoverageIf(v, pIn->eEndLoopOp==OP_Prev); + VdbeCoverageIf(v, pIn->eEndLoopOp==OP_Next); + } + sqlite3VdbeJumpHere(v, pIn->addrInTop-1); + } + } + sqlite3VdbeResolveLabel(v, pLevel->addrBrk); + if( pLevel->addrSkip ){ + sqlite3VdbeGoto(v, pLevel->addrSkip); + VdbeComment((v, "next skip-scan on %s", pLoop->u.btree.pIndex->zName)); + sqlite3VdbeJumpHere(v, pLevel->addrSkip); + sqlite3VdbeJumpHere(v, pLevel->addrSkip-2); + } +#ifndef SQLITE_LIKE_DOESNT_MATCH_BLOBS + if( pLevel->addrLikeRep ){ + sqlite3VdbeAddOp2(v, OP_DecrJumpZero, (int)(pLevel->iLikeRepCntr>>1), + pLevel->addrLikeRep); + VdbeCoverage(v); + } +#endif + if( pLevel->iLeftJoin ){ + int ws = pLoop->wsFlags; + addr = sqlite3VdbeAddOp1(v, OP_IfPos, pLevel->iLeftJoin); VdbeCoverage(v); + assert( (ws & WHERE_IDX_ONLY)==0 || (ws & WHERE_INDEXED)!=0 ); + if( (ws & WHERE_IDX_ONLY)==0 ){ + assert( pLevel->iTabCur==pTabList->a[pLevel->iFrom].iCursor ); + sqlite3VdbeAddOp1(v, OP_NullRow, pLevel->iTabCur); + } + if( (ws & WHERE_INDEXED) + || ((ws & WHERE_MULTI_OR) && pLevel->u.pCoveringIdx) + ){ + if( ws & WHERE_MULTI_OR ){ + Index *pIx = pLevel->u.pCoveringIdx; + int iDb = sqlite3SchemaToIndex(db, pIx->pSchema); + sqlite3VdbeAddOp3(v, OP_ReopenIdx, pLevel->iIdxCur, pIx->tnum, iDb); + sqlite3VdbeSetP4KeyInfo(pParse, pIx); + } + sqlite3VdbeAddOp1(v, OP_NullRow, pLevel->iIdxCur); + } + if( pLevel->op==OP_Return ){ + sqlite3VdbeAddOp2(v, OP_Gosub, pLevel->p1, pLevel->addrFirst); + }else{ + sqlite3VdbeGoto(v, pLevel->addrFirst); + } + sqlite3VdbeJumpHere(v, addr); + } + VdbeModuleComment((v, "End WHERE-loop%d: %s", i, + pWInfo->pTabList->a[pLevel->iFrom].pTab->zName)); + } + + /* The "break" point is here, just past the end of the outer loop. + ** Set it. + */ + sqlite3VdbeResolveLabel(v, pWInfo->iBreak); + + assert( pWInfo->nLevel<=pTabList->nSrc ); + for(i=0, pLevel=pWInfo->a; inLevel; i++, pLevel++){ + int k, last; + VdbeOp *pOp, *pLastOp; + Index *pIdx = 0; + SrcItem *pTabItem = &pTabList->a[pLevel->iFrom]; + Table *pTab = pTabItem->pTab; + assert( pTab!=0 ); + pLoop = pLevel->pWLoop; + + /* For a co-routine, change all OP_Column references to the table of + ** the co-routine into OP_Copy of result contained in a register. + ** OP_Rowid becomes OP_Null. + */ + if( pTabItem->fg.viaCoroutine ){ + testcase( pParse->db->mallocFailed ); + translateColumnToCopy(pParse, pLevel->addrBody, pLevel->iTabCur, + pTabItem->regResult, 0); + continue; + } + +#ifdef SQLITE_ENABLE_EARLY_CURSOR_CLOSE + /* Close all of the cursors that were opened by sqlite3WhereBegin. + ** Except, do not close cursors that will be reused by the OR optimization + ** (WHERE_OR_SUBCLAUSE). And do not close the OP_OpenWrite cursors + ** created for the ONEPASS optimization. + */ + if( (pTab->tabFlags & TF_Ephemeral)==0 + && !IsView(pTab) + && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE)==0 + ){ + int ws = pLoop->wsFlags; + if( pWInfo->eOnePass==ONEPASS_OFF && (ws & WHERE_IDX_ONLY)==0 ){ + sqlite3VdbeAddOp1(v, OP_Close, pTabItem->iCursor); + } + if( (ws & WHERE_INDEXED)!=0 + && (ws & (WHERE_IPK|WHERE_AUTO_INDEX))==0 + && pLevel->iIdxCur!=pWInfo->aiCurOnePass[1] + ){ + sqlite3VdbeAddOp1(v, OP_Close, pLevel->iIdxCur); + } + } +#endif + + /* If this scan uses an index, make VDBE code substitutions to read data + ** from the index instead of from the table where possible. In some cases + ** this optimization prevents the table from ever being read, which can + ** yield a significant performance boost. + ** + ** Calls to the code generator in between sqlite3WhereBegin and + ** sqlite3WhereEnd will have created code that references the table + ** directly. This loop scans all that code looking for opcodes + ** that reference the table and converts them into opcodes that + ** reference the index. + */ + if( pLoop->wsFlags & (WHERE_INDEXED|WHERE_IDX_ONLY) ){ + pIdx = pLoop->u.btree.pIndex; + }else if( pLoop->wsFlags & WHERE_MULTI_OR ){ + pIdx = pLevel->u.pCoveringIdx; + } + if( pIdx + && !db->mallocFailed + ){ + if( pWInfo->eOnePass==ONEPASS_OFF || !HasRowid(pIdx->pTable) ){ + last = iEnd; + }else{ + last = pWInfo->iEndWhere; + } + k = pLevel->addrBody + 1; +#ifdef SQLITE_DEBUG + if( db->flags & SQLITE_VdbeAddopTrace ){ + printf("TRANSLATE opcodes in range %d..%d\n", k, last-1); + } + /* Proof that the "+1" on the k value above is safe */ + pOp = sqlite3VdbeGetOp(v, k - 1); + assert( pOp->opcode!=OP_Column || pOp->p1!=pLevel->iTabCur ); + assert( pOp->opcode!=OP_Rowid || pOp->p1!=pLevel->iTabCur ); + assert( pOp->opcode!=OP_IfNullRow || pOp->p1!=pLevel->iTabCur ); +#endif + pOp = sqlite3VdbeGetOp(v, k); + pLastOp = pOp + (last - k); + assert( pOp<=pLastOp ); + do{ + if( pOp->p1!=pLevel->iTabCur ){ + /* no-op */ + }else if( pOp->opcode==OP_Column +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC + || pOp->opcode==OP_Offset +#endif + ){ + int x = pOp->p2; + assert( pIdx->pTable==pTab ); +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC + if( pOp->opcode==OP_Offset ){ + /* Do not need to translate the column number */ + }else +#endif + if( !HasRowid(pTab) ){ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + x = pPk->aiColumn[x]; + assert( x>=0 ); + }else{ + testcase( x!=sqlite3StorageColumnToTable(pTab,x) ); + x = sqlite3StorageColumnToTable(pTab,x); + } + x = sqlite3TableColumnToIndex(pIdx, x); + if( x>=0 ){ + pOp->p2 = x; + pOp->p1 = pLevel->iIdxCur; + OpcodeRewriteTrace(db, k, pOp); + }else{ + /* Unable to translate the table reference into an index + ** reference. Verify that this is harmless - that the + ** table being referenced really is open. + */ +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC + assert( (pLoop->wsFlags & WHERE_IDX_ONLY)==0 + || cursorIsOpen(v,pOp->p1,k) + || pOp->opcode==OP_Offset + ); +#else + assert( (pLoop->wsFlags & WHERE_IDX_ONLY)==0 + || cursorIsOpen(v,pOp->p1,k) + ); +#endif + } + }else if( pOp->opcode==OP_Rowid ){ + pOp->p1 = pLevel->iIdxCur; + pOp->opcode = OP_IdxRowid; + OpcodeRewriteTrace(db, k, pOp); + }else if( pOp->opcode==OP_IfNullRow ){ + pOp->p1 = pLevel->iIdxCur; + OpcodeRewriteTrace(db, k, pOp); + } +#ifdef SQLITE_DEBUG + k++; +#endif + }while( (++pOp)flags & SQLITE_VdbeAddopTrace ) printf("TRANSLATE complete\n"); +#endif + } + } + + /* Final cleanup + */ + if( pWInfo->pExprMods ) whereUndoExprMods(pWInfo); + pParse->nQueryLoop = pWInfo->savedNQueryLoop; + whereInfoFree(db, pWInfo); + return; +} + +/************** End of where.c ***********************************************/ +/************** Begin file window.c ******************************************/ +/* +** 2018 May 08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_OMIT_WINDOWFUNC + +/* +** SELECT REWRITING +** +** Any SELECT statement that contains one or more window functions in +** either the select list or ORDER BY clause (the only two places window +** functions may be used) is transformed by function sqlite3WindowRewrite() +** in order to support window function processing. For example, with the +** schema: +** +** CREATE TABLE t1(a, b, c, d, e, f, g); +** +** the statement: +** +** SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM t1 ORDER BY e; +** +** is transformed to: +** +** SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM ( +** SELECT a, e, c, d, b FROM t1 ORDER BY c, d +** ) ORDER BY e; +** +** The flattening optimization is disabled when processing this transformed +** SELECT statement. This allows the implementation of the window function +** (in this case max()) to process rows sorted in order of (c, d), which +** makes things easier for obvious reasons. More generally: +** +** * FROM, WHERE, GROUP BY and HAVING clauses are all moved to +** the sub-query. +** +** * ORDER BY, LIMIT and OFFSET remain part of the parent query. +** +** * Terminals from each of the expression trees that make up the +** select-list and ORDER BY expressions in the parent query are +** selected by the sub-query. For the purposes of the transformation, +** terminals are column references and aggregate functions. +** +** If there is more than one window function in the SELECT that uses +** the same window declaration (the OVER bit), then a single scan may +** be used to process more than one window function. For example: +** +** SELECT max(b) OVER (PARTITION BY c ORDER BY d), +** min(e) OVER (PARTITION BY c ORDER BY d) +** FROM t1; +** +** is transformed in the same way as the example above. However: +** +** SELECT max(b) OVER (PARTITION BY c ORDER BY d), +** min(e) OVER (PARTITION BY a ORDER BY b) +** FROM t1; +** +** Must be transformed to: +** +** SELECT max(b) OVER (PARTITION BY c ORDER BY d) FROM ( +** SELECT e, min(e) OVER (PARTITION BY a ORDER BY b), c, d, b FROM +** SELECT a, e, c, d, b FROM t1 ORDER BY a, b +** ) ORDER BY c, d +** ) ORDER BY e; +** +** so that both min() and max() may process rows in the order defined by +** their respective window declarations. +** +** INTERFACE WITH SELECT.C +** +** When processing the rewritten SELECT statement, code in select.c calls +** sqlite3WhereBegin() to begin iterating through the results of the +** sub-query, which is always implemented as a co-routine. It then calls +** sqlite3WindowCodeStep() to process rows and finish the scan by calling +** sqlite3WhereEnd(). +** +** sqlite3WindowCodeStep() generates VM code so that, for each row returned +** by the sub-query a sub-routine (OP_Gosub) coded by select.c is invoked. +** When the sub-routine is invoked: +** +** * The results of all window-functions for the row are stored +** in the associated Window.regResult registers. +** +** * The required terminal values are stored in the current row of +** temp table Window.iEphCsr. +** +** In some cases, depending on the window frame and the specific window +** functions invoked, sqlite3WindowCodeStep() caches each entire partition +** in a temp table before returning any rows. In other cases it does not. +** This detail is encapsulated within this file, the code generated by +** select.c is the same in either case. +** +** BUILT-IN WINDOW FUNCTIONS +** +** This implementation features the following built-in window functions: +** +** row_number() +** rank() +** dense_rank() +** percent_rank() +** cume_dist() +** ntile(N) +** lead(expr [, offset [, default]]) +** lag(expr [, offset [, default]]) +** first_value(expr) +** last_value(expr) +** nth_value(expr, N) +** +** These are the same built-in window functions supported by Postgres. +** Although the behaviour of aggregate window functions (functions that +** can be used as either aggregates or window funtions) allows them to +** be implemented using an API, built-in window functions are much more +** esoteric. Additionally, some window functions (e.g. nth_value()) +** may only be implemented by caching the entire partition in memory. +** As such, some built-in window functions use the same API as aggregate +** window functions and some are implemented directly using VDBE +** instructions. Additionally, for those functions that use the API, the +** window frame is sometimes modified before the SELECT statement is +** rewritten. For example, regardless of the specified window frame, the +** row_number() function always uses: +** +** ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +** +** See sqlite3WindowUpdate() for details. +** +** As well as some of the built-in window functions, aggregate window +** functions min() and max() are implemented using VDBE instructions if +** the start of the window frame is declared as anything other than +** UNBOUNDED PRECEDING. +*/ + +/* +** Implementation of built-in window function row_number(). Assumes that the +** window frame has been coerced to: +** +** ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +*/ +static void row_numberStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + i64 *p = (i64*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ) (*p)++; + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(apArg); +} +static void row_numberValueFunc(sqlite3_context *pCtx){ + i64 *p = (i64*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + sqlite3_result_int64(pCtx, (p ? *p : 0)); +} + +/* +** Context object type used by rank(), dense_rank(), percent_rank() and +** cume_dist(). +*/ +struct CallCount { + i64 nValue; + i64 nStep; + i64 nTotal; +}; + +/* +** Implementation of built-in window function dense_rank(). Assumes that +** the window frame has been set to: +** +** RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +*/ +static void dense_rankStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct CallCount *p; + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ) p->nStep = 1; + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(apArg); +} +static void dense_rankValueFunc(sqlite3_context *pCtx){ + struct CallCount *p; + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + if( p->nStep ){ + p->nValue++; + p->nStep = 0; + } + sqlite3_result_int64(pCtx, p->nValue); + } +} + +/* +** Implementation of built-in window function nth_value(). This +** implementation is used in "slow mode" only - when the EXCLUDE clause +** is not set to the default value "NO OTHERS". +*/ +struct NthValueCtx { + i64 nStep; + sqlite3_value *pValue; +}; +static void nth_valueStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct NthValueCtx *p; + p = (struct NthValueCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + i64 iVal; + switch( sqlite3_value_numeric_type(apArg[1]) ){ + case SQLITE_INTEGER: + iVal = sqlite3_value_int64(apArg[1]); + break; + case SQLITE_FLOAT: { + double fVal = sqlite3_value_double(apArg[1]); + if( ((i64)fVal)!=fVal ) goto error_out; + iVal = (i64)fVal; + break; + } + default: + goto error_out; + } + if( iVal<=0 ) goto error_out; + + p->nStep++; + if( iVal==p->nStep ){ + p->pValue = sqlite3_value_dup(apArg[0]); + if( !p->pValue ){ + sqlite3_result_error_nomem(pCtx); + } + } + } + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(apArg); + return; + + error_out: + sqlite3_result_error( + pCtx, "second argument to nth_value must be a positive integer", -1 + ); +} +static void nth_valueFinalizeFunc(sqlite3_context *pCtx){ + struct NthValueCtx *p; + p = (struct NthValueCtx*)sqlite3_aggregate_context(pCtx, 0); + if( p && p->pValue ){ + sqlite3_result_value(pCtx, p->pValue); + sqlite3_value_free(p->pValue); + p->pValue = 0; + } +} +#define nth_valueInvFunc noopStepFunc +#define nth_valueValueFunc noopValueFunc + +static void first_valueStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct NthValueCtx *p; + p = (struct NthValueCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p && p->pValue==0 ){ + p->pValue = sqlite3_value_dup(apArg[0]); + if( !p->pValue ){ + sqlite3_result_error_nomem(pCtx); + } + } + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(apArg); +} +static void first_valueFinalizeFunc(sqlite3_context *pCtx){ + struct NthValueCtx *p; + p = (struct NthValueCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p && p->pValue ){ + sqlite3_result_value(pCtx, p->pValue); + sqlite3_value_free(p->pValue); + p->pValue = 0; + } +} +#define first_valueInvFunc noopStepFunc +#define first_valueValueFunc noopValueFunc + +/* +** Implementation of built-in window function rank(). Assumes that +** the window frame has been set to: +** +** RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +*/ +static void rankStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct CallCount *p; + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + p->nStep++; + if( p->nValue==0 ){ + p->nValue = p->nStep; + } + } + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(apArg); +} +static void rankValueFunc(sqlite3_context *pCtx){ + struct CallCount *p; + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + sqlite3_result_int64(pCtx, p->nValue); + p->nValue = 0; + } +} + +/* +** Implementation of built-in window function percent_rank(). Assumes that +** the window frame has been set to: +** +** GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +*/ +static void percent_rankStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct CallCount *p; + UNUSED_PARAMETER(nArg); assert( nArg==0 ); + UNUSED_PARAMETER(apArg); + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + p->nTotal++; + } +} +static void percent_rankInvFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct CallCount *p; + UNUSED_PARAMETER(nArg); assert( nArg==0 ); + UNUSED_PARAMETER(apArg); + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + p->nStep++; +} +static void percent_rankValueFunc(sqlite3_context *pCtx){ + struct CallCount *p; + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + p->nValue = p->nStep; + if( p->nTotal>1 ){ + double r = (double)p->nValue / (double)(p->nTotal-1); + sqlite3_result_double(pCtx, r); + }else{ + sqlite3_result_double(pCtx, 0.0); + } + } +} +#define percent_rankFinalizeFunc percent_rankValueFunc + +/* +** Implementation of built-in window function cume_dist(). Assumes that +** the window frame has been set to: +** +** GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING +*/ +static void cume_distStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct CallCount *p; + UNUSED_PARAMETER(nArg); assert( nArg==0 ); + UNUSED_PARAMETER(apArg); + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + p->nTotal++; + } +} +static void cume_distInvFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct CallCount *p; + UNUSED_PARAMETER(nArg); assert( nArg==0 ); + UNUSED_PARAMETER(apArg); + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + p->nStep++; +} +static void cume_distValueFunc(sqlite3_context *pCtx){ + struct CallCount *p; + p = (struct CallCount*)sqlite3_aggregate_context(pCtx, 0); + if( p ){ + double r = (double)(p->nStep) / (double)(p->nTotal); + sqlite3_result_double(pCtx, r); + } +} +#define cume_distFinalizeFunc cume_distValueFunc + +/* +** Context object for ntile() window function. +*/ +struct NtileCtx { + i64 nTotal; /* Total rows in partition */ + i64 nParam; /* Parameter passed to ntile(N) */ + i64 iRow; /* Current row */ +}; + +/* +** Implementation of ntile(). This assumes that the window frame has +** been coerced to: +** +** ROWS CURRENT ROW AND UNBOUNDED FOLLOWING +*/ +static void ntileStepFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct NtileCtx *p; + assert( nArg==1 ); UNUSED_PARAMETER(nArg); + p = (struct NtileCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p ){ + if( p->nTotal==0 ){ + p->nParam = sqlite3_value_int64(apArg[0]); + if( p->nParam<=0 ){ + sqlite3_result_error( + pCtx, "argument of ntile must be a positive integer", -1 + ); + } + } + p->nTotal++; + } +} +static void ntileInvFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct NtileCtx *p; + assert( nArg==1 ); UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(apArg); + p = (struct NtileCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + p->iRow++; +} +static void ntileValueFunc(sqlite3_context *pCtx){ + struct NtileCtx *p; + p = (struct NtileCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p && p->nParam>0 ){ + int nSize = (p->nTotal / p->nParam); + if( nSize==0 ){ + sqlite3_result_int64(pCtx, p->iRow+1); + }else{ + i64 nLarge = p->nTotal - p->nParam*nSize; + i64 iSmall = nLarge*(nSize+1); + i64 iRow = p->iRow; + + assert( (nLarge*(nSize+1) + (p->nParam-nLarge)*nSize)==p->nTotal ); + + if( iRowpVal); + p->pVal = sqlite3_value_dup(apArg[0]); + if( p->pVal==0 ){ + sqlite3_result_error_nomem(pCtx); + }else{ + p->nVal++; + } + } +} +static void last_valueInvFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + struct LastValueCtx *p; + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(apArg); + p = (struct LastValueCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( ALWAYS(p) ){ + p->nVal--; + if( p->nVal==0 ){ + sqlite3_value_free(p->pVal); + p->pVal = 0; + } + } +} +static void last_valueValueFunc(sqlite3_context *pCtx){ + struct LastValueCtx *p; + p = (struct LastValueCtx*)sqlite3_aggregate_context(pCtx, 0); + if( p && p->pVal ){ + sqlite3_result_value(pCtx, p->pVal); + } +} +static void last_valueFinalizeFunc(sqlite3_context *pCtx){ + struct LastValueCtx *p; + p = (struct LastValueCtx*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p && p->pVal ){ + sqlite3_result_value(pCtx, p->pVal); + sqlite3_value_free(p->pVal); + p->pVal = 0; + } +} + +/* +** Static names for the built-in window function names. These static +** names are used, rather than string literals, so that FuncDef objects +** can be associated with a particular window function by direct +** comparison of the zName pointer. Example: +** +** if( pFuncDef->zName==row_valueName ){ ... } +*/ +static const char row_numberName[] = "row_number"; +static const char dense_rankName[] = "dense_rank"; +static const char rankName[] = "rank"; +static const char percent_rankName[] = "percent_rank"; +static const char cume_distName[] = "cume_dist"; +static const char ntileName[] = "ntile"; +static const char last_valueName[] = "last_value"; +static const char nth_valueName[] = "nth_value"; +static const char first_valueName[] = "first_value"; +static const char leadName[] = "lead"; +static const char lagName[] = "lag"; + +/* +** No-op implementations of xStep() and xFinalize(). Used as place-holders +** for built-in window functions that never call those interfaces. +** +** The noopValueFunc() is called but is expected to do nothing. The +** noopStepFunc() is never called, and so it is marked with NO_TEST to +** let the test coverage routine know not to expect this function to be +** invoked. +*/ +static void noopStepFunc( /*NO_TEST*/ + sqlite3_context *p, /*NO_TEST*/ + int n, /*NO_TEST*/ + sqlite3_value **a /*NO_TEST*/ +){ /*NO_TEST*/ + UNUSED_PARAMETER(p); /*NO_TEST*/ + UNUSED_PARAMETER(n); /*NO_TEST*/ + UNUSED_PARAMETER(a); /*NO_TEST*/ + assert(0); /*NO_TEST*/ +} /*NO_TEST*/ +static void noopValueFunc(sqlite3_context *p){ UNUSED_PARAMETER(p); /*no-op*/ } + +/* Window functions that use all window interfaces: xStep, xFinal, +** xValue, and xInverse */ +#define WINDOWFUNCALL(name,nArg,extra) { \ + nArg, (SQLITE_FUNC_BUILTIN|SQLITE_UTF8|SQLITE_FUNC_WINDOW|extra), 0, 0, \ + name ## StepFunc, name ## FinalizeFunc, name ## ValueFunc, \ + name ## InvFunc, name ## Name, {0} \ +} + +/* Window functions that are implemented using bytecode and thus have +** no-op routines for their methods */ +#define WINDOWFUNCNOOP(name,nArg,extra) { \ + nArg, (SQLITE_FUNC_BUILTIN|SQLITE_UTF8|SQLITE_FUNC_WINDOW|extra), 0, 0, \ + noopStepFunc, noopValueFunc, noopValueFunc, \ + noopStepFunc, name ## Name, {0} \ +} + +/* Window functions that use all window interfaces: xStep, the +** same routine for xFinalize and xValue and which never call +** xInverse. */ +#define WINDOWFUNCX(name,nArg,extra) { \ + nArg, (SQLITE_FUNC_BUILTIN|SQLITE_UTF8|SQLITE_FUNC_WINDOW|extra), 0, 0, \ + name ## StepFunc, name ## ValueFunc, name ## ValueFunc, \ + noopStepFunc, name ## Name, {0} \ +} + + +/* +** Register those built-in window functions that are not also aggregates. +*/ +SQLITE_PRIVATE void sqlite3WindowFunctions(void){ + static FuncDef aWindowFuncs[] = { + WINDOWFUNCX(row_number, 0, 0), + WINDOWFUNCX(dense_rank, 0, 0), + WINDOWFUNCX(rank, 0, 0), + WINDOWFUNCALL(percent_rank, 0, 0), + WINDOWFUNCALL(cume_dist, 0, 0), + WINDOWFUNCALL(ntile, 1, 0), + WINDOWFUNCALL(last_value, 1, 0), + WINDOWFUNCALL(nth_value, 2, 0), + WINDOWFUNCALL(first_value, 1, 0), + WINDOWFUNCNOOP(lead, 1, 0), + WINDOWFUNCNOOP(lead, 2, 0), + WINDOWFUNCNOOP(lead, 3, 0), + WINDOWFUNCNOOP(lag, 1, 0), + WINDOWFUNCNOOP(lag, 2, 0), + WINDOWFUNCNOOP(lag, 3, 0), + }; + sqlite3InsertBuiltinFuncs(aWindowFuncs, ArraySize(aWindowFuncs)); +} + +static Window *windowFind(Parse *pParse, Window *pList, const char *zName){ + Window *p; + for(p=pList; p; p=p->pNextWin){ + if( sqlite3StrICmp(p->zName, zName)==0 ) break; + } + if( p==0 ){ + sqlite3ErrorMsg(pParse, "no such window: %s", zName); + } + return p; +} + +/* +** This function is called immediately after resolving the function name +** for a window function within a SELECT statement. Argument pList is a +** linked list of WINDOW definitions for the current SELECT statement. +** Argument pFunc is the function definition just resolved and pWin +** is the Window object representing the associated OVER clause. This +** function updates the contents of pWin as follows: +** +** * If the OVER clause refered to a named window (as in "max(x) OVER win"), +** search list pList for a matching WINDOW definition, and update pWin +** accordingly. If no such WINDOW clause can be found, leave an error +** in pParse. +** +** * If the function is a built-in window function that requires the +** window to be coerced (see "BUILT-IN WINDOW FUNCTIONS" at the top +** of this file), pWin is updated here. +*/ +SQLITE_PRIVATE void sqlite3WindowUpdate( + Parse *pParse, + Window *pList, /* List of named windows for this SELECT */ + Window *pWin, /* Window frame to update */ + FuncDef *pFunc /* Window function definition */ +){ + if( pWin->zName && pWin->eFrmType==0 ){ + Window *p = windowFind(pParse, pList, pWin->zName); + if( p==0 ) return; + pWin->pPartition = sqlite3ExprListDup(pParse->db, p->pPartition, 0); + pWin->pOrderBy = sqlite3ExprListDup(pParse->db, p->pOrderBy, 0); + pWin->pStart = sqlite3ExprDup(pParse->db, p->pStart, 0); + pWin->pEnd = sqlite3ExprDup(pParse->db, p->pEnd, 0); + pWin->eStart = p->eStart; + pWin->eEnd = p->eEnd; + pWin->eFrmType = p->eFrmType; + pWin->eExclude = p->eExclude; + }else{ + sqlite3WindowChain(pParse, pWin, pList); + } + if( (pWin->eFrmType==TK_RANGE) + && (pWin->pStart || pWin->pEnd) + && (pWin->pOrderBy==0 || pWin->pOrderBy->nExpr!=1) + ){ + sqlite3ErrorMsg(pParse, + "RANGE with offset PRECEDING/FOLLOWING requires one ORDER BY expression" + ); + }else + if( pFunc->funcFlags & SQLITE_FUNC_WINDOW ){ + sqlite3 *db = pParse->db; + if( pWin->pFilter ){ + sqlite3ErrorMsg(pParse, + "FILTER clause may only be used with aggregate window functions" + ); + }else{ + struct WindowUpdate { + const char *zFunc; + int eFrmType; + int eStart; + int eEnd; + } aUp[] = { + { row_numberName, TK_ROWS, TK_UNBOUNDED, TK_CURRENT }, + { dense_rankName, TK_RANGE, TK_UNBOUNDED, TK_CURRENT }, + { rankName, TK_RANGE, TK_UNBOUNDED, TK_CURRENT }, + { percent_rankName, TK_GROUPS, TK_CURRENT, TK_UNBOUNDED }, + { cume_distName, TK_GROUPS, TK_FOLLOWING, TK_UNBOUNDED }, + { ntileName, TK_ROWS, TK_CURRENT, TK_UNBOUNDED }, + { leadName, TK_ROWS, TK_UNBOUNDED, TK_UNBOUNDED }, + { lagName, TK_ROWS, TK_UNBOUNDED, TK_CURRENT }, + }; + int i; + for(i=0; izName==aUp[i].zFunc ){ + sqlite3ExprDelete(db, pWin->pStart); + sqlite3ExprDelete(db, pWin->pEnd); + pWin->pEnd = pWin->pStart = 0; + pWin->eFrmType = aUp[i].eFrmType; + pWin->eStart = aUp[i].eStart; + pWin->eEnd = aUp[i].eEnd; + pWin->eExclude = 0; + if( pWin->eStart==TK_FOLLOWING ){ + pWin->pStart = sqlite3Expr(db, TK_INTEGER, "1"); + } + break; + } + } + } + } + pWin->pFunc = pFunc; +} + +/* +** Context object passed through sqlite3WalkExprList() to +** selectWindowRewriteExprCb() by selectWindowRewriteEList(). +*/ +typedef struct WindowRewrite WindowRewrite; +struct WindowRewrite { + Window *pWin; + SrcList *pSrc; + ExprList *pSub; + Table *pTab; + Select *pSubSelect; /* Current sub-select, if any */ +}; + +/* +** Callback function used by selectWindowRewriteEList(). If necessary, +** this function appends to the output expression-list and updates +** expression (*ppExpr) in place. +*/ +static int selectWindowRewriteExprCb(Walker *pWalker, Expr *pExpr){ + struct WindowRewrite *p = pWalker->u.pRewrite; + Parse *pParse = pWalker->pParse; + assert( p!=0 ); + assert( p->pWin!=0 ); + + /* If this function is being called from within a scalar sub-select + ** that used by the SELECT statement being processed, only process + ** TK_COLUMN expressions that refer to it (the outer SELECT). Do + ** not process aggregates or window functions at all, as they belong + ** to the scalar sub-select. */ + if( p->pSubSelect ){ + if( pExpr->op!=TK_COLUMN ){ + return WRC_Continue; + }else{ + int nSrc = p->pSrc->nSrc; + int i; + for(i=0; iiTable==p->pSrc->a[i].iCursor ) break; + } + if( i==nSrc ) return WRC_Continue; + } + } + + switch( pExpr->op ){ + + case TK_FUNCTION: + if( !ExprHasProperty(pExpr, EP_WinFunc) ){ + break; + }else{ + Window *pWin; + for(pWin=p->pWin; pWin; pWin=pWin->pNextWin){ + if( pExpr->y.pWin==pWin ){ + assert( pWin->pOwner==pExpr ); + return WRC_Prune; + } + } + } + /* no break */ deliberate_fall_through + + case TK_AGG_FUNCTION: + case TK_COLUMN: { + int iCol = -1; + if( pParse->db->mallocFailed ) return WRC_Abort; + if( p->pSub ){ + int i; + for(i=0; ipSub->nExpr; i++){ + if( 0==sqlite3ExprCompare(0, p->pSub->a[i].pExpr, pExpr, -1) ){ + iCol = i; + break; + } + } + } + if( iCol<0 ){ + Expr *pDup = sqlite3ExprDup(pParse->db, pExpr, 0); + if( pDup && pDup->op==TK_AGG_FUNCTION ) pDup->op = TK_FUNCTION; + p->pSub = sqlite3ExprListAppend(pParse, p->pSub, pDup); + } + if( p->pSub ){ + int f = pExpr->flags & EP_Collate; + assert( ExprHasProperty(pExpr, EP_Static)==0 ); + ExprSetProperty(pExpr, EP_Static); + sqlite3ExprDelete(pParse->db, pExpr); + ExprClearProperty(pExpr, EP_Static); + memset(pExpr, 0, sizeof(Expr)); + + pExpr->op = TK_COLUMN; + pExpr->iColumn = (iCol<0 ? p->pSub->nExpr-1: iCol); + pExpr->iTable = p->pWin->iEphCsr; + pExpr->y.pTab = p->pTab; + pExpr->flags = f; + } + if( pParse->db->mallocFailed ) return WRC_Abort; + break; + } + + default: /* no-op */ + break; + } + + return WRC_Continue; +} +static int selectWindowRewriteSelectCb(Walker *pWalker, Select *pSelect){ + struct WindowRewrite *p = pWalker->u.pRewrite; + Select *pSave = p->pSubSelect; + if( pSave==pSelect ){ + return WRC_Continue; + }else{ + p->pSubSelect = pSelect; + sqlite3WalkSelect(pWalker, pSelect); + p->pSubSelect = pSave; + } + return WRC_Prune; +} + + +/* +** Iterate through each expression in expression-list pEList. For each: +** +** * TK_COLUMN, +** * aggregate function, or +** * window function with a Window object that is not a member of the +** Window list passed as the second argument (pWin). +** +** Append the node to output expression-list (*ppSub). And replace it +** with a TK_COLUMN that reads the (N-1)th element of table +** pWin->iEphCsr, where N is the number of elements in (*ppSub) after +** appending the new one. +*/ +static void selectWindowRewriteEList( + Parse *pParse, + Window *pWin, + SrcList *pSrc, + ExprList *pEList, /* Rewrite expressions in this list */ + Table *pTab, + ExprList **ppSub /* IN/OUT: Sub-select expression-list */ +){ + Walker sWalker; + WindowRewrite sRewrite; + + assert( pWin!=0 ); + memset(&sWalker, 0, sizeof(Walker)); + memset(&sRewrite, 0, sizeof(WindowRewrite)); + + sRewrite.pSub = *ppSub; + sRewrite.pWin = pWin; + sRewrite.pSrc = pSrc; + sRewrite.pTab = pTab; + + sWalker.pParse = pParse; + sWalker.xExprCallback = selectWindowRewriteExprCb; + sWalker.xSelectCallback = selectWindowRewriteSelectCb; + sWalker.u.pRewrite = &sRewrite; + + (void)sqlite3WalkExprList(&sWalker, pEList); + + *ppSub = sRewrite.pSub; +} + +/* +** Append a copy of each expression in expression-list pAppend to +** expression list pList. Return a pointer to the result list. +*/ +static ExprList *exprListAppendList( + Parse *pParse, /* Parsing context */ + ExprList *pList, /* List to which to append. Might be NULL */ + ExprList *pAppend, /* List of values to append. Might be NULL */ + int bIntToNull +){ + if( pAppend ){ + int i; + int nInit = pList ? pList->nExpr : 0; + for(i=0; inExpr; i++){ + sqlite3 *db = pParse->db; + Expr *pDup = sqlite3ExprDup(db, pAppend->a[i].pExpr, 0); + assert( pDup==0 || !ExprHasProperty(pDup, EP_MemToken) ); + if( db->mallocFailed ){ + sqlite3ExprDelete(db, pDup); + break; + } + if( bIntToNull ){ + int iDummy; + Expr *pSub; + pSub = sqlite3ExprSkipCollateAndLikely(pDup); + if( sqlite3ExprIsInteger(pSub, &iDummy) ){ + pSub->op = TK_NULL; + pSub->flags &= ~(EP_IntValue|EP_IsTrue|EP_IsFalse); + pSub->u.zToken = 0; + } + } + pList = sqlite3ExprListAppend(pParse, pList, pDup); + if( pList ) pList->a[nInit+i].sortFlags = pAppend->a[i].sortFlags; + } + } + return pList; +} + +/* +** When rewriting a query, if the new subquery in the FROM clause +** contains TK_AGG_FUNCTION nodes that refer to an outer query, +** then we have to increase the Expr->op2 values of those nodes +** due to the extra subquery layer that was added. +** +** See also the incrAggDepth() routine in resolve.c +*/ +static int sqlite3WindowExtraAggFuncDepth(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_AGG_FUNCTION + && pExpr->op2>=pWalker->walkerDepth + ){ + pExpr->op2++; + } + return WRC_Continue; +} + +static int disallowAggregatesInOrderByCb(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_AGG_FUNCTION && pExpr->pAggInfo==0 ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + sqlite3ErrorMsg(pWalker->pParse, + "misuse of aggregate: %s()", pExpr->u.zToken); + } + return WRC_Continue; +} + +/* +** If the SELECT statement passed as the second argument does not invoke +** any SQL window functions, this function is a no-op. Otherwise, it +** rewrites the SELECT statement so that window function xStep functions +** are invoked in the correct order as described under "SELECT REWRITING" +** at the top of this file. +*/ +SQLITE_PRIVATE int sqlite3WindowRewrite(Parse *pParse, Select *p){ + int rc = SQLITE_OK; + if( p->pWin + && p->pPrior==0 + && ALWAYS((p->selFlags & SF_WinRewrite)==0) + && ALWAYS(!IN_RENAME_OBJECT) + ){ + Vdbe *v = sqlite3GetVdbe(pParse); + sqlite3 *db = pParse->db; + Select *pSub = 0; /* The subquery */ + SrcList *pSrc = p->pSrc; + Expr *pWhere = p->pWhere; + ExprList *pGroupBy = p->pGroupBy; + Expr *pHaving = p->pHaving; + ExprList *pSort = 0; + + ExprList *pSublist = 0; /* Expression list for sub-query */ + Window *pMWin = p->pWin; /* Main window object */ + Window *pWin; /* Window object iterator */ + Table *pTab; + Walker w; + + u32 selFlags = p->selFlags; + + pTab = sqlite3DbMallocZero(db, sizeof(Table)); + if( pTab==0 ){ + return sqlite3ErrorToParser(db, SQLITE_NOMEM); + } + sqlite3AggInfoPersistWalkerInit(&w, pParse); + sqlite3WalkSelect(&w, p); + if( (p->selFlags & SF_Aggregate)==0 ){ + w.xExprCallback = disallowAggregatesInOrderByCb; + w.xSelectCallback = 0; + sqlite3WalkExprList(&w, p->pOrderBy); + } + + p->pSrc = 0; + p->pWhere = 0; + p->pGroupBy = 0; + p->pHaving = 0; + p->selFlags &= ~SF_Aggregate; + p->selFlags |= SF_WinRewrite; + + /* Create the ORDER BY clause for the sub-select. This is the concatenation + ** of the window PARTITION and ORDER BY clauses. Then, if this makes it + ** redundant, remove the ORDER BY from the parent SELECT. */ + pSort = exprListAppendList(pParse, 0, pMWin->pPartition, 1); + pSort = exprListAppendList(pParse, pSort, pMWin->pOrderBy, 1); + if( pSort && p->pOrderBy && p->pOrderBy->nExpr<=pSort->nExpr ){ + int nSave = pSort->nExpr; + pSort->nExpr = p->pOrderBy->nExpr; + if( sqlite3ExprListCompare(pSort, p->pOrderBy, -1)==0 ){ + sqlite3ExprListDelete(db, p->pOrderBy); + p->pOrderBy = 0; + } + pSort->nExpr = nSave; + } + + /* Assign a cursor number for the ephemeral table used to buffer rows. + ** The OpenEphemeral instruction is coded later, after it is known how + ** many columns the table will have. */ + pMWin->iEphCsr = pParse->nTab++; + pParse->nTab += 3; + + selectWindowRewriteEList(pParse, pMWin, pSrc, p->pEList, pTab, &pSublist); + selectWindowRewriteEList(pParse, pMWin, pSrc, p->pOrderBy, pTab, &pSublist); + pMWin->nBufferCol = (pSublist ? pSublist->nExpr : 0); + + /* Append the PARTITION BY and ORDER BY expressions to the to the + ** sub-select expression list. They are required to figure out where + ** boundaries for partitions and sets of peer rows lie. */ + pSublist = exprListAppendList(pParse, pSublist, pMWin->pPartition, 0); + pSublist = exprListAppendList(pParse, pSublist, pMWin->pOrderBy, 0); + + /* Append the arguments passed to each window function to the + ** sub-select expression list. Also allocate two registers for each + ** window function - one for the accumulator, another for interim + ** results. */ + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + ExprList *pArgs; + assert( ExprUseXList(pWin->pOwner) ); + assert( pWin->pFunc!=0 ); + pArgs = pWin->pOwner->x.pList; + if( pWin->pFunc->funcFlags & SQLITE_FUNC_SUBTYPE ){ + selectWindowRewriteEList(pParse, pMWin, pSrc, pArgs, pTab, &pSublist); + pWin->iArgCol = (pSublist ? pSublist->nExpr : 0); + pWin->bExprArgs = 1; + }else{ + pWin->iArgCol = (pSublist ? pSublist->nExpr : 0); + pSublist = exprListAppendList(pParse, pSublist, pArgs, 0); + } + if( pWin->pFilter ){ + Expr *pFilter = sqlite3ExprDup(db, pWin->pFilter, 0); + pSublist = sqlite3ExprListAppend(pParse, pSublist, pFilter); + } + pWin->regAccum = ++pParse->nMem; + pWin->regResult = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Null, 0, pWin->regAccum); + } + + /* If there is no ORDER BY or PARTITION BY clause, and the window + ** function accepts zero arguments, and there are no other columns + ** selected (e.g. "SELECT row_number() OVER () FROM t1"), it is possible + ** that pSublist is still NULL here. Add a constant expression here to + ** keep everything legal in this case. + */ + if( pSublist==0 ){ + pSublist = sqlite3ExprListAppend(pParse, 0, + sqlite3Expr(db, TK_INTEGER, "0") + ); + } + + pSub = sqlite3SelectNew( + pParse, pSublist, pSrc, pWhere, pGroupBy, pHaving, pSort, 0, 0 + ); + SELECTTRACE(1,pParse,pSub, + ("New window-function subquery in FROM clause of (%u/%p)\n", + p->selId, p)); + p->pSrc = sqlite3SrcListAppend(pParse, 0, 0, 0); + assert( pSub!=0 || p->pSrc==0 ); /* Due to db->mallocFailed test inside + ** of sqlite3DbMallocRawNN() called from + ** sqlite3SrcListAppend() */ + if( p->pSrc ){ + Table *pTab2; + p->pSrc->a[0].pSelect = pSub; + sqlite3SrcListAssignCursors(pParse, p->pSrc); + pSub->selFlags |= SF_Expanded|SF_OrderByReqd; + pTab2 = sqlite3ResultSetOfSelect(pParse, pSub, SQLITE_AFF_NONE); + pSub->selFlags |= (selFlags & SF_Aggregate); + if( pTab2==0 ){ + /* Might actually be some other kind of error, but in that case + ** pParse->nErr will be set, so if SQLITE_NOMEM is set, we will get + ** the correct error message regardless. */ + rc = SQLITE_NOMEM; + }else{ + memcpy(pTab, pTab2, sizeof(Table)); + pTab->tabFlags |= TF_Ephemeral; + p->pSrc->a[0].pTab = pTab; + pTab = pTab2; + memset(&w, 0, sizeof(w)); + w.xExprCallback = sqlite3WindowExtraAggFuncDepth; + w.xSelectCallback = sqlite3WalkerDepthIncrease; + w.xSelectCallback2 = sqlite3WalkerDepthDecrease; + sqlite3WalkSelect(&w, pSub); + } + }else{ + sqlite3SelectDelete(db, pSub); + } + if( db->mallocFailed ) rc = SQLITE_NOMEM; + + /* Defer deleting the temporary table pTab because if an error occurred, + ** there could still be references to that table embedded in the + ** result-set or ORDER BY clause of the SELECT statement p. */ + sqlite3ParserAddCleanup(pParse, sqlite3DbFree, pTab); + } + + assert( rc==SQLITE_OK || pParse->nErr!=0 ); + return rc; +} + +/* +** Unlink the Window object from the Select to which it is attached, +** if it is attached. +*/ +SQLITE_PRIVATE void sqlite3WindowUnlinkFromSelect(Window *p){ + if( p->ppThis ){ + *p->ppThis = p->pNextWin; + if( p->pNextWin ) p->pNextWin->ppThis = p->ppThis; + p->ppThis = 0; + } +} + +/* +** Free the Window object passed as the second argument. +*/ +SQLITE_PRIVATE void sqlite3WindowDelete(sqlite3 *db, Window *p){ + if( p ){ + sqlite3WindowUnlinkFromSelect(p); + sqlite3ExprDelete(db, p->pFilter); + sqlite3ExprListDelete(db, p->pPartition); + sqlite3ExprListDelete(db, p->pOrderBy); + sqlite3ExprDelete(db, p->pEnd); + sqlite3ExprDelete(db, p->pStart); + sqlite3DbFree(db, p->zName); + sqlite3DbFree(db, p->zBase); + sqlite3DbFree(db, p); + } +} + +/* +** Free the linked list of Window objects starting at the second argument. +*/ +SQLITE_PRIVATE void sqlite3WindowListDelete(sqlite3 *db, Window *p){ + while( p ){ + Window *pNext = p->pNextWin; + sqlite3WindowDelete(db, p); + p = pNext; + } +} + +/* +** The argument expression is an PRECEDING or FOLLOWING offset. The +** value should be a non-negative integer. If the value is not a +** constant, change it to NULL. The fact that it is then a non-negative +** integer will be caught later. But it is important not to leave +** variable values in the expression tree. +*/ +static Expr *sqlite3WindowOffsetExpr(Parse *pParse, Expr *pExpr){ + if( 0==sqlite3ExprIsConstant(pExpr) ){ + if( IN_RENAME_OBJECT ) sqlite3RenameExprUnmap(pParse, pExpr); + sqlite3ExprDelete(pParse->db, pExpr); + pExpr = sqlite3ExprAlloc(pParse->db, TK_NULL, 0, 0); + } + return pExpr; +} + +/* +** Allocate and return a new Window object describing a Window Definition. +*/ +SQLITE_PRIVATE Window *sqlite3WindowAlloc( + Parse *pParse, /* Parsing context */ + int eType, /* Frame type. TK_RANGE, TK_ROWS, TK_GROUPS, or 0 */ + int eStart, /* Start type: CURRENT, PRECEDING, FOLLOWING, UNBOUNDED */ + Expr *pStart, /* Start window size if TK_PRECEDING or FOLLOWING */ + int eEnd, /* End type: CURRENT, FOLLOWING, TK_UNBOUNDED, PRECEDING */ + Expr *pEnd, /* End window size if TK_FOLLOWING or PRECEDING */ + u8 eExclude /* EXCLUDE clause */ +){ + Window *pWin = 0; + int bImplicitFrame = 0; + + /* Parser assures the following: */ + assert( eType==0 || eType==TK_RANGE || eType==TK_ROWS || eType==TK_GROUPS ); + assert( eStart==TK_CURRENT || eStart==TK_PRECEDING + || eStart==TK_UNBOUNDED || eStart==TK_FOLLOWING ); + assert( eEnd==TK_CURRENT || eEnd==TK_FOLLOWING + || eEnd==TK_UNBOUNDED || eEnd==TK_PRECEDING ); + assert( (eStart==TK_PRECEDING || eStart==TK_FOLLOWING)==(pStart!=0) ); + assert( (eEnd==TK_FOLLOWING || eEnd==TK_PRECEDING)==(pEnd!=0) ); + + if( eType==0 ){ + bImplicitFrame = 1; + eType = TK_RANGE; + } + + /* Additionally, the + ** starting boundary type may not occur earlier in the following list than + ** the ending boundary type: + ** + ** UNBOUNDED PRECEDING + ** PRECEDING + ** CURRENT ROW + ** FOLLOWING + ** UNBOUNDED FOLLOWING + ** + ** The parser ensures that "UNBOUNDED PRECEDING" cannot be used as an ending + ** boundary, and than "UNBOUNDED FOLLOWING" cannot be used as a starting + ** frame boundary. + */ + if( (eStart==TK_CURRENT && eEnd==TK_PRECEDING) + || (eStart==TK_FOLLOWING && (eEnd==TK_PRECEDING || eEnd==TK_CURRENT)) + ){ + sqlite3ErrorMsg(pParse, "unsupported frame specification"); + goto windowAllocErr; + } + + pWin = (Window*)sqlite3DbMallocZero(pParse->db, sizeof(Window)); + if( pWin==0 ) goto windowAllocErr; + pWin->eFrmType = eType; + pWin->eStart = eStart; + pWin->eEnd = eEnd; + if( eExclude==0 && OptimizationDisabled(pParse->db, SQLITE_WindowFunc) ){ + eExclude = TK_NO; + } + pWin->eExclude = eExclude; + pWin->bImplicitFrame = bImplicitFrame; + pWin->pEnd = sqlite3WindowOffsetExpr(pParse, pEnd); + pWin->pStart = sqlite3WindowOffsetExpr(pParse, pStart); + return pWin; + +windowAllocErr: + sqlite3ExprDelete(pParse->db, pEnd); + sqlite3ExprDelete(pParse->db, pStart); + return 0; +} + +/* +** Attach PARTITION and ORDER BY clauses pPartition and pOrderBy to window +** pWin. Also, if parameter pBase is not NULL, set pWin->zBase to the +** equivalent nul-terminated string. +*/ +SQLITE_PRIVATE Window *sqlite3WindowAssemble( + Parse *pParse, + Window *pWin, + ExprList *pPartition, + ExprList *pOrderBy, + Token *pBase +){ + if( pWin ){ + pWin->pPartition = pPartition; + pWin->pOrderBy = pOrderBy; + if( pBase ){ + pWin->zBase = sqlite3DbStrNDup(pParse->db, pBase->z, pBase->n); + } + }else{ + sqlite3ExprListDelete(pParse->db, pPartition); + sqlite3ExprListDelete(pParse->db, pOrderBy); + } + return pWin; +} + +/* +** Window *pWin has just been created from a WINDOW clause. Tokne pBase +** is the base window. Earlier windows from the same WINDOW clause are +** stored in the linked list starting at pWin->pNextWin. This function +** either updates *pWin according to the base specification, or else +** leaves an error in pParse. +*/ +SQLITE_PRIVATE void sqlite3WindowChain(Parse *pParse, Window *pWin, Window *pList){ + if( pWin->zBase ){ + sqlite3 *db = pParse->db; + Window *pExist = windowFind(pParse, pList, pWin->zBase); + if( pExist ){ + const char *zErr = 0; + /* Check for errors */ + if( pWin->pPartition ){ + zErr = "PARTITION clause"; + }else if( pExist->pOrderBy && pWin->pOrderBy ){ + zErr = "ORDER BY clause"; + }else if( pExist->bImplicitFrame==0 ){ + zErr = "frame specification"; + } + if( zErr ){ + sqlite3ErrorMsg(pParse, + "cannot override %s of window: %s", zErr, pWin->zBase + ); + }else{ + pWin->pPartition = sqlite3ExprListDup(db, pExist->pPartition, 0); + if( pExist->pOrderBy ){ + assert( pWin->pOrderBy==0 ); + pWin->pOrderBy = sqlite3ExprListDup(db, pExist->pOrderBy, 0); + } + sqlite3DbFree(db, pWin->zBase); + pWin->zBase = 0; + } + } + } +} + +/* +** Attach window object pWin to expression p. +*/ +SQLITE_PRIVATE void sqlite3WindowAttach(Parse *pParse, Expr *p, Window *pWin){ + if( p ){ + assert( p->op==TK_FUNCTION ); + assert( pWin ); + p->y.pWin = pWin; + ExprSetProperty(p, EP_WinFunc); + pWin->pOwner = p; + if( (p->flags & EP_Distinct) && pWin->eFrmType!=TK_FILTER ){ + sqlite3ErrorMsg(pParse, + "DISTINCT is not supported for window functions" + ); + } + }else{ + sqlite3WindowDelete(pParse->db, pWin); + } +} + +/* +** Possibly link window pWin into the list at pSel->pWin (window functions +** to be processed as part of SELECT statement pSel). The window is linked +** in if either (a) there are no other windows already linked to this +** SELECT, or (b) the windows already linked use a compatible window frame. +*/ +SQLITE_PRIVATE void sqlite3WindowLink(Select *pSel, Window *pWin){ + if( pSel ){ + if( 0==pSel->pWin || 0==sqlite3WindowCompare(0, pSel->pWin, pWin, 0) ){ + pWin->pNextWin = pSel->pWin; + if( pSel->pWin ){ + pSel->pWin->ppThis = &pWin->pNextWin; + } + pSel->pWin = pWin; + pWin->ppThis = &pSel->pWin; + }else{ + if( sqlite3ExprListCompare(pWin->pPartition, pSel->pWin->pPartition,-1) ){ + pSel->selFlags |= SF_MultiPart; + } + } + } +} + +/* +** Return 0 if the two window objects are identical, 1 if they are +** different, or 2 if it cannot be determined if the objects are identical +** or not. Identical window objects can be processed in a single scan. +*/ +SQLITE_PRIVATE int sqlite3WindowCompare( + const Parse *pParse, + const Window *p1, + const Window *p2, + int bFilter +){ + int res; + if( NEVER(p1==0) || NEVER(p2==0) ) return 1; + if( p1->eFrmType!=p2->eFrmType ) return 1; + if( p1->eStart!=p2->eStart ) return 1; + if( p1->eEnd!=p2->eEnd ) return 1; + if( p1->eExclude!=p2->eExclude ) return 1; + if( sqlite3ExprCompare(pParse, p1->pStart, p2->pStart, -1) ) return 1; + if( sqlite3ExprCompare(pParse, p1->pEnd, p2->pEnd, -1) ) return 1; + if( (res = sqlite3ExprListCompare(p1->pPartition, p2->pPartition, -1)) ){ + return res; + } + if( (res = sqlite3ExprListCompare(p1->pOrderBy, p2->pOrderBy, -1)) ){ + return res; + } + if( bFilter ){ + if( (res = sqlite3ExprCompare(pParse, p1->pFilter, p2->pFilter, -1)) ){ + return res; + } + } + return 0; +} + + +/* +** This is called by code in select.c before it calls sqlite3WhereBegin() +** to begin iterating through the sub-query results. It is used to allocate +** and initialize registers and cursors used by sqlite3WindowCodeStep(). +*/ +SQLITE_PRIVATE void sqlite3WindowCodeInit(Parse *pParse, Select *pSelect){ + int nEphExpr = pSelect->pSrc->a[0].pSelect->pEList->nExpr; + Window *pMWin = pSelect->pWin; + Window *pWin; + Vdbe *v = sqlite3GetVdbe(pParse); + + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pMWin->iEphCsr, nEphExpr); + sqlite3VdbeAddOp2(v, OP_OpenDup, pMWin->iEphCsr+1, pMWin->iEphCsr); + sqlite3VdbeAddOp2(v, OP_OpenDup, pMWin->iEphCsr+2, pMWin->iEphCsr); + sqlite3VdbeAddOp2(v, OP_OpenDup, pMWin->iEphCsr+3, pMWin->iEphCsr); + + /* Allocate registers to use for PARTITION BY values, if any. Initialize + ** said registers to NULL. */ + if( pMWin->pPartition ){ + int nExpr = pMWin->pPartition->nExpr; + pMWin->regPart = pParse->nMem+1; + pParse->nMem += nExpr; + sqlite3VdbeAddOp3(v, OP_Null, 0, pMWin->regPart, pMWin->regPart+nExpr-1); + } + + pMWin->regOne = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 1, pMWin->regOne); + + if( pMWin->eExclude ){ + pMWin->regStartRowid = ++pParse->nMem; + pMWin->regEndRowid = ++pParse->nMem; + pMWin->csrApp = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_Integer, 1, pMWin->regStartRowid); + sqlite3VdbeAddOp2(v, OP_Integer, 0, pMWin->regEndRowid); + sqlite3VdbeAddOp2(v, OP_OpenDup, pMWin->csrApp, pMWin->iEphCsr); + return; + } + + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + FuncDef *p = pWin->pFunc; + if( (p->funcFlags & SQLITE_FUNC_MINMAX) && pWin->eStart!=TK_UNBOUNDED ){ + /* The inline versions of min() and max() require a single ephemeral + ** table and 3 registers. The registers are used as follows: + ** + ** regApp+0: slot to copy min()/max() argument to for MakeRecord + ** regApp+1: integer value used to ensure keys are unique + ** regApp+2: output of MakeRecord + */ + ExprList *pList; + KeyInfo *pKeyInfo; + assert( ExprUseXList(pWin->pOwner) ); + pList = pWin->pOwner->x.pList; + pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pList, 0, 0); + pWin->csrApp = pParse->nTab++; + pWin->regApp = pParse->nMem+1; + pParse->nMem += 3; + if( pKeyInfo && pWin->pFunc->zName[1]=='i' ){ + assert( pKeyInfo->aSortFlags[0]==0 ); + pKeyInfo->aSortFlags[0] = KEYINFO_ORDER_DESC; + } + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pWin->csrApp, 2); + sqlite3VdbeAppendP4(v, pKeyInfo, P4_KEYINFO); + sqlite3VdbeAddOp2(v, OP_Integer, 0, pWin->regApp+1); + } + else if( p->zName==nth_valueName || p->zName==first_valueName ){ + /* Allocate two registers at pWin->regApp. These will be used to + ** store the start and end index of the current frame. */ + pWin->regApp = pParse->nMem+1; + pWin->csrApp = pParse->nTab++; + pParse->nMem += 2; + sqlite3VdbeAddOp2(v, OP_OpenDup, pWin->csrApp, pMWin->iEphCsr); + } + else if( p->zName==leadName || p->zName==lagName ){ + pWin->csrApp = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_OpenDup, pWin->csrApp, pMWin->iEphCsr); + } + } +} + +#define WINDOW_STARTING_INT 0 +#define WINDOW_ENDING_INT 1 +#define WINDOW_NTH_VALUE_INT 2 +#define WINDOW_STARTING_NUM 3 +#define WINDOW_ENDING_NUM 4 + +/* +** A "PRECEDING " (eCond==0) or "FOLLOWING " (eCond==1) or the +** value of the second argument to nth_value() (eCond==2) has just been +** evaluated and the result left in register reg. This function generates VM +** code to check that the value is a non-negative integer and throws an +** exception if it is not. +*/ +static void windowCheckValue(Parse *pParse, int reg, int eCond){ + static const char *azErr[] = { + "frame starting offset must be a non-negative integer", + "frame ending offset must be a non-negative integer", + "second argument to nth_value must be a positive integer", + "frame starting offset must be a non-negative number", + "frame ending offset must be a non-negative number", + }; + static int aOp[] = { OP_Ge, OP_Ge, OP_Gt, OP_Ge, OP_Ge }; + Vdbe *v = sqlite3GetVdbe(pParse); + int regZero = sqlite3GetTempReg(pParse); + assert( eCond>=0 && eCond=WINDOW_STARTING_NUM ){ + int regString = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp4(v, OP_String8, 0, regString, 0, "", P4_STATIC); + sqlite3VdbeAddOp3(v, OP_Ge, regString, sqlite3VdbeCurrentAddr(v)+2, reg); + sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC|SQLITE_JUMPIFNULL); + VdbeCoverage(v); + assert( eCond==3 || eCond==4 ); + VdbeCoverageIf(v, eCond==3); + VdbeCoverageIf(v, eCond==4); + }else{ + sqlite3VdbeAddOp2(v, OP_MustBeInt, reg, sqlite3VdbeCurrentAddr(v)+2); + VdbeCoverage(v); + assert( eCond==0 || eCond==1 || eCond==2 ); + VdbeCoverageIf(v, eCond==0); + VdbeCoverageIf(v, eCond==1); + VdbeCoverageIf(v, eCond==2); + } + sqlite3VdbeAddOp3(v, aOp[eCond], regZero, sqlite3VdbeCurrentAddr(v)+2, reg); + sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC); + VdbeCoverageNeverNullIf(v, eCond==0); /* NULL case captured by */ + VdbeCoverageNeverNullIf(v, eCond==1); /* the OP_MustBeInt */ + VdbeCoverageNeverNullIf(v, eCond==2); + VdbeCoverageNeverNullIf(v, eCond==3); /* NULL case caught by */ + VdbeCoverageNeverNullIf(v, eCond==4); /* the OP_Ge */ + sqlite3MayAbort(pParse); + sqlite3VdbeAddOp2(v, OP_Halt, SQLITE_ERROR, OE_Abort); + sqlite3VdbeAppendP4(v, (void*)azErr[eCond], P4_STATIC); + sqlite3ReleaseTempReg(pParse, regZero); +} + +/* +** Return the number of arguments passed to the window-function associated +** with the object passed as the only argument to this function. +*/ +static int windowArgCount(Window *pWin){ + const ExprList *pList; + assert( ExprUseXList(pWin->pOwner) ); + pList = pWin->pOwner->x.pList; + return (pList ? pList->nExpr : 0); +} + +typedef struct WindowCodeArg WindowCodeArg; +typedef struct WindowCsrAndReg WindowCsrAndReg; + +/* +** See comments above struct WindowCodeArg. +*/ +struct WindowCsrAndReg { + int csr; /* Cursor number */ + int reg; /* First in array of peer values */ +}; + +/* +** A single instance of this structure is allocated on the stack by +** sqlite3WindowCodeStep() and a pointer to it passed to the various helper +** routines. This is to reduce the number of arguments required by each +** helper function. +** +** regArg: +** Each window function requires an accumulator register (just as an +** ordinary aggregate function does). This variable is set to the first +** in an array of accumulator registers - one for each window function +** in the WindowCodeArg.pMWin list. +** +** eDelete: +** The window functions implementation sometimes caches the input rows +** that it processes in a temporary table. If it is not zero, this +** variable indicates when rows may be removed from the temp table (in +** order to reduce memory requirements - it would always be safe just +** to leave them there). Possible values for eDelete are: +** +** WINDOW_RETURN_ROW: +** An input row can be discarded after it is returned to the caller. +** +** WINDOW_AGGINVERSE: +** An input row can be discarded after the window functions xInverse() +** callbacks have been invoked in it. +** +** WINDOW_AGGSTEP: +** An input row can be discarded after the window functions xStep() +** callbacks have been invoked in it. +** +** start,current,end +** Consider a window-frame similar to the following: +** +** (ORDER BY a, b GROUPS BETWEEN 2 PRECEDING AND 2 FOLLOWING) +** +** The windows functions implmentation caches the input rows in a temp +** table, sorted by "a, b" (it actually populates the cache lazily, and +** aggressively removes rows once they are no longer required, but that's +** a mere detail). It keeps three cursors open on the temp table. One +** (current) that points to the next row to return to the query engine +** once its window function values have been calculated. Another (end) +** points to the next row to call the xStep() method of each window function +** on (so that it is 2 groups ahead of current). And a third (start) that +** points to the next row to call the xInverse() method of each window +** function on. +** +** Each cursor (start, current and end) consists of a VDBE cursor +** (WindowCsrAndReg.csr) and an array of registers (starting at +** WindowCodeArg.reg) that always contains a copy of the peer values +** read from the corresponding cursor. +** +** Depending on the window-frame in question, all three cursors may not +** be required. In this case both WindowCodeArg.csr and reg are set to +** 0. +*/ +struct WindowCodeArg { + Parse *pParse; /* Parse context */ + Window *pMWin; /* First in list of functions being processed */ + Vdbe *pVdbe; /* VDBE object */ + int addrGosub; /* OP_Gosub to this address to return one row */ + int regGosub; /* Register used with OP_Gosub(addrGosub) */ + int regArg; /* First in array of accumulator registers */ + int eDelete; /* See above */ + int regRowid; + + WindowCsrAndReg start; + WindowCsrAndReg current; + WindowCsrAndReg end; +}; + +/* +** Generate VM code to read the window frames peer values from cursor csr into +** an array of registers starting at reg. +*/ +static void windowReadPeerValues( + WindowCodeArg *p, + int csr, + int reg +){ + Window *pMWin = p->pMWin; + ExprList *pOrderBy = pMWin->pOrderBy; + if( pOrderBy ){ + Vdbe *v = sqlite3GetVdbe(p->pParse); + ExprList *pPart = pMWin->pPartition; + int iColOff = pMWin->nBufferCol + (pPart ? pPart->nExpr : 0); + int i; + for(i=0; inExpr; i++){ + sqlite3VdbeAddOp3(v, OP_Column, csr, iColOff+i, reg+i); + } + } +} + +/* +** Generate VM code to invoke either xStep() (if bInverse is 0) or +** xInverse (if bInverse is non-zero) for each window function in the +** linked list starting at pMWin. Or, for built-in window functions +** that do not use the standard function API, generate the required +** inline VM code. +** +** If argument csr is greater than or equal to 0, then argument reg is +** the first register in an array of registers guaranteed to be large +** enough to hold the array of arguments for each function. In this case +** the arguments are extracted from the current row of csr into the +** array of registers before invoking OP_AggStep or OP_AggInverse +** +** Or, if csr is less than zero, then the array of registers at reg is +** already populated with all columns from the current row of the sub-query. +** +** If argument regPartSize is non-zero, then it is a register containing the +** number of rows in the current partition. +*/ +static void windowAggStep( + WindowCodeArg *p, + Window *pMWin, /* Linked list of window functions */ + int csr, /* Read arguments from this cursor */ + int bInverse, /* True to invoke xInverse instead of xStep */ + int reg /* Array of registers */ +){ + Parse *pParse = p->pParse; + Vdbe *v = sqlite3GetVdbe(pParse); + Window *pWin; + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + FuncDef *pFunc = pWin->pFunc; + int regArg; + int nArg = pWin->bExprArgs ? 0 : windowArgCount(pWin); + int i; + + assert( bInverse==0 || pWin->eStart!=TK_UNBOUNDED ); + + /* All OVER clauses in the same window function aggregate step must + ** be the same. */ + assert( pWin==pMWin || sqlite3WindowCompare(pParse,pWin,pMWin,0)!=1 ); + + for(i=0; izName!=nth_valueName ){ + sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol+i, reg+i); + }else{ + sqlite3VdbeAddOp3(v, OP_Column, pMWin->iEphCsr, pWin->iArgCol+i, reg+i); + } + } + regArg = reg; + + if( pMWin->regStartRowid==0 + && (pFunc->funcFlags & SQLITE_FUNC_MINMAX) + && (pWin->eStart!=TK_UNBOUNDED) + ){ + int addrIsNull = sqlite3VdbeAddOp1(v, OP_IsNull, regArg); + VdbeCoverage(v); + if( bInverse==0 ){ + sqlite3VdbeAddOp2(v, OP_AddImm, pWin->regApp+1, 1); + sqlite3VdbeAddOp2(v, OP_SCopy, regArg, pWin->regApp); + sqlite3VdbeAddOp3(v, OP_MakeRecord, pWin->regApp, 2, pWin->regApp+2); + sqlite3VdbeAddOp2(v, OP_IdxInsert, pWin->csrApp, pWin->regApp+2); + }else{ + sqlite3VdbeAddOp4Int(v, OP_SeekGE, pWin->csrApp, 0, regArg, 1); + VdbeCoverageNeverTaken(v); + sqlite3VdbeAddOp1(v, OP_Delete, pWin->csrApp); + sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-2); + } + sqlite3VdbeJumpHere(v, addrIsNull); + }else if( pWin->regApp ){ + assert( pFunc->zName==nth_valueName + || pFunc->zName==first_valueName + ); + assert( bInverse==0 || bInverse==1 ); + sqlite3VdbeAddOp2(v, OP_AddImm, pWin->regApp+1-bInverse, 1); + }else if( pFunc->xSFunc!=noopStepFunc ){ + int addrIf = 0; + if( pWin->pFilter ){ + int regTmp; + assert( ExprUseXList(pWin->pOwner) ); + assert( pWin->bExprArgs || !nArg ||nArg==pWin->pOwner->x.pList->nExpr ); + assert( pWin->bExprArgs || nArg ||pWin->pOwner->x.pList==0 ); + regTmp = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol+nArg,regTmp); + addrIf = sqlite3VdbeAddOp3(v, OP_IfNot, regTmp, 0, 1); + VdbeCoverage(v); + sqlite3ReleaseTempReg(pParse, regTmp); + } + + if( pWin->bExprArgs ){ + int iOp = sqlite3VdbeCurrentAddr(v); + int iEnd; + + assert( ExprUseXList(pWin->pOwner) ); + nArg = pWin->pOwner->x.pList->nExpr; + regArg = sqlite3GetTempRange(pParse, nArg); + sqlite3ExprCodeExprList(pParse, pWin->pOwner->x.pList, regArg, 0, 0); + + for(iEnd=sqlite3VdbeCurrentAddr(v); iOpopcode==OP_Column && pOp->p1==pMWin->iEphCsr ){ + pOp->p1 = csr; + } + } + } + if( pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){ + CollSeq *pColl; + assert( nArg>0 ); + assert( ExprUseXList(pWin->pOwner) ); + pColl = sqlite3ExprNNCollSeq(pParse, pWin->pOwner->x.pList->a[0].pExpr); + sqlite3VdbeAddOp4(v, OP_CollSeq, 0,0,0, (const char*)pColl, P4_COLLSEQ); + } + sqlite3VdbeAddOp3(v, bInverse? OP_AggInverse : OP_AggStep, + bInverse, regArg, pWin->regAccum); + sqlite3VdbeAppendP4(v, pFunc, P4_FUNCDEF); + sqlite3VdbeChangeP5(v, (u8)nArg); + if( pWin->bExprArgs ){ + sqlite3ReleaseTempRange(pParse, regArg, nArg); + } + if( addrIf ) sqlite3VdbeJumpHere(v, addrIf); + } + } +} + +/* +** Values that may be passed as the second argument to windowCodeOp(). +*/ +#define WINDOW_RETURN_ROW 1 +#define WINDOW_AGGINVERSE 2 +#define WINDOW_AGGSTEP 3 + +/* +** Generate VM code to invoke either xValue() (bFin==0) or xFinalize() +** (bFin==1) for each window function in the linked list starting at +** pMWin. Or, for built-in window-functions that do not use the standard +** API, generate the equivalent VM code. +*/ +static void windowAggFinal(WindowCodeArg *p, int bFin){ + Parse *pParse = p->pParse; + Window *pMWin = p->pMWin; + Vdbe *v = sqlite3GetVdbe(pParse); + Window *pWin; + + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + if( pMWin->regStartRowid==0 + && (pWin->pFunc->funcFlags & SQLITE_FUNC_MINMAX) + && (pWin->eStart!=TK_UNBOUNDED) + ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, pWin->regResult); + sqlite3VdbeAddOp1(v, OP_Last, pWin->csrApp); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_Column, pWin->csrApp, 0, pWin->regResult); + sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-2); + }else if( pWin->regApp ){ + assert( pMWin->regStartRowid==0 ); + }else{ + int nArg = windowArgCount(pWin); + if( bFin ){ + sqlite3VdbeAddOp2(v, OP_AggFinal, pWin->regAccum, nArg); + sqlite3VdbeAppendP4(v, pWin->pFunc, P4_FUNCDEF); + sqlite3VdbeAddOp2(v, OP_Copy, pWin->regAccum, pWin->regResult); + sqlite3VdbeAddOp2(v, OP_Null, 0, pWin->regAccum); + }else{ + sqlite3VdbeAddOp3(v, OP_AggValue,pWin->regAccum,nArg,pWin->regResult); + sqlite3VdbeAppendP4(v, pWin->pFunc, P4_FUNCDEF); + } + } + } +} + +/* +** Generate code to calculate the current values of all window functions in the +** p->pMWin list by doing a full scan of the current window frame. Store the +** results in the Window.regResult registers, ready to return the upper +** layer. +*/ +static void windowFullScan(WindowCodeArg *p){ + Window *pWin; + Parse *pParse = p->pParse; + Window *pMWin = p->pMWin; + Vdbe *v = p->pVdbe; + + int regCRowid = 0; /* Current rowid value */ + int regCPeer = 0; /* Current peer values */ + int regRowid = 0; /* AggStep rowid value */ + int regPeer = 0; /* AggStep peer values */ + + int nPeer; + int lblNext; + int lblBrk; + int addrNext; + int csr; + + VdbeModuleComment((v, "windowFullScan begin")); + + assert( pMWin!=0 ); + csr = pMWin->csrApp; + nPeer = (pMWin->pOrderBy ? pMWin->pOrderBy->nExpr : 0); + + lblNext = sqlite3VdbeMakeLabel(pParse); + lblBrk = sqlite3VdbeMakeLabel(pParse); + + regCRowid = sqlite3GetTempReg(pParse); + regRowid = sqlite3GetTempReg(pParse); + if( nPeer ){ + regCPeer = sqlite3GetTempRange(pParse, nPeer); + regPeer = sqlite3GetTempRange(pParse, nPeer); + } + + sqlite3VdbeAddOp2(v, OP_Rowid, pMWin->iEphCsr, regCRowid); + windowReadPeerValues(p, pMWin->iEphCsr, regCPeer); + + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + sqlite3VdbeAddOp2(v, OP_Null, 0, pWin->regAccum); + } + + sqlite3VdbeAddOp3(v, OP_SeekGE, csr, lblBrk, pMWin->regStartRowid); + VdbeCoverage(v); + addrNext = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp2(v, OP_Rowid, csr, regRowid); + sqlite3VdbeAddOp3(v, OP_Gt, pMWin->regEndRowid, lblBrk, regRowid); + VdbeCoverageNeverNull(v); + + if( pMWin->eExclude==TK_CURRENT ){ + sqlite3VdbeAddOp3(v, OP_Eq, regCRowid, lblNext, regRowid); + VdbeCoverageNeverNull(v); + }else if( pMWin->eExclude!=TK_NO ){ + int addr; + int addrEq = 0; + KeyInfo *pKeyInfo = 0; + + if( pMWin->pOrderBy ){ + pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pMWin->pOrderBy, 0, 0); + } + if( pMWin->eExclude==TK_TIES ){ + addrEq = sqlite3VdbeAddOp3(v, OP_Eq, regCRowid, 0, regRowid); + VdbeCoverageNeverNull(v); + } + if( pKeyInfo ){ + windowReadPeerValues(p, csr, regPeer); + sqlite3VdbeAddOp3(v, OP_Compare, regPeer, regCPeer, nPeer); + sqlite3VdbeAppendP4(v, (void*)pKeyInfo, P4_KEYINFO); + addr = sqlite3VdbeCurrentAddr(v)+1; + sqlite3VdbeAddOp3(v, OP_Jump, addr, lblNext, addr); + VdbeCoverageEqNe(v); + }else{ + sqlite3VdbeAddOp2(v, OP_Goto, 0, lblNext); + } + if( addrEq ) sqlite3VdbeJumpHere(v, addrEq); + } + + windowAggStep(p, pMWin, csr, 0, p->regArg); + + sqlite3VdbeResolveLabel(v, lblNext); + sqlite3VdbeAddOp2(v, OP_Next, csr, addrNext); + VdbeCoverage(v); + sqlite3VdbeJumpHere(v, addrNext-1); + sqlite3VdbeJumpHere(v, addrNext+1); + sqlite3ReleaseTempReg(pParse, regRowid); + sqlite3ReleaseTempReg(pParse, regCRowid); + if( nPeer ){ + sqlite3ReleaseTempRange(pParse, regPeer, nPeer); + sqlite3ReleaseTempRange(pParse, regCPeer, nPeer); + } + + windowAggFinal(p, 1); + VdbeModuleComment((v, "windowFullScan end")); +} + +/* +** Invoke the sub-routine at regGosub (generated by code in select.c) to +** return the current row of Window.iEphCsr. If all window functions are +** aggregate window functions that use the standard API, a single +** OP_Gosub instruction is all that this routine generates. Extra VM code +** for per-row processing is only generated for the following built-in window +** functions: +** +** nth_value() +** first_value() +** lag() +** lead() +*/ +static void windowReturnOneRow(WindowCodeArg *p){ + Window *pMWin = p->pMWin; + Vdbe *v = p->pVdbe; + + if( pMWin->regStartRowid ){ + windowFullScan(p); + }else{ + Parse *pParse = p->pParse; + Window *pWin; + + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + FuncDef *pFunc = pWin->pFunc; + assert( ExprUseXList(pWin->pOwner) ); + if( pFunc->zName==nth_valueName + || pFunc->zName==first_valueName + ){ + int csr = pWin->csrApp; + int lbl = sqlite3VdbeMakeLabel(pParse); + int tmpReg = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp2(v, OP_Null, 0, pWin->regResult); + + if( pFunc->zName==nth_valueName ){ + sqlite3VdbeAddOp3(v, OP_Column,pMWin->iEphCsr,pWin->iArgCol+1,tmpReg); + windowCheckValue(pParse, tmpReg, 2); + }else{ + sqlite3VdbeAddOp2(v, OP_Integer, 1, tmpReg); + } + sqlite3VdbeAddOp3(v, OP_Add, tmpReg, pWin->regApp, tmpReg); + sqlite3VdbeAddOp3(v, OP_Gt, pWin->regApp+1, lbl, tmpReg); + VdbeCoverageNeverNull(v); + sqlite3VdbeAddOp3(v, OP_SeekRowid, csr, 0, tmpReg); + VdbeCoverageNeverTaken(v); + sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol, pWin->regResult); + sqlite3VdbeResolveLabel(v, lbl); + sqlite3ReleaseTempReg(pParse, tmpReg); + } + else if( pFunc->zName==leadName || pFunc->zName==lagName ){ + int nArg = pWin->pOwner->x.pList->nExpr; + int csr = pWin->csrApp; + int lbl = sqlite3VdbeMakeLabel(pParse); + int tmpReg = sqlite3GetTempReg(pParse); + int iEph = pMWin->iEphCsr; + + if( nArg<3 ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, pWin->regResult); + }else{ + sqlite3VdbeAddOp3(v, OP_Column, iEph,pWin->iArgCol+2,pWin->regResult); + } + sqlite3VdbeAddOp2(v, OP_Rowid, iEph, tmpReg); + if( nArg<2 ){ + int val = (pFunc->zName==leadName ? 1 : -1); + sqlite3VdbeAddOp2(v, OP_AddImm, tmpReg, val); + }else{ + int op = (pFunc->zName==leadName ? OP_Add : OP_Subtract); + int tmpReg2 = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_Column, iEph, pWin->iArgCol+1, tmpReg2); + sqlite3VdbeAddOp3(v, op, tmpReg2, tmpReg, tmpReg); + sqlite3ReleaseTempReg(pParse, tmpReg2); + } + + sqlite3VdbeAddOp3(v, OP_SeekRowid, csr, lbl, tmpReg); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol, pWin->regResult); + sqlite3VdbeResolveLabel(v, lbl); + sqlite3ReleaseTempReg(pParse, tmpReg); + } + } + } + sqlite3VdbeAddOp2(v, OP_Gosub, p->regGosub, p->addrGosub); +} + +/* +** Generate code to set the accumulator register for each window function +** in the linked list passed as the second argument to NULL. And perform +** any equivalent initialization required by any built-in window functions +** in the list. +*/ +static int windowInitAccum(Parse *pParse, Window *pMWin){ + Vdbe *v = sqlite3GetVdbe(pParse); + int regArg; + int nArg = 0; + Window *pWin; + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + FuncDef *pFunc = pWin->pFunc; + assert( pWin->regAccum ); + sqlite3VdbeAddOp2(v, OP_Null, 0, pWin->regAccum); + nArg = MAX(nArg, windowArgCount(pWin)); + if( pMWin->regStartRowid==0 ){ + if( pFunc->zName==nth_valueName || pFunc->zName==first_valueName ){ + sqlite3VdbeAddOp2(v, OP_Integer, 0, pWin->regApp); + sqlite3VdbeAddOp2(v, OP_Integer, 0, pWin->regApp+1); + } + + if( (pFunc->funcFlags & SQLITE_FUNC_MINMAX) && pWin->csrApp ){ + assert( pWin->eStart!=TK_UNBOUNDED ); + sqlite3VdbeAddOp1(v, OP_ResetSorter, pWin->csrApp); + sqlite3VdbeAddOp2(v, OP_Integer, 0, pWin->regApp+1); + } + } + } + regArg = pParse->nMem+1; + pParse->nMem += nArg; + return regArg; +} + +/* +** Return true if the current frame should be cached in the ephemeral table, +** even if there are no xInverse() calls required. +*/ +static int windowCacheFrame(Window *pMWin){ + Window *pWin; + if( pMWin->regStartRowid ) return 1; + for(pWin=pMWin; pWin; pWin=pWin->pNextWin){ + FuncDef *pFunc = pWin->pFunc; + if( (pFunc->zName==nth_valueName) + || (pFunc->zName==first_valueName) + || (pFunc->zName==leadName) + || (pFunc->zName==lagName) + ){ + return 1; + } + } + return 0; +} + +/* +** regOld and regNew are each the first register in an array of size +** pOrderBy->nExpr. This function generates code to compare the two +** arrays of registers using the collation sequences and other comparison +** parameters specified by pOrderBy. +** +** If the two arrays are not equal, the contents of regNew is copied to +** regOld and control falls through. Otherwise, if the contents of the arrays +** are equal, an OP_Goto is executed. The address of the OP_Goto is returned. +*/ +static void windowIfNewPeer( + Parse *pParse, + ExprList *pOrderBy, + int regNew, /* First in array of new values */ + int regOld, /* First in array of old values */ + int addr /* Jump here */ +){ + Vdbe *v = sqlite3GetVdbe(pParse); + if( pOrderBy ){ + int nVal = pOrderBy->nExpr; + KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pOrderBy, 0, 0); + sqlite3VdbeAddOp3(v, OP_Compare, regOld, regNew, nVal); + sqlite3VdbeAppendP4(v, (void*)pKeyInfo, P4_KEYINFO); + sqlite3VdbeAddOp3(v, OP_Jump, + sqlite3VdbeCurrentAddr(v)+1, addr, sqlite3VdbeCurrentAddr(v)+1 + ); + VdbeCoverageEqNe(v); + sqlite3VdbeAddOp3(v, OP_Copy, regNew, regOld, nVal-1); + }else{ + sqlite3VdbeAddOp2(v, OP_Goto, 0, addr); + } +} + +/* +** This function is called as part of generating VM programs for RANGE +** offset PRECEDING/FOLLOWING frame boundaries. Assuming "ASC" order for +** the ORDER BY term in the window, and that argument op is OP_Ge, it generates +** code equivalent to: +** +** if( csr1.peerVal + regVal >= csr2.peerVal ) goto lbl; +** +** The value of parameter op may also be OP_Gt or OP_Le. In these cases the +** operator in the above pseudo-code is replaced with ">" or "<=", respectively. +** +** If the sort-order for the ORDER BY term in the window is DESC, then the +** comparison is reversed. Instead of adding regVal to csr1.peerVal, it is +** subtracted. And the comparison operator is inverted to - ">=" becomes "<=", +** ">" becomes "<", and so on. So, with DESC sort order, if the argument op +** is OP_Ge, the generated code is equivalent to: +** +** if( csr1.peerVal - regVal <= csr2.peerVal ) goto lbl; +** +** A special type of arithmetic is used such that if csr1.peerVal is not +** a numeric type (real or integer), then the result of the addition +** or subtraction is a a copy of csr1.peerVal. +*/ +static void windowCodeRangeTest( + WindowCodeArg *p, + int op, /* OP_Ge, OP_Gt, or OP_Le */ + int csr1, /* Cursor number for cursor 1 */ + int regVal, /* Register containing non-negative number */ + int csr2, /* Cursor number for cursor 2 */ + int lbl /* Jump destination if condition is true */ +){ + Parse *pParse = p->pParse; + Vdbe *v = sqlite3GetVdbe(pParse); + ExprList *pOrderBy = p->pMWin->pOrderBy; /* ORDER BY clause for window */ + int reg1 = sqlite3GetTempReg(pParse); /* Reg. for csr1.peerVal+regVal */ + int reg2 = sqlite3GetTempReg(pParse); /* Reg. for csr2.peerVal */ + int regString = ++pParse->nMem; /* Reg. for constant value '' */ + int arith = OP_Add; /* OP_Add or OP_Subtract */ + int addrGe; /* Jump destination */ + int addrDone = sqlite3VdbeMakeLabel(pParse); /* Address past OP_Ge */ + CollSeq *pColl; + + /* Read the peer-value from each cursor into a register */ + windowReadPeerValues(p, csr1, reg1); + windowReadPeerValues(p, csr2, reg2); + + assert( op==OP_Ge || op==OP_Gt || op==OP_Le ); + assert( pOrderBy && pOrderBy->nExpr==1 ); + if( pOrderBy->a[0].sortFlags & KEYINFO_ORDER_DESC ){ + switch( op ){ + case OP_Ge: op = OP_Le; break; + case OP_Gt: op = OP_Lt; break; + default: assert( op==OP_Le ); op = OP_Ge; break; + } + arith = OP_Subtract; + } + + VdbeModuleComment((v, "CodeRangeTest: if( R%d %s R%d %s R%d ) goto lbl", + reg1, (arith==OP_Add ? "+" : "-"), regVal, + ((op==OP_Ge) ? ">=" : (op==OP_Le) ? "<=" : (op==OP_Gt) ? ">" : "<"), reg2 + )); + + /* If the BIGNULL flag is set for the ORDER BY, then it is required to + ** consider NULL values to be larger than all other values, instead of + ** the usual smaller. The VDBE opcodes OP_Ge and so on do not handle this + ** (and adding that capability causes a performance regression), so + ** instead if the BIGNULL flag is set then cases where either reg1 or + ** reg2 are NULL are handled separately in the following block. The code + ** generated is equivalent to: + ** + ** if( reg1 IS NULL ){ + ** if( op==OP_Ge ) goto lbl; + ** if( op==OP_Gt && reg2 IS NOT NULL ) goto lbl; + ** if( op==OP_Le && reg2 IS NULL ) goto lbl; + ** }else if( reg2 IS NULL ){ + ** if( op==OP_Le ) goto lbl; + ** } + ** + ** Additionally, if either reg1 or reg2 are NULL but the jump to lbl is + ** not taken, control jumps over the comparison operator coded below this + ** block. */ + if( pOrderBy->a[0].sortFlags & KEYINFO_ORDER_BIGNULL ){ + /* This block runs if reg1 contains a NULL. */ + int addr = sqlite3VdbeAddOp1(v, OP_NotNull, reg1); VdbeCoverage(v); + switch( op ){ + case OP_Ge: + sqlite3VdbeAddOp2(v, OP_Goto, 0, lbl); + break; + case OP_Gt: + sqlite3VdbeAddOp2(v, OP_NotNull, reg2, lbl); + VdbeCoverage(v); + break; + case OP_Le: + sqlite3VdbeAddOp2(v, OP_IsNull, reg2, lbl); + VdbeCoverage(v); + break; + default: assert( op==OP_Lt ); /* no-op */ break; + } + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrDone); + + /* This block runs if reg1 is not NULL, but reg2 is. */ + sqlite3VdbeJumpHere(v, addr); + sqlite3VdbeAddOp2(v, OP_IsNull, reg2, lbl); VdbeCoverage(v); + if( op==OP_Gt || op==OP_Ge ){ + sqlite3VdbeChangeP2(v, -1, addrDone); + } + } + + /* Register reg1 currently contains csr1.peerVal (the peer-value from csr1). + ** This block adds (or subtracts for DESC) the numeric value in regVal + ** from it. Or, if reg1 is not numeric (it is a NULL, a text value or a blob), + ** then leave reg1 as it is. In pseudo-code, this is implemented as: + ** + ** if( reg1>='' ) goto addrGe; + ** reg1 = reg1 +/- regVal + ** addrGe: + ** + ** Since all strings and blobs are greater-than-or-equal-to an empty string, + ** the add/subtract is skipped for these, as required. If reg1 is a NULL, + ** then the arithmetic is performed, but since adding or subtracting from + ** NULL is always NULL anyway, this case is handled as required too. */ + sqlite3VdbeAddOp4(v, OP_String8, 0, regString, 0, "", P4_STATIC); + addrGe = sqlite3VdbeAddOp3(v, OP_Ge, regString, 0, reg1); + VdbeCoverage(v); + if( (op==OP_Ge && arith==OP_Add) || (op==OP_Le && arith==OP_Subtract) ){ + sqlite3VdbeAddOp3(v, op, reg2, lbl, reg1); VdbeCoverage(v); + } + sqlite3VdbeAddOp3(v, arith, regVal, reg1, reg1); + sqlite3VdbeJumpHere(v, addrGe); + + /* Compare registers reg2 and reg1, taking the jump if required. Note that + ** control skips over this test if the BIGNULL flag is set and either + ** reg1 or reg2 contain a NULL value. */ + sqlite3VdbeAddOp3(v, op, reg2, lbl, reg1); VdbeCoverage(v); + pColl = sqlite3ExprNNCollSeq(pParse, pOrderBy->a[0].pExpr); + sqlite3VdbeAppendP4(v, (void*)pColl, P4_COLLSEQ); + sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); + sqlite3VdbeResolveLabel(v, addrDone); + + assert( op==OP_Ge || op==OP_Gt || op==OP_Lt || op==OP_Le ); + testcase(op==OP_Ge); VdbeCoverageIf(v, op==OP_Ge); + testcase(op==OP_Lt); VdbeCoverageIf(v, op==OP_Lt); + testcase(op==OP_Le); VdbeCoverageIf(v, op==OP_Le); + testcase(op==OP_Gt); VdbeCoverageIf(v, op==OP_Gt); + sqlite3ReleaseTempReg(pParse, reg1); + sqlite3ReleaseTempReg(pParse, reg2); + + VdbeModuleComment((v, "CodeRangeTest: end")); +} + +/* +** Helper function for sqlite3WindowCodeStep(). Each call to this function +** generates VM code for a single RETURN_ROW, AGGSTEP or AGGINVERSE +** operation. Refer to the header comment for sqlite3WindowCodeStep() for +** details. +*/ +static int windowCodeOp( + WindowCodeArg *p, /* Context object */ + int op, /* WINDOW_RETURN_ROW, AGGSTEP or AGGINVERSE */ + int regCountdown, /* Register for OP_IfPos countdown */ + int jumpOnEof /* Jump here if stepped cursor reaches EOF */ +){ + int csr, reg; + Parse *pParse = p->pParse; + Window *pMWin = p->pMWin; + int ret = 0; + Vdbe *v = p->pVdbe; + int addrContinue = 0; + int bPeer = (pMWin->eFrmType!=TK_ROWS); + + int lblDone = sqlite3VdbeMakeLabel(pParse); + int addrNextRange = 0; + + /* Special case - WINDOW_AGGINVERSE is always a no-op if the frame + ** starts with UNBOUNDED PRECEDING. */ + if( op==WINDOW_AGGINVERSE && pMWin->eStart==TK_UNBOUNDED ){ + assert( regCountdown==0 && jumpOnEof==0 ); + return 0; + } + + if( regCountdown>0 ){ + if( pMWin->eFrmType==TK_RANGE ){ + addrNextRange = sqlite3VdbeCurrentAddr(v); + assert( op==WINDOW_AGGINVERSE || op==WINDOW_AGGSTEP ); + if( op==WINDOW_AGGINVERSE ){ + if( pMWin->eStart==TK_FOLLOWING ){ + windowCodeRangeTest( + p, OP_Le, p->current.csr, regCountdown, p->start.csr, lblDone + ); + }else{ + windowCodeRangeTest( + p, OP_Ge, p->start.csr, regCountdown, p->current.csr, lblDone + ); + } + }else{ + windowCodeRangeTest( + p, OP_Gt, p->end.csr, regCountdown, p->current.csr, lblDone + ); + } + }else{ + sqlite3VdbeAddOp3(v, OP_IfPos, regCountdown, lblDone, 1); + VdbeCoverage(v); + } + } + + if( op==WINDOW_RETURN_ROW && pMWin->regStartRowid==0 ){ + windowAggFinal(p, 0); + } + addrContinue = sqlite3VdbeCurrentAddr(v); + + /* If this is a (RANGE BETWEEN a FOLLOWING AND b FOLLOWING) or + ** (RANGE BETWEEN b PRECEDING AND a PRECEDING) frame, ensure the + ** start cursor does not advance past the end cursor within the + ** temporary table. It otherwise might, if (a>b). Also ensure that, + ** if the input cursor is still finding new rows, that the end + ** cursor does not go past it to EOF. */ + if( pMWin->eStart==pMWin->eEnd && regCountdown + && pMWin->eFrmType==TK_RANGE + ){ + int regRowid1 = sqlite3GetTempReg(pParse); + int regRowid2 = sqlite3GetTempReg(pParse); + if( op==WINDOW_AGGINVERSE ){ + sqlite3VdbeAddOp2(v, OP_Rowid, p->start.csr, regRowid1); + sqlite3VdbeAddOp2(v, OP_Rowid, p->end.csr, regRowid2); + sqlite3VdbeAddOp3(v, OP_Ge, regRowid2, lblDone, regRowid1); + VdbeCoverage(v); + }else if( p->regRowid ){ + sqlite3VdbeAddOp2(v, OP_Rowid, p->end.csr, regRowid1); + sqlite3VdbeAddOp3(v, OP_Ge, p->regRowid, lblDone, regRowid1); + VdbeCoverageNeverNull(v); + } + sqlite3ReleaseTempReg(pParse, regRowid1); + sqlite3ReleaseTempReg(pParse, regRowid2); + assert( pMWin->eStart==TK_PRECEDING || pMWin->eStart==TK_FOLLOWING ); + } + + switch( op ){ + case WINDOW_RETURN_ROW: + csr = p->current.csr; + reg = p->current.reg; + windowReturnOneRow(p); + break; + + case WINDOW_AGGINVERSE: + csr = p->start.csr; + reg = p->start.reg; + if( pMWin->regStartRowid ){ + assert( pMWin->regEndRowid ); + sqlite3VdbeAddOp2(v, OP_AddImm, pMWin->regStartRowid, 1); + }else{ + windowAggStep(p, pMWin, csr, 1, p->regArg); + } + break; + + default: + assert( op==WINDOW_AGGSTEP ); + csr = p->end.csr; + reg = p->end.reg; + if( pMWin->regStartRowid ){ + assert( pMWin->regEndRowid ); + sqlite3VdbeAddOp2(v, OP_AddImm, pMWin->regEndRowid, 1); + }else{ + windowAggStep(p, pMWin, csr, 0, p->regArg); + } + break; + } + + if( op==p->eDelete ){ + sqlite3VdbeAddOp1(v, OP_Delete, csr); + sqlite3VdbeChangeP5(v, OPFLAG_SAVEPOSITION); + } + + if( jumpOnEof ){ + sqlite3VdbeAddOp2(v, OP_Next, csr, sqlite3VdbeCurrentAddr(v)+2); + VdbeCoverage(v); + ret = sqlite3VdbeAddOp0(v, OP_Goto); + }else{ + sqlite3VdbeAddOp2(v, OP_Next, csr, sqlite3VdbeCurrentAddr(v)+1+bPeer); + VdbeCoverage(v); + if( bPeer ){ + sqlite3VdbeAddOp2(v, OP_Goto, 0, lblDone); + } + } + + if( bPeer ){ + int nReg = (pMWin->pOrderBy ? pMWin->pOrderBy->nExpr : 0); + int regTmp = (nReg ? sqlite3GetTempRange(pParse, nReg) : 0); + windowReadPeerValues(p, csr, regTmp); + windowIfNewPeer(pParse, pMWin->pOrderBy, regTmp, reg, addrContinue); + sqlite3ReleaseTempRange(pParse, regTmp, nReg); + } + + if( addrNextRange ){ + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrNextRange); + } + sqlite3VdbeResolveLabel(v, lblDone); + return ret; +} + + +/* +** Allocate and return a duplicate of the Window object indicated by the +** third argument. Set the Window.pOwner field of the new object to +** pOwner. +*/ +SQLITE_PRIVATE Window *sqlite3WindowDup(sqlite3 *db, Expr *pOwner, Window *p){ + Window *pNew = 0; + if( ALWAYS(p) ){ + pNew = sqlite3DbMallocZero(db, sizeof(Window)); + if( pNew ){ + pNew->zName = sqlite3DbStrDup(db, p->zName); + pNew->zBase = sqlite3DbStrDup(db, p->zBase); + pNew->pFilter = sqlite3ExprDup(db, p->pFilter, 0); + pNew->pFunc = p->pFunc; + pNew->pPartition = sqlite3ExprListDup(db, p->pPartition, 0); + pNew->pOrderBy = sqlite3ExprListDup(db, p->pOrderBy, 0); + pNew->eFrmType = p->eFrmType; + pNew->eEnd = p->eEnd; + pNew->eStart = p->eStart; + pNew->eExclude = p->eExclude; + pNew->regResult = p->regResult; + pNew->regAccum = p->regAccum; + pNew->iArgCol = p->iArgCol; + pNew->iEphCsr = p->iEphCsr; + pNew->bExprArgs = p->bExprArgs; + pNew->pStart = sqlite3ExprDup(db, p->pStart, 0); + pNew->pEnd = sqlite3ExprDup(db, p->pEnd, 0); + pNew->pOwner = pOwner; + pNew->bImplicitFrame = p->bImplicitFrame; + } + } + return pNew; +} + +/* +** Return a copy of the linked list of Window objects passed as the +** second argument. +*/ +SQLITE_PRIVATE Window *sqlite3WindowListDup(sqlite3 *db, Window *p){ + Window *pWin; + Window *pRet = 0; + Window **pp = &pRet; + + for(pWin=p; pWin; pWin=pWin->pNextWin){ + *pp = sqlite3WindowDup(db, 0, pWin); + if( *pp==0 ) break; + pp = &((*pp)->pNextWin); + } + + return pRet; +} + +/* +** Return true if it can be determined at compile time that expression +** pExpr evaluates to a value that, when cast to an integer, is greater +** than zero. False otherwise. +** +** If an OOM error occurs, this function sets the Parse.db.mallocFailed +** flag and returns zero. +*/ +static int windowExprGtZero(Parse *pParse, Expr *pExpr){ + int ret = 0; + sqlite3 *db = pParse->db; + sqlite3_value *pVal = 0; + sqlite3ValueFromExpr(db, pExpr, db->enc, SQLITE_AFF_NUMERIC, &pVal); + if( pVal && sqlite3_value_int(pVal)>0 ){ + ret = 1; + } + sqlite3ValueFree(pVal); + return ret; +} + +/* +** sqlite3WhereBegin() has already been called for the SELECT statement +** passed as the second argument when this function is invoked. It generates +** code to populate the Window.regResult register for each window function +** and invoke the sub-routine at instruction addrGosub once for each row. +** sqlite3WhereEnd() is always called before returning. +** +** This function handles several different types of window frames, which +** require slightly different processing. The following pseudo code is +** used to implement window frames of the form: +** +** ROWS BETWEEN PRECEDING AND FOLLOWING +** +** Other window frame types use variants of the following: +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** +** if( first row of partition ){ +** // Rewind three cursors, all open on the eph table. +** Rewind(csrEnd); +** Rewind(csrStart); +** Rewind(csrCurrent); +** +** regEnd = // FOLLOWING expression +** regStart = // PRECEDING expression +** }else{ +** // First time this branch is taken, the eph table contains two +** // rows. The first row in the partition, which all three cursors +** // currently point to, and the following row. +** AGGSTEP +** if( (regEnd--)<=0 ){ +** RETURN_ROW +** if( (regStart--)<=0 ){ +** AGGINVERSE +** } +** } +** } +** } +** flush: +** AGGSTEP +** while( 1 ){ +** RETURN ROW +** if( csrCurrent is EOF ) break; +** if( (regStart--)<=0 ){ +** AggInverse(csrStart) +** Next(csrStart) +** } +** } +** +** The pseudo-code above uses the following shorthand: +** +** AGGSTEP: invoke the aggregate xStep() function for each window function +** with arguments read from the current row of cursor csrEnd, then +** step cursor csrEnd forward one row (i.e. sqlite3BtreeNext()). +** +** RETURN_ROW: return a row to the caller based on the contents of the +** current row of csrCurrent and the current state of all +** aggregates. Then step cursor csrCurrent forward one row. +** +** AGGINVERSE: invoke the aggregate xInverse() function for each window +** functions with arguments read from the current row of cursor +** csrStart. Then step csrStart forward one row. +** +** There are two other ROWS window frames that are handled significantly +** differently from the above - "BETWEEN PRECEDING AND PRECEDING" +** and "BETWEEN FOLLOWING AND FOLLOWING". These are special +** cases because they change the order in which the three cursors (csrStart, +** csrCurrent and csrEnd) iterate through the ephemeral table. Cases that +** use UNBOUNDED or CURRENT ROW are much simpler variations on one of these +** three. +** +** ROWS BETWEEN PRECEDING AND PRECEDING +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regEnd = +** regStart = +** }else{ +** if( (regEnd--)<=0 ){ +** AGGSTEP +** } +** RETURN_ROW +** if( (regStart--)<=0 ){ +** AGGINVERSE +** } +** } +** } +** flush: +** if( (regEnd--)<=0 ){ +** AGGSTEP +** } +** RETURN_ROW +** +** +** ROWS BETWEEN FOLLOWING AND FOLLOWING +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regEnd = +** regStart = regEnd - +** }else{ +** AGGSTEP +** if( (regEnd--)<=0 ){ +** RETURN_ROW +** } +** if( (regStart--)<=0 ){ +** AGGINVERSE +** } +** } +** } +** flush: +** AGGSTEP +** while( 1 ){ +** if( (regEnd--)<=0 ){ +** RETURN_ROW +** if( eof ) break; +** } +** if( (regStart--)<=0 ){ +** AGGINVERSE +** if( eof ) break +** } +** } +** while( !eof csrCurrent ){ +** RETURN_ROW +** } +** +** For the most part, the patterns above are adapted to support UNBOUNDED by +** assuming that it is equivalent to "infinity PRECEDING/FOLLOWING" and +** CURRENT ROW by assuming that it is equivilent to "0 PRECEDING/FOLLOWING". +** This is optimized of course - branches that will never be taken and +** conditions that are always true are omitted from the VM code. The only +** exceptional case is: +** +** ROWS BETWEEN FOLLOWING AND UNBOUNDED FOLLOWING +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regStart = +** }else{ +** AGGSTEP +** } +** } +** flush: +** AGGSTEP +** while( 1 ){ +** if( (regStart--)<=0 ){ +** AGGINVERSE +** if( eof ) break +** } +** RETURN_ROW +** } +** while( !eof csrCurrent ){ +** RETURN_ROW +** } +** +** Also requiring special handling are the cases: +** +** ROWS BETWEEN PRECEDING AND PRECEDING +** ROWS BETWEEN FOLLOWING AND FOLLOWING +** +** when (expr1 < expr2). This is detected at runtime, not by this function. +** To handle this case, the pseudo-code programs depicted above are modified +** slightly to be: +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regEnd = +** regStart = +** if( regEnd < regStart ){ +** RETURN_ROW +** delete eph table contents +** continue +** } +** ... +** +** The new "continue" statement in the above jumps to the next iteration +** of the outer loop - the one started by sqlite3WhereBegin(). +** +** The various GROUPS cases are implemented using the same patterns as +** ROWS. The VM code is modified slightly so that: +** +** 1. The else branch in the main loop is only taken if the row just +** added to the ephemeral table is the start of a new group. In +** other words, it becomes: +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regEnd = +** regStart = +** }else if( new group ){ +** ... +** } +** } +** +** 2. Instead of processing a single row, each RETURN_ROW, AGGSTEP or +** AGGINVERSE step processes the current row of the relevant cursor and +** all subsequent rows belonging to the same group. +** +** RANGE window frames are a little different again. As for GROUPS, the +** main loop runs once per group only. And RETURN_ROW, AGGSTEP and AGGINVERSE +** deal in groups instead of rows. As for ROWS and GROUPS, there are three +** basic cases: +** +** RANGE BETWEEN PRECEDING AND FOLLOWING +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regEnd = +** regStart = +** }else{ +** AGGSTEP +** while( (csrCurrent.key + regEnd) < csrEnd.key ){ +** RETURN_ROW +** while( csrStart.key + regStart) < csrCurrent.key ){ +** AGGINVERSE +** } +** } +** } +** } +** flush: +** AGGSTEP +** while( 1 ){ +** RETURN ROW +** if( csrCurrent is EOF ) break; +** while( csrStart.key + regStart) < csrCurrent.key ){ +** AGGINVERSE +** } +** } +** } +** +** In the above notation, "csr.key" means the current value of the ORDER BY +** expression (there is only ever 1 for a RANGE that uses an FOLLOWING +** or PRECEDING AND PRECEDING +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regEnd = +** regStart = +** }else{ +** while( (csrEnd.key + regEnd) <= csrCurrent.key ){ +** AGGSTEP +** } +** while( (csrStart.key + regStart) < csrCurrent.key ){ +** AGGINVERSE +** } +** RETURN_ROW +** } +** } +** flush: +** while( (csrEnd.key + regEnd) <= csrCurrent.key ){ +** AGGSTEP +** } +** while( (csrStart.key + regStart) < csrCurrent.key ){ +** AGGINVERSE +** } +** RETURN_ROW +** +** RANGE BETWEEN FOLLOWING AND FOLLOWING +** +** ... loop started by sqlite3WhereBegin() ... +** if( new partition ){ +** Gosub flush +** } +** Insert new row into eph table. +** if( first row of partition ){ +** Rewind(csrEnd) ; Rewind(csrStart) ; Rewind(csrCurrent) +** regEnd = +** regStart = +** }else{ +** AGGSTEP +** while( (csrCurrent.key + regEnd) < csrEnd.key ){ +** while( (csrCurrent.key + regStart) > csrStart.key ){ +** AGGINVERSE +** } +** RETURN_ROW +** } +** } +** } +** flush: +** AGGSTEP +** while( 1 ){ +** while( (csrCurrent.key + regStart) > csrStart.key ){ +** AGGINVERSE +** if( eof ) break "while( 1 )" loop. +** } +** RETURN_ROW +** } +** while( !eof csrCurrent ){ +** RETURN_ROW +** } +** +** The text above leaves out many details. Refer to the code and comments +** below for a more complete picture. +*/ +SQLITE_PRIVATE void sqlite3WindowCodeStep( + Parse *pParse, /* Parse context */ + Select *p, /* Rewritten SELECT statement */ + WhereInfo *pWInfo, /* Context returned by sqlite3WhereBegin() */ + int regGosub, /* Register for OP_Gosub */ + int addrGosub /* OP_Gosub here to return each row */ +){ + Window *pMWin = p->pWin; + ExprList *pOrderBy = pMWin->pOrderBy; + Vdbe *v = sqlite3GetVdbe(pParse); + int csrWrite; /* Cursor used to write to eph. table */ + int csrInput = p->pSrc->a[0].iCursor; /* Cursor of sub-select */ + int nInput = p->pSrc->a[0].pTab->nCol; /* Number of cols returned by sub */ + int iInput; /* To iterate through sub cols */ + int addrNe; /* Address of OP_Ne */ + int addrGosubFlush = 0; /* Address of OP_Gosub to flush: */ + int addrInteger = 0; /* Address of OP_Integer */ + int addrEmpty; /* Address of OP_Rewind in flush: */ + int regNew; /* Array of registers holding new input row */ + int regRecord; /* regNew array in record form */ + int regNewPeer = 0; /* Peer values for new row (part of regNew) */ + int regPeer = 0; /* Peer values for current row */ + int regFlushPart = 0; /* Register for "Gosub flush_partition" */ + WindowCodeArg s; /* Context object for sub-routines */ + int lblWhereEnd; /* Label just before sqlite3WhereEnd() code */ + int regStart = 0; /* Value of PRECEDING */ + int regEnd = 0; /* Value of FOLLOWING */ + + assert( pMWin->eStart==TK_PRECEDING || pMWin->eStart==TK_CURRENT + || pMWin->eStart==TK_FOLLOWING || pMWin->eStart==TK_UNBOUNDED + ); + assert( pMWin->eEnd==TK_FOLLOWING || pMWin->eEnd==TK_CURRENT + || pMWin->eEnd==TK_UNBOUNDED || pMWin->eEnd==TK_PRECEDING + ); + assert( pMWin->eExclude==0 || pMWin->eExclude==TK_CURRENT + || pMWin->eExclude==TK_GROUP || pMWin->eExclude==TK_TIES + || pMWin->eExclude==TK_NO + ); + + lblWhereEnd = sqlite3VdbeMakeLabel(pParse); + + /* Fill in the context object */ + memset(&s, 0, sizeof(WindowCodeArg)); + s.pParse = pParse; + s.pMWin = pMWin; + s.pVdbe = v; + s.regGosub = regGosub; + s.addrGosub = addrGosub; + s.current.csr = pMWin->iEphCsr; + csrWrite = s.current.csr+1; + s.start.csr = s.current.csr+2; + s.end.csr = s.current.csr+3; + + /* Figure out when rows may be deleted from the ephemeral table. There + ** are four options - they may never be deleted (eDelete==0), they may + ** be deleted as soon as they are no longer part of the window frame + ** (eDelete==WINDOW_AGGINVERSE), they may be deleted as after the row + ** has been returned to the caller (WINDOW_RETURN_ROW), or they may + ** be deleted after they enter the frame (WINDOW_AGGSTEP). */ + switch( pMWin->eStart ){ + case TK_FOLLOWING: + if( pMWin->eFrmType!=TK_RANGE + && windowExprGtZero(pParse, pMWin->pStart) + ){ + s.eDelete = WINDOW_RETURN_ROW; + } + break; + case TK_UNBOUNDED: + if( windowCacheFrame(pMWin)==0 ){ + if( pMWin->eEnd==TK_PRECEDING ){ + if( pMWin->eFrmType!=TK_RANGE + && windowExprGtZero(pParse, pMWin->pEnd) + ){ + s.eDelete = WINDOW_AGGSTEP; + } + }else{ + s.eDelete = WINDOW_RETURN_ROW; + } + } + break; + default: + s.eDelete = WINDOW_AGGINVERSE; + break; + } + + /* Allocate registers for the array of values from the sub-query, the + ** samve values in record form, and the rowid used to insert said record + ** into the ephemeral table. */ + regNew = pParse->nMem+1; + pParse->nMem += nInput; + regRecord = ++pParse->nMem; + s.regRowid = ++pParse->nMem; + + /* If the window frame contains an " PRECEDING" or " FOLLOWING" + ** clause, allocate registers to store the results of evaluating each + ** . */ + if( pMWin->eStart==TK_PRECEDING || pMWin->eStart==TK_FOLLOWING ){ + regStart = ++pParse->nMem; + } + if( pMWin->eEnd==TK_PRECEDING || pMWin->eEnd==TK_FOLLOWING ){ + regEnd = ++pParse->nMem; + } + + /* If this is not a "ROWS BETWEEN ..." frame, then allocate arrays of + ** registers to store copies of the ORDER BY expressions (peer values) + ** for the main loop, and for each cursor (start, current and end). */ + if( pMWin->eFrmType!=TK_ROWS ){ + int nPeer = (pOrderBy ? pOrderBy->nExpr : 0); + regNewPeer = regNew + pMWin->nBufferCol; + if( pMWin->pPartition ) regNewPeer += pMWin->pPartition->nExpr; + regPeer = pParse->nMem+1; pParse->nMem += nPeer; + s.start.reg = pParse->nMem+1; pParse->nMem += nPeer; + s.current.reg = pParse->nMem+1; pParse->nMem += nPeer; + s.end.reg = pParse->nMem+1; pParse->nMem += nPeer; + } + + /* Load the column values for the row returned by the sub-select + ** into an array of registers starting at regNew. Assemble them into + ** a record in register regRecord. */ + for(iInput=0; iInputpPartition ){ + int addr; + ExprList *pPart = pMWin->pPartition; + int nPart = pPart->nExpr; + int regNewPart = regNew + pMWin->nBufferCol; + KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pPart, 0, 0); + + regFlushPart = ++pParse->nMem; + addr = sqlite3VdbeAddOp3(v, OP_Compare, regNewPart, pMWin->regPart, nPart); + sqlite3VdbeAppendP4(v, (void*)pKeyInfo, P4_KEYINFO); + sqlite3VdbeAddOp3(v, OP_Jump, addr+2, addr+4, addr+2); + VdbeCoverageEqNe(v); + addrGosubFlush = sqlite3VdbeAddOp1(v, OP_Gosub, regFlushPart); + VdbeComment((v, "call flush_partition")); + sqlite3VdbeAddOp3(v, OP_Copy, regNewPart, pMWin->regPart, nPart-1); + } + + /* Insert the new row into the ephemeral table */ + sqlite3VdbeAddOp2(v, OP_NewRowid, csrWrite, s.regRowid); + sqlite3VdbeAddOp3(v, OP_Insert, csrWrite, regRecord, s.regRowid); + addrNe = sqlite3VdbeAddOp3(v, OP_Ne, pMWin->regOne, 0, s.regRowid); + VdbeCoverageNeverNull(v); + + /* This block is run for the first row of each partition */ + s.regArg = windowInitAccum(pParse, pMWin); + + if( regStart ){ + sqlite3ExprCode(pParse, pMWin->pStart, regStart); + windowCheckValue(pParse, regStart, 0 + (pMWin->eFrmType==TK_RANGE?3:0)); + } + if( regEnd ){ + sqlite3ExprCode(pParse, pMWin->pEnd, regEnd); + windowCheckValue(pParse, regEnd, 1 + (pMWin->eFrmType==TK_RANGE?3:0)); + } + + if( pMWin->eFrmType!=TK_RANGE && pMWin->eStart==pMWin->eEnd && regStart ){ + int op = ((pMWin->eStart==TK_FOLLOWING) ? OP_Ge : OP_Le); + int addrGe = sqlite3VdbeAddOp3(v, op, regStart, 0, regEnd); + VdbeCoverageNeverNullIf(v, op==OP_Ge); /* NeverNull because bound */ + VdbeCoverageNeverNullIf(v, op==OP_Le); /* values previously checked */ + windowAggFinal(&s, 0); + sqlite3VdbeAddOp2(v, OP_Rewind, s.current.csr, 1); + VdbeCoverageNeverTaken(v); + windowReturnOneRow(&s); + sqlite3VdbeAddOp1(v, OP_ResetSorter, s.current.csr); + sqlite3VdbeAddOp2(v, OP_Goto, 0, lblWhereEnd); + sqlite3VdbeJumpHere(v, addrGe); + } + if( pMWin->eStart==TK_FOLLOWING && pMWin->eFrmType!=TK_RANGE && regEnd ){ + assert( pMWin->eEnd==TK_FOLLOWING ); + sqlite3VdbeAddOp3(v, OP_Subtract, regStart, regEnd, regStart); + } + + if( pMWin->eStart!=TK_UNBOUNDED ){ + sqlite3VdbeAddOp2(v, OP_Rewind, s.start.csr, 1); + VdbeCoverageNeverTaken(v); + } + sqlite3VdbeAddOp2(v, OP_Rewind, s.current.csr, 1); + VdbeCoverageNeverTaken(v); + sqlite3VdbeAddOp2(v, OP_Rewind, s.end.csr, 1); + VdbeCoverageNeverTaken(v); + if( regPeer && pOrderBy ){ + sqlite3VdbeAddOp3(v, OP_Copy, regNewPeer, regPeer, pOrderBy->nExpr-1); + sqlite3VdbeAddOp3(v, OP_Copy, regPeer, s.start.reg, pOrderBy->nExpr-1); + sqlite3VdbeAddOp3(v, OP_Copy, regPeer, s.current.reg, pOrderBy->nExpr-1); + sqlite3VdbeAddOp3(v, OP_Copy, regPeer, s.end.reg, pOrderBy->nExpr-1); + } + + sqlite3VdbeAddOp2(v, OP_Goto, 0, lblWhereEnd); + + sqlite3VdbeJumpHere(v, addrNe); + + /* Beginning of the block executed for the second and subsequent rows. */ + if( regPeer ){ + windowIfNewPeer(pParse, pOrderBy, regNewPeer, regPeer, lblWhereEnd); + } + if( pMWin->eStart==TK_FOLLOWING ){ + windowCodeOp(&s, WINDOW_AGGSTEP, 0, 0); + if( pMWin->eEnd!=TK_UNBOUNDED ){ + if( pMWin->eFrmType==TK_RANGE ){ + int lbl = sqlite3VdbeMakeLabel(pParse); + int addrNext = sqlite3VdbeCurrentAddr(v); + windowCodeRangeTest(&s, OP_Ge, s.current.csr, regEnd, s.end.csr, lbl); + windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 0); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrNext); + sqlite3VdbeResolveLabel(v, lbl); + }else{ + windowCodeOp(&s, WINDOW_RETURN_ROW, regEnd, 0); + windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + } + } + }else + if( pMWin->eEnd==TK_PRECEDING ){ + int bRPS = (pMWin->eStart==TK_PRECEDING && pMWin->eFrmType==TK_RANGE); + windowCodeOp(&s, WINDOW_AGGSTEP, regEnd, 0); + if( bRPS ) windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 0); + if( !bRPS ) windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + }else{ + int addr = 0; + windowCodeOp(&s, WINDOW_AGGSTEP, 0, 0); + if( pMWin->eEnd!=TK_UNBOUNDED ){ + if( pMWin->eFrmType==TK_RANGE ){ + int lbl = 0; + addr = sqlite3VdbeCurrentAddr(v); + if( regEnd ){ + lbl = sqlite3VdbeMakeLabel(pParse); + windowCodeRangeTest(&s, OP_Ge, s.current.csr, regEnd, s.end.csr, lbl); + } + windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 0); + windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + if( regEnd ){ + sqlite3VdbeAddOp2(v, OP_Goto, 0, addr); + sqlite3VdbeResolveLabel(v, lbl); + } + }else{ + if( regEnd ){ + addr = sqlite3VdbeAddOp3(v, OP_IfPos, regEnd, 0, 1); + VdbeCoverage(v); + } + windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 0); + windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + if( regEnd ) sqlite3VdbeJumpHere(v, addr); + } + } + } + + /* End of the main input loop */ + sqlite3VdbeResolveLabel(v, lblWhereEnd); + sqlite3WhereEnd(pWInfo); + + /* Fall through */ + if( pMWin->pPartition ){ + addrInteger = sqlite3VdbeAddOp2(v, OP_Integer, 0, regFlushPart); + sqlite3VdbeJumpHere(v, addrGosubFlush); + } + + s.regRowid = 0; + addrEmpty = sqlite3VdbeAddOp1(v, OP_Rewind, csrWrite); + VdbeCoverage(v); + if( pMWin->eEnd==TK_PRECEDING ){ + int bRPS = (pMWin->eStart==TK_PRECEDING && pMWin->eFrmType==TK_RANGE); + windowCodeOp(&s, WINDOW_AGGSTEP, regEnd, 0); + if( bRPS ) windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 0); + }else if( pMWin->eStart==TK_FOLLOWING ){ + int addrStart; + int addrBreak1; + int addrBreak2; + int addrBreak3; + windowCodeOp(&s, WINDOW_AGGSTEP, 0, 0); + if( pMWin->eFrmType==TK_RANGE ){ + addrStart = sqlite3VdbeCurrentAddr(v); + addrBreak2 = windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 1); + addrBreak1 = windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 1); + }else + if( pMWin->eEnd==TK_UNBOUNDED ){ + addrStart = sqlite3VdbeCurrentAddr(v); + addrBreak1 = windowCodeOp(&s, WINDOW_RETURN_ROW, regStart, 1); + addrBreak2 = windowCodeOp(&s, WINDOW_AGGINVERSE, 0, 1); + }else{ + assert( pMWin->eEnd==TK_FOLLOWING ); + addrStart = sqlite3VdbeCurrentAddr(v); + addrBreak1 = windowCodeOp(&s, WINDOW_RETURN_ROW, regEnd, 1); + addrBreak2 = windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 1); + } + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrStart); + sqlite3VdbeJumpHere(v, addrBreak2); + addrStart = sqlite3VdbeCurrentAddr(v); + addrBreak3 = windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 1); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrStart); + sqlite3VdbeJumpHere(v, addrBreak1); + sqlite3VdbeJumpHere(v, addrBreak3); + }else{ + int addrBreak; + int addrStart; + windowCodeOp(&s, WINDOW_AGGSTEP, 0, 0); + addrStart = sqlite3VdbeCurrentAddr(v); + addrBreak = windowCodeOp(&s, WINDOW_RETURN_ROW, 0, 1); + windowCodeOp(&s, WINDOW_AGGINVERSE, regStart, 0); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrStart); + sqlite3VdbeJumpHere(v, addrBreak); + } + sqlite3VdbeJumpHere(v, addrEmpty); + + sqlite3VdbeAddOp1(v, OP_ResetSorter, s.current.csr); + if( pMWin->pPartition ){ + if( pMWin->regStartRowid ){ + sqlite3VdbeAddOp2(v, OP_Integer, 1, pMWin->regStartRowid); + sqlite3VdbeAddOp2(v, OP_Integer, 0, pMWin->regEndRowid); + } + sqlite3VdbeChangeP1(v, addrInteger, sqlite3VdbeCurrentAddr(v)); + sqlite3VdbeAddOp1(v, OP_Return, regFlushPart); + } +} + +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +/************** End of window.c **********************************************/ +/************** Begin file parse.c *******************************************/ +/* This file is automatically generated by Lemon from input grammar +** source file "parse.y". */ +/* +** 2001-09-15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains SQLite's SQL parser. +** +** The canonical source code to this file ("parse.y") is a Lemon grammar +** file that specifies the input grammar and actions to take while parsing. +** That input file is processed by Lemon to generate a C-language +** implementation of a parser for the given grammer. You might be reading +** this comment as part of the translated C-code. Edits should be made +** to the original parse.y sources. +*/ + +/* #include "sqliteInt.h" */ + +/* +** Disable all error recovery processing in the parser push-down +** automaton. +*/ +#define YYNOERRORRECOVERY 1 + +/* +** Make yytestcase() the same as testcase() +*/ +#define yytestcase(X) testcase(X) + +/* +** Indicate that sqlite3ParserFree() will never be called with a null +** pointer. +*/ +#define YYPARSEFREENEVERNULL 1 + +/* +** In the amalgamation, the parse.c file generated by lemon and the +** tokenize.c file are concatenated. In that case, sqlite3RunParser() +** has access to the the size of the yyParser object and so the parser +** engine can be allocated from stack. In that case, only the +** sqlite3ParserInit() and sqlite3ParserFinalize() routines are invoked +** and the sqlite3ParserAlloc() and sqlite3ParserFree() routines can be +** omitted. +*/ +#ifdef SQLITE_AMALGAMATION +# define sqlite3Parser_ENGINEALWAYSONSTACK 1 +#endif + +/* +** Alternative datatype for the argument to the malloc() routine passed +** into sqlite3ParserAlloc(). The default is size_t. +*/ +#define YYMALLOCARGTYPE u64 + +/* +** An instance of the following structure describes the event of a +** TRIGGER. "a" is the event type, one of TK_UPDATE, TK_INSERT, +** TK_DELETE, or TK_INSTEAD. If the event is of the form +** +** UPDATE ON (a,b,c) +** +** Then the "b" IdList records the list "a,b,c". +*/ +struct TrigEvent { int a; IdList * b; }; + +struct FrameBound { int eType; Expr *pExpr; }; + +/* +** Disable lookaside memory allocation for objects that might be +** shared across database connections. +*/ +static void disableLookaside(Parse *pParse){ + sqlite3 *db = pParse->db; + pParse->disableLookaside++; + DisableLookaside; +} + +#if !defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) \ + && defined(SQLITE_UDL_CAPABLE_PARSER) +/* +** Issue an error message if an ORDER BY or LIMIT clause occurs on an +** UPDATE or DELETE statement. +*/ +static void updateDeleteLimitError( + Parse *pParse, + ExprList *pOrderBy, + Expr *pLimit +){ + if( pOrderBy ){ + sqlite3ErrorMsg(pParse, "syntax error near \"ORDER BY\""); + }else{ + sqlite3ErrorMsg(pParse, "syntax error near \"LIMIT\""); + } + sqlite3ExprListDelete(pParse->db, pOrderBy); + sqlite3ExprDelete(pParse->db, pLimit); +} +#endif /* SQLITE_ENABLE_UPDATE_DELETE_LIMIT */ + + + /* + ** For a compound SELECT statement, make sure p->pPrior->pNext==p for + ** all elements in the list. And make sure list length does not exceed + ** SQLITE_LIMIT_COMPOUND_SELECT. + */ + static void parserDoubleLinkSelect(Parse *pParse, Select *p){ + assert( p!=0 ); + if( p->pPrior ){ + Select *pNext = 0, *pLoop = p; + int mxSelect, cnt = 1; + while(1){ + pLoop->pNext = pNext; + pLoop->selFlags |= SF_Compound; + pNext = pLoop; + pLoop = pLoop->pPrior; + if( pLoop==0 ) break; + cnt++; + if( pLoop->pOrderBy || pLoop->pLimit ){ + sqlite3ErrorMsg(pParse,"%s clause should come after %s not before", + pLoop->pOrderBy!=0 ? "ORDER BY" : "LIMIT", + sqlite3SelectOpName(pNext->op)); + break; + } + } + if( (p->selFlags & SF_MultiValue)==0 && + (mxSelect = pParse->db->aLimit[SQLITE_LIMIT_COMPOUND_SELECT])>0 && + cnt>mxSelect + ){ + sqlite3ErrorMsg(pParse, "too many terms in compound SELECT"); + } + } + } + + /* Attach a With object describing the WITH clause to a Select + ** object describing the query for which the WITH clause is a prefix. + */ + static Select *attachWithToSelect(Parse *pParse, Select *pSelect, With *pWith){ + if( pSelect ){ + pSelect->pWith = pWith; + parserDoubleLinkSelect(pParse, pSelect); + }else{ + sqlite3WithDelete(pParse->db, pWith); + } + return pSelect; + } + + + /* Construct a new Expr object from a single token */ + static Expr *tokenExpr(Parse *pParse, int op, Token t){ + Expr *p = sqlite3DbMallocRawNN(pParse->db, sizeof(Expr)+t.n+1); + if( p ){ + /* memset(p, 0, sizeof(Expr)); */ + p->op = (u8)op; + p->affExpr = 0; + p->flags = EP_Leaf; + ExprClearVVAProperties(p); + p->iAgg = -1; + p->pLeft = p->pRight = 0; + p->pAggInfo = 0; + memset(&p->x, 0, sizeof(p->x)); + memset(&p->y, 0, sizeof(p->y)); + p->op2 = 0; + p->iTable = 0; + p->iColumn = 0; + p->u.zToken = (char*)&p[1]; + memcpy(p->u.zToken, t.z, t.n); + p->u.zToken[t.n] = 0; + p->w.iOfst = (int)(t.z - pParse->zTail); + if( sqlite3Isquote(p->u.zToken[0]) ){ + sqlite3DequoteExpr(p); + } +#if SQLITE_MAX_EXPR_DEPTH>0 + p->nHeight = 1; +#endif + if( IN_RENAME_OBJECT ){ + return (Expr*)sqlite3RenameTokenMap(pParse, (void*)p, &t); + } + } + return p; + } + + + /* A routine to convert a binary TK_IS or TK_ISNOT expression into a + ** unary TK_ISNULL or TK_NOTNULL expression. */ + static void binaryToUnaryIfNull(Parse *pParse, Expr *pY, Expr *pA, int op){ + sqlite3 *db = pParse->db; + if( pA && pY && pY->op==TK_NULL && !IN_RENAME_OBJECT ){ + pA->op = (u8)op; + sqlite3ExprDelete(db, pA->pRight); + pA->pRight = 0; + } + } + + /* Add a single new term to an ExprList that is used to store a + ** list of identifiers. Report an error if the ID list contains + ** a COLLATE clause or an ASC or DESC keyword, except ignore the + ** error while parsing a legacy schema. + */ + static ExprList *parserAddExprIdListTerm( + Parse *pParse, + ExprList *pPrior, + Token *pIdToken, + int hasCollate, + int sortOrder + ){ + ExprList *p = sqlite3ExprListAppend(pParse, pPrior, 0); + if( (hasCollate || sortOrder!=SQLITE_SO_UNDEFINED) + && pParse->db->init.busy==0 + ){ + sqlite3ErrorMsg(pParse, "syntax error after column name \"%.*s\"", + pIdToken->n, pIdToken->z); + } + sqlite3ExprListSetName(pParse, p, pIdToken, 1); + return p; + } + +#if TK_SPAN>255 +# error too many tokens in the grammar +#endif +/**************** End of %include directives **********************************/ +/* These constants specify the various numeric values for terminal symbols. +***************** Begin token definitions *************************************/ +#ifndef TK_SEMI +#define TK_SEMI 1 +#define TK_EXPLAIN 2 +#define TK_QUERY 3 +#define TK_PLAN 4 +#define TK_BEGIN 5 +#define TK_TRANSACTION 6 +#define TK_DEFERRED 7 +#define TK_IMMEDIATE 8 +#define TK_EXCLUSIVE 9 +#define TK_COMMIT 10 +#define TK_END 11 +#define TK_ROLLBACK 12 +#define TK_SAVEPOINT 13 +#define TK_RELEASE 14 +#define TK_TO 15 +#define TK_TABLE 16 +#define TK_CREATE 17 +#define TK_IF 18 +#define TK_NOT 19 +#define TK_EXISTS 20 +#define TK_TEMP 21 +#define TK_LP 22 +#define TK_RP 23 +#define TK_AS 24 +#define TK_COMMA 25 +#define TK_WITHOUT 26 +#define TK_ABORT 27 +#define TK_ACTION 28 +#define TK_AFTER 29 +#define TK_ANALYZE 30 +#define TK_ASC 31 +#define TK_ATTACH 32 +#define TK_BEFORE 33 +#define TK_BY 34 +#define TK_CASCADE 35 +#define TK_CAST 36 +#define TK_CONFLICT 37 +#define TK_DATABASE 38 +#define TK_DESC 39 +#define TK_DETACH 40 +#define TK_EACH 41 +#define TK_FAIL 42 +#define TK_OR 43 +#define TK_AND 44 +#define TK_IS 45 +#define TK_MATCH 46 +#define TK_LIKE_KW 47 +#define TK_BETWEEN 48 +#define TK_IN 49 +#define TK_ISNULL 50 +#define TK_NOTNULL 51 +#define TK_NE 52 +#define TK_EQ 53 +#define TK_GT 54 +#define TK_LE 55 +#define TK_LT 56 +#define TK_GE 57 +#define TK_ESCAPE 58 +#define TK_ID 59 +#define TK_COLUMNKW 60 +#define TK_DO 61 +#define TK_FOR 62 +#define TK_IGNORE 63 +#define TK_INITIALLY 64 +#define TK_INSTEAD 65 +#define TK_NO 66 +#define TK_KEY 67 +#define TK_OF 68 +#define TK_OFFSET 69 +#define TK_PRAGMA 70 +#define TK_RAISE 71 +#define TK_RECURSIVE 72 +#define TK_REPLACE 73 +#define TK_RESTRICT 74 +#define TK_ROW 75 +#define TK_ROWS 76 +#define TK_TRIGGER 77 +#define TK_VACUUM 78 +#define TK_VIEW 79 +#define TK_VIRTUAL 80 +#define TK_WITH 81 +#define TK_NULLS 82 +#define TK_FIRST 83 +#define TK_LAST 84 +#define TK_CURRENT 85 +#define TK_FOLLOWING 86 +#define TK_PARTITION 87 +#define TK_PRECEDING 88 +#define TK_RANGE 89 +#define TK_UNBOUNDED 90 +#define TK_EXCLUDE 91 +#define TK_GROUPS 92 +#define TK_OTHERS 93 +#define TK_TIES 94 +#define TK_GENERATED 95 +#define TK_ALWAYS 96 +#define TK_MATERIALIZED 97 +#define TK_REINDEX 98 +#define TK_RENAME 99 +#define TK_CTIME_KW 100 +#define TK_ANY 101 +#define TK_BITAND 102 +#define TK_BITOR 103 +#define TK_LSHIFT 104 +#define TK_RSHIFT 105 +#define TK_PLUS 106 +#define TK_MINUS 107 +#define TK_STAR 108 +#define TK_SLASH 109 +#define TK_REM 110 +#define TK_CONCAT 111 +#define TK_PTR 112 +#define TK_COLLATE 113 +#define TK_BITNOT 114 +#define TK_ON 115 +#define TK_INDEXED 116 +#define TK_STRING 117 +#define TK_JOIN_KW 118 +#define TK_CONSTRAINT 119 +#define TK_DEFAULT 120 +#define TK_NULL 121 +#define TK_PRIMARY 122 +#define TK_UNIQUE 123 +#define TK_CHECK 124 +#define TK_REFERENCES 125 +#define TK_AUTOINCR 126 +#define TK_INSERT 127 +#define TK_DELETE 128 +#define TK_UPDATE 129 +#define TK_SET 130 +#define TK_DEFERRABLE 131 +#define TK_FOREIGN 132 +#define TK_DROP 133 +#define TK_UNION 134 +#define TK_ALL 135 +#define TK_EXCEPT 136 +#define TK_INTERSECT 137 +#define TK_SELECT 138 +#define TK_VALUES 139 +#define TK_DISTINCT 140 +#define TK_DOT 141 +#define TK_FROM 142 +#define TK_JOIN 143 +#define TK_USING 144 +#define TK_ORDER 145 +#define TK_GROUP 146 +#define TK_HAVING 147 +#define TK_LIMIT 148 +#define TK_WHERE 149 +#define TK_RETURNING 150 +#define TK_INTO 151 +#define TK_NOTHING 152 +#define TK_FLOAT 153 +#define TK_BLOB 154 +#define TK_INTEGER 155 +#define TK_VARIABLE 156 +#define TK_CASE 157 +#define TK_WHEN 158 +#define TK_THEN 159 +#define TK_ELSE 160 +#define TK_INDEX 161 +#define TK_ALTER 162 +#define TK_ADD 163 +#define TK_WINDOW 164 +#define TK_OVER 165 +#define TK_FILTER 166 +#define TK_COLUMN 167 +#define TK_AGG_FUNCTION 168 +#define TK_AGG_COLUMN 169 +#define TK_TRUEFALSE 170 +#define TK_ISNOT 171 +#define TK_FUNCTION 172 +#define TK_UMINUS 173 +#define TK_UPLUS 174 +#define TK_TRUTH 175 +#define TK_REGISTER 176 +#define TK_VECTOR 177 +#define TK_SELECT_COLUMN 178 +#define TK_IF_NULL_ROW 179 +#define TK_ASTERISK 180 +#define TK_SPAN 181 +#define TK_ERROR 182 +#define TK_SPACE 183 +#define TK_ILLEGAL 184 +#endif +/**************** End token definitions ***************************************/ + +/* The next sections is a series of control #defines. +** various aspects of the generated parser. +** YYCODETYPE is the data type used to store the integer codes +** that represent terminal and non-terminal symbols. +** "unsigned char" is used if there are fewer than +** 256 symbols. Larger types otherwise. +** YYNOCODE is a number of type YYCODETYPE that is not used for +** any terminal or nonterminal symbol. +** YYFALLBACK If defined, this indicates that one or more tokens +** (also known as: "terminal symbols") have fall-back +** values which should be used if the original symbol +** would not parse. This permits keywords to sometimes +** be used as identifiers, for example. +** YYACTIONTYPE is the data type used for "action codes" - numbers +** that indicate what to do in response to the next +** token. +** sqlite3ParserTOKENTYPE is the data type used for minor type for terminal +** symbols. Background: A "minor type" is a semantic +** value associated with a terminal or non-terminal +** symbols. For example, for an "ID" terminal symbol, +** the minor type might be the name of the identifier. +** Each non-terminal can have a different minor type. +** Terminal symbols all have the same minor type, though. +** This macros defines the minor type for terminal +** symbols. +** YYMINORTYPE is the data type used for all minor types. +** This is typically a union of many types, one of +** which is sqlite3ParserTOKENTYPE. The entry in the union +** for terminal symbols is called "yy0". +** YYSTACKDEPTH is the maximum depth of the parser's stack. If +** zero the stack is dynamically sized using realloc() +** sqlite3ParserARG_SDECL A static variable declaration for the %extra_argument +** sqlite3ParserARG_PDECL A parameter declaration for the %extra_argument +** sqlite3ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter +** sqlite3ParserARG_STORE Code to store %extra_argument into yypParser +** sqlite3ParserARG_FETCH Code to extract %extra_argument from yypParser +** sqlite3ParserCTX_* As sqlite3ParserARG_ except for %extra_context +** YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +** YYNSTATE the combined number of states. +** YYNRULE the number of rules in the grammar +** YYNTOKEN Number of terminal symbols +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op +** YY_MIN_REDUCE Minimum value for reduce actions +** YY_MAX_REDUCE Maximum value for reduce actions +*/ +#ifndef INTERFACE +# define INTERFACE 1 +#endif +/************* Begin control #defines *****************************************/ +#define YYCODETYPE unsigned short int +#define YYNOCODE 319 +#define YYACTIONTYPE unsigned short int +#define YYWILDCARD 101 +#define sqlite3ParserTOKENTYPE Token +typedef union { + int yyinit; + sqlite3ParserTOKENTYPE yy0; + TriggerStep* yy33; + Window* yy41; + Select* yy47; + SrcList* yy131; + struct TrigEvent yy180; + struct {int value; int mask;} yy231; + IdList* yy254; + u32 yy285; + ExprList* yy322; + Cte* yy385; + int yy394; + Upsert* yy444; + u8 yy516; + With* yy521; + const char* yy522; + Expr* yy528; + struct FrameBound yy595; +} YYMINORTYPE; +#ifndef YYSTACKDEPTH +#define YYSTACKDEPTH 100 +#endif +#define sqlite3ParserARG_SDECL +#define sqlite3ParserARG_PDECL +#define sqlite3ParserARG_PARAM +#define sqlite3ParserARG_FETCH +#define sqlite3ParserARG_STORE +#define sqlite3ParserCTX_SDECL Parse *pParse; +#define sqlite3ParserCTX_PDECL ,Parse *pParse +#define sqlite3ParserCTX_PARAM ,pParse +#define sqlite3ParserCTX_FETCH Parse *pParse=yypParser->pParse; +#define sqlite3ParserCTX_STORE yypParser->pParse=pParse; +#define YYFALLBACK 1 +#define YYNSTATE 578 +#define YYNRULE 402 +#define YYNRULE_WITH_ACTION 340 +#define YYNTOKEN 185 +#define YY_MAX_SHIFT 577 +#define YY_MIN_SHIFTREDUCE 835 +#define YY_MAX_SHIFTREDUCE 1236 +#define YY_ERROR_ACTION 1237 +#define YY_ACCEPT_ACTION 1238 +#define YY_NO_ACTION 1239 +#define YY_MIN_REDUCE 1240 +#define YY_MAX_REDUCE 1641 +/************* End control #defines *******************************************/ +#define YY_NLOOKAHEAD ((int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0]))) + +/* Define the yytestcase() macro to be a no-op if is not already defined +** otherwise. +** +** Applications can choose to define yytestcase() in the %include section +** to a macro that can assist in verifying code coverage. For production +** code the yytestcase() macro should be turned off. But it is useful +** for testing. +*/ +#ifndef yytestcase +# define yytestcase(X) +#endif + + +/* Next are the tables used to determine what action to take based on the +** current state and lookahead token. These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead +** token onto the stack and goto state N. +** +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. +** +** N == YY_ERROR_ACTION A syntax error has occurred. +** +** N == YY_ACCEPT_ACTION The parser accepts its input. +** +** N == YY_NO_ACTION No such action. Denotes unused +** slots in the yy_action[] table. +** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE +** +** The action table is constructed as a single large table named yy_action[]. +** Given state S and lookahead X, the action is computed as either: +** +** (A) N = yy_action[ yy_shift_ofst[S] + X ] +** (B) N = yy_default[S] +** +** The (A) formula is preferred. The B formula is used instead if +** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X. +** +** The formulas above are for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the yy_reduce_ofst[] array is used in place of +** the yy_shift_ofst[] array. +** +** The following are the tables generated in this section: +** +** yy_action[] A single table containing all actions. +** yy_lookahead[] A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** yy_shift_ofst[] For each state, the offset into yy_action for +** shifting terminals. +** yy_reduce_ofst[] For each state, the offset into yy_action for +** shifting non-terminals after a reduce. +** yy_default[] Default action for each state. +** +*********** Begin parsing tables **********************************************/ +#define YY_ACTTAB_COUNT (2071) +static const YYACTIONTYPE yy_action[] = { + /* 0 */ 570, 1311, 570, 1290, 201, 201, 570, 116, 112, 222, + /* 10 */ 570, 1311, 381, 570, 116, 112, 222, 401, 412, 413, + /* 20 */ 1264, 382, 1273, 41, 41, 41, 41, 1416, 1521, 71, + /* 30 */ 71, 971, 1262, 41, 41, 495, 71, 71, 272, 972, + /* 40 */ 298, 480, 298, 123, 124, 114, 1214, 1214, 1048, 1051, + /* 50 */ 1040, 1040, 121, 121, 122, 122, 122, 122, 547, 413, + /* 60 */ 1238, 1, 1, 577, 2, 1242, 552, 116, 112, 222, + /* 70 */ 309, 484, 142, 552, 1276, 528, 116, 112, 222, 1324, + /* 80 */ 421, 527, 551, 123, 124, 114, 1214, 1214, 1048, 1051, + /* 90 */ 1040, 1040, 121, 121, 122, 122, 122, 122, 428, 116, + /* 100 */ 112, 222, 120, 120, 120, 120, 119, 119, 118, 118, + /* 110 */ 118, 117, 113, 448, 277, 277, 277, 277, 564, 564, + /* 120 */ 564, 1562, 380, 1564, 1190, 379, 1161, 567, 1161, 567, + /* 130 */ 413, 1562, 541, 252, 219, 1557, 99, 141, 453, 6, + /* 140 */ 369, 233, 120, 120, 120, 120, 119, 119, 118, 118, + /* 150 */ 118, 117, 113, 448, 123, 124, 114, 1214, 1214, 1048, + /* 160 */ 1051, 1040, 1040, 121, 121, 122, 122, 122, 122, 138, + /* 170 */ 289, 1190, 1550, 452, 118, 118, 118, 117, 113, 448, + /* 180 */ 125, 1190, 1191, 1192, 144, 469, 338, 570, 150, 127, + /* 190 */ 448, 122, 122, 122, 122, 115, 120, 120, 120, 120, + /* 200 */ 119, 119, 118, 118, 118, 117, 113, 448, 458, 423, + /* 210 */ 13, 13, 215, 120, 120, 120, 120, 119, 119, 118, + /* 220 */ 118, 118, 117, 113, 448, 426, 308, 561, 1190, 1191, + /* 230 */ 1192, 445, 444, 413, 1275, 122, 122, 122, 122, 120, + /* 240 */ 120, 120, 120, 119, 119, 118, 118, 118, 117, 113, + /* 250 */ 448, 1547, 98, 1037, 1037, 1049, 1052, 123, 124, 114, + /* 260 */ 1214, 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, + /* 270 */ 122, 122, 570, 410, 409, 1190, 570, 413, 1221, 319, + /* 280 */ 1221, 80, 81, 120, 120, 120, 120, 119, 119, 118, + /* 290 */ 118, 118, 117, 113, 448, 70, 70, 1190, 1608, 71, + /* 300 */ 71, 123, 124, 114, 1214, 1214, 1048, 1051, 1040, 1040, + /* 310 */ 121, 121, 122, 122, 122, 122, 120, 120, 120, 120, + /* 320 */ 119, 119, 118, 118, 118, 117, 113, 448, 1041, 210, + /* 330 */ 1190, 369, 1190, 1191, 1192, 245, 552, 403, 508, 505, + /* 340 */ 504, 108, 562, 138, 4, 520, 937, 437, 503, 217, + /* 350 */ 518, 526, 356, 883, 1190, 1191, 1192, 387, 565, 570, + /* 360 */ 120, 120, 120, 120, 119, 119, 118, 118, 118, 117, + /* 370 */ 113, 448, 277, 277, 16, 16, 1602, 445, 444, 153, + /* 380 */ 413, 449, 13, 13, 1283, 567, 1218, 1190, 1191, 1192, + /* 390 */ 1007, 1220, 264, 559, 1578, 186, 570, 431, 138, 1219, + /* 400 */ 308, 561, 476, 138, 123, 124, 114, 1214, 1214, 1048, + /* 410 */ 1051, 1040, 1040, 121, 121, 122, 122, 122, 122, 55, + /* 420 */ 55, 417, 1027, 511, 1221, 1190, 1221, 478, 106, 106, + /* 430 */ 1316, 1316, 1190, 171, 570, 388, 107, 384, 449, 572, + /* 440 */ 571, 434, 1547, 1017, 336, 553, 569, 263, 280, 364, + /* 450 */ 514, 359, 513, 250, 495, 308, 561, 71, 71, 355, + /* 460 */ 308, 561, 378, 120, 120, 120, 120, 119, 119, 118, + /* 470 */ 118, 118, 117, 113, 448, 1017, 1017, 1019, 1020, 27, + /* 480 */ 277, 277, 1190, 1191, 1192, 1156, 570, 532, 413, 1190, + /* 490 */ 1191, 1192, 352, 567, 552, 1264, 537, 521, 1156, 1520, + /* 500 */ 317, 1156, 285, 554, 489, 573, 570, 573, 486, 51, + /* 510 */ 51, 207, 123, 124, 114, 1214, 1214, 1048, 1051, 1040, + /* 520 */ 1040, 121, 121, 122, 122, 122, 122, 171, 1416, 13, + /* 530 */ 13, 413, 277, 277, 1190, 509, 119, 119, 118, 118, + /* 540 */ 118, 117, 113, 448, 433, 567, 522, 220, 519, 1556, + /* 550 */ 369, 550, 1190, 6, 536, 123, 124, 114, 1214, 1214, + /* 560 */ 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, 122, + /* 570 */ 145, 120, 120, 120, 120, 119, 119, 118, 118, 118, + /* 580 */ 117, 113, 448, 245, 570, 478, 508, 505, 504, 570, + /* 590 */ 1485, 1190, 1191, 1192, 1314, 1314, 503, 1190, 149, 429, + /* 600 */ 1190, 484, 413, 274, 369, 956, 876, 56, 56, 1190, + /* 610 */ 1191, 1192, 71, 71, 120, 120, 120, 120, 119, 119, + /* 620 */ 118, 118, 118, 117, 113, 448, 123, 124, 114, 1214, + /* 630 */ 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, + /* 640 */ 122, 413, 545, 1556, 83, 869, 98, 6, 932, 533, + /* 650 */ 852, 547, 151, 931, 1190, 1191, 1192, 1190, 1191, 1192, + /* 660 */ 290, 1547, 187, 1637, 399, 123, 124, 114, 1214, 1214, + /* 670 */ 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, 122, + /* 680 */ 570, 958, 570, 457, 957, 120, 120, 120, 120, 119, + /* 690 */ 119, 118, 118, 118, 117, 113, 448, 1156, 221, 1190, + /* 700 */ 335, 457, 456, 13, 13, 13, 13, 1007, 369, 467, + /* 710 */ 1156, 193, 413, 1156, 386, 1547, 1174, 32, 297, 478, + /* 720 */ 195, 1531, 5, 956, 120, 120, 120, 120, 119, 119, + /* 730 */ 118, 118, 118, 117, 113, 448, 123, 124, 114, 1214, + /* 740 */ 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, + /* 750 */ 122, 413, 1071, 423, 1190, 1028, 1190, 1191, 1192, 1190, + /* 760 */ 423, 336, 464, 322, 548, 1549, 446, 446, 446, 570, + /* 770 */ 3, 117, 113, 448, 457, 123, 124, 114, 1214, 1214, + /* 780 */ 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, 122, + /* 790 */ 1477, 570, 15, 15, 293, 120, 120, 120, 120, 119, + /* 800 */ 119, 118, 118, 118, 117, 113, 448, 1190, 570, 1490, + /* 810 */ 1416, 1190, 1191, 1192, 13, 13, 1190, 1191, 1192, 1548, + /* 820 */ 271, 271, 413, 286, 308, 561, 1012, 1490, 1492, 196, + /* 830 */ 288, 71, 71, 567, 120, 120, 120, 120, 119, 119, + /* 840 */ 118, 118, 118, 117, 113, 448, 123, 124, 114, 1214, + /* 850 */ 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, + /* 860 */ 122, 413, 201, 1091, 1190, 1191, 1192, 1328, 304, 1533, + /* 870 */ 392, 278, 278, 454, 568, 406, 926, 926, 570, 567, + /* 880 */ 570, 430, 495, 484, 567, 123, 124, 114, 1214, 1214, + /* 890 */ 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, 122, + /* 900 */ 1490, 71, 71, 13, 13, 120, 120, 120, 120, 119, + /* 910 */ 119, 118, 118, 118, 117, 113, 448, 570, 549, 570, + /* 920 */ 1581, 577, 2, 1242, 1096, 1096, 492, 1484, 309, 1529, + /* 930 */ 142, 328, 413, 840, 841, 842, 312, 1324, 305, 367, + /* 940 */ 43, 43, 57, 57, 120, 120, 120, 120, 119, 119, + /* 950 */ 118, 118, 118, 117, 113, 448, 123, 124, 114, 1214, + /* 960 */ 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, + /* 970 */ 122, 12, 277, 277, 570, 1156, 413, 576, 432, 1242, + /* 980 */ 469, 338, 296, 478, 309, 567, 142, 249, 1156, 308, + /* 990 */ 561, 1156, 325, 1324, 327, 495, 459, 71, 71, 233, + /* 1000 */ 283, 101, 114, 1214, 1214, 1048, 1051, 1040, 1040, 121, + /* 1010 */ 121, 122, 122, 122, 122, 120, 120, 120, 120, 119, + /* 1020 */ 119, 118, 118, 118, 117, 113, 448, 1112, 277, 277, + /* 1030 */ 1416, 452, 398, 1234, 443, 277, 277, 248, 247, 246, + /* 1040 */ 1323, 567, 1113, 313, 198, 294, 495, 1322, 567, 468, + /* 1050 */ 570, 1431, 398, 1134, 1027, 233, 418, 1114, 295, 120, + /* 1060 */ 120, 120, 120, 119, 119, 118, 118, 118, 117, 113, + /* 1070 */ 448, 1018, 104, 71, 71, 1017, 326, 500, 912, 570, + /* 1080 */ 277, 277, 277, 277, 1112, 1265, 419, 452, 913, 365, + /* 1090 */ 1575, 1319, 413, 567, 956, 567, 9, 202, 255, 1113, + /* 1100 */ 316, 491, 44, 44, 249, 563, 419, 1017, 1017, 1019, + /* 1110 */ 447, 1235, 413, 1607, 1114, 901, 123, 124, 114, 1214, + /* 1120 */ 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, + /* 1130 */ 122, 1235, 413, 1211, 215, 558, 123, 124, 114, 1214, + /* 1140 */ 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, + /* 1150 */ 122, 1135, 1635, 474, 1635, 255, 123, 111, 114, 1214, + /* 1160 */ 1214, 1048, 1051, 1040, 1040, 121, 121, 122, 122, 122, + /* 1170 */ 122, 1135, 1636, 418, 1636, 120, 120, 120, 120, 119, + /* 1180 */ 119, 118, 118, 118, 117, 113, 448, 221, 209, 355, + /* 1190 */ 1211, 1211, 147, 1430, 495, 120, 120, 120, 120, 119, + /* 1200 */ 119, 118, 118, 118, 117, 113, 448, 1260, 543, 523, + /* 1210 */ 892, 555, 956, 12, 570, 120, 120, 120, 120, 119, + /* 1220 */ 119, 118, 118, 118, 117, 113, 448, 542, 570, 864, + /* 1230 */ 1133, 365, 1575, 350, 1360, 413, 1167, 58, 58, 343, + /* 1240 */ 1359, 512, 277, 277, 277, 277, 277, 277, 1211, 893, + /* 1250 */ 1133, 59, 59, 463, 367, 567, 570, 567, 96, 567, + /* 1260 */ 124, 114, 1214, 1214, 1048, 1051, 1040, 1040, 121, 121, + /* 1270 */ 122, 122, 122, 122, 570, 1416, 570, 281, 1190, 60, + /* 1280 */ 60, 110, 396, 396, 395, 266, 393, 864, 1167, 849, + /* 1290 */ 570, 485, 570, 440, 345, 1156, 348, 61, 61, 62, + /* 1300 */ 62, 971, 227, 1554, 315, 435, 544, 6, 1156, 972, + /* 1310 */ 570, 1156, 314, 45, 45, 46, 46, 516, 120, 120, + /* 1320 */ 120, 120, 119, 119, 118, 118, 118, 117, 113, 448, + /* 1330 */ 420, 173, 1536, 47, 47, 1190, 1191, 1192, 108, 562, + /* 1340 */ 329, 4, 229, 1555, 932, 570, 441, 6, 570, 931, + /* 1350 */ 164, 570, 1294, 137, 1194, 565, 570, 1553, 570, 1093, + /* 1360 */ 570, 6, 570, 1093, 535, 570, 872, 8, 49, 49, + /* 1370 */ 228, 50, 50, 570, 63, 63, 570, 461, 449, 64, + /* 1380 */ 64, 65, 65, 14, 14, 66, 66, 411, 129, 129, + /* 1390 */ 559, 570, 462, 570, 1509, 490, 67, 67, 570, 52, + /* 1400 */ 52, 550, 411, 471, 539, 414, 226, 1027, 570, 538, + /* 1410 */ 308, 561, 1194, 411, 68, 68, 69, 69, 570, 1027, + /* 1420 */ 570, 53, 53, 872, 1018, 106, 106, 529, 1017, 570, + /* 1430 */ 1508, 159, 159, 107, 455, 449, 572, 571, 475, 307, + /* 1440 */ 1017, 160, 160, 76, 76, 570, 1552, 470, 411, 411, + /* 1450 */ 6, 1229, 54, 54, 482, 276, 219, 570, 891, 890, + /* 1460 */ 1017, 1017, 1019, 84, 206, 1210, 230, 282, 72, 72, + /* 1470 */ 333, 487, 1017, 1017, 1019, 1020, 27, 1580, 1178, 451, + /* 1480 */ 130, 130, 281, 148, 105, 38, 103, 396, 396, 395, + /* 1490 */ 266, 393, 570, 1130, 849, 400, 570, 108, 562, 570, + /* 1500 */ 4, 311, 570, 30, 17, 570, 279, 227, 570, 315, + /* 1510 */ 108, 562, 472, 4, 565, 73, 73, 314, 570, 157, + /* 1520 */ 157, 570, 131, 131, 530, 132, 132, 565, 128, 128, + /* 1530 */ 570, 158, 158, 570, 31, 291, 570, 449, 334, 525, + /* 1540 */ 98, 152, 152, 424, 136, 136, 1009, 229, 254, 559, + /* 1550 */ 449, 483, 340, 135, 135, 164, 133, 133, 137, 134, + /* 1560 */ 134, 879, 559, 539, 570, 477, 570, 254, 540, 479, + /* 1570 */ 339, 254, 98, 898, 899, 228, 539, 570, 1027, 570, + /* 1580 */ 1078, 538, 210, 232, 106, 106, 1356, 75, 75, 77, + /* 1590 */ 77, 1027, 107, 344, 449, 572, 571, 106, 106, 1017, + /* 1600 */ 74, 74, 42, 42, 570, 107, 347, 449, 572, 571, + /* 1610 */ 414, 501, 1017, 251, 363, 308, 561, 1139, 353, 879, + /* 1620 */ 98, 1074, 349, 251, 362, 1595, 351, 48, 48, 1021, + /* 1630 */ 1307, 1017, 1017, 1019, 1020, 27, 1293, 1291, 1078, 455, + /* 1640 */ 965, 929, 254, 110, 1017, 1017, 1019, 1020, 27, 1178, + /* 1650 */ 451, 974, 975, 281, 108, 562, 1292, 4, 396, 396, + /* 1660 */ 395, 266, 393, 1347, 1090, 849, 1090, 1089, 862, 1089, + /* 1670 */ 146, 565, 930, 358, 110, 303, 368, 557, 227, 1368, + /* 1680 */ 315, 108, 562, 1415, 4, 1343, 496, 1021, 314, 1354, + /* 1690 */ 1569, 556, 1421, 1272, 449, 204, 1263, 1251, 565, 1250, + /* 1700 */ 1252, 1588, 269, 1340, 371, 373, 559, 375, 11, 212, + /* 1710 */ 397, 225, 321, 284, 1402, 460, 287, 331, 229, 332, + /* 1720 */ 292, 449, 324, 216, 337, 1407, 164, 481, 377, 137, + /* 1730 */ 1406, 404, 506, 559, 1290, 1027, 361, 1481, 199, 1591, + /* 1740 */ 211, 106, 106, 936, 1480, 1229, 228, 560, 175, 107, + /* 1750 */ 200, 449, 572, 571, 258, 391, 1017, 1528, 1526, 223, + /* 1760 */ 1226, 422, 1027, 83, 208, 79, 82, 184, 106, 106, + /* 1770 */ 1486, 126, 1397, 550, 169, 320, 107, 1403, 449, 572, + /* 1780 */ 571, 414, 177, 1017, 1390, 323, 308, 561, 1017, 1017, + /* 1790 */ 1019, 1020, 27, 465, 35, 235, 100, 562, 499, 4, + /* 1800 */ 179, 180, 181, 466, 182, 96, 402, 1409, 473, 1408, + /* 1810 */ 455, 36, 1411, 565, 188, 1017, 1017, 1019, 1020, 27, + /* 1820 */ 405, 1475, 488, 239, 89, 494, 270, 192, 1497, 342, + /* 1830 */ 241, 497, 346, 242, 515, 243, 449, 1253, 1310, 1309, + /* 1840 */ 407, 91, 436, 1308, 883, 217, 438, 439, 559, 524, + /* 1850 */ 531, 408, 1351, 1606, 1301, 301, 1280, 1605, 360, 1279, + /* 1860 */ 1278, 1604, 1574, 302, 95, 366, 370, 372, 1300, 1352, + /* 1870 */ 1350, 374, 256, 257, 442, 10, 1349, 1027, 1461, 385, + /* 1880 */ 97, 1375, 102, 106, 106, 534, 1560, 34, 1559, 574, + /* 1890 */ 1184, 107, 265, 449, 572, 571, 267, 268, 1017, 203, + /* 1900 */ 1333, 383, 389, 1332, 390, 575, 376, 1248, 1243, 1513, + /* 1910 */ 161, 143, 1374, 1514, 1512, 162, 299, 1511, 163, 213, + /* 1920 */ 836, 214, 78, 450, 205, 310, 224, 1088, 140, 1086, + /* 1930 */ 1017, 1017, 1019, 1020, 27, 318, 306, 176, 165, 1210, + /* 1940 */ 178, 231, 915, 234, 330, 1102, 183, 166, 167, 425, + /* 1950 */ 427, 185, 85, 86, 87, 168, 88, 415, 1105, 236, + /* 1960 */ 174, 237, 416, 1101, 154, 18, 238, 341, 1223, 240, + /* 1970 */ 254, 493, 190, 1094, 37, 189, 851, 498, 362, 244, + /* 1980 */ 354, 510, 191, 90, 170, 502, 19, 20, 507, 93, + /* 1990 */ 881, 357, 92, 300, 894, 155, 517, 218, 1172, 156, + /* 2000 */ 1054, 959, 1141, 94, 39, 1140, 273, 275, 964, 194, + /* 2010 */ 110, 1158, 1162, 1160, 253, 21, 1166, 7, 1146, 33, + /* 2020 */ 22, 197, 23, 24, 25, 1165, 546, 26, 98, 1069, + /* 2030 */ 1055, 1053, 1057, 1111, 1058, 1110, 259, 260, 28, 40, + /* 2040 */ 1180, 1022, 863, 109, 29, 566, 394, 1179, 139, 172, + /* 2050 */ 925, 261, 1239, 1239, 1239, 1239, 1239, 1239, 1239, 1239, + /* 2060 */ 262, 1239, 1239, 1239, 1239, 1597, 1239, 1239, 1239, 1239, + /* 2070 */ 1596, +}; +static const YYCODETYPE yy_lookahead[] = { + /* 0 */ 193, 223, 193, 225, 193, 193, 193, 274, 275, 276, + /* 10 */ 193, 233, 219, 193, 274, 275, 276, 206, 206, 19, + /* 20 */ 193, 219, 216, 216, 217, 216, 217, 193, 295, 216, + /* 30 */ 217, 31, 205, 216, 217, 193, 216, 217, 213, 39, + /* 40 */ 228, 193, 230, 43, 44, 45, 46, 47, 48, 49, + /* 50 */ 50, 51, 52, 53, 54, 55, 56, 57, 193, 19, + /* 60 */ 185, 186, 187, 188, 189, 190, 253, 274, 275, 276, + /* 70 */ 195, 193, 197, 253, 216, 262, 274, 275, 276, 204, + /* 80 */ 238, 204, 262, 43, 44, 45, 46, 47, 48, 49, + /* 90 */ 50, 51, 52, 53, 54, 55, 56, 57, 264, 274, + /* 100 */ 275, 276, 102, 103, 104, 105, 106, 107, 108, 109, + /* 110 */ 110, 111, 112, 113, 239, 240, 239, 240, 210, 211, + /* 120 */ 212, 314, 315, 314, 59, 316, 86, 252, 88, 252, + /* 130 */ 19, 314, 315, 256, 257, 309, 25, 72, 296, 313, + /* 140 */ 193, 266, 102, 103, 104, 105, 106, 107, 108, 109, + /* 150 */ 110, 111, 112, 113, 43, 44, 45, 46, 47, 48, + /* 160 */ 49, 50, 51, 52, 53, 54, 55, 56, 57, 81, + /* 170 */ 292, 59, 307, 298, 108, 109, 110, 111, 112, 113, + /* 180 */ 69, 116, 117, 118, 72, 128, 129, 193, 241, 22, + /* 190 */ 113, 54, 55, 56, 57, 58, 102, 103, 104, 105, + /* 200 */ 106, 107, 108, 109, 110, 111, 112, 113, 120, 193, + /* 210 */ 216, 217, 25, 102, 103, 104, 105, 106, 107, 108, + /* 220 */ 109, 110, 111, 112, 113, 231, 138, 139, 116, 117, + /* 230 */ 118, 106, 107, 19, 216, 54, 55, 56, 57, 102, + /* 240 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 250 */ 113, 304, 25, 46, 47, 48, 49, 43, 44, 45, + /* 260 */ 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, + /* 270 */ 56, 57, 193, 106, 107, 59, 193, 19, 153, 263, + /* 280 */ 155, 67, 24, 102, 103, 104, 105, 106, 107, 108, + /* 290 */ 109, 110, 111, 112, 113, 216, 217, 59, 230, 216, + /* 300 */ 217, 43, 44, 45, 46, 47, 48, 49, 50, 51, + /* 310 */ 52, 53, 54, 55, 56, 57, 102, 103, 104, 105, + /* 320 */ 106, 107, 108, 109, 110, 111, 112, 113, 121, 142, + /* 330 */ 59, 193, 116, 117, 118, 119, 253, 204, 122, 123, + /* 340 */ 124, 19, 20, 81, 22, 262, 108, 19, 132, 165, + /* 350 */ 166, 193, 24, 126, 116, 117, 118, 278, 36, 193, + /* 360 */ 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + /* 370 */ 112, 113, 239, 240, 216, 217, 215, 106, 107, 241, + /* 380 */ 19, 59, 216, 217, 223, 252, 115, 116, 117, 118, + /* 390 */ 73, 120, 26, 71, 193, 22, 193, 231, 81, 128, + /* 400 */ 138, 139, 269, 81, 43, 44, 45, 46, 47, 48, + /* 410 */ 49, 50, 51, 52, 53, 54, 55, 56, 57, 216, + /* 420 */ 217, 198, 100, 95, 153, 59, 155, 193, 106, 107, + /* 430 */ 235, 236, 59, 193, 193, 249, 114, 251, 116, 117, + /* 440 */ 118, 113, 304, 121, 127, 204, 193, 119, 120, 121, + /* 450 */ 122, 123, 124, 125, 193, 138, 139, 216, 217, 131, + /* 460 */ 138, 139, 193, 102, 103, 104, 105, 106, 107, 108, + /* 470 */ 109, 110, 111, 112, 113, 153, 154, 155, 156, 157, + /* 480 */ 239, 240, 116, 117, 118, 76, 193, 193, 19, 116, + /* 490 */ 117, 118, 23, 252, 253, 193, 87, 204, 89, 238, + /* 500 */ 193, 92, 268, 262, 281, 203, 193, 205, 285, 216, + /* 510 */ 217, 150, 43, 44, 45, 46, 47, 48, 49, 50, + /* 520 */ 51, 52, 53, 54, 55, 56, 57, 193, 193, 216, + /* 530 */ 217, 19, 239, 240, 59, 23, 106, 107, 108, 109, + /* 540 */ 110, 111, 112, 113, 231, 252, 253, 193, 308, 309, + /* 550 */ 193, 145, 59, 313, 145, 43, 44, 45, 46, 47, + /* 560 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* 570 */ 164, 102, 103, 104, 105, 106, 107, 108, 109, 110, + /* 580 */ 111, 112, 113, 119, 193, 193, 122, 123, 124, 193, + /* 590 */ 283, 116, 117, 118, 235, 236, 132, 59, 241, 264, + /* 600 */ 59, 193, 19, 23, 193, 25, 23, 216, 217, 116, + /* 610 */ 117, 118, 216, 217, 102, 103, 104, 105, 106, 107, + /* 620 */ 108, 109, 110, 111, 112, 113, 43, 44, 45, 46, + /* 630 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 640 */ 57, 19, 308, 309, 151, 23, 25, 313, 135, 253, + /* 650 */ 21, 193, 241, 140, 116, 117, 118, 116, 117, 118, + /* 660 */ 268, 304, 22, 301, 302, 43, 44, 45, 46, 47, + /* 670 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* 680 */ 193, 143, 193, 193, 143, 102, 103, 104, 105, 106, + /* 690 */ 107, 108, 109, 110, 111, 112, 113, 76, 118, 59, + /* 700 */ 292, 211, 212, 216, 217, 216, 217, 73, 193, 80, + /* 710 */ 89, 25, 19, 92, 193, 304, 23, 22, 231, 193, + /* 720 */ 231, 193, 22, 143, 102, 103, 104, 105, 106, 107, + /* 730 */ 108, 109, 110, 111, 112, 113, 43, 44, 45, 46, + /* 740 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 750 */ 57, 19, 123, 193, 59, 23, 116, 117, 118, 59, + /* 760 */ 193, 127, 128, 129, 306, 307, 210, 211, 212, 193, + /* 770 */ 22, 111, 112, 113, 284, 43, 44, 45, 46, 47, + /* 780 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* 790 */ 161, 193, 216, 217, 268, 102, 103, 104, 105, 106, + /* 800 */ 107, 108, 109, 110, 111, 112, 113, 59, 193, 193, + /* 810 */ 193, 116, 117, 118, 216, 217, 116, 117, 118, 304, + /* 820 */ 239, 240, 19, 263, 138, 139, 23, 211, 212, 231, + /* 830 */ 263, 216, 217, 252, 102, 103, 104, 105, 106, 107, + /* 840 */ 108, 109, 110, 111, 112, 113, 43, 44, 45, 46, + /* 850 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 860 */ 57, 19, 193, 11, 116, 117, 118, 240, 253, 193, + /* 870 */ 201, 239, 240, 193, 134, 206, 136, 137, 193, 252, + /* 880 */ 193, 264, 193, 193, 252, 43, 44, 45, 46, 47, + /* 890 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* 900 */ 284, 216, 217, 216, 217, 102, 103, 104, 105, 106, + /* 910 */ 107, 108, 109, 110, 111, 112, 113, 193, 231, 193, + /* 920 */ 187, 188, 189, 190, 127, 128, 129, 238, 195, 193, + /* 930 */ 197, 16, 19, 7, 8, 9, 193, 204, 253, 193, + /* 940 */ 216, 217, 216, 217, 102, 103, 104, 105, 106, 107, + /* 950 */ 108, 109, 110, 111, 112, 113, 43, 44, 45, 46, + /* 960 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 970 */ 57, 213, 239, 240, 193, 76, 19, 188, 232, 190, + /* 980 */ 128, 129, 292, 193, 195, 252, 197, 46, 89, 138, + /* 990 */ 139, 92, 77, 204, 79, 193, 269, 216, 217, 266, + /* 1000 */ 204, 159, 45, 46, 47, 48, 49, 50, 51, 52, + /* 1010 */ 53, 54, 55, 56, 57, 102, 103, 104, 105, 106, + /* 1020 */ 107, 108, 109, 110, 111, 112, 113, 12, 239, 240, + /* 1030 */ 193, 298, 22, 23, 253, 239, 240, 127, 128, 129, + /* 1040 */ 238, 252, 27, 193, 286, 204, 193, 204, 252, 291, + /* 1050 */ 193, 273, 22, 23, 100, 266, 115, 42, 268, 102, + /* 1060 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 1070 */ 113, 117, 159, 216, 217, 121, 161, 19, 63, 193, + /* 1080 */ 239, 240, 239, 240, 12, 208, 209, 298, 73, 311, + /* 1090 */ 312, 238, 19, 252, 25, 252, 22, 24, 24, 27, + /* 1100 */ 193, 264, 216, 217, 46, 208, 209, 153, 154, 155, + /* 1110 */ 253, 101, 19, 23, 42, 25, 43, 44, 45, 46, + /* 1120 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 1130 */ 57, 101, 19, 59, 25, 63, 43, 44, 45, 46, + /* 1140 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 1150 */ 57, 22, 23, 115, 25, 24, 43, 44, 45, 46, + /* 1160 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 1170 */ 57, 22, 23, 115, 25, 102, 103, 104, 105, 106, + /* 1180 */ 107, 108, 109, 110, 111, 112, 113, 118, 150, 131, + /* 1190 */ 59, 117, 22, 273, 193, 102, 103, 104, 105, 106, + /* 1200 */ 107, 108, 109, 110, 111, 112, 113, 204, 66, 204, + /* 1210 */ 35, 204, 143, 213, 193, 102, 103, 104, 105, 106, + /* 1220 */ 107, 108, 109, 110, 111, 112, 113, 85, 193, 59, + /* 1230 */ 101, 311, 312, 16, 193, 19, 94, 216, 217, 238, + /* 1240 */ 193, 66, 239, 240, 239, 240, 239, 240, 117, 74, + /* 1250 */ 101, 216, 217, 193, 193, 252, 193, 252, 149, 252, + /* 1260 */ 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + /* 1270 */ 54, 55, 56, 57, 193, 193, 193, 5, 59, 216, + /* 1280 */ 217, 25, 10, 11, 12, 13, 14, 117, 146, 17, + /* 1290 */ 193, 291, 193, 232, 77, 76, 79, 216, 217, 216, + /* 1300 */ 217, 31, 30, 309, 32, 130, 87, 313, 89, 39, + /* 1310 */ 193, 92, 40, 216, 217, 216, 217, 108, 102, 103, + /* 1320 */ 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + /* 1330 */ 299, 300, 193, 216, 217, 116, 117, 118, 19, 20, + /* 1340 */ 193, 22, 70, 309, 135, 193, 264, 313, 193, 140, + /* 1350 */ 78, 193, 226, 81, 59, 36, 193, 309, 193, 29, + /* 1360 */ 193, 313, 193, 33, 145, 193, 59, 48, 216, 217, + /* 1370 */ 98, 216, 217, 193, 216, 217, 193, 244, 59, 216, + /* 1380 */ 217, 216, 217, 216, 217, 216, 217, 254, 216, 217, + /* 1390 */ 71, 193, 244, 193, 193, 65, 216, 217, 193, 216, + /* 1400 */ 217, 145, 254, 244, 85, 133, 15, 100, 193, 90, + /* 1410 */ 138, 139, 117, 254, 216, 217, 216, 217, 193, 100, + /* 1420 */ 193, 216, 217, 116, 117, 106, 107, 19, 121, 193, + /* 1430 */ 193, 216, 217, 114, 162, 116, 117, 118, 244, 244, + /* 1440 */ 121, 216, 217, 216, 217, 193, 309, 129, 254, 254, + /* 1450 */ 313, 60, 216, 217, 19, 256, 257, 193, 120, 121, + /* 1460 */ 153, 154, 155, 149, 150, 25, 24, 99, 216, 217, + /* 1470 */ 152, 193, 153, 154, 155, 156, 157, 0, 1, 2, + /* 1480 */ 216, 217, 5, 22, 158, 24, 160, 10, 11, 12, + /* 1490 */ 13, 14, 193, 23, 17, 25, 193, 19, 20, 193, + /* 1500 */ 22, 133, 193, 22, 22, 193, 22, 30, 193, 32, + /* 1510 */ 19, 20, 129, 22, 36, 216, 217, 40, 193, 216, + /* 1520 */ 217, 193, 216, 217, 116, 216, 217, 36, 216, 217, + /* 1530 */ 193, 216, 217, 193, 53, 152, 193, 59, 23, 19, + /* 1540 */ 25, 216, 217, 61, 216, 217, 23, 70, 25, 71, + /* 1550 */ 59, 116, 193, 216, 217, 78, 216, 217, 81, 216, + /* 1560 */ 217, 59, 71, 85, 193, 23, 193, 25, 90, 23, + /* 1570 */ 23, 25, 25, 7, 8, 98, 85, 193, 100, 193, + /* 1580 */ 59, 90, 142, 141, 106, 107, 193, 216, 217, 216, + /* 1590 */ 217, 100, 114, 193, 116, 117, 118, 106, 107, 121, + /* 1600 */ 216, 217, 216, 217, 193, 114, 193, 116, 117, 118, + /* 1610 */ 133, 23, 121, 25, 121, 138, 139, 97, 23, 117, + /* 1620 */ 25, 23, 193, 25, 131, 141, 193, 216, 217, 59, + /* 1630 */ 193, 153, 154, 155, 156, 157, 226, 193, 117, 162, + /* 1640 */ 23, 23, 25, 25, 153, 154, 155, 156, 157, 1, + /* 1650 */ 2, 83, 84, 5, 19, 20, 226, 22, 10, 11, + /* 1660 */ 12, 13, 14, 258, 153, 17, 155, 153, 23, 155, + /* 1670 */ 25, 36, 23, 193, 25, 255, 193, 236, 30, 193, + /* 1680 */ 32, 19, 20, 193, 22, 193, 288, 117, 40, 193, + /* 1690 */ 318, 193, 193, 193, 59, 242, 193, 193, 36, 193, + /* 1700 */ 193, 193, 287, 255, 255, 255, 71, 255, 243, 214, + /* 1710 */ 191, 297, 267, 245, 271, 259, 259, 293, 70, 246, + /* 1720 */ 246, 59, 267, 229, 245, 271, 78, 293, 259, 81, + /* 1730 */ 271, 271, 220, 71, 225, 100, 219, 219, 249, 196, + /* 1740 */ 243, 106, 107, 108, 219, 60, 98, 280, 297, 114, + /* 1750 */ 249, 116, 117, 118, 141, 245, 121, 200, 200, 297, + /* 1760 */ 38, 200, 100, 151, 150, 294, 294, 22, 106, 107, + /* 1770 */ 283, 148, 250, 145, 43, 249, 114, 272, 116, 117, + /* 1780 */ 118, 133, 234, 121, 250, 249, 138, 139, 153, 154, + /* 1790 */ 155, 156, 157, 18, 270, 199, 19, 20, 18, 22, + /* 1800 */ 237, 237, 237, 200, 237, 149, 246, 272, 246, 272, + /* 1810 */ 162, 270, 234, 36, 234, 153, 154, 155, 156, 157, + /* 1820 */ 246, 246, 200, 199, 158, 62, 200, 22, 290, 289, + /* 1830 */ 199, 221, 200, 199, 115, 199, 59, 200, 218, 218, + /* 1840 */ 221, 22, 64, 218, 126, 165, 24, 113, 71, 305, + /* 1850 */ 144, 221, 261, 224, 227, 282, 218, 224, 218, 220, + /* 1860 */ 218, 218, 312, 282, 115, 221, 260, 260, 227, 261, + /* 1870 */ 261, 260, 200, 91, 82, 22, 261, 100, 277, 200, + /* 1880 */ 147, 265, 158, 106, 107, 146, 317, 25, 317, 202, + /* 1890 */ 13, 114, 194, 116, 117, 118, 194, 6, 121, 248, + /* 1900 */ 250, 249, 247, 250, 246, 192, 260, 192, 192, 213, + /* 1910 */ 207, 222, 265, 213, 213, 207, 222, 213, 207, 214, + /* 1920 */ 4, 214, 213, 3, 22, 163, 15, 23, 16, 23, + /* 1930 */ 153, 154, 155, 156, 157, 139, 279, 151, 130, 25, + /* 1940 */ 142, 24, 20, 144, 16, 1, 142, 130, 130, 61, + /* 1950 */ 37, 151, 53, 53, 53, 130, 53, 303, 116, 34, + /* 1960 */ 300, 141, 303, 1, 5, 22, 115, 161, 75, 141, + /* 1970 */ 25, 41, 115, 68, 24, 68, 20, 19, 131, 125, + /* 1980 */ 23, 96, 22, 22, 37, 67, 22, 22, 67, 149, + /* 1990 */ 59, 24, 22, 67, 28, 23, 22, 141, 23, 23, + /* 2000 */ 23, 143, 23, 25, 22, 97, 23, 23, 116, 22, + /* 2010 */ 25, 88, 75, 86, 34, 34, 75, 44, 23, 22, + /* 2020 */ 34, 25, 34, 34, 34, 93, 24, 34, 25, 23, + /* 2030 */ 23, 23, 23, 23, 11, 23, 25, 22, 22, 22, + /* 2040 */ 1, 23, 23, 22, 22, 25, 15, 1, 23, 25, + /* 2050 */ 135, 141, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2060 */ 141, 319, 319, 319, 319, 141, 319, 319, 319, 319, + /* 2070 */ 141, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2080 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2090 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2100 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2110 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2120 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2130 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2140 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2150 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2160 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2170 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2180 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2190 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2200 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2210 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2220 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2230 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2240 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, + /* 2250 */ 319, 319, 319, 319, 319, 319, +}; +#define YY_SHIFT_COUNT (577) +#define YY_SHIFT_MIN (0) +#define YY_SHIFT_MAX (2046) +static const unsigned short int yy_shift_ofst[] = { + /* 0 */ 1648, 1477, 1272, 322, 322, 262, 1319, 1478, 1491, 1662, + /* 10 */ 1662, 1662, 317, 0, 0, 214, 1093, 1662, 1662, 1662, + /* 20 */ 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, + /* 30 */ 271, 271, 1219, 1219, 216, 88, 262, 262, 262, 262, + /* 40 */ 262, 40, 111, 258, 361, 469, 512, 583, 622, 693, + /* 50 */ 732, 803, 842, 913, 1073, 1093, 1093, 1093, 1093, 1093, + /* 60 */ 1093, 1093, 1093, 1093, 1093, 1093, 1093, 1093, 1093, 1093, + /* 70 */ 1093, 1093, 1093, 1113, 1093, 1216, 957, 957, 1635, 1662, + /* 80 */ 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, + /* 90 */ 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, + /* 100 */ 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, + /* 110 */ 1662, 1662, 1662, 1662, 1777, 1662, 1662, 1662, 1662, 1662, + /* 120 */ 1662, 1662, 1662, 1662, 1662, 1662, 1662, 1662, 137, 181, + /* 130 */ 181, 181, 181, 181, 94, 430, 66, 65, 112, 366, + /* 140 */ 475, 475, 629, 1058, 475, 475, 125, 125, 475, 686, + /* 150 */ 686, 686, 660, 686, 57, 184, 184, 77, 77, 2071, + /* 160 */ 2071, 328, 328, 328, 493, 373, 373, 373, 373, 1015, + /* 170 */ 1015, 409, 366, 1129, 1149, 475, 475, 475, 475, 475, + /* 180 */ 475, 475, 475, 475, 475, 475, 475, 475, 475, 475, + /* 190 */ 475, 475, 475, 475, 475, 621, 621, 475, 852, 899, + /* 200 */ 899, 1295, 1295, 406, 851, 2071, 2071, 2071, 2071, 2071, + /* 210 */ 2071, 2071, 1307, 954, 954, 640, 464, 695, 238, 700, + /* 220 */ 538, 541, 748, 475, 475, 475, 475, 475, 475, 475, + /* 230 */ 475, 475, 475, 634, 475, 475, 475, 475, 475, 475, + /* 240 */ 475, 475, 475, 475, 475, 475, 1175, 1175, 1175, 475, + /* 250 */ 475, 475, 580, 475, 475, 475, 1074, 1142, 475, 475, + /* 260 */ 1072, 475, 475, 475, 475, 475, 475, 475, 475, 797, + /* 270 */ 1330, 740, 1131, 1131, 1131, 1131, 1069, 740, 740, 1209, + /* 280 */ 167, 926, 1391, 1038, 1314, 187, 1408, 1314, 1408, 1435, + /* 290 */ 1109, 1038, 1038, 1109, 1038, 187, 1435, 227, 1090, 941, + /* 300 */ 1270, 1270, 1270, 1408, 1256, 1256, 1326, 1440, 513, 1461, + /* 310 */ 1685, 1685, 1613, 1613, 1722, 1722, 1613, 1612, 1614, 1745, + /* 320 */ 1623, 1628, 1731, 1623, 1628, 1775, 1775, 1775, 1775, 1613, + /* 330 */ 1780, 1656, 1614, 1614, 1656, 1745, 1731, 1656, 1731, 1656, + /* 340 */ 1613, 1780, 1666, 1763, 1613, 1780, 1805, 1613, 1780, 1613, + /* 350 */ 1780, 1805, 1719, 1719, 1719, 1778, 1819, 1819, 1805, 1719, + /* 360 */ 1718, 1719, 1778, 1719, 1719, 1680, 1822, 1734, 1734, 1805, + /* 370 */ 1706, 1749, 1706, 1749, 1706, 1749, 1706, 1749, 1613, 1782, + /* 380 */ 1782, 1792, 1792, 1623, 1628, 1853, 1613, 1724, 1623, 1733, + /* 390 */ 1739, 1656, 1862, 1877, 1877, 1891, 1891, 1891, 2071, 2071, + /* 400 */ 2071, 2071, 2071, 2071, 2071, 2071, 2071, 2071, 2071, 2071, + /* 410 */ 2071, 2071, 2071, 207, 915, 1010, 1030, 1217, 910, 1170, + /* 420 */ 1470, 1368, 1481, 1442, 1318, 1383, 1515, 1482, 1523, 1542, + /* 430 */ 1546, 1547, 1588, 1595, 1502, 1338, 1566, 1493, 1520, 1521, + /* 440 */ 1598, 1617, 1568, 1618, 1511, 1514, 1645, 1649, 1570, 1484, + /* 450 */ 1916, 1920, 1902, 1762, 1911, 1912, 1904, 1906, 1796, 1786, + /* 460 */ 1808, 1914, 1914, 1917, 1798, 1922, 1799, 1928, 1944, 1804, + /* 470 */ 1817, 1914, 1818, 1888, 1913, 1914, 1800, 1899, 1900, 1901, + /* 480 */ 1903, 1825, 1842, 1925, 1820, 1962, 1959, 1943, 1851, 1806, + /* 490 */ 1905, 1945, 1907, 1893, 1930, 1828, 1857, 1950, 1956, 1958, + /* 500 */ 1847, 1854, 1960, 1918, 1961, 1964, 1957, 1965, 1921, 1931, + /* 510 */ 1967, 1885, 1966, 1970, 1926, 1947, 1972, 1840, 1974, 1975, + /* 520 */ 1976, 1977, 1978, 1979, 1982, 1908, 1856, 1983, 1984, 1892, + /* 530 */ 1980, 1987, 1858, 1985, 1981, 1986, 1988, 1989, 1923, 1937, + /* 540 */ 1927, 1973, 1941, 1932, 1990, 1995, 1997, 2002, 1996, 2003, + /* 550 */ 1993, 2006, 1985, 2007, 2008, 2009, 2010, 2011, 2012, 2015, + /* 560 */ 2023, 2016, 2017, 2018, 2019, 2021, 2022, 2020, 1915, 1910, + /* 570 */ 1919, 1924, 1929, 2024, 2025, 2031, 2039, 2046, +}; +#define YY_REDUCE_COUNT (412) +#define YY_REDUCE_MIN (-267) +#define YY_REDUCE_MAX (1716) +static const short yy_reduce_ofst[] = { + /* 0 */ -125, 733, 789, 241, 293, -123, -193, -191, -183, -187, + /* 10 */ -180, 83, 133, -207, -198, -267, -175, -6, 166, 313, + /* 20 */ 487, 396, 489, 598, 615, 685, 687, 79, 781, 857, + /* 30 */ 490, 616, 240, 334, -188, 796, 841, 843, 1003, 1005, + /* 40 */ 1007, -260, -260, -260, -260, -260, -260, -260, -260, -260, + /* 50 */ -260, -260, -260, -260, -260, -260, -260, -260, -260, -260, + /* 60 */ -260, -260, -260, -260, -260, -260, -260, -260, -260, -260, + /* 70 */ -260, -260, -260, -260, -260, -260, -260, -260, 158, 203, + /* 80 */ 391, 576, 724, 726, 886, 1021, 1035, 1063, 1081, 1083, + /* 90 */ 1097, 1099, 1117, 1152, 1155, 1158, 1163, 1165, 1167, 1169, + /* 100 */ 1172, 1180, 1183, 1198, 1200, 1205, 1215, 1225, 1227, 1236, + /* 110 */ 1252, 1264, 1299, 1303, 1306, 1309, 1312, 1315, 1325, 1328, + /* 120 */ 1337, 1340, 1343, 1371, 1373, 1384, 1386, 1411, -260, -260, + /* 130 */ -260, -260, -260, -260, -260, -260, -260, -53, 138, 302, + /* 140 */ -158, 357, 223, -222, 411, 458, -92, 556, 669, 581, + /* 150 */ 632, 581, -260, 632, 758, 778, 920, -260, -260, -260, + /* 160 */ -260, 161, 161, 161, 307, 234, 392, 526, 790, 195, + /* 170 */ 359, -174, -173, 362, 362, -189, 16, 560, 567, 261, + /* 180 */ 689, 802, 853, -122, -166, 408, 335, 617, 690, 837, + /* 190 */ 1001, 746, 1061, 515, 1082, 994, 1034, -135, 1000, 1048, + /* 200 */ 1137, 877, 897, 186, 627, 1031, 1133, 1148, 1159, 1194, + /* 210 */ 1199, 1195, -194, -142, 18, -152, 68, 201, 253, 269, + /* 220 */ 294, 354, 521, 528, 676, 680, 736, 743, 850, 907, + /* 230 */ 1041, 1047, 1060, 727, 1139, 1147, 1201, 1237, 1278, 1359, + /* 240 */ 1393, 1400, 1413, 1429, 1433, 1437, 1126, 1410, 1430, 1444, + /* 250 */ 1480, 1483, 1405, 1486, 1490, 1492, 1420, 1372, 1496, 1498, + /* 260 */ 1441, 1499, 253, 1500, 1503, 1504, 1506, 1507, 1508, 1398, + /* 270 */ 1415, 1453, 1448, 1449, 1450, 1452, 1405, 1453, 1453, 1465, + /* 280 */ 1495, 1519, 1414, 1443, 1445, 1468, 1456, 1455, 1457, 1424, + /* 290 */ 1473, 1454, 1459, 1474, 1460, 1479, 1434, 1512, 1494, 1509, + /* 300 */ 1517, 1518, 1525, 1469, 1489, 1501, 1467, 1510, 1497, 1543, + /* 310 */ 1451, 1462, 1557, 1558, 1471, 1472, 1561, 1487, 1505, 1524, + /* 320 */ 1522, 1526, 1548, 1534, 1536, 1563, 1564, 1565, 1567, 1603, + /* 330 */ 1596, 1560, 1535, 1537, 1562, 1541, 1578, 1574, 1580, 1575, + /* 340 */ 1622, 1624, 1538, 1540, 1626, 1631, 1610, 1632, 1634, 1637, + /* 350 */ 1636, 1619, 1620, 1621, 1625, 1627, 1629, 1633, 1630, 1638, + /* 360 */ 1639, 1640, 1641, 1642, 1643, 1550, 1544, 1573, 1581, 1644, + /* 370 */ 1591, 1606, 1608, 1607, 1609, 1611, 1615, 1646, 1672, 1569, + /* 380 */ 1571, 1616, 1647, 1650, 1652, 1601, 1679, 1657, 1653, 1651, + /* 390 */ 1655, 1658, 1687, 1698, 1702, 1713, 1715, 1716, 1654, 1659, + /* 400 */ 1660, 1703, 1696, 1700, 1701, 1704, 1708, 1689, 1694, 1705, + /* 410 */ 1707, 1709, 1711, +}; +static const YYACTIONTYPE yy_default[] = { + /* 0 */ 1641, 1641, 1641, 1470, 1237, 1348, 1237, 1237, 1237, 1470, + /* 10 */ 1470, 1470, 1237, 1378, 1378, 1523, 1270, 1237, 1237, 1237, + /* 20 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1469, 1237, 1237, + /* 30 */ 1237, 1237, 1558, 1558, 1237, 1237, 1237, 1237, 1237, 1237, + /* 40 */ 1237, 1237, 1387, 1237, 1394, 1237, 1237, 1237, 1237, 1237, + /* 50 */ 1471, 1472, 1237, 1237, 1237, 1522, 1524, 1487, 1401, 1400, + /* 60 */ 1399, 1398, 1505, 1365, 1392, 1385, 1389, 1465, 1466, 1464, + /* 70 */ 1468, 1472, 1471, 1237, 1388, 1435, 1449, 1434, 1237, 1237, + /* 80 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 90 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 100 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 110 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 120 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1443, 1448, + /* 130 */ 1455, 1447, 1444, 1437, 1436, 1438, 1439, 1237, 1237, 1261, + /* 140 */ 1237, 1237, 1258, 1312, 1237, 1237, 1237, 1237, 1237, 1542, + /* 150 */ 1541, 1237, 1440, 1237, 1270, 1429, 1428, 1452, 1441, 1451, + /* 160 */ 1450, 1530, 1594, 1593, 1488, 1237, 1237, 1237, 1237, 1237, + /* 170 */ 1237, 1558, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 180 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 190 */ 1237, 1237, 1237, 1237, 1237, 1558, 1558, 1237, 1270, 1558, + /* 200 */ 1558, 1266, 1266, 1372, 1237, 1537, 1339, 1339, 1339, 1339, + /* 210 */ 1348, 1339, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 220 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1527, 1525, 1237, + /* 230 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 240 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 250 */ 1237, 1237, 1237, 1237, 1237, 1237, 1344, 1237, 1237, 1237, + /* 260 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1587, 1237, + /* 270 */ 1500, 1326, 1344, 1344, 1344, 1344, 1346, 1327, 1325, 1338, + /* 280 */ 1271, 1244, 1633, 1404, 1393, 1345, 1367, 1393, 1367, 1630, + /* 290 */ 1391, 1404, 1404, 1391, 1404, 1345, 1630, 1287, 1610, 1282, + /* 300 */ 1378, 1378, 1378, 1367, 1372, 1372, 1467, 1345, 1338, 1237, + /* 310 */ 1633, 1633, 1353, 1353, 1632, 1632, 1353, 1488, 1617, 1413, + /* 320 */ 1386, 1372, 1315, 1386, 1372, 1321, 1321, 1321, 1321, 1353, + /* 330 */ 1255, 1391, 1617, 1617, 1391, 1413, 1315, 1391, 1315, 1391, + /* 340 */ 1353, 1255, 1504, 1627, 1353, 1255, 1478, 1353, 1255, 1353, + /* 350 */ 1255, 1478, 1313, 1313, 1313, 1302, 1237, 1237, 1478, 1313, + /* 360 */ 1287, 1313, 1302, 1313, 1313, 1576, 1237, 1482, 1482, 1478, + /* 370 */ 1371, 1366, 1371, 1366, 1371, 1366, 1371, 1366, 1353, 1568, + /* 380 */ 1568, 1381, 1381, 1386, 1372, 1473, 1353, 1237, 1386, 1384, + /* 390 */ 1382, 1391, 1305, 1590, 1590, 1586, 1586, 1586, 1638, 1638, + /* 400 */ 1537, 1603, 1270, 1270, 1270, 1270, 1603, 1289, 1289, 1271, + /* 410 */ 1271, 1270, 1603, 1237, 1237, 1237, 1237, 1237, 1237, 1598, + /* 420 */ 1237, 1532, 1489, 1357, 1237, 1237, 1237, 1237, 1237, 1237, + /* 430 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1543, 1237, + /* 440 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1418, + /* 450 */ 1237, 1240, 1534, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 460 */ 1237, 1395, 1396, 1358, 1237, 1237, 1237, 1237, 1237, 1237, + /* 470 */ 1237, 1410, 1237, 1237, 1237, 1405, 1237, 1237, 1237, 1237, + /* 480 */ 1237, 1237, 1237, 1237, 1629, 1237, 1237, 1237, 1237, 1237, + /* 490 */ 1237, 1503, 1502, 1237, 1237, 1355, 1237, 1237, 1237, 1237, + /* 500 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1285, + /* 510 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 520 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 530 */ 1237, 1237, 1237, 1383, 1237, 1237, 1237, 1237, 1237, 1237, + /* 540 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1573, 1373, + /* 550 */ 1237, 1237, 1620, 1237, 1237, 1237, 1237, 1237, 1237, 1237, + /* 560 */ 1237, 1237, 1237, 1237, 1237, 1237, 1237, 1614, 1329, 1420, + /* 570 */ 1237, 1419, 1423, 1259, 1237, 1249, 1237, 1237, +}; +/********** End of lemon-generated parsing tables *****************************/ + +/* The next table maps tokens (terminal symbols) into fallback tokens. +** If a construct like the following: +** +** %fallback ID X Y Z. +** +** appears in the grammar, then ID becomes a fallback token for X, Y, +** and Z. Whenever one of the tokens X, Y, or Z is input to the parser +** but it does not parse, the type of the token is changed to ID and +** the parse is retried before an error is thrown. +** +** This feature can be used, for example, to cause some keywords in a language +** to revert to identifiers if they keyword does not apply in the context where +** it appears. +*/ +#ifdef YYFALLBACK +static const YYCODETYPE yyFallback[] = { + 0, /* $ => nothing */ + 0, /* SEMI => nothing */ + 59, /* EXPLAIN => ID */ + 59, /* QUERY => ID */ + 59, /* PLAN => ID */ + 59, /* BEGIN => ID */ + 0, /* TRANSACTION => nothing */ + 59, /* DEFERRED => ID */ + 59, /* IMMEDIATE => ID */ + 59, /* EXCLUSIVE => ID */ + 0, /* COMMIT => nothing */ + 59, /* END => ID */ + 59, /* ROLLBACK => ID */ + 59, /* SAVEPOINT => ID */ + 59, /* RELEASE => ID */ + 0, /* TO => nothing */ + 0, /* TABLE => nothing */ + 0, /* CREATE => nothing */ + 59, /* IF => ID */ + 0, /* NOT => nothing */ + 0, /* EXISTS => nothing */ + 59, /* TEMP => ID */ + 0, /* LP => nothing */ + 0, /* RP => nothing */ + 0, /* AS => nothing */ + 0, /* COMMA => nothing */ + 59, /* WITHOUT => ID */ + 59, /* ABORT => ID */ + 59, /* ACTION => ID */ + 59, /* AFTER => ID */ + 59, /* ANALYZE => ID */ + 59, /* ASC => ID */ + 59, /* ATTACH => ID */ + 59, /* BEFORE => ID */ + 59, /* BY => ID */ + 59, /* CASCADE => ID */ + 59, /* CAST => ID */ + 59, /* CONFLICT => ID */ + 59, /* DATABASE => ID */ + 59, /* DESC => ID */ + 59, /* DETACH => ID */ + 59, /* EACH => ID */ + 59, /* FAIL => ID */ + 0, /* OR => nothing */ + 0, /* AND => nothing */ + 0, /* IS => nothing */ + 59, /* MATCH => ID */ + 59, /* LIKE_KW => ID */ + 0, /* BETWEEN => nothing */ + 0, /* IN => nothing */ + 0, /* ISNULL => nothing */ + 0, /* NOTNULL => nothing */ + 0, /* NE => nothing */ + 0, /* EQ => nothing */ + 0, /* GT => nothing */ + 0, /* LE => nothing */ + 0, /* LT => nothing */ + 0, /* GE => nothing */ + 0, /* ESCAPE => nothing */ + 0, /* ID => nothing */ + 59, /* COLUMNKW => ID */ + 59, /* DO => ID */ + 59, /* FOR => ID */ + 59, /* IGNORE => ID */ + 59, /* INITIALLY => ID */ + 59, /* INSTEAD => ID */ + 59, /* NO => ID */ + 59, /* KEY => ID */ + 59, /* OF => ID */ + 59, /* OFFSET => ID */ + 59, /* PRAGMA => ID */ + 59, /* RAISE => ID */ + 59, /* RECURSIVE => ID */ + 59, /* REPLACE => ID */ + 59, /* RESTRICT => ID */ + 59, /* ROW => ID */ + 59, /* ROWS => ID */ + 59, /* TRIGGER => ID */ + 59, /* VACUUM => ID */ + 59, /* VIEW => ID */ + 59, /* VIRTUAL => ID */ + 59, /* WITH => ID */ + 59, /* NULLS => ID */ + 59, /* FIRST => ID */ + 59, /* LAST => ID */ + 59, /* CURRENT => ID */ + 59, /* FOLLOWING => ID */ + 59, /* PARTITION => ID */ + 59, /* PRECEDING => ID */ + 59, /* RANGE => ID */ + 59, /* UNBOUNDED => ID */ + 59, /* EXCLUDE => ID */ + 59, /* GROUPS => ID */ + 59, /* OTHERS => ID */ + 59, /* TIES => ID */ + 59, /* GENERATED => ID */ + 59, /* ALWAYS => ID */ + 59, /* MATERIALIZED => ID */ + 59, /* REINDEX => ID */ + 59, /* RENAME => ID */ + 59, /* CTIME_KW => ID */ + 0, /* ANY => nothing */ + 0, /* BITAND => nothing */ + 0, /* BITOR => nothing */ + 0, /* LSHIFT => nothing */ + 0, /* RSHIFT => nothing */ + 0, /* PLUS => nothing */ + 0, /* MINUS => nothing */ + 0, /* STAR => nothing */ + 0, /* SLASH => nothing */ + 0, /* REM => nothing */ + 0, /* CONCAT => nothing */ + 0, /* PTR => nothing */ + 0, /* COLLATE => nothing */ + 0, /* BITNOT => nothing */ + 0, /* ON => nothing */ + 0, /* INDEXED => nothing */ + 0, /* STRING => nothing */ + 0, /* JOIN_KW => nothing */ + 0, /* CONSTRAINT => nothing */ + 0, /* DEFAULT => nothing */ + 0, /* NULL => nothing */ + 0, /* PRIMARY => nothing */ + 0, /* UNIQUE => nothing */ + 0, /* CHECK => nothing */ + 0, /* REFERENCES => nothing */ + 0, /* AUTOINCR => nothing */ + 0, /* INSERT => nothing */ + 0, /* DELETE => nothing */ + 0, /* UPDATE => nothing */ + 0, /* SET => nothing */ + 0, /* DEFERRABLE => nothing */ + 0, /* FOREIGN => nothing */ + 0, /* DROP => nothing */ + 0, /* UNION => nothing */ + 0, /* ALL => nothing */ + 0, /* EXCEPT => nothing */ + 0, /* INTERSECT => nothing */ + 0, /* SELECT => nothing */ + 0, /* VALUES => nothing */ + 0, /* DISTINCT => nothing */ + 0, /* DOT => nothing */ + 0, /* FROM => nothing */ + 0, /* JOIN => nothing */ + 0, /* USING => nothing */ + 0, /* ORDER => nothing */ + 0, /* GROUP => nothing */ + 0, /* HAVING => nothing */ + 0, /* LIMIT => nothing */ + 0, /* WHERE => nothing */ + 0, /* RETURNING => nothing */ + 0, /* INTO => nothing */ + 0, /* NOTHING => nothing */ + 0, /* FLOAT => nothing */ + 0, /* BLOB => nothing */ + 0, /* INTEGER => nothing */ + 0, /* VARIABLE => nothing */ + 0, /* CASE => nothing */ + 0, /* WHEN => nothing */ + 0, /* THEN => nothing */ + 0, /* ELSE => nothing */ + 0, /* INDEX => nothing */ + 0, /* ALTER => nothing */ + 0, /* ADD => nothing */ + 0, /* WINDOW => nothing */ + 0, /* OVER => nothing */ + 0, /* FILTER => nothing */ + 0, /* COLUMN => nothing */ + 0, /* AGG_FUNCTION => nothing */ + 0, /* AGG_COLUMN => nothing */ + 0, /* TRUEFALSE => nothing */ + 0, /* ISNOT => nothing */ + 0, /* FUNCTION => nothing */ + 0, /* UMINUS => nothing */ + 0, /* UPLUS => nothing */ + 0, /* TRUTH => nothing */ + 0, /* REGISTER => nothing */ + 0, /* VECTOR => nothing */ + 0, /* SELECT_COLUMN => nothing */ + 0, /* IF_NULL_ROW => nothing */ + 0, /* ASTERISK => nothing */ + 0, /* SPAN => nothing */ + 0, /* ERROR => nothing */ + 0, /* SPACE => nothing */ + 0, /* ILLEGAL => nothing */ +}; +#endif /* YYFALLBACK */ + +/* The following structure represents a single element of the +** parser's stack. Information stored includes: +** +** + The state number for the parser at this level of the stack. +** +** + The value of the token stored at this level of the stack. +** (In other words, the "major" token.) +** +** + The semantic value stored at this level of the stack. This is +** the information used by the action routines in the grammar. +** It is sometimes called the "minor" token. +** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. +*/ +struct yyStackEntry { + YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ + YYCODETYPE major; /* The major token value. This is the code + ** number for the token at this stack level */ + YYMINORTYPE minor; /* The user-supplied minor token value. This + ** is the value of the token */ +}; +typedef struct yyStackEntry yyStackEntry; + +/* The state of the parser is completely contained in an instance of +** the following structure */ +struct yyParser { + yyStackEntry *yytos; /* Pointer to top element of the stack */ +#ifdef YYTRACKMAXSTACKDEPTH + int yyhwm; /* High-water mark of the stack */ +#endif +#ifndef YYNOERRORRECOVERY + int yyerrcnt; /* Shifts left before out of the error */ +#endif + sqlite3ParserARG_SDECL /* A place to hold %extra_argument */ + sqlite3ParserCTX_SDECL /* A place to hold %extra_context */ +#if YYSTACKDEPTH<=0 + int yystksz; /* Current side of the stack */ + yyStackEntry *yystack; /* The parser's stack */ + yyStackEntry yystk0; /* First stack entry */ +#else + yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */ + yyStackEntry *yystackEnd; /* Last entry in the stack */ +#endif +}; +typedef struct yyParser yyParser; + +/* #include */ +#ifndef NDEBUG +/* #include */ +static FILE *yyTraceFILE = 0; +static char *yyTracePrompt = 0; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* +** Turn parser tracing on by giving a stream to which to write the trace +** and a prompt to preface each trace message. Tracing is turned off +** by making either argument NULL +** +** Inputs: +**
      +**
    • A FILE* to which trace output should be written. +** If NULL, then tracing is turned off. +**
    • A prefix string written at the beginning of every +** line of trace output. If NULL, then tracing is +** turned off. +**
    +** +** Outputs: +** None. +*/ +SQLITE_PRIVATE void sqlite3ParserTrace(FILE *TraceFILE, char *zTracePrompt){ + yyTraceFILE = TraceFILE; + yyTracePrompt = zTracePrompt; + if( yyTraceFILE==0 ) yyTracePrompt = 0; + else if( yyTracePrompt==0 ) yyTraceFILE = 0; +} +#endif /* NDEBUG */ + +#if defined(YYCOVERAGE) || !defined(NDEBUG) +/* For tracing shifts, the names of all terminals and nonterminals +** are required. The following table supplies these names */ +static const char *const yyTokenName[] = { + /* 0 */ "$", + /* 1 */ "SEMI", + /* 2 */ "EXPLAIN", + /* 3 */ "QUERY", + /* 4 */ "PLAN", + /* 5 */ "BEGIN", + /* 6 */ "TRANSACTION", + /* 7 */ "DEFERRED", + /* 8 */ "IMMEDIATE", + /* 9 */ "EXCLUSIVE", + /* 10 */ "COMMIT", + /* 11 */ "END", + /* 12 */ "ROLLBACK", + /* 13 */ "SAVEPOINT", + /* 14 */ "RELEASE", + /* 15 */ "TO", + /* 16 */ "TABLE", + /* 17 */ "CREATE", + /* 18 */ "IF", + /* 19 */ "NOT", + /* 20 */ "EXISTS", + /* 21 */ "TEMP", + /* 22 */ "LP", + /* 23 */ "RP", + /* 24 */ "AS", + /* 25 */ "COMMA", + /* 26 */ "WITHOUT", + /* 27 */ "ABORT", + /* 28 */ "ACTION", + /* 29 */ "AFTER", + /* 30 */ "ANALYZE", + /* 31 */ "ASC", + /* 32 */ "ATTACH", + /* 33 */ "BEFORE", + /* 34 */ "BY", + /* 35 */ "CASCADE", + /* 36 */ "CAST", + /* 37 */ "CONFLICT", + /* 38 */ "DATABASE", + /* 39 */ "DESC", + /* 40 */ "DETACH", + /* 41 */ "EACH", + /* 42 */ "FAIL", + /* 43 */ "OR", + /* 44 */ "AND", + /* 45 */ "IS", + /* 46 */ "MATCH", + /* 47 */ "LIKE_KW", + /* 48 */ "BETWEEN", + /* 49 */ "IN", + /* 50 */ "ISNULL", + /* 51 */ "NOTNULL", + /* 52 */ "NE", + /* 53 */ "EQ", + /* 54 */ "GT", + /* 55 */ "LE", + /* 56 */ "LT", + /* 57 */ "GE", + /* 58 */ "ESCAPE", + /* 59 */ "ID", + /* 60 */ "COLUMNKW", + /* 61 */ "DO", + /* 62 */ "FOR", + /* 63 */ "IGNORE", + /* 64 */ "INITIALLY", + /* 65 */ "INSTEAD", + /* 66 */ "NO", + /* 67 */ "KEY", + /* 68 */ "OF", + /* 69 */ "OFFSET", + /* 70 */ "PRAGMA", + /* 71 */ "RAISE", + /* 72 */ "RECURSIVE", + /* 73 */ "REPLACE", + /* 74 */ "RESTRICT", + /* 75 */ "ROW", + /* 76 */ "ROWS", + /* 77 */ "TRIGGER", + /* 78 */ "VACUUM", + /* 79 */ "VIEW", + /* 80 */ "VIRTUAL", + /* 81 */ "WITH", + /* 82 */ "NULLS", + /* 83 */ "FIRST", + /* 84 */ "LAST", + /* 85 */ "CURRENT", + /* 86 */ "FOLLOWING", + /* 87 */ "PARTITION", + /* 88 */ "PRECEDING", + /* 89 */ "RANGE", + /* 90 */ "UNBOUNDED", + /* 91 */ "EXCLUDE", + /* 92 */ "GROUPS", + /* 93 */ "OTHERS", + /* 94 */ "TIES", + /* 95 */ "GENERATED", + /* 96 */ "ALWAYS", + /* 97 */ "MATERIALIZED", + /* 98 */ "REINDEX", + /* 99 */ "RENAME", + /* 100 */ "CTIME_KW", + /* 101 */ "ANY", + /* 102 */ "BITAND", + /* 103 */ "BITOR", + /* 104 */ "LSHIFT", + /* 105 */ "RSHIFT", + /* 106 */ "PLUS", + /* 107 */ "MINUS", + /* 108 */ "STAR", + /* 109 */ "SLASH", + /* 110 */ "REM", + /* 111 */ "CONCAT", + /* 112 */ "PTR", + /* 113 */ "COLLATE", + /* 114 */ "BITNOT", + /* 115 */ "ON", + /* 116 */ "INDEXED", + /* 117 */ "STRING", + /* 118 */ "JOIN_KW", + /* 119 */ "CONSTRAINT", + /* 120 */ "DEFAULT", + /* 121 */ "NULL", + /* 122 */ "PRIMARY", + /* 123 */ "UNIQUE", + /* 124 */ "CHECK", + /* 125 */ "REFERENCES", + /* 126 */ "AUTOINCR", + /* 127 */ "INSERT", + /* 128 */ "DELETE", + /* 129 */ "UPDATE", + /* 130 */ "SET", + /* 131 */ "DEFERRABLE", + /* 132 */ "FOREIGN", + /* 133 */ "DROP", + /* 134 */ "UNION", + /* 135 */ "ALL", + /* 136 */ "EXCEPT", + /* 137 */ "INTERSECT", + /* 138 */ "SELECT", + /* 139 */ "VALUES", + /* 140 */ "DISTINCT", + /* 141 */ "DOT", + /* 142 */ "FROM", + /* 143 */ "JOIN", + /* 144 */ "USING", + /* 145 */ "ORDER", + /* 146 */ "GROUP", + /* 147 */ "HAVING", + /* 148 */ "LIMIT", + /* 149 */ "WHERE", + /* 150 */ "RETURNING", + /* 151 */ "INTO", + /* 152 */ "NOTHING", + /* 153 */ "FLOAT", + /* 154 */ "BLOB", + /* 155 */ "INTEGER", + /* 156 */ "VARIABLE", + /* 157 */ "CASE", + /* 158 */ "WHEN", + /* 159 */ "THEN", + /* 160 */ "ELSE", + /* 161 */ "INDEX", + /* 162 */ "ALTER", + /* 163 */ "ADD", + /* 164 */ "WINDOW", + /* 165 */ "OVER", + /* 166 */ "FILTER", + /* 167 */ "COLUMN", + /* 168 */ "AGG_FUNCTION", + /* 169 */ "AGG_COLUMN", + /* 170 */ "TRUEFALSE", + /* 171 */ "ISNOT", + /* 172 */ "FUNCTION", + /* 173 */ "UMINUS", + /* 174 */ "UPLUS", + /* 175 */ "TRUTH", + /* 176 */ "REGISTER", + /* 177 */ "VECTOR", + /* 178 */ "SELECT_COLUMN", + /* 179 */ "IF_NULL_ROW", + /* 180 */ "ASTERISK", + /* 181 */ "SPAN", + /* 182 */ "ERROR", + /* 183 */ "SPACE", + /* 184 */ "ILLEGAL", + /* 185 */ "input", + /* 186 */ "cmdlist", + /* 187 */ "ecmd", + /* 188 */ "cmdx", + /* 189 */ "explain", + /* 190 */ "cmd", + /* 191 */ "transtype", + /* 192 */ "trans_opt", + /* 193 */ "nm", + /* 194 */ "savepoint_opt", + /* 195 */ "create_table", + /* 196 */ "create_table_args", + /* 197 */ "createkw", + /* 198 */ "temp", + /* 199 */ "ifnotexists", + /* 200 */ "dbnm", + /* 201 */ "columnlist", + /* 202 */ "conslist_opt", + /* 203 */ "table_option_set", + /* 204 */ "select", + /* 205 */ "table_option", + /* 206 */ "columnname", + /* 207 */ "carglist", + /* 208 */ "typetoken", + /* 209 */ "typename", + /* 210 */ "signed", + /* 211 */ "plus_num", + /* 212 */ "minus_num", + /* 213 */ "scanpt", + /* 214 */ "scantok", + /* 215 */ "ccons", + /* 216 */ "term", + /* 217 */ "expr", + /* 218 */ "onconf", + /* 219 */ "sortorder", + /* 220 */ "autoinc", + /* 221 */ "eidlist_opt", + /* 222 */ "refargs", + /* 223 */ "defer_subclause", + /* 224 */ "generated", + /* 225 */ "refarg", + /* 226 */ "refact", + /* 227 */ "init_deferred_pred_opt", + /* 228 */ "conslist", + /* 229 */ "tconscomma", + /* 230 */ "tcons", + /* 231 */ "sortlist", + /* 232 */ "eidlist", + /* 233 */ "defer_subclause_opt", + /* 234 */ "orconf", + /* 235 */ "resolvetype", + /* 236 */ "raisetype", + /* 237 */ "ifexists", + /* 238 */ "fullname", + /* 239 */ "selectnowith", + /* 240 */ "oneselect", + /* 241 */ "wqlist", + /* 242 */ "multiselect_op", + /* 243 */ "distinct", + /* 244 */ "selcollist", + /* 245 */ "from", + /* 246 */ "where_opt", + /* 247 */ "groupby_opt", + /* 248 */ "having_opt", + /* 249 */ "orderby_opt", + /* 250 */ "limit_opt", + /* 251 */ "window_clause", + /* 252 */ "values", + /* 253 */ "nexprlist", + /* 254 */ "sclp", + /* 255 */ "as", + /* 256 */ "seltablist", + /* 257 */ "stl_prefix", + /* 258 */ "joinop", + /* 259 */ "indexed_opt", + /* 260 */ "on_opt", + /* 261 */ "using_opt", + /* 262 */ "exprlist", + /* 263 */ "xfullname", + /* 264 */ "idlist", + /* 265 */ "nulls", + /* 266 */ "with", + /* 267 */ "where_opt_ret", + /* 268 */ "setlist", + /* 269 */ "insert_cmd", + /* 270 */ "idlist_opt", + /* 271 */ "upsert", + /* 272 */ "returning", + /* 273 */ "filter_over", + /* 274 */ "likeop", + /* 275 */ "between_op", + /* 276 */ "in_op", + /* 277 */ "paren_exprlist", + /* 278 */ "case_operand", + /* 279 */ "case_exprlist", + /* 280 */ "case_else", + /* 281 */ "uniqueflag", + /* 282 */ "collate", + /* 283 */ "vinto", + /* 284 */ "nmnum", + /* 285 */ "trigger_decl", + /* 286 */ "trigger_cmd_list", + /* 287 */ "trigger_time", + /* 288 */ "trigger_event", + /* 289 */ "foreach_clause", + /* 290 */ "when_clause", + /* 291 */ "trigger_cmd", + /* 292 */ "trnm", + /* 293 */ "tridxby", + /* 294 */ "database_kw_opt", + /* 295 */ "key_opt", + /* 296 */ "add_column_fullname", + /* 297 */ "kwcolumn_opt", + /* 298 */ "create_vtab", + /* 299 */ "vtabarglist", + /* 300 */ "vtabarg", + /* 301 */ "vtabargtoken", + /* 302 */ "lp", + /* 303 */ "anylist", + /* 304 */ "wqitem", + /* 305 */ "wqas", + /* 306 */ "windowdefn_list", + /* 307 */ "windowdefn", + /* 308 */ "window", + /* 309 */ "frame_opt", + /* 310 */ "part_opt", + /* 311 */ "filter_clause", + /* 312 */ "over_clause", + /* 313 */ "range_or_rows", + /* 314 */ "frame_bound", + /* 315 */ "frame_bound_s", + /* 316 */ "frame_bound_e", + /* 317 */ "frame_exclude_opt", + /* 318 */ "frame_exclude", +}; +#endif /* defined(YYCOVERAGE) || !defined(NDEBUG) */ + +#ifndef NDEBUG +/* For tracing reduce actions, the names of all rules are required. +*/ +static const char *const yyRuleName[] = { + /* 0 */ "explain ::= EXPLAIN", + /* 1 */ "explain ::= EXPLAIN QUERY PLAN", + /* 2 */ "cmdx ::= cmd", + /* 3 */ "cmd ::= BEGIN transtype trans_opt", + /* 4 */ "transtype ::=", + /* 5 */ "transtype ::= DEFERRED", + /* 6 */ "transtype ::= IMMEDIATE", + /* 7 */ "transtype ::= EXCLUSIVE", + /* 8 */ "cmd ::= COMMIT|END trans_opt", + /* 9 */ "cmd ::= ROLLBACK trans_opt", + /* 10 */ "cmd ::= SAVEPOINT nm", + /* 11 */ "cmd ::= RELEASE savepoint_opt nm", + /* 12 */ "cmd ::= ROLLBACK trans_opt TO savepoint_opt nm", + /* 13 */ "create_table ::= createkw temp TABLE ifnotexists nm dbnm", + /* 14 */ "createkw ::= CREATE", + /* 15 */ "ifnotexists ::=", + /* 16 */ "ifnotexists ::= IF NOT EXISTS", + /* 17 */ "temp ::= TEMP", + /* 18 */ "temp ::=", + /* 19 */ "create_table_args ::= LP columnlist conslist_opt RP table_option_set", + /* 20 */ "create_table_args ::= AS select", + /* 21 */ "table_option_set ::=", + /* 22 */ "table_option_set ::= table_option_set COMMA table_option", + /* 23 */ "table_option ::= WITHOUT nm", + /* 24 */ "table_option ::= nm", + /* 25 */ "columnname ::= nm typetoken", + /* 26 */ "typetoken ::=", + /* 27 */ "typetoken ::= typename LP signed RP", + /* 28 */ "typetoken ::= typename LP signed COMMA signed RP", + /* 29 */ "typename ::= typename ID|STRING", + /* 30 */ "scanpt ::=", + /* 31 */ "scantok ::=", + /* 32 */ "ccons ::= CONSTRAINT nm", + /* 33 */ "ccons ::= DEFAULT scantok term", + /* 34 */ "ccons ::= DEFAULT LP expr RP", + /* 35 */ "ccons ::= DEFAULT PLUS scantok term", + /* 36 */ "ccons ::= DEFAULT MINUS scantok term", + /* 37 */ "ccons ::= DEFAULT scantok ID|INDEXED", + /* 38 */ "ccons ::= NOT NULL onconf", + /* 39 */ "ccons ::= PRIMARY KEY sortorder onconf autoinc", + /* 40 */ "ccons ::= UNIQUE onconf", + /* 41 */ "ccons ::= CHECK LP expr RP", + /* 42 */ "ccons ::= REFERENCES nm eidlist_opt refargs", + /* 43 */ "ccons ::= defer_subclause", + /* 44 */ "ccons ::= COLLATE ID|STRING", + /* 45 */ "generated ::= LP expr RP", + /* 46 */ "generated ::= LP expr RP ID", + /* 47 */ "autoinc ::=", + /* 48 */ "autoinc ::= AUTOINCR", + /* 49 */ "refargs ::=", + /* 50 */ "refargs ::= refargs refarg", + /* 51 */ "refarg ::= MATCH nm", + /* 52 */ "refarg ::= ON INSERT refact", + /* 53 */ "refarg ::= ON DELETE refact", + /* 54 */ "refarg ::= ON UPDATE refact", + /* 55 */ "refact ::= SET NULL", + /* 56 */ "refact ::= SET DEFAULT", + /* 57 */ "refact ::= CASCADE", + /* 58 */ "refact ::= RESTRICT", + /* 59 */ "refact ::= NO ACTION", + /* 60 */ "defer_subclause ::= NOT DEFERRABLE init_deferred_pred_opt", + /* 61 */ "defer_subclause ::= DEFERRABLE init_deferred_pred_opt", + /* 62 */ "init_deferred_pred_opt ::=", + /* 63 */ "init_deferred_pred_opt ::= INITIALLY DEFERRED", + /* 64 */ "init_deferred_pred_opt ::= INITIALLY IMMEDIATE", + /* 65 */ "conslist_opt ::=", + /* 66 */ "tconscomma ::= COMMA", + /* 67 */ "tcons ::= CONSTRAINT nm", + /* 68 */ "tcons ::= PRIMARY KEY LP sortlist autoinc RP onconf", + /* 69 */ "tcons ::= UNIQUE LP sortlist RP onconf", + /* 70 */ "tcons ::= CHECK LP expr RP onconf", + /* 71 */ "tcons ::= FOREIGN KEY LP eidlist RP REFERENCES nm eidlist_opt refargs defer_subclause_opt", + /* 72 */ "defer_subclause_opt ::=", + /* 73 */ "onconf ::=", + /* 74 */ "onconf ::= ON CONFLICT resolvetype", + /* 75 */ "orconf ::=", + /* 76 */ "orconf ::= OR resolvetype", + /* 77 */ "resolvetype ::= IGNORE", + /* 78 */ "resolvetype ::= REPLACE", + /* 79 */ "cmd ::= DROP TABLE ifexists fullname", + /* 80 */ "ifexists ::= IF EXISTS", + /* 81 */ "ifexists ::=", + /* 82 */ "cmd ::= createkw temp VIEW ifnotexists nm dbnm eidlist_opt AS select", + /* 83 */ "cmd ::= DROP VIEW ifexists fullname", + /* 84 */ "cmd ::= select", + /* 85 */ "select ::= WITH wqlist selectnowith", + /* 86 */ "select ::= WITH RECURSIVE wqlist selectnowith", + /* 87 */ "select ::= selectnowith", + /* 88 */ "selectnowith ::= selectnowith multiselect_op oneselect", + /* 89 */ "multiselect_op ::= UNION", + /* 90 */ "multiselect_op ::= UNION ALL", + /* 91 */ "multiselect_op ::= EXCEPT|INTERSECT", + /* 92 */ "oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt orderby_opt limit_opt", + /* 93 */ "oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt window_clause orderby_opt limit_opt", + /* 94 */ "values ::= VALUES LP nexprlist RP", + /* 95 */ "values ::= values COMMA LP nexprlist RP", + /* 96 */ "distinct ::= DISTINCT", + /* 97 */ "distinct ::= ALL", + /* 98 */ "distinct ::=", + /* 99 */ "sclp ::=", + /* 100 */ "selcollist ::= sclp scanpt expr scanpt as", + /* 101 */ "selcollist ::= sclp scanpt STAR", + /* 102 */ "selcollist ::= sclp scanpt nm DOT STAR", + /* 103 */ "as ::= AS nm", + /* 104 */ "as ::=", + /* 105 */ "from ::=", + /* 106 */ "from ::= FROM seltablist", + /* 107 */ "stl_prefix ::= seltablist joinop", + /* 108 */ "stl_prefix ::=", + /* 109 */ "seltablist ::= stl_prefix nm dbnm as indexed_opt on_opt using_opt", + /* 110 */ "seltablist ::= stl_prefix nm dbnm LP exprlist RP as on_opt using_opt", + /* 111 */ "seltablist ::= stl_prefix LP select RP as on_opt using_opt", + /* 112 */ "seltablist ::= stl_prefix LP seltablist RP as on_opt using_opt", + /* 113 */ "dbnm ::=", + /* 114 */ "dbnm ::= DOT nm", + /* 115 */ "fullname ::= nm", + /* 116 */ "fullname ::= nm DOT nm", + /* 117 */ "xfullname ::= nm", + /* 118 */ "xfullname ::= nm DOT nm", + /* 119 */ "xfullname ::= nm DOT nm AS nm", + /* 120 */ "xfullname ::= nm AS nm", + /* 121 */ "joinop ::= COMMA|JOIN", + /* 122 */ "joinop ::= JOIN_KW JOIN", + /* 123 */ "joinop ::= JOIN_KW nm JOIN", + /* 124 */ "joinop ::= JOIN_KW nm nm JOIN", + /* 125 */ "on_opt ::= ON expr", + /* 126 */ "on_opt ::=", + /* 127 */ "indexed_opt ::=", + /* 128 */ "indexed_opt ::= INDEXED BY nm", + /* 129 */ "indexed_opt ::= NOT INDEXED", + /* 130 */ "using_opt ::= USING LP idlist RP", + /* 131 */ "using_opt ::=", + /* 132 */ "orderby_opt ::=", + /* 133 */ "orderby_opt ::= ORDER BY sortlist", + /* 134 */ "sortlist ::= sortlist COMMA expr sortorder nulls", + /* 135 */ "sortlist ::= expr sortorder nulls", + /* 136 */ "sortorder ::= ASC", + /* 137 */ "sortorder ::= DESC", + /* 138 */ "sortorder ::=", + /* 139 */ "nulls ::= NULLS FIRST", + /* 140 */ "nulls ::= NULLS LAST", + /* 141 */ "nulls ::=", + /* 142 */ "groupby_opt ::=", + /* 143 */ "groupby_opt ::= GROUP BY nexprlist", + /* 144 */ "having_opt ::=", + /* 145 */ "having_opt ::= HAVING expr", + /* 146 */ "limit_opt ::=", + /* 147 */ "limit_opt ::= LIMIT expr", + /* 148 */ "limit_opt ::= LIMIT expr OFFSET expr", + /* 149 */ "limit_opt ::= LIMIT expr COMMA expr", + /* 150 */ "cmd ::= with DELETE FROM xfullname indexed_opt where_opt_ret orderby_opt limit_opt", + /* 151 */ "where_opt ::=", + /* 152 */ "where_opt ::= WHERE expr", + /* 153 */ "where_opt_ret ::=", + /* 154 */ "where_opt_ret ::= WHERE expr", + /* 155 */ "where_opt_ret ::= RETURNING selcollist", + /* 156 */ "where_opt_ret ::= WHERE expr RETURNING selcollist", + /* 157 */ "cmd ::= with UPDATE orconf xfullname indexed_opt SET setlist from where_opt_ret orderby_opt limit_opt", + /* 158 */ "setlist ::= setlist COMMA nm EQ expr", + /* 159 */ "setlist ::= setlist COMMA LP idlist RP EQ expr", + /* 160 */ "setlist ::= nm EQ expr", + /* 161 */ "setlist ::= LP idlist RP EQ expr", + /* 162 */ "cmd ::= with insert_cmd INTO xfullname idlist_opt select upsert", + /* 163 */ "cmd ::= with insert_cmd INTO xfullname idlist_opt DEFAULT VALUES returning", + /* 164 */ "upsert ::=", + /* 165 */ "upsert ::= RETURNING selcollist", + /* 166 */ "upsert ::= ON CONFLICT LP sortlist RP where_opt DO UPDATE SET setlist where_opt upsert", + /* 167 */ "upsert ::= ON CONFLICT LP sortlist RP where_opt DO NOTHING upsert", + /* 168 */ "upsert ::= ON CONFLICT DO NOTHING returning", + /* 169 */ "upsert ::= ON CONFLICT DO UPDATE SET setlist where_opt returning", + /* 170 */ "returning ::= RETURNING selcollist", + /* 171 */ "insert_cmd ::= INSERT orconf", + /* 172 */ "insert_cmd ::= REPLACE", + /* 173 */ "idlist_opt ::=", + /* 174 */ "idlist_opt ::= LP idlist RP", + /* 175 */ "idlist ::= idlist COMMA nm", + /* 176 */ "idlist ::= nm", + /* 177 */ "expr ::= LP expr RP", + /* 178 */ "expr ::= ID|INDEXED", + /* 179 */ "expr ::= JOIN_KW", + /* 180 */ "expr ::= nm DOT nm", + /* 181 */ "expr ::= nm DOT nm DOT nm", + /* 182 */ "term ::= NULL|FLOAT|BLOB", + /* 183 */ "term ::= STRING", + /* 184 */ "term ::= INTEGER", + /* 185 */ "expr ::= VARIABLE", + /* 186 */ "expr ::= expr COLLATE ID|STRING", + /* 187 */ "expr ::= CAST LP expr AS typetoken RP", + /* 188 */ "expr ::= ID|INDEXED LP distinct exprlist RP", + /* 189 */ "expr ::= ID|INDEXED LP STAR RP", + /* 190 */ "expr ::= ID|INDEXED LP distinct exprlist RP filter_over", + /* 191 */ "expr ::= ID|INDEXED LP STAR RP filter_over", + /* 192 */ "term ::= CTIME_KW", + /* 193 */ "expr ::= LP nexprlist COMMA expr RP", + /* 194 */ "expr ::= expr AND expr", + /* 195 */ "expr ::= expr OR expr", + /* 196 */ "expr ::= expr LT|GT|GE|LE expr", + /* 197 */ "expr ::= expr EQ|NE expr", + /* 198 */ "expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr", + /* 199 */ "expr ::= expr PLUS|MINUS expr", + /* 200 */ "expr ::= expr STAR|SLASH|REM expr", + /* 201 */ "expr ::= expr CONCAT expr", + /* 202 */ "likeop ::= NOT LIKE_KW|MATCH", + /* 203 */ "expr ::= expr likeop expr", + /* 204 */ "expr ::= expr likeop expr ESCAPE expr", + /* 205 */ "expr ::= expr ISNULL|NOTNULL", + /* 206 */ "expr ::= expr NOT NULL", + /* 207 */ "expr ::= expr IS expr", + /* 208 */ "expr ::= expr IS NOT expr", + /* 209 */ "expr ::= NOT expr", + /* 210 */ "expr ::= BITNOT expr", + /* 211 */ "expr ::= PLUS|MINUS expr", + /* 212 */ "expr ::= expr PTR expr", + /* 213 */ "between_op ::= BETWEEN", + /* 214 */ "between_op ::= NOT BETWEEN", + /* 215 */ "expr ::= expr between_op expr AND expr", + /* 216 */ "in_op ::= IN", + /* 217 */ "in_op ::= NOT IN", + /* 218 */ "expr ::= expr in_op LP exprlist RP", + /* 219 */ "expr ::= LP select RP", + /* 220 */ "expr ::= expr in_op LP select RP", + /* 221 */ "expr ::= expr in_op nm dbnm paren_exprlist", + /* 222 */ "expr ::= EXISTS LP select RP", + /* 223 */ "expr ::= CASE case_operand case_exprlist case_else END", + /* 224 */ "case_exprlist ::= case_exprlist WHEN expr THEN expr", + /* 225 */ "case_exprlist ::= WHEN expr THEN expr", + /* 226 */ "case_else ::= ELSE expr", + /* 227 */ "case_else ::=", + /* 228 */ "case_operand ::= expr", + /* 229 */ "case_operand ::=", + /* 230 */ "exprlist ::=", + /* 231 */ "nexprlist ::= nexprlist COMMA expr", + /* 232 */ "nexprlist ::= expr", + /* 233 */ "paren_exprlist ::=", + /* 234 */ "paren_exprlist ::= LP exprlist RP", + /* 235 */ "cmd ::= createkw uniqueflag INDEX ifnotexists nm dbnm ON nm LP sortlist RP where_opt", + /* 236 */ "uniqueflag ::= UNIQUE", + /* 237 */ "uniqueflag ::=", + /* 238 */ "eidlist_opt ::=", + /* 239 */ "eidlist_opt ::= LP eidlist RP", + /* 240 */ "eidlist ::= eidlist COMMA nm collate sortorder", + /* 241 */ "eidlist ::= nm collate sortorder", + /* 242 */ "collate ::=", + /* 243 */ "collate ::= COLLATE ID|STRING", + /* 244 */ "cmd ::= DROP INDEX ifexists fullname", + /* 245 */ "cmd ::= VACUUM vinto", + /* 246 */ "cmd ::= VACUUM nm vinto", + /* 247 */ "vinto ::= INTO expr", + /* 248 */ "vinto ::=", + /* 249 */ "cmd ::= PRAGMA nm dbnm", + /* 250 */ "cmd ::= PRAGMA nm dbnm EQ nmnum", + /* 251 */ "cmd ::= PRAGMA nm dbnm LP nmnum RP", + /* 252 */ "cmd ::= PRAGMA nm dbnm EQ minus_num", + /* 253 */ "cmd ::= PRAGMA nm dbnm LP minus_num RP", + /* 254 */ "plus_num ::= PLUS INTEGER|FLOAT", + /* 255 */ "minus_num ::= MINUS INTEGER|FLOAT", + /* 256 */ "cmd ::= createkw trigger_decl BEGIN trigger_cmd_list END", + /* 257 */ "trigger_decl ::= temp TRIGGER ifnotexists nm dbnm trigger_time trigger_event ON fullname foreach_clause when_clause", + /* 258 */ "trigger_time ::= BEFORE|AFTER", + /* 259 */ "trigger_time ::= INSTEAD OF", + /* 260 */ "trigger_time ::=", + /* 261 */ "trigger_event ::= DELETE|INSERT", + /* 262 */ "trigger_event ::= UPDATE", + /* 263 */ "trigger_event ::= UPDATE OF idlist", + /* 264 */ "when_clause ::=", + /* 265 */ "when_clause ::= WHEN expr", + /* 266 */ "trigger_cmd_list ::= trigger_cmd_list trigger_cmd SEMI", + /* 267 */ "trigger_cmd_list ::= trigger_cmd SEMI", + /* 268 */ "trnm ::= nm DOT nm", + /* 269 */ "tridxby ::= INDEXED BY nm", + /* 270 */ "tridxby ::= NOT INDEXED", + /* 271 */ "trigger_cmd ::= UPDATE orconf trnm tridxby SET setlist from where_opt scanpt", + /* 272 */ "trigger_cmd ::= scanpt insert_cmd INTO trnm idlist_opt select upsert scanpt", + /* 273 */ "trigger_cmd ::= DELETE FROM trnm tridxby where_opt scanpt", + /* 274 */ "trigger_cmd ::= scanpt select scanpt", + /* 275 */ "expr ::= RAISE LP IGNORE RP", + /* 276 */ "expr ::= RAISE LP raisetype COMMA nm RP", + /* 277 */ "raisetype ::= ROLLBACK", + /* 278 */ "raisetype ::= ABORT", + /* 279 */ "raisetype ::= FAIL", + /* 280 */ "cmd ::= DROP TRIGGER ifexists fullname", + /* 281 */ "cmd ::= ATTACH database_kw_opt expr AS expr key_opt", + /* 282 */ "cmd ::= DETACH database_kw_opt expr", + /* 283 */ "key_opt ::=", + /* 284 */ "key_opt ::= KEY expr", + /* 285 */ "cmd ::= REINDEX", + /* 286 */ "cmd ::= REINDEX nm dbnm", + /* 287 */ "cmd ::= ANALYZE", + /* 288 */ "cmd ::= ANALYZE nm dbnm", + /* 289 */ "cmd ::= ALTER TABLE fullname RENAME TO nm", + /* 290 */ "cmd ::= ALTER TABLE add_column_fullname ADD kwcolumn_opt columnname carglist", + /* 291 */ "cmd ::= ALTER TABLE fullname DROP kwcolumn_opt nm", + /* 292 */ "add_column_fullname ::= fullname", + /* 293 */ "cmd ::= ALTER TABLE fullname RENAME kwcolumn_opt nm TO nm", + /* 294 */ "cmd ::= create_vtab", + /* 295 */ "cmd ::= create_vtab LP vtabarglist RP", + /* 296 */ "create_vtab ::= createkw VIRTUAL TABLE ifnotexists nm dbnm USING nm", + /* 297 */ "vtabarg ::=", + /* 298 */ "vtabargtoken ::= ANY", + /* 299 */ "vtabargtoken ::= lp anylist RP", + /* 300 */ "lp ::= LP", + /* 301 */ "with ::= WITH wqlist", + /* 302 */ "with ::= WITH RECURSIVE wqlist", + /* 303 */ "wqas ::= AS", + /* 304 */ "wqas ::= AS MATERIALIZED", + /* 305 */ "wqas ::= AS NOT MATERIALIZED", + /* 306 */ "wqitem ::= nm eidlist_opt wqas LP select RP", + /* 307 */ "wqlist ::= wqitem", + /* 308 */ "wqlist ::= wqlist COMMA wqitem", + /* 309 */ "windowdefn_list ::= windowdefn", + /* 310 */ "windowdefn_list ::= windowdefn_list COMMA windowdefn", + /* 311 */ "windowdefn ::= nm AS LP window RP", + /* 312 */ "window ::= PARTITION BY nexprlist orderby_opt frame_opt", + /* 313 */ "window ::= nm PARTITION BY nexprlist orderby_opt frame_opt", + /* 314 */ "window ::= ORDER BY sortlist frame_opt", + /* 315 */ "window ::= nm ORDER BY sortlist frame_opt", + /* 316 */ "window ::= frame_opt", + /* 317 */ "window ::= nm frame_opt", + /* 318 */ "frame_opt ::=", + /* 319 */ "frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt", + /* 320 */ "frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt", + /* 321 */ "range_or_rows ::= RANGE|ROWS|GROUPS", + /* 322 */ "frame_bound_s ::= frame_bound", + /* 323 */ "frame_bound_s ::= UNBOUNDED PRECEDING", + /* 324 */ "frame_bound_e ::= frame_bound", + /* 325 */ "frame_bound_e ::= UNBOUNDED FOLLOWING", + /* 326 */ "frame_bound ::= expr PRECEDING|FOLLOWING", + /* 327 */ "frame_bound ::= CURRENT ROW", + /* 328 */ "frame_exclude_opt ::=", + /* 329 */ "frame_exclude_opt ::= EXCLUDE frame_exclude", + /* 330 */ "frame_exclude ::= NO OTHERS", + /* 331 */ "frame_exclude ::= CURRENT ROW", + /* 332 */ "frame_exclude ::= GROUP|TIES", + /* 333 */ "window_clause ::= WINDOW windowdefn_list", + /* 334 */ "filter_over ::= filter_clause over_clause", + /* 335 */ "filter_over ::= over_clause", + /* 336 */ "filter_over ::= filter_clause", + /* 337 */ "over_clause ::= OVER LP window RP", + /* 338 */ "over_clause ::= OVER nm", + /* 339 */ "filter_clause ::= FILTER LP WHERE expr RP", + /* 340 */ "input ::= cmdlist", + /* 341 */ "cmdlist ::= cmdlist ecmd", + /* 342 */ "cmdlist ::= ecmd", + /* 343 */ "ecmd ::= SEMI", + /* 344 */ "ecmd ::= cmdx SEMI", + /* 345 */ "ecmd ::= explain cmdx SEMI", + /* 346 */ "trans_opt ::=", + /* 347 */ "trans_opt ::= TRANSACTION", + /* 348 */ "trans_opt ::= TRANSACTION nm", + /* 349 */ "savepoint_opt ::= SAVEPOINT", + /* 350 */ "savepoint_opt ::=", + /* 351 */ "cmd ::= create_table create_table_args", + /* 352 */ "table_option_set ::= table_option", + /* 353 */ "columnlist ::= columnlist COMMA columnname carglist", + /* 354 */ "columnlist ::= columnname carglist", + /* 355 */ "nm ::= ID|INDEXED", + /* 356 */ "nm ::= STRING", + /* 357 */ "nm ::= JOIN_KW", + /* 358 */ "typetoken ::= typename", + /* 359 */ "typename ::= ID|STRING", + /* 360 */ "signed ::= plus_num", + /* 361 */ "signed ::= minus_num", + /* 362 */ "carglist ::= carglist ccons", + /* 363 */ "carglist ::=", + /* 364 */ "ccons ::= NULL onconf", + /* 365 */ "ccons ::= GENERATED ALWAYS AS generated", + /* 366 */ "ccons ::= AS generated", + /* 367 */ "conslist_opt ::= COMMA conslist", + /* 368 */ "conslist ::= conslist tconscomma tcons", + /* 369 */ "conslist ::= tcons", + /* 370 */ "tconscomma ::=", + /* 371 */ "defer_subclause_opt ::= defer_subclause", + /* 372 */ "resolvetype ::= raisetype", + /* 373 */ "selectnowith ::= oneselect", + /* 374 */ "oneselect ::= values", + /* 375 */ "sclp ::= selcollist COMMA", + /* 376 */ "as ::= ID|STRING", + /* 377 */ "returning ::=", + /* 378 */ "expr ::= term", + /* 379 */ "likeop ::= LIKE_KW|MATCH", + /* 380 */ "exprlist ::= nexprlist", + /* 381 */ "nmnum ::= plus_num", + /* 382 */ "nmnum ::= nm", + /* 383 */ "nmnum ::= ON", + /* 384 */ "nmnum ::= DELETE", + /* 385 */ "nmnum ::= DEFAULT", + /* 386 */ "plus_num ::= INTEGER|FLOAT", + /* 387 */ "foreach_clause ::=", + /* 388 */ "foreach_clause ::= FOR EACH ROW", + /* 389 */ "trnm ::= nm", + /* 390 */ "tridxby ::=", + /* 391 */ "database_kw_opt ::= DATABASE", + /* 392 */ "database_kw_opt ::=", + /* 393 */ "kwcolumn_opt ::=", + /* 394 */ "kwcolumn_opt ::= COLUMNKW", + /* 395 */ "vtabarglist ::= vtabarg", + /* 396 */ "vtabarglist ::= vtabarglist COMMA vtabarg", + /* 397 */ "vtabarg ::= vtabarg vtabargtoken", + /* 398 */ "anylist ::=", + /* 399 */ "anylist ::= anylist LP anylist RP", + /* 400 */ "anylist ::= anylist ANY", + /* 401 */ "with ::=", +}; +#endif /* NDEBUG */ + + +#if YYSTACKDEPTH<=0 +/* +** Try to increase the size of the parser stack. Return the number +** of errors. Return 0 on success. +*/ +static int yyGrowStack(yyParser *p){ + int newSize; + int idx; + yyStackEntry *pNew; + + newSize = p->yystksz*2 + 100; + idx = p->yytos ? (int)(p->yytos - p->yystack) : 0; + if( p->yystack==&p->yystk0 ){ + pNew = malloc(newSize*sizeof(pNew[0])); + if( pNew ) pNew[0] = p->yystk0; + }else{ + pNew = realloc(p->yystack, newSize*sizeof(pNew[0])); + } + if( pNew ){ + p->yystack = pNew; + p->yytos = &p->yystack[idx]; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n", + yyTracePrompt, p->yystksz, newSize); + } +#endif + p->yystksz = newSize; + } + return pNew==0; +} +#endif + +/* Datatype of the argument to the memory allocated passed as the +** second argument to sqlite3ParserAlloc() below. This can be changed by +** putting an appropriate #define in the %include section of the input +** grammar. +*/ +#ifndef YYMALLOCARGTYPE +# define YYMALLOCARGTYPE size_t +#endif + +/* Initialize a new parser that has already been allocated. +*/ +SQLITE_PRIVATE void sqlite3ParserInit(void *yypRawParser sqlite3ParserCTX_PDECL){ + yyParser *yypParser = (yyParser*)yypRawParser; + sqlite3ParserCTX_STORE +#ifdef YYTRACKMAXSTACKDEPTH + yypParser->yyhwm = 0; +#endif +#if YYSTACKDEPTH<=0 + yypParser->yytos = NULL; + yypParser->yystack = NULL; + yypParser->yystksz = 0; + if( yyGrowStack(yypParser) ){ + yypParser->yystack = &yypParser->yystk0; + yypParser->yystksz = 1; + } +#endif +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + yypParser->yytos = yypParser->yystack; + yypParser->yystack[0].stateno = 0; + yypParser->yystack[0].major = 0; +#if YYSTACKDEPTH>0 + yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1]; +#endif +} + +#ifndef sqlite3Parser_ENGINEALWAYSONSTACK +/* +** This function allocates a new parser. +** The only argument is a pointer to a function which works like +** malloc. +** +** Inputs: +** A pointer to the function used to allocate memory. +** +** Outputs: +** A pointer to a parser. This pointer is used in subsequent calls +** to sqlite3Parser and sqlite3ParserFree. +*/ +SQLITE_PRIVATE void *sqlite3ParserAlloc(void *(*mallocProc)(YYMALLOCARGTYPE) sqlite3ParserCTX_PDECL){ + yyParser *yypParser; + yypParser = (yyParser*)(*mallocProc)( (YYMALLOCARGTYPE)sizeof(yyParser) ); + if( yypParser ){ + sqlite3ParserCTX_STORE + sqlite3ParserInit(yypParser sqlite3ParserCTX_PARAM); + } + return (void*)yypParser; +} +#endif /* sqlite3Parser_ENGINEALWAYSONSTACK */ + + +/* The following function deletes the "minor type" or semantic value +** associated with a symbol. The symbol can be either a terminal +** or nonterminal. "yymajor" is the symbol code, and "yypminor" is +** a pointer to the value to be deleted. The code used to do the +** deletions is derived from the %destructor and/or %token_destructor +** directives of the input grammar. +*/ +static void yy_destructor( + yyParser *yypParser, /* The parser */ + YYCODETYPE yymajor, /* Type code for object to destroy */ + YYMINORTYPE *yypminor /* The object to be destroyed */ +){ + sqlite3ParserARG_FETCH + sqlite3ParserCTX_FETCH + switch( yymajor ){ + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are *not* used + ** inside the C code. + */ +/********* Begin destructor definitions ***************************************/ + case 204: /* select */ + case 239: /* selectnowith */ + case 240: /* oneselect */ + case 252: /* values */ +{ +sqlite3SelectDelete(pParse->db, (yypminor->yy47)); +} + break; + case 216: /* term */ + case 217: /* expr */ + case 246: /* where_opt */ + case 248: /* having_opt */ + case 260: /* on_opt */ + case 267: /* where_opt_ret */ + case 278: /* case_operand */ + case 280: /* case_else */ + case 283: /* vinto */ + case 290: /* when_clause */ + case 295: /* key_opt */ + case 311: /* filter_clause */ +{ +sqlite3ExprDelete(pParse->db, (yypminor->yy528)); +} + break; + case 221: /* eidlist_opt */ + case 231: /* sortlist */ + case 232: /* eidlist */ + case 244: /* selcollist */ + case 247: /* groupby_opt */ + case 249: /* orderby_opt */ + case 253: /* nexprlist */ + case 254: /* sclp */ + case 262: /* exprlist */ + case 268: /* setlist */ + case 277: /* paren_exprlist */ + case 279: /* case_exprlist */ + case 310: /* part_opt */ +{ +sqlite3ExprListDelete(pParse->db, (yypminor->yy322)); +} + break; + case 238: /* fullname */ + case 245: /* from */ + case 256: /* seltablist */ + case 257: /* stl_prefix */ + case 263: /* xfullname */ +{ +sqlite3SrcListDelete(pParse->db, (yypminor->yy131)); +} + break; + case 241: /* wqlist */ +{ +sqlite3WithDelete(pParse->db, (yypminor->yy521)); +} + break; + case 251: /* window_clause */ + case 306: /* windowdefn_list */ +{ +sqlite3WindowListDelete(pParse->db, (yypminor->yy41)); +} + break; + case 261: /* using_opt */ + case 264: /* idlist */ + case 270: /* idlist_opt */ +{ +sqlite3IdListDelete(pParse->db, (yypminor->yy254)); +} + break; + case 273: /* filter_over */ + case 307: /* windowdefn */ + case 308: /* window */ + case 309: /* frame_opt */ + case 312: /* over_clause */ +{ +sqlite3WindowDelete(pParse->db, (yypminor->yy41)); +} + break; + case 286: /* trigger_cmd_list */ + case 291: /* trigger_cmd */ +{ +sqlite3DeleteTriggerStep(pParse->db, (yypminor->yy33)); +} + break; + case 288: /* trigger_event */ +{ +sqlite3IdListDelete(pParse->db, (yypminor->yy180).b); +} + break; + case 314: /* frame_bound */ + case 315: /* frame_bound_s */ + case 316: /* frame_bound_e */ +{ +sqlite3ExprDelete(pParse->db, (yypminor->yy595).pExpr); +} + break; +/********* End destructor definitions *****************************************/ + default: break; /* If no destructor action specified: do nothing */ + } +} + +/* +** Pop the parser's stack once. +** +** If there is a destructor routine associated with the token which +** is popped from the stack, then call it. +*/ +static void yy_pop_parser_stack(yyParser *pParser){ + yyStackEntry *yytos; + assert( pParser->yytos!=0 ); + assert( pParser->yytos > pParser->yystack ); + yytos = pParser->yytos--; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sPopping %s\n", + yyTracePrompt, + yyTokenName[yytos->major]); + } +#endif + yy_destructor(pParser, yytos->major, &yytos->minor); +} + +/* +** Clear all secondary memory allocations from the parser +*/ +SQLITE_PRIVATE void sqlite3ParserFinalize(void *p){ + yyParser *pParser = (yyParser*)p; + while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser); +#if YYSTACKDEPTH<=0 + if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack); +#endif +} + +#ifndef sqlite3Parser_ENGINEALWAYSONSTACK +/* +** Deallocate and destroy a parser. Destructors are called for +** all stack elements before shutting the parser down. +** +** If the YYPARSEFREENEVERNULL macro exists (for example because it +** is defined in a %include section of the input grammar) then it is +** assumed that the input pointer is never NULL. +*/ +SQLITE_PRIVATE void sqlite3ParserFree( + void *p, /* The parser to be deleted */ + void (*freeProc)(void*) /* Function used to reclaim memory */ +){ +#ifndef YYPARSEFREENEVERNULL + if( p==0 ) return; +#endif + sqlite3ParserFinalize(p); + (*freeProc)(p); +} +#endif /* sqlite3Parser_ENGINEALWAYSONSTACK */ + +/* +** Return the peak depth of the stack for a parser. +*/ +#ifdef YYTRACKMAXSTACKDEPTH +SQLITE_PRIVATE int sqlite3ParserStackPeak(void *p){ + yyParser *pParser = (yyParser*)p; + return pParser->yyhwm; +} +#endif + +/* This array of booleans keeps track of the parser statement +** coverage. The element yycoverage[X][Y] is set when the parser +** is in state X and has a lookahead token Y. In a well-tested +** systems, every element of this matrix should end up being set. +*/ +#if defined(YYCOVERAGE) +static unsigned char yycoverage[YYNSTATE][YYNTOKEN]; +#endif + +/* +** Write into out a description of every state/lookahead combination that +** +** (1) has not been used by the parser, and +** (2) is not a syntax error. +** +** Return the number of missed state/lookahead combinations. +*/ +#if defined(YYCOVERAGE) +SQLITE_PRIVATE int sqlite3ParserCoverage(FILE *out){ + int stateno, iLookAhead, i; + int nMissed = 0; + for(stateno=0; statenoYY_MAX_SHIFT ) return stateno; + assert( stateno <= YY_SHIFT_COUNT ); +#if defined(YYCOVERAGE) + yycoverage[stateno][iLookAhead] = 1; +#endif + do{ + i = yy_shift_ofst[stateno]; + assert( i>=0 ); + assert( i<=YY_ACTTAB_COUNT ); + assert( i+YYNTOKEN<=(int)YY_NLOOKAHEAD ); + assert( iLookAhead!=YYNOCODE ); + assert( iLookAhead < YYNTOKEN ); + i += iLookAhead; + assert( i<(int)YY_NLOOKAHEAD ); + if( yy_lookahead[i]!=iLookAhead ){ +#ifdef YYFALLBACK + YYCODETYPE iFallback; /* Fallback token */ + assert( iLookAhead %s\n", + yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]); + } +#endif + assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ + iLookAhead = iFallback; + continue; + } +#endif +#ifdef YYWILDCARD + { + int j = i - iLookAhead + YYWILDCARD; + assert( j<(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) ); + if( yy_lookahead[j]==YYWILDCARD && iLookAhead>0 ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n", + yyTracePrompt, yyTokenName[iLookAhead], + yyTokenName[YYWILDCARD]); + } +#endif /* NDEBUG */ + return yy_action[j]; + } + } +#endif /* YYWILDCARD */ + return yy_default[stateno]; + }else{ + assert( i>=0 && i<(int)(sizeof(yy_action)/sizeof(yy_action[0])) ); + return yy_action[i]; + } + }while(1); +} + +/* +** Find the appropriate action for a parser given the non-terminal +** look-ahead token iLookAhead. +*/ +static YYACTIONTYPE yy_find_reduce_action( + YYACTIONTYPE stateno, /* Current state number */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; +#ifdef YYERRORSYMBOL + if( stateno>YY_REDUCE_COUNT ){ + return yy_default[stateno]; + } +#else + assert( stateno<=YY_REDUCE_COUNT ); +#endif + i = yy_reduce_ofst[stateno]; + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; +#ifdef YYERRORSYMBOL + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + return yy_default[stateno]; + } +#else + assert( i>=0 && iyytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will execute if the parser + ** stack every overflows */ +/******** Begin %stack_overflow code ******************************************/ + + sqlite3ErrorMsg(pParse, "parser stack overflow"); +/******** End %stack_overflow code ********************************************/ + sqlite3ParserARG_STORE /* Suppress warning about unused %extra_argument var */ + sqlite3ParserCTX_STORE +} + +/* +** Print tracing information for a SHIFT action +*/ +#ifndef NDEBUG +static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){ + if( yyTraceFILE ){ + if( yyNewStateyytos->major], + yyNewState); + }else{ + fprintf(yyTraceFILE,"%s%s '%s', pending reduce %d\n", + yyTracePrompt, zTag, yyTokenName[yypParser->yytos->major], + yyNewState - YY_MIN_REDUCE); + } + } +} +#else +# define yyTraceShift(X,Y,Z) +#endif + +/* +** Perform a shift action. +*/ +static void yy_shift( + yyParser *yypParser, /* The parser to be shifted */ + YYACTIONTYPE yyNewState, /* The new state to shift in */ + YYCODETYPE yyMajor, /* The major token to shift in */ + sqlite3ParserTOKENTYPE yyMinor /* The minor token to shift in */ +){ + yyStackEntry *yytos; + yypParser->yytos++; +#ifdef YYTRACKMAXSTACKDEPTH + if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>yypParser->yystackEnd ){ + yypParser->yytos--; + yyStackOverflow(yypParser); + return; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){ + if( yyGrowStack(yypParser) ){ + yypParser->yytos--; + yyStackOverflow(yypParser); + return; + } + } +#endif + if( yyNewState > YY_MAX_SHIFT ){ + yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + } + yytos = yypParser->yytos; + yytos->stateno = yyNewState; + yytos->major = yyMajor; + yytos->minor.yy0 = yyMinor; + yyTraceShift(yypParser, yyNewState, "Shift"); +} + +/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side +** of that rule */ +static const YYCODETYPE yyRuleInfoLhs[] = { + 189, /* (0) explain ::= EXPLAIN */ + 189, /* (1) explain ::= EXPLAIN QUERY PLAN */ + 188, /* (2) cmdx ::= cmd */ + 190, /* (3) cmd ::= BEGIN transtype trans_opt */ + 191, /* (4) transtype ::= */ + 191, /* (5) transtype ::= DEFERRED */ + 191, /* (6) transtype ::= IMMEDIATE */ + 191, /* (7) transtype ::= EXCLUSIVE */ + 190, /* (8) cmd ::= COMMIT|END trans_opt */ + 190, /* (9) cmd ::= ROLLBACK trans_opt */ + 190, /* (10) cmd ::= SAVEPOINT nm */ + 190, /* (11) cmd ::= RELEASE savepoint_opt nm */ + 190, /* (12) cmd ::= ROLLBACK trans_opt TO savepoint_opt nm */ + 195, /* (13) create_table ::= createkw temp TABLE ifnotexists nm dbnm */ + 197, /* (14) createkw ::= CREATE */ + 199, /* (15) ifnotexists ::= */ + 199, /* (16) ifnotexists ::= IF NOT EXISTS */ + 198, /* (17) temp ::= TEMP */ + 198, /* (18) temp ::= */ + 196, /* (19) create_table_args ::= LP columnlist conslist_opt RP table_option_set */ + 196, /* (20) create_table_args ::= AS select */ + 203, /* (21) table_option_set ::= */ + 203, /* (22) table_option_set ::= table_option_set COMMA table_option */ + 205, /* (23) table_option ::= WITHOUT nm */ + 205, /* (24) table_option ::= nm */ + 206, /* (25) columnname ::= nm typetoken */ + 208, /* (26) typetoken ::= */ + 208, /* (27) typetoken ::= typename LP signed RP */ + 208, /* (28) typetoken ::= typename LP signed COMMA signed RP */ + 209, /* (29) typename ::= typename ID|STRING */ + 213, /* (30) scanpt ::= */ + 214, /* (31) scantok ::= */ + 215, /* (32) ccons ::= CONSTRAINT nm */ + 215, /* (33) ccons ::= DEFAULT scantok term */ + 215, /* (34) ccons ::= DEFAULT LP expr RP */ + 215, /* (35) ccons ::= DEFAULT PLUS scantok term */ + 215, /* (36) ccons ::= DEFAULT MINUS scantok term */ + 215, /* (37) ccons ::= DEFAULT scantok ID|INDEXED */ + 215, /* (38) ccons ::= NOT NULL onconf */ + 215, /* (39) ccons ::= PRIMARY KEY sortorder onconf autoinc */ + 215, /* (40) ccons ::= UNIQUE onconf */ + 215, /* (41) ccons ::= CHECK LP expr RP */ + 215, /* (42) ccons ::= REFERENCES nm eidlist_opt refargs */ + 215, /* (43) ccons ::= defer_subclause */ + 215, /* (44) ccons ::= COLLATE ID|STRING */ + 224, /* (45) generated ::= LP expr RP */ + 224, /* (46) generated ::= LP expr RP ID */ + 220, /* (47) autoinc ::= */ + 220, /* (48) autoinc ::= AUTOINCR */ + 222, /* (49) refargs ::= */ + 222, /* (50) refargs ::= refargs refarg */ + 225, /* (51) refarg ::= MATCH nm */ + 225, /* (52) refarg ::= ON INSERT refact */ + 225, /* (53) refarg ::= ON DELETE refact */ + 225, /* (54) refarg ::= ON UPDATE refact */ + 226, /* (55) refact ::= SET NULL */ + 226, /* (56) refact ::= SET DEFAULT */ + 226, /* (57) refact ::= CASCADE */ + 226, /* (58) refact ::= RESTRICT */ + 226, /* (59) refact ::= NO ACTION */ + 223, /* (60) defer_subclause ::= NOT DEFERRABLE init_deferred_pred_opt */ + 223, /* (61) defer_subclause ::= DEFERRABLE init_deferred_pred_opt */ + 227, /* (62) init_deferred_pred_opt ::= */ + 227, /* (63) init_deferred_pred_opt ::= INITIALLY DEFERRED */ + 227, /* (64) init_deferred_pred_opt ::= INITIALLY IMMEDIATE */ + 202, /* (65) conslist_opt ::= */ + 229, /* (66) tconscomma ::= COMMA */ + 230, /* (67) tcons ::= CONSTRAINT nm */ + 230, /* (68) tcons ::= PRIMARY KEY LP sortlist autoinc RP onconf */ + 230, /* (69) tcons ::= UNIQUE LP sortlist RP onconf */ + 230, /* (70) tcons ::= CHECK LP expr RP onconf */ + 230, /* (71) tcons ::= FOREIGN KEY LP eidlist RP REFERENCES nm eidlist_opt refargs defer_subclause_opt */ + 233, /* (72) defer_subclause_opt ::= */ + 218, /* (73) onconf ::= */ + 218, /* (74) onconf ::= ON CONFLICT resolvetype */ + 234, /* (75) orconf ::= */ + 234, /* (76) orconf ::= OR resolvetype */ + 235, /* (77) resolvetype ::= IGNORE */ + 235, /* (78) resolvetype ::= REPLACE */ + 190, /* (79) cmd ::= DROP TABLE ifexists fullname */ + 237, /* (80) ifexists ::= IF EXISTS */ + 237, /* (81) ifexists ::= */ + 190, /* (82) cmd ::= createkw temp VIEW ifnotexists nm dbnm eidlist_opt AS select */ + 190, /* (83) cmd ::= DROP VIEW ifexists fullname */ + 190, /* (84) cmd ::= select */ + 204, /* (85) select ::= WITH wqlist selectnowith */ + 204, /* (86) select ::= WITH RECURSIVE wqlist selectnowith */ + 204, /* (87) select ::= selectnowith */ + 239, /* (88) selectnowith ::= selectnowith multiselect_op oneselect */ + 242, /* (89) multiselect_op ::= UNION */ + 242, /* (90) multiselect_op ::= UNION ALL */ + 242, /* (91) multiselect_op ::= EXCEPT|INTERSECT */ + 240, /* (92) oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt orderby_opt limit_opt */ + 240, /* (93) oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt window_clause orderby_opt limit_opt */ + 252, /* (94) values ::= VALUES LP nexprlist RP */ + 252, /* (95) values ::= values COMMA LP nexprlist RP */ + 243, /* (96) distinct ::= DISTINCT */ + 243, /* (97) distinct ::= ALL */ + 243, /* (98) distinct ::= */ + 254, /* (99) sclp ::= */ + 244, /* (100) selcollist ::= sclp scanpt expr scanpt as */ + 244, /* (101) selcollist ::= sclp scanpt STAR */ + 244, /* (102) selcollist ::= sclp scanpt nm DOT STAR */ + 255, /* (103) as ::= AS nm */ + 255, /* (104) as ::= */ + 245, /* (105) from ::= */ + 245, /* (106) from ::= FROM seltablist */ + 257, /* (107) stl_prefix ::= seltablist joinop */ + 257, /* (108) stl_prefix ::= */ + 256, /* (109) seltablist ::= stl_prefix nm dbnm as indexed_opt on_opt using_opt */ + 256, /* (110) seltablist ::= stl_prefix nm dbnm LP exprlist RP as on_opt using_opt */ + 256, /* (111) seltablist ::= stl_prefix LP select RP as on_opt using_opt */ + 256, /* (112) seltablist ::= stl_prefix LP seltablist RP as on_opt using_opt */ + 200, /* (113) dbnm ::= */ + 200, /* (114) dbnm ::= DOT nm */ + 238, /* (115) fullname ::= nm */ + 238, /* (116) fullname ::= nm DOT nm */ + 263, /* (117) xfullname ::= nm */ + 263, /* (118) xfullname ::= nm DOT nm */ + 263, /* (119) xfullname ::= nm DOT nm AS nm */ + 263, /* (120) xfullname ::= nm AS nm */ + 258, /* (121) joinop ::= COMMA|JOIN */ + 258, /* (122) joinop ::= JOIN_KW JOIN */ + 258, /* (123) joinop ::= JOIN_KW nm JOIN */ + 258, /* (124) joinop ::= JOIN_KW nm nm JOIN */ + 260, /* (125) on_opt ::= ON expr */ + 260, /* (126) on_opt ::= */ + 259, /* (127) indexed_opt ::= */ + 259, /* (128) indexed_opt ::= INDEXED BY nm */ + 259, /* (129) indexed_opt ::= NOT INDEXED */ + 261, /* (130) using_opt ::= USING LP idlist RP */ + 261, /* (131) using_opt ::= */ + 249, /* (132) orderby_opt ::= */ + 249, /* (133) orderby_opt ::= ORDER BY sortlist */ + 231, /* (134) sortlist ::= sortlist COMMA expr sortorder nulls */ + 231, /* (135) sortlist ::= expr sortorder nulls */ + 219, /* (136) sortorder ::= ASC */ + 219, /* (137) sortorder ::= DESC */ + 219, /* (138) sortorder ::= */ + 265, /* (139) nulls ::= NULLS FIRST */ + 265, /* (140) nulls ::= NULLS LAST */ + 265, /* (141) nulls ::= */ + 247, /* (142) groupby_opt ::= */ + 247, /* (143) groupby_opt ::= GROUP BY nexprlist */ + 248, /* (144) having_opt ::= */ + 248, /* (145) having_opt ::= HAVING expr */ + 250, /* (146) limit_opt ::= */ + 250, /* (147) limit_opt ::= LIMIT expr */ + 250, /* (148) limit_opt ::= LIMIT expr OFFSET expr */ + 250, /* (149) limit_opt ::= LIMIT expr COMMA expr */ + 190, /* (150) cmd ::= with DELETE FROM xfullname indexed_opt where_opt_ret orderby_opt limit_opt */ + 246, /* (151) where_opt ::= */ + 246, /* (152) where_opt ::= WHERE expr */ + 267, /* (153) where_opt_ret ::= */ + 267, /* (154) where_opt_ret ::= WHERE expr */ + 267, /* (155) where_opt_ret ::= RETURNING selcollist */ + 267, /* (156) where_opt_ret ::= WHERE expr RETURNING selcollist */ + 190, /* (157) cmd ::= with UPDATE orconf xfullname indexed_opt SET setlist from where_opt_ret orderby_opt limit_opt */ + 268, /* (158) setlist ::= setlist COMMA nm EQ expr */ + 268, /* (159) setlist ::= setlist COMMA LP idlist RP EQ expr */ + 268, /* (160) setlist ::= nm EQ expr */ + 268, /* (161) setlist ::= LP idlist RP EQ expr */ + 190, /* (162) cmd ::= with insert_cmd INTO xfullname idlist_opt select upsert */ + 190, /* (163) cmd ::= with insert_cmd INTO xfullname idlist_opt DEFAULT VALUES returning */ + 271, /* (164) upsert ::= */ + 271, /* (165) upsert ::= RETURNING selcollist */ + 271, /* (166) upsert ::= ON CONFLICT LP sortlist RP where_opt DO UPDATE SET setlist where_opt upsert */ + 271, /* (167) upsert ::= ON CONFLICT LP sortlist RP where_opt DO NOTHING upsert */ + 271, /* (168) upsert ::= ON CONFLICT DO NOTHING returning */ + 271, /* (169) upsert ::= ON CONFLICT DO UPDATE SET setlist where_opt returning */ + 272, /* (170) returning ::= RETURNING selcollist */ + 269, /* (171) insert_cmd ::= INSERT orconf */ + 269, /* (172) insert_cmd ::= REPLACE */ + 270, /* (173) idlist_opt ::= */ + 270, /* (174) idlist_opt ::= LP idlist RP */ + 264, /* (175) idlist ::= idlist COMMA nm */ + 264, /* (176) idlist ::= nm */ + 217, /* (177) expr ::= LP expr RP */ + 217, /* (178) expr ::= ID|INDEXED */ + 217, /* (179) expr ::= JOIN_KW */ + 217, /* (180) expr ::= nm DOT nm */ + 217, /* (181) expr ::= nm DOT nm DOT nm */ + 216, /* (182) term ::= NULL|FLOAT|BLOB */ + 216, /* (183) term ::= STRING */ + 216, /* (184) term ::= INTEGER */ + 217, /* (185) expr ::= VARIABLE */ + 217, /* (186) expr ::= expr COLLATE ID|STRING */ + 217, /* (187) expr ::= CAST LP expr AS typetoken RP */ + 217, /* (188) expr ::= ID|INDEXED LP distinct exprlist RP */ + 217, /* (189) expr ::= ID|INDEXED LP STAR RP */ + 217, /* (190) expr ::= ID|INDEXED LP distinct exprlist RP filter_over */ + 217, /* (191) expr ::= ID|INDEXED LP STAR RP filter_over */ + 216, /* (192) term ::= CTIME_KW */ + 217, /* (193) expr ::= LP nexprlist COMMA expr RP */ + 217, /* (194) expr ::= expr AND expr */ + 217, /* (195) expr ::= expr OR expr */ + 217, /* (196) expr ::= expr LT|GT|GE|LE expr */ + 217, /* (197) expr ::= expr EQ|NE expr */ + 217, /* (198) expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ + 217, /* (199) expr ::= expr PLUS|MINUS expr */ + 217, /* (200) expr ::= expr STAR|SLASH|REM expr */ + 217, /* (201) expr ::= expr CONCAT expr */ + 274, /* (202) likeop ::= NOT LIKE_KW|MATCH */ + 217, /* (203) expr ::= expr likeop expr */ + 217, /* (204) expr ::= expr likeop expr ESCAPE expr */ + 217, /* (205) expr ::= expr ISNULL|NOTNULL */ + 217, /* (206) expr ::= expr NOT NULL */ + 217, /* (207) expr ::= expr IS expr */ + 217, /* (208) expr ::= expr IS NOT expr */ + 217, /* (209) expr ::= NOT expr */ + 217, /* (210) expr ::= BITNOT expr */ + 217, /* (211) expr ::= PLUS|MINUS expr */ + 217, /* (212) expr ::= expr PTR expr */ + 275, /* (213) between_op ::= BETWEEN */ + 275, /* (214) between_op ::= NOT BETWEEN */ + 217, /* (215) expr ::= expr between_op expr AND expr */ + 276, /* (216) in_op ::= IN */ + 276, /* (217) in_op ::= NOT IN */ + 217, /* (218) expr ::= expr in_op LP exprlist RP */ + 217, /* (219) expr ::= LP select RP */ + 217, /* (220) expr ::= expr in_op LP select RP */ + 217, /* (221) expr ::= expr in_op nm dbnm paren_exprlist */ + 217, /* (222) expr ::= EXISTS LP select RP */ + 217, /* (223) expr ::= CASE case_operand case_exprlist case_else END */ + 279, /* (224) case_exprlist ::= case_exprlist WHEN expr THEN expr */ + 279, /* (225) case_exprlist ::= WHEN expr THEN expr */ + 280, /* (226) case_else ::= ELSE expr */ + 280, /* (227) case_else ::= */ + 278, /* (228) case_operand ::= expr */ + 278, /* (229) case_operand ::= */ + 262, /* (230) exprlist ::= */ + 253, /* (231) nexprlist ::= nexprlist COMMA expr */ + 253, /* (232) nexprlist ::= expr */ + 277, /* (233) paren_exprlist ::= */ + 277, /* (234) paren_exprlist ::= LP exprlist RP */ + 190, /* (235) cmd ::= createkw uniqueflag INDEX ifnotexists nm dbnm ON nm LP sortlist RP where_opt */ + 281, /* (236) uniqueflag ::= UNIQUE */ + 281, /* (237) uniqueflag ::= */ + 221, /* (238) eidlist_opt ::= */ + 221, /* (239) eidlist_opt ::= LP eidlist RP */ + 232, /* (240) eidlist ::= eidlist COMMA nm collate sortorder */ + 232, /* (241) eidlist ::= nm collate sortorder */ + 282, /* (242) collate ::= */ + 282, /* (243) collate ::= COLLATE ID|STRING */ + 190, /* (244) cmd ::= DROP INDEX ifexists fullname */ + 190, /* (245) cmd ::= VACUUM vinto */ + 190, /* (246) cmd ::= VACUUM nm vinto */ + 283, /* (247) vinto ::= INTO expr */ + 283, /* (248) vinto ::= */ + 190, /* (249) cmd ::= PRAGMA nm dbnm */ + 190, /* (250) cmd ::= PRAGMA nm dbnm EQ nmnum */ + 190, /* (251) cmd ::= PRAGMA nm dbnm LP nmnum RP */ + 190, /* (252) cmd ::= PRAGMA nm dbnm EQ minus_num */ + 190, /* (253) cmd ::= PRAGMA nm dbnm LP minus_num RP */ + 211, /* (254) plus_num ::= PLUS INTEGER|FLOAT */ + 212, /* (255) minus_num ::= MINUS INTEGER|FLOAT */ + 190, /* (256) cmd ::= createkw trigger_decl BEGIN trigger_cmd_list END */ + 285, /* (257) trigger_decl ::= temp TRIGGER ifnotexists nm dbnm trigger_time trigger_event ON fullname foreach_clause when_clause */ + 287, /* (258) trigger_time ::= BEFORE|AFTER */ + 287, /* (259) trigger_time ::= INSTEAD OF */ + 287, /* (260) trigger_time ::= */ + 288, /* (261) trigger_event ::= DELETE|INSERT */ + 288, /* (262) trigger_event ::= UPDATE */ + 288, /* (263) trigger_event ::= UPDATE OF idlist */ + 290, /* (264) when_clause ::= */ + 290, /* (265) when_clause ::= WHEN expr */ + 286, /* (266) trigger_cmd_list ::= trigger_cmd_list trigger_cmd SEMI */ + 286, /* (267) trigger_cmd_list ::= trigger_cmd SEMI */ + 292, /* (268) trnm ::= nm DOT nm */ + 293, /* (269) tridxby ::= INDEXED BY nm */ + 293, /* (270) tridxby ::= NOT INDEXED */ + 291, /* (271) trigger_cmd ::= UPDATE orconf trnm tridxby SET setlist from where_opt scanpt */ + 291, /* (272) trigger_cmd ::= scanpt insert_cmd INTO trnm idlist_opt select upsert scanpt */ + 291, /* (273) trigger_cmd ::= DELETE FROM trnm tridxby where_opt scanpt */ + 291, /* (274) trigger_cmd ::= scanpt select scanpt */ + 217, /* (275) expr ::= RAISE LP IGNORE RP */ + 217, /* (276) expr ::= RAISE LP raisetype COMMA nm RP */ + 236, /* (277) raisetype ::= ROLLBACK */ + 236, /* (278) raisetype ::= ABORT */ + 236, /* (279) raisetype ::= FAIL */ + 190, /* (280) cmd ::= DROP TRIGGER ifexists fullname */ + 190, /* (281) cmd ::= ATTACH database_kw_opt expr AS expr key_opt */ + 190, /* (282) cmd ::= DETACH database_kw_opt expr */ + 295, /* (283) key_opt ::= */ + 295, /* (284) key_opt ::= KEY expr */ + 190, /* (285) cmd ::= REINDEX */ + 190, /* (286) cmd ::= REINDEX nm dbnm */ + 190, /* (287) cmd ::= ANALYZE */ + 190, /* (288) cmd ::= ANALYZE nm dbnm */ + 190, /* (289) cmd ::= ALTER TABLE fullname RENAME TO nm */ + 190, /* (290) cmd ::= ALTER TABLE add_column_fullname ADD kwcolumn_opt columnname carglist */ + 190, /* (291) cmd ::= ALTER TABLE fullname DROP kwcolumn_opt nm */ + 296, /* (292) add_column_fullname ::= fullname */ + 190, /* (293) cmd ::= ALTER TABLE fullname RENAME kwcolumn_opt nm TO nm */ + 190, /* (294) cmd ::= create_vtab */ + 190, /* (295) cmd ::= create_vtab LP vtabarglist RP */ + 298, /* (296) create_vtab ::= createkw VIRTUAL TABLE ifnotexists nm dbnm USING nm */ + 300, /* (297) vtabarg ::= */ + 301, /* (298) vtabargtoken ::= ANY */ + 301, /* (299) vtabargtoken ::= lp anylist RP */ + 302, /* (300) lp ::= LP */ + 266, /* (301) with ::= WITH wqlist */ + 266, /* (302) with ::= WITH RECURSIVE wqlist */ + 305, /* (303) wqas ::= AS */ + 305, /* (304) wqas ::= AS MATERIALIZED */ + 305, /* (305) wqas ::= AS NOT MATERIALIZED */ + 304, /* (306) wqitem ::= nm eidlist_opt wqas LP select RP */ + 241, /* (307) wqlist ::= wqitem */ + 241, /* (308) wqlist ::= wqlist COMMA wqitem */ + 306, /* (309) windowdefn_list ::= windowdefn */ + 306, /* (310) windowdefn_list ::= windowdefn_list COMMA windowdefn */ + 307, /* (311) windowdefn ::= nm AS LP window RP */ + 308, /* (312) window ::= PARTITION BY nexprlist orderby_opt frame_opt */ + 308, /* (313) window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ + 308, /* (314) window ::= ORDER BY sortlist frame_opt */ + 308, /* (315) window ::= nm ORDER BY sortlist frame_opt */ + 308, /* (316) window ::= frame_opt */ + 308, /* (317) window ::= nm frame_opt */ + 309, /* (318) frame_opt ::= */ + 309, /* (319) frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ + 309, /* (320) frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ + 313, /* (321) range_or_rows ::= RANGE|ROWS|GROUPS */ + 315, /* (322) frame_bound_s ::= frame_bound */ + 315, /* (323) frame_bound_s ::= UNBOUNDED PRECEDING */ + 316, /* (324) frame_bound_e ::= frame_bound */ + 316, /* (325) frame_bound_e ::= UNBOUNDED FOLLOWING */ + 314, /* (326) frame_bound ::= expr PRECEDING|FOLLOWING */ + 314, /* (327) frame_bound ::= CURRENT ROW */ + 317, /* (328) frame_exclude_opt ::= */ + 317, /* (329) frame_exclude_opt ::= EXCLUDE frame_exclude */ + 318, /* (330) frame_exclude ::= NO OTHERS */ + 318, /* (331) frame_exclude ::= CURRENT ROW */ + 318, /* (332) frame_exclude ::= GROUP|TIES */ + 251, /* (333) window_clause ::= WINDOW windowdefn_list */ + 273, /* (334) filter_over ::= filter_clause over_clause */ + 273, /* (335) filter_over ::= over_clause */ + 273, /* (336) filter_over ::= filter_clause */ + 312, /* (337) over_clause ::= OVER LP window RP */ + 312, /* (338) over_clause ::= OVER nm */ + 311, /* (339) filter_clause ::= FILTER LP WHERE expr RP */ + 185, /* (340) input ::= cmdlist */ + 186, /* (341) cmdlist ::= cmdlist ecmd */ + 186, /* (342) cmdlist ::= ecmd */ + 187, /* (343) ecmd ::= SEMI */ + 187, /* (344) ecmd ::= cmdx SEMI */ + 187, /* (345) ecmd ::= explain cmdx SEMI */ + 192, /* (346) trans_opt ::= */ + 192, /* (347) trans_opt ::= TRANSACTION */ + 192, /* (348) trans_opt ::= TRANSACTION nm */ + 194, /* (349) savepoint_opt ::= SAVEPOINT */ + 194, /* (350) savepoint_opt ::= */ + 190, /* (351) cmd ::= create_table create_table_args */ + 203, /* (352) table_option_set ::= table_option */ + 201, /* (353) columnlist ::= columnlist COMMA columnname carglist */ + 201, /* (354) columnlist ::= columnname carglist */ + 193, /* (355) nm ::= ID|INDEXED */ + 193, /* (356) nm ::= STRING */ + 193, /* (357) nm ::= JOIN_KW */ + 208, /* (358) typetoken ::= typename */ + 209, /* (359) typename ::= ID|STRING */ + 210, /* (360) signed ::= plus_num */ + 210, /* (361) signed ::= minus_num */ + 207, /* (362) carglist ::= carglist ccons */ + 207, /* (363) carglist ::= */ + 215, /* (364) ccons ::= NULL onconf */ + 215, /* (365) ccons ::= GENERATED ALWAYS AS generated */ + 215, /* (366) ccons ::= AS generated */ + 202, /* (367) conslist_opt ::= COMMA conslist */ + 228, /* (368) conslist ::= conslist tconscomma tcons */ + 228, /* (369) conslist ::= tcons */ + 229, /* (370) tconscomma ::= */ + 233, /* (371) defer_subclause_opt ::= defer_subclause */ + 235, /* (372) resolvetype ::= raisetype */ + 239, /* (373) selectnowith ::= oneselect */ + 240, /* (374) oneselect ::= values */ + 254, /* (375) sclp ::= selcollist COMMA */ + 255, /* (376) as ::= ID|STRING */ + 272, /* (377) returning ::= */ + 217, /* (378) expr ::= term */ + 274, /* (379) likeop ::= LIKE_KW|MATCH */ + 262, /* (380) exprlist ::= nexprlist */ + 284, /* (381) nmnum ::= plus_num */ + 284, /* (382) nmnum ::= nm */ + 284, /* (383) nmnum ::= ON */ + 284, /* (384) nmnum ::= DELETE */ + 284, /* (385) nmnum ::= DEFAULT */ + 211, /* (386) plus_num ::= INTEGER|FLOAT */ + 289, /* (387) foreach_clause ::= */ + 289, /* (388) foreach_clause ::= FOR EACH ROW */ + 292, /* (389) trnm ::= nm */ + 293, /* (390) tridxby ::= */ + 294, /* (391) database_kw_opt ::= DATABASE */ + 294, /* (392) database_kw_opt ::= */ + 297, /* (393) kwcolumn_opt ::= */ + 297, /* (394) kwcolumn_opt ::= COLUMNKW */ + 299, /* (395) vtabarglist ::= vtabarg */ + 299, /* (396) vtabarglist ::= vtabarglist COMMA vtabarg */ + 300, /* (397) vtabarg ::= vtabarg vtabargtoken */ + 303, /* (398) anylist ::= */ + 303, /* (399) anylist ::= anylist LP anylist RP */ + 303, /* (400) anylist ::= anylist ANY */ + 266, /* (401) with ::= */ +}; + +/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number +** of symbols on the right-hand side of that rule. */ +static const signed char yyRuleInfoNRhs[] = { + -1, /* (0) explain ::= EXPLAIN */ + -3, /* (1) explain ::= EXPLAIN QUERY PLAN */ + -1, /* (2) cmdx ::= cmd */ + -3, /* (3) cmd ::= BEGIN transtype trans_opt */ + 0, /* (4) transtype ::= */ + -1, /* (5) transtype ::= DEFERRED */ + -1, /* (6) transtype ::= IMMEDIATE */ + -1, /* (7) transtype ::= EXCLUSIVE */ + -2, /* (8) cmd ::= COMMIT|END trans_opt */ + -2, /* (9) cmd ::= ROLLBACK trans_opt */ + -2, /* (10) cmd ::= SAVEPOINT nm */ + -3, /* (11) cmd ::= RELEASE savepoint_opt nm */ + -5, /* (12) cmd ::= ROLLBACK trans_opt TO savepoint_opt nm */ + -6, /* (13) create_table ::= createkw temp TABLE ifnotexists nm dbnm */ + -1, /* (14) createkw ::= CREATE */ + 0, /* (15) ifnotexists ::= */ + -3, /* (16) ifnotexists ::= IF NOT EXISTS */ + -1, /* (17) temp ::= TEMP */ + 0, /* (18) temp ::= */ + -5, /* (19) create_table_args ::= LP columnlist conslist_opt RP table_option_set */ + -2, /* (20) create_table_args ::= AS select */ + 0, /* (21) table_option_set ::= */ + -3, /* (22) table_option_set ::= table_option_set COMMA table_option */ + -2, /* (23) table_option ::= WITHOUT nm */ + -1, /* (24) table_option ::= nm */ + -2, /* (25) columnname ::= nm typetoken */ + 0, /* (26) typetoken ::= */ + -4, /* (27) typetoken ::= typename LP signed RP */ + -6, /* (28) typetoken ::= typename LP signed COMMA signed RP */ + -2, /* (29) typename ::= typename ID|STRING */ + 0, /* (30) scanpt ::= */ + 0, /* (31) scantok ::= */ + -2, /* (32) ccons ::= CONSTRAINT nm */ + -3, /* (33) ccons ::= DEFAULT scantok term */ + -4, /* (34) ccons ::= DEFAULT LP expr RP */ + -4, /* (35) ccons ::= DEFAULT PLUS scantok term */ + -4, /* (36) ccons ::= DEFAULT MINUS scantok term */ + -3, /* (37) ccons ::= DEFAULT scantok ID|INDEXED */ + -3, /* (38) ccons ::= NOT NULL onconf */ + -5, /* (39) ccons ::= PRIMARY KEY sortorder onconf autoinc */ + -2, /* (40) ccons ::= UNIQUE onconf */ + -4, /* (41) ccons ::= CHECK LP expr RP */ + -4, /* (42) ccons ::= REFERENCES nm eidlist_opt refargs */ + -1, /* (43) ccons ::= defer_subclause */ + -2, /* (44) ccons ::= COLLATE ID|STRING */ + -3, /* (45) generated ::= LP expr RP */ + -4, /* (46) generated ::= LP expr RP ID */ + 0, /* (47) autoinc ::= */ + -1, /* (48) autoinc ::= AUTOINCR */ + 0, /* (49) refargs ::= */ + -2, /* (50) refargs ::= refargs refarg */ + -2, /* (51) refarg ::= MATCH nm */ + -3, /* (52) refarg ::= ON INSERT refact */ + -3, /* (53) refarg ::= ON DELETE refact */ + -3, /* (54) refarg ::= ON UPDATE refact */ + -2, /* (55) refact ::= SET NULL */ + -2, /* (56) refact ::= SET DEFAULT */ + -1, /* (57) refact ::= CASCADE */ + -1, /* (58) refact ::= RESTRICT */ + -2, /* (59) refact ::= NO ACTION */ + -3, /* (60) defer_subclause ::= NOT DEFERRABLE init_deferred_pred_opt */ + -2, /* (61) defer_subclause ::= DEFERRABLE init_deferred_pred_opt */ + 0, /* (62) init_deferred_pred_opt ::= */ + -2, /* (63) init_deferred_pred_opt ::= INITIALLY DEFERRED */ + -2, /* (64) init_deferred_pred_opt ::= INITIALLY IMMEDIATE */ + 0, /* (65) conslist_opt ::= */ + -1, /* (66) tconscomma ::= COMMA */ + -2, /* (67) tcons ::= CONSTRAINT nm */ + -7, /* (68) tcons ::= PRIMARY KEY LP sortlist autoinc RP onconf */ + -5, /* (69) tcons ::= UNIQUE LP sortlist RP onconf */ + -5, /* (70) tcons ::= CHECK LP expr RP onconf */ + -10, /* (71) tcons ::= FOREIGN KEY LP eidlist RP REFERENCES nm eidlist_opt refargs defer_subclause_opt */ + 0, /* (72) defer_subclause_opt ::= */ + 0, /* (73) onconf ::= */ + -3, /* (74) onconf ::= ON CONFLICT resolvetype */ + 0, /* (75) orconf ::= */ + -2, /* (76) orconf ::= OR resolvetype */ + -1, /* (77) resolvetype ::= IGNORE */ + -1, /* (78) resolvetype ::= REPLACE */ + -4, /* (79) cmd ::= DROP TABLE ifexists fullname */ + -2, /* (80) ifexists ::= IF EXISTS */ + 0, /* (81) ifexists ::= */ + -9, /* (82) cmd ::= createkw temp VIEW ifnotexists nm dbnm eidlist_opt AS select */ + -4, /* (83) cmd ::= DROP VIEW ifexists fullname */ + -1, /* (84) cmd ::= select */ + -3, /* (85) select ::= WITH wqlist selectnowith */ + -4, /* (86) select ::= WITH RECURSIVE wqlist selectnowith */ + -1, /* (87) select ::= selectnowith */ + -3, /* (88) selectnowith ::= selectnowith multiselect_op oneselect */ + -1, /* (89) multiselect_op ::= UNION */ + -2, /* (90) multiselect_op ::= UNION ALL */ + -1, /* (91) multiselect_op ::= EXCEPT|INTERSECT */ + -9, /* (92) oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt orderby_opt limit_opt */ + -10, /* (93) oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt window_clause orderby_opt limit_opt */ + -4, /* (94) values ::= VALUES LP nexprlist RP */ + -5, /* (95) values ::= values COMMA LP nexprlist RP */ + -1, /* (96) distinct ::= DISTINCT */ + -1, /* (97) distinct ::= ALL */ + 0, /* (98) distinct ::= */ + 0, /* (99) sclp ::= */ + -5, /* (100) selcollist ::= sclp scanpt expr scanpt as */ + -3, /* (101) selcollist ::= sclp scanpt STAR */ + -5, /* (102) selcollist ::= sclp scanpt nm DOT STAR */ + -2, /* (103) as ::= AS nm */ + 0, /* (104) as ::= */ + 0, /* (105) from ::= */ + -2, /* (106) from ::= FROM seltablist */ + -2, /* (107) stl_prefix ::= seltablist joinop */ + 0, /* (108) stl_prefix ::= */ + -7, /* (109) seltablist ::= stl_prefix nm dbnm as indexed_opt on_opt using_opt */ + -9, /* (110) seltablist ::= stl_prefix nm dbnm LP exprlist RP as on_opt using_opt */ + -7, /* (111) seltablist ::= stl_prefix LP select RP as on_opt using_opt */ + -7, /* (112) seltablist ::= stl_prefix LP seltablist RP as on_opt using_opt */ + 0, /* (113) dbnm ::= */ + -2, /* (114) dbnm ::= DOT nm */ + -1, /* (115) fullname ::= nm */ + -3, /* (116) fullname ::= nm DOT nm */ + -1, /* (117) xfullname ::= nm */ + -3, /* (118) xfullname ::= nm DOT nm */ + -5, /* (119) xfullname ::= nm DOT nm AS nm */ + -3, /* (120) xfullname ::= nm AS nm */ + -1, /* (121) joinop ::= COMMA|JOIN */ + -2, /* (122) joinop ::= JOIN_KW JOIN */ + -3, /* (123) joinop ::= JOIN_KW nm JOIN */ + -4, /* (124) joinop ::= JOIN_KW nm nm JOIN */ + -2, /* (125) on_opt ::= ON expr */ + 0, /* (126) on_opt ::= */ + 0, /* (127) indexed_opt ::= */ + -3, /* (128) indexed_opt ::= INDEXED BY nm */ + -2, /* (129) indexed_opt ::= NOT INDEXED */ + -4, /* (130) using_opt ::= USING LP idlist RP */ + 0, /* (131) using_opt ::= */ + 0, /* (132) orderby_opt ::= */ + -3, /* (133) orderby_opt ::= ORDER BY sortlist */ + -5, /* (134) sortlist ::= sortlist COMMA expr sortorder nulls */ + -3, /* (135) sortlist ::= expr sortorder nulls */ + -1, /* (136) sortorder ::= ASC */ + -1, /* (137) sortorder ::= DESC */ + 0, /* (138) sortorder ::= */ + -2, /* (139) nulls ::= NULLS FIRST */ + -2, /* (140) nulls ::= NULLS LAST */ + 0, /* (141) nulls ::= */ + 0, /* (142) groupby_opt ::= */ + -3, /* (143) groupby_opt ::= GROUP BY nexprlist */ + 0, /* (144) having_opt ::= */ + -2, /* (145) having_opt ::= HAVING expr */ + 0, /* (146) limit_opt ::= */ + -2, /* (147) limit_opt ::= LIMIT expr */ + -4, /* (148) limit_opt ::= LIMIT expr OFFSET expr */ + -4, /* (149) limit_opt ::= LIMIT expr COMMA expr */ + -8, /* (150) cmd ::= with DELETE FROM xfullname indexed_opt where_opt_ret orderby_opt limit_opt */ + 0, /* (151) where_opt ::= */ + -2, /* (152) where_opt ::= WHERE expr */ + 0, /* (153) where_opt_ret ::= */ + -2, /* (154) where_opt_ret ::= WHERE expr */ + -2, /* (155) where_opt_ret ::= RETURNING selcollist */ + -4, /* (156) where_opt_ret ::= WHERE expr RETURNING selcollist */ + -11, /* (157) cmd ::= with UPDATE orconf xfullname indexed_opt SET setlist from where_opt_ret orderby_opt limit_opt */ + -5, /* (158) setlist ::= setlist COMMA nm EQ expr */ + -7, /* (159) setlist ::= setlist COMMA LP idlist RP EQ expr */ + -3, /* (160) setlist ::= nm EQ expr */ + -5, /* (161) setlist ::= LP idlist RP EQ expr */ + -7, /* (162) cmd ::= with insert_cmd INTO xfullname idlist_opt select upsert */ + -8, /* (163) cmd ::= with insert_cmd INTO xfullname idlist_opt DEFAULT VALUES returning */ + 0, /* (164) upsert ::= */ + -2, /* (165) upsert ::= RETURNING selcollist */ + -12, /* (166) upsert ::= ON CONFLICT LP sortlist RP where_opt DO UPDATE SET setlist where_opt upsert */ + -9, /* (167) upsert ::= ON CONFLICT LP sortlist RP where_opt DO NOTHING upsert */ + -5, /* (168) upsert ::= ON CONFLICT DO NOTHING returning */ + -8, /* (169) upsert ::= ON CONFLICT DO UPDATE SET setlist where_opt returning */ + -2, /* (170) returning ::= RETURNING selcollist */ + -2, /* (171) insert_cmd ::= INSERT orconf */ + -1, /* (172) insert_cmd ::= REPLACE */ + 0, /* (173) idlist_opt ::= */ + -3, /* (174) idlist_opt ::= LP idlist RP */ + -3, /* (175) idlist ::= idlist COMMA nm */ + -1, /* (176) idlist ::= nm */ + -3, /* (177) expr ::= LP expr RP */ + -1, /* (178) expr ::= ID|INDEXED */ + -1, /* (179) expr ::= JOIN_KW */ + -3, /* (180) expr ::= nm DOT nm */ + -5, /* (181) expr ::= nm DOT nm DOT nm */ + -1, /* (182) term ::= NULL|FLOAT|BLOB */ + -1, /* (183) term ::= STRING */ + -1, /* (184) term ::= INTEGER */ + -1, /* (185) expr ::= VARIABLE */ + -3, /* (186) expr ::= expr COLLATE ID|STRING */ + -6, /* (187) expr ::= CAST LP expr AS typetoken RP */ + -5, /* (188) expr ::= ID|INDEXED LP distinct exprlist RP */ + -4, /* (189) expr ::= ID|INDEXED LP STAR RP */ + -6, /* (190) expr ::= ID|INDEXED LP distinct exprlist RP filter_over */ + -5, /* (191) expr ::= ID|INDEXED LP STAR RP filter_over */ + -1, /* (192) term ::= CTIME_KW */ + -5, /* (193) expr ::= LP nexprlist COMMA expr RP */ + -3, /* (194) expr ::= expr AND expr */ + -3, /* (195) expr ::= expr OR expr */ + -3, /* (196) expr ::= expr LT|GT|GE|LE expr */ + -3, /* (197) expr ::= expr EQ|NE expr */ + -3, /* (198) expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ + -3, /* (199) expr ::= expr PLUS|MINUS expr */ + -3, /* (200) expr ::= expr STAR|SLASH|REM expr */ + -3, /* (201) expr ::= expr CONCAT expr */ + -2, /* (202) likeop ::= NOT LIKE_KW|MATCH */ + -3, /* (203) expr ::= expr likeop expr */ + -5, /* (204) expr ::= expr likeop expr ESCAPE expr */ + -2, /* (205) expr ::= expr ISNULL|NOTNULL */ + -3, /* (206) expr ::= expr NOT NULL */ + -3, /* (207) expr ::= expr IS expr */ + -4, /* (208) expr ::= expr IS NOT expr */ + -2, /* (209) expr ::= NOT expr */ + -2, /* (210) expr ::= BITNOT expr */ + -2, /* (211) expr ::= PLUS|MINUS expr */ + -3, /* (212) expr ::= expr PTR expr */ + -1, /* (213) between_op ::= BETWEEN */ + -2, /* (214) between_op ::= NOT BETWEEN */ + -5, /* (215) expr ::= expr between_op expr AND expr */ + -1, /* (216) in_op ::= IN */ + -2, /* (217) in_op ::= NOT IN */ + -5, /* (218) expr ::= expr in_op LP exprlist RP */ + -3, /* (219) expr ::= LP select RP */ + -5, /* (220) expr ::= expr in_op LP select RP */ + -5, /* (221) expr ::= expr in_op nm dbnm paren_exprlist */ + -4, /* (222) expr ::= EXISTS LP select RP */ + -5, /* (223) expr ::= CASE case_operand case_exprlist case_else END */ + -5, /* (224) case_exprlist ::= case_exprlist WHEN expr THEN expr */ + -4, /* (225) case_exprlist ::= WHEN expr THEN expr */ + -2, /* (226) case_else ::= ELSE expr */ + 0, /* (227) case_else ::= */ + -1, /* (228) case_operand ::= expr */ + 0, /* (229) case_operand ::= */ + 0, /* (230) exprlist ::= */ + -3, /* (231) nexprlist ::= nexprlist COMMA expr */ + -1, /* (232) nexprlist ::= expr */ + 0, /* (233) paren_exprlist ::= */ + -3, /* (234) paren_exprlist ::= LP exprlist RP */ + -12, /* (235) cmd ::= createkw uniqueflag INDEX ifnotexists nm dbnm ON nm LP sortlist RP where_opt */ + -1, /* (236) uniqueflag ::= UNIQUE */ + 0, /* (237) uniqueflag ::= */ + 0, /* (238) eidlist_opt ::= */ + -3, /* (239) eidlist_opt ::= LP eidlist RP */ + -5, /* (240) eidlist ::= eidlist COMMA nm collate sortorder */ + -3, /* (241) eidlist ::= nm collate sortorder */ + 0, /* (242) collate ::= */ + -2, /* (243) collate ::= COLLATE ID|STRING */ + -4, /* (244) cmd ::= DROP INDEX ifexists fullname */ + -2, /* (245) cmd ::= VACUUM vinto */ + -3, /* (246) cmd ::= VACUUM nm vinto */ + -2, /* (247) vinto ::= INTO expr */ + 0, /* (248) vinto ::= */ + -3, /* (249) cmd ::= PRAGMA nm dbnm */ + -5, /* (250) cmd ::= PRAGMA nm dbnm EQ nmnum */ + -6, /* (251) cmd ::= PRAGMA nm dbnm LP nmnum RP */ + -5, /* (252) cmd ::= PRAGMA nm dbnm EQ minus_num */ + -6, /* (253) cmd ::= PRAGMA nm dbnm LP minus_num RP */ + -2, /* (254) plus_num ::= PLUS INTEGER|FLOAT */ + -2, /* (255) minus_num ::= MINUS INTEGER|FLOAT */ + -5, /* (256) cmd ::= createkw trigger_decl BEGIN trigger_cmd_list END */ + -11, /* (257) trigger_decl ::= temp TRIGGER ifnotexists nm dbnm trigger_time trigger_event ON fullname foreach_clause when_clause */ + -1, /* (258) trigger_time ::= BEFORE|AFTER */ + -2, /* (259) trigger_time ::= INSTEAD OF */ + 0, /* (260) trigger_time ::= */ + -1, /* (261) trigger_event ::= DELETE|INSERT */ + -1, /* (262) trigger_event ::= UPDATE */ + -3, /* (263) trigger_event ::= UPDATE OF idlist */ + 0, /* (264) when_clause ::= */ + -2, /* (265) when_clause ::= WHEN expr */ + -3, /* (266) trigger_cmd_list ::= trigger_cmd_list trigger_cmd SEMI */ + -2, /* (267) trigger_cmd_list ::= trigger_cmd SEMI */ + -3, /* (268) trnm ::= nm DOT nm */ + -3, /* (269) tridxby ::= INDEXED BY nm */ + -2, /* (270) tridxby ::= NOT INDEXED */ + -9, /* (271) trigger_cmd ::= UPDATE orconf trnm tridxby SET setlist from where_opt scanpt */ + -8, /* (272) trigger_cmd ::= scanpt insert_cmd INTO trnm idlist_opt select upsert scanpt */ + -6, /* (273) trigger_cmd ::= DELETE FROM trnm tridxby where_opt scanpt */ + -3, /* (274) trigger_cmd ::= scanpt select scanpt */ + -4, /* (275) expr ::= RAISE LP IGNORE RP */ + -6, /* (276) expr ::= RAISE LP raisetype COMMA nm RP */ + -1, /* (277) raisetype ::= ROLLBACK */ + -1, /* (278) raisetype ::= ABORT */ + -1, /* (279) raisetype ::= FAIL */ + -4, /* (280) cmd ::= DROP TRIGGER ifexists fullname */ + -6, /* (281) cmd ::= ATTACH database_kw_opt expr AS expr key_opt */ + -3, /* (282) cmd ::= DETACH database_kw_opt expr */ + 0, /* (283) key_opt ::= */ + -2, /* (284) key_opt ::= KEY expr */ + -1, /* (285) cmd ::= REINDEX */ + -3, /* (286) cmd ::= REINDEX nm dbnm */ + -1, /* (287) cmd ::= ANALYZE */ + -3, /* (288) cmd ::= ANALYZE nm dbnm */ + -6, /* (289) cmd ::= ALTER TABLE fullname RENAME TO nm */ + -7, /* (290) cmd ::= ALTER TABLE add_column_fullname ADD kwcolumn_opt columnname carglist */ + -6, /* (291) cmd ::= ALTER TABLE fullname DROP kwcolumn_opt nm */ + -1, /* (292) add_column_fullname ::= fullname */ + -8, /* (293) cmd ::= ALTER TABLE fullname RENAME kwcolumn_opt nm TO nm */ + -1, /* (294) cmd ::= create_vtab */ + -4, /* (295) cmd ::= create_vtab LP vtabarglist RP */ + -8, /* (296) create_vtab ::= createkw VIRTUAL TABLE ifnotexists nm dbnm USING nm */ + 0, /* (297) vtabarg ::= */ + -1, /* (298) vtabargtoken ::= ANY */ + -3, /* (299) vtabargtoken ::= lp anylist RP */ + -1, /* (300) lp ::= LP */ + -2, /* (301) with ::= WITH wqlist */ + -3, /* (302) with ::= WITH RECURSIVE wqlist */ + -1, /* (303) wqas ::= AS */ + -2, /* (304) wqas ::= AS MATERIALIZED */ + -3, /* (305) wqas ::= AS NOT MATERIALIZED */ + -6, /* (306) wqitem ::= nm eidlist_opt wqas LP select RP */ + -1, /* (307) wqlist ::= wqitem */ + -3, /* (308) wqlist ::= wqlist COMMA wqitem */ + -1, /* (309) windowdefn_list ::= windowdefn */ + -3, /* (310) windowdefn_list ::= windowdefn_list COMMA windowdefn */ + -5, /* (311) windowdefn ::= nm AS LP window RP */ + -5, /* (312) window ::= PARTITION BY nexprlist orderby_opt frame_opt */ + -6, /* (313) window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ + -4, /* (314) window ::= ORDER BY sortlist frame_opt */ + -5, /* (315) window ::= nm ORDER BY sortlist frame_opt */ + -1, /* (316) window ::= frame_opt */ + -2, /* (317) window ::= nm frame_opt */ + 0, /* (318) frame_opt ::= */ + -3, /* (319) frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ + -6, /* (320) frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ + -1, /* (321) range_or_rows ::= RANGE|ROWS|GROUPS */ + -1, /* (322) frame_bound_s ::= frame_bound */ + -2, /* (323) frame_bound_s ::= UNBOUNDED PRECEDING */ + -1, /* (324) frame_bound_e ::= frame_bound */ + -2, /* (325) frame_bound_e ::= UNBOUNDED FOLLOWING */ + -2, /* (326) frame_bound ::= expr PRECEDING|FOLLOWING */ + -2, /* (327) frame_bound ::= CURRENT ROW */ + 0, /* (328) frame_exclude_opt ::= */ + -2, /* (329) frame_exclude_opt ::= EXCLUDE frame_exclude */ + -2, /* (330) frame_exclude ::= NO OTHERS */ + -2, /* (331) frame_exclude ::= CURRENT ROW */ + -1, /* (332) frame_exclude ::= GROUP|TIES */ + -2, /* (333) window_clause ::= WINDOW windowdefn_list */ + -2, /* (334) filter_over ::= filter_clause over_clause */ + -1, /* (335) filter_over ::= over_clause */ + -1, /* (336) filter_over ::= filter_clause */ + -4, /* (337) over_clause ::= OVER LP window RP */ + -2, /* (338) over_clause ::= OVER nm */ + -5, /* (339) filter_clause ::= FILTER LP WHERE expr RP */ + -1, /* (340) input ::= cmdlist */ + -2, /* (341) cmdlist ::= cmdlist ecmd */ + -1, /* (342) cmdlist ::= ecmd */ + -1, /* (343) ecmd ::= SEMI */ + -2, /* (344) ecmd ::= cmdx SEMI */ + -3, /* (345) ecmd ::= explain cmdx SEMI */ + 0, /* (346) trans_opt ::= */ + -1, /* (347) trans_opt ::= TRANSACTION */ + -2, /* (348) trans_opt ::= TRANSACTION nm */ + -1, /* (349) savepoint_opt ::= SAVEPOINT */ + 0, /* (350) savepoint_opt ::= */ + -2, /* (351) cmd ::= create_table create_table_args */ + -1, /* (352) table_option_set ::= table_option */ + -4, /* (353) columnlist ::= columnlist COMMA columnname carglist */ + -2, /* (354) columnlist ::= columnname carglist */ + -1, /* (355) nm ::= ID|INDEXED */ + -1, /* (356) nm ::= STRING */ + -1, /* (357) nm ::= JOIN_KW */ + -1, /* (358) typetoken ::= typename */ + -1, /* (359) typename ::= ID|STRING */ + -1, /* (360) signed ::= plus_num */ + -1, /* (361) signed ::= minus_num */ + -2, /* (362) carglist ::= carglist ccons */ + 0, /* (363) carglist ::= */ + -2, /* (364) ccons ::= NULL onconf */ + -4, /* (365) ccons ::= GENERATED ALWAYS AS generated */ + -2, /* (366) ccons ::= AS generated */ + -2, /* (367) conslist_opt ::= COMMA conslist */ + -3, /* (368) conslist ::= conslist tconscomma tcons */ + -1, /* (369) conslist ::= tcons */ + 0, /* (370) tconscomma ::= */ + -1, /* (371) defer_subclause_opt ::= defer_subclause */ + -1, /* (372) resolvetype ::= raisetype */ + -1, /* (373) selectnowith ::= oneselect */ + -1, /* (374) oneselect ::= values */ + -2, /* (375) sclp ::= selcollist COMMA */ + -1, /* (376) as ::= ID|STRING */ + 0, /* (377) returning ::= */ + -1, /* (378) expr ::= term */ + -1, /* (379) likeop ::= LIKE_KW|MATCH */ + -1, /* (380) exprlist ::= nexprlist */ + -1, /* (381) nmnum ::= plus_num */ + -1, /* (382) nmnum ::= nm */ + -1, /* (383) nmnum ::= ON */ + -1, /* (384) nmnum ::= DELETE */ + -1, /* (385) nmnum ::= DEFAULT */ + -1, /* (386) plus_num ::= INTEGER|FLOAT */ + 0, /* (387) foreach_clause ::= */ + -3, /* (388) foreach_clause ::= FOR EACH ROW */ + -1, /* (389) trnm ::= nm */ + 0, /* (390) tridxby ::= */ + -1, /* (391) database_kw_opt ::= DATABASE */ + 0, /* (392) database_kw_opt ::= */ + 0, /* (393) kwcolumn_opt ::= */ + -1, /* (394) kwcolumn_opt ::= COLUMNKW */ + -1, /* (395) vtabarglist ::= vtabarg */ + -3, /* (396) vtabarglist ::= vtabarglist COMMA vtabarg */ + -2, /* (397) vtabarg ::= vtabarg vtabargtoken */ + 0, /* (398) anylist ::= */ + -4, /* (399) anylist ::= anylist LP anylist RP */ + -2, /* (400) anylist ::= anylist ANY */ + 0, /* (401) with ::= */ +}; + +static void yy_accept(yyParser*); /* Forward Declaration */ + +/* +** Perform a reduce action and the shift that must immediately +** follow the reduce. +** +** The yyLookahead and yyLookaheadToken parameters provide reduce actions +** access to the lookahead token (if any). The yyLookahead will be YYNOCODE +** if the lookahead token has already been consumed. As this procedure is +** only called from one place, optimizing compilers will in-line it, which +** means that the extra parameters have no performance impact. +*/ +static YYACTIONTYPE yy_reduce( + yyParser *yypParser, /* The parser */ + unsigned int yyruleno, /* Number of the rule by which to reduce */ + int yyLookahead, /* Lookahead token, or YYNOCODE if none */ + sqlite3ParserTOKENTYPE yyLookaheadToken /* Value of the lookahead token */ + sqlite3ParserCTX_PDECL /* %extra_context */ +){ + int yygoto; /* The next state */ + YYACTIONTYPE yyact; /* The next action */ + yyStackEntry *yymsp; /* The top of the parser's stack */ + int yysize; /* Amount to pop the stack */ + sqlite3ParserARG_FETCH + (void)yyLookahead; + (void)yyLookaheadToken; + yymsp = yypParser->yytos; + + switch( yyruleno ){ + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... } // User supplied code + ** #line + ** break; + */ +/********** Begin reduce actions **********************************************/ + YYMINORTYPE yylhsminor; + case 0: /* explain ::= EXPLAIN */ +{ pParse->explain = 1; } + break; + case 1: /* explain ::= EXPLAIN QUERY PLAN */ +{ pParse->explain = 2; } + break; + case 2: /* cmdx ::= cmd */ +{ sqlite3FinishCoding(pParse); } + break; + case 3: /* cmd ::= BEGIN transtype trans_opt */ +{sqlite3BeginTransaction(pParse, yymsp[-1].minor.yy394);} + break; + case 4: /* transtype ::= */ +{yymsp[1].minor.yy394 = TK_DEFERRED;} + break; + case 5: /* transtype ::= DEFERRED */ + case 6: /* transtype ::= IMMEDIATE */ yytestcase(yyruleno==6); + case 7: /* transtype ::= EXCLUSIVE */ yytestcase(yyruleno==7); + case 321: /* range_or_rows ::= RANGE|ROWS|GROUPS */ yytestcase(yyruleno==321); +{yymsp[0].minor.yy394 = yymsp[0].major; /*A-overwrites-X*/} + break; + case 8: /* cmd ::= COMMIT|END trans_opt */ + case 9: /* cmd ::= ROLLBACK trans_opt */ yytestcase(yyruleno==9); +{sqlite3EndTransaction(pParse,yymsp[-1].major);} + break; + case 10: /* cmd ::= SAVEPOINT nm */ +{ + sqlite3Savepoint(pParse, SAVEPOINT_BEGIN, &yymsp[0].minor.yy0); +} + break; + case 11: /* cmd ::= RELEASE savepoint_opt nm */ +{ + sqlite3Savepoint(pParse, SAVEPOINT_RELEASE, &yymsp[0].minor.yy0); +} + break; + case 12: /* cmd ::= ROLLBACK trans_opt TO savepoint_opt nm */ +{ + sqlite3Savepoint(pParse, SAVEPOINT_ROLLBACK, &yymsp[0].minor.yy0); +} + break; + case 13: /* create_table ::= createkw temp TABLE ifnotexists nm dbnm */ +{ + sqlite3StartTable(pParse,&yymsp[-1].minor.yy0,&yymsp[0].minor.yy0,yymsp[-4].minor.yy394,0,0,yymsp[-2].minor.yy394); +} + break; + case 14: /* createkw ::= CREATE */ +{disableLookaside(pParse);} + break; + case 15: /* ifnotexists ::= */ + case 18: /* temp ::= */ yytestcase(yyruleno==18); + case 47: /* autoinc ::= */ yytestcase(yyruleno==47); + case 62: /* init_deferred_pred_opt ::= */ yytestcase(yyruleno==62); + case 72: /* defer_subclause_opt ::= */ yytestcase(yyruleno==72); + case 81: /* ifexists ::= */ yytestcase(yyruleno==81); + case 98: /* distinct ::= */ yytestcase(yyruleno==98); + case 242: /* collate ::= */ yytestcase(yyruleno==242); +{yymsp[1].minor.yy394 = 0;} + break; + case 16: /* ifnotexists ::= IF NOT EXISTS */ +{yymsp[-2].minor.yy394 = 1;} + break; + case 17: /* temp ::= TEMP */ +{yymsp[0].minor.yy394 = pParse->db->init.busy==0;} + break; + case 19: /* create_table_args ::= LP columnlist conslist_opt RP table_option_set */ +{ + sqlite3EndTable(pParse,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0,yymsp[0].minor.yy285,0); +} + break; + case 20: /* create_table_args ::= AS select */ +{ + sqlite3EndTable(pParse,0,0,0,yymsp[0].minor.yy47); + sqlite3SelectDelete(pParse->db, yymsp[0].minor.yy47); +} + break; + case 21: /* table_option_set ::= */ +{yymsp[1].minor.yy285 = 0;} + break; + case 22: /* table_option_set ::= table_option_set COMMA table_option */ +{yylhsminor.yy285 = yymsp[-2].minor.yy285|yymsp[0].minor.yy285;} + yymsp[-2].minor.yy285 = yylhsminor.yy285; + break; + case 23: /* table_option ::= WITHOUT nm */ +{ + if( yymsp[0].minor.yy0.n==5 && sqlite3_strnicmp(yymsp[0].minor.yy0.z,"rowid",5)==0 ){ + yymsp[-1].minor.yy285 = TF_WithoutRowid | TF_NoVisibleRowid; + }else{ + yymsp[-1].minor.yy285 = 0; + sqlite3ErrorMsg(pParse, "unknown table option: %.*s", yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.z); + } +} + break; + case 24: /* table_option ::= nm */ +{ + if( yymsp[0].minor.yy0.n==6 && sqlite3_strnicmp(yymsp[0].minor.yy0.z,"strict",6)==0 ){ + yylhsminor.yy285 = TF_Strict; + }else{ + yylhsminor.yy285 = 0; + sqlite3ErrorMsg(pParse, "unknown table option: %.*s", yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.z); + } +} + yymsp[0].minor.yy285 = yylhsminor.yy285; + break; + case 25: /* columnname ::= nm typetoken */ +{sqlite3AddColumn(pParse,yymsp[-1].minor.yy0,yymsp[0].minor.yy0);} + break; + case 26: /* typetoken ::= */ + case 65: /* conslist_opt ::= */ yytestcase(yyruleno==65); + case 104: /* as ::= */ yytestcase(yyruleno==104); +{yymsp[1].minor.yy0.n = 0; yymsp[1].minor.yy0.z = 0;} + break; + case 27: /* typetoken ::= typename LP signed RP */ +{ + yymsp[-3].minor.yy0.n = (int)(&yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n] - yymsp[-3].minor.yy0.z); +} + break; + case 28: /* typetoken ::= typename LP signed COMMA signed RP */ +{ + yymsp[-5].minor.yy0.n = (int)(&yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n] - yymsp[-5].minor.yy0.z); +} + break; + case 29: /* typename ::= typename ID|STRING */ +{yymsp[-1].minor.yy0.n=yymsp[0].minor.yy0.n+(int)(yymsp[0].minor.yy0.z-yymsp[-1].minor.yy0.z);} + break; + case 30: /* scanpt ::= */ +{ + assert( yyLookahead!=YYNOCODE ); + yymsp[1].minor.yy522 = yyLookaheadToken.z; +} + break; + case 31: /* scantok ::= */ +{ + assert( yyLookahead!=YYNOCODE ); + yymsp[1].minor.yy0 = yyLookaheadToken; +} + break; + case 32: /* ccons ::= CONSTRAINT nm */ + case 67: /* tcons ::= CONSTRAINT nm */ yytestcase(yyruleno==67); +{pParse->constraintName = yymsp[0].minor.yy0;} + break; + case 33: /* ccons ::= DEFAULT scantok term */ +{sqlite3AddDefaultValue(pParse,yymsp[0].minor.yy528,yymsp[-1].minor.yy0.z,&yymsp[-1].minor.yy0.z[yymsp[-1].minor.yy0.n]);} + break; + case 34: /* ccons ::= DEFAULT LP expr RP */ +{sqlite3AddDefaultValue(pParse,yymsp[-1].minor.yy528,yymsp[-2].minor.yy0.z+1,yymsp[0].minor.yy0.z);} + break; + case 35: /* ccons ::= DEFAULT PLUS scantok term */ +{sqlite3AddDefaultValue(pParse,yymsp[0].minor.yy528,yymsp[-2].minor.yy0.z,&yymsp[-1].minor.yy0.z[yymsp[-1].minor.yy0.n]);} + break; + case 36: /* ccons ::= DEFAULT MINUS scantok term */ +{ + Expr *p = sqlite3PExpr(pParse, TK_UMINUS, yymsp[0].minor.yy528, 0); + sqlite3AddDefaultValue(pParse,p,yymsp[-2].minor.yy0.z,&yymsp[-1].minor.yy0.z[yymsp[-1].minor.yy0.n]); +} + break; + case 37: /* ccons ::= DEFAULT scantok ID|INDEXED */ +{ + Expr *p = tokenExpr(pParse, TK_STRING, yymsp[0].minor.yy0); + if( p ){ + sqlite3ExprIdToTrueFalse(p); + testcase( p->op==TK_TRUEFALSE && sqlite3ExprTruthValue(p) ); + } + sqlite3AddDefaultValue(pParse,p,yymsp[0].minor.yy0.z,yymsp[0].minor.yy0.z+yymsp[0].minor.yy0.n); +} + break; + case 38: /* ccons ::= NOT NULL onconf */ +{sqlite3AddNotNull(pParse, yymsp[0].minor.yy394);} + break; + case 39: /* ccons ::= PRIMARY KEY sortorder onconf autoinc */ +{sqlite3AddPrimaryKey(pParse,0,yymsp[-1].minor.yy394,yymsp[0].minor.yy394,yymsp[-2].minor.yy394);} + break; + case 40: /* ccons ::= UNIQUE onconf */ +{sqlite3CreateIndex(pParse,0,0,0,0,yymsp[0].minor.yy394,0,0,0,0, + SQLITE_IDXTYPE_UNIQUE);} + break; + case 41: /* ccons ::= CHECK LP expr RP */ +{sqlite3AddCheckConstraint(pParse,yymsp[-1].minor.yy528,yymsp[-2].minor.yy0.z,yymsp[0].minor.yy0.z);} + break; + case 42: /* ccons ::= REFERENCES nm eidlist_opt refargs */ +{sqlite3CreateForeignKey(pParse,0,&yymsp[-2].minor.yy0,yymsp[-1].minor.yy322,yymsp[0].minor.yy394);} + break; + case 43: /* ccons ::= defer_subclause */ +{sqlite3DeferForeignKey(pParse,yymsp[0].minor.yy394);} + break; + case 44: /* ccons ::= COLLATE ID|STRING */ +{sqlite3AddCollateType(pParse, &yymsp[0].minor.yy0);} + break; + case 45: /* generated ::= LP expr RP */ +{sqlite3AddGenerated(pParse,yymsp[-1].minor.yy528,0);} + break; + case 46: /* generated ::= LP expr RP ID */ +{sqlite3AddGenerated(pParse,yymsp[-2].minor.yy528,&yymsp[0].minor.yy0);} + break; + case 48: /* autoinc ::= AUTOINCR */ +{yymsp[0].minor.yy394 = 1;} + break; + case 49: /* refargs ::= */ +{ yymsp[1].minor.yy394 = OE_None*0x0101; /* EV: R-19803-45884 */} + break; + case 50: /* refargs ::= refargs refarg */ +{ yymsp[-1].minor.yy394 = (yymsp[-1].minor.yy394 & ~yymsp[0].minor.yy231.mask) | yymsp[0].minor.yy231.value; } + break; + case 51: /* refarg ::= MATCH nm */ +{ yymsp[-1].minor.yy231.value = 0; yymsp[-1].minor.yy231.mask = 0x000000; } + break; + case 52: /* refarg ::= ON INSERT refact */ +{ yymsp[-2].minor.yy231.value = 0; yymsp[-2].minor.yy231.mask = 0x000000; } + break; + case 53: /* refarg ::= ON DELETE refact */ +{ yymsp[-2].minor.yy231.value = yymsp[0].minor.yy394; yymsp[-2].minor.yy231.mask = 0x0000ff; } + break; + case 54: /* refarg ::= ON UPDATE refact */ +{ yymsp[-2].minor.yy231.value = yymsp[0].minor.yy394<<8; yymsp[-2].minor.yy231.mask = 0x00ff00; } + break; + case 55: /* refact ::= SET NULL */ +{ yymsp[-1].minor.yy394 = OE_SetNull; /* EV: R-33326-45252 */} + break; + case 56: /* refact ::= SET DEFAULT */ +{ yymsp[-1].minor.yy394 = OE_SetDflt; /* EV: R-33326-45252 */} + break; + case 57: /* refact ::= CASCADE */ +{ yymsp[0].minor.yy394 = OE_Cascade; /* EV: R-33326-45252 */} + break; + case 58: /* refact ::= RESTRICT */ +{ yymsp[0].minor.yy394 = OE_Restrict; /* EV: R-33326-45252 */} + break; + case 59: /* refact ::= NO ACTION */ +{ yymsp[-1].minor.yy394 = OE_None; /* EV: R-33326-45252 */} + break; + case 60: /* defer_subclause ::= NOT DEFERRABLE init_deferred_pred_opt */ +{yymsp[-2].minor.yy394 = 0;} + break; + case 61: /* defer_subclause ::= DEFERRABLE init_deferred_pred_opt */ + case 76: /* orconf ::= OR resolvetype */ yytestcase(yyruleno==76); + case 171: /* insert_cmd ::= INSERT orconf */ yytestcase(yyruleno==171); +{yymsp[-1].minor.yy394 = yymsp[0].minor.yy394;} + break; + case 63: /* init_deferred_pred_opt ::= INITIALLY DEFERRED */ + case 80: /* ifexists ::= IF EXISTS */ yytestcase(yyruleno==80); + case 214: /* between_op ::= NOT BETWEEN */ yytestcase(yyruleno==214); + case 217: /* in_op ::= NOT IN */ yytestcase(yyruleno==217); + case 243: /* collate ::= COLLATE ID|STRING */ yytestcase(yyruleno==243); +{yymsp[-1].minor.yy394 = 1;} + break; + case 64: /* init_deferred_pred_opt ::= INITIALLY IMMEDIATE */ +{yymsp[-1].minor.yy394 = 0;} + break; + case 66: /* tconscomma ::= COMMA */ +{pParse->constraintName.n = 0;} + break; + case 68: /* tcons ::= PRIMARY KEY LP sortlist autoinc RP onconf */ +{sqlite3AddPrimaryKey(pParse,yymsp[-3].minor.yy322,yymsp[0].minor.yy394,yymsp[-2].minor.yy394,0);} + break; + case 69: /* tcons ::= UNIQUE LP sortlist RP onconf */ +{sqlite3CreateIndex(pParse,0,0,0,yymsp[-2].minor.yy322,yymsp[0].minor.yy394,0,0,0,0, + SQLITE_IDXTYPE_UNIQUE);} + break; + case 70: /* tcons ::= CHECK LP expr RP onconf */ +{sqlite3AddCheckConstraint(pParse,yymsp[-2].minor.yy528,yymsp[-3].minor.yy0.z,yymsp[-1].minor.yy0.z);} + break; + case 71: /* tcons ::= FOREIGN KEY LP eidlist RP REFERENCES nm eidlist_opt refargs defer_subclause_opt */ +{ + sqlite3CreateForeignKey(pParse, yymsp[-6].minor.yy322, &yymsp[-3].minor.yy0, yymsp[-2].minor.yy322, yymsp[-1].minor.yy394); + sqlite3DeferForeignKey(pParse, yymsp[0].minor.yy394); +} + break; + case 73: /* onconf ::= */ + case 75: /* orconf ::= */ yytestcase(yyruleno==75); +{yymsp[1].minor.yy394 = OE_Default;} + break; + case 74: /* onconf ::= ON CONFLICT resolvetype */ +{yymsp[-2].minor.yy394 = yymsp[0].minor.yy394;} + break; + case 77: /* resolvetype ::= IGNORE */ +{yymsp[0].minor.yy394 = OE_Ignore;} + break; + case 78: /* resolvetype ::= REPLACE */ + case 172: /* insert_cmd ::= REPLACE */ yytestcase(yyruleno==172); +{yymsp[0].minor.yy394 = OE_Replace;} + break; + case 79: /* cmd ::= DROP TABLE ifexists fullname */ +{ + sqlite3DropTable(pParse, yymsp[0].minor.yy131, 0, yymsp[-1].minor.yy394); +} + break; + case 82: /* cmd ::= createkw temp VIEW ifnotexists nm dbnm eidlist_opt AS select */ +{ + sqlite3CreateView(pParse, &yymsp[-8].minor.yy0, &yymsp[-4].minor.yy0, &yymsp[-3].minor.yy0, yymsp[-2].minor.yy322, yymsp[0].minor.yy47, yymsp[-7].minor.yy394, yymsp[-5].minor.yy394); +} + break; + case 83: /* cmd ::= DROP VIEW ifexists fullname */ +{ + sqlite3DropTable(pParse, yymsp[0].minor.yy131, 1, yymsp[-1].minor.yy394); +} + break; + case 84: /* cmd ::= select */ +{ + SelectDest dest = {SRT_Output, 0, 0, 0, 0, 0, 0}; + sqlite3Select(pParse, yymsp[0].minor.yy47, &dest); + sqlite3SelectDelete(pParse->db, yymsp[0].minor.yy47); +} + break; + case 85: /* select ::= WITH wqlist selectnowith */ +{yymsp[-2].minor.yy47 = attachWithToSelect(pParse,yymsp[0].minor.yy47,yymsp[-1].minor.yy521);} + break; + case 86: /* select ::= WITH RECURSIVE wqlist selectnowith */ +{yymsp[-3].minor.yy47 = attachWithToSelect(pParse,yymsp[0].minor.yy47,yymsp[-1].minor.yy521);} + break; + case 87: /* select ::= selectnowith */ +{ + Select *p = yymsp[0].minor.yy47; + if( p ){ + parserDoubleLinkSelect(pParse, p); + } + yymsp[0].minor.yy47 = p; /*A-overwrites-X*/ +} + break; + case 88: /* selectnowith ::= selectnowith multiselect_op oneselect */ +{ + Select *pRhs = yymsp[0].minor.yy47; + Select *pLhs = yymsp[-2].minor.yy47; + if( pRhs && pRhs->pPrior ){ + SrcList *pFrom; + Token x; + x.n = 0; + parserDoubleLinkSelect(pParse, pRhs); + pFrom = sqlite3SrcListAppendFromTerm(pParse,0,0,0,&x,pRhs,0,0); + pRhs = sqlite3SelectNew(pParse,0,pFrom,0,0,0,0,0,0); + } + if( pRhs ){ + pRhs->op = (u8)yymsp[-1].minor.yy394; + pRhs->pPrior = pLhs; + if( ALWAYS(pLhs) ) pLhs->selFlags &= ~SF_MultiValue; + pRhs->selFlags &= ~SF_MultiValue; + if( yymsp[-1].minor.yy394!=TK_ALL ) pParse->hasCompound = 1; + }else{ + sqlite3SelectDelete(pParse->db, pLhs); + } + yymsp[-2].minor.yy47 = pRhs; +} + break; + case 89: /* multiselect_op ::= UNION */ + case 91: /* multiselect_op ::= EXCEPT|INTERSECT */ yytestcase(yyruleno==91); +{yymsp[0].minor.yy394 = yymsp[0].major; /*A-overwrites-OP*/} + break; + case 90: /* multiselect_op ::= UNION ALL */ +{yymsp[-1].minor.yy394 = TK_ALL;} + break; + case 92: /* oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt orderby_opt limit_opt */ +{ + yymsp[-8].minor.yy47 = sqlite3SelectNew(pParse,yymsp[-6].minor.yy322,yymsp[-5].minor.yy131,yymsp[-4].minor.yy528,yymsp[-3].minor.yy322,yymsp[-2].minor.yy528,yymsp[-1].minor.yy322,yymsp[-7].minor.yy394,yymsp[0].minor.yy528); +} + break; + case 93: /* oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt window_clause orderby_opt limit_opt */ +{ + yymsp[-9].minor.yy47 = sqlite3SelectNew(pParse,yymsp[-7].minor.yy322,yymsp[-6].minor.yy131,yymsp[-5].minor.yy528,yymsp[-4].minor.yy322,yymsp[-3].minor.yy528,yymsp[-1].minor.yy322,yymsp[-8].minor.yy394,yymsp[0].minor.yy528); + if( yymsp[-9].minor.yy47 ){ + yymsp[-9].minor.yy47->pWinDefn = yymsp[-2].minor.yy41; + }else{ + sqlite3WindowListDelete(pParse->db, yymsp[-2].minor.yy41); + } +} + break; + case 94: /* values ::= VALUES LP nexprlist RP */ +{ + yymsp[-3].minor.yy47 = sqlite3SelectNew(pParse,yymsp[-1].minor.yy322,0,0,0,0,0,SF_Values,0); +} + break; + case 95: /* values ::= values COMMA LP nexprlist RP */ +{ + Select *pRight, *pLeft = yymsp[-4].minor.yy47; + pRight = sqlite3SelectNew(pParse,yymsp[-1].minor.yy322,0,0,0,0,0,SF_Values|SF_MultiValue,0); + if( ALWAYS(pLeft) ) pLeft->selFlags &= ~SF_MultiValue; + if( pRight ){ + pRight->op = TK_ALL; + pRight->pPrior = pLeft; + yymsp[-4].minor.yy47 = pRight; + }else{ + yymsp[-4].minor.yy47 = pLeft; + } +} + break; + case 96: /* distinct ::= DISTINCT */ +{yymsp[0].minor.yy394 = SF_Distinct;} + break; + case 97: /* distinct ::= ALL */ +{yymsp[0].minor.yy394 = SF_All;} + break; + case 99: /* sclp ::= */ + case 132: /* orderby_opt ::= */ yytestcase(yyruleno==132); + case 142: /* groupby_opt ::= */ yytestcase(yyruleno==142); + case 230: /* exprlist ::= */ yytestcase(yyruleno==230); + case 233: /* paren_exprlist ::= */ yytestcase(yyruleno==233); + case 238: /* eidlist_opt ::= */ yytestcase(yyruleno==238); +{yymsp[1].minor.yy322 = 0;} + break; + case 100: /* selcollist ::= sclp scanpt expr scanpt as */ +{ + yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse, yymsp[-4].minor.yy322, yymsp[-2].minor.yy528); + if( yymsp[0].minor.yy0.n>0 ) sqlite3ExprListSetName(pParse, yymsp[-4].minor.yy322, &yymsp[0].minor.yy0, 1); + sqlite3ExprListSetSpan(pParse,yymsp[-4].minor.yy322,yymsp[-3].minor.yy522,yymsp[-1].minor.yy522); +} + break; + case 101: /* selcollist ::= sclp scanpt STAR */ +{ + Expr *p = sqlite3Expr(pParse->db, TK_ASTERISK, 0); + yymsp[-2].minor.yy322 = sqlite3ExprListAppend(pParse, yymsp[-2].minor.yy322, p); +} + break; + case 102: /* selcollist ::= sclp scanpt nm DOT STAR */ +{ + Expr *pRight = sqlite3PExpr(pParse, TK_ASTERISK, 0, 0); + Expr *pLeft = tokenExpr(pParse, TK_ID, yymsp[-2].minor.yy0); + Expr *pDot = sqlite3PExpr(pParse, TK_DOT, pLeft, pRight); + yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy322, pDot); +} + break; + case 103: /* as ::= AS nm */ + case 114: /* dbnm ::= DOT nm */ yytestcase(yyruleno==114); + case 254: /* plus_num ::= PLUS INTEGER|FLOAT */ yytestcase(yyruleno==254); + case 255: /* minus_num ::= MINUS INTEGER|FLOAT */ yytestcase(yyruleno==255); +{yymsp[-1].minor.yy0 = yymsp[0].minor.yy0;} + break; + case 105: /* from ::= */ + case 108: /* stl_prefix ::= */ yytestcase(yyruleno==108); +{yymsp[1].minor.yy131 = 0;} + break; + case 106: /* from ::= FROM seltablist */ +{ + yymsp[-1].minor.yy131 = yymsp[0].minor.yy131; + sqlite3SrcListShiftJoinType(yymsp[-1].minor.yy131); +} + break; + case 107: /* stl_prefix ::= seltablist joinop */ +{ + if( ALWAYS(yymsp[-1].minor.yy131 && yymsp[-1].minor.yy131->nSrc>0) ) yymsp[-1].minor.yy131->a[yymsp[-1].minor.yy131->nSrc-1].fg.jointype = (u8)yymsp[0].minor.yy394; +} + break; + case 109: /* seltablist ::= stl_prefix nm dbnm as indexed_opt on_opt using_opt */ +{ + yymsp[-6].minor.yy131 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy131,&yymsp[-5].minor.yy0,&yymsp[-4].minor.yy0,&yymsp[-3].minor.yy0,0,yymsp[-1].minor.yy528,yymsp[0].minor.yy254); + sqlite3SrcListIndexedBy(pParse, yymsp[-6].minor.yy131, &yymsp[-2].minor.yy0); +} + break; + case 110: /* seltablist ::= stl_prefix nm dbnm LP exprlist RP as on_opt using_opt */ +{ + yymsp[-8].minor.yy131 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-8].minor.yy131,&yymsp[-7].minor.yy0,&yymsp[-6].minor.yy0,&yymsp[-2].minor.yy0,0,yymsp[-1].minor.yy528,yymsp[0].minor.yy254); + sqlite3SrcListFuncArgs(pParse, yymsp[-8].minor.yy131, yymsp[-4].minor.yy322); +} + break; + case 111: /* seltablist ::= stl_prefix LP select RP as on_opt using_opt */ +{ + yymsp[-6].minor.yy131 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy131,0,0,&yymsp[-2].minor.yy0,yymsp[-4].minor.yy47,yymsp[-1].minor.yy528,yymsp[0].minor.yy254); + } + break; + case 112: /* seltablist ::= stl_prefix LP seltablist RP as on_opt using_opt */ +{ + if( yymsp[-6].minor.yy131==0 && yymsp[-2].minor.yy0.n==0 && yymsp[-1].minor.yy528==0 && yymsp[0].minor.yy254==0 ){ + yymsp[-6].minor.yy131 = yymsp[-4].minor.yy131; + }else if( yymsp[-4].minor.yy131->nSrc==1 ){ + yymsp[-6].minor.yy131 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy131,0,0,&yymsp[-2].minor.yy0,0,yymsp[-1].minor.yy528,yymsp[0].minor.yy254); + if( yymsp[-6].minor.yy131 ){ + SrcItem *pNew = &yymsp[-6].minor.yy131->a[yymsp[-6].minor.yy131->nSrc-1]; + SrcItem *pOld = yymsp[-4].minor.yy131->a; + pNew->zName = pOld->zName; + pNew->zDatabase = pOld->zDatabase; + pNew->pSelect = pOld->pSelect; + if( pOld->fg.isTabFunc ){ + pNew->u1.pFuncArg = pOld->u1.pFuncArg; + pOld->u1.pFuncArg = 0; + pOld->fg.isTabFunc = 0; + pNew->fg.isTabFunc = 1; + } + pOld->zName = pOld->zDatabase = 0; + pOld->pSelect = 0; + } + sqlite3SrcListDelete(pParse->db, yymsp[-4].minor.yy131); + }else{ + Select *pSubquery; + sqlite3SrcListShiftJoinType(yymsp[-4].minor.yy131); + pSubquery = sqlite3SelectNew(pParse,0,yymsp[-4].minor.yy131,0,0,0,0,SF_NestedFrom,0); + yymsp[-6].minor.yy131 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy131,0,0,&yymsp[-2].minor.yy0,pSubquery,yymsp[-1].minor.yy528,yymsp[0].minor.yy254); + } + } + break; + case 113: /* dbnm ::= */ + case 127: /* indexed_opt ::= */ yytestcase(yyruleno==127); +{yymsp[1].minor.yy0.z=0; yymsp[1].minor.yy0.n=0;} + break; + case 115: /* fullname ::= nm */ +{ + yylhsminor.yy131 = sqlite3SrcListAppend(pParse,0,&yymsp[0].minor.yy0,0); + if( IN_RENAME_OBJECT && yylhsminor.yy131 ) sqlite3RenameTokenMap(pParse, yylhsminor.yy131->a[0].zName, &yymsp[0].minor.yy0); +} + yymsp[0].minor.yy131 = yylhsminor.yy131; + break; + case 116: /* fullname ::= nm DOT nm */ +{ + yylhsminor.yy131 = sqlite3SrcListAppend(pParse,0,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0); + if( IN_RENAME_OBJECT && yylhsminor.yy131 ) sqlite3RenameTokenMap(pParse, yylhsminor.yy131->a[0].zName, &yymsp[0].minor.yy0); +} + yymsp[-2].minor.yy131 = yylhsminor.yy131; + break; + case 117: /* xfullname ::= nm */ +{yymsp[0].minor.yy131 = sqlite3SrcListAppend(pParse,0,&yymsp[0].minor.yy0,0); /*A-overwrites-X*/} + break; + case 118: /* xfullname ::= nm DOT nm */ +{yymsp[-2].minor.yy131 = sqlite3SrcListAppend(pParse,0,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0); /*A-overwrites-X*/} + break; + case 119: /* xfullname ::= nm DOT nm AS nm */ +{ + yymsp[-4].minor.yy131 = sqlite3SrcListAppend(pParse,0,&yymsp[-4].minor.yy0,&yymsp[-2].minor.yy0); /*A-overwrites-X*/ + if( yymsp[-4].minor.yy131 ) yymsp[-4].minor.yy131->a[0].zAlias = sqlite3NameFromToken(pParse->db, &yymsp[0].minor.yy0); +} + break; + case 120: /* xfullname ::= nm AS nm */ +{ + yymsp[-2].minor.yy131 = sqlite3SrcListAppend(pParse,0,&yymsp[-2].minor.yy0,0); /*A-overwrites-X*/ + if( yymsp[-2].minor.yy131 ) yymsp[-2].minor.yy131->a[0].zAlias = sqlite3NameFromToken(pParse->db, &yymsp[0].minor.yy0); +} + break; + case 121: /* joinop ::= COMMA|JOIN */ +{ yymsp[0].minor.yy394 = JT_INNER; } + break; + case 122: /* joinop ::= JOIN_KW JOIN */ +{yymsp[-1].minor.yy394 = sqlite3JoinType(pParse,&yymsp[-1].minor.yy0,0,0); /*X-overwrites-A*/} + break; + case 123: /* joinop ::= JOIN_KW nm JOIN */ +{yymsp[-2].minor.yy394 = sqlite3JoinType(pParse,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0,0); /*X-overwrites-A*/} + break; + case 124: /* joinop ::= JOIN_KW nm nm JOIN */ +{yymsp[-3].minor.yy394 = sqlite3JoinType(pParse,&yymsp[-3].minor.yy0,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0);/*X-overwrites-A*/} + break; + case 125: /* on_opt ::= ON expr */ + case 145: /* having_opt ::= HAVING expr */ yytestcase(yyruleno==145); + case 152: /* where_opt ::= WHERE expr */ yytestcase(yyruleno==152); + case 154: /* where_opt_ret ::= WHERE expr */ yytestcase(yyruleno==154); + case 226: /* case_else ::= ELSE expr */ yytestcase(yyruleno==226); + case 247: /* vinto ::= INTO expr */ yytestcase(yyruleno==247); +{yymsp[-1].minor.yy528 = yymsp[0].minor.yy528;} + break; + case 126: /* on_opt ::= */ + case 144: /* having_opt ::= */ yytestcase(yyruleno==144); + case 146: /* limit_opt ::= */ yytestcase(yyruleno==146); + case 151: /* where_opt ::= */ yytestcase(yyruleno==151); + case 153: /* where_opt_ret ::= */ yytestcase(yyruleno==153); + case 227: /* case_else ::= */ yytestcase(yyruleno==227); + case 229: /* case_operand ::= */ yytestcase(yyruleno==229); + case 248: /* vinto ::= */ yytestcase(yyruleno==248); +{yymsp[1].minor.yy528 = 0;} + break; + case 128: /* indexed_opt ::= INDEXED BY nm */ +{yymsp[-2].minor.yy0 = yymsp[0].minor.yy0;} + break; + case 129: /* indexed_opt ::= NOT INDEXED */ +{yymsp[-1].minor.yy0.z=0; yymsp[-1].minor.yy0.n=1;} + break; + case 130: /* using_opt ::= USING LP idlist RP */ +{yymsp[-3].minor.yy254 = yymsp[-1].minor.yy254;} + break; + case 131: /* using_opt ::= */ + case 173: /* idlist_opt ::= */ yytestcase(yyruleno==173); +{yymsp[1].minor.yy254 = 0;} + break; + case 133: /* orderby_opt ::= ORDER BY sortlist */ + case 143: /* groupby_opt ::= GROUP BY nexprlist */ yytestcase(yyruleno==143); +{yymsp[-2].minor.yy322 = yymsp[0].minor.yy322;} + break; + case 134: /* sortlist ::= sortlist COMMA expr sortorder nulls */ +{ + yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy322,yymsp[-2].minor.yy528); + sqlite3ExprListSetSortOrder(yymsp[-4].minor.yy322,yymsp[-1].minor.yy394,yymsp[0].minor.yy394); +} + break; + case 135: /* sortlist ::= expr sortorder nulls */ +{ + yymsp[-2].minor.yy322 = sqlite3ExprListAppend(pParse,0,yymsp[-2].minor.yy528); /*A-overwrites-Y*/ + sqlite3ExprListSetSortOrder(yymsp[-2].minor.yy322,yymsp[-1].minor.yy394,yymsp[0].minor.yy394); +} + break; + case 136: /* sortorder ::= ASC */ +{yymsp[0].minor.yy394 = SQLITE_SO_ASC;} + break; + case 137: /* sortorder ::= DESC */ +{yymsp[0].minor.yy394 = SQLITE_SO_DESC;} + break; + case 138: /* sortorder ::= */ + case 141: /* nulls ::= */ yytestcase(yyruleno==141); +{yymsp[1].minor.yy394 = SQLITE_SO_UNDEFINED;} + break; + case 139: /* nulls ::= NULLS FIRST */ +{yymsp[-1].minor.yy394 = SQLITE_SO_ASC;} + break; + case 140: /* nulls ::= NULLS LAST */ +{yymsp[-1].minor.yy394 = SQLITE_SO_DESC;} + break; + case 147: /* limit_opt ::= LIMIT expr */ +{yymsp[-1].minor.yy528 = sqlite3PExpr(pParse,TK_LIMIT,yymsp[0].minor.yy528,0);} + break; + case 148: /* limit_opt ::= LIMIT expr OFFSET expr */ +{yymsp[-3].minor.yy528 = sqlite3PExpr(pParse,TK_LIMIT,yymsp[-2].minor.yy528,yymsp[0].minor.yy528);} + break; + case 149: /* limit_opt ::= LIMIT expr COMMA expr */ +{yymsp[-3].minor.yy528 = sqlite3PExpr(pParse,TK_LIMIT,yymsp[0].minor.yy528,yymsp[-2].minor.yy528);} + break; + case 150: /* cmd ::= with DELETE FROM xfullname indexed_opt where_opt_ret orderby_opt limit_opt */ +{ + sqlite3SrcListIndexedBy(pParse, yymsp[-4].minor.yy131, &yymsp[-3].minor.yy0); +#ifndef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + if( yymsp[-1].minor.yy322 || yymsp[0].minor.yy528 ){ + updateDeleteLimitError(pParse,yymsp[-1].minor.yy322,yymsp[0].minor.yy528); + yymsp[-1].minor.yy322 = 0; + yymsp[0].minor.yy528 = 0; + } +#endif + sqlite3DeleteFrom(pParse,yymsp[-4].minor.yy131,yymsp[-2].minor.yy528,yymsp[-1].minor.yy322,yymsp[0].minor.yy528); +} + break; + case 155: /* where_opt_ret ::= RETURNING selcollist */ +{sqlite3AddReturning(pParse,yymsp[0].minor.yy322); yymsp[-1].minor.yy528 = 0;} + break; + case 156: /* where_opt_ret ::= WHERE expr RETURNING selcollist */ +{sqlite3AddReturning(pParse,yymsp[0].minor.yy322); yymsp[-3].minor.yy528 = yymsp[-2].minor.yy528;} + break; + case 157: /* cmd ::= with UPDATE orconf xfullname indexed_opt SET setlist from where_opt_ret orderby_opt limit_opt */ +{ + sqlite3SrcListIndexedBy(pParse, yymsp[-7].minor.yy131, &yymsp[-6].minor.yy0); + yymsp[-7].minor.yy131 = sqlite3SrcListAppendList(pParse, yymsp[-7].minor.yy131, yymsp[-3].minor.yy131); + sqlite3ExprListCheckLength(pParse,yymsp[-4].minor.yy322,"set list"); +#ifndef SQLITE_ENABLE_UPDATE_DELETE_LIMIT + if( yymsp[-1].minor.yy322 || yymsp[0].minor.yy528 ){ + updateDeleteLimitError(pParse,yymsp[-1].minor.yy322,yymsp[0].minor.yy528); + yymsp[-1].minor.yy322 = 0; + yymsp[0].minor.yy528 = 0; + } +#endif + sqlite3Update(pParse,yymsp[-7].minor.yy131,yymsp[-4].minor.yy322,yymsp[-2].minor.yy528,yymsp[-8].minor.yy394,yymsp[-1].minor.yy322,yymsp[0].minor.yy528,0); +} + break; + case 158: /* setlist ::= setlist COMMA nm EQ expr */ +{ + yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse, yymsp[-4].minor.yy322, yymsp[0].minor.yy528); + sqlite3ExprListSetName(pParse, yymsp[-4].minor.yy322, &yymsp[-2].minor.yy0, 1); +} + break; + case 159: /* setlist ::= setlist COMMA LP idlist RP EQ expr */ +{ + yymsp[-6].minor.yy322 = sqlite3ExprListAppendVector(pParse, yymsp[-6].minor.yy322, yymsp[-3].minor.yy254, yymsp[0].minor.yy528); +} + break; + case 160: /* setlist ::= nm EQ expr */ +{ + yylhsminor.yy322 = sqlite3ExprListAppend(pParse, 0, yymsp[0].minor.yy528); + sqlite3ExprListSetName(pParse, yylhsminor.yy322, &yymsp[-2].minor.yy0, 1); +} + yymsp[-2].minor.yy322 = yylhsminor.yy322; + break; + case 161: /* setlist ::= LP idlist RP EQ expr */ +{ + yymsp[-4].minor.yy322 = sqlite3ExprListAppendVector(pParse, 0, yymsp[-3].minor.yy254, yymsp[0].minor.yy528); +} + break; + case 162: /* cmd ::= with insert_cmd INTO xfullname idlist_opt select upsert */ +{ + sqlite3Insert(pParse, yymsp[-3].minor.yy131, yymsp[-1].minor.yy47, yymsp[-2].minor.yy254, yymsp[-5].minor.yy394, yymsp[0].minor.yy444); +} + break; + case 163: /* cmd ::= with insert_cmd INTO xfullname idlist_opt DEFAULT VALUES returning */ +{ + sqlite3Insert(pParse, yymsp[-4].minor.yy131, 0, yymsp[-3].minor.yy254, yymsp[-6].minor.yy394, 0); +} + break; + case 164: /* upsert ::= */ +{ yymsp[1].minor.yy444 = 0; } + break; + case 165: /* upsert ::= RETURNING selcollist */ +{ yymsp[-1].minor.yy444 = 0; sqlite3AddReturning(pParse,yymsp[0].minor.yy322); } + break; + case 166: /* upsert ::= ON CONFLICT LP sortlist RP where_opt DO UPDATE SET setlist where_opt upsert */ +{ yymsp[-11].minor.yy444 = sqlite3UpsertNew(pParse->db,yymsp[-8].minor.yy322,yymsp[-6].minor.yy528,yymsp[-2].minor.yy322,yymsp[-1].minor.yy528,yymsp[0].minor.yy444);} + break; + case 167: /* upsert ::= ON CONFLICT LP sortlist RP where_opt DO NOTHING upsert */ +{ yymsp[-8].minor.yy444 = sqlite3UpsertNew(pParse->db,yymsp[-5].minor.yy322,yymsp[-3].minor.yy528,0,0,yymsp[0].minor.yy444); } + break; + case 168: /* upsert ::= ON CONFLICT DO NOTHING returning */ +{ yymsp[-4].minor.yy444 = sqlite3UpsertNew(pParse->db,0,0,0,0,0); } + break; + case 169: /* upsert ::= ON CONFLICT DO UPDATE SET setlist where_opt returning */ +{ yymsp[-7].minor.yy444 = sqlite3UpsertNew(pParse->db,0,0,yymsp[-2].minor.yy322,yymsp[-1].minor.yy528,0);} + break; + case 170: /* returning ::= RETURNING selcollist */ +{sqlite3AddReturning(pParse,yymsp[0].minor.yy322);} + break; + case 174: /* idlist_opt ::= LP idlist RP */ +{yymsp[-2].minor.yy254 = yymsp[-1].minor.yy254;} + break; + case 175: /* idlist ::= idlist COMMA nm */ +{yymsp[-2].minor.yy254 = sqlite3IdListAppend(pParse,yymsp[-2].minor.yy254,&yymsp[0].minor.yy0);} + break; + case 176: /* idlist ::= nm */ +{yymsp[0].minor.yy254 = sqlite3IdListAppend(pParse,0,&yymsp[0].minor.yy0); /*A-overwrites-Y*/} + break; + case 177: /* expr ::= LP expr RP */ +{yymsp[-2].minor.yy528 = yymsp[-1].minor.yy528;} + break; + case 178: /* expr ::= ID|INDEXED */ + case 179: /* expr ::= JOIN_KW */ yytestcase(yyruleno==179); +{yymsp[0].minor.yy528=tokenExpr(pParse,TK_ID,yymsp[0].minor.yy0); /*A-overwrites-X*/} + break; + case 180: /* expr ::= nm DOT nm */ +{ + Expr *temp1 = tokenExpr(pParse,TK_ID,yymsp[-2].minor.yy0); + Expr *temp2 = tokenExpr(pParse,TK_ID,yymsp[0].minor.yy0); + yylhsminor.yy528 = sqlite3PExpr(pParse, TK_DOT, temp1, temp2); +} + yymsp[-2].minor.yy528 = yylhsminor.yy528; + break; + case 181: /* expr ::= nm DOT nm DOT nm */ +{ + Expr *temp1 = tokenExpr(pParse,TK_ID,yymsp[-4].minor.yy0); + Expr *temp2 = tokenExpr(pParse,TK_ID,yymsp[-2].minor.yy0); + Expr *temp3 = tokenExpr(pParse,TK_ID,yymsp[0].minor.yy0); + Expr *temp4 = sqlite3PExpr(pParse, TK_DOT, temp2, temp3); + if( IN_RENAME_OBJECT ){ + sqlite3RenameTokenRemap(pParse, 0, temp1); + } + yylhsminor.yy528 = sqlite3PExpr(pParse, TK_DOT, temp1, temp4); +} + yymsp[-4].minor.yy528 = yylhsminor.yy528; + break; + case 182: /* term ::= NULL|FLOAT|BLOB */ + case 183: /* term ::= STRING */ yytestcase(yyruleno==183); +{yymsp[0].minor.yy528=tokenExpr(pParse,yymsp[0].major,yymsp[0].minor.yy0); /*A-overwrites-X*/} + break; + case 184: /* term ::= INTEGER */ +{ + yylhsminor.yy528 = sqlite3ExprAlloc(pParse->db, TK_INTEGER, &yymsp[0].minor.yy0, 1); + if( yylhsminor.yy528 ) yylhsminor.yy528->w.iOfst = (int)(yymsp[0].minor.yy0.z - pParse->zTail); +} + yymsp[0].minor.yy528 = yylhsminor.yy528; + break; + case 185: /* expr ::= VARIABLE */ +{ + if( !(yymsp[0].minor.yy0.z[0]=='#' && sqlite3Isdigit(yymsp[0].minor.yy0.z[1])) ){ + u32 n = yymsp[0].minor.yy0.n; + yymsp[0].minor.yy528 = tokenExpr(pParse, TK_VARIABLE, yymsp[0].minor.yy0); + sqlite3ExprAssignVarNumber(pParse, yymsp[0].minor.yy528, n); + }else{ + /* When doing a nested parse, one can include terms in an expression + ** that look like this: #1 #2 ... These terms refer to registers + ** in the virtual machine. #N is the N-th register. */ + Token t = yymsp[0].minor.yy0; /*A-overwrites-X*/ + assert( t.n>=2 ); + if( pParse->nested==0 ){ + sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &t); + yymsp[0].minor.yy528 = 0; + }else{ + yymsp[0].minor.yy528 = sqlite3PExpr(pParse, TK_REGISTER, 0, 0); + if( yymsp[0].minor.yy528 ) sqlite3GetInt32(&t.z[1], &yymsp[0].minor.yy528->iTable); + } + } +} + break; + case 186: /* expr ::= expr COLLATE ID|STRING */ +{ + yymsp[-2].minor.yy528 = sqlite3ExprAddCollateToken(pParse, yymsp[-2].minor.yy528, &yymsp[0].minor.yy0, 1); +} + break; + case 187: /* expr ::= CAST LP expr AS typetoken RP */ +{ + yymsp[-5].minor.yy528 = sqlite3ExprAlloc(pParse->db, TK_CAST, &yymsp[-1].minor.yy0, 1); + sqlite3ExprAttachSubtrees(pParse->db, yymsp[-5].minor.yy528, yymsp[-3].minor.yy528, 0); +} + break; + case 188: /* expr ::= ID|INDEXED LP distinct exprlist RP */ +{ + yylhsminor.yy528 = sqlite3ExprFunction(pParse, yymsp[-1].minor.yy322, &yymsp[-4].minor.yy0, yymsp[-2].minor.yy394); +} + yymsp[-4].minor.yy528 = yylhsminor.yy528; + break; + case 189: /* expr ::= ID|INDEXED LP STAR RP */ +{ + yylhsminor.yy528 = sqlite3ExprFunction(pParse, 0, &yymsp[-3].minor.yy0, 0); +} + yymsp[-3].minor.yy528 = yylhsminor.yy528; + break; + case 190: /* expr ::= ID|INDEXED LP distinct exprlist RP filter_over */ +{ + yylhsminor.yy528 = sqlite3ExprFunction(pParse, yymsp[-2].minor.yy322, &yymsp[-5].minor.yy0, yymsp[-3].minor.yy394); + sqlite3WindowAttach(pParse, yylhsminor.yy528, yymsp[0].minor.yy41); +} + yymsp[-5].minor.yy528 = yylhsminor.yy528; + break; + case 191: /* expr ::= ID|INDEXED LP STAR RP filter_over */ +{ + yylhsminor.yy528 = sqlite3ExprFunction(pParse, 0, &yymsp[-4].minor.yy0, 0); + sqlite3WindowAttach(pParse, yylhsminor.yy528, yymsp[0].minor.yy41); +} + yymsp[-4].minor.yy528 = yylhsminor.yy528; + break; + case 192: /* term ::= CTIME_KW */ +{ + yylhsminor.yy528 = sqlite3ExprFunction(pParse, 0, &yymsp[0].minor.yy0, 0); +} + yymsp[0].minor.yy528 = yylhsminor.yy528; + break; + case 193: /* expr ::= LP nexprlist COMMA expr RP */ +{ + ExprList *pList = sqlite3ExprListAppend(pParse, yymsp[-3].minor.yy322, yymsp[-1].minor.yy528); + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_VECTOR, 0, 0); + if( yymsp[-4].minor.yy528 ){ + yymsp[-4].minor.yy528->x.pList = pList; + if( ALWAYS(pList->nExpr) ){ + yymsp[-4].minor.yy528->flags |= pList->a[0].pExpr->flags & EP_Propagate; + } + }else{ + sqlite3ExprListDelete(pParse->db, pList); + } +} + break; + case 194: /* expr ::= expr AND expr */ +{yymsp[-2].minor.yy528=sqlite3ExprAnd(pParse,yymsp[-2].minor.yy528,yymsp[0].minor.yy528);} + break; + case 195: /* expr ::= expr OR expr */ + case 196: /* expr ::= expr LT|GT|GE|LE expr */ yytestcase(yyruleno==196); + case 197: /* expr ::= expr EQ|NE expr */ yytestcase(yyruleno==197); + case 198: /* expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ yytestcase(yyruleno==198); + case 199: /* expr ::= expr PLUS|MINUS expr */ yytestcase(yyruleno==199); + case 200: /* expr ::= expr STAR|SLASH|REM expr */ yytestcase(yyruleno==200); + case 201: /* expr ::= expr CONCAT expr */ yytestcase(yyruleno==201); +{yymsp[-2].minor.yy528=sqlite3PExpr(pParse,yymsp[-1].major,yymsp[-2].minor.yy528,yymsp[0].minor.yy528);} + break; + case 202: /* likeop ::= NOT LIKE_KW|MATCH */ +{yymsp[-1].minor.yy0=yymsp[0].minor.yy0; yymsp[-1].minor.yy0.n|=0x80000000; /*yymsp[-1].minor.yy0-overwrite-yymsp[0].minor.yy0*/} + break; + case 203: /* expr ::= expr likeop expr */ +{ + ExprList *pList; + int bNot = yymsp[-1].minor.yy0.n & 0x80000000; + yymsp[-1].minor.yy0.n &= 0x7fffffff; + pList = sqlite3ExprListAppend(pParse,0, yymsp[0].minor.yy528); + pList = sqlite3ExprListAppend(pParse,pList, yymsp[-2].minor.yy528); + yymsp[-2].minor.yy528 = sqlite3ExprFunction(pParse, pList, &yymsp[-1].minor.yy0, 0); + if( bNot ) yymsp[-2].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-2].minor.yy528, 0); + if( yymsp[-2].minor.yy528 ) yymsp[-2].minor.yy528->flags |= EP_InfixFunc; +} + break; + case 204: /* expr ::= expr likeop expr ESCAPE expr */ +{ + ExprList *pList; + int bNot = yymsp[-3].minor.yy0.n & 0x80000000; + yymsp[-3].minor.yy0.n &= 0x7fffffff; + pList = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy528); + pList = sqlite3ExprListAppend(pParse,pList, yymsp[-4].minor.yy528); + pList = sqlite3ExprListAppend(pParse,pList, yymsp[0].minor.yy528); + yymsp[-4].minor.yy528 = sqlite3ExprFunction(pParse, pList, &yymsp[-3].minor.yy0, 0); + if( bNot ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); + if( yymsp[-4].minor.yy528 ) yymsp[-4].minor.yy528->flags |= EP_InfixFunc; +} + break; + case 205: /* expr ::= expr ISNULL|NOTNULL */ +{yymsp[-1].minor.yy528 = sqlite3PExpr(pParse,yymsp[0].major,yymsp[-1].minor.yy528,0);} + break; + case 206: /* expr ::= expr NOT NULL */ +{yymsp[-2].minor.yy528 = sqlite3PExpr(pParse,TK_NOTNULL,yymsp[-2].minor.yy528,0);} + break; + case 207: /* expr ::= expr IS expr */ +{ + yymsp[-2].minor.yy528 = sqlite3PExpr(pParse,TK_IS,yymsp[-2].minor.yy528,yymsp[0].minor.yy528); + binaryToUnaryIfNull(pParse, yymsp[0].minor.yy528, yymsp[-2].minor.yy528, TK_ISNULL); +} + break; + case 208: /* expr ::= expr IS NOT expr */ +{ + yymsp[-3].minor.yy528 = sqlite3PExpr(pParse,TK_ISNOT,yymsp[-3].minor.yy528,yymsp[0].minor.yy528); + binaryToUnaryIfNull(pParse, yymsp[0].minor.yy528, yymsp[-3].minor.yy528, TK_NOTNULL); +} + break; + case 209: /* expr ::= NOT expr */ + case 210: /* expr ::= BITNOT expr */ yytestcase(yyruleno==210); +{yymsp[-1].minor.yy528 = sqlite3PExpr(pParse, yymsp[-1].major, yymsp[0].minor.yy528, 0);/*A-overwrites-B*/} + break; + case 211: /* expr ::= PLUS|MINUS expr */ +{ + yymsp[-1].minor.yy528 = sqlite3PExpr(pParse, yymsp[-1].major==TK_PLUS ? TK_UPLUS : TK_UMINUS, yymsp[0].minor.yy528, 0); + /*A-overwrites-B*/ +} + break; + case 212: /* expr ::= expr PTR expr */ +{ + ExprList *pList = sqlite3ExprListAppend(pParse, 0, yymsp[-2].minor.yy528); + pList = sqlite3ExprListAppend(pParse, pList, yymsp[0].minor.yy528); + yylhsminor.yy528 = sqlite3ExprFunction(pParse, pList, &yymsp[-1].minor.yy0, 0); +} + yymsp[-2].minor.yy528 = yylhsminor.yy528; + break; + case 213: /* between_op ::= BETWEEN */ + case 216: /* in_op ::= IN */ yytestcase(yyruleno==216); +{yymsp[0].minor.yy394 = 0;} + break; + case 215: /* expr ::= expr between_op expr AND expr */ +{ + ExprList *pList = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy528); + pList = sqlite3ExprListAppend(pParse,pList, yymsp[0].minor.yy528); + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_BETWEEN, yymsp[-4].minor.yy528, 0); + if( yymsp[-4].minor.yy528 ){ + yymsp[-4].minor.yy528->x.pList = pList; + }else{ + sqlite3ExprListDelete(pParse->db, pList); + } + if( yymsp[-3].minor.yy394 ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); +} + break; + case 218: /* expr ::= expr in_op LP exprlist RP */ +{ + if( yymsp[-1].minor.yy322==0 ){ + /* Expressions of the form + ** + ** expr1 IN () + ** expr1 NOT IN () + ** + ** simplify to constants 0 (false) and 1 (true), respectively, + ** regardless of the value of expr1. + */ + sqlite3ExprUnmapAndDelete(pParse, yymsp[-4].minor.yy528); + yymsp[-4].minor.yy528 = sqlite3Expr(pParse->db, TK_INTEGER, yymsp[-3].minor.yy394 ? "1" : "0"); + }else{ + Expr *pRHS = yymsp[-1].minor.yy322->a[0].pExpr; + if( yymsp[-1].minor.yy322->nExpr==1 && sqlite3ExprIsConstant(pRHS) && yymsp[-4].minor.yy528->op!=TK_VECTOR ){ + yymsp[-1].minor.yy322->a[0].pExpr = 0; + sqlite3ExprListDelete(pParse->db, yymsp[-1].minor.yy322); + pRHS = sqlite3PExpr(pParse, TK_UPLUS, pRHS, 0); + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_EQ, yymsp[-4].minor.yy528, pRHS); + }else{ + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy528, 0); + if( yymsp[-4].minor.yy528==0 ){ + sqlite3ExprListDelete(pParse->db, yymsp[-1].minor.yy322); + }else if( yymsp[-4].minor.yy528->pLeft->op==TK_VECTOR ){ + int nExpr = yymsp[-4].minor.yy528->pLeft->x.pList->nExpr; + Select *pSelectRHS = sqlite3ExprListToValues(pParse, nExpr, yymsp[-1].minor.yy322); + if( pSelectRHS ){ + parserDoubleLinkSelect(pParse, pSelectRHS); + sqlite3PExprAddSelect(pParse, yymsp[-4].minor.yy528, pSelectRHS); + } + }else{ + yymsp[-4].minor.yy528->x.pList = yymsp[-1].minor.yy322; + sqlite3ExprSetHeightAndFlags(pParse, yymsp[-4].minor.yy528); + } + } + if( yymsp[-3].minor.yy394 ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); + } + } + break; + case 219: /* expr ::= LP select RP */ +{ + yymsp[-2].minor.yy528 = sqlite3PExpr(pParse, TK_SELECT, 0, 0); + sqlite3PExprAddSelect(pParse, yymsp[-2].minor.yy528, yymsp[-1].minor.yy47); + } + break; + case 220: /* expr ::= expr in_op LP select RP */ +{ + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy528, 0); + sqlite3PExprAddSelect(pParse, yymsp[-4].minor.yy528, yymsp[-1].minor.yy47); + if( yymsp[-3].minor.yy394 ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); + } + break; + case 221: /* expr ::= expr in_op nm dbnm paren_exprlist */ +{ + SrcList *pSrc = sqlite3SrcListAppend(pParse, 0,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0); + Select *pSelect = sqlite3SelectNew(pParse, 0,pSrc,0,0,0,0,0,0); + if( yymsp[0].minor.yy322 ) sqlite3SrcListFuncArgs(pParse, pSelect ? pSrc : 0, yymsp[0].minor.yy322); + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy528, 0); + sqlite3PExprAddSelect(pParse, yymsp[-4].minor.yy528, pSelect); + if( yymsp[-3].minor.yy394 ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); + } + break; + case 222: /* expr ::= EXISTS LP select RP */ +{ + Expr *p; + p = yymsp[-3].minor.yy528 = sqlite3PExpr(pParse, TK_EXISTS, 0, 0); + sqlite3PExprAddSelect(pParse, p, yymsp[-1].minor.yy47); + } + break; + case 223: /* expr ::= CASE case_operand case_exprlist case_else END */ +{ + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_CASE, yymsp[-3].minor.yy528, 0); + if( yymsp[-4].minor.yy528 ){ + yymsp[-4].minor.yy528->x.pList = yymsp[-1].minor.yy528 ? sqlite3ExprListAppend(pParse,yymsp[-2].minor.yy322,yymsp[-1].minor.yy528) : yymsp[-2].minor.yy322; + sqlite3ExprSetHeightAndFlags(pParse, yymsp[-4].minor.yy528); + }else{ + sqlite3ExprListDelete(pParse->db, yymsp[-2].minor.yy322); + sqlite3ExprDelete(pParse->db, yymsp[-1].minor.yy528); + } +} + break; + case 224: /* case_exprlist ::= case_exprlist WHEN expr THEN expr */ +{ + yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy322, yymsp[-2].minor.yy528); + yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy322, yymsp[0].minor.yy528); +} + break; + case 225: /* case_exprlist ::= WHEN expr THEN expr */ +{ + yymsp[-3].minor.yy322 = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy528); + yymsp[-3].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-3].minor.yy322, yymsp[0].minor.yy528); +} + break; + case 228: /* case_operand ::= expr */ +{yymsp[0].minor.yy528 = yymsp[0].minor.yy528; /*A-overwrites-X*/} + break; + case 231: /* nexprlist ::= nexprlist COMMA expr */ +{yymsp[-2].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-2].minor.yy322,yymsp[0].minor.yy528);} + break; + case 232: /* nexprlist ::= expr */ +{yymsp[0].minor.yy322 = sqlite3ExprListAppend(pParse,0,yymsp[0].minor.yy528); /*A-overwrites-Y*/} + break; + case 234: /* paren_exprlist ::= LP exprlist RP */ + case 239: /* eidlist_opt ::= LP eidlist RP */ yytestcase(yyruleno==239); +{yymsp[-2].minor.yy322 = yymsp[-1].minor.yy322;} + break; + case 235: /* cmd ::= createkw uniqueflag INDEX ifnotexists nm dbnm ON nm LP sortlist RP where_opt */ +{ + sqlite3CreateIndex(pParse, &yymsp[-7].minor.yy0, &yymsp[-6].minor.yy0, + sqlite3SrcListAppend(pParse,0,&yymsp[-4].minor.yy0,0), yymsp[-2].minor.yy322, yymsp[-10].minor.yy394, + &yymsp[-11].minor.yy0, yymsp[0].minor.yy528, SQLITE_SO_ASC, yymsp[-8].minor.yy394, SQLITE_IDXTYPE_APPDEF); + if( IN_RENAME_OBJECT && pParse->pNewIndex ){ + sqlite3RenameTokenMap(pParse, pParse->pNewIndex->zName, &yymsp[-4].minor.yy0); + } +} + break; + case 236: /* uniqueflag ::= UNIQUE */ + case 278: /* raisetype ::= ABORT */ yytestcase(yyruleno==278); +{yymsp[0].minor.yy394 = OE_Abort;} + break; + case 237: /* uniqueflag ::= */ +{yymsp[1].minor.yy394 = OE_None;} + break; + case 240: /* eidlist ::= eidlist COMMA nm collate sortorder */ +{ + yymsp[-4].minor.yy322 = parserAddExprIdListTerm(pParse, yymsp[-4].minor.yy322, &yymsp[-2].minor.yy0, yymsp[-1].minor.yy394, yymsp[0].minor.yy394); +} + break; + case 241: /* eidlist ::= nm collate sortorder */ +{ + yymsp[-2].minor.yy322 = parserAddExprIdListTerm(pParse, 0, &yymsp[-2].minor.yy0, yymsp[-1].minor.yy394, yymsp[0].minor.yy394); /*A-overwrites-Y*/ +} + break; + case 244: /* cmd ::= DROP INDEX ifexists fullname */ +{sqlite3DropIndex(pParse, yymsp[0].minor.yy131, yymsp[-1].minor.yy394);} + break; + case 245: /* cmd ::= VACUUM vinto */ +{sqlite3Vacuum(pParse,0,yymsp[0].minor.yy528);} + break; + case 246: /* cmd ::= VACUUM nm vinto */ +{sqlite3Vacuum(pParse,&yymsp[-1].minor.yy0,yymsp[0].minor.yy528);} + break; + case 249: /* cmd ::= PRAGMA nm dbnm */ +{sqlite3Pragma(pParse,&yymsp[-1].minor.yy0,&yymsp[0].minor.yy0,0,0);} + break; + case 250: /* cmd ::= PRAGMA nm dbnm EQ nmnum */ +{sqlite3Pragma(pParse,&yymsp[-3].minor.yy0,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0,0);} + break; + case 251: /* cmd ::= PRAGMA nm dbnm LP nmnum RP */ +{sqlite3Pragma(pParse,&yymsp[-4].minor.yy0,&yymsp[-3].minor.yy0,&yymsp[-1].minor.yy0,0);} + break; + case 252: /* cmd ::= PRAGMA nm dbnm EQ minus_num */ +{sqlite3Pragma(pParse,&yymsp[-3].minor.yy0,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0,1);} + break; + case 253: /* cmd ::= PRAGMA nm dbnm LP minus_num RP */ +{sqlite3Pragma(pParse,&yymsp[-4].minor.yy0,&yymsp[-3].minor.yy0,&yymsp[-1].minor.yy0,1);} + break; + case 256: /* cmd ::= createkw trigger_decl BEGIN trigger_cmd_list END */ +{ + Token all; + all.z = yymsp[-3].minor.yy0.z; + all.n = (int)(yymsp[0].minor.yy0.z - yymsp[-3].minor.yy0.z) + yymsp[0].minor.yy0.n; + sqlite3FinishTrigger(pParse, yymsp[-1].minor.yy33, &all); +} + break; + case 257: /* trigger_decl ::= temp TRIGGER ifnotexists nm dbnm trigger_time trigger_event ON fullname foreach_clause when_clause */ +{ + sqlite3BeginTrigger(pParse, &yymsp[-7].minor.yy0, &yymsp[-6].minor.yy0, yymsp[-5].minor.yy394, yymsp[-4].minor.yy180.a, yymsp[-4].minor.yy180.b, yymsp[-2].minor.yy131, yymsp[0].minor.yy528, yymsp[-10].minor.yy394, yymsp[-8].minor.yy394); + yymsp[-10].minor.yy0 = (yymsp[-6].minor.yy0.n==0?yymsp[-7].minor.yy0:yymsp[-6].minor.yy0); /*A-overwrites-T*/ +} + break; + case 258: /* trigger_time ::= BEFORE|AFTER */ +{ yymsp[0].minor.yy394 = yymsp[0].major; /*A-overwrites-X*/ } + break; + case 259: /* trigger_time ::= INSTEAD OF */ +{ yymsp[-1].minor.yy394 = TK_INSTEAD;} + break; + case 260: /* trigger_time ::= */ +{ yymsp[1].minor.yy394 = TK_BEFORE; } + break; + case 261: /* trigger_event ::= DELETE|INSERT */ + case 262: /* trigger_event ::= UPDATE */ yytestcase(yyruleno==262); +{yymsp[0].minor.yy180.a = yymsp[0].major; /*A-overwrites-X*/ yymsp[0].minor.yy180.b = 0;} + break; + case 263: /* trigger_event ::= UPDATE OF idlist */ +{yymsp[-2].minor.yy180.a = TK_UPDATE; yymsp[-2].minor.yy180.b = yymsp[0].minor.yy254;} + break; + case 264: /* when_clause ::= */ + case 283: /* key_opt ::= */ yytestcase(yyruleno==283); +{ yymsp[1].minor.yy528 = 0; } + break; + case 265: /* when_clause ::= WHEN expr */ + case 284: /* key_opt ::= KEY expr */ yytestcase(yyruleno==284); +{ yymsp[-1].minor.yy528 = yymsp[0].minor.yy528; } + break; + case 266: /* trigger_cmd_list ::= trigger_cmd_list trigger_cmd SEMI */ +{ + assert( yymsp[-2].minor.yy33!=0 ); + yymsp[-2].minor.yy33->pLast->pNext = yymsp[-1].minor.yy33; + yymsp[-2].minor.yy33->pLast = yymsp[-1].minor.yy33; +} + break; + case 267: /* trigger_cmd_list ::= trigger_cmd SEMI */ +{ + assert( yymsp[-1].minor.yy33!=0 ); + yymsp[-1].minor.yy33->pLast = yymsp[-1].minor.yy33; +} + break; + case 268: /* trnm ::= nm DOT nm */ +{ + yymsp[-2].minor.yy0 = yymsp[0].minor.yy0; + sqlite3ErrorMsg(pParse, + "qualified table names are not allowed on INSERT, UPDATE, and DELETE " + "statements within triggers"); +} + break; + case 269: /* tridxby ::= INDEXED BY nm */ +{ + sqlite3ErrorMsg(pParse, + "the INDEXED BY clause is not allowed on UPDATE or DELETE statements " + "within triggers"); +} + break; + case 270: /* tridxby ::= NOT INDEXED */ +{ + sqlite3ErrorMsg(pParse, + "the NOT INDEXED clause is not allowed on UPDATE or DELETE statements " + "within triggers"); +} + break; + case 271: /* trigger_cmd ::= UPDATE orconf trnm tridxby SET setlist from where_opt scanpt */ +{yylhsminor.yy33 = sqlite3TriggerUpdateStep(pParse, &yymsp[-6].minor.yy0, yymsp[-2].minor.yy131, yymsp[-3].minor.yy322, yymsp[-1].minor.yy528, yymsp[-7].minor.yy394, yymsp[-8].minor.yy0.z, yymsp[0].minor.yy522);} + yymsp[-8].minor.yy33 = yylhsminor.yy33; + break; + case 272: /* trigger_cmd ::= scanpt insert_cmd INTO trnm idlist_opt select upsert scanpt */ +{ + yylhsminor.yy33 = sqlite3TriggerInsertStep(pParse,&yymsp[-4].minor.yy0,yymsp[-3].minor.yy254,yymsp[-2].minor.yy47,yymsp[-6].minor.yy394,yymsp[-1].minor.yy444,yymsp[-7].minor.yy522,yymsp[0].minor.yy522);/*yylhsminor.yy33-overwrites-yymsp[-6].minor.yy394*/ +} + yymsp[-7].minor.yy33 = yylhsminor.yy33; + break; + case 273: /* trigger_cmd ::= DELETE FROM trnm tridxby where_opt scanpt */ +{yylhsminor.yy33 = sqlite3TriggerDeleteStep(pParse, &yymsp[-3].minor.yy0, yymsp[-1].minor.yy528, yymsp[-5].minor.yy0.z, yymsp[0].minor.yy522);} + yymsp[-5].minor.yy33 = yylhsminor.yy33; + break; + case 274: /* trigger_cmd ::= scanpt select scanpt */ +{yylhsminor.yy33 = sqlite3TriggerSelectStep(pParse->db, yymsp[-1].minor.yy47, yymsp[-2].minor.yy522, yymsp[0].minor.yy522); /*yylhsminor.yy33-overwrites-yymsp[-1].minor.yy47*/} + yymsp[-2].minor.yy33 = yylhsminor.yy33; + break; + case 275: /* expr ::= RAISE LP IGNORE RP */ +{ + yymsp[-3].minor.yy528 = sqlite3PExpr(pParse, TK_RAISE, 0, 0); + if( yymsp[-3].minor.yy528 ){ + yymsp[-3].minor.yy528->affExpr = OE_Ignore; + } +} + break; + case 276: /* expr ::= RAISE LP raisetype COMMA nm RP */ +{ + yymsp[-5].minor.yy528 = sqlite3ExprAlloc(pParse->db, TK_RAISE, &yymsp[-1].minor.yy0, 1); + if( yymsp[-5].minor.yy528 ) { + yymsp[-5].minor.yy528->affExpr = (char)yymsp[-3].minor.yy394; + } +} + break; + case 277: /* raisetype ::= ROLLBACK */ +{yymsp[0].minor.yy394 = OE_Rollback;} + break; + case 279: /* raisetype ::= FAIL */ +{yymsp[0].minor.yy394 = OE_Fail;} + break; + case 280: /* cmd ::= DROP TRIGGER ifexists fullname */ +{ + sqlite3DropTrigger(pParse,yymsp[0].minor.yy131,yymsp[-1].minor.yy394); +} + break; + case 281: /* cmd ::= ATTACH database_kw_opt expr AS expr key_opt */ +{ + sqlite3Attach(pParse, yymsp[-3].minor.yy528, yymsp[-1].minor.yy528, yymsp[0].minor.yy528); +} + break; + case 282: /* cmd ::= DETACH database_kw_opt expr */ +{ + sqlite3Detach(pParse, yymsp[0].minor.yy528); +} + break; + case 285: /* cmd ::= REINDEX */ +{sqlite3Reindex(pParse, 0, 0);} + break; + case 286: /* cmd ::= REINDEX nm dbnm */ +{sqlite3Reindex(pParse, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy0);} + break; + case 287: /* cmd ::= ANALYZE */ +{sqlite3Analyze(pParse, 0, 0);} + break; + case 288: /* cmd ::= ANALYZE nm dbnm */ +{sqlite3Analyze(pParse, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy0);} + break; + case 289: /* cmd ::= ALTER TABLE fullname RENAME TO nm */ +{ + sqlite3AlterRenameTable(pParse,yymsp[-3].minor.yy131,&yymsp[0].minor.yy0); +} + break; + case 290: /* cmd ::= ALTER TABLE add_column_fullname ADD kwcolumn_opt columnname carglist */ +{ + yymsp[-1].minor.yy0.n = (int)(pParse->sLastToken.z-yymsp[-1].minor.yy0.z) + pParse->sLastToken.n; + sqlite3AlterFinishAddColumn(pParse, &yymsp[-1].minor.yy0); +} + break; + case 291: /* cmd ::= ALTER TABLE fullname DROP kwcolumn_opt nm */ +{ + sqlite3AlterDropColumn(pParse, yymsp[-3].minor.yy131, &yymsp[0].minor.yy0); +} + break; + case 292: /* add_column_fullname ::= fullname */ +{ + disableLookaside(pParse); + sqlite3AlterBeginAddColumn(pParse, yymsp[0].minor.yy131); +} + break; + case 293: /* cmd ::= ALTER TABLE fullname RENAME kwcolumn_opt nm TO nm */ +{ + sqlite3AlterRenameColumn(pParse, yymsp[-5].minor.yy131, &yymsp[-2].minor.yy0, &yymsp[0].minor.yy0); +} + break; + case 294: /* cmd ::= create_vtab */ +{sqlite3VtabFinishParse(pParse,0);} + break; + case 295: /* cmd ::= create_vtab LP vtabarglist RP */ +{sqlite3VtabFinishParse(pParse,&yymsp[0].minor.yy0);} + break; + case 296: /* create_vtab ::= createkw VIRTUAL TABLE ifnotexists nm dbnm USING nm */ +{ + sqlite3VtabBeginParse(pParse, &yymsp[-3].minor.yy0, &yymsp[-2].minor.yy0, &yymsp[0].minor.yy0, yymsp[-4].minor.yy394); +} + break; + case 297: /* vtabarg ::= */ +{sqlite3VtabArgInit(pParse);} + break; + case 298: /* vtabargtoken ::= ANY */ + case 299: /* vtabargtoken ::= lp anylist RP */ yytestcase(yyruleno==299); + case 300: /* lp ::= LP */ yytestcase(yyruleno==300); +{sqlite3VtabArgExtend(pParse,&yymsp[0].minor.yy0);} + break; + case 301: /* with ::= WITH wqlist */ + case 302: /* with ::= WITH RECURSIVE wqlist */ yytestcase(yyruleno==302); +{ sqlite3WithPush(pParse, yymsp[0].minor.yy521, 1); } + break; + case 303: /* wqas ::= AS */ +{yymsp[0].minor.yy516 = M10d_Any;} + break; + case 304: /* wqas ::= AS MATERIALIZED */ +{yymsp[-1].minor.yy516 = M10d_Yes;} + break; + case 305: /* wqas ::= AS NOT MATERIALIZED */ +{yymsp[-2].minor.yy516 = M10d_No;} + break; + case 306: /* wqitem ::= nm eidlist_opt wqas LP select RP */ +{ + yymsp[-5].minor.yy385 = sqlite3CteNew(pParse, &yymsp[-5].minor.yy0, yymsp[-4].minor.yy322, yymsp[-1].minor.yy47, yymsp[-3].minor.yy516); /*A-overwrites-X*/ +} + break; + case 307: /* wqlist ::= wqitem */ +{ + yymsp[0].minor.yy521 = sqlite3WithAdd(pParse, 0, yymsp[0].minor.yy385); /*A-overwrites-X*/ +} + break; + case 308: /* wqlist ::= wqlist COMMA wqitem */ +{ + yymsp[-2].minor.yy521 = sqlite3WithAdd(pParse, yymsp[-2].minor.yy521, yymsp[0].minor.yy385); +} + break; + case 309: /* windowdefn_list ::= windowdefn */ +{ yylhsminor.yy41 = yymsp[0].minor.yy41; } + yymsp[0].minor.yy41 = yylhsminor.yy41; + break; + case 310: /* windowdefn_list ::= windowdefn_list COMMA windowdefn */ +{ + assert( yymsp[0].minor.yy41!=0 ); + sqlite3WindowChain(pParse, yymsp[0].minor.yy41, yymsp[-2].minor.yy41); + yymsp[0].minor.yy41->pNextWin = yymsp[-2].minor.yy41; + yylhsminor.yy41 = yymsp[0].minor.yy41; +} + yymsp[-2].minor.yy41 = yylhsminor.yy41; + break; + case 311: /* windowdefn ::= nm AS LP window RP */ +{ + if( ALWAYS(yymsp[-1].minor.yy41) ){ + yymsp[-1].minor.yy41->zName = sqlite3DbStrNDup(pParse->db, yymsp[-4].minor.yy0.z, yymsp[-4].minor.yy0.n); + } + yylhsminor.yy41 = yymsp[-1].minor.yy41; +} + yymsp[-4].minor.yy41 = yylhsminor.yy41; + break; + case 312: /* window ::= PARTITION BY nexprlist orderby_opt frame_opt */ +{ + yymsp[-4].minor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, yymsp[-2].minor.yy322, yymsp[-1].minor.yy322, 0); +} + break; + case 313: /* window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ +{ + yylhsminor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, yymsp[-2].minor.yy322, yymsp[-1].minor.yy322, &yymsp[-5].minor.yy0); +} + yymsp[-5].minor.yy41 = yylhsminor.yy41; + break; + case 314: /* window ::= ORDER BY sortlist frame_opt */ +{ + yymsp[-3].minor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, 0, yymsp[-1].minor.yy322, 0); +} + break; + case 315: /* window ::= nm ORDER BY sortlist frame_opt */ +{ + yylhsminor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, 0, yymsp[-1].minor.yy322, &yymsp[-4].minor.yy0); +} + yymsp[-4].minor.yy41 = yylhsminor.yy41; + break; + case 316: /* window ::= frame_opt */ + case 335: /* filter_over ::= over_clause */ yytestcase(yyruleno==335); +{ + yylhsminor.yy41 = yymsp[0].minor.yy41; +} + yymsp[0].minor.yy41 = yylhsminor.yy41; + break; + case 317: /* window ::= nm frame_opt */ +{ + yylhsminor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, 0, 0, &yymsp[-1].minor.yy0); +} + yymsp[-1].minor.yy41 = yylhsminor.yy41; + break; + case 318: /* frame_opt ::= */ +{ + yymsp[1].minor.yy41 = sqlite3WindowAlloc(pParse, 0, TK_UNBOUNDED, 0, TK_CURRENT, 0, 0); +} + break; + case 319: /* frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ +{ + yylhsminor.yy41 = sqlite3WindowAlloc(pParse, yymsp[-2].minor.yy394, yymsp[-1].minor.yy595.eType, yymsp[-1].minor.yy595.pExpr, TK_CURRENT, 0, yymsp[0].minor.yy516); +} + yymsp[-2].minor.yy41 = yylhsminor.yy41; + break; + case 320: /* frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ +{ + yylhsminor.yy41 = sqlite3WindowAlloc(pParse, yymsp[-5].minor.yy394, yymsp[-3].minor.yy595.eType, yymsp[-3].minor.yy595.pExpr, yymsp[-1].minor.yy595.eType, yymsp[-1].minor.yy595.pExpr, yymsp[0].minor.yy516); +} + yymsp[-5].minor.yy41 = yylhsminor.yy41; + break; + case 322: /* frame_bound_s ::= frame_bound */ + case 324: /* frame_bound_e ::= frame_bound */ yytestcase(yyruleno==324); +{yylhsminor.yy595 = yymsp[0].minor.yy595;} + yymsp[0].minor.yy595 = yylhsminor.yy595; + break; + case 323: /* frame_bound_s ::= UNBOUNDED PRECEDING */ + case 325: /* frame_bound_e ::= UNBOUNDED FOLLOWING */ yytestcase(yyruleno==325); + case 327: /* frame_bound ::= CURRENT ROW */ yytestcase(yyruleno==327); +{yylhsminor.yy595.eType = yymsp[-1].major; yylhsminor.yy595.pExpr = 0;} + yymsp[-1].minor.yy595 = yylhsminor.yy595; + break; + case 326: /* frame_bound ::= expr PRECEDING|FOLLOWING */ +{yylhsminor.yy595.eType = yymsp[0].major; yylhsminor.yy595.pExpr = yymsp[-1].minor.yy528;} + yymsp[-1].minor.yy595 = yylhsminor.yy595; + break; + case 328: /* frame_exclude_opt ::= */ +{yymsp[1].minor.yy516 = 0;} + break; + case 329: /* frame_exclude_opt ::= EXCLUDE frame_exclude */ +{yymsp[-1].minor.yy516 = yymsp[0].minor.yy516;} + break; + case 330: /* frame_exclude ::= NO OTHERS */ + case 331: /* frame_exclude ::= CURRENT ROW */ yytestcase(yyruleno==331); +{yymsp[-1].minor.yy516 = yymsp[-1].major; /*A-overwrites-X*/} + break; + case 332: /* frame_exclude ::= GROUP|TIES */ +{yymsp[0].minor.yy516 = yymsp[0].major; /*A-overwrites-X*/} + break; + case 333: /* window_clause ::= WINDOW windowdefn_list */ +{ yymsp[-1].minor.yy41 = yymsp[0].minor.yy41; } + break; + case 334: /* filter_over ::= filter_clause over_clause */ +{ + if( yymsp[0].minor.yy41 ){ + yymsp[0].minor.yy41->pFilter = yymsp[-1].minor.yy528; + }else{ + sqlite3ExprDelete(pParse->db, yymsp[-1].minor.yy528); + } + yylhsminor.yy41 = yymsp[0].minor.yy41; +} + yymsp[-1].minor.yy41 = yylhsminor.yy41; + break; + case 336: /* filter_over ::= filter_clause */ +{ + yylhsminor.yy41 = (Window*)sqlite3DbMallocZero(pParse->db, sizeof(Window)); + if( yylhsminor.yy41 ){ + yylhsminor.yy41->eFrmType = TK_FILTER; + yylhsminor.yy41->pFilter = yymsp[0].minor.yy528; + }else{ + sqlite3ExprDelete(pParse->db, yymsp[0].minor.yy528); + } +} + yymsp[0].minor.yy41 = yylhsminor.yy41; + break; + case 337: /* over_clause ::= OVER LP window RP */ +{ + yymsp[-3].minor.yy41 = yymsp[-1].minor.yy41; + assert( yymsp[-3].minor.yy41!=0 ); +} + break; + case 338: /* over_clause ::= OVER nm */ +{ + yymsp[-1].minor.yy41 = (Window*)sqlite3DbMallocZero(pParse->db, sizeof(Window)); + if( yymsp[-1].minor.yy41 ){ + yymsp[-1].minor.yy41->zName = sqlite3DbStrNDup(pParse->db, yymsp[0].minor.yy0.z, yymsp[0].minor.yy0.n); + } +} + break; + case 339: /* filter_clause ::= FILTER LP WHERE expr RP */ +{ yymsp[-4].minor.yy528 = yymsp[-1].minor.yy528; } + break; + default: + /* (340) input ::= cmdlist */ yytestcase(yyruleno==340); + /* (341) cmdlist ::= cmdlist ecmd */ yytestcase(yyruleno==341); + /* (342) cmdlist ::= ecmd (OPTIMIZED OUT) */ assert(yyruleno!=342); + /* (343) ecmd ::= SEMI */ yytestcase(yyruleno==343); + /* (344) ecmd ::= cmdx SEMI */ yytestcase(yyruleno==344); + /* (345) ecmd ::= explain cmdx SEMI (NEVER REDUCES) */ assert(yyruleno!=345); + /* (346) trans_opt ::= */ yytestcase(yyruleno==346); + /* (347) trans_opt ::= TRANSACTION */ yytestcase(yyruleno==347); + /* (348) trans_opt ::= TRANSACTION nm */ yytestcase(yyruleno==348); + /* (349) savepoint_opt ::= SAVEPOINT */ yytestcase(yyruleno==349); + /* (350) savepoint_opt ::= */ yytestcase(yyruleno==350); + /* (351) cmd ::= create_table create_table_args */ yytestcase(yyruleno==351); + /* (352) table_option_set ::= table_option (OPTIMIZED OUT) */ assert(yyruleno!=352); + /* (353) columnlist ::= columnlist COMMA columnname carglist */ yytestcase(yyruleno==353); + /* (354) columnlist ::= columnname carglist */ yytestcase(yyruleno==354); + /* (355) nm ::= ID|INDEXED */ yytestcase(yyruleno==355); + /* (356) nm ::= STRING */ yytestcase(yyruleno==356); + /* (357) nm ::= JOIN_KW */ yytestcase(yyruleno==357); + /* (358) typetoken ::= typename */ yytestcase(yyruleno==358); + /* (359) typename ::= ID|STRING */ yytestcase(yyruleno==359); + /* (360) signed ::= plus_num (OPTIMIZED OUT) */ assert(yyruleno!=360); + /* (361) signed ::= minus_num (OPTIMIZED OUT) */ assert(yyruleno!=361); + /* (362) carglist ::= carglist ccons */ yytestcase(yyruleno==362); + /* (363) carglist ::= */ yytestcase(yyruleno==363); + /* (364) ccons ::= NULL onconf */ yytestcase(yyruleno==364); + /* (365) ccons ::= GENERATED ALWAYS AS generated */ yytestcase(yyruleno==365); + /* (366) ccons ::= AS generated */ yytestcase(yyruleno==366); + /* (367) conslist_opt ::= COMMA conslist */ yytestcase(yyruleno==367); + /* (368) conslist ::= conslist tconscomma tcons */ yytestcase(yyruleno==368); + /* (369) conslist ::= tcons (OPTIMIZED OUT) */ assert(yyruleno!=369); + /* (370) tconscomma ::= */ yytestcase(yyruleno==370); + /* (371) defer_subclause_opt ::= defer_subclause (OPTIMIZED OUT) */ assert(yyruleno!=371); + /* (372) resolvetype ::= raisetype (OPTIMIZED OUT) */ assert(yyruleno!=372); + /* (373) selectnowith ::= oneselect (OPTIMIZED OUT) */ assert(yyruleno!=373); + /* (374) oneselect ::= values */ yytestcase(yyruleno==374); + /* (375) sclp ::= selcollist COMMA */ yytestcase(yyruleno==375); + /* (376) as ::= ID|STRING */ yytestcase(yyruleno==376); + /* (377) returning ::= */ yytestcase(yyruleno==377); + /* (378) expr ::= term (OPTIMIZED OUT) */ assert(yyruleno!=378); + /* (379) likeop ::= LIKE_KW|MATCH */ yytestcase(yyruleno==379); + /* (380) exprlist ::= nexprlist */ yytestcase(yyruleno==380); + /* (381) nmnum ::= plus_num (OPTIMIZED OUT) */ assert(yyruleno!=381); + /* (382) nmnum ::= nm (OPTIMIZED OUT) */ assert(yyruleno!=382); + /* (383) nmnum ::= ON */ yytestcase(yyruleno==383); + /* (384) nmnum ::= DELETE */ yytestcase(yyruleno==384); + /* (385) nmnum ::= DEFAULT */ yytestcase(yyruleno==385); + /* (386) plus_num ::= INTEGER|FLOAT */ yytestcase(yyruleno==386); + /* (387) foreach_clause ::= */ yytestcase(yyruleno==387); + /* (388) foreach_clause ::= FOR EACH ROW */ yytestcase(yyruleno==388); + /* (389) trnm ::= nm */ yytestcase(yyruleno==389); + /* (390) tridxby ::= */ yytestcase(yyruleno==390); + /* (391) database_kw_opt ::= DATABASE */ yytestcase(yyruleno==391); + /* (392) database_kw_opt ::= */ yytestcase(yyruleno==392); + /* (393) kwcolumn_opt ::= */ yytestcase(yyruleno==393); + /* (394) kwcolumn_opt ::= COLUMNKW */ yytestcase(yyruleno==394); + /* (395) vtabarglist ::= vtabarg */ yytestcase(yyruleno==395); + /* (396) vtabarglist ::= vtabarglist COMMA vtabarg */ yytestcase(yyruleno==396); + /* (397) vtabarg ::= vtabarg vtabargtoken */ yytestcase(yyruleno==397); + /* (398) anylist ::= */ yytestcase(yyruleno==398); + /* (399) anylist ::= anylist LP anylist RP */ yytestcase(yyruleno==399); + /* (400) anylist ::= anylist ANY */ yytestcase(yyruleno==400); + /* (401) with ::= */ yytestcase(yyruleno==401); + break; +/********** End reduce actions ************************************************/ + }; + assert( yyrulenoYY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) ); + + /* It is not possible for a REDUCE to be followed by an error */ + assert( yyact!=YY_ERROR_ACTION ); + + yymsp += yysize+1; + yypParser->yytos = yymsp; + yymsp->stateno = (YYACTIONTYPE)yyact; + yymsp->major = (YYCODETYPE)yygoto; + yyTraceShift(yypParser, yyact, "... then shift"); + return yyact; +} + +/* +** The following code executes when the parse fails +*/ +#ifndef YYNOERRORRECOVERY +static void yy_parse_failed( + yyParser *yypParser /* The parser */ +){ + sqlite3ParserARG_FETCH + sqlite3ParserCTX_FETCH +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); + } +#endif + while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser fails */ +/************ Begin %parse_failure code ***************************************/ +/************ End %parse_failure code *****************************************/ + sqlite3ParserARG_STORE /* Suppress warning about unused %extra_argument variable */ + sqlite3ParserCTX_STORE +} +#endif /* YYNOERRORRECOVERY */ + +/* +** The following code executes when a syntax error first occurs. +*/ +static void yy_syntax_error( + yyParser *yypParser, /* The parser */ + int yymajor, /* The major type of the error token */ + sqlite3ParserTOKENTYPE yyminor /* The minor type of the error token */ +){ + sqlite3ParserARG_FETCH + sqlite3ParserCTX_FETCH +#define TOKEN yyminor +/************ Begin %syntax_error code ****************************************/ + + UNUSED_PARAMETER(yymajor); /* Silence some compiler warnings */ + if( TOKEN.z[0] ){ + sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &TOKEN); + }else{ + sqlite3ErrorMsg(pParse, "incomplete input"); + } +/************ End %syntax_error code ******************************************/ + sqlite3ParserARG_STORE /* Suppress warning about unused %extra_argument variable */ + sqlite3ParserCTX_STORE +} + +/* +** The following is executed when the parser accepts +*/ +static void yy_accept( + yyParser *yypParser /* The parser */ +){ + sqlite3ParserARG_FETCH + sqlite3ParserCTX_FETCH +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt); + } +#endif +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + assert( yypParser->yytos==yypParser->yystack ); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ +/*********** Begin %parse_accept code *****************************************/ +/*********** End %parse_accept code *******************************************/ + sqlite3ParserARG_STORE /* Suppress warning about unused %extra_argument variable */ + sqlite3ParserCTX_STORE +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "sqlite3ParserAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
      +**
    • A pointer to the parser (an opaque structure.) +**
    • The major token number. +**
    • The minor token number. +**
    • An option argument of a grammar-specified type. +**
    +** +** Outputs: +** None. +*/ +SQLITE_PRIVATE void sqlite3Parser( + void *yyp, /* The parser */ + int yymajor, /* The major token code number */ + sqlite3ParserTOKENTYPE yyminor /* The value for the token */ + sqlite3ParserARG_PDECL /* Optional %extra_argument parameter */ +){ + YYMINORTYPE yyminorunion; + YYACTIONTYPE yyact; /* The parser action. */ +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + int yyendofinput; /* True if we are at the end of input */ +#endif +#ifdef YYERRORSYMBOL + int yyerrorhit = 0; /* True if yymajor has invoked an error */ +#endif + yyParser *yypParser = (yyParser*)yyp; /* The parser */ + sqlite3ParserCTX_FETCH + sqlite3ParserARG_STORE + + assert( yypParser->yytos!=0 ); +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + yyendofinput = (yymajor==0); +#endif + + yyact = yypParser->yytos->stateno; +#ifndef NDEBUG + if( yyTraceFILE ){ + if( yyact < YY_MIN_REDUCE ){ + fprintf(yyTraceFILE,"%sInput '%s' in state %d\n", + yyTracePrompt,yyTokenName[yymajor],yyact); + }else{ + fprintf(yyTraceFILE,"%sInput '%s' with pending reduce %d\n", + yyTracePrompt,yyTokenName[yymajor],yyact-YY_MIN_REDUCE); + } + } +#endif + + while(1){ /* Exit by "break" */ + assert( yypParser->yytos>=yypParser->yystack ); + assert( yyact==yypParser->yytos->stateno ); + yyact = yy_find_shift_action((YYCODETYPE)yymajor,yyact); + if( yyact >= YY_MIN_REDUCE ){ + unsigned int yyruleno = yyact - YY_MIN_REDUCE; /* Reduce by this rule */ +#ifndef NDEBUG + assert( yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ); + if( yyTraceFILE ){ + int yysize = yyRuleInfoNRhs[yyruleno]; + if( yysize ){ + fprintf(yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n", + yyTracePrompt, + yyruleno, yyRuleName[yyruleno], + yyrulenoyytos[yysize].stateno); + }else{ + fprintf(yyTraceFILE, "%sReduce %d [%s]%s.\n", + yyTracePrompt, yyruleno, yyRuleName[yyruleno], + yyrulenoyytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == + (int)(yypParser->yytos - yypParser->yystack)); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>=yypParser->yystackEnd ){ + yyStackOverflow(yypParser); + break; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){ + if( yyGrowStack(yypParser) ){ + yyStackOverflow(yypParser); + break; + } + } +#endif + } + yyact = yy_reduce(yypParser,yyruleno,yymajor,yyminor sqlite3ParserCTX_PARAM); + }else if( yyact <= YY_MAX_SHIFTREDUCE ){ + yy_shift(yypParser,yyact,(YYCODETYPE)yymajor,yyminor); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt--; +#endif + break; + }else if( yyact==YY_ACCEPT_ACTION ){ + yypParser->yytos--; + yy_accept(yypParser); + return; + }else{ + assert( yyact == YY_ERROR_ACTION ); + yyminorunion.yy0 = yyminor; +#ifdef YYERRORSYMBOL + int yymx; +#endif +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); + } +#endif +#ifdef YYERRORSYMBOL + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. + ** + */ + if( yypParser->yyerrcnt<0 ){ + yy_syntax_error(yypParser,yymajor,yyminor); + } + yymx = yypParser->yytos->major; + if( yymx==YYERRORSYMBOL || yyerrorhit ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sDiscard input token %s\n", + yyTracePrompt,yyTokenName[yymajor]); + } +#endif + yy_destructor(yypParser, (YYCODETYPE)yymajor, &yyminorunion); + yymajor = YYNOCODE; + }else{ + while( yypParser->yytos > yypParser->yystack ){ + yyact = yy_find_reduce_action(yypParser->yytos->stateno, + YYERRORSYMBOL); + if( yyact<=YY_MAX_SHIFTREDUCE ) break; + yy_pop_parser_stack(yypParser); + } + if( yypParser->yytos <= yypParser->yystack || yymajor==0 ){ + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + yymajor = YYNOCODE; + }else if( yymx!=YYERRORSYMBOL ){ + yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor); + } + } + yypParser->yyerrcnt = 3; + yyerrorhit = 1; + if( yymajor==YYNOCODE ) break; + yyact = yypParser->yytos->stateno; +#elif defined(YYNOERRORRECOVERY) + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + yy_syntax_error(yypParser,yymajor, yyminor); + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + break; +#else /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. + */ + if( yypParser->yyerrcnt<=0 ){ + yy_syntax_error(yypParser,yymajor, yyminor); + } + yypParser->yyerrcnt = 3; + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + if( yyendofinput ){ + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + } + break; +#endif + } + } +#ifndef NDEBUG + if( yyTraceFILE ){ + yyStackEntry *i; + char cDiv = '['; + fprintf(yyTraceFILE,"%sReturn. Stack=",yyTracePrompt); + for(i=&yypParser->yystack[1]; i<=yypParser->yytos; i++){ + fprintf(yyTraceFILE,"%c%s", cDiv, yyTokenName[i->major]); + cDiv = ' '; + } + fprintf(yyTraceFILE,"]\n"); + } +#endif + return; +} + +/* +** Return the fallback token corresponding to canonical token iToken, or +** 0 if iToken has no fallback. +*/ +SQLITE_PRIVATE int sqlite3ParserFallback(int iToken){ +#ifdef YYFALLBACK + assert( iToken<(int)(sizeof(yyFallback)/sizeof(yyFallback[0])) ); + return yyFallback[iToken]; +#else + (void)iToken; + return 0; +#endif +} + +/************** End of parse.c ***********************************************/ +/************** Begin file tokenize.c ****************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** An tokenizer for SQL +** +** This file contains C code that splits an SQL input string up into +** individual tokens and sends those tokens one-by-one over to the +** parser for analysis. +*/ +/* #include "sqliteInt.h" */ +/* #include */ + +/* Character classes for tokenizing +** +** In the sqlite3GetToken() function, a switch() on aiClass[c] is implemented +** using a lookup table, whereas a switch() directly on c uses a binary search. +** The lookup table is much faster. To maximize speed, and to ensure that +** a lookup table is used, all of the classes need to be small integers and +** all of them need to be used within the switch. +*/ +#define CC_X 0 /* The letter 'x', or start of BLOB literal */ +#define CC_KYWD0 1 /* First letter of a keyword */ +#define CC_KYWD 2 /* Alphabetics or '_'. Usable in a keyword */ +#define CC_DIGIT 3 /* Digits */ +#define CC_DOLLAR 4 /* '$' */ +#define CC_VARALPHA 5 /* '@', '#', ':'. Alphabetic SQL variables */ +#define CC_VARNUM 6 /* '?'. Numeric SQL variables */ +#define CC_SPACE 7 /* Space characters */ +#define CC_QUOTE 8 /* '"', '\'', or '`'. String literals, quoted ids */ +#define CC_QUOTE2 9 /* '['. [...] style quoted ids */ +#define CC_PIPE 10 /* '|'. Bitwise OR or concatenate */ +#define CC_MINUS 11 /* '-'. Minus or SQL-style comment */ +#define CC_LT 12 /* '<'. Part of < or <= or <> */ +#define CC_GT 13 /* '>'. Part of > or >= */ +#define CC_EQ 14 /* '='. Part of = or == */ +#define CC_BANG 15 /* '!'. Part of != */ +#define CC_SLASH 16 /* '/'. / or c-style comment */ +#define CC_LP 17 /* '(' */ +#define CC_RP 18 /* ')' */ +#define CC_SEMI 19 /* ';' */ +#define CC_PLUS 20 /* '+' */ +#define CC_STAR 21 /* '*' */ +#define CC_PERCENT 22 /* '%' */ +#define CC_COMMA 23 /* ',' */ +#define CC_AND 24 /* '&' */ +#define CC_TILDA 25 /* '~' */ +#define CC_DOT 26 /* '.' */ +#define CC_ID 27 /* unicode characters usable in IDs */ +#define CC_ILLEGAL 28 /* Illegal character */ +#define CC_NUL 29 /* 0x00 */ +#define CC_BOM 30 /* First byte of UTF8 BOM: 0xEF 0xBB 0xBF */ + +static const unsigned char aiClass[] = { +#ifdef SQLITE_ASCII +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ +/* 0x */ 29, 28, 28, 28, 28, 28, 28, 28, 28, 7, 7, 28, 7, 7, 28, 28, +/* 1x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16, +/* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6, +/* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 9, 28, 28, 28, 2, +/* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 28, 10, 28, 25, 28, +/* 8x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* 9x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Ax */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Cx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Dx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Ex */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 30, +/* Fx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27 +#endif +#ifdef SQLITE_EBCDIC +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ +/* 0x */ 29, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 7, 7, 28, 28, +/* 1x */ 28, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 2x */ 28, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 3x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 4x */ 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 26, 12, 17, 20, 10, +/* 5x */ 24, 28, 28, 28, 28, 28, 28, 28, 28, 28, 15, 4, 21, 18, 19, 28, +/* 6x */ 11, 16, 28, 28, 28, 28, 28, 28, 28, 28, 28, 23, 22, 2, 13, 6, +/* 7x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 8, 5, 5, 5, 8, 14, 8, +/* 8x */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* 9x */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* Ax */ 28, 25, 1, 1, 1, 1, 1, 0, 2, 2, 28, 28, 28, 28, 28, 28, +/* Bx */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 9, 28, 28, 28, 28, 28, +/* Cx */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* Dx */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* Ex */ 28, 28, 1, 1, 1, 1, 1, 0, 2, 2, 28, 28, 28, 28, 28, 28, +/* Fx */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 28, 28, 28, 28, 28, 28, +#endif +}; + +/* +** The charMap() macro maps alphabetic characters (only) into their +** lower-case ASCII equivalent. On ASCII machines, this is just +** an upper-to-lower case map. On EBCDIC machines we also need +** to adjust the encoding. The mapping is only valid for alphabetics +** which are the only characters for which this feature is used. +** +** Used by keywordhash.h +*/ +#ifdef SQLITE_ASCII +# define charMap(X) sqlite3UpperToLower[(unsigned char)X] +#endif +#ifdef SQLITE_EBCDIC +# define charMap(X) ebcdicToAscii[(unsigned char)X] +const unsigned char ebcdicToAscii[] = { +/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ + 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ + 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ + 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ + 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ + 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ + 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ +}; +#endif + +/* +** The sqlite3KeywordCode function looks up an identifier to determine if +** it is a keyword. If it is a keyword, the token code of that keyword is +** returned. If the input is not a keyword, TK_ID is returned. +** +** The implementation of this routine was generated by a program, +** mkkeywordhash.c, located in the tool subdirectory of the distribution. +** The output of the mkkeywordhash.c program is written into a file +** named keywordhash.h and then included into this source file by +** the #include below. +*/ +/************** Include keywordhash.h in the middle of tokenize.c ************/ +/************** Begin file keywordhash.h *************************************/ +/***** This file contains automatically generated code ****** +** +** The code in this file has been automatically generated by +** +** sqlite/tool/mkkeywordhash.c +** +** The code in this file implements a function that determines whether +** or not a given identifier is really an SQL keyword. The same thing +** might be implemented more directly using a hand-written hash table. +** But by using this automatically generated code, the size of the code +** is substantially reduced. This is important for embedded applications +** on platforms with limited memory. +*/ +/* Hash score: 231 */ +/* zKWText[] encodes 1007 bytes of keyword text in 667 bytes */ +/* REINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXPLAINSTEADDATABASELECT */ +/* ABLEFTHENDEFERRABLELSEXCLUDELETEMPORARYISNULLSAVEPOINTERSECT */ +/* IESNOTNULLIKEXCEPTRANSACTIONATURALTERAISEXCLUSIVEXISTS */ +/* CONSTRAINTOFFSETRIGGERANGENERATEDETACHAVINGLOBEGINNEREFERENCES */ +/* UNIQUERYWITHOUTERELEASEATTACHBETWEENOTHINGROUPSCASCADEFAULT */ +/* CASECOLLATECREATECURRENT_DATEIMMEDIATEJOINSERTMATCHPLANALYZE */ +/* PRAGMATERIALIZEDEFERREDISTINCTUPDATEVALUESVIRTUALWAYSWHENWHERE */ +/* CURSIVEABORTAFTERENAMEANDROPARTITIONAUTOINCREMENTCASTCOLUMN */ +/* COMMITCONFLICTCROSSCURRENT_TIMESTAMPRECEDINGFAILASTFILTER */ +/* EPLACEFIRSTFOLLOWINGFROMFULLIMITIFORDERESTRICTOTHERSOVER */ +/* ETURNINGRIGHTROLLBACKROWSUNBOUNDEDUNIONUSINGVACUUMVIEWINDOWBY */ +/* INITIALLYPRIMARY */ +static const char zKWText[666] = { + 'R','E','I','N','D','E','X','E','D','E','S','C','A','P','E','A','C','H', + 'E','C','K','E','Y','B','E','F','O','R','E','I','G','N','O','R','E','G', + 'E','X','P','L','A','I','N','S','T','E','A','D','D','A','T','A','B','A', + 'S','E','L','E','C','T','A','B','L','E','F','T','H','E','N','D','E','F', + 'E','R','R','A','B','L','E','L','S','E','X','C','L','U','D','E','L','E', + 'T','E','M','P','O','R','A','R','Y','I','S','N','U','L','L','S','A','V', + 'E','P','O','I','N','T','E','R','S','E','C','T','I','E','S','N','O','T', + 'N','U','L','L','I','K','E','X','C','E','P','T','R','A','N','S','A','C', + 'T','I','O','N','A','T','U','R','A','L','T','E','R','A','I','S','E','X', + 'C','L','U','S','I','V','E','X','I','S','T','S','C','O','N','S','T','R', + 'A','I','N','T','O','F','F','S','E','T','R','I','G','G','E','R','A','N', + 'G','E','N','E','R','A','T','E','D','E','T','A','C','H','A','V','I','N', + 'G','L','O','B','E','G','I','N','N','E','R','E','F','E','R','E','N','C', + 'E','S','U','N','I','Q','U','E','R','Y','W','I','T','H','O','U','T','E', + 'R','E','L','E','A','S','E','A','T','T','A','C','H','B','E','T','W','E', + 'E','N','O','T','H','I','N','G','R','O','U','P','S','C','A','S','C','A', + 'D','E','F','A','U','L','T','C','A','S','E','C','O','L','L','A','T','E', + 'C','R','E','A','T','E','C','U','R','R','E','N','T','_','D','A','T','E', + 'I','M','M','E','D','I','A','T','E','J','O','I','N','S','E','R','T','M', + 'A','T','C','H','P','L','A','N','A','L','Y','Z','E','P','R','A','G','M', + 'A','T','E','R','I','A','L','I','Z','E','D','E','F','E','R','R','E','D', + 'I','S','T','I','N','C','T','U','P','D','A','T','E','V','A','L','U','E', + 'S','V','I','R','T','U','A','L','W','A','Y','S','W','H','E','N','W','H', + 'E','R','E','C','U','R','S','I','V','E','A','B','O','R','T','A','F','T', + 'E','R','E','N','A','M','E','A','N','D','R','O','P','A','R','T','I','T', + 'I','O','N','A','U','T','O','I','N','C','R','E','M','E','N','T','C','A', + 'S','T','C','O','L','U','M','N','C','O','M','M','I','T','C','O','N','F', + 'L','I','C','T','C','R','O','S','S','C','U','R','R','E','N','T','_','T', + 'I','M','E','S','T','A','M','P','R','E','C','E','D','I','N','G','F','A', + 'I','L','A','S','T','F','I','L','T','E','R','E','P','L','A','C','E','F', + 'I','R','S','T','F','O','L','L','O','W','I','N','G','F','R','O','M','F', + 'U','L','L','I','M','I','T','I','F','O','R','D','E','R','E','S','T','R', + 'I','C','T','O','T','H','E','R','S','O','V','E','R','E','T','U','R','N', + 'I','N','G','R','I','G','H','T','R','O','L','L','B','A','C','K','R','O', + 'W','S','U','N','B','O','U','N','D','E','D','U','N','I','O','N','U','S', + 'I','N','G','V','A','C','U','U','M','V','I','E','W','I','N','D','O','W', + 'B','Y','I','N','I','T','I','A','L','L','Y','P','R','I','M','A','R','Y', +}; +/* aKWHash[i] is the hash value for the i-th keyword */ +static const unsigned char aKWHash[127] = { + 84, 92, 134, 82, 105, 29, 0, 0, 94, 0, 85, 72, 0, + 53, 35, 86, 15, 0, 42, 97, 54, 89, 135, 19, 0, 0, + 140, 0, 40, 129, 0, 22, 107, 0, 9, 0, 0, 123, 80, + 0, 78, 6, 0, 65, 103, 147, 0, 136, 115, 0, 0, 48, + 0, 90, 24, 0, 17, 0, 27, 70, 23, 26, 5, 60, 142, + 110, 122, 0, 73, 91, 71, 145, 61, 120, 74, 0, 49, 0, + 11, 41, 0, 113, 0, 0, 0, 109, 10, 111, 116, 125, 14, + 50, 124, 0, 100, 0, 18, 121, 144, 56, 130, 139, 88, 83, + 37, 30, 126, 0, 0, 108, 51, 131, 128, 0, 34, 0, 0, + 132, 0, 98, 38, 39, 0, 20, 45, 117, 93, +}; +/* aKWNext[] forms the hash collision chain. If aKWHash[i]==0 +** then the i-th keyword has no more hash collisions. Otherwise, +** the next keyword with the same hash is aKWHash[i]-1. */ +static const unsigned char aKWNext[147] = { + 0, 0, 0, 0, 4, 0, 43, 0, 0, 106, 114, 0, 0, + 0, 2, 0, 0, 143, 0, 0, 0, 13, 0, 0, 0, 0, + 141, 0, 0, 119, 52, 0, 0, 137, 12, 0, 0, 62, 0, + 138, 0, 133, 0, 0, 36, 0, 0, 28, 77, 0, 0, 0, + 0, 59, 0, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 69, 0, 0, 0, 0, 0, 146, 3, 0, 58, 0, 1, + 75, 0, 0, 0, 31, 0, 0, 0, 0, 0, 127, 0, 104, + 0, 64, 66, 63, 0, 0, 0, 0, 0, 46, 0, 16, 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 101, 0, + 112, 21, 7, 67, 0, 79, 96, 118, 0, 0, 68, 0, 0, + 99, 44, 0, 55, 0, 76, 0, 95, 32, 33, 57, 25, 0, + 102, 0, 0, 87, +}; +/* aKWLen[i] is the length (in bytes) of the i-th keyword */ +static const unsigned char aKWLen[147] = { + 7, 7, 5, 4, 6, 4, 5, 3, 6, 7, 3, 6, 6, + 7, 7, 3, 8, 2, 6, 5, 4, 4, 3, 10, 4, 7, + 6, 9, 4, 2, 6, 5, 9, 9, 4, 7, 3, 2, 4, + 4, 6, 11, 6, 2, 7, 5, 5, 9, 6, 10, 4, 6, + 2, 3, 7, 5, 9, 6, 6, 4, 5, 5, 10, 6, 5, + 7, 4, 5, 7, 6, 7, 7, 6, 5, 7, 3, 7, 4, + 7, 6, 12, 9, 4, 6, 5, 4, 7, 6, 12, 8, 8, + 2, 6, 6, 7, 6, 4, 5, 9, 5, 5, 6, 3, 4, + 9, 13, 2, 2, 4, 6, 6, 8, 5, 17, 12, 7, 9, + 4, 4, 6, 7, 5, 9, 4, 4, 5, 2, 5, 8, 6, + 4, 9, 5, 8, 4, 3, 9, 5, 5, 6, 4, 6, 2, + 2, 9, 3, 7, +}; +/* aKWOffset[i] is the index into zKWText[] of the start of +** the text for the i-th keyword. */ +static const unsigned short int aKWOffset[147] = { + 0, 2, 2, 8, 9, 14, 16, 20, 23, 25, 25, 29, 33, + 36, 41, 46, 48, 53, 54, 59, 62, 65, 67, 69, 78, 81, + 86, 90, 90, 94, 99, 101, 105, 111, 119, 123, 123, 123, 126, + 129, 132, 137, 142, 146, 147, 152, 156, 160, 168, 174, 181, 184, + 184, 187, 189, 195, 198, 206, 211, 216, 219, 222, 226, 236, 239, + 244, 244, 248, 252, 259, 265, 271, 277, 277, 283, 284, 288, 295, + 299, 306, 312, 324, 333, 335, 341, 346, 348, 355, 359, 370, 377, + 378, 385, 391, 397, 402, 408, 412, 415, 424, 429, 433, 439, 441, + 444, 453, 455, 457, 466, 470, 476, 482, 490, 495, 495, 495, 511, + 520, 523, 527, 532, 539, 544, 553, 557, 560, 565, 567, 571, 579, + 585, 588, 597, 602, 610, 610, 614, 623, 628, 633, 639, 642, 645, + 648, 650, 655, 659, +}; +/* aKWCode[i] is the parser symbol code for the i-th keyword */ +static const unsigned char aKWCode[147] = { + TK_REINDEX, TK_INDEXED, TK_INDEX, TK_DESC, TK_ESCAPE, + TK_EACH, TK_CHECK, TK_KEY, TK_BEFORE, TK_FOREIGN, + TK_FOR, TK_IGNORE, TK_LIKE_KW, TK_EXPLAIN, TK_INSTEAD, + TK_ADD, TK_DATABASE, TK_AS, TK_SELECT, TK_TABLE, + TK_JOIN_KW, TK_THEN, TK_END, TK_DEFERRABLE, TK_ELSE, + TK_EXCLUDE, TK_DELETE, TK_TEMP, TK_TEMP, TK_OR, + TK_ISNULL, TK_NULLS, TK_SAVEPOINT, TK_INTERSECT, TK_TIES, + TK_NOTNULL, TK_NOT, TK_NO, TK_NULL, TK_LIKE_KW, + TK_EXCEPT, TK_TRANSACTION,TK_ACTION, TK_ON, TK_JOIN_KW, + TK_ALTER, TK_RAISE, TK_EXCLUSIVE, TK_EXISTS, TK_CONSTRAINT, + TK_INTO, TK_OFFSET, TK_OF, TK_SET, TK_TRIGGER, + TK_RANGE, TK_GENERATED, TK_DETACH, TK_HAVING, TK_LIKE_KW, + TK_BEGIN, TK_JOIN_KW, TK_REFERENCES, TK_UNIQUE, TK_QUERY, + TK_WITHOUT, TK_WITH, TK_JOIN_KW, TK_RELEASE, TK_ATTACH, + TK_BETWEEN, TK_NOTHING, TK_GROUPS, TK_GROUP, TK_CASCADE, + TK_ASC, TK_DEFAULT, TK_CASE, TK_COLLATE, TK_CREATE, + TK_CTIME_KW, TK_IMMEDIATE, TK_JOIN, TK_INSERT, TK_MATCH, + TK_PLAN, TK_ANALYZE, TK_PRAGMA, TK_MATERIALIZED, TK_DEFERRED, + TK_DISTINCT, TK_IS, TK_UPDATE, TK_VALUES, TK_VIRTUAL, + TK_ALWAYS, TK_WHEN, TK_WHERE, TK_RECURSIVE, TK_ABORT, + TK_AFTER, TK_RENAME, TK_AND, TK_DROP, TK_PARTITION, + TK_AUTOINCR, TK_TO, TK_IN, TK_CAST, TK_COLUMNKW, + TK_COMMIT, TK_CONFLICT, TK_JOIN_KW, TK_CTIME_KW, TK_CTIME_KW, + TK_CURRENT, TK_PRECEDING, TK_FAIL, TK_LAST, TK_FILTER, + TK_REPLACE, TK_FIRST, TK_FOLLOWING, TK_FROM, TK_JOIN_KW, + TK_LIMIT, TK_IF, TK_ORDER, TK_RESTRICT, TK_OTHERS, + TK_OVER, TK_RETURNING, TK_JOIN_KW, TK_ROLLBACK, TK_ROWS, + TK_ROW, TK_UNBOUNDED, TK_UNION, TK_USING, TK_VACUUM, + TK_VIEW, TK_WINDOW, TK_DO, TK_BY, TK_INITIALLY, + TK_ALL, TK_PRIMARY, +}; +/* Hash table decoded: +** 0: INSERT +** 1: IS +** 2: ROLLBACK TRIGGER +** 3: IMMEDIATE +** 4: PARTITION +** 5: TEMP +** 6: +** 7: +** 8: VALUES WITHOUT +** 9: +** 10: MATCH +** 11: NOTHING +** 12: +** 13: OF +** 14: TIES IGNORE +** 15: PLAN +** 16: INSTEAD INDEXED +** 17: +** 18: TRANSACTION RIGHT +** 19: WHEN +** 20: SET HAVING +** 21: MATERIALIZED IF +** 22: ROWS +** 23: SELECT +** 24: +** 25: +** 26: VACUUM SAVEPOINT +** 27: +** 28: LIKE UNION VIRTUAL REFERENCES +** 29: RESTRICT +** 30: +** 31: THEN REGEXP +** 32: TO +** 33: +** 34: BEFORE +** 35: +** 36: +** 37: FOLLOWING COLLATE CASCADE +** 38: CREATE +** 39: +** 40: CASE REINDEX +** 41: EACH +** 42: +** 43: QUERY +** 44: AND ADD +** 45: PRIMARY ANALYZE +** 46: +** 47: ROW ASC DETACH +** 48: CURRENT_TIME CURRENT_DATE +** 49: +** 50: +** 51: EXCLUSIVE TEMPORARY +** 52: +** 53: DEFERRED +** 54: DEFERRABLE +** 55: +** 56: DATABASE +** 57: +** 58: DELETE VIEW GENERATED +** 59: ATTACH +** 60: END +** 61: EXCLUDE +** 62: ESCAPE DESC +** 63: GLOB +** 64: WINDOW ELSE +** 65: COLUMN +** 66: FIRST +** 67: +** 68: GROUPS ALL +** 69: DISTINCT DROP KEY +** 70: BETWEEN +** 71: INITIALLY +** 72: BEGIN +** 73: FILTER CHECK ACTION +** 74: GROUP INDEX +** 75: +** 76: EXISTS DEFAULT +** 77: +** 78: FOR CURRENT_TIMESTAMP +** 79: EXCEPT +** 80: +** 81: CROSS +** 82: +** 83: +** 84: +** 85: CAST +** 86: FOREIGN AUTOINCREMENT +** 87: COMMIT +** 88: CURRENT AFTER ALTER +** 89: FULL FAIL CONFLICT +** 90: EXPLAIN +** 91: CONSTRAINT +** 92: FROM ALWAYS +** 93: +** 94: ABORT +** 95: +** 96: AS DO +** 97: REPLACE WITH RELEASE +** 98: BY RENAME +** 99: RANGE RAISE +** 100: OTHERS +** 101: USING NULLS +** 102: PRAGMA +** 103: JOIN ISNULL OFFSET +** 104: NOT +** 105: OR LAST LEFT +** 106: LIMIT +** 107: +** 108: +** 109: IN +** 110: INTO +** 111: OVER RECURSIVE +** 112: ORDER OUTER +** 113: +** 114: INTERSECT UNBOUNDED +** 115: +** 116: +** 117: RETURNING ON +** 118: +** 119: WHERE +** 120: NO INNER +** 121: NULL +** 122: +** 123: TABLE +** 124: NATURAL NOTNULL +** 125: PRECEDING +** 126: UPDATE UNIQUE +*/ +/* Check to see if z[0..n-1] is a keyword. If it is, write the +** parser symbol code for that keyword into *pType. Always +** return the integer n (the length of the token). */ +static int keywordCode(const char *z, int n, int *pType){ + int i, j; + const char *zKW; + if( n>=2 ){ + i = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n*1) % 127; + for(i=((int)aKWHash[i])-1; i>=0; i=((int)aKWNext[i])-1){ + if( aKWLen[i]!=n ) continue; + zKW = &zKWText[aKWOffset[i]]; +#ifdef SQLITE_ASCII + if( (z[0]&~0x20)!=zKW[0] ) continue; + if( (z[1]&~0x20)!=zKW[1] ) continue; + j = 2; + while( j=SQLITE_N_KEYWORD ) return SQLITE_ERROR; + *pzName = zKWText + aKWOffset[i]; + *pnName = aKWLen[i]; + return SQLITE_OK; +} +SQLITE_API int sqlite3_keyword_count(void){ return SQLITE_N_KEYWORD; } +SQLITE_API int sqlite3_keyword_check(const char *zName, int nName){ + return TK_ID!=sqlite3KeywordCode((const u8*)zName, nName); +} + +/************** End of keywordhash.h *****************************************/ +/************** Continuing where we left off in tokenize.c *******************/ + + +/* +** If X is a character that can be used in an identifier then +** IdChar(X) will be true. Otherwise it is false. +** +** For ASCII, any character with the high-order bit set is +** allowed in an identifier. For 7-bit characters, +** sqlite3IsIdChar[X] must be 1. +** +** For EBCDIC, the rules are more complex but have the same +** end result. +** +** Ticket #1066. the SQL standard does not allow '$' in the +** middle of identifiers. But many SQL implementations do. +** SQLite will allow '$' in identifiers for compatibility. +** But the feature is undocumented. +*/ +#ifdef SQLITE_ASCII +#define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) +#endif +#ifdef SQLITE_EBCDIC +SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ +}; +#define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) +#endif + +/* Make the IdChar function accessible from ctime.c and alter.c */ +SQLITE_PRIVATE int sqlite3IsIdChar(u8 c){ return IdChar(c); } + +#ifndef SQLITE_OMIT_WINDOWFUNC +/* +** Return the id of the next token in string (*pz). Before returning, set +** (*pz) to point to the byte following the parsed token. +*/ +static int getToken(const unsigned char **pz){ + const unsigned char *z = *pz; + int t; /* Token type to return */ + do { + z += sqlite3GetToken(z, &t); + }while( t==TK_SPACE ); + if( t==TK_ID + || t==TK_STRING + || t==TK_JOIN_KW + || t==TK_WINDOW + || t==TK_OVER + || sqlite3ParserFallback(t)==TK_ID + ){ + t = TK_ID; + } + *pz = z; + return t; +} + +/* +** The following three functions are called immediately after the tokenizer +** reads the keywords WINDOW, OVER and FILTER, respectively, to determine +** whether the token should be treated as a keyword or an SQL identifier. +** This cannot be handled by the usual lemon %fallback method, due to +** the ambiguity in some constructions. e.g. +** +** SELECT sum(x) OVER ... +** +** In the above, "OVER" might be a keyword, or it might be an alias for the +** sum(x) expression. If a "%fallback ID OVER" directive were added to +** grammar, then SQLite would always treat "OVER" as an alias, making it +** impossible to call a window-function without a FILTER clause. +** +** WINDOW is treated as a keyword if: +** +** * the following token is an identifier, or a keyword that can fallback +** to being an identifier, and +** * the token after than one is TK_AS. +** +** OVER is a keyword if: +** +** * the previous token was TK_RP, and +** * the next token is either TK_LP or an identifier. +** +** FILTER is a keyword if: +** +** * the previous token was TK_RP, and +** * the next token is TK_LP. +*/ +static int analyzeWindowKeyword(const unsigned char *z){ + int t; + t = getToken(&z); + if( t!=TK_ID ) return TK_ID; + t = getToken(&z); + if( t!=TK_AS ) return TK_ID; + return TK_WINDOW; +} +static int analyzeOverKeyword(const unsigned char *z, int lastToken){ + if( lastToken==TK_RP ){ + int t = getToken(&z); + if( t==TK_LP || t==TK_ID ) return TK_OVER; + } + return TK_ID; +} +static int analyzeFilterKeyword(const unsigned char *z, int lastToken){ + if( lastToken==TK_RP && getToken(&z)==TK_LP ){ + return TK_FILTER; + } + return TK_ID; +} +#endif /* SQLITE_OMIT_WINDOWFUNC */ + +/* +** Return the length (in bytes) of the token that begins at z[0]. +** Store the token type in *tokenType before returning. +*/ +SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *z, int *tokenType){ + int i, c; + switch( aiClass[*z] ){ /* Switch on the character-class of the first byte + ** of the token. See the comment on the CC_ defines + ** above. */ + case CC_SPACE: { + testcase( z[0]==' ' ); + testcase( z[0]=='\t' ); + testcase( z[0]=='\n' ); + testcase( z[0]=='\f' ); + testcase( z[0]=='\r' ); + for(i=1; sqlite3Isspace(z[i]); i++){} + *tokenType = TK_SPACE; + return i; + } + case CC_MINUS: { + if( z[1]=='-' ){ + for(i=2; (c=z[i])!=0 && c!='\n'; i++){} + *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ + return i; + }else if( z[1]=='>' ){ + *tokenType = TK_PTR; + return 2 + (z[2]=='>'); + } + *tokenType = TK_MINUS; + return 1; + } + case CC_LP: { + *tokenType = TK_LP; + return 1; + } + case CC_RP: { + *tokenType = TK_RP; + return 1; + } + case CC_SEMI: { + *tokenType = TK_SEMI; + return 1; + } + case CC_PLUS: { + *tokenType = TK_PLUS; + return 1; + } + case CC_STAR: { + *tokenType = TK_STAR; + return 1; + } + case CC_SLASH: { + if( z[1]!='*' || z[2]==0 ){ + *tokenType = TK_SLASH; + return 1; + } + for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} + if( c ) i++; + *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ + return i; + } + case CC_PERCENT: { + *tokenType = TK_REM; + return 1; + } + case CC_EQ: { + *tokenType = TK_EQ; + return 1 + (z[1]=='='); + } + case CC_LT: { + if( (c=z[1])=='=' ){ + *tokenType = TK_LE; + return 2; + }else if( c=='>' ){ + *tokenType = TK_NE; + return 2; + }else if( c=='<' ){ + *tokenType = TK_LSHIFT; + return 2; + }else{ + *tokenType = TK_LT; + return 1; + } + } + case CC_GT: { + if( (c=z[1])=='=' ){ + *tokenType = TK_GE; + return 2; + }else if( c=='>' ){ + *tokenType = TK_RSHIFT; + return 2; + }else{ + *tokenType = TK_GT; + return 1; + } + } + case CC_BANG: { + if( z[1]!='=' ){ + *tokenType = TK_ILLEGAL; + return 1; + }else{ + *tokenType = TK_NE; + return 2; + } + } + case CC_PIPE: { + if( z[1]!='|' ){ + *tokenType = TK_BITOR; + return 1; + }else{ + *tokenType = TK_CONCAT; + return 2; + } + } + case CC_COMMA: { + *tokenType = TK_COMMA; + return 1; + } + case CC_AND: { + *tokenType = TK_BITAND; + return 1; + } + case CC_TILDA: { + *tokenType = TK_BITNOT; + return 1; + } + case CC_QUOTE: { + int delim = z[0]; + testcase( delim=='`' ); + testcase( delim=='\'' ); + testcase( delim=='"' ); + for(i=1; (c=z[i])!=0; i++){ + if( c==delim ){ + if( z[i+1]==delim ){ + i++; + }else{ + break; + } + } + } + if( c=='\'' ){ + *tokenType = TK_STRING; + return i+1; + }else if( c!=0 ){ + *tokenType = TK_ID; + return i+1; + }else{ + *tokenType = TK_ILLEGAL; + return i; + } + } + case CC_DOT: { +#ifndef SQLITE_OMIT_FLOATING_POINT + if( !sqlite3Isdigit(z[1]) ) +#endif + { + *tokenType = TK_DOT; + return 1; + } + /* If the next character is a digit, this is a floating point + ** number that begins with ".". Fall thru into the next case */ + /* no break */ deliberate_fall_through + } + case CC_DIGIT: { + testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); + testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); + testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); + testcase( z[0]=='9' ); + *tokenType = TK_INTEGER; +#ifndef SQLITE_OMIT_HEX_INTEGER + if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ + for(i=3; sqlite3Isxdigit(z[i]); i++){} + return i; + } +#endif + for(i=0; sqlite3Isdigit(z[i]); i++){} +#ifndef SQLITE_OMIT_FLOATING_POINT + if( z[i]=='.' ){ + i++; + while( sqlite3Isdigit(z[i]) ){ i++; } + *tokenType = TK_FLOAT; + } + if( (z[i]=='e' || z[i]=='E') && + ( sqlite3Isdigit(z[i+1]) + || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) + ) + ){ + i += 2; + while( sqlite3Isdigit(z[i]) ){ i++; } + *tokenType = TK_FLOAT; + } +#endif + while( IdChar(z[i]) ){ + *tokenType = TK_ILLEGAL; + i++; + } + return i; + } + case CC_QUOTE2: { + for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} + *tokenType = c==']' ? TK_ID : TK_ILLEGAL; + return i; + } + case CC_VARNUM: { + *tokenType = TK_VARIABLE; + for(i=1; sqlite3Isdigit(z[i]); i++){} + return i; + } + case CC_DOLLAR: + case CC_VARALPHA: { + int n = 0; + testcase( z[0]=='$' ); testcase( z[0]=='@' ); + testcase( z[0]==':' ); testcase( z[0]=='#' ); + *tokenType = TK_VARIABLE; + for(i=1; (c=z[i])!=0; i++){ + if( IdChar(c) ){ + n++; +#ifndef SQLITE_OMIT_TCL_VARIABLE + }else if( c=='(' && n>0 ){ + do{ + i++; + }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); + if( c==')' ){ + i++; + }else{ + *tokenType = TK_ILLEGAL; + } + break; + }else if( c==':' && z[i+1]==':' ){ + i++; +#endif + }else{ + break; + } + } + if( n==0 ) *tokenType = TK_ILLEGAL; + return i; + } + case CC_KYWD0: { + for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} + if( IdChar(z[i]) ){ + /* This token started out using characters that can appear in keywords, + ** but z[i] is a character not allowed within keywords, so this must + ** be an identifier instead */ + i++; + break; + } + *tokenType = TK_ID; + return keywordCode((char*)z, i, tokenType); + } + case CC_X: { +#ifndef SQLITE_OMIT_BLOB_LITERAL + testcase( z[0]=='x' ); testcase( z[0]=='X' ); + if( z[1]=='\'' ){ + *tokenType = TK_BLOB; + for(i=2; sqlite3Isxdigit(z[i]); i++){} + if( z[i]!='\'' || i%2 ){ + *tokenType = TK_ILLEGAL; + while( z[i] && z[i]!='\'' ){ i++; } + } + if( z[i] ) i++; + return i; + } +#endif + /* If it is not a BLOB literal, then it must be an ID, since no + ** SQL keywords start with the letter 'x'. Fall through */ + /* no break */ deliberate_fall_through + } + case CC_KYWD: + case CC_ID: { + i = 1; + break; + } + case CC_BOM: { + if( z[1]==0xbb && z[2]==0xbf ){ + *tokenType = TK_SPACE; + return 3; + } + i = 1; + break; + } + case CC_NUL: { + *tokenType = TK_ILLEGAL; + return 0; + } + default: { + *tokenType = TK_ILLEGAL; + return 1; + } + } + while( IdChar(z[i]) ){ i++; } + *tokenType = TK_ID; + return i; +} + +/* +** Run the parser on the given SQL string. +*/ +SQLITE_PRIVATE int sqlite3RunParser(Parse *pParse, const char *zSql){ + int nErr = 0; /* Number of errors encountered */ + void *pEngine; /* The LEMON-generated LALR(1) parser */ + int n = 0; /* Length of the next token token */ + int tokenType; /* type of the next token */ + int lastTokenParsed = -1; /* type of the previous token */ + sqlite3 *db = pParse->db; /* The database connection */ + int mxSqlLen; /* Max length of an SQL string */ + Parse *pParentParse = 0; /* Outer parse context, if any */ +#ifdef sqlite3Parser_ENGINEALWAYSONSTACK + yyParser sEngine; /* Space to hold the Lemon-generated Parser object */ +#endif + VVA_ONLY( u8 startedWithOom = db->mallocFailed ); + + assert( zSql!=0 ); + mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; + if( db->nVdbeActive==0 ){ + AtomicStore(&db->u1.isInterrupted, 0); + } + pParse->rc = SQLITE_OK; + pParse->zTail = zSql; +#ifdef SQLITE_DEBUG + if( db->flags & SQLITE_ParserTrace ){ + printf("parser: [[[%s]]]\n", zSql); + sqlite3ParserTrace(stdout, "parser: "); + }else{ + sqlite3ParserTrace(0, 0); + } +#endif +#ifdef sqlite3Parser_ENGINEALWAYSONSTACK + pEngine = &sEngine; + sqlite3ParserInit(pEngine, pParse); +#else + pEngine = sqlite3ParserAlloc(sqlite3Malloc, pParse); + if( pEngine==0 ){ + sqlite3OomFault(db); + return SQLITE_NOMEM_BKPT; + } +#endif + assert( pParse->pNewTable==0 ); + assert( pParse->pNewTrigger==0 ); + assert( pParse->nVar==0 ); + assert( pParse->pVList==0 ); + pParentParse = db->pParse; + db->pParse = pParse; + while( 1 ){ + n = sqlite3GetToken((u8*)zSql, &tokenType); + mxSqlLen -= n; + if( mxSqlLen<0 ){ + pParse->rc = SQLITE_TOOBIG; + break; + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( tokenType>=TK_WINDOW ){ + assert( tokenType==TK_SPACE || tokenType==TK_OVER || tokenType==TK_FILTER + || tokenType==TK_ILLEGAL || tokenType==TK_WINDOW + ); +#else + if( tokenType>=TK_SPACE ){ + assert( tokenType==TK_SPACE || tokenType==TK_ILLEGAL ); +#endif /* SQLITE_OMIT_WINDOWFUNC */ + if( AtomicLoad(&db->u1.isInterrupted) ){ + pParse->rc = SQLITE_INTERRUPT; + pParse->nErr++; + break; + } + if( tokenType==TK_SPACE ){ + zSql += n; + continue; + } + if( zSql[0]==0 ){ + /* Upon reaching the end of input, call the parser two more times + ** with tokens TK_SEMI and 0, in that order. */ + if( lastTokenParsed==TK_SEMI ){ + tokenType = 0; + }else if( lastTokenParsed==0 ){ + break; + }else{ + tokenType = TK_SEMI; + } + n = 0; +#ifndef SQLITE_OMIT_WINDOWFUNC + }else if( tokenType==TK_WINDOW ){ + assert( n==6 ); + tokenType = analyzeWindowKeyword((const u8*)&zSql[6]); + }else if( tokenType==TK_OVER ){ + assert( n==4 ); + tokenType = analyzeOverKeyword((const u8*)&zSql[4], lastTokenParsed); + }else if( tokenType==TK_FILTER ){ + assert( n==6 ); + tokenType = analyzeFilterKeyword((const u8*)&zSql[6], lastTokenParsed); +#endif /* SQLITE_OMIT_WINDOWFUNC */ + }else{ + Token x; + x.z = zSql; + x.n = n; + sqlite3ErrorMsg(pParse, "unrecognized token: \"%T\"", &x); + break; + } + } + pParse->sLastToken.z = zSql; + pParse->sLastToken.n = n; + sqlite3Parser(pEngine, tokenType, pParse->sLastToken); + lastTokenParsed = tokenType; + zSql += n; + assert( db->mallocFailed==0 || pParse->rc!=SQLITE_OK || startedWithOom ); + if( pParse->rc!=SQLITE_OK ) break; + } + assert( nErr==0 ); +#ifdef YYTRACKMAXSTACKDEPTH + sqlite3_mutex_enter(sqlite3MallocMutex()); + sqlite3StatusHighwater(SQLITE_STATUS_PARSER_STACK, + sqlite3ParserStackPeak(pEngine) + ); + sqlite3_mutex_leave(sqlite3MallocMutex()); +#endif /* YYDEBUG */ +#ifdef sqlite3Parser_ENGINEALWAYSONSTACK + sqlite3ParserFinalize(pEngine); +#else + sqlite3ParserFree(pEngine, sqlite3_free); +#endif + if( db->mallocFailed ){ + pParse->rc = SQLITE_NOMEM_BKPT; + } + if( pParse->zErrMsg || (pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE) ){ + if( pParse->zErrMsg==0 ){ + pParse->zErrMsg = sqlite3MPrintf(db, "%s", sqlite3ErrStr(pParse->rc)); + } + sqlite3_log(pParse->rc, "%s in \"%s\"", pParse->zErrMsg, pParse->zTail); + nErr++; + } + pParse->zTail = zSql; +#ifndef SQLITE_OMIT_VIRTUALTABLE + sqlite3_free(pParse->apVtabLock); +#endif + + if( pParse->pNewTable && !IN_SPECIAL_PARSE ){ + /* If the pParse->declareVtab flag is set, do not delete any table + ** structure built up in pParse->pNewTable. The calling code (see vtab.c) + ** will take responsibility for freeing the Table structure. + */ + sqlite3DeleteTable(db, pParse->pNewTable); + } + if( pParse->pNewTrigger && !IN_RENAME_OBJECT ){ + sqlite3DeleteTrigger(db, pParse->pNewTrigger); + } + sqlite3DbFree(db, pParse->pVList); + db->pParse = pParentParse; + assert( nErr==0 || pParse->rc!=SQLITE_OK ); + return nErr; +} + + +#ifdef SQLITE_ENABLE_NORMALIZE +/* +** Insert a single space character into pStr if the current string +** ends with an identifier +*/ +static void addSpaceSeparator(sqlite3_str *pStr){ + if( pStr->nChar && sqlite3IsIdChar(pStr->zText[pStr->nChar-1]) ){ + sqlite3_str_append(pStr, " ", 1); + } +} + +/* +** Compute a normalization of the SQL given by zSql[0..nSql-1]. Return +** the normalization in space obtained from sqlite3DbMalloc(). Or return +** NULL if anything goes wrong or if zSql is NULL. +*/ +SQLITE_PRIVATE char *sqlite3Normalize( + Vdbe *pVdbe, /* VM being reprepared */ + const char *zSql /* The original SQL string */ +){ + sqlite3 *db; /* The database connection */ + int i; /* Next unread byte of zSql[] */ + int n; /* length of current token */ + int tokenType; /* type of current token */ + int prevType = 0; /* Previous non-whitespace token */ + int nParen; /* Number of nested levels of parentheses */ + int iStartIN; /* Start of RHS of IN operator in z[] */ + int nParenAtIN; /* Value of nParent at start of RHS of IN operator */ + u32 j; /* Bytes of normalized SQL generated so far */ + sqlite3_str *pStr; /* The normalized SQL string under construction */ + + db = sqlite3VdbeDb(pVdbe); + tokenType = -1; + nParen = iStartIN = nParenAtIN = 0; + pStr = sqlite3_str_new(db); + assert( pStr!=0 ); /* sqlite3_str_new() never returns NULL */ + for(i=0; zSql[i] && pStr->accError==0; i+=n){ + if( tokenType!=TK_SPACE ){ + prevType = tokenType; + } + n = sqlite3GetToken((unsigned char*)zSql+i, &tokenType); + if( NEVER(n<=0) ) break; + switch( tokenType ){ + case TK_SPACE: { + break; + } + case TK_NULL: { + if( prevType==TK_IS || prevType==TK_NOT ){ + sqlite3_str_append(pStr, " NULL", 5); + break; + } + /* Fall through */ + } + case TK_STRING: + case TK_INTEGER: + case TK_FLOAT: + case TK_VARIABLE: + case TK_BLOB: { + sqlite3_str_append(pStr, "?", 1); + break; + } + case TK_LP: { + nParen++; + if( prevType==TK_IN ){ + iStartIN = pStr->nChar; + nParenAtIN = nParen; + } + sqlite3_str_append(pStr, "(", 1); + break; + } + case TK_RP: { + if( iStartIN>0 && nParen==nParenAtIN ){ + assert( pStr->nChar>=(u32)iStartIN ); + pStr->nChar = iStartIN+1; + sqlite3_str_append(pStr, "?,?,?", 5); + iStartIN = 0; + } + nParen--; + sqlite3_str_append(pStr, ")", 1); + break; + } + case TK_ID: { + iStartIN = 0; + j = pStr->nChar; + if( sqlite3Isquote(zSql[i]) ){ + char *zId = sqlite3DbStrNDup(db, zSql+i, n); + int nId; + int eType = 0; + if( zId==0 ) break; + sqlite3Dequote(zId); + if( zSql[i]=='"' && sqlite3VdbeUsesDoubleQuotedString(pVdbe, zId) ){ + sqlite3_str_append(pStr, "?", 1); + sqlite3DbFree(db, zId); + break; + } + nId = sqlite3Strlen30(zId); + if( sqlite3GetToken((u8*)zId, &eType)==nId && eType==TK_ID ){ + addSpaceSeparator(pStr); + sqlite3_str_append(pStr, zId, nId); + }else{ + sqlite3_str_appendf(pStr, "\"%w\"", zId); + } + sqlite3DbFree(db, zId); + }else{ + addSpaceSeparator(pStr); + sqlite3_str_append(pStr, zSql+i, n); + } + while( jnChar ){ + pStr->zText[j] = sqlite3Tolower(pStr->zText[j]); + j++; + } + break; + } + case TK_SELECT: { + iStartIN = 0; + /* fall through */ + } + default: { + if( sqlite3IsIdChar(zSql[i]) ) addSpaceSeparator(pStr); + j = pStr->nChar; + sqlite3_str_append(pStr, zSql+i, n); + while( jnChar ){ + pStr->zText[j] = sqlite3Toupper(pStr->zText[j]); + j++; + } + break; + } + } + } + if( tokenType!=TK_SEMI ) sqlite3_str_append(pStr, ";", 1); + return sqlite3_str_finish(pStr); +} +#endif /* SQLITE_ENABLE_NORMALIZE */ + +/************** End of tokenize.c ********************************************/ +/************** Begin file complete.c ****************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** An tokenizer for SQL +** +** This file contains C code that implements the sqlite3_complete() API. +** This code used to be part of the tokenizer.c source file. But by +** separating it out, the code will be automatically omitted from +** static links that do not use it. +*/ +/* #include "sqliteInt.h" */ +#ifndef SQLITE_OMIT_COMPLETE + +/* +** This is defined in tokenize.c. We just have to import the definition. +*/ +#ifndef SQLITE_AMALGAMATION +#ifdef SQLITE_ASCII +#define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) +#endif +#ifdef SQLITE_EBCDIC +SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[]; +#define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) +#endif +#endif /* SQLITE_AMALGAMATION */ + + +/* +** Token types used by the sqlite3_complete() routine. See the header +** comments on that procedure for additional information. +*/ +#define tkSEMI 0 +#define tkWS 1 +#define tkOTHER 2 +#ifndef SQLITE_OMIT_TRIGGER +#define tkEXPLAIN 3 +#define tkCREATE 4 +#define tkTEMP 5 +#define tkTRIGGER 6 +#define tkEND 7 +#endif + +/* +** Return TRUE if the given SQL string ends in a semicolon. +** +** Special handling is require for CREATE TRIGGER statements. +** Whenever the CREATE TRIGGER keywords are seen, the statement +** must end with ";END;". +** +** This implementation uses a state machine with 8 states: +** +** (0) INVALID We have not yet seen a non-whitespace character. +** +** (1) START At the beginning or end of an SQL statement. This routine +** returns 1 if it ends in the START state and 0 if it ends +** in any other state. +** +** (2) NORMAL We are in the middle of statement which ends with a single +** semicolon. +** +** (3) EXPLAIN The keyword EXPLAIN has been seen at the beginning of +** a statement. +** +** (4) CREATE The keyword CREATE has been seen at the beginning of a +** statement, possibly preceded by EXPLAIN and/or followed by +** TEMP or TEMPORARY +** +** (5) TRIGGER We are in the middle of a trigger definition that must be +** ended by a semicolon, the keyword END, and another semicolon. +** +** (6) SEMI We've seen the first semicolon in the ";END;" that occurs at +** the end of a trigger definition. +** +** (7) END We've seen the ";END" of the ";END;" that occurs at the end +** of a trigger definition. +** +** Transitions between states above are determined by tokens extracted +** from the input. The following tokens are significant: +** +** (0) tkSEMI A semicolon. +** (1) tkWS Whitespace. +** (2) tkOTHER Any other SQL token. +** (3) tkEXPLAIN The "explain" keyword. +** (4) tkCREATE The "create" keyword. +** (5) tkTEMP The "temp" or "temporary" keyword. +** (6) tkTRIGGER The "trigger" keyword. +** (7) tkEND The "end" keyword. +** +** Whitespace never causes a state transition and is always ignored. +** This means that a SQL string of all whitespace is invalid. +** +** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed +** to recognize the end of a trigger can be omitted. All we have to do +** is look for a semicolon that is not part of an string or comment. +*/ +SQLITE_API int sqlite3_complete(const char *zSql){ + u8 state = 0; /* Current state, using numbers defined in header comment */ + u8 token; /* Value of the next token */ + +#ifndef SQLITE_OMIT_TRIGGER + /* A complex statement machine used to detect the end of a CREATE TRIGGER + ** statement. This is the normal case. + */ + static const u8 trans[8][8] = { + /* Token: */ + /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */ + /* 0 INVALID: */ { 1, 0, 2, 3, 4, 2, 2, 2, }, + /* 1 START: */ { 1, 1, 2, 3, 4, 2, 2, 2, }, + /* 2 NORMAL: */ { 1, 2, 2, 2, 2, 2, 2, 2, }, + /* 3 EXPLAIN: */ { 1, 3, 3, 2, 4, 2, 2, 2, }, + /* 4 CREATE: */ { 1, 4, 2, 2, 2, 4, 5, 2, }, + /* 5 TRIGGER: */ { 6, 5, 5, 5, 5, 5, 5, 5, }, + /* 6 SEMI: */ { 6, 6, 5, 5, 5, 5, 5, 7, }, + /* 7 END: */ { 1, 7, 5, 5, 5, 5, 5, 5, }, + }; +#else + /* If triggers are not supported by this compile then the statement machine + ** used to detect the end of a statement is much simpler + */ + static const u8 trans[3][3] = { + /* Token: */ + /* State: ** SEMI WS OTHER */ + /* 0 INVALID: */ { 1, 0, 2, }, + /* 1 START: */ { 1, 1, 2, }, + /* 2 NORMAL: */ { 1, 2, 2, }, + }; +#endif /* SQLITE_OMIT_TRIGGER */ + +#ifdef SQLITE_ENABLE_API_ARMOR + if( zSql==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + + while( *zSql ){ + switch( *zSql ){ + case ';': { /* A semicolon */ + token = tkSEMI; + break; + } + case ' ': + case '\r': + case '\t': + case '\n': + case '\f': { /* White space is ignored */ + token = tkWS; + break; + } + case '/': { /* C-style comments */ + if( zSql[1]!='*' ){ + token = tkOTHER; + break; + } + zSql += 2; + while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } + if( zSql[0]==0 ) return 0; + zSql++; + token = tkWS; + break; + } + case '-': { /* SQL-style comments from "--" to end of line */ + if( zSql[1]!='-' ){ + token = tkOTHER; + break; + } + while( *zSql && *zSql!='\n' ){ zSql++; } + if( *zSql==0 ) return state==1; + token = tkWS; + break; + } + case '[': { /* Microsoft-style identifiers in [...] */ + zSql++; + while( *zSql && *zSql!=']' ){ zSql++; } + if( *zSql==0 ) return 0; + token = tkOTHER; + break; + } + case '`': /* Grave-accent quoted symbols used by MySQL */ + case '"': /* single- and double-quoted strings */ + case '\'': { + int c = *zSql; + zSql++; + while( *zSql && *zSql!=c ){ zSql++; } + if( *zSql==0 ) return 0; + token = tkOTHER; + break; + } + default: { +#ifdef SQLITE_EBCDIC + unsigned char c; +#endif + if( IdChar((u8)*zSql) ){ + /* Keywords and unquoted identifiers */ + int nId; + for(nId=1; IdChar(zSql[nId]); nId++){} +#ifdef SQLITE_OMIT_TRIGGER + token = tkOTHER; +#else + switch( *zSql ){ + case 'c': case 'C': { + if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){ + token = tkCREATE; + }else{ + token = tkOTHER; + } + break; + } + case 't': case 'T': { + if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){ + token = tkTRIGGER; + }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){ + token = tkTEMP; + }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){ + token = tkTEMP; + }else{ + token = tkOTHER; + } + break; + } + case 'e': case 'E': { + if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){ + token = tkEND; + }else +#ifndef SQLITE_OMIT_EXPLAIN + if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){ + token = tkEXPLAIN; + }else +#endif + { + token = tkOTHER; + } + break; + } + default: { + token = tkOTHER; + break; + } + } +#endif /* SQLITE_OMIT_TRIGGER */ + zSql += nId-1; + }else{ + /* Operators and special symbols */ + token = tkOTHER; + } + break; + } + } + state = trans[state][token]; + zSql++; + } + return state==1; +} + +#ifndef SQLITE_OMIT_UTF16 +/* +** This routine is the same as the sqlite3_complete() routine described +** above, except that the parameter is required to be UTF-16 encoded, not +** UTF-8. +*/ +SQLITE_API int sqlite3_complete16(const void *zSql){ + sqlite3_value *pVal; + char const *zSql8; + int rc; + +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + pVal = sqlite3ValueNew(0); + sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); + zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); + if( zSql8 ){ + rc = sqlite3_complete(zSql8); + }else{ + rc = SQLITE_NOMEM_BKPT; + } + sqlite3ValueFree(pVal); + return rc & 0xff; +} +#endif /* SQLITE_OMIT_UTF16 */ +#endif /* SQLITE_OMIT_COMPLETE */ + +/************** End of complete.c ********************************************/ +/************** Begin file main.c ********************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Main file for the SQLite library. The routines in this file +** implement the programmer interface to the library. Routines in +** other files are for internal use by SQLite and should not be +** accessed by users of the library. +*/ +/* #include "sqliteInt.h" */ + +#ifdef SQLITE_ENABLE_FTS3 +/************** Include fts3.h in the middle of main.c ***********************/ +/************** Begin file fts3.h ********************************************/ +/* +** 2006 Oct 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file is used by programs that want to link against the +** FTS3 library. All it does is declare the sqlite3Fts3Init() interface. +*/ +/* #include "sqlite3.h" */ + +#if 0 +extern "C" { +#endif /* __cplusplus */ + +SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db); + +#if 0 +} /* extern "C" */ +#endif /* __cplusplus */ + +/************** End of fts3.h ************************************************/ +/************** Continuing where we left off in main.c ***********************/ +#endif +#ifdef SQLITE_ENABLE_RTREE +/************** Include rtree.h in the middle of main.c **********************/ +/************** Begin file rtree.h *******************************************/ +/* +** 2008 May 26 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file is used by programs that want to link against the +** RTREE library. All it does is declare the sqlite3RtreeInit() interface. +*/ +/* #include "sqlite3.h" */ + +#ifdef SQLITE_OMIT_VIRTUALTABLE +# undef SQLITE_ENABLE_RTREE +#endif + +#if 0 +extern "C" { +#endif /* __cplusplus */ + +SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db); + +#if 0 +} /* extern "C" */ +#endif /* __cplusplus */ + +/************** End of rtree.h ***********************************************/ +/************** Continuing where we left off in main.c ***********************/ +#endif +#if defined(SQLITE_ENABLE_ICU) || defined(SQLITE_ENABLE_ICU_COLLATIONS) +/************** Include sqliteicu.h in the middle of main.c ******************/ +/************** Begin file sqliteicu.h ***************************************/ +/* +** 2008 May 26 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file is used by programs that want to link against the +** ICU extension. All it does is declare the sqlite3IcuInit() interface. +*/ +/* #include "sqlite3.h" */ + +#if 0 +extern "C" { +#endif /* __cplusplus */ + +SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db); + +#if 0 +} /* extern "C" */ +#endif /* __cplusplus */ + +/************** End of sqliteicu.h *******************************************/ +/************** Continuing where we left off in main.c ***********************/ +#endif + +/* +** This is an extension initializer that is a no-op and always +** succeeds, except that it fails if the fault-simulation is set +** to 500. +*/ +static int sqlite3TestExtInit(sqlite3 *db){ + (void)db; + return sqlite3FaultSim(500); +} + + +/* +** Forward declarations of external module initializer functions +** for modules that need them. +*/ +#ifdef SQLITE_ENABLE_FTS1 +SQLITE_PRIVATE int sqlite3Fts1Init(sqlite3*); +#endif +#ifdef SQLITE_ENABLE_FTS2 +SQLITE_PRIVATE int sqlite3Fts2Init(sqlite3*); +#endif +#ifdef SQLITE_ENABLE_FTS5 +SQLITE_PRIVATE int sqlite3Fts5Init(sqlite3*); +#endif +#ifdef SQLITE_ENABLE_STMTVTAB +SQLITE_PRIVATE int sqlite3StmtVtabInit(sqlite3*); +#endif + +/* +** An array of pointers to extension initializer functions for +** built-in extensions. +*/ +static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = { +#ifdef SQLITE_ENABLE_FTS1 + sqlite3Fts1Init, +#endif +#ifdef SQLITE_ENABLE_FTS2 + sqlite3Fts2Init, +#endif +#ifdef SQLITE_ENABLE_FTS3 + sqlite3Fts3Init, +#endif +#ifdef SQLITE_ENABLE_FTS5 + sqlite3Fts5Init, +#endif +#if defined(SQLITE_ENABLE_ICU) || defined(SQLITE_ENABLE_ICU_COLLATIONS) + sqlite3IcuInit, +#endif +#ifdef SQLITE_ENABLE_RTREE + sqlite3RtreeInit, +#endif +#ifdef SQLITE_ENABLE_DBPAGE_VTAB + sqlite3DbpageRegister, +#endif +#ifdef SQLITE_ENABLE_DBSTAT_VTAB + sqlite3DbstatRegister, +#endif + sqlite3TestExtInit, +#if !defined(SQLITE_OMIT_VIRTUALTABLE) && !defined(SQLITE_OMIT_JSON) + sqlite3JsonTableFunctions, +#endif +#ifdef SQLITE_ENABLE_STMTVTAB + sqlite3StmtVtabInit, +#endif +#ifdef SQLITE_ENABLE_BYTECODE_VTAB + sqlite3VdbeBytecodeVtabInit, +#endif +}; + +#ifndef SQLITE_AMALGAMATION +/* IMPLEMENTATION-OF: R-46656-45156 The sqlite3_version[] string constant +** contains the text of SQLITE_VERSION macro. +*/ +SQLITE_API const char sqlite3_version[] = SQLITE_VERSION; +#endif + +/* IMPLEMENTATION-OF: R-53536-42575 The sqlite3_libversion() function returns +** a pointer to the to the sqlite3_version[] string constant. +*/ +SQLITE_API const char *sqlite3_libversion(void){ return sqlite3_version; } + +/* IMPLEMENTATION-OF: R-25063-23286 The sqlite3_sourceid() function returns a +** pointer to a string constant whose value is the same as the +** SQLITE_SOURCE_ID C preprocessor macro. Except if SQLite is built using +** an edited copy of the amalgamation, then the last four characters of +** the hash might be different from SQLITE_SOURCE_ID. +*/ +/* SQLITE_API const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } */ + +/* IMPLEMENTATION-OF: R-35210-63508 The sqlite3_libversion_number() function +** returns an integer equal to SQLITE_VERSION_NUMBER. +*/ +SQLITE_API int sqlite3_libversion_number(void){ return SQLITE_VERSION_NUMBER; } + +/* IMPLEMENTATION-OF: R-20790-14025 The sqlite3_threadsafe() function returns +** zero if and only if SQLite was compiled with mutexing code omitted due to +** the SQLITE_THREADSAFE compile-time option being set to 0. +*/ +SQLITE_API int sqlite3_threadsafe(void){ return SQLITE_THREADSAFE; } + +/* +** When compiling the test fixture or with debugging enabled (on Win32), +** this variable being set to non-zero will cause OSTRACE macros to emit +** extra diagnostic information. +*/ +#ifdef SQLITE_HAVE_OS_TRACE +# ifndef SQLITE_DEBUG_OS_TRACE +# define SQLITE_DEBUG_OS_TRACE 0 +# endif + int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE; +#endif + +#if !defined(SQLITE_OMIT_TRACE) && defined(SQLITE_ENABLE_IOTRACE) +/* +** If the following function pointer is not NULL and if +** SQLITE_ENABLE_IOTRACE is enabled, then messages describing +** I/O active are written using this function. These messages +** are intended for debugging activity only. +*/ +SQLITE_API void (SQLITE_CDECL *sqlite3IoTrace)(const char*, ...) = 0; +#endif + +/* +** If the following global variable points to a string which is the +** name of a directory, then that directory will be used to store +** temporary files. +** +** See also the "PRAGMA temp_store_directory" SQL command. +*/ +SQLITE_API char *sqlite3_temp_directory = 0; + +/* +** If the following global variable points to a string which is the +** name of a directory, then that directory will be used to store +** all database files specified with a relative pathname. +** +** See also the "PRAGMA data_store_directory" SQL command. +*/ +SQLITE_API char *sqlite3_data_directory = 0; + +/* +** Initialize SQLite. +** +** This routine must be called to initialize the memory allocation, +** VFS, and mutex subsystems prior to doing any serious work with +** SQLite. But as long as you do not compile with SQLITE_OMIT_AUTOINIT +** this routine will be called automatically by key routines such as +** sqlite3_open(). +** +** This routine is a no-op except on its very first call for the process, +** or for the first call after a call to sqlite3_shutdown. +** +** The first thread to call this routine runs the initialization to +** completion. If subsequent threads call this routine before the first +** thread has finished the initialization process, then the subsequent +** threads must block until the first thread finishes with the initialization. +** +** The first thread might call this routine recursively. Recursive +** calls to this routine should not block, of course. Otherwise the +** initialization process would never complete. +** +** Let X be the first thread to enter this routine. Let Y be some other +** thread. Then while the initial invocation of this routine by X is +** incomplete, it is required that: +** +** * Calls to this routine from Y must block until the outer-most +** call by X completes. +** +** * Recursive calls to this routine from thread X return immediately +** without blocking. +*/ +SQLITE_API int sqlite3_initialize(void){ + MUTEX_LOGIC( sqlite3_mutex *pMainMtx; ) /* The main static mutex */ + int rc; /* Result code */ +#ifdef SQLITE_EXTRA_INIT + int bRunExtraInit = 0; /* Extra initialization needed */ +#endif + +#ifdef SQLITE_OMIT_WSD + rc = sqlite3_wsd_init(4096, 24); + if( rc!=SQLITE_OK ){ + return rc; + } +#endif + + /* If the following assert() fails on some obscure processor/compiler + ** combination, the work-around is to set the correct pointer + ** size at compile-time using -DSQLITE_PTRSIZE=n compile-time option */ + assert( SQLITE_PTRSIZE==sizeof(char*) ); + + /* If SQLite is already completely initialized, then this call + ** to sqlite3_initialize() should be a no-op. But the initialization + ** must be complete. So isInit must not be set until the very end + ** of this routine. + */ + if( sqlite3GlobalConfig.isInit ){ + sqlite3MemoryBarrier(); + return SQLITE_OK; + } + + /* Make sure the mutex subsystem is initialized. If unable to + ** initialize the mutex subsystem, return early with the error. + ** If the system is so sick that we are unable to allocate a mutex, + ** there is not much SQLite is going to be able to do. + ** + ** The mutex subsystem must take care of serializing its own + ** initialization. + */ + rc = sqlite3MutexInit(); + if( rc ) return rc; + + /* Initialize the malloc() system and the recursive pInitMutex mutex. + ** This operation is protected by the STATIC_MAIN mutex. Note that + ** MutexAlloc() is called for a static mutex prior to initializing the + ** malloc subsystem - this implies that the allocation of a static + ** mutex must not require support from the malloc subsystem. + */ + MUTEX_LOGIC( pMainMtx = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); ) + sqlite3_mutex_enter(pMainMtx); + sqlite3GlobalConfig.isMutexInit = 1; + if( !sqlite3GlobalConfig.isMallocInit ){ + rc = sqlite3MallocInit(); + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.isMallocInit = 1; + if( !sqlite3GlobalConfig.pInitMutex ){ + sqlite3GlobalConfig.pInitMutex = + sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); + if( sqlite3GlobalConfig.bCoreMutex && !sqlite3GlobalConfig.pInitMutex ){ + rc = SQLITE_NOMEM_BKPT; + } + } + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.nRefInitMutex++; + } + sqlite3_mutex_leave(pMainMtx); + + /* If rc is not SQLITE_OK at this point, then either the malloc + ** subsystem could not be initialized or the system failed to allocate + ** the pInitMutex mutex. Return an error in either case. */ + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Do the rest of the initialization under the recursive mutex so + ** that we will be able to handle recursive calls into + ** sqlite3_initialize(). The recursive calls normally come through + ** sqlite3_os_init() when it invokes sqlite3_vfs_register(), but other + ** recursive calls might also be possible. + ** + ** IMPLEMENTATION-OF: R-00140-37445 SQLite automatically serializes calls + ** to the xInit method, so the xInit method need not be threadsafe. + ** + ** The following mutex is what serializes access to the appdef pcache xInit + ** methods. The sqlite3_pcache_methods.xInit() all is embedded in the + ** call to sqlite3PcacheInitialize(). + */ + sqlite3_mutex_enter(sqlite3GlobalConfig.pInitMutex); + if( sqlite3GlobalConfig.isInit==0 && sqlite3GlobalConfig.inProgress==0 ){ + sqlite3GlobalConfig.inProgress = 1; +#ifdef SQLITE_ENABLE_SQLLOG + { + extern void sqlite3_init_sqllog(void); + sqlite3_init_sqllog(); + } +#endif + memset(&sqlite3BuiltinFunctions, 0, sizeof(sqlite3BuiltinFunctions)); + sqlite3RegisterBuiltinFunctions(); + if( sqlite3GlobalConfig.isPCacheInit==0 ){ + rc = sqlite3PcacheInitialize(); + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.isPCacheInit = 1; + rc = sqlite3OsInit(); + } +#ifndef SQLITE_OMIT_DESERIALIZE + if( rc==SQLITE_OK ){ + rc = sqlite3MemdbInit(); + } +#endif + if( rc==SQLITE_OK ){ + sqlite3PCacheBufferSetup( sqlite3GlobalConfig.pPage, + sqlite3GlobalConfig.szPage, sqlite3GlobalConfig.nPage); + sqlite3MemoryBarrier(); + sqlite3GlobalConfig.isInit = 1; +#ifdef SQLITE_EXTRA_INIT + bRunExtraInit = 1; +#endif + } + sqlite3GlobalConfig.inProgress = 0; + } + sqlite3_mutex_leave(sqlite3GlobalConfig.pInitMutex); + + /* Go back under the static mutex and clean up the recursive + ** mutex to prevent a resource leak. + */ + sqlite3_mutex_enter(pMainMtx); + sqlite3GlobalConfig.nRefInitMutex--; + if( sqlite3GlobalConfig.nRefInitMutex<=0 ){ + assert( sqlite3GlobalConfig.nRefInitMutex==0 ); + sqlite3_mutex_free(sqlite3GlobalConfig.pInitMutex); + sqlite3GlobalConfig.pInitMutex = 0; + } + sqlite3_mutex_leave(pMainMtx); + + /* The following is just a sanity check to make sure SQLite has + ** been compiled correctly. It is important to run this code, but + ** we don't want to run it too often and soak up CPU cycles for no + ** reason. So we run it once during initialization. + */ +#ifndef NDEBUG +#ifndef SQLITE_OMIT_FLOATING_POINT + /* This section of code's only "output" is via assert() statements. */ + if( rc==SQLITE_OK ){ + u64 x = (((u64)1)<<63)-1; + double y; + assert(sizeof(x)==8); + assert(sizeof(x)==sizeof(y)); + memcpy(&y, &x, 8); + assert( sqlite3IsNaN(y) ); + } +#endif +#endif + + /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT + ** compile-time option. + */ +#ifdef SQLITE_EXTRA_INIT + if( bRunExtraInit ){ + int SQLITE_EXTRA_INIT(const char*); + rc = SQLITE_EXTRA_INIT(0); + } +#endif + + return rc; +} + +/* +** Undo the effects of sqlite3_initialize(). Must not be called while +** there are outstanding database connections or memory allocations or +** while any part of SQLite is otherwise in use in any thread. This +** routine is not threadsafe. But it is safe to invoke this routine +** on when SQLite is already shut down. If SQLite is already shut down +** when this routine is invoked, then this routine is a harmless no-op. +*/ +SQLITE_API int sqlite3_shutdown(void){ +#ifdef SQLITE_OMIT_WSD + int rc = sqlite3_wsd_init(4096, 24); + if( rc!=SQLITE_OK ){ + return rc; + } +#endif + + if( sqlite3GlobalConfig.isInit ){ +#ifdef SQLITE_EXTRA_SHUTDOWN + void SQLITE_EXTRA_SHUTDOWN(void); + SQLITE_EXTRA_SHUTDOWN(); +#endif + sqlite3_os_end(); + sqlite3_reset_auto_extension(); + sqlite3GlobalConfig.isInit = 0; + } + if( sqlite3GlobalConfig.isPCacheInit ){ + sqlite3PcacheShutdown(); + sqlite3GlobalConfig.isPCacheInit = 0; + } + if( sqlite3GlobalConfig.isMallocInit ){ + sqlite3MallocEnd(); + sqlite3GlobalConfig.isMallocInit = 0; + +#ifndef SQLITE_OMIT_SHUTDOWN_DIRECTORIES + /* The heap subsystem has now been shutdown and these values are supposed + ** to be NULL or point to memory that was obtained from sqlite3_malloc(), + ** which would rely on that heap subsystem; therefore, make sure these + ** values cannot refer to heap memory that was just invalidated when the + ** heap subsystem was shutdown. This is only done if the current call to + ** this function resulted in the heap subsystem actually being shutdown. + */ + sqlite3_data_directory = 0; + sqlite3_temp_directory = 0; +#endif + } + if( sqlite3GlobalConfig.isMutexInit ){ + sqlite3MutexEnd(); + sqlite3GlobalConfig.isMutexInit = 0; + } + + return SQLITE_OK; +} + +/* +** This API allows applications to modify the global configuration of +** the SQLite library at run-time. +** +** This routine should only be called when there are no outstanding +** database connections or memory allocations. This routine is not +** threadsafe. Failure to heed these warnings can lead to unpredictable +** behavior. +*/ +SQLITE_API int sqlite3_config(int op, ...){ + va_list ap; + int rc = SQLITE_OK; + + /* sqlite3_config() shall return SQLITE_MISUSE if it is invoked while + ** the SQLite library is in use. */ + if( sqlite3GlobalConfig.isInit ) return SQLITE_MISUSE_BKPT; + + va_start(ap, op); + switch( op ){ + + /* Mutex configuration options are only available in a threadsafe + ** compile. + */ +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-54466-46756 */ + case SQLITE_CONFIG_SINGLETHREAD: { + /* EVIDENCE-OF: R-02748-19096 This option sets the threading mode to + ** Single-thread. */ + sqlite3GlobalConfig.bCoreMutex = 0; /* Disable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-20520-54086 */ + case SQLITE_CONFIG_MULTITHREAD: { + /* EVIDENCE-OF: R-14374-42468 This option sets the threading mode to + ** Multi-thread. */ + sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-59593-21810 */ + case SQLITE_CONFIG_SERIALIZED: { + /* EVIDENCE-OF: R-41220-51800 This option sets the threading mode to + ** Serialized. */ + sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 1; /* Enable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-63666-48755 */ + case SQLITE_CONFIG_MUTEX: { + /* Specify an alternative mutex implementation */ + sqlite3GlobalConfig.mutex = *va_arg(ap, sqlite3_mutex_methods*); + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-14450-37597 */ + case SQLITE_CONFIG_GETMUTEX: { + /* Retrieve the current mutex implementation */ + *va_arg(ap, sqlite3_mutex_methods*) = sqlite3GlobalConfig.mutex; + break; + } +#endif + + case SQLITE_CONFIG_MALLOC: { + /* EVIDENCE-OF: R-55594-21030 The SQLITE_CONFIG_MALLOC option takes a + ** single argument which is a pointer to an instance of the + ** sqlite3_mem_methods structure. The argument specifies alternative + ** low-level memory allocation routines to be used in place of the memory + ** allocation routines built into SQLite. */ + sqlite3GlobalConfig.m = *va_arg(ap, sqlite3_mem_methods*); + break; + } + case SQLITE_CONFIG_GETMALLOC: { + /* EVIDENCE-OF: R-51213-46414 The SQLITE_CONFIG_GETMALLOC option takes a + ** single argument which is a pointer to an instance of the + ** sqlite3_mem_methods structure. The sqlite3_mem_methods structure is + ** filled with the currently defined memory allocation routines. */ + if( sqlite3GlobalConfig.m.xMalloc==0 ) sqlite3MemSetDefault(); + *va_arg(ap, sqlite3_mem_methods*) = sqlite3GlobalConfig.m; + break; + } + case SQLITE_CONFIG_MEMSTATUS: { + /* EVIDENCE-OF: R-61275-35157 The SQLITE_CONFIG_MEMSTATUS option takes + ** single argument of type int, interpreted as a boolean, which enables + ** or disables the collection of memory allocation statistics. */ + sqlite3GlobalConfig.bMemstat = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_SMALL_MALLOC: { + sqlite3GlobalConfig.bSmallMalloc = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_PAGECACHE: { + /* EVIDENCE-OF: R-18761-36601 There are three arguments to + ** SQLITE_CONFIG_PAGECACHE: A pointer to 8-byte aligned memory (pMem), + ** the size of each page cache line (sz), and the number of cache lines + ** (N). */ + sqlite3GlobalConfig.pPage = va_arg(ap, void*); + sqlite3GlobalConfig.szPage = va_arg(ap, int); + sqlite3GlobalConfig.nPage = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_PCACHE_HDRSZ: { + /* EVIDENCE-OF: R-39100-27317 The SQLITE_CONFIG_PCACHE_HDRSZ option takes + ** a single parameter which is a pointer to an integer and writes into + ** that integer the number of extra bytes per page required for each page + ** in SQLITE_CONFIG_PAGECACHE. */ + *va_arg(ap, int*) = + sqlite3HeaderSizeBtree() + + sqlite3HeaderSizePcache() + + sqlite3HeaderSizePcache1(); + break; + } + + case SQLITE_CONFIG_PCACHE: { + /* no-op */ + break; + } + case SQLITE_CONFIG_GETPCACHE: { + /* now an error */ + rc = SQLITE_ERROR; + break; + } + + case SQLITE_CONFIG_PCACHE2: { + /* EVIDENCE-OF: R-63325-48378 The SQLITE_CONFIG_PCACHE2 option takes a + ** single argument which is a pointer to an sqlite3_pcache_methods2 + ** object. This object specifies the interface to a custom page cache + ** implementation. */ + sqlite3GlobalConfig.pcache2 = *va_arg(ap, sqlite3_pcache_methods2*); + break; + } + case SQLITE_CONFIG_GETPCACHE2: { + /* EVIDENCE-OF: R-22035-46182 The SQLITE_CONFIG_GETPCACHE2 option takes a + ** single argument which is a pointer to an sqlite3_pcache_methods2 + ** object. SQLite copies of the current page cache implementation into + ** that object. */ + if( sqlite3GlobalConfig.pcache2.xInit==0 ){ + sqlite3PCacheSetDefault(); + } + *va_arg(ap, sqlite3_pcache_methods2*) = sqlite3GlobalConfig.pcache2; + break; + } + +/* EVIDENCE-OF: R-06626-12911 The SQLITE_CONFIG_HEAP option is only +** available if SQLite is compiled with either SQLITE_ENABLE_MEMSYS3 or +** SQLITE_ENABLE_MEMSYS5 and returns SQLITE_ERROR if invoked otherwise. */ +#if defined(SQLITE_ENABLE_MEMSYS3) || defined(SQLITE_ENABLE_MEMSYS5) + case SQLITE_CONFIG_HEAP: { + /* EVIDENCE-OF: R-19854-42126 There are three arguments to + ** SQLITE_CONFIG_HEAP: An 8-byte aligned pointer to the memory, the + ** number of bytes in the memory buffer, and the minimum allocation size. + */ + sqlite3GlobalConfig.pHeap = va_arg(ap, void*); + sqlite3GlobalConfig.nHeap = va_arg(ap, int); + sqlite3GlobalConfig.mnReq = va_arg(ap, int); + + if( sqlite3GlobalConfig.mnReq<1 ){ + sqlite3GlobalConfig.mnReq = 1; + }else if( sqlite3GlobalConfig.mnReq>(1<<12) ){ + /* cap min request size at 2^12 */ + sqlite3GlobalConfig.mnReq = (1<<12); + } + + if( sqlite3GlobalConfig.pHeap==0 ){ + /* EVIDENCE-OF: R-49920-60189 If the first pointer (the memory pointer) + ** is NULL, then SQLite reverts to using its default memory allocator + ** (the system malloc() implementation), undoing any prior invocation of + ** SQLITE_CONFIG_MALLOC. + ** + ** Setting sqlite3GlobalConfig.m to all zeros will cause malloc to + ** revert to its default implementation when sqlite3_initialize() is run + */ + memset(&sqlite3GlobalConfig.m, 0, sizeof(sqlite3GlobalConfig.m)); + }else{ + /* EVIDENCE-OF: R-61006-08918 If the memory pointer is not NULL then the + ** alternative memory allocator is engaged to handle all of SQLites + ** memory allocation needs. */ +#ifdef SQLITE_ENABLE_MEMSYS3 + sqlite3GlobalConfig.m = *sqlite3MemGetMemsys3(); +#endif +#ifdef SQLITE_ENABLE_MEMSYS5 + sqlite3GlobalConfig.m = *sqlite3MemGetMemsys5(); +#endif + } + break; + } +#endif + + case SQLITE_CONFIG_LOOKASIDE: { + sqlite3GlobalConfig.szLookaside = va_arg(ap, int); + sqlite3GlobalConfig.nLookaside = va_arg(ap, int); + break; + } + + /* Record a pointer to the logger function and its first argument. + ** The default is NULL. Logging is disabled if the function pointer is + ** NULL. + */ + case SQLITE_CONFIG_LOG: { + /* MSVC is picky about pulling func ptrs from va lists. + ** http://support.microsoft.com/kb/47961 + ** sqlite3GlobalConfig.xLog = va_arg(ap, void(*)(void*,int,const char*)); + */ + typedef void(*LOGFUNC_t)(void*,int,const char*); + sqlite3GlobalConfig.xLog = va_arg(ap, LOGFUNC_t); + sqlite3GlobalConfig.pLogArg = va_arg(ap, void*); + break; + } + + /* EVIDENCE-OF: R-55548-33817 The compile-time setting for URI filenames + ** can be changed at start-time using the + ** sqlite3_config(SQLITE_CONFIG_URI,1) or + ** sqlite3_config(SQLITE_CONFIG_URI,0) configuration calls. + */ + case SQLITE_CONFIG_URI: { + /* EVIDENCE-OF: R-25451-61125 The SQLITE_CONFIG_URI option takes a single + ** argument of type int. If non-zero, then URI handling is globally + ** enabled. If the parameter is zero, then URI handling is globally + ** disabled. */ + sqlite3GlobalConfig.bOpenUri = va_arg(ap, int); + break; + } + + case SQLITE_CONFIG_COVERING_INDEX_SCAN: { + /* EVIDENCE-OF: R-36592-02772 The SQLITE_CONFIG_COVERING_INDEX_SCAN + ** option takes a single integer argument which is interpreted as a + ** boolean in order to enable or disable the use of covering indices for + ** full table scans in the query optimizer. */ + sqlite3GlobalConfig.bUseCis = va_arg(ap, int); + break; + } + +#ifdef SQLITE_ENABLE_SQLLOG + case SQLITE_CONFIG_SQLLOG: { + typedef void(*SQLLOGFUNC_t)(void*, sqlite3*, const char*, int); + sqlite3GlobalConfig.xSqllog = va_arg(ap, SQLLOGFUNC_t); + sqlite3GlobalConfig.pSqllogArg = va_arg(ap, void *); + break; + } +#endif + + case SQLITE_CONFIG_MMAP_SIZE: { + /* EVIDENCE-OF: R-58063-38258 SQLITE_CONFIG_MMAP_SIZE takes two 64-bit + ** integer (sqlite3_int64) values that are the default mmap size limit + ** (the default setting for PRAGMA mmap_size) and the maximum allowed + ** mmap size limit. */ + sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64); + sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); + /* EVIDENCE-OF: R-53367-43190 If either argument to this option is + ** negative, then that argument is changed to its compile-time default. + ** + ** EVIDENCE-OF: R-34993-45031 The maximum allowed mmap size will be + ** silently truncated if necessary so that it does not exceed the + ** compile-time maximum mmap size set by the SQLITE_MAX_MMAP_SIZE + ** compile-time option. + */ + if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ){ + mxMmap = SQLITE_MAX_MMAP_SIZE; + } + if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE; + if( szMmap>mxMmap) szMmap = mxMmap; + sqlite3GlobalConfig.mxMmap = mxMmap; + sqlite3GlobalConfig.szMmap = szMmap; + break; + } + +#if SQLITE_OS_WIN && defined(SQLITE_WIN32_MALLOC) /* IMP: R-04780-55815 */ + case SQLITE_CONFIG_WIN32_HEAPSIZE: { + /* EVIDENCE-OF: R-34926-03360 SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit + ** unsigned integer value that specifies the maximum size of the created + ** heap. */ + sqlite3GlobalConfig.nHeap = va_arg(ap, int); + break; + } +#endif + + case SQLITE_CONFIG_PMASZ: { + sqlite3GlobalConfig.szPma = va_arg(ap, unsigned int); + break; + } + + case SQLITE_CONFIG_STMTJRNL_SPILL: { + sqlite3GlobalConfig.nStmtSpill = va_arg(ap, int); + break; + } + +#ifdef SQLITE_ENABLE_SORTER_REFERENCES + case SQLITE_CONFIG_SORTERREF_SIZE: { + int iVal = va_arg(ap, int); + if( iVal<0 ){ + iVal = SQLITE_DEFAULT_SORTERREF_SIZE; + } + sqlite3GlobalConfig.szSorterRef = (u32)iVal; + break; + } +#endif /* SQLITE_ENABLE_SORTER_REFERENCES */ + +#ifndef SQLITE_OMIT_DESERIALIZE + case SQLITE_CONFIG_MEMDB_MAXSIZE: { + sqlite3GlobalConfig.mxMemdbSize = va_arg(ap, sqlite3_int64); + break; + } +#endif /* SQLITE_OMIT_DESERIALIZE */ + + default: { + rc = SQLITE_ERROR; + break; + } + } + va_end(ap); + return rc; +} + +/* +** Set up the lookaside buffers for a database connection. +** Return SQLITE_OK on success. +** If lookaside is already active, return SQLITE_BUSY. +** +** The sz parameter is the number of bytes in each lookaside slot. +** The cnt parameter is the number of slots. If pStart is NULL the +** space for the lookaside memory is obtained from sqlite3_malloc(). +** If pStart is not NULL then it is sz*cnt bytes of memory to use for +** the lookaside memory. +*/ +static int setupLookaside(sqlite3 *db, void *pBuf, int sz, int cnt){ +#ifndef SQLITE_OMIT_LOOKASIDE + void *pStart; + sqlite3_int64 szAlloc = sz*(sqlite3_int64)cnt; + int nBig; /* Number of full-size slots */ + int nSm; /* Number smaller LOOKASIDE_SMALL-byte slots */ + + if( sqlite3LookasideUsed(db,0)>0 ){ + return SQLITE_BUSY; + } + /* Free any existing lookaside buffer for this handle before + ** allocating a new one so we don't have to have space for + ** both at the same time. + */ + if( db->lookaside.bMalloced ){ + sqlite3_free(db->lookaside.pStart); + } + /* The size of a lookaside slot after ROUNDDOWN8 needs to be larger + ** than a pointer to be useful. + */ + sz = ROUNDDOWN8(sz); /* IMP: R-33038-09382 */ + if( sz<=(int)sizeof(LookasideSlot*) ) sz = 0; + if( cnt<0 ) cnt = 0; + if( sz==0 || cnt==0 ){ + sz = 0; + pStart = 0; + }else if( pBuf==0 ){ + sqlite3BeginBenignMalloc(); + pStart = sqlite3Malloc( szAlloc ); /* IMP: R-61949-35727 */ + sqlite3EndBenignMalloc(); + if( pStart ) szAlloc = sqlite3MallocSize(pStart); + }else{ + pStart = pBuf; + } +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + if( sz>=LOOKASIDE_SMALL*3 ){ + nBig = szAlloc/(3*LOOKASIDE_SMALL+sz); + nSm = (szAlloc - sz*nBig)/LOOKASIDE_SMALL; + }else if( sz>=LOOKASIDE_SMALL*2 ){ + nBig = szAlloc/(LOOKASIDE_SMALL+sz); + nSm = (szAlloc - sz*nBig)/LOOKASIDE_SMALL; + }else +#endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ + if( sz>0 ){ + nBig = szAlloc/sz; + nSm = 0; + }else{ + nBig = nSm = 0; + } + db->lookaside.pStart = pStart; + db->lookaside.pInit = 0; + db->lookaside.pFree = 0; + db->lookaside.sz = (u16)sz; + db->lookaside.szTrue = (u16)sz; + if( pStart ){ + int i; + LookasideSlot *p; + assert( sz > (int)sizeof(LookasideSlot*) ); + p = (LookasideSlot*)pStart; + for(i=0; ipNext = db->lookaside.pInit; + db->lookaside.pInit = p; + p = (LookasideSlot*)&((u8*)p)[sz]; + } +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + db->lookaside.pSmallInit = 0; + db->lookaside.pSmallFree = 0; + db->lookaside.pMiddle = p; + for(i=0; ipNext = db->lookaside.pSmallInit; + db->lookaside.pSmallInit = p; + p = (LookasideSlot*)&((u8*)p)[LOOKASIDE_SMALL]; + } +#endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ + assert( ((uptr)p)<=szAlloc + (uptr)pStart ); + db->lookaside.pEnd = p; + db->lookaside.bDisable = 0; + db->lookaside.bMalloced = pBuf==0 ?1:0; + db->lookaside.nSlot = nBig+nSm; + }else{ + db->lookaside.pStart = db; +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + db->lookaside.pSmallInit = 0; + db->lookaside.pSmallFree = 0; + db->lookaside.pMiddle = db; +#endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ + db->lookaside.pEnd = db; + db->lookaside.bDisable = 1; + db->lookaside.sz = 0; + db->lookaside.bMalloced = 0; + db->lookaside.nSlot = 0; + } + assert( sqlite3LookasideUsed(db,0)==0 ); +#endif /* SQLITE_OMIT_LOOKASIDE */ + return SQLITE_OK; +} + +/* +** Return the mutex associated with a database connection. +*/ +SQLITE_API sqlite3_mutex *sqlite3_db_mutex(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return db->mutex; +} + +/* +** Free up as much memory as we can from the given database +** connection. +*/ +SQLITE_API int sqlite3_db_release_memory(sqlite3 *db){ + int i; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + Pager *pPager = sqlite3BtreePager(pBt); + sqlite3PagerShrink(pPager); + } + } + sqlite3BtreeLeaveAll(db); + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +/* +** Flush any dirty pages in the pager-cache for any attached database +** to disk. +*/ +SQLITE_API int sqlite3_db_cacheflush(sqlite3 *db){ + int i; + int rc = SQLITE_OK; + int bSeenBusy = 0; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + sqlite3BtreeEnterAll(db); + for(i=0; rc==SQLITE_OK && inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt && sqlite3BtreeTxnState(pBt)==SQLITE_TXN_WRITE ){ + Pager *pPager = sqlite3BtreePager(pBt); + rc = sqlite3PagerFlush(pPager); + if( rc==SQLITE_BUSY ){ + bSeenBusy = 1; + rc = SQLITE_OK; + } + } + } + sqlite3BtreeLeaveAll(db); + sqlite3_mutex_leave(db->mutex); + return ((rc==SQLITE_OK && bSeenBusy) ? SQLITE_BUSY : rc); +} + +/* +** Configuration settings for an individual database connection +*/ +SQLITE_API int sqlite3_db_config(sqlite3 *db, int op, ...){ + va_list ap; + int rc; + va_start(ap, op); + switch( op ){ + case SQLITE_DBCONFIG_MAINDBNAME: { + /* IMP: R-06824-28531 */ + /* IMP: R-36257-52125 */ + db->aDb[0].zDbSName = va_arg(ap,char*); + rc = SQLITE_OK; + break; + } + case SQLITE_DBCONFIG_LOOKASIDE: { + void *pBuf = va_arg(ap, void*); /* IMP: R-26835-10964 */ + int sz = va_arg(ap, int); /* IMP: R-47871-25994 */ + int cnt = va_arg(ap, int); /* IMP: R-04460-53386 */ + rc = setupLookaside(db, pBuf, sz, cnt); + break; + } + default: { + static const struct { + int op; /* The opcode */ + u32 mask; /* Mask of the bit in sqlite3.flags to set/clear */ + } aFlagOp[] = { + { SQLITE_DBCONFIG_ENABLE_FKEY, SQLITE_ForeignKeys }, + { SQLITE_DBCONFIG_ENABLE_TRIGGER, SQLITE_EnableTrigger }, + { SQLITE_DBCONFIG_ENABLE_VIEW, SQLITE_EnableView }, + { SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, SQLITE_Fts3Tokenizer }, + { SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION, SQLITE_LoadExtension }, + { SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE, SQLITE_NoCkptOnClose }, + { SQLITE_DBCONFIG_ENABLE_QPSG, SQLITE_EnableQPSG }, + { SQLITE_DBCONFIG_TRIGGER_EQP, SQLITE_TriggerEQP }, + { SQLITE_DBCONFIG_RESET_DATABASE, SQLITE_ResetDatabase }, + { SQLITE_DBCONFIG_DEFENSIVE, SQLITE_Defensive }, + { SQLITE_DBCONFIG_WRITABLE_SCHEMA, SQLITE_WriteSchema| + SQLITE_NoSchemaError }, + { SQLITE_DBCONFIG_LEGACY_ALTER_TABLE, SQLITE_LegacyAlter }, + { SQLITE_DBCONFIG_DQS_DDL, SQLITE_DqsDDL }, + { SQLITE_DBCONFIG_DQS_DML, SQLITE_DqsDML }, + { SQLITE_DBCONFIG_LEGACY_FILE_FORMAT, SQLITE_LegacyFileFmt }, + { SQLITE_DBCONFIG_TRUSTED_SCHEMA, SQLITE_TrustedSchema }, + }; + unsigned int i; + rc = SQLITE_ERROR; /* IMP: R-42790-23372 */ + for(i=0; iflags; + if( onoff>0 ){ + db->flags |= aFlagOp[i].mask; + }else if( onoff==0 ){ + db->flags &= ~(u64)aFlagOp[i].mask; + } + if( oldFlags!=db->flags ){ + sqlite3ExpirePreparedStatements(db, 0); + } + if( pRes ){ + *pRes = (db->flags & aFlagOp[i].mask)!=0; + } + rc = SQLITE_OK; + break; + } + } + break; + } + } + va_end(ap); + return rc; +} + +/* +** This is the default collating function named "BINARY" which is always +** available. +*/ +static int binCollFunc( + void *NotUsed, + int nKey1, const void *pKey1, + int nKey2, const void *pKey2 +){ + int rc, n; + UNUSED_PARAMETER(NotUsed); + n = nKey1xCmp!=binCollFunc || strcmp(p->zName,"BINARY")==0 ); + return p==0 || p->xCmp==binCollFunc; +} + +/* +** Another built-in collating sequence: NOCASE. +** +** This collating sequence is intended to be used for "case independent +** comparison". SQLite's knowledge of upper and lower case equivalents +** extends only to the 26 characters used in the English language. +** +** At the moment there is only a UTF-8 implementation. +*/ +static int nocaseCollatingFunc( + void *NotUsed, + int nKey1, const void *pKey1, + int nKey2, const void *pKey2 +){ + int r = sqlite3StrNICmp( + (const char *)pKey1, (const char *)pKey2, (nKey1lastRowid; +} + +/* +** Set the value returned by the sqlite3_last_insert_rowid() API function. +*/ +SQLITE_API void sqlite3_set_last_insert_rowid(sqlite3 *db, sqlite3_int64 iRowid){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return; + } +#endif + sqlite3_mutex_enter(db->mutex); + db->lastRowid = iRowid; + sqlite3_mutex_leave(db->mutex); +} + +/* +** Return the number of changes in the most recent call to sqlite3_exec(). +*/ +SQLITE_API sqlite3_int64 sqlite3_changes64(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return db->nChange; +} +SQLITE_API int sqlite3_changes(sqlite3 *db){ + return (int)sqlite3_changes64(db); +} + +/* +** Return the number of changes since the database handle was opened. +*/ +SQLITE_API sqlite3_int64 sqlite3_total_changes64(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return db->nTotalChange; +} +SQLITE_API int sqlite3_total_changes(sqlite3 *db){ + return (int)sqlite3_total_changes64(db); +} + +/* +** Close all open savepoints. This function only manipulates fields of the +** database handle object, it does not close any savepoints that may be open +** at the b-tree/pager level. +*/ +SQLITE_PRIVATE void sqlite3CloseSavepoints(sqlite3 *db){ + while( db->pSavepoint ){ + Savepoint *pTmp = db->pSavepoint; + db->pSavepoint = pTmp->pNext; + sqlite3DbFree(db, pTmp); + } + db->nSavepoint = 0; + db->nStatement = 0; + db->isTransactionSavepoint = 0; +} + +/* +** Invoke the destructor function associated with FuncDef p, if any. Except, +** if this is not the last copy of the function, do not invoke it. Multiple +** copies of a single function are created when create_function() is called +** with SQLITE_ANY as the encoding. +*/ +static void functionDestroy(sqlite3 *db, FuncDef *p){ + FuncDestructor *pDestructor; + assert( (p->funcFlags & SQLITE_FUNC_BUILTIN)==0 ); + pDestructor = p->u.pDestructor; + if( pDestructor ){ + pDestructor->nRef--; + if( pDestructor->nRef==0 ){ + pDestructor->xDestroy(pDestructor->pUserData); + sqlite3DbFree(db, pDestructor); + } + } +} + +/* +** Disconnect all sqlite3_vtab objects that belong to database connection +** db. This is called when db is being closed. +*/ +static void disconnectAllVtab(sqlite3 *db){ +#ifndef SQLITE_OMIT_VIRTUALTABLE + int i; + HashElem *p; + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ + Schema *pSchema = db->aDb[i].pSchema; + if( pSchema ){ + for(p=sqliteHashFirst(&pSchema->tblHash); p; p=sqliteHashNext(p)){ + Table *pTab = (Table *)sqliteHashData(p); + if( IsVirtual(pTab) ) sqlite3VtabDisconnect(db, pTab); + } + } + } + for(p=sqliteHashFirst(&db->aModule); p; p=sqliteHashNext(p)){ + Module *pMod = (Module *)sqliteHashData(p); + if( pMod->pEpoTab ){ + sqlite3VtabDisconnect(db, pMod->pEpoTab); + } + } + sqlite3VtabUnlockList(db); + sqlite3BtreeLeaveAll(db); +#else + UNUSED_PARAMETER(db); +#endif +} + +/* +** Return TRUE if database connection db has unfinalized prepared +** statements or unfinished sqlite3_backup objects. +*/ +static int connectionIsBusy(sqlite3 *db){ + int j; + assert( sqlite3_mutex_held(db->mutex) ); + if( db->pVdbe ) return 1; + for(j=0; jnDb; j++){ + Btree *pBt = db->aDb[j].pBt; + if( pBt && sqlite3BtreeIsInBackup(pBt) ) return 1; + } + return 0; +} + +/* +** Close an existing SQLite database +*/ +static int sqlite3Close(sqlite3 *db, int forceZombie){ + if( !db ){ + /* EVIDENCE-OF: R-63257-11740 Calling sqlite3_close() or + ** sqlite3_close_v2() with a NULL pointer argument is a harmless no-op. */ + return SQLITE_OK; + } + if( !sqlite3SafetyCheckSickOrOk(db) ){ + return SQLITE_MISUSE_BKPT; + } + sqlite3_mutex_enter(db->mutex); + if( db->mTrace & SQLITE_TRACE_CLOSE ){ + db->trace.xV2(SQLITE_TRACE_CLOSE, db->pTraceArg, db, 0); + } + + /* Force xDisconnect calls on all virtual tables */ + disconnectAllVtab(db); + + /* If a transaction is open, the disconnectAllVtab() call above + ** will not have called the xDisconnect() method on any virtual + ** tables in the db->aVTrans[] array. The following sqlite3VtabRollback() + ** call will do so. We need to do this before the check for active + ** SQL statements below, as the v-table implementation may be storing + ** some prepared statements internally. + */ + sqlite3VtabRollback(db); + + /* Legacy behavior (sqlite3_close() behavior) is to return + ** SQLITE_BUSY if the connection can not be closed immediately. + */ + if( !forceZombie && connectionIsBusy(db) ){ + sqlite3ErrorWithMsg(db, SQLITE_BUSY, "unable to close due to unfinalized " + "statements or unfinished backups"); + sqlite3_mutex_leave(db->mutex); + return SQLITE_BUSY; + } + +#ifdef SQLITE_ENABLE_SQLLOG + if( sqlite3GlobalConfig.xSqllog ){ + /* Closing the handle. Fourth parameter is passed the value 2. */ + sqlite3GlobalConfig.xSqllog(sqlite3GlobalConfig.pSqllogArg, db, 0, 2); + } +#endif + + /* Convert the connection into a zombie and then close it. + */ + db->eOpenState = SQLITE_STATE_ZOMBIE; + sqlite3LeaveMutexAndCloseZombie(db); + return SQLITE_OK; +} + +/* +** Return the transaction state for a single databse, or the maximum +** transaction state over all attached databases if zSchema is null. +*/ +SQLITE_API int sqlite3_txn_state(sqlite3 *db, const char *zSchema){ + int iDb, nDb; + int iTxn = -1; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return -1; + } +#endif + sqlite3_mutex_enter(db->mutex); + if( zSchema ){ + nDb = iDb = sqlite3FindDbName(db, zSchema); + if( iDb<0 ) nDb--; + }else{ + iDb = 0; + nDb = db->nDb-1; + } + for(; iDb<=nDb; iDb++){ + Btree *pBt = db->aDb[iDb].pBt; + int x = pBt!=0 ? sqlite3BtreeTxnState(pBt) : SQLITE_TXN_NONE; + if( x>iTxn ) iTxn = x; + } + sqlite3_mutex_leave(db->mutex); + return iTxn; +} + +/* +** Two variations on the public interface for closing a database +** connection. The sqlite3_close() version returns SQLITE_BUSY and +** leaves the connection open if there are unfinalized prepared +** statements or unfinished sqlite3_backups. The sqlite3_close_v2() +** version forces the connection to become a zombie if there are +** unclosed resources, and arranges for deallocation when the last +** prepare statement or sqlite3_backup closes. +*/ +SQLITE_API int sqlite3_close(sqlite3 *db){ return sqlite3Close(db,0); } +SQLITE_API int sqlite3_close_v2(sqlite3 *db){ return sqlite3Close(db,1); } + + +/* +** Close the mutex on database connection db. +** +** Furthermore, if database connection db is a zombie (meaning that there +** has been a prior call to sqlite3_close(db) or sqlite3_close_v2(db)) and +** every sqlite3_stmt has now been finalized and every sqlite3_backup has +** finished, then free all resources. +*/ +SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){ + HashElem *i; /* Hash table iterator */ + int j; + + /* If there are outstanding sqlite3_stmt or sqlite3_backup objects + ** or if the connection has not yet been closed by sqlite3_close_v2(), + ** then just leave the mutex and return. + */ + if( db->eOpenState!=SQLITE_STATE_ZOMBIE || connectionIsBusy(db) ){ + sqlite3_mutex_leave(db->mutex); + return; + } + + /* If we reach this point, it means that the database connection has + ** closed all sqlite3_stmt and sqlite3_backup objects and has been + ** passed to sqlite3_close (meaning that it is a zombie). Therefore, + ** go ahead and free all resources. + */ + + /* If a transaction is open, roll it back. This also ensures that if + ** any database schemas have been modified by an uncommitted transaction + ** they are reset. And that the required b-tree mutex is held to make + ** the pager rollback and schema reset an atomic operation. */ + sqlite3RollbackAll(db, SQLITE_OK); + + /* Free any outstanding Savepoint structures. */ + sqlite3CloseSavepoints(db); + + /* Close all database connections */ + for(j=0; jnDb; j++){ + struct Db *pDb = &db->aDb[j]; + if( pDb->pBt ){ + sqlite3BtreeClose(pDb->pBt); + pDb->pBt = 0; + if( j!=1 ){ + pDb->pSchema = 0; + } + } + } + /* Clear the TEMP schema separately and last */ + if( db->aDb[1].pSchema ){ + sqlite3SchemaClear(db->aDb[1].pSchema); + } + sqlite3VtabUnlockList(db); + + /* Free up the array of auxiliary databases */ + sqlite3CollapseDatabaseArray(db); + assert( db->nDb<=2 ); + assert( db->aDb==db->aDbStatic ); + + /* Tell the code in notify.c that the connection no longer holds any + ** locks and does not require any further unlock-notify callbacks. + */ + sqlite3ConnectionClosed(db); + + for(i=sqliteHashFirst(&db->aFunc); i; i=sqliteHashNext(i)){ + FuncDef *pNext, *p; + p = sqliteHashData(i); + do{ + functionDestroy(db, p); + pNext = p->pNext; + sqlite3DbFree(db, p); + p = pNext; + }while( p ); + } + sqlite3HashClear(&db->aFunc); + for(i=sqliteHashFirst(&db->aCollSeq); i; i=sqliteHashNext(i)){ + CollSeq *pColl = (CollSeq *)sqliteHashData(i); + /* Invoke any destructors registered for collation sequence user data. */ + for(j=0; j<3; j++){ + if( pColl[j].xDel ){ + pColl[j].xDel(pColl[j].pUser); + } + } + sqlite3DbFree(db, pColl); + } + sqlite3HashClear(&db->aCollSeq); +#ifndef SQLITE_OMIT_VIRTUALTABLE + for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){ + Module *pMod = (Module *)sqliteHashData(i); + sqlite3VtabEponymousTableClear(db, pMod); + sqlite3VtabModuleUnref(db, pMod); + } + sqlite3HashClear(&db->aModule); +#endif + + sqlite3Error(db, SQLITE_OK); /* Deallocates any cached error strings. */ + sqlite3ValueFree(db->pErr); + sqlite3CloseExtensions(db); +#if SQLITE_USER_AUTHENTICATION + sqlite3_free(db->auth.zAuthUser); + sqlite3_free(db->auth.zAuthPW); +#endif + + db->eOpenState = SQLITE_STATE_ERROR; + + /* The temp-database schema is allocated differently from the other schema + ** objects (using sqliteMalloc() directly, instead of sqlite3BtreeSchema()). + ** So it needs to be freed here. Todo: Why not roll the temp schema into + ** the same sqliteMalloc() as the one that allocates the database + ** structure? + */ + sqlite3DbFree(db, db->aDb[1].pSchema); + if( db->xAutovacDestr ){ + db->xAutovacDestr(db->pAutovacPagesArg); + } + sqlite3_mutex_leave(db->mutex); + db->eOpenState = SQLITE_STATE_CLOSED; + sqlite3_mutex_free(db->mutex); + assert( sqlite3LookasideUsed(db,0)==0 ); + if( db->lookaside.bMalloced ){ + sqlite3_free(db->lookaside.pStart); + } + sqlite3_free(db); +} + +/* +** Rollback all database files. If tripCode is not SQLITE_OK, then +** any write cursors are invalidated ("tripped" - as in "tripping a circuit +** breaker") and made to return tripCode if there are any further +** attempts to use that cursor. Read cursors remain open and valid +** but are "saved" in case the table pages are moved around. +*/ +SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ + int i; + int inTrans = 0; + int schemaChange; + assert( sqlite3_mutex_held(db->mutex) ); + sqlite3BeginBenignMalloc(); + + /* Obtain all b-tree mutexes before making any calls to BtreeRollback(). + ** This is important in case the transaction being rolled back has + ** modified the database schema. If the b-tree mutexes are not taken + ** here, then another shared-cache connection might sneak in between + ** the database rollback and schema reset, which can cause false + ** corruption reports in some cases. */ + sqlite3BtreeEnterAll(db); + schemaChange = (db->mDbFlags & DBFLAG_SchemaChange)!=0 && db->init.busy==0; + + for(i=0; inDb; i++){ + Btree *p = db->aDb[i].pBt; + if( p ){ + if( sqlite3BtreeTxnState(p)==SQLITE_TXN_WRITE ){ + inTrans = 1; + } + sqlite3BtreeRollback(p, tripCode, !schemaChange); + } + } + sqlite3VtabRollback(db); + sqlite3EndBenignMalloc(); + + if( schemaChange ){ + sqlite3ExpirePreparedStatements(db, 0); + sqlite3ResetAllSchemasOfConnection(db); + } + sqlite3BtreeLeaveAll(db); + + /* Any deferred constraint violations have now been resolved. */ + db->nDeferredCons = 0; + db->nDeferredImmCons = 0; + db->flags &= ~(u64)(SQLITE_DeferFKs|SQLITE_CorruptRdOnly); + + /* If one has been configured, invoke the rollback-hook callback */ + if( db->xRollbackCallback && (inTrans || !db->autoCommit) ){ + db->xRollbackCallback(db->pRollbackArg); + } +} + +/* +** Return a static string containing the name corresponding to the error code +** specified in the argument. +*/ +#if defined(SQLITE_NEED_ERR_NAME) +SQLITE_PRIVATE const char *sqlite3ErrName(int rc){ + const char *zName = 0; + int i, origRc = rc; + for(i=0; i<2 && zName==0; i++, rc &= 0xff){ + switch( rc ){ + case SQLITE_OK: zName = "SQLITE_OK"; break; + case SQLITE_ERROR: zName = "SQLITE_ERROR"; break; + case SQLITE_ERROR_SNAPSHOT: zName = "SQLITE_ERROR_SNAPSHOT"; break; + case SQLITE_INTERNAL: zName = "SQLITE_INTERNAL"; break; + case SQLITE_PERM: zName = "SQLITE_PERM"; break; + case SQLITE_ABORT: zName = "SQLITE_ABORT"; break; + case SQLITE_ABORT_ROLLBACK: zName = "SQLITE_ABORT_ROLLBACK"; break; + case SQLITE_BUSY: zName = "SQLITE_BUSY"; break; + case SQLITE_BUSY_RECOVERY: zName = "SQLITE_BUSY_RECOVERY"; break; + case SQLITE_BUSY_SNAPSHOT: zName = "SQLITE_BUSY_SNAPSHOT"; break; + case SQLITE_LOCKED: zName = "SQLITE_LOCKED"; break; + case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break; + case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break; + case SQLITE_READONLY: zName = "SQLITE_READONLY"; break; + case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break; + case SQLITE_READONLY_CANTINIT: zName = "SQLITE_READONLY_CANTINIT"; break; + case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break; + case SQLITE_READONLY_DBMOVED: zName = "SQLITE_READONLY_DBMOVED"; break; + case SQLITE_READONLY_DIRECTORY: zName = "SQLITE_READONLY_DIRECTORY";break; + case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break; + case SQLITE_IOERR: zName = "SQLITE_IOERR"; break; + case SQLITE_IOERR_READ: zName = "SQLITE_IOERR_READ"; break; + case SQLITE_IOERR_SHORT_READ: zName = "SQLITE_IOERR_SHORT_READ"; break; + case SQLITE_IOERR_WRITE: zName = "SQLITE_IOERR_WRITE"; break; + case SQLITE_IOERR_FSYNC: zName = "SQLITE_IOERR_FSYNC"; break; + case SQLITE_IOERR_DIR_FSYNC: zName = "SQLITE_IOERR_DIR_FSYNC"; break; + case SQLITE_IOERR_TRUNCATE: zName = "SQLITE_IOERR_TRUNCATE"; break; + case SQLITE_IOERR_FSTAT: zName = "SQLITE_IOERR_FSTAT"; break; + case SQLITE_IOERR_UNLOCK: zName = "SQLITE_IOERR_UNLOCK"; break; + case SQLITE_IOERR_RDLOCK: zName = "SQLITE_IOERR_RDLOCK"; break; + case SQLITE_IOERR_DELETE: zName = "SQLITE_IOERR_DELETE"; break; + case SQLITE_IOERR_NOMEM: zName = "SQLITE_IOERR_NOMEM"; break; + case SQLITE_IOERR_ACCESS: zName = "SQLITE_IOERR_ACCESS"; break; + case SQLITE_IOERR_CHECKRESERVEDLOCK: + zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break; + case SQLITE_IOERR_LOCK: zName = "SQLITE_IOERR_LOCK"; break; + case SQLITE_IOERR_CLOSE: zName = "SQLITE_IOERR_CLOSE"; break; + case SQLITE_IOERR_DIR_CLOSE: zName = "SQLITE_IOERR_DIR_CLOSE"; break; + case SQLITE_IOERR_SHMOPEN: zName = "SQLITE_IOERR_SHMOPEN"; break; + case SQLITE_IOERR_SHMSIZE: zName = "SQLITE_IOERR_SHMSIZE"; break; + case SQLITE_IOERR_SHMLOCK: zName = "SQLITE_IOERR_SHMLOCK"; break; + case SQLITE_IOERR_SHMMAP: zName = "SQLITE_IOERR_SHMMAP"; break; + case SQLITE_IOERR_SEEK: zName = "SQLITE_IOERR_SEEK"; break; + case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break; + case SQLITE_IOERR_MMAP: zName = "SQLITE_IOERR_MMAP"; break; + case SQLITE_IOERR_GETTEMPPATH: zName = "SQLITE_IOERR_GETTEMPPATH"; break; + case SQLITE_IOERR_CONVPATH: zName = "SQLITE_IOERR_CONVPATH"; break; + case SQLITE_CORRUPT: zName = "SQLITE_CORRUPT"; break; + case SQLITE_CORRUPT_VTAB: zName = "SQLITE_CORRUPT_VTAB"; break; + case SQLITE_NOTFOUND: zName = "SQLITE_NOTFOUND"; break; + case SQLITE_FULL: zName = "SQLITE_FULL"; break; + case SQLITE_CANTOPEN: zName = "SQLITE_CANTOPEN"; break; + case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break; + case SQLITE_CANTOPEN_ISDIR: zName = "SQLITE_CANTOPEN_ISDIR"; break; + case SQLITE_CANTOPEN_FULLPATH: zName = "SQLITE_CANTOPEN_FULLPATH"; break; + case SQLITE_CANTOPEN_CONVPATH: zName = "SQLITE_CANTOPEN_CONVPATH"; break; + case SQLITE_CANTOPEN_SYMLINK: zName = "SQLITE_CANTOPEN_SYMLINK"; break; + case SQLITE_PROTOCOL: zName = "SQLITE_PROTOCOL"; break; + case SQLITE_EMPTY: zName = "SQLITE_EMPTY"; break; + case SQLITE_SCHEMA: zName = "SQLITE_SCHEMA"; break; + case SQLITE_TOOBIG: zName = "SQLITE_TOOBIG"; break; + case SQLITE_CONSTRAINT: zName = "SQLITE_CONSTRAINT"; break; + case SQLITE_CONSTRAINT_UNIQUE: zName = "SQLITE_CONSTRAINT_UNIQUE"; break; + case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break; + case SQLITE_CONSTRAINT_FOREIGNKEY: + zName = "SQLITE_CONSTRAINT_FOREIGNKEY"; break; + case SQLITE_CONSTRAINT_CHECK: zName = "SQLITE_CONSTRAINT_CHECK"; break; + case SQLITE_CONSTRAINT_PRIMARYKEY: + zName = "SQLITE_CONSTRAINT_PRIMARYKEY"; break; + case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break; + case SQLITE_CONSTRAINT_COMMITHOOK: + zName = "SQLITE_CONSTRAINT_COMMITHOOK"; break; + case SQLITE_CONSTRAINT_VTAB: zName = "SQLITE_CONSTRAINT_VTAB"; break; + case SQLITE_CONSTRAINT_FUNCTION: + zName = "SQLITE_CONSTRAINT_FUNCTION"; break; + case SQLITE_CONSTRAINT_ROWID: zName = "SQLITE_CONSTRAINT_ROWID"; break; + case SQLITE_MISMATCH: zName = "SQLITE_MISMATCH"; break; + case SQLITE_MISUSE: zName = "SQLITE_MISUSE"; break; + case SQLITE_NOLFS: zName = "SQLITE_NOLFS"; break; + case SQLITE_AUTH: zName = "SQLITE_AUTH"; break; + case SQLITE_FORMAT: zName = "SQLITE_FORMAT"; break; + case SQLITE_RANGE: zName = "SQLITE_RANGE"; break; + case SQLITE_NOTADB: zName = "SQLITE_NOTADB"; break; + case SQLITE_ROW: zName = "SQLITE_ROW"; break; + case SQLITE_NOTICE: zName = "SQLITE_NOTICE"; break; + case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; + case SQLITE_NOTICE_RECOVER_ROLLBACK: + zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; + case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; + case SQLITE_WARNING_AUTOINDEX: zName = "SQLITE_WARNING_AUTOINDEX"; break; + case SQLITE_DONE: zName = "SQLITE_DONE"; break; + } + } + if( zName==0 ){ + static char zBuf[50]; + sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc); + zName = zBuf; + } + return zName; +} +#endif + +/* +** Return a static string that describes the kind of error specified in the +** argument. +*/ +SQLITE_PRIVATE const char *sqlite3ErrStr(int rc){ + static const char* const aMsg[] = { + /* SQLITE_OK */ "not an error", + /* SQLITE_ERROR */ "SQL logic error", + /* SQLITE_INTERNAL */ 0, + /* SQLITE_PERM */ "access permission denied", + /* SQLITE_ABORT */ "query aborted", + /* SQLITE_BUSY */ "database is locked", + /* SQLITE_LOCKED */ "database table is locked", + /* SQLITE_NOMEM */ "out of memory", + /* SQLITE_READONLY */ "attempt to write a readonly database", + /* SQLITE_INTERRUPT */ "interrupted", + /* SQLITE_IOERR */ "disk I/O error", + /* SQLITE_CORRUPT */ "database disk image is malformed", + /* SQLITE_NOTFOUND */ "unknown operation", + /* SQLITE_FULL */ "database or disk is full", + /* SQLITE_CANTOPEN */ "unable to open database file", + /* SQLITE_PROTOCOL */ "locking protocol", + /* SQLITE_EMPTY */ 0, + /* SQLITE_SCHEMA */ "database schema has changed", + /* SQLITE_TOOBIG */ "string or blob too big", + /* SQLITE_CONSTRAINT */ "constraint failed", + /* SQLITE_MISMATCH */ "datatype mismatch", + /* SQLITE_MISUSE */ "bad parameter or other API misuse", +#ifdef SQLITE_DISABLE_LFS + /* SQLITE_NOLFS */ "large file support is disabled", +#else + /* SQLITE_NOLFS */ 0, +#endif + /* SQLITE_AUTH */ "authorization denied", + /* SQLITE_FORMAT */ 0, + /* SQLITE_RANGE */ "column index out of range", + /* SQLITE_NOTADB */ "file is not a database", + /* SQLITE_NOTICE */ "notification message", + /* SQLITE_WARNING */ "warning message", + }; + const char *zErr = "unknown error"; + switch( rc ){ + case SQLITE_ABORT_ROLLBACK: { + zErr = "abort due to ROLLBACK"; + break; + } + case SQLITE_ROW: { + zErr = "another row available"; + break; + } + case SQLITE_DONE: { + zErr = "no more rows available"; + break; + } + default: { + rc &= 0xff; + if( ALWAYS(rc>=0) && rcbusyTimeout; + int delay, prior; + + assert( count>=0 ); + if( count < NDELAY ){ + delay = delays[count]; + prior = totals[count]; + }else{ + delay = delays[NDELAY-1]; + prior = totals[NDELAY-1] + delay*(count-(NDELAY-1)); + } + if( prior + delay > tmout ){ + delay = tmout - prior; + if( delay<=0 ) return 0; + } + sqlite3OsSleep(db->pVfs, delay*1000); + return 1; +#else + /* This case for unix systems that lack usleep() support. Sleeping + ** must be done in increments of whole seconds */ + sqlite3 *db = (sqlite3 *)ptr; + int tmout = ((sqlite3 *)ptr)->busyTimeout; + if( (count+1)*1000 > tmout ){ + return 0; + } + sqlite3OsSleep(db->pVfs, 1000000); + return 1; +#endif +} + +/* +** Invoke the given busy handler. +** +** This routine is called when an operation failed to acquire a +** lock on VFS file pFile. +** +** If this routine returns non-zero, the lock is retried. If it +** returns 0, the operation aborts with an SQLITE_BUSY error. +*/ +SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler *p){ + int rc; + if( p->xBusyHandler==0 || p->nBusy<0 ) return 0; + rc = p->xBusyHandler(p->pBusyArg, p->nBusy); + if( rc==0 ){ + p->nBusy = -1; + }else{ + p->nBusy++; + } + return rc; +} + +/* +** This routine sets the busy callback for an Sqlite database to the +** given callback function with the given argument. +*/ +SQLITE_API int sqlite3_busy_handler( + sqlite3 *db, + int (*xBusy)(void*,int), + void *pArg +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->busyHandler.xBusyHandler = xBusy; + db->busyHandler.pBusyArg = pArg; + db->busyHandler.nBusy = 0; + db->busyTimeout = 0; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK +/* +** This routine sets the progress callback for an Sqlite database to the +** given callback function with the given argument. The progress callback will +** be invoked every nOps opcodes. +*/ +SQLITE_API void sqlite3_progress_handler( + sqlite3 *db, + int nOps, + int (*xProgress)(void*), + void *pArg +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return; + } +#endif + sqlite3_mutex_enter(db->mutex); + if( nOps>0 ){ + db->xProgress = xProgress; + db->nProgressOps = (unsigned)nOps; + db->pProgressArg = pArg; + }else{ + db->xProgress = 0; + db->nProgressOps = 0; + db->pProgressArg = 0; + } + sqlite3_mutex_leave(db->mutex); +} +#endif + + +/* +** This routine installs a default busy handler that waits for the +** specified number of milliseconds before returning 0. +*/ +SQLITE_API int sqlite3_busy_timeout(sqlite3 *db, int ms){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + if( ms>0 ){ + sqlite3_busy_handler(db, (int(*)(void*,int))sqliteDefaultBusyCallback, + (void*)db); + db->busyTimeout = ms; + }else{ + sqlite3_busy_handler(db, 0, 0); + } + return SQLITE_OK; +} + +/* +** Cause any pending operation to stop at its earliest opportunity. +*/ +SQLITE_API void sqlite3_interrupt(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) && (db==0 || db->eOpenState!=SQLITE_STATE_ZOMBIE) ){ + (void)SQLITE_MISUSE_BKPT; + return; + } +#endif + AtomicStore(&db->u1.isInterrupted, 1); +} + + +/* +** This function is exactly the same as sqlite3_create_function(), except +** that it is designed to be called by internal code. The difference is +** that if a malloc() fails in sqlite3_create_function(), an error code +** is returned and the mallocFailed flag cleared. +*/ +SQLITE_PRIVATE int sqlite3CreateFunc( + sqlite3 *db, + const char *zFunctionName, + int nArg, + int enc, + void *pUserData, + void (*xSFunc)(sqlite3_context*,int,sqlite3_value **), + void (*xStep)(sqlite3_context*,int,sqlite3_value **), + void (*xFinal)(sqlite3_context*), + void (*xValue)(sqlite3_context*), + void (*xInverse)(sqlite3_context*,int,sqlite3_value **), + FuncDestructor *pDestructor +){ + FuncDef *p; + int extraFlags; + + assert( sqlite3_mutex_held(db->mutex) ); + assert( xValue==0 || xSFunc==0 ); + if( zFunctionName==0 /* Must have a valid name */ + || (xSFunc!=0 && xFinal!=0) /* Not both xSFunc and xFinal */ + || ((xFinal==0)!=(xStep==0)) /* Both or neither of xFinal and xStep */ + || ((xValue==0)!=(xInverse==0)) /* Both or neither of xValue, xInverse */ + || (nArg<-1 || nArg>SQLITE_MAX_FUNCTION_ARG) + || (255funcFlags & SQLITE_FUNC_ENCMASK)==(u32)enc && p->nArg==nArg ){ + if( db->nVdbeActive ){ + sqlite3ErrorWithMsg(db, SQLITE_BUSY, + "unable to delete/modify user-function due to active statements"); + assert( !db->mallocFailed ); + return SQLITE_BUSY; + }else{ + sqlite3ExpirePreparedStatements(db, 0); + } + }else if( xSFunc==0 && xFinal==0 ){ + /* Trying to delete a function that does not exist. This is a no-op. + ** https://sqlite.org/forum/forumpost/726219164b */ + return SQLITE_OK; + } + + p = sqlite3FindFunction(db, zFunctionName, nArg, (u8)enc, 1); + assert(p || db->mallocFailed); + if( !p ){ + return SQLITE_NOMEM_BKPT; + } + + /* If an older version of the function with a configured destructor is + ** being replaced invoke the destructor function here. */ + functionDestroy(db, p); + + if( pDestructor ){ + pDestructor->nRef++; + } + p->u.pDestructor = pDestructor; + p->funcFlags = (p->funcFlags & SQLITE_FUNC_ENCMASK) | extraFlags; + testcase( p->funcFlags & SQLITE_DETERMINISTIC ); + testcase( p->funcFlags & SQLITE_DIRECTONLY ); + p->xSFunc = xSFunc ? xSFunc : xStep; + p->xFinalize = xFinal; + p->xValue = xValue; + p->xInverse = xInverse; + p->pUserData = pUserData; + p->nArg = (u16)nArg; + return SQLITE_OK; +} + +/* +** Worker function used by utf-8 APIs that create new functions: +** +** sqlite3_create_function() +** sqlite3_create_function_v2() +** sqlite3_create_window_function() +*/ +static int createFunctionApi( + sqlite3 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xSFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*), + void (*xValue)(sqlite3_context*), + void (*xInverse)(sqlite3_context*,int,sqlite3_value**), + void(*xDestroy)(void*) +){ + int rc = SQLITE_ERROR; + FuncDestructor *pArg = 0; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + if( xDestroy ){ + pArg = (FuncDestructor *)sqlite3Malloc(sizeof(FuncDestructor)); + if( !pArg ){ + sqlite3OomFault(db); + xDestroy(p); + goto out; + } + pArg->nRef = 0; + pArg->xDestroy = xDestroy; + pArg->pUserData = p; + } + rc = sqlite3CreateFunc(db, zFunc, nArg, enc, p, + xSFunc, xStep, xFinal, xValue, xInverse, pArg + ); + if( pArg && pArg->nRef==0 ){ + assert( rc!=SQLITE_OK || (xStep==0 && xFinal==0) ); + xDestroy(p); + sqlite3_free(pArg); + } + + out: + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** Create new user functions. +*/ +SQLITE_API int sqlite3_create_function( + sqlite3 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xSFunc)(sqlite3_context*,int,sqlite3_value **), + void (*xStep)(sqlite3_context*,int,sqlite3_value **), + void (*xFinal)(sqlite3_context*) +){ + return createFunctionApi(db, zFunc, nArg, enc, p, xSFunc, xStep, + xFinal, 0, 0, 0); +} +SQLITE_API int sqlite3_create_function_v2( + sqlite3 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xSFunc)(sqlite3_context*,int,sqlite3_value **), + void (*xStep)(sqlite3_context*,int,sqlite3_value **), + void (*xFinal)(sqlite3_context*), + void (*xDestroy)(void *) +){ + return createFunctionApi(db, zFunc, nArg, enc, p, xSFunc, xStep, + xFinal, 0, 0, xDestroy); +} +SQLITE_API int sqlite3_create_window_function( + sqlite3 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xStep)(sqlite3_context*,int,sqlite3_value **), + void (*xFinal)(sqlite3_context*), + void (*xValue)(sqlite3_context*), + void (*xInverse)(sqlite3_context*,int,sqlite3_value **), + void (*xDestroy)(void *) +){ + return createFunctionApi(db, zFunc, nArg, enc, p, 0, xStep, + xFinal, xValue, xInverse, xDestroy); +} + +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API int sqlite3_create_function16( + sqlite3 *db, + const void *zFunctionName, + int nArg, + int eTextRep, + void *p, + void (*xSFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*) +){ + int rc; + char *zFunc8; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zFunctionName==0 ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + assert( !db->mallocFailed ); + zFunc8 = sqlite3Utf16to8(db, zFunctionName, -1, SQLITE_UTF16NATIVE); + rc = sqlite3CreateFunc(db, zFunc8, nArg, eTextRep, p, xSFunc,xStep,xFinal,0,0,0); + sqlite3DbFree(db, zFunc8); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} +#endif + + +/* +** The following is the implementation of an SQL function that always +** fails with an error message stating that the function is used in the +** wrong context. The sqlite3_overload_function() API might construct +** SQL function that use this routine so that the functions will exist +** for name resolution but are actually overloaded by the xFindFunction +** method of virtual tables. +*/ +static void sqlite3InvalidFunction( + sqlite3_context *context, /* The function calling context */ + int NotUsed, /* Number of arguments to the function */ + sqlite3_value **NotUsed2 /* Value of each argument */ +){ + const char *zName = (const char*)sqlite3_user_data(context); + char *zErr; + UNUSED_PARAMETER2(NotUsed, NotUsed2); + zErr = sqlite3_mprintf( + "unable to use function %s in the requested context", zName); + sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); +} + +/* +** Declare that a function has been overloaded by a virtual table. +** +** If the function already exists as a regular global function, then +** this routine is a no-op. If the function does not exist, then create +** a new one that always throws a run-time error. +** +** When virtual tables intend to provide an overloaded function, they +** should call this routine to make sure the global function exists. +** A global function must exist in order for name resolution to work +** properly. +*/ +SQLITE_API int sqlite3_overload_function( + sqlite3 *db, + const char *zName, + int nArg +){ + int rc; + char *zCopy; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zName==0 || nArg<-2 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + rc = sqlite3FindFunction(db, zName, nArg, SQLITE_UTF8, 0)!=0; + sqlite3_mutex_leave(db->mutex); + if( rc ) return SQLITE_OK; + zCopy = sqlite3_mprintf(zName); + if( zCopy==0 ) return SQLITE_NOMEM; + return sqlite3_create_function_v2(db, zName, nArg, SQLITE_UTF8, + zCopy, sqlite3InvalidFunction, 0, 0, sqlite3_free); +} + +#ifndef SQLITE_OMIT_TRACE +/* +** Register a trace function. The pArg from the previously registered trace +** is returned. +** +** A NULL trace function means that no tracing is executes. A non-NULL +** trace is a pointer to a function that is invoked at the start of each +** SQL statement. +*/ +#ifndef SQLITE_OMIT_DEPRECATED +SQLITE_API void *sqlite3_trace(sqlite3 *db, void(*xTrace)(void*,const char*), void *pArg){ + void *pOld; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pOld = db->pTraceArg; + db->mTrace = xTrace ? SQLITE_TRACE_LEGACY : 0; + db->trace.xLegacy = xTrace; + db->pTraceArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pOld; +} +#endif /* SQLITE_OMIT_DEPRECATED */ + +/* Register a trace callback using the version-2 interface. +*/ +SQLITE_API int sqlite3_trace_v2( + sqlite3 *db, /* Trace this connection */ + unsigned mTrace, /* Mask of events to be traced */ + int(*xTrace)(unsigned,void*,void*,void*), /* Callback to invoke */ + void *pArg /* Context */ +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + if( mTrace==0 ) xTrace = 0; + if( xTrace==0 ) mTrace = 0; + db->mTrace = mTrace; + db->trace.xV2 = xTrace; + db->pTraceArg = pArg; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** Register a profile function. The pArg from the previously registered +** profile function is returned. +** +** A NULL profile function means that no profiling is executes. A non-NULL +** profile is a pointer to a function that is invoked at the conclusion of +** each SQL statement that is run. +*/ +SQLITE_API void *sqlite3_profile( + sqlite3 *db, + void (*xProfile)(void*,const char*,sqlite_uint64), + void *pArg +){ + void *pOld; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pOld = db->pProfileArg; + db->xProfile = xProfile; + db->pProfileArg = pArg; + db->mTrace &= SQLITE_TRACE_NONLEGACY_MASK; + if( db->xProfile ) db->mTrace |= SQLITE_TRACE_XPROFILE; + sqlite3_mutex_leave(db->mutex); + return pOld; +} +#endif /* SQLITE_OMIT_DEPRECATED */ +#endif /* SQLITE_OMIT_TRACE */ + +/* +** Register a function to be invoked when a transaction commits. +** If the invoked function returns non-zero, then the commit becomes a +** rollback. +*/ +SQLITE_API void *sqlite3_commit_hook( + sqlite3 *db, /* Attach the hook to this database */ + int (*xCallback)(void*), /* Function to invoke on each commit */ + void *pArg /* Argument to the function */ +){ + void *pOld; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pOld = db->pCommitArg; + db->xCommitCallback = xCallback; + db->pCommitArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pOld; +} + +/* +** Register a callback to be invoked each time a row is updated, +** inserted or deleted using this database connection. +*/ +SQLITE_API void *sqlite3_update_hook( + sqlite3 *db, /* Attach the hook to this database */ + void (*xCallback)(void*,int,char const *,char const *,sqlite_int64), + void *pArg /* Argument to the function */ +){ + void *pRet; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pRet = db->pUpdateArg; + db->xUpdateCallback = xCallback; + db->pUpdateArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; +} + +/* +** Register a callback to be invoked each time a transaction is rolled +** back by this database connection. +*/ +SQLITE_API void *sqlite3_rollback_hook( + sqlite3 *db, /* Attach the hook to this database */ + void (*xCallback)(void*), /* Callback function */ + void *pArg /* Argument to the function */ +){ + void *pRet; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pRet = db->pRollbackArg; + db->xRollbackCallback = xCallback; + db->pRollbackArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; +} + +#ifdef SQLITE_ENABLE_PREUPDATE_HOOK +/* +** Register a callback to be invoked each time a row is updated, +** inserted or deleted using this database connection. +*/ +SQLITE_API void *sqlite3_preupdate_hook( + sqlite3 *db, /* Attach the hook to this database */ + void(*xCallback)( /* Callback function */ + void*,sqlite3*,int,char const*,char const*,sqlite3_int64,sqlite3_int64), + void *pArg /* First callback argument */ +){ + void *pRet; + sqlite3_mutex_enter(db->mutex); + pRet = db->pPreUpdateArg; + db->xPreUpdateCallback = xCallback; + db->pPreUpdateArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; +} +#endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +/* +** Register a function to be invoked prior to each autovacuum that +** determines the number of pages to vacuum. +*/ +SQLITE_API int sqlite3_autovacuum_pages( + sqlite3 *db, /* Attach the hook to this database */ + unsigned int (*xCallback)(void*,const char*,u32,u32,u32), + void *pArg, /* Argument to the function */ + void (*xDestructor)(void*) /* Destructor for pArg */ +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + if( xDestructor ) xDestructor(pArg); + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + if( db->xAutovacDestr ){ + db->xAutovacDestr(db->pAutovacPagesArg); + } + db->xAutovacPages = xCallback; + db->pAutovacPagesArg = pArg; + db->xAutovacDestr = xDestructor; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + + +#ifndef SQLITE_OMIT_WAL +/* +** The sqlite3_wal_hook() callback registered by sqlite3_wal_autocheckpoint(). +** Invoke sqlite3_wal_checkpoint if the number of frames in the log file +** is greater than sqlite3.pWalArg cast to an integer (the value configured by +** wal_autocheckpoint()). +*/ +SQLITE_PRIVATE int sqlite3WalDefaultHook( + void *pClientData, /* Argument */ + sqlite3 *db, /* Connection */ + const char *zDb, /* Database */ + int nFrame /* Size of WAL */ +){ + if( nFrame>=SQLITE_PTR_TO_INT(pClientData) ){ + sqlite3BeginBenignMalloc(); + sqlite3_wal_checkpoint(db, zDb); + sqlite3EndBenignMalloc(); + } + return SQLITE_OK; +} +#endif /* SQLITE_OMIT_WAL */ + +/* +** Configure an sqlite3_wal_hook() callback to automatically checkpoint +** a database after committing a transaction if there are nFrame or +** more frames in the log file. Passing zero or a negative value as the +** nFrame parameter disables automatic checkpoints entirely. +** +** The callback registered by this function replaces any existing callback +** registered using sqlite3_wal_hook(). Likewise, registering a callback +** using sqlite3_wal_hook() disables the automatic checkpoint mechanism +** configured by this function. +*/ +SQLITE_API int sqlite3_wal_autocheckpoint(sqlite3 *db, int nFrame){ +#ifdef SQLITE_OMIT_WAL + UNUSED_PARAMETER(db); + UNUSED_PARAMETER(nFrame); +#else +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + if( nFrame>0 ){ + sqlite3_wal_hook(db, sqlite3WalDefaultHook, SQLITE_INT_TO_PTR(nFrame)); + }else{ + sqlite3_wal_hook(db, 0, 0); + } +#endif + return SQLITE_OK; +} + +/* +** Register a callback to be invoked each time a transaction is written +** into the write-ahead-log by this database connection. +*/ +SQLITE_API void *sqlite3_wal_hook( + sqlite3 *db, /* Attach the hook to this db handle */ + int(*xCallback)(void *, sqlite3*, const char*, int), + void *pArg /* First argument passed to xCallback() */ +){ +#ifndef SQLITE_OMIT_WAL + void *pRet; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pRet = db->pWalArg; + db->xWalCallback = xCallback; + db->pWalArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; +#else + return 0; +#endif +} + +/* +** Checkpoint database zDb. +*/ +SQLITE_API int sqlite3_wal_checkpoint_v2( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of attached database (or NULL) */ + int eMode, /* SQLITE_CHECKPOINT_* value */ + int *pnLog, /* OUT: Size of WAL log in frames */ + int *pnCkpt /* OUT: Total number of frames checkpointed */ +){ +#ifdef SQLITE_OMIT_WAL + return SQLITE_OK; +#else + int rc; /* Return code */ + int iDb; /* Schema to checkpoint */ + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + + /* Initialize the output variables to -1 in case an error occurs. */ + if( pnLog ) *pnLog = -1; + if( pnCkpt ) *pnCkpt = -1; + + assert( SQLITE_CHECKPOINT_PASSIVE==0 ); + assert( SQLITE_CHECKPOINT_FULL==1 ); + assert( SQLITE_CHECKPOINT_RESTART==2 ); + assert( SQLITE_CHECKPOINT_TRUNCATE==3 ); + if( eModeSQLITE_CHECKPOINT_TRUNCATE ){ + /* EVIDENCE-OF: R-03996-12088 The M parameter must be a valid checkpoint + ** mode: */ + return SQLITE_MISUSE; + } + + sqlite3_mutex_enter(db->mutex); + if( zDb && zDb[0] ){ + iDb = sqlite3FindDbName(db, zDb); + }else{ + iDb = SQLITE_MAX_DB; /* This means process all schemas */ + } + if( iDb<0 ){ + rc = SQLITE_ERROR; + sqlite3ErrorWithMsg(db, SQLITE_ERROR, "unknown database: %s", zDb); + }else{ + db->busyHandler.nBusy = 0; + rc = sqlite3Checkpoint(db, iDb, eMode, pnLog, pnCkpt); + sqlite3Error(db, rc); + } + rc = sqlite3ApiExit(db, rc); + + /* If there are no active statements, clear the interrupt flag at this + ** point. */ + if( db->nVdbeActive==0 ){ + AtomicStore(&db->u1.isInterrupted, 0); + } + + sqlite3_mutex_leave(db->mutex); + return rc; +#endif +} + + +/* +** Checkpoint database zDb. If zDb is NULL, or if the buffer zDb points +** to contains a zero-length string, all attached databases are +** checkpointed. +*/ +SQLITE_API int sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb){ + /* EVIDENCE-OF: R-41613-20553 The sqlite3_wal_checkpoint(D,X) is equivalent to + ** sqlite3_wal_checkpoint_v2(D,X,SQLITE_CHECKPOINT_PASSIVE,0,0). */ + return sqlite3_wal_checkpoint_v2(db,zDb,SQLITE_CHECKPOINT_PASSIVE,0,0); +} + +#ifndef SQLITE_OMIT_WAL +/* +** Run a checkpoint on database iDb. This is a no-op if database iDb is +** not currently open in WAL mode. +** +** If a transaction is open on the database being checkpointed, this +** function returns SQLITE_LOCKED and a checkpoint is not attempted. If +** an error occurs while running the checkpoint, an SQLite error code is +** returned (i.e. SQLITE_IOERR). Otherwise, SQLITE_OK. +** +** The mutex on database handle db should be held by the caller. The mutex +** associated with the specific b-tree being checkpointed is taken by +** this function while the checkpoint is running. +** +** If iDb is passed SQLITE_MAX_DB then all attached databases are +** checkpointed. If an error is encountered it is returned immediately - +** no attempt is made to checkpoint any remaining databases. +** +** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL, RESTART +** or TRUNCATE. +*/ +SQLITE_PRIVATE int sqlite3Checkpoint(sqlite3 *db, int iDb, int eMode, int *pnLog, int *pnCkpt){ + int rc = SQLITE_OK; /* Return code */ + int i; /* Used to iterate through attached dbs */ + int bBusy = 0; /* True if SQLITE_BUSY has been encountered */ + + assert( sqlite3_mutex_held(db->mutex) ); + assert( !pnLog || *pnLog==-1 ); + assert( !pnCkpt || *pnCkpt==-1 ); + testcase( iDb==SQLITE_MAX_ATTACHED ); /* See forum post a006d86f72 */ + testcase( iDb==SQLITE_MAX_DB ); + + for(i=0; inDb && rc==SQLITE_OK; i++){ + if( i==iDb || iDb==SQLITE_MAX_DB ){ + rc = sqlite3BtreeCheckpoint(db->aDb[i].pBt, eMode, pnLog, pnCkpt); + pnLog = 0; + pnCkpt = 0; + if( rc==SQLITE_BUSY ){ + bBusy = 1; + rc = SQLITE_OK; + } + } + } + + return (rc==SQLITE_OK && bBusy) ? SQLITE_BUSY : rc; +} +#endif /* SQLITE_OMIT_WAL */ + +/* +** This function returns true if main-memory should be used instead of +** a temporary file for transient pager files and statement journals. +** The value returned depends on the value of db->temp_store (runtime +** parameter) and the compile time value of SQLITE_TEMP_STORE. The +** following table describes the relationship between these two values +** and this functions return value. +** +** SQLITE_TEMP_STORE db->temp_store Location of temporary database +** ----------------- -------------- ------------------------------ +** 0 any file (return 0) +** 1 1 file (return 0) +** 1 2 memory (return 1) +** 1 0 file (return 0) +** 2 1 file (return 0) +** 2 2 memory (return 1) +** 2 0 memory (return 1) +** 3 any memory (return 1) +*/ +SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3 *db){ +#if SQLITE_TEMP_STORE==1 + return ( db->temp_store==2 ); +#endif +#if SQLITE_TEMP_STORE==2 + return ( db->temp_store!=1 ); +#endif +#if SQLITE_TEMP_STORE==3 + UNUSED_PARAMETER(db); + return 1; +#endif +#if SQLITE_TEMP_STORE<1 || SQLITE_TEMP_STORE>3 + UNUSED_PARAMETER(db); + return 0; +#endif +} + +/* +** Return UTF-8 encoded English language explanation of the most recent +** error. +*/ +SQLITE_API const char *sqlite3_errmsg(sqlite3 *db){ + const char *z; + if( !db ){ + return sqlite3ErrStr(SQLITE_NOMEM_BKPT); + } + if( !sqlite3SafetyCheckSickOrOk(db) ){ + return sqlite3ErrStr(SQLITE_MISUSE_BKPT); + } + sqlite3_mutex_enter(db->mutex); + if( db->mallocFailed ){ + z = sqlite3ErrStr(SQLITE_NOMEM_BKPT); + }else{ + testcase( db->pErr==0 ); + z = db->errCode ? (char*)sqlite3_value_text(db->pErr) : 0; + assert( !db->mallocFailed ); + if( z==0 ){ + z = sqlite3ErrStr(db->errCode); + } + } + sqlite3_mutex_leave(db->mutex); + return z; +} + +/* +** Return the byte offset of the most recent error +*/ +SQLITE_API int sqlite3_error_offset(sqlite3 *db){ + int iOffset = -1; + if( db && sqlite3SafetyCheckSickOrOk(db) && db->errCode ){ + sqlite3_mutex_enter(db->mutex); + iOffset = db->errByteOffset; + sqlite3_mutex_leave(db->mutex); + } + return iOffset; +} + +#ifndef SQLITE_OMIT_UTF16 +/* +** Return UTF-16 encoded English language explanation of the most recent +** error. +*/ +SQLITE_API const void *sqlite3_errmsg16(sqlite3 *db){ + static const u16 outOfMem[] = { + 'o', 'u', 't', ' ', 'o', 'f', ' ', 'm', 'e', 'm', 'o', 'r', 'y', 0 + }; + static const u16 misuse[] = { + 'b', 'a', 'd', ' ', 'p', 'a', 'r', 'a', 'm', 'e', 't', 'e', 'r', ' ', + 'o', 'r', ' ', 'o', 't', 'h', 'e', 'r', ' ', 'A', 'P', 'I', ' ', + 'm', 'i', 's', 'u', 's', 'e', 0 + }; + + const void *z; + if( !db ){ + return (void *)outOfMem; + } + if( !sqlite3SafetyCheckSickOrOk(db) ){ + return (void *)misuse; + } + sqlite3_mutex_enter(db->mutex); + if( db->mallocFailed ){ + z = (void *)outOfMem; + }else{ + z = sqlite3_value_text16(db->pErr); + if( z==0 ){ + sqlite3ErrorWithMsg(db, db->errCode, sqlite3ErrStr(db->errCode)); + z = sqlite3_value_text16(db->pErr); + } + /* A malloc() may have failed within the call to sqlite3_value_text16() + ** above. If this is the case, then the db->mallocFailed flag needs to + ** be cleared before returning. Do this directly, instead of via + ** sqlite3ApiExit(), to avoid setting the database handle error message. + */ + sqlite3OomClear(db); + } + sqlite3_mutex_leave(db->mutex); + return z; +} +#endif /* SQLITE_OMIT_UTF16 */ + +/* +** Return the most recent error code generated by an SQLite routine. If NULL is +** passed to this function, we assume a malloc() failed during sqlite3_open(). +*/ +SQLITE_API int sqlite3_errcode(sqlite3 *db){ + if( db && !sqlite3SafetyCheckSickOrOk(db) ){ + return SQLITE_MISUSE_BKPT; + } + if( !db || db->mallocFailed ){ + return SQLITE_NOMEM_BKPT; + } + return db->errCode & db->errMask; +} +SQLITE_API int sqlite3_extended_errcode(sqlite3 *db){ + if( db && !sqlite3SafetyCheckSickOrOk(db) ){ + return SQLITE_MISUSE_BKPT; + } + if( !db || db->mallocFailed ){ + return SQLITE_NOMEM_BKPT; + } + return db->errCode; +} +SQLITE_API int sqlite3_system_errno(sqlite3 *db){ + return db ? db->iSysErrno : 0; +} + +/* +** Return a string that describes the kind of error specified in the +** argument. For now, this simply calls the internal sqlite3ErrStr() +** function. +*/ +SQLITE_API const char *sqlite3_errstr(int rc){ + return sqlite3ErrStr(rc); +} + +/* +** Create a new collating function for database "db". The name is zName +** and the encoding is enc. +*/ +static int createCollation( + sqlite3* db, + const char *zName, + u8 enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*), + void(*xDel)(void*) +){ + CollSeq *pColl; + int enc2; + + assert( sqlite3_mutex_held(db->mutex) ); + + /* If SQLITE_UTF16 is specified as the encoding type, transform this + ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the + ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally. + */ + enc2 = enc; + testcase( enc2==SQLITE_UTF16 ); + testcase( enc2==SQLITE_UTF16_ALIGNED ); + if( enc2==SQLITE_UTF16 || enc2==SQLITE_UTF16_ALIGNED ){ + enc2 = SQLITE_UTF16NATIVE; + } + if( enc2SQLITE_UTF16BE ){ + return SQLITE_MISUSE_BKPT; + } + + /* Check if this call is removing or replacing an existing collation + ** sequence. If so, and there are active VMs, return busy. If there + ** are no active VMs, invalidate any pre-compiled statements. + */ + pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 0); + if( pColl && pColl->xCmp ){ + if( db->nVdbeActive ){ + sqlite3ErrorWithMsg(db, SQLITE_BUSY, + "unable to delete/modify collation sequence due to active statements"); + return SQLITE_BUSY; + } + sqlite3ExpirePreparedStatements(db, 0); + + /* If collation sequence pColl was created directly by a call to + ** sqlite3_create_collation, and not generated by synthCollSeq(), + ** then any copies made by synthCollSeq() need to be invalidated. + ** Also, collation destructor - CollSeq.xDel() - function may need + ** to be called. + */ + if( (pColl->enc & ~SQLITE_UTF16_ALIGNED)==enc2 ){ + CollSeq *aColl = sqlite3HashFind(&db->aCollSeq, zName); + int j; + for(j=0; j<3; j++){ + CollSeq *p = &aColl[j]; + if( p->enc==pColl->enc ){ + if( p->xDel ){ + p->xDel(p->pUser); + } + p->xCmp = 0; + } + } + } + } + + pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 1); + if( pColl==0 ) return SQLITE_NOMEM_BKPT; + pColl->xCmp = xCompare; + pColl->pUser = pCtx; + pColl->xDel = xDel; + pColl->enc = (u8)(enc2 | (enc & SQLITE_UTF16_ALIGNED)); + sqlite3Error(db, SQLITE_OK); + return SQLITE_OK; +} + + +/* +** This array defines hard upper bounds on limit values. The +** initializer must be kept in sync with the SQLITE_LIMIT_* +** #defines in sqlite3.h. +*/ +static const int aHardLimit[] = { + SQLITE_MAX_LENGTH, + SQLITE_MAX_SQL_LENGTH, + SQLITE_MAX_COLUMN, + SQLITE_MAX_EXPR_DEPTH, + SQLITE_MAX_COMPOUND_SELECT, + SQLITE_MAX_VDBE_OP, + SQLITE_MAX_FUNCTION_ARG, + SQLITE_MAX_ATTACHED, + SQLITE_MAX_LIKE_PATTERN_LENGTH, + SQLITE_MAX_VARIABLE_NUMBER, /* IMP: R-38091-32352 */ + SQLITE_MAX_TRIGGER_DEPTH, + SQLITE_MAX_WORKER_THREADS, +}; + +/* +** Make sure the hard limits are set to reasonable values +*/ +#if SQLITE_MAX_LENGTH<100 +# error SQLITE_MAX_LENGTH must be at least 100 +#endif +#if SQLITE_MAX_SQL_LENGTH<100 +# error SQLITE_MAX_SQL_LENGTH must be at least 100 +#endif +#if SQLITE_MAX_SQL_LENGTH>SQLITE_MAX_LENGTH +# error SQLITE_MAX_SQL_LENGTH must not be greater than SQLITE_MAX_LENGTH +#endif +#if SQLITE_MAX_COMPOUND_SELECT<2 +# error SQLITE_MAX_COMPOUND_SELECT must be at least 2 +#endif +#if SQLITE_MAX_VDBE_OP<40 +# error SQLITE_MAX_VDBE_OP must be at least 40 +#endif +#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>127 +# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 127 +#endif +#if SQLITE_MAX_ATTACHED<0 || SQLITE_MAX_ATTACHED>125 +# error SQLITE_MAX_ATTACHED must be between 0 and 125 +#endif +#if SQLITE_MAX_LIKE_PATTERN_LENGTH<1 +# error SQLITE_MAX_LIKE_PATTERN_LENGTH must be at least 1 +#endif +#if SQLITE_MAX_COLUMN>32767 +# error SQLITE_MAX_COLUMN must not exceed 32767 +#endif +#if SQLITE_MAX_TRIGGER_DEPTH<1 +# error SQLITE_MAX_TRIGGER_DEPTH must be at least 1 +#endif +#if SQLITE_MAX_WORKER_THREADS<0 || SQLITE_MAX_WORKER_THREADS>50 +# error SQLITE_MAX_WORKER_THREADS must be between 0 and 50 +#endif + + +/* +** Change the value of a limit. Report the old value. +** If an invalid limit index is supplied, report -1. +** Make no changes but still report the old value if the +** new limit is negative. +** +** A new lower limit does not shrink existing constructs. +** It merely prevents new constructs that exceed the limit +** from forming. +*/ +SQLITE_API int sqlite3_limit(sqlite3 *db, int limitId, int newLimit){ + int oldLimit; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return -1; + } +#endif + + /* EVIDENCE-OF: R-30189-54097 For each limit category SQLITE_LIMIT_NAME + ** there is a hard upper bound set at compile-time by a C preprocessor + ** macro called SQLITE_MAX_NAME. (The "_LIMIT_" in the name is changed to + ** "_MAX_".) + */ + assert( aHardLimit[SQLITE_LIMIT_LENGTH]==SQLITE_MAX_LENGTH ); + assert( aHardLimit[SQLITE_LIMIT_SQL_LENGTH]==SQLITE_MAX_SQL_LENGTH ); + assert( aHardLimit[SQLITE_LIMIT_COLUMN]==SQLITE_MAX_COLUMN ); + assert( aHardLimit[SQLITE_LIMIT_EXPR_DEPTH]==SQLITE_MAX_EXPR_DEPTH ); + assert( aHardLimit[SQLITE_LIMIT_COMPOUND_SELECT]==SQLITE_MAX_COMPOUND_SELECT); + assert( aHardLimit[SQLITE_LIMIT_VDBE_OP]==SQLITE_MAX_VDBE_OP ); + assert( aHardLimit[SQLITE_LIMIT_FUNCTION_ARG]==SQLITE_MAX_FUNCTION_ARG ); + assert( aHardLimit[SQLITE_LIMIT_ATTACHED]==SQLITE_MAX_ATTACHED ); + assert( aHardLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]== + SQLITE_MAX_LIKE_PATTERN_LENGTH ); + assert( aHardLimit[SQLITE_LIMIT_VARIABLE_NUMBER]==SQLITE_MAX_VARIABLE_NUMBER); + assert( aHardLimit[SQLITE_LIMIT_TRIGGER_DEPTH]==SQLITE_MAX_TRIGGER_DEPTH ); + assert( aHardLimit[SQLITE_LIMIT_WORKER_THREADS]==SQLITE_MAX_WORKER_THREADS ); + assert( SQLITE_LIMIT_WORKER_THREADS==(SQLITE_N_LIMIT-1) ); + + + if( limitId<0 || limitId>=SQLITE_N_LIMIT ){ + return -1; + } + oldLimit = db->aLimit[limitId]; + if( newLimit>=0 ){ /* IMP: R-52476-28732 */ + if( newLimit>aHardLimit[limitId] ){ + newLimit = aHardLimit[limitId]; /* IMP: R-51463-25634 */ + }else if( newLimit<1 && limitId==SQLITE_LIMIT_LENGTH ){ + newLimit = 1; + } + db->aLimit[limitId] = newLimit; + } + return oldLimit; /* IMP: R-53341-35419 */ +} + +/* +** This function is used to parse both URIs and non-URI filenames passed by the +** user to API functions sqlite3_open() or sqlite3_open_v2(), and for database +** URIs specified as part of ATTACH statements. +** +** The first argument to this function is the name of the VFS to use (or +** a NULL to signify the default VFS) if the URI does not contain a "vfs=xxx" +** query parameter. The second argument contains the URI (or non-URI filename) +** itself. When this function is called the *pFlags variable should contain +** the default flags to open the database handle with. The value stored in +** *pFlags may be updated before returning if the URI filename contains +** "cache=xxx" or "mode=xxx" query parameters. +** +** If successful, SQLITE_OK is returned. In this case *ppVfs is set to point to +** the VFS that should be used to open the database file. *pzFile is set to +** point to a buffer containing the name of the file to open. The value +** stored in *pzFile is a database name acceptable to sqlite3_uri_parameter() +** and is in the same format as names created using sqlite3_create_filename(). +** The caller must invoke sqlite3_free_filename() (not sqlite3_free()!) on +** the value returned in *pzFile to avoid a memory leak. +** +** If an error occurs, then an SQLite error code is returned and *pzErrMsg +** may be set to point to a buffer containing an English language error +** message. It is the responsibility of the caller to eventually release +** this buffer by calling sqlite3_free(). +*/ +SQLITE_PRIVATE int sqlite3ParseUri( + const char *zDefaultVfs, /* VFS to use if no "vfs=xxx" query option */ + const char *zUri, /* Nul-terminated URI to parse */ + unsigned int *pFlags, /* IN/OUT: SQLITE_OPEN_XXX flags */ + sqlite3_vfs **ppVfs, /* OUT: VFS to use */ + char **pzFile, /* OUT: Filename component of URI */ + char **pzErrMsg /* OUT: Error message (if rc!=SQLITE_OK) */ +){ + int rc = SQLITE_OK; + unsigned int flags = *pFlags; + const char *zVfs = zDefaultVfs; + char *zFile; + char c; + int nUri = sqlite3Strlen30(zUri); + + assert( *pzErrMsg==0 ); + + if( ((flags & SQLITE_OPEN_URI) /* IMP: R-48725-32206 */ + || sqlite3GlobalConfig.bOpenUri) /* IMP: R-51689-46548 */ + && nUri>=5 && memcmp(zUri, "file:", 5)==0 /* IMP: R-57884-37496 */ + ){ + char *zOpt; + int eState; /* Parser state when parsing URI */ + int iIn; /* Input character index */ + int iOut = 0; /* Output character index */ + u64 nByte = nUri+8; /* Bytes of space to allocate */ + + /* Make sure the SQLITE_OPEN_URI flag is set to indicate to the VFS xOpen + ** method that there may be extra parameters following the file-name. */ + flags |= SQLITE_OPEN_URI; + + for(iIn=0; iIn=0 && octet<256 ); + if( octet==0 ){ +#ifndef SQLITE_ENABLE_URI_00_ERROR + /* This branch is taken when "%00" appears within the URI. In this + ** case we ignore all text in the remainder of the path, name or + ** value currently being parsed. So ignore the current character + ** and skip to the next "?", "=" or "&", as appropriate. */ + while( (c = zUri[iIn])!=0 && c!='#' + && (eState!=0 || c!='?') + && (eState!=1 || (c!='=' && c!='&')) + && (eState!=2 || c!='&') + ){ + iIn++; + } + continue; +#else + /* If ENABLE_URI_00_ERROR is defined, "%00" in a URI is an error. */ + *pzErrMsg = sqlite3_mprintf("unexpected %%00 in uri"); + rc = SQLITE_ERROR; + goto parse_uri_out; +#endif + } + c = octet; + }else if( eState==1 && (c=='&' || c=='=') ){ + if( zFile[iOut-1]==0 ){ + /* An empty option name. Ignore this option altogether. */ + while( zUri[iIn] && zUri[iIn]!='#' && zUri[iIn-1]!='&' ) iIn++; + continue; + } + if( c=='&' ){ + zFile[iOut++] = '\0'; + }else{ + eState = 2; + } + c = 0; + }else if( (eState==0 && c=='?') || (eState==2 && c=='&') ){ + c = 0; + eState = 1; + } + zFile[iOut++] = c; + } + if( eState==1 ) zFile[iOut++] = '\0'; + memset(zFile+iOut, 0, 4); /* end-of-options + empty journal filenames */ + + /* Check if there were any options specified that should be interpreted + ** here. Options that are interpreted here include "vfs" and those that + ** correspond to flags that may be passed to the sqlite3_open_v2() + ** method. */ + zOpt = &zFile[sqlite3Strlen30(zFile)+1]; + while( zOpt[0] ){ + int nOpt = sqlite3Strlen30(zOpt); + char *zVal = &zOpt[nOpt+1]; + int nVal = sqlite3Strlen30(zVal); + + if( nOpt==3 && memcmp("vfs", zOpt, 3)==0 ){ + zVfs = zVal; + }else{ + struct OpenMode { + const char *z; + int mode; + } *aMode = 0; + char *zModeType = 0; + int mask = 0; + int limit = 0; + + if( nOpt==5 && memcmp("cache", zOpt, 5)==0 ){ + static struct OpenMode aCacheMode[] = { + { "shared", SQLITE_OPEN_SHAREDCACHE }, + { "private", SQLITE_OPEN_PRIVATECACHE }, + { 0, 0 } + }; + + mask = SQLITE_OPEN_SHAREDCACHE|SQLITE_OPEN_PRIVATECACHE; + aMode = aCacheMode; + limit = mask; + zModeType = "cache"; + } + if( nOpt==4 && memcmp("mode", zOpt, 4)==0 ){ + static struct OpenMode aOpenMode[] = { + { "ro", SQLITE_OPEN_READONLY }, + { "rw", SQLITE_OPEN_READWRITE }, + { "rwc", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE }, + { "memory", SQLITE_OPEN_MEMORY }, + { 0, 0 } + }; + + mask = SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE + | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY; + aMode = aOpenMode; + limit = mask & flags; + zModeType = "access"; + } + + if( aMode ){ + int i; + int mode = 0; + for(i=0; aMode[i].z; i++){ + const char *z = aMode[i].z; + if( nVal==sqlite3Strlen30(z) && 0==memcmp(zVal, z, nVal) ){ + mode = aMode[i].mode; + break; + } + } + if( mode==0 ){ + *pzErrMsg = sqlite3_mprintf("no such %s mode: %s", zModeType, zVal); + rc = SQLITE_ERROR; + goto parse_uri_out; + } + if( (mode & ~SQLITE_OPEN_MEMORY)>limit ){ + *pzErrMsg = sqlite3_mprintf("%s mode not allowed: %s", + zModeType, zVal); + rc = SQLITE_PERM; + goto parse_uri_out; + } + flags = (flags & ~mask) | mode; + } + } + + zOpt = &zVal[nVal+1]; + } + + }else{ + zFile = sqlite3_malloc64(nUri+8); + if( !zFile ) return SQLITE_NOMEM_BKPT; + memset(zFile, 0, 4); + zFile += 4; + if( nUri ){ + memcpy(zFile, zUri, nUri); + } + memset(zFile+nUri, 0, 4); + flags &= ~SQLITE_OPEN_URI; + } + + *ppVfs = sqlite3_vfs_find(zVfs); + if( *ppVfs==0 ){ + *pzErrMsg = sqlite3_mprintf("no such vfs: %s", zVfs); + rc = SQLITE_ERROR; + } + parse_uri_out: + if( rc!=SQLITE_OK ){ + sqlite3_free_filename(zFile); + zFile = 0; + } + *pFlags = flags; + *pzFile = zFile; + return rc; +} + +/* +** This routine does the core work of extracting URI parameters from a +** database filename for the sqlite3_uri_parameter() interface. +*/ +static const char *uriParameter(const char *zFilename, const char *zParam){ + zFilename += sqlite3Strlen30(zFilename) + 1; + while( ALWAYS(zFilename!=0) && zFilename[0] ){ + int x = strcmp(zFilename, zParam); + zFilename += sqlite3Strlen30(zFilename) + 1; + if( x==0 ) return zFilename; + zFilename += sqlite3Strlen30(zFilename) + 1; + } + return 0; +} + + + +/* +** This routine does the work of opening a database on behalf of +** sqlite3_open() and sqlite3_open16(). The database filename "zFilename" +** is UTF-8 encoded. +*/ +static int openDatabase( + const char *zFilename, /* Database filename UTF-8 encoded */ + sqlite3 **ppDb, /* OUT: Returned database handle */ + unsigned int flags, /* Operational flags */ + const char *zVfs /* Name of the VFS to use */ +){ + sqlite3 *db; /* Store allocated handle here */ + int rc; /* Return code */ + int isThreadsafe; /* True for threadsafe connections */ + char *zOpen = 0; /* Filename argument to pass to BtreeOpen() */ + char *zErrMsg = 0; /* Error message from sqlite3ParseUri() */ + int i; /* Loop counter */ + +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppDb==0 ) return SQLITE_MISUSE_BKPT; +#endif + *ppDb = 0; +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + + if( sqlite3GlobalConfig.bCoreMutex==0 ){ + isThreadsafe = 0; + }else if( flags & SQLITE_OPEN_NOMUTEX ){ + isThreadsafe = 0; + }else if( flags & SQLITE_OPEN_FULLMUTEX ){ + isThreadsafe = 1; + }else{ + isThreadsafe = sqlite3GlobalConfig.bFullMutex; + } + + if( flags & SQLITE_OPEN_PRIVATECACHE ){ + flags &= ~SQLITE_OPEN_SHAREDCACHE; + }else if( sqlite3GlobalConfig.sharedCacheEnabled ){ + flags |= SQLITE_OPEN_SHAREDCACHE; + } + + /* Remove harmful bits from the flags parameter + ** + ** The SQLITE_OPEN_NOMUTEX and SQLITE_OPEN_FULLMUTEX flags were + ** dealt with in the previous code block. Besides these, the only + ** valid input flags for sqlite3_open_v2() are SQLITE_OPEN_READONLY, + ** SQLITE_OPEN_READWRITE, SQLITE_OPEN_CREATE, SQLITE_OPEN_SHAREDCACHE, + ** SQLITE_OPEN_PRIVATECACHE, SQLITE_OPEN_EXRESCODE, and some reserved + ** bits. Silently mask off all other flags. + */ + flags &= ~( SQLITE_OPEN_DELETEONCLOSE | + SQLITE_OPEN_EXCLUSIVE | + SQLITE_OPEN_MAIN_DB | + SQLITE_OPEN_TEMP_DB | + SQLITE_OPEN_TRANSIENT_DB | + SQLITE_OPEN_MAIN_JOURNAL | + SQLITE_OPEN_TEMP_JOURNAL | + SQLITE_OPEN_SUBJOURNAL | + SQLITE_OPEN_SUPER_JOURNAL | + SQLITE_OPEN_NOMUTEX | + SQLITE_OPEN_FULLMUTEX | + SQLITE_OPEN_WAL + ); + + /* Allocate the sqlite data structure */ + db = sqlite3MallocZero( sizeof(sqlite3) ); + if( db==0 ) goto opendb_out; + if( isThreadsafe +#ifdef SQLITE_ENABLE_MULTITHREADED_CHECKS + || sqlite3GlobalConfig.bCoreMutex +#endif + ){ + db->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); + if( db->mutex==0 ){ + sqlite3_free(db); + db = 0; + goto opendb_out; + } + if( isThreadsafe==0 ){ + sqlite3MutexWarnOnContention(db->mutex); + } + } + sqlite3_mutex_enter(db->mutex); + db->errMask = (flags & SQLITE_OPEN_EXRESCODE)!=0 ? 0xffffffff : 0xff; + db->nDb = 2; + db->eOpenState = SQLITE_STATE_BUSY; + db->aDb = db->aDbStatic; + db->lookaside.bDisable = 1; + db->lookaside.sz = 0; + + assert( sizeof(db->aLimit)==sizeof(aHardLimit) ); + memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); + db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS; + db->autoCommit = 1; + db->nextAutovac = -1; + db->szMmap = sqlite3GlobalConfig.szMmap; + db->nextPagesize = 0; + db->init.azInit = sqlite3StdType; /* Any array of string ptrs will do */ +#ifdef SQLITE_ENABLE_SORTER_MMAP + /* Beginning with version 3.37.0, using the VFS xFetch() API to memory-map + ** the temporary files used to do external sorts (see code in vdbesort.c) + ** is disabled. It can still be used either by defining + ** SQLITE_ENABLE_SORTER_MMAP at compile time or by using the + ** SQLITE_TESTCTRL_SORTER_MMAP test-control at runtime. */ + db->nMaxSorterMmap = 0x7FFFFFFF; +#endif + db->flags |= SQLITE_ShortColNames + | SQLITE_EnableTrigger + | SQLITE_EnableView + | SQLITE_CacheSpill +#if !defined(SQLITE_TRUSTED_SCHEMA) || SQLITE_TRUSTED_SCHEMA+0!=0 + | SQLITE_TrustedSchema +#endif +/* The SQLITE_DQS compile-time option determines the default settings +** for SQLITE_DBCONFIG_DQS_DDL and SQLITE_DBCONFIG_DQS_DML. +** +** SQLITE_DQS SQLITE_DBCONFIG_DQS_DDL SQLITE_DBCONFIG_DQS_DML +** ---------- ----------------------- ----------------------- +** undefined on on +** 3 on on +** 2 on off +** 1 off on +** 0 off off +** +** Legacy behavior is 3 (double-quoted string literals are allowed anywhere) +** and so that is the default. But developers are encouranged to use +** -DSQLITE_DQS=0 (best) or -DSQLITE_DQS=1 (second choice) if possible. +*/ +#if !defined(SQLITE_DQS) +# define SQLITE_DQS 3 +#endif +#if (SQLITE_DQS&1)==1 + | SQLITE_DqsDML +#endif +#if (SQLITE_DQS&2)==2 + | SQLITE_DqsDDL +#endif + +#if !defined(SQLITE_DEFAULT_AUTOMATIC_INDEX) || SQLITE_DEFAULT_AUTOMATIC_INDEX + | SQLITE_AutoIndex +#endif +#if SQLITE_DEFAULT_CKPTFULLFSYNC + | SQLITE_CkptFullFSync +#endif +#if SQLITE_DEFAULT_FILE_FORMAT<4 + | SQLITE_LegacyFileFmt +#endif +#ifdef SQLITE_ENABLE_LOAD_EXTENSION + | SQLITE_LoadExtension +#endif +#if SQLITE_DEFAULT_RECURSIVE_TRIGGERS + | SQLITE_RecTriggers +#endif +#if defined(SQLITE_DEFAULT_FOREIGN_KEYS) && SQLITE_DEFAULT_FOREIGN_KEYS + | SQLITE_ForeignKeys +#endif +#if defined(SQLITE_REVERSE_UNORDERED_SELECTS) + | SQLITE_ReverseOrder +#endif +#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) + | SQLITE_CellSizeCk +#endif +#if defined(SQLITE_ENABLE_FTS3_TOKENIZER) + | SQLITE_Fts3Tokenizer +#endif +#if defined(SQLITE_ENABLE_QPSG) + | SQLITE_EnableQPSG +#endif +#if defined(SQLITE_DEFAULT_DEFENSIVE) + | SQLITE_Defensive +#endif +#if defined(SQLITE_DEFAULT_LEGACY_ALTER_TABLE) + | SQLITE_LegacyAlter +#endif + ; + sqlite3HashInit(&db->aCollSeq); +#ifndef SQLITE_OMIT_VIRTUALTABLE + sqlite3HashInit(&db->aModule); +#endif + + /* Add the default collation sequence BINARY. BINARY works for both UTF-8 + ** and UTF-16, so add a version for each to avoid any unnecessary + ** conversions. The only error that can occur here is a malloc() failure. + ** + ** EVIDENCE-OF: R-52786-44878 SQLite defines three built-in collating + ** functions: + */ + createCollation(db, sqlite3StrBINARY, SQLITE_UTF8, 0, binCollFunc, 0); + createCollation(db, sqlite3StrBINARY, SQLITE_UTF16BE, 0, binCollFunc, 0); + createCollation(db, sqlite3StrBINARY, SQLITE_UTF16LE, 0, binCollFunc, 0); + createCollation(db, "NOCASE", SQLITE_UTF8, 0, nocaseCollatingFunc, 0); + createCollation(db, "RTRIM", SQLITE_UTF8, 0, rtrimCollFunc, 0); + if( db->mallocFailed ){ + goto opendb_out; + } + + /* Parse the filename/URI argument + ** + ** Only allow sensible combinations of bits in the flags argument. + ** Throw an error if any non-sense combination is used. If we + ** do not block illegal combinations here, it could trigger + ** assert() statements in deeper layers. Sensible combinations + ** are: + ** + ** 1: SQLITE_OPEN_READONLY + ** 2: SQLITE_OPEN_READWRITE + ** 6: SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE + */ + db->openFlags = flags; + assert( SQLITE_OPEN_READONLY == 0x01 ); + assert( SQLITE_OPEN_READWRITE == 0x02 ); + assert( SQLITE_OPEN_CREATE == 0x04 ); + testcase( (1<<(flags&7))==0x02 ); /* READONLY */ + testcase( (1<<(flags&7))==0x04 ); /* READWRITE */ + testcase( (1<<(flags&7))==0x40 ); /* READWRITE | CREATE */ + if( ((1<<(flags&7)) & 0x46)==0 ){ + rc = SQLITE_MISUSE_BKPT; /* IMP: R-18321-05872 */ + }else{ + rc = sqlite3ParseUri(zVfs, zFilename, &flags, &db->pVfs, &zOpen, &zErrMsg); + } + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM ) sqlite3OomFault(db); + sqlite3ErrorWithMsg(db, rc, zErrMsg ? "%s" : 0, zErrMsg); + sqlite3_free(zErrMsg); + goto opendb_out; + } + + /* Open the backend database driver */ + rc = sqlite3BtreeOpen(db->pVfs, zOpen, db, &db->aDb[0].pBt, 0, + flags | SQLITE_OPEN_MAIN_DB); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_IOERR_NOMEM ){ + rc = SQLITE_NOMEM_BKPT; + } + sqlite3Error(db, rc); + goto opendb_out; + } + sqlite3BtreeEnter(db->aDb[0].pBt); + db->aDb[0].pSchema = sqlite3SchemaGet(db, db->aDb[0].pBt); + if( !db->mallocFailed ){ + sqlite3SetTextEncoding(db, SCHEMA_ENC(db)); + } + sqlite3BtreeLeave(db->aDb[0].pBt); + db->aDb[1].pSchema = sqlite3SchemaGet(db, 0); + + /* The default safety_level for the main database is FULL; for the temp + ** database it is OFF. This matches the pager layer defaults. + */ + db->aDb[0].zDbSName = "main"; + db->aDb[0].safety_level = SQLITE_DEFAULT_SYNCHRONOUS+1; + db->aDb[1].zDbSName = "temp"; + db->aDb[1].safety_level = PAGER_SYNCHRONOUS_OFF; + + db->eOpenState = SQLITE_STATE_OPEN; + if( db->mallocFailed ){ + goto opendb_out; + } + + /* Register all built-in functions, but do not attempt to read the + ** database schema yet. This is delayed until the first time the database + ** is accessed. + */ + sqlite3Error(db, SQLITE_OK); + sqlite3RegisterPerConnectionBuiltinFunctions(db); + rc = sqlite3_errcode(db); + + + /* Load compiled-in extensions */ + for(i=0; rc==SQLITE_OK && imDbFlags |= DBFLAG_InternalFunc; +#endif + + /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking + ** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking + ** mode. Doing nothing at all also makes NORMAL the default. + */ +#ifdef SQLITE_DEFAULT_LOCKING_MODE + db->dfltLockMode = SQLITE_DEFAULT_LOCKING_MODE; + sqlite3PagerLockingMode(sqlite3BtreePager(db->aDb[0].pBt), + SQLITE_DEFAULT_LOCKING_MODE); +#endif + + if( rc ) sqlite3Error(db, rc); + + /* Enable the lookaside-malloc subsystem */ + setupLookaside(db, 0, sqlite3GlobalConfig.szLookaside, + sqlite3GlobalConfig.nLookaside); + + sqlite3_wal_autocheckpoint(db, SQLITE_DEFAULT_WAL_AUTOCHECKPOINT); + +opendb_out: + if( db ){ + assert( db->mutex!=0 || isThreadsafe==0 + || sqlite3GlobalConfig.bFullMutex==0 ); + sqlite3_mutex_leave(db->mutex); + } + rc = sqlite3_errcode(db); + assert( db!=0 || (rc&0xff)==SQLITE_NOMEM ); + if( (rc&0xff)==SQLITE_NOMEM ){ + sqlite3_close(db); + db = 0; + }else if( rc!=SQLITE_OK ){ + db->eOpenState = SQLITE_STATE_SICK; + } + *ppDb = db; +#ifdef SQLITE_ENABLE_SQLLOG + if( sqlite3GlobalConfig.xSqllog ){ + /* Opening a db handle. Fourth parameter is passed 0. */ + void *pArg = sqlite3GlobalConfig.pSqllogArg; + sqlite3GlobalConfig.xSqllog(pArg, db, zFilename, 0); + } +#endif + sqlite3_free_filename(zOpen); + return rc; +} + + +/* +** Open a new database handle. +*/ +SQLITE_API int sqlite3_open( + const char *zFilename, + sqlite3 **ppDb +){ + return openDatabase(zFilename, ppDb, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0); +} +SQLITE_API int sqlite3_open_v2( + const char *filename, /* Database filename (UTF-8) */ + sqlite3 **ppDb, /* OUT: SQLite db handle */ + int flags, /* Flags */ + const char *zVfs /* Name of VFS module to use */ +){ + return openDatabase(filename, ppDb, (unsigned int)flags, zVfs); +} + +#ifndef SQLITE_OMIT_UTF16 +/* +** Open a new database handle. +*/ +SQLITE_API int sqlite3_open16( + const void *zFilename, + sqlite3 **ppDb +){ + char const *zFilename8; /* zFilename encoded in UTF-8 instead of UTF-16 */ + sqlite3_value *pVal; + int rc; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppDb==0 ) return SQLITE_MISUSE_BKPT; +#endif + *ppDb = 0; +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + if( zFilename==0 ) zFilename = "\000\000"; + pVal = sqlite3ValueNew(0); + sqlite3ValueSetStr(pVal, -1, zFilename, SQLITE_UTF16NATIVE, SQLITE_STATIC); + zFilename8 = sqlite3ValueText(pVal, SQLITE_UTF8); + if( zFilename8 ){ + rc = openDatabase(zFilename8, ppDb, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0); + assert( *ppDb || rc==SQLITE_NOMEM ); + if( rc==SQLITE_OK && !DbHasProperty(*ppDb, 0, DB_SchemaLoaded) ){ + SCHEMA_ENC(*ppDb) = ENC(*ppDb) = SQLITE_UTF16NATIVE; + } + }else{ + rc = SQLITE_NOMEM_BKPT; + } + sqlite3ValueFree(pVal); + + return rc & 0xff; +} +#endif /* SQLITE_OMIT_UTF16 */ + +/* +** Register a new collation sequence with the database handle db. +*/ +SQLITE_API int sqlite3_create_collation( + sqlite3* db, + const char *zName, + int enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*) +){ + return sqlite3_create_collation_v2(db, zName, enc, pCtx, xCompare, 0); +} + +/* +** Register a new collation sequence with the database handle db. +*/ +SQLITE_API int sqlite3_create_collation_v2( + sqlite3* db, + const char *zName, + int enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*), + void(*xDel)(void*) +){ + int rc; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + assert( !db->mallocFailed ); + rc = createCollation(db, zName, (u8)enc, pCtx, xCompare, xDel); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +#ifndef SQLITE_OMIT_UTF16 +/* +** Register a new collation sequence with the database handle db. +*/ +SQLITE_API int sqlite3_create_collation16( + sqlite3* db, + const void *zName, + int enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*) +){ + int rc = SQLITE_OK; + char *zName8; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + assert( !db->mallocFailed ); + zName8 = sqlite3Utf16to8(db, zName, -1, SQLITE_UTF16NATIVE); + if( zName8 ){ + rc = createCollation(db, zName8, (u8)enc, pCtx, xCompare, 0); + sqlite3DbFree(db, zName8); + } + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} +#endif /* SQLITE_OMIT_UTF16 */ + +/* +** Register a collation sequence factory callback with the database handle +** db. Replace any previously installed collation sequence factory. +*/ +SQLITE_API int sqlite3_collation_needed( + sqlite3 *db, + void *pCollNeededArg, + void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*) +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->xCollNeeded = xCollNeeded; + db->xCollNeeded16 = 0; + db->pCollNeededArg = pCollNeededArg; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_UTF16 +/* +** Register a collation sequence factory callback with the database handle +** db. Replace any previously installed collation sequence factory. +*/ +SQLITE_API int sqlite3_collation_needed16( + sqlite3 *db, + void *pCollNeededArg, + void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*) +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->xCollNeeded = 0; + db->xCollNeeded16 = xCollNeeded16; + db->pCollNeededArg = pCollNeededArg; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} +#endif /* SQLITE_OMIT_UTF16 */ + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** This function is now an anachronism. It used to be used to recover from a +** malloc() failure, but SQLite now does this automatically. +*/ +SQLITE_API int sqlite3_global_recover(void){ + return SQLITE_OK; +} +#endif + +/* +** Test to see whether or not the database connection is in autocommit +** mode. Return TRUE if it is and FALSE if not. Autocommit mode is on +** by default. Autocommit is disabled by a BEGIN statement and reenabled +** by the next COMMIT or ROLLBACK. +*/ +SQLITE_API int sqlite3_get_autocommit(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return db->autoCommit; +} + +/* +** The following routines are substitutes for constants SQLITE_CORRUPT, +** SQLITE_MISUSE, SQLITE_CANTOPEN, SQLITE_NOMEM and possibly other error +** constants. They serve two purposes: +** +** 1. Serve as a convenient place to set a breakpoint in a debugger +** to detect when version error conditions occurs. +** +** 2. Invoke sqlite3_log() to provide the source code location where +** a low-level error is first detected. +*/ +SQLITE_PRIVATE int sqlite3ReportError(int iErr, int lineno, const char *zType){ + sqlite3_log(iErr, "%s at line %d of [%.10s]", + zType, lineno, 20+sqlite3_sourceid()); + return iErr; +} +SQLITE_PRIVATE int sqlite3CorruptError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + return sqlite3ReportError(SQLITE_CORRUPT, lineno, "database corruption"); +} +SQLITE_PRIVATE int sqlite3MisuseError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + return sqlite3ReportError(SQLITE_MISUSE, lineno, "misuse"); +} +SQLITE_PRIVATE int sqlite3CantopenError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + return sqlite3ReportError(SQLITE_CANTOPEN, lineno, "cannot open file"); +} +#if defined(SQLITE_DEBUG) || defined(SQLITE_ENABLE_CORRUPT_PGNO) +SQLITE_PRIVATE int sqlite3CorruptPgnoError(int lineno, Pgno pgno){ + char zMsg[100]; + sqlite3_snprintf(sizeof(zMsg), zMsg, "database corruption page %d", pgno); + testcase( sqlite3GlobalConfig.xLog!=0 ); + return sqlite3ReportError(SQLITE_CORRUPT, lineno, zMsg); +} +#endif +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3NomemError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + return sqlite3ReportError(SQLITE_NOMEM, lineno, "OOM"); +} +SQLITE_PRIVATE int sqlite3IoerrnomemError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + return sqlite3ReportError(SQLITE_IOERR_NOMEM, lineno, "I/O OOM error"); +} +#endif + +#ifndef SQLITE_OMIT_DEPRECATED +/* +** This is a convenience routine that makes sure that all thread-specific +** data for this thread has been deallocated. +** +** SQLite no longer uses thread-specific data so this routine is now a +** no-op. It is retained for historical compatibility. +*/ +SQLITE_API void sqlite3_thread_cleanup(void){ +} +#endif + +/* +** Return meta information about a specific column of a database table. +** See comment in sqlite3.h (sqlite.h.in) for details. +*/ +SQLITE_API int sqlite3_table_column_metadata( + sqlite3 *db, /* Connection handle */ + const char *zDbName, /* Database name or NULL */ + const char *zTableName, /* Table name */ + const char *zColumnName, /* Column name */ + char const **pzDataType, /* OUTPUT: Declared data type */ + char const **pzCollSeq, /* OUTPUT: Collation sequence name */ + int *pNotNull, /* OUTPUT: True if NOT NULL constraint exists */ + int *pPrimaryKey, /* OUTPUT: True if column part of PK */ + int *pAutoinc /* OUTPUT: True if column is auto-increment */ +){ + int rc; + char *zErrMsg = 0; + Table *pTab = 0; + Column *pCol = 0; + int iCol = 0; + char const *zDataType = 0; + char const *zCollSeq = 0; + int notnull = 0; + int primarykey = 0; + int autoinc = 0; + + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zTableName==0 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + + /* Ensure the database schema has been loaded */ + sqlite3_mutex_enter(db->mutex); + sqlite3BtreeEnterAll(db); + rc = sqlite3Init(db, &zErrMsg); + if( SQLITE_OK!=rc ){ + goto error_out; + } + + /* Locate the table in question */ + pTab = sqlite3FindTable(db, zTableName, zDbName); + if( !pTab || IsView(pTab) ){ + pTab = 0; + goto error_out; + } + + /* Find the column for which info is requested */ + if( zColumnName==0 ){ + /* Query for existance of table only */ + }else{ + for(iCol=0; iColnCol; iCol++){ + pCol = &pTab->aCol[iCol]; + if( 0==sqlite3StrICmp(pCol->zCnName, zColumnName) ){ + break; + } + } + if( iCol==pTab->nCol ){ + if( HasRowid(pTab) && sqlite3IsRowid(zColumnName) ){ + iCol = pTab->iPKey; + pCol = iCol>=0 ? &pTab->aCol[iCol] : 0; + }else{ + pTab = 0; + goto error_out; + } + } + } + + /* The following block stores the meta information that will be returned + ** to the caller in local variables zDataType, zCollSeq, notnull, primarykey + ** and autoinc. At this point there are two possibilities: + ** + ** 1. The specified column name was rowid", "oid" or "_rowid_" + ** and there is no explicitly declared IPK column. + ** + ** 2. The table is not a view and the column name identified an + ** explicitly declared column. Copy meta information from *pCol. + */ + if( pCol ){ + zDataType = sqlite3ColumnType(pCol,0); + zCollSeq = sqlite3ColumnColl(pCol); + notnull = pCol->notNull!=0; + primarykey = (pCol->colFlags & COLFLAG_PRIMKEY)!=0; + autoinc = pTab->iPKey==iCol && (pTab->tabFlags & TF_Autoincrement)!=0; + }else{ + zDataType = "INTEGER"; + primarykey = 1; + } + if( !zCollSeq ){ + zCollSeq = sqlite3StrBINARY; + } + +error_out: + sqlite3BtreeLeaveAll(db); + + /* Whether the function call succeeded or failed, set the output parameters + ** to whatever their local counterparts contain. If an error did occur, + ** this has the effect of zeroing all output parameters. + */ + if( pzDataType ) *pzDataType = zDataType; + if( pzCollSeq ) *pzCollSeq = zCollSeq; + if( pNotNull ) *pNotNull = notnull; + if( pPrimaryKey ) *pPrimaryKey = primarykey; + if( pAutoinc ) *pAutoinc = autoinc; + + if( SQLITE_OK==rc && !pTab ){ + sqlite3DbFree(db, zErrMsg); + zErrMsg = sqlite3MPrintf(db, "no such table column: %s.%s", zTableName, + zColumnName); + rc = SQLITE_ERROR; + } + sqlite3ErrorWithMsg(db, rc, (zErrMsg?"%s":0), zErrMsg); + sqlite3DbFree(db, zErrMsg); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** Sleep for a little while. Return the amount of time slept. +*/ +SQLITE_API int sqlite3_sleep(int ms){ + sqlite3_vfs *pVfs; + int rc; + pVfs = sqlite3_vfs_find(0); + if( pVfs==0 ) return 0; + + /* This function works in milliseconds, but the underlying OsSleep() + ** API uses microseconds. Hence the 1000's. + */ + rc = (sqlite3OsSleep(pVfs, 1000*ms)/1000); + return rc; +} + +/* +** Enable or disable the extended result codes. +*/ +SQLITE_API int sqlite3_extended_result_codes(sqlite3 *db, int onoff){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->errMask = onoff ? 0xffffffff : 0xff; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +/* +** Invoke the xFileControl method on a particular database. +*/ +SQLITE_API int sqlite3_file_control(sqlite3 *db, const char *zDbName, int op, void *pArg){ + int rc = SQLITE_ERROR; + Btree *pBtree; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + pBtree = sqlite3DbNameToBtree(db, zDbName); + if( pBtree ){ + Pager *pPager; + sqlite3_file *fd; + sqlite3BtreeEnter(pBtree); + pPager = sqlite3BtreePager(pBtree); + assert( pPager!=0 ); + fd = sqlite3PagerFile(pPager); + assert( fd!=0 ); + if( op==SQLITE_FCNTL_FILE_POINTER ){ + *(sqlite3_file**)pArg = fd; + rc = SQLITE_OK; + }else if( op==SQLITE_FCNTL_VFS_POINTER ){ + *(sqlite3_vfs**)pArg = sqlite3PagerVfs(pPager); + rc = SQLITE_OK; + }else if( op==SQLITE_FCNTL_JOURNAL_POINTER ){ + *(sqlite3_file**)pArg = sqlite3PagerJrnlFile(pPager); + rc = SQLITE_OK; + }else if( op==SQLITE_FCNTL_DATA_VERSION ){ + *(unsigned int*)pArg = sqlite3PagerDataVersion(pPager); + rc = SQLITE_OK; + }else if( op==SQLITE_FCNTL_RESERVE_BYTES ){ + int iNew = *(int*)pArg; + *(int*)pArg = sqlite3BtreeGetRequestedReserve(pBtree); + if( iNew>=0 && iNew<=255 ){ + sqlite3BtreeSetPageSize(pBtree, 0, iNew, 0); + } + rc = SQLITE_OK; + }else{ + int nSave = db->busyHandler.nBusy; + rc = sqlite3OsFileControl(fd, op, pArg); + db->busyHandler.nBusy = nSave; + } + sqlite3BtreeLeave(pBtree); + } + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** Interface to the testing logic. +*/ +SQLITE_API int sqlite3_test_control(int op, ...){ + int rc = 0; +#ifdef SQLITE_UNTESTABLE + UNUSED_PARAMETER(op); +#else + va_list ap; + va_start(ap, op); + switch( op ){ + + /* + ** Save the current state of the PRNG. + */ + case SQLITE_TESTCTRL_PRNG_SAVE: { + sqlite3PrngSaveState(); + break; + } + + /* + ** Restore the state of the PRNG to the last state saved using + ** PRNG_SAVE. If PRNG_SAVE has never before been called, then + ** this verb acts like PRNG_RESET. + */ + case SQLITE_TESTCTRL_PRNG_RESTORE: { + sqlite3PrngRestoreState(); + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_PRNG_SEED, int x, sqlite3 *db); + ** + ** Control the seed for the pseudo-random number generator (PRNG) that + ** is built into SQLite. Cases: + ** + ** x!=0 && db!=0 Seed the PRNG to the current value of the + ** schema cookie in the main database for db, or + ** x if the schema cookie is zero. This case + ** is convenient to use with database fuzzers + ** as it allows the fuzzer some control over the + ** the PRNG seed. + ** + ** x!=0 && db==0 Seed the PRNG to the value of x. + ** + ** x==0 && db==0 Revert to default behavior of using the + ** xRandomness method on the primary VFS. + ** + ** This test-control also resets the PRNG so that the new seed will + ** be used for the next call to sqlite3_randomness(). + */ +#ifndef SQLITE_OMIT_WSD + case SQLITE_TESTCTRL_PRNG_SEED: { + int x = va_arg(ap, int); + int y; + sqlite3 *db = va_arg(ap, sqlite3*); + assert( db==0 || db->aDb[0].pSchema!=0 ); + if( db && (y = db->aDb[0].pSchema->schema_cookie)!=0 ){ x = y; } + sqlite3Config.iPrngSeed = x; + sqlite3_randomness(0,0); + break; + } +#endif + + /* + ** sqlite3_test_control(BITVEC_TEST, size, program) + ** + ** Run a test against a Bitvec object of size. The program argument + ** is an array of integers that defines the test. Return -1 on a + ** memory allocation error, 0 on success, or non-zero for an error. + ** See the sqlite3BitvecBuiltinTest() for additional information. + */ + case SQLITE_TESTCTRL_BITVEC_TEST: { + int sz = va_arg(ap, int); + int *aProg = va_arg(ap, int*); + rc = sqlite3BitvecBuiltinTest(sz, aProg); + break; + } + + /* + ** sqlite3_test_control(FAULT_INSTALL, xCallback) + ** + ** Arrange to invoke xCallback() whenever sqlite3FaultSim() is called, + ** if xCallback is not NULL. + ** + ** As a test of the fault simulator mechanism itself, sqlite3FaultSim(0) + ** is called immediately after installing the new callback and the return + ** value from sqlite3FaultSim(0) becomes the return from + ** sqlite3_test_control(). + */ + case SQLITE_TESTCTRL_FAULT_INSTALL: { + /* A bug in MSVC prevents it from understanding pointers to functions + ** types in the second argument to va_arg(). Work around the problem + ** using a typedef. + ** http://support.microsoft.com/kb/47961 <-- dead hyperlink + ** Search at http://web.archive.org/ to find the 2015-03-16 archive + ** of the link above to see the original text. + ** sqlite3GlobalConfig.xTestCallback = va_arg(ap, int(*)(int)); + */ + typedef int(*sqlite3FaultFuncType)(int); + sqlite3GlobalConfig.xTestCallback = va_arg(ap, sqlite3FaultFuncType); + rc = sqlite3FaultSim(0); + break; + } + + /* + ** sqlite3_test_control(BENIGN_MALLOC_HOOKS, xBegin, xEnd) + ** + ** Register hooks to call to indicate which malloc() failures + ** are benign. + */ + case SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS: { + typedef void (*void_function)(void); + void_function xBenignBegin; + void_function xBenignEnd; + xBenignBegin = va_arg(ap, void_function); + xBenignEnd = va_arg(ap, void_function); + sqlite3BenignMallocHooks(xBenignBegin, xBenignEnd); + break; + } + + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_PENDING_BYTE, unsigned int X) + ** + ** Set the PENDING byte to the value in the argument, if X>0. + ** Make no changes if X==0. Return the value of the pending byte + ** as it existing before this routine was called. + ** + ** IMPORTANT: Changing the PENDING byte from 0x40000000 results in + ** an incompatible database file format. Changing the PENDING byte + ** while any database connection is open results in undefined and + ** deleterious behavior. + */ + case SQLITE_TESTCTRL_PENDING_BYTE: { + rc = PENDING_BYTE; +#ifndef SQLITE_OMIT_WSD + { + unsigned int newVal = va_arg(ap, unsigned int); + if( newVal ) sqlite3PendingByte = newVal; + } +#endif + break; + } + + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, int X) + ** + ** This action provides a run-time test to see whether or not + ** assert() was enabled at compile-time. If X is true and assert() + ** is enabled, then the return value is true. If X is true and + ** assert() is disabled, then the return value is zero. If X is + ** false and assert() is enabled, then the assertion fires and the + ** process aborts. If X is false and assert() is disabled, then the + ** return value is zero. + */ + case SQLITE_TESTCTRL_ASSERT: { + volatile int x = 0; + assert( /*side-effects-ok*/ (x = va_arg(ap,int))!=0 ); + rc = x; + break; + } + + + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, int X) + ** + ** This action provides a run-time test to see how the ALWAYS and + ** NEVER macros were defined at compile-time. + ** + ** The return value is ALWAYS(X) if X is true, or 0 if X is false. + ** + ** The recommended test is X==2. If the return value is 2, that means + ** ALWAYS() and NEVER() are both no-op pass-through macros, which is the + ** default setting. If the return value is 1, then ALWAYS() is either + ** hard-coded to true or else it asserts if its argument is false. + ** The first behavior (hard-coded to true) is the case if + ** SQLITE_TESTCTRL_ASSERT shows that assert() is disabled and the second + ** behavior (assert if the argument to ALWAYS() is false) is the case if + ** SQLITE_TESTCTRL_ASSERT shows that assert() is enabled. + ** + ** The run-time test procedure might look something like this: + ** + ** if( sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, 2)==2 ){ + ** // ALWAYS() and NEVER() are no-op pass-through macros + ** }else if( sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, 1) ){ + ** // ALWAYS(x) asserts that x is true. NEVER(x) asserts x is false. + ** }else{ + ** // ALWAYS(x) is a constant 1. NEVER(x) is a constant 0. + ** } + */ + case SQLITE_TESTCTRL_ALWAYS: { + int x = va_arg(ap,int); + rc = x ? ALWAYS(x) : 0; + break; + } + + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_BYTEORDER); + ** + ** The integer returned reveals the byte-order of the computer on which + ** SQLite is running: + ** + ** 1 big-endian, determined at run-time + ** 10 little-endian, determined at run-time + ** 432101 big-endian, determined at compile-time + ** 123410 little-endian, determined at compile-time + */ + case SQLITE_TESTCTRL_BYTEORDER: { + rc = SQLITE_BYTEORDER*100 + SQLITE_LITTLEENDIAN*10 + SQLITE_BIGENDIAN; + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS, sqlite3 *db, int N) + ** + ** Enable or disable various optimizations for testing purposes. The + ** argument N is a bitmask of optimizations to be disabled. For normal + ** operation N should be 0. The idea is that a test program (like the + ** SQL Logic Test or SLT test module) can run the same SQL multiple times + ** with various optimizations disabled to verify that the same answer + ** is obtained in every case. + */ + case SQLITE_TESTCTRL_OPTIMIZATIONS: { + sqlite3 *db = va_arg(ap, sqlite3*); + db->dbOptFlags = va_arg(ap, u32); + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_LOCALTIME_FAULT, onoff, xAlt); + ** + ** If parameter onoff is 1, subsequent calls to localtime() fail. + ** If 2, then invoke xAlt() instead of localtime(). If 0, normal + ** processing. + ** + ** xAlt arguments are void pointers, but they really want to be: + ** + ** int xAlt(const time_t*, struct tm*); + ** + ** xAlt should write results in to struct tm object of its 2nd argument + ** and return zero on success, or return non-zero on failure. + */ + case SQLITE_TESTCTRL_LOCALTIME_FAULT: { + sqlite3GlobalConfig.bLocaltimeFault = va_arg(ap, int); + if( sqlite3GlobalConfig.bLocaltimeFault==2 ){ + typedef int(*sqlite3LocaltimeType)(const void*,void*); + sqlite3GlobalConfig.xAltLocaltime = va_arg(ap, sqlite3LocaltimeType); + }else{ + sqlite3GlobalConfig.xAltLocaltime = 0; + } + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_INTERNAL_FUNCTIONS, sqlite3*); + ** + ** Toggle the ability to use internal functions on or off for + ** the database connection given in the argument. + */ + case SQLITE_TESTCTRL_INTERNAL_FUNCTIONS: { + sqlite3 *db = va_arg(ap, sqlite3*); + db->mDbFlags ^= DBFLAG_InternalFunc; + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_NEVER_CORRUPT, int); + ** + ** Set or clear a flag that indicates that the database file is always well- + ** formed and never corrupt. This flag is clear by default, indicating that + ** database files might have arbitrary corruption. Setting the flag during + ** testing causes certain assert() statements in the code to be activated + ** that demonstrat invariants on well-formed database files. + */ + case SQLITE_TESTCTRL_NEVER_CORRUPT: { + sqlite3GlobalConfig.neverCorrupt = va_arg(ap, int); + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_EXTRA_SCHEMA_CHECKS, int); + ** + ** Set or clear a flag that causes SQLite to verify that type, name, + ** and tbl_name fields of the sqlite_schema table. This is normally + ** on, but it is sometimes useful to turn it off for testing. + ** + ** 2020-07-22: Disabling EXTRA_SCHEMA_CHECKS also disables the + ** verification of rootpage numbers when parsing the schema. This + ** is useful to make it easier to reach strange internal error states + ** during testing. The EXTRA_SCHEMA_CHECKS setting is always enabled + ** in production. + */ + case SQLITE_TESTCTRL_EXTRA_SCHEMA_CHECKS: { + sqlite3GlobalConfig.bExtraSchemaChecks = va_arg(ap, int); + break; + } + + /* Set the threshold at which OP_Once counters reset back to zero. + ** By default this is 0x7ffffffe (over 2 billion), but that value is + ** too big to test in a reasonable amount of time, so this control is + ** provided to set a small and easily reachable reset value. + */ + case SQLITE_TESTCTRL_ONCE_RESET_THRESHOLD: { + sqlite3GlobalConfig.iOnceResetThreshold = va_arg(ap, int); + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_VDBE_COVERAGE, xCallback, ptr); + ** + ** Set the VDBE coverage callback function to xCallback with context + ** pointer ptr. + */ + case SQLITE_TESTCTRL_VDBE_COVERAGE: { +#ifdef SQLITE_VDBE_COVERAGE + typedef void (*branch_callback)(void*,unsigned int, + unsigned char,unsigned char); + sqlite3GlobalConfig.xVdbeBranch = va_arg(ap,branch_callback); + sqlite3GlobalConfig.pVdbeBranchArg = va_arg(ap,void*); +#endif + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_SORTER_MMAP, db, nMax); */ + case SQLITE_TESTCTRL_SORTER_MMAP: { + sqlite3 *db = va_arg(ap, sqlite3*); + db->nMaxSorterMmap = va_arg(ap, int); + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_ISINIT); + ** + ** Return SQLITE_OK if SQLite has been initialized and SQLITE_ERROR if + ** not. + */ + case SQLITE_TESTCTRL_ISINIT: { + if( sqlite3GlobalConfig.isInit==0 ) rc = SQLITE_ERROR; + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, db, dbName, onOff, tnum); + ** + ** This test control is used to create imposter tables. "db" is a pointer + ** to the database connection. dbName is the database name (ex: "main" or + ** "temp") which will receive the imposter. "onOff" turns imposter mode on + ** or off. "tnum" is the root page of the b-tree to which the imposter + ** table should connect. + ** + ** Enable imposter mode only when the schema has already been parsed. Then + ** run a single CREATE TABLE statement to construct the imposter table in + ** the parsed schema. Then turn imposter mode back off again. + ** + ** If onOff==0 and tnum>0 then reset the schema for all databases, causing + ** the schema to be reparsed the next time it is needed. This has the + ** effect of erasing all imposter tables. + */ + case SQLITE_TESTCTRL_IMPOSTER: { + sqlite3 *db = va_arg(ap, sqlite3*); + int iDb; + sqlite3_mutex_enter(db->mutex); + iDb = sqlite3FindDbName(db, va_arg(ap,const char*)); + if( iDb>=0 ){ + db->init.iDb = iDb; + db->init.busy = db->init.imposterTable = va_arg(ap,int); + db->init.newTnum = va_arg(ap,int); + if( db->init.busy==0 && db->init.newTnum>0 ){ + sqlite3ResetAllSchemasOfConnection(db); + } + } + sqlite3_mutex_leave(db->mutex); + break; + } + +#if defined(YYCOVERAGE) + /* sqlite3_test_control(SQLITE_TESTCTRL_PARSER_COVERAGE, FILE *out) + ** + ** This test control (only available when SQLite is compiled with + ** -DYYCOVERAGE) writes a report onto "out" that shows all + ** state/lookahead combinations in the parser state machine + ** which are never exercised. If any state is missed, make the + ** return code SQLITE_ERROR. + */ + case SQLITE_TESTCTRL_PARSER_COVERAGE: { + FILE *out = va_arg(ap, FILE*); + if( sqlite3ParserCoverage(out) ) rc = SQLITE_ERROR; + break; + } +#endif /* defined(YYCOVERAGE) */ + + /* sqlite3_test_control(SQLITE_TESTCTRL_RESULT_INTREAL, sqlite3_context*); + ** + ** This test-control causes the most recent sqlite3_result_int64() value + ** to be interpreted as a MEM_IntReal instead of as an MEM_Int. Normally, + ** MEM_IntReal values only arise during an INSERT operation of integer + ** values into a REAL column, so they can be challenging to test. This + ** test-control enables us to write an intreal() SQL function that can + ** inject an intreal() value at arbitrary places in an SQL statement, + ** for testing purposes. + */ + case SQLITE_TESTCTRL_RESULT_INTREAL: { + sqlite3_context *pCtx = va_arg(ap, sqlite3_context*); + sqlite3ResultIntReal(pCtx); + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_SEEK_COUNT, + ** sqlite3 *db, // Database connection + ** u64 *pnSeek // Write seek count here + ** ); + ** + ** This test-control queries the seek-counter on the "main" database + ** file. The seek-counter is written into *pnSeek and is then reset. + ** The seek-count is only available if compiled with SQLITE_DEBUG. + */ + case SQLITE_TESTCTRL_SEEK_COUNT: { + sqlite3 *db = va_arg(ap, sqlite3*); + u64 *pn = va_arg(ap, sqlite3_uint64*); + *pn = sqlite3BtreeSeekCount(db->aDb->pBt); + (void)db; /* Silence harmless unused variable warning */ + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_TRACEFLAGS, op, ptr) + ** + ** "ptr" is a pointer to a u32. + ** + ** op==0 Store the current sqlite3SelectTrace in *ptr + ** op==1 Set sqlite3SelectTrace to the value *ptr + ** op==3 Store the current sqlite3WhereTrace in *ptr + ** op==3 Set sqlite3WhereTrace to the value *ptr + */ + case SQLITE_TESTCTRL_TRACEFLAGS: { + int opTrace = va_arg(ap, int); + u32 *ptr = va_arg(ap, u32*); + switch( opTrace ){ + case 0: *ptr = sqlite3SelectTrace; break; + case 1: sqlite3SelectTrace = *ptr; break; + case 2: *ptr = sqlite3WhereTrace; break; + case 3: sqlite3WhereTrace = *ptr; break; + } + break; + } + + /* sqlite3_test_control(SQLITE_TESTCTRL_LOGEST, + ** double fIn, // Input value + ** int *pLogEst, // sqlite3LogEstFromDouble(fIn) + ** u64 *pInt, // sqlite3LogEstToInt(*pLogEst) + ** int *pLogEst2 // sqlite3LogEst(*pInt) + ** ); + ** + ** Test access for the LogEst conversion routines. + */ + case SQLITE_TESTCTRL_LOGEST: { + double rIn = va_arg(ap, double); + LogEst rLogEst = sqlite3LogEstFromDouble(rIn); + u64 iInt = sqlite3LogEstToInt(rLogEst); + va_arg(ap, int*)[0] = rLogEst; + va_arg(ap, u64*)[0] = iInt; + va_arg(ap, int*)[0] = sqlite3LogEst(iInt); + break; + } + + +#if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_WSD) + /* sqlite3_test_control(SQLITE_TESTCTRL_TUNE, id, *piValue) + ** + ** If "id" is an integer between 1 and SQLITE_NTUNE then set the value + ** of the id-th tuning parameter to *piValue. If "id" is between -1 + ** and -SQLITE_NTUNE, then write the current value of the (-id)-th + ** tuning parameter into *piValue. + ** + ** Tuning parameters are for use during transient development builds, + ** to help find the best values for constants in the query planner. + ** Access tuning parameters using the Tuning(ID) macro. Set the + ** parameters in the CLI using ".testctrl tune ID VALUE". + ** + ** Transient use only. Tuning parameters should not be used in + ** checked-in code. + */ + case SQLITE_TESTCTRL_TUNE: { + int id = va_arg(ap, int); + int *piValue = va_arg(ap, int*); + if( id>0 && id<=SQLITE_NTUNE ){ + Tuning(id) = *piValue; + }else if( id<0 && id>=-SQLITE_NTUNE ){ + *piValue = Tuning(-id); + }else{ + rc = SQLITE_NOTFOUND; + } + break; + } +#endif + } + va_end(ap); +#endif /* SQLITE_UNTESTABLE */ + return rc; +} + +/* +** The Pager stores the Database filename, Journal filename, and WAL filename +** consecutively in memory, in that order. The database filename is prefixed +** by four zero bytes. Locate the start of the database filename by searching +** backwards for the first byte following four consecutive zero bytes. +** +** This only works if the filename passed in was obtained from the Pager. +*/ +static const char *databaseName(const char *zName){ + while( zName[-1]!=0 || zName[-2]!=0 || zName[-3]!=0 || zName[-4]!=0 ){ + zName--; + } + return zName; +} + +/* +** Append text z[] to the end of p[]. Return a pointer to the first +** character after then zero terminator on the new text in p[]. +*/ +static char *appendText(char *p, const char *z){ + size_t n = strlen(z); + memcpy(p, z, n+1); + return p+n+1; +} + +/* +** Allocate memory to hold names for a database, journal file, WAL file, +** and query parameters. The pointer returned is valid for use by +** sqlite3_filename_database() and sqlite3_uri_parameter() and related +** functions. +** +** Memory layout must be compatible with that generated by the pager +** and expected by sqlite3_uri_parameter() and databaseName(). +*/ +SQLITE_API char *sqlite3_create_filename( + const char *zDatabase, + const char *zJournal, + const char *zWal, + int nParam, + const char **azParam +){ + sqlite3_int64 nByte; + int i; + char *pResult, *p; + nByte = strlen(zDatabase) + strlen(zJournal) + strlen(zWal) + 10; + for(i=0; i0 ){ + zFilename += sqlite3Strlen30(zFilename) + 1; + zFilename += sqlite3Strlen30(zFilename) + 1; + } + return zFilename[0] ? zFilename : 0; +} + +/* +** Return a boolean value for a query parameter. +*/ +SQLITE_API int sqlite3_uri_boolean(const char *zFilename, const char *zParam, int bDflt){ + const char *z = sqlite3_uri_parameter(zFilename, zParam); + bDflt = bDflt!=0; + return z ? sqlite3GetBoolean(z, bDflt) : bDflt; +} + +/* +** Return a 64-bit integer value for a query parameter. +*/ +SQLITE_API sqlite3_int64 sqlite3_uri_int64( + const char *zFilename, /* Filename as passed to xOpen */ + const char *zParam, /* URI parameter sought */ + sqlite3_int64 bDflt /* return if parameter is missing */ +){ + const char *z = sqlite3_uri_parameter(zFilename, zParam); + sqlite3_int64 v; + if( z && sqlite3DecOrHexToI64(z, &v)==0 ){ + bDflt = v; + } + return bDflt; +} + +/* +** Translate a filename that was handed to a VFS routine into the corresponding +** database, journal, or WAL file. +** +** It is an error to pass this routine a filename string that was not +** passed into the VFS from the SQLite core. Doing so is similar to +** passing free() a pointer that was not obtained from malloc() - it is +** an error that we cannot easily detect but that will likely cause memory +** corruption. +*/ +SQLITE_API const char *sqlite3_filename_database(const char *zFilename){ + if( zFilename==0 ) return 0; + return databaseName(zFilename); +} +SQLITE_API const char *sqlite3_filename_journal(const char *zFilename){ + if( zFilename==0 ) return 0; + zFilename = databaseName(zFilename); + zFilename += sqlite3Strlen30(zFilename) + 1; + while( ALWAYS(zFilename) && zFilename[0] ){ + zFilename += sqlite3Strlen30(zFilename) + 1; + zFilename += sqlite3Strlen30(zFilename) + 1; + } + return zFilename + 1; +} +SQLITE_API const char *sqlite3_filename_wal(const char *zFilename){ +#ifdef SQLITE_OMIT_WAL + return 0; +#else + zFilename = sqlite3_filename_journal(zFilename); + if( zFilename ) zFilename += sqlite3Strlen30(zFilename) + 1; + return zFilename; +#endif +} + +/* +** Return the Btree pointer identified by zDbName. Return NULL if not found. +*/ +SQLITE_PRIVATE Btree *sqlite3DbNameToBtree(sqlite3 *db, const char *zDbName){ + int iDb = zDbName ? sqlite3FindDbName(db, zDbName) : 0; + return iDb<0 ? 0 : db->aDb[iDb].pBt; +} + +/* +** Return the filename of the database associated with a database +** connection. +*/ +SQLITE_API const char *sqlite3_db_filename(sqlite3 *db, const char *zDbName){ + Btree *pBt; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + pBt = sqlite3DbNameToBtree(db, zDbName); + return pBt ? sqlite3BtreeGetFilename(pBt) : 0; +} + +/* +** Return 1 if database is read-only or 0 if read/write. Return -1 if +** no such database exists. +*/ +SQLITE_API int sqlite3_db_readonly(sqlite3 *db, const char *zDbName){ + Btree *pBt; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return -1; + } +#endif + pBt = sqlite3DbNameToBtree(db, zDbName); + return pBt ? sqlite3BtreeIsReadonly(pBt) : -1; +} + +#ifdef SQLITE_ENABLE_SNAPSHOT +/* +** Obtain a snapshot handle for the snapshot of database zDb currently +** being read by handle db. +*/ +SQLITE_API int sqlite3_snapshot_get( + sqlite3 *db, + const char *zDb, + sqlite3_snapshot **ppSnapshot +){ + int rc = SQLITE_ERROR; +#ifndef SQLITE_OMIT_WAL + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + + if( db->autoCommit==0 ){ + int iDb = sqlite3FindDbName(db, zDb); + if( iDb==0 || iDb>1 ){ + Btree *pBt = db->aDb[iDb].pBt; + if( SQLITE_TXN_WRITE!=sqlite3BtreeTxnState(pBt) ){ + rc = sqlite3BtreeBeginTrans(pBt, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerSnapshotGet(sqlite3BtreePager(pBt), ppSnapshot); + } + } + } + } + + sqlite3_mutex_leave(db->mutex); +#endif /* SQLITE_OMIT_WAL */ + return rc; +} + +/* +** Open a read-transaction on the snapshot idendified by pSnapshot. +*/ +SQLITE_API int sqlite3_snapshot_open( + sqlite3 *db, + const char *zDb, + sqlite3_snapshot *pSnapshot +){ + int rc = SQLITE_ERROR; +#ifndef SQLITE_OMIT_WAL + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } +#endif + sqlite3_mutex_enter(db->mutex); + if( db->autoCommit==0 ){ + int iDb; + iDb = sqlite3FindDbName(db, zDb); + if( iDb==0 || iDb>1 ){ + Btree *pBt = db->aDb[iDb].pBt; + if( sqlite3BtreeTxnState(pBt)!=SQLITE_TXN_WRITE ){ + Pager *pPager = sqlite3BtreePager(pBt); + int bUnlock = 0; + if( sqlite3BtreeTxnState(pBt)!=SQLITE_TXN_NONE ){ + if( db->nVdbeActive==0 ){ + rc = sqlite3PagerSnapshotCheck(pPager, pSnapshot); + if( rc==SQLITE_OK ){ + bUnlock = 1; + rc = sqlite3BtreeCommit(pBt); + } + } + }else{ + rc = SQLITE_OK; + } + if( rc==SQLITE_OK ){ + rc = sqlite3PagerSnapshotOpen(pPager, pSnapshot); + } + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeBeginTrans(pBt, 0, 0); + sqlite3PagerSnapshotOpen(pPager, 0); + } + if( bUnlock ){ + sqlite3PagerSnapshotUnlock(pPager); + } + } + } + } + + sqlite3_mutex_leave(db->mutex); +#endif /* SQLITE_OMIT_WAL */ + return rc; +} + +/* +** Recover as many snapshots as possible from the wal file associated with +** schema zDb of database db. +*/ +SQLITE_API int sqlite3_snapshot_recover(sqlite3 *db, const char *zDb){ + int rc = SQLITE_ERROR; + int iDb; +#ifndef SQLITE_OMIT_WAL + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } +#endif + + sqlite3_mutex_enter(db->mutex); + iDb = sqlite3FindDbName(db, zDb); + if( iDb==0 || iDb>1 ){ + Btree *pBt = db->aDb[iDb].pBt; + if( SQLITE_TXN_NONE==sqlite3BtreeTxnState(pBt) ){ + rc = sqlite3BtreeBeginTrans(pBt, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerSnapshotRecover(sqlite3BtreePager(pBt)); + sqlite3BtreeCommit(pBt); + } + } + } + sqlite3_mutex_leave(db->mutex); +#endif /* SQLITE_OMIT_WAL */ + return rc; +} + +/* +** Free a snapshot handle obtained from sqlite3_snapshot_get(). +*/ +SQLITE_API void sqlite3_snapshot_free(sqlite3_snapshot *pSnapshot){ + sqlite3_free(pSnapshot); +} +#endif /* SQLITE_ENABLE_SNAPSHOT */ + +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS +/* +** Given the name of a compile-time option, return true if that option +** was used and false if not. +** +** The name can optionally begin with "SQLITE_" but the "SQLITE_" prefix +** is not required for a match. +*/ +SQLITE_API int sqlite3_compileoption_used(const char *zOptName){ + int i, n; + int nOpt; + const char **azCompileOpt; + +#if SQLITE_ENABLE_API_ARMOR + if( zOptName==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + + azCompileOpt = sqlite3CompileOptions(&nOpt); + + if( sqlite3StrNICmp(zOptName, "SQLITE_", 7)==0 ) zOptName += 7; + n = sqlite3Strlen30(zOptName); + + /* Since nOpt is normally in single digits, a linear search is + ** adequate. No need for a binary search. */ + for(i=0; i=0 && NpNextBlocked){ + int seen = 0; + sqlite3 *p2; + + /* Verify property (1) */ + assert( p->pUnlockConnection || p->pBlockingConnection ); + + /* Verify property (2) */ + for(p2=sqlite3BlockedList; p2!=p; p2=p2->pNextBlocked){ + if( p2->xUnlockNotify==p->xUnlockNotify ) seen = 1; + assert( p2->xUnlockNotify==p->xUnlockNotify || !seen ); + assert( db==0 || p->pUnlockConnection!=db ); + assert( db==0 || p->pBlockingConnection!=db ); + } + } +} +#else +# define checkListProperties(x) +#endif + +/* +** Remove connection db from the blocked connections list. If connection +** db is not currently a part of the list, this function is a no-op. +*/ +static void removeFromBlockedList(sqlite3 *db){ + sqlite3 **pp; + assertMutexHeld(); + for(pp=&sqlite3BlockedList; *pp; pp = &(*pp)->pNextBlocked){ + if( *pp==db ){ + *pp = (*pp)->pNextBlocked; + break; + } + } +} + +/* +** Add connection db to the blocked connections list. It is assumed +** that it is not already a part of the list. +*/ +static void addToBlockedList(sqlite3 *db){ + sqlite3 **pp; + assertMutexHeld(); + for( + pp=&sqlite3BlockedList; + *pp && (*pp)->xUnlockNotify!=db->xUnlockNotify; + pp=&(*pp)->pNextBlocked + ); + db->pNextBlocked = *pp; + *pp = db; +} + +/* +** Obtain the STATIC_MAIN mutex. +*/ +static void enterMutex(void){ + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN)); + checkListProperties(0); +} + +/* +** Release the STATIC_MAIN mutex. +*/ +static void leaveMutex(void){ + assertMutexHeld(); + checkListProperties(0); + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN)); +} + +/* +** Register an unlock-notify callback. +** +** This is called after connection "db" has attempted some operation +** but has received an SQLITE_LOCKED error because another connection +** (call it pOther) in the same process was busy using the same shared +** cache. pOther is found by looking at db->pBlockingConnection. +** +** If there is no blocking connection, the callback is invoked immediately, +** before this routine returns. +** +** If pOther is already blocked on db, then report SQLITE_LOCKED, to indicate +** a deadlock. +** +** Otherwise, make arrangements to invoke xNotify when pOther drops +** its locks. +** +** Each call to this routine overrides any prior callbacks registered +** on the same "db". If xNotify==0 then any prior callbacks are immediately +** cancelled. +*/ +SQLITE_API int sqlite3_unlock_notify( + sqlite3 *db, + void (*xNotify)(void **, int), + void *pArg +){ + int rc = SQLITE_OK; + + sqlite3_mutex_enter(db->mutex); + enterMutex(); + + if( xNotify==0 ){ + removeFromBlockedList(db); + db->pBlockingConnection = 0; + db->pUnlockConnection = 0; + db->xUnlockNotify = 0; + db->pUnlockArg = 0; + }else if( 0==db->pBlockingConnection ){ + /* The blocking transaction has been concluded. Or there never was a + ** blocking transaction. In either case, invoke the notify callback + ** immediately. + */ + xNotify(&pArg, 1); + }else{ + sqlite3 *p; + + for(p=db->pBlockingConnection; p && p!=db; p=p->pUnlockConnection){} + if( p ){ + rc = SQLITE_LOCKED; /* Deadlock detected. */ + }else{ + db->pUnlockConnection = db->pBlockingConnection; + db->xUnlockNotify = xNotify; + db->pUnlockArg = pArg; + removeFromBlockedList(db); + addToBlockedList(db); + } + } + + leaveMutex(); + assert( !db->mallocFailed ); + sqlite3ErrorWithMsg(db, rc, (rc?"database is deadlocked":0)); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +/* +** This function is called while stepping or preparing a statement +** associated with connection db. The operation will return SQLITE_LOCKED +** to the user because it requires a lock that will not be available +** until connection pBlocker concludes its current transaction. +*/ +SQLITE_PRIVATE void sqlite3ConnectionBlocked(sqlite3 *db, sqlite3 *pBlocker){ + enterMutex(); + if( db->pBlockingConnection==0 && db->pUnlockConnection==0 ){ + addToBlockedList(db); + } + db->pBlockingConnection = pBlocker; + leaveMutex(); +} + +/* +** This function is called when +** the transaction opened by database db has just finished. Locks held +** by database connection db have been released. +** +** This function loops through each entry in the blocked connections +** list and does the following: +** +** 1) If the sqlite3.pBlockingConnection member of a list entry is +** set to db, then set pBlockingConnection=0. +** +** 2) If the sqlite3.pUnlockConnection member of a list entry is +** set to db, then invoke the configured unlock-notify callback and +** set pUnlockConnection=0. +** +** 3) If the two steps above mean that pBlockingConnection==0 and +** pUnlockConnection==0, remove the entry from the blocked connections +** list. +*/ +SQLITE_PRIVATE void sqlite3ConnectionUnlocked(sqlite3 *db){ + void (*xUnlockNotify)(void **, int) = 0; /* Unlock-notify cb to invoke */ + int nArg = 0; /* Number of entries in aArg[] */ + sqlite3 **pp; /* Iterator variable */ + void **aArg; /* Arguments to the unlock callback */ + void **aDyn = 0; /* Dynamically allocated space for aArg[] */ + void *aStatic[16]; /* Starter space for aArg[]. No malloc required */ + + aArg = aStatic; + enterMutex(); /* Enter STATIC_MAIN mutex */ + + /* This loop runs once for each entry in the blocked-connections list. */ + for(pp=&sqlite3BlockedList; *pp; /* no-op */ ){ + sqlite3 *p = *pp; + + /* Step 1. */ + if( p->pBlockingConnection==db ){ + p->pBlockingConnection = 0; + } + + /* Step 2. */ + if( p->pUnlockConnection==db ){ + assert( p->xUnlockNotify ); + if( p->xUnlockNotify!=xUnlockNotify && nArg!=0 ){ + xUnlockNotify(aArg, nArg); + nArg = 0; + } + + sqlite3BeginBenignMalloc(); + assert( aArg==aDyn || (aDyn==0 && aArg==aStatic) ); + assert( nArg<=(int)ArraySize(aStatic) || aArg==aDyn ); + if( (!aDyn && nArg==(int)ArraySize(aStatic)) + || (aDyn && nArg==(int)(sqlite3MallocSize(aDyn)/sizeof(void*))) + ){ + /* The aArg[] array needs to grow. */ + void **pNew = (void **)sqlite3Malloc(nArg*sizeof(void *)*2); + if( pNew ){ + memcpy(pNew, aArg, nArg*sizeof(void *)); + sqlite3_free(aDyn); + aDyn = aArg = pNew; + }else{ + /* This occurs when the array of context pointers that need to + ** be passed to the unlock-notify callback is larger than the + ** aStatic[] array allocated on the stack and the attempt to + ** allocate a larger array from the heap has failed. + ** + ** This is a difficult situation to handle. Returning an error + ** code to the caller is insufficient, as even if an error code + ** is returned the transaction on connection db will still be + ** closed and the unlock-notify callbacks on blocked connections + ** will go unissued. This might cause the application to wait + ** indefinitely for an unlock-notify callback that will never + ** arrive. + ** + ** Instead, invoke the unlock-notify callback with the context + ** array already accumulated. We can then clear the array and + ** begin accumulating any further context pointers without + ** requiring any dynamic allocation. This is sub-optimal because + ** it means that instead of one callback with a large array of + ** context pointers the application will receive two or more + ** callbacks with smaller arrays of context pointers, which will + ** reduce the applications ability to prioritize multiple + ** connections. But it is the best that can be done under the + ** circumstances. + */ + xUnlockNotify(aArg, nArg); + nArg = 0; + } + } + sqlite3EndBenignMalloc(); + + aArg[nArg++] = p->pUnlockArg; + xUnlockNotify = p->xUnlockNotify; + p->pUnlockConnection = 0; + p->xUnlockNotify = 0; + p->pUnlockArg = 0; + } + + /* Step 3. */ + if( p->pBlockingConnection==0 && p->pUnlockConnection==0 ){ + /* Remove connection p from the blocked connections list. */ + *pp = p->pNextBlocked; + p->pNextBlocked = 0; + }else{ + pp = &p->pNextBlocked; + } + } + + if( nArg!=0 ){ + xUnlockNotify(aArg, nArg); + } + sqlite3_free(aDyn); + leaveMutex(); /* Leave STATIC_MAIN mutex */ +} + +/* +** This is called when the database connection passed as an argument is +** being closed. The connection is removed from the blocked list. +*/ +SQLITE_PRIVATE void sqlite3ConnectionClosed(sqlite3 *db){ + sqlite3ConnectionUnlocked(db); + enterMutex(); + removeFromBlockedList(db); + checkListProperties(db); + leaveMutex(); +} +#endif + +/************** End of notify.c **********************************************/ +/************** Begin file fts3.c ********************************************/ +/* +** 2006 Oct 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is an SQLite module implementing full-text search. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ + +/* The full-text index is stored in a series of b+tree (-like) +** structures called segments which map terms to doclists. The +** structures are like b+trees in layout, but are constructed from the +** bottom up in optimal fashion and are not updatable. Since trees +** are built from the bottom up, things will be described from the +** bottom up. +** +** +**** Varints **** +** The basic unit of encoding is a variable-length integer called a +** varint. We encode variable-length integers in little-endian order +** using seven bits * per byte as follows: +** +** KEY: +** A = 0xxxxxxx 7 bits of data and one flag bit +** B = 1xxxxxxx 7 bits of data and one flag bit +** +** 7 bits - A +** 14 bits - BA +** 21 bits - BBA +** and so on. +** +** This is similar in concept to how sqlite encodes "varints" but +** the encoding is not the same. SQLite varints are big-endian +** are are limited to 9 bytes in length whereas FTS3 varints are +** little-endian and can be up to 10 bytes in length (in theory). +** +** Example encodings: +** +** 1: 0x01 +** 127: 0x7f +** 128: 0x81 0x00 +** +** +**** Document lists **** +** A doclist (document list) holds a docid-sorted list of hits for a +** given term. Doclists hold docids and associated token positions. +** A docid is the unique integer identifier for a single document. +** A position is the index of a word within the document. The first +** word of the document has a position of 0. +** +** FTS3 used to optionally store character offsets using a compile-time +** option. But that functionality is no longer supported. +** +** A doclist is stored like this: +** +** array { +** varint docid; (delta from previous doclist) +** array { (position list for column 0) +** varint position; (2 more than the delta from previous position) +** } +** array { +** varint POS_COLUMN; (marks start of position list for new column) +** varint column; (index of new column) +** array { +** varint position; (2 more than the delta from previous position) +** } +** } +** varint POS_END; (marks end of positions for this document. +** } +** +** Here, array { X } means zero or more occurrences of X, adjacent in +** memory. A "position" is an index of a token in the token stream +** generated by the tokenizer. Note that POS_END and POS_COLUMN occur +** in the same logical place as the position element, and act as sentinals +** ending a position list array. POS_END is 0. POS_COLUMN is 1. +** The positions numbers are not stored literally but rather as two more +** than the difference from the prior position, or the just the position plus +** 2 for the first position. Example: +** +** label: A B C D E F G H I J K +** value: 123 5 9 1 1 14 35 0 234 72 0 +** +** The 123 value is the first docid. For column zero in this document +** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1 +** at D signals the start of a new column; the 1 at E indicates that the +** new column is column number 1. There are two positions at 12 and 45 +** (14-2 and 35-2+12). The 0 at H indicate the end-of-document. The +** 234 at I is the delta to next docid (357). It has one position 70 +** (72-2) and then terminates with the 0 at K. +** +** A "position-list" is the list of positions for multiple columns for +** a single docid. A "column-list" is the set of positions for a single +** column. Hence, a position-list consists of one or more column-lists, +** a document record consists of a docid followed by a position-list and +** a doclist consists of one or more document records. +** +** A bare doclist omits the position information, becoming an +** array of varint-encoded docids. +** +**** Segment leaf nodes **** +** Segment leaf nodes store terms and doclists, ordered by term. Leaf +** nodes are written using LeafWriter, and read using LeafReader (to +** iterate through a single leaf node's data) and LeavesReader (to +** iterate through a segment's entire leaf layer). Leaf nodes have +** the format: +** +** varint iHeight; (height from leaf level, always 0) +** varint nTerm; (length of first term) +** char pTerm[nTerm]; (content of first term) +** varint nDoclist; (length of term's associated doclist) +** char pDoclist[nDoclist]; (content of doclist) +** array { +** (further terms are delta-encoded) +** varint nPrefix; (length of prefix shared with previous term) +** varint nSuffix; (length of unshared suffix) +** char pTermSuffix[nSuffix];(unshared suffix of next term) +** varint nDoclist; (length of term's associated doclist) +** char pDoclist[nDoclist]; (content of doclist) +** } +** +** Here, array { X } means zero or more occurrences of X, adjacent in +** memory. +** +** Leaf nodes are broken into blocks which are stored contiguously in +** the %_segments table in sorted order. This means that when the end +** of a node is reached, the next term is in the node with the next +** greater node id. +** +** New data is spilled to a new leaf node when the current node +** exceeds LEAF_MAX bytes (default 2048). New data which itself is +** larger than STANDALONE_MIN (default 1024) is placed in a standalone +** node (a leaf node with a single term and doclist). The goal of +** these settings is to pack together groups of small doclists while +** making it efficient to directly access large doclists. The +** assumption is that large doclists represent terms which are more +** likely to be query targets. +** +** TODO(shess) It may be useful for blocking decisions to be more +** dynamic. For instance, it may make more sense to have a 2.5k leaf +** node rather than splitting into 2k and .5k nodes. My intuition is +** that this might extend through 2x or 4x the pagesize. +** +** +**** Segment interior nodes **** +** Segment interior nodes store blockids for subtree nodes and terms +** to describe what data is stored by the each subtree. Interior +** nodes are written using InteriorWriter, and read using +** InteriorReader. InteriorWriters are created as needed when +** SegmentWriter creates new leaf nodes, or when an interior node +** itself grows too big and must be split. The format of interior +** nodes: +** +** varint iHeight; (height from leaf level, always >0) +** varint iBlockid; (block id of node's leftmost subtree) +** optional { +** varint nTerm; (length of first term) +** char pTerm[nTerm]; (content of first term) +** array { +** (further terms are delta-encoded) +** varint nPrefix; (length of shared prefix with previous term) +** varint nSuffix; (length of unshared suffix) +** char pTermSuffix[nSuffix]; (unshared suffix of next term) +** } +** } +** +** Here, optional { X } means an optional element, while array { X } +** means zero or more occurrences of X, adjacent in memory. +** +** An interior node encodes n terms separating n+1 subtrees. The +** subtree blocks are contiguous, so only the first subtree's blockid +** is encoded. The subtree at iBlockid will contain all terms less +** than the first term encoded (or all terms if no term is encoded). +** Otherwise, for terms greater than or equal to pTerm[i] but less +** than pTerm[i+1], the subtree for that term will be rooted at +** iBlockid+i. Interior nodes only store enough term data to +** distinguish adjacent children (if the rightmost term of the left +** child is "something", and the leftmost term of the right child is +** "wicked", only "w" is stored). +** +** New data is spilled to a new interior node at the same height when +** the current node exceeds INTERIOR_MAX bytes (default 2048). +** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing +** interior nodes and making the tree too skinny. The interior nodes +** at a given height are naturally tracked by interior nodes at +** height+1, and so on. +** +** +**** Segment directory **** +** The segment directory in table %_segdir stores meta-information for +** merging and deleting segments, and also the root node of the +** segment's tree. +** +** The root node is the top node of the segment's tree after encoding +** the entire segment, restricted to ROOT_MAX bytes (default 1024). +** This could be either a leaf node or an interior node. If the top +** node requires more than ROOT_MAX bytes, it is flushed to %_segments +** and a new root interior node is generated (which should always fit +** within ROOT_MAX because it only needs space for 2 varints, the +** height and the blockid of the previous root). +** +** The meta-information in the segment directory is: +** level - segment level (see below) +** idx - index within level +** - (level,idx uniquely identify a segment) +** start_block - first leaf node +** leaves_end_block - last leaf node +** end_block - last block (including interior nodes) +** root - contents of root node +** +** If the root node is a leaf node, then start_block, +** leaves_end_block, and end_block are all 0. +** +** +**** Segment merging **** +** To amortize update costs, segments are grouped into levels and +** merged in batches. Each increase in level represents exponentially +** more documents. +** +** New documents (actually, document updates) are tokenized and +** written individually (using LeafWriter) to a level 0 segment, with +** incrementing idx. When idx reaches MERGE_COUNT (default 16), all +** level 0 segments are merged into a single level 1 segment. Level 1 +** is populated like level 0, and eventually MERGE_COUNT level 1 +** segments are merged to a single level 2 segment (representing +** MERGE_COUNT^2 updates), and so on. +** +** A segment merge traverses all segments at a given level in +** parallel, performing a straightforward sorted merge. Since segment +** leaf nodes are written in to the %_segments table in order, this +** merge traverses the underlying sqlite disk structures efficiently. +** After the merge, all segment blocks from the merged level are +** deleted. +** +** MERGE_COUNT controls how often we merge segments. 16 seems to be +** somewhat of a sweet spot for insertion performance. 32 and 64 show +** very similar performance numbers to 16 on insertion, though they're +** a tiny bit slower (perhaps due to more overhead in merge-time +** sorting). 8 is about 20% slower than 16, 4 about 50% slower than +** 16, 2 about 66% slower than 16. +** +** At query time, high MERGE_COUNT increases the number of segments +** which need to be scanned and merged. For instance, with 100k docs +** inserted: +** +** MERGE_COUNT segments +** 16 25 +** 8 12 +** 4 10 +** 2 6 +** +** This appears to have only a moderate impact on queries for very +** frequent terms (which are somewhat dominated by segment merge +** costs), and infrequent and non-existent terms still seem to be fast +** even with many segments. +** +** TODO(shess) That said, it would be nice to have a better query-side +** argument for MERGE_COUNT of 16. Also, it is possible/likely that +** optimizations to things like doclist merging will swing the sweet +** spot around. +** +** +** +**** Handling of deletions and updates **** +** Since we're using a segmented structure, with no docid-oriented +** index into the term index, we clearly cannot simply update the term +** index when a document is deleted or updated. For deletions, we +** write an empty doclist (varint(docid) varint(POS_END)), for updates +** we simply write the new doclist. Segment merges overwrite older +** data for a particular docid with newer data, so deletes or updates +** will eventually overtake the earlier data and knock it out. The +** query logic likewise merges doclists so that newer data knocks out +** older data. +*/ + +/************** Include fts3Int.h in the middle of fts3.c ********************/ +/************** Begin file fts3Int.h *****************************************/ +/* +** 2009 Nov 12 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ +#ifndef _FTSINT_H +#define _FTSINT_H + +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG 1 +#endif + +/* FTS3/FTS4 require virtual tables */ +#ifdef SQLITE_OMIT_VIRTUALTABLE +# undef SQLITE_ENABLE_FTS3 +# undef SQLITE_ENABLE_FTS4 +#endif + +/* +** FTS4 is really an extension for FTS3. It is enabled using the +** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also all +** the SQLITE_ENABLE_FTS4 macro to serve as an alisse for SQLITE_ENABLE_FTS3. +*/ +#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3) +# define SQLITE_ENABLE_FTS3 +#endif + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* If not building as part of the core, include sqlite3ext.h. */ +#ifndef SQLITE_CORE +/* # include "sqlite3ext.h" */ +SQLITE_EXTENSION_INIT3 +#endif + +/* #include "sqlite3.h" */ +/************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/ +/************** Begin file fts3_tokenizer.h **********************************/ +/* +** 2006 July 10 +** +** The author disclaims copyright to this source code. +** +************************************************************************* +** Defines the interface to tokenizers used by fulltext-search. There +** are three basic components: +** +** sqlite3_tokenizer_module is a singleton defining the tokenizer +** interface functions. This is essentially the class structure for +** tokenizers. +** +** sqlite3_tokenizer is used to define a particular tokenizer, perhaps +** including customization information defined at creation time. +** +** sqlite3_tokenizer_cursor is generated by a tokenizer to generate +** tokens from a particular input. +*/ +#ifndef _FTS3_TOKENIZER_H_ +#define _FTS3_TOKENIZER_H_ + +/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. +** If tokenizers are to be allowed to call sqlite3_*() functions, then +** we will need a way to register the API consistently. +*/ +/* #include "sqlite3.h" */ + +/* +** Structures used by the tokenizer interface. When a new tokenizer +** implementation is registered, the caller provides a pointer to +** an sqlite3_tokenizer_module containing pointers to the callback +** functions that make up an implementation. +** +** When an fts3 table is created, it passes any arguments passed to +** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the +** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer +** implementation. The xCreate() function in turn returns an +** sqlite3_tokenizer structure representing the specific tokenizer to +** be used for the fts3 table (customized by the tokenizer clause arguments). +** +** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() +** method is called. It returns an sqlite3_tokenizer_cursor object +** that may be used to tokenize a specific input buffer based on +** the tokenization rules supplied by a specific sqlite3_tokenizer +** object. +*/ +typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; +typedef struct sqlite3_tokenizer sqlite3_tokenizer; +typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; + +struct sqlite3_tokenizer_module { + + /* + ** Structure version. Should always be set to 0 or 1. + */ + int iVersion; + + /* + ** Create a new tokenizer. The values in the argv[] array are the + ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL + ** TABLE statement that created the fts3 table. For example, if + ** the following SQL is executed: + ** + ** CREATE .. USING fts3( ... , tokenizer arg1 arg2) + ** + ** then argc is set to 2, and the argv[] array contains pointers + ** to the strings "arg1" and "arg2". + ** + ** This method should return either SQLITE_OK (0), or an SQLite error + ** code. If SQLITE_OK is returned, then *ppTokenizer should be set + ** to point at the newly created tokenizer structure. The generic + ** sqlite3_tokenizer.pModule variable should not be initialized by + ** this callback. The caller will do so. + */ + int (*xCreate)( + int argc, /* Size of argv array */ + const char *const*argv, /* Tokenizer argument strings */ + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ + ); + + /* + ** Destroy an existing tokenizer. The fts3 module calls this method + ** exactly once for each successful call to xCreate(). + */ + int (*xDestroy)(sqlite3_tokenizer *pTokenizer); + + /* + ** Create a tokenizer cursor to tokenize an input buffer. The caller + ** is responsible for ensuring that the input buffer remains valid + ** until the cursor is closed (using the xClose() method). + */ + int (*xOpen)( + sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ + const char *pInput, int nBytes, /* Input buffer */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ + ); + + /* + ** Destroy an existing tokenizer cursor. The fts3 module calls this + ** method exactly once for each successful call to xOpen(). + */ + int (*xClose)(sqlite3_tokenizer_cursor *pCursor); + + /* + ** Retrieve the next token from the tokenizer cursor pCursor. This + ** method should either return SQLITE_OK and set the values of the + ** "OUT" variables identified below, or SQLITE_DONE to indicate that + ** the end of the buffer has been reached, or an SQLite error code. + ** + ** *ppToken should be set to point at a buffer containing the + ** normalized version of the token (i.e. after any case-folding and/or + ** stemming has been performed). *pnBytes should be set to the length + ** of this buffer in bytes. The input text that generated the token is + ** identified by the byte offsets returned in *piStartOffset and + ** *piEndOffset. *piStartOffset should be set to the index of the first + ** byte of the token in the input buffer. *piEndOffset should be set + ** to the index of the first byte just past the end of the token in + ** the input buffer. + ** + ** The buffer *ppToken is set to point at is managed by the tokenizer + ** implementation. It is only required to be valid until the next call + ** to xNext() or xClose(). + */ + /* TODO(shess) current implementation requires pInput to be + ** nul-terminated. This should either be fixed, or pInput/nBytes + ** should be converted to zInput. + */ + int (*xNext)( + sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ + const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ + int *piStartOffset, /* OUT: Byte offset of token in input buffer */ + int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ + int *piPosition /* OUT: Number of tokens returned before this one */ + ); + + /*********************************************************************** + ** Methods below this point are only available if iVersion>=1. + */ + + /* + ** Configure the language id of a tokenizer cursor. + */ + int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid); +}; + +struct sqlite3_tokenizer { + const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ + /* Tokenizer implementations will typically add additional fields */ +}; + +struct sqlite3_tokenizer_cursor { + sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ + /* Tokenizer implementations will typically add additional fields */ +}; + +int fts3_global_term_cnt(int iTerm, int iCol); +int fts3_term_cnt(int iTerm, int iCol); + + +#endif /* _FTS3_TOKENIZER_H_ */ + +/************** End of fts3_tokenizer.h **************************************/ +/************** Continuing where we left off in fts3Int.h ********************/ +/************** Include fts3_hash.h in the middle of fts3Int.h ***************/ +/************** Begin file fts3_hash.h ***************************************/ +/* +** 2001 September 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the header file for the generic hash-table implementation +** used in SQLite. We've modified it slightly to serve as a standalone +** hash table implementation for the full-text indexing module. +** +*/ +#ifndef _FTS3_HASH_H_ +#define _FTS3_HASH_H_ + +/* Forward declarations of structures. */ +typedef struct Fts3Hash Fts3Hash; +typedef struct Fts3HashElem Fts3HashElem; + +/* A complete hash table is an instance of the following structure. +** The internals of this structure are intended to be opaque -- client +** code should not attempt to access or modify the fields of this structure +** directly. Change this structure only by using the routines below. +** However, many of the "procedures" and "functions" for modifying and +** accessing this structure are really macros, so we can't really make +** this structure opaque. +*/ +struct Fts3Hash { + char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ + char copyKey; /* True if copy of key made on insert */ + int count; /* Number of entries in this table */ + Fts3HashElem *first; /* The first element of the array */ + int htsize; /* Number of buckets in the hash table */ + struct _fts3ht { /* the hash table */ + int count; /* Number of entries with this hash */ + Fts3HashElem *chain; /* Pointer to first entry with this hash */ + } *ht; +}; + +/* Each element in the hash table is an instance of the following +** structure. All elements are stored on a single doubly-linked list. +** +** Again, this structure is intended to be opaque, but it can't really +** be opaque because it is used by macros. +*/ +struct Fts3HashElem { + Fts3HashElem *next, *prev; /* Next and previous elements in the table */ + void *data; /* Data associated with this element */ + void *pKey; int nKey; /* Key associated with this element */ +}; + +/* +** There are 2 different modes of operation for a hash table: +** +** FTS3_HASH_STRING pKey points to a string that is nKey bytes long +** (including the null-terminator, if any). Case +** is respected in comparisons. +** +** FTS3_HASH_BINARY pKey points to binary data nKey bytes long. +** memcmp() is used to compare keys. +** +** A copy of the key is made if the copyKey parameter to fts3HashInit is 1. +*/ +#define FTS3_HASH_STRING 1 +#define FTS3_HASH_BINARY 2 + +/* +** Access routines. To delete, insert a NULL pointer. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey); +SQLITE_PRIVATE void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData); +SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey); +SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash*); +SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int); + +/* +** Shorthand for the functions above +*/ +#define fts3HashInit sqlite3Fts3HashInit +#define fts3HashInsert sqlite3Fts3HashInsert +#define fts3HashFind sqlite3Fts3HashFind +#define fts3HashClear sqlite3Fts3HashClear +#define fts3HashFindElem sqlite3Fts3HashFindElem + +/* +** Macros for looping over all elements of a hash table. The idiom is +** like this: +** +** Fts3Hash h; +** Fts3HashElem *p; +** ... +** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){ +** SomeStructure *pData = fts3HashData(p); +** // do something with pData +** } +*/ +#define fts3HashFirst(H) ((H)->first) +#define fts3HashNext(E) ((E)->next) +#define fts3HashData(E) ((E)->data) +#define fts3HashKey(E) ((E)->pKey) +#define fts3HashKeysize(E) ((E)->nKey) + +/* +** Number of entries in a hash table +*/ +#define fts3HashCount(H) ((H)->count) + +#endif /* _FTS3_HASH_H_ */ + +/************** End of fts3_hash.h *******************************************/ +/************** Continuing where we left off in fts3Int.h ********************/ + +/* +** This constant determines the maximum depth of an FTS expression tree +** that the library will create and use. FTS uses recursion to perform +** various operations on the query tree, so the disadvantage of a large +** limit is that it may allow very large queries to use large amounts +** of stack space (perhaps causing a stack overflow). +*/ +#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH +# define SQLITE_FTS3_MAX_EXPR_DEPTH 12 +#endif + + +/* +** This constant controls how often segments are merged. Once there are +** FTS3_MERGE_COUNT segments of level N, they are merged into a single +** segment of level N+1. +*/ +#define FTS3_MERGE_COUNT 16 + +/* +** This is the maximum amount of data (in bytes) to store in the +** Fts3Table.pendingTerms hash table. Normally, the hash table is +** populated as documents are inserted/updated/deleted in a transaction +** and used to create a new segment when the transaction is committed. +** However if this limit is reached midway through a transaction, a new +** segment is created and the hash table cleared immediately. +*/ +#define FTS3_MAX_PENDING_DATA (1*1024*1024) + +/* +** Macro to return the number of elements in an array. SQLite has a +** similar macro called ArraySize(). Use a different name to avoid +** a collision when building an amalgamation with built-in FTS3. +*/ +#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) + + +#ifndef MIN +# define MIN(x,y) ((x)<(y)?(x):(y)) +#endif +#ifndef MAX +# define MAX(x,y) ((x)>(y)?(x):(y)) +#endif + +/* +** Maximum length of a varint encoded integer. The varint format is different +** from that used by SQLite, so the maximum length is 10, not 9. +*/ +#define FTS3_VARINT_MAX 10 + +#define FTS3_BUFFER_PADDING 8 + +/* +** FTS4 virtual tables may maintain multiple indexes - one index of all terms +** in the document set and zero or more prefix indexes. All indexes are stored +** as one or more b+-trees in the %_segments and %_segdir tables. +** +** It is possible to determine which index a b+-tree belongs to based on the +** value stored in the "%_segdir.level" column. Given this value L, the index +** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with +** level values between 0 and 1023 (inclusive) belong to index 0, all levels +** between 1024 and 2047 to index 1, and so on. +** +** It is considered impossible for an index to use more than 1024 levels. In +** theory though this may happen, but only after at least +** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables. +*/ +#define FTS3_SEGDIR_MAXLEVEL 1024 +#define FTS3_SEGDIR_MAXLEVEL_STR "1024" + +/* +** The testcase() macro is only used by the amalgamation. If undefined, +** make it a no-op. +*/ +#ifndef testcase +# define testcase(X) +#endif + +/* +** Terminator values for position-lists and column-lists. +*/ +#define POS_COLUMN (1) /* Column-list terminator */ +#define POS_END (0) /* Position-list terminator */ + +/* +** The assert_fts3_nc() macro is similar to the assert() macro, except that it +** is used for assert() conditions that are true only if it can be +** guranteed that the database is not corrupt. +*/ +#ifdef SQLITE_DEBUG +SQLITE_API extern int sqlite3_fts3_may_be_corrupt; +# define assert_fts3_nc(x) assert(sqlite3_fts3_may_be_corrupt || (x)) +#else +# define assert_fts3_nc(x) assert(x) +#endif + +/* +** This section provides definitions to allow the +** FTS3 extension to be compiled outside of the +** amalgamation. +*/ +#ifndef SQLITE_AMALGAMATION +/* +** Macros indicating that conditional expressions are always true or +** false. +*/ +#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST) +# define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1 +#endif +#if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS) +# define ALWAYS(X) (1) +# define NEVER(X) (0) +#elif !defined(NDEBUG) +# define ALWAYS(X) ((X)?1:(assert(0),0)) +# define NEVER(X) ((X)?(assert(0),1):0) +#else +# define ALWAYS(X) (X) +# define NEVER(X) (X) +#endif + +/* +** Internal types used by SQLite. +*/ +typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ +typedef short int i16; /* 2-byte (or larger) signed integer */ +typedef unsigned int u32; /* 4-byte unsigned integer */ +typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ +typedef sqlite3_int64 i64; /* 8-byte signed integer */ + +/* +** Macro used to suppress compiler warnings for unused parameters. +*/ +#define UNUSED_PARAMETER(x) (void)(x) + +/* +** Activate assert() only if SQLITE_TEST is enabled. +*/ +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG 1 +#endif + +/* +** The TESTONLY macro is used to enclose variable declarations or +** other bits of code that are needed to support the arguments +** within testcase() and assert() macros. +*/ +#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) +# define TESTONLY(X) X +#else +# define TESTONLY(X) +#endif + +#define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) +#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) + +#define deliberate_fall_through + +#endif /* SQLITE_AMALGAMATION */ + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3Fts3Corrupt(void); +# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt() +#else +# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB +#endif + +typedef struct Fts3Table Fts3Table; +typedef struct Fts3Cursor Fts3Cursor; +typedef struct Fts3Expr Fts3Expr; +typedef struct Fts3Phrase Fts3Phrase; +typedef struct Fts3PhraseToken Fts3PhraseToken; + +typedef struct Fts3Doclist Fts3Doclist; +typedef struct Fts3SegFilter Fts3SegFilter; +typedef struct Fts3DeferredToken Fts3DeferredToken; +typedef struct Fts3SegReader Fts3SegReader; +typedef struct Fts3MultiSegReader Fts3MultiSegReader; + +typedef struct MatchinfoBuffer MatchinfoBuffer; + +/* +** A connection to a fulltext index is an instance of the following +** structure. The xCreate and xConnect methods create an instance +** of this structure and xDestroy and xDisconnect free that instance. +** All other methods receive a pointer to the structure as one of their +** arguments. +*/ +struct Fts3Table { + sqlite3_vtab base; /* Base class used by SQLite core */ + sqlite3 *db; /* The database connection */ + const char *zDb; /* logical database name */ + const char *zName; /* virtual table name */ + int nColumn; /* number of named columns in virtual table */ + char **azColumn; /* column names. malloced */ + u8 *abNotindexed; /* True for 'notindexed' columns */ + sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ + char *zContentTbl; /* content=xxx option, or NULL */ + char *zLanguageid; /* languageid=xxx option, or NULL */ + int nAutoincrmerge; /* Value configured by 'automerge' */ + u32 nLeafAdd; /* Number of leaf blocks added this trans */ + int bLock; /* Used to prevent recursive content= tbls */ + + /* Precompiled statements used by the implementation. Each of these + ** statements is run and reset within a single virtual table API call. + */ + sqlite3_stmt *aStmt[40]; + sqlite3_stmt *pSeekStmt; /* Cache for fts3CursorSeekStmt() */ + + char *zReadExprlist; + char *zWriteExprlist; + + int nNodeSize; /* Soft limit for node size */ + u8 bFts4; /* True for FTS4, false for FTS3 */ + u8 bHasStat; /* True if %_stat table exists (2==unknown) */ + u8 bHasDocsize; /* True if %_docsize table exists */ + u8 bDescIdx; /* True if doclists are in reverse order */ + u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */ + int nPgsz; /* Page size for host database */ + char *zSegmentsTbl; /* Name of %_segments table */ + sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ + + /* + ** The following array of hash tables is used to buffer pending index + ** updates during transactions. All pending updates buffered at any one + ** time must share a common language-id (see the FTS4 langid= feature). + ** The current language id is stored in variable iPrevLangid. + ** + ** A single FTS4 table may have multiple full-text indexes. For each index + ** there is an entry in the aIndex[] array. Index 0 is an index of all the + ** terms that appear in the document set. Each subsequent index in aIndex[] + ** is an index of prefixes of a specific length. + ** + ** Variable nPendingData contains an estimate the memory consumed by the + ** pending data structures, including hash table overhead, but not including + ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash + ** tables are flushed to disk. Variable iPrevDocid is the docid of the most + ** recently inserted record. + */ + int nIndex; /* Size of aIndex[] */ + struct Fts3Index { + int nPrefix; /* Prefix length (0 for main terms index) */ + Fts3Hash hPending; /* Pending terms table for this index */ + } *aIndex; + int nMaxPendingData; /* Max pending data before flush to disk */ + int nPendingData; /* Current bytes of pending data */ + sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ + int iPrevLangid; /* Langid of recently inserted document */ + int bPrevDelete; /* True if last operation was a delete */ + +#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) + /* State variables used for validating that the transaction control + ** methods of the virtual table are called at appropriate times. These + ** values do not contribute to FTS functionality; they are used for + ** verifying the operation of the SQLite core. + */ + int inTransaction; /* True after xBegin but before xCommit/xRollback */ + int mxSavepoint; /* Largest valid xSavepoint integer */ +#endif + +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + /* True to disable the incremental doclist optimization. This is controled + ** by special insert command 'test-no-incr-doclist'. */ + int bNoIncrDoclist; + + /* Number of segments in a level */ + int nMergeCount; +#endif +}; + +/* Macro to find the number of segments to merge */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) +# define MergeCount(P) ((P)->nMergeCount) +#else +# define MergeCount(P) FTS3_MERGE_COUNT +#endif + +/* +** When the core wants to read from the virtual table, it creates a +** virtual table cursor (an instance of the following structure) using +** the xOpen method. Cursors are destroyed using the xClose method. +*/ +struct Fts3Cursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + i16 eSearch; /* Search strategy (see below) */ + u8 isEof; /* True if at End Of Results */ + u8 isRequireSeek; /* True if must seek pStmt to %_content row */ + u8 bSeekStmt; /* True if pStmt is a seek */ + sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ + Fts3Expr *pExpr; /* Parsed MATCH query string */ + int iLangid; /* Language being queried for */ + int nPhrase; /* Number of matchable phrases in query */ + Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ + sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ + char *pNextId; /* Pointer into the body of aDoclist */ + char *aDoclist; /* List of docids for full-text queries */ + int nDoclist; /* Size of buffer at aDoclist */ + u8 bDesc; /* True to sort in descending order */ + int eEvalmode; /* An FTS3_EVAL_XX constant */ + int nRowAvg; /* Average size of database rows, in pages */ + sqlite3_int64 nDoc; /* Documents in table */ + i64 iMinDocid; /* Minimum docid to return */ + i64 iMaxDocid; /* Maximum docid to return */ + int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ + MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */ +}; + +#define FTS3_EVAL_FILTER 0 +#define FTS3_EVAL_NEXT 1 +#define FTS3_EVAL_MATCHINFO 2 + +/* +** The Fts3Cursor.eSearch member is always set to one of the following. +** Actualy, Fts3Cursor.eSearch can be greater than or equal to +** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index +** of the column to be searched. For example, in +** +** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); +** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; +** +** Because the LHS of the MATCH operator is 2nd column "b", +** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, +** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" +** indicating that all columns should be searched, +** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. +*/ +#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ +#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ +#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ + +/* +** The lower 16-bits of the sqlite3_index_info.idxNum value set by +** the xBestIndex() method contains the Fts3Cursor.eSearch value described +** above. The upper 16-bits contain a combination of the following +** bits, used to describe extra constraints on full-text searches. +*/ +#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ +#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ +#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */ + +struct Fts3Doclist { + char *aAll; /* Array containing doclist (or NULL) */ + int nAll; /* Size of a[] in bytes */ + char *pNextDocid; /* Pointer to next docid */ + + sqlite3_int64 iDocid; /* Current docid (if pList!=0) */ + int bFreeList; /* True if pList should be sqlite3_free()d */ + char *pList; /* Pointer to position list following iDocid */ + int nList; /* Length of position list */ +}; + +/* +** A "phrase" is a sequence of one or more tokens that must match in +** sequence. A single token is the base case and the most common case. +** For a sequence of tokens contained in double-quotes (i.e. "one two three") +** nToken will be the number of tokens in the string. +*/ +struct Fts3PhraseToken { + char *z; /* Text of the token */ + int n; /* Number of bytes in buffer z */ + int isPrefix; /* True if token ends with a "*" character */ + int bFirst; /* True if token must appear at position 0 */ + + /* Variables above this point are populated when the expression is + ** parsed (by code in fts3_expr.c). Below this point the variables are + ** used when evaluating the expression. */ + Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ + Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ +}; + +struct Fts3Phrase { + /* Cache of doclist for this phrase. */ + Fts3Doclist doclist; + int bIncr; /* True if doclist is loaded incrementally */ + int iDoclistToken; + + /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an + ** OR condition. */ + char *pOrPoslist; + i64 iOrDocid; + + /* Variables below this point are populated by fts3_expr.c when parsing + ** a MATCH expression. Everything above is part of the evaluation phase. + */ + int nToken; /* Number of tokens in the phrase */ + int iColumn; /* Index of column this phrase must match */ + Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ +}; + +/* +** A tree of these objects forms the RHS of a MATCH operator. +** +** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist +** points to a malloced buffer, size nDoclist bytes, containing the results +** of this phrase query in FTS3 doclist format. As usual, the initial +** "Length" field found in doclists stored on disk is omitted from this +** buffer. +** +** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global +** matchinfo data. If it is not NULL, it points to an array of size nCol*3, +** where nCol is the number of columns in the queried FTS table. The array +** is populated as follows: +** +** aMI[iCol*3 + 0] = Undefined +** aMI[iCol*3 + 1] = Number of occurrences +** aMI[iCol*3 + 2] = Number of rows containing at least one instance +** +** The aMI array is allocated using sqlite3_malloc(). It should be freed +** when the expression node is. +*/ +struct Fts3Expr { + int eType; /* One of the FTSQUERY_XXX values defined below */ + int nNear; /* Valid if eType==FTSQUERY_NEAR */ + Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ + Fts3Expr *pLeft; /* Left operand */ + Fts3Expr *pRight; /* Right operand */ + Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ + + /* The following are used by the fts3_eval.c module. */ + sqlite3_int64 iDocid; /* Current docid */ + u8 bEof; /* True this expression is at EOF already */ + u8 bStart; /* True if iDocid is valid */ + u8 bDeferred; /* True if this expression is entirely deferred */ + + /* The following are used by the fts3_snippet.c module. */ + int iPhrase; /* Index of this phrase in matchinfo() results */ + u32 *aMI; /* See above */ +}; + +/* +** Candidate values for Fts3Query.eType. Note that the order of the first +** four values is in order of precedence when parsing expressions. For +** example, the following: +** +** "a OR b AND c NOT d NEAR e" +** +** is equivalent to: +** +** "a OR (b AND (c NOT (d NEAR e)))" +*/ +#define FTSQUERY_NEAR 1 +#define FTSQUERY_NOT 2 +#define FTSQUERY_AND 3 +#define FTSQUERY_OR 4 +#define FTSQUERY_PHRASE 5 + + +/* fts3_write.c */ +SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); +SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *); +SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *); +SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *); +SQLITE_PRIVATE int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, + sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); +SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( + Fts3Table*,int,const char*,int,int,Fts3SegReader**); +SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *); +SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **); +SQLITE_PRIVATE int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); + +SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); +SQLITE_PRIVATE int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); + +#ifndef SQLITE_DISABLE_FTS4_DEFERRED +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); +SQLITE_PRIVATE int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); +SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); +SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); +#else +# define sqlite3Fts3FreeDeferredTokens(x) +# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK +# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK +# define sqlite3Fts3FreeDeferredDoclists(x) +# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK +#endif + +SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *); +SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *, int *); + +/* Special values interpreted by sqlite3SegReaderCursor() */ +#define FTS3_SEGCURSOR_PENDING -1 +#define FTS3_SEGCURSOR_ALL -2 + +SQLITE_PRIVATE int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*); +SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); +SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); + +SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(Fts3Table *, + int, int, int, const char *, int, int, int, Fts3MultiSegReader *); + +/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ +#define FTS3_SEGMENT_REQUIRE_POS 0x00000001 +#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 +#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 +#define FTS3_SEGMENT_PREFIX 0x00000008 +#define FTS3_SEGMENT_SCAN 0x00000010 +#define FTS3_SEGMENT_FIRST 0x00000020 + +/* Type passed as 4th argument to SegmentReaderIterate() */ +struct Fts3SegFilter { + const char *zTerm; + int nTerm; + int iCol; + int flags; +}; + +struct Fts3MultiSegReader { + /* Used internally by sqlite3Fts3SegReaderXXX() calls */ + Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ + int nSegment; /* Size of apSegment array */ + int nAdvance; /* How many seg-readers to advance */ + Fts3SegFilter *pFilter; /* Pointer to filter object */ + char *aBuffer; /* Buffer to merge doclists in */ + int nBuffer; /* Allocated size of aBuffer[] in bytes */ + + int iColFilter; /* If >=0, filter for this column */ + int bRestart; + + /* Used by fts3.c only. */ + int nCost; /* Cost of running iterator */ + int bLookup; /* True if a lookup of a single entry. */ + + /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ + char *zTerm; /* Pointer to term buffer */ + int nTerm; /* Size of zTerm in bytes */ + char *aDoclist; /* Pointer to doclist buffer */ + int nDoclist; /* Size of aDoclist[] in bytes */ +}; + +SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table*,int,int); + +#define fts3GetVarint32(p, piVal) ( \ + (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ +) + +/* fts3.c */ +SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char**,const char*,...); +SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *, sqlite3_int64); +SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); +SQLITE_PRIVATE int sqlite3Fts3GetVarintU(const char *, sqlite_uint64 *); +SQLITE_PRIVATE int sqlite3Fts3GetVarintBounded(const char*,const char*,sqlite3_int64*); +SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *, int *); +SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64); +SQLITE_PRIVATE void sqlite3Fts3Dequote(char *); +SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); +SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); +SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); +SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int*, Fts3Table*); +SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc); +SQLITE_PRIVATE int sqlite3Fts3ReadInt(const char *z, int *pnOut); + +/* fts3_tokenizer.c */ +SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *); +SQLITE_PRIVATE int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); +SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, + sqlite3_tokenizer **, char ** +); +SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char); + +/* fts3_snippet.c */ +SQLITE_PRIVATE void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); +SQLITE_PRIVATE void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, + const char *, const char *, int, int +); +SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); +SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p); + +/* fts3_expr.c */ +SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, + char **, int, int, int, const char *, int, Fts3Expr **, char ** +); +SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *); +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash*); +SQLITE_PRIVATE int sqlite3Fts3InitTerm(sqlite3 *db); +#endif +SQLITE_PRIVATE void *sqlite3Fts3MallocZero(i64 nByte); + +SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int, + sqlite3_tokenizer_cursor ** +); + +/* fts3_aux.c */ +SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db); + +SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); + +SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( + Fts3Table*, Fts3MultiSegReader*, int, const char*, int); +SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( + Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); +SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); +SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); +SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); + +/* fts3_tokenize_vtab.c */ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *, void(*xDestroy)(void*)); + +/* fts3_unicode2.c (functions generated by parsing unicode text files) */ +#ifndef SQLITE_DISABLE_FTS3_UNICODE +SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int); +SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int); +SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int); +#endif + +#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ +#endif /* _FTSINT_H */ + +/************** End of fts3Int.h *********************************************/ +/************** Continuing where we left off in fts3.c ***********************/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) +# define SQLITE_CORE 1 +#endif + +/* #include */ +/* #include */ +/* #include */ +/* #include */ +/* #include */ +/* #include */ + +/* #include "fts3.h" */ +#ifndef SQLITE_CORE +/* # include "sqlite3ext.h" */ + SQLITE_EXTENSION_INIT1 +#endif + +typedef struct Fts3HashWrapper Fts3HashWrapper; +struct Fts3HashWrapper { + Fts3Hash hash; /* Hash table */ + int nRef; /* Number of pointers to this object */ +}; + +static int fts3EvalNext(Fts3Cursor *pCsr); +static int fts3EvalStart(Fts3Cursor *pCsr); +static int fts3TermSegReaderCursor( + Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); + +/* +** This variable is set to false when running tests for which the on disk +** structures should not be corrupt. Otherwise, true. If it is false, extra +** assert() conditions in the fts3 code are activated - conditions that are +** only true if it is guaranteed that the fts3 database is not corrupt. +*/ +#ifdef SQLITE_DEBUG +SQLITE_API int sqlite3_fts3_may_be_corrupt = 1; +#endif + +/* +** Write a 64-bit variable-length integer to memory starting at p[0]. +** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. +** The number of bytes written is returned. +*/ +SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ + unsigned char *q = (unsigned char *) p; + sqlite_uint64 vu = v; + do{ + *q++ = (unsigned char) ((vu & 0x7f) | 0x80); + vu >>= 7; + }while( vu!=0 ); + q[-1] &= 0x7f; /* turn off high bit in final byte */ + assert( q - (unsigned char *)p <= FTS3_VARINT_MAX ); + return (int) (q - (unsigned char *)p); +} + +#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ + v = (v & mask1) | ( (*(const unsigned char*)(ptr++)) << shift ); \ + if( (v & mask2)==0 ){ var = v; return ret; } +#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ + v = (*ptr++); \ + if( (v & mask2)==0 ){ var = v; return ret; } + +SQLITE_PRIVATE int sqlite3Fts3GetVarintU(const char *pBuf, sqlite_uint64 *v){ + const unsigned char *p = (const unsigned char*)pBuf; + const unsigned char *pStart = p; + u32 a; + u64 b; + int shift; + + GETVARINT_INIT(a, p, 0, 0x00, 0x80, *v, 1); + GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *v, 2); + GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *v, 3); + GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4); + b = (a & 0x0FFFFFFF ); + + for(shift=28; shift<=63; shift+=7){ + u64 c = *p++; + b += (c&0x7F) << shift; + if( (c & 0x80)==0 ) break; + } + *v = b; + return (int)(p - pStart); +} + +/* +** Read a 64-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read, or 0 on error. +** The value is stored in *v. +*/ +SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *pBuf, sqlite_int64 *v){ + return sqlite3Fts3GetVarintU(pBuf, (sqlite3_uint64*)v); +} + +/* +** Read a 64-bit variable-length integer from memory starting at p[0] and +** not extending past pEnd[-1]. +** Return the number of bytes read, or 0 on error. +** The value is stored in *v. +*/ +SQLITE_PRIVATE int sqlite3Fts3GetVarintBounded( + const char *pBuf, + const char *pEnd, + sqlite_int64 *v +){ + const unsigned char *p = (const unsigned char*)pBuf; + const unsigned char *pStart = p; + const unsigned char *pX = (const unsigned char*)pEnd; + u64 b = 0; + int shift; + for(shift=0; shift<=63; shift+=7){ + u64 c = p=0 ); + return 5; +} + +/* +** Return the number of bytes required to encode v as a varint +*/ +SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64 v){ + int i = 0; + do{ + i++; + v >>= 7; + }while( v!=0 ); + return i; +} + +/* +** Convert an SQL-style quoted string into a normal string by removing +** the quote characters. The conversion is done in-place. If the +** input does not begin with a quote character, then this routine +** is a no-op. +** +** Examples: +** +** "abc" becomes abc +** 'xyz' becomes xyz +** [pqr] becomes pqr +** `mno` becomes mno +** +*/ +SQLITE_PRIVATE void sqlite3Fts3Dequote(char *z){ + char quote; /* Quote character (if any ) */ + + quote = z[0]; + if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ + int iIn = 1; /* Index of next byte to read from input */ + int iOut = 0; /* Index of next byte to write to output */ + + /* If the first byte was a '[', then the close-quote character is a ']' */ + if( quote=='[' ) quote = ']'; + + while( z[iIn] ){ + if( z[iIn]==quote ){ + if( z[iIn+1]!=quote ) break; + z[iOut++] = quote; + iIn += 2; + }else{ + z[iOut++] = z[iIn++]; + } + } + z[iOut] = '\0'; + } +} + +/* +** Read a single varint from the doclist at *pp and advance *pp to point +** to the first byte past the end of the varint. Add the value of the varint +** to *pVal. +*/ +static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ + sqlite3_int64 iVal; + *pp += sqlite3Fts3GetVarint(*pp, &iVal); + *pVal += iVal; +} + +/* +** When this function is called, *pp points to the first byte following a +** varint that is part of a doclist (or position-list, or any other list +** of varints). This function moves *pp to point to the start of that varint, +** and sets *pVal by the varint value. +** +** Argument pStart points to the first byte of the doclist that the +** varint is part of. +*/ +static void fts3GetReverseVarint( + char **pp, + char *pStart, + sqlite3_int64 *pVal +){ + sqlite3_int64 iVal; + char *p; + + /* Pointer p now points at the first byte past the varint we are + ** interested in. So, unless the doclist is corrupt, the 0x80 bit is + ** clear on character p[-1]. */ + for(p = (*pp)-2; p>=pStart && *p&0x80; p--); + p++; + *pp = p; + + sqlite3Fts3GetVarint(p, &iVal); + *pVal = iVal; +} + +/* +** The xDisconnect() virtual table method. +*/ +static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table *)pVtab; + int i; + + assert( p->nPendingData==0 ); + assert( p->pSegments==0 ); + + /* Free any prepared statements held */ + sqlite3_finalize(p->pSeekStmt); + for(i=0; iaStmt); i++){ + sqlite3_finalize(p->aStmt[i]); + } + sqlite3_free(p->zSegmentsTbl); + sqlite3_free(p->zReadExprlist); + sqlite3_free(p->zWriteExprlist); + sqlite3_free(p->zContentTbl); + sqlite3_free(p->zLanguageid); + + /* Invoke the tokenizer destructor to free the tokenizer. */ + p->pTokenizer->pModule->xDestroy(p->pTokenizer); + + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Write an error message into *pzErr +*/ +SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char **pzErr, const char *zFormat, ...){ + va_list ap; + sqlite3_free(*pzErr); + va_start(ap, zFormat); + *pzErr = sqlite3_vmprintf(zFormat, ap); + va_end(ap); +} + +/* +** Construct one or more SQL statements from the format string given +** and then evaluate those statements. The success code is written +** into *pRc. +** +** If *pRc is initially non-zero then this routine is a no-op. +*/ +static void fts3DbExec( + int *pRc, /* Success code */ + sqlite3 *db, /* Database in which to run SQL */ + const char *zFormat, /* Format string for SQL */ + ... /* Arguments to the format string */ +){ + va_list ap; + char *zSql; + if( *pRc ) return; + va_start(ap, zFormat); + zSql = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + if( zSql==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + *pRc = sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3_free(zSql); + } +} + +/* +** The xDestroy() virtual table method. +*/ +static int fts3DestroyMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table *)pVtab; + int rc = SQLITE_OK; /* Return code */ + const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */ + sqlite3 *db = p->db; /* Database handle */ + + /* Drop the shadow tables */ + fts3DbExec(&rc, db, + "DROP TABLE IF EXISTS %Q.'%q_segments';" + "DROP TABLE IF EXISTS %Q.'%q_segdir';" + "DROP TABLE IF EXISTS %Q.'%q_docsize';" + "DROP TABLE IF EXISTS %Q.'%q_stat';" + "%s DROP TABLE IF EXISTS %Q.'%q_content';", + zDb, p->zName, + zDb, p->zName, + zDb, p->zName, + zDb, p->zName, + (p->zContentTbl ? "--" : ""), zDb,p->zName + ); + + /* If everything has worked, invoke fts3DisconnectMethod() to free the + ** memory associated with the Fts3Table structure and return SQLITE_OK. + ** Otherwise, return an SQLite error code. + */ + return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc); +} + + +/* +** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table +** passed as the first argument. This is done as part of the xConnect() +** and xCreate() methods. +** +** If *pRc is non-zero when this function is called, it is a no-op. +** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. +*/ +static void fts3DeclareVtab(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int i; /* Iterator variable */ + int rc; /* Return code */ + char *zSql; /* SQL statement passed to declare_vtab() */ + char *zCols; /* List of user defined columns */ + const char *zLanguageid; + + zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid"); + sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + + /* Create a list of user columns for the virtual table */ + zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); + for(i=1; zCols && inColumn; i++){ + zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); + } + + /* Create the whole "CREATE TABLE" statement to pass to SQLite */ + zSql = sqlite3_mprintf( + "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", + zCols, p->zName, zLanguageid + ); + if( !zCols || !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_declare_vtab(p->db, zSql); + } + + sqlite3_free(zSql); + sqlite3_free(zCols); + *pRc = rc; + } +} + +/* +** Create the %_stat table if it does not already exist. +*/ +SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){ + fts3DbExec(pRc, p->db, + "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'" + "(id INTEGER PRIMARY KEY, value BLOB);", + p->zDb, p->zName + ); + if( (*pRc)==SQLITE_OK ) p->bHasStat = 1; +} + +/* +** Create the backing store tables (%_content, %_segments and %_segdir) +** required by the FTS3 table passed as the only argument. This is done +** as part of the vtab xCreate() method. +** +** If the p->bHasDocsize boolean is true (indicating that this is an +** FTS4 table, not an FTS3 table) then also create the %_docsize and +** %_stat tables required by FTS4. +*/ +static int fts3CreateTables(Fts3Table *p){ + int rc = SQLITE_OK; /* Return code */ + int i; /* Iterator variable */ + sqlite3 *db = p->db; /* The database connection */ + + if( p->zContentTbl==0 ){ + const char *zLanguageid = p->zLanguageid; + char *zContentCols; /* Columns of %_content table */ + + /* Create a list of user columns for the content table */ + zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); + for(i=0; zContentCols && inColumn; i++){ + char *z = p->azColumn[i]; + zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); + } + if( zLanguageid && zContentCols ){ + zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid); + } + if( zContentCols==0 ) rc = SQLITE_NOMEM; + + /* Create the content table */ + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_content'(%s)", + p->zDb, p->zName, zContentCols + ); + sqlite3_free(zContentCols); + } + + /* Create other tables */ + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);", + p->zDb, p->zName + ); + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_segdir'(" + "level INTEGER," + "idx INTEGER," + "start_block INTEGER," + "leaves_end_block INTEGER," + "end_block INTEGER," + "root BLOB," + "PRIMARY KEY(level, idx)" + ");", + p->zDb, p->zName + ); + if( p->bHasDocsize ){ + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);", + p->zDb, p->zName + ); + } + assert( p->bHasStat==p->bFts4 ); + if( p->bHasStat ){ + sqlite3Fts3CreateStatTable(&rc, p); + } + return rc; +} + +/* +** Store the current database page-size in bytes in p->nPgsz. +** +** If *pRc is non-zero when this function is called, it is a no-op. +** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. +*/ +static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int rc; /* Return code */ + char *zSql; /* SQL text "PRAGMA %Q.page_size" */ + sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */ + + zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_step(pStmt); + p->nPgsz = sqlite3_column_int(pStmt, 0); + rc = sqlite3_finalize(pStmt); + }else if( rc==SQLITE_AUTH ){ + p->nPgsz = 1024; + rc = SQLITE_OK; + } + } + assert( p->nPgsz>0 || rc!=SQLITE_OK ); + sqlite3_free(zSql); + *pRc = rc; + } +} + +/* +** "Special" FTS4 arguments are column specifications of the following form: +** +** = +** +** There may not be whitespace surrounding the "=" character. The +** term may be quoted, but the may not. +*/ +static int fts3IsSpecialColumn( + const char *z, + int *pnKey, + char **pzValue +){ + char *zValue; + const char *zCsr = z; + + while( *zCsr!='=' ){ + if( *zCsr=='\0' ) return 0; + zCsr++; + } + + *pnKey = (int)(zCsr-z); + zValue = sqlite3_mprintf("%s", &zCsr[1]); + if( zValue ){ + sqlite3Fts3Dequote(zValue); + } + *pzValue = zValue; + return 1; +} + +/* +** Append the output of a printf() style formatting to an existing string. +*/ +static void fts3Appendf( + int *pRc, /* IN/OUT: Error code */ + char **pz, /* IN/OUT: Pointer to string buffer */ + const char *zFormat, /* Printf format string to append */ + ... /* Arguments for printf format string */ +){ + if( *pRc==SQLITE_OK ){ + va_list ap; + char *z; + va_start(ap, zFormat); + z = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + if( z && *pz ){ + char *z2 = sqlite3_mprintf("%s%s", *pz, z); + sqlite3_free(z); + z = z2; + } + if( z==0 ) *pRc = SQLITE_NOMEM; + sqlite3_free(*pz); + *pz = z; + } +} + +/* +** Return a copy of input string zInput enclosed in double-quotes (") and +** with all double quote characters escaped. For example: +** +** fts3QuoteId("un \"zip\"") -> "un \"\"zip\"\"" +** +** The pointer returned points to memory obtained from sqlite3_malloc(). It +** is the callers responsibility to call sqlite3_free() to release this +** memory. +*/ +static char *fts3QuoteId(char const *zInput){ + sqlite3_int64 nRet; + char *zRet; + nRet = 2 + (int)strlen(zInput)*2 + 1; + zRet = sqlite3_malloc64(nRet); + if( zRet ){ + int i; + char *z = zRet; + *(z++) = '"'; + for(i=0; zInput[i]; i++){ + if( zInput[i]=='"' ) *(z++) = '"'; + *(z++) = zInput[i]; + } + *(z++) = '"'; + *(z++) = '\0'; + } + return zRet; +} + +/* +** Return a list of comma separated SQL expressions and a FROM clause that +** could be used in a SELECT statement such as the following: +** +** SELECT FROM %_content AS x ... +** +** to return the docid, followed by each column of text data in order +** from left to write. If parameter zFunc is not NULL, then instead of +** being returned directly each column of text data is passed to an SQL +** function named zFunc first. For example, if zFunc is "unzip" and the +** table has the three user-defined columns "a", "b", and "c", the following +** string is returned: +** +** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x" +** +** The pointer returned points to a buffer allocated by sqlite3_malloc(). It +** is the responsibility of the caller to eventually free it. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and +** a NULL pointer is returned). Otherwise, if an OOM error is encountered +** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If +** no error occurs, *pRc is left unmodified. +*/ +static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ + char *zRet = 0; + char *zFree = 0; + char *zFunction; + int i; + + if( p->zContentTbl==0 ){ + if( !zFunc ){ + zFunction = ""; + }else{ + zFree = zFunction = fts3QuoteId(zFunc); + } + fts3Appendf(pRc, &zRet, "docid"); + for(i=0; inColumn; i++){ + fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); + } + sqlite3_free(zFree); + }else{ + fts3Appendf(pRc, &zRet, "rowid"); + for(i=0; inColumn; i++){ + fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); + } + } + fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", + p->zDb, + (p->zContentTbl ? p->zContentTbl : p->zName), + (p->zContentTbl ? "" : "_content") + ); + return zRet; +} + +/* +** Return a list of N comma separated question marks, where N is the number +** of columns in the %_content table (one for the docid plus one for each +** user-defined text column). +** +** If argument zFunc is not NULL, then all but the first question mark +** is preceded by zFunc and an open bracket, and followed by a closed +** bracket. For example, if zFunc is "zip" and the FTS3 table has three +** user-defined text columns, the following string is returned: +** +** "?, zip(?), zip(?), zip(?)" +** +** The pointer returned points to a buffer allocated by sqlite3_malloc(). It +** is the responsibility of the caller to eventually free it. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and +** a NULL pointer is returned). Otherwise, if an OOM error is encountered +** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If +** no error occurs, *pRc is left unmodified. +*/ +static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ + char *zRet = 0; + char *zFree = 0; + char *zFunction; + int i; + + if( !zFunc ){ + zFunction = ""; + }else{ + zFree = zFunction = fts3QuoteId(zFunc); + } + fts3Appendf(pRc, &zRet, "?"); + for(i=0; inColumn; i++){ + fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", ?"); + } + sqlite3_free(zFree); + return zRet; +} + +/* +** Buffer z contains a positive integer value encoded as utf-8 text. +** Decode this value and store it in *pnOut, returning the number of bytes +** consumed. If an overflow error occurs return a negative value. +*/ +SQLITE_PRIVATE int sqlite3Fts3ReadInt(const char *z, int *pnOut){ + u64 iVal = 0; + int i; + for(i=0; z[i]>='0' && z[i]<='9'; i++){ + iVal = iVal*10 + (z[i] - '0'); + if( iVal>0x7FFFFFFF ) return -1; + } + *pnOut = (int)iVal; + return i; +} + +/* +** This function interprets the string at (*pp) as a non-negative integer +** value. It reads the integer and sets *pnOut to the value read, then +** sets *pp to point to the byte immediately following the last byte of +** the integer value. +** +** Only decimal digits ('0'..'9') may be part of an integer value. +** +** If *pp does not being with a decimal digit SQLITE_ERROR is returned and +** the output value undefined. Otherwise SQLITE_OK is returned. +** +** This function is used when parsing the "prefix=" FTS4 parameter. +*/ +static int fts3GobbleInt(const char **pp, int *pnOut){ + const int MAX_NPREFIX = 10000000; + int nInt = 0; /* Output value */ + int nByte; + nByte = sqlite3Fts3ReadInt(*pp, &nInt); + if( nInt>MAX_NPREFIX ){ + nInt = 0; + } + if( nByte==0 ){ + return SQLITE_ERROR; + } + *pnOut = nInt; + *pp += nByte; + return SQLITE_OK; +} + +/* +** This function is called to allocate an array of Fts3Index structures +** representing the indexes maintained by the current FTS table. FTS tables +** always maintain the main "terms" index, but may also maintain one or +** more "prefix" indexes, depending on the value of the "prefix=" parameter +** (if any) specified as part of the CREATE VIRTUAL TABLE statement. +** +** Argument zParam is passed the value of the "prefix=" option if one was +** specified, or NULL otherwise. +** +** If no error occurs, SQLITE_OK is returned and *apIndex set to point to +** the allocated array. *pnIndex is set to the number of elements in the +** array. If an error does occur, an SQLite error code is returned. +** +** Regardless of whether or not an error is returned, it is the responsibility +** of the caller to call sqlite3_free() on the output array to free it. +*/ +static int fts3PrefixParameter( + const char *zParam, /* ABC in prefix=ABC parameter to parse */ + int *pnIndex, /* OUT: size of *apIndex[] array */ + struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ +){ + struct Fts3Index *aIndex; /* Allocated array */ + int nIndex = 1; /* Number of entries in array */ + + if( zParam && zParam[0] ){ + const char *p; + nIndex++; + for(p=zParam; *p; p++){ + if( *p==',' ) nIndex++; + } + } + + aIndex = sqlite3_malloc64(sizeof(struct Fts3Index) * nIndex); + *apIndex = aIndex; + if( !aIndex ){ + return SQLITE_NOMEM; + } + + memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex); + if( zParam ){ + const char *p = zParam; + int i; + for(i=1; i=0 ); + if( nPrefix==0 ){ + nIndex--; + i--; + }else{ + aIndex[i].nPrefix = nPrefix; + } + p++; + } + } + + *pnIndex = nIndex; + return SQLITE_OK; +} + +/* +** This function is called when initializing an FTS4 table that uses the +** content=xxx option. It determines the number of and names of the columns +** of the new FTS4 table. +** +** The third argument passed to this function is the value passed to the +** config=xxx option (i.e. "xxx"). This function queries the database for +** a table of that name. If found, the output variables are populated +** as follows: +** +** *pnCol: Set to the number of columns table xxx has, +** +** *pnStr: Set to the total amount of space required to store a copy +** of each columns name, including the nul-terminator. +** +** *pazCol: Set to point to an array of *pnCol strings. Each string is +** the name of the corresponding column in table xxx. The array +** and its contents are allocated using a single allocation. It +** is the responsibility of the caller to free this allocation +** by eventually passing the *pazCol value to sqlite3_free(). +** +** If the table cannot be found, an error code is returned and the output +** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is +** returned (and the output variables are undefined). +*/ +static int fts3ContentColumns( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */ + const char *zTbl, /* Name of content table */ + const char ***pazCol, /* OUT: Malloc'd array of column names */ + int *pnCol, /* OUT: Size of array *pazCol */ + int *pnStr, /* OUT: Bytes of string content */ + char **pzErr /* OUT: error message */ +){ + int rc = SQLITE_OK; /* Return code */ + char *zSql; /* "SELECT *" statement on zTbl */ + sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */ + + zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + sqlite3Fts3ErrMsg(pzErr, "%s", sqlite3_errmsg(db)); + } + } + sqlite3_free(zSql); + + if( rc==SQLITE_OK ){ + const char **azCol; /* Output array */ + sqlite3_int64 nStr = 0; /* Size of all column names (incl. 0x00) */ + int nCol; /* Number of table columns */ + int i; /* Used to iterate through columns */ + + /* Loop through the returned columns. Set nStr to the number of bytes of + ** space required to store a copy of each column name, including the + ** nul-terminator byte. */ + nCol = sqlite3_column_count(pStmt); + for(i=0; i module name ("fts3" or "fts4") +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> "column name" and other module argument fields. +*/ +static int fts3InitVtab( + int isCreate, /* True for xCreate, false for xConnect */ + sqlite3 *db, /* The SQLite database connection */ + void *pAux, /* Hash table containing tokenizers */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ + char **pzErr /* Write any error message here */ +){ + Fts3Hash *pHash = &((Fts3HashWrapper*)pAux)->hash; + Fts3Table *p = 0; /* Pointer to allocated vtab */ + int rc = SQLITE_OK; /* Return code */ + int i; /* Iterator variable */ + sqlite3_int64 nByte; /* Size of allocation used for *p */ + int iCol; /* Column index */ + int nString = 0; /* Bytes required to hold all column names */ + int nCol = 0; /* Number of columns in the FTS table */ + char *zCsr; /* Space for holding column names */ + int nDb; /* Bytes required to hold database name */ + int nName; /* Bytes required to hold table name */ + int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ + const char **aCol; /* Array of column names */ + sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ + + int nIndex = 0; /* Size of aIndex[] array */ + struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ + + /* The results of parsing supported FTS4 key=value options: */ + int bNoDocsize = 0; /* True to omit %_docsize table */ + int bDescIdx = 0; /* True to store descending indexes */ + char *zPrefix = 0; /* Prefix parameter value (or NULL) */ + char *zCompress = 0; /* compress=? parameter (or NULL) */ + char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ + char *zContent = 0; /* content=? parameter (or NULL) */ + char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ + char **azNotindexed = 0; /* The set of notindexed= columns */ + int nNotindexed = 0; /* Size of azNotindexed[] array */ + + assert( strlen(argv[0])==4 ); + assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) + || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) + ); + + nDb = (int)strlen(argv[1]) + 1; + nName = (int)strlen(argv[2]) + 1; + + nByte = sizeof(const char *) * (argc-2); + aCol = (const char **)sqlite3_malloc64(nByte); + if( aCol ){ + memset((void*)aCol, 0, nByte); + azNotindexed = (char **)sqlite3_malloc64(nByte); + } + if( azNotindexed ){ + memset(azNotindexed, 0, nByte); + } + if( !aCol || !azNotindexed ){ + rc = SQLITE_NOMEM; + goto fts3_init_out; + } + + /* Loop through all of the arguments passed by the user to the FTS3/4 + ** module (i.e. all the column names and special arguments). This loop + ** does the following: + ** + ** + Figures out the number of columns the FTSX table will have, and + ** the number of bytes of space that must be allocated to store copies + ** of the column names. + ** + ** + If there is a tokenizer specification included in the arguments, + ** initializes the tokenizer pTokenizer. + */ + for(i=3; rc==SQLITE_OK && i8 + && 0==sqlite3_strnicmp(z, "tokenize", 8) + && 0==sqlite3Fts3IsIdChar(z[8]) + ){ + rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr); + } + + /* Check if it is an FTS4 special argument. */ + else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ + struct Fts4Option { + const char *zOpt; + int nOpt; + } aFts4Opt[] = { + { "matchinfo", 9 }, /* 0 -> MATCHINFO */ + { "prefix", 6 }, /* 1 -> PREFIX */ + { "compress", 8 }, /* 2 -> COMPRESS */ + { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ + { "order", 5 }, /* 4 -> ORDER */ + { "content", 7 }, /* 5 -> CONTENT */ + { "languageid", 10 }, /* 6 -> LANGUAGEID */ + { "notindexed", 10 } /* 7 -> NOTINDEXED */ + }; + + int iOpt; + if( !zVal ){ + rc = SQLITE_NOMEM; + }else{ + for(iOpt=0; iOptnOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){ + break; + } + } + switch( iOpt ){ + case 0: /* MATCHINFO */ + if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){ + sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo: %s", zVal); + rc = SQLITE_ERROR; + } + bNoDocsize = 1; + break; + + case 1: /* PREFIX */ + sqlite3_free(zPrefix); + zPrefix = zVal; + zVal = 0; + break; + + case 2: /* COMPRESS */ + sqlite3_free(zCompress); + zCompress = zVal; + zVal = 0; + break; + + case 3: /* UNCOMPRESS */ + sqlite3_free(zUncompress); + zUncompress = zVal; + zVal = 0; + break; + + case 4: /* ORDER */ + if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) + && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) + ){ + sqlite3Fts3ErrMsg(pzErr, "unrecognized order: %s", zVal); + rc = SQLITE_ERROR; + } + bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); + break; + + case 5: /* CONTENT */ + sqlite3_free(zContent); + zContent = zVal; + zVal = 0; + break; + + case 6: /* LANGUAGEID */ + assert( iOpt==6 ); + sqlite3_free(zLanguageid); + zLanguageid = zVal; + zVal = 0; + break; + + case 7: /* NOTINDEXED */ + azNotindexed[nNotindexed++] = zVal; + zVal = 0; + break; + + default: + assert( iOpt==SizeofArray(aFts4Opt) ); + sqlite3Fts3ErrMsg(pzErr, "unrecognized parameter: %s", z); + rc = SQLITE_ERROR; + break; + } + sqlite3_free(zVal); + } + } + + /* Otherwise, the argument is a column name. */ + else { + nString += (int)(strlen(z) + 1); + aCol[nCol++] = z; + } + } + + /* If a content=xxx option was specified, the following: + ** + ** 1. Ignore any compress= and uncompress= options. + ** + ** 2. If no column names were specified as part of the CREATE VIRTUAL + ** TABLE statement, use all columns from the content table. + */ + if( rc==SQLITE_OK && zContent ){ + sqlite3_free(zCompress); + sqlite3_free(zUncompress); + zCompress = 0; + zUncompress = 0; + if( nCol==0 ){ + sqlite3_free((void*)aCol); + aCol = 0; + rc = fts3ContentColumns(db, argv[1], zContent,&aCol,&nCol,&nString,pzErr); + + /* If a languageid= option was specified, remove the language id + ** column from the aCol[] array. */ + if( rc==SQLITE_OK && zLanguageid ){ + int j; + for(j=0; jdb = db; + p->nColumn = nCol; + p->nPendingData = 0; + p->azColumn = (char **)&p[1]; + p->pTokenizer = pTokenizer; + p->nMaxPendingData = FTS3_MAX_PENDING_DATA; + p->bHasDocsize = (isFts4 && bNoDocsize==0); + p->bHasStat = (u8)isFts4; + p->bFts4 = (u8)isFts4; + p->bDescIdx = (u8)bDescIdx; + p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */ + p->zContentTbl = zContent; + p->zLanguageid = zLanguageid; + zContent = 0; + zLanguageid = 0; + TESTONLY( p->inTransaction = -1 ); + TESTONLY( p->mxSavepoint = -1 ); + + p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; + memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); + p->nIndex = nIndex; + for(i=0; iaIndex[i].hPending, FTS3_HASH_STRING, 1); + } + p->abNotindexed = (u8 *)&p->aIndex[nIndex]; + + /* Fill in the zName and zDb fields of the vtab structure. */ + zCsr = (char *)&p->abNotindexed[nCol]; + p->zName = zCsr; + memcpy(zCsr, argv[2], nName); + zCsr += nName; + p->zDb = zCsr; + memcpy(zCsr, argv[1], nDb); + zCsr += nDb; + + /* Fill in the azColumn array */ + for(iCol=0; iCol0 ){ + memcpy(zCsr, z, n); + } + zCsr[n] = '\0'; + sqlite3Fts3Dequote(zCsr); + p->azColumn[iCol] = zCsr; + zCsr += n+1; + assert( zCsr <= &((char *)p)[nByte] ); + } + + /* Fill in the abNotindexed array */ + for(iCol=0; iColazColumn[iCol]); + for(i=0; iazColumn[iCol], zNot, n) + ){ + p->abNotindexed[iCol] = 1; + sqlite3_free(zNot); + azNotindexed[i] = 0; + } + } + } + for(i=0; izReadExprlist = fts3ReadExprList(p, zUncompress, &rc); + p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc); + if( rc!=SQLITE_OK ) goto fts3_init_out; + + /* If this is an xCreate call, create the underlying tables in the + ** database. TODO: For xConnect(), it could verify that said tables exist. + */ + if( isCreate ){ + rc = fts3CreateTables(p); + } + + /* Check to see if a legacy fts3 table has been "upgraded" by the + ** addition of a %_stat table so that it can use incremental merge. + */ + if( !isFts4 && !isCreate ){ + p->bHasStat = 2; + } + + /* Figure out the page-size for the database. This is required in order to + ** estimate the cost of loading large doclists from the database. */ + fts3DatabasePageSize(&rc, p); + p->nNodeSize = p->nPgsz-35; + +#if defined(SQLITE_DEBUG)||defined(SQLITE_TEST) + p->nMergeCount = FTS3_MERGE_COUNT; +#endif + + /* Declare the table schema to SQLite. */ + fts3DeclareVtab(&rc, p); + +fts3_init_out: + sqlite3_free(zPrefix); + sqlite3_free(aIndex); + sqlite3_free(zCompress); + sqlite3_free(zUncompress); + sqlite3_free(zContent); + sqlite3_free(zLanguageid); + for(i=0; ipModule->xDestroy(pTokenizer); + } + }else{ + assert( p->pSegments==0 ); + *ppVTab = &p->base; + } + return rc; +} + +/* +** The xConnect() and xCreate() methods for the virtual table. All the +** work is done in function fts3InitVtab(). +*/ +static int fts3ConnectMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); +} +static int fts3CreateMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); +} + +/* +** Set the pIdxInfo->estimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. +*/ +static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 + if( sqlite3_libversion_number()>=3008002 ){ + pIdxInfo->estimatedRows = nRow; + } +#endif +} + +/* +** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this +** extension is currently being used by a version of SQLite too old to +** support index-info flags. In that case this function is a no-op. +*/ +static void fts3SetUniqueFlag(sqlite3_index_info *pIdxInfo){ +#if SQLITE_VERSION_NUMBER>=3008012 + if( sqlite3_libversion_number()>=3008012 ){ + pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE; + } +#endif +} + +/* +** Implementation of the xBestIndex method for FTS3 tables. There +** are three possible strategies, in order of preference: +** +** 1. Direct lookup by rowid or docid. +** 2. Full-text search using a MATCH operator on a non-docid column. +** 3. Linear scan of %_content table. +*/ +static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ + Fts3Table *p = (Fts3Table *)pVTab; + int i; /* Iterator variable */ + int iCons = -1; /* Index of constraint to use */ + + int iLangidCons = -1; /* Index of langid=x constraint, if present */ + int iDocidGe = -1; /* Index of docid>=x constraint, if present */ + int iDocidLe = -1; /* Index of docid<=x constraint, if present */ + int iIdx; + + if( p->bLock ){ + return SQLITE_ERROR; + } + + /* By default use a full table scan. This is an expensive option, + ** so search through the constraints to see if a more efficient + ** strategy is possible. + */ + pInfo->idxNum = FTS3_FULLSCAN_SEARCH; + pInfo->estimatedCost = 5000000; + for(i=0; inConstraint; i++){ + int bDocid; /* True if this constraint is on docid */ + struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; + if( pCons->usable==0 ){ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + /* There exists an unusable MATCH constraint. This means that if + ** the planner does elect to use the results of this call as part + ** of the overall query plan the user will see an "unable to use + ** function MATCH in the requested context" error. To discourage + ** this, return a very high cost here. */ + pInfo->idxNum = FTS3_FULLSCAN_SEARCH; + pInfo->estimatedCost = 1e50; + fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); + return SQLITE_OK; + } + continue; + } + + bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); + + /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ + if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ + pInfo->idxNum = FTS3_DOCID_SEARCH; + pInfo->estimatedCost = 1.0; + iCons = i; + } + + /* A MATCH constraint. Use a full-text search. + ** + ** If there is more than one MATCH constraint available, use the first + ** one encountered. If there is both a MATCH constraint and a direct + ** rowid/docid lookup, prefer the MATCH strategy. This is done even + ** though the rowid/docid lookup is faster than a MATCH query, selecting + ** it would lead to an "unable to use function MATCH in the requested + ** context" error. + */ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH + && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn + ){ + pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; + pInfo->estimatedCost = 2.0; + iCons = i; + } + + /* Equality constraint on the langid column */ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ + && pCons->iColumn==p->nColumn + 2 + ){ + iLangidCons = i; + } + + if( bDocid ){ + switch( pCons->op ){ + case SQLITE_INDEX_CONSTRAINT_GE: + case SQLITE_INDEX_CONSTRAINT_GT: + iDocidGe = i; + break; + + case SQLITE_INDEX_CONSTRAINT_LE: + case SQLITE_INDEX_CONSTRAINT_LT: + iDocidLe = i; + break; + } + } + } + + /* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */ + if( pInfo->idxNum==FTS3_DOCID_SEARCH ) fts3SetUniqueFlag(pInfo); + + iIdx = 1; + if( iCons>=0 ){ + pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; + pInfo->aConstraintUsage[iCons].omit = 1; + } + if( iLangidCons>=0 ){ + pInfo->idxNum |= FTS3_HAVE_LANGID; + pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; + } + if( iDocidGe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_GE; + pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; + } + if( iDocidLe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_LE; + pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; + } + + /* Regardless of the strategy selected, FTS can deliver rows in rowid (or + ** docid) order. Both ascending and descending are possible. + */ + if( pInfo->nOrderBy==1 ){ + struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; + if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ + if( pOrder->desc ){ + pInfo->idxStr = "DESC"; + }else{ + pInfo->idxStr = "ASC"; + } + pInfo->orderByConsumed = 1; + } + } + + assert( p->pSegments==0 ); + return SQLITE_OK; +} + +/* +** Implementation of xOpen method. +*/ +static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + sqlite3_vtab_cursor *pCsr; /* Allocated cursor */ + + UNUSED_PARAMETER(pVTab); + + /* Allocate a buffer large enough for an Fts3Cursor structure. If the + ** allocation succeeds, zero it and return SQLITE_OK. Otherwise, + ** if the allocation fails, return SQLITE_NOMEM. + */ + *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor)); + if( !pCsr ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(Fts3Cursor)); + return SQLITE_OK; +} + +/* +** Finalize the statement handle at pCsr->pStmt. +** +** Or, if that statement handle is one created by fts3CursorSeekStmt(), +** and the Fts3Table.pSeekStmt slot is currently NULL, save the statement +** pointer there instead of finalizing it. +*/ +static void fts3CursorFinalizeStmt(Fts3Cursor *pCsr){ + if( pCsr->bSeekStmt ){ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + if( p->pSeekStmt==0 ){ + p->pSeekStmt = pCsr->pStmt; + sqlite3_reset(pCsr->pStmt); + pCsr->pStmt = 0; + } + pCsr->bSeekStmt = 0; + } + sqlite3_finalize(pCsr->pStmt); +} + +/* +** Free all resources currently held by the cursor passed as the only +** argument. +*/ +static void fts3ClearCursor(Fts3Cursor *pCsr){ + fts3CursorFinalizeStmt(pCsr); + sqlite3Fts3FreeDeferredTokens(pCsr); + sqlite3_free(pCsr->aDoclist); + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); + sqlite3Fts3ExprFree(pCsr->pExpr); + memset(&(&pCsr->base)[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); +} + +/* +** Close the cursor. For additional information see the documentation +** on the xClose method of the virtual table interface. +*/ +static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + fts3ClearCursor(pCsr); + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then +** compose and prepare an SQL statement of the form: +** +** "SELECT FROM %_content WHERE rowid = ?" +** +** (or the equivalent for a content=xxx table) and set pCsr->pStmt to +** it. If an error occurs, return an SQLite error code. +*/ +static int fts3CursorSeekStmt(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; + if( pCsr->pStmt==0 ){ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + char *zSql; + if( p->pSeekStmt ){ + pCsr->pStmt = p->pSeekStmt; + p->pSeekStmt = 0; + }else{ + zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist); + if( !zSql ) return SQLITE_NOMEM; + p->bLock++; + rc = sqlite3_prepare_v3( + p->db, zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0 + ); + p->bLock--; + sqlite3_free(zSql); + } + if( rc==SQLITE_OK ) pCsr->bSeekStmt = 1; + } + return rc; +} + +/* +** Position the pCsr->pStmt statement so that it is on the row +** of the %_content table that contains the last match. Return +** SQLITE_OK on success. +*/ +static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ + int rc = SQLITE_OK; + if( pCsr->isRequireSeek ){ + rc = fts3CursorSeekStmt(pCsr); + if( rc==SQLITE_OK ){ + Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab; + pTab->bLock++; + sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); + pCsr->isRequireSeek = 0; + if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ + pTab->bLock--; + return SQLITE_OK; + }else{ + pTab->bLock--; + rc = sqlite3_reset(pCsr->pStmt); + if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ + /* If no row was found and no error has occurred, then the %_content + ** table is missing a row that is present in the full-text index. + ** The data structures are corrupt. */ + rc = FTS_CORRUPT_VTAB; + pCsr->isEof = 1; + } + } + } + } + + if( rc!=SQLITE_OK && pContext ){ + sqlite3_result_error_code(pContext, rc); + } + return rc; +} + +/* +** This function is used to process a single interior node when searching +** a b-tree for a term or term prefix. The node data is passed to this +** function via the zNode/nNode parameters. The term to search for is +** passed in zTerm/nTerm. +** +** If piFirst is not NULL, then this function sets *piFirst to the blockid +** of the child node that heads the sub-tree that may contain the term. +** +** If piLast is not NULL, then *piLast is set to the right-most child node +** that heads a sub-tree that may contain a term for which zTerm/nTerm is +** a prefix. +** +** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. +*/ +static int fts3ScanInteriorNode( + const char *zTerm, /* Term to select leaves for */ + int nTerm, /* Size of term zTerm in bytes */ + const char *zNode, /* Buffer containing segment interior node */ + int nNode, /* Size of buffer at zNode */ + sqlite3_int64 *piFirst, /* OUT: Selected child node */ + sqlite3_int64 *piLast /* OUT: Selected child node */ +){ + int rc = SQLITE_OK; /* Return code */ + const char *zCsr = zNode; /* Cursor to iterate through node */ + const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ + char *zBuffer = 0; /* Buffer to load terms into */ + i64 nAlloc = 0; /* Size of allocated buffer */ + int isFirstTerm = 1; /* True when processing first term on page */ + u64 iChild; /* Block id of child node to descend to */ + int nBuffer = 0; /* Total term size */ + + /* Skip over the 'height' varint that occurs at the start of every + ** interior node. Then load the blockid of the left-child of the b-tree + ** node into variable iChild. + ** + ** Even if the data structure on disk is corrupted, this (reading two + ** varints from the buffer) does not risk an overread. If zNode is a + ** root node, then the buffer comes from a SELECT statement. SQLite does + ** not make this guarantee explicitly, but in practice there are always + ** either more than 20 bytes of allocated space following the nNode bytes of + ** contents, or two zero bytes. Or, if the node is read from the %_segments + ** table, then there are always 20 bytes of zeroed padding following the + ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details). + */ + zCsr += sqlite3Fts3GetVarintU(zCsr, &iChild); + zCsr += sqlite3Fts3GetVarintU(zCsr, &iChild); + if( zCsr>zEnd ){ + return FTS_CORRUPT_VTAB; + } + + while( zCsrnBuffer ){ + rc = FTS_CORRUPT_VTAB; + goto finish_scan; + } + } + isFirstTerm = 0; + zCsr += fts3GetVarint32(zCsr, &nSuffix); + + assert( nPrefix>=0 && nSuffix>=0 ); + if( nPrefix>zCsr-zNode || nSuffix>zEnd-zCsr || nSuffix==0 ){ + rc = FTS_CORRUPT_VTAB; + goto finish_scan; + } + if( (i64)nPrefix+nSuffix>nAlloc ){ + char *zNew; + nAlloc = ((i64)nPrefix+nSuffix) * 2; + zNew = (char *)sqlite3_realloc64(zBuffer, nAlloc); + if( !zNew ){ + rc = SQLITE_NOMEM; + goto finish_scan; + } + zBuffer = zNew; + } + assert( zBuffer ); + memcpy(&zBuffer[nPrefix], zCsr, nSuffix); + nBuffer = nPrefix + nSuffix; + zCsr += nSuffix; + + /* Compare the term we are searching for with the term just loaded from + ** the interior node. If the specified term is greater than or equal + ** to the term from the interior node, then all terms on the sub-tree + ** headed by node iChild are smaller than zTerm. No need to search + ** iChild. + ** + ** If the interior node term is larger than the specified term, then + ** the tree headed by iChild may contain the specified term. + */ + cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); + if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){ + *piFirst = (i64)iChild; + piFirst = 0; + } + + if( piLast && cmp<0 ){ + *piLast = (i64)iChild; + piLast = 0; + } + + iChild++; + }; + + if( piFirst ) *piFirst = (i64)iChild; + if( piLast ) *piLast = (i64)iChild; + + finish_scan: + sqlite3_free(zBuffer); + return rc; +} + + +/* +** The buffer pointed to by argument zNode (size nNode bytes) contains an +** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes) +** contains a term. This function searches the sub-tree headed by the zNode +** node for the range of leaf nodes that may contain the specified term +** or terms for which the specified term is a prefix. +** +** If piLeaf is not NULL, then *piLeaf is set to the blockid of the +** left-most leaf node in the tree that may contain the specified term. +** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the +** right-most leaf node that may contain a term for which the specified +** term is a prefix. +** +** It is possible that the range of returned leaf nodes does not contain +** the specified term or any terms for which it is a prefix. However, if the +** segment does contain any such terms, they are stored within the identified +** range. Because this function only inspects interior segment nodes (and +** never loads leaf nodes into memory), it is not possible to be sure. +** +** If an error occurs, an error code other than SQLITE_OK is returned. +*/ +static int fts3SelectLeaf( + Fts3Table *p, /* Virtual table handle */ + const char *zTerm, /* Term to select leaves for */ + int nTerm, /* Size of term zTerm in bytes */ + const char *zNode, /* Buffer containing segment interior node */ + int nNode, /* Size of buffer at zNode */ + sqlite3_int64 *piLeaf, /* Selected leaf node */ + sqlite3_int64 *piLeaf2 /* Selected leaf node */ +){ + int rc = SQLITE_OK; /* Return code */ + int iHeight; /* Height of this node in tree */ + + assert( piLeaf || piLeaf2 ); + + fts3GetVarint32(zNode, &iHeight); + rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); + assert_fts3_nc( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); + + if( rc==SQLITE_OK && iHeight>1 ){ + char *zBlob = 0; /* Blob read from %_segments table */ + int nBlob = 0; /* Size of zBlob in bytes */ + + if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ + rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0); + if( rc==SQLITE_OK ){ + rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); + } + sqlite3_free(zBlob); + piLeaf = 0; + zBlob = 0; + } + + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); + } + if( rc==SQLITE_OK ){ + int iNewHeight = 0; + fts3GetVarint32(zBlob, &iNewHeight); + if( iNewHeight>=iHeight ){ + rc = FTS_CORRUPT_VTAB; + }else{ + rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); + } + } + sqlite3_free(zBlob); + } + + return rc; +} + +/* +** This function is used to create delta-encoded serialized lists of FTS3 +** varints. Each call to this function appends a single varint to a list. +*/ +static void fts3PutDeltaVarint( + char **pp, /* IN/OUT: Output pointer */ + sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ + sqlite3_int64 iVal /* Write this value to the list */ +){ + assert_fts3_nc( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) ); + *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev); + *piPrev = iVal; +} + +/* +** When this function is called, *ppPoslist is assumed to point to the +** start of a position-list. After it returns, *ppPoslist points to the +** first byte after the position-list. +** +** A position list is list of positions (delta encoded) and columns for +** a single document record of a doclist. So, in other words, this +** routine advances *ppPoslist so that it points to the next docid in +** the doclist, or to the first byte past the end of the doclist. +** +** If pp is not NULL, then the contents of the position list are copied +** to *pp. *pp is set to point to the first byte past the last byte copied +** before this function returns. +*/ +static void fts3PoslistCopy(char **pp, char **ppPoslist){ + char *pEnd = *ppPoslist; + char c = 0; + + /* The end of a position list is marked by a zero encoded as an FTS3 + ** varint. A single POS_END (0) byte. Except, if the 0 byte is preceded by + ** a byte with the 0x80 bit set, then it is not a varint 0, but the tail + ** of some other, multi-byte, value. + ** + ** The following while-loop moves pEnd to point to the first byte that is not + ** immediately preceded by a byte with the 0x80 bit set. Then increments + ** pEnd once more so that it points to the byte immediately following the + ** last byte in the position-list. + */ + while( *pEnd | c ){ + c = *pEnd++ & 0x80; + testcase( c!=0 && (*pEnd)==0 ); + } + pEnd++; /* Advance past the POS_END terminator byte */ + + if( pp ){ + int n = (int)(pEnd - *ppPoslist); + char *p = *pp; + memcpy(p, *ppPoslist, n); + p += n; + *pp = p; + } + *ppPoslist = pEnd; +} + +/* +** When this function is called, *ppPoslist is assumed to point to the +** start of a column-list. After it returns, *ppPoslist points to the +** to the terminator (POS_COLUMN or POS_END) byte of the column-list. +** +** A column-list is list of delta-encoded positions for a single column +** within a single document within a doclist. +** +** The column-list is terminated either by a POS_COLUMN varint (1) or +** a POS_END varint (0). This routine leaves *ppPoslist pointing to +** the POS_COLUMN or POS_END that terminates the column-list. +** +** If pp is not NULL, then the contents of the column-list are copied +** to *pp. *pp is set to point to the first byte past the last byte copied +** before this function returns. The POS_COLUMN or POS_END terminator +** is not copied into *pp. +*/ +static void fts3ColumnlistCopy(char **pp, char **ppPoslist){ + char *pEnd = *ppPoslist; + char c = 0; + + /* A column-list is terminated by either a 0x01 or 0x00 byte that is + ** not part of a multi-byte varint. + */ + while( 0xFE & (*pEnd | c) ){ + c = *pEnd++ & 0x80; + testcase( c!=0 && ((*pEnd)&0xfe)==0 ); + } + if( pp ){ + int n = (int)(pEnd - *ppPoslist); + char *p = *pp; + memcpy(p, *ppPoslist, n); + p += n; + *pp = p; + } + *ppPoslist = pEnd; +} + +/* +** Value used to signify the end of an position-list. This must be +** as large or larger than any value that might appear on the +** position-list, even a position list that has been corrupted. +*/ +#define POSITION_LIST_END LARGEST_INT64 + +/* +** This function is used to help parse position-lists. When this function is +** called, *pp may point to the start of the next varint in the position-list +** being parsed, or it may point to 1 byte past the end of the position-list +** (in which case **pp will be a terminator bytes POS_END (0) or +** (1)). +** +** If *pp points past the end of the current position-list, set *pi to +** POSITION_LIST_END and return. Otherwise, read the next varint from *pp, +** increment the current value of *pi by the value read, and set *pp to +** point to the next value before returning. +** +** Before calling this routine *pi must be initialized to the value of +** the previous position, or zero if we are reading the first position +** in the position-list. Because positions are delta-encoded, the value +** of the previous position is needed in order to compute the value of +** the next position. +*/ +static void fts3ReadNextPos( + char **pp, /* IN/OUT: Pointer into position-list buffer */ + sqlite3_int64 *pi /* IN/OUT: Value read from position-list */ +){ + if( (**pp)&0xFE ){ + int iVal; + *pp += fts3GetVarint32((*pp), &iVal); + *pi += iVal; + *pi -= 2; + }else{ + *pi = POSITION_LIST_END; + } +} + +/* +** If parameter iCol is not 0, write an POS_COLUMN (1) byte followed by +** the value of iCol encoded as a varint to *pp. This will start a new +** column list. +** +** Set *pp to point to the byte just after the last byte written before +** returning (do not modify it if iCol==0). Return the total number of bytes +** written (0 if iCol==0). +*/ +static int fts3PutColNumber(char **pp, int iCol){ + int n = 0; /* Number of bytes written */ + if( iCol ){ + char *p = *pp; /* Output pointer */ + n = 1 + sqlite3Fts3PutVarint(&p[1], iCol); + *p = 0x01; + *pp = &p[n]; + } + return n; +} + +/* +** Compute the union of two position lists. The output written +** into *pp contains all positions of both *pp1 and *pp2 in sorted +** order and with any duplicates removed. All pointers are +** updated appropriately. The caller is responsible for insuring +** that there is enough space in *pp to hold the complete output. +*/ +static int fts3PoslistMerge( + char **pp, /* Output buffer */ + char **pp1, /* Left input list */ + char **pp2 /* Right input list */ +){ + char *p = *pp; + char *p1 = *pp1; + char *p2 = *pp2; + + while( *p1 || *p2 ){ + int iCol1; /* The current column index in pp1 */ + int iCol2; /* The current column index in pp2 */ + + if( *p1==POS_COLUMN ){ + fts3GetVarint32(&p1[1], &iCol1); + if( iCol1==0 ) return FTS_CORRUPT_VTAB; + } + else if( *p1==POS_END ) iCol1 = 0x7fffffff; + else iCol1 = 0; + + if( *p2==POS_COLUMN ){ + fts3GetVarint32(&p2[1], &iCol2); + if( iCol2==0 ) return FTS_CORRUPT_VTAB; + } + else if( *p2==POS_END ) iCol2 = 0x7fffffff; + else iCol2 = 0; + + if( iCol1==iCol2 ){ + sqlite3_int64 i1 = 0; /* Last position from pp1 */ + sqlite3_int64 i2 = 0; /* Last position from pp2 */ + sqlite3_int64 iPrev = 0; + int n = fts3PutColNumber(&p, iCol1); + p1 += n; + p2 += n; + + /* At this point, both p1 and p2 point to the start of column-lists + ** for the same column (the column with index iCol1 and iCol2). + ** A column-list is a list of non-negative delta-encoded varints, each + ** incremented by 2 before being stored. Each list is terminated by a + ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists + ** and writes the results to buffer p. p is left pointing to the byte + ** after the list written. No terminator (POS_END or POS_COLUMN) is + ** written to the output. + */ + fts3GetDeltaVarint(&p1, &i1); + fts3GetDeltaVarint(&p2, &i2); + if( i1<2 || i2<2 ){ + break; + } + do { + fts3PutDeltaVarint(&p, &iPrev, (i1pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. +** when the *pp1 token appears before the *pp2 token, but not more than nToken +** slots before it. +** +** e.g. nToken==1 searches for adjacent positions. +*/ +static int fts3PoslistPhraseMerge( + char **pp, /* IN/OUT: Preallocated output buffer */ + int nToken, /* Maximum difference in token positions */ + int isSaveLeft, /* Save the left position */ + int isExact, /* If *pp1 is exactly nTokens before *pp2 */ + char **pp1, /* IN/OUT: Left input list */ + char **pp2 /* IN/OUT: Right input list */ +){ + char *p = *pp; + char *p1 = *pp1; + char *p2 = *pp2; + int iCol1 = 0; + int iCol2 = 0; + + /* Never set both isSaveLeft and isExact for the same invocation. */ + assert( isSaveLeft==0 || isExact==0 ); + + assert_fts3_nc( p!=0 && *p1!=0 && *p2!=0 ); + if( *p1==POS_COLUMN ){ + p1++; + p1 += fts3GetVarint32(p1, &iCol1); + } + if( *p2==POS_COLUMN ){ + p2++; + p2 += fts3GetVarint32(p2, &iCol2); + } + + while( 1 ){ + if( iCol1==iCol2 ){ + char *pSave = p; + sqlite3_int64 iPrev = 0; + sqlite3_int64 iPos1 = 0; + sqlite3_int64 iPos2 = 0; + + if( iCol1 ){ + *p++ = POS_COLUMN; + p += sqlite3Fts3PutVarint(p, iCol1); + } + + fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; + fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; + if( iPos1<0 || iPos2<0 ) break; + + while( 1 ){ + if( iPos2==iPos1+nToken + || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) + ){ + sqlite3_int64 iSave; + iSave = isSaveLeft ? iPos1 : iPos2; + fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2; + pSave = 0; + assert( p ); + } + if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){ + if( (*p2&0xFE)==0 ) break; + fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; + }else{ + if( (*p1&0xFE)==0 ) break; + fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; + } + } + + if( pSave ){ + assert( pp && p ); + p = pSave; + } + + fts3ColumnlistCopy(0, &p1); + fts3ColumnlistCopy(0, &p2); + assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 ); + if( 0==*p1 || 0==*p2 ) break; + + p1++; + p1 += fts3GetVarint32(p1, &iCol1); + p2++; + p2 += fts3GetVarint32(p2, &iCol2); + } + + /* Advance pointer p1 or p2 (whichever corresponds to the smaller of + ** iCol1 and iCol2) so that it points to either the 0x00 that marks the + ** end of the position list, or the 0x01 that precedes the next + ** column-number in the position list. + */ + else if( iCol1=pEnd ){ + *pp = 0; + }else{ + u64 iVal; + *pp += sqlite3Fts3GetVarintU(*pp, &iVal); + if( bDescIdx ){ + *pVal = (i64)((u64)*pVal - iVal); + }else{ + *pVal = (i64)((u64)*pVal + iVal); + } + } +} + +/* +** This function is used to write a single varint to a buffer. The varint +** is written to *pp. Before returning, *pp is set to point 1 byte past the +** end of the value written. +** +** If *pbFirst is zero when this function is called, the value written to +** the buffer is that of parameter iVal. +** +** If *pbFirst is non-zero when this function is called, then the value +** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal) +** (if bDescIdx is non-zero). +** +** Before returning, this function always sets *pbFirst to 1 and *piPrev +** to the value of parameter iVal. +*/ +static void fts3PutDeltaVarint3( + char **pp, /* IN/OUT: Output pointer */ + int bDescIdx, /* True for descending docids */ + sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ + int *pbFirst, /* IN/OUT: True after first int written */ + sqlite3_int64 iVal /* Write this value to the list */ +){ + sqlite3_uint64 iWrite; + if( bDescIdx==0 || *pbFirst==0 ){ + assert_fts3_nc( *pbFirst==0 || iVal>=*piPrev ); + iWrite = (u64)iVal - (u64)*piPrev; + }else{ + assert_fts3_nc( *piPrev>=iVal ); + iWrite = (u64)*piPrev - (u64)iVal; + } + assert( *pbFirst || *piPrev==0 ); + assert_fts3_nc( *pbFirst==0 || iWrite>0 ); + *pp += sqlite3Fts3PutVarint(*pp, iWrite); + *piPrev = iVal; + *pbFirst = 1; +} + + +/* +** This macro is used by various functions that merge doclists. The two +** arguments are 64-bit docid values. If the value of the stack variable +** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). +** Otherwise, (i2-i1). +** +** Using this makes it easier to write code that can merge doclists that are +** sorted in either ascending or descending order. +*/ +/* #define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i64)((u64)i1-i2)) */ +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1>i2?1:((i1==i2)?0:-1))) + +/* +** This function does an "OR" merge of two doclists (output contains all +** positions contained in either argument doclist). If the docids in the +** input doclists are sorted in ascending order, parameter bDescDoclist +** should be false. If they are sorted in ascending order, it should be +** passed a non-zero value. +** +** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer +** containing the output doclist and SQLITE_OK is returned. In this case +** *pnOut is set to the number of bytes in the output doclist. +** +** If an error occurs, an SQLite error code is returned. The output values +** are undefined in this case. +*/ +static int fts3DoclistOrMerge( + int bDescDoclist, /* True if arguments are desc */ + char *a1, int n1, /* First doclist */ + char *a2, int n2, /* Second doclist */ + char **paOut, int *pnOut /* OUT: Malloc'd doclist */ +){ + int rc = SQLITE_OK; + sqlite3_int64 i1 = 0; + sqlite3_int64 i2 = 0; + sqlite3_int64 iPrev = 0; + char *pEnd1 = &a1[n1]; + char *pEnd2 = &a2[n2]; + char *p1 = a1; + char *p2 = a2; + char *p; + char *aOut; + int bFirstOut = 0; + + *paOut = 0; + *pnOut = 0; + + /* Allocate space for the output. Both the input and output doclists + ** are delta encoded. If they are in ascending order (bDescDoclist==0), + ** then the first docid in each list is simply encoded as a varint. For + ** each subsequent docid, the varint stored is the difference between the + ** current and previous docid (a positive number - since the list is in + ** ascending order). + ** + ** The first docid written to the output is therefore encoded using the + ** same number of bytes as it is in whichever of the input lists it is + ** read from. And each subsequent docid read from the same input list + ** consumes either the same or less bytes as it did in the input (since + ** the difference between it and the previous value in the output must + ** be a positive value less than or equal to the delta value read from + ** the input list). The same argument applies to all but the first docid + ** read from the 'other' list. And to the contents of all position lists + ** that will be copied and merged from the input to the output. + ** + ** However, if the first docid copied to the output is a negative number, + ** then the encoding of the first docid from the 'other' input list may + ** be larger in the output than it was in the input (since the delta value + ** may be a larger positive integer than the actual docid). + ** + ** The space required to store the output is therefore the sum of the + ** sizes of the two inputs, plus enough space for exactly one of the input + ** docids to grow. + ** + ** A symetric argument may be made if the doclists are in descending + ** order. + */ + aOut = sqlite3_malloc64((i64)n1+n2+FTS3_VARINT_MAX-1+FTS3_BUFFER_PADDING); + if( !aOut ) return SQLITE_NOMEM; + + p = aOut; + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + while( p1 || p2 ){ + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); + + if( p2 && p1 && iDiff==0 ){ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + rc = fts3PoslistMerge(&p, &p1, &p2); + if( rc ) break; + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + }else if( !p2 || (p1 && iDiff<0) ){ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + fts3PoslistCopy(&p, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + }else{ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); + fts3PoslistCopy(&p, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + } + + assert( (p-aOut)<=((p1?(p1-a1):n1)+(p2?(p2-a2):n2)+FTS3_VARINT_MAX-1) ); + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(aOut); + p = aOut = 0; + }else{ + assert( (p-aOut)<=n1+n2+FTS3_VARINT_MAX-1 ); + memset(&aOut[(p-aOut)], 0, FTS3_BUFFER_PADDING); + } + *paOut = aOut; + *pnOut = (int)(p-aOut); + return rc; +} + +/* +** This function does a "phrase" merge of two doclists. In a phrase merge, +** the output contains a copy of each position from the right-hand input +** doclist for which there is a position in the left-hand input doclist +** exactly nDist tokens before it. +** +** If the docids in the input doclists are sorted in ascending order, +** parameter bDescDoclist should be false. If they are sorted in ascending +** order, it should be passed a non-zero value. +** +** The right-hand input doclist is overwritten by this function. +*/ +static int fts3DoclistPhraseMerge( + int bDescDoclist, /* True if arguments are desc */ + int nDist, /* Distance from left to right (1=adjacent) */ + char *aLeft, int nLeft, /* Left doclist */ + char **paRight, int *pnRight /* IN/OUT: Right/output doclist */ +){ + sqlite3_int64 i1 = 0; + sqlite3_int64 i2 = 0; + sqlite3_int64 iPrev = 0; + char *aRight = *paRight; + char *pEnd1 = &aLeft[nLeft]; + char *pEnd2 = &aRight[*pnRight]; + char *p1 = aLeft; + char *p2 = aRight; + char *p; + int bFirstOut = 0; + char *aOut; + + assert( nDist>0 ); + if( bDescDoclist ){ + aOut = sqlite3_malloc64((sqlite3_int64)*pnRight + FTS3_VARINT_MAX); + if( aOut==0 ) return SQLITE_NOMEM; + }else{ + aOut = aRight; + } + p = aOut; + + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + + while( p1 && p2 ){ + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); + if( iDiff==0 ){ + char *pSave = p; + sqlite3_int64 iPrevSave = iPrev; + int bFirstOutSave = bFirstOut; + + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ + p = pSave; + iPrev = iPrevSave; + bFirstOut = bFirstOutSave; + } + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + }else if( iDiff<0 ){ + fts3PoslistCopy(0, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + }else{ + fts3PoslistCopy(0, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + } + } + + *pnRight = (int)(p - aOut); + if( bDescDoclist ){ + sqlite3_free(aRight); + *paRight = aOut; + } + + return SQLITE_OK; +} + +/* +** Argument pList points to a position list nList bytes in size. This +** function checks to see if the position list contains any entries for +** a token in position 0 (of any column). If so, it writes argument iDelta +** to the output buffer pOut, followed by a position list consisting only +** of the entries from pList at position 0, and terminated by an 0x00 byte. +** The value returned is the number of bytes written to pOut (if any). +*/ +SQLITE_PRIVATE int sqlite3Fts3FirstFilter( + sqlite3_int64 iDelta, /* Varint that may be written to pOut */ + char *pList, /* Position list (no 0x00 term) */ + int nList, /* Size of pList in bytes */ + char *pOut /* Write output here */ +){ + int nOut = 0; + int bWritten = 0; /* True once iDelta has been written */ + char *p = pList; + char *pEnd = &pList[nList]; + + if( *p!=0x01 ){ + if( *p==0x02 ){ + nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); + pOut[nOut++] = 0x02; + bWritten = 1; + } + fts3ColumnlistCopy(0, &p); + } + + while( paaOutput); i++){ + if( pTS->aaOutput[i] ){ + if( !aOut ){ + aOut = pTS->aaOutput[i]; + nOut = pTS->anOutput[i]; + pTS->aaOutput[i] = 0; + }else{ + int nNew; + char *aNew; + + int rc = fts3DoclistOrMerge(p->bDescIdx, + pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ + sqlite3_free(aOut); + return rc; + } + + sqlite3_free(pTS->aaOutput[i]); + sqlite3_free(aOut); + pTS->aaOutput[i] = 0; + aOut = aNew; + nOut = nNew; + } + } + } + + pTS->aaOutput[0] = aOut; + pTS->anOutput[0] = nOut; + return SQLITE_OK; +} + +/* +** Merge the doclist aDoclist/nDoclist into the TermSelect object passed +** as the first argument. The merge is an "OR" merge (see function +** fts3DoclistOrMerge() for details). +** +** This function is called with the doclist for each term that matches +** a queried prefix. It merges all these doclists into one, the doclist +** for the specified prefix. Since there can be a very large number of +** doclists to merge, the merging is done pair-wise using the TermSelect +** object. +** +** This function returns SQLITE_OK if the merge is successful, or an +** SQLite error code (SQLITE_NOMEM) if an error occurs. +*/ +static int fts3TermSelectMerge( + Fts3Table *p, /* FTS table handle */ + TermSelect *pTS, /* TermSelect object to merge into */ + char *aDoclist, /* Pointer to doclist */ + int nDoclist /* Size of aDoclist in bytes */ +){ + if( pTS->aaOutput[0]==0 ){ + /* If this is the first term selected, copy the doclist to the output + ** buffer using memcpy(). + ** + ** Add FTS3_VARINT_MAX bytes of unused space to the end of the + ** allocation. This is so as to ensure that the buffer is big enough + ** to hold the current doclist AND'd with any other doclist. If the + ** doclists are stored in order=ASC order, this padding would not be + ** required (since the size of [doclistA AND doclistB] is always less + ** than or equal to the size of [doclistA] in that case). But this is + ** not true for order=DESC. For example, a doclist containing (1, -1) + ** may be smaller than (-1), as in the first example the -1 may be stored + ** as a single-byte delta, whereas in the second it must be stored as a + ** FTS3_VARINT_MAX byte varint. + ** + ** Similar padding is added in the fts3DoclistOrMerge() function. + */ + pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1); + pTS->anOutput[0] = nDoclist; + if( pTS->aaOutput[0] ){ + memcpy(pTS->aaOutput[0], aDoclist, nDoclist); + memset(&pTS->aaOutput[0][nDoclist], 0, FTS3_VARINT_MAX); + }else{ + return SQLITE_NOMEM; + } + }else{ + char *aMerge = aDoclist; + int nMerge = nDoclist; + int iOut; + + for(iOut=0; iOutaaOutput); iOut++){ + if( pTS->aaOutput[iOut]==0 ){ + assert( iOut>0 ); + pTS->aaOutput[iOut] = aMerge; + pTS->anOutput[iOut] = nMerge; + break; + }else{ + char *aNew; + int nNew; + + int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, + pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + return rc; + } + + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + sqlite3_free(pTS->aaOutput[iOut]); + pTS->aaOutput[iOut] = 0; + + aMerge = aNew; + nMerge = nNew; + if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ + pTS->aaOutput[iOut] = aMerge; + pTS->anOutput[iOut] = nMerge; + } + } + } + } + return SQLITE_OK; +} + +/* +** Append SegReader object pNew to the end of the pCsr->apSegment[] array. +*/ +static int fts3SegReaderCursorAppend( + Fts3MultiSegReader *pCsr, + Fts3SegReader *pNew +){ + if( (pCsr->nSegment%16)==0 ){ + Fts3SegReader **apNew; + sqlite3_int64 nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); + apNew = (Fts3SegReader **)sqlite3_realloc64(pCsr->apSegment, nByte); + if( !apNew ){ + sqlite3Fts3SegReaderFree(pNew); + return SQLITE_NOMEM; + } + pCsr->apSegment = apNew; + } + pCsr->apSegment[pCsr->nSegment++] = pNew; + return SQLITE_OK; +} + +/* +** Add seg-reader objects to the Fts3MultiSegReader object passed as the +** 8th argument. +** +** This function returns SQLITE_OK if successful, or an SQLite error code +** otherwise. +*/ +static int fts3SegReaderCursor( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ + int iLevel, /* Level of segments to scan */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + int isScan, /* True to scan from zTerm to EOF */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ +){ + int rc = SQLITE_OK; /* Error code */ + sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ + int rc2; /* Result of sqlite3_reset() */ + + /* If iLevel is less than 0 and this is not a scan, include a seg-reader + ** for the pending-terms. If this is a scan, then this call must be being + ** made by an fts4aux module, not an FTS table. In this case calling + ** Fts3SegReaderPending might segfault, as the data structures used by + ** fts4aux are not completely populated. So it's easiest to filter these + ** calls out here. */ + if( iLevel<0 && p->aIndex && p->iPrevLangid==iLangid ){ + Fts3SegReader *pSeg = 0; + rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan, &pSeg); + if( rc==SQLITE_OK && pSeg ){ + rc = fts3SegReaderCursorAppend(pCsr, pSeg); + } + } + + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt); + } + + while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ + Fts3SegReader *pSeg = 0; + + /* Read the values returned by the SELECT into local variables. */ + sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); + sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); + sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); + int nRoot = sqlite3_column_bytes(pStmt, 4); + char const *zRoot = sqlite3_column_blob(pStmt, 4); + + /* If zTerm is not NULL, and this segment is not stored entirely on its + ** root node, the range of leaves scanned can be reduced. Do this. */ + if( iStartBlock && zTerm && zRoot ){ + sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); + rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); + if( rc!=SQLITE_OK ) goto finished; + if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; + } + + rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, + (isPrefix==0 && isScan==0), + iStartBlock, iLeavesEndBlock, + iEndBlock, zRoot, nRoot, &pSeg + ); + if( rc!=SQLITE_OK ) goto finished; + rc = fts3SegReaderCursorAppend(pCsr, pSeg); + } + } + + finished: + rc2 = sqlite3_reset(pStmt); + if( rc==SQLITE_DONE ) rc = rc2; + + return rc; +} + +/* +** Set up a cursor object for iterating through a full-text index or a +** single level therein. +*/ +SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language-id to search */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ + int iLevel, /* Level of segments to scan */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + int isScan, /* True to scan from zTerm to EOF */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ +){ + assert( iIndex>=0 && iIndexnIndex ); + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( iLevelbase.pVtab; + + if( isPrefix ){ + for(i=1; bFound==0 && inIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr + ); + pSegcsr->bLookup = 1; + } + } + + for(i=1; bFound==0 && inIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm+1 ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr + ); + if( rc==SQLITE_OK ){ + rc = fts3SegReaderCursorAddZero( + p, pCsr->iLangid, zTerm, nTerm, pSegcsr + ); + } + } + } + } + + if( bFound==0 ){ + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr + ); + pSegcsr->bLookup = !isPrefix; + } + } + + *ppSegcsr = pSegcsr; + return rc; +} + +/* +** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). +*/ +static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ + sqlite3Fts3SegReaderFinish(pSegcsr); + sqlite3_free(pSegcsr); +} + +/* +** This function retrieves the doclist for the specified term (or term +** prefix) from the database. +*/ +static int fts3TermSelect( + Fts3Table *p, /* Virtual table handle */ + Fts3PhraseToken *pTok, /* Token to query for */ + int iColumn, /* Column to query (or -ve for all columns) */ + int *pnOut, /* OUT: Size of buffer at *ppOut */ + char **ppOut /* OUT: Malloced result buffer */ +){ + int rc; /* Return code */ + Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ + TermSelect tsc; /* Object for pair-wise doclist merging */ + Fts3SegFilter filter; /* Segment term filter configuration */ + + pSegcsr = pTok->pSegcsr; + memset(&tsc, 0, sizeof(TermSelect)); + + filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS + | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) + | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) + | (iColumnnColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); + filter.iCol = iColumn; + filter.zTerm = pTok->z; + filter.nTerm = pTok->n; + + rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); + while( SQLITE_OK==rc + && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) + ){ + rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); + } + + if( rc==SQLITE_OK ){ + rc = fts3TermSelectFinishMerge(p, &tsc); + } + if( rc==SQLITE_OK ){ + *ppOut = tsc.aaOutput[0]; + *pnOut = tsc.anOutput[0]; + }else{ + int i; + for(i=0; ipSegcsr = 0; + return rc; +} + +/* +** This function counts the total number of docids in the doclist stored +** in buffer aList[], size nList bytes. +** +** If the isPoslist argument is true, then it is assumed that the doclist +** contains a position-list following each docid. Otherwise, it is assumed +** that the doclist is simply a list of docids stored as delta encoded +** varints. +*/ +static int fts3DoclistCountDocids(char *aList, int nList){ + int nDoc = 0; /* Return value */ + if( aList ){ + char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */ + char *p = aList; /* Cursor */ + while( peSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ + Fts3Table *pTab = (Fts3Table*)pCursor->pVtab; + pTab->bLock++; + if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ + pCsr->isEof = 1; + rc = sqlite3_reset(pCsr->pStmt); + }else{ + pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); + rc = SQLITE_OK; + } + pTab->bLock--; + }else{ + rc = fts3EvalNext((Fts3Cursor *)pCursor); + } + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + return rc; +} + +/* +** If the numeric type of argument pVal is "integer", then return it +** converted to a 64-bit signed integer. Otherwise, return a copy of +** the second parameter, iDefault. +*/ +static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ + if( pVal ){ + int eType = sqlite3_value_numeric_type(pVal); + if( eType==SQLITE_INTEGER ){ + return sqlite3_value_int64(pVal); + } + } + return iDefault; +} + +/* +** This is the xFilter interface for the virtual table. See +** the virtual table xFilter method documentation for additional +** information. +** +** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against +** the %_content table. +** +** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry +** in the %_content table. +** +** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The +** column on the left-hand side of the MATCH operator is column +** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand +** side of the MATCH operator. +*/ +static int fts3FilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + int rc = SQLITE_OK; + char *zSql; /* SQL statement used to access %_content */ + int eSearch; + Fts3Table *p = (Fts3Table *)pCursor->pVtab; + Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + + sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ + sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ + sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ + sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ + int iIdx; + + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); + + if( p->bLock ){ + return SQLITE_ERROR; + } + + eSearch = (idxNum & 0x0000FFFF); + assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); + assert( p->pSegments==0 ); + + /* Collect arguments into local variables */ + iIdx = 0; + if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; + assert( iIdx==nVal ); + + /* In case the cursor has been used before, clear it now. */ + fts3ClearCursor(pCsr); + + /* Set the lower and upper bounds on docids to return */ + pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); + pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); + + if( idxStr ){ + pCsr->bDesc = (idxStr[0]=='D'); + }else{ + pCsr->bDesc = p->bDescIdx; + } + pCsr->eSearch = (i16)eSearch; + + if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ + int iCol = eSearch-FTS3_FULLTEXT_SEARCH; + const char *zQuery = (const char *)sqlite3_value_text(pCons); + + if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ + return SQLITE_NOMEM; + } + + pCsr->iLangid = 0; + if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); + + assert( p->base.zErrMsg==0 ); + rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, + p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, + &p->base.zErrMsg + ); + if( rc!=SQLITE_OK ){ + return rc; + } + + rc = fts3EvalStart(pCsr); + sqlite3Fts3SegmentsClose(p); + if( rc!=SQLITE_OK ) return rc; + pCsr->pNextId = pCsr->aDoclist; + pCsr->iPrevId = 0; + } + + /* Compile a SELECT statement for this cursor. For a full-table-scan, the + ** statement loops through all rows of the %_content table. For a + ** full-text query or docid lookup, the statement retrieves a single + ** row by docid. + */ + if( eSearch==FTS3_FULLSCAN_SEARCH ){ + if( pDocidGe || pDocidLe ){ + zSql = sqlite3_mprintf( + "SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s", + p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid, + (pCsr->bDesc ? "DESC" : "ASC") + ); + }else{ + zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s", + p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") + ); + } + if( zSql ){ + p->bLock++; + rc = sqlite3_prepare_v3( + p->db,zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0 + ); + p->bLock--; + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; + } + }else if( eSearch==FTS3_DOCID_SEARCH ){ + rc = fts3CursorSeekStmt(pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); + } + } + if( rc!=SQLITE_OK ) return rc; + + return fts3NextMethod(pCursor); +} + +/* +** This is the xEof method of the virtual table. SQLite calls this +** routine to find out if it has reached the end of a result set. +*/ +static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3Cursor *pCsr = (Fts3Cursor*)pCursor; + if( pCsr->isEof ){ + fts3ClearCursor(pCsr); + pCsr->isEof = 1; + } + return pCsr->isEof; +} + +/* +** This is the xRowid method. The SQLite core calls this routine to +** retrieve the rowid for the current row of the result set. fts3 +** exposes %_content.docid as the rowid for the virtual table. The +** rowid should be written to *pRowid. +*/ +static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ + Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; + *pRowid = pCsr->iPrevId; + return SQLITE_OK; +} + +/* +** This is the xColumn method, called by SQLite to request a value from +** the row that the supplied cursor currently points to. +** +** If: +** +** (iCol < p->nColumn) -> The value of the iCol'th user column. +** (iCol == p->nColumn) -> Magic column with the same name as the table. +** (iCol == p->nColumn+1) -> Docid column +** (iCol == p->nColumn+2) -> Langid column +*/ +static int fts3ColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + int rc = SQLITE_OK; /* Return Code */ + Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; + Fts3Table *p = (Fts3Table *)pCursor->pVtab; + + /* The column value supplied by SQLite must be in range. */ + assert( iCol>=0 && iCol<=p->nColumn+2 ); + + switch( iCol-p->nColumn ){ + case 0: + /* The special 'table-name' column */ + sqlite3_result_pointer(pCtx, pCsr, "fts3cursor", 0); + break; + + case 1: + /* The docid column */ + sqlite3_result_int64(pCtx, pCsr->iPrevId); + break; + + case 2: + if( pCsr->pExpr ){ + sqlite3_result_int64(pCtx, pCsr->iLangid); + break; + }else if( p->zLanguageid==0 ){ + sqlite3_result_int(pCtx, 0); + break; + }else{ + iCol = p->nColumn; + /* no break */ deliberate_fall_through + } + + default: + /* A user column. Or, if this is a full-table scan, possibly the + ** language-id column. Seek the cursor. */ + rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)-1>iCol ){ + sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); + } + break; + } + + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + return rc; +} + +/* +** This function is the implementation of the xUpdate callback used by +** FTS3 virtual tables. It is invoked by SQLite each time a row is to be +** inserted, updated or deleted. +*/ +static int fts3UpdateMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Size of argument array */ + sqlite3_value **apVal, /* Array of arguments */ + sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ +){ + return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid); +} + +/* +** Implementation of xSync() method. Flush the contents of the pending-terms +** hash-table to the database. +*/ +static int fts3SyncMethod(sqlite3_vtab *pVtab){ + + /* Following an incremental-merge operation, assuming that the input + ** segments are not completely consumed (the usual case), they are updated + ** in place to remove the entries that have already been merged. This + ** involves updating the leaf block that contains the smallest unmerged + ** entry and each block (if any) between the leaf and the root node. So + ** if the height of the input segment b-trees is N, and input segments + ** are merged eight at a time, updating the input segments at the end + ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually + ** small - often between 0 and 2. So the overhead of the incremental + ** merge is somewhere between 8 and 24 blocks. To avoid this overhead + ** dwarfing the actual productive work accomplished, the incremental merge + ** is only attempted if it will write at least 64 leaf blocks. Hence + ** nMinMerge. + ** + ** Of course, updating the input segments also involves deleting a bunch + ** of blocks from the segments table. But this is not considered overhead + ** as it would also be required by a crisis-merge that used the same input + ** segments. + */ + const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */ + + Fts3Table *p = (Fts3Table*)pVtab; + int rc; + i64 iLastRowid = sqlite3_last_insert_rowid(p->db); + + rc = sqlite3Fts3PendingTermsFlush(p); + if( rc==SQLITE_OK + && p->nLeafAdd>(nMinMerge/16) + && p->nAutoincrmerge && p->nAutoincrmerge!=0xff + ){ + int mxLevel = 0; /* Maximum relative level value in db */ + int A; /* Incr-merge parameter A */ + + rc = sqlite3Fts3MaxLevel(p, &mxLevel); + assert( rc==SQLITE_OK || mxLevel==0 ); + A = p->nLeafAdd * mxLevel; + A += (A/2); + if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge); + } + sqlite3Fts3SegmentsClose(p); + sqlite3_set_last_insert_rowid(p->db, iLastRowid); + return rc; +} + +/* +** If it is currently unknown whether or not the FTS table has an %_stat +** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat +** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +static int fts3SetHasStat(Fts3Table *p){ + int rc = SQLITE_OK; + if( p->bHasStat==2 ){ + char *zTbl = sqlite3_mprintf("%s_stat", p->zName); + if( zTbl ){ + int res = sqlite3_table_column_metadata(p->db, p->zDb, zTbl, 0,0,0,0,0,0); + sqlite3_free(zTbl); + p->bHasStat = (res==SQLITE_OK); + }else{ + rc = SQLITE_NOMEM; + } + } + return rc; +} + +/* +** Implementation of xBegin() method. +*/ +static int fts3BeginMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table*)pVtab; + int rc; + UNUSED_PARAMETER(pVtab); + assert( p->pSegments==0 ); + assert( p->nPendingData==0 ); + assert( p->inTransaction!=1 ); + p->nLeafAdd = 0; + rc = fts3SetHasStat(p); +#ifdef SQLITE_DEBUG + if( rc==SQLITE_OK ){ + p->inTransaction = 1; + p->mxSavepoint = -1; + } +#endif + return rc; +} + +/* +** Implementation of xCommit() method. This is a no-op. The contents of +** the pending-terms hash-table have already been flushed into the database +** by fts3SyncMethod(). +*/ +static int fts3CommitMethod(sqlite3_vtab *pVtab){ + TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); + UNUSED_PARAMETER(pVtab); + assert( p->nPendingData==0 ); + assert( p->inTransaction!=0 ); + assert( p->pSegments==0 ); + TESTONLY( p->inTransaction = 0 ); + TESTONLY( p->mxSavepoint = -1; ); + return SQLITE_OK; +} + +/* +** Implementation of xRollback(). Discard the contents of the pending-terms +** hash-table. Any changes made to the database are reverted by SQLite. +*/ +static int fts3RollbackMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table*)pVtab; + sqlite3Fts3PendingTermsClear(p); + assert( p->inTransaction!=0 ); + TESTONLY( p->inTransaction = 0 ); + TESTONLY( p->mxSavepoint = -1; ); + return SQLITE_OK; +} + +/* +** When called, *ppPoslist must point to the byte immediately following the +** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function +** moves *ppPoslist so that it instead points to the first byte of the +** same position list. +*/ +static void fts3ReversePoslist(char *pStart, char **ppPoslist){ + char *p = &(*ppPoslist)[-2]; + char c = 0; + + /* Skip backwards passed any trailing 0x00 bytes added by NearTrim() */ + while( p>pStart && (c=*p--)==0 ); + + /* Search backwards for a varint with value zero (the end of the previous + ** poslist). This is an 0x00 byte preceded by some byte that does not + ** have the 0x80 bit set. */ + while( p>pStart && (*p & 0x80) | c ){ + c = *p--; + } + assert( p==pStart || c==0 ); + + /* At this point p points to that preceding byte without the 0x80 bit + ** set. So to find the start of the poslist, skip forward 2 bytes then + ** over a varint. + ** + ** Normally. The other case is that p==pStart and the poslist to return + ** is the first in the doclist. In this case do not skip forward 2 bytes. + ** The second part of the if condition (c==0 && *ppPoslist>&p[2]) + ** is required for cases where the first byte of a doclist and the + ** doclist is empty. For example, if the first docid is 10, a doclist + ** that begins with: + ** + ** 0x0A 0x00 + */ + if( p>pStart || (c==0 && *ppPoslist>&p[2]) ){ p = &p[2]; } + while( *p++&0x80 ); + *ppPoslist = p; +} + +/* +** Helper function used by the implementation of the overloaded snippet(), +** offsets() and optimize() SQL functions. +** +** If the value passed as the third argument is a blob of size +** sizeof(Fts3Cursor*), then the blob contents are copied to the +** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error +** message is written to context pContext and SQLITE_ERROR returned. The +** string passed via zFunc is used as part of the error message. +*/ +static int fts3FunctionArg( + sqlite3_context *pContext, /* SQL function call context */ + const char *zFunc, /* Function name */ + sqlite3_value *pVal, /* argv[0] passed to function */ + Fts3Cursor **ppCsr /* OUT: Store cursor handle here */ +){ + int rc; + *ppCsr = (Fts3Cursor*)sqlite3_value_pointer(pVal, "fts3cursor"); + if( (*ppCsr)!=0 ){ + rc = SQLITE_OK; + }else{ + char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc); + sqlite3_result_error(pContext, zErr, -1); + sqlite3_free(zErr); + rc = SQLITE_ERROR; + } + return rc; +} + +/* +** Implementation of the snippet() function for FTS3 +*/ +static void fts3SnippetFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of apVal[] array */ + sqlite3_value **apVal /* Array of arguments */ +){ + Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ + const char *zStart = ""; + const char *zEnd = ""; + const char *zEllipsis = "..."; + int iCol = -1; + int nToken = 15; /* Default number of tokens in snippet */ + + /* There must be at least one argument passed to this function (otherwise + ** the non-overloaded version would have been called instead of this one). + */ + assert( nVal>=1 ); + + if( nVal>6 ){ + sqlite3_result_error(pContext, + "wrong number of arguments to function snippet()", -1); + return; + } + if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return; + + switch( nVal ){ + case 6: nToken = sqlite3_value_int(apVal[5]); + /* no break */ deliberate_fall_through + case 5: iCol = sqlite3_value_int(apVal[4]); + /* no break */ deliberate_fall_through + case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]); + /* no break */ deliberate_fall_through + case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]); + /* no break */ deliberate_fall_through + case 2: zStart = (const char*)sqlite3_value_text(apVal[1]); + } + if( !zEllipsis || !zEnd || !zStart ){ + sqlite3_result_error_nomem(pContext); + }else if( nToken==0 ){ + sqlite3_result_text(pContext, "", -1, SQLITE_STATIC); + }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ + sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); + } +} + +/* +** Implementation of the offsets() function for FTS3 +*/ +static void fts3OffsetsFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of argument array */ + sqlite3_value **apVal /* Array of arguments */ +){ + Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ + + UNUSED_PARAMETER(nVal); + + assert( nVal==1 ); + if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return; + assert( pCsr ); + if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ + sqlite3Fts3Offsets(pContext, pCsr); + } +} + +/* +** Implementation of the special optimize() function for FTS3. This +** function merges all segments in the database to a single segment. +** Example usage is: +** +** SELECT optimize(t) FROM t LIMIT 1; +** +** where 't' is the name of an FTS3 table. +*/ +static void fts3OptimizeFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of argument array */ + sqlite3_value **apVal /* Array of arguments */ +){ + int rc; /* Return code */ + Fts3Table *p; /* Virtual table handle */ + Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */ + + UNUSED_PARAMETER(nVal); + + assert( nVal==1 ); + if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return; + p = (Fts3Table *)pCursor->base.pVtab; + assert( p ); + + rc = sqlite3Fts3Optimize(p); + + switch( rc ){ + case SQLITE_OK: + sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); + break; + case SQLITE_DONE: + sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); + break; + default: + sqlite3_result_error_code(pContext, rc); + break; + } +} + +/* +** Implementation of the matchinfo() function for FTS3 +*/ +static void fts3MatchinfoFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of argument array */ + sqlite3_value **apVal /* Array of arguments */ +){ + Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ + assert( nVal==1 || nVal==2 ); + if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){ + const char *zArg = 0; + if( nVal>1 ){ + zArg = (const char *)sqlite3_value_text(apVal[1]); + } + sqlite3Fts3Matchinfo(pContext, pCsr, zArg); + } +} + +/* +** This routine implements the xFindFunction method for the FTS3 +** virtual table. +*/ +static int fts3FindFunctionMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Number of SQL function arguments */ + const char *zName, /* Name of SQL function */ + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ + void **ppArg /* Unused */ +){ + struct Overloaded { + const char *zName; + void (*xFunc)(sqlite3_context*,int,sqlite3_value**); + } aOverload[] = { + { "snippet", fts3SnippetFunc }, + { "offsets", fts3OffsetsFunc }, + { "optimize", fts3OptimizeFunc }, + { "matchinfo", fts3MatchinfoFunc }, + }; + int i; /* Iterator variable */ + + UNUSED_PARAMETER(pVtab); + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(ppArg); + + for(i=0; idb; /* Database connection */ + int rc; /* Return Code */ + + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + rc = fts3SetHasStat(p); + + /* As it happens, the pending terms table is always empty here. This is + ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction + ** always opens a savepoint transaction. And the xSavepoint() method + ** flushes the pending terms table. But leave the (no-op) call to + ** PendingTermsFlush() in in case that changes. + */ + assert( p->nPendingData==0 ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3PendingTermsFlush(p); + } + + if( p->zContentTbl==0 ){ + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", + p->zDb, p->zName, zName + ); + } + + if( p->bHasDocsize ){ + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';", + p->zDb, p->zName, zName + ); + } + if( p->bHasStat ){ + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';", + p->zDb, p->zName, zName + ); + } + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';", + p->zDb, p->zName, zName + ); + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", + p->zDb, p->zName, zName + ); + return rc; +} + +/* +** The xSavepoint() method. +** +** Flush the contents of the pending-terms table to disk. +*/ +static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ + int rc = SQLITE_OK; + UNUSED_PARAMETER(iSavepoint); + assert( ((Fts3Table *)pVtab)->inTransaction ); + assert( ((Fts3Table *)pVtab)->mxSavepoint <= iSavepoint ); + TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); + if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ + rc = fts3SyncMethod(pVtab); + } + return rc; +} + +/* +** The xRelease() method. +** +** This is a no-op. +*/ +static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ + TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); + UNUSED_PARAMETER(iSavepoint); + UNUSED_PARAMETER(pVtab); + assert( p->inTransaction ); + assert( p->mxSavepoint >= iSavepoint ); + TESTONLY( p->mxSavepoint = iSavepoint-1 ); + return SQLITE_OK; +} + +/* +** The xRollbackTo() method. +** +** Discard the contents of the pending terms table. +*/ +static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ + Fts3Table *p = (Fts3Table*)pVtab; + UNUSED_PARAMETER(iSavepoint); + assert( p->inTransaction ); + TESTONLY( p->mxSavepoint = iSavepoint ); + sqlite3Fts3PendingTermsClear(p); + return SQLITE_OK; +} + +/* +** Return true if zName is the extension on one of the shadow tables used +** by this module. +*/ +static int fts3ShadowName(const char *zName){ + static const char *azName[] = { + "content", "docsize", "segdir", "segments", "stat", + }; + unsigned int i; + for(i=0; inRef--; + if( pHash->nRef<=0 ){ + sqlite3Fts3HashClear(&pHash->hash); + sqlite3_free(pHash); + } +} + +/* +** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are +** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c +** respectively. The following three forward declarations are for functions +** declared in these files used to retrieve the respective implementations. +** +** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed +** to by the argument to point to the "simple" tokenizer implementation. +** And so on. +*/ +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); +SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); +#ifndef SQLITE_DISABLE_FTS3_UNICODE +SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule); +#endif +#ifdef SQLITE_ENABLE_ICU +SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); +#endif + +/* +** Initialize the fts3 extension. If this extension is built as part +** of the sqlite library, then this function is called directly by +** SQLite. If fts3 is built as a dynamically loadable extension, this +** function is called by the sqlite3_extension_init() entry point. +*/ +SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){ + int rc = SQLITE_OK; + Fts3HashWrapper *pHash = 0; + const sqlite3_tokenizer_module *pSimple = 0; + const sqlite3_tokenizer_module *pPorter = 0; +#ifndef SQLITE_DISABLE_FTS3_UNICODE + const sqlite3_tokenizer_module *pUnicode = 0; +#endif + +#ifdef SQLITE_ENABLE_ICU + const sqlite3_tokenizer_module *pIcu = 0; + sqlite3Fts3IcuTokenizerModule(&pIcu); +#endif + +#ifndef SQLITE_DISABLE_FTS3_UNICODE + sqlite3Fts3UnicodeTokenizer(&pUnicode); +#endif + +#ifdef SQLITE_TEST + rc = sqlite3Fts3InitTerm(db); + if( rc!=SQLITE_OK ) return rc; +#endif + + rc = sqlite3Fts3InitAux(db); + if( rc!=SQLITE_OK ) return rc; + + sqlite3Fts3SimpleTokenizerModule(&pSimple); + sqlite3Fts3PorterTokenizerModule(&pPorter); + + /* Allocate and initialize the hash-table used to store tokenizers. */ + pHash = sqlite3_malloc(sizeof(Fts3HashWrapper)); + if( !pHash ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3Fts3HashInit(&pHash->hash, FTS3_HASH_STRING, 1); + pHash->nRef = 0; + } + + /* Load the built-in tokenizers into the hash table */ + if( rc==SQLITE_OK ){ + if( sqlite3Fts3HashInsert(&pHash->hash, "simple", 7, (void *)pSimple) + || sqlite3Fts3HashInsert(&pHash->hash, "porter", 7, (void *)pPorter) + +#ifndef SQLITE_DISABLE_FTS3_UNICODE + || sqlite3Fts3HashInsert(&pHash->hash, "unicode61", 10, (void *)pUnicode) +#endif +#ifdef SQLITE_ENABLE_ICU + || (pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu)) +#endif + ){ + rc = SQLITE_NOMEM; + } + } + +#ifdef SQLITE_TEST + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3ExprInitTestInterface(db, &pHash->hash); + } +#endif + + /* Create the virtual table wrapper around the hash-table and overload + ** the four scalar functions. If this is successful, register the + ** module with sqlite. + */ + if( SQLITE_OK==rc + && SQLITE_OK==(rc=sqlite3Fts3InitHashTable(db,&pHash->hash,"fts3_tokenizer")) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) + ){ + pHash->nRef++; + rc = sqlite3_create_module_v2( + db, "fts3", &fts3Module, (void *)pHash, hashDestroy + ); + if( rc==SQLITE_OK ){ + pHash->nRef++; + rc = sqlite3_create_module_v2( + db, "fts4", &fts3Module, (void *)pHash, hashDestroy + ); + } + if( rc==SQLITE_OK ){ + pHash->nRef++; + rc = sqlite3Fts3InitTok(db, (void *)pHash, hashDestroy); + } + return rc; + } + + + /* An error has occurred. Delete the hash table and return the error code. */ + assert( rc!=SQLITE_OK ); + if( pHash ){ + sqlite3Fts3HashClear(&pHash->hash); + sqlite3_free(pHash); + } + return rc; +} + +/* +** Allocate an Fts3MultiSegReader for each token in the expression headed +** by pExpr. +** +** An Fts3SegReader object is a cursor that can seek or scan a range of +** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple +** Fts3SegReader objects internally to provide an interface to seek or scan +** within the union of all segments of a b-tree. Hence the name. +** +** If the allocated Fts3MultiSegReader just seeks to a single entry in a +** segment b-tree (if the term is not a prefix or it is a prefix for which +** there exists prefix b-tree of the right length) then it may be traversed +** and merged incrementally. Otherwise, it has to be merged into an in-memory +** doclist and then traversed. +*/ +static void fts3EvalAllocateReaders( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Allocate readers for this expression */ + int *pnToken, /* OUT: Total number of tokens in phrase. */ + int *pnOr, /* OUT: Total number of OR nodes in expr. */ + int *pRc /* IN/OUT: Error code */ +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + *pnToken += nToken; + for(i=0; ipPhrase->aToken[i]; + int rc = fts3TermSegReaderCursor(pCsr, + pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr + ); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return; + } + } + assert( pExpr->pPhrase->iDoclistToken==0 ); + pExpr->pPhrase->iDoclistToken = -1; + }else{ + *pnOr += (pExpr->eType==FTSQUERY_OR); + fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); + fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); + } + } +} + +/* +** Arguments pList/nList contain the doclist for token iToken of phrase p. +** It is merged into the main doclist stored in p->doclist.aAll/nAll. +** +** This function assumes that pList points to a buffer allocated using +** sqlite3_malloc(). This function takes responsibility for eventually +** freeing the buffer. +** +** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs. +*/ +static int fts3EvalPhraseMergeToken( + Fts3Table *pTab, /* FTS Table pointer */ + Fts3Phrase *p, /* Phrase to merge pList/nList into */ + int iToken, /* Token pList/nList corresponds to */ + char *pList, /* Pointer to doclist */ + int nList /* Number of bytes in pList */ +){ + int rc = SQLITE_OK; + assert( iToken!=p->iDoclistToken ); + + if( pList==0 ){ + sqlite3_free(p->doclist.aAll); + p->doclist.aAll = 0; + p->doclist.nAll = 0; + } + + else if( p->iDoclistToken<0 ){ + p->doclist.aAll = pList; + p->doclist.nAll = nList; + } + + else if( p->doclist.aAll==0 ){ + sqlite3_free(pList); + } + + else { + char *pLeft; + char *pRight; + int nLeft; + int nRight; + int nDiff; + + if( p->iDoclistTokendoclist.aAll; + nLeft = p->doclist.nAll; + pRight = pList; + nRight = nList; + nDiff = iToken - p->iDoclistToken; + }else{ + pRight = p->doclist.aAll; + nRight = p->doclist.nAll; + pLeft = pList; + nLeft = nList; + nDiff = p->iDoclistToken - iToken; + } + + rc = fts3DoclistPhraseMerge( + pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight + ); + sqlite3_free(pLeft); + p->doclist.aAll = pRight; + p->doclist.nAll = nRight; + } + + if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; + return rc; +} + +/* +** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist +** does not take deferred tokens into account. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3EvalPhraseLoad( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p /* Phrase object */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int iToken; + int rc = SQLITE_OK; + + for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &p->aToken[iToken]; + assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); + + if( pToken->pSegcsr ){ + int nThis = 0; + char *pThis = 0; + rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); + if( rc==SQLITE_OK ){ + rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); + } + } + assert( pToken->pSegcsr==0 ); + } + + return rc; +} + +#ifndef SQLITE_DISABLE_FTS4_DEFERRED +/* +** This function is called on each phrase after the position lists for +** any deferred tokens have been loaded into memory. It updates the phrases +** current position list to include only those positions that are really +** instances of the phrase (after considering deferred tokens). If this +** means that the phrase does not appear in the current row, doclist.pList +** and doclist.nList are both zeroed. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ + int iToken; /* Used to iterate through phrase tokens */ + char *aPoslist = 0; /* Position list for deferred tokens */ + int nPoslist = 0; /* Number of bytes in aPoslist */ + int iPrev = -1; /* Token number of previous deferred token */ + + assert( pPhrase->doclist.bFreeList==0 ); + + for(iToken=0; iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + Fts3DeferredToken *pDeferred = pToken->pDeferred; + + if( pDeferred ){ + char *pList; + int nList; + int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); + if( rc!=SQLITE_OK ) return rc; + + if( pList==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + + }else if( aPoslist==0 ){ + aPoslist = pList; + nPoslist = nList; + + }else{ + char *aOut = pList; + char *p1 = aPoslist; + char *p2 = aOut; + + assert( iPrev>=0 ); + fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); + sqlite3_free(aPoslist); + aPoslist = pList; + nPoslist = (int)(aOut - aPoslist); + if( nPoslist==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + } + } + iPrev = iToken; + } + } + + if( iPrev>=0 ){ + int nMaxUndeferred = pPhrase->iDoclistToken; + if( nMaxUndeferred<0 ){ + pPhrase->doclist.pList = aPoslist; + pPhrase->doclist.nList = nPoslist; + pPhrase->doclist.iDocid = pCsr->iPrevId; + pPhrase->doclist.bFreeList = 1; + }else{ + int nDistance; + char *p1; + char *p2; + char *aOut; + + if( nMaxUndeferred>iPrev ){ + p1 = aPoslist; + p2 = pPhrase->doclist.pList; + nDistance = nMaxUndeferred - iPrev; + }else{ + p1 = pPhrase->doclist.pList; + p2 = aPoslist; + nDistance = iPrev - nMaxUndeferred; + } + + aOut = (char *)sqlite3_malloc(nPoslist+8); + if( !aOut ){ + sqlite3_free(aPoslist); + return SQLITE_NOMEM; + } + + pPhrase->doclist.pList = aOut; + if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ + pPhrase->doclist.bFreeList = 1; + pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList); + }else{ + sqlite3_free(aOut); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + } + sqlite3_free(aPoslist); + } + } + + return SQLITE_OK; +} +#endif /* SQLITE_DISABLE_FTS4_DEFERRED */ + +/* +** Maximum number of tokens a phrase may have to be considered for the +** incremental doclists strategy. +*/ +#define MAX_INCR_PHRASE_TOKENS 4 + +/* +** This function is called for each Fts3Phrase in a full-text query +** expression to initialize the mechanism for returning rows. Once this +** function has been called successfully on an Fts3Phrase, it may be +** used with fts3EvalPhraseNext() to iterate through the matching docids. +** +** If parameter bOptOk is true, then the phrase may (or may not) use the +** incremental loading strategy. Otherwise, the entire doclist is loaded into +** memory within this call. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; /* Error code */ + int i; + + /* Determine if doclists may be loaded from disk incrementally. This is + ** possible if the bOptOk argument is true, the FTS doclists will be + ** scanned in forward order, and the phrase consists of + ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" + ** tokens or prefix tokens that cannot use a prefix-index. */ + int bHaveIncr = 0; + int bIncrOk = (bOptOk + && pCsr->bDesc==pTab->bDescIdx + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + && pTab->bNoIncrDoclist==0 +#endif + ); + for(i=0; bIncrOk==1 && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ + bIncrOk = 0; + } + if( pToken->pSegcsr ) bHaveIncr = 1; + } + + if( bIncrOk && bHaveIncr ){ + /* Use the incremental approach. */ + int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); + for(i=0; rc==SQLITE_OK && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; + if( pSegcsr ){ + rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); + } + } + p->bIncr = 1; + }else{ + /* Load the full doclist for the phrase into memory. */ + rc = fts3EvalPhraseLoad(pCsr, p); + p->bIncr = 0; + } + + assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); + return rc; +} + +/* +** This function is used to iterate backwards (from the end to start) +** through doclists. It is used by this module to iterate through phrase +** doclists in reverse and by the fts3_write.c module to iterate through +** pending-terms lists when writing to databases with "order=desc". +** +** The doclist may be sorted in ascending (parameter bDescIdx==0) or +** descending (parameter bDescIdx==1) order of docid. Regardless, this +** function iterates from the end of the doclist to the beginning. +*/ +SQLITE_PRIVATE void sqlite3Fts3DoclistPrev( + int bDescIdx, /* True if the doclist is desc */ + char *aDoclist, /* Pointer to entire doclist */ + int nDoclist, /* Length of aDoclist in bytes */ + char **ppIter, /* IN/OUT: Iterator pointer */ + sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ + int *pnList, /* OUT: List length pointer */ + u8 *pbEof /* OUT: End-of-file flag */ +){ + char *p = *ppIter; + + assert( nDoclist>0 ); + assert( *pbEof==0 ); + assert_fts3_nc( p || *piDocid==0 ); + assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); + + if( p==0 ){ + sqlite3_int64 iDocid = 0; + char *pNext = 0; + char *pDocid = aDoclist; + char *pEnd = &aDoclist[nDoclist]; + int iMul = 1; + + while( pDocid0 ); + assert( *pbEof==0 ); + assert_fts3_nc( p || *piDocid==0 ); + assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) ); + + if( p==0 ){ + p = aDoclist; + p += sqlite3Fts3GetVarint(p, piDocid); + }else{ + fts3PoslistCopy(0, &p); + while( p<&aDoclist[nDoclist] && *p==0 ) p++; + if( p>=&aDoclist[nDoclist] ){ + *pbEof = 1; + }else{ + sqlite3_int64 iVar; + p += sqlite3Fts3GetVarint(p, &iVar); + *piDocid += ((bDescIdx ? -1 : 1) * iVar); + } + } + + *ppIter = p; +} + +/* +** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof +** to true if EOF is reached. +*/ +static void fts3EvalDlPhraseNext( + Fts3Table *pTab, + Fts3Doclist *pDL, + u8 *pbEof +){ + char *pIter; /* Used to iterate through aAll */ + char *pEnd; /* 1 byte past end of aAll */ + + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; + assert( pDL->aAll!=0 || pIter==0 ); + }else{ + pIter = pDL->aAll; + } + + if( pIter==0 || pIter>=(pEnd = pDL->aAll + pDL->nAll) ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; + }else{ + pDL->iDocid -= iDelta; + } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (int)(pIter - pDL->pList); + + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim(). */ + while( pIterpNextDocid = pIter; + assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); + *pbEof = 0; + } +} + +/* +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). +*/ +typedef struct TokenDoclist TokenDoclist; +struct TokenDoclist { + int bIgnore; + sqlite3_int64 iDocid; + char *pList; + int nList; +}; + +/* +** Token pToken is an incrementally loaded token that is part of a +** multi-token phrase. Advance it to the next matching document in the +** database and populate output variable *p with the details of the new +** entry. Or, if the iterator has reached EOF, set *pbEof to true. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +*/ +static int incrPhraseTokenNext( + Fts3Table *pTab, /* Virtual table handle */ + Fts3Phrase *pPhrase, /* Phrase to advance token of */ + int iToken, /* Specific token to advance */ + TokenDoclist *p, /* OUT: Docid and doclist for new entry */ + u8 *pbEof /* OUT: True if iterator is at EOF */ +){ + int rc = SQLITE_OK; + + if( pPhrase->iDoclistToken==iToken ){ + assert( p->bIgnore==0 ); + assert( pPhrase->aToken[iToken].pSegcsr==0 ); + fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); + p->pList = pPhrase->doclist.pList; + p->nList = pPhrase->doclist.nList; + p->iDocid = pPhrase->doclist.iDocid; + }else{ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + assert( pToken->pDeferred==0 ); + assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); + if( pToken->pSegcsr ){ + assert( p->bIgnore==0 ); + rc = sqlite3Fts3MsrIncrNext( + pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList + ); + if( p->pList==0 ) *pbEof = 1; + }else{ + p->bIgnore = 1; + } + } + + return rc; +} + + +/* +** The phrase iterator passed as the second argument: +** +** * features at least one token that uses an incremental doclist, and +** +** * does not contain any deferred tokens. +** +** Advance it to the next matching documnent in the database and populate +** the Fts3Doclist.pList and nList fields. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +*/ +static int fts3EvalIncrPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + u8 bEof = 0; + + /* This is only called if it is guaranteed that the phrase has at least + ** one incremental token. In which case the bIncr flag is set. */ + assert( p->bIncr==1 ); + + if( p->nToken==1 ){ + rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, + &pDL->iDocid, &pDL->pList, &pDL->nList + ); + if( pDL->pList==0 ) bEof = 1; + }else{ + int bDescDoclist = pCsr->bDesc; + struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + + memset(a, 0, sizeof(a)); + assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); + assert( p->iDoclistTokennToken && bEof==0; i++){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ + iMax = a[i].iDocid; + bMaxSet = 1; + } + } + assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) ); + assert( rc!=SQLITE_OK || bMaxSet ); + + /* Keep advancing iterators until they all point to the same document */ + for(i=0; inToken; i++){ + while( rc==SQLITE_OK && bEof==0 + && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 + ){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ + iMax = a[i].iDocid; + i = 0; + } + } + } + + /* Check if the current entries really are a phrase match */ + if( bEof==0 ){ + int nList = 0; + int nByte = a[p->nToken-1].nList; + char *aDoclist = sqlite3_malloc(nByte+FTS3_BUFFER_PADDING); + if( !aDoclist ) return SQLITE_NOMEM; + memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + memset(&aDoclist[nByte], 0, FTS3_BUFFER_PADDING); + + for(i=0; i<(p->nToken-1); i++){ + if( a[i].bIgnore==0 ){ + char *pL = a[i].pList; + char *pR = aDoclist; + char *pOut = aDoclist; + int nDist = p->nToken-1-i; + int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); + if( res==0 ) break; + nList = (int)(pOut - aDoclist); + } + } + if( i==(p->nToken-1) ){ + pDL->iDocid = iMax; + pDL->pList = aDoclist; + pDL->nList = nList; + pDL->bFreeList = 1; + break; + } + sqlite3_free(aDoclist); + } + } + } + + *pbEof = bEof; + return rc; +} + +/* +** Attempt to move the phrase iterator to point to the next matching docid. +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +*/ +static int fts3EvalPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + if( p->bIncr ){ + rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); + }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ + sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, + &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof + ); + pDL->pList = pDL->pNextDocid; + }else{ + fts3EvalDlPhraseNext(pTab, pDL, pbEof); + } + + return rc; +} + +/* +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, fts3EvalPhraseStart() is called on all phrases within the +** expression. Also the Fts3Expr.bDeferred variable is set to true for any +** expressions for which all descendent tokens are deferred. +** +** If parameter bOptOk is zero, then it is guaranteed that the +** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for +** each phrase in the expression (subject to deferred token processing). +** Or, if bOptOk is non-zero, then one or more tokens within the expression +** may be loaded incrementally, meaning doclist.aAll/nAll is not available. +** +** If an error occurs within this function, *pRc is set to an SQLite error +** code before returning. +*/ +static void fts3EvalStartReaders( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expression to initialize phrases in */ + int *pRc /* IN/OUT: Error code */ +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int nToken = pExpr->pPhrase->nToken; + if( nToken ){ + int i; + for(i=0; ipPhrase->aToken[i].pDeferred==0 ) break; + } + pExpr->bDeferred = (i==nToken); + } + *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); + }else{ + fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); + pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); + } + } +} + +/* +** An array of the following structures is assembled as part of the process +** of selecting tokens to defer before the query starts executing (as part +** of the xFilter() method). There is one element in the array for each +** token in the FTS expression. +** +** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong +** to phrases that are connected only by AND and NEAR operators (not OR or +** NOT). When determining tokens to defer, each AND/NEAR cluster is considered +** separately. The root of a tokens AND/NEAR cluster is stored in +** Fts3TokenAndCost.pRoot. +*/ +typedef struct Fts3TokenAndCost Fts3TokenAndCost; +struct Fts3TokenAndCost { + Fts3Phrase *pPhrase; /* The phrase the token belongs to */ + int iToken; /* Position of token in phrase */ + Fts3PhraseToken *pToken; /* The token itself */ + Fts3Expr *pRoot; /* Root of NEAR/AND cluster */ + int nOvfl; /* Number of overflow pages to load doclist */ + int iCol; /* The column the token must match */ +}; + +/* +** This function is used to populate an allocated Fts3TokenAndCost array. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if an error occurs during execution, *pRc is set to an +** SQLite error code. +*/ +static void fts3EvalTokenCosts( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ + Fts3Expr *pExpr, /* Expression to consider */ + Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ + Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ + int *pRc /* IN/OUT: Error code */ +){ + if( *pRc==SQLITE_OK ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + for(i=0; *pRc==SQLITE_OK && inToken; i++){ + Fts3TokenAndCost *pTC = (*ppTC)++; + pTC->pPhrase = pPhrase; + pTC->iToken = i; + pTC->pRoot = pRoot; + pTC->pToken = &pPhrase->aToken[i]; + pTC->iCol = pPhrase->iColumn; + *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); + } + }else if( pExpr->eType!=FTSQUERY_NOT ){ + assert( pExpr->eType==FTSQUERY_OR + || pExpr->eType==FTSQUERY_AND + || pExpr->eType==FTSQUERY_NEAR + ); + assert( pExpr->pLeft && pExpr->pRight ); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pLeft; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pRight; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); + } + } +} + +/* +** Determine the average document (row) size in pages. If successful, +** write this value to *pnPage and return SQLITE_OK. Otherwise, return +** an SQLite error code. +** +** The average document size in pages is calculated by first calculating +** determining the average size in bytes, B. If B is less than the amount +** of data that will fit on a single leaf page of an intkey table in +** this database, then the average docsize is 1. Otherwise, it is 1 plus +** the number of overflow pages consumed by a record B bytes in size. +*/ +static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ + int rc = SQLITE_OK; + if( pCsr->nRowAvg==0 ){ + /* The average document size, which is required to calculate the cost + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + sqlite3_stmt *pStmt; + sqlite3_int64 nDoc = 0; + sqlite3_int64 nByte = 0; + const char *pEnd; + const char *a; + + rc = sqlite3Fts3SelectDoctotal(p, &pStmt); + if( rc!=SQLITE_OK ) return rc; + a = sqlite3_column_blob(pStmt, 0); + testcase( a==0 ); /* If %_stat.value set to X'' */ + if( a ){ + pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; + a += sqlite3Fts3GetVarintBounded(a, pEnd, &nDoc); + while( anDoc = nDoc; + pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); + assert( pCsr->nRowAvg>0 ); + rc = sqlite3_reset(pStmt); + } + + *pnPage = pCsr->nRowAvg; + return rc; +} + +/* +** This function is called to select the tokens (if any) that will be +** deferred. The array aTC[] has already been populated when this is +** called. +** +** This function is called once for each AND/NEAR cluster in the +** expression. Each invocation determines which tokens to defer within +** the cluster with root node pRoot. See comments above the definition +** of struct Fts3TokenAndCost for more details. +** +** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() +** called on each token to defer. Otherwise, an SQLite error code is +** returned. +*/ +static int fts3EvalSelectDeferred( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Consider tokens with this root node */ + Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ + int nTC /* Number of entries in aTC[] */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int nDocSize = 0; /* Number of pages per doc loaded */ + int rc = SQLITE_OK; /* Return code */ + int ii; /* Iterator variable for various purposes */ + int nOvfl = 0; /* Total overflow pages used by doclists */ + int nToken = 0; /* Total number of tokens in cluster */ + + int nMinEst = 0; /* The minimum count for any phrase so far. */ + int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ + + /* Tokens are never deferred for FTS tables created using the content=xxx + ** option. The reason being that it is not guaranteed that the content + ** table actually contains the same data as the index. To prevent this from + ** causing any problems, the deferred token optimization is completely + ** disabled for content=xxx tables. */ + if( pTab->zContentTbl ){ + return SQLITE_OK; + } + + /* Count the tokens in this AND/NEAR cluster. If none of the doclists + ** associated with the tokens spill onto overflow pages, or if there is + ** only 1 token, exit early. No tokens to defer in this case. */ + for(ii=0; ii0 ); + + + /* Iterate through all tokens in this AND/NEAR cluster, in ascending order + ** of the number of overflow pages that will be loaded by the pager layer + ** to retrieve the entire doclist for the token from the full-text index. + ** Load the doclists for tokens that are either: + ** + ** a. The cheapest token in the entire query (i.e. the one visited by the + ** first iteration of this loop), or + ** + ** b. Part of a multi-token phrase. + ** + ** After each token doclist is loaded, merge it with the others from the + ** same phrase and count the number of documents that the merged doclist + ** contains. Set variable "nMinEst" to the smallest number of documents in + ** any phrase doclist for which 1 or more token doclists have been loaded. + ** Let nOther be the number of other phrases for which it is certain that + ** one or more tokens will not be deferred. + ** + ** Then, for each token, defer it if loading the doclist would result in + ** loading N or more overflow pages into memory, where N is computed as: + ** + ** (nMinEst + 4^nOther - 1) / (4^nOther) + */ + for(ii=0; iinOvfl) + ){ + pTC = &aTC[iTC]; + } + } + assert( pTC ); + + if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ + /* The number of overflow pages to load for this (and therefore all + ** subsequent) tokens is greater than the estimated number of pages + ** that will be loaded if all subsequent tokens are deferred. + */ + Fts3PhraseToken *pToken = pTC->pToken; + rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); + fts3SegReaderCursorFree(pToken->pSegcsr); + pToken->pSegcsr = 0; + }else{ + /* Set nLoad4 to the value of (4^nOther) for the next iteration of the + ** for-loop. Except, limit the value to 2^24 to prevent it from + ** overflowing the 32-bit integer it is stored in. */ + if( ii<12 ) nLoad4 = nLoad4*4; + + if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ + /* Either this is the cheapest token in the entire query, or it is + ** part of a multi-token phrase. Either way, the entire doclist will + ** (eventually) be loaded into memory. It may as well be now. */ + Fts3PhraseToken *pToken = pTC->pToken; + int nList = 0; + char *pList = 0; + rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); + assert( rc==SQLITE_OK || pList==0 ); + if( rc==SQLITE_OK ){ + rc = fts3EvalPhraseMergeToken( + pTab, pTC->pPhrase, pTC->iToken,pList,nList + ); + } + if( rc==SQLITE_OK ){ + int nCount; + nCount = fts3DoclistCountDocids( + pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll + ); + if( ii==0 || nCountpToken = 0; + } + + return rc; +} + +/* +** This function is called from within the xFilter method. It initializes +** the full-text query currently stored in pCsr->pExpr. To iterate through +** the results of a query, the caller does: +** +** fts3EvalStart(pCsr); +** while( 1 ){ +** fts3EvalNext(pCsr); +** if( pCsr->bEof ) break; +** ... return row pCsr->iPrevId to the caller ... +** } +*/ +static int fts3EvalStart(Fts3Cursor *pCsr){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int nToken = 0; + int nOr = 0; + + /* Allocate a MultiSegReader for each token in the expression. */ + fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); + + /* Determine which, if any, tokens in the expression should be deferred. */ +#ifndef SQLITE_DISABLE_FTS4_DEFERRED + if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){ + Fts3TokenAndCost *aTC; + aTC = (Fts3TokenAndCost *)sqlite3_malloc64( + sizeof(Fts3TokenAndCost) * nToken + + sizeof(Fts3Expr *) * nOr * 2 + ); + + if( !aTC ){ + rc = SQLITE_NOMEM; + }else{ + Fts3Expr **apOr = (Fts3Expr **)&aTC[nToken]; + int ii; + Fts3TokenAndCost *pTC = aTC; + Fts3Expr **ppOr = apOr; + + fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); + nToken = (int)(pTC-aTC); + nOr = (int)(ppOr-apOr); + + if( rc==SQLITE_OK ){ + rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); + for(ii=0; rc==SQLITE_OK && iipExpr, &rc); + return rc; +} + +/* +** Invalidate the current position list for phrase pPhrase. +*/ +static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ + if( pPhrase->doclist.bFreeList ){ + sqlite3_free(pPhrase->doclist.pList); + } + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + pPhrase->doclist.bFreeList = 0; +} + +/* +** This function is called to edit the position list associated with +** the phrase object passed as the fifth argument according to a NEAR +** condition. For example: +** +** abc NEAR/5 "def ghi" +** +** Parameter nNear is passed the NEAR distance of the expression (5 in +** the example above). When this function is called, *paPoslist points to +** the position list, and *pnToken is the number of phrase tokens in the +** phrase on the other side of the NEAR operator to pPhrase. For example, +** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to +** the position list associated with phrase "abc". +** +** All positions in the pPhrase position list that are not sufficiently +** close to a position in the *paPoslist position list are removed. If this +** leaves 0 positions, zero is returned. Otherwise, non-zero. +** +** Before returning, *paPoslist is set to point to the position lsit +** associated with pPhrase. And *pnToken is set to the number of tokens in +** pPhrase. +*/ +static int fts3EvalNearTrim( + int nNear, /* NEAR distance. As in "NEAR/nNear". */ + char *aTmp, /* Temporary space to use */ + char **paPoslist, /* IN/OUT: Position list */ + int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ + Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ +){ + int nParam1 = nNear + pPhrase->nToken; + int nParam2 = nNear + *pnToken; + int nNew; + char *p2; + char *pOut; + int res; + + assert( pPhrase->doclist.pList ); + + p2 = pOut = pPhrase->doclist.pList; + res = fts3PoslistNearMerge( + &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 + ); + if( res ){ + nNew = (int)(pOut - pPhrase->doclist.pList) - 1; + assert_fts3_nc( nNew<=pPhrase->doclist.nList && nNew>0 ); + if( nNew>=0 && nNew<=pPhrase->doclist.nList ){ + assert( pPhrase->doclist.pList[nNew]=='\0' ); + memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); + pPhrase->doclist.nList = nNew; + } + *paPoslist = pPhrase->doclist.pList; + *pnToken = pPhrase->nToken; + } + + return res; +} + +/* +** This function is a no-op if *pRc is other than SQLITE_OK when it is called. +** Otherwise, it advances the expression passed as the second argument to +** point to the next matching row in the database. Expressions iterate through +** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, +** or descending if it is non-zero. +** +** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if +** successful, the following variables in pExpr are set: +** +** Fts3Expr.bEof (non-zero if EOF - there is no next row) +** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) +** +** If the expression is of type FTSQUERY_PHRASE, and the expression is not +** at EOF, then the following variables are populated with the position list +** for the phrase for the visited row: +** +** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) +** FTs3Expr.pPhrase->doclist.pList (pointer to position list) +** +** It says above that this function advances the expression to the next +** matching row. This is usually true, but there are the following exceptions: +** +** 1. Deferred tokens are not taken into account. If a phrase consists +** entirely of deferred tokens, it is assumed to match every row in +** the db. In this case the position-list is not populated at all. +** +** Or, if a phrase contains one or more deferred tokens and one or +** more non-deferred tokens, then the expression is advanced to the +** next possible match, considering only non-deferred tokens. In other +** words, if the phrase is "A B C", and "B" is deferred, the expression +** is advanced to the next row that contains an instance of "A * C", +** where "*" may match any single token. The position list in this case +** is populated as for "A * C" before returning. +** +** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is +** advanced to point to the next row that matches "x AND y". +** +** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is +** really a match, taking into account deferred tokens and NEAR operators. +*/ +static void fts3EvalNextRow( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expr. to advance to next matching row */ + int *pRc /* IN/OUT: Error code */ +){ + if( *pRc==SQLITE_OK ){ + int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ + assert( pExpr->bEof==0 ); + pExpr->bStart = 1; + + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + assert( !pLeft->bDeferred || !pRight->bDeferred ); + + if( pLeft->bDeferred ){ + /* LHS is entirely deferred. So we assume it matches every row. + ** Advance the RHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pRight, pRc); + pExpr->iDocid = pRight->iDocid; + pExpr->bEof = pRight->bEof; + }else if( pRight->bDeferred ){ + /* RHS is entirely deferred. So we assume it matches every row. + ** Advance the LHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + }else{ + /* Neither the RHS or LHS are deferred. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ + sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( iDiff==0 ) break; + if( iDiff<0 ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else{ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = (pLeft->bEof || pRight->bEof); + if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){ + assert( pRight->eType==FTSQUERY_PHRASE ); + if( pRight->pPhrase->doclist.aAll ){ + Fts3Doclist *pDl = &pRight->pPhrase->doclist; + while( *pRc==SQLITE_OK && pRight->bEof==0 ){ + memset(pDl->pList, 0, pDl->nList); + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){ + Fts3Doclist *pDl = &pLeft->pPhrase->doclist; + while( *pRc==SQLITE_OK && pLeft->bEof==0 ){ + memset(pDl->pList, 0, pDl->nList); + fts3EvalNextRow(pCsr, pLeft, pRc); + } + } + pRight->bEof = pLeft->bEof = 1; + } + } + break; + } + + case FTSQUERY_OR: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + + assert_fts3_nc( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); + assert_fts3_nc( pRight->bStart || pLeft->iDocid==pRight->iDocid ); + + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else if( pLeft->bEof || iCmp>0 ){ + fts3EvalNextRow(pCsr, pRight, pRc); + }else{ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + } + + pExpr->bEof = (pLeft->bEof && pRight->bEof); + iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + pExpr->iDocid = pLeft->iDocid; + }else{ + pExpr->iDocid = pRight->iDocid; + } + + break; + } + + case FTSQUERY_NOT: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + + if( pRight->bStart==0 ){ + fts3EvalNextRow(pCsr, pRight, pRc); + assert( *pRc!=SQLITE_OK || pRight->bStart ); + } + + fts3EvalNextRow(pCsr, pLeft, pRc); + if( pLeft->bEof==0 ){ + while( !*pRc + && !pRight->bEof + && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 + ){ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + break; + } + + default: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + fts3EvalInvalidatePoslist(pPhrase); + *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); + pExpr->iDocid = pPhrase->doclist.iDocid; + break; + } + } + } +} + +/* +** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR +** cluster, then this function returns 1 immediately. +** +** Otherwise, it checks if the current row really does match the NEAR +** expression, using the data currently stored in the position lists +** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. +** +** If the current row is a match, the position list associated with each +** phrase in the NEAR expression is edited in place to contain only those +** phrase instances sufficiently close to their peers to satisfy all NEAR +** constraints. In this case it returns 1. If the NEAR expression does not +** match the current row, 0 is returned. The position lists may or may not +** be edited if 0 is returned. +*/ +static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ + int res = 1; + + /* The following block runs if pExpr is the root of a NEAR query. + ** For example, the query: + ** + ** "w" NEAR "x" NEAR "y" NEAR "z" + ** + ** which is represented in tree form as: + ** + ** | + ** +--NEAR--+ <-- root of NEAR query + ** | | + ** +--NEAR--+ "z" + ** | | + ** +--NEAR--+ "y" + ** | | + ** "w" "x" + ** + ** The right-hand child of a NEAR node is always a phrase. The + ** left-hand child may be either a phrase or a NEAR node. There are + ** no exceptions to this - it's the way the parser in fts3_expr.c works. + */ + if( *pRc==SQLITE_OK + && pExpr->eType==FTSQUERY_NEAR + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + sqlite3_int64 nTmp = 0; /* Bytes of temp space */ + char *aTmp; /* Temp space for PoslistNearMerge() */ + + /* Allocate temporary working space. */ + for(p=pExpr; p->pLeft; p=p->pLeft){ + assert( p->pRight->pPhrase->doclist.nList>0 ); + nTmp += p->pRight->pPhrase->doclist.nList; + } + nTmp += p->pPhrase->doclist.nList; + aTmp = sqlite3_malloc64(nTmp*2); + if( !aTmp ){ + *pRc = SQLITE_NOMEM; + res = 0; + }else{ + char *aPoslist = p->pPhrase->doclist.pList; + int nToken = p->pPhrase->nToken; + + for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ + Fts3Phrase *pPhrase = p->pRight->pPhrase; + int nNear = p->nNear; + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + + aPoslist = pExpr->pRight->pPhrase->doclist.pList; + nToken = pExpr->pRight->pPhrase->nToken; + for(p=pExpr->pLeft; p && res; p=p->pLeft){ + int nNear; + Fts3Phrase *pPhrase; + assert( p->pParent && p->pParent->pLeft==p ); + nNear = p->pParent->nNear; + pPhrase = ( + p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase + ); + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + } + + sqlite3_free(aTmp); + } + + return res; +} + +/* +** This function is a helper function for sqlite3Fts3EvalTestDeferred(). +** Assuming no error occurs or has occurred, It returns non-zero if the +** expression passed as the second argument matches the row that pCsr +** currently points to, or zero if it does not. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** If an error occurs during execution of this function, *pRc is set to +** the appropriate SQLite error code. In this case the returned value is +** undefined. +*/ +static int fts3EvalTestExpr( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ + int *pRc /* IN/OUT: Error code */ +){ + int bHit = 1; /* Return value */ + if( *pRc==SQLITE_OK ){ + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: + bHit = ( + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) + && fts3EvalNearTest(pExpr, pRc) + ); + + /* If the NEAR expression does not match any rows, zero the doclist for + ** all phrases involved in the NEAR. This is because the snippet(), + ** offsets() and matchinfo() functions are not supposed to recognize + ** any instances of phrases that are part of unmatched NEAR queries. + ** For example if this expression: + ** + ** ... MATCH 'a OR (b NEAR c)' + ** + ** is matched against a row containing: + ** + ** 'a b d e' + ** + ** then any snippet() should ony highlight the "a" term, not the "b" + ** (as "b" is part of a non-matching NEAR clause). + */ + if( bHit==0 + && pExpr->eType==FTSQUERY_NEAR + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + for(p=pExpr; p->pPhrase==0; p=p->pLeft){ + if( p->pRight->iDocid==pCsr->iPrevId ){ + fts3EvalInvalidatePoslist(p->pRight->pPhrase); + } + } + if( p->iDocid==pCsr->iPrevId ){ + fts3EvalInvalidatePoslist(p->pPhrase); + } + } + + break; + + case FTSQUERY_OR: { + int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); + int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); + bHit = bHit1 || bHit2; + break; + } + + case FTSQUERY_NOT: + bHit = ( + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) + ); + break; + + default: { +#ifndef SQLITE_DISABLE_FTS4_DEFERRED + if( pCsr->pDeferred + && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) + ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); + if( pExpr->bDeferred ){ + fts3EvalInvalidatePoslist(pPhrase); + } + *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); + bHit = (pPhrase->doclist.pList!=0); + pExpr->iDocid = pCsr->iPrevId; + }else +#endif + { + bHit = ( + pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId + && pExpr->pPhrase->doclist.nList>0 + ); + } + break; + } + } + } + return bHit; +} + +/* +** This function is called as the second part of each xNext operation when +** iterating through the results of a full-text query. At this point the +** cursor points to a row that matches the query expression, with the +** following caveats: +** +** * Up until this point, "NEAR" operators in the expression have been +** treated as "AND". +** +** * Deferred tokens have not yet been considered. +** +** If *pRc is not SQLITE_OK when this function is called, it immediately +** returns 0. Otherwise, it tests whether or not after considering NEAR +** operators and deferred tokens the current row is still a match for the +** expression. It returns 1 if both of the following are true: +** +** 1. *pRc is SQLITE_OK when this function returns, and +** +** 2. After scanning the current FTS table row for the deferred tokens, +** it is determined that the row does *not* match the query. +** +** Or, if no error occurs and it seems the current row does match the FTS +** query, return 0. +*/ +SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){ + int rc = *pRc; + int bMiss = 0; + if( rc==SQLITE_OK ){ + + /* If there are one or more deferred tokens, load the current row into + ** memory and scan it to determine the position list for each deferred + ** token. Then, see if this row is really a match, considering deferred + ** tokens and NEAR operators (neither of which were taken into account + ** earlier, by fts3EvalNextRow()). + */ + if( pCsr->pDeferred ){ + rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3CacheDeferredDoclists(pCsr); + } + } + bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); + + /* Free the position-lists accumulated for each deferred token above. */ + sqlite3Fts3FreeDeferredDoclists(pCsr); + *pRc = rc; + } + return (rc==SQLITE_OK && bMiss); +} + +/* +** Advance to the next document that matches the FTS expression in +** Fts3Cursor.pExpr. +*/ +static int fts3EvalNext(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return Code */ + Fts3Expr *pExpr = pCsr->pExpr; + assert( pCsr->isEof==0 ); + if( pExpr==0 ){ + pCsr->isEof = 1; + }else{ + do { + if( pCsr->isRequireSeek==0 ){ + sqlite3_reset(pCsr->pStmt); + } + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + fts3EvalNextRow(pCsr, pExpr, &rc); + pCsr->isEof = pExpr->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pExpr->iDocid; + }while( pCsr->isEof==0 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) ); + } + + /* Check if the cursor is past the end of the docid range specified + ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ + if( rc==SQLITE_OK && ( + (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) + || (pCsr->bDesc!=0 && pCsr->iPrevIdiMinDocid) + )){ + pCsr->isEof = 1; + } + + return rc; +} + +/* +** Restart interation for expression pExpr so that the next call to +** fts3EvalNext() visits the first row. Do not allow incremental +** loading or merging of phrase doclists for this iteration. +** +** If *pRc is other than SQLITE_OK when this function is called, it is +** a no-op. If an error occurs within this function, *pRc is set to an +** SQLite error code before returning. +*/ +static void fts3EvalRestart( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pRc +){ + if( pExpr && *pRc==SQLITE_OK ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + + if( pPhrase ){ + fts3EvalInvalidatePoslist(pPhrase); + if( pPhrase->bIncr ){ + int i; + for(i=0; inToken; i++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[i]; + assert( pToken->pDeferred==0 ); + if( pToken->pSegcsr ){ + sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); + } + } + *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); + } + pPhrase->doclist.pNextDocid = 0; + pPhrase->doclist.iDocid = 0; + pPhrase->pOrPoslist = 0; + } + + pExpr->iDocid = 0; + pExpr->bEof = 0; + pExpr->bStart = 0; + + fts3EvalRestart(pCsr, pExpr->pLeft, pRc); + fts3EvalRestart(pCsr, pExpr->pRight, pRc); + } +} + +/* +** After allocating the Fts3Expr.aMI[] array for each phrase in the +** expression rooted at pExpr, the cursor iterates through all rows matched +** by pExpr, calling this function for each row. This function increments +** the values in Fts3Expr.aMI[] according to the position-list currently +** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase +** expression nodes. +*/ +static void fts3EvalUpdateCounts(Fts3Expr *pExpr, int nCol){ + if( pExpr ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + if( pPhrase && pPhrase->doclist.pList ){ + int iCol = 0; + char *p = pPhrase->doclist.pList; + + do{ + u8 c = 0; + int iCnt = 0; + while( 0xFE & (*p | c) ){ + if( (c&0x80)==0 ) iCnt++; + c = *p++ & 0x80; + } + + /* aMI[iCol*3 + 1] = Number of occurrences + ** aMI[iCol*3 + 2] = Number of rows containing at least one instance + */ + pExpr->aMI[iCol*3 + 1] += iCnt; + pExpr->aMI[iCol*3 + 2] += (iCnt>0); + if( *p==0x00 ) break; + p++; + p += fts3GetVarint32(p, &iCol); + }while( iColpLeft, nCol); + fts3EvalUpdateCounts(pExpr->pRight, nCol); + } +} + +/* +** Expression pExpr must be of type FTSQUERY_PHRASE. +** +** If it is not already allocated and populated, this function allocates and +** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part +** of a NEAR expression, then it also allocates and populates the same array +** for all other phrases that are part of the NEAR expression. +** +** SQLITE_OK is returned if the aMI[] array is successfully allocated and +** populated. Otherwise, if an error occurs, an SQLite error code is returned. +*/ +static int fts3EvalGatherStats( + Fts3Cursor *pCsr, /* Cursor object */ + Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( pExpr->eType==FTSQUERY_PHRASE ); + if( pExpr->aMI==0 ){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + Fts3Expr *pRoot; /* Root of NEAR expression */ + Fts3Expr *p; /* Iterator used for several purposes */ + + sqlite3_int64 iPrevId = pCsr->iPrevId; + sqlite3_int64 iDocid; + u8 bEof; + + /* Find the root of the NEAR expression */ + pRoot = pExpr; + while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ + pRoot = pRoot->pParent; + } + iDocid = pRoot->iDocid; + bEof = pRoot->bEof; + assert( pRoot->bStart ); + + /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ + for(p=pRoot; p; p=p->pLeft){ + Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); + assert( pE->aMI==0 ); + pE->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32)); + if( !pE->aMI ) return SQLITE_NOMEM; + memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); + } + + fts3EvalRestart(pCsr, pRoot, &rc); + + while( pCsr->isEof==0 && rc==SQLITE_OK ){ + + do { + /* Ensure the %_content statement is reset. */ + if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + + /* Advance to the next document */ + fts3EvalNextRow(pCsr, pRoot, &rc); + pCsr->isEof = pRoot->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pRoot->iDocid; + }while( pCsr->isEof==0 + && pRoot->eType==FTSQUERY_NEAR + && sqlite3Fts3EvalTestDeferred(pCsr, &rc) + ); + + if( rc==SQLITE_OK && pCsr->isEof==0 ){ + fts3EvalUpdateCounts(pRoot, pTab->nColumn); + } + } + + pCsr->isEof = 0; + pCsr->iPrevId = iPrevId; + + if( bEof ){ + pRoot->bEof = bEof; + }else{ + /* Caution: pRoot may iterate through docids in ascending or descending + ** order. For this reason, even though it seems more defensive, the + ** do loop can not be written: + ** + ** do {...} while( pRoot->iDocidbEof==0 ); + if( pRoot->bEof ) rc = FTS_CORRUPT_VTAB; + }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); + } + } + return rc; +} + +/* +** This function is used by the matchinfo() module to query a phrase +** expression node for the following information: +** +** 1. The total number of occurrences of the phrase in each column of +** the FTS table (considering all rows), and +** +** 2. For each column, the number of rows in the table for which the +** column contains at least one instance of the phrase. +** +** If no error occurs, SQLITE_OK is returned and the values for each column +** written into the array aiOut as follows: +** +** aiOut[iCol*3 + 1] = Number of occurrences +** aiOut[iCol*3 + 2] = Number of rows containing at least one instance +** +** Caveats: +** +** * If a phrase consists entirely of deferred tokens, then all output +** values are set to the number of documents in the table. In other +** words we assume that very common tokens occur exactly once in each +** column of each row of the table. +** +** * If a phrase contains some deferred tokens (and some non-deferred +** tokens), count the potential occurrence identified by considering +** the non-deferred tokens instead of actual phrase occurrences. +** +** * If the phrase is part of a NEAR expression, then only phrase instances +** that meet the NEAR constraint are included in the counts. +*/ +SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Phrase expression */ + u32 *aiOut /* Array to write results into (see above) */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int iCol; + + if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ + assert( pCsr->nDoc>0 ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = (u32)pCsr->nDoc; + aiOut[iCol*3 + 2] = (u32)pCsr->nDoc; + } + }else{ + rc = fts3EvalGatherStats(pCsr, pExpr); + if( rc==SQLITE_OK ){ + assert( pExpr->aMI ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; + aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; + } + } + } + + return rc; +} + +/* +** The expression pExpr passed as the second argument to this function +** must be of type FTSQUERY_PHRASE. +** +** The returned value is either NULL or a pointer to a buffer containing +** a position-list indicating the occurrences of the phrase in column iCol +** of the current row. +** +** More specifically, the returned buffer contains 1 varint for each +** occurrence of the phrase in the column, stored using the normal (delta+2) +** compression and is terminated by either an 0x01 or 0x00 byte. For example, +** if the requested column contains "a b X c d X X" and the position-list +** for 'X' is requested, the buffer returned may contain: +** +** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 +** +** This function works regardless of whether or not the phrase is deferred, +** incremental, or neither. +*/ +SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + Fts3Expr *pExpr, /* Phrase to return doclist for */ + int iCol, /* Column to return position list for */ + char **ppOut /* OUT: Pointer to position list */ +){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + char *pIter; + int iThis; + sqlite3_int64 iDocid; + + /* If this phrase is applies specifically to some column other than + ** column iCol, return a NULL pointer. */ + *ppOut = 0; + assert( iCol>=0 && iColnColumn ); + if( (pPhrase->iColumnnColumn && pPhrase->iColumn!=iCol) ){ + return SQLITE_OK; + } + + iDocid = pExpr->iDocid; + pIter = pPhrase->doclist.pList; + if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ + int rc = SQLITE_OK; + int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ + int bOr = 0; + u8 bTreeEof = 0; + Fts3Expr *p; /* Used to iterate from pExpr to root */ + Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ + int bMatch; + + /* Check if this phrase descends from an OR expression node. If not, + ** return NULL. Otherwise, the entry that corresponds to docid + ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the + ** tree that the node is part of has been marked as EOF, but the node + ** itself is not EOF, then it may point to an earlier entry. */ + pNear = pExpr; + for(p=pExpr->pParent; p; p=p->pParent){ + if( p->eType==FTSQUERY_OR ) bOr = 1; + if( p->eType==FTSQUERY_NEAR ) pNear = p; + if( p->bEof ) bTreeEof = 1; + } + if( bOr==0 ) return SQLITE_OK; + + /* This is the descendent of an OR node. In this case we cannot use + ** an incremental phrase. Load the entire doclist for the phrase + ** into memory in this case. */ + if( pPhrase->bIncr ){ + int bEofSave = pNear->bEof; + fts3EvalRestart(pCsr, pNear, &rc); + while( rc==SQLITE_OK && !pNear->bEof ){ + fts3EvalNextRow(pCsr, pNear, &rc); + if( bEofSave==0 && pNear->iDocid==iDocid ) break; + } + assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); + if( rc==SQLITE_OK && pNear->bEof!=bEofSave ){ + rc = FTS_CORRUPT_VTAB; + } + } + if( bTreeEof ){ + while( rc==SQLITE_OK && !pNear->bEof ){ + fts3EvalNextRow(pCsr, pNear, &rc); + } + } + if( rc!=SQLITE_OK ) return rc; + + bMatch = 1; + for(p=pNear; p; p=p->pLeft){ + u8 bEof = 0; + Fts3Expr *pTest = p; + Fts3Phrase *pPh; + assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE ); + if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight; + assert( pTest->eType==FTSQUERY_PHRASE ); + pPh = pTest->pPhrase; + + pIter = pPh->pOrPoslist; + iDocid = pPh->iOrDocid; + if( pCsr->bDesc==bDescDoclist ){ + bEof = !pPh->doclist.nAll || + (pIter >= (pPh->doclist.aAll + pPh->doclist.nAll)); + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ + sqlite3Fts3DoclistNext( + bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, + &pIter, &iDocid, &bEof + ); + } + }else{ + bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll); + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ + int dummy; + sqlite3Fts3DoclistPrev( + bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, + &pIter, &iDocid, &dummy, &bEof + ); + } + } + pPh->pOrPoslist = pIter; + pPh->iOrDocid = iDocid; + if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0; + } + + if( bMatch ){ + pIter = pPhrase->pOrPoslist; + }else{ + pIter = 0; + } + } + if( pIter==0 ) return SQLITE_OK; + + if( *pIter==0x01 ){ + pIter++; + pIter += fts3GetVarint32(pIter, &iThis); + }else{ + iThis = 0; + } + while( iThisdoclist, and +** * any Fts3MultiSegReader objects held by phrase tokens. +*/ +SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ + if( pPhrase ){ + int i; + sqlite3_free(pPhrase->doclist.aAll); + fts3EvalInvalidatePoslist(pPhrase); + memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); + for(i=0; inToken; i++){ + fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); + pPhrase->aToken[i].pSegcsr = 0; + } + } +} + + +/* +** Return SQLITE_CORRUPT_VTAB. +*/ +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3Fts3Corrupt(){ + return SQLITE_CORRUPT_VTAB; +} +#endif + +#if !SQLITE_CORE +/* +** Initialize API pointer table, if required. +*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int sqlite3_fts3_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3Fts3Init(db); +} +#endif + +#endif + +/************** End of fts3.c ************************************************/ +/************** Begin file fts3_aux.c ****************************************/ +/* +** 2011 Jan 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +typedef struct Fts3auxTable Fts3auxTable; +typedef struct Fts3auxCursor Fts3auxCursor; + +struct Fts3auxTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + Fts3Table *pFts3Tab; +}; + +struct Fts3auxCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + Fts3MultiSegReader csr; /* Must be right after "base" */ + Fts3SegFilter filter; + char *zStop; + int nStop; /* Byte-length of string zStop */ + int iLangid; /* Language id to query */ + int isEof; /* True if cursor is at EOF */ + sqlite3_int64 iRowid; /* Current rowid */ + + int iCol; /* Current value of 'col' column */ + int nStat; /* Size of aStat[] array */ + struct Fts3auxColstats { + sqlite3_int64 nDoc; /* 'documents' values for current csr row */ + sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */ + } *aStat; +}; + +/* +** Schema of the terms table. +*/ +#define FTS3_AUX_SCHEMA \ + "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)" + +/* +** This function does all the work for both the xConnect and xCreate methods. +** These tables have no persistent representation of their own, so xConnect +** and xCreate are identical operations. +*/ +static int fts3auxConnectMethod( + sqlite3 *db, /* Database connection */ + void *pUnused, /* Unused */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + char const *zDb; /* Name of database (e.g. "main") */ + char const *zFts3; /* Name of fts3 table */ + int nDb; /* Result of strlen(zDb) */ + int nFts3; /* Result of strlen(zFts3) */ + sqlite3_int64 nByte; /* Bytes of space to allocate here */ + int rc; /* value returned by declare_vtab() */ + Fts3auxTable *p; /* Virtual table object to return */ + + UNUSED_PARAMETER(pUnused); + + /* The user should invoke this in one of two forms: + ** + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); + */ + if( argc!=4 && argc!=5 ) goto bad_args; + + zDb = argv[1]; + nDb = (int)strlen(zDb); + if( argc==5 ){ + if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ + zDb = argv[3]; + nDb = (int)strlen(zDb); + zFts3 = argv[4]; + }else{ + goto bad_args; + } + }else{ + zFts3 = argv[3]; + } + nFts3 = (int)strlen(zFts3); + + rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA); + if( rc!=SQLITE_OK ) return rc; + + nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; + p = (Fts3auxTable *)sqlite3_malloc64(nByte); + if( !p ) return SQLITE_NOMEM; + memset(p, 0, nByte); + + p->pFts3Tab = (Fts3Table *)&p[1]; + p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; + p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; + p->pFts3Tab->db = db; + p->pFts3Tab->nIndex = 1; + + memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); + memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); + sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); + + *ppVtab = (sqlite3_vtab *)p; + return SQLITE_OK; + + bad_args: + sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor"); + return SQLITE_ERROR; +} + +/* +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. +*/ +static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3auxTable *p = (Fts3auxTable *)pVtab; + Fts3Table *pFts3 = p->pFts3Tab; + int i; + + /* Free any prepared statements held */ + for(i=0; iaStmt); i++){ + sqlite3_finalize(pFts3->aStmt[i]); + } + sqlite3_free(pFts3->zSegmentsTbl); + sqlite3_free(p); + return SQLITE_OK; +} + +#define FTS4AUX_EQ_CONSTRAINT 1 +#define FTS4AUX_GE_CONSTRAINT 2 +#define FTS4AUX_LE_CONSTRAINT 4 + +/* +** xBestIndex - Analyze a WHERE and ORDER BY clause. +*/ +static int fts3auxBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + int i; + int iEq = -1; + int iGe = -1; + int iLe = -1; + int iLangid = -1; + int iNext = 1; /* Next free argvIndex value */ + + UNUSED_PARAMETER(pVTab); + + /* This vtab delivers always results in "ORDER BY term ASC" order. */ + if( pInfo->nOrderBy==1 + && pInfo->aOrderBy[0].iColumn==0 + && pInfo->aOrderBy[0].desc==0 + ){ + pInfo->orderByConsumed = 1; + } + + /* Search for equality and range constraints on the "term" column. + ** And equality constraints on the hidden "languageid" column. */ + for(i=0; inConstraint; i++){ + if( pInfo->aConstraint[i].usable ){ + int op = pInfo->aConstraint[i].op; + int iCol = pInfo->aConstraint[i].iColumn; + + if( iCol==0 ){ + if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; + if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; + if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; + if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; + if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; + } + if( iCol==4 ){ + if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i; + } + } + } + + if( iEq>=0 ){ + pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT; + pInfo->aConstraintUsage[iEq].argvIndex = iNext++; + pInfo->estimatedCost = 5; + }else{ + pInfo->idxNum = 0; + pInfo->estimatedCost = 20000; + if( iGe>=0 ){ + pInfo->idxNum += FTS4AUX_GE_CONSTRAINT; + pInfo->aConstraintUsage[iGe].argvIndex = iNext++; + pInfo->estimatedCost /= 2; + } + if( iLe>=0 ){ + pInfo->idxNum += FTS4AUX_LE_CONSTRAINT; + pInfo->aConstraintUsage[iLe].argvIndex = iNext++; + pInfo->estimatedCost /= 2; + } + } + if( iLangid>=0 ){ + pInfo->aConstraintUsage[iLangid].argvIndex = iNext++; + pInfo->estimatedCost--; + } + + return SQLITE_OK; +} + +/* +** xOpen - Open a cursor. +*/ +static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3auxCursor *pCsr; /* Pointer to cursor object to return */ + + UNUSED_PARAMETER(pVTab); + + pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor)); + if( !pCsr ) return SQLITE_NOMEM; + memset(pCsr, 0, sizeof(Fts3auxCursor)); + + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** xClose - Close a cursor. +*/ +static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + + sqlite3Fts3SegmentsClose(pFts3); + sqlite3Fts3SegReaderFinish(&pCsr->csr); + sqlite3_free((void *)pCsr->filter.zTerm); + sqlite3_free(pCsr->zStop); + sqlite3_free(pCsr->aStat); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){ + if( nSize>pCsr->nStat ){ + struct Fts3auxColstats *aNew; + aNew = (struct Fts3auxColstats *)sqlite3_realloc64(pCsr->aStat, + sizeof(struct Fts3auxColstats) * nSize + ); + if( aNew==0 ) return SQLITE_NOMEM; + memset(&aNew[pCsr->nStat], 0, + sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat) + ); + pCsr->aStat = aNew; + pCsr->nStat = nSize; + } + return SQLITE_OK; +} + +/* +** xNext - Advance the cursor to the next row, if any. +*/ +static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; + int rc; + + /* Increment our pretend rowid value. */ + pCsr->iRowid++; + + for(pCsr->iCol++; pCsr->iColnStat; pCsr->iCol++){ + if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK; + } + + rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr); + if( rc==SQLITE_ROW ){ + int i = 0; + int nDoclist = pCsr->csr.nDoclist; + char *aDoclist = pCsr->csr.aDoclist; + int iCol; + + int eState = 0; + + if( pCsr->zStop ){ + int n = (pCsr->nStopcsr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm; + int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n); + if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){ + pCsr->isEof = 1; + return SQLITE_OK; + } + } + + if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM; + memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat); + iCol = 0; + rc = SQLITE_OK; + + while( iaStat[0].nDoc++; + eState = 1; + iCol = 0; + break; + + /* State 1. In this state we are expecting either a 1, indicating + ** that the following integer will be a column number, or the + ** start of a position list for column 0. + ** + ** The only difference between state 1 and state 2 is that if the + ** integer encountered in state 1 is not 0 or 1, then we need to + ** increment the column 0 "nDoc" count for this term. + */ + case 1: + assert( iCol==0 ); + if( v>1 ){ + pCsr->aStat[1].nDoc++; + } + eState = 2; + /* fall through */ + + case 2: + if( v==0 ){ /* 0x00. Next integer will be a docid. */ + eState = 0; + }else if( v==1 ){ /* 0x01. Next integer will be a column number. */ + eState = 3; + }else{ /* 2 or greater. A position. */ + pCsr->aStat[iCol+1].nOcc++; + pCsr->aStat[0].nOcc++; + } + break; + + /* State 3. The integer just read is a column number. */ + default: assert( eState==3 ); + iCol = (int)v; + if( iCol<1 ){ + rc = SQLITE_CORRUPT_VTAB; + break; + } + if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM; + pCsr->aStat[iCol+1].nDoc++; + eState = 2; + break; + } + } + + pCsr->iCol = 0; + }else{ + pCsr->isEof = 1; + } + return rc; +} + +/* +** xFilter - Initialize a cursor to point at the start of its data. +*/ +static int fts3auxFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; + int rc; + int isScan = 0; + int iLangVal = 0; /* Language id to query */ + + int iEq = -1; /* Index of term=? value in apVal */ + int iGe = -1; /* Index of term>=? value in apVal */ + int iLe = -1; /* Index of term<=? value in apVal */ + int iLangid = -1; /* Index of languageid=? value in apVal */ + int iNext = 0; + + UNUSED_PARAMETER(nVal); + UNUSED_PARAMETER(idxStr); + + assert( idxStr==0 ); + assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0 + || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT + || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) + ); + + if( idxNum==FTS4AUX_EQ_CONSTRAINT ){ + iEq = iNext++; + }else{ + isScan = 1; + if( idxNum & FTS4AUX_GE_CONSTRAINT ){ + iGe = iNext++; + } + if( idxNum & FTS4AUX_LE_CONSTRAINT ){ + iLe = iNext++; + } + } + if( iNextfilter.zTerm); + sqlite3Fts3SegReaderFinish(&pCsr->csr); + sqlite3_free((void *)pCsr->filter.zTerm); + sqlite3_free(pCsr->aStat); + sqlite3_free(pCsr->zStop); + memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr); + + pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; + if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN; + + if( iEq>=0 || iGe>=0 ){ + const unsigned char *zStr = sqlite3_value_text(apVal[0]); + assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) ); + if( zStr ){ + pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr); + if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM; + pCsr->filter.nTerm = (int)strlen(pCsr->filter.zTerm); + } + } + + if( iLe>=0 ){ + pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); + if( pCsr->zStop==0 ) return SQLITE_NOMEM; + pCsr->nStop = (int)strlen(pCsr->zStop); + } + + if( iLangid>=0 ){ + iLangVal = sqlite3_value_int(apVal[iLangid]); + + /* If the user specified a negative value for the languageid, use zero + ** instead. This works, as the "languageid=?" constraint will also + ** be tested by the VDBE layer. The test will always be false (since + ** this module will not return a row with a negative languageid), and + ** so the overall query will return zero rows. */ + if( iLangVal<0 ) iLangVal = 0; + } + pCsr->iLangid = iLangVal; + + rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL, + pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); + } + + if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor); + return rc; +} + +/* +** xEof - Return true if the cursor is at EOF, or false otherwise. +*/ +static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + return pCsr->isEof; +} + +/* +** xColumn - Return a column value. +*/ +static int fts3auxColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts3auxCursor *p = (Fts3auxCursor *)pCursor; + + assert( p->isEof==0 ); + switch( iCol ){ + case 0: /* term */ + sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); + break; + + case 1: /* col */ + if( p->iCol ){ + sqlite3_result_int(pCtx, p->iCol-1); + }else{ + sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC); + } + break; + + case 2: /* documents */ + sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc); + break; + + case 3: /* occurrences */ + sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc); + break; + + default: /* languageid */ + assert( iCol==4 ); + sqlite3_result_int(pCtx, p->iLangid); + break; + } + + return SQLITE_OK; +} + +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3auxRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ +){ + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + *pRowid = pCsr->iRowid; + return SQLITE_OK; +} + +/* +** Register the fts3aux module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ + static const sqlite3_module fts3aux_module = { + 0, /* iVersion */ + fts3auxConnectMethod, /* xCreate */ + fts3auxConnectMethod, /* xConnect */ + fts3auxBestIndexMethod, /* xBestIndex */ + fts3auxDisconnectMethod, /* xDisconnect */ + fts3auxDisconnectMethod, /* xDestroy */ + fts3auxOpenMethod, /* xOpen */ + fts3auxCloseMethod, /* xClose */ + fts3auxFilterMethod, /* xFilter */ + fts3auxNextMethod, /* xNext */ + fts3auxEofMethod, /* xEof */ + fts3auxColumnMethod, /* xColumn */ + fts3auxRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + 0 /* xShadowName */ + }; + int rc; /* Return code */ + + rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0); + return rc; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_aux.c ********************************************/ +/************** Begin file fts3_expr.c ***************************************/ +/* +** 2008 Nov 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This module contains code that implements a parser for fts3 query strings +** (the right-hand argument to the MATCH operator). Because the supported +** syntax is relatively simple, the whole tokenizer/parser system is +** hand-coded. +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* +** By default, this module parses the legacy syntax that has been +** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS +** is defined, then it uses the new syntax. The differences between +** the new and the old syntaxes are: +** +** a) The new syntax supports parenthesis. The old does not. +** +** b) The new syntax supports the AND and NOT operators. The old does not. +** +** c) The old syntax supports the "-" token qualifier. This is not +** supported by the new syntax (it is replaced by the NOT operator). +** +** d) When using the old syntax, the OR operator has a greater precedence +** than an implicit AND. When using the new, both implicity and explicit +** AND operators have a higher precedence than OR. +** +** If compiled with SQLITE_TEST defined, then this module exports the +** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable +** to zero causes the module to use the old syntax. If it is set to +** non-zero the new syntax is activated. This is so both syntaxes can +** be tested using a single build of testfixture. +** +** The following describes the syntax supported by the fts3 MATCH +** operator in a similar format to that used by the lemon parser +** generator. This module does not use actually lemon, it uses a +** custom parser. +** +** query ::= andexpr (OR andexpr)*. +** +** andexpr ::= notexpr (AND? notexpr)*. +** +** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. +** notexpr ::= LP query RP. +** +** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. +** +** distance_opt ::= . +** distance_opt ::= / INTEGER. +** +** phrase ::= TOKEN. +** phrase ::= COLUMN:TOKEN. +** phrase ::= "TOKEN TOKEN TOKEN...". +*/ + +#ifdef SQLITE_TEST +SQLITE_API int sqlite3_fts3_enable_parentheses = 0; +#else +# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS +# define sqlite3_fts3_enable_parentheses 1 +# else +# define sqlite3_fts3_enable_parentheses 0 +# endif +#endif + +/* +** Default span for NEAR operators. +*/ +#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 + +/* #include */ +/* #include */ + +/* +** isNot: +** This variable is used by function getNextNode(). When getNextNode() is +** called, it sets ParseContext.isNot to true if the 'next node' is a +** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the +** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to +** zero. +*/ +typedef struct ParseContext ParseContext; +struct ParseContext { + sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ + int iLangid; /* Language id used with tokenizer */ + const char **azCol; /* Array of column names for fts3 table */ + int bFts4; /* True to allow FTS4-only syntax */ + int nCol; /* Number of entries in azCol[] */ + int iDefaultCol; /* Default column to query */ + int isNot; /* True if getNextNode() sees a unary - */ + sqlite3_context *pCtx; /* Write error message here */ + int nNest; /* Number of nested brackets */ +}; + +/* +** This function is equivalent to the standard isspace() function. +** +** The standard isspace() can be awkward to use safely, because although it +** is defined to accept an argument of type int, its behavior when passed +** an integer that falls outside of the range of the unsigned char type +** is undefined (and sometimes, "undefined" means segfault). This wrapper +** is defined to accept an argument of type char, and always returns 0 for +** any values that fall outside of the range of the unsigned char type (i.e. +** negative values). +*/ +static int fts3isspace(char c){ + return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; +} + +/* +** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, +** zero the memory before returning a pointer to it. If unsuccessful, +** return NULL. +*/ +SQLITE_PRIVATE void *sqlite3Fts3MallocZero(sqlite3_int64 nByte){ + void *pRet = sqlite3_malloc64(nByte); + if( pRet ) memset(pRet, 0, nByte); + return pRet; +} + +SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer( + sqlite3_tokenizer *pTokenizer, + int iLangid, + const char *z, + int n, + sqlite3_tokenizer_cursor **ppCsr +){ + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr = 0; + int rc; + + rc = pModule->xOpen(pTokenizer, z, n, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( rc==SQLITE_OK ){ + pCsr->pTokenizer = pTokenizer; + if( pModule->iVersion>=1 ){ + rc = pModule->xLanguageid(pCsr, iLangid); + if( rc!=SQLITE_OK ){ + pModule->xClose(pCsr); + pCsr = 0; + } + } + } + *ppCsr = pCsr; + return rc; +} + +/* +** Function getNextNode(), which is called by fts3ExprParse(), may itself +** call fts3ExprParse(). So this forward declaration is required. +*/ +static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); + +/* +** Extract the next token from buffer z (length n) using the tokenizer +** and other information (column names etc.) in pParse. Create an Fts3Expr +** structure of type FTSQUERY_PHRASE containing a phrase consisting of this +** single token and set *ppExpr to point to it. If the end of the buffer is +** reached before a token is found, set *ppExpr to zero. It is the +** responsibility of the caller to eventually deallocate the allocated +** Fts3Expr structure (if any) by passing it to sqlite3_free(). +** +** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation +** fails. +*/ +static int getNextToken( + ParseContext *pParse, /* fts3 query parse context */ + int iCol, /* Value for Fts3Phrase.iColumn */ + const char *z, int n, /* Input string */ + Fts3Expr **ppExpr, /* OUT: expression */ + int *pnConsumed /* OUT: Number of bytes consumed */ +){ + sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + int rc; + sqlite3_tokenizer_cursor *pCursor; + Fts3Expr *pRet = 0; + int i = 0; + + /* Set variable i to the maximum number of bytes of input to tokenize. */ + for(i=0; iiLangid, z, i, &pCursor); + if( rc==SQLITE_OK ){ + const char *zToken; + int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; + sqlite3_int64 nByte; /* total space to allocate */ + + rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); + if( rc==SQLITE_OK ){ + nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; + pRet = (Fts3Expr *)sqlite3Fts3MallocZero(nByte); + if( !pRet ){ + rc = SQLITE_NOMEM; + }else{ + pRet->eType = FTSQUERY_PHRASE; + pRet->pPhrase = (Fts3Phrase *)&pRet[1]; + pRet->pPhrase->nToken = 1; + pRet->pPhrase->iColumn = iCol; + pRet->pPhrase->aToken[0].n = nToken; + pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; + memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); + + if( iEndpPhrase->aToken[0].isPrefix = 1; + iEnd++; + } + + while( 1 ){ + if( !sqlite3_fts3_enable_parentheses + && iStart>0 && z[iStart-1]=='-' + ){ + pParse->isNot = 1; + iStart--; + }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ + pRet->pPhrase->aToken[0].bFirst = 1; + iStart--; + }else{ + break; + } + } + + } + *pnConsumed = iEnd; + }else if( i && rc==SQLITE_DONE ){ + rc = SQLITE_OK; + } + + pModule->xClose(pCursor); + } + + *ppExpr = pRet; + return rc; +} + + +/* +** Enlarge a memory allocation. If an out-of-memory allocation occurs, +** then free the old allocation. +*/ +static void *fts3ReallocOrFree(void *pOrig, sqlite3_int64 nNew){ + void *pRet = sqlite3_realloc64(pOrig, nNew); + if( !pRet ){ + sqlite3_free(pOrig); + } + return pRet; +} + +/* +** Buffer zInput, length nInput, contains the contents of a quoted string +** that appeared as part of an fts3 query expression. Neither quote character +** is included in the buffer. This function attempts to tokenize the entire +** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE +** containing the results. +** +** If successful, SQLITE_OK is returned and *ppExpr set to point at the +** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory +** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set +** to 0. +*/ +static int getNextString( + ParseContext *pParse, /* fts3 query parse context */ + const char *zInput, int nInput, /* Input string */ + Fts3Expr **ppExpr /* OUT: expression */ +){ + sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + int rc; + Fts3Expr *p = 0; + sqlite3_tokenizer_cursor *pCursor = 0; + char *zTemp = 0; + int nTemp = 0; + + const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); + int nToken = 0; + + /* The final Fts3Expr data structure, including the Fts3Phrase, + ** Fts3PhraseToken structures token buffers are all stored as a single + ** allocation so that the expression can be freed with a single call to + ** sqlite3_free(). Setting this up requires a two pass approach. + ** + ** The first pass, in the block below, uses a tokenizer cursor to iterate + ** through the tokens in the expression. This pass uses fts3ReallocOrFree() + ** to assemble data in two dynamic buffers: + ** + ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase + ** structure, followed by the array of Fts3PhraseToken + ** structures. This pass only populates the Fts3PhraseToken array. + ** + ** Buffer zTemp: Contains copies of all tokens. + ** + ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, + ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase + ** structures. + */ + rc = sqlite3Fts3OpenTokenizer( + pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); + if( rc==SQLITE_OK ){ + int ii; + for(ii=0; rc==SQLITE_OK; ii++){ + const char *zByte; + int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; + rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); + if( rc==SQLITE_OK ){ + Fts3PhraseToken *pToken; + + p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); + if( !p ) goto no_mem; + + zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); + if( !zTemp ) goto no_mem; + + assert( nToken==ii ); + pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; + memset(pToken, 0, sizeof(Fts3PhraseToken)); + + memcpy(&zTemp[nTemp], zByte, nByte); + nTemp += nByte; + + pToken->n = nByte; + pToken->isPrefix = (iEndbFirst = (iBegin>0 && zInput[iBegin-1]=='^'); + nToken = ii+1; + } + } + + pModule->xClose(pCursor); + pCursor = 0; + } + + if( rc==SQLITE_DONE ){ + int jj; + char *zBuf = 0; + + p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); + if( !p ) goto no_mem; + memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); + p->eType = FTSQUERY_PHRASE; + p->pPhrase = (Fts3Phrase *)&p[1]; + p->pPhrase->iColumn = pParse->iDefaultCol; + p->pPhrase->nToken = nToken; + + zBuf = (char *)&p->pPhrase->aToken[nToken]; + if( zTemp ){ + memcpy(zBuf, zTemp, nTemp); + sqlite3_free(zTemp); + }else{ + assert( nTemp==0 ); + } + + for(jj=0; jjpPhrase->nToken; jj++){ + p->pPhrase->aToken[jj].z = zBuf; + zBuf += p->pPhrase->aToken[jj].n; + } + rc = SQLITE_OK; + } + + *ppExpr = p; + return rc; +no_mem: + + if( pCursor ){ + pModule->xClose(pCursor); + } + sqlite3_free(zTemp); + sqlite3_free(p); + *ppExpr = 0; + return SQLITE_NOMEM; +} + +/* +** The output variable *ppExpr is populated with an allocated Fts3Expr +** structure, or set to 0 if the end of the input buffer is reached. +** +** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM +** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. +** If SQLITE_ERROR is returned, pContext is populated with an error message. +*/ +static int getNextNode( + ParseContext *pParse, /* fts3 query parse context */ + const char *z, int n, /* Input string */ + Fts3Expr **ppExpr, /* OUT: expression */ + int *pnConsumed /* OUT: Number of bytes consumed */ +){ + static const struct Fts3Keyword { + char *z; /* Keyword text */ + unsigned char n; /* Length of the keyword */ + unsigned char parenOnly; /* Only valid in paren mode */ + unsigned char eType; /* Keyword code */ + } aKeyword[] = { + { "OR" , 2, 0, FTSQUERY_OR }, + { "AND", 3, 1, FTSQUERY_AND }, + { "NOT", 3, 1, FTSQUERY_NOT }, + { "NEAR", 4, 0, FTSQUERY_NEAR } + }; + int ii; + int iCol; + int iColLen; + int rc; + Fts3Expr *pRet = 0; + + const char *zInput = z; + int nInput = n; + + pParse->isNot = 0; + + /* Skip over any whitespace before checking for a keyword, an open or + ** close bracket, or a quoted string. + */ + while( nInput>0 && fts3isspace(*zInput) ){ + nInput--; + zInput++; + } + if( nInput==0 ){ + return SQLITE_DONE; + } + + /* See if we are dealing with a keyword. */ + for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ + const struct Fts3Keyword *pKey = &aKeyword[ii]; + + if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ + continue; + } + + if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ + int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; + int nKey = pKey->n; + char cNext; + + /* If this is a "NEAR" keyword, check for an explicit nearness. */ + if( pKey->eType==FTSQUERY_NEAR ){ + assert( nKey==4 ); + if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ + nKey += 1+sqlite3Fts3ReadInt(&zInput[nKey+1], &nNear); + } + } + + /* At this point this is probably a keyword. But for that to be true, + ** the next byte must contain either whitespace, an open or close + ** parenthesis, a quote character, or EOF. + */ + cNext = zInput[nKey]; + if( fts3isspace(cNext) + || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 + ){ + pRet = (Fts3Expr *)sqlite3Fts3MallocZero(sizeof(Fts3Expr)); + if( !pRet ){ + return SQLITE_NOMEM; + } + pRet->eType = pKey->eType; + pRet->nNear = nNear; + *ppExpr = pRet; + *pnConsumed = (int)((zInput - z) + nKey); + return SQLITE_OK; + } + + /* Turns out that wasn't a keyword after all. This happens if the + ** user has supplied a token such as "ORacle". Continue. + */ + } + } + + /* See if we are dealing with a quoted phrase. If this is the case, then + ** search for the closing quote and pass the whole string to getNextString() + ** for processing. This is easy to do, as fts3 has no syntax for escaping + ** a quote character embedded in a string. + */ + if( *zInput=='"' ){ + for(ii=1; iinNest++; +#if !defined(SQLITE_MAX_EXPR_DEPTH) + if( pParse->nNest>1000 ) return SQLITE_ERROR; +#elif SQLITE_MAX_EXPR_DEPTH>0 + if( pParse->nNest>SQLITE_MAX_EXPR_DEPTH ) return SQLITE_ERROR; +#endif + rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); + *pnConsumed = (int)(zInput - z) + 1 + nConsumed; + return rc; + }else if( *zInput==')' ){ + pParse->nNest--; + *pnConsumed = (int)((zInput - z) + 1); + *ppExpr = 0; + return SQLITE_DONE; + } + } + + /* If control flows to this point, this must be a regular token, or + ** the end of the input. Read a regular token using the sqlite3_tokenizer + ** interface. Before doing so, figure out if there is an explicit + ** column specifier for the token. + ** + ** TODO: Strangely, it is not possible to associate a column specifier + ** with a quoted phrase, only with a single token. Not sure if this was + ** an implementation artifact or an intentional decision when fts3 was + ** first implemented. Whichever it was, this module duplicates the + ** limitation. + */ + iCol = pParse->iDefaultCol; + iColLen = 0; + for(ii=0; iinCol; ii++){ + const char *zStr = pParse->azCol[ii]; + int nStr = (int)strlen(zStr); + if( nInput>nStr && zInput[nStr]==':' + && sqlite3_strnicmp(zStr, zInput, nStr)==0 + ){ + iCol = ii; + iColLen = (int)((zInput - z) + nStr + 1); + break; + } + } + rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); + *pnConsumed += iColLen; + return rc; +} + +/* +** The argument is an Fts3Expr structure for a binary operator (any type +** except an FTSQUERY_PHRASE). Return an integer value representing the +** precedence of the operator. Lower values have a higher precedence (i.e. +** group more tightly). For example, in the C language, the == operator +** groups more tightly than ||, and would therefore have a higher precedence. +** +** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS +** is defined), the order of the operators in precedence from highest to +** lowest is: +** +** NEAR +** NOT +** AND (including implicit ANDs) +** OR +** +** Note that when using the old query syntax, the OR operator has a higher +** precedence than the AND operator. +*/ +static int opPrecedence(Fts3Expr *p){ + assert( p->eType!=FTSQUERY_PHRASE ); + if( sqlite3_fts3_enable_parentheses ){ + return p->eType; + }else if( p->eType==FTSQUERY_NEAR ){ + return 1; + }else if( p->eType==FTSQUERY_OR ){ + return 2; + } + assert( p->eType==FTSQUERY_AND ); + return 3; +} + +/* +** Argument ppHead contains a pointer to the current head of a query +** expression tree being parsed. pPrev is the expression node most recently +** inserted into the tree. This function adds pNew, which is always a binary +** operator node, into the expression tree based on the relative precedence +** of pNew and the existing nodes of the tree. This may result in the head +** of the tree changing, in which case *ppHead is set to the new root node. +*/ +static void insertBinaryOperator( + Fts3Expr **ppHead, /* Pointer to the root node of a tree */ + Fts3Expr *pPrev, /* Node most recently inserted into the tree */ + Fts3Expr *pNew /* New binary node to insert into expression tree */ +){ + Fts3Expr *pSplit = pPrev; + while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ + pSplit = pSplit->pParent; + } + + if( pSplit->pParent ){ + assert( pSplit->pParent->pRight==pSplit ); + pSplit->pParent->pRight = pNew; + pNew->pParent = pSplit->pParent; + }else{ + *ppHead = pNew; + } + pNew->pLeft = pSplit; + pSplit->pParent = pNew; +} + +/* +** Parse the fts3 query expression found in buffer z, length n. This function +** returns either when the end of the buffer is reached or an unmatched +** closing bracket - ')' - is encountered. +** +** If successful, SQLITE_OK is returned, *ppExpr is set to point to the +** parsed form of the expression and *pnConsumed is set to the number of +** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM +** (out of memory error) or SQLITE_ERROR (parse error) is returned. +*/ +static int fts3ExprParse( + ParseContext *pParse, /* fts3 query parse context */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + int *pnConsumed /* OUT: Number of bytes consumed */ +){ + Fts3Expr *pRet = 0; + Fts3Expr *pPrev = 0; + Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ + int nIn = n; + const char *zIn = z; + int rc = SQLITE_OK; + int isRequirePhrase = 1; + + while( rc==SQLITE_OK ){ + Fts3Expr *p = 0; + int nByte = 0; + + rc = getNextNode(pParse, zIn, nIn, &p, &nByte); + assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); + if( rc==SQLITE_OK ){ + if( p ){ + int isPhrase; + + if( !sqlite3_fts3_enable_parentheses + && p->eType==FTSQUERY_PHRASE && pParse->isNot + ){ + /* Create an implicit NOT operator. */ + Fts3Expr *pNot = sqlite3Fts3MallocZero(sizeof(Fts3Expr)); + if( !pNot ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; + } + pNot->eType = FTSQUERY_NOT; + pNot->pRight = p; + p->pParent = pNot; + if( pNotBranch ){ + pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; + } + pNotBranch = pNot; + p = pPrev; + }else{ + int eType = p->eType; + isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); + + /* The isRequirePhrase variable is set to true if a phrase or + ** an expression contained in parenthesis is required. If a + ** binary operator (AND, OR, NOT or NEAR) is encounted when + ** isRequirePhrase is set, this is a syntax error. + */ + if( !isPhrase && isRequirePhrase ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } + + if( isPhrase && !isRequirePhrase ){ + /* Insert an implicit AND operator. */ + Fts3Expr *pAnd; + assert( pRet && pPrev ); + pAnd = sqlite3Fts3MallocZero(sizeof(Fts3Expr)); + if( !pAnd ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; + } + pAnd->eType = FTSQUERY_AND; + insertBinaryOperator(&pRet, pPrev, pAnd); + pPrev = pAnd; + } + + /* This test catches attempts to make either operand of a NEAR + ** operator something other than a phrase. For example, either of + ** the following: + ** + ** (bracketed expression) NEAR phrase + ** phrase NEAR (bracketed expression) + ** + ** Return an error in either case. + */ + if( pPrev && ( + (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) + || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) + )){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } + + if( isPhrase ){ + if( pRet ){ + assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); + pPrev->pRight = p; + p->pParent = pPrev; + }else{ + pRet = p; + } + }else{ + insertBinaryOperator(&pRet, pPrev, p); + } + isRequirePhrase = !isPhrase; + } + pPrev = p; + } + assert( nByte>0 ); + } + assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); + nIn -= nByte; + zIn += nByte; + } + + if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ + if( !pRet ){ + rc = SQLITE_ERROR; + }else{ + Fts3Expr *pIter = pNotBranch; + while( pIter->pLeft ){ + pIter = pIter->pLeft; + } + pIter->pLeft = pRet; + pRet->pParent = pIter; + pRet = pNotBranch; + } + } + } + *pnConsumed = n - nIn; + +exprparse_out: + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRet); + sqlite3Fts3ExprFree(pNotBranch); + pRet = 0; + } + *ppExpr = pRet; + return rc; +} + +/* +** Return SQLITE_ERROR if the maximum depth of the expression tree passed +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ + int rc = SQLITE_OK; + if( p ){ + if( nMaxDepth<0 ){ + rc = SQLITE_TOOBIG; + }else{ + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); + } + } + } + return rc; +} + +/* +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the +** new root expression node. +** +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. +** +** Otherwise, if an error occurs, an SQLite error code is returned and +** expression (*pp) freed. +*/ +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ + int rc = SQLITE_OK; /* Return code */ + Fts3Expr *pRoot = *pp; /* Initial root node */ + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ + int eType = pRoot->eType; /* Type of node in this tree */ + + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK ){ + if( (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ + Fts3Expr **apLeaf; + apLeaf = (Fts3Expr **)sqlite3_malloc64(sizeof(Fts3Expr *) * nMaxDepth); + if( 0==apLeaf ){ + rc = SQLITE_NOMEM; + }else{ + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + } + + if( rc==SQLITE_OK ){ + int i; + Fts3Expr *p; + + /* Set $p to point to the left-most leaf in the tree of eType nodes. */ + for(p=pRoot; p->eType==eType; p=p->pLeft){ + assert( p->pParent==0 || p->pParent->pLeft==p ); + assert( p->pLeft && p->pRight ); + } + + /* This loop runs once for each leaf in the tree of eType nodes. */ + while( 1 ){ + int iLvl; + Fts3Expr *pParent = p->pParent; /* Current parent of p */ + + assert( pParent==0 || pParent->pLeft==p ); + p->pParent = 0; + if( pParent ){ + pParent->pLeft = 0; + }else{ + pRoot = 0; + } + rc = fts3ExprBalance(&p, nMaxDepth-1); + if( rc!=SQLITE_OK ) break; + + for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; + pFree->pRight = p; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + apLeaf[iLvl] = 0; + } + } + if( p ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_TOOBIG; + break; + } + + /* If that was the last leaf node, break out of the loop */ + if( pParent==0 ) break; + + /* Set $p to point to the next leaf in the tree of eType nodes */ + for(p=pParent->pRight; p->eType==eType; p=p->pLeft); + + /* Remove pParent from the original tree. */ + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); + pParent->pRight->pParent = pParent->pParent; + if( pParent->pParent ){ + pParent->pParent->pLeft = pParent->pRight; + }else{ + assert( pParent==pRoot ); + pRoot = pParent->pRight; + } + + /* Link pParent into the free node list. It will be used as an + ** internal node of the new tree. */ + pParent->pParent = pFree; + pFree = pParent; + } + + if( rc==SQLITE_OK ){ + p = 0; + for(i=0; ipParent = 0; + }else{ + assert( pFree!=0 ); + pFree->pRight = p; + pFree->pLeft = apLeaf[i]; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + } + } + } + pRoot = p; + }else{ + /* An error occurred. Delete the contents of the apLeaf[] array + ** and pFree list. Everything else is cleaned up by the call to + ** sqlite3Fts3ExprFree(pRoot) below. */ + Fts3Expr *pDel; + for(i=0; ipParent; + sqlite3_free(pDel); + } + } + + assert( pFree==0 ); + sqlite3_free( apLeaf ); + } + }else if( eType==FTSQUERY_NOT ){ + Fts3Expr *pLeft = pRoot->pLeft; + Fts3Expr *pRight = pRoot->pRight; + + pRoot->pLeft = 0; + pRoot->pRight = 0; + pLeft->pParent = 0; + pRight->pParent = 0; + + rc = fts3ExprBalance(&pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprBalance(&pRight, nMaxDepth-1); + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRight); + sqlite3Fts3ExprFree(pLeft); + }else{ + assert( pLeft && pRight ); + pRoot->pLeft = pLeft; + pLeft->pParent = pRoot; + pRoot->pRight = pRight; + pRight->pParent = pRoot; + } + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRoot); + pRoot = 0; + } + *pp = pRoot; + return rc; +} + +/* +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences: +** +** 1. It does not do expression rebalancing. +** 2. It does not check that the expression does not exceed the +** maximum allowable depth. +** 3. Even if it fails, *ppExpr may still be set to point to an +** expression tree. It should be deleted using sqlite3Fts3ExprFree() +** in this case. +*/ +static int fts3ExprParseUnbalanced( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr /* OUT: Parsed query structure */ +){ + int nParsed; + int rc; + ParseContext sParse; + + memset(&sParse, 0, sizeof(ParseContext)); + sParse.pTokenizer = pTokenizer; + sParse.iLangid = iLangid; + sParse.azCol = (const char **)azCol; + sParse.nCol = nCol; + sParse.iDefaultCol = iDefaultCol; + sParse.bFts4 = bFts4; + if( z==0 ){ + *ppExpr = 0; + return SQLITE_OK; + } + if( n<0 ){ + n = (int)strlen(z); + } + rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); + assert( rc==SQLITE_OK || *ppExpr==0 ); + + /* Check for mismatched parenthesis */ + if( rc==SQLITE_OK && sParse.nNest ){ + rc = SQLITE_ERROR; + } + + return rc; +} + +/* +** Parameters z and n contain a pointer to and length of a buffer containing +** an fts3 query expression, respectively. This function attempts to parse the +** query expression and create a tree of Fts3Expr structures representing the +** parsed expression. If successful, *ppExpr is set to point to the head +** of the parsed expression tree and SQLITE_OK is returned. If an error +** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse +** error) is returned and *ppExpr is set to 0. +** +** If parameter n is a negative number, then z is assumed to point to a +** nul-terminated string and the length is determined using strlen(). +** +** The first parameter, pTokenizer, is passed the fts3 tokenizer module to +** use to normalize query tokens while parsing the expression. The azCol[] +** array, which is assumed to contain nCol entries, should contain the names +** of each column in the target fts3 table, in order from left to right. +** Column names must be nul-terminated strings. +** +** The iDefaultCol parameter should be passed the index of the table column +** that appears on the left-hand-side of the MATCH operator (the default +** column to match against for tokens for which a column name is not explicitly +** specified as part of the query string), or -1 if tokens may by default +** match any table column. +*/ +SQLITE_PRIVATE int sqlite3Fts3ExprParse( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + char **pzErr /* OUT: Error message (sqlite3_malloc) */ +){ + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(*ppExpr); + *ppExpr = 0; + if( rc==SQLITE_TOOBIG ){ + sqlite3Fts3ErrMsg(pzErr, + "FTS expression tree is too large (maximum depth %d)", + SQLITE_FTS3_MAX_EXPR_DEPTH + ); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ERROR ){ + sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z); + } + } + + return rc; +} + +/* +** Free a single node of an expression tree. +*/ +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} + +/* +** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. +*/ +SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); + } + }else{ + p = pParent; + } + } +} + +/**************************************************************************** +***************************************************************************** +** Everything after this point is just test code. +*/ + +#ifdef SQLITE_TEST + +/* #include */ + +/* +** Return a pointer to a buffer containing a text representation of the +** expression passed as the first argument. The buffer is obtained from +** sqlite3_malloc(). It is the responsibility of the caller to use +** sqlite3_free() to release the memory. If an OOM condition is encountered, +** NULL is returned. +** +** If the second argument is not NULL, then its contents are prepended to +** the returned expression text and then freed using sqlite3_free(). +*/ +static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } + switch( pExpr->eType ){ + case FTSQUERY_PHRASE: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + zBuf = sqlite3_mprintf( + "%zPHRASE %d 0", zBuf, pPhrase->iColumn); + for(i=0; zBuf && inToken; i++){ + zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, + pPhrase->aToken[i].n, pPhrase->aToken[i].z, + (pPhrase->aToken[i].isPrefix?"+":"") + ); + } + return zBuf; + } + + case FTSQUERY_NEAR: + zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); + break; + case FTSQUERY_NOT: + zBuf = sqlite3_mprintf("%zNOT ", zBuf); + break; + case FTSQUERY_AND: + zBuf = sqlite3_mprintf("%zAND ", zBuf); + break; + case FTSQUERY_OR: + zBuf = sqlite3_mprintf("%zOR ", zBuf); + break; + } + + if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); + if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); + if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); + + if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); + if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); + + return zBuf; +} + +/* +** This is the implementation of a scalar SQL function used to test the +** expression parser. It should be called as follows: +** +** fts3_exprtest(, , , ...); +** +** The first argument, , is the name of the fts3 tokenizer used +** to parse the query expression (see README.tokenizers). The second argument +** is the query expression to parse. Each subsequent argument is the name +** of a column of the fts3 table that the query expression may refer to. +** For example: +** +** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); +*/ +static void fts3ExprTestCommon( + int bRebalance, + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + sqlite3_tokenizer *pTokenizer = 0; + int rc; + char **azCol = 0; + const char *zExpr; + int nExpr; + int nCol; + int ii; + Fts3Expr *pExpr; + char *zBuf = 0; + Fts3Hash *pHash = (Fts3Hash*)sqlite3_user_data(context); + const char *zTokenizer = 0; + char *zErr = 0; + + if( argc<3 ){ + sqlite3_result_error(context, + "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 + ); + return; + } + + zTokenizer = (const char*)sqlite3_value_text(argv[0]); + rc = sqlite3Fts3InitTokenizer(pHash, zTokenizer, &pTokenizer, &zErr); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_error(context, zErr, -1); + } + sqlite3_free(zErr); + return; + } + + zExpr = (const char *)sqlite3_value_text(argv[1]); + nExpr = sqlite3_value_bytes(argv[1]); + nCol = argc-2; + azCol = (char **)sqlite3_malloc64(nCol*sizeof(char *)); + if( !azCol ){ + sqlite3_result_error_nomem(context); + goto exprtest_out; + } + for(ii=0; iipModule->xDestroy(pTokenizer); + } + sqlite3_free(azCol); +} + +static void fts3ExprTest( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + fts3ExprTestCommon(0, context, argc, argv); +} +static void fts3ExprTestRebalance( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + fts3ExprTestCommon(1, context, argc, argv); +} + +/* +** Register the query expression parser test function fts3_exprtest() +** with database connection db. +*/ +SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash *pHash){ + int rc = sqlite3_create_function( + db, "fts3_exprtest", -1, SQLITE_UTF8, (void*)pHash, fts3ExprTest, 0, 0 + ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", + -1, SQLITE_UTF8, (void*)pHash, fts3ExprTestRebalance, 0, 0 + ); + } + return rc; +} + +#endif +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_expr.c *******************************************/ +/************** Begin file fts3_hash.c ***************************************/ +/* +** 2001 September 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the implementation of generic hash-tables used in SQLite. +** We've modified it slightly to serve as a standalone hash table +** implementation for the full-text indexing module. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ +/* #include */ + +/* #include "fts3_hash.h" */ + +/* +** Malloc and Free functions +*/ +static void *fts3HashMalloc(sqlite3_int64 n){ + void *p = sqlite3_malloc64(n); + if( p ){ + memset(p, 0, n); + } + return p; +} +static void fts3HashFree(void *p){ + sqlite3_free(p); +} + +/* Turn bulk memory into a hash table object by initializing the +** fields of the Hash structure. +** +** "pNew" is a pointer to the hash table that is to be initialized. +** keyClass is one of the constants +** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass +** determines what kind of key the hash table will use. "copyKey" is +** true if the hash table should make its own private copy of keys and +** false if it should just use the supplied pointer. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){ + assert( pNew!=0 ); + assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY ); + pNew->keyClass = keyClass; + pNew->copyKey = copyKey; + pNew->first = 0; + pNew->count = 0; + pNew->htsize = 0; + pNew->ht = 0; +} + +/* Remove all entries from a hash table. Reclaim all memory. +** Call this routine to delete a hash table or to reset a hash table +** to the empty state. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){ + Fts3HashElem *elem; /* For looping over all elements of the table */ + + assert( pH!=0 ); + elem = pH->first; + pH->first = 0; + fts3HashFree(pH->ht); + pH->ht = 0; + pH->htsize = 0; + while( elem ){ + Fts3HashElem *next_elem = elem->next; + if( pH->copyKey && elem->pKey ){ + fts3HashFree(elem->pKey); + } + fts3HashFree(elem); + elem = next_elem; + } + pH->count = 0; +} + +/* +** Hash and comparison functions when the mode is FTS3_HASH_STRING +*/ +static int fts3StrHash(const void *pKey, int nKey){ + const char *z = (const char *)pKey; + unsigned h = 0; + if( nKey<=0 ) nKey = (int) strlen(z); + while( nKey > 0 ){ + h = (h<<3) ^ h ^ *z++; + nKey--; + } + return (int)(h & 0x7fffffff); +} +static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return strncmp((const char*)pKey1,(const char*)pKey2,n1); +} + +/* +** Hash and comparison functions when the mode is FTS3_HASH_BINARY +*/ +static int fts3BinHash(const void *pKey, int nKey){ + int h = 0; + const char *z = (const char *)pKey; + while( nKey-- > 0 ){ + h = (h<<3) ^ h ^ *(z++); + } + return h & 0x7fffffff; +} +static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return memcmp(pKey1,pKey2,n1); +} + +/* +** Return a pointer to the appropriate hash function given the key class. +** +** The C syntax in this function definition may be unfamilar to some +** programmers, so we provide the following additional explanation: +** +** The name of the function is "ftsHashFunction". The function takes a +** single parameter "keyClass". The return value of ftsHashFunction() +** is a pointer to another function. Specifically, the return value +** of ftsHashFunction() is a pointer to a function that takes two parameters +** with types "const void*" and "int" and returns an "int". +*/ +static int (*ftsHashFunction(int keyClass))(const void*,int){ + if( keyClass==FTS3_HASH_STRING ){ + return &fts3StrHash; + }else{ + assert( keyClass==FTS3_HASH_BINARY ); + return &fts3BinHash; + } +} + +/* +** Return a pointer to the appropriate hash function given the key class. +** +** For help in interpreted the obscure C code in the function definition, +** see the header comment on the previous function. +*/ +static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){ + if( keyClass==FTS3_HASH_STRING ){ + return &fts3StrCompare; + }else{ + assert( keyClass==FTS3_HASH_BINARY ); + return &fts3BinCompare; + } +} + +/* Link an element into the hash table +*/ +static void fts3HashInsertElement( + Fts3Hash *pH, /* The complete hash table */ + struct _fts3ht *pEntry, /* The entry into which pNew is inserted */ + Fts3HashElem *pNew /* The element to be inserted */ +){ + Fts3HashElem *pHead; /* First element already in pEntry */ + pHead = pEntry->chain; + if( pHead ){ + pNew->next = pHead; + pNew->prev = pHead->prev; + if( pHead->prev ){ pHead->prev->next = pNew; } + else { pH->first = pNew; } + pHead->prev = pNew; + }else{ + pNew->next = pH->first; + if( pH->first ){ pH->first->prev = pNew; } + pNew->prev = 0; + pH->first = pNew; + } + pEntry->count++; + pEntry->chain = pNew; +} + + +/* Resize the hash table so that it cantains "new_size" buckets. +** "new_size" must be a power of 2. The hash table might fail +** to resize if sqliteMalloc() fails. +** +** Return non-zero if a memory allocation error occurs. +*/ +static int fts3Rehash(Fts3Hash *pH, int new_size){ + struct _fts3ht *new_ht; /* The new hash table */ + Fts3HashElem *elem, *next_elem; /* For looping over existing elements */ + int (*xHash)(const void*,int); /* The hash function */ + + assert( (new_size & (new_size-1))==0 ); + new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) ); + if( new_ht==0 ) return 1; + fts3HashFree(pH->ht); + pH->ht = new_ht; + pH->htsize = new_size; + xHash = ftsHashFunction(pH->keyClass); + for(elem=pH->first, pH->first=0; elem; elem = next_elem){ + int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); + next_elem = elem->next; + fts3HashInsertElement(pH, &new_ht[h], elem); + } + return 0; +} + +/* This function (for internal use only) locates an element in an +** hash table that matches the given key. The hash for this key has +** already been computed and is passed as the 4th parameter. +*/ +static Fts3HashElem *fts3FindElementByHash( + const Fts3Hash *pH, /* The pH to be searched */ + const void *pKey, /* The key we are searching for */ + int nKey, + int h /* The hash for this key. */ +){ + Fts3HashElem *elem; /* Used to loop thru the element list */ + int count; /* Number of elements left to test */ + int (*xCompare)(const void*,int,const void*,int); /* comparison function */ + + if( pH->ht ){ + struct _fts3ht *pEntry = &pH->ht[h]; + elem = pEntry->chain; + count = pEntry->count; + xCompare = ftsCompareFunction(pH->keyClass); + while( count-- && elem ){ + if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ + return elem; + } + elem = elem->next; + } + } + return 0; +} + +/* Remove a single entry from the hash table given a pointer to that +** element and a hash on the element's key. +*/ +static void fts3RemoveElementByHash( + Fts3Hash *pH, /* The pH containing "elem" */ + Fts3HashElem* elem, /* The element to be removed from the pH */ + int h /* Hash value for the element */ +){ + struct _fts3ht *pEntry; + if( elem->prev ){ + elem->prev->next = elem->next; + }else{ + pH->first = elem->next; + } + if( elem->next ){ + elem->next->prev = elem->prev; + } + pEntry = &pH->ht[h]; + if( pEntry->chain==elem ){ + pEntry->chain = elem->next; + } + pEntry->count--; + if( pEntry->count<=0 ){ + pEntry->chain = 0; + } + if( pH->copyKey && elem->pKey ){ + fts3HashFree(elem->pKey); + } + fts3HashFree( elem ); + pH->count--; + if( pH->count<=0 ){ + assert( pH->first==0 ); + assert( pH->count==0 ); + fts3HashClear(pH); + } +} + +SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem( + const Fts3Hash *pH, + const void *pKey, + int nKey +){ + int h; /* A hash on key */ + int (*xHash)(const void*,int); /* The hash function */ + + if( pH==0 || pH->ht==0 ) return 0; + xHash = ftsHashFunction(pH->keyClass); + assert( xHash!=0 ); + h = (*xHash)(pKey,nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); +} + +/* +** Attempt to locate an element of the hash table pH with a key +** that matches pKey,nKey. Return the data for this element if it is +** found, or NULL if there is no match. +*/ +SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){ + Fts3HashElem *pElem; /* The element that matches key (if any) */ + + pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey); + return pElem ? pElem->data : 0; +} + +/* Insert an element into the hash table pH. The key is pKey,nKey +** and the data is "data". +** +** If no element exists with a matching key, then a new +** element is created. A copy of the key is made if the copyKey +** flag is set. NULL is returned. +** +** If another element already exists with the same key, then the +** new data replaces the old data and the old data is returned. +** The key is not copied in this instance. If a malloc fails, then +** the new data is returned and the hash table is unchanged. +** +** If the "data" parameter to this function is NULL, then the +** element corresponding to "key" is removed from the hash table. +*/ +SQLITE_PRIVATE void *sqlite3Fts3HashInsert( + Fts3Hash *pH, /* The hash table to insert into */ + const void *pKey, /* The key */ + int nKey, /* Number of bytes in the key */ + void *data /* The data */ +){ + int hraw; /* Raw hash value of the key */ + int h; /* the hash of the key modulo hash table size */ + Fts3HashElem *elem; /* Used to loop thru the element list */ + Fts3HashElem *new_elem; /* New element added to the pH */ + int (*xHash)(const void*,int); /* The hash function */ + + assert( pH!=0 ); + xHash = ftsHashFunction(pH->keyClass); + assert( xHash!=0 ); + hraw = (*xHash)(pKey, nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + elem = fts3FindElementByHash(pH,pKey,nKey,h); + if( elem ){ + void *old_data = elem->data; + if( data==0 ){ + fts3RemoveElementByHash(pH,elem,h); + }else{ + elem->data = data; + } + return old_data; + } + if( data==0 ) return 0; + if( (pH->htsize==0 && fts3Rehash(pH,8)) + || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2)) + ){ + pH->count = 0; + return data; + } + assert( pH->htsize>0 ); + new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) ); + if( new_elem==0 ) return data; + if( pH->copyKey && pKey!=0 ){ + new_elem->pKey = fts3HashMalloc( nKey ); + if( new_elem->pKey==0 ){ + fts3HashFree(new_elem); + return data; + } + memcpy((void*)new_elem->pKey, pKey, nKey); + }else{ + new_elem->pKey = (void*)pKey; + } + new_elem->nKey = nKey; + pH->count++; + assert( pH->htsize>0 ); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + fts3HashInsertElement(pH, &pH->ht[h], new_elem); + new_elem->data = data; + return 0; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_hash.c *******************************************/ +/************** Begin file fts3_porter.c *************************************/ +/* +** 2006 September 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Implementation of the full-text-search tokenizer that implements +** a Porter stemmer. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ +/* #include */ +/* #include */ + +/* #include "fts3_tokenizer.h" */ + +/* +** Class derived from sqlite3_tokenizer +*/ +typedef struct porter_tokenizer { + sqlite3_tokenizer base; /* Base class */ +} porter_tokenizer; + +/* +** Class derived from sqlite3_tokenizer_cursor +*/ +typedef struct porter_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *zInput; /* input we are tokenizing */ + int nInput; /* size of the input */ + int iOffset; /* current position in zInput */ + int iToken; /* index of next token to be returned */ + char *zToken; /* storage for current token */ + int nAllocated; /* space allocated to zToken buffer */ +} porter_tokenizer_cursor; + + +/* +** Create a new tokenizer instance. +*/ +static int porterCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + porter_tokenizer *t; + + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); + + t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); + if( t==NULL ) return SQLITE_NOMEM; + memset(t, 0, sizeof(*t)); + *ppTokenizer = &t->base; + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int porterDestroy(sqlite3_tokenizer *pTokenizer){ + sqlite3_free(pTokenizer); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is zInput[0..nInput-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int porterOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, int nInput, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + porter_tokenizer_cursor *c; + + UNUSED_PARAMETER(pTokenizer); + + c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); + if( c==NULL ) return SQLITE_NOMEM; + + c->zInput = zInput; + if( zInput==0 ){ + c->nInput = 0; + }else if( nInput<0 ){ + c->nInput = (int)strlen(zInput); + }else{ + c->nInput = nInput; + } + c->iOffset = 0; /* start tokenizing at the beginning */ + c->iToken = 0; + c->zToken = NULL; /* no space allocated, yet. */ + c->nAllocated = 0; + + *ppCursor = &c->base; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to +** porterOpen() above. +*/ +static int porterClose(sqlite3_tokenizer_cursor *pCursor){ + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + sqlite3_free(c->zToken); + sqlite3_free(c); + return SQLITE_OK; +} +/* +** Vowel or consonant +*/ +static const char cType[] = { + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 2, 1 +}; + +/* +** isConsonant() and isVowel() determine if their first character in +** the string they point to is a consonant or a vowel, according +** to Porter ruls. +** +** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. +** 'Y' is a consonant unless it follows another consonant, +** in which case it is a vowel. +** +** In these routine, the letters are in reverse order. So the 'y' rule +** is that 'y' is a consonant unless it is followed by another +** consonent. +*/ +static int isVowel(const char*); +static int isConsonant(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return j; + return z[1]==0 || isVowel(z + 1); +} +static int isVowel(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return 1-j; + return isConsonant(z + 1); +} + +/* +** Let any sequence of one or more vowels be represented by V and let +** C be sequence of one or more consonants. Then every word can be +** represented as: +** +** [C] (VC){m} [V] +** +** In prose: A word is an optional consonant followed by zero or +** vowel-consonant pairs followed by an optional vowel. "m" is the +** number of vowel consonant pairs. This routine computes the value +** of m for the first i bytes of a word. +** +** Return true if the m-value for z is 1 or more. In other words, +** return true if z contains at least one vowel that is followed +** by a consonant. +** +** In this routine z[] is in reverse order. So we are really looking +** for an instance of a consonant followed by a vowel. +*/ +static int m_gt_0(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; +} + +/* Like mgt0 above except we are looking for a value of m which is +** exactly 1 +*/ +static int m_eq_1(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + if( *z==0 ) return 0; + while( isVowel(z) ){ z++; } + if( *z==0 ) return 1; + while( isConsonant(z) ){ z++; } + return *z==0; +} + +/* Like mgt0 above except we are looking for a value of m>1 instead +** or m>0 +*/ +static int m_gt_1(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + if( *z==0 ) return 0; + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; +} + +/* +** Return TRUE if there is a vowel anywhere within z[0..n-1] +*/ +static int hasVowel(const char *z){ + while( isConsonant(z) ){ z++; } + return *z!=0; +} + +/* +** Return TRUE if the word ends in a double consonant. +** +** The text is reversed here. So we are really looking at +** the first two characters of z[]. +*/ +static int doubleConsonant(const char *z){ + return isConsonant(z) && z[0]==z[1]; +} + +/* +** Return TRUE if the word ends with three letters which +** are consonant-vowel-consonent and where the final consonant +** is not 'w', 'x', or 'y'. +** +** The word is reversed here. So we are really checking the +** first three letters and the first one cannot be in [wxy]. +*/ +static int star_oh(const char *z){ + return + isConsonant(z) && + z[0]!='w' && z[0]!='x' && z[0]!='y' && + isVowel(z+1) && + isConsonant(z+2); +} + +/* +** If the word ends with zFrom and xCond() is true for the stem +** of the word that preceeds the zFrom ending, then change the +** ending to zTo. +** +** The input word *pz and zFrom are both in reverse order. zTo +** is in normal order. +** +** Return TRUE if zFrom matches. Return FALSE if zFrom does not +** match. Not that TRUE is returned even if xCond() fails and +** no substitution occurs. +*/ +static int stem( + char **pz, /* The word being stemmed (Reversed) */ + const char *zFrom, /* If the ending matches this... (Reversed) */ + const char *zTo, /* ... change the ending to this (not reversed) */ + int (*xCond)(const char*) /* Condition that must be true */ +){ + char *z = *pz; + while( *zFrom && *zFrom==*z ){ z++; zFrom++; } + if( *zFrom!=0 ) return 0; + if( xCond && !xCond(z) ) return 1; + while( *zTo ){ + *(--z) = *(zTo++); + } + *pz = z; + return 1; +} + +/* +** This is the fallback stemmer used when the porter stemmer is +** inappropriate. The input word is copied into the output with +** US-ASCII case folding. If the input word is too long (more +** than 20 bytes if it contains no digits or more than 6 bytes if +** it contains digits) then word is truncated to 20 or 6 bytes +** by taking 10 or 3 bytes from the beginning and end. +*/ +static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ + int i, mx, j; + int hasDigit = 0; + for(i=0; i='A' && c<='Z' ){ + zOut[i] = c - 'A' + 'a'; + }else{ + if( c>='0' && c<='9' ) hasDigit = 1; + zOut[i] = c; + } + } + mx = hasDigit ? 3 : 10; + if( nIn>mx*2 ){ + for(j=mx, i=nIn-mx; i=(int)sizeof(zReverse)-7 ){ + /* The word is too big or too small for the porter stemmer. + ** Fallback to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); + return; + } + for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ + zReverse[j] = c + 'a' - 'A'; + }else if( c>='a' && c<='z' ){ + zReverse[j] = c; + }else{ + /* The use of a character not in [a-zA-Z] means that we fallback + ** to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); + return; + } + } + memset(&zReverse[sizeof(zReverse)-5], 0, 5); + z = &zReverse[j+1]; + + + /* Step 1a */ + if( z[0]=='s' ){ + if( + !stem(&z, "sess", "ss", 0) && + !stem(&z, "sei", "i", 0) && + !stem(&z, "ss", "ss", 0) + ){ + z++; + } + } + + /* Step 1b */ + z2 = z; + if( stem(&z, "dee", "ee", m_gt_0) ){ + /* Do nothing. The work was all in the test */ + }else if( + (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) + && z!=z2 + ){ + if( stem(&z, "ta", "ate", 0) || + stem(&z, "lb", "ble", 0) || + stem(&z, "zi", "ize", 0) ){ + /* Do nothing. The work was all in the test */ + }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ + z++; + }else if( m_eq_1(z) && star_oh(z) ){ + *(--z) = 'e'; + } + } + + /* Step 1c */ + if( z[0]=='y' && hasVowel(z+1) ){ + z[0] = 'i'; + } + + /* Step 2 */ + switch( z[1] ){ + case 'a': + if( !stem(&z, "lanoita", "ate", m_gt_0) ){ + stem(&z, "lanoit", "tion", m_gt_0); + } + break; + case 'c': + if( !stem(&z, "icne", "ence", m_gt_0) ){ + stem(&z, "icna", "ance", m_gt_0); + } + break; + case 'e': + stem(&z, "rezi", "ize", m_gt_0); + break; + case 'g': + stem(&z, "igol", "log", m_gt_0); + break; + case 'l': + if( !stem(&z, "ilb", "ble", m_gt_0) + && !stem(&z, "illa", "al", m_gt_0) + && !stem(&z, "iltne", "ent", m_gt_0) + && !stem(&z, "ile", "e", m_gt_0) + ){ + stem(&z, "ilsuo", "ous", m_gt_0); + } + break; + case 'o': + if( !stem(&z, "noitazi", "ize", m_gt_0) + && !stem(&z, "noita", "ate", m_gt_0) + ){ + stem(&z, "rota", "ate", m_gt_0); + } + break; + case 's': + if( !stem(&z, "msila", "al", m_gt_0) + && !stem(&z, "ssenevi", "ive", m_gt_0) + && !stem(&z, "ssenluf", "ful", m_gt_0) + ){ + stem(&z, "ssensuo", "ous", m_gt_0); + } + break; + case 't': + if( !stem(&z, "itila", "al", m_gt_0) + && !stem(&z, "itivi", "ive", m_gt_0) + ){ + stem(&z, "itilib", "ble", m_gt_0); + } + break; + } + + /* Step 3 */ + switch( z[0] ){ + case 'e': + if( !stem(&z, "etaci", "ic", m_gt_0) + && !stem(&z, "evita", "", m_gt_0) + ){ + stem(&z, "ezila", "al", m_gt_0); + } + break; + case 'i': + stem(&z, "itici", "ic", m_gt_0); + break; + case 'l': + if( !stem(&z, "laci", "ic", m_gt_0) ){ + stem(&z, "luf", "", m_gt_0); + } + break; + case 's': + stem(&z, "ssen", "", m_gt_0); + break; + } + + /* Step 4 */ + switch( z[1] ){ + case 'a': + if( z[0]=='l' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'c': + if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'e': + if( z[0]=='r' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'i': + if( z[0]=='c' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'l': + if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'n': + if( z[0]=='t' ){ + if( z[2]=='a' ){ + if( m_gt_1(z+3) ){ + z += 3; + } + }else if( z[2]=='e' ){ + if( !stem(&z, "tneme", "", m_gt_1) + && !stem(&z, "tnem", "", m_gt_1) + ){ + stem(&z, "tne", "", m_gt_1); + } + } + } + break; + case 'o': + if( z[0]=='u' ){ + if( m_gt_1(z+2) ){ + z += 2; + } + }else if( z[3]=='s' || z[3]=='t' ){ + stem(&z, "noi", "", m_gt_1); + } + break; + case 's': + if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 't': + if( !stem(&z, "eta", "", m_gt_1) ){ + stem(&z, "iti", "", m_gt_1); + } + break; + case 'u': + if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 'v': + case 'z': + if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; + } + + /* Step 5a */ + if( z[0]=='e' ){ + if( m_gt_1(z+1) ){ + z++; + }else if( m_eq_1(z+1) && !star_oh(z+1) ){ + z++; + } + } + + /* Step 5b */ + if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ + z++; + } + + /* z[] is now the stemmed word in reverse order. Flip it back + ** around into forward order and return. + */ + *pnOut = i = (int)strlen(z); + zOut[i] = 0; + while( *z ){ + zOut[--i] = *(z++); + } +} + +/* +** Characters that can be part of a token. We assume any character +** whose value is greater than 0x80 (any UTF character) can be +** part of a token. In other words, delimiters all must have +** values of 0x7f or lower. +*/ +static const char porterIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ +}; +#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30])) + +/* +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to porterOpen(). +*/ +static int porterNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ + const char **pzToken, /* OUT: *pzToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + const char *z = c->zInput; + + while( c->iOffsetnInput ){ + int iStartOffset, ch; + + /* Scan past delimiter characters */ + while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ + c->iOffset++; + } + + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ + c->iOffset++; + } + + if( c->iOffset>iStartOffset ){ + int n = c->iOffset-iStartOffset; + if( n>c->nAllocated ){ + char *pNew; + c->nAllocated = n+20; + pNew = sqlite3_realloc(c->zToken, c->nAllocated); + if( !pNew ) return SQLITE_NOMEM; + c->zToken = pNew; + } + porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); + *pzToken = c->zToken; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; + return SQLITE_OK; + } + } + return SQLITE_DONE; +} + +/* +** The set of routines that implement the porter-stemmer tokenizer +*/ +static const sqlite3_tokenizer_module porterTokenizerModule = { + 0, + porterCreate, + porterDestroy, + porterOpen, + porterClose, + porterNext, + 0 +}; + +/* +** Allocate a new porter tokenizer. Return a pointer to the new +** tokenizer in *ppModule +*/ +SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &porterTokenizerModule; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_porter.c *****************************************/ +/************** Begin file fts3_tokenizer.c **********************************/ +/* +** 2007 June 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is part of an SQLite module implementing full-text search. +** This particular file implements the generic tokenizer interface. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +/* +** Return true if the two-argument version of fts3_tokenizer() +** has been activated via a prior call to sqlite3_db_config(db, +** SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); +*/ +static int fts3TokenizerEnabled(sqlite3_context *context){ + sqlite3 *db = sqlite3_context_db_handle(context); + int isEnabled = 0; + sqlite3_db_config(db,SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER,-1,&isEnabled); + return isEnabled; +} + +/* +** Implementation of the SQL scalar function for accessing the underlying +** hash table. This function may be called as follows: +** +** SELECT (); +** SELECT (, ); +** +** where is the name passed as the second argument +** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). +** +** If the argument is specified, it must be a blob value +** containing a pointer to be stored as the hash data corresponding +** to the string . If is not specified, then +** the string must already exist in the has table. Otherwise, +** an error is returned. +** +** Whether or not the argument is specified, the value returned +** is a blob containing the pointer stored as the hash data corresponding +** to string (after the hash-table is updated, if applicable). +*/ +static void fts3TokenizerFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + Fts3Hash *pHash; + void *pPtr = 0; + const unsigned char *zName; + int nName; + + assert( argc==1 || argc==2 ); + + pHash = (Fts3Hash *)sqlite3_user_data(context); + + zName = sqlite3_value_text(argv[0]); + nName = sqlite3_value_bytes(argv[0])+1; + + if( argc==2 ){ + if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[1]) ){ + void *pOld; + int n = sqlite3_value_bytes(argv[1]); + if( zName==0 || n!=sizeof(pPtr) ){ + sqlite3_result_error(context, "argument type mismatch", -1); + return; + } + pPtr = *(void **)sqlite3_value_blob(argv[1]); + pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); + if( pOld==pPtr ){ + sqlite3_result_error(context, "out of memory", -1); + } + }else{ + sqlite3_result_error(context, "fts3tokenize disabled", -1); + return; + } + }else{ + if( zName ){ + pPtr = sqlite3Fts3HashFind(pHash, zName, nName); + } + if( !pPtr ){ + char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); + return; + } + } + if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[0]) ){ + sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); + } +} + +SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){ + static const char isFtsIdChar[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ + }; + return (c&0x80 || isFtsIdChar[(int)(c)]); +} + +SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){ + const char *z1; + const char *z2 = 0; + + /* Find the start of the next token. */ + z1 = zStr; + while( z2==0 ){ + char c = *z1; + switch( c ){ + case '\0': return 0; /* No more tokens here */ + case '\'': + case '"': + case '`': { + z2 = z1; + while( *++z2 && (*z2!=c || *++z2==c) ); + break; + } + case '[': + z2 = &z1[1]; + while( *z2 && z2[0]!=']' ) z2++; + if( *z2 ) z2++; + break; + + default: + if( sqlite3Fts3IsIdChar(*z1) ){ + z2 = &z1[1]; + while( sqlite3Fts3IsIdChar(*z2) ) z2++; + }else{ + z1++; + } + } + } + + *pn = (int)(z2-z1); + return z1; +} + +SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( + Fts3Hash *pHash, /* Tokenizer hash table */ + const char *zArg, /* Tokenizer name */ + sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */ + char **pzErr /* OUT: Set to malloced error message */ +){ + int rc; + char *z = (char *)zArg; + int n = 0; + char *zCopy; + char *zEnd; /* Pointer to nul-term of zCopy */ + sqlite3_tokenizer_module *m; + + zCopy = sqlite3_mprintf("%s", zArg); + if( !zCopy ) return SQLITE_NOMEM; + zEnd = &zCopy[strlen(zCopy)]; + + z = (char *)sqlite3Fts3NextToken(zCopy, &n); + if( z==0 ){ + assert( n==0 ); + z = zCopy; + } + z[n] = '\0'; + sqlite3Fts3Dequote(z); + + m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); + if( !m ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z); + rc = SQLITE_ERROR; + }else{ + char const **aArg = 0; + int iArg = 0; + z = &z[n+1]; + while( zxCreate(iArg, aArg, ppTok); + assert( rc!=SQLITE_OK || *ppTok ); + if( rc!=SQLITE_OK ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer"); + }else{ + (*ppTok)->pModule = m; + } + sqlite3_free((void *)aArg); + } + + sqlite3_free(zCopy); + return rc; +} + + +#ifdef SQLITE_TEST + +#if defined(INCLUDE_SQLITE_TCL_H) +# include "sqlite_tcl.h" +#else +# include "tcl.h" +#endif +/* #include */ + +/* +** Implementation of a special SQL scalar function for testing tokenizers +** designed to be used in concert with the Tcl testing framework. This +** function must be called with two or more arguments: +** +** SELECT (, ..., ); +** +** where is the name passed as the second argument +** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') +** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). +** +** The return value is a string that may be interpreted as a Tcl +** list. For each token in the , three elements are +** added to the returned list. The first is the token position, the +** second is the token text (folded, stemmed, etc.) and the third is the +** substring of associated with the token. For example, +** using the built-in "simple" tokenizer: +** +** SELECT fts_tokenizer_test('simple', 'I don't see how'); +** +** will return the string: +** +** "{0 i I 1 dont don't 2 see see 3 how how}" +** +*/ +static void testFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + Fts3Hash *pHash; + sqlite3_tokenizer_module *p; + sqlite3_tokenizer *pTokenizer = 0; + sqlite3_tokenizer_cursor *pCsr = 0; + + const char *zErr = 0; + + const char *zName; + int nName; + const char *zInput; + int nInput; + + const char *azArg[64]; + + const char *zToken; + int nToken = 0; + int iStart = 0; + int iEnd = 0; + int iPos = 0; + int i; + + Tcl_Obj *pRet; + + if( argc<2 ){ + sqlite3_result_error(context, "insufficient arguments", -1); + return; + } + + nName = sqlite3_value_bytes(argv[0]); + zName = (const char *)sqlite3_value_text(argv[0]); + nInput = sqlite3_value_bytes(argv[argc-1]); + zInput = (const char *)sqlite3_value_text(argv[argc-1]); + + pHash = (Fts3Hash *)sqlite3_user_data(context); + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + + if( !p ){ + char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr2, -1); + sqlite3_free(zErr2); + return; + } + + pRet = Tcl_NewObj(); + Tcl_IncrRefCount(pRet); + + for(i=1; ixCreate(argc-2, azArg, &pTokenizer) ){ + zErr = "error in xCreate()"; + goto finish; + } + pTokenizer->pModule = p; + if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ + zErr = "error in xOpen()"; + goto finish; + } + + while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ + Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + zToken = &zInput[iStart]; + nToken = iEnd-iStart; + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + } + + if( SQLITE_OK!=p->xClose(pCsr) ){ + zErr = "error in xClose()"; + goto finish; + } + if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ + zErr = "error in xDestroy()"; + goto finish; + } + +finish: + if( zErr ){ + sqlite3_result_error(context, zErr, -1); + }else{ + sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); + } + Tcl_DecrRefCount(pRet); +} + +static +int registerTokenizer( + sqlite3 *db, + char *zName, + const sqlite3_tokenizer_module *p +){ + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); + sqlite3_step(pStmt); + + return sqlite3_finalize(pStmt); +} + + +static +int queryTokenizer( + sqlite3 *db, + char *zName, + const sqlite3_tokenizer_module **pp +){ + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?)"; + + *pp = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB + && sqlite3_column_bytes(pStmt, 0)==sizeof(*pp) + ){ + memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); + } + } + + return sqlite3_finalize(pStmt); +} + +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); + +/* +** Implementation of the scalar function fts3_tokenizer_internal_test(). +** This function is used for testing only, it is not included in the +** build unless SQLITE_TEST is defined. +** +** The purpose of this is to test that the fts3_tokenizer() function +** can be used as designed by the C-code in the queryTokenizer and +** registerTokenizer() functions above. These two functions are repeated +** in the README.tokenizer file as an example, so it is important to +** test them. +** +** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar +** function with no arguments. An assert() will fail if a problem is +** detected. i.e.: +** +** SELECT fts3_tokenizer_internal_test(); +** +*/ +static void intTestFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int rc; + const sqlite3_tokenizer_module *p1; + const sqlite3_tokenizer_module *p2; + sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); + + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); + + /* Test the query function */ + sqlite3Fts3SimpleTokenizerModule(&p1); + rc = queryTokenizer(db, "simple", &p2); + assert( rc==SQLITE_OK ); + assert( p1==p2 ); + rc = queryTokenizer(db, "nosuchtokenizer", &p2); + assert( rc==SQLITE_ERROR ); + assert( p2==0 ); + assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); + + /* Test the storage function */ + if( fts3TokenizerEnabled(context) ){ + rc = registerTokenizer(db, "nosuchtokenizer", p1); + assert( rc==SQLITE_OK ); + rc = queryTokenizer(db, "nosuchtokenizer", &p2); + assert( rc==SQLITE_OK ); + assert( p2==p1 ); + } + + sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); +} + +#endif + +/* +** Set up SQL objects in database db used to access the contents of +** the hash table pointed to by argument pHash. The hash table must +** been initialized to use string keys, and to take a private copy +** of the key when a value is inserted. i.e. by a call similar to: +** +** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); +** +** This function adds a scalar function (see header comment above +** fts3TokenizerFunc() in this file for details) and, if ENABLE_TABLE is +** defined at compilation time, a temporary virtual table (see header +** comment above struct HashTableVtab) to the database schema. Both +** provide read/write access to the contents of *pHash. +** +** The third argument to this function, zName, is used as the name +** of both the scalar and, if created, the virtual table. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitHashTable( + sqlite3 *db, + Fts3Hash *pHash, + const char *zName +){ + int rc = SQLITE_OK; + void *p = (void *)pHash; + const int any = SQLITE_UTF8|SQLITE_DIRECTONLY; + +#ifdef SQLITE_TEST + char *zTest = 0; + char *zTest2 = 0; + void *pdb = (void *)db; + zTest = sqlite3_mprintf("%s_test", zName); + zTest2 = sqlite3_mprintf("%s_internal_test", zName); + if( !zTest || !zTest2 ){ + rc = SQLITE_NOMEM; + } +#endif + + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zName, 1, any, p, fts3TokenizerFunc, 0, 0); + } + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zName, 2, any, p, fts3TokenizerFunc, 0, 0); + } +#ifdef SQLITE_TEST + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); + } + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); + } +#endif + +#ifdef SQLITE_TEST + sqlite3_free(zTest); + sqlite3_free(zTest2); +#endif + + return rc; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_tokenizer.c **************************************/ +/************** Begin file fts3_tokenizer1.c *********************************/ +/* +** 2006 Oct 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Implementation of the "simple" full-text-search tokenizer. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ +/* #include */ +/* #include */ + +/* #include "fts3_tokenizer.h" */ + +typedef struct simple_tokenizer { + sqlite3_tokenizer base; + char delim[128]; /* flag ASCII delimiters */ +} simple_tokenizer; + +typedef struct simple_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *pInput; /* input we are tokenizing */ + int nBytes; /* size of the input */ + int iOffset; /* current position in pInput */ + int iToken; /* index of next token to be returned */ + char *pToken; /* storage for current token */ + int nTokenAllocated; /* space allocated to zToken buffer */ +} simple_tokenizer_cursor; + + +static int simpleDelim(simple_tokenizer *t, unsigned char c){ + return c<0x80 && t->delim[c]; +} +static int fts3_isalnum(int x){ + return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); +} + +/* +** Create a new tokenizer instance. +*/ +static int simpleCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + simple_tokenizer *t; + + t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); + if( t==NULL ) return SQLITE_NOMEM; + memset(t, 0, sizeof(*t)); + + /* TODO(shess) Delimiters need to remain the same from run to run, + ** else we need to reindex. One solution would be a meta-table to + ** track such information in the database, then we'd only want this + ** information on the initial create. + */ + if( argc>1 ){ + int i, n = (int)strlen(argv[1]); + for(i=0; i=0x80 ){ + sqlite3_free(t); + return SQLITE_ERROR; + } + t->delim[ch] = 1; + } + } else { + /* Mark non-alphanumeric ASCII characters as delimiters */ + int i; + for(i=1; i<0x80; i++){ + t->delim[i] = !fts3_isalnum(i) ? -1 : 0; + } + } + + *ppTokenizer = &t->base; + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ + sqlite3_free(pTokenizer); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int simpleOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *pInput, int nBytes, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + simple_tokenizer_cursor *c; + + UNUSED_PARAMETER(pTokenizer); + + c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); + if( c==NULL ) return SQLITE_NOMEM; + + c->pInput = pInput; + if( pInput==0 ){ + c->nBytes = 0; + }else if( nBytes<0 ){ + c->nBytes = (int)strlen(pInput); + }else{ + c->nBytes = nBytes; + } + c->iOffset = 0; /* start tokenizing at the beginning */ + c->iToken = 0; + c->pToken = NULL; /* no space allocated, yet. */ + c->nTokenAllocated = 0; + + *ppCursor = &c->base; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to +** simpleOpen() above. +*/ +static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + sqlite3_free(c->pToken); + sqlite3_free(c); + return SQLITE_OK; +} + +/* +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to simpleOpen(). +*/ +static int simpleNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; + unsigned char *p = (unsigned char *)c->pInput; + + while( c->iOffsetnBytes ){ + int iStartOffset; + + /* Scan past delimiter characters */ + while( c->iOffsetnBytes && simpleDelim(t, p[c->iOffset]) ){ + c->iOffset++; + } + + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffsetnBytes && !simpleDelim(t, p[c->iOffset]) ){ + c->iOffset++; + } + + if( c->iOffset>iStartOffset ){ + int i, n = c->iOffset-iStartOffset; + if( n>c->nTokenAllocated ){ + char *pNew; + c->nTokenAllocated = n+20; + pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); + if( !pNew ) return SQLITE_NOMEM; + c->pToken = pNew; + } + for(i=0; ipToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); + } + *ppToken = c->pToken; + *pnBytes = n; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; + + return SQLITE_OK; + } + } + return SQLITE_DONE; +} + +/* +** The set of routines that implement the simple tokenizer +*/ +static const sqlite3_tokenizer_module simpleTokenizerModule = { + 0, + simpleCreate, + simpleDestroy, + simpleOpen, + simpleClose, + simpleNext, + 0, +}; + +/* +** Allocate a new simple tokenizer. Return a pointer to the new +** tokenizer in *ppModule +*/ +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &simpleTokenizerModule; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_tokenizer1.c *************************************/ +/************** Begin file fts3_tokenize_vtab.c ******************************/ +/* +** 2013 Apr 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code for the "fts3tokenize" virtual table module. +** An fts3tokenize virtual table is created as follows: +** +** CREATE VIRTUAL TABLE USING fts3tokenize( +** , , ... +** ); +** +** The table created has the following schema: +** +** CREATE TABLE (input, token, start, end, position) +** +** When queried, the query must include a WHERE clause of type: +** +** input = +** +** The virtual table module tokenizes this , using the FTS3 +** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE +** statement and returns one row for each token in the result. With +** fields set as follows: +** +** input: Always set to a copy of +** token: A token from the input. +** start: Byte offset of the token within the input . +** end: Byte offset of the byte immediately following the end of the +** token within the input string. +** pos: Token offset of token within input. +** +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +typedef struct Fts3tokTable Fts3tokTable; +typedef struct Fts3tokCursor Fts3tokCursor; + +/* +** Virtual table structure. +*/ +struct Fts3tokTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + const sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer *pTok; +}; + +/* +** Virtual table cursor structure. +*/ +struct Fts3tokCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + char *zInput; /* Input string */ + sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ + int iRowid; /* Current 'rowid' value */ + const char *zToken; /* Current 'token' value */ + int nToken; /* Size of zToken in bytes */ + int iStart; /* Current 'start' value */ + int iEnd; /* Current 'end' value */ + int iPos; /* Current 'pos' value */ +}; + +/* +** Query FTS for the tokenizer implementation named zName. +*/ +static int fts3tokQueryTokenizer( + Fts3Hash *pHash, + const char *zName, + const sqlite3_tokenizer_module **pp, + char **pzErr +){ + sqlite3_tokenizer_module *p; + int nName = (int)strlen(zName); + + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + if( !p ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName); + return SQLITE_ERROR; + } + + *pp = p; + return SQLITE_OK; +} + +/* +** The second argument, argv[], is an array of pointers to nul-terminated +** strings. This function makes a copy of the array and strings into a +** single block of memory. It then dequotes any of the strings that appear +** to be quoted. +** +** If successful, output parameter *pazDequote is set to point at the +** array of dequoted strings and SQLITE_OK is returned. The caller is +** responsible for eventually calling sqlite3_free() to free the array +** in this case. Or, if an error occurs, an SQLite error code is returned. +** The final value of *pazDequote is undefined in this case. +*/ +static int fts3tokDequoteArray( + int argc, /* Number of elements in argv[] */ + const char * const *argv, /* Input array */ + char ***pazDequote /* Output array */ +){ + int rc = SQLITE_OK; /* Return code */ + if( argc==0 ){ + *pazDequote = 0; + }else{ + int i; + int nByte = 0; + char **azDequote; + + for(i=0; i1 ) azArg = (const char * const *)&azDequote[1]; + rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); + } + + if( rc==SQLITE_OK ){ + pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + } + } + + if( rc==SQLITE_OK ){ + memset(pTab, 0, sizeof(Fts3tokTable)); + pTab->pMod = pMod; + pTab->pTok = pTok; + *ppVtab = &pTab->base; + }else{ + if( pTok ){ + pMod->xDestroy(pTok); + } + } + + sqlite3_free(azDequote); + return rc; +} + +/* +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. +*/ +static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + + pTab->pMod->xDestroy(pTab->pTok); + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* +** xBestIndex - Analyze a WHERE and ORDER BY clause. +*/ +static int fts3tokBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + int i; + UNUSED_PARAMETER(pVTab); + + for(i=0; inConstraint; i++){ + if( pInfo->aConstraint[i].usable + && pInfo->aConstraint[i].iColumn==0 + && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + pInfo->idxNum = 1; + pInfo->aConstraintUsage[i].argvIndex = 1; + pInfo->aConstraintUsage[i].omit = 1; + pInfo->estimatedCost = 1; + return SQLITE_OK; + } + } + + pInfo->idxNum = 0; + assert( pInfo->estimatedCost>1000000.0 ); + + return SQLITE_OK; +} + +/* +** xOpen - Open a cursor. +*/ +static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3tokCursor *pCsr; + UNUSED_PARAMETER(pVTab); + + pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); + if( pCsr==0 ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(Fts3tokCursor)); + + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** Reset the tokenizer cursor passed as the only argument. As if it had +** just been returned by fts3tokOpenMethod(). +*/ +static void fts3tokResetCursor(Fts3tokCursor *pCsr){ + if( pCsr->pCsr ){ + Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); + pTab->pMod->xClose(pCsr->pCsr); + pCsr->pCsr = 0; + } + sqlite3_free(pCsr->zInput); + pCsr->zInput = 0; + pCsr->zToken = 0; + pCsr->nToken = 0; + pCsr->iStart = 0; + pCsr->iEnd = 0; + pCsr->iPos = 0; + pCsr->iRowid = 0; +} + +/* +** xClose - Close a cursor. +*/ +static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + fts3tokResetCursor(pCsr); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** xNext - Advance the cursor to the next row, if any. +*/ +static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + int rc; /* Return code */ + + pCsr->iRowid++; + rc = pTab->pMod->xNext(pCsr->pCsr, + &pCsr->zToken, &pCsr->nToken, + &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos + ); + + if( rc!=SQLITE_OK ){ + fts3tokResetCursor(pCsr); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + + return rc; +} + +/* +** xFilter - Initialize a cursor to point at the start of its data. +*/ +static int fts3tokFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + int rc = SQLITE_ERROR; + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); + + fts3tokResetCursor(pCsr); + if( idxNum==1 ){ + const char *zByte = (const char *)sqlite3_value_text(apVal[0]); + int nByte = sqlite3_value_bytes(apVal[0]); + pCsr->zInput = sqlite3_malloc64(nByte+1); + if( pCsr->zInput==0 ){ + rc = SQLITE_NOMEM; + }else{ + if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte); + pCsr->zInput[nByte] = 0; + rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); + if( rc==SQLITE_OK ){ + pCsr->pCsr->pTokenizer = pTab->pTok; + } + } + } + + if( rc!=SQLITE_OK ) return rc; + return fts3tokNextMethod(pCursor); +} + +/* +** xEof - Return true if the cursor is at EOF, or false otherwise. +*/ +static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + return (pCsr->zToken==0); +} + +/* +** xColumn - Return a column value. +*/ +static int fts3tokColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + /* CREATE TABLE x(input, token, start, end, position) */ + switch( iCol ){ + case 0: + sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); + break; + case 1: + sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); + break; + case 2: + sqlite3_result_int(pCtx, pCsr->iStart); + break; + case 3: + sqlite3_result_int(pCtx, pCsr->iEnd); + break; + default: + assert( iCol==4 ); + sqlite3_result_int(pCtx, pCsr->iPos); + break; + } + return SQLITE_OK; +} + +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3tokRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + *pRowid = (sqlite3_int64)pCsr->iRowid; + return SQLITE_OK; +} + +/* +** Register the fts3tok module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash, void(*xDestroy)(void*)){ + static const sqlite3_module fts3tok_module = { + 0, /* iVersion */ + fts3tokConnectMethod, /* xCreate */ + fts3tokConnectMethod, /* xConnect */ + fts3tokBestIndexMethod, /* xBestIndex */ + fts3tokDisconnectMethod, /* xDisconnect */ + fts3tokDisconnectMethod, /* xDestroy */ + fts3tokOpenMethod, /* xOpen */ + fts3tokCloseMethod, /* xClose */ + fts3tokFilterMethod, /* xFilter */ + fts3tokNextMethod, /* xNext */ + fts3tokEofMethod, /* xEof */ + fts3tokColumnMethod, /* xColumn */ + fts3tokRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + 0 /* xShadowName */ + }; + int rc; /* Return code */ + + rc = sqlite3_create_module_v2( + db, "fts3tokenize", &fts3tok_module, (void*)pHash, xDestroy + ); + return rc; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_tokenize_vtab.c **********************************/ +/************** Begin file fts3_write.c **************************************/ +/* +** 2009 Oct 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file is part of the SQLite FTS3 extension module. Specifically, +** this file contains code to insert, update and delete rows from FTS3 +** tables. It also contains code to merge FTS3 b-tree segments. Some +** of the sub-routines used to merge segments are also used by the query +** code in fts3.c. +*/ + +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ +/* #include */ +/* #include */ + +#define FTS_MAX_APPENDABLE_HEIGHT 16 + +/* +** When full-text index nodes are loaded from disk, the buffer that they +** are loaded into has the following number of bytes of padding at the end +** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer +** of 920 bytes is allocated for it. +** +** This means that if we have a pointer into a buffer containing node data, +** it is always safe to read up to two varints from it without risking an +** overread, even if the node data is corrupted. +*/ +#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2) + +/* +** Under certain circumstances, b-tree nodes (doclists) can be loaded into +** memory incrementally instead of all at once. This can be a big performance +** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext() +** method before retrieving all query results (as may happen, for example, +** if a query has a LIMIT clause). +** +** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD +** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes. +** The code is written so that the hard lower-limit for each of these values +** is 1. Clearly such small values would be inefficient, but can be useful +** for testing purposes. +** +** If this module is built with SQLITE_TEST defined, these constants may +** be overridden at runtime for testing purposes. File fts3_test.c contains +** a Tcl interface to read and write the values. +*/ +#ifdef SQLITE_TEST +int test_fts3_node_chunksize = (4*1024); +int test_fts3_node_chunk_threshold = (4*1024)*4; +# define FTS3_NODE_CHUNKSIZE test_fts3_node_chunksize +# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold +#else +# define FTS3_NODE_CHUNKSIZE (4*1024) +# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4) +#endif + +/* +** The values that may be meaningfully bound to the :1 parameter in +** statements SQL_REPLACE_STAT and SQL_SELECT_STAT. +*/ +#define FTS_STAT_DOCTOTAL 0 +#define FTS_STAT_INCRMERGEHINT 1 +#define FTS_STAT_AUTOINCRMERGE 2 + +/* +** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic +** and incremental merge operation that takes place. This is used for +** debugging FTS only, it should not usually be turned on in production +** systems. +*/ +#ifdef FTS3_LOG_MERGES +static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){ + sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel); +} +#else +#define fts3LogMerge(x, y) +#endif + + +typedef struct PendingList PendingList; +typedef struct SegmentNode SegmentNode; +typedef struct SegmentWriter SegmentWriter; + +/* +** An instance of the following data structure is used to build doclists +** incrementally. See function fts3PendingListAppend() for details. +*/ +struct PendingList { + int nData; + char *aData; + int nSpace; + sqlite3_int64 iLastDocid; + sqlite3_int64 iLastCol; + sqlite3_int64 iLastPos; +}; + + +/* +** Each cursor has a (possibly empty) linked list of the following objects. +*/ +struct Fts3DeferredToken { + Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ + int iCol; /* Column token must occur in */ + Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ + PendingList *pList; /* Doclist is assembled here */ +}; + +/* +** An instance of this structure is used to iterate through the terms on +** a contiguous set of segment b-tree leaf nodes. Although the details of +** this structure are only manipulated by code in this file, opaque handles +** of type Fts3SegReader* are also used by code in fts3.c to iterate through +** terms when querying the full-text index. See functions: +** +** sqlite3Fts3SegReaderNew() +** sqlite3Fts3SegReaderFree() +** sqlite3Fts3SegReaderIterate() +** +** Methods used to manipulate Fts3SegReader structures: +** +** fts3SegReaderNext() +** fts3SegReaderFirstDocid() +** fts3SegReaderNextDocid() +*/ +struct Fts3SegReader { + int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ + u8 bLookup; /* True for a lookup only */ + u8 rootOnly; /* True for a root-only reader */ + + sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ + sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ + sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ + sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ + + char *aNode; /* Pointer to node data (or NULL) */ + int nNode; /* Size of buffer at aNode (or 0) */ + int nPopulate; /* If >0, bytes of buffer aNode[] loaded */ + sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */ + + Fts3HashElem **ppNextElem; + + /* Variables set by fts3SegReaderNext(). These may be read directly + ** by the caller. They are valid from the time SegmentReaderNew() returns + ** until SegmentReaderNext() returns something other than SQLITE_OK + ** (i.e. SQLITE_DONE). + */ + int nTerm; /* Number of bytes in current term */ + char *zTerm; /* Pointer to current term */ + int nTermAlloc; /* Allocated size of zTerm buffer */ + char *aDoclist; /* Pointer to doclist of current entry */ + int nDoclist; /* Size of doclist in current entry */ + + /* The following variables are used by fts3SegReaderNextDocid() to iterate + ** through the current doclist (aDoclist/nDoclist). + */ + char *pOffsetList; + int nOffsetList; /* For descending pending seg-readers only */ + sqlite3_int64 iDocid; +}; + +#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) +#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0) + +/* +** An instance of this structure is used to create a segment b-tree in the +** database. The internal details of this type are only accessed by the +** following functions: +** +** fts3SegWriterAdd() +** fts3SegWriterFlush() +** fts3SegWriterFree() +*/ +struct SegmentWriter { + SegmentNode *pTree; /* Pointer to interior tree structure */ + sqlite3_int64 iFirst; /* First slot in %_segments written */ + sqlite3_int64 iFree; /* Next free slot in %_segments */ + char *zTerm; /* Pointer to previous term buffer */ + int nTerm; /* Number of bytes in zTerm */ + int nMalloc; /* Size of malloc'd buffer at zMalloc */ + char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ + int nSize; /* Size of allocation at aData */ + int nData; /* Bytes of data in aData */ + char *aData; /* Pointer to block from malloc() */ + i64 nLeafData; /* Number of bytes of leaf data written */ +}; + +/* +** Type SegmentNode is used by the following three functions to create +** the interior part of the segment b+-tree structures (everything except +** the leaf nodes). These functions and type are only ever used by code +** within the fts3SegWriterXXX() family of functions described above. +** +** fts3NodeAddTerm() +** fts3NodeWrite() +** fts3NodeFree() +** +** When a b+tree is written to the database (either as a result of a merge +** or the pending-terms table being flushed), leaves are written into the +** database file as soon as they are completely populated. The interior of +** the tree is assembled in memory and written out only once all leaves have +** been populated and stored. This is Ok, as the b+-tree fanout is usually +** very large, meaning that the interior of the tree consumes relatively +** little memory. +*/ +struct SegmentNode { + SegmentNode *pParent; /* Parent node (or NULL for root node) */ + SegmentNode *pRight; /* Pointer to right-sibling */ + SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ + int nEntry; /* Number of terms written to node so far */ + char *zTerm; /* Pointer to previous term buffer */ + int nTerm; /* Number of bytes in zTerm */ + int nMalloc; /* Size of malloc'd buffer at zMalloc */ + char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ + int nData; /* Bytes of valid data so far */ + char *aData; /* Node data */ +}; + +/* +** Valid values for the second argument to fts3SqlStmt(). +*/ +#define SQL_DELETE_CONTENT 0 +#define SQL_IS_EMPTY 1 +#define SQL_DELETE_ALL_CONTENT 2 +#define SQL_DELETE_ALL_SEGMENTS 3 +#define SQL_DELETE_ALL_SEGDIR 4 +#define SQL_DELETE_ALL_DOCSIZE 5 +#define SQL_DELETE_ALL_STAT 6 +#define SQL_SELECT_CONTENT_BY_ROWID 7 +#define SQL_NEXT_SEGMENT_INDEX 8 +#define SQL_INSERT_SEGMENTS 9 +#define SQL_NEXT_SEGMENTS_ID 10 +#define SQL_INSERT_SEGDIR 11 +#define SQL_SELECT_LEVEL 12 +#define SQL_SELECT_LEVEL_RANGE 13 +#define SQL_SELECT_LEVEL_COUNT 14 +#define SQL_SELECT_SEGDIR_MAX_LEVEL 15 +#define SQL_DELETE_SEGDIR_LEVEL 16 +#define SQL_DELETE_SEGMENTS_RANGE 17 +#define SQL_CONTENT_INSERT 18 +#define SQL_DELETE_DOCSIZE 19 +#define SQL_REPLACE_DOCSIZE 20 +#define SQL_SELECT_DOCSIZE 21 +#define SQL_SELECT_STAT 22 +#define SQL_REPLACE_STAT 23 + +#define SQL_SELECT_ALL_PREFIX_LEVEL 24 +#define SQL_DELETE_ALL_TERMS_SEGDIR 25 +#define SQL_DELETE_SEGDIR_RANGE 26 +#define SQL_SELECT_ALL_LANGID 27 +#define SQL_FIND_MERGE_LEVEL 28 +#define SQL_MAX_LEAF_NODE_ESTIMATE 29 +#define SQL_DELETE_SEGDIR_ENTRY 30 +#define SQL_SHIFT_SEGDIR_ENTRY 31 +#define SQL_SELECT_SEGDIR 32 +#define SQL_CHOMP_SEGDIR 33 +#define SQL_SEGMENT_IS_APPENDABLE 34 +#define SQL_SELECT_INDEXES 35 +#define SQL_SELECT_MXLEVEL 36 + +#define SQL_SELECT_LEVEL_RANGE2 37 +#define SQL_UPDATE_LEVEL_IDX 38 +#define SQL_UPDATE_LEVEL 39 + +/* +** This function is used to obtain an SQLite prepared statement handle +** for the statement identified by the second argument. If successful, +** *pp is set to the requested statement handle and SQLITE_OK returned. +** Otherwise, an SQLite error code is returned and *pp is set to 0. +** +** If argument apVal is not NULL, then it must point to an array with +** at least as many entries as the requested statement has bound +** parameters. The values are bound to the statements parameters before +** returning. +*/ +static int fts3SqlStmt( + Fts3Table *p, /* Virtual table handle */ + int eStmt, /* One of the SQL_XXX constants above */ + sqlite3_stmt **pp, /* OUT: Statement handle */ + sqlite3_value **apVal /* Values to bind to statement */ +){ + const char *azSql[] = { +/* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", +/* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", +/* 2 */ "DELETE FROM %Q.'%q_content'", +/* 3 */ "DELETE FROM %Q.'%q_segments'", +/* 4 */ "DELETE FROM %Q.'%q_segdir'", +/* 5 */ "DELETE FROM %Q.'%q_docsize'", +/* 6 */ "DELETE FROM %Q.'%q_stat'", +/* 7 */ "SELECT %s WHERE rowid=?", +/* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", +/* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", +/* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", +/* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", + + /* Return segments in order from oldest to newest.*/ +/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", +/* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" + "ORDER BY level DESC, idx ASC", + +/* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", +/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", + +/* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", +/* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", +/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", +/* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", +/* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", +/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", +/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?", +/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)", +/* 24 */ "", +/* 25 */ "", + +/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", +/* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'", + +/* This statement is used to determine which level to read the input from +** when performing an incremental merge. It returns the absolute level number +** of the oldest level in the db that contains at least ? segments. Or, +** if no level in the FTS index contains more than ? segments, the statement +** returns zero rows. */ +/* 28 */ "SELECT level, count(*) AS cnt FROM %Q.'%q_segdir' " + " GROUP BY level HAVING cnt>=?" + " ORDER BY (level %% 1024) ASC, 2 DESC LIMIT 1", + +/* Estimate the upper limit on the number of leaf nodes in a new segment +** created by merging the oldest :2 segments from absolute level :1. See +** function sqlite3Fts3Incrmerge() for details. */ +/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " + " FROM (SELECT * FROM %Q.'%q_segdir' " + " WHERE level = ? ORDER BY idx ASC LIMIT ?" + " )", + +/* SQL_DELETE_SEGDIR_ENTRY +** Delete the %_segdir entry on absolute level :1 with index :2. */ +/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + +/* SQL_SHIFT_SEGDIR_ENTRY +** Modify the idx value for the segment with idx=:3 on absolute level :2 +** to :1. */ +/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?", + +/* SQL_SELECT_SEGDIR +** Read a single entry from the %_segdir table. The entry from absolute +** level :1 with index value :2. */ +/* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + +/* SQL_CHOMP_SEGDIR +** Update the start_block (:1) and root (:2) fields of the %_segdir +** entry located on absolute level :3 with index :4. */ +/* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" + "WHERE level = ? AND idx = ?", + +/* SQL_SEGMENT_IS_APPENDABLE +** Return a single row if the segment with end_block=? is appendable. Or +** no rows otherwise. */ +/* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL", + +/* SQL_SELECT_INDEXES +** Return the list of valid segment indexes for absolute level ? */ +/* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC", + +/* SQL_SELECT_MXLEVEL +** Return the largest relative level in the FTS index or indexes. */ +/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", + + /* Return segments in order from oldest to newest.*/ +/* 37 */ "SELECT level, idx, end_block " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " + "ORDER BY level DESC, idx ASC", + + /* Update statements used while promoting segments */ +/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " + "WHERE level=? AND idx=?", +/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" + + }; + int rc = SQLITE_OK; + sqlite3_stmt *pStmt; + + assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); + assert( eStmt=0 ); + + pStmt = p->aStmt[eStmt]; + if( !pStmt ){ + int f = SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB; + char *zSql; + if( eStmt==SQL_CONTENT_INSERT ){ + zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); + }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ + f &= ~SQLITE_PREPARE_NO_VTAB; + zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); + }else{ + zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); + } + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v3(p->db, zSql, -1, f, &pStmt, NULL); + sqlite3_free(zSql); + assert( rc==SQLITE_OK || pStmt==0 ); + p->aStmt[eStmt] = pStmt; + } + } + if( apVal ){ + int i; + int nParam = sqlite3_bind_parameter_count(pStmt); + for(i=0; rc==SQLITE_OK && inPendingData==0 ){ + sqlite3_stmt *pStmt; + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_null(pStmt, 1); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + } + + return rc; +} + +/* +** FTS maintains a separate indexes for each language-id (a 32-bit integer). +** Within each language id, a separate index is maintained to store the +** document terms, and each configured prefix size (configured the FTS +** "prefix=" option). And each index consists of multiple levels ("relative +** levels"). +** +** All three of these values (the language id, the specific index and the +** level within the index) are encoded in 64-bit integer values stored +** in the %_segdir table on disk. This function is used to convert three +** separate component values into the single 64-bit integer value that +** can be used to query the %_segdir table. +** +** Specifically, each language-id/index combination is allocated 1024 +** 64-bit integer level values ("absolute levels"). The main terms index +** for language-id 0 is allocate values 0-1023. The first prefix index +** (if any) for language-id 0 is allocated values 1024-2047. And so on. +** Language 1 indexes are allocated immediately following language 0. +** +** So, for a system with nPrefix prefix indexes configured, the block of +** absolute levels that corresponds to language-id iLangid and index +** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024). +*/ +static sqlite3_int64 getAbsoluteLevel( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index in p->aIndex[] */ + int iLevel /* Level of segments */ +){ + sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ + assert_fts3_nc( iLangid>=0 ); + assert( p->nIndex>0 ); + assert( iIndex>=0 && iIndexnIndex ); + + iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; + return iBase + iLevel; +} + +/* +** Set *ppStmt to a statement handle that may be used to iterate through +** all rows in the %_segdir table, from oldest to newest. If successful, +** return SQLITE_OK. If an error occurs while preparing the statement, +** return an SQLite error code. +** +** There is only ever one instance of this SQL statement compiled for +** each FTS3 table. +** +** The statement returns the following columns from the %_segdir table: +** +** 0: idx +** 1: start_block +** 2: leaves_end_block +** 3: end_block +** 4: root +*/ +SQLITE_PRIVATE int sqlite3Fts3AllSegdirs( + Fts3Table *p, /* FTS3 table */ + int iLangid, /* Language being queried */ + int iIndex, /* Index for p->aIndex[] */ + int iLevel, /* Level to select (relative level) */ + sqlite3_stmt **ppStmt /* OUT: Compiled statement */ +){ + int rc; + sqlite3_stmt *pStmt = 0; + + assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); + assert( iLevel=0 && iIndexnIndex ); + + if( iLevel<0 ){ + /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + } + }else{ + /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); + } + } + *ppStmt = pStmt; + return rc; +} + + +/* +** Append a single varint to a PendingList buffer. SQLITE_OK is returned +** if successful, or an SQLite error code otherwise. +** +** This function also serves to allocate the PendingList structure itself. +** For example, to create a new PendingList structure containing two +** varints: +** +** PendingList *p = 0; +** fts3PendingListAppendVarint(&p, 1); +** fts3PendingListAppendVarint(&p, 2); +*/ +static int fts3PendingListAppendVarint( + PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ + sqlite3_int64 i /* Value to append to data */ +){ + PendingList *p = *pp; + + /* Allocate or grow the PendingList as required. */ + if( !p ){ + p = sqlite3_malloc(sizeof(*p) + 100); + if( !p ){ + return SQLITE_NOMEM; + } + p->nSpace = 100; + p->aData = (char *)&p[1]; + p->nData = 0; + } + else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ + int nNew = p->nSpace * 2; + p = sqlite3_realloc(p, sizeof(*p) + nNew); + if( !p ){ + sqlite3_free(*pp); + *pp = 0; + return SQLITE_NOMEM; + } + p->nSpace = nNew; + p->aData = (char *)&p[1]; + } + + /* Append the new serialized varint to the end of the list. */ + p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); + p->aData[p->nData] = '\0'; + *pp = p; + return SQLITE_OK; +} + +/* +** Add a docid/column/position entry to a PendingList structure. Non-zero +** is returned if the structure is sqlite3_realloced as part of adding +** the entry. Otherwise, zero. +** +** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. +** Zero is always returned in this case. Otherwise, if no OOM error occurs, +** it is set to SQLITE_OK. +*/ +static int fts3PendingListAppend( + PendingList **pp, /* IN/OUT: PendingList structure */ + sqlite3_int64 iDocid, /* Docid for entry to add */ + sqlite3_int64 iCol, /* Column for entry to add */ + sqlite3_int64 iPos, /* Position of term for entry to add */ + int *pRc /* OUT: Return code */ +){ + PendingList *p = *pp; + int rc = SQLITE_OK; + + assert( !p || p->iLastDocid<=iDocid ); + + if( !p || p->iLastDocid!=iDocid ){ + u64 iDelta = (u64)iDocid - (u64)(p ? p->iLastDocid : 0); + if( p ){ + assert( p->nDatanSpace ); + assert( p->aData[p->nData]==0 ); + p->nData++; + } + if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ + goto pendinglistappend_out; + } + p->iLastCol = -1; + p->iLastPos = 0; + p->iLastDocid = iDocid; + } + if( iCol>0 && p->iLastCol!=iCol ){ + if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) + || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) + ){ + goto pendinglistappend_out; + } + p->iLastCol = iCol; + p->iLastPos = 0; + } + if( iCol>=0 ){ + assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); + rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); + if( rc==SQLITE_OK ){ + p->iLastPos = iPos; + } + } + + pendinglistappend_out: + *pRc = rc; + if( p!=*pp ){ + *pp = p; + return 1; + } + return 0; +} + +/* +** Free a PendingList object allocated by fts3PendingListAppend(). +*/ +static void fts3PendingListDelete(PendingList *pList){ + sqlite3_free(pList); +} + +/* +** Add an entry to one of the pending-terms hash tables. +*/ +static int fts3PendingTermsAddOne( + Fts3Table *p, + int iCol, + int iPos, + Fts3Hash *pHash, /* Pending terms hash table to add entry to */ + const char *zToken, + int nToken +){ + PendingList *pList; + int rc = SQLITE_OK; + + pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); + if( pList ){ + p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); + } + if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ + if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ + /* Malloc failed while inserting the new entry. This can only + ** happen if there was no previous entry for this token. + */ + assert( 0==fts3HashFind(pHash, zToken, nToken) ); + sqlite3_free(pList); + rc = SQLITE_NOMEM; + } + } + if( rc==SQLITE_OK ){ + p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); + } + return rc; +} + +/* +** Tokenize the nul-terminated string zText and add all tokens to the +** pending-terms hash-table. The docid used is that currently stored in +** p->iPrevDocid, and the column is specified by argument iCol. +** +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. +*/ +static int fts3PendingTermsAdd( + Fts3Table *p, /* Table into which text will be inserted */ + int iLangid, /* Language id to use */ + const char *zText, /* Text of document to be inserted */ + int iCol, /* Column into which text is being inserted */ + u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ +){ + int rc; + int iStart = 0; + int iEnd = 0; + int iPos = 0; + int nWord = 0; + + char const *zToken; + int nToken = 0; + + sqlite3_tokenizer *pTokenizer = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr; + int (*xNext)(sqlite3_tokenizer_cursor *pCursor, + const char**,int*,int*,int*,int*); + + assert( pTokenizer && pModule ); + + /* If the user has inserted a NULL value, this function may be called with + ** zText==0. In this case, add zero token entries to the hash table and + ** return early. */ + if( zText==0 ){ + *pnWord = 0; + return SQLITE_OK; + } + + rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); + if( rc!=SQLITE_OK ){ + return rc; + } + + xNext = pModule->xNext; + while( SQLITE_OK==rc + && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) + ){ + int i; + if( iPos>=nWord ) nWord = iPos+1; + + /* Positions cannot be negative; we use -1 as a terminator internally. + ** Tokens must have a non-zero length. + */ + if( iPos<0 || !zToken || nToken<=0 ){ + rc = SQLITE_ERROR; + break; + } + + /* Add the term to the terms index */ + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken + ); + + /* Add the term to each of the prefix indexes that it is not too + ** short for. */ + for(i=1; rc==SQLITE_OK && inIndex; i++){ + struct Fts3Index *pIndex = &p->aIndex[i]; + if( nTokennPrefix ) continue; + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix + ); + } + } + + pModule->xClose(pCsr); + *pnWord += nWord; + return (rc==SQLITE_DONE ? SQLITE_OK : rc); +} + +/* +** Calling this function indicates that subsequent calls to +** fts3PendingTermsAdd() are to add term/position-list pairs for the +** contents of the document with docid iDocid. +*/ +static int fts3PendingTermsDocid( + Fts3Table *p, /* Full-text table handle */ + int bDelete, /* True if this op is a delete */ + int iLangid, /* Language id of row being written */ + sqlite_int64 iDocid /* Docid of row being written */ +){ + assert( iLangid>=0 ); + assert( bDelete==1 || bDelete==0 ); + + /* TODO(shess) Explore whether partially flushing the buffer on + ** forced-flush would provide better performance. I suspect that if + ** we ordered the doclists by size and flushed the largest until the + ** buffer was half empty, that would let the less frequent terms + ** generate longer doclists. + */ + if( iDocidiPrevDocid + || (iDocid==p->iPrevDocid && p->bPrevDelete==0) + || p->iPrevLangid!=iLangid + || p->nPendingData>p->nMaxPendingData + ){ + int rc = sqlite3Fts3PendingTermsFlush(p); + if( rc!=SQLITE_OK ) return rc; + } + p->iPrevDocid = iDocid; + p->iPrevLangid = iLangid; + p->bPrevDelete = bDelete; + return SQLITE_OK; +} + +/* +** Discard the contents of the pending-terms hash tables. +*/ +SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){ + int i; + for(i=0; inIndex; i++){ + Fts3HashElem *pElem; + Fts3Hash *pHash = &p->aIndex[i].hPending; + for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ + PendingList *pList = (PendingList *)fts3HashData(pElem); + fts3PendingListDelete(pList); + } + fts3HashClear(pHash); + } + p->nPendingData = 0; +} + +/* +** This function is called by the xUpdate() method as part of an INSERT +** operation. It adds entries for each term in the new record to the +** pendingTerms hash table. +** +** Argument apVal is the same as the similarly named argument passed to +** fts3InsertData(). Parameter iDocid is the docid of the new row. +*/ +static int fts3InsertTerms( + Fts3Table *p, + int iLangid, + sqlite3_value **apVal, + u32 *aSz +){ + int i; /* Iterator variable */ + for(i=2; inColumn+2; i++){ + int iCol = i-2; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_value_text(apVal[i]); + int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); + if( rc!=SQLITE_OK ){ + return rc; + } + aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); + } + } + return SQLITE_OK; +} + +/* +** This function is called by the xUpdate() method for an INSERT operation. +** The apVal parameter is passed a copy of the apVal argument passed by +** SQLite to the xUpdate() method. i.e: +** +** apVal[0] Not used for INSERT. +** apVal[1] rowid +** apVal[2] Left-most user-defined column +** ... +** apVal[p->nColumn+1] Right-most user-defined column +** apVal[p->nColumn+2] Hidden column with same name as table +** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) +** apVal[p->nColumn+4] Hidden languageid column +*/ +static int fts3InsertData( + Fts3Table *p, /* Full-text table */ + sqlite3_value **apVal, /* Array of values to insert */ + sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ +){ + int rc; /* Return code */ + sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ + + if( p->zContentTbl ){ + sqlite3_value *pRowid = apVal[p->nColumn+3]; + if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ + pRowid = apVal[1]; + } + if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ + return SQLITE_CONSTRAINT; + } + *piDocid = sqlite3_value_int64(pRowid); + return SQLITE_OK; + } + + /* Locate the statement handle used to insert data into the %_content + ** table. The SQL for this statement is: + ** + ** INSERT INTO %_content VALUES(?, ?, ?, ...) + ** + ** The statement features N '?' variables, where N is the number of user + ** defined columns in the FTS3 table, plus one for the docid field. + */ + rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); + if( rc==SQLITE_OK && p->zLanguageid ){ + rc = sqlite3_bind_int( + pContentInsert, p->nColumn+2, + sqlite3_value_int(apVal[p->nColumn+4]) + ); + } + if( rc!=SQLITE_OK ) return rc; + + /* There is a quirk here. The users INSERT statement may have specified + ** a value for the "rowid" field, for the "docid" field, or for both. + ** Which is a problem, since "rowid" and "docid" are aliases for the + ** same value. For example: + ** + ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); + ** + ** In FTS3, this is an error. It is an error to specify non-NULL values + ** for both docid and some other rowid alias. + */ + if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ + if( SQLITE_NULL==sqlite3_value_type(apVal[0]) + && SQLITE_NULL!=sqlite3_value_type(apVal[1]) + ){ + /* A rowid/docid conflict. */ + return SQLITE_ERROR; + } + rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); + if( rc!=SQLITE_OK ) return rc; + } + + /* Execute the statement to insert the record. Set *piDocid to the + ** new docid value. + */ + sqlite3_step(pContentInsert); + rc = sqlite3_reset(pContentInsert); + + *piDocid = sqlite3_last_insert_rowid(p->db); + return rc; +} + + + +/* +** Remove all data from the FTS3 table. Clear the hash table containing +** pending terms. +*/ +static int fts3DeleteAll(Fts3Table *p, int bContent){ + int rc = SQLITE_OK; /* Return code */ + + /* Discard the contents of the pending-terms hash table. */ + sqlite3Fts3PendingTermsClear(p); + + /* Delete everything from the shadow tables. Except, leave %_content as + ** is if bContent is false. */ + assert( p->zContentTbl==0 || bContent==0 ); + if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); + fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); + fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); + if( p->bHasDocsize ){ + fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); + } + if( p->bHasStat ){ + fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); + } + return rc; +} + +/* +** +*/ +static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ + int iLangid = 0; + if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); + return iLangid; +} + +/* +** The first element in the apVal[] array is assumed to contain the docid +** (an integer) of a row about to be deleted. Remove all terms from the +** full-text index. +*/ +static void fts3DeleteTerms( + int *pRC, /* Result code */ + Fts3Table *p, /* The FTS table to delete from */ + sqlite3_value *pRowid, /* The docid to be deleted */ + u32 *aSz, /* Sizes of deleted document written here */ + int *pbFound /* OUT: Set to true if row really does exist */ +){ + int rc; + sqlite3_stmt *pSelect; + + assert( *pbFound==0 ); + if( *pRC ) return; + rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pSelect) ){ + int i; + int iLangid = langidFromSelect(p, pSelect); + i64 iDocid = sqlite3_column_int64(pSelect, 0); + rc = fts3PendingTermsDocid(p, 1, iLangid, iDocid); + for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ + int iCol = i-1; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pSelect, i); + rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + } + } + if( rc!=SQLITE_OK ){ + sqlite3_reset(pSelect); + *pRC = rc; + return; + } + *pbFound = 1; + } + rc = sqlite3_reset(pSelect); + }else{ + sqlite3_reset(pSelect); + } + *pRC = rc; +} + +/* +** Forward declaration to account for the circular dependency between +** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). +*/ +static int fts3SegmentMerge(Fts3Table *, int, int, int); + +/* +** This function allocates a new level iLevel index in the segdir table. +** Usually, indexes are allocated within a level sequentially starting +** with 0, so the allocated index is one greater than the value returned +** by: +** +** SELECT max(idx) FROM %_segdir WHERE level = :iLevel +** +** However, if there are already FTS3_MERGE_COUNT indexes at the requested +** level, they are merged into a single level (iLevel+1) segment and the +** allocated index is 0. +** +** If successful, *piIdx is set to the allocated index slot and SQLITE_OK +** returned. Otherwise, an SQLite error code is returned. +*/ +static int fts3AllocateSegdirIdx( + Fts3Table *p, + int iLangid, /* Language id */ + int iIndex, /* Index for p->aIndex */ + int iLevel, + int *piIdx +){ + int rc; /* Return Code */ + sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ + int iNext = 0; /* Result of query pNextIdx */ + + assert( iLangid>=0 ); + assert( p->nIndex>=1 ); + + /* Set variable iNext to the next available segdir index at level iLevel. */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64( + pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) + ); + if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ + iNext = sqlite3_column_int(pNextIdx, 0); + } + rc = sqlite3_reset(pNextIdx); + } + + if( rc==SQLITE_OK ){ + /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already + ** full, merge all segments in level iLevel into a single iLevel+1 + ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, + ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. + */ + if( iNext>=MergeCount(p) ){ + fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); + rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); + *piIdx = 0; + }else{ + *piIdx = iNext; + } + } + + return rc; +} + +/* +** The %_segments table is declared as follows: +** +** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) +** +** This function reads data from a single row of the %_segments table. The +** specific row is identified by the iBlockid parameter. If paBlob is not +** NULL, then a buffer is allocated using sqlite3_malloc() and populated +** with the contents of the blob stored in the "block" column of the +** identified table row is. Whether or not paBlob is NULL, *pnBlob is set +** to the size of the blob in bytes before returning. +** +** If an error occurs, or the table does not contain the specified row, +** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If +** paBlob is non-NULL, then it is the responsibility of the caller to +** eventually free the returned buffer. +** +** This function may leave an open sqlite3_blob* handle in the +** Fts3Table.pSegments variable. This handle is reused by subsequent calls +** to this function. The handle may be closed by calling the +** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy +** performance improvement, but the blob handle should always be closed +** before control is returned to the user (to prevent a lock being held +** on the database file for longer than necessary). Thus, any virtual table +** method (xFilter etc.) that may directly or indirectly call this function +** must call sqlite3Fts3SegmentsClose() before returning. +*/ +SQLITE_PRIVATE int sqlite3Fts3ReadBlock( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ + char **paBlob, /* OUT: Blob data in malloc'd buffer */ + int *pnBlob, /* OUT: Size of blob data */ + int *pnLoad /* OUT: Bytes actually loaded */ +){ + int rc; /* Return code */ + + /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ + assert( pnBlob ); + + if( p->pSegments ){ + rc = sqlite3_blob_reopen(p->pSegments, iBlockid); + }else{ + if( 0==p->zSegmentsTbl ){ + p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); + if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; + } + rc = sqlite3_blob_open( + p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments + ); + } + + if( rc==SQLITE_OK ){ + int nByte = sqlite3_blob_bytes(p->pSegments); + *pnBlob = nByte; + if( paBlob ){ + char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); + if( !aByte ){ + rc = SQLITE_NOMEM; + }else{ + if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ + nByte = FTS3_NODE_CHUNKSIZE; + *pnLoad = nByte; + } + rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); + memset(&aByte[nByte], 0, FTS3_NODE_PADDING); + if( rc!=SQLITE_OK ){ + sqlite3_free(aByte); + aByte = 0; + } + } + *paBlob = aByte; + } + }else if( rc==SQLITE_ERROR ){ + rc = FTS_CORRUPT_VTAB; + } + + return rc; +} + +/* +** Close the blob handle at p->pSegments, if it is open. See comments above +** the sqlite3Fts3ReadBlock() function for details. +*/ +SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){ + sqlite3_blob_close(p->pSegments); + p->pSegments = 0; +} + +static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ + int nRead; /* Number of bytes to read */ + int rc; /* Return code */ + + nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); + rc = sqlite3_blob_read( + pReader->pBlob, + &pReader->aNode[pReader->nPopulate], + nRead, + pReader->nPopulate + ); + + if( rc==SQLITE_OK ){ + pReader->nPopulate += nRead; + memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); + if( pReader->nPopulate==pReader->nNode ){ + sqlite3_blob_close(pReader->pBlob); + pReader->pBlob = 0; + pReader->nPopulate = 0; + } + } + return rc; +} + +static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ + int rc = SQLITE_OK; + assert( !pReader->pBlob + || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) + ); + while( pReader->pBlob && rc==SQLITE_OK + && (pFrom - pReader->aNode + nByte)>pReader->nPopulate + ){ + rc = fts3SegReaderIncrRead(pReader); + } + return rc; +} + +/* +** Set an Fts3SegReader cursor to point at EOF. +*/ +static void fts3SegReaderSetEof(Fts3SegReader *pSeg){ + if( !fts3SegReaderIsRootOnly(pSeg) ){ + sqlite3_free(pSeg->aNode); + sqlite3_blob_close(pSeg->pBlob); + pSeg->pBlob = 0; + } + pSeg->aNode = 0; +} + +/* +** Move the iterator passed as the first argument to the next term in the +** segment. If successful, SQLITE_OK is returned. If there is no next term, +** SQLITE_DONE. Otherwise, an SQLite error code. +*/ +static int fts3SegReaderNext( + Fts3Table *p, + Fts3SegReader *pReader, + int bIncr +){ + int rc; /* Return code of various sub-routines */ + char *pNext; /* Cursor variable */ + int nPrefix; /* Number of bytes in term prefix */ + int nSuffix; /* Number of bytes in term suffix */ + + if( !pReader->aDoclist ){ + pNext = pReader->aNode; + }else{ + pNext = &pReader->aDoclist[pReader->nDoclist]; + } + + if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ + + if( fts3SegReaderIsPending(pReader) ){ + Fts3HashElem *pElem = *(pReader->ppNextElem); + sqlite3_free(pReader->aNode); + pReader->aNode = 0; + if( pElem ){ + char *aCopy; + PendingList *pList = (PendingList *)fts3HashData(pElem); + int nCopy = pList->nData+1; + + int nTerm = fts3HashKeysize(pElem); + if( (nTerm+1)>pReader->nTermAlloc ){ + sqlite3_free(pReader->zTerm); + pReader->zTerm = (char*)sqlite3_malloc((nTerm+1)*2); + if( !pReader->zTerm ) return SQLITE_NOMEM; + pReader->nTermAlloc = (nTerm+1)*2; + } + memcpy(pReader->zTerm, fts3HashKey(pElem), nTerm); + pReader->zTerm[nTerm] = '\0'; + pReader->nTerm = nTerm; + + aCopy = (char*)sqlite3_malloc(nCopy); + if( !aCopy ) return SQLITE_NOMEM; + memcpy(aCopy, pList->aData, nCopy); + pReader->nNode = pReader->nDoclist = nCopy; + pReader->aNode = pReader->aDoclist = aCopy; + pReader->ppNextElem++; + assert( pReader->aNode ); + } + return SQLITE_OK; + } + + fts3SegReaderSetEof(pReader); + + /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf + ** blocks have already been traversed. */ +#ifdef CORRUPT_DB + assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock || CORRUPT_DB ); +#endif + if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ + return SQLITE_OK; + } + + rc = sqlite3Fts3ReadBlock( + p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, + (bIncr ? &pReader->nPopulate : 0) + ); + if( rc!=SQLITE_OK ) return rc; + assert( pReader->pBlob==0 ); + if( bIncr && pReader->nPopulatenNode ){ + pReader->pBlob = p->pSegments; + p->pSegments = 0; + } + pNext = pReader->aNode; + } + + assert( !fts3SegReaderIsPending(pReader) ); + + rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); + if( rc!=SQLITE_OK ) return rc; + + /* Because of the FTS3_NODE_PADDING bytes of padding, the following is + ** safe (no risk of overread) even if the node data is corrupted. */ + pNext += fts3GetVarint32(pNext, &nPrefix); + pNext += fts3GetVarint32(pNext, &nSuffix); + if( nSuffix<=0 + || (&pReader->aNode[pReader->nNode] - pNext)pReader->nTerm + ){ + return FTS_CORRUPT_VTAB; + } + + /* Both nPrefix and nSuffix were read by fts3GetVarint32() and so are + ** between 0 and 0x7FFFFFFF. But the sum of the two may cause integer + ** overflow - hence the (i64) casts. */ + if( (i64)nPrefix+nSuffix>(i64)pReader->nTermAlloc ){ + i64 nNew = ((i64)nPrefix+nSuffix)*2; + char *zNew = sqlite3_realloc64(pReader->zTerm, nNew); + if( !zNew ){ + return SQLITE_NOMEM; + } + pReader->zTerm = zNew; + pReader->nTermAlloc = nNew; + } + + rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); + if( rc!=SQLITE_OK ) return rc; + + memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); + pReader->nTerm = nPrefix+nSuffix; + pNext += nSuffix; + pNext += fts3GetVarint32(pNext, &pReader->nDoclist); + pReader->aDoclist = pNext; + pReader->pOffsetList = 0; + + /* Check that the doclist does not appear to extend past the end of the + ** b-tree node. And that the final byte of the doclist is 0x00. If either + ** of these statements is untrue, then the data structure is corrupt. + */ + if( pReader->nDoclist > pReader->nNode-(pReader->aDoclist-pReader->aNode) + || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) + || pReader->nDoclist==0 + ){ + return FTS_CORRUPT_VTAB; + } + return SQLITE_OK; +} + +/* +** Set the SegReader to point to the first docid in the doclist associated +** with the current term. +*/ +static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ + int rc = SQLITE_OK; + assert( pReader->aDoclist ); + assert( !pReader->pOffsetList ); + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + u8 bEof = 0; + pReader->iDocid = 0; + pReader->nOffsetList = 0; + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, + &pReader->iDocid, &pReader->nOffsetList, &bEof + ); + }else{ + rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); + pReader->pOffsetList = &pReader->aDoclist[n]; + } + } + return rc; +} + +/* +** Advance the SegReader to point to the next docid in the doclist +** associated with the current term. +** +** If arguments ppOffsetList and pnOffsetList are not NULL, then +** *ppOffsetList is set to point to the first column-offset list +** in the doclist entry (i.e. immediately past the docid varint). +** *pnOffsetList is set to the length of the set of column-offset +** lists, not including the nul-terminator byte. For example: +*/ +static int fts3SegReaderNextDocid( + Fts3Table *pTab, + Fts3SegReader *pReader, /* Reader to advance to next docid */ + char **ppOffsetList, /* OUT: Pointer to current position-list */ + int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ +){ + int rc = SQLITE_OK; + char *p = pReader->pOffsetList; + char c = 0; + + assert( p ); + + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + /* A pending-terms seg-reader for an FTS4 table that uses order=desc. + ** Pending-terms doclists are always built up in ascending order, so + ** we have to iterate through them backwards here. */ + u8 bEof = 0; + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = pReader->nOffsetList - 1; + } + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, + &pReader->nOffsetList, &bEof + ); + if( bEof ){ + pReader->pOffsetList = 0; + }else{ + pReader->pOffsetList = p; + } + }else{ + char *pEnd = &pReader->aDoclist[pReader->nDoclist]; + + /* Pointer p currently points at the first byte of an offset list. The + ** following block advances it to point one byte past the end of + ** the same offset list. */ + while( 1 ){ + + /* The following line of code (and the "p++" below the while() loop) is + ** normally all that is required to move pointer p to the desired + ** position. The exception is if this node is being loaded from disk + ** incrementally and pointer "p" now points to the first byte past + ** the populated part of pReader->aNode[]. + */ + while( *p | c ) c = *p++ & 0x80; + assert( *p==0 ); + + if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; + rc = fts3SegReaderIncrRead(pReader); + if( rc!=SQLITE_OK ) return rc; + } + p++; + + /* If required, populate the output variables with a pointer to and the + ** size of the previous offset-list. + */ + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = (int)(p - pReader->pOffsetList - 1); + } + + /* List may have been edited in place by fts3EvalNearTrim() */ + while( p=pEnd ){ + pReader->pOffsetList = 0; + }else{ + rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + u64 iDelta; + pReader->pOffsetList = p + sqlite3Fts3GetVarintU(p, &iDelta); + if( pTab->bDescIdx ){ + pReader->iDocid = (i64)((u64)pReader->iDocid - iDelta); + }else{ + pReader->iDocid = (i64)((u64)pReader->iDocid + iDelta); + } + } + } + } + + return rc; +} + + +SQLITE_PRIVATE int sqlite3Fts3MsrOvfl( + Fts3Cursor *pCsr, + Fts3MultiSegReader *pMsr, + int *pnOvfl +){ + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + int nOvfl = 0; + int ii; + int rc = SQLITE_OK; + int pgsz = p->nPgsz; + + assert( p->bFts4 ); + assert( pgsz>0 ); + + for(ii=0; rc==SQLITE_OK && iinSegment; ii++){ + Fts3SegReader *pReader = pMsr->apSegment[ii]; + if( !fts3SegReaderIsPending(pReader) + && !fts3SegReaderIsRootOnly(pReader) + ){ + sqlite3_int64 jj; + for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ + int nBlob; + rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); + if( rc!=SQLITE_OK ) break; + if( (nBlob+35)>pgsz ){ + nOvfl += (nBlob + 34)/pgsz; + } + } + } + } + *pnOvfl = nOvfl; + return rc; +} + +/* +** Free all allocations associated with the iterator passed as the +** second argument. +*/ +SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ + if( pReader ){ + sqlite3_free(pReader->zTerm); + if( !fts3SegReaderIsRootOnly(pReader) ){ + sqlite3_free(pReader->aNode); + } + sqlite3_blob_close(pReader->pBlob); + } + sqlite3_free(pReader); +} + +/* +** Allocate a new SegReader object. +*/ +SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( + int iAge, /* Segment "age". */ + int bLookup, /* True for a lookup only */ + sqlite3_int64 iStartLeaf, /* First leaf to traverse */ + sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ + sqlite3_int64 iEndBlock, /* Final block of segment */ + const char *zRoot, /* Buffer containing root node */ + int nRoot, /* Size of buffer containing root node */ + Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ +){ + Fts3SegReader *pReader; /* Newly allocated SegReader object */ + int nExtra = 0; /* Bytes to allocate segment root node */ + + assert( zRoot!=0 || nRoot==0 ); +#ifdef CORRUPT_DB + assert( zRoot!=0 || CORRUPT_DB ); +#endif + + if( iStartLeaf==0 ){ + if( iEndLeaf!=0 ) return FTS_CORRUPT_VTAB; + nExtra = nRoot + FTS3_NODE_PADDING; + } + + pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); + if( !pReader ){ + return SQLITE_NOMEM; + } + memset(pReader, 0, sizeof(Fts3SegReader)); + pReader->iIdx = iAge; + pReader->bLookup = bLookup!=0; + pReader->iStartBlock = iStartLeaf; + pReader->iLeafEndBlock = iEndLeaf; + pReader->iEndBlock = iEndBlock; + + if( nExtra ){ + /* The entire segment is stored in the root node. */ + pReader->aNode = (char *)&pReader[1]; + pReader->rootOnly = 1; + pReader->nNode = nRoot; + if( nRoot ) memcpy(pReader->aNode, zRoot, nRoot); + memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); + }else{ + pReader->iCurrentBlock = iStartLeaf-1; + } + *ppReader = pReader; + return SQLITE_OK; +} + +/* +** This is a comparison function used as a qsort() callback when sorting +** an array of pending terms by term. This occurs as part of flushing +** the contents of the pending-terms hash table to the database. +*/ +static int SQLITE_CDECL fts3CompareElemByTerm( + const void *lhs, + const void *rhs +){ + char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); + char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); + int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); + int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); + + int n = (n1aIndex */ + const char *zTerm, /* Term to search for */ + int nTerm, /* Size of buffer zTerm */ + int bPrefix, /* True for a prefix iterator */ + Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ +){ + Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ + Fts3HashElem *pE; /* Iterator variable */ + Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ + int nElem = 0; /* Size of array at aElem */ + int rc = SQLITE_OK; /* Return Code */ + Fts3Hash *pHash; + + pHash = &p->aIndex[iIndex].hPending; + if( bPrefix ){ + int nAlloc = 0; /* Size of allocated array at aElem */ + + for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ + char *zKey = (char *)fts3HashKey(pE); + int nKey = fts3HashKeysize(pE); + if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ + if( nElem==nAlloc ){ + Fts3HashElem **aElem2; + nAlloc += 16; + aElem2 = (Fts3HashElem **)sqlite3_realloc( + aElem, nAlloc*sizeof(Fts3HashElem *) + ); + if( !aElem2 ){ + rc = SQLITE_NOMEM; + nElem = 0; + break; + } + aElem = aElem2; + } + + aElem[nElem++] = pE; + } + } + + /* If more than one term matches the prefix, sort the Fts3HashElem + ** objects in term order using qsort(). This uses the same comparison + ** callback as is used when flushing terms to disk. + */ + if( nElem>1 ){ + qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); + } + + }else{ + /* The query is a simple term lookup that matches at most one term in + ** the index. All that is required is a straight hash-lookup. + ** + ** Because the stack address of pE may be accessed via the aElem pointer + ** below, the "Fts3HashElem *pE" must be declared so that it is valid + ** within this entire function, not just this "else{...}" block. + */ + pE = fts3HashFindElem(pHash, zTerm, nTerm); + if( pE ){ + aElem = &pE; + nElem = 1; + } + } + + if( nElem>0 ){ + sqlite3_int64 nByte; + nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); + pReader = (Fts3SegReader *)sqlite3_malloc64(nByte); + if( !pReader ){ + rc = SQLITE_NOMEM; + }else{ + memset(pReader, 0, nByte); + pReader->iIdx = 0x7FFFFFFF; + pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; + memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); + } + } + + if( bPrefix ){ + sqlite3_free(aElem); + } + *ppReader = pReader; + return rc; +} + +/* +** Compare the entries pointed to by two Fts3SegReader structures. +** Comparison is as follows: +** +** 1) EOF is greater than not EOF. +** +** 2) The current terms (if any) are compared using memcmp(). If one +** term is a prefix of another, the longer term is considered the +** larger. +** +** 3) By segment age. An older segment is considered larger. +*/ +static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc; + if( pLhs->aNode && pRhs->aNode ){ + int rc2 = pLhs->nTerm - pRhs->nTerm; + if( rc2<0 ){ + rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); + }else{ + rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); + } + if( rc==0 ){ + rc = rc2; + } + }else{ + rc = (pLhs->aNode==0) - (pRhs->aNode==0); + } + if( rc==0 ){ + rc = pRhs->iIdx - pLhs->iIdx; + } + assert_fts3_nc( rc!=0 ); + return rc; +} + +/* +** A different comparison function for SegReader structures. In this +** version, it is assumed that each SegReader points to an entry in +** a doclist for identical terms. Comparison is made as follows: +** +** 1) EOF (end of doclist in this case) is greater than not EOF. +** +** 2) By current docid. +** +** 3) By segment age. An older segment is considered larger. +*/ +static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); + if( rc==0 ){ + if( pLhs->iDocid==pRhs->iDocid ){ + rc = pRhs->iIdx - pLhs->iIdx; + }else{ + rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; + } + } + assert( pLhs->aNode && pRhs->aNode ); + return rc; +} +static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); + if( rc==0 ){ + if( pLhs->iDocid==pRhs->iDocid ){ + rc = pRhs->iIdx - pLhs->iIdx; + }else{ + rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; + } + } + assert( pLhs->aNode && pRhs->aNode ); + return rc; +} + +/* +** Compare the term that the Fts3SegReader object passed as the first argument +** points to with the term specified by arguments zTerm and nTerm. +** +** If the pSeg iterator is already at EOF, return 0. Otherwise, return +** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are +** equal, or +ve if the pSeg term is greater than zTerm/nTerm. +*/ +static int fts3SegReaderTermCmp( + Fts3SegReader *pSeg, /* Segment reader object */ + const char *zTerm, /* Term to compare to */ + int nTerm /* Size of term zTerm in bytes */ +){ + int res = 0; + if( pSeg->aNode ){ + if( pSeg->nTerm>nTerm ){ + res = memcmp(pSeg->zTerm, zTerm, nTerm); + }else{ + res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); + } + if( res==0 ){ + res = pSeg->nTerm-nTerm; + } + } + return res; +} + +/* +** Argument apSegment is an array of nSegment elements. It is known that +** the final (nSegment-nSuspect) members are already in sorted order +** (according to the comparison function provided). This function shuffles +** the array around until all entries are in sorted order. +*/ +static void fts3SegReaderSort( + Fts3SegReader **apSegment, /* Array to sort entries of */ + int nSegment, /* Size of apSegment array */ + int nSuspect, /* Unsorted entry count */ + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ +){ + int i; /* Iterator variable */ + + assert( nSuspect<=nSegment ); + + if( nSuspect==nSegment ) nSuspect--; + for(i=nSuspect-1; i>=0; i--){ + int j; + for(j=i; j<(nSegment-1); j++){ + Fts3SegReader *pTmp; + if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; + pTmp = apSegment[j+1]; + apSegment[j+1] = apSegment[j]; + apSegment[j] = pTmp; + } + } + +#ifndef NDEBUG + /* Check that the list really is sorted now. */ + for(i=0; i<(nSuspect-1); i++){ + assert( xCmp(apSegment[i], apSegment[i+1])<0 ); + } +#endif +} + +/* +** Insert a record into the %_segments table. +*/ +static int fts3WriteSegment( + Fts3Table *p, /* Virtual table handle */ + sqlite3_int64 iBlock, /* Block id for new block */ + char *z, /* Pointer to buffer containing block data */ + int n /* Size of buffer z in bytes */ +){ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iBlock); + sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + sqlite3_bind_null(pStmt, 2); + } + return rc; +} + +/* +** Find the largest relative level number in the table. If successful, set +** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs, +** set *pnMax to zero and return an SQLite error code. +*/ +SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){ + int rc; + int mxLevel = 0; + sqlite3_stmt *pStmt = 0; + + rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + mxLevel = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_reset(pStmt); + } + *pnMax = mxLevel; + return rc; +} + +/* +** Insert a record into the %_segdir table. +*/ +static int fts3WriteSegdir( + Fts3Table *p, /* Virtual table handle */ + sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */ + int iIdx, /* Value for "idx" field */ + sqlite3_int64 iStartBlock, /* Value for "start_block" field */ + sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ + sqlite3_int64 iEndBlock, /* Value for "end_block" field */ + sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ + char *zRoot, /* Blob value for "root" field */ + int nRoot /* Number of bytes in buffer zRoot */ +){ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iLevel); + sqlite3_bind_int(pStmt, 2, iIdx); + sqlite3_bind_int64(pStmt, 3, iStartBlock); + sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); + if( nLeafData==0 ){ + sqlite3_bind_int64(pStmt, 5, iEndBlock); + }else{ + char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); + if( !zEnd ) return SQLITE_NOMEM; + sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); + } + sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + sqlite3_bind_null(pStmt, 6); + } + return rc; +} + +/* +** Return the size of the common prefix (if any) shared by zPrev and +** zNext, in bytes. For example, +** +** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3 +** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2 +** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0 +*/ +static int fts3PrefixCompress( + const char *zPrev, /* Buffer containing previous term */ + int nPrev, /* Size of buffer zPrev in bytes */ + const char *zNext, /* Buffer containing next term */ + int nNext /* Size of buffer zNext in bytes */ +){ + int n; + for(n=0; nnData; /* Current size of node in bytes */ + int nReq = nData; /* Required space after adding zTerm */ + int nPrefix; /* Number of bytes of prefix compression */ + int nSuffix; /* Suffix length */ + + nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); + nSuffix = nTerm-nPrefix; + + /* If nSuffix is zero or less, then zTerm/nTerm must be a prefix of + ** pWriter->zTerm/pWriter->nTerm. i.e. must be equal to or less than when + ** compared with BINARY collation. This indicates corruption. */ + if( nSuffix<=0 ) return FTS_CORRUPT_VTAB; + + nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; + if( nReq<=p->nNodeSize || !pTree->zTerm ){ + + if( nReq>p->nNodeSize ){ + /* An unusual case: this is the first term to be added to the node + ** and the static node buffer (p->nNodeSize bytes) is not large + ** enough. Use a separately malloced buffer instead This wastes + ** p->nNodeSize bytes, but since this scenario only comes about when + ** the database contain two terms that share a prefix of almost 2KB, + ** this is not expected to be a serious problem. + */ + assert( pTree->aData==(char *)&pTree[1] ); + pTree->aData = (char *)sqlite3_malloc(nReq); + if( !pTree->aData ){ + return SQLITE_NOMEM; + } + } + + if( pTree->zTerm ){ + /* There is no prefix-length field for first term in a node */ + nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix); + } + + nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix); + memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix); + pTree->nData = nData + nSuffix; + pTree->nEntry++; + + if( isCopyTerm ){ + if( pTree->nMalloczMalloc, nTerm*2); + if( !zNew ){ + return SQLITE_NOMEM; + } + pTree->nMalloc = nTerm*2; + pTree->zMalloc = zNew; + } + pTree->zTerm = pTree->zMalloc; + memcpy(pTree->zTerm, zTerm, nTerm); + pTree->nTerm = nTerm; + }else{ + pTree->zTerm = (char *)zTerm; + pTree->nTerm = nTerm; + } + return SQLITE_OK; + } + } + + /* If control flows to here, it was not possible to append zTerm to the + ** current node. Create a new node (a right-sibling of the current node). + ** If this is the first node in the tree, the term is added to it. + ** + ** Otherwise, the term is not added to the new node, it is left empty for + ** now. Instead, the term is inserted into the parent of pTree. If pTree + ** has no parent, one is created here. + */ + pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); + if( !pNew ){ + return SQLITE_NOMEM; + } + memset(pNew, 0, sizeof(SegmentNode)); + pNew->nData = 1 + FTS3_VARINT_MAX; + pNew->aData = (char *)&pNew[1]; + + if( pTree ){ + SegmentNode *pParent = pTree->pParent; + rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm); + if( pTree->pParent==0 ){ + pTree->pParent = pParent; + } + pTree->pRight = pNew; + pNew->pLeftmost = pTree->pLeftmost; + pNew->pParent = pParent; + pNew->zMalloc = pTree->zMalloc; + pNew->nMalloc = pTree->nMalloc; + pTree->zMalloc = 0; + }else{ + pNew->pLeftmost = pNew; + rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); + } + + *ppTree = pNew; + return rc; +} + +/* +** Helper function for fts3NodeWrite(). +*/ +static int fts3TreeFinishNode( + SegmentNode *pTree, + int iHeight, + sqlite3_int64 iLeftChild +){ + int nStart; + assert( iHeight>=1 && iHeight<128 ); + nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); + pTree->aData[nStart] = (char)iHeight; + sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); + return nStart; +} + +/* +** Write the buffer for the segment node pTree and all of its peers to the +** database. Then call this function recursively to write the parent of +** pTree and its peers to the database. +** +** Except, if pTree is a root node, do not write it to the database. Instead, +** set output variables *paRoot and *pnRoot to contain the root node. +** +** If successful, SQLITE_OK is returned and output variable *piLast is +** set to the largest blockid written to the database (or zero if no +** blocks were written to the db). Otherwise, an SQLite error code is +** returned. +*/ +static int fts3NodeWrite( + Fts3Table *p, /* Virtual table handle */ + SegmentNode *pTree, /* SegmentNode handle */ + int iHeight, /* Height of this node in tree */ + sqlite3_int64 iLeaf, /* Block id of first leaf node */ + sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ + sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ + char **paRoot, /* OUT: Data for root node */ + int *pnRoot /* OUT: Size of root node in bytes */ +){ + int rc = SQLITE_OK; + + if( !pTree->pParent ){ + /* Root node of the tree. */ + int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); + *piLast = iFree-1; + *pnRoot = pTree->nData - nStart; + *paRoot = &pTree->aData[nStart]; + }else{ + SegmentNode *pIter; + sqlite3_int64 iNextFree = iFree; + sqlite3_int64 iNextLeaf = iLeaf; + for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ + int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); + int nWrite = pIter->nData - nStart; + + rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); + iNextFree++; + iNextLeaf += (pIter->nEntry+1); + } + if( rc==SQLITE_OK ){ + assert( iNextLeaf==iFree ); + rc = fts3NodeWrite( + p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot + ); + } + } + + return rc; +} + +/* +** Free all memory allocations associated with the tree pTree. +*/ +static void fts3NodeFree(SegmentNode *pTree){ + if( pTree ){ + SegmentNode *p = pTree->pLeftmost; + fts3NodeFree(p->pParent); + while( p ){ + SegmentNode *pRight = p->pRight; + if( p->aData!=(char *)&p[1] ){ + sqlite3_free(p->aData); + } + assert( pRight==0 || p->zMalloc==0 ); + sqlite3_free(p->zMalloc); + sqlite3_free(p); + p = pRight; + } + } +} + +/* +** Add a term to the segment being constructed by the SegmentWriter object +** *ppWriter. When adding the first term to a segment, *ppWriter should +** be passed NULL. This function will allocate a new SegmentWriter object +** and return it via the input/output variable *ppWriter in this case. +** +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. +*/ +static int fts3SegWriterAdd( + Fts3Table *p, /* Virtual table handle */ + SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ + int isCopyTerm, /* True if buffer zTerm must be copied */ + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm, /* Size of term in bytes */ + const char *aDoclist, /* Pointer to buffer containing doclist */ + int nDoclist /* Size of doclist in bytes */ +){ + int nPrefix; /* Size of term prefix in bytes */ + int nSuffix; /* Size of term suffix in bytes */ + int nReq; /* Number of bytes required on leaf page */ + int nData; + SegmentWriter *pWriter = *ppWriter; + + if( !pWriter ){ + int rc; + sqlite3_stmt *pStmt; + + /* Allocate the SegmentWriter structure */ + pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); + if( !pWriter ) return SQLITE_NOMEM; + memset(pWriter, 0, sizeof(SegmentWriter)); + *ppWriter = pWriter; + + /* Allocate a buffer in which to accumulate data */ + pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); + if( !pWriter->aData ) return SQLITE_NOMEM; + pWriter->nSize = p->nNodeSize; + + /* Find the next free blockid in the %_segments table */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + pWriter->iFree = sqlite3_column_int64(pStmt, 0); + pWriter->iFirst = pWriter->iFree; + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ) return rc; + } + nData = pWriter->nData; + + nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); + nSuffix = nTerm-nPrefix; + + /* If nSuffix is zero or less, then zTerm/nTerm must be a prefix of + ** pWriter->zTerm/pWriter->nTerm. i.e. must be equal to or less than when + ** compared with BINARY collation. This indicates corruption. */ + if( nSuffix<=0 ) return FTS_CORRUPT_VTAB; + + /* Figure out how many bytes are required by this new entry */ + nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ + sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ + nSuffix + /* Term suffix */ + sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ + nDoclist; /* Doclist data */ + + if( nData>0 && nData+nReq>p->nNodeSize ){ + int rc; + + /* The current leaf node is full. Write it out to the database. */ + if( pWriter->iFree==LARGEST_INT64 ) return FTS_CORRUPT_VTAB; + rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); + if( rc!=SQLITE_OK ) return rc; + p->nLeafAdd++; + + /* Add the current term to the interior node tree. The term added to + ** the interior tree must: + ** + ** a) be greater than the largest term on the leaf node just written + ** to the database (still available in pWriter->zTerm), and + ** + ** b) be less than or equal to the term about to be added to the new + ** leaf node (zTerm/nTerm). + ** + ** In other words, it must be the prefix of zTerm 1 byte longer than + ** the common prefix (if any) of zTerm and pWriter->zTerm. + */ + assert( nPrefixpTree, isCopyTerm, zTerm, nPrefix+1); + if( rc!=SQLITE_OK ) return rc; + + nData = 0; + pWriter->nTerm = 0; + + nPrefix = 0; + nSuffix = nTerm; + nReq = 1 + /* varint containing prefix size */ + sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */ + nTerm + /* Term suffix */ + sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ + nDoclist; /* Doclist data */ + } + + /* Increase the total number of bytes written to account for the new entry. */ + pWriter->nLeafData += nReq; + + /* If the buffer currently allocated is too small for this entry, realloc + ** the buffer to make it large enough. + */ + if( nReq>pWriter->nSize ){ + char *aNew = sqlite3_realloc(pWriter->aData, nReq); + if( !aNew ) return SQLITE_NOMEM; + pWriter->aData = aNew; + pWriter->nSize = nReq; + } + assert( nData+nReq<=pWriter->nSize ); + + /* Append the prefix-compressed term and doclist to the buffer. */ + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); + assert( nSuffix>0 ); + memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); + nData += nSuffix; + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); + assert( nDoclist>0 ); + memcpy(&pWriter->aData[nData], aDoclist, nDoclist); + pWriter->nData = nData + nDoclist; + + /* Save the current term so that it can be used to prefix-compress the next. + ** If the isCopyTerm parameter is true, then the buffer pointed to by + ** zTerm is transient, so take a copy of the term data. Otherwise, just + ** store a copy of the pointer. + */ + if( isCopyTerm ){ + if( nTerm>pWriter->nMalloc ){ + char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); + if( !zNew ){ + return SQLITE_NOMEM; + } + pWriter->nMalloc = nTerm*2; + pWriter->zMalloc = zNew; + pWriter->zTerm = zNew; + } + assert( pWriter->zTerm==pWriter->zMalloc ); + assert( nTerm>0 ); + memcpy(pWriter->zTerm, zTerm, nTerm); + }else{ + pWriter->zTerm = (char *)zTerm; + } + pWriter->nTerm = nTerm; + + return SQLITE_OK; +} + +/* +** Flush all data associated with the SegmentWriter object pWriter to the +** database. This function must be called after all terms have been added +** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is +** returned. Otherwise, an SQLite error code. +*/ +static int fts3SegWriterFlush( + Fts3Table *p, /* Virtual table handle */ + SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ + sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */ + int iIdx /* Value for 'idx' column of %_segdir */ +){ + int rc; /* Return code */ + if( pWriter->pTree ){ + sqlite3_int64 iLast = 0; /* Largest block id written to database */ + sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ + char *zRoot = NULL; /* Pointer to buffer containing root node */ + int nRoot = 0; /* Size of buffer zRoot */ + + iLastLeaf = pWriter->iFree; + rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); + if( rc==SQLITE_OK ){ + rc = fts3NodeWrite(p, pWriter->pTree, 1, + pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); + } + if( rc==SQLITE_OK ){ + rc = fts3WriteSegdir(p, iLevel, iIdx, + pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); + } + }else{ + /* The entire tree fits on the root node. Write it to the segdir table. */ + rc = fts3WriteSegdir(p, iLevel, iIdx, + 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); + } + p->nLeafAdd++; + return rc; +} + +/* +** Release all memory held by the SegmentWriter object passed as the +** first argument. +*/ +static void fts3SegWriterFree(SegmentWriter *pWriter){ + if( pWriter ){ + sqlite3_free(pWriter->aData); + sqlite3_free(pWriter->zMalloc); + fts3NodeFree(pWriter->pTree); + sqlite3_free(pWriter); + } +} + +/* +** The first value in the apVal[] array is assumed to contain an integer. +** This function tests if there exist any documents with docid values that +** are different from that integer. i.e. if deleting the document with docid +** pRowid would mean the FTS3 table were empty. +** +** If successful, *pisEmpty is set to true if the table is empty except for +** document pRowid, or false otherwise, and SQLITE_OK is returned. If an +** error occurs, an SQLite error code is returned. +*/ +static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ + sqlite3_stmt *pStmt; + int rc; + if( p->zContentTbl ){ + /* If using the content=xxx option, assume the table is never empty */ + *pisEmpty = 0; + rc = SQLITE_OK; + }else{ + rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pisEmpty = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_reset(pStmt); + } + } + return rc; +} + +/* +** Set *pnMax to the largest segment level in the database for the index +** iIndex. +** +** Segment levels are stored in the 'level' column of the %_segdir table. +** +** Return SQLITE_OK if successful, or an SQLite error code if not. +*/ +static int fts3SegmentMaxLevel( + Fts3Table *p, + int iLangid, + int iIndex, + sqlite3_int64 *pnMax +){ + sqlite3_stmt *pStmt; + int rc; + assert( iIndex>=0 && iIndexnIndex ); + + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pnMax = sqlite3_column_int64(pStmt, 0); + } + return sqlite3_reset(pStmt); +} + +/* +** iAbsLevel is an absolute level that may be assumed to exist within +** the database. This function checks if it is the largest level number +** within its index. Assuming no error occurs, *pbMax is set to 1 if +** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK +** is returned. If an error occurs, an error code is returned and the +** final value of *pbMax is undefined. +*/ +static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ + + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); + sqlite3_bind_int64(pStmt, 2, + (((u64)iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL + ); + + *pbMax = 0; + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; + } + return sqlite3_reset(pStmt); +} + +/* +** Delete all entries in the %_segments table associated with the segment +** opened with seg-reader pSeg. This function does not affect the contents +** of the %_segdir table. +*/ +static int fts3DeleteSegment( + Fts3Table *p, /* FTS table handle */ + Fts3SegReader *pSeg /* Segment to delete */ +){ + int rc = SQLITE_OK; /* Return code */ + if( pSeg->iStartBlock ){ + sqlite3_stmt *pDelete; /* SQL statement to delete rows */ + rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock); + sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock); + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); + } + } + return rc; +} + +/* +** This function is used after merging multiple segments into a single large +** segment to delete the old, now redundant, segment b-trees. Specifically, +** it: +** +** 1) Deletes all %_segments entries for the segments associated with +** each of the SegReader objects in the array passed as the third +** argument, and +** +** 2) deletes all %_segdir entries with level iLevel, or all %_segdir +** entries regardless of level if (iLevel<0). +** +** SQLITE_OK is returned if successful, otherwise an SQLite error code. +*/ +static int fts3DeleteSegdir( + Fts3Table *p, /* Virtual table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index for p->aIndex */ + int iLevel, /* Level of %_segdir entries to delete */ + Fts3SegReader **apSegment, /* Array of SegReader objects */ + int nReader /* Size of array apSegment */ +){ + int rc = SQLITE_OK; /* Return Code */ + int i; /* Iterator variable */ + sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */ + + for(i=0; rc==SQLITE_OK && i=0 || iLevel==FTS3_SEGCURSOR_ALL ); + if( iLevel==FTS3_SEGCURSOR_ALL ){ + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pDelete, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + } + }else{ + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64( + pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) + ); + } + } + + if( rc==SQLITE_OK ){ + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); + } + + return rc; +} + +/* +** When this function is called, buffer *ppList (size *pnList bytes) contains +** a position list that may (or may not) feature multiple columns. This +** function adjusts the pointer *ppList and the length *pnList so that they +** identify the subset of the position list that corresponds to column iCol. +** +** If there are no entries in the input position list for column iCol, then +** *pnList is set to zero before returning. +** +** If parameter bZero is non-zero, then any part of the input list following +** the end of the output list is zeroed before returning. +*/ +static void fts3ColumnFilter( + int iCol, /* Column to filter on */ + int bZero, /* Zero out anything following *ppList */ + char **ppList, /* IN/OUT: Pointer to position list */ + int *pnList /* IN/OUT: Size of buffer *ppList in bytes */ +){ + char *pList = *ppList; + int nList = *pnList; + char *pEnd = &pList[nList]; + int iCurrent = 0; + char *p = pList; + + assert( iCol>=0 ); + while( 1 ){ + char c = 0; + while( p0){ + memset(&pList[nList], 0, pEnd - &pList[nList]); + } + *ppList = pList; + *pnList = nList; +} + +/* +** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any +** existing data). Grow the buffer if required. +** +** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered +** trying to resize the buffer, return SQLITE_NOMEM. +*/ +static int fts3MsrBufferData( + Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ + char *pList, + int nList +){ + if( nList>pMsr->nBuffer ){ + char *pNew; + pMsr->nBuffer = nList*2; + pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); + if( !pNew ) return SQLITE_NOMEM; + pMsr->aBuffer = pNew; + } + + assert( nList>0 ); + memcpy(pMsr->aBuffer, pList, nList); + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ + sqlite3_int64 *piDocid, /* OUT: Docid value */ + char **paPoslist, /* OUT: Pointer to position list */ + int *pnPoslist /* OUT: Size of position list in bytes */ +){ + int nMerge = pMsr->nAdvance; + Fts3SegReader **apSegment = pMsr->apSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); + + if( nMerge==0 ){ + *paPoslist = 0; + return SQLITE_OK; + } + + while( 1 ){ + Fts3SegReader *pSeg; + pSeg = pMsr->apSegment[0]; + + if( pSeg->pOffsetList==0 ){ + *paPoslist = 0; + break; + }else{ + int rc; + char *pList; + int nList; + int j; + sqlite3_int64 iDocid = apSegment[0]->iDocid; + + rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); + j = 1; + while( rc==SQLITE_OK + && jpOffsetList + && apSegment[j]->iDocid==iDocid + ){ + rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); + j++; + } + if( rc!=SQLITE_OK ) return rc; + fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); + + if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ + rc = fts3MsrBufferData(pMsr, pList, nList+1); + if( rc!=SQLITE_OK ) return rc; + assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); + pList = pMsr->aBuffer; + } + + if( pMsr->iColFilter>=0 ){ + fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); + } + + if( nList>0 ){ + *paPoslist = pList; + *piDocid = iDocid; + *pnPoslist = nList; + break; + } + } + } + + return SQLITE_OK; +} + +static int fts3SegReaderStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + const char *zTerm, /* Term searched for (or NULL) */ + int nTerm /* Length of zTerm in bytes */ +){ + int i; + int nSeg = pCsr->nSegment; + + /* If the Fts3SegFilter defines a specific term (or term prefix) to search + ** for, then advance each segment iterator until it points to a term of + ** equal or greater value than the specified term. This prevents many + ** unnecessary merge/sort operations for the case where single segment + ** b-tree leaf nodes contain more than one term. + */ + for(i=0; pCsr->bRestart==0 && inSegment; i++){ + int res = 0; + Fts3SegReader *pSeg = pCsr->apSegment[i]; + do { + int rc = fts3SegReaderNext(p, pSeg, 0); + if( rc!=SQLITE_OK ) return rc; + }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 ); + + if( pSeg->bLookup && res!=0 ){ + fts3SegReaderSetEof(pSeg); + } + } + fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp); + + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3Fts3SegReaderStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + Fts3SegFilter *pFilter /* Restrictions on range of iteration */ +){ + pCsr->pFilter = pFilter; + return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm); +} + +SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + int iCol, /* Column to match on. */ + const char *zTerm, /* Term to iterate through a doclist for */ + int nTerm /* Number of bytes in zTerm */ +){ + int i; + int rc; + int nSegment = pCsr->nSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); + + assert( pCsr->pFilter==0 ); + assert( zTerm && nTerm>0 ); + + /* Advance each segment iterator until it points to the term zTerm/nTerm. */ + rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm); + if( rc!=SQLITE_OK ) return rc; + + /* Determine how many of the segments actually point to zTerm/nTerm. */ + for(i=0; iapSegment[i]; + if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ + break; + } + } + pCsr->nAdvance = i; + + /* Advance each of the segments to point to the first docid. */ + for(i=0; inAdvance; i++){ + rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); + if( rc!=SQLITE_OK ) return rc; + } + fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); + + assert( iCol<0 || iColnColumn ); + pCsr->iColFilter = iCol; + + return SQLITE_OK; +} + +/* +** This function is called on a MultiSegReader that has been started using +** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also +** have been made. Calling this function puts the MultiSegReader in such +** a state that if the next two calls are: +** +** sqlite3Fts3SegReaderStart() +** sqlite3Fts3SegReaderStep() +** +** then the entire doclist for the term is available in +** MultiSegReader.aDoclist/nDoclist. +*/ +SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ + int i; /* Used to iterate through segment-readers */ + + assert( pCsr->zTerm==0 ); + assert( pCsr->nTerm==0 ); + assert( pCsr->aDoclist==0 ); + assert( pCsr->nDoclist==0 ); + + pCsr->nAdvance = 0; + pCsr->bRestart = 1; + for(i=0; inSegment; i++){ + pCsr->apSegment[i]->pOffsetList = 0; + pCsr->apSegment[i]->nOffsetList = 0; + pCsr->apSegment[i]->iDocid = 0; + } + + return SQLITE_OK; +} + +static int fts3GrowSegReaderBuffer(Fts3MultiSegReader *pCsr, int nReq){ + if( nReq>pCsr->nBuffer ){ + char *aNew; + pCsr->nBuffer = nReq*2; + aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); + if( !aNew ){ + return SQLITE_NOMEM; + } + pCsr->aBuffer = aNew; + } + return SQLITE_OK; +} + + +SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr /* Cursor object */ +){ + int rc = SQLITE_OK; + + int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); + int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); + int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); + int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); + int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); + int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); + + Fts3SegReader **apSegment = pCsr->apSegment; + int nSegment = pCsr->nSegment; + Fts3SegFilter *pFilter = pCsr->pFilter; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); + + if( pCsr->nSegment==0 ) return SQLITE_OK; + + do { + int nMerge; + int i; + + /* Advance the first pCsr->nAdvance entries in the apSegment[] array + ** forward. Then sort the list in order of current term again. + */ + for(i=0; inAdvance; i++){ + Fts3SegReader *pSeg = apSegment[i]; + if( pSeg->bLookup ){ + fts3SegReaderSetEof(pSeg); + }else{ + rc = fts3SegReaderNext(p, pSeg, 0); + } + if( rc!=SQLITE_OK ) return rc; + } + fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); + pCsr->nAdvance = 0; + + /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ + assert( rc==SQLITE_OK ); + if( apSegment[0]->aNode==0 ) break; + + pCsr->nTerm = apSegment[0]->nTerm; + pCsr->zTerm = apSegment[0]->zTerm; + + /* If this is a prefix-search, and if the term that apSegment[0] points + ** to does not share a suffix with pFilter->zTerm/nTerm, then all + ** required callbacks have been made. In this case exit early. + ** + ** Similarly, if this is a search for an exact match, and the first term + ** of segment apSegment[0] is not a match, exit early. + */ + if( pFilter->zTerm && !isScan ){ + if( pCsr->nTermnTerm + || (!isPrefix && pCsr->nTerm>pFilter->nTerm) + || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) + ){ + break; + } + } + + nMerge = 1; + while( nMergeaNode + && apSegment[nMerge]->nTerm==pCsr->nTerm + && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) + ){ + nMerge++; + } + + assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); + if( nMerge==1 + && !isIgnoreEmpty + && !isFirst + && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) + ){ + pCsr->nDoclist = apSegment[0]->nDoclist; + if( fts3SegReaderIsPending(apSegment[0]) ){ + rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); + pCsr->aDoclist = pCsr->aBuffer; + }else{ + pCsr->aDoclist = apSegment[0]->aDoclist; + } + if( rc==SQLITE_OK ) rc = SQLITE_ROW; + }else{ + int nDoclist = 0; /* Size of doclist */ + sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ + + /* The current term of the first nMerge entries in the array + ** of Fts3SegReader objects is the same. The doclists must be merged + ** and a single term returned with the merged doclist. + */ + for(i=0; ipOffsetList ){ + int j; /* Number of segments that share a docid */ + char *pList = 0; + int nList = 0; + int nByte; + sqlite3_int64 iDocid = apSegment[0]->iDocid; + fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); + j = 1; + while( jpOffsetList + && apSegment[j]->iDocid==iDocid + ){ + fts3SegReaderNextDocid(p, apSegment[j], 0, 0); + j++; + } + + if( isColFilter ){ + fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); + } + + if( !isIgnoreEmpty || nList>0 ){ + + /* Calculate the 'docid' delta value to write into the merged + ** doclist. */ + sqlite3_int64 iDelta; + if( p->bDescIdx && nDoclist>0 ){ + if( iPrev<=iDocid ) return FTS_CORRUPT_VTAB; + iDelta = (i64)((u64)iPrev - (u64)iDocid); + }else{ + if( nDoclist>0 && iPrev>=iDocid ) return FTS_CORRUPT_VTAB; + iDelta = (i64)((u64)iDocid - (u64)iPrev); + } + + nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); + + rc = fts3GrowSegReaderBuffer(pCsr, nByte+nDoclist+FTS3_NODE_PADDING); + if( rc ) return rc; + + if( isFirst ){ + char *a = &pCsr->aBuffer[nDoclist]; + int nWrite; + + nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); + if( nWrite ){ + iPrev = iDocid; + nDoclist += nWrite; + } + }else{ + nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); + iPrev = iDocid; + if( isRequirePos ){ + memcpy(&pCsr->aBuffer[nDoclist], pList, nList); + nDoclist += nList; + pCsr->aBuffer[nDoclist++] = '\0'; + } + } + } + + fts3SegReaderSort(apSegment, nMerge, j, xCmp); + } + if( nDoclist>0 ){ + rc = fts3GrowSegReaderBuffer(pCsr, nDoclist+FTS3_NODE_PADDING); + if( rc ) return rc; + memset(&pCsr->aBuffer[nDoclist], 0, FTS3_NODE_PADDING); + pCsr->aDoclist = pCsr->aBuffer; + pCsr->nDoclist = nDoclist; + rc = SQLITE_ROW; + } + } + pCsr->nAdvance = nMerge; + }while( rc==SQLITE_OK ); + + return rc; +} + + +SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish( + Fts3MultiSegReader *pCsr /* Cursor object */ +){ + if( pCsr ){ + int i; + for(i=0; inSegment; i++){ + sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); + } + sqlite3_free(pCsr->apSegment); + sqlite3_free(pCsr->aBuffer); + + pCsr->nSegment = 0; + pCsr->apSegment = 0; + pCsr->aBuffer = 0; + } +} + +/* +** Decode the "end_block" field, selected by column iCol of the SELECT +** statement passed as the first argument. +** +** The "end_block" field may contain either an integer, or a text field +** containing the text representation of two non-negative integers separated +** by one or more space (0x20) characters. In the first case, set *piEndBlock +** to the integer value and *pnByte to zero before returning. In the second, +** set *piEndBlock to the first value and *pnByte to the second. +*/ +static void fts3ReadEndBlockField( + sqlite3_stmt *pStmt, + int iCol, + i64 *piEndBlock, + i64 *pnByte +){ + const unsigned char *zText = sqlite3_column_text(pStmt, iCol); + if( zText ){ + int i; + int iMul = 1; + u64 iVal = 0; + for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *piEndBlock = (i64)iVal; + while( zText[i]==' ' ) i++; + iVal = 0; + if( zText[i]=='-' ){ + i++; + iMul = -1; + } + for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *pnByte = ((i64)iVal * (i64)iMul); + } +} + + +/* +** A segment of size nByte bytes has just been written to absolute level +** iAbsLevel. Promote any segments that should be promoted as a result. +*/ +static int fts3PromoteSegments( + Fts3Table *p, /* FTS table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level just updated */ + sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ +){ + int rc = SQLITE_OK; + sqlite3_stmt *pRange; + + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); + + if( rc==SQLITE_OK ){ + int bOk = 0; + i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; + i64 nLimit = (nByte*3)/2; + + /* Loop through all entries in the %_segdir table corresponding to + ** segments in this index on levels greater than iAbsLevel. If there is + ** at least one such segment, and it is possible to determine that all + ** such segments are smaller than nLimit bytes in size, they will be + ** promoted to level iAbsLevel. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel+1); + sqlite3_bind_int64(pRange, 2, iLast); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + i64 nSize = 0, dummy; + fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); + if( nSize<=0 || nSize>nLimit ){ + /* If nSize==0, then the %_segdir.end_block field does not not + ** contain a size value. This happens if it was written by an + ** old version of FTS. In this case it is not possible to determine + ** the size of the segment, and so segment promotion does not + ** take place. */ + bOk = 0; + break; + } + bOk = 1; + } + rc = sqlite3_reset(pRange); + + if( bOk ){ + int iIdx = 0; + sqlite3_stmt *pUpdate1 = 0; + sqlite3_stmt *pUpdate2 = 0; + + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); + } + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); + } + + if( rc==SQLITE_OK ){ + + /* Loop through all %_segdir entries for segments in this index with + ** levels equal to or greater than iAbsLevel. As each entry is visited, + ** updated it to set (level = -1) and (idx = N), where N is 0 for the + ** oldest segment in the range, 1 for the next oldest, and so on. + ** + ** In other words, move all segments being promoted to level -1, + ** setting the "idx" fields as appropriate to keep them in the same + ** order. The contents of level -1 (which is never used, except + ** transiently here), will be moved back to level iAbsLevel below. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + sqlite3_bind_int(pUpdate1, 1, iIdx++); + sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); + sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); + sqlite3_step(pUpdate1); + rc = sqlite3_reset(pUpdate1); + if( rc!=SQLITE_OK ){ + sqlite3_reset(pRange); + break; + } + } + } + if( rc==SQLITE_OK ){ + rc = sqlite3_reset(pRange); + } + + /* Move level -1 to level iAbsLevel */ + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); + sqlite3_step(pUpdate2); + rc = sqlite3_reset(pUpdate2); + } + } + } + + + return rc; +} + +/* +** Merge all level iLevel segments in the database into a single +** iLevel+1 segment. Or, if iLevel<0, merge all segments into a +** single segment with a level equal to the numerically largest level +** currently present in the database. +** +** If this function is called with iLevel<0, but there is only one +** segment in the database, SQLITE_DONE is returned immediately. +** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, +** an SQLite error code is returned. +*/ +static int fts3SegmentMerge( + Fts3Table *p, + int iLangid, /* Language id to merge */ + int iIndex, /* Index in p->aIndex[] to merge */ + int iLevel /* Level to merge */ +){ + int rc; /* Return code */ + int iIdx = 0; /* Index of new segment */ + sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ + SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ + Fts3SegFilter filter; /* Segment term filter condition */ + Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ + int bIgnoreEmpty = 0; /* True to ignore empty segments */ + i64 iMaxLevel = 0; /* Max level number for this index/langid */ + + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( iLevel=0 && iIndexnIndex ); + + rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); + if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; + + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); + if( rc!=SQLITE_OK ) goto finished; + } + + if( iLevel==FTS3_SEGCURSOR_ALL ){ + /* This call is to merge all segments in the database to a single + ** segment. The level of the new segment is equal to the numerically + ** greatest segment level currently present in the database for this + ** index. The idx of the new segment is always 0. */ + if( csr.nSegment==1 && 0==fts3SegReaderIsPending(csr.apSegment[0]) ){ + rc = SQLITE_DONE; + goto finished; + } + iNewLevel = iMaxLevel; + bIgnoreEmpty = 1; + + }else{ + /* This call is to merge all segments at level iLevel. find the next + ** available segment index at level iLevel+1. The call to + ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to + ** a single iLevel+2 segment if necessary. */ + assert( FTS3_SEGCURSOR_PENDING==-1 ); + iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); + rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); + } + if( rc!=SQLITE_OK ) goto finished; + + assert( csr.nSegment>0 ); + assert_fts3_nc( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); + assert_fts3_nc( + iNewLevelnLeafData); + } + } + } + + finished: + fts3SegWriterFree(pWriter); + sqlite3Fts3SegReaderFinish(&csr); + return rc; +} + + +/* +** Flush the contents of pendingTerms to level 0 segments. +*/ +SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ + int rc = SQLITE_OK; + int i; + + for(i=0; rc==SQLITE_OK && inIndex; i++){ + rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + sqlite3Fts3PendingTermsClear(p); + + /* Determine the auto-incr-merge setting if unknown. If enabled, + ** estimate the number of leaf blocks of content to be written + */ + if( rc==SQLITE_OK && p->bHasStat + && p->nAutoincrmerge==0xff && p->nLeafAdd>0 + ){ + sqlite3_stmt *pStmt = 0; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); + rc = sqlite3_step(pStmt); + if( rc==SQLITE_ROW ){ + p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); + if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; + }else if( rc==SQLITE_DONE ){ + p->nAutoincrmerge = 0; + } + rc = sqlite3_reset(pStmt); + } + } + return rc; +} + +/* +** Encode N integers as varints into a blob. +*/ +static void fts3EncodeIntArray( + int N, /* The number of integers to encode */ + u32 *a, /* The integer values */ + char *zBuf, /* Write the BLOB here */ + int *pNBuf /* Write number of bytes if zBuf[] used here */ +){ + int i, j; + for(i=j=0; iiPrevDocid. The sizes are encoded as +** a blob of varints. +*/ +static void fts3InsertDocsize( + int *pRC, /* Result code */ + Fts3Table *p, /* Table into which to insert */ + u32 *aSz /* Sizes of each column, in tokens */ +){ + char *pBlob; /* The BLOB encoding of the document size */ + int nBlob; /* Number of bytes in the BLOB */ + sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ + int rc; /* Result code from subfunctions */ + + if( *pRC ) return; + pBlob = sqlite3_malloc64( 10*(sqlite3_int64)p->nColumn ); + if( pBlob==0 ){ + *pRC = SQLITE_NOMEM; + return; + } + fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); + rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); + if( rc ){ + sqlite3_free(pBlob); + *pRC = rc; + return; + } + sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); + sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); + sqlite3_step(pStmt); + *pRC = sqlite3_reset(pStmt); +} + +/* +** Record 0 of the %_stat table contains a blob consisting of N varints, +** where N is the number of user defined columns in the fts3 table plus +** two. If nCol is the number of user defined columns, then values of the +** varints are set as follows: +** +** Varint 0: Total number of rows in the table. +** +** Varint 1..nCol: For each column, the total number of tokens stored in +** the column for all rows of the table. +** +** Varint 1+nCol: The total size, in bytes, of all text values in all +** columns of all rows of the table. +** +*/ +static void fts3UpdateDocTotals( + int *pRC, /* The result code */ + Fts3Table *p, /* Table being updated */ + u32 *aSzIns, /* Size increases */ + u32 *aSzDel, /* Size decreases */ + int nChng /* Change in the number of documents */ +){ + char *pBlob; /* Storage for BLOB written into %_stat */ + int nBlob; /* Size of BLOB written into %_stat */ + u32 *a; /* Array of integers that becomes the BLOB */ + sqlite3_stmt *pStmt; /* Statement for reading and writing */ + int i; /* Loop counter */ + int rc; /* Result code from subfunctions */ + + const int nStat = p->nColumn+2; + + if( *pRC ) return; + a = sqlite3_malloc64( (sizeof(u32)+10)*(sqlite3_int64)nStat ); + if( a==0 ){ + *pRC = SQLITE_NOMEM; + return; + } + pBlob = (char*)&a[nStat]; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); + if( rc ){ + sqlite3_free(a); + *pRC = rc; + return; + } + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + if( sqlite3_step(pStmt)==SQLITE_ROW ){ + fts3DecodeIntArray(nStat, a, + sqlite3_column_blob(pStmt, 0), + sqlite3_column_bytes(pStmt, 0)); + }else{ + memset(a, 0, sizeof(u32)*(nStat) ); + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ){ + sqlite3_free(a); + *pRC = rc; + return; + } + if( nChng<0 && a[0]<(u32)(-nChng) ){ + a[0] = 0; + }else{ + a[0] += nChng; + } + for(i=0; inColumn+1; i++){ + u32 x = a[i+1]; + if( x+aSzIns[i] < aSzDel[i] ){ + x = 0; + }else{ + x = x + aSzIns[i] - aSzDel[i]; + } + a[i+1] = x; + } + fts3EncodeIntArray(nStat, a, pBlob, &nBlob); + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); + if( rc ){ + sqlite3_free(a); + *pRC = rc; + return; + } + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); + sqlite3_step(pStmt); + *pRC = sqlite3_reset(pStmt); + sqlite3_bind_null(pStmt, 2); + sqlite3_free(a); +} + +/* +** Merge the entire database so that there is one segment for each +** iIndex/iLangid combination. +*/ +static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ + int bSeenDone = 0; + int rc; + sqlite3_stmt *pAllLangid = 0; + + rc = sqlite3Fts3PendingTermsFlush(p); + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + } + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); + sqlite3_bind_int(pAllLangid, 2, p->nIndex); + while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ + int i; + int iLangid = sqlite3_column_int(pAllLangid, 0); + for(i=0; rc==SQLITE_OK && inIndex; i++){ + rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); + if( rc==SQLITE_DONE ){ + bSeenDone = 1; + rc = SQLITE_OK; + } + } + } + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; + } + + sqlite3Fts3SegmentsClose(p); + + return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; +} + +/* +** This function is called when the user executes the following statement: +** +** INSERT INTO () VALUES('rebuild'); +** +** The entire FTS index is discarded and rebuilt. If the table is one +** created using the content=xxx option, then the new index is based on +** the current contents of the xxx table. Otherwise, it is rebuilt based +** on the contents of the %_content table. +*/ +static int fts3DoRebuild(Fts3Table *p){ + int rc; /* Return Code */ + + rc = fts3DeleteAll(p, 0); + if( rc==SQLITE_OK ){ + u32 *aSz = 0; + u32 *aSzIns = 0; + u32 *aSzDel = 0; + sqlite3_stmt *pStmt = 0; + int nEntry = 0; + + /* Compose and prepare an SQL statement to loop through the content table */ + char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + } + + if( rc==SQLITE_OK ){ + sqlite3_int64 nByte = sizeof(u32) * ((sqlite3_int64)p->nColumn+1)*3; + aSz = (u32 *)sqlite3_malloc64(nByte); + if( aSz==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(aSz, 0, nByte); + aSzIns = &aSz[p->nColumn+1]; + aSzDel = &aSzIns[p->nColumn+1]; + } + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + int iCol; + int iLangid = langidFromSelect(p, pStmt); + rc = fts3PendingTermsDocid(p, 0, iLangid, sqlite3_column_int64(pStmt, 0)); + memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); + for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ + if( p->abNotindexed[iCol]==0 ){ + const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); + rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); + } + } + if( p->bHasDocsize ){ + fts3InsertDocsize(&rc, p, aSz); + } + if( rc!=SQLITE_OK ){ + sqlite3_finalize(pStmt); + pStmt = 0; + }else{ + nEntry++; + for(iCol=0; iCol<=p->nColumn; iCol++){ + aSzIns[iCol] += aSz[iCol]; + } + } + } + if( p->bFts4 ){ + fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry); + } + sqlite3_free(aSz); + + if( pStmt ){ + int rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + + return rc; +} + + +/* +** This function opens a cursor used to read the input data for an +** incremental merge operation. Specifically, it opens a cursor to scan +** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute +** level iAbsLevel. +*/ +static int fts3IncrmergeCsr( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level to open */ + int nSeg, /* Number of segments to merge */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ +){ + int rc; /* Return Code */ + sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ + sqlite3_int64 nByte; /* Bytes allocated at pCsr->apSegment[] */ + + /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ + memset(pCsr, 0, sizeof(*pCsr)); + nByte = sizeof(Fts3SegReader *) * nSeg; + pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc64(nByte); + + if( pCsr->apSegment==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pCsr->apSegment, 0, nByte); + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); + } + if( rc==SQLITE_OK ){ + int i; + int rc2; + sqlite3_bind_int64(pStmt, 1, iAbsLevel); + assert( pCsr->nSegment==0 ); + for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && iapSegment[i] + ); + pCsr->nSegment++; + } + rc2 = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + } + + return rc; +} + +typedef struct IncrmergeWriter IncrmergeWriter; +typedef struct NodeWriter NodeWriter; +typedef struct Blob Blob; +typedef struct NodeReader NodeReader; + +/* +** An instance of the following structure is used as a dynamic buffer +** to build up nodes or other blobs of data in. +** +** The function blobGrowBuffer() is used to extend the allocation. +*/ +struct Blob { + char *a; /* Pointer to allocation */ + int n; /* Number of valid bytes of data in a[] */ + int nAlloc; /* Allocated size of a[] (nAlloc>=n) */ +}; + +/* +** This structure is used to build up buffers containing segment b-tree +** nodes (blocks). +*/ +struct NodeWriter { + sqlite3_int64 iBlock; /* Current block id */ + Blob key; /* Last key written to the current block */ + Blob block; /* Current block image */ +}; + +/* +** An object of this type contains the state required to create or append +** to an appendable b-tree segment. +*/ +struct IncrmergeWriter { + int nLeafEst; /* Space allocated for leaf blocks */ + int nWork; /* Number of leaf pages flushed */ + sqlite3_int64 iAbsLevel; /* Absolute level of input segments */ + int iIdx; /* Index of *output* segment in iAbsLevel+1 */ + sqlite3_int64 iStart; /* Block number of first allocated block */ + sqlite3_int64 iEnd; /* Block number of last allocated block */ + sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */ + u8 bNoLeafData; /* If true, store 0 for segment size */ + NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT]; +}; + +/* +** An object of the following type is used to read data from a single +** FTS segment node. See the following functions: +** +** nodeReaderInit() +** nodeReaderNext() +** nodeReaderRelease() +*/ +struct NodeReader { + const char *aNode; + int nNode; + int iOff; /* Current offset within aNode[] */ + + /* Output variables. Containing the current node entry. */ + sqlite3_int64 iChild; /* Pointer to child node */ + Blob term; /* Current term */ + const char *aDoclist; /* Pointer to doclist */ + int nDoclist; /* Size of doclist in bytes */ +}; + +/* +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if the allocation at pBlob->a is not already at least nMin +** bytes in size, extend (realloc) it to be so. +** +** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a +** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc +** to reflect the new size of the pBlob->a[] buffer. +*/ +static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ + if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ + int nAlloc = nMin; + char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); + if( a ){ + pBlob->nAlloc = nAlloc; + pBlob->a = a; + }else{ + *pRc = SQLITE_NOMEM; + } + } +} + +/* +** Attempt to advance the node-reader object passed as the first argument to +** the next entry on the node. +** +** Return an error code if an error occurs (SQLITE_NOMEM is possible). +** Otherwise return SQLITE_OK. If there is no next entry on the node +** (e.g. because the current entry is the last) set NodeReader->aNode to +** NULL to indicate EOF. Otherwise, populate the NodeReader structure output +** variables for the new entry. +*/ +static int nodeReaderNext(NodeReader *p){ + int bFirst = (p->term.n==0); /* True for first term on the node */ + int nPrefix = 0; /* Bytes to copy from previous term */ + int nSuffix = 0; /* Bytes to append to the prefix */ + int rc = SQLITE_OK; /* Return code */ + + assert( p->aNode ); + if( p->iChild && bFirst==0 ) p->iChild++; + if( p->iOff>=p->nNode ){ + /* EOF */ + p->aNode = 0; + }else{ + if( bFirst==0 ){ + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); + } + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); + + if( nPrefix>p->term.n || nSuffix>p->nNode-p->iOff || nSuffix==0 ){ + return FTS_CORRUPT_VTAB; + } + blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); + if( rc==SQLITE_OK && ALWAYS(p->term.a!=0) ){ + memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); + p->term.n = nPrefix+nSuffix; + p->iOff += nSuffix; + if( p->iChild==0 ){ + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); + if( (p->nNode-p->iOff)nDoclist ){ + return FTS_CORRUPT_VTAB; + } + p->aDoclist = &p->aNode[p->iOff]; + p->iOff += p->nDoclist; + } + } + } + + assert_fts3_nc( p->iOff<=p->nNode ); + return rc; +} + +/* +** Release all dynamic resources held by node-reader object *p. +*/ +static void nodeReaderRelease(NodeReader *p){ + sqlite3_free(p->term.a); +} + +/* +** Initialize a node-reader object to read the node in buffer aNode/nNode. +** +** If successful, SQLITE_OK is returned and the NodeReader object set to +** point to the first entry on the node (if any). Otherwise, an SQLite +** error code is returned. +*/ +static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){ + memset(p, 0, sizeof(NodeReader)); + p->aNode = aNode; + p->nNode = nNode; + + /* Figure out if this is a leaf or an internal node. */ + if( aNode && aNode[0] ){ + /* An internal node. */ + p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); + }else{ + p->iOff = 1; + } + + return aNode ? nodeReaderNext(p) : SQLITE_OK; +} + +/* +** This function is called while writing an FTS segment each time a leaf o +** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed +** to be greater than the largest key on the node just written, but smaller +** than or equal to the first key that will be written to the next leaf +** node. +** +** The block id of the leaf node just written to disk may be found in +** (pWriter->aNodeWriter[0].iBlock) when this function is called. +*/ +static int fts3IncrmergePush( + Fts3Table *p, /* Fts3 table handle */ + IncrmergeWriter *pWriter, /* Writer object */ + const char *zTerm, /* Term to write to internal node */ + int nTerm /* Bytes at zTerm */ +){ + sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock; + int iLayer; + + assert( nTerm>0 ); + for(iLayer=1; ALWAYS(iLayeraNodeWriter[iLayer]; + int rc = SQLITE_OK; + int nPrefix; + int nSuffix; + int nSpace; + + /* Figure out how much space the key will consume if it is written to + ** the current node of layer iLayer. Due to the prefix compression, + ** the space required changes depending on which node the key is to + ** be added to. */ + nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + if(nSuffix<=0 ) return FTS_CORRUPT_VTAB; + nSpace = sqlite3Fts3VarintLen(nPrefix); + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + + if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ + /* If the current node of layer iLayer contains zero keys, or if adding + ** the key to it will not cause it to grow to larger than nNodeSize + ** bytes in size, write the key here. */ + + Blob *pBlk = &pNode->block; + if( pBlk->n==0 ){ + blobGrowBuffer(pBlk, p->nNodeSize, &rc); + if( rc==SQLITE_OK ){ + pBlk->a[0] = (char)iLayer; + pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr); + } + } + blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc); + blobGrowBuffer(&pNode->key, nTerm, &rc); + + if( rc==SQLITE_OK ){ + if( pNode->key.n ){ + pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix); + } + pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix); + memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix); + pBlk->n += nSuffix; + + memcpy(pNode->key.a, zTerm, nTerm); + pNode->key.n = nTerm; + } + }else{ + /* Otherwise, flush the current node of layer iLayer to disk. + ** Then allocate a new, empty sibling node. The key will be written + ** into the parent of this node. */ + rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); + + assert( pNode->block.nAlloc>=p->nNodeSize ); + pNode->block.a[0] = (char)iLayer; + pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1); + + iNextPtr = pNode->iBlock; + pNode->iBlock++; + pNode->key.n = 0; + } + + if( rc!=SQLITE_OK || iNextPtr==0 ) return rc; + iPtr = iNextPtr; + } + + assert( 0 ); + return 0; +} + +/* +** Append a term and (optionally) doclist to the FTS segment node currently +** stored in blob *pNode. The node need not contain any terms, but the +** header must be written before this function is called. +** +** A node header is a single 0x00 byte for a leaf node, or a height varint +** followed by the left-hand-child varint for an internal node. +** +** The term to be appended is passed via arguments zTerm/nTerm. For a +** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal +** node, both aDoclist and nDoclist must be passed 0. +** +** If the size of the value in blob pPrev is zero, then this is the first +** term written to the node. Otherwise, pPrev contains a copy of the +** previous term. Before this function returns, it is updated to contain a +** copy of zTerm/nTerm. +** +** It is assumed that the buffer associated with pNode is already large +** enough to accommodate the new entry. The buffer associated with pPrev +** is extended by this function if requrired. +** +** If an error (i.e. OOM condition) occurs, an SQLite error code is +** returned. Otherwise, SQLITE_OK. +*/ +static int fts3AppendToNode( + Blob *pNode, /* Current node image to append to */ + Blob *pPrev, /* Buffer containing previous term written */ + const char *zTerm, /* New term to write */ + int nTerm, /* Size of zTerm in bytes */ + const char *aDoclist, /* Doclist (or NULL) to write */ + int nDoclist /* Size of aDoclist in bytes */ +){ + int rc = SQLITE_OK; /* Return code */ + int bFirst = (pPrev->n==0); /* True if this is the first term written */ + int nPrefix; /* Size of term prefix in bytes */ + int nSuffix; /* Size of term suffix in bytes */ + + /* Node must have already been started. There must be a doclist for a + ** leaf node, and there must not be a doclist for an internal node. */ + assert( pNode->n>0 ); + assert_fts3_nc( (pNode->a[0]=='\0')==(aDoclist!=0) ); + + blobGrowBuffer(pPrev, nTerm, &rc); + if( rc!=SQLITE_OK ) return rc; + + nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + if( nSuffix<=0 ) return FTS_CORRUPT_VTAB; + memcpy(pPrev->a, zTerm, nTerm); + pPrev->n = nTerm; + + if( bFirst==0 ){ + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); + } + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix); + memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix); + pNode->n += nSuffix; + + if( aDoclist ){ + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist); + memcpy(&pNode->a[pNode->n], aDoclist, nDoclist); + pNode->n += nDoclist; + } + + assert( pNode->n<=pNode->nAlloc ); + + return SQLITE_OK; +} + +/* +** Append the current term and doclist pointed to by cursor pCsr to the +** appendable b-tree segment opened for writing by pWriter. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. +*/ +static int fts3IncrmergeAppend( + Fts3Table *p, /* Fts3 table handle */ + IncrmergeWriter *pWriter, /* Writer object */ + Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */ +){ + const char *zTerm = pCsr->zTerm; + int nTerm = pCsr->nTerm; + const char *aDoclist = pCsr->aDoclist; + int nDoclist = pCsr->nDoclist; + int rc = SQLITE_OK; /* Return code */ + int nSpace; /* Total space in bytes required on leaf */ + int nPrefix; /* Size of prefix shared with previous term */ + int nSuffix; /* Size of suffix (nTerm - nPrefix) */ + NodeWriter *pLeaf; /* Object used to write leaf nodes */ + + pLeaf = &pWriter->aNodeWriter[0]; + nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + + nSpace = sqlite3Fts3VarintLen(nPrefix); + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; + + /* If the current block is not empty, and if adding this term/doclist + ** to the current block would make it larger than Fts3Table.nNodeSize + ** bytes, write this block out to the database. */ + if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ + rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); + pWriter->nWork++; + + /* Add the current term to the parent node. The term added to the + ** parent must: + ** + ** a) be greater than the largest term on the leaf node just written + ** to the database (still available in pLeaf->key), and + ** + ** b) be less than or equal to the term about to be added to the new + ** leaf node (zTerm/nTerm). + ** + ** In other words, it must be the prefix of zTerm 1 byte longer than + ** the common prefix (if any) of zTerm and pWriter->zTerm. + */ + if( rc==SQLITE_OK ){ + rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1); + } + + /* Advance to the next output block */ + pLeaf->iBlock++; + pLeaf->key.n = 0; + pLeaf->block.n = 0; + + nSuffix = nTerm; + nSpace = 1; + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; + } + + pWriter->nLeafData += nSpace; + blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); + if( rc==SQLITE_OK ){ + if( pLeaf->block.n==0 ){ + pLeaf->block.n = 1; + pLeaf->block.a[0] = '\0'; + } + rc = fts3AppendToNode( + &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist + ); + } + + return rc; +} + +/* +** This function is called to release all dynamic resources held by the +** merge-writer object pWriter, and if no error has occurred, to flush +** all outstanding node buffers held by pWriter to disk. +** +** If *pRc is not SQLITE_OK when this function is called, then no attempt +** is made to write any data to disk. Instead, this function serves only +** to release outstanding resources. +** +** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while +** flushing buffers to disk, *pRc is set to an SQLite error code before +** returning. +*/ +static void fts3IncrmergeRelease( + Fts3Table *p, /* FTS3 table handle */ + IncrmergeWriter *pWriter, /* Merge-writer object */ + int *pRc /* IN/OUT: Error code */ +){ + int i; /* Used to iterate through non-root layers */ + int iRoot; /* Index of root in pWriter->aNodeWriter */ + NodeWriter *pRoot; /* NodeWriter for root node */ + int rc = *pRc; /* Error code */ + + /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment + ** root node. If the segment fits entirely on a single leaf node, iRoot + ** will be set to 0. If the root node is the parent of the leaves, iRoot + ** will be 1. And so on. */ + for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){ + NodeWriter *pNode = &pWriter->aNodeWriter[iRoot]; + if( pNode->block.n>0 ) break; + assert( *pRc || pNode->block.nAlloc==0 ); + assert( *pRc || pNode->key.nAlloc==0 ); + sqlite3_free(pNode->block.a); + sqlite3_free(pNode->key.a); + } + + /* Empty output segment. This is a no-op. */ + if( iRoot<0 ) return; + + /* The entire output segment fits on a single node. Normally, this means + ** the node would be stored as a blob in the "root" column of the %_segdir + ** table. However, this is not permitted in this case. The problem is that + ** space has already been reserved in the %_segments table, and so the + ** start_block and end_block fields of the %_segdir table must be populated. + ** And, by design or by accident, released versions of FTS cannot handle + ** segments that fit entirely on the root node with start_block!=0. + ** + ** Instead, create a synthetic root node that contains nothing but a + ** pointer to the single content node. So that the segment consists of a + ** single leaf and a single interior (root) node. + ** + ** Todo: Better might be to defer allocating space in the %_segments + ** table until we are sure it is needed. + */ + if( iRoot==0 ){ + Blob *pBlock = &pWriter->aNodeWriter[1].block; + blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc); + if( rc==SQLITE_OK ){ + pBlock->a[0] = 0x01; + pBlock->n = 1 + sqlite3Fts3PutVarint( + &pBlock->a[1], pWriter->aNodeWriter[0].iBlock + ); + } + iRoot = 1; + } + pRoot = &pWriter->aNodeWriter[iRoot]; + + /* Flush all currently outstanding nodes to disk. */ + for(i=0; iaNodeWriter[i]; + if( pNode->block.n>0 && rc==SQLITE_OK ){ + rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); + } + sqlite3_free(pNode->block.a); + sqlite3_free(pNode->key.a); + } + + /* Write the %_segdir record. */ + if( rc==SQLITE_OK ){ + rc = fts3WriteSegdir(p, + pWriter->iAbsLevel+1, /* level */ + pWriter->iIdx, /* idx */ + pWriter->iStart, /* start_block */ + pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ + pWriter->iEnd, /* end_block */ + (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ + pRoot->block.a, pRoot->block.n /* root */ + ); + } + sqlite3_free(pRoot->block.a); + sqlite3_free(pRoot->key.a); + + *pRc = rc; +} + +/* +** Compare the term in buffer zLhs (size in bytes nLhs) with that in +** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of +** the other, it is considered to be smaller than the other. +** +** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve +** if it is greater. +*/ +static int fts3TermCmp( + const char *zLhs, int nLhs, /* LHS of comparison */ + const char *zRhs, int nRhs /* RHS of comparison */ +){ + int nCmp = MIN(nLhs, nRhs); + int res; + + if( nCmp && ALWAYS(zLhs) && ALWAYS(zRhs) ){ + res = memcmp(zLhs, zRhs, nCmp); + }else{ + res = 0; + } + if( res==0 ) res = nLhs - nRhs; + + return res; +} + + +/* +** Query to see if the entry in the %_segments table with blockid iEnd is +** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before +** returning. Otherwise, set *pbRes to 0. +** +** Or, if an error occurs while querying the database, return an SQLite +** error code. The final value of *pbRes is undefined in this case. +** +** This is used to test if a segment is an "appendable" segment. If it +** is, then a NULL entry has been inserted into the %_segments table +** with blockid %_segdir.end_block. +*/ +static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){ + int bRes = 0; /* Result to set *pbRes to */ + sqlite3_stmt *pCheck = 0; /* Statement to query database with */ + int rc; /* Return code */ + + rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pCheck, 1, iEnd); + if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1; + rc = sqlite3_reset(pCheck); + } + + *pbRes = bRes; + return rc; +} + +/* +** This function is called when initializing an incremental-merge operation. +** It checks if the existing segment with index value iIdx at absolute level +** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the +** merge-writer object *pWriter is initialized to write to it. +** +** An existing segment can be appended to by an incremental merge if: +** +** * It was initially created as an appendable segment (with all required +** space pre-allocated), and +** +** * The first key read from the input (arguments zKey and nKey) is +** greater than the largest key currently stored in the potential +** output segment. +*/ +static int fts3IncrmergeLoad( + Fts3Table *p, /* Fts3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ + int iIdx, /* Index of candidate output segment */ + const char *zKey, /* First key to write */ + int nKey, /* Number of bytes in nKey */ + IncrmergeWriter *pWriter /* Populate this object */ +){ + int rc; /* Return code */ + sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */ + + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0); + if( rc==SQLITE_OK ){ + sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */ + sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */ + sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */ + const char *aRoot = 0; /* Pointer to %_segdir.root buffer */ + int nRoot = 0; /* Size of aRoot[] in bytes */ + int rc2; /* Return code from sqlite3_reset() */ + int bAppendable = 0; /* Set to true if segment is appendable */ + + /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */ + sqlite3_bind_int64(pSelect, 1, iAbsLevel+1); + sqlite3_bind_int(pSelect, 2, iIdx); + if( sqlite3_step(pSelect)==SQLITE_ROW ){ + iStart = sqlite3_column_int64(pSelect, 1); + iLeafEnd = sqlite3_column_int64(pSelect, 2); + fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); + if( pWriter->nLeafData<0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } + pWriter->bNoLeafData = (pWriter->nLeafData==0); + nRoot = sqlite3_column_bytes(pSelect, 4); + aRoot = sqlite3_column_blob(pSelect, 4); + if( aRoot==0 ){ + sqlite3_reset(pSelect); + return nRoot ? SQLITE_NOMEM : FTS_CORRUPT_VTAB; + } + }else{ + return sqlite3_reset(pSelect); + } + + /* Check for the zero-length marker in the %_segments table */ + rc = fts3IsAppendable(p, iEnd, &bAppendable); + + /* Check that zKey/nKey is larger than the largest key the candidate */ + if( rc==SQLITE_OK && bAppendable ){ + char *aLeaf = 0; + int nLeaf = 0; + + rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0); + if( rc==SQLITE_OK ){ + NodeReader reader; + for(rc = nodeReaderInit(&reader, aLeaf, nLeaf); + rc==SQLITE_OK && reader.aNode; + rc = nodeReaderNext(&reader) + ){ + assert( reader.aNode ); + } + if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){ + bAppendable = 0; + } + nodeReaderRelease(&reader); + } + sqlite3_free(aLeaf); + } + + if( rc==SQLITE_OK && bAppendable ){ + /* It is possible to append to this segment. Set up the IncrmergeWriter + ** object to do so. */ + int i; + int nHeight = (int)aRoot[0]; + NodeWriter *pNode; + if( nHeight<1 || nHeight>=FTS_MAX_APPENDABLE_HEIGHT ){ + sqlite3_reset(pSelect); + return FTS_CORRUPT_VTAB; + } + + pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; + pWriter->iStart = iStart; + pWriter->iEnd = iEnd; + pWriter->iAbsLevel = iAbsLevel; + pWriter->iIdx = iIdx; + + for(i=nHeight+1; iaNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; + } + + pNode = &pWriter->aNodeWriter[nHeight]; + pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; + blobGrowBuffer(&pNode->block, + MAX(nRoot, p->nNodeSize)+FTS3_NODE_PADDING, &rc + ); + if( rc==SQLITE_OK ){ + memcpy(pNode->block.a, aRoot, nRoot); + pNode->block.n = nRoot; + memset(&pNode->block.a[nRoot], 0, FTS3_NODE_PADDING); + } + + for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ + NodeReader reader; + pNode = &pWriter->aNodeWriter[i]; + + if( pNode->block.a){ + rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); + while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); + blobGrowBuffer(&pNode->key, reader.term.n, &rc); + if( rc==SQLITE_OK ){ + assert_fts3_nc( reader.term.n>0 || reader.aNode==0 ); + if( reader.term.n>0 ){ + memcpy(pNode->key.a, reader.term.a, reader.term.n); + } + pNode->key.n = reader.term.n; + if( i>0 ){ + char *aBlock = 0; + int nBlock = 0; + pNode = &pWriter->aNodeWriter[i-1]; + pNode->iBlock = reader.iChild; + rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock,0); + blobGrowBuffer(&pNode->block, + MAX(nBlock, p->nNodeSize)+FTS3_NODE_PADDING, &rc + ); + if( rc==SQLITE_OK ){ + memcpy(pNode->block.a, aBlock, nBlock); + pNode->block.n = nBlock; + memset(&pNode->block.a[nBlock], 0, FTS3_NODE_PADDING); + } + sqlite3_free(aBlock); + } + } + } + nodeReaderRelease(&reader); + } + } + + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; + } + + return rc; +} + +/* +** Determine the largest segment index value that exists within absolute +** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus +** one before returning SQLITE_OK. Or, if there are no segments at all +** within level iAbsLevel, set *piIdx to zero. +** +** If an error occurs, return an SQLite error code. The final value of +** *piIdx is undefined in this case. +*/ +static int fts3IncrmergeOutputIdx( + Fts3Table *p, /* FTS Table handle */ + sqlite3_int64 iAbsLevel, /* Absolute index of input segments */ + int *piIdx /* OUT: Next free index at iAbsLevel+1 */ +){ + int rc; + sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ + + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1); + sqlite3_step(pOutputIdx); + *piIdx = sqlite3_column_int(pOutputIdx, 0); + rc = sqlite3_reset(pOutputIdx); + } + + return rc; +} + +/* +** Allocate an appendable output segment on absolute level iAbsLevel+1 +** with idx value iIdx. +** +** In the %_segdir table, a segment is defined by the values in three +** columns: +** +** start_block +** leaves_end_block +** end_block +** +** When an appendable segment is allocated, it is estimated that the +** maximum number of leaf blocks that may be required is the sum of the +** number of leaf blocks consumed by the input segments, plus the number +** of input segments, multiplied by two. This value is stored in stack +** variable nLeafEst. +** +** A total of 16*nLeafEst blocks are allocated when an appendable segment +** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous +** array of leaf nodes starts at the first block allocated. The array +** of interior nodes that are parents of the leaf nodes start at block +** (start_block + (1 + end_block - start_block) / 16). And so on. +** +** In the actual code below, the value "16" is replaced with the +** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. +*/ +static int fts3IncrmergeWriter( + Fts3Table *p, /* Fts3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ + int iIdx, /* Index of new output segment */ + Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ + IncrmergeWriter *pWriter /* Populate this object */ +){ + int rc; /* Return Code */ + int i; /* Iterator variable */ + int nLeafEst = 0; /* Blocks allocated for leaf nodes */ + sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ + sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ + + /* Calculate nLeafEst. */ + rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); + sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); + if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ + nLeafEst = sqlite3_column_int(pLeafEst, 0); + } + rc = sqlite3_reset(pLeafEst); + } + if( rc!=SQLITE_OK ) return rc; + + /* Calculate the first block to use in the output segment */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){ + pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0); + pWriter->iEnd = pWriter->iStart - 1; + pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT; + } + rc = sqlite3_reset(pFirstBlock); + } + if( rc!=SQLITE_OK ) return rc; + + /* Insert the marker in the %_segments table to make sure nobody tries + ** to steal the space just allocated. This is also used to identify + ** appendable segments. */ + rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0); + if( rc!=SQLITE_OK ) return rc; + + pWriter->iAbsLevel = iAbsLevel; + pWriter->nLeafEst = nLeafEst; + pWriter->iIdx = iIdx; + + /* Set up the array of NodeWriter objects */ + for(i=0; iaNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; + } + return SQLITE_OK; +} + +/* +** Remove an entry from the %_segdir table. This involves running the +** following two statements: +** +** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx +** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx +** +** The DELETE statement removes the specific %_segdir level. The UPDATE +** statement ensures that the remaining segments have contiguously allocated +** idx values. +*/ +static int fts3RemoveSegdirEntry( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level to delete from */ + int iIdx /* Index of %_segdir entry to delete */ +){ + int rc; /* Return code */ + sqlite3_stmt *pDelete = 0; /* DELETE statement */ + + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, iAbsLevel); + sqlite3_bind_int(pDelete, 2, iIdx); + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); + } + + return rc; +} + +/* +** One or more segments have just been removed from absolute level iAbsLevel. +** Update the 'idx' values of the remaining segments in the level so that +** the idx values are a contiguous sequence starting from 0. +*/ +static int fts3RepackSegdirLevel( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel /* Absolute level to repack */ +){ + int rc; /* Return code */ + int *aIdx = 0; /* Array of remaining idx values */ + int nIdx = 0; /* Valid entries in aIdx[] */ + int nAlloc = 0; /* Allocated size of aIdx[] */ + int i; /* Iterator variable */ + sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */ + sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */ + + rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int64(pSelect, 1, iAbsLevel); + while( SQLITE_ROW==sqlite3_step(pSelect) ){ + if( nIdx>=nAlloc ){ + int *aNew; + nAlloc += 16; + aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); + if( !aNew ){ + rc = SQLITE_NOMEM; + break; + } + aIdx = aNew; + } + aIdx[nIdx++] = sqlite3_column_int(pSelect, 0); + } + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; + } + + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0); + } + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pUpdate, 2, iAbsLevel); + } + + assert( p->bIgnoreSavepoint==0 ); + p->bIgnoreSavepoint = 1; + for(i=0; rc==SQLITE_OK && ibIgnoreSavepoint = 0; + + sqlite3_free(aIdx); + return rc; +} + +static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){ + pNode->a[0] = (char)iHeight; + if( iChild ){ + assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) ); + pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild); + }else{ + assert( pNode->nAlloc>=1 ); + pNode->n = 1; + } +} + +/* +** The first two arguments are a pointer to and the size of a segment b-tree +** node. The node may be a leaf or an internal node. +** +** This function creates a new node image in blob object *pNew by copying +** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes) +** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode. +*/ +static int fts3TruncateNode( + const char *aNode, /* Current node image */ + int nNode, /* Size of aNode in bytes */ + Blob *pNew, /* OUT: Write new node image here */ + const char *zTerm, /* Omit all terms smaller than this */ + int nTerm, /* Size of zTerm in bytes */ + sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ +){ + NodeReader reader; /* Reader object */ + Blob prev = {0, 0, 0}; /* Previous term written to new node */ + int rc = SQLITE_OK; /* Return code */ + int bLeaf; /* True for a leaf node */ + + if( nNode<1 ) return FTS_CORRUPT_VTAB; + bLeaf = aNode[0]=='\0'; + + /* Allocate required output space */ + blobGrowBuffer(pNew, nNode, &rc); + if( rc!=SQLITE_OK ) return rc; + pNew->n = 0; + + /* Populate new node buffer */ + for(rc = nodeReaderInit(&reader, aNode, nNode); + rc==SQLITE_OK && reader.aNode; + rc = nodeReaderNext(&reader) + ){ + if( pNew->n==0 ){ + int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm); + if( res<0 || (bLeaf==0 && res==0) ) continue; + fts3StartNode(pNew, (int)aNode[0], reader.iChild); + *piBlock = reader.iChild; + } + rc = fts3AppendToNode( + pNew, &prev, reader.term.a, reader.term.n, + reader.aDoclist, reader.nDoclist + ); + if( rc!=SQLITE_OK ) break; + } + if( pNew->n==0 ){ + fts3StartNode(pNew, (int)aNode[0], reader.iChild); + *piBlock = reader.iChild; + } + assert( pNew->n<=pNew->nAlloc ); + + nodeReaderRelease(&reader); + sqlite3_free(prev.a); + return rc; +} + +/* +** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute +** level iAbsLevel. This may involve deleting entries from the %_segments +** table, and modifying existing entries in both the %_segments and %_segdir +** tables. +** +** SQLITE_OK is returned if the segment is updated successfully. Or an +** SQLite error code otherwise. +*/ +static int fts3TruncateSegment( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */ + int iIdx, /* Index within level of segment to modify */ + const char *zTerm, /* Remove terms smaller than this */ + int nTerm /* Number of bytes in buffer zTerm */ +){ + int rc = SQLITE_OK; /* Return code */ + Blob root = {0,0,0}; /* New root page image */ + Blob block = {0,0,0}; /* Buffer used for any other block */ + sqlite3_int64 iBlock = 0; /* Block id */ + sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */ + sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */ + sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */ + + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0); + if( rc==SQLITE_OK ){ + int rc2; /* sqlite3_reset() return code */ + sqlite3_bind_int64(pFetch, 1, iAbsLevel); + sqlite3_bind_int(pFetch, 2, iIdx); + if( SQLITE_ROW==sqlite3_step(pFetch) ){ + const char *aRoot = sqlite3_column_blob(pFetch, 4); + int nRoot = sqlite3_column_bytes(pFetch, 4); + iOldStart = sqlite3_column_int64(pFetch, 1); + rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock); + } + rc2 = sqlite3_reset(pFetch); + if( rc==SQLITE_OK ) rc = rc2; + } + + while( rc==SQLITE_OK && iBlock ){ + char *aBlock = 0; + int nBlock = 0; + iNewStart = iBlock; + + rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0); + if( rc==SQLITE_OK ){ + rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock); + } + if( rc==SQLITE_OK ){ + rc = fts3WriteSegment(p, iNewStart, block.a, block.n); + } + sqlite3_free(aBlock); + } + + /* Variable iNewStart now contains the first valid leaf node. */ + if( rc==SQLITE_OK && iNewStart ){ + sqlite3_stmt *pDel = 0; + rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDel, 1, iOldStart); + sqlite3_bind_int64(pDel, 2, iNewStart-1); + sqlite3_step(pDel); + rc = sqlite3_reset(pDel); + } + } + + if( rc==SQLITE_OK ){ + sqlite3_stmt *pChomp = 0; + rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pChomp, 1, iNewStart); + sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); + sqlite3_bind_int64(pChomp, 3, iAbsLevel); + sqlite3_bind_int(pChomp, 4, iIdx); + sqlite3_step(pChomp); + rc = sqlite3_reset(pChomp); + sqlite3_bind_null(pChomp, 2); + } + } + + sqlite3_free(root.a); + sqlite3_free(block.a); + return rc; +} + +/* +** This function is called after an incrmental-merge operation has run to +** merge (or partially merge) two or more segments from absolute level +** iAbsLevel. +** +** Each input segment is either removed from the db completely (if all of +** its data was copied to the output segment by the incrmerge operation) +** or modified in place so that it no longer contains those entries that +** have been duplicated in the output segment. +*/ +static int fts3IncrmergeChomp( + Fts3Table *p, /* FTS table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level containing segments */ + Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */ + int *pnRem /* Number of segments not deleted */ +){ + int i; + int nRem = 0; + int rc = SQLITE_OK; + + for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){ + Fts3SegReader *pSeg = 0; + int j; + + /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding + ** somewhere in the pCsr->apSegment[] array. */ + for(j=0; ALWAYS(jnSegment); j++){ + pSeg = pCsr->apSegment[j]; + if( pSeg->iIdx==i ) break; + } + assert( jnSegment && pSeg->iIdx==i ); + + if( pSeg->aNode==0 ){ + /* Seg-reader is at EOF. Remove the entire input segment. */ + rc = fts3DeleteSegment(p, pSeg); + if( rc==SQLITE_OK ){ + rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); + } + *pnRem = 0; + }else{ + /* The incremental merge did not copy all the data from this + ** segment to the upper level. The segment is modified in place + ** so that it contains no keys smaller than zTerm/nTerm. */ + const char *zTerm = pSeg->zTerm; + int nTerm = pSeg->nTerm; + rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm); + nRem++; + } + } + + if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){ + rc = fts3RepackSegdirLevel(p, iAbsLevel); + } + + *pnRem = nRem; + return rc; +} + +/* +** Store an incr-merge hint in the database. +*/ +static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){ + sqlite3_stmt *pReplace = 0; + int rc; /* Return code */ + + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT); + sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + sqlite3_bind_null(pReplace, 2); + } + + return rc; +} + +/* +** Load an incr-merge hint from the database. The incr-merge hint, if one +** exists, is stored in the rowid==1 row of the %_stat table. +** +** If successful, populate blob *pHint with the value read from the %_stat +** table and return SQLITE_OK. Otherwise, if an error occurs, return an +** SQLite error code. +*/ +static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){ + sqlite3_stmt *pSelect = 0; + int rc; + + pHint->n = 0; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT); + if( SQLITE_ROW==sqlite3_step(pSelect) ){ + const char *aHint = sqlite3_column_blob(pSelect, 0); + int nHint = sqlite3_column_bytes(pSelect, 0); + if( aHint ){ + blobGrowBuffer(pHint, nHint, &rc); + if( rc==SQLITE_OK ){ + if( ALWAYS(pHint->a!=0) ) memcpy(pHint->a, aHint, nHint); + pHint->n = nHint; + } + } + } + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; + } + + return rc; +} + +/* +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, append an entry to the hint stored in blob *pHint. Each entry +** consists of two varints, the absolute level number of the input segments +** and the number of input segments. +** +** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs, +** set *pRc to an SQLite error code before returning. +*/ +static void fts3IncrmergeHintPush( + Blob *pHint, /* Hint blob to append to */ + i64 iAbsLevel, /* First varint to store in hint */ + int nInput, /* Second varint to store in hint */ + int *pRc /* IN/OUT: Error code */ +){ + blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc); + if( *pRc==SQLITE_OK ){ + pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel); + pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput); + } +} + +/* +** Read the last entry (most recently pushed) from the hint blob *pHint +** and then remove the entry. Write the two values read to *piAbsLevel and +** *pnInput before returning. +** +** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does +** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB. +*/ +static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ + const int nHint = pHint->n; + int i; + + i = pHint->n-1; + if( (pHint->a[i] & 0x80) ) return FTS_CORRUPT_VTAB; + while( i>0 && (pHint->a[i-1] & 0x80) ) i--; + if( i==0 ) return FTS_CORRUPT_VTAB; + i--; + while( i>0 && (pHint->a[i-1] & 0x80) ) i--; + + pHint->n = i; + i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); + i += fts3GetVarint32(&pHint->a[i], pnInput); + assert( i<=nHint ); + if( i!=nHint ) return FTS_CORRUPT_VTAB; + + return SQLITE_OK; +} + + +/* +** Attempt an incremental merge that writes nMerge leaf blocks. +** +** Incremental merges happen nMin segments at a time. The segments +** to be merged are the nMin oldest segments (the ones with the smallest +** values for the _segdir.idx field) in the highest level that contains +** at least nMin segments. Multiple merges might occur in an attempt to +** write the quota of nMerge leaf blocks. +*/ +SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ + int rc; /* Return code */ + int nRem = nMerge; /* Number of leaf pages yet to be written */ + Fts3MultiSegReader *pCsr; /* Cursor used to read input data */ + Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */ + IncrmergeWriter *pWriter; /* Writer object */ + int nSeg = 0; /* Number of input segments */ + sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */ + Blob hint = {0, 0, 0}; /* Hint read from %_stat table */ + int bDirtyHint = 0; /* True if blob 'hint' has been modified */ + + /* Allocate space for the cursor, filter and writer objects */ + const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); + pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); + if( !pWriter ) return SQLITE_NOMEM; + pFilter = (Fts3SegFilter *)&pWriter[1]; + pCsr = (Fts3MultiSegReader *)&pFilter[1]; + + rc = fts3IncrmergeHintLoad(p, &hint); + while( rc==SQLITE_OK && nRem>0 ){ + const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; + sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ + int bUseHint = 0; /* True if attempting to append */ + int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ + + /* Search the %_segdir table for the absolute level with the smallest + ** relative level number that contains at least nMin segments, if any. + ** If one is found, set iAbsLevel to the absolute level number and + ** nSeg to nMin. If no level with at least nMin segments can be found, + ** set nSeg to -1. + */ + rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); + sqlite3_bind_int(pFindLevel, 1, MAX(2, nMin)); + if( sqlite3_step(pFindLevel)==SQLITE_ROW ){ + iAbsLevel = sqlite3_column_int64(pFindLevel, 0); + nSeg = sqlite3_column_int(pFindLevel, 1); + assert( nSeg>=2 ); + }else{ + nSeg = -1; + } + rc = sqlite3_reset(pFindLevel); + + /* If the hint read from the %_stat table is not empty, check if the + ** last entry in it specifies a relative level smaller than or equal + ** to the level identified by the block above (if any). If so, this + ** iteration of the loop will work on merging at the hinted level. + */ + if( rc==SQLITE_OK && hint.n ){ + int nHint = hint.n; + sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ + int nHintSeg = 0; /* Hint number of segments */ + + rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); + if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ + /* Based on the scan in the block above, it is known that there + ** are no levels with a relative level smaller than that of + ** iAbsLevel with more than nSeg segments, or if nSeg is -1, + ** no levels with more than nMin segments. Use this to limit the + ** value of nHintSeg to avoid a large memory allocation in case the + ** merge-hint is corrupt*/ + iAbsLevel = iHintAbsLevel; + nSeg = MIN(MAX(nMin,nSeg), nHintSeg); + bUseHint = 1; + bDirtyHint = 1; + }else{ + /* This undoes the effect of the HintPop() above - so that no entry + ** is removed from the hint blob. */ + hint.n = nHint; + } + } + + /* If nSeg is less that zero, then there is no level with at least + ** nMin segments and no hint in the %_stat table. No work to do. + ** Exit early in this case. */ + if( nSeg<=0 ) break; + + assert( nMod<=0x7FFFFFFF ); + if( iAbsLevel<0 || iAbsLevel>(nMod<<32) ){ + rc = FTS_CORRUPT_VTAB; + break; + } + + /* Open a cursor to iterate through the contents of the oldest nSeg + ** indexes of absolute level iAbsLevel. If this cursor is opened using + ** the 'hint' parameters, it is possible that there are less than nSeg + ** segments available in level iAbsLevel. In this case, no work is + ** done on iAbsLevel - fall through to the next iteration of the loop + ** to start work on some other level. */ + memset(pWriter, 0, nAlloc); + pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; + + if( rc==SQLITE_OK ){ + rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); + assert( bUseHint==1 || bUseHint==0 ); + if( iIdx==0 || (bUseHint && iIdx==1) ){ + int bIgnore = 0; + rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); + if( bIgnore ){ + pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; + } + } + } + + if( rc==SQLITE_OK ){ + rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); + } + if( SQLITE_OK==rc && pCsr->nSegment==nSeg + && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) + ){ + int bEmpty = 0; + rc = sqlite3Fts3SegReaderStep(p, pCsr); + if( rc==SQLITE_OK ){ + bEmpty = 1; + }else if( rc!=SQLITE_ROW ){ + sqlite3Fts3SegReaderFinish(pCsr); + break; + } + if( bUseHint && iIdx>0 ){ + const char *zKey = pCsr->zTerm; + int nKey = pCsr->nTerm; + rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); + }else{ + rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); + } + + if( rc==SQLITE_OK && pWriter->nLeafEst ){ + fts3LogMerge(nSeg, iAbsLevel); + if( bEmpty==0 ){ + do { + rc = fts3IncrmergeAppend(p, pWriter, pCsr); + if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); + if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; + }while( rc==SQLITE_ROW ); + } + + /* Update or delete the input segments */ + if( rc==SQLITE_OK ){ + nRem -= (1 + pWriter->nWork); + rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); + if( nSeg!=0 ){ + bDirtyHint = 1; + fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc); + } + } + } + + if( nSeg!=0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } + fts3IncrmergeRelease(p, pWriter, &rc); + if( nSeg==0 && pWriter->bNoLeafData==0 ){ + fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); + } + } + + sqlite3Fts3SegReaderFinish(pCsr); + } + + /* Write the hint values into the %_stat table for the next incr-merger */ + if( bDirtyHint && rc==SQLITE_OK ){ + rc = fts3IncrmergeHintStore(p, &hint); + } + + sqlite3_free(pWriter); + sqlite3_free(hint.a); + return rc; +} + +/* +** Convert the text beginning at *pz into an integer and return +** its value. Advance *pz to point to the first character past +** the integer. +** +** This function used for parameters to merge= and incrmerge= +** commands. +*/ +static int fts3Getint(const char **pz){ + const char *z = *pz; + int i = 0; + while( (*z)>='0' && (*z)<='9' && i<214748363 ) i = 10*i + *(z++) - '0'; + *pz = z; + return i; +} + +/* +** Process statements of the form: +** +** INSERT INTO table(table) VALUES('merge=A,B'); +** +** A and B are integers that decode to be the number of leaf pages +** written for the merge, and the minimum number of segments on a level +** before it will be selected for a merge, respectively. +*/ +static int fts3DoIncrmerge( + Fts3Table *p, /* FTS3 table handle */ + const char *zParam /* Nul-terminated string containing "A,B" */ +){ + int rc; + int nMin = (MergeCount(p) / 2); + int nMerge = 0; + const char *z = zParam; + + /* Read the first integer value */ + nMerge = fts3Getint(&z); + + /* If the first integer value is followed by a ',', read the second + ** integer value. */ + if( z[0]==',' && z[1]!='\0' ){ + z++; + nMin = fts3Getint(&z); + } + + if( z[0]!='\0' || nMin<2 ){ + rc = SQLITE_ERROR; + }else{ + rc = SQLITE_OK; + if( !p->bHasStat ){ + assert( p->bFts4==0 ); + sqlite3Fts3CreateStatTable(&rc, p); + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); + } + sqlite3Fts3SegmentsClose(p); + } + return rc; +} + +/* +** Process statements of the form: +** +** INSERT INTO table(table) VALUES('automerge=X'); +** +** where X is an integer. X==0 means to turn automerge off. X!=0 means +** turn it on. The setting is persistent. +*/ +static int fts3DoAutoincrmerge( + Fts3Table *p, /* FTS3 table handle */ + const char *zParam /* Nul-terminated string containing boolean */ +){ + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = 0; + p->nAutoincrmerge = fts3Getint(&zParam); + if( p->nAutoincrmerge==1 || p->nAutoincrmerge>MergeCount(p) ){ + p->nAutoincrmerge = 8; + } + if( !p->bHasStat ){ + assert( p->bFts4==0 ); + sqlite3Fts3CreateStatTable(&rc, p); + if( rc ) return rc; + } + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); + if( rc ) return rc; + sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); + sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + return rc; +} + +/* +** Return a 64-bit checksum for the FTS index entry specified by the +** arguments to this function. +*/ +static u64 fts3ChecksumEntry( + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm, /* Size of zTerm in bytes */ + int iLangid, /* Language id for current row */ + int iIndex, /* Index (0..Fts3Table.nIndex-1) */ + i64 iDocid, /* Docid for current row. */ + int iCol, /* Column number */ + int iPos /* Position */ +){ + int i; + u64 ret = (u64)iDocid; + + ret += (ret<<3) + iLangid; + ret += (ret<<3) + iIndex; + ret += (ret<<3) + iCol; + ret += (ret<<3) + iPos; + for(i=0; inIndex-1) */ + int *pRc /* OUT: Return code */ +){ + Fts3SegFilter filter; + Fts3MultiSegReader csr; + int rc; + u64 cksum = 0; + + assert( *pRc==SQLITE_OK ); + + memset(&filter, 0, sizeof(filter)); + memset(&csr, 0, sizeof(csr)); + filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; + filter.flags |= FTS3_SEGMENT_SCAN; + + rc = sqlite3Fts3SegReaderCursor( + p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + } + + if( rc==SQLITE_OK ){ + while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){ + char *pCsr = csr.aDoclist; + char *pEnd = &pCsr[csr.nDoclist]; + + i64 iDocid = 0; + i64 iCol = 0; + u64 iPos = 0; + + pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); + while( pCsrbDescIdx ){ + iDocid = (i64)((u64)iDocid - iVal); + }else{ + iDocid = (i64)((u64)iDocid + iVal); + } + } + }else{ + iPos += (iVal - 2); + cksum = cksum ^ fts3ChecksumEntry( + csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid, + (int)iCol, (int)iPos + ); + } + } + } + } + } + sqlite3Fts3SegReaderFinish(&csr); + + *pRc = rc; + return cksum; +} + +/* +** Check if the contents of the FTS index match the current contents of the +** content table. If no error occurs and the contents do match, set *pbOk +** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk +** to false before returning. +** +** If an error occurs (e.g. an OOM or IO error), return an SQLite error +** code. The final value of *pbOk is undefined in this case. +*/ +static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ + int rc = SQLITE_OK; /* Return code */ + u64 cksum1 = 0; /* Checksum based on FTS index contents */ + u64 cksum2 = 0; /* Checksum based on %_content contents */ + sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */ + + /* This block calculates the checksum according to the FTS index. */ + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); + sqlite3_bind_int(pAllLangid, 2, p->nIndex); + while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ + int iLangid = sqlite3_column_int(pAllLangid, 0); + int i; + for(i=0; inIndex; i++){ + cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); + } + } + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; + } + + /* This block calculates the checksum according to the %_content table */ + if( rc==SQLITE_OK ){ + sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; + sqlite3_stmt *pStmt = 0; + char *zSql; + + zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + i64 iDocid = sqlite3_column_int64(pStmt, 0); + int iLang = langidFromSelect(p, pStmt); + int iCol; + + for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); + sqlite3_tokenizer_cursor *pT = 0; + + rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, -1, &pT); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ + + rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); + if( rc==SQLITE_OK ){ + int i; + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, nToken, iLang, 0, iDocid, iCol, iPos + ); + for(i=1; inIndex; i++){ + if( p->aIndex[i].nPrefix<=nToken ){ + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos + ); + } + } + } + } + if( pT ) pModule->xClose(pT); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + } + } + + sqlite3_finalize(pStmt); + } + + *pbOk = (cksum1==cksum2); + return rc; +} + +/* +** Run the integrity-check. If no error occurs and the current contents of +** the FTS index are correct, return SQLITE_OK. Or, if the contents of the +** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. +** +** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite +** error code. +** +** The integrity-check works as follows. For each token and indexed token +** prefix in the document set, a 64-bit checksum is calculated (by code +** in fts3ChecksumEntry()) based on the following: +** +** + The index number (0 for the main index, 1 for the first prefix +** index etc.), +** + The token (or token prefix) text itself, +** + The language-id of the row it appears in, +** + The docid of the row it appears in, +** + The column it appears in, and +** + The tokens position within that column. +** +** The checksums for all entries in the index are XORed together to create +** a single checksum for the entire index. +** +** The integrity-check code calculates the same checksum in two ways: +** +** 1. By scanning the contents of the FTS index, and +** 2. By scanning and tokenizing the content table. +** +** If the two checksums are identical, the integrity-check is deemed to have +** passed. +*/ +static int fts3DoIntegrityCheck( + Fts3Table *p /* FTS3 table handle */ +){ + int rc; + int bOk = 0; + rc = fts3IntegrityCheck(p, &bOk); + if( rc==SQLITE_OK && bOk==0 ) rc = FTS_CORRUPT_VTAB; + return rc; +} + +/* +** Handle a 'special' INSERT of the form: +** +** "INSERT INTO tbl(tbl) VALUES()" +** +** Argument pVal contains the result of . Currently the only +** meaningful value to insert is the text 'optimize'. +*/ +static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ + int rc = SQLITE_ERROR; /* Return Code */ + const char *zVal = (const char *)sqlite3_value_text(pVal); + int nVal = sqlite3_value_bytes(pVal); + + if( !zVal ){ + return SQLITE_NOMEM; + }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ + rc = fts3DoOptimize(p, 0); + }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ + rc = fts3DoRebuild(p); + }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ + rc = fts3DoIntegrityCheck(p); + }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ + rc = fts3DoIncrmerge(p, &zVal[6]); + }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ + rc = fts3DoAutoincrmerge(p, &zVal[10]); +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + }else{ + int v; + if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ + v = atoi(&zVal[9]); + if( v>=24 && v<=p->nPgsz-35 ) p->nNodeSize = v; + rc = SQLITE_OK; + }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ + v = atoi(&zVal[11]); + if( v>=64 && v<=FTS3_MAX_PENDING_DATA ) p->nMaxPendingData = v; + rc = SQLITE_OK; + }else if( nVal>21 && 0==sqlite3_strnicmp(zVal,"test-no-incr-doclist=",21) ){ + p->bNoIncrDoclist = atoi(&zVal[21]); + rc = SQLITE_OK; + }else if( nVal>11 && 0==sqlite3_strnicmp(zVal,"mergecount=",11) ){ + v = atoi(&zVal[11]); + if( v>=4 && v<=FTS3_MERGE_COUNT && (v&1)==0 ) p->nMergeCount = v; + rc = SQLITE_OK; + } +#endif + } + return rc; +} + +#ifndef SQLITE_DISABLE_FTS4_DEFERRED +/* +** Delete all cached deferred doclists. Deferred doclists are cached +** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. +*/ +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ + fts3PendingListDelete(pDef->pList); + pDef->pList = 0; + } +} + +/* +** Free all entries in the pCsr->pDeffered list. Entries are added to +** this list using sqlite3Fts3DeferToken(). +*/ +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + Fts3DeferredToken *pNext; + for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ + pNext = pDef->pNext; + fts3PendingListDelete(pDef->pList); + sqlite3_free(pDef); + } + pCsr->pDeferred = 0; +} + +/* +** Generate deferred-doclists for all tokens in the pCsr->pDeferred list +** based on the row that pCsr currently points to. +** +** A deferred-doclist is like any other doclist with position information +** included, except that it only contains entries for a single row of the +** table, not for all rows. +*/ +SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return code */ + if( pCsr->pDeferred ){ + int i; /* Used to iterate through table columns */ + sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ + Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ + + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer *pT = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pT->pModule; + + assert( pCsr->isRequireSeek==0 ); + iDocid = sqlite3_column_int64(pCsr->pStmt, 0); + + for(i=0; inColumn && rc==SQLITE_OK; i++){ + if( p->abNotindexed[i]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); + sqlite3_tokenizer_cursor *pTC = 0; + + rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ + + rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + Fts3PhraseToken *pPT = pDef->pToken; + if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->bFirst==0 || iPos==0) + && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) + ){ + fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); + } + } + } + if( pTC ) pModule->xClose(pTC); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + } + + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + if( pDef->pList ){ + rc = fts3PendingListAppendVarint(&pDef->pList, 0); + } + } + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( + Fts3DeferredToken *p, + char **ppData, + int *pnData +){ + char *pRet; + int nSkip; + sqlite3_int64 dummy; + + *ppData = 0; + *pnData = 0; + + if( p->pList==0 ){ + return SQLITE_OK; + } + + pRet = (char *)sqlite3_malloc(p->pList->nData); + if( !pRet ) return SQLITE_NOMEM; + + nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); + *pnData = p->pList->nData - nSkip; + *ppData = pRet; + + memcpy(pRet, &p->pList->aData[nSkip], *pnData); + return SQLITE_OK; +} + +/* +** Add an entry for token pToken to the pCsr->pDeferred list. +*/ +SQLITE_PRIVATE int sqlite3Fts3DeferToken( + Fts3Cursor *pCsr, /* Fts3 table cursor */ + Fts3PhraseToken *pToken, /* Token to defer */ + int iCol /* Column that token must appear in (or -1) */ +){ + Fts3DeferredToken *pDeferred; + pDeferred = sqlite3_malloc(sizeof(*pDeferred)); + if( !pDeferred ){ + return SQLITE_NOMEM; + } + memset(pDeferred, 0, sizeof(*pDeferred)); + pDeferred->pToken = pToken; + pDeferred->pNext = pCsr->pDeferred; + pDeferred->iCol = iCol; + pCsr->pDeferred = pDeferred; + + assert( pToken->pDeferred==0 ); + pToken->pDeferred = pDeferred; + + return SQLITE_OK; +} +#endif + +/* +** SQLite value pRowid contains the rowid of a row that may or may not be +** present in the FTS3 table. If it is, delete it and adjust the contents +** of subsiduary data structures accordingly. +*/ +static int fts3DeleteByRowid( + Fts3Table *p, + sqlite3_value *pRowid, + int *pnChng, /* IN/OUT: Decrement if row is deleted */ + u32 *aSzDel +){ + int rc = SQLITE_OK; /* Return code */ + int bFound = 0; /* True if *pRowid really is in the table */ + + fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); + if( bFound && rc==SQLITE_OK ){ + int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ + rc = fts3IsEmpty(p, pRowid, &isEmpty); + if( rc==SQLITE_OK ){ + if( isEmpty ){ + /* Deleting this row means the whole table is empty. In this case + ** delete the contents of all three tables and throw away any + ** data in the pendingTerms hash table. */ + rc = fts3DeleteAll(p, 1); + *pnChng = 0; + memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); + }else{ + *pnChng = *pnChng - 1; + if( p->zContentTbl==0 ){ + fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); + } + if( p->bHasDocsize ){ + fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); + } + } + } + } + + return rc; +} + +/* +** This function does the work for the xUpdate method of FTS3 virtual +** tables. The schema of the virtual table being: +** +** CREATE TABLE
    ( +** , +**
    HIDDEN, +** docid HIDDEN, +** HIDDEN +** ); +** +** +*/ +SQLITE_PRIVATE int sqlite3Fts3UpdateMethod( + sqlite3_vtab *pVtab, /* FTS3 vtab object */ + int nArg, /* Size of argument array */ + sqlite3_value **apVal, /* Array of arguments */ + sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ +){ + Fts3Table *p = (Fts3Table *)pVtab; + int rc = SQLITE_OK; /* Return Code */ + u32 *aSzIns = 0; /* Sizes of inserted documents */ + u32 *aSzDel = 0; /* Sizes of deleted documents */ + int nChng = 0; /* Net change in number of documents */ + int bInsertDone = 0; + + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + assert( p->bHasStat==0 || p->bHasStat==1 ); + + assert( p->pSegments==0 ); + assert( + nArg==1 /* DELETE operations */ + || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ + ); + + /* Check for a "special" INSERT operation. One of the form: + ** + ** INSERT INTO xyz(xyz) VALUES('command'); + */ + if( nArg>1 + && sqlite3_value_type(apVal[0])==SQLITE_NULL + && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL + ){ + rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); + goto update_out; + } + + if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } + + /* Allocate space to hold the change in document sizes */ + aSzDel = sqlite3_malloc64(sizeof(aSzDel[0])*((sqlite3_int64)p->nColumn+1)*2); + if( aSzDel==0 ){ + rc = SQLITE_NOMEM; + goto update_out; + } + aSzIns = &aSzDel[p->nColumn+1]; + memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); + + rc = fts3Writelock(p); + if( rc!=SQLITE_OK ) goto update_out; + + /* If this is an INSERT operation, or an UPDATE that modifies the rowid + ** value, then this operation requires constraint handling. + ** + ** If the on-conflict mode is REPLACE, this means that the existing row + ** should be deleted from the database before inserting the new row. Or, + ** if the on-conflict mode is other than REPLACE, then this method must + ** detect the conflict and return SQLITE_CONSTRAINT before beginning to + ** modify the database file. + */ + if( nArg>1 && p->zContentTbl==0 ){ + /* Find the value object that holds the new rowid value. */ + sqlite3_value *pNewRowid = apVal[3+p->nColumn]; + if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ + pNewRowid = apVal[1]; + } + + if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( + sqlite3_value_type(apVal[0])==SQLITE_NULL + || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) + )){ + /* The new rowid is not NULL (in this case the rowid will be + ** automatically assigned and there is no chance of a conflict), and + ** the statement is either an INSERT or an UPDATE that modifies the + ** rowid column. So if the conflict mode is REPLACE, then delete any + ** existing row with rowid=pNewRowid. + ** + ** Or, if the conflict mode is not REPLACE, insert the new record into + ** the %_content table. If we hit the duplicate rowid constraint (or any + ** other error) while doing so, return immediately. + ** + ** This branch may also run if pNewRowid contains a value that cannot + ** be losslessly converted to an integer. In this case, the eventual + ** call to fts3InsertData() (either just below or further on in this + ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is + ** invoked, it will delete zero rows (since no row will have + ** docid=$pNewRowid if $pNewRowid is not an integer value). + */ + if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ + rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); + }else{ + rc = fts3InsertData(p, apVal, pRowid); + bInsertDone = 1; + } + } + } + if( rc!=SQLITE_OK ){ + goto update_out; + } + + /* If this is a DELETE or UPDATE operation, remove the old record. */ + if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ + assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); + rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); + } + + /* If this is an INSERT or UPDATE operation, insert the new record. */ + if( nArg>1 && rc==SQLITE_OK ){ + int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); + if( bInsertDone==0 ){ + rc = fts3InsertData(p, apVal, pRowid); + if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ + rc = FTS_CORRUPT_VTAB; + } + } + if( rc==SQLITE_OK ){ + rc = fts3PendingTermsDocid(p, 0, iLangid, *pRowid); + } + if( rc==SQLITE_OK ){ + assert( p->iPrevDocid==*pRowid ); + rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); + } + if( p->bHasDocsize ){ + fts3InsertDocsize(&rc, p, aSzIns); + } + nChng++; + } + + if( p->bFts4 ){ + fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); + } + + update_out: + sqlite3_free(aSzDel); + sqlite3Fts3SegmentsClose(p); + return rc; +} + +/* +** Flush any data in the pending-terms hash table to disk. If successful, +** merge all segments in the database (including the new segment, if +** there was any data to flush) into a single segment. +*/ +SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ + int rc; + rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3DoOptimize(p, 1); + if( rc==SQLITE_OK || rc==SQLITE_DONE ){ + int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); + if( rc2!=SQLITE_OK ) rc = rc2; + }else{ + sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); + sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); + } + } + sqlite3Fts3SegmentsClose(p); + return rc; +} + +#endif + +/************** End of fts3_write.c ******************************************/ +/************** Begin file fts3_snippet.c ************************************/ +/* +** 2009 Oct 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +#ifndef SQLITE_AMALGAMATION +typedef sqlite3_int64 i64; +#endif + +/* +** Characters that may appear in the second argument to matchinfo(). +*/ +#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ +#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ +#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ +#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ +#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ +#define FTS3_MATCHINFO_LCS 's' /* nCol values */ +#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ +#define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */ +#define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */ + +/* +** The default value for the second argument to matchinfo(). +*/ +#define FTS3_MATCHINFO_DEFAULT "pcx" + + +/* +** Used as an fts3ExprIterate() context when loading phrase doclists to +** Fts3Expr.aDoclist[]/nDoclist. +*/ +typedef struct LoadDoclistCtx LoadDoclistCtx; +struct LoadDoclistCtx { + Fts3Cursor *pCsr; /* FTS3 Cursor */ + int nPhrase; /* Number of phrases seen so far */ + int nToken; /* Number of tokens seen so far */ +}; + +/* +** The following types are used as part of the implementation of the +** fts3BestSnippet() routine. +*/ +typedef struct SnippetIter SnippetIter; +typedef struct SnippetPhrase SnippetPhrase; +typedef struct SnippetFragment SnippetFragment; + +struct SnippetIter { + Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ + int iCol; /* Extract snippet from this column */ + int nSnippet; /* Requested snippet length (in tokens) */ + int nPhrase; /* Number of phrases in query */ + SnippetPhrase *aPhrase; /* Array of size nPhrase */ + int iCurrent; /* First token of current snippet */ +}; + +struct SnippetPhrase { + int nToken; /* Number of tokens in phrase */ + char *pList; /* Pointer to start of phrase position list */ + i64 iHead; /* Next value in position list */ + char *pHead; /* Position list data following iHead */ + i64 iTail; /* Next value in trailing position list */ + char *pTail; /* Position list data following iTail */ +}; + +struct SnippetFragment { + int iCol; /* Column snippet is extracted from */ + int iPos; /* Index of first token in snippet */ + u64 covered; /* Mask of query phrases covered */ + u64 hlmask; /* Mask of snippet terms to highlight */ +}; + +/* +** This type is used as an fts3ExprIterate() context object while +** accumulating the data returned by the matchinfo() function. +*/ +typedef struct MatchInfo MatchInfo; +struct MatchInfo { + Fts3Cursor *pCursor; /* FTS3 Cursor */ + int nCol; /* Number of columns in table */ + int nPhrase; /* Number of matchable phrases in query */ + sqlite3_int64 nDoc; /* Number of docs in database */ + char flag; + u32 *aMatchinfo; /* Pre-allocated buffer */ +}; + +/* +** An instance of this structure is used to manage a pair of buffers, each +** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below +** for details. +*/ +struct MatchinfoBuffer { + u8 aRef[3]; + int nElem; + int bGlobal; /* Set if global data is loaded */ + char *zMatchinfo; + u32 aMatchinfo[1]; +}; + + +/* +** The snippet() and offsets() functions both return text values. An instance +** of the following structure is used to accumulate those values while the +** functions are running. See fts3StringAppend() for details. +*/ +typedef struct StrBuffer StrBuffer; +struct StrBuffer { + char *z; /* Pointer to buffer containing string */ + int n; /* Length of z in bytes (excl. nul-term) */ + int nAlloc; /* Allocated size of buffer z in bytes */ +}; + + +/************************************************************************* +** Start of MatchinfoBuffer code. +*/ + +/* +** Allocate a two-slot MatchinfoBuffer object. +*/ +static MatchinfoBuffer *fts3MIBufferNew(size_t nElem, const char *zMatchinfo){ + MatchinfoBuffer *pRet; + sqlite3_int64 nByte = sizeof(u32) * (2*(sqlite3_int64)nElem + 1) + + sizeof(MatchinfoBuffer); + sqlite3_int64 nStr = strlen(zMatchinfo); + + pRet = sqlite3Fts3MallocZero(nByte + nStr+1); + if( pRet ){ + pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; + pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + + sizeof(u32)*((int)nElem+1); + pRet->nElem = (int)nElem; + pRet->zMatchinfo = ((char*)pRet) + nByte; + memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); + pRet->aRef[0] = 1; + } + + return pRet; +} + +static void fts3MIBufferFree(void *p){ + MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]); + + assert( (u32*)p==&pBuf->aMatchinfo[1] + || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2] + ); + if( (u32*)p==&pBuf->aMatchinfo[1] ){ + pBuf->aRef[1] = 0; + }else{ + pBuf->aRef[2] = 0; + } + + if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){ + sqlite3_free(pBuf); + } +} + +static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){ + void (*xRet)(void*) = 0; + u32 *aOut = 0; + + if( p->aRef[1]==0 ){ + p->aRef[1] = 1; + aOut = &p->aMatchinfo[1]; + xRet = fts3MIBufferFree; + } + else if( p->aRef[2]==0 ){ + p->aRef[2] = 1; + aOut = &p->aMatchinfo[p->nElem+2]; + xRet = fts3MIBufferFree; + }else{ + aOut = (u32*)sqlite3_malloc64(p->nElem * sizeof(u32)); + if( aOut ){ + xRet = sqlite3_free; + if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); + } + } + + *paOut = aOut; + return xRet; +} + +static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){ + p->bGlobal = 1; + memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32)); +} + +/* +** Free a MatchinfoBuffer object allocated using fts3MIBufferNew() +*/ +SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){ + if( p ){ + assert( p->aRef[0]==1 ); + p->aRef[0] = 0; + if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){ + sqlite3_free(p); + } + } +} + +/* +** End of MatchinfoBuffer code. +*************************************************************************/ + + +/* +** This function is used to help iterate through a position-list. A position +** list is a list of unique integers, sorted from smallest to largest. Each +** element of the list is represented by an FTS3 varint that takes the value +** of the difference between the current element and the previous one plus +** two. For example, to store the position-list: +** +** 4 9 113 +** +** the three varints: +** +** 6 7 106 +** +** are encoded. +** +** When this function is called, *pp points to the start of an element of +** the list. *piPos contains the value of the previous entry in the list. +** After it returns, *piPos contains the value of the next element of the +** list and *pp is advanced to the following varint. +*/ +static void fts3GetDeltaPosition(char **pp, i64 *piPos){ + int iVal; + *pp += fts3GetVarint32(*pp, &iVal); + *piPos += (iVal-2); +} + +/* +** Helper function for fts3ExprIterate() (see below). +*/ +static int fts3ExprIterate2( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int *piPhrase, /* Pointer to phrase counter */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ +){ + int rc; /* Return code */ + int eType = pExpr->eType; /* Type of expression node pExpr */ + + if( eType!=FTSQUERY_PHRASE ){ + assert( pExpr->pLeft && pExpr->pRight ); + rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); + if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ + rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); + } + }else{ + rc = x(pExpr, *piPhrase, pCtx); + (*piPhrase)++; + } + return rc; +} + +/* +** Iterate through all phrase nodes in an FTS3 query, except those that +** are part of a sub-tree that is the right-hand-side of a NOT operator. +** For each phrase node found, the supplied callback function is invoked. +** +** If the callback function returns anything other than SQLITE_OK, +** the iteration is abandoned and the error code returned immediately. +** Otherwise, SQLITE_OK is returned after a callback has been made for +** all eligible phrase nodes. +*/ +static int fts3ExprIterate( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ +){ + int iPhrase = 0; /* Variable used as the phrase counter */ + return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); +} + + +/* +** This is an fts3ExprIterate() callback used while loading the doclists +** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also +** fts3ExprLoadDoclists(). +*/ +static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + int rc = SQLITE_OK; + Fts3Phrase *pPhrase = pExpr->pPhrase; + LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; + + UNUSED_PARAMETER(iPhrase); + + p->nPhrase++; + p->nToken += pPhrase->nToken; + + return rc; +} + +/* +** Load the doclists for each phrase in the query associated with FTS3 cursor +** pCsr. +** +** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable +** phrases in the expression (all phrases except those directly or +** indirectly descended from the right-hand-side of a NOT operator). If +** pnToken is not NULL, then it is set to the number of tokens in all +** matchable phrases of the expression. +*/ +static int fts3ExprLoadDoclists( + Fts3Cursor *pCsr, /* Fts3 cursor for current query */ + int *pnPhrase, /* OUT: Number of phrases in query */ + int *pnToken /* OUT: Number of tokens in query */ +){ + int rc; /* Return Code */ + LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ + sCtx.pCsr = pCsr; + rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); + if( pnPhrase ) *pnPhrase = sCtx.nPhrase; + if( pnToken ) *pnToken = sCtx.nToken; + return rc; +} + +static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + (*(int *)ctx)++; + pExpr->iPhrase = iPhrase; + return SQLITE_OK; +} +static int fts3ExprPhraseCount(Fts3Expr *pExpr){ + int nPhrase = 0; + (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); + return nPhrase; +} + +/* +** Advance the position list iterator specified by the first two +** arguments so that it points to the first element with a value greater +** than or equal to parameter iNext. +*/ +static void fts3SnippetAdvance(char **ppIter, i64 *piIter, int iNext){ + char *pIter = *ppIter; + if( pIter ){ + i64 iIter = *piIter; + + while( iIteriCurrent<0 ){ + /* The SnippetIter object has just been initialized. The first snippet + ** candidate always starts at offset 0 (even if this candidate has a + ** score of 0.0). + */ + pIter->iCurrent = 0; + + /* Advance the 'head' iterator of each phrase to the first offset that + ** is greater than or equal to (iNext+nSnippet). + */ + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); + } + }else{ + int iStart; + int iEnd = 0x7FFFFFFF; + + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pHead && pPhrase->iHeadiHead; + } + } + if( iEnd==0x7FFFFFFF ){ + return 1; + } + + pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); + fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); + } + } + + return 0; +} + +/* +** Retrieve information about the current candidate snippet of snippet +** iterator pIter. +*/ +static void fts3SnippetDetails( + SnippetIter *pIter, /* Snippet iterator */ + u64 mCovered, /* Bitmask of phrases already covered */ + int *piToken, /* OUT: First token of proposed snippet */ + int *piScore, /* OUT: "Score" for this snippet */ + u64 *pmCover, /* OUT: Bitmask of phrases covered */ + u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ +){ + int iStart = pIter->iCurrent; /* First token of snippet */ + int iScore = 0; /* Score of this snippet */ + int i; /* Loop counter */ + u64 mCover = 0; /* Mask of phrases covered by this snippet */ + u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ + + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pTail ){ + char *pCsr = pPhrase->pTail; + i64 iCsr = pPhrase->iTail; + + while( iCsr<(iStart+pIter->nSnippet) && iCsr>=iStart ){ + int j; + u64 mPhrase = (u64)1 << (i%64); + u64 mPos = (u64)1 << (iCsr - iStart); + assert( iCsr>=iStart && (iCsr - iStart)<=64 ); + assert( i>=0 ); + if( (mCover|mCovered)&mPhrase ){ + iScore++; + }else{ + iScore += 1000; + } + mCover |= mPhrase; + + for(j=0; jnToken; j++){ + mHighlight |= (mPos>>j); + } + + if( 0==(*pCsr & 0x0FE) ) break; + fts3GetDeltaPosition(&pCsr, &iCsr); + } + } + } + + /* Set the output variables before returning. */ + *piToken = iStart; + *piScore = iScore; + *pmCover = mCover; + *pmHighlight = mHighlight; +} + +/* +** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). +** Each invocation populates an element of the SnippetIter.aPhrase[] array. +*/ +static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ + SnippetIter *p = (SnippetIter *)ctx; + SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; + char *pCsr; + int rc; + + pPhrase->nToken = pExpr->pPhrase->nToken; + rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( pCsr ){ + i64 iFirst = 0; + pPhrase->pList = pCsr; + fts3GetDeltaPosition(&pCsr, &iFirst); + if( iFirst<0 ){ + rc = FTS_CORRUPT_VTAB; + }else{ + pPhrase->pHead = pCsr; + pPhrase->pTail = pCsr; + pPhrase->iHead = iFirst; + pPhrase->iTail = iFirst; + } + }else{ + assert( rc!=SQLITE_OK || ( + pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 + )); + } + + return rc; +} + +/* +** Select the fragment of text consisting of nFragment contiguous tokens +** from column iCol that represent the "best" snippet. The best snippet +** is the snippet with the highest score, where scores are calculated +** by adding: +** +** (a) +1 point for each occurrence of a matchable phrase in the snippet. +** +** (b) +1000 points for the first occurrence of each matchable phrase in +** the snippet for which the corresponding mCovered bit is not set. +** +** The selected snippet parameters are stored in structure *pFragment before +** returning. The score of the selected snippet is stored in *piScore +** before returning. +*/ +static int fts3BestSnippet( + int nSnippet, /* Desired snippet length */ + Fts3Cursor *pCsr, /* Cursor to create snippet for */ + int iCol, /* Index of column to create snippet from */ + u64 mCovered, /* Mask of phrases already covered */ + u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ + SnippetFragment *pFragment, /* OUT: Best snippet found */ + int *piScore /* OUT: Score of snippet pFragment */ +){ + int rc; /* Return Code */ + int nList; /* Number of phrases in expression */ + SnippetIter sIter; /* Iterates through snippet candidates */ + sqlite3_int64 nByte; /* Number of bytes of space to allocate */ + int iBestScore = -1; /* Best snippet score found so far */ + int i; /* Loop counter */ + + memset(&sIter, 0, sizeof(sIter)); + + /* Iterate through the phrases in the expression to count them. The same + ** callback makes sure the doclists are loaded for each phrase. + */ + rc = fts3ExprLoadDoclists(pCsr, &nList, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Now that it is known how many phrases there are, allocate and zero + ** the required space using malloc(). + */ + nByte = sizeof(SnippetPhrase) * nList; + sIter.aPhrase = (SnippetPhrase *)sqlite3Fts3MallocZero(nByte); + if( !sIter.aPhrase ){ + return SQLITE_NOMEM; + } + + /* Initialize the contents of the SnippetIter object. Then iterate through + ** the set of phrases in the expression to populate the aPhrase[] array. + */ + sIter.pCsr = pCsr; + sIter.iCol = iCol; + sIter.nSnippet = nSnippet; + sIter.nPhrase = nList; + sIter.iCurrent = -1; + rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); + if( rc==SQLITE_OK ){ + + /* Set the *pmSeen output variable. */ + for(i=0; iiCol = iCol; + while( !fts3SnippetNextCandidate(&sIter) ){ + int iPos; + int iScore; + u64 mCover; + u64 mHighlite; + fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); + assert( iScore>=0 ); + if( iScore>iBestScore ){ + pFragment->iPos = iPos; + pFragment->hlmask = mHighlite; + pFragment->covered = mCover; + iBestScore = iScore; + } + } + + *piScore = iBestScore; + } + sqlite3_free(sIter.aPhrase); + return rc; +} + + +/* +** Append a string to the string-buffer passed as the first argument. +** +** If nAppend is negative, then the length of the string zAppend is +** determined using strlen(). +*/ +static int fts3StringAppend( + StrBuffer *pStr, /* Buffer to append to */ + const char *zAppend, /* Pointer to data to append to buffer */ + int nAppend /* Size of zAppend in bytes (or -1) */ +){ + if( nAppend<0 ){ + nAppend = (int)strlen(zAppend); + } + + /* If there is insufficient space allocated at StrBuffer.z, use realloc() + ** to grow the buffer until so that it is big enough to accomadate the + ** appended data. + */ + if( pStr->n+nAppend+1>=pStr->nAlloc ){ + sqlite3_int64 nAlloc = pStr->nAlloc+(sqlite3_int64)nAppend+100; + char *zNew = sqlite3_realloc64(pStr->z, nAlloc); + if( !zNew ){ + return SQLITE_NOMEM; + } + pStr->z = zNew; + pStr->nAlloc = nAlloc; + } + assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); + + /* Append the data to the string buffer. */ + memcpy(&pStr->z[pStr->n], zAppend, nAppend); + pStr->n += nAppend; + pStr->z[pStr->n] = '\0'; + + return SQLITE_OK; +} + +/* +** The fts3BestSnippet() function often selects snippets that end with a +** query term. That is, the final term of the snippet is always a term +** that requires highlighting. For example, if 'X' is a highlighted term +** and '.' is a non-highlighted term, BestSnippet() may select: +** +** ........X.....X +** +** This function "shifts" the beginning of the snippet forward in the +** document so that there are approximately the same number of +** non-highlighted terms to the right of the final highlighted term as there +** are to the left of the first highlighted term. For example, to this: +** +** ....X.....X.... +** +** This is done as part of extracting the snippet text, not when selecting +** the snippet. Snippet selection is done based on doclists only, so there +** is no way for fts3BestSnippet() to know whether or not the document +** actually contains terms that follow the final highlighted term. +*/ +static int fts3SnippetShift( + Fts3Table *pTab, /* FTS3 table snippet comes from */ + int iLangid, /* Language id to use in tokenizing */ + int nSnippet, /* Number of tokens desired for snippet */ + const char *zDoc, /* Document text to extract snippet from */ + int nDoc, /* Size of buffer zDoc in bytes */ + int *piPos, /* IN/OUT: First token of snippet */ + u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ +){ + u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ + + if( hlmask ){ + int nLeft; /* Tokens to the left of first highlight */ + int nRight; /* Tokens to the right of last highlight */ + int nDesired; /* Ideal number of tokens to shift forward */ + + for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); + for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); + assert( (nSnippet-1-nRight)<=63 && (nSnippet-1-nRight)>=0 ); + nDesired = (nLeft-nRight)/2; + + /* Ideally, the start of the snippet should be pushed forward in the + ** document nDesired tokens. This block checks if there are actually + ** nDesired tokens to the right of the snippet. If so, *piPos and + ** *pHlMask are updated to shift the snippet nDesired tokens to the + ** right. Otherwise, the snippet is shifted by the number of tokens + ** available. + */ + if( nDesired>0 ){ + int nShift; /* Number of tokens to shift snippet by */ + int iCurrent = 0; /* Token counter */ + int rc; /* Return Code */ + sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer_cursor *pC; + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; + + /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) + ** or more tokens in zDoc/nDoc. + */ + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); + if( rc!=SQLITE_OK ){ + return rc; + } + while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ + const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); + } + pMod->xClose(pC); + if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } + + nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; + assert( nShift<=nDesired ); + if( nShift>0 ){ + *piPos += nShift; + *pHlmask = hlmask >> nShift; + } + } + } + return SQLITE_OK; +} + +/* +** Extract the snippet text for fragment pFragment from cursor pCsr and +** append it to string buffer pOut. +*/ +static int fts3SnippetText( + Fts3Cursor *pCsr, /* FTS3 Cursor */ + SnippetFragment *pFragment, /* Snippet to extract */ + int iFragment, /* Fragment number */ + int isLast, /* True for final fragment in snippet */ + int nSnippet, /* Number of tokens in extracted snippet */ + const char *zOpen, /* String inserted before highlighted term */ + const char *zClose, /* String inserted after highlighted term */ + const char *zEllipsis, /* String inserted between snippets */ + StrBuffer *pOut /* Write output here */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc; /* Return code */ + const char *zDoc; /* Document text to extract snippet from */ + int nDoc; /* Size of zDoc in bytes */ + int iCurrent = 0; /* Current token number of document */ + int iEnd = 0; /* Byte offset of end of current token */ + int isShiftDone = 0; /* True after snippet is shifted */ + int iPos = pFragment->iPos; /* First token of snippet */ + u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ + int iCol = pFragment->iCol+1; /* Query column to extract text from */ + sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ + + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ + return SQLITE_NOMEM; + } + return SQLITE_OK; + } + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); + + /* Open a token cursor on the document. */ + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); + if( rc!=SQLITE_OK ){ + return rc; + } + + while( rc==SQLITE_OK ){ + const char *ZDUMMY; /* Dummy argument used with tokenizer */ + int DUMMY1 = -1; /* Dummy argument used with tokenizer */ + int iBegin = 0; /* Offset in zDoc of start of token */ + int iFin = 0; /* Offset in zDoc of end of token */ + int isHighlight = 0; /* True for highlighted terms */ + + /* Variable DUMMY1 is initialized to a negative value above. Elsewhere + ** in the FTS code the variable that the third argument to xNext points to + ** is initialized to zero before the first (*but not necessarily + ** subsequent*) call to xNext(). This is done for a particular application + ** that needs to know whether or not the tokenizer is being used for + ** snippet generation or for some other purpose. + ** + ** Extreme care is required when writing code to depend on this + ** initialization. It is not a documented part of the tokenizer interface. + ** If a tokenizer is used directly by any code outside of FTS, this + ** convention might not be respected. */ + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + /* Special case - the last token of the snippet is also the last token + ** of the column. Append any punctuation that occurred between the end + ** of the previous token and the end of the document to the output. + ** Then break out of the loop. */ + rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); + } + break; + } + if( iCurrentiLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask + ); + isShiftDone = 1; + + /* Now that the shift has been done, check if the initial "..." are + ** required. They are required if (a) this is not the first fragment, + ** or (b) this fragment does not begin at position 0 of its column. + */ + if( rc==SQLITE_OK ){ + if( iPos>0 || iFragment>0 ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + }else if( iBegin ){ + rc = fts3StringAppend(pOut, zDoc, iBegin); + } + } + if( rc!=SQLITE_OK || iCurrent=(iPos+nSnippet) ){ + if( isLast ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + } + break; + } + + /* Set isHighlight to true if this term should be highlighted. */ + isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; + + if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); + if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); + + iEnd = iFin; + } + + pMod->xClose(pC); + return rc; +} + + +/* +** This function is used to count the entries in a column-list (a +** delta-encoded list of term offsets within a single column of a single +** row). When this function is called, *ppCollist should point to the +** beginning of the first varint in the column-list (the varint that +** contains the position of the first matching term in the column data). +** Before returning, *ppCollist is set to point to the first byte after +** the last varint in the column-list (either the 0x00 signifying the end +** of the position-list, or the 0x01 that precedes the column number of +** the next column in the position-list). +** +** The number of elements in the column-list is returned. +*/ +static int fts3ColumnlistCount(char **ppCollist){ + char *pEnd = *ppCollist; + char c = 0; + int nEntry = 0; + + /* A column-list is terminated by either a 0x01 or 0x00. */ + while( 0xFE & (*pEnd | c) ){ + c = *pEnd++ & 0x80; + if( !c ) nEntry++; + } + + *ppCollist = pEnd; + return nEntry; +} + +/* +** This function gathers 'y' or 'b' data for a single phrase. +*/ +static int fts3ExprLHits( + Fts3Expr *pExpr, /* Phrase expression node */ + MatchInfo *p /* Matchinfo context */ +){ + Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; + int iStart; + Fts3Phrase *pPhrase = pExpr->pPhrase; + char *pIter = pPhrase->doclist.pList; + int iCol = 0; + + assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); + if( p->flag==FTS3_MATCHINFO_LHITS ){ + iStart = pExpr->iPhrase * p->nCol; + }else{ + iStart = pExpr->iPhrase * ((p->nCol + 31) / 32); + } + + if( pIter ) while( 1 ){ + int nHit = fts3ColumnlistCount(&pIter); + if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ + if( p->flag==FTS3_MATCHINFO_LHITS ){ + p->aMatchinfo[iStart + iCol] = (u32)nHit; + }else if( nHit ){ + p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); + } + } + assert( *pIter==0x00 || *pIter==0x01 ); + if( *pIter!=0x01 ) break; + pIter++; + pIter += fts3GetVarint32(pIter, &iCol); + if( iCol>=p->nCol ) return FTS_CORRUPT_VTAB; + } + return SQLITE_OK; +} + +/* +** Gather the results for matchinfo directives 'y' and 'b'. +*/ +static int fts3ExprLHitGather( + Fts3Expr *pExpr, + MatchInfo *p +){ + int rc = SQLITE_OK; + assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); + if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ + if( pExpr->pLeft ){ + rc = fts3ExprLHitGather(pExpr->pLeft, p); + if( rc==SQLITE_OK ) rc = fts3ExprLHitGather(pExpr->pRight, p); + }else{ + rc = fts3ExprLHits(pExpr, p); + } + } + return rc; +} + +/* +** fts3ExprIterate() callback used to collect the "global" matchinfo stats +** for a single query. +** +** fts3ExprIterate() callback to load the 'global' elements of a +** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements +** of the matchinfo array that are constant for all rows returned by the +** current query. +** +** Argument pCtx is actually a pointer to a struct of type MatchInfo. This +** function populates Matchinfo.aMatchinfo[] as follows: +** +** for(iCol=0; iColpCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] + ); +} + +/* +** fts3ExprIterate() callback used to collect the "local" part of the +** FTS3_MATCHINFO_HITS array. The local stats are those elements of the +** array that are different for each row returned by the query. +*/ +static int fts3ExprLocalHitsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + int rc = SQLITE_OK; + MatchInfo *p = (MatchInfo *)pCtx; + int iStart = iPhrase * p->nCol * 3; + int i; + + for(i=0; inCol && rc==SQLITE_OK; i++){ + char *pCsr; + rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); + if( pCsr ){ + p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); + }else{ + p->aMatchinfo[iStart+i*3] = 0; + } + } + + return rc; +} + +static int fts3MatchinfoCheck( + Fts3Table *pTab, + char cArg, + char **pzErr +){ + if( (cArg==FTS3_MATCHINFO_NPHRASE) + || (cArg==FTS3_MATCHINFO_NCOL) + || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) + || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) + || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) + || (cArg==FTS3_MATCHINFO_LCS) + || (cArg==FTS3_MATCHINFO_HITS) + || (cArg==FTS3_MATCHINFO_LHITS) + || (cArg==FTS3_MATCHINFO_LHITS_BM) + ){ + return SQLITE_OK; + } + sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); + return SQLITE_ERROR; +} + +static size_t fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ + size_t nVal; /* Number of integers output by cArg */ + + switch( cArg ){ + case FTS3_MATCHINFO_NDOC: + case FTS3_MATCHINFO_NPHRASE: + case FTS3_MATCHINFO_NCOL: + nVal = 1; + break; + + case FTS3_MATCHINFO_AVGLENGTH: + case FTS3_MATCHINFO_LENGTH: + case FTS3_MATCHINFO_LCS: + nVal = pInfo->nCol; + break; + + case FTS3_MATCHINFO_LHITS: + nVal = pInfo->nCol * pInfo->nPhrase; + break; + + case FTS3_MATCHINFO_LHITS_BM: + nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); + break; + + default: + assert( cArg==FTS3_MATCHINFO_HITS ); + nVal = pInfo->nCol * pInfo->nPhrase * 3; + break; + } + + return nVal; +} + +static int fts3MatchinfoSelectDoctotal( + Fts3Table *pTab, + sqlite3_stmt **ppStmt, + sqlite3_int64 *pnDoc, + const char **paLen, + const char **ppEnd +){ + sqlite3_stmt *pStmt; + const char *a; + const char *pEnd; + sqlite3_int64 nDoc; + int n; + + + if( !*ppStmt ){ + int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); + if( rc!=SQLITE_OK ) return rc; + } + pStmt = *ppStmt; + assert( sqlite3_data_count(pStmt)==1 ); + + n = sqlite3_column_bytes(pStmt, 0); + a = sqlite3_column_blob(pStmt, 0); + if( a==0 ){ + return FTS_CORRUPT_VTAB; + } + pEnd = a + n; + a += sqlite3Fts3GetVarintBounded(a, pEnd, &nDoc); + if( nDoc<=0 || a>pEnd ){ + return FTS_CORRUPT_VTAB; + } + *pnDoc = nDoc; + + if( paLen ) *paLen = a; + if( ppEnd ) *ppEnd = pEnd; + return SQLITE_OK; +} + +/* +** An instance of the following structure is used to store state while +** iterating through a multi-column position-list corresponding to the +** hits for a single phrase on a single row in order to calculate the +** values for a matchinfo() FTS3_MATCHINFO_LCS request. +*/ +typedef struct LcsIterator LcsIterator; +struct LcsIterator { + Fts3Expr *pExpr; /* Pointer to phrase expression */ + int iPosOffset; /* Tokens count up to end of this phrase */ + char *pRead; /* Cursor used to iterate through aDoclist */ + int iPos; /* Current position */ +}; + +/* +** If LcsIterator.iCol is set to the following value, the iterator has +** finished iterating through all offsets for all columns. +*/ +#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; + +static int fts3MatchinfoLcsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number (numbered from zero) */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + LcsIterator *aIter = (LcsIterator *)pCtx; + aIter[iPhrase].pExpr = pExpr; + return SQLITE_OK; +} + +/* +** Advance the iterator passed as an argument to the next position. Return +** 1 if the iterator is at EOF or if it now points to the start of the +** position list for the next column. +*/ +static int fts3LcsIteratorAdvance(LcsIterator *pIter){ + char *pRead; + sqlite3_int64 iRead; + int rc = 0; + + if( NEVER(pIter==0) ) return 1; + pRead = pIter->pRead; + pRead += sqlite3Fts3GetVarint(pRead, &iRead); + if( iRead==0 || iRead==1 ){ + pRead = 0; + rc = 1; + }else{ + pIter->iPos += (int)(iRead-2); + } + + pIter->pRead = pRead; + return rc; +} + +/* +** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. +** +** If the call is successful, the longest-common-substring lengths for each +** column are written into the first nCol elements of the pInfo->aMatchinfo[] +** array before returning. SQLITE_OK is returned in this case. +** +** Otherwise, if an error occurs, an SQLite error code is returned and the +** data written to the first nCol elements of pInfo->aMatchinfo[] is +** undefined. +*/ +static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ + LcsIterator *aIter; + int i; + int iCol; + int nToken = 0; + int rc = SQLITE_OK; + + /* Allocate and populate the array of LcsIterator objects. The array + ** contains one element for each matchable phrase in the query. + **/ + aIter = sqlite3Fts3MallocZero(sizeof(LcsIterator) * pCsr->nPhrase); + if( !aIter ) return SQLITE_NOMEM; + (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + + for(i=0; inPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + nToken -= pIter->pExpr->pPhrase->nToken; + pIter->iPosOffset = nToken; + } + + for(iCol=0; iColnCol; iCol++){ + int nLcs = 0; /* LCS value for this column */ + int nLive = 0; /* Number of iterators in aIter not at EOF */ + + for(i=0; inPhrase; i++){ + LcsIterator *pIt = &aIter[i]; + rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); + if( rc!=SQLITE_OK ) goto matchinfo_lcs_out; + if( pIt->pRead ){ + pIt->iPos = pIt->iPosOffset; + fts3LcsIteratorAdvance(pIt); + if( pIt->pRead==0 ){ + rc = FTS_CORRUPT_VTAB; + goto matchinfo_lcs_out; + } + nLive++; + } + } + + while( nLive>0 ){ + LcsIterator *pAdv = 0; /* The iterator to advance by one position */ + int nThisLcs = 0; /* LCS for the current iterator positions */ + + for(i=0; inPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + if( pIter->pRead==0 ){ + /* This iterator is already at EOF for this column. */ + nThisLcs = 0; + }else{ + if( pAdv==0 || pIter->iPosiPos ){ + pAdv = pIter; + } + if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ + nThisLcs++; + }else{ + nThisLcs = 1; + } + if( nThisLcs>nLcs ) nLcs = nThisLcs; + } + } + if( fts3LcsIteratorAdvance(pAdv) ) nLive--; + } + + pInfo->aMatchinfo[iCol] = nLcs; + } + + matchinfo_lcs_out: + sqlite3_free(aIter); + return rc; +} + +/* +** Populate the buffer pInfo->aMatchinfo[] with an array of integers to +** be returned by the matchinfo() function. Argument zArg contains the +** format string passed as the second argument to matchinfo (or the +** default value "pcx" if no second argument was specified). The format +** string has already been validated and the pInfo->aMatchinfo[] array +** is guaranteed to be large enough for the output. +** +** If bGlobal is true, then populate all fields of the matchinfo() output. +** If it is false, then assume that those fields that do not change between +** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) +** have already been populated. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. If a value other than SQLITE_OK is returned, the state the +** pInfo->aMatchinfo[] buffer is left in is undefined. +*/ +static int fts3MatchinfoValues( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + int bGlobal, /* True to grab the global stats */ + MatchInfo *pInfo, /* Matchinfo context object */ + const char *zArg /* Matchinfo format string */ +){ + int rc = SQLITE_OK; + int i; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_stmt *pSelect = 0; + + for(i=0; rc==SQLITE_OK && zArg[i]; i++){ + pInfo->flag = zArg[i]; + switch( zArg[i] ){ + case FTS3_MATCHINFO_NPHRASE: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; + break; + + case FTS3_MATCHINFO_NCOL: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; + break; + + case FTS3_MATCHINFO_NDOC: + if( bGlobal ){ + sqlite3_int64 nDoc = 0; + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0, 0); + pInfo->aMatchinfo[0] = (u32)nDoc; + } + break; + + case FTS3_MATCHINFO_AVGLENGTH: + if( bGlobal ){ + sqlite3_int64 nDoc; /* Number of rows in table */ + const char *a; /* Aggregate column length array */ + const char *pEnd; /* First byte past end of length array */ + + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a, &pEnd); + if( rc==SQLITE_OK ){ + int iCol; + for(iCol=0; iColnCol; iCol++){ + u32 iVal; + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + if( a>pEnd ){ + rc = SQLITE_CORRUPT_VTAB; + break; + } + iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); + pInfo->aMatchinfo[iCol] = iVal; + } + } + } + break; + + case FTS3_MATCHINFO_LENGTH: { + sqlite3_stmt *pSelectDocsize = 0; + rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); + if( rc==SQLITE_OK ){ + int iCol; + const char *a = sqlite3_column_blob(pSelectDocsize, 0); + const char *pEnd = a + sqlite3_column_bytes(pSelectDocsize, 0); + for(iCol=0; iColnCol; iCol++){ + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarintBounded(a, pEnd, &nToken); + if( a>pEnd ){ + rc = SQLITE_CORRUPT_VTAB; + break; + } + pInfo->aMatchinfo[iCol] = (u32)nToken; + } + } + sqlite3_reset(pSelectDocsize); + break; + } + + case FTS3_MATCHINFO_LCS: + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3MatchinfoLcs(pCsr, pInfo); + } + break; + + case FTS3_MATCHINFO_LHITS_BM: + case FTS3_MATCHINFO_LHITS: { + size_t nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); + memset(pInfo->aMatchinfo, 0, nZero); + rc = fts3ExprLHitGather(pCsr->pExpr, pInfo); + break; + } + + default: { + Fts3Expr *pExpr; + assert( zArg[i]==FTS3_MATCHINFO_HITS ); + pExpr = pCsr->pExpr; + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc!=SQLITE_OK ) break; + if( bGlobal ){ + if( pCsr->pDeferred ){ + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0); + if( rc!=SQLITE_OK ) break; + } + rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); + sqlite3Fts3EvalTestDeferred(pCsr, &rc); + if( rc!=SQLITE_OK ) break; + } + (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); + break; + } + } + + pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); + } + + sqlite3_reset(pSelect); + return rc; +} + + +/* +** Populate pCsr->aMatchinfo[] with data for the current row. The +** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). +*/ +static void fts3GetMatchinfo( + sqlite3_context *pCtx, /* Return results here */ + Fts3Cursor *pCsr, /* FTS3 Cursor object */ + const char *zArg /* Second argument to matchinfo() function */ +){ + MatchInfo sInfo; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int bGlobal = 0; /* Collect 'global' stats as well as local */ + + u32 *aOut = 0; + void (*xDestroyOut)(void*) = 0; + + memset(&sInfo, 0, sizeof(MatchInfo)); + sInfo.pCursor = pCsr; + sInfo.nCol = pTab->nColumn; + + /* If there is cached matchinfo() data, but the format string for the + ** cache does not match the format string for this request, discard + ** the cached data. */ + if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){ + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); + pCsr->pMIBuffer = 0; + } + + /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the + ** matchinfo function has been called for this query. In this case + ** allocate the array used to accumulate the matchinfo data and + ** initialize those elements that are constant for every row. + */ + if( pCsr->pMIBuffer==0 ){ + size_t nMatchinfo = 0; /* Number of u32 elements in match-info */ + int i; /* Used to iterate through zArg */ + + /* Determine the number of phrases in the query */ + pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); + sInfo.nPhrase = pCsr->nPhrase; + + /* Determine the number of integers in the buffer returned by this call. */ + for(i=0; zArg[i]; i++){ + char *zErr = 0; + if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ + sqlite3_result_error(pCtx, zErr, -1); + sqlite3_free(zErr); + return; + } + nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); + } + + /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ + pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg); + if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM; + + pCsr->isMatchinfoNeeded = 1; + bGlobal = 1; + } + + if( rc==SQLITE_OK ){ + xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut); + if( xDestroyOut==0 ){ + rc = SQLITE_NOMEM; + } + } + + if( rc==SQLITE_OK ){ + sInfo.aMatchinfo = aOut; + sInfo.nPhrase = pCsr->nPhrase; + rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); + if( bGlobal ){ + fts3MIBufferSetGlobal(pCsr->pMIBuffer); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + if( xDestroyOut ) xDestroyOut(aOut); + }else{ + int n = pCsr->pMIBuffer->nElem * sizeof(u32); + sqlite3_result_blob(pCtx, aOut, n, xDestroyOut); + } +} + +/* +** Implementation of snippet() function. +*/ +SQLITE_PRIVATE void sqlite3Fts3Snippet( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr, /* Cursor object */ + const char *zStart, /* Snippet start text - "" */ + const char *zEnd, /* Snippet end text - "" */ + const char *zEllipsis, /* Snippet ellipsis text - "..." */ + int iCol, /* Extract snippet from this column */ + int nToken /* Approximate number of tokens in snippet */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int i; + StrBuffer res = {0, 0, 0}; + + /* The returned text includes up to four fragments of text extracted from + ** the data in the current row. The first iteration of the for(...) loop + ** below attempts to locate a single fragment of text nToken tokens in + ** size that contains at least one instance of all phrases in the query + ** expression that appear in the current row. If such a fragment of text + ** cannot be found, the second iteration of the loop attempts to locate + ** a pair of fragments, and so on. + */ + int nSnippet = 0; /* Number of fragments in this snippet */ + SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ + int nFToken = -1; /* Number of tokens in each fragment */ + + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + + /* Limit the snippet length to 64 tokens. */ + if( nToken<-64 ) nToken = -64; + if( nToken>+64 ) nToken = +64; + + for(nSnippet=1; 1; nSnippet++){ + + int iSnip; /* Loop counter 0..nSnippet-1 */ + u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ + u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ + + if( nToken>=0 ){ + nFToken = (nToken+nSnippet-1) / nSnippet; + }else{ + nFToken = -1 * nToken; + } + + for(iSnip=0; iSnipnColumn; iRead++){ + SnippetFragment sF = {0, 0, 0, 0}; + int iS = 0; + if( iCol>=0 && iRead!=iCol ) continue; + + /* Find the best snippet of nFToken tokens in column iRead. */ + rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); + if( rc!=SQLITE_OK ){ + goto snippet_out; + } + if( iS>iBestScore ){ + *pFragment = sF; + iBestScore = iS; + } + } + + mCovered |= pFragment->covered; + } + + /* If all query phrases seen by fts3BestSnippet() are present in at least + ** one of the nSnippet snippet fragments, break out of the loop. + */ + assert( (mCovered&mSeen)==mCovered ); + if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; + } + + assert( nFToken>0 ); + + for(i=0; ipCsr, pExpr, p->iCol, &pList); + nTerm = pExpr->pPhrase->nToken; + if( pList ){ + fts3GetDeltaPosition(&pList, &iPos); + assert_fts3_nc( iPos>=0 ); + } + + for(iTerm=0; iTermaTerm[p->iTerm++]; + pT->iOff = nTerm-iTerm-1; + pT->pList = pList; + pT->iPos = iPos; + } + + return rc; +} + +/* +** Implementation of offsets() function. +*/ +SQLITE_PRIVATE void sqlite3Fts3Offsets( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr /* Cursor object */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; + int rc; /* Return Code */ + int nToken; /* Number of tokens in query */ + int iCol; /* Column currently being processed */ + StrBuffer res = {0, 0, 0}; /* Result string */ + TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ + + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + + memset(&sCtx, 0, sizeof(sCtx)); + assert( pCsr->isRequireSeek==0 ); + + /* Count the number of terms in the query */ + rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); + if( rc!=SQLITE_OK ) goto offsets_out; + + /* Allocate the array of TermOffset iterators. */ + sCtx.aTerm = (TermOffset *)sqlite3Fts3MallocZero(sizeof(TermOffset)*nToken); + if( 0==sCtx.aTerm ){ + rc = SQLITE_NOMEM; + goto offsets_out; + } + sCtx.iDocid = pCsr->iPrevId; + sCtx.pCsr = pCsr; + + /* Loop through the table columns, appending offset information to + ** string-buffer res for each column. + */ + for(iCol=0; iColnColumn; iCol++){ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ + const char *ZDUMMY; /* Dummy argument used with xNext() */ + int NDUMMY = 0; /* Dummy argument used with xNext() */ + int iStart = 0; + int iEnd = 0; + int iCurrent = 0; + const char *zDoc; + int nDoc; + + /* Initialize the contents of sCtx.aTerm[] for column iCol. This + ** operation may fail if the database contains corrupt records. + */ + sCtx.iCol = iCol; + sCtx.iTerm = 0; + rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); + if( rc!=SQLITE_OK ) goto offsets_out; + + /* Retreive the text stored in column iCol. If an SQL NULL is stored + ** in column iCol, jump immediately to the next iteration of the loop. + ** If an OOM occurs while retrieving the data (this can happen if SQLite + ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM + ** to the caller. + */ + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ + continue; + } + rc = SQLITE_NOMEM; + goto offsets_out; + } + + /* Initialize a tokenizer iterator to iterate through column iCol. */ + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, + zDoc, nDoc, &pC + ); + if( rc!=SQLITE_OK ) goto offsets_out; + + rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + while( rc==SQLITE_OK ){ + int i; /* Used to loop through terms */ + int iMinPos = 0x7FFFFFFF; /* Position of next token */ + TermOffset *pTerm = 0; /* TermOffset associated with next token */ + + for(i=0; ipList && (pT->iPos-pT->iOff)iPos-pT->iOff; + pTerm = pT; + } + } + + if( !pTerm ){ + /* All offsets for this column have been gathered. */ + rc = SQLITE_DONE; + }else{ + assert_fts3_nc( iCurrent<=iMinPos ); + if( 0==(0xFE&*pTerm->pList) ){ + pTerm->pList = 0; + }else{ + fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); + } + while( rc==SQLITE_OK && iCurrentxNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + } + if( rc==SQLITE_OK ){ + char aBuffer[64]; + sqlite3_snprintf(sizeof(aBuffer), aBuffer, + "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart + ); + rc = fts3StringAppend(&res, aBuffer, -1); + }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ + rc = FTS_CORRUPT_VTAB; + } + } + } + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + } + + pMod->xClose(pC); + if( rc!=SQLITE_OK ) goto offsets_out; + } + + offsets_out: + sqlite3_free(sCtx.aTerm); + assert( rc!=SQLITE_DONE ); + sqlite3Fts3SegmentsClose(pTab); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); + }else{ + sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); + } + return; +} + +/* +** Implementation of matchinfo() function. +*/ +SQLITE_PRIVATE void sqlite3Fts3Matchinfo( + sqlite3_context *pContext, /* Function call context */ + Fts3Cursor *pCsr, /* FTS3 table cursor */ + const char *zArg /* Second arg to matchinfo() function */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + const char *zFormat; + + if( zArg ){ + zFormat = zArg; + }else{ + zFormat = FTS3_MATCHINFO_DEFAULT; + } + + if( !pCsr->pExpr ){ + sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); + return; + }else{ + /* Retrieve matchinfo() data. */ + fts3GetMatchinfo(pContext, pCsr, zFormat); + sqlite3Fts3SegmentsClose(pTab); + } +} + +#endif + +/************** End of fts3_snippet.c ****************************************/ +/************** Begin file fts3_unicode.c ************************************/ +/* +** 2012 May 24 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Implementation of the "unicode" full-text-search tokenizer. +*/ + +#ifndef SQLITE_DISABLE_FTS3_UNICODE + +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ +/* #include */ +/* #include */ + +/* #include "fts3_tokenizer.h" */ + +/* +** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied +** from the sqlite3 source file utf.c. If this file is compiled as part +** of the amalgamation, they are not required. +*/ +#ifndef SQLITE_AMALGAMATION + +static const unsigned char sqlite3Utf8Trans1[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, +}; + +#define READ_UTF8(zIn, zTerm, c) \ + c = *(zIn++); \ + if( c>=0xc0 ){ \ + c = sqlite3Utf8Trans1[c-0xc0]; \ + while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + if( c<0x80 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + } + +#define WRITE_UTF8(zOut, c) { \ + if( c<0x00080 ){ \ + *zOut++ = (u8)(c&0xFF); \ + } \ + else if( c<0x00800 ){ \ + *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ + else if( c<0x10000 ){ \ + *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + }else{ \ + *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ + *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ +} + +#endif /* ifndef SQLITE_AMALGAMATION */ + +typedef struct unicode_tokenizer unicode_tokenizer; +typedef struct unicode_cursor unicode_cursor; + +struct unicode_tokenizer { + sqlite3_tokenizer base; + int eRemoveDiacritic; + int nException; + int *aiException; +}; + +struct unicode_cursor { + sqlite3_tokenizer_cursor base; + const unsigned char *aInput; /* Input text being tokenized */ + int nInput; /* Size of aInput[] in bytes */ + int iOff; /* Current offset within aInput[] */ + int iToken; /* Index of next token to be returned */ + char *zToken; /* storage for current token */ + int nAlloc; /* space allocated at zToken */ +}; + + +/* +** Destroy a tokenizer allocated by unicodeCreate(). +*/ +static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ + if( pTokenizer ){ + unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; + sqlite3_free(p->aiException); + sqlite3_free(p); + } + return SQLITE_OK; +} + +/* +** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE +** statement has specified that the tokenizer for this table shall consider +** all characters in string zIn/nIn to be separators (if bAlnum==0) or +** token characters (if bAlnum==1). +** +** For each codepoint in the zIn/nIn string, this function checks if the +** sqlite3FtsUnicodeIsalnum() function already returns the desired result. +** If so, no action is taken. Otherwise, the codepoint is added to the +** unicode_tokenizer.aiException[] array. For the purposes of tokenization, +** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all +** codepoints in the aiException[] array. +** +** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() +** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. +** It is not possible to change the behavior of the tokenizer with respect +** to these codepoints. +*/ +static int unicodeAddExceptions( + unicode_tokenizer *p, /* Tokenizer to add exceptions to */ + int bAlnum, /* Replace Isalnum() return value with this */ + const char *zIn, /* Array of characters to make exceptions */ + int nIn /* Length of z in bytes */ +){ + const unsigned char *z = (const unsigned char *)zIn; + const unsigned char *zTerm = &z[nIn]; + unsigned int iCode; + int nEntry = 0; + + assert( bAlnum==0 || bAlnum==1 ); + + while( zaiException,(p->nException+nEntry)*sizeof(int)); + if( aNew==0 ) return SQLITE_NOMEM; + nNew = p->nException; + + z = (const unsigned char *)zIn; + while( zi; j--) aNew[j] = aNew[j-1]; + aNew[i] = (int)iCode; + nNew++; + } + } + p->aiException = aNew; + p->nException = nNew; + } + + return SQLITE_OK; +} + +/* +** Return true if the p->aiException[] array contains the value iCode. +*/ +static int unicodeIsException(unicode_tokenizer *p, int iCode){ + if( p->nException>0 ){ + int *a = p->aiException; + int iLo = 0; + int iHi = p->nException-1; + + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( iCode==a[iTest] ){ + return 1; + }else if( iCode>a[iTest] ){ + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + } + + return 0; +} + +/* +** Return true if, for the purposes of tokenization, codepoint iCode is +** considered a token character (not a separator). +*/ +static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){ + assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); + return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode); +} + +/* +** Create a new tokenizer instance. +*/ +static int unicodeCreate( + int nArg, /* Size of array argv[] */ + const char * const *azArg, /* Tokenizer creation arguments */ + sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ +){ + unicode_tokenizer *pNew; /* New tokenizer object */ + int i; + int rc = SQLITE_OK; + + pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); + if( pNew==NULL ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(unicode_tokenizer)); + pNew->eRemoveDiacritic = 1; + + for(i=0; rc==SQLITE_OK && ieRemoveDiacritic = 1; + } + else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ + pNew->eRemoveDiacritic = 0; + } + else if( n==19 && memcmp("remove_diacritics=2", z, 19)==0 ){ + pNew->eRemoveDiacritic = 2; + } + else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); + } + else if( n>=11 && memcmp("separators=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); + } + else{ + /* Unrecognized argument */ + rc = SQLITE_ERROR; + } + } + + if( rc!=SQLITE_OK ){ + unicodeDestroy((sqlite3_tokenizer *)pNew); + pNew = 0; + } + *pp = (sqlite3_tokenizer *)pNew; + return rc; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int unicodeOpen( + sqlite3_tokenizer *p, /* The tokenizer */ + const char *aInput, /* Input string */ + int nInput, /* Size of string aInput in bytes */ + sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */ +){ + unicode_cursor *pCsr; + + pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); + if( pCsr==0 ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(unicode_cursor)); + + pCsr->aInput = (const unsigned char *)aInput; + if( aInput==0 ){ + pCsr->nInput = 0; + pCsr->aInput = (const unsigned char*)""; + }else if( nInput<0 ){ + pCsr->nInput = (int)strlen(aInput); + }else{ + pCsr->nInput = nInput; + } + + *pp = &pCsr->base; + UNUSED_PARAMETER(p); + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to +** simpleOpen() above. +*/ +static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ + unicode_cursor *pCsr = (unicode_cursor *) pCursor; + sqlite3_free(pCsr->zToken); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to simpleOpen(). +*/ +static int unicodeNext( + sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ + const char **paToken, /* OUT: Token text */ + int *pnToken, /* OUT: Number of bytes at *paToken */ + int *piStart, /* OUT: Starting offset of token */ + int *piEnd, /* OUT: Ending offset of token */ + int *piPos /* OUT: Position integer of token */ +){ + unicode_cursor *pCsr = (unicode_cursor *)pC; + unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); + unsigned int iCode = 0; + char *zOut; + const unsigned char *z = &pCsr->aInput[pCsr->iOff]; + const unsigned char *zStart = z; + const unsigned char *zEnd; + const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; + + /* Scan past any delimiter characters before the start of the next token. + ** Return SQLITE_DONE early if this takes us all the way to the end of + ** the input. */ + while( z=zTerm ) return SQLITE_DONE; + + zOut = pCsr->zToken; + do { + int iOut; + + /* Grow the output buffer if required. */ + if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ + char *zNew = sqlite3_realloc64(pCsr->zToken, pCsr->nAlloc+64); + if( !zNew ) return SQLITE_NOMEM; + zOut = &zNew[zOut - pCsr->zToken]; + pCsr->zToken = zNew; + pCsr->nAlloc += 64; + } + + /* Write the folded case of the last character read to the output */ + zEnd = z; + iOut = sqlite3FtsUnicodeFold((int)iCode, p->eRemoveDiacritic); + if( iOut ){ + WRITE_UTF8(zOut, iOut); + } + + /* If the cursor is not at EOF, read the next character */ + if( z>=zTerm ) break; + READ_UTF8(z, zTerm, iCode); + }while( unicodeIsAlnum(p, (int)iCode) + || sqlite3FtsUnicodeIsdiacritic((int)iCode) + ); + + /* Set the output variables and return. */ + pCsr->iOff = (int)(z - pCsr->aInput); + *paToken = pCsr->zToken; + *pnToken = (int)(zOut - pCsr->zToken); + *piStart = (int)(zStart - pCsr->aInput); + *piEnd = (int)(zEnd - pCsr->aInput); + *piPos = pCsr->iToken++; + return SQLITE_OK; +} + +/* +** Set *ppModule to a pointer to the sqlite3_tokenizer_module +** structure for the unicode tokenizer. +*/ +SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){ + static const sqlite3_tokenizer_module module = { + 0, + unicodeCreate, + unicodeDestroy, + unicodeOpen, + unicodeClose, + unicodeNext, + 0, + }; + *ppModule = &module; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ +#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ + +/************** End of fts3_unicode.c ****************************************/ +/************** Begin file fts3_unicode2.c ***********************************/ +/* +** 2012-05-25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +/* +** DO NOT EDIT THIS MACHINE GENERATED FILE. +*/ + +#ifndef SQLITE_DISABLE_FTS3_UNICODE +#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) + +/* #include */ + +/* +** Return true if the argument corresponds to a unicode codepoint +** classified as either a letter or a number. Otherwise false. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int c){ + /* Each unsigned integer in the following array corresponds to a contiguous + ** range of unicode codepoints that are not either letters or numbers (i.e. + ** codepoints for which this function should return 0). + ** + ** The most significant 22 bits in each 32-bit value contain the first + ** codepoint in the range. The least significant 10 bits are used to store + ** the size of the range (always at least 1). In other words, the value + ** ((C<<22) + N) represents a range of N codepoints starting with codepoint + ** C. It is not possible to represent a range larger than 1023 codepoints + ** using this format. + */ + static const unsigned int aEntry[] = { + 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, + 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, + 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, + 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, + 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, + 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, + 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, + 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, + 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, + 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, + 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, + 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, + 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, + 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, + 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, + 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, + 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, + 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, + 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, + 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, + 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, + 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, + 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, + 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, + 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, + 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, + 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, + 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, + 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, + 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, + 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, + 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, + 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, + 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, + 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, + 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, + 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, + 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, + 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, + 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, + 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, + 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, + 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, + 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, + 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, + 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, + 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, + 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, + 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, + 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, + 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, + 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, + 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, + 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, + 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, + 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, + 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, + 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, + 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, + 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, + 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, + 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, + 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, + 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, + 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, + 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, + 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, + 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, + 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, + 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, + 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, + 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, + 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, + 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, + 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, + 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, + 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, + 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, + 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, + 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, + 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, + 0x380400F0, + }; + static const unsigned int aAscii[4] = { + 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, + }; + + if( (unsigned int)c<128 ){ + return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 ); + }else if( (unsigned int)c<(1<<22) ){ + unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; + int iRes = 0; + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aEntry[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( aEntry[0]=aEntry[iRes] ); + return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + } + return 1; +} + + +/* +** If the argument is a codepoint corresponding to a lowercase letter +** in the ASCII range with a diacritic added, return the codepoint +** of the ASCII letter only. For example, if passed 235 - "LATIN +** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER +** E"). The resuls of passing a codepoint that corresponds to an +** uppercase letter are undefined. +*/ +static int remove_diacritic(int c, int bComplex){ + unsigned short aDia[] = { + 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, + 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, + 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, + 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, + 3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896, + 3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106, + 4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344, + 4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198, + 6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468, + 61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704, + 61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914, + 61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218, + 62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554, + 62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766, + 62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118, + 63182, 63242, 63274, 63310, 63368, 63390, + }; +#define HIBIT ((unsigned char)0x80) + unsigned char aChar[] = { + '\0', 'a', 'c', 'e', 'i', 'n', + 'o', 'u', 'y', 'y', 'a', 'c', + 'd', 'e', 'e', 'g', 'h', 'i', + 'j', 'k', 'l', 'n', 'o', 'r', + 's', 't', 'u', 'u', 'w', 'y', + 'z', 'o', 'u', 'a', 'i', 'o', + 'u', 'u'|HIBIT, 'a'|HIBIT, 'g', 'k', 'o', + 'o'|HIBIT, 'j', 'g', 'n', 'a'|HIBIT, 'a', + 'e', 'i', 'o', 'r', 'u', 's', + 't', 'h', 'a', 'e', 'o'|HIBIT, 'o', + 'o'|HIBIT, 'y', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a', 'b', + 'c'|HIBIT, 'd', 'd', 'e'|HIBIT, 'e', 'e'|HIBIT, + 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT, + 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n', + 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's', + 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w', + 'w', 'x', 'y', 'z', 'h', 't', + 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, + 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT, + 'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y', + }; + + unsigned int key = (((unsigned int)c)<<3) | 0x00000007; + int iRes = 0; + int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aDia[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( key>=aDia[iRes] ); + if( bComplex==0 && (aChar[iRes] & 0x80) ) return c; + return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F); +} + + +/* +** Return true if the argument interpreted as a unicode codepoint +** is a diacritical modifier character. +*/ +SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int c){ + unsigned int mask0 = 0x08029FDF; + unsigned int mask1 = 0x000361F8; + if( c<768 || c>817 ) return 0; + return (c < 768+32) ? + (mask0 & ((unsigned int)1 << (c-768))) : + (mask1 & ((unsigned int)1 << (c-768-32))); +} + + +/* +** Interpret the argument as a unicode codepoint. If the codepoint +** is an upper case character that has a lower case equivalent, +** return the codepoint corresponding to the lower case version. +** Otherwise, return a copy of the argument. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int eRemoveDiacritic){ + /* Each entry in the following array defines a rule for folding a range + ** of codepoints to lower case. The rule applies to a range of nRange + ** codepoints starting at codepoint iCode. + ** + ** If the least significant bit in flags is clear, then the rule applies + ** to all nRange codepoints (i.e. all nRange codepoints are upper case and + ** need to be folded). Or, if it is set, then the rule only applies to + ** every second codepoint in the range, starting with codepoint C. + ** + ** The 7 most significant bits in flags are an index into the aiOff[] + ** array. If a specific codepoint C does require folding, then its lower + ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). + ** + ** The contents of this array are generated by parsing the CaseFolding.txt + ** file distributed as part of the "Unicode Character Database". See + ** http://www.unicode.org for details. + */ + static const struct TableEntry { + unsigned short iCode; + unsigned char flags; + unsigned char nRange; + } aEntry[] = { + {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, + {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, + {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, + {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, + {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, + {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, + {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, + {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, + {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, + {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, + {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, + {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, + {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, + {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, + {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, + {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, + {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, + {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, + {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, + {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, + {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, + {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, + {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, + {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, + {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, + {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, + {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, + {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, + {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, + {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, + {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, + {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, + {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, + {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, + {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, + {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, + {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, + {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, + {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, + {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, + {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, + {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, + {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, + {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, + {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, + {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, + {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, + {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, + {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, + {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, + {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, + {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, + {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, + {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, + {65313, 14, 26}, + }; + static const unsigned short aiOff[] = { + 1, 2, 8, 15, 16, 26, 28, 32, + 37, 38, 40, 48, 63, 64, 69, 71, + 79, 80, 116, 202, 203, 205, 206, 207, + 209, 210, 211, 213, 214, 217, 218, 219, + 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, + 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, + 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, + 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, + 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, + 65514, 65521, 65527, 65528, 65529, + }; + + int ret = c; + + assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); + + if( c<128 ){ + if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); + }else if( c<65536 ){ + const struct TableEntry *p; + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + int iRes = -1; + + assert( c>aEntry[0].iCode ); + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + int cmp = (c - aEntry[iTest].iCode); + if( cmp>=0 ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + + assert( iRes>=0 && c>=aEntry[iRes].iCode ); + p = &aEntry[iRes]; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; + assert( ret>0 ); + } + + if( eRemoveDiacritic ){ + ret = remove_diacritic(ret, eRemoveDiacritic==2); + } + } + + else if( c>=66560 && c<66600 ){ + ret = c + 40; + } + + return ret; +} +#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ +#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ + +/************** End of fts3_unicode2.c ***************************************/ +/************** Begin file json.c ********************************************/ +/* +** 2015-08-12 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This SQLite JSON functions. +** +** This file began as an extension in ext/misc/json1.c in 2015. That +** extension proved so useful that it has now been moved into the core. +** +** For the time being, all JSON is stored as pure text. (We might add +** a JSONB type in the future which stores a binary encoding of JSON in +** a BLOB, but there is no support for JSONB in the current implementation. +** This implementation parses JSON text at 250 MB/s, so it is hard to see +** how JSONB might improve on that.) +*/ +#ifndef SQLITE_OMIT_JSON +/* #include "sqliteInt.h" */ + +/* +** Growing our own isspace() routine this way is twice as fast as +** the library isspace() function, resulting in a 7% overall performance +** increase for the parser. (Ubuntu14.10 gcc 4.8.4 x64 with -Os). +*/ +static const char jsonIsSpace[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; +#define fast_isspace(x) (jsonIsSpace[(unsigned char)x]) + +#if !defined(SQLITE_DEBUG) && !defined(SQLITE_COVERAGE_TEST) +# define VVA(X) +#else +# define VVA(X) X +#endif + +/* Objects */ +typedef struct JsonString JsonString; +typedef struct JsonNode JsonNode; +typedef struct JsonParse JsonParse; + +/* An instance of this object represents a JSON string +** under construction. Really, this is a generic string accumulator +** that can be and is used to create strings other than JSON. +*/ +struct JsonString { + sqlite3_context *pCtx; /* Function context - put error messages here */ + char *zBuf; /* Append JSON content here */ + u64 nAlloc; /* Bytes of storage available in zBuf[] */ + u64 nUsed; /* Bytes of zBuf[] currently used */ + u8 bStatic; /* True if zBuf is static space */ + u8 bErr; /* True if an error has been encountered */ + char zSpace[100]; /* Initial static space */ +}; + +/* JSON type values +*/ +#define JSON_NULL 0 +#define JSON_TRUE 1 +#define JSON_FALSE 2 +#define JSON_INT 3 +#define JSON_REAL 4 +#define JSON_STRING 5 +#define JSON_ARRAY 6 +#define JSON_OBJECT 7 + +/* The "subtype" set for JSON values */ +#define JSON_SUBTYPE 74 /* Ascii for "J" */ + +/* +** Names of the various JSON types: +*/ +static const char * const jsonType[] = { + "null", "true", "false", "integer", "real", "text", "array", "object" +}; + +/* Bit values for the JsonNode.jnFlag field +*/ +#define JNODE_RAW 0x01 /* Content is raw, not JSON encoded */ +#define JNODE_ESCAPE 0x02 /* Content is text with \ escapes */ +#define JNODE_REMOVE 0x04 /* Do not output */ +#define JNODE_REPLACE 0x08 /* Replace with JsonNode.u.iReplace */ +#define JNODE_PATCH 0x10 /* Patch with JsonNode.u.pPatch */ +#define JNODE_APPEND 0x20 /* More ARRAY/OBJECT entries at u.iAppend */ +#define JNODE_LABEL 0x40 /* Is a label of an object */ + + +/* A single node of parsed JSON +*/ +struct JsonNode { + u8 eType; /* One of the JSON_ type values */ + u8 jnFlags; /* JNODE flags */ + u8 eU; /* Which union element to use */ + u32 n; /* Bytes of content, or number of sub-nodes */ + union { + const char *zJContent; /* 1: Content for INT, REAL, and STRING */ + u32 iAppend; /* 2: More terms for ARRAY and OBJECT */ + u32 iKey; /* 3: Key for ARRAY objects in json_tree() */ + u32 iReplace; /* 4: Replacement content for JNODE_REPLACE */ + JsonNode *pPatch; /* 5: Node chain of patch for JNODE_PATCH */ + } u; +}; + +/* A completely parsed JSON string +*/ +struct JsonParse { + u32 nNode; /* Number of slots of aNode[] used */ + u32 nAlloc; /* Number of slots of aNode[] allocated */ + JsonNode *aNode; /* Array of nodes containing the parse */ + const char *zJson; /* Original JSON string */ + u32 *aUp; /* Index of parent of each node */ + u8 oom; /* Set to true if out of memory */ + u8 nErr; /* Number of errors seen */ + u16 iDepth; /* Nesting depth */ + int nJson; /* Length of the zJson string in bytes */ + u32 iHold; /* Replace cache line with the lowest iHold value */ +}; + +/* +** Maximum nesting depth of JSON for this implementation. +** +** This limit is needed to avoid a stack overflow in the recursive +** descent parser. A depth of 2000 is far deeper than any sane JSON +** should go. +*/ +#define JSON_MAX_DEPTH 2000 + +/************************************************************************** +** Utility routines for dealing with JsonString objects +**************************************************************************/ + +/* Set the JsonString object to an empty string +*/ +static void jsonZero(JsonString *p){ + p->zBuf = p->zSpace; + p->nAlloc = sizeof(p->zSpace); + p->nUsed = 0; + p->bStatic = 1; +} + +/* Initialize the JsonString object +*/ +static void jsonInit(JsonString *p, sqlite3_context *pCtx){ + p->pCtx = pCtx; + p->bErr = 0; + jsonZero(p); +} + + +/* Free all allocated memory and reset the JsonString object back to its +** initial state. +*/ +static void jsonReset(JsonString *p){ + if( !p->bStatic ) sqlite3_free(p->zBuf); + jsonZero(p); +} + + +/* Report an out-of-memory (OOM) condition +*/ +static void jsonOom(JsonString *p){ + p->bErr = 1; + sqlite3_result_error_nomem(p->pCtx); + jsonReset(p); +} + +/* Enlarge pJson->zBuf so that it can hold at least N more bytes. +** Return zero on success. Return non-zero on an OOM error +*/ +static int jsonGrow(JsonString *p, u32 N){ + u64 nTotal = NnAlloc ? p->nAlloc*2 : p->nAlloc+N+10; + char *zNew; + if( p->bStatic ){ + if( p->bErr ) return 1; + zNew = sqlite3_malloc64(nTotal); + if( zNew==0 ){ + jsonOom(p); + return SQLITE_NOMEM; + } + memcpy(zNew, p->zBuf, (size_t)p->nUsed); + p->zBuf = zNew; + p->bStatic = 0; + }else{ + zNew = sqlite3_realloc64(p->zBuf, nTotal); + if( zNew==0 ){ + jsonOom(p); + return SQLITE_NOMEM; + } + p->zBuf = zNew; + } + p->nAlloc = nTotal; + return SQLITE_OK; +} + +/* Append N bytes from zIn onto the end of the JsonString string. +*/ +static void jsonAppendRaw(JsonString *p, const char *zIn, u32 N){ + if( N==0 ) return; + if( (N+p->nUsed >= p->nAlloc) && jsonGrow(p,N)!=0 ) return; + memcpy(p->zBuf+p->nUsed, zIn, N); + p->nUsed += N; +} + +/* Append formatted text (not to exceed N bytes) to the JsonString. +*/ +static void jsonPrintf(int N, JsonString *p, const char *zFormat, ...){ + va_list ap; + if( (p->nUsed + N >= p->nAlloc) && jsonGrow(p, N) ) return; + va_start(ap, zFormat); + sqlite3_vsnprintf(N, p->zBuf+p->nUsed, zFormat, ap); + va_end(ap); + p->nUsed += (int)strlen(p->zBuf+p->nUsed); +} + +/* Append a single character +*/ +static void jsonAppendChar(JsonString *p, char c){ + if( p->nUsed>=p->nAlloc && jsonGrow(p,1)!=0 ) return; + p->zBuf[p->nUsed++] = c; +} + +/* Append a comma separator to the output buffer, if the previous +** character is not '[' or '{'. +*/ +static void jsonAppendSeparator(JsonString *p){ + char c; + if( p->nUsed==0 ) return; + c = p->zBuf[p->nUsed-1]; + if( c!='[' && c!='{' ) jsonAppendChar(p, ','); +} + +/* Append the N-byte string in zIn to the end of the JsonString string +** under construction. Enclose the string in "..." and escape +** any double-quotes or backslash characters contained within the +** string. +*/ +static void jsonAppendString(JsonString *p, const char *zIn, u32 N){ + u32 i; + if( zIn==0 || ((N+p->nUsed+2 >= p->nAlloc) && jsonGrow(p,N+2)!=0) ) return; + p->zBuf[p->nUsed++] = '"'; + for(i=0; inUsed+N+3-i > p->nAlloc) && jsonGrow(p,N+3-i)!=0 ) return; + p->zBuf[p->nUsed++] = '\\'; + }else if( c<=0x1f ){ + static const char aSpecial[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 'b', 't', 'n', 0, 'f', 'r', 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + assert( sizeof(aSpecial)==32 ); + assert( aSpecial['\b']=='b' ); + assert( aSpecial['\f']=='f' ); + assert( aSpecial['\n']=='n' ); + assert( aSpecial['\r']=='r' ); + assert( aSpecial['\t']=='t' ); + if( aSpecial[c] ){ + c = aSpecial[c]; + goto json_simple_escape; + } + if( (p->nUsed+N+7+i > p->nAlloc) && jsonGrow(p,N+7-i)!=0 ) return; + p->zBuf[p->nUsed++] = '\\'; + p->zBuf[p->nUsed++] = 'u'; + p->zBuf[p->nUsed++] = '0'; + p->zBuf[p->nUsed++] = '0'; + p->zBuf[p->nUsed++] = '0' + (c>>4); + c = "0123456789abcdef"[c&0xf]; + } + p->zBuf[p->nUsed++] = c; + } + p->zBuf[p->nUsed++] = '"'; + assert( p->nUsednAlloc ); +} + +/* +** Append a function parameter value to the JSON string under +** construction. +*/ +static void jsonAppendValue( + JsonString *p, /* Append to this JSON string */ + sqlite3_value *pValue /* Value to append */ +){ + switch( sqlite3_value_type(pValue) ){ + case SQLITE_NULL: { + jsonAppendRaw(p, "null", 4); + break; + } + case SQLITE_INTEGER: + case SQLITE_FLOAT: { + const char *z = (const char*)sqlite3_value_text(pValue); + u32 n = (u32)sqlite3_value_bytes(pValue); + jsonAppendRaw(p, z, n); + break; + } + case SQLITE_TEXT: { + const char *z = (const char*)sqlite3_value_text(pValue); + u32 n = (u32)sqlite3_value_bytes(pValue); + if( sqlite3_value_subtype(pValue)==JSON_SUBTYPE ){ + jsonAppendRaw(p, z, n); + }else{ + jsonAppendString(p, z, n); + } + break; + } + default: { + if( p->bErr==0 ){ + sqlite3_result_error(p->pCtx, "JSON cannot hold BLOB values", -1); + p->bErr = 2; + jsonReset(p); + } + break; + } + } +} + + +/* Make the JSON in p the result of the SQL function. +*/ +static void jsonResult(JsonString *p){ + if( p->bErr==0 ){ + sqlite3_result_text64(p->pCtx, p->zBuf, p->nUsed, + p->bStatic ? SQLITE_TRANSIENT : sqlite3_free, + SQLITE_UTF8); + jsonZero(p); + } + assert( p->bStatic ); +} + +/************************************************************************** +** Utility routines for dealing with JsonNode and JsonParse objects +**************************************************************************/ + +/* +** Return the number of consecutive JsonNode slots need to represent +** the parsed JSON at pNode. The minimum answer is 1. For ARRAY and +** OBJECT types, the number might be larger. +** +** Appended elements are not counted. The value returned is the number +** by which the JsonNode counter should increment in order to go to the +** next peer value. +*/ +static u32 jsonNodeSize(JsonNode *pNode){ + return pNode->eType>=JSON_ARRAY ? pNode->n+1 : 1; +} + +/* +** Reclaim all memory allocated by a JsonParse object. But do not +** delete the JsonParse object itself. +*/ +static void jsonParseReset(JsonParse *pParse){ + sqlite3_free(pParse->aNode); + pParse->aNode = 0; + pParse->nNode = 0; + pParse->nAlloc = 0; + sqlite3_free(pParse->aUp); + pParse->aUp = 0; +} + +/* +** Free a JsonParse object that was obtained from sqlite3_malloc(). +*/ +static void jsonParseFree(JsonParse *pParse){ + jsonParseReset(pParse); + sqlite3_free(pParse); +} + +/* +** Convert the JsonNode pNode into a pure JSON string and +** append to pOut. Subsubstructure is also included. Return +** the number of JsonNode objects that are encoded. +*/ +static void jsonRenderNode( + JsonNode *pNode, /* The node to render */ + JsonString *pOut, /* Write JSON here */ + sqlite3_value **aReplace /* Replacement values */ +){ + assert( pNode!=0 ); + if( pNode->jnFlags & (JNODE_REPLACE|JNODE_PATCH) ){ + if( (pNode->jnFlags & JNODE_REPLACE)!=0 && ALWAYS(aReplace!=0) ){ + assert( pNode->eU==4 ); + jsonAppendValue(pOut, aReplace[pNode->u.iReplace]); + return; + } + assert( pNode->eU==5 ); + pNode = pNode->u.pPatch; + } + switch( pNode->eType ){ + default: { + assert( pNode->eType==JSON_NULL ); + jsonAppendRaw(pOut, "null", 4); + break; + } + case JSON_TRUE: { + jsonAppendRaw(pOut, "true", 4); + break; + } + case JSON_FALSE: { + jsonAppendRaw(pOut, "false", 5); + break; + } + case JSON_STRING: { + if( pNode->jnFlags & JNODE_RAW ){ + assert( pNode->eU==1 ); + jsonAppendString(pOut, pNode->u.zJContent, pNode->n); + break; + } + /* no break */ deliberate_fall_through + } + case JSON_REAL: + case JSON_INT: { + assert( pNode->eU==1 ); + jsonAppendRaw(pOut, pNode->u.zJContent, pNode->n); + break; + } + case JSON_ARRAY: { + u32 j = 1; + jsonAppendChar(pOut, '['); + for(;;){ + while( j<=pNode->n ){ + if( (pNode[j].jnFlags & JNODE_REMOVE)==0 ){ + jsonAppendSeparator(pOut); + jsonRenderNode(&pNode[j], pOut, aReplace); + } + j += jsonNodeSize(&pNode[j]); + } + if( (pNode->jnFlags & JNODE_APPEND)==0 ) break; + assert( pNode->eU==2 ); + pNode = &pNode[pNode->u.iAppend]; + j = 1; + } + jsonAppendChar(pOut, ']'); + break; + } + case JSON_OBJECT: { + u32 j = 1; + jsonAppendChar(pOut, '{'); + for(;;){ + while( j<=pNode->n ){ + if( (pNode[j+1].jnFlags & JNODE_REMOVE)==0 ){ + jsonAppendSeparator(pOut); + jsonRenderNode(&pNode[j], pOut, aReplace); + jsonAppendChar(pOut, ':'); + jsonRenderNode(&pNode[j+1], pOut, aReplace); + } + j += 1 + jsonNodeSize(&pNode[j+1]); + } + if( (pNode->jnFlags & JNODE_APPEND)==0 ) break; + assert( pNode->eU==2 ); + pNode = &pNode[pNode->u.iAppend]; + j = 1; + } + jsonAppendChar(pOut, '}'); + break; + } + } +} + +/* +** Return a JsonNode and all its descendents as a JSON string. +*/ +static void jsonReturnJson( + JsonNode *pNode, /* Node to return */ + sqlite3_context *pCtx, /* Return value for this function */ + sqlite3_value **aReplace /* Array of replacement values */ +){ + JsonString s; + jsonInit(&s, pCtx); + jsonRenderNode(pNode, &s, aReplace); + jsonResult(&s); + sqlite3_result_subtype(pCtx, JSON_SUBTYPE); +} + +/* +** Translate a single byte of Hex into an integer. +** This routine only works if h really is a valid hexadecimal +** character: 0..9a..fA..F +*/ +static u8 jsonHexToInt(int h){ + assert( (h>='0' && h<='9') || (h>='a' && h<='f') || (h>='A' && h<='F') ); +#ifdef SQLITE_EBCDIC + h += 9*(1&~(h>>4)); +#else + h += 9*(1&(h>>6)); +#endif + return (u8)(h & 0xf); +} + +/* +** Convert a 4-byte hex string into an integer +*/ +static u32 jsonHexToInt4(const char *z){ + u32 v; + assert( sqlite3Isxdigit(z[0]) ); + assert( sqlite3Isxdigit(z[1]) ); + assert( sqlite3Isxdigit(z[2]) ); + assert( sqlite3Isxdigit(z[3]) ); + v = (jsonHexToInt(z[0])<<12) + + (jsonHexToInt(z[1])<<8) + + (jsonHexToInt(z[2])<<4) + + jsonHexToInt(z[3]); + return v; +} + +/* +** Make the JsonNode the return value of the function. +*/ +static void jsonReturn( + JsonNode *pNode, /* Node to return */ + sqlite3_context *pCtx, /* Return value for this function */ + sqlite3_value **aReplace /* Array of replacement values */ +){ + switch( pNode->eType ){ + default: { + assert( pNode->eType==JSON_NULL ); + sqlite3_result_null(pCtx); + break; + } + case JSON_TRUE: { + sqlite3_result_int(pCtx, 1); + break; + } + case JSON_FALSE: { + sqlite3_result_int(pCtx, 0); + break; + } + case JSON_INT: { + sqlite3_int64 i = 0; + const char *z; + assert( pNode->eU==1 ); + z = pNode->u.zJContent; + if( z[0]=='-' ){ z++; } + while( z[0]>='0' && z[0]<='9' ){ + unsigned v = *(z++) - '0'; + if( i>=LARGEST_INT64/10 ){ + if( i>LARGEST_INT64/10 ) goto int_as_real; + if( z[0]>='0' && z[0]<='9' ) goto int_as_real; + if( v==9 ) goto int_as_real; + if( v==8 ){ + if( pNode->u.zJContent[0]=='-' ){ + sqlite3_result_int64(pCtx, SMALLEST_INT64); + goto int_done; + }else{ + goto int_as_real; + } + } + } + i = i*10 + v; + } + if( pNode->u.zJContent[0]=='-' ){ i = -i; } + sqlite3_result_int64(pCtx, i); + int_done: + break; + int_as_real: ; /* no break */ deliberate_fall_through + } + case JSON_REAL: { + double r; +#ifdef SQLITE_AMALGAMATION + const char *z; + assert( pNode->eU==1 ); + z = pNode->u.zJContent; + sqlite3AtoF(z, &r, sqlite3Strlen30(z), SQLITE_UTF8); +#else + assert( pNode->eU==1 ); + r = strtod(pNode->u.zJContent, 0); +#endif + sqlite3_result_double(pCtx, r); + break; + } + case JSON_STRING: { +#if 0 /* Never happens because JNODE_RAW is only set by json_set(), + ** json_insert() and json_replace() and those routines do not + ** call jsonReturn() */ + if( pNode->jnFlags & JNODE_RAW ){ + assert( pNode->eU==1 ); + sqlite3_result_text(pCtx, pNode->u.zJContent, pNode->n, + SQLITE_TRANSIENT); + }else +#endif + assert( (pNode->jnFlags & JNODE_RAW)==0 ); + if( (pNode->jnFlags & JNODE_ESCAPE)==0 ){ + /* JSON formatted without any backslash-escapes */ + assert( pNode->eU==1 ); + sqlite3_result_text(pCtx, pNode->u.zJContent+1, pNode->n-2, + SQLITE_TRANSIENT); + }else{ + /* Translate JSON formatted string into raw text */ + u32 i; + u32 n = pNode->n; + const char *z; + char *zOut; + u32 j; + assert( pNode->eU==1 ); + z = pNode->u.zJContent; + zOut = sqlite3_malloc( n+1 ); + if( zOut==0 ){ + sqlite3_result_error_nomem(pCtx); + break; + } + for(i=1, j=0; i>6)); + zOut[j++] = 0x80 | (v&0x3f); + }else{ + u32 vlo; + if( (v&0xfc00)==0xd800 + && i>18); + zOut[j++] = 0x80 | ((v>>12)&0x3f); + zOut[j++] = 0x80 | ((v>>6)&0x3f); + zOut[j++] = 0x80 | (v&0x3f); + }else{ + zOut[j++] = 0xe0 | (v>>12); + zOut[j++] = 0x80 | ((v>>6)&0x3f); + zOut[j++] = 0x80 | (v&0x3f); + } + } + }else{ + if( c=='b' ){ + c = '\b'; + }else if( c=='f' ){ + c = '\f'; + }else if( c=='n' ){ + c = '\n'; + }else if( c=='r' ){ + c = '\r'; + }else if( c=='t' ){ + c = '\t'; + } + zOut[j++] = c; + } + } + } + zOut[j] = 0; + sqlite3_result_text(pCtx, zOut, j, sqlite3_free); + } + break; + } + case JSON_ARRAY: + case JSON_OBJECT: { + jsonReturnJson(pNode, pCtx, aReplace); + break; + } + } +} + +/* Forward reference */ +static int jsonParseAddNode(JsonParse*,u32,u32,const char*); + +/* +** A macro to hint to the compiler that a function should not be +** inlined. +*/ +#if defined(__GNUC__) +# define JSON_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) && _MSC_VER>=1310 +# define JSON_NOINLINE __declspec(noinline) +#else +# define JSON_NOINLINE +#endif + + +static JSON_NOINLINE int jsonParseAddNodeExpand( + JsonParse *pParse, /* Append the node to this object */ + u32 eType, /* Node type */ + u32 n, /* Content size or sub-node count */ + const char *zContent /* Content */ +){ + u32 nNew; + JsonNode *pNew; + assert( pParse->nNode>=pParse->nAlloc ); + if( pParse->oom ) return -1; + nNew = pParse->nAlloc*2 + 10; + pNew = sqlite3_realloc64(pParse->aNode, sizeof(JsonNode)*nNew); + if( pNew==0 ){ + pParse->oom = 1; + return -1; + } + pParse->nAlloc = nNew; + pParse->aNode = pNew; + assert( pParse->nNodenAlloc ); + return jsonParseAddNode(pParse, eType, n, zContent); +} + +/* +** Create a new JsonNode instance based on the arguments and append that +** instance to the JsonParse. Return the index in pParse->aNode[] of the +** new node, or -1 if a memory allocation fails. +*/ +static int jsonParseAddNode( + JsonParse *pParse, /* Append the node to this object */ + u32 eType, /* Node type */ + u32 n, /* Content size or sub-node count */ + const char *zContent /* Content */ +){ + JsonNode *p; + if( pParse->aNode==0 || pParse->nNode>=pParse->nAlloc ){ + return jsonParseAddNodeExpand(pParse, eType, n, zContent); + } + p = &pParse->aNode[pParse->nNode]; + p->eType = (u8)eType; + p->jnFlags = 0; + VVA( p->eU = zContent ? 1 : 0 ); + p->n = n; + p->u.zJContent = zContent; + return pParse->nNode++; +} + +/* +** Return true if z[] begins with 4 (or more) hexadecimal digits +*/ +static int jsonIs4Hex(const char *z){ + int i; + for(i=0; i<4; i++) if( !sqlite3Isxdigit(z[i]) ) return 0; + return 1; +} + +/* +** Parse a single JSON value which begins at pParse->zJson[i]. Return the +** index of the first character past the end of the value parsed. +** +** Return negative for a syntax error. Special cases: return -2 if the +** first non-whitespace character is '}' and return -3 if the first +** non-whitespace character is ']'. +*/ +static int jsonParseValue(JsonParse *pParse, u32 i){ + char c; + u32 j; + int iThis; + int x; + JsonNode *pNode; + const char *z = pParse->zJson; + while( fast_isspace(z[i]) ){ i++; } + if( (c = z[i])=='{' ){ + /* Parse object */ + iThis = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0); + if( iThis<0 ) return -1; + for(j=i+1;;j++){ + while( fast_isspace(z[j]) ){ j++; } + if( ++pParse->iDepth > JSON_MAX_DEPTH ) return -1; + x = jsonParseValue(pParse, j); + if( x<0 ){ + pParse->iDepth--; + if( x==(-2) && pParse->nNode==(u32)iThis+1 ) return j+1; + return -1; + } + if( pParse->oom ) return -1; + pNode = &pParse->aNode[pParse->nNode-1]; + if( pNode->eType!=JSON_STRING ) return -1; + pNode->jnFlags |= JNODE_LABEL; + j = x; + while( fast_isspace(z[j]) ){ j++; } + if( z[j]!=':' ) return -1; + j++; + x = jsonParseValue(pParse, j); + pParse->iDepth--; + if( x<0 ) return -1; + j = x; + while( fast_isspace(z[j]) ){ j++; } + c = z[j]; + if( c==',' ) continue; + if( c!='}' ) return -1; + break; + } + pParse->aNode[iThis].n = pParse->nNode - (u32)iThis - 1; + return j+1; + }else if( c=='[' ){ + /* Parse array */ + iThis = jsonParseAddNode(pParse, JSON_ARRAY, 0, 0); + if( iThis<0 ) return -1; + memset(&pParse->aNode[iThis].u, 0, sizeof(pParse->aNode[iThis].u)); + for(j=i+1;;j++){ + while( fast_isspace(z[j]) ){ j++; } + if( ++pParse->iDepth > JSON_MAX_DEPTH ) return -1; + x = jsonParseValue(pParse, j); + pParse->iDepth--; + if( x<0 ){ + if( x==(-3) && pParse->nNode==(u32)iThis+1 ) return j+1; + return -1; + } + j = x; + while( fast_isspace(z[j]) ){ j++; } + c = z[j]; + if( c==',' ) continue; + if( c!=']' ) return -1; + break; + } + pParse->aNode[iThis].n = pParse->nNode - (u32)iThis - 1; + return j+1; + }else if( c=='"' ){ + /* Parse string */ + u8 jnFlags = 0; + j = i+1; + for(;;){ + c = z[j]; + if( (c & ~0x1f)==0 ){ + /* Control characters are not allowed in strings */ + return -1; + } + if( c=='\\' ){ + c = z[++j]; + if( c=='"' || c=='\\' || c=='/' || c=='b' || c=='f' + || c=='n' || c=='r' || c=='t' + || (c=='u' && jsonIs4Hex(z+j+1)) ){ + jnFlags = JNODE_ESCAPE; + }else{ + return -1; + } + }else if( c=='"' ){ + break; + } + j++; + } + jsonParseAddNode(pParse, JSON_STRING, j+1-i, &z[i]); + if( !pParse->oom ) pParse->aNode[pParse->nNode-1].jnFlags = jnFlags; + return j+1; + }else if( c=='n' + && strncmp(z+i,"null",4)==0 + && !sqlite3Isalnum(z[i+4]) ){ + jsonParseAddNode(pParse, JSON_NULL, 0, 0); + return i+4; + }else if( c=='t' + && strncmp(z+i,"true",4)==0 + && !sqlite3Isalnum(z[i+4]) ){ + jsonParseAddNode(pParse, JSON_TRUE, 0, 0); + return i+4; + }else if( c=='f' + && strncmp(z+i,"false",5)==0 + && !sqlite3Isalnum(z[i+5]) ){ + jsonParseAddNode(pParse, JSON_FALSE, 0, 0); + return i+5; + }else if( c=='-' || (c>='0' && c<='9') ){ + /* Parse number */ + u8 seenDP = 0; + u8 seenE = 0; + assert( '-' < '0' ); + if( c<='0' ){ + j = c=='-' ? i+1 : i; + if( z[j]=='0' && z[j+1]>='0' && z[j+1]<='9' ) return -1; + } + j = i+1; + for(;; j++){ + c = z[j]; + if( c>='0' && c<='9' ) continue; + if( c=='.' ){ + if( z[j-1]=='-' ) return -1; + if( seenDP ) return -1; + seenDP = 1; + continue; + } + if( c=='e' || c=='E' ){ + if( z[j-1]<'0' ) return -1; + if( seenE ) return -1; + seenDP = seenE = 1; + c = z[j+1]; + if( c=='+' || c=='-' ){ + j++; + c = z[j+1]; + } + if( c<'0' || c>'9' ) return -1; + continue; + } + break; + } + if( z[j-1]<'0' ) return -1; + jsonParseAddNode(pParse, seenDP ? JSON_REAL : JSON_INT, + j - i, &z[i]); + return j; + }else if( c=='}' ){ + return -2; /* End of {...} */ + }else if( c==']' ){ + return -3; /* End of [...] */ + }else if( c==0 ){ + return 0; /* End of file */ + }else{ + return -1; /* Syntax error */ + } +} + +/* +** Parse a complete JSON string. Return 0 on success or non-zero if there +** are any errors. If an error occurs, free all memory associated with +** pParse. +** +** pParse is uninitialized when this routine is called. +*/ +static int jsonParse( + JsonParse *pParse, /* Initialize and fill this JsonParse object */ + sqlite3_context *pCtx, /* Report errors here */ + const char *zJson /* Input JSON text to be parsed */ +){ + int i; + memset(pParse, 0, sizeof(*pParse)); + if( zJson==0 ) return 1; + pParse->zJson = zJson; + i = jsonParseValue(pParse, 0); + if( pParse->oom ) i = -1; + if( i>0 ){ + assert( pParse->iDepth==0 ); + while( fast_isspace(zJson[i]) ) i++; + if( zJson[i] ) i = -1; + } + if( i<=0 ){ + if( pCtx!=0 ){ + if( pParse->oom ){ + sqlite3_result_error_nomem(pCtx); + }else{ + sqlite3_result_error(pCtx, "malformed JSON", -1); + } + } + jsonParseReset(pParse); + return 1; + } + return 0; +} + +/* Mark node i of pParse as being a child of iParent. Call recursively +** to fill in all the descendants of node i. +*/ +static void jsonParseFillInParentage(JsonParse *pParse, u32 i, u32 iParent){ + JsonNode *pNode = &pParse->aNode[i]; + u32 j; + pParse->aUp[i] = iParent; + switch( pNode->eType ){ + case JSON_ARRAY: { + for(j=1; j<=pNode->n; j += jsonNodeSize(pNode+j)){ + jsonParseFillInParentage(pParse, i+j, i); + } + break; + } + case JSON_OBJECT: { + for(j=1; j<=pNode->n; j += jsonNodeSize(pNode+j+1)+1){ + pParse->aUp[i+j] = i; + jsonParseFillInParentage(pParse, i+j+1, i); + } + break; + } + default: { + break; + } + } +} + +/* +** Compute the parentage of all nodes in a completed parse. +*/ +static int jsonParseFindParents(JsonParse *pParse){ + u32 *aUp; + assert( pParse->aUp==0 ); + aUp = pParse->aUp = sqlite3_malloc64( sizeof(u32)*pParse->nNode ); + if( aUp==0 ){ + pParse->oom = 1; + return SQLITE_NOMEM; + } + jsonParseFillInParentage(pParse, 0, 0); + return SQLITE_OK; +} + +/* +** Magic number used for the JSON parse cache in sqlite3_get_auxdata() +*/ +#define JSON_CACHE_ID (-429938) /* First cache entry */ +#define JSON_CACHE_SZ 4 /* Max number of cache entries */ + +/* +** Obtain a complete parse of the JSON found in the first argument +** of the argv array. Use the sqlite3_get_auxdata() cache for this +** parse if it is available. If the cache is not available or if it +** is no longer valid, parse the JSON again and return the new parse, +** and also register the new parse so that it will be available for +** future sqlite3_get_auxdata() calls. +*/ +static JsonParse *jsonParseCached( + sqlite3_context *pCtx, + sqlite3_value **argv, + sqlite3_context *pErrCtx +){ + const char *zJson = (const char*)sqlite3_value_text(argv[0]); + int nJson = sqlite3_value_bytes(argv[0]); + JsonParse *p; + JsonParse *pMatch = 0; + int iKey; + int iMinKey = 0; + u32 iMinHold = 0xffffffff; + u32 iMaxHold = 0; + if( zJson==0 ) return 0; + for(iKey=0; iKeynJson==nJson + && memcmp(p->zJson,zJson,nJson)==0 + ){ + p->nErr = 0; + pMatch = p; + }else if( p->iHoldiHold; + iMinKey = iKey; + } + if( p->iHold>iMaxHold ){ + iMaxHold = p->iHold; + } + } + if( pMatch ){ + pMatch->nErr = 0; + pMatch->iHold = iMaxHold+1; + return pMatch; + } + p = sqlite3_malloc64( sizeof(*p) + nJson + 1 ); + if( p==0 ){ + sqlite3_result_error_nomem(pCtx); + return 0; + } + memset(p, 0, sizeof(*p)); + p->zJson = (char*)&p[1]; + memcpy((char*)p->zJson, zJson, nJson+1); + if( jsonParse(p, pErrCtx, p->zJson) ){ + sqlite3_free(p); + return 0; + } + p->nJson = nJson; + p->iHold = iMaxHold+1; + sqlite3_set_auxdata(pCtx, JSON_CACHE_ID+iMinKey, p, + (void(*)(void*))jsonParseFree); + return (JsonParse*)sqlite3_get_auxdata(pCtx, JSON_CACHE_ID+iMinKey); +} + +/* +** Compare the OBJECT label at pNode against zKey,nKey. Return true on +** a match. +*/ +static int jsonLabelCompare(JsonNode *pNode, const char *zKey, u32 nKey){ + assert( pNode->eU==1 ); + if( pNode->jnFlags & JNODE_RAW ){ + if( pNode->n!=nKey ) return 0; + return strncmp(pNode->u.zJContent, zKey, nKey)==0; + }else{ + if( pNode->n!=nKey+2 ) return 0; + return strncmp(pNode->u.zJContent+1, zKey, nKey)==0; + } +} + +/* forward declaration */ +static JsonNode *jsonLookupAppend(JsonParse*,const char*,int*,const char**); + +/* +** Search along zPath to find the node specified. Return a pointer +** to that node, or NULL if zPath is malformed or if there is no such +** node. +** +** If pApnd!=0, then try to append new nodes to complete zPath if it is +** possible to do so and if no existing node corresponds to zPath. If +** new nodes are appended *pApnd is set to 1. +*/ +static JsonNode *jsonLookupStep( + JsonParse *pParse, /* The JSON to search */ + u32 iRoot, /* Begin the search at this node */ + const char *zPath, /* The path to search */ + int *pApnd, /* Append nodes to complete path if not NULL */ + const char **pzErr /* Make *pzErr point to any syntax error in zPath */ +){ + u32 i, j, nKey; + const char *zKey; + JsonNode *pRoot = &pParse->aNode[iRoot]; + if( zPath[0]==0 ) return pRoot; + if( pRoot->jnFlags & JNODE_REPLACE ) return 0; + if( zPath[0]=='.' ){ + if( pRoot->eType!=JSON_OBJECT ) return 0; + zPath++; + if( zPath[0]=='"' ){ + zKey = zPath + 1; + for(i=1; zPath[i] && zPath[i]!='"'; i++){} + nKey = i-1; + if( zPath[i] ){ + i++; + }else{ + *pzErr = zPath; + return 0; + } + testcase( nKey==0 ); + }else{ + zKey = zPath; + for(i=0; zPath[i] && zPath[i]!='.' && zPath[i]!='['; i++){} + nKey = i; + if( nKey==0 ){ + *pzErr = zPath; + return 0; + } + } + j = 1; + for(;;){ + while( j<=pRoot->n ){ + if( jsonLabelCompare(pRoot+j, zKey, nKey) ){ + return jsonLookupStep(pParse, iRoot+j+1, &zPath[i], pApnd, pzErr); + } + j++; + j += jsonNodeSize(&pRoot[j]); + } + if( (pRoot->jnFlags & JNODE_APPEND)==0 ) break; + assert( pRoot->eU==2 ); + iRoot += pRoot->u.iAppend; + pRoot = &pParse->aNode[iRoot]; + j = 1; + } + if( pApnd ){ + u32 iStart, iLabel; + JsonNode *pNode; + iStart = jsonParseAddNode(pParse, JSON_OBJECT, 2, 0); + iLabel = jsonParseAddNode(pParse, JSON_STRING, nKey, zKey); + zPath += i; + pNode = jsonLookupAppend(pParse, zPath, pApnd, pzErr); + if( pParse->oom ) return 0; + if( pNode ){ + pRoot = &pParse->aNode[iRoot]; + assert( pRoot->eU==0 ); + pRoot->u.iAppend = iStart - iRoot; + pRoot->jnFlags |= JNODE_APPEND; + VVA( pRoot->eU = 2 ); + pParse->aNode[iLabel].jnFlags |= JNODE_RAW; + } + return pNode; + } + }else if( zPath[0]=='[' ){ + i = 0; + j = 1; + while( sqlite3Isdigit(zPath[j]) ){ + i = i*10 + zPath[j] - '0'; + j++; + } + if( j<2 || zPath[j]!=']' ){ + if( zPath[1]=='#' ){ + JsonNode *pBase = pRoot; + int iBase = iRoot; + if( pRoot->eType!=JSON_ARRAY ) return 0; + for(;;){ + while( j<=pBase->n ){ + if( (pBase[j].jnFlags & JNODE_REMOVE)==0 ) i++; + j += jsonNodeSize(&pBase[j]); + } + if( (pBase->jnFlags & JNODE_APPEND)==0 ) break; + assert( pBase->eU==2 ); + iBase += pBase->u.iAppend; + pBase = &pParse->aNode[iBase]; + j = 1; + } + j = 2; + if( zPath[2]=='-' && sqlite3Isdigit(zPath[3]) ){ + unsigned int x = 0; + j = 3; + do{ + x = x*10 + zPath[j] - '0'; + j++; + }while( sqlite3Isdigit(zPath[j]) ); + if( x>i ) return 0; + i -= x; + } + if( zPath[j]!=']' ){ + *pzErr = zPath; + return 0; + } + }else{ + *pzErr = zPath; + return 0; + } + } + if( pRoot->eType!=JSON_ARRAY ) return 0; + zPath += j + 1; + j = 1; + for(;;){ + while( j<=pRoot->n && (i>0 || (pRoot[j].jnFlags & JNODE_REMOVE)!=0) ){ + if( (pRoot[j].jnFlags & JNODE_REMOVE)==0 ) i--; + j += jsonNodeSize(&pRoot[j]); + } + if( (pRoot->jnFlags & JNODE_APPEND)==0 ) break; + assert( pRoot->eU==2 ); + iRoot += pRoot->u.iAppend; + pRoot = &pParse->aNode[iRoot]; + j = 1; + } + if( j<=pRoot->n ){ + return jsonLookupStep(pParse, iRoot+j, zPath, pApnd, pzErr); + } + if( i==0 && pApnd ){ + u32 iStart; + JsonNode *pNode; + iStart = jsonParseAddNode(pParse, JSON_ARRAY, 1, 0); + pNode = jsonLookupAppend(pParse, zPath, pApnd, pzErr); + if( pParse->oom ) return 0; + if( pNode ){ + pRoot = &pParse->aNode[iRoot]; + assert( pRoot->eU==0 ); + pRoot->u.iAppend = iStart - iRoot; + pRoot->jnFlags |= JNODE_APPEND; + VVA( pRoot->eU = 2 ); + } + return pNode; + } + }else{ + *pzErr = zPath; + } + return 0; +} + +/* +** Append content to pParse that will complete zPath. Return a pointer +** to the inserted node, or return NULL if the append fails. +*/ +static JsonNode *jsonLookupAppend( + JsonParse *pParse, /* Append content to the JSON parse */ + const char *zPath, /* Description of content to append */ + int *pApnd, /* Set this flag to 1 */ + const char **pzErr /* Make this point to any syntax error */ +){ + *pApnd = 1; + if( zPath[0]==0 ){ + jsonParseAddNode(pParse, JSON_NULL, 0, 0); + return pParse->oom ? 0 : &pParse->aNode[pParse->nNode-1]; + } + if( zPath[0]=='.' ){ + jsonParseAddNode(pParse, JSON_OBJECT, 0, 0); + }else if( strncmp(zPath,"[0]",3)==0 ){ + jsonParseAddNode(pParse, JSON_ARRAY, 0, 0); + }else{ + return 0; + } + if( pParse->oom ) return 0; + return jsonLookupStep(pParse, pParse->nNode-1, zPath, pApnd, pzErr); +} + +/* +** Return the text of a syntax error message on a JSON path. Space is +** obtained from sqlite3_malloc(). +*/ +static char *jsonPathSyntaxError(const char *zErr){ + return sqlite3_mprintf("JSON path error near '%q'", zErr); +} + +/* +** Do a node lookup using zPath. Return a pointer to the node on success. +** Return NULL if not found or if there is an error. +** +** On an error, write an error message into pCtx and increment the +** pParse->nErr counter. +** +** If pApnd!=NULL then try to append missing nodes and set *pApnd = 1 if +** nodes are appended. +*/ +static JsonNode *jsonLookup( + JsonParse *pParse, /* The JSON to search */ + const char *zPath, /* The path to search */ + int *pApnd, /* Append nodes to complete path if not NULL */ + sqlite3_context *pCtx /* Report errors here, if not NULL */ +){ + const char *zErr = 0; + JsonNode *pNode = 0; + char *zMsg; + + if( zPath==0 ) return 0; + if( zPath[0]!='$' ){ + zErr = zPath; + goto lookup_err; + } + zPath++; + pNode = jsonLookupStep(pParse, 0, zPath, pApnd, &zErr); + if( zErr==0 ) return pNode; + +lookup_err: + pParse->nErr++; + assert( zErr!=0 && pCtx!=0 ); + zMsg = jsonPathSyntaxError(zErr); + if( zMsg ){ + sqlite3_result_error(pCtx, zMsg, -1); + sqlite3_free(zMsg); + }else{ + sqlite3_result_error_nomem(pCtx); + } + return 0; +} + + +/* +** Report the wrong number of arguments for json_insert(), json_replace() +** or json_set(). +*/ +static void jsonWrongNumArgs( + sqlite3_context *pCtx, + const char *zFuncName +){ + char *zMsg = sqlite3_mprintf("json_%s() needs an odd number of arguments", + zFuncName); + sqlite3_result_error(pCtx, zMsg, -1); + sqlite3_free(zMsg); +} + +/* +** Mark all NULL entries in the Object passed in as JNODE_REMOVE. +*/ +static void jsonRemoveAllNulls(JsonNode *pNode){ + int i, n; + assert( pNode->eType==JSON_OBJECT ); + n = pNode->n; + for(i=2; i<=n; i += jsonNodeSize(&pNode[i])+1){ + switch( pNode[i].eType ){ + case JSON_NULL: + pNode[i].jnFlags |= JNODE_REMOVE; + break; + case JSON_OBJECT: + jsonRemoveAllNulls(&pNode[i]); + break; + } + } +} + + +/**************************************************************************** +** SQL functions used for testing and debugging +****************************************************************************/ + +#ifdef SQLITE_DEBUG +/* +** The json_parse(JSON) function returns a string which describes +** a parse of the JSON provided. Or it returns NULL if JSON is not +** well-formed. +*/ +static void jsonParseFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonString s; /* Output string - not real JSON */ + JsonParse x; /* The parse */ + u32 i; + + assert( argc==1 ); + if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; + jsonParseFindParents(&x); + jsonInit(&s, ctx); + for(i=0; inNode ); + if( argc==2 ){ + const char *zPath = (const char*)sqlite3_value_text(argv[1]); + pNode = jsonLookup(p, zPath, 0, ctx); + }else{ + pNode = p->aNode; + } + if( pNode==0 ){ + return; + } + if( pNode->eType==JSON_ARRAY ){ + assert( (pNode->jnFlags & JNODE_APPEND)==0 ); + for(i=1; i<=pNode->n; n++){ + i += jsonNodeSize(&pNode[i]); + } + } + sqlite3_result_int64(ctx, n); +} + +/* +** Bit values for the flags passed into jsonExtractFunc() or +** jsonSetFunc() via the user-data value. +*/ +#define JSON_JSON 0x01 /* Result is always JSON */ +#define JSON_SQL 0x02 /* Result is always SQL */ +#define JSON_ABPATH 0x03 /* Allow abbreviated JSON path specs */ +#define JSON_ISSET 0x04 /* json_set(), not json_insert() */ + +/* +** json_extract(JSON, PATH, ...) +** "->"(JSON,PATH) +** "->>"(JSON,PATH) +** +** Return the element described by PATH. Return NULL if that PATH element +** is not found. +** +** If JSON_JSON is set or if more that one PATH argument is supplied then +** always return a JSON representation of the result. If JSON_SQL is set, +** then always return an SQL representation of the result. If neither flag +** is present and argc==2, then return JSON for objects and arrays and SQL +** for all other values. +** +** When multiple PATH arguments are supplied, the result is a JSON array +** containing the result of each PATH. +** +** Abbreviated JSON path expressions are allows if JSON_ABPATH, for +** compatibility with PG. +*/ +static void jsonExtractFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonParse *p; /* The parse */ + JsonNode *pNode; + const char *zPath; + int flags = SQLITE_PTR_TO_INT(sqlite3_user_data(ctx)); + JsonString jx; + + if( argc<2 ) return; + p = jsonParseCached(ctx, argv, ctx); + if( p==0 ) return; + if( argc==2 ){ + /* With a single PATH argument */ + zPath = (const char*)sqlite3_value_text(argv[1]); + if( zPath==0 ) return; + if( flags & JSON_ABPATH ){ + if( zPath[0]!='$' ){ + /* The -> and ->> operators accept abbreviated PATH arguments. This + ** is mostly for compatibility with PostgreSQL, but also for + ** convenience. + ** + ** NUMBER ==> $[NUMBER] // PG compatible + ** LABEL ==> $.LABEL // PG compatible + ** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience + */ + jsonInit(&jx, ctx); + if( sqlite3Isdigit(zPath[0]) ){ + jsonAppendRaw(&jx, "$[", 2); + jsonAppendRaw(&jx, zPath, (int)strlen(zPath)); + jsonAppendRaw(&jx, "]", 2); + }else{ + jsonAppendRaw(&jx, "$.", 1 + (zPath[0]!='[')); + jsonAppendRaw(&jx, zPath, (int)strlen(zPath)); + jsonAppendChar(&jx, 0); + } + pNode = jx.bErr ? 0 : jsonLookup(p, jx.zBuf, 0, ctx); + jsonReset(&jx); + }else{ + pNode = jsonLookup(p, zPath, 0, ctx); + } + if( pNode ){ + if( flags & JSON_JSON ){ + jsonReturnJson(pNode, ctx, 0); + }else{ + jsonReturn(pNode, ctx, 0); + sqlite3_result_subtype(ctx, 0); + } + } + }else{ + pNode = jsonLookup(p, zPath, 0, ctx); + if( p->nErr==0 && pNode ) jsonReturn(pNode, ctx, 0); + } + }else{ + /* Two or more PATH arguments results in a JSON array with each + ** element of the array being the value selected by one of the PATHs */ + int i; + jsonInit(&jx, ctx); + jsonAppendChar(&jx, '['); + for(i=1; inErr ) break; + jsonAppendSeparator(&jx); + if( pNode ){ + jsonRenderNode(pNode, &jx, 0); + }else{ + jsonAppendRaw(&jx, "null", 4); + } + } + if( i==argc ){ + jsonAppendChar(&jx, ']'); + jsonResult(&jx); + sqlite3_result_subtype(ctx, JSON_SUBTYPE); + } + jsonReset(&jx); + } +} + +/* This is the RFC 7396 MergePatch algorithm. +*/ +static JsonNode *jsonMergePatch( + JsonParse *pParse, /* The JSON parser that contains the TARGET */ + u32 iTarget, /* Node of the TARGET in pParse */ + JsonNode *pPatch /* The PATCH */ +){ + u32 i, j; + u32 iRoot; + JsonNode *pTarget; + if( pPatch->eType!=JSON_OBJECT ){ + return pPatch; + } + assert( iTarget>=0 && iTargetnNode ); + pTarget = &pParse->aNode[iTarget]; + assert( (pPatch->jnFlags & JNODE_APPEND)==0 ); + if( pTarget->eType!=JSON_OBJECT ){ + jsonRemoveAllNulls(pPatch); + return pPatch; + } + iRoot = iTarget; + for(i=1; in; i += jsonNodeSize(&pPatch[i+1])+1){ + u32 nKey; + const char *zKey; + assert( pPatch[i].eType==JSON_STRING ); + assert( pPatch[i].jnFlags & JNODE_LABEL ); + assert( pPatch[i].eU==1 ); + nKey = pPatch[i].n; + zKey = pPatch[i].u.zJContent; + assert( (pPatch[i].jnFlags & JNODE_RAW)==0 ); + for(j=1; jn; j += jsonNodeSize(&pTarget[j+1])+1 ){ + assert( pTarget[j].eType==JSON_STRING ); + assert( pTarget[j].jnFlags & JNODE_LABEL ); + assert( (pPatch[i].jnFlags & JNODE_RAW)==0 ); + if( pTarget[j].n==nKey && strncmp(pTarget[j].u.zJContent,zKey,nKey)==0 ){ + if( pTarget[j+1].jnFlags & (JNODE_REMOVE|JNODE_PATCH) ) break; + if( pPatch[i+1].eType==JSON_NULL ){ + pTarget[j+1].jnFlags |= JNODE_REMOVE; + }else{ + JsonNode *pNew = jsonMergePatch(pParse, iTarget+j+1, &pPatch[i+1]); + if( pNew==0 ) return 0; + pTarget = &pParse->aNode[iTarget]; + if( pNew!=&pTarget[j+1] ){ + assert( pTarget[j+1].eU==0 + || pTarget[j+1].eU==1 + || pTarget[j+1].eU==2 ); + testcase( pTarget[j+1].eU==1 ); + testcase( pTarget[j+1].eU==2 ); + VVA( pTarget[j+1].eU = 5 ); + pTarget[j+1].u.pPatch = pNew; + pTarget[j+1].jnFlags |= JNODE_PATCH; + } + } + break; + } + } + if( j>=pTarget->n && pPatch[i+1].eType!=JSON_NULL ){ + int iStart, iPatch; + iStart = jsonParseAddNode(pParse, JSON_OBJECT, 2, 0); + jsonParseAddNode(pParse, JSON_STRING, nKey, zKey); + iPatch = jsonParseAddNode(pParse, JSON_TRUE, 0, 0); + if( pParse->oom ) return 0; + jsonRemoveAllNulls(pPatch); + pTarget = &pParse->aNode[iTarget]; + assert( pParse->aNode[iRoot].eU==0 || pParse->aNode[iRoot].eU==2 ); + testcase( pParse->aNode[iRoot].eU==2 ); + pParse->aNode[iRoot].jnFlags |= JNODE_APPEND; + VVA( pParse->aNode[iRoot].eU = 2 ); + pParse->aNode[iRoot].u.iAppend = iStart - iRoot; + iRoot = iStart; + assert( pParse->aNode[iPatch].eU==0 ); + VVA( pParse->aNode[iPatch].eU = 5 ); + pParse->aNode[iPatch].jnFlags |= JNODE_PATCH; + pParse->aNode[iPatch].u.pPatch = &pPatch[i+1]; + } + } + return pTarget; +} + +/* +** Implementation of the json_mergepatch(JSON1,JSON2) function. Return a JSON +** object that is the result of running the RFC 7396 MergePatch() algorithm +** on the two arguments. +*/ +static void jsonPatchFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonParse x; /* The JSON that is being patched */ + JsonParse y; /* The patch */ + JsonNode *pResult; /* The result of the merge */ + + UNUSED_PARAMETER(argc); + if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; + if( jsonParse(&y, ctx, (const char*)sqlite3_value_text(argv[1])) ){ + jsonParseReset(&x); + return; + } + pResult = jsonMergePatch(&x, 0, y.aNode); + assert( pResult!=0 || x.oom ); + if( pResult ){ + jsonReturnJson(pResult, ctx, 0); + }else{ + sqlite3_result_error_nomem(ctx); + } + jsonParseReset(&x); + jsonParseReset(&y); +} + + +/* +** Implementation of the json_object(NAME,VALUE,...) function. Return a JSON +** object that contains all name/value given in arguments. Or if any name +** is not a string or if any value is a BLOB, throw an error. +*/ +static void jsonObjectFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + int i; + JsonString jx; + const char *z; + u32 n; + + if( argc&1 ){ + sqlite3_result_error(ctx, "json_object() requires an even number " + "of arguments", -1); + return; + } + jsonInit(&jx, ctx); + jsonAppendChar(&jx, '{'); + for(i=0; ijnFlags |= JNODE_REMOVE; + } + if( (x.aNode[0].jnFlags & JNODE_REMOVE)==0 ){ + jsonReturnJson(x.aNode, ctx, 0); + } +remove_done: + jsonParseReset(&x); +} + +/* +** json_replace(JSON, PATH, VALUE, ...) +** +** Replace the value at PATH with VALUE. If PATH does not already exist, +** this routine is a no-op. If JSON or PATH is malformed, throw an error. +*/ +static void jsonReplaceFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonParse x; /* The parse */ + JsonNode *pNode; + const char *zPath; + u32 i; + + if( argc<1 ) return; + if( (argc&1)==0 ) { + jsonWrongNumArgs(ctx, "replace"); + return; + } + if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; + assert( x.nNode ); + for(i=1; i<(u32)argc; i+=2){ + zPath = (const char*)sqlite3_value_text(argv[i]); + pNode = jsonLookup(&x, zPath, 0, ctx); + if( x.nErr ) goto replace_err; + if( pNode ){ + assert( pNode->eU==0 || pNode->eU==1 || pNode->eU==4 ); + testcase( pNode->eU!=0 && pNode->eU!=1 ); + pNode->jnFlags |= (u8)JNODE_REPLACE; + VVA( pNode->eU = 4 ); + pNode->u.iReplace = i + 1; + } + } + if( x.aNode[0].jnFlags & JNODE_REPLACE ){ + assert( x.aNode[0].eU==4 ); + sqlite3_result_value(ctx, argv[x.aNode[0].u.iReplace]); + }else{ + jsonReturnJson(x.aNode, ctx, argv); + } +replace_err: + jsonParseReset(&x); +} + + +/* +** json_set(JSON, PATH, VALUE, ...) +** +** Set the value at PATH to VALUE. Create the PATH if it does not already +** exist. Overwrite existing values that do exist. +** If JSON or PATH is malformed, throw an error. +** +** json_insert(JSON, PATH, VALUE, ...) +** +** Create PATH and initialize it to VALUE. If PATH already exists, this +** routine is a no-op. If JSON or PATH is malformed, throw an error. +*/ +static void jsonSetFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonParse x; /* The parse */ + JsonNode *pNode; + const char *zPath; + u32 i; + int bApnd; + int bIsSet = sqlite3_user_data(ctx)!=0; + + if( argc<1 ) return; + if( (argc&1)==0 ) { + jsonWrongNumArgs(ctx, bIsSet ? "set" : "insert"); + return; + } + if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; + assert( x.nNode ); + for(i=1; i<(u32)argc; i+=2){ + zPath = (const char*)sqlite3_value_text(argv[i]); + bApnd = 0; + pNode = jsonLookup(&x, zPath, &bApnd, ctx); + if( x.oom ){ + sqlite3_result_error_nomem(ctx); + goto jsonSetDone; + }else if( x.nErr ){ + goto jsonSetDone; + }else if( pNode && (bApnd || bIsSet) ){ + testcase( pNode->eU!=0 && pNode->eU!=1 ); + assert( pNode->eU!=3 && pNode->eU!=5 ); + VVA( pNode->eU = 4 ); + pNode->jnFlags |= (u8)JNODE_REPLACE; + pNode->u.iReplace = i + 1; + } + } + if( x.aNode[0].jnFlags & JNODE_REPLACE ){ + assert( x.aNode[0].eU==4 ); + sqlite3_result_value(ctx, argv[x.aNode[0].u.iReplace]); + }else{ + jsonReturnJson(x.aNode, ctx, argv); + } +jsonSetDone: + jsonParseReset(&x); +} + +/* +** json_type(JSON) +** json_type(JSON, PATH) +** +** Return the top-level "type" of a JSON string. json_type() raises an +** error if either the JSON or PATH inputs are not well-formed. +*/ +static void jsonTypeFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonParse *p; /* The parse */ + const char *zPath; + JsonNode *pNode; + + p = jsonParseCached(ctx, argv, ctx); + if( p==0 ) return; + if( argc==2 ){ + zPath = (const char*)sqlite3_value_text(argv[1]); + pNode = jsonLookup(p, zPath, 0, ctx); + }else{ + pNode = p->aNode; + } + if( pNode ){ + sqlite3_result_text(ctx, jsonType[pNode->eType], -1, SQLITE_STATIC); + } +} + +/* +** json_valid(JSON) +** +** Return 1 if JSON is a well-formed JSON string according to RFC-7159. +** Return 0 otherwise. +*/ +static void jsonValidFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonParse *p; /* The parse */ + UNUSED_PARAMETER(argc); + p = jsonParseCached(ctx, argv, 0); + sqlite3_result_int(ctx, p!=0); +} + + +/**************************************************************************** +** Aggregate SQL function implementations +****************************************************************************/ +/* +** json_group_array(VALUE) +** +** Return a JSON array composed of all values in the aggregate. +*/ +static void jsonArrayStep( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonString *pStr; + UNUSED_PARAMETER(argc); + pStr = (JsonString*)sqlite3_aggregate_context(ctx, sizeof(*pStr)); + if( pStr ){ + if( pStr->zBuf==0 ){ + jsonInit(pStr, ctx); + jsonAppendChar(pStr, '['); + }else if( pStr->nUsed>1 ){ + jsonAppendChar(pStr, ','); + } + pStr->pCtx = ctx; + jsonAppendValue(pStr, argv[0]); + } +} +static void jsonArrayCompute(sqlite3_context *ctx, int isFinal){ + JsonString *pStr; + pStr = (JsonString*)sqlite3_aggregate_context(ctx, 0); + if( pStr ){ + pStr->pCtx = ctx; + jsonAppendChar(pStr, ']'); + if( pStr->bErr ){ + if( pStr->bErr==1 ) sqlite3_result_error_nomem(ctx); + assert( pStr->bStatic ); + }else if( isFinal ){ + sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed, + pStr->bStatic ? SQLITE_TRANSIENT : sqlite3_free); + pStr->bStatic = 1; + }else{ + sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed, SQLITE_TRANSIENT); + pStr->nUsed--; + } + }else{ + sqlite3_result_text(ctx, "[]", 2, SQLITE_STATIC); + } + sqlite3_result_subtype(ctx, JSON_SUBTYPE); +} +static void jsonArrayValue(sqlite3_context *ctx){ + jsonArrayCompute(ctx, 0); +} +static void jsonArrayFinal(sqlite3_context *ctx){ + jsonArrayCompute(ctx, 1); +} + +#ifndef SQLITE_OMIT_WINDOWFUNC +/* +** This method works for both json_group_array() and json_group_object(). +** It works by removing the first element of the group by searching forward +** to the first comma (",") that is not within a string and deleting all +** text through that comma. +*/ +static void jsonGroupInverse( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + unsigned int i; + int inStr = 0; + int nNest = 0; + char *z; + char c; + JsonString *pStr; + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); + pStr = (JsonString*)sqlite3_aggregate_context(ctx, 0); +#ifdef NEVER + /* pStr is always non-NULL since jsonArrayStep() or jsonObjectStep() will + ** always have been called to initalize it */ + if( NEVER(!pStr) ) return; +#endif + z = pStr->zBuf; + for(i=1; inUsed && ((c = z[i])!=',' || inStr || nNest); i++){ + if( c=='"' ){ + inStr = !inStr; + }else if( c=='\\' ){ + i++; + }else if( !inStr ){ + if( c=='{' || c=='[' ) nNest++; + if( c=='}' || c==']' ) nNest--; + } + } + if( inUsed ){ + pStr->nUsed -= i; + memmove(&z[1], &z[i+1], (size_t)pStr->nUsed-1); + z[pStr->nUsed] = 0; + }else{ + pStr->nUsed = 1; + } +} +#else +# define jsonGroupInverse 0 +#endif + + +/* +** json_group_obj(NAME,VALUE) +** +** Return a JSON object composed of all names and values in the aggregate. +*/ +static void jsonObjectStep( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + JsonString *pStr; + const char *z; + u32 n; + UNUSED_PARAMETER(argc); + pStr = (JsonString*)sqlite3_aggregate_context(ctx, sizeof(*pStr)); + if( pStr ){ + if( pStr->zBuf==0 ){ + jsonInit(pStr, ctx); + jsonAppendChar(pStr, '{'); + }else if( pStr->nUsed>1 ){ + jsonAppendChar(pStr, ','); + } + pStr->pCtx = ctx; + z = (const char*)sqlite3_value_text(argv[0]); + n = (u32)sqlite3_value_bytes(argv[0]); + jsonAppendString(pStr, z, n); + jsonAppendChar(pStr, ':'); + jsonAppendValue(pStr, argv[1]); + } +} +static void jsonObjectCompute(sqlite3_context *ctx, int isFinal){ + JsonString *pStr; + pStr = (JsonString*)sqlite3_aggregate_context(ctx, 0); + if( pStr ){ + jsonAppendChar(pStr, '}'); + if( pStr->bErr ){ + if( pStr->bErr==1 ) sqlite3_result_error_nomem(ctx); + assert( pStr->bStatic ); + }else if( isFinal ){ + sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed, + pStr->bStatic ? SQLITE_TRANSIENT : sqlite3_free); + pStr->bStatic = 1; + }else{ + sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed, SQLITE_TRANSIENT); + pStr->nUsed--; + } + }else{ + sqlite3_result_text(ctx, "{}", 2, SQLITE_STATIC); + } + sqlite3_result_subtype(ctx, JSON_SUBTYPE); +} +static void jsonObjectValue(sqlite3_context *ctx){ + jsonObjectCompute(ctx, 0); +} +static void jsonObjectFinal(sqlite3_context *ctx){ + jsonObjectCompute(ctx, 1); +} + + + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/**************************************************************************** +** The json_each virtual table +****************************************************************************/ +typedef struct JsonEachCursor JsonEachCursor; +struct JsonEachCursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + u32 iRowid; /* The rowid */ + u32 iBegin; /* The first node of the scan */ + u32 i; /* Index in sParse.aNode[] of current row */ + u32 iEnd; /* EOF when i equals or exceeds this value */ + u8 eType; /* Type of top-level element */ + u8 bRecursive; /* True for json_tree(). False for json_each() */ + char *zJson; /* Input JSON */ + char *zRoot; /* Path by which to filter zJson */ + JsonParse sParse; /* Parse of the input JSON */ +}; + +/* Constructor for the json_each virtual table */ +static int jsonEachConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + sqlite3_vtab *pNew; + int rc; + +/* Column numbers */ +#define JEACH_KEY 0 +#define JEACH_VALUE 1 +#define JEACH_TYPE 2 +#define JEACH_ATOM 3 +#define JEACH_ID 4 +#define JEACH_PARENT 5 +#define JEACH_FULLKEY 6 +#define JEACH_PATH 7 +/* The xBestIndex method assumes that the JSON and ROOT columns are +** the last two columns in the table. Should this ever changes, be +** sure to update the xBestIndex method. */ +#define JEACH_JSON 8 +#define JEACH_ROOT 9 + + UNUSED_PARAMETER(pzErr); + UNUSED_PARAMETER(argv); + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(pAux); + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(key,value,type,atom,id,parent,fullkey,path," + "json HIDDEN,root HIDDEN)"); + if( rc==SQLITE_OK ){ + pNew = *ppVtab = sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(*pNew)); + sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); + } + return rc; +} + +/* destructor for json_each virtual table */ +static int jsonEachDisconnect(sqlite3_vtab *pVtab){ + sqlite3_free(pVtab); + return SQLITE_OK; +} + +/* constructor for a JsonEachCursor object for json_each(). */ +static int jsonEachOpenEach(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + JsonEachCursor *pCur; + + UNUSED_PARAMETER(p); + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* constructor for a JsonEachCursor object for json_tree(). */ +static int jsonEachOpenTree(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + int rc = jsonEachOpenEach(p, ppCursor); + if( rc==SQLITE_OK ){ + JsonEachCursor *pCur = (JsonEachCursor*)*ppCursor; + pCur->bRecursive = 1; + } + return rc; +} + +/* Reset a JsonEachCursor back to its original state. Free any memory +** held. */ +static void jsonEachCursorReset(JsonEachCursor *p){ + sqlite3_free(p->zJson); + sqlite3_free(p->zRoot); + jsonParseReset(&p->sParse); + p->iRowid = 0; + p->i = 0; + p->iEnd = 0; + p->eType = 0; + p->zJson = 0; + p->zRoot = 0; +} + +/* Destructor for a jsonEachCursor object */ +static int jsonEachClose(sqlite3_vtab_cursor *cur){ + JsonEachCursor *p = (JsonEachCursor*)cur; + jsonEachCursorReset(p); + sqlite3_free(cur); + return SQLITE_OK; +} + +/* Return TRUE if the jsonEachCursor object has been advanced off the end +** of the JSON object */ +static int jsonEachEof(sqlite3_vtab_cursor *cur){ + JsonEachCursor *p = (JsonEachCursor*)cur; + return p->i >= p->iEnd; +} + +/* Advance the cursor to the next element for json_tree() */ +static int jsonEachNext(sqlite3_vtab_cursor *cur){ + JsonEachCursor *p = (JsonEachCursor*)cur; + if( p->bRecursive ){ + if( p->sParse.aNode[p->i].jnFlags & JNODE_LABEL ) p->i++; + p->i++; + p->iRowid++; + if( p->iiEnd ){ + u32 iUp = p->sParse.aUp[p->i]; + JsonNode *pUp = &p->sParse.aNode[iUp]; + p->eType = pUp->eType; + if( pUp->eType==JSON_ARRAY ){ + assert( pUp->eU==0 || pUp->eU==3 ); + testcase( pUp->eU==3 ); + VVA( pUp->eU = 3 ); + if( iUp==p->i-1 ){ + pUp->u.iKey = 0; + }else{ + pUp->u.iKey++; + } + } + } + }else{ + switch( p->eType ){ + case JSON_ARRAY: { + p->i += jsonNodeSize(&p->sParse.aNode[p->i]); + p->iRowid++; + break; + } + case JSON_OBJECT: { + p->i += 1 + jsonNodeSize(&p->sParse.aNode[p->i+1]); + p->iRowid++; + break; + } + default: { + p->i = p->iEnd; + break; + } + } + } + return SQLITE_OK; +} + +/* Append an object label to the JSON Path being constructed +** in pStr. +*/ +static void jsonAppendObjectPathElement( + JsonString *pStr, + JsonNode *pNode +){ + int jj, nn; + const char *z; + assert( pNode->eType==JSON_STRING ); + assert( pNode->jnFlags & JNODE_LABEL ); + assert( pNode->eU==1 ); + z = pNode->u.zJContent; + nn = pNode->n; + assert( nn>=2 ); + assert( z[0]=='"' ); + assert( z[nn-1]=='"' ); + if( nn>2 && sqlite3Isalpha(z[1]) ){ + for(jj=2; jjsParse.aUp[i]; + jsonEachComputePath(p, pStr, iUp); + pNode = &p->sParse.aNode[i]; + pUp = &p->sParse.aNode[iUp]; + if( pUp->eType==JSON_ARRAY ){ + assert( pUp->eU==3 || (pUp->eU==0 && pUp->u.iKey==0) ); + testcase( pUp->eU==0 ); + jsonPrintf(30, pStr, "[%d]", pUp->u.iKey); + }else{ + assert( pUp->eType==JSON_OBJECT ); + if( (pNode->jnFlags & JNODE_LABEL)==0 ) pNode--; + jsonAppendObjectPathElement(pStr, pNode); + } +} + +/* Return the value of a column */ +static int jsonEachColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + JsonEachCursor *p = (JsonEachCursor*)cur; + JsonNode *pThis = &p->sParse.aNode[p->i]; + switch( i ){ + case JEACH_KEY: { + if( p->i==0 ) break; + if( p->eType==JSON_OBJECT ){ + jsonReturn(pThis, ctx, 0); + }else if( p->eType==JSON_ARRAY ){ + u32 iKey; + if( p->bRecursive ){ + if( p->iRowid==0 ) break; + assert( p->sParse.aNode[p->sParse.aUp[p->i]].eU==3 ); + iKey = p->sParse.aNode[p->sParse.aUp[p->i]].u.iKey; + }else{ + iKey = p->iRowid; + } + sqlite3_result_int64(ctx, (sqlite3_int64)iKey); + } + break; + } + case JEACH_VALUE: { + if( pThis->jnFlags & JNODE_LABEL ) pThis++; + jsonReturn(pThis, ctx, 0); + break; + } + case JEACH_TYPE: { + if( pThis->jnFlags & JNODE_LABEL ) pThis++; + sqlite3_result_text(ctx, jsonType[pThis->eType], -1, SQLITE_STATIC); + break; + } + case JEACH_ATOM: { + if( pThis->jnFlags & JNODE_LABEL ) pThis++; + if( pThis->eType>=JSON_ARRAY ) break; + jsonReturn(pThis, ctx, 0); + break; + } + case JEACH_ID: { + sqlite3_result_int64(ctx, + (sqlite3_int64)p->i + ((pThis->jnFlags & JNODE_LABEL)!=0)); + break; + } + case JEACH_PARENT: { + if( p->i>p->iBegin && p->bRecursive ){ + sqlite3_result_int64(ctx, (sqlite3_int64)p->sParse.aUp[p->i]); + } + break; + } + case JEACH_FULLKEY: { + JsonString x; + jsonInit(&x, ctx); + if( p->bRecursive ){ + jsonEachComputePath(p, &x, p->i); + }else{ + if( p->zRoot ){ + jsonAppendRaw(&x, p->zRoot, (int)strlen(p->zRoot)); + }else{ + jsonAppendChar(&x, '$'); + } + if( p->eType==JSON_ARRAY ){ + jsonPrintf(30, &x, "[%d]", p->iRowid); + }else if( p->eType==JSON_OBJECT ){ + jsonAppendObjectPathElement(&x, pThis); + } + } + jsonResult(&x); + break; + } + case JEACH_PATH: { + if( p->bRecursive ){ + JsonString x; + jsonInit(&x, ctx); + jsonEachComputePath(p, &x, p->sParse.aUp[p->i]); + jsonResult(&x); + break; + } + /* For json_each() path and root are the same so fall through + ** into the root case */ + /* no break */ deliberate_fall_through + } + default: { + const char *zRoot = p->zRoot; + if( zRoot==0 ) zRoot = "$"; + sqlite3_result_text(ctx, zRoot, -1, SQLITE_STATIC); + break; + } + case JEACH_JSON: { + assert( i==JEACH_JSON ); + sqlite3_result_text(ctx, p->sParse.zJson, -1, SQLITE_STATIC); + break; + } + } + return SQLITE_OK; +} + +/* Return the current rowid value */ +static int jsonEachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + JsonEachCursor *p = (JsonEachCursor*)cur; + *pRowid = p->iRowid; + return SQLITE_OK; +} + +/* The query strategy is to look for an equality constraint on the json +** column. Without such a constraint, the table cannot operate. idxNum is +** 1 if the constraint is found, 3 if the constraint and zRoot are found, +** and 0 otherwise. +*/ +static int jsonEachBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + int i; /* Loop counter or computed array index */ + int aIdx[2]; /* Index of constraints for JSON and ROOT */ + int unusableMask = 0; /* Mask of unusable JSON and ROOT constraints */ + int idxMask = 0; /* Mask of usable == constraints JSON and ROOT */ + const struct sqlite3_index_constraint *pConstraint; + + /* This implementation assumes that JSON and ROOT are the last two + ** columns in the table */ + assert( JEACH_ROOT == JEACH_JSON+1 ); + UNUSED_PARAMETER(tab); + aIdx[0] = aIdx[1] = -1; + pConstraint = pIdxInfo->aConstraint; + for(i=0; inConstraint; i++, pConstraint++){ + int iCol; + int iMask; + if( pConstraint->iColumn < JEACH_JSON ) continue; + iCol = pConstraint->iColumn - JEACH_JSON; + assert( iCol==0 || iCol==1 ); + testcase( iCol==0 ); + iMask = 1 << iCol; + if( pConstraint->usable==0 ){ + unusableMask |= iMask; + }else if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ + aIdx[iCol] = i; + idxMask |= iMask; + } + } + if( (unusableMask & ~idxMask)!=0 ){ + /* If there are any unusable constraints on JSON or ROOT, then reject + ** this entire plan */ + return SQLITE_CONSTRAINT; + } + if( aIdx[0]<0 ){ + /* No JSON input. Leave estimatedCost at the huge value that it was + ** initialized to to discourage the query planner from selecting this + ** plan. */ + pIdxInfo->idxNum = 0; + }else{ + pIdxInfo->estimatedCost = 1.0; + i = aIdx[0]; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + if( aIdx[1]<0 ){ + pIdxInfo->idxNum = 1; /* Only JSON supplied. Plan 1 */ + }else{ + i = aIdx[1]; + pIdxInfo->aConstraintUsage[i].argvIndex = 2; + pIdxInfo->aConstraintUsage[i].omit = 1; + pIdxInfo->idxNum = 3; /* Both JSON and ROOT are supplied. Plan 3 */ + } + } + return SQLITE_OK; +} + +/* Start a search on a new JSON string */ +static int jsonEachFilter( + sqlite3_vtab_cursor *cur, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + JsonEachCursor *p = (JsonEachCursor*)cur; + const char *z; + const char *zRoot = 0; + sqlite3_int64 n; + + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(argc); + jsonEachCursorReset(p); + if( idxNum==0 ) return SQLITE_OK; + z = (const char*)sqlite3_value_text(argv[0]); + if( z==0 ) return SQLITE_OK; + n = sqlite3_value_bytes(argv[0]); + p->zJson = sqlite3_malloc64( n+1 ); + if( p->zJson==0 ) return SQLITE_NOMEM; + memcpy(p->zJson, z, (size_t)n+1); + if( jsonParse(&p->sParse, 0, p->zJson) ){ + int rc = SQLITE_NOMEM; + if( p->sParse.oom==0 ){ + sqlite3_free(cur->pVtab->zErrMsg); + cur->pVtab->zErrMsg = sqlite3_mprintf("malformed JSON"); + if( cur->pVtab->zErrMsg ) rc = SQLITE_ERROR; + } + jsonEachCursorReset(p); + return rc; + }else if( p->bRecursive && jsonParseFindParents(&p->sParse) ){ + jsonEachCursorReset(p); + return SQLITE_NOMEM; + }else{ + JsonNode *pNode = 0; + if( idxNum==3 ){ + const char *zErr = 0; + zRoot = (const char*)sqlite3_value_text(argv[1]); + if( zRoot==0 ) return SQLITE_OK; + n = sqlite3_value_bytes(argv[1]); + p->zRoot = sqlite3_malloc64( n+1 ); + if( p->zRoot==0 ) return SQLITE_NOMEM; + memcpy(p->zRoot, zRoot, (size_t)n+1); + if( zRoot[0]!='$' ){ + zErr = zRoot; + }else{ + pNode = jsonLookupStep(&p->sParse, 0, p->zRoot+1, 0, &zErr); + } + if( zErr ){ + sqlite3_free(cur->pVtab->zErrMsg); + cur->pVtab->zErrMsg = jsonPathSyntaxError(zErr); + jsonEachCursorReset(p); + return cur->pVtab->zErrMsg ? SQLITE_ERROR : SQLITE_NOMEM; + }else if( pNode==0 ){ + return SQLITE_OK; + } + }else{ + pNode = p->sParse.aNode; + } + p->iBegin = p->i = (int)(pNode - p->sParse.aNode); + p->eType = pNode->eType; + if( p->eType>=JSON_ARRAY ){ + assert( pNode->eU==0 ); + VVA( pNode->eU = 3 ); + pNode->u.iKey = 0; + p->iEnd = p->i + pNode->n + 1; + if( p->bRecursive ){ + p->eType = p->sParse.aNode[p->sParse.aUp[p->i]].eType; + if( p->i>0 && (p->sParse.aNode[p->i-1].jnFlags & JNODE_LABEL)!=0 ){ + p->i--; + } + }else{ + p->i++; + } + }else{ + p->iEnd = p->i+1; + } + } + return SQLITE_OK; +} + +/* The methods of the json_each virtual table */ +static sqlite3_module jsonEachModule = { + 0, /* iVersion */ + 0, /* xCreate */ + jsonEachConnect, /* xConnect */ + jsonEachBestIndex, /* xBestIndex */ + jsonEachDisconnect, /* xDisconnect */ + 0, /* xDestroy */ + jsonEachOpenEach, /* xOpen - open a cursor */ + jsonEachClose, /* xClose - close a cursor */ + jsonEachFilter, /* xFilter - configure scan constraints */ + jsonEachNext, /* xNext - advance a cursor */ + jsonEachEof, /* xEof - check for end of scan */ + jsonEachColumn, /* xColumn - read data */ + jsonEachRowid, /* xRowid - read data */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + 0 /* xShadowName */ +}; + +/* The methods of the json_tree virtual table. */ +static sqlite3_module jsonTreeModule = { + 0, /* iVersion */ + 0, /* xCreate */ + jsonEachConnect, /* xConnect */ + jsonEachBestIndex, /* xBestIndex */ + jsonEachDisconnect, /* xDisconnect */ + 0, /* xDestroy */ + jsonEachOpenTree, /* xOpen - open a cursor */ + jsonEachClose, /* xClose - close a cursor */ + jsonEachFilter, /* xFilter - configure scan constraints */ + jsonEachNext, /* xNext - advance a cursor */ + jsonEachEof, /* xEof - check for end of scan */ + jsonEachColumn, /* xColumn - read data */ + jsonEachRowid, /* xRowid - read data */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + 0 /* xShadowName */ +}; +#endif /* SQLITE_OMIT_VIRTUALTABLE */ +#endif /* !defined(SQLITE_OMIT_JSON) */ + +/* +** Register JSON functions. +*/ +SQLITE_PRIVATE void sqlite3RegisterJsonFunctions(void){ +#ifndef SQLITE_OMIT_JSON + static FuncDef aJsonFunc[] = { + JFUNCTION(json, 1, 0, jsonRemoveFunc), + JFUNCTION(json_array, -1, 0, jsonArrayFunc), + JFUNCTION(json_array_length, 1, 0, jsonArrayLengthFunc), + JFUNCTION(json_array_length, 2, 0, jsonArrayLengthFunc), + JFUNCTION(json_extract, -1, 0, jsonExtractFunc), + JFUNCTION(->, 2, JSON_JSON, jsonExtractFunc), + JFUNCTION(->>, 2, JSON_SQL, jsonExtractFunc), + JFUNCTION(json_insert, -1, 0, jsonSetFunc), + JFUNCTION(json_object, -1, 0, jsonObjectFunc), + JFUNCTION(json_patch, 2, 0, jsonPatchFunc), + JFUNCTION(json_quote, 1, 0, jsonQuoteFunc), + JFUNCTION(json_remove, -1, 0, jsonRemoveFunc), + JFUNCTION(json_replace, -1, 0, jsonReplaceFunc), + JFUNCTION(json_set, -1, JSON_ISSET, jsonSetFunc), + JFUNCTION(json_type, 1, 0, jsonTypeFunc), + JFUNCTION(json_type, 2, 0, jsonTypeFunc), + JFUNCTION(json_valid, 1, 0, jsonValidFunc), +#if SQLITE_DEBUG + JFUNCTION(json_parse, 1, 0, jsonParseFunc), + JFUNCTION(json_test1, 1, 0, jsonTest1Func), +#endif + WAGGREGATE(json_group_array, 1, 0, 0, + jsonArrayStep, jsonArrayFinal, jsonArrayValue, jsonGroupInverse, + SQLITE_SUBTYPE|SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS), + WAGGREGATE(json_group_object, 2, 0, 0, + jsonObjectStep, jsonObjectFinal, jsonObjectValue, jsonGroupInverse, + SQLITE_SUBTYPE|SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS) + }; + sqlite3InsertBuiltinFuncs(aJsonFunc, ArraySize(aJsonFunc)); +#endif +} + +#if !defined(SQLITE_OMIT_VIRTUALTABLE) && !defined(SQLITE_OMIT_JSON) +/* +** Register the JSON table-valued functions +*/ +SQLITE_PRIVATE int sqlite3JsonTableFunctions(sqlite3 *db){ + int rc = SQLITE_OK; + static const struct { + const char *zName; + sqlite3_module *pModule; + } aMod[] = { + { "json_each", &jsonEachModule }, + { "json_tree", &jsonTreeModule }, + }; + unsigned int i; + for(i=0; i */ +/* #include */ +/* #include */ +/* #include */ + +/* The following macro is used to suppress compiler warnings. +*/ +#ifndef UNUSED_PARAMETER +# define UNUSED_PARAMETER(x) (void)(x) +#endif + +typedef struct Rtree Rtree; +typedef struct RtreeCursor RtreeCursor; +typedef struct RtreeNode RtreeNode; +typedef struct RtreeCell RtreeCell; +typedef struct RtreeConstraint RtreeConstraint; +typedef struct RtreeMatchArg RtreeMatchArg; +typedef struct RtreeGeomCallback RtreeGeomCallback; +typedef union RtreeCoord RtreeCoord; +typedef struct RtreeSearchPoint RtreeSearchPoint; + +/* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */ +#define RTREE_MAX_DIMENSIONS 5 + +/* Maximum number of auxiliary columns */ +#define RTREE_MAX_AUX_COLUMN 100 + +/* Size of hash table Rtree.aHash. This hash table is not expected to +** ever contain very many entries, so a fixed number of buckets is +** used. +*/ +#define HASHSIZE 97 + +/* The xBestIndex method of this virtual table requires an estimate of +** the number of rows in the virtual table to calculate the costs of +** various strategies. If possible, this estimate is loaded from the +** sqlite_stat1 table (with RTREE_MIN_ROWEST as a hard-coded minimum). +** Otherwise, if no sqlite_stat1 entry is available, use +** RTREE_DEFAULT_ROWEST. +*/ +#define RTREE_DEFAULT_ROWEST 1048576 +#define RTREE_MIN_ROWEST 100 + +/* +** An rtree virtual-table object. +*/ +struct Rtree { + sqlite3_vtab base; /* Base class. Must be first */ + sqlite3 *db; /* Host database connection */ + int iNodeSize; /* Size in bytes of each node in the node table */ + u8 nDim; /* Number of dimensions */ + u8 nDim2; /* Twice the number of dimensions */ + u8 eCoordType; /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */ + u8 nBytesPerCell; /* Bytes consumed per cell */ + u8 inWrTrans; /* True if inside write transaction */ + u8 nAux; /* # of auxiliary columns in %_rowid */ +#ifdef SQLITE_ENABLE_GEOPOLY + u8 nAuxNotNull; /* Number of initial not-null aux columns */ +#endif +#ifdef SQLITE_DEBUG + u8 bCorrupt; /* Shadow table corruption detected */ +#endif + int iDepth; /* Current depth of the r-tree structure */ + char *zDb; /* Name of database containing r-tree table */ + char *zName; /* Name of r-tree table */ + u32 nBusy; /* Current number of users of this structure */ + i64 nRowEst; /* Estimated number of rows in this table */ + u32 nCursor; /* Number of open cursors */ + u32 nNodeRef; /* Number RtreeNodes with positive nRef */ + char *zReadAuxSql; /* SQL for statement to read aux data */ + + /* List of nodes removed during a CondenseTree operation. List is + ** linked together via the pointer normally used for hash chains - + ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree + ** headed by the node (leaf nodes have RtreeNode.iNode==0). + */ + RtreeNode *pDeleted; + int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ + + /* Blob I/O on xxx_node */ + sqlite3_blob *pNodeBlob; + + /* Statements to read/write/delete a record from xxx_node */ + sqlite3_stmt *pWriteNode; + sqlite3_stmt *pDeleteNode; + + /* Statements to read/write/delete a record from xxx_rowid */ + sqlite3_stmt *pReadRowid; + sqlite3_stmt *pWriteRowid; + sqlite3_stmt *pDeleteRowid; + + /* Statements to read/write/delete a record from xxx_parent */ + sqlite3_stmt *pReadParent; + sqlite3_stmt *pWriteParent; + sqlite3_stmt *pDeleteParent; + + /* Statement for writing to the "aux:" fields, if there are any */ + sqlite3_stmt *pWriteAux; + + RtreeNode *aHash[HASHSIZE]; /* Hash table of in-memory nodes. */ +}; + +/* Possible values for Rtree.eCoordType: */ +#define RTREE_COORD_REAL32 0 +#define RTREE_COORD_INT32 1 + +/* +** If SQLITE_RTREE_INT_ONLY is defined, then this virtual table will +** only deal with integer coordinates. No floating point operations +** will be done. +*/ +#ifdef SQLITE_RTREE_INT_ONLY + typedef sqlite3_int64 RtreeDValue; /* High accuracy coordinate */ + typedef int RtreeValue; /* Low accuracy coordinate */ +# define RTREE_ZERO 0 +#else + typedef double RtreeDValue; /* High accuracy coordinate */ + typedef float RtreeValue; /* Low accuracy coordinate */ +# define RTREE_ZERO 0.0 +#endif + +/* +** Set the Rtree.bCorrupt flag +*/ +#ifdef SQLITE_DEBUG +# define RTREE_IS_CORRUPT(X) ((X)->bCorrupt = 1) +#else +# define RTREE_IS_CORRUPT(X) +#endif + +/* +** When doing a search of an r-tree, instances of the following structure +** record intermediate results from the tree walk. +** +** The id is always a node-id. For iLevel>=1 the id is the node-id of +** the node that the RtreeSearchPoint represents. When iLevel==0, however, +** the id is of the parent node and the cell that RtreeSearchPoint +** represents is the iCell-th entry in the parent node. +*/ +struct RtreeSearchPoint { + RtreeDValue rScore; /* The score for this node. Smallest goes first. */ + sqlite3_int64 id; /* Node ID */ + u8 iLevel; /* 0=entries. 1=leaf node. 2+ for higher */ + u8 eWithin; /* PARTLY_WITHIN or FULLY_WITHIN */ + u8 iCell; /* Cell index within the node */ +}; + +/* +** The minimum number of cells allowed for a node is a third of the +** maximum. In Gutman's notation: +** +** m = M/3 +** +** If an R*-tree "Reinsert" operation is required, the same number of +** cells are removed from the overfull node and reinserted into the tree. +*/ +#define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3) +#define RTREE_REINSERT(p) RTREE_MINCELLS(p) +#define RTREE_MAXCELLS 51 + +/* +** The smallest possible node-size is (512-64)==448 bytes. And the largest +** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates). +** Therefore all non-root nodes must contain at least 3 entries. Since +** 3^40 is greater than 2^64, an r-tree structure always has a depth of +** 40 or less. +*/ +#define RTREE_MAX_DEPTH 40 + + +/* +** Number of entries in the cursor RtreeNode cache. The first entry is +** used to cache the RtreeNode for RtreeCursor.sPoint. The remaining +** entries cache the RtreeNode for the first elements of the priority queue. +*/ +#define RTREE_CACHE_SZ 5 + +/* +** An rtree cursor object. +*/ +struct RtreeCursor { + sqlite3_vtab_cursor base; /* Base class. Must be first */ + u8 atEOF; /* True if at end of search */ + u8 bPoint; /* True if sPoint is valid */ + u8 bAuxValid; /* True if pReadAux is valid */ + int iStrategy; /* Copy of idxNum search parameter */ + int nConstraint; /* Number of entries in aConstraint */ + RtreeConstraint *aConstraint; /* Search constraints. */ + int nPointAlloc; /* Number of slots allocated for aPoint[] */ + int nPoint; /* Number of slots used in aPoint[] */ + int mxLevel; /* iLevel value for root of the tree */ + RtreeSearchPoint *aPoint; /* Priority queue for search points */ + sqlite3_stmt *pReadAux; /* Statement to read aux-data */ + RtreeSearchPoint sPoint; /* Cached next search point */ + RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */ + u32 anQueue[RTREE_MAX_DEPTH+1]; /* Number of queued entries by iLevel */ +}; + +/* Return the Rtree of a RtreeCursor */ +#define RTREE_OF_CURSOR(X) ((Rtree*)((X)->base.pVtab)) + +/* +** A coordinate can be either a floating point number or a integer. All +** coordinates within a single R-Tree are always of the same time. +*/ +union RtreeCoord { + RtreeValue f; /* Floating point value */ + int i; /* Integer value */ + u32 u; /* Unsigned for byte-order conversions */ +}; + +/* +** The argument is an RtreeCoord. Return the value stored within the RtreeCoord +** formatted as a RtreeDValue (double or int64). This macro assumes that local +** variable pRtree points to the Rtree structure associated with the +** RtreeCoord. +*/ +#ifdef SQLITE_RTREE_INT_ONLY +# define DCOORD(coord) ((RtreeDValue)coord.i) +#else +# define DCOORD(coord) ( \ + (pRtree->eCoordType==RTREE_COORD_REAL32) ? \ + ((double)coord.f) : \ + ((double)coord.i) \ + ) +#endif + +/* +** A search constraint. +*/ +struct RtreeConstraint { + int iCoord; /* Index of constrained coordinate */ + int op; /* Constraining operation */ + union { + RtreeDValue rValue; /* Constraint value. */ + int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*); + int (*xQueryFunc)(sqlite3_rtree_query_info*); + } u; + sqlite3_rtree_query_info *pInfo; /* xGeom and xQueryFunc argument */ +}; + +/* Possible values for RtreeConstraint.op */ +#define RTREE_EQ 0x41 /* A */ +#define RTREE_LE 0x42 /* B */ +#define RTREE_LT 0x43 /* C */ +#define RTREE_GE 0x44 /* D */ +#define RTREE_GT 0x45 /* E */ +#define RTREE_MATCH 0x46 /* F: Old-style sqlite3_rtree_geometry_callback() */ +#define RTREE_QUERY 0x47 /* G: New-style sqlite3_rtree_query_callback() */ + +/* Special operators available only on cursors. Needs to be consecutive +** with the normal values above, but must be less than RTREE_MATCH. These +** are used in the cursor for contraints such as x=NULL (RTREE_FALSE) or +** x<'xyz' (RTREE_TRUE) */ +#define RTREE_TRUE 0x3f /* ? */ +#define RTREE_FALSE 0x40 /* @ */ + +/* +** An rtree structure node. +*/ +struct RtreeNode { + RtreeNode *pParent; /* Parent node */ + i64 iNode; /* The node number */ + int nRef; /* Number of references to this node */ + int isDirty; /* True if the node needs to be written to disk */ + u8 *zData; /* Content of the node, as should be on disk */ + RtreeNode *pNext; /* Next node in this hash collision chain */ +}; + +/* Return the number of cells in a node */ +#define NCELL(pNode) readInt16(&(pNode)->zData[2]) + +/* +** A single cell from a node, deserialized +*/ +struct RtreeCell { + i64 iRowid; /* Node or entry ID */ + RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; /* Bounding box coordinates */ +}; + + +/* +** This object becomes the sqlite3_user_data() for the SQL functions +** that are created by sqlite3_rtree_geometry_callback() and +** sqlite3_rtree_query_callback() and which appear on the right of MATCH +** operators in order to constrain a search. +** +** xGeom and xQueryFunc are the callback functions. Exactly one of +** xGeom and xQueryFunc fields is non-NULL, depending on whether the +** SQL function was created using sqlite3_rtree_geometry_callback() or +** sqlite3_rtree_query_callback(). +** +** This object is deleted automatically by the destructor mechanism in +** sqlite3_create_function_v2(). +*/ +struct RtreeGeomCallback { + int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*); + int (*xQueryFunc)(sqlite3_rtree_query_info*); + void (*xDestructor)(void*); + void *pContext; +}; + +/* +** An instance of this structure (in the form of a BLOB) is returned by +** the SQL functions that sqlite3_rtree_geometry_callback() and +** sqlite3_rtree_query_callback() create, and is read as the right-hand +** operand to the MATCH operator of an R-Tree. +*/ +struct RtreeMatchArg { + u32 iSize; /* Size of this object */ + RtreeGeomCallback cb; /* Info about the callback functions */ + int nParam; /* Number of parameters to the SQL function */ + sqlite3_value **apSqlParam; /* Original SQL parameter values */ + RtreeDValue aParam[1]; /* Values for parameters to the SQL function */ +}; + +#ifndef MAX +# define MAX(x,y) ((x) < (y) ? (y) : (x)) +#endif +#ifndef MIN +# define MIN(x,y) ((x) > (y) ? (y) : (x)) +#endif + +/* What version of GCC is being used. 0 means GCC is not being used . +** Note that the GCC_VERSION macro will also be set correctly when using +** clang, since clang works hard to be gcc compatible. So the gcc +** optimizations will also work when compiling with clang. +*/ +#ifndef GCC_VERSION +#if defined(__GNUC__) && !defined(SQLITE_DISABLE_INTRINSIC) +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif +#endif + +/* The testcase() macro should already be defined in the amalgamation. If +** it is not, make it a no-op. +*/ +#ifndef SQLITE_AMALGAMATION +# if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) + unsigned int sqlite3RtreeTestcase = 0; +# define testcase(X) if( X ){ sqlite3RtreeTestcase += __LINE__; } +# else +# define testcase(X) +# endif +#endif + +/* +** Make sure that the compiler intrinsics we desire are enabled when +** compiling with an appropriate version of MSVC unless prevented by +** the SQLITE_DISABLE_INTRINSIC define. +*/ +#if !defined(SQLITE_DISABLE_INTRINSIC) +# if defined(_MSC_VER) && _MSC_VER>=1400 +# if !defined(_WIN32_WCE) +/* # include */ +# pragma intrinsic(_byteswap_ulong) +# pragma intrinsic(_byteswap_uint64) +# else +/* # include */ +# endif +# endif +#endif + +/* +** Macros to determine whether the machine is big or little endian, +** and whether or not that determination is run-time or compile-time. +** +** For best performance, an attempt is made to guess at the byte-order +** using C-preprocessor macros. If that is unsuccessful, or if +** -DSQLITE_RUNTIME_BYTEORDER=1 is set, then byte-order is determined +** at run-time. +*/ +#ifndef SQLITE_BYTEORDER +#if defined(i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ + defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ + defined(__arm__) +# define SQLITE_BYTEORDER 1234 +#elif defined(sparc) || defined(__ppc__) +# define SQLITE_BYTEORDER 4321 +#else +# define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */ +#endif +#endif + + +/* What version of MSVC is being used. 0 means MSVC is not being used */ +#ifndef MSVC_VERSION +#if defined(_MSC_VER) && !defined(SQLITE_DISABLE_INTRINSIC) +# define MSVC_VERSION _MSC_VER +#else +# define MSVC_VERSION 0 +#endif +#endif + +/* +** Functions to deserialize a 16 bit integer, 32 bit real number and +** 64 bit integer. The deserialized value is returned. +*/ +static int readInt16(u8 *p){ + return (p[0]<<8) + p[1]; +} +static void readCoord(u8 *p, RtreeCoord *pCoord){ + assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */ +#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + pCoord->u = _byteswap_ulong(*(u32*)p); +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 + pCoord->u = __builtin_bswap32(*(u32*)p); +#elif SQLITE_BYTEORDER==4321 + pCoord->u = *(u32*)p; +#else + pCoord->u = ( + (((u32)p[0]) << 24) + + (((u32)p[1]) << 16) + + (((u32)p[2]) << 8) + + (((u32)p[3]) << 0) + ); +#endif +} +static i64 readInt64(u8 *p){ +#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + u64 x; + memcpy(&x, p, 8); + return (i64)_byteswap_uint64(x); +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 + u64 x; + memcpy(&x, p, 8); + return (i64)__builtin_bswap64(x); +#elif SQLITE_BYTEORDER==4321 + i64 x; + memcpy(&x, p, 8); + return x; +#else + return (i64)( + (((u64)p[0]) << 56) + + (((u64)p[1]) << 48) + + (((u64)p[2]) << 40) + + (((u64)p[3]) << 32) + + (((u64)p[4]) << 24) + + (((u64)p[5]) << 16) + + (((u64)p[6]) << 8) + + (((u64)p[7]) << 0) + ); +#endif +} + +/* +** Functions to serialize a 16 bit integer, 32 bit real number and +** 64 bit integer. The value returned is the number of bytes written +** to the argument buffer (always 2, 4 and 8 respectively). +*/ +static void writeInt16(u8 *p, int i){ + p[0] = (i>> 8)&0xFF; + p[1] = (i>> 0)&0xFF; +} +static int writeCoord(u8 *p, RtreeCoord *pCoord){ + u32 i; + assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */ + assert( sizeof(RtreeCoord)==4 ); + assert( sizeof(u32)==4 ); +#if SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 + i = __builtin_bswap32(pCoord->u); + memcpy(p, &i, 4); +#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + i = _byteswap_ulong(pCoord->u); + memcpy(p, &i, 4); +#elif SQLITE_BYTEORDER==4321 + i = pCoord->u; + memcpy(p, &i, 4); +#else + i = pCoord->u; + p[0] = (i>>24)&0xFF; + p[1] = (i>>16)&0xFF; + p[2] = (i>> 8)&0xFF; + p[3] = (i>> 0)&0xFF; +#endif + return 4; +} +static int writeInt64(u8 *p, i64 i){ +#if SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 + i = (i64)__builtin_bswap64((u64)i); + memcpy(p, &i, 8); +#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + i = (i64)_byteswap_uint64((u64)i); + memcpy(p, &i, 8); +#elif SQLITE_BYTEORDER==4321 + memcpy(p, &i, 8); +#else + p[0] = (i>>56)&0xFF; + p[1] = (i>>48)&0xFF; + p[2] = (i>>40)&0xFF; + p[3] = (i>>32)&0xFF; + p[4] = (i>>24)&0xFF; + p[5] = (i>>16)&0xFF; + p[6] = (i>> 8)&0xFF; + p[7] = (i>> 0)&0xFF; +#endif + return 8; +} + +/* +** Increment the reference count of node p. +*/ +static void nodeReference(RtreeNode *p){ + if( p ){ + assert( p->nRef>0 ); + p->nRef++; + } +} + +/* +** Clear the content of node p (set all bytes to 0x00). +*/ +static void nodeZero(Rtree *pRtree, RtreeNode *p){ + memset(&p->zData[2], 0, pRtree->iNodeSize-2); + p->isDirty = 1; +} + +/* +** Given a node number iNode, return the corresponding key to use +** in the Rtree.aHash table. +*/ +static unsigned int nodeHash(i64 iNode){ + return ((unsigned)iNode) % HASHSIZE; +} + +/* +** Search the node hash table for node iNode. If found, return a pointer +** to it. Otherwise, return 0. +*/ +static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ + RtreeNode *p; + for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); + return p; +} + +/* +** Add node pNode to the node hash table. +*/ +static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ + int iHash; + assert( pNode->pNext==0 ); + iHash = nodeHash(pNode->iNode); + pNode->pNext = pRtree->aHash[iHash]; + pRtree->aHash[iHash] = pNode; +} + +/* +** Remove node pNode from the node hash table. +*/ +static void nodeHashDelete(Rtree *pRtree, RtreeNode *pNode){ + RtreeNode **pp; + if( pNode->iNode!=0 ){ + pp = &pRtree->aHash[nodeHash(pNode->iNode)]; + for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); } + *pp = pNode->pNext; + pNode->pNext = 0; + } +} + +/* +** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), +** indicating that node has not yet been assigned a node number. It is +** assigned a node number when nodeWrite() is called to write the +** node contents out to the database. +*/ +static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ + RtreeNode *pNode; + pNode = (RtreeNode *)sqlite3_malloc64(sizeof(RtreeNode) + pRtree->iNodeSize); + if( pNode ){ + memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); + pNode->zData = (u8 *)&pNode[1]; + pNode->nRef = 1; + pRtree->nNodeRef++; + pNode->pParent = pParent; + pNode->isDirty = 1; + nodeReference(pParent); + } + return pNode; +} + +/* +** Clear the Rtree.pNodeBlob object +*/ +static void nodeBlobReset(Rtree *pRtree){ + if( pRtree->pNodeBlob && pRtree->inWrTrans==0 && pRtree->nCursor==0 ){ + sqlite3_blob *pBlob = pRtree->pNodeBlob; + pRtree->pNodeBlob = 0; + sqlite3_blob_close(pBlob); + } +} + +/* +** Obtain a reference to an r-tree node. +*/ +static int nodeAcquire( + Rtree *pRtree, /* R-tree structure */ + i64 iNode, /* Node number to load */ + RtreeNode *pParent, /* Either the parent node or NULL */ + RtreeNode **ppNode /* OUT: Acquired node */ +){ + int rc = SQLITE_OK; + RtreeNode *pNode = 0; + + /* Check if the requested node is already in the hash table. If so, + ** increase its reference count and return it. + */ + if( (pNode = nodeHashLookup(pRtree, iNode))!=0 ){ + if( pParent && pParent!=pNode->pParent ){ + RTREE_IS_CORRUPT(pRtree); + return SQLITE_CORRUPT_VTAB; + } + pNode->nRef++; + *ppNode = pNode; + return SQLITE_OK; + } + + if( pRtree->pNodeBlob ){ + sqlite3_blob *pBlob = pRtree->pNodeBlob; + pRtree->pNodeBlob = 0; + rc = sqlite3_blob_reopen(pBlob, iNode); + pRtree->pNodeBlob = pBlob; + if( rc ){ + nodeBlobReset(pRtree); + if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM; + } + } + if( pRtree->pNodeBlob==0 ){ + char *zTab = sqlite3_mprintf("%s_node", pRtree->zName); + if( zTab==0 ) return SQLITE_NOMEM; + rc = sqlite3_blob_open(pRtree->db, pRtree->zDb, zTab, "data", iNode, 0, + &pRtree->pNodeBlob); + sqlite3_free(zTab); + } + if( rc ){ + nodeBlobReset(pRtree); + *ppNode = 0; + /* If unable to open an sqlite3_blob on the desired row, that can only + ** be because the shadow tables hold erroneous data. */ + if( rc==SQLITE_ERROR ){ + rc = SQLITE_CORRUPT_VTAB; + RTREE_IS_CORRUPT(pRtree); + } + }else if( pRtree->iNodeSize==sqlite3_blob_bytes(pRtree->pNodeBlob) ){ + pNode = (RtreeNode *)sqlite3_malloc64(sizeof(RtreeNode)+pRtree->iNodeSize); + if( !pNode ){ + rc = SQLITE_NOMEM; + }else{ + pNode->pParent = pParent; + pNode->zData = (u8 *)&pNode[1]; + pNode->nRef = 1; + pRtree->nNodeRef++; + pNode->iNode = iNode; + pNode->isDirty = 0; + pNode->pNext = 0; + rc = sqlite3_blob_read(pRtree->pNodeBlob, pNode->zData, + pRtree->iNodeSize, 0); + } + } + + /* If the root node was just loaded, set pRtree->iDepth to the height + ** of the r-tree structure. A height of zero means all data is stored on + ** the root node. A height of one means the children of the root node + ** are the leaves, and so on. If the depth as specified on the root node + ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. + */ + if( rc==SQLITE_OK && pNode && iNode==1 ){ + pRtree->iDepth = readInt16(pNode->zData); + if( pRtree->iDepth>RTREE_MAX_DEPTH ){ + rc = SQLITE_CORRUPT_VTAB; + RTREE_IS_CORRUPT(pRtree); + } + } + + /* If no error has occurred so far, check if the "number of entries" + ** field on the node is too large. If so, set the return code to + ** SQLITE_CORRUPT_VTAB. + */ + if( pNode && rc==SQLITE_OK ){ + if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ + rc = SQLITE_CORRUPT_VTAB; + RTREE_IS_CORRUPT(pRtree); + } + } + + if( rc==SQLITE_OK ){ + if( pNode!=0 ){ + nodeReference(pParent); + nodeHashInsert(pRtree, pNode); + }else{ + rc = SQLITE_CORRUPT_VTAB; + RTREE_IS_CORRUPT(pRtree); + } + *ppNode = pNode; + }else{ + if( pNode ){ + pRtree->nNodeRef--; + sqlite3_free(pNode); + } + *ppNode = 0; + } + + return rc; +} + +/* +** Overwrite cell iCell of node pNode with the contents of pCell. +*/ +static void nodeOverwriteCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node into which the cell is to be written */ + RtreeCell *pCell, /* The cell to write */ + int iCell /* Index into pNode into which pCell is written */ +){ + int ii; + u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; + p += writeInt64(p, pCell->iRowid); + for(ii=0; iinDim2; ii++){ + p += writeCoord(p, &pCell->aCoord[ii]); + } + pNode->isDirty = 1; +} + +/* +** Remove the cell with index iCell from node pNode. +*/ +static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){ + u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; + u8 *pSrc = &pDst[pRtree->nBytesPerCell]; + int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell; + memmove(pDst, pSrc, nByte); + writeInt16(&pNode->zData[2], NCELL(pNode)-1); + pNode->isDirty = 1; +} + +/* +** Insert the contents of cell pCell into node pNode. If the insert +** is successful, return SQLITE_OK. +** +** If there is not enough free space in pNode, return SQLITE_FULL. +*/ +static int nodeInsertCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* Write new cell into this node */ + RtreeCell *pCell /* The cell to be inserted */ +){ + int nCell; /* Current number of cells in pNode */ + int nMaxCell; /* Maximum number of cells for pNode */ + + nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; + nCell = NCELL(pNode); + + assert( nCell<=nMaxCell ); + if( nCellzData[2], nCell+1); + pNode->isDirty = 1; + } + + return (nCell==nMaxCell); +} + +/* +** If the node is dirty, write it out to the database. +*/ +static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){ + int rc = SQLITE_OK; + if( pNode->isDirty ){ + sqlite3_stmt *p = pRtree->pWriteNode; + if( pNode->iNode ){ + sqlite3_bind_int64(p, 1, pNode->iNode); + }else{ + sqlite3_bind_null(p, 1); + } + sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC); + sqlite3_step(p); + pNode->isDirty = 0; + rc = sqlite3_reset(p); + sqlite3_bind_null(p, 2); + if( pNode->iNode==0 && rc==SQLITE_OK ){ + pNode->iNode = sqlite3_last_insert_rowid(pRtree->db); + nodeHashInsert(pRtree, pNode); + } + } + return rc; +} + +/* +** Release a reference to a node. If the node is dirty and the reference +** count drops to zero, the node data is written to the database. +*/ +static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){ + int rc = SQLITE_OK; + if( pNode ){ + assert( pNode->nRef>0 ); + assert( pRtree->nNodeRef>0 ); + pNode->nRef--; + if( pNode->nRef==0 ){ + pRtree->nNodeRef--; + if( pNode->iNode==1 ){ + pRtree->iDepth = -1; + } + if( pNode->pParent ){ + rc = nodeRelease(pRtree, pNode->pParent); + } + if( rc==SQLITE_OK ){ + rc = nodeWrite(pRtree, pNode); + } + nodeHashDelete(pRtree, pNode); + sqlite3_free(pNode); + } + } + return rc; +} + +/* +** Return the 64-bit integer value associated with cell iCell of +** node pNode. If pNode is a leaf node, this is a rowid. If it is +** an internal node, then the 64-bit integer is a child page number. +*/ +static i64 nodeGetRowid( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node from which to extract the ID */ + int iCell /* The cell index from which to extract the ID */ +){ + assert( iCellzData[4 + pRtree->nBytesPerCell*iCell]); +} + +/* +** Return coordinate iCoord from cell iCell in node pNode. +*/ +static void nodeGetCoord( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node from which to extract a coordinate */ + int iCell, /* The index of the cell within the node */ + int iCoord, /* Which coordinate to extract */ + RtreeCoord *pCoord /* OUT: Space to write result to */ +){ + readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord); +} + +/* +** Deserialize cell iCell of node pNode. Populate the structure pointed +** to by pCell with the results. +*/ +static void nodeGetCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node containing the cell to be read */ + int iCell, /* Index of the cell within the node */ + RtreeCell *pCell /* OUT: Write the cell contents here */ +){ + u8 *pData; + RtreeCoord *pCoord; + int ii = 0; + pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell); + pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell); + pCoord = pCell->aCoord; + do{ + readCoord(pData, &pCoord[ii]); + readCoord(pData+4, &pCoord[ii+1]); + pData += 8; + ii += 2; + }while( iinDim2 ); +} + + +/* Forward declaration for the function that does the work of +** the virtual table module xCreate() and xConnect() methods. +*/ +static int rtreeInit( + sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int +); + +/* +** Rtree virtual table module xCreate method. +*/ +static int rtreeCreate( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1); +} + +/* +** Rtree virtual table module xConnect method. +*/ +static int rtreeConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0); +} + +/* +** Increment the r-tree reference count. +*/ +static void rtreeReference(Rtree *pRtree){ + pRtree->nBusy++; +} + +/* +** Decrement the r-tree reference count. When the reference count reaches +** zero the structure is deleted. +*/ +static void rtreeRelease(Rtree *pRtree){ + pRtree->nBusy--; + if( pRtree->nBusy==0 ){ + pRtree->inWrTrans = 0; + assert( pRtree->nCursor==0 ); + nodeBlobReset(pRtree); + assert( pRtree->nNodeRef==0 || pRtree->bCorrupt ); + sqlite3_finalize(pRtree->pWriteNode); + sqlite3_finalize(pRtree->pDeleteNode); + sqlite3_finalize(pRtree->pReadRowid); + sqlite3_finalize(pRtree->pWriteRowid); + sqlite3_finalize(pRtree->pDeleteRowid); + sqlite3_finalize(pRtree->pReadParent); + sqlite3_finalize(pRtree->pWriteParent); + sqlite3_finalize(pRtree->pDeleteParent); + sqlite3_finalize(pRtree->pWriteAux); + sqlite3_free(pRtree->zReadAuxSql); + sqlite3_free(pRtree); + } +} + +/* +** Rtree virtual table module xDisconnect method. +*/ +static int rtreeDisconnect(sqlite3_vtab *pVtab){ + rtreeRelease((Rtree *)pVtab); + return SQLITE_OK; +} + +/* +** Rtree virtual table module xDestroy method. +*/ +static int rtreeDestroy(sqlite3_vtab *pVtab){ + Rtree *pRtree = (Rtree *)pVtab; + int rc; + char *zCreate = sqlite3_mprintf( + "DROP TABLE '%q'.'%q_node';" + "DROP TABLE '%q'.'%q_rowid';" + "DROP TABLE '%q'.'%q_parent';", + pRtree->zDb, pRtree->zName, + pRtree->zDb, pRtree->zName, + pRtree->zDb, pRtree->zName + ); + if( !zCreate ){ + rc = SQLITE_NOMEM; + }else{ + nodeBlobReset(pRtree); + rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0); + sqlite3_free(zCreate); + } + if( rc==SQLITE_OK ){ + rtreeRelease(pRtree); + } + + return rc; +} + +/* +** Rtree virtual table module xOpen method. +*/ +static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + int rc = SQLITE_NOMEM; + Rtree *pRtree = (Rtree *)pVTab; + RtreeCursor *pCsr; + + pCsr = (RtreeCursor *)sqlite3_malloc64(sizeof(RtreeCursor)); + if( pCsr ){ + memset(pCsr, 0, sizeof(RtreeCursor)); + pCsr->base.pVtab = pVTab; + rc = SQLITE_OK; + pRtree->nCursor++; + } + *ppCursor = (sqlite3_vtab_cursor *)pCsr; + + return rc; +} + + +/* +** Reset a cursor back to its initial state. +*/ +static void resetCursor(RtreeCursor *pCsr){ + Rtree *pRtree = (Rtree *)(pCsr->base.pVtab); + int ii; + sqlite3_stmt *pStmt; + if( pCsr->aConstraint ){ + int i; /* Used to iterate through constraint array */ + for(i=0; inConstraint; i++){ + sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo; + if( pInfo ){ + if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser); + sqlite3_free(pInfo); + } + } + sqlite3_free(pCsr->aConstraint); + pCsr->aConstraint = 0; + } + for(ii=0; iiaNode[ii]); + sqlite3_free(pCsr->aPoint); + pStmt = pCsr->pReadAux; + memset(pCsr, 0, sizeof(RtreeCursor)); + pCsr->base.pVtab = (sqlite3_vtab*)pRtree; + pCsr->pReadAux = pStmt; + +} + +/* +** Rtree virtual table module xClose method. +*/ +static int rtreeClose(sqlite3_vtab_cursor *cur){ + Rtree *pRtree = (Rtree *)(cur->pVtab); + RtreeCursor *pCsr = (RtreeCursor *)cur; + assert( pRtree->nCursor>0 ); + resetCursor(pCsr); + sqlite3_finalize(pCsr->pReadAux); + sqlite3_free(pCsr); + pRtree->nCursor--; + nodeBlobReset(pRtree); + return SQLITE_OK; +} + +/* +** Rtree virtual table module xEof method. +** +** Return non-zero if the cursor does not currently point to a valid +** record (i.e if the scan has finished), or zero otherwise. +*/ +static int rtreeEof(sqlite3_vtab_cursor *cur){ + RtreeCursor *pCsr = (RtreeCursor *)cur; + return pCsr->atEOF; +} + +/* +** Convert raw bits from the on-disk RTree record into a coordinate value. +** The on-disk format is big-endian and needs to be converted for little- +** endian platforms. The on-disk record stores integer coordinates if +** eInt is true and it stores 32-bit floating point records if eInt is +** false. a[] is the four bytes of the on-disk record to be decoded. +** Store the results in "r". +** +** There are five versions of this macro. The last one is generic. The +** other four are various architectures-specific optimizations. +*/ +#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + c.u = _byteswap_ulong(*(u32*)a); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + c.u = __builtin_bswap32(*(u32*)a); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#elif SQLITE_BYTEORDER==1234 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + memcpy(&c.u,a,4); \ + c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)| \ + ((c.u&0xff)<<24)|((c.u&0xff00)<<8); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#elif SQLITE_BYTEORDER==4321 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + memcpy(&c.u,a,4); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#else +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + c.u = ((u32)a[0]<<24) + ((u32)a[1]<<16) \ + +((u32)a[2]<<8) + a[3]; \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#endif + +/* +** Check the RTree node or entry given by pCellData and p against the MATCH +** constraint pConstraint. +*/ +static int rtreeCallbackConstraint( + RtreeConstraint *pConstraint, /* The constraint to test */ + int eInt, /* True if RTree holding integer coordinates */ + u8 *pCellData, /* Raw cell content */ + RtreeSearchPoint *pSearch, /* Container of this cell */ + sqlite3_rtree_dbl *prScore, /* OUT: score for the cell */ + int *peWithin /* OUT: visibility of the cell */ +){ + sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */ + int nCoord = pInfo->nCoord; /* No. of coordinates */ + int rc; /* Callback return code */ + RtreeCoord c; /* Translator union */ + sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2]; /* Decoded coordinates */ + + assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY ); + assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 ); + + if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){ + pInfo->iRowid = readInt64(pCellData); + } + pCellData += 8; +#ifndef SQLITE_RTREE_INT_ONLY + if( eInt==0 ){ + switch( nCoord ){ + case 10: readCoord(pCellData+36, &c); aCoord[9] = c.f; + readCoord(pCellData+32, &c); aCoord[8] = c.f; + case 8: readCoord(pCellData+28, &c); aCoord[7] = c.f; + readCoord(pCellData+24, &c); aCoord[6] = c.f; + case 6: readCoord(pCellData+20, &c); aCoord[5] = c.f; + readCoord(pCellData+16, &c); aCoord[4] = c.f; + case 4: readCoord(pCellData+12, &c); aCoord[3] = c.f; + readCoord(pCellData+8, &c); aCoord[2] = c.f; + default: readCoord(pCellData+4, &c); aCoord[1] = c.f; + readCoord(pCellData, &c); aCoord[0] = c.f; + } + }else +#endif + { + switch( nCoord ){ + case 10: readCoord(pCellData+36, &c); aCoord[9] = c.i; + readCoord(pCellData+32, &c); aCoord[8] = c.i; + case 8: readCoord(pCellData+28, &c); aCoord[7] = c.i; + readCoord(pCellData+24, &c); aCoord[6] = c.i; + case 6: readCoord(pCellData+20, &c); aCoord[5] = c.i; + readCoord(pCellData+16, &c); aCoord[4] = c.i; + case 4: readCoord(pCellData+12, &c); aCoord[3] = c.i; + readCoord(pCellData+8, &c); aCoord[2] = c.i; + default: readCoord(pCellData+4, &c); aCoord[1] = c.i; + readCoord(pCellData, &c); aCoord[0] = c.i; + } + } + if( pConstraint->op==RTREE_MATCH ){ + int eWithin = 0; + rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo, + nCoord, aCoord, &eWithin); + if( eWithin==0 ) *peWithin = NOT_WITHIN; + *prScore = RTREE_ZERO; + }else{ + pInfo->aCoord = aCoord; + pInfo->iLevel = pSearch->iLevel - 1; + pInfo->rScore = pInfo->rParentScore = pSearch->rScore; + pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin; + rc = pConstraint->u.xQueryFunc(pInfo); + if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin; + if( pInfo->rScore<*prScore || *prScorerScore; + } + } + return rc; +} + +/* +** Check the internal RTree node given by pCellData against constraint p. +** If this constraint cannot be satisfied by any child within the node, +** set *peWithin to NOT_WITHIN. +*/ +static void rtreeNonleafConstraint( + RtreeConstraint *p, /* The constraint to test */ + int eInt, /* True if RTree holds integer coordinates */ + u8 *pCellData, /* Raw cell content as appears on disk */ + int *peWithin /* Adjust downward, as appropriate */ +){ + sqlite3_rtree_dbl val; /* Coordinate value convert to a double */ + + /* p->iCoord might point to either a lower or upper bound coordinate + ** in a coordinate pair. But make pCellData point to the lower bound. + */ + pCellData += 8 + 4*(p->iCoord&0xfe); + + assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE + || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE + || p->op==RTREE_FALSE ); + assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ + switch( p->op ){ + case RTREE_TRUE: return; /* Always satisfied */ + case RTREE_FALSE: break; /* Never satisfied */ + case RTREE_EQ: + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the lower bound of the coordinate pair */ + if( p->u.rValue>=val ){ + pCellData += 4; + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the upper bound of the coordinate pair */ + if( p->u.rValue<=val ) return; + } + break; + case RTREE_LE: + case RTREE_LT: + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the lower bound of the coordinate pair */ + if( p->u.rValue>=val ) return; + break; + + default: + pCellData += 4; + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the upper bound of the coordinate pair */ + if( p->u.rValue<=val ) return; + break; + } + *peWithin = NOT_WITHIN; +} + +/* +** Check the leaf RTree cell given by pCellData against constraint p. +** If this constraint is not satisfied, set *peWithin to NOT_WITHIN. +** If the constraint is satisfied, leave *peWithin unchanged. +** +** The constraint is of the form: xN op $val +** +** The op is given by p->op. The xN is p->iCoord-th coordinate in +** pCellData. $val is given by p->u.rValue. +*/ +static void rtreeLeafConstraint( + RtreeConstraint *p, /* The constraint to test */ + int eInt, /* True if RTree holds integer coordinates */ + u8 *pCellData, /* Raw cell content as appears on disk */ + int *peWithin /* Adjust downward, as appropriate */ +){ + RtreeDValue xN; /* Coordinate value converted to a double */ + + assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE + || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE + || p->op==RTREE_FALSE ); + pCellData += 8 + p->iCoord*4; + assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ + RTREE_DECODE_COORD(eInt, pCellData, xN); + switch( p->op ){ + case RTREE_TRUE: return; /* Always satisfied */ + case RTREE_FALSE: break; /* Never satisfied */ + case RTREE_LE: if( xN <= p->u.rValue ) return; break; + case RTREE_LT: if( xN < p->u.rValue ) return; break; + case RTREE_GE: if( xN >= p->u.rValue ) return; break; + case RTREE_GT: if( xN > p->u.rValue ) return; break; + default: if( xN == p->u.rValue ) return; break; + } + *peWithin = NOT_WITHIN; +} + +/* +** One of the cells in node pNode is guaranteed to have a 64-bit +** integer value equal to iRowid. Return the index of this cell. +*/ +static int nodeRowidIndex( + Rtree *pRtree, + RtreeNode *pNode, + i64 iRowid, + int *piIndex +){ + int ii; + int nCell = NCELL(pNode); + assert( nCell<200 ); + for(ii=0; iipParent; + if( ALWAYS(pParent) ){ + return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); + }else{ + *piIndex = -1; + return SQLITE_OK; + } +} + +/* +** Compare two search points. Return negative, zero, or positive if the first +** is less than, equal to, or greater than the second. +** +** The rScore is the primary key. Smaller rScore values come first. +** If the rScore is a tie, then use iLevel as the tie breaker with smaller +** iLevel values coming first. In this way, if rScore is the same for all +** SearchPoints, then iLevel becomes the deciding factor and the result +** is a depth-first search, which is the desired default behavior. +*/ +static int rtreeSearchPointCompare( + const RtreeSearchPoint *pA, + const RtreeSearchPoint *pB +){ + if( pA->rScorerScore ) return -1; + if( pA->rScore>pB->rScore ) return +1; + if( pA->iLeveliLevel ) return -1; + if( pA->iLevel>pB->iLevel ) return +1; + return 0; +} + +/* +** Interchange two search points in a cursor. +*/ +static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){ + RtreeSearchPoint t = p->aPoint[i]; + assert( iaPoint[i] = p->aPoint[j]; + p->aPoint[j] = t; + i++; j++; + if( i=RTREE_CACHE_SZ ){ + nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); + p->aNode[i] = 0; + }else{ + RtreeNode *pTemp = p->aNode[i]; + p->aNode[i] = p->aNode[j]; + p->aNode[j] = pTemp; + } + } +} + +/* +** Return the search point with the lowest current score. +*/ +static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){ + return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0; +} + +/* +** Get the RtreeNode for the search point with the lowest score. +*/ +static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){ + sqlite3_int64 id; + int ii = 1 - pCur->bPoint; + assert( ii==0 || ii==1 ); + assert( pCur->bPoint || pCur->nPoint ); + if( pCur->aNode[ii]==0 ){ + assert( pRC!=0 ); + id = ii ? pCur->aPoint[0].id : pCur->sPoint.id; + *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]); + } + return pCur->aNode[ii]; +} + +/* +** Push a new element onto the priority queue +*/ +static RtreeSearchPoint *rtreeEnqueue( + RtreeCursor *pCur, /* The cursor */ + RtreeDValue rScore, /* Score for the new search point */ + u8 iLevel /* Level for the new search point */ +){ + int i, j; + RtreeSearchPoint *pNew; + if( pCur->nPoint>=pCur->nPointAlloc ){ + int nNew = pCur->nPointAlloc*2 + 8; + pNew = sqlite3_realloc64(pCur->aPoint, nNew*sizeof(pCur->aPoint[0])); + if( pNew==0 ) return 0; + pCur->aPoint = pNew; + pCur->nPointAlloc = nNew; + } + i = pCur->nPoint++; + pNew = pCur->aPoint + i; + pNew->rScore = rScore; + pNew->iLevel = iLevel; + assert( iLevel<=RTREE_MAX_DEPTH ); + while( i>0 ){ + RtreeSearchPoint *pParent; + j = (i-1)/2; + pParent = pCur->aPoint + j; + if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break; + rtreeSearchPointSwap(pCur, j, i); + i = j; + pNew = pParent; + } + return pNew; +} + +/* +** Allocate a new RtreeSearchPoint and return a pointer to it. Return +** NULL if malloc fails. +*/ +static RtreeSearchPoint *rtreeSearchPointNew( + RtreeCursor *pCur, /* The cursor */ + RtreeDValue rScore, /* Score for the new search point */ + u8 iLevel /* Level for the new search point */ +){ + RtreeSearchPoint *pNew, *pFirst; + pFirst = rtreeSearchPointFirst(pCur); + pCur->anQueue[iLevel]++; + if( pFirst==0 + || pFirst->rScore>rScore + || (pFirst->rScore==rScore && pFirst->iLevel>iLevel) + ){ + if( pCur->bPoint ){ + int ii; + pNew = rtreeEnqueue(pCur, rScore, iLevel); + if( pNew==0 ) return 0; + ii = (int)(pNew - pCur->aPoint) + 1; + assert( ii==1 ); + if( ALWAYS(iiaNode[ii]==0 ); + pCur->aNode[ii] = pCur->aNode[0]; + }else{ + nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]); + } + pCur->aNode[0] = 0; + *pNew = pCur->sPoint; + } + pCur->sPoint.rScore = rScore; + pCur->sPoint.iLevel = iLevel; + pCur->bPoint = 1; + return &pCur->sPoint; + }else{ + return rtreeEnqueue(pCur, rScore, iLevel); + } +} + +#if 0 +/* Tracing routines for the RtreeSearchPoint queue */ +static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){ + if( idx<0 ){ printf(" s"); }else{ printf("%2d", idx); } + printf(" %d.%05lld.%02d %g %d", + p->iLevel, p->id, p->iCell, p->rScore, p->eWithin + ); + idx++; + if( idxaNode[idx]); + }else{ + printf("\n"); + } +} +static void traceQueue(RtreeCursor *pCur, const char *zPrefix){ + int ii; + printf("=== %9s ", zPrefix); + if( pCur->bPoint ){ + tracePoint(&pCur->sPoint, -1, pCur); + } + for(ii=0; iinPoint; ii++){ + if( ii>0 || pCur->bPoint ) printf(" "); + tracePoint(&pCur->aPoint[ii], ii, pCur); + } +} +# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B) +#else +# define RTREE_QUEUE_TRACE(A,B) /* no-op */ +#endif + +/* Remove the search point with the lowest current score. +*/ +static void rtreeSearchPointPop(RtreeCursor *p){ + int i, j, k, n; + i = 1 - p->bPoint; + assert( i==0 || i==1 ); + if( p->aNode[i] ){ + nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); + p->aNode[i] = 0; + } + if( p->bPoint ){ + p->anQueue[p->sPoint.iLevel]--; + p->bPoint = 0; + }else if( ALWAYS(p->nPoint) ){ + p->anQueue[p->aPoint[0].iLevel]--; + n = --p->nPoint; + p->aPoint[0] = p->aPoint[n]; + if( naNode[1] = p->aNode[n+1]; + p->aNode[n+1] = 0; + } + i = 0; + while( (j = i*2+1)aPoint[k], &p->aPoint[j])<0 ){ + if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){ + rtreeSearchPointSwap(p, i, k); + i = k; + }else{ + break; + } + }else{ + if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){ + rtreeSearchPointSwap(p, i, j); + i = j; + }else{ + break; + } + } + } + } +} + + +/* +** Continue the search on cursor pCur until the front of the queue +** contains an entry suitable for returning as a result-set row, +** or until the RtreeSearchPoint queue is empty, indicating that the +** query has completed. +*/ +static int rtreeStepToLeaf(RtreeCursor *pCur){ + RtreeSearchPoint *p; + Rtree *pRtree = RTREE_OF_CURSOR(pCur); + RtreeNode *pNode; + int eWithin; + int rc = SQLITE_OK; + int nCell; + int nConstraint = pCur->nConstraint; + int ii; + int eInt; + RtreeSearchPoint x; + + eInt = pRtree->eCoordType==RTREE_COORD_INT32; + while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){ + u8 *pCellData; + pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc); + if( rc ) return rc; + nCell = NCELL(pNode); + assert( nCell<200 ); + pCellData = pNode->zData + (4+pRtree->nBytesPerCell*p->iCell); + while( p->iCellaConstraint + ii; + if( pConstraint->op>=RTREE_MATCH ){ + rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p, + &rScore, &eWithin); + if( rc ) return rc; + }else if( p->iLevel==1 ){ + rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin); + }else{ + rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin); + } + if( eWithin==NOT_WITHIN ){ + p->iCell++; + pCellData += pRtree->nBytesPerCell; + break; + } + } + if( eWithin==NOT_WITHIN ) continue; + p->iCell++; + x.iLevel = p->iLevel - 1; + if( x.iLevel ){ + x.id = readInt64(pCellData); + for(ii=0; iinPoint; ii++){ + if( pCur->aPoint[ii].id==x.id ){ + RTREE_IS_CORRUPT(pRtree); + return SQLITE_CORRUPT_VTAB; + } + } + x.iCell = 0; + }else{ + x.id = p->id; + x.iCell = p->iCell - 1; + } + if( p->iCell>=nCell ){ + RTREE_QUEUE_TRACE(pCur, "POP-S:"); + rtreeSearchPointPop(pCur); + } + if( rScoreeWithin = (u8)eWithin; + p->id = x.id; + p->iCell = x.iCell; + RTREE_QUEUE_TRACE(pCur, "PUSH-S:"); + break; + } + if( p->iCell>=nCell ){ + RTREE_QUEUE_TRACE(pCur, "POP-Se:"); + rtreeSearchPointPop(pCur); + } + } + pCur->atEOF = p==0; + return SQLITE_OK; +} + +/* +** Rtree virtual table module xNext method. +*/ +static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + int rc = SQLITE_OK; + + /* Move to the next entry that matches the configured constraints. */ + RTREE_QUEUE_TRACE(pCsr, "POP-Nx:"); + if( pCsr->bAuxValid ){ + pCsr->bAuxValid = 0; + sqlite3_reset(pCsr->pReadAux); + } + rtreeSearchPointPop(pCsr); + rc = rtreeStepToLeaf(pCsr); + return rc; +} + +/* +** Rtree virtual table module xRowid method. +*/ +static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); + int rc = SQLITE_OK; + RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); + if( rc==SQLITE_OK && ALWAYS(p) ){ + *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell); + } + return rc; +} + +/* +** Rtree virtual table module xColumn method. +*/ +static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ + Rtree *pRtree = (Rtree *)cur->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)cur; + RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); + RtreeCoord c; + int rc = SQLITE_OK; + RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); + + if( rc ) return rc; + if( NEVER(p==0) ) return SQLITE_OK; + if( i==0 ){ + sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell)); + }else if( i<=pRtree->nDim2 ){ + nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c); +#ifndef SQLITE_RTREE_INT_ONLY + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + sqlite3_result_double(ctx, c.f); + }else +#endif + { + assert( pRtree->eCoordType==RTREE_COORD_INT32 ); + sqlite3_result_int(ctx, c.i); + } + }else{ + if( !pCsr->bAuxValid ){ + if( pCsr->pReadAux==0 ){ + rc = sqlite3_prepare_v3(pRtree->db, pRtree->zReadAuxSql, -1, 0, + &pCsr->pReadAux, 0); + if( rc ) return rc; + } + sqlite3_bind_int64(pCsr->pReadAux, 1, + nodeGetRowid(pRtree, pNode, p->iCell)); + rc = sqlite3_step(pCsr->pReadAux); + if( rc==SQLITE_ROW ){ + pCsr->bAuxValid = 1; + }else{ + sqlite3_reset(pCsr->pReadAux); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + return rc; + } + } + sqlite3_result_value(ctx, + sqlite3_column_value(pCsr->pReadAux, i - pRtree->nDim2 + 1)); + } + return SQLITE_OK; +} + +/* +** Use nodeAcquire() to obtain the leaf node containing the record with +** rowid iRowid. If successful, set *ppLeaf to point to the node and +** return SQLITE_OK. If there is no such record in the table, set +** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf +** to zero and return an SQLite error code. +*/ +static int findLeafNode( + Rtree *pRtree, /* RTree to search */ + i64 iRowid, /* The rowid searching for */ + RtreeNode **ppLeaf, /* Write the node here */ + sqlite3_int64 *piNode /* Write the node-id here */ +){ + int rc; + *ppLeaf = 0; + sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid); + if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){ + i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0); + if( piNode ) *piNode = iNode; + rc = nodeAcquire(pRtree, iNode, 0, ppLeaf); + sqlite3_reset(pRtree->pReadRowid); + }else{ + rc = sqlite3_reset(pRtree->pReadRowid); + } + return rc; +} + +/* +** This function is called to configure the RtreeConstraint object passed +** as the second argument for a MATCH constraint. The value passed as the +** first argument to this function is the right-hand operand to the MATCH +** operator. +*/ +static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ + RtreeMatchArg *pBlob, *pSrc; /* BLOB returned by geometry function */ + sqlite3_rtree_query_info *pInfo; /* Callback information */ + + pSrc = sqlite3_value_pointer(pValue, "RtreeMatchArg"); + if( pSrc==0 ) return SQLITE_ERROR; + pInfo = (sqlite3_rtree_query_info*) + sqlite3_malloc64( sizeof(*pInfo)+pSrc->iSize ); + if( !pInfo ) return SQLITE_NOMEM; + memset(pInfo, 0, sizeof(*pInfo)); + pBlob = (RtreeMatchArg*)&pInfo[1]; + memcpy(pBlob, pSrc, pSrc->iSize); + pInfo->pContext = pBlob->cb.pContext; + pInfo->nParam = pBlob->nParam; + pInfo->aParam = pBlob->aParam; + pInfo->apSqlParam = pBlob->apSqlParam; + + if( pBlob->cb.xGeom ){ + pCons->u.xGeom = pBlob->cb.xGeom; + }else{ + pCons->op = RTREE_QUERY; + pCons->u.xQueryFunc = pBlob->cb.xQueryFunc; + } + pCons->pInfo = pInfo; + return SQLITE_OK; +} + +/* +** Rtree virtual table module xFilter method. +*/ +static int rtreeFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + RtreeNode *pRoot = 0; + int ii; + int rc = SQLITE_OK; + int iCell = 0; + + rtreeReference(pRtree); + + /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ + resetCursor(pCsr); + + pCsr->iStrategy = idxNum; + if( idxNum==1 ){ + /* Special case - lookup by rowid. */ + RtreeNode *pLeaf; /* Leaf on which the required cell resides */ + RtreeSearchPoint *p; /* Search point for the leaf */ + i64 iRowid = sqlite3_value_int64(argv[0]); + i64 iNode = 0; + int eType = sqlite3_value_numeric_type(argv[0]); + if( eType==SQLITE_INTEGER + || (eType==SQLITE_FLOAT && sqlite3_value_double(argv[0])==iRowid) + ){ + rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); + }else{ + rc = SQLITE_OK; + pLeaf = 0; + } + if( rc==SQLITE_OK && pLeaf!=0 ){ + p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); + assert( p!=0 ); /* Always returns pCsr->sPoint */ + pCsr->aNode[0] = pLeaf; + p->id = iNode; + p->eWithin = PARTLY_WITHIN; + rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); + p->iCell = (u8)iCell; + RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); + }else{ + pCsr->atEOF = 1; + } + }else{ + /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array + ** with the configured constraints. + */ + rc = nodeAcquire(pRtree, 1, 0, &pRoot); + if( rc==SQLITE_OK && argc>0 ){ + pCsr->aConstraint = sqlite3_malloc64(sizeof(RtreeConstraint)*argc); + pCsr->nConstraint = argc; + if( !pCsr->aConstraint ){ + rc = SQLITE_NOMEM; + }else{ + memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); + memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); + assert( (idxStr==0 && argc==0) + || (idxStr && (int)strlen(idxStr)==argc*2) ); + for(ii=0; iiaConstraint[ii]; + int eType = sqlite3_value_numeric_type(argv[ii]); + p->op = idxStr[ii*2]; + p->iCoord = idxStr[ii*2+1]-'0'; + if( p->op>=RTREE_MATCH ){ + /* A MATCH operator. The right-hand-side must be a blob that + ** can be cast into an RtreeMatchArg object. One created using + ** an sqlite3_rtree_geometry_callback() SQL user function. + */ + rc = deserializeGeometry(argv[ii], p); + if( rc!=SQLITE_OK ){ + break; + } + p->pInfo->nCoord = pRtree->nDim2; + p->pInfo->anQueue = pCsr->anQueue; + p->pInfo->mxLevel = pRtree->iDepth + 1; + }else if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ +#ifdef SQLITE_RTREE_INT_ONLY + p->u.rValue = sqlite3_value_int64(argv[ii]); +#else + p->u.rValue = sqlite3_value_double(argv[ii]); +#endif + }else{ + p->u.rValue = RTREE_ZERO; + if( eType==SQLITE_NULL ){ + p->op = RTREE_FALSE; + }else if( p->op==RTREE_LT || p->op==RTREE_LE ){ + p->op = RTREE_TRUE; + }else{ + p->op = RTREE_FALSE; + } + } + } + } + } + if( rc==SQLITE_OK ){ + RtreeSearchPoint *pNew; + assert( pCsr->bPoint==0 ); /* Due to the resetCursor() call above */ + pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, (u8)(pRtree->iDepth+1)); + if( NEVER(pNew==0) ){ /* Because pCsr->bPoint was FALSE */ + return SQLITE_NOMEM; + } + pNew->id = 1; + pNew->iCell = 0; + pNew->eWithin = PARTLY_WITHIN; + assert( pCsr->bPoint==1 ); + pCsr->aNode[0] = pRoot; + pRoot = 0; + RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); + rc = rtreeStepToLeaf(pCsr); + } + } + + nodeRelease(pRtree, pRoot); + rtreeRelease(pRtree); + return rc; +} + +/* +** Rtree virtual table module xBestIndex method. There are three +** table scan strategies to choose from (in order from most to +** least desirable): +** +** idxNum idxStr Strategy +** ------------------------------------------------ +** 1 Unused Direct lookup by rowid. +** 2 See below R-tree query or full-table scan. +** ------------------------------------------------ +** +** If strategy 1 is used, then idxStr is not meaningful. If strategy +** 2 is used, idxStr is formatted to contain 2 bytes for each +** constraint used. The first two bytes of idxStr correspond to +** the constraint in sqlite3_index_info.aConstraintUsage[] with +** (argvIndex==1) etc. +** +** The first of each pair of bytes in idxStr identifies the constraint +** operator as follows: +** +** Operator Byte Value +** ---------------------- +** = 0x41 ('A') +** <= 0x42 ('B') +** < 0x43 ('C') +** >= 0x44 ('D') +** > 0x45 ('E') +** MATCH 0x46 ('F') +** ---------------------- +** +** The second of each pair of bytes identifies the coordinate column +** to which the constraint applies. The leftmost coordinate column +** is 'a', the second from the left 'b' etc. +*/ +static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + Rtree *pRtree = (Rtree*)tab; + int rc = SQLITE_OK; + int ii; + int bMatch = 0; /* True if there exists a MATCH constraint */ + i64 nRow; /* Estimated rows returned by this scan */ + + int iIdx = 0; + char zIdxStr[RTREE_MAX_DIMENSIONS*8+1]; + memset(zIdxStr, 0, sizeof(zIdxStr)); + + /* Check if there exists a MATCH constraint - even an unusable one. If there + ** is, do not consider the lookup-by-rowid plan as using such a plan would + ** require the VDBE to evaluate the MATCH constraint, which is not currently + ** possible. */ + for(ii=0; iinConstraint; ii++){ + if( pIdxInfo->aConstraint[ii].op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + bMatch = 1; + } + } + + assert( pIdxInfo->idxStr==0 ); + for(ii=0; iinConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){ + struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; + + if( bMatch==0 && p->usable + && p->iColumn<=0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + /* We have an equality constraint on the rowid. Use strategy 1. */ + int jj; + for(jj=0; jjaConstraintUsage[jj].argvIndex = 0; + pIdxInfo->aConstraintUsage[jj].omit = 0; + } + pIdxInfo->idxNum = 1; + pIdxInfo->aConstraintUsage[ii].argvIndex = 1; + pIdxInfo->aConstraintUsage[jj].omit = 1; + + /* This strategy involves a two rowid lookups on an B-Tree structures + ** and then a linear search of an R-Tree node. This should be + ** considered almost as quick as a direct rowid lookup (for which + ** sqlite uses an internal cost of 0.0). It is expected to return + ** a single row. + */ + pIdxInfo->estimatedCost = 30.0; + pIdxInfo->estimatedRows = 1; + pIdxInfo->idxFlags = SQLITE_INDEX_SCAN_UNIQUE; + return SQLITE_OK; + } + + if( p->usable + && ((p->iColumn>0 && p->iColumn<=pRtree->nDim2) + || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) + ){ + u8 op; + switch( p->op ){ + case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; + case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; + case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; + case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; + case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; + case SQLITE_INDEX_CONSTRAINT_MATCH: op = RTREE_MATCH; break; + default: op = 0; break; + } + if( op ){ + zIdxStr[iIdx++] = op; + zIdxStr[iIdx++] = (char)(p->iColumn - 1 + '0'); + pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2); + pIdxInfo->aConstraintUsage[ii].omit = 1; + } + } + } + + pIdxInfo->idxNum = 2; + pIdxInfo->needToFreeIdxStr = 1; + if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){ + return SQLITE_NOMEM; + } + + nRow = pRtree->nRowEst >> (iIdx/2); + pIdxInfo->estimatedCost = (double)6.0 * (double)nRow; + pIdxInfo->estimatedRows = nRow; + + return rc; +} + +/* +** Return the N-dimensional volumn of the cell stored in *p. +*/ +static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){ + RtreeDValue area = (RtreeDValue)1; + assert( pRtree->nDim>=1 && pRtree->nDim<=5 ); +#ifndef SQLITE_RTREE_INT_ONLY + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + switch( pRtree->nDim ){ + case 5: area = p->aCoord[9].f - p->aCoord[8].f; + case 4: area *= p->aCoord[7].f - p->aCoord[6].f; + case 3: area *= p->aCoord[5].f - p->aCoord[4].f; + case 2: area *= p->aCoord[3].f - p->aCoord[2].f; + default: area *= p->aCoord[1].f - p->aCoord[0].f; + } + }else +#endif + { + switch( pRtree->nDim ){ + case 5: area = (i64)p->aCoord[9].i - (i64)p->aCoord[8].i; + case 4: area *= (i64)p->aCoord[7].i - (i64)p->aCoord[6].i; + case 3: area *= (i64)p->aCoord[5].i - (i64)p->aCoord[4].i; + case 2: area *= (i64)p->aCoord[3].i - (i64)p->aCoord[2].i; + default: area *= (i64)p->aCoord[1].i - (i64)p->aCoord[0].i; + } + } + return area; +} + +/* +** Return the margin length of cell p. The margin length is the sum +** of the objects size in each dimension. +*/ +static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){ + RtreeDValue margin = 0; + int ii = pRtree->nDim2 - 2; + do{ + margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])); + ii -= 2; + }while( ii>=0 ); + return margin; +} + +/* +** Store the union of cells p1 and p2 in p1. +*/ +static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ + int ii = 0; + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + do{ + p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f); + p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f); + ii += 2; + }while( iinDim2 ); + }else{ + do{ + p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i); + p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i); + ii += 2; + }while( iinDim2 ); + } +} + +/* +** Return true if the area covered by p2 is a subset of the area covered +** by p1. False otherwise. +*/ +static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ + int ii; + int isInt = (pRtree->eCoordType==RTREE_COORD_INT32); + for(ii=0; iinDim2; ii+=2){ + RtreeCoord *a1 = &p1->aCoord[ii]; + RtreeCoord *a2 = &p2->aCoord[ii]; + if( (!isInt && (a2[0].fa1[1].f)) + || ( isInt && (a2[0].ia1[1].i)) + ){ + return 0; + } + } + return 1; +} + +/* +** Return the amount cell p would grow by if it were unioned with pCell. +*/ +static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ + RtreeDValue area; + RtreeCell cell; + memcpy(&cell, p, sizeof(RtreeCell)); + area = cellArea(pRtree, &cell); + cellUnion(pRtree, &cell, pCell); + return (cellArea(pRtree, &cell)-area); +} + +static RtreeDValue cellOverlap( + Rtree *pRtree, + RtreeCell *p, + RtreeCell *aCell, + int nCell +){ + int ii; + RtreeDValue overlap = RTREE_ZERO; + for(ii=0; iinDim2; jj+=2){ + RtreeDValue x1, x2; + x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj])); + x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1])); + if( x2iDepth-iHeight); ii++){ + int iCell; + sqlite3_int64 iBest = 0; + + RtreeDValue fMinGrowth = RTREE_ZERO; + RtreeDValue fMinArea = RTREE_ZERO; + + int nCell = NCELL(pNode); + RtreeCell cell; + RtreeNode *pChild = 0; + + RtreeCell *aCell = 0; + + /* Select the child node which will be enlarged the least if pCell + ** is inserted into it. Resolve ties by choosing the entry with + ** the smallest area. + */ + for(iCell=0; iCellpParent ){ + RtreeNode *pParent = p->pParent; + RtreeCell cell; + int iCell; + + cnt++; + if( NEVER(cnt>100) ){ + RTREE_IS_CORRUPT(pRtree); + return SQLITE_CORRUPT_VTAB; + } + rc = nodeParentIndex(pRtree, p, &iCell); + if( NEVER(rc!=SQLITE_OK) ){ + RTREE_IS_CORRUPT(pRtree); + return SQLITE_CORRUPT_VTAB; + } + + nodeGetCell(pRtree, pParent, iCell, &cell); + if( !cellContains(pRtree, &cell, pCell) ){ + cellUnion(pRtree, &cell, pCell); + nodeOverwriteCell(pRtree, pParent, &cell, iCell); + } + + p = pParent; + } + return SQLITE_OK; +} + +/* +** Write mapping (iRowid->iNode) to the _rowid table. +*/ +static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){ + sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid); + sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode); + sqlite3_step(pRtree->pWriteRowid); + return sqlite3_reset(pRtree->pWriteRowid); +} + +/* +** Write mapping (iNode->iPar) to the _parent table. +*/ +static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){ + sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode); + sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar); + sqlite3_step(pRtree->pWriteParent); + return sqlite3_reset(pRtree->pWriteParent); +} + +static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int); + + +/* +** Arguments aIdx, aDistance and aSpare all point to arrays of size +** nIdx. The aIdx array contains the set of integers from 0 to +** (nIdx-1) in no particular order. This function sorts the values +** in aIdx according to the indexed values in aDistance. For +** example, assuming the inputs: +** +** aIdx = { 0, 1, 2, 3 } +** aDistance = { 5.0, 2.0, 7.0, 6.0 } +** +** this function sets the aIdx array to contain: +** +** aIdx = { 0, 1, 2, 3 } +** +** The aSpare array is used as temporary working space by the +** sorting algorithm. +*/ +static void SortByDistance( + int *aIdx, + int nIdx, + RtreeDValue *aDistance, + int *aSpare +){ + if( nIdx>1 ){ + int iLeft = 0; + int iRight = 0; + + int nLeft = nIdx/2; + int nRight = nIdx-nLeft; + int *aLeft = aIdx; + int *aRight = &aIdx[nLeft]; + + SortByDistance(aLeft, nLeft, aDistance, aSpare); + SortByDistance(aRight, nRight, aDistance, aSpare); + + memcpy(aSpare, aLeft, sizeof(int)*nLeft); + aLeft = aSpare; + + while( iLeft1 ){ + + int iLeft = 0; + int iRight = 0; + + int nLeft = nIdx/2; + int nRight = nIdx-nLeft; + int *aLeft = aIdx; + int *aRight = &aIdx[nLeft]; + + SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare); + SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare); + + memcpy(aSpare, aLeft, sizeof(int)*nLeft); + aLeft = aSpare; + while( iLeftnDim+1)*(sizeof(int*)+nCell*sizeof(int)); + + aaSorted = (int **)sqlite3_malloc64(nByte); + if( !aaSorted ){ + return SQLITE_NOMEM; + } + + aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell]; + memset(aaSorted, 0, nByte); + for(ii=0; iinDim; ii++){ + int jj; + aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell]; + for(jj=0; jjnDim; ii++){ + RtreeDValue margin = RTREE_ZERO; + RtreeDValue fBestOverlap = RTREE_ZERO; + RtreeDValue fBestArea = RTREE_ZERO; + int iBestLeft = 0; + int nLeft; + + for( + nLeft=RTREE_MINCELLS(pRtree); + nLeft<=(nCell-RTREE_MINCELLS(pRtree)); + nLeft++ + ){ + RtreeCell left; + RtreeCell right; + int kk; + RtreeDValue overlap; + RtreeDValue area; + + memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell)); + memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell)); + for(kk=1; kk<(nCell-1); kk++){ + if( kk0 ){ + RtreeNode *pChild = nodeHashLookup(pRtree, iRowid); + RtreeNode *p; + for(p=pNode; p; p=p->pParent){ + if( p==pChild ) return SQLITE_CORRUPT_VTAB; + } + if( pChild ){ + nodeRelease(pRtree, pChild->pParent); + nodeReference(pNode); + pChild->pParent = pNode; + } + } + if( NEVER(pNode==0) ) return SQLITE_ERROR; + return xSetMapping(pRtree, iRowid, pNode->iNode); +} + +static int SplitNode( + Rtree *pRtree, + RtreeNode *pNode, + RtreeCell *pCell, + int iHeight +){ + int i; + int newCellIsRight = 0; + + int rc = SQLITE_OK; + int nCell = NCELL(pNode); + RtreeCell *aCell; + int *aiUsed; + + RtreeNode *pLeft = 0; + RtreeNode *pRight = 0; + + RtreeCell leftbbox; + RtreeCell rightbbox; + + /* Allocate an array and populate it with a copy of pCell and + ** all cells from node pLeft. Then zero the original node. + */ + aCell = sqlite3_malloc64((sizeof(RtreeCell)+sizeof(int))*(nCell+1)); + if( !aCell ){ + rc = SQLITE_NOMEM; + goto splitnode_out; + } + aiUsed = (int *)&aCell[nCell+1]; + memset(aiUsed, 0, sizeof(int)*(nCell+1)); + for(i=0; iiNode==1 ){ + pRight = nodeNew(pRtree, pNode); + pLeft = nodeNew(pRtree, pNode); + pRtree->iDepth++; + pNode->isDirty = 1; + writeInt16(pNode->zData, pRtree->iDepth); + }else{ + pLeft = pNode; + pRight = nodeNew(pRtree, pLeft->pParent); + pLeft->nRef++; + } + + if( !pLeft || !pRight ){ + rc = SQLITE_NOMEM; + goto splitnode_out; + } + + memset(pLeft->zData, 0, pRtree->iNodeSize); + memset(pRight->zData, 0, pRtree->iNodeSize); + + rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight, + &leftbbox, &rightbbox); + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + + /* Ensure both child nodes have node numbers assigned to them by calling + ** nodeWrite(). Node pRight always needs a node number, as it was created + ** by nodeNew() above. But node pLeft sometimes already has a node number. + ** In this case avoid the all to nodeWrite(). + */ + if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) + || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) + ){ + goto splitnode_out; + } + + rightbbox.iRowid = pRight->iNode; + leftbbox.iRowid = pLeft->iNode; + + if( pNode->iNode==1 ){ + rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1); + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + }else{ + RtreeNode *pParent = pLeft->pParent; + int iCell; + rc = nodeParentIndex(pRtree, pLeft, &iCell); + if( ALWAYS(rc==SQLITE_OK) ){ + nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); + rc = AdjustTree(pRtree, pParent, &leftbbox); + assert( rc==SQLITE_OK ); + } + if( NEVER(rc!=SQLITE_OK) ){ + goto splitnode_out; + } + } + if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ + goto splitnode_out; + } + + for(i=0; iiRowid ){ + newCellIsRight = 1; + } + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + } + if( pNode->iNode==1 ){ + for(i=0; iiRowid, pLeft, iHeight); + } + + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pRight); + pRight = 0; + } + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pLeft); + pLeft = 0; + } + +splitnode_out: + nodeRelease(pRtree, pRight); + nodeRelease(pRtree, pLeft); + sqlite3_free(aCell); + return rc; +} + +/* +** If node pLeaf is not the root of the r-tree and its pParent pointer is +** still NULL, load all ancestor nodes of pLeaf into memory and populate +** the pLeaf->pParent chain all the way up to the root node. +** +** This operation is required when a row is deleted (or updated - an update +** is implemented as a delete followed by an insert). SQLite provides the +** rowid of the row to delete, which can be used to find the leaf on which +** the entry resides (argument pLeaf). Once the leaf is located, this +** function is called to determine its ancestry. +*/ +static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ + int rc = SQLITE_OK; + RtreeNode *pChild = pLeaf; + while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ + int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ + sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); + rc = sqlite3_step(pRtree->pReadParent); + if( rc==SQLITE_ROW ){ + RtreeNode *pTest; /* Used to test for reference loops */ + i64 iNode; /* Node number of parent node */ + + /* Before setting pChild->pParent, test that we are not creating a + ** loop of references (as we would if, say, pChild==pParent). We don't + ** want to do this as it leads to a memory leak when trying to delete + ** the referenced counted node structures. + */ + iNode = sqlite3_column_int64(pRtree->pReadParent, 0); + for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); + if( pTest==0 ){ + rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); + } + } + rc = sqlite3_reset(pRtree->pReadParent); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK && !pChild->pParent ){ + RTREE_IS_CORRUPT(pRtree); + rc = SQLITE_CORRUPT_VTAB; + } + pChild = pChild->pParent; + } + return rc; +} + +static int deleteCell(Rtree *, RtreeNode *, int, int); + +static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ + int rc; + int rc2; + RtreeNode *pParent = 0; + int iCell; + + assert( pNode->nRef==1 ); + + /* Remove the entry in the parent cell. */ + rc = nodeParentIndex(pRtree, pNode, &iCell); + if( rc==SQLITE_OK ){ + pParent = pNode->pParent; + pNode->pParent = 0; + rc = deleteCell(pRtree, pParent, iCell, iHeight+1); + testcase( rc!=SQLITE_OK ); + } + rc2 = nodeRelease(pRtree, pParent); + if( rc==SQLITE_OK ){ + rc = rc2; + } + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Remove the xxx_node entry. */ + sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); + sqlite3_step(pRtree->pDeleteNode); + if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){ + return rc; + } + + /* Remove the xxx_parent entry. */ + sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode); + sqlite3_step(pRtree->pDeleteParent); + if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){ + return rc; + } + + /* Remove the node from the in-memory hash table and link it into + ** the Rtree.pDeleted list. Its contents will be re-inserted later on. + */ + nodeHashDelete(pRtree, pNode); + pNode->iNode = iHeight; + pNode->pNext = pRtree->pDeleted; + pNode->nRef++; + pRtree->pDeleted = pNode; + + return SQLITE_OK; +} + +static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ + RtreeNode *pParent = pNode->pParent; + int rc = SQLITE_OK; + if( pParent ){ + int ii; + int nCell = NCELL(pNode); + RtreeCell box; /* Bounding box for pNode */ + nodeGetCell(pRtree, pNode, 0, &box); + for(ii=1; iiiNode; + rc = nodeParentIndex(pRtree, pNode, &ii); + if( rc==SQLITE_OK ){ + nodeOverwriteCell(pRtree, pParent, &box, ii); + rc = fixBoundingBox(pRtree, pParent); + } + } + return rc; +} + +/* +** Delete the cell at index iCell of node pNode. After removing the +** cell, adjust the r-tree data structure if required. +*/ +static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ + RtreeNode *pParent; + int rc; + + if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ + return rc; + } + + /* Remove the cell from the node. This call just moves bytes around + ** the in-memory node image, so it cannot fail. + */ + nodeDeleteCell(pRtree, pNode, iCell); + + /* If the node is not the tree root and now has less than the minimum + ** number of cells, remove it from the tree. Otherwise, update the + ** cell in the parent node so that it tightly contains the updated + ** node. + */ + pParent = pNode->pParent; + assert( pParent || pNode->iNode==1 ); + if( pParent ){ + if( NCELL(pNode)nDim; iDim++){ + aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]); + aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]); + } + } + for(iDim=0; iDimnDim; iDim++){ + aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2)); + } + + for(ii=0; iinDim; iDim++){ + RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - + DCOORD(aCell[ii].aCoord[iDim*2])); + aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]); + } + } + + SortByDistance(aOrder, nCell, aDistance, aSpare); + nodeZero(pRtree, pNode); + + for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){ + RtreeCell *p = &aCell[aOrder[ii]]; + nodeInsertCell(pRtree, pNode, p); + if( p->iRowid==pCell->iRowid ){ + if( iHeight==0 ){ + rc = rowidWrite(pRtree, p->iRowid, pNode->iNode); + }else{ + rc = parentWrite(pRtree, p->iRowid, pNode->iNode); + } + } + } + if( rc==SQLITE_OK ){ + rc = fixBoundingBox(pRtree, pNode); + } + for(; rc==SQLITE_OK && iiiNode currently contains + ** the height of the sub-tree headed by the cell. + */ + RtreeNode *pInsert; + RtreeCell *p = &aCell[aOrder[ii]]; + rc = ChooseLeaf(pRtree, p, iHeight, &pInsert); + if( rc==SQLITE_OK ){ + int rc2; + rc = rtreeInsertCell(pRtree, pInsert, p, iHeight); + rc2 = nodeRelease(pRtree, pInsert); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + + sqlite3_free(aCell); + return rc; +} + +/* +** Insert cell pCell into node pNode. Node pNode is the head of a +** subtree iHeight high (leaf nodes have iHeight==0). +*/ +static int rtreeInsertCell( + Rtree *pRtree, + RtreeNode *pNode, + RtreeCell *pCell, + int iHeight +){ + int rc = SQLITE_OK; + if( iHeight>0 ){ + RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid); + if( pChild ){ + nodeRelease(pRtree, pChild->pParent); + nodeReference(pNode); + pChild->pParent = pNode; + } + } + if( nodeInsertCell(pRtree, pNode, pCell) ){ + if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ + rc = SplitNode(pRtree, pNode, pCell, iHeight); + }else{ + pRtree->iReinsertHeight = iHeight; + rc = Reinsert(pRtree, pNode, pCell, iHeight); + } + }else{ + rc = AdjustTree(pRtree, pNode, pCell); + if( ALWAYS(rc==SQLITE_OK) ){ + if( iHeight==0 ){ + rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); + }else{ + rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); + } + } + } + return rc; +} + +static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){ + int ii; + int rc = SQLITE_OK; + int nCell = NCELL(pNode); + + for(ii=0; rc==SQLITE_OK && iiiNode currently contains + ** the height of the sub-tree headed by the cell. + */ + rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert); + if( rc==SQLITE_OK ){ + int rc2; + rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode); + rc2 = nodeRelease(pRtree, pInsert); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + return rc; +} + +/* +** Select a currently unused rowid for a new r-tree record. +*/ +static int rtreeNewRowid(Rtree *pRtree, i64 *piRowid){ + int rc; + sqlite3_bind_null(pRtree->pWriteRowid, 1); + sqlite3_bind_null(pRtree->pWriteRowid, 2); + sqlite3_step(pRtree->pWriteRowid); + rc = sqlite3_reset(pRtree->pWriteRowid); + *piRowid = sqlite3_last_insert_rowid(pRtree->db); + return rc; +} + +/* +** Remove the entry with rowid=iDelete from the r-tree structure. +*/ +static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){ + int rc; /* Return code */ + RtreeNode *pLeaf = 0; /* Leaf node containing record iDelete */ + int iCell; /* Index of iDelete cell in pLeaf */ + RtreeNode *pRoot = 0; /* Root node of rtree structure */ + + + /* Obtain a reference to the root node to initialize Rtree.iDepth */ + rc = nodeAcquire(pRtree, 1, 0, &pRoot); + + /* Obtain a reference to the leaf node that contains the entry + ** about to be deleted. + */ + if( rc==SQLITE_OK ){ + rc = findLeafNode(pRtree, iDelete, &pLeaf, 0); + } + +#ifdef CORRUPT_DB + assert( pLeaf!=0 || rc!=SQLITE_OK || CORRUPT_DB ); +#endif + + /* Delete the cell in question from the leaf node. */ + if( rc==SQLITE_OK && pLeaf ){ + int rc2; + rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); + if( rc==SQLITE_OK ){ + rc = deleteCell(pRtree, pLeaf, iCell, 0); + } + rc2 = nodeRelease(pRtree, pLeaf); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + + /* Delete the corresponding entry in the _rowid table. */ + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete); + sqlite3_step(pRtree->pDeleteRowid); + rc = sqlite3_reset(pRtree->pDeleteRowid); + } + + /* Check if the root node now has exactly one child. If so, remove + ** it, schedule the contents of the child for reinsertion and + ** reduce the tree height by one. + ** + ** This is equivalent to copying the contents of the child into + ** the root node (the operation that Gutman's paper says to perform + ** in this scenario). + */ + if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ + int rc2; + RtreeNode *pChild = 0; + i64 iChild = nodeGetRowid(pRtree, pRoot, 0); + rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); /* tag-20210916a */ + if( rc==SQLITE_OK ){ + rc = removeNode(pRtree, pChild, pRtree->iDepth-1); + } + rc2 = nodeRelease(pRtree, pChild); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK ){ + pRtree->iDepth--; + writeInt16(pRoot->zData, pRtree->iDepth); + pRoot->isDirty = 1; + } + } + + /* Re-insert the contents of any underfull nodes removed from the tree. */ + for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ + if( rc==SQLITE_OK ){ + rc = reinsertNodeContent(pRtree, pLeaf); + } + pRtree->pDeleted = pLeaf->pNext; + pRtree->nNodeRef--; + sqlite3_free(pLeaf); + } + + /* Release the reference to the root node. */ + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pRoot); + }else{ + nodeRelease(pRtree, pRoot); + } + + return rc; +} + +/* +** Rounding constants for float->double conversion. +*/ +#define RNDTOWARDS (1.0 - 1.0/8388608.0) /* Round towards zero */ +#define RNDAWAY (1.0 + 1.0/8388608.0) /* Round away from zero */ + +#if !defined(SQLITE_RTREE_INT_ONLY) +/* +** Convert an sqlite3_value into an RtreeValue (presumably a float) +** while taking care to round toward negative or positive, respectively. +*/ +static RtreeValue rtreeValueDown(sqlite3_value *v){ + double d = sqlite3_value_double(v); + float f = (float)d; + if( f>d ){ + f = (float)(d*(d<0 ? RNDAWAY : RNDTOWARDS)); + } + return f; +} +static RtreeValue rtreeValueUp(sqlite3_value *v){ + double d = sqlite3_value_double(v); + float f = (float)d; + if( fbase.zErrMsg) to an appropriate value and returns +** SQLITE_CONSTRAINT. +** +** Parameter iCol is the index of the leftmost column involved in the +** constraint failure. If it is 0, then the constraint that failed is +** the unique constraint on the id column. Otherwise, it is the rtree +** (c1<=c2) constraint on columns iCol and iCol+1 that has failed. +** +** If an OOM occurs, SQLITE_NOMEM is returned instead of SQLITE_CONSTRAINT. +*/ +static int rtreeConstraintError(Rtree *pRtree, int iCol){ + sqlite3_stmt *pStmt = 0; + char *zSql; + int rc; + + assert( iCol==0 || iCol%2 ); + zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", pRtree->zDb, pRtree->zName); + if( zSql ){ + rc = sqlite3_prepare_v2(pRtree->db, zSql, -1, &pStmt, 0); + }else{ + rc = SQLITE_NOMEM; + } + sqlite3_free(zSql); + + if( rc==SQLITE_OK ){ + if( iCol==0 ){ + const char *zCol = sqlite3_column_name(pStmt, 0); + pRtree->base.zErrMsg = sqlite3_mprintf( + "UNIQUE constraint failed: %s.%s", pRtree->zName, zCol + ); + }else{ + const char *zCol1 = sqlite3_column_name(pStmt, iCol); + const char *zCol2 = sqlite3_column_name(pStmt, iCol+1); + pRtree->base.zErrMsg = sqlite3_mprintf( + "rtree constraint failed: %s.(%s<=%s)", pRtree->zName, zCol1, zCol2 + ); + } + } + + sqlite3_finalize(pStmt); + return (rc==SQLITE_OK ? SQLITE_CONSTRAINT : rc); +} + + + +/* +** The xUpdate method for rtree module virtual tables. +*/ +static int rtreeUpdate( + sqlite3_vtab *pVtab, + int nData, + sqlite3_value **aData, + sqlite_int64 *pRowid +){ + Rtree *pRtree = (Rtree *)pVtab; + int rc = SQLITE_OK; + RtreeCell cell; /* New cell to insert if nData>1 */ + int bHaveRowid = 0; /* Set to 1 after new rowid is determined */ + + if( pRtree->nNodeRef ){ + /* Unable to write to the btree while another cursor is reading from it, + ** since the write might do a rebalance which would disrupt the read + ** cursor. */ + return SQLITE_LOCKED_VTAB; + } + rtreeReference(pRtree); + assert(nData>=1); + + cell.iRowid = 0; /* Used only to suppress a compiler warning */ + + /* Constraint handling. A write operation on an r-tree table may return + ** SQLITE_CONSTRAINT for two reasons: + ** + ** 1. A duplicate rowid value, or + ** 2. The supplied data violates the "x2>=x1" constraint. + ** + ** In the first case, if the conflict-handling mode is REPLACE, then + ** the conflicting row can be removed before proceeding. In the second + ** case, SQLITE_CONSTRAINT must be returned regardless of the + ** conflict-handling mode specified by the user. + */ + if( nData>1 ){ + int ii; + int nn = nData - 4; + + if( nn > pRtree->nDim2 ) nn = pRtree->nDim2; + /* Populate the cell.aCoord[] array. The first coordinate is aData[3]. + ** + ** NB: nData can only be less than nDim*2+3 if the rtree is mis-declared + ** with "column" that are interpreted as table constraints. + ** Example: CREATE VIRTUAL TABLE bad USING rtree(x,y,CHECK(y>5)); + ** This problem was discovered after years of use, so we silently ignore + ** these kinds of misdeclared tables to avoid breaking any legacy. + */ + +#ifndef SQLITE_RTREE_INT_ONLY + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + for(ii=0; iicell.aCoord[ii+1].f ){ + rc = rtreeConstraintError(pRtree, ii+1); + goto constraint; + } + } + }else +#endif + { + for(ii=0; iicell.aCoord[ii+1].i ){ + rc = rtreeConstraintError(pRtree, ii+1); + goto constraint; + } + } + } + + /* If a rowid value was supplied, check if it is already present in + ** the table. If so, the constraint has failed. */ + if( sqlite3_value_type(aData[2])!=SQLITE_NULL ){ + cell.iRowid = sqlite3_value_int64(aData[2]); + if( sqlite3_value_type(aData[0])==SQLITE_NULL + || sqlite3_value_int64(aData[0])!=cell.iRowid + ){ + int steprc; + sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid); + steprc = sqlite3_step(pRtree->pReadRowid); + rc = sqlite3_reset(pRtree->pReadRowid); + if( SQLITE_ROW==steprc ){ + if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){ + rc = rtreeDeleteRowid(pRtree, cell.iRowid); + }else{ + rc = rtreeConstraintError(pRtree, 0); + goto constraint; + } + } + } + bHaveRowid = 1; + } + } + + /* If aData[0] is not an SQL NULL value, it is the rowid of a + ** record to delete from the r-tree table. The following block does + ** just that. + */ + if( sqlite3_value_type(aData[0])!=SQLITE_NULL ){ + rc = rtreeDeleteRowid(pRtree, sqlite3_value_int64(aData[0])); + } + + /* If the aData[] array contains more than one element, elements + ** (aData[2]..aData[argc-1]) contain a new record to insert into + ** the r-tree structure. + */ + if( rc==SQLITE_OK && nData>1 ){ + /* Insert the new record into the r-tree */ + RtreeNode *pLeaf = 0; + + /* Figure out the rowid of the new row. */ + if( bHaveRowid==0 ){ + rc = rtreeNewRowid(pRtree, &cell.iRowid); + } + *pRowid = cell.iRowid; + + if( rc==SQLITE_OK ){ + rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); + } + if( rc==SQLITE_OK ){ + int rc2; + pRtree->iReinsertHeight = -1; + rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); + rc2 = nodeRelease(pRtree, pLeaf); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + if( rc==SQLITE_OK && pRtree->nAux ){ + sqlite3_stmt *pUp = pRtree->pWriteAux; + int jj; + sqlite3_bind_int64(pUp, 1, *pRowid); + for(jj=0; jjnAux; jj++){ + sqlite3_bind_value(pUp, jj+2, aData[pRtree->nDim2+3+jj]); + } + sqlite3_step(pUp); + rc = sqlite3_reset(pUp); + } + } + +constraint: + rtreeRelease(pRtree); + return rc; +} + +/* +** Called when a transaction starts. +*/ +static int rtreeBeginTransaction(sqlite3_vtab *pVtab){ + Rtree *pRtree = (Rtree *)pVtab; + assert( pRtree->inWrTrans==0 ); + pRtree->inWrTrans++; + return SQLITE_OK; +} + +/* +** Called when a transaction completes (either by COMMIT or ROLLBACK). +** The sqlite3_blob object should be released at this point. +*/ +static int rtreeEndTransaction(sqlite3_vtab *pVtab){ + Rtree *pRtree = (Rtree *)pVtab; + pRtree->inWrTrans = 0; + nodeBlobReset(pRtree); + return SQLITE_OK; +} + +/* +** The xRename method for rtree module virtual tables. +*/ +static int rtreeRename(sqlite3_vtab *pVtab, const char *zNewName){ + Rtree *pRtree = (Rtree *)pVtab; + int rc = SQLITE_NOMEM; + char *zSql = sqlite3_mprintf( + "ALTER TABLE %Q.'%q_node' RENAME TO \"%w_node\";" + "ALTER TABLE %Q.'%q_parent' RENAME TO \"%w_parent\";" + "ALTER TABLE %Q.'%q_rowid' RENAME TO \"%w_rowid\";" + , pRtree->zDb, pRtree->zName, zNewName + , pRtree->zDb, pRtree->zName, zNewName + , pRtree->zDb, pRtree->zName, zNewName + ); + if( zSql ){ + nodeBlobReset(pRtree); + rc = sqlite3_exec(pRtree->db, zSql, 0, 0, 0); + sqlite3_free(zSql); + } + return rc; +} + +/* +** The xSavepoint method. +** +** This module does not need to do anything to support savepoints. However, +** it uses this hook to close any open blob handle. This is done because a +** DROP TABLE command - which fortunately always opens a savepoint - cannot +** succeed if there are any open blob handles. i.e. if the blob handle were +** not closed here, the following would fail: +** +** BEGIN; +** INSERT INTO rtree... +** DROP TABLE ; -- Would fail with SQLITE_LOCKED +** COMMIT; +*/ +static int rtreeSavepoint(sqlite3_vtab *pVtab, int iSavepoint){ + Rtree *pRtree = (Rtree *)pVtab; + u8 iwt = pRtree->inWrTrans; + UNUSED_PARAMETER(iSavepoint); + pRtree->inWrTrans = 0; + nodeBlobReset(pRtree); + pRtree->inWrTrans = iwt; + return SQLITE_OK; +} + +/* +** This function populates the pRtree->nRowEst variable with an estimate +** of the number of rows in the virtual table. If possible, this is based +** on sqlite_stat1 data. Otherwise, use RTREE_DEFAULT_ROWEST. +*/ +static int rtreeQueryStat1(sqlite3 *db, Rtree *pRtree){ + const char *zFmt = "SELECT stat FROM %Q.sqlite_stat1 WHERE tbl = '%q_rowid'"; + char *zSql; + sqlite3_stmt *p; + int rc; + i64 nRow = RTREE_MIN_ROWEST; + + rc = sqlite3_table_column_metadata( + db, pRtree->zDb, "sqlite_stat1",0,0,0,0,0,0 + ); + if( rc!=SQLITE_OK ){ + pRtree->nRowEst = RTREE_DEFAULT_ROWEST; + return rc==SQLITE_ERROR ? SQLITE_OK : rc; + } + zSql = sqlite3_mprintf(zFmt, pRtree->zDb, pRtree->zName); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(db, zSql, -1, &p, 0); + if( rc==SQLITE_OK ){ + if( sqlite3_step(p)==SQLITE_ROW ) nRow = sqlite3_column_int64(p, 0); + rc = sqlite3_finalize(p); + } + sqlite3_free(zSql); + } + pRtree->nRowEst = MAX(nRow, RTREE_MIN_ROWEST); + return rc; +} + + +/* +** Return true if zName is the extension on one of the shadow tables used +** by this module. +*/ +static int rtreeShadowName(const char *zName){ + static const char *azName[] = { + "node", "parent", "rowid" + }; + unsigned int i; + for(i=0; idb = db; + + if( isCreate ){ + char *zCreate; + sqlite3_str *p = sqlite3_str_new(db); + int ii; + sqlite3_str_appendf(p, + "CREATE TABLE \"%w\".\"%w_rowid\"(rowid INTEGER PRIMARY KEY,nodeno", + zDb, zPrefix); + for(ii=0; iinAux; ii++){ + sqlite3_str_appendf(p,",a%d",ii); + } + sqlite3_str_appendf(p, + ");CREATE TABLE \"%w\".\"%w_node\"(nodeno INTEGER PRIMARY KEY,data);", + zDb, zPrefix); + sqlite3_str_appendf(p, + "CREATE TABLE \"%w\".\"%w_parent\"(nodeno INTEGER PRIMARY KEY,parentnode);", + zDb, zPrefix); + sqlite3_str_appendf(p, + "INSERT INTO \"%w\".\"%w_node\"VALUES(1,zeroblob(%d))", + zDb, zPrefix, pRtree->iNodeSize); + zCreate = sqlite3_str_finish(p); + if( !zCreate ){ + return SQLITE_NOMEM; + } + rc = sqlite3_exec(db, zCreate, 0, 0, 0); + sqlite3_free(zCreate); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + appStmt[0] = &pRtree->pWriteNode; + appStmt[1] = &pRtree->pDeleteNode; + appStmt[2] = &pRtree->pReadRowid; + appStmt[3] = &pRtree->pWriteRowid; + appStmt[4] = &pRtree->pDeleteRowid; + appStmt[5] = &pRtree->pReadParent; + appStmt[6] = &pRtree->pWriteParent; + appStmt[7] = &pRtree->pDeleteParent; + + rc = rtreeQueryStat1(db, pRtree); + for(i=0; inAux==0 ){ + zFormat = azSql[i]; + }else { + /* An UPSERT is very slightly slower than REPLACE, but it is needed + ** if there are auxiliary columns */ + zFormat = "INSERT INTO\"%w\".\"%w_rowid\"(rowid,nodeno)VALUES(?1,?2)" + "ON CONFLICT(rowid)DO UPDATE SET nodeno=excluded.nodeno"; + } + zSql = sqlite3_mprintf(zFormat, zDb, zPrefix); + if( zSql ){ + rc = sqlite3_prepare_v3(db, zSql, -1, f, appStmt[i], 0); + }else{ + rc = SQLITE_NOMEM; + } + sqlite3_free(zSql); + } + if( pRtree->nAux ){ + pRtree->zReadAuxSql = sqlite3_mprintf( + "SELECT * FROM \"%w\".\"%w_rowid\" WHERE rowid=?1", + zDb, zPrefix); + if( pRtree->zReadAuxSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3_str *p = sqlite3_str_new(db); + int ii; + char *zSql; + sqlite3_str_appendf(p, "UPDATE \"%w\".\"%w_rowid\"SET ", zDb, zPrefix); + for(ii=0; iinAux; ii++){ + if( ii ) sqlite3_str_append(p, ",", 1); +#ifdef SQLITE_ENABLE_GEOPOLY + if( iinAuxNotNull ){ + sqlite3_str_appendf(p,"a%d=coalesce(?%d,a%d)",ii,ii+2,ii); + }else +#endif + { + sqlite3_str_appendf(p,"a%d=?%d",ii,ii+2); + } + } + sqlite3_str_appendf(p, " WHERE rowid=?1"); + zSql = sqlite3_str_finish(p); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v3(db, zSql, -1, f, &pRtree->pWriteAux, 0); + sqlite3_free(zSql); + } + } + } + + return rc; +} + +/* +** The second argument to this function contains the text of an SQL statement +** that returns a single integer value. The statement is compiled and executed +** using database connection db. If successful, the integer value returned +** is written to *piVal and SQLITE_OK returned. Otherwise, an SQLite error +** code is returned and the value of *piVal after returning is not defined. +*/ +static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){ + int rc = SQLITE_NOMEM; + if( zSql ){ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *piVal = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_finalize(pStmt); + } + } + return rc; +} + +/* +** This function is called from within the xConnect() or xCreate() method to +** determine the node-size used by the rtree table being created or connected +** to. If successful, pRtree->iNodeSize is populated and SQLITE_OK returned. +** Otherwise, an SQLite error code is returned. +** +** If this function is being called as part of an xConnect(), then the rtree +** table already exists. In this case the node-size is determined by inspecting +** the root node of the tree. +** +** Otherwise, for an xCreate(), use 64 bytes less than the database page-size. +** This ensures that each node is stored on a single database page. If the +** database page-size is so large that more than RTREE_MAXCELLS entries +** would fit in a single node, use a smaller node-size. +*/ +static int getNodeSize( + sqlite3 *db, /* Database handle */ + Rtree *pRtree, /* Rtree handle */ + int isCreate, /* True for xCreate, false for xConnect */ + char **pzErr /* OUT: Error message, if any */ +){ + int rc; + char *zSql; + if( isCreate ){ + int iPageSize = 0; + zSql = sqlite3_mprintf("PRAGMA %Q.page_size", pRtree->zDb); + rc = getIntFromStmt(db, zSql, &iPageSize); + if( rc==SQLITE_OK ){ + pRtree->iNodeSize = iPageSize-64; + if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)iNodeSize ){ + pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS; + } + }else{ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } + }else{ + zSql = sqlite3_mprintf( + "SELECT length(data) FROM '%q'.'%q_node' WHERE nodeno = 1", + pRtree->zDb, pRtree->zName + ); + rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize); + if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + }else if( pRtree->iNodeSize<(512-64) ){ + rc = SQLITE_CORRUPT_VTAB; + RTREE_IS_CORRUPT(pRtree); + *pzErr = sqlite3_mprintf("undersize RTree blobs in \"%q_node\"", + pRtree->zName); + } + } + + sqlite3_free(zSql); + return rc; +} + +/* +** Return the length of a token +*/ +static int rtreeTokenLength(const char *z){ + int dummy = 0; + return sqlite3GetToken((const unsigned char*)z,&dummy); +} + +/* +** This function is the implementation of both the xConnect and xCreate +** methods of the r-tree virtual table. +** +** argv[0] -> module name +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> column names... +*/ +static int rtreeInit( + sqlite3 *db, /* Database connection */ + void *pAux, /* One of the RTREE_COORD_* constants */ + int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ + sqlite3_vtab **ppVtab, /* OUT: New virtual table */ + char **pzErr, /* OUT: Error message, if any */ + int isCreate /* True for xCreate, false for xConnect */ +){ + int rc = SQLITE_OK; + Rtree *pRtree; + int nDb; /* Length of string argv[1] */ + int nName; /* Length of string argv[2] */ + int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32); + sqlite3_str *pSql; + char *zSql; + int ii = 4; + int iErr; + + const char *aErrMsg[] = { + 0, /* 0 */ + "Wrong number of columns for an rtree table", /* 1 */ + "Too few columns for an rtree table", /* 2 */ + "Too many columns for an rtree table", /* 3 */ + "Auxiliary rtree columns must be last" /* 4 */ + }; + + assert( RTREE_MAX_AUX_COLUMN<256 ); /* Aux columns counted by a u8 */ + if( argc<6 || argc>RTREE_MAX_AUX_COLUMN+3 ){ + *pzErr = sqlite3_mprintf("%s", aErrMsg[2 + (argc>=6)]); + return SQLITE_ERROR; + } + + sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + + /* Allocate the sqlite3_vtab structure */ + nDb = (int)strlen(argv[1]); + nName = (int)strlen(argv[2]); + pRtree = (Rtree *)sqlite3_malloc64(sizeof(Rtree)+nDb+nName+2); + if( !pRtree ){ + return SQLITE_NOMEM; + } + memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); + pRtree->nBusy = 1; + pRtree->base.pModule = &rtreeModule; + pRtree->zDb = (char *)&pRtree[1]; + pRtree->zName = &pRtree->zDb[nDb+1]; + pRtree->eCoordType = (u8)eCoordType; + memcpy(pRtree->zDb, argv[1], nDb); + memcpy(pRtree->zName, argv[2], nName); + + + /* Create/Connect to the underlying relational database schema. If + ** that is successful, call sqlite3_declare_vtab() to configure + ** the r-tree table schema. + */ + pSql = sqlite3_str_new(db); + sqlite3_str_appendf(pSql, "CREATE TABLE x(%.*s INT", + rtreeTokenLength(argv[3]), argv[3]); + for(ii=4; iinAux++; + sqlite3_str_appendf(pSql, ",%.*s", rtreeTokenLength(zArg+1), zArg+1); + }else if( pRtree->nAux>0 ){ + break; + }else{ + static const char *azFormat[] = {",%.*s REAL", ",%.*s INT"}; + pRtree->nDim2++; + sqlite3_str_appendf(pSql, azFormat[eCoordType], + rtreeTokenLength(zArg), zArg); + } + } + sqlite3_str_appendf(pSql, ");"); + zSql = sqlite3_str_finish(pSql); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else if( iinDim = pRtree->nDim2/2; + if( pRtree->nDim<1 ){ + iErr = 2; + }else if( pRtree->nDim2>RTREE_MAX_DIMENSIONS*2 ){ + iErr = 3; + }else if( pRtree->nDim2 % 2 ){ + iErr = 1; + }else{ + iErr = 0; + } + if( iErr ){ + *pzErr = sqlite3_mprintf("%s", aErrMsg[iErr]); + goto rtreeInit_fail; + } + pRtree->nBytesPerCell = 8 + pRtree->nDim2*4; + + /* Figure out the node size to use. */ + rc = getNodeSize(db, pRtree, isCreate, pzErr); + if( rc ) goto rtreeInit_fail; + rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate); + if( rc ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + goto rtreeInit_fail; + } + + *ppVtab = (sqlite3_vtab *)pRtree; + return SQLITE_OK; + +rtreeInit_fail: + if( rc==SQLITE_OK ) rc = SQLITE_ERROR; + assert( *ppVtab==0 ); + assert( pRtree->nBusy==1 ); + rtreeRelease(pRtree); + return rc; +} + + +/* +** Implementation of a scalar function that decodes r-tree nodes to +** human readable strings. This can be used for debugging and analysis. +** +** The scalar function takes two arguments: (1) the number of dimensions +** to the rtree (between 1 and 5, inclusive) and (2) a blob of data containing +** an r-tree node. For a two-dimensional r-tree structure called "rt", to +** deserialize all nodes, a statement like: +** +** SELECT rtreenode(2, data) FROM rt_node; +** +** The human readable string takes the form of a Tcl list with one +** entry for each cell in the r-tree node. Each entry is itself a +** list, containing the 8-byte rowid/pageno followed by the +** *2 coordinates. +*/ +static void rtreenode(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ + RtreeNode node; + Rtree tree; + int ii; + int nData; + int errCode; + sqlite3_str *pOut; + + UNUSED_PARAMETER(nArg); + memset(&node, 0, sizeof(RtreeNode)); + memset(&tree, 0, sizeof(Rtree)); + tree.nDim = (u8)sqlite3_value_int(apArg[0]); + if( tree.nDim<1 || tree.nDim>5 ) return; + tree.nDim2 = tree.nDim*2; + tree.nBytesPerCell = 8 + 8 * tree.nDim; + node.zData = (u8 *)sqlite3_value_blob(apArg[1]); + if( node.zData==0 ) return; + nData = sqlite3_value_bytes(apArg[1]); + if( nData<4 ) return; + if( nData0 ) sqlite3_str_append(pOut, " ", 1); + sqlite3_str_appendf(pOut, "{%lld", cell.iRowid); + for(jj=0; jjrc==SQLITE_OK ) pCheck->rc = rc; +} + +/* +** The second and subsequent arguments to this function are a format string +** and printf style arguments. This function formats the string and attempts +** to compile it as an SQL statement. +** +** If successful, a pointer to the new SQL statement is returned. Otherwise, +** NULL is returned and an error code left in RtreeCheck.rc. +*/ +static sqlite3_stmt *rtreeCheckPrepare( + RtreeCheck *pCheck, /* RtreeCheck object */ + const char *zFmt, ... /* Format string and trailing args */ +){ + va_list ap; + char *z; + sqlite3_stmt *pRet = 0; + + va_start(ap, zFmt); + z = sqlite3_vmprintf(zFmt, ap); + + if( pCheck->rc==SQLITE_OK ){ + if( z==0 ){ + pCheck->rc = SQLITE_NOMEM; + }else{ + pCheck->rc = sqlite3_prepare_v2(pCheck->db, z, -1, &pRet, 0); + } + } + + sqlite3_free(z); + va_end(ap); + return pRet; +} + +/* +** The second and subsequent arguments to this function are a printf() +** style format string and arguments. This function formats the string and +** appends it to the report being accumuated in pCheck. +*/ +static void rtreeCheckAppendMsg(RtreeCheck *pCheck, const char *zFmt, ...){ + va_list ap; + va_start(ap, zFmt); + if( pCheck->rc==SQLITE_OK && pCheck->nErrrc = SQLITE_NOMEM; + }else{ + pCheck->zReport = sqlite3_mprintf("%z%s%z", + pCheck->zReport, (pCheck->zReport ? "\n" : ""), z + ); + if( pCheck->zReport==0 ){ + pCheck->rc = SQLITE_NOMEM; + } + } + pCheck->nErr++; + } + va_end(ap); +} + +/* +** This function is a no-op if there is already an error code stored +** in the RtreeCheck object indicated by the first argument. NULL is +** returned in this case. +** +** Otherwise, the contents of rtree table node iNode are loaded from +** the database and copied into a buffer obtained from sqlite3_malloc(). +** If no error occurs, a pointer to the buffer is returned and (*pnNode) +** is set to the size of the buffer in bytes. +** +** Or, if an error does occur, NULL is returned and an error code left +** in the RtreeCheck object. The final value of *pnNode is undefined in +** this case. +*/ +static u8 *rtreeCheckGetNode(RtreeCheck *pCheck, i64 iNode, int *pnNode){ + u8 *pRet = 0; /* Return value */ + + if( pCheck->rc==SQLITE_OK && pCheck->pGetNode==0 ){ + pCheck->pGetNode = rtreeCheckPrepare(pCheck, + "SELECT data FROM %Q.'%q_node' WHERE nodeno=?", + pCheck->zDb, pCheck->zTab + ); + } + + if( pCheck->rc==SQLITE_OK ){ + sqlite3_bind_int64(pCheck->pGetNode, 1, iNode); + if( sqlite3_step(pCheck->pGetNode)==SQLITE_ROW ){ + int nNode = sqlite3_column_bytes(pCheck->pGetNode, 0); + const u8 *pNode = (const u8*)sqlite3_column_blob(pCheck->pGetNode, 0); + pRet = sqlite3_malloc64(nNode); + if( pRet==0 ){ + pCheck->rc = SQLITE_NOMEM; + }else{ + memcpy(pRet, pNode, nNode); + *pnNode = nNode; + } + } + rtreeCheckReset(pCheck, pCheck->pGetNode); + if( pCheck->rc==SQLITE_OK && pRet==0 ){ + rtreeCheckAppendMsg(pCheck, "Node %lld missing from database", iNode); + } + } + + return pRet; +} + +/* +** This function is used to check that the %_parent (if bLeaf==0) or %_rowid +** (if bLeaf==1) table contains a specified entry. The schemas of the +** two tables are: +** +** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) +** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER, ...) +** +** In both cases, this function checks that there exists an entry with +** IPK value iKey and the second column set to iVal. +** +*/ +static void rtreeCheckMapping( + RtreeCheck *pCheck, /* RtreeCheck object */ + int bLeaf, /* True for a leaf cell, false for interior */ + i64 iKey, /* Key for mapping */ + i64 iVal /* Expected value for mapping */ +){ + int rc; + sqlite3_stmt *pStmt; + const char *azSql[2] = { + "SELECT parentnode FROM %Q.'%q_parent' WHERE nodeno=?1", + "SELECT nodeno FROM %Q.'%q_rowid' WHERE rowid=?1" + }; + + assert( bLeaf==0 || bLeaf==1 ); + if( pCheck->aCheckMapping[bLeaf]==0 ){ + pCheck->aCheckMapping[bLeaf] = rtreeCheckPrepare(pCheck, + azSql[bLeaf], pCheck->zDb, pCheck->zTab + ); + } + if( pCheck->rc!=SQLITE_OK ) return; + + pStmt = pCheck->aCheckMapping[bLeaf]; + sqlite3_bind_int64(pStmt, 1, iKey); + rc = sqlite3_step(pStmt); + if( rc==SQLITE_DONE ){ + rtreeCheckAppendMsg(pCheck, "Mapping (%lld -> %lld) missing from %s table", + iKey, iVal, (bLeaf ? "%_rowid" : "%_parent") + ); + }else if( rc==SQLITE_ROW ){ + i64 ii = sqlite3_column_int64(pStmt, 0); + if( ii!=iVal ){ + rtreeCheckAppendMsg(pCheck, + "Found (%lld -> %lld) in %s table, expected (%lld -> %lld)", + iKey, ii, (bLeaf ? "%_rowid" : "%_parent"), iKey, iVal + ); + } + } + rtreeCheckReset(pCheck, pStmt); +} + +/* +** Argument pCell points to an array of coordinates stored on an rtree page. +** This function checks that the coordinates are internally consistent (no +** x1>x2 conditions) and adds an error message to the RtreeCheck object +** if they are not. +** +** Additionally, if pParent is not NULL, then it is assumed to point to +** the array of coordinates on the parent page that bound the page +** containing pCell. In this case it is also verified that the two +** sets of coordinates are mutually consistent and an error message added +** to the RtreeCheck object if they are not. +*/ +static void rtreeCheckCellCoord( + RtreeCheck *pCheck, + i64 iNode, /* Node id to use in error messages */ + int iCell, /* Cell number to use in error messages */ + u8 *pCell, /* Pointer to cell coordinates */ + u8 *pParent /* Pointer to parent coordinates */ +){ + RtreeCoord c1, c2; + RtreeCoord p1, p2; + int i; + + for(i=0; inDim; i++){ + readCoord(&pCell[4*2*i], &c1); + readCoord(&pCell[4*(2*i + 1)], &c2); + + /* printf("%e, %e\n", c1.u.f, c2.u.f); */ + if( pCheck->bInt ? c1.i>c2.i : c1.f>c2.f ){ + rtreeCheckAppendMsg(pCheck, + "Dimension %d of cell %d on node %lld is corrupt", i, iCell, iNode + ); + } + + if( pParent ){ + readCoord(&pParent[4*2*i], &p1); + readCoord(&pParent[4*(2*i + 1)], &p2); + + if( (pCheck->bInt ? c1.ibInt ? c2.i>p2.i : c2.f>p2.f) + ){ + rtreeCheckAppendMsg(pCheck, + "Dimension %d of cell %d on node %lld is corrupt relative to parent" + , i, iCell, iNode + ); + } + } + } +} + +/* +** Run rtreecheck() checks on node iNode, which is at depth iDepth within +** the r-tree structure. Argument aParent points to the array of coordinates +** that bound node iNode on the parent node. +** +** If any problems are discovered, an error message is appended to the +** report accumulated in the RtreeCheck object. +*/ +static void rtreeCheckNode( + RtreeCheck *pCheck, + int iDepth, /* Depth of iNode (0==leaf) */ + u8 *aParent, /* Buffer containing parent coords */ + i64 iNode /* Node to check */ +){ + u8 *aNode = 0; + int nNode = 0; + + assert( iNode==1 || aParent!=0 ); + assert( pCheck->nDim>0 ); + + aNode = rtreeCheckGetNode(pCheck, iNode, &nNode); + if( aNode ){ + if( nNode<4 ){ + rtreeCheckAppendMsg(pCheck, + "Node %lld is too small (%d bytes)", iNode, nNode + ); + }else{ + int nCell; /* Number of cells on page */ + int i; /* Used to iterate through cells */ + if( aParent==0 ){ + iDepth = readInt16(aNode); + if( iDepth>RTREE_MAX_DEPTH ){ + rtreeCheckAppendMsg(pCheck, "Rtree depth out of range (%d)", iDepth); + sqlite3_free(aNode); + return; + } + } + nCell = readInt16(&aNode[2]); + if( (4 + nCell*(8 + pCheck->nDim*2*4))>nNode ){ + rtreeCheckAppendMsg(pCheck, + "Node %lld is too small for cell count of %d (%d bytes)", + iNode, nCell, nNode + ); + }else{ + for(i=0; inDim*2*4)]; + i64 iVal = readInt64(pCell); + rtreeCheckCellCoord(pCheck, iNode, i, &pCell[8], aParent); + + if( iDepth>0 ){ + rtreeCheckMapping(pCheck, 0, iVal, iNode); + rtreeCheckNode(pCheck, iDepth-1, &pCell[8], iVal); + pCheck->nNonLeaf++; + }else{ + rtreeCheckMapping(pCheck, 1, iVal, iNode); + pCheck->nLeaf++; + } + } + } + } + sqlite3_free(aNode); + } +} + +/* +** The second argument to this function must be either "_rowid" or +** "_parent". This function checks that the number of entries in the +** %_rowid or %_parent table is exactly nExpect. If not, it adds +** an error message to the report in the RtreeCheck object indicated +** by the first argument. +*/ +static void rtreeCheckCount(RtreeCheck *pCheck, const char *zTbl, i64 nExpect){ + if( pCheck->rc==SQLITE_OK ){ + sqlite3_stmt *pCount; + pCount = rtreeCheckPrepare(pCheck, "SELECT count(*) FROM %Q.'%q%s'", + pCheck->zDb, pCheck->zTab, zTbl + ); + if( pCount ){ + if( sqlite3_step(pCount)==SQLITE_ROW ){ + i64 nActual = sqlite3_column_int64(pCount, 0); + if( nActual!=nExpect ){ + rtreeCheckAppendMsg(pCheck, "Wrong number of entries in %%%s table" + " - expected %lld, actual %lld" , zTbl, nExpect, nActual + ); + } + } + pCheck->rc = sqlite3_finalize(pCount); + } + } +} + +/* +** This function does the bulk of the work for the rtree integrity-check. +** It is called by rtreecheck(), which is the SQL function implementation. +*/ +static int rtreeCheckTable( + sqlite3 *db, /* Database handle to access db through */ + const char *zDb, /* Name of db ("main", "temp" etc.) */ + const char *zTab, /* Name of rtree table to check */ + char **pzReport /* OUT: sqlite3_malloc'd report text */ +){ + RtreeCheck check; /* Common context for various routines */ + sqlite3_stmt *pStmt = 0; /* Used to find column count of rtree table */ + int bEnd = 0; /* True if transaction should be closed */ + int nAux = 0; /* Number of extra columns. */ + + /* Initialize the context object */ + memset(&check, 0, sizeof(check)); + check.db = db; + check.zDb = zDb; + check.zTab = zTab; + + /* If there is not already an open transaction, open one now. This is + ** to ensure that the queries run as part of this integrity-check operate + ** on a consistent snapshot. */ + if( sqlite3_get_autocommit(db) ){ + check.rc = sqlite3_exec(db, "BEGIN", 0, 0, 0); + bEnd = 1; + } + + /* Find the number of auxiliary columns */ + if( check.rc==SQLITE_OK ){ + pStmt = rtreeCheckPrepare(&check, "SELECT * FROM %Q.'%q_rowid'", zDb, zTab); + if( pStmt ){ + nAux = sqlite3_column_count(pStmt) - 2; + sqlite3_finalize(pStmt); + }else + if( check.rc!=SQLITE_NOMEM ){ + check.rc = SQLITE_OK; + } + } + + /* Find number of dimensions in the rtree table. */ + pStmt = rtreeCheckPrepare(&check, "SELECT * FROM %Q.%Q", zDb, zTab); + if( pStmt ){ + int rc; + check.nDim = (sqlite3_column_count(pStmt) - 1 - nAux) / 2; + if( check.nDim<1 ){ + rtreeCheckAppendMsg(&check, "Schema corrupt or not an rtree"); + }else if( SQLITE_ROW==sqlite3_step(pStmt) ){ + check.bInt = (sqlite3_column_type(pStmt, 1)==SQLITE_INTEGER); + } + rc = sqlite3_finalize(pStmt); + if( rc!=SQLITE_CORRUPT ) check.rc = rc; + } + + /* Do the actual integrity-check */ + if( check.nDim>=1 ){ + if( check.rc==SQLITE_OK ){ + rtreeCheckNode(&check, 0, 0, 1); + } + rtreeCheckCount(&check, "_rowid", check.nLeaf); + rtreeCheckCount(&check, "_parent", check.nNonLeaf); + } + + /* Finalize SQL statements used by the integrity-check */ + sqlite3_finalize(check.pGetNode); + sqlite3_finalize(check.aCheckMapping[0]); + sqlite3_finalize(check.aCheckMapping[1]); + + /* If one was opened, close the transaction */ + if( bEnd ){ + int rc = sqlite3_exec(db, "END", 0, 0, 0); + if( check.rc==SQLITE_OK ) check.rc = rc; + } + *pzReport = check.zReport; + return check.rc; +} + +/* +** Usage: +** +** rtreecheck(); +** rtreecheck(, ); +** +** Invoking this SQL function runs an integrity-check on the named rtree +** table. The integrity-check verifies the following: +** +** 1. For each cell in the r-tree structure (%_node table), that: +** +** a) for each dimension, (coord1 <= coord2). +** +** b) unless the cell is on the root node, that the cell is bounded +** by the parent cell on the parent node. +** +** c) for leaf nodes, that there is an entry in the %_rowid +** table corresponding to the cell's rowid value that +** points to the correct node. +** +** d) for cells on non-leaf nodes, that there is an entry in the +** %_parent table mapping from the cell's child node to the +** node that it resides on. +** +** 2. That there are the same number of entries in the %_rowid table +** as there are leaf cells in the r-tree structure, and that there +** is a leaf cell that corresponds to each entry in the %_rowid table. +** +** 3. That there are the same number of entries in the %_parent table +** as there are non-leaf cells in the r-tree structure, and that +** there is a non-leaf cell that corresponds to each entry in the +** %_parent table. +*/ +static void rtreecheck( + sqlite3_context *ctx, + int nArg, + sqlite3_value **apArg +){ + if( nArg!=1 && nArg!=2 ){ + sqlite3_result_error(ctx, + "wrong number of arguments to function rtreecheck()", -1 + ); + }else{ + int rc; + char *zReport = 0; + const char *zDb = (const char*)sqlite3_value_text(apArg[0]); + const char *zTab; + if( nArg==1 ){ + zTab = zDb; + zDb = "main"; + }else{ + zTab = (const char*)sqlite3_value_text(apArg[1]); + } + rc = rtreeCheckTable(sqlite3_context_db_handle(ctx), zDb, zTab, &zReport); + if( rc==SQLITE_OK ){ + sqlite3_result_text(ctx, zReport ? zReport : "ok", -1, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_code(ctx, rc); + } + sqlite3_free(zReport); + } +} + +/* Conditionally include the geopoly code */ +#ifdef SQLITE_ENABLE_GEOPOLY +/************** Include geopoly.c in the middle of rtree.c *******************/ +/************** Begin file geopoly.c *****************************************/ +/* +** 2018-05-25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file implements an alternative R-Tree virtual table that +** uses polygons to express the boundaries of 2-dimensional objects. +** +** This file is #include-ed onto the end of "rtree.c" so that it has +** access to all of the R-Tree internals. +*/ +/* #include */ + +/* Enable -DGEOPOLY_ENABLE_DEBUG for debugging facilities */ +#ifdef GEOPOLY_ENABLE_DEBUG + static int geo_debug = 0; +# define GEODEBUG(X) if(geo_debug)printf X +#else +# define GEODEBUG(X) +#endif + +/* Character class routines */ +#ifdef sqlite3Isdigit + /* Use the SQLite core versions if this routine is part of the + ** SQLite amalgamation */ +# define safe_isdigit(x) sqlite3Isdigit(x) +# define safe_isalnum(x) sqlite3Isalnum(x) +# define safe_isxdigit(x) sqlite3Isxdigit(x) +#else + /* Use the standard library for separate compilation */ +#include /* amalgamator: keep */ +# define safe_isdigit(x) isdigit((unsigned char)(x)) +# define safe_isalnum(x) isalnum((unsigned char)(x)) +# define safe_isxdigit(x) isxdigit((unsigned char)(x)) +#endif + +#ifndef JSON_NULL /* The following stuff repeats things found in json1 */ +/* +** Growing our own isspace() routine this way is twice as fast as +** the library isspace() function. +*/ +static const char geopolyIsSpace[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; +#define fast_isspace(x) (geopolyIsSpace[(unsigned char)x]) +#endif /* JSON NULL - back to original code */ + +/* Compiler and version */ +#ifndef GCC_VERSION +#if defined(__GNUC__) && !defined(SQLITE_DISABLE_INTRINSIC) +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif +#endif +#ifndef MSVC_VERSION +#if defined(_MSC_VER) && !defined(SQLITE_DISABLE_INTRINSIC) +# define MSVC_VERSION _MSC_VER +#else +# define MSVC_VERSION 0 +#endif +#endif + +/* Datatype for coordinates +*/ +typedef float GeoCoord; + +/* +** Internal representation of a polygon. +** +** The polygon consists of a sequence of vertexes. There is a line +** segment between each pair of vertexes, and one final segment from +** the last vertex back to the first. (This differs from the GeoJSON +** standard in which the final vertex is a repeat of the first.) +** +** The polygon follows the right-hand rule. The area to the right of +** each segment is "outside" and the area to the left is "inside". +** +** The on-disk representation consists of a 4-byte header followed by +** the values. The 4-byte header is: +** +** encoding (1 byte) 0=big-endian, 1=little-endian +** nvertex (3 bytes) Number of vertexes as a big-endian integer +** +** Enough space is allocated for 4 coordinates, to work around over-zealous +** warnings coming from some compiler (notably, clang). In reality, the size +** of each GeoPoly memory allocate is adjusted as necessary so that the +** GeoPoly.a[] array at the end is the appropriate size. +*/ +typedef struct GeoPoly GeoPoly; +struct GeoPoly { + int nVertex; /* Number of vertexes */ + unsigned char hdr[4]; /* Header for on-disk representation */ + GeoCoord a[8]; /* 2*nVertex values. X (longitude) first, then Y */ +}; + +/* The size of a memory allocation needed for a GeoPoly object sufficient +** to hold N coordinate pairs. +*/ +#define GEOPOLY_SZ(N) (sizeof(GeoPoly) + sizeof(GeoCoord)*2*((N)-4)) + +/* Macros to access coordinates of a GeoPoly. +** We have to use these macros, rather than just say p->a[i] in order +** to silence (incorrect) UBSAN warnings if the array index is too large. +*/ +#define GeoX(P,I) (((GeoCoord*)(P)->a)[(I)*2]) +#define GeoY(P,I) (((GeoCoord*)(P)->a)[(I)*2+1]) + + +/* +** State of a parse of a GeoJSON input. +*/ +typedef struct GeoParse GeoParse; +struct GeoParse { + const unsigned char *z; /* Unparsed input */ + int nVertex; /* Number of vertexes in a[] */ + int nAlloc; /* Space allocated to a[] */ + int nErr; /* Number of errors encountered */ + GeoCoord *a; /* Array of vertexes. From sqlite3_malloc64() */ +}; + +/* Do a 4-byte byte swap */ +static void geopolySwab32(unsigned char *a){ + unsigned char t = a[0]; + a[0] = a[3]; + a[3] = t; + t = a[1]; + a[1] = a[2]; + a[2] = t; +} + +/* Skip whitespace. Return the next non-whitespace character. */ +static char geopolySkipSpace(GeoParse *p){ + while( fast_isspace(p->z[0]) ) p->z++; + return p->z[0]; +} + +/* Parse out a number. Write the value into *pVal if pVal!=0. +** return non-zero on success and zero if the next token is not a number. +*/ +static int geopolyParseNumber(GeoParse *p, GeoCoord *pVal){ + char c = geopolySkipSpace(p); + const unsigned char *z = p->z; + int j = 0; + int seenDP = 0; + int seenE = 0; + if( c=='-' ){ + j = 1; + c = z[j]; + } + if( c=='0' && z[j+1]>='0' && z[j+1]<='9' ) return 0; + for(;; j++){ + c = z[j]; + if( safe_isdigit(c) ) continue; + if( c=='.' ){ + if( z[j-1]=='-' ) return 0; + if( seenDP ) return 0; + seenDP = 1; + continue; + } + if( c=='e' || c=='E' ){ + if( z[j-1]<'0' ) return 0; + if( seenE ) return -1; + seenDP = seenE = 1; + c = z[j+1]; + if( c=='+' || c=='-' ){ + j++; + c = z[j+1]; + } + if( c<'0' || c>'9' ) return 0; + continue; + } + break; + } + if( z[j-1]<'0' ) return 0; + if( pVal ){ +#ifdef SQLITE_AMALGAMATION + /* The sqlite3AtoF() routine is much much faster than atof(), if it + ** is available */ + double r; + (void)sqlite3AtoF((const char*)p->z, &r, j, SQLITE_UTF8); + *pVal = r; +#else + *pVal = (GeoCoord)atof((const char*)p->z); +#endif + } + p->z += j; + return 1; +} + +/* +** If the input is a well-formed JSON array of coordinates with at least +** four coordinates and where each coordinate is itself a two-value array, +** then convert the JSON into a GeoPoly object and return a pointer to +** that object. +** +** If any error occurs, return NULL. +*/ +static GeoPoly *geopolyParseJson(const unsigned char *z, int *pRc){ + GeoParse s; + int rc = SQLITE_OK; + memset(&s, 0, sizeof(s)); + s.z = z; + if( geopolySkipSpace(&s)=='[' ){ + s.z++; + while( geopolySkipSpace(&s)=='[' ){ + int ii = 0; + char c; + s.z++; + if( s.nVertex>=s.nAlloc ){ + GeoCoord *aNew; + s.nAlloc = s.nAlloc*2 + 16; + aNew = sqlite3_realloc64(s.a, s.nAlloc*sizeof(GeoCoord)*2 ); + if( aNew==0 ){ + rc = SQLITE_NOMEM; + s.nErr++; + break; + } + s.a = aNew; + } + while( geopolyParseNumber(&s, ii<=1 ? &s.a[s.nVertex*2+ii] : 0) ){ + ii++; + if( ii==2 ) s.nVertex++; + c = geopolySkipSpace(&s); + s.z++; + if( c==',' ) continue; + if( c==']' && ii>=2 ) break; + s.nErr++; + rc = SQLITE_ERROR; + goto parse_json_err; + } + if( geopolySkipSpace(&s)==',' ){ + s.z++; + continue; + } + break; + } + if( geopolySkipSpace(&s)==']' + && s.nVertex>=4 + && s.a[0]==s.a[s.nVertex*2-2] + && s.a[1]==s.a[s.nVertex*2-1] + && (s.z++, geopolySkipSpace(&s)==0) + ){ + GeoPoly *pOut; + int x = 1; + s.nVertex--; /* Remove the redundant vertex at the end */ + pOut = sqlite3_malloc64( GEOPOLY_SZ((sqlite3_int64)s.nVertex) ); + x = 1; + if( pOut==0 ) goto parse_json_err; + pOut->nVertex = s.nVertex; + memcpy(pOut->a, s.a, s.nVertex*2*sizeof(GeoCoord)); + pOut->hdr[0] = *(unsigned char*)&x; + pOut->hdr[1] = (s.nVertex>>16)&0xff; + pOut->hdr[2] = (s.nVertex>>8)&0xff; + pOut->hdr[3] = s.nVertex&0xff; + sqlite3_free(s.a); + if( pRc ) *pRc = SQLITE_OK; + return pOut; + }else{ + s.nErr++; + rc = SQLITE_ERROR; + } + } +parse_json_err: + if( pRc ) *pRc = rc; + sqlite3_free(s.a); + return 0; +} + +/* +** Given a function parameter, try to interpret it as a polygon, either +** in the binary format or JSON text. Compute a GeoPoly object and +** return a pointer to that object. Or if the input is not a well-formed +** polygon, put an error message in sqlite3_context and return NULL. +*/ +static GeoPoly *geopolyFuncParam( + sqlite3_context *pCtx, /* Context for error messages */ + sqlite3_value *pVal, /* The value to decode */ + int *pRc /* Write error here */ +){ + GeoPoly *p = 0; + int nByte; + testcase( pCtx==0 ); + if( sqlite3_value_type(pVal)==SQLITE_BLOB + && (nByte = sqlite3_value_bytes(pVal))>=(4+6*sizeof(GeoCoord)) + ){ + const unsigned char *a = sqlite3_value_blob(pVal); + int nVertex; + if( a==0 ){ + if( pCtx ) sqlite3_result_error_nomem(pCtx); + return 0; + } + nVertex = (a[1]<<16) + (a[2]<<8) + a[3]; + if( (a[0]==0 || a[0]==1) + && (nVertex*2*sizeof(GeoCoord) + 4)==(unsigned int)nByte + ){ + p = sqlite3_malloc64( sizeof(*p) + (nVertex-1)*2*sizeof(GeoCoord) ); + if( p==0 ){ + if( pRc ) *pRc = SQLITE_NOMEM; + if( pCtx ) sqlite3_result_error_nomem(pCtx); + }else{ + int x = 1; + p->nVertex = nVertex; + memcpy(p->hdr, a, nByte); + if( a[0] != *(unsigned char*)&x ){ + int ii; + for(ii=0; iihdr[0] ^= 1; + } + } + } + if( pRc ) *pRc = SQLITE_OK; + return p; + }else if( sqlite3_value_type(pVal)==SQLITE_TEXT ){ + const unsigned char *zJson = sqlite3_value_text(pVal); + if( zJson==0 ){ + if( pRc ) *pRc = SQLITE_NOMEM; + return 0; + } + return geopolyParseJson(zJson, pRc); + }else{ + if( pRc ) *pRc = SQLITE_ERROR; + return 0; + } +} + +/* +** Implementation of the geopoly_blob(X) function. +** +** If the input is a well-formed Geopoly BLOB or JSON string +** then return the BLOB representation of the polygon. Otherwise +** return NULL. +*/ +static void geopolyBlobFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + if( p ){ + sqlite3_result_blob(context, p->hdr, + 4+8*p->nVertex, SQLITE_TRANSIENT); + sqlite3_free(p); + } +} + +/* +** SQL function: geopoly_json(X) +** +** Interpret X as a polygon and render it as a JSON array +** of coordinates. Or, if X is not a valid polygon, return NULL. +*/ +static void geopolyJsonFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + if( p ){ + sqlite3 *db = sqlite3_context_db_handle(context); + sqlite3_str *x = sqlite3_str_new(db); + int i; + sqlite3_str_append(x, "[", 1); + for(i=0; inVertex; i++){ + sqlite3_str_appendf(x, "[%!g,%!g],", GeoX(p,i), GeoY(p,i)); + } + sqlite3_str_appendf(x, "[%!g,%!g]]", GeoX(p,0), GeoY(p,0)); + sqlite3_result_text(context, sqlite3_str_finish(x), -1, sqlite3_free); + sqlite3_free(p); + } +} + +/* +** SQL function: geopoly_svg(X, ....) +** +** Interpret X as a polygon and render it as a SVG . +** Additional arguments are added as attributes to the . +*/ +static void geopolySvgFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p; + if( argc<1 ) return; + p = geopolyFuncParam(context, argv[0], 0); + if( p ){ + sqlite3 *db = sqlite3_context_db_handle(context); + sqlite3_str *x = sqlite3_str_new(db); + int i; + char cSep = '\''; + sqlite3_str_appendf(x, ""); + sqlite3_result_text(context, sqlite3_str_finish(x), -1, sqlite3_free); + sqlite3_free(p); + } +} + +/* +** SQL Function: geopoly_xform(poly, A, B, C, D, E, F) +** +** Transform and/or translate a polygon as follows: +** +** x1 = A*x0 + B*y0 + E +** y1 = C*x0 + D*y0 + F +** +** For a translation: +** +** geopoly_xform(poly, 1, 0, 0, 1, x-offset, y-offset) +** +** Rotate by R around the point (0,0): +** +** geopoly_xform(poly, cos(R), sin(R), -sin(R), cos(R), 0, 0) +*/ +static void geopolyXformFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + double A = sqlite3_value_double(argv[1]); + double B = sqlite3_value_double(argv[2]); + double C = sqlite3_value_double(argv[3]); + double D = sqlite3_value_double(argv[4]); + double E = sqlite3_value_double(argv[5]); + double F = sqlite3_value_double(argv[6]); + GeoCoord x1, y1, x0, y0; + int ii; + if( p ){ + for(ii=0; iinVertex; ii++){ + x0 = GeoX(p,ii); + y0 = GeoY(p,ii); + x1 = (GeoCoord)(A*x0 + B*y0 + E); + y1 = (GeoCoord)(C*x0 + D*y0 + F); + GeoX(p,ii) = x1; + GeoY(p,ii) = y1; + } + sqlite3_result_blob(context, p->hdr, + 4+8*p->nVertex, SQLITE_TRANSIENT); + sqlite3_free(p); + } +} + +/* +** Compute the area enclosed by the polygon. +** +** This routine can also be used to detect polygons that rotate in +** the wrong direction. Polygons are suppose to be counter-clockwise (CCW). +** This routine returns a negative value for clockwise (CW) polygons. +*/ +static double geopolyArea(GeoPoly *p){ + double rArea = 0.0; + int ii; + for(ii=0; iinVertex-1; ii++){ + rArea += (GeoX(p,ii) - GeoX(p,ii+1)) /* (x0 - x1) */ + * (GeoY(p,ii) + GeoY(p,ii+1)) /* (y0 + y1) */ + * 0.5; + } + rArea += (GeoX(p,ii) - GeoX(p,0)) /* (xN - x0) */ + * (GeoY(p,ii) + GeoY(p,0)) /* (yN + y0) */ + * 0.5; + return rArea; +} + +/* +** Implementation of the geopoly_area(X) function. +** +** If the input is a well-formed Geopoly BLOB then return the area +** enclosed by the polygon. If the polygon circulates clockwise instead +** of counterclockwise (as it should) then return the negative of the +** enclosed area. Otherwise return NULL. +*/ +static void geopolyAreaFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + if( p ){ + sqlite3_result_double(context, geopolyArea(p)); + sqlite3_free(p); + } +} + +/* +** Implementation of the geopoly_ccw(X) function. +** +** If the rotation of polygon X is clockwise (incorrect) instead of +** counter-clockwise (the correct winding order according to RFC7946) +** then reverse the order of the vertexes in polygon X. +** +** In other words, this routine returns a CCW polygon regardless of the +** winding order of its input. +** +** Use this routine to sanitize historical inputs that that sometimes +** contain polygons that wind in the wrong direction. +*/ +static void geopolyCcwFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + if( p ){ + if( geopolyArea(p)<0.0 ){ + int ii, jj; + for(ii=1, jj=p->nVertex-1; iihdr, + 4+8*p->nVertex, SQLITE_TRANSIENT); + sqlite3_free(p); + } +} + +#define GEOPOLY_PI 3.1415926535897932385 + +/* Fast approximation for sine(X) for X between -0.5*pi and 2*pi +*/ +static double geopolySine(double r){ + assert( r>=-0.5*GEOPOLY_PI && r<=2.0*GEOPOLY_PI ); + if( r>=1.5*GEOPOLY_PI ){ + r -= 2.0*GEOPOLY_PI; + } + if( r>=0.5*GEOPOLY_PI ){ + return -geopolySine(r-GEOPOLY_PI); + }else{ + double r2 = r*r; + double r3 = r2*r; + double r5 = r3*r2; + return 0.9996949*r - 0.1656700*r3 + 0.0075134*r5; + } +} + +/* +** Function: geopoly_regular(X,Y,R,N) +** +** Construct a simple, convex, regular polygon centered at X, Y +** with circumradius R and with N sides. +*/ +static void geopolyRegularFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + double x = sqlite3_value_double(argv[0]); + double y = sqlite3_value_double(argv[1]); + double r = sqlite3_value_double(argv[2]); + int n = sqlite3_value_int(argv[3]); + int i; + GeoPoly *p; + + if( n<3 || r<=0.0 ) return; + if( n>1000 ) n = 1000; + p = sqlite3_malloc64( sizeof(*p) + (n-1)*2*sizeof(GeoCoord) ); + if( p==0 ){ + sqlite3_result_error_nomem(context); + return; + } + i = 1; + p->hdr[0] = *(unsigned char*)&i; + p->hdr[1] = 0; + p->hdr[2] = (n>>8)&0xff; + p->hdr[3] = n&0xff; + for(i=0; ihdr, 4+8*n, SQLITE_TRANSIENT); + sqlite3_free(p); +} + +/* +** If pPoly is a polygon, compute its bounding box. Then: +** +** (1) if aCoord!=0 store the bounding box in aCoord, returning NULL +** (2) otherwise, compute a GeoPoly for the bounding box and return the +** new GeoPoly +** +** If pPoly is NULL but aCoord is not NULL, then compute a new GeoPoly from +** the bounding box in aCoord and return a pointer to that GeoPoly. +*/ +static GeoPoly *geopolyBBox( + sqlite3_context *context, /* For recording the error */ + sqlite3_value *pPoly, /* The polygon */ + RtreeCoord *aCoord, /* Results here */ + int *pRc /* Error code here */ +){ + GeoPoly *pOut = 0; + GeoPoly *p; + float mnX, mxX, mnY, mxY; + if( pPoly==0 && aCoord!=0 ){ + p = 0; + mnX = aCoord[0].f; + mxX = aCoord[1].f; + mnY = aCoord[2].f; + mxY = aCoord[3].f; + goto geopolyBboxFill; + }else{ + p = geopolyFuncParam(context, pPoly, pRc); + } + if( p ){ + int ii; + mnX = mxX = GeoX(p,0); + mnY = mxY = GeoY(p,0); + for(ii=1; iinVertex; ii++){ + double r = GeoX(p,ii); + if( rmxX ) mxX = (float)r; + r = GeoY(p,ii); + if( rmxY ) mxY = (float)r; + } + if( pRc ) *pRc = SQLITE_OK; + if( aCoord==0 ){ + geopolyBboxFill: + pOut = sqlite3_realloc64(p, GEOPOLY_SZ(4)); + if( pOut==0 ){ + sqlite3_free(p); + if( context ) sqlite3_result_error_nomem(context); + if( pRc ) *pRc = SQLITE_NOMEM; + return 0; + } + pOut->nVertex = 4; + ii = 1; + pOut->hdr[0] = *(unsigned char*)ⅈ + pOut->hdr[1] = 0; + pOut->hdr[2] = 0; + pOut->hdr[3] = 4; + GeoX(pOut,0) = mnX; + GeoY(pOut,0) = mnY; + GeoX(pOut,1) = mxX; + GeoY(pOut,1) = mnY; + GeoX(pOut,2) = mxX; + GeoY(pOut,2) = mxY; + GeoX(pOut,3) = mnX; + GeoY(pOut,3) = mxY; + }else{ + sqlite3_free(p); + aCoord[0].f = mnX; + aCoord[1].f = mxX; + aCoord[2].f = mnY; + aCoord[3].f = mxY; + } + }else if( aCoord ){ + memset(aCoord, 0, sizeof(RtreeCoord)*4); + } + return pOut; +} + +/* +** Implementation of the geopoly_bbox(X) SQL function. +*/ +static void geopolyBBoxFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p = geopolyBBox(context, argv[0], 0, 0); + if( p ){ + sqlite3_result_blob(context, p->hdr, + 4+8*p->nVertex, SQLITE_TRANSIENT); + sqlite3_free(p); + } +} + +/* +** State vector for the geopoly_group_bbox() aggregate function. +*/ +typedef struct GeoBBox GeoBBox; +struct GeoBBox { + int isInit; + RtreeCoord a[4]; +}; + + +/* +** Implementation of the geopoly_group_bbox(X) aggregate SQL function. +*/ +static void geopolyBBoxStep( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + RtreeCoord a[4]; + int rc = SQLITE_OK; + (void)geopolyBBox(context, argv[0], a, &rc); + if( rc==SQLITE_OK ){ + GeoBBox *pBBox; + pBBox = (GeoBBox*)sqlite3_aggregate_context(context, sizeof(*pBBox)); + if( pBBox==0 ) return; + if( pBBox->isInit==0 ){ + pBBox->isInit = 1; + memcpy(pBBox->a, a, sizeof(RtreeCoord)*4); + }else{ + if( a[0].f < pBBox->a[0].f ) pBBox->a[0] = a[0]; + if( a[1].f > pBBox->a[1].f ) pBBox->a[1] = a[1]; + if( a[2].f < pBBox->a[2].f ) pBBox->a[2] = a[2]; + if( a[3].f > pBBox->a[3].f ) pBBox->a[3] = a[3]; + } + } +} +static void geopolyBBoxFinal( + sqlite3_context *context +){ + GeoPoly *p; + GeoBBox *pBBox; + pBBox = (GeoBBox*)sqlite3_aggregate_context(context, 0); + if( pBBox==0 ) return; + p = geopolyBBox(context, 0, pBBox->a, 0); + if( p ){ + sqlite3_result_blob(context, p->hdr, + 4+8*p->nVertex, SQLITE_TRANSIENT); + sqlite3_free(p); + } +} + + +/* +** Determine if point (x0,y0) is beneath line segment (x1,y1)->(x2,y2). +** Returns: +** +** +2 x0,y0 is on the line segement +** +** +1 x0,y0 is beneath line segment +** +** 0 x0,y0 is not on or beneath the line segment or the line segment +** is vertical and x0,y0 is not on the line segment +** +** The left-most coordinate min(x1,x2) is not considered to be part of +** the line segment for the purposes of this analysis. +*/ +static int pointBeneathLine( + double x0, double y0, + double x1, double y1, + double x2, double y2 +){ + double y; + if( x0==x1 && y0==y1 ) return 2; + if( x1x2 ) return 0; + }else if( x1>x2 ){ + if( x0<=x2 || x0>x1 ) return 0; + }else{ + /* Vertical line segment */ + if( x0!=x1 ) return 0; + if( y0y1 && y0>y2 ) return 0; + return 2; + } + y = y1 + (y2-y1)*(x0-x1)/(x2-x1); + if( y0==y ) return 2; + if( y0nVertex-1; ii++){ + v = pointBeneathLine(x0,y0,GeoX(p1,ii), GeoY(p1,ii), + GeoX(p1,ii+1),GeoY(p1,ii+1)); + if( v==2 ) break; + cnt += v; + } + if( v!=2 ){ + v = pointBeneathLine(x0,y0,GeoX(p1,ii), GeoY(p1,ii), + GeoX(p1,0), GeoY(p1,0)); + } + if( v==2 ){ + sqlite3_result_int(context, 1); + }else if( ((v+cnt)&1)==0 ){ + sqlite3_result_int(context, 0); + }else{ + sqlite3_result_int(context, 2); + } + sqlite3_free(p1); +} + +/* Forward declaration */ +static int geopolyOverlap(GeoPoly *p1, GeoPoly *p2); + +/* +** SQL function: geopoly_within(P1,P2) +** +** Return +2 if P1 and P2 are the same polygon +** Return +1 if P2 is contained within P1 +** Return 0 if any part of P2 is on the outside of P1 +** +*/ +static void geopolyWithinFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0); + GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0); + if( p1 && p2 ){ + int x = geopolyOverlap(p1, p2); + if( x<0 ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_int(context, x==2 ? 1 : x==4 ? 2 : 0); + } + } + sqlite3_free(p1); + sqlite3_free(p2); +} + +/* Objects used by the overlap algorihm. */ +typedef struct GeoEvent GeoEvent; +typedef struct GeoSegment GeoSegment; +typedef struct GeoOverlap GeoOverlap; +struct GeoEvent { + double x; /* X coordinate at which event occurs */ + int eType; /* 0 for ADD, 1 for REMOVE */ + GeoSegment *pSeg; /* The segment to be added or removed */ + GeoEvent *pNext; /* Next event in the sorted list */ +}; +struct GeoSegment { + double C, B; /* y = C*x + B */ + double y; /* Current y value */ + float y0; /* Initial y value */ + unsigned char side; /* 1 for p1, 2 for p2 */ + unsigned int idx; /* Which segment within the side */ + GeoSegment *pNext; /* Next segment in a list sorted by y */ +}; +struct GeoOverlap { + GeoEvent *aEvent; /* Array of all events */ + GeoSegment *aSegment; /* Array of all segments */ + int nEvent; /* Number of events */ + int nSegment; /* Number of segments */ +}; + +/* +** Add a single segment and its associated events. +*/ +static void geopolyAddOneSegment( + GeoOverlap *p, + GeoCoord x0, + GeoCoord y0, + GeoCoord x1, + GeoCoord y1, + unsigned char side, + unsigned int idx +){ + GeoSegment *pSeg; + GeoEvent *pEvent; + if( x0==x1 ) return; /* Ignore vertical segments */ + if( x0>x1 ){ + GeoCoord t = x0; + x0 = x1; + x1 = t; + t = y0; + y0 = y1; + y1 = t; + } + pSeg = p->aSegment + p->nSegment; + p->nSegment++; + pSeg->C = (y1-y0)/(x1-x0); + pSeg->B = y1 - x1*pSeg->C; + pSeg->y0 = y0; + pSeg->side = side; + pSeg->idx = idx; + pEvent = p->aEvent + p->nEvent; + p->nEvent++; + pEvent->x = x0; + pEvent->eType = 0; + pEvent->pSeg = pSeg; + pEvent = p->aEvent + p->nEvent; + p->nEvent++; + pEvent->x = x1; + pEvent->eType = 1; + pEvent->pSeg = pSeg; +} + + + +/* +** Insert all segments and events for polygon pPoly. +*/ +static void geopolyAddSegments( + GeoOverlap *p, /* Add segments to this Overlap object */ + GeoPoly *pPoly, /* Take all segments from this polygon */ + unsigned char side /* The side of pPoly */ +){ + unsigned int i; + GeoCoord *x; + for(i=0; i<(unsigned)pPoly->nVertex-1; i++){ + x = &GeoX(pPoly,i); + geopolyAddOneSegment(p, x[0], x[1], x[2], x[3], side, i); + } + x = &GeoX(pPoly,i); + geopolyAddOneSegment(p, x[0], x[1], pPoly->a[0], pPoly->a[1], side, i); +} + +/* +** Merge two lists of sorted events by X coordinate +*/ +static GeoEvent *geopolyEventMerge(GeoEvent *pLeft, GeoEvent *pRight){ + GeoEvent head, *pLast; + head.pNext = 0; + pLast = &head; + while( pRight && pLeft ){ + if( pRight->x <= pLeft->x ){ + pLast->pNext = pRight; + pLast = pRight; + pRight = pRight->pNext; + }else{ + pLast->pNext = pLeft; + pLast = pLeft; + pLeft = pLeft->pNext; + } + } + pLast->pNext = pRight ? pRight : pLeft; + return head.pNext; +} + +/* +** Sort an array of nEvent event objects into a list. +*/ +static GeoEvent *geopolySortEventsByX(GeoEvent *aEvent, int nEvent){ + int mx = 0; + int i, j; + GeoEvent *p; + GeoEvent *a[50]; + for(i=0; ipNext = 0; + for(j=0; j=mx ) mx = j+1; + } + p = 0; + for(i=0; iy - pLeft->y; + if( r==0.0 ) r = pRight->C - pLeft->C; + if( r<0.0 ){ + pLast->pNext = pRight; + pLast = pRight; + pRight = pRight->pNext; + }else{ + pLast->pNext = pLeft; + pLast = pLeft; + pLeft = pLeft->pNext; + } + } + pLast->pNext = pRight ? pRight : pLeft; + return head.pNext; +} + +/* +** Sort a list of GeoSegments in order of increasing Y and in the event of +** a tie, increasing C (slope). +*/ +static GeoSegment *geopolySortSegmentsByYAndC(GeoSegment *pList){ + int mx = 0; + int i; + GeoSegment *p; + GeoSegment *a[50]; + while( pList ){ + p = pList; + pList = pList->pNext; + p->pNext = 0; + for(i=0; i=mx ) mx = i+1; + } + p = 0; + for(i=0; inVertex + p2->nVertex + 2; + GeoOverlap *p; + sqlite3_int64 nByte; + GeoEvent *pThisEvent; + double rX; + int rc = 0; + int needSort = 0; + GeoSegment *pActive = 0; + GeoSegment *pSeg; + unsigned char aOverlap[4]; + + nByte = sizeof(GeoEvent)*nVertex*2 + + sizeof(GeoSegment)*nVertex + + sizeof(GeoOverlap); + p = sqlite3_malloc64( nByte ); + if( p==0 ) return -1; + p->aEvent = (GeoEvent*)&p[1]; + p->aSegment = (GeoSegment*)&p->aEvent[nVertex*2]; + p->nEvent = p->nSegment = 0; + geopolyAddSegments(p, p1, 1); + geopolyAddSegments(p, p2, 2); + pThisEvent = geopolySortEventsByX(p->aEvent, p->nEvent); + rX = pThisEvent && pThisEvent->x==0.0 ? -1.0 : 0.0; + memset(aOverlap, 0, sizeof(aOverlap)); + while( pThisEvent ){ + if( pThisEvent->x!=rX ){ + GeoSegment *pPrev = 0; + int iMask = 0; + GEODEBUG(("Distinct X: %g\n", pThisEvent->x)); + rX = pThisEvent->x; + if( needSort ){ + GEODEBUG(("SORT\n")); + pActive = geopolySortSegmentsByYAndC(pActive); + needSort = 0; + } + for(pSeg=pActive; pSeg; pSeg=pSeg->pNext){ + if( pPrev ){ + if( pPrev->y!=pSeg->y ){ + GEODEBUG(("MASK: %d\n", iMask)); + aOverlap[iMask] = 1; + } + } + iMask ^= pSeg->side; + pPrev = pSeg; + } + pPrev = 0; + for(pSeg=pActive; pSeg; pSeg=pSeg->pNext){ + double y = pSeg->C*rX + pSeg->B; + GEODEBUG(("Segment %d.%d %g->%g\n", pSeg->side, pSeg->idx, pSeg->y, y)); + pSeg->y = y; + if( pPrev ){ + if( pPrev->y>pSeg->y && pPrev->side!=pSeg->side ){ + rc = 1; + GEODEBUG(("Crossing: %d.%d and %d.%d\n", + pPrev->side, pPrev->idx, + pSeg->side, pSeg->idx)); + goto geopolyOverlapDone; + }else if( pPrev->y!=pSeg->y ){ + GEODEBUG(("MASK: %d\n", iMask)); + aOverlap[iMask] = 1; + } + } + iMask ^= pSeg->side; + pPrev = pSeg; + } + } + GEODEBUG(("%s %d.%d C=%g B=%g\n", + pThisEvent->eType ? "RM " : "ADD", + pThisEvent->pSeg->side, pThisEvent->pSeg->idx, + pThisEvent->pSeg->C, + pThisEvent->pSeg->B)); + if( pThisEvent->eType==0 ){ + /* Add a segment */ + pSeg = pThisEvent->pSeg; + pSeg->y = pSeg->y0; + pSeg->pNext = pActive; + pActive = pSeg; + needSort = 1; + }else{ + /* Remove a segment */ + if( pActive==pThisEvent->pSeg ){ + pActive = ALWAYS(pActive) ? pActive->pNext : 0; + }else{ + for(pSeg=pActive; pSeg; pSeg=pSeg->pNext){ + if( pSeg->pNext==pThisEvent->pSeg ){ + pSeg->pNext = ALWAYS(pSeg->pNext) ? pSeg->pNext->pNext : 0; + break; + } + } + } + } + pThisEvent = pThisEvent->pNext; + } + if( aOverlap[3]==0 ){ + rc = 0; + }else if( aOverlap[1]!=0 && aOverlap[2]==0 ){ + rc = 3; + }else if( aOverlap[1]==0 && aOverlap[2]!=0 ){ + rc = 2; + }else if( aOverlap[1]==0 && aOverlap[2]==0 ){ + rc = 4; + }else{ + rc = 1; + } + +geopolyOverlapDone: + sqlite3_free(p); + return rc; +} + +/* +** SQL function: geopoly_overlap(P1,P2) +** +** Determine whether or not P1 and P2 overlap. Return value: +** +** 0 The two polygons are disjoint +** 1 They overlap +** 2 P1 is completely contained within P2 +** 3 P2 is completely contained within P1 +** 4 P1 and P2 are the same polygon +** NULL Either P1 or P2 or both are not valid polygons +*/ +static void geopolyOverlapFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0); + GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0); + if( p1 && p2 ){ + int x = geopolyOverlap(p1, p2); + if( x<0 ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_int(context, x); + } + } + sqlite3_free(p1); + sqlite3_free(p2); +} + +/* +** Enable or disable debugging output +*/ +static void geopolyDebugFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ +#ifdef GEOPOLY_ENABLE_DEBUG + geo_debug = sqlite3_value_int(argv[0]); +#endif +} + +/* +** This function is the implementation of both the xConnect and xCreate +** methods of the geopoly virtual table. +** +** argv[0] -> module name +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> column names... +*/ +static int geopolyInit( + sqlite3 *db, /* Database connection */ + void *pAux, /* One of the RTREE_COORD_* constants */ + int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ + sqlite3_vtab **ppVtab, /* OUT: New virtual table */ + char **pzErr, /* OUT: Error message, if any */ + int isCreate /* True for xCreate, false for xConnect */ +){ + int rc = SQLITE_OK; + Rtree *pRtree; + sqlite3_int64 nDb; /* Length of string argv[1] */ + sqlite3_int64 nName; /* Length of string argv[2] */ + sqlite3_str *pSql; + char *zSql; + int ii; + + sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + + /* Allocate the sqlite3_vtab structure */ + nDb = strlen(argv[1]); + nName = strlen(argv[2]); + pRtree = (Rtree *)sqlite3_malloc64(sizeof(Rtree)+nDb+nName+2); + if( !pRtree ){ + return SQLITE_NOMEM; + } + memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); + pRtree->nBusy = 1; + pRtree->base.pModule = &rtreeModule; + pRtree->zDb = (char *)&pRtree[1]; + pRtree->zName = &pRtree->zDb[nDb+1]; + pRtree->eCoordType = RTREE_COORD_REAL32; + pRtree->nDim = 2; + pRtree->nDim2 = 4; + memcpy(pRtree->zDb, argv[1], nDb); + memcpy(pRtree->zName, argv[2], nName); + + + /* Create/Connect to the underlying relational database schema. If + ** that is successful, call sqlite3_declare_vtab() to configure + ** the r-tree table schema. + */ + pSql = sqlite3_str_new(db); + sqlite3_str_appendf(pSql, "CREATE TABLE x(_shape"); + pRtree->nAux = 1; /* Add one for _shape */ + pRtree->nAuxNotNull = 1; /* The _shape column is always not-null */ + for(ii=3; iinAux++; + sqlite3_str_appendf(pSql, ",%s", argv[ii]); + } + sqlite3_str_appendf(pSql, ");"); + zSql = sqlite3_str_finish(pSql); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else if( SQLITE_OK!=(rc = sqlite3_declare_vtab(db, zSql)) ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } + sqlite3_free(zSql); + if( rc ) goto geopolyInit_fail; + pRtree->nBytesPerCell = 8 + pRtree->nDim2*4; + + /* Figure out the node size to use. */ + rc = getNodeSize(db, pRtree, isCreate, pzErr); + if( rc ) goto geopolyInit_fail; + rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate); + if( rc ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + goto geopolyInit_fail; + } + + *ppVtab = (sqlite3_vtab *)pRtree; + return SQLITE_OK; + +geopolyInit_fail: + if( rc==SQLITE_OK ) rc = SQLITE_ERROR; + assert( *ppVtab==0 ); + assert( pRtree->nBusy==1 ); + rtreeRelease(pRtree); + return rc; +} + + +/* +** GEOPOLY virtual table module xCreate method. +*/ +static int geopolyCreate( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + return geopolyInit(db, pAux, argc, argv, ppVtab, pzErr, 1); +} + +/* +** GEOPOLY virtual table module xConnect method. +*/ +static int geopolyConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + return geopolyInit(db, pAux, argc, argv, ppVtab, pzErr, 0); +} + + +/* +** GEOPOLY virtual table module xFilter method. +** +** Query plans: +** +** 1 rowid lookup +** 2 search for objects overlapping the same bounding box +** that contains polygon argv[0] +** 3 search for objects overlapping the same bounding box +** that contains polygon argv[0] +** 4 full table scan +*/ +static int geopolyFilter( + sqlite3_vtab_cursor *pVtabCursor, /* The cursor to initialize */ + int idxNum, /* Query plan */ + const char *idxStr, /* Not Used */ + int argc, sqlite3_value **argv /* Parameters to the query plan */ +){ + Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + RtreeNode *pRoot = 0; + int rc = SQLITE_OK; + int iCell = 0; + + rtreeReference(pRtree); + + /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ + resetCursor(pCsr); + + pCsr->iStrategy = idxNum; + if( idxNum==1 ){ + /* Special case - lookup by rowid. */ + RtreeNode *pLeaf; /* Leaf on which the required cell resides */ + RtreeSearchPoint *p; /* Search point for the leaf */ + i64 iRowid = sqlite3_value_int64(argv[0]); + i64 iNode = 0; + rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); + if( rc==SQLITE_OK && pLeaf!=0 ){ + p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); + assert( p!=0 ); /* Always returns pCsr->sPoint */ + pCsr->aNode[0] = pLeaf; + p->id = iNode; + p->eWithin = PARTLY_WITHIN; + rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); + p->iCell = (u8)iCell; + RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); + }else{ + pCsr->atEOF = 1; + } + }else{ + /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array + ** with the configured constraints. + */ + rc = nodeAcquire(pRtree, 1, 0, &pRoot); + if( rc==SQLITE_OK && idxNum<=3 ){ + RtreeCoord bbox[4]; + RtreeConstraint *p; + assert( argc==1 ); + assert( argv[0]!=0 ); + geopolyBBox(0, argv[0], bbox, &rc); + if( rc ){ + goto geopoly_filter_end; + } + pCsr->aConstraint = p = sqlite3_malloc(sizeof(RtreeConstraint)*4); + pCsr->nConstraint = 4; + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*4); + memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); + if( idxNum==2 ){ + /* Overlap query */ + p->op = 'B'; + p->iCoord = 0; + p->u.rValue = bbox[1].f; + p++; + p->op = 'D'; + p->iCoord = 1; + p->u.rValue = bbox[0].f; + p++; + p->op = 'B'; + p->iCoord = 2; + p->u.rValue = bbox[3].f; + p++; + p->op = 'D'; + p->iCoord = 3; + p->u.rValue = bbox[2].f; + }else{ + /* Within query */ + p->op = 'D'; + p->iCoord = 0; + p->u.rValue = bbox[0].f; + p++; + p->op = 'B'; + p->iCoord = 1; + p->u.rValue = bbox[1].f; + p++; + p->op = 'D'; + p->iCoord = 2; + p->u.rValue = bbox[2].f; + p++; + p->op = 'B'; + p->iCoord = 3; + p->u.rValue = bbox[3].f; + } + } + } + if( rc==SQLITE_OK ){ + RtreeSearchPoint *pNew; + pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, (u8)(pRtree->iDepth+1)); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + goto geopoly_filter_end; + } + pNew->id = 1; + pNew->iCell = 0; + pNew->eWithin = PARTLY_WITHIN; + assert( pCsr->bPoint==1 ); + pCsr->aNode[0] = pRoot; + pRoot = 0; + RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); + rc = rtreeStepToLeaf(pCsr); + } + } + +geopoly_filter_end: + nodeRelease(pRtree, pRoot); + rtreeRelease(pRtree); + return rc; +} + +/* +** Rtree virtual table module xBestIndex method. There are three +** table scan strategies to choose from (in order from most to +** least desirable): +** +** idxNum idxStr Strategy +** ------------------------------------------------ +** 1 "rowid" Direct lookup by rowid. +** 2 "rtree" R-tree overlap query using geopoly_overlap() +** 3 "rtree" R-tree within query using geopoly_within() +** 4 "fullscan" full-table scan. +** ------------------------------------------------ +*/ +static int geopolyBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + int ii; + int iRowidTerm = -1; + int iFuncTerm = -1; + int idxNum = 0; + + for(ii=0; iinConstraint; ii++){ + struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; + if( !p->usable ) continue; + if( p->iColumn<0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ ){ + iRowidTerm = ii; + break; + } + if( p->iColumn==0 && p->op>=SQLITE_INDEX_CONSTRAINT_FUNCTION ){ + /* p->op==SQLITE_INDEX_CONSTRAINT_FUNCTION for geopoly_overlap() + ** p->op==(SQLITE_INDEX_CONTRAINT_FUNCTION+1) for geopoly_within(). + ** See geopolyFindFunction() */ + iFuncTerm = ii; + idxNum = p->op - SQLITE_INDEX_CONSTRAINT_FUNCTION + 2; + } + } + + if( iRowidTerm>=0 ){ + pIdxInfo->idxNum = 1; + pIdxInfo->idxStr = "rowid"; + pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1; + pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1; + pIdxInfo->estimatedCost = 30.0; + pIdxInfo->estimatedRows = 1; + pIdxInfo->idxFlags = SQLITE_INDEX_SCAN_UNIQUE; + return SQLITE_OK; + } + if( iFuncTerm>=0 ){ + pIdxInfo->idxNum = idxNum; + pIdxInfo->idxStr = "rtree"; + pIdxInfo->aConstraintUsage[iFuncTerm].argvIndex = 1; + pIdxInfo->aConstraintUsage[iFuncTerm].omit = 0; + pIdxInfo->estimatedCost = 300.0; + pIdxInfo->estimatedRows = 10; + return SQLITE_OK; + } + pIdxInfo->idxNum = 4; + pIdxInfo->idxStr = "fullscan"; + pIdxInfo->estimatedCost = 3000000.0; + pIdxInfo->estimatedRows = 100000; + return SQLITE_OK; +} + + +/* +** GEOPOLY virtual table module xColumn method. +*/ +static int geopolyColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ + Rtree *pRtree = (Rtree *)cur->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)cur; + RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); + int rc = SQLITE_OK; + RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); + + if( rc ) return rc; + if( p==0 ) return SQLITE_OK; + if( i==0 && sqlite3_vtab_nochange(ctx) ) return SQLITE_OK; + if( i<=pRtree->nAux ){ + if( !pCsr->bAuxValid ){ + if( pCsr->pReadAux==0 ){ + rc = sqlite3_prepare_v3(pRtree->db, pRtree->zReadAuxSql, -1, 0, + &pCsr->pReadAux, 0); + if( rc ) return rc; + } + sqlite3_bind_int64(pCsr->pReadAux, 1, + nodeGetRowid(pRtree, pNode, p->iCell)); + rc = sqlite3_step(pCsr->pReadAux); + if( rc==SQLITE_ROW ){ + pCsr->bAuxValid = 1; + }else{ + sqlite3_reset(pCsr->pReadAux); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + return rc; + } + } + sqlite3_result_value(ctx, sqlite3_column_value(pCsr->pReadAux, i+2)); + } + return SQLITE_OK; +} + + +/* +** The xUpdate method for GEOPOLY module virtual tables. +** +** For DELETE: +** +** argv[0] = the rowid to be deleted +** +** For INSERT: +** +** argv[0] = SQL NULL +** argv[1] = rowid to insert, or an SQL NULL to select automatically +** argv[2] = _shape column +** argv[3] = first application-defined column.... +** +** For UPDATE: +** +** argv[0] = rowid to modify. Never NULL +** argv[1] = rowid after the change. Never NULL +** argv[2] = new value for _shape +** argv[3] = new value for first application-defined column.... +*/ +static int geopolyUpdate( + sqlite3_vtab *pVtab, + int nData, + sqlite3_value **aData, + sqlite_int64 *pRowid +){ + Rtree *pRtree = (Rtree *)pVtab; + int rc = SQLITE_OK; + RtreeCell cell; /* New cell to insert if nData>1 */ + i64 oldRowid; /* The old rowid */ + int oldRowidValid; /* True if oldRowid is valid */ + i64 newRowid; /* The new rowid */ + int newRowidValid; /* True if newRowid is valid */ + int coordChange = 0; /* Change in coordinates */ + + if( pRtree->nNodeRef ){ + /* Unable to write to the btree while another cursor is reading from it, + ** since the write might do a rebalance which would disrupt the read + ** cursor. */ + return SQLITE_LOCKED_VTAB; + } + rtreeReference(pRtree); + assert(nData>=1); + + oldRowidValid = sqlite3_value_type(aData[0])!=SQLITE_NULL;; + oldRowid = oldRowidValid ? sqlite3_value_int64(aData[0]) : 0; + newRowidValid = nData>1 && sqlite3_value_type(aData[1])!=SQLITE_NULL; + newRowid = newRowidValid ? sqlite3_value_int64(aData[1]) : 0; + cell.iRowid = newRowid; + + if( nData>1 /* not a DELETE */ + && (!oldRowidValid /* INSERT */ + || !sqlite3_value_nochange(aData[2]) /* UPDATE _shape */ + || oldRowid!=newRowid) /* Rowid change */ + ){ + assert( aData[2]!=0 ); + geopolyBBox(0, aData[2], cell.aCoord, &rc); + if( rc ){ + if( rc==SQLITE_ERROR ){ + pVtab->zErrMsg = + sqlite3_mprintf("_shape does not contain a valid polygon"); + } + goto geopoly_update_end; + } + coordChange = 1; + + /* If a rowid value was supplied, check if it is already present in + ** the table. If so, the constraint has failed. */ + if( newRowidValid && (!oldRowidValid || oldRowid!=newRowid) ){ + int steprc; + sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid); + steprc = sqlite3_step(pRtree->pReadRowid); + rc = sqlite3_reset(pRtree->pReadRowid); + if( SQLITE_ROW==steprc ){ + if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){ + rc = rtreeDeleteRowid(pRtree, cell.iRowid); + }else{ + rc = rtreeConstraintError(pRtree, 0); + } + } + } + } + + /* If aData[0] is not an SQL NULL value, it is the rowid of a + ** record to delete from the r-tree table. The following block does + ** just that. + */ + if( rc==SQLITE_OK && (nData==1 || (coordChange && oldRowidValid)) ){ + rc = rtreeDeleteRowid(pRtree, oldRowid); + } + + /* If the aData[] array contains more than one element, elements + ** (aData[2]..aData[argc-1]) contain a new record to insert into + ** the r-tree structure. + */ + if( rc==SQLITE_OK && nData>1 && coordChange ){ + /* Insert the new record into the r-tree */ + RtreeNode *pLeaf = 0; + if( !newRowidValid ){ + rc = rtreeNewRowid(pRtree, &cell.iRowid); + } + *pRowid = cell.iRowid; + if( rc==SQLITE_OK ){ + rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); + } + if( rc==SQLITE_OK ){ + int rc2; + pRtree->iReinsertHeight = -1; + rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); + rc2 = nodeRelease(pRtree, pLeaf); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + + /* Change the data */ + if( rc==SQLITE_OK && nData>1 ){ + sqlite3_stmt *pUp = pRtree->pWriteAux; + int jj; + int nChange = 0; + sqlite3_bind_int64(pUp, 1, cell.iRowid); + assert( pRtree->nAux>=1 ); + if( sqlite3_value_nochange(aData[2]) ){ + sqlite3_bind_null(pUp, 2); + }else{ + GeoPoly *p = 0; + if( sqlite3_value_type(aData[2])==SQLITE_TEXT + && (p = geopolyFuncParam(0, aData[2], &rc))!=0 + && rc==SQLITE_OK + ){ + sqlite3_bind_blob(pUp, 2, p->hdr, 4+8*p->nVertex, SQLITE_TRANSIENT); + }else{ + sqlite3_bind_value(pUp, 2, aData[2]); + } + sqlite3_free(p); + nChange = 1; + } + for(jj=1; jjnAux; jj++){ + nChange++; + sqlite3_bind_value(pUp, jj+2, aData[jj+2]); + } + if( nChange ){ + sqlite3_step(pUp); + rc = sqlite3_reset(pUp); + } + } + +geopoly_update_end: + rtreeRelease(pRtree); + return rc; +} + +/* +** Report that geopoly_overlap() is an overloaded function suitable +** for use in xBestIndex. +*/ +static int geopolyFindFunction( + sqlite3_vtab *pVtab, + int nArg, + const char *zName, + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), + void **ppArg +){ + if( sqlite3_stricmp(zName, "geopoly_overlap")==0 ){ + *pxFunc = geopolyOverlapFunc; + *ppArg = 0; + return SQLITE_INDEX_CONSTRAINT_FUNCTION; + } + if( sqlite3_stricmp(zName, "geopoly_within")==0 ){ + *pxFunc = geopolyWithinFunc; + *ppArg = 0; + return SQLITE_INDEX_CONSTRAINT_FUNCTION+1; + } + return 0; +} + + +static sqlite3_module geopolyModule = { + 3, /* iVersion */ + geopolyCreate, /* xCreate - create a table */ + geopolyConnect, /* xConnect - connect to an existing table */ + geopolyBestIndex, /* xBestIndex - Determine search strategy */ + rtreeDisconnect, /* xDisconnect - Disconnect from a table */ + rtreeDestroy, /* xDestroy - Drop a table */ + rtreeOpen, /* xOpen - open a cursor */ + rtreeClose, /* xClose - close a cursor */ + geopolyFilter, /* xFilter - configure scan constraints */ + rtreeNext, /* xNext - advance a cursor */ + rtreeEof, /* xEof */ + geopolyColumn, /* xColumn - read data */ + rtreeRowid, /* xRowid - read data */ + geopolyUpdate, /* xUpdate - write data */ + rtreeBeginTransaction, /* xBegin - begin transaction */ + rtreeEndTransaction, /* xSync - sync transaction */ + rtreeEndTransaction, /* xCommit - commit transaction */ + rtreeEndTransaction, /* xRollback - rollback transaction */ + geopolyFindFunction, /* xFindFunction - function overloading */ + rtreeRename, /* xRename - rename the table */ + rtreeSavepoint, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + rtreeShadowName /* xShadowName */ +}; + +static int sqlite3_geopoly_init(sqlite3 *db){ + int rc = SQLITE_OK; + static const struct { + void (*xFunc)(sqlite3_context*,int,sqlite3_value**); + signed char nArg; + unsigned char bPure; + const char *zName; + } aFunc[] = { + { geopolyAreaFunc, 1, 1, "geopoly_area" }, + { geopolyBlobFunc, 1, 1, "geopoly_blob" }, + { geopolyJsonFunc, 1, 1, "geopoly_json" }, + { geopolySvgFunc, -1, 1, "geopoly_svg" }, + { geopolyWithinFunc, 2, 1, "geopoly_within" }, + { geopolyContainsPointFunc, 3, 1, "geopoly_contains_point" }, + { geopolyOverlapFunc, 2, 1, "geopoly_overlap" }, + { geopolyDebugFunc, 1, 0, "geopoly_debug" }, + { geopolyBBoxFunc, 1, 1, "geopoly_bbox" }, + { geopolyXformFunc, 7, 1, "geopoly_xform" }, + { geopolyRegularFunc, 4, 1, "geopoly_regular" }, + { geopolyCcwFunc, 1, 1, "geopoly_ccw" }, + }; + static const struct { + void (*xStep)(sqlite3_context*,int,sqlite3_value**); + void (*xFinal)(sqlite3_context*); + const char *zName; + } aAgg[] = { + { geopolyBBoxStep, geopolyBBoxFinal, "geopoly_group_bbox" }, + }; + int i; + for(i=0; ixDestructor ) pInfo->xDestructor(pInfo->pContext); + sqlite3_free(p); +} + +/* +** This routine frees the BLOB that is returned by geomCallback(). +*/ +static void rtreeMatchArgFree(void *pArg){ + int i; + RtreeMatchArg *p = (RtreeMatchArg*)pArg; + for(i=0; inParam; i++){ + sqlite3_value_free(p->apSqlParam[i]); + } + sqlite3_free(p); +} + +/* +** Each call to sqlite3_rtree_geometry_callback() or +** sqlite3_rtree_query_callback() creates an ordinary SQLite +** scalar function that is implemented by this routine. +** +** All this function does is construct an RtreeMatchArg object that +** contains the geometry-checking callback routines and a list of +** parameters to this function, then return that RtreeMatchArg object +** as a BLOB. +** +** The R-Tree MATCH operator will read the returned BLOB, deserialize +** the RtreeMatchArg object, and use the RtreeMatchArg object to figure +** out which elements of the R-Tree should be returned by the query. +*/ +static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){ + RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx); + RtreeMatchArg *pBlob; + sqlite3_int64 nBlob; + int memErr = 0; + + nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(RtreeDValue) + + nArg*sizeof(sqlite3_value*); + pBlob = (RtreeMatchArg *)sqlite3_malloc64(nBlob); + if( !pBlob ){ + sqlite3_result_error_nomem(ctx); + }else{ + int i; + pBlob->iSize = nBlob; + pBlob->cb = pGeomCtx[0]; + pBlob->apSqlParam = (sqlite3_value**)&pBlob->aParam[nArg]; + pBlob->nParam = nArg; + for(i=0; iapSqlParam[i] = sqlite3_value_dup(aArg[i]); + if( pBlob->apSqlParam[i]==0 ) memErr = 1; +#ifdef SQLITE_RTREE_INT_ONLY + pBlob->aParam[i] = sqlite3_value_int64(aArg[i]); +#else + pBlob->aParam[i] = sqlite3_value_double(aArg[i]); +#endif + } + if( memErr ){ + sqlite3_result_error_nomem(ctx); + rtreeMatchArgFree(pBlob); + }else{ + sqlite3_result_pointer(ctx, pBlob, "RtreeMatchArg", rtreeMatchArgFree); + } + } +} + +/* +** Register a new geometry function for use with the r-tree MATCH operator. +*/ +SQLITE_API int sqlite3_rtree_geometry_callback( + sqlite3 *db, /* Register SQL function on this connection */ + const char *zGeom, /* Name of the new SQL function */ + int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*), /* Callback */ + void *pContext /* Extra data associated with the callback */ +){ + RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ + + /* Allocate and populate the context object. */ + pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); + if( !pGeomCtx ) return SQLITE_NOMEM; + pGeomCtx->xGeom = xGeom; + pGeomCtx->xQueryFunc = 0; + pGeomCtx->xDestructor = 0; + pGeomCtx->pContext = pContext; + return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, + (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback + ); +} + +/* +** Register a new 2nd-generation geometry function for use with the +** r-tree MATCH operator. +*/ +SQLITE_API int sqlite3_rtree_query_callback( + sqlite3 *db, /* Register SQL function on this connection */ + const char *zQueryFunc, /* Name of new SQL function */ + int (*xQueryFunc)(sqlite3_rtree_query_info*), /* Callback */ + void *pContext, /* Extra data passed into the callback */ + void (*xDestructor)(void*) /* Destructor for the extra data */ +){ + RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ + + /* Allocate and populate the context object. */ + pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); + if( !pGeomCtx ){ + if( xDestructor ) xDestructor(pContext); + return SQLITE_NOMEM; + } + pGeomCtx->xGeom = 0; + pGeomCtx->xQueryFunc = xQueryFunc; + pGeomCtx->xDestructor = xDestructor; + pGeomCtx->pContext = pContext; + return sqlite3_create_function_v2(db, zQueryFunc, -1, SQLITE_ANY, + (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback + ); +} + +#if !SQLITE_CORE +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int sqlite3_rtree_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3RtreeInit(db); +} +#endif + +#endif + +/************** End of rtree.c ***********************************************/ +/************** Begin file icu.c *********************************************/ +/* +** 2007 May 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $ +** +** This file implements an integration between the ICU library +** ("International Components for Unicode", an open-source library +** for handling unicode data) and SQLite. The integration uses +** ICU to provide the following to SQLite: +** +** * An implementation of the SQL regexp() function (and hence REGEXP +** operator) using the ICU uregex_XX() APIs. +** +** * Implementations of the SQL scalar upper() and lower() functions +** for case mapping. +** +** * Integration of ICU and SQLite collation sequences. +** +** * An implementation of the LIKE operator that uses ICU to +** provide case-independent matching. +*/ + +#if !defined(SQLITE_CORE) \ + || defined(SQLITE_ENABLE_ICU) \ + || defined(SQLITE_ENABLE_ICU_COLLATIONS) + +/* Include ICU headers */ +#include +#include +#include +#include + +/* #include */ + +#ifndef SQLITE_CORE +/* #include "sqlite3ext.h" */ + SQLITE_EXTENSION_INIT1 +#else +/* #include "sqlite3.h" */ +#endif + +/* +** This function is called when an ICU function called from within +** the implementation of an SQL scalar function returns an error. +** +** The scalar function context passed as the first argument is +** loaded with an error message based on the following two args. +*/ +static void icuFunctionError( + sqlite3_context *pCtx, /* SQLite scalar function context */ + const char *zName, /* Name of ICU function that failed */ + UErrorCode e /* Error code returned by ICU function */ +){ + char zBuf[128]; + sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); + zBuf[127] = '\0'; + sqlite3_result_error(pCtx, zBuf, -1); +} + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) + +/* +** Maximum length (in bytes) of the pattern in a LIKE or GLOB +** operator. +*/ +#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH +# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 +#endif + +/* +** Version of sqlite3_free() that is always a function, never a macro. +*/ +static void xFree(void *p){ + sqlite3_free(p); +} + +/* +** This lookup table is used to help decode the first byte of +** a multi-byte UTF8 character. It is copied here from SQLite source +** code file utf8.c. +*/ +static const unsigned char icuUtf8Trans1[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, +}; + +#define SQLITE_ICU_READ_UTF8(zIn, c) \ + c = *(zIn++); \ + if( c>=0xc0 ){ \ + c = icuUtf8Trans1[c-0xc0]; \ + while( (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + } + +#define SQLITE_ICU_SKIP_UTF8(zIn) \ + assert( *zIn ); \ + if( *(zIn++)>=0xc0 ){ \ + while( (*zIn & 0xc0)==0x80 ){zIn++;} \ + } + + +/* +** Compare two UTF-8 strings for equality where the first string is +** a "LIKE" expression. Return true (1) if they are the same and +** false (0) if they are different. +*/ +static int icuLikeCompare( + const uint8_t *zPattern, /* LIKE pattern */ + const uint8_t *zString, /* The UTF-8 string to compare against */ + const UChar32 uEsc /* The escape character */ +){ + static const uint32_t MATCH_ONE = (uint32_t)'_'; + static const uint32_t MATCH_ALL = (uint32_t)'%'; + + int prevEscape = 0; /* True if the previous character was uEsc */ + + while( 1 ){ + + /* Read (and consume) the next character from the input pattern. */ + uint32_t uPattern; + SQLITE_ICU_READ_UTF8(zPattern, uPattern); + if( uPattern==0 ) break; + + /* There are now 4 possibilities: + ** + ** 1. uPattern is an unescaped match-all character "%", + ** 2. uPattern is an unescaped match-one character "_", + ** 3. uPattern is an unescaped escape character, or + ** 4. uPattern is to be handled as an ordinary character + */ + if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){ + /* Case 1. */ + uint8_t c; + + /* Skip any MATCH_ALL or MATCH_ONE characters that follow a + ** MATCH_ALL. For each MATCH_ONE, skip one character in the + ** test string. + */ + while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){ + if( c==MATCH_ONE ){ + if( *zString==0 ) return 0; + SQLITE_ICU_SKIP_UTF8(zString); + } + zPattern++; + } + + if( *zPattern==0 ) return 1; + + while( *zString ){ + if( icuLikeCompare(zPattern, zString, uEsc) ){ + return 1; + } + SQLITE_ICU_SKIP_UTF8(zString); + } + return 0; + + }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){ + /* Case 2. */ + if( *zString==0 ) return 0; + SQLITE_ICU_SKIP_UTF8(zString); + + }else if( uPattern==(uint32_t)uEsc && !prevEscape ){ + /* Case 3. */ + prevEscape = 1; + + }else{ + /* Case 4. */ + uint32_t uString; + SQLITE_ICU_READ_UTF8(zString, uString); + uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT); + uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT); + if( uString!=uPattern ){ + return 0; + } + prevEscape = 0; + } + } + + return *zString==0; +} + +/* +** Implementation of the like() SQL function. This function implements +** the build-in LIKE operator. The first argument to the function is the +** pattern and the second argument is the string. So, the SQL statements: +** +** A LIKE B +** +** is implemented as like(B, A). If there is an escape character E, +** +** A LIKE B ESCAPE E +** +** is mapped to like(B, A, E). +*/ +static void icuLikeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zA = sqlite3_value_text(argv[0]); + const unsigned char *zB = sqlite3_value_text(argv[1]); + UChar32 uEsc = 0; + + /* Limit the length of the LIKE or GLOB pattern to avoid problems + ** of deep recursion and N*N behavior in patternCompare(). + */ + if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ + sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); + return; + } + + + if( argc==3 ){ + /* The escape character string must consist of a single UTF-8 character. + ** Otherwise, return an error. + */ + int nE= sqlite3_value_bytes(argv[2]); + const unsigned char *zE = sqlite3_value_text(argv[2]); + int i = 0; + if( zE==0 ) return; + U8_NEXT(zE, i, nE, uEsc); + if( i!=nE){ + sqlite3_result_error(context, + "ESCAPE expression must be a single character", -1); + return; + } + } + + if( zA && zB ){ + sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); + } +} + +/* +** Function to delete compiled regexp objects. Registered as +** a destructor function with sqlite3_set_auxdata(). +*/ +static void icuRegexpDelete(void *p){ + URegularExpression *pExpr = (URegularExpression *)p; + uregex_close(pExpr); +} + +/* +** Implementation of SQLite REGEXP operator. This scalar function takes +** two arguments. The first is a regular expression pattern to compile +** the second is a string to match against that pattern. If either +** argument is an SQL NULL, then NULL Is returned. Otherwise, the result +** is 1 if the string matches the pattern, or 0 otherwise. +** +** SQLite maps the regexp() function to the regexp() operator such +** that the following two are equivalent: +** +** zString REGEXP zPattern +** regexp(zPattern, zString) +** +** Uses the following ICU regexp APIs: +** +** uregex_open() +** uregex_matches() +** uregex_close() +*/ +static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ + UErrorCode status = U_ZERO_ERROR; + URegularExpression *pExpr; + UBool res; + const UChar *zString = sqlite3_value_text16(apArg[1]); + + (void)nArg; /* Unused parameter */ + + /* If the left hand side of the regexp operator is NULL, + ** then the result is also NULL. + */ + if( !zString ){ + return; + } + + pExpr = sqlite3_get_auxdata(p, 0); + if( !pExpr ){ + const UChar *zPattern = sqlite3_value_text16(apArg[0]); + if( !zPattern ){ + return; + } + pExpr = uregex_open(zPattern, -1, 0, 0, &status); + + if( U_SUCCESS(status) ){ + sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); + }else{ + assert(!pExpr); + icuFunctionError(p, "uregex_open", status); + return; + } + } + + /* Configure the text that the regular expression operates on. */ + uregex_setText(pExpr, zString, -1, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "uregex_setText", status); + return; + } + + /* Attempt the match */ + res = uregex_matches(pExpr, 0, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "uregex_matches", status); + return; + } + + /* Set the text that the regular expression operates on to a NULL + ** pointer. This is not really necessary, but it is tidier than + ** leaving the regular expression object configured with an invalid + ** pointer after this function returns. + */ + uregex_setText(pExpr, 0, 0, &status); + + /* Return 1 or 0. */ + sqlite3_result_int(p, res ? 1 : 0); +} + +/* +** Implementations of scalar functions for case mapping - upper() and +** lower(). Function upper() converts its input to upper-case (ABC). +** Function lower() converts to lower-case (abc). +** +** ICU provides two types of case mapping, "general" case mapping and +** "language specific". Refer to ICU documentation for the differences +** between the two. +** +** To utilise "general" case mapping, the upper() or lower() scalar +** functions are invoked with one argument: +** +** upper('ABC') -> 'abc' +** lower('abc') -> 'ABC' +** +** To access ICU "language specific" case mapping, upper() or lower() +** should be invoked with two arguments. The second argument is the name +** of the locale to use. Passing an empty string ("") or SQL NULL value +** as the second argument is the same as invoking the 1 argument version +** of upper() or lower(). +** +** lower('I', 'en_us') -> 'i' +** lower('I', 'tr_tr') -> '\u131' (small dotless i) +** +** http://www.icu-project.org/userguide/posix.html#case_mappings +*/ +static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ + const UChar *zInput; /* Pointer to input string */ + UChar *zOutput = 0; /* Pointer to output buffer */ + int nInput; /* Size of utf-16 input string in bytes */ + int nOut; /* Size of output buffer in bytes */ + int cnt; + int bToUpper; /* True for toupper(), false for tolower() */ + UErrorCode status; + const char *zLocale = 0; + + assert(nArg==1 || nArg==2); + bToUpper = (sqlite3_user_data(p)!=0); + if( nArg==2 ){ + zLocale = (const char *)sqlite3_value_text(apArg[1]); + } + + zInput = sqlite3_value_text16(apArg[0]); + if( !zInput ){ + return; + } + nOut = nInput = sqlite3_value_bytes16(apArg[0]); + if( nOut==0 ){ + sqlite3_result_text16(p, "", 0, SQLITE_STATIC); + return; + } + + for(cnt=0; cnt<2; cnt++){ + UChar *zNew = sqlite3_realloc(zOutput, nOut); + if( zNew==0 ){ + sqlite3_free(zOutput); + sqlite3_result_error_nomem(p); + return; + } + zOutput = zNew; + status = U_ZERO_ERROR; + if( bToUpper ){ + nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status); + }else{ + nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status); + } + + if( U_SUCCESS(status) ){ + sqlite3_result_text16(p, zOutput, nOut, xFree); + }else if( status==U_BUFFER_OVERFLOW_ERROR ){ + assert( cnt==0 ); + continue; + }else{ + icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status); + } + return; + } + assert( 0 ); /* Unreachable */ +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */ + +/* +** Collation sequence destructor function. The pCtx argument points to +** a UCollator structure previously allocated using ucol_open(). +*/ +static void icuCollationDel(void *pCtx){ + UCollator *p = (UCollator *)pCtx; + ucol_close(p); +} + +/* +** Collation sequence comparison function. The pCtx argument points to +** a UCollator structure previously allocated using ucol_open(). +*/ +static int icuCollationColl( + void *pCtx, + int nLeft, + const void *zLeft, + int nRight, + const void *zRight +){ + UCollationResult res; + UCollator *p = (UCollator *)pCtx; + res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); + switch( res ){ + case UCOL_LESS: return -1; + case UCOL_GREATER: return +1; + case UCOL_EQUAL: return 0; + } + assert(!"Unexpected return value from ucol_strcoll()"); + return 0; +} + +/* +** Implementation of the scalar function icu_load_collation(). +** +** This scalar function is used to add ICU collation based collation +** types to an SQLite database connection. It is intended to be called +** as follows: +** +** SELECT icu_load_collation(, ); +** +** Where is a string containing an ICU locale identifier (i.e. +** "en_AU", "tr_TR" etc.) and is the name of the +** collation sequence to create. +*/ +static void icuLoadCollation( + sqlite3_context *p, + int nArg, + sqlite3_value **apArg +){ + sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); + UErrorCode status = U_ZERO_ERROR; + const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ + const char *zName; /* SQL Collation sequence name (eg. "japanese") */ + UCollator *pUCollator; /* ICU library collation object */ + int rc; /* Return code from sqlite3_create_collation_x() */ + + assert(nArg==2); + (void)nArg; /* Unused parameter */ + zLocale = (const char *)sqlite3_value_text(apArg[0]); + zName = (const char *)sqlite3_value_text(apArg[1]); + + if( !zLocale || !zName ){ + return; + } + + pUCollator = ucol_open(zLocale, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "ucol_open", status); + return; + } + assert(p); + + rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, + icuCollationColl, icuCollationDel + ); + if( rc!=SQLITE_OK ){ + ucol_close(pUCollator); + sqlite3_result_error(p, "Error registering collation function", -1); + } +} + +/* +** Register the ICU extension functions with database db. +*/ +SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ +# define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS) + static const struct IcuScalar { + const char *zName; /* Function name */ + unsigned char nArg; /* Number of arguments */ + unsigned int enc; /* Optimal text encoding */ + unsigned char iContext; /* sqlite3_user_data() context */ + void (*xFunc)(sqlite3_context*,int,sqlite3_value**); + } scalars[] = { + {"icu_load_collation",2,SQLITE_UTF8|SQLITE_DIRECTONLY,1, icuLoadCollation}, +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) + {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS, 0, icuRegexpFunc}, + {"lower", 1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16}, + {"lower", 2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16}, + {"upper", 1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16}, + {"upper", 2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16}, + {"lower", 1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16}, + {"lower", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16}, + {"upper", 1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16}, + {"upper", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16}, + {"like", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuLikeFunc}, + {"like", 3, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuLikeFunc}, +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */ + }; + int rc = SQLITE_OK; + int i; + + for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){ + const struct IcuScalar *p = &scalars[i]; + rc = sqlite3_create_function( + db, p->zName, p->nArg, p->enc, + p->iContext ? (void*)db : (void*)0, + p->xFunc, 0, 0 + ); + } + + return rc; +} + +#if !SQLITE_CORE +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int sqlite3_icu_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3IcuInit(db); +} +#endif + +#endif + +/************** End of icu.c *************************************************/ +/************** Begin file fts3_icu.c ****************************************/ +/* +** 2007 June 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements a tokenizer for fts3 based on the ICU library. +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +#ifdef SQLITE_ENABLE_ICU + +/* #include */ +/* #include */ +/* #include "fts3_tokenizer.h" */ + +#include +/* #include */ +/* #include */ +#include + +typedef struct IcuTokenizer IcuTokenizer; +typedef struct IcuCursor IcuCursor; + +struct IcuTokenizer { + sqlite3_tokenizer base; + char *zLocale; +}; + +struct IcuCursor { + sqlite3_tokenizer_cursor base; + + UBreakIterator *pIter; /* ICU break-iterator object */ + int nChar; /* Number of UChar elements in pInput */ + UChar *aChar; /* Copy of input using utf-16 encoding */ + int *aOffset; /* Offsets of each character in utf-8 input */ + + int nBuffer; + char *zBuffer; + + int iToken; +}; + +/* +** Create a new tokenizer instance. +*/ +static int icuCreate( + int argc, /* Number of entries in argv[] */ + const char * const *argv, /* Tokenizer creation arguments */ + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ +){ + IcuTokenizer *p; + int n = 0; + + if( argc>0 ){ + n = strlen(argv[0])+1; + } + p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n); + if( !p ){ + return SQLITE_NOMEM; + } + memset(p, 0, sizeof(IcuTokenizer)); + + if( n ){ + p->zLocale = (char *)&p[1]; + memcpy(p->zLocale, argv[0], n); + } + + *ppTokenizer = (sqlite3_tokenizer *)p; + + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int icuDestroy(sqlite3_tokenizer *pTokenizer){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int icuOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, /* Input string */ + int nInput, /* Length of zInput in bytes */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + IcuCursor *pCsr; + + const int32_t opt = U_FOLD_CASE_DEFAULT; + UErrorCode status = U_ZERO_ERROR; + int nChar; + + UChar32 c; + int iInput = 0; + int iOut = 0; + + *ppCursor = 0; + + if( zInput==0 ){ + nInput = 0; + zInput = ""; + }else if( nInput<0 ){ + nInput = strlen(zInput); + } + nChar = nInput+1; + pCsr = (IcuCursor *)sqlite3_malloc64( + sizeof(IcuCursor) + /* IcuCursor */ + ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ + (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ + ); + if( !pCsr ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(IcuCursor)); + pCsr->aChar = (UChar *)&pCsr[1]; + pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; + + pCsr->aOffset[iOut] = iInput; + U8_NEXT(zInput, iInput, nInput, c); + while( c>0 ){ + int isError = 0; + c = u_foldCase(c, opt); + U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); + if( isError ){ + sqlite3_free(pCsr); + return SQLITE_ERROR; + } + pCsr->aOffset[iOut] = iInput; + + if( iInputpIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); + if( !U_SUCCESS(status) ){ + sqlite3_free(pCsr); + return SQLITE_ERROR; + } + pCsr->nChar = iOut; + + ubrk_first(pCsr->pIter); + *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to icuOpen(). +*/ +static int icuClose(sqlite3_tokenizer_cursor *pCursor){ + IcuCursor *pCsr = (IcuCursor *)pCursor; + ubrk_close(pCsr->pIter); + sqlite3_free(pCsr->zBuffer); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** Extract the next token from a tokenization cursor. +*/ +static int icuNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + IcuCursor *pCsr = (IcuCursor *)pCursor; + + int iStart = 0; + int iEnd = 0; + int nByte = 0; + + while( iStart==iEnd ){ + UChar32 c; + + iStart = ubrk_current(pCsr->pIter); + iEnd = ubrk_next(pCsr->pIter); + if( iEnd==UBRK_DONE ){ + return SQLITE_DONE; + } + + while( iStartaChar, iWhite, pCsr->nChar, c); + if( u_isspace(c) ){ + iStart = iWhite; + }else{ + break; + } + } + assert(iStart<=iEnd); + } + + do { + UErrorCode status = U_ZERO_ERROR; + if( nByte ){ + char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); + if( !zNew ){ + return SQLITE_NOMEM; + } + pCsr->zBuffer = zNew; + pCsr->nBuffer = nByte; + } + + u_strToUTF8( + pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ + &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ + &status /* Output success/failure */ + ); + } while( nByte>pCsr->nBuffer ); + + *ppToken = pCsr->zBuffer; + *pnBytes = nByte; + *piStartOffset = pCsr->aOffset[iStart]; + *piEndOffset = pCsr->aOffset[iEnd]; + *piPosition = pCsr->iToken++; + + return SQLITE_OK; +} + +/* +** The set of routines that implement the simple tokenizer +*/ +static const sqlite3_tokenizer_module icuTokenizerModule = { + 0, /* iVersion */ + icuCreate, /* xCreate */ + icuDestroy, /* xCreate */ + icuOpen, /* xOpen */ + icuClose, /* xClose */ + icuNext, /* xNext */ + 0, /* xLanguageid */ +}; + +/* +** Set *ppModule to point at the implementation of the ICU tokenizer. +*/ +SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &icuTokenizerModule; +} + +#endif /* defined(SQLITE_ENABLE_ICU) */ +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_icu.c ********************************************/ +/************** Begin file sqlite3rbu.c **************************************/ +/* +** 2014 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** +** OVERVIEW +** +** The RBU extension requires that the RBU update be packaged as an +** SQLite database. The tables it expects to find are described in +** sqlite3rbu.h. Essentially, for each table xyz in the target database +** that the user wishes to write to, a corresponding data_xyz table is +** created in the RBU database and populated with one row for each row to +** update, insert or delete from the target table. +** +** The update proceeds in three stages: +** +** 1) The database is updated. The modified database pages are written +** to a *-oal file. A *-oal file is just like a *-wal file, except +** that it is named "-oal" instead of "-wal". +** Because regular SQLite clients do not look for file named +** "-oal", they go on using the original database in +** rollback mode while the *-oal file is being generated. +** +** During this stage RBU does not update the database by writing +** directly to the target tables. Instead it creates "imposter" +** tables using the SQLITE_TESTCTRL_IMPOSTER interface that it uses +** to update each b-tree individually. All updates required by each +** b-tree are completed before moving on to the next, and all +** updates are done in sorted key order. +** +** 2) The "-oal" file is moved to the equivalent "-wal" +** location using a call to rename(2). Before doing this the RBU +** module takes an EXCLUSIVE lock on the database file, ensuring +** that there are no other active readers. +** +** Once the EXCLUSIVE lock is released, any other database readers +** detect the new *-wal file and read the database in wal mode. At +** this point they see the new version of the database - including +** the updates made as part of the RBU update. +** +** 3) The new *-wal file is checkpointed. This proceeds in the same way +** as a regular database checkpoint, except that a single frame is +** checkpointed each time sqlite3rbu_step() is called. If the RBU +** handle is closed before the entire *-wal file is checkpointed, +** the checkpoint progress is saved in the RBU database and the +** checkpoint can be resumed by another RBU client at some point in +** the future. +** +** POTENTIAL PROBLEMS +** +** The rename() call might not be portable. And RBU is not currently +** syncing the directory after renaming the file. +** +** When state is saved, any commit to the *-oal file and the commit to +** the RBU update database are not atomic. So if the power fails at the +** wrong moment they might get out of sync. As the main database will be +** committed before the RBU update database this will likely either just +** pass unnoticed, or result in SQLITE_CONSTRAINT errors (due to UNIQUE +** constraint violations). +** +** If some client does modify the target database mid RBU update, or some +** other error occurs, the RBU extension will keep throwing errors. It's +** not really clear how to get out of this state. The system could just +** by delete the RBU update database and *-oal file and have the device +** download the update again and start over. +** +** At present, for an UPDATE, both the new.* and old.* records are +** collected in the rbu_xyz table. And for both UPDATEs and DELETEs all +** fields are collected. This means we're probably writing a lot more +** data to disk when saving the state of an ongoing update to the RBU +** update database than is strictly necessary. +** +*/ + +/* #include */ +/* #include */ +/* #include */ + +/* #include "sqlite3.h" */ + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RBU) +/************** Include sqlite3rbu.h in the middle of sqlite3rbu.c ***********/ +/************** Begin file sqlite3rbu.h **************************************/ +/* +** 2014 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the public interface for the RBU extension. +*/ + +/* +** SUMMARY +** +** Writing a transaction containing a large number of operations on +** b-tree indexes that are collectively larger than the available cache +** memory can be very inefficient. +** +** The problem is that in order to update a b-tree, the leaf page (at least) +** containing the entry being inserted or deleted must be modified. If the +** working set of leaves is larger than the available cache memory, then a +** single leaf that is modified more than once as part of the transaction +** may be loaded from or written to the persistent media multiple times. +** Additionally, because the index updates are likely to be applied in +** random order, access to pages within the database is also likely to be in +** random order, which is itself quite inefficient. +** +** One way to improve the situation is to sort the operations on each index +** by index key before applying them to the b-tree. This leads to an IO +** pattern that resembles a single linear scan through the index b-tree, +** and all but guarantees each modified leaf page is loaded and stored +** exactly once. SQLite uses this trick to improve the performance of +** CREATE INDEX commands. This extension allows it to be used to improve +** the performance of large transactions on existing databases. +** +** Additionally, this extension allows the work involved in writing the +** large transaction to be broken down into sub-transactions performed +** sequentially by separate processes. This is useful if the system cannot +** guarantee that a single update process will run for long enough to apply +** the entire update, for example because the update is being applied on a +** mobile device that is frequently rebooted. Even after the writer process +** has committed one or more sub-transactions, other database clients continue +** to read from the original database snapshot. In other words, partially +** applied transactions are not visible to other clients. +** +** "RBU" stands for "Resumable Bulk Update". As in a large database update +** transmitted via a wireless network to a mobile device. A transaction +** applied using this extension is hence refered to as an "RBU update". +** +** +** LIMITATIONS +** +** An "RBU update" transaction is subject to the following limitations: +** +** * The transaction must consist of INSERT, UPDATE and DELETE operations +** only. +** +** * INSERT statements may not use any default values. +** +** * UPDATE and DELETE statements must identify their target rows by +** non-NULL PRIMARY KEY values. Rows with NULL values stored in PRIMARY +** KEY fields may not be updated or deleted. If the table being written +** has no PRIMARY KEY, affected rows must be identified by rowid. +** +** * UPDATE statements may not modify PRIMARY KEY columns. +** +** * No triggers will be fired. +** +** * No foreign key violations are detected or reported. +** +** * CHECK constraints are not enforced. +** +** * No constraint handling mode except for "OR ROLLBACK" is supported. +** +** +** PREPARATION +** +** An "RBU update" is stored as a separate SQLite database. A database +** containing an RBU update is an "RBU database". For each table in the +** target database to be updated, the RBU database should contain a table +** named "data_" containing the same set of columns as the +** target table, and one more - "rbu_control". The data_% table should +** have no PRIMARY KEY or UNIQUE constraints, but each column should have +** the same type as the corresponding column in the target database. +** The "rbu_control" column should have no type at all. For example, if +** the target database contains: +** +** CREATE TABLE t1(a INTEGER PRIMARY KEY, b TEXT, c UNIQUE); +** +** Then the RBU database should contain: +** +** CREATE TABLE data_t1(a INTEGER, b TEXT, c, rbu_control); +** +** The order of the columns in the data_% table does not matter. +** +** Instead of a regular table, the RBU database may also contain virtual +** tables or view named using the data_ naming scheme. +** +** Instead of the plain data_ naming scheme, RBU database tables +** may also be named data_, where is any sequence +** of zero or more numeric characters (0-9). This can be significant because +** tables within the RBU database are always processed in order sorted by +** name. By judicious selection of the portion of the names +** of the RBU tables the user can therefore control the order in which they +** are processed. This can be useful, for example, to ensure that "external +** content" FTS4 tables are updated before their underlying content tables. +** +** If the target database table is a virtual table or a table that has no +** PRIMARY KEY declaration, the data_% table must also contain a column +** named "rbu_rowid". This column is mapped to the tables implicit primary +** key column - "rowid". Virtual tables for which the "rowid" column does +** not function like a primary key value cannot be updated using RBU. For +** example, if the target db contains either of the following: +** +** CREATE VIRTUAL TABLE x1 USING fts3(a, b); +** CREATE TABLE x1(a, b) +** +** then the RBU database should contain: +** +** CREATE TABLE data_x1(a, b, rbu_rowid, rbu_control); +** +** All non-hidden columns (i.e. all columns matched by "SELECT *") of the +** target table must be present in the input table. For virtual tables, +** hidden columns are optional - they are updated by RBU if present in +** the input table, or not otherwise. For example, to write to an fts4 +** table with a hidden languageid column such as: +** +** CREATE VIRTUAL TABLE ft1 USING fts4(a, b, languageid='langid'); +** +** Either of the following input table schemas may be used: +** +** CREATE TABLE data_ft1(a, b, langid, rbu_rowid, rbu_control); +** CREATE TABLE data_ft1(a, b, rbu_rowid, rbu_control); +** +** For each row to INSERT into the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain integer value 0. The +** other columns should be set to the values that make up the new record +** to insert. +** +** If the target database table has an INTEGER PRIMARY KEY, it is not +** possible to insert a NULL value into the IPK column. Attempting to +** do so results in an SQLITE_MISMATCH error. +** +** For each row to DELETE from the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain integer value 1. The +** real primary key values of the row to delete should be stored in the +** corresponding columns of the data_% table. The values stored in the +** other columns are not used. +** +** For each row to UPDATE from the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain a value of type text. +** The real primary key values identifying the row to update should be +** stored in the corresponding columns of the data_% table row, as should +** the new values of all columns being update. The text value in the +** "rbu_control" column must contain the same number of characters as +** there are columns in the target database table, and must consist entirely +** of 'x' and '.' characters (or in some special cases 'd' - see below). For +** each column that is being updated, the corresponding character is set to +** 'x'. For those that remain as they are, the corresponding character of the +** rbu_control value should be set to '.'. For example, given the tables +** above, the update statement: +** +** UPDATE t1 SET c = 'usa' WHERE a = 4; +** +** is represented by the data_t1 row created by: +** +** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..x'); +** +** Instead of an 'x' character, characters of the rbu_control value specified +** for UPDATEs may also be set to 'd'. In this case, instead of updating the +** target table with the value stored in the corresponding data_% column, the +** user-defined SQL function "rbu_delta()" is invoked and the result stored in +** the target table column. rbu_delta() is invoked with two arguments - the +** original value currently stored in the target table column and the +** value specified in the data_xxx table. +** +** For example, this row: +** +** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..d'); +** +** is similar to an UPDATE statement such as: +** +** UPDATE t1 SET c = rbu_delta(c, 'usa') WHERE a = 4; +** +** Finally, if an 'f' character appears in place of a 'd' or 's' in an +** ota_control string, the contents of the data_xxx table column is assumed +** to be a "fossil delta" - a patch to be applied to a blob value in the +** format used by the fossil source-code management system. In this case +** the existing value within the target database table must be of type BLOB. +** It is replaced by the result of applying the specified fossil delta to +** itself. +** +** If the target database table is a virtual table or a table with no PRIMARY +** KEY, the rbu_control value should not include a character corresponding +** to the rbu_rowid value. For example, this: +** +** INSERT INTO data_ft1(a, b, rbu_rowid, rbu_control) +** VALUES(NULL, 'usa', 12, '.x'); +** +** causes a result similar to: +** +** UPDATE ft1 SET b = 'usa' WHERE rowid = 12; +** +** The data_xxx tables themselves should have no PRIMARY KEY declarations. +** However, RBU is more efficient if reading the rows in from each data_xxx +** table in "rowid" order is roughly the same as reading them sorted by +** the PRIMARY KEY of the corresponding target database table. In other +** words, rows should be sorted using the destination table PRIMARY KEY +** fields before they are inserted into the data_xxx tables. +** +** USAGE +** +** The API declared below allows an application to apply an RBU update +** stored on disk to an existing target database. Essentially, the +** application: +** +** 1) Opens an RBU handle using the sqlite3rbu_open() function. +** +** 2) Registers any required virtual table modules with the database +** handle returned by sqlite3rbu_db(). Also, if required, register +** the rbu_delta() implementation. +** +** 3) Calls the sqlite3rbu_step() function one or more times on +** the new handle. Each call to sqlite3rbu_step() performs a single +** b-tree operation, so thousands of calls may be required to apply +** a complete update. +** +** 4) Calls sqlite3rbu_close() to close the RBU update handle. If +** sqlite3rbu_step() has been called enough times to completely +** apply the update to the target database, then the RBU database +** is marked as fully applied. Otherwise, the state of the RBU +** update application is saved in the RBU database for later +** resumption. +** +** See comments below for more detail on APIs. +** +** If an update is only partially applied to the target database by the +** time sqlite3rbu_close() is called, various state information is saved +** within the RBU database. This allows subsequent processes to automatically +** resume the RBU update from where it left off. +** +** To remove all RBU extension state information, returning an RBU database +** to its original contents, it is sufficient to drop all tables that begin +** with the prefix "rbu_" +** +** DATABASE LOCKING +** +** An RBU update may not be applied to a database in WAL mode. Attempting +** to do so is an error (SQLITE_ERROR). +** +** While an RBU handle is open, a SHARED lock may be held on the target +** database file. This means it is possible for other clients to read the +** database, but not to write it. +** +** If an RBU update is started and then suspended before it is completed, +** then an external client writes to the database, then attempting to resume +** the suspended RBU update is also an error (SQLITE_BUSY). +*/ + +#ifndef _SQLITE3RBU_H +#define _SQLITE3RBU_H + +/* #include "sqlite3.h" ** Required for error code definitions ** */ + +#if 0 +extern "C" { +#endif + +typedef struct sqlite3rbu sqlite3rbu; + +/* +** Open an RBU handle. +** +** Argument zTarget is the path to the target database. Argument zRbu is +** the path to the RBU database. Each call to this function must be matched +** by a call to sqlite3rbu_close(). When opening the databases, RBU passes +** the SQLITE_CONFIG_URI flag to sqlite3_open_v2(). So if either zTarget +** or zRbu begin with "file:", it will be interpreted as an SQLite +** database URI, not a regular file name. +** +** If the zState argument is passed a NULL value, the RBU extension stores +** the current state of the update (how many rows have been updated, which +** indexes are yet to be updated etc.) within the RBU database itself. This +** can be convenient, as it means that the RBU application does not need to +** organize removing a separate state file after the update is concluded. +** Or, if zState is non-NULL, it must be a path to a database file in which +** the RBU extension can store the state of the update. +** +** When resuming an RBU update, the zState argument must be passed the same +** value as when the RBU update was started. +** +** Once the RBU update is finished, the RBU extension does not +** automatically remove any zState database file, even if it created it. +** +** By default, RBU uses the default VFS to access the files on disk. To +** use a VFS other than the default, an SQLite "file:" URI containing a +** "vfs=..." option may be passed as the zTarget option. +** +** IMPORTANT NOTE FOR ZIPVFS USERS: The RBU extension works with all of +** SQLite's built-in VFSs, including the multiplexor VFS. However it does +** not work out of the box with zipvfs. Refer to the comment describing +** the zipvfs_create_vfs() API below for details on using RBU with zipvfs. +*/ +SQLITE_API sqlite3rbu *sqlite3rbu_open( + const char *zTarget, + const char *zRbu, + const char *zState +); + +/* +** Open an RBU handle to perform an RBU vacuum on database file zTarget. +** An RBU vacuum is similar to SQLite's built-in VACUUM command, except +** that it can be suspended and resumed like an RBU update. +** +** The second argument to this function identifies a database in which +** to store the state of the RBU vacuum operation if it is suspended. The +** first time sqlite3rbu_vacuum() is called, to start an RBU vacuum +** operation, the state database should either not exist or be empty +** (contain no tables). If an RBU vacuum is suspended by calling +** sqlite3rbu_close() on the RBU handle before sqlite3rbu_step() has +** returned SQLITE_DONE, the vacuum state is stored in the state database. +** The vacuum can be resumed by calling this function to open a new RBU +** handle specifying the same target and state databases. +** +** If the second argument passed to this function is NULL, then the +** name of the state database is "-vacuum", where +** is the name of the target database file. In this case, on UNIX, if the +** state database is not already present in the file-system, it is created +** with the same permissions as the target db is made. +** +** With an RBU vacuum, it is an SQLITE_MISUSE error if the name of the +** state database ends with "-vactmp". This name is reserved for internal +** use. +** +** This function does not delete the state database after an RBU vacuum +** is completed, even if it created it. However, if the call to +** sqlite3rbu_close() returns any value other than SQLITE_OK, the contents +** of the state tables within the state database are zeroed. This way, +** the next call to sqlite3rbu_vacuum() opens a handle that starts a +** new RBU vacuum operation. +** +** As with sqlite3rbu_open(), Zipvfs users should rever to the comment +** describing the sqlite3rbu_create_vfs() API function below for +** a description of the complications associated with using RBU with +** zipvfs databases. +*/ +SQLITE_API sqlite3rbu *sqlite3rbu_vacuum( + const char *zTarget, + const char *zState +); + +/* +** Configure a limit for the amount of temp space that may be used by +** the RBU handle passed as the first argument. The new limit is specified +** in bytes by the second parameter. If it is positive, the limit is updated. +** If the second parameter to this function is passed zero, then the limit +** is removed entirely. If the second parameter is negative, the limit is +** not modified (this is useful for querying the current limit). +** +** In all cases the returned value is the current limit in bytes (zero +** indicates unlimited). +** +** If the temp space limit is exceeded during operation, an SQLITE_FULL +** error is returned. +*/ +SQLITE_API sqlite3_int64 sqlite3rbu_temp_size_limit(sqlite3rbu*, sqlite3_int64); + +/* +** Return the current amount of temp file space, in bytes, currently used by +** the RBU handle passed as the only argument. +*/ +SQLITE_API sqlite3_int64 sqlite3rbu_temp_size(sqlite3rbu*); + +/* +** Internally, each RBU connection uses a separate SQLite database +** connection to access the target and rbu update databases. This +** API allows the application direct access to these database handles. +** +** The first argument passed to this function must be a valid, open, RBU +** handle. The second argument should be passed zero to access the target +** database handle, or non-zero to access the rbu update database handle. +** Accessing the underlying database handles may be useful in the +** following scenarios: +** +** * If any target tables are virtual tables, it may be necessary to +** call sqlite3_create_module() on the target database handle to +** register the required virtual table implementations. +** +** * If the data_xxx tables in the RBU source database are virtual +** tables, the application may need to call sqlite3_create_module() on +** the rbu update db handle to any required virtual table +** implementations. +** +** * If the application uses the "rbu_delta()" feature described above, +** it must use sqlite3_create_function() or similar to register the +** rbu_delta() implementation with the target database handle. +** +** If an error has occurred, either while opening or stepping the RBU object, +** this function may return NULL. The error code and message may be collected +** when sqlite3rbu_close() is called. +** +** Database handles returned by this function remain valid until the next +** call to any sqlite3rbu_xxx() function other than sqlite3rbu_db(). +*/ +SQLITE_API sqlite3 *sqlite3rbu_db(sqlite3rbu*, int bRbu); + +/* +** Do some work towards applying the RBU update to the target db. +** +** Return SQLITE_DONE if the update has been completely applied, or +** SQLITE_OK if no error occurs but there remains work to do to apply +** the RBU update. If an error does occur, some other error code is +** returned. +** +** Once a call to sqlite3rbu_step() has returned a value other than +** SQLITE_OK, all subsequent calls on the same RBU handle are no-ops +** that immediately return the same value. +*/ +SQLITE_API int sqlite3rbu_step(sqlite3rbu *pRbu); + +/* +** Force RBU to save its state to disk. +** +** If a power failure or application crash occurs during an update, following +** system recovery RBU may resume the update from the point at which the state +** was last saved. In other words, from the most recent successful call to +** sqlite3rbu_close() or this function. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +SQLITE_API int sqlite3rbu_savestate(sqlite3rbu *pRbu); + +/* +** Close an RBU handle. +** +** If the RBU update has been completely applied, mark the RBU database +** as fully applied. Otherwise, assuming no error has occurred, save the +** current state of the RBU update appliation to the RBU database. +** +** If an error has already occurred as part of an sqlite3rbu_step() +** or sqlite3rbu_open() call, or if one occurs within this function, an +** SQLite error code is returned. Additionally, if pzErrmsg is not NULL, +** *pzErrmsg may be set to point to a buffer containing a utf-8 formatted +** English language error message. It is the responsibility of the caller to +** eventually free any such buffer using sqlite3_free(). +** +** Otherwise, if no error occurs, this function returns SQLITE_OK if the +** update has been partially applied, or SQLITE_DONE if it has been +** completely applied. +*/ +SQLITE_API int sqlite3rbu_close(sqlite3rbu *pRbu, char **pzErrmsg); + +/* +** Return the total number of key-value operations (inserts, deletes or +** updates) that have been performed on the target database since the +** current RBU update was started. +*/ +SQLITE_API sqlite3_int64 sqlite3rbu_progress(sqlite3rbu *pRbu); + +/* +** Obtain permyriadage (permyriadage is to 10000 as percentage is to 100) +** progress indications for the two stages of an RBU update. This API may +** be useful for driving GUI progress indicators and similar. +** +** An RBU update is divided into two stages: +** +** * Stage 1, in which changes are accumulated in an oal/wal file, and +** * Stage 2, in which the contents of the wal file are copied into the +** main database. +** +** The update is visible to non-RBU clients during stage 2. During stage 1 +** non-RBU reader clients may see the original database. +** +** If this API is called during stage 2 of the update, output variable +** (*pnOne) is set to 10000 to indicate that stage 1 has finished and (*pnTwo) +** to a value between 0 and 10000 to indicate the permyriadage progress of +** stage 2. A value of 5000 indicates that stage 2 is half finished, +** 9000 indicates that it is 90% finished, and so on. +** +** If this API is called during stage 1 of the update, output variable +** (*pnTwo) is set to 0 to indicate that stage 2 has not yet started. The +** value to which (*pnOne) is set depends on whether or not the RBU +** database contains an "rbu_count" table. The rbu_count table, if it +** exists, must contain the same columns as the following: +** +** CREATE TABLE rbu_count(tbl TEXT PRIMARY KEY, cnt INTEGER) WITHOUT ROWID; +** +** There must be one row in the table for each source (data_xxx) table within +** the RBU database. The 'tbl' column should contain the name of the source +** table. The 'cnt' column should contain the number of rows within the +** source table. +** +** If the rbu_count table is present and populated correctly and this +** API is called during stage 1, the *pnOne output variable is set to the +** permyriadage progress of the same stage. If the rbu_count table does +** not exist, then (*pnOne) is set to -1 during stage 1. If the rbu_count +** table exists but is not correctly populated, the value of the *pnOne +** output variable during stage 1 is undefined. +*/ +SQLITE_API void sqlite3rbu_bp_progress(sqlite3rbu *pRbu, int *pnOne, int*pnTwo); + +/* +** Obtain an indication as to the current stage of an RBU update or vacuum. +** This function always returns one of the SQLITE_RBU_STATE_XXX constants +** defined in this file. Return values should be interpreted as follows: +** +** SQLITE_RBU_STATE_OAL: +** RBU is currently building a *-oal file. The next call to sqlite3rbu_step() +** may either add further data to the *-oal file, or compute data that will +** be added by a subsequent call. +** +** SQLITE_RBU_STATE_MOVE: +** RBU has finished building the *-oal file. The next call to sqlite3rbu_step() +** will move the *-oal file to the equivalent *-wal path. If the current +** operation is an RBU update, then the updated version of the database +** file will become visible to ordinary SQLite clients following the next +** call to sqlite3rbu_step(). +** +** SQLITE_RBU_STATE_CHECKPOINT: +** RBU is currently performing an incremental checkpoint. The next call to +** sqlite3rbu_step() will copy a page of data from the *-wal file into +** the target database file. +** +** SQLITE_RBU_STATE_DONE: +** The RBU operation has finished. Any subsequent calls to sqlite3rbu_step() +** will immediately return SQLITE_DONE. +** +** SQLITE_RBU_STATE_ERROR: +** An error has occurred. Any subsequent calls to sqlite3rbu_step() will +** immediately return the SQLite error code associated with the error. +*/ +#define SQLITE_RBU_STATE_OAL 1 +#define SQLITE_RBU_STATE_MOVE 2 +#define SQLITE_RBU_STATE_CHECKPOINT 3 +#define SQLITE_RBU_STATE_DONE 4 +#define SQLITE_RBU_STATE_ERROR 5 + +SQLITE_API int sqlite3rbu_state(sqlite3rbu *pRbu); + +/* +** Create an RBU VFS named zName that accesses the underlying file-system +** via existing VFS zParent. Or, if the zParent parameter is passed NULL, +** then the new RBU VFS uses the default system VFS to access the file-system. +** The new object is registered as a non-default VFS with SQLite before +** returning. +** +** Part of the RBU implementation uses a custom VFS object. Usually, this +** object is created and deleted automatically by RBU. +** +** The exception is for applications that also use zipvfs. In this case, +** the custom VFS must be explicitly created by the user before the RBU +** handle is opened. The RBU VFS should be installed so that the zipvfs +** VFS uses the RBU VFS, which in turn uses any other VFS layers in use +** (for example multiplexor) to access the file-system. For example, +** to assemble an RBU enabled VFS stack that uses both zipvfs and +** multiplexor (error checking omitted): +** +** // Create a VFS named "multiplex" (not the default). +** sqlite3_multiplex_initialize(0, 0); +** +** // Create an rbu VFS named "rbu" that uses multiplexor. If the +** // second argument were replaced with NULL, the "rbu" VFS would +** // access the file-system via the system default VFS, bypassing the +** // multiplexor. +** sqlite3rbu_create_vfs("rbu", "multiplex"); +** +** // Create a zipvfs VFS named "zipvfs" that uses rbu. +** zipvfs_create_vfs_v3("zipvfs", "rbu", 0, xCompressorAlgorithmDetector); +** +** // Make zipvfs the default VFS. +** sqlite3_vfs_register(sqlite3_vfs_find("zipvfs"), 1); +** +** Because the default VFS created above includes a RBU functionality, it +** may be used by RBU clients. Attempting to use RBU with a zipvfs VFS stack +** that does not include the RBU layer results in an error. +** +** The overhead of adding the "rbu" VFS to the system is negligible for +** non-RBU users. There is no harm in an application accessing the +** file-system via "rbu" all the time, even if it only uses RBU functionality +** occasionally. +*/ +SQLITE_API int sqlite3rbu_create_vfs(const char *zName, const char *zParent); + +/* +** Deregister and destroy an RBU vfs created by an earlier call to +** sqlite3rbu_create_vfs(). +** +** VFS objects are not reference counted. If a VFS object is destroyed +** before all database handles that use it have been closed, the results +** are undefined. +*/ +SQLITE_API void sqlite3rbu_destroy_vfs(const char *zName); + +#if 0 +} /* end of the 'extern "C"' block */ +#endif + +#endif /* _SQLITE3RBU_H */ + +/************** End of sqlite3rbu.h ******************************************/ +/************** Continuing where we left off in sqlite3rbu.c *****************/ + +#if defined(_WIN32_WCE) +/* #include "windows.h" */ +#endif + +/* Maximum number of prepared UPDATE statements held by this module */ +#define SQLITE_RBU_UPDATE_CACHESIZE 16 + +/* Delta checksums disabled by default. Compile with -DRBU_ENABLE_DELTA_CKSUM +** to enable checksum verification. +*/ +#ifndef RBU_ENABLE_DELTA_CKSUM +# define RBU_ENABLE_DELTA_CKSUM 0 +#endif + +/* +** Swap two objects of type TYPE. +*/ +#if !defined(SQLITE_AMALGAMATION) +# define SWAP(TYPE,A,B) {TYPE t=A; A=B; B=t;} +#endif + +/* +** Name of the URI option that causes RBU to take an exclusive lock as +** part of the incremental checkpoint operation. +*/ +#define RBU_EXCLUSIVE_CHECKPOINT "rbu_exclusive_checkpoint" + + +/* +** The rbu_state table is used to save the state of a partially applied +** update so that it can be resumed later. The table consists of integer +** keys mapped to values as follows: +** +** RBU_STATE_STAGE: +** May be set to integer values 1, 2, 4 or 5. As follows: +** 1: the *-rbu file is currently under construction. +** 2: the *-rbu file has been constructed, but not yet moved +** to the *-wal path. +** 4: the checkpoint is underway. +** 5: the rbu update has been checkpointed. +** +** RBU_STATE_TBL: +** Only valid if STAGE==1. The target database name of the table +** currently being written. +** +** RBU_STATE_IDX: +** Only valid if STAGE==1. The target database name of the index +** currently being written, or NULL if the main table is currently being +** updated. +** +** RBU_STATE_ROW: +** Only valid if STAGE==1. Number of rows already processed for the current +** table/index. +** +** RBU_STATE_PROGRESS: +** Trbul number of sqlite3rbu_step() calls made so far as part of this +** rbu update. +** +** RBU_STATE_CKPT: +** Valid if STAGE==4. The 64-bit checksum associated with the wal-index +** header created by recovering the *-wal file. This is used to detect +** cases when another client appends frames to the *-wal file in the +** middle of an incremental checkpoint (an incremental checkpoint cannot +** be continued if this happens). +** +** RBU_STATE_COOKIE: +** Valid if STAGE==1. The current change-counter cookie value in the +** target db file. +** +** RBU_STATE_OALSZ: +** Valid if STAGE==1. The size in bytes of the *-oal file. +** +** RBU_STATE_DATATBL: +** Only valid if STAGE==1. The RBU database name of the table +** currently being read. +*/ +#define RBU_STATE_STAGE 1 +#define RBU_STATE_TBL 2 +#define RBU_STATE_IDX 3 +#define RBU_STATE_ROW 4 +#define RBU_STATE_PROGRESS 5 +#define RBU_STATE_CKPT 6 +#define RBU_STATE_COOKIE 7 +#define RBU_STATE_OALSZ 8 +#define RBU_STATE_PHASEONESTEP 9 +#define RBU_STATE_DATATBL 10 + +#define RBU_STAGE_OAL 1 +#define RBU_STAGE_MOVE 2 +#define RBU_STAGE_CAPTURE 3 +#define RBU_STAGE_CKPT 4 +#define RBU_STAGE_DONE 5 + + +#define RBU_CREATE_STATE \ + "CREATE TABLE IF NOT EXISTS %s.rbu_state(k INTEGER PRIMARY KEY, v)" + +typedef struct RbuFrame RbuFrame; +typedef struct RbuObjIter RbuObjIter; +typedef struct RbuState RbuState; +typedef struct RbuSpan RbuSpan; +typedef struct rbu_vfs rbu_vfs; +typedef struct rbu_file rbu_file; +typedef struct RbuUpdateStmt RbuUpdateStmt; + +#if !defined(SQLITE_AMALGAMATION) +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; +typedef sqlite3_int64 i64; +#endif + +/* +** These values must match the values defined in wal.c for the equivalent +** locks. These are not magic numbers as they are part of the SQLite file +** format. +*/ +#define WAL_LOCK_WRITE 0 +#define WAL_LOCK_CKPT 1 +#define WAL_LOCK_READ0 3 + +#define SQLITE_FCNTL_RBUCNT 5149216 + +/* +** A structure to store values read from the rbu_state table in memory. +*/ +struct RbuState { + int eStage; + char *zTbl; + char *zDataTbl; + char *zIdx; + i64 iWalCksum; + int nRow; + i64 nProgress; + u32 iCookie; + i64 iOalSz; + i64 nPhaseOneStep; +}; + +struct RbuUpdateStmt { + char *zMask; /* Copy of update mask used with pUpdate */ + sqlite3_stmt *pUpdate; /* Last update statement (or NULL) */ + RbuUpdateStmt *pNext; +}; + +struct RbuSpan { + const char *zSpan; + int nSpan; +}; + +/* +** An iterator of this type is used to iterate through all objects in +** the target database that require updating. For each such table, the +** iterator visits, in order: +** +** * the table itself, +** * each index of the table (zero or more points to visit), and +** * a special "cleanup table" state. +** +** abIndexed: +** If the table has no indexes on it, abIndexed is set to NULL. Otherwise, +** it points to an array of flags nTblCol elements in size. The flag is +** set for each column that is either a part of the PK or a part of an +** index. Or clear otherwise. +** +** If there are one or more partial indexes on the table, all fields of +** this array set set to 1. This is because in that case, the module has +** no way to tell which fields will be required to add and remove entries +** from the partial indexes. +** +*/ +struct RbuObjIter { + sqlite3_stmt *pTblIter; /* Iterate through tables */ + sqlite3_stmt *pIdxIter; /* Index iterator */ + int nTblCol; /* Size of azTblCol[] array */ + char **azTblCol; /* Array of unquoted target column names */ + char **azTblType; /* Array of target column types */ + int *aiSrcOrder; /* src table col -> target table col */ + u8 *abTblPk; /* Array of flags, set on target PK columns */ + u8 *abNotNull; /* Array of flags, set on NOT NULL columns */ + u8 *abIndexed; /* Array of flags, set on indexed & PK cols */ + int eType; /* Table type - an RBU_PK_XXX value */ + + /* Output variables. zTbl==0 implies EOF. */ + int bCleanup; /* True in "cleanup" state */ + const char *zTbl; /* Name of target db table */ + const char *zDataTbl; /* Name of rbu db table (or null) */ + const char *zIdx; /* Name of target db index (or null) */ + int iTnum; /* Root page of current object */ + int iPkTnum; /* If eType==EXTERNAL, root of PK index */ + int bUnique; /* Current index is unique */ + int nIndex; /* Number of aux. indexes on table zTbl */ + + /* Statements created by rbuObjIterPrepareAll() */ + int nCol; /* Number of columns in current object */ + sqlite3_stmt *pSelect; /* Source data */ + sqlite3_stmt *pInsert; /* Statement for INSERT operations */ + sqlite3_stmt *pDelete; /* Statement for DELETE ops */ + sqlite3_stmt *pTmpInsert; /* Insert into rbu_tmp_$zDataTbl */ + int nIdxCol; + RbuSpan *aIdxCol; + char *zIdxSql; + + /* Last UPDATE used (for PK b-tree updates only), or NULL. */ + RbuUpdateStmt *pRbuUpdate; +}; + +/* +** Values for RbuObjIter.eType +** +** 0: Table does not exist (error) +** 1: Table has an implicit rowid. +** 2: Table has an explicit IPK column. +** 3: Table has an external PK index. +** 4: Table is WITHOUT ROWID. +** 5: Table is a virtual table. +*/ +#define RBU_PK_NOTABLE 0 +#define RBU_PK_NONE 1 +#define RBU_PK_IPK 2 +#define RBU_PK_EXTERNAL 3 +#define RBU_PK_WITHOUT_ROWID 4 +#define RBU_PK_VTAB 5 + + +/* +** Within the RBU_STAGE_OAL stage, each call to sqlite3rbu_step() performs +** one of the following operations. +*/ +#define RBU_INSERT 1 /* Insert on a main table b-tree */ +#define RBU_DELETE 2 /* Delete a row from a main table b-tree */ +#define RBU_REPLACE 3 /* Delete and then insert a row */ +#define RBU_IDX_DELETE 4 /* Delete a row from an aux. index b-tree */ +#define RBU_IDX_INSERT 5 /* Insert on an aux. index b-tree */ + +#define RBU_UPDATE 6 /* Update a row in a main table b-tree */ + +/* +** A single step of an incremental checkpoint - frame iWalFrame of the wal +** file should be copied to page iDbPage of the database file. +*/ +struct RbuFrame { + u32 iDbPage; + u32 iWalFrame; +}; + +/* +** RBU handle. +** +** nPhaseOneStep: +** If the RBU database contains an rbu_count table, this value is set to +** a running estimate of the number of b-tree operations required to +** finish populating the *-oal file. This allows the sqlite3_bp_progress() +** API to calculate the permyriadage progress of populating the *-oal file +** using the formula: +** +** permyriadage = (10000 * nProgress) / nPhaseOneStep +** +** nPhaseOneStep is initialized to the sum of: +** +** nRow * (nIndex + 1) +** +** for all source tables in the RBU database, where nRow is the number +** of rows in the source table and nIndex the number of indexes on the +** corresponding target database table. +** +** This estimate is accurate if the RBU update consists entirely of +** INSERT operations. However, it is inaccurate if: +** +** * the RBU update contains any UPDATE operations. If the PK specified +** for an UPDATE operation does not exist in the target table, then +** no b-tree operations are required on index b-trees. Or if the +** specified PK does exist, then (nIndex*2) such operations are +** required (one delete and one insert on each index b-tree). +** +** * the RBU update contains any DELETE operations for which the specified +** PK does not exist. In this case no operations are required on index +** b-trees. +** +** * the RBU update contains REPLACE operations. These are similar to +** UPDATE operations. +** +** nPhaseOneStep is updated to account for the conditions above during the +** first pass of each source table. The updated nPhaseOneStep value is +** stored in the rbu_state table if the RBU update is suspended. +*/ +struct sqlite3rbu { + int eStage; /* Value of RBU_STATE_STAGE field */ + sqlite3 *dbMain; /* target database handle */ + sqlite3 *dbRbu; /* rbu database handle */ + char *zTarget; /* Path to target db */ + char *zRbu; /* Path to rbu db */ + char *zState; /* Path to state db (or NULL if zRbu) */ + char zStateDb[5]; /* Db name for state ("stat" or "main") */ + int rc; /* Value returned by last rbu_step() call */ + char *zErrmsg; /* Error message if rc!=SQLITE_OK */ + int nStep; /* Rows processed for current object */ + int nProgress; /* Rows processed for all objects */ + RbuObjIter objiter; /* Iterator for skipping through tbl/idx */ + const char *zVfsName; /* Name of automatically created rbu vfs */ + rbu_file *pTargetFd; /* File handle open on target db */ + int nPagePerSector; /* Pages per sector for pTargetFd */ + i64 iOalSz; + i64 nPhaseOneStep; + + /* The following state variables are used as part of the incremental + ** checkpoint stage (eStage==RBU_STAGE_CKPT). See comments surrounding + ** function rbuSetupCheckpoint() for details. */ + u32 iMaxFrame; /* Largest iWalFrame value in aFrame[] */ + u32 mLock; + int nFrame; /* Entries in aFrame[] array */ + int nFrameAlloc; /* Allocated size of aFrame[] array */ + RbuFrame *aFrame; + int pgsz; + u8 *aBuf; + i64 iWalCksum; + i64 szTemp; /* Current size of all temp files in use */ + i64 szTempLimit; /* Total size limit for temp files */ + + /* Used in RBU vacuum mode only */ + int nRbu; /* Number of RBU VFS in the stack */ + rbu_file *pRbuFd; /* Fd for main db of dbRbu */ +}; + +/* +** An rbu VFS is implemented using an instance of this structure. +** +** Variable pRbu is only non-NULL for automatically created RBU VFS objects. +** It is NULL for RBU VFS objects created explicitly using +** sqlite3rbu_create_vfs(). It is used to track the total amount of temp +** space used by the RBU handle. +*/ +struct rbu_vfs { + sqlite3_vfs base; /* rbu VFS shim methods */ + sqlite3_vfs *pRealVfs; /* Underlying VFS */ + sqlite3_mutex *mutex; /* Mutex to protect pMain */ + sqlite3rbu *pRbu; /* Owner RBU object */ + rbu_file *pMain; /* List of main db files */ + rbu_file *pMainRbu; /* List of main db files with pRbu!=0 */ +}; + +/* +** Each file opened by an rbu VFS is represented by an instance of +** the following structure. +** +** If this is a temporary file (pRbu!=0 && flags&DELETE_ON_CLOSE), variable +** "sz" is set to the current size of the database file. +*/ +struct rbu_file { + sqlite3_file base; /* sqlite3_file methods */ + sqlite3_file *pReal; /* Underlying file handle */ + rbu_vfs *pRbuVfs; /* Pointer to the rbu_vfs object */ + sqlite3rbu *pRbu; /* Pointer to rbu object (rbu target only) */ + i64 sz; /* Size of file in bytes (temp only) */ + + int openFlags; /* Flags this file was opened with */ + u32 iCookie; /* Cookie value for main db files */ + u8 iWriteVer; /* "write-version" value for main db files */ + u8 bNolock; /* True to fail EXCLUSIVE locks */ + + int nShm; /* Number of entries in apShm[] array */ + char **apShm; /* Array of mmap'd *-shm regions */ + char *zDel; /* Delete this when closing file */ + + const char *zWal; /* Wal filename for this main db file */ + rbu_file *pWalFd; /* Wal file descriptor for this main db */ + rbu_file *pMainNext; /* Next MAIN_DB file */ + rbu_file *pMainRbuNext; /* Next MAIN_DB file with pRbu!=0 */ +}; + +/* +** True for an RBU vacuum handle, or false otherwise. +*/ +#define rbuIsVacuum(p) ((p)->zTarget==0) + + +/************************************************************************* +** The following three functions, found below: +** +** rbuDeltaGetInt() +** rbuDeltaChecksum() +** rbuDeltaApply() +** +** are lifted from the fossil source code (http://fossil-scm.org). They +** are used to implement the scalar SQL function rbu_fossil_delta(). +*/ + +/* +** Read bytes from *pz and convert them into a positive integer. When +** finished, leave *pz pointing to the first character past the end of +** the integer. The *pLen parameter holds the length of the string +** in *pz and is decremented once for each character in the integer. +*/ +static unsigned int rbuDeltaGetInt(const char **pz, int *pLen){ + static const signed char zValue[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36, + -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1, + }; + unsigned int v = 0; + int c; + unsigned char *z = (unsigned char*)*pz; + unsigned char *zStart = z; + while( (c = zValue[0x7f&*(z++)])>=0 ){ + v = (v<<6) + c; + } + z--; + *pLen -= z - zStart; + *pz = (char*)z; + return v; +} + +#if RBU_ENABLE_DELTA_CKSUM +/* +** Compute a 32-bit checksum on the N-byte buffer. Return the result. +*/ +static unsigned int rbuDeltaChecksum(const char *zIn, size_t N){ + const unsigned char *z = (const unsigned char *)zIn; + unsigned sum0 = 0; + unsigned sum1 = 0; + unsigned sum2 = 0; + unsigned sum3 = 0; + while(N >= 16){ + sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]); + sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]); + sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]); + sum3 += ((unsigned)z[3] + z[7] + z[11]+ z[15]); + z += 16; + N -= 16; + } + while(N >= 4){ + sum0 += z[0]; + sum1 += z[1]; + sum2 += z[2]; + sum3 += z[3]; + z += 4; + N -= 4; + } + sum3 += (sum2 << 8) + (sum1 << 16) + (sum0 << 24); + switch(N){ + case 3: sum3 += (z[2] << 8); + case 2: sum3 += (z[1] << 16); + case 1: sum3 += (z[0] << 24); + default: ; + } + return sum3; +} +#endif + +/* +** Apply a delta. +** +** The output buffer should be big enough to hold the whole output +** file and a NUL terminator at the end. The delta_output_size() +** routine will determine this size for you. +** +** The delta string should be null-terminated. But the delta string +** may contain embedded NUL characters (if the input and output are +** binary files) so we also have to pass in the length of the delta in +** the lenDelta parameter. +** +** This function returns the size of the output file in bytes (excluding +** the final NUL terminator character). Except, if the delta string is +** malformed or intended for use with a source file other than zSrc, +** then this routine returns -1. +** +** Refer to the delta_create() documentation above for a description +** of the delta file format. +*/ +static int rbuDeltaApply( + const char *zSrc, /* The source or pattern file */ + int lenSrc, /* Length of the source file */ + const char *zDelta, /* Delta to apply to the pattern */ + int lenDelta, /* Length of the delta */ + char *zOut /* Write the output into this preallocated buffer */ +){ + unsigned int limit; + unsigned int total = 0; +#if RBU_ENABLE_DELTA_CKSUM + char *zOrigOut = zOut; +#endif + + limit = rbuDeltaGetInt(&zDelta, &lenDelta); + if( *zDelta!='\n' ){ + /* ERROR: size integer not terminated by "\n" */ + return -1; + } + zDelta++; lenDelta--; + while( *zDelta && lenDelta>0 ){ + unsigned int cnt, ofst; + cnt = rbuDeltaGetInt(&zDelta, &lenDelta); + switch( zDelta[0] ){ + case '@': { + zDelta++; lenDelta--; + ofst = rbuDeltaGetInt(&zDelta, &lenDelta); + if( lenDelta>0 && zDelta[0]!=',' ){ + /* ERROR: copy command not terminated by ',' */ + return -1; + } + zDelta++; lenDelta--; + total += cnt; + if( total>limit ){ + /* ERROR: copy exceeds output file size */ + return -1; + } + if( (int)(ofst+cnt) > lenSrc ){ + /* ERROR: copy extends past end of input */ + return -1; + } + memcpy(zOut, &zSrc[ofst], cnt); + zOut += cnt; + break; + } + case ':': { + zDelta++; lenDelta--; + total += cnt; + if( total>limit ){ + /* ERROR: insert command gives an output larger than predicted */ + return -1; + } + if( (int)cnt>lenDelta ){ + /* ERROR: insert count exceeds size of delta */ + return -1; + } + memcpy(zOut, zDelta, cnt); + zOut += cnt; + zDelta += cnt; + lenDelta -= cnt; + break; + } + case ';': { + zDelta++; lenDelta--; + zOut[0] = 0; +#if RBU_ENABLE_DELTA_CKSUM + if( cnt!=rbuDeltaChecksum(zOrigOut, total) ){ + /* ERROR: bad checksum */ + return -1; + } +#endif + if( total!=limit ){ + /* ERROR: generated size does not match predicted size */ + return -1; + } + return total; + } + default: { + /* ERROR: unknown delta operator */ + return -1; + } + } + } + /* ERROR: unterminated delta */ + return -1; +} + +static int rbuDeltaOutputSize(const char *zDelta, int lenDelta){ + int size; + size = rbuDeltaGetInt(&zDelta, &lenDelta); + if( *zDelta!='\n' ){ + /* ERROR: size integer not terminated by "\n" */ + return -1; + } + return size; +} + +/* +** End of code taken from fossil. +*************************************************************************/ + +/* +** Implementation of SQL scalar function rbu_fossil_delta(). +** +** This function applies a fossil delta patch to a blob. Exactly two +** arguments must be passed to this function. The first is the blob to +** patch and the second the patch to apply. If no error occurs, this +** function returns the patched blob. +*/ +static void rbuFossilDeltaFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *aDelta; + int nDelta; + const char *aOrig; + int nOrig; + + int nOut; + int nOut2; + char *aOut; + + assert( argc==2 ); + + nOrig = sqlite3_value_bytes(argv[0]); + aOrig = (const char*)sqlite3_value_blob(argv[0]); + nDelta = sqlite3_value_bytes(argv[1]); + aDelta = (const char*)sqlite3_value_blob(argv[1]); + + /* Figure out the size of the output */ + nOut = rbuDeltaOutputSize(aDelta, nDelta); + if( nOut<0 ){ + sqlite3_result_error(context, "corrupt fossil delta", -1); + return; + } + + aOut = sqlite3_malloc(nOut+1); + if( aOut==0 ){ + sqlite3_result_error_nomem(context); + }else{ + nOut2 = rbuDeltaApply(aOrig, nOrig, aDelta, nDelta, aOut); + if( nOut2!=nOut ){ + sqlite3_free(aOut); + sqlite3_result_error(context, "corrupt fossil delta", -1); + }else{ + sqlite3_result_blob(context, aOut, nOut, sqlite3_free); + } + } +} + + +/* +** Prepare the SQL statement in buffer zSql against database handle db. +** If successful, set *ppStmt to point to the new statement and return +** SQLITE_OK. +** +** Otherwise, if an error does occur, set *ppStmt to NULL and return +** an SQLite error code. Additionally, set output variable *pzErrmsg to +** point to a buffer containing an error message. It is the responsibility +** of the caller to (eventually) free this buffer using sqlite3_free(). +*/ +static int prepareAndCollectError( + sqlite3 *db, + sqlite3_stmt **ppStmt, + char **pzErrmsg, + const char *zSql +){ + int rc = sqlite3_prepare_v2(db, zSql, -1, ppStmt, 0); + if( rc!=SQLITE_OK ){ + *pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + *ppStmt = 0; + } + return rc; +} + +/* +** Reset the SQL statement passed as the first argument. Return a copy +** of the value returned by sqlite3_reset(). +** +** If an error has occurred, then set *pzErrmsg to point to a buffer +** containing an error message. It is the responsibility of the caller +** to eventually free this buffer using sqlite3_free(). +*/ +static int resetAndCollectError(sqlite3_stmt *pStmt, char **pzErrmsg){ + int rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ){ + *pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(sqlite3_db_handle(pStmt))); + } + return rc; +} + +/* +** Unless it is NULL, argument zSql points to a buffer allocated using +** sqlite3_malloc containing an SQL statement. This function prepares the SQL +** statement against database db and frees the buffer. If statement +** compilation is successful, *ppStmt is set to point to the new statement +** handle and SQLITE_OK is returned. +** +** Otherwise, if an error occurs, *ppStmt is set to NULL and an error code +** returned. In this case, *pzErrmsg may also be set to point to an error +** message. It is the responsibility of the caller to free this error message +** buffer using sqlite3_free(). +** +** If argument zSql is NULL, this function assumes that an OOM has occurred. +** In this case SQLITE_NOMEM is returned and *ppStmt set to NULL. +*/ +static int prepareFreeAndCollectError( + sqlite3 *db, + sqlite3_stmt **ppStmt, + char **pzErrmsg, + char *zSql +){ + int rc; + assert( *pzErrmsg==0 ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + *ppStmt = 0; + }else{ + rc = prepareAndCollectError(db, ppStmt, pzErrmsg, zSql); + sqlite3_free(zSql); + } + return rc; +} + +/* +** Free the RbuObjIter.azTblCol[] and RbuObjIter.abTblPk[] arrays allocated +** by an earlier call to rbuObjIterCacheTableInfo(). +*/ +static void rbuObjIterFreeCols(RbuObjIter *pIter){ + int i; + for(i=0; inTblCol; i++){ + sqlite3_free(pIter->azTblCol[i]); + sqlite3_free(pIter->azTblType[i]); + } + sqlite3_free(pIter->azTblCol); + pIter->azTblCol = 0; + pIter->azTblType = 0; + pIter->aiSrcOrder = 0; + pIter->abTblPk = 0; + pIter->abNotNull = 0; + pIter->nTblCol = 0; + pIter->eType = 0; /* Invalid value */ +} + +/* +** Finalize all statements and free all allocations that are specific to +** the current object (table/index pair). +*/ +static void rbuObjIterClearStatements(RbuObjIter *pIter){ + RbuUpdateStmt *pUp; + + sqlite3_finalize(pIter->pSelect); + sqlite3_finalize(pIter->pInsert); + sqlite3_finalize(pIter->pDelete); + sqlite3_finalize(pIter->pTmpInsert); + pUp = pIter->pRbuUpdate; + while( pUp ){ + RbuUpdateStmt *pTmp = pUp->pNext; + sqlite3_finalize(pUp->pUpdate); + sqlite3_free(pUp); + pUp = pTmp; + } + sqlite3_free(pIter->aIdxCol); + sqlite3_free(pIter->zIdxSql); + + pIter->pSelect = 0; + pIter->pInsert = 0; + pIter->pDelete = 0; + pIter->pRbuUpdate = 0; + pIter->pTmpInsert = 0; + pIter->nCol = 0; + pIter->nIdxCol = 0; + pIter->aIdxCol = 0; + pIter->zIdxSql = 0; +} + +/* +** Clean up any resources allocated as part of the iterator object passed +** as the only argument. +*/ +static void rbuObjIterFinalize(RbuObjIter *pIter){ + rbuObjIterClearStatements(pIter); + sqlite3_finalize(pIter->pTblIter); + sqlite3_finalize(pIter->pIdxIter); + rbuObjIterFreeCols(pIter); + memset(pIter, 0, sizeof(RbuObjIter)); +} + +/* +** Advance the iterator to the next position. +** +** If no error occurs, SQLITE_OK is returned and the iterator is left +** pointing to the next entry. Otherwise, an error code and message is +** left in the RBU handle passed as the first argument. A copy of the +** error code is returned. +*/ +static int rbuObjIterNext(sqlite3rbu *p, RbuObjIter *pIter){ + int rc = p->rc; + if( rc==SQLITE_OK ){ + + /* Free any SQLite statements used while processing the previous object */ + rbuObjIterClearStatements(pIter); + if( pIter->zIdx==0 ){ + rc = sqlite3_exec(p->dbMain, + "DROP TRIGGER IF EXISTS temp.rbu_insert_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_update1_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_update2_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_delete_tr;" + , 0, 0, &p->zErrmsg + ); + } + + if( rc==SQLITE_OK ){ + if( pIter->bCleanup ){ + rbuObjIterFreeCols(pIter); + pIter->bCleanup = 0; + rc = sqlite3_step(pIter->pTblIter); + if( rc!=SQLITE_ROW ){ + rc = resetAndCollectError(pIter->pTblIter, &p->zErrmsg); + pIter->zTbl = 0; + }else{ + pIter->zTbl = (const char*)sqlite3_column_text(pIter->pTblIter, 0); + pIter->zDataTbl = (const char*)sqlite3_column_text(pIter->pTblIter,1); + rc = (pIter->zDataTbl && pIter->zTbl) ? SQLITE_OK : SQLITE_NOMEM; + } + }else{ + if( pIter->zIdx==0 ){ + sqlite3_stmt *pIdx = pIter->pIdxIter; + rc = sqlite3_bind_text(pIdx, 1, pIter->zTbl, -1, SQLITE_STATIC); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_step(pIter->pIdxIter); + if( rc!=SQLITE_ROW ){ + rc = resetAndCollectError(pIter->pIdxIter, &p->zErrmsg); + pIter->bCleanup = 1; + pIter->zIdx = 0; + }else{ + pIter->zIdx = (const char*)sqlite3_column_text(pIter->pIdxIter, 0); + pIter->iTnum = sqlite3_column_int(pIter->pIdxIter, 1); + pIter->bUnique = sqlite3_column_int(pIter->pIdxIter, 2); + rc = pIter->zIdx ? SQLITE_OK : SQLITE_NOMEM; + } + } + } + } + } + + if( rc!=SQLITE_OK ){ + rbuObjIterFinalize(pIter); + p->rc = rc; + } + return rc; +} + + +/* +** The implementation of the rbu_target_name() SQL function. This function +** accepts one or two arguments. The first argument is the name of a table - +** the name of a table in the RBU database. The second, if it is present, is 1 +** for a view or 0 for a table. +** +** For a non-vacuum RBU handle, if the table name matches the pattern: +** +** data[0-9]_ +** +** where is any sequence of 1 or more characters, is returned. +** Otherwise, if the only argument does not match the above pattern, an SQL +** NULL is returned. +** +** "data_t1" -> "t1" +** "data0123_t2" -> "t2" +** "dataAB_t3" -> NULL +** +** For an rbu vacuum handle, a copy of the first argument is returned if +** the second argument is either missing or 0 (not a view). +*/ +static void rbuTargetNameFunc( + sqlite3_context *pCtx, + int argc, + sqlite3_value **argv +){ + sqlite3rbu *p = sqlite3_user_data(pCtx); + const char *zIn; + assert( argc==1 || argc==2 ); + + zIn = (const char*)sqlite3_value_text(argv[0]); + if( zIn ){ + if( rbuIsVacuum(p) ){ + assert( argc==2 || argc==1 ); + if( argc==1 || 0==sqlite3_value_int(argv[1]) ){ + sqlite3_result_text(pCtx, zIn, -1, SQLITE_STATIC); + } + }else{ + if( strlen(zIn)>4 && memcmp("data", zIn, 4)==0 ){ + int i; + for(i=4; zIn[i]>='0' && zIn[i]<='9'; i++); + if( zIn[i]=='_' && zIn[i+1] ){ + sqlite3_result_text(pCtx, &zIn[i+1], -1, SQLITE_STATIC); + } + } + } + } +} + +/* +** Initialize the iterator structure passed as the second argument. +** +** If no error occurs, SQLITE_OK is returned and the iterator is left +** pointing to the first entry. Otherwise, an error code and message is +** left in the RBU handle passed as the first argument. A copy of the +** error code is returned. +*/ +static int rbuObjIterFirst(sqlite3rbu *p, RbuObjIter *pIter){ + int rc; + memset(pIter, 0, sizeof(RbuObjIter)); + + rc = prepareFreeAndCollectError(p->dbRbu, &pIter->pTblIter, &p->zErrmsg, + sqlite3_mprintf( + "SELECT rbu_target_name(name, type='view') AS target, name " + "FROM sqlite_schema " + "WHERE type IN ('table', 'view') AND target IS NOT NULL " + " %s " + "ORDER BY name" + , rbuIsVacuum(p) ? "AND rootpage!=0 AND rootpage IS NOT NULL" : "")); + + if( rc==SQLITE_OK ){ + rc = prepareAndCollectError(p->dbMain, &pIter->pIdxIter, &p->zErrmsg, + "SELECT name, rootpage, sql IS NULL OR substr(8, 6)=='UNIQUE' " + " FROM main.sqlite_schema " + " WHERE type='index' AND tbl_name = ?" + ); + } + + pIter->bCleanup = 1; + p->rc = rc; + return rbuObjIterNext(p, pIter); +} + +/* +** This is a wrapper around "sqlite3_mprintf(zFmt, ...)". If an OOM occurs, +** an error code is stored in the RBU handle passed as the first argument. +** +** If an error has already occurred (p->rc is already set to something other +** than SQLITE_OK), then this function returns NULL without modifying the +** stored error code. In this case it still calls sqlite3_free() on any +** printf() parameters associated with %z conversions. +*/ +static char *rbuMPrintf(sqlite3rbu *p, const char *zFmt, ...){ + char *zSql = 0; + va_list ap; + va_start(ap, zFmt); + zSql = sqlite3_vmprintf(zFmt, ap); + if( p->rc==SQLITE_OK ){ + if( zSql==0 ) p->rc = SQLITE_NOMEM; + }else{ + sqlite3_free(zSql); + zSql = 0; + } + va_end(ap); + return zSql; +} + +/* +** Argument zFmt is a sqlite3_mprintf() style format string. The trailing +** arguments are the usual subsitution values. This function performs +** the printf() style substitutions and executes the result as an SQL +** statement on the RBU handles database. +** +** If an error occurs, an error code and error message is stored in the +** RBU handle. If an error has already occurred when this function is +** called, it is a no-op. +*/ +static int rbuMPrintfExec(sqlite3rbu *p, sqlite3 *db, const char *zFmt, ...){ + va_list ap; + char *zSql; + va_start(ap, zFmt); + zSql = sqlite3_vmprintf(zFmt, ap); + if( p->rc==SQLITE_OK ){ + if( zSql==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + p->rc = sqlite3_exec(db, zSql, 0, 0, &p->zErrmsg); + } + } + sqlite3_free(zSql); + va_end(ap); + return p->rc; +} + +/* +** Attempt to allocate and return a pointer to a zeroed block of nByte +** bytes. +** +** If an error (i.e. an OOM condition) occurs, return NULL and leave an +** error code in the rbu handle passed as the first argument. Or, if an +** error has already occurred when this function is called, return NULL +** immediately without attempting the allocation or modifying the stored +** error code. +*/ +static void *rbuMalloc(sqlite3rbu *p, sqlite3_int64 nByte){ + void *pRet = 0; + if( p->rc==SQLITE_OK ){ + assert( nByte>0 ); + pRet = sqlite3_malloc64(nByte); + if( pRet==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + } + } + return pRet; +} + + +/* +** Allocate and zero the pIter->azTblCol[] and abTblPk[] arrays so that +** there is room for at least nCol elements. If an OOM occurs, store an +** error code in the RBU handle passed as the first argument. +*/ +static void rbuAllocateIterArrays(sqlite3rbu *p, RbuObjIter *pIter, int nCol){ + sqlite3_int64 nByte = (2*sizeof(char*) + sizeof(int) + 3*sizeof(u8)) * nCol; + char **azNew; + + azNew = (char**)rbuMalloc(p, nByte); + if( azNew ){ + pIter->azTblCol = azNew; + pIter->azTblType = &azNew[nCol]; + pIter->aiSrcOrder = (int*)&pIter->azTblType[nCol]; + pIter->abTblPk = (u8*)&pIter->aiSrcOrder[nCol]; + pIter->abNotNull = (u8*)&pIter->abTblPk[nCol]; + pIter->abIndexed = (u8*)&pIter->abNotNull[nCol]; + } +} + +/* +** The first argument must be a nul-terminated string. This function +** returns a copy of the string in memory obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free this memory +** using sqlite3_free(). +** +** If an OOM condition is encountered when attempting to allocate memory, +** output variable (*pRc) is set to SQLITE_NOMEM before returning. Otherwise, +** if the allocation succeeds, (*pRc) is left unchanged. +*/ +static char *rbuStrndup(const char *zStr, int *pRc){ + char *zRet = 0; + + if( *pRc==SQLITE_OK ){ + if( zStr ){ + size_t nCopy = strlen(zStr) + 1; + zRet = (char*)sqlite3_malloc64(nCopy); + if( zRet ){ + memcpy(zRet, zStr, nCopy); + }else{ + *pRc = SQLITE_NOMEM; + } + } + } + + return zRet; +} + +/* +** Finalize the statement passed as the second argument. +** +** If the sqlite3_finalize() call indicates that an error occurs, and the +** rbu handle error code is not already set, set the error code and error +** message accordingly. +*/ +static void rbuFinalize(sqlite3rbu *p, sqlite3_stmt *pStmt){ + sqlite3 *db = sqlite3_db_handle(pStmt); + int rc = sqlite3_finalize(pStmt); + if( p->rc==SQLITE_OK && rc!=SQLITE_OK ){ + p->rc = rc; + p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } +} + +/* Determine the type of a table. +** +** peType is of type (int*), a pointer to an output parameter of type +** (int). This call sets the output parameter as follows, depending +** on the type of the table specified by parameters dbName and zTbl. +** +** RBU_PK_NOTABLE: No such table. +** RBU_PK_NONE: Table has an implicit rowid. +** RBU_PK_IPK: Table has an explicit IPK column. +** RBU_PK_EXTERNAL: Table has an external PK index. +** RBU_PK_WITHOUT_ROWID: Table is WITHOUT ROWID. +** RBU_PK_VTAB: Table is a virtual table. +** +** Argument *piPk is also of type (int*), and also points to an output +** parameter. Unless the table has an external primary key index +** (i.e. unless *peType is set to 3), then *piPk is set to zero. Or, +** if the table does have an external primary key index, then *piPk +** is set to the root page number of the primary key index before +** returning. +** +** ALGORITHM: +** +** if( no entry exists in sqlite_schema ){ +** return RBU_PK_NOTABLE +** }else if( sql for the entry starts with "CREATE VIRTUAL" ){ +** return RBU_PK_VTAB +** }else if( "PRAGMA index_list()" for the table contains a "pk" index ){ +** if( the index that is the pk exists in sqlite_schema ){ +** *piPK = rootpage of that index. +** return RBU_PK_EXTERNAL +** }else{ +** return RBU_PK_WITHOUT_ROWID +** } +** }else if( "PRAGMA table_info()" lists one or more "pk" columns ){ +** return RBU_PK_IPK +** }else{ +** return RBU_PK_NONE +** } +*/ +static void rbuTableType( + sqlite3rbu *p, + const char *zTab, + int *peType, + int *piTnum, + int *piPk +){ + /* + ** 0) SELECT count(*) FROM sqlite_schema where name=%Q AND IsVirtual(%Q) + ** 1) PRAGMA index_list = ? + ** 2) SELECT count(*) FROM sqlite_schema where name=%Q + ** 3) PRAGMA table_info = ? + */ + sqlite3_stmt *aStmt[4] = {0, 0, 0, 0}; + + *peType = RBU_PK_NOTABLE; + *piPk = 0; + + assert( p->rc==SQLITE_OK ); + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[0], &p->zErrmsg, + sqlite3_mprintf( + "SELECT " + " (sql COLLATE nocase BETWEEN 'CREATE VIRTUAL' AND 'CREATE VIRTUAM')," + " rootpage" + " FROM sqlite_schema" + " WHERE name=%Q", zTab + )); + if( p->rc!=SQLITE_OK || sqlite3_step(aStmt[0])!=SQLITE_ROW ){ + /* Either an error, or no such table. */ + goto rbuTableType_end; + } + if( sqlite3_column_int(aStmt[0], 0) ){ + *peType = RBU_PK_VTAB; /* virtual table */ + goto rbuTableType_end; + } + *piTnum = sqlite3_column_int(aStmt[0], 1); + + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[1], &p->zErrmsg, + sqlite3_mprintf("PRAGMA index_list=%Q",zTab) + ); + if( p->rc ) goto rbuTableType_end; + while( sqlite3_step(aStmt[1])==SQLITE_ROW ){ + const u8 *zOrig = sqlite3_column_text(aStmt[1], 3); + const u8 *zIdx = sqlite3_column_text(aStmt[1], 1); + if( zOrig && zIdx && zOrig[0]=='p' ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[2], &p->zErrmsg, + sqlite3_mprintf( + "SELECT rootpage FROM sqlite_schema WHERE name = %Q", zIdx + )); + if( p->rc==SQLITE_OK ){ + if( sqlite3_step(aStmt[2])==SQLITE_ROW ){ + *piPk = sqlite3_column_int(aStmt[2], 0); + *peType = RBU_PK_EXTERNAL; + }else{ + *peType = RBU_PK_WITHOUT_ROWID; + } + } + goto rbuTableType_end; + } + } + + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[3], &p->zErrmsg, + sqlite3_mprintf("PRAGMA table_info=%Q",zTab) + ); + if( p->rc==SQLITE_OK ){ + while( sqlite3_step(aStmt[3])==SQLITE_ROW ){ + if( sqlite3_column_int(aStmt[3],5)>0 ){ + *peType = RBU_PK_IPK; /* explicit IPK column */ + goto rbuTableType_end; + } + } + *peType = RBU_PK_NONE; + } + +rbuTableType_end: { + unsigned int i; + for(i=0; iabIndexed[] array. +*/ +static void rbuObjIterCacheIndexedCols(sqlite3rbu *p, RbuObjIter *pIter){ + sqlite3_stmt *pList = 0; + int bIndex = 0; + + if( p->rc==SQLITE_OK ){ + memcpy(pIter->abIndexed, pIter->abTblPk, sizeof(u8)*pIter->nTblCol); + p->rc = prepareFreeAndCollectError(p->dbMain, &pList, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_list = %Q", pIter->zTbl) + ); + } + + pIter->nIndex = 0; + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pList) ){ + const char *zIdx = (const char*)sqlite3_column_text(pList, 1); + int bPartial = sqlite3_column_int(pList, 4); + sqlite3_stmt *pXInfo = 0; + if( zIdx==0 ) break; + if( bPartial ){ + memset(pIter->abIndexed, 0x01, sizeof(u8)*pIter->nTblCol); + } + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int iCid = sqlite3_column_int(pXInfo, 1); + if( iCid>=0 ) pIter->abIndexed[iCid] = 1; + if( iCid==-2 ){ + memset(pIter->abIndexed, 0x01, sizeof(u8)*pIter->nTblCol); + } + } + rbuFinalize(p, pXInfo); + bIndex = 1; + pIter->nIndex++; + } + + if( pIter->eType==RBU_PK_WITHOUT_ROWID ){ + /* "PRAGMA index_list" includes the main PK b-tree */ + pIter->nIndex--; + } + + rbuFinalize(p, pList); + if( bIndex==0 ) pIter->abIndexed = 0; +} + + +/* +** If they are not already populated, populate the pIter->azTblCol[], +** pIter->abTblPk[], pIter->nTblCol and pIter->bRowid variables according to +** the table (not index) that the iterator currently points to. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. If +** an error does occur, an error code and error message are also left in +** the RBU handle. +*/ +static int rbuObjIterCacheTableInfo(sqlite3rbu *p, RbuObjIter *pIter){ + if( pIter->azTblCol==0 ){ + sqlite3_stmt *pStmt = 0; + int nCol = 0; + int i; /* for() loop iterator variable */ + int bRbuRowid = 0; /* If input table has column "rbu_rowid" */ + int iOrder = 0; + int iTnum = 0; + + /* Figure out the type of table this step will deal with. */ + assert( pIter->eType==0 ); + rbuTableType(p, pIter->zTbl, &pIter->eType, &iTnum, &pIter->iPkTnum); + if( p->rc==SQLITE_OK && pIter->eType==RBU_PK_NOTABLE ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("no such table: %s", pIter->zTbl); + } + if( p->rc ) return p->rc; + if( pIter->zIdx==0 ) pIter->iTnum = iTnum; + + assert( pIter->eType==RBU_PK_NONE || pIter->eType==RBU_PK_IPK + || pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_WITHOUT_ROWID + || pIter->eType==RBU_PK_VTAB + ); + + /* Populate the azTblCol[] and nTblCol variables based on the columns + ** of the input table. Ignore any input table columns that begin with + ** "rbu_". */ + p->rc = prepareFreeAndCollectError(p->dbRbu, &pStmt, &p->zErrmsg, + sqlite3_mprintf("SELECT * FROM '%q'", pIter->zDataTbl) + ); + if( p->rc==SQLITE_OK ){ + nCol = sqlite3_column_count(pStmt); + rbuAllocateIterArrays(p, pIter, nCol); + } + for(i=0; p->rc==SQLITE_OK && irc); + pIter->aiSrcOrder[pIter->nTblCol] = pIter->nTblCol; + pIter->azTblCol[pIter->nTblCol++] = zCopy; + } + else if( 0==sqlite3_stricmp("rbu_rowid", zName) ){ + bRbuRowid = 1; + } + } + sqlite3_finalize(pStmt); + pStmt = 0; + + if( p->rc==SQLITE_OK + && rbuIsVacuum(p)==0 + && bRbuRowid!=(pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE) + ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf( + "table %q %s rbu_rowid column", pIter->zDataTbl, + (bRbuRowid ? "may not have" : "requires") + ); + } + + /* Check that all non-HIDDEN columns in the destination table are also + ** present in the input table. Populate the abTblPk[], azTblType[] and + ** aiTblOrder[] arrays at the same time. */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pStmt, &p->zErrmsg, + sqlite3_mprintf("PRAGMA table_info(%Q)", pIter->zTbl) + ); + } + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + const char *zName = (const char*)sqlite3_column_text(pStmt, 1); + if( zName==0 ) break; /* An OOM - finalize() below returns S_NOMEM */ + for(i=iOrder; inTblCol; i++){ + if( 0==strcmp(zName, pIter->azTblCol[i]) ) break; + } + if( i==pIter->nTblCol ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("column missing from %q: %s", + pIter->zDataTbl, zName + ); + }else{ + int iPk = sqlite3_column_int(pStmt, 5); + int bNotNull = sqlite3_column_int(pStmt, 3); + const char *zType = (const char*)sqlite3_column_text(pStmt, 2); + + if( i!=iOrder ){ + SWAP(int, pIter->aiSrcOrder[i], pIter->aiSrcOrder[iOrder]); + SWAP(char*, pIter->azTblCol[i], pIter->azTblCol[iOrder]); + } + + pIter->azTblType[iOrder] = rbuStrndup(zType, &p->rc); + assert( iPk>=0 ); + pIter->abTblPk[iOrder] = (u8)iPk; + pIter->abNotNull[iOrder] = (u8)bNotNull || (iPk!=0); + iOrder++; + } + } + + rbuFinalize(p, pStmt); + rbuObjIterCacheIndexedCols(p, pIter); + assert( pIter->eType!=RBU_PK_VTAB || pIter->abIndexed==0 ); + assert( pIter->eType!=RBU_PK_VTAB || pIter->nIndex==0 ); + } + + return p->rc; +} + +/* +** This function constructs and returns a pointer to a nul-terminated +** string containing some SQL clause or list based on one or more of the +** column names currently stored in the pIter->azTblCol[] array. +*/ +static char *rbuObjIterGetCollist( + sqlite3rbu *p, /* RBU object */ + RbuObjIter *pIter /* Object iterator for column names */ +){ + char *zList = 0; + const char *zSep = ""; + int i; + for(i=0; inTblCol; i++){ + const char *z = pIter->azTblCol[i]; + zList = rbuMPrintf(p, "%z%s\"%w\"", zList, zSep, z); + zSep = ", "; + } + return zList; +} + +/* +** Return a comma separated list of the quoted PRIMARY KEY column names, +** in order, for the current table. Before each column name, add the text +** zPre. After each column name, add the zPost text. Use zSeparator as +** the separator text (usually ", "). +*/ +static char *rbuObjIterGetPkList( + sqlite3rbu *p, /* RBU object */ + RbuObjIter *pIter, /* Object iterator for column names */ + const char *zPre, /* Before each quoted column name */ + const char *zSeparator, /* Separator to use between columns */ + const char *zPost /* After each quoted column name */ +){ + int iPk = 1; + char *zRet = 0; + const char *zSep = ""; + while( 1 ){ + int i; + for(i=0; inTblCol; i++){ + if( (int)pIter->abTblPk[i]==iPk ){ + const char *zCol = pIter->azTblCol[i]; + zRet = rbuMPrintf(p, "%z%s%s\"%w\"%s", zRet, zSep, zPre, zCol, zPost); + zSep = zSeparator; + break; + } + } + if( i==pIter->nTblCol ) break; + iPk++; + } + return zRet; +} + +/* +** This function is called as part of restarting an RBU vacuum within +** stage 1 of the process (while the *-oal file is being built) while +** updating a table (not an index). The table may be a rowid table or +** a WITHOUT ROWID table. It queries the target database to find the +** largest key that has already been written to the target table and +** constructs a WHERE clause that can be used to extract the remaining +** rows from the source table. For a rowid table, the WHERE clause +** is of the form: +** +** "WHERE _rowid_ > ?" +** +** and for WITHOUT ROWID tables: +** +** "WHERE (key1, key2) > (?, ?)" +** +** Instead of "?" placeholders, the actual WHERE clauses created by +** this function contain literal SQL values. +*/ +static char *rbuVacuumTableStart( + sqlite3rbu *p, /* RBU handle */ + RbuObjIter *pIter, /* RBU iterator object */ + int bRowid, /* True for a rowid table */ + const char *zWrite /* Target table name prefix */ +){ + sqlite3_stmt *pMax = 0; + char *zRet = 0; + if( bRowid ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pMax, &p->zErrmsg, + sqlite3_mprintf( + "SELECT max(_rowid_) FROM \"%s%w\"", zWrite, pIter->zTbl + ) + ); + if( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pMax) ){ + sqlite3_int64 iMax = sqlite3_column_int64(pMax, 0); + zRet = rbuMPrintf(p, " WHERE _rowid_ > %lld ", iMax); + } + rbuFinalize(p, pMax); + }else{ + char *zOrder = rbuObjIterGetPkList(p, pIter, "", ", ", " DESC"); + char *zSelect = rbuObjIterGetPkList(p, pIter, "quote(", "||','||", ")"); + char *zList = rbuObjIterGetPkList(p, pIter, "", ", ", ""); + + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pMax, &p->zErrmsg, + sqlite3_mprintf( + "SELECT %s FROM \"%s%w\" ORDER BY %s LIMIT 1", + zSelect, zWrite, pIter->zTbl, zOrder + ) + ); + if( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pMax) ){ + const char *zVal = (const char*)sqlite3_column_text(pMax, 0); + zRet = rbuMPrintf(p, " WHERE (%s) > (%s) ", zList, zVal); + } + rbuFinalize(p, pMax); + } + + sqlite3_free(zOrder); + sqlite3_free(zSelect); + sqlite3_free(zList); + } + return zRet; +} + +/* +** This function is called as part of restating an RBU vacuum when the +** current operation is writing content to an index. If possible, it +** queries the target index b-tree for the largest key already written to +** it, then composes and returns an expression that can be used in a WHERE +** clause to select the remaining required rows from the source table. +** It is only possible to return such an expression if: +** +** * The index contains no DESC columns, and +** * The last key written to the index before the operation was +** suspended does not contain any NULL values. +** +** The expression is of the form: +** +** (index-field1, index-field2, ...) > (?, ?, ...) +** +** except that the "?" placeholders are replaced with literal values. +** +** If the expression cannot be created, NULL is returned. In this case, +** the caller has to use an OFFSET clause to extract only the required +** rows from the sourct table, just as it does for an RBU update operation. +*/ +static char *rbuVacuumIndexStart( + sqlite3rbu *p, /* RBU handle */ + RbuObjIter *pIter /* RBU iterator object */ +){ + char *zOrder = 0; + char *zLhs = 0; + char *zSelect = 0; + char *zVector = 0; + char *zRet = 0; + int bFailed = 0; + const char *zSep = ""; + int iCol = 0; + sqlite3_stmt *pXInfo = 0; + + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", pIter->zIdx) + ); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int iCid = sqlite3_column_int(pXInfo, 1); + const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); + const char *zCol; + if( sqlite3_column_int(pXInfo, 3) ){ + bFailed = 1; + break; + } + + if( iCid<0 ){ + if( pIter->eType==RBU_PK_IPK ){ + int i; + for(i=0; pIter->abTblPk[i]==0; i++); + assert( inTblCol ); + zCol = pIter->azTblCol[i]; + }else{ + zCol = "_rowid_"; + } + }else{ + zCol = pIter->azTblCol[iCid]; + } + + zLhs = rbuMPrintf(p, "%z%s \"%w\" COLLATE %Q", + zLhs, zSep, zCol, zCollate + ); + zOrder = rbuMPrintf(p, "%z%s \"rbu_imp_%d%w\" COLLATE %Q DESC", + zOrder, zSep, iCol, zCol, zCollate + ); + zSelect = rbuMPrintf(p, "%z%s quote(\"rbu_imp_%d%w\")", + zSelect, zSep, iCol, zCol + ); + zSep = ", "; + iCol++; + } + rbuFinalize(p, pXInfo); + if( bFailed ) goto index_start_out; + + if( p->rc==SQLITE_OK ){ + sqlite3_stmt *pSel = 0; + + p->rc = prepareFreeAndCollectError(p->dbMain, &pSel, &p->zErrmsg, + sqlite3_mprintf("SELECT %s FROM \"rbu_imp_%w\" ORDER BY %s LIMIT 1", + zSelect, pIter->zTbl, zOrder + ) + ); + if( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pSel) ){ + zSep = ""; + for(iCol=0; iColnCol; iCol++){ + const char *zQuoted = (const char*)sqlite3_column_text(pSel, iCol); + if( zQuoted==0 ){ + p->rc = SQLITE_NOMEM; + }else if( zQuoted[0]=='N' ){ + bFailed = 1; + break; + } + zVector = rbuMPrintf(p, "%z%s%s", zVector, zSep, zQuoted); + zSep = ", "; + } + + if( !bFailed ){ + zRet = rbuMPrintf(p, "(%s) > (%s)", zLhs, zVector); + } + } + rbuFinalize(p, pSel); + } + + index_start_out: + sqlite3_free(zOrder); + sqlite3_free(zSelect); + sqlite3_free(zVector); + sqlite3_free(zLhs); + return zRet; +} + +/* +** This function is used to create a SELECT list (the list of SQL +** expressions that follows a SELECT keyword) for a SELECT statement +** used to read from an data_xxx or rbu_tmp_xxx table while updating the +** index object currently indicated by the iterator object passed as the +** second argument. A "PRAGMA index_xinfo = " statement is used +** to obtain the required information. +** +** If the index is of the following form: +** +** CREATE INDEX i1 ON t1(c, b COLLATE nocase); +** +** and "t1" is a table with an explicit INTEGER PRIMARY KEY column +** "ipk", the returned string is: +** +** "`c` COLLATE 'BINARY', `b` COLLATE 'NOCASE', `ipk` COLLATE 'BINARY'" +** +** As well as the returned string, three other malloc'd strings are +** returned via output parameters. As follows: +** +** pzImposterCols: ... +** pzImposterPk: ... +** pzWhere: ... +*/ +static char *rbuObjIterGetIndexCols( + sqlite3rbu *p, /* RBU object */ + RbuObjIter *pIter, /* Object iterator for column names */ + char **pzImposterCols, /* OUT: Columns for imposter table */ + char **pzImposterPk, /* OUT: Imposter PK clause */ + char **pzWhere, /* OUT: WHERE clause */ + int *pnBind /* OUT: Trbul number of columns */ +){ + int rc = p->rc; /* Error code */ + int rc2; /* sqlite3_finalize() return code */ + char *zRet = 0; /* String to return */ + char *zImpCols = 0; /* String to return via *pzImposterCols */ + char *zImpPK = 0; /* String to return via *pzImposterPK */ + char *zWhere = 0; /* String to return via *pzWhere */ + int nBind = 0; /* Value to return via *pnBind */ + const char *zCom = ""; /* Set to ", " later on */ + const char *zAnd = ""; /* Set to " AND " later on */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA index_xinfo = ? */ + + if( rc==SQLITE_OK ){ + assert( p->zErrmsg==0 ); + rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", pIter->zIdx) + ); + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int iCid = sqlite3_column_int(pXInfo, 1); + int bDesc = sqlite3_column_int(pXInfo, 3); + const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); + const char *zCol = 0; + const char *zType; + + if( iCid==-2 ){ + int iSeq = sqlite3_column_int(pXInfo, 0); + zRet = sqlite3_mprintf("%z%s(%.*s) COLLATE %Q", zRet, zCom, + pIter->aIdxCol[iSeq].nSpan, pIter->aIdxCol[iSeq].zSpan, zCollate + ); + zType = ""; + }else { + if( iCid<0 ){ + /* An integer primary key. If the table has an explicit IPK, use + ** its name. Otherwise, use "rbu_rowid". */ + if( pIter->eType==RBU_PK_IPK ){ + int i; + for(i=0; pIter->abTblPk[i]==0; i++); + assert( inTblCol ); + zCol = pIter->azTblCol[i]; + }else if( rbuIsVacuum(p) ){ + zCol = "_rowid_"; + }else{ + zCol = "rbu_rowid"; + } + zType = "INTEGER"; + }else{ + zCol = pIter->azTblCol[iCid]; + zType = pIter->azTblType[iCid]; + } + zRet = sqlite3_mprintf("%z%s\"%w\" COLLATE %Q", zRet, zCom,zCol,zCollate); + } + + if( pIter->bUnique==0 || sqlite3_column_int(pXInfo, 5) ){ + const char *zOrder = (bDesc ? " DESC" : ""); + zImpPK = sqlite3_mprintf("%z%s\"rbu_imp_%d%w\"%s", + zImpPK, zCom, nBind, zCol, zOrder + ); + } + zImpCols = sqlite3_mprintf("%z%s\"rbu_imp_%d%w\" %s COLLATE %Q", + zImpCols, zCom, nBind, zCol, zType, zCollate + ); + zWhere = sqlite3_mprintf( + "%z%s\"rbu_imp_%d%w\" IS ?", zWhere, zAnd, nBind, zCol + ); + if( zRet==0 || zImpPK==0 || zImpCols==0 || zWhere==0 ) rc = SQLITE_NOMEM; + zCom = ", "; + zAnd = " AND "; + nBind++; + } + + rc2 = sqlite3_finalize(pXInfo); + if( rc==SQLITE_OK ) rc = rc2; + + if( rc!=SQLITE_OK ){ + sqlite3_free(zRet); + sqlite3_free(zImpCols); + sqlite3_free(zImpPK); + sqlite3_free(zWhere); + zRet = 0; + zImpCols = 0; + zImpPK = 0; + zWhere = 0; + p->rc = rc; + } + + *pzImposterCols = zImpCols; + *pzImposterPk = zImpPK; + *pzWhere = zWhere; + *pnBind = nBind; + return zRet; +} + +/* +** Assuming the current table columns are "a", "b" and "c", and the zObj +** paramter is passed "old", return a string of the form: +** +** "old.a, old.b, old.b" +** +** With the column names escaped. +** +** For tables with implicit rowids - RBU_PK_EXTERNAL and RBU_PK_NONE, append +** the text ", old._rowid_" to the returned value. +*/ +static char *rbuObjIterGetOldlist( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zObj +){ + char *zList = 0; + if( p->rc==SQLITE_OK && pIter->abIndexed ){ + const char *zS = ""; + int i; + for(i=0; inTblCol; i++){ + if( pIter->abIndexed[i] ){ + const char *zCol = pIter->azTblCol[i]; + zList = sqlite3_mprintf("%z%s%s.\"%w\"", zList, zS, zObj, zCol); + }else{ + zList = sqlite3_mprintf("%z%sNULL", zList, zS); + } + zS = ", "; + if( zList==0 ){ + p->rc = SQLITE_NOMEM; + break; + } + } + + /* For a table with implicit rowids, append "old._rowid_" to the list. */ + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zList = rbuMPrintf(p, "%z, %s._rowid_", zList, zObj); + } + } + return zList; +} + +/* +** Return an expression that can be used in a WHERE clause to match the +** primary key of the current table. For example, if the table is: +** +** CREATE TABLE t1(a, b, c, PRIMARY KEY(b, c)); +** +** Return the string: +** +** "b = ?1 AND c = ?2" +*/ +static char *rbuObjIterGetWhere( + sqlite3rbu *p, + RbuObjIter *pIter +){ + char *zList = 0; + if( pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE ){ + zList = rbuMPrintf(p, "_rowid_ = ?%d", pIter->nTblCol+1); + }else if( pIter->eType==RBU_PK_EXTERNAL ){ + const char *zSep = ""; + int i; + for(i=0; inTblCol; i++){ + if( pIter->abTblPk[i] ){ + zList = rbuMPrintf(p, "%z%sc%d=?%d", zList, zSep, i, i+1); + zSep = " AND "; + } + } + zList = rbuMPrintf(p, + "_rowid_ = (SELECT id FROM rbu_imposter2 WHERE %z)", zList + ); + + }else{ + const char *zSep = ""; + int i; + for(i=0; inTblCol; i++){ + if( pIter->abTblPk[i] ){ + const char *zCol = pIter->azTblCol[i]; + zList = rbuMPrintf(p, "%z%s\"%w\"=?%d", zList, zSep, zCol, i+1); + zSep = " AND "; + } + } + } + return zList; +} + +/* +** The SELECT statement iterating through the keys for the current object +** (p->objiter.pSelect) currently points to a valid row. However, there +** is something wrong with the rbu_control value in the rbu_control value +** stored in the (p->nCol+1)'th column. Set the error code and error message +** of the RBU handle to something reflecting this. +*/ +static void rbuBadControlError(sqlite3rbu *p){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("invalid rbu_control value"); +} + + +/* +** Return a nul-terminated string containing the comma separated list of +** assignments that should be included following the "SET" keyword of +** an UPDATE statement used to update the table object that the iterator +** passed as the second argument currently points to if the rbu_control +** column of the data_xxx table entry is set to zMask. +** +** The memory for the returned string is obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free it using +** sqlite3_free(). +** +** If an OOM error is encountered when allocating space for the new +** string, an error code is left in the rbu handle passed as the first +** argument and NULL is returned. Or, if an error has already occurred +** when this function is called, NULL is returned immediately, without +** attempting the allocation or modifying the stored error code. +*/ +static char *rbuObjIterGetSetlist( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zMask +){ + char *zList = 0; + if( p->rc==SQLITE_OK ){ + int i; + + if( (int)strlen(zMask)!=pIter->nTblCol ){ + rbuBadControlError(p); + }else{ + const char *zSep = ""; + for(i=0; inTblCol; i++){ + char c = zMask[pIter->aiSrcOrder[i]]; + if( c=='x' ){ + zList = rbuMPrintf(p, "%z%s\"%w\"=?%d", + zList, zSep, pIter->azTblCol[i], i+1 + ); + zSep = ", "; + } + else if( c=='d' ){ + zList = rbuMPrintf(p, "%z%s\"%w\"=rbu_delta(\"%w\", ?%d)", + zList, zSep, pIter->azTblCol[i], pIter->azTblCol[i], i+1 + ); + zSep = ", "; + } + else if( c=='f' ){ + zList = rbuMPrintf(p, "%z%s\"%w\"=rbu_fossil_delta(\"%w\", ?%d)", + zList, zSep, pIter->azTblCol[i], pIter->azTblCol[i], i+1 + ); + zSep = ", "; + } + } + } + } + return zList; +} + +/* +** Return a nul-terminated string consisting of nByte comma separated +** "?" expressions. For example, if nByte is 3, return a pointer to +** a buffer containing the string "?,?,?". +** +** The memory for the returned string is obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free it using +** sqlite3_free(). +** +** If an OOM error is encountered when allocating space for the new +** string, an error code is left in the rbu handle passed as the first +** argument and NULL is returned. Or, if an error has already occurred +** when this function is called, NULL is returned immediately, without +** attempting the allocation or modifying the stored error code. +*/ +static char *rbuObjIterGetBindlist(sqlite3rbu *p, int nBind){ + char *zRet = 0; + sqlite3_int64 nByte = 2*(sqlite3_int64)nBind + 1; + + zRet = (char*)rbuMalloc(p, nByte); + if( zRet ){ + int i; + for(i=0; izIdx==0 ); + if( p->rc==SQLITE_OK ){ + const char *zSep = "PRIMARY KEY("; + sqlite3_stmt *pXList = 0; /* PRAGMA index_list = (pIter->zTbl) */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA index_xinfo = */ + + p->rc = prepareFreeAndCollectError(p->dbMain, &pXList, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_list = %Q", pIter->zTbl) + ); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXList) ){ + const char *zOrig = (const char*)sqlite3_column_text(pXList,3); + if( zOrig && strcmp(zOrig, "pk")==0 ){ + const char *zIdx = (const char*)sqlite3_column_text(pXList,1); + if( zIdx ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + } + break; + } + } + rbuFinalize(p, pXList); + + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + if( sqlite3_column_int(pXInfo, 5) ){ + /* int iCid = sqlite3_column_int(pXInfo, 0); */ + const char *zCol = (const char*)sqlite3_column_text(pXInfo, 2); + const char *zDesc = sqlite3_column_int(pXInfo, 3) ? " DESC" : ""; + z = rbuMPrintf(p, "%z%s\"%w\"%s", z, zSep, zCol, zDesc); + zSep = ", "; + } + } + z = rbuMPrintf(p, "%z)", z); + rbuFinalize(p, pXInfo); + } + return z; +} + +/* +** This function creates the second imposter table used when writing to +** a table b-tree where the table has an external primary key. If the +** iterator passed as the second argument does not currently point to +** a table (not index) with an external primary key, this function is a +** no-op. +** +** Assuming the iterator does point to a table with an external PK, this +** function creates a WITHOUT ROWID imposter table named "rbu_imposter2" +** used to access that PK index. For example, if the target table is +** declared as follows: +** +** CREATE TABLE t1(a, b TEXT, c REAL, PRIMARY KEY(b, c)); +** +** then the imposter table schema is: +** +** CREATE TABLE rbu_imposter2(c1 TEXT, c2 REAL, id INTEGER) WITHOUT ROWID; +** +*/ +static void rbuCreateImposterTable2(sqlite3rbu *p, RbuObjIter *pIter){ + if( p->rc==SQLITE_OK && pIter->eType==RBU_PK_EXTERNAL ){ + int tnum = pIter->iPkTnum; /* Root page of PK index */ + sqlite3_stmt *pQuery = 0; /* SELECT name ... WHERE rootpage = $tnum */ + const char *zIdx = 0; /* Name of PK index */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA main.index_xinfo = $zIdx */ + const char *zComma = ""; + char *zCols = 0; /* Used to build up list of table cols */ + char *zPk = 0; /* Used to build up table PK declaration */ + + /* Figure out the name of the primary key index for the current table. + ** This is needed for the argument to "PRAGMA index_xinfo". Set + ** zIdx to point to a nul-terminated string containing this name. */ + p->rc = prepareAndCollectError(p->dbMain, &pQuery, &p->zErrmsg, + "SELECT name FROM sqlite_schema WHERE rootpage = ?" + ); + if( p->rc==SQLITE_OK ){ + sqlite3_bind_int(pQuery, 1, tnum); + if( SQLITE_ROW==sqlite3_step(pQuery) ){ + zIdx = (const char*)sqlite3_column_text(pQuery, 0); + } + } + if( zIdx ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + } + rbuFinalize(p, pQuery); + + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int bKey = sqlite3_column_int(pXInfo, 5); + if( bKey ){ + int iCid = sqlite3_column_int(pXInfo, 1); + int bDesc = sqlite3_column_int(pXInfo, 3); + const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); + zCols = rbuMPrintf(p, "%z%sc%d %s COLLATE %Q", zCols, zComma, + iCid, pIter->azTblType[iCid], zCollate + ); + zPk = rbuMPrintf(p, "%z%sc%d%s", zPk, zComma, iCid, bDesc?" DESC":""); + zComma = ", "; + } + } + zCols = rbuMPrintf(p, "%z, id INTEGER", zCols); + rbuFinalize(p, pXInfo); + + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1, tnum); + rbuMPrintfExec(p, p->dbMain, + "CREATE TABLE rbu_imposter2(%z, PRIMARY KEY(%z)) WITHOUT ROWID", + zCols, zPk + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); + } +} + +/* +** If an error has already occurred when this function is called, it +** immediately returns zero (without doing any work). Or, if an error +** occurs during the execution of this function, it sets the error code +** in the sqlite3rbu object indicated by the first argument and returns +** zero. +** +** The iterator passed as the second argument is guaranteed to point to +** a table (not an index) when this function is called. This function +** attempts to create any imposter table required to write to the main +** table b-tree of the table before returning. Non-zero is returned if +** an imposter table are created, or zero otherwise. +** +** An imposter table is required in all cases except RBU_PK_VTAB. Only +** virtual tables are written to directly. The imposter table has the +** same schema as the actual target table (less any UNIQUE constraints). +** More precisely, the "same schema" means the same columns, types, +** collation sequences. For tables that do not have an external PRIMARY +** KEY, it also means the same PRIMARY KEY declaration. +*/ +static void rbuCreateImposterTable(sqlite3rbu *p, RbuObjIter *pIter){ + if( p->rc==SQLITE_OK && pIter->eType!=RBU_PK_VTAB ){ + int tnum = pIter->iTnum; + const char *zComma = ""; + char *zSql = 0; + int iCol; + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 1); + + for(iCol=0; p->rc==SQLITE_OK && iColnTblCol; iCol++){ + const char *zPk = ""; + const char *zCol = pIter->azTblCol[iCol]; + const char *zColl = 0; + + p->rc = sqlite3_table_column_metadata( + p->dbMain, "main", pIter->zTbl, zCol, 0, &zColl, 0, 0, 0 + ); + + if( pIter->eType==RBU_PK_IPK && pIter->abTblPk[iCol] ){ + /* If the target table column is an "INTEGER PRIMARY KEY", add + ** "PRIMARY KEY" to the imposter table column declaration. */ + zPk = "PRIMARY KEY "; + } + zSql = rbuMPrintf(p, "%z%s\"%w\" %s %sCOLLATE %Q%s", + zSql, zComma, zCol, pIter->azTblType[iCol], zPk, zColl, + (pIter->abNotNull[iCol] ? " NOT NULL" : "") + ); + zComma = ", "; + } + + if( pIter->eType==RBU_PK_WITHOUT_ROWID ){ + char *zPk = rbuWithoutRowidPK(p, pIter); + if( zPk ){ + zSql = rbuMPrintf(p, "%z, %z", zSql, zPk); + } + } + + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1, tnum); + rbuMPrintfExec(p, p->dbMain, "CREATE TABLE \"rbu_imp_%w\"(%z)%s", + pIter->zTbl, zSql, + (pIter->eType==RBU_PK_WITHOUT_ROWID ? " WITHOUT ROWID" : "") + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); + } +} + +/* +** Prepare a statement used to insert rows into the "rbu_tmp_xxx" table. +** Specifically a statement of the form: +** +** INSERT INTO rbu_tmp_xxx VALUES(?, ?, ? ...); +** +** The number of bound variables is equal to the number of columns in +** the target table, plus one (for the rbu_control column), plus one more +** (for the rbu_rowid column) if the target table is an implicit IPK or +** virtual table. +*/ +static void rbuObjIterPrepareTmpInsert( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zCollist, + const char *zRbuRowid +){ + int bRbuRowid = (pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE); + char *zBind = rbuObjIterGetBindlist(p, pIter->nTblCol + 1 + bRbuRowid); + if( zBind ){ + assert( pIter->pTmpInsert==0 ); + p->rc = prepareFreeAndCollectError( + p->dbRbu, &pIter->pTmpInsert, &p->zErrmsg, sqlite3_mprintf( + "INSERT INTO %s.'rbu_tmp_%q'(rbu_control,%s%s) VALUES(%z)", + p->zStateDb, pIter->zDataTbl, zCollist, zRbuRowid, zBind + )); + } +} + +static void rbuTmpInsertFunc( + sqlite3_context *pCtx, + int nVal, + sqlite3_value **apVal +){ + sqlite3rbu *p = sqlite3_user_data(pCtx); + int rc = SQLITE_OK; + int i; + + assert( sqlite3_value_int(apVal[0])!=0 + || p->objiter.eType==RBU_PK_EXTERNAL + || p->objiter.eType==RBU_PK_NONE + ); + if( sqlite3_value_int(apVal[0])!=0 ){ + p->nPhaseOneStep += p->objiter.nIndex; + } + + for(i=0; rc==SQLITE_OK && iobjiter.pTmpInsert, i+1, apVal[i]); + } + if( rc==SQLITE_OK ){ + sqlite3_step(p->objiter.pTmpInsert); + rc = sqlite3_reset(p->objiter.pTmpInsert); + } + + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + } +} + +static char *rbuObjIterGetIndexWhere(sqlite3rbu *p, RbuObjIter *pIter){ + sqlite3_stmt *pStmt = 0; + int rc = p->rc; + char *zRet = 0; + + assert( pIter->zIdxSql==0 && pIter->nIdxCol==0 && pIter->aIdxCol==0 ); + + if( rc==SQLITE_OK ){ + rc = prepareAndCollectError(p->dbMain, &pStmt, &p->zErrmsg, + "SELECT trim(sql) FROM sqlite_schema WHERE type='index' AND name=?" + ); + } + if( rc==SQLITE_OK ){ + int rc2; + rc = sqlite3_bind_text(pStmt, 1, pIter->zIdx, -1, SQLITE_STATIC); + if( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + char *zSql = (char*)sqlite3_column_text(pStmt, 0); + if( zSql ){ + pIter->zIdxSql = zSql = rbuStrndup(zSql, &rc); + } + if( zSql ){ + int nParen = 0; /* Number of open parenthesis */ + int i; + int iIdxCol = 0; + int nIdxAlloc = 0; + for(i=0; zSql[i]; i++){ + char c = zSql[i]; + + /* If necessary, grow the pIter->aIdxCol[] array */ + if( iIdxCol==nIdxAlloc ){ + RbuSpan *aIdxCol = (RbuSpan*)sqlite3_realloc( + pIter->aIdxCol, (nIdxAlloc+16)*sizeof(RbuSpan) + ); + if( aIdxCol==0 ){ + rc = SQLITE_NOMEM; + break; + } + pIter->aIdxCol = aIdxCol; + nIdxAlloc += 16; + } + + if( c=='(' ){ + if( nParen==0 ){ + assert( iIdxCol==0 ); + pIter->aIdxCol[0].zSpan = &zSql[i+1]; + } + nParen++; + } + else if( c==')' ){ + nParen--; + if( nParen==0 ){ + int nSpan = &zSql[i] - pIter->aIdxCol[iIdxCol].zSpan; + pIter->aIdxCol[iIdxCol++].nSpan = nSpan; + i++; + break; + } + }else if( c==',' && nParen==1 ){ + int nSpan = &zSql[i] - pIter->aIdxCol[iIdxCol].zSpan; + pIter->aIdxCol[iIdxCol++].nSpan = nSpan; + pIter->aIdxCol[iIdxCol].zSpan = &zSql[i+1]; + }else if( c=='"' || c=='\'' || c=='`' ){ + for(i++; 1; i++){ + if( zSql[i]==c ){ + if( zSql[i+1]!=c ) break; + i++; + } + } + }else if( c=='[' ){ + for(i++; 1; i++){ + if( zSql[i]==']' ) break; + } + }else if( c=='-' && zSql[i+1]=='-' ){ + for(i=i+2; zSql[i] && zSql[i]!='\n'; i++); + if( zSql[i]=='\0' ) break; + }else if( c=='/' && zSql[i+1]=='*' ){ + for(i=i+2; zSql[i] && (zSql[i]!='*' || zSql[i+1]!='/'); i++); + if( zSql[i]=='\0' ) break; + i++; + } + } + if( zSql[i] ){ + zRet = rbuStrndup(&zSql[i], &rc); + } + pIter->nIdxCol = iIdxCol; + } + } + + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + } + + p->rc = rc; + return zRet; +} + +/* +** Ensure that the SQLite statement handles required to update the +** target database object currently indicated by the iterator passed +** as the second argument are available. +*/ +static int rbuObjIterPrepareAll( + sqlite3rbu *p, + RbuObjIter *pIter, + int nOffset /* Add "LIMIT -1 OFFSET $nOffset" to SELECT */ +){ + assert( pIter->bCleanup==0 ); + if( pIter->pSelect==0 && rbuObjIterCacheTableInfo(p, pIter)==SQLITE_OK ){ + const int tnum = pIter->iTnum; + char *zCollist = 0; /* List of indexed columns */ + char **pz = &p->zErrmsg; + const char *zIdx = pIter->zIdx; + char *zLimit = 0; + + if( nOffset ){ + zLimit = sqlite3_mprintf(" LIMIT -1 OFFSET %d", nOffset); + if( !zLimit ) p->rc = SQLITE_NOMEM; + } + + if( zIdx ){ + const char *zTbl = pIter->zTbl; + char *zImposterCols = 0; /* Columns for imposter table */ + char *zImposterPK = 0; /* Primary key declaration for imposter */ + char *zWhere = 0; /* WHERE clause on PK columns */ + char *zBind = 0; + char *zPart = 0; + int nBind = 0; + + assert( pIter->eType!=RBU_PK_VTAB ); + zPart = rbuObjIterGetIndexWhere(p, pIter); + zCollist = rbuObjIterGetIndexCols( + p, pIter, &zImposterCols, &zImposterPK, &zWhere, &nBind + ); + zBind = rbuObjIterGetBindlist(p, nBind); + + /* Create the imposter table used to write to this index. */ + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 1); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1,tnum); + rbuMPrintfExec(p, p->dbMain, + "CREATE TABLE \"rbu_imp_%w\"( %s, PRIMARY KEY( %s ) ) WITHOUT ROWID", + zTbl, zImposterCols, zImposterPK + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); + + /* Create the statement to insert index entries */ + pIter->nCol = nBind; + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError( + p->dbMain, &pIter->pInsert, &p->zErrmsg, + sqlite3_mprintf("INSERT INTO \"rbu_imp_%w\" VALUES(%s)", zTbl, zBind) + ); + } + + /* And to delete index entries */ + if( rbuIsVacuum(p)==0 && p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError( + p->dbMain, &pIter->pDelete, &p->zErrmsg, + sqlite3_mprintf("DELETE FROM \"rbu_imp_%w\" WHERE %s", zTbl, zWhere) + ); + } + + /* Create the SELECT statement to read keys in sorted order */ + if( p->rc==SQLITE_OK ){ + char *zSql; + if( rbuIsVacuum(p) ){ + char *zStart = 0; + if( nOffset ){ + zStart = rbuVacuumIndexStart(p, pIter); + if( zStart ){ + sqlite3_free(zLimit); + zLimit = 0; + } + } + + zSql = sqlite3_mprintf( + "SELECT %s, 0 AS rbu_control FROM '%q' %s %s %s ORDER BY %s%s", + zCollist, + pIter->zDataTbl, + zPart, + (zStart ? (zPart ? "AND" : "WHERE") : ""), zStart, + zCollist, zLimit + ); + sqlite3_free(zStart); + }else + + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zSql = sqlite3_mprintf( + "SELECT %s, rbu_control FROM %s.'rbu_tmp_%q' %s ORDER BY %s%s", + zCollist, p->zStateDb, pIter->zDataTbl, + zPart, zCollist, zLimit + ); + }else{ + zSql = sqlite3_mprintf( + "SELECT %s, rbu_control FROM %s.'rbu_tmp_%q' %s " + "UNION ALL " + "SELECT %s, rbu_control FROM '%q' " + "%s %s typeof(rbu_control)='integer' AND rbu_control!=1 " + "ORDER BY %s%s", + zCollist, p->zStateDb, pIter->zDataTbl, zPart, + zCollist, pIter->zDataTbl, + zPart, + (zPart ? "AND" : "WHERE"), + zCollist, zLimit + ); + } + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbRbu,&pIter->pSelect,pz,zSql); + }else{ + sqlite3_free(zSql); + } + } + + sqlite3_free(zImposterCols); + sqlite3_free(zImposterPK); + sqlite3_free(zWhere); + sqlite3_free(zBind); + sqlite3_free(zPart); + }else{ + int bRbuRowid = (pIter->eType==RBU_PK_VTAB) + ||(pIter->eType==RBU_PK_NONE) + ||(pIter->eType==RBU_PK_EXTERNAL && rbuIsVacuum(p)); + const char *zTbl = pIter->zTbl; /* Table this step applies to */ + const char *zWrite; /* Imposter table name */ + + char *zBindings = rbuObjIterGetBindlist(p, pIter->nTblCol + bRbuRowid); + char *zWhere = rbuObjIterGetWhere(p, pIter); + char *zOldlist = rbuObjIterGetOldlist(p, pIter, "old"); + char *zNewlist = rbuObjIterGetOldlist(p, pIter, "new"); + + zCollist = rbuObjIterGetCollist(p, pIter); + pIter->nCol = pIter->nTblCol; + + /* Create the imposter table or tables (if required). */ + rbuCreateImposterTable(p, pIter); + rbuCreateImposterTable2(p, pIter); + zWrite = (pIter->eType==RBU_PK_VTAB ? "" : "rbu_imp_"); + + /* Create the INSERT statement to write to the target PK b-tree */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pIter->pInsert, pz, + sqlite3_mprintf( + "INSERT INTO \"%s%w\"(%s%s) VALUES(%s)", + zWrite, zTbl, zCollist, (bRbuRowid ? ", _rowid_" : ""), zBindings + ) + ); + } + + /* Create the DELETE statement to write to the target PK b-tree. + ** Because it only performs INSERT operations, this is not required for + ** an rbu vacuum handle. */ + if( rbuIsVacuum(p)==0 && p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pIter->pDelete, pz, + sqlite3_mprintf( + "DELETE FROM \"%s%w\" WHERE %s", zWrite, zTbl, zWhere + ) + ); + } + + if( rbuIsVacuum(p)==0 && pIter->abIndexed ){ + const char *zRbuRowid = ""; + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zRbuRowid = ", rbu_rowid"; + } + + /* Create the rbu_tmp_xxx table and the triggers to populate it. */ + rbuMPrintfExec(p, p->dbRbu, + "CREATE TABLE IF NOT EXISTS %s.'rbu_tmp_%q' AS " + "SELECT *%s FROM '%q' WHERE 0;" + , p->zStateDb, pIter->zDataTbl + , (pIter->eType==RBU_PK_EXTERNAL ? ", 0 AS rbu_rowid" : "") + , pIter->zDataTbl + ); + + rbuMPrintfExec(p, p->dbMain, + "CREATE TEMP TRIGGER rbu_delete_tr BEFORE DELETE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(3, %s);" + "END;" + + "CREATE TEMP TRIGGER rbu_update1_tr BEFORE UPDATE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(3, %s);" + "END;" + + "CREATE TEMP TRIGGER rbu_update2_tr AFTER UPDATE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(4, %s);" + "END;", + zWrite, zTbl, zOldlist, + zWrite, zTbl, zOldlist, + zWrite, zTbl, zNewlist + ); + + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + rbuMPrintfExec(p, p->dbMain, + "CREATE TEMP TRIGGER rbu_insert_tr AFTER INSERT ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(0, %s);" + "END;", + zWrite, zTbl, zNewlist + ); + } + + rbuObjIterPrepareTmpInsert(p, pIter, zCollist, zRbuRowid); + } + + /* Create the SELECT statement to read keys from data_xxx */ + if( p->rc==SQLITE_OK ){ + const char *zRbuRowid = ""; + char *zStart = 0; + char *zOrder = 0; + if( bRbuRowid ){ + zRbuRowid = rbuIsVacuum(p) ? ",_rowid_ " : ",rbu_rowid"; + } + + if( rbuIsVacuum(p) ){ + if( nOffset ){ + zStart = rbuVacuumTableStart(p, pIter, bRbuRowid, zWrite); + if( zStart ){ + sqlite3_free(zLimit); + zLimit = 0; + } + } + if( bRbuRowid ){ + zOrder = rbuMPrintf(p, "_rowid_"); + }else{ + zOrder = rbuObjIterGetPkList(p, pIter, "", ", ", ""); + } + } + + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbRbu, &pIter->pSelect, pz, + sqlite3_mprintf( + "SELECT %s,%s rbu_control%s FROM '%q'%s %s %s %s", + zCollist, + (rbuIsVacuum(p) ? "0 AS " : ""), + zRbuRowid, + pIter->zDataTbl, (zStart ? zStart : ""), + (zOrder ? "ORDER BY" : ""), zOrder, + zLimit + ) + ); + } + sqlite3_free(zStart); + sqlite3_free(zOrder); + } + + sqlite3_free(zWhere); + sqlite3_free(zOldlist); + sqlite3_free(zNewlist); + sqlite3_free(zBindings); + } + sqlite3_free(zCollist); + sqlite3_free(zLimit); + } + + return p->rc; +} + +/* +** Set output variable *ppStmt to point to an UPDATE statement that may +** be used to update the imposter table for the main table b-tree of the +** table object that pIter currently points to, assuming that the +** rbu_control column of the data_xyz table contains zMask. +** +** If the zMask string does not specify any columns to update, then this +** is not an error. Output variable *ppStmt is set to NULL in this case. +*/ +static int rbuGetUpdateStmt( + sqlite3rbu *p, /* RBU handle */ + RbuObjIter *pIter, /* Object iterator */ + const char *zMask, /* rbu_control value ('x.x.') */ + sqlite3_stmt **ppStmt /* OUT: UPDATE statement handle */ +){ + RbuUpdateStmt **pp; + RbuUpdateStmt *pUp = 0; + int nUp = 0; + + /* In case an error occurs */ + *ppStmt = 0; + + /* Search for an existing statement. If one is found, shift it to the front + ** of the LRU queue and return immediately. Otherwise, leave nUp pointing + ** to the number of statements currently in the cache and pUp to the + ** last object in the list. */ + for(pp=&pIter->pRbuUpdate; *pp; pp=&((*pp)->pNext)){ + pUp = *pp; + if( strcmp(pUp->zMask, zMask)==0 ){ + *pp = pUp->pNext; + pUp->pNext = pIter->pRbuUpdate; + pIter->pRbuUpdate = pUp; + *ppStmt = pUp->pUpdate; + return SQLITE_OK; + } + nUp++; + } + assert( pUp==0 || pUp->pNext==0 ); + + if( nUp>=SQLITE_RBU_UPDATE_CACHESIZE ){ + for(pp=&pIter->pRbuUpdate; *pp!=pUp; pp=&((*pp)->pNext)); + *pp = 0; + sqlite3_finalize(pUp->pUpdate); + pUp->pUpdate = 0; + }else{ + pUp = (RbuUpdateStmt*)rbuMalloc(p, sizeof(RbuUpdateStmt)+pIter->nTblCol+1); + } + + if( pUp ){ + char *zWhere = rbuObjIterGetWhere(p, pIter); + char *zSet = rbuObjIterGetSetlist(p, pIter, zMask); + char *zUpdate = 0; + + pUp->zMask = (char*)&pUp[1]; + memcpy(pUp->zMask, zMask, pIter->nTblCol); + pUp->pNext = pIter->pRbuUpdate; + pIter->pRbuUpdate = pUp; + + if( zSet ){ + const char *zPrefix = ""; + + if( pIter->eType!=RBU_PK_VTAB ) zPrefix = "rbu_imp_"; + zUpdate = sqlite3_mprintf("UPDATE \"%s%w\" SET %s WHERE %s", + zPrefix, pIter->zTbl, zSet, zWhere + ); + p->rc = prepareFreeAndCollectError( + p->dbMain, &pUp->pUpdate, &p->zErrmsg, zUpdate + ); + *ppStmt = pUp->pUpdate; + } + sqlite3_free(zWhere); + sqlite3_free(zSet); + } + + return p->rc; +} + +static sqlite3 *rbuOpenDbhandle( + sqlite3rbu *p, + const char *zName, + int bUseVfs +){ + sqlite3 *db = 0; + if( p->rc==SQLITE_OK ){ + const int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_URI; + p->rc = sqlite3_open_v2(zName, &db, flags, bUseVfs ? p->zVfsName : 0); + if( p->rc ){ + p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + sqlite3_close(db); + db = 0; + } + } + return db; +} + +/* +** Free an RbuState object allocated by rbuLoadState(). +*/ +static void rbuFreeState(RbuState *p){ + if( p ){ + sqlite3_free(p->zTbl); + sqlite3_free(p->zDataTbl); + sqlite3_free(p->zIdx); + sqlite3_free(p); + } +} + +/* +** Allocate an RbuState object and load the contents of the rbu_state +** table into it. Return a pointer to the new object. It is the +** responsibility of the caller to eventually free the object using +** sqlite3_free(). +** +** If an error occurs, leave an error code and message in the rbu handle +** and return NULL. +*/ +static RbuState *rbuLoadState(sqlite3rbu *p){ + RbuState *pRet = 0; + sqlite3_stmt *pStmt = 0; + int rc; + int rc2; + + pRet = (RbuState*)rbuMalloc(p, sizeof(RbuState)); + if( pRet==0 ) return 0; + + rc = prepareFreeAndCollectError(p->dbRbu, &pStmt, &p->zErrmsg, + sqlite3_mprintf("SELECT k, v FROM %s.rbu_state", p->zStateDb) + ); + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + switch( sqlite3_column_int(pStmt, 0) ){ + case RBU_STATE_STAGE: + pRet->eStage = sqlite3_column_int(pStmt, 1); + if( pRet->eStage!=RBU_STAGE_OAL + && pRet->eStage!=RBU_STAGE_MOVE + && pRet->eStage!=RBU_STAGE_CKPT + ){ + p->rc = SQLITE_CORRUPT; + } + break; + + case RBU_STATE_TBL: + pRet->zTbl = rbuStrndup((char*)sqlite3_column_text(pStmt, 1), &rc); + break; + + case RBU_STATE_IDX: + pRet->zIdx = rbuStrndup((char*)sqlite3_column_text(pStmt, 1), &rc); + break; + + case RBU_STATE_ROW: + pRet->nRow = sqlite3_column_int(pStmt, 1); + break; + + case RBU_STATE_PROGRESS: + pRet->nProgress = sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_CKPT: + pRet->iWalCksum = sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_COOKIE: + pRet->iCookie = (u32)sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_OALSZ: + pRet->iOalSz = sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_PHASEONESTEP: + pRet->nPhaseOneStep = sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_DATATBL: + pRet->zDataTbl = rbuStrndup((char*)sqlite3_column_text(pStmt, 1), &rc); + break; + + default: + rc = SQLITE_CORRUPT; + break; + } + } + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + + p->rc = rc; + return pRet; +} + + +/* +** Open the database handle and attach the RBU database as "rbu". If an +** error occurs, leave an error code and message in the RBU handle. +** +** If argument dbMain is not NULL, then it is a database handle already +** open on the target database. Use this handle instead of opening a new +** one. +*/ +static void rbuOpenDatabase(sqlite3rbu *p, sqlite3 *dbMain, int *pbRetry){ + assert( p->rc || (p->dbMain==0 && p->dbRbu==0) ); + assert( p->rc || rbuIsVacuum(p) || p->zTarget!=0 ); + assert( dbMain==0 || rbuIsVacuum(p)==0 ); + + /* Open the RBU database */ + p->dbRbu = rbuOpenDbhandle(p, p->zRbu, 1); + p->dbMain = dbMain; + + if( p->rc==SQLITE_OK && rbuIsVacuum(p) ){ + sqlite3_file_control(p->dbRbu, "main", SQLITE_FCNTL_RBUCNT, (void*)p); + if( p->zState==0 ){ + const char *zFile = sqlite3_db_filename(p->dbRbu, "main"); + p->zState = rbuMPrintf(p, "file://%s-vacuum?modeof=%s", zFile, zFile); + } + } + + /* If using separate RBU and state databases, attach the state database to + ** the RBU db handle now. */ + if( p->zState ){ + rbuMPrintfExec(p, p->dbRbu, "ATTACH %Q AS stat", p->zState); + memcpy(p->zStateDb, "stat", 4); + }else{ + memcpy(p->zStateDb, "main", 4); + } + +#if 0 + if( p->rc==SQLITE_OK && rbuIsVacuum(p) ){ + p->rc = sqlite3_exec(p->dbRbu, "BEGIN", 0, 0, 0); + } +#endif + + /* If it has not already been created, create the rbu_state table */ + rbuMPrintfExec(p, p->dbRbu, RBU_CREATE_STATE, p->zStateDb); + +#if 0 + if( rbuIsVacuum(p) ){ + if( p->rc==SQLITE_OK ){ + int rc2; + int bOk = 0; + sqlite3_stmt *pCnt = 0; + p->rc = prepareAndCollectError(p->dbRbu, &pCnt, &p->zErrmsg, + "SELECT count(*) FROM stat.sqlite_schema" + ); + if( p->rc==SQLITE_OK + && sqlite3_step(pCnt)==SQLITE_ROW + && 1==sqlite3_column_int(pCnt, 0) + ){ + bOk = 1; + } + rc2 = sqlite3_finalize(pCnt); + if( p->rc==SQLITE_OK ) p->rc = rc2; + + if( p->rc==SQLITE_OK && bOk==0 ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("invalid state database"); + } + + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->dbRbu, "COMMIT", 0, 0, 0); + } + } + } +#endif + + if( p->rc==SQLITE_OK && rbuIsVacuum(p) ){ + int bOpen = 0; + int rc; + p->nRbu = 0; + p->pRbuFd = 0; + rc = sqlite3_file_control(p->dbRbu, "main", SQLITE_FCNTL_RBUCNT, (void*)p); + if( rc!=SQLITE_NOTFOUND ) p->rc = rc; + if( p->eStage>=RBU_STAGE_MOVE ){ + bOpen = 1; + }else{ + RbuState *pState = rbuLoadState(p); + if( pState ){ + bOpen = (pState->eStage>=RBU_STAGE_MOVE); + rbuFreeState(pState); + } + } + if( bOpen ) p->dbMain = rbuOpenDbhandle(p, p->zRbu, p->nRbu<=1); + } + + p->eStage = 0; + if( p->rc==SQLITE_OK && p->dbMain==0 ){ + if( !rbuIsVacuum(p) ){ + p->dbMain = rbuOpenDbhandle(p, p->zTarget, 1); + }else if( p->pRbuFd->pWalFd ){ + if( pbRetry ){ + p->pRbuFd->bNolock = 0; + sqlite3_close(p->dbRbu); + sqlite3_close(p->dbMain); + p->dbMain = 0; + p->dbRbu = 0; + *pbRetry = 1; + return; + } + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("cannot vacuum wal mode database"); + }else{ + char *zTarget; + char *zExtra = 0; + if( strlen(p->zRbu)>=5 && 0==memcmp("file:", p->zRbu, 5) ){ + zExtra = &p->zRbu[5]; + while( *zExtra ){ + if( *zExtra++=='?' ) break; + } + if( *zExtra=='\0' ) zExtra = 0; + } + + zTarget = sqlite3_mprintf("file:%s-vactmp?rbu_memory=1%s%s", + sqlite3_db_filename(p->dbRbu, "main"), + (zExtra==0 ? "" : "&"), (zExtra==0 ? "" : zExtra) + ); + + if( zTarget==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + p->dbMain = rbuOpenDbhandle(p, zTarget, p->nRbu<=1); + sqlite3_free(zTarget); + } + } + + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_create_function(p->dbMain, + "rbu_tmp_insert", -1, SQLITE_UTF8, (void*)p, rbuTmpInsertFunc, 0, 0 + ); + } + + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_create_function(p->dbMain, + "rbu_fossil_delta", 2, SQLITE_UTF8, 0, rbuFossilDeltaFunc, 0, 0 + ); + } + + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_create_function(p->dbRbu, + "rbu_target_name", -1, SQLITE_UTF8, (void*)p, rbuTargetNameFunc, 0, 0 + ); + } + + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_RBU, (void*)p); + } + rbuMPrintfExec(p, p->dbMain, "SELECT * FROM sqlite_schema"); + + /* Mark the database file just opened as an RBU target database. If + ** this call returns SQLITE_NOTFOUND, then the RBU vfs is not in use. + ** This is an error. */ + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_RBU, (void*)p); + } + + if( p->rc==SQLITE_NOTFOUND ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("rbu vfs not found"); + } +} + +/* +** This routine is a copy of the sqlite3FileSuffix3() routine from the core. +** It is a no-op unless SQLITE_ENABLE_8_3_NAMES is defined. +** +** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database +** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and +** if filename in z[] has a suffix (a.k.a. "extension") that is longer than +** three characters, then shorten the suffix on z[] to be the last three +** characters of the original suffix. +** +** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always +** do the suffix shortening regardless of URI parameter. +** +** Examples: +** +** test.db-journal => test.nal +** test.db-wal => test.wal +** test.db-shm => test.shm +** test.db-mj7f3319fa => test.9fa +*/ +static void rbuFileSuffix3(const char *zBase, char *z){ +#ifdef SQLITE_ENABLE_8_3_NAMES +#if SQLITE_ENABLE_8_3_NAMES<2 + if( sqlite3_uri_boolean(zBase, "8_3_names", 0) ) +#endif + { + int i, sz; + sz = (int)strlen(z)&0xffffff; + for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} + if( z[i]=='.' && sz>i+4 ) memmove(&z[i+1], &z[sz-3], 4); + } +#endif +} + +/* +** Return the current wal-index header checksum for the target database +** as a 64-bit integer. +** +** The checksum is store in the first page of xShmMap memory as an 8-byte +** blob starting at byte offset 40. +*/ +static i64 rbuShmChecksum(sqlite3rbu *p){ + i64 iRet = 0; + if( p->rc==SQLITE_OK ){ + sqlite3_file *pDb = p->pTargetFd->pReal; + u32 volatile *ptr; + p->rc = pDb->pMethods->xShmMap(pDb, 0, 32*1024, 0, (void volatile**)&ptr); + if( p->rc==SQLITE_OK ){ + iRet = ((i64)ptr[10] << 32) + ptr[11]; + } + } + return iRet; +} + +/* +** This function is called as part of initializing or reinitializing an +** incremental checkpoint. +** +** It populates the sqlite3rbu.aFrame[] array with the set of +** (wal frame -> db page) copy operations required to checkpoint the +** current wal file, and obtains the set of shm locks required to safely +** perform the copy operations directly on the file-system. +** +** If argument pState is not NULL, then the incremental checkpoint is +** being resumed. In this case, if the checksum of the wal-index-header +** following recovery is not the same as the checksum saved in the RbuState +** object, then the rbu handle is set to DONE state. This occurs if some +** other client appends a transaction to the wal file in the middle of +** an incremental checkpoint. +*/ +static void rbuSetupCheckpoint(sqlite3rbu *p, RbuState *pState){ + + /* If pState is NULL, then the wal file may not have been opened and + ** recovered. Running a read-statement here to ensure that doing so + ** does not interfere with the "capture" process below. */ + if( pState==0 ){ + p->eStage = 0; + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->dbMain, "SELECT * FROM sqlite_schema", 0, 0, 0); + } + } + + /* Assuming no error has occurred, run a "restart" checkpoint with the + ** sqlite3rbu.eStage variable set to CAPTURE. This turns on the following + ** special behaviour in the rbu VFS: + ** + ** * If the exclusive shm WRITER or READ0 lock cannot be obtained, + ** the checkpoint fails with SQLITE_BUSY (normally SQLite would + ** proceed with running a passive checkpoint instead of failing). + ** + ** * Attempts to read from the *-wal file or write to the database file + ** do not perform any IO. Instead, the frame/page combinations that + ** would be read/written are recorded in the sqlite3rbu.aFrame[] + ** array. + ** + ** * Calls to xShmLock(UNLOCK) to release the exclusive shm WRITER, + ** READ0 and CHECKPOINT locks taken as part of the checkpoint are + ** no-ops. These locks will not be released until the connection + ** is closed. + ** + ** * Attempting to xSync() the database file causes an SQLITE_INTERNAL + ** error. + ** + ** As a result, unless an error (i.e. OOM or SQLITE_BUSY) occurs, the + ** checkpoint below fails with SQLITE_INTERNAL, and leaves the aFrame[] + ** array populated with a set of (frame -> page) mappings. Because the + ** WRITER, CHECKPOINT and READ0 locks are still held, it is safe to copy + ** data from the wal file into the database file according to the + ** contents of aFrame[]. + */ + if( p->rc==SQLITE_OK ){ + int rc2; + p->eStage = RBU_STAGE_CAPTURE; + rc2 = sqlite3_exec(p->dbMain, "PRAGMA main.wal_checkpoint=restart", 0, 0,0); + if( rc2!=SQLITE_INTERNAL ) p->rc = rc2; + } + + if( p->rc==SQLITE_OK && p->nFrame>0 ){ + p->eStage = RBU_STAGE_CKPT; + p->nStep = (pState ? pState->nRow : 0); + p->aBuf = rbuMalloc(p, p->pgsz); + p->iWalCksum = rbuShmChecksum(p); + } + + if( p->rc==SQLITE_OK ){ + if( p->nFrame==0 || (pState && pState->iWalCksum!=p->iWalCksum) ){ + p->rc = SQLITE_DONE; + p->eStage = RBU_STAGE_DONE; + }else{ + int nSectorSize; + sqlite3_file *pDb = p->pTargetFd->pReal; + sqlite3_file *pWal = p->pTargetFd->pWalFd->pReal; + assert( p->nPagePerSector==0 ); + nSectorSize = pDb->pMethods->xSectorSize(pDb); + if( nSectorSize>p->pgsz ){ + p->nPagePerSector = nSectorSize / p->pgsz; + }else{ + p->nPagePerSector = 1; + } + + /* Call xSync() on the wal file. This causes SQLite to sync the + ** directory in which the target database and the wal file reside, in + ** case it has not been synced since the rename() call in + ** rbuMoveOalFile(). */ + p->rc = pWal->pMethods->xSync(pWal, SQLITE_SYNC_NORMAL); + } + } +} + +/* +** Called when iAmt bytes are read from offset iOff of the wal file while +** the rbu object is in capture mode. Record the frame number of the frame +** being read in the aFrame[] array. +*/ +static int rbuCaptureWalRead(sqlite3rbu *pRbu, i64 iOff, int iAmt){ + const u32 mReq = (1<mLock!=mReq ){ + pRbu->rc = SQLITE_BUSY; + return SQLITE_INTERNAL; + } + + pRbu->pgsz = iAmt; + if( pRbu->nFrame==pRbu->nFrameAlloc ){ + int nNew = (pRbu->nFrameAlloc ? pRbu->nFrameAlloc : 64) * 2; + RbuFrame *aNew; + aNew = (RbuFrame*)sqlite3_realloc64(pRbu->aFrame, nNew * sizeof(RbuFrame)); + if( aNew==0 ) return SQLITE_NOMEM; + pRbu->aFrame = aNew; + pRbu->nFrameAlloc = nNew; + } + + iFrame = (u32)((iOff-32) / (i64)(iAmt+24)) + 1; + if( pRbu->iMaxFrame + + +
    + + + + + + \ No newline at end of file diff --git a/web/gui/dashboard/css/bootstrap-3.3.7.css b/web/gui/dashboard/css/bootstrap-3.3.7.css new file mode 100644 index 0000000..55fa2ac --- /dev/null +++ b/web/gui/dashboard/css/bootstrap-3.3.7.css @@ -0,0 +1,6760 @@ +/*! + * Bootstrap v3.3.7 (http://getbootstrap.com) + * Copyright 2011-2016 Twitter, Inc. + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) + * SPDX-License-Identifier: MIT + */ +/*! normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css */ +html { + font-family: sans-serif; + -webkit-text-size-adjust: 100%; + -ms-text-size-adjust: 100%; +} +body { + margin: 0; +} +article, +aside, +details, +figcaption, +figure, +footer, +header, +hgroup, +main, +menu, +nav, +section, +summary { + display: block; +} +audio, +canvas, +progress, +video { + display: inline-block; + vertical-align: baseline; +} +audio:not([controls]) { + display: none; + height: 0; +} +[hidden], +template { + display: none; +} +a { + background-color: transparent; +} +a:active, +a:hover { + outline: 0; +} +abbr[title] { + border-bottom: 1px dotted; +} +b, +strong { + font-weight: bold; +} +dfn { + font-style: italic; +} +h1 { + margin: .67em 0; + font-size: 2em; +} +mark { + color: #000; + background: #ff0; +} +small { + font-size: 80%; +} +sub, +sup { + position: relative; + font-size: 75%; + line-height: 0; + vertical-align: baseline; +} +sup { + top: -.5em; +} +sub { + bottom: -.25em; +} +img { + border: 0; +} +svg:not(:root) { + overflow: hidden; +} +figure { + margin: 1em 40px; +} +hr { + height: 0; + -webkit-box-sizing: content-box; + -moz-box-sizing: content-box; + box-sizing: content-box; +} +pre { + overflow: auto; +} +code, +kbd, +pre, +samp { + font-family: monospace, monospace; + font-size: 1em; +} +button, +input, +optgroup, +select, +textarea { + margin: 0; + font: inherit; + color: inherit; +} +button { + overflow: visible; +} +button, +select { + text-transform: none; +} +button, +html input[type="button"], +input[type="reset"], +input[type="submit"] { + -webkit-appearance: button; + cursor: pointer; +} +button[disabled], +html input[disabled] { + cursor: default; +} +button::-moz-focus-inner, +input::-moz-focus-inner { + padding: 0; + border: 0; +} +input { + line-height: normal; +} +input[type="checkbox"], +input[type="radio"] { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; + padding: 0; +} +input[type="number"]::-webkit-inner-spin-button, +input[type="number"]::-webkit-outer-spin-button { + height: auto; +} +input[type="search"] { + -webkit-box-sizing: content-box; + -moz-box-sizing: content-box; + box-sizing: content-box; + -webkit-appearance: textfield; +} +input[type="search"]::-webkit-search-cancel-button, +input[type="search"]::-webkit-search-decoration { + -webkit-appearance: none; +} +fieldset { + padding: .35em .625em .75em; + margin: 0 2px; + border: 1px solid #c0c0c0; +} +legend { + padding: 0; + border: 0; +} +textarea { + overflow: auto; +} +optgroup { + font-weight: bold; +} +table { + border-spacing: 0; + border-collapse: collapse; +} +td, +th { + padding: 0; +} +/*! Source: https://github.com/h5bp/html5-boilerplate/blob/master/src/css/main.css */ +@media print { + *, + *:before, + *:after { + color: #000 !important; + text-shadow: none !important; + background: transparent !important; + -webkit-box-shadow: none !important; + box-shadow: none !important; + } + a, + a:visited { + text-decoration: underline; + } + a[href]:after { + content: " (" attr(href) ")"; + } + abbr[title]:after { + content: " (" attr(title) ")"; + } + a[href^="#"]:after, + a[href^="javascript:"]:after { + content: ""; + } + pre, + blockquote { + border: 1px solid #999; + + page-break-inside: avoid; + } + thead { + display: table-header-group; + } + tr, + img { + page-break-inside: avoid; + } + img { + max-width: 100% !important; + } + p, + h2, + h3 { + orphans: 3; + widows: 3; + } + h2, + h3 { + page-break-after: avoid; + } + .navbar { + display: none; + } + .btn > .caret, + .dropup > .btn > .caret { + border-top-color: #000 !important; + } + .label { + border: 1px solid #000; + } + .table { + border-collapse: collapse !important; + } + .table td, + .table th { + background-color: #fff !important; + } + .table-bordered th, + .table-bordered td { + border: 1px solid #ddd !important; + } +} +@font-face { + font-family: 'Glyphicons Halflings'; + + src: url('../fonts/glyphicons-halflings-regular.eot'); + /* removed svg font, because 1) it's incompatible with current cloud webpack loaders */ + /* 2) our supported browsers don't need svg anyway */ + src: url('../fonts/glyphicons-halflings-regular.eot?#iefix') format('embedded-opentype'), url('../fonts/glyphicons-halflings-regular.woff2') format('woff2'), url('../fonts/glyphicons-halflings-regular.woff') format('woff'), url('../fonts/glyphicons-halflings-regular.ttf') format('truetype'); +} +.glyphicon { + position: relative; + top: 1px; + display: inline-block; + font-family: 'Glyphicons Halflings'; + font-style: normal; + font-weight: normal; + line-height: 1; + + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} +.glyphicon-asterisk:before { + content: "\002a"; +} +.glyphicon-plus:before { + content: "\002b"; +} +.glyphicon-euro:before, +.glyphicon-eur:before { + content: "\20ac"; +} +.glyphicon-minus:before { + content: "\2212"; +} +.glyphicon-cloud:before { + content: "\2601"; +} +.glyphicon-envelope:before { + content: "\2709"; +} +.glyphicon-pencil:before { + content: "\270f"; +} +.glyphicon-glass:before { + content: "\e001"; +} +.glyphicon-music:before { + content: "\e002"; +} +.glyphicon-search:before { + content: "\e003"; +} +.glyphicon-heart:before { + content: "\e005"; +} +.glyphicon-star:before { + content: "\e006"; +} +.glyphicon-star-empty:before { + content: "\e007"; +} +.glyphicon-user:before { + content: "\e008"; +} +.glyphicon-film:before { + content: "\e009"; +} +.glyphicon-th-large:before { + content: "\e010"; +} +.glyphicon-th:before { + content: "\e011"; +} +.glyphicon-th-list:before { + content: "\e012"; +} +.glyphicon-ok:before { + content: "\e013"; +} +.glyphicon-remove:before { + content: "\e014"; +} +.glyphicon-zoom-in:before { + content: "\e015"; +} +.glyphicon-zoom-out:before { + content: "\e016"; +} +.glyphicon-off:before { + content: "\e017"; +} +.glyphicon-signal:before { + content: "\e018"; +} +.glyphicon-cog:before { + content: "\e019"; +} +.glyphicon-trash:before { + content: "\e020"; +} +.glyphicon-home:before { + content: "\e021"; +} +.glyphicon-file:before { + content: "\e022"; +} +.glyphicon-time:before { + content: "\e023"; +} +.glyphicon-road:before { + content: "\e024"; +} +.glyphicon-download-alt:before { + content: "\e025"; +} +.glyphicon-download:before { + content: "\e026"; +} +.glyphicon-upload:before { + content: "\e027"; +} +.glyphicon-inbox:before { + content: "\e028"; +} +.glyphicon-play-circle:before { + content: "\e029"; +} +.glyphicon-repeat:before { + content: "\e030"; +} +.glyphicon-refresh:before { + content: "\e031"; +} +.glyphicon-list-alt:before { + content: "\e032"; +} +.glyphicon-lock:before { + content: "\e033"; +} +.glyphicon-flag:before { + content: "\e034"; +} +.glyphicon-headphones:before { + content: "\e035"; +} +.glyphicon-volume-off:before { + content: "\e036"; +} +.glyphicon-volume-down:before { + content: "\e037"; +} +.glyphicon-volume-up:before { + content: "\e038"; +} +.glyphicon-qrcode:before { + content: "\e039"; +} +.glyphicon-barcode:before { + content: "\e040"; +} +.glyphicon-tag:before { + content: "\e041"; +} +.glyphicon-tags:before { + content: "\e042"; +} +.glyphicon-book:before { + content: "\e043"; +} +.glyphicon-bookmark:before { + content: "\e044"; +} +.glyphicon-print:before { + content: "\e045"; +} +.glyphicon-camera:before { + content: "\e046"; +} +.glyphicon-font:before { + content: "\e047"; +} +.glyphicon-bold:before { + content: "\e048"; +} +.glyphicon-italic:before { + content: "\e049"; +} +.glyphicon-text-height:before { + content: "\e050"; +} +.glyphicon-text-width:before { + content: "\e051"; +} +.glyphicon-align-left:before { + content: "\e052"; +} +.glyphicon-align-center:before { + content: "\e053"; +} +.glyphicon-align-right:before { + content: "\e054"; +} +.glyphicon-align-justify:before { + content: "\e055"; +} +.glyphicon-list:before { + content: "\e056"; +} +.glyphicon-indent-left:before { + content: "\e057"; +} +.glyphicon-indent-right:before { + content: "\e058"; +} +.glyphicon-facetime-video:before { + content: "\e059"; +} +.glyphicon-picture:before { + content: "\e060"; +} +.glyphicon-map-marker:before { + content: "\e062"; +} +.glyphicon-adjust:before { + content: "\e063"; +} +.glyphicon-tint:before { + content: "\e064"; +} +.glyphicon-edit:before { + content: "\e065"; +} +.glyphicon-share:before { + content: "\e066"; +} +.glyphicon-check:before { + content: "\e067"; +} +.glyphicon-move:before { + content: "\e068"; +} +.glyphicon-step-backward:before { + content: "\e069"; +} +.glyphicon-fast-backward:before { + content: "\e070"; +} +.glyphicon-backward:before { + content: "\e071"; +} +.glyphicon-play:before { + content: "\e072"; +} +.glyphicon-pause:before { + content: "\e073"; +} +.glyphicon-stop:before { + content: "\e074"; +} +.glyphicon-forward:before { + content: "\e075"; +} +.glyphicon-fast-forward:before { + content: "\e076"; +} +.glyphicon-step-forward:before { + content: "\e077"; +} +.glyphicon-eject:before { + content: "\e078"; +} +.glyphicon-chevron-left:before { + content: "\e079"; +} +.glyphicon-chevron-right:before { + content: "\e080"; +} +.glyphicon-plus-sign:before { + content: "\e081"; +} +.glyphicon-minus-sign:before { + content: "\e082"; +} +.glyphicon-remove-sign:before { + content: "\e083"; +} +.glyphicon-ok-sign:before { + content: "\e084"; +} +.glyphicon-question-sign:before { + content: "\e085"; +} +.glyphicon-info-sign:before { + content: "\e086"; +} +.glyphicon-screenshot:before { + content: "\e087"; +} +.glyphicon-remove-circle:before { + content: "\e088"; +} +.glyphicon-ok-circle:before { + content: "\e089"; +} +.glyphicon-ban-circle:before { + content: "\e090"; +} +.glyphicon-arrow-left:before { + content: "\e091"; +} +.glyphicon-arrow-right:before { + content: "\e092"; +} +.glyphicon-arrow-up:before { + content: "\e093"; +} +.glyphicon-arrow-down:before { + content: "\e094"; +} +.glyphicon-share-alt:before { + content: "\e095"; +} +.glyphicon-resize-full:before { + content: "\e096"; +} +.glyphicon-resize-small:before { + content: "\e097"; +} +.glyphicon-exclamation-sign:before { + content: "\e101"; +} +.glyphicon-gift:before { + content: "\e102"; +} +.glyphicon-leaf:before { + content: "\e103"; +} +.glyphicon-fire:before { + content: "\e104"; +} +.glyphicon-eye-open:before { + content: "\e105"; +} +.glyphicon-eye-close:before { + content: "\e106"; +} +.glyphicon-warning-sign:before { + content: "\e107"; +} +.glyphicon-plane:before { + content: "\e108"; +} +.glyphicon-calendar:before { + content: "\e109"; +} +.glyphicon-random:before { + content: "\e110"; +} +.glyphicon-comment:before { + content: "\e111"; +} +.glyphicon-magnet:before { + content: "\e112"; +} +.glyphicon-chevron-up:before { + content: "\e113"; +} +.glyphicon-chevron-down:before { + content: "\e114"; +} +.glyphicon-retweet:before { + content: "\e115"; +} +.glyphicon-shopping-cart:before { + content: "\e116"; +} +.glyphicon-folder-close:before { + content: "\e117"; +} +.glyphicon-folder-open:before { + content: "\e118"; +} +.glyphicon-resize-vertical:before { + content: "\e119"; +} +.glyphicon-resize-horizontal:before { + content: "\e120"; +} +.glyphicon-hdd:before { + content: "\e121"; +} +.glyphicon-bullhorn:before { + content: "\e122"; +} +.glyphicon-bell:before { + content: "\e123"; +} +.glyphicon-certificate:before { + content: "\e124"; +} +.glyphicon-thumbs-up:before { + content: "\e125"; +} +.glyphicon-thumbs-down:before { + content: "\e126"; +} +.glyphicon-hand-right:before { + content: "\e127"; +} +.glyphicon-hand-left:before { + content: "\e128"; +} +.glyphicon-hand-up:before { + content: "\e129"; +} +.glyphicon-hand-down:before { + content: "\e130"; +} +.glyphicon-circle-arrow-right:before { + content: "\e131"; +} +.glyphicon-circle-arrow-left:before { + content: "\e132"; +} +.glyphicon-circle-arrow-up:before { + content: "\e133"; +} +.glyphicon-circle-arrow-down:before { + content: "\e134"; +} +.glyphicon-globe:before { + content: "\e135"; +} +.glyphicon-wrench:before { + content: "\e136"; +} +.glyphicon-tasks:before { + content: "\e137"; +} +.glyphicon-filter:before { + content: "\e138"; +} +.glyphicon-briefcase:before { + content: "\e139"; +} +.glyphicon-fullscreen:before { + content: "\e140"; +} +.glyphicon-dashboard:before { + content: "\e141"; +} +.glyphicon-paperclip:before { + content: "\e142"; +} +.glyphicon-heart-empty:before { + content: "\e143"; +} +.glyphicon-link:before { + content: "\e144"; +} +.glyphicon-phone:before { + content: "\e145"; +} +.glyphicon-pushpin:before { + content: "\e146"; +} +.glyphicon-usd:before { + content: "\e148"; +} +.glyphicon-gbp:before { + content: "\e149"; +} +.glyphicon-sort:before { + content: "\e150"; +} +.glyphicon-sort-by-alphabet:before { + content: "\e151"; +} +.glyphicon-sort-by-alphabet-alt:before { + content: "\e152"; +} +.glyphicon-sort-by-order:before { + content: "\e153"; +} +.glyphicon-sort-by-order-alt:before { + content: "\e154"; +} +.glyphicon-sort-by-attributes:before { + content: "\e155"; +} +.glyphicon-sort-by-attributes-alt:before { + content: "\e156"; +} +.glyphicon-unchecked:before { + content: "\e157"; +} +.glyphicon-expand:before { + content: "\e158"; +} +.glyphicon-collapse-down:before { + content: "\e159"; +} +.glyphicon-collapse-up:before { + content: "\e160"; +} +.glyphicon-log-in:before { + content: "\e161"; +} +.glyphicon-flash:before { + content: "\e162"; +} +.glyphicon-log-out:before { + content: "\e163"; +} +.glyphicon-new-window:before { + content: "\e164"; +} +.glyphicon-record:before { + content: "\e165"; +} +.glyphicon-save:before { + content: "\e166"; +} +.glyphicon-open:before { + content: "\e167"; +} +.glyphicon-saved:before { + content: "\e168"; +} +.glyphicon-import:before { + content: "\e169"; +} +.glyphicon-export:before { + content: "\e170"; +} +.glyphicon-send:before { + content: "\e171"; +} +.glyphicon-floppy-disk:before { + content: "\e172"; +} +.glyphicon-floppy-saved:before { + content: "\e173"; +} +.glyphicon-floppy-remove:before { + content: "\e174"; +} +.glyphicon-floppy-save:before { + content: "\e175"; +} +.glyphicon-floppy-open:before { + content: "\e176"; +} +.glyphicon-credit-card:before { + content: "\e177"; +} +.glyphicon-transfer:before { + content: "\e178"; +} +.glyphicon-cutlery:before { + content: "\e179"; +} +.glyphicon-header:before { + content: "\e180"; +} +.glyphicon-compressed:before { + content: "\e181"; +} +.glyphicon-earphone:before { + content: "\e182"; +} +.glyphicon-phone-alt:before { + content: "\e183"; +} +.glyphicon-tower:before { + content: "\e184"; +} +.glyphicon-stats:before { + content: "\e185"; +} +.glyphicon-sd-video:before { + content: "\e186"; +} +.glyphicon-hd-video:before { + content: "\e187"; +} +.glyphicon-subtitles:before { + content: "\e188"; +} +.glyphicon-sound-stereo:before { + content: "\e189"; +} +.glyphicon-sound-dolby:before { + content: "\e190"; +} +.glyphicon-sound-5-1:before { + content: "\e191"; +} +.glyphicon-sound-6-1:before { + content: "\e192"; +} +.glyphicon-sound-7-1:before { + content: "\e193"; +} +.glyphicon-copyright-mark:before { + content: "\e194"; +} +.glyphicon-registration-mark:before { + content: "\e195"; +} +.glyphicon-cloud-download:before { + content: "\e197"; +} +.glyphicon-cloud-upload:before { + content: "\e198"; +} +.glyphicon-tree-conifer:before { + content: "\e199"; +} +.glyphicon-tree-deciduous:before { + content: "\e200"; +} +.glyphicon-cd:before { + content: "\e201"; +} +.glyphicon-save-file:before { + content: "\e202"; +} +.glyphicon-open-file:before { + content: "\e203"; +} +.glyphicon-level-up:before { + content: "\e204"; +} +.glyphicon-copy:before { + content: "\e205"; +} +.glyphicon-paste:before { + content: "\e206"; +} +.glyphicon-alert:before { + content: "\e209"; +} +.glyphicon-equalizer:before { + content: "\e210"; +} +.glyphicon-king:before { + content: "\e211"; +} +.glyphicon-queen:before { + content: "\e212"; +} +.glyphicon-pawn:before { + content: "\e213"; +} +.glyphicon-bishop:before { + content: "\e214"; +} +.glyphicon-knight:before { + content: "\e215"; +} +.glyphicon-baby-formula:before { + content: "\e216"; +} +.glyphicon-tent:before { + content: "\26fa"; +} +.glyphicon-blackboard:before { + content: "\e218"; +} +.glyphicon-bed:before { + content: "\e219"; +} +.glyphicon-apple:before { + content: "\f8ff"; +} +.glyphicon-erase:before { + content: "\e221"; +} +.glyphicon-hourglass:before { + content: "\231b"; +} +.glyphicon-lamp:before { + content: "\e223"; +} +.glyphicon-duplicate:before { + content: "\e224"; +} +.glyphicon-piggy-bank:before { + content: "\e225"; +} +.glyphicon-scissors:before { + content: "\e226"; +} +.glyphicon-bitcoin:before { + content: "\e227"; +} +.glyphicon-btc:before { + content: "\e227"; +} +.glyphicon-xbt:before { + content: "\e227"; +} +.glyphicon-yen:before { + content: "\00a5"; +} +.glyphicon-jpy:before { + content: "\00a5"; +} +.glyphicon-ruble:before { + content: "\20bd"; +} +.glyphicon-rub:before { + content: "\20bd"; +} +.glyphicon-scale:before { + content: "\e230"; +} +.glyphicon-ice-lolly:before { + content: "\e231"; +} +.glyphicon-ice-lolly-tasted:before { + content: "\e232"; +} +.glyphicon-education:before { + content: "\e233"; +} +.glyphicon-option-horizontal:before { + content: "\e234"; +} +.glyphicon-option-vertical:before { + content: "\e235"; +} +.glyphicon-menu-hamburger:before { + content: "\e236"; +} +.glyphicon-modal-window:before { + content: "\e237"; +} +.glyphicon-oil:before { + content: "\e238"; +} +.glyphicon-grain:before { + content: "\e239"; +} +.glyphicon-sunglasses:before { + content: "\e240"; +} +.glyphicon-text-size:before { + content: "\e241"; +} +.glyphicon-text-color:before { + content: "\e242"; +} +.glyphicon-text-background:before { + content: "\e243"; +} +.glyphicon-object-align-top:before { + content: "\e244"; +} +.glyphicon-object-align-bottom:before { + content: "\e245"; +} +.glyphicon-object-align-horizontal:before { + content: "\e246"; +} +.glyphicon-object-align-left:before { + content: "\e247"; +} +.glyphicon-object-align-vertical:before { + content: "\e248"; +} +.glyphicon-object-align-right:before { + content: "\e249"; +} +.glyphicon-triangle-right:before { + content: "\e250"; +} +.glyphicon-triangle-left:before { + content: "\e251"; +} +.glyphicon-triangle-bottom:before { + content: "\e252"; +} +.glyphicon-triangle-top:before { + content: "\e253"; +} +.glyphicon-console:before { + content: "\e254"; +} +.glyphicon-superscript:before { + content: "\e255"; +} +.glyphicon-subscript:before { + content: "\e256"; +} +.glyphicon-menu-left:before { + content: "\e257"; +} +.glyphicon-menu-right:before { + content: "\e258"; +} +.glyphicon-menu-down:before { + content: "\e259"; +} +.glyphicon-menu-up:before { + content: "\e260"; +} +* { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +*:before, +*:after { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +html { + font-size: 10px; + + -webkit-tap-highlight-color: rgba(0, 0, 0, 0); +} +body { + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 14px; + line-height: 1.42857143; + color: var(--color-text, #333); + background-color: var(--color-mainBackground, #fff); +} +input, +button, +select, +textarea { + font-family: inherit; + font-size: inherit; + line-height: inherit; +} +a { + color: #337ab7; + text-decoration: none; +} +a:hover, +a:focus { + color: #23527c; + text-decoration: underline; +} +a:focus { + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +figure { + margin: 0; +} +img { + vertical-align: middle; +} +.img-responsive, +.thumbnail > img, +.thumbnail a > img, +.carousel-inner > .item > img, +.carousel-inner > .item > a > img { + display: block; + max-width: 100%; + height: auto; +} +.img-rounded { + border-radius: 6px; +} +.img-thumbnail { + display: inline-block; + max-width: 100%; + height: auto; + padding: 4px; + line-height: 1.42857143; + background-color: var(--color-mainBackground, #fff); + border: 1px solid var(--color-border, #ddd); + border-radius: 4px; + -webkit-transition: all .2s ease-in-out; + -o-transition: all .2s ease-in-out; + transition: all .2s ease-in-out; +} +.img-circle { + border-radius: 50%; +} +hr { + margin-top: 20px; + margin-bottom: 20px; + border: 0; + border-top: 1px solid #eee; +} +.sr-only { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 0); + border: 0; +} +.sr-only-focusable:active, +.sr-only-focusable:focus { + position: static; + width: auto; + height: auto; + margin: 0; + overflow: visible; + clip: auto; +} +[role="button"] { + cursor: pointer; +} +h1, +h2, +h3, +h4, +h5, +h6, +.h1, +.h2, +.h3, +.h4, +.h5, +.h6 { + font-family: inherit; + font-weight: 500; + line-height: 1.1; + color: inherit; +} +h1 small, +h2 small, +h3 small, +h4 small, +h5 small, +h6 small, +.h1 small, +.h2 small, +.h3 small, +.h4 small, +.h5 small, +.h6 small, +h1 .small, +h2 .small, +h3 .small, +h4 .small, +h5 .small, +h6 .small, +.h1 .small, +.h2 .small, +.h3 .small, +.h4 .small, +.h5 .small, +.h6 .small { + font-weight: normal; + line-height: 1; + color: #777; +} +h1, +.h1, +h2, +.h2, +h3, +.h3 { + margin-top: 20px; + margin-bottom: 10px; +} +h1 small, +.h1 small, +h2 small, +.h2 small, +h3 small, +.h3 small, +h1 .small, +.h1 .small, +h2 .small, +.h2 .small, +h3 .small, +.h3 .small { + font-size: 65%; +} +h4, +.h4, +h5, +.h5, +h6, +.h6 { + margin-top: 10px; + margin-bottom: 10px; +} +h4 small, +.h4 small, +h5 small, +.h5 small, +h6 small, +.h6 small, +h4 .small, +.h4 .small, +h5 .small, +.h5 .small, +h6 .small, +.h6 .small { + font-size: 75%; +} +h1, +.h1 { + font-size: 36px; +} +h2, +.h2 { + font-size: 30px; +} +h3, +.h3 { + font-size: 24px; +} +h4, +.h4 { + font-size: 18px; +} +h5, +.h5 { + font-size: 14px; +} +h6, +.h6 { + font-size: 12px; +} +p { + margin: 0 0 10px; +} +.lead { + margin-bottom: 20px; + font-size: 16px; + font-weight: 300; + line-height: 1.4; +} +@media (min-width: 768px) { + .lead { + font-size: 21px; + } +} +small, +.small { + font-size: 85%; +} +mark, +.mark { + padding: .2em; + background-color: #fcf8e3; +} +.text-left { + text-align: left; +} +.text-right { + text-align: right; +} +.text-center { + text-align: center; +} +.text-justify { + text-align: justify; +} +.text-nowrap { + white-space: nowrap; +} +.text-lowercase { + text-transform: lowercase; +} +.text-uppercase { + text-transform: uppercase; +} +.text-capitalize { + text-transform: capitalize; +} +.text-muted { + color: #777; +} +.text-primary { + color: #337ab7; +} +a.text-primary:hover, +a.text-primary:focus { + color: #286090; +} +.text-success { + color: #3c763d; +} +a.text-success:hover, +a.text-success:focus { + color: #2b542c; +} +.text-info { + color: #31708f; +} +a.text-info:hover, +a.text-info:focus { + color: #245269; +} +.text-warning { + color: #8a6d3b; +} +a.text-warning:hover, +a.text-warning:focus { + color: #66512c; +} +.text-danger { + color: #a94442; +} +a.text-danger:hover, +a.text-danger:focus { + color: #843534; +} +.bg-primary { + color: #fff; + background-color: #337ab7; +} +a.bg-primary:hover, +a.bg-primary:focus { + background-color: #286090; +} +.bg-success { + background-color: #dff0d8; +} +a.bg-success:hover, +a.bg-success:focus { + background-color: #c1e2b3; +} +.bg-info { + background-color: #d9edf7; +} +a.bg-info:hover, +a.bg-info:focus { + background-color: #afd9ee; +} +.bg-warning { + background-color: #fcf8e3; +} +a.bg-warning:hover, +a.bg-warning:focus { + background-color: #f7ecb5; +} +.bg-danger { + background-color: #f2dede; +} +a.bg-danger:hover, +a.bg-danger:focus { + background-color: #e4b9b9; +} +.page-header { + padding-bottom: 9px; + margin: 40px 0 20px; + border-bottom: 1px solid #eee; +} +ul, +ol { + margin-top: 0; + margin-bottom: 10px; +} +ul ul, +ol ul, +ul ol, +ol ol { + margin-bottom: 0; +} +.list-unstyled { + padding-left: 0; + list-style: none; +} +.list-inline { + padding-left: 0; + margin-left: -5px; + list-style: none; +} +.list-inline > li { + display: inline-block; + padding-right: 5px; + padding-left: 5px; +} +dl { + margin-top: 0; + margin-bottom: 20px; +} +dt, +dd { + line-height: 1.42857143; +} +dt { + font-weight: bold; +} +dd { + margin-left: 0; +} +@media (min-width: 768px) { + .dl-horizontal dt { + float: left; + width: 160px; + overflow: hidden; + clear: left; + text-align: right; + text-overflow: ellipsis; + white-space: nowrap; + } + .dl-horizontal dd { + margin-left: 180px; + } +} +abbr[title], +abbr[data-original-title] { + cursor: help; + border-bottom: 1px dotted #777; +} +.initialism { + font-size: 90%; + text-transform: uppercase; +} +blockquote { + padding: 10px 20px; + margin: 0 0 20px; + font-size: 17.5px; + border-left: 5px solid #eee; +} +blockquote p:last-child, +blockquote ul:last-child, +blockquote ol:last-child { + margin-bottom: 0; +} +blockquote footer, +blockquote small, +blockquote .small { + display: block; + font-size: 80%; + line-height: 1.42857143; + color: #777; +} +blockquote footer:before, +blockquote small:before, +blockquote .small:before { + content: '\2014 \00A0'; +} +.blockquote-reverse, +blockquote.pull-right { + padding-right: 15px; + padding-left: 0; + text-align: right; + border-right: 5px solid #eee; + border-left: 0; +} +.blockquote-reverse footer:before, +blockquote.pull-right footer:before, +.blockquote-reverse small:before, +blockquote.pull-right small:before, +.blockquote-reverse .small:before, +blockquote.pull-right .small:before { + content: ''; +} +.blockquote-reverse footer:after, +blockquote.pull-right footer:after, +.blockquote-reverse small:after, +blockquote.pull-right small:after, +.blockquote-reverse .small:after, +blockquote.pull-right .small:after { + content: '\00A0 \2014'; +} +address { + margin-bottom: 20px; + font-style: normal; + line-height: 1.42857143; +} +code, +kbd, +pre, +samp { + font-family: Menlo, Monaco, Consolas, "Courier New", monospace; +} +code { + padding: 2px 4px; + font-size: 90%; + color: #c7254e; + background-color: #f9f2f4; + border-radius: 4px; +} +kbd { + padding: 2px 4px; + font-size: 90%; + color: #fff; + background-color: #333; + border-radius: 3px; + -webkit-box-shadow: inset 0 -1px 0 rgba(0, 0, 0, .25); + box-shadow: inset 0 -1px 0 rgba(0, 0, 0, .25); +} +kbd kbd { + padding: 0; + font-size: 100%; + font-weight: bold; + -webkit-box-shadow: none; + box-shadow: none; +} +pre { + display: block; + padding: 9.5px; + margin: 0 0 10px; + font-size: 13px; + line-height: 1.42857143; + color: #333; + word-break: break-all; + word-wrap: break-word; + background-color: #f5f5f5; + border: 1px solid #ccc; + border-radius: 4px; +} +pre code { + padding: 0; + font-size: inherit; + color: inherit; + white-space: pre-wrap; + background-color: transparent; + border-radius: 0; +} +.pre-scrollable { + max-height: 340px; + overflow-y: scroll; +} +.container { + padding-right: 15px; + padding-left: 15px; + margin-right: auto; + margin-left: auto; +} +@media (min-width: 768px) { + .container { + width: 750px; + } +} +@media (min-width: 992px) { + .container { + width: 970px; + } +} +@media (min-width: 1200px) { + .container { + width: 1170px; + } +} +.container-fluid { + padding-right: 15px; + padding-left: 15px; + margin-right: auto; + margin-left: auto; +} +.row { + margin-right: -15px; + margin-left: -15px; +} +.col-xs-1, .col-sm-1, .col-md-1, .col-lg-1, .col-xs-2, .col-sm-2, .col-md-2, .col-lg-2, .col-xs-3, .col-sm-3, .col-md-3, .col-lg-3, .col-xs-4, .col-sm-4, .col-md-4, .col-lg-4, .col-xs-5, .col-sm-5, .col-md-5, .col-lg-5, .col-xs-6, .col-sm-6, .col-md-6, .col-lg-6, .col-xs-7, .col-sm-7, .col-md-7, .col-lg-7, .col-xs-8, .col-sm-8, .col-md-8, .col-lg-8, .col-xs-9, .col-sm-9, .col-md-9, .col-lg-9, .col-xs-10, .col-sm-10, .col-md-10, .col-lg-10, .col-xs-11, .col-sm-11, .col-md-11, .col-lg-11, .col-xs-12, .col-sm-12, .col-md-12, .col-lg-12 { + position: relative; + min-height: 1px; + padding-right: 15px; + padding-left: 15px; +} +.col-xs-1, .col-xs-2, .col-xs-3, .col-xs-4, .col-xs-5, .col-xs-6, .col-xs-7, .col-xs-8, .col-xs-9, .col-xs-10, .col-xs-11, .col-xs-12 { + float: left; +} +.col-xs-12 { + width: 100%; +} +.col-xs-11 { + width: 91.66666667%; +} +.col-xs-10 { + width: 83.33333333%; +} +.col-xs-9 { + width: 75%; +} +.col-xs-8 { + width: 66.66666667%; +} +.col-xs-7 { + width: 58.33333333%; +} +.col-xs-6 { + width: 50%; +} +.col-xs-5 { + width: 41.66666667%; +} +.col-xs-4 { + width: 33.33333333%; +} +.col-xs-3 { + width: 25%; +} +.col-xs-2 { + width: 16.66666667%; +} +.col-xs-1 { + width: 8.33333333%; +} +.col-xs-pull-12 { + right: 100%; +} +.col-xs-pull-11 { + right: 91.66666667%; +} +.col-xs-pull-10 { + right: 83.33333333%; +} +.col-xs-pull-9 { + right: 75%; +} +.col-xs-pull-8 { + right: 66.66666667%; +} +.col-xs-pull-7 { + right: 58.33333333%; +} +.col-xs-pull-6 { + right: 50%; +} +.col-xs-pull-5 { + right: 41.66666667%; +} +.col-xs-pull-4 { + right: 33.33333333%; +} +.col-xs-pull-3 { + right: 25%; +} +.col-xs-pull-2 { + right: 16.66666667%; +} +.col-xs-pull-1 { + right: 8.33333333%; +} +.col-xs-pull-0 { + right: auto; +} +.col-xs-push-12 { + left: 100%; +} +.col-xs-push-11 { + left: 91.66666667%; +} +.col-xs-push-10 { + left: 83.33333333%; +} +.col-xs-push-9 { + left: 75%; +} +.col-xs-push-8 { + left: 66.66666667%; +} +.col-xs-push-7 { + left: 58.33333333%; +} +.col-xs-push-6 { + left: 50%; +} +.col-xs-push-5 { + left: 41.66666667%; +} +.col-xs-push-4 { + left: 33.33333333%; +} +.col-xs-push-3 { + left: 25%; +} +.col-xs-push-2 { + left: 16.66666667%; +} +.col-xs-push-1 { + left: 8.33333333%; +} +.col-xs-push-0 { + left: auto; +} +.col-xs-offset-12 { + margin-left: 100%; +} +.col-xs-offset-11 { + margin-left: 91.66666667%; +} +.col-xs-offset-10 { + margin-left: 83.33333333%; +} +.col-xs-offset-9 { + margin-left: 75%; +} +.col-xs-offset-8 { + margin-left: 66.66666667%; +} +.col-xs-offset-7 { + margin-left: 58.33333333%; +} +.col-xs-offset-6 { + margin-left: 50%; +} +.col-xs-offset-5 { + margin-left: 41.66666667%; +} +.col-xs-offset-4 { + margin-left: 33.33333333%; +} +.col-xs-offset-3 { + margin-left: 25%; +} +.col-xs-offset-2 { + margin-left: 16.66666667%; +} +.col-xs-offset-1 { + margin-left: 8.33333333%; +} +.col-xs-offset-0 { + margin-left: 0; +} +@media (min-width: 768px) { + .col-sm-1, .col-sm-2, .col-sm-3, .col-sm-4, .col-sm-5, .col-sm-6, .col-sm-7, .col-sm-8, .col-sm-9, .col-sm-10, .col-sm-11, .col-sm-12 { + float: left; + } + .col-sm-12 { + width: 100%; + } + .col-sm-11 { + width: 91.66666667%; + } + .col-sm-10 { + width: 83.33333333%; + } + .col-sm-9 { + width: 75%; + } + .col-sm-8 { + width: 66.66666667%; + } + .col-sm-7 { + width: 58.33333333%; + } + .col-sm-6 { + width: 50%; + } + .col-sm-5 { + width: 41.66666667%; + } + .col-sm-4 { + width: 33.33333333%; + } + .col-sm-3 { + width: 25%; + } + .col-sm-2 { + width: 16.66666667%; + } + .col-sm-1 { + width: 8.33333333%; + } + .col-sm-pull-12 { + right: 100%; + } + .col-sm-pull-11 { + right: 91.66666667%; + } + .col-sm-pull-10 { + right: 83.33333333%; + } + .col-sm-pull-9 { + right: 75%; + } + .col-sm-pull-8 { + right: 66.66666667%; + } + .col-sm-pull-7 { + right: 58.33333333%; + } + .col-sm-pull-6 { + right: 50%; + } + .col-sm-pull-5 { + right: 41.66666667%; + } + .col-sm-pull-4 { + right: 33.33333333%; + } + .col-sm-pull-3 { + right: 25%; + } + .col-sm-pull-2 { + right: 16.66666667%; + } + .col-sm-pull-1 { + right: 8.33333333%; + } + .col-sm-pull-0 { + right: auto; + } + .col-sm-push-12 { + left: 100%; + } + .col-sm-push-11 { + left: 91.66666667%; + } + .col-sm-push-10 { + left: 83.33333333%; + } + .col-sm-push-9 { + left: 75%; + } + .col-sm-push-8 { + left: 66.66666667%; + } + .col-sm-push-7 { + left: 58.33333333%; + } + .col-sm-push-6 { + left: 50%; + } + .col-sm-push-5 { + left: 41.66666667%; + } + .col-sm-push-4 { + left: 33.33333333%; + } + .col-sm-push-3 { + left: 25%; + } + .col-sm-push-2 { + left: 16.66666667%; + } + .col-sm-push-1 { + left: 8.33333333%; + } + .col-sm-push-0 { + left: auto; + } + .col-sm-offset-12 { + margin-left: 100%; + } + .col-sm-offset-11 { + margin-left: 91.66666667%; + } + .col-sm-offset-10 { + margin-left: 83.33333333%; + } + .col-sm-offset-9 { + margin-left: 75%; + } + .col-sm-offset-8 { + margin-left: 66.66666667%; + } + .col-sm-offset-7 { + margin-left: 58.33333333%; + } + .col-sm-offset-6 { + margin-left: 50%; + } + .col-sm-offset-5 { + margin-left: 41.66666667%; + } + .col-sm-offset-4 { + margin-left: 33.33333333%; + } + .col-sm-offset-3 { + margin-left: 25%; + } + .col-sm-offset-2 { + margin-left: 16.66666667%; + } + .col-sm-offset-1 { + margin-left: 8.33333333%; + } + .col-sm-offset-0 { + margin-left: 0; + } +} +@media (min-width: 992px) { + .col-md-1, .col-md-2, .col-md-3, .col-md-4, .col-md-5, .col-md-6, .col-md-7, .col-md-8, .col-md-9, .col-md-10, .col-md-11, .col-md-12 { + float: left; + } + .col-md-12 { + width: 100%; + } + .col-md-11 { + width: 91.66666667%; + } + .col-md-10 { + width: 83.33333333%; + } + .col-md-9 { + width: 75%; + } + .col-md-8 { + width: 66.66666667%; + } + .col-md-7 { + width: 58.33333333%; + } + .col-md-6 { + width: 50%; + } + .col-md-5 { + width: 41.66666667%; + } + .col-md-4 { + width: 33.33333333%; + } + .col-md-3 { + width: 25%; + } + .col-md-2 { + width: 16.66666667%; + } + .col-md-1 { + width: 8.33333333%; + } + .col-md-pull-12 { + right: 100%; + } + .col-md-pull-11 { + right: 91.66666667%; + } + .col-md-pull-10 { + right: 83.33333333%; + } + .col-md-pull-9 { + right: 75%; + } + .col-md-pull-8 { + right: 66.66666667%; + } + .col-md-pull-7 { + right: 58.33333333%; + } + .col-md-pull-6 { + right: 50%; + } + .col-md-pull-5 { + right: 41.66666667%; + } + .col-md-pull-4 { + right: 33.33333333%; + } + .col-md-pull-3 { + right: 25%; + } + .col-md-pull-2 { + right: 16.66666667%; + } + .col-md-pull-1 { + right: 8.33333333%; + } + .col-md-pull-0 { + right: auto; + } + .col-md-push-12 { + left: 100%; + } + .col-md-push-11 { + left: 91.66666667%; + } + .col-md-push-10 { + left: 83.33333333%; + } + .col-md-push-9 { + left: 75%; + } + .col-md-push-8 { + left: 66.66666667%; + } + .col-md-push-7 { + left: 58.33333333%; + } + .col-md-push-6 { + left: 50%; + } + .col-md-push-5 { + left: 41.66666667%; + } + .col-md-push-4 { + left: 33.33333333%; + } + .col-md-push-3 { + left: 25%; + } + .col-md-push-2 { + left: 16.66666667%; + } + .col-md-push-1 { + left: 8.33333333%; + } + .col-md-push-0 { + left: auto; + } + .col-md-offset-12 { + margin-left: 100%; + } + .col-md-offset-11 { + margin-left: 91.66666667%; + } + .col-md-offset-10 { + margin-left: 83.33333333%; + } + .col-md-offset-9 { + margin-left: 75%; + } + .col-md-offset-8 { + margin-left: 66.66666667%; + } + .col-md-offset-7 { + margin-left: 58.33333333%; + } + .col-md-offset-6 { + margin-left: 50%; + } + .col-md-offset-5 { + margin-left: 41.66666667%; + } + .col-md-offset-4 { + margin-left: 33.33333333%; + } + .col-md-offset-3 { + margin-left: 25%; + } + .col-md-offset-2 { + margin-left: 16.66666667%; + } + .col-md-offset-1 { + margin-left: 8.33333333%; + } + .col-md-offset-0 { + margin-left: 0; + } +} +@media (min-width: 1200px) { + .col-lg-1, .col-lg-2, .col-lg-3, .col-lg-4, .col-lg-5, .col-lg-6, .col-lg-7, .col-lg-8, .col-lg-9, .col-lg-10, .col-lg-11, .col-lg-12 { + float: left; + } + .col-lg-12 { + width: 100%; + } + .col-lg-11 { + width: 91.66666667%; + } + .col-lg-10 { + width: 83.33333333%; + } + .col-lg-9 { + width: 75%; + } + .col-lg-8 { + width: 66.66666667%; + } + .col-lg-7 { + width: 58.33333333%; + } + .col-lg-6 { + width: 50%; + } + .col-lg-5 { + width: 41.66666667%; + } + .col-lg-4 { + width: 33.33333333%; + } + .col-lg-3 { + width: 25%; + } + .col-lg-2 { + width: 16.66666667%; + } + .col-lg-1 { + width: 8.33333333%; + } + .col-lg-pull-12 { + right: 100%; + } + .col-lg-pull-11 { + right: 91.66666667%; + } + .col-lg-pull-10 { + right: 83.33333333%; + } + .col-lg-pull-9 { + right: 75%; + } + .col-lg-pull-8 { + right: 66.66666667%; + } + .col-lg-pull-7 { + right: 58.33333333%; + } + .col-lg-pull-6 { + right: 50%; + } + .col-lg-pull-5 { + right: 41.66666667%; + } + .col-lg-pull-4 { + right: 33.33333333%; + } + .col-lg-pull-3 { + right: 25%; + } + .col-lg-pull-2 { + right: 16.66666667%; + } + .col-lg-pull-1 { + right: 8.33333333%; + } + .col-lg-pull-0 { + right: auto; + } + .col-lg-push-12 { + left: 100%; + } + .col-lg-push-11 { + left: 91.66666667%; + } + .col-lg-push-10 { + left: 83.33333333%; + } + .col-lg-push-9 { + left: 75%; + } + .col-lg-push-8 { + left: 66.66666667%; + } + .col-lg-push-7 { + left: 58.33333333%; + } + .col-lg-push-6 { + left: 50%; + } + .col-lg-push-5 { + left: 41.66666667%; + } + .col-lg-push-4 { + left: 33.33333333%; + } + .col-lg-push-3 { + left: 25%; + } + .col-lg-push-2 { + left: 16.66666667%; + } + .col-lg-push-1 { + left: 8.33333333%; + } + .col-lg-push-0 { + left: auto; + } + .col-lg-offset-12 { + margin-left: 100%; + } + .col-lg-offset-11 { + margin-left: 91.66666667%; + } + .col-lg-offset-10 { + margin-left: 83.33333333%; + } + .col-lg-offset-9 { + margin-left: 75%; + } + .col-lg-offset-8 { + margin-left: 66.66666667%; + } + .col-lg-offset-7 { + margin-left: 58.33333333%; + } + .col-lg-offset-6 { + margin-left: 50%; + } + .col-lg-offset-5 { + margin-left: 41.66666667%; + } + .col-lg-offset-4 { + margin-left: 33.33333333%; + } + .col-lg-offset-3 { + margin-left: 25%; + } + .col-lg-offset-2 { + margin-left: 16.66666667%; + } + .col-lg-offset-1 { + margin-left: 8.33333333%; + } + .col-lg-offset-0 { + margin-left: 0; + } +} +table { + background-color: transparent; +} +caption { + padding-top: 8px; + padding-bottom: 8px; + color: #777; + text-align: left; +} +th { + text-align: left; +} +.table { + width: 100%; + max-width: 100%; + margin-bottom: 20px; +} +.table > thead > tr > th, +.table > tbody > tr > th, +.table > tfoot > tr > th, +.table > thead > tr > td, +.table > tbody > tr > td, +.table > tfoot > tr > td { + padding: 8px; + line-height: 1.42857143; + vertical-align: top; + border-top: 1px solid #ddd; +} +.table > thead > tr > th { + vertical-align: bottom; + border-bottom: 2px solid #ddd; +} +.table > caption + thead > tr:first-child > th, +.table > colgroup + thead > tr:first-child > th, +.table > thead:first-child > tr:first-child > th, +.table > caption + thead > tr:first-child > td, +.table > colgroup + thead > tr:first-child > td, +.table > thead:first-child > tr:first-child > td { + border-top: 0; +} +.table > tbody + tbody { + border-top: 2px solid #ddd; +} +.table .table { + background-color: #fff; +} +.table-condensed > thead > tr > th, +.table-condensed > tbody > tr > th, +.table-condensed > tfoot > tr > th, +.table-condensed > thead > tr > td, +.table-condensed > tbody > tr > td, +.table-condensed > tfoot > tr > td { + padding: 5px; +} +.table-bordered { + border: 1px solid #ddd; +} +.table-bordered > thead > tr > th, +.table-bordered > tbody > tr > th, +.table-bordered > tfoot > tr > th, +.table-bordered > thead > tr > td, +.table-bordered > tbody > tr > td, +.table-bordered > tfoot > tr > td { + border: 1px solid #ddd; +} +.table-bordered > thead > tr > th, +.table-bordered > thead > tr > td { + border-bottom-width: 2px; +} +.table-striped > tbody > tr:nth-of-type(odd) { + background-color: #f9f9f9; +} +.table-hover > tbody > tr:hover { + background-color: #f5f5f5; +} +table col[class*="col-"] { + position: static; + display: table-column; + float: none; +} +table td[class*="col-"], +table th[class*="col-"] { + position: static; + display: table-cell; + float: none; +} +.table > thead > tr > td.active, +.table > tbody > tr > td.active, +.table > tfoot > tr > td.active, +.table > thead > tr > th.active, +.table > tbody > tr > th.active, +.table > tfoot > tr > th.active, +.table > thead > tr.active > td, +.table > tbody > tr.active > td, +.table > tfoot > tr.active > td, +.table > thead > tr.active > th, +.table > tbody > tr.active > th, +.table > tfoot > tr.active > th { + background-color: #f5f5f5; +} +.table-hover > tbody > tr > td.active:hover, +.table-hover > tbody > tr > th.active:hover, +.table-hover > tbody > tr.active:hover > td, +.table-hover > tbody > tr:hover > .active, +.table-hover > tbody > tr.active:hover > th { + background-color: #e8e8e8; +} +.table > thead > tr > td.success, +.table > tbody > tr > td.success, +.table > tfoot > tr > td.success, +.table > thead > tr > th.success, +.table > tbody > tr > th.success, +.table > tfoot > tr > th.success, +.table > thead > tr.success > td, +.table > tbody > tr.success > td, +.table > tfoot > tr.success > td, +.table > thead > tr.success > th, +.table > tbody > tr.success > th, +.table > tfoot > tr.success > th { + background-color: #dff0d8; +} +.table-hover > tbody > tr > td.success:hover, +.table-hover > tbody > tr > th.success:hover, +.table-hover > tbody > tr.success:hover > td, +.table-hover > tbody > tr:hover > .success, +.table-hover > tbody > tr.success:hover > th { + background-color: #d0e9c6; +} +.table > thead > tr > td.info, +.table > tbody > tr > td.info, +.table > tfoot > tr > td.info, +.table > thead > tr > th.info, +.table > tbody > tr > th.info, +.table > tfoot > tr > th.info, +.table > thead > tr.info > td, +.table > tbody > tr.info > td, +.table > tfoot > tr.info > td, +.table > thead > tr.info > th, +.table > tbody > tr.info > th, +.table > tfoot > tr.info > th { + background-color: #d9edf7; +} +.table-hover > tbody > tr > td.info:hover, +.table-hover > tbody > tr > th.info:hover, +.table-hover > tbody > tr.info:hover > td, +.table-hover > tbody > tr:hover > .info, +.table-hover > tbody > tr.info:hover > th { + background-color: #c4e3f3; +} +.table > thead > tr > td.warning, +.table > tbody > tr > td.warning, +.table > tfoot > tr > td.warning, +.table > thead > tr > th.warning, +.table > tbody > tr > th.warning, +.table > tfoot > tr > th.warning, +.table > thead > tr.warning > td, +.table > tbody > tr.warning > td, +.table > tfoot > tr.warning > td, +.table > thead > tr.warning > th, +.table > tbody > tr.warning > th, +.table > tfoot > tr.warning > th { + background-color: #fcf8e3; +} +.table-hover > tbody > tr > td.warning:hover, +.table-hover > tbody > tr > th.warning:hover, +.table-hover > tbody > tr.warning:hover > td, +.table-hover > tbody > tr:hover > .warning, +.table-hover > tbody > tr.warning:hover > th { + background-color: #faf2cc; +} +.table > thead > tr > td.danger, +.table > tbody > tr > td.danger, +.table > tfoot > tr > td.danger, +.table > thead > tr > th.danger, +.table > tbody > tr > th.danger, +.table > tfoot > tr > th.danger, +.table > thead > tr.danger > td, +.table > tbody > tr.danger > td, +.table > tfoot > tr.danger > td, +.table > thead > tr.danger > th, +.table > tbody > tr.danger > th, +.table > tfoot > tr.danger > th { + background-color: #f2dede; +} +.table-hover > tbody > tr > td.danger:hover, +.table-hover > tbody > tr > th.danger:hover, +.table-hover > tbody > tr.danger:hover > td, +.table-hover > tbody > tr:hover > .danger, +.table-hover > tbody > tr.danger:hover > th { + background-color: #ebcccc; +} +.table-responsive { + min-height: .01%; + overflow-x: auto; +} +@media screen and (max-width: 767px) { + .table-responsive { + width: 100%; + margin-bottom: 15px; + overflow-y: hidden; + -ms-overflow-style: -ms-autohiding-scrollbar; + border: 1px solid #ddd; + } + .table-responsive > .table { + margin-bottom: 0; + } + .table-responsive > .table > thead > tr > th, + .table-responsive > .table > tbody > tr > th, + .table-responsive > .table > tfoot > tr > th, + .table-responsive > .table > thead > tr > td, + .table-responsive > .table > tbody > tr > td, + .table-responsive > .table > tfoot > tr > td { + white-space: nowrap; + } + .table-responsive > .table-bordered { + border: 0; + } + .table-responsive > .table-bordered > thead > tr > th:first-child, + .table-responsive > .table-bordered > tbody > tr > th:first-child, + .table-responsive > .table-bordered > tfoot > tr > th:first-child, + .table-responsive > .table-bordered > thead > tr > td:first-child, + .table-responsive > .table-bordered > tbody > tr > td:first-child, + .table-responsive > .table-bordered > tfoot > tr > td:first-child { + border-left: 0; + } + .table-responsive > .table-bordered > thead > tr > th:last-child, + .table-responsive > .table-bordered > tbody > tr > th:last-child, + .table-responsive > .table-bordered > tfoot > tr > th:last-child, + .table-responsive > .table-bordered > thead > tr > td:last-child, + .table-responsive > .table-bordered > tbody > tr > td:last-child, + .table-responsive > .table-bordered > tfoot > tr > td:last-child { + border-right: 0; + } + .table-responsive > .table-bordered > tbody > tr:last-child > th, + .table-responsive > .table-bordered > tfoot > tr:last-child > th, + .table-responsive > .table-bordered > tbody > tr:last-child > td, + .table-responsive > .table-bordered > tfoot > tr:last-child > td { + border-bottom: 0; + } +} +fieldset { + min-width: 0; + padding: 0; + margin: 0; + border: 0; +} +legend { + display: block; + width: 100%; + padding: 0; + margin-bottom: 20px; + font-size: 21px; + line-height: inherit; + color: #333; + border: 0; + border-bottom: 1px solid #e5e5e5; +} +label { + display: inline-block; + max-width: 100%; + margin-bottom: 5px; + font-weight: bold; +} +input[type="search"] { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +input[type="radio"], +input[type="checkbox"] { + margin: 4px 0 0; + margin-top: 1px \9; + line-height: normal; +} +input[type="file"] { + display: block; +} +input[type="range"] { + display: block; + width: 100%; +} +select[multiple], +select[size] { + height: auto; +} +input[type="file"]:focus, +input[type="radio"]:focus, +input[type="checkbox"]:focus { + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +output { + display: block; + padding-top: 7px; + font-size: 14px; + line-height: 1.42857143; + color: #555; +} +.form-control { + display: block; + width: 100%; + height: 34px; + padding: 6px 12px; + font-size: 14px; + line-height: 1.42857143; + color: #555; + background-color: #fff; + background-image: none; + border: 1px solid #ccc; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); + -webkit-transition: border-color ease-in-out .15s, -webkit-box-shadow ease-in-out .15s; + -o-transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s; + transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s; +} +.form-control:focus { + border-color: #66afe9; + outline: 0; + -webkit-box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(102, 175, 233, .6); + box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(102, 175, 233, .6); +} +.form-control::-moz-placeholder { + color: #999; + opacity: 1; +} +.form-control:-ms-input-placeholder { + color: #999; +} +.form-control::-webkit-input-placeholder { + color: #999; +} +.form-control::-ms-expand { + background-color: transparent; + border: 0; +} +.form-control[disabled], +.form-control[readonly], +fieldset[disabled] .form-control { + background-color: #eee; + opacity: 1; +} +.form-control[disabled], +fieldset[disabled] .form-control { + cursor: not-allowed; +} +textarea.form-control { + height: auto; +} +input[type="search"] { + -webkit-appearance: none; +} +@media screen and (-webkit-min-device-pixel-ratio: 0) { + input[type="date"].form-control, + input[type="time"].form-control, + input[type="datetime-local"].form-control, + input[type="month"].form-control { + line-height: 34px; + } + input[type="date"].input-sm, + input[type="time"].input-sm, + input[type="datetime-local"].input-sm, + input[type="month"].input-sm, + .input-group-sm input[type="date"], + .input-group-sm input[type="time"], + .input-group-sm input[type="datetime-local"], + .input-group-sm input[type="month"] { + line-height: 30px; + } + input[type="date"].input-lg, + input[type="time"].input-lg, + input[type="datetime-local"].input-lg, + input[type="month"].input-lg, + .input-group-lg input[type="date"], + .input-group-lg input[type="time"], + .input-group-lg input[type="datetime-local"], + .input-group-lg input[type="month"] { + line-height: 46px; + } +} +.form-group { + margin-bottom: 15px; +} +.radio, +.checkbox { + position: relative; + display: block; + margin-top: 10px; + margin-bottom: 10px; +} +.radio label, +.checkbox label { + min-height: 20px; + padding-left: 20px; + margin-bottom: 0; + font-weight: normal; + cursor: pointer; +} +.radio input[type="radio"], +.radio-inline input[type="radio"], +.checkbox input[type="checkbox"], +.checkbox-inline input[type="checkbox"] { + position: absolute; + margin-top: 4px \9; + margin-left: -20px; +} +.radio + .radio, +.checkbox + .checkbox { + margin-top: -5px; +} +.radio-inline, +.checkbox-inline { + position: relative; + display: inline-block; + padding-left: 20px; + margin-bottom: 0; + font-weight: normal; + vertical-align: middle; + cursor: pointer; +} +.radio-inline + .radio-inline, +.checkbox-inline + .checkbox-inline { + margin-top: 0; + margin-left: 10px; +} +input[type="radio"][disabled], +input[type="checkbox"][disabled], +input[type="radio"].disabled, +input[type="checkbox"].disabled, +fieldset[disabled] input[type="radio"], +fieldset[disabled] input[type="checkbox"] { + cursor: not-allowed; +} +.radio-inline.disabled, +.checkbox-inline.disabled, +fieldset[disabled] .radio-inline, +fieldset[disabled] .checkbox-inline { + cursor: not-allowed; +} +.radio.disabled label, +.checkbox.disabled label, +fieldset[disabled] .radio label, +fieldset[disabled] .checkbox label { + cursor: not-allowed; +} +.form-control-static { + min-height: 34px; + padding-top: 7px; + padding-bottom: 7px; + margin-bottom: 0; +} +.form-control-static.input-lg, +.form-control-static.input-sm { + padding-right: 0; + padding-left: 0; +} +.input-sm { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +select.input-sm { + height: 30px; + line-height: 30px; +} +textarea.input-sm, +select[multiple].input-sm { + height: auto; +} +.form-group-sm .form-control { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.form-group-sm select.form-control { + height: 30px; + line-height: 30px; +} +.form-group-sm textarea.form-control, +.form-group-sm select[multiple].form-control { + height: auto; +} +.form-group-sm .form-control-static { + height: 30px; + min-height: 32px; + padding: 6px 10px; + font-size: 12px; + line-height: 1.5; +} +.input-lg { + height: 46px; + padding: 10px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +select.input-lg { + height: 46px; + line-height: 46px; +} +textarea.input-lg, +select[multiple].input-lg { + height: auto; +} +.form-group-lg .form-control { + height: 46px; + padding: 10px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +.form-group-lg select.form-control { + height: 46px; + line-height: 46px; +} +.form-group-lg textarea.form-control, +.form-group-lg select[multiple].form-control { + height: auto; +} +.form-group-lg .form-control-static { + height: 46px; + min-height: 38px; + padding: 11px 16px; + font-size: 18px; + line-height: 1.3333333; +} +.has-feedback { + position: relative; +} +.has-feedback .form-control { + padding-right: 42.5px; +} +.form-control-feedback { + position: absolute; + top: 0; + right: 0; + z-index: 2; + display: block; + width: 34px; + height: 34px; + line-height: 34px; + text-align: center; + pointer-events: none; +} +.input-lg + .form-control-feedback, +.input-group-lg + .form-control-feedback, +.form-group-lg .form-control + .form-control-feedback { + width: 46px; + height: 46px; + line-height: 46px; +} +.input-sm + .form-control-feedback, +.input-group-sm + .form-control-feedback, +.form-group-sm .form-control + .form-control-feedback { + width: 30px; + height: 30px; + line-height: 30px; +} +.has-success .help-block, +.has-success .control-label, +.has-success .radio, +.has-success .checkbox, +.has-success .radio-inline, +.has-success .checkbox-inline, +.has-success.radio label, +.has-success.checkbox label, +.has-success.radio-inline label, +.has-success.checkbox-inline label { + color: #3c763d; +} +.has-success .form-control { + border-color: #3c763d; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); +} +.has-success .form-control:focus { + border-color: #2b542c; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075), 0 0 6px #67b168; + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075), 0 0 6px #67b168; +} +.has-success .input-group-addon { + color: #3c763d; + background-color: #dff0d8; + border-color: #3c763d; +} +.has-success .form-control-feedback { + color: #3c763d; +} +.has-warning .help-block, +.has-warning .control-label, +.has-warning .radio, +.has-warning .checkbox, +.has-warning .radio-inline, +.has-warning .checkbox-inline, +.has-warning.radio label, +.has-warning.checkbox label, +.has-warning.radio-inline label, +.has-warning.checkbox-inline label { + color: #8a6d3b; +} +.has-warning .form-control { + border-color: #8a6d3b; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); +} +.has-warning .form-control:focus { + border-color: #66512c; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075), 0 0 6px #c0a16b; + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075), 0 0 6px #c0a16b; +} +.has-warning .input-group-addon { + color: #8a6d3b; + background-color: #fcf8e3; + border-color: #8a6d3b; +} +.has-warning .form-control-feedback { + color: #8a6d3b; +} +.has-error .help-block, +.has-error .control-label, +.has-error .radio, +.has-error .checkbox, +.has-error .radio-inline, +.has-error .checkbox-inline, +.has-error.radio label, +.has-error.checkbox label, +.has-error.radio-inline label, +.has-error.checkbox-inline label { + color: #a94442; +} +.has-error .form-control { + border-color: #a94442; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075); +} +.has-error .form-control:focus { + border-color: #843534; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075), 0 0 6px #ce8483; + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .075), 0 0 6px #ce8483; +} +.has-error .input-group-addon { + color: #a94442; + background-color: #f2dede; + border-color: #a94442; +} +.has-error .form-control-feedback { + color: #a94442; +} +.has-feedback label ~ .form-control-feedback { + top: 25px; +} +.has-feedback label.sr-only ~ .form-control-feedback { + top: 0; +} +.help-block { + display: block; + margin-top: 5px; + margin-bottom: 10px; + color: #737373; +} +@media (min-width: 768px) { + .form-inline .form-group { + display: inline-block; + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .form-control { + display: inline-block; + width: auto; + vertical-align: middle; + } + .form-inline .form-control-static { + display: inline-block; + } + .form-inline .input-group { + display: inline-table; + vertical-align: middle; + } + .form-inline .input-group .input-group-addon, + .form-inline .input-group .input-group-btn, + .form-inline .input-group .form-control { + width: auto; + } + .form-inline .input-group > .form-control { + width: 100%; + } + .form-inline .control-label { + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .radio, + .form-inline .checkbox { + display: inline-block; + margin-top: 0; + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .radio label, + .form-inline .checkbox label { + padding-left: 0; + } + .form-inline .radio input[type="radio"], + .form-inline .checkbox input[type="checkbox"] { + position: relative; + margin-left: 0; + } + .form-inline .has-feedback .form-control-feedback { + top: 0; + } +} +.form-horizontal .radio, +.form-horizontal .checkbox, +.form-horizontal .radio-inline, +.form-horizontal .checkbox-inline { + padding-top: 7px; + margin-top: 0; + margin-bottom: 0; +} +.form-horizontal .radio, +.form-horizontal .checkbox { + min-height: 27px; +} +.form-horizontal .form-group { + margin-right: -15px; + margin-left: -15px; +} +@media (min-width: 768px) { + .form-horizontal .control-label { + padding-top: 7px; + margin-bottom: 0; + text-align: right; + } +} +.form-horizontal .has-feedback .form-control-feedback { + right: 15px; +} +@media (min-width: 768px) { + .form-horizontal .form-group-lg .control-label { + padding-top: 11px; + font-size: 18px; + } +} +@media (min-width: 768px) { + .form-horizontal .form-group-sm .control-label { + padding-top: 6px; + font-size: 12px; + } +} +.btn { + display: inline-block; + padding: 6px 12px; + margin-bottom: 0; + font-size: 14px; + font-weight: normal; + line-height: 1.42857143; + text-align: center; + white-space: nowrap; + vertical-align: middle; + -ms-touch-action: manipulation; + touch-action: manipulation; + cursor: pointer; + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; + background-image: none; + border: 1px solid transparent; + border-radius: 4px; +} +.btn:focus, +.btn:active:focus, +.btn.active:focus, +.btn.focus, +.btn:active.focus, +.btn.active.focus { + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +.btn:hover, +.btn:focus, +.btn.focus { + color: #333; + text-decoration: none; +} +.btn:active, +.btn.active { + background-image: none; + outline: 0; + -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, .125); + box-shadow: inset 0 3px 5px rgba(0, 0, 0, .125); +} +.btn.disabled, +.btn[disabled], +fieldset[disabled] .btn { + cursor: not-allowed; + filter: alpha(opacity=65); + -webkit-box-shadow: none; + box-shadow: none; + opacity: .65; +} +a.btn.disabled, +fieldset[disabled] a.btn { + pointer-events: none; +} +.btn-default { + color: #333; + background-color: #fff; + border-color: #ccc; +} +.btn-default:focus, +.btn-default.focus { + color: #333; + background-color: #e6e6e6; + border-color: #8c8c8c; +} +.btn-default:hover { + color: #333; + background-color: #e6e6e6; + border-color: #adadad; +} +.btn-default:active, +.btn-default.active, +.open > .dropdown-toggle.btn-default { + color: #333; + background-color: #e6e6e6; + border-color: #adadad; +} +.btn-default:active:hover, +.btn-default.active:hover, +.open > .dropdown-toggle.btn-default:hover, +.btn-default:active:focus, +.btn-default.active:focus, +.open > .dropdown-toggle.btn-default:focus, +.btn-default:active.focus, +.btn-default.active.focus, +.open > .dropdown-toggle.btn-default.focus { + color: #333; + background-color: #d4d4d4; + border-color: #8c8c8c; +} +.btn-default:active, +.btn-default.active, +.open > .dropdown-toggle.btn-default { + background-image: none; +} +.btn-default.disabled:hover, +.btn-default[disabled]:hover, +fieldset[disabled] .btn-default:hover, +.btn-default.disabled:focus, +.btn-default[disabled]:focus, +fieldset[disabled] .btn-default:focus, +.btn-default.disabled.focus, +.btn-default[disabled].focus, +fieldset[disabled] .btn-default.focus { + background-color: #fff; + border-color: #ccc; +} +.btn-default .badge { + color: #fff; + background-color: #333; +} +.btn-primary { + color: #fff; + background-color: #337ab7; + border-color: #2e6da4; +} +.btn-primary:focus, +.btn-primary.focus { + color: #fff; + background-color: #286090; + border-color: #122b40; +} +.btn-primary:hover { + color: #fff; + background-color: #286090; + border-color: #204d74; +} +.btn-primary:active, +.btn-primary.active, +.open > .dropdown-toggle.btn-primary { + color: #fff; + background-color: #286090; + border-color: #204d74; +} +.btn-primary:active:hover, +.btn-primary.active:hover, +.open > .dropdown-toggle.btn-primary:hover, +.btn-primary:active:focus, +.btn-primary.active:focus, +.open > .dropdown-toggle.btn-primary:focus, +.btn-primary:active.focus, +.btn-primary.active.focus, +.open > .dropdown-toggle.btn-primary.focus { + color: #fff; + background-color: #204d74; + border-color: #122b40; +} +.btn-primary:active, +.btn-primary.active, +.open > .dropdown-toggle.btn-primary { + background-image: none; +} +.btn-primary.disabled:hover, +.btn-primary[disabled]:hover, +fieldset[disabled] .btn-primary:hover, +.btn-primary.disabled:focus, +.btn-primary[disabled]:focus, +fieldset[disabled] .btn-primary:focus, +.btn-primary.disabled.focus, +.btn-primary[disabled].focus, +fieldset[disabled] .btn-primary.focus { + background-color: #337ab7; + border-color: #2e6da4; +} +.btn-primary .badge { + color: #337ab7; + background-color: #fff; +} +.btn-success { + color: #fff; + background-color: #5cb85c; + border-color: #4cae4c; +} +.btn-success:focus, +.btn-success.focus { + color: #fff; + background-color: #449d44; + border-color: #255625; +} +.btn-success:hover { + color: #fff; + background-color: #449d44; + border-color: #398439; +} +.btn-success:active, +.btn-success.active, +.open > .dropdown-toggle.btn-success { + color: #fff; + background-color: #449d44; + border-color: #398439; +} +.btn-success:active:hover, +.btn-success.active:hover, +.open > .dropdown-toggle.btn-success:hover, +.btn-success:active:focus, +.btn-success.active:focus, +.open > .dropdown-toggle.btn-success:focus, +.btn-success:active.focus, +.btn-success.active.focus, +.open > .dropdown-toggle.btn-success.focus { + color: #fff; + background-color: #398439; + border-color: #255625; +} +.btn-success:active, +.btn-success.active, +.open > .dropdown-toggle.btn-success { + background-image: none; +} +.btn-success.disabled:hover, +.btn-success[disabled]:hover, +fieldset[disabled] .btn-success:hover, +.btn-success.disabled:focus, +.btn-success[disabled]:focus, +fieldset[disabled] .btn-success:focus, +.btn-success.disabled.focus, +.btn-success[disabled].focus, +fieldset[disabled] .btn-success.focus { + background-color: #5cb85c; + border-color: #4cae4c; +} +.btn-success .badge { + color: #5cb85c; + background-color: #fff; +} +.btn-info { + color: #fff; + background-color: #5bc0de; + border-color: #46b8da; +} +.btn-info:focus, +.btn-info.focus { + color: #fff; + background-color: #31b0d5; + border-color: #1b6d85; +} +.btn-info:hover { + color: #fff; + background-color: #31b0d5; + border-color: #269abc; +} +.btn-info:active, +.btn-info.active, +.open > .dropdown-toggle.btn-info { + color: #fff; + background-color: #31b0d5; + border-color: #269abc; +} +.btn-info:active:hover, +.btn-info.active:hover, +.open > .dropdown-toggle.btn-info:hover, +.btn-info:active:focus, +.btn-info.active:focus, +.open > .dropdown-toggle.btn-info:focus, +.btn-info:active.focus, +.btn-info.active.focus, +.open > .dropdown-toggle.btn-info.focus { + color: #fff; + background-color: #269abc; + border-color: #1b6d85; +} +.btn-info:active, +.btn-info.active, +.open > .dropdown-toggle.btn-info { + background-image: none; +} +.btn-info.disabled:hover, +.btn-info[disabled]:hover, +fieldset[disabled] .btn-info:hover, +.btn-info.disabled:focus, +.btn-info[disabled]:focus, +fieldset[disabled] .btn-info:focus, +.btn-info.disabled.focus, +.btn-info[disabled].focus, +fieldset[disabled] .btn-info.focus { + background-color: #5bc0de; + border-color: #46b8da; +} +.btn-info .badge { + color: #5bc0de; + background-color: #fff; +} +.btn-warning { + color: #fff; + background-color: #f0ad4e; + border-color: #eea236; +} +.btn-warning:focus, +.btn-warning.focus { + color: #fff; + background-color: #ec971f; + border-color: #985f0d; +} +.btn-warning:hover { + color: #fff; + background-color: #ec971f; + border-color: #d58512; +} +.btn-warning:active, +.btn-warning.active, +.open > .dropdown-toggle.btn-warning { + color: #fff; + background-color: #ec971f; + border-color: #d58512; +} +.btn-warning:active:hover, +.btn-warning.active:hover, +.open > .dropdown-toggle.btn-warning:hover, +.btn-warning:active:focus, +.btn-warning.active:focus, +.open > .dropdown-toggle.btn-warning:focus, +.btn-warning:active.focus, +.btn-warning.active.focus, +.open > .dropdown-toggle.btn-warning.focus { + color: #fff; + background-color: #d58512; + border-color: #985f0d; +} +.btn-warning:active, +.btn-warning.active, +.open > .dropdown-toggle.btn-warning { + background-image: none; +} +.btn-warning.disabled:hover, +.btn-warning[disabled]:hover, +fieldset[disabled] .btn-warning:hover, +.btn-warning.disabled:focus, +.btn-warning[disabled]:focus, +fieldset[disabled] .btn-warning:focus, +.btn-warning.disabled.focus, +.btn-warning[disabled].focus, +fieldset[disabled] .btn-warning.focus { + background-color: #f0ad4e; + border-color: #eea236; +} +.btn-warning .badge { + color: #f0ad4e; + background-color: #fff; +} +.btn-danger { + color: #fff; + background-color: #d9534f; + border-color: #d43f3a; +} +.btn-danger:focus, +.btn-danger.focus { + color: #fff; + background-color: #c9302c; + border-color: #761c19; +} +.btn-danger:hover { + color: #fff; + background-color: #c9302c; + border-color: #ac2925; +} +.btn-danger:active, +.btn-danger.active, +.open > .dropdown-toggle.btn-danger { + color: #fff; + background-color: #c9302c; + border-color: #ac2925; +} +.btn-danger:active:hover, +.btn-danger.active:hover, +.open > .dropdown-toggle.btn-danger:hover, +.btn-danger:active:focus, +.btn-danger.active:focus, +.open > .dropdown-toggle.btn-danger:focus, +.btn-danger:active.focus, +.btn-danger.active.focus, +.open > .dropdown-toggle.btn-danger.focus { + color: #fff; + background-color: #ac2925; + border-color: #761c19; +} +.btn-danger:active, +.btn-danger.active, +.open > .dropdown-toggle.btn-danger { + background-image: none; +} +.btn-danger.disabled:hover, +.btn-danger[disabled]:hover, +fieldset[disabled] .btn-danger:hover, +.btn-danger.disabled:focus, +.btn-danger[disabled]:focus, +fieldset[disabled] .btn-danger:focus, +.btn-danger.disabled.focus, +.btn-danger[disabled].focus, +fieldset[disabled] .btn-danger.focus { + background-color: #d9534f; + border-color: #d43f3a; +} +.btn-danger .badge { + color: #d9534f; + background-color: #fff; +} +.btn-link { + font-weight: normal; + color: #337ab7; + border-radius: 0; +} +.btn-link, +.btn-link:active, +.btn-link.active, +.btn-link[disabled], +fieldset[disabled] .btn-link { + background-color: transparent; + -webkit-box-shadow: none; + box-shadow: none; +} +.btn-link, +.btn-link:hover, +.btn-link:focus, +.btn-link:active { + border-color: transparent; +} +.btn-link:hover, +.btn-link:focus { + color: #23527c; + text-decoration: underline; + background-color: transparent; +} +.btn-link[disabled]:hover, +fieldset[disabled] .btn-link:hover, +.btn-link[disabled]:focus, +fieldset[disabled] .btn-link:focus { + color: #777; + text-decoration: none; +} +.btn-lg, +.btn-group-lg > .btn { + padding: 10px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +.btn-sm, +.btn-group-sm > .btn { + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-xs, +.btn-group-xs > .btn { + padding: 1px 5px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-block { + display: block; + width: 100%; +} +.btn-block + .btn-block { + margin-top: 5px; +} +input[type="submit"].btn-block, +input[type="reset"].btn-block, +input[type="button"].btn-block { + width: 100%; +} +.fade { + opacity: 0; + -webkit-transition: opacity .15s linear; + -o-transition: opacity .15s linear; + transition: opacity .15s linear; +} +.fade.in { + opacity: 1; +} +.collapse { + display: none; +} +.collapse.in { + display: block; +} +tr.collapse.in { + display: table-row; +} +tbody.collapse.in { + display: table-row-group; +} +.collapsing { + position: relative; + height: 0; + overflow: hidden; + -webkit-transition-timing-function: ease; + -o-transition-timing-function: ease; + transition-timing-function: ease; + -webkit-transition-duration: .35s; + -o-transition-duration: .35s; + transition-duration: .35s; + -webkit-transition-property: height, visibility; + -o-transition-property: height, visibility; + transition-property: height, visibility; +} +.caret { + display: inline-block; + width: 0; + height: 0; + margin-left: 2px; + vertical-align: middle; + border-top: 4px dashed; + border-top: 4px solid \9; + border-right: 4px solid transparent; + border-left: 4px solid transparent; +} +.dropup, +.dropdown { + position: relative; +} +.dropdown-toggle:focus { + outline: 0; +} +.dropdown-menu { + position: absolute; + top: 100%; + left: 0; + z-index: 1000; + display: none; + float: left; + min-width: 160px; + padding: 5px 0; + margin: 2px 0 0; + font-size: 14px; + text-align: left; + list-style: none; + background-color: #fff; + -webkit-background-clip: padding-box; + background-clip: padding-box; + border: 1px solid #ccc; + border: 1px solid rgba(0, 0, 0, .15); + border-radius: 4px; + -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, .175); + box-shadow: 0 6px 12px rgba(0, 0, 0, .175); +} +.dropdown-menu.pull-right { + right: 0; + left: auto; +} +.dropdown-menu .divider { + height: 1px; + margin: 9px 0; + overflow: hidden; + background-color: #e5e5e5; +} +.dropdown-menu > li > a { + display: block; + padding: 3px 20px; + clear: both; + font-weight: normal; + line-height: 1.42857143; + color: #333; + white-space: nowrap; +} +.dropdown-menu > li > a:hover, +.dropdown-menu > li > a:focus { + color: #262626; + text-decoration: none; + background-color: #f5f5f5; +} +.dropdown-menu > .active > a, +.dropdown-menu > .active > a:hover, +.dropdown-menu > .active > a:focus { + color: #fff; + text-decoration: none; + background-color: #337ab7; + outline: 0; +} +.dropdown-menu > .disabled > a, +.dropdown-menu > .disabled > a:hover, +.dropdown-menu > .disabled > a:focus { + color: #777; +} +.dropdown-menu > .disabled > a:hover, +.dropdown-menu > .disabled > a:focus { + text-decoration: none; + cursor: not-allowed; + background-color: transparent; + background-image: none; + filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); +} +.open > .dropdown-menu { + display: block; +} +.open > a { + outline: 0; +} +.dropdown-menu-right { + right: 0; + left: auto; +} +.dropdown-menu-left { + right: auto; + left: 0; +} +.dropdown-header { + display: block; + padding: 3px 20px; + font-size: 12px; + line-height: 1.42857143; + color: #777; + white-space: nowrap; +} +.dropdown-backdrop { + position: fixed; + top: 0; + right: 0; + bottom: 0; + left: 0; + z-index: 990; +} +.pull-right > .dropdown-menu { + right: 0; + left: auto; +} +.dropup .caret, +.navbar-fixed-bottom .dropdown .caret { + content: ""; + border-top: 0; + border-bottom: 4px dashed; + border-bottom: 4px solid \9; +} +.dropup .dropdown-menu, +.navbar-fixed-bottom .dropdown .dropdown-menu { + top: auto; + bottom: 100%; + margin-bottom: 2px; +} +@media (min-width: 768px) { + .navbar-right .dropdown-menu { + right: 0; + left: auto; + } + .navbar-right .dropdown-menu-left { + right: auto; + left: 0; + } +} +.btn-group, +.btn-group-vertical { + position: relative; + display: inline-block; + vertical-align: middle; +} +.btn-group > .btn, +.btn-group-vertical > .btn { + position: relative; + float: left; +} +.btn-group > .btn:hover, +.btn-group-vertical > .btn:hover, +.btn-group > .btn:focus, +.btn-group-vertical > .btn:focus, +.btn-group > .btn:active, +.btn-group-vertical > .btn:active, +.btn-group > .btn.active, +.btn-group-vertical > .btn.active { + z-index: 2; +} +.btn-group .btn + .btn, +.btn-group .btn + .btn-group, +.btn-group .btn-group + .btn, +.btn-group .btn-group + .btn-group { + margin-left: -1px; +} +.btn-toolbar { + margin-left: -5px; +} +.btn-toolbar .btn, +.btn-toolbar .btn-group, +.btn-toolbar .input-group { + float: left; +} +.btn-toolbar > .btn, +.btn-toolbar > .btn-group, +.btn-toolbar > .input-group { + margin-left: 5px; +} +.btn-group > .btn:not(:first-child):not(:last-child):not(.dropdown-toggle) { + border-radius: 0; +} +.btn-group > .btn:first-child { + margin-left: 0; +} +.btn-group > .btn:first-child:not(:last-child):not(.dropdown-toggle) { + border-top-right-radius: 0; + border-bottom-right-radius: 0; +} +.btn-group > .btn:last-child:not(:first-child), +.btn-group > .dropdown-toggle:not(:first-child) { + border-top-left-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group > .btn-group { + float: left; +} +.btn-group > .btn-group:not(:first-child):not(:last-child) > .btn { + border-radius: 0; +} +.btn-group > .btn-group:first-child:not(:last-child) > .btn:last-child, +.btn-group > .btn-group:first-child:not(:last-child) > .dropdown-toggle { + border-top-right-radius: 0; + border-bottom-right-radius: 0; +} +.btn-group > .btn-group:last-child:not(:first-child) > .btn:first-child { + border-top-left-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group .dropdown-toggle:active, +.btn-group.open .dropdown-toggle { + outline: 0; +} +.btn-group > .btn + .dropdown-toggle { + padding-right: 8px; + padding-left: 8px; +} +.btn-group > .btn-lg + .dropdown-toggle { + padding-right: 12px; + padding-left: 12px; +} +.btn-group.open .dropdown-toggle { + -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, .125); + box-shadow: inset 0 3px 5px rgba(0, 0, 0, .125); +} +.btn-group.open .dropdown-toggle.btn-link { + -webkit-box-shadow: none; + box-shadow: none; +} +.btn .caret { + margin-left: 0; +} +.btn-lg .caret { + border-width: 5px 5px 0; + border-bottom-width: 0; +} +.dropup .btn-lg .caret { + border-width: 0 5px 5px; +} +.btn-group-vertical > .btn, +.btn-group-vertical > .btn-group, +.btn-group-vertical > .btn-group > .btn { + display: block; + float: none; + width: 100%; + max-width: 100%; +} +.btn-group-vertical > .btn-group > .btn { + float: none; +} +.btn-group-vertical > .btn + .btn, +.btn-group-vertical > .btn + .btn-group, +.btn-group-vertical > .btn-group + .btn, +.btn-group-vertical > .btn-group + .btn-group { + margin-top: -1px; + margin-left: 0; +} +.btn-group-vertical > .btn:not(:first-child):not(:last-child) { + border-radius: 0; +} +.btn-group-vertical > .btn:first-child:not(:last-child) { + border-top-left-radius: 4px; + border-top-right-radius: 4px; + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group-vertical > .btn:last-child:not(:first-child) { + border-top-left-radius: 0; + border-top-right-radius: 0; + border-bottom-right-radius: 4px; + border-bottom-left-radius: 4px; +} +.btn-group-vertical > .btn-group:not(:first-child):not(:last-child) > .btn { + border-radius: 0; +} +.btn-group-vertical > .btn-group:first-child:not(:last-child) > .btn:last-child, +.btn-group-vertical > .btn-group:first-child:not(:last-child) > .dropdown-toggle { + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group-vertical > .btn-group:last-child:not(:first-child) > .btn:first-child { + border-top-left-radius: 0; + border-top-right-radius: 0; +} +.btn-group-justified { + display: table; + width: 100%; + table-layout: fixed; + border-collapse: separate; +} +.btn-group-justified > .btn, +.btn-group-justified > .btn-group { + display: table-cell; + float: none; + width: 1%; +} +.btn-group-justified > .btn-group .btn { + width: 100%; +} +.btn-group-justified > .btn-group .dropdown-menu { + left: auto; +} +[data-toggle="buttons"] > .btn input[type="radio"], +[data-toggle="buttons"] > .btn-group > .btn input[type="radio"], +[data-toggle="buttons"] > .btn input[type="checkbox"], +[data-toggle="buttons"] > .btn-group > .btn input[type="checkbox"] { + position: absolute; + clip: rect(0, 0, 0, 0); + pointer-events: none; +} +.input-group { + position: relative; + display: table; + border-collapse: separate; +} +.input-group[class*="col-"] { + float: none; + padding-right: 0; + padding-left: 0; +} +.input-group .form-control { + position: relative; + z-index: 2; + float: left; + width: 100%; + margin-bottom: 0; +} +.input-group .form-control:focus { + z-index: 3; +} +.input-group-lg > .form-control, +.input-group-lg > .input-group-addon, +.input-group-lg > .input-group-btn > .btn { + height: 46px; + padding: 10px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +select.input-group-lg > .form-control, +select.input-group-lg > .input-group-addon, +select.input-group-lg > .input-group-btn > .btn { + height: 46px; + line-height: 46px; +} +textarea.input-group-lg > .form-control, +textarea.input-group-lg > .input-group-addon, +textarea.input-group-lg > .input-group-btn > .btn, +select[multiple].input-group-lg > .form-control, +select[multiple].input-group-lg > .input-group-addon, +select[multiple].input-group-lg > .input-group-btn > .btn { + height: auto; +} +.input-group-sm > .form-control, +.input-group-sm > .input-group-addon, +.input-group-sm > .input-group-btn > .btn { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +select.input-group-sm > .form-control, +select.input-group-sm > .input-group-addon, +select.input-group-sm > .input-group-btn > .btn { + height: 30px; + line-height: 30px; +} +textarea.input-group-sm > .form-control, +textarea.input-group-sm > .input-group-addon, +textarea.input-group-sm > .input-group-btn > .btn, +select[multiple].input-group-sm > .form-control, +select[multiple].input-group-sm > .input-group-addon, +select[multiple].input-group-sm > .input-group-btn > .btn { + height: auto; +} +.input-group-addon, +.input-group-btn, +.input-group .form-control { + display: table-cell; +} +.input-group-addon:not(:first-child):not(:last-child), +.input-group-btn:not(:first-child):not(:last-child), +.input-group .form-control:not(:first-child):not(:last-child) { + border-radius: 0; +} +.input-group-addon, +.input-group-btn { + width: 1%; + white-space: nowrap; + vertical-align: middle; +} +.input-group-addon { + padding: 6px 12px; + font-size: 14px; + font-weight: normal; + line-height: 1; + color: #555; + text-align: center; + background-color: #eee; + border: 1px solid #ccc; + border-radius: 4px; +} +.input-group-addon.input-sm { + padding: 5px 10px; + font-size: 12px; + border-radius: 3px; +} +.input-group-addon.input-lg { + padding: 10px 16px; + font-size: 18px; + border-radius: 6px; +} +.input-group-addon input[type="radio"], +.input-group-addon input[type="checkbox"] { + margin-top: 0; +} +.input-group .form-control:first-child, +.input-group-addon:first-child, +.input-group-btn:first-child > .btn, +.input-group-btn:first-child > .btn-group > .btn, +.input-group-btn:first-child > .dropdown-toggle, +.input-group-btn:last-child > .btn:not(:last-child):not(.dropdown-toggle), +.input-group-btn:last-child > .btn-group:not(:last-child) > .btn { + border-top-right-radius: 0; + border-bottom-right-radius: 0; +} +.input-group-addon:first-child { + border-right: 0; +} +.input-group .form-control:last-child, +.input-group-addon:last-child, +.input-group-btn:last-child > .btn, +.input-group-btn:last-child > .btn-group > .btn, +.input-group-btn:last-child > .dropdown-toggle, +.input-group-btn:first-child > .btn:not(:first-child), +.input-group-btn:first-child > .btn-group:not(:first-child) > .btn { + border-top-left-radius: 0; + border-bottom-left-radius: 0; +} +.input-group-addon:last-child { + border-left: 0; +} +.input-group-btn { + position: relative; + font-size: 0; + white-space: nowrap; +} +.input-group-btn > .btn { + position: relative; +} +.input-group-btn > .btn + .btn { + margin-left: -1px; +} +.input-group-btn > .btn:hover, +.input-group-btn > .btn:focus, +.input-group-btn > .btn:active { + z-index: 2; +} +.input-group-btn:first-child > .btn, +.input-group-btn:first-child > .btn-group { + margin-right: -1px; +} +.input-group-btn:last-child > .btn, +.input-group-btn:last-child > .btn-group { + z-index: 2; + margin-left: -1px; +} +.nav { + padding-left: 0; + margin-bottom: 0; + list-style: none; +} +.nav > li { + position: relative; + display: block; +} +.nav > li > a { + position: relative; + display: block; + padding: 10px 15px; +} +.nav > li > a:hover, +.nav > li > a:focus { + text-decoration: none; + background-color: #eee; +} +.nav > li.disabled > a { + color: #777; +} +.nav > li.disabled > a:hover, +.nav > li.disabled > a:focus { + color: #777; + text-decoration: none; + cursor: not-allowed; + background-color: transparent; +} +.nav .open > a, +.nav .open > a:hover, +.nav .open > a:focus { + background-color: #eee; + border-color: #337ab7; +} +.nav .nav-divider { + height: 1px; + margin: 9px 0; + overflow: hidden; + background-color: #e5e5e5; +} +.nav > li > a > img { + max-width: none; +} +.nav-tabs { + border-bottom: 1px solid #ddd; +} +.nav-tabs > li { + float: left; + margin-bottom: -1px; +} +.nav-tabs > li > a { + margin-right: 2px; + line-height: 1.42857143; + border: 1px solid transparent; + border-radius: 4px 4px 0 0; +} +.nav-tabs > li > a:hover { + border-color: #eee #eee #ddd; +} +.nav-tabs > li.active > a, +.nav-tabs > li.active > a:hover, +.nav-tabs > li.active > a:focus { + color: #555; + cursor: default; + background-color: #fff; + border: 1px solid #ddd; + border-bottom-color: transparent; +} +.nav-tabs.nav-justified { + width: 100%; + border-bottom: 0; +} +.nav-tabs.nav-justified > li { + float: none; +} +.nav-tabs.nav-justified > li > a { + margin-bottom: 5px; + text-align: center; +} +.nav-tabs.nav-justified > .dropdown .dropdown-menu { + top: auto; + left: auto; +} +@media (min-width: 768px) { + .nav-tabs.nav-justified > li { + display: table-cell; + width: 1%; + } + .nav-tabs.nav-justified > li > a { + margin-bottom: 0; + } +} +.nav-tabs.nav-justified > li > a { + margin-right: 0; + border-radius: 4px; +} +.nav-tabs.nav-justified > .active > a, +.nav-tabs.nav-justified > .active > a:hover, +.nav-tabs.nav-justified > .active > a:focus { + border: 1px solid #ddd; +} +@media (min-width: 768px) { + .nav-tabs.nav-justified > li > a { + border-bottom: 1px solid #ddd; + border-radius: 4px 4px 0 0; + } + .nav-tabs.nav-justified > .active > a, + .nav-tabs.nav-justified > .active > a:hover, + .nav-tabs.nav-justified > .active > a:focus { + border-bottom-color: #fff; + } +} +.nav-pills > li { + float: left; +} +.nav-pills > li > a { + border-radius: 4px; +} +.nav-pills > li + li { + margin-left: 2px; +} +.nav-pills > li.active > a, +.nav-pills > li.active > a:hover, +.nav-pills > li.active > a:focus { + color: #fff; + background-color: #337ab7; +} +.nav-stacked > li { + float: none; +} +.nav-stacked > li + li { + margin-top: 2px; + margin-left: 0; +} +.nav-justified { + width: 100%; +} +.nav-justified > li { + float: none; +} +.nav-justified > li > a { + margin-bottom: 5px; + text-align: center; +} +.nav-justified > .dropdown .dropdown-menu { + top: auto; + left: auto; +} +@media (min-width: 768px) { + .nav-justified > li { + display: table-cell; + width: 1%; + } + .nav-justified > li > a { + margin-bottom: 0; + } +} +.nav-tabs-justified { + border-bottom: 0; +} +.nav-tabs-justified > li > a { + margin-right: 0; + border-radius: 4px; +} +.nav-tabs-justified > .active > a, +.nav-tabs-justified > .active > a:hover, +.nav-tabs-justified > .active > a:focus { + border: 1px solid #ddd; +} +@media (min-width: 768px) { + .nav-tabs-justified > li > a { + border-bottom: 1px solid #ddd; + border-radius: 4px 4px 0 0; + } + .nav-tabs-justified > .active > a, + .nav-tabs-justified > .active > a:hover, + .nav-tabs-justified > .active > a:focus { + border-bottom-color: #fff; + } +} +.tab-content > .tab-pane { + display: none; +} +.tab-content > .active { + display: block; +} +.nav-tabs .dropdown-menu { + margin-top: -1px; + border-top-left-radius: 0; + border-top-right-radius: 0; +} +.navbar { + position: relative; + min-height: 50px; + margin-bottom: 20px; + border: 1px solid transparent; +} +@media (min-width: 768px) { + .navbar { + border-radius: 4px; + } +} +@media (min-width: 768px) { + .navbar-header { + float: left; + } +} +.navbar-collapse { + padding-right: 15px; + padding-left: 15px; + overflow-x: visible; + -webkit-overflow-scrolling: touch; + border-top: 1px solid transparent; + -webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, .1); + box-shadow: inset 0 1px 0 rgba(255, 255, 255, .1); +} +.navbar-collapse.in { + overflow-y: auto; +} +@media (min-width: 768px) { + .navbar-collapse { + width: auto; + border-top: 0; + -webkit-box-shadow: none; + box-shadow: none; + } + .navbar-collapse.collapse { + display: block !important; + height: auto !important; + padding-bottom: 0; + overflow: visible !important; + } + .navbar-collapse.in { + overflow-y: visible; + } + .navbar-fixed-top .navbar-collapse, + .navbar-static-top .navbar-collapse, + .navbar-fixed-bottom .navbar-collapse { + padding-right: 0; + padding-left: 0; + } +} +.navbar-fixed-top .navbar-collapse, +.navbar-fixed-bottom .navbar-collapse { + max-height: 340px; +} +@media (max-device-width: 480px) and (orientation: landscape) { + .navbar-fixed-top .navbar-collapse, + .navbar-fixed-bottom .navbar-collapse { + max-height: 200px; + } +} +.container > .navbar-header, +.container-fluid > .navbar-header, +.container > .navbar-collapse, +.container-fluid > .navbar-collapse { + margin-right: -15px; + margin-left: -15px; +} +@media (min-width: 768px) { + .container > .navbar-header, + .container-fluid > .navbar-header, + .container > .navbar-collapse, + .container-fluid > .navbar-collapse { + margin-right: 0; + margin-left: 0; + } +} +.navbar-static-top { + z-index: 1000; + border-width: 0 0 1px; +} +@media (min-width: 768px) { + .navbar-static-top { + border-radius: 0; + } +} +.navbar-fixed-top, +.navbar-fixed-bottom { + position: fixed; + right: 0; + left: 0; + z-index: 1030; +} +@media (min-width: 768px) { + .navbar-fixed-top, + .navbar-fixed-bottom { + border-radius: 0; + } +} +.navbar-fixed-top { + top: 0; + border-width: 0 0 1px; +} +.navbar-fixed-bottom { + bottom: 0; + margin-bottom: 0; + border-width: 1px 0 0; +} +.navbar-brand { + float: left; + height: 50px; + padding: 15px 15px; + font-size: 18px; + line-height: 20px; +} +.navbar-brand:hover, +.navbar-brand:focus { + text-decoration: none; +} +.navbar-brand > img { + display: block; +} +@media (min-width: 768px) { + .navbar > .container .navbar-brand, + .navbar > .container-fluid .navbar-brand { + margin-left: -15px; + } +} +.navbar-toggle { + position: relative; + float: right; + padding: 9px 10px; + margin-top: 8px; + margin-right: 15px; + margin-bottom: 8px; + background-color: transparent; + background-image: none; + border: 1px solid transparent; + border-radius: 4px; +} +.navbar-toggle:focus { + outline: 0; +} +.navbar-toggle .icon-bar { + display: block; + width: 22px; + height: 2px; + border-radius: 1px; +} +.navbar-toggle .icon-bar + .icon-bar { + margin-top: 4px; +} +@media (min-width: 768px) { + .navbar-toggle { + display: none; + } +} +.navbar-nav { + margin: 7.5px -15px; +} +.navbar-nav > li > a { + padding-top: 10px; + padding-bottom: 10px; + line-height: 20px; +} +@media (max-width: 767px) { + .navbar-nav .open .dropdown-menu { + position: static; + float: none; + width: auto; + margin-top: 0; + background-color: transparent; + border: 0; + -webkit-box-shadow: none; + box-shadow: none; + } + .navbar-nav .open .dropdown-menu > li > a, + .navbar-nav .open .dropdown-menu .dropdown-header { + padding: 5px 15px 5px 25px; + } + .navbar-nav .open .dropdown-menu > li > a { + line-height: 20px; + } + .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-nav .open .dropdown-menu > li > a:focus { + background-image: none; + } +} +@media (min-width: 768px) { + .navbar-nav { + float: left; + margin: 0; + } + .navbar-nav > li { + float: left; + } + .navbar-nav > li > a { + padding-top: 15px; + padding-bottom: 15px; + } +} +.navbar-form { + padding: 10px 15px; + margin-top: 8px; + margin-right: -15px; + margin-bottom: 8px; + margin-left: -15px; + border-top: 1px solid transparent; + border-bottom: 1px solid transparent; + -webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, .1), 0 1px 0 rgba(255, 255, 255, .1); + box-shadow: inset 0 1px 0 rgba(255, 255, 255, .1), 0 1px 0 rgba(255, 255, 255, .1); +} +@media (min-width: 768px) { + .navbar-form .form-group { + display: inline-block; + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .form-control { + display: inline-block; + width: auto; + vertical-align: middle; + } + .navbar-form .form-control-static { + display: inline-block; + } + .navbar-form .input-group { + display: inline-table; + vertical-align: middle; + } + .navbar-form .input-group .input-group-addon, + .navbar-form .input-group .input-group-btn, + .navbar-form .input-group .form-control { + width: auto; + } + .navbar-form .input-group > .form-control { + width: 100%; + } + .navbar-form .control-label { + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .radio, + .navbar-form .checkbox { + display: inline-block; + margin-top: 0; + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .radio label, + .navbar-form .checkbox label { + padding-left: 0; + } + .navbar-form .radio input[type="radio"], + .navbar-form .checkbox input[type="checkbox"] { + position: relative; + margin-left: 0; + } + .navbar-form .has-feedback .form-control-feedback { + top: 0; + } +} +@media (max-width: 767px) { + .navbar-form .form-group { + margin-bottom: 5px; + } + .navbar-form .form-group:last-child { + margin-bottom: 0; + } +} +@media (min-width: 768px) { + .navbar-form { + width: auto; + padding-top: 0; + padding-bottom: 0; + margin-right: 0; + margin-left: 0; + border: 0; + -webkit-box-shadow: none; + box-shadow: none; + } +} +.navbar-nav > li > .dropdown-menu { + margin-top: 0; + border-top-left-radius: 0; + border-top-right-radius: 0; +} +.navbar-fixed-bottom .navbar-nav > li > .dropdown-menu { + margin-bottom: 0; + border-top-left-radius: 4px; + border-top-right-radius: 4px; + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.navbar-btn { + margin-top: 8px; + margin-bottom: 8px; +} +.navbar-btn.btn-sm { + margin-top: 10px; + margin-bottom: 10px; +} +.navbar-btn.btn-xs { + margin-top: 14px; + margin-bottom: 14px; +} +.navbar-text { + margin-top: 15px; + margin-bottom: 15px; +} +@media (min-width: 768px) { + .navbar-text { + float: left; + margin-right: 15px; + margin-left: 15px; + } +} +@media (min-width: 768px) { + .navbar-left { + float: left !important; + } + .navbar-right { + float: right !important; + margin-right: -15px; + } + .navbar-right ~ .navbar-right { + margin-right: 0; + } +} +.navbar-default { + background-color: #f8f8f8; + border-color: #e7e7e7; +} +.navbar-default .navbar-brand { + color: #777; +} +.navbar-default .navbar-brand:hover, +.navbar-default .navbar-brand:focus { + color: #5e5e5e; + background-color: transparent; +} +.navbar-default .navbar-text { + color: #777; +} +.navbar-default .navbar-nav > li > a { + color: #777; +} +.navbar-default .navbar-nav > li > a:hover, +.navbar-default .navbar-nav > li > a:focus { + color: #333; + background-color: transparent; +} +.navbar-default .navbar-nav > .active > a, +.navbar-default .navbar-nav > .active > a:hover, +.navbar-default .navbar-nav > .active > a:focus { + color: #555; + background-color: #e7e7e7; +} +.navbar-default .navbar-nav > .disabled > a, +.navbar-default .navbar-nav > .disabled > a:hover, +.navbar-default .navbar-nav > .disabled > a:focus { + color: #ccc; + background-color: transparent; +} +.navbar-default .navbar-toggle { + border-color: #ddd; +} +.navbar-default .navbar-toggle:hover, +.navbar-default .navbar-toggle:focus { + background-color: #ddd; +} +.navbar-default .navbar-toggle .icon-bar { + background-color: #888; +} +.navbar-default .navbar-collapse, +.navbar-default .navbar-form { + border-color: #e7e7e7; +} +.navbar-default .navbar-nav > .open > a, +.navbar-default .navbar-nav > .open > a:hover, +.navbar-default .navbar-nav > .open > a:focus { + color: #555; + background-color: #e7e7e7; +} +@media (max-width: 767px) { + .navbar-default .navbar-nav .open .dropdown-menu > li > a { + color: #777; + } + .navbar-default .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > li > a:focus { + color: #333; + background-color: transparent; + } + .navbar-default .navbar-nav .open .dropdown-menu > .active > a, + .navbar-default .navbar-nav .open .dropdown-menu > .active > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > .active > a:focus { + color: #555; + background-color: #e7e7e7; + } + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a, + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a:focus { + color: #ccc; + background-color: transparent; + } +} +.navbar-default .navbar-link { + color: #777; +} +.navbar-default .navbar-link:hover { + color: #333; +} +.navbar-default .btn-link { + color: #777; +} +.navbar-default .btn-link:hover, +.navbar-default .btn-link:focus { + color: #333; +} +.navbar-default .btn-link[disabled]:hover, +fieldset[disabled] .navbar-default .btn-link:hover, +.navbar-default .btn-link[disabled]:focus, +fieldset[disabled] .navbar-default .btn-link:focus { + color: #ccc; +} +.navbar-inverse { + background-color: #222; + border-color: #080808; +} +.navbar-inverse .navbar-brand { + color: #9d9d9d; +} +.navbar-inverse .navbar-brand:hover, +.navbar-inverse .navbar-brand:focus { + color: #fff; + background-color: transparent; +} +.navbar-inverse .navbar-text { + color: #9d9d9d; +} +.navbar-inverse .navbar-nav > li > a { + color: #9d9d9d; +} +.navbar-inverse .navbar-nav > li > a:hover, +.navbar-inverse .navbar-nav > li > a:focus { + color: #fff; + background-color: transparent; +} +.navbar-inverse .navbar-nav > .active > a, +.navbar-inverse .navbar-nav > .active > a:hover, +.navbar-inverse .navbar-nav > .active > a:focus { + color: #fff; + background-color: #080808; +} +.navbar-inverse .navbar-nav > .disabled > a, +.navbar-inverse .navbar-nav > .disabled > a:hover, +.navbar-inverse .navbar-nav > .disabled > a:focus { + color: #444; + background-color: transparent; +} +.navbar-inverse .navbar-toggle { + border-color: #333; +} +.navbar-inverse .navbar-toggle:hover, +.navbar-inverse .navbar-toggle:focus { + background-color: #333; +} +.navbar-inverse .navbar-toggle .icon-bar { + background-color: #fff; +} +.navbar-inverse .navbar-collapse, +.navbar-inverse .navbar-form { + border-color: #101010; +} +.navbar-inverse .navbar-nav > .open > a, +.navbar-inverse .navbar-nav > .open > a:hover, +.navbar-inverse .navbar-nav > .open > a:focus { + color: #fff; + background-color: #080808; +} +@media (max-width: 767px) { + .navbar-inverse .navbar-nav .open .dropdown-menu > .dropdown-header { + border-color: #080808; + } + .navbar-inverse .navbar-nav .open .dropdown-menu .divider { + background-color: #080808; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a { + color: #9d9d9d; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a:focus { + color: #fff; + background-color: transparent; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a, + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a:focus { + color: #fff; + background-color: #080808; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a, + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a:focus { + color: #444; + background-color: transparent; + } +} +.navbar-inverse .navbar-link { + color: #9d9d9d; +} +.navbar-inverse .navbar-link:hover { + color: #fff; +} +.navbar-inverse .btn-link { + color: #9d9d9d; +} +.navbar-inverse .btn-link:hover, +.navbar-inverse .btn-link:focus { + color: #fff; +} +.navbar-inverse .btn-link[disabled]:hover, +fieldset[disabled] .navbar-inverse .btn-link:hover, +.navbar-inverse .btn-link[disabled]:focus, +fieldset[disabled] .navbar-inverse .btn-link:focus { + color: #444; +} +.breadcrumb { + padding: 8px 15px; + margin-bottom: 20px; + list-style: none; + background-color: #f5f5f5; + border-radius: 4px; +} +.breadcrumb > li { + display: inline-block; +} +.breadcrumb > li + li:before { + padding: 0 5px; + color: #ccc; + content: "/\00a0"; +} +.breadcrumb > .active { + color: #777; +} +.pagination { + display: inline-block; + padding-left: 0; + margin: 20px 0; + border-radius: 4px; +} +.pagination > li { + display: inline; +} +.pagination > li > a, +.pagination > li > span { + position: relative; + float: left; + padding: 6px 12px; + margin-left: -1px; + line-height: 1.42857143; + color: #337ab7; + text-decoration: none; + background-color: #fff; + border: 1px solid #ddd; +} +.pagination > li:first-child > a, +.pagination > li:first-child > span { + margin-left: 0; + border-top-left-radius: 4px; + border-bottom-left-radius: 4px; +} +.pagination > li:last-child > a, +.pagination > li:last-child > span { + border-top-right-radius: 4px; + border-bottom-right-radius: 4px; +} +.pagination > li > a:hover, +.pagination > li > span:hover, +.pagination > li > a:focus, +.pagination > li > span:focus { + z-index: 2; + color: #23527c; + background-color: #eee; + border-color: #ddd; +} +.pagination > .active > a, +.pagination > .active > span, +.pagination > .active > a:hover, +.pagination > .active > span:hover, +.pagination > .active > a:focus, +.pagination > .active > span:focus { + z-index: 3; + color: #fff; + cursor: default; + background-color: #337ab7; + border-color: #337ab7; +} +.pagination > .disabled > span, +.pagination > .disabled > span:hover, +.pagination > .disabled > span:focus, +.pagination > .disabled > a, +.pagination > .disabled > a:hover, +.pagination > .disabled > a:focus { + color: #777; + cursor: not-allowed; + background-color: #fff; + border-color: #ddd; +} +.pagination-lg > li > a, +.pagination-lg > li > span { + padding: 10px 16px; + font-size: 18px; + line-height: 1.3333333; +} +.pagination-lg > li:first-child > a, +.pagination-lg > li:first-child > span { + border-top-left-radius: 6px; + border-bottom-left-radius: 6px; +} +.pagination-lg > li:last-child > a, +.pagination-lg > li:last-child > span { + border-top-right-radius: 6px; + border-bottom-right-radius: 6px; +} +.pagination-sm > li > a, +.pagination-sm > li > span { + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; +} +.pagination-sm > li:first-child > a, +.pagination-sm > li:first-child > span { + border-top-left-radius: 3px; + border-bottom-left-radius: 3px; +} +.pagination-sm > li:last-child > a, +.pagination-sm > li:last-child > span { + border-top-right-radius: 3px; + border-bottom-right-radius: 3px; +} +.pager { + padding-left: 0; + margin: 20px 0; + text-align: center; + list-style: none; +} +.pager li { + display: inline; +} +.pager li > a, +.pager li > span { + display: inline-block; + padding: 5px 14px; + background-color: #fff; + border: 1px solid #ddd; + border-radius: 15px; +} +.pager li > a:hover, +.pager li > a:focus { + text-decoration: none; + background-color: #eee; +} +.pager .next > a, +.pager .next > span { + float: right; +} +.pager .previous > a, +.pager .previous > span { + float: left; +} +.pager .disabled > a, +.pager .disabled > a:hover, +.pager .disabled > a:focus, +.pager .disabled > span { + color: #777; + cursor: not-allowed; + background-color: #fff; +} +.label { + display: inline; + padding: .2em .6em .3em; + font-size: 75%; + font-weight: bold; + line-height: 1; + color: #fff; + text-align: center; + white-space: nowrap; + vertical-align: baseline; + border-radius: .25em; +} +a.label:hover, +a.label:focus { + color: #fff; + text-decoration: none; + cursor: pointer; +} +.label:empty { + display: none; +} +.btn .label { + position: relative; + top: -1px; +} +.label-default { + background-color: #777; +} +.label-default[href]:hover, +.label-default[href]:focus { + background-color: #5e5e5e; +} +.label-primary { + background-color: #337ab7; +} +.label-primary[href]:hover, +.label-primary[href]:focus { + background-color: #286090; +} +.label-success { + background-color: #5cb85c; +} +.label-success[href]:hover, +.label-success[href]:focus { + background-color: #449d44; +} +.label-info { + background-color: #5bc0de; +} +.label-info[href]:hover, +.label-info[href]:focus { + background-color: #31b0d5; +} +.label-warning { + background-color: #f0ad4e; +} +.label-warning[href]:hover, +.label-warning[href]:focus { + background-color: #ec971f; +} +.label-danger { + background-color: #d9534f; +} +.label-danger[href]:hover, +.label-danger[href]:focus { + background-color: #c9302c; +} +.badge { + display: inline-block; + min-width: 10px; + padding: 3px 7px; + font-size: 12px; + font-weight: bold; + line-height: 1; + color: #fff; + text-align: center; + white-space: nowrap; + vertical-align: middle; + background-color: #777; + border-radius: 10px; +} +.badge:empty { + display: none; +} +.btn .badge { + position: relative; + top: -1px; +} +.btn-xs .badge, +.btn-group-xs > .btn .badge { + top: 0; + padding: 1px 5px; +} +a.badge:hover, +a.badge:focus { + color: #fff; + text-decoration: none; + cursor: pointer; +} +.list-group-item.active > .badge, +.nav-pills > .active > a > .badge { + color: #337ab7; + background-color: #fff; +} +.list-group-item > .badge { + float: right; +} +.list-group-item > .badge + .badge { + margin-right: 5px; +} +.nav-pills > li > a > .badge { + margin-left: 3px; +} +.jumbotron { + padding-top: 30px; + padding-bottom: 30px; + margin-bottom: 30px; + color: inherit; + background-color: #eee; +} +.jumbotron h1, +.jumbotron .h1 { + color: inherit; +} +.jumbotron p { + margin-bottom: 15px; + font-size: 21px; + font-weight: 200; +} +.jumbotron > hr { + border-top-color: #d5d5d5; +} +.container .jumbotron, +.container-fluid .jumbotron { + padding-right: 15px; + padding-left: 15px; + border-radius: 6px; +} +.jumbotron .container { + max-width: 100%; +} +@media screen and (min-width: 768px) { + .jumbotron { + padding-top: 48px; + padding-bottom: 48px; + } + .container .jumbotron, + .container-fluid .jumbotron { + padding-right: 60px; + padding-left: 60px; + } + .jumbotron h1, + .jumbotron .h1 { + font-size: 63px; + } +} +.thumbnail { + display: block; + padding: 4px; + margin-bottom: 20px; + line-height: 1.42857143; + background-color: #fff; + border: 1px solid #ddd; + border-radius: 4px; + -webkit-transition: border .2s ease-in-out; + -o-transition: border .2s ease-in-out; + transition: border .2s ease-in-out; +} +.thumbnail > img, +.thumbnail a > img { + margin-right: auto; + margin-left: auto; +} +a.thumbnail:hover, +a.thumbnail:focus, +a.thumbnail.active { + border-color: #337ab7; +} +.thumbnail .caption { + padding: 9px; + color: #333; +} +.alert { + padding: 15px; + margin-bottom: 20px; + border: 1px solid transparent; + border-radius: 4px; +} +.alert h4 { + margin-top: 0; + color: inherit; +} +.alert .alert-link { + font-weight: bold; +} +.alert > p, +.alert > ul { + margin-bottom: 0; +} +.alert > p + p { + margin-top: 5px; +} +.alert-dismissable, +.alert-dismissible { + padding-right: 35px; +} +.alert-dismissable .close, +.alert-dismissible .close { + position: relative; + top: -2px; + right: -21px; + color: inherit; +} +.alert-success { + color: #3c763d; + background-color: #dff0d8; + border-color: #d6e9c6; +} +.alert-success hr { + border-top-color: #c9e2b3; +} +.alert-success .alert-link { + color: #2b542c; +} +.alert-info { + color: #31708f; + background-color: #d9edf7; + border-color: #bce8f1; +} +.alert-info hr { + border-top-color: #a6e1ec; +} +.alert-info .alert-link { + color: #245269; +} +.alert-warning { + color: #8a6d3b; + background-color: #fcf8e3; + border-color: #faebcc; +} +.alert-warning hr { + border-top-color: #f7e1b5; +} +.alert-warning .alert-link { + color: #66512c; +} +.alert-danger { + color: #a94442; + background-color: #f2dede; + border-color: #ebccd1; +} +.alert-danger hr { + border-top-color: #e4b9c0; +} +.alert-danger .alert-link { + color: #843534; +} +@-webkit-keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +@-o-keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +@keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +.progress { + height: 20px; + margin-bottom: 20px; + overflow: hidden; + background-color: #f5f5f5; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, .1); + box-shadow: inset 0 1px 2px rgba(0, 0, 0, .1); +} +.progress-bar { + float: left; + width: 0; + height: 100%; + font-size: 12px; + line-height: 20px; + color: #fff; + text-align: center; + background-color: #337ab7; + -webkit-box-shadow: inset 0 -1px 0 rgba(0, 0, 0, .15); + box-shadow: inset 0 -1px 0 rgba(0, 0, 0, .15); + -webkit-transition: width .6s ease; + -o-transition: width .6s ease; + transition: width .6s ease; +} +.progress-striped .progress-bar, +.progress-bar-striped { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + -webkit-background-size: 40px 40px; + background-size: 40px 40px; +} +.progress.active .progress-bar, +.progress-bar.active { + -webkit-animation: progress-bar-stripes 2s linear infinite; + -o-animation: progress-bar-stripes 2s linear infinite; + animation: progress-bar-stripes 2s linear infinite; +} +.progress-bar-success { + background-color: #5cb85c; +} +.progress-striped .progress-bar-success { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); +} +.progress-bar-info { + background-color: #5bc0de; +} +.progress-striped .progress-bar-info { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); +} +.progress-bar-warning { + background-color: #f0ad4e; +} +.progress-striped .progress-bar-warning { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); +} +.progress-bar-danger { + background-color: #d9534f; +} +.progress-striped .progress-bar-danger { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent); +} +.media { + margin-top: 15px; +} +.media:first-child { + margin-top: 0; +} +.media, +.media-body { + overflow: hidden; + zoom: 1; +} +.media-body { + width: 10000px; +} +.media-object { + display: block; +} +.media-object.img-thumbnail { + max-width: none; +} +.media-right, +.media > .pull-right { + padding-left: 10px; +} +.media-left, +.media > .pull-left { + padding-right: 10px; +} +.media-left, +.media-right, +.media-body { + display: table-cell; + vertical-align: top; +} +.media-middle { + vertical-align: middle; +} +.media-bottom { + vertical-align: bottom; +} +.media-heading { + margin-top: 0; + margin-bottom: 5px; +} +.media-list { + padding-left: 0; + list-style: none; +} +.list-group { + padding-left: 0; + margin-bottom: 20px; +} +.list-group-item { + position: relative; + display: block; + padding: 10px 15px; + margin-bottom: -1px; + background-color: #fff; + border: 1px solid #ddd; +} +.list-group-item:first-child { + border-top-left-radius: 4px; + border-top-right-radius: 4px; +} +.list-group-item:last-child { + margin-bottom: 0; + border-bottom-right-radius: 4px; + border-bottom-left-radius: 4px; +} +a.list-group-item, +button.list-group-item { + color: #555; +} +a.list-group-item .list-group-item-heading, +button.list-group-item .list-group-item-heading { + color: #333; +} +a.list-group-item:hover, +button.list-group-item:hover, +a.list-group-item:focus, +button.list-group-item:focus { + color: #555; + text-decoration: none; + background-color: #f5f5f5; +} +button.list-group-item { + width: 100%; + text-align: left; +} +.list-group-item.disabled, +.list-group-item.disabled:hover, +.list-group-item.disabled:focus { + color: #777; + cursor: not-allowed; + background-color: #eee; +} +.list-group-item.disabled .list-group-item-heading, +.list-group-item.disabled:hover .list-group-item-heading, +.list-group-item.disabled:focus .list-group-item-heading { + color: inherit; +} +.list-group-item.disabled .list-group-item-text, +.list-group-item.disabled:hover .list-group-item-text, +.list-group-item.disabled:focus .list-group-item-text { + color: #777; +} +.list-group-item.active, +.list-group-item.active:hover, +.list-group-item.active:focus { + z-index: 2; + color: #fff; + background-color: #337ab7; + border-color: #337ab7; +} +.list-group-item.active .list-group-item-heading, +.list-group-item.active:hover .list-group-item-heading, +.list-group-item.active:focus .list-group-item-heading, +.list-group-item.active .list-group-item-heading > small, +.list-group-item.active:hover .list-group-item-heading > small, +.list-group-item.active:focus .list-group-item-heading > small, +.list-group-item.active .list-group-item-heading > .small, +.list-group-item.active:hover .list-group-item-heading > .small, +.list-group-item.active:focus .list-group-item-heading > .small { + color: inherit; +} +.list-group-item.active .list-group-item-text, +.list-group-item.active:hover .list-group-item-text, +.list-group-item.active:focus .list-group-item-text { + color: #c7ddef; +} +.list-group-item-success { + color: #3c763d; + background-color: #dff0d8; +} +a.list-group-item-success, +button.list-group-item-success { + color: #3c763d; +} +a.list-group-item-success .list-group-item-heading, +button.list-group-item-success .list-group-item-heading { + color: inherit; +} +a.list-group-item-success:hover, +button.list-group-item-success:hover, +a.list-group-item-success:focus, +button.list-group-item-success:focus { + color: #3c763d; + background-color: #d0e9c6; +} +a.list-group-item-success.active, +button.list-group-item-success.active, +a.list-group-item-success.active:hover, +button.list-group-item-success.active:hover, +a.list-group-item-success.active:focus, +button.list-group-item-success.active:focus { + color: #fff; + background-color: #3c763d; + border-color: #3c763d; +} +.list-group-item-info { + color: #31708f; + background-color: #d9edf7; +} +a.list-group-item-info, +button.list-group-item-info { + color: #31708f; +} +a.list-group-item-info .list-group-item-heading, +button.list-group-item-info .list-group-item-heading { + color: inherit; +} +a.list-group-item-info:hover, +button.list-group-item-info:hover, +a.list-group-item-info:focus, +button.list-group-item-info:focus { + color: #31708f; + background-color: #c4e3f3; +} +a.list-group-item-info.active, +button.list-group-item-info.active, +a.list-group-item-info.active:hover, +button.list-group-item-info.active:hover, +a.list-group-item-info.active:focus, +button.list-group-item-info.active:focus { + color: #fff; + background-color: #31708f; + border-color: #31708f; +} +.list-group-item-warning { + color: #8a6d3b; + background-color: #fcf8e3; +} +a.list-group-item-warning, +button.list-group-item-warning { + color: #8a6d3b; +} +a.list-group-item-warning .list-group-item-heading, +button.list-group-item-warning .list-group-item-heading { + color: inherit; +} +a.list-group-item-warning:hover, +button.list-group-item-warning:hover, +a.list-group-item-warning:focus, +button.list-group-item-warning:focus { + color: #8a6d3b; + background-color: #faf2cc; +} +a.list-group-item-warning.active, +button.list-group-item-warning.active, +a.list-group-item-warning.active:hover, +button.list-group-item-warning.active:hover, +a.list-group-item-warning.active:focus, +button.list-group-item-warning.active:focus { + color: #fff; + background-color: #8a6d3b; + border-color: #8a6d3b; +} +.list-group-item-danger { + color: #a94442; + background-color: #f2dede; +} +a.list-group-item-danger, +button.list-group-item-danger { + color: #a94442; +} +a.list-group-item-danger .list-group-item-heading, +button.list-group-item-danger .list-group-item-heading { + color: inherit; +} +a.list-group-item-danger:hover, +button.list-group-item-danger:hover, +a.list-group-item-danger:focus, +button.list-group-item-danger:focus { + color: #a94442; + background-color: #ebcccc; +} +a.list-group-item-danger.active, +button.list-group-item-danger.active, +a.list-group-item-danger.active:hover, +button.list-group-item-danger.active:hover, +a.list-group-item-danger.active:focus, +button.list-group-item-danger.active:focus { + color: #fff; + background-color: #a94442; + border-color: #a94442; +} +.list-group-item-heading { + margin-top: 0; + margin-bottom: 5px; +} +.list-group-item-text { + margin-bottom: 0; + line-height: 1.3; +} +.panel { + margin-bottom: 20px; + background-color: #fff; + border: 1px solid transparent; + border-radius: 4px; + -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, .05); + box-shadow: 0 1px 1px rgba(0, 0, 0, .05); +} +.panel-body { + padding: 15px; +} +.panel-heading { + padding: 10px 15px; + border-bottom: 1px solid transparent; + border-top-left-radius: 3px; + border-top-right-radius: 3px; +} +.panel-heading > .dropdown .dropdown-toggle { + color: inherit; +} +.panel-title { + margin-top: 0; + margin-bottom: 0; + font-size: 16px; + color: inherit; +} +.panel-title > a, +.panel-title > small, +.panel-title > .small, +.panel-title > small > a, +.panel-title > .small > a { + color: inherit; +} +.panel-footer { + padding: 10px 15px; + background-color: #f5f5f5; + border-top: 1px solid #ddd; + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel > .list-group, +.panel > .panel-collapse > .list-group { + margin-bottom: 0; +} +.panel > .list-group .list-group-item, +.panel > .panel-collapse > .list-group .list-group-item { + border-width: 1px 0; + border-radius: 0; +} +.panel > .list-group:first-child .list-group-item:first-child, +.panel > .panel-collapse > .list-group:first-child .list-group-item:first-child { + border-top: 0; + border-top-left-radius: 3px; + border-top-right-radius: 3px; +} +.panel > .list-group:last-child .list-group-item:last-child, +.panel > .panel-collapse > .list-group:last-child .list-group-item:last-child { + border-bottom: 0; + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel > .panel-heading + .panel-collapse > .list-group .list-group-item:first-child { + border-top-left-radius: 0; + border-top-right-radius: 0; +} +.panel-heading + .list-group .list-group-item:first-child { + border-top-width: 0; +} +.list-group + .panel-footer { + border-top-width: 0; +} +.panel > .table, +.panel > .table-responsive > .table, +.panel > .panel-collapse > .table { + margin-bottom: 0; +} +.panel > .table caption, +.panel > .table-responsive > .table caption, +.panel > .panel-collapse > .table caption { + padding-right: 15px; + padding-left: 15px; +} +.panel > .table:first-child, +.panel > .table-responsive:first-child > .table:first-child { + border-top-left-radius: 3px; + border-top-right-radius: 3px; +} +.panel > .table:first-child > thead:first-child > tr:first-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child { + border-top-left-radius: 3px; + border-top-right-radius: 3px; +} +.panel > .table:first-child > thead:first-child > tr:first-child td:first-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child td:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child td:first-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child td:first-child, +.panel > .table:first-child > thead:first-child > tr:first-child th:first-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child th:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child th:first-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child th:first-child { + border-top-left-radius: 3px; +} +.panel > .table:first-child > thead:first-child > tr:first-child td:last-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child td:last-child, +.panel > .table:first-child > tbody:first-child > tr:first-child td:last-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child td:last-child, +.panel > .table:first-child > thead:first-child > tr:first-child th:last-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child th:last-child, +.panel > .table:first-child > tbody:first-child > tr:first-child th:last-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child th:last-child { + border-top-right-radius: 3px; +} +.panel > .table:last-child, +.panel > .table-responsive:last-child > .table:last-child { + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child { + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child td:first-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child td:first-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child td:first-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child td:first-child, +.panel > .table:last-child > tbody:last-child > tr:last-child th:first-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child th:first-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child th:first-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child th:first-child { + border-bottom-left-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child td:last-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child td:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child td:last-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child td:last-child, +.panel > .table:last-child > tbody:last-child > tr:last-child th:last-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child th:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child th:last-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child th:last-child { + border-bottom-right-radius: 3px; +} +.panel > .panel-body + .table, +.panel > .panel-body + .table-responsive, +.panel > .table + .panel-body, +.panel > .table-responsive + .panel-body { + border-top: 1px solid #ddd; +} +.panel > .table > tbody:first-child > tr:first-child th, +.panel > .table > tbody:first-child > tr:first-child td { + border-top: 0; +} +.panel > .table-bordered, +.panel > .table-responsive > .table-bordered { + border: 0; +} +.panel > .table-bordered > thead > tr > th:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:first-child, +.panel > .table-bordered > tbody > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:first-child, +.panel > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-bordered > thead > tr > td:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:first-child, +.panel > .table-bordered > tbody > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > td:first-child, +.panel > .table-bordered > tfoot > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:first-child { + border-left: 0; +} +.panel > .table-bordered > thead > tr > th:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:last-child, +.panel > .table-bordered > tbody > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:last-child, +.panel > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-bordered > thead > tr > td:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:last-child, +.panel > .table-bordered > tbody > tr > td:last-child, +.panel > .table-responsive > .table-bordered > tbody > tr > td:last-child, +.panel > .table-bordered > tfoot > tr > td:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:last-child { + border-right: 0; +} +.panel > .table-bordered > thead > tr:first-child > td, +.panel > .table-responsive > .table-bordered > thead > tr:first-child > td, +.panel > .table-bordered > tbody > tr:first-child > td, +.panel > .table-responsive > .table-bordered > tbody > tr:first-child > td, +.panel > .table-bordered > thead > tr:first-child > th, +.panel > .table-responsive > .table-bordered > thead > tr:first-child > th, +.panel > .table-bordered > tbody > tr:first-child > th, +.panel > .table-responsive > .table-bordered > tbody > tr:first-child > th { + border-bottom: 0; +} +.panel > .table-bordered > tbody > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > td, +.panel > .table-bordered > tfoot > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > td, +.panel > .table-bordered > tbody > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > th, +.panel > .table-bordered > tfoot > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > th { + border-bottom: 0; +} +.panel > .table-responsive { + margin-bottom: 0; + border: 0; +} +.panel-group { + margin-bottom: 20px; +} +.panel-group .panel { + margin-bottom: 0; + border-radius: 4px; +} +.panel-group .panel + .panel { + margin-top: 5px; +} +.panel-group .panel-heading { + border-bottom: 0; +} +.panel-group .panel-heading + .panel-collapse > .panel-body, +.panel-group .panel-heading + .panel-collapse > .list-group { + border-top: 1px solid #ddd; +} +.panel-group .panel-footer { + border-top: 0; +} +.panel-group .panel-footer + .panel-collapse .panel-body { + border-bottom: 1px solid #ddd; +} +.panel-default { + border-color: #ddd; +} +.panel-default > .panel-heading { + color: #333; + background-color: #f5f5f5; + border-color: #ddd; +} +.panel-default > .panel-heading + .panel-collapse > .panel-body { + border-top-color: #ddd; +} +.panel-default > .panel-heading .badge { + color: #f5f5f5; + background-color: #333; +} +.panel-default > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: #ddd; +} +.panel-primary { + border-color: #337ab7; +} +.panel-primary > .panel-heading { + color: #fff; + background-color: #337ab7; + border-color: #337ab7; +} +.panel-primary > .panel-heading + .panel-collapse > .panel-body { + border-top-color: #337ab7; +} +.panel-primary > .panel-heading .badge { + color: #337ab7; + background-color: #fff; +} +.panel-primary > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: #337ab7; +} +.panel-success { + border-color: #d6e9c6; +} +.panel-success > .panel-heading { + color: #3c763d; + background-color: #dff0d8; + border-color: #d6e9c6; +} +.panel-success > .panel-heading + .panel-collapse > .panel-body { + border-top-color: #d6e9c6; +} +.panel-success > .panel-heading .badge { + color: #dff0d8; + background-color: #3c763d; +} +.panel-success > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: #d6e9c6; +} +.panel-info { + border-color: #bce8f1; +} +.panel-info > .panel-heading { + color: #31708f; + background-color: #d9edf7; + border-color: #bce8f1; +} +.panel-info > .panel-heading + .panel-collapse > .panel-body { + border-top-color: #bce8f1; +} +.panel-info > .panel-heading .badge { + color: #d9edf7; + background-color: #31708f; +} +.panel-info > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: #bce8f1; +} +.panel-warning { + border-color: #faebcc; +} +.panel-warning > .panel-heading { + color: #8a6d3b; + background-color: #fcf8e3; + border-color: #faebcc; +} +.panel-warning > .panel-heading + .panel-collapse > .panel-body { + border-top-color: #faebcc; +} +.panel-warning > .panel-heading .badge { + color: #fcf8e3; + background-color: #8a6d3b; +} +.panel-warning > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: #faebcc; +} +.panel-danger { + border-color: #ebccd1; +} +.panel-danger > .panel-heading { + color: #a94442; + background-color: #f2dede; + border-color: #ebccd1; +} +.panel-danger > .panel-heading + .panel-collapse > .panel-body { + border-top-color: #ebccd1; +} +.panel-danger > .panel-heading .badge { + color: #f2dede; + background-color: #a94442; +} +.panel-danger > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: #ebccd1; +} +.embed-responsive { + position: relative; + display: block; + height: 0; + padding: 0; + overflow: hidden; +} +.embed-responsive .embed-responsive-item, +.embed-responsive iframe, +.embed-responsive embed, +.embed-responsive object, +.embed-responsive video { + position: absolute; + top: 0; + bottom: 0; + left: 0; + width: 100%; + height: 100%; + border: 0; +} +.embed-responsive-16by9 { + padding-bottom: 56.25%; +} +.embed-responsive-4by3 { + padding-bottom: 75%; +} +.well { + min-height: 20px; + padding: 19px; + margin-bottom: 20px; + background-color: #f5f5f5; + border: 1px solid #e3e3e3; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, .05); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, .05); +} +.well blockquote { + border-color: #ddd; + border-color: rgba(0, 0, 0, .15); +} +.well-lg { + padding: 24px; + border-radius: 6px; +} +.well-sm { + padding: 9px; + border-radius: 3px; +} +.close { + float: right; + font-size: 21px; + font-weight: bold; + line-height: 1; + color: #000; + text-shadow: 0 1px 0 #fff; + filter: alpha(opacity=20); + opacity: .2; +} +.close:hover, +.close:focus { + color: #000; + text-decoration: none; + cursor: pointer; + filter: alpha(opacity=50); + opacity: .5; +} +button.close { + -webkit-appearance: none; + padding: 0; + cursor: pointer; + background: transparent; + border: 0; +} +.modal-open { + overflow: hidden; +} +.modal { + position: fixed; + top: 0; + right: 0; + bottom: 0; + left: 0; + z-index: 1050; + display: none; + overflow: hidden; + -webkit-overflow-scrolling: touch; + outline: 0; +} +.modal.fade .modal-dialog { + -webkit-transition: -webkit-transform .3s ease-out; + -o-transition: -o-transform .3s ease-out; + transition: transform .3s ease-out; + -webkit-transform: translate(0, -25%); + -ms-transform: translate(0, -25%); + -o-transform: translate(0, -25%); + transform: translate(0, -25%); +} +.modal.in .modal-dialog { + -webkit-transform: translate(0, 0); + -ms-transform: translate(0, 0); + -o-transform: translate(0, 0); + transform: translate(0, 0); +} +.modal-open .modal { + overflow-x: hidden; + overflow-y: auto; +} +.modal-dialog { + position: relative; + width: auto; + margin: 10px; +} +.modal-content { + position: relative; + background-color: #fff; + -webkit-background-clip: padding-box; + background-clip: padding-box; + border: 1px solid #999; + border: 1px solid rgba(0, 0, 0, .2); + border-radius: 6px; + outline: 0; + -webkit-box-shadow: 0 3px 9px rgba(0, 0, 0, .5); + box-shadow: 0 3px 9px rgba(0, 0, 0, .5); +} +.modal-backdrop { + position: fixed; + top: 0; + right: 0; + bottom: 0; + left: 0; + z-index: 1040; + background-color: #000; +} +.modal-backdrop.fade { + filter: alpha(opacity=0); + opacity: 0; +} +.modal-backdrop.in { + filter: alpha(opacity=50); + opacity: .5; +} +.modal-header { + padding: 15px; + border-bottom: 1px solid #e5e5e5; +} +.modal-header .close { + margin-top: -2px; +} +.modal-title { + margin: 0; + line-height: 1.42857143; +} +.modal-body { + position: relative; + padding: 15px; +} +.modal-footer { + padding: 15px; + text-align: right; + border-top: 1px solid #e5e5e5; +} +.modal-footer .btn + .btn { + margin-bottom: 0; + margin-left: 5px; +} +.modal-footer .btn-group .btn + .btn { + margin-left: -1px; +} +.modal-footer .btn-block + .btn-block { + margin-left: 0; +} +.modal-scrollbar-measure { + position: absolute; + top: -9999px; + width: 50px; + height: 50px; + overflow: scroll; +} +@media (min-width: 768px) { + .modal-dialog { + width: 600px; + margin: 30px auto; + } + .modal-content { + -webkit-box-shadow: 0 5px 15px rgba(0, 0, 0, .5); + box-shadow: 0 5px 15px rgba(0, 0, 0, .5); + } + .modal-sm { + width: 300px; + } +} +@media (min-width: 992px) { + .modal-lg { + width: 900px; + } +} +.tooltip { + position: absolute; + z-index: 1070; + display: block; + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 12px; + font-style: normal; + font-weight: normal; + line-height: 1.42857143; + text-align: left; + text-align: start; + text-decoration: none; + text-shadow: none; + text-transform: none; + letter-spacing: normal; + word-break: normal; + word-spacing: normal; + word-wrap: normal; + white-space: normal; + filter: alpha(opacity=0); + opacity: 0; + + line-break: auto; +} +.tooltip.in { + filter: alpha(opacity=90); + opacity: .9; +} +.tooltip.top { + padding: 5px 0; + margin-top: -3px; +} +.tooltip.right { + padding: 0 5px; + margin-left: 3px; +} +.tooltip.bottom { + padding: 5px 0; + margin-top: 3px; +} +.tooltip.left { + padding: 0 5px; + margin-left: -3px; +} +.tooltip-inner { + max-width: 200px; + padding: 3px 8px; + color: #fff; + text-align: center; + background-color: #000; + border-radius: 4px; +} +.tooltip-arrow { + position: absolute; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; +} +.tooltip.top .tooltip-arrow { + bottom: 0; + left: 50%; + margin-left: -5px; + border-width: 5px 5px 0; + border-top-color: #000; +} +.tooltip.top-left .tooltip-arrow { + right: 5px; + bottom: 0; + margin-bottom: -5px; + border-width: 5px 5px 0; + border-top-color: #000; +} +.tooltip.top-right .tooltip-arrow { + bottom: 0; + left: 5px; + margin-bottom: -5px; + border-width: 5px 5px 0; + border-top-color: #000; +} +.tooltip.right .tooltip-arrow { + top: 50%; + left: 0; + margin-top: -5px; + border-width: 5px 5px 5px 0; + border-right-color: #000; +} +.tooltip.left .tooltip-arrow { + top: 50%; + right: 0; + margin-top: -5px; + border-width: 5px 0 5px 5px; + border-left-color: #000; +} +.tooltip.bottom .tooltip-arrow { + top: 0; + left: 50%; + margin-left: -5px; + border-width: 0 5px 5px; + border-bottom-color: #000; +} +.tooltip.bottom-left .tooltip-arrow { + top: 0; + right: 5px; + margin-top: -5px; + border-width: 0 5px 5px; + border-bottom-color: #000; +} +.tooltip.bottom-right .tooltip-arrow { + top: 0; + left: 5px; + margin-top: -5px; + border-width: 0 5px 5px; + border-bottom-color: #000; +} +.popover { + position: absolute; + top: 0; + left: 0; + z-index: 1060; + display: none; + max-width: 276px; + padding: 1px; + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 14px; + font-style: normal; + font-weight: normal; + line-height: 1.42857143; + text-align: left; + text-align: start; + text-decoration: none; + text-shadow: none; + text-transform: none; + letter-spacing: normal; + word-break: normal; + word-spacing: normal; + word-wrap: normal; + white-space: normal; + background-color: #fff; + -webkit-background-clip: padding-box; + background-clip: padding-box; + border: 1px solid #ccc; + border: 1px solid rgba(0, 0, 0, .2); + border-radius: 6px; + -webkit-box-shadow: 0 5px 10px rgba(0, 0, 0, .2); + box-shadow: 0 5px 10px rgba(0, 0, 0, .2); + + line-break: auto; +} +.popover.top { + margin-top: -10px; +} +.popover.right { + margin-left: 10px; +} +.popover.bottom { + margin-top: 10px; +} +.popover.left { + margin-left: -10px; +} +.popover-title { + padding: 8px 14px; + margin: 0; + font-size: 14px; + background-color: #f7f7f7; + border-bottom: 1px solid #ebebeb; + border-radius: 5px 5px 0 0; +} +.popover-content { + padding: 9px 14px; +} +.popover > .arrow, +.popover > .arrow:after { + position: absolute; + display: block; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; +} +.popover > .arrow { + border-width: 11px; +} +.popover > .arrow:after { + content: ""; + border-width: 10px; +} +.popover.top > .arrow { + bottom: -11px; + left: 50%; + margin-left: -11px; + border-top-color: #999; + border-top-color: rgba(0, 0, 0, .25); + border-bottom-width: 0; +} +.popover.top > .arrow:after { + bottom: 1px; + margin-left: -10px; + content: " "; + border-top-color: #fff; + border-bottom-width: 0; +} +.popover.right > .arrow { + top: 50%; + left: -11px; + margin-top: -11px; + border-right-color: #999; + border-right-color: rgba(0, 0, 0, .25); + border-left-width: 0; +} +.popover.right > .arrow:after { + bottom: -10px; + left: 1px; + content: " "; + border-right-color: #fff; + border-left-width: 0; +} +.popover.bottom > .arrow { + top: -11px; + left: 50%; + margin-left: -11px; + border-top-width: 0; + border-bottom-color: #999; + border-bottom-color: rgba(0, 0, 0, .25); +} +.popover.bottom > .arrow:after { + top: 1px; + margin-left: -10px; + content: " "; + border-top-width: 0; + border-bottom-color: #fff; +} +.popover.left > .arrow { + top: 50%; + right: -11px; + margin-top: -11px; + border-right-width: 0; + border-left-color: #999; + border-left-color: rgba(0, 0, 0, .25); +} +.popover.left > .arrow:after { + right: 1px; + bottom: -10px; + content: " "; + border-right-width: 0; + border-left-color: #fff; +} +.carousel { + position: relative; +} +.carousel-inner { + position: relative; + width: 100%; + overflow: hidden; +} +.carousel-inner > .item { + position: relative; + display: none; + -webkit-transition: .6s ease-in-out left; + -o-transition: .6s ease-in-out left; + transition: .6s ease-in-out left; +} +.carousel-inner > .item > img, +.carousel-inner > .item > a > img { + line-height: 1; +} +@media all and (transform-3d), (-webkit-transform-3d) { + .carousel-inner > .item { + -webkit-transition: -webkit-transform .6s ease-in-out; + -o-transition: -o-transform .6s ease-in-out; + transition: transform .6s ease-in-out; + + -webkit-backface-visibility: hidden; + backface-visibility: hidden; + -webkit-perspective: 1000px; + perspective: 1000px; + } + .carousel-inner > .item.next, + .carousel-inner > .item.active.right { + left: 0; + -webkit-transform: translate3d(100%, 0, 0); + transform: translate3d(100%, 0, 0); + } + .carousel-inner > .item.prev, + .carousel-inner > .item.active.left { + left: 0; + -webkit-transform: translate3d(-100%, 0, 0); + transform: translate3d(-100%, 0, 0); + } + .carousel-inner > .item.next.left, + .carousel-inner > .item.prev.right, + .carousel-inner > .item.active { + left: 0; + -webkit-transform: translate3d(0, 0, 0); + transform: translate3d(0, 0, 0); + } +} +.carousel-inner > .active, +.carousel-inner > .next, +.carousel-inner > .prev { + display: block; +} +.carousel-inner > .active { + left: 0; +} +.carousel-inner > .next, +.carousel-inner > .prev { + position: absolute; + top: 0; + width: 100%; +} +.carousel-inner > .next { + left: 100%; +} +.carousel-inner > .prev { + left: -100%; +} +.carousel-inner > .next.left, +.carousel-inner > .prev.right { + left: 0; +} +.carousel-inner > .active.left { + left: -100%; +} +.carousel-inner > .active.right { + left: 100%; +} +.carousel-control { + position: absolute; + top: 0; + bottom: 0; + left: 0; + width: 15%; + font-size: 20px; + color: #fff; + text-align: center; + text-shadow: 0 1px 2px rgba(0, 0, 0, .6); + background-color: rgba(0, 0, 0, 0); + filter: alpha(opacity=50); + opacity: .5; +} +.carousel-control.left { + background-image: -webkit-linear-gradient(left, rgba(0, 0, 0, .5) 0%, rgba(0, 0, 0, .0001) 100%); + background-image: -o-linear-gradient(left, rgba(0, 0, 0, .5) 0%, rgba(0, 0, 0, .0001) 100%); + background-image: -webkit-gradient(linear, left top, right top, from(rgba(0, 0, 0, .5)), to(rgba(0, 0, 0, .0001))); + background-image: linear-gradient(to right, rgba(0, 0, 0, .5) 0%, rgba(0, 0, 0, .0001) 100%); + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#80000000', endColorstr='#00000000', GradientType=1); + background-repeat: repeat-x; +} +.carousel-control.right { + right: 0; + left: auto; + background-image: -webkit-linear-gradient(left, rgba(0, 0, 0, .0001) 0%, rgba(0, 0, 0, .5) 100%); + background-image: -o-linear-gradient(left, rgba(0, 0, 0, .0001) 0%, rgba(0, 0, 0, .5) 100%); + background-image: -webkit-gradient(linear, left top, right top, from(rgba(0, 0, 0, .0001)), to(rgba(0, 0, 0, .5))); + background-image: linear-gradient(to right, rgba(0, 0, 0, .0001) 0%, rgba(0, 0, 0, .5) 100%); + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#00000000', endColorstr='#80000000', GradientType=1); + background-repeat: repeat-x; +} +.carousel-control:hover, +.carousel-control:focus { + color: #fff; + text-decoration: none; + filter: alpha(opacity=90); + outline: 0; + opacity: .9; +} +.carousel-control .icon-prev, +.carousel-control .icon-next, +.carousel-control .glyphicon-chevron-left, +.carousel-control .glyphicon-chevron-right { + position: absolute; + top: 50%; + z-index: 5; + display: inline-block; + margin-top: -10px; +} +.carousel-control .icon-prev, +.carousel-control .glyphicon-chevron-left { + left: 50%; + margin-left: -10px; +} +.carousel-control .icon-next, +.carousel-control .glyphicon-chevron-right { + right: 50%; + margin-right: -10px; +} +.carousel-control .icon-prev, +.carousel-control .icon-next { + width: 20px; + height: 20px; + font-family: serif; + line-height: 1; +} +.carousel-control .icon-prev:before { + content: '\2039'; +} +.carousel-control .icon-next:before { + content: '\203a'; +} +.carousel-indicators { + position: absolute; + bottom: 10px; + left: 50%; + z-index: 15; + width: 60%; + padding-left: 0; + margin-left: -30%; + text-align: center; + list-style: none; +} +.carousel-indicators li { + display: inline-block; + width: 10px; + height: 10px; + margin: 1px; + text-indent: -999px; + cursor: pointer; + background-color: #000 \9; + background-color: rgba(0, 0, 0, 0); + border: 1px solid #fff; + border-radius: 10px; +} +.carousel-indicators .active { + width: 12px; + height: 12px; + margin: 0; + background-color: #fff; +} +.carousel-caption { + position: absolute; + right: 15%; + bottom: 20px; + left: 15%; + z-index: 10; + padding-top: 20px; + padding-bottom: 20px; + color: #fff; + text-align: center; + text-shadow: 0 1px 2px rgba(0, 0, 0, .6); +} +.carousel-caption .btn { + text-shadow: none; +} +@media screen and (min-width: 768px) { + .carousel-control .glyphicon-chevron-left, + .carousel-control .glyphicon-chevron-right, + .carousel-control .icon-prev, + .carousel-control .icon-next { + width: 30px; + height: 30px; + margin-top: -10px; + font-size: 30px; + } + .carousel-control .glyphicon-chevron-left, + .carousel-control .icon-prev { + margin-left: -10px; + } + .carousel-control .glyphicon-chevron-right, + .carousel-control .icon-next { + margin-right: -10px; + } + .carousel-caption { + right: 20%; + left: 20%; + padding-bottom: 30px; + } + .carousel-indicators { + bottom: 20px; + } +} +.clearfix:before, +.clearfix:after, +.dl-horizontal dd:before, +.dl-horizontal dd:after, +.container:before, +.container:after, +.container-fluid:before, +.container-fluid:after, +.row:before, +.row:after, +.form-horizontal .form-group:before, +.form-horizontal .form-group:after, +.btn-toolbar:before, +.btn-toolbar:after, +.btn-group-vertical > .btn-group:before, +.btn-group-vertical > .btn-group:after, +.nav:before, +.nav:after, +.navbar:before, +.navbar:after, +.navbar-header:before, +.navbar-header:after, +.navbar-collapse:before, +.navbar-collapse:after, +.pager:before, +.pager:after, +.panel-body:before, +.panel-body:after, +.modal-header:before, +.modal-header:after, +.modal-footer:before, +.modal-footer:after { + display: table; + content: " "; +} +.clearfix:after, +.dl-horizontal dd:after, +.container:after, +.container-fluid:after, +.row:after, +.form-horizontal .form-group:after, +.btn-toolbar:after, +.btn-group-vertical > .btn-group:after, +.nav:after, +.navbar:after, +.navbar-header:after, +.navbar-collapse:after, +.pager:after, +.panel-body:after, +.modal-header:after, +.modal-footer:after { + clear: both; +} +.center-block { + display: block; + margin-right: auto; + margin-left: auto; +} +.pull-right { + float: right !important; +} +.pull-left { + float: left !important; +} +.hide { + display: none !important; +} +.show { + display: block !important; +} +.invisible { + visibility: hidden; +} +.text-hide { + font: 0/0 a; + color: transparent; + text-shadow: none; + background-color: transparent; + border: 0; +} +.hidden { + display: none !important; +} +.affix { + position: fixed; +} +@-ms-viewport { + width: device-width; +} +.visible-xs, +.visible-sm, +.visible-md, +.visible-lg { + display: none !important; +} +.visible-xs-block, +.visible-xs-inline, +.visible-xs-inline-block, +.visible-sm-block, +.visible-sm-inline, +.visible-sm-inline-block, +.visible-md-block, +.visible-md-inline, +.visible-md-inline-block, +.visible-lg-block, +.visible-lg-inline, +.visible-lg-inline-block { + display: none !important; +} +@media (max-width: 767px) { + .visible-xs { + display: block !important; + } + table.visible-xs { + display: table !important; + } + tr.visible-xs { + display: table-row !important; + } + th.visible-xs, + td.visible-xs { + display: table-cell !important; + } +} +@media (max-width: 767px) { + .visible-xs-block { + display: block !important; + } +} +@media (max-width: 767px) { + .visible-xs-inline { + display: inline !important; + } +} +@media (max-width: 767px) { + .visible-xs-inline-block { + display: inline-block !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm { + display: block !important; + } + table.visible-sm { + display: table !important; + } + tr.visible-sm { + display: table-row !important; + } + th.visible-sm, + td.visible-sm { + display: table-cell !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm-block { + display: block !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm-inline { + display: inline !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm-inline-block { + display: inline-block !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md { + display: block !important; + } + table.visible-md { + display: table !important; + } + tr.visible-md { + display: table-row !important; + } + th.visible-md, + td.visible-md { + display: table-cell !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md-block { + display: block !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md-inline { + display: inline !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md-inline-block { + display: inline-block !important; + } +} +@media (min-width: 1200px) { + .visible-lg { + display: block !important; + } + table.visible-lg { + display: table !important; + } + tr.visible-lg { + display: table-row !important; + } + th.visible-lg, + td.visible-lg { + display: table-cell !important; + } +} +@media (min-width: 1200px) { + .visible-lg-block { + display: block !important; + } +} +@media (min-width: 1200px) { + .visible-lg-inline { + display: inline !important; + } +} +@media (min-width: 1200px) { + .visible-lg-inline-block { + display: inline-block !important; + } +} +@media (max-width: 767px) { + .hidden-xs { + display: none !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .hidden-sm { + display: none !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .hidden-md { + display: none !important; + } +} +@media (min-width: 1200px) { + .hidden-lg { + display: none !important; + } +} +.visible-print { + display: none !important; +} +@media print { + .visible-print { + display: block !important; + } + table.visible-print { + display: table !important; + } + tr.visible-print { + display: table-row !important; + } + th.visible-print, + td.visible-print { + display: table-cell !important; + } +} +.visible-print-block { + display: none !important; +} +@media print { + .visible-print-block { + display: block !important; + } +} +.visible-print-inline { + display: none !important; +} +@media print { + .visible-print-inline { + display: inline !important; + } +} +.visible-print-inline-block { + display: none !important; +} +@media print { + .visible-print-inline-block { + display: inline-block !important; + } +} +@media print { + .hidden-print { + display: none !important; + } +} +/*# sourceMappingURL=bootstrap.css.map */ diff --git a/web/gui/dashboard/css/bootstrap-slate-flat-3.3.7.css b/web/gui/dashboard/css/bootstrap-slate-flat-3.3.7.css new file mode 100644 index 0000000..7ce384f --- /dev/null +++ b/web/gui/dashboard/css/bootstrap-slate-flat-3.3.7.css @@ -0,0 +1,7101 @@ +/*! + * bootswatch v3.3.7 + * Homepage: http://bootswatch.com + * Copyright 2012-2016 Thomas Park + * Licensed under MIT + * SPDX-License-Identifier: MIT + * Based on Bootstrap +*/ +/*! + * Bootstrap v3.3.7 (http://getbootstrap.com) + * Copyright 2011-2016 Twitter, Inc. + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) + */ +/*! normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css */ +html { + font-family: sans-serif; + -ms-text-size-adjust: 100%; + -webkit-text-size-adjust: 100%; +} +body { + margin: 0; +} +article, +aside, +details, +figcaption, +figure, +footer, +header, +hgroup, +main, +menu, +nav, +section, +summary { + display: block; +} +audio, +canvas, +progress, +video { + display: inline-block; + vertical-align: baseline; +} +audio:not([controls]) { + display: none; + height: 0; +} +[hidden], +template { + display: none; +} +a { + background-color: transparent; +} +a:active, +a:hover { + outline: 0; +} +abbr[title] { + border-bottom: 1px dotted; +} +b, +strong { + font-weight: bold; +} +dfn { + font-style: italic; +} +h1 { + font-size: 2em; + margin: 0.67em 0; +} +mark { + background: #ff0; + color: #000; +} +small { + font-size: 80%; +} +sub, +sup { + font-size: 75%; + line-height: 0; + position: relative; + vertical-align: baseline; +} +sup { + top: -0.5em; +} +sub { + bottom: -0.25em; +} +img { + border: 0; +} +svg:not(:root) { + overflow: hidden; +} +figure { + margin: 1em 40px; +} +hr { + -webkit-box-sizing: content-box; + -moz-box-sizing: content-box; + box-sizing: content-box; + height: 0; +} +pre { + overflow: auto; +} +code, +kbd, +pre, +samp { + font-family: monospace, monospace; + font-size: 1em; +} +button, +input, +optgroup, +select, +textarea { + color: inherit; + font: inherit; + margin: 0; +} +button { + overflow: visible; +} +button, +select { + text-transform: none; +} +button, +html input[type="button"], +input[type="reset"], +input[type="submit"] { + -webkit-appearance: button; + cursor: pointer; +} +button[disabled], +html input[disabled] { + cursor: default; +} +button::-moz-focus-inner, +input::-moz-focus-inner { + border: 0; + padding: 0; +} +input { + line-height: normal; +} +input[type="checkbox"], +input[type="radio"] { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; + padding: 0; +} +input[type="number"]::-webkit-inner-spin-button, +input[type="number"]::-webkit-outer-spin-button { + height: auto; +} +input[type="search"] { + -webkit-appearance: textfield; + -webkit-box-sizing: content-box; + -moz-box-sizing: content-box; + box-sizing: content-box; +} +input[type="search"]::-webkit-search-cancel-button, +input[type="search"]::-webkit-search-decoration { + -webkit-appearance: none; +} +fieldset { + border: 1px solid #c0c0c0; + margin: 0 2px; + padding: 0.35em 0.625em 0.75em; +} +legend { + border: 0; + padding: 0; +} +textarea { + overflow: auto; +} +optgroup { + font-weight: bold; +} +table { + border-collapse: collapse; + border-spacing: 0; +} +td, +th { + padding: 0; +} +/*! Source: https://github.com/h5bp/html5-boilerplate/blob/master/src/css/main.css */ +@media print { + *, + *:before, + *:after { + background: transparent !important; + color: #000 !important; + -webkit-box-shadow: none !important; + box-shadow: none !important; + text-shadow: none !important; + } + a, + a:visited { + text-decoration: underline; + } + a[href]:after { + content: " (" attr(href) ")"; + } + abbr[title]:after { + content: " (" attr(title) ")"; + } + a[href^="#"]:after, + a[href^="javascript:"]:after { + content: ""; + } + pre, + blockquote { + border: 1px solid #999; + page-break-inside: avoid; + } + thead { + display: table-header-group; + } + tr, + img { + page-break-inside: avoid; + } + img { + max-width: 100% !important; + } + p, + h2, + h3 { + orphans: 3; + widows: 3; + } + h2, + h3 { + page-break-after: avoid; + } + .navbar { + display: none; + } + .btn > .caret, + .dropup > .btn > .caret { + border-top-color: #000 !important; + } + .label { + border: 1px solid #000; + } + .table { + border-collapse: collapse !important; + } + .table td, + .table th { + background-color: #fff !important; + } + .table-bordered th, + .table-bordered td { + border: 1px solid #ddd !important; + } +} +@font-face { + font-family: 'Glyphicons Halflings'; + src: url('../fonts/glyphicons-halflings-regular.eot'); + src: url('../fonts/glyphicons-halflings-regular.eot?#iefix') format('embedded-opentype'), url('../fonts/glyphicons-halflings-regular.woff2') format('woff2'), url('../fonts/glyphicons-halflings-regular.woff') format('woff'), url('../fonts/glyphicons-halflings-regular.ttf') format('truetype'), url('../fonts/glyphicons-halflings-regular.svg#glyphicons_halflingsregular') format('svg'); +} +.glyphicon { + position: relative; + top: 1px; + display: inline-block; + font-family: 'Glyphicons Halflings'; + font-style: normal; + font-weight: normal; + line-height: 1; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} +.glyphicon-asterisk:before { + content: "\002a"; +} +.glyphicon-plus:before { + content: "\002b"; +} +.glyphicon-euro:before, +.glyphicon-eur:before { + content: "\20ac"; +} +.glyphicon-minus:before { + content: "\2212"; +} +.glyphicon-cloud:before { + content: "\2601"; +} +.glyphicon-envelope:before { + content: "\2709"; +} +.glyphicon-pencil:before { + content: "\270f"; +} +.glyphicon-glass:before { + content: "\e001"; +} +.glyphicon-music:before { + content: "\e002"; +} +.glyphicon-search:before { + content: "\e003"; +} +.glyphicon-heart:before { + content: "\e005"; +} +.glyphicon-star:before { + content: "\e006"; +} +.glyphicon-star-empty:before { + content: "\e007"; +} +.glyphicon-user:before { + content: "\e008"; +} +.glyphicon-film:before { + content: "\e009"; +} +.glyphicon-th-large:before { + content: "\e010"; +} +.glyphicon-th:before { + content: "\e011"; +} +.glyphicon-th-list:before { + content: "\e012"; +} +.glyphicon-ok:before { + content: "\e013"; +} +.glyphicon-remove:before { + content: "\e014"; +} +.glyphicon-zoom-in:before { + content: "\e015"; +} +.glyphicon-zoom-out:before { + content: "\e016"; +} +.glyphicon-off:before { + content: "\e017"; +} +.glyphicon-signal:before { + content: "\e018"; +} +.glyphicon-cog:before { + content: "\e019"; +} +.glyphicon-trash:before { + content: "\e020"; +} +.glyphicon-home:before { + content: "\e021"; +} +.glyphicon-file:before { + content: "\e022"; +} +.glyphicon-time:before { + content: "\e023"; +} +.glyphicon-road:before { + content: "\e024"; +} +.glyphicon-download-alt:before { + content: "\e025"; +} +.glyphicon-download:before { + content: "\e026"; +} +.glyphicon-upload:before { + content: "\e027"; +} +.glyphicon-inbox:before { + content: "\e028"; +} +.glyphicon-play-circle:before { + content: "\e029"; +} +.glyphicon-repeat:before { + content: "\e030"; +} +.glyphicon-refresh:before { + content: "\e031"; +} +.glyphicon-list-alt:before { + content: "\e032"; +} +.glyphicon-lock:before { + content: "\e033"; +} +.glyphicon-flag:before { + content: "\e034"; +} +.glyphicon-headphones:before { + content: "\e035"; +} +.glyphicon-volume-off:before { + content: "\e036"; +} +.glyphicon-volume-down:before { + content: "\e037"; +} +.glyphicon-volume-up:before { + content: "\e038"; +} +.glyphicon-qrcode:before { + content: "\e039"; +} +.glyphicon-barcode:before { + content: "\e040"; +} +.glyphicon-tag:before { + content: "\e041"; +} +.glyphicon-tags:before { + content: "\e042"; +} +.glyphicon-book:before { + content: "\e043"; +} +.glyphicon-bookmark:before { + content: "\e044"; +} +.glyphicon-print:before { + content: "\e045"; +} +.glyphicon-camera:before { + content: "\e046"; +} +.glyphicon-font:before { + content: "\e047"; +} +.glyphicon-bold:before { + content: "\e048"; +} +.glyphicon-italic:before { + content: "\e049"; +} +.glyphicon-text-height:before { + content: "\e050"; +} +.glyphicon-text-width:before { + content: "\e051"; +} +.glyphicon-align-left:before { + content: "\e052"; +} +.glyphicon-align-center:before { + content: "\e053"; +} +.glyphicon-align-right:before { + content: "\e054"; +} +.glyphicon-align-justify:before { + content: "\e055"; +} +.glyphicon-list:before { + content: "\e056"; +} +.glyphicon-indent-left:before { + content: "\e057"; +} +.glyphicon-indent-right:before { + content: "\e058"; +} +.glyphicon-facetime-video:before { + content: "\e059"; +} +.glyphicon-picture:before { + content: "\e060"; +} +.glyphicon-map-marker:before { + content: "\e062"; +} +.glyphicon-adjust:before { + content: "\e063"; +} +.glyphicon-tint:before { + content: "\e064"; +} +.glyphicon-edit:before { + content: "\e065"; +} +.glyphicon-share:before { + content: "\e066"; +} +.glyphicon-check:before { + content: "\e067"; +} +.glyphicon-move:before { + content: "\e068"; +} +.glyphicon-step-backward:before { + content: "\e069"; +} +.glyphicon-fast-backward:before { + content: "\e070"; +} +.glyphicon-backward:before { + content: "\e071"; +} +.glyphicon-play:before { + content: "\e072"; +} +.glyphicon-pause:before { + content: "\e073"; +} +.glyphicon-stop:before { + content: "\e074"; +} +.glyphicon-forward:before { + content: "\e075"; +} +.glyphicon-fast-forward:before { + content: "\e076"; +} +.glyphicon-step-forward:before { + content: "\e077"; +} +.glyphicon-eject:before { + content: "\e078"; +} +.glyphicon-chevron-left:before { + content: "\e079"; +} +.glyphicon-chevron-right:before { + content: "\e080"; +} +.glyphicon-plus-sign:before { + content: "\e081"; +} +.glyphicon-minus-sign:before { + content: "\e082"; +} +.glyphicon-remove-sign:before { + content: "\e083"; +} +.glyphicon-ok-sign:before { + content: "\e084"; +} +.glyphicon-question-sign:before { + content: "\e085"; +} +.glyphicon-info-sign:before { + content: "\e086"; +} +.glyphicon-screenshot:before { + content: "\e087"; +} +.glyphicon-remove-circle:before { + content: "\e088"; +} +.glyphicon-ok-circle:before { + content: "\e089"; +} +.glyphicon-ban-circle:before { + content: "\e090"; +} +.glyphicon-arrow-left:before { + content: "\e091"; +} +.glyphicon-arrow-right:before { + content: "\e092"; +} +.glyphicon-arrow-up:before { + content: "\e093"; +} +.glyphicon-arrow-down:before { + content: "\e094"; +} +.glyphicon-share-alt:before { + content: "\e095"; +} +.glyphicon-resize-full:before { + content: "\e096"; +} +.glyphicon-resize-small:before { + content: "\e097"; +} +.glyphicon-exclamation-sign:before { + content: "\e101"; +} +.glyphicon-gift:before { + content: "\e102"; +} +.glyphicon-leaf:before { + content: "\e103"; +} +.glyphicon-fire:before { + content: "\e104"; +} +.glyphicon-eye-open:before { + content: "\e105"; +} +.glyphicon-eye-close:before { + content: "\e106"; +} +.glyphicon-warning-sign:before { + content: "\e107"; +} +.glyphicon-plane:before { + content: "\e108"; +} +.glyphicon-calendar:before { + content: "\e109"; +} +.glyphicon-random:before { + content: "\e110"; +} +.glyphicon-comment:before { + content: "\e111"; +} +.glyphicon-magnet:before { + content: "\e112"; +} +.glyphicon-chevron-up:before { + content: "\e113"; +} +.glyphicon-chevron-down:before { + content: "\e114"; +} +.glyphicon-retweet:before { + content: "\e115"; +} +.glyphicon-shopping-cart:before { + content: "\e116"; +} +.glyphicon-folder-close:before { + content: "\e117"; +} +.glyphicon-folder-open:before { + content: "\e118"; +} +.glyphicon-resize-vertical:before { + content: "\e119"; +} +.glyphicon-resize-horizontal:before { + content: "\e120"; +} +.glyphicon-hdd:before { + content: "\e121"; +} +.glyphicon-bullhorn:before { + content: "\e122"; +} +.glyphicon-bell:before { + content: "\e123"; +} +.glyphicon-certificate:before { + content: "\e124"; +} +.glyphicon-thumbs-up:before { + content: "\e125"; +} +.glyphicon-thumbs-down:before { + content: "\e126"; +} +.glyphicon-hand-right:before { + content: "\e127"; +} +.glyphicon-hand-left:before { + content: "\e128"; +} +.glyphicon-hand-up:before { + content: "\e129"; +} +.glyphicon-hand-down:before { + content: "\e130"; +} +.glyphicon-circle-arrow-right:before { + content: "\e131"; +} +.glyphicon-circle-arrow-left:before { + content: "\e132"; +} +.glyphicon-circle-arrow-up:before { + content: "\e133"; +} +.glyphicon-circle-arrow-down:before { + content: "\e134"; +} +.glyphicon-globe:before { + content: "\e135"; +} +.glyphicon-wrench:before { + content: "\e136"; +} +.glyphicon-tasks:before { + content: "\e137"; +} +.glyphicon-filter:before { + content: "\e138"; +} +.glyphicon-briefcase:before { + content: "\e139"; +} +.glyphicon-fullscreen:before { + content: "\e140"; +} +.glyphicon-dashboard:before { + content: "\e141"; +} +.glyphicon-paperclip:before { + content: "\e142"; +} +.glyphicon-heart-empty:before { + content: "\e143"; +} +.glyphicon-link:before { + content: "\e144"; +} +.glyphicon-phone:before { + content: "\e145"; +} +.glyphicon-pushpin:before { + content: "\e146"; +} +.glyphicon-usd:before { + content: "\e148"; +} +.glyphicon-gbp:before { + content: "\e149"; +} +.glyphicon-sort:before { + content: "\e150"; +} +.glyphicon-sort-by-alphabet:before { + content: "\e151"; +} +.glyphicon-sort-by-alphabet-alt:before { + content: "\e152"; +} +.glyphicon-sort-by-order:before { + content: "\e153"; +} +.glyphicon-sort-by-order-alt:before { + content: "\e154"; +} +.glyphicon-sort-by-attributes:before { + content: "\e155"; +} +.glyphicon-sort-by-attributes-alt:before { + content: "\e156"; +} +.glyphicon-unchecked:before { + content: "\e157"; +} +.glyphicon-expand:before { + content: "\e158"; +} +.glyphicon-collapse-down:before { + content: "\e159"; +} +.glyphicon-collapse-up:before { + content: "\e160"; +} +.glyphicon-log-in:before { + content: "\e161"; +} +.glyphicon-flash:before { + content: "\e162"; +} +.glyphicon-log-out:before { + content: "\e163"; +} +.glyphicon-new-window:before { + content: "\e164"; +} +.glyphicon-record:before { + content: "\e165"; +} +.glyphicon-save:before { + content: "\e166"; +} +.glyphicon-open:before { + content: "\e167"; +} +.glyphicon-saved:before { + content: "\e168"; +} +.glyphicon-import:before { + content: "\e169"; +} +.glyphicon-export:before { + content: "\e170"; +} +.glyphicon-send:before { + content: "\e171"; +} +.glyphicon-floppy-disk:before { + content: "\e172"; +} +.glyphicon-floppy-saved:before { + content: "\e173"; +} +.glyphicon-floppy-remove:before { + content: "\e174"; +} +.glyphicon-floppy-save:before { + content: "\e175"; +} +.glyphicon-floppy-open:before { + content: "\e176"; +} +.glyphicon-credit-card:before { + content: "\e177"; +} +.glyphicon-transfer:before { + content: "\e178"; +} +.glyphicon-cutlery:before { + content: "\e179"; +} +.glyphicon-header:before { + content: "\e180"; +} +.glyphicon-compressed:before { + content: "\e181"; +} +.glyphicon-earphone:before { + content: "\e182"; +} +.glyphicon-phone-alt:before { + content: "\e183"; +} +.glyphicon-tower:before { + content: "\e184"; +} +.glyphicon-stats:before { + content: "\e185"; +} +.glyphicon-sd-video:before { + content: "\e186"; +} +.glyphicon-hd-video:before { + content: "\e187"; +} +.glyphicon-subtitles:before { + content: "\e188"; +} +.glyphicon-sound-stereo:before { + content: "\e189"; +} +.glyphicon-sound-dolby:before { + content: "\e190"; +} +.glyphicon-sound-5-1:before { + content: "\e191"; +} +.glyphicon-sound-6-1:before { + content: "\e192"; +} +.glyphicon-sound-7-1:before { + content: "\e193"; +} +.glyphicon-copyright-mark:before { + content: "\e194"; +} +.glyphicon-registration-mark:before { + content: "\e195"; +} +.glyphicon-cloud-download:before { + content: "\e197"; +} +.glyphicon-cloud-upload:before { + content: "\e198"; +} +.glyphicon-tree-conifer:before { + content: "\e199"; +} +.glyphicon-tree-deciduous:before { + content: "\e200"; +} +.glyphicon-cd:before { + content: "\e201"; +} +.glyphicon-save-file:before { + content: "\e202"; +} +.glyphicon-open-file:before { + content: "\e203"; +} +.glyphicon-level-up:before { + content: "\e204"; +} +.glyphicon-copy:before { + content: "\e205"; +} +.glyphicon-paste:before { + content: "\e206"; +} +.glyphicon-alert:before { + content: "\e209"; +} +.glyphicon-equalizer:before { + content: "\e210"; +} +.glyphicon-king:before { + content: "\e211"; +} +.glyphicon-queen:before { + content: "\e212"; +} +.glyphicon-pawn:before { + content: "\e213"; +} +.glyphicon-bishop:before { + content: "\e214"; +} +.glyphicon-knight:before { + content: "\e215"; +} +.glyphicon-baby-formula:before { + content: "\e216"; +} +.glyphicon-tent:before { + content: "\26fa"; +} +.glyphicon-blackboard:before { + content: "\e218"; +} +.glyphicon-bed:before { + content: "\e219"; +} +.glyphicon-apple:before { + content: "\f8ff"; +} +.glyphicon-erase:before { + content: "\e221"; +} +.glyphicon-hourglass:before { + content: "\231b"; +} +.glyphicon-lamp:before { + content: "\e223"; +} +.glyphicon-duplicate:before { + content: "\e224"; +} +.glyphicon-piggy-bank:before { + content: "\e225"; +} +.glyphicon-scissors:before { + content: "\e226"; +} +.glyphicon-bitcoin:before { + content: "\e227"; +} +.glyphicon-btc:before { + content: "\e227"; +} +.glyphicon-xbt:before { + content: "\e227"; +} +.glyphicon-yen:before { + content: "\00a5"; +} +.glyphicon-jpy:before { + content: "\00a5"; +} +.glyphicon-ruble:before { + content: "\20bd"; +} +.glyphicon-rub:before { + content: "\20bd"; +} +.glyphicon-scale:before { + content: "\e230"; +} +.glyphicon-ice-lolly:before { + content: "\e231"; +} +.glyphicon-ice-lolly-tasted:before { + content: "\e232"; +} +.glyphicon-education:before { + content: "\e233"; +} +.glyphicon-option-horizontal:before { + content: "\e234"; +} +.glyphicon-option-vertical:before { + content: "\e235"; +} +.glyphicon-menu-hamburger:before { + content: "\e236"; +} +.glyphicon-modal-window:before { + content: "\e237"; +} +.glyphicon-oil:before { + content: "\e238"; +} +.glyphicon-grain:before { + content: "\e239"; +} +.glyphicon-sunglasses:before { + content: "\e240"; +} +.glyphicon-text-size:before { + content: "\e241"; +} +.glyphicon-text-color:before { + content: "\e242"; +} +.glyphicon-text-background:before { + content: "\e243"; +} +.glyphicon-object-align-top:before { + content: "\e244"; +} +.glyphicon-object-align-bottom:before { + content: "\e245"; +} +.glyphicon-object-align-horizontal:before { + content: "\e246"; +} +.glyphicon-object-align-left:before { + content: "\e247"; +} +.glyphicon-object-align-vertical:before { + content: "\e248"; +} +.glyphicon-object-align-right:before { + content: "\e249"; +} +.glyphicon-triangle-right:before { + content: "\e250"; +} +.glyphicon-triangle-left:before { + content: "\e251"; +} +.glyphicon-triangle-bottom:before { + content: "\e252"; +} +.glyphicon-triangle-top:before { + content: "\e253"; +} +.glyphicon-console:before { + content: "\e254"; +} +.glyphicon-superscript:before { + content: "\e255"; +} +.glyphicon-subscript:before { + content: "\e256"; +} +.glyphicon-menu-left:before { + content: "\e257"; +} +.glyphicon-menu-right:before { + content: "\e258"; +} +.glyphicon-menu-down:before { + content: "\e259"; +} +.glyphicon-menu-up:before { + content: "\e260"; +} +* { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +*:before, +*:after { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +html { + font-size: 10px; + -webkit-tap-highlight-color: rgba(0, 0, 0, 0); +} +body { + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 14px; + line-height: 1.42857143; + color: #c8c8c8; + background-color: #272b30; +} +input, +button, +select, +textarea { + font-family: inherit; + font-size: inherit; + line-height: inherit; +} +a { + color: #ffffff; + text-decoration: none; +} +a:hover, +a:focus { + color: #ffffff; + text-decoration: underline; +} +a:focus { + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +figure { + margin: 0; +} +img { + vertical-align: middle; +} +.img-responsive, +.thumbnail > img, +.thumbnail a > img, +.carousel-inner > .item > img, +.carousel-inner > .item > a > img { + display: block; + max-width: 100%; + height: auto; +} +.img-rounded { + border-radius: 6px; +} +.img-thumbnail { + padding: 4px; + line-height: 1.42857143; + background-color: #1c1e22; + border: 1px solid #0c0d0e; + border-radius: 4px; + -webkit-transition: all 0.2s ease-in-out; + -o-transition: all 0.2s ease-in-out; + transition: all 0.2s ease-in-out; + display: inline-block; + max-width: 100%; + height: auto; +} +.img-circle { + border-radius: 50%; +} +hr { + margin-top: 20px; + margin-bottom: 20px; + border: 0; + border-top: 1px solid #1c1e22; +} +.sr-only { + position: absolute; + width: 1px; + height: 1px; + margin: -1px; + padding: 0; + overflow: hidden; + clip: rect(0, 0, 0, 0); + border: 0; +} +.sr-only-focusable:active, +.sr-only-focusable:focus { + position: static; + width: auto; + height: auto; + margin: 0; + overflow: visible; + clip: auto; +} +[role="button"] { + cursor: pointer; +} +h1, +h2, +h3, +h4, +h5, +h6, +.h1, +.h2, +.h3, +.h4, +.h5, +.h6 { + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-weight: 500; + line-height: 1.1; + color: inherit; +} +h1 small, +h2 small, +h3 small, +h4 small, +h5 small, +h6 small, +.h1 small, +.h2 small, +.h3 small, +.h4 small, +.h5 small, +.h6 small, +h1 .small, +h2 .small, +h3 .small, +h4 .small, +h5 .small, +h6 .small, +.h1 .small, +.h2 .small, +.h3 .small, +.h4 .small, +.h5 .small, +.h6 .small { + font-weight: normal; + line-height: 1; + color: #7a8288; +} +h1, +.h1, +h2, +.h2, +h3, +.h3 { + margin-top: 20px; + margin-bottom: 10px; +} +h1 small, +.h1 small, +h2 small, +.h2 small, +h3 small, +.h3 small, +h1 .small, +.h1 .small, +h2 .small, +.h2 .small, +h3 .small, +.h3 .small { + font-size: 65%; +} +h4, +.h4, +h5, +.h5, +h6, +.h6 { + margin-top: 10px; + margin-bottom: 10px; +} +h4 small, +.h4 small, +h5 small, +.h5 small, +h6 small, +.h6 small, +h4 .small, +.h4 .small, +h5 .small, +.h5 .small, +h6 .small, +.h6 .small { + font-size: 75%; +} +h1, +.h1 { + font-size: 36px; +} +h2, +.h2 { + font-size: 30px; +} +h3, +.h3 { + font-size: 24px; +} +h4, +.h4 { + font-size: 18px; +} +h5, +.h5 { + font-size: 14px; +} +h6, +.h6 { + font-size: 12px; +} +p { + margin: 0 0 10px; +} +.lead { + margin-bottom: 20px; + font-size: 16px; + font-weight: 300; + line-height: 1.4; +} +@media (min-width: 768px) { + .lead { + font-size: 21px; + } +} +small, +.small { + font-size: 85%; +} +mark, +.mark { + background-color: #f89406; + padding: .2em; +} +.text-left { + text-align: left; +} +.text-right { + text-align: right; +} +.text-center { + text-align: center; +} +.text-justify { + text-align: justify; +} +.text-nowrap { + white-space: nowrap; +} +.text-lowercase { + text-transform: lowercase; +} +.text-uppercase { + text-transform: uppercase; +} +.text-capitalize { + text-transform: capitalize; +} +.text-muted { + color: #7a8288; +} +.text-primary { + color: #7a8288; +} +a.text-primary:hover, +a.text-primary:focus { + color: #62686d; +} +.text-success { + color: #ffffff; +} +a.text-success:hover, +a.text-success:focus { + color: #e6e6e6; +} +.text-info { + color: #ffffff; +} +a.text-info:hover, +a.text-info:focus { + color: #e6e6e6; +} +.text-warning { + color: #ffffff; +} +a.text-warning:hover, +a.text-warning:focus { + color: #e6e6e6; +} +.text-danger { + color: #ffffff; +} +a.text-danger:hover, +a.text-danger:focus { + color: #e6e6e6; +} +.bg-primary { + color: #fff; + background-color: #7a8288; +} +a.bg-primary:hover, +a.bg-primary:focus { + background-color: #62686d; +} +.bg-success { + background-color: #62c462; +} +a.bg-success:hover, +a.bg-success:focus { + background-color: #42b142; +} +.bg-info { + background-color: #5bc0de; +} +a.bg-info:hover, +a.bg-info:focus { + background-color: #31b0d5; +} +.bg-warning { + background-color: #f89406; +} +a.bg-warning:hover, +a.bg-warning:focus { + background-color: #c67605; +} +.bg-danger { + background-color: #ee5f5b; +} +a.bg-danger:hover, +a.bg-danger:focus { + background-color: #e9322d; +} +.page-header { + padding-bottom: 9px; + margin: 40px 0 20px; + border-bottom: 1px solid #1c1e22; +} +ul, +ol { + margin-top: 0; + margin-bottom: 10px; +} +ul ul, +ol ul, +ul ol, +ol ol { + margin-bottom: 0; +} +.list-unstyled { + padding-left: 0; + list-style: none; +} +.list-inline { + padding-left: 0; + list-style: none; + margin-left: -5px; +} +.list-inline > li { + display: inline-block; + padding-left: 5px; + padding-right: 5px; +} +dl { + margin-top: 0; + margin-bottom: 20px; +} +dt, +dd { + line-height: 1.42857143; +} +dt { + font-weight: bold; +} +dd { + margin-left: 0; +} +@media (min-width: 768px) { + .dl-horizontal dt { + float: left; + width: 160px; + clear: left; + text-align: right; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + .dl-horizontal dd { + margin-left: 180px; + } +} +abbr[title], +abbr[data-original-title] { + cursor: help; + border-bottom: 1px dotted #7a8288; +} +.initialism { + font-size: 90%; + text-transform: uppercase; +} +blockquote { + padding: 10px 20px; + margin: 0 0 20px; + font-size: 17.5px; + border-left: 5px solid #7a8288; +} +blockquote p:last-child, +blockquote ul:last-child, +blockquote ol:last-child { + margin-bottom: 0; +} +blockquote footer, +blockquote small, +blockquote .small { + display: block; + font-size: 80%; + line-height: 1.42857143; + color: #7a8288; +} +blockquote footer:before, +blockquote small:before, +blockquote .small:before { + content: '\2014 \00A0'; +} +.blockquote-reverse, +blockquote.pull-right { + padding-right: 15px; + padding-left: 0; + border-right: 5px solid #7a8288; + border-left: 0; + text-align: right; +} +.blockquote-reverse footer:before, +blockquote.pull-right footer:before, +.blockquote-reverse small:before, +blockquote.pull-right small:before, +.blockquote-reverse .small:before, +blockquote.pull-right .small:before { + content: ''; +} +.blockquote-reverse footer:after, +blockquote.pull-right footer:after, +.blockquote-reverse small:after, +blockquote.pull-right small:after, +.blockquote-reverse .small:after, +blockquote.pull-right .small:after { + content: '\00A0 \2014'; +} +address { + margin-bottom: 20px; + font-style: normal; + line-height: 1.42857143; +} +code, +kbd, +pre, +samp { + font-family: Menlo, Monaco, Consolas, "Courier New", monospace; +} +code { + padding: 2px 4px; + font-size: 90%; + color: #c7254e; + background-color: #f9f2f4; + border-radius: 4px; +} +kbd { + padding: 2px 4px; + font-size: 90%; + color: #ffffff; + background-color: #333333; + border-radius: 3px; + -webkit-box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.25); + box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.25); +} +kbd kbd { + padding: 0; + font-size: 100%; + font-weight: bold; + -webkit-box-shadow: none; + box-shadow: none; +} +pre { + display: block; + padding: 9.5px; + margin: 0 0 10px; + font-size: 13px; + line-height: 1.42857143; + word-break: break-all; + word-wrap: break-word; + color: #3a3f44; + background-color: #f5f5f5; + border: 1px solid #cccccc; + border-radius: 4px; +} +pre code { + padding: 0; + font-size: inherit; + color: inherit; + white-space: pre-wrap; + background-color: transparent; + border-radius: 0; +} +.pre-scrollable { + max-height: 340px; + overflow-y: scroll; +} +.container { + margin-right: auto; + margin-left: auto; + padding-left: 15px; + padding-right: 15px; +} +@media (min-width: 768px) { + .container { + width: 750px; + } +} +@media (min-width: 992px) { + .container { + width: 970px; + } +} +@media (min-width: 1200px) { + .container { + width: 1170px; + } +} +.container-fluid { + margin-right: auto; + margin-left: auto; + padding-left: 15px; + padding-right: 15px; +} +.row { + margin-left: -15px; + margin-right: -15px; +} +.col-xs-1, .col-sm-1, .col-md-1, .col-lg-1, .col-xs-2, .col-sm-2, .col-md-2, .col-lg-2, .col-xs-3, .col-sm-3, .col-md-3, .col-lg-3, .col-xs-4, .col-sm-4, .col-md-4, .col-lg-4, .col-xs-5, .col-sm-5, .col-md-5, .col-lg-5, .col-xs-6, .col-sm-6, .col-md-6, .col-lg-6, .col-xs-7, .col-sm-7, .col-md-7, .col-lg-7, .col-xs-8, .col-sm-8, .col-md-8, .col-lg-8, .col-xs-9, .col-sm-9, .col-md-9, .col-lg-9, .col-xs-10, .col-sm-10, .col-md-10, .col-lg-10, .col-xs-11, .col-sm-11, .col-md-11, .col-lg-11, .col-xs-12, .col-sm-12, .col-md-12, .col-lg-12 { + position: relative; + min-height: 1px; + padding-left: 15px; + padding-right: 15px; +} +.col-xs-1, .col-xs-2, .col-xs-3, .col-xs-4, .col-xs-5, .col-xs-6, .col-xs-7, .col-xs-8, .col-xs-9, .col-xs-10, .col-xs-11, .col-xs-12 { + float: left; +} +.col-xs-12 { + width: 100%; +} +.col-xs-11 { + width: 91.66666667%; +} +.col-xs-10 { + width: 83.33333333%; +} +.col-xs-9 { + width: 75%; +} +.col-xs-8 { + width: 66.66666667%; +} +.col-xs-7 { + width: 58.33333333%; +} +.col-xs-6 { + width: 50%; +} +.col-xs-5 { + width: 41.66666667%; +} +.col-xs-4 { + width: 33.33333333%; +} +.col-xs-3 { + width: 25%; +} +.col-xs-2 { + width: 16.66666667%; +} +.col-xs-1 { + width: 8.33333333%; +} +.col-xs-pull-12 { + right: 100%; +} +.col-xs-pull-11 { + right: 91.66666667%; +} +.col-xs-pull-10 { + right: 83.33333333%; +} +.col-xs-pull-9 { + right: 75%; +} +.col-xs-pull-8 { + right: 66.66666667%; +} +.col-xs-pull-7 { + right: 58.33333333%; +} +.col-xs-pull-6 { + right: 50%; +} +.col-xs-pull-5 { + right: 41.66666667%; +} +.col-xs-pull-4 { + right: 33.33333333%; +} +.col-xs-pull-3 { + right: 25%; +} +.col-xs-pull-2 { + right: 16.66666667%; +} +.col-xs-pull-1 { + right: 8.33333333%; +} +.col-xs-pull-0 { + right: auto; +} +.col-xs-push-12 { + left: 100%; +} +.col-xs-push-11 { + left: 91.66666667%; +} +.col-xs-push-10 { + left: 83.33333333%; +} +.col-xs-push-9 { + left: 75%; +} +.col-xs-push-8 { + left: 66.66666667%; +} +.col-xs-push-7 { + left: 58.33333333%; +} +.col-xs-push-6 { + left: 50%; +} +.col-xs-push-5 { + left: 41.66666667%; +} +.col-xs-push-4 { + left: 33.33333333%; +} +.col-xs-push-3 { + left: 25%; +} +.col-xs-push-2 { + left: 16.66666667%; +} +.col-xs-push-1 { + left: 8.33333333%; +} +.col-xs-push-0 { + left: auto; +} +.col-xs-offset-12 { + margin-left: 100%; +} +.col-xs-offset-11 { + margin-left: 91.66666667%; +} +.col-xs-offset-10 { + margin-left: 83.33333333%; +} +.col-xs-offset-9 { + margin-left: 75%; +} +.col-xs-offset-8 { + margin-left: 66.66666667%; +} +.col-xs-offset-7 { + margin-left: 58.33333333%; +} +.col-xs-offset-6 { + margin-left: 50%; +} +.col-xs-offset-5 { + margin-left: 41.66666667%; +} +.col-xs-offset-4 { + margin-left: 33.33333333%; +} +.col-xs-offset-3 { + margin-left: 25%; +} +.col-xs-offset-2 { + margin-left: 16.66666667%; +} +.col-xs-offset-1 { + margin-left: 8.33333333%; +} +.col-xs-offset-0 { + margin-left: 0%; +} +@media (min-width: 768px) { + .col-sm-1, .col-sm-2, .col-sm-3, .col-sm-4, .col-sm-5, .col-sm-6, .col-sm-7, .col-sm-8, .col-sm-9, .col-sm-10, .col-sm-11, .col-sm-12 { + float: left; + } + .col-sm-12 { + width: 100%; + } + .col-sm-11 { + width: 91.66666667%; + } + .col-sm-10 { + width: 83.33333333%; + } + .col-sm-9 { + width: 75%; + } + .col-sm-8 { + width: 66.66666667%; + } + .col-sm-7 { + width: 58.33333333%; + } + .col-sm-6 { + width: 50%; + } + .col-sm-5 { + width: 41.66666667%; + } + .col-sm-4 { + width: 33.33333333%; + } + .col-sm-3 { + width: 25%; + } + .col-sm-2 { + width: 16.66666667%; + } + .col-sm-1 { + width: 8.33333333%; + } + .col-sm-pull-12 { + right: 100%; + } + .col-sm-pull-11 { + right: 91.66666667%; + } + .col-sm-pull-10 { + right: 83.33333333%; + } + .col-sm-pull-9 { + right: 75%; + } + .col-sm-pull-8 { + right: 66.66666667%; + } + .col-sm-pull-7 { + right: 58.33333333%; + } + .col-sm-pull-6 { + right: 50%; + } + .col-sm-pull-5 { + right: 41.66666667%; + } + .col-sm-pull-4 { + right: 33.33333333%; + } + .col-sm-pull-3 { + right: 25%; + } + .col-sm-pull-2 { + right: 16.66666667%; + } + .col-sm-pull-1 { + right: 8.33333333%; + } + .col-sm-pull-0 { + right: auto; + } + .col-sm-push-12 { + left: 100%; + } + .col-sm-push-11 { + left: 91.66666667%; + } + .col-sm-push-10 { + left: 83.33333333%; + } + .col-sm-push-9 { + left: 75%; + } + .col-sm-push-8 { + left: 66.66666667%; + } + .col-sm-push-7 { + left: 58.33333333%; + } + .col-sm-push-6 { + left: 50%; + } + .col-sm-push-5 { + left: 41.66666667%; + } + .col-sm-push-4 { + left: 33.33333333%; + } + .col-sm-push-3 { + left: 25%; + } + .col-sm-push-2 { + left: 16.66666667%; + } + .col-sm-push-1 { + left: 8.33333333%; + } + .col-sm-push-0 { + left: auto; + } + .col-sm-offset-12 { + margin-left: 100%; + } + .col-sm-offset-11 { + margin-left: 91.66666667%; + } + .col-sm-offset-10 { + margin-left: 83.33333333%; + } + .col-sm-offset-9 { + margin-left: 75%; + } + .col-sm-offset-8 { + margin-left: 66.66666667%; + } + .col-sm-offset-7 { + margin-left: 58.33333333%; + } + .col-sm-offset-6 { + margin-left: 50%; + } + .col-sm-offset-5 { + margin-left: 41.66666667%; + } + .col-sm-offset-4 { + margin-left: 33.33333333%; + } + .col-sm-offset-3 { + margin-left: 25%; + } + .col-sm-offset-2 { + margin-left: 16.66666667%; + } + .col-sm-offset-1 { + margin-left: 8.33333333%; + } + .col-sm-offset-0 { + margin-left: 0%; + } +} +@media (min-width: 992px) { + .col-md-1, .col-md-2, .col-md-3, .col-md-4, .col-md-5, .col-md-6, .col-md-7, .col-md-8, .col-md-9, .col-md-10, .col-md-11, .col-md-12 { + float: left; + } + .col-md-12 { + width: 100%; + } + .col-md-11 { + width: 91.66666667%; + } + .col-md-10 { + width: 83.33333333%; + } + .col-md-9 { + width: 75%; + } + .col-md-8 { + width: 66.66666667%; + } + .col-md-7 { + width: 58.33333333%; + } + .col-md-6 { + width: 50%; + } + .col-md-5 { + width: 41.66666667%; + } + .col-md-4 { + width: 33.33333333%; + } + .col-md-3 { + width: 25%; + } + .col-md-2 { + width: 16.66666667%; + } + .col-md-1 { + width: 8.33333333%; + } + .col-md-pull-12 { + right: 100%; + } + .col-md-pull-11 { + right: 91.66666667%; + } + .col-md-pull-10 { + right: 83.33333333%; + } + .col-md-pull-9 { + right: 75%; + } + .col-md-pull-8 { + right: 66.66666667%; + } + .col-md-pull-7 { + right: 58.33333333%; + } + .col-md-pull-6 { + right: 50%; + } + .col-md-pull-5 { + right: 41.66666667%; + } + .col-md-pull-4 { + right: 33.33333333%; + } + .col-md-pull-3 { + right: 25%; + } + .col-md-pull-2 { + right: 16.66666667%; + } + .col-md-pull-1 { + right: 8.33333333%; + } + .col-md-pull-0 { + right: auto; + } + .col-md-push-12 { + left: 100%; + } + .col-md-push-11 { + left: 91.66666667%; + } + .col-md-push-10 { + left: 83.33333333%; + } + .col-md-push-9 { + left: 75%; + } + .col-md-push-8 { + left: 66.66666667%; + } + .col-md-push-7 { + left: 58.33333333%; + } + .col-md-push-6 { + left: 50%; + } + .col-md-push-5 { + left: 41.66666667%; + } + .col-md-push-4 { + left: 33.33333333%; + } + .col-md-push-3 { + left: 25%; + } + .col-md-push-2 { + left: 16.66666667%; + } + .col-md-push-1 { + left: 8.33333333%; + } + .col-md-push-0 { + left: auto; + } + .col-md-offset-12 { + margin-left: 100%; + } + .col-md-offset-11 { + margin-left: 91.66666667%; + } + .col-md-offset-10 { + margin-left: 83.33333333%; + } + .col-md-offset-9 { + margin-left: 75%; + } + .col-md-offset-8 { + margin-left: 66.66666667%; + } + .col-md-offset-7 { + margin-left: 58.33333333%; + } + .col-md-offset-6 { + margin-left: 50%; + } + .col-md-offset-5 { + margin-left: 41.66666667%; + } + .col-md-offset-4 { + margin-left: 33.33333333%; + } + .col-md-offset-3 { + margin-left: 25%; + } + .col-md-offset-2 { + margin-left: 16.66666667%; + } + .col-md-offset-1 { + margin-left: 8.33333333%; + } + .col-md-offset-0 { + margin-left: 0%; + } +} +@media (min-width: 1200px) { + .col-lg-1, .col-lg-2, .col-lg-3, .col-lg-4, .col-lg-5, .col-lg-6, .col-lg-7, .col-lg-8, .col-lg-9, .col-lg-10, .col-lg-11, .col-lg-12 { + float: left; + } + .col-lg-12 { + width: 100%; + } + .col-lg-11 { + width: 91.66666667%; + } + .col-lg-10 { + width: 83.33333333%; + } + .col-lg-9 { + width: 75%; + } + .col-lg-8 { + width: 66.66666667%; + } + .col-lg-7 { + width: 58.33333333%; + } + .col-lg-6 { + width: 50%; + } + .col-lg-5 { + width: 41.66666667%; + } + .col-lg-4 { + width: 33.33333333%; + } + .col-lg-3 { + width: 25%; + } + .col-lg-2 { + width: 16.66666667%; + } + .col-lg-1 { + width: 8.33333333%; + } + .col-lg-pull-12 { + right: 100%; + } + .col-lg-pull-11 { + right: 91.66666667%; + } + .col-lg-pull-10 { + right: 83.33333333%; + } + .col-lg-pull-9 { + right: 75%; + } + .col-lg-pull-8 { + right: 66.66666667%; + } + .col-lg-pull-7 { + right: 58.33333333%; + } + .col-lg-pull-6 { + right: 50%; + } + .col-lg-pull-5 { + right: 41.66666667%; + } + .col-lg-pull-4 { + right: 33.33333333%; + } + .col-lg-pull-3 { + right: 25%; + } + .col-lg-pull-2 { + right: 16.66666667%; + } + .col-lg-pull-1 { + right: 8.33333333%; + } + .col-lg-pull-0 { + right: auto; + } + .col-lg-push-12 { + left: 100%; + } + .col-lg-push-11 { + left: 91.66666667%; + } + .col-lg-push-10 { + left: 83.33333333%; + } + .col-lg-push-9 { + left: 75%; + } + .col-lg-push-8 { + left: 66.66666667%; + } + .col-lg-push-7 { + left: 58.33333333%; + } + .col-lg-push-6 { + left: 50%; + } + .col-lg-push-5 { + left: 41.66666667%; + } + .col-lg-push-4 { + left: 33.33333333%; + } + .col-lg-push-3 { + left: 25%; + } + .col-lg-push-2 { + left: 16.66666667%; + } + .col-lg-push-1 { + left: 8.33333333%; + } + .col-lg-push-0 { + left: auto; + } + .col-lg-offset-12 { + margin-left: 100%; + } + .col-lg-offset-11 { + margin-left: 91.66666667%; + } + .col-lg-offset-10 { + margin-left: 83.33333333%; + } + .col-lg-offset-9 { + margin-left: 75%; + } + .col-lg-offset-8 { + margin-left: 66.66666667%; + } + .col-lg-offset-7 { + margin-left: 58.33333333%; + } + .col-lg-offset-6 { + margin-left: 50%; + } + .col-lg-offset-5 { + margin-left: 41.66666667%; + } + .col-lg-offset-4 { + margin-left: 33.33333333%; + } + .col-lg-offset-3 { + margin-left: 25%; + } + .col-lg-offset-2 { + margin-left: 16.66666667%; + } + .col-lg-offset-1 { + margin-left: 8.33333333%; + } + .col-lg-offset-0 { + margin-left: 0%; + } +} +table { + background-color: #2e3338; +} +caption { + padding-top: 8px; + padding-bottom: 8px; + color: #7a8288; + text-align: left; +} +th { + text-align: left; +} +.table { + width: 100%; + max-width: 100%; + margin-bottom: 20px; +} +.table > thead > tr > th, +.table > tbody > tr > th, +.table > tfoot > tr > th, +.table > thead > tr > td, +.table > tbody > tr > td, +.table > tfoot > tr > td { + padding: 8px; + line-height: 1.42857143; + vertical-align: top; + border-top: 1px solid #1c1e22; +} +.table > thead > tr > th { + vertical-align: bottom; + border-bottom: 2px solid #1c1e22; +} +.table > caption + thead > tr:first-child > th, +.table > colgroup + thead > tr:first-child > th, +.table > thead:first-child > tr:first-child > th, +.table > caption + thead > tr:first-child > td, +.table > colgroup + thead > tr:first-child > td, +.table > thead:first-child > tr:first-child > td { + border-top: 0; +} +.table > tbody + tbody { + border-top: 2px solid #1c1e22; +} +.table .table { + background-color: #272b30; +} +.table-condensed > thead > tr > th, +.table-condensed > tbody > tr > th, +.table-condensed > tfoot > tr > th, +.table-condensed > thead > tr > td, +.table-condensed > tbody > tr > td, +.table-condensed > tfoot > tr > td { + padding: 5px; +} +.table-bordered { + border: 1px solid #1c1e22; +} +.table-bordered > thead > tr > th, +.table-bordered > tbody > tr > th, +.table-bordered > tfoot > tr > th, +.table-bordered > thead > tr > td, +.table-bordered > tbody > tr > td, +.table-bordered > tfoot > tr > td { + border: 1px solid #1c1e22; +} +.table-bordered > thead > tr > th, +.table-bordered > thead > tr > td { + border-bottom-width: 2px; +} +.table-striped > tbody > tr:nth-of-type(odd) { + background-color: #353a41; +} +.table-hover > tbody > tr:hover { + background-color: #49515a; +} +table col[class*="col-"] { + position: static; + float: none; + display: table-column; +} +table td[class*="col-"], +table th[class*="col-"] { + position: static; + float: none; + display: table-cell; +} +.table > thead > tr > td.active, +.table > tbody > tr > td.active, +.table > tfoot > tr > td.active, +.table > thead > tr > th.active, +.table > tbody > tr > th.active, +.table > tfoot > tr > th.active, +.table > thead > tr.active > td, +.table > tbody > tr.active > td, +.table > tfoot > tr.active > td, +.table > thead > tr.active > th, +.table > tbody > tr.active > th, +.table > tfoot > tr.active > th { + background-color: #49515a; +} +.table-hover > tbody > tr > td.active:hover, +.table-hover > tbody > tr > th.active:hover, +.table-hover > tbody > tr.active:hover > td, +.table-hover > tbody > tr:hover > .active, +.table-hover > tbody > tr.active:hover > th { + background-color: #3e444c; +} +.table > thead > tr > td.success, +.table > tbody > tr > td.success, +.table > tfoot > tr > td.success, +.table > thead > tr > th.success, +.table > tbody > tr > th.success, +.table > tfoot > tr > th.success, +.table > thead > tr.success > td, +.table > tbody > tr.success > td, +.table > tfoot > tr.success > td, +.table > thead > tr.success > th, +.table > tbody > tr.success > th, +.table > tfoot > tr.success > th { + background-color: #62c462; +} +.table-hover > tbody > tr > td.success:hover, +.table-hover > tbody > tr > th.success:hover, +.table-hover > tbody > tr.success:hover > td, +.table-hover > tbody > tr:hover > .success, +.table-hover > tbody > tr.success:hover > th { + background-color: #4fbd4f; +} +.table > thead > tr > td.info, +.table > tbody > tr > td.info, +.table > tfoot > tr > td.info, +.table > thead > tr > th.info, +.table > tbody > tr > th.info, +.table > tfoot > tr > th.info, +.table > thead > tr.info > td, +.table > tbody > tr.info > td, +.table > tfoot > tr.info > td, +.table > thead > tr.info > th, +.table > tbody > tr.info > th, +.table > tfoot > tr.info > th { + background-color: #5bc0de; +} +.table-hover > tbody > tr > td.info:hover, +.table-hover > tbody > tr > th.info:hover, +.table-hover > tbody > tr.info:hover > td, +.table-hover > tbody > tr:hover > .info, +.table-hover > tbody > tr.info:hover > th { + background-color: #46b8da; +} +.table > thead > tr > td.warning, +.table > tbody > tr > td.warning, +.table > tfoot > tr > td.warning, +.table > thead > tr > th.warning, +.table > tbody > tr > th.warning, +.table > tfoot > tr > th.warning, +.table > thead > tr.warning > td, +.table > tbody > tr.warning > td, +.table > tfoot > tr.warning > td, +.table > thead > tr.warning > th, +.table > tbody > tr.warning > th, +.table > tfoot > tr.warning > th { + background-color: #f89406; +} +.table-hover > tbody > tr > td.warning:hover, +.table-hover > tbody > tr > th.warning:hover, +.table-hover > tbody > tr.warning:hover > td, +.table-hover > tbody > tr:hover > .warning, +.table-hover > tbody > tr.warning:hover > th { + background-color: #df8505; +} +.table > thead > tr > td.danger, +.table > tbody > tr > td.danger, +.table > tfoot > tr > td.danger, +.table > thead > tr > th.danger, +.table > tbody > tr > th.danger, +.table > tfoot > tr > th.danger, +.table > thead > tr.danger > td, +.table > tbody > tr.danger > td, +.table > tfoot > tr.danger > td, +.table > thead > tr.danger > th, +.table > tbody > tr.danger > th, +.table > tfoot > tr.danger > th { + background-color: #ee5f5b; +} +.table-hover > tbody > tr > td.danger:hover, +.table-hover > tbody > tr > th.danger:hover, +.table-hover > tbody > tr.danger:hover > td, +.table-hover > tbody > tr:hover > .danger, +.table-hover > tbody > tr.danger:hover > th { + background-color: #ec4844; +} +.table-responsive { + overflow-x: auto; + min-height: 0.01%; +} +@media screen and (max-width: 767px) { + .table-responsive { + width: 100%; + margin-bottom: 15px; + overflow-y: hidden; + -ms-overflow-style: -ms-autohiding-scrollbar; + border: 1px solid #1c1e22; + } + .table-responsive > .table { + margin-bottom: 0; + } + .table-responsive > .table > thead > tr > th, + .table-responsive > .table > tbody > tr > th, + .table-responsive > .table > tfoot > tr > th, + .table-responsive > .table > thead > tr > td, + .table-responsive > .table > tbody > tr > td, + .table-responsive > .table > tfoot > tr > td { + white-space: nowrap; + } + .table-responsive > .table-bordered { + border: 0; + } + .table-responsive > .table-bordered > thead > tr > th:first-child, + .table-responsive > .table-bordered > tbody > tr > th:first-child, + .table-responsive > .table-bordered > tfoot > tr > th:first-child, + .table-responsive > .table-bordered > thead > tr > td:first-child, + .table-responsive > .table-bordered > tbody > tr > td:first-child, + .table-responsive > .table-bordered > tfoot > tr > td:first-child { + border-left: 0; + } + .table-responsive > .table-bordered > thead > tr > th:last-child, + .table-responsive > .table-bordered > tbody > tr > th:last-child, + .table-responsive > .table-bordered > tfoot > tr > th:last-child, + .table-responsive > .table-bordered > thead > tr > td:last-child, + .table-responsive > .table-bordered > tbody > tr > td:last-child, + .table-responsive > .table-bordered > tfoot > tr > td:last-child { + border-right: 0; + } + .table-responsive > .table-bordered > tbody > tr:last-child > th, + .table-responsive > .table-bordered > tfoot > tr:last-child > th, + .table-responsive > .table-bordered > tbody > tr:last-child > td, + .table-responsive > .table-bordered > tfoot > tr:last-child > td { + border-bottom: 0; + } +} +fieldset { + padding: 0; + margin: 0; + border: 0; + min-width: 0; +} +legend { + display: block; + width: 100%; + padding: 0; + margin-bottom: 20px; + font-size: 21px; + line-height: inherit; + color: #c8c8c8; + border: 0; + border-bottom: 1px solid #1c1e22; +} +label { + display: inline-block; + max-width: 100%; + margin-bottom: 5px; + font-weight: bold; +} +input[type="search"] { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +input[type="radio"], +input[type="checkbox"] { + margin: 4px 0 0; + margin-top: 1px \9; + line-height: normal; +} +input[type="file"] { + display: block; +} +input[type="range"] { + display: block; + width: 100%; +} +select[multiple], +select[size] { + height: auto; +} +input[type="file"]:focus, +input[type="radio"]:focus, +input[type="checkbox"]:focus { + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +output { + display: block; + padding-top: 9px; + font-size: 14px; + line-height: 1.42857143; + color: #272b30; +} +.form-control { + display: block; + width: 100%; + height: 38px; + padding: 8px 12px; + font-size: 14px; + line-height: 1.42857143; + color: #272b30; + background-color: #ffffff; + background-image: none; + border: 1px solid #000000; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + -webkit-transition: border-color ease-in-out .15s, -webkit-box-shadow ease-in-out .15s; + -o-transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s; + transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s; +} +.form-control:focus { + border-color: #66afe9; + outline: 0; + -webkit-box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(102, 175, 233, 0.6); + box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(102, 175, 233, 0.6); +} +.form-control::-moz-placeholder { + color: #7a8288; + opacity: 1; +} +.form-control:-ms-input-placeholder { + color: #7a8288; +} +.form-control::-webkit-input-placeholder { + color: #7a8288; +} +.form-control::-ms-expand { + border: 0; + background-color: transparent; +} +.form-control[disabled], +.form-control[readonly], +fieldset[disabled] .form-control { + background-color: #999999; + opacity: 1; +} +.form-control[disabled], +fieldset[disabled] .form-control { + cursor: not-allowed; +} +textarea.form-control { + height: auto; +} +input[type="search"] { + -webkit-appearance: none; +} +@media screen and (-webkit-min-device-pixel-ratio: 0) { + input[type="date"].form-control, + input[type="time"].form-control, + input[type="datetime-local"].form-control, + input[type="month"].form-control { + line-height: 38px; + } + input[type="date"].input-sm, + input[type="time"].input-sm, + input[type="datetime-local"].input-sm, + input[type="month"].input-sm, + .input-group-sm input[type="date"], + .input-group-sm input[type="time"], + .input-group-sm input[type="datetime-local"], + .input-group-sm input[type="month"] { + line-height: 30px; + } + input[type="date"].input-lg, + input[type="time"].input-lg, + input[type="datetime-local"].input-lg, + input[type="month"].input-lg, + .input-group-lg input[type="date"], + .input-group-lg input[type="time"], + .input-group-lg input[type="datetime-local"], + .input-group-lg input[type="month"] { + line-height: 54px; + } +} +.form-group { + margin-bottom: 15px; +} +.radio, +.checkbox { + position: relative; + display: block; + margin-top: 10px; + margin-bottom: 10px; +} +.radio label, +.checkbox label { + min-height: 20px; + padding-left: 20px; + margin-bottom: 0; + font-weight: normal; + cursor: pointer; +} +.radio input[type="radio"], +.radio-inline input[type="radio"], +.checkbox input[type="checkbox"], +.checkbox-inline input[type="checkbox"] { + position: absolute; + margin-left: -20px; + margin-top: 4px \9; +} +.radio + .radio, +.checkbox + .checkbox { + margin-top: -5px; +} +.radio-inline, +.checkbox-inline { + position: relative; + display: inline-block; + padding-left: 20px; + margin-bottom: 0; + vertical-align: middle; + font-weight: normal; + cursor: pointer; +} +.radio-inline + .radio-inline, +.checkbox-inline + .checkbox-inline { + margin-top: 0; + margin-left: 10px; +} +input[type="radio"][disabled], +input[type="checkbox"][disabled], +input[type="radio"].disabled, +input[type="checkbox"].disabled, +fieldset[disabled] input[type="radio"], +fieldset[disabled] input[type="checkbox"] { + cursor: not-allowed; +} +.radio-inline.disabled, +.checkbox-inline.disabled, +fieldset[disabled] .radio-inline, +fieldset[disabled] .checkbox-inline { + cursor: not-allowed; +} +.radio.disabled label, +.checkbox.disabled label, +fieldset[disabled] .radio label, +fieldset[disabled] .checkbox label { + cursor: not-allowed; +} +.form-control-static { + padding-top: 9px; + padding-bottom: 9px; + margin-bottom: 0; + min-height: 34px; +} +.form-control-static.input-lg, +.form-control-static.input-sm { + padding-left: 0; + padding-right: 0; +} +.input-sm { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +select.input-sm { + height: 30px; + line-height: 30px; +} +textarea.input-sm, +select[multiple].input-sm { + height: auto; +} +.form-group-sm .form-control { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.form-group-sm select.form-control { + height: 30px; + line-height: 30px; +} +.form-group-sm textarea.form-control, +.form-group-sm select[multiple].form-control { + height: auto; +} +.form-group-sm .form-control-static { + height: 30px; + min-height: 32px; + padding: 6px 10px; + font-size: 12px; + line-height: 1.5; +} +.input-lg { + height: 54px; + padding: 14px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +select.input-lg { + height: 54px; + line-height: 54px; +} +textarea.input-lg, +select[multiple].input-lg { + height: auto; +} +.form-group-lg .form-control { + height: 54px; + padding: 14px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +.form-group-lg select.form-control { + height: 54px; + line-height: 54px; +} +.form-group-lg textarea.form-control, +.form-group-lg select[multiple].form-control { + height: auto; +} +.form-group-lg .form-control-static { + height: 54px; + min-height: 38px; + padding: 15px 16px; + font-size: 18px; + line-height: 1.3333333; +} +.has-feedback { + position: relative; +} +.has-feedback .form-control { + padding-right: 47.5px; +} +.form-control-feedback { + position: absolute; + top: 0; + right: 0; + z-index: 2; + display: block; + width: 38px; + height: 38px; + line-height: 38px; + text-align: center; + pointer-events: none; +} +.input-lg + .form-control-feedback, +.input-group-lg + .form-control-feedback, +.form-group-lg .form-control + .form-control-feedback { + width: 54px; + height: 54px; + line-height: 54px; +} +.input-sm + .form-control-feedback, +.input-group-sm + .form-control-feedback, +.form-group-sm .form-control + .form-control-feedback { + width: 30px; + height: 30px; + line-height: 30px; +} +.has-success .help-block, +.has-success .control-label, +.has-success .radio, +.has-success .checkbox, +.has-success .radio-inline, +.has-success .checkbox-inline, +.has-success.radio label, +.has-success.checkbox label, +.has-success.radio-inline label, +.has-success.checkbox-inline label { + color: #ffffff; +} +.has-success .form-control { + border-color: #ffffff; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); +} +.has-success .form-control:focus { + border-color: #e6e6e6; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #ffffff; + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #ffffff; +} +.has-success .input-group-addon { + color: #ffffff; + border-color: #ffffff; + background-color: #62c462; +} +.has-success .form-control-feedback { + color: #ffffff; +} +.has-warning .help-block, +.has-warning .control-label, +.has-warning .radio, +.has-warning .checkbox, +.has-warning .radio-inline, +.has-warning .checkbox-inline, +.has-warning.radio label, +.has-warning.checkbox label, +.has-warning.radio-inline label, +.has-warning.checkbox-inline label { + color: #ffffff; +} +.has-warning .form-control { + border-color: #ffffff; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); +} +.has-warning .form-control:focus { + border-color: #e6e6e6; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #ffffff; + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #ffffff; +} +.has-warning .input-group-addon { + color: #ffffff; + border-color: #ffffff; + background-color: #f89406; +} +.has-warning .form-control-feedback { + color: #ffffff; +} +.has-error .help-block, +.has-error .control-label, +.has-error .radio, +.has-error .checkbox, +.has-error .radio-inline, +.has-error .checkbox-inline, +.has-error.radio label, +.has-error.checkbox label, +.has-error.radio-inline label, +.has-error.checkbox-inline label { + color: #ffffff; +} +.has-error .form-control { + border-color: #ffffff; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); +} +.has-error .form-control:focus { + border-color: #e6e6e6; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #ffffff; + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #ffffff; +} +.has-error .input-group-addon { + color: #ffffff; + border-color: #ffffff; + background-color: #ee5f5b; +} +.has-error .form-control-feedback { + color: #ffffff; +} +.has-feedback label ~ .form-control-feedback { + top: 25px; +} +.has-feedback label.sr-only ~ .form-control-feedback { + top: 0; +} +.help-block { + display: block; + margin-top: 5px; + margin-bottom: 10px; + color: #ffffff; +} +@media (min-width: 768px) { + .form-inline .form-group { + display: inline-block; + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .form-control { + display: inline-block; + width: auto; + vertical-align: middle; + } + .form-inline .form-control-static { + display: inline-block; + } + .form-inline .input-group { + display: inline-table; + vertical-align: middle; + } + .form-inline .input-group .input-group-addon, + .form-inline .input-group .input-group-btn, + .form-inline .input-group .form-control { + width: auto; + } + .form-inline .input-group > .form-control { + width: 100%; + } + .form-inline .control-label { + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .radio, + .form-inline .checkbox { + display: inline-block; + margin-top: 0; + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .radio label, + .form-inline .checkbox label { + padding-left: 0; + } + .form-inline .radio input[type="radio"], + .form-inline .checkbox input[type="checkbox"] { + position: relative; + margin-left: 0; + } + .form-inline .has-feedback .form-control-feedback { + top: 0; + } +} +.form-horizontal .radio, +.form-horizontal .checkbox, +.form-horizontal .radio-inline, +.form-horizontal .checkbox-inline { + margin-top: 0; + margin-bottom: 0; + padding-top: 9px; +} +.form-horizontal .radio, +.form-horizontal .checkbox { + min-height: 29px; +} +.form-horizontal .form-group { + margin-left: -15px; + margin-right: -15px; +} +@media (min-width: 768px) { + .form-horizontal .control-label { + text-align: right; + margin-bottom: 0; + padding-top: 9px; + } +} +.form-horizontal .has-feedback .form-control-feedback { + right: 15px; +} +@media (min-width: 768px) { + .form-horizontal .form-group-lg .control-label { + padding-top: 15px; + font-size: 18px; + } +} +@media (min-width: 768px) { + .form-horizontal .form-group-sm .control-label { + padding-top: 6px; + font-size: 12px; + } +} +.btn { + display: inline-block; + margin-bottom: 0; + font-weight: normal; + text-align: center; + vertical-align: middle; + -ms-touch-action: manipulation; + touch-action: manipulation; + cursor: pointer; + background-image: none; + border: 0px solid transparent; + white-space: nowrap; + padding: 8px 12px; + font-size: 14px; + line-height: 1.42857143; + border-radius: 4px; + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} +.btn:focus, +.btn:active:focus, +.btn.active:focus, +.btn.focus, +.btn:active.focus, +.btn.active.focus { + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +.btn:hover, +.btn:focus, +.btn.focus { + color: #ffffff; + text-decoration: none; +} +.btn:active, +.btn.active { + outline: 0; + background-image: none; + -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); + box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); +} +.btn.disabled, +.btn[disabled], +fieldset[disabled] .btn { + cursor: not-allowed; + opacity: 0.65; + filter: alpha(opacity=65); + -webkit-box-shadow: none; + box-shadow: none; +} +a.btn.disabled, +fieldset[disabled] a.btn { + pointer-events: none; +} +.btn-default { + color: #ffffff; + background-color: #3a3f44; + border-color: #3a3f44; +} +.btn-default:focus, +.btn-default.focus { + color: #ffffff; + background-color: #232628; + border-color: #000000; +} +.btn-default:hover { + color: #ffffff; + background-color: #232628; + border-color: #1e2023; +} +.btn-default:active, +.btn-default.active, +.open > .dropdown-toggle.btn-default { + color: #ffffff; + background-color: #232628; + border-color: #1e2023; +} +.btn-default:active:hover, +.btn-default.active:hover, +.open > .dropdown-toggle.btn-default:hover, +.btn-default:active:focus, +.btn-default.active:focus, +.open > .dropdown-toggle.btn-default:focus, +.btn-default:active.focus, +.btn-default.active.focus, +.open > .dropdown-toggle.btn-default.focus { + color: #ffffff; + background-color: #121415; + border-color: #000000; +} +.btn-default:active, +.btn-default.active, +.open > .dropdown-toggle.btn-default { + background-image: none; +} +.btn-default.disabled:hover, +.btn-default[disabled]:hover, +fieldset[disabled] .btn-default:hover, +.btn-default.disabled:focus, +.btn-default[disabled]:focus, +fieldset[disabled] .btn-default:focus, +.btn-default.disabled.focus, +.btn-default[disabled].focus, +fieldset[disabled] .btn-default.focus { + background-color: #3a3f44; + border-color: #3a3f44; +} +.btn-default .badge { + color: #3a3f44; + background-color: #ffffff; +} +.btn-primary { + color: #ffffff; + background-color: #7a8288; + border-color: #7a8288; +} +.btn-primary:focus, +.btn-primary.focus { + color: #ffffff; + background-color: #62686d; + border-color: #3e4245; +} +.btn-primary:hover { + color: #ffffff; + background-color: #62686d; + border-color: #5d6368; +} +.btn-primary:active, +.btn-primary.active, +.open > .dropdown-toggle.btn-primary { + color: #ffffff; + background-color: #62686d; + border-color: #5d6368; +} +.btn-primary:active:hover, +.btn-primary.active:hover, +.open > .dropdown-toggle.btn-primary:hover, +.btn-primary:active:focus, +.btn-primary.active:focus, +.open > .dropdown-toggle.btn-primary:focus, +.btn-primary:active.focus, +.btn-primary.active.focus, +.open > .dropdown-toggle.btn-primary.focus { + color: #ffffff; + background-color: #51565a; + border-color: #3e4245; +} +.btn-primary:active, +.btn-primary.active, +.open > .dropdown-toggle.btn-primary { + background-image: none; +} +.btn-primary.disabled:hover, +.btn-primary[disabled]:hover, +fieldset[disabled] .btn-primary:hover, +.btn-primary.disabled:focus, +.btn-primary[disabled]:focus, +fieldset[disabled] .btn-primary:focus, +.btn-primary.disabled.focus, +.btn-primary[disabled].focus, +fieldset[disabled] .btn-primary.focus { + background-color: #7a8288; + border-color: #7a8288; +} +.btn-primary .badge { + color: #7a8288; + background-color: #ffffff; +} +.btn-success { + color: #ffffff; + background-color: #62c462; + border-color: #62c462; +} +.btn-success:focus, +.btn-success.focus { + color: #ffffff; + background-color: #42b142; + border-color: #2d792d; +} +.btn-success:hover { + color: #ffffff; + background-color: #42b142; + border-color: #40a940; +} +.btn-success:active, +.btn-success.active, +.open > .dropdown-toggle.btn-success { + color: #ffffff; + background-color: #42b142; + border-color: #40a940; +} +.btn-success:active:hover, +.btn-success.active:hover, +.open > .dropdown-toggle.btn-success:hover, +.btn-success:active:focus, +.btn-success.active:focus, +.open > .dropdown-toggle.btn-success:focus, +.btn-success:active.focus, +.btn-success.active.focus, +.open > .dropdown-toggle.btn-success.focus { + color: #ffffff; + background-color: #399739; + border-color: #2d792d; +} +.btn-success:active, +.btn-success.active, +.open > .dropdown-toggle.btn-success { + background-image: none; +} +.btn-success.disabled:hover, +.btn-success[disabled]:hover, +fieldset[disabled] .btn-success:hover, +.btn-success.disabled:focus, +.btn-success[disabled]:focus, +fieldset[disabled] .btn-success:focus, +.btn-success.disabled.focus, +.btn-success[disabled].focus, +fieldset[disabled] .btn-success.focus { + background-color: #62c462; + border-color: #62c462; +} +.btn-success .badge { + color: #62c462; + background-color: #ffffff; +} +.btn-info { + color: #ffffff; + background-color: #5bc0de; + border-color: #5bc0de; +} +.btn-info:focus, +.btn-info.focus { + color: #ffffff; + background-color: #31b0d5; + border-color: #1f7e9a; +} +.btn-info:hover { + color: #ffffff; + background-color: #31b0d5; + border-color: #2aabd2; +} +.btn-info:active, +.btn-info.active, +.open > .dropdown-toggle.btn-info { + color: #ffffff; + background-color: #31b0d5; + border-color: #2aabd2; +} +.btn-info:active:hover, +.btn-info.active:hover, +.open > .dropdown-toggle.btn-info:hover, +.btn-info:active:focus, +.btn-info.active:focus, +.open > .dropdown-toggle.btn-info:focus, +.btn-info:active.focus, +.btn-info.active.focus, +.open > .dropdown-toggle.btn-info.focus { + color: #ffffff; + background-color: #269abc; + border-color: #1f7e9a; +} +.btn-info:active, +.btn-info.active, +.open > .dropdown-toggle.btn-info { + background-image: none; +} +.btn-info.disabled:hover, +.btn-info[disabled]:hover, +fieldset[disabled] .btn-info:hover, +.btn-info.disabled:focus, +.btn-info[disabled]:focus, +fieldset[disabled] .btn-info:focus, +.btn-info.disabled.focus, +.btn-info[disabled].focus, +fieldset[disabled] .btn-info.focus { + background-color: #5bc0de; + border-color: #5bc0de; +} +.btn-info .badge { + color: #5bc0de; + background-color: #ffffff; +} +.btn-warning { + color: #ffffff; + background-color: #f89406; + border-color: #f89406; +} +.btn-warning:focus, +.btn-warning.focus { + color: #ffffff; + background-color: #c67605; + border-color: #7c4a03; +} +.btn-warning:hover { + color: #ffffff; + background-color: #c67605; + border-color: #bc7005; +} +.btn-warning:active, +.btn-warning.active, +.open > .dropdown-toggle.btn-warning { + color: #ffffff; + background-color: #c67605; + border-color: #bc7005; +} +.btn-warning:active:hover, +.btn-warning.active:hover, +.open > .dropdown-toggle.btn-warning:hover, +.btn-warning:active:focus, +.btn-warning.active:focus, +.open > .dropdown-toggle.btn-warning:focus, +.btn-warning:active.focus, +.btn-warning.active.focus, +.open > .dropdown-toggle.btn-warning.focus { + color: #ffffff; + background-color: #a36104; + border-color: #7c4a03; +} +.btn-warning:active, +.btn-warning.active, +.open > .dropdown-toggle.btn-warning { + background-image: none; +} +.btn-warning.disabled:hover, +.btn-warning[disabled]:hover, +fieldset[disabled] .btn-warning:hover, +.btn-warning.disabled:focus, +.btn-warning[disabled]:focus, +fieldset[disabled] .btn-warning:focus, +.btn-warning.disabled.focus, +.btn-warning[disabled].focus, +fieldset[disabled] .btn-warning.focus { + background-color: #f89406; + border-color: #f89406; +} +.btn-warning .badge { + color: #f89406; + background-color: #ffffff; +} +.btn-danger { + color: #ffffff; + background-color: #ee5f5b; + border-color: #ee5f5b; +} +.btn-danger:focus, +.btn-danger.focus { + color: #ffffff; + background-color: #e9322d; + border-color: #b71713; +} +.btn-danger:hover { + color: #ffffff; + background-color: #e9322d; + border-color: #e82924; +} +.btn-danger:active, +.btn-danger.active, +.open > .dropdown-toggle.btn-danger { + color: #ffffff; + background-color: #e9322d; + border-color: #e82924; +} +.btn-danger:active:hover, +.btn-danger.active:hover, +.open > .dropdown-toggle.btn-danger:hover, +.btn-danger:active:focus, +.btn-danger.active:focus, +.open > .dropdown-toggle.btn-danger:focus, +.btn-danger:active.focus, +.btn-danger.active.focus, +.open > .dropdown-toggle.btn-danger.focus { + color: #ffffff; + background-color: #dc1c17; + border-color: #b71713; +} +.btn-danger:active, +.btn-danger.active, +.open > .dropdown-toggle.btn-danger { + background-image: none; +} +.btn-danger.disabled:hover, +.btn-danger[disabled]:hover, +fieldset[disabled] .btn-danger:hover, +.btn-danger.disabled:focus, +.btn-danger[disabled]:focus, +fieldset[disabled] .btn-danger:focus, +.btn-danger.disabled.focus, +.btn-danger[disabled].focus, +fieldset[disabled] .btn-danger.focus { + background-color: #ee5f5b; + border-color: #ee5f5b; +} +.btn-danger .badge { + color: #ee5f5b; + background-color: #ffffff; +} +.btn-link { + color: #ffffff; + font-weight: normal; + border-radius: 0; +} +.btn-link, +.btn-link:active, +.btn-link.active, +.btn-link[disabled], +fieldset[disabled] .btn-link { + background-color: transparent; + -webkit-box-shadow: none; + box-shadow: none; +} +.btn-link, +.btn-link:hover, +.btn-link:focus, +.btn-link:active { + border-color: transparent; +} +.btn-link:hover, +.btn-link:focus { + color: #ffffff; + text-decoration: underline; + background-color: transparent; +} +.btn-link[disabled]:hover, +fieldset[disabled] .btn-link:hover, +.btn-link[disabled]:focus, +fieldset[disabled] .btn-link:focus { + color: #7a8288; + text-decoration: none; +} +.btn-lg, +.btn-group-lg > .btn { + padding: 14px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +.btn-sm, +.btn-group-sm > .btn { + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-xs, +.btn-group-xs > .btn { + padding: 1px 5px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-block { + display: block; + width: 100%; +} +.btn-block + .btn-block { + margin-top: 5px; +} +input[type="submit"].btn-block, +input[type="reset"].btn-block, +input[type="button"].btn-block { + width: 100%; +} +.fade { + opacity: 0; + -webkit-transition: opacity 0.15s linear; + -o-transition: opacity 0.15s linear; + transition: opacity 0.15s linear; +} +.fade.in { + opacity: 1; +} +.collapse { + display: none; +} +.collapse.in { + display: block; +} +tr.collapse.in { + display: table-row; +} +tbody.collapse.in { + display: table-row-group; +} +.collapsing { + position: relative; + height: 0; + overflow: hidden; + -webkit-transition-property: height, visibility; + -o-transition-property: height, visibility; + transition-property: height, visibility; + -webkit-transition-duration: 0.35s; + -o-transition-duration: 0.35s; + transition-duration: 0.35s; + -webkit-transition-timing-function: ease; + -o-transition-timing-function: ease; + transition-timing-function: ease; +} +.caret { + display: inline-block; + width: 0; + height: 0; + margin-left: 2px; + vertical-align: middle; + border-top: 4px dashed; + border-top: 4px solid \9; + border-right: 4px solid transparent; + border-left: 4px solid transparent; +} +.dropup, +.dropdown { + position: relative; +} +.dropdown-toggle:focus { + outline: 0; +} +.dropdown-menu { + position: absolute; + top: 100%; + left: 0; + z-index: 1000; + display: none; + float: left; + min-width: 160px; + padding: 5px 0; + margin: 2px 0 0; + list-style: none; + font-size: 14px; + text-align: left; + background-color: #3a3f44; + border: 1px solid #272b30; + border: 1px solid rgba(0, 0, 0, 0.15); + border-radius: 4px; + -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); + box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); + -webkit-background-clip: padding-box; + background-clip: padding-box; +} +.dropdown-menu.pull-right { + right: 0; + left: auto; +} +.dropdown-menu .divider { + height: 1px; + margin: 9px 0; + overflow: hidden; + background-color: #272b30; +} +.dropdown-menu > li > a { + display: block; + padding: 3px 20px; + clear: both; + font-weight: normal; + line-height: 1.42857143; + color: #c8c8c8; + white-space: nowrap; +} +.dropdown-menu > li > a:hover, +.dropdown-menu > li > a:focus { + text-decoration: none; + color: #ffffff; + background-color: #272b30; +} +.dropdown-menu > .active > a, +.dropdown-menu > .active > a:hover, +.dropdown-menu > .active > a:focus { + color: #ffffff; + text-decoration: none; + outline: 0; + background-color: #272b30; +} +.dropdown-menu > .disabled > a, +.dropdown-menu > .disabled > a:hover, +.dropdown-menu > .disabled > a:focus { + color: #7a8288; +} +.dropdown-menu > .disabled > a:hover, +.dropdown-menu > .disabled > a:focus { + text-decoration: none; + background-color: transparent; + background-image: none; + filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); + cursor: not-allowed; +} +.open > .dropdown-menu { + display: block; +} +.open > a { + outline: 0; +} +.dropdown-menu-right { + left: auto; + right: 0; +} +.dropdown-menu-left { + left: 0; + right: auto; +} +.dropdown-header { + display: block; + padding: 3px 20px; + font-size: 12px; + line-height: 1.42857143; + color: #7a8288; + white-space: nowrap; +} +.dropdown-backdrop { + position: fixed; + left: 0; + right: 0; + bottom: 0; + top: 0; + z-index: 990; +} +.pull-right > .dropdown-menu { + right: 0; + left: auto; +} +.dropup .caret, +.navbar-fixed-bottom .dropdown .caret { + border-top: 0; + border-bottom: 4px dashed; + border-bottom: 4px solid \9; + content: ""; +} +.dropup .dropdown-menu, +.navbar-fixed-bottom .dropdown .dropdown-menu { + top: auto; + bottom: 100%; + margin-bottom: 2px; +} +@media (min-width: 768px) { + .navbar-right .dropdown-menu { + left: auto; + right: 0; + } + .navbar-right .dropdown-menu-left { + left: 0; + right: auto; + } +} +.btn-group, +.btn-group-vertical { + position: relative; + display: inline-block; + vertical-align: middle; +} +.btn-group > .btn, +.btn-group-vertical > .btn { + position: relative; + float: left; +} +.btn-group > .btn:hover, +.btn-group-vertical > .btn:hover, +.btn-group > .btn:focus, +.btn-group-vertical > .btn:focus, +.btn-group > .btn:active, +.btn-group-vertical > .btn:active, +.btn-group > .btn.active, +.btn-group-vertical > .btn.active { + z-index: 2; +} +.btn-group .btn + .btn, +.btn-group .btn + .btn-group, +.btn-group .btn-group + .btn, +.btn-group .btn-group + .btn-group { + margin-left: -1px; +} +.btn-toolbar { + margin-left: -5px; +} +.btn-toolbar .btn, +.btn-toolbar .btn-group, +.btn-toolbar .input-group { + float: left; +} +.btn-toolbar > .btn, +.btn-toolbar > .btn-group, +.btn-toolbar > .input-group { + margin-left: 5px; +} +.btn-group > .btn:not(:first-child):not(:last-child):not(.dropdown-toggle) { + border-radius: 0; +} +.btn-group > .btn:first-child { + margin-left: 0; +} +.btn-group > .btn:first-child:not(:last-child):not(.dropdown-toggle) { + border-bottom-right-radius: 0; + border-top-right-radius: 0; +} +.btn-group > .btn:last-child:not(:first-child), +.btn-group > .dropdown-toggle:not(:first-child) { + border-bottom-left-radius: 0; + border-top-left-radius: 0; +} +.btn-group > .btn-group { + float: left; +} +.btn-group > .btn-group:not(:first-child):not(:last-child) > .btn { + border-radius: 0; +} +.btn-group > .btn-group:first-child:not(:last-child) > .btn:last-child, +.btn-group > .btn-group:first-child:not(:last-child) > .dropdown-toggle { + border-bottom-right-radius: 0; + border-top-right-radius: 0; +} +.btn-group > .btn-group:last-child:not(:first-child) > .btn:first-child { + border-bottom-left-radius: 0; + border-top-left-radius: 0; +} +.btn-group .dropdown-toggle:active, +.btn-group.open .dropdown-toggle { + outline: 0; +} +.btn-group > .btn + .dropdown-toggle { + padding-left: 8px; + padding-right: 8px; +} +.btn-group > .btn-lg + .dropdown-toggle { + padding-left: 12px; + padding-right: 12px; +} +.btn-group.open .dropdown-toggle { + -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); + box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); +} +.btn-group.open .dropdown-toggle.btn-link { + -webkit-box-shadow: none; + box-shadow: none; +} +.btn .caret { + margin-left: 0; +} +.btn-lg .caret { + border-width: 5px 5px 0; + border-bottom-width: 0; +} +.dropup .btn-lg .caret { + border-width: 0 5px 5px; +} +.btn-group-vertical > .btn, +.btn-group-vertical > .btn-group, +.btn-group-vertical > .btn-group > .btn { + display: block; + float: none; + width: 100%; + max-width: 100%; +} +.btn-group-vertical > .btn-group > .btn { + float: none; +} +.btn-group-vertical > .btn + .btn, +.btn-group-vertical > .btn + .btn-group, +.btn-group-vertical > .btn-group + .btn, +.btn-group-vertical > .btn-group + .btn-group { + margin-top: -1px; + margin-left: 0; +} +.btn-group-vertical > .btn:not(:first-child):not(:last-child) { + border-radius: 0; +} +.btn-group-vertical > .btn:first-child:not(:last-child) { + border-top-right-radius: 4px; + border-top-left-radius: 4px; + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group-vertical > .btn:last-child:not(:first-child) { + border-top-right-radius: 0; + border-top-left-radius: 0; + border-bottom-right-radius: 4px; + border-bottom-left-radius: 4px; +} +.btn-group-vertical > .btn-group:not(:first-child):not(:last-child) > .btn { + border-radius: 0; +} +.btn-group-vertical > .btn-group:first-child:not(:last-child) > .btn:last-child, +.btn-group-vertical > .btn-group:first-child:not(:last-child) > .dropdown-toggle { + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group-vertical > .btn-group:last-child:not(:first-child) > .btn:first-child { + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.btn-group-justified { + display: table; + width: 100%; + table-layout: fixed; + border-collapse: separate; +} +.btn-group-justified > .btn, +.btn-group-justified > .btn-group { + float: none; + display: table-cell; + width: 1%; +} +.btn-group-justified > .btn-group .btn { + width: 100%; +} +.btn-group-justified > .btn-group .dropdown-menu { + left: auto; +} +[data-toggle="buttons"] > .btn input[type="radio"], +[data-toggle="buttons"] > .btn-group > .btn input[type="radio"], +[data-toggle="buttons"] > .btn input[type="checkbox"], +[data-toggle="buttons"] > .btn-group > .btn input[type="checkbox"] { + position: absolute; + clip: rect(0, 0, 0, 0); + pointer-events: none; +} +.input-group { + position: relative; + display: table; + border-collapse: separate; +} +.input-group[class*="col-"] { + float: none; + padding-left: 0; + padding-right: 0; +} +.input-group .form-control { + position: relative; + z-index: 2; + float: left; + width: 100%; + margin-bottom: 0; +} +.input-group .form-control:focus { + z-index: 3; +} +.input-group-lg > .form-control, +.input-group-lg > .input-group-addon, +.input-group-lg > .input-group-btn > .btn { + height: 54px; + padding: 14px 16px; + font-size: 18px; + line-height: 1.3333333; + border-radius: 6px; +} +select.input-group-lg > .form-control, +select.input-group-lg > .input-group-addon, +select.input-group-lg > .input-group-btn > .btn { + height: 54px; + line-height: 54px; +} +textarea.input-group-lg > .form-control, +textarea.input-group-lg > .input-group-addon, +textarea.input-group-lg > .input-group-btn > .btn, +select[multiple].input-group-lg > .form-control, +select[multiple].input-group-lg > .input-group-addon, +select[multiple].input-group-lg > .input-group-btn > .btn { + height: auto; +} +.input-group-sm > .form-control, +.input-group-sm > .input-group-addon, +.input-group-sm > .input-group-btn > .btn { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +select.input-group-sm > .form-control, +select.input-group-sm > .input-group-addon, +select.input-group-sm > .input-group-btn > .btn { + height: 30px; + line-height: 30px; +} +textarea.input-group-sm > .form-control, +textarea.input-group-sm > .input-group-addon, +textarea.input-group-sm > .input-group-btn > .btn, +select[multiple].input-group-sm > .form-control, +select[multiple].input-group-sm > .input-group-addon, +select[multiple].input-group-sm > .input-group-btn > .btn { + height: auto; +} +.input-group-addon, +.input-group-btn, +.input-group .form-control { + display: table-cell; +} +.input-group-addon:not(:first-child):not(:last-child), +.input-group-btn:not(:first-child):not(:last-child), +.input-group .form-control:not(:first-child):not(:last-child) { + border-radius: 0; +} +.input-group-addon, +.input-group-btn { + width: 1%; + white-space: nowrap; + vertical-align: middle; +} +.input-group-addon { + padding: 8px 12px; + font-size: 14px; + font-weight: normal; + line-height: 1; + color: #272b30; + text-align: center; + background-color: #3a3f44; + border: 1px solid rgba(0, 0, 0, 0.6); + border-radius: 4px; +} +.input-group-addon.input-sm { + padding: 5px 10px; + font-size: 12px; + border-radius: 3px; +} +.input-group-addon.input-lg { + padding: 14px 16px; + font-size: 18px; + border-radius: 6px; +} +.input-group-addon input[type="radio"], +.input-group-addon input[type="checkbox"] { + margin-top: 0; +} +.input-group .form-control:first-child, +.input-group-addon:first-child, +.input-group-btn:first-child > .btn, +.input-group-btn:first-child > .btn-group > .btn, +.input-group-btn:first-child > .dropdown-toggle, +.input-group-btn:last-child > .btn:not(:last-child):not(.dropdown-toggle), +.input-group-btn:last-child > .btn-group:not(:last-child) > .btn { + border-bottom-right-radius: 0; + border-top-right-radius: 0; +} +.input-group-addon:first-child { + border-right: 0; +} +.input-group .form-control:last-child, +.input-group-addon:last-child, +.input-group-btn:last-child > .btn, +.input-group-btn:last-child > .btn-group > .btn, +.input-group-btn:last-child > .dropdown-toggle, +.input-group-btn:first-child > .btn:not(:first-child), +.input-group-btn:first-child > .btn-group:not(:first-child) > .btn { + border-bottom-left-radius: 0; + border-top-left-radius: 0; +} +.input-group-addon:last-child { + border-left: 0; +} +.input-group-btn { + position: relative; + font-size: 0; + white-space: nowrap; +} +.input-group-btn > .btn { + position: relative; +} +.input-group-btn > .btn + .btn { + margin-left: -1px; +} +.input-group-btn > .btn:hover, +.input-group-btn > .btn:focus, +.input-group-btn > .btn:active { + z-index: 2; +} +.input-group-btn:first-child > .btn, +.input-group-btn:first-child > .btn-group { + margin-right: -1px; +} +.input-group-btn:last-child > .btn, +.input-group-btn:last-child > .btn-group { + z-index: 2; + margin-left: -1px; +} +.nav { + margin-bottom: 0; + padding-left: 0; + list-style: none; +} +.nav > li { + position: relative; + display: block; +} +.nav > li > a { + position: relative; + display: block; + padding: 10px 15px; +} +.nav > li > a:hover, +.nav > li > a:focus { + text-decoration: none; + background-color: #3e444c; +} +.nav > li.disabled > a { + color: #7a8288; +} +.nav > li.disabled > a:hover, +.nav > li.disabled > a:focus { + color: #7a8288; + text-decoration: none; + background-color: transparent; + cursor: not-allowed; +} +.nav .open > a, +.nav .open > a:hover, +.nav .open > a:focus { + background-color: #3e444c; + border-color: #ffffff; +} +.nav .nav-divider { + height: 1px; + margin: 9px 0; + overflow: hidden; + background-color: #e5e5e5; +} +.nav > li > a > img { + max-width: none; +} +.nav-tabs { + border-bottom: 1px solid #1c1e22; +} +.nav-tabs > li { + float: left; + margin-bottom: -1px; +} +.nav-tabs > li > a { + margin-right: 2px; + line-height: 1.42857143; + border: 1px solid transparent; + border-radius: 4px 4px 0 0; +} +.nav-tabs > li > a:hover { + border-color: #1c1e22 #1c1e22 #1c1e22; +} +.nav-tabs > li.active > a, +.nav-tabs > li.active > a:hover, +.nav-tabs > li.active > a:focus { + color: #ffffff; + background-color: #3e444c; + border: 1px solid #1c1e22; + border-bottom-color: transparent; + cursor: default; +} +.nav-tabs.nav-justified { + width: 100%; + border-bottom: 0; +} +.nav-tabs.nav-justified > li { + float: none; +} +.nav-tabs.nav-justified > li > a { + text-align: center; + margin-bottom: 5px; +} +.nav-tabs.nav-justified > .dropdown .dropdown-menu { + top: auto; + left: auto; +} +@media (min-width: 768px) { + .nav-tabs.nav-justified > li { + display: table-cell; + width: 1%; + } + .nav-tabs.nav-justified > li > a { + margin-bottom: 0; + } +} +.nav-tabs.nav-justified > li > a { + margin-right: 0; + border-radius: 4px; +} +.nav-tabs.nav-justified > .active > a, +.nav-tabs.nav-justified > .active > a:hover, +.nav-tabs.nav-justified > .active > a:focus { + border: 1px solid #1c1e22; +} +@media (min-width: 768px) { + .nav-tabs.nav-justified > li > a { + border-bottom: 1px solid #1c1e22; + border-radius: 4px 4px 0 0; + } + .nav-tabs.nav-justified > .active > a, + .nav-tabs.nav-justified > .active > a:hover, + .nav-tabs.nav-justified > .active > a:focus { + border-bottom-color: #272b30; + } +} +.nav-pills > li { + float: left; +} +.nav-pills > li > a { + border-radius: 4px; +} +.nav-pills > li + li { + margin-left: 2px; +} +.nav-pills > li.active > a, +.nav-pills > li.active > a:hover, +.nav-pills > li.active > a:focus { + color: #ffffff; + background-color: transparent; +} +.nav-stacked > li { + float: none; +} +.nav-stacked > li + li { + margin-top: 2px; + margin-left: 0; +} +.nav-justified { + width: 100%; +} +.nav-justified > li { + float: none; +} +.nav-justified > li > a { + text-align: center; + margin-bottom: 5px; +} +.nav-justified > .dropdown .dropdown-menu { + top: auto; + left: auto; +} +@media (min-width: 768px) { + .nav-justified > li { + display: table-cell; + width: 1%; + } + .nav-justified > li > a { + margin-bottom: 0; + } +} +.nav-tabs-justified { + border-bottom: 0; +} +.nav-tabs-justified > li > a { + margin-right: 0; + border-radius: 4px; +} +.nav-tabs-justified > .active > a, +.nav-tabs-justified > .active > a:hover, +.nav-tabs-justified > .active > a:focus { + border: 1px solid #1c1e22; +} +@media (min-width: 768px) { + .nav-tabs-justified > li > a { + border-bottom: 1px solid #1c1e22; + border-radius: 4px 4px 0 0; + } + .nav-tabs-justified > .active > a, + .nav-tabs-justified > .active > a:hover, + .nav-tabs-justified > .active > a:focus { + border-bottom-color: #272b30; + } +} +.tab-content > .tab-pane { + display: none; +} +.tab-content > .active { + display: block; +} +.nav-tabs .dropdown-menu { + margin-top: -1px; + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.navbar { + position: relative; + min-height: 50px; + margin-bottom: 20px; + border: 1px solid transparent; +} +@media (min-width: 768px) { + .navbar { + border-radius: 4px; + } +} +@media (min-width: 768px) { + .navbar-header { + float: left; + } +} +.navbar-collapse { + overflow-x: visible; + padding-right: 15px; + padding-left: 15px; + border-top: 1px solid transparent; + -webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1); + box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1); + -webkit-overflow-scrolling: touch; +} +.navbar-collapse.in { + overflow-y: auto; +} +@media (min-width: 768px) { + .navbar-collapse { + width: auto; + border-top: 0; + -webkit-box-shadow: none; + box-shadow: none; + } + .navbar-collapse.collapse { + display: block !important; + height: auto !important; + padding-bottom: 0; + overflow: visible !important; + } + .navbar-collapse.in { + overflow-y: visible; + } + .navbar-fixed-top .navbar-collapse, + .navbar-static-top .navbar-collapse, + .navbar-fixed-bottom .navbar-collapse { + padding-left: 0; + padding-right: 0; + } +} +.navbar-fixed-top .navbar-collapse, +.navbar-fixed-bottom .navbar-collapse { + max-height: 340px; +} +@media (max-device-width: 480px) and (orientation: landscape) { + .navbar-fixed-top .navbar-collapse, + .navbar-fixed-bottom .navbar-collapse { + max-height: 200px; + } +} +.container > .navbar-header, +.container-fluid > .navbar-header, +.container > .navbar-collapse, +.container-fluid > .navbar-collapse { + margin-right: -15px; + margin-left: -15px; +} +@media (min-width: 768px) { + .container > .navbar-header, + .container-fluid > .navbar-header, + .container > .navbar-collapse, + .container-fluid > .navbar-collapse { + margin-right: 0; + margin-left: 0; + } +} +.navbar-static-top { + z-index: 1000; + border-width: 0 0 1px; +} +@media (min-width: 768px) { + .navbar-static-top { + border-radius: 0; + } +} +.navbar-fixed-top, +.navbar-fixed-bottom { + position: fixed; + right: 0; + left: 0; + z-index: 1030; +} +@media (min-width: 768px) { + .navbar-fixed-top, + .navbar-fixed-bottom { + border-radius: 0; + } +} +.navbar-fixed-top { + top: 0; + border-width: 0 0 1px; +} +.navbar-fixed-bottom { + bottom: 0; + margin-bottom: 0; + border-width: 1px 0 0; +} +.navbar-brand { + float: left; + padding: 15px 15px; + font-size: 18px; + line-height: 20px; + height: 50px; +} +.navbar-brand:hover, +.navbar-brand:focus { + text-decoration: none; +} +.navbar-brand > img { + display: block; +} +@media (min-width: 768px) { + .navbar > .container .navbar-brand, + .navbar > .container-fluid .navbar-brand { + margin-left: -15px; + } +} +.navbar-toggle { + position: relative; + float: right; + margin-right: 15px; + padding: 9px 10px; + margin-top: 8px; + margin-bottom: 8px; + background-color: transparent; + background-image: none; + border: 1px solid transparent; + border-radius: 4px; +} +.navbar-toggle:focus { + outline: 0; +} +.navbar-toggle .icon-bar { + display: block; + width: 22px; + height: 2px; + border-radius: 1px; +} +.navbar-toggle .icon-bar + .icon-bar { + margin-top: 4px; +} +@media (min-width: 768px) { + .navbar-toggle { + display: none; + } +} +.navbar-nav { + margin: 7.5px -15px; +} +.navbar-nav > li > a { + padding-top: 10px; + padding-bottom: 10px; + line-height: 20px; +} +@media (max-width: 767px) { + .navbar-nav .open .dropdown-menu { + position: static; + float: none; + width: auto; + margin-top: 0; + background-color: transparent; + border: 0; + -webkit-box-shadow: none; + box-shadow: none; + } + .navbar-nav .open .dropdown-menu > li > a, + .navbar-nav .open .dropdown-menu .dropdown-header { + padding: 5px 15px 5px 25px; + } + .navbar-nav .open .dropdown-menu > li > a { + line-height: 20px; + } + .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-nav .open .dropdown-menu > li > a:focus { + background-image: none; + } +} +@media (min-width: 768px) { + .navbar-nav { + float: left; + margin: 0; + } + .navbar-nav > li { + float: left; + } + .navbar-nav > li > a { + padding-top: 15px; + padding-bottom: 15px; + } +} +.navbar-form { + margin-left: -15px; + margin-right: -15px; + padding: 10px 15px; + border-top: 1px solid transparent; + border-bottom: 1px solid transparent; + -webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1), 0 1px 0 rgba(255, 255, 255, 0.1); + box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1), 0 1px 0 rgba(255, 255, 255, 0.1); + margin-top: 6px; + margin-bottom: 6px; +} +@media (min-width: 768px) { + .navbar-form .form-group { + display: inline-block; + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .form-control { + display: inline-block; + width: auto; + vertical-align: middle; + } + .navbar-form .form-control-static { + display: inline-block; + } + .navbar-form .input-group { + display: inline-table; + vertical-align: middle; + } + .navbar-form .input-group .input-group-addon, + .navbar-form .input-group .input-group-btn, + .navbar-form .input-group .form-control { + width: auto; + } + .navbar-form .input-group > .form-control { + width: 100%; + } + .navbar-form .control-label { + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .radio, + .navbar-form .checkbox { + display: inline-block; + margin-top: 0; + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .radio label, + .navbar-form .checkbox label { + padding-left: 0; + } + .navbar-form .radio input[type="radio"], + .navbar-form .checkbox input[type="checkbox"] { + position: relative; + margin-left: 0; + } + .navbar-form .has-feedback .form-control-feedback { + top: 0; + } +} +@media (max-width: 767px) { + .navbar-form .form-group { + margin-bottom: 5px; + } + .navbar-form .form-group:last-child { + margin-bottom: 0; + } +} +@media (min-width: 768px) { + .navbar-form { + width: auto; + border: 0; + margin-left: 0; + margin-right: 0; + padding-top: 0; + padding-bottom: 0; + -webkit-box-shadow: none; + box-shadow: none; + } +} +.navbar-nav > li > .dropdown-menu { + margin-top: 0; + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.navbar-fixed-bottom .navbar-nav > li > .dropdown-menu { + margin-bottom: 0; + border-top-right-radius: 4px; + border-top-left-radius: 4px; + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.navbar-btn { + margin-top: 6px; + margin-bottom: 6px; +} +.navbar-btn.btn-sm { + margin-top: 10px; + margin-bottom: 10px; +} +.navbar-btn.btn-xs { + margin-top: 14px; + margin-bottom: 14px; +} +.navbar-text { + margin-top: 15px; + margin-bottom: 15px; +} +@media (min-width: 768px) { + .navbar-text { + float: left; + margin-left: 15px; + margin-right: 15px; + } +} +@media (min-width: 768px) { + .navbar-left { + float: left !important; + } + .navbar-right { + float: right !important; + margin-right: -15px; + } + .navbar-right ~ .navbar-right { + margin-right: 0; + } +} +.navbar-default { + background-color: #3a3f44; + border-color: #2b2e32; +} +.navbar-default .navbar-brand { + color: #c8c8c8; +} +.navbar-default .navbar-brand:hover, +.navbar-default .navbar-brand:focus { + color: #ffffff; + background-color: none; +} +.navbar-default .navbar-text { + color: #c8c8c8; +} +.navbar-default .navbar-nav > li > a { + color: #c8c8c8; +} +.navbar-default .navbar-nav > li > a:hover, +.navbar-default .navbar-nav > li > a:focus { + color: #ffffff; + background-color: #272b2e; +} +.navbar-default .navbar-nav > .active > a, +.navbar-default .navbar-nav > .active > a:hover, +.navbar-default .navbar-nav > .active > a:focus { + color: #ffffff; + background-color: #272b2e; +} +.navbar-default .navbar-nav > .disabled > a, +.navbar-default .navbar-nav > .disabled > a:hover, +.navbar-default .navbar-nav > .disabled > a:focus { + color: #cccccc; + background-color: transparent; +} +.navbar-default .navbar-toggle { + border-color: #272b2e; +} +.navbar-default .navbar-toggle:hover, +.navbar-default .navbar-toggle:focus { + background-color: #272b2e; +} +.navbar-default .navbar-toggle .icon-bar { + background-color: #c8c8c8; +} +.navbar-default .navbar-collapse, +.navbar-default .navbar-form { + border-color: #2b2e32; +} +.navbar-default .navbar-nav > .open > a, +.navbar-default .navbar-nav > .open > a:hover, +.navbar-default .navbar-nav > .open > a:focus { + background-color: #272b2e; + color: #ffffff; +} +@media (max-width: 767px) { + .navbar-default .navbar-nav .open .dropdown-menu > li > a { + color: #c8c8c8; + } + .navbar-default .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > li > a:focus { + color: #ffffff; + background-color: #272b2e; + } + .navbar-default .navbar-nav .open .dropdown-menu > .active > a, + .navbar-default .navbar-nav .open .dropdown-menu > .active > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > .active > a:focus { + color: #ffffff; + background-color: #272b2e; + } + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a, + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a:focus { + color: #cccccc; + background-color: transparent; + } +} +.navbar-default .navbar-link { + color: #c8c8c8; +} +.navbar-default .navbar-link:hover { + color: #ffffff; +} +.navbar-default .btn-link { + color: #c8c8c8; +} +.navbar-default .btn-link:hover, +.navbar-default .btn-link:focus { + color: #ffffff; +} +.navbar-default .btn-link[disabled]:hover, +fieldset[disabled] .navbar-default .btn-link:hover, +.navbar-default .btn-link[disabled]:focus, +fieldset[disabled] .navbar-default .btn-link:focus { + color: #cccccc; +} +.navbar-inverse { + background-color: #7a8288; + border-color: #62686d; +} +.navbar-inverse .navbar-brand { + color: #cccccc; +} +.navbar-inverse .navbar-brand:hover, +.navbar-inverse .navbar-brand:focus { + color: #ffffff; + background-color: none; +} +.navbar-inverse .navbar-text { + color: #cccccc; +} +.navbar-inverse .navbar-nav > li > a { + color: #cccccc; +} +.navbar-inverse .navbar-nav > li > a:hover, +.navbar-inverse .navbar-nav > li > a:focus { + color: #ffffff; + background-color: #5d6368; +} +.navbar-inverse .navbar-nav > .active > a, +.navbar-inverse .navbar-nav > .active > a:hover, +.navbar-inverse .navbar-nav > .active > a:focus { + color: #ffffff; + background-color: #5d6368; +} +.navbar-inverse .navbar-nav > .disabled > a, +.navbar-inverse .navbar-nav > .disabled > a:hover, +.navbar-inverse .navbar-nav > .disabled > a:focus { + color: #cccccc; + background-color: transparent; +} +.navbar-inverse .navbar-toggle { + border-color: #5d6368; +} +.navbar-inverse .navbar-toggle:hover, +.navbar-inverse .navbar-toggle:focus { + background-color: #5d6368; +} +.navbar-inverse .navbar-toggle .icon-bar { + background-color: #ffffff; +} +.navbar-inverse .navbar-collapse, +.navbar-inverse .navbar-form { + border-color: #697075; +} +.navbar-inverse .navbar-nav > .open > a, +.navbar-inverse .navbar-nav > .open > a:hover, +.navbar-inverse .navbar-nav > .open > a:focus { + background-color: #5d6368; + color: #ffffff; +} +@media (max-width: 767px) { + .navbar-inverse .navbar-nav .open .dropdown-menu > .dropdown-header { + border-color: #62686d; + } + .navbar-inverse .navbar-nav .open .dropdown-menu .divider { + background-color: #62686d; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a { + color: #cccccc; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a:focus { + color: #ffffff; + background-color: #5d6368; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a, + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a:focus { + color: #ffffff; + background-color: #5d6368; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a, + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a:focus { + color: #cccccc; + background-color: transparent; + } +} +.navbar-inverse .navbar-link { + color: #cccccc; +} +.navbar-inverse .navbar-link:hover { + color: #ffffff; +} +.navbar-inverse .btn-link { + color: #cccccc; +} +.navbar-inverse .btn-link:hover, +.navbar-inverse .btn-link:focus { + color: #ffffff; +} +.navbar-inverse .btn-link[disabled]:hover, +fieldset[disabled] .navbar-inverse .btn-link:hover, +.navbar-inverse .btn-link[disabled]:focus, +fieldset[disabled] .navbar-inverse .btn-link:focus { + color: #cccccc; +} +.breadcrumb { + padding: 8px 15px; + margin-bottom: 20px; + list-style: none; + background-color: transparent; + border-radius: 4px; +} +.breadcrumb > li { + display: inline-block; +} +.breadcrumb > li + li:before { + content: "/\00a0"; + padding: 0 5px; + color: #cccccc; +} +.breadcrumb > .active { + color: #7a8288; +} +.pagination { + display: inline-block; + padding-left: 0; + margin: 20px 0; + border-radius: 4px; +} +.pagination > li { + display: inline; +} +.pagination > li > a, +.pagination > li > span { + position: relative; + float: left; + padding: 8px 12px; + line-height: 1.42857143; + text-decoration: none; + color: #ffffff; + background-color: #3a3f44; + border: 1px solid rgba(0, 0, 0, 0.6); + margin-left: -1px; +} +.pagination > li:first-child > a, +.pagination > li:first-child > span { + margin-left: 0; + border-bottom-left-radius: 4px; + border-top-left-radius: 4px; +} +.pagination > li:last-child > a, +.pagination > li:last-child > span { + border-bottom-right-radius: 4px; + border-top-right-radius: 4px; +} +.pagination > li > a:hover, +.pagination > li > span:hover, +.pagination > li > a:focus, +.pagination > li > span:focus { + z-index: 2; + color: #ffffff; + background-color: transparent; + border-color: rgba(0, 0, 0, 0.6); +} +.pagination > .active > a, +.pagination > .active > span, +.pagination > .active > a:hover, +.pagination > .active > span:hover, +.pagination > .active > a:focus, +.pagination > .active > span:focus { + z-index: 3; + color: #ffffff; + background-color: #232628; + border-color: rgba(0, 0, 0, 0.6); + cursor: default; +} +.pagination > .disabled > span, +.pagination > .disabled > span:hover, +.pagination > .disabled > span:focus, +.pagination > .disabled > a, +.pagination > .disabled > a:hover, +.pagination > .disabled > a:focus { + color: #7a8288; + background-color: #ffffff; + border-color: rgba(0, 0, 0, 0.6); + cursor: not-allowed; +} +.pagination-lg > li > a, +.pagination-lg > li > span { + padding: 14px 16px; + font-size: 18px; + line-height: 1.3333333; +} +.pagination-lg > li:first-child > a, +.pagination-lg > li:first-child > span { + border-bottom-left-radius: 6px; + border-top-left-radius: 6px; +} +.pagination-lg > li:last-child > a, +.pagination-lg > li:last-child > span { + border-bottom-right-radius: 6px; + border-top-right-radius: 6px; +} +.pagination-sm > li > a, +.pagination-sm > li > span { + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; +} +.pagination-sm > li:first-child > a, +.pagination-sm > li:first-child > span { + border-bottom-left-radius: 3px; + border-top-left-radius: 3px; +} +.pagination-sm > li:last-child > a, +.pagination-sm > li:last-child > span { + border-bottom-right-radius: 3px; + border-top-right-radius: 3px; +} +.pager { + padding-left: 0; + margin: 20px 0; + list-style: none; + text-align: center; +} +.pager li { + display: inline; +} +.pager li > a, +.pager li > span { + display: inline-block; + padding: 5px 14px; + background-color: #3a3f44; + border: 1px solid rgba(0, 0, 0, 0.6); + border-radius: 15px; +} +.pager li > a:hover, +.pager li > a:focus { + text-decoration: none; + background-color: transparent; +} +.pager .next > a, +.pager .next > span { + float: right; +} +.pager .previous > a, +.pager .previous > span { + float: left; +} +.pager .disabled > a, +.pager .disabled > a:hover, +.pager .disabled > a:focus, +.pager .disabled > span { + color: #7a8288; + background-color: #3a3f44; + cursor: not-allowed; +} +.label { + display: inline; + padding: .2em .6em .3em; + font-size: 75%; + font-weight: bold; + line-height: 1; + color: #ffffff; + text-align: center; + white-space: nowrap; + vertical-align: baseline; + border-radius: .25em; +} +a.label:hover, +a.label:focus { + color: #ffffff; + text-decoration: none; + cursor: pointer; +} +.label:empty { + display: none; +} +.btn .label { + position: relative; + top: -1px; +} +.label-default { + background-color: #3a3f44; +} +.label-default[href]:hover, +.label-default[href]:focus { + background-color: #232628; +} +.label-primary { + background-color: #7a8288; +} +.label-primary[href]:hover, +.label-primary[href]:focus { + background-color: #62686d; +} +.label-success { + background-color: #62c462; +} +.label-success[href]:hover, +.label-success[href]:focus { + background-color: #42b142; +} +.label-info { + background-color: #5bc0de; +} +.label-info[href]:hover, +.label-info[href]:focus { + background-color: #31b0d5; +} +.label-warning { + background-color: #f89406; +} +.label-warning[href]:hover, +.label-warning[href]:focus { + background-color: #c67605; +} +.label-danger { + background-color: #ee5f5b; +} +.label-danger[href]:hover, +.label-danger[href]:focus { + background-color: #e9322d; +} +.badge { + display: inline-block; + min-width: 10px; + padding: 3px 7px; + font-size: 12px; + font-weight: bold; + color: #ffffff; + line-height: 1; + vertical-align: middle; + white-space: nowrap; + text-align: center; + background-color: #7a8288; + border-radius: 10px; +} +.badge:empty { + display: none; +} +.btn .badge { + position: relative; + top: -1px; +} +.btn-xs .badge, +.btn-group-xs > .btn .badge { + top: 0; + padding: 1px 5px; +} +a.badge:hover, +a.badge:focus { + color: #ffffff; + text-decoration: none; + cursor: pointer; +} +.list-group-item.active > .badge, +.nav-pills > .active > a > .badge { + color: #ffffff; + background-color: #7a8288; +} +.list-group-item > .badge { + float: right; +} +.list-group-item > .badge + .badge { + margin-right: 5px; +} +.nav-pills > li > a > .badge { + margin-left: 3px; +} +.jumbotron { + padding-top: 30px; + padding-bottom: 30px; + margin-bottom: 30px; + color: inherit; + background-color: #1c1e22; +} +.jumbotron h1, +.jumbotron .h1 { + color: inherit; +} +.jumbotron p { + margin-bottom: 15px; + font-size: 21px; + font-weight: 200; +} +.jumbotron > hr { + border-top-color: #050506; +} +.container .jumbotron, +.container-fluid .jumbotron { + border-radius: 6px; + padding-left: 15px; + padding-right: 15px; +} +.jumbotron .container { + max-width: 100%; +} +@media screen and (min-width: 768px) { + .jumbotron { + padding-top: 48px; + padding-bottom: 48px; + } + .container .jumbotron, + .container-fluid .jumbotron { + padding-left: 60px; + padding-right: 60px; + } + .jumbotron h1, + .jumbotron .h1 { + font-size: 63px; + } +} +.thumbnail { + display: block; + padding: 4px; + margin-bottom: 20px; + line-height: 1.42857143; + background-color: #1c1e22; + border: 1px solid #0c0d0e; + border-radius: 4px; + -webkit-transition: border 0.2s ease-in-out; + -o-transition: border 0.2s ease-in-out; + transition: border 0.2s ease-in-out; +} +.thumbnail > img, +.thumbnail a > img { + margin-left: auto; + margin-right: auto; +} +a.thumbnail:hover, +a.thumbnail:focus, +a.thumbnail.active { + border-color: #ffffff; +} +.thumbnail .caption { + padding: 9px; + color: #c8c8c8; +} +.alert { + padding: 15px; + margin-bottom: 20px; + border: 1px solid transparent; + border-radius: 4px; +} +.alert h4 { + margin-top: 0; + color: inherit; +} +.alert .alert-link { + font-weight: bold; +} +.alert > p, +.alert > ul { + margin-bottom: 0; +} +.alert > p + p { + margin-top: 5px; +} +.alert-dismissable, +.alert-dismissible { + padding-right: 35px; +} +.alert-dismissable .close, +.alert-dismissible .close { + position: relative; + top: -2px; + right: -21px; + color: inherit; +} +.alert-success { + background-color: #62c462; + border-color: #62bd4f; + color: #ffffff; +} +.alert-success hr { + border-top-color: #55b142; +} +.alert-success .alert-link { + color: #e6e6e6; +} +.alert-info { + background-color: #5bc0de; + border-color: #3dced8; + color: #ffffff; +} +.alert-info hr { + border-top-color: #2ac7d2; +} +.alert-info .alert-link { + color: #e6e6e6; +} +.alert-warning { + background-color: #f89406; + border-color: #e96506; + color: #ffffff; +} +.alert-warning hr { + border-top-color: #d05a05; +} +.alert-warning .alert-link { + color: #e6e6e6; +} +.alert-danger { + background-color: #ee5f5b; + border-color: #ed4d63; + color: #ffffff; +} +.alert-danger hr { + border-top-color: #ea364f; +} +.alert-danger .alert-link { + color: #e6e6e6; +} +@-webkit-keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +@-o-keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +@keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +.progress { + overflow: hidden; + height: 20px; + margin-bottom: 20px; + background-color: #1c1e22; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1); + box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1); +} +.progress-bar { + float: left; + width: 0%; + height: 100%; + font-size: 12px; + line-height: 20px; + color: #ffffff; + text-align: center; + background-color: #7a8288; + -webkit-box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15); + box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15); + -webkit-transition: width 0.6s ease; + -o-transition: width 0.6s ease; + transition: width 0.6s ease; +} +.progress-striped .progress-bar, +.progress-bar-striped { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + -webkit-background-size: 40px 40px; + background-size: 40px 40px; +} +.progress.active .progress-bar, +.progress-bar.active { + -webkit-animation: progress-bar-stripes 2s linear infinite; + -o-animation: progress-bar-stripes 2s linear infinite; + animation: progress-bar-stripes 2s linear infinite; +} +.progress-bar-success { + background-color: #62c462; +} +.progress-striped .progress-bar-success { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); +} +.progress-bar-info { + background-color: #5bc0de; +} +.progress-striped .progress-bar-info { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); +} +.progress-bar-warning { + background-color: #f89406; +} +.progress-striped .progress-bar-warning { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); +} +.progress-bar-danger { + background-color: #ee5f5b; +} +.progress-striped .progress-bar-danger { + background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); + background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent); +} +.media { + margin-top: 15px; +} +.media:first-child { + margin-top: 0; +} +.media, +.media-body { + zoom: 1; + overflow: hidden; +} +.media-body { + width: 10000px; +} +.media-object { + display: block; +} +.media-object.img-thumbnail { + max-width: none; +} +.media-right, +.media > .pull-right { + padding-left: 10px; +} +.media-left, +.media > .pull-left { + padding-right: 10px; +} +.media-left, +.media-right, +.media-body { + display: table-cell; + vertical-align: top; +} +.media-middle { + vertical-align: middle; +} +.media-bottom { + vertical-align: bottom; +} +.media-heading { + margin-top: 0; + margin-bottom: 5px; +} +.media-list { + padding-left: 0; + list-style: none; +} +.list-group { + margin-bottom: 20px; + padding-left: 0; +} +.list-group-item { + position: relative; + display: block; + padding: 10px 15px; + margin-bottom: -1px; + background-color: #32383e; + border: 1px solid rgba(0, 0, 0, 0.6); +} +.list-group-item:first-child { + border-top-right-radius: 4px; + border-top-left-radius: 4px; +} +.list-group-item:last-child { + margin-bottom: 0; + border-bottom-right-radius: 4px; + border-bottom-left-radius: 4px; +} +a.list-group-item, +button.list-group-item { + color: #c8c8c8; +} +a.list-group-item .list-group-item-heading, +button.list-group-item .list-group-item-heading { + color: #ffffff; +} +a.list-group-item:hover, +button.list-group-item:hover, +a.list-group-item:focus, +button.list-group-item:focus { + text-decoration: none; + color: #c8c8c8; + background-color: #3e444c; +} +button.list-group-item { + width: 100%; + text-align: left; +} +.list-group-item.disabled, +.list-group-item.disabled:hover, +.list-group-item.disabled:focus { + background-color: #999999; + color: #7a8288; + cursor: not-allowed; +} +.list-group-item.disabled .list-group-item-heading, +.list-group-item.disabled:hover .list-group-item-heading, +.list-group-item.disabled:focus .list-group-item-heading { + color: inherit; +} +.list-group-item.disabled .list-group-item-text, +.list-group-item.disabled:hover .list-group-item-text, +.list-group-item.disabled:focus .list-group-item-text { + color: #7a8288; +} +.list-group-item.active, +.list-group-item.active:hover, +.list-group-item.active:focus { + z-index: 2; + color: #ffffff; + background-color: #3e444c; + border-color: rgba(0, 0, 0, 0.6); +} +.list-group-item.active .list-group-item-heading, +.list-group-item.active:hover .list-group-item-heading, +.list-group-item.active:focus .list-group-item-heading, +.list-group-item.active .list-group-item-heading > small, +.list-group-item.active:hover .list-group-item-heading > small, +.list-group-item.active:focus .list-group-item-heading > small, +.list-group-item.active .list-group-item-heading > .small, +.list-group-item.active:hover .list-group-item-heading > .small, +.list-group-item.active:focus .list-group-item-heading > .small { + color: inherit; +} +.list-group-item.active .list-group-item-text, +.list-group-item.active:hover .list-group-item-text, +.list-group-item.active:focus .list-group-item-text { + color: #a2aab4; +} +.list-group-item-success { + color: #ffffff; + background-color: #62c462; +} +a.list-group-item-success, +button.list-group-item-success { + color: #ffffff; +} +a.list-group-item-success .list-group-item-heading, +button.list-group-item-success .list-group-item-heading { + color: inherit; +} +a.list-group-item-success:hover, +button.list-group-item-success:hover, +a.list-group-item-success:focus, +button.list-group-item-success:focus { + color: #ffffff; + background-color: #4fbd4f; +} +a.list-group-item-success.active, +button.list-group-item-success.active, +a.list-group-item-success.active:hover, +button.list-group-item-success.active:hover, +a.list-group-item-success.active:focus, +button.list-group-item-success.active:focus { + color: #fff; + background-color: #ffffff; + border-color: #ffffff; +} +.list-group-item-info { + color: #ffffff; + background-color: #5bc0de; +} +a.list-group-item-info, +button.list-group-item-info { + color: #ffffff; +} +a.list-group-item-info .list-group-item-heading, +button.list-group-item-info .list-group-item-heading { + color: inherit; +} +a.list-group-item-info:hover, +button.list-group-item-info:hover, +a.list-group-item-info:focus, +button.list-group-item-info:focus { + color: #ffffff; + background-color: #46b8da; +} +a.list-group-item-info.active, +button.list-group-item-info.active, +a.list-group-item-info.active:hover, +button.list-group-item-info.active:hover, +a.list-group-item-info.active:focus, +button.list-group-item-info.active:focus { + color: #fff; + background-color: #ffffff; + border-color: #ffffff; +} +.list-group-item-warning { + color: #ffffff; + background-color: #f89406; +} +a.list-group-item-warning, +button.list-group-item-warning { + color: #ffffff; +} +a.list-group-item-warning .list-group-item-heading, +button.list-group-item-warning .list-group-item-heading { + color: inherit; +} +a.list-group-item-warning:hover, +button.list-group-item-warning:hover, +a.list-group-item-warning:focus, +button.list-group-item-warning:focus { + color: #ffffff; + background-color: #df8505; +} +a.list-group-item-warning.active, +button.list-group-item-warning.active, +a.list-group-item-warning.active:hover, +button.list-group-item-warning.active:hover, +a.list-group-item-warning.active:focus, +button.list-group-item-warning.active:focus { + color: #fff; + background-color: #ffffff; + border-color: #ffffff; +} +.list-group-item-danger { + color: #ffffff; + background-color: #ee5f5b; +} +a.list-group-item-danger, +button.list-group-item-danger { + color: #ffffff; +} +a.list-group-item-danger .list-group-item-heading, +button.list-group-item-danger .list-group-item-heading { + color: inherit; +} +a.list-group-item-danger:hover, +button.list-group-item-danger:hover, +a.list-group-item-danger:focus, +button.list-group-item-danger:focus { + color: #ffffff; + background-color: #ec4844; +} +a.list-group-item-danger.active, +button.list-group-item-danger.active, +a.list-group-item-danger.active:hover, +button.list-group-item-danger.active:hover, +a.list-group-item-danger.active:focus, +button.list-group-item-danger.active:focus { + color: #fff; + background-color: #ffffff; + border-color: #ffffff; +} +.list-group-item-heading { + margin-top: 0; + margin-bottom: 5px; +} +.list-group-item-text { + margin-bottom: 0; + line-height: 1.3; +} +.panel { + margin-bottom: 20px; + background-color: #2e3338; + border: 1px solid transparent; + border-radius: 4px; + -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05); + box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05); +} +.panel-body { + padding: 15px; +} +.panel-heading { + padding: 10px 15px; + border-bottom: 1px solid transparent; + border-top-right-radius: 3px; + border-top-left-radius: 3px; +} +.panel-heading > .dropdown .dropdown-toggle { + color: inherit; +} +.panel-title { + margin-top: 0; + margin-bottom: 0; + font-size: 16px; + color: inherit; +} +.panel-title > a, +.panel-title > small, +.panel-title > .small, +.panel-title > small > a, +.panel-title > .small > a { + color: inherit; +} +.panel-footer { + padding: 10px 15px; + background-color: #3e444c; + border-top: 1px solid rgba(0, 0, 0, 0.6); + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel > .list-group, +.panel > .panel-collapse > .list-group { + margin-bottom: 0; +} +.panel > .list-group .list-group-item, +.panel > .panel-collapse > .list-group .list-group-item { + border-width: 1px 0; + border-radius: 0; +} +.panel > .list-group:first-child .list-group-item:first-child, +.panel > .panel-collapse > .list-group:first-child .list-group-item:first-child { + border-top: 0; + border-top-right-radius: 3px; + border-top-left-radius: 3px; +} +.panel > .list-group:last-child .list-group-item:last-child, +.panel > .panel-collapse > .list-group:last-child .list-group-item:last-child { + border-bottom: 0; + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel > .panel-heading + .panel-collapse > .list-group .list-group-item:first-child { + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.panel-heading + .list-group .list-group-item:first-child { + border-top-width: 0; +} +.list-group + .panel-footer { + border-top-width: 0; +} +.panel > .table, +.panel > .table-responsive > .table, +.panel > .panel-collapse > .table { + margin-bottom: 0; +} +.panel > .table caption, +.panel > .table-responsive > .table caption, +.panel > .panel-collapse > .table caption { + padding-left: 15px; + padding-right: 15px; +} +.panel > .table:first-child, +.panel > .table-responsive:first-child > .table:first-child { + border-top-right-radius: 3px; + border-top-left-radius: 3px; +} +.panel > .table:first-child > thead:first-child > tr:first-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child { + border-top-left-radius: 3px; + border-top-right-radius: 3px; +} +.panel > .table:first-child > thead:first-child > tr:first-child td:first-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child td:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child td:first-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child td:first-child, +.panel > .table:first-child > thead:first-child > tr:first-child th:first-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child th:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child th:first-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child th:first-child { + border-top-left-radius: 3px; +} +.panel > .table:first-child > thead:first-child > tr:first-child td:last-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child td:last-child, +.panel > .table:first-child > tbody:first-child > tr:first-child td:last-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child td:last-child, +.panel > .table:first-child > thead:first-child > tr:first-child th:last-child, +.panel > .table-responsive:first-child > .table:first-child > thead:first-child > tr:first-child th:last-child, +.panel > .table:first-child > tbody:first-child > tr:first-child th:last-child, +.panel > .table-responsive:first-child > .table:first-child > tbody:first-child > tr:first-child th:last-child { + border-top-right-radius: 3px; +} +.panel > .table:last-child, +.panel > .table-responsive:last-child > .table:last-child { + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child { + border-bottom-left-radius: 3px; + border-bottom-right-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child td:first-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child td:first-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child td:first-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child td:first-child, +.panel > .table:last-child > tbody:last-child > tr:last-child th:first-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child th:first-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child th:first-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child th:first-child { + border-bottom-left-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child td:last-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child td:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child td:last-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child td:last-child, +.panel > .table:last-child > tbody:last-child > tr:last-child th:last-child, +.panel > .table-responsive:last-child > .table:last-child > tbody:last-child > tr:last-child th:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child th:last-child, +.panel > .table-responsive:last-child > .table:last-child > tfoot:last-child > tr:last-child th:last-child { + border-bottom-right-radius: 3px; +} +.panel > .panel-body + .table, +.panel > .panel-body + .table-responsive, +.panel > .table + .panel-body, +.panel > .table-responsive + .panel-body { + border-top: 1px solid #1c1e22; +} +.panel > .table > tbody:first-child > tr:first-child th, +.panel > .table > tbody:first-child > tr:first-child td { + border-top: 0; +} +.panel > .table-bordered, +.panel > .table-responsive > .table-bordered { + border: 0; +} +.panel > .table-bordered > thead > tr > th:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:first-child, +.panel > .table-bordered > tbody > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:first-child, +.panel > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-bordered > thead > tr > td:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:first-child, +.panel > .table-bordered > tbody > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > td:first-child, +.panel > .table-bordered > tfoot > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:first-child { + border-left: 0; +} +.panel > .table-bordered > thead > tr > th:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:last-child, +.panel > .table-bordered > tbody > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:last-child, +.panel > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-bordered > thead > tr > td:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:last-child, +.panel > .table-bordered > tbody > tr > td:last-child, +.panel > .table-responsive > .table-bordered > tbody > tr > td:last-child, +.panel > .table-bordered > tfoot > tr > td:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:last-child { + border-right: 0; +} +.panel > .table-bordered > thead > tr:first-child > td, +.panel > .table-responsive > .table-bordered > thead > tr:first-child > td, +.panel > .table-bordered > tbody > tr:first-child > td, +.panel > .table-responsive > .table-bordered > tbody > tr:first-child > td, +.panel > .table-bordered > thead > tr:first-child > th, +.panel > .table-responsive > .table-bordered > thead > tr:first-child > th, +.panel > .table-bordered > tbody > tr:first-child > th, +.panel > .table-responsive > .table-bordered > tbody > tr:first-child > th { + border-bottom: 0; +} +.panel > .table-bordered > tbody > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > td, +.panel > .table-bordered > tfoot > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > td, +.panel > .table-bordered > tbody > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > th, +.panel > .table-bordered > tfoot > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > th { + border-bottom: 0; +} +.panel > .table-responsive { + border: 0; + margin-bottom: 0; +} +.panel-group { + margin-bottom: 20px; +} +.panel-group .panel { + margin-bottom: 0; + border-radius: 4px; +} +.panel-group .panel + .panel { + margin-top: 5px; +} +.panel-group .panel-heading { + border-bottom: 0; +} +.panel-group .panel-heading + .panel-collapse > .panel-body, +.panel-group .panel-heading + .panel-collapse > .list-group { + border-top: 1px solid rgba(0, 0, 0, 0.6); +} +.panel-group .panel-footer { + border-top: 0; +} +.panel-group .panel-footer + .panel-collapse .panel-body { + border-bottom: 1px solid rgba(0, 0, 0, 0.6); +} +.panel-default { + border-color: rgba(0, 0, 0, 0.6); +} +.panel-default > .panel-heading { + color: #c8c8c8; + background-color: #3e444c; + border-color: rgba(0, 0, 0, 0.6); +} +.panel-default > .panel-heading + .panel-collapse > .panel-body { + border-top-color: rgba(0, 0, 0, 0.6); +} +.panel-default > .panel-heading .badge { + color: #3e444c; + background-color: #c8c8c8; +} +.panel-default > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: rgba(0, 0, 0, 0.6); +} +.panel-primary { + border-color: rgba(0, 0, 0, 0.6); +} +.panel-primary > .panel-heading { + color: #ffffff; + background-color: #7a8288; + border-color: rgba(0, 0, 0, 0.6); +} +.panel-primary > .panel-heading + .panel-collapse > .panel-body { + border-top-color: rgba(0, 0, 0, 0.6); +} +.panel-primary > .panel-heading .badge { + color: #7a8288; + background-color: #ffffff; +} +.panel-primary > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: rgba(0, 0, 0, 0.6); +} +.panel-success { + border-color: rgba(0, 0, 0, 0.6); +} +.panel-success > .panel-heading { + color: #ffffff; + background-color: #62c462; + border-color: rgba(0, 0, 0, 0.6); +} +.panel-success > .panel-heading + .panel-collapse > .panel-body { + border-top-color: rgba(0, 0, 0, 0.6); +} +.panel-success > .panel-heading .badge { + color: #62c462; + background-color: #ffffff; +} +.panel-success > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: rgba(0, 0, 0, 0.6); +} +.panel-info { + border-color: rgba(0, 0, 0, 0.6); +} +.panel-info > .panel-heading { + color: #ffffff; + background-color: #5bc0de; + border-color: rgba(0, 0, 0, 0.6); +} +.panel-info > .panel-heading + .panel-collapse > .panel-body { + border-top-color: rgba(0, 0, 0, 0.6); +} +.panel-info > .panel-heading .badge { + color: #5bc0de; + background-color: #ffffff; +} +.panel-info > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: rgba(0, 0, 0, 0.6); +} +.panel-warning { + border-color: rgba(0, 0, 0, 0.6); +} +.panel-warning > .panel-heading { + color: #ffffff; + background-color: #f89406; + border-color: rgba(0, 0, 0, 0.6); +} +.panel-warning > .panel-heading + .panel-collapse > .panel-body { + border-top-color: rgba(0, 0, 0, 0.6); +} +.panel-warning > .panel-heading .badge { + color: #f89406; + background-color: #ffffff; +} +.panel-warning > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: rgba(0, 0, 0, 0.6); +} +.panel-danger { + border-color: rgba(0, 0, 0, 0.6); +} +.panel-danger > .panel-heading { + color: #ffffff; + background-color: #ee5f5b; + border-color: rgba(0, 0, 0, 0.6); +} +.panel-danger > .panel-heading + .panel-collapse > .panel-body { + border-top-color: rgba(0, 0, 0, 0.6); +} +.panel-danger > .panel-heading .badge { + color: #ee5f5b; + background-color: #ffffff; +} +.panel-danger > .panel-footer + .panel-collapse > .panel-body { + border-bottom-color: rgba(0, 0, 0, 0.6); +} +.embed-responsive { + position: relative; + display: block; + height: 0; + padding: 0; + overflow: hidden; +} +.embed-responsive .embed-responsive-item, +.embed-responsive iframe, +.embed-responsive embed, +.embed-responsive object, +.embed-responsive video { + position: absolute; + top: 0; + left: 0; + bottom: 0; + height: 100%; + width: 100%; + border: 0; +} +.embed-responsive-16by9 { + padding-bottom: 56.25%; +} +.embed-responsive-4by3 { + padding-bottom: 75%; +} +.well { + min-height: 20px; + padding: 19px; + margin-bottom: 20px; + background-color: #1c1e22; + border: 1px solid #0c0d0e; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05); +} +.well blockquote { + border-color: #ddd; + border-color: rgba(0, 0, 0, 0.15); +} +.well-lg { + padding: 24px; + border-radius: 6px; +} +.well-sm { + padding: 9px; + border-radius: 3px; +} +.close { + float: right; + font-size: 21px; + font-weight: bold; + line-height: 1; + color: #000000; + text-shadow: 0 1px 0 #ffffff; + opacity: 0.2; + filter: alpha(opacity=20); +} +.close:hover, +.close:focus { + color: #000000; + text-decoration: none; + cursor: pointer; + opacity: 0.5; + filter: alpha(opacity=50); +} +button.close { + padding: 0; + cursor: pointer; + background: transparent; + border: 0; + -webkit-appearance: none; +} +.modal-open { + overflow: hidden; +} +.modal { + display: none; + overflow: hidden; + position: fixed; + top: 0; + right: 0; + bottom: 0; + left: 0; + z-index: 1050; + -webkit-overflow-scrolling: touch; + outline: 0; +} +.modal.fade .modal-dialog { + -webkit-transform: translate(0, -25%); + -ms-transform: translate(0, -25%); + -o-transform: translate(0, -25%); + transform: translate(0, -25%); + -webkit-transition: -webkit-transform 0.3s ease-out; + -o-transition: -o-transform 0.3s ease-out; + transition: transform 0.3s ease-out; +} +.modal.in .modal-dialog { + -webkit-transform: translate(0, 0); + -ms-transform: translate(0, 0); + -o-transform: translate(0, 0); + transform: translate(0, 0); +} +.modal-open .modal { + overflow-x: hidden; + overflow-y: auto; +} +.modal-dialog { + position: relative; + width: auto; + margin: 10px; +} +.modal-content { + position: relative; + background-color: #2e3338; + border: 1px solid #999999; + border: 1px solid rgba(0, 0, 0, 0.2); + border-radius: 6px; + -webkit-box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5); + box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5); + -webkit-background-clip: padding-box; + background-clip: padding-box; + outline: 0; +} +.modal-backdrop { + position: fixed; + top: 0; + right: 0; + bottom: 0; + left: 0; + z-index: 1040; + background-color: #000000; +} +.modal-backdrop.fade { + opacity: 0; + filter: alpha(opacity=0); +} +.modal-backdrop.in { + opacity: 0.5; + filter: alpha(opacity=50); +} +.modal-header { + padding: 15px; + border-bottom: 1px solid #1c1e22; +} +.modal-header .close { + margin-top: -2px; +} +.modal-title { + margin: 0; + line-height: 1.42857143; +} +.modal-body { + position: relative; + padding: 20px; +} +.modal-footer { + padding: 20px; + text-align: right; + border-top: 1px solid #1c1e22; +} +.modal-footer .btn + .btn { + margin-left: 5px; + margin-bottom: 0; +} +.modal-footer .btn-group .btn + .btn { + margin-left: -1px; +} +.modal-footer .btn-block + .btn-block { + margin-left: 0; +} +.modal-scrollbar-measure { + position: absolute; + top: -9999px; + width: 50px; + height: 50px; + overflow: scroll; +} +@media (min-width: 768px) { + .modal-dialog { + width: 600px; + margin: 30px auto; + } + .modal-content { + -webkit-box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5); + box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5); + } + .modal-sm { + width: 300px; + } +} +@media (min-width: 992px) { + .modal-lg { + width: 900px; + } +} +.tooltip { + position: absolute; + z-index: 1070; + display: block; + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-style: normal; + font-weight: normal; + letter-spacing: normal; + line-break: auto; + line-height: 1.42857143; + text-align: left; + text-align: start; + text-decoration: none; + text-shadow: none; + text-transform: none; + white-space: normal; + word-break: normal; + word-spacing: normal; + word-wrap: normal; + font-size: 12px; + opacity: 0; + filter: alpha(opacity=0); +} +.tooltip.in { + opacity: 0.9; + filter: alpha(opacity=90); +} +.tooltip.top { + margin-top: -3px; + padding: 5px 0; +} +.tooltip.right { + margin-left: 3px; + padding: 0 5px; +} +.tooltip.bottom { + margin-top: 3px; + padding: 5px 0; +} +.tooltip.left { + margin-left: -3px; + padding: 0 5px; +} +.tooltip-inner { + max-width: 200px; + padding: 3px 8px; + color: #ffffff; + text-align: center; + background-color: #000000; + border-radius: 4px; +} +.tooltip-arrow { + position: absolute; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; +} +.tooltip.top .tooltip-arrow { + bottom: 0; + left: 50%; + margin-left: -5px; + border-width: 5px 5px 0; + border-top-color: #000000; +} +.tooltip.top-left .tooltip-arrow { + bottom: 0; + right: 5px; + margin-bottom: -5px; + border-width: 5px 5px 0; + border-top-color: #000000; +} +.tooltip.top-right .tooltip-arrow { + bottom: 0; + left: 5px; + margin-bottom: -5px; + border-width: 5px 5px 0; + border-top-color: #000000; +} +.tooltip.right .tooltip-arrow { + top: 50%; + left: 0; + margin-top: -5px; + border-width: 5px 5px 5px 0; + border-right-color: #000000; +} +.tooltip.left .tooltip-arrow { + top: 50%; + right: 0; + margin-top: -5px; + border-width: 5px 0 5px 5px; + border-left-color: #000000; +} +.tooltip.bottom .tooltip-arrow { + top: 0; + left: 50%; + margin-left: -5px; + border-width: 0 5px 5px; + border-bottom-color: #000000; +} +.tooltip.bottom-left .tooltip-arrow { + top: 0; + right: 5px; + margin-top: -5px; + border-width: 0 5px 5px; + border-bottom-color: #000000; +} +.tooltip.bottom-right .tooltip-arrow { + top: 0; + left: 5px; + margin-top: -5px; + border-width: 0 5px 5px; + border-bottom-color: #000000; +} +.popover { + position: absolute; + top: 0; + left: 0; + z-index: 1060; + display: none; + max-width: 276px; + padding: 1px; + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-style: normal; + font-weight: normal; + letter-spacing: normal; + line-break: auto; + line-height: 1.42857143; + text-align: left; + text-align: start; + text-decoration: none; + text-shadow: none; + text-transform: none; + white-space: normal; + word-break: normal; + word-spacing: normal; + word-wrap: normal; + font-size: 14px; + background-color: #2e3338; + -webkit-background-clip: padding-box; + background-clip: padding-box; + border: 1px solid #999999; + border: 1px solid rgba(0, 0, 0, 0.2); + border-radius: 6px; + -webkit-box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2); + box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2); +} +.popover.top { + margin-top: -10px; +} +.popover.right { + margin-left: 10px; +} +.popover.bottom { + margin-top: 10px; +} +.popover.left { + margin-left: -10px; +} +.popover-title { + margin: 0; + padding: 8px 14px; + font-size: 14px; + background-color: #2e3338; + border-bottom: 1px solid #22262a; + border-radius: 5px 5px 0 0; +} +.popover-content { + padding: 9px 14px; +} +.popover > .arrow, +.popover > .arrow:after { + position: absolute; + display: block; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; +} +.popover > .arrow { + border-width: 11px; +} +.popover > .arrow:after { + border-width: 10px; + content: ""; +} +.popover.top > .arrow { + left: 50%; + margin-left: -11px; + border-bottom-width: 0; + border-top-color: #666666; + border-top-color: rgba(0, 0, 0, 0.25); + bottom: -11px; +} +.popover.top > .arrow:after { + content: " "; + bottom: 1px; + margin-left: -10px; + border-bottom-width: 0; + border-top-color: #2e3338; +} +.popover.right > .arrow { + top: 50%; + left: -11px; + margin-top: -11px; + border-left-width: 0; + border-right-color: #666666; + border-right-color: rgba(0, 0, 0, 0.25); +} +.popover.right > .arrow:after { + content: " "; + left: 1px; + bottom: -10px; + border-left-width: 0; + border-right-color: #2e3338; +} +.popover.bottom > .arrow { + left: 50%; + margin-left: -11px; + border-top-width: 0; + border-bottom-color: #666666; + border-bottom-color: rgba(0, 0, 0, 0.25); + top: -11px; +} +.popover.bottom > .arrow:after { + content: " "; + top: 1px; + margin-left: -10px; + border-top-width: 0; + border-bottom-color: #2e3338; +} +.popover.left > .arrow { + top: 50%; + right: -11px; + margin-top: -11px; + border-right-width: 0; + border-left-color: #666666; + border-left-color: rgba(0, 0, 0, 0.25); +} +.popover.left > .arrow:after { + content: " "; + right: 1px; + border-right-width: 0; + border-left-color: #2e3338; + bottom: -10px; +} +.carousel { + position: relative; +} +.carousel-inner { + position: relative; + overflow: hidden; + width: 100%; +} +.carousel-inner > .item { + display: none; + position: relative; + -webkit-transition: 0.6s ease-in-out left; + -o-transition: 0.6s ease-in-out left; + transition: 0.6s ease-in-out left; +} +.carousel-inner > .item > img, +.carousel-inner > .item > a > img { + line-height: 1; +} +@media all and (transform-3d), (-webkit-transform-3d) { + .carousel-inner > .item { + -webkit-transition: -webkit-transform 0.6s ease-in-out; + -o-transition: -o-transform 0.6s ease-in-out; + transition: transform 0.6s ease-in-out; + -webkit-backface-visibility: hidden; + backface-visibility: hidden; + -webkit-perspective: 1000px; + perspective: 1000px; + } + .carousel-inner > .item.next, + .carousel-inner > .item.active.right { + -webkit-transform: translate3d(100%, 0, 0); + transform: translate3d(100%, 0, 0); + left: 0; + } + .carousel-inner > .item.prev, + .carousel-inner > .item.active.left { + -webkit-transform: translate3d(-100%, 0, 0); + transform: translate3d(-100%, 0, 0); + left: 0; + } + .carousel-inner > .item.next.left, + .carousel-inner > .item.prev.right, + .carousel-inner > .item.active { + -webkit-transform: translate3d(0, 0, 0); + transform: translate3d(0, 0, 0); + left: 0; + } +} +.carousel-inner > .active, +.carousel-inner > .next, +.carousel-inner > .prev { + display: block; +} +.carousel-inner > .active { + left: 0; +} +.carousel-inner > .next, +.carousel-inner > .prev { + position: absolute; + top: 0; + width: 100%; +} +.carousel-inner > .next { + left: 100%; +} +.carousel-inner > .prev { + left: -100%; +} +.carousel-inner > .next.left, +.carousel-inner > .prev.right { + left: 0; +} +.carousel-inner > .active.left { + left: -100%; +} +.carousel-inner > .active.right { + left: 100%; +} +.carousel-control { + position: absolute; + top: 0; + left: 0; + bottom: 0; + width: 15%; + opacity: 0.5; + filter: alpha(opacity=50); + font-size: 20px; + color: #ffffff; + text-align: center; + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6); + background-color: rgba(0, 0, 0, 0); +} +.carousel-control.left { + background-image: -webkit-linear-gradient(left, rgba(0, 0, 0, 0.5) 0%, rgba(0, 0, 0, 0.0001) 100%); + background-image: -o-linear-gradient(left, rgba(0, 0, 0, 0.5) 0%, rgba(0, 0, 0, 0.0001) 100%); + background-image: -webkit-gradient(linear, left top, right top, from(rgba(0, 0, 0, 0.5)), to(rgba(0, 0, 0, 0.0001))); + background-image: linear-gradient(to right, rgba(0, 0, 0, 0.5) 0%, rgba(0, 0, 0, 0.0001) 100%); + background-repeat: repeat-x; + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#80000000', endColorstr='#00000000', GradientType=1); +} +.carousel-control.right { + left: auto; + right: 0; + background-image: -webkit-linear-gradient(left, rgba(0, 0, 0, 0.0001) 0%, rgba(0, 0, 0, 0.5) 100%); + background-image: -o-linear-gradient(left, rgba(0, 0, 0, 0.0001) 0%, rgba(0, 0, 0, 0.5) 100%); + background-image: -webkit-gradient(linear, left top, right top, from(rgba(0, 0, 0, 0.0001)), to(rgba(0, 0, 0, 0.5))); + background-image: linear-gradient(to right, rgba(0, 0, 0, 0.0001) 0%, rgba(0, 0, 0, 0.5) 100%); + background-repeat: repeat-x; + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#00000000', endColorstr='#80000000', GradientType=1); +} +.carousel-control:hover, +.carousel-control:focus { + outline: 0; + color: #ffffff; + text-decoration: none; + opacity: 0.9; + filter: alpha(opacity=90); +} +.carousel-control .icon-prev, +.carousel-control .icon-next, +.carousel-control .glyphicon-chevron-left, +.carousel-control .glyphicon-chevron-right { + position: absolute; + top: 50%; + margin-top: -10px; + z-index: 5; + display: inline-block; +} +.carousel-control .icon-prev, +.carousel-control .glyphicon-chevron-left { + left: 50%; + margin-left: -10px; +} +.carousel-control .icon-next, +.carousel-control .glyphicon-chevron-right { + right: 50%; + margin-right: -10px; +} +.carousel-control .icon-prev, +.carousel-control .icon-next { + width: 20px; + height: 20px; + line-height: 1; + font-family: serif; +} +.carousel-control .icon-prev:before { + content: '\2039'; +} +.carousel-control .icon-next:before { + content: '\203a'; +} +.carousel-indicators { + position: absolute; + bottom: 10px; + left: 50%; + z-index: 15; + width: 60%; + margin-left: -30%; + padding-left: 0; + list-style: none; + text-align: center; +} +.carousel-indicators li { + display: inline-block; + width: 10px; + height: 10px; + margin: 1px; + text-indent: -999px; + border: 1px solid #ffffff; + border-radius: 10px; + cursor: pointer; + background-color: #000 \9; + background-color: rgba(0, 0, 0, 0); +} +.carousel-indicators .active { + margin: 0; + width: 12px; + height: 12px; + background-color: #ffffff; +} +.carousel-caption { + position: absolute; + left: 15%; + right: 15%; + bottom: 20px; + z-index: 10; + padding-top: 20px; + padding-bottom: 20px; + color: #ffffff; + text-align: center; + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6); +} +.carousel-caption .btn { + text-shadow: none; +} +@media screen and (min-width: 768px) { + .carousel-control .glyphicon-chevron-left, + .carousel-control .glyphicon-chevron-right, + .carousel-control .icon-prev, + .carousel-control .icon-next { + width: 30px; + height: 30px; + margin-top: -10px; + font-size: 30px; + } + .carousel-control .glyphicon-chevron-left, + .carousel-control .icon-prev { + margin-left: -10px; + } + .carousel-control .glyphicon-chevron-right, + .carousel-control .icon-next { + margin-right: -10px; + } + .carousel-caption { + left: 20%; + right: 20%; + padding-bottom: 30px; + } + .carousel-indicators { + bottom: 20px; + } +} +.clearfix:before, +.clearfix:after, +.dl-horizontal dd:before, +.dl-horizontal dd:after, +.container:before, +.container:after, +.container-fluid:before, +.container-fluid:after, +.row:before, +.row:after, +.form-horizontal .form-group:before, +.form-horizontal .form-group:after, +.btn-toolbar:before, +.btn-toolbar:after, +.btn-group-vertical > .btn-group:before, +.btn-group-vertical > .btn-group:after, +.nav:before, +.nav:after, +.navbar:before, +.navbar:after, +.navbar-header:before, +.navbar-header:after, +.navbar-collapse:before, +.navbar-collapse:after, +.pager:before, +.pager:after, +.panel-body:before, +.panel-body:after, +.modal-header:before, +.modal-header:after, +.modal-footer:before, +.modal-footer:after { + content: " "; + display: table; +} +.clearfix:after, +.dl-horizontal dd:after, +.container:after, +.container-fluid:after, +.row:after, +.form-horizontal .form-group:after, +.btn-toolbar:after, +.btn-group-vertical > .btn-group:after, +.nav:after, +.navbar:after, +.navbar-header:after, +.navbar-collapse:after, +.pager:after, +.panel-body:after, +.modal-header:after, +.modal-footer:after { + clear: both; +} +.center-block { + display: block; + margin-left: auto; + margin-right: auto; +} +.pull-right { + float: right !important; +} +.pull-left { + float: left !important; +} +.hide { + display: none !important; +} +.show { + display: block !important; +} +.invisible { + visibility: hidden; +} +.text-hide { + font: 0/0 a; + color: transparent; + text-shadow: none; + background-color: transparent; + border: 0; +} +.hidden { + display: none !important; +} +.affix { + position: fixed; +} +@-ms-viewport { + width: device-width; +} +.visible-xs, +.visible-sm, +.visible-md, +.visible-lg { + display: none !important; +} +.visible-xs-block, +.visible-xs-inline, +.visible-xs-inline-block, +.visible-sm-block, +.visible-sm-inline, +.visible-sm-inline-block, +.visible-md-block, +.visible-md-inline, +.visible-md-inline-block, +.visible-lg-block, +.visible-lg-inline, +.visible-lg-inline-block { + display: none !important; +} +@media (max-width: 767px) { + .visible-xs { + display: block !important; + } + table.visible-xs { + display: table !important; + } + tr.visible-xs { + display: table-row !important; + } + th.visible-xs, + td.visible-xs { + display: table-cell !important; + } +} +@media (max-width: 767px) { + .visible-xs-block { + display: block !important; + } +} +@media (max-width: 767px) { + .visible-xs-inline { + display: inline !important; + } +} +@media (max-width: 767px) { + .visible-xs-inline-block { + display: inline-block !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm { + display: block !important; + } + table.visible-sm { + display: table !important; + } + tr.visible-sm { + display: table-row !important; + } + th.visible-sm, + td.visible-sm { + display: table-cell !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm-block { + display: block !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm-inline { + display: inline !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm-inline-block { + display: inline-block !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md { + display: block !important; + } + table.visible-md { + display: table !important; + } + tr.visible-md { + display: table-row !important; + } + th.visible-md, + td.visible-md { + display: table-cell !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md-block { + display: block !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md-inline { + display: inline !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md-inline-block { + display: inline-block !important; + } +} +@media (min-width: 1200px) { + .visible-lg { + display: block !important; + } + table.visible-lg { + display: table !important; + } + tr.visible-lg { + display: table-row !important; + } + th.visible-lg, + td.visible-lg { + display: table-cell !important; + } +} +@media (min-width: 1200px) { + .visible-lg-block { + display: block !important; + } +} +@media (min-width: 1200px) { + .visible-lg-inline { + display: inline !important; + } +} +@media (min-width: 1200px) { + .visible-lg-inline-block { + display: inline-block !important; + } +} +@media (max-width: 767px) { + .hidden-xs { + display: none !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .hidden-sm { + display: none !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .hidden-md { + display: none !important; + } +} +@media (min-width: 1200px) { + .hidden-lg { + display: none !important; + } +} +.visible-print { + display: none !important; +} +@media print { + .visible-print { + display: block !important; + } + table.visible-print { + display: table !important; + } + tr.visible-print { + display: table-row !important; + } + th.visible-print, + td.visible-print { + display: table-cell !important; + } +} +.visible-print-block { + display: none !important; +} +@media print { + .visible-print-block { + display: block !important; + } +} +.visible-print-inline { + display: none !important; +} +@media print { + .visible-print-inline { + display: inline !important; + } +} +.visible-print-inline-block { + display: none !important; +} +@media print { + .visible-print-inline-block { + display: inline-block !important; + } +} +@media print { + .hidden-print { + display: none !important; + } +} +.navbar-default, +.navbar-inverse { + border: 1px solid rgba(0, 0, 0, 0.6); + text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.3); +} +@media (min-width: 768px) { + .navbar-default .navbar-nav > li > a, + .navbar-inverse .navbar-nav > li > a { + border-right: 1px solid rgba(0, 0, 0, 0.2); + border-left: 1px solid rgba(255, 255, 255, 0.1); + } + .navbar-default .navbar-nav > li > a:hover, + .navbar-inverse .navbar-nav > li > a:hover { + border-left-color: transparent; + } + .navbar-default .nav .open > a, + .navbar-inverse .nav .open > a { + border-color: transparent; + } + .navbar-default .navbar-nav > li.active > a, + .navbar-inverse .navbar-nav > li.active > a { + border-left-color: transparent; + } + .navbar-default .navbar-form, + .navbar-inverse .navbar-form { + margin-left: 5px; + margin-right: 5px; + } +} +.navbar-default { + -webkit-filter: none; + filter: none; +} +.navbar-default .navbar-nav > li > a:hover { + -webkit-filter: none; + filter: none; +} +.navbar-inverse { + -webkit-filter: none; + filter: none; +} +.navbar-inverse .badge { + background-color: #5d6368; +} +.navbar-inverse .navbar-nav > li > a:hover { + -webkit-filter: none; + filter: none; +} +.btn, +.btn:hover { + border-color: rgba(0, 0, 0, 0.6); + text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.3); +} +.btn-default { + -webkit-filter: none; + filter: none; +} +.btn-default:hover { + -webkit-filter: none; + filter: none; +} +.btn-primary { + -webkit-filter: none; + filter: none; +} +.btn-primary:hover { + -webkit-filter: none; + filter: none; +} +.btn-success { + -webkit-filter: none; + filter: none; +} +.btn-success:hover { + -webkit-filter: none; + filter: none; +} +.btn-info { + -webkit-filter: none; + filter: none; +} +.btn-info:hover { + -webkit-filter: none; + filter: none; +} +.btn-warning { + -webkit-filter: none; + filter: none; +} +.btn-warning:hover { + -webkit-filter: none; + filter: none; +} +.btn-danger { + -webkit-filter: none; + filter: none; +} +.btn-danger:hover { + -webkit-filter: none; + filter: none; +} +.btn-link, +.btn-link:hover { + border-color: transparent; +} +h1, +h2, +h3, +h4, +h5, +h6 { + text-shadow: -1px -1px 0 rgba(0, 0, 0, 0.3); +} +.text-primary, +.text-primary:hover { + color: #7a8288; +} +.text-success, +.text-success:hover { + color: #62c462; +} +.text-danger, +.text-danger:hover { + color: #ee5f5b; +} +.text-warning, +.text-warning:hover { + color: #f89406; +} +.text-info, +.text-info:hover { + color: #5bc0de; +} +.table .success, +.table .warning, +.table .danger, +.table .info { + color: #fff; +} +.table-bordered tbody tr.success td, +.table-bordered tbody tr.warning td, +.table-bordered tbody tr.danger td, +.table-bordered tbody tr.success:hover td, +.table-bordered tbody tr.warning:hover td, +.table-bordered tbody tr.danger:hover td { + border-color: #1c1e22; +} +.table-responsive > .table { + background-color: #2e3338; +} +input, +textarea { + color: #272b30; +} +.has-warning .help-block, +.has-warning .control-label, +.has-warning .radio, +.has-warning .checkbox, +.has-warning .radio-inline, +.has-warning .checkbox-inline, +.has-warning.radio label, +.has-warning.checkbox label, +.has-warning.radio-inline label, +.has-warning.checkbox-inline label, +.has-warning .form-control-feedback { + color: #f89406; +} +.has-warning .form-control, +.has-warning .form-control:focus { + border-color: #f89406; +} +.has-warning .input-group-addon { + border-color: rgba(0, 0, 0, 0.6); +} +.has-error .help-block, +.has-error .control-label, +.has-error .radio, +.has-error .checkbox, +.has-error .radio-inline, +.has-error .checkbox-inline, +.has-error.radio label, +.has-error.checkbox label, +.has-error.radio-inline label, +.has-error.checkbox-inline label, +.has-error .form-control-feedback { + color: #ee5f5b; +} +.has-error .form-control, +.has-error .form-control:focus { + border-color: #ee5f5b; +} +.has-error .input-group-addon { + border-color: rgba(0, 0, 0, 0.6); +} +.has-success .help-block, +.has-success .control-label, +.has-success .radio, +.has-success .checkbox, +.has-success .radio-inline, +.has-success .checkbox-inline, +.has-success.radio label, +.has-success.checkbox label, +.has-success.radio-inline label, +.has-success.checkbox-inline label, +.has-success .form-control-feedback { + color: #62c462; +} +.has-success .form-control, +.has-success .form-control:focus { + border-color: #62c462; +} +.has-success .input-group-addon { + border-color: rgba(0, 0, 0, 0.6); +} +legend { + color: #fff; +} +.input-group-addon { + -webkit-filter: none; + filter: none; + text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.3); + color: #ffffff; +} +.nav .open > a, +.nav .open > a:hover, +.nav .open > a:focus { + border-color: rgba(0, 0, 0, 0.6); +} +.nav-pills > li > a { + -webkit-filter: none; + filter: none; + border: 1px solid rgba(0, 0, 0, 0.6); + text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.3); +} +.nav-pills > li > a:hover { + -webkit-filter: none; + filter: none; + border: 1px solid rgba(0, 0, 0, 0.6); +} +.nav-pills > li.active > a, +.nav-pills > li.active > a:hover { + background-color: none; + -webkit-filter: none; + filter: none; + border: 1px solid rgba(0, 0, 0, 0.6); +} +.nav-pills > li.disabled > a, +.nav-pills > li.disabled > a:hover { + -webkit-filter: none; + filter: none; +} +.pagination > li > a, +.pagination > li > span { + text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.3); + -webkit-filter: none; + filter: none; +} +.pagination > li > a:hover, +.pagination > li > span:hover { + -webkit-filter: none; + filter: none; +} +.pagination > li.active > a, +.pagination > li.active > span { + -webkit-filter: none; + filter: none; +} +.pagination > li.disabled > a, +.pagination > li.disabled > a:hover, +.pagination > li.disabled > span, +.pagination > li.disabled > span:hover { + -webkit-filter: none; + filter: none; +} +.pager > li > a { + -webkit-filter: none; + filter: none; + text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.3); +} +.pager > li > a:hover { + -webkit-filter: none; + filter: none; +} +.pager > li.disabled > a, +.pager > li.disabled > a:hover { + -webkit-filter: none; + filter: none; +} +.breadcrumb { + border: 1px solid rgba(0, 0, 0, 0.6); + text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.3); + -webkit-filter: none; + filter: none; +} +.alert .alert-link, +.alert a { + color: #fff; + text-decoration: underline; +} +.alert .close { + color: #000000; + text-decoration: none; +} +a.thumbnail:hover, +a.thumbnail:focus, +a.thumbnail.active { + border-color: #0c0d0e; +} +a.list-group-item.active, +a.list-group-item.active:hover, +a.list-group-item.active:focus { + border-color: rgba(0, 0, 0, 0.6); +} +a.list-group-item-success.active { + background-color: #62c462; +} +a.list-group-item-success.active:hover, +a.list-group-item-success.active:focus { + background-color: #4fbd4f; +} +a.list-group-item-warning.active { + background-color: #f89406; +} +a.list-group-item-warning.active:hover, +a.list-group-item-warning.active:focus { + background-color: #df8505; +} +a.list-group-item-danger.active { + background-color: #ee5f5b; +} +a.list-group-item-danger.active:hover, +a.list-group-item-danger.active:focus { + background-color: #ec4844; +} +.jumbotron { + border: 1px solid rgba(0, 0, 0, 0.6); +} +.panel-primary .panel-heading, +.panel-success .panel-heading, +.panel-danger .panel-heading, +.panel-warning .panel-heading, +.panel-info .panel-heading { + border-color: #000; +} diff --git a/web/gui/dashboard/css/bootstrap-slider-10.0.0.min.css b/web/gui/dashboard/css/bootstrap-slider-10.0.0.min.css new file mode 100644 index 0000000..095be95 --- /dev/null +++ b/web/gui/dashboard/css/bootstrap-slider-10.0.0.min.css @@ -0,0 +1,22 @@ +/*! ======================================================= + VERSION 10.0.0 +========================================================= */ +/*! ========================================================= + * bootstrap-slider.js + * + * Maintainers: + * Kyle Kemp + * - Twitter: @seiyria + * - Github: seiyria + * Rohit Kalkur + * - Twitter: @Rovolutionary + * - Github: rovolution + * + * ========================================================= + * + * bootstrap-slider is released under the MIT License + * Copyright (c) 2017 Kyle Kemp, Rohit Kalkur, and contributors + * + * SPDX-License-Identifier: MIT + * + * ========================================================= */.slider{display:inline-block;vertical-align:middle;position:relative}.slider.slider-horizontal{width:210px;height:20px}.slider.slider-horizontal .slider-track{height:10px;width:100%;margin-top:-5px;top:50%;left:0}.slider.slider-horizontal .slider-selection,.slider.slider-horizontal .slider-track-low,.slider.slider-horizontal .slider-track-high{height:100%;top:0;bottom:0}.slider.slider-horizontal .slider-tick,.slider.slider-horizontal .slider-handle{margin-left:-10px}.slider.slider-horizontal .slider-tick.triangle,.slider.slider-horizontal .slider-handle.triangle{position:relative;top:50%;-ms-transform:translateY(-50%);transform:translateY(-50%);border-width:0 10px 10px 10px;width:0;height:0;border-bottom-color:#2e6da4;margin-top:0}.slider.slider-horizontal .slider-tick-container{white-space:nowrap;position:absolute;top:0;left:0;width:100%}.slider.slider-horizontal .slider-tick-label-container{white-space:nowrap;margin-top:20px}.slider.slider-horizontal .slider-tick-label-container .slider-tick-label{padding-top:4px;display:inline-block;text-align:center}.slider.slider-horizontal .tooltip{-ms-transform:translateX(-50%);transform:translateX(-50%)}.slider.slider-horizontal.slider-rtl .slider-track{left:initial;right:0}.slider.slider-horizontal.slider-rtl .slider-tick,.slider.slider-horizontal.slider-rtl .slider-handle{margin-left:initial;margin-right:-10px}.slider.slider-horizontal.slider-rtl .slider-tick-container{left:initial;right:0}.slider.slider-horizontal.slider-rtl .tooltip{-ms-transform:translateX(50%);transform:translateX(50%)}.slider.slider-vertical{height:210px;width:20px}.slider.slider-vertical .slider-track{width:10px;height:100%;left:25%;top:0}.slider.slider-vertical .slider-selection{width:100%;left:0;top:0;bottom:0}.slider.slider-vertical .slider-track-low,.slider.slider-vertical .slider-track-high{width:100%;left:0;right:0}.slider.slider-vertical .slider-tick,.slider.slider-vertical .slider-handle{margin-top:-10px}.slider.slider-vertical .slider-tick.triangle,.slider.slider-vertical .slider-handle.triangle{border-width:10px 0 10px 10px;width:1px;height:1px;border-left-color:#2e6da4;border-right-color:#2e6da4;margin-left:0;margin-right:0}.slider.slider-vertical .slider-tick-label-container{white-space:nowrap}.slider.slider-vertical .slider-tick-label-container .slider-tick-label{padding-left:4px}.slider.slider-vertical .tooltip{-ms-transform:translateY(-50%);transform:translateY(-50%)}.slider.slider-vertical.slider-rtl .slider-track{left:initial;right:25%}.slider.slider-vertical.slider-rtl .slider-selection{left:initial;right:0}.slider.slider-vertical.slider-rtl .slider-tick.triangle,.slider.slider-vertical.slider-rtl .slider-handle.triangle{border-width:10px 10px 10px 0}.slider.slider-vertical.slider-rtl .slider-tick-label-container .slider-tick-label{padding-left:initial;padding-right:4px}.slider.slider-disabled .slider-handle{background-image:-webkit-linear-gradient(top,#dfdfdf 0,#bebebe 100%);background-image:-o-linear-gradient(top,#dfdfdf 0,#bebebe 100%);background-image:linear-gradient(to bottom,#dfdfdf 0,#bebebe 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffdfdfdf',endColorstr='#ffbebebe',GradientType=0)}.slider.slider-disabled .slider-track{background-image:-webkit-linear-gradient(top,#e5e5e5 0,#e9e9e9 100%);background-image:-o-linear-gradient(top,#e5e5e5 0,#e9e9e9 100%);background-image:linear-gradient(to bottom,#e5e5e5 0,#e9e9e9 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffe5e5e5',endColorstr='#ffe9e9e9',GradientType=0);cursor:not-allowed}.slider input{display:none}.slider .tooltip.top{margin-top:-36px}.slider .tooltip-inner{white-space:nowrap;max-width:none}.slider .hide{display:none}.slider-track{position:absolute;cursor:pointer;background-image:-webkit-linear-gradient(top,#f5f5f5 0,#f9f9f9 100%);background-image:-o-linear-gradient(top,#f5f5f5 0,#f9f9f9 100%);background-image:linear-gradient(to bottom,#f5f5f5 0,#f9f9f9 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5',endColorstr='#fff9f9f9',GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);border-radius:4px}.slider-selection{position:absolute;background-image:-webkit-linear-gradient(top,#f9f9f9 0,#f5f5f5 100%);background-image:-o-linear-gradient(top,#f9f9f9 0,#f5f5f5 100%);background-image:linear-gradient(to bottom,#f9f9f9 0,#f5f5f5 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff9f9f9',endColorstr='#fff5f5f5',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;border-radius:4px}.slider-selection.tick-slider-selection{background-image:-webkit-linear-gradient(top,#8ac1ef 0,#82b3de 100%);background-image:-o-linear-gradient(top,#8ac1ef 0,#82b3de 100%);background-image:linear-gradient(to bottom,#8ac1ef 0,#82b3de 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff8ac1ef',endColorstr='#ff82b3de',GradientType=0)}.slider-track-low,.slider-track-high{position:absolute;background:transparent;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;border-radius:4px}.slider-handle{position:absolute;top:0;width:20px;height:20px;background-color:#337ab7;background-image:-webkit-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:-o-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:linear-gradient(to bottom,#337ab7 0,#2e6da4 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7',endColorstr='#ff2e6da4',GradientType=0);filter:none;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.2),0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 1px 0 rgba(255,255,255,.2),0 1px 2px rgba(0,0,0,.05);border:0 solid transparent}.slider-handle.round{border-radius:50%}.slider-handle.triangle{background:transparent none}.slider-handle.custom{background:transparent none}.slider-handle.custom::before{line-height:20px;font-size:20px;content:'\2605';color:#726204}.slider-tick{position:absolute;width:20px;height:20px;background-image:-webkit-linear-gradient(top,#f9f9f9 0,#f5f5f5 100%);background-image:-o-linear-gradient(top,#f9f9f9 0,#f5f5f5 100%);background-image:linear-gradient(to bottom,#f9f9f9 0,#f5f5f5 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff9f9f9',endColorstr='#fff5f5f5',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;filter:none;opacity:.8;border:0 solid transparent}.slider-tick.round{border-radius:50%}.slider-tick.triangle{background:transparent none}.slider-tick.custom{background:transparent none}.slider-tick.custom::before{line-height:20px;font-size:20px;content:'\2605';color:#726204}.slider-tick.in-selection{background-image:-webkit-linear-gradient(top,#8ac1ef 0,#82b3de 100%);background-image:-o-linear-gradient(top,#8ac1ef 0,#82b3de 100%);background-image:linear-gradient(to bottom,#8ac1ef 0,#82b3de 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff8ac1ef',endColorstr='#ff82b3de',GradientType=0);opacity:1} diff --git a/web/gui/dashboard/css/bootstrap-theme-3.3.7.min.css b/web/gui/dashboard/css/bootstrap-theme-3.3.7.min.css new file mode 100644 index 0000000..ba77cff --- /dev/null +++ b/web/gui/dashboard/css/bootstrap-theme-3.3.7.min.css @@ -0,0 +1,7 @@ +/*! + * Bootstrap v3.3.7 (http://getbootstrap.com) + * Copyright 2011-2016 Twitter, Inc. + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) + * SPDX-License-Identifier: MIT + */.btn-danger,.btn-default,.btn-info,.btn-primary,.btn-success,.btn-warning{text-shadow:0 -1px 0 rgba(0,0,0,.2);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.15),0 1px 1px rgba(0,0,0,.075);box-shadow:inset 0 1px 0 rgba(255,255,255,.15),0 1px 1px rgba(0,0,0,.075)}.btn-danger.active,.btn-danger:active,.btn-default.active,.btn-default:active,.btn-info.active,.btn-info:active,.btn-primary.active,.btn-primary:active,.btn-success.active,.btn-success:active,.btn-warning.active,.btn-warning:active{-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,.125);box-shadow:inset 0 3px 5px rgba(0,0,0,.125)}.btn-danger.disabled,.btn-danger[disabled],.btn-default.disabled,.btn-default[disabled],.btn-info.disabled,.btn-info[disabled],.btn-primary.disabled,.btn-primary[disabled],.btn-success.disabled,.btn-success[disabled],.btn-warning.disabled,.btn-warning[disabled],fieldset[disabled] .btn-danger,fieldset[disabled] .btn-default,fieldset[disabled] .btn-info,fieldset[disabled] .btn-primary,fieldset[disabled] .btn-success,fieldset[disabled] .btn-warning{-webkit-box-shadow:none;box-shadow:none}.btn-danger .badge,.btn-default .badge,.btn-info .badge,.btn-primary .badge,.btn-success .badge,.btn-warning .badge{text-shadow:none}.btn.active,.btn:active{background-image:none}.btn-default{text-shadow:0 1px 0 #fff;background-image:-webkit-linear-gradient(top,#fff 0,#e0e0e0 100%);background-image:-o-linear-gradient(top,#fff 0,#e0e0e0 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#fff),to(#e0e0e0));background-image:linear-gradient(to bottom,#fff 0,#e0e0e0 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#ffe0e0e0', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-color:#dbdbdb;border-color:#ccc}.btn-default:focus,.btn-default:hover{background-color:#e0e0e0;background-position:0 -15px}.btn-default.active,.btn-default:active{background-color:#e0e0e0;border-color:#dbdbdb}.btn-default.disabled,.btn-default.disabled.active,.btn-default.disabled.focus,.btn-default.disabled:active,.btn-default.disabled:focus,.btn-default.disabled:hover,.btn-default[disabled],.btn-default[disabled].active,.btn-default[disabled].focus,.btn-default[disabled]:active,.btn-default[disabled]:focus,.btn-default[disabled]:hover,fieldset[disabled] .btn-default,fieldset[disabled] .btn-default.active,fieldset[disabled] .btn-default.focus,fieldset[disabled] .btn-default:active,fieldset[disabled] .btn-default:focus,fieldset[disabled] .btn-default:hover{background-color:#e0e0e0;background-image:none}.btn-primary{background-image:-webkit-linear-gradient(top,#337ab7 0,#265a88 100%);background-image:-o-linear-gradient(top,#337ab7 0,#265a88 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#337ab7),to(#265a88));background-image:linear-gradient(to bottom,#337ab7 0,#265a88 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff265a88', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-color:#245580}.btn-primary:focus,.btn-primary:hover{background-color:#265a88;background-position:0 -15px}.btn-primary.active,.btn-primary:active{background-color:#265a88;border-color:#245580}.btn-primary.disabled,.btn-primary.disabled.active,.btn-primary.disabled.focus,.btn-primary.disabled:active,.btn-primary.disabled:focus,.btn-primary.disabled:hover,.btn-primary[disabled],.btn-primary[disabled].active,.btn-primary[disabled].focus,.btn-primary[disabled]:active,.btn-primary[disabled]:focus,.btn-primary[disabled]:hover,fieldset[disabled] .btn-primary,fieldset[disabled] .btn-primary.active,fieldset[disabled] .btn-primary.focus,fieldset[disabled] .btn-primary:active,fieldset[disabled] .btn-primary:focus,fieldset[disabled] .btn-primary:hover{background-color:#265a88;background-image:none}.btn-success{background-image:-webkit-linear-gradient(top,#5cb85c 0,#419641 100%);background-image:-o-linear-gradient(top,#5cb85c 0,#419641 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#5cb85c),to(#419641));background-image:linear-gradient(to bottom,#5cb85c 0,#419641 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5cb85c', endColorstr='#ff419641', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-color:#3e8f3e}.btn-success:focus,.btn-success:hover{background-color:#419641;background-position:0 -15px}.btn-success.active,.btn-success:active{background-color:#419641;border-color:#3e8f3e}.btn-success.disabled,.btn-success.disabled.active,.btn-success.disabled.focus,.btn-success.disabled:active,.btn-success.disabled:focus,.btn-success.disabled:hover,.btn-success[disabled],.btn-success[disabled].active,.btn-success[disabled].focus,.btn-success[disabled]:active,.btn-success[disabled]:focus,.btn-success[disabled]:hover,fieldset[disabled] .btn-success,fieldset[disabled] .btn-success.active,fieldset[disabled] .btn-success.focus,fieldset[disabled] .btn-success:active,fieldset[disabled] .btn-success:focus,fieldset[disabled] .btn-success:hover{background-color:#419641;background-image:none}.btn-info{background-image:-webkit-linear-gradient(top,#5bc0de 0,#2aabd2 100%);background-image:-o-linear-gradient(top,#5bc0de 0,#2aabd2 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#5bc0de),to(#2aabd2));background-image:linear-gradient(to bottom,#5bc0de 0,#2aabd2 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff2aabd2', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-color:#28a4c9}.btn-info:focus,.btn-info:hover{background-color:#2aabd2;background-position:0 -15px}.btn-info.active,.btn-info:active{background-color:#2aabd2;border-color:#28a4c9}.btn-info.disabled,.btn-info.disabled.active,.btn-info.disabled.focus,.btn-info.disabled:active,.btn-info.disabled:focus,.btn-info.disabled:hover,.btn-info[disabled],.btn-info[disabled].active,.btn-info[disabled].focus,.btn-info[disabled]:active,.btn-info[disabled]:focus,.btn-info[disabled]:hover,fieldset[disabled] .btn-info,fieldset[disabled] .btn-info.active,fieldset[disabled] .btn-info.focus,fieldset[disabled] .btn-info:active,fieldset[disabled] .btn-info:focus,fieldset[disabled] .btn-info:hover{background-color:#2aabd2;background-image:none}.btn-warning{background-image:-webkit-linear-gradient(top,#f0ad4e 0,#eb9316 100%);background-image:-o-linear-gradient(top,#f0ad4e 0,#eb9316 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#f0ad4e),to(#eb9316));background-image:linear-gradient(to bottom,#f0ad4e 0,#eb9316 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff0ad4e', endColorstr='#ffeb9316', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-color:#e38d13}.btn-warning:focus,.btn-warning:hover{background-color:#eb9316;background-position:0 -15px}.btn-warning.active,.btn-warning:active{background-color:#eb9316;border-color:#e38d13}.btn-warning.disabled,.btn-warning.disabled.active,.btn-warning.disabled.focus,.btn-warning.disabled:active,.btn-warning.disabled:focus,.btn-warning.disabled:hover,.btn-warning[disabled],.btn-warning[disabled].active,.btn-warning[disabled].focus,.btn-warning[disabled]:active,.btn-warning[disabled]:focus,.btn-warning[disabled]:hover,fieldset[disabled] .btn-warning,fieldset[disabled] .btn-warning.active,fieldset[disabled] .btn-warning.focus,fieldset[disabled] .btn-warning:active,fieldset[disabled] .btn-warning:focus,fieldset[disabled] .btn-warning:hover{background-color:#eb9316;background-image:none}.btn-danger{background-image:-webkit-linear-gradient(top,#d9534f 0,#c12e2a 100%);background-image:-o-linear-gradient(top,#d9534f 0,#c12e2a 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#d9534f),to(#c12e2a));background-image:linear-gradient(to bottom,#d9534f 0,#c12e2a 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9534f', endColorstr='#ffc12e2a', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-color:#b92c28}.btn-danger:focus,.btn-danger:hover{background-color:#c12e2a;background-position:0 -15px}.btn-danger.active,.btn-danger:active{background-color:#c12e2a;border-color:#b92c28}.btn-danger.disabled,.btn-danger.disabled.active,.btn-danger.disabled.focus,.btn-danger.disabled:active,.btn-danger.disabled:focus,.btn-danger.disabled:hover,.btn-danger[disabled],.btn-danger[disabled].active,.btn-danger[disabled].focus,.btn-danger[disabled]:active,.btn-danger[disabled]:focus,.btn-danger[disabled]:hover,fieldset[disabled] .btn-danger,fieldset[disabled] .btn-danger.active,fieldset[disabled] .btn-danger.focus,fieldset[disabled] .btn-danger:active,fieldset[disabled] .btn-danger:focus,fieldset[disabled] .btn-danger:hover{background-color:#c12e2a;background-image:none}.img-thumbnail,.thumbnail{-webkit-box-shadow:0 1px 2px rgba(0,0,0,.075);box-shadow:0 1px 2px rgba(0,0,0,.075)}.dropdown-menu>li>a:focus,.dropdown-menu>li>a:hover{background-color:#e8e8e8;background-image:-webkit-linear-gradient(top,#f5f5f5 0,#e8e8e8 100%);background-image:-o-linear-gradient(top,#f5f5f5 0,#e8e8e8 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#f5f5f5),to(#e8e8e8));background-image:linear-gradient(to bottom,#f5f5f5 0,#e8e8e8 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5', endColorstr='#ffe8e8e8', GradientType=0);background-repeat:repeat-x}.dropdown-menu>.active>a,.dropdown-menu>.active>a:focus,.dropdown-menu>.active>a:hover{background-color:#2e6da4;background-image:-webkit-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:-o-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#337ab7),to(#2e6da4));background-image:linear-gradient(to bottom,#337ab7 0,#2e6da4 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2e6da4', GradientType=0);background-repeat:repeat-x}.navbar-default{background-image:-webkit-linear-gradient(top,#fff 0,#f8f8f8 100%);background-image:-o-linear-gradient(top,#fff 0,#f8f8f8 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#fff),to(#f8f8f8));background-image:linear-gradient(to bottom,#fff 0,#f8f8f8 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#fff8f8f8', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-radius:4px;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.15),0 1px 5px rgba(0,0,0,.075);box-shadow:inset 0 1px 0 rgba(255,255,255,.15),0 1px 5px rgba(0,0,0,.075)}.navbar-default .navbar-nav>.active>a,.navbar-default .navbar-nav>.open>a{background-image:-webkit-linear-gradient(top,#dbdbdb 0,#e2e2e2 100%);background-image:-o-linear-gradient(top,#dbdbdb 0,#e2e2e2 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#dbdbdb),to(#e2e2e2));background-image:linear-gradient(to bottom,#dbdbdb 0,#e2e2e2 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffdbdbdb', endColorstr='#ffe2e2e2', GradientType=0);background-repeat:repeat-x;-webkit-box-shadow:inset 0 3px 9px rgba(0,0,0,.075);box-shadow:inset 0 3px 9px rgba(0,0,0,.075)}.navbar-brand,.navbar-nav>li>a{text-shadow:0 1px 0 rgba(255,255,255,.25)}.navbar-inverse{background-image:-webkit-linear-gradient(top,#3c3c3c 0,#222 100%);background-image:-o-linear-gradient(top,#3c3c3c 0,#222 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#3c3c3c),to(#222));background-image:linear-gradient(to bottom,#3c3c3c 0,#222 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff3c3c3c', endColorstr='#ff222222', GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);background-repeat:repeat-x;border-radius:4px}.navbar-inverse .navbar-nav>.active>a,.navbar-inverse .navbar-nav>.open>a{background-image:-webkit-linear-gradient(top,#080808 0,#0f0f0f 100%);background-image:-o-linear-gradient(top,#080808 0,#0f0f0f 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#080808),to(#0f0f0f));background-image:linear-gradient(to bottom,#080808 0,#0f0f0f 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff080808', endColorstr='#ff0f0f0f', GradientType=0);background-repeat:repeat-x;-webkit-box-shadow:inset 0 3px 9px rgba(0,0,0,.25);box-shadow:inset 0 3px 9px rgba(0,0,0,.25)}.navbar-inverse .navbar-brand,.navbar-inverse .navbar-nav>li>a{text-shadow:0 -1px 0 rgba(0,0,0,.25)}.navbar-fixed-bottom,.navbar-fixed-top,.navbar-static-top{border-radius:0}@media (max-width:767px){.navbar .navbar-nav .open .dropdown-menu>.active>a,.navbar .navbar-nav .open .dropdown-menu>.active>a:focus,.navbar .navbar-nav .open .dropdown-menu>.active>a:hover{color:#fff;background-image:-webkit-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:-o-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#337ab7),to(#2e6da4));background-image:linear-gradient(to bottom,#337ab7 0,#2e6da4 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2e6da4', GradientType=0);background-repeat:repeat-x}}.alert{text-shadow:0 1px 0 rgba(255,255,255,.2);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.25),0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 1px 0 rgba(255,255,255,.25),0 1px 2px rgba(0,0,0,.05)}.alert-success{background-image:-webkit-linear-gradient(top,#dff0d8 0,#c8e5bc 100%);background-image:-o-linear-gradient(top,#dff0d8 0,#c8e5bc 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#dff0d8),to(#c8e5bc));background-image:linear-gradient(to bottom,#dff0d8 0,#c8e5bc 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffdff0d8', endColorstr='#ffc8e5bc', GradientType=0);background-repeat:repeat-x;border-color:#b2dba1}.alert-info{background-image:-webkit-linear-gradient(top,#d9edf7 0,#b9def0 100%);background-image:-o-linear-gradient(top,#d9edf7 0,#b9def0 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#d9edf7),to(#b9def0));background-image:linear-gradient(to bottom,#d9edf7 0,#b9def0 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9edf7', endColorstr='#ffb9def0', GradientType=0);background-repeat:repeat-x;border-color:#9acfea}.alert-warning{background-image:-webkit-linear-gradient(top,#fcf8e3 0,#f8efc0 100%);background-image:-o-linear-gradient(top,#fcf8e3 0,#f8efc0 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#fcf8e3),to(#f8efc0));background-image:linear-gradient(to bottom,#fcf8e3 0,#f8efc0 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffcf8e3', endColorstr='#fff8efc0', GradientType=0);background-repeat:repeat-x;border-color:#f5e79e}.alert-danger{background-image:-webkit-linear-gradient(top,#f2dede 0,#e7c3c3 100%);background-image:-o-linear-gradient(top,#f2dede 0,#e7c3c3 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#f2dede),to(#e7c3c3));background-image:linear-gradient(to bottom,#f2dede 0,#e7c3c3 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2dede', endColorstr='#ffe7c3c3', GradientType=0);background-repeat:repeat-x;border-color:#dca7a7}.progress{background-image:-webkit-linear-gradient(top,#ebebeb 0,#f5f5f5 100%);background-image:-o-linear-gradient(top,#ebebeb 0,#f5f5f5 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#ebebeb),to(#f5f5f5));background-image:linear-gradient(to bottom,#ebebeb 0,#f5f5f5 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffebebeb', endColorstr='#fff5f5f5', GradientType=0);background-repeat:repeat-x}.progress-bar{background-image:-webkit-linear-gradient(top,#337ab7 0,#286090 100%);background-image:-o-linear-gradient(top,#337ab7 0,#286090 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#337ab7),to(#286090));background-image:linear-gradient(to bottom,#337ab7 0,#286090 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff286090', GradientType=0);background-repeat:repeat-x}.progress-bar-success{background-image:-webkit-linear-gradient(top,#5cb85c 0,#449d44 100%);background-image:-o-linear-gradient(top,#5cb85c 0,#449d44 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#5cb85c),to(#449d44));background-image:linear-gradient(to bottom,#5cb85c 0,#449d44 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5cb85c', endColorstr='#ff449d44', GradientType=0);background-repeat:repeat-x}.progress-bar-info{background-image:-webkit-linear-gradient(top,#5bc0de 0,#31b0d5 100%);background-image:-o-linear-gradient(top,#5bc0de 0,#31b0d5 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#5bc0de),to(#31b0d5));background-image:linear-gradient(to bottom,#5bc0de 0,#31b0d5 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff31b0d5', GradientType=0);background-repeat:repeat-x}.progress-bar-warning{background-image:-webkit-linear-gradient(top,#f0ad4e 0,#ec971f 100%);background-image:-o-linear-gradient(top,#f0ad4e 0,#ec971f 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#f0ad4e),to(#ec971f));background-image:linear-gradient(to bottom,#f0ad4e 0,#ec971f 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff0ad4e', endColorstr='#ffec971f', GradientType=0);background-repeat:repeat-x}.progress-bar-danger{background-image:-webkit-linear-gradient(top,#d9534f 0,#c9302c 100%);background-image:-o-linear-gradient(top,#d9534f 0,#c9302c 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#d9534f),to(#c9302c));background-image:linear-gradient(to bottom,#d9534f 0,#c9302c 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9534f', endColorstr='#ffc9302c', GradientType=0);background-repeat:repeat-x}.progress-bar-striped{background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,.15) 50%,rgba(255,255,255,.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,.15) 50%,rgba(255,255,255,.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,.15) 50%,rgba(255,255,255,.15) 75%,transparent 75%,transparent)}.list-group{border-radius:4px;-webkit-box-shadow:0 1px 2px rgba(0,0,0,.075);box-shadow:0 1px 2px rgba(0,0,0,.075)}.list-group-item.active,.list-group-item.active:focus,.list-group-item.active:hover{text-shadow:0 -1px 0 #286090;background-image:-webkit-linear-gradient(top,#337ab7 0,#2b669a 100%);background-image:-o-linear-gradient(top,#337ab7 0,#2b669a 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#337ab7),to(#2b669a));background-image:linear-gradient(to bottom,#337ab7 0,#2b669a 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2b669a', GradientType=0);background-repeat:repeat-x;border-color:#2b669a}.list-group-item.active .badge,.list-group-item.active:focus .badge,.list-group-item.active:hover .badge{text-shadow:none}.panel{-webkit-box-shadow:0 1px 2px rgba(0,0,0,.05);box-shadow:0 1px 2px rgba(0,0,0,.05)}.panel-default>.panel-heading{background-image:-webkit-linear-gradient(top,#f5f5f5 0,#e8e8e8 100%);background-image:-o-linear-gradient(top,#f5f5f5 0,#e8e8e8 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#f5f5f5),to(#e8e8e8));background-image:linear-gradient(to bottom,#f5f5f5 0,#e8e8e8 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5', endColorstr='#ffe8e8e8', GradientType=0);background-repeat:repeat-x}.panel-primary>.panel-heading{background-image:-webkit-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:-o-linear-gradient(top,#337ab7 0,#2e6da4 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#337ab7),to(#2e6da4));background-image:linear-gradient(to bottom,#337ab7 0,#2e6da4 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2e6da4', GradientType=0);background-repeat:repeat-x}.panel-success>.panel-heading{background-image:-webkit-linear-gradient(top,#dff0d8 0,#d0e9c6 100%);background-image:-o-linear-gradient(top,#dff0d8 0,#d0e9c6 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#dff0d8),to(#d0e9c6));background-image:linear-gradient(to bottom,#dff0d8 0,#d0e9c6 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffdff0d8', endColorstr='#ffd0e9c6', GradientType=0);background-repeat:repeat-x}.panel-info>.panel-heading{background-image:-webkit-linear-gradient(top,#d9edf7 0,#c4e3f3 100%);background-image:-o-linear-gradient(top,#d9edf7 0,#c4e3f3 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#d9edf7),to(#c4e3f3));background-image:linear-gradient(to bottom,#d9edf7 0,#c4e3f3 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9edf7', endColorstr='#ffc4e3f3', GradientType=0);background-repeat:repeat-x}.panel-warning>.panel-heading{background-image:-webkit-linear-gradient(top,#fcf8e3 0,#faf2cc 100%);background-image:-o-linear-gradient(top,#fcf8e3 0,#faf2cc 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#fcf8e3),to(#faf2cc));background-image:linear-gradient(to bottom,#fcf8e3 0,#faf2cc 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffcf8e3', endColorstr='#fffaf2cc', GradientType=0);background-repeat:repeat-x}.panel-danger>.panel-heading{background-image:-webkit-linear-gradient(top,#f2dede 0,#ebcccc 100%);background-image:-o-linear-gradient(top,#f2dede 0,#ebcccc 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#f2dede),to(#ebcccc));background-image:linear-gradient(to bottom,#f2dede 0,#ebcccc 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2dede', endColorstr='#ffebcccc', GradientType=0);background-repeat:repeat-x}.well{background-image:-webkit-linear-gradient(top,#e8e8e8 0,#f5f5f5 100%);background-image:-o-linear-gradient(top,#e8e8e8 0,#f5f5f5 100%);background-image:-webkit-gradient(linear,left top,left bottom,from(#e8e8e8),to(#f5f5f5));background-image:linear-gradient(to bottom,#e8e8e8 0,#f5f5f5 100%);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffe8e8e8', endColorstr='#fff5f5f5', GradientType=0);background-repeat:repeat-x;border-color:#dcdcdc;-webkit-box-shadow:inset 0 1px 3px rgba(0,0,0,.05),0 1px 0 rgba(255,255,255,.1);box-shadow:inset 0 1px 3px rgba(0,0,0,.05),0 1px 0 rgba(255,255,255,.1)} +/*# sourceMappingURL=bootstrap-theme.min.css.map */ diff --git a/web/gui/dashboard/css/bootstrap-toggle-2.2.2.min.css b/web/gui/dashboard/css/bootstrap-toggle-2.2.2.min.css new file mode 100644 index 0000000..a3daa37 --- /dev/null +++ b/web/gui/dashboard/css/bootstrap-toggle-2.2.2.min.css @@ -0,0 +1,29 @@ +/*! ======================================================================== + * Bootstrap Toggle: bootstrap-toggle.css v2.2.0 + * http://www.bootstraptoggle.com + * ======================================================================== + * Copyright 2014 Min Hur, The New York Times Company + * Licensed under MIT + * SPDX-License-Identifier: MIT + * ======================================================================== */ +.checkbox label .toggle,.checkbox-inline .toggle{margin-left:-20px;margin-right:5px} +.toggle{position:relative;overflow:hidden} +.toggle input[type=checkbox]{display:none} +.toggle-group{position:absolute;width:200%;top:0;bottom:0;left:0;transition:left .35s;-webkit-transition:left .35s;-moz-user-select:none;-webkit-user-select:none} +.toggle.off .toggle-group{left:-100%} +.toggle-on{position:absolute;top:0;bottom:0;left:0;right:50%;margin:0;border:0;border-radius:0} +.toggle-off{position:absolute;top:0;bottom:0;left:50%;right:0;margin:0;border:0;border-radius:0} +.toggle-handle{position:relative;margin:0 auto;padding-top:0;padding-bottom:0;height:100%;width:0;border-width:0 1px} +.toggle.btn{min-width:59px;min-height:34px} +.toggle-on.btn{padding-right:24px} +.toggle-off.btn{padding-left:24px} +.toggle.btn-lg{min-width:79px;min-height:45px} +.toggle-on.btn-lg{padding-right:31px} +.toggle-off.btn-lg{padding-left:31px} +.toggle-handle.btn-lg{width:40px} +.toggle.btn-sm{min-width:50px;min-height:30px} +.toggle-on.btn-sm{padding-right:20px} +.toggle-off.btn-sm{padding-left:20px} +.toggle.btn-xs{min-width:35px;min-height:22px} +.toggle-on.btn-xs{padding-right:12px} +.toggle-off.btn-xs{padding-left:12px} diff --git a/web/gui/dashboard/css/dashboard.css b/web/gui/dashboard/css/dashboard.css new file mode 100644 index 0000000..2d95a33 --- /dev/null +++ b/web/gui/dashboard/css/dashboard.css @@ -0,0 +1,785 @@ +/* SPDX-License-Identfier: GPL-3.0-or-later */ +:root { + --color-main: #35414A; + --color-primary: #00ab44; + --color-border: #93a3b0; +} + +html, +body { + /*font-family: Calibri,"Segoe UI","Helvetica Neue",Helvetica,Arial,sans-serif;*/ + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-style: normal; + font-variant: normal; +} + +.morelink { + color: var(--color-attention, #765d9c); + text-decoration: none; +} + +.morelink:hover { + color: var(--color-attentionSecondary, #563d7c); + text-decoration: none; +} + +.morelink:focus { + color: var(--color-attention, #765d9c); + text-decoration: none; +} + +.netdata-chart-alignment { + /* 55 for legend-right */ + margin-left: 35px; +} + +.netdata-chart-row { + width: 100%; + text-align: center; + display: flex; + display: -webkit-flex; + display: -moz-flex; + align-items: baseline; + -moz-align-items: baseline; + -webkit-align-items: baseline; + justify-content: center; + -webkit-justify-content: center; + -moz-justify-content: center; + padding-top: 10px; +} + +.netdata-container { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge:after { + padding-top: 60%; + display: block; + content: ''; +} + +.netdata-container-easypiechart { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-easypiechart:after { + padding-top: 100%; + display: block; + content: ''; +} + +.netdata-aspect { + position: relative; + width: 100%; + padding: 0px; + margin: 0px; +} + +.netdata-container-with-legend { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* fix minimum scrollbar issue in firefox */ + min-height: 99px; + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-with-legend.netdata-container-with-legend--bottom { + display: flex; + flex-direction: column; +} + +.netdata-legend-resize-handler { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 20px; + background-color: var(--color-mainBackground, #fff); + font-size: 15px; + vertical-align: middle; + line-height: 15px; + cursor: ns-resize; + color: var(--color-selected, #CDCDCD); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; +} + +.netdata-legend-toolbox { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 110px; + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: var(--color-placeholder, #DDDDDD); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-legend-toolbox-button { + display: inline-block; + position: relative; + height: 15px; + width: 18px; + background-color: var(--color-mainBackground, #fff); + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: var(--color-selected, #CDCDCD); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + cursor: pointer; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-message { + display: inline-block; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + text-align: left; + vertical-align: top; + font-weight: bold; + font-size: x-small; + overflow: hidden; + background: inherit; + z-index: 0; +} + +.netdata-message.hidden { + display: none; +} + +.netdata-message.icon { + color: var(--color-borderSecondary, #F8F8F8); + text-align: center; + vertical-align: middle; +} + +.netdata-chart-legend { + position: absolute; /* within .netdata-container */ + top: 26px; + bottom: 18px; + right: 0; + overflow-x: hidden; + overflow-y: auto; + text-overflow: ellipsis; + line-height: 14px; + display: block; + width: 140px; /* --legend-width */ + font-size: 10px; + margin-top: 5px; + text-align: left; + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-legend-title-date { + font-size: 10px; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-time { + font-size: 11px; + font-weight: bold; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-units { + position: absolute; + right: 10px; + float: right; + font-size: 11px; + vertical-align: top; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-series { + position: absolute; + width: 140px; /* legend-width */ + height: calc(100% - 50px); + overflow: hidden; + text-overflow: ellipsis; + line-height: 14.5px; /* line spacing at the legend */ + display: block; + font-size: 10px; + margin-top: 0px; +} + +.netdata-legend-name-table-line { + display: inline-block; + width: 13px; + height: 4px; + border-width: 0px; + border-bottom-width: 2px; + border-bottom-style: solid; + border-bottom-color: white; +} + +.netdata-legend-name-table-area { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-table-stacked { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-tr { +} + +.netdata-legend-name-td { +} + +.netdata-legend-name { + text-align: left; + font-size: 11px; /* legend: dimension name size */ + font-weight: bold; + vertical-align: bottom; + margin-top: 0px; + z-index: 9; + padding: 0px; + width: 80px !important; + max-width: 80px !important; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + display: inline-block; + cursor: pointer; + -webkit-print-color-adjust: exact; +} + +.netdata-legend-value { + /*margin-left: 14px;*/ + position: absolute; + right: 10px; + float: right; + text-align: right; + font-size: 11px; /* legend: dimension value size */ + font-weight: bold; + vertical-align: bottom; + background-color: var(--color-mainBackground, #fff); + margin-top: 0px; + z-index: 10; + padding: 0px; + padding-left: 15px; + cursor: pointer; + /* -webkit-font-smoothing: none; */ +} + +.netdata-legend-value, .netdata-legend-name { + /* prevent highlight when shift-clicking */ + -webkit-user-select: none; + -ms-user-select: none; + -moz-user-select: none; + user-select: none; +} + +/* eslint recommends adding tabIndex for a11y, but the outline is showing on click */ +.netdata-legend-name:focus, .netdata-legend-value:focus { + outline: none; +} + +.netdata-legend-name.not-selected { + font-weight: normal; + opacity: 0.3; +} + +.netdata-chart { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: inline-block; + overflow: hidden; + width: 100%; + height: 100%; + z-index: 5; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-right { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: block; + overflow: hidden; + margin-right: 140px; /* --legend-width */ + width: calc(100% - 140px); /* --legend-width */ + height: 100%; + z-index: 5; + flex-grow: 1; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-bottom { + display: block; + overflow: hidden; + flex-grow: 1; +} + +.netdata-peity-chart { + +} + +.netdata-sparkline-chart { + +} + +.netdata-dygraph-chart { + +} + +.netdata-morris-chart { + +} + +.netdata-google-chart { + +} + +.dygraph-ylabel { +} + +.dygraph-axis-label-x { + overflow-x: hidden; +} + +.dygraph-label-rotate-left { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(90deg); + -webkit-transform: rotate(90deg); + -moz-transform: rotate(90deg); + -o-transform: rotate(90deg); + -ms-transform: rotate(90deg); +} + +/* For y2-axis label */ +.dygraph-label-rotate-right { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(-90deg); + -webkit-transform: rotate(-90deg); + -moz-transform: rotate(-90deg); + -o-transform: rotate(-90deg); + -ms-transform: rotate(-90deg); +} + +.dygraph-title { + /* 56 for legend-right */ + text-indent: 36px; + text-align: left; + position: absolute; + left: 0px; + top: 4px; + font-size: 11px; + font-weight: bold; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; +} + +/* fix for sparkline tooltip under bootstrap */ +.jqstooltip { + width: auto !important; + height: auto !important; +} + +.easyPieChart { + position: relative; + text-align: center; +} + +.easyPieChart canvas { + position: absolute; + top: 0; + left: 0; +} + +.easyPieChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: normal; + text-shadow: var(--color-elementBackground, #BBB) 0px 0px 1px; + /* -webkit-font-smoothing: none; */ +} + +.easyPieChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 64%; + margin-left: 18% !important; + text-align: center; + color: var(--color-border, #999999); + font-weight: bold; +} + +.easyPieChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 60%; + margin-left: 20% !important; + text-align: center; + color: var(--color-border, #999999); + font-weight: normal; +} + +.gaugeChart { + position: relative; + text-align: center; +} + +.gaugeChart canvas { + position: absolute; + top: 0; + left: 0; + bottom: 0; + right: 0; + z-index: 0; +} + +.gaugeChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: bold; + z-index: 1; + text-shadow: var(--color-elementBackground, #777) 0px 0px 1px; + /* text-shadow: #CCC 1px 1px 0px, #CCC -1px -1px 0px, #CCC 1px -1px 0px, #CCC -1px 1px 0px; */ + /* -webkit-text-stroke: 1px #777; */ + /* -webkit-font-smoothing: none; */ +} + +.gaugeChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-border, #999999); + font-weight: bold; +} + +.gaugeChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 0; + width: 100%; + text-align: left; + margin-left: 5%; + color: var(--color-border, #999999); + font-weight: normal; +} + +.gaugeChartMin { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 92%; + margin-left: 8%; + text-align: left; + color: var(--color-main); + font-weight: normal; +} + +.gaugeChartMax { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 95%; + margin-right: 5%; + text-align: right; + color: var(--color-main); + font-weight: normal; +} + +.popover-title { + font-weight: bold; + font-size: 12px; +} + +.popover-content { + font-size: 11px; +} + +/* ---------------------------------------------------------------------------- + perfect-scrollbar settings + */ + +.ps-container { + -ms-touch-action: auto; + touch-action: auto; + overflow: hidden !important; + -ms-overflow-style: none; +} + +@supports (-ms-overflow-style: none) { + .ps-container { + overflow: auto !important; + } +} + +@media screen and (-ms-high-contrast: active), (-ms-high-contrast: none) { + .ps-container { + overflow: auto !important; + } +} + +.ps-container.ps-active-x > .ps-scrollbar-x-rail, +.ps-container.ps-active-y > .ps-scrollbar-y-rail { + display: block; + background-color: transparent; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #aaa; /* scrollbar color when dragged away */ + height: 5px; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #aaa; /* scrollbar color when dragged away */ + width: 5px; +} + +.ps-container > .ps-scrollbar-x-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + bottom: 0px; + /* there must be 'bottom' for ps-scrollbar-x-rail */ + height: 15px; +} + +.ps-container > .ps-scrollbar-x-rail > .ps-scrollbar-x { + position: absolute; + /* please don't change 'position' */ + background-color: var(--color-elementBackground, #666); /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + bottom: 2px; + /* there must be 'bottom' for ps-scrollbar-x */ + height: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x, .ps-container > .ps-scrollbar-x-rail:active > .ps-scrollbar-x { + height: 4px; +} + +.ps-container > .ps-scrollbar-y-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + right: 0; + /* there must be 'right' for ps-scrollbar-y-rail */ + width: 15px; +} + +.ps-container > .ps-scrollbar-y-rail > .ps-scrollbar-y { + position: absolute; + /* please don't change 'position' */ + background-color: var(--color-elementBackground, #666); /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + right: 2px; + /* there must be 'right' for ps-scrollbar-y */ + width: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y, .ps-container > .ps-scrollbar-y-rail:active > .ps-scrollbar-y { + width: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #bbb; /* scrollbar color when dragged */ + height: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #bbb; /* scrollbar color when dragged */ + width: 5px; +} + +.ps-container:hover > .ps-scrollbar-x-rail, +.ps-container:hover > .ps-scrollbar-y-rail { + opacity: 0.6; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x { + background-color: #999; /* scrollbar color on hover */ +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y { + background-color: #999; /* scrollbar color on hover */ +} + +.dygraph__history-tip { + position: absolute; + transform: translateY(-50%); + display: none; /* overridden in js */ + margin-right: 25px; + direction: rtl; + overflow: hidden; + pointer-events: none; +} + +.dygraph__history-tip-content { + display: inline-block; + white-space: nowrap; + direction: ltr; + pointer-events: auto; +} diff --git a/web/gui/dashboard/css/dashboard.slate.css b/web/gui/dashboard/css/dashboard.slate.css new file mode 100644 index 0000000..c4e5c65 --- /dev/null +++ b/web/gui/dashboard/css/dashboard.slate.css @@ -0,0 +1,803 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +:root { + --color-main: #FFFFFF; + --color-primary: #00ab44; + --color-border: #878b90; +} + +html, +body { + /*font-family: Calibri,"Segoe UI","Helvetica Neue",Helvetica,Arial,sans-serif;*/ + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-style: normal; + font-variant: normal; + color: #878b90; +} + +/* fixes for default slate theme */ +code { + color: #bbb; /*#c7254e;*/ + background-color: #555; /* #f9f2f4; */ +} + +.dashboard-sidebar .nav > .active > a, +.dashboard-sidebar .nav > .active:hover > a, +.dashboard-sidebar .nav > .active:focus > a { + color: #765d9c; + border-left: 2px solid #765d9c; +} + +.morelink { + color: #765d9c; + text-decoration: none; +} + +.morelink:hover { + color: #563d7c; + text-decoration: none; +} + +.morelink:focus { + color: #765d9c; + text-decoration: none; +} + +.netdata-chart-alignment { + /* 55 for legend-right */ + margin-left: 35px; +} + +.netdata-chart-row { + width: 100%; + text-align: center; + display: flex; + display: -webkit-flex; + display: -moz-flex; + align-items: flex-end; + -moz-align-items: flex-end; + -webkit-align-items: flex-end; + justify-content: center; + -moz--webkit-justify-content: center; + -moz-justify-content: center; + padding-top: 10px; +} + +.netdata-container { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge:after { + padding-top: 60%; + display: block; + content: ''; +} + +.netdata-container-easypiechart { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-easypiechart:after { + padding-top: 100%; + display: block; + content: ''; +} + +.netdata-aspect { + position: relative; + width: 100%; + padding: 0px; + margin: 0px; +} + +.netdata-container-with-legend { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* fix minimum scrollbar issue in firefox */ + min-height: 99px; + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-with-legend.netdata-container-with-legend--bottom { + display: flex; + flex-direction: column; +} + +.netdata-legend-resize-handler { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 20px; + background-color: #272b30; + font-size: 15px; + vertical-align: middle; + line-height: 15px; + cursor: ns-resize; + color: var(--color-selected, #373b40); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; +} + +.netdata-legend-toolbox { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 110px; + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: #373b40; + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-legend-toolbox-button { + display: inline-block; + position: relative; + height: 15px; + width: 18px; + background-color: #272b30; + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: var(--color-selected, #373b40); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + cursor: pointer; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-message { + display: inline-block; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + text-align: left; + vertical-align: top; + font-weight: bold; + font-size: x-small; + overflow: hidden; + background: inherit; + z-index: 0; +} + +.netdata-message.hidden { + display: none; +} + +.netdata-message.icon { + color: #2f3338; + text-align: center; + vertical-align: middle; +} + +.netdata-chart-legend { + position: absolute; /* within .netdata-container */ + top: 0; + right: 0; + overflow: hidden; + text-overflow: ellipsis; + line-height: 14px; + display: block; + width: 140px; /* --legend-width */ + height: calc(100% - 15px); /* 10px for the resize handler and 5px for the top margin */ + font-size: 10px; + margin-top: 5px; + text-align: left; + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-legend-title-date { + font-size: 10px; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-time { + font-size: 11px; + font-weight: bold; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-units { + position: absolute; + right: 10px; + float: right; + font-size: 11px; + vertical-align: top; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-series { + position: absolute; + width: 140px; /* legend-width */ + height: calc(100% - 50px); + overflow: hidden; + text-overflow: ellipsis; + line-height: 14.5px; /* line spacing at the legend */ + display: block; + font-size: 10px; + margin-top: 0px; +} + +.netdata-legend-name-table-line { + display: inline-block; + width: 13px; + height: 4px; + border-width: 0px; + border-bottom-width: 2px; + border-bottom-style: solid; + border-bottom-color: #272b30; +} + +.netdata-legend-name-table-area { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-table-stacked { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-tr { +} + +.netdata-legend-name-td { +} + +.netdata-legend-name { + text-align: left; + font-size: 11px; /* legend: dimension name size */ + font-weight: bold; + vertical-align: bottom; + margin-top: 0px; + z-index: 9; + padding: 0px; + width: 80px !important; + max-width: 80px !important; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + display: inline-block; + cursor: pointer; + -webkit-print-color-adjust: exact; +} + +.netdata-legend-value { + /*margin-left: 14px;*/ + position: absolute; + right: 10px; + float: right; + text-align: right; + font-size: 11px; /* legend: dimension value size */ + font-weight: bold; + vertical-align: bottom; + background-color: #272b30; + margin-top: 0px; + z-index: 10; + padding: 0px; + padding-left: 15px; + cursor: pointer; + /* -webkit-font-smoothing: none; */ +} + +.netdata-legend-value, .netdata-legend-name { + /* prevent highlight when shift-clicking */ + -webkit-user-select: none; + -ms-user-select: none; + -moz-user-select: none; + user-select: none; +} + +/* eslint recommends adding tabIndex for a11y, but the outline is showing on click */ +.netdata-legend-name:focus, .netdata-legend-value:focus { + outline: none; +} + +.netdata-legend-name.not-selected { + font-weight: normal; + opacity: 0.3; +} + +.netdata-chart { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: inline-block; + overflow: hidden; + width: 100%; + height: 100%; + z-index: 5; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-right { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: block; + overflow: hidden; + margin-right: 140px; /* --legend-width */ + width: calc(100% - 140px); /* --legend-width */ + height: 100%; + z-index: 5; + flex-grow: 1; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-bottom { + display: block; + overflow: hidden; + flex-grow: 1; +} + +.netdata-peity-chart { + +} + +.netdata-sparkline-chart { + +} + +.netdata-dygraph-chart { + +} + +.netdata-morris-chart { + +} + +.netdata-google-chart { + +} + +.dygraph-ylabel { +} + +.dygraph-axis-label-x { + overflow-x: hidden; +} + +.dygraph-axis-label { + color: #6c7075; +} + +.dygraph-label-rotate-left { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(90deg); + -webkit-transform: rotate(90deg); + -moz-transform: rotate(90deg); + -o-transform: rotate(90deg); + -ms-transform: rotate(90deg); +} + +/* For y2-axis label */ +.dygraph-label-rotate-right { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(-90deg); + -webkit-transform: rotate(-90deg); + -moz-transform: rotate(-90deg); + -o-transform: rotate(-90deg); + -ms-transform: rotate(-90deg); +} + +.dygraph-title { + /* 56 for legend-right */ + text-indent: 36px; + text-align: left; + position: absolute; + left: 0px; + top: 4px; + font-size: 11px; + font-weight: bold; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; +} + +/* fix for sparkline tooltip under bootstrap */ +.jqstooltip { + width: auto !important; + height: auto !important; +} + +.easyPieChart { + position: relative; + text-align: center; +} + +.easyPieChart canvas { + position: absolute; + top: 0; + left: 0; +} + +.easyPieChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: normal; + text-shadow: #272b30 0px 0px 1px; + /* -webkit-font-smoothing: none; */ +} + +.easyPieChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 64%; + margin-left: 18% !important; + text-align: center; + color: var(--color-border,#676b70); + font-weight: bold; +} + +.easyPieChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 60%; + margin-left: 20% !important; + text-align: center; + color:var(--color-border,#676b70); + font-weight: normal; +} + +.gaugeChart { + position: relative; + text-align: center; +} + +.gaugeChart canvas { + position: absolute; + top: 0; + left: 0; + bottom: 0; + right: 0; + z-index: 0; +} + +.gaugeChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: bold; + z-index: 1; + text-shadow: #272b30 0px 0px 1px; + /* text-shadow: #CCC 1px 1px 0px, #CCC -1px -1px 0px, #CCC 1px -1px 0px, #CCC -1px 1px 0px; */ + /* -webkit-text-stroke: 1px #777; */ + /* -webkit-font-smoothing: none; */ +} + +.gaugeChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-border, #676b70); + font-weight: bold; +} + +.gaugeChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 0; + width: 100%; + text-align: left; + margin-left: 5%; + color: var(--color-border,#676b70); + font-weight: normal; +} + +.gaugeChartMin { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 92%; + margin-left: 8%; + text-align: left; + color: var(--color-main); + font-weight: normal; +} + +.gaugeChartMax { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 95%; + margin-right: 5%; + text-align: right; + color: var(--color-main); + font-weight: normal; +} + +.popover-title { + font-weight: bold; + font-size: 12px; +} + +.popover-content { + font-size: 11px; +} + +/* ---------------------------------------------------------------------------- + perfect-scrollbar settings + */ + +.ps-container { + -ms-touch-action: auto; + touch-action: auto; + overflow: hidden !important; + -ms-overflow-style: none; +} + +@supports (-ms-overflow-style: none) { + .ps-container { + overflow: auto !important; + } +} + +@media screen and (-ms-high-contrast: active), (-ms-high-contrast: none) { + .ps-container { + overflow: auto !important; + } +} + +.ps-container.ps-active-x > .ps-scrollbar-x-rail, +.ps-container.ps-active-y > .ps-scrollbar-y-rail { + display: block; + background-color: transparent; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #aaa; /* scrollbar color when dragged away */ + height: 5px; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #aaa; /* scrollbar color when dragged away */ + width: 5px; +} + +.ps-container > .ps-scrollbar-x-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + bottom: 0px; + /* there must be 'bottom' for ps-scrollbar-x-rail */ + height: 15px; +} + +.ps-container > .ps-scrollbar-x-rail > .ps-scrollbar-x { + position: absolute; + /* please don't change 'position' */ + background-color: #666; /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + bottom: 2px; + /* there must be 'bottom' for ps-scrollbar-x */ + height: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x, .ps-container > .ps-scrollbar-x-rail:active > .ps-scrollbar-x { + height: 5px; +} + +.ps-container > .ps-scrollbar-y-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + right: 0; + /* there must be 'right' for ps-scrollbar-y-rail */ + width: 15px; +} + +.ps-container > .ps-scrollbar-y-rail > .ps-scrollbar-y { + position: absolute; + /* please don't change 'position' */ + background-color: #666; /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + right: 2px; + /* there must be 'right' for ps-scrollbar-y */ + width: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y, .ps-container > .ps-scrollbar-y-rail:active > .ps-scrollbar-y { + width: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #bbb; /* scrollbar color when dragged */ + height: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #bbb; /* scrollbar color when dragged */ + width: 5px; +} + +.ps-container:hover > .ps-scrollbar-x-rail, +.ps-container:hover > .ps-scrollbar-y-rail { + opacity: 0.6; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x { + background-color: #999; /* scrollbar color on hover */ +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y { + background-color: #999; /* scrollbar color on hover */ +} + +.dygraph__history-tip { + position: absolute; + top: 50%; + transform: translateY(-50%); + display: none; /* overridden in js */ + margin-right: 25px; + direction: rtl; + overflow: hidden; + pointer-events: none; +} + +.dygraph__history-tip-content { + display: inline-block; + white-space: nowrap; + direction: ltr; + pointer-events: auto; +} diff --git a/web/gui/dashboard/dash-example.html b/web/gui/dashboard/dash-example.html new file mode 100644 index 0000000..b9bc803 --- /dev/null +++ b/web/gui/dashboard/dash-example.html @@ -0,0 +1,1020 @@ + + + + + + + + + + + + +
    +
    +
    host
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + + + + + + diff --git a/web/gui/dashboard/dashboard-react.js b/web/gui/dashboard/dashboard-react.js new file mode 100644 index 0000000..99f868f --- /dev/null +++ b/web/gui/dashboard/dashboard-react.js @@ -0,0 +1,675 @@ +/* eslint-disable */ + +/** + * after react-dashboard refractor, this file can be renamed to 'dashboard.js' + * and it will: + * - setup global objects, so any assignments like 'NETDATA.options.current.destroy_on_hide = true' + * will not break. we need to add it in places where 'dashboard.js' is + * - create react root DOM node + * - load react app + * + * Later, for performance improvement, the bundle can be added to dashboard-rect.js, + * but we need to run the react-app part after DOM is created and ready + */ + +// ---------------------------------------------------------------------------- +// global namespace + +// Should stay var! +var NETDATA = window.NETDATA || {}; +window.NETDATA = NETDATA // when imported as npm module + +/// A heuristic for detecting slow devices. +let isSlowDeviceResult; +const isSlowDevice = function () { + if (!isSlowDeviceResult) { + return isSlowDeviceResult; + } + + try { + let ua = navigator.userAgent.toLowerCase(); + + let iOS = /ipad|iphone|ipod/.test(ua) && !window.MSStream; + let android = /android/.test(ua) && !window.MSStream; + isSlowDeviceResult = (iOS || android); + } catch (e) { + isSlowDeviceResult = false; + } + + return isSlowDeviceResult; +}; + +if (typeof window.netdataSnapshotData === 'undefined') { + window.netdataSnapshotData = null; +} + +if (typeof window.netdataShowHelp === 'undefined') { + window.netdataShowHelp = true; +} + +if (typeof window.netdataShowAlarms === 'undefined') { + window.netdataShowAlarms = false; +} + +if (typeof window.netdataRegistryAfterMs !== 'number' || window.netdataRegistryAfterMs < 0) { + window.netdataRegistryAfterMs = 0; // 1500; +} + +if (typeof window.netdataRegistry === 'undefined') { + // backward compatibility + window.netdataRegistry = (typeof netdataNoRegistry !== 'undefined' && netdataNoRegistry === false); +} + +if (window.netdataRegistry === false && typeof netdataRegistryCallback === 'function') { + window.netdataRegistry = true; +} + +// ---------------------------------------------------------------------------------------------------------------- +// the defaults for all charts + +// if the user does not specify any of these, the following will be used + +NETDATA.chartDefaults = { + width: '100%', // the chart width - can be null + height: '100%', // the chart height - can be null + min_width: null, // the chart minimum width - can be null + library: 'dygraph', // the graphing library to use + method: 'average', // the grouping method + before: 0, // panning + after: -600, // panning + pixels_per_point: 1, // the detail of the chart + fill_luminance: 0.8 // luminance of colors in solid areas +}; + + + +// ---------------------------------------------------------------------------------------------------------------- +// global options + +NETDATA.options = { + pauseCallback: null, // a callback when we are really paused + + pause: false, // when enabled we don't auto-refresh the charts + + targets: [], // an array of all the state objects that are + // currently active (independently of their + // viewport visibility) + + updated_dom: true, // when true, the DOM has been updated with + // new elements we have to check. + + auto_refresher_fast_weight: 0, // this is the current time in ms, spent + // rendering charts continuously. + // used with .current.fast_render_timeframe + + page_is_visible: true, // when true, this page is visible + + auto_refresher_stop_until: 0, // timestamp in ms - used internally, to stop the + // auto-refresher for some time (when a chart is + // performing pan or zoom, we need to stop refreshing + // all other charts, to have the maximum speed for + // rendering the chart that is panned or zoomed). + // Used with .current.global_pan_sync_time + + on_scroll_refresher_stop_until: 0, // timestamp in ms - used to stop evaluating + // charts for some time, after a page scroll + + last_page_resize: Date.now(), // the timestamp of the last resize request + + last_page_scroll: 0, // the timestamp the last time the page was scrolled + + browser_timezone: (Intl && Intl.DateTimeFormat) + ? Intl.DateTimeFormat().resolvedOptions().timeZone // timezone detected by javascript + : "cannot-detect-it", + + server_timezone: 'unknown', // timezone reported by the server + + force_data_points: 0, // force the number of points to be returned for charts + fake_chart_rendering: false, // when set to true, the dashboard will download data but will not render the charts + + passive_events: null, // true if the browser supports passive events + + // the current profile + // we may have many... + current: { + units: 'auto', // can be 'auto' or 'original' + temperature: 'celsius', // can be 'celsius' or 'fahrenheit' + seconds_as_time: true, // show seconds as DDd:HH:MM:SS ? + timezone: 'default', // the timezone to use, or 'default' + user_set_server_timezone: 'default', // as set by the user on the dashboard + + legend_toolbox: true, // show the legend toolbox on charts + resize_charts: true, // show the resize handler on charts + + pixels_per_point: isSlowDevice() ? 5 : 1, // the minimum pixels per point for all charts + // increase this to speed javascript up + // each chart library has its own limit too + // the max of this and the chart library is used + // the final is calculated every time, so a change + // here will have immediate effect on the next chart + // update + + idle_between_charts: 100, // ms - how much time to wait between chart updates + + fast_render_timeframe: 200, // ms - render continuously until this time of continuous + // rendering has been reached + // this setting is used to make it render e.g. 10 + // charts at once, sleep idle_between_charts time + // and continue for another 10 charts. + + idle_between_loops: 500, // ms - if all charts have been updated, wait this + // time before starting again. + + idle_parallel_loops: 100, // ms - the time between parallel refresher updates + + idle_lost_focus: 500, // ms - when the window does not have focus, check + // if focus has been regained, every this time + + global_pan_sync_time: 300, // ms - when you pan or zoom a chart, the background + // auto-refreshing of charts is paused for this amount + // of time + + sync_selection_delay: 400, // ms - when you pan or zoom a chart, wait this amount + // of time before setting up synchronized selections + // on hover. + + sync_selection: true, // enable or disable selection sync + + pan_and_zoom_delay: 50, // when panning or zooming, how ofter to update the chart + + sync_pan_and_zoom: true, // enable or disable pan and zoom sync + + pan_and_zoom_data_padding: true, // fetch more data for the master chart when panning or zooming + + update_only_visible: true, // enable or disable visibility management / used for printing + + parallel_refresher: !isSlowDevice(), // enable parallel refresh of charts + + concurrent_refreshes: true, // when parallel_refresher is enabled, sync also the charts + + destroy_on_hide: isSlowDevice(), // destroy charts when they are not visible + + // when enabled the charts will show some help + // when there's no bootstrap, we can't show it + show_help: netdataShowHelp && !window.netdataNoBootstrap, + show_help_delay_show_ms: 500, + show_help_delay_hide_ms: 0, + + eliminate_zero_dimensions: true, // do not show dimensions with just zeros + + stop_updates_when_focus_is_lost: true, // boolean - shall we stop auto-refreshes when document does not have user focus + stop_updates_while_resizing: 1000, // ms - time to stop auto-refreshes while resizing the charts + + double_click_speed: 500, // ms - time between clicks / taps to detect double click/tap + + smooth_plot: !isSlowDevice(), // enable smooth plot, where possible + + color_fill_opacity_line: 1.0, + color_fill_opacity_area: 0.2, + color_fill_opacity_fake_stacked: 1, + color_fill_opacity_stacked: 0.8, + + pan_and_zoom_factor: 0.25, // the increment when panning and zooming with the toolbox + pan_and_zoom_factor_multiplier_control: 2.0, + pan_and_zoom_factor_multiplier_shift: 3.0, + pan_and_zoom_factor_multiplier_alt: 4.0, + + abort_ajax_on_scroll: false, // kill pending ajax page scroll + async_on_scroll: false, // sync/async onscroll handler + onscroll_worker_duration_threshold: 30, // time in ms, for async scroll handler + + retries_on_data_failures: 3, // how many retries to make if we can't fetch chart data from the server + + setOptionCallback: function () { + } + }, + + debug: { + show_boxes: false, + main_loop: false, + focus: false, + visibility: false, + chart_data_url: false, + chart_errors: true, // remember to set it to false before merging + chart_timing: false, + chart_calls: false, + libraries: false, + dygraph: false, + globalSelectionSync: false, + globalPanAndZoom: false + } +}; + + +NETDATA.statistics = { + refreshes_total: 0, + refreshes_active: 0, + refreshes_active_max: 0 +}; + +NETDATA.themes = { + white: { + bootstrap_css: "css/bootstrap-3.3.7.css", + dashboard_css: "css/dashboard.css?v20180210-1", + background: "#FFFFFF", + foreground: "#000000", + grid: "#F0F0F0", + axis: "#F0F0F0", + highlight: "#F5F5F5", + colors: ["#3366CC", "#DC3912", "#109618", "#FF9900", "#990099", "#DD4477", + "#3B3EAC", "#66AA00", "#0099C6", "#B82E2E", "#AAAA11", "#5574A6", + "#994499", "#22AA99", "#6633CC", "#E67300", "#316395", "#8B0707", + "#329262", "#3B3EAC"], + easypiechart_track: "#f0f0f0", + easypiechart_scale: "#dfe0e0", + gauge_pointer: "#C0C0C0", + gauge_stroke: "#F0F0F0", + gauge_gradient: false, + gauge_stop_color: "#FC8D5E", + gauge_start_color: "#B0E952", + d3pie: { + title: "#333333", + subtitle: "#666666", + footer: "#888888", + other: "#aaaaaa", + mainlabel: "#333333", + percentage: "#dddddd", + value: "#aaaa22", + tooltip_bg: "#000000", + tooltip_fg: "#efefef", + segment_stroke: "#ffffff", + gradient_color: "#000000", + }, + }, + slate: { + bootstrap_css: "css/bootstrap-slate-flat-3.3.7.css?v20161229-1", + dashboard_css: "css/dashboard.slate.css?v20180210-1", + background: "#272b30", + foreground: "#C8C8C8", + grid: "#283236", + axis: "#283236", + highlight: "#383838", + colors: ["#66AA00", "#FE3912", "#3366CC", "#D66300", "#0099C6", "#DDDD00", + "#5054e6", "#EE9911", "#BB44CC", "#e45757", "#ef0aef", "#CC7700", + "#22AA99", "#109618", "#905bfd", "#f54882", "#4381bf", "#ff3737", + "#329262", "#3B3EFF"], + easypiechart_track: "#373b40", + easypiechart_scale: "#373b40", + gauge_pointer: "#474b50", + gauge_stroke: "#373b40", + gauge_gradient: false, + gauge_stop_color: "#FC8D5E", + gauge_start_color: "#B0E952", + d3pie: { + title: "#C8C8C8", + subtitle: "#283236", + footer: "#283236", + other: "#283236", + mainlabel: "#C8C8C8", + percentage: "#dddddd", + value: "#cccc44", + tooltip_bg: "#272b30", + tooltip_fg: "#C8C8C8", + segment_stroke: "#283236", + gradient_color: "#000000", + }, + }, +} + +// Codacy declarations +/* global netdataTheme */ + +NETDATA.updateTheme = function () { + if (typeof window.netdataTheme !== 'undefined' + && typeof NETDATA.themes[netdataTheme] !== 'undefined' + ) { + NETDATA.themes.current = NETDATA.themes[window.netdataTheme]; + } else { + NETDATA.themes.current = NETDATA.themes.white; + } + + NETDATA.colors = NETDATA.themes.current.colors; +} + +NETDATA.updateTheme() + +// these are the colors Google Charts are using +// we have them here to attempt emulate their look and feel on the other chart libraries +// http://there4.io/2012/05/02/google-chart-color-list/ +//NETDATA.colors = [ '#3366CC', '#DC3912', '#FF9900', '#109618', '#990099', '#3B3EAC', '#0099C6', +// '#DD4477', '#66AA00', '#B82E2E', '#316395', '#994499', '#22AA99', '#AAAA11', +// '#6633CC', '#E67300', '#8B0707', '#329262', '#5574A6', '#3B3EAC' ]; + +// an alternative set +// http://www.mulinblog.com/a-color-palette-optimized-for-data-visualization/ +// (blue) (red) (orange) (green) (pink) (brown) (purple) (yellow) (gray) +//NETDATA.colors = [ '#5DA5DA', '#F15854', '#FAA43A', '#60BD68', '#F17CB0', '#B2912F', '#B276B2', '#DECF3F', '#4D4D4D' ]; +// dygraph + +// local storage options + +NETDATA.localStorage = { + default: {}, + current: {}, + callback: {} // only used for resetting back to defaults +}; + + +// todo temporary stuff which was originally in dashboard.js +// but needs to be refractored +NETDATA.name2id = function (s) { + return s + .replace(/ /g, '_') + .replace(/:/g, '_') + .replace(/\(/g, '_') + .replace(/\)/g, '_') + .replace(/\./g, '_') + .replace(/\//g, '_'); +}; + +NETDATA.globalChartUnderlay = { + clear: () => {}, + init: () => {}, +} + +NETDATA.globalPanAndZoom = { + callback: () => {}, +} +NETDATA.unpause = () => {} + + +// ---------------------------------------------------------------------------------------------------------------- +// XSS checks + +NETDATA.xss = { + enabled: (typeof netdataCheckXSS === 'undefined') ? false : netdataCheckXSS, + enabled_for_data: (typeof netdataCheckXSS === 'undefined') ? false : netdataCheckXSS, + + string: function (s) { + return s.toString() + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + }, + + object: function (name, obj, ignore_regex) { + if (typeof ignore_regex !== 'undefined' && ignore_regex.test(name)) { + // console.log('XSS: ignoring "' + name + '"'); + return obj; + } + + switch (typeof(obj)) { + case 'string': + const ret = this.string(obj); + if (ret !== obj) { + console.log('XSS protection changed string ' + name + ' from "' + obj + '" to "' + ret + '"'); + } + return ret; + + case 'object': + if (obj === null) { + return obj; + } + + if (Array.isArray(obj)) { + // console.log('checking array "' + name + '"'); + + let len = obj.length; + while (len--) { + obj[len] = this.object(name + '[' + len + ']', obj[len], ignore_regex); + } + } else { + // console.log('checking object "' + name + '"'); + + for (var i in obj) { + if (obj.hasOwnProperty(i) === false) { + continue; + } + if (this.string(i) !== i) { + console.log('XSS protection removed invalid object member "' + name + '.' + i + '"'); + delete obj[i]; + } else { + obj[i] = this.object(name + '.' + i, obj[i], ignore_regex); + } + } + } + return obj; + + default: + return obj; + } + }, + + checkOptional: function (name, obj, ignore_regex) { + if (this.enabled) { + //console.log('XSS: checking optional "' + name + '"...'); + return this.object(name, obj, ignore_regex); + } + return obj; + }, + + checkAlways: function (name, obj, ignore_regex) { + //console.log('XSS: checking always "' + name + '"...'); + return this.object(name, obj, ignore_regex); + }, + + checkData: function (name, obj, ignore_regex) { + if (this.enabled_for_data) { + //console.log('XSS: checking data "' + name + '"...'); + return this.object(name, obj, ignore_regex); + } + return obj; + } +}; + + + +const fixHost = (host) => { + while (host.slice(-1) === '/') { + host = host.substring(0, host.length - 1); + } + + return host; +} + +NETDATA.chartRegistry = { + charts: {}, + + globalReset: function () { + this.charts = {}; + }, + + add: function (host, id, data) { + if (typeof this.charts[host] === 'undefined') { + this.charts[host] = {}; + } + + //console.log('added ' + host + '/' + id); + this.charts[host][id] = data; + }, + + get: function (host, id) { + if (typeof this.charts[host] === 'undefined') { + return null; + } + + if (typeof this.charts[host][id] === 'undefined') { + return null; + } + + //console.log('cached ' + host + '/' + id); + return this.charts[host][id]; + }, + + downloadAll: function (host, callback) { + host = fixHost(host); + + let self = this; + + function got_data(h, data, callback) { + if (data !== null) { + self.charts[h] = data.charts; + window.charts = data.charts + + // update the server timezone in our options + if (typeof data.timezone === 'string') { + NETDATA.options.server_timezone = data.timezone; + } + } else { + NETDATA.error(406, h + '/api/v1/charts'); + } + + if (typeof callback === 'function') { + callback(data); + } + } + + if (window.netdataSnapshotData !== null) { + got_data(host, window.netdataSnapshotData.charts, callback); + } else { + $.ajax({ + url: host + '/api/v1/charts', + async: true, + cache: false, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkOptional('/api/v1/charts', data); + got_data(host, data, callback); + }) + .fail(function () { + NETDATA.error(405, host + '/api/v1/charts'); + + if (typeof callback === 'function') { + callback(null); + } + }); + } + } +}; + + +NETDATA.fixHost = function (host) { + while (host.slice(-1) === '/') { + host = host.substring(0, host.length - 1); + } + + return host; +}; + + +NETDATA.registryHello = function (host, callback) { + host = NETDATA.fixHost(host); + + // send HELLO to a netdata server: + // 1. verifies the server is reachable + // 2. responds with the registry URL, the machine GUID of this netdata server and its hostname + $.ajax({ + url: host + '/api/v1/registry?action=hello', + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkOptional('/api/v1/registry?action=hello', data); + + if (typeof data.status !== 'string' || data.status !== 'ok') { + // NETDATA.error(408, host + ' response: ' + JSON.stringify(data)); + data = null; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + // NETDATA.error(407, host); + + if (typeof callback === 'function') { + return callback(null); + } + }); +} + +NETDATA.registrySearch = function (machine_guid, getFromRegistry, serverDefault, callback) { + // SEARCH for the URLs of a machine: + $.ajax({ + url: getFromRegistry("registryServer") + '/api/v1/registry?action=search&machine=' + + getFromRegistry("machineGuid") + '&name=' + encodeURIComponent(getFromRegistry("hostname")) + + '&url=' + encodeURIComponent(serverDefault) + '&for=' + machine_guid, + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkAlways('/api/v1/registry?action=search', data); + + if (typeof data.status !== 'string' || data.status !== 'ok') { + // NETDATA.error(417, getFromRegistry("registryServer") + ' responded with: ' + JSON.stringify(data)); + console.warn(getFromRegistry("registryServer") + ' responded with: ' + JSON.stringify(data)); + data = null; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + // NETDATA.error(418, getFromRegistry("registryServer")); + console.warn("registry search call failed", getFromRegistry("registryServer")) + + if (typeof callback === 'function') { + return callback(null); + } + }); +} + +NETDATA.registryDelete = function (getFromRegistry, serverDefault, delete_url, callback) { + // send DELETE to a netdata registry: + $.ajax({ + url: getFromRegistry("registryServer") + '/api/v1/registry?action=delete&machine=' + + getFromRegistry("machineGuid") + '&name=' + encodeURIComponent(getFromRegistry("hostname")) + + '&url=' + encodeURIComponent(serverDefault) + '&delete_url=' + encodeURIComponent(delete_url), + // + '&url=' + encodeURIComponent("http://n5.katsuna.com:19999/") + '&delete_url=' + encodeURIComponent(delete_url), + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + // data = NETDATA.xss.checkAlways('/api/v1/registry?action=delete', data); + + if (typeof data.status !== 'string' || data.status !== 'ok') { + // NETDATA.error(411, NETDATA.registry.server + ' responded with: ' + JSON.stringify(data)); + console.warn(411, getFromRegistry("registryServer") + ' responded with: ' + JSON.stringify(data)); + data = null; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + // NETDATA.error(412, NETDATA.registry.server); + console.warn(412, getFromRegistry("registryServer")); + + if (typeof callback === 'function') { + return callback(null); + } + }); +} + + +// NETDATA.currentScript = document.currentScript diff --git a/web/gui/dashboard/dashboard.css b/web/gui/dashboard/dashboard.css new file mode 100644 index 0000000..2d95a33 --- /dev/null +++ b/web/gui/dashboard/dashboard.css @@ -0,0 +1,785 @@ +/* SPDX-License-Identfier: GPL-3.0-or-later */ +:root { + --color-main: #35414A; + --color-primary: #00ab44; + --color-border: #93a3b0; +} + +html, +body { + /*font-family: Calibri,"Segoe UI","Helvetica Neue",Helvetica,Arial,sans-serif;*/ + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-style: normal; + font-variant: normal; +} + +.morelink { + color: var(--color-attention, #765d9c); + text-decoration: none; +} + +.morelink:hover { + color: var(--color-attentionSecondary, #563d7c); + text-decoration: none; +} + +.morelink:focus { + color: var(--color-attention, #765d9c); + text-decoration: none; +} + +.netdata-chart-alignment { + /* 55 for legend-right */ + margin-left: 35px; +} + +.netdata-chart-row { + width: 100%; + text-align: center; + display: flex; + display: -webkit-flex; + display: -moz-flex; + align-items: baseline; + -moz-align-items: baseline; + -webkit-align-items: baseline; + justify-content: center; + -webkit-justify-content: center; + -moz-justify-content: center; + padding-top: 10px; +} + +.netdata-container { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge:after { + padding-top: 60%; + display: block; + content: ''; +} + +.netdata-container-easypiechart { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-easypiechart:after { + padding-top: 100%; + display: block; + content: ''; +} + +.netdata-aspect { + position: relative; + width: 100%; + padding: 0px; + margin: 0px; +} + +.netdata-container-with-legend { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* fix minimum scrollbar issue in firefox */ + min-height: 99px; + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-with-legend.netdata-container-with-legend--bottom { + display: flex; + flex-direction: column; +} + +.netdata-legend-resize-handler { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 20px; + background-color: var(--color-mainBackground, #fff); + font-size: 15px; + vertical-align: middle; + line-height: 15px; + cursor: ns-resize; + color: var(--color-selected, #CDCDCD); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; +} + +.netdata-legend-toolbox { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 110px; + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: var(--color-placeholder, #DDDDDD); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-legend-toolbox-button { + display: inline-block; + position: relative; + height: 15px; + width: 18px; + background-color: var(--color-mainBackground, #fff); + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: var(--color-selected, #CDCDCD); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + cursor: pointer; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-message { + display: inline-block; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + text-align: left; + vertical-align: top; + font-weight: bold; + font-size: x-small; + overflow: hidden; + background: inherit; + z-index: 0; +} + +.netdata-message.hidden { + display: none; +} + +.netdata-message.icon { + color: var(--color-borderSecondary, #F8F8F8); + text-align: center; + vertical-align: middle; +} + +.netdata-chart-legend { + position: absolute; /* within .netdata-container */ + top: 26px; + bottom: 18px; + right: 0; + overflow-x: hidden; + overflow-y: auto; + text-overflow: ellipsis; + line-height: 14px; + display: block; + width: 140px; /* --legend-width */ + font-size: 10px; + margin-top: 5px; + text-align: left; + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-legend-title-date { + font-size: 10px; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-time { + font-size: 11px; + font-weight: bold; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-units { + position: absolute; + right: 10px; + float: right; + font-size: 11px; + vertical-align: top; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-series { + position: absolute; + width: 140px; /* legend-width */ + height: calc(100% - 50px); + overflow: hidden; + text-overflow: ellipsis; + line-height: 14.5px; /* line spacing at the legend */ + display: block; + font-size: 10px; + margin-top: 0px; +} + +.netdata-legend-name-table-line { + display: inline-block; + width: 13px; + height: 4px; + border-width: 0px; + border-bottom-width: 2px; + border-bottom-style: solid; + border-bottom-color: white; +} + +.netdata-legend-name-table-area { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-table-stacked { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-tr { +} + +.netdata-legend-name-td { +} + +.netdata-legend-name { + text-align: left; + font-size: 11px; /* legend: dimension name size */ + font-weight: bold; + vertical-align: bottom; + margin-top: 0px; + z-index: 9; + padding: 0px; + width: 80px !important; + max-width: 80px !important; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + display: inline-block; + cursor: pointer; + -webkit-print-color-adjust: exact; +} + +.netdata-legend-value { + /*margin-left: 14px;*/ + position: absolute; + right: 10px; + float: right; + text-align: right; + font-size: 11px; /* legend: dimension value size */ + font-weight: bold; + vertical-align: bottom; + background-color: var(--color-mainBackground, #fff); + margin-top: 0px; + z-index: 10; + padding: 0px; + padding-left: 15px; + cursor: pointer; + /* -webkit-font-smoothing: none; */ +} + +.netdata-legend-value, .netdata-legend-name { + /* prevent highlight when shift-clicking */ + -webkit-user-select: none; + -ms-user-select: none; + -moz-user-select: none; + user-select: none; +} + +/* eslint recommends adding tabIndex for a11y, but the outline is showing on click */ +.netdata-legend-name:focus, .netdata-legend-value:focus { + outline: none; +} + +.netdata-legend-name.not-selected { + font-weight: normal; + opacity: 0.3; +} + +.netdata-chart { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: inline-block; + overflow: hidden; + width: 100%; + height: 100%; + z-index: 5; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-right { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: block; + overflow: hidden; + margin-right: 140px; /* --legend-width */ + width: calc(100% - 140px); /* --legend-width */ + height: 100%; + z-index: 5; + flex-grow: 1; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-bottom { + display: block; + overflow: hidden; + flex-grow: 1; +} + +.netdata-peity-chart { + +} + +.netdata-sparkline-chart { + +} + +.netdata-dygraph-chart { + +} + +.netdata-morris-chart { + +} + +.netdata-google-chart { + +} + +.dygraph-ylabel { +} + +.dygraph-axis-label-x { + overflow-x: hidden; +} + +.dygraph-label-rotate-left { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(90deg); + -webkit-transform: rotate(90deg); + -moz-transform: rotate(90deg); + -o-transform: rotate(90deg); + -ms-transform: rotate(90deg); +} + +/* For y2-axis label */ +.dygraph-label-rotate-right { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(-90deg); + -webkit-transform: rotate(-90deg); + -moz-transform: rotate(-90deg); + -o-transform: rotate(-90deg); + -ms-transform: rotate(-90deg); +} + +.dygraph-title { + /* 56 for legend-right */ + text-indent: 36px; + text-align: left; + position: absolute; + left: 0px; + top: 4px; + font-size: 11px; + font-weight: bold; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; +} + +/* fix for sparkline tooltip under bootstrap */ +.jqstooltip { + width: auto !important; + height: auto !important; +} + +.easyPieChart { + position: relative; + text-align: center; +} + +.easyPieChart canvas { + position: absolute; + top: 0; + left: 0; +} + +.easyPieChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: normal; + text-shadow: var(--color-elementBackground, #BBB) 0px 0px 1px; + /* -webkit-font-smoothing: none; */ +} + +.easyPieChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 64%; + margin-left: 18% !important; + text-align: center; + color: var(--color-border, #999999); + font-weight: bold; +} + +.easyPieChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 60%; + margin-left: 20% !important; + text-align: center; + color: var(--color-border, #999999); + font-weight: normal; +} + +.gaugeChart { + position: relative; + text-align: center; +} + +.gaugeChart canvas { + position: absolute; + top: 0; + left: 0; + bottom: 0; + right: 0; + z-index: 0; +} + +.gaugeChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: bold; + z-index: 1; + text-shadow: var(--color-elementBackground, #777) 0px 0px 1px; + /* text-shadow: #CCC 1px 1px 0px, #CCC -1px -1px 0px, #CCC 1px -1px 0px, #CCC -1px 1px 0px; */ + /* -webkit-text-stroke: 1px #777; */ + /* -webkit-font-smoothing: none; */ +} + +.gaugeChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-border, #999999); + font-weight: bold; +} + +.gaugeChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 0; + width: 100%; + text-align: left; + margin-left: 5%; + color: var(--color-border, #999999); + font-weight: normal; +} + +.gaugeChartMin { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 92%; + margin-left: 8%; + text-align: left; + color: var(--color-main); + font-weight: normal; +} + +.gaugeChartMax { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 95%; + margin-right: 5%; + text-align: right; + color: var(--color-main); + font-weight: normal; +} + +.popover-title { + font-weight: bold; + font-size: 12px; +} + +.popover-content { + font-size: 11px; +} + +/* ---------------------------------------------------------------------------- + perfect-scrollbar settings + */ + +.ps-container { + -ms-touch-action: auto; + touch-action: auto; + overflow: hidden !important; + -ms-overflow-style: none; +} + +@supports (-ms-overflow-style: none) { + .ps-container { + overflow: auto !important; + } +} + +@media screen and (-ms-high-contrast: active), (-ms-high-contrast: none) { + .ps-container { + overflow: auto !important; + } +} + +.ps-container.ps-active-x > .ps-scrollbar-x-rail, +.ps-container.ps-active-y > .ps-scrollbar-y-rail { + display: block; + background-color: transparent; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #aaa; /* scrollbar color when dragged away */ + height: 5px; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #aaa; /* scrollbar color when dragged away */ + width: 5px; +} + +.ps-container > .ps-scrollbar-x-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + bottom: 0px; + /* there must be 'bottom' for ps-scrollbar-x-rail */ + height: 15px; +} + +.ps-container > .ps-scrollbar-x-rail > .ps-scrollbar-x { + position: absolute; + /* please don't change 'position' */ + background-color: var(--color-elementBackground, #666); /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + bottom: 2px; + /* there must be 'bottom' for ps-scrollbar-x */ + height: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x, .ps-container > .ps-scrollbar-x-rail:active > .ps-scrollbar-x { + height: 4px; +} + +.ps-container > .ps-scrollbar-y-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + right: 0; + /* there must be 'right' for ps-scrollbar-y-rail */ + width: 15px; +} + +.ps-container > .ps-scrollbar-y-rail > .ps-scrollbar-y { + position: absolute; + /* please don't change 'position' */ + background-color: var(--color-elementBackground, #666); /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + right: 2px; + /* there must be 'right' for ps-scrollbar-y */ + width: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y, .ps-container > .ps-scrollbar-y-rail:active > .ps-scrollbar-y { + width: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #bbb; /* scrollbar color when dragged */ + height: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #bbb; /* scrollbar color when dragged */ + width: 5px; +} + +.ps-container:hover > .ps-scrollbar-x-rail, +.ps-container:hover > .ps-scrollbar-y-rail { + opacity: 0.6; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x { + background-color: #999; /* scrollbar color on hover */ +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y { + background-color: #999; /* scrollbar color on hover */ +} + +.dygraph__history-tip { + position: absolute; + transform: translateY(-50%); + display: none; /* overridden in js */ + margin-right: 25px; + direction: rtl; + overflow: hidden; + pointer-events: none; +} + +.dygraph__history-tip-content { + display: inline-block; + white-space: nowrap; + direction: ltr; + pointer-events: auto; +} diff --git a/web/gui/dashboard/dashboard.html b/web/gui/dashboard/dashboard.html new file mode 100644 index 0000000..be0febf --- /dev/null +++ b/web/gui/dashboard/dashboard.html @@ -0,0 +1,699 @@ + + + + + Netdata Dashboard + + + + + + + + + + + + + + + + + + + + +
    + +

    Netdata Custom Dashboard

    + +This is a template for building custom dashboards. To build a dashboard you just do this: + +
    +<!DOCTYPE html>
    +<html lang="en">
    +<head>
    +    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    +    <meta charset="utf-8">
    +    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    +    <meta name="viewport" content="width=device-width, initial-scale=1">
    +    <meta name="apple-mobile-web-app-capable" content="yes">
    +    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
    +</head>
    +<body>
    +    <div data-netdata="system.processes"
    +        data-chart-library="dygraph"
    +        data-width="600"
    +        data-height="200"
    +        data-after="-600"
    +        ></div>
    +</body>
    +<script type="text/javascript" src="http://netdata.server:19999/dashboard.js"></script>
    +</html>
    +
    + +
      +
    • You can host your dashboard anywhere.
    • +
    • You can add as many charts as you like.
    • +
    • You can have charts from many different netdata servers (add
      data-host="http://another.netdata.server:19999/"
      to each chart).
    • +
    • You can use different chart libraries on the same page: peity, sparkline, dygraph, google
    • +
    • You can customize each chart to your preferences. For each chart library most of their attributes can be given in data- attributes.
    • +
    • Each chart can have each own duration - it is controlled with the data-after attribute to give that many seconds of data.
    • +
    • Depending on the width of the chart and data-after attribute, netdata will automatically refresh the chart when it needs to be updated. For example giving 600 pixels for width for -600 seconds of data, using a chart library that needs 3 pixels per point, will yield in a chart updated once every 3 seconds.
    • +
    + + +
    +

    Sparkline Charts

    +Sparkline charts support 'NULL' values, so the charts can indicate that values are missing. +Sparkline charts stretch the values to show the variations between values in more detail. +They also have mouse-hover support. +
    +Sparklines are fantastic. You can inline charts in text. For example this +
    is my current cpu usage (last 30 seconds), + while this +
    is the bandwidth my netdata server is currently transmitting (last minute) + and this +
    is the requests/sec it serves (last 3 minutes). + +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    + + + +
    +

    Peity Charts

    +Peity charts do not support 'NULL' values, so the charts cannot indicate that values are missing. +Peity charts cannot have multiple dimensions on the charts - so netdata will use 'min2max' to show +the total of all dimensions. +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    + + + + +
    +

    Dygraph Charts

    +The fastest charting engine that can chart complete charts (not just sparklines). +The charts are zoomable (drag their contents to pan, shift with mouse wheel to zoom-in or zoom-out, double click to reset it). +Netdata magic! Realtime charts on your web page! +
    +Sparklines using dygraphs +
    + are also possible! This +
    + is an area chart, while this +
    is a stacked area chart! + + +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    + + + +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    + + + +
    +

    EasyPieChart

    +
    +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    + + +
    +

    Gauge.js

    +
    +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    + + +
    +

    Google Charts

    +Netdata was originally developed with Google Charts. +Netdata is a complete Google Visualization API provider. +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    +
    +
    +
    + rendered in X ms +
    + + + +
    +

    d3pie Charts

    +
    +
    +
    + rendered in X ms +
    + +
    +
    +
    + rendered in X ms +
    + +
    +
    +
    + rendered in X ms +
    +
    + + + + + + + + + + diff --git a/web/gui/dashboard/dashboard.js b/web/gui/dashboard/dashboard.js new file mode 100644 index 0000000..83f7b58 --- /dev/null +++ b/web/gui/dashboard/dashboard.js @@ -0,0 +1,10340 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +// DO NOT EDIT: This file is automatically generated from the source files in src/ + +// ---------------------------------------------------------------------------- +// You can set the following variables before loading this script: + +// 'use strict'; + +/*global netdataNoDygraphs *//* boolean, disable dygraph charts + * (default: false) */ +/*global netdataNoSparklines *//* boolean, disable sparkline charts + * (default: false) */ +/*global netdataNoPeitys *//* boolean, disable peity charts + * (default: false) */ +/*global netdataNoGoogleCharts *//* boolean, disable google charts + * (default: false) */ +/*global netdataNoMorris *//* boolean, disable morris charts + * (default: false) */ +/*global netdataNoEasyPieChart *//* boolean, disable easypiechart charts + * (default: false) */ +/*global netdataNoGauge *//* boolean, disable gauge.js charts + * (default: false) */ +/*global netdataNoD3 *//* boolean, disable d3 charts + * (default: false) */ +/*global netdataNoC3 *//* boolean, disable c3 charts + * (default: false) */ +/*global netdataNoD3pie *//* boolean, disable d3pie charts + * (default: false) */ +/*global netdataNoBootstrap *//* boolean, disable bootstrap - disables help too + * (default: false) */ +/*global netdataNoFontAwesome *//* boolean, disable fontawesome (do not load it) + * (default: false) */ +/*global netdataIcons *//* object, overwrite netdata fontawesome icons + * (default: null) */ +/*global netdataDontStart *//* boolean, do not start the thread to process the charts + * (default: false) */ +/*global netdataErrorCallback *//* function, callback to be called when the dashboard encounters an error + * (default: null) */ +/*global netdataRegistry:true *//* boolean, use the netdata registry + * (default: false) */ +/*global netdataNoRegistry *//* boolean, included only for compatibility with existing custom dashboard + * (obsolete - do not use this any more) */ +/*global netdataRegistryCallback *//* function, callback that will be invoked with one param: the URLs from the registry + * (default: null) */ +/*global netdataShowHelp:true *//* boolean, disable charts help + * (default: true) */ +/*global netdataShowAlarms:true *//* boolean, enable alarms checks and notifications + * (default: false) */ +/*global netdataRegistryAfterMs:true *//* ms, delay registry use at started + * (default: 1500) */ +/*global netdataCallback *//* function, callback to be called when netdata is ready to start + * (default: null) + * netdata will be running while this is called + * (call NETDATA.pause to stop it) */ +/*global netdataPrepCallback *//* function, callback to be called before netdata does anything else + * (default: null) */ +/*global netdataServer *//* string, the URL of the netdata server to use + * (default: the URL the page is hosted at) */ +/*global netdataServerStatic *//* string, the URL of the netdata server to use for static files + * (default: netdataServer) */ +/*global netdataSnapshotData *//* object, a netdata snapshot loaded + * (default: null) */ +/*global netdataAlarmsRecipients *//* array, an array of alarm recipients to show notifications for + * (default: null) */ +/*global netdataAlarmsRemember *//* boolean, keep our position in the alarm log at browser local storage + * (default: true) */ +/*global netdataAlarmsActiveCallback *//* function, a hook for the alarm logs + * (default: undefined) */ +/*global netdataAlarmsNotifCallback *//* function, a hook for alarm notifications + * (default: undefined) */ +/*global netdataIntersectionObserver *//* boolean, enable or disable the use of intersection observer + * (default: true) */ +/*global netdataCheckXSS *//* boolean, enable or disable checking for XSS issues + * (default: false) */ + +// ---------------------------------------------------------------------------- +// global namespace + +// Should stay var! +var NETDATA = window.NETDATA || {}; + +(function(window, document, $, undefined) { + +// *** src/dashboard.js/utils.js + +NETDATA.name2id = function (s) { + return s + .replace(/ /g, '_') + .replace(/:/g, '_') + .replace(/\(/g, '_') + .replace(/\)/g, '_') + .replace(/\./g, '_') + .replace(/\//g, '_'); +}; + +NETDATA.encodeURIComponent = function (s) { + if (typeof(s) === 'string') { + return encodeURIComponent(s); + } + + return s; +}; + +/// A heuristic for detecting slow devices. +let isSlowDeviceResult = undefined; +const isSlowDevice = function () { + if (!isSlowDeviceResult) { + return isSlowDeviceResult; + } + + try { + let ua = navigator.userAgent.toLowerCase(); + + let iOS = /ipad|iphone|ipod/.test(ua) && !window.MSStream; + let android = /android/.test(ua) && !window.MSStream; + isSlowDeviceResult = (iOS || android); + } catch (e) { + isSlowDeviceResult = false; + } + + return isSlowDeviceResult; +}; + +NETDATA.guid = function () { + function s4() { + return Math.floor((1 + Math.random()) * 0x10000) + .toString(16) + .substring(1); + } + + return s4() + s4() + '-' + s4() + '-' + s4() + '-' + s4() + '-' + s4() + s4() + s4(); +}; + +NETDATA.zeropad = function (x) { + if (x > -10 && x < 10) { + return '0' + x.toString(); + } else { + return x.toString(); + } +}; + +NETDATA.seconds4human = function (seconds, options) { + let defaultOptions = { + now: 'now', + space: ' ', + negative_suffix: 'ago', + day: 'day', + days: 'days', + hour: 'hour', + hours: 'hours', + minute: 'min', + minutes: 'mins', + second: 'sec', + seconds: 'secs', + and: 'and' + }; + + if (typeof options !== 'object') { + options = defaultOptions; + } else { + for (var x in defaultOptions) { + if (typeof options[x] !== 'string') { + options[x] = defaultOptions[x]; + } + } + } + + if (typeof seconds === 'string') { + seconds = parseInt(seconds, 10); + } + + if (seconds === 0) { + return options.now; + } + + let suffix = ''; + if (seconds < 0) { + seconds = -seconds; + if (options.negative_suffix !== '') { + suffix = options.space + options.negative_suffix; + } + } + + let days = Math.floor(seconds / 86400); + seconds -= (days * 86400); + + let hours = Math.floor(seconds / 3600); + seconds -= (hours * 3600); + + let minutes = Math.floor(seconds / 60); + seconds -= (minutes * 60); + + let strings = []; + + if (days > 1) { + strings.push(days.toString() + options.space + options.days); + } else if (days === 1) { + strings.push(days.toString() + options.space + options.day); + } + + if (hours > 1) { + strings.push(hours.toString() + options.space + options.hours); + } else if (hours === 1) { + strings.push(hours.toString() + options.space + options.hour); + } + + if (minutes > 1) { + strings.push(minutes.toString() + options.space + options.minutes); + } else if (minutes === 1) { + strings.push(minutes.toString() + options.space + options.minute); + } + + if (seconds > 1) { + strings.push(Math.floor(seconds).toString() + options.space + options.seconds); + } else if (seconds === 1) { + strings.push(Math.floor(seconds).toString() + options.space + options.second); + } + + if (strings.length === 1) { + return strings.pop() + suffix; + } + + let last = strings.pop(); + return strings.join(", ") + " " + options.and + " " + last + suffix; +}; + +// ---------------------------------------------------------------------------------------------------------------- +// element data attributes + +NETDATA.dataAttribute = function (element, attribute, def) { + let key = 'data-' + attribute.toString(); + if (element.hasAttribute(key)) { + let data = element.getAttribute(key); + + if (data === 'true') { + return true; + } + if (data === 'false') { + return false; + } + if (data === 'null') { + return null; + } + + // Only convert to a number if it doesn't change the string + if (data === +data + '') { + return +data; + } + + if (/^(?:\{[\w\W]*\}|\[[\w\W]*\])$/.test(data)) { + return JSON.parse(data); + } + + return data; + } else { + return def; + } +}; + +NETDATA.dataAttributeBoolean = function (element, attribute, def) { + let value = NETDATA.dataAttribute(element, attribute, def); + + if (value === true || value === false) // gmosx: Love this :) + { + return value; + } + + if (typeof(value) === 'string') { + if (value === 'yes' || value === 'on') { + return true; + } + + if (value === '' || value === 'no' || value === 'off' || value === 'null') { + return false; + } + + return def; + } + + if (typeof(value) === 'number') { + return value !== 0; + } + + return def; +}; + +// ---------------------------------------------------------------------------------------------------------------- +// fast numbers formatting + +NETDATA.fastNumberFormat = { + formattersFixed: [], + formattersZeroBased: [], + + // this is the fastest and the preferred + getIntlNumberFormat: function (min, max) { + let key = max; + if (min === max) { + if (typeof this.formattersFixed[key] === 'undefined') { + this.formattersFixed[key] = new Intl.NumberFormat(undefined, { + // style: 'decimal', + // minimumIntegerDigits: 1, + // minimumSignificantDigits: 1, + // maximumSignificantDigits: 1, + useGrouping: true, + minimumFractionDigits: min, + maximumFractionDigits: max + }); + } + + return this.formattersFixed[key]; + } else if (min === 0) { + if (typeof this.formattersZeroBased[key] === 'undefined') { + this.formattersZeroBased[key] = new Intl.NumberFormat(undefined, { + // style: 'decimal', + // minimumIntegerDigits: 1, + // minimumSignificantDigits: 1, + // maximumSignificantDigits: 1, + useGrouping: true, + minimumFractionDigits: min, + maximumFractionDigits: max + }); + } + + return this.formattersZeroBased[key]; + } else { + // this is never used + // it is added just for completeness + return new Intl.NumberFormat(undefined, { + // style: 'decimal', + // minimumIntegerDigits: 1, + // minimumSignificantDigits: 1, + // maximumSignificantDigits: 1, + useGrouping: true, + minimumFractionDigits: min, + maximumFractionDigits: max + }); + } + }, + + // this respects locale + getLocaleString: function (min, max) { + let key = max; + if (min === max) { + if (typeof this.formattersFixed[key] === 'undefined') { + this.formattersFixed[key] = { + format: function (value) { + return value.toLocaleString(undefined, { + // style: 'decimal', + // minimumIntegerDigits: 1, + // minimumSignificantDigits: 1, + // maximumSignificantDigits: 1, + useGrouping: true, + minimumFractionDigits: min, + maximumFractionDigits: max + }); + } + }; + } + + return this.formattersFixed[key]; + } else if (min === 0) { + if (typeof this.formattersZeroBased[key] === 'undefined') { + this.formattersZeroBased[key] = { + format: function (value) { + return value.toLocaleString(undefined, { + // style: 'decimal', + // minimumIntegerDigits: 1, + // minimumSignificantDigits: 1, + // maximumSignificantDigits: 1, + useGrouping: true, + minimumFractionDigits: min, + maximumFractionDigits: max + }); + } + }; + } + + return this.formattersZeroBased[key]; + } else { + return { + format: function (value) { + return value.toLocaleString(undefined, { + // style: 'decimal', + // minimumIntegerDigits: 1, + // minimumSignificantDigits: 1, + // maximumSignificantDigits: 1, + useGrouping: true, + minimumFractionDigits: min, + maximumFractionDigits: max + }); + } + }; + } + }, + + // the fallback + getFixed: function (min, max) { + let key = max; + if (min === max) { + if (typeof this.formattersFixed[key] === 'undefined') { + this.formattersFixed[key] = { + format: function (value) { + if (value === 0) { + return "0"; + } + return value.toFixed(max); + } + }; + } + + return this.formattersFixed[key]; + } else if (min === 0) { + if (typeof this.formattersZeroBased[key] === 'undefined') { + this.formattersZeroBased[key] = { + format: function (value) { + if (value === 0) { + return "0"; + } + return value.toFixed(max); + } + }; + } + + return this.formattersZeroBased[key]; + } else { + return { + format: function (value) { + if (value === 0) { + return "0"; + } + return value.toFixed(max); + } + }; + } + }, + + testIntlNumberFormat: function () { + let value = 1.12345; + let e1 = "1.12", e2 = "1,12"; + let s = ""; + + try { + let x = new Intl.NumberFormat(undefined, { + useGrouping: true, + minimumFractionDigits: 2, + maximumFractionDigits: 2 + }); + + s = x.format(value); + } catch (e) { + s = ""; + } + + // console.log('NumberFormat: ', s); + return (s === e1 || s === e2); + }, + + testLocaleString: function () { + let value = 1.12345; + let e1 = "1.12", e2 = "1,12"; + let s = ""; + + try { + s = value.toLocaleString(undefined, { + useGrouping: true, + minimumFractionDigits: 2, + maximumFractionDigits: 2 + }); + } catch (e) { + s = ""; + } + + // console.log('localeString: ', s); + return (s === e1 || s === e2); + }, + + // on first run we decide which formatter to use + get: function (min, max) { + if (this.testIntlNumberFormat()) { + // console.log('numberformat'); + this.get = this.getIntlNumberFormat; + } else if (this.testLocaleString()) { + // console.log('localestring'); + this.get = this.getLocaleString; + } else { + // console.log('fixed'); + this.get = this.getFixed; + } + return this.get(min, max); + } +}; + +// ---------------------------------------------------------------------------------------------------------------- +// Detect the netdata server + +// http://stackoverflow.com/questions/984510/what-is-my-script-src-url +// http://stackoverflow.com/questions/6941533/get-protocol-domain-and-port-from-url +NETDATA._scriptSource = function () { + let script = null; + + if (typeof document.currentScript !== 'undefined') { + script = document.currentScript; + } else { + const all_scripts = document.getElementsByTagName('script'); + script = all_scripts[all_scripts.length - 1]; + } + + if (typeof script.getAttribute.length !== 'undefined') { + script = script.src; + } else { + script = script.getAttribute('src', -1); + } + + return script; +}; + +// *** src/dashboard.js/server-detection.js + +if (typeof netdataServer !== 'undefined') { + NETDATA.serverDefault = netdataServer; +} else { + let s = NETDATA._scriptSource(); + if (s) { + NETDATA.serverDefault = s.replace(/\/dashboard.js(\?.*)?$/g, ""); + } else { + console.log('WARNING: Cannot detect the URL of the netdata server.'); + NETDATA.serverDefault = null; + } +} + +if (NETDATA.serverDefault === null) { + NETDATA.serverDefault = ''; +} else if (NETDATA.serverDefault.slice(-1) !== '/') { + NETDATA.serverDefault += '/'; +} + +if (typeof netdataServerStatic !== 'undefined' && netdataServerStatic !== null && netdataServerStatic !== '') { + NETDATA.serverStatic = netdataServerStatic; + if (NETDATA.serverStatic.slice(-1) !== '/') { + NETDATA.serverStatic += '/'; + } +} else { + NETDATA.serverStatic = NETDATA.serverDefault; +} + +// *** src/dashboard.js/dependencies.js + +// default URLs for all the external files we need +// make them RELATIVE so that the whole thing can also be +// installed under a web server +NETDATA.jQuery = NETDATA.serverStatic + 'lib/jquery-3.6.0.min.js'; +NETDATA.peity_js = NETDATA.serverStatic + 'lib/jquery.peity-3.2.0.min.js'; +NETDATA.sparkline_js = NETDATA.serverStatic + 'lib/jquery.sparkline-2.1.2.min.js'; +NETDATA.easypiechart_js = NETDATA.serverStatic + 'lib/jquery.easypiechart-97b5824.min.js'; +NETDATA.gauge_js = NETDATA.serverStatic + 'lib/gauge-1.3.2.min.js'; +NETDATA.dygraph_js = NETDATA.serverStatic + 'lib/dygraph-c91c859.min.js'; +NETDATA.dygraph_smooth_js = NETDATA.serverStatic + 'lib/dygraph-smooth-plotter-c91c859.js'; +// NETDATA.raphael_js = NETDATA.serverStatic + 'lib/raphael-2.2.4-min.js'; +// NETDATA.c3_js = NETDATA.serverStatic + 'lib/c3-0.4.18.min.js'; +// NETDATA.c3_css = NETDATA.serverStatic + 'css/c3-0.4.18.min.css'; +NETDATA.d3pie_js = NETDATA.serverStatic + 'lib/d3pie-0.2.1-netdata-3.js'; +NETDATA.d3_js = NETDATA.serverStatic + 'lib/d3-4.12.2.min.js'; +// NETDATA.morris_js = NETDATA.serverStatic + 'lib/morris-0.5.1.min.js'; +// NETDATA.morris_css = NETDATA.serverStatic + 'css/morris-0.5.1.css'; +NETDATA.google_js = 'https://www.google.com/jsapi'; +// Error Handling + +NETDATA.errorCodes = { + 100: {message: "Cannot load chart library", alert: true}, + 101: {message: "Cannot load jQuery", alert: true}, + 402: {message: "Chart library not found", alert: false}, + 403: {message: "Chart library not enabled/is failed", alert: false}, + 404: {message: "Chart not found", alert: false}, + 405: {message: "Cannot download charts index from server", alert: true}, + 406: {message: "Invalid charts index downloaded from server", alert: true}, + 407: {message: "Cannot HELLO netdata server", alert: false}, + 408: {message: "Netdata servers sent invalid response to HELLO", alert: false}, + 409: {message: "Cannot ACCESS netdata registry", alert: false}, + 410: {message: "Netdata registry ACCESS failed", alert: false}, + 411: {message: "Netdata registry server send invalid response to DELETE ", alert: false}, + 412: {message: "Netdata registry DELETE failed", alert: false}, + 413: {message: "Netdata registry server send invalid response to SWITCH ", alert: false}, + 414: {message: "Netdata registry SWITCH failed", alert: false}, + 415: {message: "Netdata alarms download failed", alert: false}, + 416: {message: "Netdata alarms log download failed", alert: false}, + 417: {message: "Netdata registry server send invalid response to SEARCH ", alert: false}, + 418: {message: "Netdata registry SEARCH failed", alert: false} +}; + +NETDATA.errorLast = { + code: 0, + message: "", + datetime: 0 +}; + +NETDATA.error = function (code, msg) { + NETDATA.errorLast.code = code; + NETDATA.errorLast.message = msg; + NETDATA.errorLast.datetime = Date.now(); + + console.log("ERROR " + code + ": " + NETDATA.errorCodes[code].message + ": " + msg); + + let ret = true; + if (typeof netdataErrorCallback === 'function') { + ret = netdataErrorCallback('system', code, msg); + } + + if (ret && NETDATA.errorCodes[code].alert) { + alert("ERROR " + code + ": " + NETDATA.errorCodes[code].message + ": " + msg); + } +}; + +NETDATA.errorReset = function () { + NETDATA.errorLast.code = 0; + NETDATA.errorLast.message = "You are doing fine!"; + NETDATA.errorLast.datetime = 0; +}; +// *** src/dashboard.js/compatibility.js + +// Compatibility fixes. + +// fix IE issue with console +if (!window.console) { + window.console = { + log: function () { + } + }; +} + +// if string.endsWith is not defined, define it +if (typeof String.prototype.endsWith !== 'function') { + String.prototype.endsWith = function (s) { + if (s.length > this.length) { + return false; + } + return this.slice(-s.length) === s; + }; +} + +// if string.startsWith is not defined, define it +if (typeof String.prototype.startsWith !== 'function') { + String.prototype.startsWith = function (s) { + if (s.length > this.length) { + return false; + } + return this.slice(s.length) === s; + }; +} +// ---------------------------------------------------------------------------------------------------------------- +// XSS checks + +NETDATA.xss = { + enabled: (typeof netdataCheckXSS === 'undefined') ? false : netdataCheckXSS, + enabled_for_data: (typeof netdataCheckXSS === 'undefined') ? false : netdataCheckXSS, + + string: function (s) { + return s.toString() + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + }, + + object: function (name, obj, ignore_regex) { + if (typeof ignore_regex !== 'undefined' && ignore_regex.test(name)) { + // console.log('XSS: ignoring "' + name + '"'); + return obj; + } + + switch (typeof(obj)) { + case 'string': + const ret = this.string(obj); + if (ret !== obj) { + console.log('XSS protection changed string ' + name + ' from "' + obj + '" to "' + ret + '"'); + } + return ret; + + case 'object': + if (obj === null) { + return obj; + } + + if (Array.isArray(obj)) { + // console.log('checking array "' + name + '"'); + + let len = obj.length; + while (len--) { + obj[len] = this.object(name + '[' + len + ']', obj[len], ignore_regex); + } + } else { + // console.log('checking object "' + name + '"'); + + for (var i in obj) { + if (obj.hasOwnProperty(i) === false) { + continue; + } + if (this.string(i) !== i) { + console.log('XSS protection removed invalid object member "' + name + '.' + i + '"'); + delete obj[i]; + } else { + obj[i] = this.object(name + '.' + i, obj[i], ignore_regex); + } + } + } + return obj; + + default: + return obj; + } + }, + + checkOptional: function (name, obj, ignore_regex) { + if (this.enabled) { + //console.log('XSS: checking optional "' + name + '"...'); + return this.object(name, obj, ignore_regex); + } + return obj; + }, + + checkAlways: function (name, obj, ignore_regex) { + //console.log('XSS: checking always "' + name + '"...'); + return this.object(name, obj, ignore_regex); + }, + + checkData: function (name, obj, ignore_regex) { + if (this.enabled_for_data) { + //console.log('XSS: checking data "' + name + '"...'); + return this.object(name, obj, ignore_regex); + } + return obj; + } +}; +NETDATA.colorHex2Rgb = function (hex) { + // Expand shorthand form (e.g. "03F") to full form (e.g. "0033FF") + let shorthandRegex = /^#?([a-f\d])([a-f\d])([a-f\d])$/i; + hex = hex.replace(shorthandRegex, function (m, r, g, b) { + return r + r + g + g + b + b; + }); + + let result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex); + return result ? { + r: parseInt(result[1], 16), + g: parseInt(result[2], 16), + b: parseInt(result[3], 16) + } : null; +}; + +NETDATA.colorLuminance = function (hex, lum) { + // validate hex string + hex = String(hex).replace(/[^0-9a-f]/gi, ''); + if (hex.length < 6) { + hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2]; + } + + lum = lum || 0; + + // convert to decimal and change luminosity + let rgb = "#"; + for (let i = 0; i < 3; i++) { + let c = parseInt(hex.substr(i * 2, 2), 16); + c = Math.round(Math.min(Math.max(0, c + (c * lum)), 255)).toString(16); + rgb += ("00" + c).substr(c.length); + } + + return rgb; +}; +NETDATA.unitsConversion = { + keys: {}, // keys for data-common-units + latest: {}, // latest selected units for data-common-units + + globalReset: function () { + this.keys = {}; + this.latest = {}; + }, + + scalableUnits: { + 'packets/s': { + 'pps': 1, + 'Kpps': 1000, + 'Mpps': 1000000 + }, + 'pps': { + 'pps': 1, + 'Kpps': 1000, + 'Mpps': 1000000 + }, + 'kilobits/s': { + 'bits/s': 1 / 1000, + 'kilobits/s': 1, + 'megabits/s': 1000, + 'gigabits/s': 1000000, + 'terabits/s': 1000000000 + }, + 'bytes/s': { + 'bytes/s': 1, + 'kilobytes/s': 1024, + 'megabytes/s': 1024 * 1024, + 'gigabytes/s': 1024 * 1024 * 1024, + 'terabytes/s': 1024 * 1024 * 1024 * 1024 + }, + 'kilobytes/s': { + 'bytes/s': 1 / 1024, + 'kilobytes/s': 1, + 'megabytes/s': 1024, + 'gigabytes/s': 1024 * 1024, + 'terabytes/s': 1024 * 1024 * 1024 + }, + 'B/s': { + 'B/s': 1, + 'KiB/s': 1024, + 'MiB/s': 1024 * 1024, + 'GiB/s': 1024 * 1024 * 1024, + 'TiB/s': 1024 * 1024 * 1024 * 1024 + }, + 'KB/s': { + 'B/s': 1 / 1024, + 'KB/s': 1, + 'MB/s': 1024, + 'GB/s': 1024 * 1024, + 'TB/s': 1024 * 1024 * 1024 + }, + 'KiB/s': { + 'B/s': 1 / 1024, + 'KiB/s': 1, + 'MiB/s': 1024, + 'GiB/s': 1024 * 1024, + 'TiB/s': 1024 * 1024 * 1024 + }, + 'bytes': { + 'bytes': 1, + 'kilobytes': 1024, + 'megabytes': 1024 * 1024, + 'gigabytes': 1024 * 1024 * 1024, + 'terabytes': 1024 * 1024 * 1024 * 1024 + }, + 'B': { + 'B': 1, + 'KiB': 1024, + 'MiB': 1024 * 1024, + 'GiB': 1024 * 1024 * 1024, + 'TiB': 1024 * 1024 * 1024 * 1024, + 'PiB': 1024 * 1024 * 1024 * 1024 * 1024 + }, + 'KB': { + 'B': 1 / 1024, + 'KB': 1, + 'MB': 1024, + 'GB': 1024 * 1024, + 'TB': 1024 * 1024 * 1024 + }, + 'KiB': { + 'B': 1 / 1024, + 'KiB': 1, + 'MiB': 1024, + 'GiB': 1024 * 1024, + 'TiB': 1024 * 1024 * 1024 + }, + 'MB': { + 'B': 1 / (1024 * 1024), + 'KB': 1 / 1024, + 'MB': 1, + 'GB': 1024, + 'TB': 1024 * 1024, + 'PB': 1024 * 1024 * 1024 + }, + 'MiB': { + 'B': 1 / (1024 * 1024), + 'KiB': 1 / 1024, + 'MiB': 1, + 'GiB': 1024, + 'TiB': 1024 * 1024, + 'PiB': 1024 * 1024 * 1024 + }, + 'GB': { + 'B': 1 / (1024 * 1024 * 1024), + 'KB': 1 / (1024 * 1024), + 'MB': 1 / 1024, + 'GB': 1, + 'TB': 1024, + 'PB': 1024 * 1024, + 'EB': 1024 * 1024 * 1024 + }, + 'GiB': { + 'B': 1 / (1024 * 1024 * 1024), + 'KiB': 1 / (1024 * 1024), + 'MiB': 1 / 1024, + 'GiB': 1, + 'TiB': 1024, + 'PiB': 1024 * 1024, + 'EiB': 1024 * 1024 * 1024 + }, + 'num': { + 'num': 1, + 'num (K)': 1000, + 'num (M)': 1000000, + 'num (G)': 1000000000, + 'num (T)': 1000000000000 + }, + 'Hz': { + 'Hz': 1, + 'kHz': 10 ** 3, + 'MHz': 10 ** 6, + 'GHz': 10 ** 9, + 'THz': 10 ** 12, + 'PHz': 10 ** 15, + 'EHz': 10 ** 18, + 'ZHz': 10 ** 21, + }, + /* + 'milliseconds': { + 'seconds': 1000 + }, + 'seconds': { + 'milliseconds': 0.001, + 'seconds': 1, + 'minutes': 60, + 'hours': 3600, + 'days': 86400 + } + */ + }, + + convertibleUnits: { + 'Celsius': { + 'Fahrenheit': { + check: function (max) { + void(max); + return NETDATA.options.current.temperature === 'fahrenheit'; + }, + convert: function (value) { + return value * 9 / 5 + 32; + } + } + }, + 'celsius': { + 'fahrenheit': { + check: function (max) { + void(max); + return NETDATA.options.current.temperature === 'fahrenheit'; + }, + convert: function (value) { + return value * 9 / 5 + 32; + } + } + }, + 'seconds': { + 'time': { + check: function (max) { + void(max); + return NETDATA.options.current.seconds_as_time; + }, + convert: function (seconds) { + return NETDATA.unitsConversion.seconds2time(seconds); + } + } + }, + 'milliseconds': { + 'milliseconds': { + check: function (max) { + return NETDATA.options.current.seconds_as_time && max < 1000; + }, + convert: function (milliseconds) { + let tms = Math.round(milliseconds * 10); + milliseconds = Math.floor(tms / 10); + + tms -= milliseconds * 10; + + return (milliseconds).toString() + '.' + tms.toString(); + } + }, + 'seconds': { + check: function (max) { + return NETDATA.options.current.seconds_as_time && max >= 1000 && max < 60000; + }, + convert: function (milliseconds) { + milliseconds = Math.round(milliseconds); + + let seconds = Math.floor(milliseconds / 1000); + milliseconds -= seconds * 1000; + + milliseconds = Math.round(milliseconds / 10); + + return seconds.toString() + '.' + + NETDATA.zeropad(milliseconds); + } + }, + 'M:SS.ms': { + check: function (max) { + return NETDATA.options.current.seconds_as_time && max >= 60000; + }, + convert: function (milliseconds) { + milliseconds = Math.round(milliseconds); + + let minutes = Math.floor(milliseconds / 60000); + milliseconds -= minutes * 60000; + + let seconds = Math.floor(milliseconds / 1000); + milliseconds -= seconds * 1000; + + milliseconds = Math.round(milliseconds / 10); + + return minutes.toString() + ':' + + NETDATA.zeropad(seconds) + '.' + + NETDATA.zeropad(milliseconds); + } + } + }, + 'nanoseconds': { + 'nanoseconds': { + check: function (max) { + return NETDATA.options.current.seconds_as_time && max < 1000; + }, + convert: function (nanoseconds) { + let tms = Math.round(nanoseconds * 10); + nanoseconds = Math.floor(tms / 10); + + tms -= nanoseconds * 10; + + return (nanoseconds).toString() + '.' + tms.toString(); + } + }, + 'microseconds': { + check: function (max) { + return NETDATA.options.current.seconds_as_time + && max >= 1000 && max < 1000 * 1000; + }, + convert: function (nanoseconds) { + nanoseconds = Math.round(nanoseconds); + + let microseconds = Math.floor(nanoseconds / 1000); + nanoseconds -= microseconds * 1000; + + nanoseconds = Math.round(nanoseconds / 10 ); + + return microseconds.toString() + '.' + + NETDATA.zeropad(nanoseconds); + } + }, + 'milliseconds': { + check: function (max) { + return NETDATA.options.current.seconds_as_time + && max >= 1000 * 1000 && max < 1000 * 1000 * 1000; + }, + convert: function (nanoseconds) { + nanoseconds = Math.round(nanoseconds); + + let milliseconds = Math.floor(nanoseconds / 1000 / 1000); + nanoseconds -= milliseconds * 1000 * 1000; + + nanoseconds = Math.round(nanoseconds / 1000 / 10); + + return milliseconds.toString() + '.' + + NETDATA.zeropad(nanoseconds); + } + }, + 'seconds': { + check: function (max) { + return NETDATA.options.current.seconds_as_time + && max >= 1000 * 1000 * 1000; + }, + convert: function (nanoseconds) { + nanoseconds = Math.round(nanoseconds); + + let seconds = Math.floor(nanoseconds / 1000 / 1000 / 1000); + nanoseconds -= seconds * 1000 * 1000 * 1000; + + nanoseconds = Math.round(nanoseconds / 1000 / 1000 / 10); + + return seconds.toString() + '.' + + NETDATA.zeropad(nanoseconds); + } + }, + } + }, + + seconds2time: function (seconds) { + seconds = Math.abs(seconds); + + let days = Math.floor(seconds / 86400); + seconds -= days * 86400; + + let hours = Math.floor(seconds / 3600); + seconds -= hours * 3600; + + let minutes = Math.floor(seconds / 60); + seconds -= minutes * 60; + + seconds = Math.round(seconds); + + let ms_txt = ''; + /* + let ms = seconds - Math.floor(seconds); + seconds -= ms; + ms = Math.round(ms * 1000); + + if (ms > 1) { + if (ms < 10) + ms_txt = '.00' + ms.toString(); + else if (ms < 100) + ms_txt = '.0' + ms.toString(); + else + ms_txt = '.' + ms.toString(); + } + */ + + return ((days > 0) ? days.toString() + 'd:' : '').toString() + + NETDATA.zeropad(hours) + ':' + + NETDATA.zeropad(minutes) + ':' + + NETDATA.zeropad(seconds) + + ms_txt; + }, + + // get a function that converts the units + // + every time units are switched call the callback + get: function (uuid, min, max, units, desired_units, common_units_name, switch_units_callback) { + // validate the parameters + if (typeof units === 'undefined') { + units = 'undefined'; + } + + // check if we support units conversion + if (typeof this.scalableUnits[units] === 'undefined' && typeof this.convertibleUnits[units] === 'undefined') { + // we can't convert these units + //console.log('DEBUG: ' + uuid.toString() + ' can\'t convert units: ' + units.toString()); + return function (value) { + return value; + }; + } + + // check if the caller wants the original units + if (typeof desired_units === 'undefined' || desired_units === null || desired_units === 'original' || desired_units === units) { + //console.log('DEBUG: ' + uuid.toString() + ' original units wanted'); + switch_units_callback(units); + return function (value) { + return value; + }; + } + + // now we know we can convert the units + // and the caller wants some kind of conversion + + let tunits = null; + let tdivider = 0; + + if (typeof this.scalableUnits[units] !== 'undefined') { + // units that can be scaled + // we decide a divider + + // console.log('NETDATA.unitsConversion.get(' + units.toString() + ', ' + desired_units.toString() + ', function()) decide divider with min = ' + min.toString() + ', max = ' + max.toString()); + + if (desired_units === 'auto') { + // the caller wants to auto-scale the units + + // find the absolute maximum value that is rendered on the chart + // based on this we decide the scale + min = Math.abs(min); + max = Math.abs(max); + if (min > max) { + max = min; + } + + // find the smallest scale that provides integers + // for (x in this.scalableUnits[units]) { + // if (this.scalableUnits[units].hasOwnProperty(x)) { + // let m = this.scalableUnits[units][x]; + // if (m <= max && m > tdivider) { + // tunits = x; + // tdivider = m; + // } + // } + // } + const sunit = this.scalableUnits[units]; + for (var x of Object.keys(sunit)) { + let m = sunit[x]; + if (m <= max && m > tdivider) { + tunits = x; + tdivider = m; + } + } + + if (tunits === null || tdivider <= 0) { + // we couldn't find one + //console.log('DEBUG: ' + uuid.toString() + ' cannot find an auto-scaling candidate for units: ' + units.toString() + ' (max: ' + max.toString() + ')'); + switch_units_callback(units); + return function (value) { + return value; + }; + } + + if (typeof common_units_name === 'string' && typeof uuid === 'string') { + // the caller wants several charts to have the same units + // data-common-units + + let common_units_key = common_units_name + '-' + units; + + // add our divider into the list of keys + let t = this.keys[common_units_key]; + if (typeof t === 'undefined') { + this.keys[common_units_key] = {}; + t = this.keys[common_units_key]; + } + t[uuid] = { + units: tunits, + divider: tdivider + }; + + // find the max divider of all charts + let common_units = t[uuid]; + for (var x in t) { + if (t.hasOwnProperty(x) && t[x].divider > common_units.divider) { + common_units = t[x]; + } + } + + // save our common_max to the latest keys + let latest = this.latest[common_units_key]; + if (typeof latest === 'undefined') { + this.latest[common_units_key] = {}; + latest = this.latest[common_units_key]; + } + latest.units = common_units.units; + latest.divider = common_units.divider; + + tunits = latest.units; + tdivider = latest.divider; + + //console.log('DEBUG: ' + uuid.toString() + ' converted units: ' + units.toString() + ' to units: ' + tunits.toString() + ' with divider ' + tdivider.toString() + ', common-units=' + common_units_name.toString() + ((t[uuid].divider !== tdivider)?' USED COMMON, mine was ' + t[uuid].units:' set common').toString()); + + // apply it to this chart + switch_units_callback(tunits); + return function (value) { + if (tdivider !== latest.divider) { + // another chart switched our common units + // we should switch them too + //console.log('DEBUG: ' + uuid + ' switching units due to a common-units change, from ' + tunits.toString() + ' to ' + latest.units.toString()); + tunits = latest.units; + tdivider = latest.divider; + switch_units_callback(tunits); + } + + return value / tdivider; + }; + } else { + // the caller did not give data-common-units + // this chart auto-scales independently of all others + //console.log('DEBUG: ' + uuid.toString() + ' converted units: ' + units.toString() + ' to units: ' + tunits.toString() + ' with divider ' + tdivider.toString() + ', autonomously'); + + switch_units_callback(tunits); + return function (value) { + return value / tdivider; + }; + } + } else { + // the caller wants specific units + + if (typeof this.scalableUnits[units][desired_units] !== 'undefined') { + // all good, set the new units + tdivider = this.scalableUnits[units][desired_units]; + // console.log('DEBUG: ' + uuid.toString() + ' converted units: ' + units.toString() + ' to units: ' + desired_units.toString() + ' with divider ' + tdivider.toString() + ', by reference'); + switch_units_callback(desired_units); + return function (value) { + return value / tdivider; + }; + } else { + // oops! switch back to original units + console.log('Units conversion from ' + units.toString() + ' to ' + desired_units.toString() + ' is not supported.'); + switch_units_callback(units); + return function (value) { + return value; + }; + } + } + } else if (typeof this.convertibleUnits[units] !== 'undefined') { + // units that can be converted + if (desired_units === 'auto') { + for (var x in this.convertibleUnits[units]) { + if (this.convertibleUnits[units].hasOwnProperty(x)) { + if (this.convertibleUnits[units][x].check(max)) { + //console.log('DEBUG: ' + uuid.toString() + ' converting ' + units.toString() + ' to: ' + x.toString()); + switch_units_callback(x); + return this.convertibleUnits[units][x].convert; + } + } + } + + // none checked ok + //console.log('DEBUG: ' + uuid.toString() + ' no conversion available for ' + units.toString() + ' to: ' + desired_units.toString()); + switch_units_callback(units); + return function (value) { + return value; + }; + } else if (typeof this.convertibleUnits[units][desired_units] !== 'undefined') { + switch_units_callback(desired_units); + return this.convertibleUnits[units][desired_units].convert; + } else { + console.log('Units conversion from ' + units.toString() + ' to ' + desired_units.toString() + ' is not supported.'); + switch_units_callback(units); + return function (value) { + return value; + }; + } + } else { + // hm... did we forget to implement the new type? + console.log(`Unmatched unit conversion method for units ${units.toString()}`); + switch_units_callback(units); + return function (value) { + return value; + }; + } + } +}; + +NETDATA.icons = { + left: '', + reset: '', + right: '', + zoomIn: '', + zoomOut: '', + resize: '', + lineChart: '', + areaChart: '', + noChart: '', + loading: '', + noData: '' +}; + +if (typeof netdataIcons === 'object') { + // for (let icon in NETDATA.icons) { + // if (NETDATA.icons.hasOwnProperty(icon) && typeof(netdataIcons[icon]) === 'string') + // NETDATA.icons[icon] = netdataIcons[icon]; + // } + for (var icon of Object.keys(NETDATA.icons)) { + if (typeof(netdataIcons[icon]) === 'string') { + NETDATA.icons[icon] = netdataIcons[icon] + } + } +} + +if (typeof netdataSnapshotData === 'undefined') { + netdataSnapshotData = null; +} + +if (typeof netdataShowHelp === 'undefined') { + netdataShowHelp = true; +} + +if (typeof netdataShowAlarms === 'undefined') { + netdataShowAlarms = false; +} + +if (typeof netdataRegistryAfterMs !== 'number' || netdataRegistryAfterMs < 0) { + netdataRegistryAfterMs = 0; // 1500; +} + +if (typeof netdataRegistry === 'undefined') { + // backward compatibility + netdataRegistry = (typeof netdataNoRegistry !== 'undefined' && netdataNoRegistry === false); +} + +if (netdataRegistry === false && typeof netdataRegistryCallback === 'function') { + netdataRegistry = true; +} + +// ---------------------------------------------------------------------------------------------------------------- +// the defaults for all charts + +// if the user does not specify any of these, the following will be used + +NETDATA.chartDefaults = { + width: '100%', // the chart width - can be null + height: '100%', // the chart height - can be null + min_width: null, // the chart minimum width - can be null + library: 'dygraph', // the graphing library to use + method: 'average', // the grouping method + before: 0, // panning + after: -600, // panning + pixels_per_point: 1, // the detail of the chart + fill_luminance: 0.8 // luminance of colors in solid areas +}; + +// ---------------------------------------------------------------------------------------------------------------- +// global options + +NETDATA.options = { + pauseCallback: null, // a callback when we are really paused + + pause: false, // when enabled we don't auto-refresh the charts + + targets: [], // an array of all the state objects that are + // currently active (independently of their + // viewport visibility) + + updated_dom: true, // when true, the DOM has been updated with + // new elements we have to check. + + auto_refresher_fast_weight: 0, // this is the current time in ms, spent + // rendering charts continuously. + // used with .current.fast_render_timeframe + + page_is_visible: true, // when true, this page is visible + + auto_refresher_stop_until: 0, // timestamp in ms - used internally, to stop the + // auto-refresher for some time (when a chart is + // performing pan or zoom, we need to stop refreshing + // all other charts, to have the maximum speed for + // rendering the chart that is panned or zoomed). + // Used with .current.global_pan_sync_time + + on_scroll_refresher_stop_until: 0, // timestamp in ms - used to stop evaluating + // charts for some time, after a page scroll + + last_page_resize: Date.now(), // the timestamp of the last resize request + + last_page_scroll: 0, // the timestamp the last time the page was scrolled + + browser_timezone: 'unknown', // timezone detected by javascript + server_timezone: 'unknown', // timezone reported by the server + + force_data_points: 0, // force the number of points to be returned for charts + fake_chart_rendering: false, // when set to true, the dashboard will download data but will not render the charts + + passive_events: null, // true if the browser supports passive events + + // the current profile + // we may have many... + current: { + units: 'auto', // can be 'auto' or 'original' + temperature: 'celsius', // can be 'celsius' or 'fahrenheit' + seconds_as_time: true, // show seconds as DDd:HH:MM:SS ? + timezone: 'default', // the timezone to use, or 'default' + user_set_server_timezone: 'default', // as set by the user on the dashboard + + legend_toolbox: true, // show the legend toolbox on charts + resize_charts: true, // show the resize handler on charts + + pixels_per_point: isSlowDevice() ? 5 : 1, // the minimum pixels per point for all charts + // increase this to speed javascript up + // each chart library has its own limit too + // the max of this and the chart library is used + // the final is calculated every time, so a change + // here will have immediate effect on the next chart + // update + + idle_between_charts: 100, // ms - how much time to wait between chart updates + + fast_render_timeframe: 200, // ms - render continuously until this time of continuous + // rendering has been reached + // this setting is used to make it render e.g. 10 + // charts at once, sleep idle_between_charts time + // and continue for another 10 charts. + + idle_between_loops: 500, // ms - if all charts have been updated, wait this + // time before starting again. + + idle_parallel_loops: 100, // ms - the time between parallel refresher updates + + idle_lost_focus: 500, // ms - when the window does not have focus, check + // if focus has been regained, every this time + + global_pan_sync_time: 300, // ms - when you pan or zoom a chart, the background + // auto-refreshing of charts is paused for this amount + // of time + + sync_selection_delay: 400, // ms - when you pan or zoom a chart, wait this amount + // of time before setting up synchronized selections + // on hover. + + sync_selection: true, // enable or disable selection sync + + pan_and_zoom_delay: 50, // when panning or zooming, how ofter to update the chart + + sync_pan_and_zoom: true, // enable or disable pan and zoom sync + + pan_and_zoom_data_padding: true, // fetch more data for the master chart when panning or zooming + + update_only_visible: true, // enable or disable visibility management / used for printing + + parallel_refresher: !isSlowDevice(), // enable parallel refresh of charts + + concurrent_refreshes: true, // when parallel_refresher is enabled, sync also the charts + + destroy_on_hide: isSlowDevice(), // destroy charts when they are not visible + + show_help: netdataShowHelp, // when enabled the charts will show some help + show_help_delay_show_ms: 500, + show_help_delay_hide_ms: 0, + + eliminate_zero_dimensions: true, // do not show dimensions with just zeros + + stop_updates_when_focus_is_lost: true, // boolean - shall we stop auto-refreshes when document does not have user focus + stop_updates_while_resizing: 1000, // ms - time to stop auto-refreshes while resizing the charts + + double_click_speed: 500, // ms - time between clicks / taps to detect double click/tap + + smooth_plot: !isSlowDevice(), // enable smooth plot, where possible + + color_fill_opacity_line: 1.0, + color_fill_opacity_area: 0.2, + color_fill_opacity_stacked: 0.8, + + pan_and_zoom_factor: 0.25, // the increment when panning and zooming with the toolbox + pan_and_zoom_factor_multiplier_control: 2.0, + pan_and_zoom_factor_multiplier_shift: 3.0, + pan_and_zoom_factor_multiplier_alt: 4.0, + + abort_ajax_on_scroll: false, // kill pending ajax page scroll + async_on_scroll: false, // sync/async onscroll handler + onscroll_worker_duration_threshold: 30, // time in ms, for async scroll handler + + retries_on_data_failures: 3, // how many retries to make if we can't fetch chart data from the server + + setOptionCallback: function () { + } + }, + + debug: { + show_boxes: false, + main_loop: false, + focus: false, + visibility: false, + chart_data_url: false, + chart_errors: true, // remember to set it to false before merging + chart_timing: false, + chart_calls: false, + libraries: false, + dygraph: false, + globalSelectionSync: false, + globalPanAndZoom: false + } +}; + +NETDATA.statistics = { + refreshes_total: 0, + refreshes_active: 0, + refreshes_active_max: 0 +}; + +// local storage options + +NETDATA.localStorage = { + default: {}, + current: {}, + callback: {} // only used for resetting back to defaults +}; + +NETDATA.localStorageTested = -1; +NETDATA.localStorageTest = function () { + if (NETDATA.localStorageTested !== -1) { + return NETDATA.localStorageTested; + } + + if (typeof Storage !== "undefined" && typeof localStorage === 'object') { + let test = 'test'; + try { + localStorage.setItem(test, test); + localStorage.removeItem(test); + NETDATA.localStorageTested = true; + } catch (e) { + NETDATA.localStorageTested = false; + } + } else { + NETDATA.localStorageTested = false; + } + + return NETDATA.localStorageTested; +}; + +NETDATA.localStorageGet = function (key, def, callback) { + let ret = def; + + if (typeof NETDATA.localStorage.default[key.toString()] === 'undefined') { + NETDATA.localStorage.default[key.toString()] = def; + NETDATA.localStorage.callback[key.toString()] = callback; + } + + if (NETDATA.localStorageTest()) { + try { + // console.log('localStorage: loading "' + key.toString() + '"'); + ret = localStorage.getItem(key.toString()); + // console.log('netdata loaded: ' + key.toString() + ' = ' + ret.toString()); + if (ret === null || ret === 'undefined') { + // console.log('localStorage: cannot load it, saving "' + key.toString() + '" with value "' + JSON.stringify(def) + '"'); + localStorage.setItem(key.toString(), JSON.stringify(def)); + ret = def; + } else { + // console.log('localStorage: got "' + key.toString() + '" with value "' + ret + '"'); + ret = JSON.parse(ret); + // console.log('localStorage: loaded "' + key.toString() + '" as value ' + ret + ' of type ' + typeof(ret)); + } + } catch (error) { + console.log('localStorage: failed to read "' + key.toString() + '", using default: "' + def.toString() + '"'); + ret = def; + } + } + + if (typeof ret === 'undefined' || ret === 'undefined') { + console.log('localStorage: LOADED UNDEFINED "' + key.toString() + '" as value ' + ret + ' of type ' + typeof(ret)); + ret = def; + } + + NETDATA.localStorage.current[key.toString()] = ret; + return ret; +}; + +NETDATA.localStorageSet = function (key, value, callback) { + if (typeof value === 'undefined' || value === 'undefined') { + console.log('localStorage: ATTEMPT TO SET UNDEFINED "' + key.toString() + '" as value ' + value + ' of type ' + typeof(value)); + } + + if (typeof NETDATA.localStorage.default[key.toString()] === 'undefined') { + NETDATA.localStorage.default[key.toString()] = value; + NETDATA.localStorage.current[key.toString()] = value; + NETDATA.localStorage.callback[key.toString()] = callback; + } + + if (NETDATA.localStorageTest()) { + // console.log('localStorage: saving "' + key.toString() + '" with value "' + JSON.stringify(value) + '"'); + try { + localStorage.setItem(key.toString(), JSON.stringify(value)); + } catch (e) { + console.log('localStorage: failed to save "' + key.toString() + '" with value: "' + value.toString() + '"'); + } + } + + NETDATA.localStorage.current[key.toString()] = value; + return value; +}; + +NETDATA.localStorageGetRecursive = function (obj, prefix, callback) { + let keys = Object.keys(obj); + let len = keys.length; + while (len--) { + let i = keys[len]; + + if (typeof obj[i] === 'object') { + //console.log('object ' + prefix + '.' + i.toString()); + NETDATA.localStorageGetRecursive(obj[i], prefix + '.' + i.toString(), callback); + continue; + } + + obj[i] = NETDATA.localStorageGet(prefix + '.' + i.toString(), obj[i], callback); + } +}; + +NETDATA.setOption = function (key, value) { + if (key.toString() === 'setOptionCallback') { + if (typeof NETDATA.options.current.setOptionCallback === 'function') { + NETDATA.options.current[key.toString()] = value; + NETDATA.options.current.setOptionCallback(); + } + } else if (NETDATA.options.current[key.toString()] !== value) { + let name = 'options.' + key.toString(); + + if (typeof NETDATA.localStorage.default[name.toString()] === 'undefined') { + console.log('localStorage: setOption() on unsaved option: "' + name.toString() + '", value: ' + value); + } + + //console.log(NETDATA.localStorage); + //console.log('setOption: setting "' + key.toString() + '" to "' + value + '" of type ' + typeof(value) + ' original type ' + typeof(NETDATA.options.current[key.toString()])); + //console.log(NETDATA.options); + NETDATA.options.current[key.toString()] = NETDATA.localStorageSet(name.toString(), value, null); + + if (typeof NETDATA.options.current.setOptionCallback === 'function') { + NETDATA.options.current.setOptionCallback(); + } + } + + return true; +}; + +NETDATA.getOption = function (key) { + return NETDATA.options.current[key.toString()]; +}; + +// read settings from local storage +NETDATA.localStorageGetRecursive(NETDATA.options.current, 'options', null); + +// always start with this option enabled. +NETDATA.setOption('stop_updates_when_focus_is_lost', true); + +NETDATA.resetOptions = function () { + let keys = Object.keys(NETDATA.localStorage.default); + let len = keys.length; + + while (len--) { + let i = keys[len]; + let a = i.split('.'); + + if (a[0] === 'options') { + if (a[1] === 'setOptionCallback') { + continue; + } + if (typeof NETDATA.localStorage.default[i] === 'undefined') { + continue; + } + if (NETDATA.options.current[i] === NETDATA.localStorage.default[i]) { + continue; + } + + NETDATA.setOption(a[1], NETDATA.localStorage.default[i]); + } else if (a[0] === 'chart_heights') { + if (typeof NETDATA.localStorage.callback[i] === 'function' && typeof NETDATA.localStorage.default[i] !== 'undefined') { + NETDATA.localStorage.callback[i](NETDATA.localStorage.default[i]); + } + } + } + + NETDATA.dateTime.init(NETDATA.options.current.timezone); +}; + +// *** src/dashboard.js/timeout.js + +// TODO: Better name needed + +NETDATA.timeout = { + // by default, these are just wrappers to setTimeout() / clearTimeout() + + step: function (callback) { + return window.setTimeout(callback, 1000 / 60); + }, + + set: function (callback, delay) { + return window.setTimeout(callback, delay); + }, + + clear: function (id) { + return window.clearTimeout(id); + }, + + init: function () { + let custom = true; + + if (window.requestAnimationFrame) { + this.step = function (callback) { + return window.requestAnimationFrame(callback); + }; + + this.clear = function (handle) { + return window.cancelAnimationFrame(handle.value); + }; + // } else if (window.webkitRequestAnimationFrame) { + // this.step = function (callback) { + // return window.webkitRequestAnimationFrame(callback); + // }; + + // if (window.webkitCancelAnimationFrame) { + // this.clear = function (handle) { + // return window.webkitCancelAnimationFrame(handle.value); + // }; + // } else if (window.webkitCancelRequestAnimationFrame) { + // this.clear = function (handle) { + // return window.webkitCancelRequestAnimationFrame(handle.value); + // }; + // } + // } else if (window.mozRequestAnimationFrame) { + // this.step = function (callback) { + // return window.mozRequestAnimationFrame(callback); + // }; + + // this.clear = function (handle) { + // return window.mozCancelRequestAnimationFrame(handle.value); + // }; + // } else if (window.oRequestAnimationFrame) { + // this.step = function (callback) { + // return window.oRequestAnimationFrame(callback); + // }; + + // this.clear = function (handle) { + // return window.oCancelRequestAnimationFrame(handle.value); + // }; + // } else if (window.msRequestAnimationFrame) { + // this.step = function (callback) { + // return window.msRequestAnimationFrame(callback); + // }; + + // this.clear = function (handle) { + // return window.msCancelRequestAnimationFrame(handle.value); + // }; + } else { + custom = false; + } + + if (custom) { + // we have installed custom .step() / .clear() functions + // overwrite the .set() too + + this.set = function (callback, delay) { + let start = Date.now(), + handle = new Object(); + + const loop = () => { + let current = Date.now(), + delta = current - start; + + if (delta >= delay) { + callback.call(); + } else { + handle.value = this.step(loop); + } + } + + handle.value = this.step(loop); + return handle; + }; + } + } +}; + +NETDATA.timeout.init(); +// Codacy declarations +/* global netdataTheme */ + +NETDATA.themes = { + white: { + bootstrap_css: NETDATA.serverStatic + 'css/bootstrap-3.3.7.css', + dashboard_css: NETDATA.serverStatic + 'dashboard.css?v20190902-0', + background: '#FFFFFF', + foreground: '#000000', + grid: '#F0F0F0', + axis: '#F0F0F0', + highlight: '#F5F5F5', + colors: ['#3366CC', '#DC3912', '#109618', '#FF9900', '#990099', '#DD4477', + '#3B3EAC', '#66AA00', '#0099C6', '#B82E2E', '#AAAA11', '#5574A6', + '#994499', '#22AA99', '#6633CC', '#E67300', '#316395', '#8B0707', + '#329262', '#3B3EAC'], + easypiechart_track: '#f0f0f0', + easypiechart_scale: '#dfe0e0', + gauge_pointer: '#C0C0C0', + gauge_stroke: '#F0F0F0', + gauge_gradient: false, + d3pie: { + title: '#333333', + subtitle: '#666666', + footer: '#888888', + other: '#aaaaaa', + mainlabel: '#333333', + percentage: '#dddddd', + value: '#aaaa22', + tooltip_bg: '#000000', + tooltip_fg: '#efefef', + segment_stroke: "#ffffff", + gradient_color: '#000000' + } + }, + slate: { + bootstrap_css: NETDATA.serverStatic + 'css/bootstrap-slate-flat-3.3.7.css?v20161229-1', + dashboard_css: NETDATA.serverStatic + 'dashboard.slate.css?v20190902-0', + background: '#272b30', + foreground: '#C8C8C8', + grid: '#283236', + axis: '#283236', + highlight: '#383838', + /* colors: [ '#55bb33', '#ff2222', '#0099C6', '#faa11b', '#adbce0', '#DDDD00', + '#4178ba', '#f58122', '#a5cc39', '#f58667', '#f5ef89', '#cf93c0', + '#a5d18a', '#b8539d', '#3954a3', '#c8a9cf', '#c7de8a', '#fad20a', + '#a6a479', '#a66da8' ], + */ + colors: ['#66AA00', '#FE3912', '#3366CC', '#D66300', '#0099C6', '#DDDD00', + '#5054e6', '#EE9911', '#BB44CC', '#e45757', '#ef0aef', '#CC7700', + '#22AA99', '#109618', '#905bfd', '#f54882', '#4381bf', '#ff3737', + '#329262', '#3B3EFF'], + easypiechart_track: '#373b40', + easypiechart_scale: '#373b40', + gauge_pointer: '#474b50', + gauge_stroke: '#373b40', + gauge_gradient: false, + d3pie: { + title: '#C8C8C8', + subtitle: '#283236', + footer: '#283236', + other: '#283236', + mainlabel: '#C8C8C8', + percentage: '#dddddd', + value: '#cccc44', + tooltip_bg: '#272b30', + tooltip_fg: '#C8C8C8', + segment_stroke: "#283236", + gradient_color: '#000000' + } + } +}; + +if (typeof netdataTheme !== 'undefined' && typeof NETDATA.themes[netdataTheme] !== 'undefined') { + NETDATA.themes.current = NETDATA.themes[netdataTheme]; +} else { + NETDATA.themes.current = NETDATA.themes.white; +} + +NETDATA.colors = NETDATA.themes.current.colors; + +// these are the colors Google Charts are using +// we have them here to attempt emulate their look and feel on the other chart libraries +// http://there4.io/2012/05/02/google-chart-color-list/ +//NETDATA.colors = [ '#3366CC', '#DC3912', '#FF9900', '#109618', '#990099', '#3B3EAC', '#0099C6', +// '#DD4477', '#66AA00', '#B82E2E', '#316395', '#994499', '#22AA99', '#AAAA11', +// '#6633CC', '#E67300', '#8B0707', '#329262', '#5574A6', '#3B3EAC' ]; + +// an alternative set +// http://www.mulinblog.com/a-color-palette-optimized-for-data-visualization/ +// (blue) (red) (orange) (green) (pink) (brown) (purple) (yellow) (gray) +//NETDATA.colors = [ '#5DA5DA', '#F15854', '#FAA43A', '#60BD68', '#F17CB0', '#B2912F', '#B276B2', '#DECF3F', '#4D4D4D' ]; +// dygraph + +// Codacy declarations +/* global smoothPlotter */ +/* global Dygraph */ + +NETDATA.dygraph = { + smooth: false +}; + +NETDATA.dygraphToolboxPanAndZoom = function (state, after, before) { + if (after < state.netdata_first) { + after = state.netdata_first; + } + + if (before > state.netdata_last) { + before = state.netdata_last; + } + + state.setMode('zoom'); + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + state.tmp.dygraph_user_action = true; + state.tmp.dygraph_force_zoom = true; + // state.log('toolboxPanAndZoom'); + state.updateChartPanOrZoom(after, before); + NETDATA.globalPanAndZoom.setMaster(state, after, before); +}; + +NETDATA.dygraphSetSelection = function (state, t) { + if (typeof state.tmp.dygraph_instance !== 'undefined') { + let r = state.calculateRowForTime(t); + if (r !== -1) { + state.tmp.dygraph_instance.setSelection(r); + return true; + } else { + state.tmp.dygraph_instance.clearSelection(); + state.legendShowUndefined(); + } + } + + return false; +}; + +NETDATA.dygraphClearSelection = function (state) { + if (typeof state.tmp.dygraph_instance !== 'undefined') { + state.tmp.dygraph_instance.clearSelection(); + } + return true; +}; + +NETDATA.dygraphSmoothInitialize = function (callback) { + $.ajax({ + url: NETDATA.dygraph_smooth_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.dygraph.smooth = true; + smoothPlotter.smoothing = 0.3; + }) + .fail(function () { + NETDATA.dygraph.smooth = false; + }) + .always(function () { + if (typeof callback === "function") { + return callback(); + } + }); +}; + +NETDATA.dygraphInitialize = function (callback) { + if (typeof netdataNoDygraphs === 'undefined' || !netdataNoDygraphs) { + $.ajax({ + url: NETDATA.dygraph_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.registerChartLibrary('dygraph', NETDATA.dygraph_js); + }) + .fail(function () { + NETDATA.chartLibraries.dygraph.enabled = false; + NETDATA.error(100, NETDATA.dygraph_js); + }) + .always(function () { + if (NETDATA.chartLibraries.dygraph.enabled && NETDATA.options.current.smooth_plot) { + NETDATA.dygraphSmoothInitialize(callback); + } else if (typeof callback === "function") { + return callback(); + } + }); + } else { + NETDATA.chartLibraries.dygraph.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } +}; + +NETDATA.dygraphChartUpdate = function (state, data) { + let dygraph = state.tmp.dygraph_instance; + + if (typeof dygraph === 'undefined') { + return NETDATA.dygraphChartCreate(state, data); + } + + // when the chart is not visible, and hidden + // if there is a window resize, dygraph detects + // its element size as 0x0. + // this will make it re-appear properly + + if (state.tm.last_unhidden > state.tmp.dygraph_last_rendered) { + dygraph.resize(); + } + + let options = { + file: data.result.data, + colors: state.chartColors(), + labels: data.result.labels, + //labelsDivWidth: state.chartWidth() - 70, + includeZero: state.tmp.dygraph_include_zero, + visibility: state.dimensions_visibility.selected2BooleanArray(state.data.dimension_names) + }; + + if (state.tmp.dygraph_chart_type === 'stacked') { + if (options.includeZero && state.dimensions_visibility.countSelected() < options.visibility.length) { + options.includeZero = 0; + } + } + + if (!NETDATA.chartLibraries.dygraph.isSparkline(state)) { + options.ylabel = state.units_current; // (state.units_desired === 'auto')?"":state.units_current; + } + + if (state.tmp.dygraph_force_zoom) { + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('dygraphChartUpdate() forced zoom update'); + } + + options.dateWindow = (state.requested_padding !== null) ? [state.view_after, state.view_before] : null; + //options.isZoomedIgnoreProgrammaticZoom = true; + state.tmp.dygraph_force_zoom = false; + } else if (state.current.name !== 'auto') { + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('dygraphChartUpdate() loose update'); + } + } else { + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('dygraphChartUpdate() strict update'); + } + + options.dateWindow = (state.requested_padding !== null) ? [state.view_after, state.view_before] : null; + //options.isZoomedIgnoreProgrammaticZoom = true; + } + + options.valueRange = state.tmp.dygraph_options.valueRange; + + let oldMax = null, oldMin = null; + if (state.tmp.__commonMin !== null) { + state.data.min = state.tmp.dygraph_instance.axes_[0].extremeRange[0]; + oldMin = options.valueRange[0] = NETDATA.commonMin.get(state); + } + if (state.tmp.__commonMax !== null) { + state.data.max = state.tmp.dygraph_instance.axes_[0].extremeRange[1]; + oldMax = options.valueRange[1] = NETDATA.commonMax.get(state); + } + + if (state.tmp.dygraph_smooth_eligible) { + if ((NETDATA.options.current.smooth_plot && state.tmp.dygraph_options.plotter !== smoothPlotter) + || (NETDATA.options.current.smooth_plot === false && state.tmp.dygraph_options.plotter === smoothPlotter)) { + NETDATA.dygraphChartCreate(state, data); + return; + } + } + + if (netdataSnapshotData !== null && NETDATA.globalPanAndZoom.isActive() && NETDATA.globalPanAndZoom.isMaster(state) === false) { + // pan and zoom on snapshots + options.dateWindow = [NETDATA.globalPanAndZoom.force_after_ms, NETDATA.globalPanAndZoom.force_before_ms]; + //options.isZoomedIgnoreProgrammaticZoom = true; + } + + if (NETDATA.chartLibraries.dygraph.isLogScale(state)) { + if (Array.isArray(options.valueRange) && options.valueRange[0] <= 0) { + options.valueRange[0] = null; + } + } + + dygraph.updateOptions(options); + + let redraw = false; + if (oldMin !== null && oldMin > state.tmp.dygraph_instance.axes_[0].extremeRange[0]) { + state.data.min = state.tmp.dygraph_instance.axes_[0].extremeRange[0]; + options.valueRange[0] = NETDATA.commonMin.get(state); + redraw = true; + } + if (oldMax !== null && oldMax < state.tmp.dygraph_instance.axes_[0].extremeRange[1]) { + state.data.max = state.tmp.dygraph_instance.axes_[0].extremeRange[1]; + options.valueRange[1] = NETDATA.commonMax.get(state); + redraw = true; + } + + if (redraw) { + // state.log('forcing redraw to adapt to common- min/max'); + dygraph.updateOptions(options); + } + + state.tmp.dygraph_last_rendered = Date.now(); + return true; +}; + +NETDATA.dygraphChartCreate = function (state, data) { + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('dygraphChartCreate()'); + } + + state.tmp.dygraph_chart_type = NETDATA.dataAttribute(state.element, 'dygraph-type', state.chart.chart_type); + if (state.tmp.dygraph_chart_type === 'stacked' && data.dimensions === 1) { + state.tmp.dygraph_chart_type = 'area'; + } + if (state.tmp.dygraph_chart_type === 'stacked' && NETDATA.chartLibraries.dygraph.isLogScale(state)) { + state.tmp.dygraph_chart_type = 'area'; + } + + let highlightCircleSize = NETDATA.chartLibraries.dygraph.isSparkline(state) ? 3 : 4; + + let smooth = NETDATA.dygraph.smooth + ? (NETDATA.dataAttributeBoolean(state.element, 'dygraph-smooth', (state.tmp.dygraph_chart_type === 'line' && NETDATA.chartLibraries.dygraph.isSparkline(state) === false))) + : false; + + state.tmp.dygraph_include_zero = NETDATA.dataAttribute(state.element, 'dygraph-includezero', (state.tmp.dygraph_chart_type === 'stacked')); + let drawAxis = NETDATA.dataAttributeBoolean(state.element, 'dygraph-drawaxis', true); + + state.tmp.dygraph_options = { + colors: NETDATA.dataAttribute(state.element, 'dygraph-colors', state.chartColors()), + + // leave a few pixels empty on the right of the chart + rightGap: NETDATA.dataAttribute(state.element, 'dygraph-rightgap', 5), + showRangeSelector: NETDATA.dataAttributeBoolean(state.element, 'dygraph-showrangeselector', false), + showRoller: NETDATA.dataAttributeBoolean(state.element, 'dygraph-showroller', false), + title: NETDATA.dataAttribute(state.element, 'dygraph-title', state.title), + titleHeight: NETDATA.dataAttribute(state.element, 'dygraph-titleheight', 19), + legend: NETDATA.dataAttribute(state.element, 'dygraph-legend', 'always'), // we need this to get selection events + labels: data.result.labels, + labelsDiv: NETDATA.dataAttribute(state.element, 'dygraph-labelsdiv', state.element_legend_childs.hidden), + //labelsDivStyles: NETDATA.dataAttribute(state.element, 'dygraph-labelsdivstyles', { 'fontSize':'1px' }), + //labelsDivWidth: NETDATA.dataAttribute(state.element, 'dygraph-labelsdivwidth', state.chartWidth() - 70), + labelsSeparateLines: NETDATA.dataAttributeBoolean(state.element, 'dygraph-labelsseparatelines', true), + labelsShowZeroValues: NETDATA.chartLibraries.dygraph.isLogScale(state) ? false : NETDATA.dataAttributeBoolean(state.element, 'dygraph-labelsshowzerovalues', true), + labelsKMB: false, + labelsKMG2: false, + showLabelsOnHighlight: NETDATA.dataAttributeBoolean(state.element, 'dygraph-showlabelsonhighlight', true), + hideOverlayOnMouseOut: NETDATA.dataAttributeBoolean(state.element, 'dygraph-hideoverlayonmouseout', true), + includeZero: state.tmp.dygraph_include_zero, + xRangePad: NETDATA.dataAttribute(state.element, 'dygraph-xrangepad', 0), + yRangePad: NETDATA.dataAttribute(state.element, 'dygraph-yrangepad', 1), + valueRange: NETDATA.dataAttribute(state.element, 'dygraph-valuerange', [null, null]), + ylabel: state.units_current, // (state.units_desired === 'auto')?"":state.units_current, + yLabelWidth: NETDATA.dataAttribute(state.element, 'dygraph-ylabelwidth', 12), + + // the function to plot the chart + plotter: null, + + // The width of the lines connecting data points. + // This can be used to increase the contrast or some graphs. + strokeWidth: NETDATA.dataAttribute(state.element, 'dygraph-strokewidth', ((state.tmp.dygraph_chart_type === 'stacked') ? 0.1 : ((smooth === true) ? 1.5 : 0.7))), + strokePattern: NETDATA.dataAttribute(state.element, 'dygraph-strokepattern', undefined), + + // The size of the dot to draw on each point in pixels (see drawPoints). + // A dot is always drawn when a point is "isolated", + // i.e. there is a missing point on either side of it. + // This also controls the size of those dots. + drawPoints: NETDATA.dataAttributeBoolean(state.element, 'dygraph-drawpoints', false), + + // Draw points at the edges of gaps in the data. + // This improves visibility of small data segments or other data irregularities. + drawGapEdgePoints: NETDATA.dataAttributeBoolean(state.element, 'dygraph-drawgapedgepoints', true), + connectSeparatedPoints: NETDATA.chartLibraries.dygraph.isLogScale(state) ? false : NETDATA.dataAttributeBoolean(state.element, 'dygraph-connectseparatedpoints', false), + pointSize: NETDATA.dataAttribute(state.element, 'dygraph-pointsize', 1), + + // enabling this makes the chart with little square lines + stepPlot: NETDATA.dataAttributeBoolean(state.element, 'dygraph-stepplot', false), + + // Draw a border around graph lines to make crossing lines more easily + // distinguishable. Useful for graphs with many lines. + strokeBorderColor: NETDATA.dataAttribute(state.element, 'dygraph-strokebordercolor', NETDATA.themes.current.background), + strokeBorderWidth: NETDATA.dataAttribute(state.element, 'dygraph-strokeborderwidth', (state.tmp.dygraph_chart_type === 'stacked') ? 0.0 : 0.0), + fillGraph: NETDATA.dataAttribute(state.element, 'dygraph-fillgraph', (state.tmp.dygraph_chart_type === 'area' || state.tmp.dygraph_chart_type === 'stacked')), + fillAlpha: NETDATA.dataAttribute(state.element, 'dygraph-fillalpha', + ((state.tmp.dygraph_chart_type === 'stacked') + ? NETDATA.options.current.color_fill_opacity_stacked + : NETDATA.options.current.color_fill_opacity_area) + ), + stackedGraph: NETDATA.dataAttribute(state.element, 'dygraph-stackedgraph', (state.tmp.dygraph_chart_type === 'stacked')), + stackedGraphNaNFill: NETDATA.dataAttribute(state.element, 'dygraph-stackedgraphnanfill', 'none'), + drawAxis: drawAxis, + axisLabelFontSize: NETDATA.dataAttribute(state.element, 'dygraph-axislabelfontsize', 10), + axisLineColor: NETDATA.dataAttribute(state.element, 'dygraph-axislinecolor', NETDATA.themes.current.axis), + axisLineWidth: NETDATA.dataAttribute(state.element, 'dygraph-axislinewidth', 1.0), + drawGrid: NETDATA.dataAttributeBoolean(state.element, 'dygraph-drawgrid', true), + gridLinePattern: NETDATA.dataAttribute(state.element, 'dygraph-gridlinepattern', null), + gridLineWidth: NETDATA.dataAttribute(state.element, 'dygraph-gridlinewidth', 1.0), + gridLineColor: NETDATA.dataAttribute(state.element, 'dygraph-gridlinecolor', NETDATA.themes.current.grid), + maxNumberWidth: NETDATA.dataAttribute(state.element, 'dygraph-maxnumberwidth', 8), + sigFigs: NETDATA.dataAttribute(state.element, 'dygraph-sigfigs', null), + digitsAfterDecimal: NETDATA.dataAttribute(state.element, 'dygraph-digitsafterdecimal', 2), + valueFormatter: NETDATA.dataAttribute(state.element, 'dygraph-valueformatter', undefined), + highlightCircleSize: NETDATA.dataAttribute(state.element, 'dygraph-highlightcirclesize', highlightCircleSize), + highlightSeriesOpts: NETDATA.dataAttribute(state.element, 'dygraph-highlightseriesopts', null), // TOO SLOW: { strokeWidth: 1.5 }, + highlightSeriesBackgroundAlpha: NETDATA.dataAttribute(state.element, 'dygraph-highlightseriesbackgroundalpha', null), // TOO SLOW: (state.tmp.dygraph_chart_type === 'stacked')?0.7:0.5, + pointClickCallback: NETDATA.dataAttribute(state.element, 'dygraph-pointclickcallback', undefined), + visibility: state.dimensions_visibility.selected2BooleanArray(state.data.dimension_names), + logscale: NETDATA.chartLibraries.dygraph.isLogScale(state) ? 'y' : undefined, + + axes: { + x: { + pixelsPerLabel: NETDATA.dataAttribute(state.element, 'dygraph-xpixelsperlabel', 50), + ticker: Dygraph.dateTicker, + axisLabelWidth: NETDATA.dataAttribute(state.element, 'dygraph-xaxislabelwidth', 60), + drawAxis: NETDATA.dataAttributeBoolean(state.element, 'dygraph-drawxaxis', drawAxis), + axisLabelFormatter: function (d, gran) { + void(gran); + return NETDATA.dateTime.xAxisTimeString(d); + } + }, + y: { + logscale: NETDATA.chartLibraries.dygraph.isLogScale(state) ? true : undefined, + pixelsPerLabel: NETDATA.dataAttribute(state.element, 'dygraph-ypixelsperlabel', 15), + axisLabelWidth: NETDATA.dataAttribute(state.element, 'dygraph-yaxislabelwidth', 50), + drawAxis: NETDATA.dataAttributeBoolean(state.element, 'dygraph-drawyaxis', drawAxis), + axisLabelFormatter: function (y) { + + // unfortunately, we have to call this every single time + state.legendFormatValueDecimalsFromMinMax( + this.axes_[0].extremeRange[0], + this.axes_[0].extremeRange[1] + ); + + let old_units = this.user_attrs_.ylabel; + let v = state.legendFormatValue(y); + let new_units = state.units_current; + + if (state.units_desired === 'auto' && typeof old_units !== 'undefined' && new_units !== old_units && !NETDATA.chartLibraries.dygraph.isSparkline(state)) { + // console.log(this); + // state.log('units discrepancy: old = ' + old_units + ', new = ' + new_units); + let len = this.plugins_.length; + while (len--) { + // console.log(this.plugins_[len]); + if (typeof this.plugins_[len].plugin.ylabel_div_ !== 'undefined' + && this.plugins_[len].plugin.ylabel_div_ !== null + && typeof this.plugins_[len].plugin.ylabel_div_.children !== 'undefined' + && this.plugins_[len].plugin.ylabel_div_.children !== null + && typeof this.plugins_[len].plugin.ylabel_div_.children[0].children !== 'undefined' + && this.plugins_[len].plugin.ylabel_div_.children[0].children !== null + ) { + this.plugins_[len].plugin.ylabel_div_.children[0].children[0].innerHTML = new_units; + this.user_attrs_.ylabel = new_units; + break; + } + } + + if (len < 0) { + state.log('units discrepancy, but cannot find dygraphs div to change: old = ' + old_units + ', new = ' + new_units); + } + } + + return v; + } + } + }, + legendFormatter: function (data) { + if (state.tmp.dygraph_mouse_down) { + return; + } + + let elements = state.element_legend_childs; + + // if the hidden div is not there + // we are not managing the legend + if (elements.hidden === null) { + return; + } + + if (typeof data.x !== 'undefined') { + state.legendSetDate(data.x); + let i = data.series.length; + while (i--) { + let series = data.series[i]; + if (series.isVisible) { + state.legendSetLabelValue(series.label, series.y); + } else { + state.legendSetLabelValue(series.label, null); + } + } + } + + return ''; + }, + drawCallback: function (dygraph, is_initial) { + + // the user has panned the chart and this is called to re-draw the chart + // 1. refresh this chart by adding data to it + // 2. notify all the other charts about the update they need + + // to prevent an infinite loop (feedback), we use + // state.tmp.dygraph_user_action + // - when true, this is initiated by a user + // - when false, this is feedback + + if (state.current.name !== 'auto' && state.tmp.dygraph_user_action) { + state.tmp.dygraph_user_action = false; + + let x_range = dygraph.xAxisRange(); + let after = Math.round(x_range[0]); + let before = Math.round(x_range[1]); + + if (NETDATA.options.debug.dygraph) { + state.log('dygraphDrawCallback(dygraph, ' + is_initial + '): mode ' + state.current.name + ' ' + (after / 1000).toString() + ' - ' + (before / 1000).toString()); + //console.log(state); + } + + if (before <= state.netdata_last && after >= state.netdata_first) { + // update only when we are within the data limits + state.updateChartPanOrZoom(after, before); + } + } + }, + zoomCallback: function (minDate, maxDate, yRanges) { + + // the user has selected a range on the chart + // 1. refresh this chart by adding data to it + // 2. notify all the other charts about the update they need + + void(yRanges); + + if (NETDATA.options.debug.dygraph) { + state.log('dygraphZoomCallback(): ' + state.current.name); + } + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + state.setMode('zoom'); + + // refresh it to the greatest possible zoom level + state.tmp.dygraph_user_action = true; + state.tmp.dygraph_force_zoom = true; + state.updateChartPanOrZoom(minDate, maxDate); + }, + highlightCallback: function (event, x, points, row, seriesName) { + void(seriesName); + + state.pauseChart(); + + // there is a bug in dygraph when the chart is zoomed enough + // the time it thinks is selected is wrong + // here we calculate the time t based on the row number selected + // which is ok + // let t = state.data_after + row * state.data_update_every; + // console.log('row = ' + row + ', x = ' + x + ', t = ' + t + ' ' + ((t === x)?'SAME':(Math.abs(x-t)<=state.data_update_every)?'SIMILAR':'DIFFERENT') + ', rows in db: ' + state.data_points + ' visible(x) = ' + state.timeIsVisible(x) + ' visible(t) = ' + state.timeIsVisible(t) + ' r(x) = ' + state.calculateRowForTime(x) + ' r(t) = ' + state.calculateRowForTime(t) + ' range: ' + state.data_after + ' - ' + state.data_before + ' real: ' + state.data.after + ' - ' + state.data.before + ' every: ' + state.data_update_every); + + if (state.tmp.dygraph_mouse_down !== true) { + NETDATA.globalSelectionSync.sync(state, x); + } + + // fix legend zIndex using the internal structures of dygraph legend module + // this works, but it is a hack! + // state.tmp.dygraph_instance.plugins_[0].plugin.legend_div_.style.zIndex = 10000; + }, + unhighlightCallback: function (event) { + void(event); + + if (state.tmp.dygraph_mouse_down) { + return; + } + + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('dygraphUnhighlightCallback()'); + } + + state.unpauseChart(); + NETDATA.globalSelectionSync.stop(); + }, + underlayCallback: function (canvas, area, g) { + // the chart is about to be drawn + + // update history_tip_element + if (state.tmp.dygraph_history_tip_element) { + const xHookRightSide = g.toDomXCoord(state.netdata_first); + if (xHookRightSide > area.x) { + state.tmp.dygraph_history_tip_element_displayed = true; + // group the styles for possible better performance + state.tmp.dygraph_history_tip_element.setAttribute( + 'style', + `display: block; left: ${area.x}px; right: calc(100% - ${xHookRightSide}px);` + ) + } else { + if (state.tmp.dygraph_history_tip_element_displayed) { + // additional check just for performance + // don't update the DOM when it's not needed + state.tmp.dygraph_history_tip_element.style.display = 'none'; + state.tmp.dygraph_history_tip_element_displayed = false; + } + } + } + + // this function renders global highlighted time-frame + + if (NETDATA.globalChartUnderlay.isActive()) { + let after = NETDATA.globalChartUnderlay.after; + let before = NETDATA.globalChartUnderlay.before; + + if (after < state.view_after) { + after = state.view_after; + } + + if (before > state.view_before) { + before = state.view_before; + } + + if (after < before) { + let bottom_left = g.toDomCoords(after, -20); + let top_right = g.toDomCoords(before, +20); + + let left = bottom_left[0]; + let right = top_right[0]; + + canvas.fillStyle = NETDATA.themes.current.highlight; + canvas.fillRect(left, area.y, right - left, area.h); + } + } + }, + interactionModel: { + mousedown: function (event, dygraph, context) { + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.mousedown()'); + } + + state.tmp.dygraph_user_action = true; + + if (NETDATA.options.debug.dygraph) { + state.log('dygraphMouseDown()'); + } + + // Right-click should not initiate anything. + if (event.button && event.button === 2) { + return; + } + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_mouse_down = true; + context.initializeMouseDown(event, dygraph, context); + + //console.log(event); + if (event.button && event.button === 1) { + if (event.shiftKey) { + //console.log('middle mouse button dragging (PAN)'); + + state.setMode('pan'); + // NETDATA.globalSelectionSync.delay(); + state.tmp.dygraph_highlight_after = null; + Dygraph.startPan(event, dygraph, context); + } else if (event.altKey || event.ctrlKey || event.metaKey) { + //console.log('middle mouse button highlight'); + + if (!(event.offsetX && event.offsetY)) { + event.offsetX = event.layerX - event.target.offsetLeft; + event.offsetY = event.layerY - event.target.offsetTop; + } + state.tmp.dygraph_highlight_after = dygraph.toDataXCoord(event.offsetX); + Dygraph.startZoom(event, dygraph, context); + } else { + //console.log('middle mouse button selection for zoom (ZOOM)'); + + state.setMode('zoom'); + // NETDATA.globalSelectionSync.delay(); + state.tmp.dygraph_highlight_after = null; + Dygraph.startZoom(event, dygraph, context); + } + } else { + if (event.shiftKey) { + //console.log('left mouse button selection for zoom (ZOOM)'); + + state.setMode('zoom'); + // NETDATA.globalSelectionSync.delay(); + state.tmp.dygraph_highlight_after = null; + Dygraph.startZoom(event, dygraph, context); + } else if (event.altKey || event.ctrlKey || event.metaKey) { + //console.log('left mouse button highlight'); + + if (!(event.offsetX && event.offsetY)) { + event.offsetX = event.layerX - event.target.offsetLeft; + event.offsetY = event.layerY - event.target.offsetTop; + } + state.tmp.dygraph_highlight_after = dygraph.toDataXCoord(event.offsetX); + Dygraph.startZoom(event, dygraph, context); + } else { + //console.log('left mouse button dragging (PAN)'); + + state.setMode('pan'); + // NETDATA.globalSelectionSync.delay(); + state.tmp.dygraph_highlight_after = null; + Dygraph.startPan(event, dygraph, context); + } + } + }, + mousemove: function (event, dygraph, context) { + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.mousemove()'); + } + + if (state.tmp.dygraph_highlight_after !== null) { + //console.log('highlight selection...'); + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_user_action = true; + Dygraph.moveZoom(event, dygraph, context); + event.preventDefault(); + } else if (context.isPanning) { + //console.log('panning...'); + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_user_action = true; + //NETDATA.globalSelectionSync.stop(); + //NETDATA.globalSelectionSync.delay(); + state.setMode('pan'); + context.is2DPan = false; + Dygraph.movePan(event, dygraph, context); + } else if (context.isZooming) { + //console.log('zooming...'); + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_user_action = true; + //NETDATA.globalSelectionSync.stop(); + //NETDATA.globalSelectionSync.delay(); + state.setMode('zoom'); + Dygraph.moveZoom(event, dygraph, context); + } + }, + mouseup: function (event, dygraph, context) { + state.tmp.dygraph_mouse_down = false; + + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.mouseup()'); + } + + if (state.tmp.dygraph_highlight_after !== null) { + //console.log('done highlight selection'); + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + if (!(event.offsetX && event.offsetY)) { + event.offsetX = event.layerX - event.target.offsetLeft; + event.offsetY = event.layerY - event.target.offsetTop; + } + + NETDATA.globalChartUnderlay.set(state + , state.tmp.dygraph_highlight_after + , dygraph.toDataXCoord(event.offsetX) + , state.view_after + , state.view_before + ); + + state.tmp.dygraph_highlight_after = null; + + context.isZooming = false; + dygraph.clearZoomRect_(); + dygraph.drawGraph_(false); + + // refresh all the charts immediately + NETDATA.options.auto_refresher_stop_until = 0; + } else if (context.isPanning) { + //console.log('done panning'); + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_user_action = true; + Dygraph.endPan(event, dygraph, context); + + // refresh all the charts immediately + NETDATA.options.auto_refresher_stop_until = 0; + } else if (context.isZooming) { + //console.log('done zomming'); + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_user_action = true; + Dygraph.endZoom(event, dygraph, context); + + // refresh all the charts immediately + NETDATA.options.auto_refresher_stop_until = 0; + } + }, + click: function (event, dygraph, context) { + void(dygraph); + void(context); + + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.click()'); + } + + event.preventDefault(); + }, + dblclick: function (event, dygraph, context) { + void(event); + void(dygraph); + void(context); + + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.dblclick()'); + } + NETDATA.resetAllCharts(state); + }, + wheel: function (event, dygraph, context) { + void(context); + + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.wheel()'); + } + + // Take the offset of a mouse event on the dygraph canvas and + // convert it to a pair of percentages from the bottom left. + // (Not top left, bottom is where the lower value is.) + function offsetToPercentage(g, offsetX, offsetY) { + // This is calculating the pixel offset of the leftmost date. + let xOffset = g.toDomCoords(g.xAxisRange()[0], null)[0]; + let yar0 = g.yAxisRange(0); + + // This is calculating the pixel of the highest value. (Top pixel) + let yOffset = g.toDomCoords(null, yar0[1])[1]; + + // x y w and h are relative to the corner of the drawing area, + // so that the upper corner of the drawing area is (0, 0). + let x = offsetX - xOffset; + let y = offsetY - yOffset; + + // This is computing the rightmost pixel, effectively defining the + // width. + let w = g.toDomCoords(g.xAxisRange()[1], null)[0] - xOffset; + + // This is computing the lowest pixel, effectively defining the height. + let h = g.toDomCoords(null, yar0[0])[1] - yOffset; + + // Percentage from the left. + let xPct = w === 0 ? 0 : (x / w); + // Percentage from the top. + let yPct = h === 0 ? 0 : (y / h); + + // The (1-) part below changes it from "% distance down from the top" + // to "% distance up from the bottom". + return [xPct, (1 - yPct)]; + } + + // Adjusts [x, y] toward each other by zoomInPercentage% + // Split it so the left/bottom axis gets xBias/yBias of that change and + // tight/top gets (1-xBias)/(1-yBias) of that change. + // + // If a bias is missing it splits it down the middle. + function zoomRange(g, zoomInPercentage, xBias, yBias) { + xBias = xBias || 0.5; + yBias = yBias || 0.5; + + function adjustAxis(axis, zoomInPercentage, bias) { + let delta = axis[1] - axis[0]; + let increment = delta * zoomInPercentage; + let foo = [increment * bias, increment * (1 - bias)]; + + return [axis[0] + foo[0], axis[1] - foo[1]]; + } + + let yAxes = g.yAxisRanges(); + let newYAxes = []; + for (let i = 0; i < yAxes.length; i++) { + newYAxes[i] = adjustAxis(yAxes[i], zoomInPercentage, yBias); + } + + return adjustAxis(g.xAxisRange(), zoomInPercentage, xBias); + } + + if (event.altKey || event.shiftKey) { + state.tmp.dygraph_user_action = true; + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + // http://dygraphs.com/gallery/interaction-api.js + let normal_def; + if (typeof event.wheelDelta === 'number' && !isNaN(event.wheelDelta)) + // chrome + { + normal_def = event.wheelDelta / 40; + } else + // firefox + { + normal_def = event.deltaY * -1.2; + } + + let normal = (event.detail) ? event.detail * -1 : normal_def; + let percentage = normal / 50; + + if (!(event.offsetX && event.offsetY)) { + event.offsetX = event.layerX - event.target.offsetLeft; + event.offsetY = event.layerY - event.target.offsetTop; + } + + let percentages = offsetToPercentage(dygraph, event.offsetX, event.offsetY); + let xPct = percentages[0]; + let yPct = percentages[1]; + + let new_x_range = zoomRange(dygraph, percentage, xPct, yPct); + let after = new_x_range[0]; + let before = new_x_range[1]; + + let first = state.netdata_first + state.data_update_every; + let last = state.netdata_last + state.data_update_every; + + if (before > last) { + after -= (before - last); + before = last; + } + if (after < first) { + after = first; + } + + state.setMode('zoom'); + state.updateChartPanOrZoom(after, before, function () { + dygraph.updateOptions({dateWindow: [after, before]}); + }); + + event.preventDefault(); + } + }, + touchstart: function (event, dygraph, context) { + state.tmp.dygraph_mouse_down = true; + + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.touchstart()'); + } + + state.tmp.dygraph_user_action = true; + state.setMode('zoom'); + state.pauseChart(); + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + Dygraph.defaultInteractionModel.touchstart(event, dygraph, context); + + // we overwrite the touch directions at the end, to overwrite + // the internal default of dygraph + context.touchDirections = {x: true, y: false}; + + state.dygraph_last_touch_start = Date.now(); + state.dygraph_last_touch_move = 0; + + if (typeof event.touches[0].pageX === 'number') { + state.dygraph_last_touch_page_x = event.touches[0].pageX; + } else { + state.dygraph_last_touch_page_x = 0; + } + }, + touchmove: function (event, dygraph, context) { + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.touchmove()'); + } + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_user_action = true; + Dygraph.defaultInteractionModel.touchmove(event, dygraph, context); + + state.dygraph_last_touch_move = Date.now(); + }, + touchend: function (event, dygraph, context) { + state.tmp.dygraph_mouse_down = false; + + if (NETDATA.options.debug.dygraph || state.debug) { + state.log('interactionModel.touchend()'); + } + + NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.delay(); + + state.tmp.dygraph_user_action = true; + Dygraph.defaultInteractionModel.touchend(event, dygraph, context); + + // if it didn't move, it is a selection + if (state.dygraph_last_touch_move === 0 && state.dygraph_last_touch_page_x !== 0) { + NETDATA.globalSelectionSync.dontSyncBefore = 0; + NETDATA.globalSelectionSync.setMaster(state); + + // internal api of dygraph + let pct = (state.dygraph_last_touch_page_x - (dygraph.plotter_.area.x + state.element.getBoundingClientRect().left)) / dygraph.plotter_.area.w; + console.log('pct: ' + pct.toString()); + + let t = Math.round(state.view_after + (state.view_before - state.view_after) * pct); + if (NETDATA.dygraphSetSelection(state, t)) { + NETDATA.globalSelectionSync.sync(state, t); + } + } + + // if it was double tap within double click time, reset the charts + let now = Date.now(); + if (typeof state.dygraph_last_touch_end !== 'undefined') { + if (state.dygraph_last_touch_move === 0) { + let dt = now - state.dygraph_last_touch_end; + if (dt <= NETDATA.options.current.double_click_speed) { + NETDATA.resetAllCharts(state); + } + } + } + + // remember the timestamp of the last touch end + state.dygraph_last_touch_end = now; + + // refresh all the charts immediately + NETDATA.options.auto_refresher_stop_until = 0; + } + } + }; + + if (NETDATA.chartLibraries.dygraph.isLogScale(state)) { + if (Array.isArray(state.tmp.dygraph_options.valueRange) && state.tmp.dygraph_options.valueRange[0] <= 0) { + state.tmp.dygraph_options.valueRange[0] = null; + } + } + + if (NETDATA.chartLibraries.dygraph.isSparkline(state)) { + state.tmp.dygraph_options.drawGrid = false; + state.tmp.dygraph_options.drawAxis = false; + state.tmp.dygraph_options.title = undefined; + state.tmp.dygraph_options.ylabel = undefined; + state.tmp.dygraph_options.yLabelWidth = 0; + //state.tmp.dygraph_options.labelsDivWidth = 120; + //state.tmp.dygraph_options.labelsDivStyles.width = '120px'; + state.tmp.dygraph_options.labelsSeparateLines = true; + state.tmp.dygraph_options.rightGap = 0; + state.tmp.dygraph_options.yRangePad = 1; + state.tmp.dygraph_options.axes.x.drawAxis = false; + state.tmp.dygraph_options.axes.y.drawAxis = false; + } + + if (smooth) { + state.tmp.dygraph_smooth_eligible = true; + + if (NETDATA.options.current.smooth_plot) { + state.tmp.dygraph_options.plotter = smoothPlotter; + } + } + else { + state.tmp.dygraph_smooth_eligible = false; + } + + if (netdataSnapshotData !== null && NETDATA.globalPanAndZoom.isActive() && NETDATA.globalPanAndZoom.isMaster(state) === false) { + // pan and zoom on snapshots + state.tmp.dygraph_options.dateWindow = [NETDATA.globalPanAndZoom.force_after_ms, NETDATA.globalPanAndZoom.force_before_ms]; + //state.tmp.dygraph_options.isZoomedIgnoreProgrammaticZoom = true; + } + + state.tmp.dygraph_instance = new Dygraph(state.element_chart, + data.result.data, state.tmp.dygraph_options); + + + state.tmp.dygraph_history_tip_element = document.createElement('div'); + state.tmp.dygraph_history_tip_element.innerHTML = ` + + Want to extend your history of real-time metrics? +
    + + Configure Netdata's history + or use the DB engine. +
    + `; + state.tmp.dygraph_history_tip_element.className = 'dygraph__history-tip'; + state.element_chart.appendChild(state.tmp.dygraph_history_tip_element); + + + state.tmp.dygraph_force_zoom = false; + state.tmp.dygraph_user_action = false; + state.tmp.dygraph_last_rendered = Date.now(); + state.tmp.dygraph_highlight_after = null; + + if (state.tmp.dygraph_options.valueRange[0] === null && state.tmp.dygraph_options.valueRange[1] === null) { + if (typeof state.tmp.dygraph_instance.axes_[0].extremeRange !== 'undefined') { + state.tmp.__commonMin = NETDATA.dataAttribute(state.element, 'common-min', null); + state.tmp.__commonMax = NETDATA.dataAttribute(state.element, 'common-max', null); + } else { + state.log('incompatible version of Dygraph detected'); + state.tmp.__commonMin = null; + state.tmp.__commonMax = null; + } + } else { + // if the user gave a valueRange, respect it + state.tmp.__commonMin = null; + state.tmp.__commonMax = null; + } + + return true; +}; +// ---------------------------------------------------------------------------------------------------------------- +// sparkline + +NETDATA.sparklineInitialize = function (callback) { + if (typeof netdataNoSparklines === 'undefined' || !netdataNoSparklines) { + $.ajax({ + url: NETDATA.sparkline_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.registerChartLibrary('sparkline', NETDATA.sparkline_js); + }) + .fail(function () { + NETDATA.chartLibraries.sparkline.enabled = false; + NETDATA.error(100, NETDATA.sparkline_js); + }) + .always(function () { + if (typeof callback === "function") { + return callback(); + } + }); + } else { + NETDATA.chartLibraries.sparkline.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } +}; + +NETDATA.sparklineChartUpdate = function (state, data) { + state.sparkline_options.width = state.chartWidth(); + state.sparkline_options.height = state.chartHeight(); + + $(state.element_chart).sparkline(data.result, state.sparkline_options); + return true; +}; + +NETDATA.sparklineChartCreate = function (state, data) { + let type = NETDATA.dataAttribute(state.element, 'sparkline-type', 'line'); + let lineColor = NETDATA.dataAttribute(state.element, 'sparkline-linecolor', state.chartCustomColors()[0]); + let fillColor = NETDATA.dataAttribute(state.element, 'sparkline-fillcolor', ((state.chart.chart_type === 'line') ? NETDATA.themes.current.background : NETDATA.colorLuminance(lineColor, NETDATA.chartDefaults.fill_luminance))); + let chartRangeMin = NETDATA.dataAttribute(state.element, 'sparkline-chartrangemin', undefined); + let chartRangeMax = NETDATA.dataAttribute(state.element, 'sparkline-chartrangemax', undefined); + let composite = NETDATA.dataAttribute(state.element, 'sparkline-composite', undefined); + let enableTagOptions = NETDATA.dataAttribute(state.element, 'sparkline-enabletagoptions', undefined); + let tagOptionPrefix = NETDATA.dataAttribute(state.element, 'sparkline-tagoptionprefix', undefined); + let tagValuesAttribute = NETDATA.dataAttribute(state.element, 'sparkline-tagvaluesattribute', undefined); + let disableHiddenCheck = NETDATA.dataAttribute(state.element, 'sparkline-disablehiddencheck', undefined); + let defaultPixelsPerValue = NETDATA.dataAttribute(state.element, 'sparkline-defaultpixelspervalue', undefined); + let spotColor = NETDATA.dataAttribute(state.element, 'sparkline-spotcolor', undefined); + let minSpotColor = NETDATA.dataAttribute(state.element, 'sparkline-minspotcolor', undefined); + let maxSpotColor = NETDATA.dataAttribute(state.element, 'sparkline-maxspotcolor', undefined); + let spotRadius = NETDATA.dataAttribute(state.element, 'sparkline-spotradius', undefined); + let valueSpots = NETDATA.dataAttribute(state.element, 'sparkline-valuespots', undefined); + let highlightSpotColor = NETDATA.dataAttribute(state.element, 'sparkline-highlightspotcolor', undefined); + let highlightLineColor = NETDATA.dataAttribute(state.element, 'sparkline-highlightlinecolor', undefined); + let lineWidth = NETDATA.dataAttribute(state.element, 'sparkline-linewidth', undefined); + let normalRangeMin = NETDATA.dataAttribute(state.element, 'sparkline-normalrangemin', undefined); + let normalRangeMax = NETDATA.dataAttribute(state.element, 'sparkline-normalrangemax', undefined); + let drawNormalOnTop = NETDATA.dataAttribute(state.element, 'sparkline-drawnormalontop', undefined); + let xvalues = NETDATA.dataAttribute(state.element, 'sparkline-xvalues', undefined); + let chartRangeClip = NETDATA.dataAttribute(state.element, 'sparkline-chartrangeclip', undefined); + let chartRangeMinX = NETDATA.dataAttribute(state.element, 'sparkline-chartrangeminx', undefined); + let chartRangeMaxX = NETDATA.dataAttribute(state.element, 'sparkline-chartrangemaxx', undefined); + let disableInteraction = NETDATA.dataAttributeBoolean(state.element, 'sparkline-disableinteraction', false); + let disableTooltips = NETDATA.dataAttributeBoolean(state.element, 'sparkline-disabletooltips', false); + let disableHighlight = NETDATA.dataAttributeBoolean(state.element, 'sparkline-disablehighlight', false); + let highlightLighten = NETDATA.dataAttribute(state.element, 'sparkline-highlightlighten', 1.4); + let highlightColor = NETDATA.dataAttribute(state.element, 'sparkline-highlightcolor', undefined); + let tooltipContainer = NETDATA.dataAttribute(state.element, 'sparkline-tooltipcontainer', undefined); + let tooltipClassname = NETDATA.dataAttribute(state.element, 'sparkline-tooltipclassname', undefined); + let tooltipFormat = NETDATA.dataAttribute(state.element, 'sparkline-tooltipformat', undefined); + let tooltipPrefix = NETDATA.dataAttribute(state.element, 'sparkline-tooltipprefix', undefined); + let tooltipSuffix = NETDATA.dataAttribute(state.element, 'sparkline-tooltipsuffix', ' ' + state.units_current); + let tooltipSkipNull = NETDATA.dataAttributeBoolean(state.element, 'sparkline-tooltipskipnull', true); + let tooltipValueLookups = NETDATA.dataAttribute(state.element, 'sparkline-tooltipvaluelookups', undefined); + let tooltipFormatFieldlist = NETDATA.dataAttribute(state.element, 'sparkline-tooltipformatfieldlist', undefined); + let tooltipFormatFieldlistKey = NETDATA.dataAttribute(state.element, 'sparkline-tooltipformatfieldlistkey', undefined); + let numberFormatter = NETDATA.dataAttribute(state.element, 'sparkline-numberformatter', function (n) { + return n.toFixed(2); + }); + let numberDigitGroupSep = NETDATA.dataAttribute(state.element, 'sparkline-numberdigitgroupsep', undefined); + let numberDecimalMark = NETDATA.dataAttribute(state.element, 'sparkline-numberdecimalmark', undefined); + let numberDigitGroupCount = NETDATA.dataAttribute(state.element, 'sparkline-numberdigitgroupcount', undefined); + let animatedZooms = NETDATA.dataAttributeBoolean(state.element, 'sparkline-animatedzooms', false); + + if (spotColor === 'disable') { + spotColor = ''; + } + if (minSpotColor === 'disable') { + minSpotColor = ''; + } + if (maxSpotColor === 'disable') { + maxSpotColor = ''; + } + + // state.log('sparkline type ' + type + ', lineColor: ' + lineColor + ', fillColor: ' + fillColor); + + state.sparkline_options = { + type: type, + lineColor: lineColor, + fillColor: fillColor, + chartRangeMin: chartRangeMin, + chartRangeMax: chartRangeMax, + composite: composite, + enableTagOptions: enableTagOptions, + tagOptionPrefix: tagOptionPrefix, + tagValuesAttribute: tagValuesAttribute, + disableHiddenCheck: disableHiddenCheck, + defaultPixelsPerValue: defaultPixelsPerValue, + spotColor: spotColor, + minSpotColor: minSpotColor, + maxSpotColor: maxSpotColor, + spotRadius: spotRadius, + valueSpots: valueSpots, + highlightSpotColor: highlightSpotColor, + highlightLineColor: highlightLineColor, + lineWidth: lineWidth, + normalRangeMin: normalRangeMin, + normalRangeMax: normalRangeMax, + drawNormalOnTop: drawNormalOnTop, + xvalues: xvalues, + chartRangeClip: chartRangeClip, + chartRangeMinX: chartRangeMinX, + chartRangeMaxX: chartRangeMaxX, + disableInteraction: disableInteraction, + disableTooltips: disableTooltips, + disableHighlight: disableHighlight, + highlightLighten: highlightLighten, + highlightColor: highlightColor, + tooltipContainer: tooltipContainer, + tooltipClassname: tooltipClassname, + tooltipChartTitle: state.title, + tooltipFormat: tooltipFormat, + tooltipPrefix: tooltipPrefix, + tooltipSuffix: tooltipSuffix, + tooltipSkipNull: tooltipSkipNull, + tooltipValueLookups: tooltipValueLookups, + tooltipFormatFieldlist: tooltipFormatFieldlist, + tooltipFormatFieldlistKey: tooltipFormatFieldlistKey, + numberFormatter: numberFormatter, + numberDigitGroupSep: numberDigitGroupSep, + numberDecimalMark: numberDecimalMark, + numberDigitGroupCount: numberDigitGroupCount, + animatedZooms: animatedZooms, + width: state.chartWidth(), + height: state.chartHeight() + }; + + $(state.element_chart).sparkline(data.result, state.sparkline_options); + + return true; +}; +// google charts + +// Codacy declarations +/* global google */ + +NETDATA.googleInitialize = function (callback) { + if (typeof netdataNoGoogleCharts === 'undefined' || !netdataNoGoogleCharts) { + $.ajax({ + url: NETDATA.google_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.registerChartLibrary('google', NETDATA.google_js); + google.load('visualization', '1.1', { + 'packages': ['corechart', 'controls'], + 'callback': callback + }); + }) + .fail(function () { + NETDATA.chartLibraries.google.enabled = false; + NETDATA.error(100, NETDATA.google_js); + if (typeof callback === "function") { + return callback(); + } + }); + } else { + NETDATA.chartLibraries.google.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } +}; + +NETDATA.googleChartUpdate = function (state, data) { + let datatable = new google.visualization.DataTable(data.result); + state.google_instance.draw(datatable, state.google_options); + return true; +}; + +NETDATA.googleChartCreate = function (state, data) { + let datatable = new google.visualization.DataTable(data.result); + + state.google_options = { + colors: state.chartColors(), + + // do not set width, height - the chart resizes itself + //width: state.chartWidth(), + //height: state.chartHeight(), + lineWidth: 1, + title: state.title, + fontSize: 11, + hAxis: { + // title: "Time of Day", + // format:'HH:mm:ss', + viewWindowMode: 'maximized', + slantedText: false, + format: 'HH:mm:ss', + textStyle: { + fontSize: 9 + }, + gridlines: { + color: '#EEE' + } + }, + vAxis: { + title: state.units_current, + viewWindowMode: 'pretty', + minValue: -0.1, + maxValue: 0.1, + direction: 1, + textStyle: { + fontSize: 9 + }, + gridlines: { + color: '#EEE' + } + }, + chartArea: { + width: '65%', + height: '80%' + }, + focusTarget: 'category', + annotation: { + '1': { + style: 'line' + } + }, + pointsVisible: 0, + titlePosition: 'out', + titleTextStyle: { + fontSize: 11 + }, + tooltip: { + isHtml: false, + ignoreBounds: true, + textStyle: { + fontSize: 9 + } + }, + curveType: 'function', + areaOpacity: 0.3, + isStacked: false + }; + + switch (state.chart.chart_type) { + case "area": + state.google_options.vAxis.viewWindowMode = 'maximized'; + state.google_options.areaOpacity = NETDATA.options.current.color_fill_opacity_area; + state.google_instance = new google.visualization.AreaChart(state.element_chart); + break; + + case "stacked": + state.google_options.isStacked = true; + state.google_options.areaOpacity = NETDATA.options.current.color_fill_opacity_stacked; + state.google_options.vAxis.viewWindowMode = 'maximized'; + state.google_options.vAxis.minValue = null; + state.google_options.vAxis.maxValue = null; + state.google_instance = new google.visualization.AreaChart(state.element_chart); + break; + + default: + case "line": + state.google_options.lineWidth = 2; + state.google_instance = new google.visualization.LineChart(state.element_chart); + break; + } + + state.google_instance.draw(datatable, state.google_options); + return true; +}; +// gauge.js + +NETDATA.gaugeInitialize = function (callback) { + if (typeof netdataNoGauge === 'undefined' || !netdataNoGauge) { + $.ajax({ + url: NETDATA.gauge_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.registerChartLibrary('gauge', NETDATA.gauge_js); + }) + .fail(function () { + NETDATA.chartLibraries.gauge.enabled = false; + NETDATA.error(100, NETDATA.gauge_js); + }) + .always(function () { + if (typeof callback === "function") { + return callback(); + } + }) + } + else { + NETDATA.chartLibraries.gauge.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } +}; + +NETDATA.gaugeAnimation = function (state, status) { + let speed = 32; + + if (typeof status === 'boolean' && status === false) { + speed = 1000000000; + } else if (typeof status === 'number') { + speed = status; + } + + // console.log('gauge speed ' + speed); + state.tmp.gauge_instance.animationSpeed = speed; + state.tmp.___gaugeOld__.speed = speed; +}; + +NETDATA.gaugeSet = function (state, value, min, max) { + if (typeof value !== 'number') { + value = 0; + } + if (typeof min !== 'number') { + min = 0; + } + if (typeof max !== 'number') { + max = 0; + } + if (value > max) { + max = value; + } + if (value < min) { + min = value; + } + if (min > max) { + let t = min; + min = max; + max = t; + } + else if (min === max) { + max = min + 1; + } + + state.legendFormatValueDecimalsFromMinMax(min, max); + + // gauge.js has an issue if the needle + // is smaller than min or larger than max + // when we set the new values + // the needle will go crazy + + // to prevent it, we always feed it + // with a percentage, so that the needle + // is always between min and max + let pcent = (value - min) * 100 / (max - min); + + // bug fix for gauge.js 1.3.1 + // if the value is the absolute min or max, the chart is broken + if (pcent < 0.001) { + pcent = 0.001; + } + if (pcent > 99.999) { + pcent = 99.999; + } + + state.tmp.gauge_instance.set(pcent); + // console.log('gauge set ' + pcent + ', value ' + value + ', min ' + min + ', max ' + max); + + state.tmp.___gaugeOld__.value = value; + state.tmp.___gaugeOld__.min = min; + state.tmp.___gaugeOld__.max = max; +}; + +NETDATA.gaugeSetLabels = function (state, value, min, max) { + if (state.tmp.___gaugeOld__.valueLabel !== value) { + state.tmp.___gaugeOld__.valueLabel = value; + state.tmp.gaugeChartLabel.innerText = state.legendFormatValue(value); + } + if (state.tmp.___gaugeOld__.minLabel !== min) { + state.tmp.___gaugeOld__.minLabel = min; + state.tmp.gaugeChartMin.innerText = state.legendFormatValue(min); + } + if (state.tmp.___gaugeOld__.maxLabel !== max) { + state.tmp.___gaugeOld__.maxLabel = max; + state.tmp.gaugeChartMax.innerText = state.legendFormatValue(max); + } +}; + +NETDATA.gaugeClearSelection = function (state, force) { + if (typeof state.tmp.gaugeEvent !== 'undefined' && typeof state.tmp.gaugeEvent.timer !== 'undefined') { + NETDATA.timeout.clear(state.tmp.gaugeEvent.timer); + state.tmp.gaugeEvent.timer = undefined; + } + + if (state.isAutoRefreshable() && state.data !== null && force !== true) { + NETDATA.gaugeChartUpdate(state, state.data); + } else { + NETDATA.gaugeAnimation(state, false); + NETDATA.gaugeSetLabels(state, null, null, null); + NETDATA.gaugeSet(state, null, null, null); + } + + NETDATA.gaugeAnimation(state, true); + return true; +}; + +NETDATA.gaugeSetSelection = function (state, t) { + if (state.timeIsVisible(t) !== true) { + return NETDATA.gaugeClearSelection(state, true); + } + + let slot = state.calculateRowForTime(t); + if (slot < 0 || slot >= state.data.result.length) { + return NETDATA.gaugeClearSelection(state, true); + } + + if (typeof state.tmp.gaugeEvent === 'undefined') { + state.tmp.gaugeEvent = { + timer: undefined, + value: 0, + min: 0, + max: 0 + }; + } + + let value = state.data.result[state.data.result.length - 1 - slot]; + let min = (state.tmp.gaugeMin === null) ? NETDATA.commonMin.get(state) : state.tmp.gaugeMin; + let max = (state.tmp.gaugeMax === null) ? NETDATA.commonMax.get(state) : state.tmp.gaugeMax; + + // make sure it is zero based + // but only if it has not been set by the user + if (state.tmp.gaugeMin === null && min > 0) { + min = 0; + } + if (state.tmp.gaugeMax === null && max < 0) { + max = 0; + } + + state.tmp.gaugeEvent.value = value; + state.tmp.gaugeEvent.min = min; + state.tmp.gaugeEvent.max = max; + NETDATA.gaugeSetLabels(state, value, min, max); + + if (state.tmp.gaugeEvent.timer === undefined) { + NETDATA.gaugeAnimation(state, false); + + state.tmp.gaugeEvent.timer = NETDATA.timeout.set(function () { + state.tmp.gaugeEvent.timer = undefined; + NETDATA.gaugeSet(state, state.tmp.gaugeEvent.value, state.tmp.gaugeEvent.min, state.tmp.gaugeEvent.max); + }, 0); + } + + return true; +}; + +NETDATA.gaugeChartUpdate = function (state, data) { + let value, min, max; + + if (NETDATA.globalPanAndZoom.isActive() || state.isAutoRefreshable() === false) { + NETDATA.gaugeSetLabels(state, null, null, null); + state.tmp.gauge_instance.set(0); + } else { + value = data.result[0]; + min = (state.tmp.gaugeMin === null) ? NETDATA.commonMin.get(state) : state.tmp.gaugeMin; + max = (state.tmp.gaugeMax === null) ? NETDATA.commonMax.get(state) : state.tmp.gaugeMax; + if (value < min) { + min = value; + } + if (value > max) { + max = value; + } + + // make sure it is zero based + // but only if it has not been set by the user + if (state.tmp.gaugeMin === null && min > 0) { + min = 0; + } + if (state.tmp.gaugeMax === null && max < 0) { + max = 0; + } + + NETDATA.gaugeSet(state, value, min, max); + NETDATA.gaugeSetLabels(state, value, min, max); + } + + return true; +}; + +NETDATA.gaugeChartCreate = function (state, data) { + // let chart = $(state.element_chart); + + let value = data.result[0]; + let min = NETDATA.dataAttribute(state.element, 'gauge-min-value', null); + let max = NETDATA.dataAttribute(state.element, 'gauge-max-value', null); + // let adjust = NETDATA.dataAttribute(state.element, 'gauge-adjust', null); + let pointerColor = NETDATA.dataAttribute(state.element, 'gauge-pointer-color', NETDATA.themes.current.gauge_pointer); + let strokeColor = NETDATA.dataAttribute(state.element, 'gauge-stroke-color', NETDATA.themes.current.gauge_stroke); + let startColor = NETDATA.dataAttribute(state.element, 'gauge-start-color', state.chartCustomColors()[0]); + let stopColor = NETDATA.dataAttribute(state.element, 'gauge-stop-color', void 0); + let generateGradient = NETDATA.dataAttribute(state.element, 'gauge-generate-gradient', false); + + if (min === null) { + min = NETDATA.commonMin.get(state); + state.tmp.gaugeMin = null; + } else { + state.tmp.gaugeMin = min; + } + + if (max === null) { + max = NETDATA.commonMax.get(state); + state.tmp.gaugeMax = null; + } else { + state.tmp.gaugeMax = max; + } + + // make sure it is zero based + // but only if it has not been set by the user + if (state.tmp.gaugeMin === null && min > 0) { + min = 0; + } + if (state.tmp.gaugeMax === null && max < 0) { + max = 0; + } + + let width = state.chartWidth(), height = state.chartHeight(); //, ratio = 1.5; + // console.log('gauge width: ' + width.toString() + ', height: ' + height.toString()); + //switch(adjust) { + // case 'width': width = height * ratio; break; + // case 'height': + // default: height = width / ratio; break; + //} + //state.element.style.width = width.toString() + 'px'; + //state.element.style.height = height.toString() + 'px'; + + let lum_d = 0.05; + + let options = { + lines: 12, // The number of lines to draw + angle: 0.14, // The span of the gauge arc + lineWidth: 0.57, // The line thickness + radiusScale: 1.0, // Relative radius + pointer: { + length: 0.85, // 0.9 The radius of the inner circle + strokeWidth: 0.045, // The rotation offset + color: pointerColor // Fill color + }, + limitMax: true, // If false, the max value of the gauge will be updated if value surpass max + limitMin: true, // If true, the min value of the gauge will be fixed unless you set it manually + colorStart: startColor, // Colors + colorStop: stopColor, // just experiment with them + strokeColor: strokeColor, // to see which ones work best for you + generateGradient: (generateGradient === true), // gmosx: + gradientType: 0, + highDpiSupport: true // High resolution support + }; + + if (generateGradient.constructor === Array) { + // example options: + // data-gauge-generate-gradient="[0, 50, 100]" + // data-gauge-gradient-percent-color-0="#FFFFFF" + // data-gauge-gradient-percent-color-50="#999900" + // data-gauge-gradient-percent-color-100="#000000" + + options.percentColors = []; + let len = generateGradient.length; + while (len--) { + let pcent = generateGradient[len]; + let color = NETDATA.dataAttribute(state.element, 'gauge-gradient-percent-color-' + pcent.toString(), false); + if (color !== false) { + let a = []; + a[0] = pcent / 100; + a[1] = color; + options.percentColors.unshift(a); + } + } + if (options.percentColors.length === 0) { + delete options.percentColors; + } + } else if (generateGradient === false && NETDATA.themes.current.gauge_gradient) { + //noinspection PointlessArithmeticExpressionJS + options.percentColors = [ + [0.0, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 0))], + [0.1, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 1))], + [0.2, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 2))], + [0.3, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 3))], + [0.4, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 4))], + [0.5, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 5))], + [0.6, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 6))], + [0.7, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 7))], + [0.8, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 8))], + [0.9, NETDATA.colorLuminance(startColor, (lum_d * 10) - (lum_d * 9))], + [1.0, NETDATA.colorLuminance(startColor, 0.0)]]; + } + + state.tmp.gauge_canvas = document.createElement('canvas'); + state.tmp.gauge_canvas.id = 'gauge-' + state.uuid + '-canvas'; + state.tmp.gauge_canvas.className = 'gaugeChart'; + state.tmp.gauge_canvas.width = width; + state.tmp.gauge_canvas.height = height; + state.element_chart.appendChild(state.tmp.gauge_canvas); + + let valuefontsize = Math.floor(height / 5); + let valuetop = Math.round((height - valuefontsize) / 3.2); + state.tmp.gaugeChartLabel = document.createElement('span'); + state.tmp.gaugeChartLabel.className = 'gaugeChartLabel'; + state.tmp.gaugeChartLabel.style.fontSize = valuefontsize + 'px'; + state.tmp.gaugeChartLabel.style.top = valuetop.toString() + 'px'; + state.element_chart.appendChild(state.tmp.gaugeChartLabel); + + let titlefontsize = Math.round(valuefontsize / 2.1); + let titletop = 0; + state.tmp.gaugeChartTitle = document.createElement('span'); + state.tmp.gaugeChartTitle.className = 'gaugeChartTitle'; + state.tmp.gaugeChartTitle.innerText = state.title; + state.tmp.gaugeChartTitle.style.fontSize = titlefontsize + 'px'; + state.tmp.gaugeChartTitle.style.lineHeight = titlefontsize + 'px'; + state.tmp.gaugeChartTitle.style.top = titletop.toString() + 'px'; + state.element_chart.appendChild(state.tmp.gaugeChartTitle); + + let unitfontsize = Math.round(titlefontsize * 0.9); + state.tmp.gaugeChartUnits = document.createElement('span'); + state.tmp.gaugeChartUnits.className = 'gaugeChartUnits'; + state.tmp.gaugeChartUnits.innerText = state.units_current; + state.tmp.gaugeChartUnits.style.fontSize = unitfontsize + 'px'; + state.element_chart.appendChild(state.tmp.gaugeChartUnits); + + state.tmp.gaugeChartMin = document.createElement('span'); + state.tmp.gaugeChartMin.className = 'gaugeChartMin'; + state.tmp.gaugeChartMin.style.fontSize = Math.round(valuefontsize * 0.75).toString() + 'px'; + state.element_chart.appendChild(state.tmp.gaugeChartMin); + + state.tmp.gaugeChartMax = document.createElement('span'); + state.tmp.gaugeChartMax.className = 'gaugeChartMax'; + state.tmp.gaugeChartMax.style.fontSize = Math.round(valuefontsize * 0.75).toString() + 'px'; + state.element_chart.appendChild(state.tmp.gaugeChartMax); + + // when we just re-create the chart + // do not animate the first update + let animate = true; + if (typeof state.tmp.gauge_instance !== 'undefined') { + animate = false; + } + + state.tmp.gauge_instance = new Gauge(state.tmp.gauge_canvas).setOptions(options); // create sexy gauge! + + state.tmp.___gaugeOld__ = { + value: value, + min: min, + max: max, + valueLabel: null, + minLabel: null, + maxLabel: null + }; + + // we will always feed a percentage + state.tmp.gauge_instance.minValue = 0; + state.tmp.gauge_instance.maxValue = 100; + + NETDATA.gaugeAnimation(state, animate); + NETDATA.gaugeSet(state, value, min, max); + NETDATA.gaugeSetLabels(state, value, min, max); + NETDATA.gaugeAnimation(state, true); + + state.legendSetUnitsString = function (units) { + if (typeof state.tmp.gaugeChartUnits !== 'undefined' && state.tmp.units !== units) { + state.tmp.gaugeChartUnits.innerText = units; + state.tmp.___gaugeOld__.valueLabel = null; + state.tmp.___gaugeOld__.minLabel = null; + state.tmp.___gaugeOld__.maxLabel = null; + state.tmp.units = units; + } + }; + state.legendShowUndefined = function () { + if (typeof state.tmp.gauge_instance !== 'undefined') { + NETDATA.gaugeClearSelection(state); + } + }; + + return true; +}; +// ---------------------------------------------------------------------------------------------------------------- + +NETDATA.easypiechartPercentFromValueMinMax = function (state, value, min, max) { + if (typeof value !== 'number') { + value = 0; + } + if (typeof min !== 'number') { + min = 0; + } + if (typeof max !== 'number') { + max = 0; + } + + if (min > max) { + let t = min; + min = max; + max = t; + } + + if (min > value) { + min = value; + } + if (max < value) { + max = value; + } + + state.legendFormatValueDecimalsFromMinMax(min, max); + + if (state.tmp.easyPieChartMin === null && min > 0) { + min = 0; + } + if (state.tmp.easyPieChartMax === null && max < 0) { + max = 0; + } + + let pcent; + + if (min < 0 && max > 0) { + // it is both positive and negative + // zero at the top center of the chart + max = (-min > max) ? -min : max; + pcent = Math.round(value * 100 / max); + } else if (value >= 0 && min >= 0 && max >= 0) { + // clockwise + pcent = Math.round((value - min) * 100 / (max - min)); + if (pcent === 0) { + pcent = 0.1; + } + } else { + // counter clockwise + pcent = Math.round((value - max) * 100 / (max - min)); + if (pcent === 0) { + pcent = -0.1; + } + } + + return pcent; +}; + +// ---------------------------------------------------------------------------------------------------------------- +// easy-pie-chart + +NETDATA.easypiechartInitialize = function (callback) { + if (typeof netdataNoEasyPieChart === 'undefined' || !netdataNoEasyPieChart) { + $.ajax({ + url: NETDATA.easypiechart_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.registerChartLibrary('easypiechart', NETDATA.easypiechart_js); + }) + .fail(function () { + NETDATA.chartLibraries.easypiechart.enabled = false; + NETDATA.error(100, NETDATA.easypiechart_js); + }) + .always(function () { + if (typeof callback === "function") { + return callback(); + } + }) + } else { + NETDATA.chartLibraries.easypiechart.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } +}; + +NETDATA.easypiechartClearSelection = function (state, force) { + if (typeof state.tmp.easyPieChartEvent !== 'undefined' && typeof state.tmp.easyPieChartEvent.timer !== 'undefined') { + NETDATA.timeout.clear(state.tmp.easyPieChartEvent.timer); + state.tmp.easyPieChartEvent.timer = undefined; + } + + if (state.isAutoRefreshable() && state.data !== null && force !== true) { + NETDATA.easypiechartChartUpdate(state, state.data); + } + else { + state.tmp.easyPieChartLabel.innerText = state.legendFormatValue(null); + state.tmp.easyPieChart_instance.update(0); + } + state.tmp.easyPieChart_instance.enableAnimation(); + + return true; +}; + +NETDATA.easypiechartSetSelection = function (state, t) { + if (state.timeIsVisible(t) !== true) { + return NETDATA.easypiechartClearSelection(state, true); + } + + let slot = state.calculateRowForTime(t); + if (slot < 0 || slot >= state.data.result.length) { + return NETDATA.easypiechartClearSelection(state, true); + } + + if (typeof state.tmp.easyPieChartEvent === 'undefined') { + state.tmp.easyPieChartEvent = { + timer: undefined, + value: 0, + pcent: 0 + }; + } + + let value = state.data.result[state.data.result.length - 1 - slot]; + let min = (state.tmp.easyPieChartMin === null) ? NETDATA.commonMin.get(state) : state.tmp.easyPieChartMin; + let max = (state.tmp.easyPieChartMax === null) ? NETDATA.commonMax.get(state) : state.tmp.easyPieChartMax; + let pcent = NETDATA.easypiechartPercentFromValueMinMax(state, value, min, max); + + state.tmp.easyPieChartEvent.value = value; + state.tmp.easyPieChartEvent.pcent = pcent; + state.tmp.easyPieChartLabel.innerText = state.legendFormatValue(value); + + if (state.tmp.easyPieChartEvent.timer === undefined) { + state.tmp.easyPieChart_instance.disableAnimation(); + + state.tmp.easyPieChartEvent.timer = NETDATA.timeout.set(function () { + state.tmp.easyPieChartEvent.timer = undefined; + state.tmp.easyPieChart_instance.update(state.tmp.easyPieChartEvent.pcent); + }, 0); + } + + return true; +}; + +NETDATA.easypiechartChartUpdate = function (state, data) { + let value, min, max, pcent; + + if (NETDATA.globalPanAndZoom.isActive() || state.isAutoRefreshable() === false) { + value = null; + pcent = 0; + } + else { + value = data.result[0]; + min = (state.tmp.easyPieChartMin === null) ? NETDATA.commonMin.get(state) : state.tmp.easyPieChartMin; + max = (state.tmp.easyPieChartMax === null) ? NETDATA.commonMax.get(state) : state.tmp.easyPieChartMax; + pcent = NETDATA.easypiechartPercentFromValueMinMax(state, value, min, max); + } + + state.tmp.easyPieChartLabel.innerText = state.legendFormatValue(value); + state.tmp.easyPieChart_instance.update(pcent); + return true; +}; + +NETDATA.easypiechartChartCreate = function (state, data) { + let chart = $(state.element_chart); + + let value = data.result[0]; + let min = NETDATA.dataAttribute(state.element, 'easypiechart-min-value', null); + let max = NETDATA.dataAttribute(state.element, 'easypiechart-max-value', null); + + if (min === null) { + min = NETDATA.commonMin.get(state); + state.tmp.easyPieChartMin = null; + } + else { + state.tmp.easyPieChartMin = min; + } + + if (max === null) { + max = NETDATA.commonMax.get(state); + state.tmp.easyPieChartMax = null; + } + else { + state.tmp.easyPieChartMax = max; + } + + let size = state.chartWidth(); + let stroke = Math.floor(size / 22); + if (stroke < 3) { + stroke = 2; + } + + let valuefontsize = Math.floor((size * 2 / 3) / 5); + let valuetop = Math.round((size - valuefontsize - (size / 40)) / 2); + state.tmp.easyPieChartLabel = document.createElement('span'); + state.tmp.easyPieChartLabel.className = 'easyPieChartLabel'; + state.tmp.easyPieChartLabel.innerText = state.legendFormatValue(value); + state.tmp.easyPieChartLabel.style.fontSize = valuefontsize + 'px'; + state.tmp.easyPieChartLabel.style.top = valuetop.toString() + 'px'; + state.element_chart.appendChild(state.tmp.easyPieChartLabel); + + let titlefontsize = Math.round(valuefontsize * 1.6 / 3); + let titletop = Math.round(valuetop - (titlefontsize * 2) - (size / 40)); + state.tmp.easyPieChartTitle = document.createElement('span'); + state.tmp.easyPieChartTitle.className = 'easyPieChartTitle'; + state.tmp.easyPieChartTitle.innerText = state.title; + state.tmp.easyPieChartTitle.style.fontSize = titlefontsize + 'px'; + state.tmp.easyPieChartTitle.style.lineHeight = titlefontsize + 'px'; + state.tmp.easyPieChartTitle.style.top = titletop.toString() + 'px'; + state.element_chart.appendChild(state.tmp.easyPieChartTitle); + + let unitfontsize = Math.round(titlefontsize * 0.9); + let unittop = Math.round(valuetop + (valuefontsize + unitfontsize) + (size / 40)); + state.tmp.easyPieChartUnits = document.createElement('span'); + state.tmp.easyPieChartUnits.className = 'easyPieChartUnits'; + state.tmp.easyPieChartUnits.innerText = state.units_current; + state.tmp.easyPieChartUnits.style.fontSize = unitfontsize + 'px'; + state.tmp.easyPieChartUnits.style.top = unittop.toString() + 'px'; + state.element_chart.appendChild(state.tmp.easyPieChartUnits); + + let barColor = NETDATA.dataAttribute(state.element, 'easypiechart-barcolor', undefined); + if (typeof barColor === 'undefined' || barColor === null) { + barColor = state.chartCustomColors()[0]; + } else { + //
    + let tmp = eval(barColor); + if (typeof tmp === 'function') { + barColor = tmp; + } + } + + let pcent = NETDATA.easypiechartPercentFromValueMinMax(state, value, min, max); + chart.data('data-percent', pcent); + + chart.easyPieChart({ + barColor: barColor, + trackColor: NETDATA.dataAttribute(state.element, 'easypiechart-trackcolor', NETDATA.themes.current.easypiechart_track), + scaleColor: NETDATA.dataAttribute(state.element, 'easypiechart-scalecolor', NETDATA.themes.current.easypiechart_scale), + scaleLength: NETDATA.dataAttribute(state.element, 'easypiechart-scalelength', 5), + lineCap: NETDATA.dataAttribute(state.element, 'easypiechart-linecap', 'round'), + lineWidth: NETDATA.dataAttribute(state.element, 'easypiechart-linewidth', stroke), + trackWidth: NETDATA.dataAttribute(state.element, 'easypiechart-trackwidth', undefined), + size: NETDATA.dataAttribute(state.element, 'easypiechart-size', size), + rotate: NETDATA.dataAttribute(state.element, 'easypiechart-rotate', 0), + animate: NETDATA.dataAttribute(state.element, 'easypiechart-animate', {duration: 500, enabled: true}), + easing: NETDATA.dataAttribute(state.element, 'easypiechart-easing', undefined) + }); + + // when we just re-create the chart + // do not animate the first update + let animate = true; + if (typeof state.tmp.easyPieChart_instance !== 'undefined') { + animate = false; + } + + state.tmp.easyPieChart_instance = chart.data('easyPieChart'); + if (animate === false) { + state.tmp.easyPieChart_instance.disableAnimation(); + } + state.tmp.easyPieChart_instance.update(pcent); + if (animate === false) { + state.tmp.easyPieChart_instance.enableAnimation(); + } + + state.legendSetUnitsString = function (units) { + if (typeof state.tmp.easyPieChartUnits !== 'undefined' && state.tmp.units !== units) { + state.tmp.easyPieChartUnits.innerText = units; + state.tmp.units = units; + } + }; + state.legendShowUndefined = function () { + if (typeof state.tmp.easyPieChart_instance !== 'undefined') { + NETDATA.easypiechartClearSelection(state); + } + }; + + return true; +}; + +// d3pie + +NETDATA.d3pieInitialize = function (callback) { + if (typeof netdataNoD3pie === 'undefined' || !netdataNoD3pie) { + + // d3pie requires D3 + if (!NETDATA.chartLibraries.d3.initialized) { + if (NETDATA.chartLibraries.d3.enabled) { + NETDATA.d3Initialize(function () { + NETDATA.d3pieInitialize(callback); + }); + } else { + NETDATA.chartLibraries.d3pie.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } + } else { + $.ajax({ + url: NETDATA.d3pie_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.registerChartLibrary('d3pie', NETDATA.d3pie_js); + }) + .fail(function () { + NETDATA.chartLibraries.d3pie.enabled = false; + NETDATA.error(100, NETDATA.d3pie_js); + }) + .always(function () { + if (typeof callback === "function") { + return callback(); + } + }); + } + } else { + NETDATA.chartLibraries.d3pie.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } +}; + +NETDATA.d3pieSetContent = function (state, data, index) { + state.legendFormatValueDecimalsFromMinMax( + data.min, + data.max + ); + + let content = []; + let colors = state.chartColors(); + let len = data.result.labels.length; + for (let i = 1; i < len; i++) { + let label = data.result.labels[i]; + let value = data.result.data[index][label]; + let color = colors[i - 1]; + + if (value !== null && value > 0) { + content.push({ + label: label, + value: value, + color: color + }); + } + } + + if (content.length === 0) { + content.push({ + label: 'no data', + value: 100, + color: '#666666' + }); + } + + state.tmp.d3pie_last_slot = index; + return content; +}; + +NETDATA.d3pieDateRange = function (state, data, index) { + let dt = Math.round((data.before - data.after + 1) / data.points); + let dt_str = NETDATA.seconds4human(dt); + + let before = data.result.data[index].time; + let after = before - (dt * 1000); + + let d1 = NETDATA.dateTime.localeDateString(after); + let t1 = NETDATA.dateTime.localeTimeString(after); + let d2 = NETDATA.dateTime.localeDateString(before); + let t2 = NETDATA.dateTime.localeTimeString(before); + + if (d1 === d2) { + return d1 + ' ' + t1 + ' to ' + t2 + ', ' + dt_str; + } + + return d1 + ' ' + t1 + ' to ' + d2 + ' ' + t2 + ', ' + dt_str; +}; + +NETDATA.d3pieSetSelection = function (state, t) { + if (state.timeIsVisible(t) !== true) { + return NETDATA.d3pieClearSelection(state, true); + } + + let slot = state.calculateRowForTime(t); + slot = state.data.result.data.length - slot - 1; + + if (slot < 0 || slot >= state.data.result.length) { + return NETDATA.d3pieClearSelection(state, true); + } + + if (state.tmp.d3pie_last_slot === slot) { + // we already show this slot, don't do anything + return true; + } + + if (state.tmp.d3pie_timer === undefined) { + state.tmp.d3pie_timer = NETDATA.timeout.set(function () { + state.tmp.d3pie_timer = undefined; + NETDATA.d3pieChange(state, NETDATA.d3pieSetContent(state, state.data, slot), NETDATA.d3pieDateRange(state, state.data, slot)); + }, 0); + } + + return true; +}; + +NETDATA.d3pieClearSelection = function (state, force) { + if (typeof state.tmp.d3pie_timer !== 'undefined') { + NETDATA.timeout.clear(state.tmp.d3pie_timer); + state.tmp.d3pie_timer = undefined; + } + + if (state.isAutoRefreshable() && state.data !== null && force !== true) { + NETDATA.d3pieChartUpdate(state, state.data); + } else { + if (state.tmp.d3pie_last_slot !== -1) { + state.tmp.d3pie_last_slot = -1; + NETDATA.d3pieChange(state, [{label: 'no data', value: 1, color: '#666666'}], 'no data available'); + } + } + + return true; +}; + +NETDATA.d3pieChange = function (state, content, footer) { + if (state.d3pie_forced_subtitle === null) { + //state.d3pie_instance.updateProp("header.subtitle.text", state.units_current); + state.d3pie_instance.options.header.subtitle.text = state.units_current; + } + + if (state.d3pie_forced_footer === null) { + //state.d3pie_instance.updateProp("footer.text", footer); + state.d3pie_instance.options.footer.text = footer; + } + + //state.d3pie_instance.updateProp("data.content", content); + state.d3pie_instance.options.data.content = content; + state.d3pie_instance.destroy(); + state.d3pie_instance.recreate(); + return true; +}; + +NETDATA.d3pieChartUpdate = function (state, data) { + return NETDATA.d3pieChange(state, NETDATA.d3pieSetContent(state, data, 0), NETDATA.d3pieDateRange(state, data, 0)); +}; + +NETDATA.d3pieChartCreate = function (state, data) { + + state.element_chart.id = 'd3pie-' + state.uuid; + // console.log('id = ' + state.element_chart.id); + + let content = NETDATA.d3pieSetContent(state, data, 0); + + state.d3pie_forced_title = NETDATA.dataAttribute(state.element, 'd3pie-title', null); + state.d3pie_forced_subtitle = NETDATA.dataAttribute(state.element, 'd3pie-subtitle', null); + state.d3pie_forced_footer = NETDATA.dataAttribute(state.element, 'd3pie-footer', null); + + state.d3pie_options = { + header: { + title: { + text: (state.d3pie_forced_title !== null) ? state.d3pie_forced_title : state.title, + color: NETDATA.dataAttribute(state.element, 'd3pie-title-color', NETDATA.themes.current.d3pie.title), + fontSize: NETDATA.dataAttribute(state.element, 'd3pie-title-fontsize', 12), + fontWeight: NETDATA.dataAttribute(state.element, 'd3pie-title-fontweight', "bold"), + font: NETDATA.dataAttribute(state.element, 'd3pie-title-font', "arial") + }, + subtitle: { + text: (state.d3pie_forced_subtitle !== null) ? state.d3pie_forced_subtitle : state.units_current, + color: NETDATA.dataAttribute(state.element, 'd3pie-subtitle-color', NETDATA.themes.current.d3pie.subtitle), + fontSize: NETDATA.dataAttribute(state.element, 'd3pie-subtitle-fontsize', 10), + fontWeight: NETDATA.dataAttribute(state.element, 'd3pie-subtitle-fontweight', "normal"), + font: NETDATA.dataAttribute(state.element, 'd3pie-subtitle-font', "arial") + }, + titleSubtitlePadding: 1 + }, + footer: { + text: (state.d3pie_forced_footer !== null) ? state.d3pie_forced_footer : NETDATA.d3pieDateRange(state, data, 0), + color: NETDATA.dataAttribute(state.element, 'd3pie-footer-color', NETDATA.themes.current.d3pie.footer), + fontSize: NETDATA.dataAttribute(state.element, 'd3pie-footer-fontsize', 9), + fontWeight: NETDATA.dataAttribute(state.element, 'd3pie-footer-fontweight', "bold"), + font: NETDATA.dataAttribute(state.element, 'd3pie-footer-font', "arial"), + location: NETDATA.dataAttribute(state.element, 'd3pie-footer-location', "bottom-center") // bottom-left, bottom-center, bottom-right + }, + size: { + canvasHeight: state.chartHeight(), + canvasWidth: state.chartWidth(), + pieInnerRadius: NETDATA.dataAttribute(state.element, 'd3pie-pieinnerradius', "45%"), + pieOuterRadius: NETDATA.dataAttribute(state.element, 'd3pie-pieouterradius', "80%") + }, + data: { + // none, random, value-asc, value-desc, label-asc, label-desc + sortOrder: NETDATA.dataAttribute(state.element, 'd3pie-sortorder', "value-desc"), + smallSegmentGrouping: { + enabled: NETDATA.dataAttributeBoolean(state.element, "d3pie-smallsegmentgrouping-enabled", false), + value: NETDATA.dataAttribute(state.element, 'd3pie-smallsegmentgrouping-value', 1), + // percentage, value + valueType: NETDATA.dataAttribute(state.element, 'd3pie-smallsegmentgrouping-valuetype', "percentage"), + label: NETDATA.dataAttribute(state.element, 'd3pie-smallsegmentgrouping-label', "other"), + color: NETDATA.dataAttribute(state.element, 'd3pie-smallsegmentgrouping-color', NETDATA.themes.current.d3pie.other) + }, + + // REQUIRED! This is where you enter your pie data; it needs to be an array of objects + // of this form: { label: "label", value: 1.5, color: "#000000" } - color is optional + content: content + }, + labels: { + outer: { + // label, value, percentage, label-value1, label-value2, label-percentage1, label-percentage2 + format: NETDATA.dataAttribute(state.element, 'd3pie-labels-outer-format', "label-value1"), + hideWhenLessThanPercentage: NETDATA.dataAttribute(state.element, 'd3pie-labels-outer-hidewhenlessthanpercentage', null), + pieDistance: NETDATA.dataAttribute(state.element, 'd3pie-labels-outer-piedistance', 15) + }, + inner: { + // label, value, percentage, label-value1, label-value2, label-percentage1, label-percentage2 + format: NETDATA.dataAttribute(state.element, 'd3pie-labels-inner-format', "percentage"), + hideWhenLessThanPercentage: NETDATA.dataAttribute(state.element, 'd3pie-labels-inner-hidewhenlessthanpercentage', 2) + }, + mainLabel: { + color: NETDATA.dataAttribute(state.element, 'd3pie-labels-mainLabel-color', NETDATA.themes.current.d3pie.mainlabel), // or 'segment' for dynamic color + font: NETDATA.dataAttribute(state.element, 'd3pie-labels-mainLabel-font', "arial"), + fontSize: NETDATA.dataAttribute(state.element, 'd3pie-labels-mainLabel-fontsize', 10), + fontWeight: NETDATA.dataAttribute(state.element, 'd3pie-labels-mainLabel-fontweight', "normal") + }, + percentage: { + color: NETDATA.dataAttribute(state.element, 'd3pie-labels-percentage-color', NETDATA.themes.current.d3pie.percentage), + font: NETDATA.dataAttribute(state.element, 'd3pie-labels-percentage-font', "arial"), + fontSize: NETDATA.dataAttribute(state.element, 'd3pie-labels-percentage-fontsize', 10), + fontWeight: NETDATA.dataAttribute(state.element, 'd3pie-labels-percentage-fontweight', "bold"), + decimalPlaces: 0 + }, + value: { + color: NETDATA.dataAttribute(state.element, 'd3pie-labels-value-color', NETDATA.themes.current.d3pie.value), + font: NETDATA.dataAttribute(state.element, 'd3pie-labels-value-font', "arial"), + fontSize: NETDATA.dataAttribute(state.element, 'd3pie-labels-value-fontsize', 10), + fontWeight: NETDATA.dataAttribute(state.element, 'd3pie-labels-value-fontweight', "bold") + }, + lines: { + enabled: NETDATA.dataAttributeBoolean(state.element, 'd3pie-labels-lines-enabled', true), + style: NETDATA.dataAttribute(state.element, 'd3pie-labels-lines-style', "curved"), + color: NETDATA.dataAttribute(state.element, 'd3pie-labels-lines-color', "segment") // "segment" or a hex color + }, + truncation: { + enabled: NETDATA.dataAttributeBoolean(state.element, 'd3pie-labels-truncation-enabled', false), + truncateLength: NETDATA.dataAttribute(state.element, 'd3pie-labels-truncation-truncatelength', 30) + }, + formatter: function (context) { + // console.log(context); + if (context.part === 'value') { + return state.legendFormatValue(context.value); + } + if (context.part === 'percentage') { + return context.label + '%'; + } + + return context.label; + } + }, + effects: { + load: { + effect: "none", // none / default + speed: 0 // commented in the d3pie code to speed it up + }, + pullOutSegmentOnClick: { + effect: "bounce", // none / linear / bounce / elastic / back + speed: 400, + size: 5 + }, + highlightSegmentOnMouseover: true, + highlightLuminosity: -0.2 + }, + tooltips: { + enabled: false, + type: "placeholder", // caption|placeholder + string: "", + placeholderParser: null, // function + styles: { + fadeInSpeed: 250, + backgroundColor: NETDATA.themes.current.d3pie.tooltip_bg, + backgroundOpacity: 0.5, + color: NETDATA.themes.current.d3pie.tooltip_fg, + borderRadius: 2, + font: "arial", + fontSize: 12, + padding: 4 + } + }, + misc: { + colors: { + background: 'transparent', // transparent or color # + // segments: state.chartColors(), + segmentStroke: NETDATA.dataAttribute(state.element, 'd3pie-misc-colors-segmentstroke', NETDATA.themes.current.d3pie.segment_stroke) + }, + gradient: { + enabled: NETDATA.dataAttributeBoolean(state.element, 'd3pie-misc-gradient-enabled', false), + percentage: NETDATA.dataAttribute(state.element, 'd3pie-misc-colors-percentage', 95), + color: NETDATA.dataAttribute(state.element, 'd3pie-misc-gradient-color', NETDATA.themes.current.d3pie.gradient_color) + }, + canvasPadding: { + top: 5, + right: 5, + bottom: 5, + left: 5 + }, + pieCenterOffset: { + x: 0, + y: 0 + }, + cssPrefix: NETDATA.dataAttribute(state.element, 'd3pie-cssprefix', null) + }, + callbacks: { + onload: null, + onMouseoverSegment: null, + onMouseoutSegment: null, + onClickSegment: null + } + }; + + state.d3pie_instance = new d3pie(state.element_chart, state.d3pie_options); + return true; +}; + +// ---------------------------------------------------------------------------------------------------------------- +// D3 + +NETDATA.d3Initialize = function(callback) { + if (typeof netdataStopD3 === 'undefined' || !netdataStopD3) { + $.ajax({ + url: NETDATA.d3_js, + cache: true, + dataType: "script", + xhrFields: { withCredentials: true } // required for the cookie + }) + .done(function() { + NETDATA.registerChartLibrary('d3', NETDATA.d3_js); + }) + .fail(function() { + NETDATA.chartLibraries.d3.enabled = false; + NETDATA.error(100, NETDATA.d3_js); + }) + .always(function() { + if (typeof callback === "function") + return callback(); + }); + } else { + NETDATA.chartLibraries.d3.enabled = false; + if (typeof callback === "function") + return callback(); + } +}; + +NETDATA.d3ChartUpdate = function(state, data) { + void(state); + void(data); + + return false; +}; + +NETDATA.d3ChartCreate = function(state, data) { + void(state); + void(data); + + return false; +}; + +// peity + +NETDATA.peityInitialize = function (callback) { + if (typeof netdataNoPeitys === 'undefined' || !netdataNoPeitys) { + $.ajax({ + url: NETDATA.peity_js, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + NETDATA.registerChartLibrary('peity', NETDATA.peity_js); + }) + .fail(function () { + NETDATA.chartLibraries.peity.enabled = false; + NETDATA.error(100, NETDATA.peity_js); + }) + .always(function () { + if (typeof callback === "function") { + return callback(); + } + }); + } else { + NETDATA.chartLibraries.peity.enabled = false; + if (typeof callback === "function") { + return callback(); + } + } +}; + +NETDATA.peityChartUpdate = function (state, data) { + state.peity_instance.innerHTML = data.result; + + if (state.peity_options.stroke !== state.chartCustomColors()[0]) { + state.peity_options.stroke = state.chartCustomColors()[0]; + if (state.chart.chart_type === 'line') { + state.peity_options.fill = NETDATA.themes.current.background; + } else { + state.peity_options.fill = NETDATA.colorLuminance(state.chartCustomColors()[0], NETDATA.chartDefaults.fill_luminance); + } + } + + $(state.peity_instance).peity('line', state.peity_options); + return true; +}; + +NETDATA.peityChartCreate = function (state, data) { + state.peity_instance = document.createElement('div'); + state.element_chart.appendChild(state.peity_instance); + + state.peity_options = { + stroke: NETDATA.themes.current.foreground, + strokeWidth: NETDATA.dataAttribute(state.element, 'peity-strokewidth', 1), + width: state.chartWidth(), + height: state.chartHeight(), + fill: NETDATA.themes.current.foreground + }; + + NETDATA.peityChartUpdate(state, data); + return true; +}; + +// ---------------------------------------------------------------------------------------------------------------- +// "Text-only" chart - Just renders the raw value to the DOM + +NETDATA.textOnlyCreate = function(state, data) { + var decimalPlaces = NETDATA.dataAttribute(state.element, 'textonly-decimal-places', 1); + var prefix = NETDATA.dataAttribute(state.element, 'textonly-prefix', ''); + var suffix = NETDATA.dataAttribute(state.element, 'textonly-suffix', ''); + + // Round based on number of decimal places to show + var precision = Math.pow(10, decimalPlaces); + var value = Math.round(data.result[0] * precision) / precision; + + state.element.textContent = prefix + value + suffix; + return true; +} + +NETDATA.textOnlyUpdate = NETDATA.textOnlyCreate; +// Charts Libraries Registration + +NETDATA.chartLibraries = { + "dygraph": { + initialize: NETDATA.dygraphInitialize, + create: NETDATA.dygraphChartCreate, + update: NETDATA.dygraphChartUpdate, + resize: function (state) { + if (typeof state.tmp.dygraph_instance !== 'undefined' && typeof state.tmp.dygraph_instance.resize === 'function') { + state.tmp.dygraph_instance.resize(); + } + }, + setSelection: NETDATA.dygraphSetSelection, + clearSelection: NETDATA.dygraphClearSelection, + toolboxPanAndZoom: NETDATA.dygraphToolboxPanAndZoom, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result.data$'), + format: function (state) { + void(state); + return 'json'; + }, + options: function (state) { + return 'ms' + '%7C' + 'flip' + (this.isLogScale(state) ? ('%7C' + 'abs') : '').toString(); + }, + legend: function (state) { + return (this.isSparkline(state) === false && NETDATA.dataAttributeBoolean(state.element, 'legend', true) === true) ? 'right-side' : null; + }, + autoresize: function (state) { + void(state); + return true; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + track_colors: function (state) { + void(state); + return true; + }, + pixels_per_point: function (state) { + return (this.isSparkline(state) === false) ? 3 : 2; + }, + isSparkline: function (state) { + if (typeof state.tmp.dygraph_sparkline === 'undefined') { + state.tmp.dygraph_sparkline = (this.theme(state) === 'sparkline'); + } + return state.tmp.dygraph_sparkline; + }, + isLogScale: function (state) { + if (typeof state.tmp.dygraph_logscale === 'undefined') { + state.tmp.dygraph_logscale = (this.theme(state) === 'logscale'); + } + return state.tmp.dygraph_logscale; + }, + theme: function (state) { + if (typeof state.tmp.dygraph_theme === 'undefined') { + state.tmp.dygraph_theme = NETDATA.dataAttribute(state.element, 'dygraph-theme', 'default'); + } + return state.tmp.dygraph_theme; + }, + container_class: function (state) { + if (this.legend(state) !== null) { + return 'netdata-container-with-legend'; + } + return 'netdata-container'; + } + }, + "sparkline": { + initialize: NETDATA.sparklineInitialize, + create: NETDATA.sparklineChartCreate, + update: NETDATA.sparklineChartUpdate, + resize: null, + setSelection: undefined, // function(state, t) { void(state); return true; }, + clearSelection: undefined, // function(state) { void(state); return true; }, + toolboxPanAndZoom: null, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result$'), + format: function (state) { + void(state); + return 'array'; + }, + options: function (state) { + void(state); + return 'flip' + '%7C' + 'abs'; + }, + legend: function (state) { + void(state); + return null; + }, + autoresize: function (state) { + void(state); + return false; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + track_colors: function (state) { + void(state); + return false; + }, + pixels_per_point: function (state) { + void(state); + return 3; + }, + container_class: function (state) { + void(state); + return 'netdata-container'; + } + }, + "peity": { + initialize: NETDATA.peityInitialize, + create: NETDATA.peityChartCreate, + update: NETDATA.peityChartUpdate, + resize: null, + setSelection: undefined, // function(state, t) { void(state); return true; }, + clearSelection: undefined, // function(state) { void(state); return true; }, + toolboxPanAndZoom: null, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result$'), + format: function (state) { + void(state); + return 'ssvcomma'; + }, + options: function (state) { + void(state); + return 'null2zero' + '%7C' + 'flip' + '%7C' + 'abs'; + }, + legend: function (state) { + void(state); + return null; + }, + autoresize: function (state) { + void(state); + return false; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + track_colors: function (state) { + void(state); + return false; + }, + pixels_per_point: function (state) { + void(state); + return 3; + }, + container_class: function (state) { + void(state); + return 'netdata-container'; + } + }, + // "morris": { + // initialize: NETDATA.morrisInitialize, + // create: NETDATA.morrisChartCreate, + // update: NETDATA.morrisChartUpdate, + // resize: null, + // setSelection: undefined, // function(state, t) { void(state); return true; }, + // clearSelection: undefined, // function(state) { void(state); return true; }, + // toolboxPanAndZoom: null, + // initialized: false, + // enabled: true, + // xssRegexIgnore: new RegExp('^/api/v1/data\.result.data$'), + // format: function(state) { void(state); return 'json'; }, + // options: function(state) { void(state); return 'objectrows' + '%7C' + 'ms'; }, + // legend: function(state) { void(state); return null; }, + // autoresize: function(state) { void(state); return false; }, + // max_updates_to_recreate: function(state) { void(state); return 50; }, + // track_colors: function(state) { void(state); return false; }, + // pixels_per_point: function(state) { void(state); return 15; }, + // container_class: function(state) { void(state); return 'netdata-container'; } + // }, + "google": { + initialize: NETDATA.googleInitialize, + create: NETDATA.googleChartCreate, + update: NETDATA.googleChartUpdate, + resize: null, + setSelection: undefined, //function(state, t) { void(state); return true; }, + clearSelection: undefined, //function(state) { void(state); return true; }, + toolboxPanAndZoom: null, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result.rows$'), + format: function (state) { + void(state); + return 'datatable'; + }, + options: function (state) { + void(state); + return ''; + }, + legend: function (state) { + void(state); + return null; + }, + autoresize: function (state) { + void(state); + return false; + }, + max_updates_to_recreate: function (state) { + void(state); + return 300; + }, + track_colors: function (state) { + void(state); + return false; + }, + pixels_per_point: function (state) { + void(state); + return 4; + }, + container_class: function (state) { + void(state); + return 'netdata-container'; + } + }, + // "raphael": { + // initialize: NETDATA.raphaelInitialize, + // create: NETDATA.raphaelChartCreate, + // update: NETDATA.raphaelChartUpdate, + // resize: null, + // setSelection: undefined, // function(state, t) { void(state); return true; }, + // clearSelection: undefined, // function(state) { void(state); return true; }, + // toolboxPanAndZoom: null, + // initialized: false, + // enabled: true, + // xssRegexIgnore: new RegExp('^/api/v1/data\.result.data$'), + // format: function(state) { void(state); return 'json'; }, + // options: function(state) { void(state); return ''; }, + // legend: function(state) { void(state); return null; }, + // autoresize: function(state) { void(state); return false; }, + // max_updates_to_recreate: function(state) { void(state); return 5000; }, + // track_colors: function(state) { void(state); return false; }, + // pixels_per_point: function(state) { void(state); return 3; }, + // container_class: function(state) { void(state); return 'netdata-container'; } + // }, + // "c3": { + // initialize: NETDATA.c3Initialize, + // create: NETDATA.c3ChartCreate, + // update: NETDATA.c3ChartUpdate, + // resize: null, + // setSelection: undefined, // function(state, t) { void(state); return true; }, + // clearSelection: undefined, // function(state) { void(state); return true; }, + // toolboxPanAndZoom: null, + // initialized: false, + // enabled: true, + // xssRegexIgnore: new RegExp('^/api/v1/data\.result$'), + // format: function(state) { void(state); return 'csvjsonarray'; }, + // options: function(state) { void(state); return 'milliseconds'; }, + // legend: function(state) { void(state); return null; }, + // autoresize: function(state) { void(state); return false; }, + // max_updates_to_recreate: function(state) { void(state); return 5000; }, + // track_colors: function(state) { void(state); return false; }, + // pixels_per_point: function(state) { void(state); return 15; }, + // container_class: function(state) { void(state); return 'netdata-container'; } + // }, + "d3pie": { + initialize: NETDATA.d3pieInitialize, + create: NETDATA.d3pieChartCreate, + update: NETDATA.d3pieChartUpdate, + resize: null, + setSelection: NETDATA.d3pieSetSelection, + clearSelection: NETDATA.d3pieClearSelection, + toolboxPanAndZoom: null, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result.data$'), + format: function (state) { + void(state); + return 'json'; + }, + options: function (state) { + void(state); + return 'objectrows' + '%7C' + 'ms'; + }, + legend: function (state) { + void(state); + return null; + }, + autoresize: function (state) { + void(state); + return false; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + track_colors: function (state) { + void(state); + return false; + }, + pixels_per_point: function (state) { + void(state); + return 15; + }, + container_class: function (state) { + void(state); + return 'netdata-container'; + } + }, + "d3": { + initialize: NETDATA.d3Initialize, + create: NETDATA.d3ChartCreate, + update: NETDATA.d3ChartUpdate, + resize: null, + setSelection: undefined, // function(state, t) { void(state); return true; }, + clearSelection: undefined, // function(state) { void(state); return true; }, + toolboxPanAndZoom: null, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result.data$'), + format: function (state) { + void(state); + return 'json'; + }, + options: function (state) { + void(state); + return ''; + }, + legend: function (state) { + void(state); + return null; + }, + autoresize: function (state) { + void(state); + return false; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + track_colors: function (state) { + void(state); + return false; + }, + pixels_per_point: function (state) { + void(state); + return 3; + }, + container_class: function (state) { + void(state); + return 'netdata-container'; + } + }, + "easypiechart": { + initialize: NETDATA.easypiechartInitialize, + create: NETDATA.easypiechartChartCreate, + update: NETDATA.easypiechartChartUpdate, + resize: null, + setSelection: NETDATA.easypiechartSetSelection, + clearSelection: NETDATA.easypiechartClearSelection, + toolboxPanAndZoom: null, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result$'), + format: function (state) { + void(state); + return 'array'; + }, + options: function (state) { + void(state); + return 'absolute'; + }, + legend: function (state) { + void(state); + return null; + }, + autoresize: function (state) { + void(state); + return false; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + track_colors: function (state) { + void(state); + return true; + }, + pixels_per_point: function (state) { + void(state); + return 3; + }, + aspect_ratio: 100, + container_class: function (state) { + void(state); + return 'netdata-container-easypiechart'; + } + }, + "gauge": { + initialize: NETDATA.gaugeInitialize, + create: NETDATA.gaugeChartCreate, + update: NETDATA.gaugeChartUpdate, + resize: null, + setSelection: NETDATA.gaugeSetSelection, + clearSelection: NETDATA.gaugeClearSelection, + toolboxPanAndZoom: null, + initialized: false, + enabled: true, + xssRegexIgnore: new RegExp('^/api/v1/data\.result$'), + format: function (state) { + void(state); + return 'array'; + }, + options: function (state) { + void(state); + return 'absolute'; + }, + legend: function (state) { + void(state); + return null; + }, + autoresize: function (state) { + void(state); + return false; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + track_colors: function (state) { + void(state); + return true; + }, + pixels_per_point: function (state) { + void(state); + return 3; + }, + aspect_ratio: 60, + container_class: function (state) { + void(state); + return 'netdata-container-gauge'; + } + }, + "textonly": { + autoresize: function (state) { + void(state); + return false; + }, + container_class: function (state) { + void(state); + return 'netdata-container'; + }, + create: NETDATA.textOnlyCreate, + enabled: true, + format: function (state) { + void(state); + return 'array'; + }, + initialized: true, + initialize: function (callback) { + callback(); + }, + legend: function (state) { + void(state); + return null; + }, + max_updates_to_recreate: function (state) { + void(state); + return 5000; + }, + options: function (state) { + void(state); + return 'absolute'; + }, + pixels_per_point: function (state) { + void(state); + return 3; + }, + track_colors: function (state) { + void(state); + return false; + }, + update: NETDATA.textOnlyUpdate, + xssRegexIgnore: new RegExp('^/api/v1/data\.result$'), + } +}; + +NETDATA.registerChartLibrary = function (library, url) { + if (NETDATA.options.debug.libraries) { + console.log("registering chart library: " + library); + } + + NETDATA.chartLibraries[library].url = url; + NETDATA.chartLibraries[library].initialized = true; + NETDATA.chartLibraries[library].enabled = true; +}; + +// *** src/dashboard.js/chart-registry.js + +// Chart Registry + +// When multiple charts need the same chart, we avoid downloading it +// multiple times (and having it in browser memory multiple time) +// by using this registry. + +// Every time we download a chart definition, we save it here with .add() +// Then we try to get it back with .get(). If that fails, we download it. + +NETDATA.fixHost = function (host) { + while (host.slice(-1) === '/') { + host = host.substring(0, host.length - 1); + } + + return host; +}; + +NETDATA.chartRegistry = { + charts: {}, + + globalReset: function () { + this.charts = {}; + }, + + add: function (host, id, data) { + if (typeof this.charts[host] === 'undefined') { + this.charts[host] = {}; + } + + //console.log('added ' + host + '/' + id); + this.charts[host][id] = data; + }, + + get: function (host, id) { + if (typeof this.charts[host] === 'undefined') { + return null; + } + + if (typeof this.charts[host][id] === 'undefined') { + return null; + } + + //console.log('cached ' + host + '/' + id); + return this.charts[host][id]; + }, + + downloadAll: function (host, callback) { + host = NETDATA.fixHost(host); + + let self = this; + + function got_data(h, data, callback) { + if (data !== null) { + self.charts[h] = data.charts; + + // update the server timezone in our options + if (typeof data.timezone === 'string') { + NETDATA.options.server_timezone = data.timezone; + } + } else { + NETDATA.error(406, h + '/api/v1/charts'); + } + + if (typeof callback === 'function') { + callback(data); + } + } + + if (netdataSnapshotData !== null) { + got_data(host, netdataSnapshotData.charts, callback); + } else { + $.ajax({ + url: host + '/api/v1/charts', + async: true, + cache: false, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkOptional('/api/v1/charts', data); + got_data(host, data, callback); + }) + .fail(function () { + NETDATA.error(405, host + '/api/v1/charts'); + + if (typeof callback === 'function') { + callback(null); + } + }); + } + } +}; + +// Compute common (joint) values over multiple charts. + + +// commonMin & commonMax + +NETDATA.commonMin = { + keys: {}, + latest: {}, + + globalReset: function () { + this.keys = {}; + this.latest = {}; + }, + + get: function (state) { + if (typeof state.tmp.__commonMin === 'undefined') { + // get the commonMin setting + state.tmp.__commonMin = NETDATA.dataAttribute(state.element, 'common-min', null); + } + + let min = state.data.min; + let name = state.tmp.__commonMin; + + if (name === null) { + // we don't need commonMin + //state.log('no need for commonMin'); + return min; + } + + let t = this.keys[name]; + if (typeof t === 'undefined') { + // add our commonMin + this.keys[name] = {}; + t = this.keys[name]; + } + + let uuid = state.uuid; + if (typeof t[uuid] !== 'undefined') { + if (t[uuid] === min) { + //state.log('commonMin ' + state.tmp.__commonMin + ' not changed: ' + this.latest[name]); + return this.latest[name]; + } else if (min < this.latest[name]) { + //state.log('commonMin ' + state.tmp.__commonMin + ' increased: ' + min); + t[uuid] = min; + this.latest[name] = min; + return min; + } + } + + // add our min + t[uuid] = min; + + // find the common min + let m = min; + // for (let i in t) { + // if (t.hasOwnProperty(i) && t[i] < m) m = t[i]; + // } + for (var ti of Object.values(t)) { + if (ti < m) { + m = ti; + } + } + + //state.log('commonMin ' + state.tmp.__commonMin + ' updated: ' + m); + this.latest[name] = m; + return m; + } +}; + +NETDATA.commonMax = { + keys: {}, + latest: {}, + + globalReset: function () { + this.keys = {}; + this.latest = {}; + }, + + get: function (state) { + if (typeof state.tmp.__commonMax === 'undefined') { + // get the commonMax setting + state.tmp.__commonMax = NETDATA.dataAttribute(state.element, 'common-max', null); + } + + let max = state.data.max; + let name = state.tmp.__commonMax; + + if (name === null) { + // we don't need commonMax + //state.log('no need for commonMax'); + return max; + } + + let t = this.keys[name]; + if (typeof t === 'undefined') { + // add our commonMax + this.keys[name] = {}; + t = this.keys[name]; + } + + let uuid = state.uuid; + if (typeof t[uuid] !== 'undefined') { + if (t[uuid] === max) { + //state.log('commonMax ' + state.tmp.__commonMax + ' not changed: ' + this.latest[name]); + return this.latest[name]; + } else if (max > this.latest[name]) { + //state.log('commonMax ' + state.tmp.__commonMax + ' increased: ' + max); + t[uuid] = max; + this.latest[name] = max; + return max; + } + } + + // add our max + t[uuid] = max; + + // find the common max + let m = max; + // for (let i in t) { + // if (t.hasOwnProperty(i) && t[i] > m) m = t[i]; + // } + for (var ti of Object.values(t)) { + if (ti > m) { + m = ti; + } + } + + //state.log('commonMax ' + state.tmp.__commonMax + ' updated: ' + m); + this.latest[name] = m; + return m; + } +}; + +NETDATA.commonColors = { + keys: {}, + + globalReset: function () { + this.keys = {}; + }, + + get: function (state, label) { + let ret = this.refill(state); + + if (typeof ret.assigned[label] === 'undefined') { + ret.assigned[label] = ret.available.shift(); + } + + return ret.assigned[label]; + }, + + refill: function (state) { + let ret, len; + + if (typeof state.tmp.__commonColors === 'undefined') { + ret = this.prepare(state); + } else { + ret = this.keys[state.tmp.__commonColors]; + if (typeof ret === 'undefined') { + ret = this.prepare(state); + } + } + + if (ret.available.length === 0) { + if (ret.copy_theme || ret.custom.length === 0) { + // copy the theme colors + len = NETDATA.themes.current.colors.length; + while (len--) { + ret.available.unshift(NETDATA.themes.current.colors[len]); + } + } + + // copy the custom colors + len = ret.custom.length; + while (len--) { + ret.available.unshift(ret.custom[len]); + } + } + + state.colors_assigned = ret.assigned; + state.colors_available = ret.available; + state.colors_custom = ret.custom; + + return ret; + }, + + __read_custom_colors: function (state, ret) { + // add the user supplied colors + let c = NETDATA.dataAttribute(state.element, 'colors', undefined); + if (typeof c === 'string' && c.length > 0) { + c = c.split(' '); + let len = c.length; + + if (len > 0 && c[len - 1] === 'ONLY') { + len--; + ret.copy_theme = false; + } + + while (len--) { + ret.custom.unshift(c[len]); + } + } + }, + + prepare: function (state) { + let has_custom_colors = false; + + if (typeof state.tmp.__commonColors === 'undefined') { + let defname = state.chart.context; + + // if this chart has data-colors="" + // we should use the chart uuid as the default key (private palette) + // (data-common-colors="NAME" will be used anyways) + let c = NETDATA.dataAttribute(state.element, 'colors', undefined); + if (typeof c === 'string' && c.length > 0) { + defname = state.uuid; + has_custom_colors = true; + } + + // get the commonColors setting + state.tmp.__commonColors = NETDATA.dataAttribute(state.element, 'common-colors', defname); + } + + let name = state.tmp.__commonColors; + let ret = this.keys[name]; + + if (typeof ret === 'undefined') { + // add our commonMax + this.keys[name] = { + assigned: {}, // name-value of dimensions and their colors + available: [], // an array of colors available to be used + custom: [], // the array of colors defined by the user + charts: {}, // the charts linked to this + copy_theme: true + }; + ret = this.keys[name]; + } + + if (typeof ret.charts[state.uuid] === 'undefined') { + ret.charts[state.uuid] = state; + + if (has_custom_colors) { + this.__read_custom_colors(state, ret); + } + } + + return ret; + } +}; + +// *** src/dashboard.js/main.js + +// Codacy declarations +/* global clipboard */ +/* global Ps */ + +if (NETDATA.options.debug.main_loop) { + console.log('welcome to NETDATA'); +} + +NETDATA.onresizeCallback = null; +NETDATA.onresize = function () { + NETDATA.options.last_page_resize = Date.now(); + NETDATA.onscroll(); + + if (typeof NETDATA.onresizeCallback === 'function') { + NETDATA.onresizeCallback(); + } +}; + +NETDATA.abortAllRefreshes = function () { + let targets = NETDATA.options.targets; + let len = targets.length; + + while (len--) { + if (targets[len].fetching_data) { + if (typeof targets[len].xhr !== 'undefined') { + targets[len].xhr.abort(); + targets[len].running = false; + targets[len].fetching_data = false; + } + } + } +}; + +NETDATA.onscrollStartDelay = function () { + NETDATA.options.last_page_scroll = Date.now(); + + NETDATA.options.on_scroll_refresher_stop_until = + NETDATA.options.last_page_scroll + + (NETDATA.options.current.async_on_scroll ? 1000 : 0); +}; + +NETDATA.onscrollEndDelay = function () { + NETDATA.options.on_scroll_refresher_stop_until = + Date.now() + + (NETDATA.options.current.async_on_scroll ? NETDATA.options.current.onscroll_worker_duration_threshold : 0); +}; + +NETDATA.onscroll_updater_timeout_id = undefined; +NETDATA.onscrollUpdater = function () { + NETDATA.globalSelectionSync.stop(); + + if (NETDATA.options.abort_ajax_on_scroll) { + NETDATA.abortAllRefreshes(); + } + + // when the user scrolls he sees that we have + // hidden all the not-visible charts + // using this little function we try to switch + // the charts back to visible quickly + + if (!NETDATA.intersectionObserver.enabled()) { + if (!NETDATA.options.current.parallel_refresher) { + let targets = NETDATA.options.targets; + let len = targets.length; + + while (len--) { + if (!targets[len].running) { + targets[len].isVisible(); + } + } + } + } + + NETDATA.onscrollEndDelay(); +}; + +NETDATA.scrollUp = false; +NETDATA.scrollY = window.scrollY; +NETDATA.onscroll = function () { + //console.log('onscroll() begin'); + + NETDATA.onscrollStartDelay(); + NETDATA.chartRefresherReschedule(); + + NETDATA.scrollUp = (window.scrollY > NETDATA.scrollY); + NETDATA.scrollY = window.scrollY; + + if (NETDATA.onscroll_updater_timeout_id) { + NETDATA.timeout.clear(NETDATA.onscroll_updater_timeout_id); + } + + NETDATA.onscroll_updater_timeout_id = NETDATA.timeout.set(NETDATA.onscrollUpdater, 0); + //console.log('onscroll() end'); +}; + +NETDATA.supportsPassiveEvents = function () { + if (NETDATA.options.passive_events === null) { + let supportsPassive = false; + try { + let opts = Object.defineProperty({}, 'passive', { + get: function () { + supportsPassive = true; + } + }); + window.addEventListener("test", null, opts); + } catch (e) { + console.log('browser does not support passive events'); + } + + NETDATA.options.passive_events = supportsPassive; + } + + // console.log('passive ' + NETDATA.options.passive_events); + return NETDATA.options.passive_events; +}; + +window.addEventListener('resize', NETDATA.onresize, NETDATA.supportsPassiveEvents() ? {passive: true} : false); +window.addEventListener('scroll', NETDATA.onscroll, NETDATA.supportsPassiveEvents() ? {passive: true} : false); +// window.onresize = NETDATA.onresize; +// window.onscroll = NETDATA.onscroll; + +// ---------------------------------------------------------------------------------------------------------------- +// Global Pan and Zoom on charts + +// Using this structure are synchronize all the charts, so that +// when you pan or zoom one, all others are automatically refreshed +// to the same timespan. + +NETDATA.globalPanAndZoom = { + seq: 0, // timestamp ms + // every time a chart is panned or zoomed + // we set the timestamp here + // then we use it as a sequence number + // to find if other charts are synchronized + // to this time-range + + master: null, // the master chart (state), to which all others + // are synchronized + + force_before_ms: null, // the timespan to sync all other charts + force_after_ms: null, + + callback: null, + + globalReset: function () { + this.clearMaster(); + this.seq = 0; + this.master = null; + this.force_after_ms = null; + this.force_before_ms = null; + this.callback = null; + }, + + delay: function () { + if (NETDATA.options.debug.globalPanAndZoom) { + console.log('globalPanAndZoom.delay()'); + } + + NETDATA.options.auto_refresher_stop_until = Date.now() + NETDATA.options.current.global_pan_sync_time; + }, + + // set a new master + setMaster: function (state, after, before) { + this.delay(); + + if (!NETDATA.options.current.sync_pan_and_zoom) { + return; + } + + if (this.master === null) { + if (NETDATA.options.debug.globalPanAndZoom) { + console.log('globalPanAndZoom.setMaster(' + state.id + ', ' + after + ', ' + before + ') SET MASTER'); + } + } else if (this.master !== state) { + if (NETDATA.options.debug.globalPanAndZoom) { + console.log('globalPanAndZoom.setMaster(' + state.id + ', ' + after + ', ' + before + ') CHANGED MASTER'); + } + + this.master.resetChart(true, true); + } + + let now = Date.now(); + this.master = state; + this.seq = now; + this.force_after_ms = after; + this.force_before_ms = before; + + if (typeof this.callback === 'function') { + this.callback(true, after, before); + } + }, + + // clear the master + clearMaster: function () { + // if (NETDATA.options.debug.globalPanAndZoom === true) + // console.log('globalPanAndZoom.clearMaster()'); + if (NETDATA.options.debug.globalPanAndZoom) { + console.log('globalPanAndZoom.clearMaster()'); + } + + if (this.master !== null) { + let st = this.master; + this.master = null; + st.resetChart(); + } + + this.master = null; + this.seq = 0; + this.force_after_ms = null; + this.force_before_ms = null; + NETDATA.options.auto_refresher_stop_until = 0; + + if (typeof this.callback === 'function') { + this.callback(false, 0, 0); + } + }, + + // is the given state the master of the global + // pan and zoom sync? + isMaster: function (state) { + return (this.master === state); + }, + + // are we currently have a global pan and zoom sync? + isActive: function () { + return (this.master !== null && this.force_before_ms !== null && this.force_after_ms !== null && this.seq !== 0); + }, + + // check if a chart, other than the master + // needs to be refreshed, due to the global pan and zoom + shouldBeAutoRefreshed: function (state) { + if (this.master === null || this.seq === 0) { + return false; + } + + //if (state.needsRecreation()) + // return true; + + return (state.tm.pan_and_zoom_seq !== this.seq); + } +}; + +// ---------------------------------------------------------------------------------------------------------------- +// global chart underlay (time-frame highlighting) + +NETDATA.globalChartUnderlay = { + callback: null, // what to call when a highlighted range is setup + after: null, // highlight after this time + before: null, // highlight before this time + view_after: null, // the charts after_ms viewport when the highlight was setup + view_before: null, // the charts before_ms viewport, when the highlight was setup + state: null, // the chart the highlight was setup + + isActive: function () { + return (this.after !== null && this.before !== null); + }, + + hasViewport: function () { + return (this.state !== null && this.view_after !== null && this.view_before !== null); + }, + + init: function (state, after, before, view_after, view_before) { + this.state = (typeof state !== 'undefined') ? state : null; + this.after = (typeof after !== 'undefined' && after !== null && after > 0) ? after : null; + this.before = (typeof before !== 'undefined' && before !== null && before > 0) ? before : null; + this.view_after = (typeof view_after !== 'undefined' && view_after !== null && view_after > 0) ? view_after : null; + this.view_before = (typeof view_before !== 'undefined' && view_before !== null && view_before > 0) ? view_before : null; + }, + + setup: function () { + if (this.isActive()) { + if (this.state === null) { + this.state = NETDATA.options.targets[0]; + } + + if (typeof this.callback === 'function') { + this.callback(true, this.after, this.before); + } + } else { + if (typeof this.callback === 'function') { + this.callback(false, 0, 0); + } + } + }, + + set: function (state, after, before, view_after, view_before) { + if (after > before) { + let t = after; + after = before; + before = t; + } + + this.init(state, after, before, view_after, view_before); + + // if (this.hasViewport() === true) + // NETDATA.globalPanAndZoom.setMaster(this.state, this.view_after, this.view_before); + if (this.hasViewport()) { + NETDATA.globalPanAndZoom.setMaster(this.state, this.view_after, this.view_before); + } + + this.setup(); + }, + + clear: function () { + this.after = null; + this.before = null; + this.state = null; + this.view_after = null; + this.view_before = null; + + if (typeof this.callback === 'function') { + this.callback(false, 0, 0); + } + }, + + focus: function () { + if (this.isActive() && this.hasViewport()) { + if (this.state === null) { + this.state = NETDATA.options.targets[0]; + } + + if (NETDATA.globalPanAndZoom.isMaster(this.state)) { + NETDATA.globalPanAndZoom.clearMaster(); + } + + NETDATA.globalPanAndZoom.setMaster(this.state, this.view_after, this.view_before, true); + } + } +}; + +// ---------------------------------------------------------------------------------------------------------------- +// dimensions selection + +// TODO +// move color assignment to dimensions, here + +let dimensionStatus = function (parent, label, name_div, value_div, color) { + this.enabled = false; + this.parent = parent; + this.label = label; + this.name_div = null; + this.value_div = null; + this.color = NETDATA.themes.current.foreground; + this.selected = (parent.unselected_count === 0); + + this.setOptions(name_div, value_div, color); +}; + +dimensionStatus.prototype.invalidate = function () { + this.name_div = null; + this.value_div = null; + this.enabled = false; +}; + +dimensionStatus.prototype.setOptions = function (name_div, value_div, color) { + this.color = color; + + if (this.name_div !== name_div) { + this.name_div = name_div; + this.name_div.title = this.label; + this.name_div.style.setProperty('color', this.color, 'important'); + if (!this.selected) { + this.name_div.className = 'netdata-legend-name not-selected'; + } else { + this.name_div.className = 'netdata-legend-name selected'; + } + } + + if (this.value_div !== value_div) { + this.value_div = value_div; + this.value_div.title = this.label; + this.value_div.style.setProperty('color', this.color, 'important'); + if (!this.selected) { + this.value_div.className = 'netdata-legend-value not-selected'; + } else { + this.value_div.className = 'netdata-legend-value selected'; + } + } + + this.enabled = true; + this.setHandler(); +}; + +dimensionStatus.prototype.setHandler = function () { + if (!this.enabled) { + return; + } + + let ds = this; + + // this.name_div.onmousedown = this.value_div.onmousedown = function(e) { + this.name_div.onclick = this.value_div.onclick = function (e) { + e.preventDefault(); + if (ds.isSelected()) { + // this is selected + if (e.shiftKey || e.ctrlKey) { + // control or shift key is pressed -> unselect this (except is none will remain selected, in which case select all) + ds.unselect(); + + if (ds.parent.countSelected() === 0) { + ds.parent.selectAll(); + } + } else { + // no key is pressed -> select only this (except if it is the only selected already, in which case select all) + if (ds.parent.countSelected() === 1) { + ds.parent.selectAll(); + } else { + ds.parent.selectNone(); + ds.select(); + } + } + } + else { + // this is not selected + if (e.shiftKey || e.ctrlKey) { + // control or shift key is pressed -> select this too + ds.select(); + } else { + // no key is pressed -> select only this + ds.parent.selectNone(); + ds.select(); + } + } + + ds.parent.state.redrawChart(); + } +}; + +dimensionStatus.prototype.select = function () { + if (!this.enabled) { + return; + } + + this.name_div.className = 'netdata-legend-name selected'; + this.value_div.className = 'netdata-legend-value selected'; + this.selected = true; +}; + +dimensionStatus.prototype.unselect = function () { + if (!this.enabled) { + return; + } + + this.name_div.className = 'netdata-legend-name not-selected'; + this.value_div.className = 'netdata-legend-value hidden'; + this.selected = false; +}; + +dimensionStatus.prototype.isSelected = function () { + // return(this.enabled === true && this.selected === true); + return this.enabled && this.selected; +}; + +// ---------------------------------------------------------------------------------------------------------------- + +let dimensionsVisibility = function (state) { + this.state = state; + this.len = 0; + this.dimensions = {}; + this.selected_count = 0; + this.unselected_count = 0; +}; + +dimensionsVisibility.prototype.dimensionAdd = function (label, name_div, value_div, color) { + if (typeof this.dimensions[label] === 'undefined') { + this.len++; + this.dimensions[label] = new dimensionStatus(this, label, name_div, value_div, color); + } else { + this.dimensions[label].setOptions(name_div, value_div, color); + } + + return this.dimensions[label]; +}; + +dimensionsVisibility.prototype.dimensionGet = function (label) { + return this.dimensions[label]; +}; + +dimensionsVisibility.prototype.invalidateAll = function () { + let keys = Object.keys(this.dimensions); + let len = keys.length; + while (len--) { + this.dimensions[keys[len]].invalidate(); + } +}; + +dimensionsVisibility.prototype.selectAll = function () { + let keys = Object.keys(this.dimensions); + let len = keys.length; + while (len--) { + this.dimensions[keys[len]].select(); + } +}; + +dimensionsVisibility.prototype.countSelected = function () { + let selected = 0; + let keys = Object.keys(this.dimensions); + let len = keys.length; + while (len--) { + if (this.dimensions[keys[len]].isSelected()) { + selected++; + } + } + + return selected; +}; + +dimensionsVisibility.prototype.selectNone = function () { + let keys = Object.keys(this.dimensions); + let len = keys.length; + while (len--) { + this.dimensions[keys[len]].unselect(); + } +}; + +dimensionsVisibility.prototype.selected2BooleanArray = function (array) { + let ret = []; + this.selected_count = 0; + this.unselected_count = 0; + + let len = array.length; + while (len--) { + let ds = this.dimensions[array[len]]; + if (typeof ds === 'undefined') { + // console.log(array[i] + ' is not found'); + ret.unshift(false); + } else if (ds.isSelected()) { + ret.unshift(true); + this.selected_count++; + } else { + ret.unshift(false); + this.unselected_count++; + } + } + + if (this.selected_count === 0 && this.unselected_count !== 0) { + this.selectAll(); + return this.selected2BooleanArray(array); + } + + return ret; +}; + +// ---------------------------------------------------------------------------------------------------------------- +// date/time conversion + +NETDATA.dateTime = { + using_timezone: false, + + // these are the old netdata functions + // we fallback to these, if the new ones fail + + localeDateStringNative: function (d) { + return d.toLocaleDateString(); + }, + + localeTimeStringNative: function (d) { + return d.toLocaleTimeString(); + }, + + xAxisTimeStringNative: function (d) { + return NETDATA.zeropad(d.getHours()) + ":" + + NETDATA.zeropad(d.getMinutes()) + ":" + + NETDATA.zeropad(d.getSeconds()); + }, + + // initialize the new date/time conversion + // functions. + // if this fails, we fallback to the above + init: function (timezone) { + //console.log('init with timezone: ' + timezone); + + // detect browser timezone + try { + NETDATA.options.browser_timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; + } catch (e) { + console.log('failed to detect browser timezone: ' + e.toString()); + NETDATA.options.browser_timezone = 'cannot-detect-it'; + } + + let ret = false; + + try { + let dateOptions = { + localeMatcher: 'best fit', + formatMatcher: 'best fit', + weekday: 'short', + year: 'numeric', + month: 'short', + day: '2-digit' + }; + + let timeOptions = { + localeMatcher: 'best fit', + hour12: false, + formatMatcher: 'best fit', + hour: '2-digit', + minute: '2-digit', + second: '2-digit' + }; + + let xAxisOptions = { + localeMatcher: 'best fit', + hour12: false, + formatMatcher: 'best fit', + hour: '2-digit', + minute: '2-digit', + second: '2-digit' + }; + + if (typeof timezone === 'string' && timezone !== '' && timezone !== 'default') { + dateOptions.timeZone = timezone; + timeOptions.timeZone = timezone; + timeOptions.timeZoneName = 'short'; + xAxisOptions.timeZone = timezone; + this.using_timezone = true; + } else { + timezone = 'default'; + this.using_timezone = false; + } + + this.dateFormat = new Intl.DateTimeFormat(navigator.language, dateOptions); + this.timeFormat = new Intl.DateTimeFormat(navigator.language, timeOptions); + this.xAxisFormat = new Intl.DateTimeFormat(navigator.language, xAxisOptions); + + this.localeDateString = function (d) { + return this.dateFormat.format(d); + }; + + this.localeTimeString = function (d) { + return this.timeFormat.format(d); + }; + + this.xAxisTimeString = function (d) { + return this.xAxisFormat.format(d); + }; + + //let d = new Date(); + //let t = this.dateFormat.format(d) + ' ' + this.timeFormat.format(d) + ' ' + this.xAxisFormat.format(d); + + ret = true; + } catch (e) { + console.log('Cannot setup Date/Time formatting: ' + e.toString()); + + timezone = 'default'; + this.localeDateString = this.localeDateStringNative; + this.localeTimeString = this.localeTimeStringNative; + this.xAxisTimeString = this.xAxisTimeStringNative; + this.using_timezone = false; + + ret = false; + } + + // save it + //console.log('init setOption timezone: ' + timezone); + NETDATA.setOption('timezone', timezone); + + return ret; + } +}; +NETDATA.dateTime.init(NETDATA.options.current.timezone); + +// ---------------------------------------------------------------------------------------------------------------- +// global selection sync + +NETDATA.globalSelectionSync = { + state: null, + dontSyncBefore: 0, + last_t: 0, + slaves: [], + timeoutId: undefined, + + globalReset: function () { + this.stop(); + this.state = null; + this.dontSyncBefore = 0; + this.last_t = 0; + this.slaves = []; + this.timeoutId = undefined; + }, + + active: function () { + return (this.state !== null); + }, + + // return true if global selection sync can be enabled now + enabled: function () { + // console.log('enabled()'); + // can we globally apply selection sync? + if (!NETDATA.options.current.sync_selection) { + return false; + } + + return (this.dontSyncBefore <= Date.now()); + }, + + // set the global selection sync master + setMaster: function (state) { + if (!this.enabled()) { + this.stop(); + return; + } + + if (this.state === state) { + return; + } + + if (this.state !== null) { + this.stop(); + } + + if (NETDATA.options.debug.globalSelectionSync) { + console.log('globalSelectionSync.setMaster(' + state.id + ')'); + } + + state.selected = true; + this.state = state; + this.last_t = 0; + + // find all slaves + let targets = NETDATA.intersectionObserver.targets(); + this.slaves = []; + let len = targets.length; + while (len--) { + let st = targets[len]; + if (this.state !== st && st.globalSelectionSyncIsEligible()) { + this.slaves.push(st); + } + } + + // this.delay(100); + }, + + // stop global selection sync + stop: function () { + if (this.state !== null) { + if (NETDATA.options.debug.globalSelectionSync) { + console.log('globalSelectionSync.stop()'); + } + + let len = this.slaves.length; + while (len--) { + this.slaves[len].clearSelection(); + } + + this.state.clearSelection(); + + this.last_t = 0; + this.slaves = []; + this.state = null; + } + }, + + // delay global selection sync for some time + delay: function (ms) { + if (NETDATA.options.current.sync_selection) { + // if (NETDATA.options.debug.globalSelectionSync === true) { + if (NETDATA.options.debug.globalSelectionSync) { + console.log('globalSelectionSync.delay()'); + } + + if (typeof ms === 'number') { + this.dontSyncBefore = Date.now() + ms; + } else { + this.dontSyncBefore = Date.now() + NETDATA.options.current.sync_selection_delay; + } + } + }, + + __syncSlaves: function () { + // if (NETDATA.globalSelectionSync.enabled() === true) { + if (NETDATA.globalSelectionSync.enabled()) { + // if (NETDATA.options.debug.globalSelectionSync === true) + if (NETDATA.options.debug.globalSelectionSync) { + console.log('globalSelectionSync.__syncSlaves()'); + } + + let t = NETDATA.globalSelectionSync.last_t; + let len = NETDATA.globalSelectionSync.slaves.length; + while (len--) { + NETDATA.globalSelectionSync.slaves[len].setSelection(t); + } + + this.timeoutId = undefined; + } + }, + + // sync all the visible charts to the given time + // this is to be called from the chart libraries + sync: function (state, t) { + // if (NETDATA.options.current.sync_selection === true) { + if (NETDATA.options.current.sync_selection) { + // if (NETDATA.options.debug.globalSelectionSync === true) + if (NETDATA.options.debug.globalSelectionSync) { + console.log('globalSelectionSync.sync(' + state.id + ', ' + t.toString() + ')'); + } + + this.setMaster(state); + + if (t === this.last_t) { + return; + } + + this.last_t = t; + + if (state.foreignElementSelection !== null) { + state.foreignElementSelection.innerText = NETDATA.dateTime.localeDateString(t) + ' ' + NETDATA.dateTime.localeTimeString(t); + } + + if (this.timeoutId) { + NETDATA.timeout.clear(this.timeoutId); + } + + this.timeoutId = NETDATA.timeout.set(this.__syncSlaves, 0); + } + } +}; + +NETDATA.intersectionObserver = { + observer: null, + visible_targets: [], + + options: { + root: null, + rootMargin: "0px", + threshold: null + }, + + enabled: function () { + return this.observer !== null; + }, + + globalReset: function () { + if (this.observer !== null) { + this.visible_targets = []; + this.observer.disconnect(); + this.init(); + } + }, + + targets: function () { + if (this.enabled() && this.visible_targets.length > 0) { + return this.visible_targets; + } else { + return NETDATA.options.targets; + } + }, + + switchChartVisibility: function () { + let old = this.__visibilityRatioOld; + + if (old !== this.__visibilityRatio) { + if (old === 0 && this.__visibilityRatio > 0) { + this.unhideChart(); + } else if (old > 0 && this.__visibilityRatio === 0) { + this.hideChart(); + } + + this.__visibilityRatioOld = this.__visibilityRatio; + } + }, + + handler: function (entries, observer) { + entries.forEach(function (entry) { + let state = NETDATA.chartState(entry.target); + + let idx; + if (entry.intersectionRatio > 0) { + idx = NETDATA.intersectionObserver.visible_targets.indexOf(state); + if (idx === -1) { + if (NETDATA.scrollUp) { + NETDATA.intersectionObserver.visible_targets.push(state); + } else { + NETDATA.intersectionObserver.visible_targets.unshift(state); + } + } + else if (state.__visibilityRatio === 0) { + state.log("was not visible until now, but was already in visible_targets"); + } + } else { + idx = NETDATA.intersectionObserver.visible_targets.indexOf(state); + if (idx !== -1) { + NETDATA.intersectionObserver.visible_targets.splice(idx, 1); + } else if (state.__visibilityRatio > 0) { + state.log("was visible, but not found in visible_targets"); + } + } + + state.__visibilityRatio = entry.intersectionRatio; + + if (!NETDATA.options.current.async_on_scroll) { + if (window.requestIdleCallback) { + window.requestIdleCallback(function () { + NETDATA.intersectionObserver.switchChartVisibility.call(state); + }, {timeout: 100}); + } else { + NETDATA.intersectionObserver.switchChartVisibility.call(state); + } + } + }); + }, + + observe: function (state) { + if (this.enabled()) { + state.__visibilityRatioOld = 0; + state.__visibilityRatio = 0; + this.observer.observe(state.element); + + state.isVisible = function () { + if (!NETDATA.options.current.update_only_visible) { + return true; + } + + NETDATA.intersectionObserver.switchChartVisibility.call(this); + + return this.__visibilityRatio > 0; + } + } + }, + + init: function () { + if (typeof netdataIntersectionObserver === 'undefined' || netdataIntersectionObserver) { + try { + this.observer = new IntersectionObserver(this.handler, this.options); + } catch (e) { + console.log("IntersectionObserver is not supported on this browser"); + this.observer = null; + } + } + //else { + // console.log("IntersectionObserver is disabled"); + //} + } +}; +NETDATA.intersectionObserver.init(); + +// ---------------------------------------------------------------------------------------------------------------- +// Our state object, where all per-chart values are stored + +let chartState = function (element) { + this.element = element; + + // IMPORTANT: + // all private functions should use 'that', instead of 'this' + // Alternatively, you can use arrow functions (related issue #4514) + let that = this; + + // ============================================================================================================ + // ERROR HANDLING + + /* error() - private + * show an error instead of the chart + */ + let error = (msg) => { + let ret = true; + + if (typeof netdataErrorCallback === 'function') { + ret = netdataErrorCallback('chart', this.id, msg); + } + + if (ret) { + this.element.innerHTML = this.id + ': ' + msg; + this.enabled = false; + this.current = this.pan; + } + }; + + // console logging + this.log = function (msg) { + console.log(this.id + ' (' + this.library_name + ' ' + this.uuid + '): ' + msg); + }; + + this.debugLog = function (msg) { + if (this.debug) { + this.log(msg); + } + }; + + // ============================================================================================================ + // EARLY INITIALIZATION + + // These are variables that should exist even if the chart is never to be rendered. + // Be careful what you add here - there may be thousands of charts on the page. + + // GUID - a unique identifier for the chart + this.uuid = NETDATA.guid(); + + // string - the name of chart + this.id = NETDATA.dataAttribute(this.element, 'netdata', undefined); + if (typeof this.id === 'undefined') { + error("netdata elements need data-netdata"); + return; + } + + // string - the key for localStorage settings + this.settings_id = NETDATA.dataAttribute(this.element, 'id', null); + + // the user given dimensions of the element + this.width = NETDATA.dataAttribute(this.element, 'width', NETDATA.chartDefaults.width); + this.height = NETDATA.dataAttribute(this.element, 'height', NETDATA.chartDefaults.height); + this.height_original = this.height; + + if (this.settings_id !== null) { + this.height = NETDATA.localStorageGet('chart_heights.' + this.settings_id, this.height, function (height) { + // this is the callback that will be called + // if and when the user resets all localStorage variables + // to their defaults + + resizeChartToHeight(height); + }); + } + + // the chart library requested by the user + this.library_name = NETDATA.dataAttribute(this.element, 'chart-library', NETDATA.chartDefaults.library); + + // check the requested library is available + // we don't initialize it here - it will be initialized when + // this chart will be first used + if (typeof NETDATA.chartLibraries[this.library_name] === 'undefined') { + NETDATA.error(402, this.library_name); + error('chart library "' + this.library_name + '" is not found'); + this.enabled = false; + } else if (!NETDATA.chartLibraries[this.library_name].enabled) { + NETDATA.error(403, this.library_name); + error('chart library "' + this.library_name + '" is not enabled'); + this.enabled = false; + } else { + this.library = NETDATA.chartLibraries[this.library_name]; + } + + this.auto = { + name: 'auto', + autorefresh: true, + force_update_at: 0, // the timestamp to force the update at + force_before_ms: null, + force_after_ms: null + }; + this.pan = { + name: 'pan', + autorefresh: false, + force_update_at: 0, // the timestamp to force the update at + force_before_ms: null, + force_after_ms: null + }; + this.zoom = { + name: 'zoom', + autorefresh: false, + force_update_at: 0, // the timestamp to force the update at + force_before_ms: null, + force_after_ms: null + }; + + // this is a pointer to one of the sub-classes below + // auto, pan, zoom + this.current = this.auto; + + this.running = false; // boolean - true when the chart is being refreshed now + this.enabled = true; // boolean - is the chart enabled for refresh? + + this.force_update_every = null; // number - overwrite the visualization update frequency of the chart + + this.tmp = {}; + + this.foreignElementBefore = null; + this.foreignElementAfter = null; + this.foreignElementDuration = null; + this.foreignElementUpdateEvery = null; + this.foreignElementSelection = null; + + // ============================================================================================================ + // PRIVATE FUNCTIONS + + // reset the runtime status variables to their defaults + const runtimeInit = () => { + this.paused = false; // boolean - is the chart paused for any reason? + this.selected = false; // boolean - is the chart shown a selection? + + this.chart_created = false; // boolean - is the library.create() been called? + this.dom_created = false; // boolean - is the chart DOM been created? + this.fetching_data = false; // boolean - true while we fetch data via ajax + + this.updates_counter = 0; // numeric - the number of refreshes made so far + this.updates_since_last_unhide = 0; // numeric - the number of refreshes made since the last time the chart was unhidden + this.updates_since_last_creation = 0; // numeric - the number of refreshes made since the last time the chart was created + + this.tm = { + last_initialized: 0, // milliseconds - the timestamp it was last initialized + last_dom_created: 0, // milliseconds - the timestamp its DOM was last created + last_mode_switch: 0, // milliseconds - the timestamp it switched modes + + last_info_downloaded: 0, // milliseconds - the timestamp we downloaded the chart + last_updated: 0, // the timestamp the chart last updated with data + pan_and_zoom_seq: 0, // the sequence number of the global synchronization + // between chart. + // Used with NETDATA.globalPanAndZoom.seq + last_visible_check: 0, // the time we last checked if it is visible + last_resized: 0, // the time the chart was resized + last_hidden: 0, // the time the chart was hidden + last_unhidden: 0, // the time the chart was unhidden + last_autorefreshed: 0 // the time the chart was last refreshed + }; + + this.data = null; // the last data as downloaded from the netdata server + this.data_url = 'invalid://'; // string - the last url used to update the chart + this.data_points = 0; // number - the number of points returned from netdata + this.data_after = 0; // milliseconds - the first timestamp of the data + this.data_before = 0; // milliseconds - the last timestamp of the data + this.data_update_every = 0; // milliseconds - the frequency to update the data + + this.tmp = {}; // members that can be destroyed to save memory + }; + + // initialize all the variables that are required for the chart to be rendered + const lateInitialization = () => { + if (typeof this.host !== 'undefined') { + return; + } + + // string - the netdata server URL, without any path + this.host = NETDATA.dataAttribute(this.element, 'host', NETDATA.serverDefault); + + // make sure the host does not end with / + // all netdata API requests use absolute paths + while (this.host.slice(-1) === '/') { + this.host = this.host.substring(0, this.host.length - 1); + } + + // string - the grouping method requested by the user + this.method = NETDATA.dataAttribute(this.element, 'method', NETDATA.chartDefaults.method); + this.gtime = NETDATA.dataAttribute(this.element, 'gtime', 0); + + // the time-range requested by the user + this.after = NETDATA.dataAttribute(this.element, 'after', NETDATA.chartDefaults.after); + this.before = NETDATA.dataAttribute(this.element, 'before', NETDATA.chartDefaults.before); + + // the pixels per point requested by the user + this.pixels_per_point = NETDATA.dataAttribute(this.element, 'pixels-per-point', 1); + this.points = NETDATA.dataAttribute(this.element, 'points', null); + + // the forced update_every + this.force_update_every = NETDATA.dataAttribute(this.element, 'update-every', null); + if (typeof this.force_update_every !== 'number' || this.force_update_every <= 1) { + if (this.force_update_every !== null) { + this.log('ignoring invalid value of property data-update-every'); + } + + this.force_update_every = null; + } else { + this.force_update_every *= 1000; + } + + // the dimensions requested by the user + this.dimensions = NETDATA.encodeURIComponent(NETDATA.dataAttribute(this.element, 'dimensions', null)); + + this.title = NETDATA.dataAttribute(this.element, 'title', null); // the title of the chart + this.units = NETDATA.dataAttribute(this.element, 'units', null); // the units of the chart dimensions + this.units_desired = NETDATA.dataAttribute(this.element, 'desired-units', NETDATA.options.current.units); // the units of the chart dimensions + this.units_current = this.units; + this.units_common = NETDATA.dataAttribute(this.element, 'common-units', null); + + // additional options to pass to netdata + this.append_options = NETDATA.encodeURIComponent(NETDATA.dataAttribute(this.element, 'append-options', null)); + + // override options to pass to netdata + this.override_options = NETDATA.encodeURIComponent(NETDATA.dataAttribute(this.element, 'override-options', null)); + + this.debug = NETDATA.dataAttributeBoolean(this.element, 'debug', false); + + this.value_decimal_detail = -1; + let d = NETDATA.dataAttribute(this.element, 'decimal-digits', -1); + if (typeof d === 'number') { + this.value_decimal_detail = d; + } else if (typeof d !== 'undefined') { + this.log('ignoring decimal-digits value: ' + d.toString()); + } + + // if we need to report the rendering speed + // find the element that needs to be updated + let refresh_dt_element_name = NETDATA.dataAttribute(this.element, 'dt-element-name', null); // string - the element to print refresh_dt_ms + + if (refresh_dt_element_name !== null) { + this.refresh_dt_element = document.getElementById(refresh_dt_element_name) || null; + } + else { + this.refresh_dt_element = null; + } + + this.dimensions_visibility = new dimensionsVisibility(that); + + this.netdata_first = 0; // milliseconds - the first timestamp in netdata + this.netdata_last = 0; // milliseconds - the last timestamp in netdata + this.requested_after = null; // milliseconds - the timestamp of the request after param + this.requested_before = null; // milliseconds - the timestamp of the request before param + this.requested_padding = null; + this.view_after = 0; + this.view_before = 0; + + this.refresh_dt_ms = 0; // milliseconds - the time the last refresh took + + // how many retries we have made to load chart data from the server + this.retries_on_data_failures = 0; + + // color management + this.colors = null; + this.colors_assigned = null; + this.colors_available = null; + this.colors_custom = null; + + this.element_message = null; // the element already created by the user + this.element_chart = null; // the element with the chart + this.element_legend = null; // the element with the legend of the chart (if created by us) + this.element_legend_childs = { + content: null, + hidden: null, + title_date: null, + title_time: null, + title_units: null, + perfect_scroller: null, // the container to apply perfect scroller to + series: null + }; + + this.chart_url = null; // string - the url to download chart info + this.chart = null; // object - the chart as downloaded from the server + + const getForeignElementById = (opt) => { + let id = NETDATA.dataAttribute(this.element, opt, null); + if (id === null) { + //this.log('option "' + opt + '" is undefined'); + return null; + } + + let el = document.getElementById(id); + if (typeof el === 'undefined') { + this.log('cannot find an element with name "' + id.toString() + '"'); + return null; + } + + return el; + }; + + this.foreignElementBefore = getForeignElementById('show-before-at'); + this.foreignElementAfter = getForeignElementById('show-after-at'); + this.foreignElementDuration = getForeignElementById('show-duration-at'); + this.foreignElementUpdateEvery = getForeignElementById('show-update-every-at'); + this.foreignElementSelection = getForeignElementById('show-selection-at'); + }; + + const destroyDOM = () => { + if (!this.enabled) { + return; + } + + if (this.debug) { + this.log('destroyDOM()'); + } + + // this.element.className = 'netdata-message icon'; + // this.element.innerHTML = ' netdata'; + this.element.innerHTML = ''; + this.element_message = null; + this.element_legend = null; + this.element_chart = null; + this.element_legend_childs.series = null; + + this.chart_created = false; + this.dom_created = false; + + this.tm.last_resized = 0; + this.tm.last_dom_created = 0; + }; + + const maxMessageFontSize = () => { + let screenHeight = screen.height; + let el = this.element; + + // normally we want a font size, as tall as the element + let h = el.clientHeight; + + // but give it some air, 20% let's say, or 5 pixels min + let lost = Math.max(h * 0.2, 5); + h -= lost; + + // center the text, vertically + let paddingTop = (lost - 5) / 2; + + // but check the width too + // it should fit 10 characters in it + let w = el.clientWidth / 10; + if (h > w) { + paddingTop += (h - w) / 2; + h = w; + } + + // and don't make it too huge + // 5% of the screen size is good + if (h > screenHeight / 20) { + paddingTop += (h - (screenHeight / 20)) / 2; + h = screenHeight / 20; + } + + // set it + this.element_message.style.fontSize = h.toString() + 'px'; + this.element_message.style.paddingTop = paddingTop.toString() + 'px'; + }; + + const showMessageIcon = (icon) => { + this.element_message.innerHTML = icon; + maxMessageFontSize(); + $(this.element_message).removeClass('hidden'); + this.tmp.___messageHidden___ = undefined; + }; + + const showLoading = () => { + if (!this.chart_created) { + showMessageIcon(NETDATA.icons.loading + ' netdata'); + return true; + } + return false; + }; + + let createDOM = () => { + if (!this.enabled) { + return; + } + lateInitialization(); + + destroyDOM(); + + if (this.debug) { + this.log('createDOM()'); + } + + this.element_message = document.createElement('div'); + this.element_message.className = 'netdata-message icon hidden'; + this.element.appendChild(this.element_message); + + this.dom_created = true; + this.chart_created = false; + + this.tm.last_dom_created = this.tm.last_resized = Date.now(); + + showLoading(); + }; + + const initDOM = () => { + this.element.className = this.library.container_class(that); + + if (typeof(this.width) === 'string') { + this.element.style.width = this.width; + } else if (typeof(this.width) === 'number') { + this.element.style.width = this.width.toString() + 'px'; + } + + if (typeof(this.library.aspect_ratio) === 'undefined') { + if (typeof(this.height) === 'string') { + this.element.style.height = this.height; + } else if (typeof(this.height) === 'number') { + this.element.style.height = this.height.toString() + 'px'; + } + } + + if (NETDATA.chartDefaults.min_width !== null) { + this.element.style.min_width = NETDATA.chartDefaults.min_width; + } + }; + + const invisibleSearchableText = () => { + return '' + this.id + ''; + }; + + /* init() private + * initialize state variables + * destroy all (possibly) created state elements + * create the basic DOM for a chart + */ + const init = (opt) => { + if (!this.enabled) { + return; + } + + runtimeInit(); + this.element.innerHTML = invisibleSearchableText(); + + this.tm.last_initialized = Date.now(); + this.setMode('auto'); + + if (opt !== 'fast') { + if (this.isVisible(true) || opt === 'force') { + createDOM(); + } + } + }; + + const hideMessage = () => { + if (typeof this.tmp.___messageHidden___ === 'undefined') { + this.tmp.___messageHidden___ = true; + $(this.element_message).addClass('hidden'); + } + }; + + const showRendering = () => { + let icon; + if (this.chart !== null) { + if (this.chart.chart_type === 'line') { + icon = NETDATA.icons.lineChart; + } else { + icon = NETDATA.icons.areaChart; + } + } + else { + icon = NETDATA.icons.noChart; + } + + showMessageIcon(icon + ' netdata' + invisibleSearchableText()); + }; + + const isHidden = () => { + return (typeof this.tmp.___chartIsHidden___ !== 'undefined'); + }; + + // hide the chart, when it is not visible - called from isVisible() + this.hideChart = function () { + // hide it, if it is not already hidden + if (isHidden()) { + return; + } + + if (this.chart_created) { + if (NETDATA.options.current.show_help) { + if (this.element_legend_childs.toolbox !== null) { + if (this.debug) { + this.log('hideChart(): hiding legend popovers'); + } + + $(this.element_legend_childs.toolbox_left).popover('hide'); + $(this.element_legend_childs.toolbox_reset).popover('hide'); + $(this.element_legend_childs.toolbox_right).popover('hide'); + $(this.element_legend_childs.toolbox_zoomin).popover('hide'); + $(this.element_legend_childs.toolbox_zoomout).popover('hide'); + } + + if (this.element_legend_childs.resize_handler !== null) { + $(this.element_legend_childs.resize_handler).popover('hide'); + } + + if (this.element_legend_childs.content !== null) { + $(this.element_legend_childs.content).popover('hide'); + } + } + + if (NETDATA.options.current.destroy_on_hide) { + if (this.debug) { + this.log('hideChart(): initializing chart'); + } + + // we should destroy it + init('force'); + } else { + if (this.debug) { + this.log('hideChart(): hiding chart'); + } + + showRendering(); + this.element_chart.style.display = 'none'; + this.element.style.willChange = 'auto'; + if (this.element_legend !== null) { + this.element_legend.style.display = 'none'; + } + if (this.element_legend_childs.toolbox !== null) { + this.element_legend_childs.toolbox.style.display = 'none'; + } + if (this.element_legend_childs.resize_handler !== null) { + this.element_legend_childs.resize_handler.style.display = 'none'; + } + + this.tm.last_hidden = Date.now(); + + // de-allocate data + // This works, but I not sure there are no corner cases somewhere + // so it is commented - if the user has memory issues he can + // set Destroy on Hide for all charts + // this.data = null; + } + } + + this.tmp.___chartIsHidden___ = true; + }; + + // unhide the chart, when it is visible - called from isVisible() + this.unhideChart = function () { + if (!isHidden()) { + return; + } + + this.tmp.___chartIsHidden___ = undefined; + this.updates_since_last_unhide = 0; + + if (!this.chart_created) { + if (this.debug) { + this.log('unhideChart(): initializing chart'); + } + + // we need to re-initialize it, to show our background + // logo in bootstrap tabs, until the chart loads + init('force'); + } else { + if (this.debug) { + this.log('unhideChart(): unhiding chart'); + } + + this.element.style.willChange = 'transform'; + this.tm.last_unhidden = Date.now(); + this.element_chart.style.display = ''; + if (this.element_legend !== null) { + this.element_legend.style.display = ''; + } + if (this.element_legend_childs.toolbox !== null) { + this.element_legend_childs.toolbox.style.display = ''; + } + if (this.element_legend_childs.resize_handler !== null) { + this.element_legend_childs.resize_handler.style.display = ''; + } + resizeChart(); + hideMessage(); + } + + if (this.__redraw_on_unhide) { + if (this.debug) { + this.log("redrawing chart on unhide"); + } + + this.__redraw_on_unhide = undefined; + this.redrawChart(); + } + }; + + const canBeRendered = (uncached_visibility) => { + if (this.debug) { + this.log('canBeRendered() called'); + } + + if (!NETDATA.options.current.update_only_visible) { + return true; + } + + let ret = ( + ( + NETDATA.options.page_is_visible || + NETDATA.options.current.stop_updates_when_focus_is_lost === false || + this.updates_since_last_unhide === 0 + ) + && isHidden() === false && this.isVisible(uncached_visibility) + ); + + if (this.debug) { + this.log('canBeRendered(): ' + ret); + } + + return ret; + }; + + // https://github.com/petkaantonov/bluebird/wiki/Optimization-killers + const callChartLibraryUpdateSafely = (data) => { + let status; + + // we should not do this here + // if we prevent rendering the chart then: + // 1. globalSelectionSync will be wrong + // 2. globalPanAndZoom will be wrong + //if (canBeRendered(true) === false) + // return false; + + if (NETDATA.options.fake_chart_rendering) { + return true; + } + + this.updates_counter++; + this.updates_since_last_unhide++; + this.updates_since_last_creation++; + + if (NETDATA.options.debug.chart_errors) { + status = this.library.update(that, data); + } else { + try { + status = this.library.update(that, data); + } catch (err) { + status = false; + } + } + + if (!status) { + error('chart failed to be updated as ' + this.library_name); + return false; + } + + return true; + }; + + // https://github.com/petkaantonov/bluebird/wiki/Optimization-killers + const callChartLibraryCreateSafely = (data) => { + let status; + + // we should not do this here + // if we prevent rendering the chart then: + // 1. globalSelectionSync will be wrong + // 2. globalPanAndZoom will be wrong + //if (canBeRendered(true) === false) + // return false; + + if (NETDATA.options.fake_chart_rendering) { + return true; + } + + this.updates_counter++; + this.updates_since_last_unhide++; + this.updates_since_last_creation++; + + if (NETDATA.options.debug.chart_errors) { + status = this.library.create(that, data); + } else { + try { + status = this.library.create(that, data); + } catch (err) { + status = false; + } + } + + if (!status) { + error('chart failed to be created as ' + this.library_name); + return false; + } + + this.chart_created = true; + this.updates_since_last_creation = 0; + return true; + }; + + // ---------------------------------------------------------------------------------------------------------------- + // Chart Resize + + // resizeChart() - private + // to be called just before the chart library to make sure that + // a properly sized dom is available + const resizeChart = () => { + if (this.tm.last_resized < NETDATA.options.last_page_resize) { + if (!this.chart_created) { + return; + } + + if (this.needsRecreation()) { + if (this.debug) { + this.log('resizeChart(): initializing chart'); + } + + init('force'); + } else if (typeof this.library.resize === 'function') { + if (this.debug) { + this.log('resizeChart(): resizing chart'); + } + + this.library.resize(that); + + if (this.element_legend_childs.perfect_scroller !== null) { + Ps.update(this.element_legend_childs.perfect_scroller); + } + + maxMessageFontSize(); + } + + this.tm.last_resized = Date.now(); + } + }; + + // this is the actual chart resize algorithm + // it will: + // - resize the entire container + // - update the internal states + // - resize the chart as the div changes height + // - update the scrollbar of the legend + const resizeChartToHeight = (h) => { + // console.log(h); + this.element.style.height = h; + + if (this.settings_id !== null) { + NETDATA.localStorageSet('chart_heights.' + this.settings_id, h); + } + + let now = Date.now(); + NETDATA.options.last_page_scroll = now; + NETDATA.options.auto_refresher_stop_until = now + NETDATA.options.current.stop_updates_while_resizing; + + // force a resize + this.tm.last_resized = 0; + resizeChart(); + }; + + this.resizeForPrint = function () { + if (typeof this.element_legend_childs !== 'undefined' && this.element_legend_childs.perfect_scroller !== null) { + let current = this.element.clientHeight; + let optimal = current + + this.element_legend_childs.perfect_scroller.scrollHeight + - this.element_legend_childs.perfect_scroller.clientHeight; + + if (optimal > current) { + // this.log('resized'); + this.element.style.height = optimal + 'px'; + this.library.resize(this); + } + } + }; + + this.resizeHandler = function (e) { + e.preventDefault(); + + if (typeof this.event_resize === 'undefined' + || this.event_resize.chart_original_w === 'undefined' + || this.event_resize.chart_original_h === 'undefined') { + this.event_resize = { + chart_original_w: this.element.clientWidth, + chart_original_h: this.element.clientHeight, + last: 0 + }; + } + + if (e.type === 'touchstart') { + this.event_resize.mouse_start_x = e.touches.item(0).pageX; + this.event_resize.mouse_start_y = e.touches.item(0).pageY; + } else { + this.event_resize.mouse_start_x = e.clientX; + this.event_resize.mouse_start_y = e.clientY; + } + + this.event_resize.chart_start_w = this.element.clientWidth; + this.event_resize.chart_start_h = this.element.clientHeight; + this.event_resize.chart_last_w = this.element.clientWidth; + this.event_resize.chart_last_h = this.element.clientHeight; + + let now = Date.now(); + if (now - this.event_resize.last <= NETDATA.options.current.double_click_speed && this.element_legend_childs.perfect_scroller !== null) { + // double click / double tap event + + // console.dir(this.element_legend_childs.content); + // console.dir(this.element_legend_childs.perfect_scroller); + + // the optimal height of the chart + // showing the entire legend + let optimal = this.event_resize.chart_last_h + + this.element_legend_childs.perfect_scroller.scrollHeight + - this.element_legend_childs.perfect_scroller.clientHeight; + + // if we are not optimal, be optimal + if (this.event_resize.chart_last_h !== optimal) { + // this.log('resize to optimal, current = ' + this.event_resize.chart_last_h.toString() + 'px, original = ' + this.event_resize.chart_original_h.toString() + 'px, optimal = ' + optimal.toString() + 'px, internal = ' + this.height_original.toString()); + resizeChartToHeight(optimal.toString() + 'px'); + } + + // else if the current height is not the original/saved height + // reset to the original/saved height + else if (this.event_resize.chart_last_h !== this.event_resize.chart_original_h) { + // this.log('resize to original, current = ' + this.event_resize.chart_last_h.toString() + 'px, original = ' + this.event_resize.chart_original_h.toString() + 'px, optimal = ' + optimal.toString() + 'px, internal = ' + this.height_original.toString()); + resizeChartToHeight(this.event_resize.chart_original_h.toString() + 'px'); + } + + // else if the current height is not the internal default height + // reset to the internal default height + else if ((this.event_resize.chart_last_h.toString() + 'px') !== this.height_original) { + // this.log('resize to internal default, current = ' + this.event_resize.chart_last_h.toString() + 'px, original = ' + this.event_resize.chart_original_h.toString() + 'px, optimal = ' + optimal.toString() + 'px, internal = ' + this.height_original.toString()); + resizeChartToHeight(this.height_original.toString()); + } + + // else if the current height is not the firstchild's clientheight + // resize to it + else if (typeof this.element_legend_childs.perfect_scroller.firstChild !== 'undefined') { + let parent_rect = this.element.getBoundingClientRect(); + let content_rect = this.element_legend_childs.perfect_scroller.firstElementChild.getBoundingClientRect(); + let wanted = content_rect.top - parent_rect.top + this.element_legend_childs.perfect_scroller.firstChild.clientHeight + 18; // 15 = toolbox + 3 space + + // console.log(parent_rect); + // console.log(content_rect); + // console.log(wanted); + + // this.log('resize to firstChild, current = ' + this.event_resize.chart_last_h.toString() + 'px, original = ' + this.event_resize.chart_original_h.toString() + 'px, optimal = ' + optimal.toString() + 'px, internal = ' + this.height_original.toString() + 'px, firstChild = ' + wanted.toString() + 'px' ); + if (this.event_resize.chart_last_h !== wanted) { + resizeChartToHeight(wanted.toString() + 'px'); + } + } + } else { + this.event_resize.last = now; + + // process movement event + document.onmousemove = + document.ontouchmove = + this.element_legend_childs.resize_handler.onmousemove = + this.element_legend_childs.resize_handler.ontouchmove = + function (e) { + let y = null; + + switch (e.type) { + case 'mousemove': + y = e.clientY; + break; + case 'touchmove': + y = e.touches.item(e.touches - 1).pageY; + break; + } + + if (y !== null) { + let newH = that.event_resize.chart_start_h + y - that.event_resize.mouse_start_y; + + if (newH >= 70 && newH !== that.event_resize.chart_last_h) { + resizeChartToHeight(newH.toString() + 'px'); + that.event_resize.chart_last_h = newH; + } + } + }; + + // process end event + document.onmouseup = + document.ontouchend = + this.element_legend_childs.resize_handler.onmouseup = + this.element_legend_childs.resize_handler.ontouchend = + function (e) { + void(e); + + // remove all the hooks + document.onmouseup = + document.onmousemove = + document.ontouchmove = + document.ontouchend = + that.element_legend_childs.resize_handler.onmousemove = + that.element_legend_childs.resize_handler.ontouchmove = + that.element_legend_childs.resize_handler.onmouseout = + that.element_legend_childs.resize_handler.onmouseup = + that.element_legend_childs.resize_handler.ontouchend = + null; + + // allow auto-refreshes + NETDATA.options.auto_refresher_stop_until = 0; + }; + } + }; + + const noDataToShow = () => { + showMessageIcon(NETDATA.icons.noData + ' empty'); + this.legendUpdateDOM(); + this.tm.last_autorefreshed = Date.now(); + // this.data_update_every = 30 * 1000; + //this.element_chart.style.display = 'none'; + //if (this.element_legend !== null) this.element_legend.style.display = 'none'; + //this.tmp.___chartIsHidden___ = true; + }; + + // ============================================================================================================ + // PUBLIC FUNCTIONS + + this.error = function (msg) { + error(msg); + }; + + this.setMode = function (m) { + if (this.current !== null && this.current.name === m) { + return; + } + + if (m === 'auto') { + this.current = this.auto; + } else if (m === 'pan') { + this.current = this.pan; + } else if (m === 'zoom') { + this.current = this.zoom; + } else { + this.current = this.auto; + } + + this.current.force_update_at = 0; + this.current.force_before_ms = null; + this.current.force_after_ms = null; + + this.tm.last_mode_switch = Date.now(); + }; + + // ---------------------------------------------------------------------------------------------------------------- + // global selection sync for slaves + + // can the chart participate to the global selection sync as a slave? + this.globalSelectionSyncIsEligible = function () { + return ( + this.enabled && + this.library !== null && + typeof this.library.setSelection === 'function' && + this.isVisible() && + this.chart_created + ); + }; + + this.setSelection = function (t) { + if (typeof this.library.setSelection === 'function') { + // this.selected = this.library.setSelection(this, t) === true; + this.selected = this.library.setSelection(this, t); + } else { + this.selected = true; + } + + if (this.selected && this.debug) { + this.log('selection set to ' + t.toString()); + } + + if (this.foreignElementSelection !== null) { + this.foreignElementSelection.innerText = NETDATA.dateTime.localeDateString(t) + ' ' + NETDATA.dateTime.localeTimeString(t); + } + + return this.selected; + }; + + this.clearSelection = function () { + if (this.selected) { + if (typeof this.library.clearSelection === 'function') { + this.selected = (this.library.clearSelection(this) !== true); + } else { + this.selected = false; + } + + if (this.selected === false && this.debug) { + this.log('selection cleared'); + } + + if (this.foreignElementSelection !== null) { + this.foreignElementSelection.innerText = ''; + } + + this.legendReset(); + } + + return this.selected; + }; + + // ---------------------------------------------------------------------------------------------------------------- + + // find if a timestamp (ms) is shown in the current chart + this.timeIsVisible = function (t) { + return (t >= this.data_after && t <= this.data_before); + }; + + this.calculateRowForTime = function (t) { + if (!this.timeIsVisible(t)) { + return -1; + } + return Math.floor((t - this.data_after) / this.data_update_every); + }; + + // ---------------------------------------------------------------------------------------------------------------- + + this.pauseChart = function () { + if (!this.paused) { + if (this.debug) { + this.log('pauseChart()'); + } + + this.paused = true; + } + }; + + this.unpauseChart = function () { + if (this.paused) { + if (this.debug) { + this.log('unpauseChart()'); + } + + this.paused = false; + } + }; + + this.resetChart = function (dontClearMaster, dontUpdate) { + if (this.debug) { + this.log('resetChart(' + dontClearMaster + ', ' + dontUpdate + ') called'); + } + + if (typeof dontClearMaster === 'undefined') { + dontClearMaster = false; + } + + if (typeof dontUpdate === 'undefined') { + dontUpdate = false; + } + + if (dontClearMaster !== true && NETDATA.globalPanAndZoom.isMaster(this)) { + if (this.debug) { + this.log('resetChart() diverting to clearMaster().'); + } + // this will call us back with master === true + NETDATA.globalPanAndZoom.clearMaster(); + return; + } + + this.clearSelection(); + + this.tm.pan_and_zoom_seq = 0; + + this.setMode('auto'); + this.current.force_update_at = 0; + this.current.force_before_ms = null; + this.current.force_after_ms = null; + this.tm.last_autorefreshed = 0; + this.paused = false; + this.selected = false; + this.enabled = true; + // this.debug = false; + + // do not update the chart here + // or the chart will flip-flop when it is the master + // of a selection sync and another chart becomes + // the new master + + if (dontUpdate !== true && this.isVisible()) { + this.updateChart(); + } + }; + + this.updateChartPanOrZoom = function (after, before, callback) { + let logme = 'updateChartPanOrZoom(' + after + ', ' + before + '): '; + let ret = true; + + NETDATA.globalPanAndZoom.delay(); + NETDATA.globalSelectionSync.delay(); + + if (this.debug) { + this.log(logme); + } + + if (before < after) { + if (this.debug) { + this.log(logme + 'flipped parameters, rejecting it.'); + } + return false; + } + + if (typeof this.fixed_min_duration === 'undefined') { + this.fixed_min_duration = Math.round((this.chartWidth() / 30) * this.chart.update_every * 1000); + } + + let min_duration = this.fixed_min_duration; + let current_duration = Math.round(this.view_before - this.view_after); + + // round the numbers + after = Math.round(after); + before = Math.round(before); + + // align them to update_every + // stretching them further away + after -= after % this.data_update_every; + before += this.data_update_every - (before % this.data_update_every); + + // the final wanted duration + let wanted_duration = before - after; + + // to allow panning, accept just a point below our minimum + if ((current_duration - this.data_update_every) < min_duration) { + min_duration = current_duration - this.data_update_every; + } + + // we do it, but we adjust to minimum size and return false + // when the wanted size is below the current and the minimum + // and we zoom + if (wanted_duration < current_duration && wanted_duration < min_duration) { + if (this.debug) { + this.log(logme + 'too small: min_duration: ' + (min_duration / 1000).toString() + ', wanted: ' + (wanted_duration / 1000).toString()); + } + + min_duration = this.fixed_min_duration; + + let dt = (min_duration - wanted_duration) / 2; + before += dt; + after -= dt; + wanted_duration = before - after; + ret = false; + } + + let tolerance = this.data_update_every * 2; + let movement = Math.abs(before - this.view_before); + + if (Math.abs(current_duration - wanted_duration) <= tolerance && movement <= tolerance && ret) { + if (this.debug) { + this.log(logme + 'REJECTING UPDATE: current/min duration: ' + (current_duration / 1000).toString() + '/' + (this.fixed_min_duration / 1000).toString() + ', wanted duration: ' + (wanted_duration / 1000).toString() + ', duration diff: ' + (Math.round(Math.abs(current_duration - wanted_duration) / 1000)).toString() + ', movement: ' + (movement / 1000).toString() + ', tolerance: ' + (tolerance / 1000).toString() + ', returning: ' + false); + } + return false; + } + + if (this.current.name === 'auto') { + this.log(logme + 'caller called me with mode: ' + this.current.name); + this.setMode('pan'); + } + + if (this.debug) { + this.log(logme + 'ACCEPTING UPDATE: current/min duration: ' + (current_duration / 1000).toString() + '/' + (this.fixed_min_duration / 1000).toString() + ', wanted duration: ' + (wanted_duration / 1000).toString() + ', duration diff: ' + (Math.round(Math.abs(current_duration - wanted_duration) / 1000)).toString() + ', movement: ' + (movement / 1000).toString() + ', tolerance: ' + (tolerance / 1000).toString() + ', returning: ' + ret); + } + + this.current.force_update_at = Date.now() + NETDATA.options.current.pan_and_zoom_delay; + this.current.force_after_ms = after; + this.current.force_before_ms = before; + NETDATA.globalPanAndZoom.setMaster(this, after, before); + + if (ret && typeof callback === 'function') { + callback(); + } + + return ret; + }; + + this.updateChartPanOrZoomAsyncTimeOutId = undefined; + this.updateChartPanOrZoomAsync = function (after, before, callback) { + NETDATA.globalPanAndZoom.delay(); + NETDATA.globalSelectionSync.delay(); + + if (!NETDATA.globalPanAndZoom.isMaster(this)) { + this.pauseChart(); + NETDATA.globalPanAndZoom.setMaster(this, after, before); + // NETDATA.globalSelectionSync.stop(); + NETDATA.globalSelectionSync.setMaster(this); + } + + if (this.updateChartPanOrZoomAsyncTimeOutId) { + NETDATA.timeout.clear(this.updateChartPanOrZoomAsyncTimeOutId); + } + + NETDATA.timeout.set(function () { + that.updateChartPanOrZoomAsyncTimeOutId = undefined; + that.updateChartPanOrZoom(after, before, callback); + }, 0); + }; + + let _unitsConversionLastUnits = undefined; + let _unitsConversionLastUnitsDesired = undefined; + let _unitsConversionLastMin = undefined; + let _unitsConversionLastMax = undefined; + let _unitsConversion = function (value) { + return value; + }; + this.unitsConversionSetup = function (min, max) { + if (this.units !== _unitsConversionLastUnits + || this.units_desired !== _unitsConversionLastUnitsDesired + || min !== _unitsConversionLastMin + || max !== _unitsConversionLastMax) { + + _unitsConversionLastUnits = this.units; + _unitsConversionLastUnitsDesired = this.units_desired; + _unitsConversionLastMin = min; + _unitsConversionLastMax = max; + + _unitsConversion = NETDATA.unitsConversion.get(this.uuid, min, max, this.units, this.units_desired, this.units_common, function (units) { + // console.log('switching units from ' + that.units.toString() + ' to ' + units.toString()); + that.units_current = units; + that.legendSetUnitsString(that.units_current); + }); + } + }; + + let _legendFormatValueChartDecimalsLastMin = undefined; + let _legendFormatValueChartDecimalsLastMax = undefined; + let _legendFormatValueChartDecimals = -1; + let _intlNumberFormat = null; + this.legendFormatValueDecimalsFromMinMax = function (min, max) { + if (min === _legendFormatValueChartDecimalsLastMin && max === _legendFormatValueChartDecimalsLastMax) { + return; + } + + this.unitsConversionSetup(min, max); + if (_unitsConversion !== null) { + min = _unitsConversion(min); + max = _unitsConversion(max); + + if (typeof min !== 'number' || typeof max !== 'number') { + return; + } + } + + _legendFormatValueChartDecimalsLastMin = min; + _legendFormatValueChartDecimalsLastMax = max; + + let old = _legendFormatValueChartDecimals; + + if (this.data !== null && this.data.min === this.data.max) + // it is a fixed number, let the visualizer decide based on the value + { + _legendFormatValueChartDecimals = -1; + } else if (this.value_decimal_detail !== -1) + // there is an override + { + _legendFormatValueChartDecimals = this.value_decimal_detail; + } else { + // ok, let's calculate the proper number of decimal points + let delta; + + if (min === max) { + delta = Math.abs(min); + } else { + delta = Math.abs(max - min); + } + + if (delta > 1000) { + _legendFormatValueChartDecimals = 0; + } else if (delta > 10) { + _legendFormatValueChartDecimals = 1; + } else if (delta > 1) { + _legendFormatValueChartDecimals = 2; + } else if (delta > 0.1) { + _legendFormatValueChartDecimals = 2; + } else if (delta > 0.01) { + _legendFormatValueChartDecimals = 4; + } else if (delta > 0.001) { + _legendFormatValueChartDecimals = 5; + } else if (delta > 0.0001) { + _legendFormatValueChartDecimals = 6; + } else { + _legendFormatValueChartDecimals = 7; + } + } + + if (_legendFormatValueChartDecimals !== old) { + if (_legendFormatValueChartDecimals < 0) { + _intlNumberFormat = null; + } else { + _intlNumberFormat = NETDATA.fastNumberFormat.get( + _legendFormatValueChartDecimals, + _legendFormatValueChartDecimals + ); + } + } + }; + + this.legendFormatValue = function (value) { + if (typeof value !== 'number') { + return '-'; + } + + value = _unitsConversion(value); + + if (typeof value !== 'number') { + return value; + } + + if (_intlNumberFormat !== null) { + return _intlNumberFormat.format(value); + } + + let dmin, dmax; + if (this.value_decimal_detail !== -1) { + dmin = dmax = this.value_decimal_detail; + } else { + dmin = 0; + let abs = (value < 0) ? -value : value; + if (abs > 1000) { + dmax = 0; + } else if (abs > 10) { + dmax = 1; + } else if (abs > 1) { + dmax = 2; + } else if (abs > 0.1) { + dmax = 2; + } else if (abs > 0.01) { + dmax = 4; + } else if (abs > 0.001) { + dmax = 5; + } else if (abs > 0.0001) { + dmax = 6; + } else { + dmax = 7; + } + } + + return NETDATA.fastNumberFormat.get(dmin, dmax).format(value); + }; + + this.legendSetLabelValue = function (label, value) { + let series = this.element_legend_childs.series[label]; + if (typeof series === 'undefined') { + return; + } + if (series.value === null && series.user === null) { + return; + } + + /* + // this slows down firefox and edge significantly + // since it requires to use innerHTML(), instead of innerText() + + // if the value has not changed, skip DOM update + //if (series.last === value) return; + + let s, r; + if (typeof value === 'number') { + let v = Math.abs(value); + s = r = this.legendFormatValue(value); + + if (typeof series.last === 'number') { + if (v > series.last) s += ''; + else if (v < series.last) s += ''; + else s += ''; + } + else s += ''; + + series.last = v; + } + else { + if (value === null) + s = r = ''; + else + s = r = value; + + series.last = value; + } + */ + + let s = this.legendFormatValue(value); + + // caching: do not update the update to show the same value again + if (s === series.last_shown_value) { + return; + } + series.last_shown_value = s; + + if (series.value !== null) { + series.value.innerText = s; + } + if (series.user !== null) { + series.user.innerText = s; + } + }; + + this.legendSetDateString = function (date) { + if (this.element_legend_childs.title_date !== null && date !== this.tmp.__last_shown_legend_date) { + this.element_legend_childs.title_date.innerText = date; + this.tmp.__last_shown_legend_date = date; + } + }; + + this.legendSetTimeString = function (time) { + if (this.element_legend_childs.title_time !== null && time !== this.tmp.__last_shown_legend_time) { + this.element_legend_childs.title_time.innerText = time; + this.tmp.__last_shown_legend_time = time; + } + }; + + this.legendSetUnitsString = function (units) { + if (this.element_legend_childs.title_units !== null && units !== this.tmp.__last_shown_legend_units) { + this.element_legend_childs.title_units.innerText = units; + this.tmp.__last_shown_legend_units = units; + } + }; + + this.legendSetDateLast = { + ms: 0, + date: undefined, + time: undefined + }; + + this.legendSetDate = function (ms) { + if (typeof ms !== 'number') { + this.legendShowUndefined(); + return; + } + + if (this.legendSetDateLast.ms !== ms) { + let d = new Date(ms); + this.legendSetDateLast.ms = ms; + this.legendSetDateLast.date = NETDATA.dateTime.localeDateString(d); + this.legendSetDateLast.time = NETDATA.dateTime.localeTimeString(d); + } + + this.legendSetDateString(this.legendSetDateLast.date); + this.legendSetTimeString(this.legendSetDateLast.time); + this.legendSetUnitsString(this.units_current) + }; + + this.legendShowUndefined = function () { + this.legendSetDateString(this.legendPluginModuleString(false)); + this.legendSetTimeString(this.chart.context.toString()); + // this.legendSetUnitsString(' '); + + if (this.data && this.element_legend_childs.series !== null) { + let labels = this.data.dimension_names; + let i = labels.length; + while (i--) { + let label = labels[i]; + + if (typeof label === 'undefined' || typeof this.element_legend_childs.series[label] === 'undefined') { + continue; + } + this.legendSetLabelValue(label, null); + } + } + }; + + this.legendShowLatestValues = function () { + if (this.chart === null) { + return; + } + if (this.selected) { + return; + } + + if (this.data === null || this.element_legend_childs.series === null) { + this.legendShowUndefined(); + return; + } + + let show_undefined = true; + if (Math.abs(this.netdata_last - this.view_before) <= this.data_update_every) { + show_undefined = false; + } + + if (show_undefined) { + this.legendShowUndefined(); + return; + } + + this.legendSetDate(this.view_before); + + let labels = this.data.dimension_names; + let i = labels.length; + while (i--) { + let label = labels[i]; + + if (typeof label === 'undefined') { + continue; + } + if (typeof this.element_legend_childs.series[label] === 'undefined') { + continue; + } + + this.legendSetLabelValue(label, this.data.view_latest_values[i]); + } + }; + + this.legendReset = function () { + this.legendShowLatestValues(); + }; + + // this should be called just ONCE per dimension per chart + this.__chartDimensionColor = function (label) { + let c = NETDATA.commonColors.get(this, label); + + // it is important to maintain a list of colors + // for this chart only, since the chart library + // uses this to assign colors to dimensions in the same + // order the dimension are given to it + this.colors.push(c); + + return c; + }; + + this.chartPrepareColorPalette = function () { + NETDATA.commonColors.refill(this); + }; + + // get the ordered list of chart colors + // this includes user defined colors + this.chartCustomColors = function () { + this.chartPrepareColorPalette(); + + let colors; + if (this.colors_custom.length) { + colors = this.colors_custom; + } else { + colors = this.colors; + } + + if (this.debug) { + this.log("chartCustomColors() returns:"); + this.log(colors); + } + + return colors; + }; + + // get the ordered list of chart ASSIGNED colors + // (this returns only the colors that have been + // assigned to dimensions, prepended with any + // custom colors defined) + this.chartColors = function () { + this.chartPrepareColorPalette(); + + if (this.debug) { + this.log("chartColors() returns:"); + this.log(this.colors); + } + + return this.colors; + }; + + this.legendPluginModuleString = function (withContext) { + let str = ' '; + let context = ''; + + if (typeof this.chart !== 'undefined') { + if (withContext && typeof this.chart.context === 'string') { + context = this.chart.context; + } + + if (typeof this.chart.plugin === 'string' && this.chart.plugin !== '') { + str = this.chart.plugin; + + if (str.endsWith(".plugin")) { + str = str.substring(0, str.length - 7); + } + + if (typeof this.chart.module === 'string' && this.chart.module !== '') { + str += ':' + this.chart.module; + } + + if (withContext && context !== '') { + str += ', ' + context; + } + } + else if (withContext && context !== '') { + str = context; + } + } + + return str; + }; + + this.legendResolutionTooltip = function () { + if (!this.chart) { + return ''; + } + + let collected = this.chart.update_every; + let viewed = (this.data) ? this.data.view_update_every : collected; + + if (collected === viewed) { + return "resolution " + NETDATA.seconds4human(collected); + } + + return "resolution " + NETDATA.seconds4human(viewed) + ", collected every " + NETDATA.seconds4human(collected); + }; + + this.legendUpdateDOM = function () { + let needed = false, dim, keys, len; + + // check that the legend DOM is up to date for the downloaded dimensions + if (typeof this.element_legend_childs.series !== 'object' || this.element_legend_childs.series === null) { + // this.log('the legend does not have any series - requesting legend update'); + needed = true; + } else if (this.data === null) { + // this.log('the chart does not have any data - requesting legend update'); + needed = true; + } else if (typeof this.element_legend_childs.series.labels_key === 'undefined') { + needed = true; + } else { + let labels = this.data.dimension_names.toString(); + if (labels !== this.element_legend_childs.series.labels_key) { + needed = true; + + if (this.debug) { + this.log('NEW LABELS: "' + labels + '" NOT EQUAL OLD LABELS: "' + this.element_legend_childs.series.labels_key + '"'); + } + } + } + + if (!needed) { + // make sure colors available + this.chartPrepareColorPalette(); + + // do we have to update the current values? + // we do this, only when the visible chart is current + if (Math.abs(this.netdata_last - this.view_before) <= this.data_update_every) { + if (this.debug) { + this.log('chart is in latest position... updating values on legend...'); + } + + //let labels = this.data.dimension_names; + //let i = labels.length; + //while (i--) + // this.legendSetLabelValue(labels[i], this.data.view_latest_values[i]); + } + return; + } + + if (this.colors === null) { + // this is the first time we update the chart + // let's assign colors to all dimensions + if (this.library.track_colors()) { + this.colors = []; + keys = Object.keys(this.chart.dimensions); + len = keys.length; + for (let i = 0; i < len; i++) { + NETDATA.commonColors.get(this, this.chart.dimensions[keys[i]].name); + } + } + } + + // we will re-generate the colors for the chart + // based on the dimensions this result has data for + this.colors = []; + + if (this.debug) { + this.log('updating Legend DOM'); + } + + // mark all dimensions as invalid + this.dimensions_visibility.invalidateAll(); + + const genLabel = function (state, parent, dim, name, count) { + let color = state.__chartDimensionColor(name); + + let user_element = null; + let user_id = NETDATA.dataAttribute(state.element, 'show-value-of-' + name.toLowerCase() + '-at', null); + if (user_id === null) { + user_id = NETDATA.dataAttribute(state.element, 'show-value-of-' + dim.toLowerCase() + '-at', null); + } + if (user_id !== null) { + user_element = document.getElementById(user_id) || null; + if (user_element === null) { + state.log('Cannot find element with id: ' + user_id); + } + } + + state.element_legend_childs.series[name] = { + name: document.createElement('span'), + value: document.createElement('span'), + user: user_element, + last: null, + last_shown_value: null + }; + + let label = state.element_legend_childs.series[name]; + + // create the dimension visibility tracking for this label + state.dimensions_visibility.dimensionAdd(name, label.name, label.value, color); + + let rgb = NETDATA.colorHex2Rgb(color); + label.name.innerHTML = '
    '; + + let text = document.createTextNode(' ' + name); + label.name.appendChild(text); + + if (count > 0) { + parent.appendChild(document.createElement('br')); + } + + parent.appendChild(label.name); + parent.appendChild(label.value); + }; + + let content = document.createElement('div'); + + if (this.element_chart === null) { + this.element_chart = document.createElement('div'); + this.element_chart.id = this.library_name + '-' + this.uuid + '-chart'; + this.element.appendChild(this.element_chart); + + if (this.hasLegend()) { + this.element_chart.className = 'netdata-chart-with-legend-right netdata-' + this.library_name + '-chart-with-legend-right'; + } else { + this.element_chart.className = ' netdata-chart netdata-' + this.library_name + '-chart'; + } + } + + if (this.hasLegend()) { + if (this.element_legend === null) { + this.element_legend = document.createElement('div'); + this.element_legend.className = 'netdata-chart-legend netdata-' + this.library_name + '-legend'; + this.element.appendChild(this.element_legend); + } else { + this.element_legend.innerHTML = ''; + } + + this.element_legend_childs = { + content: content, + resize_handler: null, + toolbox: null, + toolbox_left: null, + toolbox_right: null, + toolbox_reset: null, + toolbox_zoomin: null, + toolbox_zoomout: null, + toolbox_volume: null, + title_date: document.createElement('span'), + title_time: document.createElement('span'), + title_units: document.createElement('span'), + perfect_scroller: document.createElement('div'), + series: {} + }; + + if (NETDATA.options.current.legend_toolbox && this.library.toolboxPanAndZoom !== null) { + this.element_legend_childs.toolbox = document.createElement('div'); + this.element_legend_childs.toolbox_left = document.createElement('div'); + this.element_legend_childs.toolbox_right = document.createElement('div'); + this.element_legend_childs.toolbox_reset = document.createElement('div'); + this.element_legend_childs.toolbox_zoomin = document.createElement('div'); + this.element_legend_childs.toolbox_zoomout = document.createElement('div'); + this.element_legend_childs.toolbox_volume = document.createElement('div'); + + const getPanAndZoomStep = function (event) { + if (event.ctrlKey) { + return NETDATA.options.current.pan_and_zoom_factor * NETDATA.options.current.pan_and_zoom_factor_multiplier_control; + } else if (event.shiftKey) { + return NETDATA.options.current.pan_and_zoom_factor * NETDATA.options.current.pan_and_zoom_factor_multiplier_shift; + } else if (event.altKey) { + return NETDATA.options.current.pan_and_zoom_factor * NETDATA.options.current.pan_and_zoom_factor_multiplier_alt; + } else { + return NETDATA.options.current.pan_and_zoom_factor; + } + }; + + this.element_legend_childs.toolbox.className += ' netdata-legend-toolbox'; + this.element.appendChild(this.element_legend_childs.toolbox); + + this.element_legend_childs.toolbox_left.className += ' netdata-legend-toolbox-button'; + this.element_legend_childs.toolbox_left.innerHTML = NETDATA.icons.left; + this.element_legend_childs.toolbox.appendChild(this.element_legend_childs.toolbox_left); + this.element_legend_childs.toolbox_left.onclick = function (e) { + e.preventDefault(); + + let step = (that.view_before - that.view_after) * getPanAndZoomStep(e); + let before = that.view_before - step; + let after = that.view_after - step; + if (after >= that.netdata_first) { + that.library.toolboxPanAndZoom(that, after, before); + } + }; + if (NETDATA.options.current.show_help) { + $(this.element_legend_childs.toolbox_left).popover({ + container: "body", + animation: false, + html: true, + trigger: 'hover', + placement: 'bottom', + delay: { + show: NETDATA.options.current.show_help_delay_show_ms, + hide: NETDATA.options.current.show_help_delay_hide_ms + }, + title: 'Pan Left', + content: 'Pan the chart to the left. You can also drag it with your mouse or your finger (on touch devices).
    Help can be disabled from the settings.' + }); + } + + this.element_legend_childs.toolbox_reset.className += ' netdata-legend-toolbox-button'; + this.element_legend_childs.toolbox_reset.innerHTML = NETDATA.icons.reset; + this.element_legend_childs.toolbox.appendChild(this.element_legend_childs.toolbox_reset); + this.element_legend_childs.toolbox_reset.onclick = function (e) { + e.preventDefault(); + NETDATA.resetAllCharts(that); + }; + if (NETDATA.options.current.show_help) { + $(this.element_legend_childs.toolbox_reset).popover({ + container: "body", + animation: false, + html: true, + trigger: 'hover', + placement: 'bottom', + delay: { + show: NETDATA.options.current.show_help_delay_show_ms, + hide: NETDATA.options.current.show_help_delay_hide_ms + }, + title: 'Chart Reset', + content: 'Reset all the charts to their default auto-refreshing state. You can also double click the chart contents with your mouse or your finger (on touch devices).
    Help can be disabled from the settings.' + }); + } + + this.element_legend_childs.toolbox_right.className += ' netdata-legend-toolbox-button'; + this.element_legend_childs.toolbox_right.innerHTML = NETDATA.icons.right; + this.element_legend_childs.toolbox.appendChild(this.element_legend_childs.toolbox_right); + this.element_legend_childs.toolbox_right.onclick = function (e) { + e.preventDefault(); + let step = (that.view_before - that.view_after) * getPanAndZoomStep(e); + let before = that.view_before + step; + let after = that.view_after + step; + if (before <= that.netdata_last) { + that.library.toolboxPanAndZoom(that, after, before); + } + }; + if (NETDATA.options.current.show_help) { + $(this.element_legend_childs.toolbox_right).popover({ + container: "body", + animation: false, + html: true, + trigger: 'hover', + placement: 'bottom', + delay: { + show: NETDATA.options.current.show_help_delay_show_ms, + hide: NETDATA.options.current.show_help_delay_hide_ms + }, + title: 'Pan Right', + content: 'Pan the chart to the right. You can also drag it with your mouse or your finger (on touch devices).
    Help can be disabled from the settings.' + }); + } + + this.element_legend_childs.toolbox_zoomin.className += ' netdata-legend-toolbox-button'; + this.element_legend_childs.toolbox_zoomin.innerHTML = NETDATA.icons.zoomIn; + this.element_legend_childs.toolbox.appendChild(this.element_legend_childs.toolbox_zoomin); + this.element_legend_childs.toolbox_zoomin.onclick = function (e) { + e.preventDefault(); + let dt = ((that.view_before - that.view_after) * (getPanAndZoomStep(e) * 0.8) / 2); + let before = that.view_before - dt; + let after = that.view_after + dt; + that.library.toolboxPanAndZoom(that, after, before); + }; + if (NETDATA.options.current.show_help) { + $(this.element_legend_childs.toolbox_zoomin).popover({ + container: "body", + animation: false, + html: true, + trigger: 'hover', + placement: 'bottom', + delay: { + show: NETDATA.options.current.show_help_delay_show_ms, + hide: NETDATA.options.current.show_help_delay_hide_ms + }, + title: 'Chart Zoom In', + content: 'Zoom in the chart. You can also press SHIFT and select an area of the chart, or press SHIFT or ALT and use the mouse wheel or 2-finger touchpad scroll to zoom in or out.
    Help can be disabled from the settings.' + }); + } + + this.element_legend_childs.toolbox_zoomout.className += ' netdata-legend-toolbox-button'; + this.element_legend_childs.toolbox_zoomout.innerHTML = NETDATA.icons.zoomOut; + this.element_legend_childs.toolbox.appendChild(this.element_legend_childs.toolbox_zoomout); + this.element_legend_childs.toolbox_zoomout.onclick = function (e) { + e.preventDefault(); + let dt = (((that.view_before - that.view_after) / (1.0 - (getPanAndZoomStep(e) * 0.8)) - (that.view_before - that.view_after)) / 2); + let before = that.view_before + dt; + let after = that.view_after - dt; + + that.library.toolboxPanAndZoom(that, after, before); + }; + if (NETDATA.options.current.show_help) { + $(this.element_legend_childs.toolbox_zoomout).popover({ + container: "body", + animation: false, + html: true, + trigger: 'hover', + placement: 'bottom', + delay: { + show: NETDATA.options.current.show_help_delay_show_ms, + hide: NETDATA.options.current.show_help_delay_hide_ms + }, + title: 'Chart Zoom Out', + content: 'Zoom out the chart. You can also press SHIFT or ALT and use the mouse wheel, or 2-finger touchpad scroll to zoom in or out.
    Help can be disabled from the settings.' + }); + } + + //this.element_legend_childs.toolbox_volume.className += ' netdata-legend-toolbox-button'; + //this.element_legend_childs.toolbox_volume.innerHTML = ''; + //this.element_legend_childs.toolbox_volume.title = 'Visible Volume'; + //this.element_legend_childs.toolbox.appendChild(this.element_legend_childs.toolbox_volume); + //this.element_legend_childs.toolbox_volume.onclick = function(e) { + //e.preventDefault(); + //alert('clicked toolbox_volume on ' + that.id); + //} + } + + if (NETDATA.options.current.resize_charts) { + this.element_legend_childs.resize_handler = document.createElement('div'); + + this.element_legend_childs.resize_handler.className += " netdata-legend-resize-handler"; + this.element_legend_childs.resize_handler.innerHTML = NETDATA.icons.resize; + this.element.appendChild(this.element_legend_childs.resize_handler); + if (NETDATA.options.current.show_help) { + $(this.element_legend_childs.resize_handler).popover({ + container: "body", + animation: false, + html: true, + trigger: 'hover', + placement: 'bottom', + delay: { + show: NETDATA.options.current.show_help_delay_show_ms, + hide: NETDATA.options.current.show_help_delay_hide_ms + }, + title: 'Chart Resize', + content: 'Drag this point with your mouse or your finger (on touch devices), to resize the chart vertically. You can also double click it or double tap it to reset between 2 states: the default and the one that fits all the values.
    Help can be disabled from the settings.' + }); + } + + // mousedown event + this.element_legend_childs.resize_handler.onmousedown = + function (e) { + that.resizeHandler(e); + }; + + // touchstart event + this.element_legend_childs.resize_handler.addEventListener('touchstart', function (e) { + that.resizeHandler(e); + }, false); + } + + if (this.chart) { + this.element_legend_childs.title_date.title = this.legendPluginModuleString(true); + this.element_legend_childs.title_time.title = this.legendResolutionTooltip(); + } + + this.element_legend_childs.title_date.className += " netdata-legend-title-date"; + this.element_legend.appendChild(this.element_legend_childs.title_date); + this.tmp.__last_shown_legend_date = undefined; + + this.element_legend.appendChild(document.createElement('br')); + + this.element_legend_childs.title_time.className += " netdata-legend-title-time"; + this.element_legend.appendChild(this.element_legend_childs.title_time); + this.tmp.__last_shown_legend_time = undefined; + + this.element_legend.appendChild(document.createElement('br')); + + this.element_legend_childs.title_units.className += " netdata-legend-title-units"; + this.element_legend_childs.title_units.innerText = this.units_current; + this.element_legend.appendChild(this.element_legend_childs.title_units); + this.tmp.__last_shown_legend_units = undefined; + + this.element_legend.appendChild(document.createElement('br')); + + this.element_legend_childs.perfect_scroller.className = 'netdata-legend-series'; + this.element_legend.appendChild(this.element_legend_childs.perfect_scroller); + + content.className = 'netdata-legend-series-content'; + this.element_legend_childs.perfect_scroller.appendChild(content); + + this.element_legend_childs.content = content; + + if (NETDATA.options.current.show_help) { + $(content).popover({ + container: "body", + animation: false, + html: true, + trigger: 'hover', + placement: 'bottom', + title: 'Chart Legend', + delay: { + show: NETDATA.options.current.show_help_delay_show_ms, + hide: NETDATA.options.current.show_help_delay_hide_ms + }, + content: 'You can click or tap on the values or the labels to select dimensions. By pressing SHIFT or CONTROL, you can enable or disable multiple dimensions.
    Help can be disabled from the settings.' + }); + } + } else { + this.element_legend_childs = { + content: content, + resize_handler: null, + toolbox: null, + toolbox_left: null, + toolbox_right: null, + toolbox_reset: null, + toolbox_zoomin: null, + toolbox_zoomout: null, + toolbox_volume: null, + title_date: null, + title_time: null, + title_units: null, + perfect_scroller: null, + series: {} + }; + } + + if (this.data) { + this.element_legend_childs.series.labels_key = this.data.dimension_names.toString(); + if (this.debug) { + this.log('labels from data: "' + this.element_legend_childs.series.labels_key + '"'); + } + + for (let i = 0, len = this.data.dimension_names.length; i < len; i++) { + genLabel(this, content, this.data.dimension_ids[i], this.data.dimension_names[i], i); + } + } else { + let tmp = []; + keys = Object.keys(this.chart.dimensions); + for (let i = 0, len = keys.length; i < len; i++) { + dim = keys[i]; + tmp.push(this.chart.dimensions[dim].name); + genLabel(this, content, dim, this.chart.dimensions[dim].name, i); + } + this.element_legend_childs.series.labels_key = tmp.toString(); + if (this.debug) { + this.log('labels from chart: "' + this.element_legend_childs.series.labels_key + '"'); + } + } + + // create a hidden div to be used for hiding + // the original legend of the chart library + let el = document.createElement('div'); + if (this.element_legend !== null) { + this.element_legend.appendChild(el); + } + el.style.display = 'none'; + + this.element_legend_childs.hidden = document.createElement('div'); + el.appendChild(this.element_legend_childs.hidden); + + if (this.element_legend_childs.perfect_scroller !== null) { + Ps.initialize(this.element_legend_childs.perfect_scroller, { + wheelSpeed: 0.2, + wheelPropagation: true, + swipePropagation: true, + minScrollbarLength: null, + maxScrollbarLength: null, + useBothWheelAxes: false, + suppressScrollX: true, + suppressScrollY: false, + scrollXMarginOffset: 0, + scrollYMarginOffset: 0, + theme: 'default' + }); + Ps.update(this.element_legend_childs.perfect_scroller); + } + + this.legendShowLatestValues(); + }; + + this.hasLegend = function () { + if (typeof this.tmp.___hasLegendCache___ !== 'undefined') { + return this.tmp.___hasLegendCache___; + } + + let leg = false; + if (this.library && this.library.legend(this) === 'right-side') { + leg = true; + } + + this.tmp.___hasLegendCache___ = leg; + return leg; + }; + + this.legendWidth = function () { + return (this.hasLegend()) ? 140 : 0; + }; + + this.legendHeight = function () { + return $(this.element).height(); + }; + + this.chartWidth = function () { + return $(this.element).width() - this.legendWidth(); + }; + + this.chartHeight = function () { + return $(this.element).height(); + }; + + this.chartPixelsPerPoint = function () { + // force an options provided detail + let px = this.pixels_per_point; + + if (this.library && px < this.library.pixels_per_point(this)) { + px = this.library.pixels_per_point(this); + } + + if (px < NETDATA.options.current.pixels_per_point) { + px = NETDATA.options.current.pixels_per_point; + } + + return px; + }; + + this.needsRecreation = function () { + let ret = ( + this.chart_created && + this.library && + this.library.autoresize() === false && + this.tm.last_resized < NETDATA.options.last_page_resize + ); + + if (this.debug) { + this.log('needsRecreation(): ' + ret.toString() + ', chart_created = ' + this.chart_created.toString()); + } + + return ret; + }; + + this.chartDataUniqueID = function () { + return this.id + ',' + this.library_name + ',' + this.dimensions + ',' + this.chartURLOptions(true); + }; + + this.chartURLOptions = function (isForUniqueId) { + let ret = ''; + + if (this.override_options !== null) { + ret = this.override_options.toString(); + } else { + ret = this.library.options(this); + } + + if (this.append_options !== null) { + ret += '%7C' + this.append_options.toString(); + } + + ret += '%7C' + 'jsonwrap'; + + // always add `nonzero` when it's used to create a chartDataUniqueID + // we cannot just remove `nonzero` because of backwards compatibility with old snapshots + if (isForUniqueId || NETDATA.options.current.eliminate_zero_dimensions) { + ret += '%7C' + 'nonzero'; + } + + return ret; + }; + + this.chartURL = function () { + let after, before, points_multiplier = 1; + if (NETDATA.globalPanAndZoom.isActive()) { + if (this.current.force_before_ms !== null && this.current.force_after_ms !== null) { + this.tm.pan_and_zoom_seq = 0; + + before = Math.round(this.current.force_before_ms / 1000); + after = Math.round(this.current.force_after_ms / 1000); + this.view_after = after * 1000; + this.view_before = before * 1000; + + if (NETDATA.options.current.pan_and_zoom_data_padding) { + this.requested_padding = Math.round((before - after) / 2); + after -= this.requested_padding; + before += this.requested_padding; + this.requested_padding *= 1000; + points_multiplier = 2; + } + + this.current.force_before_ms = null; + this.current.force_after_ms = null; + } else { + this.tm.pan_and_zoom_seq = NETDATA.globalPanAndZoom.seq; + + after = Math.round(NETDATA.globalPanAndZoom.force_after_ms / 1000); + before = Math.round(NETDATA.globalPanAndZoom.force_before_ms / 1000); + this.view_after = after * 1000; + this.view_before = before * 1000; + + this.requested_padding = null; + points_multiplier = 1; + } + } else { + this.tm.pan_and_zoom_seq = 0; + + before = this.before; + after = this.after; + this.view_after = after * 1000; + this.view_before = before * 1000; + + this.requested_padding = null; + points_multiplier = 1; + } + + this.requested_after = after * 1000; + this.requested_before = before * 1000; + + let data_points; + if (NETDATA.options.force_data_points !== 0) { + data_points = NETDATA.options.force_data_points; + this.data_points = data_points; + } else { + this.data_points = this.points || Math.round(this.chartWidth() / this.chartPixelsPerPoint()); + data_points = this.data_points * points_multiplier; + } + + // build the data URL + this.data_url = this.host + this.chart.data_url; + this.data_url += "&format=" + this.library.format(); + this.data_url += "&points=" + (data_points).toString(); + this.data_url += "&group=" + this.method; + this.data_url += ">ime=" + this.gtime; + this.data_url += "&options=" + this.chartURLOptions(); + + if (after) { + this.data_url += "&after=" + after.toString(); + } + + if (before) { + this.data_url += "&before=" + before.toString(); + } + + if (this.dimensions) { + this.data_url += "&dimensions=" + this.dimensions; + } + if (NETDATA.options.debug.chart_data_url || this.debug) { + this.log('chartURL(): ' + this.data_url + ' WxH:' + this.chartWidth() + 'x' + this.chartHeight() + ' points: ' + data_points.toString() + ' library: ' + this.library_name); + } + }; + + this.redrawChart = function () { + if (this.data !== null) { + this.updateChartWithData(this.data); + } + }; + + this.updateChartWithData = function (data) { + if (this.debug) { + this.log('updateChartWithData() called.'); + } + + // this may force the chart to be re-created + resizeChart(); + + this.data = data; + + let started = Date.now(); + let view_update_every = data.view_update_every * 1000; + + if (this.data_update_every !== view_update_every) { + if (this.element_legend_childs.title_time) { + this.element_legend_childs.title_time.title = this.legendResolutionTooltip(); + } + } + + // if the result is JSON, find the latest update-every + this.data_update_every = view_update_every; + this.data_after = data.after * 1000; + this.data_before = data.before * 1000; + this.netdata_first = data.first_entry * 1000; + this.netdata_last = data.last_entry * 1000; + this.data_points = data.points; + + data.state = this; + + if (NETDATA.options.current.pan_and_zoom_data_padding && this.requested_padding !== null) { + if (this.view_after < this.data_after) { + // console.log('adjusting view_after from ' + this.view_after + ' to ' + this.data_after); + this.view_after = this.data_after; + } + + if (this.view_before > this.data_before) { + // console.log('adjusting view_before from ' + this.view_before + ' to ' + this.data_before); + this.view_before = this.data_before; + } + } else { + this.view_after = this.data_after; + this.view_before = this.data_before; + } + + if (this.debug) { + this.log('UPDATE No ' + this.updates_counter + ' COMPLETED'); + + if (this.current.force_after_ms) { + this.log('STATUS: forced : ' + (this.current.force_after_ms / 1000).toString() + ' - ' + (this.current.force_before_ms / 1000).toString()); + } else { + this.log('STATUS: forced : unset'); + } + + this.log('STATUS: requested : ' + (this.requested_after / 1000).toString() + ' - ' + (this.requested_before / 1000).toString()); + this.log('STATUS: downloaded: ' + (this.data_after / 1000).toString() + ' - ' + (this.data_before / 1000).toString()); + this.log('STATUS: rendered : ' + (this.view_after / 1000).toString() + ' - ' + (this.view_before / 1000).toString()); + this.log('STATUS: points : ' + (this.data_points).toString()); + } + + if (this.data_points === 0) { + noDataToShow(); + return; + } + + if (this.updates_since_last_creation >= this.library.max_updates_to_recreate()) { + if (this.debug) { + this.log('max updates of ' + this.updates_since_last_creation.toString() + ' reached. Forcing re-generation.'); + } + + init('force'); + return; + } + + // check and update the legend + this.legendUpdateDOM(); + + if (this.chart_created && typeof this.library.update === 'function') { + if (this.debug) { + this.log('updating chart...'); + } + + if (!callChartLibraryUpdateSafely(data)) { + return; + } + } else { + if (this.debug) { + this.log('creating chart...'); + } + + if (!callChartLibraryCreateSafely(data)) { + return; + } + } + + if (this.isVisible()) { + hideMessage(); + this.legendShowLatestValues(); + } else { + this.__redraw_on_unhide = true; + + if (this.debug) { + this.log("drawn while not visible"); + } + } + + if (this.selected) { + NETDATA.globalSelectionSync.stop(); + } + + // update the performance counters + let now = Date.now(); + this.tm.last_updated = now; + + // don't update last_autorefreshed if this chart is + // forced to be updated with global PanAndZoom + if (NETDATA.globalPanAndZoom.isActive()) { + this.tm.last_autorefreshed = 0; + } else { + if (NETDATA.options.current.parallel_refresher && NETDATA.options.current.concurrent_refreshes && typeof this.force_update_every !== 'number') { + this.tm.last_autorefreshed = now - (now % this.data_update_every); + } else { + this.tm.last_autorefreshed = now; + } + } + + this.refresh_dt_ms = now - started; + NETDATA.options.auto_refresher_fast_weight += this.refresh_dt_ms; + + if (this.refresh_dt_element !== null) { + this.refresh_dt_element.innerText = this.refresh_dt_ms.toString(); + } + + if (this.foreignElementBefore !== null) { + this.foreignElementBefore.innerText = NETDATA.dateTime.localeDateString(this.view_before) + ' ' + NETDATA.dateTime.localeTimeString(this.view_before); + } + + if (this.foreignElementAfter !== null) { + this.foreignElementAfter.innerText = NETDATA.dateTime.localeDateString(this.view_after) + ' ' + NETDATA.dateTime.localeTimeString(this.view_after); + } + + if (this.foreignElementDuration !== null) { + this.foreignElementDuration.innerText = NETDATA.seconds4human(Math.floor((this.view_before - this.view_after) / 1000) + 1); + } + + if (this.foreignElementUpdateEvery !== null) { + this.foreignElementUpdateEvery.innerText = NETDATA.seconds4human(Math.floor(this.data_update_every / 1000)); + } + }; + + this.getSnapshotData = function (key) { + if (this.debug) { + this.log('updating from snapshot: ' + key); + } + + if (typeof netdataSnapshotData.data[key] === 'undefined') { + this.log('snapshot does not include data for key "' + key + '"'); + return null; + } + + if (typeof netdataSnapshotData.data[key] !== 'string') { + this.log('snapshot data for key "' + key + '" is not string'); + return null; + } + + let uncompressed; + try { + uncompressed = netdataSnapshotData.uncompress(netdataSnapshotData.data[key]); + + if (uncompressed === null) { + this.log('uncompressed snapshot data for key ' + key + ' is null'); + return null; + } + + if (typeof uncompressed === 'undefined') { + this.log('uncompressed snapshot data for key ' + key + ' is undefined'); + return null; + } + } catch (e) { + this.log('decompression of snapshot data for key ' + key + ' failed'); + console.log(e); + uncompressed = null; + } + + if (typeof uncompressed !== 'string') { + this.log('uncompressed snapshot data for key ' + key + ' is not string'); + return null; + } + + let data; + try { + data = JSON.parse(uncompressed); + } catch (e) { + this.log('parsing snapshot data for key ' + key + ' failed'); + console.log(e); + data = null; + } + + return data; + }; + + this.updateChart = function (callback) { + if (this.debug) { + this.log('updateChart()'); + } + + if (this.fetching_data) { + if (this.debug) { + this.log('updateChart(): I am already updating...'); + } + + if (typeof callback === 'function') { + return callback(false, 'already running'); + } + + return; + } + + // due to late initialization of charts and libraries + // we need to check this too + if (!this.enabled) { + if (this.debug) { + this.log('updateChart(): I am not enabled'); + } + + if (typeof callback === 'function') { + return callback(false, 'not enabled'); + } + + return; + } + + if (!canBeRendered()) { + if (this.debug) { + this.log('updateChart(): cannot be rendered'); + } + + if (typeof callback === 'function') { + return callback(false, 'cannot be rendered'); + } + + return; + } + + if (that.dom_created !== true) { + if (this.debug) { + this.log('updateChart(): creating DOM'); + } + + createDOM(); + } + + if (this.chart === null) { + if (this.debug) { + this.log('updateChart(): getting chart'); + } + + return this.getChart(function () { + return that.updateChart(callback); + }); + } + + if (!this.library.initialized) { + if (this.library.enabled) { + if (this.debug) { + this.log('updateChart(): initializing chart library'); + } + + return this.library.initialize(function () { + return that.updateChart(callback); + }); + } else { + error('chart library "' + this.library_name + '" is not available.'); + + if (typeof callback === 'function') { + return callback(false, 'library not available'); + } + + return; + } + } + + this.clearSelection(); + this.chartURL(); + + NETDATA.statistics.refreshes_total++; + NETDATA.statistics.refreshes_active++; + + if (NETDATA.statistics.refreshes_active > NETDATA.statistics.refreshes_active_max) { + NETDATA.statistics.refreshes_active_max = NETDATA.statistics.refreshes_active; + } + + let ok = false; + this.fetching_data = true; + + if (netdataSnapshotData !== null) { + let key = this.chartDataUniqueID(); + let data = this.getSnapshotData(key); + if (data !== null) { + ok = true; + data = NETDATA.xss.checkData('/api/v1/data', data, this.library.xssRegexIgnore); + this.updateChartWithData(data); + } else { + ok = false; + error('cannot get data from snapshot for key: "' + key + '"'); + that.tm.last_autorefreshed = Date.now(); + } + + NETDATA.statistics.refreshes_active--; + this.fetching_data = false; + + if (typeof callback === 'function') { + callback(ok, 'snapshot'); + } + + return; + } + + if (this.debug) { + this.log('updating from ' + this.data_url); + } + + this.xhr = $.ajax({ + url: this.data_url, + cache: false, + async: true, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkData('/api/v1/data', data, that.library.xssRegexIgnore); + + that.xhr = undefined; + that.retries_on_data_failures = 0; + ok = true; + + if (that.debug) { + that.log('data received. updating chart.'); + } + + that.updateChartWithData(data); + }) + .fail(function (msg) { + that.xhr = undefined; + + if (msg.statusText !== 'abort') { + that.retries_on_data_failures++; + if (that.retries_on_data_failures > NETDATA.options.current.retries_on_data_failures) { + // that.log('failed ' + that.retries_on_data_failures.toString() + ' times - giving up'); + that.retries_on_data_failures = 0; + error('data download failed for url: ' + that.data_url); + } + else { + that.tm.last_autorefreshed = Date.now(); + // that.log('failed ' + that.retries_on_data_failures.toString() + ' times, but I will retry'); + } + } + }) + .always(function () { + that.xhr = undefined; + + NETDATA.statistics.refreshes_active--; + that.fetching_data = false; + + if (typeof callback === 'function') { + return callback(ok, 'download'); + } + }); + }; + + const __isVisible = function () { + let ret = true; + + if (NETDATA.options.current.update_only_visible !== false) { + // tolerance is the number of pixels a chart can be off-screen + // to consider it as visible and refresh it as if was visible + let tolerance = 0; + + that.tm.last_visible_check = Date.now(); + + let rect = that.element.getBoundingClientRect(); + + let screenTop = window.scrollY; + let screenBottom = screenTop + window.innerHeight; + + let chartTop = rect.top + screenTop; + let chartBottom = chartTop + rect.height; + + ret = !(rect.width === 0 || rect.height === 0 || chartBottom + tolerance < screenTop || chartTop - tolerance > screenBottom); + } + + if (that.debug) { + that.log('__isVisible(): ' + ret); + } + + return ret; + }; + + this.isVisible = function (nocache) { + // this.log('last_visible_check: ' + this.tm.last_visible_check + ', last_page_scroll: ' + NETDATA.options.last_page_scroll); + + // caching - we do not evaluate the charts visibility + // if the page has not been scrolled since the last check + if ((typeof nocache !== 'undefined' && nocache) + || typeof this.tmp.___isVisible___ === 'undefined' + || this.tm.last_visible_check <= NETDATA.options.last_page_scroll) { + this.tmp.___isVisible___ = __isVisible(); + if (this.tmp.___isVisible___) { + this.unhideChart(); + } else { + this.hideChart(); + } + } + + if (this.debug) { + this.log('isVisible(' + nocache + '): ' + this.tmp.___isVisible___); + } + + return this.tmp.___isVisible___; + }; + + this.isAutoRefreshable = function () { + return (this.current.autorefresh); + }; + + this.canBeAutoRefreshed = function () { + if (!this.enabled) { + if (this.debug) { + this.log('canBeAutoRefreshed() -> not enabled'); + } + + return false; + } + + if (this.running) { + if (this.debug) { + this.log('canBeAutoRefreshed() -> already running'); + } + + return false; + } + + if (this.library === null || this.library.enabled === false) { + error('charting library "' + this.library_name + '" is not available'); + if (this.debug) { + this.log('canBeAutoRefreshed() -> chart library ' + this.library_name + ' is not available'); + } + + return false; + } + + if (!this.isVisible()) { + if (NETDATA.options.debug.visibility || this.debug) { + this.log('canBeAutoRefreshed() -> not visible'); + } + + return false; + } + + let now = Date.now(); + + if (this.current.force_update_at !== 0 && this.current.force_update_at < now) { + if (this.debug) { + this.log('canBeAutoRefreshed() -> timed force update - allowing this update'); + } + + this.current.force_update_at = 0; + return true; + } + + if (!this.isAutoRefreshable()) { + if (this.debug) { + this.log('canBeAutoRefreshed() -> not auto-refreshable'); + } + + return false; + } + + // allow the first update, even if the page is not visible + if (NETDATA.options.page_is_visible === false && this.updates_counter && this.updates_since_last_unhide) { + if (NETDATA.options.debug.focus || this.debug) { + this.log('canBeAutoRefreshed() -> not the first update, and page does not have focus'); + } + + return false; + } + + if (this.needsRecreation()) { + if (this.debug) { + this.log('canBeAutoRefreshed() -> needs re-creation.'); + } + + return true; + } + + if (NETDATA.options.auto_refresher_stop_until >= now) { + if (this.debug) { + this.log('canBeAutoRefreshed() -> stopped until is in future.'); + } + + return false; + } + + // options valid only for autoRefresh() + if (NETDATA.globalPanAndZoom.isActive()) { + if (NETDATA.globalPanAndZoom.shouldBeAutoRefreshed(this)) { + if (this.debug) { + this.log('canBeAutoRefreshed(): global panning: I need an update.'); + } + + return true; + } + else { + if (this.debug) { + this.log('canBeAutoRefreshed(): global panning: I am already up to date.'); + } + + return false; + } + } + + if (this.selected) { + if (this.debug) { + this.log('canBeAutoRefreshed(): I have a selection in place.'); + } + + return false; + } + + if (this.paused) { + if (this.debug) { + this.log('canBeAutoRefreshed(): I am paused.'); + } + + return false; + } + + let data_update_every = this.data_update_every; + if (typeof this.force_update_every === 'number') { + data_update_every = this.force_update_every; + } + + if (now - this.tm.last_autorefreshed >= data_update_every) { + if (this.debug) { + this.log('canBeAutoRefreshed(): It is time to update me. Now: ' + now.toString() + ', last_autorefreshed: ' + this.tm.last_autorefreshed + ', data_update_every: ' + data_update_every + ', delta: ' + (now - this.tm.last_autorefreshed).toString()); + } + + return true; + } + + return false; + }; + + this.autoRefresh = function (callback) { + let state = that; + + if (state.canBeAutoRefreshed() && state.running === false) { + state.running = true; + state.updateChart(function () { + state.running = false; + + if (typeof callback === 'function') { + return callback(); + } + }); + } else { + if (typeof callback === 'function') { + return callback(); + } + } + }; + + this.__defaultsFromDownloadedChart = function (chart) { + this.chart = chart; + this.chart_url = chart.url; + this.data_update_every = chart.update_every * 1000; + this.data_points = Math.round(this.chartWidth() / this.chartPixelsPerPoint()); + this.tm.last_info_downloaded = Date.now(); + + if (this.title === null) { + this.title = chart.title; + } + + if (this.units === null) { + this.units = chart.units; + this.units_current = this.units; + } + }; + + // fetch the chart description from the netdata server + this.getChart = function (callback) { + this.chart = NETDATA.chartRegistry.get(this.host, this.id); + if (this.chart) { + this.__defaultsFromDownloadedChart(this.chart); + + if (typeof callback === 'function') { + return callback(); + } + } else if (netdataSnapshotData !== null) { + // console.log(this); + // console.log(NETDATA.chartRegistry); + NETDATA.error(404, 'host: ' + this.host + ', chart: ' + this.id); + error('chart not found in snapshot'); + + if (typeof callback === 'function') { + return callback(); + } + } else { + this.chart_url = "/api/v1/chart?chart=" + this.id; + + if (this.debug) { + this.log('downloading ' + this.chart_url); + } + + $.ajax({ + url: this.host + this.chart_url, + cache: false, + async: true, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (chart) { + chart = NETDATA.xss.checkOptional('/api/v1/chart', chart); + + chart.url = that.chart_url; + that.__defaultsFromDownloadedChart(chart); + NETDATA.chartRegistry.add(that.host, that.id, chart); + }) + .fail(function () { + NETDATA.error(404, that.chart_url); + error('chart not found on url "' + that.chart_url + '"'); + }) + .always(function () { + if (typeof callback === 'function') { + return callback(); + } + }); + } + }; + + // ============================================================================================================ + // INITIALIZATION + + initDOM(); + init('fast'); +}; + +NETDATA.resetAllCharts = function (state) { + // first clear the global selection sync + // to make sure no chart is in selected state + NETDATA.globalSelectionSync.stop(); + + // there are 2 possibilities here + // a. state is the global Pan and Zoom master + // b. state is not the global Pan and Zoom master + + // let master = true; + // if (NETDATA.globalPanAndZoom.isMaster(state) === false) { + // master = false; + // } + const master = NETDATA.globalPanAndZoom.isMaster(state); + + // clear the global Pan and Zoom + // this will also refresh the master + // and unblock any charts currently mirroring the master + NETDATA.globalPanAndZoom.clearMaster(); + + // if we were not the master, reset our status too + // this is required because most probably the mouse + // is over this chart, blocking it from auto-refreshing + if (master === false && (state.paused || state.selected)) { + state.resetChart(); + } +}; + +// get or create a chart state, given a DOM element +NETDATA.chartState = function (element) { + let self = $(element); + + let state = self.data('netdata-state-object') || null; + if (state === null) { + state = new chartState(element); + self.data('netdata-state-object', state); + } + return state; +}; + +// ---------------------------------------------------------------------------------------------------------------- +// Library functions + +// Load a script without jquery +// This is used to load jquery - after it is loaded, we use jquery +NETDATA._loadjQuery = function (callback) { + if (typeof jQuery === 'undefined') { + if (NETDATA.options.debug.main_loop) { + console.log('loading ' + NETDATA.jQuery); + } + + let script = document.createElement('script'); + script.type = 'text/javascript'; + script.async = true; + script.src = NETDATA.jQuery; + + // script.onabort = onError; + script.onerror = function () { + NETDATA.error(101, NETDATA.jQuery); + }; + if (typeof callback === "function") { + script.onload = function () { + $ = jQuery; + return callback(); + }; + } + + let s = document.getElementsByTagName('script')[0]; + s.parentNode.insertBefore(script, s); + } + else if (typeof callback === "function") { + $ = jQuery; + return callback(); + } +}; + +NETDATA._loadCSS = function (filename) { + // don't use jQuery here + // styles are loaded before jQuery + // to eliminate showing an unstyled page to the user + + let fileref = document.createElement("link"); + fileref.setAttribute("rel", "stylesheet"); + fileref.setAttribute("type", "text/css"); + fileref.setAttribute("href", filename); + + if (typeof fileref !== 'undefined') { + document.getElementsByTagName("head")[0].appendChild(fileref); + } +}; + +// user function to signal us the DOM has been +// updated. +NETDATA.updatedDom = function () { + NETDATA.options.updated_dom = true; +}; + +NETDATA.ready = function (callback) { + NETDATA.options.pauseCallback = callback; +}; + +NETDATA.pause = function (callback) { + if (typeof callback === 'function') { + if (NETDATA.options.pause) { + return callback(); + } else { + NETDATA.options.pauseCallback = callback; + } + } +}; + +NETDATA.unpause = function () { + NETDATA.options.pauseCallback = null; + NETDATA.options.updated_dom = true; + NETDATA.options.pause = false; +}; + +// ---------------------------------------------------------------------------------------------------------------- + +// this is purely sequential charts refresher +// it is meant to be autonomous +NETDATA.chartRefresherNoParallel = function (index, callback) { + let targets = NETDATA.intersectionObserver.targets(); + + if (NETDATA.options.debug.main_loop) { + console.log('NETDATA.chartRefresherNoParallel(' + index + ')'); + } + + if (NETDATA.options.updated_dom) { + // the dom has been updated + // get the dom parts again + NETDATA.parseDom(callback); + return; + } + if (index >= targets.length) { + if (NETDATA.options.debug.main_loop) { + console.log('waiting to restart main loop...'); + } + + NETDATA.options.auto_refresher_fast_weight = 0; + callback(); + } else { + let state = targets[index]; + + if (NETDATA.options.auto_refresher_fast_weight < NETDATA.options.current.fast_render_timeframe) { + if (NETDATA.options.debug.main_loop) { + console.log('fast rendering...'); + } + + if (state.isVisible()) { + NETDATA.timeout.set(function () { + state.autoRefresh(function () { + NETDATA.chartRefresherNoParallel(++index, callback); + }); + }, 0); + } else { + NETDATA.chartRefresherNoParallel(++index, callback); + } + } else { + if (NETDATA.options.debug.main_loop) { + console.log('waiting for next refresh...'); + } + NETDATA.options.auto_refresher_fast_weight = 0; + + NETDATA.timeout.set(function () { + state.autoRefresh(function () { + NETDATA.chartRefresherNoParallel(++index, callback); + }); + }, NETDATA.options.current.idle_between_charts); + } + } +}; + +NETDATA.chartRefresherWaitTime = function () { + return NETDATA.options.current.idle_parallel_loops; +}; + +// the default refresher +NETDATA.chartRefresherLastRun = 0; +NETDATA.chartRefresherRunsAfterParseDom = 0; +NETDATA.chartRefresherTimeoutId = undefined; + +NETDATA.chartRefresherReschedule = function () { + if (NETDATA.options.current.async_on_scroll) { + if (NETDATA.chartRefresherTimeoutId) { + NETDATA.timeout.clear(NETDATA.chartRefresherTimeoutId); + } + NETDATA.chartRefresherTimeoutId = NETDATA.timeout.set(NETDATA.chartRefresher, NETDATA.options.current.onscroll_worker_duration_threshold); + //console.log('chartRefresherReschedule()'); + } +}; + +NETDATA.chartRefresher = function () { + // console.log('chartRefresher() begin ' + (Date.now() - NETDATA.chartRefresherLastRun).toString() + ' ms since last run'); + + if (NETDATA.options.page_is_visible === false + && NETDATA.options.current.stop_updates_when_focus_is_lost + && NETDATA.chartRefresherLastRun > NETDATA.options.last_page_resize + && NETDATA.chartRefresherLastRun > NETDATA.options.last_page_scroll + && NETDATA.chartRefresherRunsAfterParseDom > 10 + ) { + setTimeout( + NETDATA.chartRefresher, + NETDATA.options.current.idle_lost_focus + ); + + // console.log('chartRefresher() page without focus, will run in ' + NETDATA.options.current.idle_lost_focus.toString() + ' ms, ' + NETDATA.chartRefresherRunsAfterParseDom.toString()); + return; + } + NETDATA.chartRefresherRunsAfterParseDom++; + + let now = Date.now(); + NETDATA.chartRefresherLastRun = now; + + if (now < NETDATA.options.on_scroll_refresher_stop_until) { + NETDATA.chartRefresherTimeoutId = NETDATA.timeout.set( + NETDATA.chartRefresher, + NETDATA.chartRefresherWaitTime() + ); + + // console.log('chartRefresher() end1 will run in ' + NETDATA.chartRefresherWaitTime().toString() + ' ms'); + return; + } + + if (now < NETDATA.options.auto_refresher_stop_until) { + NETDATA.chartRefresherTimeoutId = NETDATA.timeout.set( + NETDATA.chartRefresher, + NETDATA.chartRefresherWaitTime() + ); + + // console.log('chartRefresher() end2 will run in ' + NETDATA.chartRefresherWaitTime().toString() + ' ms'); + return; + } + + if (NETDATA.options.pause) { + // console.log('auto-refresher is paused'); + NETDATA.chartRefresherTimeoutId = NETDATA.timeout.set( + NETDATA.chartRefresher, + NETDATA.chartRefresherWaitTime() + ); + + // console.log('chartRefresher() end3 will run in ' + NETDATA.chartRefresherWaitTime().toString() + ' ms'); + return; + } + + if (typeof NETDATA.options.pauseCallback === 'function') { + // console.log('auto-refresher is calling pauseCallback'); + + NETDATA.options.pause = true; + NETDATA.options.pauseCallback(); + NETDATA.chartRefresher(); + + // console.log('chartRefresher() end4 (nested)'); + return; + } + + if (!NETDATA.options.current.parallel_refresher) { + // console.log('auto-refresher is calling chartRefresherNoParallel(0)'); + NETDATA.chartRefresherNoParallel(0, function () { + NETDATA.chartRefresherTimeoutId = NETDATA.timeout.set( + NETDATA.chartRefresher, + NETDATA.options.current.idle_between_loops + ); + }); + // console.log('chartRefresher() end5 (no parallel, nested)'); + return; + } + + if (NETDATA.options.updated_dom) { + // the dom has been updated + // get the dom parts again + // console.log('auto-refresher is calling parseDom()'); + NETDATA.parseDom(NETDATA.chartRefresher); + // console.log('chartRefresher() end6 (parseDom)'); + return; + } + + if (!NETDATA.globalSelectionSync.active()) { + let parallel = []; + let targets = NETDATA.intersectionObserver.targets(); + let len = targets.length; + let state; + while (len--) { + state = targets[len]; + if (state.running || state.isVisible() === false) { + continue; + } + + if (!state.library.initialized) { + if (state.library.enabled) { + state.library.initialize(NETDATA.chartRefresher); + //console.log('chartRefresher() end6 (library init)'); + return; + } + else { + state.error('chart library "' + state.library_name + '" is not enabled.'); + } + } + + if (NETDATA.scrollUp) { + parallel.unshift(state); + } else { + parallel.push(state); + } + } + + len = parallel.length; + while (len--) { + state = parallel[len]; + // console.log('auto-refresher executing in parallel for ' + parallel.length.toString() + ' charts'); + // this will execute the jobs in parallel + + if (!state.running) { + NETDATA.timeout.set(state.autoRefresh, 0); + } + } + //else { + // console.log('auto-refresher nothing to do'); + //} + } + + // run the next refresh iteration + NETDATA.chartRefresherTimeoutId = NETDATA.timeout.set( + NETDATA.chartRefresher, + NETDATA.chartRefresherWaitTime() + ); + + //console.log('chartRefresher() completed in ' + (Date.now() - now).toString() + ' ms'); +}; + +NETDATA.parseDom = function (callback) { + //console.log('parseDom()'); + + NETDATA.options.last_page_scroll = Date.now(); + NETDATA.options.updated_dom = false; + NETDATA.chartRefresherRunsAfterParseDom = 0; + + let targets = $('div[data-netdata]'); //.filter(':visible'); + + if (NETDATA.options.debug.main_loop) { + console.log('DOM updated - there are ' + targets.length + ' charts on page.'); + } + + NETDATA.intersectionObserver.globalReset(); + NETDATA.options.targets = []; + let len = targets.length; + while (len--) { + // the initialization will take care of sizing + // and the "loading..." message + let state = NETDATA.chartState(targets[len]); + NETDATA.options.targets.push(state); + NETDATA.intersectionObserver.observe(state); + } + + if (NETDATA.globalChartUnderlay.isActive()) { + NETDATA.globalChartUnderlay.setup(); + } else { + NETDATA.globalChartUnderlay.clear(); + } + + if (typeof callback === 'function') { + return callback(); + } +}; + +// this is the main function - where everything starts +NETDATA.started = false; +NETDATA.start = function () { + // this should be called only once + + if (NETDATA.started) { + console.log('netdata is already started'); + return; + } + + NETDATA.started = true; + NETDATA.options.page_is_visible = true; + + $(window).blur(function () { + if (NETDATA.options.current.stop_updates_when_focus_is_lost) { + NETDATA.options.page_is_visible = false; + if (NETDATA.options.debug.focus) { + console.log('Lost Focus!'); + } + } + }); + + $(window).focus(function () { + if (NETDATA.options.current.stop_updates_when_focus_is_lost) { + NETDATA.options.page_is_visible = true; + if (NETDATA.options.debug.focus) { + console.log('Focus restored!'); + } + } + }); + + if (typeof document.hasFocus === 'function' && !document.hasFocus()) { + if (NETDATA.options.current.stop_updates_when_focus_is_lost) { + NETDATA.options.page_is_visible = false; + if (NETDATA.options.debug.focus) { + console.log('Document has no focus!'); + } + } + } + + // bootstrap tab switching + $('a[data-toggle="tab"]').on('shown.bs.tab', NETDATA.onscroll); + + // bootstrap modal switching + let $modal = $('.modal'); + $modal.on('hidden.bs.modal', NETDATA.onscroll); + $modal.on('shown.bs.modal', NETDATA.onscroll); + + // bootstrap collapse switching + let $collapse = $('.collapse'); + $collapse.on('hidden.bs.collapse', NETDATA.onscroll); + $collapse.on('shown.bs.collapse', NETDATA.onscroll); + + NETDATA.parseDom(NETDATA.chartRefresher); + + // Alarms initialization + setTimeout(NETDATA.alarms.init, 1000); + + // Registry initialization + setTimeout(NETDATA.registry.init, netdataRegistryAfterMs); + + if (typeof netdataCallback === 'function') { + netdataCallback(); + } +}; + +NETDATA.globalReset = function () { + NETDATA.intersectionObserver.globalReset(); + NETDATA.globalSelectionSync.globalReset(); + NETDATA.globalPanAndZoom.globalReset(); + NETDATA.chartRegistry.globalReset(); + NETDATA.commonMin.globalReset(); + NETDATA.commonMax.globalReset(); + NETDATA.commonColors.globalReset(); + NETDATA.unitsConversion.globalReset(); + NETDATA.options.targets = []; + NETDATA.parseDom(); + NETDATA.unpause(); +}; + +// Registry of netdata hosts + +NETDATA.alarms = { + onclick: null, // the callback to handle the click - it will be called with the alarm log entry + chart_div_offset: -50, // give that space above the chart when scrolling to it + chart_div_id_prefix: 'chart_', // the chart DIV IDs have this prefix (they should be NETDATA.name2id(chart.id)) + chart_div_animation_duration: 0,// the duration of the animation while scrolling to a chart + + ms_penalty: 0, // the time penalty of the next alarm + ms_between_notifications: 500, // firefox moves the alarms off-screen (above, outside the top of the screen) + // if alarms are shown faster than: one per 500ms + + update_every: 10000, // the time in ms between alarm checks + + notifications: false, // when true, the browser supports notifications (may not be granted though) + last_notification_id: 0, // the id of the last alarm_log we have raised an alarm for + first_notification_id: 0, // the id of the first alarm_log entry for this session + // this is used to prevent CLEAR notifications for past events + // notifications_shown: [], + + server: null, // the server to connect to for fetching alarms + current: null, // the list of raised alarms - updated in the background + + // a callback function to call every time the list of raised alarms is refreshed + callback: (typeof netdataAlarmsActiveCallback === 'function') ? netdataAlarmsActiveCallback : null, + + // a callback function to call every time a notification is shown + // the return value is used to decide if the notification will be shown + notificationCallback: (typeof netdataAlarmsNotifCallback === 'function') ? netdataAlarmsNotifCallback : null, + + recipients: null, // the list (array) of recipients to show alarms for, or null + + recipientMatches: function (to_string, wanted_array) { + if (typeof wanted_array === 'undefined' || wanted_array === null || Array.isArray(wanted_array) === false) { + return true; + } + + let r = ' ' + to_string.toString() + ' '; + let len = wanted_array.length; + while (len--) { + if (r.indexOf(' ' + wanted_array[len] + ' ') >= 0) { + return true; + } + } + + return false; + }, + + activeForRecipients: function () { + let active = {}; + let data = NETDATA.alarms.current; + + if (typeof data === 'undefined' || data === null) { + return active; + } + + for (let x in data.alarms) { + if (!data.alarms.hasOwnProperty(x)) { + continue; + } + + let alarm = data.alarms[x]; + if ((alarm.status === 'WARNING' || alarm.status === 'CRITICAL') && NETDATA.alarms.recipientMatches(alarm.recipient, NETDATA.alarms.recipients)) { + active[x] = alarm; + } + } + + return active; + }, + + notify: function (entry) { + // console.log('alarm ' + entry.unique_id); + + if (entry.updated) { + // console.log('alarm ' + entry.unique_id + ' has been updated by another alarm'); + return; + } + + let value_string = entry.value_string; + + if (NETDATA.alarms.current !== null) { + // get the current value_string + let t = NETDATA.alarms.current.alarms[entry.chart + '.' + entry.name]; + if (typeof t !== 'undefined' && entry.status === t.status && typeof t.value_string !== 'undefined') { + value_string = t.value_string; + } + } + + let name = entry.name.replace(/_/g, ' '); + let status = entry.status.toLowerCase(); + let title = name + ' = ' + value_string.toString(); + let tag = entry.alarm_id; + let icon = 'images/banner-icon-144x144.png'; + let interaction = false; + let data = entry; + let show = true; + + // console.log('alarm ' + entry.unique_id + ' ' + entry.chart + '.' + entry.name + ' is ' + entry.status); + + switch (entry.status) { + case 'REMOVED': + show = false; + break; + + case 'UNDEFINED': + return; + + case 'UNINITIALIZED': + return; + + case 'CLEAR': + if (entry.unique_id < NETDATA.alarms.first_notification_id) { + // console.log('alarm ' + entry.unique_id + ' is not current'); + return; + } + if (entry.old_status === 'UNINITIALIZED' || entry.old_status === 'UNDEFINED') { + // console.log('alarm' + entry.unique_id + ' switch to CLEAR from ' + entry.old_status); + return; + } + if (entry.no_clear_notification) { + // console.log('alarm' + entry.unique_id + ' is CLEAR but has no_clear_notification flag'); + return; + } + title = name + ' back to normal (' + value_string.toString() + ')'; + icon = 'images/check-mark-2-128-green.png'; + interaction = false; + break; + + case 'WARNING': + if (entry.old_status === 'CRITICAL') { + status = 'demoted to ' + entry.status.toLowerCase(); + } + + icon = 'images/alert-128-orange.png'; + interaction = false; + break; + + case 'CRITICAL': + if (entry.old_status === 'WARNING') { + status = 'escalated to ' + entry.status.toLowerCase(); + } + + icon = 'images/alert-128-red.png'; + interaction = true; + break; + + default: + console.log('invalid alarm status ' + entry.status); + return; + } + + // filter recipients + if (show) { + show = NETDATA.alarms.recipientMatches(entry.recipient, NETDATA.alarms.recipients); + } + + /* + // cleanup old notifications with the same alarm_id as this one + // it does not seem to work on any web browser - so notifications cannot be removed + + let len = NETDATA.alarms.notifications_shown.length; + while (len--) { + let n = NETDATA.alarms.notifications_shown[len]; + if (n.data.alarm_id === entry.alarm_id) { + console.log('removing old alarm ' + n.data.unique_id); + + // close the notification + n.close.bind(n); + + // remove it from the array + NETDATA.alarms.notifications_shown.splice(len, 1); + len = NETDATA.alarms.notifications_shown.length; + } + } + */ + + if (show) { + if (typeof NETDATA.alarms.notificationCallback === 'function') { + show = NETDATA.alarms.notificationCallback(entry); + } + + if (show) { + setTimeout(function () { + // show this notification + // console.log('new notification: ' + title); + let n = new Notification(title, { + body: entry.hostname + ' - ' + entry.chart + ' (' + entry.family + ') - ' + status + ': ' + entry.info, + tag: tag, + requireInteraction: interaction, + icon: NETDATA.serverStatic + icon, + data: data + }); + + n.onclick = function (event) { + event.preventDefault(); + NETDATA.alarms.onclick(event.target.data); + }; + + // console.log(n); + // NETDATA.alarms.notifications_shown.push(n); + // console.log(entry); + }, NETDATA.alarms.ms_penalty); + + NETDATA.alarms.ms_penalty += NETDATA.alarms.ms_between_notifications; + } + } + }, + + scrollToChart: function (chart_id) { + if (typeof chart_id === 'string') { + let offset = $('#' + NETDATA.alarms.chart_div_id_prefix + NETDATA.name2id(chart_id)).offset(); + if (typeof offset !== 'undefined') { + $('html, body').animate({scrollTop: offset.top + NETDATA.alarms.chart_div_offset}, NETDATA.alarms.chart_div_animation_duration); + return true; + } + } + return false; + }, + + scrollToAlarm: function (alarm) { + if (typeof alarm === 'object') { + let ret = NETDATA.alarms.scrollToChart(alarm.chart); + + if (ret && NETDATA.options.page_is_visible === false) { + window.focus(); + } + // alert('netdata dashboard will now scroll to chart: ' + alarm.chart + '\n\nThis alarm opened to bring the browser window in front of the screen. Click on the dashboard to prevent it from appearing again.'); + } + + }, + + notifyAll: function () { + // console.log('FETCHING ALARM LOG'); + NETDATA.alarms.get_log(NETDATA.alarms.last_notification_id, function (data) { + // console.log('ALARM LOG FETCHED'); + + if (data === null || typeof data !== 'object') { + console.log('invalid alarms log response'); + return; + } + + if (data.length === 0) { + console.log('received empty alarm log'); + return; + } + + // console.log('received alarm log of ' + data.length + ' entries, from ' + data[data.length - 1].unique_id.toString() + ' to ' + data[0].unique_id.toString()); + + data.sort(function (a, b) { + if (a.unique_id > b.unique_id) { + return -1; + } + if (a.unique_id < b.unique_id) { + return 1; + } + return 0; + }); + + NETDATA.alarms.ms_penalty = 0; + + let len = data.length; + while (len--) { + if (data[len].unique_id > NETDATA.alarms.last_notification_id) { + NETDATA.alarms.notify(data[len]); + } + //else + // console.log('ignoring alarm (older) with id ' + data[len].unique_id.toString()); + } + + NETDATA.alarms.last_notification_id = data[0].unique_id; + + if (typeof netdataAlarmsRemember === 'undefined' || netdataAlarmsRemember) { + NETDATA.localStorageSet('last_notification_id', NETDATA.alarms.last_notification_id, null); + } + // console.log('last notification id = ' + NETDATA.alarms.last_notification_id); + }) + }, + + check_notifications: function () { + // returns true if we should fire 1+ notifications + + if (NETDATA.alarms.notifications !== true) { + // console.log('web notifications are not available'); + return false; + } + + if (Notification.permission !== 'granted') { + // console.log('web notifications are not granted'); + return false; + } + + if (typeof NETDATA.alarms.current !== 'undefined' && typeof NETDATA.alarms.current.alarms === 'object') { + // console.log('can do alarms: old id = ' + NETDATA.alarms.last_notification_id + ' new id = ' + NETDATA.alarms.current.latest_alarm_log_unique_id); + + if (NETDATA.alarms.current.latest_alarm_log_unique_id > NETDATA.alarms.last_notification_id) { + // console.log('new alarms detected'); + return true; + } + //else console.log('no new alarms'); + } + // else console.log('cannot process alarms'); + + return false; + }, + + get: function (what, callback) { + $.ajax({ + url: NETDATA.alarms.server + '/api/v1/alarms?' + what.toString(), + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkOptional('/api/v1/alarms', data /*, '.*\.(calc|calc_parsed|warn|warn_parsed|crit|crit_parsed)$' */); + + if (NETDATA.alarms.first_notification_id === 0 && typeof data.latest_alarm_log_unique_id === 'number') { + NETDATA.alarms.first_notification_id = data.latest_alarm_log_unique_id; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + NETDATA.error(415, NETDATA.alarms.server); + + if (typeof callback === 'function') { + return callback(null); + } + }); + }, + + update_forever: function () { + if (netdataShowAlarms !== true || netdataSnapshotData !== null) { + return; + } + + NETDATA.alarms.get('active', function (data) { + if (data !== null) { + NETDATA.alarms.current = data; + + if (NETDATA.alarms.check_notifications()) { + NETDATA.alarms.notifyAll(); + } + + if (typeof NETDATA.alarms.callback === 'function') { + NETDATA.alarms.callback(data); + } + + // Health monitoring is disabled on this netdata + if (data.status === false) { + return; + } + } + + setTimeout(NETDATA.alarms.update_forever, NETDATA.alarms.update_every); + }); + }, + + get_log: function (last_id, callback) { + // console.log('fetching all log after ' + last_id.toString()); + $.ajax({ + url: NETDATA.alarms.server + '/api/v1/alarm_log?after=' + last_id.toString(), + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkOptional('/api/v1/alarm_log', data); + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + NETDATA.error(416, NETDATA.alarms.server); + + if (typeof callback === 'function') { + return callback(null); + } + }); + }, + + init: function () { + NETDATA.alarms.server = NETDATA.fixHost(NETDATA.serverDefault); + + if (typeof netdataAlarmsRemember === 'undefined' || netdataAlarmsRemember) { + NETDATA.alarms.last_notification_id = + NETDATA.localStorageGet('last_notification_id', NETDATA.alarms.last_notification_id, null); + } + + if (NETDATA.alarms.onclick === null) { + NETDATA.alarms.onclick = NETDATA.alarms.scrollToAlarm; + } + + if (typeof netdataAlarmsRecipients !== 'undefined' && Array.isArray(netdataAlarmsRecipients)) { + NETDATA.alarms.recipients = netdataAlarmsRecipients; + } + + if (netdataShowAlarms) { + NETDATA.alarms.update_forever(); + + if ('Notification' in window) { + // console.log('notifications available'); + NETDATA.alarms.notifications = true; + + if (Notification.permission === 'default') { + Notification.requestPermission(); + } + } + } + } +}; + +// Registry of netdata hosts + +NETDATA.registry = { + server: null, // the netdata registry server + isCloudEnabled: false,// is netdata.cloud functionality enabled? + cloudBaseURL: null, // the netdata cloud base url + person_guid: null, // the unique ID of this browser / user + machine_guid: null, // the unique ID the netdata server that served dashboard.js + hostname: 'unknown', // the hostname of the netdata server that served dashboard.js + machines: null, // the user's other URLs + machines_array: null, // the user's other URLs in an array + person_urls: null, + anonymous_statistics_checked: false, + MASKED_DATA: "***", + + isUsingGlobalRegistry: function() { + return NETDATA.registry.server == "https://registry.my-netdata.io"; + }, + + isRegistryEnabled: function() { + return !(NETDATA.registry.isUsingGlobalRegistry() || isSignedIn()) + }, + + parsePersonUrls: function (person_urls) { + NETDATA.registry.person_urls = person_urls; + + if (person_urls) { + NETDATA.registry.machines = {}; + NETDATA.registry.machines_array = []; + + let apu = person_urls; + let i = apu.length; + while (i--) { + if (typeof NETDATA.registry.machines[apu[i][0]] === 'undefined') { + // console.log('adding: ' + apu[i][4] + ', ' + ((now - apu[i][2]) / 1000).toString()); + + let obj = { + guid: apu[i][0], + url: apu[i][1], + last_t: apu[i][2], + accesses: apu[i][3], + name: apu[i][4], + alternate_urls: [] + }; + obj.alternate_urls.push(apu[i][1]); + + NETDATA.registry.machines[apu[i][0]] = obj; + NETDATA.registry.machines_array.push(obj); + } else { + // console.log('appending: ' + apu[i][4] + ', ' + ((now - apu[i][2]) / 1000).toString()); + + let pu = NETDATA.registry.machines[apu[i][0]]; + if (pu.last_t < apu[i][2]) { + pu.url = apu[i][1]; + pu.last_t = apu[i][2]; + pu.name = apu[i][4]; + } + pu.accesses += apu[i][3]; + pu.alternate_urls.push(apu[i][1]); + } + } + } + + if (typeof netdataRegistryCallback === 'function') { + netdataRegistryCallback(NETDATA.registry.machines_array); + } + }, + + init: function () { + if (netdataRegistry !== true) { + return; + } + + NETDATA.registry.hello(NETDATA.serverDefault, function (data) { + if (data) { + NETDATA.registry.server = data.registry; + if (data.cloud_base_url !== "") { + NETDATA.registry.isCloudEnabled = true; + NETDATA.registry.cloudBaseURL = data.cloud_base_url; + } else { + NETDATA.registry.isCloudEnabled = false; + NETDATA.registry.cloudBaseURL = ""; + } + NETDATA.registry.machine_guid = data.machine_guid; + NETDATA.registry.hostname = data.hostname; + if (!NETDATA.registry.anonymous_statistics_checked) { + NETDATA.registry.anonymous_statistics_checked=true; + if (data.anonymous_statistics) { + (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': + new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], + j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=false;j.src= + 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); + })(window,document,'script','dataLayer','GTM-N6CBMJD'); + dataLayer.push({"anonymous_statistics" : "true", "machine_guid" : data.machine_guid}); + } + } + NETDATA.registry.access(2, function (person_urls) { + NETDATA.registry.parsePersonUrls(person_urls); + }); + } + }); + }, + + hello: function (host, callback) { + host = NETDATA.fixHost(host); + + // send HELLO to a netdata server: + // 1. verifies the server is reachable + // 2. responds with the registry URL, the machine GUID of this netdata server and its hostname + $.ajax({ + url: host + '/api/v1/registry?action=hello', + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkOptional('/api/v1/registry?action=hello', data); + + if (typeof data.status !== 'string' || data.status !== 'ok') { + NETDATA.error(408, host + ' response: ' + JSON.stringify(data)); + data = null; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + NETDATA.error(407, host); + + if (typeof callback === 'function') { + return callback(null); + } + }); + }, + + access: function (max_redirects, callback) { + let name = NETDATA.registry.MASKED_DATA; + let url = NETDATA.registry.MASKED_DATA; + + if (!NETDATA.registry.isUsingGlobalRegistry()) { + // If the user is using a private registry keep sending identifiable + // data. + name = NETDATA.registry.hostname; + url = NETDATA.serverDefault; + } + + console.log("ACCESS", name, url); + + // send ACCESS to a netdata registry: + // 1. it lets it know we are accessing a netdata server (its machine GUID and its URL) + // 2. it responds with a list of netdata servers we know + // the registry identifies us using a cookie it sets the first time we access it + // the registry may respond with a redirect URL to send us to another registry + $.ajax({ + url: NETDATA.registry.server + '/api/v1/registry?action=access&machine=' + NETDATA.registry.machine_guid + '&name=' + encodeURIComponent(name) + '&url=' + encodeURIComponent(url), // + '&visible_url=' + encodeURIComponent(document.location), + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkAlways('/api/v1/registry?action=access', data); + + let redirect = null; + if (typeof data.registry === 'string') { + redirect = data.registry; + } + + if (typeof data.status !== 'string' || data.status !== 'ok') { + NETDATA.error(409, NETDATA.registry.server + ' responded with: ' + JSON.stringify(data)); + data = null; + } + + if (data === null) { + if (redirect !== null && max_redirects > 0) { + NETDATA.registry.server = redirect; + NETDATA.registry.access(max_redirects - 1, callback); + } + else { + if (typeof callback === 'function') { + return callback(null); + } + } + } else { + if (typeof data.person_guid === 'string') { + NETDATA.registry.person_guid = data.person_guid; + } + + if (typeof callback === 'function') { + const urls = data.urls.filter((u) => u[1] !== NETDATA.registry.MASKED_DATA); + return callback(urls); + } + } + }) + .fail(function () { + NETDATA.error(410, NETDATA.registry.server); + + if (typeof callback === 'function') { + return callback(null); + } + }); + }, + + delete: function (delete_url, callback) { + // send DELETE to a netdata registry: + $.ajax({ + url: NETDATA.registry.server + '/api/v1/registry?action=delete&machine=' + NETDATA.registry.machine_guid + '&name=' + encodeURIComponent(NETDATA.registry.hostname) + '&url=' + encodeURIComponent(NETDATA.serverDefault) + '&delete_url=' + encodeURIComponent(delete_url), + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkAlways('/api/v1/registry?action=delete', data); + + if (typeof data.status !== 'string' || data.status !== 'ok') { + NETDATA.error(411, NETDATA.registry.server + ' responded with: ' + JSON.stringify(data)); + data = null; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + NETDATA.error(412, NETDATA.registry.server); + + if (typeof callback === 'function') { + return callback(null); + } + }); + }, + + search: function (machine_guid, callback) { + // SEARCH for the URLs of a machine: + $.ajax({ + url: NETDATA.registry.server + '/api/v1/registry?action=search&machine=' + NETDATA.registry.machine_guid + '&name=' + encodeURIComponent(NETDATA.registry.hostname) + '&url=' + encodeURIComponent(NETDATA.serverDefault) + '&for=' + machine_guid, + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkAlways('/api/v1/registry?action=search', data); + + if (typeof data.status !== 'string' || data.status !== 'ok') { + NETDATA.error(417, NETDATA.registry.server + ' responded with: ' + JSON.stringify(data)); + data = null; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + NETDATA.error(418, NETDATA.registry.server); + + if (typeof callback === 'function') { + return callback(null); + } + }); + }, + + switch: function (new_person_guid, callback) { + // impersonate + $.ajax({ + url: NETDATA.registry.server + '/api/v1/registry?action=switch&machine=' + NETDATA.registry.machine_guid + '&name=' + encodeURIComponent(NETDATA.registry.hostname) + '&url=' + encodeURIComponent(NETDATA.serverDefault) + '&to=' + new_person_guid, + async: true, + cache: false, + headers: { + 'Cache-Control': 'no-cache, no-store', + 'Pragma': 'no-cache' + }, + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function (data) { + data = NETDATA.xss.checkAlways('/api/v1/registry?action=switch', data); + + if (typeof data.status !== 'string' || data.status !== 'ok') { + NETDATA.error(413, NETDATA.registry.server + ' responded with: ' + JSON.stringify(data)); + data = null; + } + + if (typeof callback === 'function') { + return callback(data); + } + }) + .fail(function () { + NETDATA.error(414, NETDATA.registry.server); + + if (typeof callback === 'function') { + return callback(null); + } + }); + } +}; + +// Load required JS libraries and CSS + +NETDATA.requiredJs = [ + { + url: NETDATA.serverStatic + 'lib/bootstrap-3.3.7.min.js', + async: false, + isAlreadyLoaded: function () { + // check if bootstrap is loaded + if (typeof $().emulateTransitionEnd === 'function') { + return true; + } else { + return typeof netdataNoBootstrap !== 'undefined' && netdataNoBootstrap; + } + } + }, + { + url: NETDATA.serverStatic + 'lib/fontawesome-all-5.0.1.min.js', + async: true, + isAlreadyLoaded: function () { + return typeof netdataNoFontAwesome !== 'undefined' && netdataNoFontAwesome; + } + }, + { + url: NETDATA.serverStatic + 'lib/perfect-scrollbar-0.6.15.min.js', + isAlreadyLoaded: function () { + return false; + } + } +]; + +NETDATA.requiredCSS = [ + { + url: NETDATA.themes.current.bootstrap_css, + isAlreadyLoaded: function () { + return typeof netdataNoBootstrap !== 'undefined' && netdataNoBootstrap; + } + }, + { + url: NETDATA.themes.current.dashboard_css, + isAlreadyLoaded: function () { + return false; + } + } +]; + +NETDATA.loadedRequiredJs = 0; +NETDATA.loadRequiredJs = function (index, callback) { + if (index >= NETDATA.requiredJs.length) { + if (typeof callback === 'function') { + return callback(); + } + return; + } + + if (NETDATA.requiredJs[index].isAlreadyLoaded()) { + NETDATA.loadedRequiredJs++; + NETDATA.loadRequiredJs(++index, callback); + return; + } + + if (NETDATA.options.debug.main_loop) { + console.log('loading ' + NETDATA.requiredJs[index].url); + } + + let async = true; + if (typeof NETDATA.requiredJs[index].async !== 'undefined' && NETDATA.requiredJs[index].async === false) { + async = false; + } + + $.ajax({ + url: NETDATA.requiredJs[index].url, + cache: true, + dataType: "script", + xhrFields: {withCredentials: true} // required for the cookie + }) + .done(function () { + if (NETDATA.options.debug.main_loop) { + console.log('loaded ' + NETDATA.requiredJs[index].url); + } + }) + .fail(function () { + alert('Cannot load required JS library: ' + NETDATA.requiredJs[index].url); + }) + .always(function () { + NETDATA.loadedRequiredJs++; + + // if (async === false) + if (!async) { + NETDATA.loadRequiredJs(++index, callback); + } + }); + + // if (async === true) + if (async) { + NETDATA.loadRequiredJs(++index, callback); + } +}; + +NETDATA.loadRequiredCSS = function (index) { + if (index >= NETDATA.requiredCSS.length) { + return; + } + + if (NETDATA.requiredCSS[index].isAlreadyLoaded()) { + NETDATA.loadRequiredCSS(++index); + return; + } + + if (NETDATA.options.debug.main_loop) { + console.log('loading ' + NETDATA.requiredCSS[index].url); + } + + NETDATA._loadCSS(NETDATA.requiredCSS[index].url); + NETDATA.loadRequiredCSS(++index); +}; + +// Boot it! + +if (typeof netdataPrepCallback === 'function') { + netdataPrepCallback(); +} + +NETDATA.errorReset(); +NETDATA.loadRequiredCSS(0); + +NETDATA._loadjQuery(function () { + NETDATA.loadRequiredJs(0, function () { + if (typeof $().emulateTransitionEnd !== 'function') { + // bootstrap is not available + NETDATA.options.current.show_help = false; + } + + if (typeof netdataDontStart === 'undefined' || !netdataDontStart) { + if (NETDATA.options.debug.main_loop) { + console.log('starting chart refresh thread'); + } + + NETDATA.start(); + } + }); +}); +})(window, document, (typeof jQuery === 'function')?jQuery:undefined); diff --git a/web/gui/dashboard/dashboard.slate.css b/web/gui/dashboard/dashboard.slate.css new file mode 100644 index 0000000..c4e5c65 --- /dev/null +++ b/web/gui/dashboard/dashboard.slate.css @@ -0,0 +1,803 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +:root { + --color-main: #FFFFFF; + --color-primary: #00ab44; + --color-border: #878b90; +} + +html, +body { + /*font-family: Calibri,"Segoe UI","Helvetica Neue",Helvetica,Arial,sans-serif;*/ + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-style: normal; + font-variant: normal; + color: #878b90; +} + +/* fixes for default slate theme */ +code { + color: #bbb; /*#c7254e;*/ + background-color: #555; /* #f9f2f4; */ +} + +.dashboard-sidebar .nav > .active > a, +.dashboard-sidebar .nav > .active:hover > a, +.dashboard-sidebar .nav > .active:focus > a { + color: #765d9c; + border-left: 2px solid #765d9c; +} + +.morelink { + color: #765d9c; + text-decoration: none; +} + +.morelink:hover { + color: #563d7c; + text-decoration: none; +} + +.morelink:focus { + color: #765d9c; + text-decoration: none; +} + +.netdata-chart-alignment { + /* 55 for legend-right */ + margin-left: 35px; +} + +.netdata-chart-row { + width: 100%; + text-align: center; + display: flex; + display: -webkit-flex; + display: -moz-flex; + align-items: flex-end; + -moz-align-items: flex-end; + -webkit-align-items: flex-end; + justify-content: center; + -moz--webkit-justify-content: center; + -moz-justify-content: center; + padding-top: 10px; +} + +.netdata-container { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-gauge:after { + padding-top: 60%; + display: block; + content: ''; +} + +.netdata-container-easypiechart { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-easypiechart:after { + padding-top: 100%; + display: block; + content: ''; +} + +.netdata-aspect { + position: relative; + width: 100%; + padding: 0px; + margin: 0px; +} + +.netdata-container-with-legend { + display: inline-block; + overflow: hidden; + + transform: translate3d(0,0,0); + + /* fix minimum scrollbar issue in firefox */ + min-height: 99px; + + /* required for child elements to have absolute position */ + position: relative; + + /* width and height is given per chart with data-width and data-height */ +} + +.netdata-container-with-legend.netdata-container-with-legend--bottom { + display: flex; + flex-direction: column; +} + +.netdata-legend-resize-handler { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 20px; + background-color: #272b30; + font-size: 15px; + vertical-align: middle; + line-height: 15px; + cursor: ns-resize; + color: var(--color-selected, #373b40); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; +} + +.netdata-legend-toolbox { + display: block; + position: absolute; + bottom: 0px; + right: 0px; + height: 15px; + width: 110px; + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: #373b40; + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-legend-toolbox-button { + display: inline-block; + position: relative; + height: 15px; + width: 18px; + background-color: #272b30; + font-size: 12px; + vertical-align: middle; + line-height: 15px; + color: var(--color-selected, #373b40); + text-align: center; + overflow: hidden; + padding: 0px; + margin: 0px; + cursor: pointer; + + /* prevent text selection after double click */ + -webkit-user-select: none; /* webkit (safari, chrome) browsers */ + -moz-user-select: none; /* mozilla browsers */ + -khtml-user-select: none; /* webkit (konqueror) browsers */ + -ms-user-select: none; /* IE10+ */ +} + +.netdata-message { + display: inline-block; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + text-align: left; + vertical-align: top; + font-weight: bold; + font-size: x-small; + overflow: hidden; + background: inherit; + z-index: 0; +} + +.netdata-message.hidden { + display: none; +} + +.netdata-message.icon { + color: #2f3338; + text-align: center; + vertical-align: middle; +} + +.netdata-chart-legend { + position: absolute; /* within .netdata-container */ + top: 0; + right: 0; + overflow: hidden; + text-overflow: ellipsis; + line-height: 14px; + display: block; + width: 140px; /* --legend-width */ + height: calc(100% - 15px); /* 10px for the resize handler and 5px for the top margin */ + font-size: 10px; + margin-top: 5px; + text-align: left; + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-legend-title-date { + font-size: 10px; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-time { + font-size: 11px; + font-weight: bold; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-title-units { + position: absolute; + right: 10px; + float: right; + font-size: 11px; + vertical-align: top; + font-weight: normal; + margin-top: 0px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.netdata-legend-series { + position: absolute; + width: 140px; /* legend-width */ + height: calc(100% - 50px); + overflow: hidden; + text-overflow: ellipsis; + line-height: 14.5px; /* line spacing at the legend */ + display: block; + font-size: 10px; + margin-top: 0px; +} + +.netdata-legend-name-table-line { + display: inline-block; + width: 13px; + height: 4px; + border-width: 0px; + border-bottom-width: 2px; + border-bottom-style: solid; + border-bottom-color: #272b30; +} + +.netdata-legend-name-table-area { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-table-stacked { + display: inline-block; + width: 13px; + height: 5px; + border-width: 1px; + border-top-width: 1px; + border-top-style: solid; + border-top-color: inherit; +} + +.netdata-legend-name-tr { +} + +.netdata-legend-name-td { +} + +.netdata-legend-name { + text-align: left; + font-size: 11px; /* legend: dimension name size */ + font-weight: bold; + vertical-align: bottom; + margin-top: 0px; + z-index: 9; + padding: 0px; + width: 80px !important; + max-width: 80px !important; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + display: inline-block; + cursor: pointer; + -webkit-print-color-adjust: exact; +} + +.netdata-legend-value { + /*margin-left: 14px;*/ + position: absolute; + right: 10px; + float: right; + text-align: right; + font-size: 11px; /* legend: dimension value size */ + font-weight: bold; + vertical-align: bottom; + background-color: #272b30; + margin-top: 0px; + z-index: 10; + padding: 0px; + padding-left: 15px; + cursor: pointer; + /* -webkit-font-smoothing: none; */ +} + +.netdata-legend-value, .netdata-legend-name { + /* prevent highlight when shift-clicking */ + -webkit-user-select: none; + -ms-user-select: none; + -moz-user-select: none; + user-select: none; +} + +/* eslint recommends adding tabIndex for a11y, but the outline is showing on click */ +.netdata-legend-name:focus, .netdata-legend-value:focus { + outline: none; +} + +.netdata-legend-name.not-selected { + font-weight: normal; + opacity: 0.3; +} + +.netdata-chart { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: inline-block; + overflow: hidden; + width: 100%; + height: 100%; + z-index: 5; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-right { + position: absolute; /* within .netdata-container */ + top: 0; /* within .netdata-container */ + left: 0; /* within .netdata-container */ + display: block; + overflow: hidden; + margin-right: 140px; /* --legend-width */ + width: calc(100% - 140px); /* --legend-width */ + height: 100%; + z-index: 5; + flex-grow: 1; + + /* width and height is calculated (depends on the appearance of the legend) */ +} + +.netdata-chart-with-legend-bottom { + display: block; + overflow: hidden; + flex-grow: 1; +} + +.netdata-peity-chart { + +} + +.netdata-sparkline-chart { + +} + +.netdata-dygraph-chart { + +} + +.netdata-morris-chart { + +} + +.netdata-google-chart { + +} + +.dygraph-ylabel { +} + +.dygraph-axis-label-x { + overflow-x: hidden; +} + +.dygraph-axis-label { + color: #6c7075; +} + +.dygraph-label-rotate-left { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(90deg); + -webkit-transform: rotate(90deg); + -moz-transform: rotate(90deg); + -o-transform: rotate(90deg); + -ms-transform: rotate(90deg); +} + +/* For y2-axis label */ +.dygraph-label-rotate-right { + text-align: center; + /* See http://caniuse.com/#feat=transforms2d */ + transform: rotate(-90deg); + -webkit-transform: rotate(-90deg); + -moz-transform: rotate(-90deg); + -o-transform: rotate(-90deg); + -ms-transform: rotate(-90deg); +} + +.dygraph-title { + /* 56 for legend-right */ + text-indent: 36px; + text-align: left; + position: absolute; + left: 0px; + top: 4px; + font-size: 11px; + font-weight: bold; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; +} + +/* fix for sparkline tooltip under bootstrap */ +.jqstooltip { + width: auto !important; + height: auto !important; +} + +.easyPieChart { + position: relative; + text-align: center; +} + +.easyPieChart canvas { + position: absolute; + top: 0; + left: 0; +} + +.easyPieChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: normal; + text-shadow: #272b30 0px 0px 1px; + /* -webkit-font-smoothing: none; */ +} + +.easyPieChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 64%; + margin-left: 18% !important; + text-align: center; + color: var(--color-border,#676b70); + font-weight: bold; +} + +.easyPieChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 60%; + margin-left: 20% !important; + text-align: center; + color:var(--color-border,#676b70); + font-weight: normal; +} + +.gaugeChart { + position: relative; + text-align: center; +} + +.gaugeChart canvas { + position: absolute; + top: 0; + left: 0; + bottom: 0; + right: 0; + z-index: 0; +} + +.gaugeChartLabel { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-main); + font-weight: bold; + z-index: 1; + text-shadow: #272b30 0px 0px 1px; + /* text-shadow: #CCC 1px 1px 0px, #CCC -1px -1px 0px, #CCC 1px -1px 0px, #CCC -1px 1px 0px; */ + /* -webkit-text-stroke: 1px #777; */ + /* -webkit-font-smoothing: none; */ +} + +.gaugeChartTitle { + display: inline-block; + position: absolute; + float: left; + left: 0; + width: 100%; + text-align: center; + color: var(--color-border, #676b70); + font-weight: bold; +} + +.gaugeChartUnits { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 0; + width: 100%; + text-align: left; + margin-left: 5%; + color: var(--color-border,#676b70); + font-weight: normal; +} + +.gaugeChartMin { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 92%; + margin-left: 8%; + text-align: left; + color: var(--color-main); + font-weight: normal; +} + +.gaugeChartMax { + display: inline-block; + position: absolute; + float: left; + left: 0; + bottom: 8%; + width: 95%; + margin-right: 5%; + text-align: right; + color: var(--color-main); + font-weight: normal; +} + +.popover-title { + font-weight: bold; + font-size: 12px; +} + +.popover-content { + font-size: 11px; +} + +/* ---------------------------------------------------------------------------- + perfect-scrollbar settings + */ + +.ps-container { + -ms-touch-action: auto; + touch-action: auto; + overflow: hidden !important; + -ms-overflow-style: none; +} + +@supports (-ms-overflow-style: none) { + .ps-container { + overflow: auto !important; + } +} + +@media screen and (-ms-high-contrast: active), (-ms-high-contrast: none) { + .ps-container { + overflow: auto !important; + } +} + +.ps-container.ps-active-x > .ps-scrollbar-x-rail, +.ps-container.ps-active-y > .ps-scrollbar-y-rail { + display: block; + background-color: transparent; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #aaa; /* scrollbar color when dragged away */ + height: 5px; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged away */ + opacity: 0.9; +} + +.ps-container.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #aaa; /* scrollbar color when dragged away */ + width: 5px; +} + +.ps-container > .ps-scrollbar-x-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + bottom: 0px; + /* there must be 'bottom' for ps-scrollbar-x-rail */ + height: 15px; +} + +.ps-container > .ps-scrollbar-x-rail > .ps-scrollbar-x { + position: absolute; + /* please don't change 'position' */ + background-color: #666; /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + bottom: 2px; + /* there must be 'bottom' for ps-scrollbar-x */ + height: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x, .ps-container > .ps-scrollbar-x-rail:active > .ps-scrollbar-x { + height: 5px; +} + +.ps-container > .ps-scrollbar-y-rail { + display: none; + position: absolute; + /* please don't change 'position' */ + opacity: 0.2; /* the opacity when not on hover of the content */ + -webkit-transition: background-color .2s linear, opacity .2s linear; + -o-transition: background-color .2s linear, opacity .2s linear; + -moz-transition: background-color .2s linear, opacity .2s linear; + transition: background-color .2s linear, opacity .2s linear; + right: 0; + /* there must be 'right' for ps-scrollbar-y-rail */ + width: 15px; +} + +.ps-container > .ps-scrollbar-y-rail > .ps-scrollbar-y { + position: absolute; + /* please don't change 'position' */ + background-color: #666; /* #aaa; the color on content hover */ + -webkit-border-radius: 6px; + -moz-border-radius: 6px; + border-radius: 6px; + -webkit-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, -webkit-border-radius .2s ease-in-out; + -o-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + -moz-transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out; + transition: background-color .2s linear, height .2s linear, width .2s ease-in-out, border-radius .2s ease-in-out, -webkit-border-radius .2s ease-in-out, -moz-border-radius .2s ease-in-out; + right: 2px; + /* there must be 'right' for ps-scrollbar-y */ + width: 5px; /* the width of the scrollbar */ +} + +.ps-container > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y, .ps-container > .ps-scrollbar-y-rail:active > .ps-scrollbar-y { + width: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-x > .ps-scrollbar-x-rail > .ps-scrollbar-x { + background-color: #bbb; /* scrollbar color when dragged */ + height: 5px; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail { + background-color: transparent; /* background color when dragged */ + opacity: 0.9; +} + +.ps-container:hover.ps-in-scrolling.ps-y > .ps-scrollbar-y-rail > .ps-scrollbar-y { + background-color: #bbb; /* scrollbar color when dragged */ + width: 5px; +} + +.ps-container:hover > .ps-scrollbar-x-rail, +.ps-container:hover > .ps-scrollbar-y-rail { + opacity: 0.6; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-x-rail:hover > .ps-scrollbar-x { + background-color: #999; /* scrollbar color on hover */ +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover { + background-color: transparent; /* the background color on hover of the scrollbar */ + opacity: 0.9; +} + +.ps-container:hover > .ps-scrollbar-y-rail:hover > .ps-scrollbar-y { + background-color: #999; /* scrollbar color on hover */ +} + +.dygraph__history-tip { + position: absolute; + top: 50%; + transform: translateY(-50%); + display: none; /* overridden in js */ + margin-right: 25px; + direction: rtl; + overflow: hidden; + pointer-events: none; +} + +.dygraph__history-tip-content { + display: inline-block; + white-space: nowrap; + direction: ltr; + pointer-events: auto; +} diff --git a/web/gui/dashboard/demo.html b/web/gui/dashboard/demo.html new file mode 100644 index 0000000..642742a --- /dev/null +++ b/web/gui/dashboard/demo.html @@ -0,0 +1,51 @@ + + + + + Netdata Dashboard + + + + + + + + + + + + + + + + + + + + + +
    +
    +
    +
    +
    + + diff --git a/web/gui/dashboard/demo2.html b/web/gui/dashboard/demo2.html new file mode 100644 index 0000000..9a77fdd --- /dev/null +++ b/web/gui/dashboard/demo2.html @@ -0,0 +1,143 @@ + + + + + Netdata Dashboard + + + + + + + + + + + + + + + + + + + + + + +
    +
    why netdata?
    +
    +
    These charts visualize the same data...
    + + + + + + +
    +
    + +
    +
    I can trace an issue like this
    +
    +
    +
    +
    +
    Can you trace an issue like these?
     
    +
    +
    +
    +
    +
    + +
    +
    I can trace an issue like this
    +
    +
    +
    +
    +
    Can you trace an issue like these?
     
    +
    +
    +
    +
    +
    +
    Hover on the chart below, to see the selected value on the charts above!
    +
    +
    + + diff --git a/web/gui/dashboard/demosites.html b/web/gui/dashboard/demosites.html new file mode 100644 index 0000000..59435c2 --- /dev/null +++ b/web/gui/dashboard/demosites.html @@ -0,0 +1,1469 @@ + + + + + + + Netdata: Get control of your Linux Servers. Simple. Effective. Awesome. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +

    Monitor your systems and applications, the right way!

    +

    + Unparalleled insights, in real-time, + of everything happening on your systems and applications, + with stunning, interactive web dashboards + and powerful performance and health alarms. +

    + +
    +
    +
    +

    Enter the world of Netdata!

    +
    +

    1s granularity

    +
    +
    +
    +

    + Per second data collection and visualization, for all metrics! +

    +

    + Netdata zooms into the problems by providing higher resolution information, compared to any other monitoring solution. +

    +
    +
    +

    + The world goes real-time. +
     
    + High resolution metrics are required to effectively monitor and troubleshoot systems and applications, especially on virtual environments. +

    +
    +
    +
    + +
    + +
    +
    +
    +

    Unlimited metrics

    +
    +
    +
    +

    + Use all the metrics, from all available sources! +

    +

    + Netdata collects all the metrics native console tools do. It has been designed to kill the console for troubleshooting infrastructure slowdowns and outages. +

    +
    +
    +

    + All metrics are important and all should be available when you need them. +
     
    + Filtering out most metrics is like reading a book by skipping most of its pages. +

    +
    +
    +
    + +
    +
    +
    +
    +

    Meaningful presentation

    +
    +
    +
    +

    + Explore all metrics in a meaningful, easy to understand way! +

    +

    + Netdata engineers and experts on our community organize metrics in a meaningful way, so that you can learn and understand them right on the job, while troubleshooting issues of your infrastructure. +

    +
    +
    +

    + Metrics are a lot more than name-value pairs over time. +

    + It is just not practical to require from all users to have a deep understanding of all metrics for monitoring their systems and applications. +

    +
    +
    +
    + +
    + +
    +
    +
    +

    Immediate results

    +
    +
    +
    +

    + Install and use immediately! Get fully functional visualization and alarms, in just a couple of seconds after installation! +

    +

    + Netdata decouples your skills from your monitoring infrastructure. + No matter how skillful or novice you are, Netdata will apply all the community knowledge and expertise to your monitoring infrastructure. +

    +
    +
    +

    + Most of our infrastructure is based on standardized systems and applications. +
     
    + It is a tremendous waste of time and effort, in a global scale, to require from all users to configure their infrastructure dashboards and alarms metric by metric. +

    +
    +
    +
    + +
    + +
    +

    How it works

    +
    +

    + Netdata is a monitoring agent you install on all your systems: +
    + physical servers, virtual servers, containers, IoT. +

    +

    + Netdata is lightweight, designed to permanently run on all systems without disrupting their core function. + By default, it needs just 1% CPU of a single core, a few MB or RAM and no disk I/O at all. +

    +

    + Each Netdata is (by default) autonomous, taking care of all the following. +
    But all your Netdata are integrated into one large distributed application. +

    +
    +
    +

    Collect

    +

    + Netdata automatically detects data collection sources on the host it runs. + It comes with hundreds of plugins for collecting system and application metrics, + including databases, web servers, and commonly used application servers. +

    + Netdata is also a high performance, distributed statsd server, allowing custom + application metrics to be collected and visualized. +

    +
    +

    Check (alarms)

    +

    + Each Netdata spawns a thread that examines the metrics as they get collected, + evaluates pre-configured alarm expressions and triggers alarm notifications. +

    + Netdata comes with hundreds of alarms to detect common system and application issues, + that are automatically attached to the collected metrics, + supporting dozens of alarm notification integrations. +

    +
    +

    Stream

    +

    + Each Netdata can stream its metrics, in real-time, to any other Netdata. + Streaming allows Netdata to be used in ephemeral nodes and containers in auto-scaled environments, + but it also allows building Netdata hierarchies for aggregating the metrics of multiple Netdata nodes. +

    +
    +

    Store

    +

    + Each Netdata has its own internal metrics database. This database is optimized + for minimal memory footprint, and due to its lockless design allows one writer + and multiple readers per metric, concurrently, contributing significantly to + the performance of Netdata. +

    +
    +

    Archive

    +

    + Netdata can archive its metrics to time-series databases (prometheus, graphite, opentsdb, json document dbs, etc) + so that Netdata can be integrated to existing monitoring tool-chains. +

    +
    +

    Visualize

    +

    + The best part of Netdata is its visualization. Low latency, speedy and snazzy. +

    + Netdata dashboards are optimized for visual anomaly detection, a powerful tool to troubleshoot + performance issues. +

    +
    +
    +
    +
    + +
    +   +
    + + +

    netdata live demo sites

    +
    +
    + +
    +
    +
    +
    +
    + Enter London! +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    + Enter Atlanta! +
    + Donated by CDN77.com +
    +
    +
    +
    +
    +
    +
    +
    + Enter California! +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    + Enter Canada! +
    + Donated by DigitalOcean.com +
    +
    +
    +
     
    +
    +
    +
    +
    +
    + Enter Germany! +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    + Enter New York! +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    + Enter Singapore! +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    + Enter India! +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    + Israel +
    +
    +
    + requests/s +
    +
    +
    +
    +
    +
    +
    + Enter Israel! +
    + Donated by octopuscs.com +
    +
    +
    + +
    +
    +
    + EU - Spain +
    +
    +
    + requests/s +
    +
    +
    +
    +
    +
    +
    + Enter Madrid! +
    + Donated by stackscale.com +
    +
    +
    +
    +
    +
    +
    +
    +
    + +
    + Charts are coming from all servers, in parallel. +
    + The servers are not aware of this multi-server dashboard. +
    + +
    +
    +
    + EU - London connected clients +
    +
    +
    +
    +
    +
    + +
    + Each server is not aware of the other servers. +
    + But on this dashboard they are one! (hover on the chart above) +
    + + + +
    +
    + +

    Who uses netdata?

    +
    +
    +

    + Netdata is used by hundreds of thousands of users all over the world. +
     
    + Check our GitHub watchers list. +
    + You will find people working for Amazon, Atos, Baidu, Cisco Systems, Citrix, + Deutsche Telekom, DigitalOcean, Elastic, EPAM Systems, Ericsson, Google, + Groupon, Hortonworks, HP, Huawei, IBM, Microsoft, NewRelic, + Nvidia, Red Hat, SAP, Selectel, TicketMaster, Vimeo, and many more! +

    + + The following figures come from users using the netdata public global registry.
    Counting since May 16th 2016. Actual figures may be a lot higher.
    +
    +
    +
    + netdata unique users +
    +
    +
    +
    +
    +
    +
    +
    +
    + netdata monitored servers +
    +
    +
    +
    +
    +
    +
    +
    +
    + netdata sessions served +
    +
    +
    +
    +
    +
    +

    + + +

    +

    + + netdata can generate auto-refreshing badges, like these: + +
    + + + +
    + These badges auto-refresh every minute. +

    +
    +
    + netdata is featured at the GitHub's state of the Octoverse 2016 +
    + + + +
    +
    + +
    +
    +
    +
    + + + + + + +
    +
    + + + + + + + + + + +
    + + + + + + + diff --git a/web/gui/dashboard/demosites2.html b/web/gui/dashboard/demosites2.html new file mode 100644 index 0000000..4b2be5c --- /dev/null +++ b/web/gui/dashboard/demosites2.html @@ -0,0 +1,1112 @@ + + + + + Netdata - Real-time performance monitoring, done right! + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    + netdata +
    +
    + real-time performance monitoring +
    +
    + scaled out! +
    +
    + pick a netdata demo server +
    +
    + these demo servers show what you will get by installing netdata +
    + +
    +
    + +
    +
    +
    +
    +
    +
     
    + +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    +
     
    + +
    + Donated by CDN77.com +
    +
    +
    +
    +
    +
    +
    +
    +
     
    + +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    +
     
    + +
    + Donated by DigitalOcean.com +
    +
    +
    +
     
    +
    +
    +
    +
    +
    +
     
    + +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    +
     
    + +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    +
     
    + +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    +
    +
    +
    +
     
    + +
    + Donated by DigitalOcean.com +
    +
    +
    +
    +
    + +
    + this page is a custom netdata dashboard +
    +
    + charts are coming from 8 servers, in parallel +
    + the servers are not aware of this multi-server dashboard, +
    + each server is not aware of the other servers, +
    + but on this dashboard they are one! +
    +
    + + hover on a chart below, or drag it to show the past - the others will follow! +
    + double click on a chart to reset them all +
    + +
    + our nginx performance +
    +
    + (we proxy netdata through nginx, on the demo sites) +
    + + + + + +
    +
    +
    +
    + EU - London web requests/s +
    +
    +
    +
    +
    + +
    +
    + US - Atlanta web requests/s +
    +
    +
    +
    +
    + +
    +
    + US - California web requests/s +
    +
    +
    +
    +
    + +
    +
    + Canada web requests/s +
    +
    +
    +
    +
    +
    + +
    +
    +
    + EU - London active connections +
    +
    +
    +
    +
    + +
    +
    + US - Atlanta active connections +
    +
    +
    +
    +
    + +
    +
    + US - California active connections +
    +
    +
    +
    +
    + +
    +
    + Canada active connections +
    +
    +
    +
    +
    +
    +
    + +
    + these charts are draggable and touchable, double click them to reset them +
    + + +
    + bandwidth consumption on the demo sites +
    +
    + Linux QoS is configured by FireQOS +
    + + + + + +
    +
    +
    +
    +
    + +
    +
    + +
    + +
    +
    +
    + +
    +
    +
    +
    + +
    +
    +
    + +
    + +
    +
    + +
    + +
    +
    +
    + +
    +
    +
    +
    +
    +
    + these legends are interactive and the charts are resizable here ^^^ +
    + +
    + DDoS protection performance on the demo sites +
    +
    + iptables SYNPROXY configured by FireHOL +
    + +
    + +
    +
    + EU - London, TCP SYN packets/s received +
    +
    +
    +
    +
    + +
    +
    + US - Atlanta, TCP SYN packets/s received +
    +
    +
    +
    +
    + +
    +
    + US - California, TCP SYN packets/s received +
    +
    +
    +
    +
    + +
    +
    + Canada, TCP SYN packets/s received +
    +
    +
    +
    +
    +
    +
    + did you notice the decimal numbers? +
    netdata interpolates collected values at second boundaries, with nanosecond detail!
    +
    + + +
    + CPU Utilization of the demo sites +
    + +
    +
    +
    +
    + +
    +
    +
    + +
    +
    +
    + +
    +
    +
    +
    +
    + what is using so much CPU? +
    The site iplists.firehol.org is maintained by FireHOL - the CPU is used for comparing security IP Lists.
    +
    + +
    + Netdata performance +
    +
    + netdata monitors users, user groups, applications (process trees) +
    + containers (lxc, docker, etc.) and SNMP devices. +
    + + + + + +
    +
    +
    +
    + EU - London, CPU % of a single core +
    +
    +
    +
    +
    + +
    +
    + US - Atlanta, CPU % of a single core +
    +
    +
    +
    +
    + +
    +
    + US - California, CPU % of a single core +
    +
    +
    +
    +
    + +
    +
    + Toronto, CPU % of a single core +
    +
    +
    +
    +
    + +
    + this utilization is about the whole netdata process tree and the percentage is of a single core! +
    including BASH plugins (it monitors mysql on the demo sites), node.js plugins (it monitors bind9 on the demo sites), etc. +
    and including the chart refreshes for the dashboards of all viewers.
    +
    +
    + +
    +
    +
    + EU - London, API average response time in milliseconds +
    +
    +
    +
    +
    + +
    +
    + US - Atlanta, API average response time in milliseconds +
    +
    +
    +
    +
    + +
    +
    + US - California, API average response time in milliseconds +
    +
    +
    +
    +
    + +
    +
    + Canada, API average response time in milliseconds +
    +
    +
    +
    +
    + +
    + netdata is really fast (the values are milliseconds!) +
    + These values include everything, from the reception of the first byte to the dispatch of the last, including gzip compression. +
    + Values above 2-3ms are usually chart refreshes of charts with several dimensions, charts with very long durations (zoomed out), or file transfers. +
    +
    +
    +
    + +
    + want to know more? +
    + jump to the netdata page at github +
    + it needs just 3 mins to be installed on your servers! +
    +   +
    +
    + + + diff --git a/web/gui/dashboard/favicon.ico b/web/gui/dashboard/favicon.ico new file mode 100644 index 0000000..064032a Binary files /dev/null and b/web/gui/dashboard/favicon.ico differ diff --git a/web/gui/dashboard/fonts/glyphicons-halflings-regular.eot b/web/gui/dashboard/fonts/glyphicons-halflings-regular.eot new file mode 100644 index 0000000..b93a495 Binary files /dev/null and b/web/gui/dashboard/fonts/glyphicons-halflings-regular.eot differ diff --git a/web/gui/dashboard/fonts/glyphicons-halflings-regular.svg b/web/gui/dashboard/fonts/glyphicons-halflings-regular.svg new file mode 100644 index 0000000..2a4aaba --- /dev/null +++ b/web/gui/dashboard/fonts/glyphicons-halflings-regular.svg @@ -0,0 +1,289 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web/gui/dashboard/fonts/glyphicons-halflings-regular.ttf b/web/gui/dashboard/fonts/glyphicons-halflings-regular.ttf new file mode 100644 index 0000000..1413fc6 Binary files /dev/null and b/web/gui/dashboard/fonts/glyphicons-halflings-regular.ttf differ diff --git a/web/gui/dashboard/fonts/glyphicons-halflings-regular.woff b/web/gui/dashboard/fonts/glyphicons-halflings-regular.woff new file mode 100644 index 0000000..9e61285 Binary files /dev/null and b/web/gui/dashboard/fonts/glyphicons-halflings-regular.woff differ diff --git a/web/gui/dashboard/fonts/glyphicons-halflings-regular.woff2 b/web/gui/dashboard/fonts/glyphicons-halflings-regular.woff2 new file mode 100644 index 0000000..64539b5 Binary files /dev/null and b/web/gui/dashboard/fonts/glyphicons-halflings-regular.woff2 differ diff --git a/web/gui/dashboard/goto-host-from-alarm.html b/web/gui/dashboard/goto-host-from-alarm.html new file mode 100644 index 0000000..5d424c6 --- /dev/null +++ b/web/gui/dashboard/goto-host-from-alarm.html @@ -0,0 +1,250 @@ + + + + + Goto a host you know... + + + + + + + + + + + + + +
    +
    + Please wait... + +
    + +
    +
    +

    + This page can only find netdata URLs you have already visited and are linked to your account on this netdata registry. +

    +
    +
    + +
    + + diff --git a/web/gui/dashboard/images/alert-128-orange.png b/web/gui/dashboard/images/alert-128-orange.png new file mode 100644 index 0000000..c6182bf Binary files /dev/null and b/web/gui/dashboard/images/alert-128-orange.png differ diff --git a/web/gui/dashboard/images/alert-128-red.png b/web/gui/dashboard/images/alert-128-red.png new file mode 100644 index 0000000..90b9c73 Binary files /dev/null and b/web/gui/dashboard/images/alert-128-red.png differ diff --git a/web/gui/dashboard/images/alert-multi-size-orange.ico b/web/gui/dashboard/images/alert-multi-size-orange.ico new file mode 100644 index 0000000..edca438 Binary files /dev/null and b/web/gui/dashboard/images/alert-multi-size-orange.ico differ diff --git a/web/gui/dashboard/images/alert-multi-size-red.ico b/web/gui/dashboard/images/alert-multi-size-red.ico new file mode 100644 index 0000000..8f7cbd0 Binary files /dev/null and b/web/gui/dashboard/images/alert-multi-size-red.ico differ diff --git a/web/gui/dashboard/images/alerts.jpg b/web/gui/dashboard/images/alerts.jpg new file mode 100644 index 0000000..6593c72 Binary files /dev/null and b/web/gui/dashboard/images/alerts.jpg differ diff --git a/web/gui/dashboard/images/alerts.png b/web/gui/dashboard/images/alerts.png new file mode 100644 index 0000000..fe9940d Binary files /dev/null and b/web/gui/dashboard/images/alerts.png differ diff --git a/web/gui/dashboard/images/android-icon-144x144.png b/web/gui/dashboard/images/android-icon-144x144.png new file mode 100644 index 0000000..69efa5a Binary files /dev/null and b/web/gui/dashboard/images/android-icon-144x144.png differ diff --git a/web/gui/dashboard/images/android-icon-192x192.png b/web/gui/dashboard/images/android-icon-192x192.png new file mode 100644 index 0000000..e574435 Binary files /dev/null and b/web/gui/dashboard/images/android-icon-192x192.png differ diff --git a/web/gui/dashboard/images/android-icon-36x36.png b/web/gui/dashboard/images/android-icon-36x36.png new file mode 100644 index 0000000..4ba804d Binary files /dev/null and b/web/gui/dashboard/images/android-icon-36x36.png differ diff --git a/web/gui/dashboard/images/android-icon-48x48.png b/web/gui/dashboard/images/android-icon-48x48.png new file mode 100644 index 0000000..04970d4 Binary files /dev/null and b/web/gui/dashboard/images/android-icon-48x48.png differ diff --git a/web/gui/dashboard/images/android-icon-72x72.png b/web/gui/dashboard/images/android-icon-72x72.png new file mode 100644 index 0000000..5cbc701 Binary files /dev/null and b/web/gui/dashboard/images/android-icon-72x72.png differ diff --git a/web/gui/dashboard/images/android-icon-96x96.png b/web/gui/dashboard/images/android-icon-96x96.png new file mode 100644 index 0000000..21f27ce Binary files /dev/null and b/web/gui/dashboard/images/android-icon-96x96.png differ diff --git a/web/gui/dashboard/images/animated.gif b/web/gui/dashboard/images/animated.gif new file mode 100644 index 0000000..0e94a20 Binary files /dev/null and b/web/gui/dashboard/images/animated.gif differ diff --git a/web/gui/dashboard/images/apple-icon-114x114.png b/web/gui/dashboard/images/apple-icon-114x114.png new file mode 100644 index 0000000..7993e05 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-114x114.png differ diff --git a/web/gui/dashboard/images/apple-icon-120x120.png b/web/gui/dashboard/images/apple-icon-120x120.png new file mode 100644 index 0000000..3fbe8fd Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-120x120.png differ diff --git a/web/gui/dashboard/images/apple-icon-144x144.png b/web/gui/dashboard/images/apple-icon-144x144.png new file mode 100644 index 0000000..8d46569 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-144x144.png differ diff --git a/web/gui/dashboard/images/apple-icon-152x152.png b/web/gui/dashboard/images/apple-icon-152x152.png new file mode 100644 index 0000000..11a1072 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-152x152.png differ diff --git a/web/gui/dashboard/images/apple-icon-180x180.png b/web/gui/dashboard/images/apple-icon-180x180.png new file mode 100644 index 0000000..314efb1 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-180x180.png differ diff --git a/web/gui/dashboard/images/apple-icon-57x57.png b/web/gui/dashboard/images/apple-icon-57x57.png new file mode 100644 index 0000000..8528361 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-57x57.png differ diff --git a/web/gui/dashboard/images/apple-icon-60x60.png b/web/gui/dashboard/images/apple-icon-60x60.png new file mode 100644 index 0000000..2662e85 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-60x60.png differ diff --git a/web/gui/dashboard/images/apple-icon-72x72.png b/web/gui/dashboard/images/apple-icon-72x72.png new file mode 100644 index 0000000..4a6b056 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-72x72.png differ diff --git a/web/gui/dashboard/images/apple-icon-76x76.png b/web/gui/dashboard/images/apple-icon-76x76.png new file mode 100644 index 0000000..c2bf6c9 Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-76x76.png differ diff --git a/web/gui/dashboard/images/apple-icon-precomposed.png b/web/gui/dashboard/images/apple-icon-precomposed.png new file mode 100644 index 0000000..9c3e73e Binary files /dev/null and b/web/gui/dashboard/images/apple-icon-precomposed.png differ diff --git a/web/gui/dashboard/images/apple-icon.png b/web/gui/dashboard/images/apple-icon.png new file mode 100644 index 0000000..9c3e73e Binary files /dev/null and b/web/gui/dashboard/images/apple-icon.png differ diff --git a/web/gui/dashboard/images/banner-icon-144x144.png b/web/gui/dashboard/images/banner-icon-144x144.png new file mode 100644 index 0000000..fef3dca Binary files /dev/null and b/web/gui/dashboard/images/banner-icon-144x144.png differ diff --git a/web/gui/dashboard/images/check-mark-2-128-green.png b/web/gui/dashboard/images/check-mark-2-128-green.png new file mode 100644 index 0000000..e04ddca Binary files /dev/null and b/web/gui/dashboard/images/check-mark-2-128-green.png differ diff --git a/web/gui/dashboard/images/check-mark-2-multi-size-green.ico b/web/gui/dashboard/images/check-mark-2-multi-size-green.ico new file mode 100644 index 0000000..2fc4141 Binary files /dev/null and b/web/gui/dashboard/images/check-mark-2-multi-size-green.ico differ diff --git a/web/gui/dashboard/images/dashboards.png b/web/gui/dashboard/images/dashboards.png new file mode 100644 index 0000000..46cdfbb Binary files /dev/null and b/web/gui/dashboard/images/dashboards.png differ diff --git a/web/gui/dashboard/images/favicon-128.png b/web/gui/dashboard/images/favicon-128.png new file mode 100644 index 0000000..5371f92 Binary files /dev/null and b/web/gui/dashboard/images/favicon-128.png differ diff --git a/web/gui/dashboard/images/favicon-16x16.png b/web/gui/dashboard/images/favicon-16x16.png new file mode 100644 index 0000000..5729f5a Binary files /dev/null and b/web/gui/dashboard/images/favicon-16x16.png differ diff --git a/web/gui/dashboard/images/favicon-196x196.png b/web/gui/dashboard/images/favicon-196x196.png new file mode 100644 index 0000000..a208c27 Binary files /dev/null and b/web/gui/dashboard/images/favicon-196x196.png differ diff --git a/web/gui/dashboard/images/favicon-32x32.png b/web/gui/dashboard/images/favicon-32x32.png new file mode 100644 index 0000000..cdb0a48 Binary files /dev/null and b/web/gui/dashboard/images/favicon-32x32.png differ diff --git a/web/gui/dashboard/images/favicon-96x96.png b/web/gui/dashboard/images/favicon-96x96.png new file mode 100644 index 0000000..dbe7dea Binary files /dev/null and b/web/gui/dashboard/images/favicon-96x96.png differ diff --git a/web/gui/dashboard/images/favicon.ico b/web/gui/dashboard/images/favicon.ico new file mode 100644 index 0000000..064032a Binary files /dev/null and b/web/gui/dashboard/images/favicon.ico differ diff --git a/web/gui/dashboard/images/home.png b/web/gui/dashboard/images/home.png new file mode 100644 index 0000000..cf7db9d Binary files /dev/null and b/web/gui/dashboard/images/home.png differ diff --git a/web/gui/dashboard/images/ms-icon-144x144.png b/web/gui/dashboard/images/ms-icon-144x144.png new file mode 100644 index 0000000..8d46569 Binary files /dev/null and b/web/gui/dashboard/images/ms-icon-144x144.png differ diff --git a/web/gui/dashboard/images/ms-icon-150x150.png b/web/gui/dashboard/images/ms-icon-150x150.png new file mode 100644 index 0000000..4683d56 Binary files /dev/null and b/web/gui/dashboard/images/ms-icon-150x150.png differ diff --git a/web/gui/dashboard/images/ms-icon-310x150.png b/web/gui/dashboard/images/ms-icon-310x150.png new file mode 100644 index 0000000..5d4ac57 Binary files /dev/null and b/web/gui/dashboard/images/ms-icon-310x150.png differ diff --git a/web/gui/dashboard/images/ms-icon-310x310.png b/web/gui/dashboard/images/ms-icon-310x310.png new file mode 100644 index 0000000..bdb591b Binary files /dev/null and b/web/gui/dashboard/images/ms-icon-310x310.png differ diff --git a/web/gui/dashboard/images/ms-icon-36x36.png b/web/gui/dashboard/images/ms-icon-36x36.png new file mode 100644 index 0000000..e251302 Binary files /dev/null and b/web/gui/dashboard/images/ms-icon-36x36.png differ diff --git a/web/gui/dashboard/images/ms-icon-70x70.png b/web/gui/dashboard/images/ms-icon-70x70.png new file mode 100644 index 0000000..5371f92 Binary files /dev/null and b/web/gui/dashboard/images/ms-icon-70x70.png differ diff --git a/web/gui/dashboard/images/netdata-logomark.svg b/web/gui/dashboard/images/netdata-logomark.svg new file mode 100644 index 0000000..18152fb --- /dev/null +++ b/web/gui/dashboard/images/netdata-logomark.svg @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/web/gui/dashboard/images/netdata.svg b/web/gui/dashboard/images/netdata.svg new file mode 100644 index 0000000..f8ddbda --- /dev/null +++ b/web/gui/dashboard/images/netdata.svg @@ -0,0 +1,18 @@ + + + + + + Created by potrace 1.15, written by Peter Selinger 2001-2017 + + + + + + + + + + diff --git a/web/gui/dashboard/images/nodeView.png b/web/gui/dashboard/images/nodeView.png new file mode 100644 index 0000000..e9b3e1b Binary files /dev/null and b/web/gui/dashboard/images/nodeView.png differ diff --git a/web/gui/dashboard/images/nodes.jpg b/web/gui/dashboard/images/nodes.jpg new file mode 100644 index 0000000..3e93c0d Binary files /dev/null and b/web/gui/dashboard/images/nodes.jpg differ diff --git a/web/gui/dashboard/images/overview.png b/web/gui/dashboard/images/overview.png new file mode 100644 index 0000000..6e54db6 Binary files /dev/null and b/web/gui/dashboard/images/overview.png differ diff --git a/web/gui/dashboard/images/packaging-beta-tag.svg b/web/gui/dashboard/images/packaging-beta-tag.svg new file mode 100644 index 0000000..cebdc08 --- /dev/null +++ b/web/gui/dashboard/images/packaging-beta-tag.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web/gui/dashboard/images/post.png b/web/gui/dashboard/images/post.png new file mode 100644 index 0000000..6bad547 Binary files /dev/null and b/web/gui/dashboard/images/post.png differ diff --git a/web/gui/dashboard/images/pricing.png b/web/gui/dashboard/images/pricing.png new file mode 100644 index 0000000..d5406e8 Binary files /dev/null and b/web/gui/dashboard/images/pricing.png differ diff --git a/web/gui/dashboard/images/seo-performance-128.png b/web/gui/dashboard/images/seo-performance-128.png new file mode 100644 index 0000000..2a212a4 Binary files /dev/null and b/web/gui/dashboard/images/seo-performance-128.png differ diff --git a/web/gui/dashboard/index-node-view.html b/web/gui/dashboard/index-node-view.html new file mode 100644 index 0000000..8483892 --- /dev/null +++ b/web/gui/dashboard/index-node-view.html @@ -0,0 +1,30 @@ + + + + + Netdata TV Dashboard + + + + + + + + + + + + + + + + + + + + + +
    + + + diff --git a/web/gui/dashboard/index.html b/web/gui/dashboard/index.html new file mode 100644 index 0000000..db46d07 --- /dev/null +++ b/web/gui/dashboard/index.html @@ -0,0 +1,16 @@ +netdata dashboard
    You must enable JavaScript in order to use Netdata!
    You can do this in your browser settings.
    \ No newline at end of file diff --git a/web/gui/dashboard/infographic.html b/web/gui/dashboard/infographic.html new file mode 100644 index 0000000..0d3f563 --- /dev/null +++ b/web/gui/dashboard/infographic.html @@ -0,0 +1,171 @@ + + + + + + Netdata: Get control of your Linux Servers. Simple. Effective. Awesome. + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    +

    + Interactive infographic of netdata features and functions +

    +

    + Hover and click on the infographic, to open the related wiki page. +
    + + The links and the docs are still a work in progress. + The interactive infographic is a feature of draw.io. + +

    +
    + +
    +

    + New to netdata? Have a look at a netdata demo. You will love it! +

    +

    + + + +

    +
    +
    +
    +
    +
    +
    + + + + + + + + + +
    + + + + + + diff --git a/web/gui/dashboard/lib/bootstrap-3.3.7.min.js b/web/gui/dashboard/lib/bootstrap-3.3.7.min.js new file mode 100644 index 0000000..03a9716 --- /dev/null +++ b/web/gui/dashboard/lib/bootstrap-3.3.7.min.js @@ -0,0 +1,8 @@ +/*! + * Bootstrap v3.3.7 (http://getbootstrap.com) + * Copyright 2011-2016 Twitter, Inc. + * Licensed under the MIT license + * SPDX-License-Identifier: MIT + */ +if("undefined"==typeof jQuery)throw new Error("Bootstrap's JavaScript requires jQuery");+function(a){"use strict";var b=a.fn.jquery.split(" ")[0].split(".");if(b[0]<2&&b[1]<9||1==b[0]&&9==b[1]&&b[2]<1||b[0]>3)throw new Error("Bootstrap's JavaScript requires jQuery version 1.9.1 or higher, but lower than version 4")}(jQuery),+function(a){"use strict";function b(){var a=document.createElement("bootstrap"),b={WebkitTransition:"webkitTransitionEnd",MozTransition:"transitionend",OTransition:"oTransitionEnd otransitionend",transition:"transitionend"};for(var c in b)if(void 0!==a.style[c])return{end:b[c]};return!1}a.fn.emulateTransitionEnd=function(b){var c=!1,d=this;a(this).one("bsTransitionEnd",function(){c=!0});var e=function(){c||a(d).trigger(a.support.transition.end)};return setTimeout(e,b),this},a(function(){a.support.transition=b(),a.support.transition&&(a.event.special.bsTransitionEnd={bindType:a.support.transition.end,delegateType:a.support.transition.end,handle:function(b){if(a(b.target).is(this))return b.handleObj.handler.apply(this,arguments)}})})}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var c=a(this),e=c.data("bs.alert");e||c.data("bs.alert",e=new d(this)),"string"==typeof b&&e[b].call(c)})}var c='[data-dismiss="alert"]',d=function(b){a(b).on("click",c,this.close)};d.VERSION="3.3.7",d.TRANSITION_DURATION=150,d.prototype.close=function(b){function c(){g.detach().trigger("closed.bs.alert").remove()}var e=a(this),f=e.attr("data-target");f||(f=e.attr("href"),f=f&&f.replace(/.*(?=#[^\s]*$)/,""));var g=a("#"===f?[]:f);b&&b.preventDefault(),g.length||(g=e.closest(".alert")),g.trigger(b=a.Event("close.bs.alert")),b.isDefaultPrevented()||(g.removeClass("in"),a.support.transition&&g.hasClass("fade")?g.one("bsTransitionEnd",c).emulateTransitionEnd(d.TRANSITION_DURATION):c())};var e=a.fn.alert;a.fn.alert=b,a.fn.alert.Constructor=d,a.fn.alert.noConflict=function(){return a.fn.alert=e,this},a(document).on("click.bs.alert.data-api",c,d.prototype.close)}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.button"),f="object"==typeof b&&b;e||d.data("bs.button",e=new c(this,f)),"toggle"==b?e.toggle():b&&e.setState(b)})}var c=function(b,d){this.$element=a(b),this.options=a.extend({},c.DEFAULTS,d),this.isLoading=!1};c.VERSION="3.3.7",c.DEFAULTS={loadingText:"loading..."},c.prototype.setState=function(b){var c="disabled",d=this.$element,e=d.is("input")?"val":"html",f=d.data();b+="Text",null==f.resetText&&d.data("resetText",d[e]()),setTimeout(a.proxy(function(){d[e](null==f[b]?this.options[b]:f[b]),"loadingText"==b?(this.isLoading=!0,d.addClass(c).attr(c,c).prop(c,!0)):this.isLoading&&(this.isLoading=!1,d.removeClass(c).removeAttr(c).prop(c,!1))},this),0)},c.prototype.toggle=function(){var a=!0,b=this.$element.closest('[data-toggle="buttons"]');if(b.length){var c=this.$element.find("input");"radio"==c.prop("type")?(c.prop("checked")&&(a=!1),b.find(".active").removeClass("active"),this.$element.addClass("active")):"checkbox"==c.prop("type")&&(c.prop("checked")!==this.$element.hasClass("active")&&(a=!1),this.$element.toggleClass("active")),c.prop("checked",this.$element.hasClass("active")),a&&c.trigger("change")}else this.$element.attr("aria-pressed",!this.$element.hasClass("active")),this.$element.toggleClass("active")};var d=a.fn.button;a.fn.button=b,a.fn.button.Constructor=c,a.fn.button.noConflict=function(){return a.fn.button=d,this},a(document).on("click.bs.button.data-api",'[data-toggle^="button"]',function(c){var d=a(c.target).closest(".btn");b.call(d,"toggle"),a(c.target).is('input[type="radio"], input[type="checkbox"]')||(c.preventDefault(),d.is("input,button")?d.trigger("focus"):d.find("input:visible,button:visible").first().trigger("focus"))}).on("focus.bs.button.data-api blur.bs.button.data-api",'[data-toggle^="button"]',function(b){a(b.target).closest(".btn").toggleClass("focus",/^focus(in)?$/.test(b.type))})}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.carousel"),f=a.extend({},c.DEFAULTS,d.data(),"object"==typeof b&&b),g="string"==typeof b?b:f.slide;e||d.data("bs.carousel",e=new c(this,f)),"number"==typeof b?e.to(b):g?e[g]():f.interval&&e.pause().cycle()})}var c=function(b,c){this.$element=a(b),this.$indicators=this.$element.find(".carousel-indicators"),this.options=c,this.paused=null,this.sliding=null,this.interval=null,this.$active=null,this.$items=null,this.options.keyboard&&this.$element.on("keydown.bs.carousel",a.proxy(this.keydown,this)),"hover"==this.options.pause&&!("ontouchstart"in document.documentElement)&&this.$element.on("mouseenter.bs.carousel",a.proxy(this.pause,this)).on("mouseleave.bs.carousel",a.proxy(this.cycle,this))};c.VERSION="3.3.7",c.TRANSITION_DURATION=600,c.DEFAULTS={interval:5e3,pause:"hover",wrap:!0,keyboard:!0},c.prototype.keydown=function(a){if(!/input|textarea/i.test(a.target.tagName)){switch(a.which){case 37:this.prev();break;case 39:this.next();break;default:return}a.preventDefault()}},c.prototype.cycle=function(b){return b||(this.paused=!1),this.interval&&clearInterval(this.interval),this.options.interval&&!this.paused&&(this.interval=setInterval(a.proxy(this.next,this),this.options.interval)),this},c.prototype.getItemIndex=function(a){return this.$items=a.parent().children(".item"),this.$items.index(a||this.$active)},c.prototype.getItemForDirection=function(a,b){var c=this.getItemIndex(b),d="prev"==a&&0===c||"next"==a&&c==this.$items.length-1;if(d&&!this.options.wrap)return b;var e="prev"==a?-1:1,f=(c+e)%this.$items.length;return this.$items.eq(f)},c.prototype.to=function(a){var b=this,c=this.getItemIndex(this.$active=this.$element.find(".item.active"));if(!(a>this.$items.length-1||a<0))return this.sliding?this.$element.one("slid.bs.carousel",function(){b.to(a)}):c==a?this.pause().cycle():this.slide(a>c?"next":"prev",this.$items.eq(a))},c.prototype.pause=function(b){return b||(this.paused=!0),this.$element.find(".next, .prev").length&&a.support.transition&&(this.$element.trigger(a.support.transition.end),this.cycle(!0)),this.interval=clearInterval(this.interval),this},c.prototype.next=function(){if(!this.sliding)return this.slide("next")},c.prototype.prev=function(){if(!this.sliding)return this.slide("prev")},c.prototype.slide=function(b,d){var e=this.$element.find(".item.active"),f=d||this.getItemForDirection(b,e),g=this.interval,h="next"==b?"left":"right",i=this;if(f.hasClass("active"))return this.sliding=!1;var j=f[0],k=a.Event("slide.bs.carousel",{relatedTarget:j,direction:h});if(this.$element.trigger(k),!k.isDefaultPrevented()){if(this.sliding=!0,g&&this.pause(),this.$indicators.length){this.$indicators.find(".active").removeClass("active");var l=a(this.$indicators.children()[this.getItemIndex(f)]);l&&l.addClass("active")}var m=a.Event("slid.bs.carousel",{relatedTarget:j,direction:h});return a.support.transition&&this.$element.hasClass("slide")?(f.addClass(b),f[0].offsetWidth,e.addClass(h),f.addClass(h),e.one("bsTransitionEnd",function(){f.removeClass([b,h].join(" ")).addClass("active"),e.removeClass(["active",h].join(" ")),i.sliding=!1,setTimeout(function(){i.$element.trigger(m)},0)}).emulateTransitionEnd(c.TRANSITION_DURATION)):(e.removeClass("active"),f.addClass("active"),this.sliding=!1,this.$element.trigger(m)),g&&this.cycle(),this}};var d=a.fn.carousel;a.fn.carousel=b,a.fn.carousel.Constructor=c,a.fn.carousel.noConflict=function(){return a.fn.carousel=d,this};var e=function(c){var d,e=a(this),f=a(e.attr("data-target")||(d=e.attr("href"))&&d.replace(/.*(?=#[^\s]+$)/,""));if(f.hasClass("carousel")){var g=a.extend({},f.data(),e.data()),h=e.attr("data-slide-to");h&&(g.interval=!1),b.call(f,g),h&&f.data("bs.carousel").to(h),c.preventDefault()}};a(document).on("click.bs.carousel.data-api","[data-slide]",e).on("click.bs.carousel.data-api","[data-slide-to]",e),a(window).on("load",function(){a('[data-ride="carousel"]').each(function(){var c=a(this);b.call(c,c.data())})})}(jQuery),+function(a){"use strict";function b(b){var c,d=b.attr("data-target")||(c=b.attr("href"))&&c.replace(/.*(?=#[^\s]+$)/,"");return a(d)}function c(b){return this.each(function(){var c=a(this),e=c.data("bs.collapse"),f=a.extend({},d.DEFAULTS,c.data(),"object"==typeof b&&b);!e&&f.toggle&&/show|hide/.test(b)&&(f.toggle=!1),e||c.data("bs.collapse",e=new d(this,f)),"string"==typeof b&&e[b]()})}var d=function(b,c){this.$element=a(b),this.options=a.extend({},d.DEFAULTS,c),this.$trigger=a('[data-toggle="collapse"][href="#'+b.id+'"],[data-toggle="collapse"][data-target="#'+b.id+'"]'),this.transitioning=null,this.options.parent?this.$parent=this.getParent():this.addAriaAndCollapsedClass(this.$element,this.$trigger),this.options.toggle&&this.toggle()};d.VERSION="3.3.7",d.TRANSITION_DURATION=350,d.DEFAULTS={toggle:!0},d.prototype.dimension=function(){var a=this.$element.hasClass("width");return a?"width":"height"},d.prototype.show=function(){if(!this.transitioning&&!this.$element.hasClass("in")){var b,e=this.$parent&&this.$parent.children(".panel").children(".in, .collapsing");if(!(e&&e.length&&(b=e.data("bs.collapse"),b&&b.transitioning))){var f=a.Event("show.bs.collapse");if(this.$element.trigger(f),!f.isDefaultPrevented()){e&&e.length&&(c.call(e,"hide"),b||e.data("bs.collapse",null));var g=this.dimension();this.$element.removeClass("collapse").addClass("collapsing")[g](0).attr("aria-expanded",!0),this.$trigger.removeClass("collapsed").attr("aria-expanded",!0),this.transitioning=1;var h=function(){this.$element.removeClass("collapsing").addClass("collapse in")[g](""),this.transitioning=0,this.$element.trigger("shown.bs.collapse")};if(!a.support.transition)return h.call(this);var i=a.camelCase(["scroll",g].join("-"));this.$element.one("bsTransitionEnd",a.proxy(h,this)).emulateTransitionEnd(d.TRANSITION_DURATION)[g](this.$element[0][i])}}}},d.prototype.hide=function(){if(!this.transitioning&&this.$element.hasClass("in")){var b=a.Event("hide.bs.collapse");if(this.$element.trigger(b),!b.isDefaultPrevented()){var c=this.dimension();this.$element[c](this.$element[c]())[0].offsetHeight,this.$element.addClass("collapsing").removeClass("collapse in").attr("aria-expanded",!1),this.$trigger.addClass("collapsed").attr("aria-expanded",!1),this.transitioning=1;var e=function(){this.transitioning=0,this.$element.removeClass("collapsing").addClass("collapse").trigger("hidden.bs.collapse")};return a.support.transition?void this.$element[c](0).one("bsTransitionEnd",a.proxy(e,this)).emulateTransitionEnd(d.TRANSITION_DURATION):e.call(this)}}},d.prototype.toggle=function(){this[this.$element.hasClass("in")?"hide":"show"]()},d.prototype.getParent=function(){return a(this.options.parent).find('[data-toggle="collapse"][data-parent="'+this.options.parent+'"]').each(a.proxy(function(c,d){var e=a(d);this.addAriaAndCollapsedClass(b(e),e)},this)).end()},d.prototype.addAriaAndCollapsedClass=function(a,b){var c=a.hasClass("in");a.attr("aria-expanded",c),b.toggleClass("collapsed",!c).attr("aria-expanded",c)};var e=a.fn.collapse;a.fn.collapse=c,a.fn.collapse.Constructor=d,a.fn.collapse.noConflict=function(){return a.fn.collapse=e,this},a(document).on("click.bs.collapse.data-api",'[data-toggle="collapse"]',function(d){var e=a(this);e.attr("data-target")||d.preventDefault();var f=b(e),g=f.data("bs.collapse"),h=g?"toggle":e.data();c.call(f,h)})}(jQuery),+function(a){"use strict";function b(b){var c=b.attr("data-target");c||(c=b.attr("href"),c=c&&/#[A-Za-z]/.test(c)&&c.replace(/.*(?=#[^\s]*$)/,""));var d=c&&a(c);return d&&d.length?d:b.parent()}function c(c){c&&3===c.which||(a(e).remove(),a(f).each(function(){var d=a(this),e=b(d),f={relatedTarget:this};e.hasClass("open")&&(c&&"click"==c.type&&/input|textarea/i.test(c.target.tagName)&&a.contains(e[0],c.target)||(e.trigger(c=a.Event("hide.bs.dropdown",f)),c.isDefaultPrevented()||(d.attr("aria-expanded","false"),e.removeClass("open").trigger(a.Event("hidden.bs.dropdown",f)))))}))}function d(b){return this.each(function(){var c=a(this),d=c.data("bs.dropdown");d||c.data("bs.dropdown",d=new g(this)),"string"==typeof b&&d[b].call(c)})}var e=".dropdown-backdrop",f='[data-toggle="dropdown"]',g=function(b){a(b).on("click.bs.dropdown",this.toggle)};g.VERSION="3.3.7",g.prototype.toggle=function(d){var e=a(this);if(!e.is(".disabled, :disabled")){var f=b(e),g=f.hasClass("open");if(c(),!g){"ontouchstart"in document.documentElement&&!f.closest(".navbar-nav").length&&a(document.createElement("div")).addClass("dropdown-backdrop").insertAfter(a(this)).on("click",c);var h={relatedTarget:this};if(f.trigger(d=a.Event("show.bs.dropdown",h)),d.isDefaultPrevented())return;e.trigger("focus").attr("aria-expanded","true"),f.toggleClass("open").trigger(a.Event("shown.bs.dropdown",h))}return!1}},g.prototype.keydown=function(c){if(/(38|40|27|32)/.test(c.which)&&!/input|textarea/i.test(c.target.tagName)){var d=a(this);if(c.preventDefault(),c.stopPropagation(),!d.is(".disabled, :disabled")){var e=b(d),g=e.hasClass("open");if(!g&&27!=c.which||g&&27==c.which)return 27==c.which&&e.find(f).trigger("focus"),d.trigger("click");var h=" li:not(.disabled):visible a",i=e.find(".dropdown-menu"+h);if(i.length){var j=i.index(c.target);38==c.which&&j>0&&j--,40==c.which&&jdocument.documentElement.clientHeight;this.$element.css({paddingLeft:!this.bodyIsOverflowing&&a?this.scrollbarWidth:"",paddingRight:this.bodyIsOverflowing&&!a?this.scrollbarWidth:""})},c.prototype.resetAdjustments=function(){this.$element.css({paddingLeft:"",paddingRight:""})},c.prototype.checkScrollbar=function(){var a=window.innerWidth;if(!a){var b=document.documentElement.getBoundingClientRect();a=b.right-Math.abs(b.left)}this.bodyIsOverflowing=document.body.clientWidth
    ',trigger:"hover focus",title:"",delay:0,html:!1,container:!1,viewport:{selector:"body",padding:0}},c.prototype.init=function(b,c,d){if(this.enabled=!0,this.type=b,this.$element=a(c),this.options=this.getOptions(d),this.$viewport=this.options.viewport&&a(a.isFunction(this.options.viewport)?this.options.viewport.call(this,this.$element):this.options.viewport.selector||this.options.viewport),this.inState={click:!1,hover:!1,focus:!1},this.$element[0]instanceof document.constructor&&!this.options.selector)throw new Error("`selector` option must be specified when initializing "+this.type+" on the window.document object!");for(var e=this.options.trigger.split(" "),f=e.length;f--;){var g=e[f];if("click"==g)this.$element.on("click."+this.type,this.options.selector,a.proxy(this.toggle,this));else if("manual"!=g){var h="hover"==g?"mouseenter":"focusin",i="hover"==g?"mouseleave":"focusout";this.$element.on(h+"."+this.type,this.options.selector,a.proxy(this.enter,this)),this.$element.on(i+"."+this.type,this.options.selector,a.proxy(this.leave,this))}}this.options.selector?this._options=a.extend({},this.options,{trigger:"manual",selector:""}):this.fixTitle()},c.prototype.getDefaults=function(){return c.DEFAULTS},c.prototype.getOptions=function(b){return b=a.extend({},this.getDefaults(),this.$element.data(),b),b.delay&&"number"==typeof b.delay&&(b.delay={show:b.delay,hide:b.delay}),b},c.prototype.getDelegateOptions=function(){var b={},c=this.getDefaults();return this._options&&a.each(this._options,function(a,d){c[a]!=d&&(b[a]=d)}),b},c.prototype.enter=function(b){var c=b instanceof this.constructor?b:a(b.currentTarget).data("bs."+this.type);return c||(c=new this.constructor(b.currentTarget,this.getDelegateOptions()),a(b.currentTarget).data("bs."+this.type,c)),b instanceof a.Event&&(c.inState["focusin"==b.type?"focus":"hover"]=!0),c.tip().hasClass("in")||"in"==c.hoverState?void(c.hoverState="in"):(clearTimeout(c.timeout),c.hoverState="in",c.options.delay&&c.options.delay.show?void(c.timeout=setTimeout(function(){"in"==c.hoverState&&c.show()},c.options.delay.show)):c.show())},c.prototype.isInStateTrue=function(){for(var a in this.inState)if(this.inState[a])return!0;return!1},c.prototype.leave=function(b){var c=b instanceof this.constructor?b:a(b.currentTarget).data("bs."+this.type);if(c||(c=new this.constructor(b.currentTarget,this.getDelegateOptions()),a(b.currentTarget).data("bs."+this.type,c)),b instanceof a.Event&&(c.inState["focusout"==b.type?"focus":"hover"]=!1),!c.isInStateTrue())return clearTimeout(c.timeout),c.hoverState="out",c.options.delay&&c.options.delay.hide?void(c.timeout=setTimeout(function(){"out"==c.hoverState&&c.hide()},c.options.delay.hide)):c.hide()},c.prototype.show=function(){var b=a.Event("show.bs."+this.type);if(this.hasContent()&&this.enabled){this.$element.trigger(b);var d=a.contains(this.$element[0].ownerDocument.documentElement,this.$element[0]);if(b.isDefaultPrevented()||!d)return;var e=this,f=this.tip(),g=this.getUID(this.type);this.setContent(),f.attr("id",g),this.$element.attr("aria-describedby",g),this.options.animation&&f.addClass("fade");var h="function"==typeof this.options.placement?this.options.placement.call(this,f[0],this.$element[0]):this.options.placement,i=/\s?auto?\s?/i,j=i.test(h);j&&(h=h.replace(i,"")||"top"),f.detach().css({top:0,left:0,display:"block"}).addClass(h).data("bs."+this.type,this),this.options.container?f.appendTo(this.options.container):f.insertAfter(this.$element),this.$element.trigger("inserted.bs."+this.type);var k=this.getPosition(),l=f[0].offsetWidth,m=f[0].offsetHeight;if(j){var n=h,o=this.getPosition(this.$viewport);h="bottom"==h&&k.bottom+m>o.bottom?"top":"top"==h&&k.top-mo.width?"left":"left"==h&&k.left-lg.top+g.height&&(e.top=g.top+g.height-i)}else{var j=b.left-f,k=b.left+f+c;jg.right&&(e.left=g.left+g.width-k)}return e},c.prototype.getTitle=function(){var a,b=this.$element,c=this.options;return a=b.attr("data-original-title")||("function"==typeof c.title?c.title.call(b[0]):c.title)},c.prototype.getUID=function(a){do a+=~~(1e6*Math.random());while(document.getElementById(a));return a},c.prototype.tip=function(){if(!this.$tip&&(this.$tip=a(this.options.template),1!=this.$tip.length))throw new Error(this.type+" `template` option must consist of exactly 1 top-level element!");return this.$tip},c.prototype.arrow=function(){return this.$arrow=this.$arrow||this.tip().find(".tooltip-arrow")},c.prototype.enable=function(){this.enabled=!0},c.prototype.disable=function(){this.enabled=!1},c.prototype.toggleEnabled=function(){this.enabled=!this.enabled},c.prototype.toggle=function(b){var c=this;b&&(c=a(b.currentTarget).data("bs."+this.type),c||(c=new this.constructor(b.currentTarget,this.getDelegateOptions()),a(b.currentTarget).data("bs."+this.type,c))),b?(c.inState.click=!c.inState.click,c.isInStateTrue()?c.enter(c):c.leave(c)):c.tip().hasClass("in")?c.leave(c):c.enter(c)},c.prototype.destroy=function(){var a=this;clearTimeout(this.timeout),this.hide(function(){a.$element.off("."+a.type).removeData("bs."+a.type),a.$tip&&a.$tip.detach(),a.$tip=null,a.$arrow=null,a.$viewport=null,a.$element=null})};var d=a.fn.tooltip;a.fn.tooltip=b,a.fn.tooltip.Constructor=c,a.fn.tooltip.noConflict=function(){return a.fn.tooltip=d,this}}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.popover"),f="object"==typeof b&&b;!e&&/destroy|hide/.test(b)||(e||d.data("bs.popover",e=new c(this,f)),"string"==typeof b&&e[b]())})}var c=function(a,b){this.init("popover",a,b)};if(!a.fn.tooltip)throw new Error("Popover requires tooltip.js");c.VERSION="3.3.7",c.DEFAULTS=a.extend({},a.fn.tooltip.Constructor.DEFAULTS,{placement:"right",trigger:"click",content:"",template:''}),c.prototype=a.extend({},a.fn.tooltip.Constructor.prototype),c.prototype.constructor=c,c.prototype.getDefaults=function(){return c.DEFAULTS},c.prototype.setContent=function(){var a=this.tip(),b=this.getTitle(),c=this.getContent();a.find(".popover-title")[this.options.html?"html":"text"](b),a.find(".popover-content").children().detach().end()[this.options.html?"string"==typeof c?"html":"append":"text"](c),a.removeClass("fade top bottom left right in"),a.find(".popover-title").html()||a.find(".popover-title").hide()},c.prototype.hasContent=function(){return this.getTitle()||this.getContent()},c.prototype.getContent=function(){var a=this.$element,b=this.options;return a.attr("data-content")||("function"==typeof b.content?b.content.call(a[0]):b.content)},c.prototype.arrow=function(){return this.$arrow=this.$arrow||this.tip().find(".arrow")};var d=a.fn.popover;a.fn.popover=b,a.fn.popover.Constructor=c,a.fn.popover.noConflict=function(){return a.fn.popover=d,this}}(jQuery),+function(a){"use strict";function b(c,d){this.$body=a(document.body),this.$scrollElement=a(a(c).is(document.body)?window:c),this.options=a.extend({},b.DEFAULTS,d),this.selector=(this.options.target||"")+" .nav li > a",this.offsets=[],this.targets=[],this.activeTarget=null,this.scrollHeight=0,this.$scrollElement.on("scroll.bs.scrollspy",a.proxy(this.process,this)),this.refresh(),this.process()}function c(c){return this.each(function(){var d=a(this),e=d.data("bs.scrollspy"),f="object"==typeof c&&c;e||d.data("bs.scrollspy",e=new b(this,f)),"string"==typeof c&&e[c]()})}b.VERSION="3.3.7",b.DEFAULTS={offset:10},b.prototype.getScrollHeight=function(){return this.$scrollElement[0].scrollHeight||Math.max(this.$body[0].scrollHeight,document.documentElement.scrollHeight)},b.prototype.refresh=function(){var b=this,c="offset",d=0;this.offsets=[],this.targets=[],this.scrollHeight=this.getScrollHeight(),a.isWindow(this.$scrollElement[0])||(c="position",d=this.$scrollElement.scrollTop()),this.$body.find(this.selector).map(function(){var b=a(this),e=b.data("target")||b.attr("href"),f=/^#./.test(e)&&a(e);return f&&f.length&&f.is(":visible")&&[[f[c]().top+d,e]]||null}).sort(function(a,b){return a[0]-b[0]}).each(function(){b.offsets.push(this[0]),b.targets.push(this[1])})},b.prototype.process=function(){var a,b=this.$scrollElement.scrollTop()+this.options.offset,c=this.getScrollHeight(),d=this.options.offset+c-this.$scrollElement.height(),e=this.offsets,f=this.targets,g=this.activeTarget;if(this.scrollHeight!=c&&this.refresh(),b>=d)return g!=(a=f[f.length-1])&&this.activate(a);if(g&&b=e[a]&&(void 0===e[a+1]||b .dropdown-menu > .active").removeClass("active").end().find('[data-toggle="tab"]').attr("aria-expanded",!1),b.addClass("active").find('[data-toggle="tab"]').attr("aria-expanded",!0),h?(b[0].offsetWidth,b.addClass("in")):b.removeClass("fade"),b.parent(".dropdown-menu").length&&b.closest("li.dropdown").addClass("active").end().find('[data-toggle="tab"]').attr("aria-expanded",!0),e&&e()}var g=d.find("> .active"),h=e&&a.support.transition&&(g.length&&g.hasClass("fade")||!!d.find("> .fade").length);g.length&&h?g.one("bsTransitionEnd",f).emulateTransitionEnd(c.TRANSITION_DURATION):f(),g.removeClass("in")};var d=a.fn.tab;a.fn.tab=b,a.fn.tab.Constructor=c,a.fn.tab.noConflict=function(){return a.fn.tab=d,this};var e=function(c){c.preventDefault(),b.call(a(this),"show")};a(document).on("click.bs.tab.data-api",'[data-toggle="tab"]',e).on("click.bs.tab.data-api",'[data-toggle="pill"]',e)}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.affix"),f="object"==typeof b&&b;e||d.data("bs.affix",e=new c(this,f)),"string"==typeof b&&e[b]()})}var c=function(b,d){this.options=a.extend({},c.DEFAULTS,d),this.$target=a(this.options.target).on("scroll.bs.affix.data-api",a.proxy(this.checkPosition,this)).on("click.bs.affix.data-api",a.proxy(this.checkPositionWithEventLoop,this)),this.$element=a(b),this.affixed=null,this.unpin=null,this.pinnedOffset=null,this.checkPosition()};c.VERSION="3.3.7",c.RESET="affix affix-top affix-bottom",c.DEFAULTS={offset:0,target:window},c.prototype.getState=function(a,b,c,d){var e=this.$target.scrollTop(),f=this.$element.offset(),g=this.$target.height();if(null!=c&&"top"==this.affixed)return e=a-d&&"bottom"},c.prototype.getPinnedOffset=function(){if(this.pinnedOffset)return this.pinnedOffset;this.$element.removeClass(c.RESET).addClass("affix");var a=this.$target.scrollTop(),b=this.$element.offset();return this.pinnedOffset=b.top-a},c.prototype.checkPositionWithEventLoop=function(){setTimeout(a.proxy(this.checkPosition,this),1)},c.prototype.checkPosition=function(){if(this.$element.is(":visible")){var b=this.$element.height(),d=this.options.offset,e=d.top,f=d.bottom,g=Math.max(a(document).height(),a(document.body).height());"object"!=typeof d&&(f=e=d),"function"==typeof e&&(e=d.top(this.$element)),"function"==typeof f&&(f=d.bottom(this.$element));var h=this.getState(g,b,e,f);if(this.affixed!=h){null!=this.unpin&&this.$element.css("top","");var i="affix"+(h?"-"+h:""),j=a.Event(i+".bs.affix");if(this.$element.trigger(j),j.isDefaultPrevented())return;this.affixed=h,this.unpin="bottom"==h?this.getPinnedOffset():null,this.$element.removeClass(c.RESET).addClass(i).trigger(i.replace("affix","affixed")+".bs.affix")}"bottom"==h&&this.$element.offset({top:g-b-f})}};var d=a.fn.affix;a.fn.affix=b,a.fn.affix.Constructor=c,a.fn.affix.noConflict=function(){return a.fn.affix=d,this},a(window).on("load",function(){a('[data-spy="affix"]').each(function(){var c=a(this),d=c.data();d.offset=d.offset||{},null!=d.offsetBottom&&(d.offset.bottom=d.offsetBottom),null!=d.offsetTop&&(d.offset.top=d.offsetTop),b.call(c,d)})})}(jQuery); diff --git a/web/gui/dashboard/lib/bootstrap-slider-10.0.0.min.js b/web/gui/dashboard/lib/bootstrap-slider-10.0.0.min.js new file mode 100644 index 0000000..87e8349 --- /dev/null +++ b/web/gui/dashboard/lib/bootstrap-slider-10.0.0.min.js @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: MIT +/*! ======================================================= + VERSION 10.0.0 +========================================================= */ +"use strict";var _typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(a){return typeof a}:function(a){return a&&"function"==typeof Symbol&&a.constructor===Symbol&&a!==Symbol.prototype?"symbol":typeof a},windowIsDefined="object"===("undefined"==typeof window?"undefined":_typeof(window));!function(a){if("function"==typeof define&&define.amd)define(["jquery"],a);else if("object"===("undefined"==typeof module?"undefined":_typeof(module))&&module.exports){var b;try{b=require("jquery")}catch(c){b=null}module.exports=a(b)}else window&&(window.Slider=a(window.jQuery))}(function(a){var b="slider",c="bootstrapSlider";windowIsDefined&&!window.console&&(window.console={}),windowIsDefined&&!window.console.log&&(window.console.log=function(){}),windowIsDefined&&!window.console.warn&&(window.console.warn=function(){});var d;return function(a){function b(){}function c(a){function c(b){b.prototype.option||(b.prototype.option=function(b){a.isPlainObject(b)&&(this.options=a.extend(!0,this.options,b))})}function e(b,c){a.fn[b]=function(e){if("string"==typeof e){for(var g=d.call(arguments,1),h=0,i=this.length;i>h;h++){var j=this[h],k=a.data(j,b);if(k)if(a.isFunction(k[e])&&"_"!==e.charAt(0)){var l=k[e].apply(k,g);if(void 0!==l&&l!==k)return l}else f("no such method '"+e+"' for "+b+" instance");else f("cannot call methods on "+b+" prior to initialization; attempted to call '"+e+"'")}return this}var m=this.map(function(){var d=a.data(this,b);return d?(d.option(e),d._init()):(d=new c(this,e),a.data(this,b,d)),a(this)});return!m||m.length>1?m:m[0]}}if(a){var f="undefined"==typeof console?b:function(a){console.error(a)};return a.bridget=function(a,b){c(b),e(a,b)},a.bridget}}var d=Array.prototype.slice;c(a)}(a),function(a){function e(b,c){function d(a,b){var c="data-slider-"+b.replace(/_/g,"-"),d=a.getAttribute(c);try{return JSON.parse(d)}catch(e){return d}}this._state={value:null,enabled:null,offset:null,size:null,percentage:null,inDrag:!1,over:!1},this.ticksCallbackMap={},this.handleCallbackMap={},"string"==typeof b?this.element=document.querySelector(b):b instanceof HTMLElement&&(this.element=b),c=c?c:{};for(var e=Object.keys(this.defaultOptions),f=0;f0)for(var t=0;t0){for(this.ticksContainer=document.createElement("div"),this.ticksContainer.className="slider-tick-container",f=0;f0)for(this.tickLabelContainer=document.createElement("div"),this.tickLabelContainer.className="slider-tick-label-container",f=0;f0&&(this.options.max=Math.max.apply(Math,this.options.ticks),this.options.min=Math.min.apply(Math,this.options.ticks)),Array.isArray(this.options.value)?(this.options.range=!0,this._state.value=this.options.value):this.options.range?this._state.value=[this.options.value,this.options.max]:this._state.value=this.options.value,this.trackLow=k||this.trackLow,this.trackSelection=j||this.trackSelection,this.trackHigh=l||this.trackHigh,"none"===this.options.selection?(this._addClass(this.trackLow,"hide"),this._addClass(this.trackSelection,"hide"),this._addClass(this.trackHigh,"hide")):("after"===this.options.selection||"before"===this.options.selection)&&(this._removeClass(this.trackLow,"hide"),this._removeClass(this.trackSelection,"hide"),this._removeClass(this.trackHigh,"hide")),this.handle1=m||this.handle1,this.handle2=n||this.handle2,p===!0)for(this._removeClass(this.handle1,"round triangle"),this._removeClass(this.handle2,"round triangle hide"),f=0;f0){for(var d,e,f,g=0,h=1;hthis.options.max?this.options.max:k},toPercentage:function(a){if(this.options.max===this.options.min)return 0;if(this.options.ticks_positions.length>0){for(var b,c,d,e=0,f=0;f0?this.options.ticks[f-1]:0,d=f>0?this.options.ticks_positions[f-1]:0,c=this.options.ticks[f],e=this.options.ticks_positions[f];break}if(f>0){var g=(a-b)/(c-b);return d+g*(e-d)}}return 100*(a-this.options.min)/(this.options.max-this.options.min)}},logarithmic:{toValue:function(a){var b=0===this.options.min?0:Math.log(this.options.min),c=Math.log(this.options.max),d=Math.exp(b+(c-b)*a/100);return Math.round(d)===this.options.max?this.options.max:(d=this.options.min+Math.round((d-this.options.min)/this.options.step)*this.options.step,dthis.options.max?this.options.max:d)},toPercentage:function(a){if(this.options.max===this.options.min)return 0;var b=Math.log(this.options.max),c=0===this.options.min?0:Math.log(this.options.min),d=0===a?0:Math.log(a);return 100*(d-c)/(b-c)}}};if(d=function(a,b){return e.call(this,a,b),this},d.prototype={_init:function(){},constructor:d,defaultOptions:{id:"",min:0,max:10,step:1,precision:0,orientation:"horizontal",value:5,range:!1,selection:"before",tooltip:"show",tooltip_split:!1,handle:"round",reversed:!1,rtl:"auto",enabled:!0,formatter:function(a){return Array.isArray(a)?a[0]+" : "+a[1]:a},natural_arrow_keys:!1,ticks:[],ticks_positions:[],ticks_labels:[],ticks_snap_bounds:0,ticks_tooltip:!1,scale:"linear",focus:!1,tooltip_position:null,labelledby:null,rangeHighlights:[]},getElement:function(){return this.sliderElem},getValue:function(){return this.options.range?this._state.value:this._state.value[0]},setValue:function(a,b,c){a||(a=0);var d=this.getValue();this._state.value=this._validateInputValue(a);var e=this._applyPrecision.bind(this);this.options.range?(this._state.value[0]=e(this._state.value[0]),this._state.value[1]=e(this._state.value[1]),this._state.value[0]=Math.max(this.options.min,Math.min(this.options.max,this._state.value[0])),this._state.value[1]=Math.max(this.options.min,Math.min(this.options.max,this._state.value[1]))):(this._state.value=e(this._state.value),this._state.value=[Math.max(this.options.min,Math.min(this.options.max,this._state.value))],this._addClass(this.handle2,"hide"),"after"===this.options.selection?this._state.value[1]=this.options.max:this._state.value[1]=this.options.min),this.options.max>this.options.min?this._state.percentage=[this._toPercentage(this._state.value[0]),this._toPercentage(this._state.value[1]),100*this.options.step/(this.options.max-this.options.min)]:this._state.percentage=[0,0,100],this._layout();var f=this.options.range?this._state.value:this._state.value[0];return this._setDataVal(f),b===!0&&this._trigger("slide",f),d!==f&&c===!0&&this._trigger("change",{oldValue:d,newValue:f}),this},destroy:function(){this._removeSliderEventHandlers(),this.sliderElem.parentNode.removeChild(this.sliderElem),this.element.style.display="",this._cleanUpEventCallbacksMap(),this.element.removeAttribute("data"),a&&(this._unbindJQueryEventHandlers(),this.$element.removeData("slider"))},disable:function(){return this._state.enabled=!1,this.handle1.removeAttribute("tabindex"),this.handle2.removeAttribute("tabindex"),this._addClass(this.sliderElem,"slider-disabled"),this._trigger("slideDisabled"),this},enable:function(){return this._state.enabled=!0,this.handle1.setAttribute("tabindex",0),this.handle2.setAttribute("tabindex",0),this._removeClass(this.sliderElem,"slider-disabled"),this._trigger("slideEnabled"),this},toggle:function(){return this._state.enabled?this.disable():this.enable(),this},isEnabled:function(){return this._state.enabled},on:function(a,b){return this._bindNonQueryEventHandler(a,b),this},off:function(b,c){a?(this.$element.off(b,c),this.$sliderElem.off(b,c)):this._unbindNonQueryEventHandler(b,c)},getAttribute:function(a){return a?this.options[a]:this.options},setAttribute:function(a,b){return this.options[a]=b,this},refresh:function(){return this._removeSliderEventHandlers(),e.call(this,this.element,this.options),a&&a.data(this.element,"slider",this),this},relayout:function(){return this._resize(),this._layout(),this},_removeSliderEventHandlers:function(){if(this.handle1.removeEventListener("keydown",this.handle1Keydown,!1),this.handle2.removeEventListener("keydown",this.handle2Keydown,!1),this.options.ticks_tooltip){for(var a=this.ticksContainer.getElementsByClassName("slider-tick"),b=0;b=0?c:this.attributes["aria-valuenow"].value,e=parseInt(d,10);b.value[0]=e,b.percentage[0]=a.options.ticks_positions[e],a._setToolTipOnMouseOver(b),a._showTooltip()};return b.addEventListener("mouseenter",d,!1),d},addMouseLeave:function(a,b){var c=function(){a._hideTooltip()};return b.addEventListener("mouseleave",c,!1),c}}},_layout:function(){var a;if(a=this.options.reversed?[100-this._state.percentage[0],this.options.range?100-this._state.percentage[1]:this._state.percentage[1]]:[this._state.percentage[0],this._state.percentage[1]],this.handle1.style[this.stylePos]=a[0]+"%",this.handle1.setAttribute("aria-valuenow",this._state.value[0]),isNaN(this.options.formatter(this._state.value[0]))&&this.handle1.setAttribute("aria-valuetext",this.options.formatter(this._state.value[0])),this.handle2.style[this.stylePos]=a[1]+"%",this.handle2.setAttribute("aria-valuenow",this._state.value[1]),isNaN(this.options.formatter(this._state.value[1]))&&this.handle2.setAttribute("aria-valuetext",this.options.formatter(this._state.value[1])),this.rangeHighlightElements.length>0&&Array.isArray(this.options.rangeHighlights)&&this.options.rangeHighlights.length>0)for(var b=0;b0){var g,h="vertical"===this.options.orientation?"height":"width";g="vertical"===this.options.orientation?"marginTop":this.options.rtl?"marginRight":"marginLeft";var i=this._state.size/(this.options.ticks.length-1);if(this.tickLabelContainer){var j=0;if(0===this.options.ticks_positions.length)"vertical"!==this.options.orientation&&(this.tickLabelContainer.style[g]=-i/2+"px"),j=this.tickLabelContainer.offsetHeight;else for(k=0;kj&&(j=this.tickLabelContainer.childNodes[k].offsetHeight);"horizontal"===this.options.orientation&&(this.sliderElem.style.marginBottom=j+"px")}for(var k=0;k=a[0]&&l<=a[1]&&this._addClass(this.ticks[k],"in-selection"):"after"===this.options.selection&&l>=a[0]?this._addClass(this.ticks[k],"in-selection"):"before"===this.options.selection&&l<=a[0]&&this._addClass(this.ticks[k],"in-selection"),this.tickLabels[k]&&(this.tickLabels[k].style[h]=i+"px","vertical"!==this.options.orientation&&void 0!==this.options.ticks_positions[k]?(this.tickLabels[k].style.position="absolute",this.tickLabels[k].style[this.stylePos]=l+"%",this.tickLabels[k].style[g]=-i/2+"px"):"vertical"===this.options.orientation&&(this.options.rtl?this.tickLabels[k].style.marginRight=this.sliderElem.offsetWidth+"px":this.tickLabels[k].style.marginLeft=this.sliderElem.offsetWidth+"px",this.tickLabelContainer.style[g]=this.sliderElem.offsetWidth/2*-1+"px"))}}var m;if(this.options.range){m=this.options.formatter(this._state.value),this._setText(this.tooltipInner,m),this.tooltip.style[this.stylePos]=(a[1]+a[0])/2+"%";var n=this.options.formatter(this._state.value[0]);this._setText(this.tooltipInner_min,n);var o=this.options.formatter(this._state.value[1]);this._setText(this.tooltipInner_max,o),this.tooltip_min.style[this.stylePos]=a[0]+"%",this.tooltip_max.style[this.stylePos]=a[1]+"%"}else m=this.options.formatter(this._state.value[0]),this._setText(this.tooltipInner,m),this.tooltip.style[this.stylePos]=a[0]+"%";if("vertical"===this.options.orientation)this.trackLow.style.top="0",this.trackLow.style.height=Math.min(a[0],a[1])+"%",this.trackSelection.style.top=Math.min(a[0],a[1])+"%",this.trackSelection.style.height=Math.abs(a[0]-a[1])+"%",this.trackHigh.style.bottom="0",this.trackHigh.style.height=100-Math.min(a[0],a[1])-Math.abs(a[0]-a[1])+"%";else{"right"===this.stylePos?this.trackLow.style.right="0":this.trackLow.style.left="0",this.trackLow.style.width=Math.min(a[0],a[1])+"%","right"===this.stylePos?this.trackSelection.style.right=Math.min(a[0],a[1])+"%":this.trackSelection.style.left=Math.min(a[0],a[1])+"%",this.trackSelection.style.width=Math.abs(a[0]-a[1])+"%","right"===this.stylePos?this.trackHigh.style.left="0":this.trackHigh.style.right="0",this.trackHigh.style.width=100-Math.min(a[0],a[1])-Math.abs(a[0]-a[1])+"%";var p=this.tooltip_min.getBoundingClientRect(),q=this.tooltip_max.getBoundingClientRect();"bottom"===this.options.tooltip_position?p.right>q.left?(this._removeClass(this.tooltip_max,"bottom"),this._addClass(this.tooltip_max,"top"),this.tooltip_max.style.top="",this.tooltip_max.style.bottom="22px"):(this._removeClass(this.tooltip_max,"top"),this._addClass(this.tooltip_max,"bottom"),this.tooltip_max.style.top=this.tooltip_min.style.top,this.tooltip_max.style.bottom=""):p.right>q.left?(this._removeClass(this.tooltip_max,"top"),this._addClass(this.tooltip_max,"bottom"),this.tooltip_max.style.top="18px"):(this._removeClass(this.tooltip_max,"bottom"),this._addClass(this.tooltip_max,"top"),this.tooltip_max.style.top=this.tooltip_min.style.top)}},_createHighlightRange:function(a,b){return this._isHighlightRange(a,b)?a>b?{start:b,size:a-b}:{start:a,size:b-a}:null},_isHighlightRange:function(a,b){return a>=0&&100>=a&&b>=0&&100>=b?!0:!1},_resize:function(a){this._state.offset=this._offset(this.sliderElem),this._state.size=this.sliderElem[this.sizePos],this._layout()},_removeProperty:function(a,b){a.style.removeProperty?a.style.removeProperty(b):a.style.removeAttribute(b)},_mousedown:function(a){if(!this._state.enabled)return!1;this._state.offset=this._offset(this.sliderElem),this._state.size=this.sliderElem[this.sizePos];var b=this._getPercentage(a);if(this.options.range){var c=Math.abs(this._state.percentage[0]-b),d=Math.abs(this._state.percentage[1]-b);this._state.dragged=d>c?0:1,this._adjustPercentageForRangeSliders(b)}else this._state.dragged=0;this._state.percentage[this._state.dragged]=b,this._layout(),this.touchCapable&&(document.removeEventListener("touchmove",this.mousemove,!1),document.removeEventListener("touchend",this.mouseup,!1)),this.mousemove&&document.removeEventListener("mousemove",this.mousemove,!1),this.mouseup&&document.removeEventListener("mouseup",this.mouseup,!1),this.mousemove=this._mousemove.bind(this),this.mouseup=this._mouseup.bind(this),this.touchCapable&&(document.addEventListener("touchmove",this.mousemove,!1),document.addEventListener("touchend",this.mouseup,!1)),document.addEventListener("mousemove",this.mousemove,!1),document.addEventListener("mouseup",this.mouseup,!1),this._state.inDrag=!0;var e=this._calculateValue();return this._trigger("slideStart",e),this._setDataVal(e),this.setValue(e,!1,!0),a.returnValue=!1,this.options.focus&&this._triggerFocusOnHandle(this._state.dragged),!0},_touchstart:function(a){if(void 0===a.changedTouches)return void this._mousedown(a);var b=a.changedTouches[0];this.touchX=b.pageX,this.touchY=b.pageY},_triggerFocusOnHandle:function(a){0===a&&this.handle1.focus(),1===a&&this.handle2.focus()},_keydown:function(a,b){if(!this._state.enabled)return!1;var c;switch(b.keyCode){case 37:case 40:c=-1;break;case 39:case 38:c=1}if(c){if(this.options.natural_arrow_keys){var d="vertical"===this.options.orientation&&!this.options.reversed,e="horizontal"===this.options.orientation&&this.options.reversed;(d||e)&&(c=-c)}var f=this._state.value[a]+c*this.options.step,g=f/this.options.max*100;if(this._state.keyCtrl=a,this.options.range){this._adjustPercentageForRangeSliders(g);var h=this._state.keyCtrl?this._state.value[0]:f,i=this._state.keyCtrl?f:this._state.value[1];f=[h,i]}return this._trigger("slideStart",f),this._setDataVal(f),this.setValue(f,!0,!0),this._setDataVal(f),this._trigger("slideStop",f),this._layout(),this._pauseEvent(b),delete this._state.keyCtrl,!1}},_pauseEvent:function(a){a.stopPropagation&&a.stopPropagation(),a.preventDefault&&a.preventDefault(),a.cancelBubble=!0,a.returnValue=!1},_mousemove:function(a){if(!this._state.enabled)return!1;var b=this._getPercentage(a);this._adjustPercentageForRangeSliders(b),this._state.percentage[this._state.dragged]=b,this._layout();var c=this._calculateValue(!0);return this.setValue(c,!0,!0),!1},_touchmove:function(a){if(void 0!==a.changedTouches){var b=a.changedTouches[0],c=b.pageX-this.touchX,d=b.pageY-this.touchY;this._state.inDrag||("vertical"===this.options.orientation&&5>=c&&c>=-5&&(d>=15||-15>=d)?this._mousedown(a):5>=d&&d>=-5&&(c>=15||-15>=c)&&this._mousedown(a))}},_adjustPercentageForRangeSliders:function(a){if(this.options.range){var b=this._getNumDigitsAfterDecimalPlace(a);b=b?b-1:0;var c=this._applyToFixedAndParseFloat(a,b);0===this._state.dragged&&this._applyToFixedAndParseFloat(this._state.percentage[1],b)c?(this._state.percentage[1]=this._state.percentage[0],this._state.dragged=0):0===this._state.keyCtrl&&this._state.value[1]/this.options.max*100a&&(this._state.percentage[1]=this._state.percentage[0],this._state.keyCtrl=0,this.handle1.focus())}},_mouseup:function(){if(!this._state.enabled)return!1;this.touchCapable&&(document.removeEventListener("touchmove",this.mousemove,!1),document.removeEventListener("touchend",this.mouseup,!1)),document.removeEventListener("mousemove",this.mousemove,!1),document.removeEventListener("mouseup",this.mouseup,!1),this._state.inDrag=!1,this._state.over===!1&&this._hideTooltip();var a=this._calculateValue(!0);return this._layout(),this._setDataVal(a),this._trigger("slideStop",a),!1},_calculateValue:function(a){var b;if(this.options.range?(b=[this.options.min,this.options.max],0!==this._state.percentage[0]&&(b[0]=this._toValue(this._state.percentage[0]),b[0]=this._applyPrecision(b[0])),100!==this._state.percentage[1]&&(b[1]=this._toValue(this._state.percentage[1]),b[1]=this._applyPrecision(b[1]))):(b=this._toValue(this._state.percentage[0]),b=parseFloat(b),b=this._applyPrecision(b)),a){for(var c=[b,1/0],d=0;dd;d++)g[c][d]=!1;for(c=0;ce;e++)g[c+e][k]=!0;for(e=0;j>e;e++)g[c][k+e]=!0}},g=function(){if(null===b){var c,d,e=a("

    ").addClass("fixed-table-scroll-inner"),f=a("

    ").addClass("fixed-table-scroll-outer");f.append(e),a("body").append(f),c=e[0].offsetWidth,f.css("overflow","scroll"),d=e[0].offsetWidth,c===d&&(d=f[0].clientWidth),f.remove(),b=c-d}return b},h=function(b,d,e,f){var g=d;if("string"==typeof d){var h=d.split(".");h.length>1?(g=window,a.each(h,function(a,b){g=g[b]})):g=window[d]}return"object"==typeof g?g:"function"==typeof g?g.apply(b,e):!g&&"string"==typeof d&&c.apply(this,[d].concat(e))?c.apply(this,[d].concat(e)):f},i=function(b,c,d){var e=Object.getOwnPropertyNames(b),f=Object.getOwnPropertyNames(c),g="";if(d&&e.length!==f.length)return!1;for(var h=0;h-1&&b[g]!==c[g])return!1;return!0},j=function(a){return"string"==typeof a?a.replace(/&/g,"&").replace(//g,">").replace(/"/g,""").replace(/'/g,"'").replace(/`/g,"`"):a},k=function(b){var c=0;return b.children().each(function(){c0||navigator.userAgent.match(/Trident.*rv\:11\./))},o=function(){Object.keys||(Object.keys=function(){var a=Object.prototype.hasOwnProperty,b=!{toString:null}.propertyIsEnumerable("toString"),c=["toString","toLocaleString","valueOf","hasOwnProperty","isPrototypeOf","propertyIsEnumerable","constructor"],d=c.length;return function(e){if("object"!=typeof e&&("function"!=typeof e||null===e))throw new TypeError("Object.keys called on non-object");var f,g,h=[];for(f in e)a.call(e,f)&&h.push(f);if(b)for(g=0;d>g;g++)a.call(e,c[g])&&h.push(c[g]);return h}}())},p=function(b,c){this.options=c,this.$el=a(b),this.$el_=this.$el.clone(),this.timeoutId_=0,this.timeoutFooter_=0,this.init()};p.DEFAULTS={classes:"table table-hover",locale:void 0,height:void 0,undefinedText:"-",sortName:void 0,sortOrder:"asc",sortStable:!1,striped:!1,columns:[[]],data:[],dataField:"rows",method:"get",url:void 0,ajax:void 0,cache:!0,contentType:"application/json",dataType:"json",ajaxOptions:{},queryParams:function(a){return a},queryParamsType:"limit",responseHandler:function(a){return a},pagination:!1,onlyInfoPagination:!1,sidePagination:"client",totalRows:0,pageNumber:1,pageSize:10,pageList:[10,25,50,100],paginationHAlign:"right",paginationVAlign:"bottom",paginationDetailHAlign:"left",paginationPreText:"‹",paginationNextText:"›",search:!1,searchOnEnterKey:!1,strictSearch:!1,searchAlign:"right",selectItemName:"btSelectItem",showHeader:!0,showFooter:!1,showColumns:!1,showPaginationSwitch:!1,showRefresh:!1,showToggle:!1,buttonsAlign:"right",smartDisplay:!0,escape:!1,minimumCountColumns:1,idField:void 0,uniqueId:void 0,cardView:!1,detailView:!1,detailFormatter:function(){return""},trimOnSearch:!0,clickToSelect:!1,singleSelect:!1,toolbar:void 0,toolbarAlign:"left",checkboxHeader:!0,sortable:!0,silentSort:!0,maintainSelected:!1,searchTimeOut:500,searchText:"",iconSize:void 0,buttonsClass:"default",iconsPrefix:"glyphicon",icons:{paginationSwitchDown:"glyphicon-collapse-down icon-chevron-down",paginationSwitchUp:"glyphicon-collapse-up icon-chevron-up",refresh:"glyphicon-refresh icon-refresh",toggle:"glyphicon-list-alt icon-list-alt",columns:"glyphicon-th icon-th",detailOpen:"glyphicon-plus icon-plus",detailClose:"glyphicon-minus icon-minus"},customSearch:a.noop,customSort:a.noop,rowStyle:function(){return{}},rowAttributes:function(){return{}},footerStyle:function(){return{}},onAll:function(){return!1},onClickCell:function(){return!1},onDblClickCell:function(){return!1},onClickRow:function(){return!1},onDblClickRow:function(){return!1},onSort:function(){return!1},onCheck:function(){return!1},onUncheck:function(){return!1},onCheckAll:function(){return!1},onUncheckAll:function(){return!1},onCheckSome:function(){return!1},onUncheckSome:function(){return!1},onLoadSuccess:function(){return!1},onLoadError:function(){return!1},onColumnSwitch:function(){return!1},onPageChange:function(){return!1},onSearch:function(){return!1},onToggle:function(){return!1},onPreBody:function(){return!1},onPostBody:function(){return!1},onPostHeader:function(){return!1},onExpandRow:function(){return!1},onCollapseRow:function(){return!1},onRefreshOptions:function(){return!1},onRefresh:function(){return!1},onResetView:function(){return!1}},p.LOCALES={},p.LOCALES["en-US"]=p.LOCALES.en={formatLoadingMessage:function(){return"Loading, please wait..."},formatRecordsPerPage:function(a){return c("%s rows per page",a)},formatShowingRows:function(a,b,d){return c("Showing %s to %s of %s rows",a,b,d)},formatDetailPagination:function(a){return c("Showing %s rows",a)},formatSearch:function(){return"Search"},formatNoMatches:function(){return"No matching records found"},formatPaginationSwitch:function(){return"Hide/Show pagination"},formatRefresh:function(){return"Refresh"},formatToggle:function(){return"Toggle"},formatColumns:function(){return"Columns"},formatAllRows:function(){return"All"}},a.extend(p.DEFAULTS,p.LOCALES["en-US"]),p.COLUMN_DEFAULTS={radio:!1,checkbox:!1,checkboxEnabled:!0,field:void 0,title:void 0,titleTooltip:void 0,"class":void 0,align:void 0,halign:void 0,falign:void 0,valign:void 0,width:void 0,sortable:!1,order:"asc",visible:!0,switchable:!0,clickToSelect:!0,formatter:void 0,footerFormatter:void 0,events:void 0,sorter:void 0,sortName:void 0,cellStyle:void 0,searchable:!0,searchFormatter:!0,cardVisible:!0},p.EVENTS={"all.bs.table":"onAll","click-cell.bs.table":"onClickCell","dbl-click-cell.bs.table":"onDblClickCell","click-row.bs.table":"onClickRow","dbl-click-row.bs.table":"onDblClickRow","sort.bs.table":"onSort","check.bs.table":"onCheck","uncheck.bs.table":"onUncheck","check-all.bs.table":"onCheckAll","uncheck-all.bs.table":"onUncheckAll","check-some.bs.table":"onCheckSome","uncheck-some.bs.table":"onUncheckSome","load-success.bs.table":"onLoadSuccess","load-error.bs.table":"onLoadError","column-switch.bs.table":"onColumnSwitch","page-change.bs.table":"onPageChange","search.bs.table":"onSearch","toggle.bs.table":"onToggle","pre-body.bs.table":"onPreBody","post-body.bs.table":"onPostBody","post-header.bs.table":"onPostHeader","expand-row.bs.table":"onExpandRow","collapse-row.bs.table":"onCollapseRow","refresh-options.bs.table":"onRefreshOptions","reset-view.bs.table":"onResetView","refresh.bs.table":"onRefresh"},p.prototype.init=function(){this.initLocale(),this.initContainer(),this.initTable(),this.initHeader(),this.initData(),this.initFooter(),this.initToolbar(),this.initPagination(),this.initBody(),this.initSearchText(),this.initServer()},p.prototype.initLocale=function(){if(this.options.locale){var b=this.options.locale.split(/-|_/);b[0].toLowerCase(),b[1]&&b[1].toUpperCase(),a.fn.bootstrapTable.locales[this.options.locale]?a.extend(this.options,a.fn.bootstrapTable.locales[this.options.locale]):a.fn.bootstrapTable.locales[b.join("-")]?a.extend(this.options,a.fn.bootstrapTable.locales[b.join("-")]):a.fn.bootstrapTable.locales[b[0]]&&a.extend(this.options,a.fn.bootstrapTable.locales[b[0]])}},p.prototype.initContainer=function(){this.$container=a(['
    ','
    ',"top"===this.options.paginationVAlign||"both"===this.options.paginationVAlign?'
    ':"",'
    ','
    ','
    ','
    ',this.options.formatLoadingMessage(),"
    ","
    ",'',"bottom"===this.options.paginationVAlign||"both"===this.options.paginationVAlign?'
    ':"","
    ","
    "].join("")),this.$container.insertAfter(this.$el),this.$tableContainer=this.$container.find(".fixed-table-container"),this.$tableHeader=this.$container.find(".fixed-table-header"),this.$tableBody=this.$container.find(".fixed-table-body"),this.$tableLoading=this.$container.find(".fixed-table-loading"),this.$tableFooter=this.$container.find(".fixed-table-footer"),this.$toolbar=this.$container.find(".fixed-table-toolbar"),this.$pagination=this.$container.find(".fixed-table-pagination"),this.$tableBody.append(this.$el),this.$container.after('
    '),this.$el.addClass(this.options.classes),this.options.striped&&this.$el.addClass("table-striped"),-1!==a.inArray("table-no-bordered",this.options.classes.split(" "))&&this.$tableContainer.addClass("table-no-bordered")},p.prototype.initTable=function(){var b=this,c=[],d=[];if(this.$header=this.$el.find(">thead"),this.$header.length||(this.$header=a("").appendTo(this.$el)),this.$header.find("tr").each(function(){var b=[];a(this).find("th").each(function(){"undefined"!=typeof a(this).data("field")&&a(this).data("field",a(this).data("field")+""),b.push(a.extend({},{title:a(this).html(),"class":a(this).attr("class"),titleTooltip:a(this).attr("title"),rowspan:a(this).attr("rowspan")?+a(this).attr("rowspan"):void 0,colspan:a(this).attr("colspan")?+a(this).attr("colspan"):void 0},a(this).data()))}),c.push(b)}),a.isArray(this.options.columns[0])||(this.options.columns=[this.options.columns]),this.options.columns=a.extend(!0,[],c,this.options.columns),this.columns=[],f(this.options.columns),a.each(this.options.columns,function(c,d){a.each(d,function(d,e){e=a.extend({},p.COLUMN_DEFAULTS,e),"undefined"!=typeof e.fieldIndex&&(b.columns[e.fieldIndex]=e),b.options.columns[c][d]=e})}),!this.options.data.length){var e=[];this.$el.find(">tbody>tr").each(function(c){var f={};f._id=a(this).attr("id"),f._class=a(this).attr("class"),f._data=l(a(this).data()),a(this).find(">td").each(function(d){for(var g,h,i=a(this),j=+i.attr("colspan")||1,k=+i.attr("rowspan")||1;e[c]&&e[c][d];d++);for(g=d;d+j>g;g++)for(h=c;c+k>h;h++)e[h]||(e[h]=[]),e[h][g]=!0;var m=b.columns[d].field;f[m]=a(this).html(),f["_"+m+"_id"]=a(this).attr("id"),f["_"+m+"_class"]=a(this).attr("class"),f["_"+m+"_rowspan"]=a(this).attr("rowspan"),f["_"+m+"_colspan"]=a(this).attr("colspan"),f["_"+m+"_title"]=a(this).attr("title"),f["_"+m+"_data"]=l(a(this).data())}),d.push(f)}),this.options.data=d,d.length&&(this.fromHtml=!0)}},p.prototype.initHeader=function(){var b=this,d={},e=[];this.header={fields:[],styles:[],classes:[],formatters:[],events:[],sorters:[],sortNames:[],cellStyles:[],searchables:[]},a.each(this.options.columns,function(f,g){e.push(""),0===f&&!b.options.cardView&&b.options.detailView&&e.push(c('
    ',b.options.columns.length)),a.each(g,function(a,f){var g="",h="",i="",j="",k=c(' class="%s"',f["class"]),l=(b.options.sortOrder||f.order,"px"),m=f.width;if(void 0===f.width||b.options.cardView||"string"==typeof f.width&&-1!==f.width.indexOf("%")&&(l="%"),f.width&&"string"==typeof f.width&&(m=f.width.replace("%","").replace("px","")),h=c("text-align: %s; ",f.halign?f.halign:f.align),i=c("text-align: %s; ",f.align),j=c("vertical-align: %s; ",f.valign),j+=c("width: %s; ",!f.checkbox&&!f.radio||m?m?m+l:void 0:"36px"),"undefined"!=typeof f.fieldIndex){if(b.header.fields[f.fieldIndex]=f.field,b.header.styles[f.fieldIndex]=i+j,b.header.classes[f.fieldIndex]=k,b.header.formatters[f.fieldIndex]=f.formatter,b.header.events[f.fieldIndex]=f.events,b.header.sorters[f.fieldIndex]=f.sorter,b.header.sortNames[f.fieldIndex]=f.sortName,b.header.cellStyles[f.fieldIndex]=f.cellStyle,b.header.searchables[f.fieldIndex]=f.searchable,!f.visible)return;if(b.options.cardView&&!f.cardVisible)return;d[f.field]=f}e.push(""),e.push(c('
    ',b.options.sortable&&f.sortable?"sortable both":"")),g=f.title,f.checkbox&&(!b.options.singleSelect&&b.options.checkboxHeader&&(g=''),b.header.stateField=f.field),f.radio&&(g="",b.header.stateField=f.field,b.options.singleSelect=!0),e.push(g),e.push("
    "),e.push('
    '),e.push("
    "),e.push("")}),e.push("")}),this.$header.html(e.join("")),this.$header.find("th[data-field]").each(function(){a(this).data(d[a(this).data("field")])}),this.$container.off("click",".th-inner").on("click",".th-inner",function(c){var d=a(this);return b.options.detailView&&d.closest(".bootstrap-table")[0]!==b.$container[0]?!1:void(b.options.sortable&&d.parent().data().sortable&&b.onSort(c))}),this.$header.children().children().off("keypress").on("keypress",function(c){if(b.options.sortable&&a(this).data().sortable){var d=c.keyCode||c.which;13==d&&b.onSort(c)}}),a(window).off("resize.bootstrap-table"),!this.options.showHeader||this.options.cardView?(this.$header.hide(),this.$tableHeader.hide(),this.$tableLoading.css("top",0)):(this.$header.show(),this.$tableHeader.show(),this.$tableLoading.css("top",this.$header.outerHeight()+1),this.getCaret(),a(window).on("resize.bootstrap-table",a.proxy(this.resetWidth,this))),this.$selectAll=this.$header.find('[name="btSelectAll"]'),this.$selectAll.off("click").on("click",function(){var c=a(this).prop("checked");b[c?"checkAll":"uncheckAll"](),b.updateSelected()})},p.prototype.initFooter=function(){!this.options.showFooter||this.options.cardView?this.$tableFooter.hide():this.$tableFooter.show()},p.prototype.initData=function(a,b){this.data="append"===b?this.data.concat(a):"prepend"===b?[].concat(a).concat(this.data):a||this.options.data,this.options.data="append"===b?this.options.data.concat(a):"prepend"===b?[].concat(a).concat(this.options.data):this.data,"server"!==this.options.sidePagination&&this.initSort()},p.prototype.initSort=function(){var b=this,c=this.options.sortName,d="desc"===this.options.sortOrder?-1:1,e=a.inArray(this.options.sortName,this.header.fields);return this.options.customSort!==a.noop?void this.options.customSort.apply(this,[this.options.sortName,this.options.sortOrder]):void(-1!==e&&(this.options.sortStable&&a.each(this.data,function(a,b){b.hasOwnProperty("_position")||(b._position=a)}),this.data.sort(function(f,g){b.header.sortNames[e]&&(c=b.header.sortNames[e]);var i=m(f,c,b.options.escape),j=m(g,c,b.options.escape),k=h(b.header,b.header.sorters[e],[i,j]);return void 0!==k?d*k:((void 0===i||null===i)&&(i=""),(void 0===j||null===j)&&(j=""),b.options.sortStable&&i===j&&(i=f._position,j=g._position),a.isNumeric(i)&&a.isNumeric(j)?(i=parseFloat(i),j=parseFloat(j),j>i?-1*d:d):i===j?0:("string"!=typeof i&&(i=i.toString()),-1===i.localeCompare(j)?-1*d:d))})))},p.prototype.onSort=function(b){var c="keypress"===b.type?a(b.currentTarget):a(b.currentTarget).parent(),d=this.$header.find("th").eq(c.index());return this.$header.add(this.$header_).find("span.order").remove(),this.options.sortName===c.data("field")?this.options.sortOrder="asc"===this.options.sortOrder?"desc":"asc":(this.options.sortName=c.data("field"),this.options.sortOrder="asc"===c.data("order")?"desc":"asc"),this.trigger("sort",this.options.sortName,this.options.sortOrder),c.add(d).data("order",this.options.sortOrder),this.getCaret(),"server"===this.options.sidePagination?void this.initServer(this.options.silentSort):(this.initSort(),void this.initBody())},p.prototype.initToolbar=function(){var b,d,e=this,f=[],g=0,i=0;this.$toolbar.find(".bs-bars").children().length&&a("body").append(a(this.options.toolbar)),this.$toolbar.html(""),("string"==typeof this.options.toolbar||"object"==typeof this.options.toolbar)&&a(c('
    ',this.options.toolbarAlign)).appendTo(this.$toolbar).append(a(this.options.toolbar)),f=[c('
    ',this.options.buttonsAlign,this.options.buttonsAlign)],"string"==typeof this.options.icons&&(this.options.icons=h(null,this.options.icons)),this.options.showPaginationSwitch&&f.push(c('"),this.options.showRefresh&&f.push(c('"),this.options.showToggle&&f.push(c('"),this.options.showColumns&&(f.push(c('
    ',this.options.formatColumns()),'",'","
    ")),f.push("
    "),(this.showToolbar||f.length>2)&&this.$toolbar.append(f.join("")),this.options.showPaginationSwitch&&this.$toolbar.find('button[name="paginationSwitch"]').off("click").on("click",a.proxy(this.togglePagination,this)),this.options.showRefresh&&this.$toolbar.find('button[name="refresh"]').off("click").on("click",a.proxy(this.refresh,this)),this.options.showToggle&&this.$toolbar.find('button[name="toggle"]').off("click").on("click",function(){e.toggleView()}),this.options.showColumns&&(b=this.$toolbar.find(".keep-open"),i<=this.options.minimumCountColumns&&b.find("input").prop("disabled",!0),b.find("li").off("click").on("click",function(a){a.stopImmediatePropagation()}),b.find("input").off("click").on("click",function(){var b=a(this);e.toggleColumn(a(this).val(),b.prop("checked"),!1),e.trigger("column-switch",a(this).data("field"),b.prop("checked"))})),this.options.search&&(f=[],f.push('"),this.$toolbar.append(f.join("")),d=this.$toolbar.find(".search input"),d.off("keyup drop").on("keyup drop",function(b){e.options.searchOnEnterKey&&13!==b.keyCode||a.inArray(b.keyCode,[37,38,39,40])>-1||(clearTimeout(g),g=setTimeout(function(){e.onSearch(b)},e.options.searchTimeOut))}),n()&&d.off("mouseup").on("mouseup",function(a){clearTimeout(g),g=setTimeout(function(){e.onSearch(a)},e.options.searchTimeOut)}))},p.prototype.onSearch=function(b){var c=a.trim(a(b.currentTarget).val());this.options.trimOnSearch&&a(b.currentTarget).val()!==c&&a(b.currentTarget).val(c),c!==this.searchText&&(this.searchText=c,this.options.searchText=c,this.options.pageNumber=1,this.initSearch(),this.updatePagination(),this.trigger("search",c))},p.prototype.initSearch=function(){var b=this;if("server"!==this.options.sidePagination){if(this.options.customSearch!==a.noop)return void this.options.customSearch.apply(this,[this.searchText]);var c=this.searchText&&(this.options.escape?j(this.searchText):this.searchText).toLowerCase(),d=a.isEmptyObject(this.filterColumns)?null:this.filterColumns;this.data=d?a.grep(this.options.data,function(b){for(var c in d)if(a.isArray(d[c])&&-1===a.inArray(b[c],d[c])||b[c]!==d[c])return!1;return!0}):this.options.data,this.data=c?a.grep(this.data,function(d,f){for(var g=0;g-1&&(n=!0)}this.totalPages=~~((this.options.totalRows-1)/this.options.pageSize)+1,this.options.totalPages=this.totalPages}if(this.totalPages>0&&this.options.pageNumber>this.totalPages&&(this.options.pageNumber=this.totalPages),this.pageFrom=(this.options.pageNumber-1)*this.options.pageSize+1,this.pageTo=this.options.pageNumber*this.options.pageSize,this.pageTo>this.options.totalRows&&(this.pageTo=this.options.totalRows),m.push('
    ','',this.options.onlyInfoPagination?this.options.formatDetailPagination(this.options.totalRows):this.options.formatShowingRows(this.pageFrom,this.pageTo,this.options.totalRows),""),!this.options.onlyInfoPagination){m.push('');var r=[c('',"top"===this.options.paginationVAlign||"both"===this.options.paginationVAlign?"dropdown":"dropup"),'",'"),m.push(this.options.formatRecordsPerPage(r.join(""))),m.push(""),m.push("
    ",'")}this.$pagination.html(m.join("")),this.options.onlyInfoPagination||(f=this.$pagination.find(".page-list a"),g=this.$pagination.find(".page-first"),h=this.$pagination.find(".page-pre"),i=this.$pagination.find(".page-next"),j=this.$pagination.find(".page-last"),k=this.$pagination.find(".page-number"),this.options.smartDisplay&&(this.totalPages<=1&&this.$pagination.find("div.pagination").hide(),(p.length<2||this.options.totalRows<=p[0])&&this.$pagination.find("span.page-list").hide(),this.$pagination[this.getData().length?"show":"hide"]()),n&&(this.options.pageSize=this.options.formatAllRows()),f.off("click").on("click",a.proxy(this.onPageListChange,this)),g.off("click").on("click",a.proxy(this.onPageFirst,this)),h.off("click").on("click",a.proxy(this.onPagePre,this)),i.off("click").on("click",a.proxy(this.onPageNext,this)),j.off("click").on("click",a.proxy(this.onPageLast,this)),k.off("click").on("click",a.proxy(this.onPageNumber,this)))},p.prototype.updatePagination=function(b){b&&a(b.currentTarget).hasClass("disabled")||(this.options.maintainSelected||this.resetRows(),this.initPagination(),"server"===this.options.sidePagination?this.initServer():this.initBody(),this.trigger("page-change",this.options.pageNumber,this.options.pageSize))},p.prototype.onPageListChange=function(b){var c=a(b.currentTarget);c.parent().addClass("active").siblings().removeClass("active"),this.options.pageSize=c.text().toUpperCase()===this.options.formatAllRows().toUpperCase()?this.options.formatAllRows():+c.text(),this.$toolbar.find(".page-size").text(this.options.pageSize),this.updatePagination(b)},p.prototype.onPageFirst=function(a){this.options.pageNumber=1,this.updatePagination(a)},p.prototype.onPagePre=function(a){this.options.pageNumber-1===0?this.options.pageNumber=this.options.totalPages:this.options.pageNumber--,this.updatePagination(a)},p.prototype.onPageNext=function(a){this.options.pageNumber+1>this.options.totalPages?this.options.pageNumber=1:this.options.pageNumber++,this.updatePagination(a)},p.prototype.onPageLast=function(a){this.options.pageNumber=this.totalPages,this.updatePagination(a)},p.prototype.onPageNumber=function(b){this.options.pageNumber!==+a(b.currentTarget).text()&&(this.options.pageNumber=+a(b.currentTarget).text(),this.updatePagination(b))},p.prototype.initBody=function(b){var f=this,g=[],i=this.getData();this.trigger("pre-body",i),this.$body=this.$el.find(">tbody"),this.$body.length||(this.$body=a("").appendTo(this.$el)),this.options.pagination&&"server"!==this.options.sidePagination||(this.pageFrom=1,this.pageTo=i.length);for(var k=this.pageFrom-1;k"),this.options.cardView&&g.push(c('
    ',this.header.fields.length)),!this.options.cardView&&this.options.detailView&&g.push("",'',c('',this.options.iconsPrefix,this.options.icons.detailOpen),"",""),a.each(this.header.fields,function(b,e){var i="",j=m(n,e,f.options.escape),l="",q={},r="",s=f.header.classes[b],t="",u="",v="",w="",x=f.columns[b];if(!(f.fromHtml&&"undefined"==typeof j||!x.visible||f.options.cardView&&!x.cardVisible)){if(o=c('style="%s"',p.concat(f.header.styles[b]).join("; ")),n["_"+e+"_id"]&&(r=c(' id="%s"',n["_"+e+"_id"])),n["_"+e+"_class"]&&(s=c(' class="%s"',n["_"+e+"_class"])),n["_"+e+"_rowspan"]&&(u=c(' rowspan="%s"',n["_"+e+"_rowspan"])),n["_"+e+"_colspan"]&&(v=c(' colspan="%s"',n["_"+e+"_colspan"])),n["_"+e+"_title"]&&(w=c(' title="%s"',n["_"+e+"_title"])),q=h(f.header,f.header.cellStyles[b],[j,n,k,e],q),q.classes&&(s=c(' class="%s"',q.classes)),q.css){var y=[];for(var z in q.css)y.push(z+": "+q.css[z]);o=c('style="%s"',y.concat(f.header.styles[b]).join("; "))}j=h(x,f.header.formatters[b],[j,n,k],j),n["_"+e+"_data"]&&!a.isEmptyObject(n["_"+e+"_data"])&&a.each(n["_"+e+"_data"],function(a,b){"index"!==a&&(t+=c(' data-%s="%s"',a,b))}),x.checkbox||x.radio?(l=x.checkbox?"checkbox":l,l=x.radio?"radio":l,i=[c(f.options.cardView?'
    ':'',x["class"]||""),"",f.header.formatters[b]&&"string"==typeof j?j:"",f.options.cardView?"
    ":""].join(""),n[f.header.stateField]=j===!0||j&&j.checked):(j="undefined"==typeof j||null===j?f.options.undefinedText:j,i=f.options.cardView?['
    ',f.options.showHeader?c('%s',o,d(f.columns,"field","title",e)):"",c('%s',j),"
    "].join(""):[c("",r,s,o,t,u,v,w),j,""].join(""),f.options.cardView&&f.options.smartDisplay&&""===j&&(i='
    ')),g.push(i)}}),this.options.cardView&&g.push("
    "),g.push("")}g.length||g.push('',c('%s',this.$header.find("th").length,this.options.formatNoMatches()),""),this.$body.html(g.join("")),b||this.scrollTo(0),this.$body.find("> tr[data-index] > td").off("click dblclick").on("click dblclick",function(b){var d=a(this),g=d.parent(),h=f.data[g.data("index")],i=d[0].cellIndex,j=f.getVisibleFields(),k=j[f.options.detailView&&!f.options.cardView?i-1:i],l=f.columns[e(f.columns,k)],n=m(h,k,f.options.escape);if(!d.find(".detail-icon").length&&(f.trigger("click"===b.type?"click-cell":"dbl-click-cell",k,n,h,d),f.trigger("click"===b.type?"click-row":"dbl-click-row",h,g,k), +"click"===b.type&&f.options.clickToSelect&&l.clickToSelect)){var o=g.find(c('[name="%s"]',f.options.selectItemName));o.length&&o[0].click()}}),this.$body.find("> tr[data-index] > td > .detail-icon").off("click").on("click",function(){var b=a(this),d=b.parent().parent(),e=d.data("index"),g=i[e];if(d.next().is("tr.detail-view"))b.find("i").attr("class",c("%s %s",f.options.iconsPrefix,f.options.icons.detailOpen)),d.next().remove(),f.trigger("collapse-row",e,g);else{b.find("i").attr("class",c("%s %s",f.options.iconsPrefix,f.options.icons.detailClose)),d.after(c('',d.find("td").length));var j=d.next().find("td"),k=h(f.options,f.options.detailFormatter,[e,g,j],"");1===j.length&&j.append(k),f.trigger("expand-row",e,g,j)}f.resetView()}),this.$selectItem=this.$body.find(c('[name="%s"]',this.options.selectItemName)),this.$selectItem.off("click").on("click",function(b){b.stopImmediatePropagation();var c=a(this),d=c.prop("checked"),e=f.data[c.data("index")];f.options.maintainSelected&&a(this).is(":radio")&&a.each(f.options.data,function(a,b){b[f.header.stateField]=!1}),e[f.header.stateField]=d,f.options.singleSelect&&(f.$selectItem.not(this).each(function(){f.data[a(this).data("index")][f.header.stateField]=!1}),f.$selectItem.filter(":checked").not(this).prop("checked",!1)),f.updateSelected(),f.trigger(d?"check":"uncheck",e,c)}),a.each(this.header.events,function(b,c){if(c){"string"==typeof c&&(c=h(null,c));var d=f.header.fields[b],e=a.inArray(d,f.getVisibleFields());f.options.detailView&&!f.options.cardView&&(e+=1);for(var g in c)f.$body.find(">tr:not(.no-records-found)").each(function(){var b=a(this),h=b.find(f.options.cardView?".card-view":"td").eq(e),i=g.indexOf(" "),j=g.substring(0,i),k=g.substring(i+1),l=c[g];h.find(k).off(j).on(j,function(a){var c=b.data("index"),e=f.data[c],g=e[d];l.apply(this,[a,g,e,c])})})}}),this.updateSelected(),this.resetView(),this.trigger("post-body",i)},p.prototype.initServer=function(b,c,d){var e,f=this,g={},i={searchText:this.searchText,sortName:this.options.sortName,sortOrder:this.options.sortOrder};this.options.pagination&&(i.pageSize=this.options.pageSize===this.options.formatAllRows()?this.options.totalRows:this.options.pageSize,i.pageNumber=this.options.pageNumber),(d||this.options.url||this.options.ajax)&&("limit"===this.options.queryParamsType&&(i={search:i.searchText,sort:i.sortName,order:i.sortOrder},this.options.pagination&&(i.offset=this.options.pageSize===this.options.formatAllRows()?0:this.options.pageSize*(this.options.pageNumber-1),i.limit=this.options.pageSize===this.options.formatAllRows()?this.options.totalRows:this.options.pageSize)),a.isEmptyObject(this.filterColumnsPartial)||(i.filter=JSON.stringify(this.filterColumnsPartial,null)),g=h(this.options,this.options.queryParams,[i],g),a.extend(g,c||{}),g!==!1&&(b||this.$tableLoading.show(),e=a.extend({},h(null,this.options.ajaxOptions),{type:this.options.method,url:d||this.options.url,data:"application/json"===this.options.contentType&&"post"===this.options.method?JSON.stringify(g):g,cache:this.options.cache,contentType:this.options.contentType,dataType:this.options.dataType,success:function(a){a=h(f.options,f.options.responseHandler,[a],a),f.load(a),f.trigger("load-success",a),b||f.$tableLoading.hide()},error:function(a){f.trigger("load-error",a.status,a),b||f.$tableLoading.hide()}}),this.options.ajax?h(this,this.options.ajax,[e],null):(this._xhr&&4!==this._xhr.readyState&&this._xhr.abort(),this._xhr=a.ajax(e))))},p.prototype.initSearchText=function(){if(this.options.search&&""!==this.options.searchText){var a=this.$toolbar.find(".search input");a.val(this.options.searchText),this.onSearch({currentTarget:a})}},p.prototype.getCaret=function(){var b=this;a.each(this.$header.find("th"),function(c,d){a(d).find(".sortable").removeClass("desc asc").addClass(a(d).data("field")===b.options.sortName?b.options.sortOrder:"both")})},p.prototype.updateSelected=function(){var b=this.$selectItem.filter(":enabled").length&&this.$selectItem.filter(":enabled").length===this.$selectItem.filter(":enabled").filter(":checked").length;this.$selectAll.add(this.$selectAll_).prop("checked",b),this.$selectItem.each(function(){a(this).closest("tr")[a(this).prop("checked")?"addClass":"removeClass"]("selected")})},p.prototype.updateRows=function(){var b=this;this.$selectItem.each(function(){b.data[a(this).data("index")][b.header.stateField]=a(this).prop("checked")})},p.prototype.resetRows=function(){var b=this;a.each(this.data,function(a,c){b.$selectAll.prop("checked",!1),b.$selectItem.prop("checked",!1),b.header.stateField&&(c[b.header.stateField]=!1)})},p.prototype.trigger=function(b){var c=Array.prototype.slice.call(arguments,1);b+=".bs.table",this.options[p.EVENTS[b]].apply(this.options,c),this.$el.trigger(a.Event(b),c),this.options.onAll(b,c),this.$el.trigger(a.Event("all.bs.table"),[b,c])},p.prototype.resetHeader=function(){clearTimeout(this.timeoutId_),this.timeoutId_=setTimeout(a.proxy(this.fitHeader,this),this.$el.is(":hidden")?100:0)},p.prototype.fitHeader=function(){var b,d,e,f,h=this;if(h.$el.is(":hidden"))return void(h.timeoutId_=setTimeout(a.proxy(h.fitHeader,h),100));if(b=this.$tableBody.get(0),d=b.scrollWidth>b.clientWidth&&b.scrollHeight>b.clientHeight+this.$header.outerHeight()?g():0,this.$el.css("margin-top",-this.$header.outerHeight()),e=a(":focus"),e.length>0){var i=e.parents("th");if(i.length>0){var j=i.attr("data-field");if(void 0!==j){var k=this.$header.find("[data-field='"+j+"']");k.length>0&&k.find(":input").addClass("focus-temp")}}}this.$header_=this.$header.clone(!0,!0),this.$selectAll_=this.$header_.find('[name="btSelectAll"]'),this.$tableHeader.css({"margin-right":d}).find("table").css("width",this.$el.outerWidth()).html("").attr("class",this.$el.attr("class")).append(this.$header_),f=a(".focus-temp:visible:eq(0)"),f.length>0&&(f.focus(),this.$header.find(".focus-temp").removeClass("focus-temp")),this.$header.find("th[data-field]").each(function(){h.$header_.find(c('th[data-field="%s"]',a(this).data("field"))).data(a(this).data())});var l=this.getVisibleFields(),m=this.$header_.find("th");this.$body.find(">tr:first-child:not(.no-records-found) > *").each(function(b){var d=a(this),e=b;h.options.detailView&&!h.options.cardView&&(0===b&&h.$header_.find("th.detail").find(".fht-cell").width(d.innerWidth()),e=b-1);var f=h.$header_.find(c('th[data-field="%s"]',l[e]));f.length>1&&(f=a(m[d[0].cellIndex])),f.find(".fht-cell").width(d.innerWidth())}),this.$tableBody.off("scroll").on("scroll",function(){h.$tableHeader.scrollLeft(a(this).scrollLeft()),h.options.showFooter&&!h.options.cardView&&h.$tableFooter.scrollLeft(a(this).scrollLeft())}),h.trigger("post-header")},p.prototype.resetFooter=function(){var b=this,d=b.getData(),e=[];this.options.showFooter&&!this.options.cardView&&(!this.options.cardView&&this.options.detailView&&e.push('
     
    '),a.each(this.columns,function(a,f){var g,i="",j="",k=[],l={},m=c(' class="%s"',f["class"]);if(f.visible&&(!b.options.cardView||f.cardVisible)){if(i=c("text-align: %s; ",f.falign?f.falign:f.align),j=c("vertical-align: %s; ",f.valign),l=h(null,b.options.footerStyle),l&&l.css)for(g in l.css)k.push(g+": "+l.css[g]);e.push(""),e.push('
    '),e.push(h(f,f.footerFormatter,[d]," ")||" "),e.push("
    "),e.push('
    '),e.push(""),e.push("")}}),this.$tableFooter.find("tr").html(e.join("")),this.$tableFooter.show(),clearTimeout(this.timeoutFooter_),this.timeoutFooter_=setTimeout(a.proxy(this.fitFooter,this),this.$el.is(":hidden")?100:0))},p.prototype.fitFooter=function(){var b,c,d;return clearTimeout(this.timeoutFooter_),this.$el.is(":hidden")?void(this.timeoutFooter_=setTimeout(a.proxy(this.fitFooter,this),100)):(c=this.$el.css("width"),d=c>this.$tableBody.width()?g():0,this.$tableFooter.css({"margin-right":d}).find("table").css("width",c).attr("class",this.$el.attr("class")),b=this.$tableFooter.find("td"),void this.$body.find(">tr:first-child:not(.no-records-found) > *").each(function(c){var d=a(this);b.eq(c).find(".fht-cell").width(d.innerWidth())}))},p.prototype.toggleColumn=function(a,b,d){if(-1!==a&&(this.columns[a].visible=b,this.initHeader(),this.initSearch(),this.initPagination(),this.initBody(),this.options.showColumns)){var e=this.$toolbar.find(".keep-open input").prop("disabled",!1);d&&e.filter(c('[value="%s"]',a)).prop("checked",b),e.filter(":checked").length<=this.options.minimumCountColumns&&e.filter(":checked").prop("disabled",!0)}},p.prototype.toggleRow=function(a,b,d){-1!==a&&this.$body.find("undefined"!=typeof a?c('tr[data-index="%s"]',a):c('tr[data-uniqueid="%s"]',b))[d?"show":"hide"]()},p.prototype.getVisibleFields=function(){var b=this,c=[];return a.each(this.header.fields,function(a,d){var f=b.columns[e(b.columns,d)];f.visible&&c.push(d)}),c},p.prototype.resetView=function(a){var b=0;if(a&&a.height&&(this.options.height=a.height),this.$selectAll.prop("checked",this.$selectItem.length>0&&this.$selectItem.length===this.$selectItem.filter(":checked").length),this.options.height){var c=k(this.$toolbar),d=k(this.$pagination),e=this.options.height-c-d;this.$tableContainer.css("height",e+"px")}return this.options.cardView?(this.$el.css("margin-top","0"),this.$tableContainer.css("padding-bottom","0"),void this.$tableFooter.hide()):(this.options.showHeader&&this.options.height?(this.$tableHeader.show(),this.resetHeader(),b+=this.$header.outerHeight()):(this.$tableHeader.hide(),this.trigger("post-header")),this.options.showFooter&&(this.resetFooter(),this.options.height&&(b+=this.$tableFooter.outerHeight()+1)),this.getCaret(),this.$tableContainer.css("padding-bottom",b+"px"),void this.trigger("reset-view"))},p.prototype.getData=function(b){return!this.searchText&&a.isEmptyObject(this.filterColumns)&&a.isEmptyObject(this.filterColumnsPartial)?b?this.options.data.slice(this.pageFrom-1,this.pageTo):this.options.data:b?this.data.slice(this.pageFrom-1,this.pageTo):this.data},p.prototype.load=function(b){var c=!1;"server"===this.options.sidePagination?(this.options.totalRows=b.total,c=b.fixedScroll,b=b[this.options.dataField]):a.isArray(b)||(c=b.fixedScroll,b=b.data),this.initData(b),this.initSearch(),this.initPagination(),this.initBody(c)},p.prototype.append=function(a){this.initData(a,"append"),this.initSearch(),this.initPagination(),this.initSort(),this.initBody(!0)},p.prototype.prepend=function(a){this.initData(a,"prepend"),this.initSearch(),this.initPagination(),this.initSort(),this.initBody(!0)},p.prototype.remove=function(b){var c,d,e=this.options.data.length;if(b.hasOwnProperty("field")&&b.hasOwnProperty("values")){for(c=e-1;c>=0;c--)d=this.options.data[c],d.hasOwnProperty(b.field)&&-1!==a.inArray(d[b.field],b.values)&&this.options.data.splice(c,1);e!==this.options.data.length&&(this.initSearch(),this.initPagination(),this.initSort(),this.initBody(!0))}},p.prototype.removeAll=function(){this.options.data.length>0&&(this.options.data.splice(0,this.options.data.length),this.initSearch(),this.initPagination(),this.initBody(!0))},p.prototype.getRowByUniqueId=function(a){var b,c,d,e=this.options.uniqueId,f=this.options.data.length,g=null;for(b=f-1;b>=0;b--){if(c=this.options.data[b],c.hasOwnProperty(e))d=c[e];else{if(!c._data.hasOwnProperty(e))continue;d=c._data[e]}if("string"==typeof d?a=a.toString():"number"==typeof d&&(Number(d)===d&&d%1===0?a=parseInt(a):d===Number(d)&&0!==d&&(a=parseFloat(a))),d===a){g=c;break}}return g},p.prototype.removeByUniqueId=function(a){var b=this.options.data.length,c=this.getRowByUniqueId(a);c&&this.options.data.splice(this.options.data.indexOf(c),1),b!==this.options.data.length&&(this.initSearch(),this.initPagination(),this.initBody(!0))},p.prototype.updateByUniqueId=function(b){var c=this,d=a.isArray(b)?b:[b];a.each(d,function(b,d){var e;d.hasOwnProperty("id")&&d.hasOwnProperty("row")&&(e=a.inArray(c.getRowByUniqueId(d.id),c.options.data),-1!==e&&a.extend(c.options.data[e],d.row))}),this.initSearch(),this.initSort(),this.initBody(!0)},p.prototype.insertRow=function(a){a.hasOwnProperty("index")&&a.hasOwnProperty("row")&&(this.data.splice(a.index,0,a.row),this.initSearch(),this.initPagination(),this.initSort(),this.initBody(!0))},p.prototype.updateRow=function(b){var c=this,d=a.isArray(b)?b:[b];a.each(d,function(b,d){d.hasOwnProperty("index")&&d.hasOwnProperty("row")&&a.extend(c.options.data[d.index],d.row)}),this.initSearch(),this.initSort(),this.initBody(!0)},p.prototype.showRow=function(a){(a.hasOwnProperty("index")||a.hasOwnProperty("uniqueId"))&&this.toggleRow(a.index,a.uniqueId,!0)},p.prototype.hideRow=function(a){(a.hasOwnProperty("index")||a.hasOwnProperty("uniqueId"))&&this.toggleRow(a.index,a.uniqueId,!1)},p.prototype.getRowsHidden=function(b){var c=a(this.$body[0]).children().filter(":hidden"),d=0;if(b)for(;dtr");if(this.options.detailView&&!this.options.cardView&&(g+=1),e=j.eq(f).find(">td").eq(g),!(0>f||0>g||f>=this.data.length)){for(c=f;f+h>c;c++)for(d=g;g+i>d;d++)j.eq(c).find(">td").eq(d).hide();e.attr("rowspan",h).attr("colspan",i).show()}},p.prototype.updateCell=function(a){a.hasOwnProperty("index")&&a.hasOwnProperty("field")&&a.hasOwnProperty("value")&&(this.data[a.index][a.field]=a.value,a.reinit!==!1&&(this.initSort(),this.initBody(!0)))},p.prototype.getOptions=function(){return this.options},p.prototype.getSelections=function(){var b=this;return a.grep(this.options.data,function(a){return a[b.header.stateField]})},p.prototype.getAllSelections=function(){var b=this;return a.grep(this.options.data,function(a){return a[b.header.stateField]})},p.prototype.checkAll=function(){this.checkAll_(!0)},p.prototype.uncheckAll=function(){this.checkAll_(!1)},p.prototype.checkInvert=function(){var b=this,c=b.$selectItem.filter(":enabled"),d=c.filter(":checked");c.each(function(){a(this).prop("checked",!a(this).prop("checked"))}),b.updateRows(),b.updateSelected(),b.trigger("uncheck-some",d),d=b.getSelections(),b.trigger("check-some",d)},p.prototype.checkAll_=function(a){var b;a||(b=this.getSelections()),this.$selectAll.add(this.$selectAll_).prop("checked",a),this.$selectItem.filter(":enabled").prop("checked",a),this.updateRows(),a&&(b=this.getSelections()),this.trigger(a?"check-all":"uncheck-all",b)},p.prototype.check=function(a){this.check_(!0,a)},p.prototype.uncheck=function(a){this.check_(!1,a)},p.prototype.check_=function(a,b){var d=this.$selectItem.filter(c('[data-index="%s"]',b)).prop("checked",a);this.data[b][this.header.stateField]=a,this.updateSelected(),this.trigger(a?"check":"uncheck",this.data[b],d)},p.prototype.checkBy=function(a){this.checkBy_(!0,a)},p.prototype.uncheckBy=function(a){this.checkBy_(!1,a)},p.prototype.checkBy_=function(b,d){if(d.hasOwnProperty("field")&&d.hasOwnProperty("values")){var e=this,f=[];a.each(this.options.data,function(g,h){if(!h.hasOwnProperty(d.field))return!1;if(-1!==a.inArray(h[d.field],d.values)){var i=e.$selectItem.filter(":enabled").filter(c('[data-index="%s"]',g)).prop("checked",b);h[e.header.stateField]=b,f.push(h),e.trigger(b?"check":"uncheck",h,i)}}),this.updateSelected(),this.trigger(b?"check-some":"uncheck-some",f)}},p.prototype.destroy=function(){this.$el.insertBefore(this.$container),a(this.options.toolbar).insertBefore(this.$el),this.$container.next().remove(),this.$container.remove(),this.$el.html(this.$el_.html()).css("margin-top","0").attr("class",this.$el_.attr("class")||"")},p.prototype.showLoading=function(){this.$tableLoading.show()},p.prototype.hideLoading=function(){this.$tableLoading.hide()},p.prototype.togglePagination=function(){this.options.pagination=!this.options.pagination;var a=this.$toolbar.find('button[name="paginationSwitch"] i');this.options.pagination?a.attr("class",this.options.iconsPrefix+" "+this.options.icons.paginationSwitchDown):a.attr("class",this.options.iconsPrefix+" "+this.options.icons.paginationSwitchUp),this.updatePagination()},p.prototype.refresh=function(a){a&&a.url&&(this.options.pageNumber=1),this.initServer(a&&a.silent,a&&a.query,a&&a.url),this.trigger("refresh",a)},p.prototype.resetWidth=function(){this.options.showHeader&&this.options.height&&this.fitHeader(),this.options.showFooter&&this.fitFooter()},p.prototype.showColumn=function(a){this.toggleColumn(e(this.columns,a),!0,!0)},p.prototype.hideColumn=function(a){this.toggleColumn(e(this.columns,a),!1,!0)},p.prototype.getHiddenColumns=function(){return a.grep(this.columns,function(a){return!a.visible})},p.prototype.getVisibleColumns=function(){return a.grep(this.columns,function(a){return a.visible})},p.prototype.toggleAllColumns=function(b){if(a.each(this.columns,function(a){this.columns[a].visible=b}),this.initHeader(),this.initSearch(),this.initPagination(),this.initBody(),this.options.showColumns){var c=this.$toolbar.find(".keep-open input").prop("disabled",!1);c.filter(":checked").length<=this.options.minimumCountColumns&&c.filter(":checked").prop("disabled",!0)}},p.prototype.showAllColumns=function(){this.toggleAllColumns(!0)},p.prototype.hideAllColumns=function(){this.toggleAllColumns(!1)},p.prototype.filterBy=function(b){this.filterColumns=a.isEmptyObject(b)?{}:b,this.options.pageNumber=1,this.initSearch(),this.updatePagination()},p.prototype.scrollTo=function(a){return"string"==typeof a&&(a="bottom"===a?this.$tableBody[0].scrollHeight:0),"number"==typeof a&&this.$tableBody.scrollTop(a),"undefined"==typeof a?this.$tableBody.scrollTop():void 0},p.prototype.getScrollPosition=function(){return this.scrollTo()},p.prototype.selectPage=function(a){a>0&&a<=this.options.totalPages&&(this.options.pageNumber=a,this.updatePagination())},p.prototype.prevPage=function(){this.options.pageNumber>1&&(this.options.pageNumber--,this.updatePagination())},p.prototype.nextPage=function(){this.options.pageNumber tr[data-index="%s"]',b));d.next().is("tr.detail-view")===(a?!1:!0)&&d.find("> td > .detail-icon").click()},p.prototype.expandRow=function(a){this.expandRow_(!0,a)},p.prototype.collapseRow=function(a){this.expandRow_(!1,a)},p.prototype.expandAllRows=function(b){if(b){var d=this.$body.find(c('> tr[data-index="%s"]',0)),e=this,f=null,g=!1,h=-1;if(d.next().is("tr.detail-view")?d.next().next().is("tr.detail-view")||(d.next().find(".detail-icon").click(),g=!0):(d.find("> td > .detail-icon").click(),g=!0),g)try{h=setInterval(function(){f=e.$body.find("tr.detail-view").last().find(".detail-icon"),f.length>0?f.click():clearInterval(h)},1)}catch(i){clearInterval(h)}}else for(var j=this.$body.children(),k=0;k.btn-group"),g=f.find("div.export");if(!g.length){g=a(['
    ','",'","
    "].join("")).appendTo(f);var h=g.find(".dropdown-menu"),i=this.options.exportTypes;if("string"==typeof this.options.exportTypes){var j=this.options.exportTypes.slice(1,-1).replace(/ /g,"").split(",");i=[],a.each(j,function(a,b){i.push(b.slice(1,-1))})}a.each(i,function(a,b){c.hasOwnProperty(b)&&h.append(['
  • ','',c[b],"","
  • "].join(""))}),h.find("li").click(function(){var b=a(this).data("type"),c=function(){d.$el.tableExport(a.extend({},d.options.exportOptions,{type:b,escape:!1}))};if("all"===d.options.exportDataType&&d.options.pagination)d.$el.one("server"===d.options.sidePagination?"post-body.bs.table":"page-change.bs.table",function(){c(),d.togglePagination()}),d.togglePagination();else if("selected"===d.options.exportDataType){var e=d.getData(),f=d.getAllSelections();d.load(f),c(),d.load(e)}else c()})}}}}(jQuery); diff --git a/web/gui/dashboard/lib/bootstrap-toggle-2.2.2.min.js b/web/gui/dashboard/lib/bootstrap-toggle-2.2.2.min.js new file mode 100644 index 0000000..a11e156 --- /dev/null +++ b/web/gui/dashboard/lib/bootstrap-toggle-2.2.2.min.js @@ -0,0 +1,10 @@ +/*! ======================================================================== + * Bootstrap Toggle: bootstrap-toggle.js v2.2.0 + * http://www.bootstraptoggle.com + * ======================================================================== + * Copyright 2014 Min Hur, The New York Times Company + * Licensed under MIT + * SPDX-License-Identifier: MIT + * ======================================================================== */ ++function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.toggle"),f="object"==typeof b&&b;e||d.data("bs.toggle",e=new c(this,f)),"string"==typeof b&&e[b]&&e[b]()})}var c=function(b,c){this.$element=a(b),this.options=a.extend({},this.defaults(),c),this.render()};c.VERSION="2.2.0",c.DEFAULTS={on:"On",off:"Off",onstyle:"primary",offstyle:"default",size:"normal",style:"",width:null,height:null},c.prototype.defaults=function(){return{on:this.$element.attr("data-on")||c.DEFAULTS.on,off:this.$element.attr("data-off")||c.DEFAULTS.off,onstyle:this.$element.attr("data-onstyle")||c.DEFAULTS.onstyle,offstyle:this.$element.attr("data-offstyle")||c.DEFAULTS.offstyle,size:this.$element.attr("data-size")||c.DEFAULTS.size,style:this.$element.attr("data-style")||c.DEFAULTS.style,width:this.$element.attr("data-width")||c.DEFAULTS.width,height:this.$element.attr("data-height")||c.DEFAULTS.height}},c.prototype.render=function(){this._onstyle="btn-"+this.options.onstyle,this._offstyle="btn-"+this.options.offstyle;var b="large"===this.options.size?"btn-lg":"small"===this.options.size?"btn-sm":"mini"===this.options.size?"btn-xs":"",c=a('